From 5f468cc21ef621151c200edfeea0411342c6d8bb Mon Sep 17 00:00:00 2001
From: Kousuke Saruta
Date: Fri, 11 Sep 2020 09:11:35 +0900
Subject: [PATCH 0001/1009] [SPARK-32822][SQL] Change the number of partitions
 to zero when a range is empty with WholeStageCodegen disabled or fallen back

### What changes were proposed in this pull request?

This PR changes the behavior of RangeExec when WholeStageCodegen is disabled or fallen back, so that the number of partitions becomes zero when a range is empty.

In the current master, if WholeStageCodegen takes effect, the number of partitions of an empty range is already zero.
```
spark.range(1, 1, 1, 1000).rdd.getNumPartitions
res0: Int = 0
```
But it is not if WholeStageCodegen is disabled or fallen back.
```
spark.conf.set("spark.sql.codegen.wholeStage", false)
spark.range(1, 1, 1, 1000).rdd.getNumPartitions
res2: Int = 1000
```

### Why are the changes needed?

To achieve better performance even when WholeStageCodegen is disabled or fallen back.

### Does this PR introduce _any_ user-facing change?

Yes. The number of partitions returned by `getNumPartitions` for an empty range changes when WholeStageCodegen is disabled.

### How was this patch tested?

New test.

Closes #29681 from sarutak/zero-size-range.

Authored-by: Kousuke Saruta
Signed-off-by: Takeshi Yamamuro
---
 .../execution/basicPhysicalOperators.scala | 105 ++++++++++--------
 .../spark/sql/execution/PlannerSuite.scala |   7 ++
 2 files changed, 63 insertions(+), 49 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
index c240a182d32bb..1f70fde3f7654 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
@@ -371,6 +371,7 @@ case class RangeExec(range: org.apache.spark.sql.catalyst.plans.logical.Range)
   val step: Long = range.step
   val numSlices: Int = range.numSlices.getOrElse(sparkContext.defaultParallelism)
   val numElements: BigInt = range.numElements
+  val isEmptyRange: Boolean = start == end || (start < end ^ 0 < step)
 
   override val output: Seq[Attribute] = range.output
 
@@ -396,7 +397,7 @@ case class RangeExec(range: org.apache.spark.sql.catalyst.plans.logical.Range)
   }
 
   override def inputRDDs(): Seq[RDD[InternalRow]] = {
-    val rdd = if (start == end || (start < end ^ 0 < step)) {
+    val rdd = if (isEmptyRange) {
       new EmptyRDD[InternalRow](sqlContext.sparkContext)
     } else {
       sqlContext.sparkContext.parallelize(0 until numSlices, numSlices).map(i => InternalRow(i))
@@ -562,58 +563,64 @@ case class RangeExec(range: org.apache.spark.sql.catalyst.plans.logical.Range)
 
   protected override def doExecute(): RDD[InternalRow] = {
     val numOutputRows = longMetric("numOutputRows")
-    sqlContext
-      .sparkContext
-      .parallelize(0 until numSlices, numSlices)
-      .mapPartitionsWithIndex { (i, _) =>
-        val partitionStart = (i * numElements) / numSlices * step + start
-        val partitionEnd = (((i + 1) * numElements) / numSlices) * step + start
-        def getSafeMargin(bi: BigInt): Long =
-          if (bi.isValidLong) {
-            bi.toLong
-          } else if (bi > 0) {
-            Long.MaxValue
-          } else {
-            Long.MinValue
-          }
-        val safePartitionStart = getSafeMargin(partitionStart)
-        val safePartitionEnd = getSafeMargin(partitionEnd)
-        val rowSize = UnsafeRow.calculateBitSetWidthInBytes(1) + LongType.defaultSize
-        val unsafeRow = UnsafeRow.createFromByteArray(rowSize, 1)
-        val taskContext =
TaskContext.get() - - val iter = new Iterator[InternalRow] { - private[this] var number: Long = safePartitionStart - private[this] var overflow: Boolean = false - private[this] val inputMetrics = taskContext.taskMetrics().inputMetrics - - override def hasNext = - if (!overflow) { - if (step > 0) { - number < safePartitionEnd - } else { - number > safePartitionEnd - } - } else false - - override def next() = { - val ret = number - number += step - if (number < ret ^ step < 0) { - // we have Long.MaxValue + Long.MaxValue < Long.MaxValue - // and Long.MinValue + Long.MinValue > Long.MinValue, so iff the step causes a step - // back, we are pretty sure that we have an overflow. - overflow = true + if (isEmptyRange) { + new EmptyRDD[InternalRow](sqlContext.sparkContext) + } else { + sqlContext + .sparkContext + .parallelize(0 until numSlices, numSlices) + .mapPartitionsWithIndex { (i, _) => + val partitionStart = (i * numElements) / numSlices * step + start + val partitionEnd = (((i + 1) * numElements) / numSlices) * step + start + + def getSafeMargin(bi: BigInt): Long = + if (bi.isValidLong) { + bi.toLong + } else if (bi > 0) { + Long.MaxValue + } else { + Long.MinValue } - numOutputRows += 1 - inputMetrics.incRecordsRead(1) - unsafeRow.setLong(0, ret) - unsafeRow + val safePartitionStart = getSafeMargin(partitionStart) + val safePartitionEnd = getSafeMargin(partitionEnd) + val rowSize = UnsafeRow.calculateBitSetWidthInBytes(1) + LongType.defaultSize + val unsafeRow = UnsafeRow.createFromByteArray(rowSize, 1) + val taskContext = TaskContext.get() + + val iter = new Iterator[InternalRow] { + private[this] var number: Long = safePartitionStart + private[this] var overflow: Boolean = false + private[this] val inputMetrics = taskContext.taskMetrics().inputMetrics + + override def hasNext = + if (!overflow) { + if (step > 0) { + number < safePartitionEnd + } else { + number > safePartitionEnd + } + } else false + + override def next() = { + val ret = number + number += step + if (number < ret ^ step < 0) { + // we have Long.MaxValue + Long.MaxValue < Long.MaxValue + // and Long.MinValue + Long.MinValue > Long.MinValue, so iff the step causes a step + // back, we are pretty sure that we have an overflow. 
+ overflow = true + } + + numOutputRows += 1 + inputMetrics.incRecordsRead(1) + unsafeRow.setLong(0, ret) + unsafeRow + } } + new InterruptibleIterator(taskContext, iter) } - new InterruptibleIterator(taskContext, iter) - } + } } override def simpleString(maxFields: Int): String = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index d428b7ebc0e91..ca52e51c87ea7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -994,6 +994,13 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } } } + + testWithWholeStageCodegenOnAndOff("Change the number of partitions to zero " + + "when a range is empty") { _ => + val range = spark.range(1, 1, 1, 1000) + val numPartitions = range.rdd.getNumPartitions + assert(numPartitions == 0) + } } // Used for unit-testing EnsureRequirements From 328d81a2d1131742bcfba5117896c093db39e721 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 11 Sep 2020 09:22:56 +0900 Subject: [PATCH 0002/1009] [SPARK-32677][SQL][DOCS][MINOR] Improve code comment in CreateFunctionCommand ### What changes were proposed in this pull request? We made a mistake in https://github.com/apache/spark/pull/29502, as there is no code comment to explain why we can't load the UDF class when creating functions. This PR improves the code comment. ### Why are the changes needed? To avoid making the same mistake. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A Closes #29713 from cloud-fan/comment. Authored-by: Wenchen Fan Signed-off-by: Takeshi Yamamuro --- .../org/apache/spark/sql/execution/command/functions.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala index fae8de4780102..d76b4b8894783 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala @@ -88,7 +88,9 @@ case class CreateFunctionCommand( } else { // For a permanent, we will store the metadata into underlying external catalog. // This function will be loaded into the FunctionRegistry when a query uses it. - // We do not load it into FunctionRegistry right now. + // We do not load it into FunctionRegistry right now, to avoid loading the resource and + // UDF class immediately, as the Spark application to create the function may not have + // access to the resource and/or UDF class. catalog.createFunction(func, ignoreIfExists) } } From fe2ab255d14bbccb72b95ed776b74e86cb9762b6 Mon Sep 17 00:00:00 2001 From: yangjiang Date: Fri, 11 Sep 2020 08:05:34 -0500 Subject: [PATCH 0003/1009] [MINOR][SQL] Fix a typo at 'spark.sql.sources.fileCompressionFactor' error message in SQLConf ### What changes were proposed in this pull request? fix typo in SQLConf ### Why are the changes needed? typo fix to increase readability ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? no test Closes #29668 from Ted-Jiang/fix_annotate. 
Authored-by: yangjiang Signed-off-by: Sean Owen --- .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index dca421a09da62..dae715ae827e2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -573,7 +573,7 @@ object SQLConf { " a heavily underestimated result.") .version("2.3.1") .doubleConf - .checkValue(_ > 0, "the value of fileDataSizeFactor must be greater than 0") + .checkValue(_ > 0, "the value of fileCompressionFactor must be greater than 0") .createWithDefault(1.0) val PARQUET_SCHEMA_MERGING_ENABLED = buildConf("spark.sql.parquet.mergeSchema") From 9f4f49cbaa3def9f7d8573629ff3b6cbd6833b2f Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 11 Sep 2020 06:15:58 -0700 Subject: [PATCH 0004/1009] [SPARK-32853][SQL] Consecutive save/load calls in DataFrame/StreamReader/Writer should not fail ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/29328 In https://github.com/apache/spark/pull/29328 , we forbid the use case that path option and path parameter are both specified. However, it breaks some use cases: ``` val dfr = spark.read.format(...).option(...) dfr.load(path1).xxx dfr.load(path2).xxx ``` The reason is that: `load` has side effects. It will set path option to the `DataFrameReader` instance. The next time you call `load`, Spark will fail because both path option and path parameter are specified. This PR removes the side effect of `save`/`load`/`start` to not set the path option. ### Why are the changes needed? recover some use cases ### Does this PR introduce _any_ user-facing change? Yes, some use cases fail before this PR, and can run successfully after this PR. ### How was this patch tested? new tests Closes #29723 from cloud-fan/df. 
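For illustration, a minimal sketch of why removing the side effect matters. The `ToyReader` class below is an assumption made for this example only (it is not Spark's `DataFrameReader`); it just mirrors the shape of the bug and of the fix described above:

```
// Toy builder that mirrors only the shape of the problem, not Spark's API.
class ToyReader {
  private var opts = Map.empty[String, String]

  def option(key: String, value: String): ToyReader = { opts += (key -> value); this }

  // Pre-fix shape: load() writes the path back into the shared option map, so
  // a second load() sees both a "path" option and a path parameter and fails.
  def loadMutating(path: String): Map[String, String] = {
    require(!opts.contains("path"), "path option and path parameter are both specified")
    opts += ("path" -> path)
    opts
  }

  // Post-fix shape: the path is merged into a local copy and the builder state
  // stays untouched, so load(path1) followed by load(path2) keeps working.
  def load(path: String): Map[String, String] = {
    require(!opts.contains("path"), "path option and path parameter are both specified")
    opts + ("path" -> path)
  }
}
```

With `loadMutating`, the first call silently turns the path into a sticky option; with `load`, nothing leaks between calls, which is exactly the guarantee the consecutive-call use case needs.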
Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun --- .../apache/spark/sql/DataFrameReader.scala | 34 ++++++++------- .../apache/spark/sql/DataFrameWriter.scala | 41 +++++++++++++------ .../sql/streaming/DataStreamReader.scala | 19 ++++++--- .../sql/streaming/DataStreamWriter.scala | 27 ++++++++---- .../test/DataStreamReaderWriterSuite.scala | 15 +++++++ .../sql/test/DataFrameReaderWriterSuite.scala | 9 ++++ 6 files changed, 103 insertions(+), 42 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index ab6b1ff5daccf..ab18a3119c09f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -260,25 +260,22 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { s"To ignore this check, set '${SQLConf.LEGACY_PATH_OPTION_BEHAVIOR.key}' to 'true'.") } - val updatedPaths = if (!legacyPathOptionBehavior && paths.length == 1) { - option("path", paths.head) - Seq.empty - } else { - paths - } - DataSource.lookupDataSourceV2(source, sparkSession.sessionState.conf).map { provider => val catalogManager = sparkSession.sessionState.catalogManager val sessionOptions = DataSourceV2Utils.extractSessionConfigs( source = provider, conf = sparkSession.sessionState.conf) - val pathsOption = if (updatedPaths.isEmpty) { - None + + val optionsWithPath = if (paths.isEmpty) { + extraOptions + } else if (paths.length == 1) { + extraOptions + ("path" -> paths.head) } else { val objectMapper = new ObjectMapper() - Some("paths" -> objectMapper.writeValueAsString(updatedPaths.toArray)) + extraOptions + ("paths" -> objectMapper.writeValueAsString(paths.toArray)) } - val finalOptions = sessionOptions ++ extraOptions.originalMap ++ pathsOption + val finalOptions = + sessionOptions.filterKeys(!optionsWithPath.contains(_)) ++ optionsWithPath.originalMap val dsOptions = new CaseInsensitiveStringMap(finalOptions.asJava) val (table, catalog, ident) = provider match { case _: SupportsCatalogOptions if userSpecifiedSchema.nonEmpty => @@ -303,20 +300,27 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { sparkSession, DataSourceV2Relation.create(table, catalog, ident, dsOptions)) - case _ => loadV1Source(updatedPaths: _*) + case _ => loadV1Source(paths: _*) } - }.getOrElse(loadV1Source(updatedPaths: _*)) + }.getOrElse(loadV1Source(paths: _*)) } private def loadV1Source(paths: String*) = { + val legacyPathOptionBehavior = sparkSession.sessionState.conf.legacyPathOptionBehavior + val (finalPaths, finalOptions) = if (!legacyPathOptionBehavior && paths.length == 1) { + (Nil, extraOptions + ("path" -> paths.head)) + } else { + (paths, extraOptions) + } + // Code path for data source v1. 
sparkSession.baseRelationToDataFrame( DataSource.apply( sparkSession, - paths = paths, + paths = finalPaths, userSpecifiedSchema = userSpecifiedSchema, className = source, - options = extraOptions.originalMap).resolveRelation()) + options = finalOptions.originalMap).resolveRelation()) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index 25ca186c65f04..bd1997bee53f7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -291,8 +291,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { "parameter. Either remove the path option, or call save() without the parameter. " + s"To ignore this check, set '${SQLConf.LEGACY_PATH_OPTION_BEHAVIOR.key}' to 'true'.") } - this.extraOptions = this.extraOptions + ("path" -> path) - save() + saveInternal(Some(path)) } /** @@ -300,7 +299,9 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { * * @since 1.4.0 */ - def save(): Unit = { + def save(): Unit = saveInternal(None) + + private def saveInternal(path: Option[String]): Unit = { if (source.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) { throw new AnalysisException("Hive data source can only be used with tables, you can not " + "write files of Hive data source directly.") @@ -313,8 +314,16 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { val provider = maybeV2Provider.get val sessionOptions = DataSourceV2Utils.extractSessionConfigs( provider, df.sparkSession.sessionState.conf) - val options = sessionOptions.filterKeys(!extraOptions.contains(_)) ++ extraOptions.toMap - val dsOptions = new CaseInsensitiveStringMap(options.toMap.asJava) + + val optionsWithPath = if (path.isEmpty) { + extraOptions + } else { + extraOptions + ("path" -> path.get) + } + + val finalOptions = + sessionOptions.filterKeys(!optionsWithPath.contains(_)) ++ optionsWithPath.originalMap + val dsOptions = new CaseInsensitiveStringMap(finalOptions.asJava) def getTable: Table = { // For file source, it's expensive to infer schema/partition at each write. Here we pass @@ -350,7 +359,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { // Streaming also uses the data source V2 API. So it may be that the data source // implements v2, but has no v2 implementation for batch writes. In that case, we // fall back to saving as though it's a V1 source. - return saveToV1Source() + return saveToV1Source(path) } } @@ -358,14 +367,14 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { checkPartitioningMatchesV2Table(table) if (mode == SaveMode.Append) { runCommand(df.sparkSession, "save") { - AppendData.byName(relation, df.logicalPlan, extraOptions.toMap) + AppendData.byName(relation, df.logicalPlan, finalOptions) } } else { // Truncate the table. 
TableCapabilityCheck will throw a nice exception if this // isn't supported runCommand(df.sparkSession, "save") { OverwriteByExpression.byName( - relation, df.logicalPlan, Literal(true), extraOptions.toMap) + relation, df.logicalPlan, Literal(true), finalOptions) } } @@ -385,7 +394,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { partitioningAsV2, df.queryExecution.analyzed, Map(TableCatalog.PROP_PROVIDER -> source) ++ location, - extraOptions.toMap, + finalOptions, ignoreIfExists = createMode == SaveMode.Ignore) } case _: TableProvider => @@ -397,30 +406,36 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { // Streaming also uses the data source V2 API. So it may be that the data source // implements v2, but has no v2 implementation for batch writes. In that case, we // fallback to saving as though it's a V1 source. - saveToV1Source() + saveToV1Source(path) } } } } else { - saveToV1Source() + saveToV1Source(path) } } - private def saveToV1Source(): Unit = { + private def saveToV1Source(path: Option[String]): Unit = { partitioningColumns.foreach { columns => extraOptions = extraOptions + ( DataSourceUtils.PARTITIONING_COLUMNS_KEY -> DataSourceUtils.encodePartitioningColumns(columns)) } + val optionsWithPath = if (path.isEmpty) { + extraOptions + } else { + extraOptions + ("path" -> path.get) + } + // Code path for data source v1. runCommand(df.sparkSession, "save") { DataSource( sparkSession = df.sparkSession, className = source, partitionColumns = partitioningColumns.getOrElse(Nil), - options = extraOptions.toMap).planForWriting(mode, df.logicalPlan) + options = optionsWithPath.originalMap).planForWriting(mode, df.logicalPlan) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala index 5302357d2bfa0..c22f917d3cf91 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala @@ -188,12 +188,20 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo * * @since 2.0.0 */ - def load(): DataFrame = { + def load(): DataFrame = loadInternal(None) + + private def loadInternal(path: Option[String]): DataFrame = { if (source.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) { throw new AnalysisException("Hive data source can only be used with tables, you can not " + "read files of Hive data source directly.") } + val optionsWithPath = if (path.isEmpty) { + extraOptions + } else { + extraOptions + ("path" -> path.get) + } + val ds = DataSource.lookupDataSource(source, sparkSession.sqlContext.conf). getConstructor().newInstance() // We need to generate the V1 data source so we can pass it to the V2 relation as a shim. 
@@ -203,7 +211,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo sparkSession, userSpecifiedSchema = userSpecifiedSchema, className = source, - options = extraOptions.toMap) + options = optionsWithPath.originalMap) val v1Relation = ds match { case _: StreamSourceProvider => Some(StreamingRelation(v1DataSource)) case _ => None @@ -213,8 +221,9 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo case provider: TableProvider if !provider.isInstanceOf[FileDataSourceV2] => val sessionOptions = DataSourceV2Utils.extractSessionConfigs( source = provider, conf = sparkSession.sessionState.conf) - val options = sessionOptions ++ extraOptions.toMap - val dsOptions = new CaseInsensitiveStringMap(options.asJava) + val finalOptions = + sessionOptions.filterKeys(!optionsWithPath.contains(_)) ++ optionsWithPath.originalMap + val dsOptions = new CaseInsensitiveStringMap(finalOptions.asJava) val table = DataSourceV2Utils.getTableFromProvider(provider, dsOptions, userSpecifiedSchema) import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ table match { @@ -247,7 +256,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo "parameter. Either remove the path option, or call load() without the parameter. " + s"To ignore this check, set '${SQLConf.LEGACY_PATH_OPTION_BEHAVIOR.key}' to 'true'.") } - option("path", path).load() + loadInternal(Some(path)) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index 51ec1e7b8fea1..682f3b98ec2e8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -274,7 +274,7 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { "path parameter. Either remove the path option, or call start() without the parameter. " + s"To ignore this check, set '${SQLConf.LEGACY_PATH_OPTION_BEHAVIOR.key}' to 'true'.") } - option("path", path).start() + startInternal(Some(path)) } /** @@ -292,7 +292,9 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { * @since 2.0.0 */ @throws[TimeoutException] - def start(): StreamingQuery = { + def start(): StreamingQuery = startInternal(None) + + private def startInternal(path: Option[String]): StreamingQuery = { if (source.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) { throw new AnalysisException("Hive data source can only be used with tables, you can not " + "write files of Hive data source directly.") @@ -353,29 +355,36 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { // file source v2 does not support streaming yet. 
classOf[FileDataSourceV2].isAssignableFrom(cls) + val optionsWithPath = if (path.isEmpty) { + extraOptions + } else { + extraOptions + ("path" -> path.get) + } + val sink = if (classOf[TableProvider].isAssignableFrom(cls) && !useV1Source) { val provider = cls.getConstructor().newInstance().asInstanceOf[TableProvider] val sessionOptions = DataSourceV2Utils.extractSessionConfigs( source = provider, conf = df.sparkSession.sessionState.conf) - val options = sessionOptions ++ extraOptions.toMap - val dsOptions = new CaseInsensitiveStringMap(options.asJava) + val finalOptions = + sessionOptions.filterKeys(!optionsWithPath.contains(_)) ++ optionsWithPath.originalMap + val dsOptions = new CaseInsensitiveStringMap(finalOptions.asJava) val table = DataSourceV2Utils.getTableFromProvider( provider, dsOptions, userSpecifiedSchema = None) import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ table match { case table: SupportsWrite if table.supports(STREAMING_WRITE) => table - case _ => createV1Sink() + case _ => createV1Sink(optionsWithPath) } } else { - createV1Sink() + createV1Sink(optionsWithPath) } df.sparkSession.sessionState.streamingQueryManager.startQuery( extraOptions.get("queryName"), extraOptions.get("checkpointLocation"), df, - extraOptions.toMap, + optionsWithPath.originalMap, sink, outputMode, useTempCheckpointLocation = source == "console" || source == "noop", @@ -384,11 +393,11 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { } } - private def createV1Sink(): Sink = { + private def createV1Sink(optionsWithPath: CaseInsensitiveMap[String]): Sink = { val ds = DataSource( df.sparkSession, className = source, - options = extraOptions.toMap, + options = optionsWithPath.originalMap, partitionColumns = normalizedParCols.getOrElse(Nil)) ds.createSink(outputMode) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala index 64b0cb296635a..a59eca25fe28e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala @@ -798,4 +798,19 @@ class DataStreamReaderWriterSuite extends StreamTest with BeforeAndAfter { } } } + + test("SPARK-32853: consecutive load/start calls should be allowed") { + val dfr = spark.readStream.format(classOf[DefaultSource].getName) + var df = dfr.load("1") + df = dfr.load("2") + withTempDir { checkpointPath => + val dfw = df.writeStream + .option("checkpointLocation", checkpointPath.getCanonicalPath) + .format(classOf[DefaultSource].getName) + var query = dfw.start("1") + query.stop() + query = dfw.start("2") + query.stop() + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala index c4ca85d6237b2..eaca63c74c875 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala @@ -1190,4 +1190,13 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSparkSession with verifyLoadFails(df.write.option("path", path).format("parquet").save(path)) verifyLoadFails(df.write.option("path", path).format("parquet").save("")) } + + test("SPARK-32853: consecutive load/save calls should be allowed") { 
+ val dfr = spark.read.format(classOf[FakeSourceOne].getName) + dfr.load("1") + dfr.load("2") + val dfw = spark.range(10).write.format(classOf[DefaultSource].getName) + dfw.save("1") + dfw.save("2") + } } From 94cac5978cf33f99a9f28180c9c909d5c884c152 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Fri, 11 Sep 2020 13:42:33 +0000 Subject: [PATCH 0005/1009] [SPARK-32730][SQL][FOLLOW-UP] Improve LeftAnti SortMergeJoin right side buffering ### What changes were proposed in this pull request? This is a follow-up to https://github.com/apache/spark/pull/29572. LeftAnti SortMergeJoin should not buffer all matching right side rows when bound condition is empty, this is unnecessary and can lead to performance degradation especially when spilling happens. ### Why are the changes needed? Performance improvement. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New UT. Closes #29727 from peter-toth/SPARK-32730-improve-leftsemi-sortmergejoin-followup. Authored-by: Peter Toth Signed-off-by: Wenchen Fan --- .../spark/sql/execution/joins/SortMergeJoinExec.scala | 3 ++- .../src/test/scala/org/apache/spark/sql/JoinSuite.scala | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala index 097ea61f13832..6e59ad07d7168 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala @@ -288,7 +288,8 @@ case class SortMergeJoinExec( RowIterator.fromScala(rightIter), inMemoryThreshold, spillThreshold, - cleanupResources + cleanupResources, + condition.isEmpty ) private[this] val joinRow = new JoinedRow diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala index 942cf24a3a873..8755dccb801c2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala @@ -757,6 +757,14 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan ) } + // LEFT ANTI JOIN without bound condition does not spill + assertNotSpilled(sparkContext, "left anti join") { + checkAnswer( + sql("SELECT * FROM testData LEFT ANTI JOIN testData2 ON key = a WHERE key = 2"), + Nil + ) + } + val expected = new ListBuffer[Row]() expected.append( Row(1, "1", 1, 1), Row(1, "1", 1, 2), From f6322d1cb149983fbcd5b90a804eeda0fe4e8a49 Mon Sep 17 00:00:00 2001 From: "Rohit.Mishra" Date: Fri, 11 Sep 2020 10:38:01 -0500 Subject: [PATCH 0006/1009] [SPARK-32180][PYTHON][DOCS] Installation page of Getting Started in PySpark documentation ### What changes were proposed in this pull request? This PR proposes to add getting started- installation to new PySpark docs. ### Why are the changes needed? Better documentation. ### Does this PR introduce _any_ user-facing change? No. Documentation only. ### How was this patch tested? Generating documents locally. Closes #29640 from rohitmishr1484/SPARK-32180-Getting-Started-Installation. 
Authored-by: Rohit.Mishra Signed-off-by: Sean Owen --- python/docs/source/getting_started/index.rst | 3 + .../source/getting_started/installation.rst | 114 ++++++++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 python/docs/source/getting_started/installation.rst diff --git a/python/docs/source/getting_started/index.rst b/python/docs/source/getting_started/index.rst index cf4f7de11dbe3..0f3cea7d6ea58 100644 --- a/python/docs/source/getting_started/index.rst +++ b/python/docs/source/getting_started/index.rst @@ -20,7 +20,10 @@ Getting Started =============== +This page summarizes the basic steps required to setup and get started with PySpark. + .. toctree:: :maxdepth: 2 + installation quickstart diff --git a/python/docs/source/getting_started/installation.rst b/python/docs/source/getting_started/installation.rst new file mode 100644 index 0000000000000..a2de0b2e2c9f4 --- /dev/null +++ b/python/docs/source/getting_started/installation.rst @@ -0,0 +1,114 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +============ +Installation +============ + +Official releases are available from the `Apache Spark website `_. +Alternatively, you can install it via ``pip`` from PyPI. PyPI installation is usually for standalone +locally or as a client to connect to a cluster instead of setting a cluster up. + +This page includes the instructions for installing PySpark by using pip, Conda, downloading manually, and building it from the source. + +Python Version Supported +------------------------ + +Python 3.6 and above. + +Using PyPI +---------- + +PySpark installation using `PyPI `_ + +.. code-block:: bash + + pip install pyspark + +Using Conda +----------- + +Conda is an open-source package management and environment management system which is a part of the `Anaconda `_ distribution. It is both cross-platform and language agnostic. + +Conda can be used to create a virtual environment from terminal as shown below: + +.. code-block:: bash + + conda create -n pyspark_env + +After the virtual environment is created, it should be visible under the list of Conda environments which can be seen using the following command: + +.. code-block:: bash + + conda env list + +The newly created environment can be accessed using the following command: + +.. code-block:: bash + + conda activate pyspark_env + +In Conda version earlier than 4.4, the following command should be used: + +.. code-block:: bash + + source activate pyspark_env + +Refer to `Using PyPI <#using-pypi>`_ to install PySpark in the newly created environment. + +Note that `PySpark at Conda `_ is available but not necessarily synced with PySpark release cycle because it is maintained by the community separately. 
+ +Official Release Channel +------------------------ + +Different flavors of PySpark is available in the `official release channel `_. +Any suitable version can be downloaded and extracted as below: + +.. code-block:: bash + + tar xzvf spark-3.0.0-bin-hadoop2.7.tgz + +Ensure the `SPARK_HOME` environment variable points to the directory where the code has been extracted. +Define `PYTHONPATH` such that it can find the PySpark and Py4J under `SPARK_HOME/python/lib`. +One example of doing this is shown below: + +.. code-block:: bash + + cd spark-3.0.0-bin-hadoop2.7 + export SPARK_HOME=`pwd` + export PYTHONPATH=$(ZIPS=("$SPARK_HOME"/python/lib/*.zip); IFS=:; echo "${ZIPS[*]}"):$PYTHONPATH + +Installing from Source +---------------------- + +To install PySpark from source, refer to `Building Spark `_. + +Refer to `Official Release Channel <#official-release-channel>`_ for steps to define ``PYTHONPATH``. + +Dependencies +------------ +============= ========================= ================ +Package Minimum supported version Note +============= ========================= ================ +`pandas` 0.23.2 Optional for SQL +`NumPy` 1.7 Required for ML +`pyarrow` 0.15.1 Optional for SQL +`Py4J` 0.10.9 Required +============= ========================= ================ + +**Note**: PySpark requires Java 8 or later with ``JAVA_HOME`` properly set. +If using JDK 11, set ``-Dio.netty.tryReflectionSetAccessible=true`` for Arrow related features and refer to `Downloading `_ \ No newline at end of file From b4be6a6d12bf62f02cffe0bcc97ef32d27827d57 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 11 Sep 2020 11:48:34 -0700 Subject: [PATCH 0007/1009] [SPARK-32845][SS][TESTS] Add sinkParameter to check sink options robustly in DataStreamReaderWriterSuite ### What changes were proposed in this pull request? This PR aims to add `sinkParameter` to check sink options robustly and independently in DataStreamReaderWriterSuite ### Why are the changes needed? `LastOptions.parameters` is designed to catch three cases: `sourceSchema`, `createSource`, `createSink`. However, `StreamQuery.stop` invokes `queryExecutionThread.join`, `runStream`, `createSource` immediately and reset the stored options by `createSink`. To catch `createSink` options, currently, the test suite is trying a workaround pattern. However, we observed a flakiness in this pattern sometimes. If we split `createSink` option separately, we don't need this workaround and can eliminate this flakiness. ```scala val query = df.writeStream. ... .start() assert(LastOptions.paramters(..)) query.stop() ``` ### Does this PR introduce _any_ user-facing change? No. This is a test-only change. ### How was this patch tested? Pass the newly updated test case. Closes #29730 from dongjoon-hyun/SPARK-32845. 
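Put differently, the patch gives the sink its own holder so the assertion no longer depends on whether `createSource` has run again. A condensed sketch of that idea (the `LastOptionsSketch` object below is illustrative, not the actual test harness):

```
object LastOptionsSketch {
  @volatile var parameters: Map[String, String] = null      // written by both source and sink
  @volatile var sinkParameters: Map[String, String] = null  // written only by the sink

  def createSource(opts: Map[String, String]): Unit =
    parameters = opts               // stop() re-runs this and can clobber `parameters`

  def createSink(opts: Map[String, String]): Unit = {
    parameters = opts               // old scheme: racy to assert on after stop()
    sinkParameters = opts           // new scheme: stable, so the suite asserts on this
  }
}
```

Asserting on `sinkParameters` right after `start().stop()` is then deterministic, which is the pattern the updated test suite follows.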
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../test/DataStreamReaderWriterSuite.scala | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala index a59eca25fe28e..8d39704c61d4e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala @@ -43,11 +43,13 @@ object LastOptions { var mockStreamSourceProvider = mock(classOf[StreamSourceProvider]) var mockStreamSinkProvider = mock(classOf[StreamSinkProvider]) var parameters: Map[String, String] = null + var sinkParameters: Map[String, String] = null var schema: Option[StructType] = null var partitionColumns: Seq[String] = Nil def clear(): Unit = { parameters = null + sinkParameters = null schema = null partitionColumns = null reset(mockStreamSourceProvider) @@ -101,7 +103,7 @@ class DefaultSource extends StreamSourceProvider with StreamSinkProvider { parameters: Map[String, String], partitionColumns: Seq[String], outputMode: OutputMode): Sink = { - LastOptions.parameters = parameters + LastOptions.sinkParameters = parameters LastOptions.partitionColumns = partitionColumns LastOptions.mockStreamSinkProvider.createSink(spark, parameters, partitionColumns, outputMode) (_: Long, _: DataFrame) => {} @@ -170,20 +172,19 @@ class DataStreamReaderWriterSuite extends StreamTest with BeforeAndAfter { LastOptions.clear() - val query = df.writeStream + df.writeStream .format("org.apache.spark.sql.streaming.test") .option("opt1", "5") .options(Map("opt2" -> "4")) .options(map) .option("checkpointLocation", newMetadataDir) .start() + .stop() - assert(LastOptions.parameters("opt1") == "5") - assert(LastOptions.parameters("opt2") == "4") - assert(LastOptions.parameters("opt3") == "3") - assert(LastOptions.parameters.contains("checkpointLocation")) - - query.stop() + assert(LastOptions.sinkParameters("opt1") == "5") + assert(LastOptions.sinkParameters("opt2") == "4") + assert(LastOptions.sinkParameters("opt3") == "3") + assert(LastOptions.sinkParameters.contains("checkpointLocation")) } test("SPARK-32832: later option should override earlier options for load()") { @@ -204,7 +205,7 @@ class DataStreamReaderWriterSuite extends StreamTest with BeforeAndAfter { .load() assert(LastOptions.parameters.isEmpty) - val query = ds.writeStream + ds.writeStream .format("org.apache.spark.sql.streaming.test") .option("checkpointLocation", newMetadataDir) .option("paTh", "1") @@ -213,8 +214,8 @@ class DataStreamReaderWriterSuite extends StreamTest with BeforeAndAfter { .option("patH", "4") .option("path", "5") .start() - assert(LastOptions.parameters("path") == "5") - query.stop() + .stop() + assert(LastOptions.sinkParameters("path") == "5") } test("partitioning") { @@ -787,13 +788,13 @@ class DataStreamReaderWriterSuite extends StreamTest with BeforeAndAfter { withTempDir { checkpointPath => withSQLConf(SQLConf.LEGACY_PATH_OPTION_BEHAVIOR.key -> "true", SQLConf.CHECKPOINT_LOCATION.key -> checkpointPath.getAbsolutePath) { - val query = df.writeStream + df.writeStream .format("org.apache.spark.sql.streaming.test") .option("path", "tmp4") .start("tmp5") + .stop() // The legacy behavior overwrites the path option. 
- assert(LastOptions.parameters("path") == "tmp5") - query.stop() + assert(LastOptions.sinkParameters("path") == "tmp5") } } } From 4269c2c252d5eecf6a861160556026ee399ad976 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Sat, 12 Sep 2020 08:42:07 +0900 Subject: [PATCH 0008/1009] [SPARK-32851][SQL][TEST] Tests should fail if errors happen when generating projection code ### What changes were proposed in this pull request? This PR intends to set `CODEGEN_ONLY` at `CODEGEN_FACTORY_MODE` in test spark context so that tests can fail if errors happen when generating expr code. ### Why are the changes needed? I noticed that the code generation of `SafeProjection` failed in the existing test (https://issues.apache.org/jira/browse/SPARK-32828) but it passed because `FALLBACK` was set at `CODEGEN_FACTORY_MODE` (by default) in `SharedSparkSession`. To get aware of these failures quickly, I think its worth setting `CODEGEN_ONLY` at `CODEGEN_FACTORY_MODE`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #29721 from maropu/ExprCodegenTest. Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- .../scala/org/apache/spark/sql/test/SharedSparkSession.scala | 2 ++ .../test/scala/org/apache/spark/sql/hive/test/TestHive.scala | 2 ++ 2 files changed, 4 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala index ee29b4b8fb32b..cfc92a780308d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala @@ -25,6 +25,7 @@ import org.scalatest.concurrent.Eventually import org.apache.spark.{DebugFilesystem, SparkConf} import org.apache.spark.internal.config.UNSAFE_EXCEPTION_ON_MEMORY_LEAK import org.apache.spark.sql.{SparkSession, SQLContext} +import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode import org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} @@ -67,6 +68,7 @@ trait SharedSparkSessionBase .set("spark.hadoop.fs.file.impl", classOf[DebugFilesystem].getName) .set(UNSAFE_EXCEPTION_ON_MEMORY_LEAK, true) .set(SQLConf.CODEGEN_FALLBACK.key, "false") + .set(SQLConf.CODEGEN_FACTORY_MODE.key, CodegenObjectFactoryMode.CODEGEN_ONLY.toString) // Disable ConvertToLocalRelation for better test coverage. 
Test cases built on // LocalRelation will exercise the optimization rules better by disabling it as // this rule may potentially block testing of other optimization rules such as diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index a2518e70a013b..f98534eb2b543 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -38,6 +38,7 @@ import org.apache.spark.internal.config.UI._ import org.apache.spark.sql.{DataFrame, Dataset, SparkSession, SQLContext} import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener +import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode import org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation} import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ @@ -58,6 +59,7 @@ object TestHive new SparkConf() .set("spark.sql.test", "") .set(SQLConf.CODEGEN_FALLBACK.key, "false") + .set(SQLConf.CODEGEN_FACTORY_MODE.key, CodegenObjectFactoryMode.CODEGEN_ONLY.toString) .set(HiveUtils.HIVE_METASTORE_BARRIER_PREFIXES.key, "org.apache.spark.sql.hive.execution.PairSerDe") .set(WAREHOUSE_PATH.key, TestHiveContext.makeWarehouseDir().toURI.getPath) From ce566bed17f94ac3443ebed82ad406b43dbb13c2 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Fri, 11 Sep 2020 20:08:22 -0700 Subject: [PATCH 0009/1009] [SPARK-32180][FOLLOWUP] Fix .rst error in new Pyspark installation guide This simply fixes an .rst generation error in https://github.com/apache/spark/pull/29640 Closes #29735 from srowen/SPARK-32180.2. Authored-by: Sean Owen Signed-off-by: Takuya UESHIN --- python/docs/source/getting_started/installation.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/docs/source/getting_started/installation.rst b/python/docs/source/getting_started/installation.rst index a2de0b2e2c9f4..914045e898b2d 100644 --- a/python/docs/source/getting_started/installation.rst +++ b/python/docs/source/getting_started/installation.rst @@ -75,7 +75,7 @@ Note that `PySpark at Conda `_ is avai Official Release Channel ------------------------ -Different flavors of PySpark is available in the `official release channel `_. +Different flavors of PySpark are available in the `Apache Spark website `_. Any suitable version can be downloaded and extracted as below: .. code-block:: bash @@ -97,7 +97,7 @@ Installing from Source To install PySpark from source, refer to `Building Spark `_. -Refer to `Official Release Channel <#official-release-channel>`_ for steps to define ``PYTHONPATH``. +Refer to `steps above <#official-release-channel>`_ to define ``PYTHONPATH``. Dependencies ------------ From 2009f953406aa5b4fdcdcd35f4c7c143f34d53e3 Mon Sep 17 00:00:00 2001 From: "sandeep.katta" Date: Sat, 12 Sep 2020 13:22:54 -0700 Subject: [PATCH 0010/1009] [SPARK-32779][SQL][FOLLOW-UP] Delete Unused code ### What changes were proposed in this pull request? Follow-up PR as per the review comments in [29649](https://github.com/apache/spark/pull/29649/files/8d45542e915bea1b321f42988b407091065a2539#r487140171) ### Why are the changes needed? Delete the un used code ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? 
Existing UT Closes #29736 from sandeep-katta/deadlockfollowup. Authored-by: sandeep.katta Signed-off-by: Dongjoon Hyun --- .../main/scala/org/apache/spark/sql/hive/client/HiveShim.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala index 99c9199e466f9..4ab0599e4477b 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala @@ -1327,8 +1327,6 @@ private[client] class Shim_v3_0 extends Shim_v2_3 { inheritTableSpecs: Boolean, isSkewedStoreAsSubdir: Boolean, isSrcLocal: Boolean): Unit = { - val session = SparkSession.getActiveSession - assert(session.nonEmpty) val table = hive.getTable(tableName) val loadFileType = if (replace) { clazzLoadFileType.getEnumConstants.find(_.toString.equalsIgnoreCase("REPLACE_ALL")) From bbbd907780cbd07507619bcc6d309e544e0c3471 Mon Sep 17 00:00:00 2001 From: KevinSmile Date: Sat, 12 Sep 2020 16:12:37 -0500 Subject: [PATCH 0011/1009] [SPARK-32804][LAUNCHER] Fix run-example command builder bug ### What changes were proposed in this pull request? Bug fix in run-example command builder (as described in [SPARK-32804], run-example failed in standalone-cluster mode): 1. Missing primaryResource arg. 2. Wrong appResource arg. which will affect `SparkSubmit` in Standalone-Cluster mode: https://github.com/apache/spark/blob/32d87c2b595b4aac2d9274424a43697299638f61/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala#L695-L696 and get error at: https://github.com/apache/spark/blob/f55694638d45f34ab91f6f6ec2066cbf7631f4af/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala#L74-L89 ### Why are the changes needed? Bug: run-example failed in standalone-cluster mode ### Does this PR introduce _any_ user-facing change? Yes. User can run-example in standalone-cluster mode now. ### How was this patch tested? New ut added. Also it's a user-facing bug, so better re-check the real case in [SPARK-32804]. Closes #29653 from KevinSmile/bug-fix-master. 
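The heart of the fix is in the Java launcher (see the diff that follows): the builder now uses the `spark-examples` jar as the primary resource instead of `SparkLauncher.NO_RESOURCE`. A hedged Scala rendering of the same selection rule, where `ExamplesJarSketch` is an illustrative name rather than real launcher code:

```
import java.io.File

object ExamplesJarSketch {
  // Pick the spark-examples jar out of the candidate jar list; that jar then
  // becomes the submit's primary resource in standalone-cluster deploy mode.
  def findExamplesAppJar(exampleJars: Seq[String]): String =
    exampleJars
      .find(jar => new File(jar).getName.startsWith("spark-examples"))
      .getOrElse(throw new IllegalStateException("Failed to find examples' main app jar."))
}
```

Supplying a real primary resource is what lets `ClientArguments` parse the cluster-mode submit instead of failing on a missing app resource.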
Authored-by: KevinSmile Signed-off-by: Sean Owen --- .../launcher/SparkSubmitCommandBuilder.java | 15 +++++++++++++-- .../SparkSubmitCommandBuilderSuite.java | 18 ++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java index 383c3f60a595b..43e7f8debe17d 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java @@ -139,7 +139,7 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder { case RUN_EXAMPLE: isExample = true; - appResource = SparkLauncher.NO_RESOURCE; + appResource = findExamplesAppJar(); submitArgs = args.subList(1, args.size()); } @@ -241,9 +241,11 @@ List buildSparkSubmitArgs() { } args.addAll(parsedArgs); + if (appResource != null) { args.add(appResource); } + args.addAll(appArgs); return args; @@ -401,6 +403,15 @@ private boolean isThriftServer(String mainClass) { mainClass.equals("org.apache.spark.sql.hive.thriftserver.HiveThriftServer2")); } + private String findExamplesAppJar() { + for (String exampleJar : findExamplesJars()) { + if (new File(exampleJar).getName().startsWith("spark-examples")) { + return exampleJar; + } + } + throw new IllegalStateException("Failed to find examples' main app jar."); + } + private List findExamplesJars() { boolean isTesting = "1".equals(getenv("SPARK_TESTING")); List examplesJars = new ArrayList<>(); @@ -513,7 +524,7 @@ protected boolean handleUnknown(String opt) { className = EXAMPLE_CLASS_PREFIX + className; } mainClass = className; - appResource = SparkLauncher.NO_RESOURCE; + appResource = findExamplesAppJar(); return false; } else if (errorOnUnknownArgs) { checkArgument(!opt.startsWith("-"), "Unrecognized option: %s", opt); diff --git a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java index 752e8d4c23f8b..6cd089e256b93 100644 --- a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java +++ b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java @@ -245,6 +245,24 @@ public void testExamplesRunner() throws Exception { assertEquals("42", cmd.get(cmd.size() - 1)); } + @Test + public void testExamplesRunnerPrimaryResource() throws Exception { + List sparkSubmitArgs = Arrays.asList( + SparkSubmitCommandBuilder.RUN_EXAMPLE, + parser.MASTER + "=foo", + parser.DEPLOY_MODE + "=cluster", + "SparkPi", + "100"); + + List cmd = newCommandBuilder(sparkSubmitArgs).buildSparkSubmitArgs(); + assertEquals(SparkSubmitCommandBuilder.EXAMPLE_CLASS_PREFIX + "SparkPi", + findArgValue(cmd, parser.CLASS)); + assertEquals("cluster", findArgValue(cmd, parser.DEPLOY_MODE)); + String primaryResource = cmd.get(cmd.size() - 2); + assertTrue(new File(primaryResource).getName().startsWith("spark-examples")); + assertFalse(cmd.contains(SparkLauncher.NO_RESOURCE)); + } + @Test(expected = IllegalArgumentException.class) public void testMissingAppResource() { new SparkSubmitCommandBuilder().buildSparkSubmitArgs(); From 3be552ccc8d26089881229edc034d2ebf2e75511 Mon Sep 17 00:00:00 2001 From: Karol Chmist Date: Sat, 12 Sep 2020 18:15:15 -0500 Subject: [PATCH 0012/1009] [SPARK-30090][SHELL] Adapt Spark REPL to Scala 2.13 ### What changes were proposed in this pull request? 
This is an attempt to adapt Spark REPL to Scala 2.13. It is based on a [scala-2.13 branch](https://github.com/smarter/spark/tree/scala-2.13) made by smarter. I had to set Scala version to 2.13 in some places, and to adapt some other modules, before I could start working on the REPL itself. These are separate commits on the branch that probably would be fixed beforehand, and thus dropped before the merge of this PR. I couldn't find a way to run the initialization code with existing REPL classes in Scala 2.13.2, so I [modified REPL in Scala](https://github.com/karolchmist/scala/commit/e9cc0dd54787351587237bbbee37d23ee744894c) to make it work. With this modification I managed to run Spark Shell, along with the units tests passing, which is good news. The bad news is that it requires an upstream change in Scala, which must be accepted first. I'd be happy to change it if someone points a way to do it differently. If not, I'd propose a PR in Scala to introduce `ILoop.internalReplAutorunCode`. ### Why are the changes needed? REPL in Scala changed quite a lot, so current version of Spark REPL needed to be adapted. ### Does this PR introduce _any_ user-facing change? In the previous version of `SparkILoop`, a lot of Scala's `ILoop` code was [overridden and duplicated](https://github.com/apache/spark/commit/2bc7b75537ec81184048738883b282e257cc58de) to make the welcome message a bit more pleasant. In this PR, the message is in a bit different order, but it's still acceptable IMHO. Before this PR: ``` 20/05/15 15:32:39 WARN Utils: Your hostname, hermes resolves to a loopback address: 127.0.1.1; using 192.168.1.28 instead (on interface enp0s31f6) 20/05/15 15:32:39 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address 20/05/15 15:32:39 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties Setting default log level to "WARN". To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel). 20/05/15 15:32:45 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041. Spark context Web UI available at http://192.168.1.28:4041 Spark context available as 'sc' (master = local[*], app id = local-1589549565502). Spark session available as 'spark'. Welcome to ____ __ / __/__ ___ _____/ /__ _\ \/ _ \/ _ `/ __/ '_/ /___/ .__/\_,_/_/ /_/\_\ version 3.0.1-SNAPSHOT /_/ Using Scala version 2.12.10 (OpenJDK 64-Bit Server VM, Java 1.8.0_242) Type in expressions to have them evaluated. Type :help for more information. scala> ``` With this PR: ``` 20/05/15 15:32:15 WARN Utils: Your hostname, hermes resolves to a loopback address: 127.0.1.1; using 192.168.1.28 instead (on interface enp0s31f6) 20/05/15 15:32:15 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address 20/05/15 15:32:15 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties Setting default log level to "WARN". To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel). Welcome to ____ __ / __/__ ___ _____/ /__ _\ \/ _ \/ _ `/ __/ '_/ /___/ .__/\_,_/_/ /_/\_\ version 3.0.0-SNAPSHOT /_/ Using Scala version 2.13.2-20200422-211118-706ef1b (OpenJDK 64-Bit Server VM, Java 1.8.0_242) Type in expressions to have them evaluated. Type :help for more information. 
Spark context Web UI available at http://192.168.1.28:4040 Spark context available as 'sc' (master = local[*], app id = local-1589549541259). Spark session available as 'spark'. scala> ``` It seems that currently the welcoming message is still an improvement from [the original ticket](https://issues.apache.org/jira/browse/SPARK-24785), albeit in a different order. As a bonus, some fragile code duplication was removed. ### How was this patch tested? Existing tests pass in `repl`module. The REPL runs in a terminal and the following code executed correctly: ``` scala> spark.range(1000 * 1000 * 1000).count() val res0: Long = 1000000000 ``` Closes #28545 from karolchmist/scala-2.13-repl. Authored-by: Karol Chmist Signed-off-by: Sean Owen --- .../org/apache/spark/repl/Main.scala | 0 .../org/apache/spark/repl/SparkILoop.scala | 0 .../org/apache/spark/repl/Main.scala | 138 ++++++++++++++ .../org/apache/spark/repl/SparkILoop.scala | 149 +++++++++++++++ .../org/apache/spark/repl/Repl2Suite.scala | 58 ++++++ .../spark/repl/SingletonRepl2Suite.scala | 171 ++++++++++++++++++ .../org/apache/spark/repl/Repl2Suite.scala | 53 ++++++ .../spark/repl/SingletonRepl2Suite.scala | 171 ++++++++++++++++++ .../org/apache/spark/repl/ReplSuite.scala | 27 --- .../spark/repl/SingletonReplSuite.scala | 61 ------- .../catalyst/util/CaseInsensitiveMap.scala | 2 +- 11 files changed, 741 insertions(+), 89 deletions(-) rename repl/src/main/{scala => scala-2.12}/org/apache/spark/repl/Main.scala (100%) rename repl/src/main/{scala => scala-2.12}/org/apache/spark/repl/SparkILoop.scala (100%) create mode 100644 repl/src/main/scala-2.13/org/apache/spark/repl/Main.scala create mode 100644 repl/src/main/scala-2.13/org/apache/spark/repl/SparkILoop.scala create mode 100644 repl/src/test/scala-2.12/org/apache/spark/repl/Repl2Suite.scala create mode 100644 repl/src/test/scala-2.12/org/apache/spark/repl/SingletonRepl2Suite.scala create mode 100644 repl/src/test/scala-2.13/org/apache/spark/repl/Repl2Suite.scala create mode 100644 repl/src/test/scala-2.13/org/apache/spark/repl/SingletonRepl2Suite.scala diff --git a/repl/src/main/scala/org/apache/spark/repl/Main.scala b/repl/src/main/scala-2.12/org/apache/spark/repl/Main.scala similarity index 100% rename from repl/src/main/scala/org/apache/spark/repl/Main.scala rename to repl/src/main/scala-2.12/org/apache/spark/repl/Main.scala diff --git a/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/src/main/scala-2.12/org/apache/spark/repl/SparkILoop.scala similarity index 100% rename from repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala rename to repl/src/main/scala-2.12/org/apache/spark/repl/SparkILoop.scala diff --git a/repl/src/main/scala-2.13/org/apache/spark/repl/Main.scala b/repl/src/main/scala-2.13/org/apache/spark/repl/Main.scala new file mode 100644 index 0000000000000..95115934ed1d6 --- /dev/null +++ b/repl/src/main/scala-2.13/org/apache/spark/repl/Main.scala @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.repl + +import java.io.File +import java.net.URI +import java.util.Locale + +import scala.tools.nsc.GenericRunnerSettings + +import org.apache.spark._ +import org.apache.spark.internal.Logging +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION +import org.apache.spark.util.Utils + +object Main extends Logging { + + initializeLogIfNecessary(true) + Signaling.cancelOnInterrupt() + + val conf = new SparkConf() + val rootDir = + conf.getOption("spark.repl.classdir").getOrElse(Utils.getLocalDir(conf)) + val outputDir = Utils.createTempDir(root = rootDir, namePrefix = "repl") + + var sparkContext: SparkContext = _ + var sparkSession: SparkSession = _ + // this is a public var because tests reset it. + var interp: SparkILoop = _ + + private var hasErrors = false + private var isShellSession = false + + private def scalaOptionError(msg: String): Unit = { + hasErrors = true + // scalastyle:off println + Console.err.println(msg) + // scalastyle:on println + } + + def main(args: Array[String]): Unit = { + isShellSession = true + doMain(args, new SparkILoop) + } + + // Visible for testing + private[repl] def doMain(args: Array[String], _interp: SparkILoop): Unit = { + interp = _interp + val jars = Utils + .getLocalUserJarsForShell(conf) + // Remove file:///, file:// or file:/ scheme if exists for each jar + .map { x => + if (x.startsWith("file:")) new File(new URI(x)).getPath else x + } + .mkString(File.pathSeparator) + val interpArguments = List( + "-Yrepl-class-based", + "-Yrepl-outdir", + s"${outputDir.getAbsolutePath}", + "-classpath", + jars + ) ++ args.toList + + val settings = new GenericRunnerSettings(scalaOptionError) + settings.processArguments(interpArguments, true) + + if (!hasErrors) { + interp.run(settings) // Repl starts and goes in loop of R.E.P.L + Option(sparkContext).foreach(_.stop) + } + } + + def createSparkSession(): SparkSession = { + try { + val execUri = System.getenv("SPARK_EXECUTOR_URI") + conf.setIfMissing("spark.app.name", "Spark shell") + // SparkContext will detect this configuration and register it with the RpcEnv's + // file server, setting spark.repl.class.uri to the actual URI for executors to + // use. This is sort of ugly but since executors are started as part of SparkContext + // initialization in certain cases, there's an initialization order issue that prevents + // this from being set after SparkContext is instantiated. + conf.set("spark.repl.class.outputDir", outputDir.getAbsolutePath()) + if (execUri != null) { + conf.set("spark.executor.uri", execUri) + } + if (System.getenv("SPARK_HOME") != null) { + conf.setSparkHome(System.getenv("SPARK_HOME")) + } + + val builder = SparkSession.builder.config(conf) + if (conf + .get(CATALOG_IMPLEMENTATION.key, "hive") + .toLowerCase(Locale.ROOT) == "hive") { + if (SparkSession.hiveClassesArePresent) { + // In the case that the property is not set at all, builder's config + // does not have this value set to 'hive' yet. The original default + // behavior is that when there are hive classes, we use hive catalog. 
+ sparkSession = builder.enableHiveSupport().getOrCreate() + logInfo("Created Spark session with Hive support") + } else { + // Need to change it back to 'in-memory' if no hive classes are found + // in the case that the property is set to hive in spark-defaults.conf + builder.config(CATALOG_IMPLEMENTATION.key, "in-memory") + sparkSession = builder.getOrCreate() + logInfo("Created Spark session") + } + } else { + // In the case that the property is set but not to 'hive', the internal + // default is 'in-memory'. So the sparkSession will use in-memory catalog. + sparkSession = builder.getOrCreate() + logInfo("Created Spark session") + } + sparkContext = sparkSession.sparkContext + sparkSession + } catch { + case e: Exception if isShellSession => + logError("Failed to initialize Spark session.", e) + sys.exit(1) + } + } + +} diff --git a/repl/src/main/scala-2.13/org/apache/spark/repl/SparkILoop.scala b/repl/src/main/scala-2.13/org/apache/spark/repl/SparkILoop.scala new file mode 100644 index 0000000000000..861cf5a740ce1 --- /dev/null +++ b/repl/src/main/scala-2.13/org/apache/spark/repl/SparkILoop.scala @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.repl + +import java.io.{BufferedReader, PrintWriter} + +// scalastyle:off println +import scala.Predef.{println => _, _} +import scala.tools.nsc.GenericRunnerSettings +import scala.tools.nsc.Settings +import scala.tools.nsc.interpreter.shell.{ILoop, ShellConfig} +import scala.tools.nsc.util.stringFromStream +import scala.util.Properties.{javaVersion, javaVmName, versionString} +// scalastyle:on println + +/** + * A Spark-specific interactive shell. 
+ */ +class SparkILoop(in0: BufferedReader, out: PrintWriter) + extends ILoop(ShellConfig(new GenericRunnerSettings(_ => ())), in0, out) { + def this() = this(null, new PrintWriter(Console.out, true)) + + val initializationCommands: Seq[String] = Seq( + """ + @transient val spark = if (org.apache.spark.repl.Main.sparkSession != null) { + org.apache.spark.repl.Main.sparkSession + } else { + org.apache.spark.repl.Main.createSparkSession() + } + @transient val sc = { + val _sc = spark.sparkContext + if (_sc.getConf.getBoolean("spark.ui.reverseProxy", false)) { + val proxyUrl = _sc.getConf.get("spark.ui.reverseProxyUrl", null) + if (proxyUrl != null) { + println( + s"Spark Context Web UI is available at ${proxyUrl}/proxy/${_sc.applicationId}") + } else { + println(s"Spark Context Web UI is available at Spark Master Public URL") + } + } else { + _sc.uiWebUrl.foreach { + webUrl => println(s"Spark context Web UI available at ${webUrl}") + } + } + println("Spark context available as 'sc' " + + s"(master = ${_sc.master}, app id = ${_sc.applicationId}).") + println("Spark session available as 'spark'.") + _sc + } + """, + "import org.apache.spark.SparkContext._", + "import spark.implicits._", + "import spark.sql", + "import org.apache.spark.sql.functions._" + ) + + override protected def internalReplAutorunCode(): Seq[String] = + initializationCommands + + def initializeSpark(): Unit = { + if (!intp.reporter.hasErrors) { + // `savingReplayStack` removes the commands from session history. + savingReplayStack { + initializationCommands.foreach(intp quietRun _) + } + } else { + throw new RuntimeException( + s"Scala $versionString interpreter encountered " + + "errors during initialization" + ) + } + } + + /** Print a welcome message */ + override def printWelcome(): Unit = { + import org.apache.spark.SPARK_VERSION + echo("""Welcome to + ____ __ + / __/__ ___ _____/ /__ + _\ \/ _ \/ _ `/ __/ '_/ + /___/ .__/\_,_/_/ /_/\_\ version %s + /_/ + """.format(SPARK_VERSION)) + val welcomeMsg = "Using Scala %s (%s, Java %s)".format( + versionString, + javaVmName, + javaVersion + ) + echo(welcomeMsg) + echo("Type in expressions to have them evaluated.") + echo("Type :help for more information.") + } + + /** Available commands */ + override def commands: List[LoopCommand] = standardCommands + + override def resetCommand(line: String): Unit = { + super.resetCommand(line) + initializeSpark() + echo( + "Note that after :reset, state of SparkSession and SparkContext is unchanged." + ) + } + + override def replay(): Unit = { + initializeSpark() + super.replay() + } +} + +object SparkILoop { + + /** + * Creates an interpreter loop with default settings and feeds + * the given code to it as input. 
+ */ + def run(code: String, sets: Settings = new Settings): String = { + import java.io.{BufferedReader, StringReader, OutputStreamWriter} + + stringFromStream { ostream => + Console.withOut(ostream) { + val input = new BufferedReader(new StringReader(code)) + val output = new PrintWriter(new OutputStreamWriter(ostream), true) + val repl = new SparkILoop(input, output) + + if (sets.classpath.isDefault) { + sets.classpath.value = sys.props("java.class.path") + } + repl.run(sets) + } + } + } + def run(lines: List[String]): String = run(lines.map(_ + "\n").mkString) +} diff --git a/repl/src/test/scala-2.12/org/apache/spark/repl/Repl2Suite.scala b/repl/src/test/scala-2.12/org/apache/spark/repl/Repl2Suite.scala new file mode 100644 index 0000000000000..4ffa8beaf4740 --- /dev/null +++ b/repl/src/test/scala-2.12/org/apache/spark/repl/Repl2Suite.scala @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.repl + +import java.io._ +import java.nio.file.Files + +import scala.tools.nsc.interpreter.SimpleReader + +import org.apache.log4j.{Level, LogManager, PropertyConfigurator} +import org.scalatest.BeforeAndAfterAll + +import org.apache.spark.{SparkContext, SparkFunSuite} +import org.apache.spark.internal.Logging +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION + +class Repl2Suite extends SparkFunSuite with BeforeAndAfterAll { + test("propagation of local properties") { + // A mock ILoop that doesn't install the SIGINT handler. + class ILoop(out: PrintWriter) extends SparkILoop(None, out) { + settings = new scala.tools.nsc.Settings + settings.usejavacp.value = true + org.apache.spark.repl.Main.interp = this + in = SimpleReader() + } + + val out = new StringWriter() + Main.interp = new ILoop(new PrintWriter(out)) + Main.sparkContext = new SparkContext("local", "repl-test") + Main.interp.createInterpreter() + + Main.sparkContext.setLocalProperty("someKey", "someValue") + + // Make sure the value we set in the caller to interpret is propagated in the thread that + // interprets the command. + Main.interp.interpret("org.apache.spark.repl.Main.sparkContext.getLocalProperty(\"someKey\")") + assert(out.toString.contains("someValue")) + + Main.sparkContext.stop() + System.clearProperty("spark.driver.port") + } +} diff --git a/repl/src/test/scala-2.12/org/apache/spark/repl/SingletonRepl2Suite.scala b/repl/src/test/scala-2.12/org/apache/spark/repl/SingletonRepl2Suite.scala new file mode 100644 index 0000000000000..a4eff392a2c99 --- /dev/null +++ b/repl/src/test/scala-2.12/org/apache/spark/repl/SingletonRepl2Suite.scala @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.repl + +import java.io._ + +import org.apache.spark.SparkFunSuite + +/** + * A special test suite for REPL that all test cases share one REPL instance. + */ +class SingletonRepl2Suite extends SparkFunSuite { + private val out = new StringWriter() + private val in = new PipedOutputStream() + private var thread: Thread = _ + + private val CONF_EXECUTOR_CLASSPATH = "spark.executor.extraClassPath" + private val oldExecutorClasspath = System.getProperty(CONF_EXECUTOR_CLASSPATH) + + override def beforeAll(): Unit = { + super.beforeAll() + + val classpath = System.getProperty("java.class.path") + System.setProperty(CONF_EXECUTOR_CLASSPATH, classpath) + + Main.conf.set("spark.master", "local-cluster[2,1,1024]") + val interp = new SparkILoop( + new BufferedReader(new InputStreamReader(new PipedInputStream(in))), + new PrintWriter(out)) + + // Forces to create new SparkContext + Main.sparkContext = null + Main.sparkSession = null + + // Starts a new thread to run the REPL interpreter, so that we won't block. + thread = new Thread(() => Main.doMain(Array("-classpath", classpath), interp)) + thread.setDaemon(true) + thread.start() + + waitUntil(() => out.toString.contains("Type :help for more information")) + } + + override def afterAll(): Unit = { + in.close() + thread.join() + if (oldExecutorClasspath != null) { + System.setProperty(CONF_EXECUTOR_CLASSPATH, oldExecutorClasspath) + } else { + System.clearProperty(CONF_EXECUTOR_CLASSPATH) + } + super.afterAll() + } + + private def waitUntil(cond: () => Boolean): Unit = { + import scala.concurrent.duration._ + import org.scalatest.concurrent.Eventually._ + + eventually(timeout(50.seconds), interval(500.millis)) { + assert(cond(), "current output: " + out.toString) + } + } + + /** + * Run the given commands string in a globally shared interpreter instance. Note that the given + * commands should not crash the interpreter, to not affect other test cases. + */ + def runInterpreter(input: String): String = { + val currentOffset = out.getBuffer.length() + // append a special statement to the end of the given code, so that we can know what's + // the final output of this code snippet and rely on it to wait until the output is ready. 
+ val timestamp = System.currentTimeMillis() + in.write((input + s"\nval _result_$timestamp = 1\n").getBytes) + in.flush() + val stopMessage = s"_result_$timestamp: Int = 1" + waitUntil(() => out.getBuffer.substring(currentOffset).contains(stopMessage)) + out.getBuffer.substring(currentOffset) + } + + def assertContains(message: String, output: String): Unit = { + val isContain = output.contains(message) + assert(isContain, + "Interpreter output did not contain '" + message + "':\n" + output) + } + + def assertDoesNotContain(message: String, output: String): Unit = { + val isContain = output.contains(message) + assert(!isContain, + "Interpreter output contained '" + message + "':\n" + output) + } + + test("SPARK-31399: should clone+clean line object w/ non-serializable state in ClosureCleaner") { + // Test ClosureCleaner when a closure captures the enclosing `this` REPL line object, and that + // object contains an unused non-serializable field. + // Specifically, the closure in this test case contains a directly nested closure, and the + // capture is triggered by the inner closure. + // `ns` should be nulled out, but `topLevelValue` should stay intact. + + // Can't use :paste mode because PipedOutputStream/PipedInputStream doesn't work well with the + // EOT control character (i.e. Ctrl+D). + // Just write things on a single line to emulate :paste mode. + + // NOTE: in order for this test case to trigger the intended scenario, the following three + // variables need to be in the same "input", which will make the REPL pack them into the + // same REPL line object: + // - ns: a non-serializable state, not accessed by the closure; + // - topLevelValue: a serializable state, accessed by the closure; + // - closure: the starting closure, captures the enclosing REPL line object. + val output = runInterpreter( + """ + |class NotSerializableClass(val x: Int) + |val ns = new NotSerializableClass(42); val topLevelValue = "someValue"; val closure = + |(j: Int) => { + | (1 to j).flatMap { x => + | (1 to x).map { y => y + topLevelValue } + | } + |} + |val r = sc.parallelize(0 to 2).map(closure).collect + """.stripMargin) + assertContains("r: Array[scala.collection.immutable.IndexedSeq[String]] = " + + "Array(Vector(), Vector(1someValue), Vector(1someValue, 1someValue, 2someValue))", output) +// assertContains("r: Array[IndexedSeq[String]] = " + +// "Array(Vector(), Vector(1someValue), Vector(1someValue, 1someValue, 2someValue))", output) + assertDoesNotContain("Exception", output) + } + + test("SPARK-31399: ClosureCleaner should discover indirectly nested closure in inner class") { + // Similar to the previous test case, but with indirect closure nesting instead. + // There's still nested closures involved, but the inner closure is indirectly nested in the + // outer closure, with a level of inner class in between them. + // This changes how the inner closure references/captures the outer closure/enclosing `this` + // REPL line object, and covers a different code path in inner closure discovery. + + // `ns` should be nulled out, but `topLevelValue` should stay intact. 
+ + val output = runInterpreter( + """ + |class NotSerializableClass(val x: Int) + |val ns = new NotSerializableClass(42); val topLevelValue = "someValue"; val closure = + |(j: Int) => { + | class InnerFoo { + | val innerClosure = (x: Int) => (1 to x).map { y => y + topLevelValue } + | } + | val innerFoo = new InnerFoo + | (1 to j).flatMap(innerFoo.innerClosure) + |} + |val r = sc.parallelize(0 to 2).map(closure).collect + """.stripMargin) + assertContains("r: Array[scala.collection.immutable.IndexedSeq[String]] = " + + "Array(Vector(), Vector(1someValue), Vector(1someValue, 1someValue, 2someValue))", output) +// assertContains("r: Array[IndexedSeq[String]] = " + +// "Array(Vector(), Vector(1someValue), Vector(1someValue, 1someValue, 2someValue))", output) + assertDoesNotContain("Array(Vector(), Vector(1null), Vector(1null, 1null, 2null)", output) + assertDoesNotContain("Exception", output) + } + + } diff --git a/repl/src/test/scala-2.13/org/apache/spark/repl/Repl2Suite.scala b/repl/src/test/scala-2.13/org/apache/spark/repl/Repl2Suite.scala new file mode 100644 index 0000000000000..a93284a129e28 --- /dev/null +++ b/repl/src/test/scala-2.13/org/apache/spark/repl/Repl2Suite.scala @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.repl + +import java.io._ +import java.nio.file.Files + +import org.apache.log4j.{Level, LogManager, PropertyConfigurator} +import org.scalatest.BeforeAndAfterAll + +import org.apache.spark.{SparkContext, SparkFunSuite} +import org.apache.spark.internal.Logging +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION + +class Repl2Suite extends SparkFunSuite with BeforeAndAfterAll { + test("propagation of local properties") { + // A mock ILoop that doesn't install the SIGINT handler. + class ILoop(out: PrintWriter) extends SparkILoop(null, out) + + val out = new StringWriter() + Main.interp = new ILoop(new PrintWriter(out)) + Main.sparkContext = new SparkContext("local", "repl-test") + val settings = new scala.tools.nsc.Settings + settings.usejavacp.value = true + Main.interp.createInterpreter(settings) + + Main.sparkContext.setLocalProperty("someKey", "someValue") + + // Make sure the value we set in the caller to interpret is propagated in the thread that + // interprets the command. 
+ Main.interp.interpret("org.apache.spark.repl.Main.sparkContext.getLocalProperty(\"someKey\")") + assert(out.toString.contains("someValue")) + + Main.sparkContext.stop() + System.clearProperty("spark.driver.port") + } +} diff --git a/repl/src/test/scala-2.13/org/apache/spark/repl/SingletonRepl2Suite.scala b/repl/src/test/scala-2.13/org/apache/spark/repl/SingletonRepl2Suite.scala new file mode 100644 index 0000000000000..b153a0261aaf5 --- /dev/null +++ b/repl/src/test/scala-2.13/org/apache/spark/repl/SingletonRepl2Suite.scala @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.repl + +import java.io._ + +import org.apache.spark.SparkFunSuite + +/** + * A special test suite for REPL that all test cases share one REPL instance. + */ +class SingletonRepl2Suite extends SparkFunSuite { + + private val out = new StringWriter() + private val in = new PipedOutputStream() + private var thread: Thread = _ + + private val CONF_EXECUTOR_CLASSPATH = "spark.executor.extraClassPath" + private val oldExecutorClasspath = System.getProperty(CONF_EXECUTOR_CLASSPATH) + + override def beforeAll(): Unit = { + super.beforeAll() + + val classpath = System.getProperty("java.class.path") + System.setProperty(CONF_EXECUTOR_CLASSPATH, classpath) + + Main.conf.set("spark.master", "local-cluster[2,1,1024]") + val interp = new SparkILoop( + new BufferedReader(new InputStreamReader(new PipedInputStream(in))), + new PrintWriter(out)) + + // Forces to create new SparkContext + Main.sparkContext = null + Main.sparkSession = null + + // Starts a new thread to run the REPL interpreter, so that we won't block. + thread = new Thread(() => Main.doMain(Array("-classpath", classpath), interp)) + thread.setDaemon(true) + thread.start() + + waitUntil(() => out.toString.contains("Type :help for more information")) + } + + override def afterAll(): Unit = { + in.close() + thread.join() + if (oldExecutorClasspath != null) { + System.setProperty(CONF_EXECUTOR_CLASSPATH, oldExecutorClasspath) + } else { + System.clearProperty(CONF_EXECUTOR_CLASSPATH) + } + super.afterAll() + } + + private def waitUntil(cond: () => Boolean): Unit = { + import scala.concurrent.duration._ + import org.scalatest.concurrent.Eventually._ + + eventually(timeout(50.seconds), interval(500.millis)) { + assert(cond(), "current output: " + out.toString) + } + } + + /** + * Run the given commands string in a globally shared interpreter instance. Note that the given + * commands should not crash the interpreter, to not affect other test cases. 
+ */ + def runInterpreter(input: String): String = { + val currentOffset = out.getBuffer.length() + // append a special statement to the end of the given code, so that we can know what's + // the final output of this code snippet and rely on it to wait until the output is ready. + val timestamp = System.currentTimeMillis() + in.write((input + s"\nval _result_$timestamp = 1\n").getBytes) + in.flush() + val stopMessage = s"_result_$timestamp: Int = 1" + waitUntil(() => out.getBuffer.substring(currentOffset).contains(stopMessage)) + out.getBuffer.substring(currentOffset) + } + + def assertContains(message: String, output: String): Unit = { + val isContain = output.contains(message) + assert(isContain, + "Interpreter output did not contain '" + message + "':\n" + output) + } + + def assertDoesNotContain(message: String, output: String): Unit = { + val isContain = output.contains(message) + assert(!isContain, + "Interpreter output contained '" + message + "':\n" + output) + } + + test("SPARK-31399: should clone+clean line object w/ non-serializable state in ClosureCleaner") { + // Test ClosureCleaner when a closure captures the enclosing `this` REPL line object, and that + // object contains an unused non-serializable field. + // Specifically, the closure in this test case contains a directly nested closure, and the + // capture is triggered by the inner closure. + // `ns` should be nulled out, but `topLevelValue` should stay intact. + + // Can't use :paste mode because PipedOutputStream/PipedInputStream doesn't work well with the + // EOT control character (i.e. Ctrl+D). + // Just write things on a single line to emulate :paste mode. + + // NOTE: in order for this test case to trigger the intended scenario, the following three + // variables need to be in the same "input", which will make the REPL pack them into the + // same REPL line object: + // - ns: a non-serializable state, not accessed by the closure; + // - topLevelValue: a serializable state, accessed by the closure; + // - closure: the starting closure, captures the enclosing REPL line object. + val output = runInterpreter( + """ + |class NotSerializableClass(val x: Int) + |val ns = new NotSerializableClass(42); val topLevelValue = "someValue"; val closure = + |(j: Int) => { + | (1 to j).flatMap { x => + | (1 to x).map { y => y + topLevelValue } + | } + |} + |val r = sc.parallelize(0 to 2).map(closure).collect + """.stripMargin) +// assertContains("r: Array[scala.collection.immutable.IndexedSeq[String]] = " + +// "Array(Vector(), Vector(1someValue), Vector(1someValue, 1someValue, 2someValue))", output) + assertContains("r: Array[IndexedSeq[String]] = " + + "Array(Vector(), Vector(1someValue), Vector(1someValue, 1someValue, 2someValue))", output) + assertDoesNotContain("Exception", output) + } + + test("SPARK-31399: ClosureCleaner should discover indirectly nested closure in inner class") { + // Similar to the previous test case, but with indirect closure nesting instead. + // There's still nested closures involved, but the inner closure is indirectly nested in the + // outer closure, with a level of inner class in between them. + // This changes how the inner closure references/captures the outer closure/enclosing `this` + // REPL line object, and covers a different code path in inner closure discovery. + + // `ns` should be nulled out, but `topLevelValue` should stay intact. 
+ + val output = runInterpreter( + """ + |class NotSerializableClass(val x: Int) + |val ns = new NotSerializableClass(42); val topLevelValue = "someValue"; val closure = + |(j: Int) => { + | class InnerFoo { + | val innerClosure = (x: Int) => (1 to x).map { y => y + topLevelValue } + | } + | val innerFoo = new InnerFoo + | (1 to j).flatMap(innerFoo.innerClosure) + |} + |val r = sc.parallelize(0 to 2).map(closure).collect + """.stripMargin) +// assertContains("r: Array[scala.collection.immutable.IndexedSeq[String]] = " + +// "Array(Vector(), Vector(1someValue), Vector(1someValue, 1someValue, 2someValue))", output) + assertContains("r: Array[IndexedSeq[String]] = " + + "Array(Vector(), Vector(1someValue), Vector(1someValue, 1someValue, 2someValue))", output) + assertDoesNotContain("Array(Vector(), Vector(1null), Vector(1null, 1null, 2null)", output) + assertDoesNotContain("Exception", output) + } +} diff --git a/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala index 1e92b36c336d8..95d908cec5de0 100644 --- a/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala +++ b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala @@ -20,8 +20,6 @@ package org.apache.spark.repl import java.io._ import java.nio.file.Files -import scala.tools.nsc.interpreter.SimpleReader - import org.apache.log4j.{Level, LogManager, PropertyConfigurator} import org.scalatest.BeforeAndAfterAll @@ -86,31 +84,6 @@ class ReplSuite extends SparkFunSuite with BeforeAndAfterAll { "Interpreter output contained '" + message + "':\n" + output) } - test("propagation of local properties") { - // A mock ILoop that doesn't install the SIGINT handler. - class ILoop(out: PrintWriter) extends SparkILoop(None, out) { - settings = new scala.tools.nsc.Settings - settings.usejavacp.value = true - org.apache.spark.repl.Main.interp = this - in = SimpleReader() - } - - val out = new StringWriter() - Main.interp = new ILoop(new PrintWriter(out)) - Main.sparkContext = new SparkContext("local", "repl-test") - Main.interp.createInterpreter() - - Main.sparkContext.setLocalProperty("someKey", "someValue") - - // Make sure the value we set in the caller to interpret is propagated in the thread that - // interprets the command. - Main.interp.interpret("org.apache.spark.repl.Main.sparkContext.getLocalProperty(\"someKey\")") - assert(out.toString.contains("someValue")) - - Main.sparkContext.stop() - System.clearProperty("spark.driver.port") - } - test("SPARK-15236: use Hive catalog") { // turn on the INFO log so that it is possible the code will dump INFO // entry for using "HiveMetastore" diff --git a/repl/src/test/scala/org/apache/spark/repl/SingletonReplSuite.scala b/repl/src/test/scala/org/apache/spark/repl/SingletonReplSuite.scala index e11a54bc88070..4795306692f7a 100644 --- a/repl/src/test/scala/org/apache/spark/repl/SingletonReplSuite.scala +++ b/repl/src/test/scala/org/apache/spark/repl/SingletonReplSuite.scala @@ -380,67 +380,6 @@ class SingletonReplSuite extends SparkFunSuite { assertDoesNotContain("Exception", output) } - test("SPARK-31399: should clone+clean line object w/ non-serializable state in ClosureCleaner") { - // Test ClosureCleaner when a closure captures the enclosing `this` REPL line object, and that - // object contains an unused non-serializable field. - // Specifically, the closure in this test case contains a directly nested closure, and the - // capture is triggered by the inner closure. 
- // `ns` should be nulled out, but `topLevelValue` should stay intact. - - // Can't use :paste mode because PipedOutputStream/PipedInputStream doesn't work well with the - // EOT control character (i.e. Ctrl+D). - // Just write things on a single line to emulate :paste mode. - - // NOTE: in order for this test case to trigger the intended scenario, the following three - // variables need to be in the same "input", which will make the REPL pack them into the - // same REPL line object: - // - ns: a non-serializable state, not accessed by the closure; - // - topLevelValue: a serializable state, accessed by the closure; - // - closure: the starting closure, captures the enclosing REPL line object. - val output = runInterpreter( - """ - |class NotSerializableClass(val x: Int) - |val ns = new NotSerializableClass(42); val topLevelValue = "someValue"; val closure = - |(j: Int) => { - | (1 to j).flatMap { x => - | (1 to x).map { y => y + topLevelValue } - | } - |} - |val r = sc.parallelize(0 to 2).map(closure).collect - """.stripMargin) - assertContains("r: Array[scala.collection.immutable.IndexedSeq[String]] = " + - "Array(Vector(), Vector(1someValue), Vector(1someValue, 1someValue, 2someValue))", output) - assertDoesNotContain("Exception", output) - } - - test("SPARK-31399: ClosureCleaner should discover indirectly nested closure in inner class") { - // Similar to the previous test case, but with indirect closure nesting instead. - // There's still nested closures involved, but the inner closure is indirectly nested in the - // outer closure, with a level of inner class in between them. - // This changes how the inner closure references/captures the outer closure/enclosing `this` - // REPL line object, and covers a different code path in inner closure discovery. - - // `ns` should be nulled out, but `topLevelValue` should stay intact. 
- - val output = runInterpreter( - """ - |class NotSerializableClass(val x: Int) - |val ns = new NotSerializableClass(42); val topLevelValue = "someValue"; val closure = - |(j: Int) => { - | class InnerFoo { - | val innerClosure = (x: Int) => (1 to x).map { y => y + topLevelValue } - | } - | val innerFoo = new InnerFoo - | (1 to j).flatMap(innerFoo.innerClosure) - |} - |val r = sc.parallelize(0 to 2).map(closure).collect - """.stripMargin) - assertContains("r: Array[scala.collection.immutable.IndexedSeq[String]] = " + - "Array(Vector(), Vector(1someValue), Vector(1someValue, 1someValue, 2someValue))", output) - assertDoesNotContain("Array(Vector(), Vector(1null), Vector(1null, 1null, 2null)", output) - assertDoesNotContain("Exception", output) - } - test("newProductSeqEncoder with REPL defined class") { val output = runInterpreter( """ diff --git a/sql/catalyst/src/main/scala-2.13/org/apache/spark/sql/catalyst/util/CaseInsensitiveMap.scala b/sql/catalyst/src/main/scala-2.13/org/apache/spark/sql/catalyst/util/CaseInsensitiveMap.scala index 352e5a4c59048..e18a01810d2eb 100644 --- a/sql/catalyst/src/main/scala-2.13/org/apache/spark/sql/catalyst/util/CaseInsensitiveMap.scala +++ b/sql/catalyst/src/main/scala-2.13/org/apache/spark/sql/catalyst/util/CaseInsensitiveMap.scala @@ -43,7 +43,7 @@ class CaseInsensitiveMap[T] private (val originalMap: Map[String, T]) extends Ma new CaseInsensitiveMap[B1](originalMap.filter(!_._1.equalsIgnoreCase(key)) + (key -> value)) } - override def +[B1 >: T](kv: (String, B1)): CaseInsensitiveMap[B1] = this.updated(kv._1, kv._2) + override def +[B1 >: T](kv: (String, B1)): CaseInsensitiveMap[B1] = this.updated(kv._1, kv._2) def ++(xs: IterableOnce[(String, T)]): CaseInsensitiveMap[T] = { xs.iterator.foldLeft(this) { (m, kv) => m.updated(kv._1, kv._2) } From 3d08084022a4365966526216a616a3b760450884 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Sat, 12 Sep 2020 21:34:35 -0700 Subject: [PATCH 0013/1009] [SPARK-24994][SQL] Add UnwrapCastInBinaryComparison optimizer to simplify literal types ### What changes were proposed in this pull request? Currently, in cases like the following: ```sql SELECT * FROM t WHERE age < 40 ``` where `age` is of short type, Spark won't be able to simplify this and can only generate the filter `cast(age, int) < 40`. This won't get pushed down to data sources and therefore is not optimized. This PR proposes an optimizer rule to improve this when the following constraints are satisfied: - the input expression is a binary comparison where one side is a cast operation and the other is a literal. - both the cast child expression and the literal are of integral type (i.e., byte, short, int or long) When this is true, the rule tries several optimizations to either simplify the expression or move the cast to the literal side, so the resulting filter for the above case becomes `age < cast(40 as smallint)`. This is better since the cast can be optimized away later and the filter can be pushed down to data sources. This PR follows a similar effort in Presto (https://prestosql.io/blog/2019/05/21/optimizing-the-casts-away.html). Here we only handle integral types, but plan to extend to other types in follow-ups. ### Why are the changes needed? As mentioned in the previous section, when the cast is not optimized, the filter cannot be pushed down to data sources, which can lead to unnecessary IO and therefore longer job times and wasted resources. This PR helps to improve that. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested?
Added unit tests for both the optimizer rule and filter pushdown on datasource level for both Orc and Parquet. Closes #29565 from sunchao/SPARK-24994. Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/optimizer/Optimizer.scala | 1 + .../UnwrapCastInBinaryComparison.scala | 236 ++++++++++++++++++ .../UnwrapCastInBinaryComparisonSuite.scala | 161 ++++++++++++ .../spark/sql/FileBasedDataSourceSuite.scala | 90 ++++++- 4 files changed, 487 insertions(+), 1 deletion(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 296fe86e834e5..9216ab1631e7b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -107,6 +107,7 @@ abstract class Optimizer(catalogManager: CatalogManager) RewriteCorrelatedScalarSubquery, EliminateSerialization, RemoveRedundantAliases, + UnwrapCastInBinaryComparison, RemoveNoopOperators, CombineWithFields, SimplifyExtractValueOps, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala new file mode 100644 index 0000000000000..89f7c0f71b7ac --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala @@ -0,0 +1,236 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.Literal.FalseLiteral +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.types._ + +/** + * Unwrap casts in binary comparison operations with patterns like following: + * + * `BinaryComparison(Cast(fromExp, toType), Literal(value, toType))` + * or + * `BinaryComparison(Literal(value, toType), Cast(fromExp, toType))` + * + * This rule optimizes expressions with the above pattern by either replacing the cast with simpler + * constructs, or moving the cast from the expression side to the literal side, which enables them + * to be optimized away later and pushed down to data sources. + * + * Currently this only handles cases where: + * 1). 
`fromType` (of `fromExp`) and `toType` are of integral types (i.e., byte, short, int and + * long) + * 2). `fromType` can be safely coerced to `toType` without precision loss (e.g., short to int, + * int to long, but not long to int) + * + * If the above conditions are satisfied, the rule checks to see if the literal `value` is within + * range `(min, max)`, where `min` and `max` are the minimum and maximum value of `fromType`, + * respectively. If this is true then it means we can safely cast `value` to `fromType` and thus + * able to move the cast to the literal side. That is: + * + * `cast(fromExp, toType) op value` ==> `fromExp op cast(value, fromType)` + * + * If the `value` is not within range `(min, max)`, the rule breaks the scenario into different + * cases and try to replace each with simpler constructs. + * + * if `value > max`, the cases are of following: + * - `cast(fromExp, toType) > value` ==> if(isnull(fromExp), null, false) + * - `cast(fromExp, toType) >= value` ==> if(isnull(fromExp), null, false) + * - `cast(fromExp, toType) === value` ==> if(isnull(fromExp), null, false) + * - `cast(fromExp, toType) <=> value` ==> false (if `fromExp` is deterministic) + * - `cast(fromExp, toType) <=> value` ==> cast(fromExp, toType) <=> value (if `fromExp` is + * non-deterministic) + * - `cast(fromExp, toType) <= value` ==> if(isnull(fromExp), null, true) + * - `cast(fromExp, toType) < value` ==> if(isnull(fromExp), null, true) + * + * if `value == max`, the cases are of following: + * - `cast(fromExp, toType) > value` ==> if(isnull(fromExp), null, false) + * - `cast(fromExp, toType) >= value` ==> fromExp == max + * - `cast(fromExp, toType) === value` ==> fromExp == max + * - `cast(fromExp, toType) <=> value` ==> fromExp <=> max + * - `cast(fromExp, toType) <= value` ==> if(isnull(fromExp), null, true) + * - `cast(fromExp, toType) < value` ==> fromExp =!= max + * + * Similarly for the cases when `value == min` and `value < min`. + * + * Further, the above `if(isnull(fromExp), null, false)` is represented using conjunction + * `and(isnull(fromExp), null)`, to enable further optimization and filter pushdown to data sources. + * Similarly, `if(isnull(fromExp), null, true)` is represented with `or(isnotnull(fromExp), null)`. + */ +object UnwrapCastInBinaryComparison extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = plan transform { + case l: LogicalPlan => + l transformExpressionsUp { + case e @ BinaryComparison(_, _) => unwrapCast(e) + } + } + + private def unwrapCast(exp: Expression): Expression = exp match { + // Not a canonical form. In this case we first canonicalize the expression by swapping the + // literal and cast side, then process the result and swap the literal and cast again to + // restore the original order. + case BinaryComparison(Literal(_, literalType), Cast(fromExp, toType, _)) + if canImplicitlyCast(fromExp, toType, literalType) => + def swap(e: Expression): Expression = e match { + case GreaterThan(left, right) => LessThan(right, left) + case GreaterThanOrEqual(left, right) => LessThanOrEqual(right, left) + case EqualTo(left, right) => EqualTo(right, left) + case EqualNullSafe(left, right) => EqualNullSafe(right, left) + case LessThanOrEqual(left, right) => GreaterThanOrEqual(right, left) + case LessThan(left, right) => GreaterThan(right, left) + case _ => e + } + + swap(unwrapCast(swap(exp))) + + // In case both sides have integral type, optimize the comparison by removing casts or + // moving cast to the literal side. 
+ case be @ BinaryComparison( + Cast(fromExp, toType: IntegralType, _), Literal(value, literalType)) + if canImplicitlyCast(fromExp, toType, literalType) => + simplifyIntegralComparison(be, fromExp, toType, value) + + case _ => exp + } + + /** + * Check if the input `value` is within range `(min, max)` of the `fromType`, where `min` and + * `max` are the minimum and maximum value of the `fromType`. If the above is true, this + * optimizes the expression by moving the cast to the literal side. Otherwise if result is not + * true, this replaces the input binary comparison `exp` with simpler expressions. + */ + private def simplifyIntegralComparison( + exp: BinaryComparison, + fromExp: Expression, + toType: IntegralType, + value: Any): Expression = { + + val fromType = fromExp.dataType + val (min, max) = getRange(fromType) + val (minInToType, maxInToType) = { + (Cast(Literal(min), toType).eval(), Cast(Literal(max), toType).eval()) + } + val ordering = toType.ordering.asInstanceOf[Ordering[Any]] + val minCmp = ordering.compare(value, minInToType) + val maxCmp = ordering.compare(value, maxInToType) + + if (maxCmp > 0) { + exp match { + case EqualTo(_, _) | GreaterThan(_, _) | GreaterThanOrEqual(_, _) => + falseIfNotNull(fromExp) + case LessThan(_, _) | LessThanOrEqual(_, _) => + trueIfNotNull(fromExp) + // make sure the expression is evaluated if it is non-deterministic + case EqualNullSafe(_, _) if exp.deterministic => + FalseLiteral + case _ => exp + } + } else if (maxCmp == 0) { + exp match { + case GreaterThan(_, _) => + falseIfNotNull(fromExp) + case LessThanOrEqual(_, _) => + trueIfNotNull(fromExp) + case LessThan(_, _) => + Not(EqualTo(fromExp, Literal(max, fromType))) + case GreaterThanOrEqual(_, _) | EqualTo(_, _) => + EqualTo(fromExp, Literal(max, fromType)) + case EqualNullSafe(_, _) => + EqualNullSafe(fromExp, Literal(max, fromType)) + case _ => exp + } + } else if (minCmp < 0) { + exp match { + case GreaterThan(_, _) | GreaterThanOrEqual(_, _) => + trueIfNotNull(fromExp) + case LessThan(_, _) | LessThanOrEqual(_, _) | EqualTo(_, _) => + falseIfNotNull(fromExp) + // make sure the expression is evaluated if it is non-deterministic + case EqualNullSafe(_, _) if exp.deterministic => + FalseLiteral + case _ => exp + } + } else if (minCmp == 0) { + exp match { + case LessThan(_, _) => + falseIfNotNull(fromExp) + case GreaterThanOrEqual(_, _) => + trueIfNotNull(fromExp) + case GreaterThan(_, _) => + Not(EqualTo(fromExp, Literal(min, fromType))) + case LessThanOrEqual(_, _) | EqualTo(_, _) => + EqualTo(fromExp, Literal(min, fromType)) + case EqualNullSafe(_, _) => + EqualNullSafe(fromExp, Literal(min, fromType)) + case _ => exp + } + } else { + // This means `value` is within range `(min, max)`. Optimize this by moving the cast to the + // literal side. + val lit = Cast(Literal(value), fromType) + exp match { + case GreaterThan(_, _) => GreaterThan(fromExp, lit) + case GreaterThanOrEqual(_, _) => GreaterThanOrEqual(fromExp, lit) + case EqualTo(_, _) => EqualTo(fromExp, lit) + case EqualNullSafe(_, _) => EqualNullSafe(fromExp, lit) + case LessThan(_, _) => LessThan(fromExp, lit) + case LessThanOrEqual(_, _) => LessThanOrEqual(fromExp, lit) + case _ => exp + } + } + } + + /** + * Check if the input `fromExp` can be safely cast to `toType` without any loss of precision, + * i.e., the conversion is injective. Note this only handles the case when both sides are of + * integral type. 
+ */ + private def canImplicitlyCast(fromExp: Expression, toType: DataType, + literalType: DataType): Boolean = { + toType.sameType(literalType) && + fromExp.dataType.isInstanceOf[IntegralType] && + toType.isInstanceOf[IntegralType] && + Cast.canUpCast(fromExp.dataType, toType) + } + + private def getRange(dt: DataType): (Any, Any) = dt match { + case ByteType => (Byte.MinValue, Byte.MaxValue) + case ShortType => (Short.MinValue, Short.MaxValue) + case IntegerType => (Int.MinValue, Int.MaxValue) + case LongType => (Long.MinValue, Long.MaxValue) + case other => throw new IllegalArgumentException(s"Unsupported type: ${other.catalogString}") + } + + /** + * Wraps input expression `e` with `if(isnull(e), null, false)`. The if-clause is represented + * using `and(isnull(e), null)` which is semantically equivalent by applying 3-valued logic. + */ + private[optimizer] def falseIfNotNull(e: Expression): Expression = { + And(IsNull(e), Literal(null, BooleanType)) + } + + /** + * Wraps input expression `e` with `if(isnull(e), null, true)`. The if-clause is represented + * using `or(isnotnull(e), null)` which is semantically equivalent by applying 3-valued logic. + */ + private[optimizer] def trueIfNotNull(e: Expression): Expression = { + Or(IsNotNull(e), Literal(null, BooleanType)) + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala new file mode 100644 index 0000000000000..387964088b808 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans.DslLogicalPlan +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.IntegralLiteralTestUtils._ +import org.apache.spark.sql.catalyst.expressions.aggregate.First +import org.apache.spark.sql.catalyst.optimizer.UnwrapCastInBinaryComparison._ +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.RuleExecutor +import org.apache.spark.sql.types.{BooleanType, ByteType, DoubleType, IntegerType} + +class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelper { + + object Optimize extends RuleExecutor[LogicalPlan] { + val batches: List[Batch] = + Batch("Unwrap casts in binary comparison", FixedPoint(10), + NullPropagation, ConstantFolding, UnwrapCastInBinaryComparison) :: Nil + } + + val testRelation: LocalRelation = LocalRelation('a.short, 'b.float) + val f: BoundReference = 'a.short.canBeNull.at(0) + + test("unwrap casts when literal == max") { + val v = Short.MaxValue + assertEquivalent(castInt(f) > v.toInt, falseIfNotNull(f)) + assertEquivalent(castInt(f) >= v.toInt, f === v) + assertEquivalent(castInt(f) === v.toInt, f === v) + assertEquivalent(castInt(f) <=> v.toInt, f <=> v) + assertEquivalent(castInt(f) <= v.toInt, trueIfNotNull(f)) + assertEquivalent(castInt(f) < v.toInt, f =!= v) + } + + test("unwrap casts when literal > max") { + val v: Int = positiveInt + assertEquivalent(castInt(f) > v, falseIfNotNull(f)) + assertEquivalent(castInt(f) >= v, falseIfNotNull(f)) + assertEquivalent(castInt(f) === v, falseIfNotNull(f)) + assertEquivalent(castInt(f) <=> v, false) + assertEquivalent(castInt(f) <= v, trueIfNotNull(f)) + assertEquivalent(castInt(f) < v, trueIfNotNull(f)) + } + + test("unwrap casts when literal == min") { + val v = Short.MinValue + assertEquivalent(castInt(f) > v.toInt, f =!= v) + assertEquivalent(castInt(f) >= v.toInt, trueIfNotNull(f)) + assertEquivalent(castInt(f) === v.toInt, f === v) + assertEquivalent(castInt(f) <=> v.toInt, f <=> v) + assertEquivalent(castInt(f) <= v.toInt, f === v) + assertEquivalent(castInt(f) < v.toInt, falseIfNotNull(f)) + } + + test("unwrap casts when literal < min") { + val v: Int = negativeInt + assertEquivalent(castInt(f) > v, trueIfNotNull(f)) + assertEquivalent(castInt(f) >= v, trueIfNotNull(f)) + assertEquivalent(castInt(f) === v, falseIfNotNull(f)) + assertEquivalent(castInt(f) <=> v, false) + assertEquivalent(castInt(f) <= v, falseIfNotNull(f)) + assertEquivalent(castInt(f) < v, falseIfNotNull(f)) + } + + test("unwrap casts when literal is within range (min, max)") { + assertEquivalent(castInt(f) > 300, f > 300.toShort) + assertEquivalent(castInt(f) >= 500, f >= 500.toShort) + assertEquivalent(castInt(f) === 32766, f === 32766.toShort) + assertEquivalent(castInt(f) <=> 32766, f <=> 32766.toShort) + assertEquivalent(castInt(f) <= -6000, f <= -6000.toShort) + assertEquivalent(castInt(f) < -32767, f < -32767.toShort) + } + + test("unwrap casts when cast is on rhs") { + val v = Short.MaxValue + assertEquivalent(Literal(v.toInt) < castInt(f), falseIfNotNull(f)) + assertEquivalent(Literal(v.toInt) <= castInt(f), Literal(v) === f) + assertEquivalent(Literal(v.toInt) === castInt(f), Literal(v) === f) + assertEquivalent(Literal(v.toInt) <=> castInt(f), Literal(v) <=> f) + assertEquivalent(Literal(v.toInt) >= castInt(f), 
trueIfNotNull(f)) + assertEquivalent(Literal(v.toInt) > castInt(f), f =!= v) + + assertEquivalent(Literal(30) <= castInt(f), Literal(30.toShort) <= f) + } + + test("unwrap cast should have no effect when input is not integral type") { + Seq( + castDouble('b) > 42.0, + castDouble('b) >= 42.0, + castDouble('b) === 42.0, + castDouble('b) <=> 42.0, + castDouble('b) <= 42.0, + castDouble('b) < 42.0, + Literal(42.0) > castDouble('b), + Literal(42.0) >= castDouble('b), + Literal(42.0) === castDouble('b), + Literal(42.0) <=> castDouble('b), + Literal(42.0) <= castDouble('b), + Literal(42.0) < castDouble('b) + ).foreach(e => + assertEquivalent(e, e, evaluate = false) + ) + } + + test("unwrap cast should skip when expression is non-deterministic") { + Seq(positiveInt, negativeInt).foreach (v => { + val e = Cast(First(f, ignoreNulls = true), IntegerType) <=> v + assertEquivalent(e, e, evaluate = false) + }) + } + + test("unwrap casts when literal is null") { + val intLit = Literal.create(null, IntegerType) + val nullLit = Literal.create(null, BooleanType) + assertEquivalent(castInt(f) > intLit, nullLit) + assertEquivalent(castInt(f) >= intLit, nullLit) + assertEquivalent(castInt(f) === intLit, nullLit) + assertEquivalent(castInt(f) <=> intLit, IsNull(castInt(f))) + assertEquivalent(castInt(f) <= intLit, nullLit) + assertEquivalent(castInt(f) < intLit, nullLit) + } + + test("unwrap cast should skip if cannot coerce type") { + assertEquivalent(Cast(f, ByteType) > 100.toByte, Cast(f, ByteType) > 100.toByte) + } + + private def castInt(e: Expression): Expression = Cast(e, IntegerType) + + private def castDouble(e: Expression): Expression = Cast(e, DoubleType) + + private def assertEquivalent(e1: Expression, e2: Expression, evaluate: Boolean = true): Unit = { + val plan = testRelation.where(e1).analyze + val actual = Optimize.execute(plan) + val expected = testRelation.where(e2).analyze + comparePlans(actual, expected) + + if (evaluate) { + Seq(100.toShort, -300.toShort, null).foreach(v => { + val row = create_row(v) + checkEvaluation(e1, e2.eval(row), row) + }) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala index a3cd0c230d8af..48b2e22457e3c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -31,12 +31,14 @@ import org.apache.spark.SparkException import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd} import org.apache.spark.sql.TestingUDT.{IntervalUDT, NullData, NullUDT} import org.apache.spark.sql.catalyst.expressions.AttributeReference +import org.apache.spark.sql.catalyst.expressions.IntegralLiteralTestUtils.{negativeInt, positiveInt} import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical.Filter import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.datasources.FilePartition import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, DataSourceV2ScanRelation, FileScan} -import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetTable +import org.apache.spark.sql.execution.datasources.v2.orc.OrcScan +import org.apache.spark.sql.execution.datasources.v2.parquet.{ParquetScan, ParquetTable} import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, SortMergeJoinExec} import 
org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf @@ -881,6 +883,92 @@ class FileBasedDataSourceSuite extends QueryTest } } } + + test("test casts pushdown on orc/parquet for integral types") { + def checkPushedFilters( + format: String, + df: DataFrame, + filters: Array[sources.Filter], + noScan: Boolean = false): Unit = { + val scanExec = df.queryExecution.sparkPlan.find(_.isInstanceOf[BatchScanExec]) + if (noScan) { + assert(scanExec.isEmpty) + return + } + val scan = scanExec.get.asInstanceOf[BatchScanExec].scan + format match { + case "orc" => + assert(scan.isInstanceOf[OrcScan]) + assert(scan.asInstanceOf[OrcScan].pushedFilters === filters) + case "parquet" => + assert(scan.isInstanceOf[ParquetScan]) + assert(scan.asInstanceOf[ParquetScan].pushedFilters === filters) + case _ => + fail(s"unknown format $format") + } + } + + Seq("orc", "parquet").foreach { format => + withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> "") { + withTempPath { dir => + spark.range(100).map(i => (i.toShort, i.toString)).toDF("id", "s") + .write + .format(format) + .save(dir.getCanonicalPath) + val df = spark.read.format(format).load(dir.getCanonicalPath) + + // cases when value == MAX + var v = Short.MaxValue + checkPushedFilters(format, df.where('id > v.toInt), Array(), noScan = true) + checkPushedFilters(format, df.where('id >= v.toInt), Array(sources.IsNotNull("id"), + sources.EqualTo("id", v))) + checkPushedFilters(format, df.where('id === v.toInt), Array(sources.IsNotNull("id"), + sources.EqualTo("id", v))) + checkPushedFilters(format, df.where('id <=> v.toInt), + Array(sources.EqualNullSafe("id", v))) + checkPushedFilters(format, df.where('id <= v.toInt), Array(sources.IsNotNull("id"))) + checkPushedFilters(format, df.where('id < v.toInt), Array(sources.IsNotNull("id"), + sources.Not(sources.EqualTo("id", v)))) + + // cases when value > MAX + var v1: Int = positiveInt + checkPushedFilters(format, df.where('id > v1), Array(), noScan = true) + checkPushedFilters(format, df.where('id >= v1), Array(), noScan = true) + checkPushedFilters(format, df.where('id === v1), Array(), noScan = true) + checkPushedFilters(format, df.where('id <=> v1), Array(), noScan = true) + checkPushedFilters(format, df.where('id <= v1), Array(sources.IsNotNull("id"))) + checkPushedFilters(format, df.where('id < v1), Array(sources.IsNotNull("id"))) + + // cases when value = MIN + v = Short.MinValue + checkPushedFilters(format, df.where(lit(v.toInt) < 'id), Array(sources.IsNotNull("id"), + sources.Not(sources.EqualTo("id", v)))) + checkPushedFilters(format, df.where(lit(v.toInt) <= 'id), Array(sources.IsNotNull("id"))) + checkPushedFilters(format, df.where(lit(v.toInt) === 'id), Array(sources.IsNotNull("id"), + sources.EqualTo("id", v))) + checkPushedFilters(format, df.where(lit(v.toInt) <=> 'id), + Array(sources.EqualNullSafe("id", v))) + checkPushedFilters(format, df.where(lit(v.toInt) >= 'id), Array(sources.IsNotNull("id"), + sources.EqualTo("id", v))) + checkPushedFilters(format, df.where(lit(v.toInt) > 'id), Array(), noScan = true) + + // cases when value < MIN + v1 = negativeInt + checkPushedFilters(format, df.where(lit(v1) < 'id), Array(sources.IsNotNull("id"))) + checkPushedFilters(format, df.where(lit(v1) <= 'id), Array(sources.IsNotNull("id"))) + checkPushedFilters(format, df.where(lit(v1) === 'id), Array(), noScan = true) + checkPushedFilters(format, df.where(lit(v1) >= 'id), Array(), noScan = true) + checkPushedFilters(format, df.where(lit(v1) > 'id), Array(), noScan = true) + + // cases when 
value is within range (MIN, MAX) + checkPushedFilters(format, df.where('id > 30), Array(sources.IsNotNull("id"), + sources.GreaterThan("id", 30))) + checkPushedFilters(format, df.where(lit(100) >= 'id), Array(sources.IsNotNull("id"), + sources.LessThanOrEqual("id", 100))) + } + } + } + } } object TestingUDT { From 0549c20c6fccc7ff412818d7352b141590f88b1b Mon Sep 17 00:00:00 2001 From: "bowen.li" Date: Sat, 12 Sep 2020 21:45:55 -0700 Subject: [PATCH 0014/1009] [SPARK-32865][DOC] python section in quickstart page doesn't display SPARK_VERSION correctly ### What changes were proposed in this pull request? In https://github.com/apache/spark/blame/master/docs/quick-start.md#L402, it should be `{{site.SPARK_VERSION}}` rather than `{site.SPARK_VERSION}` ### Why are the changes needed? SPARK_VERSION isn't displayed correctly, as shown below ![image](https://user-images.githubusercontent.com/1892692/93006726-d03c8680-f514-11ea-85e3-1d7cfb682ef2.png) ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? tested locally, as shown below ![image](https://user-images.githubusercontent.com/1892692/93006712-a6835f80-f514-11ea-8d78-6831c9d65265.png) Closes #29738 from bowenli86/doc. Authored-by: bowen.li Signed-off-by: Dongjoon Hyun --- docs/quick-start.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/quick-start.md b/docs/quick-start.md index e7a16a3461653..557fc187fb81d 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -399,7 +399,7 @@ If you are building a packaged PySpark application or library you can add it to {% highlight python %} install_requires=[ - 'pyspark=={site.SPARK_VERSION}' + 'pyspark=={{site.SPARK_VERSION}}' ] {% endhighlight %} From a6d6ea3efedbad14d99c24143834cd4e2e52fb40 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Sat, 12 Sep 2020 22:19:30 -0700 Subject: [PATCH 0015/1009] [SPARK-32802][SQL] Avoid using SpecificInternalRow in RunLengthEncoding#Encoder ### What changes were proposed in this pull request? Currently `RunLengthEncoding#Encoder` uses `SpecificInternalRow` as a holder for the current value when calculating compression stats and doing the actual compression. It calls `ColumnType.copyField` and `ColumnType.getField` on the internal row, which incurs extra cost compared to directly operating on the internal type. This PR proposes to replace the `SpecificInternalRow` with `T#InternalType` to avoid the extra cost. ### Why are the changes needed? Operating on `SpecificInternalRow` carries a certain cost and negatively impacts performance when using `RunLengthEncoding` for compression.
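To make the pattern concrete, here is a minimal, self-contained sketch (illustrative names only, not the actual `Encoder` code; the real change operates on `ColumnType` and `T#InternalType`, as shown in the `compressionSchemes.scala` diff below): the run-length bookkeeping keeps the last seen value as its plain element type instead of going through a one-field row holder.

```
// Minimal sketch: run-length bookkeeping that tracks the last seen value
// directly as its element type, avoiding a one-field row holder and the
// getField/copyField calls that going through a row would require.
object RunLengthSketch {
  // Counts how many runs a run-length encoder would emit for `values`.
  def countRuns[A](values: Seq[A]): Int = {
    var lastValue: Option[A] = None // raw value, no row-holder indirection
    var runs = 0
    values.foreach { v =>
      if (!lastValue.contains(v)) { // value changed: a new run starts
        runs += 1
        lastValue = Some(v)
      }
    }
    runs
  }

  def main(args: Array[String]): Unit = {
    // Three runs: [1, 1], [2], [3, 3, 3]
    println(countRuns(Seq(1, 1, 2, 3, 3, 3)))
  }
}
```

The actual patch applies the same idea inside the encoder's stats gathering and `compress` paths, replacing the `SpecificInternalRow` holder with a `lastValue: T#InternalType` field, as the diff further below shows.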
With the change I see some improvements through `CompressionSchemeBenchmark`: ```diff Intel(R) Core(TM) i9-9880H CPU 2.30GHz BOOLEAN Encode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 1 1 0 51957.0 0.0 1.0X -RunLengthEncoding(2.502) 549 555 9 122.2 8.2 0.0X -BooleanBitSet(0.125) 296 301 3 226.6 4.4 0.0X +PassThrough(1.000) 2 2 0 42985.4 0.0 1.0X +RunLengthEncoding(2.517) 487 500 10 137.7 7.3 0.0X +BooleanBitSet(0.125) 348 353 4 192.8 5.2 0.0X OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 Intel(R) Core(TM) i9-9880H CPU 2.30GHz SHORT Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 3 3 0 22779.9 0.0 1.0X -RunLengthEncoding(1.520) 1186 1192 9 56.6 17.7 0.0X +PassThrough(1.000) 3 4 0 21216.6 0.0 1.0X +RunLengthEncoding(1.493) 882 931 50 76.1 13.1 0.0X OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 Intel(R) Core(TM) i9-9880H CPU 2.30GHz SHORT Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 3 4 0 21352.2 0.0 1.0X -RunLengthEncoding(2.009) 1173 1175 3 57.2 17.5 0.0X +PassThrough(1.000) 3 3 0 22388.6 0.0 1.0X +RunLengthEncoding(2.015) 924 941 23 72.6 13.8 0.0X OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 Intel(R) Core(TM) i9-9880H CPU 2.30GHz INT Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 9 10 1 7410.1 0.1 1.0X -RunLengthEncoding(1.000) 1499 1502 4 44.8 22.3 0.0X -DictionaryEncoding(0.500) 621 630 11 108.0 9.3 0.0X -IntDelta(0.250) 134 149 10 502.0 2.0 0.1X +PassThrough(1.000) 9 10 1 7575.9 0.1 1.0X +RunLengthEncoding(1.002) 952 966 12 70.5 14.2 0.0X +DictionaryEncoding(0.500) 561 567 6 119.7 8.4 0.0X +IntDelta(0.250) 129 134 3 521.9 1.9 0.1X OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 Intel(R) Core(TM) i9-9880H CPU 2.30GHz INT Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 9 10 1 7668.3 0.1 1.0X -RunLengthEncoding(1.332) 1561 1685 175 43.0 23.3 0.0X -DictionaryEncoding(0.501) 616 642 21 108.9 9.2 0.0X -IntDelta(0.250) 126 131 2 533.4 1.9 0.1X +PassThrough(1.000) 9 10 1 7494.1 0.1 1.0X +RunLengthEncoding(1.336) 974 987 13 68.9 14.5 0.0X +DictionaryEncoding(0.501) 709 719 10 94.6 10.6 0.0X +IntDelta(0.250) 127 132 4 528.4 1.9 0.1X OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 Intel(R) Core(TM) i9-9880H CPU 2.30GHz LONG Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 18 19 1 3803.0 0.3 1.0X -RunLengthEncoding(0.754) 1526 1540 20 44.0 22.7 0.0X -DictionaryEncoding(0.250) 735 759 33 91.3 11.0 0.0X -LongDelta(0.125) 126 129 2 530.8 1.9 0.1X +PassThrough(1.000) 19 21 1 3543.5 
0.3 1.0X +RunLengthEncoding(0.747) 1049 1058 12 63.9 15.6 0.0X +DictionaryEncoding(0.250) 620 634 17 108.2 9.2 0.0X +LongDelta(0.125) 129 132 2 520.1 1.9 0.1X OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 Intel(R) Core(TM) i9-9880H CPU 2.30GHz LONG Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 18 20 1 3705.4 0.3 1.0X -RunLengthEncoding(1.002) 1665 1669 6 40.3 24.8 0.0X -DictionaryEncoding(0.251) 890 901 11 75.4 13.3 0.0X -LongDelta(0.125) 125 130 3 537.2 1.9 0.1X +PassThrough(1.000) 18 20 2 3726.8 0.3 1.0X +RunLengthEncoding(0.999) 1076 1077 2 62.4 16.0 0.0X +DictionaryEncoding(0.251) 904 919 19 74.3 13.5 0.0X +LongDelta(0.125) 125 131 4 536.5 1.9 0.1X OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 Intel(R) Core(TM) i9-9880H CPU 2.30GHz STRING Encode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 27 30 2 2497.1 0.4 1.0X -RunLengthEncoding(0.892) 3443 3587 204 19.5 51.3 0.0X -DictionaryEncoding(0.167) 2286 2290 6 29.4 34.1 0.0X +PassThrough(1.000) 28 31 2 2430.2 0.4 1.0X +RunLengthEncoding(0.889) 1798 1800 3 37.3 26.8 0.0X +DictionaryEncoding(0.167) 1956 1959 4 34.3 29.1 0.0X ``` In the above diff, new results are with changes in this PR. It can be seen that encoding performance has improved quite a lot especially for string type. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Relies on existing unit tests. Closes #29654 from sunchao/SPARK-32802. 
Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- ...mpressionSchemeBenchmark-jdk11-results.txt | 168 +++++++++--------- .../CompressionSchemeBenchmark-results.txt | 168 +++++++++--------- .../compression/compressionSchemes.scala | 27 ++- 3 files changed, 179 insertions(+), 184 deletions(-) diff --git a/sql/core/benchmarks/CompressionSchemeBenchmark-jdk11-results.txt b/sql/core/benchmarks/CompressionSchemeBenchmark-jdk11-results.txt index 4fd57a9e95560..d6a5a7d11c23b 100644 --- a/sql/core/benchmarks/CompressionSchemeBenchmark-jdk11-results.txt +++ b/sql/core/benchmarks/CompressionSchemeBenchmark-jdk11-results.txt @@ -2,136 +2,136 @@ Compression Scheme Benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz BOOLEAN Encode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 3 3 0 21087.3 0.0 1.0X -RunLengthEncoding(2.514) 739 739 1 90.8 11.0 0.0X -BooleanBitSet(0.125) 378 379 1 177.4 5.6 0.0X +PassThrough(1.000) 1 1 0 53450.1 0.0 1.0X +RunLengthEncoding(2.496) 533 545 10 125.8 7.9 0.0X +BooleanBitSet(0.125) 287 293 6 234.2 4.3 0.0X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz BOOLEAN Decode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 147 147 1 456.1 2.2 1.0X -RunLengthEncoding 731 732 1 91.8 10.9 0.2X -BooleanBitSet 1410 1411 1 47.6 21.0 0.1X +PassThrough 105 108 2 638.6 1.6 1.0X +RunLengthEncoding 490 497 6 136.8 7.3 0.2X +BooleanBitSet 911 914 4 73.7 13.6 0.1X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz SHORT Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 7 7 0 9729.9 0.1 1.0X -RunLengthEncoding(1.491) 1576 1576 1 42.6 23.5 0.0X +PassThrough(1.000) 3 3 0 20673.0 0.0 1.0X +RunLengthEncoding(1.495) 750 757 9 89.5 11.2 0.0X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz SHORT Decode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 1151 1152 1 58.3 17.2 1.0X -RunLengthEncoding 1619 1621 3 41.4 24.1 0.7X +PassThrough 637 647 7 105.3 9.5 1.0X +RunLengthEncoding 1056 1069 17 63.5 15.7 0.6X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 
10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz SHORT Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 7 7 0 10135.7 0.1 1.0X -RunLengthEncoding(2.010) 1659 1660 0 40.4 24.7 0.0X +PassThrough(1.000) 3 3 0 21332.2 0.0 1.0X +RunLengthEncoding(2.004) 768 783 15 87.4 11.4 0.0X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz SHORT Decode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 1151 1151 1 58.3 17.2 1.0X -RunLengthEncoding 1655 1655 0 40.5 24.7 0.7X +PassThrough 640 643 4 104.9 9.5 1.0X +RunLengthEncoding 1073 1078 6 62.5 16.0 0.6X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz INT Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 23 23 0 2952.8 0.3 1.0X -RunLengthEncoding(0.997) 2356 2356 0 28.5 35.1 0.0X -DictionaryEncoding(0.500) 1402 1402 0 47.9 20.9 0.0X -IntDelta(0.250) 213 213 0 315.2 3.2 0.1X +PassThrough(1.000) 9 9 1 7640.9 0.1 1.0X +RunLengthEncoding(1.003) 882 883 2 76.1 13.1 0.0X +DictionaryEncoding(0.500) 587 624 33 114.3 8.7 0.0X +IntDelta(0.250) 122 127 5 549.8 1.8 0.1X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz INT Decode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 1319 1319 1 50.9 19.7 1.0X -RunLengthEncoding 1803 1806 5 37.2 26.9 0.7X -DictionaryEncoding 931 931 0 72.1 13.9 1.4X -IntDelta 817 821 4 82.2 12.2 1.6X +PassThrough 684 709 27 98.1 10.2 1.0X +RunLengthEncoding 1068 1075 10 62.8 15.9 0.6X +DictionaryEncoding 517 526 6 129.8 7.7 1.3X +IntDelta 541 545 4 124.0 8.1 1.3X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz INT Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 23 23 0 2976.8 0.3 1.0X -RunLengthEncoding(1.337) 2552 2552 1 26.3 38.0 0.0X -DictionaryEncoding(0.501) 1377 1377 0 48.7 20.5 0.0X -IntDelta(0.250) 213 214 2 315.3 3.2 0.1X +PassThrough(1.000) 9 10 1 7475.0 0.1 1.0X +RunLengthEncoding(1.339) 908 922 12 73.9 13.5 0.0X +DictionaryEncoding(0.501) 629 652 16 106.6 9.4 0.0X +IntDelta(0.250) 124 128 3 542.5 1.8 0.1X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) 
Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz INT Decode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 1438 1439 1 46.7 21.4 1.0X -RunLengthEncoding 1987 1988 1 33.8 29.6 0.7X -DictionaryEncoding 1249 1250 0 53.7 18.6 1.2X -IntDelta 1135 1136 3 59.2 16.9 1.3X +PassThrough 778 783 8 86.3 11.6 1.0X +RunLengthEncoding 1217 1217 1 55.2 18.1 0.6X +DictionaryEncoding 690 704 12 97.2 10.3 1.1X +IntDelta 691 699 13 97.1 10.3 1.1X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz LONG Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 45 45 0 1487.9 0.7 1.0X -RunLengthEncoding(0.750) 2496 2496 1 26.9 37.2 0.0X -DictionaryEncoding(0.250) 1433 1433 1 46.8 21.4 0.0X -LongDelta(0.125) 215 215 0 312.6 3.2 0.2X +PassThrough(1.000) 18 19 1 3772.0 0.3 1.0X +RunLengthEncoding(0.750) 985 987 2 68.1 14.7 0.0X +DictionaryEncoding(0.250) 665 668 4 100.9 9.9 0.0X +LongDelta(0.125) 124 128 2 539.4 1.9 0.1X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz LONG Decode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 1468 1469 1 45.7 21.9 1.0X -RunLengthEncoding 1977 1981 6 33.9 29.5 0.7X -DictionaryEncoding 1248 1250 3 53.8 18.6 1.2X -LongDelta 838 840 2 80.1 12.5 1.8X +PassThrough 837 841 7 80.2 12.5 1.0X +RunLengthEncoding 1177 1180 4 57.0 17.5 0.7X +DictionaryEncoding 741 747 7 90.6 11.0 1.1X +LongDelta 509 520 13 131.8 7.6 1.6X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz LONG Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 47 47 0 1437.2 0.7 1.0X -RunLengthEncoding(1.002) 2743 2744 0 24.5 40.9 0.0X -DictionaryEncoding(0.251) 2016 2016 0 33.3 30.0 0.0X -LongDelta(0.125) 215 217 5 312.1 3.2 0.2X +PassThrough(1.000) 18 20 1 3769.4 0.3 1.0X +RunLengthEncoding(1.005) 1016 1054 54 66.1 15.1 0.0X +DictionaryEncoding(0.251) 923 928 4 72.7 13.8 0.0X +LongDelta(0.125) 125 127 2 538.8 1.9 0.1X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz LONG Decode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 1468 
1468 0 45.7 21.9 1.0X -RunLengthEncoding 2020 2021 2 33.2 30.1 0.7X -DictionaryEncoding 1248 1248 0 53.8 18.6 1.2X -LongDelta 1131 1134 4 59.4 16.8 1.3X +PassThrough 842 846 5 79.7 12.5 1.0X +RunLengthEncoding 1222 1264 59 54.9 18.2 0.7X +DictionaryEncoding 757 776 20 88.7 11.3 1.1X +LongDelta 681 686 4 98.5 10.2 1.2X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz STRING Encode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 71 71 0 939.6 1.1 1.0X -RunLengthEncoding(0.890) 6050 6052 2 11.1 90.2 0.0X -DictionaryEncoding(0.167) 3723 3725 2 18.0 55.5 0.0X +PassThrough(1.000) 27 29 2 2510.4 0.4 1.0X +RunLengthEncoding(0.888) 1651 1663 18 40.7 24.6 0.0X +DictionaryEncoding(0.167) 1851 1863 17 36.3 27.6 0.0X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz STRING Decode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 2804 2815 16 23.9 41.8 1.0X -RunLengthEncoding 3390 3391 1 19.8 50.5 0.8X -DictionaryEncoding 2901 2905 5 23.1 43.2 1.0X +PassThrough 1485 1495 15 45.2 22.1 1.0X +RunLengthEncoding 2010 2066 80 33.4 30.0 0.7X +DictionaryEncoding 1788 1790 4 37.5 26.6 0.8X diff --git a/sql/core/benchmarks/CompressionSchemeBenchmark-results.txt b/sql/core/benchmarks/CompressionSchemeBenchmark-results.txt index 3f6fbe35a7b86..d4670070505aa 100644 --- a/sql/core/benchmarks/CompressionSchemeBenchmark-results.txt +++ b/sql/core/benchmarks/CompressionSchemeBenchmark-results.txt @@ -2,136 +2,136 @@ Compression Scheme Benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz BOOLEAN Encode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 3 3 0 21114.6 0.0 1.0X -RunLengthEncoding(2.505) 694 696 4 96.7 10.3 0.0X -BooleanBitSet(0.125) 366 366 0 183.4 5.5 0.0X +PassThrough(1.000) 1 2 0 49671.6 0.0 1.0X +RunLengthEncoding(2.501) 470 487 25 142.7 7.0 0.0X +BooleanBitSet(0.125) 358 362 4 187.6 5.3 0.0X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz BOOLEAN Decode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 145 145 0 464.2 2.2 1.0X -RunLengthEncoding 735 735 0 91.3 10.9 0.2X -BooleanBitSet 1437 1437 1 46.7 21.4 0.1X +PassThrough 90 95 5 746.2 1.3 1.0X +RunLengthEncoding 550 559 8 122.0 8.2 0.2X +BooleanBitSet 1082 1087 7 
62.0 16.1 0.1X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz SHORT Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 7 7 0 9336.6 0.1 1.0X -RunLengthEncoding(1.494) 1912 1917 7 35.1 28.5 0.0X +PassThrough(1.000) 3 4 0 20595.0 0.0 1.0X +RunLengthEncoding(1.495) 1074 1087 19 62.5 16.0 0.0X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz SHORT Decode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 1104 1104 0 60.8 16.4 1.0X -RunLengthEncoding 1627 1628 0 41.2 24.3 0.7X +PassThrough 807 844 33 83.1 12.0 1.0X +RunLengthEncoding 1077 1078 1 62.3 16.0 0.7X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz SHORT Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 7 7 0 9710.6 0.1 1.0X -RunLengthEncoding(2.003) 2021 2027 9 33.2 30.1 0.0X +PassThrough(1.000) 3 3 0 23144.6 0.0 1.0X +RunLengthEncoding(2.001) 1067 1073 8 62.9 15.9 0.0X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz SHORT Decode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 1104 1104 0 60.8 16.5 1.0X -RunLengthEncoding 1621 1621 0 41.4 24.1 0.7X +PassThrough 793 811 16 84.7 11.8 1.0X +RunLengthEncoding 1099 1123 33 61.1 16.4 0.7X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz INT Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 24 24 0 2854.3 0.4 1.0X -RunLengthEncoding(1.005) 2395 2396 2 28.0 35.7 0.0X -DictionaryEncoding(0.500) 1366 1366 0 49.1 20.3 0.0X -IntDelta(0.250) 286 287 0 234.2 4.3 0.1X +PassThrough(1.000) 10 11 1 6979.9 0.1 1.0X +RunLengthEncoding(1.000) 985 994 9 68.1 14.7 0.0X +DictionaryEncoding(0.500) 896 903 10 74.9 13.4 0.0X +IntDelta(0.250) 237 244 6 283.5 3.5 0.0X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz INT Decode (Lower Skew): Best 
Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 1248 1248 0 53.8 18.6 1.0X -RunLengthEncoding 1738 1739 2 38.6 25.9 0.7X -DictionaryEncoding 969 970 0 69.2 14.4 1.3X -IntDelta 777 779 1 86.3 11.6 1.6X +PassThrough 791 795 3 84.8 11.8 1.0X +RunLengthEncoding 1111 1114 5 60.4 16.6 0.7X +DictionaryEncoding 641 650 17 104.7 9.6 1.2X +IntDelta 560 575 24 119.8 8.4 1.4X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz INT Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 23 23 0 2874.4 0.3 1.0X -RunLengthEncoding(1.334) 2581 2581 0 26.0 38.5 0.0X -DictionaryEncoding(0.501) 1490 1490 0 45.0 22.2 0.0X -IntDelta(0.250) 286 286 0 234.5 4.3 0.1X +PassThrough(1.000) 9 10 1 7181.9 0.1 1.0X +RunLengthEncoding(1.336) 1006 1006 1 66.7 15.0 0.0X +DictionaryEncoding(0.501) 1034 1045 15 64.9 15.4 0.0X +IntDelta(0.250) 235 238 2 285.7 3.5 0.0X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz INT Decode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 1389 1389 0 48.3 20.7 1.0X -RunLengthEncoding 1903 1903 0 35.3 28.4 0.7X -DictionaryEncoding 1231 1232 1 54.5 18.3 1.1X -IntDelta 1103 1108 7 60.8 16.4 1.3X +PassThrough 829 832 3 81.0 12.3 1.0X +RunLengthEncoding 1199 1207 11 56.0 17.9 0.7X +DictionaryEncoding 725 726 1 92.6 10.8 1.1X +IntDelta 680 683 5 98.6 10.1 1.2X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz LONG Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 48 48 0 1405.2 0.7 1.0X -RunLengthEncoding(0.757) 2525 2525 1 26.6 37.6 0.0X -DictionaryEncoding(0.250) 1380 1381 1 48.6 20.6 0.0X -LongDelta(0.125) 474 474 0 141.7 7.1 0.1X +PassThrough(1.000) 20 22 1 3405.6 0.3 1.0X +RunLengthEncoding(0.747) 1097 1102 7 61.2 16.3 0.0X +DictionaryEncoding(0.250) 854 933 74 78.6 12.7 0.0X +LongDelta(0.125) 322 328 11 208.5 4.8 0.1X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz LONG Decode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 1348 1349 0 49.8 20.1 1.0X -RunLengthEncoding 1850 1851 2 36.3 27.6 0.7X -DictionaryEncoding 1190 1192 3 56.4 17.7 1.1X -LongDelta 801 801 0 83.8 11.9 1.7X +PassThrough 839 843 
4 80.0 12.5 1.0X +RunLengthEncoding 1234 1234 1 54.4 18.4 0.7X +DictionaryEncoding 806 809 3 83.3 12.0 1.0X +LongDelta 550 558 6 122.0 8.2 1.5X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz LONG Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 46 46 0 1451.2 0.7 1.0X -RunLengthEncoding(1.003) 2742 2743 1 24.5 40.9 0.0X -DictionaryEncoding(0.251) 1714 1715 0 39.1 25.5 0.0X -LongDelta(0.125) 476 476 0 140.9 7.1 0.1X +PassThrough(1.000) 20 22 1 3319.5 0.3 1.0X +RunLengthEncoding(1.005) 1153 1169 24 58.2 17.2 0.0X +DictionaryEncoding(0.251) 923 930 9 72.7 13.7 0.0X +LongDelta(0.125) 327 332 4 205.0 4.9 0.1X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz LONG Decode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 1362 1363 1 49.3 20.3 1.0X -RunLengthEncoding 1862 1863 1 36.0 27.7 0.7X -DictionaryEncoding 1190 1192 3 56.4 17.7 1.1X -LongDelta 1079 1082 4 62.2 16.1 1.3X +PassThrough 854 864 16 78.6 12.7 1.0X +RunLengthEncoding 1242 1244 3 54.0 18.5 0.7X +DictionaryEncoding 823 823 1 81.6 12.3 1.0X +LongDelta 640 651 8 104.8 9.5 1.3X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz STRING Encode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 67 67 0 994.8 1.0 1.0X -RunLengthEncoding(0.888) 6135 6137 2 10.9 91.4 0.0X -DictionaryEncoding(0.167) 3747 3748 0 17.9 55.8 0.0X +PassThrough(1.000) 29 32 1 2279.8 0.4 1.0X +RunLengthEncoding(0.886) 1723 1734 15 38.9 25.7 0.0X +DictionaryEncoding(0.167) 2667 2690 33 25.2 39.7 0.0X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz STRING Decode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 3180 3185 8 21.1 47.4 1.0X -RunLengthEncoding 3658 3660 3 18.3 54.5 0.9X -DictionaryEncoding 3292 3295 4 20.4 49.1 1.0X +PassThrough 1847 1892 64 36.3 27.5 1.0X +RunLengthEncoding 2305 2332 38 29.1 34.3 0.8X +DictionaryEncoding 2134 2150 22 31.5 31.8 0.9X diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/compression/compressionSchemes.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/compression/compressionSchemes.scala index 3cc59af9b7ce3..cb7efd3f7716b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/compression/compressionSchemes.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/compression/compressionSchemes.scala @@ -23,7 +23,6 @@ import java.nio.ByteOrder import scala.collection.mutable import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.SpecificInternalRow import org.apache.spark.sql.execution.columnar._ import org.apache.spark.sql.execution.vectorized.WritableColumnVector import org.apache.spark.sql.types._ @@ -182,8 +181,7 @@ private[columnar] case object RunLengthEncoding extends CompressionScheme { private var _uncompressedSize = 0 private var _compressedSize = 0 - // Using `MutableRow` to store the last value to avoid boxing/unboxing cost. - private val lastValue = new SpecificInternalRow(Seq(columnType.dataType)) + private var lastValue: T#InternalType = _ private var lastRun = 0 override def uncompressedSize: Int = _uncompressedSize @@ -195,16 +193,16 @@ private[columnar] case object RunLengthEncoding extends CompressionScheme { val actualSize = columnType.actualSize(row, ordinal) _uncompressedSize += actualSize - if (lastValue.isNullAt(0)) { - columnType.copyField(row, ordinal, lastValue, 0) + if (lastValue == null) { + lastValue = columnType.clone(value) lastRun = 1 _compressedSize += actualSize + 4 } else { - if (columnType.getField(lastValue, 0) == value) { + if (lastValue == value) { lastRun += 1 } else { _compressedSize += actualSize + 4 - columnType.copyField(row, ordinal, lastValue, 0) + lastValue = columnType.clone(value) lastRun = 1 } } @@ -214,30 +212,27 @@ private[columnar] case object RunLengthEncoding extends CompressionScheme { to.putInt(RunLengthEncoding.typeId) if (from.hasRemaining) { - val currentValue = new SpecificInternalRow(Seq(columnType.dataType)) var currentRun = 1 - val value = new SpecificInternalRow(Seq(columnType.dataType)) - - columnType.extract(from, currentValue, 0) + var currentValue = columnType.extract(from) while (from.hasRemaining) { - columnType.extract(from, value, 0) + val value = columnType.extract(from) - if (value.get(0, columnType.dataType) == currentValue.get(0, columnType.dataType)) { + if (value == currentValue) { currentRun += 1 } else { // Writes current run - columnType.append(currentValue, 0, to) + columnType.append(currentValue, to) to.putInt(currentRun) // Resets current run - columnType.copyField(value, 0, currentValue, 0) + currentValue = value currentRun = 1 } } // Writes the last run - columnType.append(currentValue, 0, to) + columnType.append(currentValue, to) to.putInt(currentRun) } From fbb0f37685877499baceb5b7141c1a8e162f6735 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Mon, 14 Sep 2020 10:00:15 +0900 Subject: [PATCH 0016/1009] [SPARK-32869][BUILD] Ignore deprecation warnings for build with Scala 2.13 and sbt ### What changes were proposed in this pull request? This PR changes SparkBuild.scala to ignore deprecation warnings for build with Scala 2.13 and sbt. Actually, deprecation warnings are already ignored for Scala 2.12 but string matching logic for deprecation warnings should be changed for Scala 2.13. Currently, if a warning message contains `is deprecated`, it's ignored but some warnings contain "are deprecated` and `will be deprecated`. 
``` [error] [warn] /home/kou/work/oss/spark-scala-2.13/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala:656: multiarg infix syntax looks\ like a tuple and will be deprecated [error] [warn] if (opt.clOption != null) { childArgs += (opt.clOption, opt.value) } ``` ``` [error] [warn] /home/kou/work/oss/spark-scala-2.13/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala:35: view bounds are de\ precated; use an implicit parameter instead. [error] example: instead of `def f[A <% Int](a: A)` use `def f[A](a: A)(implicit ev: A => Int)` [error] [warn] class SequenceFileRDDFunctions[K <% Writable: ClassTag, V <% Writable : ClassTag]( ``` ### Why are the changes needed? Enable building Spark with Scala 2.13 and sbt. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Built with the following command and confirmed that deprecation warnings are not treated as fatal (the build itself doesn't pass due to another problem). `build/sbt -Pscala-2.13 package` Closes #29741 from sarutak/scala-2.13-deprecated-warning. Authored-by: Kousuke Saruta Signed-off-by: HyukjinKwon --- project/SparkBuild.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index c94ae4e510087..160b3b5e7edb3 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -296,7 +296,7 @@ object SparkBuild extends PomBuild { var failed = 0 analysis.infos.allInfos.foreach { case (k, i) => i.reportedProblems foreach { p => - val deprecation = p.message.contains("is deprecated") + val deprecation = p.message.contains("deprecated") if (!deprecation) { failed = failed + 1 From e558b8a0fd1b1a2d3d37a18835951a7d2b3ef19e Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Mon, 14 Sep 2020 11:57:29 +0900 Subject: [PATCH 0017/1009] [SPARK-31847][CORE][TESTS] DAGSchedulerSuite: Rewrite the test framework to support applying specified Spark configurations ### What changes were proposed in this pull request? `DAGSchedulerSuite` has an issue: `afterEach` and `init` are called whenever the `SparkConf` of the default `SparkContext` lacks a configuration that the test case must set. This causes the `SparkContext` initialized in `beforeEach` to be discarded without being used, resulting in waste. On the other hand, the flexibility to add configurations to `SparkConf` should be addressed by the test framework. Test suites that inherit `LocalSparkContext` can be simplified. ### Why are the changes needed? Reduce the overhead of initializing `SparkContext`. Rewrite the test framework to support applying specified Spark configurations. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Jenkins test. Closes #29228 from beliefer/extend-test-frame-for-dag. Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: HyukjinKwon --- .../apache/spark/TempLocalSparkContext.scala | 100 ++++++++++++++++++ .../spark/scheduler/DAGSchedulerSuite.scala | 55 ++++------ 2 files changed, 120 insertions(+), 35 deletions(-) create mode 100644 core/src/test/scala/org/apache/spark/TempLocalSparkContext.scala diff --git a/core/src/test/scala/org/apache/spark/TempLocalSparkContext.scala b/core/src/test/scala/org/apache/spark/TempLocalSparkContext.scala new file mode 100644 index 0000000000000..6d5fcd1edfb03 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/TempLocalSparkContext.scala @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark + +import _root_.io.netty.util.internal.logging.{InternalLoggerFactory, Slf4JLoggerFactory} +import org.scalatest.BeforeAndAfterAll +import org.scalatest.BeforeAndAfterEach +import org.scalatest.Suite + +import org.apache.spark.internal.Logging +import org.apache.spark.resource.ResourceProfile + +/** + * Manages a local `sc` `SparkContext` variable, correctly stopping it after each test. + * + * Note: this class is a copy of [[LocalSparkContext]]. Why copy it? Reduce conflict. Because + * many test suites use [[LocalSparkContext]] and overwrite some variable or function (e.g. + * sc of LocalSparkContext), there occurs conflict when we refactor the `sc` as a new function. + * After migrating all test suites that use [[LocalSparkContext]] to use + * [[TempLocalSparkContext]], we will delete the original [[LocalSparkContext]] and rename + * [[TempLocalSparkContext]] to [[LocalSparkContext]]. + */ +trait TempLocalSparkContext extends BeforeAndAfterEach + with BeforeAndAfterAll with Logging { self: Suite => + + private var _conf: SparkConf = defaultSparkConf + + @transient private var _sc: SparkContext = _ + + def conf: SparkConf = _conf + + /** + * Currently, we are focusing on the reconstruction of LocalSparkContext, so this method + * was created temporarily. When the migration work is completed, this method will be + * renamed to `sc` and the variable `sc` will be deleted. + */ + def sc: SparkContext = { + if (_sc == null) { + _sc = new SparkContext(_conf) + } + _sc + } + + override def beforeAll(): Unit = { + super.beforeAll() + InternalLoggerFactory.setDefaultFactory(Slf4JLoggerFactory.INSTANCE) + } + + override def afterEach(): Unit = { + try { + resetSparkContext() + } finally { + super.afterEach() + } + } + + def resetSparkContext(): Unit = { + TempLocalSparkContext.stop(_sc) + ResourceProfile.clearDefaultProfile() + _sc = null + _conf = defaultSparkConf + } + + private def defaultSparkConf: SparkConf = new SparkConf() + .setMaster("local[2]").setAppName(s"${this.getClass.getSimpleName}") +} + +object TempLocalSparkContext { + def stop(sc: SparkContext): Unit = { + if (sc != null) { + sc.stop() + } + // To avoid RPC rebinding to the same port, since it doesn't unbind immediately on shutdown + System.clearProperty("spark.driver.port") + } + + /** Runs `f` by passing in `sc` and ensures that `sc` is stopped. 
*/ + def withSpark[T](sc: SparkContext)(f: SparkContext => T): T = { + try { + f(sc) + } finally { + stop(sc) + } + } +} diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index 436765808e22b..99be1faab8b85 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.scheduler import java.util.Properties import java.util.concurrent.{CountDownLatch, TimeUnit} -import java.util.concurrent.atomic.{AtomicBoolean, AtomicInteger, AtomicLong, AtomicReference} +import java.util.concurrent.atomic.{AtomicBoolean, AtomicLong, AtomicReference} import scala.annotation.meta.param import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, Map} @@ -125,14 +125,14 @@ class MyRDD( class DAGSchedulerSuiteDummyException extends Exception -class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLimits { +class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with TimeLimits { import DAGSchedulerSuite._ // Necessary to make ScalaTest 3.x interrupt a thread on the JVM like ScalaTest 2.2.x implicit val defaultSignaler: Signaler = ThreadSignaler - val conf = new SparkConf + private var firstInit: Boolean = _ /** Set of TaskSets the DAGScheduler has requested executed. */ val taskSets = scala.collection.mutable.Buffer[TaskSet]() @@ -297,11 +297,19 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi override def beforeEach(): Unit = { super.beforeEach() - init(new SparkConf()) + firstInit = true } - private def init(testConf: SparkConf): Unit = { - sc = new SparkContext("local[2]", "DAGSchedulerSuite", testConf) + override def sc: SparkContext = { + val sc = super.sc + if (firstInit) { + init(sc) + firstInit = false + } + sc + } + + private def init(sc: SparkContext): Unit = { sparkListener = new EventInfoRecordingListener failure = null sc.addSparkListener(sparkListener) @@ -310,10 +318,10 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi cancelledStages.clear() cacheLocations.clear() results.clear() - securityMgr = new SecurityManager(conf) - broadcastManager = new BroadcastManager(true, conf, securityMgr) - mapOutputTracker = spy(new MyMapOutputTrackerMaster(conf, broadcastManager)) - blockManagerMaster = spy(new MyBlockManagerMaster(conf)) + securityMgr = new SecurityManager(sc.getConf) + broadcastManager = new BroadcastManager(true, sc.getConf, securityMgr) + mapOutputTracker = spy(new MyMapOutputTrackerMaster(sc.getConf, broadcastManager)) + blockManagerMaster = spy(new MyBlockManagerMaster(sc.getConf)) scheduler = new DAGScheduler( sc, taskScheduler, @@ -353,6 +361,8 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi * DAGScheduler event loop. 
*/ private def runEvent(event: DAGSchedulerEvent): Unit = { + // Ensure the initialization of various components + sc dagEventProcessLoopTester.post(event) } @@ -491,12 +501,8 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi } test("All shuffle files on the storage endpoint should be cleaned up when it is lost") { - // reset the test context with the right shuffle service config - afterEach() - val conf = new SparkConf() conf.set(config.SHUFFLE_SERVICE_ENABLED.key, "true") conf.set("spark.files.fetchFailure.unRegisterOutputOnHost", "true") - init(conf) runEvent(ExecutorAdded("hostA-exec1", "hostA")) runEvent(ExecutorAdded("hostA-exec2", "hostA")) runEvent(ExecutorAdded("hostB-exec", "hostB")) @@ -565,11 +571,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi } test("SPARK-32003: All shuffle files for executor should be cleaned up on fetch failure") { - // reset the test context with the right shuffle service config - afterEach() - val conf = new SparkConf() conf.set(config.SHUFFLE_SERVICE_ENABLED.key, "true") - init(conf) val shuffleMapRdd = new MyRDD(sc, 3, Nil) val shuffleDep = new ShuffleDependency(shuffleMapRdd, new HashPartitioner(3)) @@ -861,11 +863,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi "not lost" } test(s"shuffle files $maybeLost when $eventDescription") { - // reset the test context with the right shuffle service config - afterEach() - val conf = new SparkConf() conf.set(config.SHUFFLE_SERVICE_ENABLED.key, shuffleServiceOn.toString) - init(conf) assert(sc.env.blockManager.externalShuffleServiceEnabled == shuffleServiceOn) val shuffleMapRdd = new MyRDD(sc, 2, Nil) @@ -2888,11 +2886,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi } test("SPARK-25341: abort stage while using old fetch protocol") { - // reset the test context with using old fetch protocol - afterEach() - val conf = new SparkConf() conf.set(config.SHUFFLE_USE_OLD_FETCH_PROTOCOL.key, "true") - init(conf) // Construct the scenario of indeterminate stage fetch failed. 
constructIndeterminateStageFetchFailed() // The job should fail because Spark can't rollback the shuffle map stage while @@ -3220,10 +3214,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi } test("test 2 resource profile with merge conflict config true") { - afterEach() - val conf = new SparkConf() conf.set(config.RESOURCE_PROFILE_MERGE_CONFLICTS.key, "true") - init(conf) val ereqs = new ExecutorResourceRequests().cores(4) val treqs = new TaskResourceRequests().cpus(1) @@ -3241,10 +3232,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi } test("test multiple resource profiles created from merging use same rp") { - afterEach() - val conf = new SparkConf() conf.set(config.RESOURCE_PROFILE_MERGE_CONFLICTS.key, "true") - init(conf) val ereqs = new ExecutorResourceRequests().cores(4) val treqs = new TaskResourceRequests().cpus(1) @@ -3338,10 +3326,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi } test("test merge 3 resource profiles") { - afterEach() - val conf = new SparkConf() conf.set(config.RESOURCE_PROFILE_MERGE_CONFLICTS.key, "true") - init(conf) val ereqs = new ExecutorResourceRequests().cores(4) val treqs = new TaskResourceRequests().cpus(1) val rp1 = new ResourceProfile(ereqs.requests, treqs.requests) From 742fcff3501e46722eeaeb9d1ac20e569f8f1c2c Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Mon, 14 Sep 2020 13:15:14 +0900 Subject: [PATCH 0018/1009] [SPARK-32839][WINDOWS] Make Spark scripts working with the spaces in paths on Windows ### What changes were proposed in this pull request? If you install Spark under the path that has whitespaces, it does not work on Windows, for example as below: ``` >>> SparkSession.builder.getOrCreate() Presence of build for multiple Scala versions detected (C:\...\assembly\target\scala-2.13 and C:\...\assembly\target\scala-2.12). Remove one of them or, set SPARK_SCALA_VERSION=2.13 in spark-env.cmd. Visit https://spark.apache.org/docs/latest/configuration.html#environment-variables for more details about setting environment variables in spark-env.cmd. Either clean one of them or, set SPARK_SCALA_VERSION in spark-env.cmd. ``` This PR fixes the whitespace handling to support any paths on Windows. ### Why are the changes needed? To support Spark working with whitespaces in paths on Windows. ### Does this PR introduce _any_ user-facing change? Yes, users will be able to install and run Spark under the paths with whitespaces. ### How was this patch tested? Manually tested. Closes #29706 from HyukjinKwon/window-space-path. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- bin/find-spark-home.cmd | 2 +- bin/load-spark-env.cmd | 6 +++--- bin/spark-class2.cmd | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) mode change 100644 => 100755 bin/spark-class2.cmd diff --git a/bin/find-spark-home.cmd b/bin/find-spark-home.cmd index f795d146d49c7..3149d05039ba4 100644 --- a/bin/find-spark-home.cmd +++ b/bin/find-spark-home.cmd @@ -55,6 +55,6 @@ if "x%SPARK_HOME%"=="x" ( set SPARK_HOME=%~dp0.. 
) else ( rem We are pip installed, use the Python script to resolve a reasonable SPARK_HOME - for /f "delims=" %%i in ('%PYTHON_RUNNER% %FIND_SPARK_HOME_PYTHON_SCRIPT%') do set SPARK_HOME=%%i + for /f "delims=" %%i in ('%PYTHON_RUNNER% "%FIND_SPARK_HOME_PYTHON_SCRIPT%"') do set SPARK_HOME=%%i ) ) diff --git a/bin/load-spark-env.cmd b/bin/load-spark-env.cmd index fe725a4e1a368..5692af529fb66 100644 --- a/bin/load-spark-env.cmd +++ b/bin/load-spark-env.cmd @@ -24,7 +24,7 @@ rem conf\ subdirectory. if not defined SPARK_ENV_LOADED ( set SPARK_ENV_LOADED=1 - if [%SPARK_CONF_DIR%] == [] ( + if not defined SPARK_CONF_DIR ( set SPARK_CONF_DIR=%~dp0..\conf ) @@ -36,8 +36,8 @@ rem Setting SPARK_SCALA_VERSION if not already set. set SCALA_VERSION_1=2.13 set SCALA_VERSION_2=2.12 -set ASSEMBLY_DIR1=%SPARK_HOME%\assembly\target\scala-%SCALA_VERSION_1% -set ASSEMBLY_DIR2=%SPARK_HOME%\assembly\target\scala-%SCALA_VERSION_2% +set ASSEMBLY_DIR1="%SPARK_HOME%\assembly\target\scala-%SCALA_VERSION_1%" +set ASSEMBLY_DIR2="%SPARK_HOME%\assembly\target\scala-%SCALA_VERSION_2%" set ENV_VARIABLE_DOC=https://spark.apache.org/docs/latest/configuration.html#environment-variables if not defined SPARK_SCALA_VERSION ( diff --git a/bin/spark-class2.cmd b/bin/spark-class2.cmd old mode 100644 new mode 100755 index 34d04c9856d2c..68b271d1d05d9 --- a/bin/spark-class2.cmd +++ b/bin/spark-class2.cmd @@ -30,12 +30,12 @@ if "x%1"=="x" ( rem Find Spark jars. if exist "%SPARK_HOME%\jars" ( - set SPARK_JARS_DIR="%SPARK_HOME%\jars" + set SPARK_JARS_DIR=%SPARK_HOME%\jars ) else ( - set SPARK_JARS_DIR="%SPARK_HOME%\assembly\target\scala-%SPARK_SCALA_VERSION%\jars" + set SPARK_JARS_DIR=%SPARK_HOME%\assembly\target\scala-%SPARK_SCALA_VERSION%\jars ) -if not exist "%SPARK_JARS_DIR%"\ ( +if not exist "%SPARK_JARS_DIR%" ( echo Failed to find Spark jars directory. echo You need to build Spark before running this program. exit /b 1 From b121f0d4596969ded3db9d5d7b0cb8adac8ac00c Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Mon, 14 Sep 2020 15:34:58 +0900 Subject: [PATCH 0019/1009] [SPARK-32873][BUILD] Fix code which causes error when build with sbt and Scala 2.13 ### What changes were proposed in this pull request? This PR fix code which causes error when build with sbt and Scala 2.13 like as follows. ``` [error] [warn] /home/kou/work/oss/spark-scala-2.13/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaRDD.scala:251: method with a single empty parameter list overrides method without any parameter list [error] [warn] override def hasNext(): Boolean = requestOffset < part.untilOffset [error] [warn] [error] [warn] /home/kou/work/oss/spark-scala-2.13/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaRDD.scala:294: method with a single empty parameter list overrides method without any parameter list [error] [warn] override def hasNext(): Boolean = okNext ``` More specifically, what this PR fixes are * Methods which has an empty parameter list and overrides an method which has no parameter list. ``` override def hasNext(): Boolean = okNext ``` * Methods which has no parameter list and overrides an method which has an empty parameter list. ``` override def next: (Int, Double) = { ``` * Infix operator expression that the operator wraps. 
``` 3L * math.min(k, numFeatures) * math.min(k, numFeatures) 3L * math.min(k, numFeatures) * math.min(k, numFeatures) + + math.max(math.max(k, numFeatures), 4L * math.min(k, numFeatures) math.max(math.max(k, numFeatures), 4L * math.min(k, numFeatures) * * math.min(k, numFeatures) + 4L * math.min(k, numFeatures)) ``` ### Why are the changes needed? For building Spark with sbt and Scala 2.13. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? After this change and #29742 applied, compile passed with the following command. ``` build/sbt -Pscala-2.13 -Phive -Phive-thriftserver -Pyarn -Pkubernetes compile test:compile ``` Closes #29745 from sarutak/fix-code-for-sbt-and-spark-2.13. Authored-by: Kousuke Saruta Signed-off-by: HyukjinKwon --- .../org/apache/spark/streaming/kafka010/KafkaRDD.scala | 4 ++-- .../src/main/scala/org/apache/spark/ml/linalg/Vectors.scala | 2 +- .../src/main/scala/org/apache/spark/mllib/feature/PCA.scala | 6 +++--- .../main/scala/org/apache/spark/mllib/linalg/Vectors.scala | 2 +- .../org/apache/spark/sql/execution/command/commands.scala | 4 ++-- .../spark/sql/execution/datasources/v2/V2CommandExec.scala | 2 +- .../execution/datasources/v2/jdbc/JDBCTableCatalog.scala | 2 +- .../streaming/state/SymmetricHashJoinStateManager.scala | 4 ++-- .../spark/streaming/rdd/WriteAheadLogBackedBlockRDD.scala | 2 +- .../spark/streaming/receiver/ReceivedBlockHandler.scala | 2 +- 10 files changed, 15 insertions(+), 15 deletions(-) diff --git a/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaRDD.scala b/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaRDD.scala index bd2e7e11b7383..46164e9b63365 100644 --- a/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaRDD.scala +++ b/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaRDD.scala @@ -248,7 +248,7 @@ private class KafkaRDDIterator[K, V]( } } - override def hasNext(): Boolean = requestOffset < part.untilOffset + override def hasNext: Boolean = requestOffset < part.untilOffset override def next(): ConsumerRecord[K, V] = { if (!hasNext) { @@ -291,7 +291,7 @@ private class CompactedKafkaRDDIterator[K, V]( private var okNext: Boolean = true - override def hasNext(): Boolean = okNext + override def hasNext: Boolean = okNext override def next(): ConsumerRecord[K, V] = { if (!hasNext) { diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala index 83973bcffef05..2c35ede8118c4 100644 --- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala +++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala @@ -786,7 +786,7 @@ class SparseVector @Since("2.0.0") ( override def hasNext: Boolean = i < localSize - override def next: (Int, Double) = { + override def next(): (Int, Double) = { val v = if (i == k) { j += 1 k = if (j < localNumActives) localIndices(j) else -1 diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala index 356ed48e99387..c165d4810c934 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala @@ -124,9 +124,9 @@ private[feature] object PCAUtil { // 6e541be066d547a097f5089165cd7c38c3ca276d/math/src/main/scala/breeze/linalg/ // functions/svd.scala#L87 def memoryCost(k: Int, numFeatures: Int): Long = { - 3L * 
math.min(k, numFeatures) * math.min(k, numFeatures) - + math.max(math.max(k, numFeatures), 4L * math.min(k, numFeatures) - * math.min(k, numFeatures) + 4L * math.min(k, numFeatures)) + 3L * math.min(k, numFeatures) * math.min(k, numFeatures) + + math.max(math.max(k, numFeatures), 4L * math.min(k, numFeatures) * + math.min(k, numFeatures) + 4L * math.min(k, numFeatures)) } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index c23088de85b8a..2fe415f14032f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -984,7 +984,7 @@ class SparseVector @Since("1.0.0") ( override def hasNext: Boolean = i < localSize - override def next: (Int, Double) = { + override def next(): (Int, Double) = { val v = if (i == k) { j += 1 k = if (j < localNumActives) localIndices(j) else -1 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala index 18fd2a5ac2330..70f20cd8b7c06 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala @@ -78,7 +78,7 @@ case class ExecutedCommandExec(cmd: RunnableCommand) extends LeafExecNode { override def executeCollect(): Array[InternalRow] = sideEffectResult.toArray - override def executeToIterator: Iterator[InternalRow] = sideEffectResult.toIterator + override def executeToIterator(): Iterator[InternalRow] = sideEffectResult.toIterator override def executeTake(limit: Int): Array[InternalRow] = sideEffectResult.take(limit).toArray @@ -119,7 +119,7 @@ case class DataWritingCommandExec(cmd: DataWritingCommand, child: SparkPlan) override def executeCollect(): Array[InternalRow] = sideEffectResult.toArray - override def executeToIterator: Iterator[InternalRow] = sideEffectResult.toIterator + override def executeToIterator(): Iterator[InternalRow] = sideEffectResult.toIterator override def executeTake(limit: Int): Array[InternalRow] = sideEffectResult.take(limit).toArray diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala index 4be4a6b30edcd..7738f26dfd266 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala @@ -44,7 +44,7 @@ abstract class V2CommandExec extends SparkPlan { */ override def executeCollect(): Array[InternalRow] = result.toArray - override def executeToIterator: Iterator[InternalRow] = result.toIterator + override def executeToIterator(): Iterator[InternalRow] = result.toIterator override def executeTake(limit: Int): Array[InternalRow] = result.take(limit).toArray diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala index 0138014a8e21e..41f650d1f2ff5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala @@ -61,7 +61,7 @@ class JDBCTableCatalog extends 
TableCatalog with Logging { .getTables(null, schemaPattern, "%", Array("TABLE")); new Iterator[Identifier] { def hasNext = rs.next() - def next = Identifier.of(namespace, rs.getString("TABLE_NAME")) + def next() = Identifier.of(namespace, rs.getString("TABLE_NAME")) }.toArray } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala index 1a5b50dcc7901..2aa2a18b9eaf4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala @@ -171,7 +171,7 @@ class SymmetricHashJoinStateManager( return null } - override def close: Unit = {} + override def close(): Unit = {} } } @@ -280,7 +280,7 @@ class SymmetricHashJoinStateManager( return reusedRet.withNew(currentKey, currentValue.value, currentValue.matched) } - override def close: Unit = {} + override def close(): Unit = {} } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDD.scala b/streaming/src/main/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDD.scala index f677c492d561f..6494e512713f8 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDD.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDD.scala @@ -96,7 +96,7 @@ class WriteAheadLogBackedBlockRDD[T: ClassTag]( @transient private val hadoopConfig = sc.hadoopConfiguration private val broadcastedHadoopConf = new SerializableConfiguration(hadoopConfig) - override def isValid(): Boolean = true + override def isValid: Boolean = true override def getPartitions: Array[Partition] = { assertValid() diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala index 12ed8015117e5..7a561ecb4990f 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala @@ -234,7 +234,7 @@ private[streaming] class CountingIterator[T](iterator: Iterator[T]) extends Iter private def isFullyConsumed: Boolean = !iterator.hasNext - def hasNext(): Boolean = iterator.hasNext + def hasNext: Boolean = iterator.hasNext def count(): Option[Long] = { if (isFullyConsumed) Some(_count) else None From 978f531010adfc08110897450d49cb569e4805ab Mon Sep 17 00:00:00 2001 From: Cheng Su Date: Mon, 14 Sep 2020 08:49:51 +0000 Subject: [PATCH 0020/1009] [SPARK-32854][SS] Minor code and doc improvement for stream-stream join ### What changes were proposed in this pull request? Several minor code and documentation improvement for stream-stream join. Specifically: * Remove extending from `SparkPlan`, as extending from `BinaryExecNode` is enough. * Return `left/right.outputPartitioning` for `Left/RightOuter` in `outputPartitioning`, as the `PartitioningCollection` wrapper is unnecessary (similar to batch joins `ShuffledHashJoinExec`, `SortMergeJoinExec`). 
* Avoid per-row check for join type (https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala#L486-L492), by creating the method before the loop of reading rows (`generateFilteredJoinedRow` in `storeAndJoinWithOtherSide`). Similar optimization (i.e. create auxiliary method/variable per different join type before the iterator of input rows) has been done in batch join world (`SortMergeJoinExec`, `ShuffledHashJoinExec`). * Minor fix for comment/indentation for better readability. ### Why are the changes needed? Minor optimization to avoid per-row unnecessary work (this probably can be optimized away by compiler, but we can do a better join to avoid it at the first place). And other comment/indentation fix to have better code readability for future developers. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests in `StreamingJoinSuite.scala` as no new logic is introduced. Closes #29724 from c21/streaming. Authored-by: Cheng Su Signed-off-by: Wenchen Fan --- .../StreamingSymmetricHashJoinExec.scala | 56 ++++++++++--------- 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala index 3d071df493cec..a52f5f4ac94ae 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala @@ -56,8 +56,8 @@ import org.apache.spark.util.{CompletionIterator, SerializableConfiguration} * - Apply the optional condition to filter the joined rows as the final output. * * If a timestamp column with event time watermark is present in the join keys or in the input - * data, then the it uses the watermark figure out which rows in the buffer will not join with - * and the new data, and therefore can be discarded. Depending on the provided query conditions, we + * data, then it uses the watermark to figure out which rows in the buffer will not join with + * the new data, and therefore can be discarded. Depending on the provided query conditions, we * can define thresholds on both state key (i.e. joining keys) and state value (i.e. input rows). * There are three kinds of queries possible regarding this as explained below. * Assume that watermark has been defined on both `leftTime` and `rightTime` columns used below. @@ -134,7 +134,7 @@ case class StreamingSymmetricHashJoinExec( stateWatermarkPredicates: JoinStateWatermarkPredicates, stateFormatVersion: Int, left: SparkPlan, - right: SparkPlan) extends SparkPlan with BinaryExecNode with StateStoreWriter { + right: SparkPlan) extends BinaryExecNode with StateStoreWriter { def this( leftKeys: Seq[Expression], @@ -157,14 +157,16 @@ case class StreamingSymmetricHashJoinExec( " the checkpoint and rerun the query. 
See SPARK-26154 for more details.") } + private lazy val errorMessageForJoinType = + s"${getClass.getSimpleName} should not take $joinType as the JoinType" + private def throwBadJoinTypeException(): Nothing = { - throw new IllegalArgumentException( - s"${getClass.getSimpleName} should not take $joinType as the JoinType") + throw new IllegalArgumentException(errorMessageForJoinType) } require( joinType == Inner || joinType == LeftOuter || joinType == RightOuter, - s"${getClass.getSimpleName} should not take $joinType as the JoinType") + errorMessageForJoinType) require(leftKeys.map(_.dataType) == rightKeys.map(_.dataType)) private val storeConf = new StateStoreConf(sqlContext.conf) @@ -189,11 +191,9 @@ case class StreamingSymmetricHashJoinExec( override def outputPartitioning: Partitioning = joinType match { case _: InnerLike => PartitioningCollection(Seq(left.outputPartitioning, right.outputPartitioning)) - case LeftOuter => PartitioningCollection(Seq(left.outputPartitioning)) - case RightOuter => PartitioningCollection(Seq(right.outputPartitioning)) - case x => - throw new IllegalArgumentException( - s"${getClass.getSimpleName} should not take $x as the JoinType") + case LeftOuter => left.outputPartitioning + case RightOuter => right.outputPartitioning + case _ => throwBadJoinTypeException() } override def shouldRunAnotherBatch(newMetadata: OffsetSeqMetadata): Boolean = { @@ -246,13 +246,14 @@ case class StreamingSymmetricHashJoinExec( // Join one side input using the other side's buffered/state rows. Here is how it is done. // - // - `leftJoiner.joinWith(rightJoiner)` generates all rows from matching new left input with - // stored right input, and also stores all the left input + // - `leftSideJoiner.storeAndJoinWithOtherSide(rightSideJoiner)` generates all rows from + // matching new left input with stored right input, and also stores all the left input // - // - `rightJoiner.joinWith(leftJoiner)` generates all rows from matching new right input with - // stored left input, and also stores all the right input. It also generates all rows from - // matching new left input with new right input, since the new left input has become stored - // by that point. This tiny asymmetry is necessary to avoid duplication. + // - `rightSideJoiner.storeAndJoinWithOtherSide(leftSideJoiner)` generates all rows from + // matching new right input with stored left input, and also stores all the right input. + // It also generates all rows from matching new left input with new right input, since + // the new left input has become stored by that point. This tiny asymmetry is necessary + // to avoid duplication. 
val leftOutputIter = leftSideJoiner.storeAndJoinWithOtherSide(rightSideJoiner) { (input: InternalRow, matched: InternalRow) => joinedRow.withLeft(input).withRight(matched) } @@ -459,8 +460,9 @@ case class StreamingSymmetricHashJoinExec( */ def storeAndJoinWithOtherSide( otherSideJoiner: OneSideHashJoiner)( - generateJoinedRow: (InternalRow, InternalRow) => JoinedRow): - Iterator[InternalRow] = { + generateJoinedRow: (InternalRow, InternalRow) => JoinedRow) + : Iterator[InternalRow] = { + val watermarkAttribute = inputAttributes.find(_.metadata.contains(delayKey)) val nonLateRows = WatermarkSupport.watermarkExpression(watermarkAttribute, eventTimeWatermark) match { @@ -471,6 +473,14 @@ case class StreamingSymmetricHashJoinExec( inputIter } + val generateFilteredJoinedRow: InternalRow => Iterator[InternalRow] = joinSide match { + case LeftSide if joinType == LeftOuter => + (row: InternalRow) => Iterator(generateJoinedRow(row, nullRight)) + case RightSide if joinType == RightOuter => + (row: InternalRow) => Iterator(generateJoinedRow(row, nullLeft)) + case _ => (_: InternalRow) => Iterator.empty + } + nonLateRows.flatMap { row => val thisRow = row.asInstanceOf[UnsafeRow] // If this row fails the pre join filter, that means it can never satisfy the full join @@ -483,13 +493,7 @@ case class StreamingSymmetricHashJoinExec( .getJoinedRows(key, thatRow => generateJoinedRow(thisRow, thatRow), postJoinFilter) new AddingProcessedRowToStateCompletionIterator(key, thisRow, outputIter) } else { - joinSide match { - case LeftSide if joinType == LeftOuter => - Iterator(generateJoinedRow(thisRow, nullRight)) - case RightSide if joinType == RightOuter => - Iterator(generateJoinedRow(thisRow, nullLeft)) - case _ => Iterator() - } + generateFilteredJoinedRow(thisRow) } } } From 5e825482d70e13a8cb16f1fbdac8139710482d17 Mon Sep 17 00:00:00 2001 From: Yuanjian Li Date: Mon, 14 Sep 2020 09:20:24 +0000 Subject: [PATCH 0021/1009] [SPARK-32844][SQL] Make `DataFrameReader.table` take the specified options for datasource v1 ### What changes were proposed in this pull request? Make `DataFrameReader.table` take the specified options for datasource v1. ### Why are the changes needed? Keep the same behavior of v1/v2 datasource, the v2 fix has been done in SPARK-32592. ### Does this PR introduce _any_ user-facing change? Yes. The DataFrameReader.table will take the specified options. Also, if there are the same key and value exists in specified options and table properties, an exception will be thrown. ### How was this patch tested? New UT added. Closes #29712 from xuanyuanking/SPARK-32844. 
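For reference, a minimal sketch of the user-facing behaviour, based on the new test (it assumes a `spark` session and a parquet table served by the v1 code path):

```scala
// Minimal sketch; assumes a SparkSession `spark` is in scope and parquet uses the v1 path.
spark.sql("CREATE TABLE t(i INT, d DOUBLE) USING parquet OPTIONS ('p1'='v1', 'p2'='v2')")

// Conflicting value for an existing table serde property: analysis now fails with a
// "duplicated key" error instead of silently ignoring the specified option.
// spark.read.option("p1", "v3").table("t")

// A matching value or a brand-new key is merged into the scan options of the v1 relation.
val df = spark.read.option("p2", "v2").option("p3", "v3").table("t")
```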
Authored-by: Yuanjian Li Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 2 +- .../sql/catalyst/catalog/SessionCatalog.scala | 7 ++-- .../sql/catalyst/catalog/interface.scala | 5 ++- .../apache/spark/sql/internal/SQLConf.scala | 12 +++++++ .../datasources/DataSourceStrategy.scala | 21 +++++++----- .../datasources/DataSourceUtils.scala | 34 +++++++++++++++++++ .../sql/test/DataFrameReaderWriterSuite.scala | 22 +++++++++++- 7 files changed, 89 insertions(+), 14 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 4516c71bbc514..7d591eeea2b79 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1025,7 +1025,7 @@ class Analyzer( case SessionCatalogAndIdentifier(catalog, ident) => lazy val loaded = CatalogV2Util.loadTable(catalog, ident).map { case v1Table: V1Table => - v1SessionCatalog.getRelation(v1Table.v1Table) + v1SessionCatalog.getRelation(v1Table.v1Table, options) case table => SubqueryAlias( catalog.name +: ident.asMultipartIdentifier, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 6fba3156c3919..e9a02c15f7362 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -43,6 +43,7 @@ import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.GLOBAL_TEMP_DATABASE import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.Utils object SessionCatalog { @@ -783,7 +784,9 @@ class SessionCatalog( } } - def getRelation(metadata: CatalogTable): LogicalPlan = { + def getRelation( + metadata: CatalogTable, + options: CaseInsensitiveStringMap = CaseInsensitiveStringMap.empty()): LogicalPlan = { val name = metadata.identifier val db = formatDatabaseName(name.database.getOrElse(currentDb)) val table = formatTableName(name.table) @@ -801,7 +804,7 @@ class SessionCatalog( child = parser.parsePlan(viewText)) SubqueryAlias(multiParts, child) } else { - SubqueryAlias(multiParts, UnresolvedCatalogRelation(metadata)) + SubqueryAlias(multiParts, UnresolvedCatalogRelation(metadata, options)) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 4e63ee7428d72..be09e761272ce 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -36,6 +36,7 @@ import org.apache.spark.sql.catalyst.util.quoteIdentifier import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ +import org.apache.spark.sql.util.CaseInsensitiveStringMap /** @@ -639,7 +640,9 @@ object CatalogTypes { * A placeholder for a table relation, which will be replaced by concrete relation like * `LogicalRelation` or `HiveTableRelation`, during analysis. 
*/ -case class UnresolvedCatalogRelation(tableMeta: CatalogTable) extends LeafNode { +case class UnresolvedCatalogRelation( + tableMeta: CatalogTable, + options: CaseInsensitiveStringMap = CaseInsensitiveStringMap.empty()) extends LeafNode { assert(tableMeta.identifier.database.isDefined) override lazy val resolved: Boolean = false override def output: Seq[Attribute] = Nil diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index dae715ae827e2..2f2b645360ed6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2732,6 +2732,18 @@ object SQLConf { .booleanConf .createWithDefault(false) + val LEGACY_EXTRA_OPTIONS_BEHAVIOR = + buildConf("spark.sql.legacy.extraOptionsBehavior.enabled") + .internal() + .doc("When true, the extra options will be ignored for DataFrameReader.table(). If set it " + + "to false, which is the default, Spark will check if the extra options have the same " + + "key, but the value is different with the table serde properties. If the check passes, " + + "the extra options will be merged with the serde properties as the scan options. " + + "Otherwise, an exception will be thrown.") + .version("3.1.0") + .booleanConf + .createWithDefault(false) + val TRUNCATE_TRASH_ENABLED = buildConf("spark.sql.truncate.trash.enabled") .doc("This configuration decides when truncating table, whether data files will be moved " + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index 3ccff6d89babd..1f8cfee308033 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.datasources import java.util.Locale +import scala.collection.JavaConverters._ import scala.collection.mutable import org.apache.hadoop.fs.Path @@ -42,6 +43,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy import org.apache.spark.sql.sources._ import org.apache.spark.sql.types._ +import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.unsafe.types.UTF8String /** @@ -237,11 +239,12 @@ case class DataSourceAnalysis(conf: SQLConf) extends Rule[LogicalPlan] with Cast * data source. 
*/ class FindDataSourceTable(sparkSession: SparkSession) extends Rule[LogicalPlan] { - private def readDataSourceTable(table: CatalogTable): LogicalPlan = { + private def readDataSourceTable( + table: CatalogTable, extraOptions: CaseInsensitiveStringMap): LogicalPlan = { val qualifiedTableName = QualifiedTableName(table.database, table.identifier.table) val catalog = sparkSession.sessionState.catalog + val dsOptions = DataSourceUtils.generateDatasourceOptions(extraOptions, table) catalog.getCachedPlan(qualifiedTableName, () => { - val pathOption = table.storage.locationUri.map("path" -> CatalogUtils.URIToString(_)) val dataSource = DataSource( sparkSession, @@ -251,24 +254,24 @@ class FindDataSourceTable(sparkSession: SparkSession) extends Rule[LogicalPlan] partitionColumns = table.partitionColumnNames, bucketSpec = table.bucketSpec, className = table.provider.get, - options = table.storage.properties ++ pathOption, + options = dsOptions, catalogTable = Some(table)) LogicalRelation(dataSource.resolveRelation(checkFilesExist = false), table) }) } override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { - case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta), _, _, _, _) + case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta, options), _, _, _, _) if DDLUtils.isDatasourceTable(tableMeta) => - i.copy(table = readDataSourceTable(tableMeta)) + i.copy(table = readDataSourceTable(tableMeta, options)) - case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta), _, _, _, _) => + case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta, _), _, _, _, _) => i.copy(table = DDLUtils.readHiveTable(tableMeta)) - case UnresolvedCatalogRelation(tableMeta) if DDLUtils.isDatasourceTable(tableMeta) => - readDataSourceTable(tableMeta) + case UnresolvedCatalogRelation(tableMeta, options) if DDLUtils.isDatasourceTable(tableMeta) => + readDataSourceTable(tableMeta, options) - case UnresolvedCatalogRelation(tableMeta) => + case UnresolvedCatalogRelation(tableMeta, _) => DDLUtils.readHiveTable(tableMeta) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala index abb74d8d09ec6..b4308a872bb39 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.datasources import java.util.Locale +import scala.collection.JavaConverters._ + import org.apache.hadoop.fs.Path import org.json4s.NoTypeHints import org.json4s.jackson.Serialization @@ -26,11 +28,13 @@ import org.json4s.jackson.Serialization import org.apache.spark.SparkUpgradeException import org.apache.spark.sql.{SPARK_LEGACY_DATETIME, SPARK_VERSION_METADATA_KEY} import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogUtils} import org.apache.spark.sql.catalyst.util.RebaseDateTime import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy import org.apache.spark.sql.sources.BaseRelation import org.apache.spark.sql.types._ +import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.Utils @@ -190,4 +194,34 @@ object DataSourceUtils { case LegacyBehaviorPolicy.LEGACY => RebaseDateTime.rebaseGregorianToJulianMicros case 
LegacyBehaviorPolicy.CORRECTED => identity[Long] } + + def generateDatasourceOptions( + extraOptions: CaseInsensitiveStringMap, table: CatalogTable): Map[String, String] = { + val pathOption = table.storage.locationUri.map("path" -> CatalogUtils.URIToString(_)) + val options = table.storage.properties ++ pathOption + if (!SQLConf.get.getConf(SQLConf.LEGACY_EXTRA_OPTIONS_BEHAVIOR)) { + // Check the same key with different values + table.storage.properties.foreach { case (k, v) => + if (extraOptions.containsKey(k) && extraOptions.get(k) != v) { + throw new AnalysisException( + s"Fail to resolve data source for the table ${table.identifier} since the table " + + s"serde property has the duplicated key $k with extra options specified for this " + + "scan operation. To fix this, you can rollback to the legacy behavior of ignoring " + + "the extra options by setting the config " + + s"${SQLConf.LEGACY_EXTRA_OPTIONS_BEHAVIOR.key} to `false`, or address the " + + s"conflicts of the same config.") + } + } + // To keep the original key from table properties, here we filter all case insensitive + // duplicate keys out from extra options. + val lowerCasedDuplicatedKeys = + table.storage.properties.keySet.map(_.toLowerCase(Locale.ROOT)) + .intersect(extraOptions.keySet.asScala) + extraOptions.asCaseSensitiveMap().asScala.filterNot { + case (k, _) => lowerCasedDuplicatedKeys.contains(k.toLowerCase(Locale.ROOT)) + }.toMap ++ options + } else { + options + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala index eaca63c74c875..4e61dba4955af 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala @@ -40,7 +40,7 @@ import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression} import org.apache.spark.sql.execution.QueryExecution -import org.apache.spark.sql.execution.datasources.DataSourceUtils +import org.apache.spark.sql.execution.datasources.{DataSourceUtils, HadoopFsRelation, LogicalRelation} import org.apache.spark.sql.execution.datasources.noop.NoopDataSource import org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase import org.apache.spark.sql.internal.SQLConf @@ -1199,4 +1199,24 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSparkSession with dfw.save("1") dfw.save("2") } + + test("SPARK-32844: DataFrameReader.table take the specified options for V1 relation") { + withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> "parquet") { + withTable("t") { + sql("CREATE TABLE t(i int, d double) USING parquet OPTIONS ('p1'='v1', 'p2'='v2')") + + val msg = intercept[AnalysisException] { + spark.read.option("P1", "v3").table("t").count() + }.getMessage + assert(msg.contains("duplicated key")) + + val df = spark.read.option("P2", "v2").option("p3", "v3").table("t") + val options = df.queryExecution.analyzed.collectFirst { + case r: LogicalRelation => r.relation.asInstanceOf[HadoopFsRelation].options + }.get + assert(options("p2") == "v2") + assert(options("p3") == "v3") + } + } + } } From 7a17158a4d7fd6d22f9550eceab42d8af308aeb4 Mon Sep 17 00:00:00 2001 From: "tanel.kiis@gmail.com" Date: Mon, 14 Sep 2020 22:52:33 +0900 Subject: [PATCH 0022/1009] [SPARK-32868][SQL] Add more order irrelevant 
aggregates to EliminateSorts ### What changes were proposed in this pull request? Mark `BitAggregate` as order irrelevant in `EliminateSorts`. ### Why are the changes needed? Performance improvements in some queries ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Generalized an existing UT Closes #29740 from tanelk/SPARK-32868. Authored-by: tanel.kiis@gmail.com Signed-off-by: Takeshi Yamamuro --- .../spark/sql/catalyst/dsl/package.scala | 6 +++++ .../sql/catalyst/optimizer/Optimizer.scala | 2 +- .../optimizer/EliminateSortsSuite.scala | 26 ++++++++++++++----- 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index 8b3243067a16c..b61c4b8d065f2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -197,6 +197,12 @@ package object dsl { Max(e).toAggregateExpression(isDistinct = false, filter = filter) def maxDistinct(e: Expression, filter: Option[Expression] = None): Expression = Max(e).toAggregateExpression(isDistinct = true, filter = filter) + def bitAnd(e: Expression, filter: Option[Expression] = None): Expression = + BitAndAgg(e).toAggregateExpression(isDistinct = false, filter = filter) + def bitOr(e: Expression, filter: Option[Expression] = None): Expression = + BitOrAgg(e).toAggregateExpression(isDistinct = false, filter = filter) + def bitXor(e: Expression, filter: Option[Expression] = None): Expression = + BitXorAgg(e).toAggregateExpression(isDistinct = false, filter = filter) def upper(e: Expression): Expression = Upper(e) def lower(e: Expression): Expression = Lower(e) def coalesce(args: Expression*): Expression = Coalesce(args) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 9216ab1631e7b..b7791cd442694 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -1044,7 +1044,7 @@ object EliminateSorts extends Rule[LogicalPlan] { private def isOrderIrrelevantAggs(aggs: Seq[NamedExpression]): Boolean = { def isOrderIrrelevantAggFunction(func: AggregateFunction): Boolean = func match { - case _: Min | _: Max | _: Count => true + case _: Min | _: Max | _: Count | _: BitAggregate => true // Arithmetic operations for floating-point values are order-sensitive // (they are not associative). 
case _: Sum | _: Average | _: CentralMomentAgg => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala index e2b599a7c090c..265f0a9936759 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala @@ -197,13 +197,25 @@ class EliminateSortsSuite extends PlanTest { comparePlans(optimizedThrice, correctAnswerThrice) } - test("remove orderBy in groupBy clause with count aggs") { - val projectPlan = testRelation.select('a, 'b) - val unnecessaryOrderByPlan = projectPlan.orderBy('a.asc, 'b.desc) - val groupByPlan = unnecessaryOrderByPlan.groupBy('a)(count(1)) - val optimized = Optimize.execute(groupByPlan.analyze) - val correctAnswer = projectPlan.groupBy('a)(count(1)).analyze - comparePlans(optimized, correctAnswer) + test("remove orderBy in groupBy clause with order irrelevant aggs") { + Seq( + (e : Expression) => min(e), + (e : Expression) => minDistinct(e), + (e : Expression) => max(e), + (e : Expression) => maxDistinct(e), + (e : Expression) => count(e), + (e : Expression) => countDistinct(e), + (e : Expression) => bitAnd(e), + (e : Expression) => bitOr(e), + (e : Expression) => bitXor(e) + ).foreach(agg => { + val projectPlan = testRelation.select('a, 'b) + val unnecessaryOrderByPlan = projectPlan.orderBy('a.asc, 'b.desc) + val groupByPlan = unnecessaryOrderByPlan.groupBy('a)(agg('b)) + val optimized = Optimize.execute(groupByPlan.analyze) + val correctAnswer = projectPlan.groupBy('a)(agg('b)).analyze + comparePlans(optimized, correctAnswer) + }) } test("remove orderBy in groupBy clause with sum aggs") { From 0696f0467270969f40e9baa829533bdb55f4002a Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Mon, 14 Sep 2020 13:54:21 -0700 Subject: [PATCH 0023/1009] [SPARK-32876][SQL] Change default fallback versions to 3.0.1 and 2.4.7 in HiveExternalCatalogVersionsSuite ### What changes were proposed in this pull request? The Jenkins job fails to get the versions. This was fixed by adding temporary fallbacks at https://github.com/apache/spark/pull/28536. This still doesn't work without the temporary fallbacks. See https://github.com/apache/spark/pull/29694 This PR adds new fallbacks since 2.3 is EOL and Spark 3.0.1 and 2.4.7 are released. ### Why are the changes needed? To test correctly in Jenkins. ### Does this PR introduce _any_ user-facing change? No, dev-only ### How was this patch tested? Jenkins and GitHub Actions builds should test. Closes #29748 from HyukjinKwon/SPARK-32876. Authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- .../spark/sql/hive/HiveExternalCatalogVersionsSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index aa96fa035c4f0..cbfdb7fac88d8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -243,7 +243,7 @@ object PROCESS_TABLES extends QueryTest with SQLTestUtils { .filter(_ < org.apache.spark.SPARK_VERSION) } catch { // do not throw exception during object initialization. 
- case NonFatal(_) => Seq("2.3.4", "2.4.5") // A temporary fallback to use a specific version + case NonFatal(_) => Seq("3.0.1", "2.4.7") // A temporary fallback to use a specific version } } From 72550c3be7120fcf2844d6914e883f1bec30d93f Mon Sep 17 00:00:00 2001 From: Ankur Dave Date: Mon, 14 Sep 2020 13:58:15 -0700 Subject: [PATCH 0024/1009] [SPARK-32872][CORE] Prevent BytesToBytesMap at MAX_CAPACITY from exceeding growth threshold ### What changes were proposed in this pull request? When BytesToBytesMap is at `MAX_CAPACITY` and reaches its growth threshold, `numKeys >= growthThreshold` is true but `longArray.size() / 2 < MAX_CAPACITY` is false. This correctly prevents the map from growing, but `canGrowArray` incorrectly remains true. Therefore the map keeps accepting new keys and exceeds its growth threshold. If we attempt to spill the map in this state, the UnsafeKVExternalSorter will not be able to reuse the long array for sorting. By this point the task has typically consumed all available memory, so the allocation of the new pointer array is likely to fail. This PR fixes the issue by setting `canGrowArray` to false in this case. This prevents the map from accepting new elements when it cannot grow to accommodate them. ### Why are the changes needed? Without this change, hash aggregations will fail when the number of groups per task is greater than `MAX_CAPACITY / 2 = 2^28` (approximately 268 million), and when the grouping aggregation is the only memory-consuming operator in its stage. For example, the final aggregation in `SELECT COUNT(DISTINCT id) FROM tbl` fails when `tbl` contains 1 billion distinct values and when `spark.sql.shuffle.partitions=1`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Reproducing this issue requires building a very large BytesToBytesMap. Because this is infeasible to do in a unit test, this PR was tested manually by adding the following test to AbstractBytesToBytesMapSuite. Before this PR, the test fails in 8.5 minutes. With this PR, the test passes in 1.5 minutes. ```java public abstract class AbstractBytesToBytesMapSuite { // ... Test public void respectGrowthThresholdAtMaxCapacity() { TestMemoryManager memoryManager2 = new TestMemoryManager( new SparkConf() .set(package$.MODULE$.MEMORY_OFFHEAP_ENABLED(), true) .set(package$.MODULE$.MEMORY_OFFHEAP_SIZE(), 25600 * 1024 * 1024L) .set(package$.MODULE$.SHUFFLE_SPILL_COMPRESS(), false) .set(package$.MODULE$.SHUFFLE_COMPRESS(), false)); TaskMemoryManager taskMemoryManager2 = new TaskMemoryManager(memoryManager2, 0); final long pageSizeBytes = 8000000 + 8; // 8 bytes for end-of-page marker final BytesToBytesMap map = new BytesToBytesMap(taskMemoryManager2, 1024, pageSizeBytes); try { // Insert keys into the map until it stops accepting new keys. for (long i = 0; i < BytesToBytesMap.MAX_CAPACITY; i++) { if (i % (1024 * 1024) == 0) System.out.println("Inserting element " + i); final long[] value = new long[]{i}; BytesToBytesMap.Location loc = map.lookup(value, Platform.LONG_ARRAY_OFFSET, 8); Assert.assertFalse(loc.isDefined()); boolean success = loc.append(value, Platform.LONG_ARRAY_OFFSET, 8, value, Platform.LONG_ARRAY_OFFSET, 8); if (!success) break; } // The map should grow to its max capacity. long capacity = map.getArray().size() / 2; Assert.assertTrue(capacity == BytesToBytesMap.MAX_CAPACITY); // The map should stop accepting new keys once it has reached its growth // threshold, which is half the max capacity. 
Assert.assertTrue(map.numKeys() == BytesToBytesMap.MAX_CAPACITY / 2); map.free(); } finally { map.free(); } } } ``` Closes #29744 from ankurdave/SPARK-32872. Authored-by: Ankur Dave Signed-off-by: Dongjoon Hyun --- .../spark/unsafe/map/BytesToBytesMap.java | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java index 8eea9db393aff..d7940fc08e1a5 100644 --- a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java +++ b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java @@ -808,12 +808,21 @@ public boolean append(Object kbase, long koff, int klen, Object vbase, long voff longArray.set(pos * 2 + 1, keyHashcode); isDefined = true; - // We use two array entries per key, so the array size is twice the capacity. - // We should compare the current capacity of the array, instead of its size. - if (numKeys >= growthThreshold && longArray.size() / 2 < MAX_CAPACITY) { - try { - growAndRehash(); - } catch (SparkOutOfMemoryError oom) { + // If the map has reached its growth threshold, try to grow it. + if (numKeys >= growthThreshold) { + // We use two array entries per key, so the array size is twice the capacity. + // We should compare the current capacity of the array, instead of its size. + if (longArray.size() / 2 < MAX_CAPACITY) { + try { + growAndRehash(); + } catch (SparkOutOfMemoryError oom) { + canGrowArray = false; + } + } else { + // The map is already at MAX_CAPACITY and cannot grow. Instead, we prevent it from + // accepting any more new elements to make sure we don't exceed the load factor. If we + // need to spill later, this allows UnsafeKVExternalSorter to reuse the array for + // sorting. canGrowArray = false; } } From d58a4a310aecb9fa1bee1be0f5cb02b3be078667 Mon Sep 17 00:00:00 2001 From: William Hyun Date: Mon, 14 Sep 2020 16:03:19 -0700 Subject: [PATCH 0025/1009] [SPARK-32882][K8S] Remove python2 installation in K8s python image ### What changes were proposed in this pull request? This PR aims to remove python2 installation in K8s python image because spark 3.1 does not support python2. ### Why are the changes needed? This will save disk space. **BEFORE** ``` kubespark/spark-py ... 917MB ``` **AFTER** ``` kubespark/spark-py ... 823MB ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the Jenkins with the K8s IT. Closes #29751 from williamhyun/remove_py2. 
Authored-by: William Hyun Signed-off-by: Dongjoon Hyun --- .../src/main/dockerfiles/spark/bindings/python/Dockerfile | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile index 8dfc5f7ff60c5..2f082f559ca5c 100644 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile @@ -24,15 +24,9 @@ WORKDIR / USER 0 RUN mkdir ${SPARK_HOME}/python -# TODO: Investigate running both pip and pip3 via virtualenvs RUN apt-get update && \ - apt install -y python python-pip && \ apt install -y python3 python3-pip && \ - # We remove ensurepip since it adds no functionality since pip is - # installed on the image and it just takes up 1.6MB on the image - rm -r /usr/lib/python*/ensurepip && \ - pip install --upgrade pip setuptools && \ - # You may install with python3 packages by using pip3.6 + pip3 install --upgrade pip setuptools && \ # Removed the .cache to save space rm -r /root/.cache && rm -rf /var/cache/apt/* From 4fac6d501a5d97530edb712ff3450890ac10e413 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Tue, 15 Sep 2020 09:27:47 +0900 Subject: [PATCH 0026/1009] [SPARK-32871][BUILD] Append toMap to Map#filterKeys if the result of filter is concatenated with another Map for Scala 2.13 ### What changes were proposed in this pull request? This PR appends `toMap` to `Map` instances with `filterKeys` if such maps is to be concatenated with another maps. ### Why are the changes needed? As of Scala 2.13, Map#filterKeys return a MapView, not the original Map type. This can cause compile error. ``` /sql/DataFrameReader.scala:279: type mismatch; [error] found : Iterable[(String, String)] [error] required: java.util.Map[String,String] [error] Error occurred in an application involving default arguments. [error] val dsOptions = new CaseInsensitiveStringMap(finalOptions.asJava) ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Compile passed with the following command. `build/mvn -Pscala-2.13 -Phive -Phive-thriftserver -Pyarn -Pkubernetes -DskipTests test-compile` Closes #29742 from sarutak/fix-filterKeys-issue. 
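For reference, a minimal standalone sketch of the pattern being fixed (plain Scala with illustrative option names, not Spark code):

```scala
val sessionOptions = Map("fetchsize" -> "100", "isolationLevel" -> "NONE")
val optionsWithPath = Map("path" -> "/tmp/data")

// Scala 2.12: filterKeys returns a strict Map, so `++` yields a Map as before.
// Scala 2.13: filterKeys returns a MapView, so without the extra toMap the result is no
// longer a Map, and code that later needs a strict Map (for example an asJava conversion
// into java.util.Map) no longer compiles.
val finalOptions: Map[String, String] =
  sessionOptions.filterKeys(!optionsWithPath.contains(_)).toMap ++ optionsWithPath
```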
Authored-by: Kousuke Saruta Signed-off-by: HyukjinKwon --- .../src/main/scala/org/apache/spark/sql/DataFrameReader.scala | 4 ++-- .../src/main/scala/org/apache/spark/sql/DataFrameWriter.scala | 4 ++-- .../org/apache/spark/sql/streaming/DataStreamReader.scala | 4 ++-- .../org/apache/spark/sql/streaming/DataStreamWriter.scala | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index ab18a3119c09f..b0d06e862ca7b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -274,8 +274,8 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { extraOptions + ("paths" -> objectMapper.writeValueAsString(paths.toArray)) } - val finalOptions = - sessionOptions.filterKeys(!optionsWithPath.contains(_)) ++ optionsWithPath.originalMap + val finalOptions = sessionOptions.filterKeys(!optionsWithPath.contains(_)).toMap ++ + optionsWithPath.originalMap val dsOptions = new CaseInsensitiveStringMap(finalOptions.asJava) val (table, catalog, ident) = provider match { case _: SupportsCatalogOptions if userSpecifiedSchema.nonEmpty => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index bd1997bee53f7..6fc4dc5aed6e7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -321,8 +321,8 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { extraOptions + ("path" -> path.get) } - val finalOptions = - sessionOptions.filterKeys(!optionsWithPath.contains(_)) ++ optionsWithPath.originalMap + val finalOptions = sessionOptions.filterKeys(!optionsWithPath.contains(_)).toMap ++ + optionsWithPath.originalMap val dsOptions = new CaseInsensitiveStringMap(finalOptions.asJava) def getTable: Table = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala index c22f917d3cf91..93a48946fbafc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala @@ -221,8 +221,8 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo case provider: TableProvider if !provider.isInstanceOf[FileDataSourceV2] => val sessionOptions = DataSourceV2Utils.extractSessionConfigs( source = provider, conf = sparkSession.sessionState.conf) - val finalOptions = - sessionOptions.filterKeys(!optionsWithPath.contains(_)) ++ optionsWithPath.originalMap + val finalOptions = sessionOptions.filterKeys(!optionsWithPath.contains(_)).toMap ++ + optionsWithPath.originalMap val dsOptions = new CaseInsensitiveStringMap(finalOptions.asJava) val table = DataSourceV2Utils.getTableFromProvider(provider, dsOptions, userSpecifiedSchema) import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index 682f3b98ec2e8..dda6dec9c4ebc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -365,8 +365,8 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { val provider = cls.getConstructor().newInstance().asInstanceOf[TableProvider] val sessionOptions = DataSourceV2Utils.extractSessionConfigs( source = provider, conf = df.sparkSession.sessionState.conf) - val finalOptions = - sessionOptions.filterKeys(!optionsWithPath.contains(_)) ++ optionsWithPath.originalMap + val finalOptions = sessionOptions.filterKeys(!optionsWithPath.contains(_)).toMap ++ + optionsWithPath.originalMap val dsOptions = new CaseInsensitiveStringMap(finalOptions.asJava) val table = DataSourceV2Utils.getTableFromProvider( provider, dsOptions, userSpecifiedSchema = None) From 7a9b066c66d29e946b4f384292021123beb6fe57 Mon Sep 17 00:00:00 2001 From: LantaoJin Date: Mon, 14 Sep 2020 18:24:52 -0700 Subject: [PATCH 0027/1009] [SPARK-32715][CORE] Fix memory leak when failed to store pieces of broadcast ### What changes were proposed in this pull request? In TorrentBroadcast.scala ```scala L133: if (!blockManager.putSingle(broadcastId, value, MEMORY_AND_DISK, tellMaster = false)) L137: TorrentBroadcast.blockifyObject(value, blockSize, SparkEnv.get.serializer, compressionCodec) L147: if (!blockManager.putBytes(pieceId, bytes, MEMORY_AND_DISK_SER, tellMaster = true)) ``` After the original value is saved successfully(TorrentBroadcast.scala: L133), but the following `blockifyObject()`(L137) or store piece(L147) steps are failed. There is no opportunity to release broadcast from memory. This patch is to remove all pieces of the broadcast when failed to blockify or failed to store some pieces of a broadcast. ### Why are the changes needed? We use Spark thrift-server as a long-running service. A bad query submitted a heavy BroadcastNestLoopJoin operation and made driver full GC. We killed the bad query but we found the driver's memory usage was still high and full GCs were still frequent. By investigating with GC dump and log, we found the broadcast may memory leak. > 2020-08-19T18:54:02.824-0700: [Full GC (Allocation Failure) 2020-08-19T18:54:02.824-0700: [Class Histogram (before full gc): 116G->112G(170G), 184.9121920 secs] [Eden: 32.0M(7616.0M)->0.0B(8704.0M) Survivors: 1088.0M->0.0B Heap: 116.4G(170.0G)->112.9G(170.0G)], [Metaspace: 177285K->177270K(182272K)] 1: 676531691 72035438432 [B 2: 676502528 32472121344 org.apache.spark.sql.catalyst.expressions.UnsafeRow 3: 99551 12018117568 [Ljava.lang.Object; 4: 26570 4349629040 [I 5: 6 3264536688 [Lorg.apache.spark.sql.catalyst.InternalRow; 6: 1708819 256299456 [C 7: 2338 179615208 [J 8: 1703669 54517408 java.lang.String 9: 103860 34896960 org.apache.spark.status.TaskDataWrapper 10: 177396 25545024 java.net.URI ... ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually test. This UT is hard to write and the patch is straightforward. Closes #29558 from LantaoJin/SPARK-32715. 
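For reference, a condensed sketch of the failure-handling pattern the fix introduces; the helpers below are hypothetical stand-ins for the real BlockManager calls, kept only so the sketch compiles on its own:

```scala
// Hypothetical stand-ins for the real BlockManager interactions.
def storeWholeValue(v: AnyRef): Unit = ()
def blockify(v: AnyRef): Seq[Array[Byte]] = Seq(Array[Byte](1, 2, 3))
def storePiece(piece: Array[Byte]): Unit = ()
def removeAllPieces(): Unit = println("removing partially stored broadcast blocks")

def writeBlocks(value: AnyRef): Int = {
  storeWholeValue(value)             // step 1: already succeeded, value is cached locally
  try {
    val pieces = blockify(value)     // step 2: may throw (serialization, OOM, ...)
    pieces.foreach(storePiece)       // step 3: may throw (memory/disk store failure)
    pieces.length
  } catch {
    case t: Throwable =>
      removeAllPieces()              // undo, otherwise the value stored in step 1 leaks
      throw t
  }
}
```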
Authored-by: LantaoJin Signed-off-by: Dongjoon Hyun --- .../spark/broadcast/TorrentBroadcast.scala | 32 ++++++++++++------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala index 77fbbc08c2103..1024d9b5060bc 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala @@ -133,22 +133,30 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long) if (!blockManager.putSingle(broadcastId, value, MEMORY_AND_DISK, tellMaster = false)) { throw new SparkException(s"Failed to store $broadcastId in BlockManager") } - val blocks = - TorrentBroadcast.blockifyObject(value, blockSize, SparkEnv.get.serializer, compressionCodec) - if (checksumEnabled) { - checksums = new Array[Int](blocks.length) - } - blocks.zipWithIndex.foreach { case (block, i) => + try { + val blocks = + TorrentBroadcast.blockifyObject(value, blockSize, SparkEnv.get.serializer, compressionCodec) if (checksumEnabled) { - checksums(i) = calcChecksum(block) + checksums = new Array[Int](blocks.length) } - val pieceId = BroadcastBlockId(id, "piece" + i) - val bytes = new ChunkedByteBuffer(block.duplicate()) - if (!blockManager.putBytes(pieceId, bytes, MEMORY_AND_DISK_SER, tellMaster = true)) { - throw new SparkException(s"Failed to store $pieceId of $broadcastId in local BlockManager") + blocks.zipWithIndex.foreach { case (block, i) => + if (checksumEnabled) { + checksums(i) = calcChecksum(block) + } + val pieceId = BroadcastBlockId(id, "piece" + i) + val bytes = new ChunkedByteBuffer(block.duplicate()) + if (!blockManager.putBytes(pieceId, bytes, MEMORY_AND_DISK_SER, tellMaster = true)) { + throw new SparkException(s"Failed to store $pieceId of $broadcastId " + + s"in local BlockManager") + } } + blocks.length + } catch { + case t: Throwable => + logError(s"Store broadcast $broadcastId fail, remove all pieces of the broadcast") + blockManager.removeBroadcast(id, tellMaster = true) + throw t } - blocks.length } /** Fetch torrent blocks from the driver and/or other executors. */ From 0811666ab104b41cf189233439f4158b18bc8282 Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Mon, 14 Sep 2020 21:15:06 -0700 Subject: [PATCH 0028/1009] [SPARK-32878][CORE] Avoid scheduling TaskSetManager which has no pending tasks ### What changes were proposed in this pull request? This PR proposes to avoid scheduling the (non-zombie) TaskSetManager which has no pending tasks. ### Why are the changes needed? Currently, Spark always tries to schedule a (non-zombie) TaskSetManager even if it has no pending tasks. This causes notable problems for the barrier TaskSetManager: 1. `calculateAvailableSlots` can be called for multiple times for a launched barrier TaskSetManager; 2. user would see "Skip current round of resource offers for barrier stage" log message for a launched barrier TaskSetManager all the time until the barrier TaskSetManager finishes, which is quite confused. Besides, scheduling a TaskSetManager always involves many function invocations even if there're no pending tasks. Therefore, I think we can skip those un-schedulable TasksetManagers to avoid the potential overhead. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass existing tests. Closes #29750 from Ngone51/filter-out-unschedulable-stage. 
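For reference, a toy model of the new check, with hypothetical simplified types (the real Schedulable/TaskSetManager carry far more state):

```scala
// Toy model, not Spark's types: the point is the predicate used to filter the queue.
final case class ToyTaskSetManager(
    name: String,
    isZombie: Boolean,
    pendingTasks: Seq[Int],
    pendingSpeculatableTasks: Seq[Int]) {
  // Only task sets that can still launch tasks should be offered resources.
  def isSchedulable: Boolean =
    !isZombie && (pendingTasks.nonEmpty || pendingSpeculatableTasks.nonEmpty)
}

val queue = Seq(
  ToyTaskSetManager(name = "launched barrier stage", isZombie = false,
    pendingTasks = Nil, pendingSpeculatableTasks = Nil),
  ToyTaskSetManager(name = "stage with pending work", isZombie = false,
    pendingTasks = Seq(0, 1), pendingSpeculatableTasks = Nil))

// Only the second entry survives, so the fully launched barrier stage is no longer
// re-examined (and logged about) on every round of resource offers.
val offered = queue.filter(_.isSchedulable)
```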
Authored-by: yi.wu Signed-off-by: Dongjoon Hyun --- core/src/main/scala/org/apache/spark/scheduler/Pool.scala | 4 +++- .../main/scala/org/apache/spark/scheduler/Schedulable.scala | 1 + .../scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala | 2 +- .../scala/org/apache/spark/scheduler/TaskSetManager.scala | 3 +++ 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/Pool.scala b/core/src/main/scala/org/apache/spark/scheduler/Pool.scala index 2e2851eb9070b..7333b31524f2a 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/Pool.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/Pool.scala @@ -59,6 +59,8 @@ private[spark] class Pool( } } + override def isSchedulable: Boolean = true + override def addSchedulable(schedulable: Schedulable): Unit = { require(schedulable != null) schedulableQueue.add(schedulable) @@ -105,7 +107,7 @@ private[spark] class Pool( val sortedSchedulableQueue = schedulableQueue.asScala.toSeq.sortWith(taskSetSchedulingAlgorithm.comparator) for (schedulable <- sortedSchedulableQueue) { - sortedTaskSetQueue ++= schedulable.getSortedTaskSetQueue + sortedTaskSetQueue ++= schedulable.getSortedTaskSetQueue.filter(_.isSchedulable) } sortedTaskSetQueue } diff --git a/core/src/main/scala/org/apache/spark/scheduler/Schedulable.scala b/core/src/main/scala/org/apache/spark/scheduler/Schedulable.scala index 8cc239c81d11a..0626f8fb8150a 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/Schedulable.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/Schedulable.scala @@ -39,6 +39,7 @@ private[spark] trait Schedulable { def stageId: Int def name: String + def isSchedulable: Boolean def addSchedulable(schedulable: Schedulable): Unit def removeSchedulable(schedulable: Schedulable): Unit def getSchedulableByName(name: String): Schedulable diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala index 107c517ca06bc..2fcf13d5268f8 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala @@ -535,7 +535,7 @@ private[spark] class TaskSchedulerImpl( val availableResources = shuffledOffers.map(_.resources).toArray val availableCpus = shuffledOffers.map(o => o.cores).toArray val resourceProfileIds = shuffledOffers.map(o => o.resourceProfileId).toArray - val sortedTaskSets = rootPool.getSortedTaskSetQueue.filterNot(_.isZombie) + val sortedTaskSets = rootPool.getSortedTaskSetQueue for (taskSet <- sortedTaskSets) { logDebug("parentName: %s, name: %s, runningTasks: %s".format( taskSet.parent.name, taskSet.name, taskSet.runningTasks)) diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index 673fe4fe27519..78fd412ef154c 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -951,6 +951,9 @@ private[spark] class TaskSetManager( null } + override def isSchedulable: Boolean = !isZombie && + (pendingTasks.all.nonEmpty || pendingSpeculatableTasks.all.nonEmpty) + override def addSchedulable(schedulable: Schedulable): Unit = {} override def removeSchedulable(schedulable: Schedulable): Unit = {} From d8a0d8569243d29e7f091d545ee1e9eb780d3dc8 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 15 Sep 2020 14:38:01 +0900 
Subject: [PATCH 0029/1009] [SPARK-32884][TESTS] Mark TPCDSQuery*Suite as ExtendedSQLTest ### What changes were proposed in this pull request? This PR aims to mark the following suite as `ExtendedSQLTest` to reduce GitHub Action test time. - TPCDSQuerySuite - TPCDSQueryANSISuite - TPCDSQueryWithStatsSuite ### Why are the changes needed? Currently, the longest GitHub Action task is `Build and test / Build modules: sql - other tests` with `1h 57m 10s` while `Build and test / Build modules: sql - slow tests` takes `42m 20s`. With this PR, we can move the workload from `other tests` to `slow tests` task and reduce the total waiting time about 7 ~ 8 minutes. ### Does this PR introduce _any_ user-facing change? No. This is a test-only change. ### How was this patch tested? Pass the GitHub Action with the reduced running time. Closes #29755 from dongjoon-hyun/SPARK-SLOWTEST. Authored-by: Dongjoon Hyun Signed-off-by: HyukjinKwon --- .../src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala index decd1d6d08d27..22e1b838f3f3f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala @@ -20,11 +20,13 @@ package org.apache.spark.sql import org.apache.spark.SparkConf import org.apache.spark.sql.catalyst.util.resourceToString import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.tags.ExtendedSQLTest /** * This test suite ensures all the TPC-DS queries can be successfully analyzed, optimized * and compiled without hitting the max iteration threshold. */ +@ExtendedSQLTest class TPCDSQuerySuite extends BenchmarkQueryTest with TPCDSBase { tpcdsQueries.foreach { name => @@ -64,10 +66,12 @@ class TPCDSQuerySuite extends BenchmarkQueryTest with TPCDSBase { } } +@ExtendedSQLTest class TPCDSQueryWithStatsSuite extends TPCDSQuerySuite { override def injectStats: Boolean = true } +@ExtendedSQLTest class TPCDSQueryANSISuite extends TPCDSQuerySuite { override protected def sparkConf: SparkConf = super.sparkConf.set(SQLConf.ANSI_ENABLED, true) From c8baab1a1f2ac03951946ff899d1c51a69c2c8b3 Mon Sep 17 00:00:00 2001 From: herman Date: Tue, 15 Sep 2020 06:24:54 +0000 Subject: [PATCH 0030/1009] [SPARK-32879][SQL] Refactor SparkSession initial options ### What changes were proposed in this pull request? This PR refactors the way we propagate the options from the `SparkSession.Builder` to the `SessionState`. This is currently done via a mutable map inside the SparkSession. These settings are then applied **after** the `SessionState` has been created. This is a bit confusing when you expect something to be set when constructing the `SessionState`. This PR passes the options as a constructor parameter to the `SessionStateBuilder` and this will set the options when the configuration is created. ### Why are the changes needed? It makes it easier to reason about the configurations set in a SessionState than before. We recently had an incident where someone was using `SparkSessionExtensions` to create a planner rule that relied on a conf to be set. While this is in itself probably incorrect usage, it still illustrated this somewhat funky behavior. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests. Closes #29752 from hvanhovell/SPARK-32879.
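For illustration, here is a minimal standalone sketch of the two approaches. It uses hypothetical `Conf` and `StateBuilder` stand-ins rather than Spark's real `SessionState` machinery, so it only models the ordering change, not the actual API:

```scala
import scala.collection.mutable

// Hypothetical stand-ins; only the ordering of "apply options" vs. "build state" matters here.
object InitialOptionsSketch {
  final class Conf {
    private val settings = mutable.Map.empty[String, String]
    def set(key: String, value: String): Unit = settings.update(key, value)
    def get(key: String): Option[String] = settings.get(key)
  }

  // Before: the conf is built first and the builder options are pushed into it
  // afterwards, so anything constructed in between (e.g. a planner rule added
  // through an extension) can observe a conf that is missing the options.
  def buildThenMutate(options: Map[String, String]): Conf = {
    val conf = new Conf
    // ...code running here would not see `options` yet...
    options.foreach { case (k, v) => conf.set(k, v) }
    conf
  }

  // After: the options are a constructor parameter and are folded in while the
  // conf is created, so a partially configured conf is never visible.
  final class StateBuilder(options: Map[String, String]) {
    def build(): Conf = {
      val conf = new Conf
      options.foreach { case (k, v) => conf.set(k, v) }
      conf
    }
  }

  def main(args: Array[String]): Unit = {
    val opts = Map("spark.some.option" -> "true")
    assert(new StateBuilder(opts).build().get("spark.some.option").contains("true"))
  }
}
```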
Authored-by: herman Signed-off-by: Wenchen Fan --- project/MimaExcludes.scala | 5 ++- .../org/apache/spark/sql/SparkSession.scala | 42 +++++++++++-------- .../internal/BaseSessionStateBuilder.scala | 6 ++- .../spark/sql/internal/SessionState.scala | 7 ++-- .../spark/sql/test/TestSQLContext.scala | 9 ++-- .../sql/hive/HiveSessionStateBuilder.scala | 12 +++--- .../apache/spark/sql/hive/test/TestHive.scala | 9 ++-- 7 files changed, 55 insertions(+), 35 deletions(-) diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 0be7b4c1003a7..d32d31daae8e7 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -71,7 +71,10 @@ object MimaExcludes { ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.org$apache$spark$ml$classification$BinaryClassificationSummary$$sparkSession"), ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.org$apache$spark$ml$classification$ClassificationSummary$_setter_$org$apache$spark$ml$classification$ClassificationSummary$$multiclassMetrics_="), ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.org$apache$spark$ml$classification$ClassificationSummary$$multiclassMetrics"), - ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.weightCol") + ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.weightCol"), + + // [SPARK-32879] Pass SparkSession.Builder options explicitly to SparkSession + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SparkSession.this") ) // Exclude rules for 3.0.x diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index e5d53f5fd4c65..5704414df2d0d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -81,7 +81,8 @@ class SparkSession private( @transient val sparkContext: SparkContext, @transient private val existingSharedState: Option[SharedState], @transient private val parentSessionState: Option[SessionState], - @transient private[sql] val extensions: SparkSessionExtensions) + @transient private[sql] val extensions: SparkSessionExtensions, + @transient private val initialSessionOptions: Map[String, String]) extends Serializable with Closeable with Logging { self => // The call site where this SparkSession was constructed. @@ -97,7 +98,7 @@ class SparkSession private( this(sc, None, None, SparkSession.applyExtensions( sc.getConf.get(StaticSQLConf.SPARK_SESSION_EXTENSIONS).getOrElse(Seq.empty), - new SparkSessionExtensions)) + new SparkSessionExtensions), Map.empty) } sparkContext.assertNotStopped() @@ -133,12 +134,6 @@ class SparkSession private( existingSharedState.getOrElse(new SharedState(sparkContext, initialSessionOptions)) } - /** - * Initial options for session. This options are applied once when sessionState is created. - */ - @transient - private[sql] val initialSessionOptions = new scala.collection.mutable.HashMap[String, String] - /** * State isolated across sessions, including SQL configurations, temporary tables, registered * functions, and everything else that accepts a [[org.apache.spark.sql.internal.SQLConf]]. 
@@ -156,8 +151,8 @@ class SparkSession private( .getOrElse { val state = SparkSession.instantiateSessionState( SparkSession.sessionStateClassName(sparkContext.conf), - self) - initialSessionOptions.foreach { case (k, v) => state.conf.setConfString(k, v) } + self, + initialSessionOptions) state } } @@ -244,7 +239,12 @@ class SparkSession private( * @since 2.0.0 */ def newSession(): SparkSession = { - new SparkSession(sparkContext, Some(sharedState), parentSessionState = None, extensions) + new SparkSession( + sparkContext, + Some(sharedState), + parentSessionState = None, + extensions, + initialSessionOptions) } /** @@ -260,7 +260,12 @@ class SparkSession private( * implementation is Hive, this will initialize the metastore, which may take some time. */ private[sql] def cloneSession(): SparkSession = { - val result = new SparkSession(sparkContext, Some(sharedState), Some(sessionState), extensions) + val result = new SparkSession( + sparkContext, + Some(sharedState), + Some(sessionState), + extensions, + Map.empty) result.sessionState // force copy of SessionState result } @@ -939,8 +944,7 @@ object SparkSession extends Logging { sparkContext.getConf.get(StaticSQLConf.SPARK_SESSION_EXTENSIONS).getOrElse(Seq.empty), extensions) - session = new SparkSession(sparkContext, None, None, extensions) - options.foreach { case (k, v) => session.initialSessionOptions.put(k, v) } + session = new SparkSession(sparkContext, None, None, extensions, options.toMap) setDefaultSession(session) setActiveSession(session) registerContextListener(sparkContext) @@ -1104,12 +1108,16 @@ object SparkSession extends Logging { */ private def instantiateSessionState( className: String, - sparkSession: SparkSession): SessionState = { + sparkSession: SparkSession, + options: Map[String, String]): SessionState = { try { - // invoke `new [Hive]SessionStateBuilder(SparkSession, Option[SessionState])` + // invoke new [Hive]SessionStateBuilder( + // SparkSession, + // Option[SessionState], + // Map[String, String]) val clazz = Utils.classForName(className) val ctor = clazz.getConstructors.head - ctor.newInstance(sparkSession, None).asInstanceOf[BaseSessionStateBuilder].build() + ctor.newInstance(sparkSession, None, options).asInstanceOf[BaseSessionStateBuilder].build() } catch { case NonFatal(e) => throw new IllegalArgumentException(s"Error while instantiating '$className':", e) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index 83a7a557305e9..4ca1ac863addc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -57,7 +57,8 @@ import org.apache.spark.sql.util.ExecutionListenerManager @Unstable abstract class BaseSessionStateBuilder( val session: SparkSession, - val parentState: Option[SessionState] = None) { + val parentState: Option[SessionState], + val options: Map[String, String]) { type NewBuilder = (SparkSession, Option[SessionState]) => BaseSessionStateBuilder /** @@ -97,6 +98,9 @@ abstract class BaseSessionStateBuilder( }.getOrElse { val conf = new SQLConf mergeSparkConf(conf, session.sparkContext.conf) + options.foreach { + case (k, v) => conf.setConfString(k, v) + } conf } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala index 
cd425b04ef311..0f9a89741c192 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala @@ -136,9 +136,10 @@ private[sql] object SessionState { @Unstable class SessionStateBuilder( session: SparkSession, - parentState: Option[SessionState] = None) - extends BaseSessionStateBuilder(session, parentState) { - override protected def newBuilder: NewBuilder = new SessionStateBuilder(_, _) + parentState: Option[SessionState], + options: Map[String, String]) + extends BaseSessionStateBuilder(session, parentState, options) { + override protected def newBuilder: NewBuilder = new SessionStateBuilder(_, _, Map.empty) } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala index 17603deacdcdd..ac06e1f41bfb3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala @@ -39,7 +39,7 @@ private[spark] class TestSparkSession(sc: SparkContext) extends SparkSession(sc) @transient override lazy val sessionState: SessionState = { - new TestSQLSessionStateBuilder(this, None).build() + new TestSQLSessionStateBuilder(this, None, Map.empty).build() } // Needed for Java tests @@ -66,8 +66,9 @@ private[sql] object TestSQLContext { private[sql] class TestSQLSessionStateBuilder( session: SparkSession, - state: Option[SessionState]) - extends SessionStateBuilder(session, state) with WithTestConf { + state: Option[SessionState], + options: Map[String, String]) + extends SessionStateBuilder(session, state, options) with WithTestConf { override def overrideConfs: Map[String, String] = TestSQLContext.overrideConfs - override def newBuilder: NewBuilder = new TestSQLSessionStateBuilder(_, _) + override def newBuilder: NewBuilder = new TestSQLSessionStateBuilder(_, _, Map.empty) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala index 78ec2b8e2047e..b9135733856a5 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala @@ -21,10 +21,9 @@ import org.apache.spark.annotation.Unstable import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.analysis.{Analyzer, ResolveSessionCatalog} import org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener -import org.apache.spark.sql.catalyst.optimizer.Optimizer import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.execution.{SparkOptimizer, SparkPlanner} +import org.apache.spark.sql.execution.SparkPlanner import org.apache.spark.sql.execution.aggregate.ResolveEncodersInScalaAgg import org.apache.spark.sql.execution.analysis.DetectAmbiguousSelfJoin import org.apache.spark.sql.execution.command.CommandCheck @@ -38,8 +37,11 @@ import org.apache.spark.sql.internal.{BaseSessionStateBuilder, SessionResourceLo * Builder that produces a Hive-aware `SessionState`. 
*/ @Unstable -class HiveSessionStateBuilder(session: SparkSession, parentState: Option[SessionState] = None) - extends BaseSessionStateBuilder(session, parentState) { +class HiveSessionStateBuilder( + session: SparkSession, + parentState: Option[SessionState], + options: Map[String, String]) + extends BaseSessionStateBuilder(session, parentState, options) { private def externalCatalog: ExternalCatalogWithListener = session.sharedState.externalCatalog @@ -116,7 +118,7 @@ class HiveSessionStateBuilder(session: SparkSession, parentState: Option[Session } } - override protected def newBuilder: NewBuilder = new HiveSessionStateBuilder(_, _) + override protected def newBuilder: NewBuilder = new HiveSessionStateBuilder(_, _, Map.empty) } class HiveSessionResourceLoader( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index f98534eb2b543..497dda4e22213 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -224,7 +224,7 @@ private[hive] class TestHiveSparkSession( @transient override lazy val sessionState: SessionState = { - new TestHiveSessionStateBuilder(this, parentSessionState).build() + new TestHiveSessionStateBuilder(this, parentSessionState, Map.empty).build() } lazy val metadataHive: HiveClient = { @@ -650,8 +650,9 @@ private[hive] object TestHiveContext { private[sql] class TestHiveSessionStateBuilder( session: SparkSession, - state: Option[SessionState]) - extends HiveSessionStateBuilder(session, state) + state: Option[SessionState], + options: Map[String, String]) + extends HiveSessionStateBuilder(session, state, options) with WithTestConf { override def overrideConfs: Map[String, String] = TestHiveContext.overrideConfs @@ -660,7 +661,7 @@ private[sql] class TestHiveSessionStateBuilder( new TestHiveQueryExecution(session.asInstanceOf[TestHiveSparkSession], plan) } - override protected def newBuilder: NewBuilder = new TestHiveSessionStateBuilder(_, _) + override protected def newBuilder: NewBuilder = new TestHiveSessionStateBuilder(_, _, Map.empty) } private[hive] object HiveTestJars { From 99384d1e831b7fe82a3a80ade1da976971624ee7 Mon Sep 17 00:00:00 2001 From: Zhenhua Wang Date: Tue, 15 Sep 2020 06:46:17 +0000 Subject: [PATCH 0031/1009] [SPARK-32738][CORE] Should reduce the number of active threads if fatal error happens in `Inbox.process` ### What changes were proposed in this pull request? Processing for `ThreadSafeRpcEndpoint` is controlled by `numActiveThreads` in `Inbox`. Now if any fatal error happens during `Inbox.process`, `numActiveThreads` is not reduced. Then other threads can not process messages in that inbox, which causes the endpoint to "hang". For other type of endpoints, we also should keep `numActiveThreads` correct. This problem is more serious in previous Spark 2.x versions since the driver, executor and block manager endpoints are all thread safe endpoints. To fix this, we should reduce the number of active threads if fatal error happens in `Inbox.process`. ### Why are the changes needed? `numActiveThreads` is not correct when fatal error happens and will cause the described problem. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Add a new test. Closes #29580 from wzhfy/deal_with_fatal_error. 
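A rough, self-contained model of the failure mode may help. The class below is a toy, not the real `org.apache.spark.rpc.netty.Inbox`, and the counter handling is simplified, but it shows why a fatal error must decrement the active-thread counter before being rethrown:

```scala
import scala.util.control.NonFatal

// Toy inbox: a counter-guarded message handler. If a fatal error escaped
// without the decrement, the counter would stay at 1 and the inbox would
// look permanently busy to other threads.
class ToyInbox {
  private var numActiveThreads = 0

  def process(handle: String => Unit, message: String): Unit = synchronized {
    numActiveThreads += 1
    try {
      handle(message)
    } catch {
      case NonFatal(e) =>
        println(s"non-fatal error ignored: $e")
      case fatal: Throwable =>
        numActiveThreads -= 1 // restore the counter before propagating
        throw fatal
    }
    numActiveThreads -= 1
  }

  def activeThreads: Int = synchronized(numActiveThreads)
}

object ToyInboxDemo {
  def main(args: Array[String]): Unit = {
    val inbox = new ToyInbox
    try inbox.process(_ => throw new OutOfMemoryError("boom"), "hi")
    catch { case _: OutOfMemoryError => () }
    assert(inbox.activeThreads == 0) // holds only because of the fatal-error branch
  }
}
```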
Authored-by: Zhenhua Wang Signed-off-by: Wenchen Fan --- .../org/apache/spark/rpc/netty/Inbox.scala | 20 +++++++++++++++++++ .../apache/spark/rpc/netty/InboxSuite.scala | 13 ++++++++++++ 2 files changed, 33 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/rpc/netty/Inbox.scala b/core/src/main/scala/org/apache/spark/rpc/netty/Inbox.scala index 2ed03f7430c32..472401b23fe8e 100644 --- a/core/src/main/scala/org/apache/spark/rpc/netty/Inbox.scala +++ b/core/src/main/scala/org/apache/spark/rpc/netty/Inbox.scala @@ -200,6 +200,16 @@ private[netty] class Inbox(val endpointName: String, val endpoint: RpcEndpoint) * Calls action closure, and calls the endpoint's onError function in the case of exceptions. */ private def safelyCall(endpoint: RpcEndpoint)(action: => Unit): Unit = { + def dealWithFatalError(fatal: Throwable): Unit = { + inbox.synchronized { + assert(numActiveThreads > 0, "The number of active threads should be positive.") + // Should reduce the number of active threads before throw the error. + numActiveThreads -= 1 + } + logError(s"An error happened while processing message in the inbox for $endpointName", fatal) + throw fatal + } + try action catch { case NonFatal(e) => try endpoint.onError(e) catch { @@ -209,8 +219,18 @@ private[netty] class Inbox(val endpointName: String, val endpoint: RpcEndpoint) } else { logError("Ignoring error", ee) } + case fatal: Throwable => + dealWithFatalError(fatal) } + case fatal: Throwable => + dealWithFatalError(fatal) } } + // exposed only for testing + def getNumActiveThreads: Int = { + inbox.synchronized { + inbox.numActiveThreads + } + } } diff --git a/core/src/test/scala/org/apache/spark/rpc/netty/InboxSuite.scala b/core/src/test/scala/org/apache/spark/rpc/netty/InboxSuite.scala index c74c728b3e3f3..8b1c602cd8e58 100644 --- a/core/src/test/scala/org/apache/spark/rpc/netty/InboxSuite.scala +++ b/core/src/test/scala/org/apache/spark/rpc/netty/InboxSuite.scala @@ -136,4 +136,17 @@ class InboxSuite extends SparkFunSuite { endpoint.verifySingleOnNetworkErrorMessage(cause, remoteAddress) } + + test("SPARK-32738: should reduce the number of active threads when fatal error happens") { + val endpoint = mock(classOf[TestRpcEndpoint]) + when(endpoint.receive).thenThrow(new OutOfMemoryError()) + + val dispatcher = mock(classOf[Dispatcher]) + val inbox = new Inbox("name", endpoint) + inbox.post(OneWayMessage(null, "hi")) + intercept[OutOfMemoryError] { + inbox.process(dispatcher) + } + assert(inbox.getNumActiveThreads == 0) + } } From 316242b768a232ea541e854633374aebcd2ed194 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 15 Sep 2020 13:07:03 +0000 Subject: [PATCH 0032/1009] [SPARK-32874][SQL][TEST] Enhance result set meta data check for execute statement operation with thrift server ### What changes were proposed in this pull request? This PR adds test cases for the result set metadata checking for Spark's `ExecuteStatementOperation` to make the JDBC API more future-proofing because any server-side change may affect the client compatibility. ### Why are the changes needed? add test to prevent potential silent behavior change for JDBC users. ### Does this PR introduce _any_ user-facing change? NO, test only ### How was this patch tested? add new test Closes #29746 from yaooqinn/SPARK-32874. 
Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- ...arkThriftServerProtocolVersionsSuite.scala | 147 +++++++++++++++++- 1 file changed, 140 insertions(+), 7 deletions(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala index fd45e7a48c0eb..69486eeb031b1 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala @@ -148,6 +148,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { testExecuteStatementWithProtocolVersion(version, "SELECT cast(1 as byte)") { rs => assert(rs.next()) assert(rs.getByte(1) === 1.toByte) + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) === "CAST(1 AS TINYINT)") + assert(metaData.getColumnTypeName(1) === "tinyint") + assert(metaData.getColumnType(1) === java.sql.Types.TINYINT) + assert(metaData.getPrecision(1) === 3) + assert(metaData.getScale(1) === 0) } } @@ -155,6 +161,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { testExecuteStatementWithProtocolVersion(version, "SELECT cast(1 as short)") { rs => assert(rs.next()) assert(rs.getShort(1) === 1.toShort) + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) === "CAST(1 AS SMALLINT)") + assert(metaData.getColumnTypeName(1) === "smallint") + assert(metaData.getColumnType(1) === java.sql.Types.SMALLINT) + assert(metaData.getPrecision(1) === 5) + assert(metaData.getScale(1) === 0) } } @@ -162,6 +174,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { testExecuteStatementWithProtocolVersion(version, "SELECT 1") { rs => assert(rs.next()) assert(rs.getInt(1) === 1) + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) === "1") + assert(metaData.getColumnTypeName(1) === "int") + assert(metaData.getColumnType(1) === java.sql.Types.INTEGER) + assert(metaData.getPrecision(1) === 10) + assert(metaData.getScale(1) === 0) } } @@ -169,6 +187,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { testExecuteStatementWithProtocolVersion(version, "SELECT cast(1 as bigint)") { rs => assert(rs.next()) assert(rs.getLong(1) === 1L) + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) === "CAST(1 AS BIGINT)") + assert(metaData.getColumnTypeName(1) === "bigint") + assert(metaData.getColumnType(1) === java.sql.Types.BIGINT) + assert(metaData.getPrecision(1) === 19) + assert(metaData.getScale(1) === 0) } } @@ -176,6 +200,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { testExecuteStatementWithProtocolVersion(version, "SELECT cast(1.2 as float)") { rs => assert(rs.next()) assert(rs.getFloat(1) === 1.2F) + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) === "CAST(1.2 AS FLOAT)") + assert(metaData.getColumnTypeName(1) === "float") + assert(metaData.getColumnType(1) === java.sql.Types.FLOAT) + assert(metaData.getPrecision(1) === 7) + assert(metaData.getScale(1) === 7) } } @@ -183,14 +213,30 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { testExecuteStatementWithProtocolVersion(version, "SELECT cast(1.2 as double)") { rs => assert(rs.next()) assert(rs.getDouble(1) === 1.2D) + val metaData = 
rs.getMetaData + assert(metaData.getColumnName(1) === "CAST(1.2 AS DOUBLE)") + assert(metaData.getColumnTypeName(1) === "double") + assert(metaData.getColumnType(1) === java.sql.Types.DOUBLE) + assert(metaData.getPrecision(1) === 15) + assert(metaData.getScale(1) === 15) } } test(s"$version get decimal type") { testExecuteStatementWithProtocolVersion(version, - "SELECT cast(1 as decimal(18, 2)) as c") { rs => + "SELECT cast(1 as decimal(9, 1)) as col0, 1234.56BD as col1, 0.123 as col2") { rs => assert(rs.next()) - assert(rs.getBigDecimal(1) === new java.math.BigDecimal("1.00")) + assert(rs.getBigDecimal(1) === new java.math.BigDecimal("1.0")) + assert(rs.getBigDecimal("col1") === new java.math.BigDecimal("1234.56")) + assert(rs.getBigDecimal("col2") === new java.math.BigDecimal("0.123")) + val metaData = rs.getMetaData + (1 to 3) foreach { i => + assert(metaData.getColumnName(i) === s"col${i - 1}") + assert(metaData.getColumnTypeName(i) === "decimal") + assert(metaData.getColumnType(i) === java.sql.Types.DECIMAL) + assert(metaData.getPrecision(i) == 12 - i * 3) + assert(metaData.getScale(i) == i) + } } testExecuteStatementWithProtocolVersion(version, "SELECT cast(null as decimal) ") { rs => @@ -203,6 +249,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { testExecuteStatementWithProtocolVersion(version, "SELECT 'str'") { rs => assert(rs.next()) assert(rs.getString(1) === "str") + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) ==="str") + assert(metaData.getColumnTypeName(1) === "string") + assert(metaData.getColumnType(1) === java.sql.Types.VARCHAR) + assert(metaData.getPrecision(1) === Int.MaxValue) + assert(metaData.getScale(1) === 0) } } @@ -211,6 +263,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { "SELECT cast('char-str' as char(10))") { rs => assert(rs.next()) assert(rs.getString(1) === "char-str") + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) ==="CAST(char-str AS STRING)") + assert(metaData.getColumnTypeName(1) === "string") + assert(metaData.getColumnType(1) === java.sql.Types.VARCHAR) + assert(metaData.getPrecision(1) === Int.MaxValue) + assert(metaData.getScale(1) === 0) } } @@ -219,6 +277,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { "SELECT cast('varchar-str' as varchar(10))") { rs => assert(rs.next()) assert(rs.getString(1) === "varchar-str") + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) ==="CAST(varchar-str AS STRING)") + assert(metaData.getColumnTypeName(1) === "string") + assert(metaData.getColumnType(1) === java.sql.Types.VARCHAR) + assert(metaData.getPrecision(1) === Int.MaxValue) + assert(metaData.getScale(1) === 0) } } @@ -226,6 +290,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { testExecuteStatementWithProtocolVersion(version, "SELECT cast('ABC' as binary)") { rs => assert(rs.next()) assert(rs.getString(1) === "ABC") + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) === "CAST(ABC AS BINARY)") + assert(metaData.getColumnTypeName(1) === "binary") + assert(metaData.getColumnType(1) === java.sql.Types.BINARY) + assert(metaData.getPrecision(1) === Int.MaxValue) + assert(metaData.getScale(1) === 0) } testExecuteStatementWithProtocolVersion(version, "SELECT cast(49960 as binary)") { rs => assert(rs.next()) @@ -241,6 +311,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { testExecuteStatementWithProtocolVersion(version, "SELECT true") { rs => 
assert(rs.next()) assert(rs.getBoolean(1) === true) + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) === "true") + assert(metaData.getColumnTypeName(1) === "boolean") + assert(metaData.getColumnType(1) === java.sql.Types.BOOLEAN) + assert(metaData.getPrecision(1) === 1) + assert(metaData.getScale(1) === 0) } } @@ -248,6 +324,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { testExecuteStatementWithProtocolVersion(version, "SELECT cast('2019-07-22' as date)") { rs => assert(rs.next()) assert(rs.getDate(1) === Date.valueOf("2019-07-22")) + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) === "CAST(2019-07-22 AS DATE)") + assert(metaData.getColumnTypeName(1) === "date") + assert(metaData.getColumnType(1) === java.sql.Types.DATE) + assert(metaData.getPrecision(1) === 10) + assert(metaData.getScale(1) === 0) } } @@ -256,6 +338,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { "SELECT cast('2019-07-22 18:14:00' as timestamp)") { rs => assert(rs.next()) assert(rs.getTimestamp(1) === Timestamp.valueOf("2019-07-22 18:14:00")) + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) === "CAST(2019-07-22 18:14:00 AS TIMESTAMP)") + assert(metaData.getColumnTypeName(1) === "timestamp") + assert(metaData.getColumnType(1) === java.sql.Types.TIMESTAMP) + assert(metaData.getPrecision(1) === 29) + assert(metaData.getScale(1) === 9) } } @@ -263,6 +351,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { testExecuteStatementWithProtocolVersion(version, "SELECT null") { rs => assert(rs.next()) assert(rs.getString(1) === null) + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) === "NULL") + assert(metaData.getColumnTypeName(1) === "void") + assert(metaData.getColumnType(1) === java.sql.Types.NULL) + assert(metaData.getPrecision(1) === 0) + assert(metaData.getScale(1) === 0) } } @@ -270,28 +364,67 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { testExecuteStatementWithProtocolVersion(version, "SELECT interval '1' year '2' day") { rs => assert(rs.next()) assert(rs.getString(1) === "1 years 2 days") + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) === "INTERVAL '1 years 2 days'") + assert(metaData.getColumnTypeName(1) === "string") + assert(metaData.getColumnType(1) === java.sql.Types.VARCHAR) + assert(metaData.getPrecision(1) === Int.MaxValue) + assert(metaData.getScale(1) === 0) } } test(s"$version get array type") { - testExecuteStatementWithProtocolVersion(version, "SELECT array(1, 2)") { rs => + testExecuteStatementWithProtocolVersion( + version, "SELECT array() AS col1, array(1, 2) AS col2") { rs => assert(rs.next()) - assert(rs.getString(1) === "[1,2]") + assert(rs.getString(2) === "[1,2]") + assert(rs.getObject("col1") === "[]") + assert(rs.getObject("col2") === "[1,2]") + val metaData = rs.getMetaData + (1 to 2) foreach { i => + assert(metaData.getColumnName(i) === s"col$i") + assert(metaData.getColumnTypeName(i) === "array") + assert(metaData.getColumnType(i) === java.sql.Types.ARRAY) + assert(metaData.getPrecision(i) === Int.MaxValue) + assert(metaData.getScale(i) == 0) + } } } test(s"$version get map type") { - testExecuteStatementWithProtocolVersion(version, "SELECT map(1, 2)") { rs => + testExecuteStatementWithProtocolVersion(version, + "SELECT map(), map(1, 2, 3, 4)") { rs => assert(rs.next()) - assert(rs.getString(1) === "{1:2}") + assert(rs.getObject(1) === "{}") + assert(rs.getObject(2) === "{1:2,3:4}") + 
assert(rs.getString(2) === "{1:2,3:4}") + val metaData = rs.getMetaData + (1 to 2) foreach { i => + assert(metaData.getColumnName(i).startsWith("map(")) + assert(metaData.getColumnTypeName(1) === "map") + assert(metaData.getColumnType(i) === java.sql.Types.JAVA_OBJECT) + assert(metaData.getPrecision(i) === Int.MaxValue) + assert(metaData.getScale(i) == 0) + } } } test(s"$version get struct type") { testExecuteStatementWithProtocolVersion(version, - "SELECT struct('alpha' AS A, 'beta' AS B)") { rs => + "SELECT struct('alpha' AS A, 'beta' AS B) as col0," + + " struct('1', '2') AS col1, named_struct('a', 2, 'b', 4) AS col2") { rs => assert(rs.next()) assert(rs.getString(1) === """{"A":"alpha","B":"beta"}""") + assert(rs.getObject("col1") === """{"col1":"1","col2":"2"}""") + assert(rs.getObject("col2") === """{"a":2,"b":4}""") + val metaData = rs.getMetaData + (1 to 3) foreach { i => + assert(metaData.getColumnName(i) === s"col${i - 1}") + assert(metaData.getColumnTypeName(1) === "struct") + assert(metaData.getColumnType(i) === java.sql.Types.STRUCT) + assert(metaData.getPrecision(i) === Int.MaxValue) + assert(metaData.getScale(i) == 0) + } } } From 6f36db1fa511940dd43d597b7fe337fc3d5c2558 Mon Sep 17 00:00:00 2001 From: Abhishek Dixit Date: Tue, 15 Sep 2020 08:41:22 -0500 Subject: [PATCH 0033/1009] [SPARK-31448][PYTHON] Fix storage level used in persist() in dataframe.py ### What changes were proposed in this pull request? Since the data is serialized on the Python side, we should make cache() in PySpark dataframes use StorageLevel.MEMORY_AND_DISK mode which has deserialized=false. This change was done to `pyspark/rdd.py` as part of SPARK-2014 but was missed from `pyspark/dataframe.py` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Using existing tests Closes #29242 from abhishekd0907/SPARK-31448. Authored-by: Abhishek Dixit Signed-off-by: Sean Owen --- python/pyspark/sql/dataframe.py | 7 ++++--- python/pyspark/storagelevel.py | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index b750b8a8d30a1..db2ddde00c881 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -678,13 +678,14 @@ def cache(self): return self @since(1.3) - def persist(self, storageLevel=StorageLevel.MEMORY_AND_DISK): + def persist(self, storageLevel=StorageLevel.MEMORY_AND_DISK_DESER): """Sets the storage level to persist the contents of the :class:`DataFrame` across operations after the first time it is computed. This can only be used to assign a new storage level if the :class:`DataFrame` does not have a storage level set yet. - If no storage level is specified defaults to (`MEMORY_AND_DISK`). + If no storage level is specified defaults to (`MEMORY_AND_DISK_DESER`) - .. note:: The default storage level has changed to `MEMORY_AND_DISK` to match Scala in 2.0. + .. note:: The default storage level has changed to `MEMORY_AND_DISK_DESER` to match Scala + in 3.0. 
""" self.is_cached = True javaStorageLevel = self._sc._getJavaStorageLevel(storageLevel) diff --git a/python/pyspark/storagelevel.py b/python/pyspark/storagelevel.py index 9c0d1ca661244..ecf8e5c82ea56 100644 --- a/python/pyspark/storagelevel.py +++ b/python/pyspark/storagelevel.py @@ -57,3 +57,4 @@ def __str__(self): StorageLevel.MEMORY_AND_DISK = StorageLevel(True, True, False, False) StorageLevel.MEMORY_AND_DISK_2 = StorageLevel(True, True, False, False, 2) StorageLevel.OFF_HEAP = StorageLevel(True, True, True, False, 1) +StorageLevel.MEMORY_AND_DISK_DESER = StorageLevel(True, True, False, True) From 888b343587c98ae0252311d72e20abbca8262ab3 Mon Sep 17 00:00:00 2001 From: ulysses Date: Tue, 15 Sep 2020 14:11:30 +0000 Subject: [PATCH 0034/1009] [SPARK-32827][SQL] Add spark.sql.maxMetadataStringLength config ### What changes were proposed in this pull request? Add a new config `spark.sql.maxMetadataStringLength`. This config aims to limit metadata value length, e.g. file location. ### Why are the changes needed? Some metadata have been abbreviated by `...` when I tried to add some test in `SQLQueryTestSuite`. We need to replace such value to `notIncludedMsg`. That caused we can't replace that like location value by `className` since the `className` has been abbreviated. Here is a case: ``` CREATE table explain_temp1 (key int, val int) USING PARQUET; EXPLAIN EXTENDED SELECT sum(distinct val) FROM explain_temp1; -- ignore parsed,analyzed,optimized -- The output like == Physical Plan == *HashAggregate(keys=[], functions=[sum(distinct cast(val#x as bigint)#xL)], output=[sum(DISTINCT val)#xL]) +- Exchange SinglePartition, true, [id=#x] +- *HashAggregate(keys=[], functions=[partial_sum(distinct cast(val#x as bigint)#xL)], output=[sum#xL]) +- *HashAggregate(keys=[cast(val#x as bigint)#xL], functions=[], output=[cast(val#x as bigint)#xL]) +- Exchange hashpartitioning(cast(val#x as bigint)#xL, 4), true, [id=#x] +- *HashAggregate(keys=[cast(val#x as bigint) AS cast(val#x as bigint)#xL], functions=[], output=[cast(val#x as bigint)#xL]) +- *ColumnarToRow +- FileScan parquet default.explain_temp1[val#x] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex[file:/home/runner/work/spark/spark/sql/core/spark-warehouse/org.apache.spark.sq...], PartitionFilters: ... ``` ### Does this PR introduce _any_ user-facing change? No, a new config. ### How was this patch tested? new test. Closes #29688 from ulysses-you/SPARK-32827. Authored-by: ulysses Signed-off-by: Wenchen Fan --- .../apache/spark/sql/internal/SQLConf.scala | 10 ++++++++ .../sql/execution/DataSourceScanExec.scala | 2 +- .../execution/datasources/v2/FileScan.scala | 2 +- .../spark/sql/FileBasedDataSourceSuite.scala | 23 +++++++++++++++++++ 4 files changed, 35 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 2f2b645360ed6..0d1a3e365c918 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2370,6 +2370,14 @@ object SQLConf { "(nonnegative and shorter than the maximum size).") .createWithDefaultString(s"${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}") + val MAX_METADATA_STRING_LENGTH = buildConf("spark.sql.maxMetadataStringLength") + .doc("Maximum number of characters to output for a metadata string. e.g. 
" + + "file location in `DataSourceScanExec`, every value will be abbreviated if exceed length.") + .version("3.1.0") + .intConf + .checkValue(_ > 3, "This value must be bigger than 3.") + .createWithDefault(100) + val SET_COMMAND_REJECTS_SPARK_CORE_CONFS = buildConf("spark.sql.legacy.setCommandRejectsSparkCoreConfs") .internal() @@ -3344,6 +3352,8 @@ class SQLConf extends Serializable with Logging { def maxPlanStringLength: Int = getConf(SQLConf.MAX_PLAN_STRING_LENGTH).toInt + def maxMetadataStringLength: Int = getConf(SQLConf.MAX_METADATA_STRING_LENGTH) + def setCommandRejectsSparkCoreConfs: Boolean = getConf(SQLConf.SET_COMMAND_REJECTS_SPARK_CORE_CONFS) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index ce5909a09442c..1b9ca63ea21d3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -55,7 +55,7 @@ trait DataSourceScanExec extends LeafExecNode { // Metadata that describes more details of this scan. protected def metadata: Map[String, String] - protected val maxMetadataValueLength = 100 + protected val maxMetadataValueLength = sqlContext.sessionState.conf.maxMetadataStringLength override def simpleString(maxFields: Int): String = { val metadataEntries = metadata.toSeq.sorted.map { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala index f090d7861b629..363dd154b5fbb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala @@ -95,7 +95,7 @@ trait FileScan extends Scan override def hashCode(): Int = getClass.hashCode() - val maxMetadataValueLength = 100 + val maxMetadataValueLength = sparkSession.sessionState.conf.maxMetadataStringLength override def description(): String = { val metadataStr = getMetaData().toSeq.sorted.map { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala index 48b2e22457e3c..8d6d93d13d143 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -34,6 +34,7 @@ import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.expressions.IntegralLiteralTestUtils.{negativeInt, positiveInt} import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical.Filter +import org.apache.spark.sql.execution.SimpleMode import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.datasources.FilePartition import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, DataSourceV2ScanRelation, FileScan} @@ -969,6 +970,28 @@ class FileBasedDataSourceSuite extends QueryTest } } } + + test("SPARK-32827: Set max metadata string length") { + withTempDir { dir => + val tableName = "t" + val path = s"${dir.getCanonicalPath}/$tableName" + withTable(tableName) { + sql(s"CREATE TABLE $tableName(c INT) USING PARQUET LOCATION '$path'") + withSQLConf(SQLConf.MAX_METADATA_STRING_LENGTH.key -> "5") { + val explain 
= spark.table(tableName).queryExecution.explainString(SimpleMode) + assert(!explain.contains(path)) + // metadata has abbreviated by ... + assert(explain.contains("...")) + } + + withSQLConf(SQLConf.MAX_METADATA_STRING_LENGTH.key -> "1000") { + val explain = spark.table(tableName).queryExecution.explainString(SimpleMode) + assert(explain.contains(path)) + assert(!explain.contains("...")) + } + } + } + } } object TestingUDT { From 108c4c8fdc6c839bf5f43af7a55594aa024d2eb6 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 16 Sep 2020 08:11:43 +0900 Subject: [PATCH 0035/1009] [SPARK-32481][SQL][TESTS][FOLLOW-UP] Skip the test if trash directory cannot be created ### What changes were proposed in this pull request? This PR skips the test if trash directory cannot be created. It is possible that the trash directory cannot be created, for example, by permission. And the test fails below: ``` - SPARK-32481 Move data to trash on truncate table if enabled *** FAILED *** (154 milliseconds) fs.exists(trashPath) was false (DDLSuite.scala:3184) org.scalatest.exceptions.TestFailedException: at org.scalatest.Assertions.newAssertionFailedException(Assertions.scala:530) at org.scalatest.Assertions.newAssertionFailedException$(Assertions.scala:529) at org.scalatest.FunSuite.newAssertionFailedException(FunSuite.scala:1560) at org.scalatest.Assertions$AssertionsHelper.macroAssert(Assertions.scala:503) ``` ### Why are the changes needed? To make the tests pass independently. ### Does this PR introduce _any_ user-facing change? No, test-only. ### How was this patch tested? Manually tested. Closes #29759 from HyukjinKwon/SPARK-32481. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- .../org/apache/spark/sql/execution/command/DDLSuite.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index b8ac5079b7745..adc87cd307191 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -3118,6 +3118,9 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { val fs = tablePath.getFileSystem(hadoopConf) val trashCurrent = new Path(fs.getHomeDirectory, ".Trash/Current") val trashPath = Path.mergePaths(trashCurrent, tablePath) + assume( + fs.mkdirs(trashPath) && fs.delete(trashPath, false), + "Trash directory could not be created, skipping.") assert(!fs.exists(trashPath)) try { hadoopConf.set(trashIntervalKey, "5") From b46c7302db73ee3671035ccfd8f51297b4d5e10e Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 16 Sep 2020 12:06:57 +0900 Subject: [PATCH 0036/1009] [SPARK-32704][SQL][TESTS][FOLLOW-UP] Check any physical rule instead of a specific rule in the test ### What changes were proposed in this pull request? This PR only checks if there's any physical rule runs instead of a specific rule. This is rather just a trivial fix to make the tests more robust. In fact, I faced a test failure from a in-house fork that applies a different physical rule that makes `CollapseCodegenStages` ineffective. ### Why are the changes needed? To make the test more robust by unrelated changes. ### Does this PR introduce _any_ user-facing change? No, test-only ### How was this patch tested? Manually tested. Jenkins tests should pass. Closes #29766 from HyukjinKwon/SPARK-32704. 
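The essence of the relaxation can be shown in isolation: assert on a stable package prefix of the rule-logging output instead of one concrete rule name, so the check survives a different physical preparation rule being applied. In this framework-free sketch the captured messages are made-up stand-ins for what a test log appender would record:

```scala
object LogAssertionSketch {
  // Made-up messages standing in for a captured test log.
  private val loggedMessages = Seq(
    "=== Applying Rule org.apache.spark.sql.execution.SomeOtherPhysicalRule ===",
    "=== Result of Batch Preparations ===")

  def main(args: Array[String]): Unit = {
    // Brittle: tied to one concrete rule name, fails if another rule runs instead.
    val brittle = loggedMessages.exists(
      _.contains("=== Applying Rule org.apache.spark.sql.execution.CollapseCodegenStages ==="))
    // Robust: any physical rule from the execution package satisfies the check.
    val robust = loggedMessages.exists(
      _.contains("=== Applying Rule org.apache.spark.sql.execution"))
    assert(!brittle && robust)
  }
}
```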
Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- .../org/apache/spark/sql/execution/QueryExecutionSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala index 83c80b4f3eb08..585ce4e40471d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala @@ -219,7 +219,7 @@ class QueryExecutionSuite extends SharedSparkSession { spark.range(1).groupBy("id").count().queryExecution.executedPlan } } - Seq("=== Applying Rule org.apache.spark.sql.execution.CollapseCodegenStages ===", + Seq("=== Applying Rule org.apache.spark.sql.execution", "=== Result of Batch Preparations ===").foreach { expectedMsg => assert(testAppender.loggingEvents.exists(_.getRenderedMessage.contains(expectedMsg))) } From 6051755bfe23a0e4564bf19476ec34cd7fd6008d Mon Sep 17 00:00:00 2001 From: Tanel Kiis Date: Wed, 16 Sep 2020 12:13:15 +0900 Subject: [PATCH 0037/1009] [SPARK-32688][SQL][TEST] Add special values to LiteralGenerator for float and double ### What changes were proposed in this pull request? The `LiteralGenerator` for float and double datatypes was supposed to yield special values (NaN, +-inf) among others, but the `Gen.chooseNum` method does not yield values that are outside the defined range. The `Gen.chooseNum` for a wide range of floats and doubles does not yield values in the "everyday" range as stated in https://github.com/typelevel/scalacheck/issues/113 . There is an similar class `RandomDataGenerator` that is used in some other tests. Added `-0.0` and `-0.0f` as special values to there too. These changes revealed an inconsistency with the equality check between `-0.0` and `0.0`. ### Why are the changes needed? The `LiteralGenerator` is mostly used in the `checkConsistencyBetweenInterpretedAndCodegen` method in `MathExpressionsSuite`. This change would have caught the bug fixed in #29495 . ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Locally reverted #29495 and verified that the existing test cases caught the bug. Closes #29515 from tanelk/SPARK-32688. 
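To make the generator pattern being adopted concrete — roughly half hand-picked corner cases, half arbitrary values — here is a standalone ScalaCheck sketch. It assumes `org.scalacheck` is on the classpath and uses a plain `Gen[Double]` rather than Spark's `Literal` wrapper:

```scala
import org.scalacheck.{Arbitrary, Gen}

object SpecialDoubleGen {
  // Corner cases that Gen.chooseNum over a huge range practically never produces.
  private val specialDoubles: Gen[Double] = Gen.oneOf(
    Double.NaN, Double.PositiveInfinity, Double.NegativeInfinity,
    Double.MinPositiveValue, Double.MaxValue, -Double.MaxValue,
    0.0, -0.0, 1.0, -1.0)

  // Gen.oneOf over two generators picks each about half the time, mirroring
  // the behavior of the integral literal generators.
  val doubleGen: Gen[Double] = Gen.oneOf(specialDoubles, Arbitrary.arbDouble.arbitrary)

  def main(args: Array[String]): Unit = {
    // Draw a few samples; sample() returns Option[Double].
    List.fill(20)(doubleGen.sample).flatten.foreach(println)
  }
}
```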
Authored-by: Tanel Kiis Signed-off-by: Takeshi Yamamuro --- .../spark/sql/RandomDataGenerator.scala | 4 ++-- .../expressions/LiteralGenerator.scala | 19 +++++++++++++++---- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala index 6bd7a27ac11f1..9fa27c7df3832 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala @@ -260,10 +260,10 @@ object RandomDataGenerator { new MathContext(precision)).bigDecimal) case DoubleType => randomNumeric[Double]( rand, r => longBitsToDouble(r.nextLong()), Seq(Double.MinValue, Double.MinPositiveValue, - Double.MaxValue, Double.PositiveInfinity, Double.NegativeInfinity, Double.NaN, 0.0)) + Double.MaxValue, Double.PositiveInfinity, Double.NegativeInfinity, Double.NaN, 0.0, -0.0)) case FloatType => randomNumeric[Float]( rand, r => intBitsToFloat(r.nextInt()), Seq(Float.MinValue, Float.MinPositiveValue, - Float.MaxValue, Float.PositiveInfinity, Float.NegativeInfinity, Float.NaN, 0.0f)) + Float.MaxValue, Float.PositiveInfinity, Float.NegativeInfinity, Float.NaN, 0.0f, -0.0f)) case ByteType => randomNumeric[Byte]( rand, _.nextInt().toByte, Seq(Byte.MinValue, Byte.MaxValue, 0.toByte)) case IntegerType => randomNumeric[Int]( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala index d92eb01b69bf0..c8e3b0e157319 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala @@ -68,16 +68,27 @@ object LiteralGenerator { lazy val longLiteralGen: Gen[Literal] = for { l <- Arbitrary.arbLong.arbitrary } yield Literal.create(l, LongType) + // The floatLiteralGen and doubleLiteralGen will 50% of the time yield arbitrary values + // and 50% of the time will yield some special values that are more likely to reveal + // corner cases. This behavior is similar to the integral value generators. lazy val floatLiteralGen: Gen[Literal] = for { - f <- Gen.chooseNum(Float.MinValue / 2, Float.MaxValue / 2, - Float.NaN, Float.PositiveInfinity, Float.NegativeInfinity) + f <- Gen.oneOf( + Gen.oneOf( + Float.NaN, Float.PositiveInfinity, Float.NegativeInfinity, Float.MinPositiveValue, + Float.MaxValue, -Float.MaxValue, 0.0f, -0.0f, 1.0f, -1.0f), + Arbitrary.arbFloat.arbitrary + ) } yield Literal.create(f, FloatType) lazy val doubleLiteralGen: Gen[Literal] = for { - f <- Gen.chooseNum(Double.MinValue / 2, Double.MaxValue / 2, - Double.NaN, Double.PositiveInfinity, Double.NegativeInfinity) + f <- Gen.oneOf( + Gen.oneOf( + Double.NaN, Double.PositiveInfinity, Double.NegativeInfinity, Double.MinPositiveValue, + Double.MaxValue, -Double.MaxValue, 0.0, -0.0, 1.0, -1.0), + Arbitrary.arbDouble.arbitrary + ) } yield Literal.create(f, DoubleType) // TODO cache the generated data From 2e3aa2f0232a539346da3df8a20cd8e7c2b7dd4f Mon Sep 17 00:00:00 2001 From: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Date: Wed, 16 Sep 2020 06:05:35 +0000 Subject: [PATCH 0038/1009] [SPARK-32861][SQL] GenerateExec should require column ordering ### What changes were proposed in this pull request? 
This PR updates the `RemoveRedundantProjects` rule to make `GenerateExec` require column ordering. ### Why are the changes needed? `GenerateExec` was originally considered as a node that does not require column ordering. However, `GenerateExec` binds its input rows directly with its `requiredChildOutput` without using the child's output schema. In `doExecute()`: ```scala val proj = UnsafeProjection.create(output, output) ``` In `doConsume()`: ```scala val values = if (requiredChildOutput.nonEmpty) { input } else { Seq.empty } ``` In this case, changing input column ordering will result in `GenerateExec` binding the wrong schema to the input columns. For example, if we do not require child columns to be ordered, the `requiredChildOutput` [a, b, c] will directly bind to the schema of the input columns [c, b, a], which is incorrect: ``` GenerateExec explode(array(a, b, c)), [a, b, c], false, [d] HashAggregate(keys=[a, b, c], functions=[], output=[c, b, a]) ... ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test Closes #29734 from allisonwang-db/generator. Authored-by: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Signed-off-by: Wenchen Fan --- .../execution/RemoveRedundantProjects.scala | 4 +- .../RemoveRedundantProjectsSuite.scala | 54 ++++++++++++++++--- 2 files changed, 51 insertions(+), 7 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala index ecb4ad0f6e8dd..2bcf86edbea37 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala @@ -62,7 +62,9 @@ case class RemoveRedundantProjects(conf: SQLConf) extends Rule[SparkPlan] { val keepOrdering = a.aggregateExpressions .exists(ae => ae.mode.equals(Final) || ae.mode.equals(PartialMerge)) a.mapChildren(removeProject(_, keepOrdering)) - case g: GenerateExec => g.mapChildren(removeProject(_, false)) + // GenerateExec requires column ordering since it binds input rows directly with its + // requiredChildOutput without using child's output schema. + case g: GenerateExec => g.mapChildren(removeProject(_, true)) // JoinExec ordering requirement will inherit from its parent. If there is no ProjectExec in // its ancestors, JoinExec should require output columns to be ordered. 
case o => o.mapChildren(removeProject(_, requireOrdering)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala index bc24436c5806a..930935f077665 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala @@ -18,17 +18,21 @@ package org.apache.spark.sql.execution import org.apache.spark.sql.{DataFrame, QueryTest, Row} +import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanHelper, DisableAdaptiveExecutionSuite, EnableAdaptiveExecutionSuite} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.StructType import org.apache.spark.util.Utils -class RemoveRedundantProjectsSuite extends QueryTest with SharedSparkSession with SQLTestUtils { +abstract class RemoveRedundantProjectsSuiteBase + extends QueryTest + with SharedSparkSession + with AdaptiveSparkPlanHelper { private def assertProjectExecCount(df: DataFrame, expected: Int): Unit = { withClue(df.queryExecution) { val plan = df.queryExecution.executedPlan - val actual = plan.collectWithSubqueries { case p: ProjectExec => p }.size + val actual = collectWithSubqueries(plan) { case p: ProjectExec => p }.size assert(actual == expected) } } @@ -115,9 +119,41 @@ class RemoveRedundantProjectsSuite extends QueryTest with SharedSparkSession wit assertProjectExec(query, 1, 2) } - test("generate") { - val query = "select a, key, explode(d) from testView where a > 10" - assertProjectExec(query, 0, 1) + test("generate should require column ordering") { + withTempView("testData") { + spark.range(0, 10, 1) + .selectExpr("id as key", "id * 2 as a", "id * 3 as b") + .createOrReplaceTempView("testData") + + val data = sql("select key, a, b, count(*) from testData group by key, a, b limit 2") + val df = data.selectExpr("a", "b", "key", "explode(array(key, a, b)) as d").filter("d > 0") + df.collect() + val plan = df.queryExecution.executedPlan + val numProjects = collectWithSubqueries(plan) { case p: ProjectExec => p }.length + + // Create a new plan that reverse the GenerateExec output and add a new ProjectExec between + // GenerateExec and its child. This is to test if the ProjectExec is removed, the output of + // the query will be incorrect. + val newPlan = stripAQEPlan(plan) transform { + case g @ GenerateExec(_, requiredChildOutput, _, _, child) => + g.copy(requiredChildOutput = requiredChildOutput.reverse, + child = ProjectExec(requiredChildOutput.reverse, child)) + } + + // Re-apply remove redundant project rule. + val rule = RemoveRedundantProjects(spark.sessionState.conf) + val newExecutedPlan = rule.apply(newPlan) + // The manually added ProjectExec node shouldn't be removed. + assert(collectWithSubqueries(newExecutedPlan) { + case p: ProjectExec => p + }.size == numProjects + 1) + + // Check the original plan's output and the new plan's output are the same. 
+ val expectedRows = plan.executeCollect() + val actualRows = newExecutedPlan.executeCollect() + assert(expectedRows.length == actualRows.length) + expectedRows.zip(actualRows).foreach { case (expected, actual) => assert(expected == actual) } + } } test("subquery") { @@ -131,3 +167,9 @@ class RemoveRedundantProjectsSuite extends QueryTest with SharedSparkSession wit } } } + +class RemoveRedundantProjectsSuite extends RemoveRedundantProjectsSuiteBase + with DisableAdaptiveExecutionSuite + +class RemoveRedundantProjectsSuiteAE extends RemoveRedundantProjectsSuiteBase + with EnableAdaptiveExecutionSuite From 550c1c9cfb5e6439cdd835388fe90a9ca1ebc695 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 16 Sep 2020 20:16:15 +0900 Subject: [PATCH 0039/1009] [SPARK-32888][DOCS] Add user document about header flag and RDD as path for reading CSV ### What changes were proposed in this pull request? This proposes to enhance user document of the API for loading a Dataset of strings storing CSV rows. If the header option is set to true, the API will remove all lines same with the header. ### Why are the changes needed? This behavior can confuse users. We should explicitly document it. ### Does this PR introduce _any_ user-facing change? No. Only doc change. ### How was this patch tested? Only doc change. Closes #29765 from viirya/SPARK-32888. Authored-by: Liang-Chi Hsieh Signed-off-by: HyukjinKwon --- python/pyspark/sql/readwriter.py | 3 +++ .../src/main/scala/org/apache/spark/sql/DataFrameReader.scala | 3 +++ 2 files changed, 6 insertions(+) diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 982ab38f73654..ae715eea70b6d 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -430,6 +430,9 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non character. By default (None), it is disabled. :param header: uses the first line as names of columns. If None is set, it uses the default value, ``false``. + .. note:: if the given path is a RDD of Strings, this header + option will remove all lines same with the header if exists. + :param inferSchema: infers the input schema automatically from data. It requires one extra pass over the data. If None is set, it uses the default value, ``false``. :param enforceSchema: If it is set to ``true``, the specified or inferred schema will be diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index b0d06e862ca7b..bd986d0138256 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -600,6 +600,9 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * If the enforceSchema is set to `false`, only the CSV header in the first line is checked * to conform specified or inferred schema. * + * @note if `header` option is set to `true` when calling this API, all lines same with + * the header will be removed if exists. + * * @param csvDataset input Dataset with one CSV row per record * @since 2.2.0 */ From e88429058723572b95502fd369f7c2c609c561e6 Mon Sep 17 00:00:00 2001 From: Adam Binford Date: Wed, 16 Sep 2020 20:18:36 +0900 Subject: [PATCH 0040/1009] [SPARK-32835][PYTHON] Add withField method to the pyspark Column class ### What changes were proposed in this pull request? 
This PR adds a `withField` method on the pyspark Column class to call the Scala API method added in https://github.com/apache/spark/pull/27066. ### Why are the changes needed? To update the Python API to match a new feature in the Scala API. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New unit test Closes #29699 from Kimahriman/feature/pyspark-with-field. Authored-by: Adam Binford Signed-off-by: HyukjinKwon --- python/pyspark/sql/column.py | 29 +++++++++++++++++++++++++ python/pyspark/sql/tests/test_column.py | 16 ++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py index 8c08d5cfa692b..0e073d2a5da28 100644 --- a/python/pyspark/sql/column.py +++ b/python/pyspark/sql/column.py @@ -329,6 +329,35 @@ def getField(self, name): DeprecationWarning) return self[name] + @since(3.1) + def withField(self, fieldName, col): + """ + An expression that adds/replaces a field in :class:`StructType` by name. + + >>> from pyspark.sql import Row + >>> from pyspark.sql.functions import lit + >>> df = spark.createDataFrame([Row(a=Row(b=1, c=2))]) + >>> df.withColumn('a', df['a'].withField('b', lit(3))).select('a.b').show() + +---+ + | b| + +---+ + | 3| + +---+ + >>> df.withColumn('a', df['a'].withField('d', lit(4))).select('a.d').show() + +---+ + | d| + +---+ + | 4| + +---+ + """ + if not isinstance(fieldName, str): + raise TypeError("fieldName should be a string") + + if not isinstance(col, Column): + raise TypeError("col should be a Column") + + return Column(self._jc.withField(fieldName, col._jc)) + def __getattr__(self, item): if item.startswith("__"): raise AttributeError(item) diff --git a/python/pyspark/sql/tests/test_column.py b/python/pyspark/sql/tests/test_column.py index 5e05a8b63b259..8a89e6e9d5599 100644 --- a/python/pyspark/sql/tests/test_column.py +++ b/python/pyspark/sql/tests/test_column.py @@ -139,6 +139,22 @@ def test_bitwise_operations(self): result = df.select(functions.bitwiseNOT(df.b)).collect()[0].asDict() self.assertEqual(~75, result['~b']) + def test_with_field(self): + from pyspark.sql.functions import lit, col + df = self.spark.createDataFrame([Row(a=Row(b=1, c=2))]) + self.assertIsInstance(df['a'].withField('b', lit(3)), Column) + self.assertIsInstance(df['a'].withField('d', lit(3)), Column) + result = df.withColumn('a', df['a'].withField('d', lit(3))).collect()[0].asDict() + self.assertEqual(3, result['a']['d']) + result = df.withColumn('a', df['a'].withField('b', lit(3))).collect()[0].asDict() + self.assertEqual(3, result['a']['b']) + + self.assertRaisesRegex(TypeError, + 'col should be a Column', + lambda: df['a'].withField('b', 3)) + self.assertRaisesRegex(TypeError, + 'fieldName should be a string', + lambda: df['a'].withField(col('b'), lit(3))) if __name__ == "__main__": import unittest From c918909c1a173505e9150f01ac7882fc621cd769 Mon Sep 17 00:00:00 2001 From: zero323 Date: Wed, 16 Sep 2020 20:22:11 +0900 Subject: [PATCH 0041/1009] [SPARK-32814][PYTHON] Replace __metaclass__ field with metaclass keyword ### What changes were proposed in this pull request? Replace `__metaclass__` fields with `metaclass` keyword in the class statements. ### Why are the changes needed? `__metaclass__` is no longer supported in Python 3. This means, for example, that types are no longer handled as singletons. 
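The reason is that Python 3 ignores a class-level `__metaclass__` attribute entirely; a metaclass now has to be supplied as a keyword argument in the `class` statement. A minimal sketch of the difference, using a made-up singleton metaclass (`Singleton`, `OldStyle`, `NewStyle` are illustrative names, not classes from this patch):
```python
class Singleton(type):
    """A metaclass that caches a single instance per class."""
    _instances = {}

    def __call__(cls, *args, **kwargs):
        if cls not in cls._instances:
            cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
        return cls._instances[cls]


class OldStyle(object):
    # Python 2 idiom: in Python 3 this is just an ordinary class attribute,
    # so the metaclass is never applied and OldStyle() is OldStyle() evaluates to False.
    __metaclass__ = Singleton


class NewStyle(metaclass=Singleton):
    # Python 3 idiom: the metaclass is applied, so NewStyle() is NewStyle() evaluates to True.
    pass
```
Under the attribute form, the PySpark classes hit exactly the breakage demonstrated below.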
``` >>> from pyspark.sql.types import BooleanType >>> BooleanType() is BooleanType() False ``` and classes, which suppose to be abstract, are not ``` >>> import inspect >>> from pyspark.ml import Estimator >>> inspect.isabstract(Estimator) False ``` ### Does this PR introduce _any_ user-facing change? Yes (classes which were no longer abstract or singleton in Python 3, are now), though visible changes should be consider a bug-fix. ### How was this patch tested? Existing tests. Closes #29664 from zero323/SPARK-32138-FOLLOW-UP-METACLASS. Authored-by: zero323 Signed-off-by: HyukjinKwon --- python/pyspark/ml/base.py | 23 ++++++--------- python/pyspark/ml/classification.py | 28 +++++++------------ python/pyspark/ml/evaluation.py | 9 ++---- python/pyspark/ml/param/__init__.py | 4 +-- python/pyspark/ml/regression.py | 20 ++++++-------- python/pyspark/ml/wrapper.py | 21 ++++---------- python/pyspark/sql/types.py | 43 +++++++++++------------------ 7 files changed, 52 insertions(+), 96 deletions(-) diff --git a/python/pyspark/ml/base.py b/python/pyspark/ml/base.py index fabfc3253e6f0..f1ae123250321 100644 --- a/python/pyspark/ml/base.py +++ b/python/pyspark/ml/base.py @@ -68,14 +68,13 @@ def next(self): @inherit_doc -class Estimator(Params): +class Estimator(Params, metaclass=ABCMeta): """ Abstract class for estimators that fit models to data. .. versionadded:: 1.3.0 """ - - __metaclass__ = ABCMeta + pass @abstractmethod def _fit(self, dataset): @@ -134,14 +133,13 @@ def fit(self, dataset, params=None): @inherit_doc -class Transformer(Params): +class Transformer(Params, metaclass=ABCMeta): """ Abstract class for transformers that transform one dataset into another. .. versionadded:: 1.3.0 """ - - __metaclass__ = ABCMeta + pass @abstractmethod def _transform(self, dataset): @@ -174,14 +172,13 @@ def transform(self, dataset, params=None): @inherit_doc -class Model(Transformer): +class Model(Transformer, metaclass=ABCMeta): """ Abstract class for models that are fitted by estimators. .. versionadded:: 1.4.0 """ - - __metaclass__ = ABCMeta + pass @inherit_doc @@ -258,13 +255,11 @@ class _PredictorParams(HasLabelCol, HasFeaturesCol, HasPredictionCol): @inherit_doc -class Predictor(Estimator, _PredictorParams): +class Predictor(Estimator, _PredictorParams, metaclass=ABCMeta): """ Estimator for prediction tasks (regression and classification). """ - __metaclass__ = ABCMeta - @since("3.0.0") def setLabelCol(self, value): """ @@ -288,13 +283,11 @@ def setPredictionCol(self, value): @inherit_doc -class PredictionModel(Model, _PredictorParams): +class PredictionModel(Model, _PredictorParams, metaclass=ABCMeta): """ Model for prediction tasks (regression and classification). """ - __metaclass__ = ABCMeta - @since("3.0.0") def setFeaturesCol(self, value): """ diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 6df425211242f..b5261b30d89e4 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -73,14 +73,12 @@ class _ClassifierParams(HasRawPredictionCol, _PredictorParams): @inherit_doc -class Classifier(Predictor, _ClassifierParams): +class Classifier(Predictor, _ClassifierParams, metaclass=ABCMeta): """ Classifier for classification tasks. Classes are indexed {0, 1, ..., numClasses - 1}. 
""" - __metaclass__ = ABCMeta - @since("3.0.0") def setRawPredictionCol(self, value): """ @@ -90,14 +88,12 @@ def setRawPredictionCol(self, value): @inherit_doc -class ClassificationModel(PredictionModel, _ClassifierParams): +class ClassificationModel(PredictionModel, _ClassifierParams, metaclass=ABCMeta): """ Model produced by a ``Classifier``. Classes are indexed {0, 1, ..., numClasses - 1}. """ - __metaclass__ = ABCMeta - @since("3.0.0") def setRawPredictionCol(self, value): """ @@ -133,13 +129,12 @@ class _ProbabilisticClassifierParams(HasProbabilityCol, HasThresholds, _Classifi @inherit_doc -class ProbabilisticClassifier(Classifier, _ProbabilisticClassifierParams): +class ProbabilisticClassifier(Classifier, _ProbabilisticClassifierParams, + metaclass=ABCMeta): """ Probabilistic Classifier for classification tasks. """ - __metaclass__ = ABCMeta - @since("3.0.0") def setProbabilityCol(self, value): """ @@ -157,13 +152,12 @@ def setThresholds(self, value): @inherit_doc class ProbabilisticClassificationModel(ClassificationModel, - _ProbabilisticClassifierParams): + _ProbabilisticClassifierParams, + metaclass=ABCMeta): """ Model produced by a ``ProbabilisticClassifier``. """ - __metaclass__ = ABCMeta - @since("3.0.0") def setProbabilityCol(self, value): """ @@ -188,14 +182,12 @@ def predictProbability(self, value): @inherit_doc -class _JavaClassifier(Classifier, JavaPredictor): +class _JavaClassifier(Classifier, JavaPredictor, metaclass=ABCMeta): """ Java Classifier for classification tasks. Classes are indexed {0, 1, ..., numClasses - 1}. """ - __metaclass__ = ABCMeta - @since("3.0.0") def setRawPredictionCol(self, value): """ @@ -229,12 +221,12 @@ def predictRaw(self, value): @inherit_doc -class _JavaProbabilisticClassifier(ProbabilisticClassifier, _JavaClassifier): +class _JavaProbabilisticClassifier(ProbabilisticClassifier, _JavaClassifier, + metaclass=ABCMeta): """ Java Probabilistic Classifier for classification tasks. """ - - __metaclass__ = ABCMeta + pass @inherit_doc diff --git a/python/pyspark/ml/evaluation.py b/python/pyspark/ml/evaluation.py index a69a57f588571..354921e9e04b1 100644 --- a/python/pyspark/ml/evaluation.py +++ b/python/pyspark/ml/evaluation.py @@ -32,14 +32,13 @@ @inherit_doc -class Evaluator(Params): +class Evaluator(Params, metaclass=ABCMeta): """ Base class for evaluators that compute metrics from predictions. .. versionadded:: 1.4.0 """ - - __metaclass__ = ABCMeta + pass @abstractmethod def _evaluate(self, dataset): @@ -84,14 +83,12 @@ def isLargerBetter(self): @inherit_doc -class JavaEvaluator(JavaParams, Evaluator): +class JavaEvaluator(JavaParams, Evaluator, metaclass=ABCMeta): """ Base class for :py:class:`Evaluator`s that wrap Java/Scala implementations. """ - __metaclass__ = ABCMeta - def _evaluate(self, dataset): """ Evaluates the output. diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py index 95f3c32b8bcec..1853a8816ff58 100644 --- a/python/pyspark/ml/param/__init__.py +++ b/python/pyspark/ml/param/__init__.py @@ -223,7 +223,7 @@ def toBoolean(value): raise TypeError("Boolean Param requires value of type bool. Found %s." % type(value)) -class Params(Identifiable): +class Params(Identifiable, metaclass=ABCMeta): """ Components that take parameters. This also provides an internal param map to store parameter values attached to the instance. @@ -231,8 +231,6 @@ class Params(Identifiable): .. 
versionadded:: 1.3.0 """ - __metaclass__ = ABCMeta - def __init__(self): super(Params, self).__init__() #: internal param map for user-supplied values param map diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 6bd32ed1d636d..e1b7ffb63f8fe 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -48,45 +48,41 @@ 'FMRegressor', 'FMRegressionModel'] -class Regressor(Predictor, _PredictorParams): +class Regressor(Predictor, _PredictorParams, metaclass=ABCMeta): """ Regressor for regression tasks. .. versionadded:: 3.0.0 """ + pass - __metaclass__ = ABCMeta - -class RegressionModel(PredictionModel, _PredictorParams): +class RegressionModel(PredictionModel, _PredictorParams, metaclass=ABCMeta): """ Model produced by a ``Regressor``. .. versionadded:: 3.0.0 """ - - __metaclass__ = ABCMeta + pass -class _JavaRegressor(Regressor, JavaPredictor): +class _JavaRegressor(Regressor, JavaPredictor, metaclass=ABCMeta): """ Java Regressor for regression tasks. .. versionadded:: 3.0.0 """ + pass - __metaclass__ = ABCMeta - -class _JavaRegressionModel(RegressionModel, JavaPredictionModel): +class _JavaRegressionModel(RegressionModel, JavaPredictionModel, metaclass=ABCMeta): """ Java Model produced by a ``_JavaRegressor``. To be mixed in with :class:`pyspark.ml.JavaModel` .. versionadded:: 3.0.0 """ - - __metaclass__ = ABCMeta + pass class _LinearRegressionParams(_PredictorParams, HasRegParam, HasElasticNetParam, HasMaxIter, diff --git a/python/pyspark/ml/wrapper.py b/python/pyspark/ml/wrapper.py index c1d060a51cf9d..da52788afea72 100644 --- a/python/pyspark/ml/wrapper.py +++ b/python/pyspark/ml/wrapper.py @@ -109,7 +109,7 @@ def _new_java_array(pylist, java_class): @inherit_doc -class JavaParams(JavaWrapper, Params): +class JavaParams(JavaWrapper, Params, metaclass=ABCMeta): """ Utility class to help create wrapper classes from Java/Scala implementations of pipeline components. @@ -117,8 +117,6 @@ class JavaParams(JavaWrapper, Params): #: The param values in the Java object should be #: synced with the Python wrapper in fit/transform/evaluate/copy. - __metaclass__ = ABCMeta - def _make_java_param_pair(self, param, value): """ Makes a Java param pair. @@ -287,14 +285,12 @@ def clear(self, param): @inherit_doc -class JavaEstimator(JavaParams, Estimator): +class JavaEstimator(JavaParams, Estimator, metaclass=ABCMeta): """ Base class for :py:class:`Estimator`s that wrap Java/Scala implementations. """ - __metaclass__ = ABCMeta - @abstractmethod def _create_model(self, java_model): """ @@ -321,30 +317,26 @@ def _fit(self, dataset): @inherit_doc -class JavaTransformer(JavaParams, Transformer): +class JavaTransformer(JavaParams, Transformer, metaclass=ABCMeta): """ Base class for :py:class:`Transformer`s that wrap Java/Scala implementations. Subclasses should ensure they have the transformer Java object available as _java_obj. """ - __metaclass__ = ABCMeta - def _transform(self, dataset): self._transfer_params_to_java() return DataFrame(self._java_obj.transform(dataset._jdf), dataset.sql_ctx) @inherit_doc -class JavaModel(JavaTransformer, Model): +class JavaModel(JavaTransformer, Model, metaclass=ABCMeta): """ Base class for :py:class:`Model`s that wrap Java/Scala implementations. Subclasses should inherit this class before param mix-ins, because this sets the UID from the Java model. """ - __metaclass__ = ABCMeta - def __init__(self, java_model=None): """ Initialize this instance with a Java model object. 
@@ -374,12 +366,11 @@ def __repr__(self): @inherit_doc -class JavaPredictor(Predictor, JavaEstimator, _PredictorParams): +class JavaPredictor(Predictor, JavaEstimator, _PredictorParams, metaclass=ABCMeta): """ (Private) Java Estimator for prediction tasks (regression and classification). """ - - __metaclass__ = ABCMeta + pass @inherit_doc diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 43f3a8531871a..5a89d5ab9a7e5 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -102,13 +102,12 @@ def __call__(cls): return cls._instances[cls] -class NullType(DataType): +class NullType(DataType, metaclass=DataTypeSingleton): """Null type. The data type representing None, used for the types that cannot be inferred. """ - - __metaclass__ = DataTypeSingleton + pass class AtomicType(DataType): @@ -121,11 +120,10 @@ class NumericType(AtomicType): """ -class IntegralType(NumericType): +class IntegralType(NumericType, metaclass=DataTypeSingleton): """Integral data types. """ - - __metaclass__ = DataTypeSingleton + pass class FractionalType(NumericType): @@ -133,33 +131,28 @@ class FractionalType(NumericType): """ -class StringType(AtomicType): +class StringType(AtomicType, metaclass=DataTypeSingleton): """String data type. """ - - __metaclass__ = DataTypeSingleton + pass -class BinaryType(AtomicType): +class BinaryType(AtomicType, metaclass=DataTypeSingleton): """Binary (byte array) data type. """ + pass - __metaclass__ = DataTypeSingleton - -class BooleanType(AtomicType): +class BooleanType(AtomicType, metaclass=DataTypeSingleton): """Boolean data type. """ - - __metaclass__ = DataTypeSingleton + pass -class DateType(AtomicType): +class DateType(AtomicType, metaclass=DataTypeSingleton): """Date (datetime.date) data type. """ - __metaclass__ = DataTypeSingleton - EPOCH_ORDINAL = datetime.datetime(1970, 1, 1).toordinal() def needConversion(self): @@ -174,12 +167,10 @@ def fromInternal(self, v): return datetime.date.fromordinal(v + self.EPOCH_ORDINAL) -class TimestampType(AtomicType): +class TimestampType(AtomicType, metaclass=DataTypeSingleton): """Timestamp (datetime.datetime) data type. """ - __metaclass__ = DataTypeSingleton - def needConversion(self): return True @@ -226,18 +217,16 @@ def __repr__(self): return "DecimalType(%d,%d)" % (self.precision, self.scale) -class DoubleType(FractionalType): +class DoubleType(FractionalType, metaclass=DataTypeSingleton): """Double data type, representing double precision floats. """ + pass - __metaclass__ = DataTypeSingleton - -class FloatType(FractionalType): +class FloatType(FractionalType, metaclass=DataTypeSingleton): """Float data type, representing single precision floats. """ - - __metaclass__ = DataTypeSingleton + pass class ByteType(IntegralType): From 3bc13e641257182dde097d759555698701a2fcc3 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Wed, 16 Sep 2020 14:08:59 +0000 Subject: [PATCH 0042/1009] [SPARK-32706][SQL] Improve cast string to decimal type ### What changes were proposed in this pull request? This pr makes cast string type to decimal decimal type fast fail if precision larger that 38. ### Why are the changes needed? It is very slow if precision very large. 
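At the SQL level the symptom is easy to reproduce. The snippet below is a minimal PySpark sketch (the session setup and the column alias are illustrative; the string literal is the one used in the benchmark that follows): under the default non-ANSI mode the cast still yields NULL, but with the fast-fail check the NULL comes from a cheap precision test on the parsed value instead of from materializing the huge decimal first.
```python
from pyspark.sql import SparkSession

# Illustrative session setup; any SparkSession works here.
spark = SparkSession.builder.master("local[1]").appName("decimal-cast-sketch").getOrCreate()

# A value whose precision is far beyond DecimalType.MAX_PRECISION (38).
# It cannot be represented as DECIMAL(38, 0), so the result is NULL in the
# default (non-ANSI) mode -- but before this change Spark only discovered that
# after building the enormous intermediate value, which could take seconds per
# value (see the benchmark below).
spark.sql("SELECT CAST('6.0790316E+25569151' AS DECIMAL(38, 0)) AS d").show()
# +----+
# |   d|
# +----+
# |null|
# +----+
```
With `spark.sql.ansi.enabled=true` the same cast raises an error instead of returning NULL; the patch fast-fails that path as well via `Decimal.fromStringANSI`.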
Benchmark and benchmark result: ```scala import org.apache.spark.benchmark.Benchmark val bd1 = new java.math.BigDecimal("6.0790316E+25569151") val bd2 = new java.math.BigDecimal("6.0790316E+25"); val benchmark = new Benchmark("Benchmark string to decimal", 1, minNumIters = 2) benchmark.addCase(bd1.toString) { _ => println(Decimal(bd1).precision) } benchmark.addCase(bd2.toString) { _ => println(Decimal(bd2).precision) } benchmark.run() ``` ``` Java HotSpot(TM) 64-Bit Server VM 1.8.0_251-b08 on Mac OS X 10.15.6 Intel(R) Core(TM) i9-9980HK CPU 2.40GHz Benchmark string to decimal: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ 6.0790316E+25569151 9340 9381 57 0.0 9340094625.0 1.0X 6.0790316E+25 0 0 0 0.5 2150.0 4344230.1X ``` Stacktrace: ![image](https://user-images.githubusercontent.com/5399861/92941705-4c868980-f483-11ea-8a15-b93acde8c0f4.png) ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test and benchmark test: Dataset | Before this pr (Seconds) | After this pr (Seconds) -- | -- | -- https://issues.apache.org/jira/secure/attachment/13011406/part-00000.parquet | 2640 | 2 Closes #29731 from wangyum/SPARK-32706. Authored-by: Yuming Wang Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/expressions/Cast.scala | 41 ++++++-------- .../org/apache/spark/sql/types/Decimal.scala | 52 +++++++++++++++-- .../sql/catalyst/expressions/CastSuite.scala | 56 +++++++++++++++++++ .../apache/spark/sql/types/DecimalSuite.scala | 30 ++++++++++ 4 files changed, 152 insertions(+), 27 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index caa8ceea0ab91..96154917e1637 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -669,19 +669,13 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit private[this] def castToDecimal(from: DataType, target: DecimalType): Any => Any = from match { - case StringType => - buildCast[UTF8String](_, s => try { - // According the benchmark test, `s.toString.trim` is much faster than `s.trim.toString`. 
- // Please refer to https://github.com/apache/spark/pull/26640 - changePrecision(Decimal(new JavaBigDecimal(s.toString.trim)), target) - } catch { - case _: NumberFormatException => - if (ansiEnabled) { - throw new NumberFormatException(s"invalid input syntax for type numeric: $s") - } else { - null - } + case StringType if !ansiEnabled => + buildCast[UTF8String](_, s => { + val d = Decimal.fromString(s) + if (d == null) null else changePrecision(d, target) }) + case StringType if ansiEnabled => + buildCast[UTF8String](_, s => changePrecision(Decimal.fromStringANSI(s), target)) case BooleanType => buildCast[Boolean](_, b => toPrecision(if (b) Decimal.ONE else Decimal.ZERO, target)) case DateType => @@ -1185,20 +1179,21 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit val tmp = ctx.freshVariable("tmpDecimal", classOf[Decimal]) val canNullSafeCast = Cast.canNullSafeCastToDecimal(from, target) from match { - case StringType => + case StringType if !ansiEnabled => (c, evPrim, evNull) => - val handleException = if (ansiEnabled) { - s"""throw new NumberFormatException("invalid input syntax for type numeric: " + $c);""" - } else { - s"$evNull =true;" - } code""" - try { - Decimal $tmp = Decimal.apply(new java.math.BigDecimal($c.toString().trim())); + Decimal $tmp = Decimal.fromString($c); + if ($tmp == null) { + $evNull = true; + } else { + ${changePrecision(tmp, target, evPrim, evNull, canNullSafeCast)} + } + """ + case StringType if ansiEnabled => + (c, evPrim, evNull) => + code""" + Decimal $tmp = Decimal.fromStringANSI($c); ${changePrecision(tmp, target, evPrim, evNull, canNullSafeCast)} - } catch (java.lang.NumberFormatException e) { - $handleException - } """ case BooleanType => (c, evPrim, evNull) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala index 48ae49740f22d..6be6d81ec3bb7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala @@ -18,12 +18,13 @@ package org.apache.spark.sql.types import java.lang.{Long => JLong} -import java.math.{BigInteger, MathContext, RoundingMode} +import java.math.{BigDecimal => JavaBigDecimal, BigInteger, MathContext, RoundingMode} import scala.util.Try import org.apache.spark.annotation.Unstable import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.unsafe.types.UTF8String /** * A mutable implementation of BigDecimal that can hold a Long if values are small enough. @@ -550,9 +551,6 @@ object Decimal { private[sql] val ZERO = Decimal(0) private[sql] val ONE = Decimal(1) - private val LONG_MAX_BIG_INT = BigInteger.valueOf(JLong.MAX_VALUE) - private val LONG_MIN_BIG_INT = BigInteger.valueOf(JLong.MIN_VALUE) - def apply(value: Double): Decimal = new Decimal().set(value) def apply(value: Long): Decimal = new Decimal().set(value) @@ -589,6 +587,52 @@ object Decimal { } } + private def calculatePrecision(bigDecimal: JavaBigDecimal): Int = { + if (bigDecimal.scale < 0) { + bigDecimal.precision - bigDecimal.scale + } else { + bigDecimal.precision + } + } + + private def stringToJavaBigDecimal(str: UTF8String): JavaBigDecimal = { + // According the benchmark test, `s.toString.trim` is much faster than `s.trim.toString`. 
+ // Please refer to https://github.com/apache/spark/pull/26640 + new JavaBigDecimal(str.toString.trim) + } + + def fromString(str: UTF8String): Decimal = { + try { + val bigDecimal = stringToJavaBigDecimal(str) + // We fast fail because constructing a very large JavaBigDecimal to Decimal is very slow. + // For example: Decimal("6.0790316E+25569151") + if (calculatePrecision(bigDecimal) > DecimalType.MAX_PRECISION) { + null + } else { + Decimal(bigDecimal) + } + } catch { + case _: NumberFormatException => + null + } + } + + def fromStringANSI(str: UTF8String): Decimal = { + try { + val bigDecimal = stringToJavaBigDecimal(str) + // We fast fail because constructing a very large JavaBigDecimal to Decimal is very slow. + // For example: Decimal("6.0790316E+25569151") + if (calculatePrecision(bigDecimal) > DecimalType.MAX_PRECISION) { + throw new ArithmeticException(s"out of decimal type range: $str") + } else { + Decimal(bigDecimal) + } + } catch { + case _: NumberFormatException => + throw new NumberFormatException(s"invalid input syntax for type numeric: $str") + } + } + /** * Creates a decimal from unscaled, precision and scale without checking the bounds. */ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 2d202ff0e7954..7caa4a55c06af 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -1349,6 +1349,29 @@ class CastSuite extends CastSuiteBase { val v = Literal.create(Row(1), new ExampleSubTypeUDT()) checkEvaluation(cast(v, new ExampleBaseTypeUDT), Row(1)) } + + test("Fast fail for cast string type to decimal type") { + checkEvaluation(cast("12345678901234567890123456789012345678", DecimalType(38, 0)), + Decimal("12345678901234567890123456789012345678")) + checkEvaluation(cast("123456789012345678901234567890123456789", DecimalType(38, 0)), null) + checkEvaluation(cast("12345678901234567890123456789012345678", DecimalType(38, 1)), null) + + checkEvaluation(cast("0.00000000000000000000000000000000000001", DecimalType(38, 0)), + Decimal("0")) + checkEvaluation(cast("0.00000000000000000000000000000000000000000001", DecimalType(38, 0)), + Decimal("0")) + checkEvaluation(cast("0.00000000000000000000000000000000000001", DecimalType(38, 18)), + Decimal("0E-18")) + checkEvaluation(cast("6E-120", DecimalType(38, 0)), + Decimal("0")) + + checkEvaluation(cast("6E+37", DecimalType(38, 0)), + Decimal("60000000000000000000000000000000000000")) + checkEvaluation(cast("6E+38", DecimalType(38, 0)), null) + checkEvaluation(cast("6E+37", DecimalType(38, 1)), null) + + checkEvaluation(cast("abcd", DecimalType(38, 1)), null) + } } /** @@ -1405,4 +1428,37 @@ class AnsiCastSuite extends CastSuiteBase { checkEvaluation(cast(negativeTs, LongType), expectedSecs) } } + + test("Fast fail for cast string type to decimal type in ansi mode") { + checkEvaluation(cast("12345678901234567890123456789012345678", DecimalType(38, 0)), + Decimal("12345678901234567890123456789012345678")) + checkExceptionInExpression[ArithmeticException]( + cast("123456789012345678901234567890123456789", DecimalType(38, 0)), + "out of decimal type range") + checkExceptionInExpression[ArithmeticException]( + cast("12345678901234567890123456789012345678", DecimalType(38, 1)), + "cannot be represented as Decimal(38, 1)") + + 
checkEvaluation(cast("0.00000000000000000000000000000000000001", DecimalType(38, 0)), + Decimal("0")) + checkEvaluation(cast("0.00000000000000000000000000000000000000000001", DecimalType(38, 0)), + Decimal("0")) + checkEvaluation(cast("0.00000000000000000000000000000000000001", DecimalType(38, 18)), + Decimal("0E-18")) + checkEvaluation(cast("6E-120", DecimalType(38, 0)), + Decimal("0")) + + checkEvaluation(cast("6E+37", DecimalType(38, 0)), + Decimal("60000000000000000000000000000000000000")) + checkExceptionInExpression[ArithmeticException]( + cast("6E+38", DecimalType(38, 0)), + "out of decimal type range") + checkExceptionInExpression[ArithmeticException]( + cast("6E+37", DecimalType(38, 1)), + "cannot be represented as Decimal(38, 1)") + + checkExceptionInExpression[NumberFormatException]( + cast("abcd", DecimalType(38, 1)), + "invalid input syntax for type numeric") + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala index 7d0346fc0145e..7ce451ed6d577 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala @@ -24,6 +24,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.Decimal._ +import org.apache.spark.unsafe.types.UTF8String class DecimalSuite extends SparkFunSuite with PrivateMethodTester with SQLHelper { /** Check that a Decimal has the given string representation, precision and scale */ @@ -256,4 +257,33 @@ class DecimalSuite extends SparkFunSuite with PrivateMethodTester with SQLHelper assert(decimalLong.toScalaBigInt == scala.math.BigInt("123456789")) assert(decimalLong.toJavaBigInteger == new java.math.BigInteger("123456789")) } + + test("UTF8String to Decimal") { + def checkFromString(string: String): Unit = { + assert(Decimal.fromString(UTF8String.fromString(string)) === Decimal(string)) + assert(Decimal.fromStringANSI(UTF8String.fromString(string)) === Decimal(string)) + } + + def checkOutOfRangeFromString(string: String): Unit = { + assert(Decimal.fromString(UTF8String.fromString(string)) === null) + val e = intercept[ArithmeticException](Decimal.fromStringANSI(UTF8String.fromString(string))) + assert(e.getMessage.contains("out of decimal type range")) + } + + checkFromString("12345678901234567890123456789012345678") + checkOutOfRangeFromString("123456789012345678901234567890123456789") + + checkFromString("0.00000000000000000000000000000000000001") + checkFromString("0.000000000000000000000000000000000000000000000001") + + checkFromString("6E-640") + + checkFromString("6E+37") + checkOutOfRangeFromString("6E+38") + checkOutOfRangeFromString("6.0790316E+25569151") + + assert(Decimal.fromString(UTF8String.fromString("str")) === null) + val e = intercept[NumberFormatException](Decimal.fromStringANSI(UTF8String.fromString("str"))) + assert(e.getMessage.contains("invalid input syntax for type numeric")) + } } From 355ab6ae94a972011d56b8449c612fd7ad30d860 Mon Sep 17 00:00:00 2001 From: KevinSmile Date: Wed, 16 Sep 2020 23:39:41 +0900 Subject: [PATCH 0043/1009] [SPARK-32804][LAUNCHER][FOLLOWUP] Fix SparkSubmitCommandBuilderSuite test failure without jars ### What changes were proposed in this pull request? It's a followup of https://github.com/apache/spark/pull/29653. 
Tests in `SparkSubmitCommandBuilderSuite` may fail if you didn't build first and have jars before test, so if `isTesting` we should set a dummy `SparkLauncher.NO_RESOURCE`. ### Why are the changes needed? Fix tests failure. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? mvn clean test (test without jars built first). Closes #29769 from KevinSmile/bug-fix-master. Authored-by: KevinSmile Signed-off-by: HyukjinKwon --- .../spark/launcher/SparkSubmitCommandBuilder.java | 13 +++++++++---- .../launcher/SparkSubmitCommandBuilderSuite.java | 4 ++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java index 43e7f8debe17d..d6ed1e3a3532d 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java @@ -404,12 +404,17 @@ private boolean isThriftServer(String mainClass) { } private String findExamplesAppJar() { - for (String exampleJar : findExamplesJars()) { - if (new File(exampleJar).getName().startsWith("spark-examples")) { - return exampleJar; + boolean isTesting = "1".equals(getenv("SPARK_TESTING")); + if (isTesting) { + return SparkLauncher.NO_RESOURCE; + } else { + for (String exampleJar : findExamplesJars()) { + if (new File(exampleJar).getName().startsWith("spark-examples")) { + return exampleJar; + } } + throw new IllegalStateException("Failed to find examples' main app jar."); } - throw new IllegalStateException("Failed to find examples' main app jar."); } private List findExamplesJars() { diff --git a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java index 6cd089e256b93..07a9dae1256ab 100644 --- a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java +++ b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java @@ -259,8 +259,8 @@ public void testExamplesRunnerPrimaryResource() throws Exception { findArgValue(cmd, parser.CLASS)); assertEquals("cluster", findArgValue(cmd, parser.DEPLOY_MODE)); String primaryResource = cmd.get(cmd.size() - 2); - assertTrue(new File(primaryResource).getName().startsWith("spark-examples")); - assertFalse(cmd.contains(SparkLauncher.NO_RESOURCE)); + assertTrue(primaryResource.equals(SparkLauncher.NO_RESOURCE) + || new File(primaryResource).getName().startsWith("spark-examples")); } @Test(expected = IllegalArgumentException.class) From 56ae95053df4afa9764df3f1d88f300896ca0183 Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Wed, 16 Sep 2020 15:00:31 +0000 Subject: [PATCH 0044/1009] [SPARK-32850][CORE] Simplify the RPC message flow of decommission ### What changes were proposed in this pull request? This PR cleans up the RPC message flow among the multiple decommission use cases, it includes changes: * Keep `Worker`'s decommission status be consistent between the case where decommission starts from `Worker` and the case where decommission starts from the `MasterWebUI`: sending `DecommissionWorker` from `Master` to `Worker` in the latter case. 
* Change from two-way communication to one-way communication when notifying decommission between driver and executor: it's obviously unnecessary for the executor to acknowledge the decommission status to the driver since the decommission request is from the driver. The same holds in the reverse direction. * Only send one message instead of two (`DecommissionSelf`/`DecommissionBlockManager`) when decommissioning the executor: the executor and the `BlockManager` are in the same JVM. * Clean up the code around here. ### Why are the changes needed? Before: [diagram of the decommission RPC message flow before this change] After: [diagram of the decommission RPC message flow after this change] (Note the diagrams only count those RPC calls that need to go through the network. Local RPC calls are not counted here.) After this change, we removed 6 of the original RPC calls and added one more RPC call to keep the Worker's decommission status consistent, and the RPC flow becomes clearer. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Updated existing tests. Closes #29722 from Ngone51/simplify-decommission-rpc. Authored-by: yi.wu Signed-off-by: Wenchen Fan --- .../spark/ExecutorAllocationClient.scala | 19 ++-- .../spark/ExecutorAllocationManager.scala | 5 +- .../apache/spark/deploy/DeployMessage.scala | 31 +++++-- .../apache/spark/deploy/master/Master.scala | 23 +++-- .../apache/spark/deploy/worker/Worker.scala | 28 +++--- .../CoarseGrainedExecutorBackend.scala | 60 ++++++------- .../cluster/CoarseGrainedClusterMessage.scala | 16 ++-- .../CoarseGrainedSchedulerBackend.scala | 86 +++++++------------ .../cluster/StandaloneSchedulerBackend.scala | 7 +- .../apache/spark/storage/BlockManager.scala | 6 +- .../storage/BlockManagerMasterEndpoint.scala | 18 +--- .../storage/BlockManagerStorageEndpoint.scala | 2 +- .../deploy/DecommissionWorkerSuite.scala | 4 +- .../spark/deploy/client/AppClientSuite.scala | 7 +- .../scheduler/WorkerDecommissionSuite.scala | 7 +- .../ExecutorAllocationManagerSuite.scala | 6 +- 16 files changed, 177 insertions(+), 148 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala index ce47f3fd32203..cdba1c44034c0 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala @@ -91,11 +91,13 @@ private[spark] trait ExecutorAllocationClient { * @param executorsAndDecomInfo identifiers of executors & decom info. * @param adjustTargetNumExecutors whether the target number of executors will be adjusted down * after these executors have been decommissioned. + * @param triggeredByExecutor whether the decommission is triggered at executor. * @return the ids of the executors acknowledged by the cluster manager to be removed. */ def decommissionExecutors( - executorsAndDecomInfo: Array[(String, ExecutorDecommissionInfo)], - adjustTargetNumExecutors: Boolean): Seq[String] = { + executorsAndDecomInfo: Array[(String, ExecutorDecommissionInfo)], + adjustTargetNumExecutors: Boolean, + triggeredByExecutor: Boolean): Seq[String] = { killExecutors(executorsAndDecomInfo.map(_._1), adjustTargetNumExecutors, countFailures = false) @@ -109,14 +111,21 @@ private[spark] trait ExecutorAllocationClient { * @param executorId identifiers of executor to decommission * @param decommissionInfo information about the decommission (reason, host loss) * @param adjustTargetNumExecutors if we should adjust the target number of executors.
+ * @param triggeredByExecutor whether the decommission is triggered at executor. + * (TODO: add a new type like `ExecutorDecommissionInfo` for the + * case where executor is decommissioned at executor first, so we + * don't need this extra parameter.) * @return whether the request is acknowledged by the cluster manager. */ - final def decommissionExecutor(executorId: String, + final def decommissionExecutor( + executorId: String, decommissionInfo: ExecutorDecommissionInfo, - adjustTargetNumExecutors: Boolean): Boolean = { + adjustTargetNumExecutors: Boolean, + triggeredByExecutor: Boolean = false): Boolean = { val decommissionedExecutors = decommissionExecutors( Array((executorId, decommissionInfo)), - adjustTargetNumExecutors = adjustTargetNumExecutors) + adjustTargetNumExecutors = adjustTargetNumExecutors, + triggeredByExecutor = triggeredByExecutor) decommissionedExecutors.nonEmpty && decommissionedExecutors(0).equals(executorId) } diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index b6e14e8210c86..341334c8a29c4 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -581,7 +581,10 @@ private[spark] class ExecutorAllocationManager( if (decommissionEnabled) { val executorIdsWithoutHostLoss = executorIdsToBeRemoved.toSeq.map( id => (id, ExecutorDecommissionInfo("spark scale down"))).toArray - client.decommissionExecutors(executorIdsWithoutHostLoss, adjustTargetNumExecutors = false) + client.decommissionExecutors( + executorIdsWithoutHostLoss, + adjustTargetNumExecutors = false, + triggeredByExecutor = false) } else { client.killExecutors(executorIdsToBeRemoved.toSeq, adjustTargetNumExecutors = false, countFailures = false, force = false) diff --git a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala index 83f373d526e90..8bc909b096e71 100644 --- a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala @@ -61,13 +61,34 @@ private[deploy] object DeployMessages { } /** + * An internal message that used by Master itself, in order to handle the + * `DecommissionWorkersOnHosts` request from `MasterWebUI` asynchronously. + * @param ids A collection of Worker ids, which should be decommissioned. + */ + case class DecommissionWorkers(ids: Seq[String]) extends DeployMessage + + /** + * A message that sent from Master to Worker to decommission the Worker. + * It's used for the case where decommission is triggered at MasterWebUI. + * + * Note that decommission a Worker will cause all the executors on that Worker + * to be decommissioned as well. + */ + object DecommissionWorker extends DeployMessage + + /** + * A message that sent to the Worker itself when it receives PWR signal, + * indicating the Worker starts to decommission. + */ + object WorkerSigPWRReceived extends DeployMessage + + /** + * A message sent from Worker to Master to tell Master that the Worker has started + * decommissioning. It's used for the case where decommission is triggered at Worker. 
+ * * @param id the worker id - * @param worker the worker endpoint ref */ - case class WorkerDecommission( - id: String, - worker: RpcEndpointRef) - extends DeployMessage + case class WorkerDecommissioning(id: String, workerRef: RpcEndpointRef) extends DeployMessage case class ExecutorStateChanged( appId: String, diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index 48516cdf83291..15f8be69d97bd 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -245,15 +245,27 @@ private[deploy] class Master( logError("Leadership has been revoked -- master shutting down.") System.exit(0) - case WorkerDecommission(id, workerRef) => - logInfo("Recording worker %s decommissioning".format(id)) + case WorkerDecommissioning(id, workerRef) => if (state == RecoveryState.STANDBY) { workerRef.send(MasterInStandby) } else { // We use foreach since get gives us an option and we can skip the failures. - idToWorker.get(id).foreach(decommissionWorker) + idToWorker.get(id).foreach(w => decommissionWorker(w)) } + case DecommissionWorkers(ids) => + // The caller has already checked the state when handling DecommissionWorkersOnHosts, + // so it should not be the STANDBY + assert(state != RecoveryState.STANDBY) + ids.foreach ( id => + // We use foreach since get gives us an option and we can skip the failures. + idToWorker.get(id).foreach { w => + decommissionWorker(w) + // Also send a message to the worker node to notify. + w.endpoint.send(DecommissionWorker) + } + ) + case RegisterWorker( id, workerHost, workerPort, workerRef, cores, memory, workerWebUiUrl, masterAddress, resources) => @@ -891,10 +903,7 @@ private[deploy] class Master( logInfo(s"Decommissioning the workers with host:ports ${workersToRemoveHostPorts}") // The workers are removed async to avoid blocking the receive loop for the entire batch - workersToRemove.foreach(wi => { - logInfo(s"Sending the worker decommission to ${wi.id} and ${wi.endpoint}") - self.send(WorkerDecommission(wi.id, wi.endpoint)) - }) + self.send(DecommissionWorkers(workersToRemove.map(_.id).toSeq)) // Return the count of workers actually removed workersToRemove.size diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala index 7649bc37c30b6..2e8474e3e3fc2 100755 --- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -70,7 +70,10 @@ private[deploy] class Worker( if (conf.get(config.DECOMMISSION_ENABLED)) { logInfo("Registering SIGPWR handler to trigger decommissioning.") SignalUtils.register("PWR", "Failed to register SIGPWR handler - " + - "disabling worker decommission feature.")(decommissionSelf) + "disabling worker decommission feature.") { + self.send(WorkerSigPWRReceived) + true + } } else { logInfo("Worker decommissioning not enabled, SIGPWR will result in exiting.") } @@ -137,7 +140,8 @@ private[deploy] class Worker( private var registered = false private var connected = false private var decommissioned = false - private val workerId = generateWorkerId() + // expose for test + private[spark] val workerId = generateWorkerId() private val sparkHome = if (sys.props.contains(IS_TESTING.key)) { assert(sys.props.contains("spark.test.home"), "spark.test.home is not set!") @@ -668,8 +672,13 @@ private[deploy] class Worker( 
finishedApps += id maybeCleanupApplication(id) - case WorkerDecommission(_, _) => + case DecommissionWorker => + decommissionSelf() + + case WorkerSigPWRReceived => decommissionSelf() + // Tell master we starts decommissioning so it stops trying to launch executor/driver on us + sendToMaster(WorkerDecommissioning(workerId, self)) } override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { @@ -768,16 +777,15 @@ private[deploy] class Worker( } } - private[deploy] def decommissionSelf(): Boolean = { - if (conf.get(config.DECOMMISSION_ENABLED)) { - logDebug("Decommissioning self") + private[deploy] def decommissionSelf(): Unit = { + if (conf.get(config.DECOMMISSION_ENABLED) && !decommissioned) { decommissioned = true - sendToMaster(WorkerDecommission(workerId, self)) + logInfo(s"Decommission worker $workerId.") + } else if (decommissioned) { + logWarning(s"Worker $workerId already started decommissioning.") } else { - logWarning("Asked to decommission self, but decommissioning not enabled") + logWarning(s"Receive decommission request, but decommission feature is disabled.") } - // Return true since can be called as a signal handler - true } private[worker] def handleDriverStateChanged(driverStateChanged: DriverStateChanged): Unit = { diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index 48045bafe6e3f..d002f7b407e5e 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -40,7 +40,7 @@ import org.apache.spark.resource.ResourceProfile import org.apache.spark.resource.ResourceProfile._ import org.apache.spark.resource.ResourceUtils._ import org.apache.spark.rpc._ -import org.apache.spark.scheduler.{ExecutorDecommissionInfo, ExecutorLossReason, TaskDescription} +import org.apache.spark.scheduler.{ExecutorLossReason, TaskDescription} import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._ import org.apache.spark.serializer.SerializerInstance import org.apache.spark.util.{ChildFirstURLClassLoader, MutableURLClassLoader, SignalUtils, ThreadUtils, Utils} @@ -79,12 +79,17 @@ private[spark] class CoarseGrainedExecutorBackend( */ private[executor] val taskResources = new mutable.HashMap[Long, Map[String, ResourceInformation]] - @volatile private var decommissioned = false + private var decommissioned = false override def onStart(): Unit = { - logInfo("Registering PWR handler.") - SignalUtils.register("PWR", "Failed to register SIGPWR handler - " + - "disabling decommission feature.")(decommissionSelf) + if (env.conf.get(DECOMMISSION_ENABLED)) { + logInfo("Registering PWR handler to trigger decommissioning.") + SignalUtils.register("PWR", "Failed to register SIGPWR handler - " + + "disabling executor decommission feature.") { + self.send(ExecutorSigPWRReceived) + true + } + } logInfo("Connecting to driver: " + driverUrl) try { @@ -166,17 +171,6 @@ private[spark] class CoarseGrainedExecutorBackend( if (executor == null) { exitExecutor(1, "Received LaunchTask command but executor was null") } else { - if (decommissioned) { - val msg = "Asked to launch a task while decommissioned." 
- logError(msg) - driver match { - case Some(endpoint) => - logInfo("Sending DecommissionExecutor to driver.") - endpoint.send(DecommissionExecutor(executorId, ExecutorDecommissionInfo(msg))) - case _ => - logError("No registered driver to send Decommission to.") - } - } val taskDesc = TaskDescription.decode(data.value) logInfo("Got assigned task " + taskDesc.taskId) taskResources(taskDesc.taskId) = taskDesc.resources @@ -213,9 +207,17 @@ private[spark] class CoarseGrainedExecutorBackend( logInfo(s"Received tokens of ${tokenBytes.length} bytes") SparkHadoopUtil.get.addDelegationTokens(tokenBytes, env.conf) - case DecommissionSelf => - logInfo("Received decommission self") + case DecommissionExecutor => decommissionSelf() + + case ExecutorSigPWRReceived => + decommissionSelf() + if (driver.nonEmpty) { + // Tell driver we starts decommissioning so it stops trying to schedule us + driver.get.askSync[Boolean](ExecutorDecommissioning(executorId)) + } else { + logError("No driver to message decommissioning.") + } } override def onDisconnected(remoteAddress: RpcAddress): Unit = { @@ -264,17 +266,20 @@ private[spark] class CoarseGrainedExecutorBackend( System.exit(code) } - private def decommissionSelf(): Boolean = { - val msg = "Decommissioning self w/sync" + private def decommissionSelf(): Unit = { + if (!env.conf.get(DECOMMISSION_ENABLED)) { + logWarning(s"Receive decommission request, but decommission feature is disabled.") + return + } else if (decommissioned) { + logWarning(s"Executor $executorId already started decommissioning.") + return + } + val msg = s"Decommission executor $executorId." logInfo(msg) try { decommissioned = true - // Tell master we are are decommissioned so it stops trying to schedule us - if (driver.nonEmpty) { - driver.get.askSync[Boolean](DecommissionExecutor( - executorId, ExecutorDecommissionInfo(msg))) - } else { - logError("No driver to message decommissioning.") + if (env.conf.get(STORAGE_DECOMMISSION_ENABLED)) { + env.blockManager.decommissionBlockManager() } if (executor != null) { executor.decommission() @@ -333,12 +338,9 @@ private[spark] class CoarseGrainedExecutorBackend( shutdownThread.start() logInfo("Will exit when finished decommissioning") - // Return true since we are handling a signal - true } catch { case e: Exception => logError("Unexpected error while decommissioning self", e) - false } } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala index 7242ab7786061..d1b0e798c51be 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala @@ -95,8 +95,17 @@ private[spark] object CoarseGrainedClusterMessages { case class RemoveExecutor(executorId: String, reason: ExecutorLossReason) extends CoarseGrainedClusterMessage - case class DecommissionExecutor(executorId: String, decommissionInfo: ExecutorDecommissionInfo) - extends CoarseGrainedClusterMessage + // A message that sent from executor to driver to tell driver that the executor has started + // decommissioning. It's used for the case where decommission is triggered at executor (e.g., K8S) + case class ExecutorDecommissioning(executorId: String) extends CoarseGrainedClusterMessage + + // A message that sent from driver to executor to decommission that executor. 
+ // It's used for Standalone's cases, where decommission is triggered at MasterWebUI or Worker. + object DecommissionExecutor extends CoarseGrainedClusterMessage + + // A message that sent to the executor itself when it receives PWR signal, + // indicating the executor starts to decommission. + object ExecutorSigPWRReceived extends CoarseGrainedClusterMessage case class RemoveWorker(workerId: String, host: String, message: String) extends CoarseGrainedClusterMessage @@ -136,7 +145,4 @@ private[spark] object CoarseGrainedClusterMessages { // The message to check if `CoarseGrainedSchedulerBackend` thinks the executor is alive or not. case class IsExecutorAlive(executorId: String) extends CoarseGrainedClusterMessage - - // Used to ask an executor to decommission itself. (Can be an internal message) - case object DecommissionSelf extends CoarseGrainedClusterMessage } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala index 0f144125af7bf..f6930da96a390 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala @@ -191,10 +191,6 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp executorDataMap.get(executorId).foreach(_.executorEndpoint.send(StopExecutor)) removeExecutor(executorId, reason) - case DecommissionExecutor(executorId, decommissionInfo) => - logError(s"Received decommission executor message ${executorId}: $decommissionInfo") - decommissionExecutor(executorId, decommissionInfo, adjustTargetNumExecutors = false) - case RemoveWorker(workerId, host, message) => removeWorker(workerId, host, message) @@ -272,10 +268,14 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp removeWorker(workerId, host, message) context.reply(true) - case DecommissionExecutor(executorId, decommissionInfo) => - logError(s"Received decommission executor message ${executorId}: ${decommissionInfo}.") - context.reply(decommissionExecutor(executorId, decommissionInfo, - adjustTargetNumExecutors = false)) + case ExecutorDecommissioning(executorId) => + logWarning(s"Received executor $executorId decommissioned message") + context.reply( + decommissionExecutor( + executorId, + ExecutorDecommissionInfo(s"Executor $executorId is decommissioned."), + adjustTargetNumExecutors = false, + triggeredByExecutor = true)) case RetrieveSparkAppConfig(resourceProfileId) => val rp = scheduler.sc.resourceProfileManager.resourceProfileFromId(resourceProfileId) @@ -463,71 +463,47 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp * @param executorsAndDecomInfo Identifiers of executors & decommission info. * @param adjustTargetNumExecutors whether the target number of executors will be adjusted down * after these executors have been decommissioned. + * @param triggeredByExecutor whether the decommission is triggered at executor. * @return the ids of the executors acknowledged by the cluster manager to be removed. 
*/ override def decommissionExecutors( executorsAndDecomInfo: Array[(String, ExecutorDecommissionInfo)], - adjustTargetNumExecutors: Boolean): Seq[String] = { - - val executorsToDecommission = executorsAndDecomInfo.filter { case (executorId, decomInfo) => - CoarseGrainedSchedulerBackend.this.synchronized { - // Only bother decommissioning executors which are alive. - if (isExecutorActive(executorId)) { - executorsPendingDecommission(executorId) = decomInfo.workerHost - true - } else { - false - } + adjustTargetNumExecutors: Boolean, + triggeredByExecutor: Boolean): Seq[String] = withLock { + val executorsToDecommission = executorsAndDecomInfo.flatMap { case (executorId, decomInfo) => + // Only bother decommissioning executors which are alive. + if (isExecutorActive(executorId)) { + scheduler.executorDecommission(executorId, decomInfo) + executorsPendingDecommission(executorId) = decomInfo.workerHost + Some(executorId) + } else { + None } } // If we don't want to replace the executors we are decommissioning if (adjustTargetNumExecutors) { - adjustExecutors(executorsToDecommission.map(_._1)) + adjustExecutors(executorsToDecommission) } - executorsToDecommission.filter { case (executorId, decomInfo) => - doDecommission(executorId, decomInfo) - }.map(_._1) - } - - - private def doDecommission(executorId: String, - decomInfo: ExecutorDecommissionInfo): Boolean = { - - logInfo(s"Asking executor $executorId to decommissioning.") - scheduler.executorDecommission(executorId, decomInfo) - // Send decommission message to the executor (it could have originated on the executor - // but not necessarily). - CoarseGrainedSchedulerBackend.this.synchronized { - executorDataMap.get(executorId) match { - case Some(executorInfo) => - executorInfo.executorEndpoint.send(DecommissionSelf) - case None => - // Ignoring the executor since it is not registered. - logWarning(s"Attempted to decommission unknown executor $executorId.") - return false - } + // Mark those corresponding BlockManagers as decommissioned first before we sending + // decommission notification to executors. So, it's less likely to lead to the race + // condition where `getPeer` request from the decommissioned executor comes first + // before the BlockManagers are marked as decommissioned. 
+ if (conf.get(STORAGE_DECOMMISSION_ENABLED)) { + scheduler.sc.env.blockManager.master.decommissionBlockManagers(executorsToDecommission) } - logInfo(s"Asked executor $executorId to decommission.") - if (conf.get(STORAGE_DECOMMISSION_ENABLED)) { - try { - logInfo(s"Asking block manager corresponding to executor $executorId to decommission.") - scheduler.sc.env.blockManager.master.decommissionBlockManagers(Seq(executorId)) - } catch { - case e: Exception => - logError("Unexpected error during block manager " + - s"decommissioning for executor $executorId: ${e.toString}", e) - return false + if (!triggeredByExecutor) { + executorsToDecommission.foreach { executorId => + logInfo(s"Asking executor $executorId to decommissioning.") + executorDataMap(executorId).executorEndpoint.send(DecommissionExecutor) } - logInfo(s"Acknowledged decommissioning block manager corresponding to $executorId.") } - true + executorsToDecommission } - override def start(): Unit = { if (UserGroupInformation.isSecurityEnabled()) { delegationTokenManager = createTokenManager() diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala index 34b03dfec9e80..b9ac8d2ba2784 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala @@ -178,9 +178,12 @@ private[spark] class StandaloneSchedulerBackend( } override def executorDecommissioned(fullId: String, decommissionInfo: ExecutorDecommissionInfo) { - logInfo("Asked to decommission executor") + logInfo(s"Asked to decommission executor $fullId") val execId = fullId.split("/")(1) - decommissionExecutors(Array((execId, decommissionInfo)), adjustTargetNumExecutors = false) + decommissionExecutors( + Array((execId, decommissionInfo)), + adjustTargetNumExecutors = false, + triggeredByExecutor = false) logInfo("Executor %s decommissioned: %s".format(fullId, decommissionInfo)) } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index ff0f38a2479b0..e1b4cb82cebf1 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -56,7 +56,7 @@ import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.serializer.{SerializerInstance, SerializerManager} import org.apache.spark.shuffle.{MigratableResolver, ShuffleManager, ShuffleWriteMetricsReporter} import org.apache.spark.shuffle.{ShuffleManager, ShuffleWriteMetricsReporter} -import org.apache.spark.storage.BlockManagerMessages.ReplicateBlock +import org.apache.spark.storage.BlockManagerMessages.{DecommissionBlockManager, ReplicateBlock} import org.apache.spark.storage.memory._ import org.apache.spark.unsafe.Platform import org.apache.spark.util._ @@ -1809,7 +1809,9 @@ private[spark] class BlockManager( blocksToRemove.size } - def decommissionBlockManager(): Unit = synchronized { + def decommissionBlockManager(): Unit = storageEndpoint.ask(DecommissionBlockManager) + + private[spark] def decommissionSelf(): Unit = synchronized { decommissioner match { case None => logInfo("Starting block manager decommissioning process...") diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala 
b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala index a3d42348befaa..3fcfca365846e 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala @@ -163,8 +163,7 @@ class BlockManagerMasterEndpoint( context.reply(true) case DecommissionBlockManagers(executorIds) => - val bmIds = executorIds.flatMap(blockManagerIdByExecutor.get) - decommissionBlockManagers(bmIds) + decommissioningBlockManagerSet ++= executorIds.flatMap(blockManagerIdByExecutor.get) context.reply(true) case GetReplicateInfoForRDDBlocks(blockManagerId) => @@ -359,21 +358,6 @@ class BlockManagerMasterEndpoint( blockManagerIdByExecutor.get(execId).foreach(removeBlockManager) } - /** - * Decommission the given Seq of blockmanagers - * - Adds these block managers to decommissioningBlockManagerSet Set - * - Sends the DecommissionBlockManager message to each of the [[BlockManagerReplicaEndpoint]] - */ - def decommissionBlockManagers(blockManagerIds: Seq[BlockManagerId]): Future[Seq[Unit]] = { - val newBlockManagersToDecommission = blockManagerIds.toSet.diff(decommissioningBlockManagerSet) - val futures = newBlockManagersToDecommission.map { blockManagerId => - decommissioningBlockManagerSet.add(blockManagerId) - val info = blockManagerInfo(blockManagerId) - info.storageEndpoint.ask[Unit](DecommissionBlockManager) - } - Future.sequence{ futures.toSeq } - } - /** * Returns a Seq of ReplicateBlock for each RDD block stored by given blockManagerId * @param blockManagerId - block manager id for which ReplicateBlock info is needed diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala index a69bebc23c661..54a72568b18fa 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala @@ -62,7 +62,7 @@ class BlockManagerStorageEndpoint( } case DecommissionBlockManager => - context.reply(blockManager.decommissionBlockManager()) + context.reply(blockManager.decommissionSelf()) case RemoveBroadcast(broadcastId, _) => doAsync[Int]("removing broadcast " + broadcastId, context) { diff --git a/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala index 9c5e460854053..abe5b7a71ca63 100644 --- a/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala @@ -28,7 +28,7 @@ import org.scalatest.BeforeAndAfterEach import org.scalatest.concurrent.Eventually._ import org.apache.spark._ -import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState, WorkerDecommission} +import org.apache.spark.deploy.DeployMessages.{DecommissionWorkers, MasterStateResponse, RequestMasterState} import org.apache.spark.deploy.master.{ApplicationInfo, Master, WorkerInfo} import org.apache.spark.deploy.worker.Worker import org.apache.spark.internal.{config, Logging} @@ -414,7 +414,7 @@ class DecommissionWorkerSuite def decommissionWorkerOnMaster(workerInfo: WorkerInfo, reason: String): Unit = { logInfo(s"Trying to decommission worker ${workerInfo.id} for reason `$reason`") - master.self.send(WorkerDecommission(workerInfo.id, workerInfo.endpoint)) + 
master.self.send(DecommissionWorkers(Seq(workerInfo.id))) } def killWorkerAfterTimeout(workerInfo: WorkerInfo, secondsToWait: Int): Unit = { diff --git a/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala b/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala index fe88822bb46b5..a3438cab5b0a3 100644 --- a/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala @@ -27,7 +27,7 @@ import org.scalatest.concurrent.{Eventually, ScalaFutures} import org.apache.spark._ import org.apache.spark.deploy.{ApplicationDescription, Command} -import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState} +import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState, WorkerDecommissioning} import org.apache.spark.deploy.master.{ApplicationInfo, Master} import org.apache.spark.deploy.worker.Worker import org.apache.spark.internal.{config, Logging} @@ -122,7 +122,10 @@ class AppClientSuite // Send a decommission self to all the workers // Note: normally the worker would send this on their own. - workers.foreach(worker => worker.decommissionSelf()) + workers.foreach { worker => + worker.decommissionSelf() + master.self.send(WorkerDecommissioning(worker.workerId, worker.self)) + } // Decommissioning is async. eventually(timeout(1.seconds), interval(10.millis)) { diff --git a/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala index 83bb66efdac9e..4a92cbcb85847 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala @@ -31,7 +31,7 @@ import org.apache.spark.util.{RpcUtils, SerializableBuffer, ThreadUtils} class WorkerDecommissionSuite extends SparkFunSuite with LocalSparkContext { override def beforeEach(): Unit = { - val conf = new SparkConf().setAppName("test").setMaster("local") + val conf = new SparkConf().setAppName("test") .set(config.DECOMMISSION_ENABLED, true) sc = new SparkContext("local-cluster[2, 1, 1024]", "test", conf) @@ -78,7 +78,10 @@ class WorkerDecommissionSuite extends SparkFunSuite with LocalSparkContext { val execs = sched.getExecutorIds() // Make the executors decommission, finish, exit, and not be replaced. 
val execsAndDecomInfo = execs.map((_, ExecutorDecommissionInfo("", None))).toArray - sched.decommissionExecutors(execsAndDecomInfo, adjustTargetNumExecutors = true) + sched.decommissionExecutors( + execsAndDecomInfo, + adjustTargetNumExecutors = true, + triggeredByExecutor = false) val asyncCountResult = ThreadUtils.awaitResult(asyncCount, 20.seconds) assert(asyncCountResult === 10) } diff --git a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala index f1870718c6730..293498ae5c37b 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.streaming.scheduler -import org.mockito.ArgumentMatchers.{eq => meq} +import org.mockito.ArgumentMatchers.{any, eq => meq} import org.mockito.Mockito.{never, reset, times, verify, when} import org.scalatest.{BeforeAndAfterEach, PrivateMethodTester} import org.scalatest.concurrent.Eventually.{eventually, timeout} @@ -101,12 +101,12 @@ class ExecutorAllocationManagerSuite extends TestSuiteBase val decomInfo = ExecutorDecommissionInfo("spark scale down", None) if (decommissioning) { verify(allocationClient, times(1)).decommissionExecutor( - meq(expectedExec.get), meq(decomInfo), meq(true)) + meq(expectedExec.get), meq(decomInfo), meq(true), any()) verify(allocationClient, never).killExecutor(meq(expectedExec.get)) } else { verify(allocationClient, times(1)).killExecutor(meq(expectedExec.get)) verify(allocationClient, never).decommissionExecutor( - meq(expectedExec.get), meq(decomInfo), meq(true)) + meq(expectedExec.get), meq(decomInfo), meq(true), any()) } } else { if (decommissioning) { From 40ef5c91ade906b38169f959b3991ce8b0f45154 Mon Sep 17 00:00:00 2001 From: Linhong Liu Date: Wed, 16 Sep 2020 16:53:25 +0000 Subject: [PATCH 0045/1009] [SPARK-32816][SQL] Fix analyzer bug when aggregating multiple distinct DECIMAL columns ### What changes were proposed in this pull request? This PR fixes a conflict between `RewriteDistinctAggregates` and `DecimalAggregates`. In some cases, `DecimalAggregates` will wrap the decimal column to `UnscaledValue` using different rules for different aggregates. This means, same distinct column with different aggregates will change to different distinct columns after `DecimalAggregates`. For example: `avg(distinct decimal_col), sum(distinct decimal_col)` may change to `avg(distinct UnscaledValue(decimal_col)), sum(distinct decimal_col)` We assume after `RewriteDistinctAggregates`, there will be at most one distinct column in aggregates, but `DecimalAggregates` breaks this assumption. To fix this, we have to switch the order of these two rules. ### Why are the changes needed? bug fix ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? added test cases Closes #29673 from linhongliu-db/SPARK-32816. 
Authored-by: Linhong Liu Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/optimizer/Optimizer.scala | 5 ++++- .../src/test/resources/sql-tests/inputs/group-by.sql | 3 +++ .../test/resources/sql-tests/results/group-by.sql.out | 10 +++++++++- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index b7791cd442694..6033c01a60f47 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -143,7 +143,6 @@ abstract class Optimizer(catalogManager: CatalogManager) RewriteNonCorrelatedExists, ComputeCurrentTime, GetCurrentDatabaseAndCatalog(catalogManager), - RewriteDistinctAggregates, ReplaceDeduplicateWithAggregate) :: ////////////////////////////////////////////////////////////////////////////////////////// // Optimizer rules start here @@ -197,6 +196,10 @@ abstract class Optimizer(catalogManager: CatalogManager) EliminateSorts) :+ Batch("Decimal Optimizations", fixedPoint, DecimalAggregates) :+ + // This batch must run after "Decimal Optimizations", as that one may change the + // aggregate distinct column + Batch("Distinct Aggregate Rewrite", Once, + RewriteDistinctAggregates) :+ Batch("Object Expressions Optimization", fixedPoint, EliminateMapObjects, CombineTypedFilters, diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql index fedf03d774e42..81e2204358bc9 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql @@ -166,3 +166,6 @@ SELECT * FROM (SELECT COUNT(*) AS cnt FROM test_agg) WHERE cnt > 1L; SELECT count(*) FROM test_agg WHERE count(*) > 1L; SELECT count(*) FROM test_agg WHERE count(*) + 1L > 1L; SELECT count(*) FROM test_agg WHERE k = 1 or k = 2 or count(*) + 1L > 1L or max(k) > 1; + +-- Aggregate with multiple distinct decimal columns +SELECT AVG(DISTINCT decimal_col), SUM(DISTINCT decimal_col) FROM VALUES (CAST(1 AS DECIMAL(9, 0))) t(decimal_col); diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index 50eb2a9f22f69..5d9553f804059 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 56 +-- Number of queries: 57 -- !query @@ -573,3 +573,11 @@ org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. Expression in where clause: [(((test_agg.`k` = 1) OR (test_agg.`k` = 2)) OR (((count(1) + 1L) > 1L) OR (max(test_agg.`k`) > 1)))] Invalid expressions: [count(1), max(test_agg.`k`)]; + + +-- !query +SELECT AVG(DISTINCT decimal_col), SUM(DISTINCT decimal_col) FROM VALUES (CAST(1 AS DECIMAL(9, 0))) t(decimal_col) +-- !query schema +struct +-- !query output +1.0000 1 From 657e39a3346daf0c67cff3cf90fe68176c479747 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 16 Sep 2020 10:13:47 -0700 Subject: [PATCH 0046/1009] [SPARK-32897][PYTHON] Don't show a deprecation warning at SparkSession.builder.getOrCreate ### What changes were proposed in this pull request? 
In PySpark shell, if you call `SparkSession.builder.getOrCreate` as below: ```python import warnings from pyspark.sql import SparkSession, SQLContext warnings.simplefilter('always', DeprecationWarning) spark.stop() SparkSession.builder.getOrCreate() ``` it shows the deprecation warning as below: ``` /.../spark/python/pyspark/sql/context.py:72: DeprecationWarning: Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead. DeprecationWarning) ``` via https://github.com/apache/spark/blob/d3304268d3046116d39ec3d54a8e319dce188f36/python/pyspark/sql/session.py#L222 We shouldn't print the deprecation warning from it. This is the only place ^. ### Why are the changes needed? To prevent to inform users that `SparkSession.builder.getOrCreate` is deprecated mistakenly. ### Does this PR introduce _any_ user-facing change? Yes, it won't show a deprecation warning to end users for calling `SparkSession.builder.getOrCreate`. ### How was this patch tested? Manually tested as above. Closes #29768 from HyukjinKwon/SPARK-32897. Authored-by: HyukjinKwon Signed-off-by: Takuya UESHIN --- python/pyspark/sql/context.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index a9c5b3ba0c254..937d44ac5ecbc 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -66,9 +66,10 @@ def __init__(self, sparkContext, sparkSession=None, jsqlContext=None): >>> df.rdd.map(lambda x: (x.i, x.s, x.d, x.l, x.b, x.time, x.row.a, x.list)).collect() [(1, 'string', 1.0, 1, True, datetime.datetime(2014, 8, 1, 14, 1, 5), 1, [1, 2, 3])] """ - warnings.warn( - "Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.", - DeprecationWarning) + if sparkSession is None: + warnings.warn( + "Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.", + DeprecationWarning) self._sc = sparkContext self._jsc = self._sc._jsc From 7fdb57196313b0dfce1695fa4c165cf8998efbba Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Wed, 16 Sep 2020 13:42:04 -0500 Subject: [PATCH 0047/1009] [SPARK-32890][SQL] Pass all `sql/hive` module UTs in Scala 2.13 ### What changes were proposed in this pull request? This pr fix failed cases in sql hive module in Scala 2.13 as follow: - HiveSchemaInferenceSuite (1 FAILED -> PASS) - HiveSparkSubmitSuite (1 FAILED-> PASS) - StatisticsSuite (1 FAILED-> PASS) - HiveDDLSuite (1 FAILED-> PASS) After this patch all test passed in sql hive module in Scala 2.13. ### Why are the changes needed? We need to support a Scala 2.13 build. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Scala 2.12: Pass the Jenkins or GitHub Action - Scala 2.13: All tests passed. Do the following: ``` dev/change-scala-version.sh 2.13 mvn clean install -DskipTests -pl sql/hive -am -Pscala-2.13 -Phive mvn clean test -pl sql/hive -Pscala-2.13 -Phive ``` **Before** ``` Tests: succeeded 3662, failed 4, canceled 0, ignored 601, pending 0 *** 4 TESTS FAILED *** ``` **After** ``` Tests: succeeded 3666, failed 0, canceled 0, ignored 601, pending 0 All tests passed. ``` Closes #29760 from LuciferYang/sql-hive-test. 
Authored-by: yangjie01 Signed-off-by: Sean Owen --- .../regression-test-SPARK-8489/test-2.13.jar | Bin 0 -> 19579 bytes .../sql/hive/HiveSchemaInferenceSuite.scala | 2 +- .../spark/sql/hive/HiveSparkSubmitSuite.scala | 2 +- .../apache/spark/sql/hive/StatisticsSuite.scala | 2 +- .../spark/sql/hive/execution/HiveDDLSuite.scala | 2 +- 5 files changed, 4 insertions(+), 4 deletions(-) create mode 100644 sql/hive/src/test/resources/regression-test-SPARK-8489/test-2.13.jar diff --git a/sql/hive/src/test/resources/regression-test-SPARK-8489/test-2.13.jar b/sql/hive/src/test/resources/regression-test-SPARK-8489/test-2.13.jar new file mode 100644 index 0000000000000000000000000000000000000000..0d10f7ff03b35a1829e4f64ddbc82a0124b72f1b GIT binary patch literal 19579 zcmb5V1CS`)(k0rqZQHhO+kM)$?bEhx+dOUCwr%5`@4xS#d*@BeiD8b6_vGj z=FZHuQb8IR1O)&90s;WaiCz&mfeX1F002M<5CGusQvg{}6+v1_IWc-!K{-h=QDqf6 zS+VF?L8yKP1ksm(O+&i4f>WUmE6{SG=D2-666L5CNy&wXR(4dCGN>A`&=>xjlm1gE z?wOcVE*n{x2XnEp77HuuS@*qx#N_oA3}H+GiY(s3$G9^<{$BM7A}HRdo5Vn~umq&- zg2m(%`ALckp*=`Kix8NwjdA@D;nmgcGH^E8uNDa}UFsHPbT+bKSZb+dxDfH7d2N_> zj9G#GL&{saRoanc|0!l; z=xSn1|GyU|_+N$X9L?wq?G260P3WBL4IQoie@0XNAB=W3adM_*qGM$KulM|G@*w|C zUU>1-l`bd%z&11hz~2)<*3iP1gwDv?(8(!T%|abn9mAK7OomBhu&ixSDoF!4kkYbR zyBSJru5OVaWqlcL4@oRBorAf4eTmL>=fy{#FEs}_<|qq~|AoJ~bS4|5906!LtbL=C z=SBBs=jZzk-GA#bd-2 zdns^|WB%5AUaBTvP@wWwp(mE|Z1fJNK4D2F=MZ#SoqwfzIkbGj&|TyvcL8xRkF z@=#{t5e||@x(OC~5&tn2vB@AYhHH?h4H|nO%LWnRaExp=iz79|#eUsMhT2Pj5gdm6 z0*Zz{2;2EE4Tn6+=F#H3r5jkvQX2!Z3EG-lw}~d=Hz(6Y(iieW(Z$fP4S5P4kK?<_ z$@Z4l%N0a^&@A-g@zL*JvP7)x;}ArFxDw|m-t<#F?yKgjlnC;)a;L=GxWjat;s}Mz z@eAqZ+CWaEvlIuumrImJ_>H7y3q*vokWU^1Pf1M!;Z&~O#BcDsad5a{;fm>TKtde- zA&L}_*dCaIZs~dA2E|QLb6Grnd9vuxpc{8Y95uUV?h<)~)ZKuk+NAj?s~t&JN* zRNuC&{>_<{qYcWM>JHVrodQX#zB=nWH1Tc%dMX5@j0Y8ZxL{ zZLX@srQy(Aob(Bab9-LW2B@57b~knjEb!8*@R{lSCkg|k0mGQzNj9$@O};%z_lPF# zx6T|C4RtCt%*yIsNYD(luIWFj&Q$JD>R5G1>X^-{Qw+5Y31+l876QyUs2c3Kh9&Ae zwZg(t?bDAFil1*HsAs4c(5F{)t$h{j8Ih57dV|tj%LgXwTXuO3BezipaLW1yG>X&| zm?qGMZL%XkAp&O(#o`bvcNH-ZddRC9kxB#DRnn*w-f){}0Ldf~yR==^2eMs-9U@cL z2u_jdFtO}mw|Lm`7i%F9dMdy_zq#u!Qif1sC>`)@g6}2$fG3Cq2Qzd+7va3cx zp%EZ0SmQB|S}D$-Me#c47M-WYhYrf|wGx#dFIwJMqk2|ZnmCVcvqM>Mx7{Pbsxv^- zDTGm-_2Jw*4ol-sPpmS9w#HDIKLQswd~YwGD^4I*c-V9;2?~s#K?pAa7m2HQS2X!n zNM2H__`DkZL34$qIMjP0kmWrw7LTzqM)oA0TwEx)IsKy~Icb3o$mx^gP>?n7-^{W9^Gi2L(owCA4fZ9ni&$tMxrodUHogy;N_A%F^~cOQe?(vHIt} z+MQ?}U=-+~eoUO@qGpM|)#*(IHn-@#)nH5>xCP9dGWr{008#W0RT|{)0>NkDu~L7h{_2|iYi~}YC3JSHTcfd_6}5!x5-I} zwKy^DWgd{$i|dg&GI2gIfyT#yWRhUSB~^dEZ>&nh5s^x_kgCfnp=yF)Dfnr0Vn>&p z5T4Ba=y~9E9CM7B%awwJkdR7BB={%5)FY0Qvj|g~v6GAvGL!R^Ktj&_Zp2^;r@%X) zlvIF%jEZ#-@)P>DH^sA;)7*#ZAUAkkVjXjIE`EE1!TIC)bPnQvH+K=l&;@3zU${f& zk@KG8=H_;?&wzrk=|99>i{Pk`;uL5^ULY)*3b>bcocN2voJaOX)tDP%ev5!8&5W{i zY;_1S#*`D5Nni1eJ?*3O8vNzYEi}ZFg+M~o>nXudUvSz9`ziJwW)tYT%T57TV!~_) z*aMMhmdDq^z(HhRJ@BBdZ|+SJgF|4%A)BF_WOazXw6otWrf?7>Tt4*rF0L6&M;sV1 z+xQIw`8F*|9KIZxG-Z=WqDjVNE5)Z_d@$nOOF;b=*Dmh~L%*ol>_}g{q$nyrOsLv0 z6dp?+P_t1)g&bF@WNtKmD8R7M;4rRtrObfi%$C#6vStr~8|~(~@7XW+w2&BI`vu7g zcRp^OZXs~Ve#bR@%ts{&cO!(PB{NP*a)v1#=yh2EWpH&?3Ym;N8iKM;P z(-j@ygABigHF+qK{y4qcAMl?zivP zK?N4}+7X)mI+_Tw6`rykeq)Dm$rR47Kl1CZ5(bP|^$js>}e6`(sEUeMb1 zNAV0}2NH~)9v_$y&91)Bg$R)_ov0q~vANp z?u}x&$GzY3xn2Gkk<`qrqaMEkjnu8;GRbMB%Qzr%BIAs3jO_s)!V2p9D9fB6t3{E& z;U2W6YahQYvGZcMZtYwk3_`{DW@Z8yCWLEh2p2DaRN)HmX8x&o@Vx$bJ9nGfMq_o0 z5Zzd}wb33(%B-Jq--rST{2k+1E7lf7;p7sdKN0`g%uZ+kI5Ok&y-+w_S%3VzwzM)e$WMZhdu~|1Y6K4G-v|`fv2X>HZP+1x4$WKTu(MX z+BAz^gejI?nP15(F(B(*>%{}EGlG9ZnL@U`M*2{Mt{P41N|i*C3^Y9Eog|E?gZy∾}cN8X8rpJxFIYqxx6hbf7F)N%s8tggi{ZQj`DH@>&*+{~s 
zIC3dDujUlM7~CwSqINI`8?1O8fIF9zH8nlScjsYElI5ZnpM?7~lI>)&HNo2jx)FZN z+`#3l%z}n~AGg~2>)c$uR&(900^_d{tY1aj>t-KjCL?5rJqY(cfe)G$Vz9uJ(TPbW zEN0%vG8!q+u$oOVsngGPD1p|85#aj1X}l9(4D@<@TG*8xN90DW!9@f~5Uguak4#Io zJ;ixyH|-2w&+HX`0`P-*?VP{3OA+^$KLIJzlF!5dgI)49L7b@rWGPgUMCmndHk!)KaC8G+eoZNC@#Mts3 zo>49*B67lQkV@aIw_F+}47x%ij$oW~wZ%8|mlA95qle4_mJT7q7GC}ozdD*)DSJ3X z8t#wIzwj@$(68u^wNF3Y_*u$fL)|5BD$F59GRnqXTa%$#DX4QUcpYocK2ibS47vPB z4|HW7BBpj^RS7EP`8*a&IASHbD#`n9xU~@}U5>N-$ti4ITQ1*dmsaIh-bs`~i01U` z7w}X@+l;B1n|01@xbrnx1PrjGp`BObhXSfSx$bwQ`txz#*i`6GaPM0v9bfa#42LOg zg{I_XmNROt|CP>ca}Brw-MndL&;+1s2^bdFO}h+745f|9g%Qh%X`~^Dc8e*oUhP07 z8ZUY&8E8}g@^)%$pw*Z;BFQn{vKe#2@g>wxL@=R^^2|mj$vUBJ`zzB)LtSk@Gx5sF zXE;ITxlNc^c+Ia=Wlxi31Y`kG7==uhB&IB_HKqaPR%HfZQWFr z3zrD}|Yh@xTFz>rdaS4DPHL&W$Mt=bfe7%8U)0P$r^@a|$Yk9-V^jq`RAdR}#9WZ)DPylpojJrG^j{xIb0L z%L>^)s|fep8Nw6P*FlicM5SD%mbbMQypkocV=4*y$uvu=G0Gz@vMT%F%llH%DWxYB zI*%F&{$gFy!izJ)ty)ikOofhb?Rj@~qqA#AyRZCae!A-Zp)B}leY7-y`|6bidPLH_ zY&nQ0$8_v}vjxVI)KR*zCPwFJSU9>tOwOt&+KC&wf0FB1 zXs^4=`nskw$TK}vkUXF6oKxEgn)sXttao7cbBxvd@iP?ol?#w-PBKzTVzMK7O@6ks&C z4xD+$vK?tSy5aHpy0-j2Z0~}Tj%!cSlGlZ;w#qfrk9?2t5D?{B=k&jGOk<+^%78@wMW~kqw?f!pzhV$>bGyKXmwo1 zl!OyFk+pWp6RZo-;$~O-4%eRgUKRauo%dUj!%B?%CB7<>|S*-vn%ikANwHTz(z`v{*+VG2t)0_08c;vtbp7ok(lq^e=i6M| zFU^e>0bXYu&t|%&Y#ZtKWx{erSC3fpiCvvy`BT=0V+tD#W%PiB<2i7GrqV66JWMtr zjG_Iu`?HSe#aE#BtIOSw)Bs|fWjv4>#Sv$MZW#*-A>zdaQC?kI0?JR1_X@=0PYt$p zu=&m_$aMj(_Vh_rtU@*Wi|W9pO02-;H;sn}_f;D8A$0g~S38a)wc7->)&|-dnY>^# zyWo^K7;I5H*N*j;7KL-H!&l!{M^B5FB$k475?kx1;?3l6nU5mBnX7hMH*tJ?wX)3_ zL1~OpAiH36AIrDSXaeaKRhvu1^aKi=ZIZ>l;Q)B^O5+u(l1JK3e<0g$4~P=t-5br`2nuZbz%OT3zJGnDkkUb z?O*GOM~pjGF>Vf=IA7xCWed8f5&4S(h!*viE;_988iA$HArC(S(XjYw@=U+vx7Ipg z>WxGDP|Y}@HG);r3dNF$&uB1_IyI;szq4Q?6=~_>Bp;_zwX6$uUv$+Ks*_n-rsrzw zdIU=)cU=jW_xP;OeDJb7?T6FtK{6txNE5ys)bPMBTc*y;^rojV1pW3X2-mP5(@C-l ztmO6$|NcFN+}^fbLg5pV55-v=>X$ufh4z3PwE}Kt%H$p;vcGX>NCU6&mXVIr2l$El z_T7@?PDU1DjB;ubv4#n`r@a)v^Xnerh_{-0fAODo<>91xcEbq~eLq!ES^JRg=Uav8 zal8{YudwFMx5~SbF^UASQ}UCUrbp5dL6RHQ;;2>~LKCJx8F;xr3fOt;1F0Wf$rp)r zd0w~rZ-1t|5YK6ti;lZIT5q1o(EKDWt@Ou$yPmyh9B+u5lPGH(;tKRgZRVMb#rfB(4 zzR<)!`-2m0YQ&)vL_k23QMjEF%L-*g(NZ6O)gjSE+go{bQ`5gGou9E1Q{#>iC+Jn~ zuA|{g{;vFa{61@5{o2sBPCg$;L*}KILHby`Se`4of5b;Fm-KcU?Cw1xRd@pSRjun- zX*%g~+u=H?R?o~*E$+qg%WfGjhKWeX+`S!UOR60&qU(a>D<)XRyD+89thj7+uz)v` z&RIlDe?PPv&qOMaLUB!_AVPHZAt{>5fc*h%sCX0n)5-Ngr9YB<>=JT$FD}@Q)}v8J z0BLa+4}q1uL{}3r>QLR9ga;0&wjh2BQ;aRcXULry@O*LvkW~BL``*VS!Ms;FMD-`K zEFM&F*{~nqYL>tLSFfb=ocT|Q65UauAF@bHp4($#jOP;1yS^_%`}~l>*xL%1x^ zs09ZZyg}>ltSnkXSH8UMh|+1Pm$)d%p^Hr>uaZUPqG&fsXUEuOdYfVMs3lf^Y6$0qLn;@3}>3YZP`xgAITM^(f36mKrB?CQJS{ zC6%ba@;<3A;y(IfaifpeVAe;)JefpUq%5Xv()X~4+udy@=U1KGZs*hxDt90E-N025 zRfJAooLJPe7Dm|L{LI#a=^_K9N49R^%u?ce$*}qttPSeJQs{gfr*(FjX36?7qA(HR zOu7{Sv4~GsH}7muVl#QpOQb(EBz;66lwoHx)e0#BcEU(AzF(4I-difGsZirx5=+aZ zsG6|i>F2G*=?IL_wKDv`WeXFiTwzSmh}A%mO2|32ymbS}j&`fpMppWkhB^|eXtnb& z+Fs_(uKW*+43qkC-iS9x(o7_+w}%@$PnrvC$B%tU>T}wRJgZ(o-FlHc`o1bB0UMg3 zBon3TVU_h)vn=kIRk;%qznX4ktJ^v z`0h;8P#WHAW;Ch_i~~s%NM+P$)Xwy1XGIoXgb_!C1UM1m2^wBfeqdkM4g^jN!i(+(1K`OErY~A4E74 zvdA`y8YE^JAu=R5H0Uo2B<5kTIV7XAs31}gH;^fv_2?GHNRn}EvWRcF7_n3??QF^~==*Dil?eN}sy<`|%s_)T+5^ZjbxKpAb%Ny!f{mz^R zmn&8EcFa0-_29~0LU)k39r7a;p6TYdWClKidcX&Tn9oZ_t@q*7D0Ulj=h|5D3xNY0 zl1_HyCK3)Qc9`=}r8&Afx-#OF6VMD3CDX=+n`7-l5Fn2VYEn{Tg&)@bZAudPCyKKW zc&%oGk!$v9;xwi4cy5 z$tRHgr1Gtads3=pHYG7fjaEb>CqzDBiD|+G(U2$QS}et;SmxnGdD5l&OEuH%=%E65 zdQI2H15wo=UhBdtJ}))?U5VJqoBNLng1Yb5MaE`sV%lALlf&9H6fVIpNnNkFc|~@b zNOP}MI*FW_WuVmG3DR?t710A9U>zbJ!#w~iD)TNu3U8Ule=O9b#GG~PCtMx^T1YE5 
znSc{TEq_VSc#Lg32~=0@z-xarv3dhC{hfM-O8hDwv>DcReRD*+-mw60LM*{N@cr?M z-EoaU>61BtP(HjwPlPQoR%8NaCQ*um*=1J2(Wgw3qm7_kzu#YiulC2A{p4H_Mp{#O zagLmgX0Ez{wk3Qfm~Tsc1qQImMYolcuypWfd7*uVpdz%#;Lr&suB$v`(&{>hiKwuZ zpo!HYwiwC=8%k@sS0jM_CtU9h~^&To}*7Z|{LcnJ&Vc!rz%?M-jt%kFZ;|SKJwZZqTaCG@=E`mrN znPNe3VCcLKw92aw_;3=mbMJs=lC{8zgp z$dTgP9dIgTv$RHN(zqM1WO8oA_3=2}t_NyNw2y5N=2$yPA(`e_)G&T^1gAzqRdL`s zm7RlN2fMxtG2BFM1^=Q-tKNQU^*^{r^{ci{ZO=URQRs$k z&$fF@IZ(@~e8IQ-y%XT><-i#P?+{a%epE%PZ@cNyRiNinr5A>CTJU^?<1o+L#?gl5 z-P2_&N`$duVKum51#A#|hni-$Ik(Ppu#e;%9K%=B_j#p)LESRNPlrS?y0XhZFWL&d z8+h1XIoxA;XAcohRmI+N1%xRRF-L*4%phk^b9?Ib9NU{*&eaNYnR3`?42zOZ(`It~ zb6Gp^7Hi&Vt$*Oq0=C16vO#yCp9mAXjCmhI(xBaw=^UionE!>|SgF4~6n;*b` z7tH^GEq8!yBU^z00Q&wymstM|Taq|?*qazRnix9UIWm$k{tLy7R?(J67DV}$vxBzD zS3sl+q^MFVQ@GI8Qm9IcYcf~b9YQZP2_W6rGIdQY`jOISc-@7+6~~;h#uhfQ+6?35 zKDp0k-pOL-_w)Gy+@*AdPjkElcVi>qVrg?|%w5YXJfcfOqJw6sjBW3S-_H0Zz6^k3*>8LeEFg(lz&E=!H^K#t;r*IOFV7 z;Apv!{4hE!vfpOx6@+OXRb*jv6?gMT0uhj>SKH^)Z-WQOzJ1U zB72we$!TF+fsMNkC1JPKMkovhyL;wZy>`mMJ?xWCy9ZVbz4ow|a)1feIZtQHP`TTc zQJ5L=Y+f(~dT9@Y`#fVL#dz1p=E?~ax<(cH^K!#G=zXfYMB5;7e=0#izg6IYzp6Eu z6^z4iNAQUC5<>v_=c_0xT+W_H9C;Y$vV#*SMC)CKNmgx0wq*QJB~{+265xpXAHxGH z^A_is{8*e3Y(4b$K!^zxvUWSBv|VwgiF>e#%rrqdu~k#As7mR58-Hx^FFORL|`HUheQ4xVfar%8{KVRQFF?^`}+kA;uP-0vQb4BDr)fkCId$w(>VkbelKW6Fty_rkkh z+@n5F^(3p6Wb%(JH!K(O!7svDmfNiRqPQEbL?L^G`q6j)U2$Z5g)#6-&#Z**#R72q z|1rx>zUx-?zcd1bzgQOHKW6D6Y-eXJ{12PpU)ETyexZ!Lg3AALT~o9KT#sZhI4~V( z9TXQw+Dl-VRTT^n;Er5}W7ObJvJBCElr|_g@AlEH&vQ_V%a!dd!5y7wVa9cq>RR%n zXqM4&Qe(X4-#;s3p!Raj`;zt0dGbBo?)LMv^XC&5Kx~g4geG!i&YKT=s^X{t5h7q& z{M=|MSJesg=l;5IQEvzW2THBwtfnI=W?ye7-JzM0*H7FnrUMe!mDpy5kf0u}P!3z(pLi+I@=RX!q;1)&bO zpRS5FNW6liGYplGq8drO2wIsk1r{fv)L_~wofDy?D`Z3#nJL|%a!oG8SDv?Lm7%mc}Q?V$X7`$fS#U0O+*dCL1}$D)^YoQde~xHF`aR>ec{&tas_v9;g8ge9&HTd>k)ot_VI+|m z&)oew`vecwEx2nkSV+xuH+!~xZA~SKLd5#PO{%ZC<>gayfM#;9fgPEUBl>LOaQK zaFc|t9xm8P1dr7|P&&;eL&GBp=!3_!tQNGX)^o7FR%ilwKf?{e?++4)e#_X3M{t@X zqZ}y+9PS6-OJy?Q(v(@aEv~S3cwsot=;}AkX5xV}+PKc<|1QF_#Vp?;M!Z2|#1Hj~ z8h8m>104yivgS1JR2Vz-By(%VmS-No*?P_#YCZxNw8=l}OR1jd%@`5z2r??Q5IsfUqpWApz~nvIXsL%KDA7IQ}7ts%dXSKrJ;8wjcPR=rMun{@3Cmu zp&Qrc>&F!xaI<}mSEPc|V*W1bJL&H8!;S z>N_l;P*A&`SpZ?BQA8jmT>CV+qCMx%nPu`bV$#kbEp29BCuaM3`kE5Za`@|h1ZA^! 
zi8>g2W9soqMf*Co$tg%p2w%?H{2^SXW2ndRCEZz>;`ttPFhKT;dV1{qI~}L;Hr6W`PN5p zcUe?j1(CR(^u?bavud4<`k*DuDCCM1E8c_}7dNq}0PyJCsXy_BMz%fTA6FCi%wl5#!#m#-u zQl87^N{G9dxq>9sfLvx-vF@V#R9C&rVSmCD`l4fqKk?o1s77s4lMa~4un?uH+>wH{ zgt}((E-G)FAIOgY$yVDAKYPQ>fUYH!M zYE;iXszz^EBXz^bP4ZnFciiA~OWW8GLjPCz>DX_HInIz%x?c(Tv4;I|7%vsY3Bv+D z>yuv|5B$?f3;x&VdA5rJum;{x&6$!knOs84wB_RNsRB9OU~Q>04OZY-FC*0D30h;R z488%8hq3|;(@m^*!HRcqByzg(Wc2sep!hJ_y#T&41>*ss3O1!^zikoLIe*mZ1S26( z#HtNypKP`Bo0l^Mw7uSd?q}`ACoGaXL4^2|;xg6Jg1JqbxI#F4t2$nkEt)f`+kjyP zA&|S>)KHAeoE0Cd%!B(DMVZ@{Jt5R@naX$>>$OXp`HGU7plbZFF`^2?7xKww`4IfL zshU45Xk&pXs&^PF+ap|)`kTv8ZQ$vfC(`n@`2J)z^i$Df7=kWCAT-(4ZuoW6|9}&o z*I*KM%(R)D;?w@-wtJ6p zf<$QTC+097MXV`Ls9iJAvU~qJStxQ~qkGYDNr)}m`x;a!(a zW?L1cy@ty9J)^i}2rWIky4-eW}uqnY*WrJx#uv z@eQJzlREoATK8w>E`K*T*IN*1Df7=6+S{8;XVV z@W}_(Ei?0Wv1CcM4v!dY&MOK_<%ZGGe#$kjw^J7256F?DKe23bDSau9RRxb1=TUoT zLv76@=RSJ5BkYogMp>ojMaAOn5NJkIe!BnA`?q(D9gU>WmNKtjuPcV7AI&D6_jcb2 zS>yqvbHnc}|G{NfpyO*hgD;RadKU?PGj5=Mzq~LEm{3e&+ha`_+>R*)qafir5NwKC zvzvsT@Ei67vRASret}qcdZ_R-m2K;N47B-BuQJ;{vn-|ZKVwd^?6K-uJhm<}cI!Dn0_3{{Yd#HF| zq3pn(Hvn~B2Hqj)I0u?@l;!2&O^kax20pP}{Q`6AH2ydKM#}p21}h`CU~5@W^jIqH zb5VKjW7Wuy!e7ClGOPKu29pP0P{X`EwFHN6SbW38rwjaSTkBeY*VY_o)+oEvVu075 z-$0WC^v;b$pGX?T!y7<+DNIVo>KwOmb&c7Ei%!b?gpUc;se0YP=f?9hxY^6^Yo)n{ zp7{$_DLEnUV-@O9-R|c&q56&0s1%>@|IQZvA!Iw&&^(C!jlM;wbxd<(xPML$kYdn;2v zMK84?O(P*QEwMyRCqXlQEJZ^#CL=9F2RNQwgqC`Wb%u%M1PJi&Z~G^4H+%v}%K-)e zfc(p>M*5#Kg#VXzoULjjk1dYiyKI2Zm)l2HTa=$4E@>TgfFQ9jNC$gFN?IRq43W%F zXG)lggaelcy)(|dAEbzg`%>FRh$b8}V{dFJ@0R0-$ zSB6j9S<`L(;L2SZ!1$?W#oj(Op||_GoKHc2)#4M&{?*)T@t$%qscuF8W~JBC?xeux zGvf$HrB8AeuA&(QL(^&vHM8IHguT-&8nUjJe;n)DVlI_FzU_J3E5cZgbZnpWe(fhX!|r^bvsXJ_tDEZNE&w&2%M+ ztA58i@S6A8)Emx}KKUX@C8FB6me4Aavd`kf%4GvitDV)Hhjx}*b0vEXcZc`BVUH`Q z$}<&L^CUrak7P&cfb0RrtCDt2SkEeqI}0s3P!#}~m58&+W>31g#dxdQ*69_vHMNym zaHifPg4EENR~YOCgAV2nNqc=kvCo|@_>+v?4PqIWpwByjJ|i-5EbGufLYxc3{()U_ zeGS@j`W)@ta@1}Y!}3X>`{@3gPA))e{tkkliHFnbJ}7QTZ)WVQChHiJiQG4 zui9R^ba~eP~u2eVxV8UBXkD)KR!EoEgdV^pzKlco}W9*0})xK+%Y2x$oZM z@GSph5-GkFDSm}Oj)ZzGeP3TAw^*^@4pLsekYQseeTd3ZDt8F$>X+q9nyW69I^+XZ z^azw!hc?mC>F@zF?&3}^5I3GEcFZb9e~Xs*ZPxO|BXv%kt%+n+S%*PG&7uwC80tVw z@cV!8R{yc)hi?{**S~Ea`hQDtf4e3At9#&IiN2_!t{svhLiXC$gyS0(wZ5&4Wh-qx zX?bvwhW4OwzJdjzb7Y*#(Vh^x4p8S^wNW_bvEqm^U*V$#AC3bXK~PSpxH) z@6T6AfYLl-3f#RmDgq%#GQRMAowkKvZY+%>0Z>dr#tVKuIFaoFt2TCt1$OesBoxnV zRVOmKh*jR5fxTiEy?R8SaP^-dA*|`rVq;7f!Q>?-Y;lsf3;kq{S3+H zQ_x4UcV{_)B_&sx6(~ocs%`XMGaT%sWTa6{=*?VYM^WGRBK+9J9QaEL<*?*uf+x5Y z7v6Ewm)9o7LC3y%?QR(y3hkf9t4s>86J39b_@I*yFwoUU&sS=vU9pt*y|@{K< z78UP$D(dNr9blWg`rh1skl>kg^s&lRu2ynuJ6dIWI8T6>(G%64S=5;qMr2kpw+z=V z&{!(-<8qw$^X$kSj(#ZY}-m}X??GZk|P)p1=fKZeOgwZ5IVi-jf zlf)@dNJR)8QHe_j@%xW@(4pgBZ?EFeEeQeuVEMOchVlP;Cvvy3&Q{&9-QYm*c~PH) z@-PpO7%zs8tSacwg#Kl12_D}ln**hVHDOJnL|DGMo%eCYT|}ZSr`ND}O^5^XvU{}BGm=Y=)69dvTfT2o5 z$}c=ClT;ZZwE+#283YVJbnHkxkCZA7NRW!6SO`2($`B=YDU>Y|3`wH~K^jF2900Ol z0AIeOun{`#0f_LR!=d0-+7=CNjHWHLSS6N$oF>I&6U+?MUJ%C| z1h5??=_yb>U?6?nIAJ0t+SXrJN9sv%*@mpUXhZ9AU`O_Cmx9Ay1HUSM1dHXo_+R|(vM3c}!jVK~q4B^@WByq$Q~sD2XI*F77|Pkf zdyefeX~eh^$zi%Nd#p>YSvs$@4ks?;P>UH-&nix$$vM>ls)IBp%=_AP#WqD3$;g0+ znp73-qc4K3RQ4QSwl>l$b48yYU@AOhz1U<7QarCtX;BG2VfQX9-5Hy*dl$FQt{gcr zbVzBN{cFbFd>JuhMq0Z4_MZTL(ZeSn{wf?$!Ie^LIKL@cMTe@+9LOtTf>on*FQAn9 zPAE;t?=y}d+KZ?Os|=`ANLO0AGbl{qN_R=w8`*HT^0>N+gM)|deWhLm<47Mn_Mf<{WE8_rqsxtb}qHJU|J%?B{l6QdtVEiX|-LbgQa zjqNf*btVqSP!62Xur}>Ye}NrsDB8Q@gmf9$GR*_n*+hHz;W+ z(B|BQri4aB?{q{ax^e7=|~6r2%HNK)Ool~nn)HiZQyW5|!8 z+iEfDC^k_ze=O&ziEgs>nrzw$psXvS3ro)D@0D$2Jx+pDcL9Z&yov5*UXc~&v~G>o z2`N=ew7Row8>X2!BVT3L;p~N$mtw){V!6b~%}qNVbj4`zeObEgT-5qf^;~tICZW=# 
zIrJ@ZnPwIHk_67XuLY4U2D;auDD=Y@{2P za;{Q6o`*yJB$svP{foB?p-)-Y!Y1&$r;C*xo_2F~n6FIr+UJiioq0AVqt;ri?eT8U z0O*_A8(*!+p>wIOZcSaYER25 zM&bJCdMJF_7VH|`n0H=;*=6?9s^L?DRjOk`JwR{EQ5)H>a%ot6=I+x;MwK0V8uUi; zDAclkb)KEr!E0{gPzsP}=X(T)9G6i`KP6vXSx(fU**GB99nDB+uG*fX8-&UNHYUV1 zl8@m?Mq%J2SIKdtl8{>dn7f+v!@a`3WV&V?_Li+bgjp{7`FPgVVRQcTB zZiHaOQjl^ZCtWDt;WP%K)ZFc=r#zc0(MQwNYQMka=I;7Fnrcbb+D)FZ3L%Sh%x8l) z4_rIMC7WydGP)Z`BYIw+pKj*7Q z6DR!G4rFwCwLVj`b&vJJerCzwJFpZ}P(8fe0nZVemf%nUyS1-yKzW8R$pPDsNEVmt zv>_=W&X(}Rwd_=@t3r+*tz^-Dg0-7U=IvP|HBoV@IvX&@P~w6Mn1jrnD;?Z!3z?tl zMs6LO2sw&@F2Xj61@&O~6@ou&pIn^RWK5^TOtSR^{t+Zb2 zH^<%lB@o+Kku?X|LNkb4(2YVCigt$HB!RBG8 zag-D!L3km(I?0lRQYT~Q0^XVR{)rL`EVq>SNIzKr*7d}Y(S0M)Rg0z8tTghe(%pnY zX^ze%R#{~U7YDvY&a??u0D4fASlt05w*ny2l0u3aPE;8`S4J+C?~+vhmvcgt^nvBK zd@#a^pv<;Gk2OoKQ&y}9kHU5bIb?s7E)i$E%GLzs>nHU_Bpm^$xYN5V>u_tw4u;MyKq;uD)ZYP0Bvldr4e83S#%D?$Z*?R_&xDaKB4}#U zgl)g+=&W*NF_LmSzc3mb>|V}NK4K7jrYX$Ruc%WyF&71w(3<>=ER-vINufY@^w>gt z8KxkDA8$A#MijVY4f>oyG_&+}$r2L#>cZnX+B>6^v+4gk_G+%=7x=g}${@0Wn5YDt z)X7H?iSv0Y;(lqLjWqD{v14>FyEH#oLG8)@-)2Mmjn=!@0L}2H(V?Z0^EFd4=|-Gn zR29j1ObLQu|LMW~-{=6G!OB1b7fS#skbB^>D=J>mI)%(z()V3K$Oj{-y`Gn7*yK)e zQ}*tf+Mu~C2%X@{L~*$)PCGw)q4^tusBuQT!*`JLq#QRRuvMVcLV3^8!2+>6{?j^^ zxIPErqT8)UEd0M1B>Z*+Q@mf+J2W*uPyhyns*unVN; z9yFP60Cm9)x!>}BAAyFH#j?3X^T0>=TLSpL`2i|M;jS_wg7YXoSH&5SWx>ey@YKL5@{pJ2VDey2xqCu4!ruLUVv`^Sr@$xp2N5rO z#pzX)H5kgaYfj8!;ygi!LH(RVQU+y|87hg#pinj!^C<}yBR)!__XtpRaOp)6y$R8X zr$cCg>VfG)uZEtlldr;n>Pe5}PL>ZnZgQc9hg^$Mgs@itw`J}tG}FLq2eBZwW&;{k z#33vU_+hUnlHV|;+mk2IjMpM$l7OmKqkML>!8#3Z>jA&?L_WU^={ih|L9ozXL|;1a z1>UUK`wgMR`4qiH_??ZTqU6JIP0~}!g!Q~>>~+6Orf6|lSm$?=A2Wmy_$=s#LB7e` zVeJ*%hA~fpto=fZDO{g*Hf_?Io*{#(}`gX#I^h&e81Sa4lJ|H(iFap5~1j8C6Rjk}nDM zDUI@UQL~YVLNe>Y^cPPVBGawlDa6oU)-JNscE8z`R38`<7AbCv@>|k2z2q9yAOxEa z@?w(Ui1CGSzNAXKnae95_=9BJkq0lX0HiK?R$oH9Eb6UFPyJANvdOOJTGGS|m|2wy zz6lf}#3{g(<}pVy#i9k><6x!;cxv?)9Fvy!vO6wj>8~4BG%eb zg_85vO`<_A8PkV)>(PpSKkZAm``?rQPbF6#%~sxqt+CV+wTs45Ra&BIU&db4Uem=` zikb>3ja_L*#a3ldweM>CFxFDFC6?ONnrdrE5e-Ta(<(|B`Ls6KDnis^}4Z2d;2y>%#&(brY;#t`XQHzs}VG)er zbBL(v)x(@wNK0jSv!=ZJ=A1d!CR5?k;+e?0(W-2w){*J6-t^3C^F3F(;xqMMsX8`5 z`49rjSqv0(g+ zmro>fSPsNkuw|BV+0!`+{0vzG$u><>OIpd+>Des2mmsif=oX*+F|PE5aCWegKu2kL zEuzwA^6q9+uqHJZfCdNTkptWD>kJ|#{rygcMQrfGOzhhgBHTQ0^YF8pp%Q;-O+)W7 zs&4Vk-Pauk#OM|jD2|m*=`X|Ms&dr`Ovyf8l3YO;+`2rB_A21*rFLr6*&7&S!tV)N zZaqcQO7^USiBxqjbLMeIYOpNAXdRCb@?MA?n=DgaX$GxQ)yn{<=p={WX2SvF^bqb| zM~w|T%XJ#x&c|(@(dn8ov;7aeWjrhBA8Oi^b$b24sVViU-}l@~H=Y=$B(&1m-HFBC z@NH?n9-Yy&W4$|@2B{Qb^x45YY(hA=Ch=wBsa-z_MBcFY6@2v^;Y#@ z9SM)K*g4fPFjuJV?!%_c0aj{#>KFE5Fj2Lxmt}D`JE^F8>%x+&Tz~aOiVA{Fyz|29 z)5(Ph$WV$^0R3>zZQ&oR0En^SDo&p;Gd0lDunZ~2ToECDAJ~M8gMDG66wcjWmLK2M zI|Q92hCHAd`~*UY=os|5dxAMurQ4Hf(a7lqza2!-xj?f%2DY0a{Q{<-?FUp1SL?jg zvSwx%2}W9hDy{3xtFLM%poS8{b%>;3x(I0F9g+EPKv-8AXdz}6*6%-O)#+5;Wivag zpHilx|119%2pa%(u1_~-fn=jN*{x@)LC>k16pDUd63;mi6q4umfuo;j!qQIa6RWee3kVavSI@12WVzl8$%>IDfOlv9Sy+Z};fc?mBA^6+DdK z70NrrPr%wr8I>`)gaJp@7+^(TMK)RZ-tgIk@uLHLGhwD1bqW%x(BgJYI7u z_n50SRb(KcQs!Od$_%`qPvh;WwkaYnL}KR_7sgd5cMR~PO9!M8R-zxTx@G-akrxd! 
zotRqaE&wjctvB}}#U9GB>2iMNH>?O9=i}1VFHg(@A&?4_jr*Hut>k>|%~c4!@6yX6 z)m&a6lh#^37_HXR=jW#s-GE~Yo{w7-O-v8GO65VrqsS&xyy>cWnpMJV$ZJw7QN#&f z7wzIK6y;fOjV=Dv$BcalFcpMkBE976^;xnV%TwPXzNFY&m@=xLSe-@T+fla!hYCdQ z*W%gWWh#85H*qoyo7-1*bOJ-O4W5;?jLvEpmX<-&Bh2GlfM*~CC&g?~I;D_NIuV+iH-1zf)k`-OP?a?vUK_Td~H zcsEOqDBYd1Oc&M;7Qym~SfGa%Q?8UxH~jOK z>qXqs$|u{8yd-BQH4PW--{%SK|DXSN6(C8c?_w53y3hf`-^cYF;DJ=j+Ow1l{{$ns z><$(Q#=Jk{{$BW7m=CGzqiYI(!|o3kmbI1s3jT`(_fM^m;(ZPliOu)_U?U59lzU4W z*dLDj`Nmv;M3H#O;XjnXqY^5}KwSoky`jvjprjzxKJ3{gQ}}?t);`j~OUZMT8IsK7 zBtp^3fi;p6Olsn=Ly-(VCb|zk>{q0OA0@4l;qlTGwg08iN?C=} zOb_D^%Q9sA_Wl%}^p+1q8cN(z<9ad!)pZhv@BNq^MTjZ9y&FH?X2zP`@jER5*^ Rq&wL658izZ!OW0!^(WL&1BL(q literal 0 HcmV?d00001 diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala index 590ef949ffbd7..ce82756428849 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala @@ -129,7 +129,7 @@ class HiveSchemaInferenceSuite // properties out). assert(!externalCatalog.getTable(DATABASE, TEST_TABLE_NAME).schemaPreservesCase) val rawTable = client.getTable(DATABASE, TEST_TABLE_NAME) - assert(rawTable.properties.filterKeys(_.startsWith(DATASOURCE_SCHEMA_PREFIX)) == Map.empty) + assert(rawTable.properties.filterKeys(_.startsWith(DATASOURCE_SCHEMA_PREFIX)).isEmpty) schema } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala index 6feaaea3dfb89..501a877e8b7fb 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala @@ -153,7 +153,7 @@ class HiveSparkSubmitSuite // For more detail, see sql/hive/src/test/resources/regression-test-SPARK-8489/*scala. 
// TODO: revisit for Scala 2.13 support val version = Properties.versionNumberString match { - case v if v.startsWith("2.12") => v.substring(0, 4) + case v if v.startsWith("2.12") || v.startsWith("2.13") => v.substring(0, 4) case x => throw new Exception(s"Unsupported Scala Version: $x") } val jarDir = getTestResourcePath("regression-test-SPARK-8489") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index be6d023302293..1f3878ad2925d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -1128,7 +1128,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto def checkColStatsProps(expected: Map[String, String]): Unit = { sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS FOR COLUMNS " + stats.keys.mkString(", ")) val table = hiveClient.getTable("default", tableName) - val props = table.properties.filterKeys(_.startsWith("spark.sql.statistics.colStats")) + val props = table.properties.filterKeys(_.startsWith("spark.sql.statistics.colStats")).toMap assert(props == expected) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index fbd1fc1ea98df..62b6c6c201c68 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -994,7 +994,7 @@ class HiveDDLSuite |""".stripMargin) val newPart = catalog.getPartition(TableIdentifier("boxes"), Map("width" -> "4")) assert(newPart.storage.serde == Some(expectedSerde)) - assert(newPart.storage.properties.filterKeys(expectedSerdeProps.contains) == + assert(newPart.storage.properties.filterKeys(expectedSerdeProps.contains).toMap == expectedSerdeProps) } From d936cb328d1562d280a2dff29e31fefa1ad8bdd6 Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Thu, 17 Sep 2020 09:01:06 +0900 Subject: [PATCH 0048/1009] [SPARK-26425][SS] Add more constraint checks to avoid checkpoint corruption ### What changes were proposed in this pull request? Credits to tdas who reported and described the fix to [SPARK-26425](https://issues.apache.org/jira/browse/SPARK-26425). I just followed the description of the issue. This patch adds more checks on commit log as well as file streaming source so that multiple concurrent runs of streaming query don't mess up the status of query/checkpoint. This patch addresses two different spots which are having a bit different issues: 1. FileStreamSource.fetchMaxOffset() In structured streaming, we don't allow multiple streaming queries to run with same checkpoint (including concurrent runs of same query), so query should fail if it fails to write the metadata of specific batch ID due to same batch ID being written by others. 2. commit log As described in JIRA issue, assertion is already applied to the `offsetLog` for the same reason. https://github.com/apache/spark/blob/8167714cab93a5c06c23f92c9077fe8b9677ab28/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala#L394-L402 This patch applied the same for commit log. ### Why are the changes needed? This prevents the inconsistent behavior on streaming query and lets query fail instead. ### Does this PR introduce any user-facing change? No. 
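For reference, the guard described above amounts to a check like the following (a minimal sketch with placeholder names — `log`, `metadata`, `currentBatchId` — not the exact code in this patch; it relies on the metadata log's `add` returning `false` when another run has already written that batch):

```scala
// Sketch only: `log`, `metadata` and `currentBatchId` are placeholders.
// `add` returns false if the batch id was already committed by a concurrent run,
// which the caller now treats as a fatal condition instead of silently continuing.
if (!log.add(currentBatchId, metadata)) {
  throw new IllegalStateException(
    s"Concurrent update to the log. Multiple streaming jobs detected for $currentBatchId")
}
```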
### How was this patch tested? N/A, as the change is simple and obvious, and it's really hard to artificially reproduce the issue. Closes #25965 from HeartSaVioR/SPARK-26425. Lead-authored-by: Jungtaek Lim (HeartSaVioR) Co-authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../sql/execution/streaming/FileStreamSource.scala | 12 +++++++++--- .../execution/streaming/MicroBatchExecution.scala | 4 +++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala index 03d86e42e4db7..42401fe069551 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala @@ -178,10 +178,16 @@ class FileStreamSource( if (batchFiles.nonEmpty) { metadataLogCurrentOffset += 1 - metadataLog.add(metadataLogCurrentOffset, batchFiles.map { case (p, timestamp) => + + val fileEntries = batchFiles.map { case (p, timestamp) => FileEntry(path = p, timestamp = timestamp, batchId = metadataLogCurrentOffset) - }.toArray) - logInfo(s"Log offset set to $metadataLogCurrentOffset with ${batchFiles.size} new files") + }.toArray + if (metadataLog.add(metadataLogCurrentOffset, fileEntries)) { + logInfo(s"Log offset set to $metadataLogCurrentOffset with ${batchFiles.size} new files") + } else { + throw new IllegalStateException("Concurrent update to the log. Multiple streaming jobs " + + s"detected for $metadataLogCurrentOffset") + } } FileStreamSourceOffset(metadataLogCurrentOffset) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala index 468a8c975b478..5a91b24a0803f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -598,7 +598,9 @@ class MicroBatchExecution( withProgressLocked { sinkCommitProgress = batchSinkProgress watermarkTracker.updateWatermark(lastExecution.executedPlan) - commitLog.add(currentBatchId, CommitMetadata(watermarkTracker.currentWatermark)) + assert(commitLog.add(currentBatchId, CommitMetadata(watermarkTracker.currentWatermark)), + "Concurrent update to the commit log. Multiple streaming jobs detected for " + + s"$currentBatchId") committedOffsets ++= availableOffsets } logDebug(s"Completed batch ${currentBatchId}") From bd38e0be83528ec9ce0e5f533d4b3b25203dc917 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 17 Sep 2020 05:39:40 +0000 Subject: [PATCH 0049/1009] [SPARK-32903][SQL] GeneratePredicate should be able to eliminate common sub-expressions ### What changes were proposed in this pull request? This patch proposes to make GeneratePredicate eliminate common sub-expressions. ### Why are the changes needed? Both GenerateMutableProjection and GenerateUnsafeProjection, such codegen objects can eliminate common sub-expressions. But GeneratePredicate currently doesn't do it. We encounter a customer issue that a Filter pushed down through a Project causes performance issue, compared with not pushed down case. The issue is one expression used in Filter predicates are run many times. 
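As a concrete (hypothetical) illustration of that scenario in a spark-shell session — `df` stands for any DataFrame with a string `payload` column, and the UDF is only a stand-in for a costly expression:

```scala
import org.apache.spark.sql.functions._

// Hypothetical stand-in for an expensive expression (e.g. parsing a wide nested column).
val expensiveUdf = udf((s: String) => s.length)

// Pushing the filter below the projection substitutes the alias, so the physical predicate
// becomes roughly `expensiveUdf(payload) > 1 AND expensiveUdf(payload) < 10`: the same call
// appears twice and, without sub-expression elimination, is re-evaluated for the same row.
val projected = df.select(expensiveUdf($"payload").as("v"))
projected.filter($"v" > 1 && $"v" < 10)
```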
Due to the complex schema, the query nodes are not wholestage codegen, so it runs Filter.doExecute and then call GeneratePredicate. The common expression was run many time and became performance bottleneck. GeneratePredicate should be able to eliminate common sub-expressions for such case. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit tests. Closes #29776 from viirya/filter-pushdown. Authored-by: Liang-Chi Hsieh Signed-off-by: Wenchen Fan --- .../codegen/GeneratePredicate.scala | 13 +++- .../sql/catalyst/expressions/predicates.scala | 2 +- ...CodegenSubexpressionEliminationSuite.scala | 73 +++++++++++++++++++ 3 files changed, 85 insertions(+), 3 deletions(-) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodegenSubexpressionEliminationSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala index 6ba646d360d2e..7404030b661c8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala @@ -30,9 +30,17 @@ object GeneratePredicate extends CodeGenerator[Expression, BasePredicate] { protected def bind(in: Expression, inputSchema: Seq[Attribute]): Expression = BindReferences.bindReference(in, inputSchema) - protected def create(predicate: Expression): BasePredicate = { + def generate(expressions: Expression, useSubexprElimination: Boolean): BasePredicate = + create(canonicalize(expressions), useSubexprElimination) + + protected def create(predicate: Expression): BasePredicate = create(predicate, false) + + protected def create(predicate: Expression, useSubexprElimination: Boolean): BasePredicate = { val ctx = newCodeGenContext() - val eval = predicate.genCode(ctx) + + // Do sub-expression elimination for predicates. 
+ val eval = ctx.generateExpressions(Seq(predicate), useSubexprElimination).head + val evalSubexpr = ctx.subexprFunctionsCode val codeBody = s""" public SpecificPredicate generate(Object[] references) { @@ -53,6 +61,7 @@ object GeneratePredicate extends CodeGenerator[Expression, BasePredicate] { } public boolean eval(InternalRow ${ctx.INPUT_ROW}) { + $evalSubexpr ${eval.code} return !${eval.isNull} && ${eval.value}; } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index aa5cf4758564b..03066fb34cf27 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -73,7 +73,7 @@ trait Predicate extends Expression { object Predicate extends CodeGeneratorWithInterpretedFallback[Expression, BasePredicate] { override protected def createCodeGeneratedObject(in: Expression): BasePredicate = { - GeneratePredicate.generate(in) + GeneratePredicate.generate(in, SQLConf.get.subexpressionEliminationEnabled) } override protected def createInterpretedObject(in: Expression): BasePredicate = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodegenSubexpressionEliminationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodegenSubexpressionEliminationSuite.scala new file mode 100644 index 0000000000000..471f25356887f --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodegenSubexpressionEliminationSuite.scala @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions.codegen + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.types.{BooleanType, DataType} + +/** + * A test suite that makes sure code generation handles sub-expression elimination correctly. 
+ */ +class CodegenSubexpressionEliminationSuite extends SparkFunSuite { + + test("SPARK-32903: GeneratePredicate should eliminate sub-expressions") { + Seq(true, false).foreach { useSubexprElimination => + val leaf1 = ExprWithEvaluatedState() + val leaf2 = ExprWithEvaluatedState() + val leaf3 = ExprWithEvaluatedState() + val leaf4 = ExprWithEvaluatedState() + + val cond = Or(And(leaf1, leaf2), And(leaf3, leaf4)) + val instance = GeneratePredicate.generate(cond, useSubexprElimination = useSubexprElimination) + instance.initialize(0) + assert(instance.eval(null) === false) + + if (useSubexprElimination) { + // When we do sub-expression elimination, Spark thought left and right side of + // the `Or` expression are the same. So only left side was evaluated, and Spark + // reused the evaluation for right side. + assert(leaf1.evaluated == true) + assert(leaf2.evaluated == false) + assert(leaf3.evaluated == false) + assert(leaf4.evaluated == false) + } else { + assert(leaf1.evaluated == true) + assert(leaf2.evaluated == false) + assert(leaf3.evaluated == true) + assert(leaf4.evaluated == false) + } + } + } + +} + +/** + * An expression with evaluated state so we can know whether it is evaluated. + */ +case class ExprWithEvaluatedState() extends LeafExpression with CodegenFallback { + var evaluated: Boolean = false + override def eval(input: InternalRow): Any = { + evaluated = true + false + } + + override def nullable: Boolean = false + override def dataType: DataType = BooleanType +} From 92b75dc260eb43d906a425f9f9d8d63b78c48cee Mon Sep 17 00:00:00 2001 From: sychen Date: Thu, 17 Sep 2020 06:50:30 +0000 Subject: [PATCH 0050/1009] [SPARK-32508][SQL] Disallow empty part col values in partition spec before static partition writing ### What changes were proposed in this pull request? Write to static partition, check in advance that the partition field is empty. ### Why are the changes needed? When writing to the current static partition, the partition field is empty, and an error will be reported when all tasks are completed. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? add ut Closes #29316 from cxzl25/SPARK-32508. 
Authored-by: sychen Signed-off-by: Wenchen Fan --- .../sql/execution/datasources/rules.scala | 22 +++++++++++++++---- .../spark/sql/sources/InsertSuite.scala | 22 +++++++++++++++++++ .../apache/spark/sql/hive/InsertSuite.scala | 22 +++++++++++++++++++ 3 files changed, 62 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index 60cacda9f5f1c..5fb1a4d249070 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -386,7 +386,8 @@ case class PreprocessTableInsertion(conf: SQLConf) extends Rule[LogicalPlan] { private def preprocess( insert: InsertIntoStatement, tblName: String, - partColNames: Seq[String]): InsertIntoStatement = { + partColNames: Seq[String], + catalogTable: Option[CatalogTable]): InsertIntoStatement = { val normalizedPartSpec = PartitioningUtils.normalizePartitionSpec( insert.partitionSpec, partColNames, tblName, conf.resolver) @@ -402,6 +403,18 @@ case class PreprocessTableInsertion(conf: SQLConf) extends Rule[LogicalPlan] { s"including ${staticPartCols.size} partition column(s) having constant value(s).") } + val partitionsTrackedByCatalog = catalogTable.isDefined && + catalogTable.get.partitionColumnNames.nonEmpty && + catalogTable.get.tracksPartitionsInCatalog + if (partitionsTrackedByCatalog && normalizedPartSpec.nonEmpty) { + // empty partition column value + if (normalizedPartSpec.filter(_._2.isDefined).exists(_._2.get.isEmpty)) { + val spec = normalizedPartSpec.map(p => p._1 + "=" + p._2).mkString("[", ", ", "]") + throw new AnalysisException( + s"Partition spec is invalid. 
The spec ($spec) contains an empty partition column value") + } + } + val newQuery = TableOutputResolver.resolveOutputColumns( tblName, expectedColumns, insert.query, byName = false, conf) if (normalizedPartSpec.nonEmpty) { @@ -427,13 +440,14 @@ case class PreprocessTableInsertion(conf: SQLConf) extends Rule[LogicalPlan] { table match { case relation: HiveTableRelation => val metadata = relation.tableMeta - preprocess(i, metadata.identifier.quotedString, metadata.partitionColumnNames) + preprocess(i, metadata.identifier.quotedString, metadata.partitionColumnNames, + Some(metadata)) case LogicalRelation(h: HadoopFsRelation, _, catalogTable, _) => val tblName = catalogTable.map(_.identifier.quotedString).getOrElse("unknown") - preprocess(i, tblName, h.partitionSchema.map(_.name)) + preprocess(i, tblName, h.partitionSchema.map(_.name), catalogTable) case LogicalRelation(_: InsertableRelation, _, catalogTable, _) => val tblName = catalogTable.map(_.identifier.quotedString).getOrElse("unknown") - preprocess(i, tblName, Nil) + preprocess(i, tblName, Nil, catalogTable) case _ => i } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index abd33ab8a8f22..32c4fb60b8c54 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -866,6 +866,28 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { }.getMessage assert(message.contains("LOCAL is supported only with file: scheme")) } + + test("SPARK-32508 " + + "Disallow empty part col values in partition spec before static partition writing") { + withTable("insertTable") { + sql( + """ + |CREATE TABLE insertTable(i int, part1 string, part2 string) USING PARQUET + |PARTITIONED BY (part1, part2) + """.stripMargin) + val msg = "Partition spec is invalid" + assert(intercept[AnalysisException] { + sql("INSERT INTO TABLE insertTable PARTITION(part1=1, part2='') SELECT 1") + }.getMessage.contains(msg)) + assert(intercept[AnalysisException] { + sql("INSERT INTO TABLE insertTable PARTITION(part1='', part2) SELECT 1 ,'' AS part2") + }.getMessage.contains(msg)) + + sql("INSERT INTO TABLE insertTable PARTITION(part1='1', part2='2') SELECT 1") + sql("INSERT INTO TABLE insertTable PARTITION(part1='1', part2) SELECT 1 ,'2' AS part2") + sql("INSERT INTO TABLE insertTable PARTITION(part1='1', part2) SELECT 1 ,'' AS part2") + } + } } class FileExistingTestFileSystem extends RawLocalFileSystem { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala index 421dcb499bd6a..ebc6cfb77d355 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala @@ -847,4 +847,26 @@ class InsertSuite extends QueryTest with TestHiveSingleton with BeforeAndAfter } } } + + test("SPARK-32508 " + + "Disallow empty part col values in partition spec before static partition writing") { + withTable("t1") { + spark.sql( + """ + |CREATE TABLE t1 (c1 int) + |PARTITIONED BY (d string) + """.stripMargin) + + val e = intercept[AnalysisException] { + spark.sql( + """ + |INSERT OVERWRITE TABLE t1 PARTITION(d='') + |SELECT 1 + """.stripMargin) + }.getMessage + + assert(!e.contains("get partition: Value for key d is null or empty")) + assert(e.contains("Partition spec is invalid")) + } + } } From 
e5e54a3614ffd2a9150921e84e5b813d5cbf285a Mon Sep 17 00:00:00 2001 From: Tom van Bussel Date: Thu, 17 Sep 2020 12:35:40 +0200 Subject: [PATCH 0051/1009] [SPARK-32900][CORE] Allow UnsafeExternalSorter to spill when there are nulls ### What changes were proposed in this pull request? This PR changes the way `UnsafeExternalSorter.SpillableIterator` checks whether it has spilled already, by checking whether `inMemSorter` is null. It also allows it to spill other `UnsafeSorterIterator`s than `UnsafeInMemorySorter.SortedIterator`. ### Why are the changes needed? Before this PR `UnsafeExternalSorter.SpillableIterator` could not spill when there are NULLs in the input and radix sorting is used. Currently, Spark determines whether UnsafeExternalSorter.SpillableIterator has not spilled yet by checking whether `upstream` is an instance of `UnsafeInMemorySorter.SortedIterator`. When radix sorting is used and there are NULLs in the input however, `upstream` will be an instance of `UnsafeExternalSorter.ChainedIterator` instead, and Spark will assume that the `SpillableIterator` iterator has spilled already, and therefore cannot spill again when it's supposed to spill. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? A test was added to `UnsafeExternalSorterSuite` (and therefore also to `UnsafeExternalSorterRadixSortSuite`). I manually confirmed that the test failed in `UnsafeExternalSorterRadixSortSuite` without this patch. Closes #29772 from tomvanbussel/SPARK-32900. Authored-by: Tom van Bussel Signed-off-by: herman --- .../unsafe/sort/UnsafeExternalSorter.java | 69 +++++++++++-------- .../unsafe/sort/UnsafeInMemorySorter.java | 1 + .../unsafe/sort/UnsafeSorterIterator.java | 2 + .../unsafe/sort/UnsafeSorterSpillMerger.java | 5 ++ .../unsafe/sort/UnsafeSorterSpillReader.java | 5 ++ .../sort/UnsafeExternalSorterSuite.java | 33 +++++++++ 6 files changed, 88 insertions(+), 27 deletions(-) diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java index 55e4e609c3c7b..71b9a5bc11542 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java @@ -501,11 +501,15 @@ private static void spillIterator(UnsafeSorterIterator inMemIterator, */ class SpillableIterator extends UnsafeSorterIterator { private UnsafeSorterIterator upstream; - private UnsafeSorterIterator nextUpstream = null; private MemoryBlock lastPage = null; private boolean loaded = false; private int numRecords = 0; + private Object currentBaseObject; + private long currentBaseOffset; + private int currentRecordLength; + private long currentKeyPrefix; + SpillableIterator(UnsafeSorterIterator inMemIterator) { this.upstream = inMemIterator; this.numRecords = inMemIterator.getNumRecords(); @@ -516,23 +520,26 @@ public int getNumRecords() { return numRecords; } + @Override + public long getCurrentPageNumber() { + throw new UnsupportedOperationException(); + } + public long spill() throws IOException { synchronized (this) { - if (!(upstream instanceof UnsafeInMemorySorter.SortedIterator && nextUpstream == null - && numRecords > 0)) { + if (inMemSorter == null || numRecords <= 0) { return 0L; } - UnsafeInMemorySorter.SortedIterator inMemIterator = - ((UnsafeInMemorySorter.SortedIterator) upstream).clone(); + long currentPageNumber = 
upstream.getCurrentPageNumber(); - ShuffleWriteMetrics writeMetrics = new ShuffleWriteMetrics(); + ShuffleWriteMetrics writeMetrics = new ShuffleWriteMetrics(); // Iterate over the records that have not been returned and spill them. final UnsafeSorterSpillWriter spillWriter = new UnsafeSorterSpillWriter(blockManager, fileBufferSizeBytes, writeMetrics, numRecords); - spillIterator(inMemIterator, spillWriter); + spillIterator(upstream, spillWriter); spillWriters.add(spillWriter); - nextUpstream = spillWriter.getReader(serializerManager); + upstream = spillWriter.getReader(serializerManager); long released = 0L; synchronized (UnsafeExternalSorter.this) { @@ -540,8 +547,7 @@ public long spill() throws IOException { // is accessing the current record. We free this page in that caller's next loadNext() // call. for (MemoryBlock page : allocatedPages) { - if (!loaded || page.pageNumber != - ((UnsafeInMemorySorter.SortedIterator)upstream).getCurrentPageNumber()) { + if (!loaded || page.pageNumber != currentPageNumber) { released += page.size(); freePage(page); } else { @@ -575,22 +581,26 @@ public void loadNext() throws IOException { try { synchronized (this) { loaded = true; - if (nextUpstream != null) { - // Just consumed the last record from in memory iterator - if(lastPage != null) { - // Do not free the page here, while we are locking `SpillableIterator`. The `freePage` - // method locks the `TaskMemoryManager`, and it's a bad idea to lock 2 objects in - // sequence. We may hit dead lock if another thread locks `TaskMemoryManager` and - // `SpillableIterator` in sequence, which may happen in - // `TaskMemoryManager.acquireExecutionMemory`. - pageToFree = lastPage; - lastPage = null; - } - upstream = nextUpstream; - nextUpstream = null; + // Just consumed the last record from in memory iterator + if (lastPage != null) { + // Do not free the page here, while we are locking `SpillableIterator`. The `freePage` + // method locks the `TaskMemoryManager`, and it's a bad idea to lock 2 objects in + // sequence. We may hit dead lock if another thread locks `TaskMemoryManager` and + // `SpillableIterator` in sequence, which may happen in + // `TaskMemoryManager.acquireExecutionMemory`. + pageToFree = lastPage; + lastPage = null; } numRecords--; upstream.loadNext(); + + // Keep track of the current base object, base offset, record length, and key prefix, + // so that the current record can still be read in case a spill is triggered and we + // switch to the spill writer's iterator. 
+ currentBaseObject = upstream.getBaseObject(); + currentBaseOffset = upstream.getBaseOffset(); + currentRecordLength = upstream.getRecordLength(); + currentKeyPrefix = upstream.getKeyPrefix(); } } finally { if (pageToFree != null) { @@ -601,22 +611,22 @@ public void loadNext() throws IOException { @Override public Object getBaseObject() { - return upstream.getBaseObject(); + return currentBaseObject; } @Override public long getBaseOffset() { - return upstream.getBaseOffset(); + return currentBaseOffset; } @Override public int getRecordLength() { - return upstream.getRecordLength(); + return currentRecordLength; } @Override public long getKeyPrefix() { - return upstream.getKeyPrefix(); + return currentKeyPrefix; } } @@ -693,6 +703,11 @@ public int getNumRecords() { return numRecords; } + @Override + public long getCurrentPageNumber() { + return current.getCurrentPageNumber(); + } + @Override public boolean hasNext() { while (!current.hasNext() && !iterators.isEmpty()) { diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java index 660eb790a550b..ff641a24a7b3e 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java @@ -330,6 +330,7 @@ public void loadNext() { @Override public long getBaseOffset() { return baseOffset; } + @Override public long getCurrentPageNumber() { return currentPageNumber; } diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterIterator.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterIterator.java index 1b3167fcc250c..d9f22311d07c2 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterIterator.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterIterator.java @@ -34,4 +34,6 @@ public abstract class UnsafeSorterIterator { public abstract long getKeyPrefix(); public abstract int getNumRecords(); + + public abstract long getCurrentPageNumber(); } diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillMerger.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillMerger.java index ab800288dcb43..f8603c5799e9b 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillMerger.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillMerger.java @@ -70,6 +70,11 @@ public int getNumRecords() { return numRecords; } + @Override + public long getCurrentPageNumber() { + throw new UnsupportedOperationException(); + } + @Override public boolean hasNext() { return !priorityQueue.isEmpty() || (spillReader != null && spillReader.hasNext()); diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillReader.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillReader.java index a524c4790407d..db79efd008530 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillReader.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillReader.java @@ -89,6 +89,11 @@ public int getNumRecords() { return numRecords; } + @Override + public long getCurrentPageNumber() { + throw new UnsupportedOperationException(); + 
} + @Override public boolean hasNext() { return (numRecordsRemaining > 0); diff --git a/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java b/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java index 43977717f6c97..087d090c1c60e 100644 --- a/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java +++ b/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java @@ -359,6 +359,39 @@ public void forcedSpillingWithReadIterator() throws Exception { assertSpillFilesWereCleanedUp(); } + @Test + public void forcedSpillingNullsWithReadIterator() throws Exception { + final UnsafeExternalSorter sorter = newSorter(); + long[] record = new long[100]; + final int recordSize = record.length * 8; + final int n = (int) pageSizeBytes / recordSize * 3; + for (int i = 0; i < n; i++) { + boolean isNull = i % 2 == 0; + sorter.insertRecord(record, Platform.LONG_ARRAY_OFFSET, recordSize, 0, isNull); + } + assertTrue(sorter.getNumberOfAllocatedPages() >= 2); + + UnsafeExternalSorter.SpillableIterator iter = + (UnsafeExternalSorter.SpillableIterator) sorter.getSortedIterator(); + final int numRecordsToReadBeforeSpilling = n / 3; + for (int i = 0; i < numRecordsToReadBeforeSpilling; i++) { + assertTrue(iter.hasNext()); + iter.loadNext(); + } + + assertTrue(iter.spill() > 0); + assertEquals(0, iter.spill()); + + for (int i = numRecordsToReadBeforeSpilling; i < n; i++) { + assertTrue(iter.hasNext()); + iter.loadNext(); + } + assertFalse(iter.hasNext()); + + sorter.cleanupResources(); + assertSpillFilesWereCleanedUp(); + } + @Test public void forcedSpillingWithNotReadIterator() throws Exception { final UnsafeExternalSorter sorter = newSorter(); From a54a6a0113115112f589d09c875f1cba5fd0bbca Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Thu, 17 Sep 2020 11:20:50 +0000 Subject: [PATCH 0052/1009] [SPARK-32287][CORE] Fix flaky o.a.s.ExecutorAllocationManagerSuite on GithubActions ### What changes were proposed in this pull request? To fix the flaky `ExecutorAllocationManagerSuite`: Avoid first `schedule()` invocation after `ExecutorAllocationManager` started. ### Why are the changes needed? `ExecutorAllocationManagerSuite` is still flaky, see: https://github.com/apache/spark/pull/29722/checks?check_run_id=1117979237 By checking the below logs, we can see that there's a race condition between thread `pool-1-thread-1-ScalaTest-running` and thread `spark-dynamic-executor-allocation`. The only possibility of thread `spark-dynamic-executor-allocation` becoming active is the first time invocation of `schedule()`(since the `TEST_SCHEDULE_INTERVAL`(30s) is really long, so it's impossible the second invocation would happen). Thus, I think we shall avoid the first invocation too. ```scala 20/09/15 12:41:20.831 pool-1-thread-1-ScalaTest-running-ExecutorAllocationManagerSuite INFO ExecutorAllocationManager: Requesting 1 new executor because tasks are backlogged (new desired total will be 2 for resource profile id: 0) 20/09/15 12:41:20.832 spark-dynamic-executor-allocation INFO ExecutorAllocationManager: Requesting 2 new executors because tasks are backlogged (new desired total will be 4 for resource profile id: 0) ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? The flaky can't be reproduced locally so it's hard to say it has been completely fixed by now. We need time to see the result. Closes #29773 from Ngone51/fix-SPARK-32287. 
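As a side note on the fix itself: the change (visible in the diff below) does not stretch the polling interval any further, it simply skips registering the background task when a test-only flag disables it. A minimal, Spark-free sketch of that gating pattern, where `testing` and `scheduleEnabled` are illustrative stand-ins for `Utils.isTesting` and the new test config:

```scala
import java.util.concurrent.{Executors, TimeUnit}

object ScheduleGatingSketch {
  def main(args: Array[String]): Unit = {
    val testing = true          // stands in for Utils.isTesting
    val scheduleEnabled = false // stands in for the test-only "schedule enabled" flag

    val executor = Executors.newSingleThreadScheduledExecutor()
    val tick = new Runnable {
      override def run(): Unit = println("schedule() tick")
    }

    // Register the periodic task only when scheduling is allowed. In tests the
    // suite drives schedule() by hand, so no background tick can race with it.
    if (!testing || scheduleEnabled) {
      executor.scheduleWithFixedDelay(tick, 0L, 100L, TimeUnit.MILLISECONDS)
    }

    executor.shutdown()
  }
}
```

In the real change the same condition guards the `scheduleWithFixedDelay` call, which is why the interval constant can go back to a plain 100 ms.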
Authored-by: yi.wu Signed-off-by: Wenchen Fan --- .../apache/spark/ExecutorAllocationManager.scala | 13 ++++++------- .../org/apache/spark/internal/config/Tests.scala | 10 +++++----- .../spark/ExecutorAllocationManagerSuite.scala | 9 +++++---- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index 341334c8a29c4..1dd64df106bc2 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -28,7 +28,7 @@ import com.codahale.metrics.{Gauge, MetricRegistry} import org.apache.spark.internal.{config, Logging} import org.apache.spark.internal.config._ import org.apache.spark.internal.config.DECOMMISSION_ENABLED -import org.apache.spark.internal.config.Tests.TEST_SCHEDULE_INTERVAL +import org.apache.spark.internal.config.Tests.TEST_DYNAMIC_ALLOCATION_SCHEDULE_ENABLED import org.apache.spark.metrics.source.Source import org.apache.spark.resource.ResourceProfile.UNKNOWN_RESOURCE_PROFILE_ID import org.apache.spark.resource.ResourceProfileManager @@ -150,11 +150,7 @@ private[spark] class ExecutorAllocationManager( private var addTime: Long = NOT_SET // Polling loop interval (ms) - private val intervalMillis: Long = if (Utils.isTesting) { - conf.get(TEST_SCHEDULE_INTERVAL) - } else { - 100 - } + private val intervalMillis: Long = 100 // Listener for Spark events that impact the allocation policy val listener = new ExecutorAllocationListener @@ -247,7 +243,10 @@ private[spark] class ExecutorAllocationManager( } } } - executor.scheduleWithFixedDelay(scheduleTask, 0, intervalMillis, TimeUnit.MILLISECONDS) + + if (!testing || conf.get(TEST_DYNAMIC_ALLOCATION_SCHEDULE_ENABLED)) { + executor.scheduleWithFixedDelay(scheduleTask, 0, intervalMillis, TimeUnit.MILLISECONDS) + } // copy the maps inside synchonize to ensure not being modified val (numExecutorsTarget, numLocalityAware) = synchronized { diff --git a/core/src/main/scala/org/apache/spark/internal/config/Tests.scala b/core/src/main/scala/org/apache/spark/internal/config/Tests.scala index a1ebe5ce0ca32..7b8b204bab640 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/Tests.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/Tests.scala @@ -26,11 +26,11 @@ private[spark] object Tests { .longConf .createWithDefault(Runtime.getRuntime.maxMemory) - val TEST_SCHEDULE_INTERVAL = - ConfigBuilder("spark.testing.dynamicAllocation.scheduleInterval") - .version("2.3.0") - .longConf - .createWithDefault(100) + val TEST_DYNAMIC_ALLOCATION_SCHEDULE_ENABLED = + ConfigBuilder("spark.testing.dynamicAllocation.schedule.enabled") + .version("3.1.0") + .booleanConf + .createWithDefault(true) val IS_TESTING = ConfigBuilder("spark.testing") .version("1.0.1") diff --git a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala index 3f8cbf59bf527..6a38bba5dd0e5 100644 --- a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala @@ -28,7 +28,7 @@ import org.scalatest.PrivateMethodTester import org.apache.spark.executor.ExecutorMetrics import org.apache.spark.internal.config import org.apache.spark.internal.config.DECOMMISSION_ENABLED -import org.apache.spark.internal.config.Tests.TEST_SCHEDULE_INTERVAL +import 
org.apache.spark.internal.config.Tests.TEST_DYNAMIC_ALLOCATION_SCHEDULE_ENABLED import org.apache.spark.metrics.MetricsSystem import org.apache.spark.resource._ import org.apache.spark.resource.ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID @@ -1665,9 +1665,10 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { .set(config.DYN_ALLOCATION_EXECUTOR_IDLE_TIMEOUT.key, s"${executorIdleTimeout.toString}s") .set(config.SHUFFLE_SERVICE_ENABLED, true) .set(config.DYN_ALLOCATION_TESTING, true) - // SPARK-22864: effectively disable the allocation schedule by setting the period to a - // really long value. - .set(TEST_SCHEDULE_INTERVAL, 30000L) + // SPARK-22864/SPARK-32287: effectively disable the allocation schedule for the tests so that + // we won't result in the race condition between thread "spark-dynamic-executor-allocation" + // and thread "pool-1-thread-1-ScalaTest-running". + .set(TEST_DYNAMIC_ALLOCATION_SCHEDULE_ENABLED, false) .set(DECOMMISSION_ENABLED, decommissioningEnabled) sparkConf } From 482a79a5e39d54048533d42e1ca1266fbe95fffb Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Thu, 17 Sep 2020 07:50:39 -0700 Subject: [PATCH 0053/1009] [SPARK-24994][SQL][FOLLOW-UP] Handle foldable, timezone and cleanup ### What changes were proposed in this pull request? This is a follow-up on #29565, and addresses a few issues in the last PR: - style issue pointed by [this comment](https://github.com/apache/spark/pull/29565#discussion_r487646749) - skip optimization when `fromExp` is foldable (by [this comment](https://github.com/apache/spark/pull/29565#discussion_r487646973)) as there could be more efficient rule to apply for this case. - pass timezone info to the generated cast on the literal value - a bunch of cleanups and test improvements Originally I plan to handle this when implementing [SPARK-32858](https://issues.apache.org/jira/browse/SPARK-32858) but now think it's better to isolate these changes from that. ### Why are the changes needed? To fix a few left over issues in the above PR. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added a test for the foldable case. Otherwise relying on existing tests. Closes #29775 from sunchao/SPARK-24994-followup. Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../optimizer/UnwrapCastInBinaryComparison.scala | 7 +++++-- .../optimizer/UnwrapCastInBinaryComparisonSuite.scala | 10 ++++++---- .../apache/spark/sql/FileBasedDataSourceSuite.scala | 5 ++--- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala index 89f7c0f71b7ac..d0acfe036d443 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala @@ -184,7 +184,7 @@ object UnwrapCastInBinaryComparison extends Rule[LogicalPlan] { } else { // This means `value` is within range `(min, max)`. Optimize this by moving the cast to the // literal side. 
- val lit = Cast(Literal(value), fromType) + val lit = Literal(Cast(Literal(value), fromType).eval(), fromType) exp match { case GreaterThan(_, _) => GreaterThan(fromExp, lit) case GreaterThanOrEqual(_, _) => GreaterThanOrEqual(fromExp, lit) @@ -202,9 +202,12 @@ object UnwrapCastInBinaryComparison extends Rule[LogicalPlan] { * i.e., the conversion is injective. Note this only handles the case when both sides are of * integral type. */ - private def canImplicitlyCast(fromExp: Expression, toType: DataType, + private def canImplicitlyCast( + fromExp: Expression, + toType: DataType, literalType: DataType): Boolean = { toType.sameType(literalType) && + !fromExp.foldable && fromExp.dataType.isInstanceOf[IntegralType] && toType.isInstanceOf[IntegralType] && Cast.canUpCast(fromExp.dataType, toType) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala index 387964088b808..373c1febd2488 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala @@ -26,14 +26,14 @@ import org.apache.spark.sql.catalyst.optimizer.UnwrapCastInBinaryComparison._ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.RuleExecutor -import org.apache.spark.sql.types.{BooleanType, ByteType, DoubleType, IntegerType} +import org.apache.spark.sql.types._ class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelper { object Optimize extends RuleExecutor[LogicalPlan] { val batches: List[Batch] = Batch("Unwrap casts in binary comparison", FixedPoint(10), - NullPropagation, ConstantFolding, UnwrapCastInBinaryComparison) :: Nil + NullPropagation, UnwrapCastInBinaryComparison) :: Nil } val testRelation: LocalRelation = LocalRelation('a.short, 'b.float) @@ -97,7 +97,7 @@ class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelp assertEquivalent(Literal(v.toInt) >= castInt(f), trueIfNotNull(f)) assertEquivalent(Literal(v.toInt) > castInt(f), f =!= v) - assertEquivalent(Literal(30) <= castInt(f), Literal(30.toShort) <= f) + assertEquivalent(Literal(30) <= castInt(f), Literal(30.toShort, ShortType) <= f) } test("unwrap cast should have no effect when input is not integral type") { @@ -119,10 +119,12 @@ class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelp ) } - test("unwrap cast should skip when expression is non-deterministic") { + test("unwrap cast should skip when expression is non-deterministic or foldable") { Seq(positiveInt, negativeInt).foreach (v => { val e = Cast(First(f, ignoreNulls = true), IntegerType) <=> v assertEquivalent(e, e, evaluate = false) + val e2 = Cast(Literal(30.toShort), IntegerType) >= v + assertEquivalent(e2, e2, evaluate = false) }) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala index 8d6d93d13d143..f72e3347510f9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -32,14 +32,13 @@ import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd} import 
org.apache.spark.sql.TestingUDT.{IntervalUDT, NullData, NullUDT} import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.expressions.IntegralLiteralTestUtils.{negativeInt, positiveInt} -import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical.Filter import org.apache.spark.sql.execution.SimpleMode import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.datasources.FilePartition -import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, DataSourceV2ScanRelation, FileScan} +import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, FileScan} import org.apache.spark.sql.execution.datasources.v2.orc.OrcScan -import org.apache.spark.sql.execution.datasources.v2.parquet.{ParquetScan, ParquetTable} +import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetScan import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, SortMergeJoinExec} import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf From 88e87bc8ebfa5aa1a8cc8928672749517ae0c41f Mon Sep 17 00:00:00 2001 From: Udbhav30 Date: Thu, 17 Sep 2020 09:25:17 -0700 Subject: [PATCH 0054/1009] [SPARK-32887][DOC] Correct the typo for SHOW TABLE ### What changes were proposed in this pull request? Correct the typo in Show Table document ### Why are the changes needed? Current Document of Show Table returns in parse error, so it is misleading to users ### Does this PR introduce _any_ user-facing change? Yes, the document of show table is corrected now ### How was this patch tested? NA Closes #29758 from Udbhav30/showtable. Authored-by: Udbhav30 Signed-off-by: Dongjoon Hyun --- docs/sql-ref-syntax-aux-show-table.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/sql-ref-syntax-aux-show-table.md b/docs/sql-ref-syntax-aux-show-table.md index 0ce0a3eefa538..3314402ea3e2b 100644 --- a/docs/sql-ref-syntax-aux-show-table.md +++ b/docs/sql-ref-syntax-aux-show-table.md @@ -97,7 +97,7 @@ SHOW TABLE EXTENDED LIKE 'employee'; +--------+---------+-----------+--------------------------------------------------------------+ -- showing the multiple table details with pattern matching -SHOW TABLE EXTENDED LIKE `employe*`; +SHOW TABLE EXTENDED LIKE 'employe*'; +--------+---------+-----------+--------------------------------------------------------------+ |database|tableName|isTemporary| information | +--------+---------+-----------+--------------------------------------------------------------+ @@ -146,7 +146,7 @@ SHOW TABLE EXTENDED LIKE `employe*`; +--------+---------+----------+---------------------------------------------------------------+ -- show partition file system details -SHOW TABLE EXTENDED IN default LIKE `employee` PARTITION (`grade=1`); +SHOW TABLE EXTENDED IN default LIKE 'employee' PARTITION (grade=1); +--------+---------+-----------+--------------------------------------------------------------+ |database|tableName|isTemporary| information | +--------+---------+-----------+--------------------------------------------------------------+ @@ -169,7 +169,7 @@ SHOW TABLE EXTENDED IN default LIKE `employee` PARTITION (`grade=1`); +--------+---------+-----------+--------------------------------------------------------------+ -- show partition file system details with regex fails as shown below -SHOW TABLE EXTENDED IN default LIKE `empl*` PARTITION (`grade=1`); +SHOW TABLE EXTENDED IN default LIKE 'empl*' 
PARTITION (grade=1); Error: Error running query: org.apache.spark.sql.catalyst.analysis.NoSuchTableException: Table or view 'emplo*' not found in database 'default'; (state=,code=0) ``` From a8442c282665c93384d3465c440be588394e8ab4 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 17 Sep 2020 14:01:52 -0700 Subject: [PATCH 0055/1009] [SPARK-32926][TESTS] Add Scala 2.13 build test in GitHub Action ### What changes were proposed in this pull request? The PR aims to add Scala 2.13 build test coverage into GitHub Action for Apache Spark 3.1.0. ### Why are the changes needed? The branch is ready for Scala 2.13 and this will prevent any regression. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass the GitHub Action. Closes #29793 from dongjoon-hyun/SPARK-32926. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 1c0f50328ee72..17c040323d515 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -297,3 +297,29 @@ jobs: mkdir -p ~/.m2 ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 install rm -rf ~/.m2/repository/org/apache/spark + + scala-213: + name: Scala 2.13 build + runs-on: ubuntu-latest + steps: + - name: Checkout Spark repository + uses: actions/checkout@v2 + - name: Cache Maven local repository + uses: actions/cache@v2 + with: + path: ~/.m2/repository + key: scala-213-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + scala-213-maven- + - name: Install Java 11 + uses: actions/setup-java@v1 + with: + java-version: 11 + - name: Build with Maven + run: | + export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" + export MAVEN_CLI_OPTS="--no-transfer-progress" + mkdir -p ~/.m2 + ./dev/change-scala-version.sh 2.13 + ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 -Pscala-2.13 install + rm -rf ~/.m2/repository/org/apache/spark From 5817c584b8a259f5c9be13a26f2adec905474ce6 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 17 Sep 2020 14:35:01 -0700 Subject: [PATCH 0056/1009] [SPARK-32909][SQL] Pass all `sql/hive-thriftserver` module UTs in Scala 2.13 ### What changes were proposed in this pull request? This pr fix failed and aborted cases in sql hive-thriftserver module in Scala 2.13, the main change of this pr as follow: - Use `s.c.Seq` instead of `Seq` in `HiveResult` because the input type maybe `mutable.ArraySeq`, but `Seq` represent `immutable.Seq` in Scala 2.13. 
- Reset classLoader after `HiveMetastoreLazyInitializationSuite` completed because context class loader is `NonClosableMutableURLClassLoader` in `HiveMetastoreLazyInitializationSuite` running process, and it propagate to `HiveThriftServer2ListenerSuite` trigger following problems in Scala 2.13: ``` HiveThriftServer2ListenerSuite: *** RUN ABORTED *** java.lang.LinkageError: loader constraint violation: loader (instance of net/bytebuddy/dynamic/loading/MultipleParentClassLoader) previously initiated loading for a different type with name "org/apache/hive/service/ServiceStateChangeListener" at org.mockito.codegen.HiveThriftServer2$MockitoMock$1850222569.(Unknown Source) at sun.reflect.GeneratedSerializationConstructorAccessor530.newInstance(Unknown Source) at java.lang.reflect.Constructor.newInstance(Constructor.java:423) at org.objenesis.instantiator.sun.SunReflectionFactoryInstantiator.newInstance(SunReflectionFactoryInstantiator.java:48) at org.objenesis.ObjenesisBase.newInstance(ObjenesisBase.java:73) at org.mockito.internal.creation.instance.ObjenesisInstantiator.newInstance(ObjenesisInstantiator.java:19) at org.mockito.internal.creation.bytebuddy.SubclassByteBuddyMockMaker.createMock(SubclassByteBuddyMockMaker.java:47) at org.mockito.internal.creation.bytebuddy.ByteBuddyMockMaker.createMock(ByteBuddyMockMaker.java:25) at org.mockito.internal.util.MockUtil.createMock(MockUtil.java:35) at org.mockito.internal.MockitoCore.mock(MockitoCore.java:63) ... ``` After this pr `HiveThriftServer2Suites` and `HiveThriftServer2ListenerSuite` was fixed and all 461 test passed ### Why are the changes needed? We need to support a Scala 2.13 build. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Scala 2.12: Pass the Jenkins or GitHub Action - Scala 2.13: All tests passed. Do the following: ``` dev/change-scala-version.sh 2.13 mvn clean install -DskipTests -pl sql/hive-thriftserver -am -Phive-thriftserver -Pscala-2.13 mvn test -pl sql/hive-thriftserver -Phive -Phive-thriftserver -Pscala-2.13 ``` **Before** ``` HiveThriftServer2ListenerSuite: *** RUN ABORTED *** ``` **After** ``` Tests: succeeded 461, failed 0, canceled 0, ignored 17, pending 0 All tests passed. ``` Closes #29783 from LuciferYang/sql-thriftserver-tests. 
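The classloader part of the fix is the usual save-and-restore-in-`finally` pattern, so whatever loader a suite installs on the current thread cannot leak into later suites. A minimal sketch of that pattern, with `runSuiteBody()` as a placeholder for the actual test body:

```scala
object ClassLoaderRestoreSketch {
  // Placeholder for test code that may install a different context classloader,
  // e.g. an isolated loader created for Hive client classes.
  def runSuiteBody(): Unit = ()

  def main(args: Array[String]): Unit = {
    val originalClassLoader = Thread.currentThread().getContextClassLoader
    try {
      runSuiteBody()
    } finally {
      // Restore the original loader so later suites (and mocking frameworks that
      // generate classes against the current loader) see a consistent one.
      Thread.currentThread().setContextClassLoader(originalClassLoader)
    }
  }
}
```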
Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- .../main/scala/org/apache/spark/sql/execution/HiveResult.scala | 2 +- .../spark/sql/hive/HiveMetastoreLazyInitializationSuite.scala | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala index 4d388e40fb8bd..dcec0b019da28 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala @@ -106,7 +106,7 @@ object HiveResult { case (n, _: NumericType) => n.toString case (s: String, StringType) => if (nested) "\"" + s + "\"" else s case (interval: CalendarInterval, CalendarIntervalType) => interval.toString - case (seq: Seq[_], ArrayType(typ, _)) => + case (seq: scala.collection.Seq[_], ArrayType(typ, _)) => seq.map(v => (v, typ)).map(e => toHiveString(e, true, formatters)).mkString("[", ",", "]") case (m: Map[_, _], MapType(kType, vType, _)) => m.map { case (key, value) => diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreLazyInitializationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreLazyInitializationSuite.scala index 277df548aefd0..951f92793732f 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreLazyInitializationSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreLazyInitializationSuite.scala @@ -31,6 +31,7 @@ class HiveMetastoreLazyInitializationSuite extends SparkFunSuite { .config("spark.hadoop.hive.metastore.uris", "thrift://127.0.0.1:11111") .getOrCreate() val originalLevel = org.apache.log4j.Logger.getRootLogger().getLevel + val originalClassLoader = Thread.currentThread().getContextClassLoader try { // Avoid outputting a lot of expected warning logs spark.sparkContext.setLogLevel("error") @@ -64,6 +65,7 @@ class HiveMetastoreLazyInitializationSuite extends SparkFunSuite { exceptionString.contains(msg) } } finally { + Thread.currentThread().setContextClassLoader(originalClassLoader) spark.sparkContext.setLogLevel(originalLevel.toString) spark.stop() } From ea3b979e95f6ce11e7f6e401625a51ede3e649fc Mon Sep 17 00:00:00 2001 From: jzc Date: Thu, 17 Sep 2020 14:50:47 -0700 Subject: [PATCH 0057/1009] [SPARK-32889][SQL] orc table column name supports special characters ### What changes were proposed in this pull request? make orc table column name support special characters like `$` ### Why are the changes needed? Special characters like `$` are allowed in orc table column name by Hive. But it's error when execute command "CREATE TABLE tbl(`$` INT, b INT) using orc" in spark. it's not compatible with Hive. `Column name "$" contains invalid character(s). Please use alias to rename it.;Column name "$" contains invalid character(s). Please use alias to rename it.;org.apache.spark.sql.AnalysisException: Column name "$" contains invalid character(s). 
Please use alias to rename it.; at org.apache.spark.sql.execution.datasources.orc.OrcFileFormat$.checkFieldName(OrcFileFormat.scala:51) at org.apache.spark.sql.execution.datasources.orc.OrcFileFormat$.$anonfun$checkFieldNames$1(OrcFileFormat.scala:59) at org.apache.spark.sql.execution.datasources.orc.OrcFileFormat$.$anonfun$checkFieldNames$1$adapted(OrcFileFormat.scala:59) at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36) at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33) at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:38) ` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add unit test Closes #29761 from jzc928/orcColSpecialChar. Authored-by: jzc Signed-off-by: Dongjoon Hyun --- .../datasources/orc/OrcFileFormat.scala | 2 +- .../spark/sql/FileBasedDataSourceSuite.scala | 14 ++++ .../sql/hive/execution/SQLQuerySuite.scala | 74 ++++++++++++------- 3 files changed, 64 insertions(+), 26 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala index 69badb4f7d595..8e9a566d45971 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala @@ -45,7 +45,7 @@ import org.apache.spark.util.{SerializableConfiguration, Utils} private[sql] object OrcFileFormat { private def checkFieldName(name: String): Unit = { try { - TypeDescription.fromString(s"struct<$name:int>") + TypeDescription.fromString(s"struct<`$name`:int>") } catch { case _: IllegalArgumentException => throw new AnalysisException( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala index f72e3347510f9..77e07e5550f35 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -233,6 +233,20 @@ class FileBasedDataSourceSuite extends QueryTest } } + Seq("json", "orc").foreach { format => + test(s"SPARK-32889: column name supports special characters using $format") { + Seq("$", " ", ",", ";", "{", "}", "(", ")", "\n", "\t", "=").foreach { name => + withTempDir { dir => + val dataDir = new File(dir, "file").getCanonicalPath + Seq(1).toDF(name).write.format(format).save(dataDir) + val schema = spark.read.format(format).load(dataDir).schema + assert(schema.size == 1) + assertResult(name)(schema.head.name) + } + } + } + } + // Text file format only supports string type test("SPARK-24691 error handling for unsupported types - text") { withTempDir { dir => diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 431790e1fbb6d..a69a949e3a3a2 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -2206,39 +2206,63 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi } } - test("SPARK-21912 ORC/Parquet table should not create invalid column names") { + test("SPARK-21912 Parquet table should not create invalid column names") { Seq(" ", ",", ";", "{", 
"}", "(", ")", "\n", "\t", "=").foreach { name => - Seq("ORC", "PARQUET").foreach { source => - withTable("t21912") { - val m = intercept[AnalysisException] { - sql(s"CREATE TABLE t21912(`col$name` INT) USING $source") - }.getMessage - assert(m.contains(s"contains invalid character(s)")) + val source = "PARQUET" + withTable("t21912") { + val m = intercept[AnalysisException] { + sql(s"CREATE TABLE t21912(`col$name` INT) USING $source") + }.getMessage + assert(m.contains(s"contains invalid character(s)")) - val m1 = intercept[AnalysisException] { - sql(s"CREATE TABLE t21912 STORED AS $source AS SELECT 1 `col$name`") - }.getMessage - assert(m1.contains(s"contains invalid character(s)")) + val m1 = intercept[AnalysisException] { + sql(s"CREATE TABLE t21912 STORED AS $source AS SELECT 1 `col$name`") + }.getMessage + assert(m1.contains(s"contains invalid character(s)")) + + val m2 = intercept[AnalysisException] { + sql(s"CREATE TABLE t21912 USING $source AS SELECT 1 `col$name`") + }.getMessage + assert(m2.contains(s"contains invalid character(s)")) - val m2 = intercept[AnalysisException] { - sql(s"CREATE TABLE t21912 USING $source AS SELECT 1 `col$name`") + withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> "false") { + val m3 = intercept[AnalysisException] { + sql(s"CREATE TABLE t21912(`col$name` INT) USING hive OPTIONS (fileFormat '$source')") }.getMessage - assert(m2.contains(s"contains invalid character(s)")) + assert(m3.contains(s"contains invalid character(s)")) + } - withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> "false") { - val m3 = intercept[AnalysisException] { - sql(s"CREATE TABLE t21912(`col$name` INT) USING hive OPTIONS (fileFormat '$source')") - }.getMessage - assert(m3.contains(s"contains invalid character(s)")) - } + sql(s"CREATE TABLE t21912(`col` INT) USING $source") + val m4 = intercept[AnalysisException] { + sql(s"ALTER TABLE t21912 ADD COLUMNS(`col$name` INT)") + }.getMessage + assert(m4.contains(s"contains invalid character(s)")) + } + } + } - sql(s"CREATE TABLE t21912(`col` INT) USING $source") - val m4 = intercept[AnalysisException] { - sql(s"ALTER TABLE t21912 ADD COLUMNS(`col$name` INT)") - }.getMessage - assert(m4.contains(s"contains invalid character(s)")) + test("SPARK-32889: ORC table column name supports special characters") { + // " " "," is not allowed. + Seq("$", ";", "{", "}", "(", ")", "\n", "\t", "=").foreach { name => + val source = "ORC" + Seq(s"CREATE TABLE t32889(`$name` INT) USING $source", + s"CREATE TABLE t32889 STORED AS $source AS SELECT 1 `$name`", + s"CREATE TABLE t32889 USING $source AS SELECT 1 `$name`", + s"CREATE TABLE t32889(`$name` INT) USING hive OPTIONS (fileFormat '$source')") + .foreach { command => + withTable("t32889") { + sql(command) + assertResult(name)( + sessionState.catalog.getTableMetadata(TableIdentifier("t32889")).schema.fields(0).name) } } + + withTable("t32889") { + sql(s"CREATE TABLE t32889(`col` INT) USING $source") + sql(s"ALTER TABLE t32889 ADD COLUMNS(`$name` INT)") + assertResult(name)( + sessionState.catalog.getTableMetadata(TableIdentifier("t32889")).schema.fields(1).name) + } } } From 4ced58862c707aa916f7a55d15c3887c94c9b210 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Fri, 18 Sep 2020 08:17:23 +0900 Subject: [PATCH 0058/1009] [SPARK-32635][SQL] Fix foldable propagation ### What changes were proposed in this pull request? This PR rewrites `FoldablePropagation` rule to replace attribute references in a node with foldables coming only from the node's children. 
Before this PR in the case of this example (with setting`spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation`): ```scala val a = Seq("1").toDF("col1").withColumn("col2", lit("1")) val b = Seq("2").toDF("col1").withColumn("col2", lit("2")) val aub = a.union(b) val c = aub.filter($"col1" === "2").cache() val d = Seq("2").toDF( "col4") val r = d.join(aub, $"col2" === $"col4").select("col4") val l = c.select("col2") val df = l.join(r, $"col2" === $"col4", "LeftOuter") df.show() ``` foldable propagation happens incorrectly: ``` Join LeftOuter, (col2#6 = col4#34) Join LeftOuter, (col2#6 = col4#34) !:- Project [col2#6] :- Project [1 AS col2#6] : +- InMemoryRelation [col1#4, col2#6], StorageLevel(disk, memory, deserialized, 1 replicas) : +- InMemoryRelation [col1#4, col2#6], StorageLevel(disk, memory, deserialized, 1 replicas) : +- Union : +- Union : :- *(1) Project [value#1 AS col1#4, 1 AS col2#6] : :- *(1) Project [value#1 AS col1#4, 1 AS col2#6] : : +- *(1) Filter (isnotnull(value#1) AND (value#1 = 2)) : : +- *(1) Filter (isnotnull(value#1) AND (value#1 = 2)) : : +- *(1) LocalTableScan [value#1] : : +- *(1) LocalTableScan [value#1] : +- *(2) Project [value#10 AS col1#13, 2 AS col2#15] : +- *(2) Project [value#10 AS col1#13, 2 AS col2#15] : +- *(2) Filter (isnotnull(value#10) AND (value#10 = 2)) : +- *(2) Filter (isnotnull(value#10) AND (value#10 = 2)) : +- *(2) LocalTableScan [value#10] : +- *(2) LocalTableScan [value#10] +- Project [col4#34] +- Project [col4#34] +- Join Inner, (col2#6 = col4#34) +- Join Inner, (col2#6 = col4#34) :- Project [value#31 AS col4#34] :- Project [value#31 AS col4#34] : +- LocalRelation [value#31] : +- LocalRelation [value#31] +- Project [col2#6] +- Project [col2#6] +- Union false, false +- Union false, false :- Project [1 AS col2#6] :- Project [1 AS col2#6] : +- LocalRelation [value#1] : +- LocalRelation [value#1] +- Project [2 AS col2#15] +- Project [2 AS col2#15] +- LocalRelation [value#10] +- LocalRelation [value#10] ``` and so the result is wrong: ``` +----+----+ |col2|col4| +----+----+ | 1|null| +----+----+ ``` After this PR foldable propagation will not happen incorrectly and the result is correct: ``` +----+----+ |col2|col4| +----+----+ | 2| 2| +----+----+ ``` ### Why are the changes needed? To fix a correctness issue. ### Does this PR introduce _any_ user-facing change? Yes, fixes a correctness issue. ### How was this patch tested? Existing and new UTs. Closes #29771 from peter-toth/SPARK-32635-fix-foldable-propagation. 
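Conceptually, the rewritten rule walks the plan bottom-up and returns, for every rewritten node, the foldable aliases that node itself exposes, so a parent can only substitute attributes that really come from its direct children. A toy sketch of that shape; the `Plan`/`Leaf`/`Proj` types below are illustrative stand-ins, not Spark's classes:

```scala
object FoldablePropagationSketch {
  sealed trait Plan
  case class Leaf(name: String) extends Plan
  // aliases: attribute -> constant this node binds it to; refs: attributes this node reads
  case class Proj(aliases: Map[String, String], refs: Seq[String], child: Plan) extends Plan

  // Rewrites a node and returns the foldable aliases *it* exposes to its parent.
  def propagate(plan: Plan): (Plan, Map[String, String]) = plan match {
    case l: Leaf => (l, Map.empty)
    case Proj(aliases, refs, child) =>
      val (newChild, fromChild) = propagate(child)
      // Substitute references only with foldables coming from this node's child,
      // never with ones collected elsewhere in the tree.
      val newRefs = refs.map(r => fromChild.getOrElse(r, r))
      (Proj(aliases, newRefs, newChild), aliases)
  }

  def main(args: Array[String]): Unit = {
    val plan = Proj(Map.empty, Seq("col2"), Proj(Map("col2" -> "1"), Seq.empty, Leaf("t")))
    println(propagate(plan)._1) // col2 is rewritten to "1" because the direct child produced it
  }
}
```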
Authored-by: Peter Toth Signed-off-by: Takeshi Yamamuro --- .../catalyst/expressions/AttributeMap.scala | 2 + .../catalyst/expressions/AttributeMap.scala | 2 + .../sql/catalyst/optimizer/expressions.scala | 121 +++++++++++------- .../org/apache/spark/sql/DataFrameSuite.scala | 12 ++ 4 files changed, 88 insertions(+), 49 deletions(-) diff --git a/sql/catalyst/src/main/scala-2.12/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala b/sql/catalyst/src/main/scala-2.12/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala index 75a8bec018a1f..42b92d4593c77 100644 --- a/sql/catalyst/src/main/scala-2.12/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala +++ b/sql/catalyst/src/main/scala-2.12/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala @@ -26,6 +26,8 @@ object AttributeMap { def apply[A](kvs: Seq[(Attribute, A)]): AttributeMap[A] = { new AttributeMap(kvs.map(kv => (kv._1.exprId, kv)).toMap) } + + def empty[A]: AttributeMap[A] = new AttributeMap(Map.empty) } class AttributeMap[A](val baseMap: Map[ExprId, (Attribute, A)]) diff --git a/sql/catalyst/src/main/scala-2.13/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala b/sql/catalyst/src/main/scala-2.13/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala index 4caa3d0461875..e6b53e3e6548f 100644 --- a/sql/catalyst/src/main/scala-2.13/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala +++ b/sql/catalyst/src/main/scala-2.13/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala @@ -26,6 +26,8 @@ object AttributeMap { def apply[A](kvs: Seq[(Attribute, A)]): AttributeMap[A] = { new AttributeMap(kvs.map(kv => (kv._1.exprId, kv)).toMap) } + + def empty[A]: AttributeMap[A] = new AttributeMap(Map.empty) } class AttributeMap[A](val baseMap: Map[ExprId, (Attribute, A)]) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index b2fc3936e1a29..c4e4b25d570dd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -624,59 +624,82 @@ object NullPropagation extends Rule[LogicalPlan] { */ object FoldablePropagation extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = { - var foldableMap = AttributeMap(plan.flatMap { - case Project(projectList, _) => projectList.collect { - case a: Alias if a.child.foldable => (a.toAttribute, a) - } - case _ => Nil - }) - val replaceFoldable: PartialFunction[Expression, Expression] = { - case a: AttributeReference if foldableMap.contains(a) => foldableMap(a) + CleanupAliases(propagateFoldables(plan)._1) + } + + private def propagateFoldables(plan: LogicalPlan): (LogicalPlan, AttributeMap[Alias]) = { + plan match { + case p: Project => + val (newChild, foldableMap) = propagateFoldables(p.child) + val newProject = + replaceFoldable(p.withNewChildren(Seq(newChild)).asInstanceOf[Project], foldableMap) + val newFoldableMap = AttributeMap(newProject.projectList.collect { + case a: Alias if a.child.foldable => (a.toAttribute, a) + }) + (newProject, newFoldableMap) + + // We can not replace the attributes in `Expand.output`. If there are other non-leaf + // operators that have the `output` field, we should put them here too. 
+ case e: Expand => + val (newChild, foldableMap) = propagateFoldables(e.child) + val expandWithNewChildren = e.withNewChildren(Seq(newChild)).asInstanceOf[Expand] + val newExpand = if (foldableMap.isEmpty) { + expandWithNewChildren + } else { + val newProjections = expandWithNewChildren.projections.map(_.map(_.transform { + case a: AttributeReference if foldableMap.contains(a) => foldableMap(a) + })) + if (newProjections == expandWithNewChildren.projections) { + expandWithNewChildren + } else { + expandWithNewChildren.copy(projections = newProjections) + } + } + (newExpand, foldableMap) + + case u: UnaryNode if canPropagateFoldables(u) => + val (newChild, foldableMap) = propagateFoldables(u.child) + val newU = replaceFoldable(u.withNewChildren(Seq(newChild)), foldableMap) + (newU, foldableMap) + + // Join derives the output attributes from its child while they are actually not the + // same attributes. For example, the output of outer join is not always picked from its + // children, but can also be null. We should exclude these miss-derived attributes when + // propagating the foldable expressions. + // TODO(cloud-fan): It seems more reasonable to use new attributes as the output attributes + // of outer join. + case j: Join => + val (newChildren, foldableMaps) = j.children.map(propagateFoldables).unzip + val foldableMap = AttributeMap( + foldableMaps.foldLeft(Iterable.empty[(Attribute, Alias)])(_ ++ _.baseMap.values).toSeq) + val newJoin = + replaceFoldable(j.withNewChildren(newChildren).asInstanceOf[Join], foldableMap) + val missDerivedAttrsSet: AttributeSet = AttributeSet(newJoin.joinType match { + case _: InnerLike | LeftExistence(_) => Nil + case LeftOuter => newJoin.right.output + case RightOuter => newJoin.left.output + case FullOuter => newJoin.left.output ++ newJoin.right.output + }) + val newFoldableMap = AttributeMap(foldableMap.baseMap.values.filterNot { + case (attr, _) => missDerivedAttrsSet.contains(attr) + }.toSeq) + (newJoin, newFoldableMap) + + // For other plans, they are not safe to apply foldable propagation, and they should not + // propagate foldable expressions from children. + case o => + val newOther = o.mapChildren(propagateFoldables(_)._1) + (newOther, AttributeMap.empty) } + } + private def replaceFoldable(plan: LogicalPlan, foldableMap: AttributeMap[Alias]): plan.type = { if (foldableMap.isEmpty) { plan } else { - CleanupAliases(plan.transformUp { - // We can only propagate foldables for a subset of unary nodes. - case u: UnaryNode if foldableMap.nonEmpty && canPropagateFoldables(u) => - u.transformExpressions(replaceFoldable) - - // Join derives the output attributes from its child while they are actually not the - // same attributes. For example, the output of outer join is not always picked from its - // children, but can also be null. We should exclude these miss-derived attributes when - // propagating the foldable expressions. - // TODO(cloud-fan): It seems more reasonable to use new attributes as the output attributes - // of outer join. 
- case j @ Join(left, right, joinType, _, _) if foldableMap.nonEmpty => - val newJoin = j.transformExpressions(replaceFoldable) - val missDerivedAttrsSet: AttributeSet = AttributeSet(joinType match { - case _: InnerLike | LeftExistence(_) => Nil - case LeftOuter => right.output - case RightOuter => left.output - case FullOuter => left.output ++ right.output - }) - foldableMap = AttributeMap(foldableMap.baseMap.values.filterNot { - case (attr, _) => missDerivedAttrsSet.contains(attr) - }.toSeq) - newJoin - - // We can not replace the attributes in `Expand.output`. If there are other non-leaf - // operators that have the `output` field, we should put them here too. - case expand: Expand if foldableMap.nonEmpty => - expand.copy(projections = expand.projections.map { projection => - projection.map(_.transform(replaceFoldable)) - }) - - // For other plans, they are not safe to apply foldable propagation, and they should not - // propagate foldable expressions from children. - case other if foldableMap.nonEmpty => - val childrenOutputSet = AttributeSet(other.children.flatMap(_.output)) - foldableMap = AttributeMap(foldableMap.baseMap.values.filterNot { - case (attr, _) => childrenOutputSet.contains(attr) - }.toSeq) - other - }) + plan transformExpressions { + case a: AttributeReference if foldableMap.contains(a) => foldableMap(a) + } } } @@ -684,7 +707,7 @@ object FoldablePropagation extends Rule[LogicalPlan] { * List of all [[UnaryNode]]s which allow foldable propagation. */ private def canPropagateFoldables(u: UnaryNode): Boolean = u match { - case _: Project => true + // Handling `Project` is moved to `propagateFoldables`. case _: Filter => true case _: SubqueryAlias => true case _: Aggregate => true diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index d95f09a4cc839..321f4966178d7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -2555,6 +2555,18 @@ class DataFrameSuite extends QueryTest val df = Seq(0.0 -> -0.0).toDF("pos", "neg") checkAnswer(df.select($"pos" > $"neg"), Row(false)) } + + test("SPARK-32635: Replace references with foldables coming only from the node's children") { + val a = Seq("1").toDF("col1").withColumn("col2", lit("1")) + val b = Seq("2").toDF("col1").withColumn("col2", lit("2")) + val aub = a.union(b) + val c = aub.filter($"col1" === "2").cache() + val d = Seq("2").toDF("col4") + val r = d.join(aub, $"col2" === $"col4").select("col4") + val l = c.select("col2") + val df = l.join(r, $"col2" === $"col4", "LeftOuter") + checkAnswer(df, Row("2", "2")) + } } case class GroupByKey(a: Int, b: Int) From 68e0d5f2962d4045bd159b5430a8f1ae2dfde4c3 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Fri, 18 Sep 2020 08:29:29 +0900 Subject: [PATCH 0059/1009] [SPARK-32902][SQL] Logging plan changes for AQE ### What changes were proposed in this pull request? Recently, we added code to log plan changes in the preparation phase in `QueryExecution` for execution (https://github.com/apache/spark/pull/29544). This PR intends to apply the same fix for logging plan changes in AQE. ### Why are the changes needed? Easy debugging for AQE plans ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added unit tests. Closes #29774 from maropu/PlanChangeLogForAQE. 
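Mechanically, the logging amounts to folding the rule list over the plan, comparing the plan before and after each rule, and emitting a per-rule message plus a batch summary when something changed. A small stand-alone sketch of that pattern, with a plain `String` standing in for `SparkPlan` and a local `Rule` case class standing in for `Rule[SparkPlan]`:

```scala
object RuleLoggingSketch {
  type Plan = String
  final case class Rule(name: String, transform: Plan => Plan)

  // Fold the rules over the plan, logging each rule that changed it and a batch summary.
  def applyRules(plan: Plan, rules: Seq[Rule], batchName: String): Plan = {
    val result = rules.foldLeft(plan) { (current, rule) =>
      val next = rule.transform(current)
      if (next != current) println(s"=== Applying Rule ${rule.name} ===\n$current => $next")
      next
    }
    if (result != plan) println(s"=== Result of Batch $batchName ===\n$plan => $result")
    result
  }

  def main(args: Array[String]): Unit = {
    val rules = Seq(Rule("Uppercase", _.toUpperCase), Rule("NoOp", identity))
    applyRules("scan -> sort", rules, "AQE Query Stage Optimization")
  }
}
```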
Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- .../adaptive/AdaptiveSparkPlanExec.scala | 45 +++++++++++++++---- .../adaptive/AdaptiveQueryExecSuite.scala | 20 +++++++++ 2 files changed, 56 insertions(+), 9 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index 014358b663bbb..6c197fedd8c56 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ReturnAnswer} -import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor} +import org.apache.spark.sql.catalyst.rules.{PlanChangeLogger, Rule} import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec._ @@ -75,6 +75,8 @@ case class AdaptiveSparkPlanExec( case _ => logDebug(_) } + @transient private val planChangeLogger = new PlanChangeLogger[SparkPlan]() + // The logical plan optimizer for re-optimizing the current logical plan. @transient private val optimizer = new AQEOptimizer(conf) @@ -109,7 +111,8 @@ case class AdaptiveSparkPlanExec( @transient private val costEvaluator = SimpleCostEvaluator - @transient private val initialPlan = applyPhysicalRules(inputPlan, queryStagePreparationRules) + @transient private val initialPlan = applyPhysicalRules( + inputPlan, queryStagePreparationRules, Some((planChangeLogger, "AQE Preparations"))) @volatile private var currentPhysicalPlan = initialPlan @@ -231,7 +234,9 @@ case class AdaptiveSparkPlanExec( // Run the final plan when there's no more unfinished stages. 
currentPhysicalPlan = applyPhysicalRules( - result.newPlan, queryStageOptimizerRules ++ postStageCreationRules) + result.newPlan, + queryStageOptimizerRules ++ postStageCreationRules, + Some((planChangeLogger, "AQE Final Query Stage Optimization"))) isFinalPlan = true executionId.foreach(onUpdatePlan(_, Seq(currentPhysicalPlan))) currentPhysicalPlan @@ -413,11 +418,14 @@ case class AdaptiveSparkPlanExec( } private def newQueryStage(e: Exchange): QueryStageExec = { - val optimizedPlan = applyPhysicalRules(e.child, queryStageOptimizerRules) + val optimizedPlan = applyPhysicalRules( + e.child, queryStageOptimizerRules, Some((planChangeLogger, "AQE Query Stage Optimization"))) val queryStage = e match { case s: ShuffleExchangeLike => val newShuffle = applyPhysicalRules( - s.withNewChildren(Seq(optimizedPlan)), postStageCreationRules) + s.withNewChildren(Seq(optimizedPlan)), + postStageCreationRules, + Some((planChangeLogger, "AQE Post Stage Creation"))) if (!newShuffle.isInstanceOf[ShuffleExchangeLike]) { throw new IllegalStateException( "Custom columnar rules cannot transform shuffle node to something else.") @@ -425,7 +433,9 @@ case class AdaptiveSparkPlanExec( ShuffleQueryStageExec(currentStageId, newShuffle) case b: BroadcastExchangeLike => val newBroadcast = applyPhysicalRules( - b.withNewChildren(Seq(optimizedPlan)), postStageCreationRules) + b.withNewChildren(Seq(optimizedPlan)), + postStageCreationRules, + Some((planChangeLogger, "AQE Post Stage Creation"))) if (!newBroadcast.isInstanceOf[BroadcastExchangeLike]) { throw new IllegalStateException( "Custom columnar rules cannot transform broadcast node to something else.") @@ -534,7 +544,10 @@ case class AdaptiveSparkPlanExec( logicalPlan.invalidateStatsCache() val optimized = optimizer.execute(logicalPlan) val sparkPlan = context.session.sessionState.planner.plan(ReturnAnswer(optimized)).next() - val newPlan = applyPhysicalRules(sparkPlan, preprocessingRules ++ queryStagePreparationRules) + val newPlan = applyPhysicalRules( + sparkPlan, + preprocessingRules ++ queryStagePreparationRules, + Some((planChangeLogger, "AQE Replanning"))) (newPlan, optimized) } @@ -630,8 +643,22 @@ object AdaptiveSparkPlanExec { /** * Apply a list of physical operator rules on a [[SparkPlan]]. 
*/ - def applyPhysicalRules(plan: SparkPlan, rules: Seq[Rule[SparkPlan]]): SparkPlan = { - rules.foldLeft(plan) { case (sp, rule) => rule.apply(sp) } + def applyPhysicalRules( + plan: SparkPlan, + rules: Seq[Rule[SparkPlan]], + loggerAndBatchName: Option[(PlanChangeLogger[SparkPlan], String)] = None): SparkPlan = { + if (loggerAndBatchName.isEmpty) { + rules.foldLeft(plan) { case (sp, rule) => rule.apply(sp) } + } else { + val (logger, batchName) = loggerAndBatchName.get + val newPlan = rules.foldLeft(plan) { case (sp, rule) => + val result = rule.apply(sp) + logger.logRule(rule.ruleName, sp, result) + result + } + logger.logBatch(batchName, plan, newPlan) + newPlan + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index 7e7248c312e11..8799dbb14ef34 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -1238,4 +1238,24 @@ class AdaptiveQueryExecSuite } } } + + test("Logging plan changes for AQE") { + val testAppender = new LogAppender("plan changes") + withLogAppender(testAppender) { + withSQLConf( + SQLConf.PLAN_CHANGE_LOG_LEVEL.key -> "INFO", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "80") { + sql("SELECT * FROM testData JOIN testData2 ON key = a " + + "WHERE value = (SELECT max(a) FROM testData3)").collect() + } + Seq("=== Result of Batch AQE Preparations ===", + "=== Result of Batch AQE Post Stage Creation ===", + "=== Result of Batch AQE Replanning ===", + "=== Result of Batch AQE Query Stage Optimization ===", + "=== Result of Batch AQE Final Query Stage Optimization ===").foreach { expectedMsg => + assert(testAppender.loggingEvents.exists(_.getRenderedMessage.contains(expectedMsg))) + } + } + } } From 9d6221b9368ab3d23c63a9f24a2ba42a6f709d54 Mon Sep 17 00:00:00 2001 From: zhengruifeng Date: Fri, 18 Sep 2020 08:57:52 +0800 Subject: [PATCH 0060/1009] [SPARK-18409][ML][FOLLOWUP] LSH approxNearestNeighbors optimization 2 ### What changes were proposed in this pull request? 1, simplify the aggregation by get `count` via `summary.count` 2, ignore nan values like the old impl: ``` val relativeError = 0.05 val approxQuantile = numNearestNeighbors.toDouble / count + relativeError val modelDatasetWithDist = modelDataset.withColumn(distCol, hashDistCol) if (approxQuantile >= 1) { modelDatasetWithDist } else { val hashThreshold = modelDatasetWithDist.stat .approxQuantile(distCol, Array(approxQuantile), relativeError) // Filter the dataset where the hash value is less than the threshold. modelDatasetWithDist.filter(hashDistCol <= hashThreshold(0)) } ``` ### Why are the changes needed? simplify the aggregation ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? existing testsuites Closes #29778 from zhengruifeng/lsh_nit. 
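The reworked per-partition loop boils down to: stream the distance column once, skip nulls and NaNs while inserting into the quantile summary, and read the row count back from the merged summary instead of aggregating it separately. A rough sketch of that filter-while-inserting idea, with a trivial `Summary` class standing in for `QuantileSummaries`:

```scala
object DistanceSummarySketch {
  // Trivial stand-in for QuantileSummaries: only tracks how many values were inserted.
  final case class Summary(count: Long) {
    def insert(v: Double): Summary = copy(count = count + 1)
    def merge(other: Summary): Summary = Summary(count + other.count)
  }

  // Stream a partition of (possibly null) distances once, skipping nulls and NaNs,
  // so the count can later be read back from the merged summary.
  def summarize(distances: Iterator[Option[Double]]): Summary =
    distances.foldLeft(Summary(0L)) {
      case (s, Some(v)) if !v.isNaN => s.insert(v) // keep non-NaN distances
      case (s, _)                   => s           // drop nulls and NaNs, as the old impl did
    }

  def main(args: Array[String]): Unit = {
    val partition = Iterator(Some(0.3), None, Some(Double.NaN), Some(1.2))
    println(summarize(partition).count) // 2
  }
}
```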
Authored-by: zhengruifeng Signed-off-by: zhengruifeng --- .../org/apache/spark/ml/feature/LSH.scala | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala index 6d5c7c50dbacc..9d647f3e514c5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala @@ -139,21 +139,21 @@ private[ml] abstract class LSHModel[T <: LSHModel[T]] val modelDatasetWithDist = modelDataset.withColumn(distCol, hashDistCol) val relativeError = 0.05 - val (summary, count) = modelDatasetWithDist.select(distCol) - .rdd - .mapPartitions { iter => - if (iter.hasNext) { - var s = new QuantileSummaries( - QuantileSummaries.defaultCompressThreshold, relativeError) - var c = 0L - while (iter.hasNext) { - val Row(dist: Double) = iter.next - s = s.insert(dist) - c += 1 + val summary = modelDatasetWithDist.select(distCol).rdd.mapPartitions { iter => + if (iter.hasNext) { + var s = new QuantileSummaries( + QuantileSummaries.defaultCompressThreshold, relativeError) + while (iter.hasNext) { + val row = iter.next + if (!row.isNullAt(0)) { + val v = row.getDouble(0) + if (!v.isNaN) s = s.insert(v) } - Iterator.single((s.compress, c)) - } else Iterator.empty - }.treeReduce { case ((s1, c1), (s2, c2)) => (s1.merge(s2), c1 + c2) } + } + Iterator.single(s.compress) + } else Iterator.empty + }.treeReduce((s1, s2) => s1.merge(s2)) + val count = summary.count // Compute threshold to get around k elements. // To guarantee to have enough neighbors in one pass, we need (p - err) * N >= M From 75dd86400c3c2348a4139586fbbead840512b909 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 18 Sep 2020 10:47:06 +0900 Subject: [PATCH 0061/1009] [SPARK-32908][SQL] Fix target error calculation in `percentile_approx()` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? 1. Change the target error calculation according to the paper [Space-Efficient Online Computation of Quantile Summaries](http://infolab.stanford.edu/~datar/courses/cs361a/papers/quantiles.pdf). It says that the error `e = max(gi, deltai)/2` (see the page 59). Also this has clear explanation [ε-approximate quantiles](http://www.mathcs.emory.edu/~cheung/Courses/584/Syllabus/08-Quantile/Greenwald.html#proofprop1). 2. Added a test to check different accuracies. 3. Added an input CSV file `percentile_approx-input.csv.bz2` to the resource folder `sql/catalyst/src/main/resources` for the test. ### Why are the changes needed? To fix incorrect percentile calculation, see an example in SPARK-32908. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? - By running existing tests in `QuantileSummariesSuite` and in `ApproximatePercentileQuerySuite`. - Added new test `SPARK-32908: maximum target error in percentile_approx` to `ApproximatePercentileQuerySuite`. Closes #29784 from MaxGekk/fix-percentile_approx-2. 
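For reference, the bound the fix switches to is no longer a function of `relativeError` at all: it is derived from the sketch itself as the maximum of `g + delta` over the compressed samples, halved (see the one-line change in the diff below). A tiny stand-alone sketch of just that computation, with a hypothetical `Stats(value, g, delta)` mirroring the summary's samples:

```scala
object TargetErrorSketch {
  // One compressed sample of a Greenwald-Khanna summary, with g and delta as in the paper.
  final case class Stats(value: Double, g: Long, delta: Long)

  // Worst-case rank error the summary can guarantee: max over samples of (g + delta) / 2.
  def targetError(sampled: Seq[Stats]): Double =
    if (sampled.isEmpty) 0.0 else sampled.map(s => s.g + s.delta).max / 2.0

  def main(args: Array[String]): Unit = {
    val sampled = Seq(Stats(1.0, 1, 0), Stats(5.0, 3, 2), Stats(9.0, 2, 1))
    println(targetError(sampled)) // 2.5
  }
}
```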
Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- .../sql/catalyst/util/QuantileSummaries.scala | 2 +- .../test-data/percentile_approx-input.csv.bz2 | Bin 0 -> 124614 bytes .../sql/ApproximatePercentileQuerySuite.scala | 21 +++++++++++++++++- 3 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 sql/core/src/test/resources/test-data/percentile_approx-input.csv.bz2 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/QuantileSummaries.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/QuantileSummaries.scala index 2797a40614504..ae7066d87d530 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/QuantileSummaries.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/QuantileSummaries.scala @@ -254,7 +254,7 @@ class QuantileSummaries( // Target rank val rank = math.ceil(quantile * count).toLong - val targetError = relativeError * count + val targetError = sampled.map(s => s.delta + s.g).max / 2 // Minimum rank at current sample var minRank = 0L var i = 0 diff --git a/sql/core/src/test/resources/test-data/percentile_approx-input.csv.bz2 b/sql/core/src/test/resources/test-data/percentile_approx-input.csv.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..f85e2896b3a89562bc5c49ca7eb915997297d9a4 GIT binary patch literal 124614 zcmYhhWl$Rcv#8xtpt!rcyGx+O-3bmw0|A1&^t8CU26qSq3k2sa?(UG#;_lkg-ud1; z^PQQyfA*PK`?IsNJG07GZnC0EmRu&5+LvX-7^LNs@7|I8&o8GB1L_sXsbr1jfvz?O zII?rcuSm+Y6CUicMKz`5Wl=4+W?JtlnwLu zvs2AzC)g|O`e_ck_Nu{FMN(JQ<73dN8WM_+jDHV7PA=t&AQ3x#CR<2iQiwKdRd9O!~v&o=k-ugr!lTkGK!2}0d#v>+8+^OtrtdRir}?H@-Aob5k!| z%2(Uzg&}LqwzWXxX4^o>2R+o;92;~U!7nkHH?vdHst{}Q#B*EPwNQjY@6uPgI zViiP(Zre=ESu6A)w3cumY2DMl^c+G?>mTO}6x7fb@gdimAW+y^+$?eqQVY98Idnp& zKtps?aG+rp+{1Gt*f%w=wNq*+Yn|K$o@<$Qwbp5hgmm&-X;ID9I8zc*Re+x6U7E%b z){qX1rPgSRgv-?cIuEnlLUFU@Ibb!cqe8vPZHH~j?M0g0ti9DzoMo%^Y>3BZN2-X$ z1XK$}<`sbvo>Iv2CXl*Uisu0g+3Uh<>zFy@9@Wy?gFNV^Am5ONi|e!Bt!Z0nJM$z#?H%i8o^XUxN2ePyx5*lk*#;?MZ8EB+ z!Z$JQsO{2Q+_8k|pU4%a+M0^_}7W`oi%3FY@3S@zwu%d}(Fl!!k}J#o;hE{f@(;T%)3!0VAs* zf#q^&^S#3}R;|EDZ6iyhsF7IHgmugr!GPM;HH2;u*#Bnxe>5LVo1%s<%SesR5$9b3 zX0CB{B~A{(cy%g01h0}JNeihAB&*TPh$YDq1e4_&SE#X?MgKqH|HY+DzC=o1O}-=^ zPNOzhRyEZ&Hge7gp=Da5iU-82u200N;eci3;%H&=Dd~VJ*|gU`zr%Y+0etrki-LzN zD;8*^%)tR92G#IsYmyq9CJ92Td&QLbKEtA{-I5@%m>nI@SZ(AFEH*($C1h4k1F1HU zhoZ)mSBnK^wq5<59b(*IYKm2Bs)mPw!&S%$1R8JdfYtOQl6VZ=)>sL_NL(HsRX9zS zaa62X2h@cElIww2UZDg8a$>`g)9j#*MK#J+0$x0|NC(vh?Enc}WHng>GQCRcouQGf zG$?4XQpuD~u%Sv1%*%^3@vPhFgOry`%&1rIV0^A&cbq$e>2JFUCb{WBE0$p9ri`1% z+wkj`dhn^?&U%kkzzUoV8)2EZ$(adnG%LMqUi(({xw13a8X}3L+sc_%F&6v=rs!Bv z?)?yG6)TA(Y+77~N+KolzDdm?+l_doMpJf3-d)s}G+}-tG}0y`t*lxK7y!U_6OdPP zdF@;!+-1L_>2RY24NR5k?($k_!M!Fs`Mi;J_uS^67s?)yGET}=uh@*LUfJz7`oh!8 z%8FChd8POavNCC3T2#t?sZf*UVCL0hFjTTyp}R zZMwESYr=kQ@d>9arCM|g9A(;}pz&>9^mWowmEUXU*L@3JeYc~~6F$*YHHtMt5N8Q17N!beeLmgy2%k23>;)I0RkHDkv zT?pYxXN(ot5&XY8W|Xf~5aQ?H?LoE~hFw&=Yb*QqPKr^Pctq(_h#WiAJlasllIx5w zFKKuH^^(Kh5dNk8M%d$r)P58D3vwegWG}|}e8n-YSWzL}nteY)s-RcwiGZ!C#eX+o zr6)u!&_gCzGXw&-lVyb?f6Huqakm`}JTAl+7W=XHFme5Zw+wWc)DN^-0~fT{zKT`@ z%UO@Se{8!@nFU?-Ddb4CHEc)(nkjuaeMQfG4N>9dAQJq<7hiPllPfeM-Wu8cNoHr< zh@drTa8qs+?*#7htT(TtW85VVMn%3iOi|=gdA6_KkM)@NPE(=dlvO6N=p4wW7v;UN zAX8r?JD;%)bhNH8A&@`(+pjWrN^&yK2+Q?YtG%#Hs!1-k!?*@1_6oPuKZTY^MCsz%PvG*>S5J9?-bDi~`l317B37~pxL4D{&pxy`FPmpfh^BN`6Y^Rt5 zw$FoW$q80mkMsI+M}D}9 z9VB|HuxfMb$v&zc9Cr8vKN0s&T!vP16tSCTQ=SEL3{%b)+aou}8LvDQnmCJwd)BQu 
z&M?whV{T;EeqZeKR5ipA%I8xs9@$KrM(Rc5*krKMb2@eo_Gl{c-OWdE1B)kr|3w62gPpAYASZ<`uWdrYydo@Vpd!8%cQ8OyCEKNKwc233ewYkZEdicCOp_LVcg=CF87iM?#geo2twCP5+`sOI2MD z#WD6UvO^*#lQ{cbxk#UI7_QdK>-{8IM!A8cDSDe{?c zBNO*)gg;079opjSHE9-aIXq#$4om!jlgEt-lP#FFrd0>i80ePj8K2vzCS!@)eJMBu z2rR|W!1gv&G#!>F*qp5VjZhil5I+(#tD~#yXLlFURO_X`yeO4aDP%(`f4btMk%HV2 z8~M}*fGxpt(9g|13v(qzh2z8N!L-noNHrNC*V&6K7jU{8+1CF*f$)RV@Dxu@+0+KSWkKs!-|JFEU4P_| z0Zc}=YSuC%7RHUBt4xJ0OL~SCHH6UEPa!V{jPeS<=Q7*= zLd|$u=k{efJ#PLr9y>svQ$mbcx8`9Dp@S?!&j?@gSTSMv@~r2Ws=li%ZK32wgI6J` z`%7Q)L80`NenLqs(OZW}4<@Ggq+*=(^ z0JGm~l9$H`@R+S%rfD#CYeatFlJZN%A4$Ns1A~IkU@Y5Of^5aN$i{(ab=^sM+2Hu} zxYB{2q6&sT_-Y}KwrxTd>u=--TO3*4x~FjwtkTLq?6Lt8*kqixLp23qd@dTp?Gq#) z1=j*Tg-@peDlfXpt&Fc;V70=m`Z^3OuXBwK4@=9JWH>4Ue@S!wu4qm-z8SFEcx$Gf zL^jYircr4KT;_QVstXShcUt{FkRsz=R}hv7bJOsN z*nNbdy6RJ&@wk@3CEtukLK!HtaNB>p|M}`o^H*RC5CDtd69U6K-7`&9crO^ln}4Go zAKn!Xv~-a>G8+x``02{I#W)u)k;K(T)SGS<@ZLEFJ`Qtx_X&MzI3-54XOKR06KHjB zsd!-t%S+204D%e>qgose`kKf)bPY}wrZk-9`Akx zFgx;(z&*v~(oIIT2Z-BN%g^t;Vs8x31A7-W!+Ta>>Vy+VabB~s8QydKwQ784XU*@&qYumCl2 z#g{a6j}Rm&vyt}{8~9H@QE6B?3b5m81Qm>h7XKU13K>{(P<2=|6Q-Y`h~T{6_|Ekh z6Of>k*9H;9gT^zRm)0ZidM$FT;uD~;-9u~x>wMOSLxLA#GoJKUAqE452%6kybzG87 z)?&r;imwfZM=3SAsP149fj>syK4DI#D@Y=;dP`f{5Te6uu)_t@$j0LPs}^s3y2a zKCAN3Wg-qCrF@!nM!KdDzxFz)1rsU^CGaNOf2GS4DC%$GEPtkbd&13KXd2Z#Mbz)v zj)ZJDUT)BL=7xN+ReA55NvI89$371Yr$sT0l?eoQP%AOeD)tZ*(~remu0&xm_eQ)d zX0}Me#cOEvzx1Y88lLO-3kPDcD(8v9LjrRU8OkwG&8cazkRbDFhySis#mV8qOhgaG z)3bF}x1U_1uCBgMr4z_Y?46bF_F$qv15pPmnw*{Z%jMr5Q@H-sO_mqo6fa=GyH0C$l@1WZz@ce)Biw!Rli!W0ndQT`t;c zsb)g+oQf{Tg**^r3$vIm>_$=00CCN8e2^~As5G!)xjNx0JL?by+=3%je{|K)h*E2g+I!QG|(5qmL0$<38jkKKSG!WH>Q zJj$L_7SoqbEG(hEwy(H^Ub-qC%yjS{U95+pU1WOz=w{K2$3xbHe5* zWLypx_mt4z4iCXCf}&yq@v6)ojS=jyt^Tg20+LC5IdC@!bIiYsbubff6i>LBTz zz~~Be%*{sM*p*Qem*#=*4oXu?5^ZjLYMA7sDW(| zi-*=c$`DvlQrsW#y5)YUB=qDYr4bVqWUOtiNZXa}MT%|B9k>UnT}Dpmn?|(SeiZSrqmIEF8bbu}lA*6<~_Ci0gm(~kkWnTu{JJz}eR z&mKjZ!LPH;dIIBEY6NqGF@Lv>{A}y*%kM%n9SIzsx`Q?tuAc4}sRR>Zr#dKcy&UN7 z-oTa#6uRbZ=Q6y1;Iuj+>r@*|V?;GJ^{wfgr53?S1abAKR+D|^9u4|#^eXIn=EjfW z_uV#-ibMt>?lL6oRzsS~EF8l=XI<;HsLRoF*%?Xl&=|C)uyx*k>=5~4g_Apb4r&Am>4avKnx3{OGc~OfzzT zKYL)m8@Au^Cz`T!q#w5SU^3C6DRaVQ>vp-VQ0hQhqmvRvR4iS7UuqtaYvvsuDYe>O z$ECpxS*e?+%x+-@^{*EX26phOdUtvIV7lYi*)E4Y5bxijcKpc)D8aV}vLOKmhC426 zF7B{ZEXl&c+PM!hhrV{FghsjUZwUwI@y;7BY2IBp;4grW>tYj%(`q~tx*xArB@)R- zp*3oK{w07%lCN$!!Lsta|D#rbSXmBhmQIIRPyZ?$drk8fiXlMRkbn{mq*UDJ_m=ko zdF*BOvJWg*XS6F1Fx%C6C;RIS98>yet0m(X$4~A(2b@ExL`FRZ1Bl}fb;a8vnc>@8 zBxskOq$RW6CmGS55t6!t>CBx?$gSR;nU*GhWHxd$L6F6PF5)eG8+`0G{eT6PyGHi= zmj;UHZ3(80%9)-cG>G%P)!V&vFYz9|IFgSwhcId4rPT*TtZ0}>q_NN)>tD#0sANlR zGHCP*n~)(-MV){ewPp3D9uw&ifB)g@mk$^7+cdZ+=hr^oJytF+n0|v49Yxhk3il{) z1Y)S>c_*Ei7Rcr!ct$72+7`_2E6+1)0bf}c@{gm0-o{}G`V~RpV`it(*b}u2bscI_ zPKqq{wGV3gI*}$lGrNj60?a_QI;_Le|G0~FL5s|IE3o>!!m<^5zWs}1aFWJjWXGu)@*zTGlA62wcr&1kd=iNOJD|PCng1}gtEQ@b&sxEE*G(5#T zmyt+IgP4UGBmSX@y(~tc67pv!u?3+ONy});M!yiNldhB*k5RvoEZC--paOLdUK1pm zX|m~_gS`RbG(Peo@21Qs(*t5j8yjFcmpRNCpmZ+2e+C}Njpeo-+EErHTMklxcyg7* za9n9#6%TVa!V1+I6-e_Adns9H3at$s;C)V6=sxUWAHrgdAk_#3Q|FO3iVxEwz%_zO zq^@$kM*X{U$+A1cAs}1~t~@~VMN)HkDoJzb`99(#hNICED)%&7RL>i4Lp5s4dE$!E zFe4eXGwo=6X|_?xCqADOqE|e1+gB7}7Fn^NRwVp+=e8j{X>w)S3oWZ*1-DYw*-4tr zGcSzEC6dD7=G2^UYR3>w%rtTE3d0Rnl~;7e(k{%mxe;wQ{UByP?bxgUYlCv`P8ew*8W~jbaYJZx2ty6_WuQKo5|i8 zkMhMLT?`J=+S@rE&@=;6Ruai9PG3m75B-(~8V#0@8uo2;u2li%{KM9YKaQD-aOUO` zQMtf79L;IUaalBzt)$8dcTq8q%9(Y#_uO^fw6Sg~9LL&1KU;HY*0uAgSv7~LCc|D( zDm+=W^~2WRuJ-~gmYpSKA$5{4o}{V;VY>`>1hWTPb051>Hsl{zIh=9X*S08YEV6oU zul_ja-wrEx?SF_~Qj1kt)!O=_xk+pFzdz3Yad%;^p56Z23k)upx?__qh;WseCfiQu 
z(VU#>b3+|`JDqB8YeR-GF=CD=3xl{?T5voY7UXXj!`XJB%8!HFY4qz-R-u}w55Yd( z>@HzAXt{@(%#IVCKCwG=vGeQGJUtLK;;C%3_?JQpi6I|O5iB1Is}k|jo9rz`ixGq> z)b>f)cAZPo)fxEr4MfHVcb}wKHzuuqBvX67-rwNDj!@fo$s3;|&cd1WmzWS31_`S= z!pXfPj?vlL%UQEea1?TdqY@#7(F3U2j`Mnq@tO(mzLOf657lJjLARK@?EILQVy+v& z(Nv$C)|rttVw<(-$E_O8&GyaSQ)3X~1K3S1@(IK`+q04ul;|ZsREaz}{-(^E)R>xm zQW@_rX!@!i_Qx&E4(bcjQ-0Z@jS~xc2hpFfMIr6-VcA zTlopqgcrfm#D=~Wdsf~{Om)O44mPJ6(tPc?rU?GvrFFn@YdeUuyCn6sVZ=?I?_*#~;Vey_GHB%=0%hd6{e81giu3D(8ISO>U-kG21tv#~Ni@S*6p*t#|EAOzx=ahxsyVwtIzrL%xm4 zBUS^g%%YhR5rK)34~^)J*3{jQ+xX1Oj~H4!UJoEriy08SYx7i>ACJBNyP${!|Ee3Bv2 ziB^rl?>I~{mGW}bgmRdh?ipU%ehc*2%dbJQB5OCyky@N_(#11@LgNh~#aWZzY!^K7nOhK<{oP{>G$|9(Tj1KuxA>bg+}OuYs-qumQc%j7F&(<ZqP54I0~V zvh`&1EjyT{p{mW&03bgEBoWK2@A(f$$lrctw1W%Tm^v?9>eqrV;XubaU)toSs{5Rl z^B?JWCr{m3MIO7tp>o=4F~03IesOP%j(fks9;aC$)M& zNMpz{7dWIWkXkXXUx?n_LcJ)f!Ips2?*%q&byQuMISOGNOY_gKa+g_xh08iJv%bBzjh=-{$50SJFZ8`sk8bJj;5>$6;C36 z%HfL4oKFDXbDFZgjR&0!jC|~LV2s%0)+fQ;3B}N~Rv|H<=vbTCMMcJ8 z4fPH)-Pp$-wFcC}q*ViS?25Z<3RdX@UE8M)>ezVvQ*o35S_I6DhBO)LsRH;GIVoN# zikDVxafM)!lKo|tf!*eL&bkduy}MEJ!^!jUFw>R2&$TUt3LJewC#rp2xG>kgV3u_U za60Xax8#QSr2YyRY;8<{N^lCvoLF z+5@sHYVqKUw)@(<_!-;I>>6$DRiz~W>G6aISZs$*~fbSFJk=|_N17se#aM!t2$g6nhSo;(BKvP`=pX^A5p zgUdD4!+1&DD&b9NhIgUi^iMl_PdoDI6W0A}DMuaIaZ zY8l4$$}($efA^C6@qBRIWRT5FB2_$a?%-jpO7dt5Qxf`|U(4G#P|R_=87*Z@^yb23 zJ%sF6TYF6KPD3uYz6Gz&g&OMdmvl1StwoaYE*7GMptEb0$t5-a~QNy#p>m(#O);b_EhU}p< z1DDPxx!O6qx#8Yh>d%ffqm;ybW=1GE7JV{bOZ zLuKw}z|LPOjG+nkiUX0;|Iqp>6X#B$zu2U=TCHfv4M#qv6_W{!;&J*a=f*>;{B!zD zPG6r_l3nSibk}_{rp;?jg`IU-EaxtJZ_Ih6T=X=OCA&q501o7#-tCLGp}bgeW@^gz zt3B!uHsRE(LBD!on%+fkLlP-PQ8B+DwzHo8)q&z&{{7)G93zPNMYP=%TY=AtX;Pg* zwQdaWs$;rF74b-lwCG#R)AC^4$_?HlTOu2MEFLk6*qsA>rnb@5VnlbfHW~6w8$GXD zFd~x54UQ|5nJc&QHEE(bIOgNc=2uDjDA^Sgp1&RTBqpG5)>J`ZeN~Y-lAzV-D}DQI znC9%6CL%XFccK`Lul{kko$p5%h$xyMD z_Dsl&Uu%-zv3=6=RP+k)PsUur`x0`u-g*aw_|xArv5RU&{4#mI@oA0N`=EtK5b zeA>}0(s_tN$6$9=rX|5i`F*x{Q-2)Bv_R^fzcX#2SfaEyva=DjDGSSv4Xh}_a9~zZ zwUf4Pg4p`j+Og#-W`m9Bv=owS&Ny*fS9k4dsdMaHnnv;+$$LhM#g39e$AEF1eWkp! z6{j!sPxSJ(!RCT-+FYLqPM;z1?*sj(DrCiAE+vODx2w{DE=gjqB8hB9lxNS>m#p-$ zZJ~Ux?t`CC*V=to0+(qqJ8-##7Ti>3sP$@ZPbA3`&+E=(PjEbf-db%6LHHw`Tjuio zT}wvCp)^B&*C{VPdpGuu^>mlCuJF=rpTXnpy>Zg46#STpOx^!suOn3Wl92!XXo8); zi85z@LGi0Z+JZm`7tP2ba^+|MOU10N^-rqR11)(Dd2v~p9~!yZO3EBI*wiKiHXu|l7mQjvRfu{ya+*mvb^5Qyzkv6w+9x|Vp@KZcC zC=`5$#~hXBD{s)3j6lGH#DOwttr!>KCzY_E{oTfu#$4(i+_n9Pn7|e-)8B!+^K#1! z4ycV&O?g{uClO^5c!Z=G1A#pBA3jfZAqVZwX6$6JNKFOD>YNvNk6ZtR$|2Y|NX4dpd2U{t z1pepBTR_eppjO6?1C5yp)MgL>_z38NU6}zce4Z*b?k>+$Y$)AiNto?_EGrwt7G}yC zESbJ|zIA0;)}@o`a#r>fdJdlS96#jj|5G~EsEDKZ%z3zV{Z^`Z&t&4w_$)GFK>gWh zqcCZ+iGEd3;X=dDS-KnhoML3u?~A|I-`w24UM@3njRP6qajgPtP(g(3X(cENFY&X1 zwXREq7{t4B^BZR|CD98aY4@0Sm^K$y{-O4$(ii7flSn3=i zf?H^<2I~u zUXU=rLx${MC>*CRu-VI)P8DlMX!^fXZcX_ZMO0K3gRSBa=7^bOgf9ku7;_CBcx)bClC1DqNt`-GsQ-F>aB zOtKPHR&y4tN_s&R6BDE2nsrbvM6=4iXI_1>-cut_np?{F6_6E;+%5Fd`>GoNfcr5? 
zixaa9Gz;CLT}MjRg^QkNsJnW4!Ltjh%L0g9z(b!y`S6y6wJ@7>(vaAQJ@5({z z7mEmF=3N>%gLcoZ=cYF;3p&DAt151cuB^c}k?&#ik@@mX!XEbLjG{-4Y}E$qc4P>S z1vN5>c_ljQtqq>J%hvvNqbE9=UOiF|#hl=zSI_521Gr`03q$R13u-??P>`Y6fi)qsLr}&ynO%R#rN$><7&WyseV}|EWeHIkA~=goik;Nip*)0zVPRy zlK{@ni|E4Z@PmLjkNdD7R;8<1qOJD4fHDCQt?&0QLZ>Z)rFM5H?(wrU^j`0`WtQzQ z#D>h_@lSylNQ`5xe(8Xhdll*aQ~z=k{bI)&K{u22w?EWI2Pe^FotI)yqlFl0%ATTv znHJ;OM@6$ubrm<3GE-38X@^tCS&_`xj`T!sO#==@vFQ0-uKP=n9>p0^kVp^(z}7-I zvZ>w)E1d0!Idwa~sl|dT@Y~D%O&HG!*vL6?E2~ z+|RtQE&1>Cjz7W+3TStqq`eB0rVcp8n+&5^SQ6IBYYDTeLUX(8O!PcgNzk_E7Kp>y zsCdUj)}BHJ<;7a4Ta^2U$#FXSWLZJodR~^x5EUJge=YtuMrS=zy?a=KNWeY5cx|bZ zpzQVez)RYkY}J?MR4ETo?nTcUlc&5fO;>$T^ZdP2LL9VGpVE*!lt{O42j-1{R0{<@ zg_N@GKbJA3SOENS{5I)YJ-^_SJp3lwwbaw3x)zE~{7CP#a}&{2J{mQ9>AbUfVg;vo z_#D#*!Y7sc3jU+ZHicLodCB7HG;bw;U=$#pF6};U3DC%Id{19!5h{8%&e=p+1%k3< z?2V*OblfI`M`gDB&6v=h@e6=p4e!d@%lw4oab6=Zgt~*JjY$PjZer1W9U6b+zAz`e!6{P|zyO+H@OHWi@pT~qE z9HijI&I!ENfWn0At>)h@PAcRQYJOb1W`>MRbBJ)DsPCN3)$a|d1)`j=9h@ZA7bAb) zy31wNFpPt8S_mt0oC9#k4(ot6Y2bwcgF#L6UpvjlKU&`C#b|-NX;ox|QRk0yd zWHo;GqvPAed;Zq9wRQrfD>09fsGD?(f*8y-Ijh+PVY5v&yV50`HbTm`pala9bG<9+T5KCK*eW{n!LLm z)A=QdiX4V}j|+@wGEV#LcnDCO&Mb2amd#^`Jcu2ByrliO^N9)8L+>E3KOET%!(Y2R zRC-u}G_9$diO#zRRGut>8Fm)0S}ZLxH!`?ljj!wKLcgS!$d85{nEAX_Iu}uB@&2e; z@=3`#uKx9Pe>)s9HmRPNYocTewK%Bfp4>K}U;fj0!O7Jq9dX%*;qYM;>uQ?YJD_b8 zyr9P+M!ghCJX>@=rV#6HT`&G{6w|>Ql{yT5HGdK)@{8AOY}ZaMGM z7?d)EHjd4NNDhJIYMd~GF6ku^`ac~Y3=?fo`Nn+xIVBLNC$=-U<;K*{9*7Tf%*6<; zH)m`SfB^7s%I6(V(k2BUn8SC5a*7LG5ZC-_J;-1u?fU9w}#8N!j97O%^I25 zq9n@LJrhqcFCw3VU`(vbzdjI^toiLG_^ImQw!I^rTWZMIK4FdaL5$BB&gFdD z^0zBJ*5dE=IsSdF-3V1{(+$VR2l_?_b6wF`GE*G^ZvJ&NsCV{d1A z8de1-L7=ShSU=AvpKI&)Ey^>}{lh|0&S7sR&N)83&4TMl`iBND0f{(EEVEQTFnXM7 zoCz3PsaYQV)jSv=E1Tc(615MBU%v6+9URTH(?^#II)Zxk5($FF z{b(bLK7V#P3*W(3T^bqA*OeL1gI4ZxGMREGZO0;^W2sQmje4`LQ9`a)LnM2mujN(n0hgQgVlZ# zd0p6xqwQKY;v!YLk`p`1u*BoGcHN{=v+m79zBN?EM?*SQSY|bTW9q|5W3#!IB*%sR zx}_7gbj)S6Jh5wDeVD^rP35TCJ)_-IliQ7Gux8};mu|nuzLmg0bg4#UOU-Z}u;*%L zdK>I2ie*P;O!GAcV=24~P|IEz1%zU^o|82vmeqNE2>Del)>5)L9C6bV9M;4nBMGjb zduDv(YmdPc*AEHDCO73m5zK8tYM4tbkG_5T7p=%edLWR^AP zM9DTrklD_V=BXlqaTQC2-ab4AZT{7aVcEJGjY|f9V>7w0m-0~2a09FV)8@@|vcU%L z?zm%_)ydCd+k{0*H^eswVoTEXZ^u)be;*$13F(ac*x4dLzVDIA-usv|G#j2);!*3` zMi21O%+rV~%Nv-UojEY^9r}uydV(LSB9cTjIgi47FR3?MA3jf~Tzw<7pBOZN#lKS9Zjy+dN4AI6T7-iZr?Y|CGKZ3#*1k_d@y_xBb7(*&{wypEiYCx3$# z0gsA?lw1?)(n$W`N{u+hQR3N*APk^<&jR|`NVoFRij$n3amAkXvwMy1J4oz2JGvJ@&u=z2fwAlrUh%=oMO#^=L-BaoJFrF zh`$PfR3CNVpat?idN2g1K|f)#01%L@^JC0I^#??y$L^M9B((px$L~%oAk2p5;+?OzwT!I?sIr) z6gj2?JJS_%A;@F!aT6DHSYSgQH4>9-!q3;sp1c{Mj~3_;9tF@EmhkL3(`>S2yvTKQ zuD=i|gA3i65SJncN`~TLcxE1_KfOGosV7s}&5&f);WQ59^t4SbrFIBC&5_96fLF$y zd3{-9R|eTzVR==mH~nRqi1emCJfEPDb=KmxoOm{KBdZ2mEO~j+x0Y49qeHh!%>$l_ zQ4igmzbKsxdQ0-_=XIZ-NH^Gkvh3qtlNWr&fAxBK;EP-JAHCPwc;*E56^?hy*z3%$ z+!ohuF%A^Ty51m%9V;VYbZ&Tb?AbD9BX`#AK5d0rcJ-@1-4o;UapvxG7%MlW=Z(1? 
z1KWNKr6YO+EggpoP0^)k{Tnk&@H%Z+qcm{w)!LUiavP@RsBhP&}fzmfVtEj zo;4cHkVrH98iHsGAvF(oY>zNV54pI9c`3MTL-s@@@-o-iv^u;R`<^-n$m9v zMwD~6k?&SD6`kII4}|2xx6aXSX!E`r2V%|kvCh@x?&t^}5C0I0dA^oO6hPZBZ$Eb>wosJfVI)P^|LVRYA?NdFk7Iu8OvsLkzL| z*>1$|R@TWl$J=7L1AqKJ_JmTKe+m!AnfY6H>7t@pb!CF=RWcEex(I`K<2;w$Wp)Z~ zzdo~V%Rq3~ilNRrj#dYJKbxK^TsQQ43V1ut31bjC7T$K&K&EyA@e-?J)VS&8iUEVj za*wh$1L-I7o&>WbI){{CMZ~1@mafeC1G6uUrA5&1PeEzDtHofPGYNfSzS|PY2^O=A8ukkUG<4j7f z|H!dLL3eaq(&lOV73M@U{1XojSj;GItul`g$s!$Xuz(chWZ*=_b$KfId$ z{V7})USI?%Eio*0fbN?x*|W7wS7RJ0S!lkt=ANL&e(?`-b0ZRv>hl%Ww#xT$GoaR>IA8{!l2(Vr zMCcR{XR#T0P5Gkf5W`U&g3kk$T6^@EzXa{$An-9)D8)nXz=8>7c-~&s3|W_#-6wXF zV7L9v(5@2k^79BGQ12wd@p5~qZn4WUuz(s7`hBaY$Ua%wgnIpuHk){EqNV8CW5R(X zwGv7BB(xUD@KRvyFN6Ah@8_WXb|WwYoPG;(obqT>79ddZC~IL30v_`z#x2va<(%@) zZ_iwj5YpurKV0evjprGi^1ltkTLb4KLgeCW{5UBXkH?KVO4ey`D>YAr$w^()`=s+$ zLL@e8G)fI^H$cge3zI+6o$vjeIZ~(yz$ywpjt8C78yru@0HWEN+74x(Lq6OlwfI?J z;r&hz;!4VSn{B3gcTU0RZsRPuhp}t51^K23gwXljq#+|zkNDi&Q(VT!cDx>#E8n#ZsEC`BfcTAhXVTj@p$Z~Qt)8% z1+X$s?x>}6bgtmKrCdNNn;$mosXQeQ)Nesx4*R_pao)?8Vl8Q(SXuhm9AkJ;Up&N_ z;r_byjz(3uk$ii;Gvs1ifqX^QBG7J~|KD^ST_90fbN5L84<$Bm5UuHMb3sHc~j ztCag&&p(=KpGcI;VP99h?4(2{Z3^eNO{9u6X(O@KhgVtvK9u2oro@HRnXyF??F(}G zNoOR8B5~`+zzObvc`FTxwCHJ)zkh!JIApUWO0=BF%dV?k*e#06z8D_$w6ne8d7l*f zBzwoAYz0&?Cig2VXZ!DCRG7bgc!m9S07qB7G6YQPrf6xKjgy})Y3G4H%G?fjsX}#J z@JCuZf8md_uRfpaOcii7A7`cuZI+Xejd2QJZBteVL`Uw*1`c^5v_(@L&f6KlMc545 z|4PDOlY{fn@cgRvWUFNrmcArT)J+Lk8MYSKs_Rb-C{^u{Y>g=6StG@B{{JXC3$8Y_ ztzGX@S}5-B?(Ps;+}+&??hrztUyHjG*A{no0&HAEa1Fto5Q>EscF(xI|6z_f=6dB> zm`~5vMLm2-&67eA+9WK3mIEZVQ9&nz3zW#-yW!y1p;bm;j}di4szyPY`8`BCxXN;- zjW0@KQ%^SQ#N?PlRKW>C-Rfm0iB5%k99~;m?R`4qj&*mQqr^(yG$$**S(u|ylXP!a zFkOG?{^{)CmEKicJM(UeRT{Z&R%buZT@Et)AQ^a`gYU#6Uf;03xi8IxFq z&dzgtQ*Ao9CYucUKi3&R$s}NMh?uJn;i<1%4Y<6Rrtq-%qU~Ol9ZCbZL5EZAv}wkh zT^r5{@WY7|RONcJUnX(;g+6xu*dSfBmerK4uPiC8F+4^BGy+VqrPskeS=M=jhy1YO zC@i*o8AP-~^T6<`UtdBWO(?84uiMUY?~R)pJcjogp0MPi=}BkX0#)%o z4I4zoGh;JXeFSg^6}IXxa^MyXt8RgI(h6e=W(7D?(wEbUv?_tBB!Em0;jmdtQk zLk$HH`jfu%_*OaNsFC{D-l>$3u-1Akn;Lxo?F=c9fPe8IHG9`SmGr@G#4cU{QIxBo3a-e z5O&3mt&>x*YYp>$GGHsYSw51@YVcX1pB@gu_M9@GOw#AP0x)FdO_m_#l44wdF=ldYkh5e(c$-iZkK)dE}f@WjCg#1+On(wBMs^TYyZ5+&Bk zNc*UB%ft2hroKVhi^V*gPvr~B3-*dX^Gj!o$n_~A73qvGzCihv$trCr-`5(XD=^<2 zsm|Fl{ZH)CXkMwp!68!ZJ?CRl$Z|slwn|!&?J>I2+6sG_V7EuIsbqZXkNV1($Y}#Z ze}hCYB~xI+SEtxHo_|HuSj?;VI`Vt$7M52%-^iDp{c+rPr62yTl#Cni+*a#2@}EM- zFyl!1`xkZk-ol71%IV;(iQx);vByII4PE`#Tm=;}O|h#h-kk0;nBRlPqP;*FMR)N5 z5e=JZ=VqKr6G@xAF$Ev6yAZG`Tc2{^QJz~z&Ou8p!zbqhmlNM_l&9P_KlgizWLTT* zgpCR$4Lpb*)~GBcPE`!`yH@vOZ*~|%B zikN!8-BrWsf_sqTpLOb#DzcS}>tcq@5K@)Tt$w%QWT3EH8RORw`cqlG{ni%tXNkpn z6ztXwI;hHXxa0#C7>ZV?@ej%Aki=Jw-qy7Xo`(4a7UukEOYLNHkg&%m7Fuhr@_Z#6 zzbHma74p>6$j#gEE@ky+_M$5_lC2yd65U1*+boC|{Mw{rh88 z)i@2fNdjMBG<+H$>Aku!UPjmKXjx`(;G%R5T_UKej|JbZIa61OZHSI2Tf*nJ32dH& zvWG9W0P@Z#`6Ecd#GpgumnQGz#Y`)b!syRfU;!XohOgHhn4PF+yL8YFSFVZ;#j<*v zQ7SYzJGwTv_i6PIOI~X?s(x;ZZ@tIHW&>F=ykzj>Bc5;JqUMN!8PzpyBRKhYm=WD( z!k*Rnm!XlF9L=9%ZZM$vr-Y7}=ptX=1(gyzKxLWW{fk+=S{c)%hb8HOC+n=AnOfA4 z7*w`VrhdcD!jZpx%*Ks$NhYno_8L#};coZQOF$=Mu4c+TEAGV+dMLTWueqltB(k#T zW%hc0JXx}iI9=TqiqI^$$>k=tjc;R9LF~CXgx^lUo@>xe>>C>l;&PaL^$Q1>>PXECtNyU)M3hcoFaLf_VJ zpJ2D4_;hvP0v~(My;ceW5uxF*eJ%h~*pFxh&XLoTHnSBSvnrA3PJuV}`fiH#;WA}T zy+S9%R-yAg(rk2RZ4G#s58dhSe>+Wp@l>h6zy0gC)iI*Wd30pu)<@P=SBz*HL(qp_9jTAClbo2(&{0;z~EpzN2R2@*fI5n`6SE zrSZ#m{Mgd)NT0LLDRJ!vCo<;E?~d~_j@kO*%Quj3-++F30%_)l{(x;}rdr#5mV(jk zjE!#PgO#^)b(4>a@zDnH`1u(eE(W{PZqnQ8!|DO%%`zGUL7pUOiOs>K{H>lnHLDzE zCX7=aT-sn^-ek5LoGr;ARCZwmTT@~Y6Ao#eoo=!Qvj_P2IL;LuR%-2h^UB#e3#wDt 
zPyXtxpd%WrJR=%wD8_*APV>6ALQTN;Tr9}Df?eoC<+qBp6SGFu1Rq(PCfPOJD966d z@nA%)R=`&M!bZ$Xs@PMX0W^H~RSUZy!5#Q@M)2CRKaMwk$K&V13CLt*I)&Lg_WH;= z64L2~;xlwT!Zu+27&TEKioIEPS*v+lS$jrFHM;kC@XQPph9@!^uSxBK5hyqttL@P~ zM9)P+$Pz$ut%RFlDw*fq?^dTCq?0}fVQ`WOsj+#^YMc;`CS$w*hQ=sm*eet_F(-mt zT)Sfv=*;!9Z_@$!t={Zl zi@CMK0aM%fL%=3>?T?`-#Aa~HW%t3-!3ig35Je22uyknkOnK*uyf#}0XiS&Q$z{%b zE10>{;H!(MNiuPSciB#HH^{kRAhW7(e9bvLIk*(lh4RfjWp3NZ!20;pYe^27m=6U< zqi>#CNXz{U68A|GxYLi`?YN^3OV}ijb{@$1*Sqp%*Ytsgsh1vs)(@2TxRjnLBK2Rt z=%$Fdp!D4dXl8VsRo4Y&B5rnPtJ3C^fuUwcC>ncy$~F4kve6!*cUSl%rW9}#;nQ|^ zuZnP1@0o*}{eEHG(`goUrO8!+6F49odfJx=GC?? zy{q5%9`9w?nPKz%wItJDY)6bC?`;G}`YBEmYX1a!g~-*gf+_>dGry9S9lwio%% zV=Ln?^eKC_v3vJ;^~^CUb%h_~w$UW6Dbp2}$7fZv#k%S_&?OT3*q^}mCaal%T@JF+~r!V9?fax=-&2%EjQ_a0vTGy1*OW2(_95 z$dV5|g+BD4c9OIPfm-A6ykwN0)w@LfY8_WGflTMjJL^#za;$d*xAw zl;ZBg7Lz(kif=Vjh3o4BxtDnje&p;Soay8WBYj0Dw&+HxTwfBC&!?D}96YzoqgT$iyQb&r zlyyhbXiUDY2Z;1FrJ!}!L#TI^m^^~Pl9`qEPFYI7^1{Cix>f9}W(TFoT>JM zbM$4bWGU&`=?LPE`&yciRj6vuWm~y06R&U~L4;U}0@v(xy1QuKkOjN~hXB0D7f%zZ zd1M>zv1MA8nRY6}u8(Xf>w56!i|N49Z+ZC59O0!>UPdze0l~eyhI3?RBtS-6dFjeR z&!C6dEi>k{U8o5P^R+c$cPGTQwpd$CyNJ^ClwCO9#lw_pW3CQw44p>1Y>As6?kqx; z(9c9>Ce6(js~@O`vQ|0?+<@#WSZ9DN1Cw?aV!VR@0|wviGwssOa7Gl2XV<*SNftASIoIl{}`F6ws>R!|bPIqf>CT)|@re zuwjwNacy-Ro>~KU5K|&VHA6Sx6(Y4raD4jd=1AGIo7M3Le36%b#j2J3b@8lzJbSwE zx%y_q17AHfNxEkS?F$D~_T(+LNT~;AxeNWuaJDF)r98%K_qeC>y$=N;I0EnnpLE^E zMfLY!CSI$~F3c4|(&>5dCool#uu^~foV!mGa8q<#v&b8mlOSv(lLh+$CzuS?DHJ#8 z3^B!0vVQrlyTj>H%Oxo6KU5l5z1JSNa1eRMe?qEEvmoLZMtU0It*9supkHIPH)GUT z_ffHFm(lZWzMt&$mIqOCJr_=g9;&J82!Gd*=OW#-%XNB8eyMJTD5HtSYUGQf;PL@7 z0+-gfpJ7O@Mv1fgvI*=c#0IdbD(IONSk!bJytF)~gUj?@?oz@PyDUTpS9h;?t_5Rq z9FXC*tvK!v`GH*-#kErw=MebAh7%}%+0wcT&Uq#7*tW2UUYMWbZGC?~f3hH>dctYS zB)${R#%D3~by0SA^;JRZ1wCKMHT55Y91JdO(di$${V4G#bZG&_^sh;_$v+ISIU+wg zBo}gdg+AXslt8vUAiZBBmOwdvuJESj+@4lk=XTIc631$BnOJJ&Q?2*GBU;a8WpxOQoRTQR_KHKCOZzDvwC z4vOY6gxvh-HKSzm&9Z-;u(ZwAZT51yx3Dk=EZOZr(81mcC`!=P)iqv)F9pff^9>UX z9p|b6zxLQi(7o^{O!_R)+yf53-dNTXX)^srwj)p1E2!SW7|MuHxvAM}B?mIl89wv_ zC8dC{Peb)(j}gU_*ncqb*Pj-J*}-uI z@$!q~SYZeDFfFBXGvrujHws zc^i0Mtx+&=d0;`F-qO?F(|zBDA3;Xo?Dl4R>ZKq4t}V)Ey*FxAY^PB}--Tnx+EgWd z6}r8RKz{2%z@84w-s{ay!;j59#zAFnQyTWe$mN~1k^iz12W-DZC8sC|%0-#Qn{$cH zfi^lC^ZYH`gLgHRyW2(qw1*oTegZ3iQrY^+mIg|Np!?40D%xy*zUaw4>4oG$=V^hv zSxeK4WOH%RPx{;~y*2m%V3El6`Qj!WZAX@rL-ba$+#so%cMi&1@*0%+MrCG33cAY?f6R1(;oamcD7rW z%pli0t68zbxk>k>WxK#h)b3`r?vNmjkRYrUDw2y_9M2qk?<{mCbR*5nlBg%Y@}uu2 zb;H(0@UTgBnl9fkFXC@Lvz;8w!q8A+K=PU58?%QD#3+UyKHulcgA=7=Nw zvh;$R3IWA~elI<8krsUumogGxWm9d+kk3^yRXy>8%4&|%(ZRtiyQ>O40sY!B2|4$> zr4l;ZBHpo!ESH+gB}lnoNozCXe8oVybAGm~X!6S>p#<9e{yRk5OO!E4-N2x|v%4Pt zYxuIA;kJ%7Ph~XvyE+|cOFD{CQ@b18r4=mElROswTYHK}TFGACphJ@o@Q!~T54AR+T4wJ^Qt-n}dmwVuXK8t(4IuF7PEm?Nx{ACZ{40RkP z2I`$MaYo=wv*!Az6Gw8grvoC|7|)LZz5r z$VC^NS@NOmUaV@0p!TXdU{k)nd`^6e_?2NGk%&(MQb|7>+LvJl7ooh6g%kSh@FQb`>iT*x2DfL;Pf|C4XEEoaY{~y{^41ZJ5(upY zX+Y_!4uib=?9_w8qr#^C^S9K6+;S#^ zlweL3%W*wNGOug@34k3Rxtv*qn85YpSQ84}~UbRQfQU@Vf;9}+J6*1tFM#*w_2MOz_L zH^F(q$15iA@_b+#;TQRGu&#CVxq|kzI{ggO3^Y{x{x;;um{p}U{?Pr`>ag?VV*>79 zpZe}`LFZZ1-jXO&L{LC8@5U`-a>aZ>i6;{Z{Qn^ z?60Gzo28wNq>_EAW;sp_52xVOg=RFrtq@V7b8oEAE%!8|xJ{JO5kZFH6ImCR=Dncj zHP9`;yD^abr+sM5@*tz$|T$sIAB0E1jd?)aKm+?(WmTcwxR!l>4Ujhg{DH- zLdFkLtj#u#s0?@naVQfdqNSbHvTi>2{S{>iFsF9KAh*2{4bm?c-$Nko#9URud7Mt8 zH)`xFZjjJjHwtihyk!k-Eoo?)sOddUX$~p5tj);TgOo<#0SSa7I`cfj>w1{=0E3-b zcS+OZmg@xQ4I{IDNBB@feb)BY4AUcoM&PbDps^limiysaLhOq!3tdt#491TFldzzZ zk>+%`n6M@1I4C4=6t|k0HcB2# z{Ty38ktzHk?{+T^+iXe(Q(2AA(N|4Xdb|K-X{fpD=_JFesVH#m?ZB+>-kH>&+o*o` zwjC3_x!xz4d7SY(Wk}bJ#SlzWJ)us4i_Oj8clI7-b1G1Gqm7>^HS=|Z%!03Z(Ul9q 
zc$5O?#z}+silY~izoeMbQdd$#y{OT%3kQ9F;>{Jqna!QTIP7|5V82%~&FlZuY$p6W z?)!}AI;>B^iu>qs1m%h#e%76>(i>qH9k8NCrbPz#^4ySSBj50#U&lbRfP#8>Swq;j zPQVaU1;YSs{Y3s91x+qEaCdh9y65-UB1NiE$9_`S@i6GsFsOg|2ER!j^E1qTW!i$ z6rY%DgT{NMyWcTk;}qn;#bdvzNg1+7)4$-7(-DL_6TNl$Uh0_!ZNFluj3u)uz%U%w z+H)VlsKzGcU{}`HJ#8J`AzjP>TmW6s_txh%`% z^yH)4X6jtdHwbdU)2nf*>1&1QBHEBst%#5@hM(&++Z+p&hk4Sq5;LDp%uM=zcVKsOy*S7{viZM63J7lf)}s*;bkL?kph`NEy})S${j3LohT-H328|3@VQ~^YqP?8cNKW&_>()& zTV80>`ABnjaN}Yq6U5)wQ76u*d+2!h>>!OR{?p&T7>++SYzp@r_Y$qKposl55gzkv3`{tTwEFi>T65a>y7jhS9iw)fH~CGb*4;@8c@cys+}2`sCVhNQDb63@N#rd6=*ArXo1L|% ze4BT50_kWzT9Q{I#rwYcqTUd1*|K^oFpS@L0*)Ehe%CnthL#}m<~7kHn35VB*$MNe zw1WTFx97U250Y#TTyL;vFhumXzrd%zJ!?L+-I+JZQPy}rI^9&iUeweIJRPA|KHbcz}=VWT<*ZC}Y-U$a^p&RpiC~RK??Dh~Lg(2c&Eq=C4kOI5#3@{O9(;7;;Elm%|Sy(mz zcAYex(W-aS3~v@C*mn>M`3y|83n`}zIMYYBuG<s7lh0&-=Iv{mSrn8Hfcl6M~(mC_z^euXZzchlMB_fsqkZ_fe6 z?-*1@c__-ce%KqY(WAzDX`$?xDl@hjgHWUT*pBi!J=eA=&w`LvQ0gmS z$<*PTq%vNU^peXC;zRI|>74djZwPL1s8IOnH7DUtgtv$1m(xB3xz=YO!oKM|RKUrdM^?B@x=+zdcP@f(^ZxfnKFuU89iEZa+8WzBBZ9T-q+5rs$jh zK8(z(9PgagtWAVh8nu?vaz*7lw$V4q+0Z!!o4uF+?heW|hy=YG4b`}5VPdaw9yk5H zNL#w-LGSF`E_=|Mu^XxPUeb4Uo2RvsU3uLtKq}?NddBAE5 z-m7pDt%z1TFF1c9x~E+MZl%&w6R{e0GR$CC8uj-5Rgt>4{pW8Bsa4+etUhzHeLTNP zCA*5IEbu%+6$irxHyb;@EcsyA$Vjfu%NvCWOhY&He!td6{3t#mEoTWoGsf-(wfMrG z?x{IBv2_7M%g^-k?nbv>NPYMI$WBo`hJa9!>j|Ka#IzK{GF_x=TU?Remnz~Sigf52 zUqx83g`eE+H}$JHQ%hvt-uoY`7aJ6LdjsGm-^N`UhwiwZdOTv9884ikL@n54j;Dz0 z+Vh*cq*hYa8X?KYNxE^J0inc~UiQ)pTVvt$itv5U1=;s@PKs6VM~w(Tkr+u1r{iW! zV6zWdca5?#e<~xuR0Ko0hibUp8VMveULGj?di~U!KcIAx$f|}OCQ{Yi^C6#}b+)S` zh;z!R_%v^)Up1PlH_lzYHNHYQ2+Elc6DJYdcEP2T`AtGBm0UURH-IKuGZ|F8>2uP_ z=u0Q{o)YUc#jI4)hq3Xq8uIo4Yl67D?0~6fKCpeiaK(*{~4!(`h1OXhQ2G&`Par2&wlfAK9@feTF?uRSSZ~x?2pddz(_xa0n*$?MJT%r zT&f^fe`=(=ZlnP-Cd9a*uH;BLto8;vOZN=L6d8Gl1tTxiJ{yV?%6nbAeFpgF8g*aX z89A8Sr4cU3kr;q^t4@r+b$3ei(LJ>>jjj4@$wj&!`Rgc)saW91Bc53 z!xjkD?*aaR~~96>2W_>=9o@o%o$3ee@L9__1Hu zl#Aied-O{;$)3$e2%)R5kX~NbSt%jo0`5Ia9Wwi@zVSbQq-FAB>o3qxCa{{eM=>h0 zPf6BeZa>ZT-F?2^JafcvW~szJ zsCJeNJ_3#UusLA~8mWp$j#k5<0hvtL6)#%-a!>WYYtsxGYs_-b8G(Z8+UWuS^bhwL z;p|ub#Cce`RQ7Rb-~-&TwPl+kLsQlM=Qk#cK68mzWRwC^-d*)bF0^Sy&I}e3ESF)o zX(DJ^Bi>qYf+a4##pMQ{VIc}s9>+7-X{00g8_w$3214F$m)iS7%50KXkK@i$WhJ50 zy63`9lwTp1XUzUGi~TM{vnI*&hn3q=Au1PpaN&~re8rJZP40et8&6GMzu9jvA6kWb z-*^8Qe`K+1-ufH06g_9WwJ;b!LBN5;EfR4y1)$tu$;qTY)p-=@1Zw$Qt&rY`Z?a81 zwIa@~0x4}=5IQBXE;l5`2MgbQrtNpZlZ=UJW5fVy&SRD`zoHcX_jnbeZ0{9LK0l{44NJIz!q`xu!<@ibPt~#WayMjm;G(7eb9sUMiVcx27n^(UZui+O>9&IMa--GGWOCzmz+Nzi zYh344*)Uz#kgXctG-G=+=7ywqn+@Z?Ett_7W$dSw*1tmp@BdhM!P-WD9eK*{NF}>52WD{;^Hh2v-}gw z;t(^+Mo2Wql_9^$(QZtfs2nBE+^JwDoVxq<4q_rwXQ*`tjaojd4R>eIg1CIPGXJ)k z0my>qWd)te;L!!mlmk8 z$w(hlp+Smbq#g-_-|S)^c1~?)5w2%va6h{}F1mK^GF+D} zU)qh1WgQ};PEMK|J_Igns3s#pUR(?Z~QWNI3QZF(y?vG_1WxG$Q0bjlDxNjz} z*s&D?uJcJXE&B>F0SB)lH*<|jd&`iOGmem^8iz(Xwt^L9zh%f~dTZOLR1(=$GGGSH z3g|O2Gph6oTsMe2cU}W`YG4gsn#T;G+=qM_Y*l(@=TW6&_mftXi>}i%vq@NK-|)r6 zR%O>s+hbK_+=M^U3U5k+$=O>%CqnF@h4tj~>tcV-D?VwoQ@fR3C!fKA3g=`O$9^&dmiDywXg zpXyf7l8P23f#JR%4V%}&8^6u!eb6K}pH~;H#MJpRJHY=^*}#@w~Pv`P5{+*D?!?8ZDEITTHi(4+yrZq)! 
zkl|0!2OIp~VKk~pP}=@%B9C`9%jHFYlUyRp?BEMDJwFw7 zGFo~c;G@qR0tgSqg@n^Pk$u~KKS;MtJ+YsAmZW@V6(UMv5qnAyT-*p6wJ3EAqIs=< zWwbkNY^mw;NHhprZ4Z1M9$S#3AeWL8pY!6m(b$S@j1PFgbKfxt`|~B2WXdqfc{f0x zuQhv&nDrA_U#sFX&tXuoB0>?O%FxqCMH+R{d=)V*soKr<;rQohvU}j&Yk2x&!Razr z=Fa#KQanFr&FjLVRH1xhbtFi6;dR01nRm0!CbsIC^x1Eju5i#sbslQ`1CYOHg zTjSEKE1=>M@zWsPO}ez5Kj@DnKHsLD!a8u?Jg+~8Hv{7C6OXQ>p%^45&g&w?ShQB_ zmI6nKHKO@filx2PsWWV#M>LWUKgg2EjKY5V+m5N7k(3l!Br}<#%2qCU$~j_wfJ_jqhxQOo�wo?*5buvMm$74P1@jA^)7 zCMqMamN}itGa8zM3DL2uIZV-+Y?Tvri9oMS7;}+DDqU%J=fvz0GNx!#&{O1al^8_g z71c0wY*Wl+KaS+cHl*_R9E{t~a%aMI$)p#htA_7y%=~!KFel7mfIqZ10+F$$ES9Lc zu!(4{+$lUR9=z1uJ#KNlo9bP04VL}cD_c*^4D35Vx*>_J9-kdp192nvpiz#X8BeBV zicV{e2w&QC*lU#R%oJ&oO5cnnePLq5GB*Vavbd-<9m^|<%by^je-#`Sg}WfRDYd-1 z3S~B1p((c9@Q=WAv=ll^XNP*3Y}e$27tEpqofsmjZ2yX^X`{pzWY@QmPwbfM*3YgF zPM%cz+K<28W>IZB1rb(+n4~MkYL(nSqHgNM;LEARp8k2&J;749>q!z!T*@N!8{mF- zOyDM)xf-!if9WtNIla@BAYZpv?lZXJ{MOK)RyYe>A_ojY_ot{}++J!Q4SGHYUaS8fkU6GW508o?py}vLdg* z&}JTAa(UTFkiJFQB^N2gj+ViwdAcsRNuNBz)$^@9?QU443f4ZILe30oOIW#W>DFZS zypfk+vh_z9yh$FJb~+I`%eN+BB}LalW^#JAKrwPXb*JA*1Sm`{LGEqMEDyRh|IsK8 z@~?5)VrmO~7rHDnZ7-_9GeBk>b@5%KvgVf@rES=1ZTp~qHq=Af`a6AP2}xKbmDJvU zW|wGw@xH*31=JzSVf%@`s0fcitNL2~8bG#HGBt0lf=SXf0{OANFuCs#4L$}?6X?wC z+BD&64P?qF$#nh&a!t^>GkkRTbhdSBldp|4U>T}f`GEi&U`mwRJ-0V(V`adL-o^^W z>Q11}n2K=qV{%{}kQM@&jv9cZN^)dJQCoy_iz+wHp~S=c;cv>|d8(Cp zmDT=zovBkL4Zdy#qbWSmvtZvg9Wu zAR*1QMaj%syYkA4TKrz_exai@;qnOm$02zd6L_ujWwd<0hDrMlU=>x2lWu(#A;Tss1cnb?3_g-gvkZp+j(t6 zB`jUUNyio1Mbp^3?Je-=ZE96ceLinC@VmrY$}=IJk~TWx) zT?q0nDgqjYT&&+Ug7}Dp7rAR7EwAM$>VV(u6*rZ#K8bGHmK}`&e~8yEVWmq#G{_Yb z8I8356pK19MiqNn7u?U1CUVWg7h3!>)m!A(&!je3mEv~i$~SrWk~LG^l!lXqMjJlY zW_@Ih1fH%f$;x+sT`a9F9Ojpe|x$L9c`zJJxHb{b;XSp<~ZOme1!rpuMG>c@gZ*@Y-zqdjl6< zJEueQxQ4(I1#U`BGC#ol+2JA2x}4kr*^& zj>%+KeVG4XP>9W&m%DoBVb2m2L)9me*gKH+v_XKF4Qyd2jG30UJcycQKT{qTW@Q_U z?ilCkUtktX1=gFnWbCM`p(W{a2ly=d4jU1{YOGuFq-iQ=>JR>X)W)XrUT@XwkT22H zIE;!AazW?WZBZ%J9Y4Ki@qiEUA?!&{18RJ%)-s{-Y81=icNsRaX zJ&mq+wm`t(Wgptz)b(kTM&g!PFZcBrrCt%pp0iJ-$ztE(Yfsbf;_C!=_iv?EP^pEV z!S##y2&nveJo4<8;K3Q>RhFoAFR|HtMBW={@Tq>}>B+t+VlN?g zbJG~DA54Yw>4dA3~sr&j+^_sevo_VF7aDn;QYWBybYvvow^ z4BN;Wa_?;Rv3hehc@C^#OOlr2!Ua)f7^OPtvG#n#E~_>bX-`x#$}42>Bic)l>lB_j z2jm>}lEP;B=KM7L`3kf!lkL(xq!t1zzR8nlM#ROL`VrC8%+L0xPmQmb#hEeJu)&GW4`qPl5yl50)Z* zS{(F>k%BMh(ajd(oji+laLc6?U8`lQvDps;(zZ}6FI6$G%)zIhA3=m;#hY2)5Q;q~ zm#7c8V!7-2Lvgb#x5Di+m*daXcQk*^{o@pn>{-K5i$vO9SuhJfYIy}oly<%Eh7)vW zWX@66UR#lKdHAOEWab?F(TslVQ@naxp<&hXMVVyY!3(oqp~NB4tx1jzkh^f@tzv9u zhSyIdPeiRyRB*@GH|?bF1fS`C(X5D}A3M*K79i>|jP#Elo>(~`O^*2d6l@hnUx2Ax z1Un@=o=Sk?@z)$oyM8zRn)X^;6KiI>?`rbR#9xFsa3&4Mv2zvtjUR_uB6~9q#&M`Q zrEQYqk|ZlRkehGfhr^8$ump zN2HtJ9tow<_CbhVMhu$6I*}}HI~zUbfAKnnqA5LhKXa~qhwKSGFh{b<%?F@x(xHCq zY{3p$FJQ$pp~}`;g5~VDJbn%Z*|zoV{Knn60>fAbu!{s~mC=z_$}OT* zo;&AjF>9I?n3kdZX*PAHp(ClS07k-q(=-U)3n_77vF=>NjbpU!7LnoTT5B-Sb;98- zIK;D~J3ipzrZ_YYRP z0Vb3mPR38K?rNZH7NNtK=Gr=E=R~G(_vV{!=^@_?V*H|$(&@N2}Rw1 z_$Wo#kjX_kZK#r@-9MPmhKEXn3W|96M}L}z0RQ*&&k z?D°Pd;(M4GmHK^LELWx*XX342Lt++|o(DURH#&pM%5OGox%Iv`V`i>lv0(rR~& z0H!_$0khx4bAvCHVpkXq&xd zxZxkagByc7QRmDs>gH)sPq{z`<=x3?M6a}@AV&zD$|)U#=Z_5CG1*Y%R}s^R-=ZL* z0M;N7xWqa4CbkgCm2g>jv%Pe*#g}Oa1Gfm+pAyhBB%s2!!Zdt!TH_@eVQL9*Ao+UoA5-PS&1Z zqtlz|9nvV=t1_=k^DvaqSRx@<+o(F4PQdm=5pMDo+2I4NStQYOol$vESylThLk0go zhR%Ym4Xj(Ew=G(vNO70q?u5V;cXuxq+&zKL;O_3lU4ugy+}$Aw6bVi!779$i=kh1c z-shb6UCS0!=lIY3jCZg5pkH&VeL(9cEcaePcs{U(2tU{plQg4TB|>}>1au{qf4n7s2|}D*b7igd4)ne= z6d7VzY>8CYSYBsQ8_9GzN~giqfTHOttpTDFsXkU}MVrQQlM;r&-OPzE$xSwoBD8vS zL^fMy@{N8Bs^SjUr7C|uH@W(46MM_==AUS5k1<-1a}VuOO6*sBC?utD1Pm9#r*#Q4 
zPD|Sf*%c$@YAcIW!s4F>TS2$wFzU@lda(f3A6tX3-ujhzC5a{Z0OHQ`nc$rqO9O&O z{wUqDPQQtjQ=f6(+=0aG-~j+>#O$EWZ;rC&D~E5HS)AS1;)}@?!VI{3^SAQg3nXqb z)lR5h|Eoz|n2i5r2|a7*M?tm)Gr#U>+<3bfmahZ$P>H}0*Rd39>Mw>c*irk9mndR%^g!&b+WaIwEvd{Mhc zg$v$H;dA|>HP2%6f38@+k}&(P zegfbUA-Lxq_oKw=du1{!#K*%T973_Mq-J&17jrN;l6Y@vF#?TNvR4aF5#E`(ms>={oFH`9wRbR%AcCf#D!g>NUS`6H0&Pw zFPM^MC0Rd}9s;SR!W}QN+0A=DK`?CzqfQ`$MKK3(jzi*fIXf8jx zHBOFw?TntQnv+JFd?`^7rtit!jt$!mIS++4PqdE1hCpLF1Fb)y>Pvq92OPyx|{&aD&G)0K8WKRO^ zZl!z%Eig5ttNB-E^$Q9pvq&8cTjC{uEX_f*haa1UNzuTUS45QL&gq^F?|KimLaFe= zH@2#Lx1+wOEg59rN4blxl@j|rkXc_`v6)`Ww|!A{l$`%Lu2Uk3HuCCB?Bv(%*7(IP zqj8Va5Gy@O_haR?7U)~Qq{{VP;+~Wz5+;AV=z^$jSv6NvdCG#$=;NsnhDvErX zINPbO)MBfUqD~okZ*1HL10HZ#UWW9+s> za>t*X^HdJ;Fpm%$DL->2*uRdPoG|gBJoq@c#jdhyN5q_4EzM+`^Jt%%qOGb_yMxWI+pcD#MdF_jh%s7daEfB^nW0n*E~T4(e}RBe z_d1@djkMRHdUYd%BV^jTXR5}`(j=hznz2g16wjAb-f9D7Q&2sV9(VHcaj)qI7vrEk zwfW=ali$D6p>-}*5^LUK@#eu1Q&|$2A&mdmu}tRxc&~ytB}uisyF8}cJ57K_r+~`E zj4#EkjJ)M)x{YC9H8E)iHzWp62a!nLsQ9-`ZwbYdz>qw|zMi09>XL+=kM|NPjwJOY zYrx;PTMb6FjqFs4@bxYrk}48_hYW=d`8BDruaVOrQrAg0cB2r7GEHv+W zIW<}B+q&+al>JAaJkVlVg=A!ENU%d@M_Y+E&o2P!ZS3s32hdti{GX!J%~aI8hNJUd zRr!xH@g8*5u`ALbiO$$O6|KP0uI17X{!9b54a?mo$Me=KC3^J36GE{v(pWB_#`d&^ zwP2yo8jq8!TwZX6h&`L}KPJU;5elIc8NZWDIP;p}Kw@jL5^68*8S~~;Gj9i_{y!%? zQ&PGx*Ml2sZvmdKTZ&9;to3t$w0;>Wf2!l<9NJej?yf8dK-*? z5eQT`InrfhUg^8mcYE?C(4}p$s|W3T6L(ijmFodkES{!Oye+(`rft+ct7Ag^y_ZD2 zE+d|n0+K*YEVR(Y4ul^yr+mThVDCN?>R0hGq2hz@2D^3V@D&>DoJcJ#JFq&Bj2rF# zpYQ_Y(tQ3w=DK3TL?-WiZO+*tAjZp>2mqEmKr+laFi&0e9?}yubEiLtw9GYgCn(Er z6<+Z-R$fc^OY9_>HWK^JV!Va^d^fo>&hqKC3*c6VXkl)n#zI60RLQFwhFji zYx!ncT^X!$)=s|pBKaT}RkW{^482x&t|D(wazwPBwf*7J|CXZsaffI0>dV=3czp)T z-RA=;2d83>*bc-_ocrc$zcmqjab4((wKC08OW;~yc-Rq5(KPbn=K|>PFhA^^q>Lpt z%@b!4aFFbA_+XI$X@F>%8;w0Ves5I%Tv#`MC{lt;+#3+OD3Z9zEZL&kmS6COcG)IF(g^~TP>I$CaVne`qgCwxzqlnn*b z&`%n7-T>0+bH@P8Um&X*vBz$Ws6d@6A-+&2^3hkUqBGqO!`W)1YSLq&$YG(`z zTy7ieeQav$yrC#EjvZKGJ00ViB8sQ0JKOeyHin(ZnYW%6wfn-9mmKvQ$O6hQ5d$m`V0K|0vv4{5MFehr5b!g@0FRY2k{TGCqLiLyO> zOC>&kP_i!azX8BYuIhy?TT*%0IepltX0vFOey^9xT0f!t%S9>Y++V-4^a(xh^-hR5 ze$*rX#$u(R$D|O>aKd=|Q48ou*s+AgRPRl%Ats^aeAF%}yy3LRi;T&NSg&fdBOs{( zh@cvM;!piN*W`4vc%cX(beeu>8u9g$L;T#G%OXSYDu;V`3rv_YiKfbh*%sU}q%0f- zc_af5Qsfk#Jk70+Is1(ZnJCFeyqx0?MP4a3|0j!#c0CxVw)fu_Z9v1}%D{%XB3jm; zJ=|HMdp2QEvs zcu&$1Qu3)oL18T#j?sePshDlpV= zpx9UzdZhhVQ0^a=;{mqE8ddBq1A)g=?uG!Rc5j=H;Z`@DZc};=BrP5w@z4KM3xCj8 z06R<7h?tjiGP;f($$@%4PLQg}EmbQR&cVE^af=q00){S&p=9$$_X1QIw+g==EHcYL z+T>W)6ONs3VGyOHF!E{=Z>oc|e!A21O4jK*X=kDOdk4IdN=YyJfTkjl+elZ>Qk8-a zx-|p)FMhW7@>O9~=?L2Cxx(kxK-IPQPji|R6+Z@itQ5x#MS+}-9C@Suv_65$C0JXc zzPpd3;xabH3%ug_OZDr0UfR>CmuH;!FjtjQg2bgYOMjMILV{7Vbv=Q=_^vPIT^AP{+!8kAN54T}kfK zr#i#%Vo0SZqTk+)`YRX}_MQ0_DJ`zcXna5w*0M!F$Q@eXu3S}asy_yH+kNEPm{zgb z!g<(=#6H>*CKC^}S}oaviC)Dk``r*qZdVPYT=C6A4(5mbJhnM=1niqW1%E+CVi)>f z)g=skeoe&OzrVAJ1jc@yoOq|7{@C5(R|fep^lREq91)ERziRJymBq(uNn4wsPts8v z4%a}?6uS#d8B|uu)riG!HL}Blp)p(Eq_%eShP5bvYHf5`z?DQvj_`+ln~Eig#hdSf zdVOI16ZU&6NAgwIZ;A0@e6x;L*XyQ^%`bJ(!SyKUhsQwjK&!_NNkfX57Dd3B!^dR5 z0T~OAU^yWzBd#Cl-MZ=1UheZ`ff3EylH0?WR?eDsN~l?k>}zS4&cWi}DO&kD*e3Q+3c426%+WV2(l%kw(w2lD&?|KCPVkj@9BkWGfoVe z8U0rOr^Zx0LfUIhlDYq8ePt%LHt*6Dz~|dbVV#!*y&u5&{AE2?!>cX ziRBW9s(N^ZajXi}&^N=|2|b0Z4zrP8$k`xYidPj>>?;`nSZ7LZX}Tw~f^+Z; zHCfmTe-uhVtgu^YK)C6s*8d}Atakj>U%f<#eQrokUtxofT%7J`iD{W#KJK`cT(I-y zO~1f!Vw;j3ok+MM{Jq?wt?zy9q+km+mOJWWCFIeW;n)xV`MbVx?6{&Hu zjf&r=6)6>y^)IYt9WW|Isk9Sc??<`UI|gDENJv=Ju)sWRuN%XJVk{0Fb+ zlMUCY&D?2La+K(-$(F4x#F?c~;b7n(o%HPL8LKyyy1&KR4dlNLgqs(#$+ylpUJn8<)ti|WO$1_5q^XBBo;wLJX zdmqnvNdqZ=qfUwcWDnaR>Qu!+Asn1_w5fZBXcsg+XPaV`@QmMVWs`Lr+ii&O`bn>m 
zysS_PYwu6HlK@dO4R#a1$(L5xkiWFoTiN*Wx_|{})mx~NVpNyxA^Gg@9D}7-NVtDbn67#Nw%1mtj5!sk zvu4c{b8Qp?H+gVuZaqNc$TPKwt`CQ>jlHkk*e{An9~UgOwLeI!s0GEQ>Q^oXs<*;{ zvR0M_Sjx3jy9Hs=5K!}-ppe1UaA$*#7u$i!im)3(Z#*aR{powHk#{Xq-GaG=zKnrf z$-1fZS7vt~f})&t-4U2jq5o=WzCBR4x(K|fLT!qJEH?x=Q)HSmK6-b)alUGp zi1=U#RGFWwJ?Do1EBxmZlbCNR2|$sMW=Eq|0_xmhlt8x*&_W+~t##Q#P z+Dg&_zaem>+t-=Mkh+z|)164VWp7}a$d|>AJ^EKfqpoDQPvWXtKpb86-E&bUMmPWH^0yhRy@e^M@Mv(oaLB6(oBheaQ)h6oCbCe{Hg(o<{dsxTqS~GO zeIl9KIRm?_B!&NB%nr>cB&q^Z;LHG$Y<2irSKG0@A6QTq4|@<@Aa3C4f7D$9ef*yL za|t4wa=VAjRqwdR27f5<5k*gY?ZEM@y<9owEQ8mk?Fk!yg2`-P!WC!4Gz>TvB40a1 zc!@f~>}DYaCE^=`qz<7@dbSF>a3A;Pz_RSHFZ z=UAefa9&?_30BdZrdKvQ=>xZP>%SXUlStz~NQ4LmSj@6n&(m}Xk1HM~UVlm6AbGnJ zJwB=kmpI^yImyP?)LoyGs;GhhGrXd*tWeWr&3$q`lNDr zGv{qxC7l+92BOAmGnZ0h8?4QutV!>WeL|+cr}$a}4;CyI4O9Q)-cxOLmnpq}-gLrN z<&rjBu%TL67vWAw58)(bW|4Fl!OId~8=3%T_M(gsqs9YJ zYA+=)!Yw2Rcx^@Rq*g_RtsO_5Yh#Gi?tSNbK3MA#5JLI_Z3sTz^;A6g;PFDuwBlta%T`H$p@v* zuFlQWBPlFX3+kMP_N~xz#?7W^nFjaxm%vD{&qkPM_ii=w)JNa3#arJ2T8B*@nYExk zG=+9c55xlU9z&12Lx1q~^1QR8r#O~LQJYm#(%8}vqCda2`Xhxx;(2yMIcJ3MQnj33 zdHsPM>F%FzhzH*Ad*#2ks3vF8#mP8lq~M^ho7gRxKk1i0Y+2pM$Dki2pT0r?f23WzRwF zm>sxxt{TVcfWkfK)Sb~eQ0Aa5P%Y+d96XWp^CyqBqbNjlBWRSp(cFHg8}HOZ(+o!` zL|PfOd@(-)|8fvNUmvVBjHEZ+lnNQEiD3L1ySe&a_FEonU~0>iKsE?IUNinN+4?8M z;tu$&x%n8EL=3K+TMxzx7l92u)&}AU7{V_O2&lGe$b8(`guplWvB44Nmjsmn6?wK! z>%0#&sx}{F1%ywFJ|Yx}65l{}mW7ifOb(4f7;K}GdT(#yw-S`qE-QLpi5EysVMd9Y zO}TBH&~QaP_Nm_AT*W)sBW*2=LMW10?(xUcCSY?j~y@(VkDLXs=UspFMg)lglunu zi9VvhqQH#SF|DUE|I)XKGcN$_%QDpMmP30WTEo`?dAUBEQhjuoWOONJ9f|Rmo$OUogtS(aHYB;Z0C4T zTSHaH)m>Zm!)Op?#QF|wc_B%uU}jD-jiFKNyqd8PRPv|Onx56NE8+s z^5zfPd`j;!cxg@3*)VCr0`t@u$Xk93+QaaLPD^`3v4M;Hlp*W{_iw^nmSPN#aNx5s%`=TkHf?OzbMWrd!&d3}!rI*-#?%HlI zsOF@;;q%tOyZPFV(eNx^W9Ap(Zb6~lCMOSM>+C0U(@wijv(eDO*cANjRI{P6qjSiQ z8oTPyWSgDJApVi;kUPIOsJo(MyaI8uck1lq!yNCsdnGpS-K%v4!|Wo&CYK~F?sTkC z!$!N_P-36drPn@7G?L?^`HtcC29>t-tO23tzxohZY&~|e@2Blp&%NYT73SaNs?9*_ zqV!A;=^at^{_Vo%$~6-XK_`axEA%B?%$84G`7+7fBLQV&qgy(@!w2F%r5diJvd2FyrhA;HGsoLXHVYH} zUcG6zv)&>nAWDi~k%^jD$Vl9Z5~JPJ1GV&rzD8GB0T0j~MK>q=VK zF&@Cx**TgYr&u!F$+cN(f)Zu!40$e(`lNxZjlPu*;wO^gU|QY*{^IaCaPYhvwP+_W z6{>L-&zG{dfKLor5+)iH1Wtp8w@OZ#MI5NC^TEp=cO@Dz$bB7u`P#GH{FTp7g0&#A za+UepVXYJP8ObHGc|*vRAf1M*U)`)mHbZps zRq>GyD`S&Ht117cuHH<9B`s;+Hj#kfPEqWZ;lK;^z%FH6#nn?_${c(|{;6d5^YtyX zhl%tS!r(qhe0e6bDJ*dyhyZZ=yHED3O56Bs$BW|oegg2wW7MP=%Pi-qlCM7BBdTf;fiWJR7ZfU-4k@; zYhD;7Yo6-iC7EMASwVhM!YFP&Zz|XJlA7em#0Hu!Vrp#h}wl^>^%LvKE&K?vEO@jsL_MChf z@ccD>dFwzK>`bol9k4%Ds3-&3wHTt8u$HCPRDyv{#q?+bDEsT6_vAPd?z+?&#I_wuj7|5W*H2vvIz3T`yzr5Quqgit^SjEV1|!^@A$WOl>$O$iku_tvr7_JIx1q zEV4a#q~z#fbB3&QSy?UmB&G@l@W)mTw!b$JnMWybU(SC!X)|$6-&f)g?fL8+gwoqR z;b~8~bRg;?hgqJSXj$X$@^q^$4*)lSUIBj_4qw_F9ia!#zl_BRZ`7$miU@t0bKQJ#q3l-0rb|FQn2P;YgC@> zR*o1GorN7$`y5t}nXo$}OH~gOHPlM`u~>Z5dtiXF6J?#%a=m1W_X~OD$n*u(l@KA~ zsH?y!BD~wQV6g`9nMe!q_GGZp*$#*i0dwFh&8|Iig<8N!+kQEh_5Eo(+hA&^9v=x@ zxDYcce4%ICF>ebw84P*qH^U99?UP>P;7)g7qAS*UexWXP$9&PTcRSRC7+Tt^^NUij zSs!lY)Q^?;pCj^i|A5fY?a@$k5&m1FO{oXsI4|JTl~m{a*FKgZXKx3&F&74S0KosK zQ$-ecdb2zNC;u#o*IUSmqIHtAbR><@Vo3J0f;=GTmdejPv=7zw$i;U{^hc-6Eevk} zB395(7G-=CCFDy#8Hh~&7l%SdDjg8#A%)71YH37O3J{B|onG9V_Cb0{9F$XO9yky! 
zcR#0szM(StMx7Sk1bEH4DQaMx_GIRw$CL$AJ@%JLy|u{9?`#$9Q_`2wv(VPc{QK-= z1F_#fZlpq;{Lt>xQ%nU%z}z4O=82~i0P?CQ@Ex!+SDSMd;$wXQw!2kUmz;6T=J$w- zWh{jmIwqfPeMd4FMu*dL*|GPwqdEI{))GtQ{|vwZ&6`vG;?{#a2n505k|N!zDBX}O z7`ec0h-X$fRuTxi!Zn^Vz0l%N_d9aA@+Ro`IWmd)*k!LBURZzhsA4)1I1U6kA@tw> z=C{vKHQ6qqoLGewrT6Vj5J~#afHXrn{->gKQTvWMEkaUKivG^*^Hb2dPqKN3zm3M8 z-I#;i8Wjwe8Pz>3fj9QT`|- zFh@`O5w;Y|Vou!=zo^J>u4JP^^I0+MTh27zdUN#ceBksIde&LK&yTKu36A(fr`FQl zAE3nA%5!1bulCb^_n`u2tdq*bo!z8WA-O$z%yKzHqb#};gMQrnp;#*+Yq~ppfu12OiOTE8OtMsM#Z@xTgUVDF8Cl1!6>WSbwL+tC|w8(I; z0h}6jE##WHu5XtdiIH_b)m);YQiYNqTawX@qev*#`G#yg^}$*B4eu8NeyQ01SjHwD;t>?U*V zTV6Ugozo{m18qivu+p9$Qrd~hjdIbahNY~nsX&EM2 z>Fm>QzRX=}9C@fj^%-5t?K6(B4s{(+KQrnInfw<#&E1t?VJ|bPmoycMj*c;IQ*o$7 zW;Ptn2YU3?QLE6}VIgwLk;ju%pmrNcY6ats$~!`<$G{6Doxv|Wjw?Tm=1=KO=~Dfn>kptzDSa`D<`lKp} zZ4@+MKXX_#HO*kYgBgth_`J@Wf+ZxbH9f9Pa=Oo=_8NTi%QWSpal$JiCxqS_E2chA z-Cef{fjJSHjdj_S1#-QnpfYtsHiT zT-_75bLL3mCSHp64m`XKwEVGrasG_2mf^V5CGla1uOuaTn^YVlIjV<2$A_z$IC@I? zjoW1^|Gg0kAxqc;yfks8x&1Sw(jrt@T;`lATvwWx$I+S6X0MS0&wk2KKm2^IMD7^L z%jQhPEp^a*YbLzL0MV6c0$$Um-#3)=9Doy_LcR_)UxvTno(pU(w|5s8n@V#4+B|s%0({I zOb(9(2*@8CH;SX;@T0TEPITWd(ZWxcXo#*8b0(m1G$#5$2Gg_4f!}o$YD+iEOGq;jC;B?!1mef?*H zLG-kPFFc0TyMJ)E;Sobd>^qIZ=yW%Mg$tn7vD=X)4SMgNR_SZkci zQ;BG|YN3#vAiUl=G_WD~`TDoDqi_eBIui90qJ z1~V$uOM)oFB^~{OZTXrzXfvE>7R%Z9+|nyL&WEZ8<3ns%LOon=ocod8yp-FcvL6_zbU!~rqA*dY#(`9BNuH^Q zva+;}kmp=zjfxT9&b+<&FPNK4&y~* z+;R$XMB+^AljEx988MB^X7(7bnrcsC`?jRO8ftI7$vsLtYVh5U<8wU=SPyuIU z+Okt?c~?jKJ}8Y;VvmtV6lwX0V=>q}2>cx}>vSxefZ{_?=K}ap;1IFVQ+`n%#Kftl zWXQ?4uAtwk1J_N;Tgw~x@`~uEGH%0xD2$^y5MKo~kpox#5;b@r?TXS8jN;G|IIQ5= zz<*2Etwe#5CplB^C=%@e>LJ>BSrJ`KNdu*)13LrY2&KG1A&eP2D__(}#?OX^oggla zyrI*zcHY?=)1O`yc)oVDhTE_HACvlnc?+=Kwd(rP8_Fa{A==cU>+F|;R;44GVfk0` z*+7-jZB;V2H42!pPZMBq1g7b)3SsxG4?17#>sv6K23+EOiuC=eBo(gFr|@m@dy|p8 zXLKyI7`Y_znM(O7;Gt@dAUGGJ_`yomsdv+1f>StOR8UKWE|d9b`@bJl)@P2xeeWj4 z#5d25@`ZzfKpz2lb^KbNV+xKXZHtp8EsNGTy(Qd=?{sCQFB5$i+MRIY4;PXuUxn($ zMI6oFe_U>P4e+f!xS@#|i88f`wNdtUrHZWghcPTp+gmr@*o=guy#Ao&N86(H?Kq7W zJtYUG`z?9&#l@psZ)8|ZX+G;Y{T$tF)r9zs4L)Mb4#bQNCvI^m=F(d&fm@0<{0T;^ zGI9RL8fKaFq`=(?;rA-a4++cva5bG9-YV}iL0Xill}cw+{6PJfJyi!abUBBCaKdPB9qsxfr1d67cF{ojRT53i%G^#e)I8EK4kPQx1sEmL1YTeh~gCNnmE^3CP{2b$uf^+`#(rY*%N zLU%sJr(6BsQLZ3+CNX+A6$eQUGf@-z?RHi{*_vx?-itq_A`jaV zwmoSRj#K&XhdO->H@&kdxkeTo3|4hW=H2oCrCBzC@9jbIG|3!XxD>DF#RLiM6og+! 
z>Sb;JVmmRF98CSj>PHQ#44QpC^GR^02Zu(VP+-wFr{I2~m)^bblAWGQ#I!cC{z7kM zCk+6jK1qiY1ZRP8xq0zrg`F#k5=@g?3zH7QNc!8B=%Vn(wtu}Hczr%Z%Wr=aXp0RC zfvL`n?YP){T<`Ygg@L|sVqq!G2A&TCg3f6r`4W^=Zi2jBp2qDqZh*XQL9X>}lPOBq`VB$KHmoqi?Rm_{q=Ajv zBfGVeh`mQuN&_X_}H$)O!oM|iQ7S5JvWKDx%~2hWsKxto8?uP&L{c_fWWhL!?O`wWzb z8Sn?s8@^>^mzrEvo_k@YG$yZK@lh&U!nO77fj@2xH_fs@?O%f>IBb&_0PwA3ZQaT^ z>1TvvGOwRr%P8K?2LQE(RX;+39wW%s9cQXtqg+i^8O<>7g!(2FP=UJM=oQn?No_jd znwG&{>$;uqpkd{0XZqmsE7{I;aD-V$-i>Vd$sehMp>y61tN1-56K>j8!&E&IyAZTD zpSMyBG%9mt2PZ_=RDC4*W&uM(c3G2}hfa53m7W8~bDX;z7F(_4OYjsAb}Kqq4vOKM zr+dY}9y=yCR^Lm{{s18Lh4MVl!BsnjHjJHUBfML)d)}sJ5p5hwg=3amP8!7ujyYL@ zBf_4;DI4Zw>~foYhot#3K^BNvumt$=WLscOnxsetbwOR?H|)!>yKKbVva`$Qy|{O# z*kzplZ?H&brc|Qc4J7&KOQ8Ya5;?#BVOk%0U7luvBWP)r9e!RUNQcrzlCecimsPwN zwA{^nKZ>g~>#)2sq1}qOvWrA-$($VycQ@x`TT%?BykD@oXiUu=X$VblF= zB!*BptrYHSdGF-cl$ZzZn1@<{6vN{R!WMd=Vg~xe*-}Z;wTm?+(jsgAG8Qc9>v_31 z{qI+?L}(*Crnk?jishDrm*DOXceiK8)~pbbyIGxu#eB23rCP_I zT$YK~sY@S{avt5kq*tRiwQHji>-9c~q#(EsIj`1XwDdN3uvKA8=#BVK9ZN7jU7_7~Aw7F=InoD<7SCWhGuZyn#Z zM>`8?<~`Cc^)3$>@yj#CUNX6!#Ou#rltr zJdxsP++B{tCkEl`EU?ZP)O3c=4oN}Gv;ukvuV&EB8=*2AH@PeRyJ22KOS;g}7j@5> zYe)u(Lss#JagjbpE4#@DU zKRTEJhxw%Wi%wl2!!OZge|A~Cw#Lv}bvdpgGQOd~P!EJ+=%m=%rKoLV29cBmp`v8_ zZE<~J!K)}N*#kkM#C%JatSoH5jbd9lJPo@NgPAw2m!?jpHKepha&P>z0pGNZUq1gi zH~8h+>axEeNicTknqGfz(T4*DD%_%`_Nx?3^)xmfrr*TOeSKw;l5XlJ5bWBDWeEs* zDnA4pkS4o6H6-(wr#u-B)z{{CT08x>yHAJo!CVFA_%blRc~1kj*rcg1=y8ruOLjIf z#K?}MB`SYCh{qf#b?tKAGKL=Cnd%L4h$NSJf8J!WTE7DTIM{5Y-`d@-JB_k+)1DXf zcnxeETbq=%r+JwTAlG} zTtQ7UGA~2~O&>a9odv@$^iR(=*TWLnd2_yfvnyEbSK7ezr=Z8&-KiLx2t^Ej@l=<5 zkYB*_d_=3+6Vb7nE5yXDsN*K-+beFcOwhf+Pj+8S7dTRmv&U*ky}o7FRQ$JQ*kB=$ zW}PG?l}G(BGUrf1!x2qbmle9zr^~-)P&)D{1J9nHxP7wMsHu7WBtWj|%%x;>?t+q$ z&t6HmGC^@$?66YqXdE!)`TngfZgtKH#|ic(<&dyo9tHL`9d)jxrS}O_#g#QluI9B@ z%e#7YPgq=>e;Bi@6|Gu+sP%m~9!X-{G%3XGCwAvDA5-GfyR&JU;`N!XA5CZpqmB7i z<77rR2O^kXGd^-#k{X*hvj0n;W@|y*W1YBVAxb^Sgg-C!%gmbq`=?8{@lOYis96)z zKAF(Ds?R~<@!tK~pmfW5*IkZ_t`A8Kq>l@xGVbzP(>^(C1KY4%m*(3(oMLTRxm8`C z$!Md<3@+G){3!)Inb0?OL8@xS$~-V9C%SaL-n0?>v`@+$3@?F9>HK!m-cF5hk&qAqElGBB;2P4bOb(fmd5#+#EWuv`YI3Y z;8;Xr53McN&SG~{bN_3vQ(`mq zDDM8fxF$@g5Bvod2(hrdp0aD?l5H@qx1UuS4H!Z=2-6wjwn|wON$JUl%7o9x*SO*? z_33n<{1M0pJS>*>VP1k+zS)A&! zvV*z$-AsO&d#82C2U52;S#Fz6%wLHIG^^p#=PxDK$1|iSYE>`^tYnrZ-|SS&Fh$H^ zJKX*Qce`Vtb(jy@S9LEy@j#Doffw-P-R;)wflcIx_HWerD_oak`g{(z0weX9P;u7& z1h*A&CE@3z$0@fuKcU@TAdn9F*2DkdSR%a>&A0wL#l_OF`CN6XH5Y zY}XlG^$SzXy*D6TV*P$7_he|^p0D(uiBF1MSvvI6l3Jb>%_bhJf9Y5>Ma76n=t#89 zdFo0fsOBKg-A_wHqkoHvL%C{Y%Rho|$F`>0O9y}UJ|Aqc;`dZ%>=+y6-0p|I3fB9S z3QjIhE|F23FX>OV%+rr4(n2+3Es7%SK1dSWRW=-U)yj`{d7BP*kvtVViAJqCBoc^| zQyRCOSjm*;UF`-2+kAYr?;UT)#f_^9jmCqAG>kfr9@2g@+B22{C|y1AcU`d&9@5-b zH>deFTwB-K7?JFq#{QblV4n+b?Y&ea)>`KE+gK(!;6TY*NYV@dq{rwMvFx1u7>t45 z-2kP7tLM0x_`F;!u>(GPyAQ|L*|VdZTJ@o9m_FUg&46P+-}O?TAU_3?&8II>O|@-a z%)94)8-ZZ#@ski&6ou6$F-;o6g(BWpdEGf?XoxhSps8?9!riN95tPF5Fy20TZ5~Rv z!f1USVCPa>XK6feMKG6hx61)zjyk}>u?=mnt+Pf$OJ;jtATsaAQtY0()H4oCz>^Vwq=+UpOub-|(AMu?-D@;$~0J z^@(!X0Kw`;N1_&{-#a1nC}KTRg}?MU3FIz2PgHxdzNSX@e>eY!2DS-)fJ-@wo#c@T zyG$A4PDwzRds=!L9M;b_P_n1teCO31qmD>FZ+De^RmkD}J8Z9Zq-p%)$d=c-5(Fw? 
z(ZWVkvF0fZBgzn*YaebvzhZZgvuS&i>~xSYr)!3K&|gaTbtjnqKSO8X*97*L#r~6uHPcUyK0+8HVGz?Z{nex_q4>~9U=);IJ^AgJF z7yMVaKIKbh(&b6=UF2qLeAL}r{QGv?xo+@iwIurToxbz`swMuyyXUHeLqvx!E8j^W zFzEvc?4$PUERPqjy-A2WTyR%Cx8$$5Cxn-~P*r{1!6#>hB8#@1YKh0*N97i<-W(=uw@v{%pn-RNfAq4}HAjlSedi03rtzhz);q0JU%QF42t22xeALa0=UpuJ*6Qvb=q;D`MTz>AW8U6EZA=97w5kBai*V`i zQ>*Tfx)QOH7;n zYf}KN_W7rve2y0>yaB=WmM`)u8SKOz3(`1yfmSAVnjoet}R)NX;1t^S#Pyd-9C=s)W|l6_D4 zh?mQ;g2RtKE*;<`K(Mo_VM#n%v<(_`ILk-MNruS#qzlvJVj75eJHM z!}}{1l1-RA#r0k4tg0+?I(!K~2+sSB`DeH)Hah6UDl=auD=<8j33A=UP<@`GTP&9b zb$x9#a%OHC#zsOYeB?oTO1Km#Bje2(s!h^XEj34e{tTx0UyiAu$#0bq-b+ z<9N9=MRZSWf^XsU%WsVqn$vjY3Enc%iF+r4#?PhGZ{5dS?5z@Bj%nASOLi(PxMqer zm3LxyjaNP|bwO$JD8aS+i(d8yz|s_Z)6z%|Io4BD@%a1?Isc@sA?GpWHU7+9cUKc_ zSb*B`u5dUyEINV8r~^_VT*$v-NuS*x_B|4$5F9IW(0U_|i=F7jBQJCcM%M8YaRyy) z9OV&PFowHi=2j}H>TiUhSPUZH@nz`j%5)dzzT3<2>f#(+7ghxv-3Yw7kS}(5?F$tL zrawFoyfiW80-izJun*x*vRFwGq+TrFy+%61{g{hV21S7d1h3O`-K#ihh8;*i*HqZ;M2*%m$ZnrsZ&86dF_sJx;ZnE|}WWLj?p zw~t|e939?sHhF!jd?N&m*D!Dod0%C=)ivB+0MODN_q5QmmoKg2hSHG#CQFZnV&n(h zu=SnN$&Do;8SBOe_C8orKBfy|WHUiwkzceMu42CGr90oU(J$AH!@|G95gXP!n|f=7 z0jdc4J=FG<8)ZGe$&Ol|GQx4%J{i@j=p~28_c0LrIZI?YM(9 zTU*R4Q7a^qPj-Yu!;Sl`8{Jzeyv+;pJjBsGX~%kpFsz~n+8%!SVN=62E#vcafc)Bc z2^Dc+o$tU5AovMOUGA!uEnoO8Vy;JIp4pz&BYH4kCK4jv<4Lp^;xAp9|D!afN*fBKS9%{j+)pZLnKyxl#ses2Bx3s`b-9C&vyQj z0`R2q77t1gaG}eo59RJ1-}gJ8BEMTB)G6s96m~FXPJS23v_N2uH>xKwfiiHxk8G)W_;UTM91Escu2pi)7sV!XSPw^4f^?(qDX ze%|Tv3`Q{lE}?)nA~VMphEV5?BO1wLGIOJ9k+j1FRUW@KkssQTtKkVeC=F@rp3yO*m@b89@$#d6w0_$R^nJDOqoh}&|V@G z6-BibbLK4{9IW`X`%!}wr(G*{#W?h;P2j}5qwV|2Pg4hQ4r8|x1j_h%zrj@N7?0z= zHTJBVvJF&eMnTzoIj|n&-SPE()edacz!g=?TJxJkQrkO99jjufWkPC#zoFHItezoc zV!J(p|3;wKSA3{GH4u_1-#8F+M_H9|r9krQj%Tv>L{;?l+|t}O@NLMvV@}WA$gHUTw3#?Q`Rb@5Bx0X4-9~K_V zFAqFif#Zk)8gcl40mv$;>-^(jy_~Bjb-y8@;Dj&a3_xiXZTxosg&Q@L{}4_PTjuyKEOpM>*$q2`hU)$_8~*@HUgjSXld zo!KC4ZBi0So7T<5CRDD>;*~5rOW`k1G{0&UPN}Uz|GzWK|2i?IXHjNx!{SZ&Q&Uk>9TT zrTD8E^mc-s0CGl4hd-GKU@V&7v+i|Dr|p-|S8(04UyN$jFq6peh`@yJ$bSJe2K0wv z-$#CK2@+_2WVBF5XoPmpN_?_H^2QwOWLT}_#mPkca^0BT-oF+%aQ;XK62d09Rm4)v zPqyldUPHwGV;w(%==paRcBVg4#YozYg1(9Bq<4+)@%PG2GD`Lt1_s(OIpxbjZR2qm zt+T4GL`?1;NXSstuq3(LKcQX5h{jX8EP(PI^DzLVJ$GIr z5;TCxjC8UMO03SEw0|$=IP2_@HaMOsuPQJNyJ}r8J)eX*cxQV}vQxDcI*an=KjIFn z=+q?5Hw&>2jtNvwx-UWXRH=*J`+e=sGS-=EekD%wb55N;v}!nc7bZ{@bn&?6+ASG6 z7JID3Od)}3VW^r6?0%Lvs5M*`hCoI387*g&_{A-gl^WSCcdIxxBHt@u;MiVzB1!zc z=Qi-_jn+US^0GmLW^$}|yZbECurJ2fsR6nt^oN_B1*i78UF#7Tf`V82MQ8R+Gj^Ga z>eL{{E6W$mmtArp^IN9FCKhIy;MqYv6Qt}wcOEN2!;+m0xEUMEW0Xfqw4q*r_u`8s zP#}rYBCON!N~uqRyEsMfx+#hd1Mcgci)d@}{5?>TumzpP&+$#HCH&x{?_dyeu4iPy9Q=S}VVgF8BrBQ0 zA<}n{&UXH&A}=yD+!Be(Re!eXX^~DFm8J)X*jYV)xTw8qS<85iEjt(!2Qg`*CsTyk z8P`O`2hv17JbpBias+~Z6tpyth%&F$=i%Vukfma=dL3?8fGGj{9brM16ofEhZBr(j z3fp-|%C2ZxnY5@z&u=+@+k-4c)9YF{WlF4LnvRegpLQ+T)Hi}a`JwLQMk@&r;U1vE3_m%buD0qqb*iRt(cY3=%?L<*dtM9` zUzI2|kp@09Ao zUYq7g$Ru}d_7F-!@U`=Rv9G7eRZ3FN=R=^%ibMFL2{JX9<=$yee{KL&r%>U6`7c;S zTT<3c?vB%UphApcVx{rP&|i zaz$Atwtf|IhS~L%oUw3r49{tc+)Kh0L;hazj0N%yk&?${%sK}KW{r>lvm1Z9S{2u2 zjMo^Lf2|y<=@`*7rsQ_B>Pi^-bhiGd19Rg`d~*5ri5@Q0B{^@$UgR4CY#x6PkS!z; zYzT3!TaUoUzOzHjlfB9$^MfmNX>Q`p7TaWAJkQdZ5>*RnJ1#NkHcd~la^wJ=RUV4r zgw=fdOPM!|RBC*)qRNCsk79w6!O8|xEY81kbAzi{yy_PdA)}07mN>V)7qdcSlNh|eNp_G4{bn8ff zZd{-Ir`PS#(5S4A{;kZAq_QPe3p4+K70JWd*-5KFH+Lx=W$)6hz;5=)D-+RR4EsdQ zwcQrunAYed@fD%r6JKyEg=!Lib0_g?V9DVpkOTWge6?Xa@DD<<3~BW~E>@lOusSlq z@s3+9*kql07}5tGfO2FiFh{S8Oagsa2O`}m3w26N*f>LvdjI-+iR<#6s?9$$ADXv_ z&6P|i_NbG0!<}iORGs}cRu=Xb0Z8~4HSvv^FRNJi02N0Nz+w*e5Rgu`nf_@D56wFfbBlFsxM#q!Cw z7@+Ng^drC46#=e%VBFHa8^D-q(3Haq_9O+|U&@=O%c_gRs~HyWP+0Q^T~k0~g5 
z598!*L!Unu573~x$CH6SPZyj9f|0AZEOV}?P?wsNqIi*X=G~cH1CgoIlmQ1AHur4z zhw#xA53SElGe;5r@45OS%h>i&DQiRxU~kinJ!JSFq>QQ}Q?HNGUoU#|+zqUz(Lqtc zkDmUPykb1(rWM_>^Bci*Y-gQUb0wNZXL9EnAQD9|p!PUa?z?)h|7N27r7`=!A^WzQ zcdKOArs!KWDpgr+e&;zCn#)l3o*w&aeRcWxmWy#laoQ@SLH4a;K3|PBS%OF>>$?cWCJlG~T4rPR%-TRJl5`h7 zMx0{=_6H+(VuGErkZTGqr7S8J)z#4nzXy8Sq8?GjP)n!^%zuaj=8J&iVV(tkc7+Jx zM%(9oB-dWdhAR4C6VRe|zEz9-YE?%zn90tx2W<^JLzoxS!$}6`!l97V^zK)#4I2;4 zhCGd(USp6-wL!4>80mh0IJ1DUHRAiFst3a}*HYG$6zUq$B2+KrpXuzC z|NS?i!nrx{U`%KJfQm}#N>qQKjzqv)o_($G^YzmeUv*SjtKkjnIDEFw5&46NFjwn8 zRTD-6T-uVsj{QZK{JQI{12DGUD942S9K%F;mk9F|i4CMw_5?vr=}yN?j8(w0SIRGlydDGnJ*ag+d z)O8rAQ8n zyvliQYi6mF-4T6yl{%n>n4LEsiQW@A{`AWerr{3nYM@iOmOd}#P;#cyde*(}P&>$yPY0G@qX>;)DmL{C+W z#))N?*)8(kw8*%VH&VyNkhC(kUZ?A9q+t4>ZA+k#R^iD7f?UfrRWPrP-f=zu5m1Ut z=+iHi?UYPkkC*)HTQ>m<^hX-*XPd-k#NyOL7{XH~ob1;0l}iGi?7ydt>yR-3q2G;P zCl4SqieCt2Oi166PQ_wcWEA|}RJ?sat-N}c4)y$~LPK9{c@cVQnSidRy@wG?K4{lF z?Xt@g4>jDrV8lEn^oEENEnJDFe9P(kL`(5)?;!<@Tq;JG{}EV$ZXp;<3Oo8hkTOS) zr$)lKE~DfPzMQf5pe1%`|Ey1seZZmK>v{C}-eoU72}%E~jKv3`=b{mvr;73$l0d(H zH(0r~>>a!fs4lUa;_!52$nt92)^~ld$%yZ ziNT`5@Fe34e5vEJzth%*MV>SY8y&D?gg9<`=_RUsiSy@@XU`kDCepDUDPAHwU9#UR zk^Fx9aRYeO;!ypF|J&CmN^e$KKf3(f9B~L`7f|7&i7%Y~lo`T$P2qVZKNs1-TUY zyIiA^sb&$8x%eXd6SDX~KgWc%Ye$m#Bbv?&3A*V?b^J zZ|o})paWuXI@2sE*U7S&0SDqy_y*SEjDqL4SY^Ngh(q@98x-xapRl6YFnX5Q?za*gYlEow}mV=IZfd; zCMIaw!Z>r3SofI|k&e>Nq@1Y?>b==o{6@z^lwISQ4WOs^O9nVNsX^U>?!mycZX`#A z#N)9j?!gNBOe1k&`$aX~F92d2>J|ggs~DiOetzI;E?t3#?>io;38VO#;Pab|R&=I;oi(i3?y^|J8ZGTZVl zW^!qhcrQJ%6YOje%2HuU*Zk@?@sAXaf*;Dcb9HrH-@j&U zj!C=sKeQh2FZ%1;Z;M7c^ofKhv%;1>I!7Ob1S_JNm-+wg{=R3lkilao?Q<71tq>VkviUEGfH>747n7P zwu4IC2>_Cv62)uIJGsL{yuIlVb*H@}eYxTt4ONB_qfIHYliRw5T=aR2Mr{8Wc#_gM z*UU}_nJs9ldA0JcqTNyaJ;I_ODSz5}RutSrcBuQ_#8lqj0z&1rGa{+zmK26O$e2V-9HSrE31-NvEvoQ7zk2F{6oc z&@nHS8fMyNgV~ryc=dV?QwMMkiYvUl>D3ix6)HC99Tg4v3BB4=`S*d8FV&{VPYn0W zfyPC%6El~LbaOnhh$kKgeM_j%kZ8l%H04GlZ*I4eB_jMlII&%Tar|$>^ zz5a%yyjAw2(VEg)GJgA?U=(wpFstTSL9=Bkv2#P^m4yvuVbcD5`*zfsglY|F>3175 z_ev-yo7eHWMX?I1Gj|rOVk6YKh zumAi){(I;77djKhNS{l3oMnn|`MUgOU%}OgUKJNtsjh5#r^81X2;M$5p`H1h$$GIq z){lm{t>a?takOoOe+$U=_UG=pbn_ncN8K1=e78%}>}|AQL3Q9yXOlDLquLy{XL|OW zz+;pE8ayDp=qOKqqi<7pqRelEU0}rU%SEMgv8>BJxG^s0)BL?&CeweqFHNmvIz)A; z#6?j%(|ZQPR8O(Rx@%Y)rrgyQ9gSSJndabY=H|7<(cB9WU5y>mPZ1r$>5?u}+>N%5FH%-iI1V>o z(6$()9InnDdzDo1j^-b+WleXm`1ga5TFU4agW`Vt3Gg(+p0{`?FQU32T_lKp`fkI7 z^}3@)I&=PvHqwrm%iqlYJX_#9|8O?PjsZKYW0jum+LN}UgFMHoQ)PYt+l7F;{()Zj ztAVynjU}&bN&Je)IJ|3}IQsnhhe>m$B8SEW2LowxV)%0iAt?Rjb&zxikqIU~hlYsg z#d~T55?%eXd$|#}{Hxd%*>B}cGAcfwY8GuOh&dW8y4&B`E}CdFHp$#QStHwwM>i6` z?5&}p_GOEvmh#4=4nAhs{EleZ;uxKEs)^$d6F?~v=g8sKRiXP|si(!OzSZ+yGy2)r zK?`&i7B0g$WOGc)er1dLzCD2}XCK1tL>1H?SbD6e&xa8R&axK@eYRg~M$#_cDPaDyduu-#FY0 z6u~Xm%o4jNO9=WOrg_udJzxdk^#4c}yvgn=P5DxI5k&hMft!Cz*X0zsMa;gGEdaKs zp3E##Ext9s^gsUgJOu`xd7UK)OuD8&wSgAE$CfH%K`}p*6geoDAi1*p>!xD1ucRXN zO%gZdL-4MDW_3pLAD#Q9hwFEmVaNYP?h!`N%bG^&sG{Qc?;bu15BLgzAs-Gv+w#nF zIm&^*<&W{7y>Fun<8dVNs?DUfnvS6LPVIrmE%4Om}Df_D_>zS;yTi!iSTo(_$*gShHxh= z?c1N*RM}}DH0E!)$%q_Ghw^DD>F2?s{l@2FnhTSq(dyscDt-c1Hp!r33g&yvJDg0d zQNc{Yef6*(WS?t7@Dp=74{r+$tLx8ZT{-|QfVR%~Ue>$n_G>|y9r`RSdUV~~fxljO zpw&rm0zL-LncRh#PU3M1)hEXKM^2zzd|~?eIHsa=pE{4f7N35^17l${3j zNd$hooZS{Ij*82=w`U?l7<9m&J&>ErKa>EHuu*?HiaYr}Rm(w?`1qjN#)gRRofv0H zjfjZ`gX{UG4fBY-_CcsaEnh#>+$L@cC8P!LjwmW085Al%)S=;Vo)WHlNwKdG3o*o(p|{_i6% zHAiRnLcp{CIBT`fGhFU`!8x@vW~oomBLBM<7DIv;?il7{XoG9mT+>yQ@i@8Ebi5rw z(mb=2g6Y9JOpo_bpImA(eYC6kVSFjRO>TY?f@0RFU!oxEOipHF%}w7l)!Hsj6z(e* zN6Yo8F#X#I1dPbWTcbm3S~HSEYhTsY&IhKw5wpkV{Q%24T3f}3d&YV;)?!g+=YawR3kokU-57w#JU+QAUjKmB`wBuYgBzCAb>>M0#g#``c-~ 
z1nkoe9AIhW{gn`A(!RK>*)veF@r%Tj%*!YIXxTLt(4_Kc0M5JA^Px6ZX@Sf4-_wK( zkY$|mQ|{}6=7$9$8yQvX|DknO=NYBQuz6FF7LoyRDy*a364NdCZzcu>?RSo}in`&OC`fE)tIFi}t_|9wJ za^L)7=%A)~k&UM`kmr#Jhz!6B^|2Y&@oO0jPqDj%vzEB!aVZ(?F6byyi6vCwZy7?1*s1}u3i`Bl2S@JyEZH~%32^tw! zdYO*#4)MH$=UW?(9O{GCOVTj?!Y$kRacsQmVtuPFO#(pFhoS}F^lP&?!QI>GW>>_K zk#d7|BlV=sc{t@xqJ`NG?Amn52G46IbgR1}vZ#j8Cg~2mmoJtkYZdq&$3}@(GSjJ~ znR}&rib%9lINrj{gak$~re-zes3<9%O$v~@QM@>OHmjV_$K84pbkmIrVV`{WDL437 znDX4F_XmEMd7bnGZPWbCNVr@=IEB2Q$wkb#F_Dlp*sc^6OW{6MMvo|@_AXW($e@C# zcDo&QWo!ahgJR-BUC0VYmHTY0w??H(Qdh$UEnVoftn4_~rIv+@^xDDjYP#d~o~B^N zK@aBboJkv-_zxaOU{@nX`ZIyDOuR;=UJAssEDZ5WVOz6#>x)J7% zDLA=vDX`}GV9a%~io*CouqCS7A=#$EqNw1N{7eabwKmeXjLKUj&hi^#-*`Z+XiL^< zB);qt?7|r!o%g$?_HNJdLE^T=jYzrnH#H43fsS6*dJz+tsr$Rx`8Lc;V-MIpJ@c@| zFO4rgljJm!r%wYQDSo|TW z+afU!kSZM(^SUj{YOCp%5`j2BbDIl;2*dqwxv8*U`lmblm#hREBywJEH!xinu|dQc z80gZs=p}Hr95l+F4!$%-(zJ#Ifkj;C&l|E4FP_Eu#sd{yl;-6-I5#DDc@U;>Now@g z=}D&IURG6HAXf}!Po+6P*DBC;Jpj_@@|ubt+1>}m%mu9 zIqv+4GP6!XOzdy29-AGXrew%t?OIi{-vq9LHw}57Dc@f)m^^*PAx$xrme;(=L>WIE z*a#@1A!v}fd;&#lqpY<3enL%l820NA_I9mB!mkM7{AG!6W&df+`K7I9wo3Zg0S4RN z`bH+CwF-P||4~PFuktf;Cay;a+&0z5IN{$NPl}3%G3`B3^#L}-tjL~Zko}?3)$v`&ib2O zP8l$8{SrPK=;f1P zn=wS{Kuevn&&~bj!l=grTu2gICRo$tqH}#-x^>E(M;J?=TY@zb8o4Z3uGJreabt7b znv*3PRWMwy%TxH`n7#`*9CF-f%eY}G{C;WqW$SX3<)i(^V!5?OL^m{P6Dfn7yry{% zYeHC(Y`FmoLoDZ%a)K`{i-|HUAOh=*+68s}^(}|*Dbxa)tae+Wb$R=5lO_x#gf8qh zND}Rdf@~U-2YPXAniMj#ufFT*zI>rS_~nJ9EFTRWbzIR1WRYR?p3G$Ln|f7a;MEVBxg4#`EXEMu~Mi{Em0FzIeazC%)RtbBzS;Uj{|1Gx;!=3MuyDQK$0qC_%HqbQc@zO{E(*pO1-2iOc<*LgT z&CM=IyY4;gcVIbfh+N?N1_AYplbSaUrWE5TdC&m=#a5LWz3Fk=D}SfB(lY-&jwRl3 zXp}&j01R&`R$%|9i8i#DdzAP?ChtPTTK_X7shis(&;bn631?u&p%+s_pr(6=^yeZE zpC!WmT5BI#d>?MS@r1Be`QhVfBVv}Hb2CO=0r@+9Ax}jwx(F)B)K2Aa$tLSr z#Tu{9mFD)><*wzBTyllpYSMF-*-=uq?PdOu%0DjhOBzAWbn}N4Kul2!^NOF@HHUnc z)~?KbQNgdfbfP3%K2FVs)0~#~TpG@COcNthb}OrD`+f6PqoU5nY+af981Ep3M?%hf z=bze*r#K*xF(EAzB7?bFK#Na70KYlszkz{YkIFH<$R@cHj)me*!PMNnOVm02Uxig9CVShAP7@VziL5d_v+f0im=g)@$)gL z+%JS!A`Sy8LLRUN%l}PeOg&}7o#ill9#zFz8DsbmX`@^P1H|Z~NbY{k*@P~WNIbb7 z%Xi>=-Rvinu200>QGR*u&@7S0GTU4-6YFxS$IlNx4IoZXSZ8R5nW!yQdK$1~=?4M8 zk9^F;pt1;%#TTGAf)C3zKUkusGR@GniF#CpV=rySdqkStPj=ic-U6)G%W->|uJo^I z6YGnT(H>I`ufknpCcEWoJ`?McljGTIglGo4gSZ&EloVFQhm*#75pbxLbBCLSfmJ1~ zgP;8tg2|^0q2q$nsHzm?;CF3OT@E|D-6pp?jK$NCR1^4N)S*2w{@h@xzCB~BB9aj+ z`yVhBUT2#x-M1Abw$Su%>o@XkvU)|j)^Atpw2{uR=W3y9eoJL<3vU!^k{kcq>W8Uh zQ8-~&!f`QIS{*4%{a23=@Pc%xKD{?Waa!{VZhWU^U`KcZO>>gSY{cW(o(Ztha zHo3f@2;QnvCoZ4BQPPnv@+@fAF|SRiy&x&7ELVEWM|oY6;QsHjPVgC&7CIff>9OtF ztkBTxDU}B`q7PkfSqG*TlBr%zuga4vllD@xCdJaCxl-!C{B3b>B3#G-MhG+aL3;nn z)!!3xiG(R)H8=p{&2L1<;)3PdEN978(A{1K>N@&duefqcl&2@Z1@Vf4>g?VmCX=%f z5GyzrTbHpX&U#v-M_R{)PN)a7UN@v?_YdFgIT!i$4n_P)vH{80SwDApTUpATE$QnQ z+Jw$Mi=h_9gKnQ9)-Cucu13Q%`U>e?Y79d7k0pBxq|UlAmON>}oExn#2A<0_Oul9j zp;`*5vb0~;BoV&Z#ELR6mOB_1-d81e&ZVLc5-+9QZM~K31hC~{x9#|(_Eo>TpZV!f z6{2qP5ouV<#MIcp;zZa>`LnA>dws@y59_JOs+QH~9GUvs0qe`1zWiBI``L=xhl13B z_8sq*y39`9)>3O$M@zROq4882Y1CLFNB*g|;;g)iO7p|hRkpwWg0(O<#=jAo=PXkd zWrSWB6yW+-=`Zhx2LVh0@55A`Mx?cDdcR5?Y1%$c926%wGyNudVQhO_)Bo3qDXBmY zmg!BLFSst9DJ2#&yPJnb%Gzw(m&~O0ziSa9p6k(AD>*~i<1p_&6ZRov1?S}xjuv`a zuI91S#KDk?$X%||&8>f}Lc@23;eLvSKK>r#Ux_RS7zY;^37fpV#QB>HiTNJ+1oz$S zgP9*1wrzcv*U()Cqc_)@C-17mPM-G6Z+O-p%cJ;zm`&|5|M!&k`|tauE<8eWJWl9MZ^{>>8C4i`!(K)O9SjXg#s6wPi)#Hm=3lGM25%sh769 zUbl56uR|YN2m&tRGOd64haBGf=>B%T+VT|(mj-@}Bo?svKAUX@Nu2Y0@Y3K?uE0n!RDBk!F(#(y?o##7luJhNY?Nk1IHL$)lUQKm2$-F`qh1Q_A8Z^erg%5Bk6wH14B#7u`%2 zS(t}`BFnlI#FK%Zrgisti8ob0URSmX9t25S&_2HdtaMel2YByQN$Z8+gV*t5P+C@@ zdkOos4;`m)MnJy137C77lN8>X?k;OGd91Bq&Y9GvrK|t_pmlo$gsoKGI)8q-E9Lcu 
z6|!#h6WDV?tZW*trVPTk20V`dyr^> zg1_vVq>}oKJ~y>yq=Wvwea)JsmSqy+M||D|qEV`tB3=yHm+`vu9!KN6hRI#bsoDNQ zi0r_)pYfG24tn)Ie#U!xa%=Su{IYMQs{*oxDF(8+&95PbZH~Y2A5h#&M*J4kfAR@d zB!56~E=xt?&{%Br%Kq1E}oR}NoHN0;IBpF8k~HQhqBWA zQVgpO4)_E<}e-R&fRmm`AiKyokyr zecifoAeUIX7_C0>T!@s~bZFsTfh5-To%<2*wCWV~vAz#nnXD@yISv=HpA4EJb?hIO z5}Bk69zAKFEyC)PH@~N90q<@5zfH&LtPj{uA8Mw)Zd4h z;ZK(4R_|^cWHKg7BI@GT6;wu)0R`|oP#*5&3}Ij zYSc@Nxhl@_M8%2p1zZrEm>5|2-eh>TwtP4oYz^IQ+NlqopTFKly3iF^+EKTt$H4-09>v*cs8`>D*Z1siG z+dtv-`>H~#a3q()1+g28Dtr57MJRJk1f4b8wVo(#Rd=(Ct*63Hc1%0tN~oaL!9~J+ zmbUdC>q4-d)mV%-0rqf#$|<9nGCgJ%p0ns_U1UKN9+_Q|Q}ENDbT!u4%Eui=&zN!= z9!%?A@NRBC8vG$-g*NPFsJH0vxl5(0PHlD~@2iePsDzI}@dbFtFeoCSCuLM>aZK)` zjM0m(npC&B&K1W}H{)JCKzDo9V+5Z#Luj(5+48Vlrq}PXvtToRtG6Lw!qJ%?gump4e(#- zx@`R~*CG+I(5AbxF6!prxF7y>#}d&wmZqTXAd1F(H}C|y z3}mnJLX$VbZSBfug#8UF2-=|{-oz}s=@_Bm57uOoI#t3sTFrE!n_k|)OF%IZ^(-Ft zhmw^8JT(;JxKHfme!YxbPQe`9i=4x7(pUA3nUt-2@z^dEtBwlc>9(V${!C4DKWBNm z&~gdWvG@Nv+OSUzQc?YAM=0Kbm~TNK_YK08vHD~HY4pVnGO_J0ABtYV@gk>vi`pTA zwaOJiqO+T4J9+Q>F`b7AAiAA5c5v$hx1H=$Ef*N0o%4d5f1uE|ZM%ByGB3ZZ?}EFS8pGD06<%&dz8vJ;+# z`K2q*qYom>$IHDL(b1Xyg0wE*O#R_KV)yO#P`S>cgNksOlh#Y42}n)3KmtT)CAga@ z<&Jn}EUP>K_)7k1{xe}2{{?K+MMKUzjdXeX3@f}uJOV#bJLG_-rgT`(fw zM5n4QPoCM~pp^X0<(YQsXehS;z70vX7w!laP&q9f$FHvFvS53BRsMe=FJ^C!lU9F@ z)B!)0^qdYD_(d3<4j&@{u(kW$T@&8N4CUWfN)%uzIvWB=0(GuQQ*B1juNR`V1HY5D z!tEh7ENjE4$PdEg@Fu>&J^gUS ztSh7SPYt`p6y1ZTY;}rvJ0-Rn2)vNVcoOm2oV!XpcG;Z=E4%dv^6-6Rqj^>n9r_09 zaFSvF7Fg+ECKl8syyLAFay5|D<0AV)LQbf2CGLjfi*S6nUN4+UdHZKbT2+|3ij`KxqnDw=08KZ&LO>4mWs-bzvOu|Zq)h?ZNpKN zbxYSE)p;of)#}&UnZJ9Ra6f(4TXg}<&{2ksEA`BQU?Q{ z0X#d@$sfW@JyiM@#?=+rTStRhyu?t}vnjIE+3 zMy&<=&+*IK2c9>&*hGMydti1S_1H7s3#X+W^4>{MQ~fK9!X&{JXTG zD4yhQ1D%e?Jxq`B#9=oqLHW*Ik`FO6Yk4AH8JpG((G{kC~hj0a`aj3=;`VTXe{@X9Vyt|>(eh+R@_kQrm% z>QtmeUWIvmCoLOpW?o}j4_v8=lUVvseQoJuLJ{2twlhPuB+t&o%1SGThNfek1hBhT zgWS=LVE|p7w65Z<5tfY-X7i$Qs&b-MlZWQ2M>K|l(0q(Qj>}`(fswU?Nb(6K;vSbm ze3HydH9>v8!E9ax67A>GC=a6Zy5vPKJ}O?kZIoWEg!d zL+1jrHX*2`buRx|6!M0Vr*P&Es;XSv9KQ%*$d34-9ZpOvy=KP~@^Y&^8@{sKy=vaF zSGa?XW&J>^T-cd?-WF%qd}Ct25MtOGX4I1hosN)F@Vxn2JN`EAG_dU}>sASGd||bs z0AkHc>RUxuW3q%m^VJl-3Bz$~FvEkT-x_;55FFy$I4hFueemfk<63&H^pX(Jk3PVz z=i#~eM4nY4^(U?aOPDDSKa?Wtwa+W@IyZcw*n+-jMn%5nd4yPZ2r(%DD-JDDM*Vc!;n{i}= z$NuUeBz3m(*1Q=}yN@ei3e%Lx-N=>6qHuRT`JN$mEpI3OEpY;{?-V!PljkgN>dc;( zahI>1snt>Go;Hb#n!Ua1`0X(pw&6jWJwr2yLp@Yk=BOwm8|I(!`$jbVXRY&mn3t-_ z+7J6>|90Wsdb(`TwWrYy6qs-~U4sbI-O$!qzY|Wt^4do@tYtnOPeun^ErS(4iYQ2gkjs z*-Rt%QJaPtx#?ESGL&xCggTw$?wmvPjiWxI^Z4!e2kh~9z4v~-uj_d|ujli+w&Q$J zb4P5`-Mb%sPCn`^-kuByUW(OiipscVUSu)+I?!}~MHDomw~0i%{7>oOZ8w{>;;dWK z&y1fOjQCOeMm!il{r{v)xd=Om^WI%sBGKDbPP`M;tG$kwF7Sd~DXnGA-$S)d< zOx|JT>mM!!T~VTNKMnBrKbm`Sp{|UgoTi@iy7m;G_-e@TVoKNI^0CNAFTSQ-%yJ$- z{QY|JRr?o=S&N%ptr2l&~a*u19ss7zEe}Cty_1zOVD<9a>{^a*x zqX&*<*qs0-wnx0WkhvCkJc)2R>Xf&n@99&^p+69<*)<>jes*spejBs2?te33vAOkQ z#KoDARL=}Mp^JC?+Wc1mn7#F_@R;_gB(5KEDdW!Fg?G<*iuu(z_9IfVH=wELihgnJ zp`*ji;k;v4mg4`o==A*KVe_jm-kq=(=z~x1Hq)%u^I1{dpT1s?tG*yPmpd0f zci@?W@|fPULi9|})#&W5_sf|}Lq}}fiLu8H9)BRz9j@0mi?pcPq0K!D|CE=RZ?!_b zX8ExDO{n()pkd@$^p(902avt~7414B?RpXCzMfh)UY-2O=!;bzOlzj)Pf%LlhiS#e zk>sMYlsR+U=$AqNt4f!=ObV}f`{+NT?83TtsR4~oS7MCQo@d~S-_87TH+lRTB=~J` z-0bGFy}uX)+<6(eoEZP%7U4Zjr?-{0KXk#6`S+dMhpby4^i@Rmt-{XrRVUAx7He6Z zEDMM?l0Hj_oXtU9LO(bue*Nt98yex^qC?G<5QlWf^{s!O+W5g*dz|jSd2k5~?SG8> z_jSzpx7eY9`K+dIyPtbr{`5Et{;}<6_WwErKKjT@d%IpJel~q_VD7h#yLazLT1}?~ z!?dk3*7i4lOcp+blJ?i`<$kw#8CcC4xb`=;#Lyo+jxU46O>Ar3`{V6hTlpEQ7p;CC z?|<)teY<>eB4RY|S%U@M@;BteY6<07NB%2~nUfR0M^W}3S^4OD>EPeXkF;;8!yAN3 
z4Ru}3vhGGkW!@64GB`O*M2(l0|9-hab-dbQtnK$ z9`2U$-d(ta~#WWAF^PI4&*IE<|(Ne+pB9+uV-0$*H32V zkePX=2GvnT1~^=~N-}0=95Ia8wne=##~E;BMiN{mvALz~#m3yG^70sw!qP!wTBO40 z!*PQ=y_my@(_oS@dHJG7a{~uiU14tKA6vw`MU~`|5=3njU#VftRpi$ga}~ghR}?<+ zG-kXbH@3-rJAJ&-a8sFUByvk(Z9(?SSk%cTwB46^xlz@7voo0B_F{E9U0A$?fJ_G*&57)??q--c26%^XMGqGtcLnDORFXXctnvh1~7Otj2LJ^|U!6>lq~qqTH4H z_j5VeL932kPeO7LuC!Z5qny1m87Q7@D9%cTyXqy`8p=cdY}mNCR4 z%pe6h)hzg1!YYX4)~xiaOC1^}Pk!cK?sAXKM_P_GDX*FOYhr$r_h5a<4(0Jv^HKJ; zb$bq^GSaloMEpm@T|sUdKvDGh2Kg(hCUle>8IX!XF26OdD_bGiC~2<9KXU&jdmEPy zdh^@*n%#o5ieA&MCX9u)AFkxl&nZvTD!$vsT2k67yw73B-)-?DLaEUsD?6AN)-`GM z?V2>5WqX%rPeR%{n*RH@Txw2Ve&4yErzAfi%Z|$T?){GmOGe2{8>G8dG*6)om6gxa zP~tGN?`^LSv*vBhow~%2q|$e1T`DJA`);YG)8^AHihY*l#)_YqRhfGA$L4ReUOj{h z26j0}5<#~7S~fLg9nc2SAeeA*G1sD6Y#CEvPO{n7Cvx~Tj>s0r_djIax09DgnztT1 z$$PgctW9d;=JS%ZIPWvqQHp&CjGkMRq*u&CbgMrqw-I->&<)UVhSAe3+yX?nfZs(FC-RvRdM+%?)K5WMeVoV$=Ym|vKyBt&iylh;+LZb%;_c##+F_1o=&BSWN@kTbCF zB|dx1B5^Xw1#7Z91BrVG)#MA~wjZ5W8KsO@DCp>s@{J*{ae% zZdr$5Em~W*woTIN?&cE|vTV9OO>EezeLC2AJ)YMs-QT=(n2T9%J~9}|)Zbe>%3zf( z=3JrqC1!WU!w3W#bm7Fkt$O9LyQ)e!c)7`yq)|or;D3XtyP+|fTy1@A9 zht-Zxa#5jEARnJ(4oGzg`}NV*x6_0AV*bj!kCWx(bL8Sc`55=3BcOK!YsmQ@lP|!E zX}`XBMCFGFM^C=mENjA8;u}lkul64O!w}<*b=;g{59Es)kK|qK`(VnGLy%jPUJ}s5Xito;`k}zWh0^S-OPRK2>r}t zUI)=a87%?v42YuNS}`$Rv8B}6WusKJj2li|ddfdjGUHuJzn{IJr;V z-^{V~tWaM5r2PBKZHll_REvq+qKWm)r=PcUU1#PPj?k^!{gC_LH1ZY?&0s*deVYCIdy)>dPl={Kkh8D8HIoiAtcVE+@Zjo=tB#QtI!wKT0v<@6tJ(zBVx} zJtgd>$T1*&$bL?`ncU1e{(f=ut*RQ-UN96zNhg?c5E?^|FLUBJwESQ#a7xLef$%IG_tYc5wtgKi_HkBW3#tHSEDu>MGJPBUs0i8>sc-a`pRf$0s7kk4v!j{>^bU zFS=)69vj>)tqmAOjCPlqsCMEzXdjTmFo+`a3?hB8>y_~q+=2sm= z{gzndB8q$0bViTgK#4Eww^O$&HQ%P2D38(z)^-K{xo3okzcDS1FJ~?$ewLr4$3B-K z9FSq|cWd##hU?@XyU(|tgdZj{*K9cBS=Meqy0%wdxRW6EQ5&CKT#?p|bjb>}!AL{g zht!p#Kzf5d#H^2WP~UfkRcXd5b?w$%81tA}Ahq?ut%bEzog^^~EDk}cdf}WgFA(eG z;=1{%yCWN)rAaE>c#F9M?IC5syuT4!Yu0|Hq$^9`CES1{9*!S~_<=bfO=A-BA0}wQ z<@?$ei?e0>(zQD#v`5Lt)u{>8^Fw}?W)rOu*#$L=-7GDLUG=w+INo~uvL=bV@9Ye9 z732@eUC3*f+pmz1TI_b{X*!Yoo*35BWO$N(!p6W>v`J{76`!Y8^nSEdAe0Zj*8c2% z`p_6}?GFk?0YpnXbs9L4;=ebadd%O_#q5SMy2SSWx`?LjxBHWhp>NKik3A^keRjxx z<<}bdt@-8G>yqGh^65`Mkm7S@t2R~1iEbtDE~l=|9cbf6SeGIA<=-dVC(?;!WBMQ= z2>K?=mmk)~5H+t8XUO~OOws4!f=kGx*ye%zR~SpYbXOpbkdRLhClb!L>ou$HK#>UV zls=ZciD%5Ht0T5$Ecqs&qK_Nub1%MD*MPI;elpXIw!KSQq4q@T@6mVJg1WP030+zXGDOu0wnWs$^sWP&ad zM;f#ka`anivVNG^nRmGkWb=xiPbFiX7;s)?-!@!o<(FP@vL>?XNIPi$d&8CYA*haA zzOwFPv6x)SXEO5BXMVcqwyvR)1c83c##fkxdwKS2h`EezX}1d{CxKg%H zvo)N{nZn|gVRv+?i0P?|n|qR)(ppp=5({$+Eje;CFD~2mX=~l542n~+-w5^m;*y6wQpZe$j=*^XgZXhzZxnsim9kD(>Q5&@{g!%=#v7}QO6 zvvzlDb9Xr6YZ2YGrNZ*tE;7Z$x90+iD;ch^c*}(Jt#@Ygxn;RI6NcTU^Om|U!faTO zOqX`zVlR?fNWjOaS>DUk?{HS)hNAtRxx&%+4C`g4swwLQTnN&Hp4b3ilC!y zxmRrWU}3XWE}BQ=`QJqqc#Arft}{8ix{9!IEUER+G%6M(5MHv9tzhozC;QR}oqx_GE;CmHC+YzT1~`IVECF$h?M2m&Ds9yJV@dBJ(G*2Z4-6Z2NwwD`{qDGrG?ZOiyD_{U0#oUoj6eJS6C&R$o4OaR9 literal 0 HcmV?d00001 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala index 2b4abed645910..4991e397eb11c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala @@ -150,7 +150,7 @@ class ApproximatePercentileQuerySuite extends QueryTest with SharedSparkSession (1 to 1000).toDF("col").createOrReplaceTempView(table) checkAnswer( spark.sql(s"SELECT percentile_approx(col, array(0.25 + 0.25D), 200 + 800) FROM $table"), - Row(Seq(499)) + Row(Seq(500)) ) } } @@ -296,4 +296,23 
     buffer.quantileSummaries
     assert(buffer.isCompressed)
   }
+
+  test("SPARK-32908: maximum target error in percentile_approx") {
+    withTempView(table) {
+      spark.read
+        .schema("col int")
+        .csv(testFile("test-data/percentile_approx-input.csv.bz2"))
+        .repartition(1)
+        .createOrReplaceTempView(table)
+      checkAnswer(
+        spark.sql(
+          s"""SELECT
+             |  percentile_approx(col, 0.77, 1000),
+             |  percentile_approx(col, 0.77, 10000),
+             |  percentile_approx(col, 0.77, 100000),
+             |  percentile_approx(col, 0.77, 1000000)
+             |FROM $table""".stripMargin),
+        Row(18, 17, 17, 17))
+    }
+  }
 }

From b49aaa33e13814a448be51a7e65a29cb515b8248 Mon Sep 17 00:00:00 2001
From: Takeshi Yamamuro
Date: Thu, 17 Sep 2020 22:07:47 -0700
Subject: [PATCH 0062/1009] [SPARK-32906][SQL] Struct field names should not change after normalizing floats

### What changes were proposed in this pull request?

This PR intends to fix a minor bug when normalizing floats for struct types;
```
scala> import org.apache.spark.sql.execution.aggregate.HashAggregateExec
scala> val df = Seq(Tuple1(Tuple1(-0.0d)), Tuple1(Tuple1(0.0d))).toDF("k")
scala> val agg = df.distinct()
scala> agg.explain()
== Physical Plan ==
*(2) HashAggregate(keys=[k#40], functions=[])
+- Exchange hashpartitioning(k#40, 200), true, [id=#62]
   +- *(1) HashAggregate(keys=[knownfloatingpointnormalized(if (isnull(k#40)) null else named_struct(col1, knownfloatingpointnormalized(normalizenanandzero(k#40._1)))) AS k#40], functions=[])
      +- *(1) LocalTableScan [k#40]

scala> val aggOutput = agg.queryExecution.sparkPlan.collect { case a: HashAggregateExec => a.output.head }
scala> aggOutput.foreach { attr => println(attr.prettyJson) }
### Final Aggregate ###
[ {
  "class" : "org.apache.spark.sql.catalyst.expressions.AttributeReference",
  "num-children" : 0,
  "name" : "k",
  "dataType" : {
    "type" : "struct",
    "fields" : [ {
      "name" : "_1",
               ^^^
      "type" : "double",
      "nullable" : false,
      "metadata" : { }
    } ]
  },
  "nullable" : true,
  "metadata" : { },
  "exprId" : {
    "product-class" : "org.apache.spark.sql.catalyst.expressions.ExprId",
    "id" : 40,
    "jvmId" : "a824e83f-933e-4b85-a1ff-577b5a0e2366"
  },
  "qualifier" : [ ]
} ]

### Partial Aggregate ###
[ {
  "class" : "org.apache.spark.sql.catalyst.expressions.AttributeReference",
  "num-children" : 0,
  "name" : "k",
  "dataType" : {
    "type" : "struct",
    "fields" : [ {
      "name" : "col1",
               ^^^^
      "type" : "double",
      "nullable" : true,
      "metadata" : { }
    } ]
  },
  "nullable" : true,
  "metadata" : { },
  "exprId" : {
    "product-class" : "org.apache.spark.sql.catalyst.expressions.ExprId",
    "id" : 40,
    "jvmId" : "a824e83f-933e-4b85-a1ff-577b5a0e2366"
  },
  "qualifier" : [ ]
} ]
```

### Why are the changes needed?

bugfix.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Added tests.

Closes #29780 from maropu/FixBugInNormalizedFloatingNumbers.
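Not part of the patch: a minimal illustration (assuming a Spark 3.x REPL with the catalyst expression classes on the classpath) of why the pre-fix code lost the field name. `CreateStruct` invents positional names (`col1`, `col2`, ...) for plain child expressions, while `CreateNamedStruct`, which the diff that follows switches to, keeps the names it is explicitly given:

```
import org.apache.spark.sql.catalyst.expressions.{CreateNamedStruct, CreateStruct, Literal}

// CreateStruct derives a positional field name for a plain literal child ...
val unnamed = CreateStruct(Seq(Literal(1.0d)))
println(unnamed.dataType.catalogString)   // struct<col1:double>

// ... whereas CreateNamedStruct keeps the explicit name/value pairs it receives,
// which is why the fix passes Literal(name) alongside each normalized field.
val named = CreateNamedStruct(Seq(Literal("_1"), Literal(1.0d)))
println(named.dataType.catalogString)     // struct<_1:double>
```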
Authored-by: Takeshi Yamamuro
Signed-off-by: Liang-Chi Hsieh
---
 .../sql/catalyst/optimizer/NormalizeFloatingNumbers.scala | 6 +++---
 .../org/apache/spark/sql/DataFrameAggregateSuite.scala    | 8 ++++++++
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala
index 10f846cf910f9..bfc36ec477a73 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala
@@ -129,10 +129,10 @@ object NormalizeFloatingNumbers extends Rule[LogicalPlan] {
       Coalesce(children.map(normalize))

     case _ if expr.dataType.isInstanceOf[StructType] =>
-      val fields = expr.dataType.asInstanceOf[StructType].fields.indices.map { i =>
-        normalize(GetStructField(expr, i))
+      val fields = expr.dataType.asInstanceOf[StructType].fieldNames.zipWithIndex.map {
+        case (name, i) => Seq(Literal(name), normalize(GetStructField(expr, i)))
       }
-      val struct = CreateStruct(fields)
+      val struct = CreateNamedStruct(fields.flatten.toSeq)
       KnownFloatingPointNormalized(If(IsNull(expr), Literal(null, struct.dataType), struct))

     case _ if expr.dataType.isInstanceOf[ArrayType] =>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
index e954e2bf1c46d..353444b664412 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
@@ -1043,6 +1043,14 @@ class DataFrameAggregateSuite extends QueryTest
     checkAnswer(sql(queryTemplate("FIRST")), Row(1))
     checkAnswer(sql(queryTemplate("LAST")), Row(3))
   }
+
+  test("SPARK-32906: struct field names should not change after normalizing floats") {
+    val df = Seq(Tuple1(Tuple2(-0.0d, Double.NaN)), Tuple1(Tuple2(0.0d, Double.NaN))).toDF("k")
+    val aggs = df.distinct().queryExecution.sparkPlan.collect { case a: HashAggregateExec => a }
+    assert(aggs.length == 2)
+    assert(aggs.head.output.map(_.dataType.simpleString).head ===
+      aggs.last.output.map(_.dataType.simpleString).head)
+  }
 }

 case class B(c: Option[Double])

From 8b09536cdf5c5477114cc11601c8b68c70408279 Mon Sep 17 00:00:00 2001
From: gengjiaan
Date: Fri, 18 Sep 2020 07:06:38 +0000
Subject: [PATCH 0063/1009] [SPARK-27951][SQL] Support ANSI SQL NTH_VALUE window function

### What changes were proposed in this pull request?

The `NTH_VALUE` function is an ANSI SQL standard window function.
For example:
```
CREATE TEMPORARY TABLE empsalary (
    depname varchar,
    empno bigint,
    salary int,
    enroll_date date
);

INSERT INTO empsalary VALUES
('develop', 10, 5200, '2007-08-01'),
('sales', 1, 5000, '2006-10-01'),
('personnel', 5, 3500, '2007-12-10'),
('sales', 4, 4800, '2007-08-08'),
('personnel', 2, 3900, '2006-12-23'),
('develop', 7, 4200, '2008-01-01'),
('develop', 9, 4500, '2008-01-01'),
('sales', 3, 4800, '2007-08-01'),
('develop', 8, 6000, '2006-10-01'),
('develop', 11, 5200, '2007-08-15');

select first_value(salary) over(order by salary range between 1000 preceding and 1000 following),
       lead(salary) over(order by salary range between 1000 preceding and 1000 following),
       nth_value(salary, 1) over(order by salary range between 1000 preceding and 1000 following),
       salary from empsalary;

 first_value | lead | nth_value | salary
-------------+------+-----------+--------
        3500 | 3900 |      3500 |   3500
        3500 | 4200 |      3500 |   3900
        3500 | 4500 |      3500 |   4200
        3500 | 4800 |      3500 |   4500
        3900 | 4800 |      3900 |   4800
        3900 | 5000 |      3900 |   4800
        4200 | 5200 |      4200 |   5000
        4200 | 5200 |      4200 |   5200
        4200 | 6000 |      4200 |   5200
        5000 |      |      5000 |   6000
(10 rows)
```

Several mainstream databases support this syntax.

**PostgreSQL:**
https://www.postgresql.org/docs/8.4/functions-window.html

**Vertica:**
https://www.vertica.com/docs/9.2.x/HTML/Content/Authoring/SQLReferenceManual/Functions/Analytic/NTH_VALUEAnalytic.htm?tocpath=SQL%20Reference%20Manual%7CSQL%20Functions%7CAnalytic%20Functions%7C_____23

**Oracle:**
https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/NTH_VALUE.html#GUID-F8A0E88C-67E5-4AA6-9515-95D03A7F9EA0

**Redshift:**
https://docs.aws.amazon.com/redshift/latest/dg/r_WF_NTH.html

**Presto:**
https://prestodb.io/docs/current/functions/window.html

**MySQL:**
https://www.mysqltutorial.org/mysql-window-functions/mysql-nth_value-function/

### Why are the changes needed?

The `NTH_VALUE` function is an ANSI SQL standard window function, and it is very useful.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Existing and new UTs.

Closes #29604 from beliefer/support-nth_value.
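Not part of the patch: a minimal usage sketch of the new `nth_value` Column function this PR adds to `org.apache.spark.sql.functions`, assuming an active `spark` session in which the `empsalary` table from the example above has already been created:

```
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.{col, nth_value}

// Mirrors "nth_value(salary, 1) OVER (ORDER BY salary
//   RANGE BETWEEN 1000 PRECEDING AND 1000 FOLLOWING)" from the SQL example above.
val w = Window.orderBy(col("salary")).rangeBetween(-1000, 1000)

spark.table("empsalary")
  .select(col("salary"), nth_value(col("salary"), 1).over(w).as("nth_value"))
  .orderBy(col("salary"))
  .show()
```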
Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: Wenchen Fan --- .../catalyst/analysis/FunctionRegistry.scala | 1 + .../expressions/windowExpressions.scala | 76 +++++- .../analysis/AnalysisErrorSuite.scala | 22 ++ .../org/apache/spark/sql/functions.scala | 29 ++ .../sql-functions/sql-expression-schema.md | 7 +- .../inputs/postgreSQL/window_part1.sql | 4 +- .../inputs/postgreSQL/window_part2.sql | 4 +- .../inputs/postgreSQL/window_part3.sql | 3 +- .../resources/sql-tests/inputs/window.sql | 94 ++++++- .../results/postgreSQL/window_part3.sql.out | 11 +- .../sql-tests/results/window.sql.out | 251 +++++++++++++++++- .../sql/DataFrameWindowFunctionsSuite.scala | 60 +++++ 12 files changed, 549 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 7e73667e4b85f..f62c8bb0c2931 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -510,6 +510,7 @@ object FunctionRegistry { expression[Lag]("lag"), expression[RowNumber]("row_number"), expression[CumeDist]("cume_dist"), + expression[NthValue]("nth_value"), expression[NTile]("ntile"), expression[Rank]("rank"), expression[DenseRank]("dense_rank"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index c8b6433207355..07a2b6fa96c12 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -476,7 +476,7 @@ case class Lag(input: Expression, offset: Expression, default: Expression) abstract class AggregateWindowFunction extends DeclarativeAggregate with WindowFunction { self: Product => - override val frame = SpecifiedWindowFrame(RowFrame, UnboundedPreceding, CurrentRow) + override val frame: WindowFrame = SpecifiedWindowFrame(RowFrame, UnboundedPreceding, CurrentRow) override def dataType: DataType = IntegerType override def nullable: Boolean = true override lazy val mergeExpressions = @@ -549,6 +549,80 @@ case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction { override def prettyName: String = "cume_dist" } +@ExpressionDescription( + usage = """ + _FUNC_(input[, offset]) - Returns the value of `input` at the row that is the `offset`th row + from beginning of the window frame. Offset starts at 1. If ignoreNulls=true, we will skip + nulls when finding the `offset`th row. Otherwise, every row counts for the `offset`. If + there is no such an `offset`th row (e.g., when the offset is 10, size of the window frame + is less than 10), null is returned. + """, + arguments = """ + Arguments: + * input - the target column or expression that the function operates on. + * offset - a positive int literal to indicate the offset in the window frame. It starts + with 1. + * ignoreNulls - an optional specification that indicates the NthValue should skip null + values in the determination of which row to use. 
+ """, + since = "3.1.0", + group = "window_funcs") +case class NthValue(input: Expression, offsetExpr: Expression, ignoreNulls: Boolean) + extends AggregateWindowFunction with ImplicitCastInputTypes { + + def this(child: Expression, offset: Expression) = this(child, offset, false) + + override def children: Seq[Expression] = input :: offsetExpr :: Nil + + override val frame: WindowFrame = UnspecifiedFrame + + override def dataType: DataType = input.dataType + + override def inputTypes: Seq[AbstractDataType] = Seq(AnyDataType, IntegerType) + + override def checkInputDataTypes(): TypeCheckResult = { + val check = super.checkInputDataTypes() + if (check.isFailure) { + check + } else if (!offsetExpr.foldable) { + TypeCheckFailure(s"Offset expression '$offsetExpr' must be a literal.") + } else if (offset <= 0) { + TypeCheckFailure( + s"The 'offset' argument of nth_value must be greater than zero but it is $offset.") + } else { + TypeCheckSuccess + } + } + + private lazy val offset = offsetExpr.eval().asInstanceOf[Int].toLong + private lazy val result = AttributeReference("result", input.dataType)() + private lazy val count = AttributeReference("count", LongType)() + override lazy val aggBufferAttributes: Seq[AttributeReference] = result :: count :: Nil + + override lazy val initialValues: Seq[Literal] = Seq( + /* result = */ Literal.create(null, input.dataType), + /* count = */ Literal(1L) + ) + + override lazy val updateExpressions: Seq[Expression] = { + if (ignoreNulls) { + Seq( + /* result = */ If(count === offset && input.isNotNull, input, result), + /* count = */ If(input.isNull, count, count + 1L) + ) + } else { + Seq( + /* result = */ If(count === offset, input, result), + /* count = */ count + 1L + ) + } + } + + override lazy val evaluateExpression: AttributeReference = result + + override def toString: String = s"$prettyName($input, $offset)${if (ignoreNulls) " ignore nulls"}" +} + /** * The NTile function divides the rows for each window partition into `n` buckets ranging from 1 to * at most `n`. Bucket values will differ by at most 1. If the number of rows in the partition does diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index a99f7e2be6e7e..d3a14e511cdc2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -233,6 +233,28 @@ class AnalysisErrorSuite extends AnalysisTest { SpecifiedWindowFrame(RangeFrame, Literal(1), Literal(2)))).as("window")), "window frame" :: "must match the required frame" :: Nil) + errorTest( + "the offset of nth_value window function is negative or zero", + testRelation2.select( + WindowExpression( + new NthValue(AttributeReference("b", IntegerType)(), Literal(0)), + WindowSpecDefinition( + UnresolvedAttribute("a") :: Nil, + SortOrder(UnresolvedAttribute("b"), Ascending) :: Nil, + SpecifiedWindowFrame(RowFrame, Literal(0), Literal(0)))).as("window")), + "The 'offset' argument of nth_value must be greater than zero but it is 0." 
:: Nil) + + errorTest( + "the offset of nth_value window function is not int literal", + testRelation2.select( + WindowExpression( + new NthValue(AttributeReference("b", IntegerType)(), Literal(true)), + WindowSpecDefinition( + UnresolvedAttribute("a") :: Nil, + SortOrder(UnresolvedAttribute("b"), Ascending) :: Nil, + SpecifiedWindowFrame(RowFrame, Literal(0), Literal(0)))).as("window")), + "argument 2 requires int type, however, 'true' is of boolean type." :: Nil) + errorTest( "too many generators", listRelation.select(Explode($"list").as("a"), Explode($"list").as("b")), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 6201492d04b0c..b20e8c241ef9d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -993,6 +993,35 @@ object functions { Lead(e.expr, Literal(offset), Literal(defaultValue)) } + /** + * Window function: returns the value that is the `offset`th row of the window frame + * (counting from 1), and `null` if the size of window frame is less than `offset` rows. + * + * It will return the `offset`th non-null value it sees when ignoreNulls is set to true. + * If all values are null, then null is returned. + * + * This is equivalent to the nth_value function in SQL. + * + * @group window_funcs + * @since 3.1.0 + */ + def nth_value(e: Column, offset: Int, ignoreNulls: Boolean): Column = withExpr { + NthValue(e.expr, Literal(offset), ignoreNulls) + } + + /** + * Window function: returns the value that is the `offset`th row of the window frame + * (counting from 1), and `null` if the size of window frame is less than `offset` rows. + * + * This is equivalent to the nth_value function in SQL. + * + * @group window_funcs + * @since 3.1.0 + */ + def nth_value(e: Column, offset: Int): Column = withExpr { + NthValue(e.expr, Literal(offset), false) + } + /** * Window function: returns the ntile group id (from 1 to `n` inclusive) in an ordered window * partition. 
For example, if `n` is 4, the first quarter of the rows will get value 1, the second diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 855ba3f00a4e6..45f561a61df78 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,8 +1,8 @@ ## Summary - - Number of queries: 339 - - Number of expressions that missing example: 34 - - Expressions missing examples: and,bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint,struct,cume_dist,dense_rank,input_file_block_length,input_file_block_start,input_file_name,lag,lead,monotonically_increasing_id,ntile,!,not,or,percent_rank,rank,row_number,spark_partition_id,version,window,positive,count_min_sketch + - Number of queries: 340 + - Number of expressions that missing example: 35 + - Expressions missing examples: and,bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint,struct,cume_dist,dense_rank,input_file_block_length,input_file_block_start,input_file_name,lag,lead,monotonically_increasing_id,ntile,!,not,nth_value,or,percent_rank,rank,row_number,spark_partition_id,version,window,positive,count_min_sketch ## Schema of Built-in Functions | Class name | Function name or alias | Query example | Output schema | | ---------- | ---------------------- | ------------- | ------------- | @@ -191,6 +191,7 @@ | org.apache.spark.sql.catalyst.expressions.Not | ! | N/A | N/A | | org.apache.spark.sql.catalyst.expressions.Not | not | N/A | N/A | | org.apache.spark.sql.catalyst.expressions.Now | now | SELECT now() | struct | +| org.apache.spark.sql.catalyst.expressions.NthValue | nth_value | N/A | N/A | | org.apache.spark.sql.catalyst.expressions.NullIf | nullif | SELECT nullif(2, 2) | struct | | org.apache.spark.sql.catalyst.expressions.Nvl | nvl | SELECT nvl(NULL, array('2')) | struct> | | org.apache.spark.sql.catalyst.expressions.Nvl2 | nvl2 | SELECT nvl2(NULL, 2, 1) | struct | diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part1.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part1.sql index 6e95aca7aff62..d12bee6e47223 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part1.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part1.sql @@ -95,7 +95,7 @@ SELECT last(ten) OVER (PARTITION BY four), ten, four FROM (SELECT * FROM tenk1 WHERE unique2 < 10 ORDER BY four, ten)s ORDER BY four, ten; --- [SPARK-27951] ANSI SQL: NTH_VALUE function +-- [SPARK-30707] Lead/Lag window function throws AnalysisException without ORDER BY clause -- SELECT nth_value(ten, four + 1) OVER (PARTITION BY four), ten, four -- FROM (SELECT * FROM tenk1 WHERE unique2 < 10 ORDER BY four, ten)s; @@ -301,7 +301,7 @@ FROM tenk1 WHERE unique1 < 10; -- unique1, four -- FROM tenk1 WHERE unique1 < 10 WINDOW w AS (order by four); --- [SPARK-27951] ANSI SQL: NTH_VALUE function +-- [SPARK-30707] Lead/Lag window function throws AnalysisException without ORDER BY clause -- SELECT first_value(unique1) over w, -- nth_value(unique1, 2) over w AS nth_2, -- last_value(unique1) over w, unique1, four diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part2.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part2.sql index ba1acc9f56b4a..50c0bc3410312 100644 --- 
a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part2.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part2.sql @@ -105,7 +105,7 @@ FROM tenk1 WHERE unique1 < 10; -- select sum(salary) over (order by enroll_date range between '1 year' preceding and '1 year' following -- exclude ties), salary, enroll_date from empsalary; --- [SPARK-27951] ANSI SQL: NTH_VALUE function +-- [SPARK-28310] ANSI SQL grammar support: first_value/last_value(expression, [RESPECT NULLS | IGNORE NULLS]) -- select first_value(salary) over(order by salary range between 1000 preceding and 1000 following), -- lead(salary) over(order by salary range between 1000 preceding and 1000 following), -- nth_value(salary, 1) over(order by salary range between 1000 preceding and 1000 following), @@ -116,7 +116,7 @@ FROM tenk1 WHERE unique1 < 10; -- lag(salary) over(order by salary range between 1000 preceding and 1000 following), -- salary from empsalary; --- [SPARK-27951] ANSI SQL: NTH_VALUE function +-- [SPARK-28310] ANSI SQL grammar support: first_value/last_value(expression, [RESPECT NULLS | IGNORE NULLS]) -- select first_value(salary) over(order by salary range between 1000 following and 3000 following -- exclude current row), -- lead(salary) over(order by salary range between 1000 following and 3000 following exclude ties), diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part3.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part3.sql index f4b8454da0d82..6f33a07631f7a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part3.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part3.sql @@ -399,8 +399,7 @@ SELECT range(1, 100) OVER () FROM empsalary; SELECT ntile(0) OVER (ORDER BY ten), ten, four FROM tenk1; --- [SPARK-27951] ANSI SQL: NTH_VALUE function --- SELECT nth_value(four, 0) OVER (ORDER BY ten), ten, four FROM tenk1; +SELECT nth_value(four, 0) OVER (ORDER BY ten), ten, four FROM tenk1; -- filter diff --git a/sql/core/src/test/resources/sql-tests/inputs/window.sql b/sql/core/src/test/resources/sql-tests/inputs/window.sql index 72d812d6a4e49..5de6db210ce36 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/window.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/window.sql @@ -16,6 +16,26 @@ CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (3, 1L, 1.0D, date("2017-08-01"), timestamp_seconds(1501545600), null) AS testData(val, val_long, val_double, val_date, val_timestamp, cate); +CREATE OR REPLACE TEMPORARY VIEW basic_pays AS SELECT * FROM VALUES +('Diane Murphy','Accounting',8435), +('Mary Patterson','Accounting',9998), +('Jeff Firrelli','Accounting',8992), +('William Patterson','Accounting',8870), +('Gerard Bondur','Accounting',11472), +('Anthony Bow','Accounting',6627), +('Leslie Jennings','IT',8113), +('Leslie Thompson','IT',5186), +('Julie Firrelli','Sales',9181), +('Steve Patterson','Sales',9441), +('Foon Yue Tseng','Sales',6660), +('George Vanauf','Sales',10563), +('Loui Bondur','SCM',10449), +('Gerard Hernandez','SCM',6949), +('Pamela Castillo','SCM',11303), +('Larry Bott','SCM',11798), +('Barry Jones','SCM',10586) +AS basic_pays(employee_name, department, salary); + -- RowsBetween SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val ROWS CURRENT ROW) FROM testData ORDER BY cate, val; @@ -124,4 +144,76 @@ WINDOW w AS (PARTITION BY cate ORDER BY val); -- with filter predicate SELECT val, cate, count(val) FILTER (WHERE val > 1) 
OVER(PARTITION BY cate) -FROM testData ORDER BY cate, val; \ No newline at end of file +FROM testData ORDER BY cate, val; + +-- nth_value() over () +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER (ORDER BY salary DESC) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC; + +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC; + +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary + RANGE BETWEEN 2000 PRECEDING AND 1000 FOLLOWING) second_highest_salary +FROM + basic_pays +ORDER BY salary; + +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC; + +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC; + +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC; + +SELECT + employee_name, + department, + salary, + NTH_VALUE(employee_name, 2) OVER ( + PARTITION BY department + ORDER BY salary DESC + RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) second_highest_salary +FROM + basic_pays +ORDER BY department; \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out index 08eba6797b01d..b63b5601715a8 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 29 +-- Number of queries: 30 -- !query @@ -385,6 +385,15 @@ org.apache.spark.sql.AnalysisException cannot resolve 'ntile(0)' due to data type mismatch: Buckets expression must be positive, but got: 0; line 1 pos 7 +-- !query +SELECT nth_value(four, 0) OVER (ORDER BY ten), ten, four FROM tenk1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'nthvalue(spark_catalog.default.tenk1.`four`, 0)' due to data type mismatch: The 'offset' argument of nth_value must be greater than zero but it is 0.; line 1 pos 7 + + -- !query DROP TABLE empsalary -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out b/sql/core/src/test/resources/sql-tests/results/window.sql.out index ede044a44fdaa..a8875fd449bad 100644 --- a/sql/core/src/test/resources/sql-tests/results/window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 24 +-- Number of queries: 29 -- !query @@ -19,6 +19,30 @@ struct<> -- !query output +-- !query +CREATE OR REPLACE TEMPORARY VIEW basic_pays AS SELECT * FROM VALUES +('Diane Murphy','Accounting',8435), +('Mary Patterson','Accounting',9998), +('Jeff Firrelli','Accounting',8992), +('William Patterson','Accounting',8870), +('Gerard Bondur','Accounting',11472), +('Anthony Bow','Accounting',6627), +('Leslie 
Jennings','IT',8113), +('Leslie Thompson','IT',5186), +('Julie Firrelli','Sales',9181), +('Steve Patterson','Sales',9441), +('Foon Yue Tseng','Sales',6660), +('George Vanauf','Sales',10563), +('Loui Bondur','SCM',10449), +('Gerard Hernandez','SCM',6949), +('Pamela Castillo','SCM',11303), +('Larry Bott','SCM',11798), +('Barry Jones','SCM',10586) +AS basic_pays(employee_name, department, salary) +-- !query schema +struct<> +-- !query output + -- !query SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val ROWS CURRENT ROW) FROM testData @@ -391,3 +415,228 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException window aggregate function with filter predicate is not supported yet.; + + +-- !query +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER (ORDER BY salary DESC) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC +-- !query schema +struct +-- !query output +Larry Bott 11798 NULL +Gerard Bondur 11472 Gerard Bondur +Pamela Castillo 11303 Gerard Bondur +Barry Jones 10586 Gerard Bondur +George Vanauf 10563 Gerard Bondur +Loui Bondur 10449 Gerard Bondur +Mary Patterson 9998 Gerard Bondur +Steve Patterson 9441 Gerard Bondur +Julie Firrelli 9181 Gerard Bondur +Jeff Firrelli 8992 Gerard Bondur +William Patterson 8870 Gerard Bondur +Diane Murphy 8435 Gerard Bondur +Leslie Jennings 8113 Gerard Bondur +Gerard Hernandez 6949 Gerard Bondur +Foon Yue Tseng 6660 Gerard Bondur +Anthony Bow 6627 Gerard Bondur +Leslie Thompson 5186 Gerard Bondur + + +-- !query +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC +-- !query schema +struct +-- !query output +Larry Bott 11798 NULL +Gerard Bondur 11472 Gerard Bondur +Pamela Castillo 11303 Gerard Bondur +Barry Jones 10586 Gerard Bondur +George Vanauf 10563 Gerard Bondur +Loui Bondur 10449 Gerard Bondur +Mary Patterson 9998 Gerard Bondur +Steve Patterson 9441 Gerard Bondur +Julie Firrelli 9181 Gerard Bondur +Jeff Firrelli 8992 Gerard Bondur +William Patterson 8870 Gerard Bondur +Diane Murphy 8435 Gerard Bondur +Leslie Jennings 8113 Gerard Bondur +Gerard Hernandez 6949 Gerard Bondur +Foon Yue Tseng 6660 Gerard Bondur +Anthony Bow 6627 Gerard Bondur +Leslie Thompson 5186 Gerard Bondur + + +-- !query +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary + RANGE BETWEEN 2000 PRECEDING AND 1000 FOLLOWING) second_highest_salary +FROM + basic_pays +ORDER BY salary +-- !query schema +struct +-- !query output +Leslie Thompson 5186 NULL +Anthony Bow 6627 Anthony Bow +Foon Yue Tseng 6660 Anthony Bow +Gerard Hernandez 6949 Anthony Bow +Leslie Jennings 8113 Foon Yue Tseng +Diane Murphy 8435 Foon Yue Tseng +William Patterson 8870 Leslie Jennings +Jeff Firrelli 8992 Diane Murphy +Julie Firrelli 9181 Diane Murphy +Steve Patterson 9441 Diane Murphy +Mary Patterson 9998 Diane Murphy +Loui Bondur 10449 Jeff Firrelli +George Vanauf 10563 Jeff Firrelli +Barry Jones 10586 Jeff Firrelli +Pamela Castillo 11303 Mary Patterson +Gerard Bondur 11472 Loui Bondur +Larry Bott 11798 Loui Bondur + + +-- !query +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC +-- !query schema +struct +-- !query output +Larry Bott 11798 Gerard Bondur +Gerard Bondur 11472 Gerard Bondur +Pamela Castillo 11303 Gerard 
Bondur +Barry Jones 10586 Pamela Castillo +George Vanauf 10563 Barry Jones +Loui Bondur 10449 George Vanauf +Mary Patterson 9998 Loui Bondur +Steve Patterson 9441 Mary Patterson +Julie Firrelli 9181 Steve Patterson +Jeff Firrelli 8992 Julie Firrelli +William Patterson 8870 Jeff Firrelli +Diane Murphy 8435 William Patterson +Leslie Jennings 8113 Diane Murphy +Gerard Hernandez 6949 Leslie Jennings +Foon Yue Tseng 6660 Gerard Hernandez +Anthony Bow 6627 Foon Yue Tseng +Leslie Thompson 5186 Anthony Bow + + +-- !query +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC +-- !query schema +struct +-- !query output +Larry Bott 11798 Gerard Bondur +Gerard Bondur 11472 Pamela Castillo +Pamela Castillo 11303 Barry Jones +Barry Jones 10586 George Vanauf +George Vanauf 10563 Loui Bondur +Loui Bondur 10449 Mary Patterson +Mary Patterson 9998 Steve Patterson +Steve Patterson 9441 Julie Firrelli +Julie Firrelli 9181 Jeff Firrelli +Jeff Firrelli 8992 William Patterson +William Patterson 8870 Diane Murphy +Diane Murphy 8435 Leslie Jennings +Leslie Jennings 8113 Gerard Hernandez +Gerard Hernandez 6949 Foon Yue Tseng +Foon Yue Tseng 6660 Anthony Bow +Anthony Bow 6627 Leslie Thompson +Leslie Thompson 5186 NULL + + +-- !query +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC +-- !query schema +struct +-- !query output +Larry Bott 11798 Gerard Bondur +Gerard Bondur 11472 Gerard Bondur +Pamela Castillo 11303 Gerard Bondur +Barry Jones 10586 Gerard Bondur +George Vanauf 10563 Gerard Bondur +Loui Bondur 10449 Gerard Bondur +Mary Patterson 9998 Gerard Bondur +Steve Patterson 9441 Gerard Bondur +Julie Firrelli 9181 Gerard Bondur +Jeff Firrelli 8992 Gerard Bondur +William Patterson 8870 Gerard Bondur +Diane Murphy 8435 Gerard Bondur +Leslie Jennings 8113 Gerard Bondur +Gerard Hernandez 6949 Gerard Bondur +Foon Yue Tseng 6660 Gerard Bondur +Anthony Bow 6627 Gerard Bondur +Leslie Thompson 5186 Gerard Bondur + + +-- !query +SELECT + employee_name, + department, + salary, + NTH_VALUE(employee_name, 2) OVER ( + PARTITION BY department + ORDER BY salary DESC + RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) second_highest_salary +FROM + basic_pays +ORDER BY department +-- !query schema +struct +-- !query output +Gerard Bondur Accounting 11472 Mary Patterson +Mary Patterson Accounting 9998 Mary Patterson +Jeff Firrelli Accounting 8992 Mary Patterson +William Patterson Accounting 8870 Mary Patterson +Diane Murphy Accounting 8435 Mary Patterson +Anthony Bow Accounting 6627 Mary Patterson +Leslie Jennings IT 8113 Leslie Thompson +Leslie Thompson IT 5186 Leslie Thompson +Larry Bott SCM 11798 Pamela Castillo +Pamela Castillo SCM 11303 Pamela Castillo +Barry Jones SCM 10586 Pamela Castillo +Loui Bondur SCM 10449 Pamela Castillo +Gerard Hernandez SCM 6949 Pamela Castillo +George Vanauf Sales 10563 Steve Patterson +Steve Patterson Sales 9441 Steve Patterson +Julie Firrelli Sales 9181 Steve Patterson +Foon Yue Tseng Sales 6660 Steve Patterson \ No newline at end of file diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index bc6adfb857b02..c5dcdc44cc64f 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -541,6 +541,66 @@ class DataFrameWindowFunctionsSuite extends QueryTest Row("b", 3, null, null, null, null, null, null))) } + test("nth_value with ignoreNulls") { + val nullStr: String = null + val df = Seq( + ("a", 0, nullStr), + ("a", 1, "x"), + ("a", 2, "y"), + ("a", 3, "z"), + ("a", 4, nullStr), + ("b", 1, nullStr), + ("b", 2, nullStr)). + toDF("key", "order", "value") + val window = Window.partitionBy($"key").orderBy($"order") + checkAnswer( + df.select( + $"key", + $"order", + nth_value($"value", 2).over(window), + nth_value($"value", 2, ignoreNulls = false).over(window), + nth_value($"value", 2, ignoreNulls = true).over(window)), + Seq( + Row("a", 0, null, null, null), + Row("a", 1, "x", "x", null), + Row("a", 2, "x", "x", "y"), + Row("a", 3, "x", "x", "y"), + Row("a", 4, "x", "x", "y"), + Row("b", 1, null, null, null), + Row("b", 2, null, null, null))) + } + + test("nth_value on descending ordered window") { + val nullStr: String = null + val df = Seq( + ("a", 0, nullStr), + ("a", 1, "x"), + ("a", 2, "y"), + ("a", 3, "z"), + ("a", 4, "v"), + ("b", 1, "k"), + ("b", 2, "l"), + ("b", 3, nullStr)). + toDF("key", "order", "value") + val window = Window.partitionBy($"key").orderBy($"order".desc) + checkAnswer( + df.select( + $"key", + $"order", + nth_value($"value", 2).over(window), + nth_value($"value", 2, ignoreNulls = false).over(window), + nth_value($"value", 2, ignoreNulls = true).over(window)), + Seq( + Row("a", 0, "z", "z", "z"), + Row("a", 1, "z", "z", "z"), + Row("a", 2, "z", "z", "z"), + Row("a", 3, "z", "z", "z"), + Row("a", 4, null, null, null), + Row("b", 1, "l", "l", "k"), + Row("b", 2, "l", "l", null), + Row("b", 3, null, null, null))) + } + test("SPARK-12989 ExtractWindowExpressions treats alias as regular attribute") { val src = Seq((0, 3, 5)).toDF("a", "b", "c") .withColumn("Data", struct("a", "b")) From 9e9d4b6994a29fb139fd50d24b5418a900c7f072 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 18 Sep 2020 07:41:21 +0000 Subject: [PATCH 0064/1009] [SPARK-32905][CORE][YARN] ApplicationMaster fails to receive UpdateDelegationTokens message ### What changes were proposed in this pull request? With a long-running application in kerberized mode, the AMEndpiont handles `UpdateDelegationTokens` message wrong, which is an OneWayMessage that should be handled in the `receive` function. ```java 20-09-15 18:53:01 INFO yarn.YarnAllocator: Received 22 containers from YARN, launching executors on 0 of them. 
20-09-16 12:52:28 ERROR netty.Inbox: Ignoring error org.apache.spark.SparkException: NettyRpcEndpointRef(spark-client://YarnAM) does not implement 'receive' at org.apache.spark.rpc.RpcEndpoint$$anonfun$receive$1.applyOrElse(RpcEndpoint.scala:70) at org.apache.spark.rpc.netty.Inbox.$anonfun$process$1(Inbox.scala:115) at org.apache.spark.rpc.netty.Inbox.safelyCall(Inbox.scala:203) at org.apache.spark.rpc.netty.Inbox.process(Inbox.scala:100) at org.apache.spark.rpc.netty.MessageLoop.org$apache$spark$rpc$netty$MessageLoop$$receiveLoop(MessageLoop.scala:75) at org.apache.spark.rpc.netty.MessageLoop$$anon$1.run(MessageLoop.scala:41) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) 20-09-17 06:52:28 ERROR netty.Inbox: Ignoring error org.apache.spark.SparkException: NettyRpcEndpointRef(spark-client://YarnAM) does not implement 'receive' at org.apache.spark.rpc.RpcEndpoint$$anonfun$receive$1.applyOrElse(RpcEndpoint.scala:70) at org.apache.spark.rpc.netty.Inbox.$anonfun$process$1(Inbox.scala:115) at org.apache.spark.rpc.netty.Inbox.safelyCall(Inbox.scala:203) at org.apache.spark.rpc.netty.Inbox.process(Inbox.scala:100) at org.apache.spark.rpc.netty.MessageLoop.org$apache$spark$rpc$netty$MessageLoop$$receiveLoop(MessageLoop.scala:75) at org.apache.spark.rpc.netty.MessageLoop$$anon$1.run(MessageLoop.scala:41) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) ``` ### Why are the changes needed? bugfix, without a proper token refresher, the long-running apps are going to fail potentially in kerberized cluster ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? Passing jenkins and verify manually I am running the sub-module `kyuubi-spark-sql-engine` of https://github.com/yaooqinn/kyuubi The simplest way to reproduce the bug and verify this fix is to follow these steps #### 1 build the `kyuubi-spark-sql-engine` module ``` mvn clean package -pl :kyuubi-spark-sql-engine ``` #### 2. config the spark with Kerberos settings towards your secured cluster #### 3. start it in the background ``` nohup bin/spark-submit --class org.apache.kyuubi.engine.spark.SparkSQLEngine ../kyuubi-spark-sql-engine-1.0.0-SNAPSHOT.jar > kyuubi.log & ``` #### 4. check the AM log and see "Updating delegation tokens ..." for SUCCESS "Inbox: Ignoring error ...... does not implement 'receive'" for FAILURE Closes #29777 from yaooqinn/SPARK-32905. 
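As background for readers less familiar with Spark's RPC layer, here is a minimal, self-contained sketch of the dispatch rule behind this fix. It is a hypothetical model with illustrative names, not Spark's actual `RpcEndpoint`/`Inbox` classes: a one-way message is only ever offered to `receive`, so a handler registered in `receiveAndReply` never fires for it.

```scala
object RpcDispatchSketch {
  // Hypothetical message mirroring the real UpdateDelegationTokens case class.
  case class UpdateDelegationTokens(tokens: Array[Byte])

  // Toy endpoint contract: one-way messages go to `receive`, ask-style messages
  // go to `receiveAndReply`. This models the rule only, not Spark's real API.
  trait Endpoint {
    def receive: PartialFunction[Any, Unit] = PartialFunction.empty
    def receiveAndReply(reply: Any => Unit): PartialFunction[Any, Unit] = PartialFunction.empty
  }

  // Mimics how an inbox delivers a one-way message: only `receive` is consulted.
  def deliverOneWay(endpoint: Endpoint, msg: Any): Unit =
    endpoint.receive.applyOrElse(msg, (m: Any) => println(s"does not implement 'receive' for $m"))

  def main(args: Array[String]): Unit = {
    val broken = new Endpoint {
      // Handler registered in receiveAndReply: a one-way UpdateDelegationTokens is dropped.
      override def receiveAndReply(reply: Any => Unit): PartialFunction[Any, Unit] = {
        case UpdateDelegationTokens(tokens) => reply(s"updated ${tokens.length} bytes")
      }
    }
    val fixed = new Endpoint {
      // Handler registered in receive: the one-way message is handled.
      override def receive: PartialFunction[Any, Unit] = {
        case UpdateDelegationTokens(tokens) =>
          println(s"Updating delegation tokens (${tokens.length} bytes)")
      }
    }
    deliverOneWay(broken, UpdateDelegationTokens(Array[Byte](1, 2))) // falls through, like the Inbox error
    deliverOneWay(fixed, UpdateDelegationTokens(Array[Byte](1, 2)))  // handled
  }
}
```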
Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../org/apache/spark/deploy/yarn/ApplicationMaster.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 5ca624a8d66cb..5f632fbb259ff 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -779,6 +779,11 @@ private[spark] class ApplicationMaster( driver.send(RegisterClusterManager(self)) } + override def receive: PartialFunction[Any, Unit] = { + case UpdateDelegationTokens(tokens) => + SparkHadoopUtil.get.addDelegationTokens(tokens, sparkConf) + } + override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { case r: RequestExecutors => Option(allocator) match { @@ -813,9 +818,6 @@ private[spark] class ApplicationMaster( case None => logWarning("Container allocator is not ready to find executor loss reasons yet.") } - - case UpdateDelegationTokens(tokens) => - SparkHadoopUtil.get.addDelegationTokens(tokens, sparkConf) } override def onDisconnected(remoteAddress: RpcAddress): Unit = { From 78928879810a2e96dbb6ec4608b548a0072a040f Mon Sep 17 00:00:00 2001 From: William Hyun Date: Fri, 18 Sep 2020 18:13:11 +0900 Subject: [PATCH 0065/1009] [SPARK-32930][CORE] Replace deprecated isFile/isDirectory methods ### What changes were proposed in this pull request? This PR aims to replace deprecated `isFile` and `isDirectory` methods. ```diff - fs.isDirectory(hadoopPath) + fs.getFileStatus(hadoopPath).isDirectory ``` ```diff - fs.isFile(new Path(inProgressLog)) + fs.getFileStatus(new Path(inProgressLog)).isFile ``` ### Why are the changes needed? It shows deprecation warnings. - https://amplab.cs.berkeley.edu/jenkins/view/Spark%20QA%20Test%20(Dashboard)/job/spark-master-test-sbt-hadoop-3.2-hive-2.3/1244/consoleFull ``` [warn] /home/jenkins/workspace/spark-master-test-sbt-hadoop-3.2-hive-2.3/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala:815: method isFile in class FileSystem is deprecated: see corresponding Javadoc for more information. [warn] if (!fs.isFile(new Path(inProgressLog))) { ``` ``` [warn] /home/jenkins/workspace/spark-master-test-sbt-hadoop-3.2-hive-2.3/core/src/main/scala/org/apache/spark/SparkContext.scala:1884: method isDirectory in class FileSystem is deprecated: see corresponding Javadoc for more information. [warn] if (fs.isDirectory(hadoopPath)) { ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the Jenkins. Closes #29796 from williamhyun/filesystem. 
Authored-by: William Hyun Signed-off-by: HyukjinKwon --- core/src/main/scala/org/apache/spark/SparkContext.scala | 2 +- .../spark/deploy/history/EventLogFileWritersSuite.scala | 6 +++--- .../apache/spark/sql/hive/execution/HiveDDLSuite.scala | 2 +- .../scala/org/apache/spark/streaming/util/HdfsUtils.scala | 8 ++++++-- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 85a24acb97c07..409e3065492b0 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1881,7 +1881,7 @@ class SparkContext(config: SparkConf) extends Logging { if (!fs.exists(hadoopPath)) { throw new FileNotFoundException(s"Jar ${path} not found") } - if (fs.isDirectory(hadoopPath)) { + if (fs.getFileStatus(hadoopPath).isDirectory) { throw new IllegalArgumentException( s"Directory ${path} is not allowed for addJar") } diff --git a/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileWritersSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileWritersSuite.scala index 060b878fb8ef2..e9b739ce7a4c6 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileWritersSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileWritersSuite.scala @@ -213,7 +213,7 @@ class SingleEventLogFileWriterSuite extends EventLogFileWritersSuite { compressionCodecShortName) val finalLogPath = new Path(logPath) - assert(fileSystem.exists(finalLogPath) && fileSystem.isFile(finalLogPath)) + assert(fileSystem.exists(finalLogPath) && fileSystem.getFileStatus(finalLogPath).isFile) assert(expectedLines === readLinesFromEventLogFile(finalLogPath, fileSystem)) } } @@ -357,10 +357,10 @@ class RollingEventLogFilesWriterSuite extends EventLogFileWritersSuite { expectedLines: Seq[String]): Unit = { val logDirPath = getAppEventLogDirPath(logBaseDir, appId, appAttemptId) - assert(fileSystem.exists(logDirPath) && fileSystem.isDirectory(logDirPath)) + assert(fileSystem.exists(logDirPath) && fileSystem.getFileStatus(logDirPath).isDirectory) val appStatusFile = getAppStatusFilePath(logDirPath, appId, appAttemptId, inProgress = false) - assert(fileSystem.exists(appStatusFile) && fileSystem.isFile(appStatusFile)) + assert(fileSystem.exists(appStatusFile) && fileSystem.getFileStatus(appStatusFile).isFile) val eventLogFiles = listEventLogFiles(logDirPath) val allLines = mutable.ArrayBuffer[String]() diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 62b6c6c201c68..44c551cf4a4c1 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -1192,7 +1192,7 @@ class HiveDDLSuite expectedDBUri, Map.empty)) // the database directory was created - assert(fs.exists(dbPath) && fs.isDirectory(dbPath)) + assert(fs.exists(dbPath) && fs.getFileStatus(dbPath).isDirectory) sql(s"USE $dbName") val tabName = "tab1" diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala index 146577214de17..006bcad5d68c2 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala +++ 
b/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala @@ -58,7 +58,7 @@ private[streaming] object HdfsUtils { // If we are really unlucky, the file may be deleted as we're opening the stream. // This can happen as clean up is performed by daemon threads that may be left over from // previous runs. - if (!dfs.isFile(dfsPath)) null else throw e + if (!dfs.getFileStatus(dfsPath).isFile) null else throw e } } @@ -92,6 +92,10 @@ private[streaming] object HdfsUtils { def checkFileExists(path: String, conf: Configuration): Boolean = { val hdpPath = new Path(path) val fs = getFileSystemForPath(hdpPath, conf) - fs.isFile(hdpPath) + try { + fs.getFileStatus(hdpPath).isFile + } catch { + case _: FileNotFoundException => false + } } } From 105225ddbc4574a8b79e4a483124a6f998a03bc1 Mon Sep 17 00:00:00 2001 From: Tom van Bussel Date: Fri, 18 Sep 2020 11:49:26 +0000 Subject: [PATCH 0066/1009] [SPARK-32911][CORE] Free memory in UnsafeExternalSorter.SpillableIterator.spill() when all records have been read ### What changes were proposed in this pull request? This PR changes `UnsafeExternalSorter.SpillableIterator` to free its memory (except for the page holding the last record) if it is forced to spill after all of its records have been read. It also makes sure that `lastPage` is freed if `loadNext` is never called the again. The latter was necessary to get my test case to succeed (otherwise it would complain about a leak). ### Why are the changes needed? No memory is freed after calling `UnsafeExternalSorter.SpillableIterator.spill()` when all records have been read, even though it is still holding onto some memory. This may cause a `SparkOutOfMemoryError` to be thrown, even though we could have just freed the memory instead. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? A test was added to `UnsafeExternalSorterSuite`. Closes #29787 from tomvanbussel/SPARK-32911. Authored-by: Tom van Bussel Signed-off-by: Wenchen Fan --- .../unsafe/sort/UnsafeExternalSorter.java | 31 +++++++++++++------ .../sort/UnsafeExternalSorterSuite.java | 30 ++++++++++++++++++ 2 files changed, 52 insertions(+), 9 deletions(-) diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java index 71b9a5bc11542..e4a882d609fc2 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java @@ -503,7 +503,7 @@ class SpillableIterator extends UnsafeSorterIterator { private UnsafeSorterIterator upstream; private MemoryBlock lastPage = null; private boolean loaded = false; - private int numRecords = 0; + private int numRecords; private Object currentBaseObject; private long currentBaseOffset; @@ -527,19 +527,25 @@ public long getCurrentPageNumber() { public long spill() throws IOException { synchronized (this) { - if (inMemSorter == null || numRecords <= 0) { + if (inMemSorter == null) { return 0L; } long currentPageNumber = upstream.getCurrentPageNumber(); ShuffleWriteMetrics writeMetrics = new ShuffleWriteMetrics(); - // Iterate over the records that have not been returned and spill them. 
- final UnsafeSorterSpillWriter spillWriter = - new UnsafeSorterSpillWriter(blockManager, fileBufferSizeBytes, writeMetrics, numRecords); - spillIterator(upstream, spillWriter); - spillWriters.add(spillWriter); - upstream = spillWriter.getReader(serializerManager); + if (numRecords > 0) { + // Iterate over the records that have not been returned and spill them. + final UnsafeSorterSpillWriter spillWriter = new UnsafeSorterSpillWriter( + blockManager, fileBufferSizeBytes, writeMetrics, numRecords); + spillIterator(upstream, spillWriter); + spillWriters.add(spillWriter); + upstream = spillWriter.getReader(serializerManager); + } else { + // Nothing to spill as all records have been read already, but do not return yet, as the + // memory still has to be freed. + upstream = null; + } long released = 0L; synchronized (UnsafeExternalSorter.this) { @@ -555,6 +561,11 @@ public long spill() throws IOException { } } allocatedPages.clear(); + if (lastPage != null) { + // Add the last page back to the list of allocated pages to make sure it gets freed in + // case loadNext() never gets called again. + allocatedPages.add(lastPage); + } } // in-memory sorter will not be used after spilling @@ -577,11 +588,12 @@ public boolean hasNext() { @Override public void loadNext() throws IOException { + assert upstream != null; MemoryBlock pageToFree = null; try { synchronized (this) { loaded = true; - // Just consumed the last record from in memory iterator + // Just consumed the last record from the in-memory iterator. if (lastPage != null) { // Do not free the page here, while we are locking `SpillableIterator`. The `freePage` // method locks the `TaskMemoryManager`, and it's a bad idea to lock 2 objects in @@ -589,6 +601,7 @@ public void loadNext() throws IOException { // `SpillableIterator` in sequence, which may happen in // `TaskMemoryManager.acquireExecutionMemory`. 
pageToFree = lastPage; + allocatedPages.clear(); lastPage = null; } numRecords--; diff --git a/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java b/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java index 087d090c1c60e..a1b66ccfaef03 100644 --- a/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java +++ b/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java @@ -392,6 +392,36 @@ public void forcedSpillingNullsWithReadIterator() throws Exception { assertSpillFilesWereCleanedUp(); } + @Test + public void forcedSpillingWithFullyReadIterator() throws Exception { + final UnsafeExternalSorter sorter = newSorter(); + long[] record = new long[100]; + final int recordSize = record.length * 8; + final int n = (int) pageSizeBytes / recordSize * 3; + for (int i = 0; i < n; i++) { + record[0] = i; + sorter.insertRecord(record, Platform.LONG_ARRAY_OFFSET, recordSize, 0, false); + } + assertTrue(sorter.getNumberOfAllocatedPages() >= 2); + + UnsafeExternalSorter.SpillableIterator iter = + (UnsafeExternalSorter.SpillableIterator) sorter.getSortedIterator(); + for (int i = 0; i < n; i++) { + assertTrue(iter.hasNext()); + iter.loadNext(); + assertEquals(i, Platform.getLong(iter.getBaseObject(), iter.getBaseOffset())); + } + assertFalse(iter.hasNext()); + + assertTrue(iter.spill() > 0); + assertEquals(0, iter.spill()); + assertEquals(n - 1, Platform.getLong(iter.getBaseObject(), iter.getBaseOffset())); + assertFalse(iter.hasNext()); + + sorter.cleanupResources(); + assertSpillFilesWereCleanedUp(); + } + @Test public void forcedSpillingWithNotReadIterator() throws Exception { final UnsafeExternalSorter sorter = newSorter(); From e2a740147c04a15e4f94c20c6039ed4f6888e0ed Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 18 Sep 2020 11:55:27 +0000 Subject: [PATCH 0067/1009] [SPARK-32874][SQL][FOLLOWUP][TEST-HIVE1.2][TEST-HADOOP2.7] Fix spark-master-test-sbt-hadoop-2.7-hive-1.2 ### What changes were proposed in this pull request? 
Found via discussion https://github.com/apache/spark/pull/29746#issuecomment-694726504 and the root cause it that hive-1.2 does not recognize NULL ```scala sbt.ForkMain$ForkError: java.sql.SQLException: Unrecognized column type: NULL at org.apache.hive.jdbc.JdbcColumn.typeStringToHiveType(JdbcColumn.java:160) at org.apache.hive.jdbc.HiveResultSetMetaData.getHiveType(HiveResultSetMetaData.java:48) at org.apache.hive.jdbc.HiveResultSetMetaData.getPrecision(HiveResultSetMetaData.java:86) at org.apache.spark.sql.hive.thriftserver.SparkThriftServerProtocolVersionsSuite.$anonfun$new$35(SparkThriftServerProtocolVersionsSuite.scala:358) at org.apache.spark.sql.hive.thriftserver.SparkThriftServerProtocolVersionsSuite.$anonfun$new$35$adapted(SparkThriftServerProtocolVersionsSuite.scala:351) at org.apache.spark.sql.hive.thriftserver.SparkThriftServerProtocolVersionsSuite.testExecuteStatementWithProtocolVersion(SparkThriftServerProtocolVersionsSuite.scala:66) at org.apache.spark.sql.hive.thriftserver.SparkThriftServerProtocolVersionsSuite.$anonfun$new$34(SparkThriftServerProtocolVersionsSuite.scala:351) at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85) at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83) at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) at org.scalatest.Transformer.apply(Transformer.scala:22) at org.scalatest.Transformer.apply(Transformer.scala:20) at org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:189) at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:176) at org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:187) at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest$1(AnyFunSuiteLike.scala:199) at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306) at org.scalatest.funsuite.AnyFunSuiteLike.runTest(AnyFunSuiteLike.scala:199) at org.scalatest.funsuite.AnyFunSuiteLike.runTest$(AnyFunSuiteLike.scala:181) at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterEach$$super$runTest(SparkFunSuite.scala:61) at org.scalatest.BeforeAndAfterEach.runTest(BeforeAndAfterEach.scala:234) at org.scalatest.BeforeAndAfterEach.runTest$(BeforeAndAfterEach.scala:227) at org.apache.spark.SparkFunSuite.runTest(SparkFunSuite.scala:61) at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTests$1(AnyFunSuiteLike.scala:232) at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:413) at scala.collection.immutable.List.foreach(List.scala:392) at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401) at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:396) at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:475) at org.scalatest.funsuite.AnyFunSuiteLike.runTests(AnyFunSuiteLike.scala:232) at org.scalatest.funsuite.AnyFunSuiteLike.runTests$(AnyFunSuiteLike.scala:231) at org.scalatest.funsuite.AnyFunSuite.runTests(AnyFunSuite.scala:1562) at org.scalatest.Suite.run(Suite.scala:1112) at org.scalatest.Suite.run$(Suite.scala:1094) at org.scalatest.funsuite.AnyFunSuite.org$scalatest$funsuite$AnyFunSuiteLike$$super$run(AnyFunSuite.scala:1562) at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$run$1(AnyFunSuiteLike.scala:236) at org.scalatest.SuperEngine.runImpl(Engine.scala:535) at org.scalatest.funsuite.AnyFunSuiteLike.run(AnyFunSuiteLike.scala:236) at org.scalatest.funsuite.AnyFunSuiteLike.run$(AnyFunSuiteLike.scala:235) at 
org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:61) at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:213) at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210) at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208) at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:61) at org.scalatest.tools.Framework.org$scalatest$tools$Framework$$runSuite(Framework.scala:318) at org.scalatest.tools.Framework$ScalaTestTask.execute(Framework.scala:513) at sbt.ForkMain$Run$2.call(ForkMain.java:296) at sbt.ForkMain$Run$2.call(ForkMain.java:286) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) ``` In this PR, we simply ignore these checks for hive 1.2 ### Why are the changes needed? fix jenkins ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? test itself. Closes #29803 from yaooqinn/SPARK-32874-F. Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../SparkThriftServerProtocolVersionsSuite.scala | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala index 69486eeb031b1..fa001b11253f5 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala @@ -27,6 +27,7 @@ import org.apache.thrift.protocol.TBinaryProtocol import org.apache.thrift.transport.TSocket import org.apache.spark.sql.catalyst.util.NumberConverter +import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.unsafe.types.UTF8String class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { @@ -355,8 +356,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { assert(metaData.getColumnName(1) === "NULL") assert(metaData.getColumnTypeName(1) === "void") assert(metaData.getColumnType(1) === java.sql.Types.NULL) - assert(metaData.getPrecision(1) === 0) - assert(metaData.getScale(1) === 0) + if (HiveUtils.isHive23) { + // For Hive 1.2 the o.a.h.j.JdbcColumn.typeStringToHiveType can not recognize `null` as + // type name. + assert(metaData.getPrecision(1) === 0) + assert(metaData.getScale(1) === 0) + } } } From 664a1719de2855d913c3bb1d2a94bd8681bc1a0d Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Fri, 18 Sep 2020 22:24:33 +0900 Subject: [PATCH 0068/1009] [SPARK-32936][SQL] Pass all `external/avro` module UTs in Scala 2.13 ### What changes were proposed in this pull request? This pr fix all 14 failed cases in `external/avro` module in Scala 2.13, the main change of this pr as follow: - Manual call `toSeq` in `AvroDeserializer#newWriter` and `SchemaConverters#toSqlTypeHelper` method because the object type for case match is `ArrayBuffer` not `Seq` in Scala 2.13 - Specified `Seq` to `s.c.Seq` when we call `Row.get(i).asInstanceOf[Seq]` because the data maybe `mutable.ArraySeq` but `Seq` is `immutable.Seq` in Scala 2.13 ### Why are the changes needed? We need to support a Scala 2.13 build. 
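As additional context, here is a minimal sketch (illustrative names only, not code from this patch) of the 2.12/2.13 difference that breaks these `case Seq(...)` matches: in Scala 2.13 the unqualified `Seq` means `scala.collection.immutable.Seq`, so a mutable buffer such as the one produced by `.asScala.map(...)` no longer matches until it is converted with `.toSeq`.

```scala
import scala.collection.mutable.ArrayBuffer

object Scala213SeqSketch {
  // `types` stands in for the buffer produced by `.asScala.map(_.getType)`;
  // the names and return values here are illustrative only.
  def classify(types: scala.collection.Seq[String]): String = types match {
    // Under Scala 2.13 a bare `Seq` in a pattern means scala.collection.immutable.Seq,
    // so a mutable buffer only matches after an explicit .toSeq conversion.
    case Seq(a, b) => s"union of $a and $b"
    case _         => "something else"
  }

  def main(args: Array[String]): Unit = {
    val buf = ArrayBuffer("INT", "LONG")
    println(classify(buf))       // "something else" on 2.13 (it matched on 2.12)
    println(classify(buf.toSeq)) // "union of INT and LONG" on both versions
  }
}
```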
### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Scala 2.12: Pass the Jenkins or GitHub Action - Scala 2.13: Pass 2.13 Build GitHub Action and do the following: ``` dev/change-scala-version.sh 2.13 mvn clean install -DskipTests -pl external/avro -Pscala-2.13 -am mvn clean test -pl external/avro -Pscala-2.13 ``` **Before** ``` Tests: succeeded 197, failed 14, canceled 0, ignored 2, pending 0 *** 14 TESTS FAILED *** ``` **After** ``` Tests: succeeded 211, failed 0, canceled 0, ignored 2, pending 0 All tests passed. ``` Closes #29801 from LuciferYang/fix-external-avro-213. Authored-by: yangjie01 Signed-off-by: HyukjinKwon --- .../scala/org/apache/spark/sql/avro/AvroDeserializer.scala | 2 +- .../scala/org/apache/spark/sql/avro/SchemaConverters.scala | 2 +- .../src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala index 360a7fcff4363..aabf9d92ce7d8 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala @@ -256,7 +256,7 @@ private[sql] class AvroDeserializer( if (nonNullTypes.length == 1) { newWriter(nonNullTypes.head, catalystType, path) } else { - nonNullTypes.map(_.getType) match { + nonNullTypes.map(_.getType).toSeq match { case Seq(a, b) if Set(a, b) == Set(INT, LONG) && catalystType == LongType => (updater, ordinal, value) => value match { case null => updater.setNullAt(ordinal) diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala index 27d5871070608..905f90fa79373 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala @@ -118,7 +118,7 @@ object SchemaConverters { toSqlTypeHelper(Schema.createUnion(remainingUnionTypes.asJava), existingRecordNames) .copy(nullable = true) } - } else avroSchema.getTypes.asScala.map(_.getType) match { + } else avroSchema.getTypes.asScala.map(_.getType).toSeq match { case Seq(t1) => toSqlTypeHelper(avroSchema.getTypes.get(0), existingRecordNames) case Seq(t1, t2) if Set(t1, t2) == Set(INT, LONG) => diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index 8a8a7681abd1c..b995a667be2b1 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -543,7 +543,8 @@ abstract class AvroSuite extends QueryTest with SharedSparkSession with NestedDa val array_of_boolean = spark.read.format("avro").load(testAvro).select("array_of_boolean").collect() - assert(array_of_boolean.map(_(0).asInstanceOf[Seq[Boolean]].size).toSet == Set(3, 1, 0)) + assert(array_of_boolean.map(_(0).asInstanceOf[scala.collection.Seq[Boolean]].size).toSet == + Set(3, 1, 0)) val bytes = spark.read.format("avro").load(testAvro).select("bytes").collect() assert(bytes.map(_(0).asInstanceOf[Array[Byte]].length).toSet == Set(3, 1, 0)) From 2128c4f14b498e3bc98e79f0dd42d9023e718112 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Fri, 18 Sep 2020 10:38:30 -0500 Subject: [PATCH 0069/1009] 
[SPARK-32808][SQL] Pass all test of sql/core module in Scala 2.13 ### What changes were proposed in this pull request? After https://github.com/apache/spark/pull/29660 and https://github.com/apache/spark/pull/29689 there are 13 remaining failed cases of sql core module with Scala 2.13. The reason for the remaining failed cases is the optimization result of `CostBasedJoinReorder` maybe different with same input in Scala 2.12 and Scala 2.13 if there are more than one same cost candidate plans. In this pr give a way to make the optimization result deterministic as much as possible to pass all remaining failed cases of `sql/core` module in Scala 2.13, the main change of this pr as follow: - Change to use `LinkedHashMap` instead of `Map` to store `foundPlans` in `JoinReorderDP.search` method to ensure same iteration order with same insert order because iteration order of `Map` behave differently under Scala 2.12 and 2.13 - Fixed `StarJoinCostBasedReorderSuite` affected by the above change - Regenerate golden files affected by the above change. ### Why are the changes needed? We need to support a Scala 2.13 build. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Scala 2.12: Pass the Jenkins or GitHub Action - Scala 2.13: All tests passed. Do the following: ``` dev/change-scala-version.sh 2.13 mvn clean install -DskipTests -pl sql/core -Pscala-2.13 -am mvn test -pl sql/core -Pscala-2.13 ``` **Before** ``` Tests: succeeded 8485, failed 13, canceled 1, ignored 52, pending 0 *** 13 TESTS FAILED *** ``` **After** ``` Tests: succeeded 8498, failed 0, canceled 1, ignored 52, pending 0 All tests passed. ``` Closes #29711 from LuciferYang/SPARK-32808-3. Authored-by: yangjie01 Signed-off-by: Sean Owen --- .../optimizer/CostBasedJoinReorder.scala | 19 +- .../StarJoinCostBasedReorderSuite.scala | 2 +- .../q27.sf100/explain.txt | 210 +++++------ .../q27.sf100/simplified.txt | 22 +- .../q7.sf100/explain.txt | 108 +++--- .../q7.sf100/simplified.txt | 10 +- .../approved-plans-v1_4/q13.sf100/explain.txt | 112 +++--- .../q13.sf100/simplified.txt | 12 +- .../approved-plans-v1_4/q17.sf100/explain.txt | 120 +++---- .../q17.sf100/simplified.txt | 10 +- .../approved-plans-v1_4/q19.sf100/explain.txt | 204 +++++------ .../q19.sf100/simplified.txt | 36 +- .../q24a.sf100/explain.txt | 94 ++--- .../q24a.sf100/simplified.txt | 18 +- .../q24b.sf100/explain.txt | 94 ++--- .../q24b.sf100/simplified.txt | 18 +- .../approved-plans-v1_4/q25.sf100/explain.txt | 120 +++---- .../q25.sf100/simplified.txt | 10 +- .../approved-plans-v1_4/q29.sf100/explain.txt | 118 +++---- .../q29.sf100/simplified.txt | 10 +- .../approved-plans-v1_4/q31.sf100/explain.txt | 40 +-- .../q31.sf100/simplified.txt | 12 +- .../approved-plans-v1_4/q45.sf100/explain.txt | 102 +++--- .../q45.sf100/simplified.txt | 20 +- .../approved-plans-v1_4/q50.sf100/explain.txt | 104 +++--- .../q50.sf100/simplified.txt | 10 +- .../approved-plans-v1_4/q6.sf100/explain.txt | 224 ++++++------ .../q6.sf100/simplified.txt | 74 ++-- .../approved-plans-v1_4/q61.sf100/explain.txt | 127 +++---- .../q61.sf100/simplified.txt | 17 +- .../approved-plans-v1_4/q62.sf100/explain.txt | 108 +++--- .../q62.sf100/simplified.txt | 10 +- .../approved-plans-v1_4/q66.sf100/explain.txt | 136 +++---- .../q66.sf100/simplified.txt | 16 +- .../approved-plans-v1_4/q72.sf100/explain.txt | 334 +++++++++--------- .../q72.sf100/simplified.txt | 34 +- .../approved-plans-v1_4/q80.sf100/explain.txt | 98 ++--- .../q80.sf100/simplified.txt | 38 +- 
.../approved-plans-v1_4/q84.sf100/explain.txt | 86 ++--- .../q84.sf100/simplified.txt | 10 +- .../approved-plans-v1_4/q85.sf100/explain.txt | 304 ++++++++-------- .../q85.sf100/simplified.txt | 44 +-- .../approved-plans-v1_4/q91.sf100/explain.txt | 202 +++++------ .../q91.sf100/simplified.txt | 26 +- .../approved-plans-v1_4/q99.sf100/explain.txt | 108 +++--- .../q99.sf100/simplified.txt | 10 +- .../approved-plans-v2_7/q6.sf100/explain.txt | 224 ++++++------ .../q6.sf100/simplified.txt | 74 ++-- .../approved-plans-v2_7/q72.sf100/explain.txt | 334 +++++++++--------- .../q72.sf100/simplified.txt | 34 +- .../q80a.sf100/explain.txt | 98 ++--- .../q80a.sf100/simplified.txt | 38 +- 52 files changed, 2204 insertions(+), 2239 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala index a64e8bcd68175..8b019f35263f3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala @@ -150,9 +150,16 @@ object JoinReorderDP extends PredicateHelper with Logging { // Level i maintains all found plans for i + 1 items. // Create the initial plans: each plan is a single item with zero cost. val itemIndex = items.zipWithIndex - val foundPlans = mutable.Buffer[JoinPlanMap](itemIndex.map { - case (item, id) => Set(id) -> JoinPlan(Set(id), item, ExpressionSet(), Cost(0, 0)) - }.toMap) + val foundPlans = mutable.Buffer[JoinPlanMap]({ + // SPARK-32687: Change to use `LinkedHashMap` to make sure that items are + // inserted and iterated in the same order. + val joinPlanMap = new JoinPlanMap + itemIndex.foreach { + case (item, id) => + joinPlanMap.put(Set(id), JoinPlan(Set(id), item, ExpressionSet(), Cost(0, 0))) + } + joinPlanMap + }) // Build filters from the join graph to be used by the search algorithm. val filters = JoinReorderDPFilters.buildJoinGraphInfo(conf, items, conditions, itemIndex) @@ -198,7 +205,7 @@ object JoinReorderDP extends PredicateHelper with Logging { topOutput: AttributeSet, filters: Option[JoinGraphInfo]): JoinPlanMap = { - val nextLevel = mutable.Map.empty[Set[Int], JoinPlan] + val nextLevel = new JoinPlanMap var k = 0 val lev = existingLevels.length - 1 // Build plans for the next level from plans at level k (one side of the join) and level @@ -231,7 +238,7 @@ object JoinReorderDP extends PredicateHelper with Logging { } k += 1 } - nextLevel.toMap + nextLevel } /** @@ -316,7 +323,7 @@ object JoinReorderDP extends PredicateHelper with Logging { } /** Map[set of item ids, join plan for these items] */ - type JoinPlanMap = Map[Set[Int], JoinPlan] + type JoinPlanMap = mutable.LinkedHashMap[Set[Int], JoinPlan] /** * Partial join order in a specific level. 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/StarJoinCostBasedReorderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/StarJoinCostBasedReorderSuite.scala index d9cf629b47c18..703be48c6a2a9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/StarJoinCostBasedReorderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/StarJoinCostBasedReorderSuite.scala @@ -345,8 +345,8 @@ class StarJoinCostBasedReorderSuite extends JoinReorderPlanTestBase with StatsEs val expected = f1.join(d3, Inner, Some(nameToAttr("f1_fk3") === nameToAttr("d3_pk"))) - .join(d1, Inner, Some(nameToAttr("f1_fk1") === nameToAttr("d1_pk"))) .join(d2, Inner, Some(nameToAttr("f1_fk2") === nameToAttr("d2_pk"))) + .join(d1, Inner, Some(nameToAttr("f1_fk1") === nameToAttr("d1_pk"))) .join(t4.join(t3, Inner, Some(nameToAttr("t3_c2") === nameToAttr("t4_c2"))), Inner, Some(nameToAttr("d1_c2") === nameToAttr("t3_c1"))) .join(t2.join(t1, Inner, Some(nameToAttr("t1_c2") === nameToAttr("t2_c2"))), Inner, diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27.sf100/explain.txt index fa01042350149..b3b11b60ded0b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27.sf100/explain.txt @@ -11,15 +11,15 @@ TakeOrderedAndProject (77) : : :- * Project (17) : : : +- * BroadcastHashJoin Inner BuildRight (16) : : : :- * Project (10) - : : : : +- * BroadcastHashJoin Inner BuildLeft (9) - : : : : :- BroadcastExchange (5) - : : : : : +- * Project (4) - : : : : : +- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.date_dim (1) - : : : : +- * Filter (8) - : : : : +- * ColumnarToRow (7) - : : : : +- Scan parquet default.store_sales (6) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.date_dim (4) : : : +- BroadcastExchange (15) : : : +- * Project (14) : : : +- * Filter (13) @@ -43,11 +43,11 @@ TakeOrderedAndProject (77) : : :- * Project (45) : : : +- * BroadcastHashJoin Inner BuildRight (44) : : : :- * Project (38) - : : : : +- * BroadcastHashJoin Inner BuildLeft (37) - : : : : :- ReusedExchange (33) - : : : : +- * Filter (36) - : : : : +- * ColumnarToRow (35) - : : : : +- Scan parquet default.store_sales (34) + : : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : : :- * Filter (35) + : : : : : +- * ColumnarToRow (34) + : : : : : +- Scan parquet default.store_sales (33) + : : : : +- ReusedExchange (36) : : : +- BroadcastExchange (43) : : : +- * Project (42) : : : +- * Filter (41) @@ -65,11 +65,11 @@ TakeOrderedAndProject (77) : :- * Project (63) : : +- * BroadcastHashJoin Inner BuildRight (62) : : :- * Project (60) - : : : +- * BroadcastHashJoin Inner BuildLeft (59) - : : : :- ReusedExchange (55) - : : : +- * Filter (58) - : : : +- * ColumnarToRow (57) - : : : +- Scan parquet default.store_sales (56) + : : : +- * BroadcastHashJoin Inner BuildRight (59) + : : : :- * Filter (57) + : : : : +- * ColumnarToRow 
(56) + : : : : +- Scan parquet default.store_sales (55) + : : : +- ReusedExchange (58) : : +- ReusedExchange (61) : +- ReusedExchange (64) +- BroadcastExchange (70) @@ -78,50 +78,50 @@ TakeOrderedAndProject (77) +- Scan parquet default.item (67) -(1) Scan parquet default.date_dim -Output [2]: [d_date_sk#1, d_year#2] +(1) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), GreaterThanOrEqual(d_date_sk,2451545), LessThanOrEqual(d_date_sk,2451910), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451545), LessThanOrEqual(ss_sold_date_sk,2451910), IsNotNull(ss_cdemo_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct -(2) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#1, d_year#2] +(2) ColumnarToRow [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] -(3) Filter [codegen id : 1] -Input [2]: [d_date_sk#1, d_year#2] -Condition : ((((isnotnull(d_year#2) AND (d_year#2 = 2000)) AND (d_date_sk#1 >= 2451545)) AND (d_date_sk#1 <= 2451910)) AND isnotnull(d_date_sk#1)) +(3) Filter [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Condition : (((((isnotnull(ss_sold_date_sk#1) AND (ss_sold_date_sk#1 >= 2451545)) AND (ss_sold_date_sk#1 <= 2451910)) AND isnotnull(ss_cdemo_sk#3)) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_item_sk#2)) -(4) Project [codegen id : 1] -Output [1]: [d_date_sk#1] -Input [2]: [d_date_sk#1, d_year#2] +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#9, d_year#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), GreaterThanOrEqual(d_date_sk,2451545), LessThanOrEqual(d_date_sk,2451910), IsNotNull(d_date_sk)] +ReadSchema: struct -(5) BroadcastExchange -Input [1]: [d_date_sk#1] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#3] +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#9, d_year#10] -(6) Scan parquet default.store_sales -Output [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451545), LessThanOrEqual(ss_sold_date_sk,2451910), IsNotNull(ss_cdemo_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] -ReadSchema: struct +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#9, d_year#10] +Condition : ((((isnotnull(d_year#10) AND (d_year#10 = 2000)) AND (d_date_sk#9 >= 2451545)) AND (d_date_sk#9 <= 2451910)) AND isnotnull(d_date_sk#9)) -(7) ColumnarToRow -Input [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#9] +Input [2]: [d_date_sk#9, d_year#10] -(8) Filter -Input [8]: [ss_sold_date_sk#4, ss_item_sk#5, 
ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Condition : (((((isnotnull(ss_sold_date_sk#4) AND (ss_sold_date_sk#4 >= 2451545)) AND (ss_sold_date_sk#4 <= 2451910)) AND isnotnull(ss_cdemo_sk#6)) AND isnotnull(ss_store_sk#7)) AND isnotnull(ss_item_sk#5)) +(8) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] (9) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [d_date_sk#1] -Right keys [1]: [ss_sold_date_sk#4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#9] Join condition: None (10) Project [codegen id : 5] -Output [7]: [ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Input [9]: [d_date_sk#1, ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +Output [7]: [ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, d_date_sk#9] (11) Scan parquet default.customer_demographics Output [4]: [cd_demo_sk#12, cd_gender#13, cd_marital_status#14, cd_education_status#15] @@ -146,13 +146,13 @@ Input [1]: [cd_demo_sk#12] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] (16) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_cdemo_sk#6] +Left keys [1]: [ss_cdemo_sk#3] Right keys [1]: [cd_demo_sk#12] Join condition: None (17) Project [codegen id : 5] -Output [6]: [ss_item_sk#5, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Input [8]: [ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, cd_demo_sk#12] +Output [6]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [8]: [ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, cd_demo_sk#12] (18) Scan parquet default.store Output [2]: [s_store_sk#17, s_state#18] @@ -173,13 +173,13 @@ Input [2]: [s_store_sk#17, s_state#18] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] (22) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_store_sk#7] +Left keys [1]: [ss_store_sk#4] Right keys [1]: [s_store_sk#17] Join condition: None (23) Project [codegen id : 5] -Output [6]: [ss_item_sk#5, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, s_state#18] -Input [8]: [ss_item_sk#5, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, s_store_sk#17, s_state#18] +Output [6]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_state#18] +Input [8]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_store_sk#17, s_state#18] (24) Scan parquet default.item Output [2]: [i_item_sk#20, i_item_id#21] @@ -200,13 +200,13 @@ Input [2]: [i_item_sk#20, i_item_id#21] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] (28) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_item_sk#5] +Left keys [1]: [ss_item_sk#2] Right keys [1]: [i_item_sk#20] Join condition: None (29) Project [codegen id : 5] -Output 
[6]: [i_item_id#21, s_state#18, ss_quantity#8 AS agg1#23, ss_list_price#9 AS agg2#24, ss_coupon_amt#11 AS agg3#25, ss_sales_price#10 AS agg4#26] -Input [8]: [ss_item_sk#5, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, s_state#18, i_item_sk#20, i_item_id#21] +Output [6]: [i_item_id#21, s_state#18, ss_quantity#5 AS agg1#23, ss_list_price#6 AS agg2#24, ss_coupon_amt#8 AS agg3#25, ss_sales_price#7 AS agg4#26] +Input [8]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_state#18, i_item_sk#20, i_item_id#21] (30) HashAggregate [codegen id : 5] Input [6]: [i_item_id#21, s_state#18, agg1#23, agg2#24, agg3#25, agg4#26] @@ -226,31 +226,31 @@ Functions [4]: [avg(cast(agg1#23 as bigint)), avg(UnscaledValue(agg2#24)), avg(U Aggregate Attributes [4]: [avg(cast(agg1#23 as bigint))#44, avg(UnscaledValue(agg2#24))#45, avg(UnscaledValue(agg3#25))#46, avg(UnscaledValue(agg4#26))#47] Results [7]: [i_item_id#21, s_state#18, 0 AS g_state#48, avg(cast(agg1#23 as bigint))#44 AS agg1#49, cast((avg(UnscaledValue(agg2#24))#45 / 100.0) as decimal(11,6)) AS agg2#50, cast((avg(UnscaledValue(agg3#25))#46 / 100.0) as decimal(11,6)) AS agg3#51, cast((avg(UnscaledValue(agg4#26))#47 / 100.0) as decimal(11,6)) AS agg4#52] -(33) ReusedExchange [Reuses operator id: 5] -Output [1]: [d_date_sk#1] - -(34) Scan parquet default.store_sales -Output [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +(33) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451545), LessThanOrEqual(ss_sold_date_sk,2451910), IsNotNull(ss_cdemo_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] ReadSchema: struct -(35) ColumnarToRow -Input [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +(34) ColumnarToRow [codegen id : 11] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] + +(35) Filter [codegen id : 11] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Condition : (((((isnotnull(ss_sold_date_sk#1) AND (ss_sold_date_sk#1 >= 2451545)) AND (ss_sold_date_sk#1 <= 2451910)) AND isnotnull(ss_cdemo_sk#3)) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_item_sk#2)) -(36) Filter -Input [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Condition : (((((isnotnull(ss_sold_date_sk#4) AND (ss_sold_date_sk#4 >= 2451545)) AND (ss_sold_date_sk#4 <= 2451910)) AND isnotnull(ss_cdemo_sk#6)) AND isnotnull(ss_store_sk#7)) AND isnotnull(ss_item_sk#5)) +(36) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#9] (37) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [d_date_sk#1] -Right keys [1]: [ss_sold_date_sk#4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#9] Join condition: None (38) Project [codegen id : 11] -Output [7]: [ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Input [9]: 
[d_date_sk#1, ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +Output [7]: [ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, d_date_sk#9] (39) Scan parquet default.store Output [2]: [s_store_sk#17, s_state#18] @@ -275,37 +275,37 @@ Input [1]: [s_store_sk#17] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#53] (44) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ss_store_sk#7] +Left keys [1]: [ss_store_sk#4] Right keys [1]: [s_store_sk#17] Join condition: None (45) Project [codegen id : 11] -Output [6]: [ss_item_sk#5, ss_cdemo_sk#6, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Input [8]: [ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, s_store_sk#17] +Output [6]: [ss_item_sk#2, ss_cdemo_sk#3, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [8]: [ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_store_sk#17] (46) ReusedExchange [Reuses operator id: 15] Output [1]: [cd_demo_sk#12] (47) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ss_cdemo_sk#6] +Left keys [1]: [ss_cdemo_sk#3] Right keys [1]: [cd_demo_sk#12] Join condition: None (48) Project [codegen id : 11] -Output [5]: [ss_item_sk#5, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Input [7]: [ss_item_sk#5, ss_cdemo_sk#6, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, cd_demo_sk#12] +Output [5]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [7]: [ss_item_sk#2, ss_cdemo_sk#3, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, cd_demo_sk#12] (49) ReusedExchange [Reuses operator id: 27] Output [2]: [i_item_sk#20, i_item_id#21] (50) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ss_item_sk#5] +Left keys [1]: [ss_item_sk#2] Right keys [1]: [i_item_sk#20] Join condition: None (51) Project [codegen id : 11] -Output [5]: [i_item_id#21, ss_quantity#8 AS agg1#23, ss_list_price#9 AS agg2#24, ss_coupon_amt#11 AS agg3#25, ss_sales_price#10 AS agg4#26] -Input [7]: [ss_item_sk#5, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, i_item_sk#20, i_item_id#21] +Output [5]: [i_item_id#21, ss_quantity#5 AS agg1#23, ss_list_price#6 AS agg2#24, ss_coupon_amt#8 AS agg3#25, ss_sales_price#7 AS agg4#26] +Input [7]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_sk#20, i_item_id#21] (52) HashAggregate [codegen id : 11] Input [5]: [i_item_id#21, agg1#23, agg2#24, agg3#25, agg4#26] @@ -325,55 +325,55 @@ Functions [4]: [avg(cast(agg1#23 as bigint)), avg(UnscaledValue(agg2#24)), avg(U Aggregate Attributes [4]: [avg(cast(agg1#23 as bigint))#71, avg(UnscaledValue(agg2#24))#72, avg(UnscaledValue(agg3#25))#73, avg(UnscaledValue(agg4#26))#74] Results [7]: [i_item_id#21, null AS s_state#75, 1 AS g_state#76, avg(cast(agg1#23 as bigint))#71 AS agg1#77, cast((avg(UnscaledValue(agg2#24))#72 / 100.0) as decimal(11,6)) AS agg2#78, cast((avg(UnscaledValue(agg3#25))#73 / 100.0) as decimal(11,6)) AS agg3#79, cast((avg(UnscaledValue(agg4#26))#74 / 100.0) as decimal(11,6)) AS agg4#80] -(55) ReusedExchange 
[Reuses operator id: 5] -Output [1]: [d_date_sk#1] - -(56) Scan parquet default.store_sales -Output [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +(55) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451545), LessThanOrEqual(ss_sold_date_sk,2451910), IsNotNull(ss_cdemo_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] ReadSchema: struct -(57) ColumnarToRow -Input [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +(56) ColumnarToRow [codegen id : 17] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] + +(57) Filter [codegen id : 17] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Condition : (((((isnotnull(ss_sold_date_sk#1) AND (ss_sold_date_sk#1 >= 2451545)) AND (ss_sold_date_sk#1 <= 2451910)) AND isnotnull(ss_cdemo_sk#3)) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_item_sk#2)) -(58) Filter -Input [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Condition : (((((isnotnull(ss_sold_date_sk#4) AND (ss_sold_date_sk#4 >= 2451545)) AND (ss_sold_date_sk#4 <= 2451910)) AND isnotnull(ss_cdemo_sk#6)) AND isnotnull(ss_store_sk#7)) AND isnotnull(ss_item_sk#5)) +(58) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#9] (59) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [d_date_sk#1] -Right keys [1]: [ss_sold_date_sk#4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#9] Join condition: None (60) Project [codegen id : 17] -Output [7]: [ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Input [9]: [d_date_sk#1, ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +Output [7]: [ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, d_date_sk#9] (61) ReusedExchange [Reuses operator id: 43] Output [1]: [s_store_sk#17] (62) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ss_store_sk#7] +Left keys [1]: [ss_store_sk#4] Right keys [1]: [s_store_sk#17] Join condition: None (63) Project [codegen id : 17] -Output [6]: [ss_item_sk#5, ss_cdemo_sk#6, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Input [8]: [ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, s_store_sk#17] +Output [6]: [ss_item_sk#2, ss_cdemo_sk#3, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [8]: [ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_store_sk#17] (64) ReusedExchange [Reuses operator id: 15] Output [1]: [cd_demo_sk#12] (65) BroadcastHashJoin [codegen 
id : 17] -Left keys [1]: [ss_cdemo_sk#6] +Left keys [1]: [ss_cdemo_sk#3] Right keys [1]: [cd_demo_sk#12] Join condition: None (66) Project [codegen id : 17] -Output [5]: [ss_item_sk#5, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Input [7]: [ss_item_sk#5, ss_cdemo_sk#6, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, cd_demo_sk#12] +Output [5]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [7]: [ss_item_sk#2, ss_cdemo_sk#3, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, cd_demo_sk#12] (67) Scan parquet default.item Output [1]: [i_item_sk#20] @@ -394,13 +394,13 @@ Input [1]: [i_item_sk#20] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#81] (71) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ss_item_sk#5] +Left keys [1]: [ss_item_sk#2] Right keys [1]: [i_item_sk#20] Join condition: None (72) Project [codegen id : 17] -Output [4]: [ss_quantity#8 AS agg1#23, ss_list_price#9 AS agg2#24, ss_coupon_amt#11 AS agg3#25, ss_sales_price#10 AS agg4#26] -Input [6]: [ss_item_sk#5, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, i_item_sk#20] +Output [4]: [ss_quantity#5 AS agg1#23, ss_list_price#6 AS agg2#24, ss_coupon_amt#8 AS agg3#25, ss_sales_price#7 AS agg4#26] +Input [6]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_sk#20] (73) HashAggregate [codegen id : 17] Input [4]: [agg1#23, agg2#24, agg3#25, agg4#26] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27.sf100/simplified.txt index fc7202e739bcc..d14061de1d1f4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27.sf100/simplified.txt @@ -13,7 +13,11 @@ TakeOrderedAndProject [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4] Project [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] Project [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_cdemo_sk,ss_store_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] InputAdapter BroadcastExchange #2 WholeStageCodegen (1) @@ -22,10 +26,6 @@ TakeOrderedAndProject [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4] ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_year] - Filter [ss_sold_date_sk,ss_cdemo_sk,ss_store_sk,ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] InputAdapter BroadcastExchange #3 WholeStageCodegen (2) @@ -61,13 +61,13 @@ TakeOrderedAndProject [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4] Project [ss_item_sk,ss_cdemo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - InputAdapter 
- ReusedExchange [d_date_sk] #2 + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Filter [ss_sold_date_sk,ss_cdemo_sk,ss_store_sk,ss_item_sk] ColumnarToRow InputAdapter Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + InputAdapter + ReusedExchange [d_date_sk] #2 InputAdapter BroadcastExchange #7 WholeStageCodegen (8) @@ -93,13 +93,13 @@ TakeOrderedAndProject [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4] Project [ss_item_sk,ss_cdemo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] #2 + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Filter [ss_sold_date_sk,ss_cdemo_sk,ss_store_sk,ss_item_sk] ColumnarToRow InputAdapter Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + InputAdapter + ReusedExchange [d_date_sk] #2 InputAdapter ReusedExchange [s_store_sk] #7 InputAdapter diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7.sf100/explain.txt index 6071139e809cf..220d661fd45e9 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7.sf100/explain.txt @@ -10,15 +10,15 @@ TakeOrderedAndProject (34) : :- * Project (17) : : +- * BroadcastHashJoin Inner BuildRight (16) : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildLeft (9) - : : : :- BroadcastExchange (5) - : : : : +- * Project (4) - : : : : +- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.date_dim (1) - : : : +- * Filter (8) - : : : +- * ColumnarToRow (7) - : : : +- Scan parquet default.store_sales (6) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) : : +- BroadcastExchange (15) : : +- * Project (14) : : +- * Filter (13) @@ -35,50 +35,50 @@ TakeOrderedAndProject (34) +- Scan parquet default.item (25) -(1) Scan parquet default.date_dim -Output [2]: [d_date_sk#1, d_year#2] +(1) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), GreaterThanOrEqual(d_date_sk,2450815), LessThanOrEqual(d_date_sk,2451179), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2450815), LessThanOrEqual(ss_sold_date_sk,2451179), IsNotNull(ss_cdemo_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)] +ReadSchema: struct -(2) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#1, d_year#2] +(2) ColumnarToRow [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, 
ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] -(3) Filter [codegen id : 1] -Input [2]: [d_date_sk#1, d_year#2] -Condition : ((((isnotnull(d_year#2) AND (d_year#2 = 1998)) AND (d_date_sk#1 >= 2450815)) AND (d_date_sk#1 <= 2451179)) AND isnotnull(d_date_sk#1)) +(3) Filter [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Condition : (((((isnotnull(ss_sold_date_sk#1) AND (ss_sold_date_sk#1 >= 2450815)) AND (ss_sold_date_sk#1 <= 2451179)) AND isnotnull(ss_cdemo_sk#3)) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_promo_sk#4)) -(4) Project [codegen id : 1] -Output [1]: [d_date_sk#1] -Input [2]: [d_date_sk#1, d_year#2] +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#9, d_year#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), GreaterThanOrEqual(d_date_sk,2450815), LessThanOrEqual(d_date_sk,2451179), IsNotNull(d_date_sk)] +ReadSchema: struct -(5) BroadcastExchange -Input [1]: [d_date_sk#1] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#3] +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#9, d_year#10] -(6) Scan parquet default.store_sales -Output [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_promo_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2450815), LessThanOrEqual(ss_sold_date_sk,2451179), IsNotNull(ss_cdemo_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)] -ReadSchema: struct +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#9, d_year#10] +Condition : ((((isnotnull(d_year#10) AND (d_year#10 = 1998)) AND (d_date_sk#9 >= 2450815)) AND (d_date_sk#9 <= 2451179)) AND isnotnull(d_date_sk#9)) -(7) ColumnarToRow -Input [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_promo_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#9] +Input [2]: [d_date_sk#9, d_year#10] -(8) Filter -Input [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_promo_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Condition : (((((isnotnull(ss_sold_date_sk#4) AND (ss_sold_date_sk#4 >= 2450815)) AND (ss_sold_date_sk#4 <= 2451179)) AND isnotnull(ss_cdemo_sk#6)) AND isnotnull(ss_item_sk#5)) AND isnotnull(ss_promo_sk#7)) +(8) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] (9) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [d_date_sk#1] -Right keys [1]: [ss_sold_date_sk#4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#9] Join condition: None (10) Project [codegen id : 5] -Output [7]: [ss_item_sk#5, ss_cdemo_sk#6, ss_promo_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Input [9]: [d_date_sk#1, ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_promo_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +Output [7]: [ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, 
ss_coupon_amt#8, d_date_sk#9] (11) Scan parquet default.promotion Output [3]: [p_promo_sk#12, p_channel_email#13, p_channel_event#14] @@ -103,13 +103,13 @@ Input [1]: [p_promo_sk#12] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] (16) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_promo_sk#7] +Left keys [1]: [ss_promo_sk#4] Right keys [1]: [p_promo_sk#12] Join condition: None (17) Project [codegen id : 5] -Output [6]: [ss_item_sk#5, ss_cdemo_sk#6, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Input [8]: [ss_item_sk#5, ss_cdemo_sk#6, ss_promo_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, p_promo_sk#12] +Output [6]: [ss_item_sk#2, ss_cdemo_sk#3, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [8]: [ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, p_promo_sk#12] (18) Scan parquet default.customer_demographics Output [4]: [cd_demo_sk#16, cd_gender#17, cd_marital_status#18, cd_education_status#19] @@ -134,13 +134,13 @@ Input [1]: [cd_demo_sk#16] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] (23) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_cdemo_sk#6] +Left keys [1]: [ss_cdemo_sk#3] Right keys [1]: [cd_demo_sk#16] Join condition: None (24) Project [codegen id : 5] -Output [5]: [ss_item_sk#5, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Input [7]: [ss_item_sk#5, ss_cdemo_sk#6, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, cd_demo_sk#16] +Output [5]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [7]: [ss_item_sk#2, ss_cdemo_sk#3, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, cd_demo_sk#16] (25) Scan parquet default.item Output [2]: [i_item_sk#21, i_item_id#22] @@ -161,18 +161,18 @@ Input [2]: [i_item_sk#21, i_item_id#22] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] (29) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_item_sk#5] +Left keys [1]: [ss_item_sk#2] Right keys [1]: [i_item_sk#21] Join condition: None (30) Project [codegen id : 5] -Output [5]: [ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, i_item_id#22] -Input [7]: [ss_item_sk#5, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, i_item_sk#21, i_item_id#22] +Output [5]: [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#22] +Input [7]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_sk#21, i_item_id#22] (31) HashAggregate [codegen id : 5] -Input [5]: [ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, i_item_id#22] +Input [5]: [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#22] Keys [1]: [i_item_id#22] -Functions [4]: [partial_avg(cast(ss_quantity#8 as bigint)), partial_avg(UnscaledValue(ss_list_price#9)), partial_avg(UnscaledValue(ss_coupon_amt#11)), partial_avg(UnscaledValue(ss_sales_price#10))] +Functions [4]: [partial_avg(cast(ss_quantity#5 as bigint)), partial_avg(UnscaledValue(ss_list_price#6)), partial_avg(UnscaledValue(ss_coupon_amt#8)), partial_avg(UnscaledValue(ss_sales_price#7))] Aggregate Attributes [8]: [sum#24, count#25, sum#26, count#27, sum#28, count#29, sum#30, count#31] Results [9]: [i_item_id#22, sum#32, count#33, sum#34, count#35, sum#36, 
count#37, sum#38, count#39] @@ -183,9 +183,9 @@ Arguments: hashpartitioning(i_item_id#22, 5), true, [id=#40] (33) HashAggregate [codegen id : 6] Input [9]: [i_item_id#22, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] Keys [1]: [i_item_id#22] -Functions [4]: [avg(cast(ss_quantity#8 as bigint)), avg(UnscaledValue(ss_list_price#9)), avg(UnscaledValue(ss_coupon_amt#11)), avg(UnscaledValue(ss_sales_price#10))] -Aggregate Attributes [4]: [avg(cast(ss_quantity#8 as bigint))#41, avg(UnscaledValue(ss_list_price#9))#42, avg(UnscaledValue(ss_coupon_amt#11))#43, avg(UnscaledValue(ss_sales_price#10))#44] -Results [5]: [i_item_id#22, avg(cast(ss_quantity#8 as bigint))#41 AS agg1#45, cast((avg(UnscaledValue(ss_list_price#9))#42 / 100.0) as decimal(11,6)) AS agg2#46, cast((avg(UnscaledValue(ss_coupon_amt#11))#43 / 100.0) as decimal(11,6)) AS agg3#47, cast((avg(UnscaledValue(ss_sales_price#10))#44 / 100.0) as decimal(11,6)) AS agg4#48] +Functions [4]: [avg(cast(ss_quantity#5 as bigint)), avg(UnscaledValue(ss_list_price#6)), avg(UnscaledValue(ss_coupon_amt#8)), avg(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [4]: [avg(cast(ss_quantity#5 as bigint))#41, avg(UnscaledValue(ss_list_price#6))#42, avg(UnscaledValue(ss_coupon_amt#8))#43, avg(UnscaledValue(ss_sales_price#7))#44] +Results [5]: [i_item_id#22, avg(cast(ss_quantity#5 as bigint))#41 AS agg1#45, cast((avg(UnscaledValue(ss_list_price#6))#42 / 100.0) as decimal(11,6)) AS agg2#46, cast((avg(UnscaledValue(ss_coupon_amt#8))#43 / 100.0) as decimal(11,6)) AS agg3#47, cast((avg(UnscaledValue(ss_sales_price#7))#44 / 100.0) as decimal(11,6)) AS agg4#48] (34) TakeOrderedAndProject Input [5]: [i_item_id#22, agg1#45, agg2#46, agg3#47, agg4#48] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7.sf100/simplified.txt index 4576b8cef59ee..61cc7daa76456 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7.sf100/simplified.txt @@ -12,7 +12,11 @@ TakeOrderedAndProject [i_item_id,agg1,agg2,agg3,agg4] Project [ss_item_sk,ss_cdemo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] BroadcastHashJoin [ss_promo_sk,p_promo_sk] Project [ss_item_sk,ss_cdemo_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_cdemo_sk,ss_item_sk,ss_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_cdemo_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] InputAdapter BroadcastExchange #2 WholeStageCodegen (1) @@ -21,10 +25,6 @@ TakeOrderedAndProject [i_item_id,agg1,agg2,agg3,agg4] ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_year] - Filter [ss_sold_date_sk,ss_cdemo_sk,ss_item_sk,ss_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_cdemo_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] InputAdapter BroadcastExchange #3 WholeStageCodegen (2) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/explain.txt index 586abbd8f3fef..8ee427262b332 
100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/explain.txt @@ -11,14 +11,14 @@ : : :- * Project (15) : : : +- * BroadcastHashJoin Inner BuildRight (14) : : : :- * Project (9) - : : : : +- * BroadcastHashJoin Inner BuildLeft (8) - : : : : :- BroadcastExchange (4) - : : : : : +- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.customer_demographics (1) - : : : : +- * Filter (7) - : : : : +- * ColumnarToRow (6) - : : : : +- Scan parquet default.store_sales (5) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.customer_demographics (4) : : : +- BroadcastExchange (13) : : : +- * Filter (12) : : : +- * ColumnarToRow (11) @@ -39,46 +39,46 @@ +- Scan parquet default.customer_address (29) -(1) Scan parquet default.customer_demographics -Output [3]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3] +(1) Scan parquet default.store_sales +Output [10]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_demographics] -PushedFilters: [IsNotNull(cd_demo_sk), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Advanced Degree)),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College))),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,2 yr Degree)))] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 1] -Input [3]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3] +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_hdemo_sk), Or(Or(And(GreaterThanOrEqual(ss_net_profit,100.00),LessThanOrEqual(ss_net_profit,200.00)),And(GreaterThanOrEqual(ss_net_profit,150.00),LessThanOrEqual(ss_net_profit,300.00))),And(GreaterThanOrEqual(ss_net_profit,50.00),LessThanOrEqual(ss_net_profit,250.00))), Or(Or(And(GreaterThanOrEqual(ss_sales_price,100.00),LessThanOrEqual(ss_sales_price,150.00)),And(GreaterThanOrEqual(ss_sales_price,50.00),LessThanOrEqual(ss_sales_price,100.00))),And(GreaterThanOrEqual(ss_sales_price,150.00),LessThanOrEqual(ss_sales_price,200.00)))] +ReadSchema: struct -(3) Filter [codegen id : 1] -Input [3]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3] -Condition : (isnotnull(cd_demo_sk#1) AND ((((cd_marital_status#2 = M) AND (cd_education_status#3 = Advanced Degree)) OR ((cd_marital_status#2 = S) AND (cd_education_status#3 = College))) OR ((cd_marital_status#2 = W) AND (cd_education_status#3 = 2 yr Degree)))) +(2) ColumnarToRow [codegen id : 6] +Input [10]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] -(4) BroadcastExchange -Input [3]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#4] +(3) Filter [codegen id : 6] +Input [10]: [ss_sold_date_sk#1, 
ss_cdemo_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] +Condition : ((((((isnotnull(ss_store_sk#5) AND isnotnull(ss_addr_sk#4)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_cdemo_sk#2)) AND isnotnull(ss_hdemo_sk#3)) AND ((((ss_net_profit#10 >= 100.00) AND (ss_net_profit#10 <= 200.00)) OR ((ss_net_profit#10 >= 150.00) AND (ss_net_profit#10 <= 300.00))) OR ((ss_net_profit#10 >= 50.00) AND (ss_net_profit#10 <= 250.00)))) AND ((((ss_sales_price#7 >= 100.00) AND (ss_sales_price#7 <= 150.00)) OR ((ss_sales_price#7 >= 50.00) AND (ss_sales_price#7 <= 100.00))) OR ((ss_sales_price#7 >= 150.00) AND (ss_sales_price#7 <= 200.00)))) -(5) Scan parquet default.store_sales -Output [10]: [ss_sold_date_sk#5, ss_cdemo_sk#6, ss_hdemo_sk#7, ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_sales_price#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14] +(4) Scan parquet default.customer_demographics +Output [3]: [cd_demo_sk#11, cd_marital_status#12, cd_education_status#13] Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_hdemo_sk), Or(Or(And(GreaterThanOrEqual(ss_net_profit,100.00),LessThanOrEqual(ss_net_profit,200.00)),And(GreaterThanOrEqual(ss_net_profit,150.00),LessThanOrEqual(ss_net_profit,300.00))),And(GreaterThanOrEqual(ss_net_profit,50.00),LessThanOrEqual(ss_net_profit,250.00))), Or(Or(And(GreaterThanOrEqual(ss_sales_price,100.00),LessThanOrEqual(ss_sales_price,150.00)),And(GreaterThanOrEqual(ss_sales_price,50.00),LessThanOrEqual(ss_sales_price,100.00))),And(GreaterThanOrEqual(ss_sales_price,150.00),LessThanOrEqual(ss_sales_price,200.00)))] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Advanced Degree)),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College))),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,2 yr Degree)))] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [cd_demo_sk#11, cd_marital_status#12, cd_education_status#13] -(6) ColumnarToRow -Input [10]: [ss_sold_date_sk#5, ss_cdemo_sk#6, ss_hdemo_sk#7, ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_sales_price#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14] +(6) Filter [codegen id : 1] +Input [3]: [cd_demo_sk#11, cd_marital_status#12, cd_education_status#13] +Condition : (isnotnull(cd_demo_sk#11) AND ((((cd_marital_status#12 = M) AND (cd_education_status#13 = Advanced Degree)) OR ((cd_marital_status#12 = S) AND (cd_education_status#13 = College))) OR ((cd_marital_status#12 = W) AND (cd_education_status#13 = 2 yr Degree)))) -(7) Filter -Input [10]: [ss_sold_date_sk#5, ss_cdemo_sk#6, ss_hdemo_sk#7, ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_sales_price#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14] -Condition : ((((((isnotnull(ss_store_sk#9) AND isnotnull(ss_addr_sk#8)) AND isnotnull(ss_sold_date_sk#5)) AND isnotnull(ss_cdemo_sk#6)) AND isnotnull(ss_hdemo_sk#7)) AND ((((ss_net_profit#14 >= 100.00) AND (ss_net_profit#14 <= 200.00)) OR ((ss_net_profit#14 >= 150.00) AND (ss_net_profit#14 <= 300.00))) OR ((ss_net_profit#14 >= 50.00) AND (ss_net_profit#14 <= 250.00)))) AND ((((ss_sales_price#11 >= 100.00) AND 
(ss_sales_price#11 <= 150.00)) OR ((ss_sales_price#11 >= 50.00) AND (ss_sales_price#11 <= 100.00))) OR ((ss_sales_price#11 >= 150.00) AND (ss_sales_price#11 <= 200.00)))) +(7) BroadcastExchange +Input [3]: [cd_demo_sk#11, cd_marital_status#12, cd_education_status#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] (8) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cd_demo_sk#1] -Right keys [1]: [ss_cdemo_sk#6] -Join condition: ((((((cd_marital_status#2 = M) AND (cd_education_status#3 = Advanced Degree)) AND (ss_sales_price#11 >= 100.00)) AND (ss_sales_price#11 <= 150.00)) OR ((((cd_marital_status#2 = S) AND (cd_education_status#3 = College)) AND (ss_sales_price#11 >= 50.00)) AND (ss_sales_price#11 <= 100.00))) OR ((((cd_marital_status#2 = W) AND (cd_education_status#3 = 2 yr Degree)) AND (ss_sales_price#11 >= 150.00)) AND (ss_sales_price#11 <= 200.00))) +Left keys [1]: [ss_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#11] +Join condition: ((((((cd_marital_status#12 = M) AND (cd_education_status#13 = Advanced Degree)) AND (ss_sales_price#7 >= 100.00)) AND (ss_sales_price#7 <= 150.00)) OR ((((cd_marital_status#12 = S) AND (cd_education_status#13 = College)) AND (ss_sales_price#7 >= 50.00)) AND (ss_sales_price#7 <= 100.00))) OR ((((cd_marital_status#12 = W) AND (cd_education_status#13 = 2 yr Degree)) AND (ss_sales_price#7 >= 150.00)) AND (ss_sales_price#7 <= 200.00))) (9) Project [codegen id : 6] -Output [11]: [cd_marital_status#2, cd_education_status#3, ss_sold_date_sk#5, ss_hdemo_sk#7, ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_sales_price#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14] -Input [13]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3, ss_sold_date_sk#5, ss_cdemo_sk#6, ss_hdemo_sk#7, ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_sales_price#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14] +Output [11]: [ss_sold_date_sk#1, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, cd_marital_status#12, cd_education_status#13] +Input [13]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, cd_demo_sk#11, cd_marital_status#12, cd_education_status#13] (10) Scan parquet default.household_demographics Output [2]: [hd_demo_sk#15, hd_dep_count#16] @@ -99,13 +99,13 @@ Input [2]: [hd_demo_sk#15, hd_dep_count#16] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] (14) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_hdemo_sk#7] +Left keys [1]: [ss_hdemo_sk#3] Right keys [1]: [hd_demo_sk#15] -Join condition: (((((((cd_marital_status#2 = M) AND (cd_education_status#3 = Advanced Degree)) AND (ss_sales_price#11 >= 100.00)) AND (ss_sales_price#11 <= 150.00)) AND (hd_dep_count#16 = 3)) OR (((((cd_marital_status#2 = S) AND (cd_education_status#3 = College)) AND (ss_sales_price#11 >= 50.00)) AND (ss_sales_price#11 <= 100.00)) AND (hd_dep_count#16 = 1))) OR (((((cd_marital_status#2 = W) AND (cd_education_status#3 = 2 yr Degree)) AND (ss_sales_price#11 >= 150.00)) AND (ss_sales_price#11 <= 200.00)) AND (hd_dep_count#16 = 1))) +Join condition: (((((((cd_marital_status#12 = M) AND (cd_education_status#13 = Advanced Degree)) AND (ss_sales_price#7 >= 100.00)) AND (ss_sales_price#7 <= 150.00)) AND (hd_dep_count#16 = 3)) OR 
(((((cd_marital_status#12 = S) AND (cd_education_status#13 = College)) AND (ss_sales_price#7 >= 50.00)) AND (ss_sales_price#7 <= 100.00)) AND (hd_dep_count#16 = 1))) OR (((((cd_marital_status#12 = W) AND (cd_education_status#13 = 2 yr Degree)) AND (ss_sales_price#7 >= 150.00)) AND (ss_sales_price#7 <= 200.00)) AND (hd_dep_count#16 = 1))) (15) Project [codegen id : 6] -Output [7]: [ss_sold_date_sk#5, ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14] -Input [13]: [cd_marital_status#2, cd_education_status#3, ss_sold_date_sk#5, ss_hdemo_sk#7, ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_sales_price#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14, hd_demo_sk#15, hd_dep_count#16] +Output [7]: [ss_sold_date_sk#1, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] +Input [13]: [ss_sold_date_sk#1, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, cd_marital_status#12, cd_education_status#13, hd_demo_sk#15, hd_dep_count#16] (16) Scan parquet default.date_dim Output [2]: [d_date_sk#18, d_year#19] @@ -130,13 +130,13 @@ Input [1]: [d_date_sk#18] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] (21) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_sold_date_sk#5] +Left keys [1]: [ss_sold_date_sk#1] Right keys [1]: [d_date_sk#18] Join condition: None (22) Project [codegen id : 6] -Output [6]: [ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14] -Input [8]: [ss_sold_date_sk#5, ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14, d_date_sk#18] +Output [6]: [ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] +Input [8]: [ss_sold_date_sk#1, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, d_date_sk#18] (23) Scan parquet default.store Output [1]: [s_store_sk#21] @@ -157,13 +157,13 @@ Input [1]: [s_store_sk#21] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] (27) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_store_sk#9] +Left keys [1]: [ss_store_sk#5] Right keys [1]: [s_store_sk#21] Join condition: None (28) Project [codegen id : 6] -Output [5]: [ss_addr_sk#8, ss_quantity#10, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14] -Input [7]: [ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14, s_store_sk#21] +Output [5]: [ss_addr_sk#4, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] +Input [7]: [ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, s_store_sk#21] (29) Scan parquet default.customer_address Output [3]: [ca_address_sk#23, ca_state#24, ca_country#25] @@ -188,18 +188,18 @@ Input [2]: [ca_address_sk#23, ca_state#24] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] (34) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_addr_sk#8] +Left keys [1]: [ss_addr_sk#4] Right keys [1]: [ca_address_sk#23] -Join condition: ((((ca_state#24 IN (TX,OH) AND (ss_net_profit#14 >= 100.00)) AND (ss_net_profit#14 <= 200.00)) OR 
((ca_state#24 IN (OR,NM,KY) AND (ss_net_profit#14 >= 150.00)) AND (ss_net_profit#14 <= 300.00))) OR ((ca_state#24 IN (VA,TX,MS) AND (ss_net_profit#14 >= 50.00)) AND (ss_net_profit#14 <= 250.00))) +Join condition: ((((ca_state#24 IN (TX,OH) AND (ss_net_profit#10 >= 100.00)) AND (ss_net_profit#10 <= 200.00)) OR ((ca_state#24 IN (OR,NM,KY) AND (ss_net_profit#10 >= 150.00)) AND (ss_net_profit#10 <= 300.00))) OR ((ca_state#24 IN (VA,TX,MS) AND (ss_net_profit#10 >= 50.00)) AND (ss_net_profit#10 <= 250.00))) (35) Project [codegen id : 6] -Output [3]: [ss_quantity#10, ss_ext_sales_price#12, ss_ext_wholesale_cost#13] -Input [7]: [ss_addr_sk#8, ss_quantity#10, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14, ca_address_sk#23, ca_state#24] +Output [3]: [ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9] +Input [7]: [ss_addr_sk#4, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, ca_address_sk#23, ca_state#24] (36) HashAggregate [codegen id : 6] -Input [3]: [ss_quantity#10, ss_ext_sales_price#12, ss_ext_wholesale_cost#13] +Input [3]: [ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9] Keys: [] -Functions [4]: [partial_avg(cast(ss_quantity#10 as bigint)), partial_avg(UnscaledValue(ss_ext_sales_price#12)), partial_avg(UnscaledValue(ss_ext_wholesale_cost#13)), partial_sum(UnscaledValue(ss_ext_wholesale_cost#13))] +Functions [4]: [partial_avg(cast(ss_quantity#6 as bigint)), partial_avg(UnscaledValue(ss_ext_sales_price#8)), partial_avg(UnscaledValue(ss_ext_wholesale_cost#9)), partial_sum(UnscaledValue(ss_ext_wholesale_cost#9))] Aggregate Attributes [7]: [sum#27, count#28, sum#29, count#30, sum#31, count#32, sum#33] Results [7]: [sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40] @@ -210,7 +210,7 @@ Arguments: SinglePartition, true, [id=#41] (38) HashAggregate [codegen id : 7] Input [7]: [sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40] Keys: [] -Functions [4]: [avg(cast(ss_quantity#10 as bigint)), avg(UnscaledValue(ss_ext_sales_price#12)), avg(UnscaledValue(ss_ext_wholesale_cost#13)), sum(UnscaledValue(ss_ext_wholesale_cost#13))] -Aggregate Attributes [4]: [avg(cast(ss_quantity#10 as bigint))#42, avg(UnscaledValue(ss_ext_sales_price#12))#43, avg(UnscaledValue(ss_ext_wholesale_cost#13))#44, sum(UnscaledValue(ss_ext_wholesale_cost#13))#45] -Results [4]: [avg(cast(ss_quantity#10 as bigint))#42 AS avg(ss_quantity)#46, cast((avg(UnscaledValue(ss_ext_sales_price#12))#43 / 100.0) as decimal(11,6)) AS avg(ss_ext_sales_price)#47, cast((avg(UnscaledValue(ss_ext_wholesale_cost#13))#44 / 100.0) as decimal(11,6)) AS avg(ss_ext_wholesale_cost)#48, MakeDecimal(sum(UnscaledValue(ss_ext_wholesale_cost#13))#45,17,2) AS sum(ss_ext_wholesale_cost)#49] +Functions [4]: [avg(cast(ss_quantity#6 as bigint)), avg(UnscaledValue(ss_ext_sales_price#8)), avg(UnscaledValue(ss_ext_wholesale_cost#9)), sum(UnscaledValue(ss_ext_wholesale_cost#9))] +Aggregate Attributes [4]: [avg(cast(ss_quantity#6 as bigint))#42, avg(UnscaledValue(ss_ext_sales_price#8))#43, avg(UnscaledValue(ss_ext_wholesale_cost#9))#44, sum(UnscaledValue(ss_ext_wholesale_cost#9))#45] +Results [4]: [avg(cast(ss_quantity#6 as bigint))#42 AS avg(ss_quantity)#46, cast((avg(UnscaledValue(ss_ext_sales_price#8))#43 / 100.0) as decimal(11,6)) AS avg(ss_ext_sales_price)#47, cast((avg(UnscaledValue(ss_ext_wholesale_cost#9))#44 / 100.0) as decimal(11,6)) AS avg(ss_ext_wholesale_cost)#48, MakeDecimal(sum(UnscaledValue(ss_ext_wholesale_cost#9))#45,17,2) AS sum(ss_ext_wholesale_cost)#49] diff 
--git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/simplified.txt index e410b27e9cf3b..b457788dbd0b2 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/simplified.txt @@ -12,8 +12,12 @@ WholeStageCodegen (7) BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Project [ss_sold_date_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit] BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk,cd_marital_status,cd_education_status,ss_sales_price,hd_dep_count] - Project [cd_marital_status,cd_education_status,ss_sold_date_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit] - BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk,cd_marital_status,cd_education_status,ss_sales_price] + Project [ss_sold_date_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit,cd_marital_status,cd_education_status] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk,cd_marital_status,cd_education_status,ss_sales_price] + Filter [ss_store_sk,ss_addr_sk,ss_sold_date_sk,ss_cdemo_sk,ss_hdemo_sk,ss_net_profit,ss_sales_price] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit] InputAdapter BroadcastExchange #2 WholeStageCodegen (1) @@ -21,10 +25,6 @@ WholeStageCodegen (7) ColumnarToRow InputAdapter Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] - Filter [ss_store_sk,ss_addr_sk,ss_sold_date_sk,ss_cdemo_sk,ss_hdemo_sk,ss_net_profit,ss_sales_price] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit] InputAdapter BroadcastExchange #3 WholeStageCodegen (2) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/explain.txt index e24b656e843aa..a17356ae04a03 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/explain.txt @@ -39,15 +39,15 @@ TakeOrderedAndProject (57) : +- * Sort (39) : +- Exchange (38) : +- * Project (37) - : +- * BroadcastHashJoin Inner BuildLeft (36) - : :- BroadcastExchange (32) - : : +- * Project (31) - : : +- * Filter (30) - : : +- * ColumnarToRow (29) - : : +- Scan parquet default.date_dim (28) - : +- * Filter (35) - : +- * ColumnarToRow (34) - : +- Scan parquet default.store_returns (33) + : +- * BroadcastHashJoin Inner BuildRight (36) + : :- * Filter (30) + : : +- * ColumnarToRow (29) + : : +- Scan parquet default.store_returns (28) + : +- BroadcastExchange (35) + : +- * Project (34) + : +- * Filter (33) + : +- * ColumnarToRow (32) + : +- Scan parquet default.date_dim (31) +- * Sort (51) +- Exchange (50) +- * Project (49) @@ -177,75 +177,75 @@ Arguments: hashpartitioning(cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 Input [7]: [ss_item_sk#2, ss_customer_sk#3, 
ss_ticket_number#5, ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16] Arguments: [cast(ss_customer_sk#3 as bigint) ASC NULLS FIRST, cast(ss_item_sk#2 as bigint) ASC NULLS FIRST, cast(ss_ticket_number#5 as bigint) ASC NULLS FIRST], false, 0 -(28) Scan parquet default.date_dim -Output [2]: [d_date_sk#19, d_quarter_name#20] +(28) Scan parquet default.store_returns +Output [5]: [sr_returned_date_sk#19, sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number), IsNotNull(sr_returned_date_sk)] +ReadSchema: struct -(29) ColumnarToRow [codegen id : 9] -Input [2]: [d_date_sk#19, d_quarter_name#20] +(29) ColumnarToRow [codegen id : 10] +Input [5]: [sr_returned_date_sk#19, sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23] -(30) Filter [codegen id : 9] -Input [2]: [d_date_sk#19, d_quarter_name#20] -Condition : (d_quarter_name#20 IN (2001Q1,2001Q2,2001Q3) AND isnotnull(d_date_sk#19)) +(30) Filter [codegen id : 10] +Input [5]: [sr_returned_date_sk#19, sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23] +Condition : (((isnotnull(sr_customer_sk#21) AND isnotnull(sr_item_sk#20)) AND isnotnull(sr_ticket_number#22)) AND isnotnull(sr_returned_date_sk#19)) -(31) Project [codegen id : 9] -Output [1]: [d_date_sk#19] -Input [2]: [d_date_sk#19, d_quarter_name#20] +(31) Scan parquet default.date_dim +Output [2]: [d_date_sk#24, d_quarter_name#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)] +ReadSchema: struct -(32) BroadcastExchange -Input [1]: [d_date_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] +(32) ColumnarToRow [codegen id : 9] +Input [2]: [d_date_sk#24, d_quarter_name#25] -(33) Scan parquet default.store_returns -Output [5]: [sr_returned_date_sk#22, sr_item_sk#23, sr_customer_sk#24, sr_ticket_number#25, sr_return_quantity#26] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number), IsNotNull(sr_returned_date_sk)] -ReadSchema: struct +(33) Filter [codegen id : 9] +Input [2]: [d_date_sk#24, d_quarter_name#25] +Condition : (d_quarter_name#25 IN (2001Q1,2001Q2,2001Q3) AND isnotnull(d_date_sk#24)) -(34) ColumnarToRow -Input [5]: [sr_returned_date_sk#22, sr_item_sk#23, sr_customer_sk#24, sr_ticket_number#25, sr_return_quantity#26] +(34) Project [codegen id : 9] +Output [1]: [d_date_sk#24] +Input [2]: [d_date_sk#24, d_quarter_name#25] -(35) Filter -Input [5]: [sr_returned_date_sk#22, sr_item_sk#23, sr_customer_sk#24, sr_ticket_number#25, sr_return_quantity#26] -Condition : (((isnotnull(sr_customer_sk#24) AND isnotnull(sr_item_sk#23)) AND isnotnull(sr_ticket_number#25)) AND isnotnull(sr_returned_date_sk#22)) +(35) BroadcastExchange +Input [1]: [d_date_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] (36) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cast(d_date_sk#19 as bigint)] -Right keys [1]: [sr_returned_date_sk#22] +Left keys [1]: 
[sr_returned_date_sk#19] +Right keys [1]: [cast(d_date_sk#24 as bigint)] Join condition: None (37) Project [codegen id : 10] -Output [4]: [sr_item_sk#23, sr_customer_sk#24, sr_ticket_number#25, sr_return_quantity#26] -Input [6]: [d_date_sk#19, sr_returned_date_sk#22, sr_item_sk#23, sr_customer_sk#24, sr_ticket_number#25, sr_return_quantity#26] +Output [4]: [sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23] +Input [6]: [sr_returned_date_sk#19, sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23, d_date_sk#24] (38) Exchange -Input [4]: [sr_item_sk#23, sr_customer_sk#24, sr_ticket_number#25, sr_return_quantity#26] -Arguments: hashpartitioning(sr_customer_sk#24, sr_item_sk#23, sr_ticket_number#25, 5), true, [id=#27] +Input [4]: [sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23] +Arguments: hashpartitioning(sr_customer_sk#21, sr_item_sk#20, sr_ticket_number#22, 5), true, [id=#27] (39) Sort [codegen id : 11] -Input [4]: [sr_item_sk#23, sr_customer_sk#24, sr_ticket_number#25, sr_return_quantity#26] -Arguments: [sr_customer_sk#24 ASC NULLS FIRST, sr_item_sk#23 ASC NULLS FIRST, sr_ticket_number#25 ASC NULLS FIRST], false, 0 +Input [4]: [sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23] +Arguments: [sr_customer_sk#21 ASC NULLS FIRST, sr_item_sk#20 ASC NULLS FIRST, sr_ticket_number#22 ASC NULLS FIRST], false, 0 (40) SortMergeJoin [codegen id : 12] Left keys [3]: [cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] -Right keys [3]: [sr_customer_sk#24, sr_item_sk#23, sr_ticket_number#25] +Right keys [3]: [sr_customer_sk#21, sr_item_sk#20, sr_ticket_number#22] Join condition: None (41) Project [codegen id : 12] -Output [7]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#23, sr_customer_sk#24, sr_return_quantity#26] -Input [11]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#23, sr_customer_sk#24, sr_ticket_number#25, sr_return_quantity#26] +Output [7]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#20, sr_customer_sk#21, sr_return_quantity#23] +Input [11]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23] (42) Exchange -Input [7]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#23, sr_customer_sk#24, sr_return_quantity#26] -Arguments: hashpartitioning(sr_customer_sk#24, sr_item_sk#23, 5), true, [id=#28] +Input [7]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#20, sr_customer_sk#21, sr_return_quantity#23] +Arguments: hashpartitioning(sr_customer_sk#21, sr_item_sk#20, 5), true, [id=#28] (43) Sort [codegen id : 13] -Input [7]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#23, sr_customer_sk#24, sr_return_quantity#26] -Arguments: [sr_customer_sk#24 ASC NULLS FIRST, sr_item_sk#23 ASC NULLS FIRST], false, 0 +Input [7]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#20, sr_customer_sk#21, sr_return_quantity#23] +Arguments: [sr_customer_sk#21 ASC NULLS FIRST, sr_item_sk#20 ASC NULLS FIRST], false, 0 (44) Scan parquet default.catalog_sales Output [4]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] @@ -261,7 +261,7 @@ Input [4]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, 
cs_quanti Input [4]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] Condition : ((isnotnull(cs_bill_customer_sk#30) AND isnotnull(cs_item_sk#31)) AND isnotnull(cs_sold_date_sk#29)) -(47) ReusedExchange [Reuses operator id: 32] +(47) ReusedExchange [Reuses operator id: 35] Output [1]: [d_date_sk#33] (48) BroadcastHashJoin [codegen id : 15] @@ -282,18 +282,18 @@ Input [3]: [cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] Arguments: [cast(cs_bill_customer_sk#30 as bigint) ASC NULLS FIRST, cast(cs_item_sk#31 as bigint) ASC NULLS FIRST], false, 0 (52) SortMergeJoin [codegen id : 17] -Left keys [2]: [sr_customer_sk#24, sr_item_sk#23] +Left keys [2]: [sr_customer_sk#21, sr_item_sk#20] Right keys [2]: [cast(cs_bill_customer_sk#30 as bigint), cast(cs_item_sk#31 as bigint)] Join condition: None (53) Project [codegen id : 17] -Output [6]: [ss_quantity#6, sr_return_quantity#26, cs_quantity#32, s_state#11, i_item_id#15, i_item_desc#16] -Input [10]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#23, sr_customer_sk#24, sr_return_quantity#26, cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] +Output [6]: [ss_quantity#6, sr_return_quantity#23, cs_quantity#32, s_state#11, i_item_id#15, i_item_desc#16] +Input [10]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#20, sr_customer_sk#21, sr_return_quantity#23, cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] (54) HashAggregate [codegen id : 17] -Input [6]: [ss_quantity#6, sr_return_quantity#26, cs_quantity#32, s_state#11, i_item_id#15, i_item_desc#16] +Input [6]: [ss_quantity#6, sr_return_quantity#23, cs_quantity#32, s_state#11, i_item_id#15, i_item_desc#16] Keys [3]: [i_item_id#15, i_item_desc#16, s_state#11] -Functions [9]: [partial_count(ss_quantity#6), partial_avg(cast(ss_quantity#6 as bigint)), partial_stddev_samp(cast(ss_quantity#6 as double)), partial_count(sr_return_quantity#26), partial_avg(cast(sr_return_quantity#26 as bigint)), partial_stddev_samp(cast(sr_return_quantity#26 as double)), partial_count(cs_quantity#32), partial_avg(cast(cs_quantity#32 as bigint)), partial_stddev_samp(cast(cs_quantity#32 as double))] +Functions [9]: [partial_count(ss_quantity#6), partial_avg(cast(ss_quantity#6 as bigint)), partial_stddev_samp(cast(ss_quantity#6 as double)), partial_count(sr_return_quantity#23), partial_avg(cast(sr_return_quantity#23 as bigint)), partial_stddev_samp(cast(sr_return_quantity#23 as double)), partial_count(cs_quantity#32), partial_avg(cast(cs_quantity#32 as bigint)), partial_stddev_samp(cast(cs_quantity#32 as double))] Aggregate Attributes [18]: [count#35, sum#36, count#37, n#38, avg#39, m2#40, count#41, sum#42, count#43, n#44, avg#45, m2#46, count#47, sum#48, count#49, n#50, avg#51, m2#52] Results [21]: [i_item_id#15, i_item_desc#16, s_state#11, count#53, sum#54, count#55, n#56, avg#57, m2#58, count#59, sum#60, count#61, n#62, avg#63, m2#64, count#65, sum#66, count#67, n#68, avg#69, m2#70] @@ -304,9 +304,9 @@ Arguments: hashpartitioning(i_item_id#15, i_item_desc#16, s_state#11, 5), true, (56) HashAggregate [codegen id : 18] Input [21]: [i_item_id#15, i_item_desc#16, s_state#11, count#53, sum#54, count#55, n#56, avg#57, m2#58, count#59, sum#60, count#61, n#62, avg#63, m2#64, count#65, sum#66, count#67, n#68, avg#69, m2#70] Keys [3]: [i_item_id#15, i_item_desc#16, s_state#11] -Functions [9]: [count(ss_quantity#6), avg(cast(ss_quantity#6 as bigint)), stddev_samp(cast(ss_quantity#6 as double)), count(sr_return_quantity#26), avg(cast(sr_return_quantity#26 as 
bigint)), stddev_samp(cast(sr_return_quantity#26 as double)), count(cs_quantity#32), avg(cast(cs_quantity#32 as bigint)), stddev_samp(cast(cs_quantity#32 as double))] -Aggregate Attributes [9]: [count(ss_quantity#6)#72, avg(cast(ss_quantity#6 as bigint))#73, stddev_samp(cast(ss_quantity#6 as double))#74, count(sr_return_quantity#26)#75, avg(cast(sr_return_quantity#26 as bigint))#76, stddev_samp(cast(sr_return_quantity#26 as double))#77, count(cs_quantity#32)#78, avg(cast(cs_quantity#32 as bigint))#79, stddev_samp(cast(cs_quantity#32 as double))#80] -Results [15]: [i_item_id#15, i_item_desc#16, s_state#11, count(ss_quantity#6)#72 AS store_sales_quantitycount#81, avg(cast(ss_quantity#6 as bigint))#73 AS store_sales_quantityave#82, stddev_samp(cast(ss_quantity#6 as double))#74 AS store_sales_quantitystdev#83, (stddev_samp(cast(ss_quantity#6 as double))#74 / avg(cast(ss_quantity#6 as bigint))#73) AS store_sales_quantitycov#84, count(sr_return_quantity#26)#75 AS as_store_returns_quantitycount#85, avg(cast(sr_return_quantity#26 as bigint))#76 AS as_store_returns_quantityave#86, stddev_samp(cast(sr_return_quantity#26 as double))#77 AS as_store_returns_quantitystdev#87, (stddev_samp(cast(sr_return_quantity#26 as double))#77 / avg(cast(sr_return_quantity#26 as bigint))#76) AS store_returns_quantitycov#88, count(cs_quantity#32)#78 AS catalog_sales_quantitycount#89, avg(cast(cs_quantity#32 as bigint))#79 AS catalog_sales_quantityave#90, (stddev_samp(cast(cs_quantity#32 as double))#80 / avg(cast(cs_quantity#32 as bigint))#79) AS catalog_sales_quantitystdev#91, (stddev_samp(cast(cs_quantity#32 as double))#80 / avg(cast(cs_quantity#32 as bigint))#79) AS catalog_sales_quantitycov#92] +Functions [9]: [count(ss_quantity#6), avg(cast(ss_quantity#6 as bigint)), stddev_samp(cast(ss_quantity#6 as double)), count(sr_return_quantity#23), avg(cast(sr_return_quantity#23 as bigint)), stddev_samp(cast(sr_return_quantity#23 as double)), count(cs_quantity#32), avg(cast(cs_quantity#32 as bigint)), stddev_samp(cast(cs_quantity#32 as double))] +Aggregate Attributes [9]: [count(ss_quantity#6)#72, avg(cast(ss_quantity#6 as bigint))#73, stddev_samp(cast(ss_quantity#6 as double))#74, count(sr_return_quantity#23)#75, avg(cast(sr_return_quantity#23 as bigint))#76, stddev_samp(cast(sr_return_quantity#23 as double))#77, count(cs_quantity#32)#78, avg(cast(cs_quantity#32 as bigint))#79, stddev_samp(cast(cs_quantity#32 as double))#80] +Results [15]: [i_item_id#15, i_item_desc#16, s_state#11, count(ss_quantity#6)#72 AS store_sales_quantitycount#81, avg(cast(ss_quantity#6 as bigint))#73 AS store_sales_quantityave#82, stddev_samp(cast(ss_quantity#6 as double))#74 AS store_sales_quantitystdev#83, (stddev_samp(cast(ss_quantity#6 as double))#74 / avg(cast(ss_quantity#6 as bigint))#73) AS store_sales_quantitycov#84, count(sr_return_quantity#23)#75 AS as_store_returns_quantitycount#85, avg(cast(sr_return_quantity#23 as bigint))#76 AS as_store_returns_quantityave#86, stddev_samp(cast(sr_return_quantity#23 as double))#77 AS as_store_returns_quantitystdev#87, (stddev_samp(cast(sr_return_quantity#23 as double))#77 / avg(cast(sr_return_quantity#23 as bigint))#76) AS store_returns_quantitycov#88, count(cs_quantity#32)#78 AS catalog_sales_quantitycount#89, avg(cast(cs_quantity#32 as bigint))#79 AS catalog_sales_quantityave#90, (stddev_samp(cast(cs_quantity#32 as double))#80 / avg(cast(cs_quantity#32 as bigint))#79) AS catalog_sales_quantitystdev#91, (stddev_samp(cast(cs_quantity#32 as double))#80 / avg(cast(cs_quantity#32 as bigint))#79) AS 
catalog_sales_quantitycov#92] (57) TakeOrderedAndProject Input [15]: [i_item_id#15, i_item_desc#16, s_state#11, store_sales_quantitycount#81, store_sales_quantityave#82, store_sales_quantitystdev#83, store_sales_quantitycov#84, as_store_returns_quantitycount#85, as_store_returns_quantityave#86, as_store_returns_quantitystdev#87, store_returns_quantitycov#88, catalog_sales_quantitycount#89, catalog_sales_quantityave#90, catalog_sales_quantitystdev#91, catalog_sales_quantitycov#92] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/simplified.txt index 216adf3588eca..bfb59441f483b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/simplified.txt @@ -69,7 +69,11 @@ TakeOrderedAndProject [i_item_id,i_item_desc,s_state,store_sales_quantitycount,s Exchange [sr_customer_sk,sr_item_sk,sr_ticket_number] #8 WholeStageCodegen (10) Project [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity] - BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Filter [sr_customer_sk,sr_item_sk,sr_ticket_number,sr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity] InputAdapter BroadcastExchange #9 WholeStageCodegen (9) @@ -78,10 +82,6 @@ TakeOrderedAndProject [i_item_id,i_item_desc,s_state,store_sales_quantitycount,s ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_quarter_name] - Filter [sr_customer_sk,sr_item_sk,sr_ticket_number,sr_returned_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity] InputAdapter WholeStageCodegen (16) Sort [cs_bill_customer_sk,cs_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/explain.txt index 0fbe0ccef6d13..88b5168f6049c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/explain.txt @@ -12,15 +12,15 @@ TakeOrderedAndProject (45) : :- * Project (17) : : +- * BroadcastHashJoin Inner BuildRight (16) : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildLeft (9) - : : : :- BroadcastExchange (5) - : : : : +- * Project (4) - : : : : +- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.item (1) - : : : +- * Filter (8) - : : : +- * ColumnarToRow (7) - : : : +- Scan parquet default.store_sales (6) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.item (4) : : +- BroadcastExchange (15) : : +- * Project (14) : : +- * Filter (13) @@ -38,58 +38,58 @@ TakeOrderedAndProject (45) : +- Exchange (29) : +- * Filter (28) : +- * ColumnarToRow (27) - : +- Scan parquet default.customer_address (26) + : +- Scan parquet default.customer (26) +- * Sort (35) +- Exchange 
(34) +- * Filter (33) +- * ColumnarToRow (32) - +- Scan parquet default.customer (31) + +- Scan parquet default.customer_address (31) -(1) Scan parquet default.item -Output [6]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, i_manager_id#6] +(1) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5] Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,8), IsNotNull(i_item_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct -(2) ColumnarToRow [codegen id : 1] -Input [6]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, i_manager_id#6] +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5] -(3) Filter [codegen id : 1] -Input [6]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, i_manager_id#6] -Condition : ((isnotnull(i_manager_id#6) AND (i_manager_id#6 = 8)) AND isnotnull(i_item_sk#1)) +(3) Filter [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5] +Condition : (((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_store_sk#4)) -(4) Project [codegen id : 1] -Output [5]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5] -Input [6]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, i_manager_id#6] +(4) Scan parquet default.item +Output [6]: [i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, i_manager_id#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,8), IsNotNull(i_item_sk)] +ReadSchema: struct -(5) BroadcastExchange -Input [5]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, i_manager_id#11] -(6) Scan parquet default.store_sales -Output [5]: [ss_sold_date_sk#8, ss_item_sk#9, ss_customer_sk#10, ss_store_sk#11, ss_ext_sales_price#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store_sk)] -ReadSchema: struct +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, i_manager_id#11] +Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 8)) AND isnotnull(i_item_sk#6)) -(7) ColumnarToRow -Input [5]: [ss_sold_date_sk#8, ss_item_sk#9, ss_customer_sk#10, ss_store_sk#11, ss_ext_sales_price#12] +(7) Project [codegen id : 1] +Output [5]: [i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] +Input [6]: [i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, i_manager_id#11] -(8) Filter -Input [5]: [ss_sold_date_sk#8, ss_item_sk#9, ss_customer_sk#10, ss_store_sk#11, ss_ext_sales_price#12] -Condition : (((isnotnull(ss_sold_date_sk#8) AND isnotnull(ss_item_sk#9)) AND 
isnotnull(ss_customer_sk#10)) AND isnotnull(ss_store_sk#11)) +(8) BroadcastExchange +Input [5]: [i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] (9) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [i_item_sk#1] -Right keys [1]: [ss_item_sk#9] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#6] Join condition: None (10) Project [codegen id : 4] -Output [8]: [i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, ss_sold_date_sk#8, ss_customer_sk#10, ss_store_sk#11, ss_ext_sales_price#12] -Input [10]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, ss_sold_date_sk#8, ss_item_sk#9, ss_customer_sk#10, ss_store_sk#11, ss_ext_sales_price#12] +Output [8]: [ss_sold_date_sk#1, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] +Input [10]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5, i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] (11) Scan parquet default.date_dim Output [3]: [d_date_sk#13, d_year#14, d_moy#15] @@ -114,13 +114,13 @@ Input [1]: [d_date_sk#13] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] (16) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#8] +Left keys [1]: [ss_sold_date_sk#1] Right keys [1]: [d_date_sk#13] Join condition: None (17) Project [codegen id : 4] -Output [7]: [i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, ss_customer_sk#10, ss_store_sk#11, ss_ext_sales_price#12] -Input [9]: [i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, ss_sold_date_sk#8, ss_customer_sk#10, ss_store_sk#11, ss_ext_sales_price#12, d_date_sk#13] +Output [7]: [ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] +Input [9]: [ss_sold_date_sk#1, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, d_date_sk#13] (18) Scan parquet default.store Output [2]: [s_store_sk#17, s_zip#18] @@ -141,111 +141,111 @@ Input [2]: [s_store_sk#17, s_zip#18] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] (22) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_store_sk#11] +Left keys [1]: [ss_store_sk#4] Right keys [1]: [s_store_sk#17] Join condition: None (23) Project [codegen id : 4] -Output [7]: [i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, ss_customer_sk#10, ss_ext_sales_price#12, s_zip#18] -Input [9]: [i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, ss_customer_sk#10, ss_store_sk#11, ss_ext_sales_price#12, s_store_sk#17, s_zip#18] +Output [7]: [ss_customer_sk#3, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, s_zip#18] +Input [9]: [ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, s_store_sk#17, s_zip#18] (24) Exchange -Input [7]: [i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, ss_customer_sk#10, ss_ext_sales_price#12, s_zip#18] -Arguments: hashpartitioning(ss_customer_sk#10, 5), true, [id=#20] +Input [7]: [ss_customer_sk#3, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, s_zip#18] +Arguments: hashpartitioning(ss_customer_sk#3, 5), true, [id=#20] (25) Sort [codegen id : 5] -Input [7]: [i_brand_id#2, i_brand#3, i_manufact_id#4, 
i_manufact#5, ss_customer_sk#10, ss_ext_sales_price#12, s_zip#18] -Arguments: [ss_customer_sk#10 ASC NULLS FIRST], false, 0 +Input [7]: [ss_customer_sk#3, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, s_zip#18] +Arguments: [ss_customer_sk#3 ASC NULLS FIRST], false, 0 -(26) Scan parquet default.customer_address -Output [2]: [ca_address_sk#21, ca_zip#22] +(26) Scan parquet default.customer +Output [2]: [c_customer_sk#21, c_current_addr_sk#22] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct (27) ColumnarToRow [codegen id : 6] -Input [2]: [ca_address_sk#21, ca_zip#22] +Input [2]: [c_customer_sk#21, c_current_addr_sk#22] (28) Filter [codegen id : 6] -Input [2]: [ca_address_sk#21, ca_zip#22] -Condition : (isnotnull(ca_address_sk#21) AND isnotnull(ca_zip#22)) +Input [2]: [c_customer_sk#21, c_current_addr_sk#22] +Condition : (isnotnull(c_customer_sk#21) AND isnotnull(c_current_addr_sk#22)) (29) Exchange -Input [2]: [ca_address_sk#21, ca_zip#22] -Arguments: hashpartitioning(ca_address_sk#21, 5), true, [id=#23] +Input [2]: [c_customer_sk#21, c_current_addr_sk#22] +Arguments: hashpartitioning(c_current_addr_sk#22, 5), true, [id=#23] (30) Sort [codegen id : 7] -Input [2]: [ca_address_sk#21, ca_zip#22] -Arguments: [ca_address_sk#21 ASC NULLS FIRST], false, 0 +Input [2]: [c_customer_sk#21, c_current_addr_sk#22] +Arguments: [c_current_addr_sk#22 ASC NULLS FIRST], false, 0 -(31) Scan parquet default.customer -Output [2]: [c_customer_sk#24, c_current_addr_sk#25] +(31) Scan parquet default.customer_address +Output [2]: [ca_address_sk#24, ca_zip#25] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip)] +ReadSchema: struct (32) ColumnarToRow [codegen id : 8] -Input [2]: [c_customer_sk#24, c_current_addr_sk#25] +Input [2]: [ca_address_sk#24, ca_zip#25] (33) Filter [codegen id : 8] -Input [2]: [c_customer_sk#24, c_current_addr_sk#25] -Condition : (isnotnull(c_customer_sk#24) AND isnotnull(c_current_addr_sk#25)) +Input [2]: [ca_address_sk#24, ca_zip#25] +Condition : (isnotnull(ca_address_sk#24) AND isnotnull(ca_zip#25)) (34) Exchange -Input [2]: [c_customer_sk#24, c_current_addr_sk#25] -Arguments: hashpartitioning(c_current_addr_sk#25, 5), true, [id=#26] +Input [2]: [ca_address_sk#24, ca_zip#25] +Arguments: hashpartitioning(ca_address_sk#24, 5), true, [id=#26] (35) Sort [codegen id : 9] -Input [2]: [c_customer_sk#24, c_current_addr_sk#25] -Arguments: [c_current_addr_sk#25 ASC NULLS FIRST], false, 0 +Input [2]: [ca_address_sk#24, ca_zip#25] +Arguments: [ca_address_sk#24 ASC NULLS FIRST], false, 0 (36) SortMergeJoin [codegen id : 10] -Left keys [1]: [ca_address_sk#21] -Right keys [1]: [c_current_addr_sk#25] +Left keys [1]: [c_current_addr_sk#22] +Right keys [1]: [ca_address_sk#24] Join condition: None (37) Project [codegen id : 10] -Output [2]: [ca_zip#22, c_customer_sk#24] -Input [4]: [ca_address_sk#21, ca_zip#22, c_customer_sk#24, c_current_addr_sk#25] +Output [2]: [c_customer_sk#21, ca_zip#25] +Input [4]: [c_customer_sk#21, c_current_addr_sk#22, 
ca_address_sk#24, ca_zip#25] (38) Exchange -Input [2]: [ca_zip#22, c_customer_sk#24] -Arguments: hashpartitioning(c_customer_sk#24, 5), true, [id=#27] +Input [2]: [c_customer_sk#21, ca_zip#25] +Arguments: hashpartitioning(c_customer_sk#21, 5), true, [id=#27] (39) Sort [codegen id : 11] -Input [2]: [ca_zip#22, c_customer_sk#24] -Arguments: [c_customer_sk#24 ASC NULLS FIRST], false, 0 +Input [2]: [c_customer_sk#21, ca_zip#25] +Arguments: [c_customer_sk#21 ASC NULLS FIRST], false, 0 (40) SortMergeJoin [codegen id : 12] -Left keys [1]: [ss_customer_sk#10] -Right keys [1]: [c_customer_sk#24] -Join condition: NOT (substr(ca_zip#22, 1, 5) = substr(s_zip#18, 1, 5)) +Left keys [1]: [ss_customer_sk#3] +Right keys [1]: [c_customer_sk#21] +Join condition: NOT (substr(ca_zip#25, 1, 5) = substr(s_zip#18, 1, 5)) (41) Project [codegen id : 12] -Output [5]: [ss_ext_sales_price#12, i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5] -Input [9]: [i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, ss_customer_sk#10, ss_ext_sales_price#12, s_zip#18, ca_zip#22, c_customer_sk#24] +Output [5]: [ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] +Input [9]: [ss_customer_sk#3, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, s_zip#18, c_customer_sk#21, ca_zip#25] (42) HashAggregate [codegen id : 12] -Input [5]: [ss_ext_sales_price#12, i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5] -Keys [4]: [i_brand#3, i_brand_id#2, i_manufact_id#4, i_manufact#5] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#12))] +Input [5]: [ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] +Keys [4]: [i_brand#8, i_brand_id#7, i_manufact_id#9, i_manufact#10] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] Aggregate Attributes [1]: [sum#28] -Results [5]: [i_brand#3, i_brand_id#2, i_manufact_id#4, i_manufact#5, sum#29] +Results [5]: [i_brand#8, i_brand_id#7, i_manufact_id#9, i_manufact#10, sum#29] (43) Exchange -Input [5]: [i_brand#3, i_brand_id#2, i_manufact_id#4, i_manufact#5, sum#29] -Arguments: hashpartitioning(i_brand#3, i_brand_id#2, i_manufact_id#4, i_manufact#5, 5), true, [id=#30] +Input [5]: [i_brand#8, i_brand_id#7, i_manufact_id#9, i_manufact#10, sum#29] +Arguments: hashpartitioning(i_brand#8, i_brand_id#7, i_manufact_id#9, i_manufact#10, 5), true, [id=#30] (44) HashAggregate [codegen id : 13] -Input [5]: [i_brand#3, i_brand_id#2, i_manufact_id#4, i_manufact#5, sum#29] -Keys [4]: [i_brand#3, i_brand_id#2, i_manufact_id#4, i_manufact#5] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#12))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#12))#31] -Results [5]: [i_brand_id#2 AS brand_id#32, i_brand#3 AS brand#33, i_manufact_id#4, i_manufact#5, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#12))#31,17,2) AS ext_price#34] +Input [5]: [i_brand#8, i_brand_id#7, i_manufact_id#9, i_manufact#10, sum#29] +Keys [4]: [i_brand#8, i_brand_id#7, i_manufact_id#9, i_manufact#10] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#31] +Results [5]: [i_brand_id#7 AS brand_id#32, i_brand#8 AS brand#33, i_manufact_id#9, i_manufact#10, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#31,17,2) AS ext_price#34] (45) TakeOrderedAndProject -Input [5]: [brand_id#32, brand#33, i_manufact_id#4, i_manufact#5, ext_price#34] -Arguments: 100, [ext_price#34 DESC NULLS LAST, brand#33 ASC NULLS FIRST, brand_id#32 ASC NULLS FIRST, 
i_manufact_id#4 ASC NULLS FIRST, i_manufact#5 ASC NULLS FIRST], [brand_id#32, brand#33, i_manufact_id#4, i_manufact#5, ext_price#34] +Input [5]: [brand_id#32, brand#33, i_manufact_id#9, i_manufact#10, ext_price#34] +Arguments: 100, [ext_price#34 DESC NULLS LAST, brand#33 ASC NULLS FIRST, brand_id#32 ASC NULLS FIRST, i_manufact_id#9 ASC NULLS FIRST, i_manufact#10 ASC NULLS FIRST], [brand_id#32, brand#33, i_manufact_id#9, i_manufact#10, ext_price#34] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/simplified.txt index c8737d8a70782..05fa3f82e27df 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/simplified.txt @@ -13,12 +13,16 @@ TakeOrderedAndProject [ext_price,brand,brand_id,i_manufact_id,i_manufact] InputAdapter Exchange [ss_customer_sk] #2 WholeStageCodegen (4) - Project [i_brand_id,i_brand,i_manufact_id,i_manufact,ss_customer_sk,ss_ext_sales_price,s_zip] + Project [ss_customer_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact,s_zip] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [i_brand_id,i_brand,i_manufact_id,i_manufact,ss_customer_sk,ss_store_sk,ss_ext_sales_price] + Project [ss_customer_sk,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [i_brand_id,i_brand,i_manufact_id,i_manufact,ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] - BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #3 WholeStageCodegen (1) @@ -27,10 +31,6 @@ TakeOrderedAndProject [ext_price,brand,brand_id,i_manufact_id,i_manufact] ColumnarToRow InputAdapter Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact,i_manager_id] - Filter [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #4 WholeStageCodegen (2) @@ -52,25 +52,25 @@ TakeOrderedAndProject [ext_price,brand,brand_id,i_manufact_id,i_manufact] InputAdapter Exchange [c_customer_sk] #6 WholeStageCodegen (10) - Project [ca_zip,c_customer_sk] - SortMergeJoin [ca_address_sk,c_current_addr_sk] + Project [c_customer_sk,ca_zip] + SortMergeJoin [c_current_addr_sk,ca_address_sk] InputAdapter WholeStageCodegen (7) - Sort [ca_address_sk] + Sort [c_current_addr_sk] InputAdapter - Exchange [ca_address_sk] #7 + Exchange [c_current_addr_sk] #7 WholeStageCodegen (6) - Filter [ca_address_sk,ca_zip] + Filter [c_customer_sk,c_current_addr_sk] ColumnarToRow InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_zip] + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] InputAdapter WholeStageCodegen (9) - Sort [c_current_addr_sk] + Sort [ca_address_sk] InputAdapter - Exchange [c_current_addr_sk] #8 + Exchange [ca_address_sk] #8 WholeStageCodegen (8) - Filter 
[c_customer_sk,c_current_addr_sk] + Filter [ca_address_sk,ca_zip] ColumnarToRow InputAdapter - Scan parquet default.customer [c_customer_sk,c_current_addr_sk] + Scan parquet default.customer_address [ca_address_sk,ca_zip] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt index 6e0a5ced1992a..ffcf6bd4f6d47 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt @@ -296,15 +296,15 @@ Subquery:1 Hosting operator id = 47 Hosting Expression = Subquery scalar-subquer : : : :- * Sort (60) : : : : +- Exchange (59) : : : : +- * Project (58) - : : : : +- * BroadcastHashJoin Inner BuildLeft (57) - : : : : :- BroadcastExchange (53) - : : : : : +- * Project (52) - : : : : : +- * Filter (51) - : : : : : +- * ColumnarToRow (50) - : : : : : +- Scan parquet default.store (49) - : : : : +- * Filter (56) - : : : : +- * ColumnarToRow (55) - : : : : +- Scan parquet default.store_sales (54) + : : : : +- * BroadcastHashJoin Inner BuildRight (57) + : : : : :- * Filter (51) + : : : : : +- * ColumnarToRow (50) + : : : : : +- Scan parquet default.store_sales (49) + : : : : +- BroadcastExchange (56) + : : : : +- * Project (55) + : : : : +- * Filter (54) + : : : : +- * ColumnarToRow (53) + : : : : +- Scan parquet default.store (52) : : : +- * Sort (65) : : : +- Exchange (64) : : : +- * Filter (63) @@ -327,57 +327,57 @@ Subquery:1 Hosting operator id = 47 Hosting Expression = Subquery scalar-subquer +- Scan parquet default.store_returns (88) -(49) Scan parquet default.store +(49) Scan parquet default.store_sales +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(50) ColumnarToRow [codegen id : 2] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(51) Filter [codegen id : 2] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(52) Scan parquet default.store Output [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] ReadSchema: struct -(50) ColumnarToRow [codegen id : 1] +(53) ColumnarToRow [codegen id : 1] Input [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] -(51) Filter [codegen id : 1] +(54) Filter [codegen id : 1] Input [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] Condition : (((isnotnull(s_market_id#21) AND (s_market_id#21 = 8)) AND isnotnull(s_store_sk#19)) AND isnotnull(s_zip#23)) -(52) Project [codegen id : 1] +(55) Project [codegen id : 1] Output [4]: [s_store_sk#19, s_store_name#20, s_state#22, s_zip#23] Input [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] -(53) BroadcastExchange +(56) BroadcastExchange Input [4]: [s_store_sk#19, 
s_store_name#20, s_state#22, s_zip#23] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#48] -(54) Scan parquet default.store_sales -Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] -ReadSchema: struct - -(55) ColumnarToRow -Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] - -(56) Filter -Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] -Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) - (57) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [s_store_sk#19] -Right keys [1]: [ss_store_sk#3] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#19] Join condition: None (58) Project [codegen id : 2] -Output [7]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5] -Input [9]: [s_store_sk#19, s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Output [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23] +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, s_store_sk#19, s_store_name#20, s_state#22, s_zip#23] (59) Exchange -Input [7]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23] Arguments: hashpartitioning(ss_item_sk#1, 5), true, [id=#49] (60) Sort [codegen id : 3] -Input [7]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23] Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 (61) Scan parquet default.item @@ -408,15 +408,15 @@ Right keys [1]: [i_item_sk#6] Join condition: None (67) Project [codegen id : 6] -Output [12]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Input [13]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Input [13]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] (68) Exchange -Input [12]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, 
i_manager_id#11] Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#51] (69) Sort [codegen id : 7] -Input [12]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 (70) Scan parquet default.customer @@ -447,15 +447,15 @@ Right keys [1]: [c_customer_sk#14] Join condition: None (76) Project [codegen id : 10] -Output [14]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] -Input [16]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Output [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] +Input [16]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] (77) Exchange -Input [14]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] +Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] Arguments: hashpartitioning(c_birth_country#17, s_zip#23, 5), true, [id=#53] (78) Sort [codegen id : 11] -Input [14]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] +Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] Arguments: [c_birth_country#17 ASC NULLS FIRST, s_zip#23 ASC NULLS FIRST], false, 0 (79) Scan parquet default.customer_address @@ -486,15 +486,15 @@ Right keys [2]: [upper(ca_country#27), ca_zip#26] Join condition: None (85) Project [codegen id : 14] -Output [13]: [s_store_name#20, s_state#22, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] -Input [17]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17, ca_state#25, ca_zip#26, ca_country#27] +Output [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, 
i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Input [17]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17, ca_state#25, ca_zip#26, ca_country#27] (86) Exchange -Input [13]: [s_store_name#20, s_state#22, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] Arguments: hashpartitioning(cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint), 5), true, [id=#55] (87) Sort [codegen id : 15] -Input [13]: [s_store_name#20, s_state#22, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] Arguments: [cast(ss_ticket_number#4 as bigint) ASC NULLS FIRST, cast(ss_item_sk#1 as bigint) ASC NULLS FIRST], false, 0 (88) Scan parquet default.store_returns @@ -526,7 +526,7 @@ Join condition: None (94) Project [codegen id : 18] Output [11]: [ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] -Input [15]: [s_store_name#20, s_state#22, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25, sr_item_sk#30, sr_ticket_number#31] +Input [15]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25, sr_item_sk#30, sr_ticket_number#31] (95) HashAggregate [codegen id : 18] Input [11]: [ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/simplified.txt index f51d1972b630f..10f874f8f5543 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/simplified.txt @@ -21,7 +21,7 @@ WholeStageCodegen (14) InputAdapter Exchange [ss_ticket_number,ss_item_sk] #12 WholeStageCodegen (14) - Project [s_store_name,s_state,ss_item_sk,ss_ticket_number,ss_net_paid,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] SortMergeJoin [c_birth_country,s_zip,ca_country,ca_zip] InputAdapter WholeStageCodegen (11) @@ -29,7 +29,7 @@ WholeStageCodegen (14) InputAdapter Exchange [c_birth_country,s_zip] #13 WholeStageCodegen (10) - Project 
[s_store_name,s_state,s_zip,ss_item_sk,ss_ticket_number,ss_net_paid,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] SortMergeJoin [ss_customer_sk,c_customer_sk] InputAdapter WholeStageCodegen (7) @@ -37,7 +37,7 @@ WholeStageCodegen (14) InputAdapter Exchange [ss_customer_sk] #14 WholeStageCodegen (6) - Project [s_store_name,s_state,s_zip,ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_paid,i_current_price,i_size,i_color,i_units,i_manager_id] + Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] SortMergeJoin [ss_item_sk,i_item_sk] InputAdapter WholeStageCodegen (3) @@ -45,8 +45,12 @@ WholeStageCodegen (14) InputAdapter Exchange [ss_item_sk] #15 WholeStageCodegen (2) - Project [s_store_name,s_state,s_zip,ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_paid] - BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] InputAdapter BroadcastExchange #16 WholeStageCodegen (1) @@ -55,10 +59,6 @@ WholeStageCodegen (14) ColumnarToRow InputAdapter Scan parquet default.store [s_store_sk,s_store_name,s_market_id,s_state,s_zip] - Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] InputAdapter WholeStageCodegen (5) Sort [i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt index cbac3787cab6c..73f36e3a9ca23 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt @@ -296,15 +296,15 @@ Subquery:1 Hosting operator id = 47 Hosting Expression = Subquery scalar-subquer : : : :- * Sort (60) : : : : +- Exchange (59) : : : : +- * Project (58) - : : : : +- * BroadcastHashJoin Inner BuildLeft (57) - : : : : :- BroadcastExchange (53) - : : : : : +- * Project (52) - : : : : : +- * Filter (51) - : : : : : +- * ColumnarToRow (50) - : : : : : +- Scan parquet default.store (49) - : : : : +- * Filter (56) - : : : : +- * ColumnarToRow (55) - : : : : +- Scan parquet default.store_sales (54) + : : : : +- * BroadcastHashJoin Inner BuildRight (57) + : : : : :- * Filter (51) + : : : : : +- * ColumnarToRow (50) + : : : : : +- Scan parquet default.store_sales (49) + : : : : +- BroadcastExchange (56) + : : : : +- * Project (55) + : : : : +- * Filter (54) + : : : : +- * ColumnarToRow (53) + : : : : +- Scan parquet default.store (52) : : : +- * Sort (65) : : : +- Exchange (64) : : : +- * Filter (63) @@ -327,57 +327,57 @@ Subquery:1 Hosting operator id = 47 Hosting Expression = Subquery scalar-subquer +- Scan parquet default.store_returns (88) -(49) Scan parquet default.store +(49) Scan parquet default.store_sales +Output [5]: [ss_item_sk#1, ss_customer_sk#2, 
ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(50) ColumnarToRow [codegen id : 2] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(51) Filter [codegen id : 2] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(52) Scan parquet default.store Output [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] ReadSchema: struct -(50) ColumnarToRow [codegen id : 1] +(53) ColumnarToRow [codegen id : 1] Input [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] -(51) Filter [codegen id : 1] +(54) Filter [codegen id : 1] Input [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] Condition : (((isnotnull(s_market_id#21) AND (s_market_id#21 = 8)) AND isnotnull(s_store_sk#19)) AND isnotnull(s_zip#23)) -(52) Project [codegen id : 1] +(55) Project [codegen id : 1] Output [4]: [s_store_sk#19, s_store_name#20, s_state#22, s_zip#23] Input [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] -(53) BroadcastExchange +(56) BroadcastExchange Input [4]: [s_store_sk#19, s_store_name#20, s_state#22, s_zip#23] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#48] -(54) Scan parquet default.store_sales -Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] -ReadSchema: struct - -(55) ColumnarToRow -Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] - -(56) Filter -Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] -Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) - (57) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [s_store_sk#19] -Right keys [1]: [ss_store_sk#3] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#19] Join condition: None (58) Project [codegen id : 2] -Output [7]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5] -Input [9]: [s_store_sk#19, s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Output [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23] +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, s_store_sk#19, s_store_name#20, s_state#22, s_zip#23] (59) Exchange -Input [7]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, 
s_state#22, s_zip#23] Arguments: hashpartitioning(ss_item_sk#1, 5), true, [id=#49] (60) Sort [codegen id : 3] -Input [7]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23] Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 (61) Scan parquet default.item @@ -408,15 +408,15 @@ Right keys [1]: [i_item_sk#6] Join condition: None (67) Project [codegen id : 6] -Output [12]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Input [13]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Input [13]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] (68) Exchange -Input [12]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#51] (69) Sort [codegen id : 7] -Input [12]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 (70) Scan parquet default.customer @@ -447,15 +447,15 @@ Right keys [1]: [c_customer_sk#14] Join condition: None (76) Project [codegen id : 10] -Output [14]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] -Input [16]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Output [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] +Input [16]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] (77) Exchange -Input [14]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, 
c_last_name#16, c_birth_country#17] +Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] Arguments: hashpartitioning(c_birth_country#17, s_zip#23, 5), true, [id=#53] (78) Sort [codegen id : 11] -Input [14]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] +Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] Arguments: [c_birth_country#17 ASC NULLS FIRST, s_zip#23 ASC NULLS FIRST], false, 0 (79) Scan parquet default.customer_address @@ -486,15 +486,15 @@ Right keys [2]: [upper(ca_country#27), ca_zip#26] Join condition: None (85) Project [codegen id : 14] -Output [13]: [s_store_name#20, s_state#22, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] -Input [17]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17, ca_state#25, ca_zip#26, ca_country#27] +Output [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Input [17]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17, ca_state#25, ca_zip#26, ca_country#27] (86) Exchange -Input [13]: [s_store_name#20, s_state#22, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] Arguments: hashpartitioning(cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint), 5), true, [id=#55] (87) Sort [codegen id : 15] -Input [13]: [s_store_name#20, s_state#22, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] Arguments: [cast(ss_ticket_number#4 as bigint) ASC NULLS FIRST, cast(ss_item_sk#1 as bigint) ASC NULLS FIRST], false, 0 (88) Scan parquet default.store_returns @@ -526,7 +526,7 @@ Join condition: None (94) Project [codegen id : 18] Output [11]: [ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] -Input [15]: [s_store_name#20, s_state#22, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, 
c_first_name#15, c_last_name#16, ca_state#25, sr_item_sk#30, sr_ticket_number#31] +Input [15]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25, sr_item_sk#30, sr_ticket_number#31] (95) HashAggregate [codegen id : 18] Input [11]: [ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/simplified.txt index f51d1972b630f..10f874f8f5543 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/simplified.txt @@ -21,7 +21,7 @@ WholeStageCodegen (14) InputAdapter Exchange [ss_ticket_number,ss_item_sk] #12 WholeStageCodegen (14) - Project [s_store_name,s_state,ss_item_sk,ss_ticket_number,ss_net_paid,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] SortMergeJoin [c_birth_country,s_zip,ca_country,ca_zip] InputAdapter WholeStageCodegen (11) @@ -29,7 +29,7 @@ WholeStageCodegen (14) InputAdapter Exchange [c_birth_country,s_zip] #13 WholeStageCodegen (10) - Project [s_store_name,s_state,s_zip,ss_item_sk,ss_ticket_number,ss_net_paid,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] SortMergeJoin [ss_customer_sk,c_customer_sk] InputAdapter WholeStageCodegen (7) @@ -37,7 +37,7 @@ WholeStageCodegen (14) InputAdapter Exchange [ss_customer_sk] #14 WholeStageCodegen (6) - Project [s_store_name,s_state,s_zip,ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_paid,i_current_price,i_size,i_color,i_units,i_manager_id] + Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] SortMergeJoin [ss_item_sk,i_item_sk] InputAdapter WholeStageCodegen (3) @@ -45,8 +45,12 @@ WholeStageCodegen (14) InputAdapter Exchange [ss_item_sk] #15 WholeStageCodegen (2) - Project [s_store_name,s_state,s_zip,ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_paid] - BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] InputAdapter BroadcastExchange #16 WholeStageCodegen (1) @@ -55,10 +59,6 @@ WholeStageCodegen (14) ColumnarToRow InputAdapter Scan parquet default.store [s_store_sk,s_store_name,s_market_id,s_state,s_zip] - Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] InputAdapter WholeStageCodegen (5) Sort 
[i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/explain.txt index 87a72d3bbe777..c6dc3db869003 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/explain.txt @@ -39,15 +39,15 @@ TakeOrderedAndProject (57) : +- * Sort (39) : +- Exchange (38) : +- * Project (37) - : +- * BroadcastHashJoin Inner BuildLeft (36) - : :- BroadcastExchange (32) - : : +- * Project (31) - : : +- * Filter (30) - : : +- * ColumnarToRow (29) - : : +- Scan parquet default.date_dim (28) - : +- * Filter (35) - : +- * ColumnarToRow (34) - : +- Scan parquet default.store_returns (33) + : +- * BroadcastHashJoin Inner BuildRight (36) + : :- * Filter (30) + : : +- * ColumnarToRow (29) + : : +- Scan parquet default.store_returns (28) + : +- BroadcastExchange (35) + : +- * Project (34) + : +- * Filter (33) + : +- * ColumnarToRow (32) + : +- Scan parquet default.date_dim (31) +- * Sort (51) +- Exchange (50) +- * Project (49) @@ -177,75 +177,75 @@ Arguments: hashpartitioning(cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 Input [8]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] Arguments: [cast(ss_customer_sk#3 as bigint) ASC NULLS FIRST, cast(ss_item_sk#2 as bigint) ASC NULLS FIRST, cast(ss_ticket_number#5 as bigint) ASC NULLS FIRST], false, 0 -(28) Scan parquet default.date_dim -Output [3]: [d_date_sk#21, d_year#22, d_moy#23] +(28) Scan parquet default.store_returns +Output [5]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), EqualTo(d_year,2001), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number), IsNotNull(sr_returned_date_sk)] +ReadSchema: struct -(29) ColumnarToRow [codegen id : 9] -Input [3]: [d_date_sk#21, d_year#22, d_moy#23] +(29) ColumnarToRow [codegen id : 10] +Input [5]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25] -(30) Filter [codegen id : 9] -Input [3]: [d_date_sk#21, d_year#22, d_moy#23] -Condition : (((((isnotnull(d_moy#23) AND isnotnull(d_year#22)) AND (d_moy#23 >= 4)) AND (d_moy#23 <= 10)) AND (d_year#22 = 2001)) AND isnotnull(d_date_sk#21)) +(30) Filter [codegen id : 10] +Input [5]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25] +Condition : (((isnotnull(sr_customer_sk#23) AND isnotnull(sr_item_sk#22)) AND isnotnull(sr_ticket_number#24)) AND isnotnull(sr_returned_date_sk#21)) -(31) Project [codegen id : 9] -Output [1]: [d_date_sk#21] -Input [3]: [d_date_sk#21, d_year#22, d_moy#23] +(31) Scan parquet default.date_dim +Output [3]: [d_date_sk#26, d_year#27, d_moy#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct -(32) BroadcastExchange -Input [1]: 
[d_date_sk#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#24] +(32) ColumnarToRow [codegen id : 9] +Input [3]: [d_date_sk#26, d_year#27, d_moy#28] -(33) Scan parquet default.store_returns -Output [5]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_net_loss#29] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number), IsNotNull(sr_returned_date_sk)] -ReadSchema: struct +(33) Filter [codegen id : 9] +Input [3]: [d_date_sk#26, d_year#27, d_moy#28] +Condition : (((((isnotnull(d_moy#28) AND isnotnull(d_year#27)) AND (d_moy#28 >= 4)) AND (d_moy#28 <= 10)) AND (d_year#27 = 2001)) AND isnotnull(d_date_sk#26)) -(34) ColumnarToRow -Input [5]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_net_loss#29] +(34) Project [codegen id : 9] +Output [1]: [d_date_sk#26] +Input [3]: [d_date_sk#26, d_year#27, d_moy#28] -(35) Filter -Input [5]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_net_loss#29] -Condition : (((isnotnull(sr_customer_sk#27) AND isnotnull(sr_item_sk#26)) AND isnotnull(sr_ticket_number#28)) AND isnotnull(sr_returned_date_sk#25)) +(35) BroadcastExchange +Input [1]: [d_date_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] (36) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cast(d_date_sk#21 as bigint)] -Right keys [1]: [sr_returned_date_sk#25] +Left keys [1]: [sr_returned_date_sk#21] +Right keys [1]: [cast(d_date_sk#26 as bigint)] Join condition: None (37) Project [codegen id : 10] -Output [4]: [sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_net_loss#29] -Input [6]: [d_date_sk#21, sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_net_loss#29] +Output [4]: [sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25] +Input [6]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25, d_date_sk#26] (38) Exchange -Input [4]: [sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_net_loss#29] -Arguments: hashpartitioning(sr_customer_sk#27, sr_item_sk#26, sr_ticket_number#28, 5), true, [id=#30] +Input [4]: [sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25] +Arguments: hashpartitioning(sr_customer_sk#23, sr_item_sk#22, sr_ticket_number#24, 5), true, [id=#30] (39) Sort [codegen id : 11] -Input [4]: [sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_net_loss#29] -Arguments: [sr_customer_sk#27 ASC NULLS FIRST, sr_item_sk#26 ASC NULLS FIRST, sr_ticket_number#28 ASC NULLS FIRST], false, 0 +Input [4]: [sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25] +Arguments: [sr_customer_sk#23 ASC NULLS FIRST, sr_item_sk#22 ASC NULLS FIRST, sr_ticket_number#24 ASC NULLS FIRST], false, 0 (40) SortMergeJoin [codegen id : 12] Left keys [3]: [cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] -Right keys [3]: [sr_customer_sk#27, sr_item_sk#26, sr_ticket_number#28] +Right keys [3]: [sr_customer_sk#23, sr_item_sk#22, sr_ticket_number#24] Join condition: None (41) Project [codegen id : 12] -Output [8]: [ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_net_loss#29] -Input [12]: [ss_item_sk#2, ss_customer_sk#3, 
ss_ticket_number#5, ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_net_loss#29] +Output [8]: [ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_net_loss#25] +Input [12]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25] (42) Exchange -Input [8]: [ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_net_loss#29] -Arguments: hashpartitioning(sr_customer_sk#27, sr_item_sk#26, 5), true, [id=#31] +Input [8]: [ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_net_loss#25] +Arguments: hashpartitioning(sr_customer_sk#23, sr_item_sk#22, 5), true, [id=#31] (43) Sort [codegen id : 13] -Input [8]: [ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_net_loss#29] -Arguments: [sr_customer_sk#27 ASC NULLS FIRST, sr_item_sk#26 ASC NULLS FIRST], false, 0 +Input [8]: [ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_net_loss#25] +Arguments: [sr_customer_sk#23 ASC NULLS FIRST, sr_item_sk#22 ASC NULLS FIRST], false, 0 (44) Scan parquet default.catalog_sales Output [4]: [cs_sold_date_sk#32, cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] @@ -261,7 +261,7 @@ Input [4]: [cs_sold_date_sk#32, cs_bill_customer_sk#33, cs_item_sk#34, cs_net_pr Input [4]: [cs_sold_date_sk#32, cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] Condition : ((isnotnull(cs_bill_customer_sk#33) AND isnotnull(cs_item_sk#34)) AND isnotnull(cs_sold_date_sk#32)) -(47) ReusedExchange [Reuses operator id: 32] +(47) ReusedExchange [Reuses operator id: 35] Output [1]: [d_date_sk#36] (48) BroadcastHashJoin [codegen id : 15] @@ -282,18 +282,18 @@ Input [3]: [cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] Arguments: [cast(cs_bill_customer_sk#33 as bigint) ASC NULLS FIRST, cast(cs_item_sk#34 as bigint) ASC NULLS FIRST], false, 0 (52) SortMergeJoin [codegen id : 17] -Left keys [2]: [sr_customer_sk#27, sr_item_sk#26] +Left keys [2]: [sr_customer_sk#23, sr_item_sk#22] Right keys [2]: [cast(cs_bill_customer_sk#33 as bigint), cast(cs_item_sk#34 as bigint)] Join condition: None (53) Project [codegen id : 17] -Output [7]: [ss_net_profit#6, sr_net_loss#29, cs_net_profit#35, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] -Input [11]: [ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_net_loss#29, cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] +Output [7]: [ss_net_profit#6, sr_net_loss#25, cs_net_profit#35, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] +Input [11]: [ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_net_loss#25, cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] (54) HashAggregate [codegen id : 17] -Input [7]: [ss_net_profit#6, sr_net_loss#29, cs_net_profit#35, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] +Input [7]: [ss_net_profit#6, sr_net_loss#25, cs_net_profit#35, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] Keys [4]: [i_item_id#17, i_item_desc#18, 
s_store_id#12, s_store_name#13] -Functions [3]: [partial_sum(UnscaledValue(ss_net_profit#6)), partial_sum(UnscaledValue(sr_net_loss#29)), partial_sum(UnscaledValue(cs_net_profit#35))] +Functions [3]: [partial_sum(UnscaledValue(ss_net_profit#6)), partial_sum(UnscaledValue(sr_net_loss#25)), partial_sum(UnscaledValue(cs_net_profit#35))] Aggregate Attributes [3]: [sum#38, sum#39, sum#40] Results [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, sum#41, sum#42, sum#43] @@ -304,9 +304,9 @@ Arguments: hashpartitioning(i_item_id#17, i_item_desc#18, s_store_id#12, s_store (56) HashAggregate [codegen id : 18] Input [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, sum#41, sum#42, sum#43] Keys [4]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13] -Functions [3]: [sum(UnscaledValue(ss_net_profit#6)), sum(UnscaledValue(sr_net_loss#29)), sum(UnscaledValue(cs_net_profit#35))] -Aggregate Attributes [3]: [sum(UnscaledValue(ss_net_profit#6))#45, sum(UnscaledValue(sr_net_loss#29))#46, sum(UnscaledValue(cs_net_profit#35))#47] -Results [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, MakeDecimal(sum(UnscaledValue(ss_net_profit#6))#45,17,2) AS store_sales_profit#48, MakeDecimal(sum(UnscaledValue(sr_net_loss#29))#46,17,2) AS store_returns_loss#49, MakeDecimal(sum(UnscaledValue(cs_net_profit#35))#47,17,2) AS catalog_sales_profit#50] +Functions [3]: [sum(UnscaledValue(ss_net_profit#6)), sum(UnscaledValue(sr_net_loss#25)), sum(UnscaledValue(cs_net_profit#35))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_net_profit#6))#45, sum(UnscaledValue(sr_net_loss#25))#46, sum(UnscaledValue(cs_net_profit#35))#47] +Results [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, MakeDecimal(sum(UnscaledValue(ss_net_profit#6))#45,17,2) AS store_sales_profit#48, MakeDecimal(sum(UnscaledValue(sr_net_loss#25))#46,17,2) AS store_returns_loss#49, MakeDecimal(sum(UnscaledValue(cs_net_profit#35))#47,17,2) AS catalog_sales_profit#50] (57) TakeOrderedAndProject Input [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, store_sales_profit#48, store_returns_loss#49, catalog_sales_profit#50] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/simplified.txt index 8e61cf9c519fd..ad9fa718ff2bd 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/simplified.txt @@ -69,7 +69,11 @@ TakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales Exchange [sr_customer_sk,sr_item_sk,sr_ticket_number] #8 WholeStageCodegen (10) Project [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss] - BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Filter [sr_customer_sk,sr_item_sk,sr_ticket_number,sr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss] InputAdapter BroadcastExchange #9 WholeStageCodegen (9) @@ -78,10 +82,6 @@ TakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - Filter [sr_customer_sk,sr_item_sk,sr_ticket_number,sr_returned_date_sk] - ColumnarToRow - InputAdapter - Scan parquet 
default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss] InputAdapter WholeStageCodegen (16) Sort [cs_bill_customer_sk,cs_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/explain.txt index 35e24698c517e..a949b93f3bcb0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/explain.txt @@ -39,15 +39,15 @@ TakeOrderedAndProject (61) : +- * Sort (39) : +- Exchange (38) : +- * Project (37) - : +- * BroadcastHashJoin Inner BuildLeft (36) - : :- BroadcastExchange (32) - : : +- * Project (31) - : : +- * Filter (30) - : : +- * ColumnarToRow (29) - : : +- Scan parquet default.date_dim (28) - : +- * Filter (35) - : +- * ColumnarToRow (34) - : +- Scan parquet default.store_returns (33) + : +- * BroadcastHashJoin Inner BuildRight (36) + : :- * Filter (30) + : : +- * ColumnarToRow (29) + : : +- Scan parquet default.store_returns (28) + : +- BroadcastExchange (35) + : +- * Project (34) + : +- * Filter (33) + : +- * ColumnarToRow (32) + : +- Scan parquet default.date_dim (31) +- * Sort (55) +- Exchange (54) +- * Project (53) @@ -181,75 +181,75 @@ Arguments: hashpartitioning(cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 Input [8]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] Arguments: [cast(ss_customer_sk#3 as bigint) ASC NULLS FIRST, cast(ss_item_sk#2 as bigint) ASC NULLS FIRST, cast(ss_ticket_number#5 as bigint) ASC NULLS FIRST], false, 0 -(28) Scan parquet default.date_dim -Output [3]: [d_date_sk#21, d_year#22, d_moy#23] +(28) Scan parquet default.store_returns +Output [5]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_return_quantity#25] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,9), LessThanOrEqual(d_moy,12), EqualTo(d_year,1999), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number), IsNotNull(sr_returned_date_sk)] +ReadSchema: struct -(29) ColumnarToRow [codegen id : 9] -Input [3]: [d_date_sk#21, d_year#22, d_moy#23] +(29) ColumnarToRow [codegen id : 10] +Input [5]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_return_quantity#25] -(30) Filter [codegen id : 9] -Input [3]: [d_date_sk#21, d_year#22, d_moy#23] -Condition : (((((isnotnull(d_moy#23) AND isnotnull(d_year#22)) AND (d_moy#23 >= 9)) AND (d_moy#23 <= 12)) AND (d_year#22 = 1999)) AND isnotnull(d_date_sk#21)) +(30) Filter [codegen id : 10] +Input [5]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_return_quantity#25] +Condition : (((isnotnull(sr_customer_sk#23) AND isnotnull(sr_item_sk#22)) AND isnotnull(sr_ticket_number#24)) AND isnotnull(sr_returned_date_sk#21)) -(31) Project [codegen id : 9] -Output [1]: [d_date_sk#21] -Input [3]: [d_date_sk#21, d_year#22, d_moy#23] +(31) Scan parquet default.date_dim +Output [3]: [d_date_sk#26, d_year#27, d_moy#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), 
IsNotNull(d_year), GreaterThanOrEqual(d_moy,9), LessThanOrEqual(d_moy,12), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct -(32) BroadcastExchange -Input [1]: [d_date_sk#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#24] +(32) ColumnarToRow [codegen id : 9] +Input [3]: [d_date_sk#26, d_year#27, d_moy#28] -(33) Scan parquet default.store_returns -Output [5]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_return_quantity#29] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number), IsNotNull(sr_returned_date_sk)] -ReadSchema: struct +(33) Filter [codegen id : 9] +Input [3]: [d_date_sk#26, d_year#27, d_moy#28] +Condition : (((((isnotnull(d_moy#28) AND isnotnull(d_year#27)) AND (d_moy#28 >= 9)) AND (d_moy#28 <= 12)) AND (d_year#27 = 1999)) AND isnotnull(d_date_sk#26)) -(34) ColumnarToRow -Input [5]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_return_quantity#29] +(34) Project [codegen id : 9] +Output [1]: [d_date_sk#26] +Input [3]: [d_date_sk#26, d_year#27, d_moy#28] -(35) Filter -Input [5]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_return_quantity#29] -Condition : (((isnotnull(sr_customer_sk#27) AND isnotnull(sr_item_sk#26)) AND isnotnull(sr_ticket_number#28)) AND isnotnull(sr_returned_date_sk#25)) +(35) BroadcastExchange +Input [1]: [d_date_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] (36) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cast(d_date_sk#21 as bigint)] -Right keys [1]: [sr_returned_date_sk#25] +Left keys [1]: [sr_returned_date_sk#21] +Right keys [1]: [cast(d_date_sk#26 as bigint)] Join condition: None (37) Project [codegen id : 10] -Output [4]: [sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_return_quantity#29] -Input [6]: [d_date_sk#21, sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_return_quantity#29] +Output [4]: [sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_return_quantity#25] +Input [6]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_return_quantity#25, d_date_sk#26] (38) Exchange -Input [4]: [sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_return_quantity#29] -Arguments: hashpartitioning(sr_customer_sk#27, sr_item_sk#26, sr_ticket_number#28, 5), true, [id=#30] +Input [4]: [sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_return_quantity#25] +Arguments: hashpartitioning(sr_customer_sk#23, sr_item_sk#22, sr_ticket_number#24, 5), true, [id=#30] (39) Sort [codegen id : 11] -Input [4]: [sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_return_quantity#29] -Arguments: [sr_customer_sk#27 ASC NULLS FIRST, sr_item_sk#26 ASC NULLS FIRST, sr_ticket_number#28 ASC NULLS FIRST], false, 0 +Input [4]: [sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_return_quantity#25] +Arguments: [sr_customer_sk#23 ASC NULLS FIRST, sr_item_sk#22 ASC NULLS FIRST, sr_ticket_number#24 ASC NULLS FIRST], false, 0 (40) SortMergeJoin [codegen id : 12] Left keys [3]: [cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] -Right keys [3]: [sr_customer_sk#27, sr_item_sk#26, sr_ticket_number#28] +Right keys [3]: [sr_customer_sk#23, sr_item_sk#22, sr_ticket_number#24] Join 
condition: None (41) Project [codegen id : 12] -Output [8]: [ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_return_quantity#29] -Input [12]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_return_quantity#29] +Output [8]: [ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_return_quantity#25] +Input [12]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_return_quantity#25] (42) Exchange -Input [8]: [ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_return_quantity#29] -Arguments: hashpartitioning(sr_customer_sk#27, sr_item_sk#26, 5), true, [id=#31] +Input [8]: [ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_return_quantity#25] +Arguments: hashpartitioning(sr_customer_sk#23, sr_item_sk#22, 5), true, [id=#31] (43) Sort [codegen id : 13] -Input [8]: [ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_return_quantity#29] -Arguments: [sr_customer_sk#27 ASC NULLS FIRST, sr_item_sk#26 ASC NULLS FIRST], false, 0 +Input [8]: [ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_return_quantity#25] +Arguments: [sr_customer_sk#23 ASC NULLS FIRST, sr_item_sk#22 ASC NULLS FIRST], false, 0 (44) Scan parquet default.catalog_sales Output [4]: [cs_sold_date_sk#32, cs_bill_customer_sk#33, cs_item_sk#34, cs_quantity#35] @@ -305,18 +305,18 @@ Input [3]: [cs_bill_customer_sk#33, cs_item_sk#34, cs_quantity#35] Arguments: [cast(cs_bill_customer_sk#33 as bigint) ASC NULLS FIRST, cast(cs_item_sk#34 as bigint) ASC NULLS FIRST], false, 0 (56) SortMergeJoin [codegen id : 17] -Left keys [2]: [sr_customer_sk#27, sr_item_sk#26] +Left keys [2]: [sr_customer_sk#23, sr_item_sk#22] Right keys [2]: [cast(cs_bill_customer_sk#33 as bigint), cast(cs_item_sk#34 as bigint)] Join condition: None (57) Project [codegen id : 17] -Output [7]: [ss_quantity#6, sr_return_quantity#29, cs_quantity#35, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] -Input [11]: [ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_return_quantity#29, cs_bill_customer_sk#33, cs_item_sk#34, cs_quantity#35] +Output [7]: [ss_quantity#6, sr_return_quantity#25, cs_quantity#35, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] +Input [11]: [ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_return_quantity#25, cs_bill_customer_sk#33, cs_item_sk#34, cs_quantity#35] (58) HashAggregate [codegen id : 17] -Input [7]: [ss_quantity#6, sr_return_quantity#29, cs_quantity#35, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] +Input [7]: [ss_quantity#6, sr_return_quantity#25, cs_quantity#35, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] Keys [4]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13] -Functions [3]: [partial_sum(cast(ss_quantity#6 as bigint)), partial_sum(cast(sr_return_quantity#29 as bigint)), 
partial_sum(cast(cs_quantity#35 as bigint))] +Functions [3]: [partial_sum(cast(ss_quantity#6 as bigint)), partial_sum(cast(sr_return_quantity#25 as bigint)), partial_sum(cast(cs_quantity#35 as bigint))] Aggregate Attributes [3]: [sum#40, sum#41, sum#42] Results [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, sum#43, sum#44, sum#45] @@ -327,9 +327,9 @@ Arguments: hashpartitioning(i_item_id#17, i_item_desc#18, s_store_id#12, s_store (60) HashAggregate [codegen id : 18] Input [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, sum#43, sum#44, sum#45] Keys [4]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13] -Functions [3]: [sum(cast(ss_quantity#6 as bigint)), sum(cast(sr_return_quantity#29 as bigint)), sum(cast(cs_quantity#35 as bigint))] -Aggregate Attributes [3]: [sum(cast(ss_quantity#6 as bigint))#47, sum(cast(sr_return_quantity#29 as bigint))#48, sum(cast(cs_quantity#35 as bigint))#49] -Results [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, sum(cast(ss_quantity#6 as bigint))#47 AS store_sales_quantity#50, sum(cast(sr_return_quantity#29 as bigint))#48 AS store_returns_quantity#51, sum(cast(cs_quantity#35 as bigint))#49 AS catalog_sales_quantity#52] +Functions [3]: [sum(cast(ss_quantity#6 as bigint)), sum(cast(sr_return_quantity#25 as bigint)), sum(cast(cs_quantity#35 as bigint))] +Aggregate Attributes [3]: [sum(cast(ss_quantity#6 as bigint))#47, sum(cast(sr_return_quantity#25 as bigint))#48, sum(cast(cs_quantity#35 as bigint))#49] +Results [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, sum(cast(ss_quantity#6 as bigint))#47 AS store_sales_quantity#50, sum(cast(sr_return_quantity#25 as bigint))#48 AS store_returns_quantity#51, sum(cast(cs_quantity#35 as bigint))#49 AS catalog_sales_quantity#52] (61) TakeOrderedAndProject Input [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, store_sales_quantity#50, store_returns_quantity#51, catalog_sales_quantity#52] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/simplified.txt index f10b8e245c50e..ea91af9e8f755 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/simplified.txt @@ -69,7 +69,11 @@ TakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales Exchange [sr_customer_sk,sr_item_sk,sr_ticket_number] #8 WholeStageCodegen (10) Project [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity] - BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Filter [sr_customer_sk,sr_item_sk,sr_ticket_number,sr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity] InputAdapter BroadcastExchange #9 WholeStageCodegen (9) @@ -78,10 +82,6 @@ TakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - Filter [sr_customer_sk,sr_item_sk,sr_ticket_number,sr_returned_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity] InputAdapter WholeStageCodegen (16) Sort 
[cs_bill_customer_sk,cs_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/explain.txt index d3b013660ba28..9f123c4044cc8 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/explain.txt @@ -138,7 +138,7 @@ Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#2)) Output [3]: [d_date_sk#4, d_year#5, d_qoy#6] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,3), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (5) ColumnarToRow [codegen id : 1] @@ -146,7 +146,7 @@ Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] (6) Filter [codegen id : 1] Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] -Condition : ((((isnotnull(d_qoy#6) AND isnotnull(d_year#5)) AND (d_qoy#6 = 3)) AND (d_year#5 = 2000)) AND isnotnull(d_date_sk#4)) +Condition : ((((isnotnull(d_qoy#6) AND isnotnull(d_year#5)) AND (d_qoy#6 = 2)) AND (d_year#5 = 2000)) AND isnotnull(d_date_sk#4)) (7) BroadcastExchange Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] @@ -236,7 +236,7 @@ Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#2)) Output [3]: [d_date_sk#17, d_year#18, d_qoy#19] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,3), EqualTo(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (26) ColumnarToRow [codegen id : 7] @@ -244,7 +244,7 @@ Input [3]: [d_date_sk#17, d_year#18, d_qoy#19] (27) Filter [codegen id : 7] Input [3]: [d_date_sk#17, d_year#18, d_qoy#19] -Condition : ((((isnotnull(d_qoy#19) AND isnotnull(d_year#18)) AND (d_qoy#19 = 2)) AND (d_year#18 = 2000)) AND isnotnull(d_date_sk#17)) +Condition : ((((isnotnull(d_qoy#19) AND isnotnull(d_year#18)) AND (d_qoy#19 = 3)) AND (d_year#18 = 2000)) AND isnotnull(d_date_sk#17)) (28) BroadcastExchange Input [3]: [d_date_sk#17, d_year#18, d_qoy#19] @@ -311,7 +311,7 @@ Right keys [1]: [ca_county#23] Join condition: None (42) Project [codegen id : 42] -Output [3]: [store_sales#16, ca_county#23, store_sales#28] +Output [3]: [ca_county#10, store_sales#16, store_sales#28] Input [4]: [ca_county#10, store_sales#16, ca_county#23, store_sales#28] (43) Scan parquet default.store_sales @@ -402,13 +402,13 @@ Input [3]: [ca_county#36, d_year#31, store_sales#41] Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#42] (62) BroadcastHashJoin [codegen id : 42] -Left keys [1]: [ca_county#23] +Left keys [1]: [ca_county#10] Right keys [1]: [ca_county#36] Join condition: None (63) Project [codegen id : 42] Output [5]: [store_sales#16, store_sales#28, ca_county#36, d_year#31, store_sales#41] -Input [6]: [store_sales#16, ca_county#23, store_sales#28, ca_county#36, d_year#31, store_sales#41] +Input [6]: [ca_county#10, store_sales#16, store_sales#28, ca_county#36, d_year#31, store_sales#41] (64) Scan parquet default.web_sales Output [3]: [ws_sold_date_sk#43, ws_bill_addr_sk#44, ws_ext_sales_price#45] @@ -424,7 +424,7 @@ Input [3]: [ws_sold_date_sk#43, 
ws_bill_addr_sk#44, ws_ext_sales_price#45] Input [3]: [ws_sold_date_sk#43, ws_bill_addr_sk#44, ws_ext_sales_price#45] Condition : (isnotnull(ws_sold_date_sk#43) AND isnotnull(ws_bill_addr_sk#44)) -(67) ReusedExchange [Reuses operator id: 28] +(67) ReusedExchange [Reuses operator id: 49] Output [3]: [d_date_sk#46, d_year#47, d_qoy#48] (68) BroadcastHashJoin [codegen id : 22] @@ -492,7 +492,7 @@ Input [3]: [ws_sold_date_sk#43, ws_bill_addr_sk#44, ws_ext_sales_price#45] Input [3]: [ws_sold_date_sk#43, ws_bill_addr_sk#44, ws_ext_sales_price#45] Condition : (isnotnull(ws_sold_date_sk#43) AND isnotnull(ws_bill_addr_sk#44)) -(82) ReusedExchange [Reuses operator id: 49] +(82) ReusedExchange [Reuses operator id: 7] Output [3]: [d_date_sk#57, d_year#58, d_qoy#59] (83) BroadcastHashJoin [codegen id : 28] @@ -556,7 +556,7 @@ Right keys [1]: [ca_county#62] Join condition: None (96) Project [codegen id : 41] -Output [3]: [web_sales#56, ca_county#62, web_sales#67] +Output [3]: [ca_county#51, web_sales#56, web_sales#67] Input [4]: [ca_county#51, web_sales#56, ca_county#62, web_sales#67] (97) Scan parquet default.web_sales @@ -573,7 +573,7 @@ Input [3]: [ws_sold_date_sk#43, ws_bill_addr_sk#44, ws_ext_sales_price#45] Input [3]: [ws_sold_date_sk#43, ws_bill_addr_sk#44, ws_ext_sales_price#45] Condition : (isnotnull(ws_sold_date_sk#43) AND isnotnull(ws_bill_addr_sk#44)) -(100) ReusedExchange [Reuses operator id: 7] +(100) ReusedExchange [Reuses operator id: 28] Output [3]: [d_date_sk#69, d_year#70, d_qoy#71] (101) BroadcastHashJoin [codegen id : 35] @@ -632,26 +632,26 @@ Input [2]: [ca_county#74, web_sales#79] Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#80] (113) BroadcastHashJoin [codegen id : 41] -Left keys [1]: [ca_county#62] +Left keys [1]: [ca_county#51] Right keys [1]: [ca_county#74] Join condition: None (114) Project [codegen id : 41] -Output [4]: [web_sales#56, ca_county#62, web_sales#67, web_sales#79] -Input [5]: [web_sales#56, ca_county#62, web_sales#67, ca_county#74, web_sales#79] +Output [4]: [ca_county#51, web_sales#56, web_sales#67, web_sales#79] +Input [5]: [ca_county#51, web_sales#56, web_sales#67, ca_county#74, web_sales#79] (115) BroadcastExchange -Input [4]: [web_sales#56, ca_county#62, web_sales#67, web_sales#79] -Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#81] +Input [4]: [ca_county#51, web_sales#56, web_sales#67, web_sales#79] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#81] (116) BroadcastHashJoin [codegen id : 42] Left keys [1]: [ca_county#36] -Right keys [1]: [ca_county#62] -Join condition: ((CASE WHEN (web_sales#67 > 0.00) THEN CheckOverflow((promote_precision(web_sales#56) / promote_precision(web_sales#67)), DecimalType(37,20), true) ELSE null END > CASE WHEN (store_sales#41 > 0.00) THEN CheckOverflow((promote_precision(store_sales#28) / promote_precision(store_sales#41)), DecimalType(37,20), true) ELSE null END) AND (CASE WHEN (web_sales#56 > 0.00) THEN CheckOverflow((promote_precision(web_sales#79) / promote_precision(web_sales#56)), DecimalType(37,20), true) ELSE null END > CASE WHEN (store_sales#28 > 0.00) THEN CheckOverflow((promote_precision(store_sales#16) / promote_precision(store_sales#28)), DecimalType(37,20), true) ELSE null END)) +Right keys [1]: [ca_county#51] +Join condition: ((CASE WHEN (web_sales#56 > 0.00) THEN CheckOverflow((promote_precision(web_sales#67) / promote_precision(web_sales#56)), DecimalType(37,20), true) ELSE null END > CASE WHEN 
(store_sales#41 > 0.00) THEN CheckOverflow((promote_precision(store_sales#16) / promote_precision(store_sales#41)), DecimalType(37,20), true) ELSE null END) AND (CASE WHEN (web_sales#67 > 0.00) THEN CheckOverflow((promote_precision(web_sales#79) / promote_precision(web_sales#67)), DecimalType(37,20), true) ELSE null END > CASE WHEN (store_sales#16 > 0.00) THEN CheckOverflow((promote_precision(store_sales#28) / promote_precision(store_sales#16)), DecimalType(37,20), true) ELSE null END)) (117) Project [codegen id : 42] -Output [6]: [ca_county#36, d_year#31, CheckOverflow((promote_precision(web_sales#56) / promote_precision(web_sales#67)), DecimalType(37,20), true) AS web_q1_q2_increase#82, CheckOverflow((promote_precision(store_sales#28) / promote_precision(store_sales#41)), DecimalType(37,20), true) AS store_q1_q2_increase#83, CheckOverflow((promote_precision(web_sales#79) / promote_precision(web_sales#56)), DecimalType(37,20), true) AS web_q2_q3_increase#84, CheckOverflow((promote_precision(store_sales#16) / promote_precision(store_sales#28)), DecimalType(37,20), true) AS store_q2_q3_increase#85] -Input [9]: [store_sales#16, store_sales#28, ca_county#36, d_year#31, store_sales#41, web_sales#56, ca_county#62, web_sales#67, web_sales#79] +Output [6]: [ca_county#36, d_year#31, CheckOverflow((promote_precision(web_sales#67) / promote_precision(web_sales#56)), DecimalType(37,20), true) AS web_q1_q2_increase#82, CheckOverflow((promote_precision(store_sales#16) / promote_precision(store_sales#41)), DecimalType(37,20), true) AS store_q1_q2_increase#83, CheckOverflow((promote_precision(web_sales#79) / promote_precision(web_sales#67)), DecimalType(37,20), true) AS web_q2_q3_increase#84, CheckOverflow((promote_precision(store_sales#28) / promote_precision(store_sales#16)), DecimalType(37,20), true) AS store_q2_q3_increase#85] +Input [9]: [store_sales#16, store_sales#28, ca_county#36, d_year#31, store_sales#41, ca_county#51, web_sales#56, web_sales#67, web_sales#79] (118) Exchange Input [6]: [ca_county#36, d_year#31, web_q1_q2_increase#82, store_q1_q2_increase#83, web_q2_q3_increase#84, store_q2_q3_increase#85] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/simplified.txt index 9ec06b597cb64..c7b69500ed8a6 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/simplified.txt @@ -7,7 +7,7 @@ WholeStageCodegen (43) BroadcastHashJoin [ca_county,ca_county,web_sales,web_sales,store_sales,store_sales,web_sales,store_sales] Project [store_sales,store_sales,ca_county,d_year,store_sales] BroadcastHashJoin [ca_county,ca_county] - Project [store_sales,ca_county,store_sales] + Project [ca_county,store_sales,store_sales] BroadcastHashJoin [ca_county,ca_county] HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] InputAdapter @@ -116,9 +116,9 @@ WholeStageCodegen (43) InputAdapter BroadcastExchange #14 WholeStageCodegen (41) - Project [web_sales,ca_county,web_sales,web_sales] + Project [ca_county,web_sales,web_sales,web_sales] BroadcastHashJoin [ca_county,ca_county] - Project [web_sales,ca_county,web_sales] + Project [ca_county,web_sales,web_sales] BroadcastHashJoin [ca_county,ca_county] HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] 
InputAdapter @@ -140,7 +140,7 @@ WholeStageCodegen (43) InputAdapter Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] InputAdapter - ReusedExchange [d_date_sk,d_year,d_qoy] #9 + ReusedExchange [d_date_sk,d_year,d_qoy] #13 InputAdapter WholeStageCodegen (25) Sort [ca_address_sk] @@ -169,7 +169,7 @@ WholeStageCodegen (43) InputAdapter Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] InputAdapter - ReusedExchange [d_date_sk,d_year,d_qoy] #13 + ReusedExchange [d_date_sk,d_year,d_qoy] #4 InputAdapter WholeStageCodegen (31) Sort [ca_address_sk] @@ -198,7 +198,7 @@ WholeStageCodegen (43) InputAdapter Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] InputAdapter - ReusedExchange [d_date_sk,d_year,d_qoy] #4 + ReusedExchange [d_date_sk,d_year,d_qoy] #9 InputAdapter WholeStageCodegen (38) Sort [ca_address_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.sf100/explain.txt index 0232d56ab7481..54e117e6cac10 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.sf100/explain.txt @@ -34,12 +34,12 @@ TakeOrderedAndProject (46) : : +- Exchange (22) : : +- * Filter (21) : : +- * ColumnarToRow (20) - : : +- Scan parquet default.customer_address (19) + : : +- Scan parquet default.customer (19) : +- * Sort (28) : +- Exchange (27) : +- * Filter (26) : +- * ColumnarToRow (25) - : +- Scan parquet default.customer (24) + : +- Scan parquet default.customer_address (24) +- BroadcastExchange (39) +- * Project (38) +- * Filter (37) @@ -127,75 +127,75 @@ Arguments: hashpartitioning(ws_bill_customer_sk#4, 5), true, [id=#13] Input [3]: [ws_bill_customer_sk#4, ws_sales_price#5, i_item_id#11] Arguments: [ws_bill_customer_sk#4 ASC NULLS FIRST], false, 0 -(19) Scan parquet default.customer_address -Output [3]: [ca_address_sk#14, ca_city#15, ca_zip#16] +(19) Scan parquet default.customer +Output [2]: [c_customer_sk#14, c_current_addr_sk#15] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_address_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct (20) ColumnarToRow [codegen id : 5] -Input [3]: [ca_address_sk#14, ca_city#15, ca_zip#16] +Input [2]: [c_customer_sk#14, c_current_addr_sk#15] (21) Filter [codegen id : 5] -Input [3]: [ca_address_sk#14, ca_city#15, ca_zip#16] -Condition : isnotnull(ca_address_sk#14) +Input [2]: [c_customer_sk#14, c_current_addr_sk#15] +Condition : (isnotnull(c_customer_sk#14) AND isnotnull(c_current_addr_sk#15)) (22) Exchange -Input [3]: [ca_address_sk#14, ca_city#15, ca_zip#16] -Arguments: hashpartitioning(ca_address_sk#14, 5), true, [id=#17] +Input [2]: [c_customer_sk#14, c_current_addr_sk#15] +Arguments: hashpartitioning(c_current_addr_sk#15, 5), true, [id=#16] (23) Sort [codegen id : 6] -Input [3]: [ca_address_sk#14, ca_city#15, ca_zip#16] -Arguments: [ca_address_sk#14 ASC NULLS FIRST], false, 0 +Input [2]: [c_customer_sk#14, c_current_addr_sk#15] +Arguments: [c_current_addr_sk#15 ASC NULLS FIRST], false, 0 -(24) Scan parquet default.customer -Output [2]: [c_customer_sk#18, c_current_addr_sk#19] +(24) Scan parquet default.customer_address 
+Output [3]: [ca_address_sk#17, ca_city#18, ca_zip#19] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct (25) ColumnarToRow [codegen id : 7] -Input [2]: [c_customer_sk#18, c_current_addr_sk#19] +Input [3]: [ca_address_sk#17, ca_city#18, ca_zip#19] (26) Filter [codegen id : 7] -Input [2]: [c_customer_sk#18, c_current_addr_sk#19] -Condition : (isnotnull(c_customer_sk#18) AND isnotnull(c_current_addr_sk#19)) +Input [3]: [ca_address_sk#17, ca_city#18, ca_zip#19] +Condition : isnotnull(ca_address_sk#17) (27) Exchange -Input [2]: [c_customer_sk#18, c_current_addr_sk#19] -Arguments: hashpartitioning(c_current_addr_sk#19, 5), true, [id=#20] +Input [3]: [ca_address_sk#17, ca_city#18, ca_zip#19] +Arguments: hashpartitioning(ca_address_sk#17, 5), true, [id=#20] (28) Sort [codegen id : 8] -Input [2]: [c_customer_sk#18, c_current_addr_sk#19] -Arguments: [c_current_addr_sk#19 ASC NULLS FIRST], false, 0 +Input [3]: [ca_address_sk#17, ca_city#18, ca_zip#19] +Arguments: [ca_address_sk#17 ASC NULLS FIRST], false, 0 (29) SortMergeJoin [codegen id : 9] -Left keys [1]: [ca_address_sk#14] -Right keys [1]: [c_current_addr_sk#19] +Left keys [1]: [c_current_addr_sk#15] +Right keys [1]: [ca_address_sk#17] Join condition: None (30) Project [codegen id : 9] -Output [3]: [ca_city#15, ca_zip#16, c_customer_sk#18] -Input [5]: [ca_address_sk#14, ca_city#15, ca_zip#16, c_customer_sk#18, c_current_addr_sk#19] +Output [3]: [c_customer_sk#14, ca_city#18, ca_zip#19] +Input [5]: [c_customer_sk#14, c_current_addr_sk#15, ca_address_sk#17, ca_city#18, ca_zip#19] (31) Exchange -Input [3]: [ca_city#15, ca_zip#16, c_customer_sk#18] -Arguments: hashpartitioning(c_customer_sk#18, 5), true, [id=#21] +Input [3]: [c_customer_sk#14, ca_city#18, ca_zip#19] +Arguments: hashpartitioning(c_customer_sk#14, 5), true, [id=#21] (32) Sort [codegen id : 10] -Input [3]: [ca_city#15, ca_zip#16, c_customer_sk#18] -Arguments: [c_customer_sk#18 ASC NULLS FIRST], false, 0 +Input [3]: [c_customer_sk#14, ca_city#18, ca_zip#19] +Arguments: [c_customer_sk#14 ASC NULLS FIRST], false, 0 (33) SortMergeJoin [codegen id : 12] Left keys [1]: [ws_bill_customer_sk#4] -Right keys [1]: [c_customer_sk#18] +Right keys [1]: [c_customer_sk#14] Join condition: None (34) Project [codegen id : 12] -Output [4]: [ws_sales_price#5, ca_city#15, ca_zip#16, i_item_id#11] -Input [6]: [ws_bill_customer_sk#4, ws_sales_price#5, i_item_id#11, ca_city#15, ca_zip#16, c_customer_sk#18] +Output [4]: [ws_sales_price#5, ca_city#18, ca_zip#19, i_item_id#11] +Input [6]: [ws_bill_customer_sk#4, ws_sales_price#5, i_item_id#11, c_customer_sk#14, ca_city#18, ca_zip#19] (35) Scan parquet default.item Output [2]: [i_item_sk#10, i_item_id#11] @@ -225,32 +225,32 @@ Right keys [1]: [i_item_id#11#22] Join condition: None (41) Filter [codegen id : 12] -Input [5]: [ws_sales_price#5, ca_city#15, ca_zip#16, i_item_id#11, exists#1] -Condition : (substr(ca_zip#16, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR exists#1) +Input [5]: [ws_sales_price#5, ca_city#18, ca_zip#19, i_item_id#11, exists#1] +Condition : (substr(ca_zip#19, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR exists#1) (42) Project [codegen id : 12] -Output [3]: [ws_sales_price#5, ca_city#15, ca_zip#16] -Input [5]: [ws_sales_price#5, ca_city#15, 
ca_zip#16, i_item_id#11, exists#1] +Output [3]: [ws_sales_price#5, ca_city#18, ca_zip#19] +Input [5]: [ws_sales_price#5, ca_city#18, ca_zip#19, i_item_id#11, exists#1] (43) HashAggregate [codegen id : 12] -Input [3]: [ws_sales_price#5, ca_city#15, ca_zip#16] -Keys [2]: [ca_zip#16, ca_city#15] +Input [3]: [ws_sales_price#5, ca_city#18, ca_zip#19] +Keys [2]: [ca_zip#19, ca_city#18] Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#5))] Aggregate Attributes [1]: [sum#24] -Results [3]: [ca_zip#16, ca_city#15, sum#25] +Results [3]: [ca_zip#19, ca_city#18, sum#25] (44) Exchange -Input [3]: [ca_zip#16, ca_city#15, sum#25] -Arguments: hashpartitioning(ca_zip#16, ca_city#15, 5), true, [id=#26] +Input [3]: [ca_zip#19, ca_city#18, sum#25] +Arguments: hashpartitioning(ca_zip#19, ca_city#18, 5), true, [id=#26] (45) HashAggregate [codegen id : 13] -Input [3]: [ca_zip#16, ca_city#15, sum#25] -Keys [2]: [ca_zip#16, ca_city#15] +Input [3]: [ca_zip#19, ca_city#18, sum#25] +Keys [2]: [ca_zip#19, ca_city#18] Functions [1]: [sum(UnscaledValue(ws_sales_price#5))] Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#5))#27] -Results [3]: [ca_zip#16, ca_city#15, MakeDecimal(sum(UnscaledValue(ws_sales_price#5))#27,17,2) AS sum(ws_sales_price)#28] +Results [3]: [ca_zip#19, ca_city#18, MakeDecimal(sum(UnscaledValue(ws_sales_price#5))#27,17,2) AS sum(ws_sales_price)#28] (46) TakeOrderedAndProject -Input [3]: [ca_zip#16, ca_city#15, sum(ws_sales_price)#28] -Arguments: 100, [ca_zip#16 ASC NULLS FIRST, ca_city#15 ASC NULLS FIRST], [ca_zip#16, ca_city#15, sum(ws_sales_price)#28] +Input [3]: [ca_zip#19, ca_city#18, sum(ws_sales_price)#28] +Arguments: 100, [ca_zip#19 ASC NULLS FIRST, ca_city#18 ASC NULLS FIRST], [ca_zip#19, ca_city#18, sum(ws_sales_price)#28] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.sf100/simplified.txt index 1eab468e67bc0..0e9662bb6aca5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.sf100/simplified.txt @@ -45,28 +45,28 @@ TakeOrderedAndProject [ca_zip,ca_city,sum(ws_sales_price)] InputAdapter Exchange [c_customer_sk] #5 WholeStageCodegen (9) - Project [ca_city,ca_zip,c_customer_sk] - SortMergeJoin [ca_address_sk,c_current_addr_sk] + Project [c_customer_sk,ca_city,ca_zip] + SortMergeJoin [c_current_addr_sk,ca_address_sk] InputAdapter WholeStageCodegen (6) - Sort [ca_address_sk] + Sort [c_current_addr_sk] InputAdapter - Exchange [ca_address_sk] #6 + Exchange [c_current_addr_sk] #6 WholeStageCodegen (5) - Filter [ca_address_sk] + Filter [c_customer_sk,c_current_addr_sk] ColumnarToRow InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_city,ca_zip] + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] InputAdapter WholeStageCodegen (8) - Sort [c_current_addr_sk] + Sort [ca_address_sk] InputAdapter - Exchange [c_current_addr_sk] #7 + Exchange [ca_address_sk] #7 WholeStageCodegen (7) - Filter [c_customer_sk,c_current_addr_sk] + Filter [ca_address_sk] ColumnarToRow InputAdapter - Scan parquet default.customer [c_customer_sk,c_current_addr_sk] + Scan parquet default.customer_address [ca_address_sk,ca_city,ca_zip] InputAdapter BroadcastExchange #8 WholeStageCodegen (11) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/explain.txt index df1197d7c925e..741ee50f800ec 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/explain.txt @@ -25,15 +25,15 @@ TakeOrderedAndProject (35) +- * Sort (29) +- Exchange (28) +- * Project (27) - +- * BroadcastHashJoin Inner BuildLeft (26) - :- BroadcastExchange (22) - : +- * Project (21) - : +- * Filter (20) - : +- * ColumnarToRow (19) - : +- Scan parquet default.date_dim (18) - +- * Filter (25) - +- * ColumnarToRow (24) - +- Scan parquet default.store_returns (23) + +- * BroadcastHashJoin Inner BuildRight (26) + :- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.store_returns (18) + +- BroadcastExchange (25) + +- * Project (24) + +- * Filter (23) + +- * ColumnarToRow (22) + +- Scan parquet default.date_dim (21) (1) Scan parquet default.store_sales @@ -112,72 +112,72 @@ Arguments: hashpartitioning(cast(ss_ticket_number#5 as bigint), cast(ss_item_sk# Input [14]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] Arguments: [cast(ss_ticket_number#5 as bigint) ASC NULLS FIRST, cast(ss_item_sk#2 as bigint) ASC NULLS FIRST, cast(ss_customer_sk#3 as bigint) ASC NULLS FIRST], false, 0 -(18) Scan parquet default.date_dim -Output [3]: [d_date_sk#21, d_year#22, d_moy#23] +(18) Scan parquet default.store_returns +Output [4]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,8), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk), IsNotNull(sr_customer_sk), IsNotNull(sr_returned_date_sk)] +ReadSchema: struct -(19) ColumnarToRow [codegen id : 5] -Input [3]: [d_date_sk#21, d_year#22, d_moy#23] +(19) ColumnarToRow [codegen id : 6] +Input [4]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24] -(20) Filter [codegen id : 5] -Input [3]: [d_date_sk#21, d_year#22, d_moy#23] -Condition : ((((isnotnull(d_year#22) AND isnotnull(d_moy#23)) AND (d_year#22 = 2001)) AND (d_moy#23 = 8)) AND isnotnull(d_date_sk#21)) +(20) Filter [codegen id : 6] +Input [4]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24] +Condition : (((isnotnull(sr_ticket_number#24) AND isnotnull(sr_item_sk#22)) AND isnotnull(sr_customer_sk#23)) AND isnotnull(sr_returned_date_sk#21)) -(21) Project [codegen id : 5] -Output [1]: [d_date_sk#21] -Input [3]: [d_date_sk#21, d_year#22, d_moy#23] +(21) Scan parquet default.date_dim +Output [3]: [d_date_sk#25, d_year#26, d_moy#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,8), IsNotNull(d_date_sk)] +ReadSchema: struct -(22) BroadcastExchange -Input [1]: [d_date_sk#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#24] +(22) ColumnarToRow [codegen id : 5] +Input [3]: [d_date_sk#25, d_year#26, d_moy#27] -(23) Scan parquet 
default.store_returns -Output [4]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk), IsNotNull(sr_customer_sk), IsNotNull(sr_returned_date_sk)] -ReadSchema: struct +(23) Filter [codegen id : 5] +Input [3]: [d_date_sk#25, d_year#26, d_moy#27] +Condition : ((((isnotnull(d_year#26) AND isnotnull(d_moy#27)) AND (d_year#26 = 2001)) AND (d_moy#27 = 8)) AND isnotnull(d_date_sk#25)) -(24) ColumnarToRow -Input [4]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28] +(24) Project [codegen id : 5] +Output [1]: [d_date_sk#25] +Input [3]: [d_date_sk#25, d_year#26, d_moy#27] -(25) Filter -Input [4]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28] -Condition : (((isnotnull(sr_ticket_number#28) AND isnotnull(sr_item_sk#26)) AND isnotnull(sr_customer_sk#27)) AND isnotnull(sr_returned_date_sk#25)) +(25) BroadcastExchange +Input [1]: [d_date_sk#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#28] (26) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cast(d_date_sk#21 as bigint)] -Right keys [1]: [sr_returned_date_sk#25] +Left keys [1]: [sr_returned_date_sk#21] +Right keys [1]: [cast(d_date_sk#25 as bigint)] Join condition: None (27) Project [codegen id : 6] -Output [4]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28] -Input [5]: [d_date_sk#21, sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28] +Output [4]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24] +Input [5]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, d_date_sk#25] (28) Exchange -Input [4]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28] -Arguments: hashpartitioning(sr_ticket_number#28, sr_item_sk#26, sr_customer_sk#27, 5), true, [id=#29] +Input [4]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24] +Arguments: hashpartitioning(sr_ticket_number#24, sr_item_sk#22, sr_customer_sk#23, 5), true, [id=#29] (29) Sort [codegen id : 7] -Input [4]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28] -Arguments: [sr_ticket_number#28 ASC NULLS FIRST, sr_item_sk#26 ASC NULLS FIRST, sr_customer_sk#27 ASC NULLS FIRST], false, 0 +Input [4]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24] +Arguments: [sr_ticket_number#24 ASC NULLS FIRST, sr_item_sk#22 ASC NULLS FIRST, sr_customer_sk#23 ASC NULLS FIRST], false, 0 (30) SortMergeJoin [codegen id : 8] Left keys [3]: [cast(ss_ticket_number#5 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_customer_sk#3 as bigint)] -Right keys [3]: [sr_ticket_number#28, sr_item_sk#26, sr_customer_sk#27] +Right keys [3]: [sr_ticket_number#24, sr_item_sk#22, sr_customer_sk#23] Join condition: None (31) Project [codegen id : 8] -Output [12]: [ss_sold_date_sk#1, sr_returned_date_sk#25, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] -Input [18]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, sr_returned_date_sk#25, 
sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28] +Output [12]: [ss_sold_date_sk#1, sr_returned_date_sk#21, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] +Input [18]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24] (32) HashAggregate [codegen id : 8] -Input [12]: [ss_sold_date_sk#1, sr_returned_date_sk#25, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] +Input [12]: [ss_sold_date_sk#1, sr_returned_date_sk#21, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] Keys [10]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] -Functions [5]: [partial_sum(cast(CASE WHEN ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] +Functions [5]: [partial_sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] Aggregate Attributes [5]: [sum#30, sum#31, sum#32, sum#33, sum#34] Results [15]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, sum#35, sum#36, sum#37, sum#38, sum#39] @@ -188,9 +188,9 @@ Arguments: hashpartitioning(s_store_name#9, s_company_id#10, s_street_number#11, (34) HashAggregate [codegen id : 9] Input [15]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, sum#35, sum#36, 
sum#37, sum#38, sum#39] Keys [10]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] -Functions [5]: [sum(cast(CASE WHEN ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [5]: [sum(cast(CASE WHEN ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint))#41, sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint))#42, sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint))#43, sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint))#44, sum(cast(CASE WHEN ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))#45] -Results [15]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, sum(cast(CASE WHEN ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint))#41 AS 30 days #46, sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint))#42 AS 31 - 60 days #47, sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint))#43 AS 61 - 90 days #48, sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint))#44 AS 91 - 120 days #49, sum(cast(CASE WHEN ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))#45 AS >120 days #50] +Functions [5]: [sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#21 - 
cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [5]: [sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint))#41, sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint))#42, sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint))#43, sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint))#44, sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))#45] +Results [15]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint))#41 AS 30 days #46, sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint))#42 AS 31 - 60 days #47, sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint))#43 AS 61 - 90 days #48, sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint))#44 AS 91 - 120 days #49, sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))#45 AS >120 days #50] (35) TakeOrderedAndProject Input [15]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, 30 days #46, 31 - 60 days #47, 61 - 90 days #48, 91 - 120 days #49, >120 days #50] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/simplified.txt index 5d6f38e882a5c..be11a69176810 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/simplified.txt @@ -42,7 +42,11 @@ TakeOrderedAndProject [s_store_name,s_company_id,s_street_number,s_street_name,s Exchange [sr_ticket_number,sr_item_sk,sr_customer_sk] #5 WholeStageCodegen (6) Project [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number] - BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Filter [sr_ticket_number,sr_item_sk,sr_customer_sk,sr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number] InputAdapter BroadcastExchange #6 
WholeStageCodegen (5) @@ -51,7 +55,3 @@ TakeOrderedAndProject [s_store_name,s_company_id,s_street_number,s_street_name,s ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - Filter [sr_ticket_number,sr_item_sk,sr_customer_sk,sr_returned_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.sf100/explain.txt index 511e1b46cd7a7..675cff99ad729 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.sf100/explain.txt @@ -11,30 +11,30 @@ TakeOrderedAndProject (50) : +- Exchange (27) : +- * Project (26) : +- * BroadcastHashJoin Inner BuildRight (25) - : :- * Project (10) - : : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (19) + : : +- * BroadcastHashJoin Inner BuildRight (18) : : :- * Filter (3) : : : +- * ColumnarToRow (2) : : : +- Scan parquet default.store_sales (1) - : : +- BroadcastExchange (8) - : : +- * Project (7) - : : +- * Filter (6) - : : +- * ColumnarToRow (5) - : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (17) + : : +- * Project (16) + : : +- * Filter (15) + : : +- * BroadcastHashJoin LeftOuter BuildRight (14) + : : :- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.item (4) + : : +- BroadcastExchange (13) + : : +- * HashAggregate (12) + : : +- Exchange (11) + : : +- * HashAggregate (10) + : : +- * Filter (9) + : : +- * ColumnarToRow (8) + : : +- Scan parquet default.item (7) : +- BroadcastExchange (24) : +- * Project (23) : +- * Filter (22) - : +- * BroadcastHashJoin LeftOuter BuildRight (21) - : :- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.item (11) - : +- BroadcastExchange (20) - : +- * HashAggregate (19) - : +- Exchange (18) - : +- * HashAggregate (17) - : +- * Filter (16) - : +- * ColumnarToRow (15) - : +- Scan parquet default.item (14) + : +- * ColumnarToRow (21) + : +- Scan parquet default.date_dim (20) +- * Sort (42) +- Exchange (41) +- * Project (40) @@ -65,112 +65,112 @@ Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3] Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3] Condition : ((isnotnull(ss_customer_sk#3) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_item_sk#2)) -(4) Scan parquet default.date_dim -Output [2]: [d_date_sk#4, d_month_seq#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#4, d_month_seq#5] - -(6) Filter [codegen id : 1] -Input [2]: [d_date_sk#4, d_month_seq#5] -Condition : ((isnotnull(d_month_seq#5) AND (d_month_seq#5 = Subquery scalar-subquery#6, [id=#7])) AND isnotnull(d_date_sk#4)) - -(7) Project [codegen id : 1] -Output [1]: [d_date_sk#4] -Input [2]: [d_date_sk#4, d_month_seq#5] - -(8) BroadcastExchange -Input [1]: [d_date_sk#4] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] - -(9) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#4] -Join condition: None - -(10) Project [codegen id : 5] -Output [2]: [ss_item_sk#2, ss_customer_sk#3] -Input 
[4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, d_date_sk#4] - -(11) Scan parquet default.item -Output [3]: [i_item_sk#9, i_current_price#10, i_category#11] +(4) Scan parquet default.item +Output [3]: [i_item_sk#4, i_current_price#5, i_category#6] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_current_price), IsNotNull(i_item_sk)] ReadSchema: struct -(12) ColumnarToRow [codegen id : 4] -Input [3]: [i_item_sk#9, i_current_price#10, i_category#11] +(5) ColumnarToRow [codegen id : 3] +Input [3]: [i_item_sk#4, i_current_price#5, i_category#6] -(13) Filter [codegen id : 4] -Input [3]: [i_item_sk#9, i_current_price#10, i_category#11] -Condition : (isnotnull(i_current_price#10) AND isnotnull(i_item_sk#9)) +(6) Filter [codegen id : 3] +Input [3]: [i_item_sk#4, i_current_price#5, i_category#6] +Condition : (isnotnull(i_current_price#5) AND isnotnull(i_item_sk#4)) -(14) Scan parquet default.item -Output [2]: [i_current_price#10, i_category#11] +(7) Scan parquet default.item +Output [2]: [i_current_price#5, i_category#6] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_category)] ReadSchema: struct -(15) ColumnarToRow [codegen id : 2] -Input [2]: [i_current_price#10, i_category#11] - -(16) Filter [codegen id : 2] -Input [2]: [i_current_price#10, i_category#11] -Condition : isnotnull(i_category#11) - -(17) HashAggregate [codegen id : 2] -Input [2]: [i_current_price#10, i_category#11] -Keys [1]: [i_category#11] -Functions [1]: [partial_avg(UnscaledValue(i_current_price#10))] -Aggregate Attributes [2]: [sum#12, count#13] -Results [3]: [i_category#11, sum#14, count#15] - -(18) Exchange -Input [3]: [i_category#11, sum#14, count#15] -Arguments: hashpartitioning(i_category#11, 5), true, [id=#16] - -(19) HashAggregate [codegen id : 3] -Input [3]: [i_category#11, sum#14, count#15] -Keys [1]: [i_category#11] -Functions [1]: [avg(UnscaledValue(i_current_price#10))] -Aggregate Attributes [1]: [avg(UnscaledValue(i_current_price#10))#17] -Results [2]: [cast((avg(UnscaledValue(i_current_price#10))#17 / 100.0) as decimal(11,6)) AS avg(i_current_price)#18, i_category#11 AS i_category#11#19] - -(20) BroadcastExchange -Input [2]: [avg(i_current_price)#18, i_category#11#19] -Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#20] - -(21) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [i_category#11] -Right keys [1]: [i_category#11#19] +(8) ColumnarToRow [codegen id : 1] +Input [2]: [i_current_price#5, i_category#6] + +(9) Filter [codegen id : 1] +Input [2]: [i_current_price#5, i_category#6] +Condition : isnotnull(i_category#6) + +(10) HashAggregate [codegen id : 1] +Input [2]: [i_current_price#5, i_category#6] +Keys [1]: [i_category#6] +Functions [1]: [partial_avg(UnscaledValue(i_current_price#5))] +Aggregate Attributes [2]: [sum#7, count#8] +Results [3]: [i_category#6, sum#9, count#10] + +(11) Exchange +Input [3]: [i_category#6, sum#9, count#10] +Arguments: hashpartitioning(i_category#6, 5), true, [id=#11] + +(12) HashAggregate [codegen id : 2] +Input [3]: [i_category#6, sum#9, count#10] +Keys [1]: [i_category#6] +Functions [1]: [avg(UnscaledValue(i_current_price#5))] +Aggregate Attributes [1]: [avg(UnscaledValue(i_current_price#5))#12] +Results [2]: [cast((avg(UnscaledValue(i_current_price#5))#12 / 100.0) as decimal(11,6)) AS avg(i_current_price)#13, i_category#6 AS i_category#6#14] + +(13) BroadcastExchange +Input [2]: [avg(i_current_price)#13, i_category#6#14] 
+Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#15] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [i_category#6] +Right keys [1]: [i_category#6#14] +Join condition: None + +(15) Filter [codegen id : 3] +Input [5]: [i_item_sk#4, i_current_price#5, i_category#6, avg(i_current_price)#13, i_category#6#14] +Condition : (cast(i_current_price#5 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#13)), DecimalType(14,7), true)) + +(16) Project [codegen id : 3] +Output [1]: [i_item_sk#4] +Input [5]: [i_item_sk#4, i_current_price#5, i_category#6, avg(i_current_price)#13, i_category#6#14] + +(17) BroadcastExchange +Input [1]: [i_item_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(18) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#4] Join condition: None +(19) Project [codegen id : 5] +Output [2]: [ss_sold_date_sk#1, ss_customer_sk#3] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, i_item_sk#4] + +(20) Scan parquet default.date_dim +Output [2]: [d_date_sk#17, d_month_seq#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#17, d_month_seq#18] + (22) Filter [codegen id : 4] -Input [5]: [i_item_sk#9, i_current_price#10, i_category#11, avg(i_current_price)#18, i_category#11#19] -Condition : (cast(i_current_price#10 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#18)), DecimalType(14,7), true)) +Input [2]: [d_date_sk#17, d_month_seq#18] +Condition : ((isnotnull(d_month_seq#18) AND (d_month_seq#18 = Subquery scalar-subquery#19, [id=#20])) AND isnotnull(d_date_sk#17)) (23) Project [codegen id : 4] -Output [1]: [i_item_sk#9] -Input [5]: [i_item_sk#9, i_current_price#10, i_category#11, avg(i_current_price)#18, i_category#11#19] +Output [1]: [d_date_sk#17] +Input [2]: [d_date_sk#17, d_month_seq#18] (24) BroadcastExchange -Input [1]: [i_item_sk#9] +Input [1]: [d_date_sk#17] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] (25) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#9] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#17] Join condition: None (26) Project [codegen id : 5] Output [1]: [ss_customer_sk#3] -Input [3]: [ss_item_sk#2, ss_customer_sk#3, i_item_sk#9] +Input [3]: [ss_sold_date_sk#1, ss_customer_sk#3, d_date_sk#17] (27) Exchange Input [1]: [ss_customer_sk#3] @@ -282,7 +282,7 @@ Arguments: 100, [cnt#35 ASC NULLS FIRST], [state#34, cnt#35] ===== Subqueries ===== -Subquery:1 Hosting operator id = 6 Hosting Expression = Subquery scalar-subquery#6, [id=#7] +Subquery:1 Hosting operator id = 22 Hosting Expression = Subquery scalar-subquery#19, [id=#20] * HashAggregate (57) +- Exchange (56) +- * HashAggregate (55) @@ -293,39 +293,39 @@ Subquery:1 Hosting operator id = 6 Hosting Expression = Subquery scalar-subquery (51) Scan parquet default.date_dim -Output [3]: [d_month_seq#5, d_year#37, d_moy#38] +Output [3]: [d_month_seq#18, d_year#37, d_moy#38] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)] ReadSchema: struct (52) ColumnarToRow [codegen id : 1] -Input [3]: 
[d_month_seq#5, d_year#37, d_moy#38] +Input [3]: [d_month_seq#18, d_year#37, d_moy#38] (53) Filter [codegen id : 1] -Input [3]: [d_month_seq#5, d_year#37, d_moy#38] +Input [3]: [d_month_seq#18, d_year#37, d_moy#38] Condition : (((isnotnull(d_year#37) AND isnotnull(d_moy#38)) AND (d_year#37 = 2000)) AND (d_moy#38 = 1)) (54) Project [codegen id : 1] -Output [1]: [d_month_seq#5] -Input [3]: [d_month_seq#5, d_year#37, d_moy#38] +Output [1]: [d_month_seq#18] +Input [3]: [d_month_seq#18, d_year#37, d_moy#38] (55) HashAggregate [codegen id : 1] -Input [1]: [d_month_seq#5] -Keys [1]: [d_month_seq#5] +Input [1]: [d_month_seq#18] +Keys [1]: [d_month_seq#18] Functions: [] Aggregate Attributes: [] -Results [1]: [d_month_seq#5] +Results [1]: [d_month_seq#18] (56) Exchange -Input [1]: [d_month_seq#5] -Arguments: hashpartitioning(d_month_seq#5, 5), true, [id=#39] +Input [1]: [d_month_seq#18] +Arguments: hashpartitioning(d_month_seq#18, 5), true, [id=#39] (57) HashAggregate [codegen id : 2] -Input [1]: [d_month_seq#5] -Keys [1]: [d_month_seq#5] +Input [1]: [d_month_seq#18] +Keys [1]: [d_month_seq#18] Functions: [] Aggregate Attributes: [] -Results [1]: [d_month_seq#5] +Results [1]: [d_month_seq#18] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.sf100/simplified.txt index dcebba331afb3..73d42163240f0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.sf100/simplified.txt @@ -16,55 +16,55 @@ TakeOrderedAndProject [cnt,state] Exchange [ss_customer_sk] #2 WholeStageCodegen (5) Project [ss_customer_sk] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_customer_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_customer_sk] + BroadcastHashJoin [ss_item_sk,i_item_sk] Filter [ss_customer_sk,ss_sold_date_sk,ss_item_sk] ColumnarToRow InputAdapter Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - Subquery #1 - WholeStageCodegen (2) - HashAggregate [d_month_seq] + WholeStageCodegen (3) + Project [i_item_sk] + Filter [i_current_price,avg(i_current_price)] + BroadcastHashJoin [i_category,i_category] + Filter [i_current_price,i_item_sk] + ColumnarToRow InputAdapter - Exchange [d_month_seq] #4 - WholeStageCodegen (1) - HashAggregate [d_month_seq] - Project [d_month_seq] - Filter [d_year,d_moy] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_month_seq,d_year,d_moy] - ColumnarToRow + Scan parquet default.item [i_item_sk,i_current_price,i_category] InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] + BroadcastExchange #4 + WholeStageCodegen (2) + HashAggregate [i_category,sum,count] [avg(UnscaledValue(i_current_price)),avg(i_current_price),i_category,sum,count] + InputAdapter + Exchange [i_category] #5 + WholeStageCodegen (1) + HashAggregate [i_category,i_current_price] [sum,count,sum,count] + Filter [i_category] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_current_price,i_category] InputAdapter - BroadcastExchange #5 + BroadcastExchange #6 WholeStageCodegen (4) - Project [i_item_sk] - Filter [i_current_price,avg(i_current_price)] - BroadcastHashJoin [i_category,i_category] - 
Filter [i_current_price,i_item_sk] - ColumnarToRow + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Subquery #1 + WholeStageCodegen (2) + HashAggregate [d_month_seq] InputAdapter - Scan parquet default.item [i_item_sk,i_current_price,i_category] + Exchange [d_month_seq] #7 + WholeStageCodegen (1) + HashAggregate [d_month_seq] + Project [d_month_seq] + Filter [d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_year,d_moy] + ColumnarToRow InputAdapter - BroadcastExchange #6 - WholeStageCodegen (3) - HashAggregate [i_category,sum,count] [avg(UnscaledValue(i_current_price)),avg(i_current_price),i_category,sum,count] - InputAdapter - Exchange [i_category] #7 - WholeStageCodegen (2) - HashAggregate [i_category,i_current_price] [sum,count,sum,count] - Filter [i_category] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_current_price,i_category] + Scan parquet default.date_dim [d_date_sk,d_month_seq] InputAdapter WholeStageCodegen (12) Sort [c_customer_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/explain.txt index e616934bbd073..58a60763b2b57 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == -TakeOrderedAndProject (75) -+- * Project (74) - +- BroadcastNestedLoopJoin Inner BuildRight (73) +TakeOrderedAndProject (69) ++- * Project (68) + +- BroadcastNestedLoopJoin Inner BuildRight (67) :- * HashAggregate (47) : +- Exchange (46) : +- * HashAggregate (45) @@ -49,31 +49,25 @@ TakeOrderedAndProject (75) : +- * Filter (37) : +- * ColumnarToRow (36) : +- Scan parquet default.customer_address (35) - +- BroadcastExchange (72) - +- * HashAggregate (71) - +- Exchange (70) - +- * HashAggregate (69) - +- * Project (68) - +- * BroadcastHashJoin Inner BuildRight (67) + +- BroadcastExchange (66) + +- * HashAggregate (65) + +- Exchange (64) + +- * HashAggregate (63) + +- * Project (62) + +- * BroadcastHashJoin Inner BuildRight (61) :- * Project (59) : +- * BroadcastHashJoin Inner BuildRight (58) : :- * Project (56) : : +- * BroadcastHashJoin Inner BuildRight (55) : : :- * Project (53) - : : : +- * BroadcastHashJoin Inner BuildLeft (52) - : : : :- ReusedExchange (48) - : : : +- * Filter (51) - : : : +- * ColumnarToRow (50) - : : : +- Scan parquet default.store_sales (49) + : : : +- * BroadcastHashJoin Inner BuildRight (52) + : : : :- * Filter (50) + : : : : +- * ColumnarToRow (49) + : : : : +- Scan parquet default.store_sales (48) + : : : +- ReusedExchange (51) : : +- ReusedExchange (54) : +- ReusedExchange (57) - +- BroadcastExchange (66) - +- * Project (65) - +- * BroadcastHashJoin Inner BuildLeft (64) - :- ReusedExchange (60) - +- * Filter (63) - +- * ColumnarToRow (62) - +- Scan parquet default.customer (61) + +- ReusedExchange (60) (1) Scan parquet default.store_sales @@ -290,31 +284,31 @@ Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#31] Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#31,17,2) AS promotions#32] -(48) ReusedExchange [Reuses operator id: 8] -Output [1]: [d_date_sk#7] - -(49) Scan parquet default.store_sales +(48) Scan parquet default.store_sales Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, 
ss_store_sk#4, ss_ext_sales_price#6] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk)] ReadSchema: struct -(50) ColumnarToRow +(49) ColumnarToRow [codegen id : 14] Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6] -(51) Filter +(50) Filter [codegen id : 14] Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6] Condition : (((isnotnull(ss_store_sk#4) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_item_sk#2)) +(51) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#7] + (52) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [d_date_sk#7] -Right keys [1]: [ss_sold_date_sk#1] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#7] Join condition: None (53) Project [codegen id : 14] Output [4]: [ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6] -Input [6]: [d_date_sk#7, ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6, d_date_sk#7] (54) ReusedExchange [Reuses operator id: 15] Output [1]: [i_item_sk#11] @@ -340,75 +334,48 @@ Join condition: None Output [2]: [ss_customer_sk#3, ss_ext_sales_price#6] Input [4]: [ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6, s_store_sk#19] -(60) ReusedExchange [Reuses operator id: 39] -Output [1]: [ca_address_sk#24] - -(61) Scan parquet default.customer -Output [2]: [c_customer_sk#22, c_current_addr_sk#23] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] -ReadSchema: struct - -(62) ColumnarToRow -Input [2]: [c_customer_sk#22, c_current_addr_sk#23] - -(63) Filter -Input [2]: [c_customer_sk#22, c_current_addr_sk#23] -Condition : (isnotnull(c_customer_sk#22) AND isnotnull(c_current_addr_sk#23)) - -(64) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [ca_address_sk#24] -Right keys [1]: [c_current_addr_sk#23] -Join condition: None - -(65) Project [codegen id : 13] +(60) ReusedExchange [Reuses operator id: 42] Output [1]: [c_customer_sk#22] -Input [3]: [ca_address_sk#24, c_customer_sk#22, c_current_addr_sk#23] - -(66) BroadcastExchange -Input [1]: [c_customer_sk#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#33] -(67) BroadcastHashJoin [codegen id : 14] +(61) BroadcastHashJoin [codegen id : 14] Left keys [1]: [ss_customer_sk#3] Right keys [1]: [c_customer_sk#22] Join condition: None -(68) Project [codegen id : 14] +(62) Project [codegen id : 14] Output [1]: [ss_ext_sales_price#6] Input [3]: [ss_customer_sk#3, ss_ext_sales_price#6, c_customer_sk#22] -(69) HashAggregate [codegen id : 14] +(63) HashAggregate [codegen id : 14] Input [1]: [ss_ext_sales_price#6] Keys: [] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] -Aggregate Attributes [1]: [sum#34] -Results [1]: [sum#35] +Aggregate Attributes [1]: [sum#33] +Results [1]: [sum#34] -(70) Exchange -Input [1]: [sum#35] -Arguments: SinglePartition, true, [id=#36] +(64) Exchange +Input [1]: [sum#34] +Arguments: SinglePartition, true, [id=#35] -(71) HashAggregate [codegen id : 15] -Input [1]: [sum#35] +(65) HashAggregate [codegen id : 15] +Input [1]: [sum#34] Keys: [] Functions [1]: 
[sum(UnscaledValue(ss_ext_sales_price#6))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#37] -Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#37,17,2) AS total#38] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#36] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#36,17,2) AS total#37] -(72) BroadcastExchange -Input [1]: [total#38] -Arguments: IdentityBroadcastMode, [id=#39] +(66) BroadcastExchange +Input [1]: [total#37] +Arguments: IdentityBroadcastMode, [id=#38] -(73) BroadcastNestedLoopJoin +(67) BroadcastNestedLoopJoin Join condition: None -(74) Project [codegen id : 16] -Output [3]: [promotions#32, total#38, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#32 as decimal(15,4))) / promote_precision(cast(total#38 as decimal(15,4)))), DecimalType(35,20), true)) * 100.00000000000000000000), DecimalType(38,19), true) AS (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#40] -Input [2]: [promotions#32, total#38] +(68) Project [codegen id : 16] +Output [3]: [promotions#32, total#37, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#32 as decimal(15,4))) / promote_precision(cast(total#37 as decimal(15,4)))), DecimalType(35,20), true)) * 100.00000000000000000000), DecimalType(38,19), true) AS (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] +Input [2]: [promotions#32, total#37] -(75) TakeOrderedAndProject -Input [3]: [promotions#32, total#38, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#40] -Arguments: 100, [promotions#32 ASC NULLS FIRST, total#38 ASC NULLS FIRST], [promotions#32, total#38, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#40] +(69) TakeOrderedAndProject +Input [3]: [promotions#32, total#37, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] +Arguments: 100, [promotions#32 ASC NULLS FIRST, total#37 ASC NULLS FIRST], [promotions#32, total#37, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/simplified.txt index 039ccb1aa18cf..87f2b3ae03746 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/simplified.txt @@ -86,25 +86,16 @@ TakeOrderedAndProject [promotions,total,(CAST((CAST(CAST(promotions AS DECIMAL(1 Project [ss_customer_sk,ss_store_sk,ss_ext_sales_price] BroadcastHashJoin [ss_item_sk,i_item_sk] Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] 
- BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] #2 + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Filter [ss_store_sk,ss_sold_date_sk,ss_customer_sk,ss_item_sk] ColumnarToRow InputAdapter Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] + InputAdapter + ReusedExchange [d_date_sk] #2 InputAdapter ReusedExchange [i_item_sk] #3 InputAdapter ReusedExchange [s_store_sk] #5 InputAdapter - BroadcastExchange #10 - WholeStageCodegen (13) - Project [c_customer_sk] - BroadcastHashJoin [ca_address_sk,c_current_addr_sk] - InputAdapter - ReusedExchange [ca_address_sk] #7 - Filter [c_customer_sk,c_current_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_current_addr_sk] + ReusedExchange [c_customer_sk] #6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/explain.txt index e9a2b7a375b01..b74dfb49c9f03 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/explain.txt @@ -10,15 +10,15 @@ TakeOrderedAndProject (32) : :- * Project (16) : : +- * BroadcastHashJoin Inner BuildRight (15) : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildLeft (9) - : : : :- BroadcastExchange (5) - : : : : +- * Project (4) - : : : : +- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.date_dim (1) - : : : +- * Filter (8) - : : : +- * ColumnarToRow (7) - : : : +- Scan parquet default.web_sales (6) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.web_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) : : +- BroadcastExchange (14) : : +- * Filter (13) : : +- * ColumnarToRow (12) @@ -33,50 +33,50 @@ TakeOrderedAndProject (32) +- Scan parquet default.warehouse (23) -(1) Scan parquet default.date_dim -Output [2]: [d_date_sk#1, d_month_seq#2] +(1) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, ws_warehouse_sk#5] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_ship_mode_sk), IsNotNull(ws_web_site_sk), IsNotNull(ws_ship_date_sk)] +ReadSchema: struct -(2) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#1, d_month_seq#2] +(2) ColumnarToRow [codegen id : 5] +Input [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, ws_warehouse_sk#5] -(3) Filter [codegen id : 1] -Input [2]: [d_date_sk#1, d_month_seq#2] -Condition : (((isnotnull(d_month_seq#2) AND (d_month_seq#2 >= 1200)) AND (d_month_seq#2 <= 1211)) AND isnotnull(d_date_sk#1)) +(3) Filter [codegen id : 5] +Input [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, ws_warehouse_sk#5] +Condition : (((isnotnull(ws_warehouse_sk#5) AND isnotnull(ws_ship_mode_sk#4)) AND isnotnull(ws_web_site_sk#3)) AND 
isnotnull(ws_ship_date_sk#2)) -(4) Project [codegen id : 1] -Output [1]: [d_date_sk#1] -Input [2]: [d_date_sk#1, d_month_seq#2] +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#6, d_month_seq#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct -(5) BroadcastExchange -Input [1]: [d_date_sk#1] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#3] +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#6, d_month_seq#7] -(6) Scan parquet default.web_sales -Output [5]: [ws_sold_date_sk#4, ws_ship_date_sk#5, ws_web_site_sk#6, ws_ship_mode_sk#7, ws_warehouse_sk#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_ship_mode_sk), IsNotNull(ws_web_site_sk), IsNotNull(ws_ship_date_sk)] -ReadSchema: struct +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#6, d_month_seq#7] +Condition : (((isnotnull(d_month_seq#7) AND (d_month_seq#7 >= 1200)) AND (d_month_seq#7 <= 1211)) AND isnotnull(d_date_sk#6)) -(7) ColumnarToRow -Input [5]: [ws_sold_date_sk#4, ws_ship_date_sk#5, ws_web_site_sk#6, ws_ship_mode_sk#7, ws_warehouse_sk#8] +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [2]: [d_date_sk#6, d_month_seq#7] -(8) Filter -Input [5]: [ws_sold_date_sk#4, ws_ship_date_sk#5, ws_web_site_sk#6, ws_ship_mode_sk#7, ws_warehouse_sk#8] -Condition : (((isnotnull(ws_warehouse_sk#8) AND isnotnull(ws_ship_mode_sk#7)) AND isnotnull(ws_web_site_sk#6)) AND isnotnull(ws_ship_date_sk#5)) +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] (9) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [d_date_sk#1] -Right keys [1]: [ws_ship_date_sk#5] +Left keys [1]: [ws_ship_date_sk#2] +Right keys [1]: [d_date_sk#6] Join condition: None (10) Project [codegen id : 5] -Output [5]: [ws_sold_date_sk#4, ws_ship_date_sk#5, ws_web_site_sk#6, ws_ship_mode_sk#7, ws_warehouse_sk#8] -Input [6]: [d_date_sk#1, ws_sold_date_sk#4, ws_ship_date_sk#5, ws_web_site_sk#6, ws_ship_mode_sk#7, ws_warehouse_sk#8] +Output [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, ws_warehouse_sk#5] +Input [6]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, ws_warehouse_sk#5, d_date_sk#6] (11) Scan parquet default.web_site Output [2]: [web_site_sk#9, web_name#10] @@ -97,13 +97,13 @@ Input [2]: [web_site_sk#9, web_name#10] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] (15) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ws_web_site_sk#6] +Left keys [1]: [ws_web_site_sk#3] Right keys [1]: [web_site_sk#9] Join condition: None (16) Project [codegen id : 5] -Output [5]: [ws_sold_date_sk#4, ws_ship_date_sk#5, ws_ship_mode_sk#7, ws_warehouse_sk#8, web_name#10] -Input [7]: [ws_sold_date_sk#4, ws_ship_date_sk#5, ws_web_site_sk#6, ws_ship_mode_sk#7, ws_warehouse_sk#8, web_site_sk#9, web_name#10] +Output [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_ship_mode_sk#4, ws_warehouse_sk#5, web_name#10] +Input [7]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, ws_warehouse_sk#5, web_site_sk#9, web_name#10] (17) Scan parquet default.ship_mode Output [2]: [sm_ship_mode_sk#12, sm_type#13] @@ -124,13 +124,13 
@@ Input [2]: [sm_ship_mode_sk#12, sm_type#13] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] (21) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ws_ship_mode_sk#7] +Left keys [1]: [ws_ship_mode_sk#4] Right keys [1]: [sm_ship_mode_sk#12] Join condition: None (22) Project [codegen id : 5] -Output [5]: [ws_sold_date_sk#4, ws_ship_date_sk#5, ws_warehouse_sk#8, web_name#10, sm_type#13] -Input [7]: [ws_sold_date_sk#4, ws_ship_date_sk#5, ws_ship_mode_sk#7, ws_warehouse_sk#8, web_name#10, sm_ship_mode_sk#12, sm_type#13] +Output [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_warehouse_sk#5, web_name#10, sm_type#13] +Input [7]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_ship_mode_sk#4, ws_warehouse_sk#5, web_name#10, sm_ship_mode_sk#12, sm_type#13] (23) Scan parquet default.warehouse Output [2]: [w_warehouse_sk#15, w_warehouse_name#16] @@ -151,18 +151,18 @@ Input [2]: [w_warehouse_sk#15, w_warehouse_name#16] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] (27) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ws_warehouse_sk#8] +Left keys [1]: [ws_warehouse_sk#5] Right keys [1]: [w_warehouse_sk#15] Join condition: None (28) Project [codegen id : 5] -Output [5]: [ws_sold_date_sk#4, ws_ship_date_sk#5, w_warehouse_name#16, sm_type#13, web_name#10] -Input [7]: [ws_sold_date_sk#4, ws_ship_date_sk#5, ws_warehouse_sk#8, web_name#10, sm_type#13, w_warehouse_sk#15, w_warehouse_name#16] +Output [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, w_warehouse_name#16, sm_type#13, web_name#10] +Input [7]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_warehouse_sk#5, web_name#10, sm_type#13, w_warehouse_sk#15, w_warehouse_name#16] (29) HashAggregate [codegen id : 5] -Input [5]: [ws_sold_date_sk#4, ws_ship_date_sk#5, w_warehouse_name#16, sm_type#13, web_name#10] +Input [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, w_warehouse_name#16, sm_type#13, web_name#10] Keys [3]: [substr(w_warehouse_name#16, 1, 20) AS substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10] -Functions [5]: [partial_sum(cast(CASE WHEN ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 30) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 60) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 90) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 120) THEN 1 ELSE 0 END as bigint))] +Functions [5]: [partial_sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] Aggregate Attributes [5]: [sum#19, sum#20, sum#21, sum#22, 
sum#23] Results [8]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10, sum#24, sum#25, sum#26, sum#27, sum#28] @@ -173,9 +173,9 @@ Arguments: hashpartitioning(substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, w (31) HashAggregate [codegen id : 6] Input [8]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10, sum#24, sum#25, sum#26, sum#27, sum#28] Keys [3]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10] -Functions [5]: [sum(cast(CASE WHEN ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 30) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 60) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 90) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 120) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [5]: [sum(cast(CASE WHEN ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 30) THEN 1 ELSE 0 END as bigint))#30, sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 30) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 60)) THEN 1 ELSE 0 END as bigint))#31, sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 60) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 90)) THEN 1 ELSE 0 END as bigint))#32, sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 90) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 120)) THEN 1 ELSE 0 END as bigint))#33, sum(cast(CASE WHEN ((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 120) THEN 1 ELSE 0 END as bigint))#34] -Results [8]: [substr(w_warehouse_name#16, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#13, web_name#10, sum(cast(CASE WHEN ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 30) THEN 1 ELSE 0 END as bigint))#30 AS 30 days #36, sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 30) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 60)) THEN 1 ELSE 0 END as bigint))#31 AS 31 - 60 days #37, sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 60) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 90)) THEN 1 ELSE 0 END as bigint))#32 AS 61 - 90 days #38, sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 90) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 120)) THEN 1 ELSE 0 END as bigint))#33 AS 91 - 120 days #39, sum(cast(CASE WHEN ((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 120) THEN 1 ELSE 0 END as bigint))#34 AS >120 days #40] +Functions [5]: [sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [5]: [sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) 
AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33, sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34] +Results [8]: [substr(w_warehouse_name#16, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#13, web_name#10, sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30 AS 30 days #36, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31 AS 31 - 60 days #37, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32 AS 61 - 90 days #38, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33 AS 91 - 120 days #39, sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34 AS >120 days #40] (32) TakeOrderedAndProject Input [8]: [substr(w_warehouse_name, 1, 20)#35, sm_type#13, web_name#10, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/simplified.txt index 59cfc4b7b249a..9b16b44792ca4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/simplified.txt @@ -12,7 +12,11 @@ TakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,web_name,30 days Project [ws_sold_date_sk,ws_ship_date_sk,ws_ship_mode_sk,ws_warehouse_sk,web_name] BroadcastHashJoin [ws_web_site_sk,web_site_sk] Project [ws_sold_date_sk,ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_warehouse_sk] - BroadcastHashJoin [d_date_sk,ws_ship_date_sk] + BroadcastHashJoin [ws_ship_date_sk,d_date_sk] + Filter [ws_warehouse_sk,ws_ship_mode_sk,ws_web_site_sk,ws_ship_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_warehouse_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen (1) @@ -21,10 +25,6 @@ TakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,web_name,30 days ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_month_seq] - Filter [ws_warehouse_sk,ws_ship_mode_sk,ws_web_site_sk,ws_ship_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_warehouse_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen (2) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/explain.txt index 4b863587b08d9..5db04537d6371 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/explain.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/explain.txt @@ -14,15 +14,15 @@ TakeOrderedAndProject (55) : : :- * Project (17) : : : +- * BroadcastHashJoin Inner BuildRight (16) : : : :- * Project (10) - : : : : +- * BroadcastHashJoin Inner BuildLeft (9) - : : : : :- BroadcastExchange (5) - : : : : : +- * Project (4) - : : : : : +- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.ship_mode (1) - : : : : +- * Filter (8) - : : : : +- * ColumnarToRow (7) - : : : : +- Scan parquet default.web_sales (6) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.web_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.ship_mode (4) : : : +- BroadcastExchange (15) : : : +- * Project (14) : : : +- * Filter (13) @@ -46,60 +46,60 @@ TakeOrderedAndProject (55) : :- * Project (41) : : +- * BroadcastHashJoin Inner BuildRight (40) : : :- * Project (38) - : : : +- * BroadcastHashJoin Inner BuildLeft (37) - : : : :- ReusedExchange (33) - : : : +- * Filter (36) - : : : +- * ColumnarToRow (35) - : : : +- Scan parquet default.catalog_sales (34) + : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : :- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet default.catalog_sales (33) + : : : +- ReusedExchange (36) : : +- ReusedExchange (39) : +- ReusedExchange (42) +- ReusedExchange (45) -(1) Scan parquet default.ship_mode -Output [2]: [sm_ship_mode_sk#1, sm_carrier#2] +(1) Scan parquet default.web_sales +Output [7]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7] Batched: true -Location [not included in comparison]/{warehouse_dir}/ship_mode] -PushedFilters: [In(sm_carrier, [DHL,BARIAN]), IsNotNull(sm_ship_mode_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_ship_mode_sk)] +ReadSchema: struct -(2) ColumnarToRow [codegen id : 1] -Input [2]: [sm_ship_mode_sk#1, sm_carrier#2] +(2) ColumnarToRow [codegen id : 5] +Input [7]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7] -(3) Filter [codegen id : 1] -Input [2]: [sm_ship_mode_sk#1, sm_carrier#2] -Condition : (sm_carrier#2 IN (DHL,BARIAN) AND isnotnull(sm_ship_mode_sk#1)) +(3) Filter [codegen id : 5] +Input [7]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7] +Condition : (((isnotnull(ws_warehouse_sk#4) AND isnotnull(ws_sold_date_sk#1)) AND isnotnull(ws_sold_time_sk#2)) AND isnotnull(ws_ship_mode_sk#3)) -(4) Project [codegen id : 1] -Output [1]: [sm_ship_mode_sk#1] -Input [2]: [sm_ship_mode_sk#1, sm_carrier#2] +(4) Scan parquet default.ship_mode +Output [2]: [sm_ship_mode_sk#8, sm_carrier#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/ship_mode] +PushedFilters: [In(sm_carrier, [DHL,BARIAN]), IsNotNull(sm_ship_mode_sk)] +ReadSchema: struct -(5) BroadcastExchange -Input [1]: [sm_ship_mode_sk#1] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#3] +(5) ColumnarToRow [codegen id : 1] +Input [2]: [sm_ship_mode_sk#8, 
sm_carrier#9] -(6) Scan parquet default.web_sales -Output [7]: [ws_sold_date_sk#4, ws_sold_time_sk#5, ws_ship_mode_sk#6, ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_ship_mode_sk)] -ReadSchema: struct +(6) Filter [codegen id : 1] +Input [2]: [sm_ship_mode_sk#8, sm_carrier#9] +Condition : (sm_carrier#9 IN (DHL,BARIAN) AND isnotnull(sm_ship_mode_sk#8)) -(7) ColumnarToRow -Input [7]: [ws_sold_date_sk#4, ws_sold_time_sk#5, ws_ship_mode_sk#6, ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10] +(7) Project [codegen id : 1] +Output [1]: [sm_ship_mode_sk#8] +Input [2]: [sm_ship_mode_sk#8, sm_carrier#9] -(8) Filter -Input [7]: [ws_sold_date_sk#4, ws_sold_time_sk#5, ws_ship_mode_sk#6, ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10] -Condition : (((isnotnull(ws_warehouse_sk#7) AND isnotnull(ws_sold_date_sk#4)) AND isnotnull(ws_sold_time_sk#5)) AND isnotnull(ws_ship_mode_sk#6)) +(8) BroadcastExchange +Input [1]: [sm_ship_mode_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] (9) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [sm_ship_mode_sk#1] -Right keys [1]: [ws_ship_mode_sk#6] +Left keys [1]: [ws_ship_mode_sk#3] +Right keys [1]: [sm_ship_mode_sk#8] Join condition: None (10) Project [codegen id : 5] -Output [6]: [ws_sold_date_sk#4, ws_sold_time_sk#5, ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10] -Input [8]: [sm_ship_mode_sk#1, ws_sold_date_sk#4, ws_sold_time_sk#5, ws_ship_mode_sk#6, ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10] +Output [6]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7] +Input [8]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, sm_ship_mode_sk#8] (11) Scan parquet default.time_dim Output [2]: [t_time_sk#11, t_time#12] @@ -124,13 +124,13 @@ Input [1]: [t_time_sk#11] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] (16) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ws_sold_time_sk#5] +Left keys [1]: [ws_sold_time_sk#2] Right keys [1]: [t_time_sk#11] Join condition: None (17) Project [codegen id : 5] -Output [5]: [ws_sold_date_sk#4, ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10] -Input [7]: [ws_sold_date_sk#4, ws_sold_time_sk#5, ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10, t_time_sk#11] +Output [5]: [ws_sold_date_sk#1, ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7] +Input [7]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, t_time_sk#11] (18) Scan parquet default.date_dim Output [3]: [d_date_sk#14, d_year#15, d_moy#16] @@ -151,13 +151,13 @@ Input [3]: [d_date_sk#14, d_year#15, d_moy#16] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] (22) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ws_sold_date_sk#4] +Left keys [1]: [ws_sold_date_sk#1] Right keys [1]: [d_date_sk#14] Join condition: None (23) Project [codegen id : 5] -Output [6]: [ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10, d_year#15, d_moy#16] 
-Input [8]: [ws_sold_date_sk#4, ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10, d_date_sk#14, d_year#15, d_moy#16] +Output [6]: [ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, d_year#15, d_moy#16] +Input [8]: [ws_sold_date_sk#1, ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, d_date_sk#14, d_year#15, d_moy#16] (24) Scan parquet default.warehouse Output [7]: [w_warehouse_sk#18, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24] @@ -178,18 +178,18 @@ Input [7]: [w_warehouse_sk#18, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] (28) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ws_warehouse_sk#7] +Left keys [1]: [ws_warehouse_sk#4] Right keys [1]: [w_warehouse_sk#18] Join condition: None (29) Project [codegen id : 5] -Output [11]: [ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15, d_moy#16] -Input [13]: [ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10, d_year#15, d_moy#16, w_warehouse_sk#18, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24] +Output [11]: [ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15, d_moy#16] +Input [13]: [ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, d_year#15, d_moy#16, w_warehouse_sk#18, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24] (30) HashAggregate [codegen id : 5] -Input [11]: [ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15, d_moy#16] +Input [11]: [ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15, d_moy#16] Keys [7]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15] -Functions [24]: [partial_sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 6) THEN 
CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), 
true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] +Functions [24]: [partial_sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as 
decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] Aggregate Attributes [48]: [sum#26, isEmpty#27, sum#28, isEmpty#29, sum#30, isEmpty#31, sum#32, isEmpty#33, sum#34, isEmpty#35, sum#36, isEmpty#37, sum#38, isEmpty#39, sum#40, isEmpty#41, sum#42, isEmpty#43, sum#44, isEmpty#45, sum#46, isEmpty#47, sum#48, isEmpty#49, sum#50, isEmpty#51, sum#52, isEmpty#53, sum#54, 
isEmpty#55, sum#56, isEmpty#57, sum#58, isEmpty#59, sum#60, isEmpty#61, sum#62, isEmpty#63, sum#64, isEmpty#65, sum#66, isEmpty#67, sum#68, isEmpty#69, sum#70, isEmpty#71, sum#72, isEmpty#73] Results [55]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] @@ -200,35 +200,35 @@ Arguments: hashpartitioning(w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21 (32) HashAggregate [codegen id : 6] Input [55]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] Keys [7]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15] -Functions [24]: [sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), 
DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 12) THEN 
CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] -Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123, sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124, sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125, sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126, sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127, sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128, sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129, sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130, sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131, sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132, sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#133, sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134, sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135, sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), 
true) ELSE 0.00 END)#136, sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137, sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138, sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139, sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#140, sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141, sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142, sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143, sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#144, sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145, sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146] -Results [32]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, DHL,BARIAN AS ship_carriers#147, d_year#15 AS year#148, sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123 AS jan_sales#149, sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124 AS feb_sales#150, sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125 AS mar_sales#151, sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126 AS 
apr_sales#152, sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127 AS may_sales#153, sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128 AS jun_sales#154, sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129 AS jul_sales#155, sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130 AS aug_sales#156, sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131 AS sep_sales#157, sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132 AS oct_sales#158, sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#133 AS nov_sales#159, sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134 AS dec_sales#160, sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135 AS jan_net#161, sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#136 AS feb_net#162, sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137 AS mar_net#163, sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138 AS apr_net#164, sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139 AS may_net#165, sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#140 AS 
jun_net#166, sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141 AS jul_net#167, sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142 AS aug_net#168, sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143 AS sep_net#169, sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#144 AS oct_net#170, sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145 AS nov_net#171, sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146 AS dec_net#172] +Functions [24]: [sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * 
promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), 
DecimalType(18,2), true) ELSE 0.00 END)] +Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123, sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124, sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125, sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126, sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127, sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128, sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129, sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130, sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131, sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132, sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#133, sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134, sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135, sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#136, sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * 
promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137, sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138, sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139, sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#140, sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141, sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142, sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143, sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#144, sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145, sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146] +Results [32]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, DHL,BARIAN AS ship_carriers#147, d_year#15 AS year#148, sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123 AS jan_sales#149, sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124 AS feb_sales#150, sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125 AS mar_sales#151, sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126 AS apr_sales#152, sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * 
promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127 AS may_sales#153, sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128 AS jun_sales#154, sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129 AS jul_sales#155, sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130 AS aug_sales#156, sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131 AS sep_sales#157, sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132 AS oct_sales#158, sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#133 AS nov_sales#159, sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134 AS dec_sales#160, sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135 AS jan_net#161, sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#136 AS feb_net#162, sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137 AS mar_net#163, sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138 AS apr_net#164, sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139 AS may_net#165, sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#140 AS jun_net#166, sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * 
promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141 AS jul_net#167, sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142 AS aug_net#168, sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143 AS sep_net#169, sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#144 AS oct_net#170, sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145 AS nov_net#171, sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146 AS dec_net#172] -(33) ReusedExchange [Reuses operator id: 5] -Output [1]: [sm_ship_mode_sk#1] - -(34) Scan parquet default.catalog_sales +(33) Scan parquet default.catalog_sales Output [7]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_sales] PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_sold_time_sk), IsNotNull(cs_ship_mode_sk)] ReadSchema: struct -(35) ColumnarToRow +(34) ColumnarToRow [codegen id : 11] Input [7]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179] -(36) Filter +(35) Filter [codegen id : 11] Input [7]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179] Condition : (((isnotnull(cs_warehouse_sk#176) AND isnotnull(cs_sold_date_sk#173)) AND isnotnull(cs_sold_time_sk#174)) AND isnotnull(cs_ship_mode_sk#175)) +(36) ReusedExchange [Reuses operator id: 8] +Output [1]: [sm_ship_mode_sk#8] + (37) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [sm_ship_mode_sk#1] -Right keys [1]: [cs_ship_mode_sk#175] +Left keys [1]: [cs_ship_mode_sk#175] +Right keys [1]: [sm_ship_mode_sk#8] Join condition: None (38) Project [codegen id : 11] Output [6]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179] -Input [8]: [sm_ship_mode_sk#1, cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179] +Input [8]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, sm_ship_mode_sk#8] (39) ReusedExchange [Reuses operator id: 15] Output [1]: [t_time_sk#11] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/simplified.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/simplified.txt index 465d269a847c3..ddfb04d8df5e3 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/simplified.txt @@ -20,7 +20,11 @@ TakeOrderedAndProject [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_stat Project [ws_sold_date_sk,ws_warehouse_sk,ws_quantity,ws_ext_sales_price,ws_net_paid] BroadcastHashJoin [ws_sold_time_sk,t_time_sk] Project [ws_sold_date_sk,ws_sold_time_sk,ws_warehouse_sk,ws_quantity,ws_ext_sales_price,ws_net_paid] - BroadcastHashJoin [sm_ship_mode_sk,ws_ship_mode_sk] + BroadcastHashJoin [ws_ship_mode_sk,sm_ship_mode_sk] + Filter [ws_warehouse_sk,ws_sold_date_sk,ws_sold_time_sk,ws_ship_mode_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_sold_time_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_quantity,ws_ext_sales_price,ws_net_paid] InputAdapter BroadcastExchange #3 WholeStageCodegen (1) @@ -29,10 +33,6 @@ TakeOrderedAndProject [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_stat ColumnarToRow InputAdapter Scan parquet default.ship_mode [sm_ship_mode_sk,sm_carrier] - Filter [ws_warehouse_sk,ws_sold_date_sk,ws_sold_time_sk,ws_ship_mode_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_sold_time_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_quantity,ws_ext_sales_price,ws_net_paid] InputAdapter BroadcastExchange #4 WholeStageCodegen (2) @@ -68,13 +68,13 @@ TakeOrderedAndProject [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_stat Project [cs_sold_date_sk,cs_warehouse_sk,cs_quantity,cs_sales_price,cs_net_paid_inc_tax] BroadcastHashJoin [cs_sold_time_sk,t_time_sk] Project [cs_sold_date_sk,cs_sold_time_sk,cs_warehouse_sk,cs_quantity,cs_sales_price,cs_net_paid_inc_tax] - BroadcastHashJoin [sm_ship_mode_sk,cs_ship_mode_sk] - InputAdapter - ReusedExchange [sm_ship_mode_sk] #3 + BroadcastHashJoin [cs_ship_mode_sk,sm_ship_mode_sk] Filter [cs_warehouse_sk,cs_sold_date_sk,cs_sold_time_sk,cs_ship_mode_sk] ColumnarToRow InputAdapter Scan parquet default.catalog_sales [cs_sold_date_sk,cs_sold_time_sk,cs_ship_mode_sk,cs_warehouse_sk,cs_quantity,cs_sales_price,cs_net_paid_inc_tax] + InputAdapter + ReusedExchange [sm_ship_mode_sk] #3 InputAdapter ReusedExchange [t_time_sk] #4 InputAdapter diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/explain.txt index a100b6659f162..3f8106c96379a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/explain.txt @@ -24,15 +24,15 @@ TakeOrderedAndProject (79) : : : : : :- * Project (17) : : : : : : +- * BroadcastHashJoin Inner BuildRight (16) : : : : : : :- * Project (10) - : : : : : : : +- * BroadcastHashJoin Inner BuildLeft (9) - : : : : : : : :- BroadcastExchange (5) - : : : : : : : : +- * Project (4) - : : : : : : : : +- * Filter (3) - : : : : : : : : +- * ColumnarToRow (2) - : : : : : : : : +- Scan parquet default.household_demographics (1) - : : : : : : : +- * Filter (8) - : : : : : : : +- * ColumnarToRow (7) - : : : : : : : +- Scan parquet default.catalog_sales (6) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : : : :- * Filter (3) + : : : : : : : : +- * 
ColumnarToRow (2) + : : : : : : : : +- Scan parquet default.catalog_sales (1) + : : : : : : : +- BroadcastExchange (8) + : : : : : : : +- * Project (7) + : : : : : : : +- * Filter (6) + : : : : : : : +- * ColumnarToRow (5) + : : : : : : : +- Scan parquet default.household_demographics (4) : : : : : : +- BroadcastExchange (15) : : : : : : +- * Project (14) : : : : : : +- * Filter (13) @@ -49,26 +49,26 @@ TakeOrderedAndProject (79) : : : : +- Scan parquet default.item (26) : : : +- BroadcastExchange (43) : : : +- * Project (42) - : : : +- * BroadcastHashJoin Inner BuildRight (41) - : : : :- * Filter (35) - : : : : +- * ColumnarToRow (34) - : : : : +- Scan parquet default.date_dim (33) - : : : +- BroadcastExchange (40) - : : : +- * Project (39) - : : : +- * Filter (38) - : : : +- * ColumnarToRow (37) - : : : +- Scan parquet default.date_dim (36) + : : : +- * BroadcastHashJoin Inner BuildLeft (41) + : : : :- BroadcastExchange (37) + : : : : +- * Project (36) + : : : : +- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet default.date_dim (33) + : : : +- * Filter (40) + : : : +- * ColumnarToRow (39) + : : : +- Scan parquet default.date_dim (38) : : +- * Sort (58) : : +- Exchange (57) : : +- * Project (56) - : : +- * BroadcastHashJoin Inner BuildLeft (55) - : : :- BroadcastExchange (51) - : : : +- * Filter (50) - : : : +- * ColumnarToRow (49) - : : : +- Scan parquet default.warehouse (48) - : : +- * Filter (54) - : : +- * ColumnarToRow (53) - : : +- Scan parquet default.inventory (52) + : : +- * BroadcastHashJoin Inner BuildRight (55) + : : :- * Filter (50) + : : : +- * ColumnarToRow (49) + : : : +- Scan parquet default.inventory (48) + : : +- BroadcastExchange (54) + : : +- * Filter (53) + : : +- * ColumnarToRow (52) + : : +- Scan parquet default.warehouse (51) : +- BroadcastExchange (64) : +- * Filter (63) : +- * ColumnarToRow (62) @@ -80,50 +80,50 @@ TakeOrderedAndProject (79) +- Scan parquet default.catalog_returns (69) -(1) Scan parquet default.household_demographics -Output [2]: [hd_demo_sk#1, hd_buy_potential#2] +(1) Scan parquet default.catalog_sales +Output [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] Batched: true -Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,>10000), IsNotNull(hd_demo_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_quantity), IsNotNull(cs_item_sk), IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_hdemo_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_ship_date_sk)] +ReadSchema: struct -(2) ColumnarToRow [codegen id : 1] -Input [2]: [hd_demo_sk#1, hd_buy_potential#2] +(2) ColumnarToRow [codegen id : 4] +Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] -(3) Filter [codegen id : 1] -Input [2]: [hd_demo_sk#1, hd_buy_potential#2] -Condition : ((isnotnull(hd_buy_potential#2) AND (hd_buy_potential#2 = >10000)) AND isnotnull(hd_demo_sk#1)) +(3) Filter [codegen id : 4] +Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] +Condition : (((((isnotnull(cs_quantity#8) AND isnotnull(cs_item_sk#5)) AND isnotnull(cs_bill_cdemo_sk#3)) AND isnotnull(cs_bill_hdemo_sk#4)) AND 
isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_ship_date_sk#2)) -(4) Project [codegen id : 1] -Output [1]: [hd_demo_sk#1] -Input [2]: [hd_demo_sk#1, hd_buy_potential#2] +(4) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#9, hd_buy_potential#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,>10000), IsNotNull(hd_demo_sk)] +ReadSchema: struct -(5) BroadcastExchange -Input [1]: [hd_demo_sk#1] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#3] +(5) ColumnarToRow [codegen id : 1] +Input [2]: [hd_demo_sk#9, hd_buy_potential#10] -(6) Scan parquet default.catalog_sales -Output [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_bill_hdemo_sk#7, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_quantity), IsNotNull(cs_item_sk), IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_hdemo_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_ship_date_sk)] -ReadSchema: struct +(6) Filter [codegen id : 1] +Input [2]: [hd_demo_sk#9, hd_buy_potential#10] +Condition : ((isnotnull(hd_buy_potential#10) AND (hd_buy_potential#10 = >10000)) AND isnotnull(hd_demo_sk#9)) -(7) ColumnarToRow -Input [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_bill_hdemo_sk#7, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] +(7) Project [codegen id : 1] +Output [1]: [hd_demo_sk#9] +Input [2]: [hd_demo_sk#9, hd_buy_potential#10] -(8) Filter -Input [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_bill_hdemo_sk#7, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] -Condition : (((((isnotnull(cs_quantity#11) AND isnotnull(cs_item_sk#8)) AND isnotnull(cs_bill_cdemo_sk#6)) AND isnotnull(cs_bill_hdemo_sk#7)) AND isnotnull(cs_sold_date_sk#4)) AND isnotnull(cs_ship_date_sk#5)) +(8) BroadcastExchange +Input [1]: [hd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] (9) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [hd_demo_sk#1] -Right keys [1]: [cs_bill_hdemo_sk#7] +Left keys [1]: [cs_bill_hdemo_sk#4] +Right keys [1]: [hd_demo_sk#9] Join condition: None (10) Project [codegen id : 4] -Output [7]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] -Input [9]: [hd_demo_sk#1, cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_bill_hdemo_sk#7, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] +Output [7]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] +Input [9]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, hd_demo_sk#9] (11) Scan parquet default.customer_demographics Output [2]: [cd_demo_sk#12, cd_marital_status#13] @@ -148,13 +148,13 @@ Input [1]: [cd_demo_sk#12] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] (16) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_bill_cdemo_sk#6] +Left keys [1]: [cs_bill_cdemo_sk#3] Right keys [1]: [cd_demo_sk#12] Join condition: None (17) Project [codegen id : 4] -Output [6]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_item_sk#8, cs_promo_sk#9, 
cs_order_number#10, cs_quantity#11] -Input [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, cd_demo_sk#12] +Output [6]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] +Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, cd_demo_sk#12] (18) Scan parquet default.date_dim Output [2]: [d_date_sk#15, d_date#16] @@ -175,21 +175,21 @@ Input [2]: [d_date_sk#15, d_date#16] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] (22) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_ship_date_sk#5] +Left keys [1]: [cs_ship_date_sk#2] Right keys [1]: [d_date_sk#15] Join condition: None (23) Project [codegen id : 4] -Output [6]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16] -Input [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date_sk#15, d_date#16] +Output [6]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16] +Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date_sk#15, d_date#16] (24) Exchange -Input [6]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16] -Arguments: hashpartitioning(cs_item_sk#8, 5), true, [id=#18] +Input [6]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16] +Arguments: hashpartitioning(cs_item_sk#5, 5), true, [id=#18] (25) Sort [codegen id : 5] -Input [6]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16] -Arguments: [cs_item_sk#8 ASC NULLS FIRST], false, 0 +Input [6]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16] +Arguments: [cs_item_sk#5 ASC NULLS FIRST], false, 0 (26) Scan parquet default.item Output [2]: [i_item_sk#19, i_item_desc#20] @@ -214,137 +214,137 @@ Input [2]: [i_item_sk#19, i_item_desc#20] Arguments: [i_item_sk#19 ASC NULLS FIRST], false, 0 (31) SortMergeJoin [codegen id : 10] -Left keys [1]: [cs_item_sk#8] +Left keys [1]: [cs_item_sk#5] Right keys [1]: [i_item_sk#19] Join condition: None (32) Project [codegen id : 10] -Output [7]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16, i_item_desc#20] -Input [8]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16, i_item_sk#19, i_item_desc#20] +Output [7]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20] +Input [8]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_sk#19, i_item_desc#20] (33) Scan parquet default.date_dim -Output [2]: [d_date_sk#22, d_week_seq#23] +Output [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] -ReadSchema: struct +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] +ReadSchema: struct -(34) ColumnarToRow [codegen id : 9] -Input [2]: [d_date_sk#22, d_week_seq#23] +(34) ColumnarToRow [codegen id : 8] +Input [4]: [d_date_sk#22, 
d_date#23, d_week_seq#24, d_year#25] -(35) Filter [codegen id : 9] -Input [2]: [d_date_sk#22, d_week_seq#23] -Condition : (isnotnull(d_week_seq#23) AND isnotnull(d_date_sk#22)) +(35) Filter [codegen id : 8] +Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] +Condition : ((((isnotnull(d_year#25) AND (d_year#25 = 1999)) AND isnotnull(d_date_sk#22)) AND isnotnull(d_week_seq#24)) AND isnotnull(d_date#23)) -(36) Scan parquet default.date_dim -Output [4]: [d_date_sk#24, d_date#25, d_week_seq#26, d_year#27] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] -ReadSchema: struct +(36) Project [codegen id : 8] +Output [3]: [d_date_sk#22, d_date#23, d_week_seq#24] +Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] -(37) ColumnarToRow [codegen id : 8] -Input [4]: [d_date_sk#24, d_date#25, d_week_seq#26, d_year#27] +(37) BroadcastExchange +Input [3]: [d_date_sk#22, d_date#23, d_week_seq#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#26] -(38) Filter [codegen id : 8] -Input [4]: [d_date_sk#24, d_date#25, d_week_seq#26, d_year#27] -Condition : ((((isnotnull(d_year#27) AND (d_year#27 = 1999)) AND isnotnull(d_date_sk#24)) AND isnotnull(d_week_seq#26)) AND isnotnull(d_date#25)) +(38) Scan parquet default.date_dim +Output [2]: [d_date_sk#27, d_week_seq#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct -(39) Project [codegen id : 8] -Output [3]: [d_date_sk#24, d_date#25, d_week_seq#26] -Input [4]: [d_date_sk#24, d_date#25, d_week_seq#26, d_year#27] +(39) ColumnarToRow +Input [2]: [d_date_sk#27, d_week_seq#28] -(40) BroadcastExchange -Input [3]: [d_date_sk#24, d_date#25, d_week_seq#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#28] +(40) Filter +Input [2]: [d_date_sk#27, d_week_seq#28] +Condition : (isnotnull(d_week_seq#28) AND isnotnull(d_date_sk#27)) (41) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [d_week_seq#23] -Right keys [1]: [d_week_seq#26] +Left keys [1]: [d_week_seq#24] +Right keys [1]: [d_week_seq#28] Join condition: None (42) Project [codegen id : 9] -Output [4]: [d_date_sk#22, d_date_sk#24, d_date#25, d_week_seq#26] -Input [5]: [d_date_sk#22, d_week_seq#23, d_date_sk#24, d_date#25, d_week_seq#26] +Output [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27] +Input [5]: [d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27, d_week_seq#28] (43) BroadcastExchange -Input [4]: [d_date_sk#22, d_date_sk#24, d_date#25, d_week_seq#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#29] +Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] (44) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cs_sold_date_sk#4] -Right keys [1]: [d_date_sk#24] -Join condition: (d_date#16 > d_date#25 + 5 days) +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#22] +Join condition: (d_date#16 > d_date#23 + 5 days) (45) Project [codegen id : 10] -Output [7]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, i_item_desc#20, d_date_sk#22, d_week_seq#26] -Input [11]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, 
cs_quantity#11, d_date#16, i_item_desc#20, d_date_sk#22, d_date_sk#24, d_date#25, d_week_seq#26] +Output [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27] +Input [11]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20, d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27] (46) Exchange -Input [7]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, i_item_desc#20, d_date_sk#22, d_week_seq#26] -Arguments: hashpartitioning(cs_item_sk#8, d_date_sk#22, 5), true, [id=#30] +Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27] +Arguments: hashpartitioning(cs_item_sk#5, d_date_sk#27, 5), true, [id=#30] (47) Sort [codegen id : 11] -Input [7]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, i_item_desc#20, d_date_sk#22, d_week_seq#26] -Arguments: [cs_item_sk#8 ASC NULLS FIRST, d_date_sk#22 ASC NULLS FIRST], false, 0 +Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27] +Arguments: [cs_item_sk#5 ASC NULLS FIRST, d_date_sk#27 ASC NULLS FIRST], false, 0 -(48) Scan parquet default.warehouse -Output [2]: [w_warehouse_sk#31, w_warehouse_name#32] +(48) Scan parquet default.inventory +Output [4]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] Batched: true -Location [not included in comparison]/{warehouse_dir}/warehouse] -PushedFilters: [IsNotNull(w_warehouse_sk)] -ReadSchema: struct - -(49) ColumnarToRow [codegen id : 12] -Input [2]: [w_warehouse_sk#31, w_warehouse_name#32] +Location [not included in comparison]/{warehouse_dir}/inventory] +PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct -(50) Filter [codegen id : 12] -Input [2]: [w_warehouse_sk#31, w_warehouse_name#32] -Condition : isnotnull(w_warehouse_sk#31) +(49) ColumnarToRow [codegen id : 13] +Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] -(51) BroadcastExchange -Input [2]: [w_warehouse_sk#31, w_warehouse_name#32] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#33] +(50) Filter [codegen id : 13] +Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] +Condition : (((isnotnull(inv_quantity_on_hand#34) AND isnotnull(inv_item_sk#32)) AND isnotnull(inv_warehouse_sk#33)) AND isnotnull(inv_date_sk#31)) -(52) Scan parquet default.inventory -Output [4]: [inv_date_sk#34, inv_item_sk#35, inv_warehouse_sk#36, inv_quantity_on_hand#37] +(51) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#35, w_warehouse_name#36] Batched: true -Location [not included in comparison]/{warehouse_dir}/inventory] -PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(52) ColumnarToRow [codegen id : 12] +Input [2]: [w_warehouse_sk#35, w_warehouse_name#36] -(53) ColumnarToRow -Input [4]: [inv_date_sk#34, inv_item_sk#35, inv_warehouse_sk#36, inv_quantity_on_hand#37] +(53) Filter [codegen id : 12] +Input [2]: [w_warehouse_sk#35, w_warehouse_name#36] +Condition : isnotnull(w_warehouse_sk#35) -(54) Filter -Input [4]: 
[inv_date_sk#34, inv_item_sk#35, inv_warehouse_sk#36, inv_quantity_on_hand#37] -Condition : (((isnotnull(inv_quantity_on_hand#37) AND isnotnull(inv_item_sk#35)) AND isnotnull(inv_warehouse_sk#36)) AND isnotnull(inv_date_sk#34)) +(54) BroadcastExchange +Input [2]: [w_warehouse_sk#35, w_warehouse_name#36] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#37] (55) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [w_warehouse_sk#31] -Right keys [1]: [inv_warehouse_sk#36] +Left keys [1]: [inv_warehouse_sk#33] +Right keys [1]: [w_warehouse_sk#35] Join condition: None (56) Project [codegen id : 13] -Output [4]: [w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_quantity_on_hand#37] -Input [6]: [w_warehouse_sk#31, w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_warehouse_sk#36, inv_quantity_on_hand#37] +Output [4]: [inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Input [6]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34, w_warehouse_sk#35, w_warehouse_name#36] (57) Exchange -Input [4]: [w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_quantity_on_hand#37] -Arguments: hashpartitioning(inv_item_sk#35, inv_date_sk#34, 5), true, [id=#38] +Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Arguments: hashpartitioning(inv_item_sk#32, inv_date_sk#31, 5), true, [id=#38] (58) Sort [codegen id : 14] -Input [4]: [w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_quantity_on_hand#37] -Arguments: [inv_item_sk#35 ASC NULLS FIRST, inv_date_sk#34 ASC NULLS FIRST], false, 0 +Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Arguments: [inv_item_sk#32 ASC NULLS FIRST, inv_date_sk#31 ASC NULLS FIRST], false, 0 (59) SortMergeJoin [codegen id : 16] -Left keys [2]: [cs_item_sk#8, d_date_sk#22] -Right keys [2]: [inv_item_sk#35, inv_date_sk#34] -Join condition: (inv_quantity_on_hand#37 < cs_quantity#11) +Left keys [2]: [cs_item_sk#5, d_date_sk#27] +Right keys [2]: [inv_item_sk#32, inv_date_sk#31] +Join condition: (inv_quantity_on_hand#34 < cs_quantity#8) (60) Project [codegen id : 16] -Output [6]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26] -Input [11]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, i_item_desc#20, d_date_sk#22, d_week_seq#26, w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_quantity_on_hand#37] +Output [6]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] +Input [11]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27, inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] (61) Scan parquet default.promotion Output [1]: [p_promo_sk#39] @@ -365,21 +365,21 @@ Input [1]: [p_promo_sk#39] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#40] (65) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [cs_promo_sk#9] +Left keys [1]: [cs_promo_sk#6] Right keys [1]: [p_promo_sk#39] Join condition: None (66) Project [codegen id : 16] -Output [5]: [cs_item_sk#8, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26] -Input [7]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26, p_promo_sk#39] +Output [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, 
i_item_desc#20, d_week_seq#24] +Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24, p_promo_sk#39] (67) Exchange -Input [5]: [cs_item_sk#8, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26] -Arguments: hashpartitioning(cs_item_sk#8, cs_order_number#10, 5), true, [id=#41] +Input [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] +Arguments: hashpartitioning(cs_item_sk#5, cs_order_number#7, 5), true, [id=#41] (68) Sort [codegen id : 17] -Input [5]: [cs_item_sk#8, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26] -Arguments: [cs_item_sk#8 ASC NULLS FIRST, cs_order_number#10 ASC NULLS FIRST], false, 0 +Input [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] +Arguments: [cs_item_sk#5 ASC NULLS FIRST, cs_order_number#7 ASC NULLS FIRST], false, 0 (69) Scan parquet default.catalog_returns Output [2]: [cr_item_sk#42, cr_order_number#43] @@ -404,33 +404,33 @@ Input [2]: [cr_item_sk#42, cr_order_number#43] Arguments: [cr_item_sk#42 ASC NULLS FIRST, cr_order_number#43 ASC NULLS FIRST], false, 0 (74) SortMergeJoin -Left keys [2]: [cs_item_sk#8, cs_order_number#10] +Left keys [2]: [cs_item_sk#5, cs_order_number#7] Right keys [2]: [cr_item_sk#42, cr_order_number#43] Join condition: None (75) Project [codegen id : 20] -Output [3]: [w_warehouse_name#32, i_item_desc#20, d_week_seq#26] -Input [7]: [cs_item_sk#8, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26, cr_item_sk#42, cr_order_number#43] +Output [3]: [w_warehouse_name#36, i_item_desc#20, d_week_seq#24] +Input [7]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24, cr_item_sk#42, cr_order_number#43] (76) HashAggregate [codegen id : 20] -Input [3]: [w_warehouse_name#32, i_item_desc#20, d_week_seq#26] -Keys [3]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26] +Input [3]: [w_warehouse_name#36, i_item_desc#20, d_week_seq#24] +Keys [3]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#45] -Results [4]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, count#46] +Results [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count#46] (77) Exchange -Input [4]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, count#46] -Arguments: hashpartitioning(i_item_desc#20, w_warehouse_name#32, d_week_seq#26, 5), true, [id=#47] +Input [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count#46] +Arguments: hashpartitioning(i_item_desc#20, w_warehouse_name#36, d_week_seq#24, 5), true, [id=#47] (78) HashAggregate [codegen id : 21] -Input [4]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, count#46] -Keys [3]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26] +Input [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count#46] +Keys [3]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#48] -Results [6]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, count(1)#48 AS no_promo#49, count(1)#48 AS promo#50, count(1)#48 AS total_cnt#51] +Results [6]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count(1)#48 AS no_promo#49, count(1)#48 AS promo#50, count(1)#48 AS total_cnt#51] (79) TakeOrderedAndProject -Input [6]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, no_promo#49, promo#50, total_cnt#51] -Arguments: 100, [total_cnt#51 DESC NULLS LAST, 
i_item_desc#20 ASC NULLS FIRST, w_warehouse_name#32 ASC NULLS FIRST, d_week_seq#26 ASC NULLS FIRST], [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, no_promo#49, promo#50, total_cnt#51] +Input [6]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, no_promo#49, promo#50, total_cnt#51] +Arguments: 100, [total_cnt#51 DESC NULLS LAST, i_item_desc#20 ASC NULLS FIRST, w_warehouse_name#36 ASC NULLS FIRST, d_week_seq#24 ASC NULLS FIRST], [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, no_promo#49, promo#50, total_cnt#51] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/simplified.txt index 39dba3af02359..918508787c4b0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/simplified.txt @@ -23,7 +23,7 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom InputAdapter Exchange [cs_item_sk,d_date_sk] #3 WholeStageCodegen (10) - Project [cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,i_item_desc,d_date_sk,d_week_seq] + Project [cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,i_item_desc,d_week_seq,d_date_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk,d_date,d_date] Project [cs_sold_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,d_date,i_item_desc] SortMergeJoin [cs_item_sk,i_item_sk] @@ -38,7 +38,11 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom Project [cs_sold_date_sk,cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] Project [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] - BroadcastHashJoin [hd_demo_sk,cs_bill_hdemo_sk] + BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] + Filter [cs_quantity,cs_item_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_sold_date_sk,cs_ship_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] InputAdapter BroadcastExchange #5 WholeStageCodegen (1) @@ -47,10 +51,6 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] - Filter [cs_quantity,cs_item_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_sold_date_sk,cs_ship_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] InputAdapter BroadcastExchange #6 WholeStageCodegen (2) @@ -79,12 +79,8 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom InputAdapter BroadcastExchange #9 WholeStageCodegen (9) - Project [d_date_sk,d_date_sk,d_date,d_week_seq] + Project [d_date_sk,d_date,d_week_seq,d_date_sk] BroadcastHashJoin [d_week_seq,d_week_seq] - Filter [d_week_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_week_seq] InputAdapter BroadcastExchange #10 WholeStageCodegen (8) @@ -93,14 +89,22 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_date,d_week_seq,d_year] + Filter 
[d_week_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_week_seq] InputAdapter WholeStageCodegen (14) Sort [inv_item_sk,inv_date_sk] InputAdapter Exchange [inv_item_sk,inv_date_sk] #11 WholeStageCodegen (13) - Project [w_warehouse_name,inv_date_sk,inv_item_sk,inv_quantity_on_hand] - BroadcastHashJoin [w_warehouse_sk,inv_warehouse_sk] + Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk,inv_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] InputAdapter BroadcastExchange #12 WholeStageCodegen (12) @@ -108,10 +112,6 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom ColumnarToRow InputAdapter Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] - Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk,inv_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] InputAdapter BroadcastExchange #13 WholeStageCodegen (15) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/explain.txt index 057d786afbcdd..9ac081b356c94 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/explain.txt @@ -37,12 +37,12 @@ TakeOrderedAndProject (108) : : : +- * Project (23) : : : +- * Filter (22) : : : +- * ColumnarToRow (21) - : : : +- Scan parquet default.date_dim (20) + : : : +- Scan parquet default.promotion (20) : : +- BroadcastExchange (31) : : +- * Project (30) : : +- * Filter (29) : : +- * ColumnarToRow (28) - : : +- Scan parquet default.promotion (27) + : : +- Scan parquet default.date_dim (27) : +- BroadcastExchange (37) : +- * Filter (36) : +- * ColumnarToRow (35) @@ -193,67 +193,67 @@ Join condition: None Output [7]: [ss_sold_date_sk#1, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12] Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, i_item_sk#14] -(20) Scan parquet default.date_dim -Output [2]: [d_date_sk#17, d_date#18] +(20) Scan parquet default.promotion +Output [2]: [p_promo_sk#17, p_channel_tv#18] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-22), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)] +ReadSchema: struct (21) ColumnarToRow [codegen id : 6] -Input [2]: [d_date_sk#17, d_date#18] +Input [2]: [p_promo_sk#17, p_channel_tv#18] (22) Filter [codegen id : 6] -Input [2]: [d_date_sk#17, d_date#18] -Condition : (((isnotnull(d_date#18) AND (d_date#18 >= 11192)) AND (d_date#18 <= 11222)) AND isnotnull(d_date_sk#17)) +Input [2]: [p_promo_sk#17, p_channel_tv#18] +Condition : ((isnotnull(p_channel_tv#18) AND (p_channel_tv#18 = N)) AND isnotnull(p_promo_sk#17)) (23) Project [codegen id : 6] -Output [1]: [d_date_sk#17] -Input 
[2]: [d_date_sk#17, d_date#18] +Output [1]: [p_promo_sk#17] +Input [2]: [p_promo_sk#17, p_channel_tv#18] (24) BroadcastExchange -Input [1]: [d_date_sk#17] +Input [1]: [p_promo_sk#17] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] (25) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#17] +Left keys [1]: [ss_promo_sk#4] +Right keys [1]: [p_promo_sk#17] Join condition: None (26) Project [codegen id : 9] -Output [6]: [ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12] -Input [8]: [ss_sold_date_sk#1, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, d_date_sk#17] +Output [6]: [ss_sold_date_sk#1, ss_store_sk#3, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12] +Input [8]: [ss_sold_date_sk#1, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, p_promo_sk#17] -(27) Scan parquet default.promotion -Output [2]: [p_promo_sk#20, p_channel_tv#21] +(27) Scan parquet default.date_dim +Output [2]: [d_date_sk#20, d_date#21] Batched: true -Location [not included in comparison]/{warehouse_dir}/promotion] -PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-22), IsNotNull(d_date_sk)] +ReadSchema: struct (28) ColumnarToRow [codegen id : 7] -Input [2]: [p_promo_sk#20, p_channel_tv#21] +Input [2]: [d_date_sk#20, d_date#21] (29) Filter [codegen id : 7] -Input [2]: [p_promo_sk#20, p_channel_tv#21] -Condition : ((isnotnull(p_channel_tv#21) AND (p_channel_tv#21 = N)) AND isnotnull(p_promo_sk#20)) +Input [2]: [d_date_sk#20, d_date#21] +Condition : (((isnotnull(d_date#21) AND (d_date#21 >= 11192)) AND (d_date#21 <= 11222)) AND isnotnull(d_date_sk#20)) (30) Project [codegen id : 7] -Output [1]: [p_promo_sk#20] -Input [2]: [p_promo_sk#20, p_channel_tv#21] +Output [1]: [d_date_sk#20] +Input [2]: [d_date_sk#20, d_date#21] (31) BroadcastExchange -Input [1]: [p_promo_sk#20] +Input [1]: [d_date_sk#20] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] (32) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ss_promo_sk#4] -Right keys [1]: [p_promo_sk#20] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#20] Join condition: None (33) Project [codegen id : 9] Output [5]: [ss_store_sk#3, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12] -Input [7]: [ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, p_promo_sk#20] +Input [7]: [ss_sold_date_sk#1, ss_store_sk#3, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, d_date_sk#20] (34) Scan parquet default.store Output [2]: [s_store_sk#23, s_store_id#24] @@ -366,28 +366,28 @@ Output [7]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_s Input [9]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_item_sk#47, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, i_item_sk#14] (58) ReusedExchange [Reuses operator id: 24] -Output [1]: [d_date_sk#17] +Output [1]: [p_promo_sk#17] (59) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [cs_sold_date_sk#45] -Right keys [1]: 
[d_date_sk#17] +Left keys [1]: [cs_promo_sk#48] +Right keys [1]: [p_promo_sk#17] Join condition: None (60) Project [codegen id : 19] -Output [6]: [cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56] -Input [8]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, d_date_sk#17] +Output [6]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56] +Input [8]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, p_promo_sk#17] (61) ReusedExchange [Reuses operator id: 31] -Output [1]: [p_promo_sk#20] +Output [1]: [d_date_sk#20] (62) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [cs_promo_sk#48] -Right keys [1]: [p_promo_sk#20] +Left keys [1]: [cs_sold_date_sk#45] +Right keys [1]: [d_date_sk#20] Join condition: None (63) Project [codegen id : 19] Output [5]: [cs_catalog_page_sk#46, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56] -Input [7]: [cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, p_promo_sk#20] +Input [7]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, d_date_sk#20] (64) Scan parquet default.catalog_page Output [2]: [cp_catalog_page_sk#58, cp_catalog_page_id#59] @@ -500,28 +500,28 @@ Output [7]: [ws_sold_date_sk#80, ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales Input [9]: [ws_sold_date_sk#80, ws_item_sk#81, ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, i_item_sk#14] (88) ReusedExchange [Reuses operator id: 24] -Output [1]: [d_date_sk#17] +Output [1]: [p_promo_sk#17] (89) BroadcastHashJoin [codegen id : 29] -Left keys [1]: [ws_sold_date_sk#80] -Right keys [1]: [d_date_sk#17] +Left keys [1]: [ws_promo_sk#83] +Right keys [1]: [p_promo_sk#17] Join condition: None (90) Project [codegen id : 29] -Output [6]: [ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91] -Input [8]: [ws_sold_date_sk#80, ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, d_date_sk#17] +Output [6]: [ws_sold_date_sk#80, ws_web_site_sk#82, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91] +Input [8]: [ws_sold_date_sk#80, ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, p_promo_sk#17] (91) ReusedExchange [Reuses operator id: 31] -Output [1]: [p_promo_sk#20] +Output [1]: [d_date_sk#20] (92) BroadcastHashJoin [codegen id : 29] -Left keys [1]: [ws_promo_sk#83] -Right keys [1]: [p_promo_sk#20] +Left keys [1]: [ws_sold_date_sk#80] +Right keys [1]: [d_date_sk#20] Join condition: None (93) Project [codegen id : 29] Output [5]: [ws_web_site_sk#82, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91] -Input [7]: [ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, p_promo_sk#20] +Input [7]: [ws_sold_date_sk#80, ws_web_site_sk#82, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, d_date_sk#20] (94) Scan parquet default.web_site Output [2]: [web_site_sk#93, web_site_id#94] diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/simplified.txt index 7b73e4307dcf0..ec00b49e71989 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/simplified.txt @@ -17,9 +17,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] Project [ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_store_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] - BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] Project [ss_sold_date_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] BroadcastHashJoin [ss_item_sk,i_item_sk] Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] @@ -54,19 +54,19 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter BroadcastExchange #6 WholeStageCodegen (6) - Project [d_date_sk] - Filter [d_date,d_date_sk] + Project [p_promo_sk] + Filter [p_channel_tv,p_promo_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Scan parquet default.promotion [p_promo_sk,p_channel_tv] InputAdapter BroadcastExchange #7 WholeStageCodegen (7) - Project [p_promo_sk] - Filter [p_channel_tv,p_promo_sk] + Project [d_date_sk] + Filter [d_date,d_date_sk] ColumnarToRow InputAdapter - Scan parquet default.promotion [p_promo_sk,p_channel_tv] + Scan parquet default.date_dim [d_date_sk,d_date] InputAdapter BroadcastExchange #8 WholeStageCodegen (8) @@ -83,9 +83,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] Project [cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] BroadcastHashJoin [cs_catalog_page_sk,cp_catalog_page_sk] Project [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] - BroadcastHashJoin [cs_promo_sk,p_promo_sk] - Project [cs_catalog_page_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] Project [cs_sold_date_sk,cs_catalog_page_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] BroadcastHashJoin [cs_item_sk,i_item_sk] Project [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] @@ -112,9 +112,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter ReusedExchange [i_item_sk] #5 InputAdapter - ReusedExchange [d_date_sk] #6 + ReusedExchange [p_promo_sk] #6 InputAdapter - ReusedExchange [p_promo_sk] #7 + ReusedExchange [d_date_sk] #7 InputAdapter BroadcastExchange #12 WholeStageCodegen (18) @@ -131,9 +131,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] Project 
[ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] BroadcastHashJoin [ws_web_site_sk,web_site_sk] Project [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] - BroadcastHashJoin [ws_promo_sk,p_promo_sk] - Project [ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_web_site_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] + BroadcastHashJoin [ws_promo_sk,p_promo_sk] Project [ws_sold_date_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] BroadcastHashJoin [ws_item_sk,i_item_sk] Project [ws_sold_date_sk,ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] @@ -160,9 +160,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter ReusedExchange [i_item_sk] #5 InputAdapter - ReusedExchange [d_date_sk] #6 + ReusedExchange [p_promo_sk] #6 InputAdapter - ReusedExchange [p_promo_sk] #7 + ReusedExchange [d_date_sk] #7 InputAdapter BroadcastExchange #16 WholeStageCodegen (28) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.sf100/explain.txt index ae0b996ec28be..83ec6391d7736 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.sf100/explain.txt @@ -20,15 +20,15 @@ TakeOrderedAndProject (36) : : : +- Scan parquet default.customer_address (4) : : +- BroadcastExchange (21) : : +- * Project (20) - : : +- * BroadcastHashJoin Inner BuildLeft (19) - : : :- BroadcastExchange (15) - : : : +- * Project (14) - : : : +- * Filter (13) - : : : +- * ColumnarToRow (12) - : : : +- Scan parquet default.income_band (11) - : : +- * Filter (18) - : : +- * ColumnarToRow (17) - : : +- Scan parquet default.household_demographics (16) + : : +- * BroadcastHashJoin Inner BuildRight (19) + : : :- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.household_demographics (11) + : : +- BroadcastExchange (18) + : : +- * Project (17) + : : +- * Filter (16) + : : +- * ColumnarToRow (15) + : : +- Scan parquet default.income_band (14) : +- * Filter (27) : +- * ColumnarToRow (26) : +- Scan parquet default.customer_demographics (25) @@ -82,63 +82,63 @@ Join condition: None Output [5]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6] Input [7]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6, ca_address_sk#7] -(11) Scan parquet default.income_band -Output [3]: [ib_income_band_sk#10, ib_lower_bound#11, ib_upper_bound#12] +(11) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#10, hd_income_band_sk#11] Batched: true -Location [not included in comparison]/{warehouse_dir}/income_band] -PushedFilters: [IsNotNull(ib_lower_bound), IsNotNull(ib_upper_bound), GreaterThanOrEqual(ib_lower_bound,38128), LessThanOrEqual(ib_upper_bound,88128), IsNotNull(ib_income_band_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)] +ReadSchema: struct -(12) ColumnarToRow [codegen id : 2] -Input [3]: [ib_income_band_sk#10, 
ib_lower_bound#11, ib_upper_bound#12] +(12) ColumnarToRow [codegen id : 3] +Input [2]: [hd_demo_sk#10, hd_income_band_sk#11] -(13) Filter [codegen id : 2] -Input [3]: [ib_income_band_sk#10, ib_lower_bound#11, ib_upper_bound#12] -Condition : ((((isnotnull(ib_lower_bound#11) AND isnotnull(ib_upper_bound#12)) AND (ib_lower_bound#11 >= 38128)) AND (ib_upper_bound#12 <= 88128)) AND isnotnull(ib_income_band_sk#10)) +(13) Filter [codegen id : 3] +Input [2]: [hd_demo_sk#10, hd_income_band_sk#11] +Condition : (isnotnull(hd_demo_sk#10) AND isnotnull(hd_income_band_sk#11)) -(14) Project [codegen id : 2] -Output [1]: [ib_income_band_sk#10] -Input [3]: [ib_income_band_sk#10, ib_lower_bound#11, ib_upper_bound#12] +(14) Scan parquet default.income_band +Output [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/income_band] +PushedFilters: [IsNotNull(ib_lower_bound), IsNotNull(ib_upper_bound), GreaterThanOrEqual(ib_lower_bound,38128), LessThanOrEqual(ib_upper_bound,88128), IsNotNull(ib_income_band_sk)] +ReadSchema: struct -(15) BroadcastExchange -Input [1]: [ib_income_band_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] +(15) ColumnarToRow [codegen id : 2] +Input [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] -(16) Scan parquet default.household_demographics -Output [2]: [hd_demo_sk#14, hd_income_band_sk#15] -Batched: true -Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)] -ReadSchema: struct +(16) Filter [codegen id : 2] +Input [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] +Condition : ((((isnotnull(ib_lower_bound#13) AND isnotnull(ib_upper_bound#14)) AND (ib_lower_bound#13 >= 38128)) AND (ib_upper_bound#14 <= 88128)) AND isnotnull(ib_income_band_sk#12)) -(17) ColumnarToRow -Input [2]: [hd_demo_sk#14, hd_income_band_sk#15] +(17) Project [codegen id : 2] +Output [1]: [ib_income_band_sk#12] +Input [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] -(18) Filter -Input [2]: [hd_demo_sk#14, hd_income_band_sk#15] -Condition : (isnotnull(hd_demo_sk#14) AND isnotnull(hd_income_band_sk#15)) +(18) BroadcastExchange +Input [1]: [ib_income_band_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] (19) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ib_income_band_sk#10] -Right keys [1]: [hd_income_band_sk#15] +Left keys [1]: [hd_income_band_sk#11] +Right keys [1]: [ib_income_band_sk#12] Join condition: None (20) Project [codegen id : 3] -Output [1]: [hd_demo_sk#14] -Input [3]: [ib_income_band_sk#10, hd_demo_sk#14, hd_income_band_sk#15] +Output [1]: [hd_demo_sk#10] +Input [3]: [hd_demo_sk#10, hd_income_band_sk#11, ib_income_band_sk#12] (21) BroadcastExchange -Input [1]: [hd_demo_sk#14] +Input [1]: [hd_demo_sk#10] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] (22) BroadcastHashJoin [codegen id : 4] Left keys [1]: [c_current_hdemo_sk#3] -Right keys [1]: [hd_demo_sk#14] +Right keys [1]: [hd_demo_sk#10] Join condition: None (23) Project [codegen id : 4] Output [4]: [c_customer_id#1, c_current_cdemo_sk#2, c_first_name#5, c_last_name#6] -Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, hd_demo_sk#14] +Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, 
c_first_name#5, c_last_name#6, hd_demo_sk#10] (24) BroadcastExchange Input [4]: [c_customer_id#1, c_current_cdemo_sk#2, c_first_name#5, c_last_name#6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.sf100/simplified.txt index 1fbc57ee7e47a..16087526bc130 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.sf100/simplified.txt @@ -30,7 +30,11 @@ TakeOrderedAndProject [c_customer_id,customer_id,customername] BroadcastExchange #4 WholeStageCodegen (3) Project [hd_demo_sk] - BroadcastHashJoin [ib_income_band_sk,hd_income_band_sk] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Filter [hd_demo_sk,hd_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_income_band_sk] InputAdapter BroadcastExchange #5 WholeStageCodegen (2) @@ -39,10 +43,6 @@ TakeOrderedAndProject [c_customer_id,customer_id,customername] ColumnarToRow InputAdapter Scan parquet default.income_band [ib_income_band_sk,ib_lower_bound,ib_upper_bound] - Filter [hd_demo_sk,hd_income_band_sk] - ColumnarToRow - InputAdapter - Scan parquet default.household_demographics [hd_demo_sk,hd_income_band_sk] Filter [cd_demo_sk] ColumnarToRow InputAdapter diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/explain.txt index ee550f1af4947..7c3f00d33f24e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/explain.txt @@ -12,30 +12,30 @@ TakeOrderedAndProject (57) : :- * Project (31) : : +- * BroadcastHashJoin Inner BuildRight (30) : : :- * Project (25) - : : : +- * BroadcastHashJoin Inner BuildLeft (24) - : : : :- BroadcastExchange (5) - : : : : +- * Project (4) - : : : : +- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.date_dim (1) - : : : +- * Project (23) - : : : +- * SortMergeJoin Inner (22) - : : : :- * Sort (16) - : : : : +- Exchange (15) - : : : : +- * Project (14) - : : : : +- * BroadcastHashJoin Inner BuildRight (13) - : : : : :- * Filter (8) - : : : : : +- * ColumnarToRow (7) - : : : : : +- Scan parquet default.web_sales (6) - : : : : +- BroadcastExchange (12) - : : : : +- * Filter (11) - : : : : +- * ColumnarToRow (10) - : : : : +- Scan parquet default.web_page (9) - : : : +- * Sort (21) - : : : +- Exchange (20) - : : : +- * Filter (19) - : : : +- * ColumnarToRow (18) - : : : +- Scan parquet default.web_returns (17) + : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : :- * Project (18) + : : : : +- * SortMergeJoin Inner (17) + : : : : :- * Sort (11) + : : : : : +- Exchange (10) + : : : : : +- * Project (9) + : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.web_sales (1) + : : : : : +- BroadcastExchange (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.web_page (4) + : : : : +- * Sort (16) + : : : : +- Exchange (15) + : : : : +- * Filter (14) + : : : : +- * ColumnarToRow (13) + : : : : +- Scan parquet default.web_returns (12) + : : : +- 
BroadcastExchange (23) + : : : +- * Project (22) + : : : +- * Filter (21) + : : : +- * ColumnarToRow (20) + : : : +- Scan parquet default.date_dim (19) : : +- BroadcastExchange (29) : : +- * Filter (28) : : +- * ColumnarToRow (27) @@ -48,126 +48,126 @@ TakeOrderedAndProject (57) +- * Sort (51) +- Exchange (50) +- * Project (49) - +- * BroadcastHashJoin Inner BuildRight (48) - :- * Filter (43) - : +- * ColumnarToRow (42) - : +- Scan parquet default.customer_demographics (41) - +- BroadcastExchange (47) - +- * Filter (46) - +- * ColumnarToRow (45) - +- Scan parquet default.customer_demographics (44) - - -(1) Scan parquet default.date_dim -Output [2]: [d_date_sk#1, d_year#2] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#1, d_year#2] - -(3) Filter [codegen id : 1] -Input [2]: [d_date_sk#1, d_year#2] -Condition : ((isnotnull(d_year#2) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) - -(4) Project [codegen id : 1] -Output [1]: [d_date_sk#1] -Input [2]: [d_date_sk#1, d_year#2] - -(5) BroadcastExchange -Input [1]: [d_date_sk#1] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#3] - -(6) Scan parquet default.web_sales -Output [7]: [ws_sold_date_sk#4, ws_item_sk#5, ws_web_page_sk#6, ws_order_number#7, ws_quantity#8, ws_sales_price#9, ws_net_profit#10] + +- * BroadcastHashJoin Inner BuildLeft (48) + :- BroadcastExchange (44) + : +- * Filter (43) + : +- * ColumnarToRow (42) + : +- Scan parquet default.customer_demographics (41) + +- * Filter (47) + +- * ColumnarToRow (46) + +- Scan parquet default.customer_demographics (45) + + +(1) Scan parquet default.web_sales +Output [7]: [ws_sold_date_sk#1, ws_item_sk#2, ws_web_page_sk#3, ws_order_number#4, ws_quantity#5, ws_sales_price#6, ws_net_profit#7] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_page_sk), IsNotNull(ws_sold_date_sk), Or(Or(And(GreaterThanOrEqual(ws_sales_price,100.00),LessThanOrEqual(ws_sales_price,150.00)),And(GreaterThanOrEqual(ws_sales_price,50.00),LessThanOrEqual(ws_sales_price,100.00))),And(GreaterThanOrEqual(ws_sales_price,150.00),LessThanOrEqual(ws_sales_price,200.00))), Or(Or(And(GreaterThanOrEqual(ws_net_profit,100.00),LessThanOrEqual(ws_net_profit,200.00)),And(GreaterThanOrEqual(ws_net_profit,150.00),LessThanOrEqual(ws_net_profit,300.00))),And(GreaterThanOrEqual(ws_net_profit,50.00),LessThanOrEqual(ws_net_profit,250.00)))] ReadSchema: struct -(7) ColumnarToRow [codegen id : 3] -Input [7]: [ws_sold_date_sk#4, ws_item_sk#5, ws_web_page_sk#6, ws_order_number#7, ws_quantity#8, ws_sales_price#9, ws_net_profit#10] +(2) ColumnarToRow [codegen id : 2] +Input [7]: [ws_sold_date_sk#1, ws_item_sk#2, ws_web_page_sk#3, ws_order_number#4, ws_quantity#5, ws_sales_price#6, ws_net_profit#7] -(8) Filter [codegen id : 3] -Input [7]: [ws_sold_date_sk#4, ws_item_sk#5, ws_web_page_sk#6, ws_order_number#7, ws_quantity#8, ws_sales_price#9, ws_net_profit#10] -Condition : (((((isnotnull(ws_item_sk#5) AND isnotnull(ws_order_number#7)) AND isnotnull(ws_web_page_sk#6)) AND isnotnull(ws_sold_date_sk#4)) AND ((((ws_sales_price#9 >= 100.00) AND (ws_sales_price#9 <= 150.00)) OR ((ws_sales_price#9 >= 50.00) AND (ws_sales_price#9 <= 100.00))) OR ((ws_sales_price#9 >= 150.00) AND (ws_sales_price#9 <= 200.00)))) 
AND ((((ws_net_profit#10 >= 100.00) AND (ws_net_profit#10 <= 200.00)) OR ((ws_net_profit#10 >= 150.00) AND (ws_net_profit#10 <= 300.00))) OR ((ws_net_profit#10 >= 50.00) AND (ws_net_profit#10 <= 250.00)))) +(3) Filter [codegen id : 2] +Input [7]: [ws_sold_date_sk#1, ws_item_sk#2, ws_web_page_sk#3, ws_order_number#4, ws_quantity#5, ws_sales_price#6, ws_net_profit#7] +Condition : (((((isnotnull(ws_item_sk#2) AND isnotnull(ws_order_number#4)) AND isnotnull(ws_web_page_sk#3)) AND isnotnull(ws_sold_date_sk#1)) AND ((((ws_sales_price#6 >= 100.00) AND (ws_sales_price#6 <= 150.00)) OR ((ws_sales_price#6 >= 50.00) AND (ws_sales_price#6 <= 100.00))) OR ((ws_sales_price#6 >= 150.00) AND (ws_sales_price#6 <= 200.00)))) AND ((((ws_net_profit#7 >= 100.00) AND (ws_net_profit#7 <= 200.00)) OR ((ws_net_profit#7 >= 150.00) AND (ws_net_profit#7 <= 300.00))) OR ((ws_net_profit#7 >= 50.00) AND (ws_net_profit#7 <= 250.00)))) -(9) Scan parquet default.web_page -Output [1]: [wp_web_page_sk#11] +(4) Scan parquet default.web_page +Output [1]: [wp_web_page_sk#8] Batched: true Location [not included in comparison]/{warehouse_dir}/web_page] PushedFilters: [IsNotNull(wp_web_page_sk)] ReadSchema: struct -(10) ColumnarToRow [codegen id : 2] -Input [1]: [wp_web_page_sk#11] +(5) ColumnarToRow [codegen id : 1] +Input [1]: [wp_web_page_sk#8] -(11) Filter [codegen id : 2] -Input [1]: [wp_web_page_sk#11] -Condition : isnotnull(wp_web_page_sk#11) +(6) Filter [codegen id : 1] +Input [1]: [wp_web_page_sk#8] +Condition : isnotnull(wp_web_page_sk#8) -(12) BroadcastExchange -Input [1]: [wp_web_page_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] +(7) BroadcastExchange +Input [1]: [wp_web_page_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] -(13) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ws_web_page_sk#6] -Right keys [1]: [wp_web_page_sk#11] +(8) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ws_web_page_sk#3] +Right keys [1]: [wp_web_page_sk#8] Join condition: None -(14) Project [codegen id : 3] -Output [6]: [ws_sold_date_sk#4, ws_item_sk#5, ws_order_number#7, ws_quantity#8, ws_sales_price#9, ws_net_profit#10] -Input [8]: [ws_sold_date_sk#4, ws_item_sk#5, ws_web_page_sk#6, ws_order_number#7, ws_quantity#8, ws_sales_price#9, ws_net_profit#10, wp_web_page_sk#11] +(9) Project [codegen id : 2] +Output [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#4, ws_quantity#5, ws_sales_price#6, ws_net_profit#7] +Input [8]: [ws_sold_date_sk#1, ws_item_sk#2, ws_web_page_sk#3, ws_order_number#4, ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wp_web_page_sk#8] -(15) Exchange -Input [6]: [ws_sold_date_sk#4, ws_item_sk#5, ws_order_number#7, ws_quantity#8, ws_sales_price#9, ws_net_profit#10] -Arguments: hashpartitioning(cast(ws_item_sk#5 as bigint), cast(ws_order_number#7 as bigint), 5), true, [id=#13] +(10) Exchange +Input [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#4, ws_quantity#5, ws_sales_price#6, ws_net_profit#7] +Arguments: hashpartitioning(cast(ws_item_sk#2 as bigint), cast(ws_order_number#4 as bigint), 5), true, [id=#10] -(16) Sort [codegen id : 4] -Input [6]: [ws_sold_date_sk#4, ws_item_sk#5, ws_order_number#7, ws_quantity#8, ws_sales_price#9, ws_net_profit#10] -Arguments: [cast(ws_item_sk#5 as bigint) ASC NULLS FIRST, cast(ws_order_number#7 as bigint) ASC NULLS FIRST], false, 0 +(11) Sort [codegen id : 3] +Input [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#4, ws_quantity#5, 
ws_sales_price#6, ws_net_profit#7] +Arguments: [cast(ws_item_sk#2 as bigint) ASC NULLS FIRST, cast(ws_order_number#4 as bigint) ASC NULLS FIRST], false, 0 -(17) Scan parquet default.web_returns -Output [8]: [wr_item_sk#14, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_order_number#19, wr_fee#20, wr_refunded_cash#21] +(12) Scan parquet default.web_returns +Output [8]: [wr_item_sk#11, wr_refunded_cdemo_sk#12, wr_refunded_addr_sk#13, wr_returning_cdemo_sk#14, wr_reason_sk#15, wr_order_number#16, wr_fee#17, wr_refunded_cash#18] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number), IsNotNull(wr_refunded_cdemo_sk), IsNotNull(wr_returning_cdemo_sk), IsNotNull(wr_refunded_addr_sk), IsNotNull(wr_reason_sk)] ReadSchema: struct -(18) ColumnarToRow [codegen id : 5] -Input [8]: [wr_item_sk#14, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_order_number#19, wr_fee#20, wr_refunded_cash#21] +(13) ColumnarToRow [codegen id : 4] +Input [8]: [wr_item_sk#11, wr_refunded_cdemo_sk#12, wr_refunded_addr_sk#13, wr_returning_cdemo_sk#14, wr_reason_sk#15, wr_order_number#16, wr_fee#17, wr_refunded_cash#18] -(19) Filter [codegen id : 5] -Input [8]: [wr_item_sk#14, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_order_number#19, wr_fee#20, wr_refunded_cash#21] -Condition : (((((isnotnull(wr_item_sk#14) AND isnotnull(wr_order_number#19)) AND isnotnull(wr_refunded_cdemo_sk#15)) AND isnotnull(wr_returning_cdemo_sk#17)) AND isnotnull(wr_refunded_addr_sk#16)) AND isnotnull(wr_reason_sk#18)) +(14) Filter [codegen id : 4] +Input [8]: [wr_item_sk#11, wr_refunded_cdemo_sk#12, wr_refunded_addr_sk#13, wr_returning_cdemo_sk#14, wr_reason_sk#15, wr_order_number#16, wr_fee#17, wr_refunded_cash#18] +Condition : (((((isnotnull(wr_item_sk#11) AND isnotnull(wr_order_number#16)) AND isnotnull(wr_refunded_cdemo_sk#12)) AND isnotnull(wr_returning_cdemo_sk#14)) AND isnotnull(wr_refunded_addr_sk#13)) AND isnotnull(wr_reason_sk#15)) -(20) Exchange -Input [8]: [wr_item_sk#14, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_order_number#19, wr_fee#20, wr_refunded_cash#21] -Arguments: hashpartitioning(wr_item_sk#14, wr_order_number#19, 5), true, [id=#22] +(15) Exchange +Input [8]: [wr_item_sk#11, wr_refunded_cdemo_sk#12, wr_refunded_addr_sk#13, wr_returning_cdemo_sk#14, wr_reason_sk#15, wr_order_number#16, wr_fee#17, wr_refunded_cash#18] +Arguments: hashpartitioning(wr_item_sk#11, wr_order_number#16, 5), true, [id=#19] -(21) Sort [codegen id : 6] -Input [8]: [wr_item_sk#14, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_order_number#19, wr_fee#20, wr_refunded_cash#21] -Arguments: [wr_item_sk#14 ASC NULLS FIRST, wr_order_number#19 ASC NULLS FIRST], false, 0 +(16) Sort [codegen id : 5] +Input [8]: [wr_item_sk#11, wr_refunded_cdemo_sk#12, wr_refunded_addr_sk#13, wr_returning_cdemo_sk#14, wr_reason_sk#15, wr_order_number#16, wr_fee#17, wr_refunded_cash#18] +Arguments: [wr_item_sk#11 ASC NULLS FIRST, wr_order_number#16 ASC NULLS FIRST], false, 0 -(22) SortMergeJoin -Left keys [2]: [cast(ws_item_sk#5 as bigint), cast(ws_order_number#7 as bigint)] -Right keys [2]: [wr_item_sk#14, wr_order_number#19] +(17) SortMergeJoin [codegen id : 9] +Left keys [2]: [cast(ws_item_sk#2 as bigint), cast(ws_order_number#4 as bigint)] +Right 
keys [2]: [wr_item_sk#11, wr_order_number#16] Join condition: None -(23) Project -Output [10]: [ws_sold_date_sk#4, ws_quantity#8, ws_sales_price#9, ws_net_profit#10, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_fee#20, wr_refunded_cash#21] -Input [14]: [ws_sold_date_sk#4, ws_item_sk#5, ws_order_number#7, ws_quantity#8, ws_sales_price#9, ws_net_profit#10, wr_item_sk#14, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_order_number#19, wr_fee#20, wr_refunded_cash#21] +(18) Project [codegen id : 9] +Output [10]: [ws_sold_date_sk#1, ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_refunded_cdemo_sk#12, wr_refunded_addr_sk#13, wr_returning_cdemo_sk#14, wr_reason_sk#15, wr_fee#17, wr_refunded_cash#18] +Input [14]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#4, ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_item_sk#11, wr_refunded_cdemo_sk#12, wr_refunded_addr_sk#13, wr_returning_cdemo_sk#14, wr_reason_sk#15, wr_order_number#16, wr_fee#17, wr_refunded_cash#18] + +(19) Scan parquet default.date_dim +Output [2]: [d_date_sk#20, d_year#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 6] +Input [2]: [d_date_sk#20, d_year#21] + +(21) Filter [codegen id : 6] +Input [2]: [d_date_sk#20, d_year#21] +Condition : ((isnotnull(d_year#21) AND (d_year#21 = 2000)) AND isnotnull(d_date_sk#20)) + +(22) Project [codegen id : 6] +Output [1]: [d_date_sk#20] +Input [2]: [d_date_sk#20, d_year#21] + +(23) BroadcastExchange +Input [1]: [d_date_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] (24) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [d_date_sk#1] -Right keys [1]: [ws_sold_date_sk#4] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#20] Join condition: None (25) Project [codegen id : 9] -Output [9]: [ws_quantity#8, ws_sales_price#9, ws_net_profit#10, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_fee#20, wr_refunded_cash#21] -Input [11]: [d_date_sk#1, ws_sold_date_sk#4, ws_quantity#8, ws_sales_price#9, ws_net_profit#10, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_fee#20, wr_refunded_cash#21] +Output [9]: [ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_refunded_cdemo_sk#12, wr_refunded_addr_sk#13, wr_returning_cdemo_sk#14, wr_reason_sk#15, wr_fee#17, wr_refunded_cash#18] +Input [11]: [ws_sold_date_sk#1, ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_refunded_cdemo_sk#12, wr_refunded_addr_sk#13, wr_returning_cdemo_sk#14, wr_reason_sk#15, wr_fee#17, wr_refunded_cash#18, d_date_sk#20] (26) Scan parquet default.reason Output [2]: [r_reason_sk#23, r_reason_desc#24] @@ -188,13 +188,13 @@ Input [2]: [r_reason_sk#23, r_reason_desc#24] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] (30) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [wr_reason_sk#18] +Left keys [1]: [wr_reason_sk#15] Right keys [1]: [cast(r_reason_sk#23 as bigint)] Join condition: None (31) Project [codegen id : 9] -Output [9]: [ws_quantity#8, ws_sales_price#9, ws_net_profit#10, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_fee#20, wr_refunded_cash#21, r_reason_desc#24] -Input [11]: [ws_quantity#8, 
ws_sales_price#9, ws_net_profit#10, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_fee#20, wr_refunded_cash#21, r_reason_sk#23, r_reason_desc#24] +Output [9]: [ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_refunded_cdemo_sk#12, wr_refunded_addr_sk#13, wr_returning_cdemo_sk#14, wr_fee#17, wr_refunded_cash#18, r_reason_desc#24] +Input [11]: [ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_refunded_cdemo_sk#12, wr_refunded_addr_sk#13, wr_returning_cdemo_sk#14, wr_reason_sk#15, wr_fee#17, wr_refunded_cash#18, r_reason_sk#23, r_reason_desc#24] (32) Scan parquet default.customer_address Output [3]: [ca_address_sk#26, ca_state#27, ca_country#28] @@ -219,84 +219,84 @@ Input [2]: [ca_address_sk#26, ca_state#27] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] (37) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [wr_refunded_addr_sk#16] +Left keys [1]: [wr_refunded_addr_sk#13] Right keys [1]: [cast(ca_address_sk#26 as bigint)] -Join condition: ((((ca_state#27 IN (IN,OH,NJ) AND (ws_net_profit#10 >= 100.00)) AND (ws_net_profit#10 <= 200.00)) OR ((ca_state#27 IN (WI,CT,KY) AND (ws_net_profit#10 >= 150.00)) AND (ws_net_profit#10 <= 300.00))) OR ((ca_state#27 IN (LA,IA,AR) AND (ws_net_profit#10 >= 50.00)) AND (ws_net_profit#10 <= 250.00))) +Join condition: ((((ca_state#27 IN (IN,OH,NJ) AND (ws_net_profit#7 >= 100.00)) AND (ws_net_profit#7 <= 200.00)) OR ((ca_state#27 IN (WI,CT,KY) AND (ws_net_profit#7 >= 150.00)) AND (ws_net_profit#7 <= 300.00))) OR ((ca_state#27 IN (LA,IA,AR) AND (ws_net_profit#7 >= 50.00)) AND (ws_net_profit#7 <= 250.00))) (38) Project [codegen id : 9] -Output [7]: [ws_quantity#8, ws_sales_price#9, wr_refunded_cdemo_sk#15, wr_returning_cdemo_sk#17, wr_fee#20, wr_refunded_cash#21, r_reason_desc#24] -Input [11]: [ws_quantity#8, ws_sales_price#9, ws_net_profit#10, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_fee#20, wr_refunded_cash#21, r_reason_desc#24, ca_address_sk#26, ca_state#27] +Output [7]: [ws_quantity#5, ws_sales_price#6, wr_refunded_cdemo_sk#12, wr_returning_cdemo_sk#14, wr_fee#17, wr_refunded_cash#18, r_reason_desc#24] +Input [11]: [ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_refunded_cdemo_sk#12, wr_refunded_addr_sk#13, wr_returning_cdemo_sk#14, wr_fee#17, wr_refunded_cash#18, r_reason_desc#24, ca_address_sk#26, ca_state#27] (39) Exchange -Input [7]: [ws_quantity#8, ws_sales_price#9, wr_refunded_cdemo_sk#15, wr_returning_cdemo_sk#17, wr_fee#20, wr_refunded_cash#21, r_reason_desc#24] -Arguments: hashpartitioning(wr_refunded_cdemo_sk#15, wr_returning_cdemo_sk#17, 5), true, [id=#30] +Input [7]: [ws_quantity#5, ws_sales_price#6, wr_refunded_cdemo_sk#12, wr_returning_cdemo_sk#14, wr_fee#17, wr_refunded_cash#18, r_reason_desc#24] +Arguments: hashpartitioning(wr_refunded_cdemo_sk#12, wr_returning_cdemo_sk#14, 5), true, [id=#30] (40) Sort [codegen id : 10] -Input [7]: [ws_quantity#8, ws_sales_price#9, wr_refunded_cdemo_sk#15, wr_returning_cdemo_sk#17, wr_fee#20, wr_refunded_cash#21, r_reason_desc#24] -Arguments: [wr_refunded_cdemo_sk#15 ASC NULLS FIRST, wr_returning_cdemo_sk#17 ASC NULLS FIRST], false, 0 +Input [7]: [ws_quantity#5, ws_sales_price#6, wr_refunded_cdemo_sk#12, wr_returning_cdemo_sk#14, wr_fee#17, wr_refunded_cash#18, r_reason_desc#24] +Arguments: [wr_refunded_cdemo_sk#12 ASC NULLS FIRST, wr_returning_cdemo_sk#14 ASC NULLS FIRST], false, 0 (41) Scan parquet default.customer_demographics Output [3]: [cd_demo_sk#31, 
cd_marital_status#32, cd_education_status#33] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] -PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status), IsNotNull(cd_education_status)] +PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Advanced Degree)),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College))),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,2 yr Degree)))] ReadSchema: struct -(42) ColumnarToRow [codegen id : 12] +(42) ColumnarToRow [codegen id : 11] +Input [3]: [cd_demo_sk#31, cd_marital_status#32, cd_education_status#33] + +(43) Filter [codegen id : 11] Input [3]: [cd_demo_sk#31, cd_marital_status#32, cd_education_status#33] +Condition : (((isnotnull(cd_demo_sk#31) AND isnotnull(cd_marital_status#32)) AND isnotnull(cd_education_status#33)) AND ((((cd_marital_status#32 = M) AND (cd_education_status#33 = Advanced Degree)) OR ((cd_marital_status#32 = S) AND (cd_education_status#33 = College))) OR ((cd_marital_status#32 = W) AND (cd_education_status#33 = 2 yr Degree)))) -(43) Filter [codegen id : 12] +(44) BroadcastExchange Input [3]: [cd_demo_sk#31, cd_marital_status#32, cd_education_status#33] -Condition : ((isnotnull(cd_demo_sk#31) AND isnotnull(cd_marital_status#32)) AND isnotnull(cd_education_status#33)) +Arguments: HashedRelationBroadcastMode(List(input[1, string, false], input[2, string, false]),false), [id=#34] -(44) Scan parquet default.customer_demographics -Output [3]: [cd_demo_sk#34, cd_marital_status#35, cd_education_status#36] +(45) Scan parquet default.customer_demographics +Output [3]: [cd_demo_sk#35, cd_marital_status#36, cd_education_status#37] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] -PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Advanced Degree)),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College))),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,2 yr Degree)))] +PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status), IsNotNull(cd_education_status)] ReadSchema: struct -(45) ColumnarToRow [codegen id : 11] -Input [3]: [cd_demo_sk#34, cd_marital_status#35, cd_education_status#36] - -(46) Filter [codegen id : 11] -Input [3]: [cd_demo_sk#34, cd_marital_status#35, cd_education_status#36] -Condition : (((isnotnull(cd_demo_sk#34) AND isnotnull(cd_marital_status#35)) AND isnotnull(cd_education_status#36)) AND ((((cd_marital_status#35 = M) AND (cd_education_status#36 = Advanced Degree)) OR ((cd_marital_status#35 = S) AND (cd_education_status#36 = College))) OR ((cd_marital_status#35 = W) AND (cd_education_status#36 = 2 yr Degree)))) +(46) ColumnarToRow +Input [3]: [cd_demo_sk#35, cd_marital_status#36, cd_education_status#37] -(47) BroadcastExchange -Input [3]: [cd_demo_sk#34, cd_marital_status#35, cd_education_status#36] -Arguments: HashedRelationBroadcastMode(List(input[1, string, false], input[2, string, false]),false), [id=#37] +(47) Filter +Input [3]: [cd_demo_sk#35, cd_marital_status#36, cd_education_status#37] +Condition : ((isnotnull(cd_demo_sk#35) AND isnotnull(cd_marital_status#36)) AND isnotnull(cd_education_status#37)) (48) BroadcastHashJoin [codegen id : 12] Left keys [2]: [cd_marital_status#32, cd_education_status#33] -Right keys [2]: 
[cd_marital_status#35, cd_education_status#36] +Right keys [2]: [cd_marital_status#36, cd_education_status#37] Join condition: None (49) Project [codegen id : 12] -Output [4]: [cd_demo_sk#31, cd_demo_sk#34, cd_marital_status#35, cd_education_status#36] -Input [6]: [cd_demo_sk#31, cd_marital_status#32, cd_education_status#33, cd_demo_sk#34, cd_marital_status#35, cd_education_status#36] +Output [4]: [cd_demo_sk#31, cd_marital_status#32, cd_education_status#33, cd_demo_sk#35] +Input [6]: [cd_demo_sk#31, cd_marital_status#32, cd_education_status#33, cd_demo_sk#35, cd_marital_status#36, cd_education_status#37] (50) Exchange -Input [4]: [cd_demo_sk#31, cd_demo_sk#34, cd_marital_status#35, cd_education_status#36] -Arguments: hashpartitioning(cast(cd_demo_sk#34 as bigint), cast(cd_demo_sk#31 as bigint), 5), true, [id=#38] +Input [4]: [cd_demo_sk#31, cd_marital_status#32, cd_education_status#33, cd_demo_sk#35] +Arguments: hashpartitioning(cast(cd_demo_sk#31 as bigint), cast(cd_demo_sk#35 as bigint), 5), true, [id=#38] (51) Sort [codegen id : 13] -Input [4]: [cd_demo_sk#31, cd_demo_sk#34, cd_marital_status#35, cd_education_status#36] -Arguments: [cast(cd_demo_sk#34 as bigint) ASC NULLS FIRST, cast(cd_demo_sk#31 as bigint) ASC NULLS FIRST], false, 0 +Input [4]: [cd_demo_sk#31, cd_marital_status#32, cd_education_status#33, cd_demo_sk#35] +Arguments: [cast(cd_demo_sk#31 as bigint) ASC NULLS FIRST, cast(cd_demo_sk#35 as bigint) ASC NULLS FIRST], false, 0 (52) SortMergeJoin [codegen id : 14] -Left keys [2]: [wr_refunded_cdemo_sk#15, wr_returning_cdemo_sk#17] -Right keys [2]: [cast(cd_demo_sk#34 as bigint), cast(cd_demo_sk#31 as bigint)] -Join condition: ((((((cd_marital_status#35 = M) AND (cd_education_status#36 = Advanced Degree)) AND (ws_sales_price#9 >= 100.00)) AND (ws_sales_price#9 <= 150.00)) OR ((((cd_marital_status#35 = S) AND (cd_education_status#36 = College)) AND (ws_sales_price#9 >= 50.00)) AND (ws_sales_price#9 <= 100.00))) OR ((((cd_marital_status#35 = W) AND (cd_education_status#36 = 2 yr Degree)) AND (ws_sales_price#9 >= 150.00)) AND (ws_sales_price#9 <= 200.00))) +Left keys [2]: [wr_refunded_cdemo_sk#12, wr_returning_cdemo_sk#14] +Right keys [2]: [cast(cd_demo_sk#31 as bigint), cast(cd_demo_sk#35 as bigint)] +Join condition: ((((((cd_marital_status#32 = M) AND (cd_education_status#33 = Advanced Degree)) AND (ws_sales_price#6 >= 100.00)) AND (ws_sales_price#6 <= 150.00)) OR ((((cd_marital_status#32 = S) AND (cd_education_status#33 = College)) AND (ws_sales_price#6 >= 50.00)) AND (ws_sales_price#6 <= 100.00))) OR ((((cd_marital_status#32 = W) AND (cd_education_status#33 = 2 yr Degree)) AND (ws_sales_price#6 >= 150.00)) AND (ws_sales_price#6 <= 200.00))) (53) Project [codegen id : 14] -Output [4]: [ws_quantity#8, wr_fee#20, wr_refunded_cash#21, r_reason_desc#24] -Input [11]: [ws_quantity#8, ws_sales_price#9, wr_refunded_cdemo_sk#15, wr_returning_cdemo_sk#17, wr_fee#20, wr_refunded_cash#21, r_reason_desc#24, cd_demo_sk#31, cd_demo_sk#34, cd_marital_status#35, cd_education_status#36] +Output [4]: [ws_quantity#5, wr_fee#17, wr_refunded_cash#18, r_reason_desc#24] +Input [11]: [ws_quantity#5, ws_sales_price#6, wr_refunded_cdemo_sk#12, wr_returning_cdemo_sk#14, wr_fee#17, wr_refunded_cash#18, r_reason_desc#24, cd_demo_sk#31, cd_marital_status#32, cd_education_status#33, cd_demo_sk#35] (54) HashAggregate [codegen id : 14] -Input [4]: [ws_quantity#8, wr_fee#20, wr_refunded_cash#21, r_reason_desc#24] +Input [4]: [ws_quantity#5, wr_fee#17, wr_refunded_cash#18, r_reason_desc#24] Keys [1]: 
[r_reason_desc#24] -Functions [3]: [partial_avg(cast(ws_quantity#8 as bigint)), partial_avg(UnscaledValue(wr_refunded_cash#21)), partial_avg(UnscaledValue(wr_fee#20))] +Functions [3]: [partial_avg(cast(ws_quantity#5 as bigint)), partial_avg(UnscaledValue(wr_refunded_cash#18)), partial_avg(UnscaledValue(wr_fee#17))] Aggregate Attributes [6]: [sum#39, count#40, sum#41, count#42, sum#43, count#44] Results [7]: [r_reason_desc#24, sum#45, count#46, sum#47, count#48, sum#49, count#50] @@ -307,9 +307,9 @@ Arguments: hashpartitioning(r_reason_desc#24, 5), true, [id=#51] (56) HashAggregate [codegen id : 15] Input [7]: [r_reason_desc#24, sum#45, count#46, sum#47, count#48, sum#49, count#50] Keys [1]: [r_reason_desc#24] -Functions [3]: [avg(cast(ws_quantity#8 as bigint)), avg(UnscaledValue(wr_refunded_cash#21)), avg(UnscaledValue(wr_fee#20))] -Aggregate Attributes [3]: [avg(cast(ws_quantity#8 as bigint))#52, avg(UnscaledValue(wr_refunded_cash#21))#53, avg(UnscaledValue(wr_fee#20))#54] -Results [5]: [substr(r_reason_desc#24, 1, 20) AS substr(r_reason_desc, 1, 20)#55, avg(cast(ws_quantity#8 as bigint))#52 AS avg(ws_quantity)#56, cast((avg(UnscaledValue(wr_refunded_cash#21))#53 / 100.0) as decimal(11,6)) AS avg(wr_refunded_cash)#57, cast((avg(UnscaledValue(wr_fee#20))#54 / 100.0) as decimal(11,6)) AS avg(wr_fee)#58, avg(cast(ws_quantity#8 as bigint))#52 AS aggOrder#59] +Functions [3]: [avg(cast(ws_quantity#5 as bigint)), avg(UnscaledValue(wr_refunded_cash#18)), avg(UnscaledValue(wr_fee#17))] +Aggregate Attributes [3]: [avg(cast(ws_quantity#5 as bigint))#52, avg(UnscaledValue(wr_refunded_cash#18))#53, avg(UnscaledValue(wr_fee#17))#54] +Results [5]: [substr(r_reason_desc#24, 1, 20) AS substr(r_reason_desc, 1, 20)#55, avg(cast(ws_quantity#5 as bigint))#52 AS avg(ws_quantity)#56, cast((avg(UnscaledValue(wr_refunded_cash#18))#53 / 100.0) as decimal(11,6)) AS avg(wr_refunded_cash)#57, cast((avg(UnscaledValue(wr_fee#17))#54 / 100.0) as decimal(11,6)) AS avg(wr_fee)#58, avg(cast(ws_quantity#5 as bigint))#52 AS aggOrder#59] (57) TakeOrderedAndProject Input [5]: [substr(r_reason_desc, 1, 20)#55, avg(ws_quantity)#56, avg(wr_refunded_cash)#57, avg(wr_fee)#58, aggOrder#59] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/simplified.txt index e7aee17172e60..3fa7d84f55966 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/simplified.txt @@ -18,23 +18,15 @@ TakeOrderedAndProject [substr(r_reason_desc, 1, 20),aggOrder,avg(wr_refunded_cas Project [ws_quantity,ws_sales_price,ws_net_profit,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_fee,wr_refunded_cash,r_reason_desc] BroadcastHashJoin [wr_reason_sk,r_reason_sk] Project [ws_quantity,ws_sales_price,ws_net_profit,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] Project [ws_sold_date_sk,ws_quantity,ws_sales_price,ws_net_profit,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash] SortMergeJoin 
[ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] InputAdapter - WholeStageCodegen (4) + WholeStageCodegen (3) Sort [ws_item_sk,ws_order_number] InputAdapter - Exchange [ws_item_sk,ws_order_number] #4 - WholeStageCodegen (3) + Exchange [ws_item_sk,ws_order_number] #3 + WholeStageCodegen (2) Project [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_sales_price,ws_net_profit] BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] Filter [ws_item_sk,ws_order_number,ws_web_page_sk,ws_sold_date_sk,ws_sales_price,ws_net_profit] @@ -42,22 +34,30 @@ TakeOrderedAndProject [substr(r_reason_desc, 1, 20),aggOrder,avg(wr_refunded_cas InputAdapter Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_web_page_sk,ws_order_number,ws_quantity,ws_sales_price,ws_net_profit] InputAdapter - BroadcastExchange #5 - WholeStageCodegen (2) + BroadcastExchange #4 + WholeStageCodegen (1) Filter [wp_web_page_sk] ColumnarToRow InputAdapter Scan parquet default.web_page [wp_web_page_sk] InputAdapter - WholeStageCodegen (6) + WholeStageCodegen (5) Sort [wr_item_sk,wr_order_number] InputAdapter - Exchange [wr_item_sk,wr_order_number] #6 - WholeStageCodegen (5) + Exchange [wr_item_sk,wr_order_number] #5 + WholeStageCodegen (4) Filter [wr_item_sk,wr_order_number,wr_refunded_cdemo_sk,wr_returning_cdemo_sk,wr_refunded_addr_sk,wr_reason_sk] ColumnarToRow InputAdapter Scan parquet default.web_returns [wr_item_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_order_number,wr_fee,wr_refunded_cash] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter BroadcastExchange #7 WholeStageCodegen (7) @@ -79,12 +79,8 @@ TakeOrderedAndProject [substr(r_reason_desc, 1, 20),aggOrder,avg(wr_refunded_cas InputAdapter Exchange [cd_demo_sk,cd_demo_sk] #9 WholeStageCodegen (12) - Project [cd_demo_sk,cd_demo_sk,cd_marital_status,cd_education_status] + Project [cd_demo_sk,cd_marital_status,cd_education_status,cd_demo_sk] BroadcastHashJoin [cd_marital_status,cd_education_status,cd_marital_status,cd_education_status] - Filter [cd_demo_sk,cd_marital_status,cd_education_status] - ColumnarToRow - InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] InputAdapter BroadcastExchange #10 WholeStageCodegen (11) @@ -92,3 +88,7 @@ TakeOrderedAndProject [substr(r_reason_desc, 1, 20),aggOrder,avg(wr_refunded_cas ColumnarToRow InputAdapter Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] + Filter [cd_demo_sk,cd_marital_status,cd_education_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/explain.txt index 69b02557c4750..4e85516b594f7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/explain.txt @@ -13,14 +13,14 @@ : : :- * Project (16) : : : +- * BroadcastHashJoin Inner BuildRight (15) : : : :- * Project (9) - : : : : +- * BroadcastHashJoin Inner BuildLeft (8) - : : : : :- BroadcastExchange (4) - : : : : : +- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : 
: : +- Scan parquet default.customer_demographics (1) - : : : : +- * Filter (7) - : : : : +- * ColumnarToRow (6) - : : : : +- Scan parquet default.customer (5) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.customer (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.customer_demographics (4) : : : +- BroadcastExchange (14) : : : +- * Project (13) : : : +- * Filter (12) @@ -33,61 +33,61 @@ : : +- Scan parquet default.customer_address (17) : +- BroadcastExchange (34) : +- * Project (33) - : +- * BroadcastHashJoin Inner BuildLeft (32) - : :- BroadcastExchange (28) - : : +- * Project (27) - : : +- * Filter (26) - : : +- * ColumnarToRow (25) - : : +- Scan parquet default.date_dim (24) - : +- * Filter (31) - : +- * ColumnarToRow (30) - : +- Scan parquet default.catalog_returns (29) + : +- * BroadcastHashJoin Inner BuildRight (32) + : :- * Filter (26) + : : +- * ColumnarToRow (25) + : : +- Scan parquet default.catalog_returns (24) + : +- BroadcastExchange (31) + : +- * Project (30) + : +- * Filter (29) + : +- * ColumnarToRow (28) + : +- Scan parquet default.date_dim (27) +- BroadcastExchange (40) +- * Filter (39) +- * ColumnarToRow (38) +- Scan parquet default.call_center (37) -(1) Scan parquet default.customer_demographics -Output [3]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3] +(1) Scan parquet default.customer +Output [4]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_demographics] -PushedFilters: [Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Unknown)),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,Advanced Degree))), IsNotNull(cd_demo_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 1] -Input [3]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3] +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)] +ReadSchema: struct -(3) Filter [codegen id : 1] -Input [3]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3] -Condition : ((((cd_marital_status#2 = M) AND (cd_education_status#3 = Unknown)) OR ((cd_marital_status#2 = W) AND (cd_education_status#3 = Advanced Degree))) AND isnotnull(cd_demo_sk#1)) +(2) ColumnarToRow [codegen id : 7] +Input [4]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4] -(4) BroadcastExchange -Input [3]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#4] +(3) Filter [codegen id : 7] +Input [4]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4] +Condition : (((isnotnull(c_customer_sk#1) AND isnotnull(c_current_addr_sk#4)) AND isnotnull(c_current_cdemo_sk#2)) AND isnotnull(c_current_hdemo_sk#3)) -(5) Scan parquet default.customer -Output [4]: [c_customer_sk#5, c_current_cdemo_sk#6, c_current_hdemo_sk#7, c_current_addr_sk#8] +(4) Scan parquet default.customer_demographics +Output [3]: [cd_demo_sk#5, cd_marital_status#6, cd_education_status#7] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), 
IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Unknown)),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,Advanced Degree))), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [cd_demo_sk#5, cd_marital_status#6, cd_education_status#7] -(6) ColumnarToRow -Input [4]: [c_customer_sk#5, c_current_cdemo_sk#6, c_current_hdemo_sk#7, c_current_addr_sk#8] +(6) Filter [codegen id : 1] +Input [3]: [cd_demo_sk#5, cd_marital_status#6, cd_education_status#7] +Condition : ((((cd_marital_status#6 = M) AND (cd_education_status#7 = Unknown)) OR ((cd_marital_status#6 = W) AND (cd_education_status#7 = Advanced Degree))) AND isnotnull(cd_demo_sk#5)) -(7) Filter -Input [4]: [c_customer_sk#5, c_current_cdemo_sk#6, c_current_hdemo_sk#7, c_current_addr_sk#8] -Condition : (((isnotnull(c_customer_sk#5) AND isnotnull(c_current_addr_sk#8)) AND isnotnull(c_current_cdemo_sk#6)) AND isnotnull(c_current_hdemo_sk#7)) +(7) BroadcastExchange +Input [3]: [cd_demo_sk#5, cd_marital_status#6, cd_education_status#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] (8) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [cd_demo_sk#1] -Right keys [1]: [c_current_cdemo_sk#6] +Left keys [1]: [c_current_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#5] Join condition: None (9) Project [codegen id : 7] -Output [5]: [cd_marital_status#2, cd_education_status#3, c_customer_sk#5, c_current_hdemo_sk#7, c_current_addr_sk#8] -Input [7]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3, c_customer_sk#5, c_current_cdemo_sk#6, c_current_hdemo_sk#7, c_current_addr_sk#8] +Output [5]: [c_customer_sk#1, c_current_hdemo_sk#3, c_current_addr_sk#4, cd_marital_status#6, cd_education_status#7] +Input [7]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, cd_demo_sk#5, cd_marital_status#6, cd_education_status#7] (10) Scan parquet default.household_demographics Output [2]: [hd_demo_sk#9, hd_buy_potential#10] @@ -112,13 +112,13 @@ Input [1]: [hd_demo_sk#9] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] (15) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_hdemo_sk#7] +Left keys [1]: [c_current_hdemo_sk#3] Right keys [1]: [hd_demo_sk#9] Join condition: None (16) Project [codegen id : 7] -Output [4]: [cd_marital_status#2, cd_education_status#3, c_customer_sk#5, c_current_addr_sk#8] -Input [6]: [cd_marital_status#2, cd_education_status#3, c_customer_sk#5, c_current_hdemo_sk#7, c_current_addr_sk#8, hd_demo_sk#9] +Output [4]: [c_customer_sk#1, c_current_addr_sk#4, cd_marital_status#6, cd_education_status#7] +Input [6]: [c_customer_sk#1, c_current_hdemo_sk#3, c_current_addr_sk#4, cd_marital_status#6, cd_education_status#7, hd_demo_sk#9] (17) Scan parquet default.customer_address Output [2]: [ca_address_sk#12, ca_gmt_offset#13] @@ -143,71 +143,71 @@ Input [1]: [ca_address_sk#12] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] (22) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_addr_sk#8] +Left keys [1]: [c_current_addr_sk#4] Right keys [1]: [ca_address_sk#12] Join condition: None (23) Project [codegen id : 7] -Output [3]: [cd_marital_status#2, cd_education_status#3, c_customer_sk#5] -Input [5]: 
[cd_marital_status#2, cd_education_status#3, c_customer_sk#5, c_current_addr_sk#8, ca_address_sk#12] +Output [3]: [c_customer_sk#1, cd_marital_status#6, cd_education_status#7] +Input [5]: [c_customer_sk#1, c_current_addr_sk#4, cd_marital_status#6, cd_education_status#7, ca_address_sk#12] -(24) Scan parquet default.date_dim -Output [3]: [d_date_sk#15, d_year#16, d_moy#17] +(24) Scan parquet default.catalog_returns +Output [4]: [cr_returned_date_sk#15, cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_call_center_sk), IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_customer_sk)] +ReadSchema: struct -(25) ColumnarToRow [codegen id : 4] -Input [3]: [d_date_sk#15, d_year#16, d_moy#17] +(25) ColumnarToRow [codegen id : 5] +Input [4]: [cr_returned_date_sk#15, cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18] -(26) Filter [codegen id : 4] -Input [3]: [d_date_sk#15, d_year#16, d_moy#17] -Condition : ((((isnotnull(d_year#16) AND isnotnull(d_moy#17)) AND (d_year#16 = 1998)) AND (d_moy#17 = 11)) AND isnotnull(d_date_sk#15)) +(26) Filter [codegen id : 5] +Input [4]: [cr_returned_date_sk#15, cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18] +Condition : ((isnotnull(cr_call_center_sk#17) AND isnotnull(cr_returned_date_sk#15)) AND isnotnull(cr_returning_customer_sk#16)) -(27) Project [codegen id : 4] -Output [1]: [d_date_sk#15] -Input [3]: [d_date_sk#15, d_year#16, d_moy#17] +(27) Scan parquet default.date_dim +Output [3]: [d_date_sk#19, d_year#20, d_moy#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct -(28) BroadcastExchange -Input [1]: [d_date_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] +(28) ColumnarToRow [codegen id : 4] +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] -(29) Scan parquet default.catalog_returns -Output [4]: [cr_returned_date_sk#19, cr_returning_customer_sk#20, cr_call_center_sk#21, cr_net_loss#22] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_returns] -PushedFilters: [IsNotNull(cr_call_center_sk), IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_customer_sk)] -ReadSchema: struct +(29) Filter [codegen id : 4] +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] +Condition : ((((isnotnull(d_year#20) AND isnotnull(d_moy#21)) AND (d_year#20 = 1998)) AND (d_moy#21 = 11)) AND isnotnull(d_date_sk#19)) -(30) ColumnarToRow -Input [4]: [cr_returned_date_sk#19, cr_returning_customer_sk#20, cr_call_center_sk#21, cr_net_loss#22] +(30) Project [codegen id : 4] +Output [1]: [d_date_sk#19] +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] -(31) Filter -Input [4]: [cr_returned_date_sk#19, cr_returning_customer_sk#20, cr_call_center_sk#21, cr_net_loss#22] -Condition : ((isnotnull(cr_call_center_sk#21) AND isnotnull(cr_returned_date_sk#19)) AND isnotnull(cr_returning_customer_sk#20)) +(31) BroadcastExchange +Input [1]: [d_date_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] (32) BroadcastHashJoin [codegen id : 5] -Left 
keys [1]: [d_date_sk#15] -Right keys [1]: [cr_returned_date_sk#19] +Left keys [1]: [cr_returned_date_sk#15] +Right keys [1]: [d_date_sk#19] Join condition: None (33) Project [codegen id : 5] -Output [3]: [cr_returning_customer_sk#20, cr_call_center_sk#21, cr_net_loss#22] -Input [5]: [d_date_sk#15, cr_returned_date_sk#19, cr_returning_customer_sk#20, cr_call_center_sk#21, cr_net_loss#22] +Output [3]: [cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18] +Input [5]: [cr_returned_date_sk#15, cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18, d_date_sk#19] (34) BroadcastExchange -Input [3]: [cr_returning_customer_sk#20, cr_call_center_sk#21, cr_net_loss#22] +Input [3]: [cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] (35) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_customer_sk#5] -Right keys [1]: [cr_returning_customer_sk#20] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [cr_returning_customer_sk#16] Join condition: None (36) Project [codegen id : 7] -Output [4]: [cd_marital_status#2, cd_education_status#3, cr_call_center_sk#21, cr_net_loss#22] -Input [6]: [cd_marital_status#2, cd_education_status#3, c_customer_sk#5, cr_returning_customer_sk#20, cr_call_center_sk#21, cr_net_loss#22] +Output [4]: [cd_marital_status#6, cd_education_status#7, cr_call_center_sk#17, cr_net_loss#18] +Input [6]: [c_customer_sk#1, cd_marital_status#6, cd_education_status#7, cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18] (37) Scan parquet default.call_center Output [4]: [cc_call_center_sk#24, cc_call_center_id#25, cc_name#26, cc_manager#27] @@ -228,31 +228,31 @@ Input [4]: [cc_call_center_sk#24, cc_call_center_id#25, cc_name#26, cc_manager#2 Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#28] (41) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [cr_call_center_sk#21] +Left keys [1]: [cr_call_center_sk#17] Right keys [1]: [cc_call_center_sk#24] Join condition: None (42) Project [codegen id : 7] -Output [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cr_net_loss#22, cd_marital_status#2, cd_education_status#3] -Input [8]: [cd_marital_status#2, cd_education_status#3, cr_call_center_sk#21, cr_net_loss#22, cc_call_center_sk#24, cc_call_center_id#25, cc_name#26, cc_manager#27] +Output [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cr_net_loss#18, cd_marital_status#6, cd_education_status#7] +Input [8]: [cd_marital_status#6, cd_education_status#7, cr_call_center_sk#17, cr_net_loss#18, cc_call_center_sk#24, cc_call_center_id#25, cc_name#26, cc_manager#27] (43) HashAggregate [codegen id : 7] -Input [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cr_net_loss#22, cd_marital_status#2, cd_education_status#3] -Keys [5]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#2, cd_education_status#3] -Functions [1]: [partial_sum(UnscaledValue(cr_net_loss#22))] +Input [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cr_net_loss#18, cd_marital_status#6, cd_education_status#7] +Keys [5]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#6, cd_education_status#7] +Functions [1]: [partial_sum(UnscaledValue(cr_net_loss#18))] Aggregate Attributes [1]: [sum#29] -Results [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#2, cd_education_status#3, sum#30] +Results [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#6, 
cd_education_status#7, sum#30] (44) Exchange -Input [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#2, cd_education_status#3, sum#30] -Arguments: hashpartitioning(cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#2, cd_education_status#3, 5), true, [id=#31] +Input [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#6, cd_education_status#7, sum#30] +Arguments: hashpartitioning(cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#6, cd_education_status#7, 5), true, [id=#31] (45) HashAggregate [codegen id : 8] -Input [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#2, cd_education_status#3, sum#30] -Keys [5]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#2, cd_education_status#3] -Functions [1]: [sum(UnscaledValue(cr_net_loss#22))] -Aggregate Attributes [1]: [sum(UnscaledValue(cr_net_loss#22))#32] -Results [4]: [cc_call_center_id#25 AS Call_Center#33, cc_name#26 AS Call_Center_Name#34, cc_manager#27 AS Manager#35, MakeDecimal(sum(UnscaledValue(cr_net_loss#22))#32,17,2) AS Returns_Loss#36] +Input [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#6, cd_education_status#7, sum#30] +Keys [5]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#6, cd_education_status#7] +Functions [1]: [sum(UnscaledValue(cr_net_loss#18))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_net_loss#18))#32] +Results [4]: [cc_call_center_id#25 AS Call_Center#33, cc_name#26 AS Call_Center_Name#34, cc_manager#27 AS Manager#35, MakeDecimal(sum(UnscaledValue(cr_net_loss#18))#32,17,2) AS Returns_Loss#36] (46) Exchange Input [4]: [Call_Center#33, Call_Center_Name#34, Manager#35, Returns_Loss#36] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/simplified.txt index f64791821893d..87beb3b565cc1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/simplified.txt @@ -12,12 +12,16 @@ WholeStageCodegen (9) BroadcastHashJoin [cr_call_center_sk,cc_call_center_sk] Project [cd_marital_status,cd_education_status,cr_call_center_sk,cr_net_loss] BroadcastHashJoin [c_customer_sk,cr_returning_customer_sk] - Project [cd_marital_status,cd_education_status,c_customer_sk] + Project [c_customer_sk,cd_marital_status,cd_education_status] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [cd_marital_status,cd_education_status,c_customer_sk,c_current_addr_sk] + Project [c_customer_sk,c_current_addr_sk,cd_marital_status,cd_education_status] BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [cd_marital_status,cd_education_status,c_customer_sk,c_current_hdemo_sk,c_current_addr_sk] - BroadcastHashJoin [cd_demo_sk,c_current_cdemo_sk] + Project [c_customer_sk,c_current_hdemo_sk,c_current_addr_sk,cd_marital_status,cd_education_status] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Filter [c_customer_sk,c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen (1) @@ -25,10 +29,6 @@ WholeStageCodegen (9) ColumnarToRow InputAdapter Scan parquet default.customer_demographics 
[cd_demo_sk,cd_marital_status,cd_education_status] - Filter [c_customer_sk,c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen (2) @@ -49,7 +49,11 @@ WholeStageCodegen (9) BroadcastExchange #6 WholeStageCodegen (5) Project [cr_returning_customer_sk,cr_call_center_sk,cr_net_loss] - BroadcastHashJoin [d_date_sk,cr_returned_date_sk] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Filter [cr_call_center_sk,cr_returned_date_sk,cr_returning_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_returning_customer_sk,cr_call_center_sk,cr_net_loss] InputAdapter BroadcastExchange #7 WholeStageCodegen (4) @@ -58,10 +62,6 @@ WholeStageCodegen (9) ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - Filter [cr_call_center_sk,cr_returned_date_sk,cr_returning_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_returned_date_sk,cr_returning_customer_sk,cr_call_center_sk,cr_net_loss] InputAdapter BroadcastExchange #8 WholeStageCodegen (6) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/explain.txt index c547e7af5d790..34eba382992c3 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/explain.txt @@ -10,15 +10,15 @@ TakeOrderedAndProject (32) : :- * Project (16) : : +- * BroadcastHashJoin Inner BuildRight (15) : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildLeft (9) - : : : :- BroadcastExchange (5) - : : : : +- * Project (4) - : : : : +- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.date_dim (1) - : : : +- * Filter (8) - : : : +- * ColumnarToRow (7) - : : : +- Scan parquet default.catalog_sales (6) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.catalog_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) : : +- BroadcastExchange (14) : : +- * Filter (13) : : +- * ColumnarToRow (12) @@ -33,50 +33,50 @@ TakeOrderedAndProject (32) +- Scan parquet default.warehouse (23) -(1) Scan parquet default.date_dim -Output [2]: [d_date_sk#1, d_month_seq#2] +(1) Scan parquet default.catalog_sales +Output [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, cs_warehouse_sk#5] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_ship_mode_sk), IsNotNull(cs_call_center_sk), IsNotNull(cs_ship_date_sk)] +ReadSchema: struct -(2) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#1, d_month_seq#2] +(2) ColumnarToRow [codegen id : 5] +Input [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, cs_warehouse_sk#5] -(3) Filter 
[codegen id : 1] -Input [2]: [d_date_sk#1, d_month_seq#2] -Condition : (((isnotnull(d_month_seq#2) AND (d_month_seq#2 >= 1200)) AND (d_month_seq#2 <= 1211)) AND isnotnull(d_date_sk#1)) +(3) Filter [codegen id : 5] +Input [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, cs_warehouse_sk#5] +Condition : (((isnotnull(cs_warehouse_sk#5) AND isnotnull(cs_ship_mode_sk#4)) AND isnotnull(cs_call_center_sk#3)) AND isnotnull(cs_ship_date_sk#2)) -(4) Project [codegen id : 1] -Output [1]: [d_date_sk#1] -Input [2]: [d_date_sk#1, d_month_seq#2] +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#6, d_month_seq#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct -(5) BroadcastExchange -Input [1]: [d_date_sk#1] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#3] +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#6, d_month_seq#7] -(6) Scan parquet default.catalog_sales -Output [5]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_call_center_sk#6, cs_ship_mode_sk#7, cs_warehouse_sk#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_ship_mode_sk), IsNotNull(cs_call_center_sk), IsNotNull(cs_ship_date_sk)] -ReadSchema: struct +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#6, d_month_seq#7] +Condition : (((isnotnull(d_month_seq#7) AND (d_month_seq#7 >= 1200)) AND (d_month_seq#7 <= 1211)) AND isnotnull(d_date_sk#6)) -(7) ColumnarToRow -Input [5]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_call_center_sk#6, cs_ship_mode_sk#7, cs_warehouse_sk#8] +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [2]: [d_date_sk#6, d_month_seq#7] -(8) Filter -Input [5]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_call_center_sk#6, cs_ship_mode_sk#7, cs_warehouse_sk#8] -Condition : (((isnotnull(cs_warehouse_sk#8) AND isnotnull(cs_ship_mode_sk#7)) AND isnotnull(cs_call_center_sk#6)) AND isnotnull(cs_ship_date_sk#5)) +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] (9) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [d_date_sk#1] -Right keys [1]: [cs_ship_date_sk#5] +Left keys [1]: [cs_ship_date_sk#2] +Right keys [1]: [d_date_sk#6] Join condition: None (10) Project [codegen id : 5] -Output [5]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_call_center_sk#6, cs_ship_mode_sk#7, cs_warehouse_sk#8] -Input [6]: [d_date_sk#1, cs_sold_date_sk#4, cs_ship_date_sk#5, cs_call_center_sk#6, cs_ship_mode_sk#7, cs_warehouse_sk#8] +Output [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, cs_warehouse_sk#5] +Input [6]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, cs_warehouse_sk#5, d_date_sk#6] (11) Scan parquet default.ship_mode Output [2]: [sm_ship_mode_sk#9, sm_type#10] @@ -97,13 +97,13 @@ Input [2]: [sm_ship_mode_sk#9, sm_type#10] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] (15) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_ship_mode_sk#7] +Left keys [1]: [cs_ship_mode_sk#4] Right keys [1]: [sm_ship_mode_sk#9] Join condition: None (16) Project [codegen id : 5] -Output [5]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_call_center_sk#6, 
cs_warehouse_sk#8, sm_type#10] -Input [7]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_call_center_sk#6, cs_ship_mode_sk#7, cs_warehouse_sk#8, sm_ship_mode_sk#9, sm_type#10] +Output [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_warehouse_sk#5, sm_type#10] +Input [7]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, cs_warehouse_sk#5, sm_ship_mode_sk#9, sm_type#10] (17) Scan parquet default.call_center Output [2]: [cc_call_center_sk#12, cc_name#13] @@ -124,13 +124,13 @@ Input [2]: [cc_call_center_sk#12, cc_name#13] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] (21) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_call_center_sk#6] +Left keys [1]: [cs_call_center_sk#3] Right keys [1]: [cc_call_center_sk#12] Join condition: None (22) Project [codegen id : 5] -Output [5]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_warehouse_sk#8, sm_type#10, cc_name#13] -Input [7]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_call_center_sk#6, cs_warehouse_sk#8, sm_type#10, cc_call_center_sk#12, cc_name#13] +Output [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_warehouse_sk#5, sm_type#10, cc_name#13] +Input [7]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_warehouse_sk#5, sm_type#10, cc_call_center_sk#12, cc_name#13] (23) Scan parquet default.warehouse Output [2]: [w_warehouse_sk#15, w_warehouse_name#16] @@ -151,18 +151,18 @@ Input [2]: [w_warehouse_sk#15, w_warehouse_name#16] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] (27) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_warehouse_sk#8] +Left keys [1]: [cs_warehouse_sk#5] Right keys [1]: [w_warehouse_sk#15] Join condition: None (28) Project [codegen id : 5] -Output [5]: [cs_sold_date_sk#4, cs_ship_date_sk#5, w_warehouse_name#16, sm_type#10, cc_name#13] -Input [7]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_warehouse_sk#8, sm_type#10, cc_name#13, w_warehouse_sk#15, w_warehouse_name#16] +Output [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, w_warehouse_name#16, sm_type#10, cc_name#13] +Input [7]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_warehouse_sk#5, sm_type#10, cc_name#13, w_warehouse_sk#15, w_warehouse_name#16] (29) HashAggregate [codegen id : 5] -Input [5]: [cs_sold_date_sk#4, cs_ship_date_sk#5, w_warehouse_name#16, sm_type#10, cc_name#13] +Input [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, w_warehouse_name#16, sm_type#10, cc_name#13] Keys [3]: [substr(w_warehouse_name#16, 1, 20) AS substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13] -Functions [5]: [partial_sum(cast(CASE WHEN ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 30) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 60) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 90) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 120) THEN 1 ELSE 0 END as bigint))] +Functions [5]: [partial_sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 
60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] Aggregate Attributes [5]: [sum#19, sum#20, sum#21, sum#22, sum#23] Results [8]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] @@ -173,9 +173,9 @@ Arguments: hashpartitioning(substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, c (31) HashAggregate [codegen id : 6] Input [8]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] Keys [3]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13] -Functions [5]: [sum(cast(CASE WHEN ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 30) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 60) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 90) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 120) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [5]: [sum(cast(CASE WHEN ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 30) THEN 1 ELSE 0 END as bigint))#30, sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 30) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 60)) THEN 1 ELSE 0 END as bigint))#31, sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 60) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 90)) THEN 1 ELSE 0 END as bigint))#32, sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 90) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 120)) THEN 1 ELSE 0 END as bigint))#33, sum(cast(CASE WHEN ((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 120) THEN 1 ELSE 0 END as bigint))#34] -Results [8]: [substr(w_warehouse_name#16, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, sum(cast(CASE WHEN ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 30) THEN 1 ELSE 0 END as bigint))#30 AS 30 days #36, sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 30) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 60)) THEN 1 ELSE 0 END as bigint))#31 AS 31 - 60 days #37, sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 60) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 90)) THEN 1 ELSE 0 END as bigint))#32 AS 61 - 90 days #38, sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 90) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 120)) THEN 1 ELSE 0 END as bigint))#33 AS 91 - 120 days #39, sum(cast(CASE WHEN ((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 120) THEN 1 ELSE 0 END as bigint))#34 AS >120 days #40] +Functions [5]: [sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND 
((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [5]: [sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33, sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34] +Results [8]: [substr(w_warehouse_name#16, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30 AS 30 days #36, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31 AS 31 - 60 days #37, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32 AS 61 - 90 days #38, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33 AS 91 - 120 days #39, sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34 AS >120 days #40] (32) TakeOrderedAndProject Input [8]: [substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/simplified.txt index de3b1913ae25c..b25b16136992c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/simplified.txt @@ -12,7 +12,11 @@ TakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,cc_name,30 days , Project [cs_sold_date_sk,cs_ship_date_sk,cs_call_center_sk,cs_warehouse_sk,sm_type] BroadcastHashJoin [cs_ship_mode_sk,sm_ship_mode_sk] Project [cs_sold_date_sk,cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,cs_warehouse_sk] - BroadcastHashJoin [d_date_sk,cs_ship_date_sk] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk] + Filter [cs_warehouse_sk,cs_ship_mode_sk,cs_call_center_sk,cs_ship_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,cs_warehouse_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen (1) @@ -21,10 +25,6 @@ TakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,cc_name,30 days , ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_month_seq] - Filter [cs_warehouse_sk,cs_ship_mode_sk,cs_call_center_sk,cs_ship_date_sk] - ColumnarToRow - InputAdapter - Scan parquet 
default.catalog_sales [cs_sold_date_sk,cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,cs_warehouse_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen (2) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/explain.txt index ab246a3449557..1b9e8f37e9418 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/explain.txt @@ -11,30 +11,30 @@ TakeOrderedAndProject (50) : +- Exchange (27) : +- * Project (26) : +- * BroadcastHashJoin Inner BuildRight (25) - : :- * Project (10) - : : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (19) + : : +- * BroadcastHashJoin Inner BuildRight (18) : : :- * Filter (3) : : : +- * ColumnarToRow (2) : : : +- Scan parquet default.store_sales (1) - : : +- BroadcastExchange (8) - : : +- * Project (7) - : : +- * Filter (6) - : : +- * ColumnarToRow (5) - : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (17) + : : +- * Project (16) + : : +- * Filter (15) + : : +- * BroadcastHashJoin LeftOuter BuildRight (14) + : : :- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.item (4) + : : +- BroadcastExchange (13) + : : +- * HashAggregate (12) + : : +- Exchange (11) + : : +- * HashAggregate (10) + : : +- * Filter (9) + : : +- * ColumnarToRow (8) + : : +- Scan parquet default.item (7) : +- BroadcastExchange (24) : +- * Project (23) : +- * Filter (22) - : +- * BroadcastHashJoin LeftOuter BuildRight (21) - : :- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.item (11) - : +- BroadcastExchange (20) - : +- * HashAggregate (19) - : +- Exchange (18) - : +- * HashAggregate (17) - : +- * Filter (16) - : +- * ColumnarToRow (15) - : +- Scan parquet default.item (14) + : +- * ColumnarToRow (21) + : +- Scan parquet default.date_dim (20) +- * Sort (42) +- Exchange (41) +- * Project (40) @@ -65,112 +65,112 @@ Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3] Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3] Condition : ((isnotnull(ss_customer_sk#3) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_item_sk#2)) -(4) Scan parquet default.date_dim -Output [2]: [d_date_sk#4, d_month_seq#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#4, d_month_seq#5] - -(6) Filter [codegen id : 1] -Input [2]: [d_date_sk#4, d_month_seq#5] -Condition : ((isnotnull(d_month_seq#5) AND (d_month_seq#5 = Subquery scalar-subquery#6, [id=#7])) AND isnotnull(d_date_sk#4)) - -(7) Project [codegen id : 1] -Output [1]: [d_date_sk#4] -Input [2]: [d_date_sk#4, d_month_seq#5] - -(8) BroadcastExchange -Input [1]: [d_date_sk#4] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] - -(9) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#4] -Join condition: None - -(10) Project [codegen id : 5] -Output [2]: [ss_item_sk#2, ss_customer_sk#3] -Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, d_date_sk#4] - -(11) Scan parquet default.item -Output [3]: [i_item_sk#9, i_current_price#10, i_category#11] +(4) Scan parquet default.item +Output [3]: [i_item_sk#4, i_current_price#5, 
i_category#6] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_current_price), IsNotNull(i_item_sk)] ReadSchema: struct -(12) ColumnarToRow [codegen id : 4] -Input [3]: [i_item_sk#9, i_current_price#10, i_category#11] +(5) ColumnarToRow [codegen id : 3] +Input [3]: [i_item_sk#4, i_current_price#5, i_category#6] -(13) Filter [codegen id : 4] -Input [3]: [i_item_sk#9, i_current_price#10, i_category#11] -Condition : (isnotnull(i_current_price#10) AND isnotnull(i_item_sk#9)) +(6) Filter [codegen id : 3] +Input [3]: [i_item_sk#4, i_current_price#5, i_category#6] +Condition : (isnotnull(i_current_price#5) AND isnotnull(i_item_sk#4)) -(14) Scan parquet default.item -Output [2]: [i_current_price#10, i_category#11] +(7) Scan parquet default.item +Output [2]: [i_current_price#5, i_category#6] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_category)] ReadSchema: struct -(15) ColumnarToRow [codegen id : 2] -Input [2]: [i_current_price#10, i_category#11] - -(16) Filter [codegen id : 2] -Input [2]: [i_current_price#10, i_category#11] -Condition : isnotnull(i_category#11) - -(17) HashAggregate [codegen id : 2] -Input [2]: [i_current_price#10, i_category#11] -Keys [1]: [i_category#11] -Functions [1]: [partial_avg(UnscaledValue(i_current_price#10))] -Aggregate Attributes [2]: [sum#12, count#13] -Results [3]: [i_category#11, sum#14, count#15] - -(18) Exchange -Input [3]: [i_category#11, sum#14, count#15] -Arguments: hashpartitioning(i_category#11, 5), true, [id=#16] - -(19) HashAggregate [codegen id : 3] -Input [3]: [i_category#11, sum#14, count#15] -Keys [1]: [i_category#11] -Functions [1]: [avg(UnscaledValue(i_current_price#10))] -Aggregate Attributes [1]: [avg(UnscaledValue(i_current_price#10))#17] -Results [2]: [cast((avg(UnscaledValue(i_current_price#10))#17 / 100.0) as decimal(11,6)) AS avg(i_current_price)#18, i_category#11 AS i_category#11#19] - -(20) BroadcastExchange -Input [2]: [avg(i_current_price)#18, i_category#11#19] -Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#20] - -(21) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [i_category#11] -Right keys [1]: [i_category#11#19] +(8) ColumnarToRow [codegen id : 1] +Input [2]: [i_current_price#5, i_category#6] + +(9) Filter [codegen id : 1] +Input [2]: [i_current_price#5, i_category#6] +Condition : isnotnull(i_category#6) + +(10) HashAggregate [codegen id : 1] +Input [2]: [i_current_price#5, i_category#6] +Keys [1]: [i_category#6] +Functions [1]: [partial_avg(UnscaledValue(i_current_price#5))] +Aggregate Attributes [2]: [sum#7, count#8] +Results [3]: [i_category#6, sum#9, count#10] + +(11) Exchange +Input [3]: [i_category#6, sum#9, count#10] +Arguments: hashpartitioning(i_category#6, 5), true, [id=#11] + +(12) HashAggregate [codegen id : 2] +Input [3]: [i_category#6, sum#9, count#10] +Keys [1]: [i_category#6] +Functions [1]: [avg(UnscaledValue(i_current_price#5))] +Aggregate Attributes [1]: [avg(UnscaledValue(i_current_price#5))#12] +Results [2]: [cast((avg(UnscaledValue(i_current_price#5))#12 / 100.0) as decimal(11,6)) AS avg(i_current_price)#13, i_category#6 AS i_category#6#14] + +(13) BroadcastExchange +Input [2]: [avg(i_current_price)#13, i_category#6#14] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#15] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [i_category#6] +Right keys [1]: [i_category#6#14] +Join condition: None + +(15) Filter [codegen id : 3] 
+Input [5]: [i_item_sk#4, i_current_price#5, i_category#6, avg(i_current_price)#13, i_category#6#14] +Condition : (cast(i_current_price#5 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#13)), DecimalType(14,7), true)) + +(16) Project [codegen id : 3] +Output [1]: [i_item_sk#4] +Input [5]: [i_item_sk#4, i_current_price#5, i_category#6, avg(i_current_price)#13, i_category#6#14] + +(17) BroadcastExchange +Input [1]: [i_item_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(18) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#4] Join condition: None +(19) Project [codegen id : 5] +Output [2]: [ss_sold_date_sk#1, ss_customer_sk#3] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, i_item_sk#4] + +(20) Scan parquet default.date_dim +Output [2]: [d_date_sk#17, d_month_seq#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#17, d_month_seq#18] + (22) Filter [codegen id : 4] -Input [5]: [i_item_sk#9, i_current_price#10, i_category#11, avg(i_current_price)#18, i_category#11#19] -Condition : (cast(i_current_price#10 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#18)), DecimalType(14,7), true)) +Input [2]: [d_date_sk#17, d_month_seq#18] +Condition : ((isnotnull(d_month_seq#18) AND (d_month_seq#18 = Subquery scalar-subquery#19, [id=#20])) AND isnotnull(d_date_sk#17)) (23) Project [codegen id : 4] -Output [1]: [i_item_sk#9] -Input [5]: [i_item_sk#9, i_current_price#10, i_category#11, avg(i_current_price)#18, i_category#11#19] +Output [1]: [d_date_sk#17] +Input [2]: [d_date_sk#17, d_month_seq#18] (24) BroadcastExchange -Input [1]: [i_item_sk#9] +Input [1]: [d_date_sk#17] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] (25) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#9] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#17] Join condition: None (26) Project [codegen id : 5] Output [1]: [ss_customer_sk#3] -Input [3]: [ss_item_sk#2, ss_customer_sk#3, i_item_sk#9] +Input [3]: [ss_sold_date_sk#1, ss_customer_sk#3, d_date_sk#17] (27) Exchange Input [1]: [ss_customer_sk#3] @@ -282,7 +282,7 @@ Arguments: 100, [cnt#35 ASC NULLS FIRST, ca_state#24 ASC NULLS FIRST], [state#34 ===== Subqueries ===== -Subquery:1 Hosting operator id = 6 Hosting Expression = Subquery scalar-subquery#6, [id=#7] +Subquery:1 Hosting operator id = 22 Hosting Expression = Subquery scalar-subquery#19, [id=#20] * HashAggregate (57) +- Exchange (56) +- * HashAggregate (55) @@ -293,39 +293,39 @@ Subquery:1 Hosting operator id = 6 Hosting Expression = Subquery scalar-subquery (51) Scan parquet default.date_dim -Output [3]: [d_month_seq#5, d_year#37, d_moy#38] +Output [3]: [d_month_seq#18, d_year#37, d_moy#38] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)] ReadSchema: struct (52) ColumnarToRow [codegen id : 1] -Input [3]: [d_month_seq#5, d_year#37, d_moy#38] +Input [3]: [d_month_seq#18, d_year#37, d_moy#38] (53) Filter [codegen id : 1] -Input [3]: [d_month_seq#5, d_year#37, d_moy#38] +Input [3]: [d_month_seq#18, d_year#37, d_moy#38] Condition : 
(((isnotnull(d_year#37) AND isnotnull(d_moy#38)) AND (d_year#37 = 2000)) AND (d_moy#38 = 1)) (54) Project [codegen id : 1] -Output [1]: [d_month_seq#5] -Input [3]: [d_month_seq#5, d_year#37, d_moy#38] +Output [1]: [d_month_seq#18] +Input [3]: [d_month_seq#18, d_year#37, d_moy#38] (55) HashAggregate [codegen id : 1] -Input [1]: [d_month_seq#5] -Keys [1]: [d_month_seq#5] +Input [1]: [d_month_seq#18] +Keys [1]: [d_month_seq#18] Functions: [] Aggregate Attributes: [] -Results [1]: [d_month_seq#5] +Results [1]: [d_month_seq#18] (56) Exchange -Input [1]: [d_month_seq#5] -Arguments: hashpartitioning(d_month_seq#5, 5), true, [id=#39] +Input [1]: [d_month_seq#18] +Arguments: hashpartitioning(d_month_seq#18, 5), true, [id=#39] (57) HashAggregate [codegen id : 2] -Input [1]: [d_month_seq#5] -Keys [1]: [d_month_seq#5] +Input [1]: [d_month_seq#18] +Keys [1]: [d_month_seq#18] Functions: [] Aggregate Attributes: [] -Results [1]: [d_month_seq#5] +Results [1]: [d_month_seq#18] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/simplified.txt index 2700741b82c04..3cbd44fc5a7d9 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/simplified.txt @@ -16,55 +16,55 @@ TakeOrderedAndProject [cnt,ca_state,state] Exchange [ss_customer_sk] #2 WholeStageCodegen (5) Project [ss_customer_sk] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_customer_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_customer_sk] + BroadcastHashJoin [ss_item_sk,i_item_sk] Filter [ss_customer_sk,ss_sold_date_sk,ss_item_sk] ColumnarToRow InputAdapter Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - Subquery #1 - WholeStageCodegen (2) - HashAggregate [d_month_seq] + WholeStageCodegen (3) + Project [i_item_sk] + Filter [i_current_price,avg(i_current_price)] + BroadcastHashJoin [i_category,i_category] + Filter [i_current_price,i_item_sk] + ColumnarToRow InputAdapter - Exchange [d_month_seq] #4 - WholeStageCodegen (1) - HashAggregate [d_month_seq] - Project [d_month_seq] - Filter [d_year,d_moy] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_month_seq,d_year,d_moy] - ColumnarToRow + Scan parquet default.item [i_item_sk,i_current_price,i_category] InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] + BroadcastExchange #4 + WholeStageCodegen (2) + HashAggregate [i_category,sum,count] [avg(UnscaledValue(i_current_price)),avg(i_current_price),i_category,sum,count] + InputAdapter + Exchange [i_category] #5 + WholeStageCodegen (1) + HashAggregate [i_category,i_current_price] [sum,count,sum,count] + Filter [i_category] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_current_price,i_category] InputAdapter - BroadcastExchange #5 + BroadcastExchange #6 WholeStageCodegen (4) - Project [i_item_sk] - Filter [i_current_price,avg(i_current_price)] - BroadcastHashJoin [i_category,i_category] - Filter [i_current_price,i_item_sk] - ColumnarToRow + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Subquery #1 + WholeStageCodegen (2) + HashAggregate [d_month_seq] InputAdapter - Scan parquet default.item 
[i_item_sk,i_current_price,i_category] + Exchange [d_month_seq] #7 + WholeStageCodegen (1) + HashAggregate [d_month_seq] + Project [d_month_seq] + Filter [d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_year,d_moy] + ColumnarToRow InputAdapter - BroadcastExchange #6 - WholeStageCodegen (3) - HashAggregate [i_category,sum,count] [avg(UnscaledValue(i_current_price)),avg(i_current_price),i_category,sum,count] - InputAdapter - Exchange [i_category] #7 - WholeStageCodegen (2) - HashAggregate [i_category,i_current_price] [sum,count,sum,count] - Filter [i_category] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_current_price,i_category] + Scan parquet default.date_dim [d_date_sk,d_month_seq] InputAdapter WholeStageCodegen (12) Sort [c_customer_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/explain.txt index c2627bd7e4cc9..a7f328537b7ac 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/explain.txt @@ -24,15 +24,15 @@ TakeOrderedAndProject (79) : : : : : :- * Project (17) : : : : : : +- * BroadcastHashJoin Inner BuildRight (16) : : : : : : :- * Project (10) - : : : : : : : +- * BroadcastHashJoin Inner BuildLeft (9) - : : : : : : : :- BroadcastExchange (5) - : : : : : : : : +- * Project (4) - : : : : : : : : +- * Filter (3) - : : : : : : : : +- * ColumnarToRow (2) - : : : : : : : : +- Scan parquet default.household_demographics (1) - : : : : : : : +- * Filter (8) - : : : : : : : +- * ColumnarToRow (7) - : : : : : : : +- Scan parquet default.catalog_sales (6) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : : : :- * Filter (3) + : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : +- Scan parquet default.catalog_sales (1) + : : : : : : : +- BroadcastExchange (8) + : : : : : : : +- * Project (7) + : : : : : : : +- * Filter (6) + : : : : : : : +- * ColumnarToRow (5) + : : : : : : : +- Scan parquet default.household_demographics (4) : : : : : : +- BroadcastExchange (15) : : : : : : +- * Project (14) : : : : : : +- * Filter (13) @@ -49,26 +49,26 @@ TakeOrderedAndProject (79) : : : : +- Scan parquet default.item (26) : : : +- BroadcastExchange (43) : : : +- * Project (42) - : : : +- * BroadcastHashJoin Inner BuildRight (41) - : : : :- * Filter (35) - : : : : +- * ColumnarToRow (34) - : : : : +- Scan parquet default.date_dim (33) - : : : +- BroadcastExchange (40) - : : : +- * Project (39) - : : : +- * Filter (38) - : : : +- * ColumnarToRow (37) - : : : +- Scan parquet default.date_dim (36) + : : : +- * BroadcastHashJoin Inner BuildLeft (41) + : : : :- BroadcastExchange (37) + : : : : +- * Project (36) + : : : : +- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet default.date_dim (33) + : : : +- * Filter (40) + : : : +- * ColumnarToRow (39) + : : : +- Scan parquet default.date_dim (38) : : +- * Sort (58) : : +- Exchange (57) : : +- * Project (56) - : : +- * BroadcastHashJoin Inner BuildLeft (55) - : : :- BroadcastExchange (51) - : : : +- * Filter (50) - : : : +- * ColumnarToRow (49) - : : : +- Scan parquet default.warehouse (48) - : : +- * Filter (54) - : : +- * ColumnarToRow (53) - : : +- Scan parquet default.inventory (52) + : : +- * BroadcastHashJoin Inner BuildRight (55) + : : :- * Filter (50) + : : : +- * 
ColumnarToRow (49) + : : : +- Scan parquet default.inventory (48) + : : +- BroadcastExchange (54) + : : +- * Filter (53) + : : +- * ColumnarToRow (52) + : : +- Scan parquet default.warehouse (51) : +- BroadcastExchange (64) : +- * Filter (63) : +- * ColumnarToRow (62) @@ -80,50 +80,50 @@ TakeOrderedAndProject (79) +- Scan parquet default.catalog_returns (69) -(1) Scan parquet default.household_demographics -Output [2]: [hd_demo_sk#1, hd_buy_potential#2] +(1) Scan parquet default.catalog_sales +Output [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] Batched: true -Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,1001-5000), IsNotNull(hd_demo_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_quantity), IsNotNull(cs_item_sk), IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_hdemo_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_ship_date_sk)] +ReadSchema: struct -(2) ColumnarToRow [codegen id : 1] -Input [2]: [hd_demo_sk#1, hd_buy_potential#2] +(2) ColumnarToRow [codegen id : 4] +Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] -(3) Filter [codegen id : 1] -Input [2]: [hd_demo_sk#1, hd_buy_potential#2] -Condition : ((isnotnull(hd_buy_potential#2) AND (hd_buy_potential#2 = 1001-5000)) AND isnotnull(hd_demo_sk#1)) +(3) Filter [codegen id : 4] +Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] +Condition : (((((isnotnull(cs_quantity#8) AND isnotnull(cs_item_sk#5)) AND isnotnull(cs_bill_cdemo_sk#3)) AND isnotnull(cs_bill_hdemo_sk#4)) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_ship_date_sk#2)) -(4) Project [codegen id : 1] -Output [1]: [hd_demo_sk#1] -Input [2]: [hd_demo_sk#1, hd_buy_potential#2] +(4) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#9, hd_buy_potential#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,1001-5000), IsNotNull(hd_demo_sk)] +ReadSchema: struct -(5) BroadcastExchange -Input [1]: [hd_demo_sk#1] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#3] +(5) ColumnarToRow [codegen id : 1] +Input [2]: [hd_demo_sk#9, hd_buy_potential#10] -(6) Scan parquet default.catalog_sales -Output [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_bill_hdemo_sk#7, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_quantity), IsNotNull(cs_item_sk), IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_hdemo_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_ship_date_sk)] -ReadSchema: struct +(6) Filter [codegen id : 1] +Input [2]: [hd_demo_sk#9, hd_buy_potential#10] +Condition : ((isnotnull(hd_buy_potential#10) AND (hd_buy_potential#10 = 1001-5000)) AND isnotnull(hd_demo_sk#9)) -(7) ColumnarToRow -Input [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_bill_hdemo_sk#7, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] +(7) Project [codegen id : 1] +Output [1]: [hd_demo_sk#9] 
+Input [2]: [hd_demo_sk#9, hd_buy_potential#10] -(8) Filter -Input [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_bill_hdemo_sk#7, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] -Condition : (((((isnotnull(cs_quantity#11) AND isnotnull(cs_item_sk#8)) AND isnotnull(cs_bill_cdemo_sk#6)) AND isnotnull(cs_bill_hdemo_sk#7)) AND isnotnull(cs_sold_date_sk#4)) AND isnotnull(cs_ship_date_sk#5)) +(8) BroadcastExchange +Input [1]: [hd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] (9) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [hd_demo_sk#1] -Right keys [1]: [cs_bill_hdemo_sk#7] +Left keys [1]: [cs_bill_hdemo_sk#4] +Right keys [1]: [hd_demo_sk#9] Join condition: None (10) Project [codegen id : 4] -Output [7]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] -Input [9]: [hd_demo_sk#1, cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_bill_hdemo_sk#7, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] +Output [7]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] +Input [9]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, hd_demo_sk#9] (11) Scan parquet default.customer_demographics Output [2]: [cd_demo_sk#12, cd_marital_status#13] @@ -148,13 +148,13 @@ Input [1]: [cd_demo_sk#12] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] (16) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_bill_cdemo_sk#6] +Left keys [1]: [cs_bill_cdemo_sk#3] Right keys [1]: [cd_demo_sk#12] Join condition: None (17) Project [codegen id : 4] -Output [6]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] -Input [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, cd_demo_sk#12] +Output [6]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] +Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, cd_demo_sk#12] (18) Scan parquet default.date_dim Output [2]: [d_date_sk#15, d_date#16] @@ -175,21 +175,21 @@ Input [2]: [d_date_sk#15, d_date#16] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] (22) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_ship_date_sk#5] +Left keys [1]: [cs_ship_date_sk#2] Right keys [1]: [d_date_sk#15] Join condition: None (23) Project [codegen id : 4] -Output [6]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16] -Input [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date_sk#15, d_date#16] +Output [6]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16] +Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date_sk#15, d_date#16] (24) Exchange -Input [6]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16] -Arguments: hashpartitioning(cs_item_sk#8, 5), true, [id=#18] +Input [6]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, 
cs_order_number#7, cs_quantity#8, d_date#16] +Arguments: hashpartitioning(cs_item_sk#5, 5), true, [id=#18] (25) Sort [codegen id : 5] -Input [6]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16] -Arguments: [cs_item_sk#8 ASC NULLS FIRST], false, 0 +Input [6]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16] +Arguments: [cs_item_sk#5 ASC NULLS FIRST], false, 0 (26) Scan parquet default.item Output [2]: [i_item_sk#19, i_item_desc#20] @@ -214,137 +214,137 @@ Input [2]: [i_item_sk#19, i_item_desc#20] Arguments: [i_item_sk#19 ASC NULLS FIRST], false, 0 (31) SortMergeJoin [codegen id : 10] -Left keys [1]: [cs_item_sk#8] +Left keys [1]: [cs_item_sk#5] Right keys [1]: [i_item_sk#19] Join condition: None (32) Project [codegen id : 10] -Output [7]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16, i_item_desc#20] -Input [8]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16, i_item_sk#19, i_item_desc#20] +Output [7]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20] +Input [8]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_sk#19, i_item_desc#20] (33) Scan parquet default.date_dim -Output [2]: [d_date_sk#22, d_week_seq#23] +Output [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] -ReadSchema: struct +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] +ReadSchema: struct -(34) ColumnarToRow [codegen id : 9] -Input [2]: [d_date_sk#22, d_week_seq#23] +(34) ColumnarToRow [codegen id : 8] +Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] -(35) Filter [codegen id : 9] -Input [2]: [d_date_sk#22, d_week_seq#23] -Condition : (isnotnull(d_week_seq#23) AND isnotnull(d_date_sk#22)) +(35) Filter [codegen id : 8] +Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] +Condition : ((((isnotnull(d_year#25) AND (d_year#25 = 2001)) AND isnotnull(d_date_sk#22)) AND isnotnull(d_week_seq#24)) AND isnotnull(d_date#23)) -(36) Scan parquet default.date_dim -Output [4]: [d_date_sk#24, d_date#25, d_week_seq#26, d_year#27] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] -ReadSchema: struct +(36) Project [codegen id : 8] +Output [3]: [d_date_sk#22, d_date#23, d_week_seq#24] +Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] -(37) ColumnarToRow [codegen id : 8] -Input [4]: [d_date_sk#24, d_date#25, d_week_seq#26, d_year#27] +(37) BroadcastExchange +Input [3]: [d_date_sk#22, d_date#23, d_week_seq#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#26] -(38) Filter [codegen id : 8] -Input [4]: [d_date_sk#24, d_date#25, d_week_seq#26, d_year#27] -Condition : ((((isnotnull(d_year#27) AND (d_year#27 = 2001)) AND isnotnull(d_date_sk#24)) AND isnotnull(d_week_seq#26)) AND isnotnull(d_date#25)) +(38) Scan parquet default.date_dim +Output [2]: [d_date_sk#27, d_week_seq#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq), 
IsNotNull(d_date_sk)] +ReadSchema: struct -(39) Project [codegen id : 8] -Output [3]: [d_date_sk#24, d_date#25, d_week_seq#26] -Input [4]: [d_date_sk#24, d_date#25, d_week_seq#26, d_year#27] +(39) ColumnarToRow +Input [2]: [d_date_sk#27, d_week_seq#28] -(40) BroadcastExchange -Input [3]: [d_date_sk#24, d_date#25, d_week_seq#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#28] +(40) Filter +Input [2]: [d_date_sk#27, d_week_seq#28] +Condition : (isnotnull(d_week_seq#28) AND isnotnull(d_date_sk#27)) (41) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [d_week_seq#23] -Right keys [1]: [d_week_seq#26] +Left keys [1]: [d_week_seq#24] +Right keys [1]: [d_week_seq#28] Join condition: None (42) Project [codegen id : 9] -Output [4]: [d_date_sk#22, d_date_sk#24, d_date#25, d_week_seq#26] -Input [5]: [d_date_sk#22, d_week_seq#23, d_date_sk#24, d_date#25, d_week_seq#26] +Output [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27] +Input [5]: [d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27, d_week_seq#28] (43) BroadcastExchange -Input [4]: [d_date_sk#22, d_date_sk#24, d_date#25, d_week_seq#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#29] +Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] (44) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cs_sold_date_sk#4] -Right keys [1]: [d_date_sk#24] -Join condition: (d_date#16 > d_date#25 + 5 days) +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#22] +Join condition: (d_date#16 > d_date#23 + 5 days) (45) Project [codegen id : 10] -Output [7]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, i_item_desc#20, d_date_sk#22, d_week_seq#26] -Input [11]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16, i_item_desc#20, d_date_sk#22, d_date_sk#24, d_date#25, d_week_seq#26] +Output [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27] +Input [11]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20, d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27] (46) Exchange -Input [7]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, i_item_desc#20, d_date_sk#22, d_week_seq#26] -Arguments: hashpartitioning(cs_item_sk#8, d_date_sk#22, 5), true, [id=#30] +Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27] +Arguments: hashpartitioning(cs_item_sk#5, d_date_sk#27, 5), true, [id=#30] (47) Sort [codegen id : 11] -Input [7]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, i_item_desc#20, d_date_sk#22, d_week_seq#26] -Arguments: [cs_item_sk#8 ASC NULLS FIRST, d_date_sk#22 ASC NULLS FIRST], false, 0 +Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27] +Arguments: [cs_item_sk#5 ASC NULLS FIRST, d_date_sk#27 ASC NULLS FIRST], false, 0 -(48) Scan parquet default.warehouse -Output [2]: [w_warehouse_sk#31, w_warehouse_name#32] +(48) Scan parquet default.inventory +Output [4]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] Batched: true -Location [not included in comparison]/{warehouse_dir}/warehouse] -PushedFilters: [IsNotNull(w_warehouse_sk)] -ReadSchema: struct - 
-(49) ColumnarToRow [codegen id : 12] -Input [2]: [w_warehouse_sk#31, w_warehouse_name#32] +Location [not included in comparison]/{warehouse_dir}/inventory] +PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct -(50) Filter [codegen id : 12] -Input [2]: [w_warehouse_sk#31, w_warehouse_name#32] -Condition : isnotnull(w_warehouse_sk#31) +(49) ColumnarToRow [codegen id : 13] +Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] -(51) BroadcastExchange -Input [2]: [w_warehouse_sk#31, w_warehouse_name#32] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#33] +(50) Filter [codegen id : 13] +Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] +Condition : (((isnotnull(inv_quantity_on_hand#34) AND isnotnull(inv_item_sk#32)) AND isnotnull(inv_warehouse_sk#33)) AND isnotnull(inv_date_sk#31)) -(52) Scan parquet default.inventory -Output [4]: [inv_date_sk#34, inv_item_sk#35, inv_warehouse_sk#36, inv_quantity_on_hand#37] +(51) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#35, w_warehouse_name#36] Batched: true -Location [not included in comparison]/{warehouse_dir}/inventory] -PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(52) ColumnarToRow [codegen id : 12] +Input [2]: [w_warehouse_sk#35, w_warehouse_name#36] -(53) ColumnarToRow -Input [4]: [inv_date_sk#34, inv_item_sk#35, inv_warehouse_sk#36, inv_quantity_on_hand#37] +(53) Filter [codegen id : 12] +Input [2]: [w_warehouse_sk#35, w_warehouse_name#36] +Condition : isnotnull(w_warehouse_sk#35) -(54) Filter -Input [4]: [inv_date_sk#34, inv_item_sk#35, inv_warehouse_sk#36, inv_quantity_on_hand#37] -Condition : (((isnotnull(inv_quantity_on_hand#37) AND isnotnull(inv_item_sk#35)) AND isnotnull(inv_warehouse_sk#36)) AND isnotnull(inv_date_sk#34)) +(54) BroadcastExchange +Input [2]: [w_warehouse_sk#35, w_warehouse_name#36] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#37] (55) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [w_warehouse_sk#31] -Right keys [1]: [inv_warehouse_sk#36] +Left keys [1]: [inv_warehouse_sk#33] +Right keys [1]: [w_warehouse_sk#35] Join condition: None (56) Project [codegen id : 13] -Output [4]: [w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_quantity_on_hand#37] -Input [6]: [w_warehouse_sk#31, w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_warehouse_sk#36, inv_quantity_on_hand#37] +Output [4]: [inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Input [6]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34, w_warehouse_sk#35, w_warehouse_name#36] (57) Exchange -Input [4]: [w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_quantity_on_hand#37] -Arguments: hashpartitioning(inv_item_sk#35, inv_date_sk#34, 5), true, [id=#38] +Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Arguments: hashpartitioning(inv_item_sk#32, inv_date_sk#31, 5), true, [id=#38] (58) Sort [codegen id : 14] -Input [4]: [w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_quantity_on_hand#37] -Arguments: [inv_item_sk#35 ASC NULLS FIRST, 
inv_date_sk#34 ASC NULLS FIRST], false, 0 +Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Arguments: [inv_item_sk#32 ASC NULLS FIRST, inv_date_sk#31 ASC NULLS FIRST], false, 0 (59) SortMergeJoin [codegen id : 16] -Left keys [2]: [cs_item_sk#8, d_date_sk#22] -Right keys [2]: [inv_item_sk#35, inv_date_sk#34] -Join condition: (inv_quantity_on_hand#37 < cs_quantity#11) +Left keys [2]: [cs_item_sk#5, d_date_sk#27] +Right keys [2]: [inv_item_sk#32, inv_date_sk#31] +Join condition: (inv_quantity_on_hand#34 < cs_quantity#8) (60) Project [codegen id : 16] -Output [6]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26] -Input [11]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, i_item_desc#20, d_date_sk#22, d_week_seq#26, w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_quantity_on_hand#37] +Output [6]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] +Input [11]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27, inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] (61) Scan parquet default.promotion Output [1]: [p_promo_sk#39] @@ -365,21 +365,21 @@ Input [1]: [p_promo_sk#39] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#40] (65) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [cs_promo_sk#9] +Left keys [1]: [cs_promo_sk#6] Right keys [1]: [p_promo_sk#39] Join condition: None (66) Project [codegen id : 16] -Output [5]: [cs_item_sk#8, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26] -Input [7]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26, p_promo_sk#39] +Output [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] +Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24, p_promo_sk#39] (67) Exchange -Input [5]: [cs_item_sk#8, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26] -Arguments: hashpartitioning(cs_item_sk#8, cs_order_number#10, 5), true, [id=#41] +Input [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] +Arguments: hashpartitioning(cs_item_sk#5, cs_order_number#7, 5), true, [id=#41] (68) Sort [codegen id : 17] -Input [5]: [cs_item_sk#8, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26] -Arguments: [cs_item_sk#8 ASC NULLS FIRST, cs_order_number#10 ASC NULLS FIRST], false, 0 +Input [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] +Arguments: [cs_item_sk#5 ASC NULLS FIRST, cs_order_number#7 ASC NULLS FIRST], false, 0 (69) Scan parquet default.catalog_returns Output [2]: [cr_item_sk#42, cr_order_number#43] @@ -404,33 +404,33 @@ Input [2]: [cr_item_sk#42, cr_order_number#43] Arguments: [cr_item_sk#42 ASC NULLS FIRST, cr_order_number#43 ASC NULLS FIRST], false, 0 (74) SortMergeJoin -Left keys [2]: [cs_item_sk#8, cs_order_number#10] +Left keys [2]: [cs_item_sk#5, cs_order_number#7] Right keys [2]: [cr_item_sk#42, cr_order_number#43] Join condition: None (75) Project [codegen id : 20] -Output [3]: [w_warehouse_name#32, i_item_desc#20, d_week_seq#26] -Input [7]: [cs_item_sk#8, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26, cr_item_sk#42, cr_order_number#43] +Output [3]: 
[w_warehouse_name#36, i_item_desc#20, d_week_seq#24] +Input [7]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24, cr_item_sk#42, cr_order_number#43] (76) HashAggregate [codegen id : 20] -Input [3]: [w_warehouse_name#32, i_item_desc#20, d_week_seq#26] -Keys [3]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26] +Input [3]: [w_warehouse_name#36, i_item_desc#20, d_week_seq#24] +Keys [3]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#45] -Results [4]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, count#46] +Results [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count#46] (77) Exchange -Input [4]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, count#46] -Arguments: hashpartitioning(i_item_desc#20, w_warehouse_name#32, d_week_seq#26, 5), true, [id=#47] +Input [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count#46] +Arguments: hashpartitioning(i_item_desc#20, w_warehouse_name#36, d_week_seq#24, 5), true, [id=#47] (78) HashAggregate [codegen id : 21] -Input [4]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, count#46] -Keys [3]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26] +Input [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count#46] +Keys [3]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#48] -Results [6]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, count(1)#48 AS no_promo#49, count(1)#48 AS promo#50, count(1)#48 AS total_cnt#51] +Results [6]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count(1)#48 AS no_promo#49, count(1)#48 AS promo#50, count(1)#48 AS total_cnt#51] (79) TakeOrderedAndProject -Input [6]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, no_promo#49, promo#50, total_cnt#51] -Arguments: 100, [total_cnt#51 DESC NULLS LAST, i_item_desc#20 ASC NULLS FIRST, w_warehouse_name#32 ASC NULLS FIRST, d_week_seq#26 ASC NULLS FIRST], [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, no_promo#49, promo#50, total_cnt#51] +Input [6]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, no_promo#49, promo#50, total_cnt#51] +Arguments: 100, [total_cnt#51 DESC NULLS LAST, i_item_desc#20 ASC NULLS FIRST, w_warehouse_name#36 ASC NULLS FIRST, d_week_seq#24 ASC NULLS FIRST], [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, no_promo#49, promo#50, total_cnt#51] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/simplified.txt index 39dba3af02359..918508787c4b0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/simplified.txt @@ -23,7 +23,7 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom InputAdapter Exchange [cs_item_sk,d_date_sk] #3 WholeStageCodegen (10) - Project [cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,i_item_desc,d_date_sk,d_week_seq] + Project [cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,i_item_desc,d_week_seq,d_date_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk,d_date,d_date] Project [cs_sold_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,d_date,i_item_desc] SortMergeJoin [cs_item_sk,i_item_sk] @@ -38,7 +38,11 @@ TakeOrderedAndProject 
[total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom Project [cs_sold_date_sk,cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] Project [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] - BroadcastHashJoin [hd_demo_sk,cs_bill_hdemo_sk] + BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] + Filter [cs_quantity,cs_item_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_sold_date_sk,cs_ship_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] InputAdapter BroadcastExchange #5 WholeStageCodegen (1) @@ -47,10 +51,6 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] - Filter [cs_quantity,cs_item_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_sold_date_sk,cs_ship_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] InputAdapter BroadcastExchange #6 WholeStageCodegen (2) @@ -79,12 +79,8 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom InputAdapter BroadcastExchange #9 WholeStageCodegen (9) - Project [d_date_sk,d_date_sk,d_date,d_week_seq] + Project [d_date_sk,d_date,d_week_seq,d_date_sk] BroadcastHashJoin [d_week_seq,d_week_seq] - Filter [d_week_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_week_seq] InputAdapter BroadcastExchange #10 WholeStageCodegen (8) @@ -93,14 +89,22 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_date,d_week_seq,d_year] + Filter [d_week_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_week_seq] InputAdapter WholeStageCodegen (14) Sort [inv_item_sk,inv_date_sk] InputAdapter Exchange [inv_item_sk,inv_date_sk] #11 WholeStageCodegen (13) - Project [w_warehouse_name,inv_date_sk,inv_item_sk,inv_quantity_on_hand] - BroadcastHashJoin [w_warehouse_sk,inv_warehouse_sk] + Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk,inv_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] InputAdapter BroadcastExchange #12 WholeStageCodegen (12) @@ -108,10 +112,6 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom ColumnarToRow InputAdapter Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] - Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk,inv_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] InputAdapter BroadcastExchange #13 WholeStageCodegen (15) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt index e6210f4a26281..025e5a6f94741 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt @@ -44,12 +44,12 @@ TakeOrderedAndProject (125) : : : : : +- * Project (23) : : : : : +- * Filter (22) : : : : : +- * ColumnarToRow (21) - : : : : : +- Scan parquet default.date_dim (20) + : : : : : +- Scan parquet default.promotion (20) : : : : +- BroadcastExchange (31) : : : : +- * Project (30) : : : : +- * Filter (29) : : : : +- * ColumnarToRow (28) - : : : : +- Scan parquet default.promotion (27) + : : : : +- Scan parquet default.date_dim (27) : : : +- BroadcastExchange (37) : : : +- * Filter (36) : : : +- * ColumnarToRow (35) @@ -210,67 +210,67 @@ Join condition: None Output [7]: [ss_sold_date_sk#1, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12] Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, i_item_sk#14] -(20) Scan parquet default.date_dim -Output [2]: [d_date_sk#17, d_date#18] +(20) Scan parquet default.promotion +Output [2]: [p_promo_sk#17, p_channel_tv#18] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-09-03), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)] +ReadSchema: struct (21) ColumnarToRow [codegen id : 6] -Input [2]: [d_date_sk#17, d_date#18] +Input [2]: [p_promo_sk#17, p_channel_tv#18] (22) Filter [codegen id : 6] -Input [2]: [d_date_sk#17, d_date#18] -Condition : (((isnotnull(d_date#18) AND (d_date#18 >= 10442)) AND (d_date#18 <= 10472)) AND isnotnull(d_date_sk#17)) +Input [2]: [p_promo_sk#17, p_channel_tv#18] +Condition : ((isnotnull(p_channel_tv#18) AND (p_channel_tv#18 = N)) AND isnotnull(p_promo_sk#17)) (23) Project [codegen id : 6] -Output [1]: [d_date_sk#17] -Input [2]: [d_date_sk#17, d_date#18] +Output [1]: [p_promo_sk#17] +Input [2]: [p_promo_sk#17, p_channel_tv#18] (24) BroadcastExchange -Input [1]: [d_date_sk#17] +Input [1]: [p_promo_sk#17] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] (25) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#17] +Left keys [1]: [ss_promo_sk#4] +Right keys [1]: [p_promo_sk#17] Join condition: None (26) Project [codegen id : 9] -Output [6]: [ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12] -Input [8]: [ss_sold_date_sk#1, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, d_date_sk#17] +Output [6]: [ss_sold_date_sk#1, ss_store_sk#3, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12] +Input [8]: [ss_sold_date_sk#1, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, p_promo_sk#17] -(27) Scan parquet default.promotion -Output [2]: [p_promo_sk#20, p_channel_tv#21] +(27) Scan parquet default.date_dim +Output [2]: [d_date_sk#20, d_date#21] Batched: true -Location [not included in comparison]/{warehouse_dir}/promotion] -PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), 
GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-09-03), IsNotNull(d_date_sk)] +ReadSchema: struct (28) ColumnarToRow [codegen id : 7] -Input [2]: [p_promo_sk#20, p_channel_tv#21] +Input [2]: [d_date_sk#20, d_date#21] (29) Filter [codegen id : 7] -Input [2]: [p_promo_sk#20, p_channel_tv#21] -Condition : ((isnotnull(p_channel_tv#21) AND (p_channel_tv#21 = N)) AND isnotnull(p_promo_sk#20)) +Input [2]: [d_date_sk#20, d_date#21] +Condition : (((isnotnull(d_date#21) AND (d_date#21 >= 10442)) AND (d_date#21 <= 10472)) AND isnotnull(d_date_sk#20)) (30) Project [codegen id : 7] -Output [1]: [p_promo_sk#20] -Input [2]: [p_promo_sk#20, p_channel_tv#21] +Output [1]: [d_date_sk#20] +Input [2]: [d_date_sk#20, d_date#21] (31) BroadcastExchange -Input [1]: [p_promo_sk#20] +Input [1]: [d_date_sk#20] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] (32) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ss_promo_sk#4] -Right keys [1]: [p_promo_sk#20] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#20] Join condition: None (33) Project [codegen id : 9] Output [5]: [ss_store_sk#3, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12] -Input [7]: [ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, p_promo_sk#20] +Input [7]: [ss_sold_date_sk#1, ss_store_sk#3, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, d_date_sk#20] (34) Scan parquet default.store Output [2]: [s_store_sk#23, s_store_id#24] @@ -383,28 +383,28 @@ Output [7]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_s Input [9]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_item_sk#47, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, i_item_sk#14] (58) ReusedExchange [Reuses operator id: 24] -Output [1]: [d_date_sk#17] +Output [1]: [p_promo_sk#17] (59) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [cs_sold_date_sk#45] -Right keys [1]: [d_date_sk#17] +Left keys [1]: [cs_promo_sk#48] +Right keys [1]: [p_promo_sk#17] Join condition: None (60) Project [codegen id : 19] -Output [6]: [cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56] -Input [8]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, d_date_sk#17] +Output [6]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56] +Input [8]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, p_promo_sk#17] (61) ReusedExchange [Reuses operator id: 31] -Output [1]: [p_promo_sk#20] +Output [1]: [d_date_sk#20] (62) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [cs_promo_sk#48] -Right keys [1]: [p_promo_sk#20] +Left keys [1]: [cs_sold_date_sk#45] +Right keys [1]: [d_date_sk#20] Join condition: None (63) Project [codegen id : 19] Output [5]: [cs_catalog_page_sk#46, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56] -Input [7]: [cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, p_promo_sk#20] +Input [7]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, d_date_sk#20] (64) Scan parquet 
default.catalog_page Output [2]: [cp_catalog_page_sk#58, cp_catalog_page_id#59] @@ -517,28 +517,28 @@ Output [7]: [ws_sold_date_sk#80, ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales Input [9]: [ws_sold_date_sk#80, ws_item_sk#81, ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, i_item_sk#14] (88) ReusedExchange [Reuses operator id: 24] -Output [1]: [d_date_sk#17] +Output [1]: [p_promo_sk#17] (89) BroadcastHashJoin [codegen id : 29] -Left keys [1]: [ws_sold_date_sk#80] -Right keys [1]: [d_date_sk#17] +Left keys [1]: [ws_promo_sk#83] +Right keys [1]: [p_promo_sk#17] Join condition: None (90) Project [codegen id : 29] -Output [6]: [ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91] -Input [8]: [ws_sold_date_sk#80, ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, d_date_sk#17] +Output [6]: [ws_sold_date_sk#80, ws_web_site_sk#82, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91] +Input [8]: [ws_sold_date_sk#80, ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, p_promo_sk#17] (91) ReusedExchange [Reuses operator id: 31] -Output [1]: [p_promo_sk#20] +Output [1]: [d_date_sk#20] (92) BroadcastHashJoin [codegen id : 29] -Left keys [1]: [ws_promo_sk#83] -Right keys [1]: [p_promo_sk#20] +Left keys [1]: [ws_sold_date_sk#80] +Right keys [1]: [d_date_sk#20] Join condition: None (93) Project [codegen id : 29] Output [5]: [ws_web_site_sk#82, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91] -Input [7]: [ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, p_promo_sk#20] +Input [7]: [ws_sold_date_sk#80, ws_web_site_sk#82, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, d_date_sk#20] (94) Scan parquet default.web_site Output [2]: [web_site_sk#93, web_site_id#94] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/simplified.txt index 13781c8bd5993..ad59968740aaa 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/simplified.txt @@ -32,9 +32,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] Project [ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_store_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] - BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] Project [ss_sold_date_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] BroadcastHashJoin [ss_item_sk,i_item_sk] Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] @@ -69,19 +69,19 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter BroadcastExchange #8 WholeStageCodegen 
(6) - Project [d_date_sk] - Filter [d_date,d_date_sk] + Project [p_promo_sk] + Filter [p_channel_tv,p_promo_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Scan parquet default.promotion [p_promo_sk,p_channel_tv] InputAdapter BroadcastExchange #9 WholeStageCodegen (7) - Project [p_promo_sk] - Filter [p_channel_tv,p_promo_sk] + Project [d_date_sk] + Filter [d_date,d_date_sk] ColumnarToRow InputAdapter - Scan parquet default.promotion [p_promo_sk,p_channel_tv] + Scan parquet default.date_dim [d_date_sk,d_date] InputAdapter BroadcastExchange #10 WholeStageCodegen (8) @@ -98,9 +98,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] Project [cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] BroadcastHashJoin [cs_catalog_page_sk,cp_catalog_page_sk] Project [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] - BroadcastHashJoin [cs_promo_sk,p_promo_sk] - Project [cs_catalog_page_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] Project [cs_sold_date_sk,cs_catalog_page_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] BroadcastHashJoin [cs_item_sk,i_item_sk] Project [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] @@ -127,9 +127,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter ReusedExchange [i_item_sk] #7 InputAdapter - ReusedExchange [d_date_sk] #8 + ReusedExchange [p_promo_sk] #8 InputAdapter - ReusedExchange [p_promo_sk] #9 + ReusedExchange [d_date_sk] #9 InputAdapter BroadcastExchange #14 WholeStageCodegen (18) @@ -146,9 +146,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] Project [ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] BroadcastHashJoin [ws_web_site_sk,web_site_sk] Project [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] - BroadcastHashJoin [ws_promo_sk,p_promo_sk] - Project [ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_web_site_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] + BroadcastHashJoin [ws_promo_sk,p_promo_sk] Project [ws_sold_date_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] BroadcastHashJoin [ws_item_sk,i_item_sk] Project [ws_sold_date_sk,ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] @@ -175,9 +175,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter ReusedExchange [i_item_sk] #7 InputAdapter - ReusedExchange [d_date_sk] #8 + ReusedExchange [p_promo_sk] #8 InputAdapter - ReusedExchange [p_promo_sk] #9 + ReusedExchange [d_date_sk] #9 InputAdapter BroadcastExchange #18 WholeStageCodegen (28) From 3309a2be071f2d3f6122f3634aea998d6fa53876 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Fri, 18 Sep 2020 13:56:19 -0700 Subject: [PATCH 0070/1009] [SPARK-32635][SQL][FOLLOW-UP] Add a new test case in catalyst module ### What changes were proposed in this pull request? 
This is a follow-up PR to https://github.com/apache/spark/pull/29771 and just adds a new test case. ### Why are the changes needed? To have better test coverage. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New UT. Closes #29802 from peter-toth/SPARK-32635-fix-foldable-propagation-followup. Authored-by: Peter Toth Signed-off-by: Dongjoon Hyun --- .../optimizer/FoldablePropagationSuite.scala | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala index 0d48ecb31cfa4..59dfd3a7932bd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala @@ -180,4 +180,16 @@ class FoldablePropagationSuite extends PlanTest { .select((Literal(1) + 3).as('res)).analyze comparePlans(optimized, correctAnswer) } + + test("SPARK-32635: Replace references with foldables coming only from the node's children") { + val leftExpression = 'a.int + val left = LocalRelation(leftExpression).select('a) + val rightExpression = Alias(Literal(2), "a")(leftExpression.exprId) + val right = LocalRelation('b.int).select('b, rightExpression).select('b) + val join = left.join(right, joinType = LeftOuter, condition = Some('b === 'a)) + + val query = join.analyze + val optimized = Optimize.execute(query) + comparePlans(optimized, query) + } } From f1dc479d39a6f05df7155008d8ec26dff42bb06c Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Fri, 18 Sep 2020 14:02:14 -0700 Subject: [PATCH 0071/1009] [SPARK-32898][CORE] Fix wrong executorRunTime when task killed before real start ### What changes were proposed in this pull request? Only calculate the executorRunTime when taskStartTimeNs > 0. Otherwise, set executorRunTime to 0. ### Why are the changes needed? bug fix. It's possible that a task be killed (e.g., by another successful attempt) before it reaches "taskStartTimeNs = System.nanoTime()". In this case, taskStartTimeNs is still 0 since it hasn't been really initialized. And we will get the wrong executorRunTime by calculating System.nanoTime() - taskStartTimeNs. ### Does this PR introduce _any_ user-facing change? Yes, users will see the correct executorRunTime. ### How was this patch tested? Pass existing tests. Closes #29789 from Ngone51/fix-SPARK-32898. Authored-by: yi.wu Signed-off-by: Dongjoon Hyun --- core/src/main/scala/org/apache/spark/executor/Executor.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index 54b50e6d2fa4a..27addd8fc12e2 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -400,7 +400,9 @@ private[spark] class Executor( // Report executor runtime and JVM gc time Option(task).foreach(t => { t.metrics.setExecutorRunTime(TimeUnit.NANOSECONDS.toMillis( - System.nanoTime() - taskStartTimeNs)) + // SPARK-32898: it's possible that a task is killed when taskStartTimeNs has the initial + // value(=0) still. In this case, the executorRunTime should be considered as 0. 
+ if (taskStartTimeNs > 0) System.nanoTime() - taskStartTimeNs else 0)) t.metrics.setJvmGCTime(computeTotalGcTime() - startGCTime) }) From f893a19c4cf62dd13bf179de75af6feb677c4154 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Sun, 20 Sep 2020 10:58:17 +0900 Subject: [PATCH 0072/1009] [SPARK-32180][PYTHON][DOCS][FOLLOW-UP] Rephrase and add some more information in installation guide ### What changes were proposed in this pull request? This PR: - rephrases some wordings in installation guide to avoid using the terms that can be potentially ambiguous such as "different favors" - documents extra dependency installation `pip install pyspark[sql]` - uses the link that corresponds to the released version. e.g.) https://spark.apache.org/docs/latest/building-spark.html vs https://spark.apache.org/docs/3.0.0/building-spark.html - adds some more details I built it on Read the Docs to make it easier to review: https://hyukjin-spark.readthedocs.io/en/stable/getting_started/install.html ### Why are the changes needed? To improve installation guide. ### Does this PR introduce _any_ user-facing change? Yes, it updates the user-facing installation guide. ### How was this patch tested? Manually built the doc and tested. Closes #29779 from HyukjinKwon/SPARK-32180. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- python/docs/source/conf.py | 6 +- python/docs/source/getting_started/index.rst | 2 +- .../docs/source/getting_started/install.rst | 138 ++++++++++++++++++ .../source/getting_started/installation.rst | 114 --------------- python/setup.py | 3 + 5 files changed, 147 insertions(+), 116 deletions(-) create mode 100644 python/docs/source/getting_started/install.rst delete mode 100644 python/docs/source/getting_started/installation.rst diff --git a/python/docs/source/conf.py b/python/docs/source/conf.py index 738765a576290..9d87bbe27df2a 100644 --- a/python/docs/source/conf.py +++ b/python/docs/source/conf.py @@ -57,7 +57,11 @@ .. _binder: https://mybinder.org/v2/gh/apache/spark/{0}?filepath=python%2Fdocs%2Fsource%2Fgetting_started%2Fquickstart.ipynb .. |examples| replace:: Examples .. _examples: https://github.com/apache/spark/tree/{0}/examples/src/main/python -""".format(os.environ.get("RELEASE_TAG", "master")) +.. |downloading| replace:: Downloading +.. _downloading: https://spark.apache.org/docs/{1}/building-spark.html +.. |building_spark| replace:: Building Spark +.. _building_spark: https://spark.apache.org/docs/{1}/#downloading +""".format(os.environ.get("RELEASE_TAG", "master"), os.environ.get('RELEASE_VERSION', "latest")) # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] diff --git a/python/docs/source/getting_started/index.rst b/python/docs/source/getting_started/index.rst index 0f3cea7d6ea58..9fa3352ae27d8 100644 --- a/python/docs/source/getting_started/index.rst +++ b/python/docs/source/getting_started/index.rst @@ -25,5 +25,5 @@ This page summarizes the basic steps required to setup and get started with PySp .. toctree:: :maxdepth: 2 - installation + install quickstart diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst new file mode 100644 index 0000000000000..03570e6626d90 --- /dev/null +++ b/python/docs/source/getting_started/install.rst @@ -0,0 +1,138 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +============ +Installation +============ + +PySpark is included in the official releases of Spark available in the `Apache Spark website `_. +For Python users, PySpark also provides ``pip`` installation from PyPI. This is usually for local usage or as +a client to connect to a cluster instead of setting up a cluster itself. + +This page includes instructions for installing PySpark by using pip, Conda, downloading manually, +and building from the source. + + +Python Version Supported +------------------------ + +Python 3.6 and above. + + +Using PyPI +---------- + +PySpark installation using `PyPI `_ is as follows: + +.. code-block:: bash + + pip install pyspark + +If you want to install extra dependencies for a specific component, you can install it as below: + +.. code-block:: bash + + pip install pyspark[sql] + + +Using Conda +----------- + +Conda is an open-source package management and environment management system which is a part of +the `Anaconda `_ distribution. It is both cross-platform and +language agnostic. In practice, Conda can replace both `pip `_ and +`virtualenv `_. + +Create a new virtual environment from your terminal as shown below: + +.. code-block:: bash + + conda create -n pyspark_env + +After the virtual environment is created, it should be visible under the list of Conda environments +which can be seen using the following command: + +.. code-block:: bash + + conda env list + +Now activate the newly created environment with the following command: + +.. code-block:: bash + + conda activate pyspark_env + +You can install pyspark by `Using PyPI <#using-pypi>`_ to install PySpark in the newly created +environment, for example as below. It will install PySpark under the new virtual environment +``pyspark_env`` created above. + +.. code-block:: bash + + pip install pyspark + +Alternatively, you can install PySpark from Conda itself as below: + +.. code-block:: bash + + conda install pyspark + +However, note that `PySpark at Conda `_ is not necessarily +synced with PySpark release cycle because it is maintained by the community separately. + + +Manually Downloading +-------------------- + +PySpark is included in the distributions available at the `Apache Spark website `_. +You can download a distribution you want from the site. After that, uncompress the tar file into the directory where you want +to install Spark, for example, as below: + +.. code-block:: bash + + tar xzvf spark-3.0.0-bin-hadoop2.7.tgz + +Ensure the ``SPARK_HOME`` environment variable points to the directory where the tar file has been extracted. +Update ``PYTHONPATH`` environment variable such that it can find the PySpark and Py4J under ``SPARK_HOME/python/lib``. +One example of doing this is shown below: + +.. 
code-block:: bash + + cd spark-3.0.0-bin-hadoop2.7 + export SPARK_HOME=`pwd` + export PYTHONPATH=$(ZIPS=("$SPARK_HOME"/python/lib/*.zip); IFS=:; echo "${ZIPS[*]}"):$PYTHONPATH + + +Installing from Source +---------------------- + +To install PySpark from source, refer to |building_spark|_. + + +Dependencies +------------ +============= ========================= ================ +Package Minimum supported version Note +============= ========================= ================ +`pandas` 0.23.2 Optional for SQL +`NumPy` 1.7 Required for ML +`pyarrow` 0.15.1 Optional for SQL +`Py4J` 0.10.9 Required +============= ========================= ================ + +Note that PySpark requires Java 8 or later with ``JAVA_HOME`` properly set. +If using JDK 11, set ``-Dio.netty.tryReflectionSetAccessible=true`` for Arrow related features and refer +to |downloading|_. diff --git a/python/docs/source/getting_started/installation.rst b/python/docs/source/getting_started/installation.rst deleted file mode 100644 index 914045e898b2d..0000000000000 --- a/python/docs/source/getting_started/installation.rst +++ /dev/null @@ -1,114 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -============ -Installation -============ - -Official releases are available from the `Apache Spark website `_. -Alternatively, you can install it via ``pip`` from PyPI. PyPI installation is usually for standalone -locally or as a client to connect to a cluster instead of setting a cluster up. - -This page includes the instructions for installing PySpark by using pip, Conda, downloading manually, and building it from the source. - -Python Version Supported ------------------------- - -Python 3.6 and above. - -Using PyPI ----------- - -PySpark installation using `PyPI `_ - -.. code-block:: bash - - pip install pyspark - -Using Conda ------------ - -Conda is an open-source package management and environment management system which is a part of the `Anaconda `_ distribution. It is both cross-platform and language agnostic. - -Conda can be used to create a virtual environment from terminal as shown below: - -.. code-block:: bash - - conda create -n pyspark_env - -After the virtual environment is created, it should be visible under the list of Conda environments which can be seen using the following command: - -.. code-block:: bash - - conda env list - -The newly created environment can be accessed using the following command: - -.. code-block:: bash - - conda activate pyspark_env - -In Conda version earlier than 4.4, the following command should be used: - -.. code-block:: bash - - source activate pyspark_env - -Refer to `Using PyPI <#using-pypi>`_ to install PySpark in the newly created environment. 
- -Note that `PySpark at Conda `_ is available but not necessarily synced with PySpark release cycle because it is maintained by the community separately. - -Official Release Channel ------------------------- - -Different flavors of PySpark are available in the `Apache Spark website `_. -Any suitable version can be downloaded and extracted as below: - -.. code-block:: bash - - tar xzvf spark-3.0.0-bin-hadoop2.7.tgz - -Ensure the `SPARK_HOME` environment variable points to the directory where the code has been extracted. -Define `PYTHONPATH` such that it can find the PySpark and Py4J under `SPARK_HOME/python/lib`. -One example of doing this is shown below: - -.. code-block:: bash - - cd spark-3.0.0-bin-hadoop2.7 - export SPARK_HOME=`pwd` - export PYTHONPATH=$(ZIPS=("$SPARK_HOME"/python/lib/*.zip); IFS=:; echo "${ZIPS[*]}"):$PYTHONPATH - -Installing from Source ----------------------- - -To install PySpark from source, refer to `Building Spark `_. - -Refer to `steps above <#official-release-channel>`_ to define ``PYTHONPATH``. - -Dependencies ------------- -============= ========================= ================ -Package Minimum supported version Note -============= ========================= ================ -`pandas` 0.23.2 Optional for SQL -`NumPy` 1.7 Required for ML -`pyarrow` 0.15.1 Optional for SQL -`Py4J` 0.10.9 Required -============= ========================= ================ - -**Note**: PySpark requires Java 8 or later with ``JAVA_HOME`` properly set. -If using JDK 11, set ``-Dio.netty.tryReflectionSetAccessible=true`` for Arrow related features and refer to `Downloading `_ \ No newline at end of file diff --git a/python/setup.py b/python/setup.py index b4cc24a6d239f..7fac7b3138486 100755 --- a/python/setup.py +++ b/python/setup.py @@ -99,6 +99,7 @@ def _supports_symlinks(): # If you are changing the versions here, please also change ./python/pyspark/sql/pandas/utils.py # For Arrow, you should also check ./pom.xml and ensure there are no breaking changes in the # binary format protocol with the Java version, see ARROW_HOME/format/* for specifications. +# Also don't forget to update python/docs/source/getting_started/install.rst. _minimum_pandas_version = "0.23.2" _minimum_pyarrow_version = "1.0.0" @@ -203,6 +204,8 @@ def _supports_symlinks(): 'pyspark.examples.src.main.python': ['*.py', '*/*.py']}, scripts=scripts, license='http://www.apache.org/licenses/LICENSE-2.0', + # Don't forget to update python/docs/source/getting_started/install.rst + # if you're updating the versions or dependencies. install_requires=['py4j==0.10.9'], extras_require={ 'ml': ['numpy>=1.7'], From 7fb9f6884f5e085e97b60fe45055247c2d17245c Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 21 Sep 2020 09:39:34 +0900 Subject: [PATCH 0073/1009] [SPARK-32799][R][SQL] Add allowMissingColumns to SparkR unionByName ### What changes were proposed in this pull request? Add optional `allowMissingColumns` argument to SparkR `unionByName`. ### Why are the changes needed? Feature parity. ### Does this PR introduce _any_ user-facing change? `unionByName` supports `allowMissingColumns`. ### How was this patch tested? Existing unit tests. New unit tests targeting this feature. Closes #29813 from zero323/SPARK-32799. 
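For illustration, a minimal SparkR sketch of how the new argument is meant to be used (the data frame contents and column names below are hypothetical, not taken from this patch):
```
# df1 has columns "a" and "b"; df2 only has "a".
df1 <- createDataFrame(data.frame(a = c(1, 2), b = c("x", "y")))
df2 <- createDataFrame(data.frame(a = c(3)))

# With the default allowMissingColumns = FALSE this union fails because the column sets differ;
# with allowMissingColumns = TRUE it succeeds and the missing column "b" is filled with null
# for the rows coming from df2.
head(unionByName(df1, df2, allowMissingColumns = TRUE))
```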
Authored-by: zero323 Signed-off-by: HyukjinKwon --- R/pkg/R/DataFrame.R | 14 ++++++++++++-- R/pkg/R/generics.R | 2 +- R/pkg/tests/fulltests/test_sparkSQL.R | 13 +++++++++++++ python/pyspark/sql/dataframe.py | 9 ++++----- .../main/scala/org/apache/spark/sql/Dataset.scala | 8 ++++---- 5 files changed, 34 insertions(+), 12 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 089e1f26b7d3b..2ce53782d9af0 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2863,11 +2863,18 @@ setMethod("unionAll", #' \code{UNION ALL} and \code{UNION DISTINCT} in SQL as column positions are not taken #' into account. Input SparkDataFrames can have different data types in the schema. #' +#' When the parameter allowMissingColumns is `TRUE`, the set of column names +#' in x and y can differ; missing columns will be filled as null. +#' Further, the missing columns of x will be added at the end +#' in the schema of the union result. +#' #' Note: This does not remove duplicate rows across the two SparkDataFrames. #' This function resolves columns by name (not by position). #' #' @param x A SparkDataFrame #' @param y A SparkDataFrame +#' @param allowMissingColumns logical +#' @param ... further arguments to be passed to or from other methods. #' @return A SparkDataFrame containing the result of the union. #' @family SparkDataFrame functions #' @rdname unionByName @@ -2880,12 +2887,15 @@ setMethod("unionAll", #' df1 <- select(createDataFrame(mtcars), "carb", "am", "gear") #' df2 <- select(createDataFrame(mtcars), "am", "gear", "carb") #' head(unionByName(df1, df2)) +#' +#' df3 <- select(createDataFrame(mtcars), "carb") +#' head(unionByName(df1, df3, allowMissingColumns = TRUE)) #' } #' @note unionByName since 2.3.0 setMethod("unionByName", signature(x = "SparkDataFrame", y = "SparkDataFrame"), - function(x, y) { - unioned <- callJMethod(x@sdf, "unionByName", y@sdf) + function(x, y, allowMissingColumns=FALSE) { + unioned <- callJMethod(x@sdf, "unionByName", y@sdf, allowMissingColumns) dataFrame(unioned) }) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 839c00cf21aeb..a6a71666ae588 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -638,7 +638,7 @@ setGeneric("union", function(x, y) { standardGeneric("union") }) setGeneric("unionAll", function(x, y) { standardGeneric("unionAll") }) #' @rdname unionByName -setGeneric("unionByName", function(x, y) { standardGeneric("unionByName") }) +setGeneric("unionByName", function(x, y, ...) { standardGeneric("unionByName") }) #' @rdname unpersist setGeneric("unpersist", function(x, ...) 
{ standardGeneric("unpersist") }) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index e008bc5bbd7d9..5008d3005b5b1 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -2696,6 +2696,19 @@ test_that("union(), unionByName(), rbind(), except(), and intersect() on a DataF expect_error(rbind(df, df2, df3), "Names of input data frames are different.") + + df4 <- unionByName(df2, select(df2, "age"), TRUE) + + expect_equal( + sum(collect( + select(df4, alias(isNull(df4$name), "missing_name") + ))$missing_name), + 3 + ) + + testthat::expect_error(unionByName(df2, select(df2, "age"), FALSE)) + testthat::expect_error(unionByName(df2, select(df2, "age"))) + excepted <- arrange(except(df, df2), desc(df$age)) expect_is(unioned, "SparkDataFrame") expect_equal(count(excepted), 2) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index db2ddde00c881..94a7df33f335e 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -1569,11 +1569,10 @@ def unionByName(self, other, allowMissingColumns=False): | 6| 4| 5| +----+----+----+ - When the parameter `allowMissingColumns` is ``True``, - this function allows different set of column names between two :class:`DataFrame`\\s. - Missing columns at each side, will be filled with null values. - The missing columns at left :class:`DataFrame` will be added at the end in the schema - of the union result: + When the parameter `allowMissingColumns` is ``True``, the set of column names + in this and other :class:`DataFrame` can differ; missing columns will be filled with null. + Further, the missing columns of this :class:`DataFrame` will be added at the end + in the schema of the union result: >>> df1 = spark.createDataFrame([[1, 2, 3]], ["col0", "col1", "col2"]) >>> df2 = spark.createDataFrame([[4, 5, 6]], ["col1", "col2", "col3"]) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 4cb923d94cc55..87b9aea80c823 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -2038,10 +2038,10 @@ class Dataset[T] private[sql]( * The difference between this function and [[union]] is that this function * resolves columns by name (not by position). * - * When the parameter `allowMissingColumns` is true, this function allows different set - * of column names between two Datasets. Missing columns at each side, will be filled with - * null values. The missing columns at left Dataset will be added at the end in the schema - * of the union result: + * When the parameter `allowMissingColumns` is `true`, the set of column names + * in this and other `Dataset` can differ; missing columns will be filled with null. + * Further, the missing columns of this `Dataset` will be added at the end + * in the schema of the union result: * * {{{ * val df1 = Seq((1, 2, 3)).toDF("col0", "col1", "col2") From 9c653c957f5cd9237cc2ad0a5bc28ead2dab75cb Mon Sep 17 00:00:00 2001 From: itholic Date: Mon, 21 Sep 2020 12:29:17 +0900 Subject: [PATCH 0074/1009] [SPARK-32189][DOCS][PYTHON] Development - Setting up IDEs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? 
This PR proposes to document the way of setting up IDEs. ![Screenshot 2020-09-21 10 43 12 AM](https://user-images.githubusercontent.com/44108233/93727715-5c2a6e80-fbf7-11ea-821b-555723b00bc8.png) ![Screenshot 2020-09-21 10 43 45 AM](https://user-images.githubusercontent.com/44108233/93727716-5f255f00-fbf7-11ea-9c6c-7b8a973bc511.png) ### Why are the changes needed? To let users know how to set up IDEs ### Does this PR introduce _any_ user-facing change? Yes, it adds a new page in the documentation about setting up IDEs. ### How was this patch tested? Manually built the doc. Closes #29781 from itholic/SPARK-32189. Authored-by: itholic Signed-off-by: HyukjinKwon --- docs/img/pycharm-with-pyspark1.png | Bin 0 -> 160166 bytes docs/img/pycharm-with-pyspark2.png | Bin 0 -> 84813 bytes docs/img/pycharm-with-pyspark3.png | Bin 0 -> 15981 bytes python/docs/source/development/debugging.rst | 2 +- python/docs/source/development/index.rst | 1 + .../docs/source/development/setting_ide.rst | 62 ++++++++++++++++++ 6 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 docs/img/pycharm-with-pyspark1.png create mode 100644 docs/img/pycharm-with-pyspark2.png create mode 100644 docs/img/pycharm-with-pyspark3.png create mode 100644 python/docs/source/development/setting_ide.rst diff --git a/docs/img/pycharm-with-pyspark1.png b/docs/img/pycharm-with-pyspark1.png new file mode 100644 index 0000000000000000000000000000000000000000..6e2c0bc02d2b76c1a80db842f1e85840ee095d8b GIT binary patch literal 160166 [base85-encoded binary data for docs/img/pycharm-with-pyspark1.png omitted]
zh9%?f;0`?JV!vYZWBTxp?3MAyXNvNR??jbt86{q|Gbar(Sva{eh@`e98Yp$V36O^u zzSFAvV8MR*T!%%QO^|i*cXt_^cjE5BBBXOE7%y;nQBFq7TA{CcGwmjQqIzfMmfHhr93%UsVM}GBW6P=FN^MJskEVp0#P?Of zb~m3Yic)+s#-tD9`zH8Dc1KtTBUiUaUI+1%X;gg_<$~7yK-+ix7F5Mlj(qk)G=fun z8Rm5}KUVh>dAieVIJj3$mu|W}t8VowlJd{!2**JA@Nt~T zT$W9aX~)uIF^2ty#mnj+bxo!?`l9-{oL>5#`ljUu6|&k|+P4*oa~wzFM?ObJ)GaDM z#%wb`XA0HEe11K?$Xn(%=cUiQq`YLiG~oHX)ueUSv%>T2nDe;#Ci%vTW{?n-kex=4 zW>r*A^zy~6u%vUo=$MP%(ah=9*dOqx&E<9ReY5Sa$G=8)B$N8_ttzdIJi$%(O_k0P zC-;BG{&3EpG%eAc&Ye=8dYl5RGRJo(Dsxu$OxL?F7S1NOhR&65miPW_6`g@EN-up* z9rni{lG_qpyjDfEan-AL3-|B*RPST%TcBJJ2Z#(5c8Ai$Q^2h%tkO zk0px1g=3G`gC2(NfYUNDXX^H+{&>SAb=|W2#mmBk9|=(jd*8{vfBiita-Ov=qQ57e zZYQq8-zb6~TGQL`MS?(xU;+p6`Uy2L?FY&WhAA#r8w;!F0=A}_hBE`!x%(=U@#dpu z7Ca|4?`srm>T3#I0$hq*Bra+2D@7A3j_8w0JPwkcY*R8KdUprlUgtf8~pL-5helx8EcwyZNOk_633Ju<_ zr(R-xl=!HmVO;E8=%7OV9EdZFF7wvdH0<2LNBw^FYBHuerk8Rn-6cKt(_Q&?xduZN zi_A=7MP_#H3TMM}@a1)qG;F!=&1e#+2onus$9o9Ce*E6?uIAlGcTmN{V!Kp6=sA3$e%y3r++!`; z8?2*buQdF=qjImZv!&Zn2Kwb9_9u0#por6AEoJMcI_N4{6{kQETMSO^CK%06)CYU-3@xuD(gw{r**EjWC+DXuii7g7B!4qTPF`E325{fAYAPzw^F>o=kxTZ%|AW0TYI?Uv2~G4 zU&c$a&+LpB;CF8y!^BbMU^c-7O?N4%6{BKg`D4!4uj<<^y3vr~ztT=&jB)U)o9 z@N}Xz$zfmq8{iJfXwqiMu-ZtDja1qF*kfZq?OqOFdqDequ7U(;dEmqs)_QeY1q%ZV z0){cKFl~(&JH;g^xPg$_HXT?MxN6`_6#V;TyrI+R>f>YhE!}+hR`|o(`_;HkeT2~# zY=bC{Zc(8}A;*98URY4?QE^LS%xZUU9N=P%*-z#V_UA-=hF4ts4&E|vecIYGiworw z!Ocs+C@Fq&e#*^D#7tk?qk<9vDc(I$3@V1BGT}Tj=VUZ9gDC?;98mho(dn@#lOqJB z`N7nIkvuGm)Ln#xm)C&AV$lV)9fp_FuXd;J?tEXMI9H(fCB?+YpU~1Fv}2z?JPmBf z`YA$-LOCTH5x*!qlkMN;cP^Bkae;8}@3iCo5X5-b3TK}CNRkFRVdg+CaJ4OU6+f$} zps*ps*eIysHYn)G5GwMfLf*)=@YgRWSjg{Z$Xhn=$$zGz(dD82XBc(uZ$@bi8AV0p zw}!c^rKO{rwUfJfj;j$;)trr{uDh;^vZ%R}1DC0VlbIzK(82ky2#PpR6d81|bT_33 zI@mk9i2@}U{>dSV4F8P=Fwp;##obPVL03hcUdGAQl3sv|pNpG85|^HyUfk8h)052~mG6$!dx1+l$kkiqP@n0qX zsYlk*&D_<-+1_=ZA|n1z{{JoIzb*bNspV$rD&ypU6m*yT@A~}@ z;s2TV{}=qnP%lVd_b zd4r-TE3FAcJ#NPew3JIm7)wm^^}XB^mPw&`j)wgskRi(KEw~!)K2}y~IuBkU-8zJi ziR88JlByH)VSgolD(mVxC_?CBc8l;+T{-jLTj{8J@R&|YD@_m3@;D4Fbvx3|b}Ktv z4Qf5>@CBYOH!GT&n9vgt5wVNe;?v$aNc%m6MifHr3AXv4dHuyT$*6X$!HVsBmeQMN}zGY1TFe~2gxWHf}gUTU_ z5L!fNO#ti`wP6@}ZkuO0Pb}*ciRjTBX#!2Ck|TNmC206gANdUMl#9k$)gxMskvluS z2sO404f#>#jqiRr{wXdGjfb~&G7eFYZP}_$Ai*+lE4J4mhwg1?o%2S|)o&Rh=$d)F z#IBFNWwO>(3?<*Ale6=0)5mukrD?z!?3BQ@5F*Jm+JL)_=g^9d`$L;pfw^kSbelL@ zn<5mjft8o4{gDnJz~MUj+Ir3-!olLp(~X`~1Mr6q=ZNgN(_X<6&du4Tl!VHt$9%*T zVi=~=pL?okU|!U(m`mPsM>jCb3(qhce$?1qBP$VEWo{{n@GvHQC|IB)sh8UJx#5Ec zhr~*8``*~^%Zsm|$ghhJO584Q9VF`*iKygCu1rJrCT@9ok186C^6l&LwD7ZHeb3tG zKlongd#!koj1vS!-CLO32>iw?%?&uj>eSNtG>i3WkU{OjHsrd8G*gMEv80S)hdR^IMpyZDU^4wTbEH18cyIM+ zPZ81Z`+5F4=fuX`d&ic8Vw+w9q4H~4eV=*_=PUm)f%!XE*lwm%j_0z=fOrsPr(IRM zV7A}o(1C-KZ|2MtY3f7gG9t+vF)&^)z6>ftR4u@A;cO2})7{0|#&g1DZJS2tx~e|F zL7z{8a#}N_LOZ=)>hfjUQVbaExL%oCHD5(&$|@Q%sGE8QT)L!z+ldNaU+Ae(JGHrF zh^wIA`RC&AoBQQJvf<=nWB(Fmx8sffr0oKmZ_^F#jjXO%fvc)`UM9YWZl z{d%tBas&o1k%Ff7#kW+oYjZbHsg zA#;&X+Xg{B+LYa65v(M~_5J;c6XK4~K3ZGhcs02fF;WhGnU`S&E~>7iXjUGS;UfdWzQCSW@6}t)l&SP!tf0Y{;f4M; ze+KrV+$JQ1VE6N#s^kXF(mul)Ih%f6jPEMV#FF;o2Bvdv_>EssWeKKL=KA}K?xvah z)o-2m5_wS>Wju-9mV+jqS)ZhiMyTm|FvgM$X^1~@j(v!ti}wST5=Ir+0~DZJGoWoT zy$434MB`X;txj2?&>|Ov!Gz&X%QkSU1N2OWnfYsT>a$NZid}l>{Hc3RI9efbw*mQi4?C_seFuE z8{vG6Mtz$h_)J7ks>NIO8c<18G$bumXw!3^@h^P$crysVfevb}L_e0J9u(|@E^ZOb zwumizD)w6Y+is`{HJR1sf@&zqW!9effR6WxxZziq$Hq?BSnh&(U#$bSc~&yX{Ywyj zKW`ogl4)F`1ZW@i9?uY9Z{L0mP!zwwA}WCW8sUOKT=gC5lOZKBC*`@&QLCv{-?MC8 zmu}Mq5CZ0JxlRPSlgSWrGFtSQH=G_ZhPxWKznOCifMm+Hvn)Bbo=hnj-+!c2+qzSL zP6XWW(a}j=M5+}cU!#MAE;J%#a$`RiQP|aV`$?w#ugH-_hsKrC3ylI5TF{TiCuOS- 
zCylw&`E(7&{)mUQTdk*e+yObhry>nz?hRFS>v`Oco#$bID<2|Gs`q?WPD+_vLB%H5V#&JlOW=$hC3i>ACZQ=9Y^{E4& z@isY3=z-Ax)ordl;^!HQ=i?Nj<+Qn&uomv#=y7T&T{uDtgZHlH1V77v>ncgTrEG%# zn@u5z*x1{-Ys?a=NIM!n!Tnc7f@I z&Rzb;sBai+Pcq=HmyRh5qk(Fp?R*;!e>kfez&6yuH^6tI!NND7t@e9ZTJvoCJN6%B zxs59#TdDTwR}~0$bSl2C+;x^8&E?J<3}KzPg6ML=Us(=pmqf~7V-g_>&9*FkCNc3`$aXKmYal1u=VxulR?Z$NZb{awJfAs;R z3zlK0A#{)6E0FYqL&=5Jj6!Uyg1xrZ8rQY`Y1@#K)4XLFqV|5AZHQQ*jbQ^FcIwsX zrsoX0pU%CXGr98K{~mrHHS)(Q_r&#+sRFj>@Hw^$mmAot-RbI{B4WwvmGw1!WuO0K#LJj{?%Lxd$i1w$E zBhS+RuF~~?2(d@}AfWZn~|bu+Bf zF*UJnZ+_kV-6RYf)(#zW^7JkzyigF+oqW22>?+4)ujo-_PET9UPRp3PtjWj6_;N1r zQ{$7uGzuR`Xr9y=AdDc!<*ietA$&i)DN=b6GxxDAAF2E;{gvE?$ziGRc}GxxN5R!r z3xW;njf8XL8e*iz&t1!@^=(iKeIe^My-*03+K*pme1to=zpL%5NWVsu)NNjzqopR~ zzN&eijn<|)adA0vUEMtd{Gqz3#L-b$kQG6_k@zKYP}9`Cxl+Ywovn-U@_CzL5=4B+ z)=A1ef9TR}i6P5fLmSzUYsVxG{>4{PrzJNJPP?P&j>>NR<_!{*d*W#3^kvjKhTh4Y z2T>Ku`;;=NooWG>Nk5u9=+0V>xYUjUEMAju(I~vVg=E-OZeY!6HFRTA&xkuqa_^rt zZ4r85u7}iil~&V42z%NogHf!K(U~{-#oAN?Q<5VWIN^9{R{r(8@x)Ff;4ZOoYDOXl z7J>zNcGrfzjvV?5#CZ3^L}KqtRm%0`Itn+Ek0dpZo(Y5YHS4~r8QJWcQDso{G55R^ zvT^NbM?{ubBbUUW>#&B7=MQ=ZppogvHA9_B!zGXLTPgDsi>2IX1xY~kp{pc+-p_(d zL3SnyGXXuzs1NAE7snWun?)20z`LL&%JuK3=oc2x_8S8>=QntHUw5#*oS<`vkp}iD z`{1V3CopG3OtYTlgre(kWTauuJzXnWa)iEsBjs~=I4OA?Q0<0-^Bw}#u`7iIB-gkQ$YXPKLT3nD-47vBIys4S7)E$eHVBHmG@KqBD>BiF^5B_1K|6m3M3} zx(=q^xp1`Gam~Oz8_|8qO50-EuLh}$@5MaeErMPa!<2#xUjR?Jh_eUM5w{zUrPCVD_a>Mjz? zPfMtK@zQ*#BPrP0Az+{P+3{DDfmXk&U5EqZ4-WS&Wf zdpP>>^oZ{I2fxopsucmqRO3~tIFNWwS;Oe6^Xh717!xZ&M7WX<$KI*usa584wX^2W z!mrBXs#Wmzf@CHYUU+<~!rad&1yzG1v{3LJ8*cp0eAggof7WaKqkgehHuc1UVr=^b z#trzP3FK<`kyBkbGfA^3+K$eS!oaa%{P7s;ZpPS2wNczBTN2kM$(w~qs+Tt;YbN?d zfeIq;dlhVk;=N6TX+UsDZZ@6GFpjXo)x}xBZR>g}C#xLqVkhD)3}*9d&@L-* zlIw81EZ#*g<8tY8u*9()&O)aYpzr(LE{S|cT{WWmbH*hD=wJo$cPqeYcf0nrQL0SK zMc={7?@&h)VtzD?rA$!=HCMdv#@d@%ssf5Cm?&myVPSf#$_F#%cTWfasK|!=PsJ))+M!3=0ktOYeL1^uR*32qAbM@g_eW)jF&Mr zZGUhnY5e97ah!V0Vla*GQR()+G|)awoBM0+VgP{BD>-Wj_$@p|W-&bJ^`8r$2s%qP za?n98gw9VX)WL3)8GT>Zazp(AhdC>u1I8%^bV4{^>B?=y>TLdOi0U+}>~_n2Fp#1j zx5d#P)(!h8mdLpLImXYVv_P^DBL=wxmlj872IwjaEfEFuX*8RRS1lt(W?)rzG+UKN zT80|k5y^AcT5G}K!x36sX(KH2AXY=f`-_nvcm)Zk!~+TJij}Sv)I~Tgy_qfH=`Jeu zklnnftSwqTP^b`?hLbXjkLilBqTCV`IKc1C)XF5qq|Ss3++>l3{(kzy`qA-=Mg4`{ zoLbX(NlUXr!@L&i`!c`oNb}4LzeVIL;>ajwyjI+i30+3~E(yNUy`qa@IGVt1zhV>y zP8rs&#f_4~=4%a$Ob$MXev>RSqZ2~6>$RkQ%H9x@&Un4j97oZBM>Z-nU}jl!Ly;3R zATDCo_Ko9Z{ZNbtsU)zAQfz}+f%S6o4v;2Q!P{%>5ssxw{IR~D*eOD&c_Q{ie@c#sx-!9 zy1z2|;<=uvW^7}wzU_6Tb5x7m>`8U<%`m#soc+{gXfhkVe@|`HjA-%LX<@%bF5_!n z#5#Rhr)WF#KPzKazbee@D5d}!+w`)-s`Pa#Tc;3*@NmiJX1UK|be+M(g0_peN{FL< zB12+(W0lFy+n919SU>zVMa_B?qy0=BDOWTse)b5}cW2YtUfn*T7gdv;UPV0W`KS=j z-mAUs!XGh}DOykIH-jfeiz*X#$tYsdyMioo$9FClEsTy zX>o+yYPLO$d=`v8R_{mC0^Ifr5o}dWPkhjRrmSC%r@wkfqM70bB~N2oG}=+|yEJU- z5AjP}n^c;VY8gZxym*|}0|5t;Y`4a@8hbwqDqZ?UF%R1p)b$NeeiOy;-bwEAGpE@g@~-X#OcGgpQuG! 
zjRC%iyuBl?1PZVHyWzh#x_tPt4EitKc3WnD<f;w`L$N&xfUBLfVV?dH5dqBbwK; z%O1Cr*vqC&?yi0Cl*70cHPNo8l1_XyRynuK8v*=>(#3Uc=7X84NqE3cYs!zd+Wo#1&hOTF$jijWmOj4iM1Z0!#?+fX zYp~w9?OlPpz~j8kk99^AY+_fhUrYzd*cOcH^6b<>1}Ji9|6W^jDEjS~*+*cU&puOTBDO`d> zVfm1SoJ-ciHrvi=s%ErW@k5=rlW&k$*RMQOx;Z_zDLlf`;pdGBUO`=D@|{$xqw^TC zvMOb=98TKw+e^P0Sq}mZe-q_0t$hVhvHWvU;h?jjIj+= z_P_FK>`~Z_tBGSF#D$y?4EsA*ojA(~b|r}6doK)}y&X@DFi&>2r-ubWJoqi%p$$oZ?Vx+=YTI~h_JJ$R=L>nC5v-y6Kg5V%;yUt1Pz?q^^3etiqb zh|t*53pO1Mw?dU!?q|RJpvP0_z<>Qzxf$lY`s|vUEtKMR-Ew`7zC%*`!D1##d&CKF z@};4^h}zu3k%6DF*<$b?V}nlfU9#>X#2KCkC$}E~2Hnw*W*| z&^IXu29oZ{ehr=**xqlxe0M1+V2DOZ@$iY2N1s?i?e2^bY3*cC+Vb!PIYEzKkw5Q z&W~(DDvMNoH2YT>$^W&oKpWvpcS|5%>LhFYwKok4SHSN?l2N=KHH{7kk537)5jfsH z({KH&6E;0+sWYc(vNO~+)J(=MX|3FD)JifP)h3zM^N)0gPGM>E6p!O@ym*)nr1I|H zqG8e{N0*pgJX0+sS1o+a`ES{P(Z(@gCE-LP;3g-{q^!Y4UAHWA01p{pL)K>_ve8+V z#3gJRlO+2NtlJh!auCM6+C!|;N2*G#?7m5E{*SU8Af5t0v5v3<_hjV%GiCV8ej{VO zGSQcZ7#h#}#pAGhYyV6&NSd+=-8+Q!!J*}w=8Xtl8-Wj+b)(Nu19ds{J3@NPo;rY0 z`xE#E^~-0`#-ns=)MT^Glstwepre6nw3pc)Tqo->44tlV`Hwd@jYB03+T`)ddIH`e z)fXX~nLFD@c+ZA?yQw*j%7OgQdF&Jxw@jYlJ z7r%@?sJWVW_s0`qeplAqrImNhVeF^ZFTV4?Yg z)?7?MmhL<@dF-$jld!9qx_9E*N=ZT7x6`6B?D(dspC)~O_a9*R(+9$X&xn02q06To zz@N(JOrYelD!~ysg=IXUo-iV1aSTSs%lNwd$h$(AR;%hhmY6dQwlKE$!&5$tgT+9r zgfh1EJwai>rfNtcB@g?HKB#iI)(k?+;3N-I-kY=cj>65@5S66EBi-_dkF4QwouK+- zl^v&)R0VaTUsb`Zy(Y9S_x?NKb`n3d!)&X5Y-zw*UfJuL?@4x}iI#sXM$qi5dP$kf zsz%l)94|}@x4`J6x}idYE7G<`?v-KuG#3RN-HaT=#96{UawQa46@zCT@PD>eCl6{{ zO{W@WE7RV`W3PjMipO`x9b>njf-Zg!Shn8XnXtuhC_IXZ6!8OeSNo}QHQ$NGbHi<7 zDi`J;6BJu&<#OfboGX^QG&`1$V^m3%5d}ttLSV~B0K_;Qxs0p22zL2C2B+*Z@8a&q zJAe6wj?jEbDz}^?m%u|Ch2z9G5oHlEY~#`&F!-J78^>5_1i(eCqXvCf)>-;peKQ^u z|AuRAeU$##-|TlI2u{Y)2#4T?CKj3IL}^(mRX?VFd0ZwdE1OL`)DLUx_}GML1?C^I z$;%s0|G~ZzYBS|_N-Am79Yz%M6#Bk$v)rf3n%pRf=`xYVz(Dh76^}~IxJ7YGnLfU#Tv4R9I{7%>@A~?E#z?e6dNAdo)OglNTa>G>l z8Q_AxvQZd%ck<*eC}`fWt>w@l`9YuPdu>7qS?`xFxO`hHZJN1f|Lo_7?-1`EC=aNM zqp?V(WBCCN_xy_auADBa=}ZSw5+Rp+!Eb>t~k6;r=z9VCe_yB{ZKr$^M zJdey;s*M??Tw~xs?;c|;> zOJBaqlCH}qIlR$n%Kn=5QNcAw!nuoPa#B9XavbTP+4J$dO0A!Qa^6qC>aV1z`BESc z@$5h0<+QYC2=O03UkwO!#G1D+PP};DXdh`sUB0UmNnFWe)phdB2qIuD@ma^bh5`HV zqO-KXXE$8Hnbn5Suzxn*)&;gQJ+S*!Tqe1|Dg zs3lx0_azq$5HE7k5qUoen`(<^AhelerzIe>(XoI{-vnhkwhT@9o%0VRkKxMJUnY`T zzJpSELm#1dQ*GXE({CRn-(fCQ!K~y8$6LMWW|yz_HL$SKhYaA4 zP>Aw)Tb;>Ex;MwP{8`*^`ZQ=+&+$wz%BZT=7^ceja9Md)v084<(xF`QijQIC_EsK8 znv~R7wvZIGQGGoY)eNy5$@{Vpr*42*A0)e%_m~u2TaGOl{8`~TVD$;Zt7XSq<)k_AVbGg zlmh)NDh0ryPe;#%Zc{!lCNrgDi8AoWcl->rwR-U9({t}H@8OGY0nkKMn|($}8;LyW zj?@RIJ?1ITyzFmxLF6*j*;`lEpo*ly!uY!F+n_@hVq+UH>@otF|9UQq+hMUTVk((T z^34?c@LhpRXgu5W=XzsEGlqS(9l?2ebG%u+^+|3q#J zxb!PgUnFZUZ?C8YwAKBic+hS$CRK2gy`pKzw_MykMu5Gd?(2a0h6W)4q+NtgXmNOe zVS@Yao-LU4{<)@_y5Pk^`~8A>{QY{vML#vOWIQ1n4@CAF4HAHcd4U8SQXD&Bo#b9u2si-SAdc{kJ29OLPKA*zq9DGm zG1=#8sUzS*bD(CC;$nvOfMDx|H!)T~ykv`CLrBXs*@61O=LfS)b1I*=pxi))_X;ej5E8`Ot^e z=xKs|>&bSbN4}_S!O>Kv%CqwQr|yZ~Q@tLGo=XnQu#EaytNd!ClfYaiu}(oPqXVPM znSe<=V-oA6N^*m%KJ$wR@#x36USbdG=?}*3>my2tSO@R?lCp#ER(x8j%W7S94x6~e z6q1EFo4IZ~jX#5Sb)&W<=N4v<;2}bmp*29^kJq*3v7qv_sW`ByMxSS@`(#(tkD%SO zUbcw-SmA~~eskG+_e3m8ZIp%L(4-Bl7Kb0V(gP-L4znf&1VZk`Ne4jZnso)RAU_G<#)4kn@~T=m2gVp;Y$3_q*b*Q#v0tFW%= zr_EJz;P4l}rMopPjK3{C>ZFn}r=pof1r{<_<@t(V+7}Ng-0Kq3T@n}jX-K-^-sffg zdKQHlg~_Cb*0R<#kXDYnl*0`6pYviZ|>e!7ka3$kJ7aU+SgBw zsa{xM-2tk#(!iWnJIOj1mNtueFX9!H=9Hh@T{n{ zD)c&~2tYRJHr_Xiu(^I-G`sL&=wBA{*`yl~Tcm=FJ!Jf%$id!PyA&jnhy88V%lshg zaNaon0m@j;KBySojs1|0_-HrEz7t+I;a>VTN+1%k@=z<5B%jR3arU@t`829ttk{ii z!PxoM)&ho)qn~#kC5|oaA~zq?&kj1&yrWJY+7#&d#F`=~;$xRQFYQVygrNhS+`gqx z*Q`!ycNG&YZ?IWAI6*q%1?+#3jCuZmXP^|c#qlK) 
zDGM@x*ceuPxmHZK@l*Vg^%BzFRCg7>?86a3URua3henkd>5D;m(mtH}8i=ek?)r3} z0eAg?Pqs_Iqx_n>7?3wu@Sjf%d=;k~%33K0jl+ z;55#!0>(N!#Oz+O1JsD5uGD=llXA0)GeWkLZFocCyPdh)w9onUwuv}C_s^(5a8M|6bH^`Ti_r zdC1UfqW9oiYe})3P3gR4hJT+s!h^i{L?~@in(ZesRrN*{Va$6TWAe~G-5peqq1Qh( zX+9?FEPqT|AS{}NH#CKTdC^4ovfQSSG{wnPJmN0?fN}!ZpLRXf-jW{=YJU!1zG+>9 zagF6jEPePqo|R50vYk+egdwgOSkLo=@`Grp5;%)CjkYG}Z?2z;RgKc;ZiX~clHFKJ zx#8J=3~!?<9b-Lk)x__86SpVdB2iS%U8_8D{(g1B|HkcL@u>12AAKyHzHm&f9K{(I z3IoDKfeCVEw~ivxD9)LHHV?H@*YnEum?oHCOGElyAM0*ceKXO53!VpC<%M>2gg8Qv zmELtow@VrwpG>GeKjC*-&3R8$E#PDIX$a9bAeC6zyi4`I|3$-Ez{AydB7f^;_pxXy zO7-Gs{4D-?CGJh@1ck*0{=2q*ohz}kHn&Bal5F{%ptku=`1QfV8ls~%hS$7B#VAy< z*lhUS?dEqy4}QPHIs49Y(lWwfcCO3R9C+sZoB8vUaPp#L9sQ+yS(wU{d%r(XiP)~> z_WY+c-*rz*>~;7arqNK2#__Myy1HMT_Q5@HG>^nb!BSm#7{Kpk`&AJ@CDW;+xuE}- z;9`J@`Gs}LfVfSWUn+&>GZ}$8Id!j=Fa`3rwc@+xEzv&|a1d^zrQ(XMp}~wmdzQRmbX9U=&RIA>1Ltmx1M0Oq`OVm*K6_&mxZN=0 z8KgdSyYBE~&|%HW!CL{nq^dsHE>2ro&D|*J}lTicK z7rNpso$&jkb~Sq>^J#efBHEL`nT&>s?ZD#Toi~SC3*!HcoKYJmJq}_?JXL zGH1SvNRY2I-{3SKf35TgF-Ve6W1bR3$v8jqW^m0UgkXx+_c#Yh26o;D9<$R6K2-{k zTL6pca6pOU`59v~@u05>Y zvPIFzQIKzLgwuKY=lKyV8ltCA;?a&I&#hNR19A#>HG6js)d^3T2w8i?=-31Uj#Rmr-;a&f^jVz6p)2N`Y z9UEV$r~YC>?lT`0X0O}tHlwGzvFHr;z-1K@Y#KCBJb8|$O>Dd3A(H$y-1S6+?#g8N z+icq|El1M9yC~NEWU>$IXL)Jybv?|Om1WvGrNLtUR^}cm0?2m!1 z?vSp!ZEC2xK$wy9Zp&Zz=HSzWkG9XskmiKpDY zk2d~>#j(p+aUMAkJaB1bgM9u#uwNmmjmU-MwK;Y+n?57kM}I51 zgU|0Rz|4qhHzlVV&a@@BjKpe%Dr)+v4AHQ#c4+<1r9r{L=o{>D+#XMcMGpv7`d+I< z`0>6%JlQ+@%c1Gp*T`|$s)US?p*%SdYCllP`E2(F{~P%CXj#lrmEco$Y^fWf?ocEc z!DUTS?ky)qaClaB{7|X37g%+cO!79~)JcyB_Qkt!$BcXEUyziY7Kzg8=}-bkPmm1Ih`(4$ zgK?x054rR~S!K&`V@j=9U?54?O%sx77pp*yKZGQeR>vjjXXLa8i&I?;ZRBf$G|vBa zf}t3fz7qx2!q~e+wJP@{!KpHiDcrd)CA=H#j>z*jYqMnzr`<5Ie>;#)3?tY1XQQUS z9fGgIFHWJc6Z6xxuEj%Tu`_li^Pr>(pZSue0|G*C`S2^P`2ky)_AfE(&l_;0zorMk zxs^U-S;UzINRCXP(F*LMp_*D zHM#GETnvlrjpIW1devq~g*$A`YV=D;JDJojn|rQ2k`lDzlMMrpII%pV!d!z_gRJH>=__^=$$1MP<)3~?ShlWet^E3 z_wvXXA^OKFWx4w==a7q}j@1BLaMKY?HMKwTLDC^K4IjT%Z`uY1OwxdW`vu_=jQkq? zQk?*SIH7AkB$>Vz_B)h)!vA*6DL~^-4i5xt3rjVoN6)TeXV2ws7d1#H!N>(ndVu6C zFHS~BE0A2}74NQ37C8;+lPdCgvOVOY$Eo9&fYo?bADua#e_;3vqysNpEcY+CwgqW; z`GhX5Z9hLci|%?|%y|8qI)8POz;BPLSRVKS4#d812c}A~io#|dm%dCdF0oB795ZnL zaGBbfRoQkqe3jDJ27VD=bDp`=B&D*T>+PtWBL7KZy_gy=*VT`899SJT9LutHHoF3! zymKbb+=<7!$(V0rx<~~7#o8nXpC)0ge&f^25#^>3joXy0)K9X~*Uu1kfJRQq8Eq0X z>kX5JDv5=$Q$QEs2CL25#JP3BkVZI{NS~bKEr@UmPh{$m69=ZG&G*?L@Dvg zxd?;C7keS`DSrV!_|@~h>%Z(bHO=|!tbqH2g9ygBcYi5aez?>E zD~b!!Uy!&wM&Cy}#5)(QaB)kptg&MfQKMr$vD&C?J-yUfgX9IdCB<;=<$^i})cg-M z{MpHy{{ATd($Cs0vVn@>lw^TNPV_nVOr#a;o= zOYf!QYd5S+Poz1PMl`iIYpSd!C;L2Pu}bLeQMK`LI9Eb_CJ!pO3YEvqJ`SWGa$Tk$atjCrVoq-S!5==yX z45kJxX%T_6{Nfk^;z2$mKeU+R9yKvJHsxuOJxsC#CbwAwgy-4HOy>}pKAPzkeSQi? 
zmHN!a+e+k9E~81OMW79VE!0*)3MGw`_{KS&)2Bokx2X2Eq;9+BwiwI=acA}Ryycew z?;#~Wj3)!@1Wv$^D)Do5G==px$haFv7Fv$@lPw!Y-&PzX-KChpQvhk;)Mj508xJ9F z7^z;AUPm1oP3Kz~dMVH3I|67se+$?Pe+bC+zj46|n|;1II9*mXEBpEb|L@WuUU=Ql zuw+?$gi+A1Pqm+~*-nFzL||oP>Mw4=b?hwHVb%bX7q1BOAp>bWz;16X_`>WP6Thwx z>&rNVcx9wn1X?BrrdhvBS;xV1`c5Z=M5>tV#p2kL24F$JVzCyHMXiti+sUr4Q-Swp zDMGG~-~Djuh^|f)?9EZ80~UPbGIVAx-|IcbSL&>Pj;rWA^V<{i$2muCP%5RMc5+k% z`^gjprWTY{VV17_*uUq(5_Jh8>V>L6>p%b8l*vC(FDSiY#|lNW+wu@^O5;6t-yj7f z4(q(7Jv;K`VQ422?et4aeB}^%@Cdb@wz6IwN0VP(N(|M0uLdqfe9Q;P3m?VHvnmQihWU9?78D8-7q zw6xIT5L^osX|dw&?(Pt*#oZ;qi@UqKyGw9)4^DE^?~eQ9j{HqBlAP>y_Fi+&XDQfO z8*I;CMGHNdzwsh9%24^;Aa;kB!p*hjtmI4GjgGja+7GT-`t?OJ06<-s;hF}k9x1s( zpxZsIos{#1yQ61P%`%S|Dq38J78aOhx{9^Dhy2^SM#A^GY&wy$5{Zzo&7{AQs;`p% z=UZ|ypx%)B^!185>M!`2@BsTf70>YKV|v|3S7Yum3ahb}T0#GEB)zAZZ~aji+$CD`Yy9~EBy$#)4%x~TNrZM zYYGXp$uIMi;kcZO=Qi&5D?5DXtBGSK{f-Pe>q3JD`#zvj%{E=Qw|>$>@BkQSecjQ! zVEYre%kz%vd1-KlLHFu?XXo@s2FgJSH;~;!R+t6!l6foV2?E{;Vk@pfzeBV1_&}jhCV3OmR$l?3t-7{wM$g9EVRTs~uO2;c7 zrM=siY#Ok-gi`uhp>bIkrtmr|P8!g7h&oTFEiqyVyoBw54Z32Uk0R;}R!1FdndO46 ztmL^epcF1chtT)8#We*8kZFF9UW(&wZT_q<{AYQDE*1c~Lw2b6z7z=@NI+{8Ezn7T znSlU`7q)V2q*`GRRzm{r5EvFAy<+b5HuI0;5LxdQd@G(Utk9=Hy=iZ68wen~+Og%K z)%I;rsfKx$`AzpWlq<;#&No`Tz$>j|u!P+_Nq9#>WeTaj8e$a6hQcIA%zm(+mr)DAnk;L@wrTPYc%?y(FesQ}Uthp1hy+@tuQ2iisf$%0W~ z@~~U^&_(2JzfEF+o7742cu-;7IATTQVUm{ZRk;z$@_g^|nx$LIaYxpKFPi7>Y1Wk0P-_ z^R8gYA920*bxKPwN<`q39mJu(Cz_}0N2%jp_)Dxr`-y45Q?P1FVg&)%ZG6^QXu`-} z`a~zMWi0R8R&KGY7dSef>5fw0gojGDV+4@oeXNw_NaJ6Qcj39%YN~5{J&%)UN|xBU^mQkajNMF3Sg?Z@Brc7)=Te=;&o4v8p>NQF!de8qM6?RxN)25UN4Chhn@rzezkg_zMQv?kuQZ z)J*4J)jTE)ZQM{?Sa_Z}cLlJ>Beq{9{QXZP$Sv|dchsaf!xGhIvt?k3CH}R@r{%my zf#1-h(s?fauutAZ96B=(Gk?ABA-jh9($wM{PcSSqd3N_kfJwg?_MD&f<1240y3NESQJS*hc zN!Q|%>P;^PDd~$NDSu`a;XF|ap`XZ z;hpzh)6^#VG7qSI0_M&FFv`S1O#V-yJ4m4S4w?oMLp`5ME3Rz<){oczIq z$_;M4Z1rX6Tbd}m%8@s8m5%y3;qif+;vKv+j};;|^%Z#lR8`>s9O+K_N{EJzQ6f54 z<}Hg*j!N5X5MaiXVY0e|{f>)n`U8OSY9WPIZrpLb+-tm&yyxBBE+knl9K++B$ z7$_|#-HlX9-6OdwTl1N7-!`aSEuzauiE4Sgus@o!8QU|Y$Sk^S+U=nPe^oLw;w|lti zj>+(Q@k;bCe}7v|zfSP*f>v7>a9jrA>Us&QUR>^-M@2XvM++X7#oI)sf)BIfj(Y@> zr267Op|Va6$VMrQz59eQr~ajf(FKZKTer;$qmmN&Wx0Niy@tEj6-L*Z3;8eWP zXuvqfQ^=;^(O-hSC!co}eRoSLIk#WEU9BoGki)Vkjx6tt$-2)GI6%5dYUXF4RV~)d z$P2spo8n=eXgEe%rAWtmU|ix;9)2&*GSMZ%2ulC*(d>AW!_K|-)f{P$@t;^hIh~$a z)Rk;CKvlI@PND6~N9%v`UxPBRy7%!4sIm7aS9Fh{mt{K~-V8;T-21mygyoTvm0^3D zH<`fpG({g#HWQFWcxAP+Mrb%n5+|m)oW6|K^q{aRYSJ7}O&KImF$=!x^AVRXYFhCd zBZbEr19?b+LtrcO0hEQNWgwU7a&-DOl&i>M7WI{hmAoz@=-iky!gDOs$4Ap=;5b|d z%dS?#r+&UnXW_eD5%+vr99Y6IkTzQ0G$Pj6!uQ;kCp)(I17l6Is*mGM-a5 zn6AptBOu9d{LtIDT%nGoOD2c6usf|v=|S+gd}V(8)8mNlIHHC)XTnI62S3;H@@$kRVz%nf@g*)^jXi5Kwep> zG;MTLpolA}$z0!Krc2#O?uK&UBT1Fp0(R}^;=sM7O2*(O16v2dA!6?d5bA8eU-TCy ztkd-UP^6Dw-!-ugjL>TQkW*f#{fE-yg4F5Gaa$1Q|6>6ZP{{aRcJqdcUA8eH@xIzZ zEV-uW#z&|Nt2HJd7HNVydc6cL;=taIcDjt&&s7`M-(RFx zvrvc`j>_4B6j^6RrGIDeyXh6`SO!jFCWZ3QYv~$|=M%52((zf#c{%HSE9Z)Gfmz5{ zZA%LEScHo=*6rO~zrO{PrlF4VM{E7{wtZLnq-N=4K5$kyY{ zGL@w{vHiKc{+J6bWuk*nKx6G#b))21ePcHUronIHy9rSCs-BXx_7x3oT<*Ssp54r+ z%#N86i;VikU!@f_C3X;`v#ns+C`@9fgK-Gj^q+a}mvS!JE|Vt~Ilc%OM&`{{#QL}>*{Tg9Iu z$1?vk(8_S>+fe@ETtb&uY}O#6QJtq)G)#NH5bmBf!MLBQSno(plZD`&)FA zqWWll6;=d`+P6-@xQEwnp{V=0k48~Pf-||`kc~u2*(()|gbfmw(lDMI%lFkBO+)u0 zZS*%o`nskhmV;mQa@l1RwqVm3gi(^LlH7QSs<(qb@ipd_+9{;SoPL9Cw+ z!z+p_Cx*7?QGTyjgG;ai_(B{|Kq4w-^dF$eDrVFkEM~cvccUI3`Hsq+UE#G_H0uNT zaLV5EY6|J(iUp!M=!cocjw!1@{Ub^#9bv8u22C!OoWX5N3Q+DXbmCjDPSXV);McI* z%osXMt!g)MJfEuGeUjF;7_dWzh@A=61~n6tpq$tX{Gjhdtc+!k>$T-)`fgk-HMopsdE;( zGAU|4^Ag0CwJ1uQVg)!DFa~AWTRt+09$)F1^nDDGMAp8+D$)?@DWzV~?07HA2tqji 
zbw_Xazc+u}gBG&Xy@kX!uY1Y-)7Y4FDM#HHM1{B0r2TvwnBlMh z4I2M_urYP{Nw_Qt(}Q2nZ_AF|gC-H@F{=`&-tvi4h`AeBL)`%AW=ZCEl0HE0>S{xH zygSzFAQ@1PEiqQA4c|f295tYY-}ZcaVtG;%4t>on*Q-x#+$y#UWnrt5 zVgb>K!g86AlXYB=E_=jPXwLTgg(f3w$(Eng*GG)Qs^)PvmoF!geaee0s|q8~kd4-1 zsocn;V+kywWS`!1zN44Idfc63zaL~8=+}d0=HbuUeuU?|k zp_Xg+`1nckF|~5#;<{MaSLeE%%@0F@^FcA}WXMGy0sYc-jYK9Rw2co>S}g#bTj>*4 zMbY?!szqXb|2CdOKb|@x%XKZA51W9!-*vI;=yRGQ%tQBQ-#nF;!_GrJpO4OsIK%K*&0|BUG;_PL z`Πh#QP);F1781=ptGZ$Qz`n&-xyHnqxIKLwq$srX!

@#c>N^f5xs6y+{W;;q zI;3PkRX5J2Pw9Z#Lx+FDvHwP<`FB_T$96XB43YJ6Mf(mrikbaV6J4oq&6{vP1 zw&=37eNn%^b%R$JXJre>SZ99HO|V|#Ghcqng6?EgEwFfodNMv0qPo!r6s6C=uGh-F z=#PUq?mGiU{-AqnG{Kr0bcs-E>5D3iO9#avuhS~3k)1LKLWIxM>8XB1gTm7|r5m@i zz`<&V_!QWbl96TlD0`W7$49^Ud|woOni&rq%Vp^fRnD&AI6jvoZ;xri&d^6&(QRsuxlkX8sh)~tYLie$$uR(_?Wm0Mr zIWKGQ@=Npt!YK8bBnk0+HvnZBtr$drf{Wtg##oZQ{#ygy<8{a`Q(Y2P*B`z?DMK_$ z%O=ocn7qa;yp~=)tlSer=e=&FOBqhgIuUE14#2jbOZW2*e~E~w=amoIOkvixi`o-* z$oWIw!p9TOcYuaj5gUcnO>_6d{*#aG52Cyz0M7dBT(z{Og>MH@fMQ;6jxQO(X(n7t zNTACMiTd;!wgBUVniDHM(`b9z&NA)zVA$|C6~T3?96n2OvO=}IN~G-4^_ zQ0Q&N8@qsOY+TE_1_JC0=K^u=w*2vtclU+z&ZfCegSp2()|EAb1SDjcN#Yg4B4QmR z5NI@=0p_DS#p8T?P~+0>!Ogchcp2W{7U}r1GfT9Xz12siNMEYF`GlLPZz=4|=9lb1 zpq76)f@ESeT>14lu+q30m^RL#sYWM^Si#V4@s(<; z{i&0m@)i-Qve;(nK6ytoI#?ZYvGgc;jXX3#p!yn(s`1!4xO~O7>`pPp^!}2otpJJ{ zbHrfu9>k}-8%GtMcBV%1;?zo@wp!A_xo4(b8nCyFsCqj9ldoAsY%XM(!~S}N7JV<17~YV8 zaypn%Z5;c9UqJdmt2bXV>=@~axd5I0vHRojd6 z#PP{Xx$VAo_r_>Z!`u2%h$xwPe1C;q!ZxfGf|0Q}do=9w@E>Q|C(mK$=0xw|2bFC&nkWECuZ+DmRk*R7qf`$2u@6`8m2|@TAx)b7?C#LYNCd1+y~e7mm#icGLJ( zVGgBAM|Aq>tqJt%DtS1?VI4^z6ntyPTHk+w-fK)bt$yefK1mj1hGLHwxpLvN`lkrP zK?tg){p8ybFzN(5@bna}tS}Q^6y7%T1oC^YJPG^cFT(&T!BN+rI+uixQIF9v935Uf zKHUoo$?+65Doxu{RrzclkV{e)J(jDn61Ukn>VI{V?5(l2CG#V|83cF&z&fEM@!tFW zfeaCz!QdY|m2P!pvET9c#Kh1(w~;XDy&Hw&<5rE;bG}hgzA=tq-gxi>i*?XuPVa#~ zo7HcLPgk@;M3LFyE2J#V7hx~#UjZl=UZuNbeV zb*4Md(IDvC4MLyYPo?3BXT5v9wuM!gZC5Z`sMYBX>(2XBiA~SJlgXy#Q-*N?#!r#8 z93PoGJR8u6-~7+Y?2WJcj!L*U`J&U^tkF9ojFH%{=P{OE-rVvKaBh z5oZmsV`Yz>s{ZuF2#wOtsxHftsCxvqt#u8N=Z9lNw}MvYIY;r zp|~eF%Nf=6!7{eYq?a!kVYRz=-u>`S$NNCy=m=~DvLKV@AER;KmAK2xr(Y zc{y#gl%Bm}a5NPD=Z`$rsG*4l5p{?CQR~q(6yRF57aP>W72a1b45k9Q{|upL4@1fP zK&tKonfF@Hh1s8)SnRuk3ybIf>n|S{lwBtRoe4EwbkTYh?cKM_GIY?7#W=pYEEd3g zhNA>sIN-8v-9O~eUU)=Gq0#$s0Xy@&HlR`VM=p;L*S0I;E0W69fKG2qKM&KN6IP>z zNx#a&QTRwVGf8%tWSb~$rwkcAc@LONLXiu57^ODD+dd%A(^8PX4V;&F_Pb&wHZ8+Z zMxh&H{V@zs+1s@1X4jsuZo)Y)u-s3^j%7U}CU7hvjdg?}&Zg$)XIlmVJ|DB2=x29l zi*;=M<|X>sY{COlKrD{!{KPZDO)90ir=;%~WUt&MD#XrcOgCg+de^#Tcdh-F7Wcp( zQ^QiYY)a#mkah6C#LLU~WI;!(z7|O2HSsm?Vh4yRnLA68mE@~a*TH)8nA%Ej9d&{U z><&yVG^HIDI=?Vtw`xzWa!G*}fZujKn@*l%6 zGrVpZDh^V?vmEQHre~gD-IKh*^Hw{OVv}MCcrY{9KXO5r*G7mY8WV%kb#T$3W$gl; zk$e8!gG!C@qBpUd7@bPP#xoL}6cUBHH+^M3dQ&oBYJP2=bf2EZDD*nQ;4ZI?sjV;j z+&{b+onjWnXMa*Rg4ej5Zz7JIYw368V{aboqaTCT8>z%7fi9rKoszkL>}a%7Kp>+$ zBg6J%_l8e0Ga+IufTYnBWaL`wiyOF@RiqV4R+YH3X)P1e$+u`<6KW%6J0xmGe79jO z8V^T7qM_#nV-Bvpt;Cqs0scxlL6JX>-#d6RmIy!fLqXl#`)yJ@=ZycDIQC6opN4wl zYYc16>s>6_ngeBl8(N zf^NVqaEbp?3h+(=(}!eDo+A)=maDeG0XfXne$FCIXZAwdN8WRXurzOw(#yJdzC2k! 
zlkl?fq@wjs^q;@7zrcUIafLR+pk#b#^BAhaNT19MG(@j?`HZ5Ao+KH3YtqECWei|e> zUADNYw;|Nx<(-E39qf5vM^A6Q`yF8v>+o@Oa;zvkwNmw5{-`>};VWwkCf9ByGS>%z zVk+f#tU}9ajkw6hz`Hx9-n`X~36s)O~4YN30H z`qCs2BxEu24AYiVe4dgM_TwxrZiu{BxkXR8v-D0z<w zHr;LPRWnro7mQ{_?@9zI`BST|RIrj{M=R;OD|DpJh%KMt57>gYQyg~dZFTV9JMW)q z=pR*)_4o@F+;~%Kmtx~dA~jnUbzZ7Tih5$DmIghoW|o-iniIF^MvC}m6%64eOv$Xi zK(PRWFVwdaJK)}snQw)lI3c)R#OSaru0P$yu-L*)(Xk4sjy6r0ljFaq zf}A@JfB{Fuawh<&lmdXWgW8kBav0omn3)M@krga<+|}pOB+caboBTjt`n&I~Pl(G} zF?PtFJBB%l;Se}FNu$m?;G(O5Fp0s{`*xXT=!{CM1VP{HsZzs5X$*H@nw1`KE<=gL zpwX9L(n}HXf?d0SsRnppZHtwpsC6>>DXoN%K?a@!f6t4R&sas`dPF9U8fZ+4HaOP} z2Anfdk(gS-afQuVzzM~hRmE@voq4zaY<;<|YS)V*x0FpLurnTnCskyY2~k&}5eSk) zV5x8OG!x_Yzm5VD8O^`Wxia4h%KB01p`Ds?nbT)3dd$0*O71OjryPf59 z_uGe!TVA}gBLjyy*o#=2Hjy^cU&GYeKE3YJ3IDY77}WyJ$3Z>Z&jad4B(#<&A;lKi z9!hOCo>PV;ZBsMW+=Ey14NiA8&&NZTIk)LbcE2t>W`=C2q+2k>b*H=BIqktVjgKWe za7YTDuA*-XpKXpPT#H4r+PoSA48A@c*KMG?s{7ZgrFFDfZVr&y-982Y{@D1V?g$vV zg`>yg&Jym-PdYRen-8!D@mT=2*#KYl9c<6|x07{|Yg!RktJ`!m+BTJ+7 zay<#tud3*blF4@Ew_IN15y|tV$=(1N@a{7q=Z?R(ov;hMeb6?Y9NMpt^WQ8q3Jw}l z9%ij_@BRV%p67erVS@;)@V(PGu-f#F{90K+oH`#|Zl3rf+QG$-<_(2IMohAZ%Cmop z_vgTmkIUU`SRah(7uzd@n;h-`8(E$9l{0y_NL;K9!vYD;!(*-W{2w=&B3b9Y*hz0Zsk6VR@1UJ_Q)jC+fP=RQ37+d_Fsa)%#Js|P~KJfGq2 z#QTE(xuxO8A)TJBe@4$_ICw2`p0issraU0|-yf%)=sAT0lnzB1cSq-XuQWOqJ2C9Uxf zAh0vlwVh(NWD5C*>2kwvyKldch0{nYt)!mBsGfeWm}U9Qd<-U!i{jl+X^BYYczN!G zwI&N5@J{~RfD4<)HB_*NLA0fg=kpUD@8E2>@EB_8#~YNv^v{L%ibpCdG$!aymJ(!2 zz4F5tp<(vn7&VChcP;4}2UqEg>`z0n@u2#|RR2shG1dqGRRkbnP5hjIIBMEn9gJxl zcu=^xoIND>E~u73o!Bs*2pshQ81^)j(*W)eM!k380?zR+jvN*9dwYZ9>P=$#C4x+3 zkF4>G(Bm4%fRU(>M&I1wN%owVRp9Z#8F%xWdSUduj5(HuEbVz<#y%QsH zeAO6%*m2AyKe_1m9wf~)A7rM(3r}S}I!hbtKU$*5t|A(`jYpv1(e2J(*N>|}TAz+fXMLduzOETJ zn*#6g#!$$tH?bn|65!g95J#6=4C`+=2MU zPI`QuguTe8u^9!CxtF}s30Lb-SjGvIbAa^CKmS zpeGxE<@U51yX^a{PoHR|O8?$KKJEM^-$Bge$4T1CIu<7--<1OPe-a{RIZ9s2h{Agc(YyuzO(Rb&igCQ|%9O8veQE%BfT}|DqfnSLo zB1I|&yX?~tMPb^cZ!)mDRqC(hY=!#3L=VpMWkX?X30NGaRpSAjEGZ0%2Zk%@|ticBorQw)D|4>Ej$qJ&E z&pcR`*-k~hz1uYNi;{DzN|9AvM=guE;|$FFbdxr*=C-kh>SJ&J>)nO>ApZ;&|7nct zvPrV@QGRm4%>+NoKX7k-t-r_vyn=B5vs`v)ZvVKufnUN&i&w~^+QLK@TZqpB23>A@A^IryAX#r5ePKor4x~+L|MfSiZ29> z*2N{5wr@iHIIe5EG4S6Z(!_DdZ?^7&!3C^!H`y-3+{PgcLL6fr$77-MwMlj|Yg@wv zT(}q>fhqy~XTf^&&@dKvkNUSp3-8SNsoj@-H8YTBER^Pfy}5ZigeGMX8ITJMj0o=; zZ9I4P=ZqEi&DhO1n!DF$M9#1r9~Xm+)ytDR{^W>p$RuQ({o#wTww?HwZ~T%&k^$u6 zD1)=|!h&mXY^5}0vrb3VO<>9NFPSidn8Mj#nWhZPSbb$0wE~maN6WQl&s;hqOXp=>(t7`b zKXpe1=}&=-S)|>J{Z~m$Y}WX5e5nU_?}*E^k{^>$ ztcVV}T-+7X|FvPl?!mT9!+*k%*nxnal2I}R?L8&v6$fq}r6H;1B<9S*Q#hZr&^ z7KLHx^vv(#GT3z+3eht<4UZ1SyHJr|RRZe#*#++@g|I<>x z<{(v;Yx(ni^I z$Un18ywIgTHsenf0uA}!m{o$HM$iCi6^F|R0=S~8?z9uEojCnB`vjab_5cDLbl)m~ zu^(ciDKmsTZzY*tr*C0C=KxzpQ$LG86dbZlsKodKgnD!eX>p z;eF2HXR-k&s`T^;zMV1WR|q%h`1h9Uu+iDKMEku>mpz;PFZEQFg|nnI$uu z8mD`+NEN+v^hc9!_-Q3)lWCzg1rhBKi3kdMRT0~RRADc_AbizTV`b*SN z(!L4eoCke1uVr5#$~}M@B`fI0wCdE@brB@ycUo-%o`(av6H-2JH;Q(^@8ZwDhQ1_g z$3Y(kg$veP!Q&)VYhgeAk%7Wb)6p%d4Z9CQ?<-Y|UH-nd4>7<|ReG40c?lP%Q^C5B z5;e$~dsi0s{g{?fE{9zZv}#e zGc@(~u6L8SccY}AV@dX`-x{`}AFwQ;?5Z|?2`!hTC|h?%(>t5#25K z9j&wL^6O*`10xSBJD$+)23<5ZO)L)#4Y4sfXQMbKapLLSB81>&9jrat4eX&n@?EoE zq=&d|SaK(4?3BdWteotY7`$g=AG7iiHqQzpM9sSz?pi8V9WIgR$Yf!ul}BU7Cc4|H z(lUQ;J`argG5DT;^%y~COx$zID6X~Wk9KY zcsQ$!5UP~YRB4sqHOzZYTFh`LLH3(KeOw3lQ*5WoXo&83R$WeCIrHPJ*%+;_<3-LeOHgQ@(HW7_ikZL)~f;n2?K8F=FUK#PK{ zVI6+fC~S5*czPYxI^gkD5uHXL)$fn6k(=~fsQm{ExAAFdXKA#sf|o0Ji60#Fdg9XZ z(s&)%`n@S+@O=KS%=y6J)JXqMgILFj(BA_cUMpR*hw#|**!j4DUMT9sOOI+Ef-ZS!Hw5c8ET1eGPkb9lY8Vm1Oq>WZ1JOBO^*>1BHD@i3Q zNq!Y}+t>+Z%5a%TJ@S91f~UipJv_V7brTkJt;U@jm?Gg+i^P2lnV*dt4qkl68&SXX 
z*i%sjFJhcwg7*cpa>C;ctr+>7r{mvY#@tgBjT`%PM;A?yiMnoDvOFTnr=Gg$0kW0Z zz2F%Z8zlHdg%g|B1wM-nANZl-mU5?=TRlRerA@KCdRGWV;fi@R z?!R2o$4_X?TBZ-ts%cK)dOo#2v`q*{!3s*w0-1eWzQWP#o&3ry^JGT$p_E(Z{JqDQ zp3Et-&QsPV<@oDrrC?Sjb^NFld&R`J2ZEk{xTu zS$FmOIfI69$aq=iNo6b7>!2+|{X#PG77s4FCy)JvBNu~Um&Yeo8_j!e(KL?o2=pp* zeKd2w;Q%Ion=OjYHv00GVbycx%b_rS-TOBdod9groZiJwP+x~Poz(7)D%IgOlH*di}L^Eyhnu_ja}B zrPaGRzw0S(u6LP7>*2vu0xpv;z17Kr6(0M(dNsK}qC=W}mrUmg?ekL2RwmxoI0gXb zLBaNiso^PE{~`k*(#^38>iEH&k*lE~Gn1lg^V0Xkqs02$y2AW|cU4S-%ej>APY?7k zT30Dc2)3@ZdFuVyDL+W@JbN2$_bBNb=K=@tCW7`5NSPl1X&Ld11F{V&#v>F z;bUYTvx~*Bef;N5cwToUg@0<FL3k9{ z!>i@|V(~Hkuj`7%!7=Sv(6w^<7d8$H{2;he79^z%->aVDJsq+$LtOU7%m@D?x$OKKt~wArowKoRa_y7D-H}Rrb8pJy;fil=P+?J_ zR6#WBOE~lxGjaU0qI7YV)$woIS+J37MGQ7poWswT7*BDDK)>yrCjMY%L1Xs`%(-XT z(w=-mo4UIZiyM-|{(x_>$(r9T?@He$bsvd!t}}&{-qMuHHY$7Rf49Rr?$P$xOH4cs zlgvF5WlYWS&oA;H20CHe`RsHrPMfY8e6A*aqVuvRK%VH8Als`f+4+1qNRJXkR2o^9 zaww}~O=HA>=1+iwH9@b>*Z~Ji-=31KUj}*hzo8T$e}E4bI0UQN+lK9YEKh7I_HW?6 z=Ws9tI*bE`4)@%DFIZJ%%U};k4R*Q8K>vA+S4s6F_WlZ)vK!j(OSYxV^BUvL%4!3= zJ$B~4+$ESN9xb-81&P4MUhias1z<58;LJC|Plu~@aW1RDRGvI%&Ci(@`EdA;+S0E9 zctB2{Nb9s`DL4LrI z3u{r^=&bHIVt#h@(a9$IdfA8dwLm9}~=`(t(+g0LwEysBLBg9lH&P4D7}#6~GB^jWHk&D5jb9hhJ&C;g z)^s^e@XKnzILT7oH-3DEgy@9{Gc}owwSSfEhT~Aa*LAC4i3ru=vFGpIw1gIA9Skb_ z0TheqBuryE+ZeVh(d~i2IqfJ26 zy4#d%)aoBSy|eTYBlnu;CCe|YjmQ`m3oZNoRonL!(y{k1Sme4Bme|)M7?$Yv7j6uF zB!f`a6S#?bvqu8(CXKcds_MfReQOCy;jNizj;@zAZ+M9F1uk0*?5Bp)a!j5!oXqg! zEtd+u)#YF%A>E`T?K`Z@RC$i*{t=KEb`@rDI#8cqO|eYX1X0y$w$7DR(*ue>9XSjZ zuHCPWM?f(#qEfAz?(nS|Gu*1N^K|yWRt;;e&vgSOfSDK)CtGT=+TsB}f_KDZOFyuh z7so$3BFZ)yq&XQVHBvv#Z zm-K1LO$gkS-A-NrBrO*o&y7>b}b$s(sGqL<@J|N+stT1}AZSWitBi z!&kJ<>8i9%eTF%*%9`^cUI;hx!e#fBqg9~QdvARLI(D6>`x=biO*)RforK?}I1yza zs@!VtFVnax{AF0WwkD?7%4gi3y+jSA?Jdx%2;be^C%>%vBuwnSPm3&6CP@-P4~K~Y zZApptU8|$BfCi9o_OXnXbK*4QJ`xwB#QeLgl>SG0Qgsv#)@8f%?&;0MS!;+sB--H> z#VgUgvWtW*^DxuGPGmRJu>TWEI|P;3?r}?t=M`kLReAwh+-5{O5O?SQjV6w4LwX3= z&*ak9uhc$ScYXkR>{SrSu{_)!waFN_>-NL1Th*#1H9wWD=n9_hb1-Jq$1q;$_0G!J zaeKhWaq(()X?0e>Oxc{)*=qktsf_%yuv4e=qM%#hV?)4JPyxY33%KG4yumxrnfWNn zj~X!DS}e=uB)@g`{vhwn8a)4a0OKvMJ@@p8&L*)SUJ&?r@Ut1FN$Njmy7EhD7WsZX z`Mmyyip-|YffOW^vd;v{%4|82yQ3{D^$wq3_Y&mIFqs$-0MEK#I$_JDTUT4~Z5`0% z@nQ=fGO*SKV zpR*riCACaFspaL4oXF2WNR~cECJ>KKS*!`mG@b90*6DDigkEq}_fa~Q`q`Fi@M zL1RnCzA3S>(Lr6CYVTCvMbVorxttax%?odG;o3#qC%*DBu?m7MD=j(EnL|Y)GG0gIkX1r)jLgsn!&1lZL6_!eMw$JkCu9)Dk0Q+_3PRN@IcT(Jxipnv<;X`L$TE7 z)cJbnlybb)8_Yi=FG%6ywroOFiTOE!)9R;`GvU?ISBioU*NnE4A0<);T>c{E?bg#! z%zyL9Vn&x?a-1w2CC#tzM}oPbvyQB>owXR^W0l+x%0VnE(Kgw}AN#b3kR-WWy)R1oegEbe?sSp>yQYC|hPiBBziZR!Z<&X6&DbOLmI zbnhi3Yx`xXzMRz??3y=8GvON;mx~@RrJK1i4!758uSc4N-T1k@A^BKY?_+{@{Js+- z{e(=L2dclHYH%-?+~DOHOm-xAs}1=nE5ynxfwyZ~hnw{+$r`XKpGU9zLB>DLs)F~- zc@spexbrvU2=qGjO;ex3o)qH+*{?S*bv|yEzEdPTHdT!}@#;g2A$a=D_10|1Yn+)Q zkz(MTmv-;O`|#D`6v;&DiM!};b{R{0$`Jwzq)b0LCU6FVanbMoR*K<)J{Y_5${GKn(7(?Mq{jU3Ev1X?_u`sE z7jC5aogdpE$#BnvlcLInOUf~Hd-A_4_9Fo@yPiB~bJ(2pyq@Z3RYi<`EHN7Jc>_v? 
z2ulIAstE35962KfI22@PJ%k2~re?~0j#|6+O^RQr$_ryEJrfp(8^wPFejROuPCevp zAAC99N-!4eLmf|<$V(HUid^I@iH)vz82j%t;8Y&y=hBf~dK=sa8BhmEh%M z(-RNgk7#?)HGOQxq%ZwkN|?yN+x&WoD=sS+!!yY^9$jnslsNf$6R1xrY^6h42>P-$e4_K>87ODbqDU4cgepPf|1 zr&)U@^Mv^4cd)(Ll;DozFt+$uGLoxbBiZ)Nl&m#Kz-IK8OLRz0wx~*Jm)-R9<96>7 z&b@!b-&`aUlglEN;bh9M`F(Cn!V@%&HJv5F9XUiMb%ique|E1au2m%OZBk8={L5$Q zOUvRWcn?6{8d3d7{YbUdZ%rk3DRo65{2Y_!H_`~9B=y?G+yEwoeGkvmSs>~iQsWyb z``}#>ZaJB>-&^+Qn1bm95KQkJH(GVkM>3#;x#ptXCKo0ePWwg2GrjpFMbsgD!+h!} zWI9G%sba`|7Z>)nCGaijRi2S@7poHX-%Kn7o42mWLiEy`iFCmy>fU2wNS7!Qs8x3h z4RiCoVv~XoiBrwm1KZ|NsluQHgx-n3RBE=w1T(IE+fTzG7HUvJS%!2(MR9Bn8gztb z`+$U_&mD#{Vl>nU4!eqRdyC{*#lw|LkWDpFff)I|uONKQ-|;Mb5?Z2H4R-*fAJ3Q* zD%pRFbroix*nfO~zUlZ#v8*QpK4sy35n_A#@%*#yi!~y}oK)8*hssspUXbc4Q=Q23 ze|5Qtu{Sqt$#Jqet#S@T%A70M$s^Db!aJgNyf4{StG*d0X;iW^*ucxcI(LGGWs<#8 z_5alM8ydcct^%|+GILg~&HWoU(m_vHnldNaUIn!62W~pdrNsvOnMLw$EeQ> zy`Ii_^}IUXdqx&(vdK{aSRF@0&-b0MayMXLMl_bxFV*U znac0lp4y%s;q=@>4KoT+1mb63&Mu}zj`M3sS*}d}cj#|6pJM?H%5NV!yLJ@dH8Y-c~Fs7L3 zue-S^U+OFAuTPSbV>_KcXxM(C@tOHC9QOKH$yi_H$#bZ&d}ZQ9KOH*bDeiE zv(MRgeAZeTZecXhL0~UA7`a z3zkC*O`|_vxQoNi=d_HjVrk+nWf}14{_nY^*3BtC#e;S99Bwf#g3B%Tpkd^*yn*R* zik5Q!yAO{(f#OxzqSlC(FQ39n5lt$v6hKJ+jZ~nAP}9ig>NP^uo%b8ToSaH+w|qSe zm3=T@7qG|WTaK&W2VvTykG&B;u`$GDqYU^11yKuve5sJ0M{6YJL-Mb zKhWQ?r|ut8&3Mo?8j`Zp&uRAz$?$&3eRw+Y%Lc4Bz@RBnGzT5kTH*hDx@jli6bM%> zW+F*4ro^RaPNX)Ncz@`$E#<3_c~CMh(G?d9R^D&>wM(?gj_H2QPHX>n-*K~=o|4r6 z;BvQYbKxz1?e6aaa(Ul)6G+L8Z| ztQrr@WZ1uADd!Wf%PAXcX%79>@gvIe`c+^yrAQchleSC0@=hwE{DolO$n8xDV}8DN z6sU4o91659c&~wiR8mGym{|7onWnO?t8PqbZcI$e=DtKe1R}~jBb5&iZZ7~Bp;1*7-pp`DRD{*v9m>Y{I9Z3=={(PA(o|_U3)pv zJ1T=0>NT~*!-j6(fo(PBrHbDbc`;Y#m0U+s0GB}-o8`Am|0%ySnmM!Xsl}CJdhvNN z`d*gDh@<74wn=Dd$j#L5cQ=LedYhr|Y?#EN(CCdcs!L*tf~YSjm2roGZ#xuYTU2c2 zF_wP`)96`+ew3C0-Os--6g{Y-;I#^{H3}cWK)K4O`01j)MNME% zCmGN?L9#@)q7n8oRP03kV+a}XD7L&^I+Op$_lHP)Hp(W|o}0e$cShS{C)a&VtuNd6 zcD;S1iQY}wmO7iME{+Prc6NSeBWV%hc>2S-_9Nkg5(EYX2f!^2+|$1i%*?j4VBrzR zYUmd;;*f=#mL25?x$W2AUfj_!uKGS4vC-U5y^^ByI_4yHCATX3dhX-IXduoroHtaK z2~>s)q*v<(vZN={tHZ9m+}PAJ#^F*DMc|v$cT5_UG@cowv%6_LBg2E=0}$}FyVJWc zK79F(iq3ur8%JP_fQ1I(I#}up_GE?0{UN?rD$u(7H=NYA+npPMU+DQXN~NomC)0VU z6x#68v5SOyT6vhGN)qe*{;`*@5CLf-+x20>wp8nz-B|>`cEs$t9+cA_1(P~Q2Vs|B z0^UE#oR?3b8wxQcu&*L@Rf-Kn)os8fTuLWFUy7xg$@OmiH1&^HP+#Pr(<{6e z&4YXgGP+LGXR_jp7^-V2Vx|NgCCIECf<)fXSPS*ySSSx`8T=zt8L_VK>ZebBr}!2F z^DDF_g{$e|Re)0~ux2O*5=yCSBQIqcD`#WM^zqTgywPer ztn))J=8gP(kc1%?7xR+-Y)ZmxRIvBOU0$>>G#7R1boQlR0*v=i3Vxy4-$X$3K3#sx zCQCN-yJ`Rp4;anQmTO;#2*f(T*k8RP9NKTwiL9+^y-s7dT}HEnY>Hu-V!bsIb__?t zVv`N5C%0$pVK^Ves}P6zdXVlR{m~i5Bow{3g;MO?1xC7F30dHoy-4EdEVRcf98o&RJ6$8AzyuFK!sP-_1BOH9~;+stD4KBBSxj5Y5O z?X!7}?W|Yw917fS3O@IjGC<0&-aX)-YSyO@h(EFzA7|B+JXnHWS-pn%Xrbq`Sm>*H zSpnJpY%zdpGm*H=sz3vfNTsPHqoiM%P+Tw6cwR>MX24;~HOa3Rg|?v6=YjAKFbPwv zTKKyFm?_)p+^qTe5%M})@Sb+Vcc_?n-A`-_UW~ClD}4X@F{0Fh$hP@w>uE0GPuN;C-HBAIxwnGEE|l z4`t!d&&IYaElfIh7{-=^dPCVoJ>(#THDh(7MLbtjLeG9%<7hgk-=}%c*_t_0@SJWY zBD+nIgT;-xjXfaaqp>*a9^2Aj2F$$xBe(he0x+}Nr9+i zlS8KW#^@5cjLGN&2uQXN&H3Z9{K6o%@YRo)dpn=VR3*lHiHw7aOEwVk&H?}YZ%^}o z7uKnUrV;SidCi`5!`IqCPzM&kE) zW*IAReR>A?hnIi203=p2fD@&cuKPyFQFBF2FREpA$F(Diy_RYF+rem}J^EbQEcd+3 zIKb186JS&IcAgUb=*SIT9?(BG3kwV+VmOtres=9=QUJZTAv&C!}d@iF_wH=WRWAr>hj}Y zn2{4tfJK+iN+d0Au)9`pznqrB1-}DK3VpLD>DwhNR)8S~njkUk5UO-SeC_YZ|Mm+$dyvseqW|tC9U4T|47Tp@%RvM(oD?Y87UC!yu@~Uqk47rR9N5qXPWmjR=NZXbm?4X(5(T_owB- zWv}c!(e~nLENDTo%V}N^STynzPHT56Eq6j4DLWEO3|Rc7ZKph=)P!^o^fvQnj9pl{ z=^ODE6jl=p^ zqmsnfSzQlew4k0tb)x3K-%UTicN5K~s~o8Ej;@cF6Wy8mv_y6jdELT%@A_a#w)Z?G z_+yeIVOw<~bK-hZE5XF?x$M8j1rmdwq7(1s`?y8s#qco(u4C+S^@{*5ZO|pa7V}B> 
zD{!rB9{`ToJT{Gg&7kRPk44q56nU(w{n?ZlCQbMA*D#XZRV#N@OyPl#5t)fQQ2J9! zRV*r+SrI^uQ@K+jGCLljm;l7APqj|>EthLm2Ulol8O#ZDfE^BHi>o+wLxggYJBT12 zQY@GB(5-r5sd%r}8M7&Wo|*NQEd|tfuyQ-%myDv!+cyFyGmjs4yj8naQ=MC5p6;*G zKi!M!Go3nU=S(}*g3^l!V&T6Gk%zZKvX$EJw<#3DWrKpEwANL+j&JQl1(}43JWLHt zS_x1k9s4)+m`?9yFv>MN6nR5?3x+! z$nbhXW=9ql68v9*FNZcYxJ9q|_(vmRRsUu4%;F$;NB~{@ctcf)D#7yFCOO^<5uNb6 z?HI0E8Zs3swWMHGE3la}v-0;UQjQ;pA+ZkYCb-vi9HzOjG$919_XMHOJMB*8ie

Q30oADx8XaTHSTY2ko0DfN4Pm=j_bW*#XZ zVlPQ}d0GM5M9r*azUzluS%Tviimpw9_amOFdQ&p%@m!?!uVa3s{{yy&ze|Amr*A6n zjMe!QZo0ILWFnc`9SN(aqll{74B0&wFXyce;KQWEJqbH{5G_|dkZw(TJz;2rmbfv! zBNIYFqg8DM=$!X?q)SQ8lLl_Z*<=m{o7yi_Hto=4^mZ{pDwx3K)4a6L>@o45mOoh& z=NPfy>KMB%@k%@}8rUPkT}BzJ4bJ3m+Hw0q)n&5W zCk3TwA6b@6Y#^1OvoLu@724sprsQ=}Yrb@NVB~c2kB{))$lM@=d)YVa^5kVXTaovU z`pi~#ZPj!E6ybWX>U}W|u54OKn-F?=yd1gJnsFRuN(R1|jFr|p6Rv-v7Q6Fh^ z(Gx!VSL8(nwNXmKl<0i5LN?{g5-M<~eG|JSUs~X+h-@Y0qi|9WSvr4^v2^}D62`xw zED@ibcuvWDuzN_y@37BFkO|mXP&f~COJp%ig4ZzA$^-weJ4!_`Gu&i#qggG{UJ=cpG{QcVP`$sBI(U|$^h7N zo>;gYWJv-e@7jgJ?Q_Qr4*Zm)*Vnyw%}(ZZj!V|)FFE`HHav3>w(7Z=cI_K7TtnLL z5^vtt6}eBw&?`6?C5XlZebC$L&ik@2g#3y7JxHYTFFxzOd+@>=Zx_e+gwOEUE4WYD z;57;w-+zW%|s=w%;=pzXa@Fo}uFrxL^m}oaT#(HUNw;qA~E|d3*5dUQ3tjEEGTFD)pXv+T>M(l%zTI5n&(XR;W z+Q7LupISzgZvfejC&u{uoG0<^mK>I+SukTye}ulFTtttC%nx}mL4V{Z7E<>A>p-0k zEn!eOCTepn{R|9Bbb*)03r@o6n#C3mK+fTCNgKmdLvyuY_Ze*fkZ&q0C7Qeov`~ps zjpJn;rJ|RI{T7WhU8Sv72c%B58A=%e6@Dy9tQZClbtYg)PS~Hlpol$~GBL`sIqR|n zh}9|?r`s(6*_)U2#W+`X<_BW(OS5q#>#wqD4frX#+gUp03h|muQ%}t64ZD-kL<>ty z-jI+P;<+kFIifZk@wX)nS7LiK$B4*UG2sd_bdUu>jLx^8UmGfmX&fKyzop`|ys-u$skw1)4nS!}=Bu>5Y1jPCvh&-HW~ zv@RMnqteB%OGR4^6rulUpPm!eiamDI^b}PigJQvz0601yv?d6K^cu+WbgO2IzJ-G zvyi_n;0&e0PO5AWT9qq`(4c;g!&oZpV)6&RZB8HcU@h^9ew2zXv7hnI9^=?CsNH@o z2tO(GH!i+Z(A1Z;t$j%VU)Xn~>-}uy^j)7?q1ah95bfB8yPBozQdZYzP9xAA-@BeW z&OY(KfN0Riye&oK|U~0zpdhT9i2PYaB;tE($CBx-*^4J|8BKFfcYeBG@JD++5+7 z7LFikSJd28YH~+AW}Zu;3Dpz$XiDmP_*7gpVD0Jl(qKU_{QKMQdWF_{k=5ESHhRB zWN?PlrAD<1nbdm^oxYtcVe0j)06I$d&!3lSYdj0*$vOj4kz>J3}THCDjvOtoN)blde`F2A{>EbPS?}Tr7e;%@DR08WgX;>FD8@HQfehj)1^cu-+VKpd2_r*8zjsRumc~myu6Mpuj zifAN0s%Dag<=lRs|IUHnhn*=HU#d55wIYI|Yy8=&xXQt7JzRhuy+nz$;hRC^-ERs{ z+4ryxdRI2G!?cq(BiFI_ZwD){ z5fDq|sAN6?fBh}j&G2v~UZAb)pT>uLQgC`v|CztJxnuEs(x$uo2}x=P#DRb5^Qv`h&M!t~w`q z_?<6@lx+B?#On0(m84yqppYksIO_~r*QK8pBY3O@Jrv5!+|4M@ru$%k%w~*39B2<$ z_f*rRZa2ko9oEYBAmyTDy@}?UHBYV8eS`^H>rgP^Lr;iP*^?OfiGAdT!?EQ>y!5l0 zMIk)+C~l(*H|BH}yK}KZL2xf2+|b5&OZ263{k?{4O)7!F&&%5cbtYw|DIY+e7qi^w z%s6l5yffyU&8NYhxx&;PF6Ffs2m}8^aCj*TXuo?WhA*QU$8Bvg(gdUEzVr_L2>xJ6 zo7)s2nOv-_s9;E+?Za%mmmk)Cw5u5k$7cVI4uJ`=9dr%LiNFx@ML+^+s?<+dbrMP- zyz|&0b7FD1g;(jsJt2(u_Cdg=b++102>$E!3Aw_|=J{IoHPTJ!U_y4ndku2SYM0?$ zAu_DUA=K{uUm2@}for~DBmw^NcW zkJja|sU1(UL_fOsILl^{6-$1fg#S1Ow^RWgyWCcbm3f9*CaPr5fl9mRt>&-NLL1Cl zjV{8KUbe_)MAEDp;zC7N8f67-M0}$#EDT1y;c+hJj+-wqn9Uw5`dG704{&FEA5PLG znL48Sgbcl&Ty|k$?2@A2g1-jl2mPjqxz*vI6=!!v|KUe1c^~(bc%*_EzvbMG^q{u# z-spz*7+uOI;V)BaRB)XA=MJR^$?&NWJZr_+6zxtK3B8rQ=68O!f}V~>bkTev8TRaS z0SG)95`yQ`5fXV!_us`naiXWslZvr182wlKq$3Cp$9vs&FtmyI{qWPxM(r`+X%yG? 
z0wk6ssGe3-c5hf0fxLtnK-a(;vXU~=3$)HyM~(k%8|vg(I@n2^r$nGYu#$^^sQzgX z*<*JA9`bi(p~`pxTScL#OLoWYp*QH$OVAm>w49KqmJ-!CMT+LbSy~>N)s~F8|Tn}EOtCn0A44gE1T-DG{ zz5Xrc2?)NWfkqIdus1pf`Ty)z7m?nO{x+RzHe&sp{pW4j3-Ba;IepBV?279r1-M99%*{TbH`#TJ&l{>wAV+2* zFyl)J9u-H+lbNuaovurO_sgrx5CXbRKefx(5^`9X3Mr2(FlU7}RQtZ!oi~wz)LZXy zeiqGbO!+1_g29cs{O(w)3Rc1VIt(@QcA&C^=La`Z(lG3UMog^f9|-=;4Zzp%M(T~Bl}MSI98v~RCoby)P0`^Zz> z$a2nUjdRVLKN%0HvF>bu*8!;*hPvNB%YJc_RA4xV%XrPpeJB(cb68QdoRx30-3FtWZqku zgNyd}@Mjb_HQcT^fN6vMk{Ys@CsffM$!+SdXmv*W0V7aRy{20ft*FQg8i(#jX+e#I2lQ<4P=h(sB7)H)Z=6rx`MfSLl?yu!Q#TT;tmF- z3gWK1_o)qtUL@R4Ow$_p?ySuU2DmI$Ujr#VpLQegK`-4wm@*0{emUcTAZJOnF4u=u zH*i&(53Rzis1&8K$R3><>DgD1SmP`!(sEohlcaz&j!Hu|)pFmn>osqKpTB33q^T65 z;Uan?ey$y#%4zwzlT34zclp?PDK;vPl9p}P$iL<{3nh3h%`AT@as>faqd?9;Vf%Nu zbRQ;@qV#`_(CY`GA1<2{4Q3G*@I6a@eei zbhGb09JjGN0bxXPK%y8~vAtkPQ<0fB7GPh}L_uQal5|5YgRpW{ z+6l6eR=<{U-M%+1NcwaOkE@Mo2&!~ z4Ldyq`E2XSg3_BsDL4zkcyLkDB5o*DmFKfbcL~yjxKAtK zt$+(I;r#c5&E}C`Pyk2*xavAip_=-R|KtU`zZTJ~%LyJ?l#BV(h%uI7y#E1Q{|rbj>3>3lIF=m0 zcJ2^Y58ccX^heS3C;Qq!S7aoM->%%oDDZ*pd!GX%r!E|J~ zSLXp6k8$Lv2HIfkeC+ph&Qb9pH7t>TO5uIx;52^mQchmqq+wlT;6H?EkA}q)Za);| zUnQ{L>J~+c31(rmmE^-)fuD2AkJ(ch`UW=k5e%4wwU|-Mf-|N9B~6PFJ_W zDn{0Z{bAu9uN;|PSCZq*tdjRnm-pr~N_OrHB=&v3p>agnibw~KD}VJ*@(gQ*-D-rh ztonNoOA8emYCnI?1vVGvs*(cs(o127rXzS%2+qIiNP|nYj=+KshrhMMdd!m4 zX>_UYqVP~mR!*hN-xaC<0=>yWBVKgWmBpeIO-rdnX_oOu4I27^t1-a^Ipn;$6&Xa3 zih63zBQB8J!#~l@(5hbs4cJI;G|m2~$QRlAp-E(bMA?Q5p6-<%`etlBrlh&5q)PDtd zx)1FFMyve3F!RV$j1@8n?eYKXxQw(v%-jO^Sc`YZK`jxyoNK~eE+NxuV+Przq-?UI z>hJJVl81QMta?=K%yj$>H%e(lIsMKejpVsWe$)*+{YeYQ1(iA|ejALrvVe)~h251u zF;NeUhvSuVGHHEBq*EuD1l2O2Ar2RMxtrwpb>DvmM5tCSSoOA2ZG zD<-gTo@Y#}cZCt8NA$K89Djv<9v})?!#iSlsXY+weiujPWhO4h-!J2j1GFx0IyRbm zDg!EdoMxNgsc_&`_QN%i7Eq1@t9mG#m8B6&(pAv558606P{~>DdiqEMGm!|537zI! zRJ`!!hW9=p3vg?YTC=8LAku{y4Nc30ol^<*Vl^eRkdvZnWH7AT9F-DWi%8oFNzIyk z(XM<-{QcdOF5~CZc(+TSpATPeVa^lsoYb$oTftlYrmmUFD)C@hmvaLaTO16s#;s>D zF&llH`Ce`v^>pHlM8G85h2+aP`82L*a3qiylQ{Oj;a~s)q{I_IChyU0q&sepu7awJ z;4;aaxBkqo3y0iaLH|}@+dPeUJQv4|ter-Rfm`P;veRWJ_g>9=N>l1R-(9}i4lMy8 zo;d!cz(ncT0@k1JfY=@IDH}Hnxr$JU!KRWdxLPn;`x}Lk;l3FWk;5{^S*nyT8SRMm zzQ|~reoDt=+FJl}5>+Lb-9TTlr>*x;UfZ1|ZSbPWmDvG%wcv*Xn*Ng>W8K4Y0pbE zD1*bpRCa3uJf(glXQY3rmWEg*K|*}*bJaH=F~*WRJ@S^K&Kwc(D(rLmLcB+VejaSL z#hv6DZ>IN*|BdavwAYVrJ{I5XeMnf%_S(2SZjUUGabnIp2m$k5K-$;xpewMEMKTHF z0^A(qP12rCe)?Stem%}-iLoxRlK9M#2unKaIyRDHeAcdV+;4$ZSiRUZcDOn9_jH@a z|IC-HvVp~Cbzi9#DB8ZYqBLb1`%Ru}?F5yd_MV6dGqNH&1`<@o=5P#oY&YI)pt<{_v42@~gW~BIz@HQ+ckxUU5M%M+V0M9rC z9uy$duLVj%CDpL%88)T}!#Ti!>HEyVII2McmZf6Q3-jN-95Np)(Qw(o!7if4)cv+f zzoXVgive%Sd!pKD0k@oQkm)^l0_CLWa^#$nfNT39RG2roAtK1Ys=2{tV9fP{P2(Z5 z_nLr>I7xdFj%8a7|NcDsBH`$yQM$(MW0)jo?*|lx8ozY!^PJ*@*ncNfMvFoK#JEQ; zI(!HzKBRP=I{K$;K1^U1>XMug%ws4$-<1k! z7gAIptMWJ|ELeglSB0d=O_8~lI|*yqpP|T~Rp9p<_f~+%w4MVtpzGp`ia5CfA4Hx2 zbF5#lA-u1aISUZ&x1-O9)|CxOQap=jWrClLNIk48YD0@NYhJLR@FC%QP4ttrv=x?$ zL*KdrqB)m_jG6b6w`dpO%o&$Hda0gqrSwfqe5y2ELzBc^Qc5=Y%A<%&#I`^??^m;i zlutAuF7Fs5ERPBU)CqHPy{@Kr-Tbf%q%9C}ohm;2L7LmNmr))3w zncu6r{ruPz^}hpy_9OOZbvi zzRe^Zv}?yao3i-GhLS4^e`;K z#8=@NVynNE1T9JIU6XXoK8${wW)5ImtoOH&pz*0=wuj_~%@Yni1fMou-E@?jf7B{i z$X0kLqEWy+0-dHCSU~K*s|UMiRU=AgBzAXz!SAT~*R67j z{cb;5mV8W`4kNKnV?Zv9)XBe$1S4pZ)!+ISZ3@+m+%lsyaKAihfphj^KJqPT9{eF( zfy@VQwJ=gw7-sTh)6GMlzFw&Y0gS-lUzjq$QLwWc|$XW)K=J^5(v$%Go z6=Or2HCsY+FzB*KHo^Vrv2UaH=P#(W!Q5eW#(FVMqFQ=>RWveuP)9g253%zr}0S zM*HG=%7@hjlgX8={Q~%82r2lh1qRu_+Kw6KR#8iJP(&g$GgTFaC6XG#=}wKxZZ;MB zZq)%+1`MyPwkgvhkbSIo zr00)Kczv6arA{0Rt<}8AHQlaMkzQj#6sC;(Y+PoDH?kfA8@^4JldOf3!#5#bsRrW? 
zGgpEcM_}Ue#Eq1OsEqk@9Zd{gyQAS&3U9Qy^$lw;wqo?^k3?c&E`Iu(lArwKd<=A` zWIV_Phuk4EiQcFcn*PuaiIeJ#zhE2R#_b@bB-+Z^A z0@H)nZW!ER{1#!isc%(B$Io+P9SIVv|3hNUVI?{3`BTgiaVHRypM9oe@=5Oy(Y=3lu}rGrmMA?_?Sz5o!O*1zE;2ST-fiy$fvm~^%0O{FB#Oo z0`P6Uu4iik`0Gs?xvuc? zyISz41j?wVn2?l*+N&Daq(l_*9r&o58ouiH6iU@|%DxFDEgYoCxsr&W_6*%OIUbCr zcY+QEqYFCz--hkxxWkGB{Cu9ezRZQiHsyq|&? zVZV$|iQhs9Bh;wbw!Ut~cZW*qi-4{_#1%Nh1!Bw!690PtWmq4|QndRBR9+9(`ipt@ zjTevfP@hSd6(&z?+DaM@4cdx`*eY~Rh_z@hGC^lgUqRx_Mi54gv$D&AS!zERQ0p%M742NT`v{p7{Q31qla;rfAbEp0#QO@Yhm}Z8L{jJNF0=IR zTmwZILtz$7@3KsqtiD%;2OOkei*s(f{OZrSNq5FX8R?DJzi~CirzZ~W(Dg~p7ln>f zPN>Z|{J3+3j^+lXZhwnb%ho}La2oo1VYui=kraf<)=mM8M*uJThu>w>(|C=cV8j1v zf*5>@c_m?%@wQF#`Y=hZZM|Erpf%W{Z)a?>q@>~h<5;s_h=qeUV`hNKO9G5x6ayAX z7zaK#EVVyryqDlj%OsB~hSTQ7eQb>T4nO4YjXx&FC}8lk?Srg+x};$O%bV*S%d`7l zipRw04Sq**siuL>(Qm!bzLB-v`-AEedT+Kbc;OtBR|n#PE%OY_jQA<72>j>MV&68I znR%+%y1{XEfvK>>unk$q>H&X^4HVMhj5Yl=Pg2V|{YwL?R>c_XG%3 zE?))-;pcD^!pG112dg>!S*WNJz1JId5Kc|cngi-!*%VP_Qm4LW@ius)Sg@>t>*e)_ z7d#LT0W=9z^0P@^{Nd0zTnGf2^LQ|2R4Hvd#HK>A?K&^fNLAXqI(I=anyY*AA=Opc zB_iTM{#@rg$<^8ll}Yh}28W zNLsnd@0{CHuIc@3jJ!s=)*e`DhUcCmz0j51t?CjIu4z4x6`PTz}THJx196u+*>Z zW~vkJbJ`kc@VuvFLFIf`Mi6~rPX>oKm-O19@8bO*uU(<8$!R-FZ%zz7FYkf%wV4`B?M8S zB4)`atfG_<%a-l@g6@TW9+cFBpRlp+R^%R9HInk_&T&?pEV7#M&Lm@p%UxaSsMK1h zb+?D~j|mauw`pFf$z@;5y>OFxnftP4wFfym$19;R)5f}#n2GX3OrhfsEz`cZzrH^F zB4ef)OuC3KZ%*e{;8xPE3S0Qq+L1K5Qf1v0FO#GcVMWJsxRy4+XnO-V`l24_{3@bB z9cEG~Kn8BskFOirztB7+@KmAXpS0M0w@?-IdH-!U>3UL6mv;|b5gjA^%vzvH{yTAA zrwh|!D+K)Xde+3O-Rp>HH2fkZI}U6i!5q5{cGo;%#SV)-7w9;N;R1LgKz5$3LgJi{z$7KVgob27%7Z88~-dLA- zusY%*YokgkMpe;t2{H5CPX2HG*TKYU(M@c5y8Mi=i_}Ac-GU(A1*a+Tj{oKIwkges z_MZCV)j_6A8^7yZ8$VSHQt$os(Z9m62~-pmL)F@DsjslavDXLU$O5Blm#?z`d?S5| zinAr)gLnH*+h2zjwSlOcWNPG+I+TCOzxZqW16PoR#@lMPWTdZ4+gC!LJCLN>gj*g zxKBt=*I2oGxCeyD*}PZ`3MHI;?B4#$-NTUzwUMFd|lPn0c8k z@Xm-wyt=I@xDf`1`PrIiteexTE#!w60Q{T;P*5$GfNS$2&&CQj+YGROOV(ymYPJ;B z#YLzJ?#TX-^ohTC4$e_9?QKFp&T6?xa~--i5@egu9cBB^*%0|((YFi%)n!K5?o06y zT@Ie+CP__AJ)0}%zALW%Rta#mrrgtRnwmFU)Xc-$TNqAlbc!KS6yb_V2XW^wBDuKP z?uTcf^D;(jx^}mnH?S8p+AO@@w6Xl*$%_n({w(~SWyPHu2MU{brJ|n4(Fy5biDdnx z`w{vj=PiYq;o__eL=57T&bx7DL29EP8k2cbHNTYjW_>R;cpi$@ZD2r_T?7MiU2ECi zvV_gu<}+MPi(hnUbS;!AW7Yq-ED}H0mXHeG+gB_Aub$_9IMk+p=Dfo2XJw1{%6{Q-mzkK559+4Mcs|s!0V7 zuxaahAX(9pN%f_$qAr>UK2kGa!afL~`BmUVc5!dw~nOdzHeb#3*WGy%j1V@n+VzxX)5wnysHF)KbOro{RbFuys zIDdTmbP-0W5z+KMhTs$Dfsis;zl@nM4n+dPSVf>&*=pH@J#1 zQU4=DA`cFxF3%ZQe14}>`Mw9aNzAm-Z)Y}~@!YZtiKwDl;&$n1=%{*|ihh*KJhCb| zw&<75T%eF=$WzOUilTMp>2+o73K`8#+bgsC-?RSP4%W<% zmGFFwZL+{&YoO&D22!vxIFqJ`+3yu}0a!{0fS+%e2_>>kStJT`J|Ctn4#MCa+Vid* z@_C(_AI6J4zwY&_T6PH)!C79%sfq$u>QVuEZYwU}vq7>G?~zyl*SrJ_gZa*h4^-{N z0qr*#fG94}R|4zeqHiEGfcu+PzgKZpOiNakNM4WKTBVVH*y$8l;cA7owEh;A;9a#9IdU}ws4JYF*qF1 zM3M}Vkiy8}*glYD2V@v_o+p_%GqKNWu`?`w9tor+D3+_w7rNKXO{+;&Z5yqivG780 zqLzL<;T|M&UeB!qbkXGMs23yj{0Fq269AQw&~JHpMgqt6L>4kPEq$qM+$Btc)iy`P zDa&x5u>*_36WH_I0bV&D4LqSV&{EoL*~m|I(o3FgHZz3Q6_y<=+$Vv~|45pOlZ`xS zm~*d>@(mo2m}`ueOm9|Z#3gKa?wY#f79OAW#fJvhvM$p>-GQ5JJM)c8D{iZqpPol) zEDym-#r`>lE}4eA4Fw2{Z}hHoL!unYo@%qbN>Uf6p2d$*9JeJ>^fRrUpxfFVo;z$ zlpQ2-%eeYpiJbq@3vKTZ5C}!kIR^K94BYu(o)KS#bxldlNfi1ePYtN+weY(Kv3TFG zqH6BLpVCgllM3v?s9rgsDSK|iyp>6{7;^H7H9?Ns|GG&yg|gLl+^L5-lU{0GJ#cw< zC76wPnXP2acwf-8mYD!}kz!n>AhDXJ;eTna6U8&?)63Nxd3LxEnkmy|saqk&Bo4YrdlLs^%{Sj?hBQ-RP!OX@V@juY{d{kUcJ> z3O3|e3egK<6enYSGU#dgc69em)g-+7 z>%5IERkSxKkZt?C&^4p6VE7wwzz(~-pu$L&Q00{nz!V_#D(b`FVIMDrn(A3tO_O^0 zPP(Su?&?|2{BaX6R*9!21-Hwc>Dh)j7Z8^wY7(c$@LoeMhw@_=pY48L|LW3D;#9^Q z1pb|j)fBp?1}Oe@*Bu#Gak_0QJxLB(M4zv3v+Jpk3Lm_Y*&W2+$#^i{siB9bvzl8g 
zN+B-RbcfE^QT6C~OI_T`?_~q`h74o*W#6t_>&y-Ov5SztHo;!|Hhn|zqR$;bJ^oJb zTNQ7}MmxS*8KjIe#Zlcbl}=y@%Hw`uw}=zEj5EXHNG~;*rN>XHWR5G=Rj?s&CUiLM zLGtP%Dq^X}93SUeJG7oxUIo5LwtLY_^TxnbEz>I94e;jaisb^<;??~Hf_!x>fW+UG z(tY5GV|9wJ5y87bPhDs8+3W< z`$-@lYN~C28rQG-tvXBBSWp@Bv=^uQ1?GT#-}a282+4EM?uf3l58TN2BRPpwu=jk~ zw!xg_ia%=A*je{$2;Phq*OS%#O~;LgfNW4Bj&0VXh^JHU8>w>cD66#)m3;S z3R8G7pH{JSF{LU;+|#6r694$*W0&L~KeO;3o=&p~uy1ydbF9B{^CgikeAp?#BlFIB zA8=1|AAI8jFlI^bFEB7YuNRG3h0-3)A#M#N)%N--qU>>-7N1&v)rZ6~&It#%Aw79D zKI3P;RCw`Xs^i~%Sw=(rni+yZv{!bjza7T9#rEmJe{joojjyo**k3HjIP$_ntm>ru zy*A}y$m4wt@OG15s(|)9mDc6o*0>Vs#NBX^oUVo@den1gX~mU73i}-<0s7WcUoy@5 z1d%3OiB|)mwSOz~Ha*;*ke&{G029W}QzvPHM|Atc>$5XHOV`d90ct&jq`JNOk zkT8QZ87sMbO12ZC*UeO60c&vykEbNU zT@s|F7(#IYKWclXE1o*@b8GIk(wqI zaTM}rJZm+UhapRum!dd1&!ukhM~&z34|nr5zVprsO!?mb@gY7Unf6S_&&*=AS^)eE zrT3y168na1JWxcbpo?alF19_mwjcFc{}xaihcfEz>IQKSftrYL9&}S?el>|BtnQ?e zEVuMtd^N$e^7@+@xkZ^&vPnZyIATZtM~=>&cOZQt4ha7xEu>G(H8#U zjXd`GU3LK-i+N1Qu-IH1ax+k4DEutbq6#k1T>L#TE6{GNOH76|gO}&^w?@hHZrLJQ z(p+-es{x|bfK&NEA_tEz7H)BrdM>a71PWQ{G z+$+RkxP|ny2G3uz$Eb_i+V3&KosGZ0HoHs*@0S=6UtZsgjrdrZ3;^%TgIo^zR1e%wyLo9v5>Ml(`LhBGqd7!A3TTmk8 zj*u9H?P>gij=O^0PYzBL5_av#gpi`a5M()VJfi&LK^I|8CT7A zMlw~RIP6KMhnzeW_C_*&Xfv_0yH(eLIgS0)x9oWtoHAPwE1cZ)u*Wl-!tGB^-VC2`T2O)YF?!xOb&D7`#8^Nx{>lC~CUGa%tV2%e4hqSF5v0eRjfKmd*SWYK}iy?3l%N-YK#mTqtQS zt>O{{-7+f0`Y?k}Bf6J9#habLj|*&Y912HA$j>}sJZxJAWhP2l<@^K+B9%ce=ZB4p zw1ppQ+H_gw;AR;j@J(nKMQj3?EphKe_B*a;dv)Qik{4nby9OL%<2s&uO&|5^I&%#W z|KTl$6WN499a0!cXA&6nCQU~<%H)F|U)4m2KV0VL|4grF1B5O#)G$%@_KDEsBy2&{ zX=Of3@z>FhW8vzt$U>1f49WjVU0^0qT+Z6C(Cmt}p*ko-98GA>`OhIMglU~GvNUap zu+NjnwJJ9&U>*?Xvr$cDb{nNHF}TFw)BJ{55_Ql>43+J_!C|`L2j7Sk zhLU*>vvg!{jkO_;YU%r$KCRu~$@q00^tl|-mlOx&q#3DDxygO>aVs4A<(<)$1@3+b zMrmw$1$w4p{uX6F<>7O?e3Opu!$3ace^?vtR5l=n*4-EQxcP^0&jg!F%Ak+p!UtDD zIXvuc?Ar#}TOEe4M3gKA&55Kx1Rh-|SeI@(E!Oi07daQV$n`zVih@||&bwqgruFVD zv$cW%{kf4kOIrw;dLE_J%zvDt6$`u?38yD_|2X_vF_Mm-f($bTAx~UKU(RF3ELrD6 z!XvNOjU^PV|LS`Q!9{-5+8izJ(jMKXo$b`bQsG-%hW%fokc_+*ZRF(DU~Jk2wJnc1 zf%TFcUoVA5K%s7Sn@Ajbk}%H7+!!lGoFtt@i{ZnlJ;FjA1uB+~t!~WvH@wCYugi3D zO;%g*bTb+u3_%zEu@Ks246O8=2$e21Jn-uNxafZxH>)Rlb9-F5_XUo}f6YQOaT|UA z(+HeVtIuAdj0rS0h{F)6J7vXJVg+x)+y&Ro_6^EI=|ml;LcCqXvjeLA*?Mq2e|LTH z?7v+sLc+O!iuHR4G(pF#?-s^_}k%&gR=N z@xxK)b0zutDPzV@GUb)$01)JG!Q@rNU3cEFy0g_9t4g&#oN(gdQY9Cm!cpT_9yi`u zCy!9--=w;;e;pL$oo!H}XxfpOw6f)W!$eYq+I@8mvQ#a(N~SZnX&5 za^=bkC0gge7N&q%uvicG@KX_UNOG6zX^%P~8DeG5$cmf6hN zJfGuvim~l~{ozg@eXT4($1HjeRszFO-r?x!?VMgAP^%LIvbbxNl}@DrC>=8Fv=C%3 z3CH?@p!(=og%7i)`!apc=~h0{`UyugxlDlZ=7S4UZPJ>v7FMk5f+4NY?Z7sood7-f zKH#lbr6P*m@=r(aAZW2?nqpinU^pZaaVi|M?pGz>{>hjIFRk|&)Cks??tJpA^W-i& zLuyI~v@TXXgHP+7$WG9UC)Yq2b>IAN73}gkqsQtBD*P=}xa-@SUTs&zo|= zqaoMhYKH|c!v)-!mPtDrcS9N!X3zIDF-;Fa#lyw?r|4NRYsZ27Q*k8{0{y!I*&S0< zoT!A?Y)oE1LldOhU9C~`NVH7erZ|tLRWT^ZbQclQR>l7}xwts269ut(f@$oskM6FPmuX*!{22KsBHUf2Qvsq#?KUS6R9V1XLtNi~- zZ5JcbG1ahdj!jB?wWJ8Y%wH_K;B{qA%AkUS~$H1lSJPGNI$2# zoNIC6#uoboWSp2Ub2Z&>(=&0jn_tM+*iDrPEl^b!ui^_%rf5t}q<8+)%7Iyi5Uw#l zUY_^zjxCV7tG6#i=?njKAZZzEdmu@0)qZo90&Y@sccdx$c{&M( zFho|fM1Rs_P)C3)7c-^2l}E+bo~I-DSI*QT7Si;EhOoZ1v)0AzHuR#@eNty))N9tP z2?L1FD7vD&3_hCim0z+TwpoYKM3IJ3q(S(0G)zMiP^v{7HY9s*<2{_B$6^dTJy+j! 
z({b9uYdhr_HBdzIf8B;?fo}0X$KSPr2^$YT4bZztbistaIDVMJ^a%6kuhKo<*rVfU zC$;aUYqRgxN6{GAbo6P!TgQ!d$!)(aUvc;wUTW?~Fm>uEHD^&s;W1CB-AE?HX4pc< zP_;}i0B4VOiWhxKR-*1i)Td%YYh-pY`b5>u?WqLW|H{zFe_%sn{C|YPKalczWk!Xv zbZ2(}cm1`A$IA@74lFWhKxHBZTxF?|y4(QC9cTnWx}YE)lNgm4F<;MYKk$|^%C;7Q zAn&hZ2N#>nGS?Y;a1|Uy@DUebQT1Cy*YXqF2Wex`G(C;}uw`F;Jl9lwuiZ z`3>i&ASMi8MMpxzkar`~z|1S~;1&;6fji%L_oNU7)l`B7(R)s~M)H#bS{YBH@v#pD;E z*gZMFT^lJ5mJ+`Mt|DMe1B9?&3WRqD>n@2fX{K0}Qmtrs3FO&KYH|T=*d+8+y+@NT z1k(GELX|0M+e6SZKG zE{OhKtRhV|H(9znf~br!$^Y;<&RnP%`7f>9P^Jvx^>7*s&&lRPJ!l;)O6aqDZ945P zVTd};&yvod|3IeE7-r>K)WA4{FWNg*bfJftME)Hxue@~03Nb*jcvW%Ko6rOUYBuIA z%VbAy_iZ~K!r`6$9{86v z0A{c-w-%HI9B(mBE5eGWTL#{pP?La7Ni)|5qim9*c|st9J5bMi{$hdQB#(Qq$NEJ{40WO4-nz zXLqOVUVAY!T@)uQzR;2MjDC-q}CjYuKk_npgl zGaDL1preThNW6%dTbR;8-4@-?SJt!H!w+h6{+<(?s(z_hkWynDom-NkotwodZxJ(0KCk?5?mZRFG^J9Q zHIdFC)R1*a_YQBFhfFbeGu3rk`k5_1(Vnwg*m?6o=x*ZjaD$8Akavyb?@FZ|u6@g9 z2&dQ00V>ru*CB44EQ@TIj=h3L$qZydQbcA%gL-a?o(5bUD18*|0HFIs$MBR;?9&QDa9%%%@IRsZ z_> zBu~gQh)O=sC#=Y!5e|4tg;MgG_i*b&XjZwa&h6 za$DwpXgBIaJb!H$baN?w7#BUE0@GNk12|C;7`7of2L4`(o`3s zuy>9`6mGXX4_X$q?EQH8Pyg1Ckxr{%;NJPLlU-@#z??;8=lW!T=gk{)o|8@;xby4e z39H^LkN0vzBcVF-h37VhwUtu;zT%4K{agX@*xD4x3ELWI zE|2g3ho8=E*UgnUJFIrEJ`k#4y14tEptk-glD<@Fa;|VOKY8)PX@G6!RH5^rf+H=1 zv?|BBt&f-0_I_lIH)B=%;f#uMS0qeZJj~L;Pnkg5treC0?aVZxkf;x=7lk>*E%0$W z$Ws+1(3NHSJ@nG#MfJ$`d9cRLf5e=Ku`C@qWqM;d0<_H1pav$U!wC+4sLi{__f%96 zC5C!mFuE>t#!|(ESbY@U^Z->#mei-_<6J3}2==reM}$DbjRjyy$j>@`*6ZqRWB*65 z5^#&WTa<>0SpX*f_bMw~)CwjRnez>FVdFvaIb31t@%*M>;2h({WIWbYNynQh+`c7@tB-@7Uj+_gDw21Jt>2ER=cA>`|1Q}7 zX2)1I0omybYjl(ODVa$87~cWO4Ax?d7vlCgF$uTXMNQMB684 z=VscfAevhi_=jJ# za1Jp*kwjGk_rj6wR64z2>udn!MvDK{R0BA{G`I}n>bd^tq= z;q4x0E`##il+ohZ@;z6g3fT&>HN-Z-j9P@nc_VHkzw*8p{}##)+Oik?HfGsYVWcD5 z6y75R-uz{;{-Dr6Q>#_$l7B30dLGv zYRh#DV^+xjIQ22XA?8(YYTEN@^k;!ovXT`q)dAKV|NmeOc1)mBPr$9;fj~xd386{q zebIXZsGsf+`G-v?Oc8iUFJw$pg`3eI0wDOIGNSTsvge~9E)p%6IgdJ-lnRSV%y|!H z%avYJ+Odu`jO_+nzHQF{@Y_U1!OMXLV8CAVe~d*d0F)`G1;mB{7X}x;MDX!?w;w+M z)0`b$iFyPiR3(7T=1lT{+4KaQT@`@4t;D|_#erFn{(SOF$!kB$eTK-PBi@k`sObBc z!q$i=`neAwU=f%pgLPYM@D3t7ovJ(UbkD~)I4Eb#_;z`pb^W_%0m`R{f%6}(L*Rd% zRM-nP$6RD6kpDK*K&ZF+_;8`Wj62E&V{VPH_yc8yl>K)>yQ2_uE5VCUG;Ve=^72{i z9XRl=QAYFQRCsK+B|lk6J+RLF7Wq=={TClE&<@ExdYVWuJbqS%u41&ijKV`SfP4TG zm_OZ+u?`|{M50Z2@t_d$CdgwYEzzPO)JsQr{DgIG#G+5r=Q*%=ORX_Sa}$@OvCgsAc`M;GP#3&wuoR*IfRBBexQ-~je~U__y9&ulyqn&kW8hI{9F zP8Jce#g{=GE%}PddfB#pVEFJpK2_lPMrjWe$TYk@9hU%ii0vJ@$o0XY^H%fdZ?nVG zp^~5O*tY3Z?k6|J9`rHhc3V3vB`L8hLtck&u-khyZw8<>s#ipAUl+0Xj4UQ508I+S z=Z(&5p8IJv9Ak?mE096?uRt9lZk4(nIdd`h#Nj^+9rF%~+mskS`64JUJpL5cpRhIP?rDb1` zpY&-c-zgdsgMrH_X?!`@zL~BbY(+zvx99Jhxd}amDaD!_Zes?+CY&Y4UPCIYUN4ZLk$SX3Nr+2@~Jcn)J^AE+&Om0FEVa zG#7+`H3QUhZ&;~d6Fc@mkXAv1?1AXWA@JSj5HfPXZMjz6mnhK#sC?Eh_>bCvVdpe1 z(`l66H7j<+`N>AU9IM!uZKY3`4-h#_kuKT|M8M@LeizXaLF}p6j?k;u`>0V-l51WJ z2TWRDsC&sZbqFqIob5p$v4!kUegViD!}JdwkD8+6FRS<_P$Hf)TYrUKVSl>`Q5R6$ z<}~{~{a)-!hD9T>9twhs3t|+vzFmWAnx-I~I;bnj6qK?R^v~n>E&YeCTP&6d$xx9W z%KrxmPfp^=ArzX_`eXf+gY4sIRYGhQXekQ6r}N8c0dqf$icVtwr-pUGs*u%&)mP;) z*X3C#!zGbe)hS_T)qt^FEz&HSC9%d(D((ex@D|?zr6%6RLFi1ud?!Goy%O{)VdqyqHN_(= zflQ$P&!cp539;%}&+@qZcFhCyHkp>7mjd|5OO zTN`AP&7Vb`jb!Pvl+F?3K8LwT;9fSxQW_-Wb25E;JIR9c*BHEgvoO&SRhLky%|)X8 zve;c0Un(<~Ls&3P9YN!!P)VPTPZdGumVnR5HfUHo7$HRXHIimw2?<}0^*81q>d9+& z&Ux$g-Dz7pV~a$*K<)XOfb(8vn)7=5{de<;OdY^Z#t>iP*!o_9V5|LA;spRpT%Z5o z$G@HrkGDpS12BKH)f$!bI)L1Y^@{-OgM8@+@Z4h>P-41{Ty6ZqJ*IAL950D`d0+3< zlxbnb^c&{wA^bw`;=BM3KnM;Q|=S z>p;q$juk0S3XzG7`*piw{kKh+)Dpx=61pUkoY6?=$Q8g@-#mOxjV??jX578UHOT#Bh ze%vCgaSr_@y~T-e+Ux@^oN@Lq>c;R1tXl}dK7d62c>jsCTiRUU4j|mJC=Y2M@l?Hl 
zL*bBunt_c0i=o9`>^UEY`^!YQZctsnLqXMnZVH1L-!g2~ZKpWH{g`cw|FWl+Hg<#JD1$MtI%#_!O8= zhZ~kAd%$Y5`0n|C1CQpJF-Z?QOvQHEJk9R<$qKdK%VgNSqKM!K z&%enUC|pN|Mq~N6uZ&G>x1>&^`C#XgZG-wHp)v89a^nc|2*&W%VfV|L9;W>Om*+>< ziW{^n%Th8QQTMPICAkgkKfGgjT_GG=y-b#3zV=j}8ds7i-AB(k7L}}yaSn!G%Fm9V zgfm-+rr%l$r&=ixBG{7%>9{V;Wsft$-LW3S<08v)_u&qTUN`4Fuj(OiVW;&5q3qng}TZ6U1%HxB* zln#mG!4-}Dj#ryV_rmqOj@&$^(PF$`AF8;L&pD>6TNc+JC)_d5H2J>00&|pUmAc_= z_YuZ5VCQd3mCg@DZ6pRC49t==W^e#+Cosat_Zw2=-GpXQrTsadq-AJjh+B1oHp) z1}TW!jg>T{;|(iO1k?{U*9CBXfCbHayTk-+N&btw>aWKy<&9MWF%*1)eb~7~V%&*M zp!w6^{#_}$(#ixY`LSL+`ExhEtF~_KJ)}L_v+sa>aC`327P0uVHB!e$_{*0-ac1hT zTZQ|x9oOR}&MTEp&D=X8Z>ti~ZdqwDLVMcHG*3`|@Oj}2L!M``5_H{GngQ1zp2XoklDXoTIzm(X(RfSGxd)FKh$ z3w5r|2dLog%*i!U{q5dfVRE)ydfnZ|ByC637K@-(GYR@GC2v1R`f-H*6p^UE$*?Z# zT|Po;Vlsx5Ux6|Gx*Tr8wNOp=>A+N#B0jB>2-t`du@Hi_%9F_!Z$!r9+-{8Kq=dx|5w|!d zEB%OQkI1h2l>^wVTkCKhtwYbt^mbkgaMk+My$U`|O2s@HYD6*a<|*qUhsM?LKgvmZ z{_g>qu_kZVWmCSQ;*7 zH166W`S_qr@*&_rD~O6UW+q;XInlS|=ml)(dde;6$HIZp&2@KZ9LS>&uP4WZ5^J4q z-Cw}(CsnOE->~ah77$~dn~Z_ob;8Dw4FQ;OX+Xwof}pslq3t!(W-<*>A)GME0Rqkv zW~9`7%azE2yz%YOmdgV~t1}55M3Mg)WzKJq{-$J_OzxNKPrUA@nkkwFR`V_csZ=u% z&!;)0UJt!D^NpdNcY6RZY>Ki>UTy3f(~Tc-~dfvM@wkgYipp_qg5bsfvi-mM>bvtvMAl@z2vWQ>U!bZ#?+q&a6P1%#p%GB9FGlll~^Sn7(}^=8IZx+7yT5HO>iUS7dHeO+azLTRD;lNID)!&_)gM(H4~aD1r&qk+kz=E=s!&C+ zd$1@jzx{!Sm2txtm>udx*CyPy05g~GzZ$&%tz3qRyqL5BP{I@6`TES@D~V&1I=eoLtEWz>~{6D87- z>IM306u#ZT-yO4T!sQ0v*cCiq)~$_ua(fSB;7Dar>e;P1r}PR^XahT=y-eJuw@{;^ zt!6m|X>&YQx0bPj?roxr<|b_%&cTR`kS6~l=(8v*Y-k>Ra)2*4u{LX#S_>+H6N6kL zWC4UM)Lgwx7X9!CWfZ&QJn!j1qs{2LK%!VldN}?S|CZnBhgKsr$?|e2+U!CfbGs*M zYl<)N3={1;PbUwh;ZAanyzXSXpHC01R!2&T1JU?+|M`yD9kMk4%a)2JP4b#^TkL;P zf21@_0{r3v-^6{trc?HRy(#TiJBN8L6zX~d&s|*7S`B5nE0*NASrZC9!FS5|;Q%svcwK2xIg*j!|@YRwxfav#{f`)mLCuWk3&6~zMc=C!*Nm(|a- zh3l4_c4aij^JM;QUik%F^!-$0_tW55c6l~)c>!d>DCsjr{mK(BN z$E`Kh&}Y-fr>L10G0jv)eA%r6h!)4wjKlcSumQA*85pGoDj1%ViqqI!PJL$}2?BoDs1@x?qG`Bq;@j-MU^4=XOw zSC1(-?!1q)`xOE4NF8ZSx-90!=1nxNuIHt)>*n(yI|(#vCKaiminPz(`@m^s!>@>_%19p&roZ2EEf)9chP7PZyQzv;cbx68J+7`*;LTmJ3->I2tZvg?F?o*P z>mH9Qnhi`(dwgCelaXU=EPTcUYi-XH9M_b;T~K2<;qiOPz6q53$)~Yu_i2n(+m}0l zE26%q+}0G!c0Fi~afjOQv7Z8L z5cjF|+f$c~-Ocl@HoGUDFmUbm=jyY7GNQWPpJRFguYh$&_GObnvUD9#NNTz2xv#JG zqFd%_wSA?+mtz5mbD`SMu624mOj>8nlhE??dC_xKP7zq2^ik+m<{L)gzmrN#;!lT! zY*3M>30|;*h(O2ASFBr;P!7kt}#+=y_Pg zX&er5*8M%Ng9er{+tBwAC#jMss}bk9P7eH8u7B9zO<#b%lQ zM7Egc@SwF?CQ(6{N0iktC(^KhH_G9DxZrZYiYW!lZ1{n#`2$!<>o044iv>D)Bz8!) 
zVuYZd$N(+w72dr;bTM<1a{KGCI^Q5T)`mcxv#n33#EgMU8Mu@FoVAm}&z!SRQlRko z)y!NI3MP9Re1eu#+t%IWnDPWG9caBy^kg65*Nae*R7oxtc2>x%vp@RVA7guaqu?5% z#vyh_Cdu}LgQS?64oUNoakJu=?q8{C1e%*5nr#b-2;n_A=; zEv{!)YS*d?1)pc8M5&|RvqA52Y)o-sl7C-oggV@|!!5EEEx?y(hbKTLD>UY*HO@Pw z;ZE14&_oy8N3}{j4IFck+ZPGW z!jU)g4L=3{MZ)|v??ghV&x}T#_H6|Y$h`z!Wj480CN>|DZZx%f4 zkcRWuClVp2AFdB5TusNQ<@rSkR({-R?s(gSZ9Rvnif~4q&@_0`baW9o5O4H8j^{oy z{J-m^19h@EtqE*Oqcd>c2{iUHELP}Y3iIU!FSWvf-3yrBx>sMo&}X0dFi;}@Wg>hU zJwXuQ)Cyg;tI0}}zOCbbLVNy*u3O*MiKj2B{UkFFi^VstzY`QMVQFkG>=;v<(jxf#S8mjGibRI zQG*C4b@49=d!^tc-^*kQ@!lT4q+hMmAC8oeDy0~?Nv;?Vz*^0}HK&{7>>R&;X|Lqo z5|c>23`yH*corV&}M-Knd!_ zQqr~!Ch3KA7_^oa6Gqk~Lzr?X%71Zh zrf9DErp;uT?#ll-QTo{wubxJHG31TGlJSPDYNq4{jmbTkd&R>aqDpPBLy@+SPD!1+ zMgo%vO)9$|wj5Q9!U)>L>QQ(3|GlI=j8-6HO(P=_IadKyVB>uxxkXaOL6v@lUc`}a zS4CVFFpI^eT%oF3sHLGsD7@--|?Ob%BRZ ziI+FiZ9Z6zJ`&~I$uAmyH(1#sT9-GlX$RuHn(A@Fof=RZV@7+QGFp=L7bwEkgLRcB zj0|Gi8cyoPaejFbN#?BR=BC2Sb6IIAD0N?-)yz;JMn42_Qx6Cog+_!7#(ET>@F>O# zKZWU6xhc!F7Z<^y%8Kw^&>Dt<2;pqM@}9JfRa}jdFfC$3n|r-T+1@j zz016@_*gI{L5q-#WZL$Mk%l9Q=^k{Xefsn0dQQ}~mj2?b^fPSIG;yqdAUO;nM4`W0 zZ02$4+bE1Yu7cL%4_)tjqd#t}dK)^Rh=+hnrCoe1br)3sb-_)nM~Cib&7%StHGg}H zG%mB~!`16aoF$sqq?J!p`4RN;)oJ((*627v2DT->?DHlr6q0?EV7$+?S&pht%9}_f zJc^5@$}A%)3Sg0o+#E!#S)M-wE~h_Z_^c}*iIgk6=So!$zyA3-H^0zImSNlQdBqf9 z3ACRpT;6bFs{hSF)&vp*TZeev50YL=z9XG`8Ct(p67#Xh1wgA_h5Mt|l+Glbq!JDJ zj_gDEVEc?Hm_-gI1+8s;)YirWxvMI1r@_f5+^l_%)sPmlVUrY%W0}WUjWSCLqshkd4C+0){cPlJxK+W0jlWNH1l_M0Y*7t##-k_N; ztquA4#M+3hoLi0x6h0&Q4v|D3Qc^(stm{)ub3UgGs*-q z^$GSJm%k-EPOJ^nVq8_{F^-*HD@nsWhPrs|PHosLyvMwLX+e4W;hZ8mjtslQLopNX z9BZ4F$s4QUfvk`b$f-9{cLHw9%o3omu}@rq^@J(;Y^L*+NR2-TByHcvpgMnkz} z-tT>m4cz^+bk$H1oUhxWD=f0Eq}dn0ydRTY&7^{n%p0R#=oRcwP+Qq_?W#nrJ7O8#9Et8y zUpQ#=_eK@s_{B45fRic7tmv)c9!Z6YY7$}UcP{_w_;sJbEZ3hHphBitmQ#5JArZ9P1534^Kg|hh=N*0S{5%Zy8HxwuLm?4u5TU( zG|A1{x#9lQ+)QD8VC0d*E<8L27Wdeb6mTB>KEHE(#p`bEf9}p?gu9w?g+}df36cu* z4v_Zc1&$BoP?@k7H?6|{ZE{x81F?ScLD9KCYNC_0slPlR2o39Z|JssE&*Qd19PX!^ z@-$1#K^#QGgX-6^&Nv_*)mWTnxU6#prN-cbezPI_4~~PNUo<77a4;a!Z`bEQHaxOg zCW*%UI6ZA?-nlMvj~DQdfNc9Uy~F~acXJKog{`r|@A8|&l-JzvH*J zUoT=`Dz4^3C*%QRKz8&WK#uL1tv(g16;QJusrcW%?Z0716s%*j^^`67NVT@tzTzxA z>#^W#6$&Q;Rdl>E%EsTVN&mlq+u>Y+1pzJ^qqh^><^m`l7Pgg z@8wwD2vmQ_ILJy352IOT zp~PtJw7-pus*m8|@#^Aj{VCRnw5bYjZ9h#ewt3LJ%|TwFBs^w6mfbN}7VEGik(+jG z&9|6;8ltO}PYQiKRHozR-Ft>(;Z&af!AYbwwM=fa;5YJZzW8Za+Jt!P6^;crH-Q1n zBjR*`v08CcgR~CQjFCR^S+Gas_2H}1^($~mVqWx{TuPRc=`f430b2T&x47e!m zQ089uz8=UW2^o(1k3Ugf(E09jaO-aal)#S~X}9Dzu45aB!xN_)!(CiVSVT_jYQLtX zh&+`tfu-?Z^>NEj6Y4C_y|hEgszI8%Pjs+Y&zHt7?|Sm%?oehCMAsW);~1St9Ck~o zsWO6c%&dPr1ZQUkA35<7Vk&nGOy8Ft`NjAo82K>-Ur1gJ6msjH3Zxe0K^uJaP}63* zB(9NAslUWuw8K8n;pADehCmMBUJYV^l=Rx>(3!QJVa6NCVLsV&@9ImF4VyMmn``AW z%KG0>z{w~Wm{YI%;gORawkiKz^^@=)rRkOe$12zBPv*W^Lm=&bzQAkVu;JU0?f;3^ zBi-!%^FV~;x#2ZwQL-2j9Lm2=giJ6W-cBUQsNgN$obN^!$N~(fEr}SM-?GTAGVXJd zFO{zTy~V{Ut1C8ID(Oog4IjS@l#T$=?^BY9G1i2vn)RPw%L3pEe|&UEE6m|hn)*Gb zFv*%d+r8v?IQL6k>HR*QTKlgf3QWE!WMFbAkI=n|rJt70d(HD;Q$9`zoXP0<9_#%P z6Ko!gYs4C7=y6|8tJe-LNGc(pnabwl7?gM1~u2^kS)Mi?p~=P;33brlo( zlKwR?tNA8`oB%u_Yi3ig;u(TkL{d9Rhmkc9=KIzqN{eE2-Ln265d1>pOgEOfhwMN} z$WUq$ygb4I^G4gzB}2YFNCm`b!;O4x^8@JuON(sP%bE~cwG}-$BHI0H5J`*)ajEiW zR}DjD?h&oxFA_oC1^T|&AfstY8Ih|93gbt8I4ZHHh2e+@ozM+~26e&6SEOpark!iD zN6vu>ZGCNFFdBp5ppJWSa00TwIamwg=1fEia#)xb54@lLJHggTGDFfr7N<8Jdr{|Y z4g&>NT4e(KD}1Z%p!*<=K~d}NYrnL7$cBL!5sDfhoOv9}rV!O`K6 z*n`~PEhDRZ;!+FMpUwN>(C&}mc5KmA>KVCM7p*r&^|F-$;ZWUp9ekgn>|ADl9Tk2z zQVOl=9F{*0PLso}jlpN@>(s;pGD-f&(3{UVHHte5$6Ez|W3xVC0t z2U$FOd5tp~)&1GmOghF^GGKJ9ijq2-0+)Tw*_L|PY0W=87&IoQRvDY`0E55yPS`N) zTcC+`xwD+h>nhc?w5G~?j+vMN%V?cX^Pk@dh-6eRc 
z$o34Y*wtgcqLFrxx~tC3^umX1nEgfYb8;FTz{tXAFSLFP(!u$~P&KySOYRozoB!5p zR8O4)X=$1AeBOlz)*=IxkyFVxPHt1#u2gZ!@2nN6w_z)w@70IMUxra07eurqVXq|1 z3tg%B)p!gRPt{1xZuCauZZyxjui|sgyB)B-)mh|O&XjTS9$!OR-c&@Na~+LbxZ5$& zT-~TYj7t4CIXUn~sg1GQip08QEc^WLrv1(%bb=re-}N}r6ir88zHKztr#zcZu#{7v zuka*v8B8PEG7vf} z=^R_T%C|RX?W#(HHhjW|75aV`bP_f1aUhqyAw^f4BguK?il=QhVUdv1d%&L zZek~GgFqF21+;s1>99dT;|sRvnRv5evA1JM5rx1{B0ed6kt&vV&W7y?Mr_pMHMkVg zf`!XL)FjkB+BMm*P9+fK?5P;}XVWjY#_@z_gUu-&*V|;MeR78~#WRqEJi*4yC?}or z4FcvSGn}jizuK*tGO%KJF0ERl=RDyqZc?O1WxDnFAcDvAggeIDF!4ukEyEFA|6^}c zxYG1@c)ej3@~+e%fs~BRJpemUZjda~UjEEtwTu#Vwp(v3iu5>y6Fukp{mK0;aHfNL z?EW`6G{r+(xMTd!hmdxMSI+ttvaaGkmvD>9d|+voG$`zEq)#-9i7nQyr8Vi;*AEUTK_xl0UNJD?4mA-r=t99 zE3Dn=?P&Z!NI^D8@kF2wBi?VB@{QwdQNJ$Grcc;?b-5t{1-7Ou`-5nx> zbf78odc*LWssT&iI0l4;Del7-Qkr^z zk>iHDIB!P1Eu)!1OAP*7oVj)2k+3S9Z#yn{S~91qsUIX*4mshDL^3=doU7UmzRTFu zoc0$WIKRe^ z9cIymAjsBHU=kx;fGY0S?!DWVEpr8=`SH+@Njsr;Nr@yNFWMtw2zem_ZCPjSr&camG0-O+_niUx??Viq-r=GC%t(LFIs z=@(SK%)a?4_EMzU@`C=oG+-C^+s0mpNU~_xM%i*|-KdD<)Oz*HYCzmaG=J?E$I8`! z9Nn9(tTRE52>+7)B+CWNdSy^u=LDq0 zjt{6}M8rDn`3zju+z)H{a!LKivC-aGvM5-qQP(Bf$T~Hv#^A9Qej`c2P?zw4Aq>R; zxO*P->dY-Wm~EN9*Hwm`G_babe}FY|9mKb2CVs&Cy6vF=NXZcL{p!y9L#qk?8z_<^ zlTGwGP*I)WUn*~71s84g+DxakMG;O+oo_c;bs7LI@s8Se+&%Tjh$;p-ut=_Hp|fNk z+9>5#gm<2eqTqQU<>yzZyvFqf~c-W>{>R%35OLt*8Q4R`n*mB_i89z z)2tk?KMf|kV{O_15mfs1ryAN5454u+8C1kFZjKwnUF1G)AV` zS1T}&+&|TflAGU1UKHSf1UAZ|A2HP4Rn-RA-WMM7&EvC}U<&EuljgnQfp2$~i4Nu+ z1cf7FB1y{97NZ}I(&eHa0;fdOIxI!$R_6FoUdET`9{6rafqR-LIcB;x>c}`7)5)j= z2URK*%>DfG)2`*R7X=)`GJ7O@B}g!EhFZ>5u71y$7vvZ5m$GI!MLV`~gS4S*{!o@C zUH4NFXf4&IxY61J&|dx@^1fGI&v!}a7GjZtwdVuQ3l-nJJT(5WlJ6LP(Tq&QC~f&y zlNjQdTyrpAugX)nV&EFK&wq2aDH)n)eD8JDJV{|^tp*b!^6DLDtp5#PU71{Ny269` zRR&t&s_!nnA?tZh;O+84r}>LZ;RT4nv9A~Hx%i8M46qId02_;gF{`M)8%I-?%N0Hl z9XkTP3i1MAVPzfKFTzEgzunGyEhPQ*RlH`21oP&c;C{sHuK zsR$zX^twdd(iTa$xuWNGMaQ#UpA1!>t_-=p#0<3VN5nU{qx3kS)R!2^Hwm#T*t8R3 zw_CEe#-KM`>0$j1a3R!$lnvd5)t4WwH-IvYh0-;-Mtr}^s10#a&)56AiVGkO87OZg zIHr@~d9(a|!ZK)>L82Wl|B?!m=Gx;8I>GTtIGkWDBRPo9uv?0CH{uKKYojw}>dQpY z+$-ZTLy#d5z;-`=TTcpZmj9-2_Ey;-k(qc(RyAY!QvD~TH6aPl&Wz#@=b_5oXcaq3 zH^%B1me%1&93`;-to5N_$%r^Y#xz^C72(iimyhjT`47zkt+XNsXy!xwd(-nxu-O#x zeqk&`au7S40Xc5cB^G0#Ln89Dqqgv;_6u}6Je0nG@FrhmNUChN@0%rc@u^_=r;=He z|47gzsB$wRi?#Pb9#ciF3iINqb#EK<@Dq3xOzjee$U}qeCzI;b_^h@*A>=Wnnqe>^ zz?~)^Gk0@Fvc<##oACm9-EpTc@EWV};gcQ33H8Iz+Qh<^1x=4?f81N(Zg&U@$sJn$vU?!>Ejab#H!{Gb4r*GbkA_!`_v_wQ? zz06_+)hhOk+nW%lSWv{_qkpNAt5ce@qXn<^#KZXl5bnD$Ef~k>crI-ikhf{8`7kaj zlwz51GS~gxCFPic%w2iw@LuUVJv}gy6OoGwDW0Dgoh!)*16T)zitbAUP6qY}JMO&U zROSYjdBK?=k!WZJqT-Pu{(fGqNZRffNo>0209PT}Z@reH0+_ZSVXe}C%mGrUEDZ%c z+HKmqZ;fovvoF}SsiTaNtWDF;Wi2Kn*kV!!t%zPuwviPmfI*DxM6?wxU^{zgXBJ^X z{Jf*aKO>#fUe^tv%#!eW%-zGj<~FOcVoeiw)ikXwXXV+ooqYc@u@124Fb5Sfe%qto z5@}((yPHFdi$u5ogwL&wApDaQAhvvn z(EB-RTJi$alrt(`EYIqlVFW#W-Zf43-wI+Epet-04(LwVCiW?#{ z7GgCuVM(87tOjgwR+ep(VTUc^sj~h`3AnAu+d;(ivM*VO=0(Bek-FUx7XD9j99uAVAYpNSf+OUK#g56Ha#F?0Z9_tj;{@WndNX35*+ z0?qr-$6Qo$#|KBm8O545O$kSQMq}Z+thQ=*lx@F|7VTD?UtiTeP{B|QOR3H|2FdU! 
zYRdOe2_&65Q7(utK(ciCV>{rSJz%cjDcc0P|3rA*-+khOoi53Ldo;4}9!O9X5kZrd z68J{*=_@r3x(@x}bw?#}?2S&lmS%DjBE?4gzfNUvzMd(R&lbUQ&bGTt#O15Nbya>V z2@PWcE-6lVIwRai|N0p|D!|QLo19X0(LcLQy_3ut*KQg*Yp0itEq1I{@9ayX@5;L5 z%c|Nllf**)p)E8THyldNzn>p6oz7~amm@!<6>$L^DvLx>jo@bG#xy)Ts$TR#gmBa& zIjz8GC*a}7Quxs0gTF!C7xP;3FvaS08q|P}_vYm$;3e%ZsdLQvgGcYoIIu{+)d`4Q z7Cg$2?OagLk8A8~Kg%No#kPYTr&!bH=?8diqd)gkgRmkfEe8Mv53y(Tod@^pL0UM5r_#>>SI<*vNirz0@*w z&TdYGXO7RxQp(I(xDkKcA3Jpo3nVc6{cx7KC2AOgi!G9O&Pshmx+4}BmLA9XP~03% z3by-K82zXUhZ`ndo{XAgSCEL1s*@{PocJ*umtZWo`7wsvBeM{mBSObuz)C zNtxCGV}oJ^&6-126B}d*p$@DUYFd+k(Ng76DN+TEl@sM-r#?L}CgJGYzD@x_ZKg%z z`O1e$i)M5)WdzS2kpZaFiy%J9(VdyP8dqVMDjZ4npUA{@yMe5q?Xw%8>7F!g)Hb^* ziNFRd4jzv^(Arj1irG;Jm$|22LiuN0x?Z5GO(WFaXySO#^@2QNR!?9#f0J}Q^T#DJ zsn}vvskRZvin^MrUm!aZDxMVwBlVFX_EHVsQ67#OfX%$lGa%vC>7`z}9 zjrJa9o-NR!Ixn-j4iYfIDaOZrBZnw>qnlTkGF2-=y=zes#9WTB9KG5wS`uG&PdYl7 ziT1R57sd_}E7x!9rjPI7cSgY?+t|{mloZN@KDr!a*JL@Uagk%QG=~LE5$u!m;GAmJ zq%;ow4WHHBuvkjZ5DTu>reD)Mo$FfHv`fI9JekX&{$r)KLIoOIUZ(DaI`FhqH~Mu& zgj!!GAZl?otKl;zSv$%wg6bQt|7`lM*masTkC0Uky<@w$!_8F$Y|o{ibhxcszjX$7 zc%{z4aH>n?q=4%_@T;Z{+-AEuBpba`JJJk#>4cJJ+Hd^e6Kl>@;$2;MdEO}Z)p>O>XQ^rH55Fa_%rkE@-yvI)sMNS z{5{ABqZ^@;5+1?(O`In*mq5N~@dVZ3I%rop78QCjg=Has%2{j%$B!PZ0%j?SuE%Lb$&IncaR5CKeq6RDJJ=bPgw zvpe`ljqesXuNYuDY()mL$|U@|s;a+Rd;e~+n2|69C)(eQ8n$Qr1Tr`d8-6>BMZtNd zZtC=>&D#ouV31{33xg=QmG4e-dsRWMp z@43=S%+h(+>zs$sN12x&Gvc_Mg^dr2Kc(;(?OU9S$uCf6%9RppX(@LnsGbmUaY_(4 zhyE28%dGw%!{WQ%N{Lhn(U5R=XB!`5>A}+2=}(3ad8NKFi3F(o)!&?3qoA?uYBGj#-CFTXWrDysy8R znDArzLf7p+&BGFts)kvUD;J}jieq&H++;#59GENS+!L0s^AUIOipWY&S`$IWMhZ#d zUOhwVG0j%7S~wiZvH}P}2kHP(W`$i*@XK1j9)A<|QxU&+U%mWObQ)!$$7nIa(;nts z_GEy4)%ctSwAY1^EHsSu6VG9W_RLd+=yTDp)w+bqSpgLGQ>+S=Qmmbb)Hy@_zOsMw zHW>tXE!|mGRoC|uD%(egYswv+4Qfr*Kq^ULzMG!8BZ&+Ay>@*Higg{MvAI$cIN_PdRx*=pK9#e~9ir|H_4bB6@yUDO}${80*MMWNVW0e#n9(1|UqNs60f=z-BmxSi}n zD>XhsQS~z6d1-f<&RJGFeAF{KI=8;_wV}DXZWW~umm^Hhx8i!RcqO) z?Sy~k^7o2E85|l9OOE5}Ma3Bza9zc`ca#WP{Ic||`J*wr!Hous#svo#7>stG> zFzXYvW)*N!>N8r?RNMyKVtxv~JX+y0`s+D7U60wLfk;y-eyQHe=k!J9WQqpmvW| z5_m$XL2#SrzTrdR6iCV69n^XmQKe#+sup%?VQXfdJm@Mf&s!;rA&bW{SL9)Ty2^WD zz=d@$6_GV4L0FywMce-~8R#Cf+Sra4m<2hQ9L|)vrFEsQuHjG>jPIpT$)zXE>$%9E zMX*=dkr?%tq#5WCfEy~inZ5YWEJVd<;1q*ICHCN)2c`Uo84+bLzhfI^1XW9J1Bvbr z@N_*HB6)b=F;B}Q*BqNBTBr9uJL#^oiIOD^&$mMycJ~nIdehpJgc_TQHFlInfZH#! 
zHMiK(^DrA!cbIRFUW45r+-`bp%<5w`K_lrf`D(=p!H4V>*C}}1(j$jU>cP9<3u(0T8iS^KB`;yz%9jD^uO z@}@1*Fa0uZnMN2)3)vz^<4pXG(voZaI|e_G@ObC{AINiJcjzHO-3B=b0}a|ESXl8jK%JHsoab`KxO>aWBJjj>uJW`=Q!7X`fmN% zJ^s_KhkQ{yUe0gj4_$U$4@CI14?hAklN5L^iXj_yg}ZhgW|SZeJR`DaclT5kUEX1!~WGD`xxI6A1U0y3K1- z`(0(NyOtm6;gg3*IRl_{?`2@hfW=PCz2<4c^E=(-xZqn40%0+(foBhC<&v%i5R{Vw zoG51ZUl1%uIc}^2LEvRm783gAsSTB(_C;QEc36S%sYG0MWXCsBpJ;Xxooj=d-xioD z(0r#lX&oI$$glPysXl|d5lqOP#l`6hRrXvhW}`3*FX^r$HkK^qMuJNDK+rwt#w`gO zFzOzU41ZrO@V;bzUvf9q(PZ7KCwR+uPJOR@nq=VXRM>s+RN=qT4FL=>NMdD>m|pg` z`u;7m9kSe4y_VAfPVXFXT;dOp@ce3}J0KyaOl_xhA(d2qM_Ib1W#9m|lW!}v6_Pi^ zmlkWwg7Y3_?O#ssRywRQO46kNhSoded2PG=fT)0AnMJNQRZa7^c_*m_Ub_DCza28E z4tk{g=S>eYqG$r?624DIRuz4u7YePSV$cX~=2rloA-_+tzQ;QGi8X4~ zi**&b-js&)x1J4`5h!BN7q(wj=>t($EKDpY>GhxSQM`#uRwq(#^1^p~W0WxO<9RlF z%>=e^OO%vyTO|D4x>T_Yjh;Oh|F&`;5U$@!=%{!z{I6h6W*G=GX6wu6)oo%wP00PY zGURdk+T-fbU15QQ!A(cD{cMp2D~$>z!L0EQc1ZBB!$(#Zk>n4U%3*`?;5;CL&AtEn~jXh z5q2*LM=qVFg24HyTm+{gqVPmL3(dc5S$v?OGiOUt2w#xJ%j8cBEQnLLJ;fRmK*|wuBA0L?xT3 z|g6f)9L?N`JAdC#QxI}U8;k!G3G8-w>}~b!DmC;X4k`|mO>f(5`<0;g4_L&8-TRq zf2d_WOQ3fMNsHq-ET8QP7@9W(M!@S=)$or@xT(#OwFh`$plU;^L7ttJ3`gHpSu#=A zb2E${&g<`l#exk1jX3+ZuRIWO5HYgDbCI{qyFu|5sPO26QcWXf`F(n42OyQ6NAGr> zPG>-ao{Upfr6I&&$S_cl2An2#njtE?wV<2pH8WQT(AyUDJ$zB%P(4M&mWX-9|{fYbnZ) zcpg0oe&~R0$Y%R&+~&~UXAHvIv&J8FUPJ3&6nI#3{7ze})(kq-h}(aDxxcqz!z)p= zH+0IYHd&_i)8#tONtiG+y;XHRRoQ*6dMS;37|{$P@NG3OdBJ0!PG$2N`(|7a{UIVL zI!f=*!+)zSQ8Jw;&*w+|Z1-wS5WRz!04ni^=FjiT*IWGOe?e^o57riDjBO?!k*JEA zk9h<>^&`F5JG35@LJmr^sJP&>v4C8~z-d{=5oPKvRAf{73sp(Lf#1OCiCJbFMBHe!x7y`QZzzOQ*7K^INpDa}=Z%C0Ox4CHQ$ZUhL-dv8%af5VY? z3!*4dvP=c4OETrC21=*EkXeJ$hC-up6=ddBSA2KTa&*nL1%jk!@900Af& zWp0sU9x72i6YzsrKCVI%de7e)cz>OD;Q-1qI_dPCc2lfEt{1Kg&u1DHwl!g|L?Koh zbzVLtM|B-c*9RR&-o{(V2f~b>UMk6)#T`Ti_V6Sb4yl|_C*3i>noC}DYP`r~I{s`t zn7Hig7rdO3B$fx-D$RLJXA7(X^(t(W-IOh5u32xVWbw7OUatuF;mDu~K#g?75yqmK zpHxZ16%_>UF3@HYfQXau;gCG{Iz^9hCP`~yz}#;F)!0<0xcj0C463FoSNrYa1fp;m z)tU_6tJlm^?iq7{+KVmLfp`il9yNc4-w}ah54{O*hcd}-|<4;ESI89ZL zO2>Mqluduf+>X&Xw_9Q_xosCHG{i`cr;@*TDCdw2G0dAvk6dG&ttW#Ki*uvBcS=yX zYZZ^~ye5k!^~C*e`@q(}R@Naf2R80|TH2=-fSx2T<}$f-P?ndQ?)J;z^gH7W;1I|< z1?p3#Z!Ig*TQ=@R%>~{oLrii8PNH~-vQ>aYgKLQVc9+_$KS4>w&15fOb#^;nIWDCje1N@2P|V8ZJ=M60-ZYInk@LcW{J#O()CJq2@8nJL>uc6F zxb7GvLrMNG9cVkUo}TyT+wo?uD9}#)Uu|b4n&PP}OM9 z#fRR!N9{7=3N1C((a+$oyg%dSCo@_T1bqxuA4@F)W5@>U*y`ihj+v z%Sz{#WqSSmfwS$wr$N=k6^FKt#7CM<@X*S;1Ov=$cl!{GOsxP&f*8hBuOgbpq;{k)%ZVs)vk=YPk{HjnN)?#lV8I~gvLiHxA|d|*|bJsx+zx#xdHvSpQ7N?p$E1M z{A@gdTe=~KkXo@TOn-EQaP^T*v4LAc&nHUhG8lw~_RUvSsK4=3$`thVb3}eWi}__# zqP{LGF}M^K6EG9AU-m4DZB!f{JkV#{FfajOsSilYFfa(;zo0Vbh@cl=rLG`%(xjuaH&0`PR^r?;@o4DXDy!QmN&UIdD z{yTiz_mcs;^`$Yr?;UOONzp-(nU=AmEGSlf&^&U@N8vtgF3Zfzk43yS!)G;_YUPKYfV$8>20wce)UWps6r7#Uy4Xn z^PR?j`ej{JGaVoBXtH3;NbHJ5tpNQ@53(LkAU(N!5Q_GX|FHELiffn92E5hy=Q}JegYu;KTl`2fREXm;iLRgc}VbmV|+xrZw-X zA`}e#&)<{w!O9aJ<~fIAM(kP0;;c#$`6i8U_2cg zP&_Hw0;Pd)pz)fN5<3xD2zht2wM^|VAGORNX1xT%B4+m@OXP>ac4L?%bMpbhhn9@{ z@{`oxQz5~)q|U6N`#4v;!jVqqYUnzWpBW)3(iAz@;EJ2MIrckN%A zDM%3WPcty52yUZ^-NF>$V#tX?gyqz!{Prm;e+JXvTNr+UH|u`|@1~rR4k-wLq6pr{ zSIxQ3A@Pg-N>Ci#m)NJ+!L?iPb8Mp=zE5L_p%!bcrCU~;;xWl#?!+ej-54q?WrZz-)-RxAqh`8avI;Q9W>Dil@&lw`IAamwdVJvgXR8 zKF%^J=K`-^ICqS@@KObbTIp)oxrBvFY3)hGmJ7bG?E3OjQ zw?%^f1C1=u;@KD}VsLcOtlc$#V+d}zM!L*;&j|%P=5<%1+8c*So3$sx+A&wL0njk^ zoj4aXP|I^B;Z2*7xP71_YTpv7jDyfI{=6B9VYBWD#~3#nN}j|=@Qh4K`L~f(M}Dmj zh_{lGBIdf6;(ep3Vp&F?;9j97)X9x%;vWgYF81->gVoi`vAWXdCM~H+1aBi}X+!*L z_1_%}mIPo$BQ=tv#m?XoviC@b8zSFX2FF>V#sM`>H(i#jPoE#^f<-zpYJ zBZ&yu*@?esWZEcI{W~}{CP=613&aPgVLU`-2YX{KI1BW|uT{&QcIH@DZQW|W 
zW!&w;`2x;wm-xy;GLfin0iZ}W34_-&(#zU}?XWwn0#bJ3e+sG_%lX*17IZ?1$=+>6 zeZdMLiMBz=?zg;X>Uy%vGr?n}`=!(CX+#y)zt2j7u?^6!A7R{Kti>qF+Gz(7Uz?bG zk>|Sj7>?gS2busZeLmkq{qROO9ynO&m9g7As^lZ$2sl+^3&+PwQ^SYG<{hLB?M#*C ztB_o0lH!iOn_BddFJvetiUK1)P`kdasw#a#MH2!uk^#dsGILa5GOK}^l6E`)2X2BI zU}&pe{@5o8rjjFw`KJ8Iyi!a6l&xG}+Zhfc5mzID~w~~MFhSUog=hU;gEMKkdD?DC4U)# z;%UJnyXHMEs(zOBmknb6{5<(q^Z@&)q=v*Zfsa*0PcxFTZ+b5q{tml7%D|zV8hLK$ zWMK^=q!H^*O4ks-@h-WgEp|dpv%)vB?pHZ5>gA*hv8!ZR9tB5!1MspFMU~@|h{&H@ z0A8D-uCmwi_JT$zF_$}ef|P=7Z7OPHNnipnwe8oZqdCdOdywYgGjGF z{8cd6Y=q9DV9Bl!Fv>X+;HP|C3Dc`x49L7IHgHNJ;bF$KI`z5@5-Y!fXzB<;^cr9@ zMUFz^)WYKhj4Fz@^eb3)9(Lige}2yO7S~_i#m%;g=AuOmfz52cAWE_fn8dk;8G%ar z(z*mB5l_ zy#Xb=vCpDeWFyK)4D4vfyigdk@SuO)7-*r`Vs_%YR`KAXmKyPWV}1C58cMw(XR=`2 zQ)))ka6JRRF}neH8vjUc)AZ z!1T$E2Bx;wf|b&uHxL6+Nn&C8vteFX!rIUE)LcJb!{ZS`CWu0#Iz5ZKhST_l?t)Xj z*%<|YGbErP2KrUAM7mtAq7ccWtJ%|QLvlK&!5_3V37Gn_Qc-PwW4JK&r*QUO(xFnPBBVM$gjxd4ZYUd)sFUmj7RS~}di+;j zv#>U5^*{f$<1x_1MDV?G#tWCMz|yk#6rOaGVVNuICN_;MrF+9vi_*Zf+ZS*jm+Q5mwP|9Tc3MZ|T`L>OPMwue5xE z-^goy;FsGGL51nHoMe(L19f&#cHY@C~JexiRCBnH~C~ zI=(o9KOwx~Zgs9P-aBgE`EX;EkKL);i_oK{7`On^T8J5k%O#qUF++td^{-HF+StaL!k@Jn2WN<>6l8n9 zmuAK4Yc$>Gfja&wnCRN4>#F@R)7-RBL#$=e=%$8x4rQA zx4xWfIJo9>7pUq>QL&Y;w?3O$R-DvY=-4!p<*fhCHG+6M*!XVbhtg_n?__-jITtOCS4u?v9B=)dJBiv%8915=DRKU}ivEIocH z7a#7Awaze0l|J@KQq{e7?Dzx@cMj#ZsdR}qQyUncv-nX1MCO3rDF59Q39ijiA*|+M zB=_!lB}^~YLpvIqz|3`iJoMwB9=^~o=QR1e*-0Ci%;czn_>;X6{l`e`v}A6b^2is# zvIe-u_=QcryEAkWYXz^1grE{TNz6vbb+Bm23~-k&S!$|b;Un4tzK>L4%xq9;Lx#My zNw`;>Y4K$F!UC}T4@%IyN23{qmjY!GS1y%=Z2|0|jD@bB>^G$M>lA|C;p4se`LR$= zLblfG{XCbEMDP>ZPa#D?tA<$QDWzV+{jV~emm5iE0gaCVKTwTNI5T@enXX*^Uq5_Z zZt~`eqSHUIC^`nZi^K?=JB(&}ayZ z8l7u#qp4OZDg-;i-Z+liH_QFtK0JBX4hIpfvBgIXuidKfZqFH3%Pb`qT3scf-SHU| znoUOp9l!q(`NzEFJTmMQqV4O78W2Nz-)Av-IMYr^VD89?$|O2F+JJH%_pCV@fy##= z&uis$UJ9O?Sbh1FCv<#Al z^9ld%N-SjHfRV--TQsEe21Y^88t;@1q)ycFA#fCCs;4%%lg^=cDYydRaEtMKMk+to z<49rWU6?P3M+I))nTh8qUekCO@ecy_C30@1WR2rpQ1Evp?7OMu4FWsVyGjSRSBLYv zh=|caEf3%)T33_19>Z7jc`L>K0}P7$aMormFG}0J3D?7;{B5r4M}i;TojAuk+a#!& zf$|OAf}VFLSP1ex00Gq*js7MtxNyRFF_APEGlCUAX%-gmsOwBLQCBG{lijJ__a3q4 z5TLaE$2p3E$MNCUMYm{$u!cmeh}PWjw58?b$VYyfR!bMM+<}==D<1u-pW2S6{KC+g zx+z(+loZWO@`p!pdkzGwBV)%29mgK{u3`C{s+B*Pg-nkRjW1)#Z||8~9S?`n?K75V zK}SXQ|C!=)t@u!A8l7Onz#^$Bmold!e8wqLYFiR61%-nKV5bo~QM#pPoKj$g%+GDr z=*f5whOa;G4sv}IgBvdv5Sk_5E8Qlrq;D$irKEF6M>J)6juydu^%-tZvXjjYB}2ii z-aqI!685N4_cgspi!;$B9NbJNs2dt%dWR>u|CwVPYogJ2p?l@MM=4?&X04 zH1=5)uBI|h6%wYJDT1H(o<$<_2)rbJY4eV22~t@2T5Y@Ugq3LFA9RF~!ff|2{F7+~IXjNvm1OJZJY9I!5gXbl`vN5h3k3Pcm*_yn60SGV6SI^!Yb)5mmc{ z^Uy5K*@X(b>rX8;upTZSx4|#v$6oAJzROjT>9$;}ruu4rejVJb)%M)Nc#Dxaet;A= zFb#sEZk#DogrmcmL~pJ#70QF5HM<~;)DG%SHVDvvDwb}Y7QFOzS#7{Cc}lZB1i0*t}z_ZG%}*39or};XL4amB*tV#LGDHO;xOon5Km!MFz*k4{6$Ttn)<) zZf}dwI7JuBS%!alfE}vXcvn>Su4DjqE}&br{I*~dK*Do6C`lXF*bWJNizfK^m4yJ0 zqGa6iX0PO`MVU!y_6dPuh;f#Z{k4!pkXE1tNIIW^CFv+IS*^G|Fi#-Z8kJL>oA*meD7&5zeZd(nP!Xa!B+}WR___yWHv*yG5IVR~8LpPT`>eTB z^$NF=0qlr|8UR#I-cNXRH_$=!QM3uL>_6=X@@T&tHa?&`Z{kU3aD>Vd;83T6pNb=e zqZN8Ngd#|D2$0nN(QpFt7ww=E==l^K>=EXtr*m$H4GR=?8DXQ!843*SW+_Z)mP2kqZ z!SP2m%)>##Uk8-y|F7n6x2-?bipn%BQT?Ok7jcjH0?^(W1VY;c1J6x(M6uhY3IEi2 zEt6QtZmKSoh)Pr1XO0MC;a1@9%ur?77J_i{KPUg4q}{wGw*#P?Yu)vY?xGI>lfMjr zxet;yz8=ltzACP66npRhG@qM@^VMei-M<`Gt@zNr=ZpA&Y@o4`A9v~n;QcdxDf!DP zc!HJ*UUS|94S4$R*Z#)cuc->(8Ko0>ZUixnVw|M^*!0JEll^rm;OWSap6_qxY8&`i zGt{;>+R$kLo5|tqSq;y(N8ycLJQk)9*wJzkAi7Pf2URbjk>xwH#xd!rzO$SLsexf0 z38x8PyvTF>)>;mLW@owhm+cGa4@q#b*SofM$q9q+`#;(MThENwUTzwI2o*#q0ey;C z!5%@qgV(|i{N5J7+`9}D^Daq0PiR6WxvyWSb01C|VQ1t<#NtK~lr?y%IjN3gse0;< 
zb181}?fXvOuU8-k(>~0EIIb#ag|E47VPu%sruV*DOC{d&E}rWcX}&|6RHudP8@dlh zNX>4Co|D28hEkF4g$@BK@?0}YuT%T9`Bb4P#U++D=P!f9WRRfpdH#_xWsMNQ;$T+caQUY?(b1m5$Yqyar~<2PrkE)#BZMR7bMK~zA9 z{E@@=_B^y+f^2yW2JyX-z+SeC0+}U3xMb(h$uBVbEv!Eaiy<;y^;Ds=!~)#?2p(&a zMl?J$=gfwLVw@{qshg$&Jy03Fl%)Jt>3#y&@G_04v%}Q(6$@_c4`@NkXE!%}l_s?` zDjVDt!N&_UtDWJKnf##ZEjUFZn{G8Dn^*}L0;^C;?Cm4__{r+$73``6<}xyUN#29( zSke)6o7%yFLo7RZ)abmo=ycg6~G|QTY8my(hUDiW3cTPy zuLKq{LljLnrRI%@RBJMxk^MA}@*c#xR&o5oFSeq-y6W2IRRoMP?wIK9=u9d%+&yVh zaTdS7G%0;0j|jdO#`g(vZ46S~pIIlF6OEIy#EsNxjuGZ;!`yu9_4OM*o`3GY948mf z^T0)YuThGpuiN;)bxVTVne&giog(gMX;|cQ{$L7W zv#cccqaZZk6pu1@@%;kqL{@+MAplpwXA258w3yF85y-6l7|0O?^iNkrModo#BK5Nt z7A)FouGH~B-(dP&mS1m~WIS;xccQ@JF;;gwpdj4G`v&@5AHyndfvzC9MIoUUaO~js ztd@ADrZuXKJ##v36~Him#oIm9g$w_0Ss+7OW2Z^7@w=Fmw6{KRHIQi;E`VomY968u zHW!t`GdeF-98)oZ7@-&|vHJ8Hh@Th5;^}TEn=_>Qc>}f0KSTW4W)iV(!$RAEXb;a{ z1jO$fAkqBr(Wf6n(z8!x`8uuZky0~j^Qi*eKS;sTVKx{MP70X?jwY!3J6>T4Xcbf$ zs2x}wR|u!)jVS^ak~5G5nlm*B>JB$r?TBJJ|gKUCR%T3syL_hr|72FVl4tvZ#;c}m*(BnKoP-wPldxOPZ=x@MF;%`LXiOLvrTt~b= zg3t^-1|i)%-4s*`!#RM~TK&Rs@uQoz-idX6k@qd|rUyDq-8?y$92A4Y$$pwp9RbF2P@3W@Aoql@=)8Rz5{3B0#JV=Oi?k{SxZg< z!&yN{n)KEFXxgu8SF&j&RU4BM&=aI5fNl_-MUJaNhmkx2VOqixR!6i?l+0XcIE?dp ze@`ox%eHGf&dTvgD5ZQIfF}ENz_nWk&kqx=M=IntbEJXvYnSyQck_Iev_tmfiy;DgE5N`5=)~~XZ2+#+p$gLEIE;D#TLL~f9(vr z=A=MDh{-!_BxIUIJTk7txMmsO~ za}TF6B#&&F9zNT_GWQZ z`87I2!EYKR86x|8u?tJsavt{P(fCobS7jv;N`(R zsNSk-QNm;_>uQXCv@8!{_#@jpkTz;iDl_9;#g`+3)vA~379+!GM6;#NoAEZ%h{_T6 z!CI+rRE&nzIwxIjs=orPHcvd(rEdgD$`I6>eCZylWnK`@AYCob@^s71np|P7*eB<0 zhMHXM)$bcM#SJFwOjd`=XA;0Ng+*MKA=KlhQ*?TL-+V3QBBztT`Huexwn?4fW+0SF z_R#2cG^YrfdR@vd+gH@Up?gWod2WT;els--y?A_@8Me9dmeet z8pZCvPGBDRjr=A|C}nK+y#MN<7ckpq*lOF$o@c)J`s1yTKriPM3PAFqgV&aYa!tq_%8-iEx zPOKOkZ9Qx_PIg&~g7;q8Sg|B&LE@{hPl{2i!NSaA=8phq_8V)s4Sr;h;-B}%?h%0? 
zirY=|!OXWfVJ5D^5yn$GBI|fow!~4SX;=sZ&W<*Hx1TNouDvt9pDNbAi1U9l%?JH} zjS-1eN7vinfRhrH#(KqqYB@GRixQeloJP5z&Q4tHi2cTKRPbuS;lC^Z0(eVkteQXo z=ujl;(uoJQ+I= zqtv^vXq}F6{@s$bLc=4MZYLcwG2Oqk)8M6WI=x+SC=Bm|_p``*iM{CuBh z&Hs0?W`=Xly!YO(o${Fe=|8|!Gg61=S+c#=rB});y>O<3sRT$dHMfIxdKx=VSA^&m zS%saiY6m5dUGExhPe!Ey#iRfz=WaJWm(Dg%#W%gXmvu28k(@ss%8QjhulQ{jJQMhG zw!9zHBlBTwToc~Weyd4V8?3(7B2m0Zt^#9r?9Ru1c@Ly1y+!-EN62M3VA|YHh4ThxXgds8jffYZUZ>@CPn- z{?v1k#f%{z69%t?1V+356I1vtiJ z^ytPPYd+T(gi#JtB4P8&ws})CmIE&r$7>OVgGsJPyUVJUV?VPv8XY3aiR3|-L{>0Y zBA+W9QIs1_Gb~TL-G@>x)|M@aT;_}3Y)neg;ha&1i!a=A@*y`*Lx@E0~CB-*ijR^R{GmeXusyF8$8(vyYO~orAH;v> zvBs`0w%e|R{mWXw~JVOs?SsDM}`>y4?$ zv*ZRhyf$(Aorubqrv#nBGR?&TzTz%7ih!K57= zc#U1e4a8}ppdv-gE*nYVg_v7sHK`l^JRsRC{qmUES)La9?4fyY#$AM9>m~U5h~H@j zKm2rdxr9TKW_UuBxPKlFPSCAaadBCvatVC{R^#c3abSwDe?fjC%{3Z{Md>uiG~L+O z+Wo>y+x~juij-$hO5!c^b2%?!|9juJ6PTwr4{m!&&Jrf@i+>$0;6qH~=2E@NnNpp8 zirA&za60fk;%EieRz~yF@KBX?m2)=s{PcKJ>@jPPqVL_t=VM7>(EW?Z#pafLW-{MU z8?5+rWkDzMvhHSgW`0s~g?8cxi4!P+JgkB4^c%c7wg#^377hpJ?N*ULejg%K!V(3m z8|dmh0{=^SCfQ)$=e6z!pM|ps!o0=1-R6lct2tw@^)zT*+Z6ftsc*W}?w8}?fjNtzGI4cT4OK>5L#!BJ=p=BR3 zLkH37XS2oXvy%qg9kdE(f^MQR&NF#xII%6s_vOeOKrko^E<_)jlGbIqobI1(tFG+E{60am2S~H30U@Apq4e?i|id$<585uSz#fxsG3=Z-VB_+7R z2MSb$`&USh+rimdMy)wKUFPoqpb~8FUn7{`gRt9FPJc&#NyI@zAKrxWA&T_2-!zuS zom<}`on3wRy<*L8N!abepORYtYV*$axfQ}Vc)pp-U{eD%Ky9g7bThg*;3n;N}BJI3|iGfsf>{-VMsf1m%}&-IH}iMKd%jzK-q zZK|gl9+qw4`c;+MC9NXixnY~xwocR?b3N)WkIoKI`xN8e7wz(P{^9_Nx4xk(G8MjB zM1E*G$G`uQ;k^xJdz`O(4>xEsQ4)E+ip@|&LzxZMzgq&;uqmdl-;?k1a;4wjo8 zmJt**UFNLWy{TDIM1;Zmy*9X6n(8lwMsUc+@Oc6MFv={W6FXyLd#w^-NSy;SyxvY8jPk$ zCO#$sdVJK|2n6rC2f!QxL|%{14&K+(AI3llNIvu;#EagqgllJTbxT7B?rJzwYZ8=r z{G+lW+%SmYPvCJwGW3(a)FGZ?&?8Ltq1PhP!~S4GYtE7)IrB3Jk_b=9v?fZd>waG4 z8~Nlmx_stVMrc&hSWrUy55_-@#ms`v|9wD~O4HGPn#ui9|D1}Y9ik%`T6 zg~WdYeVszJO-n+WG{b(=iT#b?>Pfy~NK+}+*;YUJWIc3DtkP~fH(w>FUiaJ&M7_$p zTbg!p)nb*gm@#jputXDe^jv%r!dlx8@|3WYMA>C*kYskV+@=KQR$MINN(<3VX}m>W z!RU6QThuxzM6V)^ffFJppQ0roS_Vc~eYmBiIHb?y`aITYF~qkk?YEUS!+Nus&VQby z6~Z)k)Yv6^Ej~3(a~rN2laA$7M(6+#F;9Da_itjKHkGSui##y6s&>_+&h^uKW2nj~ z^m7aPDm78IN?n)C%tR%U3T4m3VhU@M$9neIa7 zQhhUwX>YjgEJ{-U3Y zot#AFdIc(O?ZsygTK@zH14$KOyO0QiR2)7BlKZ2f`7~YQ%s+k)qK^Je-UbS2zsS8d z&O8XKYnqFmxO+%~`uwc$8?uu*c6`?CSd)vZ#X;p zu-m6ZfgIu~x18?zCua5v8NG|R08n=!imuM!&)XrwhCg3IcKbrX+b5V0Qw1YX2<4x} z>o1#7j_%qSzuN>MGO1+8>jR+F=lyYfo5?~;TJ*qHh0EuWiXCOs%wE@SW*Y4@QciL@ zbQ+Vrt7-#!My9ItKRiKP$hzMTb2QIs+yxUHY|>k4LTExN72$Ai_b8|UD9Q7$pWIJ z5ldj#HRpP{D5}}hOp{bqNFP0R+8b3XkboY)I-m3+E#08HxPxls6bEbr2Rr&mn+qjT zDo>}F_>X&Hq>03-IVe8oSna5va>W&}A{7iR+R*!xJZZuQf4AO(Yl#K_*87n7B$p3U z&)j`KR{g_$oznr4dM5BG#x^O;0DN#m;0U zFFr)%w6{K&kg0rsa_YuEka${*C7RsRd7~Olaj%|bS_d~6$)3(&3!6Mi)`NStS1)W6 z5h5qtnpjM0bdNMV6wYUqF6T41W6?!(Z z+Fxeqe+RYK(o7Oh>eFI`l?=ZA5K_Ge19@vUJEIm_OF`C@(LqTc8{hG4-iO9h*qG(c zZ<~r|LSFg|uA370ZJ82|&IX-fqGA)`td!pxlrNCh26N4^I>sO{qy=x(@qF7?=nq4D zOI(11QbiYTugjR`jru;*SNvyAry=oAxNAZa+mPG89nn(lL1+2gB}}GsD!d3WWc?>SA>G8G?N795f|UN=N2-*#B3+!aybZ zt@&d_Hf{yf;;+}cECe{`@2+f115ZA15AUPT#8bNFur_V9fgRBP9 zSE^|02gCbbDPF}>i<7Y1!tNFxz#ppm8%$)ZQXG z`^L3W-=sc99k}6Fi@bmzX~gX69ueAi>c5&cBe!~x#FoU{rhK3)AB$vnn+S);>w9XD z`>qZdoveCI?Rcos)9==ycnX++TkY{NSiD|+?~y%u=PjkyNc&Q^xnTxsMKgw?FymD z)%EG~I?gUWnSGUo);79=)pWA;^&3on(bzJu(tpmhvX-2b_%8a|x*5niS>{0lslB9{ zSNA*At`02u<9k5<%MtSd2Qkb=HZFdTlBXq_MwM<@BGl~-?r#_TodddMH2pT^T36GL zV2LOaCXO#t51Y5uFIe#S;YEs*Kd!8yoTJArV~WNj_mRq`rRS1V_xSmeWR~#4UBX04W^Acrxb%sS!A0KT76X zQ99iF#6bg1%IozJ$-6gyjWbkO^#l>$z-XNFOWAoPVIY(*xPnBo&y#*qbgeD&Q6uF#AxTL1;b+BLwG$ z5fo5DQNWJ}mxkiF3io%;@+{EiY{7e*o{$x85&b#TPR=Oc*Ua%Yj|mnl8~e^kP$Gfe 
zXLDyRt05`(LK*VIog&<(3)pmwphY!S1R*5;_Rz{W>h|XiESS?CSA@67MEmPMJsS4l zII4t0hR&Pk`M0t{;Pb+I<9EMmQh>4~mciS^j_Y`0VyI}b9FHUK8CQcs+}fCqe(o$+ z?p=W2J$+CFbtd;l=dLH1)j6n25@dpk(V%0xE_Klp?M|Jq8nQFNM4&zDpn1)F$1`=4 z*dKpCV2mO2Qsr0wx4e|eqJ9!wuC_~QA^28PH_bJeZ7*QkxSB2t+_g{yJ$S`YW10qY z_Wp}p^%=T%i_);>uZ;=96X&4UC$-K{_RiDB6n4+yYoM!o;eI{ylVs^OA2Y0^wg4`P zR}q;9EHp%%wQoXELeB4_EP4eR9@ZDMhEf7ubK|Wy{D|sR4Na?ie&uIg%4)IxJU~#M z_NY=rt!#PITlg-6hUb9HtxN{b;GQ>Mf6hT96cd2#|ONde2DDBaHNWuOJcfs)i6PO+<~2y)yyam^44Mri;5pS zUCHb=`p}!4FDDW3S}l1w?WEKEW~k=4{hN3^;aKvA#r)%ADat(GV`gN0Gf?pV?rsf& z;^-!xJDYC23h<`Q+Gzw$RO>Z^6Q@b4!ACnimB1HRfdw3NCXw{uGKEYV2M=BZe0 zLN@sUo*H6eb=i-o2M*(Mpa>3g1uvvR{1SZIHzg?Hbn(LP({j``iwGF*VE!(QQMD8m zj1At>ll&lx8Fk5zj@a>>)!b0mdZWV1?Sjd*PgH=PI+2A%kOO~=VPkxrw^D+&#h`Bh%CgSx#hU}pxd+Ebyz6ha;JMxg==~8#QNdvu=2?bQ~d%vNI)AM$K0Mjuw`7% z+eKkdK*?}}Epn9qMC}fAZS`-o?+0W1 z-y6|NEmG@Hkv2Lf)N;Kv5hr7=`T2~a-4wMSN4`L>^j(70Rpyz}5rM(K&xC(h0CNa9 zRGlz z8vO(|?!;+81{qNxSkfROU+ulO^co_jC=4W6#sRU52rzg3l&i z-s|Z-1ZbfM#1W$3N#P6$wA~Vy?DOr@3@+Jf53%5fDReuP)+`?b7McBYLMMP6pim|V(8e@*yBkme_ zx0FWeW?Ny;QlXseJ73U~$#Zxx;`yOPv#d~j-E>wgAlAvY+6f$<=Qg4D0*;rD?vxrFmj8}0|GS=b;Fw^T$h%-x+CIV#fAt<( z#mXy|9y(^bnvfFgFa6gP?5d?ga1l=~{9|bs;HL;&GKMwsPQ9o7M*~6~+6u>2m1hgQ zV1sPFLh?hPKnG${VBy&t`QGQn`coXw3GR^+z*V4%Er#`t*B5gFjGnDtC&HjYg4J}FJUPZs9=D1kWfXWoMkXu$~Pa8(*`TBaz(^ucZi0K0h{ zm5_l@93BRc5w@aGe{L zcQhyTSUrHJ@Hle~w6tb9lyL@L#e4{tqc))95>?t##ejxZ%N<39nxdZVm7tRL&Gpa{ zC`I4t{`-y)7r^QBz}|{LM3ABB)O8W%g|*j8AH4FEIR-QeC3M@tzj-AW{s+Q8D%2kU zPtP60D3g#L#&s6%m^w{^`WEK9dZPpVAjyv7s_HJ&DK5u|K6F+zQs$t2-h3CqF`R%+ zKnxC;Bz4#8l!WAbW*;Qs0Z<9Ou7NvAE`aFcAtc4fj9Cuq;%e@3~p0&YMl)@s2s4AB1VFvtb9)wWq-bZ_ruLr0_OhGtM zdomvk;6bud(8L|XjUq~lx*GYr6*G!<)A2vaY{pn$)VXL?m)f_u;_|@L-lD+iE z$2Ze7uX@3h*Unw9ZyUQ7Q>WntLQf7gMn!fFf0<^(G#}?>fx10uTe2m;V)2PJ1Jf@& z)Z13|yj$;uaF@1KsGrSLl0^%ulT8kWn~Pt8ZiTm50(VLoELfK8{X z9QWy;{+@v#l+&gRD6G!}Gl=+^wABYHx5>G0n*r~(iT`jI+g>jO@KJU8Z zet`H)If&fEead}{7AmGYP^1Xk2%BDr_8$Gv_7d&*jTQW; z0x`XP4Z2u1aWtGe?Dz~-;m?LPVX>a(nnUI{JJQodBm9qi77Kb7u;eLThCRjl3S8zw zXGr!~F4|mMlX4F@s~BMIj50kCTmkZBBr5pObRS5@jqFZt){Np}%SurWpbWHZb$43* zQWs(lN-)(r9-&;-n{gve+M9-ag$_R+84r2kJk3Y;@W#VHZ|i%JBEUO=iN!o9tj>~F zLj}_k`pDe&U9U5A`eLKYx-PF_McK>(!x)IG>awGjE$%VQS4c$W)^x8-s;CqZ)=7Q0 zY_LWga`6;L2SYGDuTbvo))xM=ptH3~1zytd-;T#1iKdkBwM7BbUI|Dl=wo-G9Xo_W z+39)Q6#6k{zqRMCRx1b!pruv|EJpx-+CRUhgNN@}d7h5i+HEsM(_n^?tHOXlcL@zF z9ZX_=!2NGWm%ihWY;1zJefPjexNX6Pp@KQT&y7maFPVXs>L#uFl^h`NpQ~NXEY$`o zr26h;0n~YBmxwJT$8h$n)S2l~tBV09p{=lc5 zRsT>*HT*&c{^d>z<9liI{<6@{4?0%Tqq#q=e;Y~zv4XZls~2FGl3xmN-!1on9;Ijx zk%OxAD1T)Z{cKxJ5VJ*oJ)~RIh;9ViExZfNkThU^Ja|!4bIZSxSK+>S&=9TM4zyQYXB8a}?M3LwEu9z6tAwKA)5WIHeXQiU8^b$zu5eT-2_cUaYwnH@3t- z`2mG|o^=uPolWdDClPagZioI${dT8){YS=Q)|AP^)7_s*gl&voou{7W1vA7}Q5Uf? 
z0suoUJzfHExGYOW_);}W64xWIx36C=zss6uEZzGTvG2gw6$jVJA4^ZlWhYKxHP_zt zaQ7WNGN$swA3x_w$;MUtYW;1wO4wWHmPH*CzYtRVC6v@SYN^PQG#!Iqiuh=le6n#Q zdx--xkbe#=OwrpJzHzpu6~zJWPXGU`tX>-)y4DwnCrqbIEJ462>NGOlxU1*BrC#5s zFzBSV)udKMF!PzN;Fs^<__s+UUnF}%GdfDT%Hlrj3Jh2^J1JB%T(YXOGJoTK?_79k z)>s1#Zu)MYM&-C7*=F^$)m2nO{t2PSdDS#WZK@Hob#Wa&b-mQ(`mFW@lPb<*;ioQF z`<;5DN79^_-X5lsE>BvyUmwA-Jde2SEQc*u2OXlXpKXd%F3UMkNgTl*vx|?F3K=X< z^YPhX>xmw>l@d^v0x~YbLS9O(nZfXQ z`|+cmtGY9repcngX$PBBJtyGHBbi+5Xi1AwUkR{w1j#C1_n)!qiJcDDOoK<0JArG6I zRyEAR_bV1|8XZ8S9T(kQA3P>$Nbr=-cP0-*^#-K}I1Gt38*=Jk8AZWt3hr{G*!p0M z?#jn~ZH=PvWvXpiGBC*w!AZoO-gsU}IoMt9g?iMNuK-8njnpp&Y_=mU_F)+B8jAY8 zU^19!)Ul}@p7t^xXk3jNw%UO$+*f4`I5eNd;T1-J8&bLSzbt+PdhdRS=-0%LTdKA> z=K{M5hfN$>G&&D0d;4y(Sd>^Jo857mI4>1;#TP@IuMHi+^5Q;7RlB6_ELby`jnc%0 z5sltciF?mTeN|HYCQ}}T_RR3}-ISVD7~T;`oSiaXzu6ofg{=*$1!xFA&eS2=+D2Pa4hDnK*s>fkV5~=OX2I2h7=_|6~FNqGtDgo1xiQ-jP1x#{rFoE zaG4LjnwRiBYTGgtr}agbx_cc-^Ph7N?i{5Mqx3lpeHYBlms6U9f>pt zh4Q}JYd7|{CIY#;97Rv7H@CcvRaCB1`x70bo&V+gTV|ZYB!uCF<^6_dm$ba3;S-w> zQzRe%$A|Py{i&x~Y7_-tkJ~x5kdv$NwQsJw6yPbBOd3thX!tr(Ha06*Rt;Ae-P}^Z z)mTdKJ4y&4mx_&v_j~vYV8iV>6IGCY_*E#ZKNAfShF*vBM-ZGyp`3i3`x&ZtZy~<@ z9w5x&V*BVBDOpklvov+ltB~RV%sO;>!absYI{$-bI1>p|yPj}ED9o0^%m{yF=g%yR zfwd(@qm0j2QX0-BE=&40i@J9QO~$gwgfMWy^1@^Fl_ScdJEo+%S>|g<_z+(OCqnqi;E~g2%fsC(7CL!|HWWjXxEDRQwEKQ_ zJmwcHH_S`dTaU-fZO@y-8e_Sq;N-3#m@y?Cqf6S600u?|jz&tKsZOBP1J^+w#+0t; zs9_m)Yy>Dlv)f7^*ZYS41@7DT!(0TWkFwt&sES$d=r7HSS&A$ZVJ`gxpLzIMC@9lC zyOUwr@yez&ZMvHNCarl`)*eof%vX4u3HCpadL?|(r&sC5+47rNFG-{iRut#NLdiX5 zf~jtTsV7$ExvAr!)`GX3(q1;h7r!8Ybx*6UevhR$;WHB^R-Y9iL(e2O^9!uO<{G4o z-pYc14}Y88yQ*HWOYB+e9u7M5fj4TZG++2dOGEhwHW@y+_>&emP0jBS>9sAgWTQjN zwzOCZ`n{+y3A8Lq3KEgbyYg{g6>9!^9-OI;E!giX@-%ywQ z9hQKP%v<6$Qz3NX=GsQ7L6DR9y%c+AdI`HK{Xg+oTT$F$#Brlo&t^SEFtxQC;~QW}D@{*N&)IXcwsO-KbzE0=9k}kV;aI|fSt|!Yj*PV8=L--T?jPP&(Kv|6 zi+=yM+c95o*g=ki`_|{W;5*MIsHXT+!OEfPl3uG(P$PZtK&5)8ujHlCTQf#0b+pfN zu++qQXt&A8C`e|>gnhXrB7w?33sRbx5;_(NR4R}qc*BepMY7L74Ye<{dduN2ZY` z;pJ>mtqN)U@FPTv$aB)R(?bvKt;Lv1&>TFfDvbQVYPXXh47>fw4WjzE-1ar$MMEGE zh4s!KJgVhLvP-$ss?PSHgF3Ou@~1lqq*Y+^n=6ohSB)nBHPO1~H7%_&>?J?#CrOY2 zj&{scWuT7G^}&e0w&qU#&HrXtVyMm6J2gKqc8~*!!WYAAkbJ;KmrA2*R2+I){S7Dh zz5K?96jdu;U->BoyMZ)_CoVPF0RWdf(B*It!C8#b*I(WJT%2P;UKmTblwuUwNmxhw zX(e*{`D-*;_?%0YXRh^4Q*xOKsUlfG;j3%2c^k7VYF_Ms?V^l03H8jSi>6RWOB<6lwn>f`FrS|YfO z)NuR4gS1(Hk>B1UboIXqk} z)BDq-eiD4Pcl0X4I9oSinhjGzCoA;zyaI)Fvav(Oj5-N*wyBLYA$BrwdYIXnEkDQy z3v{t?!&blbB7K4DrmZ9pPXmQ4eldZ=!+5`{R;Q*?E8+_mx|FA=?X!!lhq25|)!_;| zWDR4L+rN4F9<*}3gTbXD)8{sn@31IPpu_c~a}^c&Qr4LDQPM^^V<{G%kzEu&%@!Te zd+BDn+m`hr5+re6ePsiEH~Xb!F+&JS(K~Dm3Y4W}c8lj2+-wYsH>tCEk_&h~zY-YT zY{k_~)g|m%@aWDaMN^PT!{xQQ>h>#aeSW!6K{=8bgn0R-@~!V|9eAZ&6H6e;a}(tM zKP6ik4wqkHNS9c*8dg7JW||52U7Mx$tkSJtamZC#xs&$0|D;e$wQw$nS&1sIro&@@ z03D+MdY(=A)K4YCi2l1%@vb?ru0o8B4B4{Lf_JvH(Sbl%@dIG-?PH9I|B4oFxQs|> zv?VXjL=Hq-oE&o%b$5=c)JjiN=wZd+nKEQ5!Im2tbJ>-;a7?!<7Q-U6gtC*JF9N-q zFhnU7R6UwvV)4W}!=GWNxE-eFnYI`Z-p4HHfW|QHwBJj7fK2?2;+a`VHsD%Pa_7ac zT>>SymC^5|HoGjArfj!ePmT%#yPag1%&%zAV7uUe&)oT=Zv(uf?tBM8mU6~8y5e5G z3eIbf2Uj0heZ`6SrHBHJWqY{ zx(~BE#K~7Rx!K;t3V_@h4zw^df~9yQK(M}bj{<2eEqb1JxJk02SKdM{^V5NrAE|lN zbkQw4k5PcSlFr$rq431wiBJKKVXN;?%Dj`ijdivejdp5q#sN0hw)uh2c5=RB*d^-9 zPyXN4{R$%p>w|JMB)D$9bXTFYms2Ey9f9-5I$<8cLE_bcCOKKY@L>XQpvmmP(T8dBO2OM9Y z7E>;`2&DBBuHU9+JlP0PEf>eJMA1hsHQP-5MRA0D??Ldx+;N^Zy<2pSBm^_>i5$vA z*-B%r{=3_p&&rf*!}VkKa||Qzo?5{EIYcAB;s<%R1v$OdoqcC1 zZCK|M<1)Z8On6}!2NE7XutUsi!h`5(y)~|_ow&@QKDd=zeEcJH6j2s-HdsUc zTb8Z&yDqpM{2Km>mT|4cJe^q+N13R2@i4;6mFSq$r^1ysUZjq~t_s!3P>mK`)3L+i 
zmp2l_@Q(CGUz#_xy`=;>ACcM0Rr}80oE*VB*^%GqLS&&u`6HB&U^tSOO%N!CB5ikR7p*rEqSY>GRMcDx=>#ni2P#bg4~s}q+>^vj4yD*ud+4cI29=+7MxL>KCYQhoo)U;@HFs}cZ{(p%FQ%IQF2DeD3}37V+-(+aTgVn z!ZU?EpBKt=pHa!GlK8;mws5!=;Hi8q$dcWxNudeoHshz!9TuJ37x zgv*@Cn)>O@kArH$#>Y|d^HGJ9r&~ZS2*LQOJ_Dp}t2U5~Q(H){=+#~+X>4aGh?z%`I=p-IC957x?l)7*hELux>1cE!A`8S_V6N4U55x(v>9 zLYT=ZKyn(K=sGR>XYw0^}UUEO?pl6G@v}Skn4*sVRNc~E+=_S^0m+8%a6Ic}j{$Wo(swnj8|-)GE~o|mw>wgp zj|iczKV_ou5>wo?;nbd$x7wyWhMP9)P(?6dblXq=@r*&ZP!%3u#pK+;rhBpHWREI$0 z1ya!5ILh6vH2QD%3r-?E4@--4n`hx%RjmjTRHDyc_rwcY0!MU9Se&Z znH79J{BT>x_>dncwA34~9zpmkpkn7@Ld;hfNWRbKLvPD}PL+3W+*T%ZsE^_SL+k%t zw+3)9Ts73ivT=$Q!?6lpe|*aE-%7soN>c?v+~tf7-6r|Fcf9@x?*er_&Z|zHXLE|I zwW&XV$?$x{lo~LCavx`JE$IdCaUKb8KE*3I1Eb_XCu=cevd_VT(r$4>fB#8G((zK} z)V^dP1VV~>4h^5)&Sg!)UI4Dj+x={3Uu4$-^$+Q^Q-Bz0Y!}1+L!Py!LGmA8fBrv8 zKM+ffA5HJFGlDxr9vGTXN9RDA;=_Ds(;lTXGd1ZNsDexNMSP2>3CHydomt;Qy`ASt z0H&0KuR*Z@rVT?}OFX*@=D6b5L>tdKPE|?SZC}7c`ADAkTmdPIdfz1Lss^LIroasr zOIOq!%q!~cTJ54KzIkndvqaJ#ZScm(J1BSH0`~AGevlTjCB9i;^3`B(*wtVwiv+85{b2%Bals=XcYExf%ES;JF1DK}HFq^anNQ)fo8-P7L1-W- zZf6!I$*@PDXBN^7{8aYTDe{tiGj=53gRJe0`K-#Z*4; z-vKharQ^q&>hhkzz|k2jXPRyr=yMS>*pXKAnG9gs>%4C`e_NFW{Z7q@ex@P9Gws{4 z=l`52&YQAjPW;x4&wgr4KwQkK-r)niSnTH0ed~u9IaGbd$ z&~CqrROWrnafVV*?o0q(1I=D`iwTOA8nAp7i5KLzFF?RQ@i&Qe*q0&G|Ga*wN#bAE zd98L0DpWUdM(cEbC(kDFeWRlTW)Kd4E)!Mw74%fpk$|xw)^-zvA;A*RO>ukLH0@i9 zJiD?&-c?RY?gY$*|B32+XMox*Ozu7-Ayyyc2XvL;QkQ8&Hf;HyyXCtvL0oo*7maGa z{N=Zuq3Sf7eF(!6Tj8);=sxc_IsTP)bEFIh$=da_`oy$FG(#9%RWj`@E=%t5_vU!? z)H`_fk%p+k{t6`;~2(pcAi#KoQLXk}XO( zCV9Smc+?L2uFiPl)vY=_9T2RRJ+XRt5IBfykZv$U_H>i6XS%}~>KUO~JdvM1^933i zx=m)4h2(Cenmv)fRAOt``P#wIa-}Oee6Bm%n-`>~y>1!Zb+x0jUSIab7JZf`BnttU z<@IR>?v0AF<&X=yTEYe2dW3TNA>L{G0sxk=2?tZurkNvNR7=ES+E6x=GU=k5t8jit#f-4gBhFEj$ zCUR0>87LFt1E2^?x;KJOR;M<}_-uG3!!v^^d@RKCH3*Nf0%|$$#om^Qy$K%v(){xq z>=c|E(vRRXm~vQ2W`bDl-$Cau2@urHR1eG1L^ZqRWsLb8x5*?1TLbWMb-gN<=%qM` z05t4Y|Lk37e@1T+mc3iY{c%t!GKDW!Tw0pN%HNpEN!cR+^Pq|aJqY15Q8fvAe=Kiz z30nAAiavkpHkBr8%kW}scLxYd8YGZiTW5-vfic!nejUSHr!o+D8>=y1x*3JQBVZfd zp=3{2kGcD5-|z@X#H!NfljQDa`kmkUOh$9E;xZ;RenQkRtpASk=4=w#WfrY~^A48I zI1c2hA3xQG*tnQ}ETl1v-NTR3nAvF=`bT66f2@ZPi_P_!-+`A%e$NV`thUE~q(-J|DaM7&;>O zgzw0+&Ou}etrI}p1ojBA3n8PxnX-aJ^dp)OMYsU`b{pUU|2UEL#0~*Z3T%Edy$4c1 zf7aD;0HHeniMYRpi?jPijHMaogXr-z5u`Wf={{`rm z`z@Trt1p32KpX#l07zn~wYe`bu4}*BaG!s!aW$d4{4O7SN3xkth<3vIPmpn6cNr;N z)HU58J^u4~5F_`#(xg&C!^}}u)fuICvHaPCkqOL%r_8lb4)7YvpM6uY5v&13ZB6~Y zTi$q^6;DW?GxBErY_Xi0X1%2{B`^_KPO#Ng#iv4)h9m9Ykge)~6USjm$FT-CE$*}* znJ2eQR(`LZI2H$=6_x+%w zp^TD)d_Qi-o;?CPA9kmX{VLXZiv*B8S8M{EX=t3l@AvElSkT2~qXXj4kG;U_dh~Lw zWQn~L_OO}Hz=GK=(S^*Y?vzN;N8DPl`q=cQ>dKJ82V50JxBIo!A^-qI+RDPSlDD%P z!8}&D>Y=~=Od6Dn{+o8E%1-zRnQ|#V9K2y+B?Q=SvB9<8pW)N62&2QCwgoJ#C~@uj=*p9*wG z+kIV6iT`yRMVtUp_J?P~Fq{C(l1c%Wo5)c~$|sL5K)Dei&R9+W-0EB#eiz+e z<1iFNSUCY7U--u$Krnpv(+1_ts|rt^v@zLij8s+E9e>2bu3>L88A!Pp(~u+IbLffk zS0iDku=hOqfeP5Ha`B4!Cg6FycB>2}r9jWh+b~L~xCW+< zGCHQo`v9OsTcy6|+Ma~HX(QJD^wD-f z6PkTbADYX`H>I6qq_AH-q~)+-&6@rKUz>mizW+M9{P54yG5WdJB{cH43YEc$Ss1fTPNDce`!H& z-1$gBe`t~Wv!b&!HK#YVQ#XFBh>mYsR6HWnU+KQcLM5o}`{oyBvq&EOfbc-`SaG>w z%5`}F1!#eNgygj$yt!R}k77Si{B`g@Op{pM#B+hdf03xmEBWB4U%S%hg{{@KKeE_K z$)7r!6o_$^^jf1Z$?YjlYMSDW)nE)t1oyuUqU-7ZRQx;Y00iD1JRqd$Zy8v>JP#^! 
z++hlzHb{HVKL_LA@m0>Jm6qoP3UcCnOU9w9!W-i`ArTxQxBrK+I(QtVO;G;lej{wS z0Bjj1TvG?%Ztv=YME_M6*;%xIPx)3uYm0Cqsi!=(`zc_+BSv03l{ci%00So-g8^30 z0@V=@EA?Aiiz*@1KjX27NVh6b?B6mM|07~9^m>2<&pVYbKrwh)hW9fUOxqNKwId2Q z0Ye5)vbY1DF4esD{7=*!{Ayj#q^dyPGQ492(mX0f1NFa4!B_g$*^4^SQTT$}z`LJh zxN!Y0|G|Q~wH>MahkBzC`p0bN6wh((>1R?`Im#i2(8taM-)xsiVr^jspUm$|oA^3L z)RX+5>uxK&|G1A&9x>QGg0yy{%JyNM8G!t3I1xxCl#RoW?B zJz^CC+m^?b_;1x%SC!SjFH0Vh5QohhBZcY;vY^W)Z2kmD&1q*aeGID2oLS;wBr~YG zOt|h8nV$Jok8X^-K?#yiOSoFX@4l84JxaB{gKk}TJ&u%~44S_7qyI2Ej4sol6w*Kq z%nVZbRpd)q^6}^po|*s$usu@2QZP&m>?79x{+ncD()lrD9G|xcm^(Bx;vS#i*c~+G zLr&=ecbV8=Xu#{4i)VHp<&Mal_+IyxrT;$*pjASF8t4f4$n6+FM`6_Vt(_eRaO|YQ zB$c2*2eVj=zQNoHVt?Apa;W<*ti98L&PJFz8yQXyhmLTr!-O7Z2Ydpxu+nUX_|j7d zwfXJ+607^pcP%;{joGZebW)qCC-u43#zn{}MbGS;bYJv`xm!k9hUzRITjiWDaBsg7 z`*D1uJ`f30ZlE)Gx0NSiWD<#~PX{?!J(*{3-8kka1iYAXBo%QL)^o8qRs!qp7QTOf zG0w5^PA01gLz3l8jsDG-UsQQ2Z<9&B``|MC(7Gh9r ze*Q5MeW|>;;onI_fnCb5#-sOkF2;_Xy{>3eC)OROfJ_)M0BdvmVfRPNA79dv2xKe; zLSyfci7@9QbN*IOhVmW~?m(nn4b)LTZuyyDPe@D$3cIEbVQ3J{XN|&;W6R$fq>m~= zL|8#Cs&2fSrvHo~O3(gSog91TylXy?bks6$i7);SK;T!yd z1$3$@Uj$bADdaE66Ctj6f#LThAArQOZJ&Og6wAC-|hDgKMjXX!?d5M(=>t9Lw; zRF@N)26E^63=`Xh9^44cROHPM47({0ANytMun4bVqytkqqvqES+Q}47^WS7*m-`8a&zoZvOOw(8y+rRDAB5}uY5PNeS_C?cVzR9?1OIXCP0VHeFR zltDPQ*)>PL6OZ+{e^Mg${+(tW(hLm@=6a!b^{XCY{g5y|_d|6`zh~Kir6TwLW9zM= zqJ01FZ$P?1I;9(w?v#-3Zs~528oEPTy1S)2loaWZ?(QCHcy2!5-~a4cbHK^0S;O3Q zUGKg3Yo}?m{hU>mbJwa)#z(r5fr-uQsg&v}U&hTIVR`dzlt!drw706TLtj)o`u)j7 z^I!RztxuTZL9Pg6$o6kE>Y6AA?c(fSv>86pm0`M%0)fnKER%(DLnh!Z1Lmgk__ptL zO^IWixe^gd$Nc4qr_|A|U5DuP_VQ&;+> znm&!J`7o$zR#uZP53^}qAGxuiWqloQoM9CxeYI9=gZ`^~LSIdyj9wO*0T z3Mpm5U!}*fqr$|B zZEi|~5}})sm7$zWjsOXMa1B2rHM6N z-H^|R&vQ1+d(485L%2Qiv0X~y_s)r+TgyKkGdHA*a1s_jlsV>qoV~8aL4GXSgFecS z5YPAAZ6O8;fm+0(`1(RCd8f$=46e1TCG%oMme3V~HDg5@86Oq~^Z6YVziP#E@3g;0 z6gfOM2dQ6S3{ZhPG`Jv;*53?;Y~X13|Asn1j#kzfuu?WFfdO?k4KZ7xzmxG}lUsR%kA%g7oI2a_2qNm4#+ z@Azhm{X(j%67Q_q_{C?rsi+QZWMu)-;4ol3{QFEbGyiUP?s~$PucG2f@^nl@v~{Tr zmKPl~F6e(xKlnY+i{xwb`h~&oIs(y#uaK!0Z%D;bP7m@gp{mkt49Vfb^0c1EKMIxP zui*(I^TcNJXAoQ7nECr$m3wH=j>>-?<^)k`Y|%yV9#4-BQ$Nc)$v}4sdbH2rhnI7Z zBhvs+_hqcEkfK+fd-Bn>PA4#A3b&molk4Px5#BGvZMr-Mz4~W4mJ76bbwe>G5_d%-a;K!b@{{654)Q;#)|#!M zkPw3EjvJMBhQi5guYI?X^nPdas}b$@nuiNTFRj{oCqE9t?rE+xgF_AZ(praou8yqTFzv(6t|3)XXOToftw%qDtKy%yT>=vJZAa$z+ZD)i`?|N9dEdHe|-^LE>VD= z9cPqXN4;X6+#UtubT=uz!r%Q4;!=FC`m3Uv?db=|eVIV>|E`g)L@45@yOb^HZIVxw zTG{j|K;l&bhMo*~{@O2Kn`aEGQ%T3ol9@;2!4+k@jSe)j2Kv8mBp}I)#QjyLjh|&Q z1r<*8`FI!@DLYL7m2|?Tv*wb}z3cYM>OrZIWnp6VSG?Go__EZ&PF=3q4-$=}pp*oO zoQFN8%kfitolmc2grx6RZMbu5ripB09<)gFglKM`6~s=R+M=e%)+=eNL{-9HzdT5< z(xXB!F(5HLTWmE&E8RIDW#tj-=W_Fr0|ROBOkb3&<1U6@yb+zURNoCh+4X6F2-;(S zh!E@lqI4;uj+C=EC14r zcQPQ($dr-dY)#->!NTPJjMe(fQ_!mv6%UkwJMc-z3gTn!86GKsmnWeKHYP zi3jcU&o{pDH?~|6P?kED>82R@ml>GFftI>#0h4X^-i|Ij=k1q(34d2@rh;o|GYK&$ zyF|MUCmLwAU5!O9b%$Np)(RNWhJ`?&Ex}4cH|SOzXe(DU(DcT9i2dktnd{169AF_n{0!Y4R8N{y8i1Nrv!+q9aQ`uZ4 z`e?ZdpSM8s6SnTVKrL0T+|Vw+c<{jX2d#zD@0t$r1yIeQMi+Ur0H;ewQ{9*F8TyX3 zWFKa*AXKA(6vX_YLPAJ#}{z zgLG&7FdqB=i;)34D%QZxIuiUUKva_UDq?se~Q11{wyn5#GN?8yDijFKm8GvHL8 z*(|-L^w4ibRO?C%YPSQ18rHLADkax|aK*Xm@`okzVm2g=_ARFnxo@Eeh)bgUfFk1e zrKsh-A}@vg&kO2utXT7boKPH6Sz8!BmH`qfdLC(L$RvczH4_r=n`TRU^VO1h)5|C8?+8swp`52ok zEznPr5*$$2s#fG872fHaJhb*BCW7*RYhWFu3!!)Vm?KdOe)p^Ue(X5_Z_?h{@(L~2-N!J=TIE7J<6u-&>FiJkZ($lLpz`qg=lX1s zM2DgFtniT$=V5yrD+rP*6Cf2NbKw?-@hWpJXv0H;J}S#t)#~{C$y`?x_9_G}0)q=o zkd$fTR*`{+A<^F@t6&Lsaz#kO1N=s{v+{ceaQce zU@@>x?{&-)B$u#;(z3rdDG5ImQQd!R(ED%o;)Aq7d!n=M|8nA;(nF{H8SzP8r`fF0 zkx+}Cu4uu~=P$8S4sfP-cLVvTYWcTlTS+}Y0w@8z-b_NkPort)beRFXp{wLKT06iW 
z0UG?mh#d}efkB`r|3O|bzkT4G8BlssKd@WVaV86B4P7i0Mv$wt8Q}W(iZTq8rr9zf z`_TidV?fy$c)+s7f1IJwvEht!)Mj6t0*0YS%6oTDfV`n^H;oV*P`3n0_g2OKvXpty7q zOUCkT@>nZkp&-p0J}W<$hnXIkZ_neGeudQWJ+=?~m+#Mh%yL6oUz6gz);iK{~HswD9ki2djRgo1%|v_<`4R1)N?Tc-@HRZE6L{){ghc!CLd<2dVveg^byK-+4^FgMMhtSTJOp$E3=1Dh9^-R z>2erV=<+*21TTDfb(m%+8uwc}yi6giWT~mXgQT8DjE=vTid3f3M-W=XC6$)hEG=m8 zyw8&fbi@^xD?sowOlBGUWci@S%E$rgI&5SEYYClQ{{m3HmLgid%7ER49{a~n>|TQ+ z$N=S1ma-_+hg%2IwyMjB%8|DV++(rTrV8N5Hwh-g>Wf~O4^^}Q0%VKryRwHdOJR-? zrt@UTn+DQRU>U^Ry=okc_Qv}{+xW3-9>z=eI4g4w1R*8{l1K>m-!SDJGxDlsF-EVC z3z~}jj7;BV(qIvrP$DKMJQokJ;Q}Yf=bv(@=WQ=YAAn$O4iH_-d6P&M-J{Xmodc^t za~*thOkCQ!Zzy6Sft9`^wU_qyIR1B3CPDdyv)cGP?AY^|$xJvHD;#g~T3kk2=b8Rx8hr+?$bpz_Ppx{fg%chdGEx&ZhFrX9sUTAhx_iI9?BDx`jgLTNK zb|25xZAMlfie_!&HSp?tD4?KxEEPgV`%jr`S@W=cY{7GR*%!G7o6Nq4fPgNg!%o+6 zuGq)Rj>1tc&^2;sp#nsQP>m*~1K&Hk-rPEZ5{{qOXuEVG?gV#{e*$m~t9P}>$z6ws z?bdd3L-@72D-X*+J?$l+|510$(w*|lRLK7cVSbt|Lh!`G69RrgZLEU&LgI@ zsl!46jAZNvG|VZ2blS{%x$Md?Wt(z}?FR+XsHSiA)bCh**S;_(o7cY`uwzREO~jg4 zpp2lEM0b%g%4hVA82?sT?FMKZMF8n`9ci+8J5HdT)p0hIoAZjPBX5@Rc5CP7A68rG z9()D4Ics?s-)3!;KF@GVd^`-pR!QeogRSdUpfur8pc%->EXN`rlOl5rB`Hpa)KtZ3 z8MvpgVAwgx5b-)0w*&Dg(xWun71{^=f`9Aus9`S3xd4Tfvv#}S8St1S8FbQ1WA(vB zpMO*Ac{z2gmcLwW2X95&x-$$t2ByH%Ai|ZrMd$*qmTNVP!jJ{@7@?#sMxYZ3wO!&U zp=8{rp`)S8EhzHkU!g96yED&Uf%RwWm)m)z^FR1r;ioNfC?om=>Z@a;11^7Rzmowy z3sE9Bjl|yu$lC)9VD{t4MLyLNZoxk0U9=&PyE!dMa47ipjrUvxH7=8he3x6`a~u6F z9ZQrhB_cFMNJ9%Mn!r>+{Xh#O`Ve(37exT@Cp!>D)Fs3K! zk=efCFESP-3Tha%43np>&wm;NF##I86*;6>Tz;+^-wNx0<+@+=O+`F3H{`6 znu!S2TW>;jsBhdC-!(E|V$gUwNc@yB!Y?dcg<|rFFSqS6fS4}tu|^a3LZ z>Q<8ouVwpefV$5j(|&%u3%Qbdo>X`OSYFM_<)vc?A8Pf1^@+rz&dR zE1L|nOPmEiJ7&ue(8AVpW4fenvXpTfEMzr#jsw$DNN^Z%2ohUh z)t{7Y4!mboTiMd~FL|y?XW6C>&s{OvU%lIfX&l*E6NXWA_m*G&z zbV%qrSQTZ8WUmz%v`Yw%vC)Q40cI2`Z%9BwY~!F(|JQibqN_1mE$Yb8hV$+xo9R6( zCED2qnrZX|S0YOV6>X@`;5LrR<(c2@$$$vb0*#3s)+j6l-UvRO58e=;UfpvX!y^Y= zOzo6mciK+rC zjF`mhaPhph73aWyIsl*CK#F*JU-+I+@5QO*DB>Jw(mo#6@O3y&@jBQzf^+y>{-J(@ z#Fp}FrDso3%!I(H940}LifSrU?#o^Y?JVNneBcjR;e)@uslnrw1$MZ7yJbyFJQ8KI zy)C)(G%=(CiuHw#x9$NOR>%|Q`IlPiKBVrT7i{!FGCzuSNVSHMO6^73Qw zTr=Nq>>*QR(S>z?NYN_(+wxCeWd0AVHzvS1Zp5Hy9?&%efI^_)nM{9{dw$_n4g7B zhuj$DHc`_1|8$rpAK91wWn4{d3a$EE+NXcpTQ1o-q@)jRh0n&-G@FcJ$M_R81-A)K ze-_T$Udj39Z2vn6t;kwkufMphE6 z%)4-+JPTWC=II=IILf07poHahCVp3Bdh4g)N#nJmwG>qQD^!!FNsI|v2wI%pjQo`| z$)~O8!$*)o8boM@B}$^JxSPb=s$2qk$9mD`lrZ+ion_uzj>ABk_n+49=U6Mf82~2K z8<4)}Z3tbg2heiA?*sipN+OddL>T32Zhn){;YA{^hxD26jr@0aIi35xuJh+IRh0*0 zeb)W_cM@Jp3L#!*OP?`<<~;SdR=s8v4o%ZZ76^rXu_0w@GAz{6uZ8EqBDdv@5r#@O zRQVIN-BR8X4cI*TphEeK`M&+K3 z&3{MwSouMBE7!NoVmOEBTAg1v;n&_2Q3lt-b0>Ikb;EEndMwu`bR6>v6|R86v9-*b zRjc$=b>MG}6cjG_{swX%0y>kJWIcZyAI!AFUMCsoE^|JLOMA0gI9h>rET8d~@ao)|WI#V+!E%IB-DM|dMCd!3Mkt^by z_p&%9QD3wLAV4qFk~_z?29K}4}ccxmiY|zD^ z;ScQKJZyJ^d1QgAUn1O$6*e5Rm9C>6{I)7!4&OKtF6sh*43iIk4l^F0n79DjijF^8 z#ERHa(L2VcQQLh-@B@P>^r?WqWMm0JM|PHeCEmAymAjbu^L^&}3VStc?q{pB+d}eE`<7+e zSRx_*zkCZ>WGI|JpAr=PB$XU{wmtlu!-`Lv7w3KPUvCl*yPwWIGvmNiiQh}t5~A4; z7J$2K^58J%xR(mnypE^69|OUjM^5<{XLWl zyLU0rvIky@(bBw+cOaf4MhXlq%Dn#t29>ENKS0k*L1{vO!iYK9Q~-YP&IPZ1UXK}S zB^UwYPJC5&mn^FeiFi{)$0_H14Na?{i4Fm~naudv1wcKZQ%sE4!$IOnwoAzGdGJky zcPG?o>(zk2$DRM_yG^`)A>|65@=PV7LfQnRaXKxSs;*Wb(b>@iTi|u^CNXK2bR53} z!^g*tW3R6QTTK2$s2-ocykgL(v0P}(etVAr=EC;Di6oXnN>sNe$4h% zuI%n@X{GYK8^hQoG+d>zsM}m@N^n!RzJKKM%e>(?#HWh98E{KJk}I2N{VoR zfUOMveGyQVUe&%8X78XwA$_lEmbX0fv_8CU--e&wvrE1`A;bdcbZ;+!g2D5z>ze>e z)p#B}xvMB@6jHPmAiSb(X%_}Y+c>^wRa$?XZz1UmcV$MPC#$*?O&^JbYr6e_TKUb> zfgNy;pOa4GYn6A(2+kajd~%$PsMLK4x*cCil@ZVWvX_)*)I)ZZw9qkKNEsNf3A5qs 
z(eXt=_Ze$Le?yu6XV_Qh^1+|}?*SN0{wSxhRJt-t^&2Iyg-VQERAtkoi&cR{3*)B83M{^&J?NUUkV=BIw$*aN_&_}?(G^TDmo7|Vnp5KId?*` zOkh9_v!BL42Y@UGu`=irRj80FO}+QpV=}L6&Uy|bh}|~M8>AriAsVIsO*Breeafo;$ir1;@$R6drvyObGC`_yK!6 zN9iaohS=vNjY&8^%UA?twES<^!c@0z#}n4^MnXs;ek)Fk3PkwZEY1J=&1W6$w&A_0 z5;0ApKsSXyzK=faY6HtgW$KGBsR1nHc{8X4e9eF3Dvuvo5Y%C)ovo46NX;!VN04xD zdWI+S4=Jh>v<*?Vv+^|;AN$-nk(7zPcep@T*oZuWzecVK&~4VbfRmiimOL`gi>Pq4 zyw}98i`{!#YaVCWvTOZ>SlezA8`p9nRqc;zLhXb6VH#RGG}*<tmXN?D@(Ix2?AI9(O4s+8WYR6)G@>IDbDQE5bYd4`>I|VdEK=D=9`4YLNjsZQ@v3TZuyVY z{GJ9~@2rM>d+xi=`Jz-_OpWnY_db`cKmH~|t_HPKm@N zTcnTwCfuN5*IiBZ7`<}(Z+YD=AZjQZZm7_I3N~@E1-@|WcOI(r zo3;p&o5RaO(721Y(jZvyg>$!Q9yMkEBt$W=4Y(LR5PvJHF+x7*+PJvos_LqBEO%E+ zZUPK;6@CpY8Tt&*Zm~m5p(?z&K|W=4<7n zn%~m~2)+nz-^@%O>duavJ`U`5jGQc#WZK!Zwdqv^#|UN8Ks2H0KRWIIJCdOX3GLNd z7pG|5#Sla(CRspz`|zrWMcu5TPEe3s-MmzB$Yk^qWtACqi{KDDAdg^sU#|LDzf6Cw zr5F~NOncH)QgY6MO-2bj+xEmAvsXb!tzYn;DgI9Y@7<9JSU(xZi66;d018UFil0pI zQ9N-6%DUNT5|z6+h1>Q$D{S%gg=8^QN=%#Z(WfMDS5`fLn_5qUm2+2|lXC#rdNHis zl;2*Pr5>FO^oP0LuLGgBy|ld?D?VHO8>x>Fe)EG2L8bxm-l?UK_**pis8~4vA>jk zeeg|g1JbB>U5}BRRp#0mdFYpAc(XF^c8o4!?)>?To5t~NLCG&{odC$!2FbX|9$ubE z_-GjE!|XrpaH25FF#YwcDpUetJ){W1^Vd8NW|(!=O>8D?<%@5hRTGKvIFsdqG+8u! zCCU+ZVd`ukHE9&=IA412OE_!it&Q#Ljy?xQHnS-6cus|Z)Ig_{{Bp0&cNT-n8e%UQ za#7Nj&rg-UcZ>U+*WQqrp=UyMp6A2pp+fF6z*e&Roqt>Dg}}Kk-F!ZsYnH%L>0xFX z<^He8kEtXAk(Fhz9`ziTJ6QQ<2f{N-o)c5<@tUvkU&ch81={pgi6z0O#74|>>H+v2 zP&R#oqG*qxd0lQ~7|0c@p(dt&n9Oq2{~xJJ_e;|J3+HWaUX!p3<$fURv5d%Wytbp2 zZgjY_#3SH;x5jtB^G{!SOAP`+1FyVgclji_o<=6@14Wp>0WKcA2uVaD`xEWmBw0i^ zngLCJE1)i7xrm|FANmmoiGqg5vH{lrp9^;i zpkALnSncSy%D-TH?SrnGwKvqP#Vc2qi@i3?3R(qoc1UYo``9*XdD)kcf+(8f8Wm;s z^&hQtk8P8}D$PZHXf&qY{-P(>9*-EFH^2-ne??Em@+HAOK+oLt{#UD^QUP;Q#`8;KY(H&6GeYx7?lQB zf4J+HLQ)|djid|xB|lw*a`Mgts1tv5`rffvq z@6FT{u^mzYg*GQxf>*+E*{(ss{+9v_tM93;k*mOb9X4Uy=bQGoAzmC_T475)AE(N? z<}5dA42o8E)<49A?q9jF!r>U?Ife`Hvqd#K3Cn5Q$tj|q|6=GtFO%(PA8+${29{<*efM4+p#7RQcu>}85LD}$u9|li=Alv^xVyZbd0o(PUm(a)b3t~3R z;L=cwXTi3mimxK}*DFdH!@nU4KZ6kS4^mz){#ePMcm@x=Q&t4;*MA+N?H6zOL=g}l z%lpRnCT?&~B@U)Z#h~Mrg>`Z}`R zDgih&V^cLGi%0^w35Aa|ZgDmN9qQD$PUteVM~w_HQ)B1i=O!%B8)_=$;zc@h8**?? 
z?!$)K;<~kC3`Z^d7{w=Np{g*oW4X1o-zL$Tem5zIMD#;F?j$XN5o)b+JM_R*1$S2% z_3ofoq}9xGP}QaKpRlL%Op_qL*TI4wK)mnc5FEP!%4LAbqdD~j;EgDi#L=k}&iDEl zJo<}Vd4?R62yE3#T#;hZXf5E%12id(k=e_pbwK67A&K%LuGH>Gkj_&3kr0aEHU8Rx#WTu@dXN79`p zF}I!ec*c$wz|8w6TMtOq9VY-0RkjM3qJ9rW4$f!!JEBsoYA=LaTb!3%*ta!)woi!2 z`$XY7l+Cx#zFA{-{CYxUT^5!Qh;l?r7ZmMzWy})Sh#QP;&v94YfBXKP+8JbBx?D zxx(Lq#ed~O8n@4%p;m16n8<%h6~NC3fT*>A(ijOvnevXOiR`V~LOv3?Hl_8+61t8`0WfQXeu7vxBqbIdaz>PC`5>W(Z;f`0Nz?dE&2}x=yp{u z;)@%JmW*`l{zWAgzfxzF;@L(U--+8usvXj9nuc~@ok{ayB@#uyn}=U%{3o{u0ITJR z?Hd!Sao$9_nx)-@^GhPzDwWFM{(h7NCY%gl{j5v4PY+l0ruA9_j9=Vwn_EvBCZx%Z zm%HVe0eqtq@6Nf}h0kK;N_Oz0Ti%01o@mGDk&$tam*}MYvk#r_H1P$#v%hZ=ayd;K z>Uj9j)zZm>X%ao{w#ixsHJ zJQdhm4L^m zlE?}V(P4kS+h6vjk(8g(&xfT?h!6ce5)X3B6t-U&*Fk3HfzH!<4>Qg2*bTK~S5-Zb z#~f>hC~0z}=I;OiP6~#eAeOB=dMv6Sqs_DCjwU^m)&* zyla0LveumU{XMz8$U_#pK4=kF2=o{&$(ps|x$4aHz)$8=yP(Mo^zI(6daf(bWxFNf z2O0r-LQcrGPef2BplB}1)&q(raidxqELs(8>u9TkY zIG(CBLvIG5$AiK{V2pgbH=K_5K9Jca9DaUsCCr_)3crxNig*!tOt>a_4DIKF)_T~q zC6oik1+T|+A0n`*?1aQ9`f!f{uT5nNa6;=AyRZ+?OpoVFS_*<0|J~5p-@vY+x^1sD0z3q%4S<5ce zO0OaRidh?hCn_wT+Aj5|>-vTvSgHFtq2yem>)FrfXAEHMThH(QMim9$on2Ym-43m^ zuFRtT=KJ<$g0}cs7iN5EEXFUqUu#9x^q4*)l>wE;2IOcXBlrVwMBzBH`JipI3FL5c z-)f=XqWx{dQ_k8|Q6N6yU2wy@urieFw}wbfo?IxaviMvfOw$B$DC(SuMzJoQ#GwRLmBg`L$|Fm-WmNf&J#rL!Q= z4CxjNw;;r6J^dNqKmJZ+b1%Q#nUe zQ0enaeXaZsu2Ql>EpnlobLXX>eXB*tc9Dr2uufy_nQUekgo(|rBoDkjwD4s};X1ZF zaT@m%_63!-h6~0bGd>(sV;(lrSKLB|J`>p!TVgUfiLlcD1Si)2Ku8Ien1xOM7#fbu zb}iOj*q6{_#!Uy!w^2mHdVE4Iy+!zxDj4{0$C3Yee>|JYhth1_F@}v zcBDXce~uWJ2H+|CXDbxN46>DSb=K&8wfii%SQ|YIS#>G4{^fminRnUO6XhXM`l;6` z9q7tjXgkc3bq31AZFl&Ipw#utZZ1wwnQA)=61;hD3nZiQh)%<=$NAW@)D#J@!lN+p zt6gl6&^}1reNl`Vm+&SMlqPjg9uI!~MGnb3b?knyXamUSZlBnG`Efg}Xg|2>BBZcL z%@x`)DsrDP5-zgbxBpxpt1InEc&=Gp$-t;BOct--BEmGtiC{lF?E|B#TN8ZKH0mwz zT}me3*`#Wz#ni6pic$jU|1^vAw@f1b$joZZhs((KNtap(YnSj?{X_ZK^#F_z?&U3E zAO@9QJ;sP_uejegOU9h6ehso7GjCSz{eG`*28_t)Wx_V56BY4u1W+xfg=!zwMg4V& zM8HJo3bt=OxcHvGOKs~heYf|fl?-{7rG3YIop&e#7 zmEvY?cfoAl<$8SCL%UkDwY!K+yo0#*iIdu_Y?m$SDi;WfDR#4E2{jY44#YVAM+CTp z$egI$oKwo>5k~B_R|mYY(QL?In82?kQ(_nj6<*YT>WOo{zh75+G!%Zgd9Tyul7h|0%Ii(R`9Yk(Xnw9-P0#ymQLWv6 z`R^rlHtj-~kuM9DY5no|19(@*t@Bp2bScJjfJ$N!=<0>2_( z`K>1}mwb;W3tw|cymTz{-Lic<0yeNPuMY+4!ao}2iB5sktj$EqwxN%Fj@7JDYKt{f z=~*_5`!UaCOUW^@U8!O=Wnxeei~-4?hXef!BAgG{JNx@-i=r&dVDu;rWpyuf6c?&N zVF_KL4{zvSxA;IX@p5l`k^1PCV^n%=gym1v4sKW&m{Wd_)cV}GG;AP!kCs$Zh zM9`739qK^Ft^3YI1*;^_j4B@G3`zVeHHt~C$I+BfUHLr6uTxDtmloYnM_AW2d;+-W z8M=p{pq||tgANB+I@_P+n0bcr?uv^MHQ(!os7Oh>3JlMZb)S_v7CXhjAxbSN_osb>4*9j&S7C>>$YwptmZza%S?PRc}|$N z2p~{cG870j74jjf_6(_c^{oux*RNAZW!>uMhjUWNp`Urpk(Rs~Nqrton(&|cQ}|r* z1yVu=^4{r~7>^4Jp!oiPDE~u8I;C9mZ^Lf8Cs|s;w_7v+7Q?HD8*x5|AxRtGB6itv zwDV~=*XHjB3?{JTQiEUaGJaDX74^4J60sWY`noubTKK?dge0bvz6<=C$^4z_v3ZSN zv&n`BwT%0tIZb`He}7JLbTgMJbd5FKJE%Bo<^b|7Pq6K&yi;zrov#}FWra3c z?}XH`&8pw1jDH(i=h!r5F*k}kS=+*>CF~=rlB|~gJY1k$O)P~##0R%>yRbVlWYEDu z*m0TogL899rLA0HOpDs+Ax!F|94|m+*!x&9I;EV(`+kimTOw6DvdlnDjW&+Pmmt|1RCDe*&TGet*$bqgo3DnivwgS`4XUGxtjc1*KL^ z8kvA=MIxZ%1IH@RTI~Xm0c<$Q9UKNpQ+{>B_<9Yz|Mf(hRu3u7^`Cx$X+;l-{_5C% zNE@p^5Ib-6eR6WqvFByfJ`?$sH+B~d8aOl`VdvwaO6x3M1bsZLKkbpI1{VGNgOlv> z_j<$K9MsR%g)C+=KtptKbnMtIi4zq0lf1;B-t6y@#ubWY)54(O^5c*2KSKk^RdCh; z4-NV;ybs|c(qimS7+ubzDoFAe$Kjn`BB^-uC zCYM-GWyFX<*cF|3!TTTGndDop?nKQ%E3W54(*?VlF0)h8<(8pR0o!n43~{-Fiw+Vy zMm~R=Tqyfx@Z820x>I=|A3#TqhpG`RfvfHAEWxiU-(1bnOIS~0T7v#yl5Q>6_GlWC zcycg*KUt}VnxW#Xap7EL{`L$Tl&fQYC4@>~_K+)b{Gc@j-rr`~A=Ccws8>8%@cmiS z?;~HBC@sbJQAgU(9gkR3Ufs_YKYe*EZPVM!65MQ?weM^?M7`}6RFL`I=S>inu0A48 z)pQTOM0FIpzDR2KV3k__&aBI!GR(2bi6*wr`uJ&D(KlQA%DqA`b{rECvFIujsnLTQ 
[base85-encoded GIT binary patch data for the preceding image omitted]
literal 0
HcmV?d00001

diff --git a/docs/img/pycharm-with-pyspark2.png b/docs/img/pycharm-with-pyspark2.png
new file mode 100644
index 0000000000000000000000000000000000000000..8acefc47476c90437b3a10d9f1a4e5368b2d74d8
GIT binary patch
literal 84813
[base85-encoded binary image data for docs/img/pycharm-with-pyspark2.png omitted]
zp<2^MNv$y&MpTKxL;p4F@bfDU2D>*Cknr|EuTcN=6?rB|tL%^2I=AD!ZqsZf|GhD~ zdE7G@Z<8%t%!lg{wSmO)Y2d>@yL4CS(8-&))>oV`Nx>LBv^N#}4|l}*CKNdm=8_f1ka zSteHtM=~ov%&lFo-Mwr+npddLxy*g2orp{Nt2yg}Mc!8z=nq6s$+J$KU=@0FPC5_$ z1y7giqpKWFFs;!J0n>^O zF(l%do4XbA3)86)Gxt}Dr*>E~9#1MKKl!%tW<_T65alD!#)1-`lK0J!KB!~qkM_W zTP5AhUV}6!MCdL@MRHCuTnL};Jw@CQ2kxPgc-o-vKD&>M@%eBHMc*8ZE()dxxfqI1 zx@YTa`MM~0BEPrF(LM}#-1=f;vW#E-e0A7WYF1N^V*E&>eeowhRwEY6N;>MpE7Pct zZmxdt7@+Gn?iMW?8l>Vs?(<35j(C{UpcS2d>v&?_j*X5@MRG}Z-iwY&QE+%VaJoH! zo$cN9d2h2F!L3kN6!BMzEQ?x<2TLk%$=K>7H8)7xcM-jzl(Dc|n3n6{b-bXJnE2}A zNDOACGojfHxGg;mVv7lRcD*}4rYy~~PFkM`B6#_Qonbu9S`B?s^*Aea^Qv#xP@u!g ztghP%^imRD3o&kdM*e>3&h;aUMVubyg6SEKq5`wpOYeO0mI6<$_Pvg%KO$I~7GQf6 zUZE`-gtzY{{BlylP%H}V!zSrqElZx?quWJNTj^163!TX3-oKxX!brjY(;uEkD8fAr#HRy@KwhvIjYLb<$9$l9_|E)BH!)`MJcrshbn^t$R%! z4%Y9<-rW;#bJG5(WO!0y*p!Cc)6X=S)@`xh$xZchar#l#>~>WBCtUkxPXdR{shx&) z;wO~uQteUdi>mc(jFaZxu@~6YW0}^KoMhdT9gll&v%YX7$L5G+wxWpebBV6#lU{N0 zSw+pK5mlKUFydCEO?oq+$_VakDoyJ9TLo7Jc&EB(EpK}^F^DmJTb*!P?AuUdoLPCU z>n)UHVQ>HDYe+~_r7DU`TH<-BMDrcv>gQOI1v#-_i*07lCi=E?zWFJ~;?}@SpbjfZ zju3b$WG8DCBa`r?1=Em*B;{b55TAtSKyZFqfMVJ&nT-F)gW70jvGpeI)>bCx^xZX_ zBrT2XI)Uyu9CD@`LeZ#CuLlE#JO$>_*kW)}R2LJv&z-puIvuBCaC4(hW?_A-m~b*o+5atN^L zTTRyLLaXCx8o#)QmeHu?zTGfiK1qloWcIOsElQ(r9%|8?!yAp=xTL_?yP!IN(_}SY zokns;mHBPMmqISnjL2OVr%9S4d10)fwNq!i?oUZYXuZ20fXdsscd{<((5>wP<#Ya` z1LLS!%Vt`-Cyuqv^UeGHNgBn4S)sSs+LSNL%$)bHV`z)I7gH@iKqi%lzx)>MBp>gm zt#mJ{xfZ9H7iwBJG@17oHs3n2IPj5wnRnBiTwA^LNO)n!!1~^YfV;irGLdY{?VE|G z#`ZipZD;Rg($AQnEgiVMIcu~$8CfvVyO96w;e~pI)Q92;5d1Qob}RE%?oR=hW+UF3 zkyHi`ZXvM*k8CsZTU7{oi4rP9nc{}#(_>%AZfZ8pj>LCuUEotU>7~_7tgy0wZkrI2 zje7R3MfA?-vE4y|yZw(Q+ECIDpDTQpZ?w`;dQ<4g3`T_JaNka89tv0h$Xa--L4pUW zX3f?(>Ond2A{!@0%CE}$qBf7x+03@0>yKYVVG>Q&cc1*EqHmp@FR<92cPu^kCMPK4 zp0xA*oM^|x-nc8eeT%i+U0MshjLozV6tzvRC6X96Kigd0(dHl@o5@N^Pgnzo1a<2K zr|hjRP7|jQLB|k^7;Ywht+s>Lx|8Gd({B8im_;XE7foE4t`E3-=oZySt(p|IjN+25 znpo(}n5bK6EYq@|le7t_=W<&9JQ1F6QJm~Lc7d_6G$;6F=JAheM^zCHyn+wyVdl+2hVS0u8VZb8xc2e*EYrl9-R3MvV+;+JIC?+*6Tc|`Ko-r_uTyff4iDAWEU4tY)SweN+-N@ASOpIa}BiLYw*K0y%c?1esA{4o8_|7uk# zMcm~TZ)1NZZ2eD3M|wIU1^ENj|BruFk@3sz^DWEmvp$)7bJf@Gs%1U9JIu7lWmldc z&X7|-eYpT;8<>w?Z4UyMzz}2~lOP%K1cu?Re>vPiD^uDMG+3&zq1_B4LEGiDE*T7q zb@{2R81+)bHDe$1hb8XK<_*Ps{+v&1%b?Edbs=Pe8=JlJ!x~RapK zqL8x|{{=(`Zgw}il5LQ7oaMm8-QE0Zj@U>fR@^>8uuTQ@i5>ye`pdf1dOI7R$-5_K zKa~K#d0I$HeAZQIfP?RBXS3S{cg^;DV0#$lOtQO z+_5)1GY{<>?xVHdd+9wQ?Ttt++=7>SA&g{n)4-*cK{x6V zIT9($0x>6GDGed~(;i$n9W`28K9F=bWLEk+Vh3q{{yntM8ya=IEu8@4FVXo@SG%b+3=9He??8h}Gm0Y+l6)Q+H=f8QNkKe~bZF+Ilf=j&>xiVc!jty@O3UY5cv z;vw|=l`r#g3gU|z5$*lbWPQDhfNq{{2Q;%6ZUZwd%?0HruC z#uw5h%U3mpJ_i2FPol)pz=-tFZiD08wasf!x7tZ|>f4%|a{S(1fGtowGXN^W8xth8Fnwk)qdM=%}aByMG3OMrjf+UH{A2nqEUOwUs4( z-Cu-<(`T#Pe!$%2>Wdp6w1-^5J@|=3D-dQXj}TfmtH4?FgPres(0`ki+lyTxIv_L! 
zQTG%QG&(V$<1Xnx;D}q9$s(G!s>X8z)AqFwO75!ASmh}szOIcAIqHbD$Jo>c?DZ`W zXA?OHdp{rvhvXbl31D)=aS_*XP|U5rf^0iEV6C&Ry7PUIgn=@t+{T25ky2Ni!0yRA zz8pnRjKxx-Bc>}CRWn(N3q8-|a4fvTAlR!LNvO&=X&^BJdYmZxUP^uAX;-s@n5aN8 z2yJg+Wd#y`?S?WCX5SI-2MNO<6EMw=qRW3Rs;}AZH_!AbUxWFbiM@5%ejj$x z@_0o`*nqj;VHz-_;rZ^1?#_aMCGDzXDVQ2XA$KW1)u8_?!YA4an%V%Uyg7GH=oho)hUVOadcwPdvOx@q?OA2qi-|%7D z{89-o5@)H(bq|kdjzscqm-CKONlBl(A`v?@v~aEb0~h0bC|9cE(hY7|M~DM6sP{Si z2K|jU4aaTs?gWpWqDXTs$f@?edSF|X>?KmJ=*COqy8Ai)%>s+j3ptzNV)NT2YS)7F zF-c`lpuzvr`3P>N6k|2Au0Buh^uIEi?`t2IRSu)w%~ybHq=ggtl|lS)5Gh1hwtvCg z+Z0x7N?(y^8_dAVeiRM$KIHWWonF@mdZM3t6djn#7B&d%^}DzUyV^&Pkl^# zAf@1J?s|5fM2(4txDEMxzk-|{t3h+lW%a>G$J)DT1uJhQL({fozU*$yx4n3=b9mP| z%%36DkGQ^m_$AnaN+5Bed^q^C2QJI~6X>`snr&E$HHW6v$18yd@#DlaAF1 zy8d^B&PAP%mnVDb54IA%pK1?5e*KXKn%g4^49dKEtF`bY)tFSv=&Du)-4R_kF5Am_ zuxRdk!&b585L1_Z%f0jtyWXRsmA40b#-^rqM(G#F%GGi4kQ;^Et(+zFH{Xg)3dIzT z#Pdbs!-81h3k_hwlv>Qbb+e~qH1?M58`~H`dxf9(7XEkZ%?UQ%Tlg!R6fPH4o~PrAM7|z!Nd;(Vih2cTuEe2#e)8)|;oowxXbhuAgQR}{q9EE?BZZ59 z-hDCuL}S?A-OpkU$QSf`Oxb?JYWcAg9$nC%_vx3tZ^I(ekjee~lm2;M+_`J_%@mH* za>@Q&+^_L~d}kKf;tXt$lmA@A@1b(S?|^``puPUI2y)rKAI7g&vP77;di^J&GXCSI z$S*zeMf0cJJ^x(s_bvSAQ%H0C;LFoZX`TFEUtpbIwwU!eyZ`6o`;U(jA!RYp!jKI6 z-(TPe3s-M$GM0|)-w*ipDjgf0QiS`m*6+{fzTc1_Xo0WEd*z?^*rgnXuHYM2&2vQk zDNhkj{Q3t?9&|*@l6x-j&ObLTAFl?&2=eXU&0}Z#&FREc|G6{&Tta#)IwtQe)k|3a z`$lLWH=;j_?|m}-#{a|KTSis2g@2=pVr&eQQjk!T z4n;yiN+hK_Bt$w@*hnKNC@3H$(jA-blu}UyX*RV<5h>{gf%jP)>v-OKKioUU9e2Fr z|4D|h*Isk3x#oQ07nh?iJ>gk5u|D>s5hKC-WUll)`B&TOSozAykIQSpBFN3x$qwN@ zD&;ZDzw7JS@x1l!{XGVl_E>JzH{e2M^#HyjdYzsZu18^o1MNCX1mQIc=mkrPi+Vx> zvbo@Oo9;VMr!~sR#ME(#9`Ljvaw2z6nHxcCEv4o_v}EY35R|7u&GHCu-J}^5_3j9g zOhm*v7x3(R8EIfFpdl$AOd?v&E^s6j0r(}tgG7paD^~A4Xm9tA0WY$LdS^|13aktfv0=n> zet`u-JhYTG5W>|U?jA;MeZMK}kND6<1AZt}Txp+kDd_3piV&kgpq9(LtsrC7{2~Q* z=W2EkD{6rnsdn{^^&%YG`p$FWE4$hx1EBKLBk+Ch?bW`I!ZX5 z=SRkUz252q?biy2wB;+OvtP!&bZK$n1gG#wO-i;+2pY>ChU1xpYbG+@SoqeJ9aDY) zRRqH^YLe@!rpMmvzWt=MI->WilI~2WmNk0RpKs<$4FC292)cxOI3V((&JouB?Ahiq z_I(KN-WAA*9~#CR;lwlnJ&`CFnZrtll3b75&Zh2zTm9Km2y}EB0H>YlYV4a0y4(8U zcB4Tv6@Wz^066BIvJ|xS_+X;-TOi&JZCipkQ0GkL(vFp~K+p=4u5v`7V9ZKdb1{F2 zTd)g<$mt34m6?8=wm-mLb)Vd0)W8+eYVczgveLW<|Je|39NPcZ;m(As$L+_LP9pDn z*Z!r!BHNv=#&%4 z_348S{4;HkcFc{v4E~npciuJNQ*x;p6P|yH0LkrO<&}>-nC|5wwQbW!q!RGo7WG#G zdNSM2%FLY5^O%gCbX0mT`F;g`VR4=!czxd?Sn~%KA+Y2&`>vYn z5=z{gR-*7wz{{nHJ-G>D26td$+$;cy`3#IGnMgBM1KPy9=v&7TWaKg7oVqCg=M4=F z{r7^aK=>Ozl-pZMr!&R1uAzfbR8di}0N%boNY`+yybQ*@HGG6QoeM1ZS`XRO%LpS1 zpwr9QcWMrKsuRBR!52%=0GmB z>sn0+4)llgg@<5HkKWn_*m;Y#xOM{#!%3&{xF>yfTc~3CSKJ{BB{dyP#q5V5;T9e` zo9v{%LuKn94WRB^8U|7VkGr`bl=Z<@ddq2JaA$k0_vHT1`iRgbbGd%44>D!=drP`u z_|ycv?YXfcR0MdWmj=CPa>ge+t8ziD;XPmocS`cYiqygqdLyl!w21~+a@>+vyuE~n zXu0j@hPq?6mr}YB;%t-dPS)~Zin3*>j6VNm<0ffGe@6D*ASHGU@2z+RMSHXVqFa4K z&GM=gw;Q|R#)E?FAXAzX?q}@I!f{%PGI11dxtfP4YFO*TD^7dHLpFbcq6vJ%%dMS3 zUtCW(rQPKi?*xFjrbYVOYX5zfL*%6==*fjYi~E^W_yS7zeFL_LM|i9i#IL^0#8}{{ z*bs_A*=OM=ZHOj5nYL}f<~Q*ZCj22Z_QOT^H5VUDCrgLv3+H@5v+jK`3j}Tw z?!WBYsnEtHd1iSYNY~+F%!pwmR!rhCjtG5A=0WQr@3Cm7Ce8)1)x6#~aNdx!t%4n` z8o}Ck9oXUp{%t?Qv5GE}*rDT0e4tU)Uv|kXU5q;u!i!!@a?!hR8bKN#^;KZ1?~z`D zQ?>$4`1_<*URVjT!Y^nIBesZBoO`CK=!N$*6!NqR5;>6^p_m*@p6rldu*zE3lhN@^v0_S>0KX)@)5iGvo5qnx*r;avg6M zTBw&agng-+X|GJfA+ZxB(D7zrP>(y7ob0k+!H^(ZEl!jNtFh#c!p7cSN1+gf0F`dg zomU}w;)*DhkUEQ)R&8LjuWO}O9qq#CJ@VL#QXaP#kdHeV?i$Eqit<|65b|6|B_a)R z*24BED83M-+E9oUazEwPH0+4&Q52%TwElY7Msi8vwA-zUE@FIq=3q`yVM;T!Sx7;X z7mTqpag%;+hB9jR9i13k$LH;mF8*V4>19_~s%fd{Pn1g2LXR`j4(DMO zHm4_FO=wx*i#WH9dm2b4oDJr=Rpmh$mZ*2AY_mwq{NVV#tc}*sP8)W&YEEF&t&LsU zigCl->(ixcXF$4bMm_JC*G9RXXLfc`RodHcvr!h!7jnPtJ$ObBm5i^%DLr*L(s2?q 
zpVIv}+mm-es*IYGLrl(Do?e}DM=wp(phal3Ao^*iS4;2NVJF61{`!bX*a~ULOxmBG z^9`2IITz`ibN)4Jk5hlB_i90tH7E+)+god3_`S%YH=RSll12`Kq|ACZ-^lRslxY+&kp$u$M=BC@qw^B?@TS#}Hoh43v>Ln`? z583GD8lU)!HiQr~`QvS@Xi+kwS++(bx$1@7)~0fkHQ&igzQu7RZb6lZ8=q0kpK6*4 zcRFL4@MAKE<2z#jrLw%znjuYj&N8-)SV-T^UqAvj}_T6{e1{7W?f-|7}y;zAiE z^*8=cr9V+`XXH)pfhcO7jUn#D$#B(J>qYDVemf`|TqT!@HC0COCYnE+;V&H9*`D2DQ8#`oBihDr z0ZSQ@XPG@Ih5f2naGauEV&?#R>6eqm7cm}+sMq7<$>vTa(Y6i=uX~>SldidEMe*S$ zFeN8$xg`naiCwhQ5=mg{IN(9DEj8x@fAU46+HEtll~};k7v=IOQ^=E=D{kT}$ZkKL zGBGu&sPyjG28|AmZC`ZW@Juf$OSox`{!6QV0)F8ek~kOFok{=f=1GGC&*aM<3g@K8 zQ%nqZG0&}kH_xF-o$5AMFX_#scQSE#{x102E8c9#CEj<9weV7*bk2Y%;|zs`%S{TF z*e{)wEq86uLL@l~Z5w+Wc^@U0m`n;YQI9T3_u{N$ zwj`ckKO;_XPXKXXREVEUWMVX#hu1~biN*ADF7~zxy$Hc+qX^C$w`xwKiHU-mG;l<# z6BPF|&kjA}WeWB6$K%*AQty(EqKhHkukeITBc*9fqdi;GUfF+ET3U0D;-F)U^TNk@ zZSOIn?K7@185vBi5{FsWQd=3M!q3IsF*$LQe?fIBa&3=S!`vdfWfxalI9+MtyA^}` zxA3cMv(3!p*1{*(KF);NPS<~Rtxu}pbp^Tp+5x@`mxBYgkJ7P8MReQgKG7kX@~)>~ z`{1;@WxPQ>U;j=(5WPQeUU*o}5?O|NP+3C{cIf;kwcon1aJq`~GdDbV@N2rf6rbK=w zz0)#sjjj&kqv^*FGIjX9TRiM`tV&K*b#(bLIdy8w*j>dI7LC~J4kxwW zj&eA}r13LFd@FH8Jr(m|FnRHGLZ%jNaHi zvl;xV!|mh6WtyJriOMW7JVPF);@-@Av8^PIH{v=UUx5Ux|L%^aBNU*tYWbzd1t z`gBGu#S(GYEv00`t&W(insd)QrM;&mAKc<+6{{&Upp`}S$m%{QnpP>bz(qDy7bhiW zdygr$Z!qj0T@QuM?tgVa@sP^ojXmy~XXSm`h0O5J__CA;rJ2&;3tl|sp;oz3H{%dtDu1NQ2b}T z>etK@)>T27=zD}B?cZ~L2#S1qqDSW#knQJG-HOidOTj@)euyw5 zbuX3Zf9hp4J5a$|u$J=u>rxQl?1Ku{|3xh^_S}xjKX)dit|w_27RB?r)I}h@FHPTY zSUbIfo&A|c_AwQ^D!fZ9Tt-e7CoMJcUpmJR3%Kh9ekQB zzjfy#^$PRui%;zmC4dcAjFdv4?dFwuwQX8X^Qo`s342I*Ey0LnMjdI+`0@;NgVHV~ zK$jjL<~HwSK|I6K3JswWkZfKCr9T|x)bf@wJ2{=2<{B{qF4D^nraD=5QU-URW1$X; zZW&M{aSej5)`Jx=y6OhqWJ@5?BI?WUS0{WYCxK_Ud3bgWQsGQcLCwlaU7R;OIXi{8 zv;k3h`hl8LDTwkntG^YBWQiFl2E%7~iIE9-#n(6PX2lshGR&o7Ezg(H?$=cErWwXWP}m&`rl0!?p= zpZsiCFj&HbB)PblP{M@lbC=Dm|F%R)-kUm~|0v%w6nk1tMWqLt!v>&vn==;UCcvQ{ zpi(PZ_Z@Dg-0y+Fv5@AnwnF-@JENLS#bXeqd;lvuvGJw~@i6czy7tE||NlK-H@U00pPAMcyvUfm-7f)W|5o z`7QbO+HkOy+m-C((}*?zw_f!g9JCkvi`ORQnHoX9aLKZCWg6DM9h6YY8S!q0vY{Pz zpbCOR(g4Ur)2FJ&chcv*rV$p5IBOy5T&2ML;BkM8{-L9PHgB>j~aH} zl>w*O!itsN24%N2?sVx6Fnd!ysACzj2zG)7;k*H_0z;nh%IdGr8ROG(;>E+)tw2+) zTpJN3M9gkD$%R+n!s})N=52`tU{c%pWecI^LrGEjntWUt@tLb&sUp4kMzU)b^r>?L zXvMvF=v`vIImgdTC5Ghu>{?7lLVsnl@51O~q`Z6hTmHR}<>|H#uWMVrjqJ0L=yvR| z+6z5IS`;J}dk{KnrAw#08(NPmbO(y5PY_2(5Gr4=-n!^^;)*Q$D?tUdOz5%K0yV^<$!MZz*9}viZ@RNPE5%j2V$sR2sY;pD z_{YI(^ow5t<`DELqfP6pp$s(Ao1s9f=}#L`_GY@P7sbQ}BIZz_Tt3QPrQqE`JD78|ci` z!Xan`;MFH-bs>s~R8L&}u?-mrNtXRFM{Nr}hKw zVl5>(TR|p?1R<{h@uYW)l4xCV?C`2+)RX47%s-Bi;7T}g+N#6~2>0%vd{OPPnO#s^ zdD&@b05vH*KTGdUUiMh!b?FY7D(t^Y+S%Mro?X^Na@4L9ww>ZL8iO^ z`qRm!P0v#)P+nQ>v++-7?%$r<8>05?idmU=wmG8YMN3*>4JL1F5_#|{l>?c=?C4P_ z1`zM*1{wFhO@xe$P}@Q7w0KI7IW^9cGNf{2lKDX-RHK`wmG+3ZpYpAg&$-UnUUuv2 zV_kvRxzSMFWQ+VGN>TnEhj_*D4>*@&!@8V&ueEL*LA^eI(|a_&_XU%B+zAR25)w9v zc@T3f8!nz`l(vT69d`Wy> zo);?lBjtsuBd~*$??jqv&*R1MkQ>H-X{OhRAh{@s7Rc)i_Pnb&bbX4W4-hAIKoc)7 z0xK(Twz`u|p=$zZM?d-{EI^?&jv&#b^%FvQ9)Y)sOQ+C-4yA5#!$()?T;=6QJg9>6 zY&jETg4i;$G~HF-ChGI3 ze>Bge#372AxJ97pu?KHiXsD>QG2>49X}guCbg{FyHsA1YmkY173*JbITOjwCjoA}t z>8SXaw6_notiB-b0xwUu1(CJ+5c0y*PhZMPqK_Rf(w zRbKw=I<~7B3<~Ncb>ePJ9^6^+PP4t)KAsJ9Ox6$e#2qL3)+v{Ty zVge?;(rLt4FXazc0Un`BKzF_~`=N*HLX<84(jjG;%Tmra(ux=vURtDy&kj_`)KIV% zVVusLX^}t+D6FE>6zJClbSHNkQxJJ^vClO;?&TUJ)kMPQ4hQ$V_n8)TPdPUA-kjOg z`+Uo3BVxStJ4R^9;PBA|wI_1Vj5nxR?_!!N#S{vx2M01)cbz~96G&eznSyhqU*GKF zgbJFmByRa2@mz}%9p{Gd;6rPWY0XMV<6^NrGhGg?q;@|R`3kcbF^|-s>=VRhU_@U6 z>ayB;HxiB)NF>IT#dBa3^|dhzrcTNc2`lToZ$Gqgy;z4%Xjcv3*d6V0coCLIcx_%DqRBac&*ZVwmi2tA?O}*7u7|NXGe;>}!^qIr!;|@B;6j?T 
zvZB!SsD5BomuYhIV?>^rGMU^3ORN&c;~Sm2&)5;f$Gmla;hsAk+jo=5&&G-J7TSMx z-JCX_bhZL3s2N5=a{Sn1>7;5M#QS9^ki`kjQ&3T6$`!&_2vgvL)G*>1&+glvWEf1oJ!-+p&rU?*1i(KqO26`iOdKUa3#wt~y!3$$qpK#;3K z^OCL@1(&9n@M&Mp!`9iVLu1D$X3S+M=NYxN1!CM9xvSVe{M>&tPP^xUI`)(8$Fc*5 zwp!Cgntj?dd@Q5f1tmnQq|Tm~)$OS|R`xY^{N&QQQw3ilj{K4QOe4@2zAU0lq(s9aU)q)-94=Jr z8ll=_?ZlHq8_i;dDg`vWm~dXxkI$^=!7!_2Qz zehIwEcCW?<6u){vtJnZ^|8obwf90_QbXhkjXqoZ!+Xf-p=UM!@>YotQCNn#GG1$G; z;va`$2^3qm#)}r|*P_hZlYKY)imi3YGUT_?j1jdN+*URKfcx>qnglj27MCl*WrHm> zF7~5}nzkWGVA-x}^fU+XV6osTO~GrMim4Fj2iF!hNB|V0nn7~OW6waE+bQqW9K18@ zSB?)-{Zj*lhrV(c$`>wO&q-+?3Q-b`ccgui9gK_fB^Ttol5eLGrLCe8f&gYZ0f^`X zM>y>x5)Bn;Bl;)iu;xPJ3EOb%GJXYAp;D@gi>LjWngunerBL4ALd zGwmya1r&8#EguYX4Mx0yDEJ)<_~)wmoHO3<*~70Wc|%xv-;51+6)^+i82SxdvG09Q z>4CM?g;-aXgO2$(P;T&3TLld9t6p1B7JpxSp%S7l1a)h$dbhHPHkZ;_?4Lcsy_dE+ zoTP9}vsDq=9FOoebk_SQU*de~naZ~tNi<^O#H7Y4mJq5OqJzH)_cnqyIkbZmDU%PL zLGr-3^?&j}-Yg_%OP>P#_14D=A#vWGvd~Ncc`lqYLcDkp`fdeDFQNXb=-SnNHoYy6L*48yUaEhlGpC0 zD-VpWT45ZP?F2jlD3m{|MT^B(6 zHX^7}?i#hBPuY_o)i*z3WvOI%&offVLc@aK^-v_~HXdP`!X;sjI&4rx=nh<+GD^Fg zDO3YajwT3H9W-KkT=KJqcP6{dqC60F2`kn|kQ`pqaVy)Je_j&-NW^l?0hHAEl?_r| zB|J{7@L*1{C9V^n6fxDBOf@&zfj8DFBqW4Qet6%pG$k~ci+;lABz|`aoLv!=iB;o1 zeANOY4-ewWKHYn@B%OJNH@i$b3p^2F9L|a%NMpRaY4)K!IHgx5~Jscwc@mJRqju#_gN&3ZTD8*V$?n@yMLv*LK3^ zu%#ADnp?@EcC3nh@|yGSm&^_YFIiNVuPVG`tRyAYJ0UBb4~&jG_f_+9^6(Yv zdRA;=&o~)M9b`6t)i244WPcI6*Tz7G!@iwDcXNYQ`F-U<+agZkHvi6D#G? zak?lWlP`}tFUWt;o`AV8fb_L=&qr`Fn=Zlxr7vIZ=xIK`?pHP0h8Gy#_8C_W{=OwA zPVsuJnBMT>odWCE`+LcK?In(=uHEsPlEI2OB=x4OOhwvWiz+8gR5lg}dnB0bCc_f? zEQqu`BQ^D|`ymsNQQTuKGPJ*@M8YS@bJX` zo#AnUcyz-Lk%oEH%T?qvSm*+3d&g zNHX&U_|V*i$?lbnswpz=9tey%;9z)OY_-SiTKWPQjJ@*@uyVV0XHJaLE>rw2pq6@I z)3OGm6HFB3WLCqHly46*DT_rpqV2ZYS>7z4cghZ+mfG;TmRrH^v|_tST)ms}^{2a< z?v*H{Rgucz(V(b8a^+73n;ot)8i!JpkCFGB4EbJa=y(RZj41mE`H(h!!={HW&VsoS z-*Y@9uY2s>nOk>(aVWT?I$8IIvXYeTk$zf%E5rVAcbGh5PQ^Eu*^y5pSsmH$tnO`? zw52z%|2yuv(?4Pyvrq`3deIdotdQ&6cgKnGx$pc33$421K5J?udg{423? 
z86ul}kn!?vke`lCoH)xH?UpUBNk@l%rxKb>vnKRN?7m6;Qz~DKDXFH3qSB>2N%E+G zG+$IkVsGfF)VUARUt*1RKKUB0s<~XzFCys7X@e4*iz` zmVi(U*mM!7FuN^_NBcfMYayQzg5wD9Z?;pt?*JZ`R z`Q2ns=bvQnIK7RG8vW+ay5$o515_kB7m-wjFkCP>JS#)_Uker>LLt$1GZ)g?2x5=ECg15n#9 z9!l)wls~B2ex3L^{PT>}Ow%LzL@K{#RW&_pG^X%E$AL`4p(@O30^WCXfV1^qk0tla z1)ygbZ-tgcuBX$+V=?MJa0|22eB5a%uO~LxN9{Nsw|#M5O*h6(rAOKFdG8o=M;WOm zjV&N+bHP`L4s8NLXw6h^M?;m21yv|M)N(Gw>-(KI;62rMSjt>SR zFaD8xm7)^ENTSRO|3L=-nRo5of1e0avIvT#|D;#3La?8s_TMc0Z+aC08(Lo>7e$;tD^b6%EBi-a*x4ZjODa6rrjlPP1{Wlz9G;lF| zk0j0i+;g1_%(<0S@E8Aa^zZ*6wJ#qUPJjVJa7)J1~FJ>xX~b;qy@ zI@I6>mY3^o2LAp{a4IaxlNA>^34UGX$K%~VZs13v{jZ_ItijyGlX)5ay;`AF3%rmn z=4OWT3r0ls<@9 zCIPjVgZ43?bbG-C$pY}RRW8upIUVxxLOaB4R)|6<8xV$#WT*w%>4h)+evqdRD;oqq zlivLge3C1p!Acgultdg{wQ}^_8)F0q?fUMCZ-NW0C3I|kudN_`F%J^Atts71FxU-n zFwa{ANNzDSk?*!3lM9!G;(C3t3geEasKI}zd0e))t(ZEGZ>jzJxaKzcx>by9@p+iV@ zJ;qIu6O%C*65+lK?~)?3C~@Rw+JcSNjo1)SP?kiH)ze|}Tn%s1-5 z^iNw}A4s0X{43pW0h_?;1WK@$E8lvul(P5Kf&Th84^qMJbB_>u1|oc1D(jY32DTy|M!91_<$3TT8|5N_Ff*Upx}jE0aN_ri2uN7-R1>T^&R0 z9&bN6bN?xGLKkphN{g7E-iMS5(TVS0PrJmMf^_8yWIsSLXG}Wy_jyetUkp2NN9NSW z9@v5d@R3erS8HeQNdWiHj02*;Dq_VjDB5w7kt{#pC@gPCyI_&R%7VKe6hYx{^!i5o4I96ZzJcn0M z=>A?AN%3<8O#A&%s9z?A9buvaZ>81OxrJL0`)=&!^5i3i-t5?S zkZfzK4%z`0?LBN6ebb9oNGE!*EY=r~5~Ikyb6HyDAnx~B)cG8?dajWsg%d)teutNU zx#G*wVmjd>6O?TK4GMl%Uml7vn~*=Dx|zm!LP1jaWaeJziIVS);{;|}JHzoz_Y=|4 zaV|X_nct-l)IdKFx{?EEG@Y)0SgZ;2@ln~)YF#3tW_bnlHK+QIX>AxcbKj%bBs7Eb zMNwf2d<|!9%*}jYl>InOV%ZMoR2ylRg-1fcJwstHx^FqiJp9rfFPU)8k1~xd!N*xO zF}2Jj-{t2%1N-d(rGu}~+H$trbf=ZXHG~K)op79!>$j6x zdrL(laSCPGt4I`_ojQ9nuihICQR{`o!5ERsMue8Lx|ur}MJB^?zV$6i(gj8zppdL(DT9jP$<5gL^v4?P zCuAVlX1KMCR)4o{Qp3G$+8RqPiN5%gMNq(;!DWx?`HJHh#bPKVCQ*W{3hc z?gjiT*8Gm@P^=K^wY=&02Oj$KE0Ekl1IZYCo_+drpu6LdH-U?Jk|w+U=br1N;N?-h zW$<@+y2l-ABpD0^7nlEg&#Dj}=6ybQ`HwmO=NpMN;bL)~n4hqL-}jt{^w<6gN`5Vj z-Fb^`fs3J7rGbO}bLM~i^dbai^M4-A|EovSLFx|WavT^dL@XIYkT%1K$bm%gJ+QL8 znxxs@R5lqt<)EUf+6!ocZ=Pg2IU4y!Bul(sd*3^An#oY{QBS8FgG`v!-KWj$Smv+h z=mPC-v*uX5nbtotH*MjZrk|N~tt*xh&*9{2&~E$Kymv~16(EZkFuHx?x}2^%>MX{E zZWO1uZ{>WW=2o*g$ji^@^g+2h0~}>eh^MY+-NLCa(o*b1FStuk9-<+H2=qkC(9s{u ztmA_FK!sC;Yw&>Ej!Oh|`Vhz*dV-kXKAYgyQV2eXWNU60$7$bsf=7fjS@#T5Hi9Zs)Qp*E{RnL3gIax~I6;j2UN=;2YS9t8&{( zw3NtbFE#}{Uolcd))QHs>CXhM8R&4nwa0asTJ@FWB2ASdKsRUtal9~X8zTN51dde% zh@}p|S+PStAl$aX)DCq{OT_!LA-{=ts-*Q4;t5j7i~a<#yWM6Ov@2z_=rkx`yg{v3 z?GNJPe=kIAM5Y0och8+kFip57JuQuCEWtdiKC{>R`}f-rd$O%1O^EOsYbO9s?E@~< zzUl~gD}$F0g_>q^nVN0$+9vaSc&fJds#@}@u)b4Jzd023r6yXz|8&jeRlNa7NX$Da zuD+~lCg)V=D!+!UN_kf_kL*a^@r>)R!MH)|tb~;)a8_-D{q`XeDXWkT=URfiR6Uh-}O`D05LDEqg zSPdN=olG6{!^;>Y^auS*YN}&-kj#{?!uorO9&f2Z)9E_Ti<n++tB2?yN(3&WanEiA%#r)kjWB&9X3FuCuWhz zG_sH0+Y+fOE;fES^IBxBcg@mqS+YyG3pac7Yw{k&JWdc(!U3wZuL4-x)UP^CTdt;* zsiICzs1I=VuQS+_R95Ac9)B|}gU+Uez%pywfQpIQFuwY6mdLK9QsTn~=r~OXr1wjG z+~&6g8nYF36>OdE7RW)9Agc3Y(@wR)o0P}|3!<6VoGLe_2q2iPWqp0seon9Jwpz<| zT;);HZ_0YYl2H!mwh(yPgE-u-sK;?#M*OPP#Dw?a?=6^%uS`vVX>Ie&!ai4*_AKje z?95$6MaKeQ+=zjE$>NdOn<`=sfrY}PVbPB!=e!)VZDc|NOOsHl3@?&o%iDk@KMmB| zVw+AbX}hZN4f=4gDF4}|O`Ez=0ePHazd7o)VPIg`xK5h`735y?g*?R4txri~0=Jyb zsSl&=MgMoR;cE6!R$BNldJcO@)e$t~`_p$u`NlUg6i5XO0|&=kAd2iuJ>s&M89?9- zLzaoEJVyPz#W*T7`f0o)vGk!?C7SE)d`rGnVzxJul&4j2(1IOGc4ruh#~dXmiHr3M zEW*ry_5AyEIW@ZIOT#inV?#%Y(j?a}S&Z~68md0&r|)M6grXHcvE{QeUFXvk$$ZqK z^J6a>Tw)ibFj@U+6|Gf4VQY3zb{faao+#PJ?5q+GG$Yg)8YMW8!=wU^|2PdNeII&F zn%lZp3U|I(z1wzo`yFwJV5AzLU2i-}erdA*@uVXxuD2SWXD?@7)}g7TfKiPwT#2{F zg#d2Om{U9UoVF9!hLB*|EI6R%!k)n|aNht)h|IZ6!ruXqm{8Czjl;WXxE=HH>e|GG z@a_1f{6H4s>W>#pEs0_{uir>Ob=;OiSFODs?y_j2@7kon7N&bYNxn7#&ty~2hew`I 
z5_V1@0escW$3e~Hm|r!h)KNTg;kc$(ojLNx(t`}<0b-eIFIWu2b`959?sh`?`|E9F%di_vQ4}HOHm2MFpBKAanI#EOFwJraebjqLWa&rvZkG zmw0J{oaIb-<5pnf`WHv)MCgR|2c|#i&3x$JX|2#Aw%P{1+@qwI8?iz6k?}Bryh~!> zZN8t!!p*%diy6mOK-Iz&G4Tcuvu=E{j?!6ddY7483!$R&_Qm+g7G5%FmPSr>Okqe| zZ@BNg$Bi`?$d1afOii_GmiUx^*tJ5G^4`l+O#vsEOXBE%GYAHzW18h;B#I<%3ZZZZ&Q zlVZK(6sEB}V?Fzsl#O@4%JSpaGtOZaey7=pmIbg}l7;*#+C7nSMd+zlm@Mt&E*k1- z*?&@OcZR*48Bg18Q~x)B)rJ7J_2X5V|NQ0#gx3G_k^h;ozp@nK|5-5q^S=BavR%Z+ zkBfhKh@6S6zzMr=z$&;e{Er%gkSS6Sn4S`uf(p@oe8<%pZI8@&BqsP-f(Xt+$}#z7 zXWE7SKQ0fvV7Lg3Kt#pH64D^=^0hzeIYP2fVYsiK04zNad)g0y@f{GbDO6`M0(lnO z*??bJEetCe{?v>~!U2iUe5(=0)dt2Kla^KwjRQ*{ocP7`LhoOD=v<{c&mRN-0;+81 z4s!Y+Q?aZql)!*MSW)UrJp40aerQx=5@SD4(&&-9tiqcSpKdAy3^H zeK2ffQ;&OCi}vBi)XY6%6%j#rtuF4S^I>|)^64u#6R110gq(jardvsO04=YfW&h- z$qpcmPw+6EDH46IA1{PHwPs@j~^5~c}_Rs zj5B}bz`I2{`2dDD#y^r2A&|LQ-<--@vmAT6J42!;C63Uny|sfiOGeC=rOd(C4Rz1>?`%bCQwc*QKRvD-@yJ zcqdy?Ps|KdkTKlQzN4;AS=p6Ty6o;gw=OX5>Lef&@uz}b>Sa8P81I+zJ28`2yk7|9 zz=ymy^S91q(SH}fIoAD}3z`?Y5R|VkCnD6KSlPpCLqcnb3zVty{V3uLj9yJRuT^*O z>hH&nh0>k!>JSmKu7n;x5^_ra01~dx34bC+{<+;R2)3kB6!SBE`h6uc1WqC5O#I(^rZXatSj@2ecSZT3 zknetVz5M@v25yH-n<0-4w`Rlq?oFb@QB`FX6%9i}r1{&nRQdXKAJPJa{BNd0@WfB_ z^N**)PWY_WqQ(Os$*Hw+^*sk~W$RLaYcD7T7eX~2srjgq+GjPX5H%aB81cBae5#D> zuVzxq(s|(0r+DI1Yj@(76kqzRzWFVkQ|_<#GcQpGgP>no3?RWZ02i4FEy*J1_4|nG z(xz!^yyt{C&*5KMn7gw%hliBKwtybnS>f~m93oXZqil5Lv>jA(gy}$Xg`>h6)S6XO zyo}LV;f^}1R+lLR%lmqZ81lu0A8?nqC@dFsc*^Atoqu`oQf;$waF_+#x2h7F6qN)$ zPqs1-;*H}C00_0DuqwITc7mAP@RH3}X{0QN2ub6!=J_5Y{qha-F9FHUl4`Is&jhBN z#)JAU`?mxgdzM3pW5+u)BV;14TrwxSmah9M+|gAap-&Uule`0lMdwZ#3ZwA2YwCQy z%8=wRwB-2gxjLrj3Bn!8Nv%W?mPKI{k^z(Pro44!Q(5Lvpm>SYkB&~ay`aM*#k}kO zqk$o`bwa(&*0hsyVIwx-p{GTjj6EG$Lyt|E#+2%QrjsfpuQke~M_5on_m8QXn6%~@ zHr$H6KBZ_7$jOxmhI!mpeOGug7;(eWVP_uw(R~6qPWk>NQ3hRuEZ0VZa}gFq(I0E? zFK1-1^)|*_jqF<=O?J1>kkeN7X(OlUi@y3-#brbG6fqZILj!m3ieg*fX%}t5? zMV-TA96W|M%x|Se-#X`sH6PjC0Dr~+9Y*fXONToSZ-K^>2~csLAk<_#1St#lsnY=R zR=>judG%f0w$8-8KkmDJvHOmHAyTtwKX-bx;fV^}!TMS{`=KPQ^y^b9#3SMfS7dw5 zFxq@3pn3k`J}!L|Rqi;X&*u`6WcB!o_1nY3(b48A?{NhpDN>tNt~WCz9at(G$5=bk zEl?qO(FfvdIhNJW`ua&oH$T4|K`o7O+}xkv$eY!>+3#=U?`CAzK7h5!hVm-6VLfrN zpC^^2)Ss;?_YKAu4aNQ5ilFtL0M$9Im3KsP-Oyhgvl|310k=UNA)-ZK@r<=df<=*8 zU`#*gW|;Ivrbp2ep#C{Ul{E(>gI-|yPf7cPC`aG2;Kbf`AQWOy<+X1gEpj2WU&E!+ zt+XzwxQw#$s`iL(g?78wRJyrUnzcMMIy9k>W`=uduGTZJ#a6f}U}`~2PnPYbt=KMv zNC7xrGtJxcNKBRU2k2vzxgWF`s&+4ef#Y;{ zHq^8VOagF%citc28-7fLz#Q2S+cVS266Oi-C&1>8T!v<~T|Zi_d}muv)|pC)jk`4X zW8z0zaX(wRN3SQ-(xqQIPCgA>bppgplsi>*tSKc(3^bTjH7qS=|5S#2!qf9Z7*e~? 
zvedP%R!oT!#l>+}qr%Bn>$aSqwZW_`<|*~+Thq~%?h>)04_?flaR%}v`Xyj)AQ7QezccAAN4{om%Tk*cv-CiKu#AdsW zdJu=R-3KAZvgyfcvyN4$WElahy-*vBlau{c0Xp>As=4`?y~?_ZEn-H^TnbwWyfGDZLVlF&D!MNBd{Pft?(hOiS;h@ zC}a|dRL$)zBkJu;yZ5OVf3uLz^QF!-VTR#+SJcar>i7F_&?qV=y4<*yMgi)FcGi~Hd~ z-a2p|_cA5+S^%g6-`A;GI3g%yVoy*Ym{*AK??mxEu$wY4*;`++D*Ni-tXbo>8nVnJ zyEyodE+FU-^M~Vchc|&v!)SjGut2rX>YL?`mVOO4w}8T(tuRdwLsIXoc8Iu~KZ30% zcpQoD)K~$o5$|fHi<0xctm_(yw=BNb`LG-dvgte~^C<45aQH_1q`y>Do*p(Zh|)u^ zHdrr8B5810)9U7 z&#pai{osnpPm7kOb_y>eUHKV5?Z%Lnq;*OAP74eI(V)bC(0NhjS}fr}_UeX*^5FOM zN12}6TE$k=MV?irx&W^lsqF*+{Oa`fW_GK7m+p*f=T0H+J>?{A_H7D!H)Fi;M{F}9f8LuUjn%hRVqrByCYU%(knC_G1P%lxR*-F!K z61#RM-|3kOGMomEweakY`Q!pa_3)z!egK9 zQQv3;l(c!EO7rV^Y6OI~!5VJV1{||c5v4N-00KH3=q*~`GHB(f1h#Pv20xQw1EnWr zVJey0ma;PfjN=K@DN%OexFz5M*X|>7EoeAEVKJ1iRkkHC0?XJM(VQ^#eGW1+Ed};w zm5OZK&`2tz@(K9L%19t4!Eh%;(yILJ59D&JEWwur%LnJ`5C|7KeU;xi)TP-a5YL7w zWs{os(!5y+M}^?ANd*DF#*JrEX2Ok#{f3OWIxxxl$;lAx97S#%7e#@@*lFX)fbMsZ z{~Tkf?-9XcM8UcgeGQa~H|fOt$CE2 zYXT#mCWXjX6pLx_O=P0T`E%kYL-z<*_6DX!M+{o)tId{OIV8oIGvY`iDC6$yBxq%kvaD@%$5^HN)x;Ey)8P zOgd5n5E&3xfYb$->K@tVYtet&Qzal9RvV(mpY{jH56R5cXo6yec_}mFV)Z;oKXxSC z#D2u~o1TFK_9t{c)O3YRnIl_}w^>YdcsyvUpU-GcKSAjD`AH|90{J!j z94&$yv=_fNMvRg69*IcC%}!y{L6gLa)i1mmmZA#bkrpDhXUmI|T_koI7o+Y7T@3t; z8DuLkKBi$(en4(->E}kem9}?Y^9cHkdN7Mm(9D8*|FgT>E-m`ztrGAsa_vX;*$z!={d2_7i z_0FXEY1>}IGOp?H_F=PIsdpBKZW9Z$hHoqQn0_oa(%rJmx?rQlX?DvuJQB-ENGa)# zpLsbhjz2-%-|j@i!osA8^a`p|MdPmNg58AaIi{3_47Gs1ro*Bx4gcJuiucN zNR|AcwiT)j1d23{FpK6|x=@a=f(t>{G?{45q)M;^Sye5V-)=;pT{2UckIPcmir^i{ zH*;^~B4SxPYtJOXm~vfq-$!2cvkWr*EjH-FoKGNmVytCdffn<>jJ+d8k~%jUhNoCx zOlkQ;Py*S%wret+;kY`GqO;1|@q* zEv4c6hY-!oq^*2TJ0p)t$$7bPAJ~sblK75y!YV> zkp&kP*L>-T1x{lH$g(N}TcMAhX@eFn=U%U`0S*#9|95bspR2FF>{Hkv2QU;?NHc+U z1^UmvpfvSY(&l~r8!m7#)M~C#FF2ugyCuDJA}gZslH9SysOjP zzE0LZvOK}XH78Txz|7QbrRBVqnm(th8qcPjy#ur>$U%t`NsaW%U(HazF6?peAC}W;^(i% z8h`cK7VzZN){UydjkEMN%$%Okn!jQ`^OAa*Ul;AQ{s-S*y!c)Re3S~(&2PK}Tn;yJ z|LdU339jcZJ4~9(#Mqglm4g)dP<1&hQYn*X?m=+ zsR#o1;R}hFVT@%Qm<8N5v+>y*OCyXS7Zr{d4`o@jg#vzH-rbPxmjJu~fJOV7jJ-SN k$jJtW$jF&TlMH{?XY7mOe135MDh43%boFyt=akR{0CvvtV*mgE literal 0 HcmV?d00001 diff --git a/docs/img/pycharm-with-pyspark3.png b/docs/img/pycharm-with-pyspark3.png new file mode 100644 index 0000000000000000000000000000000000000000..7a4113dd4e658d8c99e254c1d9df71f3b4a5122f GIT binary patch literal 15981 zcmdVBbyyuu_brGV+#x`4x8N?pCAho0ySo!45L|;NxVvkx;0X@F-Q9JX_x+OZH#2vh z`R_i@C5N2us@_%ARlBSAT6+Jp@s&Ak)MH6?7gj7_Pp$HA#S3FNLId>k-J6PS# zp6_0qce0wnV$Gd<*JU(t0_YSg$XR}dp=HPZ=z2mv6+|%zd|$QUkZ+{s;Xy5EqV=N6 zzYO^^q{Egk?qDVRMLqZQR*3;j0sT7-FAXQc8ixtr#wUtqu$*fWw;r+Pb+}`}R7xRu z(c5lNq3qdDP@xP9uFSot_&uo%B$)iV1Gyzw?A!3nxB*#_^Vl}D2F$Y%(vUt_Zzm{x z!tVp94MRgiS*ujA=_JoToXykz$&W>Y4-H#mN5es-HU&`-b)@)Tb2kjEy&2zI}U5J&XWlVT@+VXZ} zzeLGLE$U|+kgZ5(;4s|mm_rX8O|(&a+Ay3ujrE2OpD<-&;Pa0XV}T$xL!IvKnlbtw z%p$R+Lo9=Ipb_?%_=D-!7c`g;ZI5ihQ+H>J)mQm^Yfwn;r*X9X;K2xSz`BH38*V+o za7V#@hG$o&S%3?Qc~yEss#6Z8*T`O@L!$N;oLEJK>DL1eivku z07C%7h84t$4G=S+(SZ6S2sek=5DZ_1jSt4YO3j7*rQ6bhPzPeROV$CK7n(1Js13Sj zmF#UWF)U0R$-6x8J)!!J@Hn3qv<0X;{c3y>OLS2SU=}|pJYl7oP z(GNcP&Ub3<0fJJ1r{<9&GW~4y)ko2=NL1yWRFGEmyAMgZB??GIYARIw93?R9A@@=J zUyCiE7jTcFk%b3xns@Opoi4pxpjx1pLXHJjKHPi_`?bPs62;Mvz7c9@yv}TyDwawr zgR_mgO}o4DoS9H>nov4epry;k+CA1TBm42d;Uh9 zTJ0SN&9aiuH&F!^H6t2&g?9??lzDPpL@XuJa%{dHmX0XuRp=D^NZoauhr_G($x+Bq zNSn4xWe#B`rM$uK3t5+6Q-r61p~j~6Re3WVyDz<^G^!ym9cjK75 zQ!VM3mJJszd994B(9LUR^5!@5i(6=Z*LBq`Uq;}}j||fdlnot5bI)X0W*K(;ek_2p zSu-K2DynNTgwy8HhNpMecGEU2(i2-#n^uF?X}5B>lSBH$=If+ucl;m7;K;Q2TKLO6T0G|jH(dM< zUwK9ywGO6^FGuJ5G}jkbd3TLA;}2_wxA+tK5zQ;i_1yZJ?wcwc_>S)9N9XBhkD7iH 
z9nT!&9J?Mfnx~I#k5^tZp_ zuZ^x+zMZ?5@R7TZx^KB-xUjttxr4gLd~m%}el&iheawb>3(A9iX($ zj((2$j(lge>nVNu(m!QTDeK7B(Ty=sD56I$vKxK%{Nt6C6ers63N9{o^fhyuiJiOC zK1v~QBgm5*$8W?R#CJM<=Tgmxd*e1#k!V=K=niwkKAztAne)fZYRWl`5}%T^vVMVQ zo~3FNOQ%%U@AdQ9U>tX^v^1HE+u{%aln!QHxB)m8WNBFVQ!= z!H)2DT6AjIL6jCI56g>n>8PVf2}S%cYid?QWYX@L<(cG}jn&7%6IU6}IMmrOXLQ!m z2MGlZ1(TAK;*e6uIIZ7eWqHCfQ}Z;OyG1knTm5YXwa%|j76;SOck3lls+|=Ly1Fg* zJztvG%Rlmbq(yk}gMG~Hkf@eWa`7*JSZEjc>i_0>u71pLY0Pyc(z8!p+D3Xvs-t43 zqO+yjPUJ51>~k?*D+jmzLM=|KX5HN~RwX@q{=0mrd|&My8{38cIkmQlCu^_Xe9J}o{(8S~Gy9kTg?rgA$n)!(`9`*TW?mMZUC#~X(|VXD%LMbnt!!pbIHZr3Ew`nxX|^#uoVv+~dTmj{CkI~4_Z z44hhaEz34XSxdgx$+P7bjZb#^Ej=C1N5_k;OE*Txd`;Odojc$s-NRvNC@cI!-fY(% zTNon=>xDz|!&#OBrT3$cjs1i>S*-1T?X%h9eEy3AN8V4Zmp7G99~plz4nbc+Tj?)! z^76f9^0=68Q-7-Ls~iaZ^qhMhqia91{P-AlLo^$<5%#bmwH)24{i3(=w1yH*v>^Un zJj-|Fo{K~4QF23h)O>qqjL}gax*yB8&zJti?795P`^OE{hUUhGQFI6^H+)VkRAB+c z=`j-v3KenfcUiFTi-PSv$-shVa7wsGs;n=~RDC!B!M0$%WgueMUrFH{LTr6_0TIm9 z3wT|~$mds#aRoec@>`@NljPfzx3}H|U=HPAJ_%7VF-L?%FKVCPJfH?NWE67~g5gYv zhQ}<3PG$PG`J8g5eK~t^>F=~=QVb-!ZGEPiQNl>|Kcb=o0$d|g4M{UuS+IA&F)SDa zI64?4a0Cwg0}HbR1O0Oh21W^dgMmTDhl0TX-{`=vLJq{AS_rBf$UnzmVXqeoDT_!- z0^iEUPNt@I&KCAAL@a!wKw!;hsiNVcAuGdUY;Vh8Xku?<%HUz^@M;3a>%jvY+M2o; z5_{O%*g5lf@R9y=1rKoiTFgjF{LdvW)_kNIvI@i^_D-h6>wb8~YuGO;kSu+Rfn&^vqDxfpuT+c}f{ z+sL1GL`|KIoh%((EbZ-xU)wb_vUhdiBPD(9=B<8W-gXy{~uZ!_n)5rZRvSWbR;UdF$!!4FS4;tRMc(>F;`Ar%ksqfD68H)8kw z0tSPIgWP12Ado03D)>g=Thv$#)-X~tKGZA0v&ZbUD2Gq*yTz*!q=iL40yn88V)KGF zGg2r9o@TWlrhc+cPA(;cZI$n678KI@K9%lQG^~n1h&NP+u;hXe|9)u10@lg%zXbRHy+9$O3gz!f-W%{w35iz7xFY|#o?OuJ zJO3YJ@>nGhwVYB3w*}(g4*K98{&W>9D20~8C0V?k!~C~DpmWH-C;z)Uw-gnGs3whV zI5!_`wb3j4WXuf=MUvd1#!#BuRq=vctE*((9GIUS=YxUUg$srkNq7_@%6QC;AWeL@ zSV1GWO?2)>ZE87m>CAdfScMD?gHtAGP$HA!I6E{taiDXmAQsiz9fyN0#L4Cza`QT} z%Ao(OgcBmTmz4OO2lV6?TajT2FwpLiiGTz<&|mQkDfdX$MhZuAg6zU(Rq6Ux ztl$`Ftot7R;LGGhhO1)R7w0d63z=lIp=|;3UeoY61F+~0WMDCagOS>UzX(xUC`@^Q z0mj2^DzAqSCX#bPl-{7NW?>M)6aIH8A;1SrGmp|myj7LDJr-iMKtm>nao@oy35Moq zTjydRicV?a^02^{TZ^X&>pqa%9zyE07>b7@1=cC$KP`TDKue)kmHISeL==f+m@Eu& zM$+ZY$*-enwFzn}!{C0uO%9Gu{Ld#G5^x$WLDRUu3mu1%!xfQ1;2i(QHF^WV`Xupl zY9#(+LLB%5P9Z~dESg*LkMphz0-lMiph^D6wFEao`2IV+<#S}>(LaKq5H@dC{8M}% zuJ<-6q>~d{Tn~owQ}o__gvWHEC8a#0lFv$P^StQKD-*@4@qh6fMRpC+j6kPA*W^1O z{dc5+iR7r&Tz6Nq`lgS!XHJi|j)MtQF>h$qWb{|F_`FSa#xl<*4KBwI<=lyv0 z9xaC$RB}Ha?=JF_@9DotCB5?;GDgqj_tkp7-9Yi}3~tYC;&%sn}qWx*pBw7j}$A7ubPlf&{ThtUVzD%NDmIT+TqF zkip}nRw#~otLb+Igm1{)4nb1ntF<*XpoV06Z8a8F)~n#9#NaSq5EkKjtJ_gLi|NoV zOkTwJUN$PF2(~ZvZ{(xIqy3XOv-Tj+&|E@3(T&M*?!rJw|$b*y7bj}!w44?bG zLI{-Ar<;{*GSnVCPV+H*`PmxBjSqXXmD#|!KiuD(F#11VWbDq){%AxI5pv%0qZMx<9B)8A_sk zIK1+z^1Lw9Y;wrecaqrzil{hOd}_C`N*FE&#`zy)oOZ_O?=E&y(pb%h=o=Tm9-Y(1 z>PryAV8Ci;to8>EfR^Kb+jr`gsFjzT8K^}DoRn)fCcjfH`B9|!%|&ZO@Q_fCC%8v| zvF$V{nmr3X97OiE>^ms!C}Ty6^bJZ-)Cv=qHMzI%mtpV+ex;LUKjIhJZ=j z`WC}Ku|oC)6AT-&euu@By5c;}=QK!>0t=EQ2iH>y-^FFO0-1&pr}6u?L=WWPzZ2{d zH2HvMq&NmVE})zD&p+im@3!$YTU;zf*EhdLGN3=EfiYrag?V6+a!s>)U)vnf>@Byq z`i-rpG21S%E!{w#5GeBtND>#K0dryy14qbln)=odg8JoQ<%KOB@XG`FULgqsw*cOw zh5E^zZ+8P8Os9f+Vgz>)_57#lM1HY5IS1da1i0KB0M94b&6|UNP-EF1R$Myz9 zq;13}t*(`nvdP{fBF$n4i<9}gUsIw%qw6yHFhsUBT`WSQ8WV6(w zs#dO*e=;zUFKum)kbH4CM5D##q4{$R+8KEr{Gnrng-sMJS^qE@?U(qeOmLAH?qMaX zGy8RDEw2@;L;foV$RKyd-iirHJn6n-S(QvSoSdR@K17#CPDp987flUxD0qB zmC8noy}XRZdr0K9OG3}G?0v%5e6#Fr`XiQv#q->xn;7xe&vI?fyYpejE64|6U!Sn7 zPILxV_aN-${Ye|y+n>Vu^)+FsfvKVdDEzlMZC5keUNpyN14Off3Om5FySuqE=l&Am$G?-ztNTz^B1OB}C= zk}R_Z*To;6M5}JK25yK%0xNHs^R~;A?49s5IEev0BqXgU!M<3lAYGY8weSge;9MYa z*K=fPWrr5Z#HkgrtpRu3p>20Nbf9XU1ez@a^viUq`h#p2D!3uMJhX=3qcHhe8tA<+ 
z)TMlQGo|d8-N8_0-Bj2$tuv!QIE|K&t}3r41)^SfQ5X+{dBkA_)Sg*g_skBgI*eX@ za*p(tJ4uK^Qq+bW5GSl1)`(RZIEu}p%1i*8gmrsy*;yQ6PjDcNLK`3DhtwRxDeTW& zfvXn38_NBfj2j}uSj>_sSKp$TA$p~PliA9_X~t7f$!EJRiR5JPJ8Lx%E$82)uJU{y zAlESrYO|iLn0}zt^FCrfqUh?EtHVsjBtAbi2utl-z3Ih}CyqqccnMPnKYM#gj4GVZ z?pc-%ypEHQ33zsuHN1MWkPdsg=x`z5LZew1exQR3Ot&s;)17A9Utd3(g>s>%#&DzS#0HRDf*HgbeBd6tv!&|tkuf`2-g3+SFTO`YUEFj7 zbM<5)pJwZdQ0$yBi0x2U23Q6%83q5G z#_v3k;yzs9tj@6D2Z9mS-R&^;f@Q(;53H{HDxF`up^c!1Gc*VTsjw0VZHNbm58pTl z$g7i1Gjwf0oZ-H$dV|CqIr>;Mi(7R@6!b<9-{vta4Ayk+s8xu6D#oRXfDTf`K(`+f zs=q%Ha^}=8mCG8Zz?RtH)Vv;hVAp+Z!G=D2fIOBCqT4+(k|Nd8fDGS^X}e!Ax%H$& zyK<#x`jan)`=Vo52Mr4gOWna_{#^LiQNlN^fs|h%R4GS7Pk(-wwdJLZOEw)1OT*CF zG{b^HiExY|BZziH2DhvdA+kR8SltXUpi*$q{RNMa2riGoC7a6BeGMMnPIw!v4n9l> z361zuntyvOhD8RJihf~mB((Yk0(=<}v0~01LI|YJe~d||Ow5&Jj|~%tvA+NWCS{{%BB^?nOR}bMK>mWNbT1~lJ z!2k>QXy^p8hlPknlfgS+G zu#Cm>G(ubDL3xbKXJY2~I@0;L#dvRMIVSc1hn#9Fl$Uxst**bflGk;jv^ocR-G>Jy zyI~VKl4^9fs!=Idl?&wv_=0`6lGH(+1c6%^u+nt)24LcyAr}2~c(t~OO$7v1=3ups zu-Ij@@AHvtBK@BZ^tDusn0m}~+9JRmgU->IltZ-%9{8CYzE>!0h1FmX;*5+PRUpkG zJ?Q(KiM4ZMLT-hlkv)#|#>dp#--)&O1P(w5gKn64xo&gC+GnA3=q_lRVYgit1vDBHSKMeWCaTt=XT(@+w7>ALztl)~0$ z00F)hkx%Kt@{>ct`jBbw6iyMw%}o!Zu%)UOp^~Wr)1JhT5$;}%y4s|C@|X)h{pkDx zAC5d&a4rj@iHX@ba2A49?Evq66PO3VDa;Angu%Aa4v%bqU#N0n+UJi612ptQ!Bpg@ zszw|1LVB>gO}pL5md3Wh3BD1ZSRtU61HSZ?N|JlaM3tw;kp|#}KyYLlITH`r%~oVC zdL5MC2S)pX&ppEbGDwyu&O)nbzt(yhqV^j%8cAjLLJ=ye;|bm(3fE}MgdT?laf!m( zeE0N_VoLAt`nUy-NQa7>Ytuclz<_N34R{eD{eIQf!_OL^+?j1GcI4Yfd?qg|1i`0$x)C~|OTc@A8 zq52Uc-#{EAdEgO)es0e>#K^Zgb&+NSY+>7IiFN%Jp9vT}?N|SfTX*a zqc;|DAl1*A?JZ~EWh9yVNu&s~FG^rXQbjO&<6z@f8hl0`OAXOHg)X~myG>G7^$d$g z*qe&8N-5SAHSh?FXZHN-5jOqqDGt@_RxW)-=qwZVf>`ltHP>pH<$D=wC$_PkYH!y^ zO?GJ6+7*f^NWPI_X^r!`UndgJ?ki`dCrw^e*)30ooz7>EK{#B{WrBubVZ5oooBMw; zyk!~@+)2c0=!pa)37majeF9qVwG#jv`(`t8S>9djQ`n?18(OoL8;FGZW+vc#gxHji z=$JW#0D8gWN|Kj03M|0Z0>uZVEvu^mzAd`T31Y+eiok7^3UkDnQEf|WSmrj)5QxEgTuxLVmLA+H$e7Dk6e;2pUiZx@zZ{{-6wiY4tn+ zYpe476^?8covQCUM;@u4bcOr%Dm+h% zFYsF9WT{(%4-Ao&AG#@yk)U9q@7scq@Y%lsNz+QcPMZVw+O{Kq!>wO=g zpfh@GrV#7!4ItqnmZz|9hciF4#c4BDs>vgZvVs0{@{gi1|TZ(W&ex(*m*f$_DK2 z#%?@lX%Eis#y-x<|3eQs_@RcEa)ApYSH!vIWqhX}H?n^bu`@|bjRZ*mY3XyiG`vV7 z4QRODD8a=($q_nJp&jzTy6j}Q=RcfP7j592&DM<;-elY#svtH10sIpNwqO+HAAUiC z4&WDh&&THLi-~MThn|1UWcX&b@u2@$1J#@uFOP7yr77Q^PpFk?#7#g?G&zR}#*v!l z&5i1g5d>S(06d8V1WaUdTrfV7@VTL=58S5GI{9ICWlW_c-IESgz)L;uOF1~?e`p6$ z;ecVDI0a+Ezf46FKx#$cX<^Cyp~O((fbmEaF{AutDi#59s27PI;qQ>cL%{q=IMA(- zJ);TUQ)c^|N4&<|B`U?Fw05hm6nja@W!Eo%7=@_xW%8-h+Wk$Vzz2`2+wKx?1z`nHS}Y0@?ID^YJWzEjg>Q8cd>1 zS@rK=_fJbpJO3q4G#E!F-dp_0*l{bY=XYmXtX`RAv)H8Qb#qJ)AR(mEV-re3KCe%C z*j|Iz_Ctwc%&fL+-}PMy-UA`d`o`S`4f}BRPCeP(~QB$kVBDbA7 zMaLUOTC75rw?aXitZbJT;Zy{&Y>=mmjj|AmaSgE|pZnTq5?Qpp*RY`yv>6>0J!J zm&7~uXx;Ye7BI$Zab@+k;h27o&CmTEBqAMgIIJ8c-rqt;zF^YHEv;wTtTZwz!&MVa zJt+%q(>2Lmp9qjY-9O2fsHT(o-n(3zE;ZOTA`*-b9DJEmfAAp{}*c>E#VrV-RE>?CCU5ib~*a&q)E%|5lu=WA0}l-fuj8iYIf`7;tfE)bgrxDeZpqkb!irIl)Lt}?973Hl$$D5S3GX34PLMPu9nf% z_pOOu+l^+8N2^3#K4n(Fb5>o%Y}@dvvx-w?c1dA>ro3caKD$GT!+!ZhIyiWoKRq|u z*=dhLq!ZSU1RVVo*vHOt!;ip|ouCol5@+(bDxp;GcYNoxnaeR#dDtp0%zEYTiUO}b z)D!A=VH0^UIc-WnKLcl6wKmM)+pj>^P@Xw~hK`-J49;(ntxa0s| zI;)qi`0axwBRohJaE|(}pd9!CIsJS`{cct%TlKSM7HE^@+V6EQ z!qFxZe-7m*o7hcw6ZA)=F(yzk{#ws78-=%0*dzYkc#<%PVfN$crZN7j7y?aT{D8S@ zty%qaj^2j`yCpLBB@XFjuY)K?WElU)xZ?dMSwfFh_<9|8DbEthVm`5p%{Xy8^ux;A z0RhV5YX-`f$DONBD?(SkpYL z?xAA6D2l?rOXt1Op>5tT36%<`SAlRXz@pN-Z8{5M;`Tg`vPzM2TJ17)MUjaUcq zFTD~tGV8>Cc$7wbcI!bZ50=*&Yw9DTxfZis@sl~ylu{Nx39%QDz1qi|QnI4bx13Wx ze>i!_lTHcka7MKsztkoPe%4cmJJipU%aoE5oVn?{0AAn3>96d$7X-9_I7(ACKpuK_ 
z;#LuXISwJ^;F4ABn%1B@*|rgQ88wmv1*@T3&#O0sN1MWfe+Y89?8f5y)S8dX zJaSbi7e@1|R_JiI8_sSPs}#rU*})Htb=7QKJ*{?=4Gq_6-QRSoAT?3AuoLj8B<)SA zKR~ZsUP4&y?T564Doc?k-lWePML_!_V3kdH9!!^(w241MnzwsRCt{a=`J$P9l?Bop zpH`n$t=)*c+Vi|n8!nKY_l5^NvRHJS#)qPlUvn{^lPcTY zDsASfbP*DORetwNQBn8Ai->6IcZ03vmTa?BdI0}I?ar(f>Qa%}!!-U~)0u$JZIRZ* zVtI(+dsXp<)aSjz`UPc-eXsJJrNf&EznSxOS46b)ryCSYMRY3>`u*qAo)?-xHzm)) zu}?9K9Y2C! zue{J>*tq#V95r5rPIEhN1FV(El*oR7)7dhC30$&HRU7&)do^vRR=t(e_6Uxy-RPJ3 z2M&OCuY}&+7uxdTMvKnW8;TUe9VweF<0)@qhy$GL0Ifv#y|*NctILUBS`@*1{o@wv z3>yvS56AvRUjZO^gemf}m1}WGqt_P8_&zy{g{6j*Af59wJSNWp9IX<$zs5Q_?nC#^ z)oiCy@Y2Z;t&-MNJDXPx@Ge9cGS>qfgJiHNih9Vc=yc8@Vc0yMulvJ|Dc z&4(FPORCO=O}Q`+A}_2u8AZ@hMp)7^;ZQAm!sJzkBRgqP)UdO`LaA&YZ)MMyeBE%$sd5M!zKnzBo_TdB?Pc9 z`=)^8kqRGm7-tKhrB%3t%qSVf^^w(i@AN; zSDJ||*2@dedT{@FV)JZ3JIp7Q(^5(Nl+&OK?dsj>2hW?O;{DNZF=kAvT!u28=6I!c zH@o^3qxS0zd@ctGPXmr&DC`VO`P}m_7r*8@owmGiE-x!t9Hq?nev){9+oG-X$!((! zlQ~oLZ8;P7>9cC4V7&;h%!-Pt9q33PL0-4@54R%YA=pP2 zBx$)c`_sdXdiv?{6gxXW|N6V_{uDZL{ZYyxGow<8$6r3K`4Tvr2jp`ms@CSOtQya& zLqhn-JbqKs)62G#{1iV?uTDmY31?qzUE;2E(;EJC!+aeE08}U`8@kzH9*1K2)GCd4 zYIF=Yi_V$L7t97-R92AIEei+E5qir!&Xlwknz=G*>sq2CnXOW%p-@AXb5+S4M0hOB z!2=y{LV%!fFq+^Y5vHVVfh@5-F^|mVSyG;nRG#;jD50l8+=5Pv-t{qGMe_KBt2tU&ZQ zC!-hSajz&#_NvkU%#lA`q^Jmu+$PiU=#(b&sMfxH=RmRLID|!I-^Q%S|Kd_9`EIuE zTphwQv8p>0f;#W@lqF8Vm09w?Tx|8b3sxL1J|+-mf%6hG=gdKk z7@bwQT)53`eZI!$B?cn&M6pP{ir#5r>(C3e9-#_+vt1e8{yB^rFd;U1^hFZ>$zmjh} zi00s*uFSrshz6eBb&YIJb_(H)JeHCDY5kvgB6u95rM%qJTir14DaFY1S;!VXyy8JQ zAQ`?P8skv^6%!;e{H&fR)f+` z|Efg<(SgB}qw$yi1H*LI;LAEj zx{n-^uqs;Ff(?SBdeW(l|bNtrBtJ^pcnVsk$y0)P}0j;!^B$HLcD9 zWcQ1e@;|ZJFY9<+AJR}rCH|a$xZEQH@JRev=KF&aKq8S441*%|^89plqu1^w3B-X= zL%#DhrT}JDmjbYbtENPiOo#U$Z_O{ZN1axD zZ{Tc%o?(c56yB*v7=d4V8MX#Pa1dfs7SwOOH7)9Wb z7*Kj7Twb~?VR0^dd`o1|#r;*|ab|RP;{Q@Q9CUz>*ac`R7P5xihbn0uZq0Xp<|;Kg z7`|pf|3O|r?6}wR%DFf1evRBLNVki(xJ@m|ypNH@*q8&&xeQdM+KJ>;PWrXpPighzNct zhQn@^NQk)NbwJN&0eH@Z5`a1OX3EnWFi=nryqE|l9&S!C$0=mpge|&W=W_=^?F<4ae7o6bP9B#1ij*H`=Gb zVNe37a)l94yG;8SBx>_c2sR0M@LkUtw-CJMKWWs;QY)W6M7<^PJm35k+`TthXbMpN zueoY^xwgcUM53$rP_-%_y9vHyK0zA(`YPrXJK)IX6HC-x{9;mA^)f5{GCner2=HS= zfF6TNUDq{fo6qOgfxf2wlCkhP_>md`Vhy0#rXGrk6&z87yF_eS>MNCBV%#5s-t7wj zn5`qb)n$*gvhDm+RtWFiX2PqcLjs@-UHxl6VZdTP=IZlY`A=HO9U-8h7{YW%?w}rD z0D2Gv8|G_nzHAwSI3F)dfkvmy`!MW>o@Re)xkatuGa3;vQt;sUP$axMrtYqAoHamK z8?xtL_m6L>=7^K>?3VhO$Kf z&$o)gTB|^!@|5am_;bLf-1T6F=lx703K2hkNm2M%SP9Wb?s=3G)aS0>c^}=oL5&`; zeewcND&Ez^)M!l3A5(_Hr4b#PmI2|DMcg%@&PiW)U6Q*Z%T@N%7a~D6z~JRMn5|6V zaoyi_<8xRI$TAe^A{Ff!P&t_Lm%(k7uY4l%#aI>v(Fv`-Wp&$(Y4A6U?e9Voif#bI z4#%v+fp%-W{wSM!pRN5Ov~00}eTUZ72_&oUJ*G=kYn-;Ee06dJ!F^9UFDInEi~_B{ zo4^KPcSrh4V3+tRF84-YYyrSz=84d9%~JSU^HPQG`y;E5DUf!@4zmFyTryo{PrxR!U7Dq; zxlwJN_nPkZz}MJ-ym+8MJfl!~23X%c{M7Dr1%O#`ep@@Pa5YdCgbJ1W#Prr+<_So> z?+S!-jY!Du0y>=Q{zcbWUH+IUK&WE$RafBKAc|v+&z;kgL)Z;`20((70Gg*s;lX;R z7&y|cllm9!6k-x5lI`jc)RX6}eBVIoi39O}r=;v^a|GSBHAwJcKawyv1WO2zb2yZGV2;dNSm`;!E`zkF!CV z(1qd4qiU(z^f33X`6;YH1jrU#o$dN7`FoGO7dZ~EJ^f#oM2trVCfO-X77Me9F>dVO zH*XI)z#m4cM4?b%DCuAIUiJd%k{EmD&yROk=L!Z-)foXg&ZF#cUv_Bp*`9iop^`nuMILrI^*WypL zIl(B|(bFP>046*KS*YA07s=bzjipgQMIpYvhYMYD?`b;EPJ~EnXeqO-j znEpFCz^kEV6c@Yln%#A^5NuUaxKmiamnDSVKoi=#Z*{B6+byJKTq|^m8N|?5RfMrj zPj|6;!(X-bpab{o5=Fjovobu3BGk#IgjHU|}MM!-X`YWsJkzQEnM|MNGPo@(QP z{mf$z?=BFp1u=+et{qTMdTkpos|eyApa%^akNOV~xbJqOON-Cd&F2U~c3v4YKHl15 z%-LQmteW-T0DH;a{^Gf@LT^rtFT^SCs_ff?k+C`aeb$8o_gbQ>nIc6IXk>h9eZRZ> z4i206I_N@=HnERxy{^l?H%nfVm1jX~JZLEPm@@@uSg_D0P3Pq#DtCGp+Qaa?p}Y5w z7vuGx4Nx4dT_eSZ2g;uykE;D{CSJ<-j-r?DQ?y^Y(NmNitn@c8oSx2X^yhv8oK$-R z_7k%3T+0gN*_`;thX(hP<&H)-hvg83Ym()o`z|89_BEWoWvtWhwf&&RM@rqiY`dc! 
zwF>PQhgyM*2bhB^L~>MR2&-UqnPCJv8Mq|6tu$EPqZ?iNJ`iM;aun4V(7L@{r@I5174^vv+KXmq!j_L9u*hJF>CcH4> z#-(4}YkH(#J~g~F)mJ9?>aCO7>ep*>Nk6?2!g^cO&efn=93h&#!1d*-4_kk(e_%b& zE1j`qN14thydcWrAuOe(xom?H6DQjV1c=b6Zvhu=|A4W&F0a;gNMH|U$jV@*wmq8T za2RoY-&0H*UuSo{#%c2v?$>rq!Li5A)x=At&FMtv;=(|&Pp??|O8KM3Z)`n|wq_&j z)#l!lXi=w?`6P4qvHj1rk@8p0XLkBJ{tVQqDlkG1WAqSnThT;3M&VHb^Y$C2*-|F# zOtW_~t3Jp29!EHkr3p>l*0Ub9&=0@g#)|{c!1K`AX7mjecY~#;;VaIFMTJ;IO-CIm z3;Bafx@dv?VP#%w^q&G^5&(vv2c;$b1w{mq|0d;`2rST=0!Oxq{PnL}B*o-JD})V#{x1#c;RXNz literal 0 HcmV?d00001 diff --git a/python/docs/source/development/debugging.rst b/python/docs/source/development/debugging.rst index 5dbe913026397..c5f3351527f11 100644 --- a/python/docs/source/development/debugging.rst +++ b/python/docs/source/development/debugging.rst @@ -35,7 +35,7 @@ with JVM. Profiling and debugging JVM is described at `Useful Developer Tools `__. - *There are many other ways of debugging PySpark applications*. For example, you can remotely debug by using the open source `Remote Debugger `_ instead of using PyCharm Professional documented here. diff --git a/python/docs/source/development/index.rst b/python/docs/source/development/index.rst index 9b486976c6a71..bf8b2ccafaa9e 100644 --- a/python/docs/source/development/index.rst +++ b/python/docs/source/development/index.rst @@ -25,3 +25,4 @@ Development contributing testing debugging + setting_ide diff --git a/python/docs/source/development/setting_ide.rst b/python/docs/source/development/setting_ide.rst new file mode 100644 index 0000000000000..dcb44c1483006 --- /dev/null +++ b/python/docs/source/development/setting_ide.rst @@ -0,0 +1,62 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +=============== +Setting up IDEs +=============== + + +PyCharm +------- + +This section describes how to setup PySpark on PyCharm. +It guides step by step to the process of downloading the source code from GitHub and running the test code successfully. + +Firstly, download the Spark source code from GitHub using git url. You can download the source code by simply using ``git clone`` command as shown below. +If you want to download the code from any forked repository rather than Spark original repository, please change the url properly. + +.. code-block:: bash + + git clone https://github.com/apache/spark.git + +When the download is completed, go to the ``spark`` directory and build the package. +SBT build is generally much faster than Maven. More details about the build are documented `here `_. + +.. code-block:: bash + + build/sbt package + +After building is finished, run PyCharm and select the path ``spark/python``. + +.. 
+.. image:: ../../../../docs/img/pycharm-with-pyspark1.png
+    :alt: Select the Spark path
+
+
+Let's go to the path ``python/pyspark/tests`` in PyCharm and try to run any test, such as ``test_join.py``.
+You may see a ``KeyError: 'SPARK_HOME'`` because the environment variable has not been set yet.
+
+Go to **Run -> Edit Configurations** and set the environment variables as below.
+Make sure to specify your own path for ``SPARK_HOME`` rather than ``/.../spark``. After setting the variable, click **Okay** to apply the changes.
+
+.. image:: ../../../../docs/img/pycharm-with-pyspark2.png
+    :alt: Setting up SPARK_HOME
+
+
+Once ``SPARK_HOME`` is set properly, you'll be able to run the tests as shown below:
+
+.. image:: ../../../../docs/img/pycharm-with-pyspark3.png
+    :alt: Running tests properly

From 0c66813ad9867e366689b47c81bdd8a94ac17828 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 21 Sep 2020 13:28:31 +0800 Subject: [PATCH 0075/1009] Revert "[SPARK-32850][CORE] Simplify the RPC message flow of decommission" This reverts commit 56ae95053df4afa9764df3f1d88f300896ca0183. --- .../spark/ExecutorAllocationClient.scala | 19 ++-- .../spark/ExecutorAllocationManager.scala | 5 +- .../apache/spark/deploy/DeployMessage.scala | 31 ++----- .../apache/spark/deploy/master/Master.scala | 23 ++--- .../apache/spark/deploy/worker/Worker.scala | 28 +++--- .../CoarseGrainedExecutorBackend.scala | 60 +++++++------ .../cluster/CoarseGrainedClusterMessage.scala | 16 ++-- .../CoarseGrainedSchedulerBackend.scala | 86 ++++++++++++------- .../cluster/StandaloneSchedulerBackend.scala | 7 +- .../apache/spark/storage/BlockManager.scala | 6 +- .../storage/BlockManagerMasterEndpoint.scala | 18 +++- .../storage/BlockManagerStorageEndpoint.scala | 2 +- .../deploy/DecommissionWorkerSuite.scala | 4 +- .../spark/deploy/client/AppClientSuite.scala | 7 +- .../scheduler/WorkerDecommissionSuite.scala | 7 +- .../ExecutorAllocationManagerSuite.scala | 6 +- 16 files changed, 148 insertions(+), 177 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala index cdba1c44034c0..ce47f3fd32203 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala @@ -91,13 +91,11 @@ private[spark] trait ExecutorAllocationClient { * @param executorsAndDecomInfo identifiers of executors & decom info. * @param adjustTargetNumExecutors whether the target number of executors will be adjusted down * after these executors have been decommissioned. - * @param triggeredByExecutor whether the decommission is triggered at executor. * @return the ids of the executors acknowledged by the cluster manager to be removed. */ def decommissionExecutors( - executorsAndDecomInfo: Array[(String, ExecutorDecommissionInfo)], - adjustTargetNumExecutors: Boolean, - triggeredByExecutor: Boolean): Seq[String] = { + executorsAndDecomInfo: Array[(String, ExecutorDecommissionInfo)], + adjustTargetNumExecutors: Boolean): Seq[String] = { killExecutors(executorsAndDecomInfo.map(_._1), adjustTargetNumExecutors, countFailures = false) @@ -111,21 +109,14 @@ private[spark] trait ExecutorAllocationClient { * @param executorId identifiers of executor to decommission * @param decommissionInfo information about the decommission (reason, host loss) * @param adjustTargetNumExecutors if we should adjust the target number of executors.
- * @param triggeredByExecutor whether the decommission is triggered at executor. - * (TODO: add a new type like `ExecutorDecommissionInfo` for the - * case where executor is decommissioned at executor first, so we - * don't need this extra parameter.) * @return whether the request is acknowledged by the cluster manager. */ - final def decommissionExecutor( - executorId: String, + final def decommissionExecutor(executorId: String, decommissionInfo: ExecutorDecommissionInfo, - adjustTargetNumExecutors: Boolean, - triggeredByExecutor: Boolean = false): Boolean = { + adjustTargetNumExecutors: Boolean): Boolean = { val decommissionedExecutors = decommissionExecutors( Array((executorId, decommissionInfo)), - adjustTargetNumExecutors = adjustTargetNumExecutors, - triggeredByExecutor = triggeredByExecutor) + adjustTargetNumExecutors = adjustTargetNumExecutors) decommissionedExecutors.nonEmpty && decommissionedExecutors(0).equals(executorId) } diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index 1dd64df106bc2..596508a2cf8c8 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -580,10 +580,7 @@ private[spark] class ExecutorAllocationManager( if (decommissionEnabled) { val executorIdsWithoutHostLoss = executorIdsToBeRemoved.toSeq.map( id => (id, ExecutorDecommissionInfo("spark scale down"))).toArray - client.decommissionExecutors( - executorIdsWithoutHostLoss, - adjustTargetNumExecutors = false, - triggeredByExecutor = false) + client.decommissionExecutors(executorIdsWithoutHostLoss, adjustTargetNumExecutors = false) } else { client.killExecutors(executorIdsToBeRemoved.toSeq, adjustTargetNumExecutors = false, countFailures = false, force = false) diff --git a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala index 8bc909b096e71..83f373d526e90 100644 --- a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala @@ -61,34 +61,13 @@ private[deploy] object DeployMessages { } /** - * An internal message that used by Master itself, in order to handle the - * `DecommissionWorkersOnHosts` request from `MasterWebUI` asynchronously. - * @param ids A collection of Worker ids, which should be decommissioned. - */ - case class DecommissionWorkers(ids: Seq[String]) extends DeployMessage - - /** - * A message that sent from Master to Worker to decommission the Worker. - * It's used for the case where decommission is triggered at MasterWebUI. - * - * Note that decommission a Worker will cause all the executors on that Worker - * to be decommissioned as well. - */ - object DecommissionWorker extends DeployMessage - - /** - * A message that sent to the Worker itself when it receives PWR signal, - * indicating the Worker starts to decommission. - */ - object WorkerSigPWRReceived extends DeployMessage - - /** - * A message sent from Worker to Master to tell Master that the Worker has started - * decommissioning. It's used for the case where decommission is triggered at Worker. 
- * * @param id the worker id + * @param worker the worker endpoint ref */ - case class WorkerDecommissioning(id: String, workerRef: RpcEndpointRef) extends DeployMessage + case class WorkerDecommission( + id: String, + worker: RpcEndpointRef) + extends DeployMessage case class ExecutorStateChanged( appId: String, diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index 15f8be69d97bd..48516cdf83291 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -245,27 +245,15 @@ private[deploy] class Master( logError("Leadership has been revoked -- master shutting down.") System.exit(0) - case WorkerDecommissioning(id, workerRef) => + case WorkerDecommission(id, workerRef) => + logInfo("Recording worker %s decommissioning".format(id)) if (state == RecoveryState.STANDBY) { workerRef.send(MasterInStandby) } else { // We use foreach since get gives us an option and we can skip the failures. - idToWorker.get(id).foreach(w => decommissionWorker(w)) + idToWorker.get(id).foreach(decommissionWorker) } - case DecommissionWorkers(ids) => - // The caller has already checked the state when handling DecommissionWorkersOnHosts, - // so it should not be the STANDBY - assert(state != RecoveryState.STANDBY) - ids.foreach ( id => - // We use foreach since get gives us an option and we can skip the failures. - idToWorker.get(id).foreach { w => - decommissionWorker(w) - // Also send a message to the worker node to notify. - w.endpoint.send(DecommissionWorker) - } - ) - case RegisterWorker( id, workerHost, workerPort, workerRef, cores, memory, workerWebUiUrl, masterAddress, resources) => @@ -903,7 +891,10 @@ private[deploy] class Master( logInfo(s"Decommissioning the workers with host:ports ${workersToRemoveHostPorts}") // The workers are removed async to avoid blocking the receive loop for the entire batch - self.send(DecommissionWorkers(workersToRemove.map(_.id).toSeq)) + workersToRemove.foreach(wi => { + logInfo(s"Sending the worker decommission to ${wi.id} and ${wi.endpoint}") + self.send(WorkerDecommission(wi.id, wi.endpoint)) + }) // Return the count of workers actually removed workersToRemove.size diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala index 2e8474e3e3fc2..7649bc37c30b6 100755 --- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -70,10 +70,7 @@ private[deploy] class Worker( if (conf.get(config.DECOMMISSION_ENABLED)) { logInfo("Registering SIGPWR handler to trigger decommissioning.") SignalUtils.register("PWR", "Failed to register SIGPWR handler - " + - "disabling worker decommission feature.") { - self.send(WorkerSigPWRReceived) - true - } + "disabling worker decommission feature.")(decommissionSelf) } else { logInfo("Worker decommissioning not enabled, SIGPWR will result in exiting.") } @@ -140,8 +137,7 @@ private[deploy] class Worker( private var registered = false private var connected = false private var decommissioned = false - // expose for test - private[spark] val workerId = generateWorkerId() + private val workerId = generateWorkerId() private val sparkHome = if (sys.props.contains(IS_TESTING.key)) { assert(sys.props.contains("spark.test.home"), "spark.test.home is not set!") @@ -672,13 +668,8 @@ private[deploy] class Worker( 
finishedApps += id maybeCleanupApplication(id) - case DecommissionWorker => - decommissionSelf() - - case WorkerSigPWRReceived => + case WorkerDecommission(_, _) => decommissionSelf() - // Tell master we starts decommissioning so it stops trying to launch executor/driver on us - sendToMaster(WorkerDecommissioning(workerId, self)) } override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { @@ -777,15 +768,16 @@ private[deploy] class Worker( } } - private[deploy] def decommissionSelf(): Unit = { - if (conf.get(config.DECOMMISSION_ENABLED) && !decommissioned) { + private[deploy] def decommissionSelf(): Boolean = { + if (conf.get(config.DECOMMISSION_ENABLED)) { + logDebug("Decommissioning self") decommissioned = true - logInfo(s"Decommission worker $workerId.") - } else if (decommissioned) { - logWarning(s"Worker $workerId already started decommissioning.") + sendToMaster(WorkerDecommission(workerId, self)) } else { - logWarning(s"Receive decommission request, but decommission feature is disabled.") + logWarning("Asked to decommission self, but decommissioning not enabled") } + // Return true since can be called as a signal handler + true } private[worker] def handleDriverStateChanged(driverStateChanged: DriverStateChanged): Unit = { diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index d002f7b407e5e..48045bafe6e3f 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -40,7 +40,7 @@ import org.apache.spark.resource.ResourceProfile import org.apache.spark.resource.ResourceProfile._ import org.apache.spark.resource.ResourceUtils._ import org.apache.spark.rpc._ -import org.apache.spark.scheduler.{ExecutorLossReason, TaskDescription} +import org.apache.spark.scheduler.{ExecutorDecommissionInfo, ExecutorLossReason, TaskDescription} import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._ import org.apache.spark.serializer.SerializerInstance import org.apache.spark.util.{ChildFirstURLClassLoader, MutableURLClassLoader, SignalUtils, ThreadUtils, Utils} @@ -79,17 +79,12 @@ private[spark] class CoarseGrainedExecutorBackend( */ private[executor] val taskResources = new mutable.HashMap[Long, Map[String, ResourceInformation]] - private var decommissioned = false + @volatile private var decommissioned = false override def onStart(): Unit = { - if (env.conf.get(DECOMMISSION_ENABLED)) { - logInfo("Registering PWR handler to trigger decommissioning.") - SignalUtils.register("PWR", "Failed to register SIGPWR handler - " + - "disabling executor decommission feature.") { - self.send(ExecutorSigPWRReceived) - true - } - } + logInfo("Registering PWR handler.") + SignalUtils.register("PWR", "Failed to register SIGPWR handler - " + + "disabling decommission feature.")(decommissionSelf) logInfo("Connecting to driver: " + driverUrl) try { @@ -171,6 +166,17 @@ private[spark] class CoarseGrainedExecutorBackend( if (executor == null) { exitExecutor(1, "Received LaunchTask command but executor was null") } else { + if (decommissioned) { + val msg = "Asked to launch a task while decommissioned." 
+ logError(msg) + driver match { + case Some(endpoint) => + logInfo("Sending DecommissionExecutor to driver.") + endpoint.send(DecommissionExecutor(executorId, ExecutorDecommissionInfo(msg))) + case _ => + logError("No registered driver to send Decommission to.") + } + } val taskDesc = TaskDescription.decode(data.value) logInfo("Got assigned task " + taskDesc.taskId) taskResources(taskDesc.taskId) = taskDesc.resources @@ -207,17 +213,9 @@ private[spark] class CoarseGrainedExecutorBackend( logInfo(s"Received tokens of ${tokenBytes.length} bytes") SparkHadoopUtil.get.addDelegationTokens(tokenBytes, env.conf) - case DecommissionExecutor => + case DecommissionSelf => + logInfo("Received decommission self") decommissionSelf() - - case ExecutorSigPWRReceived => - decommissionSelf() - if (driver.nonEmpty) { - // Tell driver we starts decommissioning so it stops trying to schedule us - driver.get.askSync[Boolean](ExecutorDecommissioning(executorId)) - } else { - logError("No driver to message decommissioning.") - } } override def onDisconnected(remoteAddress: RpcAddress): Unit = { @@ -266,20 +264,17 @@ private[spark] class CoarseGrainedExecutorBackend( System.exit(code) } - private def decommissionSelf(): Unit = { - if (!env.conf.get(DECOMMISSION_ENABLED)) { - logWarning(s"Receive decommission request, but decommission feature is disabled.") - return - } else if (decommissioned) { - logWarning(s"Executor $executorId already started decommissioning.") - return - } - val msg = s"Decommission executor $executorId." + private def decommissionSelf(): Boolean = { + val msg = "Decommissioning self w/sync" logInfo(msg) try { decommissioned = true - if (env.conf.get(STORAGE_DECOMMISSION_ENABLED)) { - env.blockManager.decommissionBlockManager() + // Tell master we are are decommissioned so it stops trying to schedule us + if (driver.nonEmpty) { + driver.get.askSync[Boolean](DecommissionExecutor( + executorId, ExecutorDecommissionInfo(msg))) + } else { + logError("No driver to message decommissioning.") } if (executor != null) { executor.decommission() @@ -338,9 +333,12 @@ private[spark] class CoarseGrainedExecutorBackend( shutdownThread.start() logInfo("Will exit when finished decommissioning") + // Return true since we are handling a signal + true } catch { case e: Exception => logError("Unexpected error while decommissioning self", e) + false } } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala index d1b0e798c51be..7242ab7786061 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala @@ -95,17 +95,8 @@ private[spark] object CoarseGrainedClusterMessages { case class RemoveExecutor(executorId: String, reason: ExecutorLossReason) extends CoarseGrainedClusterMessage - // A message that sent from executor to driver to tell driver that the executor has started - // decommissioning. It's used for the case where decommission is triggered at executor (e.g., K8S) - case class ExecutorDecommissioning(executorId: String) extends CoarseGrainedClusterMessage - - // A message that sent from driver to executor to decommission that executor. - // It's used for Standalone's cases, where decommission is triggered at MasterWebUI or Worker. 
- object DecommissionExecutor extends CoarseGrainedClusterMessage - - // A message that sent to the executor itself when it receives PWR signal, - // indicating the executor starts to decommission. - object ExecutorSigPWRReceived extends CoarseGrainedClusterMessage + case class DecommissionExecutor(executorId: String, decommissionInfo: ExecutorDecommissionInfo) + extends CoarseGrainedClusterMessage case class RemoveWorker(workerId: String, host: String, message: String) extends CoarseGrainedClusterMessage @@ -145,4 +136,7 @@ private[spark] object CoarseGrainedClusterMessages { // The message to check if `CoarseGrainedSchedulerBackend` thinks the executor is alive or not. case class IsExecutorAlive(executorId: String) extends CoarseGrainedClusterMessage + + // Used to ask an executor to decommission itself. (Can be an internal message) + case object DecommissionSelf extends CoarseGrainedClusterMessage } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala index f6930da96a390..0f144125af7bf 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala @@ -191,6 +191,10 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp executorDataMap.get(executorId).foreach(_.executorEndpoint.send(StopExecutor)) removeExecutor(executorId, reason) + case DecommissionExecutor(executorId, decommissionInfo) => + logError(s"Received decommission executor message ${executorId}: $decommissionInfo") + decommissionExecutor(executorId, decommissionInfo, adjustTargetNumExecutors = false) + case RemoveWorker(workerId, host, message) => removeWorker(workerId, host, message) @@ -268,14 +272,10 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp removeWorker(workerId, host, message) context.reply(true) - case ExecutorDecommissioning(executorId) => - logWarning(s"Received executor $executorId decommissioned message") - context.reply( - decommissionExecutor( - executorId, - ExecutorDecommissionInfo(s"Executor $executorId is decommissioned."), - adjustTargetNumExecutors = false, - triggeredByExecutor = true)) + case DecommissionExecutor(executorId, decommissionInfo) => + logError(s"Received decommission executor message ${executorId}: ${decommissionInfo}.") + context.reply(decommissionExecutor(executorId, decommissionInfo, + adjustTargetNumExecutors = false)) case RetrieveSparkAppConfig(resourceProfileId) => val rp = scheduler.sc.resourceProfileManager.resourceProfileFromId(resourceProfileId) @@ -463,47 +463,71 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp * @param executorsAndDecomInfo Identifiers of executors & decommission info. * @param adjustTargetNumExecutors whether the target number of executors will be adjusted down * after these executors have been decommissioned. - * @param triggeredByExecutor whether the decommission is triggered at executor. * @return the ids of the executors acknowledged by the cluster manager to be removed. 
*/ override def decommissionExecutors( executorsAndDecomInfo: Array[(String, ExecutorDecommissionInfo)], - adjustTargetNumExecutors: Boolean, - triggeredByExecutor: Boolean): Seq[String] = withLock { - val executorsToDecommission = executorsAndDecomInfo.flatMap { case (executorId, decomInfo) => - // Only bother decommissioning executors which are alive. - if (isExecutorActive(executorId)) { - scheduler.executorDecommission(executorId, decomInfo) - executorsPendingDecommission(executorId) = decomInfo.workerHost - Some(executorId) - } else { - None + adjustTargetNumExecutors: Boolean): Seq[String] = { + + val executorsToDecommission = executorsAndDecomInfo.filter { case (executorId, decomInfo) => + CoarseGrainedSchedulerBackend.this.synchronized { + // Only bother decommissioning executors which are alive. + if (isExecutorActive(executorId)) { + executorsPendingDecommission(executorId) = decomInfo.workerHost + true + } else { + false + } } } // If we don't want to replace the executors we are decommissioning if (adjustTargetNumExecutors) { - adjustExecutors(executorsToDecommission) + adjustExecutors(executorsToDecommission.map(_._1)) } - // Mark those corresponding BlockManagers as decommissioned first before we sending - // decommission notification to executors. So, it's less likely to lead to the race - // condition where `getPeer` request from the decommissioned executor comes first - // before the BlockManagers are marked as decommissioned. - if (conf.get(STORAGE_DECOMMISSION_ENABLED)) { - scheduler.sc.env.blockManager.master.decommissionBlockManagers(executorsToDecommission) + executorsToDecommission.filter { case (executorId, decomInfo) => + doDecommission(executorId, decomInfo) + }.map(_._1) + } + + + private def doDecommission(executorId: String, + decomInfo: ExecutorDecommissionInfo): Boolean = { + + logInfo(s"Asking executor $executorId to decommissioning.") + scheduler.executorDecommission(executorId, decomInfo) + // Send decommission message to the executor (it could have originated on the executor + // but not necessarily). + CoarseGrainedSchedulerBackend.this.synchronized { + executorDataMap.get(executorId) match { + case Some(executorInfo) => + executorInfo.executorEndpoint.send(DecommissionSelf) + case None => + // Ignoring the executor since it is not registered. 
+ logWarning(s"Attempted to decommission unknown executor $executorId.") + return false + } } + logInfo(s"Asked executor $executorId to decommission.") - if (!triggeredByExecutor) { - executorsToDecommission.foreach { executorId => - logInfo(s"Asking executor $executorId to decommissioning.") - executorDataMap(executorId).executorEndpoint.send(DecommissionExecutor) + if (conf.get(STORAGE_DECOMMISSION_ENABLED)) { + try { + logInfo(s"Asking block manager corresponding to executor $executorId to decommission.") + scheduler.sc.env.blockManager.master.decommissionBlockManagers(Seq(executorId)) + } catch { + case e: Exception => + logError("Unexpected error during block manager " + + s"decommissioning for executor $executorId: ${e.toString}", e) + return false } + logInfo(s"Acknowledged decommissioning block manager corresponding to $executorId.") } - executorsToDecommission + true } + override def start(): Unit = { if (UserGroupInformation.isSecurityEnabled()) { delegationTokenManager = createTokenManager() diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala index b9ac8d2ba2784..34b03dfec9e80 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala @@ -178,12 +178,9 @@ private[spark] class StandaloneSchedulerBackend( } override def executorDecommissioned(fullId: String, decommissionInfo: ExecutorDecommissionInfo) { - logInfo(s"Asked to decommission executor $fullId") + logInfo("Asked to decommission executor") val execId = fullId.split("/")(1) - decommissionExecutors( - Array((execId, decommissionInfo)), - adjustTargetNumExecutors = false, - triggeredByExecutor = false) + decommissionExecutors(Array((execId, decommissionInfo)), adjustTargetNumExecutors = false) logInfo("Executor %s decommissioned: %s".format(fullId, decommissionInfo)) } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index e1b4cb82cebf1..ff0f38a2479b0 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -56,7 +56,7 @@ import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.serializer.{SerializerInstance, SerializerManager} import org.apache.spark.shuffle.{MigratableResolver, ShuffleManager, ShuffleWriteMetricsReporter} import org.apache.spark.shuffle.{ShuffleManager, ShuffleWriteMetricsReporter} -import org.apache.spark.storage.BlockManagerMessages.{DecommissionBlockManager, ReplicateBlock} +import org.apache.spark.storage.BlockManagerMessages.ReplicateBlock import org.apache.spark.storage.memory._ import org.apache.spark.unsafe.Platform import org.apache.spark.util._ @@ -1809,9 +1809,7 @@ private[spark] class BlockManager( blocksToRemove.size } - def decommissionBlockManager(): Unit = storageEndpoint.ask(DecommissionBlockManager) - - private[spark] def decommissionSelf(): Unit = synchronized { + def decommissionBlockManager(): Unit = synchronized { decommissioner match { case None => logInfo("Starting block manager decommissioning process...") diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala index 
3fcfca365846e..a3d42348befaa 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala @@ -163,7 +163,8 @@ class BlockManagerMasterEndpoint( context.reply(true) case DecommissionBlockManagers(executorIds) => - decommissioningBlockManagerSet ++= executorIds.flatMap(blockManagerIdByExecutor.get) + val bmIds = executorIds.flatMap(blockManagerIdByExecutor.get) + decommissionBlockManagers(bmIds) context.reply(true) case GetReplicateInfoForRDDBlocks(blockManagerId) => @@ -358,6 +359,21 @@ class BlockManagerMasterEndpoint( blockManagerIdByExecutor.get(execId).foreach(removeBlockManager) } + /** + * Decommission the given Seq of blockmanagers + * - Adds these block managers to decommissioningBlockManagerSet Set + * - Sends the DecommissionBlockManager message to each of the [[BlockManagerReplicaEndpoint]] + */ + def decommissionBlockManagers(blockManagerIds: Seq[BlockManagerId]): Future[Seq[Unit]] = { + val newBlockManagersToDecommission = blockManagerIds.toSet.diff(decommissioningBlockManagerSet) + val futures = newBlockManagersToDecommission.map { blockManagerId => + decommissioningBlockManagerSet.add(blockManagerId) + val info = blockManagerInfo(blockManagerId) + info.storageEndpoint.ask[Unit](DecommissionBlockManager) + } + Future.sequence{ futures.toSeq } + } + /** * Returns a Seq of ReplicateBlock for each RDD block stored by given blockManagerId * @param blockManagerId - block manager id for which ReplicateBlock info is needed diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala index 54a72568b18fa..a69bebc23c661 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala @@ -62,7 +62,7 @@ class BlockManagerStorageEndpoint( } case DecommissionBlockManager => - context.reply(blockManager.decommissionSelf()) + context.reply(blockManager.decommissionBlockManager()) case RemoveBroadcast(broadcastId, _) => doAsync[Int]("removing broadcast " + broadcastId, context) { diff --git a/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala index abe5b7a71ca63..9c5e460854053 100644 --- a/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala @@ -28,7 +28,7 @@ import org.scalatest.BeforeAndAfterEach import org.scalatest.concurrent.Eventually._ import org.apache.spark._ -import org.apache.spark.deploy.DeployMessages.{DecommissionWorkers, MasterStateResponse, RequestMasterState} +import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState, WorkerDecommission} import org.apache.spark.deploy.master.{ApplicationInfo, Master, WorkerInfo} import org.apache.spark.deploy.worker.Worker import org.apache.spark.internal.{config, Logging} @@ -414,7 +414,7 @@ class DecommissionWorkerSuite def decommissionWorkerOnMaster(workerInfo: WorkerInfo, reason: String): Unit = { logInfo(s"Trying to decommission worker ${workerInfo.id} for reason `$reason`") - master.self.send(DecommissionWorkers(Seq(workerInfo.id))) + master.self.send(WorkerDecommission(workerInfo.id, workerInfo.endpoint)) } def killWorkerAfterTimeout(workerInfo: WorkerInfo, secondsToWait: Int): 
Unit = { diff --git a/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala b/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala index a3438cab5b0a3..fe88822bb46b5 100644 --- a/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala @@ -27,7 +27,7 @@ import org.scalatest.concurrent.{Eventually, ScalaFutures} import org.apache.spark._ import org.apache.spark.deploy.{ApplicationDescription, Command} -import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState, WorkerDecommissioning} +import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState} import org.apache.spark.deploy.master.{ApplicationInfo, Master} import org.apache.spark.deploy.worker.Worker import org.apache.spark.internal.{config, Logging} @@ -122,10 +122,7 @@ class AppClientSuite // Send a decommission self to all the workers // Note: normally the worker would send this on their own. - workers.foreach { worker => - worker.decommissionSelf() - master.self.send(WorkerDecommissioning(worker.workerId, worker.self)) - } + workers.foreach(worker => worker.decommissionSelf()) // Decommissioning is async. eventually(timeout(1.seconds), interval(10.millis)) { diff --git a/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala index 4a92cbcb85847..83bb66efdac9e 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala @@ -31,7 +31,7 @@ import org.apache.spark.util.{RpcUtils, SerializableBuffer, ThreadUtils} class WorkerDecommissionSuite extends SparkFunSuite with LocalSparkContext { override def beforeEach(): Unit = { - val conf = new SparkConf().setAppName("test") + val conf = new SparkConf().setAppName("test").setMaster("local") .set(config.DECOMMISSION_ENABLED, true) sc = new SparkContext("local-cluster[2, 1, 1024]", "test", conf) @@ -78,10 +78,7 @@ class WorkerDecommissionSuite extends SparkFunSuite with LocalSparkContext { val execs = sched.getExecutorIds() // Make the executors decommission, finish, exit, and not be replaced. 
val execsAndDecomInfo = execs.map((_, ExecutorDecommissionInfo("", None))).toArray
-    sched.decommissionExecutors(
-      execsAndDecomInfo,
-      adjustTargetNumExecutors = true,
-      triggeredByExecutor = false)
+    sched.decommissionExecutors(execsAndDecomInfo, adjustTargetNumExecutors = true)
     val asyncCountResult = ThreadUtils.awaitResult(asyncCount, 20.seconds)
     assert(asyncCountResult === 10)
   }
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala
index 293498ae5c37b..f1870718c6730 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala
@@ -17,7 +17,7 @@ package org.apache.spark.streaming.scheduler
-import org.mockito.ArgumentMatchers.{any, eq => meq}
+import org.mockito.ArgumentMatchers.{eq => meq}
 import org.mockito.Mockito.{never, reset, times, verify, when}
 import org.scalatest.{BeforeAndAfterEach, PrivateMethodTester}
 import org.scalatest.concurrent.Eventually.{eventually, timeout}
@@ -101,12 +101,12 @@ class ExecutorAllocationManagerSuite extends TestSuiteBase
     val decomInfo = ExecutorDecommissionInfo("spark scale down", None)
     if (decommissioning) {
       verify(allocationClient, times(1)).decommissionExecutor(
-        meq(expectedExec.get), meq(decomInfo), meq(true), any())
+        meq(expectedExec.get), meq(decomInfo), meq(true))
       verify(allocationClient, never).killExecutor(meq(expectedExec.get))
     } else {
       verify(allocationClient, times(1)).killExecutor(meq(expectedExec.get))
       verify(allocationClient, never).decommissionExecutor(
-        meq(expectedExec.get), meq(decomInfo), meq(true), any())
+        meq(expectedExec.get), meq(decomInfo), meq(true))
     }
   } else {
     if (decommissioning) {

From 1ad1f7153592344d3b2adc1196ffe8cc921e0292 Mon Sep 17 00:00:00 2001
From: zero323
Date: Mon, 21 Sep 2020 16:35:00 +0900
Subject: [PATCH 0076/1009] [SPARK-32946][R][SQL] Add withField to SparkR

### What changes were proposed in this pull request?
This PR adds the `withField` function to SparkR.

### Why are the changes needed?
It exposes functionality that is already available in the Scala and PySpark `Column` APIs to R users.

### Does this PR introduce _any_ user-facing change?
Yes, a new function, equivalent to the Scala and PySpark equivalents, is exposed to the end user.

### How was this patch tested?
New unit tests added.

Closes #29814 from zero323/SPARK-32946.

Authored-by: zero323
Signed-off-by: HyukjinKwon
---
 R/pkg/NAMESPACE                       |  1 +
 R/pkg/R/column.R                      | 31 +++++++++++++++++++++++++++
 R/pkg/R/generics.R                    |  3 +++
 R/pkg/tests/fulltests/test_sparkSQL.R | 13 +++++++++++
 4 files changed, 48 insertions(+)

diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 53a0b7856567e..f27913ae0b1bd 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -428,6 +428,7 @@ exportMethods("%<=>%",
               "weekofyear",
               "when",
               "window",
+              "withField",
               "xxhash64",
               "year")
diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R
index 7926a9a2467ee..36d792c647e52 100644
--- a/R/pkg/R/column.R
+++ b/R/pkg/R/column.R
@@ -356,3 +356,34 @@ setMethod("%<=>%",
 #' }
 #' @note ! since 2.3.0
 setMethod("!", signature(x = "Column"), function(x) not(x))
+
+#' withField
+#'
+#' Adds/replaces field in a struct \code{Column} by name.
+#' +#' @param x a Column +#' @param fieldName a character +#' @param col a Column expression +#' +#' @rdname withField +#' @aliases withField withField,Column-method +#' @examples +#' \dontrun{ +#' df <- withColumn( +#' createDataFrame(iris), +#' "sepal", +#' struct(column("Sepal_Width"), column("Sepal_Length")) +#' ) +#' +#' head(select( +#' df, +#' withField(df$sepal, "product", df$Sepal_Length * df$Sepal_Width) +#' )) +#' } +#' @note withField since 3.1.0 +setMethod("withField", + signature(x = "Column", fieldName = "character", col = "Column"), + function(x, fieldName, col) { + jc <- callJMethod(x@jc, "withField", fieldName, col@jc) + column(jc) + }) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index a6a71666ae588..604308c8803eb 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -729,6 +729,9 @@ setGeneric("over", function(x, window) { standardGeneric("over") }) #' @rdname eq_null_safe setGeneric("%<=>%", function(x, value) { standardGeneric("%<=>%") }) +#' @rdname withField +setGeneric("withField", function(x, fieldName, col) { standardGeneric("withField") }) + ###################### WindowSpec Methods ########################## #' @rdname partitionBy diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 5008d3005b5b1..0ad7f9e88b0fd 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1803,6 +1803,19 @@ test_that("column functions", { ) expect_equal(actual, expected) + + # Test withField + lines <- c("{\"Person\": {\"name\":\"Bob\", \"age\":24}}") + jsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp") + writeLines(lines, jsonPath) + df <- read.df(jsonPath, "json") + result <- collect( + select( + select(df, alias(withField(df$Person, "dummy", lit(42)), "Person")), + "Person.dummy" + ) + ) + expect_equal(result, data.frame(dummy = 42)) }) test_that("column binary mathfunctions", { From c336ddfdb81dd5c27fd109d62138dc129a02c30b Mon Sep 17 00:00:00 2001 From: angerszhu Date: Mon, 21 Sep 2020 09:15:12 +0000 Subject: [PATCH 0077/1009] [SPARK-32867][SQL] When explain, HiveTableRelation show limited message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? In current mode, when explain a SQL plan with HiveTableRelation, it will show so many info about HiveTableRelation's prunedPartition, this make plan hard to read, this pr make this information simpler. 
Before:
![image](https://user-images.githubusercontent.com/46485123/93012078-aeeca080-f5cf-11ea-9286-f5c15eadbee3.png)

For the UT
```
test("Make HiveTableScanExec message simple") {
  withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") {
    withTable("df") {
      spark.range(30)
        .select(col("id"), col("id").as("k"))
        .write
        .partitionBy("k")
        .format("hive")
        .mode("overwrite")
        .saveAsTable("df")

      val df = sql("SELECT df.id, df.k FROM df WHERE df.k < 2")
      df.explain(true)
    }
  }
}
```

After this PR, the explain output will show
```
== Parsed Logical Plan ==
'Project ['df.id, 'df.k]
+- 'Filter ('df.k < 2)
   +- 'UnresolvedRelation [df], []

== Analyzed Logical Plan ==
id: bigint, k: bigint
Project [id#11L, k#12L]
+- Filter (k#12L < cast(2 as bigint))
   +- SubqueryAlias spark_catalog.default.df
      +- HiveTableRelation [`default`.`df`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, Data Cols: [id#11L], Partition Cols: [k#12L]]

== Optimized Logical Plan ==
Filter (isnotnull(k#12L) AND (k#12L < 2))
+- HiveTableRelation [`default`.`df`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, Data Cols: [id#11L], Partition Cols: [k#12L], Pruned Partitions: [(k=0), (k=1)]]

== Physical Plan ==
Scan hive default.df [id#11L, k#12L], HiveTableRelation [`default`.`df`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, Data Cols: [id#11L], Partition Cols: [k#12L], Pruned Partitions: [(k=0), (k=1)]], [isnotnull(k#12L), (k#12L < 2)]
```

In this PR, I override `HiveTableRelation`'s `simpleString` method so the explain plan no longer carries unnecessary detail. Compared to what we had before, I drop the detailed metadata of each pruned partition and keep only its partition spec, which is enough to show which partitions were pruned. For full partition details, users can run a DESC EXTENDED statement instead of relying on the plan.

### Why are the changes needed?
Make plans that contain a HiveTableRelation more readable.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Added UT.

Closes #29739 from AngersZhuuuu/HiveTableScan-meta-location-info.
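As a rough, standalone illustration of the truncation idea behind the new `simpleString` (this sketch does not use Spark's internal `truncatedString` helper or the real `spark.sql.maxMetadataStringLength` config; the `PrunedPartitionSummary` object, the `summarize` helper, and the 25/100 limits are made up for the example):

```scala
// Standalone sketch: render pruned partitions as their partition specs only and
// cap the resulting metadata string, mirroring the intent of the new simpleString.
// Requires commons-lang3 on the classpath (Spark already depends on it).
import org.apache.commons.lang3.StringUtils

object PrunedPartitionSummary {
  // maxFields and maxLen are illustrative limits, not Spark's actual configuration values.
  def summarize(partSpecs: Seq[Map[String, String]], maxFields: Int = 25, maxLen: Int = 100): String = {
    val parts = partSpecs.map(_.map { case (k, v) => s"$k=$v" }.mkString("(", ", ", ")"))
    val shown = if (parts.length > maxFields) parts.take(maxFields) :+ "..." else parts
    StringUtils.abbreviate(shown.mkString("[", ", ", "]"), maxLen)
  }

  def main(args: Array[String]): Unit = {
    val specs = (0 until 30).map(i => Map("k" -> i.toString))
    // Prints something like: [(k=0), (k=1), (k=2), (k=3), (k=4), (k=5), (k=6), (k=7), (k...
    println(summarize(specs))
  }
}
```

The actual change additionally keeps a partition's SerDe in the summary when it differs from the table's SerDe, as the new test cases below show.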
Authored-by: angerszhu Signed-off-by: Wenchen Fan --- .../sql/catalyst/catalog/interface.scala | 41 ++++++++++++- .../hive/execution/HiveTableScanSuite.scala | 61 +++++++++++++++++++ 2 files changed, 100 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index be09e761272ce..db01999ab9bb2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -24,6 +24,8 @@ import java.util.Date import scala.collection.mutable import scala.util.control.NonFatal +import org.apache.commons.lang3.StringUtils + import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{FunctionIdentifier, InternalRow, TableIdentifier} @@ -31,8 +33,7 @@ import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference, Cast, ExprId, Literal} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.EstimationUtils -import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateFormatter, DateTimeUtils, TimestampFormatter} -import org.apache.spark.sql.catalyst.util.quoteIdentifier +import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -693,4 +694,40 @@ case class HiveTableRelation( override def newInstance(): HiveTableRelation = copy( dataCols = dataCols.map(_.newInstance()), partitionCols = partitionCols.map(_.newInstance())) + + override def simpleString(maxFields: Int): String = { + val catalogTable = tableMeta.storage.serde match { + case Some(serde) => tableMeta.identifier :: serde :: Nil + case _ => tableMeta.identifier :: Nil + } + + var metadata = Map( + "CatalogTable" -> catalogTable.mkString(", "), + "Data Cols" -> truncatedString(dataCols, "[", ", ", "]", maxFields), + "Partition Cols" -> truncatedString(partitionCols, "[", ", ", "]", maxFields) + ) + + if (prunedPartitions.nonEmpty) { + metadata += ("Pruned Partitions" -> { + val parts = prunedPartitions.get.map { part => + val spec = part.spec.map { case (k, v) => s"$k=$v" }.mkString(", ") + if (part.storage.serde.nonEmpty && part.storage.serde != tableMeta.storage.serde) { + s"($spec, ${part.storage.serde.get})" + } else { + s"($spec)" + } + } + truncatedString(parts, "[", ", ", "]", maxFields) + }) + } + + val metadataEntries = metadata.toSeq.map { + case (key, value) if key == "CatalogTable" => value + case (key, value) => + key + ": " + StringUtils.abbreviate(value, SQLConf.get.maxMetadataStringLength) + } + + val metadataStr = truncatedString(metadataEntries, "[", ", ", "]", maxFields) + s"$nodeName $metadataStr" + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala index 67d7ed0841abb..bdccfccbc5bdb 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala @@ -18,6 +18,8 @@ package org.apache.spark.sql.hive.execution import org.apache.spark.sql.Row +import 
org.apache.spark.sql.functions.col +import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.test.{TestHive, TestHiveSingleton} import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive.implicits._ @@ -187,6 +189,65 @@ class HiveTableScanSuite extends HiveComparisonTest with SQLTestUtils with TestH } } + test("SPARK-32867: When explain, HiveTableRelation show limited message") { + withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") { + withTable("df") { + spark.range(30) + .select(col("id"), col("id").as("k")) + .write + .partitionBy("k") + .format("hive") + .mode("overwrite") + .saveAsTable("df") + + val scan1 = getHiveTableScanExec("SELECT * FROM df WHERE df.k < 3") + assert(scan1.simpleString(100).replaceAll("#\\d+L", "") == + "Scan hive default.df [id, k]," + + " HiveTableRelation [" + + "`default`.`df`," + + " org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe," + + " Data Cols: [id]," + + " Partition Cols: [k]," + + " Pruned Partitions: [(k=0), (k=1), (k=2)]" + + "]," + + " [isnotnull(k), (k < 3)]") + + val scan2 = getHiveTableScanExec("SELECT * FROM df WHERE df.k < 30") + assert(scan2.simpleString(100).replaceAll("#\\d+L", "") == + "Scan hive default.df [id, k]," + + " HiveTableRelation [" + + "`default`.`df`," + + " org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe," + + " Data Cols: [id]," + + " Partition Cols: [k]," + + " Pruned Partitions: [(k=0), (k=1), (k=10), (k=11), (k=12), (k=13), (k=14), (k=15)," + + " (k=16), (k=17), (k=18), (k=19), (k..." + + "]," + + " [isnotnull(k), (k < 30)]") + + sql( + """ + |ALTER TABLE df PARTITION (k=10) SET SERDE + |'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'; + """.stripMargin) + + val scan3 = getHiveTableScanExec("SELECT * FROM df WHERE df.k < 30") + assert(scan3.simpleString(100).replaceAll("#\\d+L", "") == + "Scan hive default.df [id, k]," + + " HiveTableRelation [" + + "`default`.`df`," + + " org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe," + + " Data Cols: [id]," + + " Partition Cols: [k]," + + " Pruned Partitions: [(k=0), (k=1)," + + " (k=10, org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe)," + + " (k=11), (k=12), (k=1..." + + "]," + + " [isnotnull(k), (k < 30)]") + } + } + } + private def getHiveTableScanExec(query: String): HiveTableScanExec = { sql(query).queryExecution.sparkPlan.collectFirst { case p: HiveTableScanExec => p From d01594e8d186e63a6c3ce361e756565e830d5237 Mon Sep 17 00:00:00 2001 From: Zhen Li Date: Mon, 21 Sep 2020 09:05:40 -0500 Subject: [PATCH 0078/1009] [SPARK-32886][WEBUI] fix 'undefined' link in event timeline view ### What changes were proposed in this pull request? Fix ".../jobs/undefined" link from "Event Timeline" in jobs page. Job page link in "Event Timeline" view is constructed by fetching job page link defined in job list below. when job count exceeds page size of job table, only links of jobs in job table can be fetched from page. Other jobs' link would be 'undefined', and links of them in "Event Timeline" are broken, they are redirected to some wired URL like ".../jobs/undefined". This PR is fixing this wrong link issue. With this PR, job link in "Event Timeline" view would always redirect to correct job page. ### Why are the changes needed? Wrong link (".../jobs/undefined") in "Event Timeline" of jobs page. for example, the first job in below page is not in table below, as job count(116) exceeds page size(100). When clicking it's item in "Event Timeline", page is redirected to ".../jobs/undefined", which is wrong. 
Links in "Event Timeline" should always be correct. ![undefinedlink](https://user-images.githubusercontent.com/10524738/93184779-83fa6d80-f6f1-11ea-8a80-1a304ca9cbb2.JPG) ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually tested. Closes #29757 from zhli1142015/fix-link-event-timeline-view. Authored-by: Zhen Li Signed-off-by: Sean Owen --- .../apache/spark/ui/static/timeline-view.js | 53 ++++++++++++------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js index 5be8cffd1f8db..220b76a0f1b27 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js +++ b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js @@ -42,26 +42,31 @@ function drawApplicationTimeline(groupArray, eventObjArray, startTime, offset) { setupZoomable("#application-timeline-zoom-lock", applicationTimeline); setupExecutorEventAction(); + function getIdForJobEntry(baseElem) { + var jobIdText = $($(baseElem).find(".application-timeline-content")[0]).text(); + var jobId = jobIdText.match("\\(Job (\\d+)\\)$")[1]; + return jobId; + } + + function getSelectorForJobEntry(jobId) { + return "#job-" + jobId; + } + function setupJobEventAction() { $(".vis-item.vis-range.job.application-timeline-object").each(function() { - var getSelectorForJobEntry = function(baseElem) { - var jobIdText = $($(baseElem).find(".application-timeline-content")[0]).text(); - var jobId = jobIdText.match("\\(Job (\\d+)\\)$")[1]; - return "#job-" + jobId; - }; - $(this).click(function() { - var jobPagePath = $(getSelectorForJobEntry(this)).find("a.name-link").attr("href"); - window.location.href = jobPagePath + var jobId = getIdForJobEntry(this); + var jobPagePath = uiRoot + appBasePath + "/jobs/job/?id=" + jobId; + window.location.href = jobPagePath; }); $(this).hover( function() { - $(getSelectorForJobEntry(this)).addClass("corresponding-item-hover"); + $(getSelectorForJobEntry(getIdForJobEntry(this))).addClass("corresponding-item-hover"); $($(this).find("div.application-timeline-content")[0]).tooltip("show"); }, function() { - $(getSelectorForJobEntry(this)).removeClass("corresponding-item-hover"); + $(getSelectorForJobEntry(getIdForJobEntry(this))).removeClass("corresponding-item-hover"); $($(this).find("div.application-timeline-content")[0]).tooltip("hide"); } ); @@ -125,26 +130,34 @@ function drawJobTimeline(groupArray, eventObjArray, startTime, offset) { setupZoomable("#job-timeline-zoom-lock", jobTimeline); setupExecutorEventAction(); + function getStageIdAndAttemptForStageEntry(baseElem) { + var stageIdText = $($(baseElem).find(".job-timeline-content")[0]).text(); + var stageIdAndAttempt = stageIdText.match("\\(Stage (\\d+\\.\\d+)\\)$")[1].split("."); + return stageIdAndAttempt; + } + + function getSelectorForStageEntry(stageIdAndAttempt) { + return "#stage-" + stageIdAndAttempt[0] + "-" + stageIdAndAttempt[1]; + } + function setupStageEventAction() { $(".vis-item.vis-range.stage.job-timeline-object").each(function() { - var getSelectorForStageEntry = function(baseElem) { - var stageIdText = $($(baseElem).find(".job-timeline-content")[0]).text(); - var stageIdAndAttempt = stageIdText.match("\\(Stage (\\d+\\.\\d+)\\)$")[1].split("."); - return "#stage-" + stageIdAndAttempt[0] + "-" + stageIdAndAttempt[1]; - }; - $(this).click(function() { - var stagePagePath = 
$(getSelectorForStageEntry(this)).find("a.name-link").attr("href") - window.location.href = stagePagePath + var stageIdAndAttempt = getStageIdAndAttemptForStageEntry(this); + var stagePagePath = uiRoot + appBasePath + + "/stages/stage/?id=" + stageIdAndAttempt[0] + "&attempt=" + stageIdAndAttempt[1]; + window.location.href = stagePagePath; }); $(this).hover( function() { - $(getSelectorForStageEntry(this)).addClass("corresponding-item-hover"); + $(getSelectorForStageEntry(getStageIdAndAttemptForStageEntry(this))) + .addClass("corresponding-item-hover"); $($(this).find("div.job-timeline-content")[0]).tooltip("show"); }, function() { - $(getSelectorForStageEntry(this)).removeClass("corresponding-item-hover"); + $(getSelectorForStageEntry(getStageIdAndAttemptForStageEntry(this))) + .removeClass("corresponding-item-hover"); $($(this).find("div.job-timeline-content")[0]).tooltip("hide"); } ); From 5440ea84eeb2008d70cf890f0e3765167c2b6a62 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Tue, 22 Sep 2020 11:04:14 +0900 Subject: [PATCH 0079/1009] [SPARK-32312][DOC][FOLLOWUP] Fix the minimum version of PyArrow in the installation guide ### What changes were proposed in this pull request? Now that the minimum version of PyArrow is `1.0.0`, we should update the version in the installation guide. ### Why are the changes needed? The minimum version of PyArrow was upgraded to `1.0.0`. ### Does this PR introduce _any_ user-facing change? Users see the correct minimum version in the installation guide. ### How was this patch tested? N/A Closes #29829 from ueshin/issues/SPARK-32312/doc. Authored-by: Takuya UESHIN Signed-off-by: HyukjinKwon --- python/docs/source/getting_started/install.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst index 03570e6626d90..8516d514c7c72 100644 --- a/python/docs/source/getting_started/install.rst +++ b/python/docs/source/getting_started/install.rst @@ -129,7 +129,7 @@ Package Minimum supported version Note ============= ========================= ================ `pandas` 0.23.2 Optional for SQL `NumPy` 1.7 Required for ML -`pyarrow` 0.15.1 Optional for SQL +`pyarrow` 1.0.0 Optional for SQL `Py4J` 0.10.9 Required ============= ========================= ================ From f03c03576a34e6888da6eeb870dae1f6189b62c1 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Mon, 21 Sep 2020 21:43:17 -0700 Subject: [PATCH 0080/1009] [SPARK-32951][SQL] Foldable propagation from Aggregate ### What changes were proposed in this pull request? This PR adds foldable propagation from `Aggregate` as per: https://github.com/apache/spark/pull/29771#discussion_r490412031 ### Why are the changes needed? This is an improvement as `Aggregate`'s `aggregateExpressions` can contain foldables that can be propagated up. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New UT. Closes #29816 from peter-toth/SPARK-32951-foldable-propagation-from-aggregate. 
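For readers who want to see the effect interactively, here is a minimal spark-shell style example mirroring the new unit test; the column names are illustrative, and the exact optimized plan shape may differ because other rules such as constant folding and project collapsing also run:

```scala
// Build an aggregate that produces a literal alias, then reference that alias above the aggregate.
import org.apache.spark.sql.functions._

val df = spark.range(10).toDF("a").withColumn("b", $"a" % 3)
val q = df.groupBy($"a")
  .agg(sum($"b").as("b"), lit(1).as("c"))
  .select($"a", $"b", ($"c" + 1).as("c2"))

// With foldable propagation from Aggregate, the reference to `c` above the aggregate
// can be replaced by the literal 1, so `c + 1` may fold to the constant 2 in the
// optimized plan instead of being recomputed from the aggregate output.
q.explain(true)
```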
Authored-by: Peter Toth Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/optimizer/expressions.scala | 19 +- .../optimizer/FoldablePropagationSuite.scala | 12 + .../q14a.sf100/explain.txt | 50 +- .../q14a.sf100/simplified.txt | 12 +- .../approved-plans-v1_4/q14a/explain.txt | 50 +- .../approved-plans-v1_4/q14a/simplified.txt | 12 +- .../q14b.sf100/explain.txt | 30 +- .../q14b.sf100/simplified.txt | 10 +- .../approved-plans-v1_4/q14b/explain.txt | 30 +- .../approved-plans-v1_4/q14b/simplified.txt | 10 +- .../approved-plans-v1_4/q41.sf100/explain.txt | 12 +- .../q41.sf100/simplified.txt | 4 +- .../approved-plans-v1_4/q41/explain.txt | 12 +- .../approved-plans-v1_4/q41/simplified.txt | 4 +- .../approved-plans-v2_7/q14.sf100/explain.txt | 30 +- .../q14.sf100/simplified.txt | 10 +- .../approved-plans-v2_7/q14/explain.txt | 30 +- .../approved-plans-v2_7/q14/simplified.txt | 10 +- .../q14a.sf100/explain.txt | 530 +++++++++--------- .../q14a.sf100/simplified.txt | 60 +- .../approved-plans-v2_7/q14a/explain.txt | 530 +++++++++--------- .../approved-plans-v2_7/q14a/simplified.txt | 60 +- 22 files changed, 775 insertions(+), 752 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index c4e4b25d570dd..0e7a39c54050e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -633,11 +633,16 @@ object FoldablePropagation extends Rule[LogicalPlan] { val (newChild, foldableMap) = propagateFoldables(p.child) val newProject = replaceFoldable(p.withNewChildren(Seq(newChild)).asInstanceOf[Project], foldableMap) - val newFoldableMap = AttributeMap(newProject.projectList.collect { - case a: Alias if a.child.foldable => (a.toAttribute, a) - }) + val newFoldableMap = collectFoldables(newProject.projectList) (newProject, newFoldableMap) + case a: Aggregate => + val (newChild, foldableMap) = propagateFoldables(a.child) + val newAggregate = + replaceFoldable(a.withNewChildren(Seq(newChild)).asInstanceOf[Aggregate], foldableMap) + val newFoldableMap = collectFoldables(newAggregate.aggregateExpressions) + (newAggregate, newFoldableMap) + // We can not replace the attributes in `Expand.output`. If there are other non-leaf // operators that have the `output` field, we should put them here too. case e: Expand => @@ -703,6 +708,12 @@ object FoldablePropagation extends Rule[LogicalPlan] { } } + private def collectFoldables(expressions: Seq[NamedExpression]) = { + AttributeMap(expressions.collect { + case a: Alias if a.child.foldable => (a.toAttribute, a) + }) + } + /** * List of all [[UnaryNode]]s which allow foldable propagation. */ @@ -710,7 +721,7 @@ object FoldablePropagation extends Rule[LogicalPlan] { // Handling `Project` is moved to `propagateFoldables`. case _: Filter => true case _: SubqueryAlias => true - case _: Aggregate => true + // Handling `Aggregate` is moved to `propagateFoldables`. 
case _: Window => true case _: Sample => true case _: GlobalLimit => true diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala index 59dfd3a7932bd..fe43e8e288673 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala @@ -192,4 +192,16 @@ class FoldablePropagationSuite extends PlanTest { val optimized = Optimize.execute(query) comparePlans(optimized, query) } + + test("SPARK-32951: Foldable propagation from Aggregate") { + val query = testRelation + .groupBy('a)('a, sum('b).as('b), Literal(1).as('c)) + .select('a, 'b, 'c) + + val optimized = Optimize.execute(query.analyze) + val correctAnswer = testRelation + .groupBy('a)('a, sum('b).as('b), Literal(1).as('c)) + .select('a, 'b, Literal(1).as('c)).analyze + comparePlans(optimized, correctAnswer) + } } diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt index c3e9f9418cef5..b346701fa3148 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt @@ -517,15 +517,15 @@ Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#36, isEmpty#37, cou Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#40, count(1)#41] -Results [7]: [store AS channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#40 AS sales#43, count(1)#41 AS number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#40 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#40 AS sales#42, count(1)#41 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#40 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] (86) Filter [codegen id : 39] -Input [7]: [channel#42, 
i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45 as decimal(32,6)) > cast(Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44 as decimal(32,6)) > cast(Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (87) Project [codegen id : 39] -Output [6]: [sales#43, number_sales#44, channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45] +Output [6]: [sales#42, number_sales#43, store AS channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] (88) Scan parquet default.catalog_sales Output [4]: [cs_sold_date_sk#18, cs_item_sk#19, cs_quantity#48, cs_list_price#49] @@ -601,15 +601,15 @@ Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#54, isEmpty#55, cou Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#58, count(1)#59] -Results [7]: [catalog AS channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#58 AS sales#61, count(1)#59 AS number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), 
true))#58 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#63] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#58 AS sales#60, count(1)#59 AS number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#58 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#62] (105) Filter [codegen id : 78] -Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#63] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#63) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#63 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#62] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#62) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#62 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (106) Project [codegen id : 78] -Output [6]: [sales#61, number_sales#62, channel#60, i_brand_id#7, i_class_id#8, i_category_id#9] -Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#63] +Output [6]: [sales#60, number_sales#61, catalog AS channel#63, i_brand_id#7, i_class_id#8, i_category_id#9] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#62] (107) Scan parquet default.web_sales Output [4]: [ws_sold_date_sk#22, ws_item_sk#23, ws_quantity#64, ws_list_price#65] @@ -685,26 +685,26 @@ Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#70, isEmpty#71, cou Keys [3]: 
[i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#74, count(1)#75] -Results [7]: [web AS channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#74 AS sales#77, count(1)#75 AS number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#74 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#74 AS sales#76, count(1)#75 AS number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#74 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#78] (124) Filter [codegen id : 117] -Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#78] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#78) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#78 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (125) Project [codegen id : 117] -Output [6]: 
[sales#77, number_sales#78, channel#76, i_brand_id#7, i_class_id#8, i_category_id#9] -Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79] +Output [6]: [sales#76, number_sales#77, web AS channel#79, i_brand_id#7, i_class_id#8, i_category_id#9] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#78] (126) Union (127) Expand [codegen id : 118] -Input [6]: [sales#43, number_sales#44, channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] -Arguments: [List(sales#43, number_sales#44, channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, 0), List(sales#43, number_sales#44, channel#42, i_brand_id#7, i_class_id#8, null, 1), List(sales#43, number_sales#44, channel#42, i_brand_id#7, null, null, 3), List(sales#43, number_sales#44, channel#42, null, null, null, 7), List(sales#43, number_sales#44, null, null, null, null, 15)], [sales#43, number_sales#44, channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84] +Input [6]: [sales#42, number_sales#43, channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: [List(sales#42, number_sales#43, channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 0), List(sales#42, number_sales#43, channel#47, i_brand_id#7, i_class_id#8, null, 1), List(sales#42, number_sales#43, channel#47, i_brand_id#7, null, null, 3), List(sales#42, number_sales#43, channel#47, null, null, null, 7), List(sales#42, number_sales#43, null, null, null, null, 15)], [sales#42, number_sales#43, channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84] (128) HashAggregate [codegen id : 118] -Input [7]: [sales#43, number_sales#44, channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84] +Input [7]: [sales#42, number_sales#43, channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84] Keys [5]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84] -Functions [2]: [partial_sum(sales#43), partial_sum(number_sales#44)] +Functions [2]: [partial_sum(sales#42), partial_sum(number_sales#43)] Aggregate Attributes [3]: [sum#85, isEmpty#86, sum#87] Results [8]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84, sum#88, isEmpty#89, sum#90] @@ -715,9 +715,9 @@ Arguments: hashpartitioning(channel#80, i_brand_id#81, i_class_id#82, i_category (130) HashAggregate [codegen id : 119] Input [8]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84, sum#88, isEmpty#89, sum#90] Keys [5]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84] -Functions [2]: [sum(sales#43), sum(number_sales#44)] -Aggregate Attributes [2]: [sum(sales#43)#92, sum(number_sales#44)#93] -Results [6]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, sum(sales#43)#92 AS sum(sales)#94, sum(number_sales#44)#93 AS sum(number_sales)#95] +Functions [2]: [sum(sales#42), sum(number_sales#43)] +Aggregate Attributes [2]: [sum(sales#42)#92, sum(number_sales#43)#93] +Results [6]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, sum(sales#42)#92 AS 
sum(sales)#94, sum(number_sales#43)#93 AS sum(number_sales)#95] (131) TakeOrderedAndProject Input [6]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, sum(sales)#94, sum(number_sales)#95] @@ -725,7 +725,7 @@ Arguments: 100, [channel#80 ASC NULLS FIRST, i_brand_id#81 ASC NULLS FIRST, i_cl ===== Subqueries ===== -Subquery:1 Hosting operator id = 86 Hosting Expression = Subquery scalar-subquery#46, [id=#47] +Subquery:1 Hosting operator id = 86 Hosting Expression = Subquery scalar-subquery#45, [id=#46] * HashAggregate (157) +- Exchange (156) +- * HashAggregate (155) @@ -871,8 +871,8 @@ Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#97 as de Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#97 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#98 as decimal(12,2)))), DecimalType(18,2), true))#108] Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#97 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#98 as decimal(12,2)))), DecimalType(18,2), true))#108 AS average_sales#109] -Subquery:2 Hosting operator id = 105 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:2 Hosting operator id = 105 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:3 Hosting operator id = 124 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:3 Hosting operator id = 124 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/simplified.txt index c6dbfcaa3fe43..5b93392d023db 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/simplified.txt @@ -9,7 +9,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su InputAdapter Union WholeStageCodegen (39) - Project [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id] + Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] Subquery #1 WholeStageCodegen (8) @@ -53,7 +53,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter ReusedExchange [d_date_sk] #17 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), 
true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #2 WholeStageCodegen (38) @@ -189,10 +189,10 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su InputAdapter ReusedExchange [ss_item_sk] #4 WholeStageCodegen (78) - Project [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id] + Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #18 WholeStageCodegen (77) @@ -221,10 +221,10 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su InputAdapter ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #14 WholeStageCodegen (117) - Project [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id] + Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #20 WholeStageCodegen 
(116) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt index c1b77321f16e6..3f0cc9e7acb1e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt @@ -461,15 +461,15 @@ Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#33, isEmpty#34, cou Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37, count(1)#38] -Results [7]: [store AS channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37 AS sales#40, count(1)#38 AS number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37 AS sales#39, count(1)#38 AS number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41] (76) Filter [codegen id : 26] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42 as decimal(32,6)) > cast(Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41] +Condition : 
(isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41 as decimal(32,6)) > cast(Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (77) Project [codegen id : 26] -Output [6]: [sales#40, number_sales#41, channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42] +Output [6]: [sales#39, number_sales#40, store AS channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41] (78) Scan parquet default.catalog_sales Output [4]: [cs_sold_date_sk#16, cs_item_sk#17, cs_quantity#45, cs_list_price#46] @@ -533,15 +533,15 @@ Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#50, isEmpty#51, cou Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54, count(1)#55] -Results [7]: [catalog AS channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54 AS sales#57, count(1)#55 AS number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54 AS sales#56, count(1)#55 AS number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58] (92) Filter [codegen id : 52] -Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, 
sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (93) Project [codegen id : 52] -Output [6]: [sales#57, number_sales#58, channel#56, i_brand_id#6, i_class_id#7, i_category_id#8] -Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59] +Output [6]: [sales#56, number_sales#57, catalog AS channel#59, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58] (94) Scan parquet default.web_sales Output [4]: [ws_sold_date_sk#20, ws_item_sk#21, ws_quantity#60, ws_list_price#61] @@ -605,26 +605,26 @@ Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#65, isEmpty#66, cou Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69, count(1)#70] -Results [7]: [web AS channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69 AS sales#72, count(1)#70 AS number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69 AS 
sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69 AS sales#71, count(1)#70 AS number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73] (108) Filter [codegen id : 78] -Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (109) Project [codegen id : 78] -Output [6]: [sales#72, number_sales#73, channel#71, i_brand_id#6, i_class_id#7, i_category_id#8] -Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74] +Output [6]: [sales#71, number_sales#72, web AS channel#74, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73] (110) Union (111) Expand [codegen id : 79] -Input [6]: [sales#40, number_sales#41, channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] -Arguments: [List(sales#40, number_sales#41, channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, 0), 
List(sales#40, number_sales#41, channel#39, i_brand_id#6, i_class_id#7, null, 1), List(sales#40, number_sales#41, channel#39, i_brand_id#6, null, null, 3), List(sales#40, number_sales#41, channel#39, null, null, null, 7), List(sales#40, number_sales#41, null, null, null, null, 15)], [sales#40, number_sales#41, channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79] +Input [6]: [sales#39, number_sales#40, channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: [List(sales#39, number_sales#40, channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 0), List(sales#39, number_sales#40, channel#44, i_brand_id#6, i_class_id#7, null, 1), List(sales#39, number_sales#40, channel#44, i_brand_id#6, null, null, 3), List(sales#39, number_sales#40, channel#44, null, null, null, 7), List(sales#39, number_sales#40, null, null, null, null, 15)], [sales#39, number_sales#40, channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79] (112) HashAggregate [codegen id : 79] -Input [7]: [sales#40, number_sales#41, channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79] +Input [7]: [sales#39, number_sales#40, channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79] Keys [5]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79] -Functions [2]: [partial_sum(sales#40), partial_sum(number_sales#41)] +Functions [2]: [partial_sum(sales#39), partial_sum(number_sales#40)] Aggregate Attributes [3]: [sum#80, isEmpty#81, sum#82] Results [8]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79, sum#83, isEmpty#84, sum#85] @@ -635,9 +635,9 @@ Arguments: hashpartitioning(channel#75, i_brand_id#76, i_class_id#77, i_category (114) HashAggregate [codegen id : 80] Input [8]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79, sum#83, isEmpty#84, sum#85] Keys [5]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79] -Functions [2]: [sum(sales#40), sum(number_sales#41)] -Aggregate Attributes [2]: [sum(sales#40)#87, sum(number_sales#41)#88] -Results [6]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, sum(sales#40)#87 AS sum(sales)#89, sum(number_sales#41)#88 AS sum(number_sales)#90] +Functions [2]: [sum(sales#39), sum(number_sales#40)] +Aggregate Attributes [2]: [sum(sales#39)#87, sum(number_sales#40)#88] +Results [6]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, sum(sales#39)#87 AS sum(sales)#89, sum(number_sales#40)#88 AS sum(number_sales)#90] (115) TakeOrderedAndProject Input [6]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, sum(sales)#89, sum(number_sales)#90] @@ -645,7 +645,7 @@ Arguments: 100, [channel#75 ASC NULLS FIRST, i_brand_id#76 ASC NULLS FIRST, i_cl ===== Subqueries ===== -Subquery:1 Hosting operator id = 76 Hosting Expression = Subquery scalar-subquery#43, [id=#44] +Subquery:1 Hosting operator id = 76 Hosting Expression = Subquery scalar-subquery#42, [id=#43] * HashAggregate (141) +- Exchange (140) +- * HashAggregate (139) @@ -791,8 +791,8 @@ Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#92 as de Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#92 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#93 as decimal(12,2)))), DecimalType(18,2), true))#103] Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#92 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(list_price#93 as decimal(12,2)))), DecimalType(18,2), true))#103 AS average_sales#104] -Subquery:2 Hosting operator id = 92 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:2 Hosting operator id = 92 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:3 Hosting operator id = 108 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:3 Hosting operator id = 108 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt index 604bd792f5ffd..dfa8c1bcc1579 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt @@ -9,7 +9,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su InputAdapter Union WholeStageCodegen (26) - Project [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id] + Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] Subquery #1 WholeStageCodegen (8) @@ -53,7 +53,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter ReusedExchange [d_date_sk] #14 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #2 WholeStageCodegen (25) @@ -165,10 +165,10 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su InputAdapter Scan parquet default.date_dim [d_date_sk,d_year,d_moy] WholeStageCodegen (52) - Project [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id] + Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as 
decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #15 WholeStageCodegen (51) @@ -189,10 +189,10 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su InputAdapter ReusedExchange [d_date_sk] #12 WholeStageCodegen (78) - Project [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id] + Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #16 WholeStageCodegen (77) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt index f71ceaaf91f47..2d2b56e32bdb8 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt @@ -496,15 +496,15 @@ Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#38, isEmpty#39, cou Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42, count(1)#43] -Results [7]: [store AS channel#44, i_brand_id#7, i_class_id#8, 
i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42 AS sales#45, count(1)#43 AS number_sales#46, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42 AS sales#44, count(1)#43 AS number_sales#45, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#46] (86) Filter [codegen id : 78] -Input [7]: [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47 as decimal(32,6)) > cast(Subquery scalar-subquery#48, [id=#49] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#44, number_sales#45, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#46] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#46) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#46 as decimal(32,6)) > cast(Subquery scalar-subquery#47, [id=#48] as decimal(32,6)))) (87) Project [codegen id : 78] -Output [6]: [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46] -Input [7]: [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47] +Output [6]: [store AS channel#49, i_brand_id#7, i_class_id#8, i_category_id#9, sales#44, number_sales#45] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#44, number_sales#45, 
sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#46] (88) ReusedExchange [Reuses operator id: 4] Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] @@ -584,18 +584,18 @@ Input [6]: [i_brand_id#54, i_class_id#55, i_category_id#56, sum#60, isEmpty#61, Keys [3]: [i_brand_id#54, i_class_id#55, i_category_id#56] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#64, count(1)#65] -Results [7]: [store AS channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#64 AS sales#67, count(1)#65 AS number_sales#68, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#64 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69] +Results [6]: [i_brand_id#54, i_class_id#55, i_category_id#56, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#64 AS sales#66, count(1)#65 AS number_sales#67, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#64 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#68] (106) Filter [codegen id : 77] -Input [7]: [channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#48, [id=#49] as decimal(32,6)))) +Input [6]: [i_brand_id#54, i_class_id#55, i_category_id#56, sales#66, number_sales#67, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#68] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#68) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#68 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#47, [id=#48] as decimal(32,6)))) (107) Project [codegen id : 77] -Output [6]: [channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68] -Input [7]: [channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69] +Output [6]: [store AS channel#69, i_brand_id#54, i_class_id#55, i_category_id#56, sales#66, number_sales#67] +Input [6]: [i_brand_id#54, i_class_id#55, i_category_id#56, sales#66, number_sales#67, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#68] (108) BroadcastExchange -Input [6]: [channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68] +Input [6]: [channel#69, i_brand_id#54, i_class_id#55, i_category_id#56, sales#66, number_sales#67] Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [id=#70] (109) BroadcastHashJoin [codegen id : 78] @@ -604,12 +604,12 @@ Right keys [3]: [i_brand_id#54, i_class_id#55, i_category_id#56] Join condition: None (110) TakeOrderedAndProject -Input [12]: [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46, channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68] -Arguments: 100, [channel#44 ASC NULLS FIRST, i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST], [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46, channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68] +Input [12]: [channel#49, i_brand_id#7, i_class_id#8, i_category_id#9, sales#44, number_sales#45, channel#69, i_brand_id#54, i_class_id#55, i_category_id#56, sales#66, number_sales#67] +Arguments: 100, [i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST], [channel#49, i_brand_id#7, i_class_id#8, i_category_id#9, sales#44, number_sales#45, channel#69, i_brand_id#54, i_class_id#55, i_category_id#56, sales#66, number_sales#67] ===== Subqueries ===== -Subquery:1 Hosting operator id = 86 Hosting Expression = Subquery scalar-subquery#48, [id=#49] +Subquery:1 Hosting operator id = 86 Hosting Expression = Subquery scalar-subquery#47, [id=#48] * HashAggregate (136) +- Exchange (135) +- * HashAggregate (134) @@ -780,7 +780,7 @@ Condition : (((((isnotnull(d_year#11) AND isnotnull(d_moy#89)) AND isnotnull(d_d Output [1]: [d_week_seq#29] Input [4]: [d_week_seq#29, d_year#11, d_moy#89, d_dom#90] -Subquery:3 Hosting operator id = 106 Hosting Expression = ReusedSubquery Subquery scalar-subquery#48, [id=#49] +Subquery:3 Hosting operator id = 106 Hosting Expression = ReusedSubquery Subquery scalar-subquery#47, [id=#48] Subquery:4 Hosting operator id = 95 Hosting Expression = Subquery scalar-subquery#50, [id=#51] * Project (144) diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/simplified.txt index 37186560cb3b8..d6b8ba4395d2e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/simplified.txt @@ -1,7 +1,7 @@ -TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] +TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] WholeStageCodegen (78) BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] Subquery #2 WholeStageCodegen (8) @@ -45,7 +45,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_ Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter ReusedExchange [d_date_sk] #16 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #1 WholeStageCodegen (38) @@ -190,10 +190,10 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_ InputAdapter BroadcastExchange #17 WholeStageCodegen (77) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #2 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate 
[i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #18 WholeStageCodegen (76) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt index b68ce0e9f2264..1f31ded51f1ef 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt @@ -446,15 +446,15 @@ Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#35, isEmpty#36, cou Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39, count(1)#40] -Results [7]: [store AS channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39 AS sales#42, count(1)#40 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39 AS sales#41, count(1)#40 AS number_sales#42, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43] (76) Filter [codegen id : 52] -Input [7]: [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44 as decimal(32,6)) > cast(Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43 as decimal(32,6)) > cast(Subquery scalar-subquery#44, [id=#45] as decimal(32,6)))) (77) Project [codegen id : 52] -Output [6]: [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43] -Input [7]: [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] +Output [6]: [store AS channel#46, i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43] (78) Scan parquet default.store_sales Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] @@ -537,18 +537,18 @@ Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#57, isEmpty#58, Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61, count(1)#62] -Results [7]: [store AS channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sales#64, count(1)#62 AS number_sales#65, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66] +Results [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sales#63, count(1)#62 AS number_sales#64, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 
as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65] (96) Filter [codegen id : 51] -Input [7]: [channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) +Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#44, [id=#45] as decimal(32,6)))) (97) Project [codegen id : 51] -Output [6]: [channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65] -Input [7]: [channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66] +Output [6]: [store AS channel#66, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] +Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65] (98) BroadcastExchange -Input [6]: [channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65] +Input [6]: [channel#66, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [id=#67] (99) BroadcastHashJoin [codegen id : 52] @@ -557,12 +557,12 @@ Right keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] Join condition: None (100) TakeOrderedAndProject -Input [12]: [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43, channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65] -Arguments: 100, [channel#41 ASC NULLS FIRST, i_brand_id#6 ASC NULLS FIRST, i_class_id#7 ASC 
NULLS FIRST, i_category_id#8 ASC NULLS FIRST], [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43, channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65] +Input [12]: [channel#46, i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42, channel#66, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] +Arguments: 100, [i_brand_id#6 ASC NULLS FIRST, i_class_id#7 ASC NULLS FIRST, i_category_id#8 ASC NULLS FIRST], [channel#46, i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42, channel#66, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] ===== Subqueries ===== -Subquery:1 Hosting operator id = 76 Hosting Expression = Subquery scalar-subquery#45, [id=#46] +Subquery:1 Hosting operator id = 76 Hosting Expression = Subquery scalar-subquery#44, [id=#45] * HashAggregate (126) +- Exchange (125) +- * HashAggregate (124) @@ -733,7 +733,7 @@ Condition : (((((isnotnull(d_year#11) AND isnotnull(d_moy#86)) AND isnotnull(d_d Output [1]: [d_week_seq#28] Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] -Subquery:3 Hosting operator id = 96 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] +Subquery:3 Hosting operator id = 96 Hosting Expression = ReusedSubquery Subquery scalar-subquery#44, [id=#45] Subquery:4 Hosting operator id = 88 Hosting Expression = Subquery scalar-subquery#51, [id=#52] * Project (134) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt index 6e6950d4cb33a..7bbf83e3de707 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt @@ -1,7 +1,7 @@ -TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] +TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] WholeStageCodegen (52) BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] Subquery #2 WholeStageCodegen (8) @@ -45,7 +45,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_ Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter ReusedExchange [d_date_sk] #13 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity 
as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #1 WholeStageCodegen (25) @@ -166,10 +166,10 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_ InputAdapter BroadcastExchange #14 WholeStageCodegen (51) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #2 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #15 WholeStageCodegen (50) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.sf100/explain.txt index c5eb50e25d82c..13d73e61e1443 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.sf100/explain.txt @@ -73,19 +73,19 @@ Input [2]: [i_manufact#2, count#9] Keys [1]: [i_manufact#2] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#11] -Results [3]: [count(1)#11 AS item_cnt#12, i_manufact#2 AS i_manufact#2#13, true AS alwaysTrue#14] +Results [2]: [count(1)#11 AS item_cnt#12, i_manufact#2 AS i_manufact#2#13] (12) Filter [codegen id : 2] -Input [3]: [item_cnt#12, i_manufact#2#13, alwaysTrue#14] -Condition : (if (isnull(alwaysTrue#14)) 0 else item_cnt#12 > 0) +Input [2]: [item_cnt#12, i_manufact#2#13] +Condition : (item_cnt#12 > 0) (13) Project [codegen id : 2] Output [1]: [i_manufact#2#13] -Input [3]: [item_cnt#12, i_manufact#2#13, alwaysTrue#14] +Input [2]: [item_cnt#12, i_manufact#2#13] (14) BroadcastExchange Input [1]: [i_manufact#2#13] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#15] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#14] (15) BroadcastHashJoin [codegen id : 3] Left keys [1]: [i_manufact#2] @@ -105,7 +105,7 @@ Results [1]: [i_product_name#3] (18) Exchange Input [1]: 
[i_product_name#3] -Arguments: hashpartitioning(i_product_name#3, 5), true, [id=#16] +Arguments: hashpartitioning(i_product_name#3, 5), true, [id=#15] (19) HashAggregate [codegen id : 4] Input [1]: [i_product_name#3] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.sf100/simplified.txt index 350aa9a3c572b..2d14d75ca9362 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.sf100/simplified.txt @@ -16,8 +16,8 @@ TakeOrderedAndProject [i_product_name] BroadcastExchange #2 WholeStageCodegen (2) Project [i_manufact] - Filter [alwaysTrue,item_cnt] - HashAggregate [i_manufact,count] [count(1),item_cnt,i_manufact,alwaysTrue,count] + Filter [item_cnt] + HashAggregate [i_manufact,count] [count(1),item_cnt,i_manufact,count] InputAdapter Exchange [i_manufact] #3 WholeStageCodegen (1) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt index c5eb50e25d82c..13d73e61e1443 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt @@ -73,19 +73,19 @@ Input [2]: [i_manufact#2, count#9] Keys [1]: [i_manufact#2] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#11] -Results [3]: [count(1)#11 AS item_cnt#12, i_manufact#2 AS i_manufact#2#13, true AS alwaysTrue#14] +Results [2]: [count(1)#11 AS item_cnt#12, i_manufact#2 AS i_manufact#2#13] (12) Filter [codegen id : 2] -Input [3]: [item_cnt#12, i_manufact#2#13, alwaysTrue#14] -Condition : (if (isnull(alwaysTrue#14)) 0 else item_cnt#12 > 0) +Input [2]: [item_cnt#12, i_manufact#2#13] +Condition : (item_cnt#12 > 0) (13) Project [codegen id : 2] Output [1]: [i_manufact#2#13] -Input [3]: [item_cnt#12, i_manufact#2#13, alwaysTrue#14] +Input [2]: [item_cnt#12, i_manufact#2#13] (14) BroadcastExchange Input [1]: [i_manufact#2#13] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#15] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#14] (15) BroadcastHashJoin [codegen id : 3] Left keys [1]: [i_manufact#2] @@ -105,7 +105,7 @@ Results [1]: [i_product_name#3] (18) Exchange Input [1]: [i_product_name#3] -Arguments: hashpartitioning(i_product_name#3, 5), true, [id=#16] +Arguments: hashpartitioning(i_product_name#3, 5), true, [id=#15] (19) HashAggregate [codegen id : 4] Input [1]: [i_product_name#3] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt index 350aa9a3c572b..2d14d75ca9362 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt @@ -16,8 +16,8 @@ TakeOrderedAndProject [i_product_name] BroadcastExchange #2 WholeStageCodegen (2) Project [i_manufact] - Filter [alwaysTrue,item_cnt] - HashAggregate [i_manufact,count] [count(1),item_cnt,i_manufact,alwaysTrue,count] + Filter [item_cnt] + HashAggregate [i_manufact,count] [count(1),item_cnt,i_manufact,count] InputAdapter Exchange [i_manufact] #3 
WholeStageCodegen (1) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt index 25a1ca79cc500..dad6098ce4685 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt @@ -496,15 +496,15 @@ Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#38, isEmpty#39, cou Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42, count(1)#43] -Results [7]: [store AS channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42 AS sales#45, count(1)#43 AS number_sales#46, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42 AS sales#44, count(1)#43 AS number_sales#45, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#46] (86) Filter [codegen id : 78] -Input [7]: [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47 as decimal(32,6)) > cast(Subquery scalar-subquery#48, [id=#49] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#44, number_sales#45, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#46] +Condition : 
(isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#46) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#46 as decimal(32,6)) > cast(Subquery scalar-subquery#47, [id=#48] as decimal(32,6)))) (87) Project [codegen id : 78] -Output [6]: [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46] -Input [7]: [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47] +Output [6]: [store AS channel#49, i_brand_id#7, i_class_id#8, i_category_id#9, sales#44, number_sales#45] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#44, number_sales#45, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#46] (88) ReusedExchange [Reuses operator id: 4] Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] @@ -584,18 +584,18 @@ Input [6]: [i_brand_id#54, i_class_id#55, i_category_id#56, sum#60, isEmpty#61, Keys [3]: [i_brand_id#54, i_class_id#55, i_category_id#56] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#64, count(1)#65] -Results [7]: [store AS channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#64 AS sales#67, count(1)#65 AS number_sales#68, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#64 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69] +Results [6]: [i_brand_id#54, i_class_id#55, i_category_id#56, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#64 AS sales#66, count(1)#65 AS number_sales#67, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#64 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#68] (106) Filter [codegen id : 77] -Input [7]: [channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68, 
sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#48, [id=#49] as decimal(32,6)))) +Input [6]: [i_brand_id#54, i_class_id#55, i_category_id#56, sales#66, number_sales#67, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#68] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#68) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#68 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#47, [id=#48] as decimal(32,6)))) (107) Project [codegen id : 77] -Output [6]: [channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68] -Input [7]: [channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69] +Output [6]: [store AS channel#69, i_brand_id#54, i_class_id#55, i_category_id#56, sales#66, number_sales#67] +Input [6]: [i_brand_id#54, i_class_id#55, i_category_id#56, sales#66, number_sales#67, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#68] (108) BroadcastExchange -Input [6]: [channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68] +Input [6]: [channel#69, i_brand_id#54, i_class_id#55, i_category_id#56, sales#66, number_sales#67] Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [id=#70] (109) BroadcastHashJoin [codegen id : 78] @@ -604,12 +604,12 @@ Right keys [3]: [i_brand_id#54, i_class_id#55, i_category_id#56] Join condition: None (110) TakeOrderedAndProject -Input [12]: [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46, channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68] -Arguments: 100, [channel#44 ASC NULLS FIRST, i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST], [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46, channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68] +Input [12]: [channel#49, i_brand_id#7, i_class_id#8, i_category_id#9, sales#44, number_sales#45, channel#69, i_brand_id#54, i_class_id#55, i_category_id#56, sales#66, number_sales#67] +Arguments: 100, [i_brand_id#7 ASC NULLS 
FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST], [channel#49, i_brand_id#7, i_class_id#8, i_category_id#9, sales#44, number_sales#45, channel#69, i_brand_id#54, i_class_id#55, i_category_id#56, sales#66, number_sales#67] ===== Subqueries ===== -Subquery:1 Hosting operator id = 86 Hosting Expression = Subquery scalar-subquery#48, [id=#49] +Subquery:1 Hosting operator id = 86 Hosting Expression = Subquery scalar-subquery#47, [id=#48] * HashAggregate (136) +- Exchange (135) +- * HashAggregate (134) @@ -780,7 +780,7 @@ Condition : (((((isnotnull(d_year#11) AND isnotnull(d_moy#89)) AND isnotnull(d_d Output [1]: [d_week_seq#29] Input [4]: [d_week_seq#29, d_year#11, d_moy#89, d_dom#90] -Subquery:3 Hosting operator id = 106 Hosting Expression = ReusedSubquery Subquery scalar-subquery#48, [id=#49] +Subquery:3 Hosting operator id = 106 Hosting Expression = ReusedSubquery Subquery scalar-subquery#47, [id=#48] Subquery:4 Hosting operator id = 95 Hosting Expression = Subquery scalar-subquery#50, [id=#51] * Project (144) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/simplified.txt index 37186560cb3b8..d6b8ba4395d2e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/simplified.txt @@ -1,7 +1,7 @@ -TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] +TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] WholeStageCodegen (78) BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] Subquery #2 WholeStageCodegen (8) @@ -45,7 +45,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_ Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter ReusedExchange [d_date_sk] #16 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #1 
WholeStageCodegen (38) @@ -190,10 +190,10 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_ InputAdapter BroadcastExchange #17 WholeStageCodegen (77) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #2 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #18 WholeStageCodegen (76) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt index ea0e8319f3fe0..1af2e69d57338 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt @@ -446,15 +446,15 @@ Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#35, isEmpty#36, cou Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39, count(1)#40] -Results [7]: [store AS channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39 AS sales#42, count(1)#40 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), 
DecimalType(18,2), true))#39 AS sales#41, count(1)#40 AS number_sales#42, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43] (76) Filter [codegen id : 52] -Input [7]: [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44 as decimal(32,6)) > cast(Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43 as decimal(32,6)) > cast(Subquery scalar-subquery#44, [id=#45] as decimal(32,6)))) (77) Project [codegen id : 52] -Output [6]: [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43] -Input [7]: [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] +Output [6]: [store AS channel#46, i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43] (78) Scan parquet default.store_sales Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] @@ -537,18 +537,18 @@ Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#57, isEmpty#58, Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61, 
count(1)#62] -Results [7]: [store AS channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sales#64, count(1)#62 AS number_sales#65, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66] +Results [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sales#63, count(1)#62 AS number_sales#64, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65] (96) Filter [codegen id : 51] -Input [7]: [channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) +Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#44, [id=#45] as decimal(32,6)))) (97) Project [codegen id : 51] -Output [6]: [channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65] -Input [7]: [channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66] +Output [6]: [store AS channel#66, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] +Input 
[6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65] (98) BroadcastExchange -Input [6]: [channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65] +Input [6]: [channel#66, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [id=#67] (99) BroadcastHashJoin [codegen id : 52] @@ -557,12 +557,12 @@ Right keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] Join condition: None (100) TakeOrderedAndProject -Input [12]: [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43, channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65] -Arguments: 100, [channel#41 ASC NULLS FIRST, i_brand_id#6 ASC NULLS FIRST, i_class_id#7 ASC NULLS FIRST, i_category_id#8 ASC NULLS FIRST], [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43, channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65] +Input [12]: [channel#46, i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42, channel#66, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] +Arguments: 100, [i_brand_id#6 ASC NULLS FIRST, i_class_id#7 ASC NULLS FIRST, i_category_id#8 ASC NULLS FIRST], [channel#46, i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42, channel#66, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] ===== Subqueries ===== -Subquery:1 Hosting operator id = 76 Hosting Expression = Subquery scalar-subquery#45, [id=#46] +Subquery:1 Hosting operator id = 76 Hosting Expression = Subquery scalar-subquery#44, [id=#45] * HashAggregate (126) +- Exchange (125) +- * HashAggregate (124) @@ -733,7 +733,7 @@ Condition : (((((isnotnull(d_year#11) AND isnotnull(d_moy#86)) AND isnotnull(d_d Output [1]: [d_week_seq#28] Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] -Subquery:3 Hosting operator id = 96 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] +Subquery:3 Hosting operator id = 96 Hosting Expression = ReusedSubquery Subquery scalar-subquery#44, [id=#45] Subquery:4 Hosting operator id = 88 Hosting Expression = Subquery scalar-subquery#51, [id=#52] * Project (134) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/simplified.txt index 6e6950d4cb33a..7bbf83e3de707 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/simplified.txt @@ -1,7 +1,7 @@ -TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] +TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] WholeStageCodegen (52) BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project 
[i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] Subquery #2 WholeStageCodegen (8) @@ -45,7 +45,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_ Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter ReusedExchange [d_date_sk] #13 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #1 WholeStageCodegen (25) @@ -166,10 +166,10 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_ InputAdapter BroadcastExchange #14 WholeStageCodegen (51) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #2 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #15 WholeStageCodegen (50) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt index 8c697ff080952..38292528b42fc 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt @@ -608,15 +608,15 @@ Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#36, isEmpty#37, cou Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#40, count(1)#41] -Results [7]: [store AS channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#40 AS sales#43, count(1)#41 AS number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#40 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#40 AS sales#42, count(1)#41 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#40 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] (86) Filter [codegen id : 39] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45 as decimal(32,6)) > cast(Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), 
DecimalType(18,2), true))#44 as decimal(32,6)) > cast(Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (87) Project [codegen id : 39] -Output [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45] +Output [6]: [store AS channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] (88) Scan parquet default.catalog_sales Output [4]: [cs_sold_date_sk#18, cs_item_sk#19, cs_quantity#48, cs_list_price#49] @@ -692,15 +692,15 @@ Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#54, isEmpty#55, cou Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#58, count(1)#59] -Results [7]: [catalog AS channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#58 AS sales#61, count(1)#59 AS number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#58 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#63] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#58 AS sales#60, count(1)#59 AS number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#58 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#62] (105) Filter [codegen id : 78] -Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#63] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), 
true))#63) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#63 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#62] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#62) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#62 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (106) Project [codegen id : 78] -Output [6]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62] -Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#63] +Output [6]: [catalog AS channel#63, i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#62] (107) Scan parquet default.web_sales Output [4]: [ws_sold_date_sk#22, ws_item_sk#23, ws_quantity#64, ws_list_price#65] @@ -776,35 +776,35 @@ Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#70, isEmpty#71, cou Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#74, count(1)#75] -Results [7]: [web AS channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#74 AS sales#77, count(1)#75 AS number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#74 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), 
DecimalType(18,2), true))#74 AS sales#76, count(1)#75 AS number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#74 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#78] (124) Filter [codegen id : 117] -Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#78] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#78) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#78 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (125) Project [codegen id : 117] -Output [6]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78] -Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79] +Output [6]: [web AS channel#79, i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#78] (126) Union (127) HashAggregate [codegen id : 118] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44] -Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] -Functions [2]: [partial_sum(sales#43), partial_sum(number_sales#44)] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] +Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [partial_sum(sales#42), partial_sum(number_sales#43)] Aggregate Attributes [3]: [sum#80, isEmpty#81, sum#82] -Results [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#83, 
isEmpty#84, sum#85] +Results [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#83, isEmpty#84, sum#85] (128) Exchange -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#83, isEmpty#84, sum#85] -Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#86] +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#83, isEmpty#84, sum#85] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#86] (129) HashAggregate [codegen id : 119] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#83, isEmpty#84, sum#85] -Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] -Functions [2]: [sum(sales#43), sum(number_sales#44)] -Aggregate Attributes [2]: [sum(sales#43)#87, sum(number_sales#44)#88] -Results [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum(sales#43)#87 AS sum_sales#89, sum(number_sales#44)#88 AS number_sales#90] +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#83, isEmpty#84, sum#85] +Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(sales#42), sum(number_sales#43)] +Aggregate Attributes [2]: [sum(sales#42)#87, sum(number_sales#43)#88] +Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum(sales#42)#87 AS sum_sales#89, sum(number_sales#43)#88 AS number_sales#90] (130) ReusedExchange [Reuses operator id: 84] Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#91, isEmpty#92, count#93] @@ -814,15 +814,15 @@ Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#91, isEmpty#92, cou Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#94, count(1)#95] -Results [7]: [store AS channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#94 AS sales#43, count(1)#95 AS number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#94 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#96] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#94 AS sales#42, count(1)#95 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#94 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), 
DecimalType(18,2), true))#96] (132) Filter [codegen id : 158] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#96] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#96) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#96 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#96] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#96) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#96 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (133) Project [codegen id : 158] -Output [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#96] +Output [6]: [store AS channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#96] (134) ReusedExchange [Reuses operator id: 103] Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#97, isEmpty#98, count#99] @@ -832,435 +832,435 @@ Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#97, isEmpty#98, cou Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#100, count(1)#101] -Results [7]: [catalog AS channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#100 AS sales#61, count(1)#101 AS number_sales#62, 
sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#100 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#102] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#100 AS sales#60, count(1)#101 AS number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#100 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#102] (136) Filter [codegen id : 197] -Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#102] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#102) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#102 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#102] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#102) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#102 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (137) Project [codegen id : 197] -Output [6]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62] -Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#102] +Output [6]: [catalog AS channel#103, i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#102] (138) ReusedExchange [Reuses operator 
id: 122] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#103, isEmpty#104, count#105] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#104, isEmpty#105, count#106] (139) HashAggregate [codegen id : 236] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#103, isEmpty#104, count#105] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#104, isEmpty#105, count#106] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#106, count(1)#107] -Results [7]: [web AS channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#106 AS sales#77, count(1)#107 AS number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#106 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#108] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#107, count(1)#108] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#107 AS sales#76, count(1)#108 AS number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#107 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#109] (140) Filter [codegen id : 236] -Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#108] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#108) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#108 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as 
decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#109] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#109) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#109 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (141) Project [codegen id : 236] -Output [6]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78] -Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#108] +Output [6]: [web AS channel#110, i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#109] (142) Union (143) HashAggregate [codegen id : 237] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44] -Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] -Functions [2]: [partial_sum(sales#43), partial_sum(number_sales#44)] -Aggregate Attributes [3]: [sum#109, isEmpty#110, sum#111] -Results [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#112, isEmpty#113, sum#114] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] +Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [partial_sum(sales#42), partial_sum(number_sales#43)] +Aggregate Attributes [3]: [sum#111, isEmpty#112, sum#113] +Results [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#114, isEmpty#115, sum#116] (144) Exchange -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#112, isEmpty#113, sum#114] -Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#115] +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#114, isEmpty#115, sum#116] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#117] (145) HashAggregate [codegen id : 238] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#112, isEmpty#113, sum#114] -Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] -Functions [2]: [sum(sales#43), sum(number_sales#44)] -Aggregate Attributes [2]: [sum(sales#43)#116, sum(number_sales#44)#117] -Results [5]: [channel#42, i_brand_id#7, i_class_id#8, sum(sales#43)#116 AS sum_sales#89, sum(number_sales#44)#117 AS number_sales#90] +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#114, isEmpty#115, sum#116] +Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(sales#42), sum(number_sales#43)] +Aggregate Attributes [2]: [sum(sales#42)#118, sum(number_sales#43)#119] +Results [5]: [channel#47, i_brand_id#7, i_class_id#8, sum(sales#42)#118 AS 
sum_sales#89, sum(number_sales#43)#119 AS number_sales#90] (146) HashAggregate [codegen id : 238] -Input [5]: [channel#42, i_brand_id#7, i_class_id#8, sum_sales#89, number_sales#90] -Keys [3]: [channel#42, i_brand_id#7, i_class_id#8] +Input [5]: [channel#47, i_brand_id#7, i_class_id#8, sum_sales#89, number_sales#90] +Keys [3]: [channel#47, i_brand_id#7, i_class_id#8] Functions [2]: [partial_sum(sum_sales#89), partial_sum(number_sales#90)] -Aggregate Attributes [3]: [sum#118, isEmpty#119, sum#120] -Results [6]: [channel#42, i_brand_id#7, i_class_id#8, sum#121, isEmpty#122, sum#123] +Aggregate Attributes [3]: [sum#120, isEmpty#121, sum#122] +Results [6]: [channel#47, i_brand_id#7, i_class_id#8, sum#123, isEmpty#124, sum#125] (147) Exchange -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, sum#121, isEmpty#122, sum#123] -Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, 5), true, [id=#124] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, sum#123, isEmpty#124, sum#125] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, 5), true, [id=#126] (148) HashAggregate [codegen id : 239] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, sum#121, isEmpty#122, sum#123] -Keys [3]: [channel#42, i_brand_id#7, i_class_id#8] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, sum#123, isEmpty#124, sum#125] +Keys [3]: [channel#47, i_brand_id#7, i_class_id#8] Functions [2]: [sum(sum_sales#89), sum(number_sales#90)] -Aggregate Attributes [2]: [sum(sum_sales#89)#125, sum(number_sales#90)#126] -Results [6]: [channel#42, i_brand_id#7, i_class_id#8, null AS i_category_id#127, sum(sum_sales#89)#125 AS sum(sum_sales)#128, sum(number_sales#90)#126 AS sum(number_sales)#129] +Aggregate Attributes [2]: [sum(sum_sales#89)#127, sum(number_sales#90)#128] +Results [6]: [channel#47, i_brand_id#7, i_class_id#8, null AS i_category_id#129, sum(sum_sales#89)#127 AS sum(sum_sales)#130, sum(number_sales#90)#128 AS sum(number_sales)#131] (149) Union (150) HashAggregate [codegen id : 240] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] (151) Exchange -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, [id=#130] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, [id=#132] (152) HashAggregate [codegen id : 241] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, 
i_category_id#9, sum_sales#89, number_sales#90] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] (153) ReusedExchange [Reuses operator id: 84] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#131, isEmpty#132, count#133] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#133, isEmpty#134, count#135] (154) HashAggregate [codegen id : 280] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#131, isEmpty#132, count#133] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#133, isEmpty#134, count#135] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#134, count(1)#135] -Results [7]: [store AS channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#134 AS sales#43, count(1)#135 AS number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#134 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#136] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#136, count(1)#137] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#136 AS sales#42, count(1)#137 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#136 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#138] (155) Filter [codegen id : 280] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#136] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#136) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as 
decimal(12,2)))), DecimalType(18,2), true))#136 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#138] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#138) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#138 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (156) Project [codegen id : 280] -Output [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#136] +Output [6]: [store AS channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#138] (157) ReusedExchange [Reuses operator id: 103] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#137, isEmpty#138, count#139] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#139, isEmpty#140, count#141] (158) HashAggregate [codegen id : 319] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#137, isEmpty#138, count#139] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#139, isEmpty#140, count#141] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#140, count(1)#141] -Results [7]: [catalog AS channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#140 AS sales#61, count(1)#141 AS number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#140 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#142] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#142, count(1)#143] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#142 AS sales#60, count(1)#143 AS number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#142 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#144] (159) Filter [codegen id : 319] -Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#142] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#142) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#142 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#144] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#144) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#144 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (160) Project [codegen id : 319] -Output [6]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62] -Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#142] +Output [6]: [catalog AS channel#145, i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#144] (161) ReusedExchange [Reuses operator id: 122] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#143, isEmpty#144, count#145] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#146, isEmpty#147, count#148] (162) HashAggregate [codegen id : 358] -Input [6]: [i_brand_id#7, i_class_id#8, 
i_category_id#9, sum#143, isEmpty#144, count#145] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#146, isEmpty#147, count#148] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#146, count(1)#147] -Results [7]: [web AS channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#146 AS sales#77, count(1)#147 AS number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#146 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#148] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#149, count(1)#150] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#149 AS sales#76, count(1)#150 AS number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#149 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#151] (163) Filter [codegen id : 358] -Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#148] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#148) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#148 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#151] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 
as decimal(12,2)))), DecimalType(18,2), true))#151) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#151 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (164) Project [codegen id : 358] -Output [6]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78] -Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#148] +Output [6]: [web AS channel#152, i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#151] (165) Union (166) HashAggregate [codegen id : 359] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44] -Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] -Functions [2]: [partial_sum(sales#43), partial_sum(number_sales#44)] -Aggregate Attributes [3]: [sum#149, isEmpty#150, sum#151] -Results [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#152, isEmpty#153, sum#154] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] +Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [partial_sum(sales#42), partial_sum(number_sales#43)] +Aggregate Attributes [3]: [sum#153, isEmpty#154, sum#155] +Results [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#156, isEmpty#157, sum#158] (167) Exchange -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#152, isEmpty#153, sum#154] -Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#155] +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#156, isEmpty#157, sum#158] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#159] (168) HashAggregate [codegen id : 360] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#152, isEmpty#153, sum#154] -Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] -Functions [2]: [sum(sales#43), sum(number_sales#44)] -Aggregate Attributes [2]: [sum(sales#43)#156, sum(number_sales#44)#157] -Results [4]: [channel#42, i_brand_id#7, sum(sales#43)#156 AS sum_sales#89, sum(number_sales#44)#157 AS number_sales#90] +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#156, isEmpty#157, sum#158] +Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(sales#42), sum(number_sales#43)] +Aggregate Attributes [2]: [sum(sales#42)#160, sum(number_sales#43)#161] +Results [4]: [channel#47, i_brand_id#7, sum(sales#42)#160 AS sum_sales#89, sum(number_sales#43)#161 AS number_sales#90] (169) HashAggregate [codegen id : 360] -Input [4]: [channel#42, i_brand_id#7, sum_sales#89, number_sales#90] -Keys [2]: [channel#42, i_brand_id#7] +Input [4]: [channel#47, i_brand_id#7, sum_sales#89, number_sales#90] +Keys [2]: [channel#47, 
i_brand_id#7] Functions [2]: [partial_sum(sum_sales#89), partial_sum(number_sales#90)] -Aggregate Attributes [3]: [sum#158, isEmpty#159, sum#160] -Results [5]: [channel#42, i_brand_id#7, sum#161, isEmpty#162, sum#163] +Aggregate Attributes [3]: [sum#162, isEmpty#163, sum#164] +Results [5]: [channel#47, i_brand_id#7, sum#165, isEmpty#166, sum#167] (170) Exchange -Input [5]: [channel#42, i_brand_id#7, sum#161, isEmpty#162, sum#163] -Arguments: hashpartitioning(channel#42, i_brand_id#7, 5), true, [id=#164] +Input [5]: [channel#47, i_brand_id#7, sum#165, isEmpty#166, sum#167] +Arguments: hashpartitioning(channel#47, i_brand_id#7, 5), true, [id=#168] (171) HashAggregate [codegen id : 361] -Input [5]: [channel#42, i_brand_id#7, sum#161, isEmpty#162, sum#163] -Keys [2]: [channel#42, i_brand_id#7] +Input [5]: [channel#47, i_brand_id#7, sum#165, isEmpty#166, sum#167] +Keys [2]: [channel#47, i_brand_id#7] Functions [2]: [sum(sum_sales#89), sum(number_sales#90)] -Aggregate Attributes [2]: [sum(sum_sales#89)#165, sum(number_sales#90)#166] -Results [6]: [channel#42, i_brand_id#7, null AS i_class_id#167, null AS i_category_id#168, sum(sum_sales#89)#165 AS sum(sum_sales)#169, sum(number_sales#90)#166 AS sum(number_sales)#170] +Aggregate Attributes [2]: [sum(sum_sales#89)#169, sum(number_sales#90)#170] +Results [6]: [channel#47, i_brand_id#7, null AS i_class_id#171, null AS i_category_id#172, sum(sum_sales#89)#169 AS sum(sum_sales)#173, sum(number_sales#90)#170 AS sum(number_sales)#174] (172) Union (173) HashAggregate [codegen id : 362] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] (174) Exchange -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, [id=#171] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, [id=#175] (175) HashAggregate [codegen id : 363] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] (176) ReusedExchange [Reuses operator id: 84] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#172, isEmpty#173, count#174] +Output [6]: [i_brand_id#7, i_class_id#8, 
i_category_id#9, sum#176, isEmpty#177, count#178] (177) HashAggregate [codegen id : 402] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#172, isEmpty#173, count#174] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#176, isEmpty#177, count#178] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#175, count(1)#176] -Results [7]: [store AS channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#175 AS sales#43, count(1)#176 AS number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#175 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#177] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#179, count(1)#180] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#179 AS sales#42, count(1)#180 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#179 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#181] (178) Filter [codegen id : 402] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#177] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#177) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#177 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#181] +Condition : 
(isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#181) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#181 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (179) Project [codegen id : 402] -Output [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#177] +Output [6]: [store AS channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#181] (180) ReusedExchange [Reuses operator id: 103] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#178, isEmpty#179, count#180] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#182, isEmpty#183, count#184] (181) HashAggregate [codegen id : 441] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#178, isEmpty#179, count#180] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#182, isEmpty#183, count#184] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#181, count(1)#182] -Results [7]: [catalog AS channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#181 AS sales#61, count(1)#182 AS number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#181 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#183] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#185, count(1)#186] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#185 AS sales#60, count(1)#186 AS number_sales#61, 
sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#185 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#187] (182) Filter [codegen id : 441] -Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#183] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#183) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#183 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#187] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#187) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#187 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (183) Project [codegen id : 441] -Output [6]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62] -Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#183] +Output [6]: [catalog AS channel#188, i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#187] (184) ReusedExchange [Reuses operator id: 122] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#184, isEmpty#185, count#186] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#189, isEmpty#190, count#191] (185) HashAggregate [codegen id : 480] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#184, isEmpty#185, count#186] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#189, isEmpty#190, count#191] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] 
-Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#187, count(1)#188] -Results [7]: [web AS channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#187 AS sales#77, count(1)#188 AS number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#187 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#189] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#192, count(1)#193] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#192 AS sales#76, count(1)#193 AS number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#192 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#194] (186) Filter [codegen id : 480] -Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#189] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#189) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#189 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#194] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#194) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#194 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (187) Project [codegen id : 480] -Output [6]: [channel#76, 
i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78] -Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#189] +Output [6]: [web AS channel#195, i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#194] (188) Union (189) HashAggregate [codegen id : 481] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44] -Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] -Functions [2]: [partial_sum(sales#43), partial_sum(number_sales#44)] -Aggregate Attributes [3]: [sum#190, isEmpty#191, sum#192] -Results [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#193, isEmpty#194, sum#195] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] +Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [partial_sum(sales#42), partial_sum(number_sales#43)] +Aggregate Attributes [3]: [sum#196, isEmpty#197, sum#198] +Results [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#199, isEmpty#200, sum#201] (190) Exchange -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#193, isEmpty#194, sum#195] -Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#196] +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#199, isEmpty#200, sum#201] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#202] (191) HashAggregate [codegen id : 482] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#193, isEmpty#194, sum#195] -Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] -Functions [2]: [sum(sales#43), sum(number_sales#44)] -Aggregate Attributes [2]: [sum(sales#43)#197, sum(number_sales#44)#198] -Results [3]: [channel#42, sum(sales#43)#197 AS sum_sales#89, sum(number_sales#44)#198 AS number_sales#90] +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#199, isEmpty#200, sum#201] +Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(sales#42), sum(number_sales#43)] +Aggregate Attributes [2]: [sum(sales#42)#203, sum(number_sales#43)#204] +Results [3]: [channel#47, sum(sales#42)#203 AS sum_sales#89, sum(number_sales#43)#204 AS number_sales#90] (192) HashAggregate [codegen id : 482] -Input [3]: [channel#42, sum_sales#89, number_sales#90] -Keys [1]: [channel#42] +Input [3]: [channel#47, sum_sales#89, number_sales#90] +Keys [1]: [channel#47] Functions [2]: [partial_sum(sum_sales#89), partial_sum(number_sales#90)] -Aggregate Attributes [3]: [sum#199, isEmpty#200, sum#201] -Results [4]: [channel#42, sum#202, isEmpty#203, sum#204] +Aggregate Attributes [3]: [sum#205, isEmpty#206, sum#207] +Results [4]: [channel#47, sum#208, isEmpty#209, sum#210] (193) Exchange -Input [4]: [channel#42, sum#202, isEmpty#203, sum#204] -Arguments: hashpartitioning(channel#42, 5), true, [id=#205] +Input [4]: [channel#47, sum#208, isEmpty#209, 
sum#210] +Arguments: hashpartitioning(channel#47, 5), true, [id=#211] (194) HashAggregate [codegen id : 483] -Input [4]: [channel#42, sum#202, isEmpty#203, sum#204] -Keys [1]: [channel#42] +Input [4]: [channel#47, sum#208, isEmpty#209, sum#210] +Keys [1]: [channel#47] Functions [2]: [sum(sum_sales#89), sum(number_sales#90)] -Aggregate Attributes [2]: [sum(sum_sales#89)#206, sum(number_sales#90)#207] -Results [6]: [channel#42, null AS i_brand_id#208, null AS i_class_id#209, null AS i_category_id#210, sum(sum_sales#89)#206 AS sum(sum_sales)#211, sum(number_sales#90)#207 AS sum(number_sales)#212] +Aggregate Attributes [2]: [sum(sum_sales#89)#212, sum(number_sales#90)#213] +Results [6]: [channel#47, null AS i_brand_id#214, null AS i_class_id#215, null AS i_category_id#216, sum(sum_sales#89)#212 AS sum(sum_sales)#217, sum(number_sales#90)#213 AS sum(number_sales)#218] (195) Union (196) HashAggregate [codegen id : 484] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] (197) Exchange -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, [id=#213] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, [id=#219] (198) HashAggregate [codegen id : 485] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] (199) ReusedExchange [Reuses operator id: 84] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#214, isEmpty#215, count#216] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#220, isEmpty#221, count#222] (200) HashAggregate [codegen id : 524] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#214, isEmpty#215, count#216] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#220, isEmpty#221, count#222] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: 
[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#217, count(1)#218] -Results [7]: [store AS channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#217 AS sales#43, count(1)#218 AS number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#217 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#219] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#223, count(1)#224] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#223 AS sales#42, count(1)#224 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#223 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#225] (201) Filter [codegen id : 524] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#219] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#219) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#219 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#225] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#225) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#225 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (202) Project [codegen id : 524] -Output [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, 
sales#43, number_sales#44] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#219] +Output [6]: [store AS channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#225] (203) ReusedExchange [Reuses operator id: 103] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#220, isEmpty#221, count#222] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#226, isEmpty#227, count#228] (204) HashAggregate [codegen id : 563] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#220, isEmpty#221, count#222] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#226, isEmpty#227, count#228] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#223, count(1)#224] -Results [7]: [catalog AS channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#223 AS sales#61, count(1)#224 AS number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#223 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#225] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#229, count(1)#230] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#229 AS sales#60, count(1)#230 AS number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#229 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#231] (205) Filter [codegen id : 563] -Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#225] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#225) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#225 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#231] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#231) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#231 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (206) Project [codegen id : 563] -Output [6]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62] -Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#225] +Output [6]: [catalog AS channel#232, i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#231] (207) ReusedExchange [Reuses operator id: 122] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#226, isEmpty#227, count#228] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#233, isEmpty#234, count#235] (208) HashAggregate [codegen id : 602] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#226, isEmpty#227, count#228] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#233, isEmpty#234, count#235] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#229, count(1)#230] -Results [7]: [web AS channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#229 AS sales#77, count(1)#230 AS number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#229 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#231] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#236, count(1)#237] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#236 AS sales#76, count(1)#237 AS number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#236 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#238] (209) Filter [codegen id : 602] -Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#231] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#231) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#231 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#238] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#238) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#238 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (210) Project [codegen id : 602] -Output [6]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78] -Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#231] +Output [6]: [web AS channel#239, i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#238] (211) Union (212) HashAggregate [codegen id : 603] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44] -Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] -Functions [2]: [partial_sum(sales#43), partial_sum(number_sales#44)] -Aggregate Attributes [3]: [sum#232, isEmpty#233, sum#234] -Results [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#235, isEmpty#236, sum#237] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] +Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [partial_sum(sales#42), partial_sum(number_sales#43)] +Aggregate Attributes [3]: [sum#240, isEmpty#241, sum#242] +Results [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#243, isEmpty#244, sum#245] (213) Exchange -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#235, isEmpty#236, sum#237] -Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#238] +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#243, isEmpty#244, sum#245] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#246] (214) HashAggregate [codegen id : 604] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#235, isEmpty#236, sum#237] -Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] -Functions [2]: [sum(sales#43), sum(number_sales#44)] -Aggregate Attributes [2]: [sum(sales#43)#239, sum(number_sales#44)#240] -Results [2]: [sum(sales#43)#239 AS sum_sales#89, sum(number_sales#44)#240 AS number_sales#90] +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#243, isEmpty#244, sum#245] +Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(sales#42), sum(number_sales#43)] +Aggregate Attributes [2]: [sum(sales#42)#247, sum(number_sales#43)#248] +Results [2]: [sum(sales#42)#247 AS sum_sales#89, sum(number_sales#43)#248 AS number_sales#90] (215) HashAggregate [codegen id : 604] Input [2]: [sum_sales#89, number_sales#90] Keys: [] Functions [2]: [partial_sum(sum_sales#89), partial_sum(number_sales#90)] -Aggregate Attributes [3]: [sum#241, isEmpty#242, sum#243] -Results [3]: [sum#244, isEmpty#245, sum#246] +Aggregate Attributes [3]: [sum#249, isEmpty#250, sum#251] +Results [3]: [sum#252, isEmpty#253, sum#254] (216) Exchange -Input [3]: [sum#244, isEmpty#245, sum#246] -Arguments: SinglePartition, true, [id=#247] +Input [3]: [sum#252, isEmpty#253, sum#254] +Arguments: SinglePartition, true, [id=#255] (217) HashAggregate [codegen id : 605] -Input [3]: [sum#244, isEmpty#245, sum#246] +Input [3]: [sum#252, isEmpty#253, sum#254] Keys: [] Functions [2]: [sum(sum_sales#89), sum(number_sales#90)] -Aggregate Attributes [2]: [sum(sum_sales#89)#248, sum(number_sales#90)#249] -Results [6]: [null AS channel#250, null AS i_brand_id#251, null AS i_class_id#252, null AS i_category_id#253, sum(sum_sales#89)#248 AS sum(sum_sales)#254, sum(number_sales#90)#249 AS sum(number_sales)#255] +Aggregate Attributes [2]: [sum(sum_sales#89)#256, sum(number_sales#90)#257] +Results [6]: [null AS channel#258, null AS i_brand_id#259, null AS i_class_id#260, null AS i_category_id#261, sum(sum_sales#89)#256 AS sum(sum_sales)#262, sum(number_sales#90)#257 AS sum(number_sales)#263] 
(218) Union (219) HashAggregate [codegen id : 606] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] (220) Exchange -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, [id=#256] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, [id=#264] (221) HashAggregate [codegen id : 607] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] (222) TakeOrderedAndProject -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Arguments: 100, [channel#42 ASC NULLS FIRST, i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST], [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Arguments: 100, [channel#47 ASC NULLS FIRST, i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST], [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] ===== Subqueries ===== -Subquery:1 Hosting operator id = 86 Hosting Expression = Subquery scalar-subquery#46, [id=#47] +Subquery:1 Hosting operator id = 86 Hosting Expression = Subquery scalar-subquery#45, [id=#46] * HashAggregate (252) +- Exchange (251) +- * HashAggregate (250) @@ -1327,7 +1327,7 @@ Input [2]: [d_date_sk#10, d_year#11] (230) BroadcastExchange Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#257] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#265] (231) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ss_sold_date_sk#1] @@ -1335,7 +1335,7 @@ Right keys [1]: [d_date_sk#10] Join condition: None (232) Project [codegen id : 2] -Output [2]: [ss_quantity#3 AS quantity#258, ss_list_price#4 AS list_price#259] +Output [2]: [ss_quantity#3 AS quantity#266, ss_list_price#4 AS list_price#267] Input [4]: [ss_sold_date_sk#1, ss_quantity#3, 
ss_list_price#4, d_date_sk#10] (233) Scan parquet default.catalog_sales @@ -1372,7 +1372,7 @@ Input [2]: [d_date_sk#10, d_year#11] (240) BroadcastExchange Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#260] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#268] (241) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_sold_date_sk#18] @@ -1380,7 +1380,7 @@ Right keys [1]: [d_date_sk#10] Join condition: None (242) Project [codegen id : 4] -Output [2]: [cs_quantity#48 AS quantity#261, cs_list_price#49 AS list_price#262] +Output [2]: [cs_quantity#48 AS quantity#269, cs_list_price#49 AS list_price#270] Input [4]: [cs_sold_date_sk#18, cs_quantity#48, cs_list_price#49, d_date_sk#10] (243) Scan parquet default.web_sales @@ -1406,55 +1406,55 @@ Right keys [1]: [d_date_sk#10] Join condition: None (248) Project [codegen id : 6] -Output [2]: [ws_quantity#64 AS quantity#263, ws_list_price#65 AS list_price#264] +Output [2]: [ws_quantity#64 AS quantity#271, ws_list_price#65 AS list_price#272] Input [4]: [ws_sold_date_sk#22, ws_quantity#64, ws_list_price#65, d_date_sk#10] (249) Union (250) HashAggregate [codegen id : 7] -Input [2]: [quantity#258, list_price#259] +Input [2]: [quantity#266, list_price#267] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#258 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#259 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#265, count#266] -Results [2]: [sum#267, count#268] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#266 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#267 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#273, count#274] +Results [2]: [sum#275, count#276] (251) Exchange -Input [2]: [sum#267, count#268] -Arguments: SinglePartition, true, [id=#269] +Input [2]: [sum#275, count#276] +Arguments: SinglePartition, true, [id=#277] (252) HashAggregate [codegen id : 8] -Input [2]: [sum#267, count#268] +Input [2]: [sum#275, count#276] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#258 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#259 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#258 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#259 as decimal(12,2)))), DecimalType(18,2), true))#270] -Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#258 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#259 as decimal(12,2)))), DecimalType(18,2), true))#270 AS average_sales#271] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#266 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#267 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#266 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#267 as decimal(12,2)))), DecimalType(18,2), true))#278] +Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#266 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#267 as decimal(12,2)))), DecimalType(18,2), true))#278 AS average_sales#279] -Subquery:2 Hosting operator id = 105 Hosting Expression = 
ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:2 Hosting operator id = 105 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:3 Hosting operator id = 124 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:3 Hosting operator id = 124 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:4 Hosting operator id = 132 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:4 Hosting operator id = 132 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:5 Hosting operator id = 136 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:5 Hosting operator id = 136 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:6 Hosting operator id = 140 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:6 Hosting operator id = 140 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:7 Hosting operator id = 155 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:7 Hosting operator id = 155 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:8 Hosting operator id = 159 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:8 Hosting operator id = 159 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:9 Hosting operator id = 163 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:9 Hosting operator id = 163 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:10 Hosting operator id = 178 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:10 Hosting operator id = 178 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:11 Hosting operator id = 182 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:11 Hosting operator id = 182 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:12 Hosting operator id = 186 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:12 Hosting operator id = 186 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:13 Hosting operator id = 201 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:13 Hosting operator id = 201 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:14 Hosting operator id = 205 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:14 Hosting operator id = 205 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:15 Hosting operator id = 209 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:15 Hosting operator id = 209 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt index e4a9b46cf741d..30856e02f2b62 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt @@ -40,7 +40,7 
@@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter Union WholeStageCodegen (39) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] Subquery #1 WholeStageCodegen (8) @@ -90,7 +90,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter ReusedExchange [d_date_sk] #22 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #6 WholeStageCodegen (38) @@ -226,10 +226,10 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter ReusedExchange [ss_item_sk] #8 WholeStageCodegen (78) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #23 WholeStageCodegen (77) @@ -258,10 +258,10 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] 
#18 WholeStageCodegen (117) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #25 WholeStageCodegen (116) @@ -303,24 +303,24 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter Union WholeStageCodegen (158) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 WholeStageCodegen (197) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] 
[sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #23 WholeStageCodegen (236) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #25 WholeStageCodegen (361) @@ -337,24 +337,24 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter Union WholeStageCodegen (280) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] 
[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 WholeStageCodegen (319) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #23 WholeStageCodegen (358) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #25 WholeStageCodegen (483) @@ -371,24 +371,24 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter Union 
WholeStageCodegen (402) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 WholeStageCodegen (441) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #23 WholeStageCodegen (480) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), 
true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #25 WholeStageCodegen (605) @@ -405,23 +405,23 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter Union WholeStageCodegen (524) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 WholeStageCodegen (563) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), 
true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #23 WholeStageCodegen (602) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #25 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt index c54ad0e36216d..238053a3428e3 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt @@ -552,15 +552,15 @@ Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#33, isEmpty#34, cou Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37, count(1)#38] -Results [7]: [store AS channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37 AS sales#40, count(1)#38 AS number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, 
sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37 AS sales#39, count(1)#38 AS number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41] (76) Filter [codegen id : 26] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42 as decimal(32,6)) > cast(Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41 as decimal(32,6)) > cast(Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (77) Project [codegen id : 26] -Output [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42] +Output [6]: [store AS channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41] (78) Scan parquet default.catalog_sales Output [4]: [cs_sold_date_sk#16, cs_item_sk#17, cs_quantity#45, cs_list_price#46] @@ -624,15 +624,15 @@ Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#50, isEmpty#51, cou Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: 
[sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54, count(1)#55] -Results [7]: [catalog AS channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54 AS sales#57, count(1)#55 AS number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54 AS sales#56, count(1)#55 AS number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58] (92) Filter [codegen id : 52] -Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (93) Project [codegen id : 52] -Output [6]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58] -Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59] +Output [6]: [catalog AS channel#59, i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58] (94) Scan parquet default.web_sales Output [4]: [ws_sold_date_sk#20, ws_item_sk#21, ws_quantity#60, ws_list_price#61] @@ -696,35 +696,35 @@ Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#65, isEmpty#66, cou Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69, count(1)#70] -Results [7]: [web AS channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69 AS sales#72, count(1)#70 AS number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69 AS sales#71, count(1)#70 AS number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73] (108) Filter [codegen id : 78] -Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, 
sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (109) Project [codegen id : 78] -Output [6]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73] -Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74] +Output [6]: [web AS channel#74, i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73] (110) Union (111) HashAggregate [codegen id : 79] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] -Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [partial_sum(sales#40), partial_sum(number_sales#41)] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] +Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [partial_sum(sales#39), partial_sum(number_sales#40)] Aggregate Attributes [3]: [sum#75, isEmpty#76, sum#77] -Results [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#78, isEmpty#79, sum#80] +Results [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#78, isEmpty#79, sum#80] (112) Exchange -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#78, isEmpty#79, sum#80] -Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#81] +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#78, isEmpty#79, sum#80] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#81] (113) HashAggregate [codegen id : 80] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#78, isEmpty#79, sum#80] -Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [sum(sales#40), sum(number_sales#41)] -Aggregate Attributes [2]: [sum(sales#40)#82, sum(number_sales#41)#83] -Results [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum(sales#40)#82 AS sum_sales#84, sum(number_sales#41)#83 AS number_sales#85] +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#78, isEmpty#79, sum#80] +Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(sales#39), sum(number_sales#40)] +Aggregate Attributes [2]: [sum(sales#39)#82, sum(number_sales#40)#83] +Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 
sum(sales#39)#82 AS sum_sales#84, sum(number_sales#40)#83 AS number_sales#85] (114) ReusedExchange [Reuses operator id: 74] Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#86, isEmpty#87, count#88] @@ -734,15 +734,15 @@ Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#86, isEmpty#87, cou Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#89, count(1)#90] -Results [7]: [store AS channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#89 AS sales#40, count(1)#90 AS number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#89 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#91] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#89 AS sales#39, count(1)#90 AS number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#89 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#91] (116) Filter [codegen id : 106] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#91] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#91) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#91 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#91] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#91) AND 
(cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#91 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (117) Project [codegen id : 106] -Output [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#91] +Output [6]: [store AS channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#91] (118) ReusedExchange [Reuses operator id: 90] Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#92, isEmpty#93, count#94] @@ -752,435 +752,435 @@ Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#92, isEmpty#93, cou Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#95, count(1)#96] -Results [7]: [catalog AS channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#95 AS sales#57, count(1)#96 AS number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#95 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#97] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#95 AS sales#56, count(1)#96 AS number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#95 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#97] (120) Filter [codegen id : 132] -Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#97] -Condition 
: (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#97) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#97 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#97] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#97) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#97 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (121) Project [codegen id : 132] -Output [6]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58] -Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#97] +Output [6]: [catalog AS channel#98, i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#97] (122) ReusedExchange [Reuses operator id: 106] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#98, isEmpty#99, count#100] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#99, isEmpty#100, count#101] (123) HashAggregate [codegen id : 158] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#98, isEmpty#99, count#100] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#99, isEmpty#100, count#101] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#101, count(1)#102] -Results [7]: [web AS channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#101 AS sales#72, count(1)#102 AS number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), 
true))#101 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#103] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#102, count(1)#103] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#102 AS sales#71, count(1)#103 AS number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#102 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#104] (124) Filter [codegen id : 158] -Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#103] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#103) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#103 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#104] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#104) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#104 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (125) Project [codegen id : 158] -Output [6]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73] -Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#103] +Output [6]: [web AS channel#105, i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), 
true))#104] (126) Union (127) HashAggregate [codegen id : 159] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] -Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [partial_sum(sales#40), partial_sum(number_sales#41)] -Aggregate Attributes [3]: [sum#104, isEmpty#105, sum#106] -Results [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#107, isEmpty#108, sum#109] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] +Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [partial_sum(sales#39), partial_sum(number_sales#40)] +Aggregate Attributes [3]: [sum#106, isEmpty#107, sum#108] +Results [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#109, isEmpty#110, sum#111] (128) Exchange -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#107, isEmpty#108, sum#109] -Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#110] +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#109, isEmpty#110, sum#111] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#112] (129) HashAggregate [codegen id : 160] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#107, isEmpty#108, sum#109] -Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [sum(sales#40), sum(number_sales#41)] -Aggregate Attributes [2]: [sum(sales#40)#111, sum(number_sales#41)#112] -Results [5]: [channel#39, i_brand_id#6, i_class_id#7, sum(sales#40)#111 AS sum_sales#84, sum(number_sales#41)#112 AS number_sales#85] +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#109, isEmpty#110, sum#111] +Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(sales#39), sum(number_sales#40)] +Aggregate Attributes [2]: [sum(sales#39)#113, sum(number_sales#40)#114] +Results [5]: [channel#44, i_brand_id#6, i_class_id#7, sum(sales#39)#113 AS sum_sales#84, sum(number_sales#40)#114 AS number_sales#85] (130) HashAggregate [codegen id : 160] -Input [5]: [channel#39, i_brand_id#6, i_class_id#7, sum_sales#84, number_sales#85] -Keys [3]: [channel#39, i_brand_id#6, i_class_id#7] +Input [5]: [channel#44, i_brand_id#6, i_class_id#7, sum_sales#84, number_sales#85] +Keys [3]: [channel#44, i_brand_id#6, i_class_id#7] Functions [2]: [partial_sum(sum_sales#84), partial_sum(number_sales#85)] -Aggregate Attributes [3]: [sum#113, isEmpty#114, sum#115] -Results [6]: [channel#39, i_brand_id#6, i_class_id#7, sum#116, isEmpty#117, sum#118] +Aggregate Attributes [3]: [sum#115, isEmpty#116, sum#117] +Results [6]: [channel#44, i_brand_id#6, i_class_id#7, sum#118, isEmpty#119, sum#120] (131) Exchange -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, sum#116, isEmpty#117, sum#118] -Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, 5), true, [id=#119] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, sum#118, isEmpty#119, sum#120] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, 5), true, [id=#121] (132) HashAggregate [codegen id : 161] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, sum#116, isEmpty#117, sum#118] -Keys [3]: [channel#39, i_brand_id#6, i_class_id#7] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, sum#118, isEmpty#119, sum#120] +Keys [3]: [channel#44, i_brand_id#6, i_class_id#7] Functions [2]: 
[sum(sum_sales#84), sum(number_sales#85)] -Aggregate Attributes [2]: [sum(sum_sales#84)#120, sum(number_sales#85)#121] -Results [6]: [channel#39, i_brand_id#6, i_class_id#7, null AS i_category_id#122, sum(sum_sales#84)#120 AS sum(sum_sales)#123, sum(number_sales#85)#121 AS sum(number_sales)#124] +Aggregate Attributes [2]: [sum(sum_sales#84)#122, sum(number_sales#85)#123] +Results [6]: [channel#44, i_brand_id#6, i_class_id#7, null AS i_category_id#124, sum(sum_sales#84)#122 AS sum(sum_sales)#125, sum(number_sales#85)#123 AS sum(number_sales)#126] (133) Union (134) HashAggregate [codegen id : 162] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] (135) Exchange -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#125] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#127] (136) HashAggregate [codegen id : 163] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] (137) ReusedExchange [Reuses operator id: 74] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#126, isEmpty#127, count#128] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#128, isEmpty#129, count#130] (138) HashAggregate [codegen id : 189] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#126, isEmpty#127, count#128] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#128, isEmpty#129, count#130] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#129, count(1)#130] -Results [7]: [store AS channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as 
decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#129 AS sales#40, count(1)#130 AS number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#129 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#131] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#131, count(1)#132] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#131 AS sales#39, count(1)#132 AS number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#131 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#133] (139) Filter [codegen id : 189] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#131] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#131) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#131 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#133] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#133) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#133 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (140) Project [codegen id : 189] -Output [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#131] +Output [6]: [store AS channel#44, i_brand_id#6, i_class_id#7, 
i_category_id#8, sales#39, number_sales#40] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#133] (141) ReusedExchange [Reuses operator id: 90] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#132, isEmpty#133, count#134] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#134, isEmpty#135, count#136] (142) HashAggregate [codegen id : 215] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#132, isEmpty#133, count#134] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#134, isEmpty#135, count#136] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#135, count(1)#136] -Results [7]: [catalog AS channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#135 AS sales#57, count(1)#136 AS number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#135 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#137] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#137, count(1)#138] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#137 AS sales#56, count(1)#138 AS number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#137 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#139] (143) Filter [codegen id : 215] -Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#137] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#137) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) 
* promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#137 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#139] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#139) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#139 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (144) Project [codegen id : 215] -Output [6]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58] -Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#137] +Output [6]: [catalog AS channel#140, i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#139] (145) ReusedExchange [Reuses operator id: 106] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#138, isEmpty#139, count#140] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#141, isEmpty#142, count#143] (146) HashAggregate [codegen id : 241] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#138, isEmpty#139, count#140] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#141, isEmpty#142, count#143] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#141, count(1)#142] -Results [7]: [web AS channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#141 AS sales#72, count(1)#142 AS number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#141 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#143] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#144, count(1)#145] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#144 AS sales#71, count(1)#145 AS number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#144 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#146] (147) Filter [codegen id : 241] -Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#143] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#143) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#143 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#146] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#146) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#146 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (148) Project [codegen id : 241] -Output [6]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73] -Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#143] +Output [6]: [web AS channel#147, i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#146] (149) Union (150) HashAggregate [codegen id : 242] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] -Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [partial_sum(sales#40), partial_sum(number_sales#41)] 
-Aggregate Attributes [3]: [sum#144, isEmpty#145, sum#146] -Results [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#147, isEmpty#148, sum#149] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] +Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [partial_sum(sales#39), partial_sum(number_sales#40)] +Aggregate Attributes [3]: [sum#148, isEmpty#149, sum#150] +Results [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#151, isEmpty#152, sum#153] (151) Exchange -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#147, isEmpty#148, sum#149] -Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#150] +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#151, isEmpty#152, sum#153] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#154] (152) HashAggregate [codegen id : 243] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#147, isEmpty#148, sum#149] -Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [sum(sales#40), sum(number_sales#41)] -Aggregate Attributes [2]: [sum(sales#40)#151, sum(number_sales#41)#152] -Results [4]: [channel#39, i_brand_id#6, sum(sales#40)#151 AS sum_sales#84, sum(number_sales#41)#152 AS number_sales#85] +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#151, isEmpty#152, sum#153] +Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(sales#39), sum(number_sales#40)] +Aggregate Attributes [2]: [sum(sales#39)#155, sum(number_sales#40)#156] +Results [4]: [channel#44, i_brand_id#6, sum(sales#39)#155 AS sum_sales#84, sum(number_sales#40)#156 AS number_sales#85] (153) HashAggregate [codegen id : 243] -Input [4]: [channel#39, i_brand_id#6, sum_sales#84, number_sales#85] -Keys [2]: [channel#39, i_brand_id#6] +Input [4]: [channel#44, i_brand_id#6, sum_sales#84, number_sales#85] +Keys [2]: [channel#44, i_brand_id#6] Functions [2]: [partial_sum(sum_sales#84), partial_sum(number_sales#85)] -Aggregate Attributes [3]: [sum#153, isEmpty#154, sum#155] -Results [5]: [channel#39, i_brand_id#6, sum#156, isEmpty#157, sum#158] +Aggregate Attributes [3]: [sum#157, isEmpty#158, sum#159] +Results [5]: [channel#44, i_brand_id#6, sum#160, isEmpty#161, sum#162] (154) Exchange -Input [5]: [channel#39, i_brand_id#6, sum#156, isEmpty#157, sum#158] -Arguments: hashpartitioning(channel#39, i_brand_id#6, 5), true, [id=#159] +Input [5]: [channel#44, i_brand_id#6, sum#160, isEmpty#161, sum#162] +Arguments: hashpartitioning(channel#44, i_brand_id#6, 5), true, [id=#163] (155) HashAggregate [codegen id : 244] -Input [5]: [channel#39, i_brand_id#6, sum#156, isEmpty#157, sum#158] -Keys [2]: [channel#39, i_brand_id#6] +Input [5]: [channel#44, i_brand_id#6, sum#160, isEmpty#161, sum#162] +Keys [2]: [channel#44, i_brand_id#6] Functions [2]: [sum(sum_sales#84), sum(number_sales#85)] -Aggregate Attributes [2]: [sum(sum_sales#84)#160, sum(number_sales#85)#161] -Results [6]: [channel#39, i_brand_id#6, null AS i_class_id#162, null AS i_category_id#163, sum(sum_sales#84)#160 AS sum(sum_sales)#164, sum(number_sales#85)#161 AS sum(number_sales)#165] +Aggregate Attributes [2]: [sum(sum_sales#84)#164, sum(number_sales#85)#165] +Results [6]: [channel#44, i_brand_id#6, null AS i_class_id#166, null AS i_category_id#167, sum(sum_sales#84)#164 AS sum(sum_sales)#168, 
sum(number_sales#85)#165 AS sum(number_sales)#169] (156) Union (157) HashAggregate [codegen id : 245] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] (158) Exchange -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#166] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#170] (159) HashAggregate [codegen id : 246] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] (160) ReusedExchange [Reuses operator id: 74] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#167, isEmpty#168, count#169] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#171, isEmpty#172, count#173] (161) HashAggregate [codegen id : 272] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#167, isEmpty#168, count#169] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#171, isEmpty#172, count#173] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#170, count(1)#171] -Results [7]: [store AS channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#170 AS sales#40, count(1)#171 AS number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#170 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), 
DecimalType(18,2), true))#172] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#174, count(1)#175] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#174 AS sales#39, count(1)#175 AS number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#174 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#176] (162) Filter [codegen id : 272] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#172] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#172) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#172 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#176] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#176) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#176 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (163) Project [codegen id : 272] -Output [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#172] +Output [6]: [store AS channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#176] (164) ReusedExchange [Reuses operator id: 90] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#173, isEmpty#174, count#175] +Output [6]: [i_brand_id#6, i_class_id#7, 
i_category_id#8, sum#177, isEmpty#178, count#179] (165) HashAggregate [codegen id : 298] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#173, isEmpty#174, count#175] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#177, isEmpty#178, count#179] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#176, count(1)#177] -Results [7]: [catalog AS channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#176 AS sales#57, count(1)#177 AS number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#176 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#178] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#180, count(1)#181] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#180 AS sales#56, count(1)#181 AS number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#180 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#182] (166) Filter [codegen id : 298] -Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#178] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#178) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#178 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#182] +Condition : 
(isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#182) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#182 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (167) Project [codegen id : 298] -Output [6]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58] -Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#178] +Output [6]: [catalog AS channel#183, i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#182] (168) ReusedExchange [Reuses operator id: 106] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#179, isEmpty#180, count#181] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#184, isEmpty#185, count#186] (169) HashAggregate [codegen id : 324] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#179, isEmpty#180, count#181] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#184, isEmpty#185, count#186] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#182, count(1)#183] -Results [7]: [web AS channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#182 AS sales#72, count(1)#183 AS number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#182 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#184] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#187, count(1)#188] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#187 AS sales#71, count(1)#188 AS number_sales#72, 
sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#187 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#189] (170) Filter [codegen id : 324] -Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#184] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#184) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#184 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#189] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#189) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#189 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (171) Project [codegen id : 324] -Output [6]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73] -Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#184] +Output [6]: [web AS channel#190, i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#189] (172) Union (173) HashAggregate [codegen id : 325] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] -Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [partial_sum(sales#40), partial_sum(number_sales#41)] -Aggregate Attributes [3]: [sum#185, isEmpty#186, sum#187] -Results [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#188, isEmpty#189, sum#190] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] +Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [partial_sum(sales#39), partial_sum(number_sales#40)] +Aggregate Attributes [3]: [sum#191, 
isEmpty#192, sum#193] +Results [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#194, isEmpty#195, sum#196] (174) Exchange -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#188, isEmpty#189, sum#190] -Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#191] +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#194, isEmpty#195, sum#196] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#197] (175) HashAggregate [codegen id : 326] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#188, isEmpty#189, sum#190] -Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [sum(sales#40), sum(number_sales#41)] -Aggregate Attributes [2]: [sum(sales#40)#192, sum(number_sales#41)#193] -Results [3]: [channel#39, sum(sales#40)#192 AS sum_sales#84, sum(number_sales#41)#193 AS number_sales#85] +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#194, isEmpty#195, sum#196] +Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(sales#39), sum(number_sales#40)] +Aggregate Attributes [2]: [sum(sales#39)#198, sum(number_sales#40)#199] +Results [3]: [channel#44, sum(sales#39)#198 AS sum_sales#84, sum(number_sales#40)#199 AS number_sales#85] (176) HashAggregate [codegen id : 326] -Input [3]: [channel#39, sum_sales#84, number_sales#85] -Keys [1]: [channel#39] +Input [3]: [channel#44, sum_sales#84, number_sales#85] +Keys [1]: [channel#44] Functions [2]: [partial_sum(sum_sales#84), partial_sum(number_sales#85)] -Aggregate Attributes [3]: [sum#194, isEmpty#195, sum#196] -Results [4]: [channel#39, sum#197, isEmpty#198, sum#199] +Aggregate Attributes [3]: [sum#200, isEmpty#201, sum#202] +Results [4]: [channel#44, sum#203, isEmpty#204, sum#205] (177) Exchange -Input [4]: [channel#39, sum#197, isEmpty#198, sum#199] -Arguments: hashpartitioning(channel#39, 5), true, [id=#200] +Input [4]: [channel#44, sum#203, isEmpty#204, sum#205] +Arguments: hashpartitioning(channel#44, 5), true, [id=#206] (178) HashAggregate [codegen id : 327] -Input [4]: [channel#39, sum#197, isEmpty#198, sum#199] -Keys [1]: [channel#39] +Input [4]: [channel#44, sum#203, isEmpty#204, sum#205] +Keys [1]: [channel#44] Functions [2]: [sum(sum_sales#84), sum(number_sales#85)] -Aggregate Attributes [2]: [sum(sum_sales#84)#201, sum(number_sales#85)#202] -Results [6]: [channel#39, null AS i_brand_id#203, null AS i_class_id#204, null AS i_category_id#205, sum(sum_sales#84)#201 AS sum(sum_sales)#206, sum(number_sales#85)#202 AS sum(number_sales)#207] +Aggregate Attributes [2]: [sum(sum_sales#84)#207, sum(number_sales#85)#208] +Results [6]: [channel#44, null AS i_brand_id#209, null AS i_class_id#210, null AS i_category_id#211, sum(sum_sales#84)#207 AS sum(sum_sales)#212, sum(number_sales#85)#208 AS sum(number_sales)#213] (179) Union (180) HashAggregate [codegen id : 328] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, 
number_sales#85] +Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] (181) Exchange -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#208] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#214] (182) HashAggregate [codegen id : 329] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] (183) ReusedExchange [Reuses operator id: 74] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#209, isEmpty#210, count#211] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#215, isEmpty#216, count#217] (184) HashAggregate [codegen id : 355] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#209, isEmpty#210, count#211] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#215, isEmpty#216, count#217] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#212, count(1)#213] -Results [7]: [store AS channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#212 AS sales#40, count(1)#213 AS number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#212 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#214] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#218, count(1)#219] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#218 AS sales#39, count(1)#219 AS number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#218 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#220] (185) Filter [codegen id : 355] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#214] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#214) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#214 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#220] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#220) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#220 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (186) Project [codegen id : 355] -Output [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#214] +Output [6]: [store AS channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#220] (187) ReusedExchange [Reuses operator id: 90] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#215, isEmpty#216, count#217] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#221, isEmpty#222, count#223] (188) HashAggregate [codegen id : 381] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#215, isEmpty#216, count#217] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#221, isEmpty#222, count#223] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: 
[sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#218, count(1)#219] -Results [7]: [catalog AS channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#218 AS sales#57, count(1)#219 AS number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#218 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#220] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#224, count(1)#225] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#224 AS sales#56, count(1)#225 AS number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#224 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#226] (189) Filter [codegen id : 381] -Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#220] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#220) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#220 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#226] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#226) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#226 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (190) Project [codegen id : 381] -Output [6]: [channel#56, i_brand_id#6, 
i_class_id#7, i_category_id#8, sales#57, number_sales#58] -Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#220] +Output [6]: [catalog AS channel#227, i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#226] (191) ReusedExchange [Reuses operator id: 106] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#221, isEmpty#222, count#223] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#228, isEmpty#229, count#230] (192) HashAggregate [codegen id : 407] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#221, isEmpty#222, count#223] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#228, isEmpty#229, count#230] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#224, count(1)#225] -Results [7]: [web AS channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#224 AS sales#72, count(1)#225 AS number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#224 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#226] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#231, count(1)#232] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#231 AS sales#71, count(1)#232 AS number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#231 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#233] (193) Filter [codegen id : 407] -Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#226] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#226) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#226 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#233] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#233) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#233 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (194) Project [codegen id : 407] -Output [6]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73] -Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#226] +Output [6]: [web AS channel#234, i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#233] (195) Union (196) HashAggregate [codegen id : 408] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] -Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [partial_sum(sales#40), partial_sum(number_sales#41)] -Aggregate Attributes [3]: [sum#227, isEmpty#228, sum#229] -Results [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#230, isEmpty#231, sum#232] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] +Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [partial_sum(sales#39), partial_sum(number_sales#40)] +Aggregate Attributes [3]: [sum#235, isEmpty#236, sum#237] +Results [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#238, isEmpty#239, sum#240] (197) Exchange -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#230, isEmpty#231, sum#232] -Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#233] +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#238, isEmpty#239, sum#240] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#241] (198) HashAggregate [codegen id : 409] -Input [7]: [channel#39, 
i_brand_id#6, i_class_id#7, i_category_id#8, sum#230, isEmpty#231, sum#232] -Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [sum(sales#40), sum(number_sales#41)] -Aggregate Attributes [2]: [sum(sales#40)#234, sum(number_sales#41)#235] -Results [2]: [sum(sales#40)#234 AS sum_sales#84, sum(number_sales#41)#235 AS number_sales#85] +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#238, isEmpty#239, sum#240] +Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(sales#39), sum(number_sales#40)] +Aggregate Attributes [2]: [sum(sales#39)#242, sum(number_sales#40)#243] +Results [2]: [sum(sales#39)#242 AS sum_sales#84, sum(number_sales#40)#243 AS number_sales#85] (199) HashAggregate [codegen id : 409] Input [2]: [sum_sales#84, number_sales#85] Keys: [] Functions [2]: [partial_sum(sum_sales#84), partial_sum(number_sales#85)] -Aggregate Attributes [3]: [sum#236, isEmpty#237, sum#238] -Results [3]: [sum#239, isEmpty#240, sum#241] +Aggregate Attributes [3]: [sum#244, isEmpty#245, sum#246] +Results [3]: [sum#247, isEmpty#248, sum#249] (200) Exchange -Input [3]: [sum#239, isEmpty#240, sum#241] -Arguments: SinglePartition, true, [id=#242] +Input [3]: [sum#247, isEmpty#248, sum#249] +Arguments: SinglePartition, true, [id=#250] (201) HashAggregate [codegen id : 410] -Input [3]: [sum#239, isEmpty#240, sum#241] +Input [3]: [sum#247, isEmpty#248, sum#249] Keys: [] Functions [2]: [sum(sum_sales#84), sum(number_sales#85)] -Aggregate Attributes [2]: [sum(sum_sales#84)#243, sum(number_sales#85)#244] -Results [6]: [null AS channel#245, null AS i_brand_id#246, null AS i_class_id#247, null AS i_category_id#248, sum(sum_sales#84)#243 AS sum(sum_sales)#249, sum(number_sales#85)#244 AS sum(number_sales)#250] +Aggregate Attributes [2]: [sum(sum_sales#84)#251, sum(number_sales#85)#252] +Results [6]: [null AS channel#253, null AS i_brand_id#254, null AS i_class_id#255, null AS i_category_id#256, sum(sum_sales#84)#251 AS sum(sum_sales)#257, sum(number_sales#85)#252 AS sum(number_sales)#258] (202) Union (203) HashAggregate [codegen id : 411] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] (204) Exchange -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#251] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#259] (205) HashAggregate [codegen id : 412] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Input [6]: [channel#44, i_brand_id#6, 
i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] (206) TakeOrderedAndProject -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Arguments: 100, [channel#39 ASC NULLS FIRST, i_brand_id#6 ASC NULLS FIRST, i_class_id#7 ASC NULLS FIRST, i_category_id#8 ASC NULLS FIRST], [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Arguments: 100, [channel#44 ASC NULLS FIRST, i_brand_id#6 ASC NULLS FIRST, i_class_id#7 ASC NULLS FIRST, i_category_id#8 ASC NULLS FIRST], [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] ===== Subqueries ===== -Subquery:1 Hosting operator id = 76 Hosting Expression = Subquery scalar-subquery#43, [id=#44] +Subquery:1 Hosting operator id = 76 Hosting Expression = Subquery scalar-subquery#42, [id=#43] * HashAggregate (236) +- Exchange (235) +- * HashAggregate (234) @@ -1247,7 +1247,7 @@ Input [2]: [d_date_sk#10, d_year#11] (214) BroadcastExchange Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#252] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#260] (215) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ss_sold_date_sk#1] @@ -1255,7 +1255,7 @@ Right keys [1]: [d_date_sk#10] Join condition: None (216) Project [codegen id : 2] -Output [2]: [ss_quantity#3 AS quantity#253, ss_list_price#4 AS list_price#254] +Output [2]: [ss_quantity#3 AS quantity#261, ss_list_price#4 AS list_price#262] Input [4]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, d_date_sk#10] (217) Scan parquet default.catalog_sales @@ -1292,7 +1292,7 @@ Input [2]: [d_date_sk#10, d_year#11] (224) BroadcastExchange Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#255] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#263] (225) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_sold_date_sk#16] @@ -1300,7 +1300,7 @@ Right keys [1]: [d_date_sk#10] Join condition: None (226) Project [codegen id : 4] -Output [2]: [cs_quantity#45 AS quantity#256, cs_list_price#46 AS list_price#257] +Output [2]: [cs_quantity#45 AS quantity#264, cs_list_price#46 AS list_price#265] Input [4]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46, d_date_sk#10] (227) Scan parquet default.web_sales @@ -1326,55 +1326,55 @@ Right keys [1]: [d_date_sk#10] Join condition: None (232) Project [codegen id : 6] -Output [2]: [ws_quantity#60 AS quantity#258, ws_list_price#61 AS list_price#259] +Output [2]: [ws_quantity#60 AS quantity#266, ws_list_price#61 AS list_price#267] Input [4]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61, d_date_sk#10] (233) Union (234) HashAggregate [codegen id : 7] -Input [2]: [quantity#253, list_price#254] +Input [2]: [quantity#261, list_price#262] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#253 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(list_price#254 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#260, count#261] -Results [2]: [sum#262, count#263] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#261 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#262 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#268, count#269] +Results [2]: [sum#270, count#271] (235) Exchange -Input [2]: [sum#262, count#263] -Arguments: SinglePartition, true, [id=#264] +Input [2]: [sum#270, count#271] +Arguments: SinglePartition, true, [id=#272] (236) HashAggregate [codegen id : 8] -Input [2]: [sum#262, count#263] +Input [2]: [sum#270, count#271] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#253 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#254 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#253 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#254 as decimal(12,2)))), DecimalType(18,2), true))#265] -Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#253 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#254 as decimal(12,2)))), DecimalType(18,2), true))#265 AS average_sales#266] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#261 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#262 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#261 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#262 as decimal(12,2)))), DecimalType(18,2), true))#273] +Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#261 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#262 as decimal(12,2)))), DecimalType(18,2), true))#273 AS average_sales#274] -Subquery:2 Hosting operator id = 92 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:2 Hosting operator id = 92 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:3 Hosting operator id = 108 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:3 Hosting operator id = 108 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:4 Hosting operator id = 116 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:4 Hosting operator id = 116 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:5 Hosting operator id = 120 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:5 Hosting operator id = 120 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:6 Hosting operator id = 124 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:6 Hosting operator id = 124 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:7 Hosting operator id = 139 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:7 Hosting operator id = 139 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:8 Hosting operator id = 143 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:8 Hosting operator id = 143 Hosting Expression = 
ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:9 Hosting operator id = 147 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:9 Hosting operator id = 147 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:10 Hosting operator id = 162 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:10 Hosting operator id = 162 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:11 Hosting operator id = 166 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:11 Hosting operator id = 166 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:12 Hosting operator id = 170 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:12 Hosting operator id = 170 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:13 Hosting operator id = 185 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:13 Hosting operator id = 185 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:14 Hosting operator id = 189 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:14 Hosting operator id = 189 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:15 Hosting operator id = 193 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:15 Hosting operator id = 193 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt index fc86da1801926..e96f1d6fed14f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt @@ -40,7 +40,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter Union WholeStageCodegen (26) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] Subquery #1 WholeStageCodegen (8) @@ -90,7 +90,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter ReusedExchange [d_date_sk] #19 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), 
true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #6 WholeStageCodegen (25) @@ -202,10 +202,10 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter Scan parquet default.date_dim [d_date_sk,d_year,d_moy] WholeStageCodegen (52) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #20 WholeStageCodegen (51) @@ -226,10 +226,10 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter ReusedExchange [d_date_sk] #16 WholeStageCodegen (78) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #21 WholeStageCodegen (77) @@ 
-263,24 +263,24 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter Union WholeStageCodegen (106) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 WholeStageCodegen (132) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #20 WholeStageCodegen (158) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #21 WholeStageCodegen (244) @@ -297,24 +297,24 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter Union WholeStageCodegen (189) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 WholeStageCodegen (215) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #20 WholeStageCodegen (241) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #21 WholeStageCodegen (327) @@ -331,24 +331,24 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter Union WholeStageCodegen (272) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 WholeStageCodegen (298) - Project 
[channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #20 WholeStageCodegen (324) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #21 WholeStageCodegen (410) @@ -365,23 +365,23 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter Union WholeStageCodegen (355) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 WholeStageCodegen (381) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #20 WholeStageCodegen (407) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), 
true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #21 From 3118c220f919ba185a57abfbe55eac1822c89a52 Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 21 Sep 2020 22:32:25 -0700 Subject: [PATCH 0081/1009] [SPARK-32949][R][SQL] Add timestamp_seconds to SparkR ### What changes were proposed in this pull request? This PR adds R wrapper for `timestamp_seconds` function. ### Why are the changes needed? Feature parity. ### Does this PR introduce _any_ user-facing change? Yes, it adds a new R function. ### How was this patch tested? New unit tests. Closes #29822 from zero323/SPARK-32949. Authored-by: zero323 Signed-off-by: Dongjoon Hyun --- R/pkg/NAMESPACE | 1 + R/pkg/R/functions.R | 15 +++++++++++++++ R/pkg/R/generics.R | 4 ++++ R/pkg/tests/fulltests/test_sparkSQL.R | 1 + 4 files changed, 21 insertions(+) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index f27913ae0b1bd..6d28caff0d56f 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -405,6 +405,7 @@ exportMethods("%<=>%", "sumDistinct", "tan", "tanh", + "timestamp_seconds", "toDegrees", "toRadians", "to_csv", diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 5d9c8e8124d9a..1d75819cb6133 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -4407,3 +4407,18 @@ setMethod("current_timestamp", jc <- callJStatic("org.apache.spark.sql.functions", "current_timestamp") column(jc) }) + +#' @details +#' \code{timestamp_seconds}: Creates timestamp from the number of seconds since UTC epoch. +#' +#' @rdname column_datetime_functions +#' @aliases timestamp_seconds timestamp_seconds,Column-method +#' @note timestamp_seconds since 3.1.0 +setMethod("timestamp_seconds", + signature(x = "Column"), + function(x) { + jc <- callJStatic( + "org.apache.spark.sql.functions", "timestamp_seconds", x@jc + ) + column(jc) + }) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 604308c8803eb..a7a9379b927b1 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -1357,6 +1357,10 @@ setGeneric("substring_index", function(x, delim, count) { standardGeneric("subst #' @name NULL setGeneric("sumDistinct", function(x) { standardGeneric("sumDistinct") }) +#' @rdname column_datetime_functions +#' @name timestamp_seconds +setGeneric("timestamp_seconds", function(x) { standardGeneric("timestamp_seconds") }) + #' @rdname column_collection_functions #' @name NULL setGeneric("transform_keys", function(x, f) { standardGeneric("transform_keys") }) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 0ad7f9e88b0fd..1c65dabaf6656 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1424,6 +1424,7 @@ test_that("column functions", { date_trunc("quarter", c) + current_date() + current_timestamp() c25 <- overlay(c1, c2, c3, c3) + overlay(c1, c2, c3) + overlay(c1, c2, 1) + overlay(c1, c2, 3, 4) + c26 <- timestamp_seconds(c1) # Test if base::is.nan() is exposed expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE)) From 790d9ef2d3a90388ef3c36d5ae47b2fe369a83ba Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Tue, 22 Sep 2020 14:46:27 +0900 Subject: [PATCH 0082/1009] [SPARK-32955][DOCS] An item in the navigation bar in the WebUI has a wrong link ### What changes were proposed in this pull request? 
This PR fixes a link in `_layouts/global.html`. The `More` item in the navigation bar in the WebUI links to `api.html`, which is wrong. This PR also removes `api.md`, because neither it nor the `api.html` generated from it is referenced anywhere. ### Why are the changes needed? Fix the wrong link. ### Does this PR introduce _any_ user-facing change? Yes. The "More" item no longer links to `api.html`. ### How was this patch tested? Ran `SKIP_API=1 jekyll build` and confirmed that the item no longer links to `api.html`. I also confirmed that `api.md` and `api.html` are no longer referenced anywhere with the following command. ``` $ grep -Erl "api\.(html|md)" docs ``` Closes #29821 from sarutak/fix-api-doc-link. Authored-by: Kousuke Saruta Signed-off-by: HyukjinKwon --- docs/_layouts/global.html | 2 +- docs/api.md | 27 --------------------------- 2 files changed, 1 insertion(+), 28 deletions(-) delete mode 100644 docs/api.md diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html index 09f7018262a0b..d6548f0fa9534 100755 --- a/docs/_layouts/global.html +++ b/docs/_layouts/global.html @@ -110,7 +110,7 @@
  1. `SaveMode.ErrorIfExists`: throw an exception at runtime.
  2. * *
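For context on the scaladoc change below, which replaces the separate data source v1/v2 wording with a single `ErrorIfExists` default: a minimal PySpark sketch (not part of any patch here; the output path is made up) contrasting the default mode with an explicitly chosen one via `DataFrameWriter.mode()`.

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
df = spark.range(10)

# Default save mode is ErrorIfExists: the write fails if the path already exists.
df.write.parquet("/tmp/range_example")

# Choosing a mode explicitly instead of relying on the default.
df.write.mode("overwrite").parquet("/tmp/range_example")  # replace existing data
df.write.mode("append").parquet("/tmp/range_example")     # add to existing data
df.write.mode("ignore").parquet("/tmp/range_example")     # silently skip if data exists
```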

    - * When writing to data source v1, the default option is `ErrorIfExists`. When writing to data - * source v2, the default option is `Append`. + * The default option is `ErrorIfExists`. * * @since 1.4.0 */ From 0bc0e91e4015eb98bd2f4bf17da2ec7135b520a9 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 23 Sep 2020 20:10:01 -0700 Subject: [PATCH 0099/1009] [SPARK-32971][K8S][FOLLOWUP] Add `.toSeq` for Scala 2.13 compilation ### What changes were proposed in this pull request? This is a follow-up to fix Scala 2.13 compilation at Kubernetes module. ### Why are the changes needed? To fix Scala 2.13 compilation. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the GitHub Action Scala 2.13 compilation job. Closes #29859 from dongjoon-hyun/SPARK-32971-2. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../spark/deploy/k8s/features/MountVolumesFeatureStep.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala index fe4717d099510..788ddeaf51cba 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala @@ -114,7 +114,7 @@ private[spark] class MountVolumesFeatureStep(conf: KubernetesConf) } override def getAdditionalKubernetesResources(): Seq[HasMetadata] = { - additionalResources + additionalResources.toSeq } } From 31a16fbb405a19dc3eb732347e0e1f873b16971d Mon Sep 17 00:00:00 2001 From: zero323 Date: Thu, 24 Sep 2020 14:15:36 +0900 Subject: [PATCH 0100/1009] [SPARK-32714][PYTHON] Initial pyspark-stubs port ### What changes were proposed in this pull request? This PR proposes migration of [`pyspark-stubs`](https://github.com/zero323/pyspark-stubs) into Spark codebase. ### Why are the changes needed? ### Does this PR introduce _any_ user-facing change? Yes. This PR adds type annotations directly to Spark source. This can impact interaction with development tools for users, which haven't used `pyspark-stubs`. ### How was this patch tested? - [x] MyPy tests of the PySpark source ``` mypy --no-incremental --config python/mypy.ini python/pyspark ``` - [x] MyPy tests of Spark examples ``` MYPYPATH=python/ mypy --no-incremental --config python/mypy.ini examples/src/main/python/ml examples/src/main/python/sql examples/src/main/python/sql/streaming ``` - [x] Existing Flake8 linter - [x] Existing unit tests Tested against: - `mypy==0.790+dev.e959952d9001e9713d329a2f9b196705b028f894` - `mypy==0.782` Closes #29591 from zero323/SPARK-32681. 
Authored-by: zero323 Signed-off-by: HyukjinKwon --- dev/.rat-excludes | 1 + dev/tox.ini | 2 +- .../ml/estimator_transformer_param_example.py | 8 +- .../main/python/ml/fm_classifier_example.py | 6 +- .../main/python/ml/fm_regressor_example.py | 6 +- .../src/main/python/ml/pipeline_example.py | 8 +- examples/src/main/python/sql/arrow.py | 4 +- python/MANIFEST.in | 1 + python/mypy.ini | 36 + python/pyspark/__init__.pyi | 73 + python/pyspark/_globals.pyi | 27 + python/pyspark/_typing.pyi | 33 + python/pyspark/accumulators.pyi | 71 + python/pyspark/broadcast.pyi | 46 + python/pyspark/conf.pyi | 44 + python/pyspark/context.pyi | 176 ++ python/pyspark/daemon.pyi | 29 + python/pyspark/files.pyi | 24 + python/pyspark/find_spark_home.pyi | 17 + python/pyspark/java_gateway.pyi | 24 + python/pyspark/join.pyi | 50 + python/pyspark/ml/__init__.pyi | 45 + python/pyspark/ml/_typing.pyi | 76 + python/pyspark/ml/base.pyi | 103 ++ python/pyspark/ml/classification.pyi | 922 ++++++++++ python/pyspark/ml/clustering.pyi | 437 +++++ python/pyspark/ml/common.pyi | 20 + python/pyspark/ml/evaluation.pyi | 281 +++ python/pyspark/ml/feature.pyi | 1629 +++++++++++++++++ python/pyspark/ml/fpm.pyi | 109 ++ python/pyspark/ml/functions.pyi | 22 + python/pyspark/ml/image.pyi | 40 + python/pyspark/ml/linalg/__init__.pyi | 255 +++ python/pyspark/ml/param/__init__.pyi | 96 + .../ml/param/_shared_params_code_gen.pyi | 19 + python/pyspark/ml/param/shared.pyi | 187 ++ python/pyspark/ml/pipeline.pyi | 97 + python/pyspark/ml/recommendation.pyi | 152 ++ python/pyspark/ml/regression.pyi | 825 +++++++++ python/pyspark/ml/stat.pyi | 89 + python/pyspark/ml/tests/test_algorithms.py | 2 +- python/pyspark/ml/tests/test_base.py | 2 +- python/pyspark/ml/tests/test_evaluation.py | 2 +- python/pyspark/ml/tests/test_feature.py | 2 +- python/pyspark/ml/tests/test_image.py | 2 +- python/pyspark/ml/tests/test_linalg.py | 2 +- python/pyspark/ml/tests/test_param.py | 2 +- python/pyspark/ml/tests/test_persistence.py | 2 +- python/pyspark/ml/tests/test_pipeline.py | 2 +- python/pyspark/ml/tests/test_stat.py | 2 +- .../pyspark/ml/tests/test_training_summary.py | 2 +- python/pyspark/ml/tests/test_tuning.py | 2 +- python/pyspark/ml/tests/test_wrapper.py | 6 +- python/pyspark/ml/tree.pyi | 112 ++ python/pyspark/ml/tuning.pyi | 185 ++ python/pyspark/ml/util.pyi | 128 ++ python/pyspark/ml/wrapper.pyi | 48 + python/pyspark/mllib/__init__.pyi | 32 + python/pyspark/mllib/_typing.pyi | 23 + python/pyspark/mllib/classification.pyi | 151 ++ python/pyspark/mllib/clustering.pyi | 196 ++ python/pyspark/mllib/common.pyi | 27 + python/pyspark/mllib/evaluation.pyi | 94 + python/pyspark/mllib/feature.pyi | 167 ++ python/pyspark/mllib/fpm.pyi | 57 + python/pyspark/mllib/linalg/__init__.pyi | 273 +++ python/pyspark/mllib/linalg/distributed.pyi | 147 ++ python/pyspark/mllib/random.pyi | 126 ++ python/pyspark/mllib/recommendation.pyi | 75 + python/pyspark/mllib/regression.pyi | 155 ++ python/pyspark/mllib/stat/KernelDensity.pyi | 27 + python/pyspark/mllib/stat/__init__.pyi | 29 + python/pyspark/mllib/stat/_statistics.pyi | 69 + python/pyspark/mllib/stat/distribution.pyi | 25 + python/pyspark/mllib/stat/test.pyi | 39 + python/pyspark/mllib/tests/test_algorithms.py | 2 +- python/pyspark/mllib/tests/test_feature.py | 2 +- python/pyspark/mllib/tests/test_linalg.py | 6 +- python/pyspark/mllib/tests/test_stat.py | 2 +- .../mllib/tests/test_streaming_algorithms.py | 2 +- python/pyspark/mllib/tests/test_util.py | 4 +- python/pyspark/mllib/tree.pyi | 126 ++ 
python/pyspark/mllib/util.pyi | 90 + python/pyspark/profiler.pyi | 56 + python/pyspark/py.typed | 1 + python/pyspark/rdd.pyi | 479 +++++ python/pyspark/rddsampler.pyi | 54 + python/pyspark/resource/__init__.pyi | 31 + python/pyspark/resource/information.pyi | 26 + python/pyspark/resource/profile.pyi | 51 + python/pyspark/resource/requests.pyi | 71 + .../pyspark/resource/tests/test_resources.py | 2 +- python/pyspark/resultiterable.pyi | 30 + python/pyspark/serializers.pyi | 122 ++ python/pyspark/shell.pyi | 31 + python/pyspark/shuffle.pyi | 109 ++ python/pyspark/sql/__init__.pyi | 41 + python/pyspark/sql/_typing.pyi | 57 + python/pyspark/sql/avro/__init__.pyi | 22 + python/pyspark/sql/avro/functions.pyi | 27 + python/pyspark/sql/catalog.pyi | 63 + python/pyspark/sql/column.pyi | 112 ++ python/pyspark/sql/conf.pyi | 27 + python/pyspark/sql/context.pyi | 139 ++ python/pyspark/sql/dataframe.pyi | 324 ++++ python/pyspark/sql/functions.pyi | 343 ++++ python/pyspark/sql/group.pyi | 44 + python/pyspark/sql/pandas/__init__.pyi | 17 + .../pyspark/sql/pandas/_typing/__init__.pyi | 338 ++++ .../sql/pandas/_typing/protocols/__init__.pyi | 17 + .../sql/pandas/_typing/protocols/frame.pyi | 428 +++++ .../sql/pandas/_typing/protocols/series.pyi | 253 +++ python/pyspark/sql/pandas/conversion.pyi | 58 + python/pyspark/sql/pandas/functions.pyi | 176 ++ python/pyspark/sql/pandas/group_ops.pyi | 49 + python/pyspark/sql/pandas/map_ops.pyi | 30 + python/pyspark/sql/pandas/serializers.pyi | 65 + python/pyspark/sql/pandas/typehints.pyi | 33 + python/pyspark/sql/pandas/types.pyi | 41 + python/pyspark/sql/pandas/utils.pyi | 20 + python/pyspark/sql/readwriter.pyi | 250 +++ python/pyspark/sql/session.pyi | 125 ++ python/pyspark/sql/streaming.pyi | 179 ++ python/pyspark/sql/tests/test_arrow.py | 6 +- python/pyspark/sql/tests/test_catalog.py | 2 +- python/pyspark/sql/tests/test_column.py | 2 +- python/pyspark/sql/tests/test_conf.py | 2 +- python/pyspark/sql/tests/test_context.py | 2 +- python/pyspark/sql/tests/test_dataframe.py | 22 +- python/pyspark/sql/tests/test_datasources.py | 2 +- python/pyspark/sql/tests/test_functions.py | 2 +- python/pyspark/sql/tests/test_group.py | 2 +- .../sql/tests/test_pandas_cogrouped_map.py | 4 +- .../sql/tests/test_pandas_grouped_map.py | 4 +- python/pyspark/sql/tests/test_pandas_map.py | 4 +- python/pyspark/sql/tests/test_pandas_udf.py | 4 +- .../sql/tests/test_pandas_udf_grouped_agg.py | 4 +- .../sql/tests/test_pandas_udf_scalar.py | 6 +- .../sql/tests/test_pandas_udf_typehints.py | 4 +- .../sql/tests/test_pandas_udf_window.py | 4 +- python/pyspark/sql/tests/test_readwriter.py | 2 +- python/pyspark/sql/tests/test_serde.py | 2 +- python/pyspark/sql/tests/test_session.py | 2 +- python/pyspark/sql/tests/test_streaming.py | 2 +- python/pyspark/sql/tests/test_types.py | 9 +- python/pyspark/sql/tests/test_udf.py | 11 +- python/pyspark/sql/tests/test_utils.py | 2 +- python/pyspark/sql/types.pyi | 204 +++ python/pyspark/sql/udf.pyi | 57 + python/pyspark/sql/utils.pyi | 55 + python/pyspark/sql/window.pyi | 40 + python/pyspark/statcounter.pyi | 44 + python/pyspark/status.pyi | 42 + python/pyspark/storagelevel.pyi | 43 + python/pyspark/streaming/__init__.pyi | 23 + python/pyspark/streaming/context.pyi | 75 + python/pyspark/streaming/dstream.pyi | 208 +++ python/pyspark/streaming/kinesis.pyi | 46 + python/pyspark/streaming/listener.pyi | 35 + .../pyspark/streaming/tests/test_context.py | 2 +- .../pyspark/streaming/tests/test_dstream.py | 2 +- .../pyspark/streaming/tests/test_kinesis.py | 2 +- 
.../pyspark/streaming/tests/test_listener.py | 2 +- python/pyspark/streaming/util.pyi | 48 + python/pyspark/taskcontext.pyi | 45 + python/pyspark/testing/mlutils.py | 5 +- python/pyspark/testing/sqlutils.py | 2 +- python/pyspark/testing/streamingutils.py | 4 +- python/pyspark/tests/test_appsubmit.py | 2 +- python/pyspark/tests/test_broadcast.py | 2 +- python/pyspark/tests/test_conf.py | 2 +- python/pyspark/tests/test_context.py | 11 +- python/pyspark/tests/test_daemon.py | 2 +- python/pyspark/tests/test_join.py | 2 +- python/pyspark/tests/test_pin_thread.py | 2 +- python/pyspark/tests/test_profiler.py | 2 +- python/pyspark/tests/test_rdd.py | 2 +- python/pyspark/tests/test_rddbarrier.py | 2 +- python/pyspark/tests/test_readwrite.py | 2 +- python/pyspark/tests/test_serializers.py | 4 +- python/pyspark/tests/test_shuffle.py | 2 +- python/pyspark/tests/test_taskcontext.py | 2 +- python/pyspark/tests/test_util.py | 2 +- python/pyspark/tests/test_worker.py | 2 +- python/pyspark/traceback_utils.pyi | 29 + python/pyspark/util.pyi | 35 + python/pyspark/version.pyi | 19 + python/pyspark/worker.pyi | 73 + python/setup.py | 3 +- 189 files changed, 14053 insertions(+), 119 deletions(-) create mode 100644 python/mypy.ini create mode 100644 python/pyspark/__init__.pyi create mode 100644 python/pyspark/_globals.pyi create mode 100644 python/pyspark/_typing.pyi create mode 100644 python/pyspark/accumulators.pyi create mode 100644 python/pyspark/broadcast.pyi create mode 100644 python/pyspark/conf.pyi create mode 100644 python/pyspark/context.pyi create mode 100644 python/pyspark/daemon.pyi create mode 100644 python/pyspark/files.pyi create mode 100644 python/pyspark/find_spark_home.pyi create mode 100644 python/pyspark/java_gateway.pyi create mode 100644 python/pyspark/join.pyi create mode 100644 python/pyspark/ml/__init__.pyi create mode 100644 python/pyspark/ml/_typing.pyi create mode 100644 python/pyspark/ml/base.pyi create mode 100644 python/pyspark/ml/classification.pyi create mode 100644 python/pyspark/ml/clustering.pyi create mode 100644 python/pyspark/ml/common.pyi create mode 100644 python/pyspark/ml/evaluation.pyi create mode 100644 python/pyspark/ml/feature.pyi create mode 100644 python/pyspark/ml/fpm.pyi create mode 100644 python/pyspark/ml/functions.pyi create mode 100644 python/pyspark/ml/image.pyi create mode 100644 python/pyspark/ml/linalg/__init__.pyi create mode 100644 python/pyspark/ml/param/__init__.pyi create mode 100644 python/pyspark/ml/param/_shared_params_code_gen.pyi create mode 100644 python/pyspark/ml/param/shared.pyi create mode 100644 python/pyspark/ml/pipeline.pyi create mode 100644 python/pyspark/ml/recommendation.pyi create mode 100644 python/pyspark/ml/regression.pyi create mode 100644 python/pyspark/ml/stat.pyi create mode 100644 python/pyspark/ml/tree.pyi create mode 100644 python/pyspark/ml/tuning.pyi create mode 100644 python/pyspark/ml/util.pyi create mode 100644 python/pyspark/ml/wrapper.pyi create mode 100644 python/pyspark/mllib/__init__.pyi create mode 100644 python/pyspark/mllib/_typing.pyi create mode 100644 python/pyspark/mllib/classification.pyi create mode 100644 python/pyspark/mllib/clustering.pyi create mode 100644 python/pyspark/mllib/common.pyi create mode 100644 python/pyspark/mllib/evaluation.pyi create mode 100644 python/pyspark/mllib/feature.pyi create mode 100644 python/pyspark/mllib/fpm.pyi create mode 100644 python/pyspark/mllib/linalg/__init__.pyi create mode 100644 python/pyspark/mllib/linalg/distributed.pyi create mode 100644 
python/pyspark/mllib/random.pyi create mode 100644 python/pyspark/mllib/recommendation.pyi create mode 100644 python/pyspark/mllib/regression.pyi create mode 100644 python/pyspark/mllib/stat/KernelDensity.pyi create mode 100644 python/pyspark/mllib/stat/__init__.pyi create mode 100644 python/pyspark/mllib/stat/_statistics.pyi create mode 100644 python/pyspark/mllib/stat/distribution.pyi create mode 100644 python/pyspark/mllib/stat/test.pyi create mode 100644 python/pyspark/mllib/tree.pyi create mode 100644 python/pyspark/mllib/util.pyi create mode 100644 python/pyspark/profiler.pyi create mode 100644 python/pyspark/py.typed create mode 100644 python/pyspark/rdd.pyi create mode 100644 python/pyspark/rddsampler.pyi create mode 100644 python/pyspark/resource/__init__.pyi create mode 100644 python/pyspark/resource/information.pyi create mode 100644 python/pyspark/resource/profile.pyi create mode 100644 python/pyspark/resource/requests.pyi create mode 100644 python/pyspark/resultiterable.pyi create mode 100644 python/pyspark/serializers.pyi create mode 100644 python/pyspark/shell.pyi create mode 100644 python/pyspark/shuffle.pyi create mode 100644 python/pyspark/sql/__init__.pyi create mode 100644 python/pyspark/sql/_typing.pyi create mode 100644 python/pyspark/sql/avro/__init__.pyi create mode 100644 python/pyspark/sql/avro/functions.pyi create mode 100644 python/pyspark/sql/catalog.pyi create mode 100644 python/pyspark/sql/column.pyi create mode 100644 python/pyspark/sql/conf.pyi create mode 100644 python/pyspark/sql/context.pyi create mode 100644 python/pyspark/sql/dataframe.pyi create mode 100644 python/pyspark/sql/functions.pyi create mode 100644 python/pyspark/sql/group.pyi create mode 100644 python/pyspark/sql/pandas/__init__.pyi create mode 100644 python/pyspark/sql/pandas/_typing/__init__.pyi create mode 100644 python/pyspark/sql/pandas/_typing/protocols/__init__.pyi create mode 100644 python/pyspark/sql/pandas/_typing/protocols/frame.pyi create mode 100644 python/pyspark/sql/pandas/_typing/protocols/series.pyi create mode 100644 python/pyspark/sql/pandas/conversion.pyi create mode 100644 python/pyspark/sql/pandas/functions.pyi create mode 100644 python/pyspark/sql/pandas/group_ops.pyi create mode 100644 python/pyspark/sql/pandas/map_ops.pyi create mode 100644 python/pyspark/sql/pandas/serializers.pyi create mode 100644 python/pyspark/sql/pandas/typehints.pyi create mode 100644 python/pyspark/sql/pandas/types.pyi create mode 100644 python/pyspark/sql/pandas/utils.pyi create mode 100644 python/pyspark/sql/readwriter.pyi create mode 100644 python/pyspark/sql/session.pyi create mode 100644 python/pyspark/sql/streaming.pyi create mode 100644 python/pyspark/sql/types.pyi create mode 100644 python/pyspark/sql/udf.pyi create mode 100644 python/pyspark/sql/utils.pyi create mode 100644 python/pyspark/sql/window.pyi create mode 100644 python/pyspark/statcounter.pyi create mode 100644 python/pyspark/status.pyi create mode 100644 python/pyspark/storagelevel.pyi create mode 100644 python/pyspark/streaming/__init__.pyi create mode 100644 python/pyspark/streaming/context.pyi create mode 100644 python/pyspark/streaming/dstream.pyi create mode 100644 python/pyspark/streaming/kinesis.pyi create mode 100644 python/pyspark/streaming/listener.pyi create mode 100644 python/pyspark/streaming/util.pyi create mode 100644 python/pyspark/taskcontext.pyi create mode 100644 python/pyspark/traceback_utils.pyi create mode 100644 python/pyspark/util.pyi create mode 100644 python/pyspark/version.pyi create mode 100644 
python/pyspark/worker.pyi diff --git a/dev/.rat-excludes b/dev/.rat-excludes index df1dd51a7c519..98786437f7b1c 100644 --- a/dev/.rat-excludes +++ b/dev/.rat-excludes @@ -124,3 +124,4 @@ GangliaReporter.java application_1578436911597_0052 config.properties app-20200706201101-0003 +py.typed diff --git a/dev/tox.ini b/dev/tox.ini index c14e6b9446cca..7edf7d597fb58 100644 --- a/dev/tox.ini +++ b/dev/tox.ini @@ -20,5 +20,5 @@ exclude=python/pyspark/cloudpickle/*.py,shared.py,python/docs/source/conf.py,wor [flake8] select = E901,E999,F821,F822,F823,F401,F405 -exclude = python/pyspark/cloudpickle/*.py,shared.py,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/* +exclude = python/pyspark/cloudpickle/*.py,shared.py*,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/*,python/out,python/pyspark/sql/pandas/functions.pyi,python/pyspark/sql/column.pyi,python/pyspark/worker.pyi,python/pyspark/java_gateway.pyi max-line-length = 100 diff --git a/examples/src/main/python/ml/estimator_transformer_param_example.py b/examples/src/main/python/ml/estimator_transformer_param_example.py index 1dcca6c201119..2cf9432646b5e 100644 --- a/examples/src/main/python/ml/estimator_transformer_param_example.py +++ b/examples/src/main/python/ml/estimator_transformer_param_example.py @@ -56,12 +56,14 @@ # We may alternatively specify parameters using a Python dictionary as a paramMap paramMap = {lr.maxIter: 20} paramMap[lr.maxIter] = 30 # Specify 1 Param, overwriting the original maxIter. - paramMap.update({lr.regParam: 0.1, lr.threshold: 0.55}) # Specify multiple Params. + # Specify multiple Params. + paramMap.update({lr.regParam: 0.1, lr.threshold: 0.55}) # type: ignore # You can combine paramMaps, which are python dictionaries. - paramMap2 = {lr.probabilityCol: "myProbability"} # Change output column name + # Change output column name + paramMap2 = {lr.probabilityCol: "myProbability"} # type: ignore paramMapCombined = paramMap.copy() - paramMapCombined.update(paramMap2) + paramMapCombined.update(paramMap2) # type: ignore # Now learn a new model using the paramMapCombined parameters. # paramMapCombined overrides all parameters set earlier via lr.set* methods. 
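One reason the `# type: ignore` comments above are needed: mypy infers a dict's type from its first literal entry, so a param map started as `{lr.maxIter: 20}` gets a narrower type than the later `update()` calls supply. A minimal standalone sketch of the same effect, using a hypothetical `Param` stand-in rather than pyspark's real class:

```python
# Runs fine at runtime; the interesting part is what mypy reports.

class Param:
    """Hypothetical stand-in for a parameter key class."""
    def __init__(self, name: str) -> None:
        self.name = name

max_iter = Param("maxIter")
reg_param = Param("regParam")

# mypy infers Dict[Param, int] from the first literal entry.
param_map = {max_iter: 20}

# Without the trailing comment, mypy flags the float value as incompatible
# with the inferred int value type, which is why the examples silence it.
param_map.update({reg_param: 0.1})  # type: ignore
```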
diff --git a/examples/src/main/python/ml/fm_classifier_example.py b/examples/src/main/python/ml/fm_classifier_example.py index b47bdc5275beb..da49e5fc2baa9 100644 --- a/examples/src/main/python/ml/fm_classifier_example.py +++ b/examples/src/main/python/ml/fm_classifier_example.py @@ -67,9 +67,9 @@ print("Test set accuracy = %g" % accuracy) fmModel = model.stages[2] - print("Factors: " + str(fmModel.factors)) - print("Linear: " + str(fmModel.linear)) - print("Intercept: " + str(fmModel.intercept)) + print("Factors: " + str(fmModel.factors)) # type: ignore + print("Linear: " + str(fmModel.linear)) # type: ignore + print("Intercept: " + str(fmModel.intercept)) # type: ignore # $example off$ spark.stop() diff --git a/examples/src/main/python/ml/fm_regressor_example.py b/examples/src/main/python/ml/fm_regressor_example.py index 5c8133996ae83..47544b6324203 100644 --- a/examples/src/main/python/ml/fm_regressor_example.py +++ b/examples/src/main/python/ml/fm_regressor_example.py @@ -64,9 +64,9 @@ print("Root Mean Squared Error (RMSE) on test data = %g" % rmse) fmModel = model.stages[1] - print("Factors: " + str(fmModel.factors)) - print("Linear: " + str(fmModel.linear)) - print("Intercept: " + str(fmModel.intercept)) + print("Factors: " + str(fmModel.factors)) # type: ignore + print("Linear: " + str(fmModel.linear)) # type: ignore + print("Intercept: " + str(fmModel.intercept)) # type: ignore # $example off$ spark.stop() diff --git a/examples/src/main/python/ml/pipeline_example.py b/examples/src/main/python/ml/pipeline_example.py index e1fab7cbe6d80..66fdd73632a70 100644 --- a/examples/src/main/python/ml/pipeline_example.py +++ b/examples/src/main/python/ml/pipeline_example.py @@ -62,8 +62,12 @@ prediction = model.transform(test) selected = prediction.select("id", "text", "probability", "prediction") for row in selected.collect(): - rid, text, prob, prediction = row - print("(%d, %s) --> prob=%s, prediction=%f" % (rid, text, str(prob), prediction)) + rid, text, prob, prediction = row # type: ignore + print( + "(%d, %s) --> prob=%s, prediction=%f" % ( + rid, text, str(prob), prediction # type: ignore + ) + ) # $example off$ spark.stop() diff --git a/examples/src/main/python/sql/arrow.py b/examples/src/main/python/sql/arrow.py index 1789a54f0276e..9978e8601449a 100644 --- a/examples/src/main/python/sql/arrow.py +++ b/examples/src/main/python/sql/arrow.py @@ -32,8 +32,8 @@ def dataframe_with_arrow_example(spark): - import numpy as np - import pandas as pd + import numpy as np # type: ignore[import] + import pandas as pd # type: ignore[import] # Enable Arrow-based columnar data transfers spark.conf.set("spark.sql.execution.arrow.pyspark.enabled", "true") diff --git a/python/MANIFEST.in b/python/MANIFEST.in index 2d78a001a4d98..862d62b1d3b29 100644 --- a/python/MANIFEST.in +++ b/python/MANIFEST.in @@ -22,4 +22,5 @@ recursive-include deps/data *.data *.txt recursive-include deps/licenses *.txt recursive-include deps/examples *.py recursive-include lib *.zip +recursive-include pyspark *.pyi py.typed include README.md diff --git a/python/mypy.ini b/python/mypy.ini new file mode 100644 index 0000000000000..a9523e622ca0d --- /dev/null +++ b/python/mypy.ini @@ -0,0 +1,36 @@ +; +; Licensed to the Apache Software Foundation (ASF) under one or more +; contributor license agreements. See the NOTICE file distributed with +; this work for additional information regarding copyright ownership. 
+; The ASF licenses this file to You under the Apache License, Version 2.0 +; (the "License"); you may not use this file except in compliance with +; the License. You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. +; + +[mypy] + +[mypy-pyspark.cloudpickle.*] +ignore_errors = True + +[mypy-py4j.*] +ignore_missing_imports = True + +[mypy-numpy] +ignore_missing_imports = True + +[mypy-scipy.*] +ignore_missing_imports = True + +[mypy-pandas.*] +ignore_missing_imports = True + +[mypy-pyarrow] +ignore_missing_imports = True diff --git a/python/pyspark/__init__.pyi b/python/pyspark/__init__.pyi new file mode 100644 index 0000000000000..98bd40684c01b --- /dev/null +++ b/python/pyspark/__init__.pyi @@ -0,0 +1,73 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Callable, Optional, TypeVar + +from pyspark.accumulators import ( # noqa: F401 + Accumulator as Accumulator, + AccumulatorParam as AccumulatorParam, +) +from pyspark.broadcast import Broadcast as Broadcast # noqa: F401 +from pyspark.conf import SparkConf as SparkConf # noqa: F401 +from pyspark.context import SparkContext as SparkContext # noqa: F401 +from pyspark.files import SparkFiles as SparkFiles # noqa: F401 +from pyspark.status import ( + StatusTracker as StatusTracker, + SparkJobInfo as SparkJobInfo, + SparkStageInfo as SparkStageInfo, +) # noqa: F401 +from pyspark.profiler import ( # noqa: F401 + BasicProfiler as BasicProfiler, + Profiler as Profiler, +) +from pyspark.rdd import RDD as RDD, RDDBarrier as RDDBarrier # noqa: F401 +from pyspark.serializers import ( # noqa: F401 + MarshalSerializer as MarshalSerializer, + PickleSerializer as PickleSerializer, +) +from pyspark.status import ( # noqa: F401 + SparkJobInfo as SparkJobInfo, + SparkStageInfo as SparkStageInfo, + StatusTracker as StatusTracker, +) +from pyspark.storagelevel import StorageLevel as StorageLevel # noqa: F401 +from pyspark.taskcontext import ( # noqa: F401 + BarrierTaskContext as BarrierTaskContext, + BarrierTaskInfo as BarrierTaskInfo, + TaskContext as TaskContext, +) +from pyspark.util import InheritableThread as InheritableThread # noqa: F401 + +# Compatiblity imports +from pyspark.sql import ( # noqa: F401 + SQLContext as SQLContext, + HiveContext as HiveContext, + Row as Row, +) + +T = TypeVar("T") +F = TypeVar("F", bound=Callable) + +def since(version: str) -> Callable[[T], T]: ... 
+def copy_func( + f: F, + name: Optional[str] = ..., + sinceversion: Optional[str] = ..., + doc: Optional[str] = ..., +) -> F: ... +def keyword_only(func: F) -> F: ... diff --git a/python/pyspark/_globals.pyi b/python/pyspark/_globals.pyi new file mode 100644 index 0000000000000..9453775621196 --- /dev/null +++ b/python/pyspark/_globals.pyi @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: This dynamically typed stub was automatically generated by stubgen. + +from typing import Any + +__ALL__: Any + +class _NoValueType: + def __new__(cls): ... + def __reduce__(self): ... diff --git a/python/pyspark/_typing.pyi b/python/pyspark/_typing.pyi new file mode 100644 index 0000000000000..637e4cb4fbccc --- /dev/null +++ b/python/pyspark/_typing.pyi @@ -0,0 +1,33 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Callable, Iterable, Sized, TypeVar, Union +from typing_extensions import Protocol + +F = TypeVar("F", bound=Callable) +T = TypeVar("T", covariant=True) + +PrimitiveType = Union[bool, float, int, str] + +class SupportsIAdd(Protocol): + def __iadd__(self, other: SupportsIAdd) -> SupportsIAdd: ... + +class SupportsOrdering(Protocol): + def __le__(self, other: SupportsOrdering) -> bool: ... + +class SizedIterable(Protocol, Sized, Iterable[T]): ... diff --git a/python/pyspark/accumulators.pyi b/python/pyspark/accumulators.pyi new file mode 100644 index 0000000000000..94f8023d1102b --- /dev/null +++ b/python/pyspark/accumulators.pyi @@ -0,0 +1,71 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Callable, Generic, Tuple, Type, TypeVar + +import socketserver.BaseRequestHandler # type: ignore + +from pyspark._typing import SupportsIAdd + +T = TypeVar("T") +U = TypeVar("U", bound=SupportsIAdd) + +import socketserver as SocketServer + +class Accumulator(Generic[T]): + aid: int + accum_param: AccumulatorParam[T] + def __init__( + self, aid: int, value: T, accum_param: AccumulatorParam[T] + ) -> None: ... + def __reduce__( + self, + ) -> Tuple[ + Callable[[int, int, AccumulatorParam[T]], Accumulator[T]], + Tuple[int, int, AccumulatorParam[T]], + ]: ... + @property + def value(self) -> T: ... + @value.setter + def value(self, value: T) -> None: ... + def add(self, term: T) -> None: ... + def __iadd__(self, term: T) -> Accumulator[T]: ... + +class AccumulatorParam(Generic[T]): + def zero(self, value: T) -> T: ... + def addInPlace(self, value1: T, value2: T) -> T: ... + +class AddingAccumulatorParam(AccumulatorParam[U]): + zero_value: U + def __init__(self, zero_value: U) -> None: ... + def zero(self, value: U) -> U: ... + def addInPlace(self, value1: U, value2: U) -> U: ... + +class _UpdateRequestHandler(SocketServer.StreamRequestHandler): + def handle(self) -> None: ... + +class AccumulatorServer(SocketServer.TCPServer): + auth_token: str + def __init__( + self, + server_address: Tuple[str, int], + RequestHandlerClass: Type[socketserver.BaseRequestHandler], + auth_token: str, + ) -> None: ... + server_shutdown: bool + def shutdown(self) -> None: ... diff --git a/python/pyspark/broadcast.pyi b/python/pyspark/broadcast.pyi new file mode 100644 index 0000000000000..c2ea3c6f7d8b4 --- /dev/null +++ b/python/pyspark/broadcast.pyi @@ -0,0 +1,46 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import threading +from typing import Any, Generic, Optional, TypeVar + +T = TypeVar("T") + +class Broadcast(Generic[T]): + def __init__( + self, + sc: Optional[Any] = ..., + value: Optional[T] = ..., + pickle_registry: Optional[Any] = ..., + path: Optional[Any] = ..., + sock_file: Optional[Any] = ..., + ) -> None: ... + def dump(self, value: Any, f: Any) -> None: ... + def load_from_path(self, path: Any): ... + def load(self, file: Any): ... + @property + def value(self) -> T: ... + def unpersist(self, blocking: bool = ...) -> None: ... + def destroy(self, blocking: bool = ...) -> None: ... + def __reduce__(self): ... 
+ +class BroadcastPickleRegistry(threading.local): + def __init__(self) -> None: ... + def __iter__(self) -> None: ... + def add(self, bcast: Any) -> None: ... + def clear(self) -> None: ... diff --git a/python/pyspark/conf.pyi b/python/pyspark/conf.pyi new file mode 100644 index 0000000000000..f7ca61dea9cd2 --- /dev/null +++ b/python/pyspark/conf.pyi @@ -0,0 +1,44 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import List, Optional, Tuple + +from py4j.java_gateway import JVMView, JavaObject # type: ignore[import] + +class SparkConf: + def __init__( + self, + loadDefaults: bool = ..., + _jvm: Optional[JVMView] = ..., + _jconf: Optional[JavaObject] = ..., + ) -> None: ... + def set(self, key: str, value: str) -> SparkConf: ... + def setIfMissing(self, key: str, value: str) -> SparkConf: ... + def setMaster(self, value: str) -> SparkConf: ... + def setAppName(self, value: str) -> SparkConf: ... + def setSparkHome(self, value: str) -> SparkConf: ... + @overload + def setExecutorEnv(self, key: str, value: str) -> SparkConf: ... + @overload + def setExecutorEnv(self, *, pairs: List[Tuple[str, str]]) -> SparkConf: ... + def setAll(self, pairs: List[Tuple[str, str]]) -> SparkConf: ... + def get(self, key: str, defaultValue: Optional[str] = ...) -> str: ... + def getAll(self) -> List[Tuple[str, str]]: ... + def contains(self, key: str) -> bool: ... + def toDebugString(self) -> str: ... diff --git a/python/pyspark/context.pyi b/python/pyspark/context.pyi new file mode 100644 index 0000000000000..76ecf8911471a --- /dev/null +++ b/python/pyspark/context.pyi @@ -0,0 +1,176 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, TypeVar + +from py4j.java_gateway import JavaGateway, JavaObject # type: ignore[import] + +from pyspark.accumulators import Accumulator, AccumulatorParam +from pyspark.broadcast import Broadcast +from pyspark.conf import SparkConf +from pyspark.profiler import Profiler # noqa: F401 +from pyspark.resource.information import ResourceInformation +from pyspark.rdd import RDD +from pyspark.serializers import Serializer +from pyspark.status import StatusTracker + +T = TypeVar("T") +U = TypeVar("U") + +class SparkContext: + master: str + appName: str + sparkHome: str + PACKAGE_EXTENSIONS: Iterable[str] + def __init__( + self, + master: Optional[str] = ..., + appName: Optional[str] = ..., + sparkHome: Optional[str] = ..., + pyFiles: Optional[List[str]] = ..., + environment: Optional[Dict[str, str]] = ..., + batchSize: int = ..., + serializer: Serializer = ..., + conf: Optional[SparkConf] = ..., + gateway: Optional[JavaGateway] = ..., + jsc: Optional[JavaObject] = ..., + profiler_cls: type = ..., + ) -> None: ... + def __getnewargs__(self): ... + def __enter__(self): ... + def __exit__(self, type, value, trace): ... + @classmethod + def getOrCreate(cls, conf: Optional[SparkConf] = ...) -> SparkContext: ... + def setLogLevel(self, logLevel: str) -> None: ... + @classmethod + def setSystemProperty(cls, key: str, value: str) -> None: ... + @property + def version(self) -> str: ... + @property + def applicationId(self) -> str: ... + @property + def uiWebUrl(self) -> str: ... + @property + def startTime(self) -> int: ... + @property + def defaultParallelism(self) -> int: ... + @property + def defaultMinPartitions(self) -> int: ... + def stop(self) -> None: ... + def emptyRDD(self) -> RDD[Any]: ... + def range( + self, + start: int, + end: Optional[int] = ..., + step: int = ..., + numSlices: Optional[int] = ..., + ) -> RDD[int]: ... + def parallelize(self, c: Iterable[T], numSlices: Optional[int] = ...) -> RDD[T]: ... + def pickleFile(self, name: str, minPartitions: Optional[int] = ...) -> RDD[Any]: ... + def textFile( + self, name: str, minPartitions: Optional[int] = ..., use_unicode: bool = ... + ) -> RDD[str]: ... + def wholeTextFiles( + self, path: str, minPartitions: Optional[int] = ..., use_unicode: bool = ... + ) -> RDD[Tuple[str, str]]: ... + def binaryFiles( + self, path: str, minPartitions: Optional[int] = ... + ) -> RDD[Tuple[str, bytes]]: ... + def binaryRecords(self, path: str, recordLength: int) -> RDD[bytes]: ... + def sequenceFile( + self, + path: str, + keyClass: Optional[str] = ..., + valueClass: Optional[str] = ..., + keyConverter: Optional[str] = ..., + valueConverter: Optional[str] = ..., + minSplits: Optional[int] = ..., + batchSize: int = ..., + ) -> RDD[Tuple[T, U]]: ... + def newAPIHadoopFile( + self, + path: str, + inputFormatClass: str, + keyClass: str, + valueClass: str, + keyConverter: Optional[str] = ..., + valueConverter: Optional[str] = ..., + conf: Optional[Dict[str, str]] = ..., + batchSize: int = ..., + ) -> RDD[Tuple[T, U]]: ... + def newAPIHadoopRDD( + self, + inputFormatClass: str, + keyClass: str, + valueClass: str, + keyConverter: Optional[str] = ..., + valueConverter: Optional[str] = ..., + conf: Optional[Dict[str, str]] = ..., + batchSize: int = ..., + ) -> RDD[Tuple[T, U]]: ... 
+ def hadoopFile( + self, + path: str, + inputFormatClass: str, + keyClass: str, + valueClass: str, + keyConverter: Optional[str] = ..., + valueConverter: Optional[str] = ..., + conf: Optional[Dict[str, str]] = ..., + batchSize: int = ..., + ) -> RDD[Tuple[T, U]]: ... + def hadoopRDD( + self, + inputFormatClass: str, + keyClass: str, + valueClass: str, + keyConverter: Optional[str] = ..., + valueConverter: Optional[str] = ..., + conf: Optional[Dict[str, str]] = ..., + batchSize: int = ..., + ) -> RDD[Tuple[T, U]]: ... + def union(self, rdds: Iterable[RDD[T]]) -> RDD[T]: ... + def broadcast(self, value: T) -> Broadcast[T]: ... + def accumulator( + self, value: T, accum_param: Optional[AccumulatorParam[T]] = ... + ) -> Accumulator[T]: ... + def addFile(self, path: str, recursive: bool = ...) -> None: ... + def addPyFile(self, path: str) -> None: ... + def setCheckpointDir(self, dirName: str) -> None: ... + def setJobGroup( + self, groupId: str, description: str, interruptOnCancel: bool = ... + ) -> None: ... + def setLocalProperty(self, key: str, value: str) -> None: ... + def getLocalProperty(self, key: str) -> Optional[str]: ... + def sparkUser(self) -> str: ... + def setJobDescription(self, value: str) -> None: ... + def cancelJobGroup(self, groupId: str) -> None: ... + def cancelAllJobs(self) -> None: ... + def statusTracker(self) -> StatusTracker: ... + def runJob( + self, + rdd: RDD[T], + partitionFunc: Callable[[Iterable[T]], Iterable[U]], + partitions: Optional[List[int]] = ..., + allowLocal: bool = ..., + ) -> List[U]: ... + def show_profiles(self) -> None: ... + def dump_profiles(self, path: str) -> None: ... + def getConf(self) -> SparkConf: ... + @property + def resources(self) -> Dict[str, ResourceInformation]: ... diff --git a/python/pyspark/daemon.pyi b/python/pyspark/daemon.pyi new file mode 100644 index 0000000000000..dfacf30a9f8a7 --- /dev/null +++ b/python/pyspark/daemon.pyi @@ -0,0 +1,29 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyspark.serializers import ( # noqa: F401 + UTF8Deserializer as UTF8Deserializer, + read_int as read_int, + write_int as write_int, + write_with_length as write_with_length, +) +from typing import Any + +def compute_real_exit_code(exit_code: Any): ... +def worker(sock: Any, authenticated: Any): ... +def manager() -> None: ... diff --git a/python/pyspark/files.pyi b/python/pyspark/files.pyi new file mode 100644 index 0000000000000..9e7cad17ebbdb --- /dev/null +++ b/python/pyspark/files.pyi @@ -0,0 +1,24 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class SparkFiles: + def __init__(self) -> None: ... + @classmethod + def get(cls, filename: str) -> str: ... + @classmethod + def getRootDirectory(cls) -> str: ... diff --git a/python/pyspark/find_spark_home.pyi b/python/pyspark/find_spark_home.pyi new file mode 100644 index 0000000000000..217e5db960782 --- /dev/null +++ b/python/pyspark/find_spark_home.pyi @@ -0,0 +1,17 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/python/pyspark/java_gateway.pyi b/python/pyspark/java_gateway.pyi new file mode 100644 index 0000000000000..5b45206dc045c --- /dev/null +++ b/python/pyspark/java_gateway.pyi @@ -0,0 +1,24 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyspark.serializers import UTF8Deserializer as UTF8Deserializer, read_int as read_int, write_with_length as write_with_length # type: ignore[attr-defined] +from typing import Any, Optional + +def launch_gateway(conf: Optional[Any] = ..., popen_kwargs: Optional[Any] = ...): ... +def local_connect_and_auth(port: Any, auth_secret: Any): ... +def ensure_callback_server_started(gw: Any) -> None: ... diff --git a/python/pyspark/join.pyi b/python/pyspark/join.pyi new file mode 100644 index 0000000000000..e89e0fbbcda9b --- /dev/null +++ b/python/pyspark/join.pyi @@ -0,0 +1,50 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Hashable, Iterable, Optional, Tuple, TypeVar + +from pyspark.resultiterable import ResultIterable +import pyspark.rdd + +K = TypeVar("K", bound=Hashable) +V = TypeVar("V") +U = TypeVar("U") + +def python_join( + rdd: pyspark.rdd.RDD[Tuple[K, V]], + other: pyspark.rdd.RDD[Tuple[K, U]], + numPartitions: int, +) -> pyspark.rdd.RDD[Tuple[K, Tuple[V, U]]]: ... +def python_right_outer_join( + rdd: pyspark.rdd.RDD[Tuple[K, V]], + other: pyspark.rdd.RDD[Tuple[K, U]], + numPartitions: int, +) -> pyspark.rdd.RDD[Tuple[K, Tuple[V, Optional[U]]]]: ... +def python_left_outer_join( + rdd: pyspark.rdd.RDD[Tuple[K, V]], + other: pyspark.rdd.RDD[Tuple[K, U]], + numPartitions: int, +) -> pyspark.rdd.RDD[Tuple[K, Tuple[Optional[V], U]]]: ... +def python_full_outer_join( + rdd: pyspark.rdd.RDD[Tuple[K, V]], + other: pyspark.rdd.RDD[Tuple[K, U]], + numPartitions: int, +) -> pyspark.rdd.RDD[Tuple[K, Tuple[Optional[V], Optional[U]]]]: ... +def python_cogroup( + rdds: Iterable[pyspark.rdd.RDD[Tuple[K, V]]], numPartitions: int +) -> pyspark.rdd.RDD[Tuple[K, Tuple[ResultIterable[V], ...]]]: ... diff --git a/python/pyspark/ml/__init__.pyi b/python/pyspark/ml/__init__.pyi new file mode 100644 index 0000000000000..8e3b8a5daeb08 --- /dev/null +++ b/python/pyspark/ml/__init__.pyi @@ -0,0 +1,45 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from pyspark.ml import ( # noqa: F401 + classification as classification, + clustering as clustering, + evaluation as evaluation, + feature as feature, + fpm as fpm, + image as image, + linalg as linalg, + param as param, + recommendation as recommendation, + regression as regression, + stat as stat, + tuning as tuning, + util as util, +) +from pyspark.ml.base import ( # noqa: F401 + Estimator as Estimator, + Model as Model, + PredictionModel as PredictionModel, + Predictor as Predictor, + Transformer as Transformer, + UnaryTransformer as UnaryTransformer, +) +from pyspark.ml.pipeline import ( # noqa: F401 + Pipeline as Pipeline, + PipelineModel as PipelineModel, +) diff --git a/python/pyspark/ml/_typing.pyi b/python/pyspark/ml/_typing.pyi new file mode 100644 index 0000000000000..d966a787c0fca --- /dev/null +++ b/python/pyspark/ml/_typing.pyi @@ -0,0 +1,76 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, Dict, TypeVar, Union +from typing_extensions import Literal + +import pyspark.ml.base +import pyspark.ml.param +import pyspark.ml.util +import pyspark.ml.wrapper + +ParamMap = Dict[pyspark.ml.param.Param, Any] +PipelineStage = Union[pyspark.ml.base.Estimator, pyspark.ml.base.Transformer] + +T = TypeVar("T") +P = TypeVar("P", bound=pyspark.ml.param.Params) +M = TypeVar("M", bound=pyspark.ml.base.Transformer) +JM = TypeVar("JM", bound=pyspark.ml.wrapper.JavaTransformer) + +BinaryClassificationEvaluatorMetricType = Union[ + Literal["areaUnderROC"], Literal["areaUnderPR"] +] +RegressionEvaluatorMetricType = Union[ + Literal["rmse"], Literal["mse"], Literal["r2"], Literal["mae"], Literal["var"] +] +MulticlassClassificationEvaluatorMetricType = Union[ + Literal["f1"], + Literal["accuracy"], + Literal["weightedPrecision"], + Literal["weightedRecall"], + Literal["weightedTruePositiveRate"], + Literal["weightedFalsePositiveRate"], + Literal["weightedFMeasure"], + Literal["truePositiveRateByLabel"], + Literal["falsePositiveRateByLabel"], + Literal["precisionByLabel"], + Literal["recallByLabel"], + Literal["fMeasureByLabel"], +] +MultilabelClassificationEvaluatorMetricType = Union[ + Literal["subsetAccuracy"], + Literal["accuracy"], + Literal["hammingLoss"], + Literal["precision"], + Literal["recall"], + Literal["f1Measure"], + Literal["precisionByLabel"], + Literal["recallByLabel"], + Literal["f1MeasureByLabel"], + Literal["microPrecision"], + Literal["microRecall"], + Literal["microF1Measure"], +] +ClusteringEvaluatorMetricType = Union[Literal["silhouette"]] +RankingEvaluatorMetricType = Union[ + Literal["meanAveragePrecision"], + Literal["meanAveragePrecisionAtK"], + Literal["precisionAtK"], + Literal["ndcgAtK"], + Literal["recallAtK"], +] diff --git a/python/pyspark/ml/base.pyi b/python/pyspark/ml/base.pyi new file mode 100644 
index 0000000000000..7fd8c3b70b672 --- /dev/null +++ b/python/pyspark/ml/base.pyi @@ -0,0 +1,103 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import ( + Callable, + Generic, + Iterable, + List, + Optional, + Tuple, +) +from pyspark.ml._typing import M, P, T, ParamMap + +import _thread + +import abc +from abc import abstractmethod +from pyspark import since as since # noqa: F401 +from pyspark.ml.common import inherit_doc as inherit_doc # noqa: F401 +from pyspark.ml.param.shared import ( + HasFeaturesCol as HasFeaturesCol, + HasInputCol as HasInputCol, + HasLabelCol as HasLabelCol, + HasOutputCol as HasOutputCol, + HasPredictionCol as HasPredictionCol, + Params as Params, +) +from pyspark.sql.functions import udf as udf # noqa: F401 +from pyspark.sql.types import ( # noqa: F401 + DataType, + StructField as StructField, + StructType as StructType, +) + +from pyspark.sql.dataframe import DataFrame + +class _FitMultipleIterator: + fitSingleModel: Callable[[int], Transformer] + numModel: int + counter: int = ... + lock: _thread.LockType + def __init__( + self, fitSingleModel: Callable[[int], Transformer], numModels: int + ) -> None: ... + def __iter__(self) -> _FitMultipleIterator: ... + def __next__(self) -> Tuple[int, Transformer]: ... + def next(self) -> Tuple[int, Transformer]: ... + +class Estimator(Generic[M], Params, metaclass=abc.ABCMeta): + @overload + def fit(self, dataset: DataFrame, params: Optional[ParamMap] = ...) -> M: ... + @overload + def fit(self, dataset: DataFrame, params: List[ParamMap]) -> List[M]: ... + def fitMultiple( + self, dataset: DataFrame, params: List[ParamMap] + ) -> Iterable[Tuple[int, M]]: ... + +class Transformer(Params, metaclass=abc.ABCMeta): + def transform( + self, dataset: DataFrame, params: Optional[ParamMap] = ... + ) -> DataFrame: ... + +class Model(Transformer, metaclass=abc.ABCMeta): ... + +class UnaryTransformer(HasInputCol, HasOutputCol, Transformer, metaclass=abc.ABCMeta): + def createTransformFunc(self) -> Callable: ... + def outputDataType(self) -> DataType: ... + def validateInputType(self, inputType: DataType) -> None: ... + def transformSchema(self, schema: StructType) -> StructType: ... + def setInputCol(self: M, value: str) -> M: ... + def setOutputCol(self: M, value: str) -> M: ... + +class _PredictorParams(HasLabelCol, HasFeaturesCol, HasPredictionCol): ... + +class Predictor(Estimator[M], _PredictorParams, metaclass=abc.ABCMeta): + def setLabelCol(self: P, value: str) -> P: ... + def setFeaturesCol(self: P, value: str) -> P: ... + def setPredictionCol(self: P, value: str) -> P: ... + +class PredictionModel(Generic[T], Model, _PredictorParams, metaclass=abc.ABCMeta): + def setFeaturesCol(self: M, value: str) -> M: ... 
+ def setPredictionCol(self: M, value: str) -> M: ... + @property + @abc.abstractmethod + def numFeatures(self) -> int: ... + @abstractmethod + def predict(self, value: T) -> float: ... diff --git a/python/pyspark/ml/classification.pyi b/python/pyspark/ml/classification.pyi new file mode 100644 index 0000000000000..55afc20a54cb9 --- /dev/null +++ b/python/pyspark/ml/classification.pyi @@ -0,0 +1,922 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, List, Optional +from pyspark.ml._typing import JM, M, P, T, ParamMap + +import abc +from abc import abstractmethod +from pyspark.ml import Estimator, Model, PredictionModel, Predictor, Transformer +from pyspark.ml.base import _PredictorParams +from pyspark.ml.param.shared import ( + HasAggregationDepth, + HasBlockSize, + HasElasticNetParam, + HasFitIntercept, + HasMaxIter, + HasParallelism, + HasProbabilityCol, + HasRawPredictionCol, + HasRegParam, + HasSeed, + HasSolver, + HasStandardization, + HasStepSize, + HasThreshold, + HasThresholds, + HasTol, + HasWeightCol, +) +from pyspark.ml.regression import _FactorizationMachinesParams +from pyspark.ml.tree import ( + _DecisionTreeModel, + _DecisionTreeParams, + _GBTParams, + _HasVarianceImpurity, + _RandomForestParams, + _TreeClassifierParams, + _TreeEnsembleModel, +) +from pyspark.ml.util import HasTrainingSummary, JavaMLReadable, JavaMLWritable +from pyspark.ml.wrapper import JavaPredictionModel, JavaPredictor, JavaWrapper + +from pyspark.ml.linalg import Matrix, Vector +from pyspark.ml.param import Param +from pyspark.ml.regression import DecisionTreeRegressionModel +from pyspark.sql.dataframe import DataFrame + +class _ClassifierParams(HasRawPredictionCol, _PredictorParams): ... + +class Classifier(Predictor, _ClassifierParams, metaclass=abc.ABCMeta): + def setRawPredictionCol(self: P, value: str) -> P: ... + +class ClassificationModel(PredictionModel, _ClassifierParams, metaclass=abc.ABCMeta): + def setRawPredictionCol(self: P, value: str) -> P: ... + @property + @abc.abstractmethod + def numClasses(self) -> int: ... + @abstractmethod + def predictRaw(self, value: Vector) -> Vector: ... + +class _ProbabilisticClassifierParams( + HasProbabilityCol, HasThresholds, _ClassifierParams +): ... + +class ProbabilisticClassifier( + Classifier, _ProbabilisticClassifierParams, metaclass=abc.ABCMeta +): + def setProbabilityCol(self: P, value: str) -> P: ... + def setThresholds(self: P, value: List[float]) -> P: ... + +class ProbabilisticClassificationModel( + ClassificationModel, _ProbabilisticClassifierParams, metaclass=abc.ABCMeta +): + def setProbabilityCol(self: M, value: str) -> M: ... + def setThresholds(self: M, value: List[float]) -> M: ... + @abstractmethod + def predictProbability(self, value: Vector) -> Vector: ... 
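For context, a minimal sketch of what the overloaded `Estimator.fit` signatures in `base.pyi` above encode in practice: fitting with an optional single param map yields one model, while fitting with a list of param maps yields a list of models. The local SparkSession, the toy data and the choice of `LogisticRegression` (declared later in this same module) are illustrative assumptions, not part of the patch.

```
from pyspark.sql import SparkSession
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.linalg import Vectors

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame(
    [(1.0, Vectors.dense(0.0, 1.1)), (0.0, Vectors.dense(2.0, 1.0))],
    ["label", "features"],
)

lr = LogisticRegression(maxIter=5)

# fit(dataset) -> LogisticRegressionModel (the first @overload)
model = lr.fit(df)

# fit(dataset, [params, ...]) -> List[LogisticRegressionModel] (the second @overload)
models = lr.fit(df, [{lr.regParam: 0.0}, {lr.regParam: 0.1}])

print(type(model).__name__, len(models))
```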
+ +class _JavaClassifier(Classifier, JavaPredictor[JM], metaclass=abc.ABCMeta): + def setRawPredictionCol(self: P, value: str) -> P: ... + +class _JavaClassificationModel(ClassificationModel, JavaPredictionModel[T]): + @property + def numClasses(self) -> int: ... + def predictRaw(self, value: Vector) -> Vector: ... + +class _JavaProbabilisticClassifier( + ProbabilisticClassifier, _JavaClassifier[JM], metaclass=abc.ABCMeta +): ... + +class _JavaProbabilisticClassificationModel( + ProbabilisticClassificationModel, _JavaClassificationModel[T] +): + def predictProbability(self, value: Any): ... + +class _ClassificationSummary(JavaWrapper): + @property + def predictions(self) -> DataFrame: ... + @property + def predictionCol(self) -> str: ... + @property + def labelCol(self) -> str: ... + @property + def weightCol(self) -> str: ... + @property + def labels(self) -> List[str]: ... + @property + def truePositiveRateByLabel(self) -> List[float]: ... + @property + def falsePositiveRateByLabel(self) -> List[float]: ... + @property + def precisionByLabel(self) -> List[float]: ... + @property + def recallByLabel(self) -> List[float]: ... + def fMeasureByLabel(self, beta: float = ...) -> List[float]: ... + @property + def accuracy(self) -> float: ... + @property + def weightedTruePositiveRate(self) -> float: ... + @property + def weightedFalsePositiveRate(self) -> float: ... + @property + def weightedRecall(self) -> float: ... + @property + def weightedPrecision(self) -> float: ... + def weightedFMeasure(self, beta: float = ...) -> float: ... + +class _TrainingSummary(JavaWrapper): + @property + def objectiveHistory(self) -> List[float]: ... + @property + def totalIterations(self) -> int: ... + +class _BinaryClassificationSummary(_ClassificationSummary): + @property + def scoreCol(self) -> str: ... + @property + def roc(self) -> DataFrame: ... + @property + def areaUnderROC(self) -> float: ... + @property + def pr(self) -> DataFrame: ... + @property + def fMeasureByThreshold(self) -> DataFrame: ... + @property + def precisionByThreshold(self) -> DataFrame: ... + @property + def recallByThreshold(self) -> DataFrame: ... + +class _LinearSVCParams( + _ClassifierParams, + HasRegParam, + HasMaxIter, + HasFitIntercept, + HasTol, + HasStandardization, + HasWeightCol, + HasAggregationDepth, + HasThreshold, + HasBlockSize, +): + threshold: Param[float] + def __init__(self, *args: Any) -> None: ... + +class LinearSVC( + _JavaClassifier[LinearSVCModel], + _LinearSVCParams, + JavaMLWritable, + JavaMLReadable[LinearSVC], +): + def __init__( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxIter: int = ..., + regParam: float = ..., + tol: float = ..., + rawPredictionCol: str = ..., + fitIntercept: bool = ..., + standardization: bool = ..., + threshold: float = ..., + weightCol: Optional[str] = ..., + aggregationDepth: int = ..., + blockSize: int = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxIter: int = ..., + regParam: float = ..., + tol: float = ..., + rawPredictionCol: str = ..., + fitIntercept: bool = ..., + standardization: bool = ..., + threshold: float = ..., + weightCol: Optional[str] = ..., + aggregationDepth: int = ..., + blockSize: int = ... + ) -> LinearSVC: ... + def setMaxIter(self, value: int) -> LinearSVC: ... + def setRegParam(self, value: float) -> LinearSVC: ... + def setTol(self, value: float) -> LinearSVC: ... 
+ def setFitIntercept(self, value: bool) -> LinearSVC: ... + def setStandardization(self, value: bool) -> LinearSVC: ... + def setThreshold(self, value: float) -> LinearSVC: ... + def setWeightCol(self, value: str) -> LinearSVC: ... + def setAggregationDepth(self, value: int) -> LinearSVC: ... + def setBlockSize(self, value: int) -> LinearSVC: ... + +class LinearSVCModel( + _JavaClassificationModel[Vector], + _LinearSVCParams, + JavaMLWritable, + JavaMLReadable[LinearSVCModel], + HasTrainingSummary[LinearSVCTrainingSummary], +): + def setThreshold(self, value: float) -> LinearSVCModel: ... + @property + def coefficients(self) -> Vector: ... + @property + def intercept(self) -> float: ... + def summary(self) -> LinearSVCTrainingSummary: ... + def evaluate(self, dataset: DataFrame) -> LinearSVCSummary: ... + +class LinearSVCSummary(_BinaryClassificationSummary): ... +class LinearSVCTrainingSummary(LinearSVCSummary, _TrainingSummary): ... + +class _LogisticRegressionParams( + _ProbabilisticClassifierParams, + HasRegParam, + HasElasticNetParam, + HasMaxIter, + HasFitIntercept, + HasTol, + HasStandardization, + HasWeightCol, + HasAggregationDepth, + HasThreshold, + HasBlockSize, +): + threshold: Param[float] + family: Param[str] + lowerBoundsOnCoefficients: Param[Matrix] + upperBoundsOnCoefficients: Param[Matrix] + lowerBoundsOnIntercepts: Param[Vector] + upperBoundsOnIntercepts: Param[Vector] + def __init__(self, *args: Any): ... + def setThreshold(self: P, value: float) -> P: ... + def getThreshold(self) -> float: ... + def setThresholds(self: P, value: List[float]) -> P: ... + def getThresholds(self) -> List[float]: ... + def getFamily(self) -> str: ... + def getLowerBoundsOnCoefficients(self) -> Matrix: ... + def getUpperBoundsOnCoefficients(self) -> Matrix: ... + def getLowerBoundsOnIntercepts(self) -> Vector: ... + def getUpperBoundsOnIntercepts(self) -> Vector: ... + +class LogisticRegression( + _JavaProbabilisticClassifier[LogisticRegressionModel], + _LogisticRegressionParams, + JavaMLWritable, + JavaMLReadable[LogisticRegression], +): + def __init__( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxIter: int = ..., + regParam: float = ..., + elasticNetParam: float = ..., + tol: float = ..., + fitIntercept: bool = ..., + threshold: float = ..., + thresholds: Optional[List[float]] = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ..., + standardization: bool = ..., + weightCol: Optional[str] = ..., + aggregationDepth: int = ..., + family: str = ..., + lowerBoundsOnCoefficients: Optional[Matrix] = ..., + upperBoundsOnCoefficients: Optional[Matrix] = ..., + lowerBoundsOnIntercepts: Optional[Vector] = ..., + upperBoundsOnIntercepts: Optional[Vector] = ..., + blockSize: int = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxIter: int = ..., + regParam: float = ..., + elasticNetParam: float = ..., + tol: float = ..., + fitIntercept: bool = ..., + threshold: float = ..., + thresholds: Optional[List[float]] = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ..., + standardization: bool = ..., + weightCol: Optional[str] = ..., + aggregationDepth: int = ..., + family: str = ..., + lowerBoundsOnCoefficients: Optional[Matrix] = ..., + upperBoundsOnCoefficients: Optional[Matrix] = ..., + lowerBoundsOnIntercepts: Optional[Vector] = ..., + upperBoundsOnIntercepts: Optional[Vector] = ..., + blockSize: int = ... + ) -> LogisticRegression: ... 
+ def setFamily(self, value: str) -> LogisticRegression: ... + def setLowerBoundsOnCoefficients(self, value: Matrix) -> LogisticRegression: ... + def setUpperBoundsOnCoefficients(self, value: Matrix) -> LogisticRegression: ... + def setLowerBoundsOnIntercepts(self, value: Vector) -> LogisticRegression: ... + def setUpperBoundsOnIntercepts(self, value: Vector) -> LogisticRegression: ... + def setMaxIter(self, value: int) -> LogisticRegression: ... + def setRegParam(self, value: float) -> LogisticRegression: ... + def setTol(self, value: float) -> LogisticRegression: ... + def setElasticNetParam(self, value: float) -> LogisticRegression: ... + def setFitIntercept(self, value: bool) -> LogisticRegression: ... + def setStandardization(self, value: bool) -> LogisticRegression: ... + def setWeightCol(self, value: str) -> LogisticRegression: ... + def setAggregationDepth(self, value: int) -> LogisticRegression: ... + def setBlockSize(self, value: int) -> LogisticRegression: ... + +class LogisticRegressionModel( + _JavaProbabilisticClassificationModel[Vector], + _LogisticRegressionParams, + JavaMLWritable, + JavaMLReadable[LogisticRegressionModel], + HasTrainingSummary[LogisticRegressionTrainingSummary], +): + @property + def coefficients(self) -> Vector: ... + @property + def intercept(self) -> float: ... + @property + def coefficientMatrix(self) -> Matrix: ... + @property + def interceptVector(self) -> Vector: ... + @property + def summary(self) -> LogisticRegressionTrainingSummary: ... + def evaluate(self, dataset: DataFrame) -> LogisticRegressionSummary: ... + +class LogisticRegressionSummary(_ClassificationSummary): + @property + def probabilityCol(self) -> str: ... + @property + def featuresCol(self) -> str: ... + +class LogisticRegressionTrainingSummary( + LogisticRegressionSummary, _TrainingSummary +): ... +class BinaryLogisticRegressionSummary( + _BinaryClassificationSummary, LogisticRegressionSummary +): ... +class BinaryLogisticRegressionTrainingSummary( + BinaryLogisticRegressionSummary, LogisticRegressionTrainingSummary +): ... + +class _DecisionTreeClassifierParams(_DecisionTreeParams, _TreeClassifierParams): + def __init__(self, *args: Any): ... + +class DecisionTreeClassifier( + _JavaProbabilisticClassifier[DecisionTreeClassificationModel], + _DecisionTreeClassifierParams, + JavaMLWritable, + JavaMLReadable[DecisionTreeClassifier], +): + def __init__( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + minInstancesPerNode: int = ..., + minInfoGain: float = ..., + maxMemoryInMB: int = ..., + cacheNodeIds: bool = ..., + checkpointInterval: int = ..., + impurity: str = ..., + seed: Optional[int] = ..., + weightCol: Optional[str] = ..., + leafCol: str = ..., + minWeightFractionPerNode: float = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + minInstancesPerNode: int = ..., + minInfoGain: float = ..., + maxMemoryInMB: int = ..., + cacheNodeIds: bool = ..., + checkpointInterval: int = ..., + impurity: str = ..., + seed: Optional[int] = ..., + weightCol: Optional[str] = ..., + leafCol: str = ..., + minWeightFractionPerNode: float = ... + ) -> DecisionTreeClassifier: ... + def setMaxDepth(self, value: int) -> DecisionTreeClassifier: ... 
+ def setMaxBins(self, value: int) -> DecisionTreeClassifier: ... + def setMinInstancesPerNode(self, value: int) -> DecisionTreeClassifier: ... + def setMinWeightFractionPerNode(self, value: float) -> DecisionTreeClassifier: ... + def setMinInfoGain(self, value: float) -> DecisionTreeClassifier: ... + def setMaxMemoryInMB(self, value: int) -> DecisionTreeClassifier: ... + def setCacheNodeIds(self, value: bool) -> DecisionTreeClassifier: ... + def setImpurity(self, value: str) -> DecisionTreeClassifier: ... + def setCheckpointInterval(self, value: int) -> DecisionTreeClassifier: ... + def setSeed(self, value: int) -> DecisionTreeClassifier: ... + def setWeightCol(self, value: str) -> DecisionTreeClassifier: ... + +class DecisionTreeClassificationModel( + _DecisionTreeModel, + _JavaProbabilisticClassificationModel[Vector], + _DecisionTreeClassifierParams, + JavaMLWritable, + JavaMLReadable[DecisionTreeClassificationModel], +): + @property + def featureImportances(self) -> Vector: ... + +class _RandomForestClassifierParams(_RandomForestParams, _TreeClassifierParams): + def __init__(self, *args: Any): ... + +class RandomForestClassifier( + _JavaProbabilisticClassifier[RandomForestClassificationModel], + _RandomForestClassifierParams, + JavaMLWritable, + JavaMLReadable[RandomForestClassifier], +): + def __init__( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + minInstancesPerNode: int = ..., + minInfoGain: float = ..., + maxMemoryInMB: int = ..., + cacheNodeIds: bool = ..., + checkpointInterval: int = ..., + impurity: str = ..., + numTrees: int = ..., + featureSubsetStrategy: str = ..., + seed: Optional[int] = ..., + subsamplingRate: float = ..., + leafCol: str = ..., + minWeightFractionPerNode: float = ..., + weightCol: Optional[str] = ..., + bootstrap: Optional[bool] = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + minInstancesPerNode: int = ..., + minInfoGain: float = ..., + maxMemoryInMB: int = ..., + cacheNodeIds: bool = ..., + checkpointInterval: int = ..., + seed: Optional[int] = ..., + impurity: str = ..., + numTrees: int = ..., + featureSubsetStrategy: str = ..., + subsamplingRate: float = ..., + leafCol: str = ..., + minWeightFractionPerNode: float = ..., + weightCol: Optional[str] = ..., + bootstrap: Optional[bool] = ... + ) -> RandomForestClassifier: ... + def setMaxDepth(self, value: int) -> RandomForestClassifier: ... + def setMaxBins(self, value: int) -> RandomForestClassifier: ... + def setMinInstancesPerNode(self, value: int) -> RandomForestClassifier: ... + def setMinInfoGain(self, value: float) -> RandomForestClassifier: ... + def setMaxMemoryInMB(self, value: int) -> RandomForestClassifier: ... + def setCacheNodeIds(self, value: bool) -> RandomForestClassifier: ... + def setImpurity(self, value: str) -> RandomForestClassifier: ... + def setNumTrees(self, value: int) -> RandomForestClassifier: ... + def setBootstrap(self, value: bool) -> RandomForestClassifier: ... + def setSubsamplingRate(self, value: float) -> RandomForestClassifier: ... + def setFeatureSubsetStrategy(self, value: str) -> RandomForestClassifier: ... + def setSeed(self, value: int) -> RandomForestClassifier: ... 
+ def setCheckpointInterval(self, value: int) -> RandomForestClassifier: ... + def setWeightCol(self, value: str) -> RandomForestClassifier: ... + def setMinWeightFractionPerNode(self, value: float) -> RandomForestClassifier: ... + +class RandomForestClassificationModel( + _TreeEnsembleModel, + _JavaProbabilisticClassificationModel[Vector], + _RandomForestClassifierParams, + JavaMLWritable, + JavaMLReadable[RandomForestClassificationModel], + HasTrainingSummary[RandomForestClassificationTrainingSummary], +): + @property + def featureImportances(self) -> Vector: ... + @property + def trees(self) -> List[DecisionTreeClassificationModel]: ... + def summary(self) -> RandomForestClassificationTrainingSummary: ... + def evaluate(self, dataset) -> RandomForestClassificationSummary: ... + +class RandomForestClassificationSummary(_ClassificationSummary): ... +class RandomForestClassificationTrainingSummary( + RandomForestClassificationSummary, _TrainingSummary +): ... +class BinaryRandomForestClassificationSummary(_BinaryClassificationSummary): ... +class BinaryRandomForestClassificationTrainingSummary( + BinaryRandomForestClassificationSummary, RandomForestClassificationTrainingSummary +): ... + +class _GBTClassifierParams(_GBTParams, _HasVarianceImpurity): + supportedLossTypes: List[str] + lossType: Param[str] + def __init__(self, *args: Any): ... + def getLossType(self) -> str: ... + +class GBTClassifier( + _JavaProbabilisticClassifier[GBTClassificationModel], + _GBTClassifierParams, + JavaMLWritable, + JavaMLReadable[GBTClassifier], +): + def __init__( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + minInstancesPerNode: int = ..., + minInfoGain: float = ..., + maxMemoryInMB: int = ..., + cacheNodeIds: bool = ..., + checkpointInterval: int = ..., + lossType: str = ..., + maxIter: int = ..., + stepSize: float = ..., + seed: Optional[int] = ..., + subsamplingRate: float = ..., + featureSubsetStrategy: str = ..., + validationTol: float = ..., + validationIndicatorCol: Optional[str] = ..., + leafCol: str = ..., + minWeightFractionPerNode: float = ..., + weightCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + minInstancesPerNode: int = ..., + minInfoGain: float = ..., + maxMemoryInMB: int = ..., + cacheNodeIds: bool = ..., + checkpointInterval: int = ..., + lossType: str = ..., + maxIter: int = ..., + stepSize: float = ..., + seed: Optional[int] = ..., + subsamplingRate: float = ..., + featureSubsetStrategy: str = ..., + validationTol: float = ..., + validationIndicatorCol: Optional[str] = ..., + leafCol: str = ..., + minWeightFractionPerNode: float = ..., + weightCol: Optional[str] = ... + ) -> GBTClassifier: ... + def setMaxDepth(self, value: int) -> GBTClassifier: ... + def setMaxBins(self, value: int) -> GBTClassifier: ... + def setMinInstancesPerNode(self, value: int) -> GBTClassifier: ... + def setMinInfoGain(self, value: float) -> GBTClassifier: ... + def setMaxMemoryInMB(self, value: int) -> GBTClassifier: ... + def setCacheNodeIds(self, value: bool) -> GBTClassifier: ... + def setImpurity(self, value: str) -> GBTClassifier: ... + def setLossType(self, value: str) -> GBTClassifier: ... + def setSubsamplingRate(self, value: float) -> GBTClassifier: ... + def setFeatureSubsetStrategy(self, value: str) -> GBTClassifier: ... 
+ def setValidationIndicatorCol(self, value: str) -> GBTClassifier: ... + def setMaxIter(self, value: int) -> GBTClassifier: ... + def setCheckpointInterval(self, value: int) -> GBTClassifier: ... + def setSeed(self, value: int) -> GBTClassifier: ... + def setStepSize(self, value: float) -> GBTClassifier: ... + def setWeightCol(self, value: str) -> GBTClassifier: ... + def setMinWeightFractionPerNode(self, value: float) -> GBTClassifier: ... + +class GBTClassificationModel( + _TreeEnsembleModel, + _JavaProbabilisticClassificationModel[Vector], + _GBTClassifierParams, + JavaMLWritable, + JavaMLReadable[GBTClassificationModel], +): + @property + def featureImportances(self) -> Vector: ... + @property + def trees(self) -> List[DecisionTreeRegressionModel]: ... + def evaluateEachIteration(self, dataset: DataFrame) -> List[float]: ... + +class _NaiveBayesParams(_PredictorParams, HasWeightCol): + smoothing: Param[float] + modelType: Param[str] + def __init__(self, *args: Any): ... + def getSmoothing(self) -> float: ... + def getModelType(self) -> str: ... + +class NaiveBayes( + _JavaProbabilisticClassifier[NaiveBayesModel], + _NaiveBayesParams, + HasThresholds, + HasWeightCol, + JavaMLWritable, + JavaMLReadable[NaiveBayes], +): + def __init__( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ..., + smoothing: float = ..., + modelType: str = ..., + thresholds: Optional[List[float]] = ..., + weightCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ..., + smoothing: float = ..., + modelType: str = ..., + thresholds: Optional[List[float]] = ..., + weightCol: Optional[str] = ... + ) -> NaiveBayes: ... + def setSmoothing(self, value: float) -> NaiveBayes: ... + def setModelType(self, value: str) -> NaiveBayes: ... + def setWeightCol(self, value: str) -> NaiveBayes: ... + +class NaiveBayesModel( + _JavaProbabilisticClassificationModel[Vector], + _NaiveBayesParams, + JavaMLWritable, + JavaMLReadable[NaiveBayesModel], +): + @property + def pi(self) -> Vector: ... + @property + def theta(self) -> Matrix: ... + @property + def sigma(self) -> Matrix: ... + +class _MultilayerPerceptronParams( + _ProbabilisticClassifierParams, + HasSeed, + HasMaxIter, + HasTol, + HasStepSize, + HasSolver, + HasBlockSize, +): + layers: Param[List[int]] + solver: Param[str] + initialWeights: Param[Vector] + def __init__(self, *args: Any): ... + def getLayers(self) -> List[int]: ... + def getInitialWeights(self) -> Vector: ... + +class MultilayerPerceptronClassifier( + _JavaProbabilisticClassifier[MultilayerPerceptronClassificationModel], + _MultilayerPerceptronParams, + JavaMLWritable, + JavaMLReadable[MultilayerPerceptronClassifier], +): + def __init__( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxIter: int = ..., + tol: float = ..., + seed: Optional[int] = ..., + layers: Optional[List[int]] = ..., + blockSize: int = ..., + stepSize: float = ..., + solver: str = ..., + initialWeights: Optional[Vector] = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ... + ) -> None: ... 
+ def setParams( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxIter: int = ..., + tol: float = ..., + seed: Optional[int] = ..., + layers: Optional[List[int]] = ..., + blockSize: int = ..., + stepSize: float = ..., + solver: str = ..., + initialWeights: Optional[Vector] = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ... + ) -> MultilayerPerceptronClassifier: ... + def setLayers(self, value: List[int]) -> MultilayerPerceptronClassifier: ... + def setBlockSize(self, value: int) -> MultilayerPerceptronClassifier: ... + def setInitialWeights(self, value: Vector) -> MultilayerPerceptronClassifier: ... + def setMaxIter(self, value: int) -> MultilayerPerceptronClassifier: ... + def setSeed(self, value: int) -> MultilayerPerceptronClassifier: ... + def setTol(self, value: float) -> MultilayerPerceptronClassifier: ... + def setStepSize(self, value: float) -> MultilayerPerceptronClassifier: ... + def setSolver(self, value: str) -> MultilayerPerceptronClassifier: ... + +class MultilayerPerceptronClassificationModel( + _JavaProbabilisticClassificationModel[Vector], + _MultilayerPerceptronParams, + JavaMLWritable, + JavaMLReadable[MultilayerPerceptronClassificationModel], + HasTrainingSummary[MultilayerPerceptronClassificationTrainingSummary], +): + @property + def weights(self) -> Vector: ... + def summary(self) -> MultilayerPerceptronClassificationTrainingSummary: ... + def evaluate( + self, dataset: DataFrame + ) -> MultilayerPerceptronClassificationSummary: ... + +class MultilayerPerceptronClassificationSummary(_ClassificationSummary): ... +class MultilayerPerceptronClassificationTrainingSummary( + MultilayerPerceptronClassificationSummary, _TrainingSummary +): ... + +class _OneVsRestParams(_ClassifierParams, HasWeightCol): + classifier: Param[Estimator] + def getClassifier(self) -> Estimator[M]: ... + +class OneVsRest( + Estimator[OneVsRestModel], + _OneVsRestParams, + HasParallelism, + JavaMLReadable[OneVsRest], + JavaMLWritable, +): + def __init__( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + rawPredictionCol: str = ..., + classifier: Optional[Estimator[M]] = ..., + weightCol: Optional[str] = ..., + parallelism: int = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: Optional[str] = ..., + labelCol: Optional[str] = ..., + predictionCol: Optional[str] = ..., + rawPredictionCol: str = ..., + classifier: Optional[Estimator[M]] = ..., + weightCol: Optional[str] = ..., + parallelism: int = ... + ) -> OneVsRest: ... + def setClassifier(self, value: Estimator[M]) -> OneVsRest: ... + def setLabelCol(self, value: str) -> OneVsRest: ... + def setFeaturesCol(self, value: str) -> OneVsRest: ... + def setPredictionCol(self, value: str) -> OneVsRest: ... + def setRawPredictionCol(self, value: str) -> OneVsRest: ... + def setWeightCol(self, value: str) -> OneVsRest: ... + def setParallelism(self, value: int) -> OneVsRest: ... + def copy(self, extra: Optional[ParamMap] = ...) -> OneVsRest: ... + +class OneVsRestModel( + Model, _OneVsRestParams, JavaMLReadable[OneVsRestModel], JavaMLWritable +): + models: List[Transformer] + def __init__(self, models: List[Transformer]) -> None: ... + def setFeaturesCol(self, value: str) -> OneVsRestModel: ... + def setPredictionCol(self, value: str) -> OneVsRestModel: ... + def setRawPredictionCol(self, value: str) -> OneVsRestModel: ... + def copy(self, extra: Optional[ParamMap] = ...) -> OneVsRestModel: ... 
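As a quick illustration of how the `OneVsRest` and `OneVsRestModel` annotations above line up with user code (the local session and the tiny three-class dataset are assumptions made only for this sketch):

```
from pyspark.sql import SparkSession
from pyspark.ml.classification import LogisticRegression, OneVsRest
from pyspark.ml.linalg import Vectors

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame(
    [
        (0.0, Vectors.dense(0.0, 0.0)),
        (1.0, Vectors.dense(1.0, 0.5)),
        (2.0, Vectors.dense(2.0, 1.5)),
    ],
    ["label", "features"],
)

# classifier is typed as Optional[Estimator[M]]; fit returns an OneVsRestModel
ovr = OneVsRest(classifier=LogisticRegression(maxIter=5))
ovr_model = ovr.fit(df)

# OneVsRestModel.models is List[Transformer]; transform adds the prediction column
print(len(ovr_model.models))
ovr_model.transform(df).select("label", "prediction").show()
```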
+ +class FMClassifier( + _JavaProbabilisticClassifier[FMClassificationModel], + _FactorizationMachinesParams, + JavaMLWritable, + JavaMLReadable[FMClassifier], +): + factorSize: Param[int] + fitLinear: Param[bool] + miniBatchFraction: Param[float] + initStd: Param[float] + solver: Param[str] + def __init__( + self, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ..., + factorSize: int = ..., + fitIntercept: bool = ..., + fitLinear: bool = ..., + regParam: float = ..., + miniBatchFraction: float = ..., + initStd: float = ..., + maxIter: int = ..., + stepSize: float = ..., + tol: float = ..., + solver: str = ..., + thresholds: Optional[Any] = ..., + seed: Optional[Any] = ..., + ) -> None: ... + def setParams( + self, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ..., + factorSize: int = ..., + fitIntercept: bool = ..., + fitLinear: bool = ..., + regParam: float = ..., + miniBatchFraction: float = ..., + initStd: float = ..., + maxIter: int = ..., + stepSize: float = ..., + tol: float = ..., + solver: str = ..., + thresholds: Optional[Any] = ..., + seed: Optional[Any] = ..., + ): ... + def setFactorSize(self, value: int) -> FMClassifier: ... + def setFitLinear(self, value: bool) -> FMClassifier: ... + def setMiniBatchFraction(self, value: float) -> FMClassifier: ... + def setInitStd(self, value: float) -> FMClassifier: ... + def setMaxIter(self, value: int) -> FMClassifier: ... + def setStepSize(self, value: float) -> FMClassifier: ... + def setTol(self, value: float) -> FMClassifier: ... + def setSolver(self, value: str) -> FMClassifier: ... + def setSeed(self, value: int) -> FMClassifier: ... + def setFitIntercept(self, value: bool) -> FMClassifier: ... + def setRegParam(self, value: float) -> FMClassifier: ... + +class FMClassificationModel( + _JavaProbabilisticClassificationModel[Vector], + _FactorizationMachinesParams, + JavaMLWritable, + JavaMLReadable[FMClassificationModel], +): + @property + def intercept(self) -> float: ... + @property + def linear(self) -> Vector: ... + @property + def factors(self) -> Matrix: ... + def summary(self) -> FMClassificationTrainingSummary: ... + def evaluate(self, dataset: DataFrame) -> FMClassificationSummary: ... + +class FMClassificationSummary(_BinaryClassificationSummary): ... +class FMClassificationTrainingSummary(FMClassificationSummary, _TrainingSummary): ... diff --git a/python/pyspark/ml/clustering.pyi b/python/pyspark/ml/clustering.pyi new file mode 100644 index 0000000000000..e2a2d7e888367 --- /dev/null +++ b/python/pyspark/ml/clustering.pyi @@ -0,0 +1,437 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import Any, List, Optional + +from pyspark.ml.linalg import Matrix, Vector +from pyspark.ml.util import ( + GeneralJavaMLWritable, + HasTrainingSummary, + JavaMLReadable, + JavaMLWritable, +) +from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaParams, JavaWrapper +from pyspark.ml.param.shared import ( + HasAggregationDepth, + HasCheckpointInterval, + HasDistanceMeasure, + HasFeaturesCol, + HasMaxIter, + HasPredictionCol, + HasProbabilityCol, + HasSeed, + HasTol, + HasWeightCol, +) + +from pyspark.ml.param import Param +from pyspark.ml.stat import MultivariateGaussian +from pyspark.sql.dataframe import DataFrame + +from numpy import ndarray # type: ignore[import] + +class ClusteringSummary(JavaWrapper): + @property + def predictionCol(self) -> str: ... + @property + def predictions(self) -> DataFrame: ... + @property + def featuresCol(self) -> str: ... + @property + def k(self) -> int: ... + @property + def cluster(self) -> DataFrame: ... + @property + def clusterSizes(self) -> List[int]: ... + @property + def numIter(self) -> int: ... + +class _GaussianMixtureParams( + HasMaxIter, + HasFeaturesCol, + HasSeed, + HasPredictionCol, + HasProbabilityCol, + HasTol, + HasAggregationDepth, + HasWeightCol, +): + k: Param[int] + def __init__(self, *args: Any): ... + def getK(self) -> int: ... + +class GaussianMixtureModel( + JavaModel, + _GaussianMixtureParams, + JavaMLWritable, + JavaMLReadable[GaussianMixtureModel], + HasTrainingSummary[GaussianMixtureSummary], +): + def setFeaturesCol(self, value: str) -> GaussianMixtureModel: ... + def setPredictionCol(self, value: str) -> GaussianMixtureModel: ... + def setProbabilityCol(self, value: str) -> GaussianMixtureModel: ... + @property + def weights(self) -> List[float]: ... + @property + def gaussians(self) -> List[MultivariateGaussian]: ... + @property + def gaussiansDF(self) -> DataFrame: ... + @property + def summary(self) -> GaussianMixtureSummary: ... + def predict(self, value: Vector) -> int: ... + def predictProbability(self, value: Vector) -> Vector: ... + +class GaussianMixture( + JavaEstimator[GaussianMixtureModel], + _GaussianMixtureParams, + JavaMLWritable, + JavaMLReadable[GaussianMixture], +): + def __init__( + self, + *, + featuresCol: str = ..., + predictionCol: str = ..., + k: int = ..., + probabilityCol: str = ..., + tol: float = ..., + maxIter: int = ..., + seed: Optional[int] = ..., + aggregationDepth: int = ..., + weightCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: str = ..., + predictionCol: str = ..., + k: int = ..., + probabilityCol: str = ..., + tol: float = ..., + maxIter: int = ..., + seed: Optional[int] = ..., + aggregationDepth: int = ..., + weightCol: Optional[str] = ... + ) -> GaussianMixture: ... + def setK(self, value: int) -> GaussianMixture: ... + def setMaxIter(self, value: int) -> GaussianMixture: ... + def setFeaturesCol(self, value: str) -> GaussianMixture: ... + def setPredictionCol(self, value: str) -> GaussianMixture: ... + def setProbabilityCol(self, value: str) -> GaussianMixture: ... + def setWeightCol(self, value: str) -> GaussianMixture: ... + def setSeed(self, value: int) -> GaussianMixture: ... + def setTol(self, value: float) -> GaussianMixture: ... + def setAggregationDepth(self, value: int) -> GaussianMixture: ... + +class GaussianMixtureSummary(ClusteringSummary): + @property + def probabilityCol(self) -> str: ... + @property + def probability(self) -> DataFrame: ... + @property + def logLikelihood(self) -> float: ... 
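A small usage sketch for the `GaussianMixture` stubs above, showing the annotated model properties and the `predict`/`summary` members; the session and the one-dimensional toy data are illustrative assumptions.

```
from pyspark.sql import SparkSession
from pyspark.ml.clustering import GaussianMixture
from pyspark.ml.linalg import Vectors

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame(
    [(Vectors.dense(-0.5),), (Vectors.dense(-0.1),), (Vectors.dense(0.3),),
     (Vectors.dense(9.8),), (Vectors.dense(10.2),), (Vectors.dense(10.6),)],
    ["features"],
)

gm = GaussianMixture(k=2, seed=1)
model = gm.fit(df)                       # GaussianMixtureModel

print(model.weights)                     # List[float], one weight per component
model.gaussiansDF.show()                 # DataFrame with per-component mean/cov
print(model.predict(Vectors.dense(9.9))) # int cluster index for a single Vector
print(model.summary.logLikelihood)       # GaussianMixtureSummary property
```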
+ +class KMeansSummary(ClusteringSummary): + def trainingCost(self) -> float: ... + +class _KMeansParams( + HasMaxIter, + HasFeaturesCol, + HasSeed, + HasPredictionCol, + HasTol, + HasDistanceMeasure, + HasWeightCol, +): + k: Param[int] + initMode: Param[str] + initSteps: Param[int] + def __init__(self, *args: Any): ... + def getK(self) -> int: ... + def getInitMode(self) -> str: ... + def getInitSteps(self) -> int: ... + +class KMeansModel( + JavaModel, + _KMeansParams, + GeneralJavaMLWritable, + JavaMLReadable[KMeansModel], + HasTrainingSummary[KMeansSummary], +): + def setFeaturesCol(self, value: str) -> KMeansModel: ... + def setPredictionCol(self, value: str) -> KMeansModel: ... + def clusterCenters(self) -> List[ndarray]: ... + @property + def summary(self) -> KMeansSummary: ... + def predict(self, value: Vector) -> int: ... + +class KMeans( + JavaEstimator[KMeansModel], _KMeansParams, JavaMLWritable, JavaMLReadable[KMeans] +): + def __init__( + self, + *, + featuresCol: str = ..., + predictionCol: str = ..., + k: int = ..., + initMode: str = ..., + initSteps: int = ..., + tol: float = ..., + maxIter: int = ..., + seed: Optional[int] = ..., + distanceMeasure: str = ..., + weightCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: str = ..., + predictionCol: str = ..., + k: int = ..., + initMode: str = ..., + initSteps: int = ..., + tol: float = ..., + maxIter: int = ..., + seed: Optional[int] = ..., + distanceMeasure: str = ..., + weightCol: Optional[str] = ... + ) -> KMeans: ... + def setK(self, value: int) -> KMeans: ... + def setInitMode(self, value: str) -> KMeans: ... + def setInitSteps(self, value: int) -> KMeans: ... + def setDistanceMeasure(self, value: str) -> KMeans: ... + def setMaxIter(self, value: int) -> KMeans: ... + def setFeaturesCol(self, value: str) -> KMeans: ... + def setPredictionCol(self, value: str) -> KMeans: ... + def setSeed(self, value: int) -> KMeans: ... + def setTol(self, value: float) -> KMeans: ... + def setWeightCol(self, value: str) -> KMeans: ... + +class _BisectingKMeansParams( + HasMaxIter, + HasFeaturesCol, + HasSeed, + HasPredictionCol, + HasDistanceMeasure, + HasWeightCol, +): + k: Param[int] + minDivisibleClusterSize: Param[float] + def __init__(self, *args: Any): ... + def getK(self) -> int: ... + def getMinDivisibleClusterSize(self) -> float: ... + +class BisectingKMeansModel( + JavaModel, + _BisectingKMeansParams, + JavaMLWritable, + JavaMLReadable[BisectingKMeansModel], + HasTrainingSummary[BisectingKMeansSummary], +): + def setFeaturesCol(self, value: str) -> BisectingKMeansModel: ... + def setPredictionCol(self, value: str) -> BisectingKMeansModel: ... + def clusterCenters(self) -> List[ndarray]: ... + def computeCost(self, dataset: DataFrame) -> float: ... + @property + def summary(self) -> BisectingKMeansSummary: ... + def predict(self, value: Vector) -> int: ... + +class BisectingKMeans( + JavaEstimator[BisectingKMeansModel], + _BisectingKMeansParams, + JavaMLWritable, + JavaMLReadable[BisectingKMeans], +): + def __init__( + self, + *, + featuresCol: str = ..., + predictionCol: str = ..., + maxIter: int = ..., + seed: Optional[int] = ..., + k: int = ..., + minDivisibleClusterSize: float = ..., + distanceMeasure: str = ..., + weightCol: Optional[str] = ... + ) -> None: ... 
+ def setParams( + self, + *, + featuresCol: str = ..., + predictionCol: str = ..., + maxIter: int = ..., + seed: Optional[int] = ..., + k: int = ..., + minDivisibleClusterSize: float = ..., + distanceMeasure: str = ..., + weightCol: Optional[str] = ... + ) -> BisectingKMeans: ... + def setK(self, value: int) -> BisectingKMeans: ... + def setMinDivisibleClusterSize(self, value: float) -> BisectingKMeans: ... + def setDistanceMeasure(self, value: str) -> BisectingKMeans: ... + def setMaxIter(self, value: int) -> BisectingKMeans: ... + def setFeaturesCol(self, value: str) -> BisectingKMeans: ... + def setPredictionCol(self, value: str) -> BisectingKMeans: ... + def setSeed(self, value: int) -> BisectingKMeans: ... + def setWeightCol(self, value: str) -> BisectingKMeans: ... + +class BisectingKMeansSummary(ClusteringSummary): + @property + def trainingCost(self) -> float: ... + +class _LDAParams(HasMaxIter, HasFeaturesCol, HasSeed, HasCheckpointInterval): + k: Param[int] + optimizer: Param[str] + learningOffset: Param[float] + learningDecay: Param[float] + subsamplingRate: Param[float] + optimizeDocConcentration: Param[bool] + docConcentration: Param[List[float]] + topicConcentration: Param[float] + topicDistributionCol: Param[str] + keepLastCheckpoint: Param[bool] + def __init__(self, *args: Any): ... + def setK(self, value: int) -> LDA: ... + def getOptimizer(self) -> str: ... + def getLearningOffset(self) -> float: ... + def getLearningDecay(self) -> float: ... + def getSubsamplingRate(self) -> float: ... + def getOptimizeDocConcentration(self) -> bool: ... + def getDocConcentration(self) -> List[float]: ... + def getTopicConcentration(self) -> float: ... + def getTopicDistributionCol(self) -> str: ... + def getKeepLastCheckpoint(self) -> bool: ... + +class LDAModel(JavaModel, _LDAParams): + def setFeaturesCol(self, value: str) -> LDAModel: ... + def setSeed(self, value: int) -> LDAModel: ... + def setTopicDistributionCol(self, value: str) -> LDAModel: ... + def isDistributed(self) -> bool: ... + def vocabSize(self) -> int: ... + def topicsMatrix(self) -> Matrix: ... + def logLikelihood(self, dataset: DataFrame) -> float: ... + def logPerplexity(self, dataset: DataFrame) -> float: ... + def describeTopics(self, maxTermsPerTopic: int = ...) -> DataFrame: ... + def estimatedDocConcentration(self) -> Vector: ... + +class DistributedLDAModel( + LDAModel, JavaMLReadable[DistributedLDAModel], JavaMLWritable +): + def toLocal(self) -> LDAModel: ... + def trainingLogLikelihood(self) -> float: ... + def logPrior(self) -> float: ... + def getCheckpointFiles(self) -> List[str]: ... + +class LocalLDAModel(LDAModel, JavaMLReadable[LocalLDAModel], JavaMLWritable): ... + +class LDA(JavaEstimator[LDAModel], _LDAParams, JavaMLReadable[LDA], JavaMLWritable): + def __init__( + self, + *, + featuresCol: str = ..., + maxIter: int = ..., + seed: Optional[int] = ..., + checkpointInterval: int = ..., + k: int = ..., + optimizer: str = ..., + learningOffset: float = ..., + learningDecay: float = ..., + subsamplingRate: float = ..., + optimizeDocConcentration: bool = ..., + docConcentration: Optional[List[float]] = ..., + topicConcentration: Optional[float] = ..., + topicDistributionCol: str = ..., + keepLastCheckpoint: bool = ... + ) -> None: ... 
+ def setParams( + self, + *, + featuresCol: str = ..., + maxIter: int = ..., + seed: Optional[int] = ..., + checkpointInterval: int = ..., + k: int = ..., + optimizer: str = ..., + learningOffset: float = ..., + learningDecay: float = ..., + subsamplingRate: float = ..., + optimizeDocConcentration: bool = ..., + docConcentration: Optional[List[float]] = ..., + topicConcentration: Optional[float] = ..., + topicDistributionCol: str = ..., + keepLastCheckpoint: bool = ... + ) -> LDA: ... + def setCheckpointInterval(self, value: int) -> LDA: ... + def setSeed(self, value: int) -> LDA: ... + def setK(self, value: int) -> LDA: ... + def setOptimizer(self, value: str) -> LDA: ... + def setLearningOffset(self, value: float) -> LDA: ... + def setLearningDecay(self, value: float) -> LDA: ... + def setSubsamplingRate(self, value: float) -> LDA: ... + def setOptimizeDocConcentration(self, value: bool) -> LDA: ... + def setDocConcentration(self, value: List[float]) -> LDA: ... + def setTopicConcentration(self, value: float) -> LDA: ... + def setTopicDistributionCol(self, value: str) -> LDA: ... + def setKeepLastCheckpoint(self, value: bool) -> LDA: ... + def setMaxIter(self, value: int) -> LDA: ... + def setFeaturesCol(self, value: str) -> LDA: ... + +class _PowerIterationClusteringParams(HasMaxIter, HasWeightCol): + k: Param[int] + initMode: Param[str] + srcCol: Param[str] + dstCol: Param[str] + def __init__(self, *args: Any): ... + def getK(self) -> int: ... + def getInitMode(self) -> str: ... + def getSrcCol(self) -> str: ... + def getDstCol(self) -> str: ... + +class PowerIterationClustering( + _PowerIterationClusteringParams, + JavaParams, + JavaMLReadable[PowerIterationClustering], + JavaMLWritable, +): + def __init__( + self, + *, + k: int = ..., + maxIter: int = ..., + initMode: str = ..., + srcCol: str = ..., + dstCol: str = ..., + weightCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + k: int = ..., + maxIter: int = ..., + initMode: str = ..., + srcCol: str = ..., + dstCol: str = ..., + weightCol: Optional[str] = ... + ) -> PowerIterationClustering: ... + def setK(self, value: int) -> PowerIterationClustering: ... + def setInitMode(self, value: str) -> PowerIterationClustering: ... + def setSrcCol(self, value: str) -> str: ... + def setDstCol(self, value: str) -> PowerIterationClustering: ... + def setMaxIter(self, value: int) -> PowerIterationClustering: ... + def setWeightCol(self, value: str) -> PowerIterationClustering: ... + def assignClusters(self, dataset: DataFrame) -> DataFrame: ... diff --git a/python/pyspark/ml/common.pyi b/python/pyspark/ml/common.pyi new file mode 100644 index 0000000000000..7bf0ed6183d8a --- /dev/null +++ b/python/pyspark/ml/common.pyi @@ -0,0 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +def callJavaFunc(sc, func, *args): ... +def inherit_doc(cls): ... diff --git a/python/pyspark/ml/evaluation.pyi b/python/pyspark/ml/evaluation.pyi new file mode 100644 index 0000000000000..ea0a9f045cd6a --- /dev/null +++ b/python/pyspark/ml/evaluation.pyi @@ -0,0 +1,281 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import abc +from typing import Optional +from pyspark.ml._typing import ( + ParamMap, + BinaryClassificationEvaluatorMetricType, + ClusteringEvaluatorMetricType, + MulticlassClassificationEvaluatorMetricType, + MultilabelClassificationEvaluatorMetricType, + RankingEvaluatorMetricType, + RegressionEvaluatorMetricType, +) + +from pyspark.ml.wrapper import JavaParams +from pyspark.ml.param import Param, Params +from pyspark.ml.param.shared import ( + HasFeaturesCol, + HasLabelCol, + HasPredictionCol, + HasProbabilityCol, + HasRawPredictionCol, + HasWeightCol, +) +from pyspark.ml.util import JavaMLReadable, JavaMLWritable + +class Evaluator(Params, metaclass=abc.ABCMeta): + def evaluate(self, dataset, params: Optional[ParamMap] = ...) -> float: ... + def isLargerBetter(self) -> bool: ... + +class JavaEvaluator(JavaParams, Evaluator, metaclass=abc.ABCMeta): + def isLargerBetter(self) -> bool: ... + +class BinaryClassificationEvaluator( + JavaEvaluator, + HasLabelCol, + HasRawPredictionCol, + HasWeightCol, + JavaMLReadable[BinaryClassificationEvaluator], + JavaMLWritable, +): + metricName: Param[BinaryClassificationEvaluatorMetricType] + numBins: Param[int] + def __init__( + self, + *, + rawPredictionCol: str = ..., + labelCol: str = ..., + metricName: BinaryClassificationEvaluatorMetricType = ..., + weightCol: Optional[str] = ..., + numBins: int = ... + ) -> None: ... + def setMetricName( + self, value: BinaryClassificationEvaluatorMetricType + ) -> BinaryClassificationEvaluator: ... + def getMetricName(self) -> BinaryClassificationEvaluatorMetricType: ... + def setNumBins(self, value: int) -> BinaryClassificationEvaluator: ... + def getNumBins(self) -> int: ... + def setLabelCol(self, value: str) -> BinaryClassificationEvaluator: ... + def setRawPredictionCol(self, value: str) -> BinaryClassificationEvaluator: ... + def setWeightCol(self, value: str) -> BinaryClassificationEvaluator: ... + +def setParams( + self, + *, + rawPredictionCol: str = ..., + labelCol: str = ..., + metricName: BinaryClassificationEvaluatorMetricType = ..., + weightCol: Optional[str] = ..., + numBins: int = ... +) -> BinaryClassificationEvaluator: ... 
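To see how the `Literal`-based metric aliases from `_typing.pyi` are meant to constrain callers of `BinaryClassificationEvaluator` above, a short sketch; the pre-scored DataFrame here is an assumption standing in for the output of any binary classifier.

```
from pyspark.sql import SparkSession
from pyspark.ml.evaluation import BinaryClassificationEvaluator
from pyspark.ml.linalg import Vectors

spark = SparkSession.builder.master("local[1]").getOrCreate()

# rawPrediction is a two-element vector of per-class scores, label is 0/1
scored = spark.createDataFrame(
    [(Vectors.dense(-1.0, 1.0), 1.0),
     (Vectors.dense(2.0, -2.0), 0.0),
     (Vectors.dense(-0.5, 0.5), 1.0),
     (Vectors.dense(0.1, -0.1), 0.0)],
    ["rawPrediction", "label"],
)

# metricName is typed as Literal["areaUnderROC"] / Literal["areaUnderPR"], so a
# typo such as "areaUnderRoc" is flagged by the type checker instead of at runtime
evaluator = BinaryClassificationEvaluator(metricName="areaUnderPR")
print(evaluator.evaluate(scored))
print(evaluator.getMetricName())
```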
+ +class RegressionEvaluator( + JavaEvaluator, + HasLabelCol, + HasPredictionCol, + HasWeightCol, + JavaMLReadable[RegressionEvaluator], + JavaMLWritable, +): + metricName: Param[RegressionEvaluatorMetricType] + throughOrigin: Param[bool] + def __init__( + self, + *, + predictionCol: str = ..., + labelCol: str = ..., + metricName: RegressionEvaluatorMetricType = ..., + weightCol: Optional[str] = ..., + throughOrigin: bool = ... + ) -> None: ... + def setMetricName( + self, value: RegressionEvaluatorMetricType + ) -> RegressionEvaluator: ... + def getMetricName(self) -> RegressionEvaluatorMetricType: ... + def setThroughOrigin(self, value: bool) -> RegressionEvaluator: ... + def getThroughOrigin(self) -> bool: ... + def setLabelCol(self, value: str) -> RegressionEvaluator: ... + def setPredictionCol(self, value: str) -> RegressionEvaluator: ... + def setWeightCol(self, value: str) -> RegressionEvaluator: ... + def setParams( + self, + *, + predictionCol: str = ..., + labelCol: str = ..., + metricName: RegressionEvaluatorMetricType = ..., + weightCol: Optional[str] = ..., + throughOrigin: bool = ... + ) -> RegressionEvaluator: ... + +class MulticlassClassificationEvaluator( + JavaEvaluator, + HasLabelCol, + HasPredictionCol, + HasWeightCol, + HasProbabilityCol, + JavaMLReadable[MulticlassClassificationEvaluator], + JavaMLWritable, +): + metricName: Param[MulticlassClassificationEvaluatorMetricType] + metricLabel: Param[float] + beta: Param[float] + eps: Param[float] + def __init__( + self, + *, + predictionCol: str = ..., + labelCol: str = ..., + metricName: MulticlassClassificationEvaluatorMetricType = ..., + weightCol: Optional[str] = ..., + metricLabel: float = ..., + beta: float = ..., + probabilityCol: str = ..., + eps: float = ... + ) -> None: ... + def setMetricName( + self, value: MulticlassClassificationEvaluatorMetricType + ) -> MulticlassClassificationEvaluator: ... + def getMetricName(self) -> MulticlassClassificationEvaluatorMetricType: ... + def setMetricLabel(self, value: float) -> MulticlassClassificationEvaluator: ... + def getMetricLabel(self) -> float: ... + def setBeta(self, value: float) -> MulticlassClassificationEvaluator: ... + def getBeta(self) -> float: ... + def setEps(self, value: float) -> MulticlassClassificationEvaluator: ... + def getEps(self) -> float: ... + def setLabelCol(self, value: str) -> MulticlassClassificationEvaluator: ... + def setPredictionCol(self, value: str) -> MulticlassClassificationEvaluator: ... + def setProbabilityCol(self, value: str) -> MulticlassClassificationEvaluator: ... + def setWeightCol(self, value: str) -> MulticlassClassificationEvaluator: ... + def setParams( + self, + *, + predictionCol: str = ..., + labelCol: str = ..., + metricName: MulticlassClassificationEvaluatorMetricType = ..., + weightCol: Optional[str] = ..., + metricLabel: float = ..., + beta: float = ..., + probabilityCol: str = ..., + eps: float = ... + ) -> MulticlassClassificationEvaluator: ... + +class MultilabelClassificationEvaluator( + JavaEvaluator, + HasLabelCol, + HasPredictionCol, + JavaMLReadable[MultilabelClassificationEvaluator], + JavaMLWritable, +): + metricName: Param[MultilabelClassificationEvaluatorMetricType] + metricLabel: Param[float] + def __init__( + self, + *, + predictionCol: str = ..., + labelCol: str = ..., + metricName: MultilabelClassificationEvaluatorMetricType = ..., + metricLabel: float = ... + ) -> None: ... + def setMetricName( + self, value: MultilabelClassificationEvaluatorMetricType + ) -> MultilabelClassificationEvaluator: ... 
+ def getMetricName(self) -> MultilabelClassificationEvaluatorMetricType: ... + def setMetricLabel(self, value: float) -> MultilabelClassificationEvaluator: ... + def getMetricLabel(self) -> float: ... + def setLabelCol(self, value: str) -> MultilabelClassificationEvaluator: ... + def setPredictionCol(self, value: str) -> MultilabelClassificationEvaluator: ... + def setParams( + self, + *, + predictionCol: str = ..., + labelCol: str = ..., + metricName: MultilabelClassificationEvaluatorMetricType = ..., + metricLabel: float = ... + ) -> MultilabelClassificationEvaluator: ... + +class ClusteringEvaluator( + JavaEvaluator, + HasPredictionCol, + HasFeaturesCol, + HasWeightCol, + JavaMLReadable[ClusteringEvaluator], + JavaMLWritable, +): + metricName: Param[ClusteringEvaluatorMetricType] + distanceMeasure: Param[str] + def __init__( + self, + *, + predictionCol: str = ..., + featuresCol: str = ..., + metricName: ClusteringEvaluatorMetricType = ..., + distanceMeasure: str = ..., + weightCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + predictionCol: str = ..., + featuresCol: str = ..., + metricName: ClusteringEvaluatorMetricType = ..., + distanceMeasure: str = ..., + weightCol: Optional[str] = ... + ) -> ClusteringEvaluator: ... + def setMetricName( + self, value: ClusteringEvaluatorMetricType + ) -> ClusteringEvaluator: ... + def getMetricName(self) -> ClusteringEvaluatorMetricType: ... + def setDistanceMeasure(self, value: str) -> ClusteringEvaluator: ... + def getDistanceMeasure(self) -> str: ... + def setFeaturesCol(self, value: str) -> ClusteringEvaluator: ... + def setPredictionCol(self, value: str) -> ClusteringEvaluator: ... + def setWeightCol(self, value: str) -> ClusteringEvaluator: ... + +class RankingEvaluator( + JavaEvaluator, + HasLabelCol, + HasPredictionCol, + JavaMLReadable[RankingEvaluator], + JavaMLWritable, +): + metricName: Param[RankingEvaluatorMetricType] + k: Param[int] + def __init__( + self, + *, + predictionCol: str = ..., + labelCol: str = ..., + metricName: RankingEvaluatorMetricType = ..., + k: int = ... + ) -> None: ... + def setMetricName(self, value: RankingEvaluatorMetricType) -> RankingEvaluator: ... + def getMetricName(self) -> RankingEvaluatorMetricType: ... + def setK(self, value: int) -> RankingEvaluator: ... + def getK(self) -> int: ... + def setLabelCol(self, value: str) -> RankingEvaluator: ... + def setPredictionCol(self, value: str) -> RankingEvaluator: ... + def setParams( + self, + *, + predictionCol: str = ..., + labelCol: str = ..., + metricName: RankingEvaluatorMetricType = ..., + k: int = ... + ) -> RankingEvaluator: ... diff --git a/python/pyspark/ml/feature.pyi b/python/pyspark/ml/feature.pyi new file mode 100644 index 0000000000000..f5b12a5b2ffc6 --- /dev/null +++ b/python/pyspark/ml/feature.pyi @@ -0,0 +1,1629 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import Any, Dict, Generic, List, Optional, Tuple +from pyspark.ml._typing import JM, P + +from pyspark.ml.param.shared import ( + HasFeaturesCol, + HasHandleInvalid, + HasInputCol, + HasInputCols, + HasLabelCol, + HasMaxIter, + HasNumFeatures, + HasOutputCol, + HasOutputCols, + HasRelativeError, + HasSeed, + HasStepSize, + HasThreshold, + HasThresholds, +) +from pyspark.ml.util import JavaMLReadable, JavaMLWritable +from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaParams, JavaTransformer +from pyspark.ml.linalg import Vector, DenseVector, DenseMatrix +from pyspark.sql.dataframe import DataFrame +from pyspark.ml.param import Param + +class Binarizer( + JavaTransformer, + HasThreshold, + HasThresholds, + HasInputCol, + HasOutputCol, + HasInputCols, + HasOutputCols, + JavaMLReadable[Binarizer], + JavaMLWritable, +): + threshold: Param[float] + thresholds: Param[List[float]] + @overload + def __init__( + self, + *, + threshold: float = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + @overload + def __init__( + self, + *, + thresholds: Optional[List[float]] = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ... + ) -> None: ... + @overload + def setParams( + self, + *, + threshold: float = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> Binarizer: ... + @overload + def setParams( + self, + *, + thresholds: Optional[List[float]] = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ... + ) -> Binarizer: ... + def setThreshold(self, value: float) -> Binarizer: ... + def setThresholds(self, value: List[float]) -> Binarizer: ... + def setInputCol(self, value: str) -> Binarizer: ... + def setInputCols(self, value: List[str]) -> Binarizer: ... + def setOutputCol(self, value: str) -> Binarizer: ... + def setOutputCols(self, value: List[str]) -> Binarizer: ... + +class _LSHParams(HasInputCol, HasOutputCol): + numHashTables: Param[int] + def __init__(self, *args: Any): ... + def getNumHashTables(self) -> int: ... + +class _LSH(Generic[JM], JavaEstimator[JM], _LSHParams, JavaMLReadable, JavaMLWritable): + def setNumHashTables(self: P, value) -> P: ... + def setInputCol(self: P, value) -> P: ... + def setOutputCol(self: P, value) -> P: ... + +class _LSHModel(JavaModel, _LSHParams): + def setInputCol(self: P, value: str) -> P: ... + def setOutputCol(self: P, value: str) -> P: ... + def approxNearestNeighbors( + self, + dataset: DataFrame, + key: Vector, + numNearestNeighbors: int, + distCol: str = ..., + ) -> DataFrame: ... + def approxSimilarityJoin( + self, + datasetA: DataFrame, + datasetB: DataFrame, + threshold: float, + distCol: str = ..., + ) -> DataFrame: ... + +class _BucketedRandomProjectionLSHParams: + bucketLength: Param[float] + def getBucketLength(self) -> float: ... + +class BucketedRandomProjectionLSH( + _LSH[BucketedRandomProjectionLSHModel], + _LSHParams, + HasSeed, + JavaMLReadable[BucketedRandomProjectionLSH], + JavaMLWritable, +): + def __init__( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + seed: Optional[int] = ..., + numHashTables: int = ..., + bucketLength: Optional[float] = ... + ) -> None: ... 
+ def setParams( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + seed: Optional[int] = ..., + numHashTables: int = ..., + bucketLength: Optional[float] = ... + ) -> BucketedRandomProjectionLSH: ... + def setBucketLength(self, value: float) -> BucketedRandomProjectionLSH: ... + def setSeed(self, value: int) -> BucketedRandomProjectionLSH: ... + +class BucketedRandomProjectionLSHModel( + _LSHModel, + _BucketedRandomProjectionLSHParams, + JavaMLReadable[BucketedRandomProjectionLSHModel], + JavaMLWritable, +): ... + +class Bucketizer( + JavaTransformer, + HasInputCol, + HasOutputCol, + HasInputCols, + HasOutputCols, + HasHandleInvalid, + JavaMLReadable[Bucketizer], + JavaMLWritable, +): + splits: Param[List[float]] + handleInvalid: Param[str] + splitsArray: Param[List[List[float]]] + @overload + def __init__( + self, + *, + splits: Optional[List[float]] = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + handleInvalid: str = ... + ) -> None: ... + @overload + def __init__( + self, + *, + handleInvalid: str = ..., + splitsArray: Optional[List[List[float]]] = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ... + ) -> None: ... + @overload + def setParams( + self, + *, + splits: Optional[List[float]] = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + handleInvalid: str = ... + ) -> Bucketizer: ... + @overload + def setParams( + self, + *, + handleInvalid: str = ..., + splitsArray: Optional[List[List[float]]] = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ... + ) -> Bucketizer: ... + def setSplits(self, value: List[float]) -> Bucketizer: ... + def getSplits(self) -> List[float]: ... + def setSplitsArray(self, value: List[List[float]]) -> Bucketizer: ... + def getSplitsArray(self) -> List[List[float]]: ... + def setInputCol(self, value: str) -> Bucketizer: ... + def setInputCols(self, value: List[str]) -> Bucketizer: ... + def setOutputCol(self, value: str) -> Bucketizer: ... + def setOutputCols(self, value: List[str]) -> Bucketizer: ... + def setHandleInvalid(self, value: str) -> Bucketizer: ... + +class _CountVectorizerParams(JavaParams, HasInputCol, HasOutputCol): + minTF: Param[float] + minDF: Param[float] + maxDF: Param[float] + vocabSize: Param[int] + binary: Param[bool] + def __init__(self, *args: Any) -> None: ... + def getMinTF(self) -> float: ... + def getMinDF(self) -> float: ... + def getMaxDF(self) -> float: ... + def getVocabSize(self) -> int: ... + def getBinary(self) -> bool: ... + +class CountVectorizer( + JavaEstimator[CountVectorizerModel], + _CountVectorizerParams, + JavaMLReadable[CountVectorizer], + JavaMLWritable, +): + def __init__( + self, + *, + minTF: float = ..., + minDF: float = ..., + maxDF: float = ..., + vocabSize: int = ..., + binary: bool = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + minTF: float = ..., + minDF: float = ..., + maxDF: float = ..., + vocabSize: int = ..., + binary: bool = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> CountVectorizer: ... + def setMinTF(self, value: float) -> CountVectorizer: ... + def setMinDF(self, value: float) -> CountVectorizer: ... + def setMaxDF(self, value: float) -> CountVectorizer: ... + def setVocabSize(self, value: int) -> CountVectorizer: ... + def setBinary(self, value: bool) -> CountVectorizer: ... + def setInputCol(self, value: str) -> CountVectorizer: ... 
+ def setOutputCol(self, value: str) -> CountVectorizer: ... + +class CountVectorizerModel( + JavaModel, JavaMLReadable[CountVectorizerModel], JavaMLWritable +): + def setInputCol(self, value: str) -> CountVectorizerModel: ... + def setOutputCol(self, value: str) -> CountVectorizerModel: ... + def setMinTF(self, value: float) -> CountVectorizerModel: ... + def setBinary(self, value: bool) -> CountVectorizerModel: ... + @classmethod + def from_vocabulary( + cls, + vocabulary: List[str], + inputCol: str, + outputCol: Optional[str] = ..., + minTF: Optional[float] = ..., + binary: Optional[bool] = ..., + ) -> CountVectorizerModel: ... + @property + def vocabulary(self) -> List[str]: ... + +class DCT( + JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable[DCT], JavaMLWritable +): + inverse: Param[bool] + def __init__( + self, + *, + inverse: bool = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + inverse: bool = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> DCT: ... + def setInverse(self, value: bool) -> DCT: ... + def getInverse(self) -> bool: ... + def setInputCol(self, value: str) -> DCT: ... + def setOutputCol(self, value: str) -> DCT: ... + +class ElementwiseProduct( + JavaTransformer, + HasInputCol, + HasOutputCol, + JavaMLReadable[ElementwiseProduct], + JavaMLWritable, +): + scalingVec: Param[Vector] + def __init__( + self, + *, + scalingVec: Optional[Vector] = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + scalingVec: Optional[Vector] = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> ElementwiseProduct: ... + def setScalingVec(self, value: Vector) -> ElementwiseProduct: ... + def getScalingVec(self) -> Vector: ... + def setInputCol(self, value: str) -> ElementwiseProduct: ... + def setOutputCol(self, value: str) -> ElementwiseProduct: ... + +class FeatureHasher( + JavaTransformer, + HasInputCols, + HasOutputCol, + HasNumFeatures, + JavaMLReadable[FeatureHasher], + JavaMLWritable, +): + categoricalCols: Param[List[str]] + def __init__( + self, + *, + numFeatures: int = ..., + inputCols: Optional[List[str]] = ..., + outputCol: Optional[str] = ..., + categoricalCols: Optional[List[str]] = ... + ) -> None: ... + def setParams( + self, + *, + numFeatures: int = ..., + inputCols: Optional[List[str]] = ..., + outputCol: Optional[str] = ..., + categoricalCols: Optional[List[str]] = ... + ) -> FeatureHasher: ... + def setCategoricalCols(self, value: List[str]) -> FeatureHasher: ... + def getCategoricalCols(self) -> List[str]: ... + def setInputCols(self, value: List[str]) -> FeatureHasher: ... + def setOutputCol(self, value: str) -> FeatureHasher: ... + def setNumFeatures(self, value: int) -> FeatureHasher: ... + +class HashingTF( + JavaTransformer, + HasInputCol, + HasOutputCol, + HasNumFeatures, + JavaMLReadable[HashingTF], + JavaMLWritable, +): + binary: Param[bool] + def __init__( + self, + *, + numFeatures: int = ..., + binary: bool = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + numFeatures: int = ..., + binary: bool = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> HashingTF: ... + def setBinary(self, value: bool) -> HashingTF: ... + def getBinary(self) -> bool: ... + def setInputCol(self, value: str) -> HashingTF: ... + def setOutputCol(self, value: str) -> HashingTF: ... 
+ def setNumFeatures(self, value: int) -> HashingTF: ... + def indexOf(self, term: Any) -> int: ... + +class _IDFParams(HasInputCol, HasOutputCol): + minDocFreq: Param[int] + def __init__(self, *args: Any): ... + def getMinDocFreq(self) -> int: ... + +class IDF(JavaEstimator[IDFModel], _IDFParams, JavaMLReadable[IDF], JavaMLWritable): + def __init__( + self, + *, + minDocFreq: int = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + minDocFreq: int = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> IDF: ... + def setMinDocFreq(self, value: int) -> IDF: ... + def setInputCol(self, value: str) -> IDF: ... + def setOutputCol(self, value: str) -> IDF: ... + +class IDFModel(JavaModel, _IDFParams, JavaMLReadable[IDFModel], JavaMLWritable): + def setInputCol(self, value: str) -> IDFModel: ... + def setOutputCol(self, value: str) -> IDFModel: ... + @property + def idf(self) -> Vector: ... + @property + def docFreq(self) -> List[int]: ... + @property + def numDocs(self) -> int: ... + +class _ImputerParams( + HasInputCol, HasInputCols, HasOutputCol, HasOutputCols, HasRelativeError +): + strategy: Param[str] + missingValue: Param[float] + def getStrategy(self) -> str: ... + def getMissingValue(self) -> float: ... + +class Imputer( + JavaEstimator[ImputerModel], _ImputerParams, JavaMLReadable[Imputer], JavaMLWritable +): + @overload + def __init__( + self, + *, + strategy: str = ..., + missingValue: float = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ..., + relativeError: float = ... + ) -> None: ... + @overload + def __init__( + self, + *, + strategy: str = ..., + missingValue: float = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + relativeError: float = ... + ) -> None: ... + @overload + def setParams( + self, + *, + strategy: str = ..., + missingValue: float = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ..., + relativeError: float = ... + ) -> Imputer: ... + @overload + def setParams( + self, + *, + strategy: str = ..., + missingValue: float = ..., + inputCol: Optional[str] = ..., + outputCols: Optional[str] = ..., + relativeError: float = ... + ) -> Imputer: ... + def setStrategy(self, value: str) -> Imputer: ... + def setMissingValue(self, value: float) -> Imputer: ... + def setInputCols(self, value: List[str]) -> Imputer: ... + def setOutputCols(self, value: List[str]) -> Imputer: ... + def setInputCol(self, value: str) -> Imputer: ... + def setOutputCol(self, value: str) -> Imputer: ... + def setRelativeError(self, value: float) -> Imputer: ... + +class ImputerModel( + JavaModel, _ImputerParams, JavaMLReadable[ImputerModel], JavaMLWritable +): + def setInputCols(self, value: List[str]) -> ImputerModel: ... + def setOutputCols(self, value: List[str]) -> ImputerModel: ... + def setInputCol(self, value: str) -> ImputerModel: ... + def setOutputCol(self, value: str) -> ImputerModel: ... + @property + def surrogateDF(self) -> DataFrame: ... + +class Interaction( + JavaTransformer, + HasInputCols, + HasOutputCol, + JavaMLReadable[Interaction], + JavaMLWritable, +): + def __init__( + self, *, inputCols: Optional[List[str]] = ..., outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, *, inputCols: Optional[List[str]] = ..., outputCol: Optional[str] = ... + ) -> Interaction: ... + def setInputCols(self, value: List[str]) -> Interaction: ... 
+ def setOutputCol(self, value: str) -> Interaction: ... + +class _MaxAbsScalerParams(HasInputCol, HasOutputCol): ... + +class MaxAbsScaler( + JavaEstimator[MaxAbsScalerModel], + _MaxAbsScalerParams, + JavaMLReadable[MaxAbsScaler], + JavaMLWritable, +): + def __init__( + self, *, inputCol: Optional[str] = ..., outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, *, inputCol: Optional[str] = ..., outputCol: Optional[str] = ... + ) -> MaxAbsScaler: ... + def setInputCol(self, value: str) -> MaxAbsScaler: ... + def setOutputCol(self, value: str) -> MaxAbsScaler: ... + +class MaxAbsScalerModel( + JavaModel, _MaxAbsScalerParams, JavaMLReadable[MaxAbsScalerModel], JavaMLWritable +): + def setInputCol(self, value: str) -> MaxAbsScalerModel: ... + def setOutputCol(self, value: str) -> MaxAbsScalerModel: ... + @property + def maxAbs(self) -> Vector: ... + +class MinHashLSH( + _LSH[MinHashLSHModel], + HasInputCol, + HasOutputCol, + HasSeed, + JavaMLReadable[MinHashLSH], + JavaMLWritable, +): + def __init__( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + seed: Optional[int] = ..., + numHashTables: int = ... + ) -> None: ... + def setParams( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + seed: Optional[int] = ..., + numHashTables: int = ... + ) -> MinHashLSH: ... + def setSeed(self, value: int) -> MinHashLSH: ... + +class MinHashLSHModel(_LSHModel, JavaMLReadable[MinHashLSHModel], JavaMLWritable): ... + +class _MinMaxScalerParams(HasInputCol, HasOutputCol): + min: Param[float] + max: Param[float] + def __init__(self, *args: Any): ... + def getMin(self) -> float: ... + def getMax(self) -> float: ... + +class MinMaxScaler( + JavaEstimator[MinMaxScalerModel], + _MinMaxScalerParams, + JavaMLReadable[MinMaxScaler], + JavaMLWritable, +): + def __init__( + self, + *, + min: float = ..., + max: float = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + min: float = ..., + max: float = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> MinMaxScaler: ... + def setMin(self, value: float) -> MinMaxScaler: ... + def setMax(self, value: float) -> MinMaxScaler: ... + def setInputCol(self, value: str) -> MinMaxScaler: ... + def setOutputCol(self, value: str) -> MinMaxScaler: ... + +class MinMaxScalerModel( + JavaModel, _MinMaxScalerParams, JavaMLReadable[MinMaxScalerModel], JavaMLWritable +): + def setInputCol(self, value: str) -> MinMaxScalerModel: ... + def setOutputCol(self, value: str) -> MinMaxScalerModel: ... + def setMin(self, value: float) -> MinMaxScalerModel: ... + def setMax(self, value: float) -> MinMaxScalerModel: ... + @property + def originalMin(self) -> Vector: ... + @property + def originalMax(self) -> Vector: ... + +class NGram( + JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable[NGram], JavaMLWritable +): + n: Param[int] + def __init__( + self, + *, + n: int = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + n: int = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> NGram: ... + def setN(self, value: int) -> NGram: ... + def getN(self) -> int: ... + def setInputCol(self, value: str) -> NGram: ... + def setOutputCol(self, value: str) -> NGram: ... 
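The annotations above only describe signatures for type checkers; runtime behavior is unchanged. As a rough illustration of the `NGram` API covered by these stubs, here is a minimal sketch assuming a local `SparkSession`; the `tokens` column and sample data are made up for the example and are not part of this patch:

```python
from pyspark.sql import SparkSession
from pyspark.ml.feature import NGram

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(["a", "b", "c", "d"],)], ["tokens"])

# n, inputCol and outputCol correspond to the Param[...] declarations in the stub.
ngram = NGram(n=2, inputCol="tokens", outputCol="bigrams")
ngram.transform(df).show(truncate=False)
```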
+ +class Normalizer( + JavaTransformer, + HasInputCol, + HasOutputCol, + JavaMLReadable[Normalizer], + JavaMLWritable, +): + p: Param[float] + def __init__( + self, + *, + p: float = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + p: float = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> Normalizer: ... + def setP(self, value: float) -> Normalizer: ... + def getP(self) -> float: ... + def setInputCol(self, value: str) -> Normalizer: ... + def setOutputCol(self, value: str) -> Normalizer: ... + +class _OneHotEncoderParams(HasInputCols, HasOutputCols, HasHandleInvalid): + handleInvalid: Param[str] + dropLast: Param[bool] + def __init__(self, *args: Any): ... + def getDropLast(self) -> bool: ... + +class OneHotEncoder( + JavaEstimator[OneHotEncoderModel], + _OneHotEncoderParams, + JavaMLReadable[OneHotEncoder], + JavaMLWritable, +): + @overload + def __init__( + self, + *, + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ..., + handleInvalid: str = ..., + dropLast: bool = ... + ) -> None: ... + @overload + def __init__( + self, + *, + handleInvalid: str = ..., + dropLast: bool = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + @overload + def setParams( + self, + *, + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ..., + handleInvalid: str = ..., + dropLast: bool = ... + ) -> OneHotEncoder: ... + @overload + def setParams( + self, + *, + handleInvalid: str = ..., + dropLast: bool = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> OneHotEncoder: ... + def setDropLast(self, value: bool) -> OneHotEncoder: ... + def setInputCols(self, value: List[str]) -> OneHotEncoder: ... + def setOutputCols(self, value: List[str]) -> OneHotEncoder: ... + def setHandleInvalid(self, value: str) -> OneHotEncoder: ... + def setInputCol(self, value: str) -> OneHotEncoder: ... + def setOutputCol(self, value: str) -> OneHotEncoder: ... + +class OneHotEncoderModel( + JavaModel, _OneHotEncoderParams, JavaMLReadable[OneHotEncoderModel], JavaMLWritable +): + def setDropLast(self, value: bool) -> OneHotEncoderModel: ... + def setInputCols(self, value: List[str]) -> OneHotEncoderModel: ... + def setOutputCols(self, value: List[str]) -> OneHotEncoderModel: ... + def setInputCol(self, value: str) -> OneHotEncoderModel: ... + def setOutputCol(self, value: str) -> OneHotEncoderModel: ... + def setHandleInvalid(self, value: str) -> OneHotEncoderModel: ... + @property + def categorySizes(self) -> List[int]: ... + +class PolynomialExpansion( + JavaTransformer, + HasInputCol, + HasOutputCol, + JavaMLReadable[PolynomialExpansion], + JavaMLWritable, +): + degree: Param[int] + def __init__( + self, + *, + degree: int = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + degree: int = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> PolynomialExpansion: ... + def setDegree(self, value: int) -> PolynomialExpansion: ... + def getDegree(self) -> int: ... + def setInputCol(self, value: str) -> PolynomialExpansion: ... + def setOutputCol(self, value: str) -> PolynomialExpansion: ... 
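The `@overload` pairs on `OneHotEncoder` mirror its single-column and multi-column call shapes. A brief sketch of both forms (constructor only; the column names are hypothetical):

```python
from pyspark.ml.feature import OneHotEncoder

# Multi-column form, matching the inputCols/outputCols overload.
multi = OneHotEncoder(inputCols=["color_idx", "size_idx"],
                      outputCols=["color_vec", "size_vec"],
                      dropLast=True)

# Single-column form, matching the inputCol/outputCol overload.
single = OneHotEncoder(inputCol="color_idx", outputCol="color_vec")
```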
+ +class QuantileDiscretizer( + JavaEstimator[Bucketizer], + HasInputCol, + HasOutputCol, + HasInputCols, + HasOutputCols, + HasHandleInvalid, + HasRelativeError, + JavaMLReadable[QuantileDiscretizer], + JavaMLWritable, +): + numBuckets: Param[int] + handleInvalid: Param[str] + numBucketsArray: Param[List[int]] + @overload + def __init__( + self, + *, + numBuckets: int = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + relativeError: float = ..., + handleInvalid: str = ... + ) -> None: ... + @overload + def __init__( + self, + *, + relativeError: float = ..., + handleInvalid: str = ..., + numBucketsArray: Optional[List[int]] = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ... + ) -> None: ... + @overload + def setParams( + self, + *, + numBuckets: int = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + relativeError: float = ..., + handleInvalid: str = ... + ) -> QuantileDiscretizer: ... + @overload + def setParams( + self, + *, + relativeError: float = ..., + handleInvalid: str = ..., + numBucketsArray: Optional[List[int]] = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ... + ) -> QuantileDiscretizer: ... + def setNumBuckets(self, value: int) -> QuantileDiscretizer: ... + def getNumBuckets(self) -> int: ... + def setNumBucketsArray(self, value: List[int]) -> QuantileDiscretizer: ... + def getNumBucketsArray(self) -> List[int]: ... + def setRelativeError(self, value: float) -> QuantileDiscretizer: ... + def setInputCol(self, value: str) -> QuantileDiscretizer: ... + def setInputCols(self, value: List[str]) -> QuantileDiscretizer: ... + def setOutputCol(self, value: str) -> QuantileDiscretizer: ... + def setOutputCols(self, value: List[str]) -> QuantileDiscretizer: ... + def setHandleInvalid(self, value: str) -> QuantileDiscretizer: ... + +class _RobustScalerParams(HasInputCol, HasOutputCol, HasRelativeError): + lower: Param[float] + upper: Param[float] + withCentering: Param[bool] + withScaling: Param[bool] + def __init__(self, *args: Any): ... + def getLower(self) -> float: ... + def getUpper(self) -> float: ... + def getWithCentering(self) -> bool: ... + def getWithScaling(self) -> bool: ... + +class RobustScaler( + JavaEstimator, _RobustScalerParams, JavaMLReadable[RobustScaler], JavaMLWritable +): + def __init__( + self, + *, + lower: float = ..., + upper: float = ..., + withCentering: bool = ..., + withScaling: bool = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + relativeError: float = ... + ) -> None: ... + def setParams( + self, + *, + lower: float = ..., + upper: float = ..., + withCentering: bool = ..., + withScaling: bool = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + relativeError: float = ... + ) -> RobustScaler: ... + def setLower(self, value: float) -> RobustScaler: ... + def setUpper(self, value: float) -> RobustScaler: ... + def setWithCentering(self, value: bool) -> RobustScaler: ... + def setWithScaling(self, value: bool) -> RobustScaler: ... + def setInputCol(self, value: str) -> RobustScaler: ... + def setOutputCol(self, value: str) -> RobustScaler: ... + def setRelativeError(self, value: float) -> RobustScaler: ... + +class RobustScalerModel( + JavaModel, _RobustScalerParams, JavaMLReadable[RobustScalerModel], JavaMLWritable +): + def setInputCol(self, value: str) -> RobustScalerModel: ... + def setOutputCol(self, value: str) -> RobustScalerModel: ... + @property + def median(self) -> Vector: ... 
+ @property + def range(self) -> Vector: ... + +class RegexTokenizer( + JavaTransformer, + HasInputCol, + HasOutputCol, + JavaMLReadable[RegexTokenizer], + JavaMLWritable, +): + minTokenLength: Param[int] + gaps: Param[bool] + pattern: Param[str] + toLowercase: Param[bool] + def __init__( + self, + *, + minTokenLength: int = ..., + gaps: bool = ..., + pattern: str = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + toLowercase: bool = ... + ) -> None: ... + def setParams( + self, + *, + minTokenLength: int = ..., + gaps: bool = ..., + pattern: str = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + toLowercase: bool = ... + ) -> RegexTokenizer: ... + def setMinTokenLength(self, value: int) -> RegexTokenizer: ... + def getMinTokenLength(self) -> int: ... + def setGaps(self, value: bool) -> RegexTokenizer: ... + def getGaps(self) -> bool: ... + def setPattern(self, value: str) -> RegexTokenizer: ... + def getPattern(self) -> str: ... + def setToLowercase(self, value: bool) -> RegexTokenizer: ... + def getToLowercase(self) -> bool: ... + def setInputCol(self, value: str) -> RegexTokenizer: ... + def setOutputCol(self, value: str) -> RegexTokenizer: ... + +class SQLTransformer(JavaTransformer, JavaMLReadable[SQLTransformer], JavaMLWritable): + statement: Param[str] + def __init__(self, *, statement: Optional[str] = ...) -> None: ... + def setParams(self, *, statement: Optional[str] = ...) -> SQLTransformer: ... + def setStatement(self, value: str) -> SQLTransformer: ... + def getStatement(self) -> str: ... + +class _StandardScalerParams(HasInputCol, HasOutputCol): + withMean: Param[bool] + withStd: Param[bool] + def __init__(self, *args: Any): ... + def getWithMean(self) -> bool: ... + def getWithStd(self) -> bool: ... + +class StandardScaler( + JavaEstimator[StandardScalerModel], + _StandardScalerParams, + JavaMLReadable[StandardScaler], + JavaMLWritable, +): + def __init__( + self, + *, + withMean: bool = ..., + withStd: bool = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + withMean: bool = ..., + withStd: bool = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> StandardScaler: ... + def setWithMean(self, value: bool) -> StandardScaler: ... + def setWithStd(self, value: bool) -> StandardScaler: ... + def setInputCol(self, value: str) -> StandardScaler: ... + def setOutputCol(self, value: str) -> StandardScaler: ... + +class StandardScalerModel( + JavaModel, + _StandardScalerParams, + JavaMLReadable[StandardScalerModel], + JavaMLWritable, +): + def setInputCol(self, value: str) -> StandardScalerModel: ... + def setOutputCol(self, value: str) -> StandardScalerModel: ... + @property + def std(self) -> Vector: ... + @property + def mean(self) -> Vector: ... + +class _StringIndexerParams( + JavaParams, HasHandleInvalid, HasInputCol, HasOutputCol, HasInputCols, HasOutputCols +): + stringOrderType: Param[str] + handleInvalid: Param[str] + def __init__(self, *args: Any) -> None: ... + def getStringOrderType(self) -> str: ... + +class StringIndexer( + JavaEstimator[StringIndexerModel], + _StringIndexerParams, + JavaMLReadable[StringIndexer], + JavaMLWritable, +): + @overload + def __init__( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + handleInvalid: str = ..., + stringOrderType: str = ... + ) -> None: ... 
+ @overload + def __init__( + self, + *, + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ..., + handleInvalid: str = ..., + stringOrderType: str = ... + ) -> None: ... + @overload + def setParams( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + handleInvalid: str = ..., + stringOrderType: str = ... + ) -> StringIndexer: ... + @overload + def setParams( + self, + *, + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ..., + handleInvalid: str = ..., + stringOrderType: str = ... + ) -> StringIndexer: ... + def setStringOrderType(self, value: str) -> StringIndexer: ... + def setInputCol(self, value: str) -> StringIndexer: ... + def setInputCols(self, value: List[str]) -> StringIndexer: ... + def setOutputCol(self, value: str) -> StringIndexer: ... + def setOutputCols(self, value: List[str]) -> StringIndexer: ... + def setHandleInvalid(self, value: str) -> StringIndexer: ... + +class StringIndexerModel( + JavaModel, _StringIndexerParams, JavaMLReadable[StringIndexerModel], JavaMLWritable +): + def setInputCol(self, value: str) -> StringIndexerModel: ... + def setInputCols(self, value: List[str]) -> StringIndexerModel: ... + def setOutputCol(self, value: str) -> StringIndexerModel: ... + def setOutputCols(self, value: List[str]) -> StringIndexerModel: ... + def setHandleInvalid(self, value: str) -> StringIndexerModel: ... + @classmethod + def from_labels( + cls, + labels: List[str], + inputCol: str, + outputCol: Optional[str] = ..., + handleInvalid: Optional[str] = ..., + ) -> StringIndexerModel: ... + @classmethod + def from_arrays_of_labels( + cls, + arrayOfLabels: List[List[str]], + inputCols: List[str], + outputCols: Optional[List[str]] = ..., + handleInvalid: Optional[str] = ..., + ) -> StringIndexerModel: ... + @property + def labels(self) -> List[str]: ... + +class IndexToString( + JavaTransformer, + HasInputCol, + HasOutputCol, + JavaMLReadable[IndexToString], + JavaMLWritable, +): + labels: Param[List[str]] + def __init__( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + labels: Optional[List[str]] = ... + ) -> None: ... + def setParams( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + labels: Optional[List[str]] = ... + ) -> IndexToString: ... + def setLabels(self, value: List[str]) -> IndexToString: ... + def getLabels(self) -> List[str]: ... + def setInputCol(self, value: str) -> IndexToString: ... + def setOutputCol(self, value: str) -> IndexToString: ... + +class StopWordsRemover( + JavaTransformer, + HasInputCol, + HasOutputCol, + HasInputCols, + HasOutputCols, + JavaMLReadable[StopWordsRemover], + JavaMLWritable, +): + stopWords: Param[List[str]] + caseSensitive: Param[bool] + locale: Param[str] + @overload + def __init__( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + stopWords: Optional[List[str]] = ..., + caseSensitive: bool = ..., + locale: Optional[str] = ... + ) -> None: ... + @overload + def __init__( + self, + *, + stopWords: Optional[List[str]] = ..., + caseSensitive: bool = ..., + locale: Optional[str] = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ... + ) -> None: ... + @overload + def setParams( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + stopWords: Optional[List[str]] = ..., + caseSensitive: bool = ..., + locale: Optional[str] = ... + ) -> StopWordsRemover: ... 
+ @overload + def setParams( + self, + *, + stopWords: Optional[List[str]] = ..., + caseSensitive: bool = ..., + locale: Optional[str] = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ... + ) -> StopWordsRemover: ... + def setStopWords(self, value: List[str]) -> StopWordsRemover: ... + def getStopWords(self) -> List[str]: ... + def setCaseSensitive(self, value: bool) -> StopWordsRemover: ... + def getCaseSensitive(self) -> bool: ... + def setLocale(self, value: str) -> StopWordsRemover: ... + def getLocale(self) -> str: ... + def setInputCol(self, value: str) -> StopWordsRemover: ... + def setOutputCol(self, value: str) -> StopWordsRemover: ... + def setInputCols(self, value: List[str]) -> StopWordsRemover: ... + def setOutputCols(self, value: List[str]) -> StopWordsRemover: ... + @staticmethod + def loadDefaultStopWords(language: str) -> List[str]: ... + +class Tokenizer( + JavaTransformer, + HasInputCol, + HasOutputCol, + JavaMLReadable[Tokenizer], + JavaMLWritable, +): + def __init__( + self, *, inputCol: Optional[str] = ..., outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, *, inputCol: Optional[str] = ..., outputCol: Optional[str] = ... + ) -> Tokenizer: ... + def setInputCol(self, value: str) -> Tokenizer: ... + def setOutputCol(self, value: str) -> Tokenizer: ... + +class VectorAssembler( + JavaTransformer, + HasInputCols, + HasOutputCol, + HasHandleInvalid, + JavaMLReadable[VectorAssembler], + JavaMLWritable, +): + handleInvalid: Param[str] + def __init__( + self, + *, + inputCols: Optional[List[str]] = ..., + outputCol: Optional[str] = ..., + handleInvalid: str = ... + ) -> None: ... + def setParams( + self, + *, + inputCols: Optional[List[str]] = ..., + outputCol: Optional[str] = ..., + handleInvalid: str = ... + ) -> VectorAssembler: ... + def setInputCols(self, value: List[str]) -> VectorAssembler: ... + def setOutputCol(self, value: str) -> VectorAssembler: ... + def setHandleInvalid(self, value: str) -> VectorAssembler: ... + +class _VectorIndexerParams(HasInputCol, HasOutputCol, HasHandleInvalid): + maxCategories: Param[int] + handleInvalid: Param[str] + def __init__(self, *args: Any): ... + def getMaxCategories(self) -> int: ... + +class VectorIndexer( + JavaEstimator[VectorIndexerModel], + _VectorIndexerParams, + HasHandleInvalid, + JavaMLReadable[VectorIndexer], + JavaMLWritable, +): + def __init__( + self, + *, + maxCategories: int = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + handleInvalid: str = ... + ) -> None: ... + def setParams( + self, + *, + maxCategories: int = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + handleInvalid: str = ... + ) -> VectorIndexer: ... + def setMaxCategories(self, value: int) -> VectorIndexer: ... + def setInputCol(self, value: str) -> VectorIndexer: ... + def setOutputCol(self, value: str) -> VectorIndexer: ... + def setHandleInvalid(self, value: str) -> VectorIndexer: ... + +class VectorIndexerModel( + JavaModel, _VectorIndexerParams, JavaMLReadable[VectorIndexerModel], JavaMLWritable +): + def setInputCol(self, value: str) -> VectorIndexerModel: ... + def setOutputCol(self, value: str) -> VectorIndexerModel: ... + @property + def numFeatures(self) -> int: ... + @property + def categoryMaps(self) -> Dict[int, Tuple[float, int]]: ... 
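For context, a short sketch chaining a few of the transformers typed above (`Tokenizer`, `StopWordsRemover`, `VectorAssembler`); the column names and the input DataFrame `df` are assumptions for the example:

```python
from pyspark.ml.feature import Tokenizer, StopWordsRemover, VectorAssembler

tokenizer = Tokenizer(inputCol="text", outputCol="words")
remover = StopWordsRemover(
    inputCol="words",
    outputCol="filtered",
    stopWords=StopWordsRemover.loadDefaultStopWords("english"),
)
assembler = VectorAssembler(inputCols=["f1", "f2"], outputCol="features",
                            handleInvalid="skip")

# df is a hypothetical DataFrame with `text`, `f1` and `f2` columns.
out = assembler.transform(remover.transform(tokenizer.transform(df)))
```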
+ +class VectorSlicer( + JavaTransformer, + HasInputCol, + HasOutputCol, + JavaMLReadable[VectorSlicer], + JavaMLWritable, +): + indices: Param[List[int]] + names: Param[List[str]] + def __init__( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + indices: Optional[List[int]] = ..., + names: Optional[List[str]] = ... + ) -> None: ... + def setParams( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + indices: Optional[List[int]] = ..., + names: Optional[List[str]] = ... + ) -> VectorSlicer: ... + def setIndices(self, value: List[int]) -> VectorSlicer: ... + def getIndices(self) -> List[int]: ... + def setNames(self, value: List[str]) -> VectorSlicer: ... + def getNames(self) -> List[str]: ... + def setInputCol(self, value: str) -> VectorSlicer: ... + def setOutputCol(self, value: str) -> VectorSlicer: ... + +class _Word2VecParams(HasStepSize, HasMaxIter, HasSeed, HasInputCol, HasOutputCol): + vectorSize: Param[int] + numPartitions: Param[int] + minCount: Param[int] + windowSize: Param[int] + maxSentenceLength: Param[int] + def __init__(self, *args: Any): ... + def getVectorSize(self) -> int: ... + def getNumPartitions(self) -> int: ... + def getMinCount(self) -> int: ... + def getWindowSize(self) -> int: ... + def getMaxSentenceLength(self) -> int: ... + +class Word2Vec( + JavaEstimator[Word2VecModel], + _Word2VecParams, + JavaMLReadable[Word2Vec], + JavaMLWritable, +): + def __init__( + self, + *, + vectorSize: int = ..., + minCount: int = ..., + numPartitions: int = ..., + stepSize: float = ..., + maxIter: int = ..., + seed: Optional[int] = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + windowSize: int = ..., + maxSentenceLength: int = ... + ) -> None: ... + def setParams( + self, + *, + vectorSize: int = ..., + minCount: int = ..., + numPartitions: int = ..., + stepSize: float = ..., + maxIter: int = ..., + seed: Optional[int] = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + windowSize: int = ..., + maxSentenceLength: int = ... + ) -> Word2Vec: ... + def setVectorSize(self, value: int) -> Word2Vec: ... + def setNumPartitions(self, value: int) -> Word2Vec: ... + def setMinCount(self, value: int) -> Word2Vec: ... + def setWindowSize(self, value: int) -> Word2Vec: ... + def setMaxSentenceLength(self, value: int) -> Word2Vec: ... + def setMaxIter(self, value: int) -> Word2Vec: ... + def setInputCol(self, value: str) -> Word2Vec: ... + def setOutputCol(self, value: str) -> Word2Vec: ... + def setSeed(self, value: int) -> Word2Vec: ... + def setStepSize(self, value: float) -> Word2Vec: ... + +class Word2VecModel( + JavaModel, _Word2VecParams, JavaMLReadable[Word2VecModel], JavaMLWritable +): + def getVectors(self) -> DataFrame: ... + def setInputCol(self, value: str) -> Word2VecModel: ... + def setOutputCol(self, value: str) -> Word2VecModel: ... + @overload + def findSynonyms(self, word: str, num: int) -> DataFrame: ... + @overload + def findSynonyms(self, word: Vector, num: int) -> DataFrame: ... + @overload + def findSynonymsArray(self, word: str, num: int) -> List[Tuple[str, float]]: ... + @overload + def findSynonymsArray(self, word: Vector, num: int) -> List[Tuple[str, float]]: ... + +class _PCAParams(HasInputCol, HasOutputCol): + k: Param[int] + def getK(self) -> int: ... 
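As an illustration of the `Word2Vec` estimator and the overloaded `findSynonyms` typed above (a sketch only; `tokens_df` stands in for any DataFrame with a `words` array column):

```python
from pyspark.ml.feature import Word2Vec

w2v = Word2Vec(vectorSize=50, minCount=1, inputCol="words", outputCol="vecs")
model = w2v.fit(tokens_df)             # tokens_df is a hypothetical tokenized DataFrame
model.getVectors().show(5)             # DataFrame of learned word embeddings
model.findSynonyms("spark", 3).show()  # str overload; a Vector argument is accepted as well
```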
+ +class PCA(JavaEstimator[PCAModel], _PCAParams, JavaMLReadable[PCA], JavaMLWritable): + def __init__( + self, + *, + k: Optional[int] = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + k: Optional[int] = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> PCA: ... + def setK(self, value: int) -> PCA: ... + def setInputCol(self, value: str) -> PCA: ... + def setOutputCol(self, value: str) -> PCA: ... + +class PCAModel(JavaModel, _PCAParams, JavaMLReadable[PCAModel], JavaMLWritable): + def setInputCol(self, value: str) -> PCAModel: ... + def setOutputCol(self, value: str) -> PCAModel: ... + @property + def pc(self) -> DenseMatrix: ... + @property + def explainedVariance(self) -> DenseVector: ... + +class _RFormulaParams(HasFeaturesCol, HasLabelCol, HasHandleInvalid): + formula: Param[str] + forceIndexLabel: Param[bool] + stringIndexerOrderType: Param[str] + handleInvalid: Param[str] + def __init__(self, *args: Any): ... + def getFormula(self) -> str: ... + def getForceIndexLabel(self) -> bool: ... + def getStringIndexerOrderType(self) -> str: ... + +class RFormula( + JavaEstimator[RFormulaModel], + _RFormulaParams, + JavaMLReadable[RFormula], + JavaMLWritable, +): + def __init__( + self, + *, + formula: Optional[str] = ..., + featuresCol: str = ..., + labelCol: str = ..., + forceIndexLabel: bool = ..., + stringIndexerOrderType: str = ..., + handleInvalid: str = ... + ) -> None: ... + def setParams( + self, + *, + formula: Optional[str] = ..., + featuresCol: str = ..., + labelCol: str = ..., + forceIndexLabel: bool = ..., + stringIndexerOrderType: str = ..., + handleInvalid: str = ... + ) -> RFormula: ... + def setFormula(self, value: str) -> RFormula: ... + def setForceIndexLabel(self, value: bool) -> RFormula: ... + def setStringIndexerOrderType(self, value: str) -> RFormula: ... + def setFeaturesCol(self, value: str) -> RFormula: ... + def setLabelCol(self, value: str) -> RFormula: ... + def setHandleInvalid(self, value: str) -> RFormula: ... + +class RFormulaModel( + JavaModel, _RFormulaParams, JavaMLReadable[RFormulaModel], JavaMLWritable +): ... + +class _SelectorParams(HasFeaturesCol, HasOutputCol, HasLabelCol): + selectorType: Param[str] + numTopFeatures: Param[int] + percentile: Param[float] + fpr: Param[float] + fdr: Param[float] + fwe: Param[float] + def __init__(self, *args: Any): ... + def getSelectorType(self) -> str: ... + def getNumTopFeatures(self) -> int: ... + def getPercentile(self) -> float: ... + def getFpr(self) -> float: ... + def getFdr(self) -> float: ... + def getFwe(self) -> float: ... + +class _Selector(JavaEstimator[JM], _SelectorParams, JavaMLReadable, JavaMLWritable): + def setSelectorType(self: P, value: str) -> P: ... + def setNumTopFeatures(self: P, value: int) -> P: ... + def setPercentile(self: P, value: float) -> P: ... + def setFpr(self: P, value: float) -> P: ... + def setFdr(self: P, value: float) -> P: ... + def setFwe(self: P, value: float) -> P: ... + def setFeaturesCol(self: P, value: str) -> P: ... + def setOutputCol(self: P, value: str) -> P: ... + def setLabelCol(self: P, value: str) -> P: ... + +class _SelectorModel(JavaModel, _SelectorParams): + def setFeaturesCol(self: P, value: str) -> P: ... + def setOutputCol(self: P, value: str) -> P: ... + @property + def selectedFeatures(self) -> List[int]: ... 
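A quick sketch of the `RFormula` estimator whose stub appears above; the formula and the input DataFrame `df` are illustrative only:

```python
from pyspark.ml.feature import RFormula

rf = RFormula(formula="clicked ~ country + hour",
              featuresCol="features", labelCol="label",
              handleInvalid="skip")
model = rf.fit(df)                                      # df is a hypothetical DataFrame
model.transform(df).select("features", "label").show()
```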
+ +class ANOVASelector( + _Selector[ANOVASelectorModel], JavaMLReadable[ANOVASelector], JavaMLWritable +): + def __init__( + self, + numTopFeatures: int = ..., + featuresCol: str = ..., + outputCol: Optional[str] = ..., + labelCol: str = ..., + selectorType: str = ..., + percentile: float = ..., + fpr: float = ..., + fdr: float = ..., + fwe: float = ..., + ) -> None: ... + def setParams( + self, + numTopFeatures: int = ..., + featuresCol: str = ..., + outputCol: Optional[str] = ..., + labelCol: str = ..., + selectorType: str = ..., + percentile: float = ..., + fpr: float = ..., + fdr: float = ..., + fwe: float = ..., + ) -> ANOVASelector: ... + +class ANOVASelectorModel( + _SelectorModel, JavaMLReadable[ANOVASelectorModel], JavaMLWritable +): ... + +class ChiSqSelector( + _Selector[ChiSqSelectorModel], + JavaMLReadable[ChiSqSelector], + JavaMLWritable, +): + def __init__( + self, + *, + numTopFeatures: int = ..., + featuresCol: str = ..., + outputCol: Optional[str] = ..., + labelCol: str = ..., + selectorType: str = ..., + percentile: float = ..., + fpr: float = ..., + fdr: float = ..., + fwe: float = ... + ) -> None: ... + def setParams( + self, + *, + numTopFeatures: int = ..., + featuresCol: str = ..., + outputCol: Optional[str] = ..., + labelCol: str = ..., + selectorType: str = ..., + percentile: float = ..., + fpr: float = ..., + fdr: float = ..., + fwe: float = ... + ): ... + def setSelectorType(self, value: str) -> ChiSqSelector: ... + def setNumTopFeatures(self, value: int) -> ChiSqSelector: ... + def setPercentile(self, value: float) -> ChiSqSelector: ... + def setFpr(self, value: float) -> ChiSqSelector: ... + def setFdr(self, value: float) -> ChiSqSelector: ... + def setFwe(self, value: float) -> ChiSqSelector: ... + def setFeaturesCol(self, value: str) -> ChiSqSelector: ... + def setOutputCol(self, value: str) -> ChiSqSelector: ... + def setLabelCol(self, value: str) -> ChiSqSelector: ... + +class ChiSqSelectorModel( + _SelectorModel, JavaMLReadable[ChiSqSelectorModel], JavaMLWritable +): + def setFeaturesCol(self, value: str) -> ChiSqSelectorModel: ... + def setOutputCol(self, value: str) -> ChiSqSelectorModel: ... + @property + def selectedFeatures(self) -> List[int]: ... + +class VectorSizeHint( + JavaTransformer, + HasInputCol, + HasHandleInvalid, + JavaMLReadable[VectorSizeHint], + JavaMLWritable, +): + size: Param[int] + handleInvalid: Param[str] + def __init__( + self, + *, + inputCol: Optional[str] = ..., + size: Optional[int] = ..., + handleInvalid: str = ... + ) -> None: ... + def setParams( + self, + *, + inputCol: Optional[str] = ..., + size: Optional[int] = ..., + handleInvalid: str = ... + ) -> VectorSizeHint: ... + def setSize(self, value: int) -> VectorSizeHint: ... + def getSize(self) -> int: ... + def setInputCol(self, value: str) -> VectorSizeHint: ... + def setHandleInvalid(self, value: str) -> VectorSizeHint: ... + +class FValueSelector( + _Selector[FValueSelectorModel], JavaMLReadable[FValueSelector], JavaMLWritable +): + def __init__( + self, + numTopFeatures: int = ..., + featuresCol: str = ..., + outputCol: Optional[str] = ..., + labelCol: str = ..., + selectorType: str = ..., + percentile: float = ..., + fpr: float = ..., + fdr: float = ..., + fwe: float = ..., + ) -> None: ... + def setParams( + self, + numTopFeatures: int = ..., + featuresCol: str = ..., + outputCol: Optional[str] = ..., + labelCol: str = ..., + selectorType: str = ..., + percentile: float = ..., + fpr: float = ..., + fdr: float = ..., + fwe: float = ..., + ) -> FValueSelector: ... 
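The selector family above shares the `_Selector`/`_SelectorModel` helpers; a minimal `ChiSqSelector` sketch, assuming a labeled DataFrame `df`:

```python
from pyspark.ml.feature import ChiSqSelector

selector = ChiSqSelector(numTopFeatures=5, featuresCol="features",
                         labelCol="label", outputCol="selected")
model = selector.fit(df)               # df is a hypothetical labeled DataFrame
print(model.selectedFeatures)          # List[int], as annotated on _SelectorModel
```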
+ +class FValueSelectorModel( + _SelectorModel, JavaMLReadable[FValueSelectorModel], JavaMLWritable +): ... + +class _VarianceThresholdSelectorParams(HasFeaturesCol, HasOutputCol): + varianceThreshold: Param[float] = ... + def getVarianceThreshold(self) -> float: ... + +class VarianceThresholdSelector( + JavaEstimator, _VarianceThresholdSelectorParams, JavaMLReadable, JavaMLWritable +): + def __init__( + self, + featuresCol: str = ..., + outputCol: Optional[str] = ..., + varianceThreshold: float = ..., + ) -> None: ... + def setParams( + self, + featuresCol: str = ..., + outputCol: Optional[str] = ..., + varianceThreshold: float = ..., + ): ... + def setVarianceThreshold(self, value: float) -> VarianceThresholdSelector: ... + def setFeaturesCol(self, value: str) -> VarianceThresholdSelector: ... + def setOutputCol(self, value: str) -> VarianceThresholdSelector: ... + +class VarianceThresholdSelectorModel( + JavaModel, _VarianceThresholdSelectorParams, JavaMLReadable, JavaMLWritable +): + def setFeaturesCol(self, value: str) -> VarianceThresholdSelectorModel: ... + def setOutputCol(self, value: str) -> VarianceThresholdSelectorModel: ... + @property + def selectedFeatures(self) -> List[int]: ... diff --git a/python/pyspark/ml/fpm.pyi b/python/pyspark/ml/fpm.pyi new file mode 100644 index 0000000000000..7cc304a2ffa39 --- /dev/null +++ b/python/pyspark/ml/fpm.pyi @@ -0,0 +1,109 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, Optional + +from pyspark.ml.util import JavaMLReadable, JavaMLWritable +from pyspark.ml.wrapper import JavaEstimator, JavaParams, JavaModel +from pyspark.ml.param.shared import HasPredictionCol +from pyspark.sql.dataframe import DataFrame + +from pyspark.ml.param import Param + +class _FPGrowthParams(HasPredictionCol): + itemsCol: Param[str] + minSupport: Param[float] + numPartitions: Param[int] + minConfidence: Param[float] + def __init__(self, *args: Any): ... + def getItemsCol(self) -> str: ... + def getMinSupport(self) -> float: ... + def getNumPartitions(self) -> int: ... + def getMinConfidence(self) -> float: ... + +class FPGrowthModel( + JavaModel, _FPGrowthParams, JavaMLWritable, JavaMLReadable[FPGrowthModel] +): + def setItemsCol(self, value: str) -> FPGrowthModel: ... + def setMinConfidence(self, value: float) -> FPGrowthModel: ... + def setPredictionCol(self, value: str) -> FPGrowthModel: ... + @property + def freqItemsets(self) -> DataFrame: ... + @property + def associationRules(self) -> DataFrame: ... + +class FPGrowth( + JavaEstimator[FPGrowthModel], + _FPGrowthParams, + JavaMLWritable, + JavaMLReadable[FPGrowth], +): + def __init__( + self, + *, + minSupport: float = ..., + minConfidence: float = ..., + itemsCol: str = ..., + predictionCol: str = ..., + numPartitions: Optional[int] = ... 
+ ) -> None: ... + def setParams( + self, + *, + minSupport: float = ..., + minConfidence: float = ..., + itemsCol: str = ..., + predictionCol: str = ..., + numPartitions: Optional[int] = ... + ) -> FPGrowth: ... + def setItemsCol(self, value: str) -> FPGrowth: ... + def setMinSupport(self, value: float) -> FPGrowth: ... + def setNumPartitions(self, value: int) -> FPGrowth: ... + def setMinConfidence(self, value: float) -> FPGrowth: ... + def setPredictionCol(self, value: str) -> FPGrowth: ... + +class PrefixSpan(JavaParams): + minSupport: Param[float] + maxPatternLength: Param[int] + maxLocalProjDBSize: Param[int] + sequenceCol: Param[str] + def __init__( + self, + *, + minSupport: float = ..., + maxPatternLength: int = ..., + maxLocalProjDBSize: int = ..., + sequenceCol: str = ... + ) -> None: ... + def setParams( + self, + *, + minSupport: float = ..., + maxPatternLength: int = ..., + maxLocalProjDBSize: int = ..., + sequenceCol: str = ... + ) -> PrefixSpan: ... + def setMinSupport(self, value: float) -> PrefixSpan: ... + def getMinSupport(self) -> float: ... + def setMaxPatternLength(self, value: int) -> PrefixSpan: ... + def getMaxPatternLength(self) -> int: ... + def setMaxLocalProjDBSize(self, value: int) -> PrefixSpan: ... + def getMaxLocalProjDBSize(self) -> int: ... + def setSequenceCol(self, value: str) -> PrefixSpan: ... + def getSequenceCol(self) -> str: ... + def findFrequentSequentialPatterns(self, dataset: DataFrame) -> DataFrame: ... diff --git a/python/pyspark/ml/functions.pyi b/python/pyspark/ml/functions.pyi new file mode 100644 index 0000000000000..42650e742e781 --- /dev/null +++ b/python/pyspark/ml/functions.pyi @@ -0,0 +1,22 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyspark import SparkContext as SparkContext, since as since # noqa: F401 +from pyspark.sql.column import Column as Column + +def vector_to_array(col: Column) -> Column: ... diff --git a/python/pyspark/ml/image.pyi b/python/pyspark/ml/image.pyi new file mode 100644 index 0000000000000..9ff3a8817aadd --- /dev/null +++ b/python/pyspark/ml/image.pyi @@ -0,0 +1,40 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Dict, List + +from pyspark.sql.types import Row, StructType + +from numpy import ndarray # type: ignore[import] + +class _ImageSchema: + def __init__(self) -> None: ... + @property + def imageSchema(self) -> StructType: ... + @property + def ocvTypes(self) -> Dict[str, int]: ... + @property + def columnSchema(self) -> StructType: ... + @property + def imageFields(self) -> List[str]: ... + @property + def undefinedImageType(self) -> str: ... + def toNDArray(self, image: Row) -> ndarray: ... + def toImage(self, array: ndarray, origin: str = ...) -> Row: ... + +ImageSchema: _ImageSchema diff --git a/python/pyspark/ml/linalg/__init__.pyi b/python/pyspark/ml/linalg/__init__.pyi new file mode 100644 index 0000000000000..a576b30aec308 --- /dev/null +++ b/python/pyspark/ml/linalg/__init__.pyi @@ -0,0 +1,255 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import Any, Dict, Iterable, List, Optional, Tuple, Union + +from pyspark.ml import linalg as newlinalg # noqa: F401 +from pyspark.sql.types import StructType, UserDefinedType + +from numpy import float64, ndarray # type: ignore[import] + +class VectorUDT(UserDefinedType): + @classmethod + def sqlType(cls) -> StructType: ... + @classmethod + def module(cls) -> str: ... + @classmethod + def scalaUDT(cls) -> str: ... + def serialize( + self, obj: Vector + ) -> Tuple[int, Optional[int], Optional[List[int]], List[float]]: ... + def deserialize(self, datum: Any) -> Vector: ... + def simpleString(self) -> str: ... + +class MatrixUDT(UserDefinedType): + @classmethod + def sqlType(cls) -> StructType: ... + @classmethod + def module(cls) -> str: ... + @classmethod + def scalaUDT(cls) -> str: ... + def serialize( + self, obj + ) -> Tuple[ + int, int, int, Optional[List[int]], Optional[List[int]], List[float], bool + ]: ... + def deserialize(self, datum: Any) -> Matrix: ... + def simpleString(self) -> str: ... + +class Vector: + __UDT__: VectorUDT + def toArray(self) -> ndarray: ... + +class DenseVector(Vector): + array: ndarray + @overload + def __init__(self, *elements: float) -> None: ... + @overload + def __init__(self, __arr: bytes) -> None: ... + @overload + def __init__(self, __arr: Iterable[float]) -> None: ... + @staticmethod + def parse(s) -> DenseVector: ... + def __reduce__(self) -> Tuple[type, bytes]: ... + def numNonzeros(self) -> int: ... + def norm(self, p: Union[float, str]) -> float64: ... + def dot(self, other: Iterable[float]) -> float64: ... + def squared_distance(self, other: Iterable[float]) -> float64: ... + def toArray(self) -> ndarray: ... + @property + def values(self) -> ndarray: ... 
+ def __getitem__(self, item: int) -> float64: ... + def __len__(self) -> int: ... + def __eq__(self, other: Any) -> bool: ... + def __ne__(self, other: Any) -> bool: ... + def __hash__(self) -> int: ... + def __getattr__(self, item: str) -> Any: ... + def __neg__(self) -> DenseVector: ... + def __add__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __sub__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __mul__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __div__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __truediv__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __mod__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __radd__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __rsub__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __rmul__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __rdiv__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __rtruediv__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __rmod__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + +class SparseVector(Vector): + size: int + indices: ndarray + values: ndarray + @overload + def __init__(self, size: int, *args: Tuple[int, float]) -> None: ... + @overload + def __init__(self, size: int, __indices: bytes, __values: bytes) -> None: ... + @overload + def __init__( + self, size: int, __indices: Iterable[int], __values: Iterable[float] + ) -> None: ... + @overload + def __init__(self, size: int, __pairs: Iterable[Tuple[int, float]]) -> None: ... + @overload + def __init__(self, size: int, __map: Dict[int, float]) -> None: ... + def numNonzeros(self) -> int: ... + def norm(self, p: Union[float, str]) -> float64: ... + def __reduce__(self): ... + @staticmethod + def parse(s: str) -> SparseVector: ... + def dot(self, other: Iterable[float]) -> float64: ... + def squared_distance(self, other: Iterable[float]) -> float64: ... + def toArray(self) -> ndarray: ... + def __len__(self) -> int: ... + def __eq__(self, other) -> bool: ... + def __getitem__(self, index: int) -> float64: ... + def __ne__(self, other) -> bool: ... + def __hash__(self) -> int: ... + +class Vectors: + @overload + @staticmethod + def sparse(size: int, *args: Tuple[int, float]) -> SparseVector: ... + @overload + @staticmethod + def sparse(size: int, __indices: bytes, __values: bytes) -> SparseVector: ... + @overload + @staticmethod + def sparse( + size: int, __indices: Iterable[int], __values: Iterable[float] + ) -> SparseVector: ... + @overload + @staticmethod + def sparse(size: int, __pairs: Iterable[Tuple[int, float]]) -> SparseVector: ... + @overload + @staticmethod + def sparse(size: int, __map: Dict[int, float]) -> SparseVector: ... + @overload + @staticmethod + def dense(self, *elements: float) -> DenseVector: ... + @overload + @staticmethod + def dense(self, __arr: bytes) -> DenseVector: ... + @overload + @staticmethod + def dense(self, __arr: Iterable[float]) -> DenseVector: ... + @staticmethod + def stringify(vector: Vector) -> str: ... + @staticmethod + def squared_distance(v1: Vector, v2: Vector) -> float64: ... + @staticmethod + def norm(vector: Vector, p: Union[float, str]) -> float64: ... + @staticmethod + def parse(s: str) -> Vector: ... + @staticmethod + def zeros(size: int) -> DenseVector: ... 
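For reference, a small sketch of the `Vectors` factory and the vector operations typed above (plain local objects, no SparkSession needed):

```python
from pyspark.ml.linalg import Vectors

dense = Vectors.dense([1.0, 0.0, 3.0])
sparse = Vectors.sparse(3, [0, 2], [1.0, 3.0])

print(dense.dot(sparse))                        # 10.0
print(Vectors.squared_distance(dense, sparse))  # 0.0
print(sparse.toArray())                         # numpy ndarray, per the annotations
```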
+ +class Matrix: + __UDT__: MatrixUDT + numRows: int + numCols: int + isTransposed: bool + def __init__( + self, numRows: int, numCols: int, isTransposed: bool = ... + ) -> None: ... + def toArray(self): ... + +class DenseMatrix(Matrix): + values: Any + @overload + def __init__( + self, numRows: int, numCols: int, values: bytes, isTransposed: bool = ... + ) -> None: ... + @overload + def __init__( + self, + numRows: int, + numCols: int, + values: Iterable[float], + isTransposed: bool = ..., + ) -> None: ... + def __reduce__(self) -> Tuple[type, Tuple[int, int, bytes, int]]: ... + def toArray(self) -> ndarray: ... + def toSparse(self) -> SparseMatrix: ... + def __getitem__(self, indices: Tuple[int, int]) -> float64: ... + def __eq__(self, other) -> bool: ... + +class SparseMatrix(Matrix): + colPtrs: ndarray + rowIndices: ndarray + values: ndarray + @overload + def __init__( + self, + numRows: int, + numCols: int, + colPtrs: bytes, + rowIndices: bytes, + values: bytes, + isTransposed: bool = ..., + ) -> None: ... + @overload + def __init__( + self, + numRows: int, + numCols: int, + colPtrs: Iterable[int], + rowIndices: Iterable[int], + values: Iterable[float], + isTransposed: bool = ..., + ) -> None: ... + def __reduce__(self) -> Tuple[type, Tuple[int, int, bytes, bytes, bytes, int]]: ... + def __getitem__(self, indices: Tuple[int, int]) -> float64: ... + def toArray(self) -> ndarray: ... + def toDense(self) -> DenseMatrix: ... + def __eq__(self, other) -> bool: ... + +class Matrices: + @overload + @staticmethod + def dense( + numRows: int, numCols: int, values: bytes, isTransposed: bool = ... + ) -> DenseMatrix: ... + @overload + @staticmethod + def dense( + numRows: int, numCols: int, values: Iterable[float], isTransposed: bool = ... + ) -> DenseMatrix: ... + @overload + @staticmethod + def sparse( + numRows: int, + numCols: int, + colPtrs: bytes, + rowIndices: bytes, + values: bytes, + isTransposed: bool = ..., + ) -> SparseMatrix: ... + @overload + @staticmethod + def sparse( + numRows: int, + numCols: int, + colPtrs: Iterable[int], + rowIndices: Iterable[int], + values: Iterable[float], + isTransposed: bool = ..., + ) -> SparseMatrix: ... diff --git a/python/pyspark/ml/param/__init__.pyi b/python/pyspark/ml/param/__init__.pyi new file mode 100644 index 0000000000000..23a63c573e452 --- /dev/null +++ b/python/pyspark/ml/param/__init__.pyi @@ -0,0 +1,96 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import abc +from typing import overload +from typing import Any, Callable, Generic, List, Optional +from pyspark.ml._typing import T +import pyspark.ml._typing + +import pyspark.ml.util +from pyspark.ml.linalg import DenseVector, Matrix + +class Param(Generic[T]): + parent: str + name: str + doc: str + typeConverter: Callable[[Any], T] + def __init__( + self, + parent: pyspark.ml.util.Identifiable, + name: str, + doc: str, + typeConverter: Optional[Callable[[Any], T]] = ..., + ) -> None: ... + def __hash__(self) -> int: ... + def __eq__(self, other: Any) -> bool: ... + +class TypeConverters: + @staticmethod + def identity(value: T) -> T: ... + @staticmethod + def toList(value: Any) -> List: ... + @staticmethod + def toListFloat(value: Any) -> List[float]: ... + @staticmethod + def toListInt(value: Any) -> List[int]: ... + @staticmethod + def toListString(value: Any) -> List[str]: ... + @staticmethod + def toVector(value: Any) -> DenseVector: ... + @staticmethod + def toMatrix(value: Any) -> Matrix: ... + @staticmethod + def toFloat(value: Any) -> float: ... + @staticmethod + def toInt(value: Any) -> int: ... + @staticmethod + def toString(value: Any) -> str: ... + @staticmethod + def toBoolean(value: Any) -> bool: ... + +class Params(pyspark.ml.util.Identifiable, metaclass=abc.ABCMeta): + def __init__(self) -> None: ... + @property + def params(self) -> List[Param]: ... + def explainParam(self, param: str) -> str: ... + def explainParams(self) -> str: ... + def getParam(self, paramName: str) -> Param: ... + @overload + def isSet(self, param: str) -> bool: ... + @overload + def isSet(self, param: Param[Any]) -> bool: ... + @overload + def hasDefault(self, param: str) -> bool: ... + @overload + def hasDefault(self, param: Param[Any]) -> bool: ... + @overload + def isDefined(self, param: str) -> bool: ... + @overload + def isDefined(self, param: Param[Any]) -> bool: ... + def hasParam(self, paramName: str) -> bool: ... + @overload + def getOrDefault(self, param: str) -> Any: ... + @overload + def getOrDefault(self, param: Param[T]) -> T: ... + def extractParamMap( + self, extra: Optional[pyspark.ml._typing.ParamMap] = ... + ) -> pyspark.ml._typing.ParamMap: ... + def copy(self, extra: Optional[pyspark.ml._typing.ParamMap] = ...) -> Params: ... + def set(self, param: Param, value: Any) -> None: ... + def clear(self, param: Param) -> None: ... diff --git a/python/pyspark/ml/param/_shared_params_code_gen.pyi b/python/pyspark/ml/param/_shared_params_code_gen.pyi new file mode 100644 index 0000000000000..e436a54c0eaa4 --- /dev/null +++ b/python/pyspark/ml/param/_shared_params_code_gen.pyi @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +header: str diff --git a/python/pyspark/ml/param/shared.pyi b/python/pyspark/ml/param/shared.pyi new file mode 100644 index 0000000000000..5999c0eaa4661 --- /dev/null +++ b/python/pyspark/ml/param/shared.pyi @@ -0,0 +1,187 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, Generic, List +from pyspark.ml._typing import T + +from pyspark.ml.param import * + +class HasMaxIter(Params): + maxIter: Param[int] + def __init__(self) -> None: ... + def getMaxIter(self) -> int: ... + +class HasRegParam(Params): + regParam: Param[float] + def __init__(self) -> None: ... + def getRegParam(self) -> float: ... + +class HasFeaturesCol(Params): + featuresCol: Param[str] + def __init__(self) -> None: ... + def getFeaturesCol(self) -> str: ... + +class HasLabelCol(Params): + labelCol: Param[str] + def __init__(self) -> None: ... + def getLabelCol(self) -> str: ... + +class HasPredictionCol(Params): + predictionCol: Param[str] + def __init__(self) -> None: ... + def getPredictionCol(self) -> str: ... + +class HasProbabilityCol(Params): + probabilityCol: Param[str] + def __init__(self) -> None: ... + def getProbabilityCol(self) -> str: ... + +class HasRawPredictionCol(Params): + rawPredictionCol: Param[str] + def __init__(self) -> None: ... + def getRawPredictionCol(self) -> str: ... + +class HasInputCol(Params): + inputCol: Param[str] + def __init__(self) -> None: ... + def getInputCol(self) -> str: ... + +class HasInputCols(Params): + inputCols: Param[List[str]] + def __init__(self) -> None: ... + def getInputCols(self) -> List[str]: ... + +class HasOutputCol(Params): + outputCol: Param[str] + def __init__(self) -> None: ... + def getOutputCol(self) -> str: ... + +class HasOutputCols(Params): + outputCols: Param[List[str]] + def __init__(self) -> None: ... + def getOutputCols(self) -> List[str]: ... + +class HasNumFeatures(Params): + numFeatures: Param[int] + def __init__(self) -> None: ... + def getNumFeatures(self) -> int: ... + +class HasCheckpointInterval(Params): + checkpointInterval: Param[int] + def __init__(self) -> None: ... + def getCheckpointInterval(self) -> int: ... + +class HasSeed(Params): + seed: Param[int] + def __init__(self) -> None: ... + def getSeed(self) -> int: ... + +class HasTol(Params): + tol: Param[float] + def __init__(self) -> None: ... + def getTol(self) -> float: ... + +class HasRelativeError(Params): + relativeError: Param[float] + def __init__(self) -> None: ... + def getRelativeError(self) -> float: ... + +class HasStepSize(Params): + stepSize: Param[float] + def __init__(self) -> None: ... + def getStepSize(self) -> float: ... + +class HasHandleInvalid(Params): + handleInvalid: Param[str] + def __init__(self) -> None: ... + def getHandleInvalid(self) -> str: ... 
+ +class HasElasticNetParam(Params): + elasticNetParam: Param[float] + def __init__(self) -> None: ... + def getElasticNetParam(self) -> float: ... + +class HasFitIntercept(Params): + fitIntercept: Param[bool] + def __init__(self) -> None: ... + def getFitIntercept(self) -> bool: ... + +class HasStandardization(Params): + standardization: Param[bool] + def __init__(self) -> None: ... + def getStandardization(self) -> bool: ... + +class HasThresholds(Params): + thresholds: Param[List[float]] + def __init__(self) -> None: ... + def getThresholds(self) -> List[float]: ... + +class HasThreshold(Params): + threshold: Param[float] + def __init__(self) -> None: ... + def getThreshold(self) -> float: ... + +class HasWeightCol(Params): + weightCol: Param[str] + def __init__(self) -> None: ... + def getWeightCol(self) -> str: ... + +class HasSolver(Params): + solver: Param[str] + def __init__(self) -> None: ... + def getSolver(self) -> str: ... + +class HasVarianceCol(Params): + varianceCol: Param[str] + def __init__(self) -> None: ... + def getVarianceCol(self) -> str: ... + +class HasAggregationDepth(Params): + aggregationDepth: Param[int] + def __init__(self) -> None: ... + def getAggregationDepth(self) -> int: ... + +class HasParallelism(Params): + parallelism: Param[int] + def __init__(self) -> None: ... + def getParallelism(self) -> int: ... + +class HasCollectSubModels(Params): + collectSubModels: Param[bool] + def __init__(self) -> None: ... + def getCollectSubModels(self) -> bool: ... + +class HasLoss(Params): + loss: Param[str] + def __init__(self) -> None: ... + def getLoss(self) -> str: ... + +class HasValidationIndicatorCol(Params): + validationIndicatorCol: Param[str] + def __init__(self) -> None: ... + def getValidationIndicatorCol(self) -> str: ... + +class HasDistanceMeasure(Params): + distanceMeasure: Param[str] + def __init__(self) -> None: ... + def getDistanceMeasure(self) -> str: ... + +class HasBlockSize(Params): + blockSize: Param[int] + def __init__(self) -> None: ... + def getBlockSize(self) -> int: ... diff --git a/python/pyspark/ml/pipeline.pyi b/python/pyspark/ml/pipeline.pyi new file mode 100644 index 0000000000000..44680586d70d1 --- /dev/null +++ b/python/pyspark/ml/pipeline.pyi @@ -0,0 +1,97 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import Any, Dict, List, Optional, Tuple, Type, Union + +from pyspark.ml._typing import PipelineStage +from pyspark.context import SparkContext +from pyspark.ml.base import Estimator, Model, Transformer +from pyspark.ml.param import Param +from pyspark.ml.util import ( # noqa: F401 + DefaultParamsReader as DefaultParamsReader, + DefaultParamsWriter as DefaultParamsWriter, + JavaMLReader as JavaMLReader, + JavaMLWritable as JavaMLWritable, + JavaMLWriter as JavaMLWriter, + MLReadable as MLReadable, + MLReader as MLReader, + MLWritable as MLWritable, + MLWriter as MLWriter, +) + +class Pipeline(Estimator[PipelineModel], MLReadable[Pipeline], MLWritable): + stages: List[PipelineStage] + def __init__(self, *, stages: Optional[List[PipelineStage]] = ...) -> None: ... + def setStages(self, stages: List[PipelineStage]) -> Pipeline: ... + def getStages(self) -> List[PipelineStage]: ... + def setParams(self, *, stages: Optional[List[PipelineStage]] = ...) -> Pipeline: ... + def copy(self, extra: Optional[Dict[Param, str]] = ...) -> Pipeline: ... + def write(self) -> JavaMLWriter: ... + def save(self, path: str) -> None: ... + @classmethod + def read(cls) -> PipelineReader: ... + +class PipelineWriter(MLWriter): + instance: Pipeline + def __init__(self, instance: Pipeline) -> None: ... + def saveImpl(self, path: str) -> None: ... + +class PipelineReader(MLReader): + cls: Type[Pipeline] + def __init__(self, cls: Type[Pipeline]) -> None: ... + def load(self, path: str) -> Pipeline: ... + +class PipelineModelWriter(MLWriter): + instance: PipelineModel + def __init__(self, instance: PipelineModel) -> None: ... + def saveImpl(self, path: str) -> None: ... + +class PipelineModelReader(MLReader): + cls: Type[PipelineModel] + def __init__(self, cls: Type[PipelineModel]) -> None: ... + def load(self, path: str) -> PipelineModel: ... + +class PipelineModel(Model, MLReadable[PipelineModel], MLWritable): + stages: List[PipelineStage] + def __init__(self, stages: List[Transformer]) -> None: ... + def copy(self, extra: Optional[Dict[Param, Any]] = ...) -> PipelineModel: ... + def write(self) -> JavaMLWriter: ... + def save(self, path: str) -> None: ... + @classmethod + def read(cls) -> PipelineModelReader: ... + +class PipelineSharedReadWrite: + @staticmethod + def checkStagesForJava(stages: List[PipelineStage]) -> bool: ... + @staticmethod + def validateStages(stages: List[PipelineStage]) -> None: ... + @staticmethod + def saveImpl( + instance: Union[Pipeline, PipelineModel], + stages: List[PipelineStage], + sc: SparkContext, + path: str, + ) -> None: ... + @staticmethod + def load( + metadata: Dict[str, Any], sc: SparkContext, path: str + ) -> Tuple[str, List[PipelineStage]]: ... + @staticmethod + def getStagePath( + stageUid: str, stageIdx: int, numStages: int, stagesDir: str + ) -> str: ... diff --git a/python/pyspark/ml/recommendation.pyi b/python/pyspark/ml/recommendation.pyi new file mode 100644 index 0000000000000..390486b45c5e6 --- /dev/null +++ b/python/pyspark/ml/recommendation.pyi @@ -0,0 +1,152 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, Optional + +import sys # noqa: F401 + +from pyspark import since, keyword_only # noqa: F401 +from pyspark.ml.param.shared import ( + HasBlockSize, + HasCheckpointInterval, + HasMaxIter, + HasPredictionCol, + HasRegParam, + HasSeed, +) +from pyspark.ml.wrapper import JavaEstimator, JavaModel +from pyspark.ml.common import inherit_doc # noqa: F401 +from pyspark.ml.param import Param +from pyspark.ml.util import JavaMLWritable, JavaMLReadable + +from pyspark.sql.dataframe import DataFrame + +class _ALSModelParams(HasPredictionCol, HasBlockSize): + userCol: Param[str] + itemCol: Param[str] + coldStartStrategy: Param[str] + def getUserCol(self) -> str: ... + def getItemCol(self) -> str: ... + def getColdStartStrategy(self) -> str: ... + +class _ALSParams( + _ALSModelParams, HasMaxIter, HasRegParam, HasCheckpointInterval, HasSeed +): + rank: Param[int] + numUserBlocks: Param[int] + numItemBlocks: Param[int] + implicitPrefs: Param[bool] + alpha: Param[float] + ratingCol: Param[str] + nonnegative: Param[bool] + intermediateStorageLevel: Param[str] + finalStorageLevel: Param[str] + def __init__(self, *args: Any): ... + def getRank(self) -> int: ... + def getNumUserBlocks(self) -> int: ... + def getNumItemBlocks(self) -> int: ... + def getImplicitPrefs(self) -> bool: ... + def getAlpha(self) -> float: ... + def getRatingCol(self) -> str: ... + def getNonnegative(self) -> bool: ... + def getIntermediateStorageLevel(self) -> str: ... + def getFinalStorageLevel(self) -> str: ... + +class ALS(JavaEstimator[ALSModel], _ALSParams, JavaMLWritable, JavaMLReadable[ALS]): + def __init__( + self, + *, + rank: int = ..., + maxIter: int = ..., + regParam: float = ..., + numUserBlocks: int = ..., + numItemBlocks: int = ..., + implicitPrefs: bool = ..., + alpha: float = ..., + userCol: str = ..., + itemCol: str = ..., + seed: Optional[int] = ..., + ratingCol: str = ..., + nonnegative: bool = ..., + checkpointInterval: int = ..., + intermediateStorageLevel: str = ..., + finalStorageLevel: str = ..., + coldStartStrategy: str = ..., + blockSize: int = ... + ) -> None: ... + def setParams( + self, + *, + rank: int = ..., + maxIter: int = ..., + regParam: float = ..., + numUserBlocks: int = ..., + numItemBlocks: int = ..., + implicitPrefs: bool = ..., + alpha: float = ..., + userCol: str = ..., + itemCol: str = ..., + seed: Optional[int] = ..., + ratingCol: str = ..., + nonnegative: bool = ..., + checkpointInterval: int = ..., + intermediateStorageLevel: str = ..., + finalStorageLevel: str = ..., + coldStartStrategy: str = ..., + blockSize: int = ... + ) -> ALS: ... + def setRank(self, value: int) -> ALS: ... + def setNumUserBlocks(self, value: int) -> ALS: ... + def setNumItemBlocks(self, value: int) -> ALS: ... + def setNumBlocks(self, value: int) -> ALS: ... + def setImplicitPrefs(self, value: bool) -> ALS: ... + def setAlpha(self, value: float) -> ALS: ... + def setUserCol(self, value: str) -> ALS: ... + def setItemCol(self, value: str) -> ALS: ... + def setRatingCol(self, value: str) -> ALS: ... + def setNonnegative(self, value: bool) -> ALS: ... 
+ def setIntermediateStorageLevel(self, value: str) -> ALS: ... + def setFinalStorageLevel(self, value: str) -> ALS: ... + def setColdStartStrategy(self, value: str) -> ALS: ... + def setMaxIter(self, value: int) -> ALS: ... + def setRegParam(self, value: float) -> ALS: ... + def setPredictionCol(self, value: str) -> ALS: ... + def setCheckpointInterval(self, value: int) -> ALS: ... + def setSeed(self, value: int) -> ALS: ... + def setBlockSize(self, value: int) -> ALS: ... + +class ALSModel(JavaModel, _ALSModelParams, JavaMLWritable, JavaMLReadable[ALSModel]): + def setUserCol(self, value: str) -> ALSModel: ... + def setItemCol(self, value: str) -> ALSModel: ... + def setColdStartStrategy(self, value: str) -> ALSModel: ... + def setPredictionCol(self, value: str) -> ALSModel: ... + def setBlockSize(self, value: int) -> ALSModel: ... + @property + def rank(self) -> int: ... + @property + def userFactors(self) -> DataFrame: ... + @property + def itemFactors(self) -> DataFrame: ... + def recommendForAllUsers(self, numItems: int) -> DataFrame: ... + def recommendForAllItems(self, numUsers: int) -> DataFrame: ... + def recommendForUserSubset( + self, dataset: DataFrame, numItems: int + ) -> DataFrame: ... + def recommendForItemSubset( + self, dataset: DataFrame, numUsers: int + ) -> DataFrame: ... diff --git a/python/pyspark/ml/regression.pyi b/python/pyspark/ml/regression.pyi new file mode 100644 index 0000000000000..991eb4f12ac85 --- /dev/null +++ b/python/pyspark/ml/regression.pyi @@ -0,0 +1,825 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, List, Optional +from pyspark.ml._typing import JM, M, T + +import abc +from pyspark.ml import PredictionModel, Predictor +from pyspark.ml.base import _PredictorParams +from pyspark.ml.param.shared import ( + HasAggregationDepth, + HasBlockSize, + HasElasticNetParam, + HasFeaturesCol, + HasFitIntercept, + HasLabelCol, + HasLoss, + HasMaxIter, + HasPredictionCol, + HasRegParam, + HasSeed, + HasSolver, + HasStandardization, + HasStepSize, + HasTol, + HasVarianceCol, + HasWeightCol, +) +from pyspark.ml.tree import ( + _DecisionTreeModel, + _DecisionTreeParams, + _GBTParams, + _RandomForestParams, + _TreeEnsembleModel, + _TreeRegressorParams, +) +from pyspark.ml.util import ( + GeneralJavaMLWritable, + HasTrainingSummary, + JavaMLReadable, + JavaMLWritable, +) +from pyspark.ml.wrapper import ( + JavaEstimator, + JavaModel, + JavaPredictionModel, + JavaPredictor, + JavaWrapper, +) + +from pyspark.ml.linalg import Matrix, Vector +from pyspark.ml.param import Param +from pyspark.sql.dataframe import DataFrame + +class Regressor(Predictor[M], _PredictorParams, metaclass=abc.ABCMeta): ... +class RegressionModel(PredictionModel[T], _PredictorParams, metaclass=abc.ABCMeta): ... 
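
The abstract `Regressor`/`RegressionModel` pair above is the hierarchy that concrete estimators such as `LinearRegression` plug into, with the shared-param mixins supplying the typed getters. A minimal sketch, assuming a local SparkSession and a toy DataFrame (illustrative aside, not lines of the patch):

```python
# Illustrative sketch: a concrete Regressor/RegressionModel pair in use.
from pyspark.sql import SparkSession
from pyspark.ml.linalg import Vectors
from pyspark.ml.regression import LinearRegression

spark = SparkSession.builder.master("local[1]").getOrCreate()
train = spark.createDataFrame(
    [(1.0, Vectors.dense(0.0)), (3.0, Vectors.dense(2.0))], ["label", "features"]
)

lr = LinearRegression(maxIter=5, regParam=0.0)
assert lr.getMaxIter() == 5          # getter typed via HasMaxIter
model = lr.fit(train)                # a LinearRegressionModel (declared further below)
print(model.coefficients, model.intercept)
```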
+class _JavaRegressor(Regressor, JavaPredictor[JM], metaclass=abc.ABCMeta): ... +class _JavaRegressionModel( + RegressionModel, JavaPredictionModel[T], metaclass=abc.ABCMeta +): ... + +class _LinearRegressionParams( + _PredictorParams, + HasRegParam, + HasElasticNetParam, + HasMaxIter, + HasTol, + HasFitIntercept, + HasStandardization, + HasWeightCol, + HasSolver, + HasAggregationDepth, + HasLoss, + HasBlockSize, +): + solver: Param[str] + loss: Param[str] + epsilon: Param[float] + def __init__(self, *args: Any): ... + def getEpsilon(self) -> float: ... + +class LinearRegression( + _JavaRegressor[LinearRegressionModel], + _LinearRegressionParams, + JavaMLWritable, + JavaMLReadable[LinearRegression], +): + def __init__( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxIter: int = ..., + regParam: float = ..., + elasticNetParam: float = ..., + tol: float = ..., + fitIntercept: bool = ..., + standardization: bool = ..., + solver: str = ..., + weightCol: Optional[str] = ..., + aggregationDepth: int = ..., + epsilon: float = ..., + blockSize: int = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxIter: int = ..., + regParam: float = ..., + elasticNetParam: float = ..., + tol: float = ..., + fitIntercept: bool = ..., + standardization: bool = ..., + solver: str = ..., + weightCol: Optional[str] = ..., + aggregationDepth: int = ..., + epsilon: float = ..., + blockSize: int = ... + ) -> LinearRegression: ... + def setEpsilon(self, value: float) -> LinearRegression: ... + def setMaxIter(self, value: int) -> LinearRegression: ... + def setRegParam(self, value: float) -> LinearRegression: ... + def setTol(self, value: float) -> LinearRegression: ... + def setElasticNetParam(self, value: float) -> LinearRegression: ... + def setFitIntercept(self, value: bool) -> LinearRegression: ... + def setStandardization(self, value: bool) -> LinearRegression: ... + def setWeightCol(self, value: str) -> LinearRegression: ... + def setSolver(self, value: str) -> LinearRegression: ... + def setAggregationDepth(self, value: int) -> LinearRegression: ... + def setLoss(self, value: str) -> LinearRegression: ... + def setBlockSize(self, value: int) -> LinearRegression: ... + +class LinearRegressionModel( + _JavaRegressionModel[Vector], + _LinearRegressionParams, + GeneralJavaMLWritable, + JavaMLReadable[LinearRegressionModel], + HasTrainingSummary[LinearRegressionSummary], +): + @property + def coefficients(self) -> Vector: ... + @property + def intercept(self) -> float: ... + @property + def summary(self) -> LinearRegressionTrainingSummary: ... + def evaluate(self, dataset: DataFrame) -> LinearRegressionSummary: ... + +class LinearRegressionSummary(JavaWrapper): + @property + def predictions(self) -> DataFrame: ... + @property + def predictionCol(self) -> str: ... + @property + def labelCol(self) -> str: ... + @property + def featuresCol(self) -> str: ... + @property + def explainedVariance(self) -> float: ... + @property + def meanAbsoluteError(self) -> float: ... + @property + def meanSquaredError(self) -> float: ... + @property + def rootMeanSquaredError(self) -> float: ... + @property + def r2(self) -> float: ... + @property + def r2adj(self) -> float: ... + @property + def residuals(self) -> DataFrame: ... + @property + def numInstances(self) -> int: ... + @property + def devianceResiduals(self) -> List[float]: ... + @property + def coefficientStandardErrors(self) -> List[float]: ... 
+ @property + def tValues(self) -> List[float]: ... + @property + def pValues(self) -> List[float]: ... + +class LinearRegressionTrainingSummary(LinearRegressionSummary): + @property + def objectiveHistory(self) -> List[float]: ... + @property + def totalIterations(self) -> int: ... + +class _IsotonicRegressionParams( + HasFeaturesCol, HasLabelCol, HasPredictionCol, HasWeightCol +): + isotonic: Param[bool] + featureIndex: Param[int] + def getIsotonic(self) -> bool: ... + def getFeatureIndex(self) -> int: ... + +class IsotonicRegression( + JavaEstimator[IsotonicRegressionModel], + _IsotonicRegressionParams, + HasWeightCol, + JavaMLWritable, + JavaMLReadable[IsotonicRegression], +): + def __init__( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + weightCol: Optional[str] = ..., + isotonic: bool = ..., + featureIndex: int = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + weightCol: Optional[str] = ..., + isotonic: bool = ..., + featureIndex: int = ... + ) -> IsotonicRegression: ... + def setIsotonic(self, value: bool) -> IsotonicRegression: ... + def setFeatureIndex(self, value: int) -> IsotonicRegression: ... + def setFeaturesCol(self, value: str) -> IsotonicRegression: ... + def setPredictionCol(self, value: str) -> IsotonicRegression: ... + def setLabelCol(self, value: str) -> IsotonicRegression: ... + def setWeightCol(self, value: str) -> IsotonicRegression: ... + +class IsotonicRegressionModel( + JavaModel, + _IsotonicRegressionParams, + JavaMLWritable, + JavaMLReadable[IsotonicRegressionModel], +): + def setFeaturesCol(self, value: str) -> IsotonicRegressionModel: ... + def setPredictionCol(self, value: str) -> IsotonicRegressionModel: ... + def setFeatureIndex(self, value: int) -> IsotonicRegressionModel: ... + @property + def boundaries(self) -> Vector: ... + @property + def predictions(self) -> Vector: ... + @property + def numFeatures(self) -> int: ... + def predict(self, value: float) -> float: ... + +class _DecisionTreeRegressorParams( + _DecisionTreeParams, _TreeRegressorParams, HasVarianceCol +): + def __init__(self, *args: Any): ... + +class DecisionTreeRegressor( + _JavaRegressor[DecisionTreeRegressionModel], + _DecisionTreeRegressorParams, + JavaMLWritable, + JavaMLReadable[DecisionTreeRegressor], +): + def __init__( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + minInstancesPerNode: int = ..., + minInfoGain: float = ..., + maxMemoryInMB: int = ..., + cacheNodeIds: bool = ..., + checkpointInterval: int = ..., + impurity: str = ..., + seed: Optional[int] = ..., + varianceCol: Optional[str] = ..., + weightCol: Optional[str] = ..., + leafCol: str = ..., + minWeightFractionPerNode: float = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + minInstancesPerNode: int = ..., + minInfoGain: float = ..., + maxMemoryInMB: int = ..., + cacheNodeIds: bool = ..., + checkpointInterval: int = ..., + impurity: str = ..., + seed: Optional[int] = ..., + varianceCol: Optional[str] = ..., + weightCol: Optional[str] = ..., + leafCol: str = ..., + minWeightFractionPerNode: float = ... + ) -> DecisionTreeRegressor: ... + def setMaxDepth(self, value: int) -> DecisionTreeRegressor: ... + def setMaxBins(self, value: int) -> DecisionTreeRegressor: ... 
+ def setMinInstancesPerNode(self, value: int) -> DecisionTreeRegressor: ... + def setMinWeightFractionPerNode(self, value: float) -> DecisionTreeRegressor: ... + def setMinInfoGain(self, value: float) -> DecisionTreeRegressor: ... + def setMaxMemoryInMB(self, value: int) -> DecisionTreeRegressor: ... + def setCacheNodeIds(self, value: bool) -> DecisionTreeRegressor: ... + def setImpurity(self, value: str) -> DecisionTreeRegressor: ... + def setCheckpointInterval(self, value: int) -> DecisionTreeRegressor: ... + def setSeed(self, value: int) -> DecisionTreeRegressor: ... + def setWeightCol(self, value: str) -> DecisionTreeRegressor: ... + def setVarianceCol(self, value: str) -> DecisionTreeRegressor: ... + +class DecisionTreeRegressionModel( + _JavaRegressionModel[Vector], + _DecisionTreeModel, + _DecisionTreeRegressorParams, + JavaMLWritable, + JavaMLReadable[DecisionTreeRegressionModel], +): + def setVarianceCol(self, value: str) -> DecisionTreeRegressionModel: ... + @property + def featureImportances(self) -> Vector: ... + +class _RandomForestRegressorParams(_RandomForestParams, _TreeRegressorParams): + def __init__(self, *args: Any): ... + +class RandomForestRegressor( + _JavaRegressor[RandomForestRegressionModel], + _RandomForestRegressorParams, + JavaMLWritable, + JavaMLReadable[RandomForestRegressor], +): + def __init__( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + minInstancesPerNode: int = ..., + minInfoGain: float = ..., + maxMemoryInMB: int = ..., + cacheNodeIds: bool = ..., + checkpointInterval: int = ..., + impurity: str = ..., + subsamplingRate: float = ..., + seed: Optional[int] = ..., + numTrees: int = ..., + featureSubsetStrategy: str = ..., + leafCol: str = ..., + minWeightFractionPerNode: float = ..., + weightCol: Optional[str] = ..., + bootstrap: Optional[bool] = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + minInstancesPerNode: int = ..., + minInfoGain: float = ..., + maxMemoryInMB: int = ..., + cacheNodeIds: bool = ..., + checkpointInterval: int = ..., + impurity: str = ..., + subsamplingRate: float = ..., + seed: Optional[int] = ..., + numTrees: int = ..., + featureSubsetStrategy: str = ..., + leafCol: str = ..., + minWeightFractionPerNode: float = ..., + weightCol: Optional[str] = ..., + bootstrap: Optional[bool] = ... + ) -> RandomForestRegressor: ... + def setMaxDepth(self, value: int) -> RandomForestRegressor: ... + def setMaxBins(self, value: int) -> RandomForestRegressor: ... + def setMinInstancesPerNode(self, value: int) -> RandomForestRegressor: ... + def setMinInfoGain(self, value: float) -> RandomForestRegressor: ... + def setMaxMemoryInMB(self, value: int) -> RandomForestRegressor: ... + def setCacheNodeIds(self, value: bool) -> RandomForestRegressor: ... + def setImpurity(self, value: str) -> RandomForestRegressor: ... + def setNumTrees(self, value: int) -> RandomForestRegressor: ... + def setBootstrap(self, value: bool) -> RandomForestRegressor: ... + def setSubsamplingRate(self, value: float) -> RandomForestRegressor: ... + def setFeatureSubsetStrategy(self, value: str) -> RandomForestRegressor: ... + def setCheckpointInterval(self, value: int) -> RandomForestRegressor: ... + def setSeed(self, value: int) -> RandomForestRegressor: ... + def setWeightCol(self, value: str) -> RandomForestRegressor: ... 
+    def setMinWeightFractionPerNode(self, value: float) -> RandomForestRegressor: ...
+
+class RandomForestRegressionModel(
+    _JavaRegressionModel[Vector],
+    _TreeEnsembleModel,
+    _RandomForestRegressorParams,
+    JavaMLWritable,
+    JavaMLReadable,
+):
+    @property
+    def trees(self) -> List[DecisionTreeRegressionModel]: ...
+    @property
+    def featureImportances(self) -> Vector: ...
+
+class _GBTRegressorParams(_GBTParams, _TreeRegressorParams):
+    supportedLossTypes: List[str]
+    lossType: Param[str]
+    def __init__(self, *args: Any): ...
+    def getLossType(self) -> str: ...
+
+class GBTRegressor(
+    _JavaRegressor[GBTRegressionModel],
+    _GBTRegressorParams,
+    JavaMLWritable,
+    JavaMLReadable[GBTRegressor],
+):
+    def __init__(
+        self,
+        *,
+        featuresCol: str = ...,
+        labelCol: str = ...,
+        predictionCol: str = ...,
+        maxDepth: int = ...,
+        maxBins: int = ...,
+        minInstancesPerNode: int = ...,
+        minInfoGain: float = ...,
+        maxMemoryInMB: int = ...,
+        cacheNodeIds: bool = ...,
+        subsamplingRate: float = ...,
+        checkpointInterval: int = ...,
+        lossType: str = ...,
+        maxIter: int = ...,
+        stepSize: float = ...,
+        seed: Optional[int] = ...,
+        impurity: str = ...,
+        featureSubsetStrategy: str = ...,
+        validationTol: float = ...,
+        validationIndicatorCol: Optional[str] = ...,
+        leafCol: str = ...,
+        minWeightFractionPerNode: float = ...,
+        weightCol: Optional[str] = ...
+    ) -> None: ...
+    def setParams(
+        self,
+        *,
+        featuresCol: str = ...,
+        labelCol: str = ...,
+        predictionCol: str = ...,
+        maxDepth: int = ...,
+        maxBins: int = ...,
+        minInstancesPerNode: int = ...,
+        minInfoGain: float = ...,
+        maxMemoryInMB: int = ...,
+        cacheNodeIds: bool = ...,
+        subsamplingRate: float = ...,
+        checkpointInterval: int = ...,
+        lossType: str = ...,
+        maxIter: int = ...,
+        stepSize: float = ...,
+        seed: Optional[int] = ...,
+        impurity: str = ...,
+        featureSubsetStrategy: str = ...,
+        validationTol: float = ...,
+        validationIndicatorCol: Optional[str] = ...,
+        leafCol: str = ...,
+        minWeightFractionPerNode: float = ...,
+        weightCol: Optional[str] = ...
+    ) -> GBTRegressor: ...
+    def setMaxDepth(self, value: int) -> GBTRegressor: ...
+    def setMaxBins(self, value: int) -> GBTRegressor: ...
+    def setMinInstancesPerNode(self, value: int) -> GBTRegressor: ...
+    def setMinInfoGain(self, value: float) -> GBTRegressor: ...
+    def setMaxMemoryInMB(self, value: int) -> GBTRegressor: ...
+    def setCacheNodeIds(self, value: bool) -> GBTRegressor: ...
+    def setImpurity(self, value: str) -> GBTRegressor: ...
+    def setLossType(self, value: str) -> GBTRegressor: ...
+    def setSubsamplingRate(self, value: float) -> GBTRegressor: ...
+    def setFeatureSubsetStrategy(self, value: str) -> GBTRegressor: ...
+    def setValidationIndicatorCol(self, value: str) -> GBTRegressor: ...
+    def setMaxIter(self, value: int) -> GBTRegressor: ...
+    def setCheckpointInterval(self, value: int) -> GBTRegressor: ...
+    def setSeed(self, value: int) -> GBTRegressor: ...
+    def setStepSize(self, value: float) -> GBTRegressor: ...
+    def setWeightCol(self, value: str) -> GBTRegressor: ...
+    def setMinWeightFractionPerNode(self, value: float) -> GBTRegressor: ...
+
+class GBTRegressionModel(
+    _JavaRegressionModel[Vector],
+    _TreeEnsembleModel,
+    _GBTRegressorParams,
+    JavaMLWritable,
+    JavaMLReadable[GBTRegressionModel],
+):
+    @property
+    def featureImportances(self) -> Vector: ...
+    @property
+    def trees(self) -> List[DecisionTreeRegressionModel]: ...
+    def evaluateEachIteration(self, dataset: DataFrame, loss: str) -> List[float]: ...
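
A hedged sketch of the ensemble members typed above (`trees`, `featureImportances`, `evaluateEachIteration`); the local session and toy DataFrame are assumptions made only for illustration, not lines of the patch:

```python
# Illustrative sketch: exercising GBTRegressionModel members declared above.
from pyspark.sql import SparkSession
from pyspark.ml.linalg import Vectors
from pyspark.ml.regression import GBTRegressor

spark = SparkSession.builder.master("local[1]").getOrCreate()
train = spark.createDataFrame(
    [(0.0, Vectors.dense(0.0)), (1.0, Vectors.dense(1.0)), (2.0, Vectors.dense(2.0))],
    ["label", "features"],
)

model = GBTRegressor(maxIter=3, maxDepth=2, seed=42).fit(train)
print(model.featureImportances)                       # Vector
print([t.numNodes for t in model.trees])              # per-tree DecisionTreeRegressionModel
print(model.evaluateEachIteration(train, "squared"))  # List[float], one entry per iteration
```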
+ +class _AFTSurvivalRegressionParams( + _PredictorParams, + HasMaxIter, + HasTol, + HasFitIntercept, + HasAggregationDepth, + HasBlockSize, +): + censorCol: Param[str] + quantileProbabilities: Param[List[float]] + quantilesCol: Param[str] + def __init__(self, *args: Any): ... + def getCensorCol(self) -> str: ... + def getQuantileProbabilities(self) -> List[float]: ... + def getQuantilesCol(self) -> str: ... + +class AFTSurvivalRegression( + _JavaRegressor[AFTSurvivalRegressionModel], + _AFTSurvivalRegressionParams, + JavaMLWritable, + JavaMLReadable[AFTSurvivalRegression], +): + def __init__( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + fitIntercept: bool = ..., + maxIter: int = ..., + tol: float = ..., + censorCol: str = ..., + quantileProbabilities: List[float] = ..., + quantilesCol: Optional[str] = ..., + aggregationDepth: int = ..., + blockSize: int = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + fitIntercept: bool = ..., + maxIter: int = ..., + tol: float = ..., + censorCol: str = ..., + quantileProbabilities: List[float] = ..., + quantilesCol: Optional[str] = ..., + aggregationDepth: int = ..., + blockSize: int = ... + ) -> AFTSurvivalRegression: ... + def setCensorCol(self, value: str) -> AFTSurvivalRegression: ... + def setQuantileProbabilities(self, value: List[float]) -> AFTSurvivalRegression: ... + def setQuantilesCol(self, value: str) -> AFTSurvivalRegression: ... + def setMaxIter(self, value: int) -> AFTSurvivalRegression: ... + def setTol(self, value: float) -> AFTSurvivalRegression: ... + def setFitIntercept(self, value: bool) -> AFTSurvivalRegression: ... + def setAggregationDepth(self, value: int) -> AFTSurvivalRegression: ... + def setBlockSize(self, value: int) -> AFTSurvivalRegression: ... + +class AFTSurvivalRegressionModel( + _JavaRegressionModel[Vector], + _AFTSurvivalRegressionParams, + JavaMLWritable, + JavaMLReadable[AFTSurvivalRegressionModel], +): + def setQuantileProbabilities( + self, value: List[float] + ) -> AFTSurvivalRegressionModel: ... + def setQuantilesCol(self, value: str) -> AFTSurvivalRegressionModel: ... + @property + def coefficients(self) -> Vector: ... + @property + def intercept(self) -> float: ... + @property + def scale(self) -> float: ... + def predictQuantiles(self, features: Vector) -> Vector: ... + def predict(self, features: Vector) -> float: ... + +class _GeneralizedLinearRegressionParams( + _PredictorParams, + HasFitIntercept, + HasMaxIter, + HasTol, + HasRegParam, + HasWeightCol, + HasSolver, + HasAggregationDepth, +): + family: Param[str] + link: Param[str] + linkPredictionCol: Param[str] + variancePower: Param[float] + linkPower: Param[float] + solver: Param[str] + offsetCol: Param[str] + def __init__(self, *args: Any): ... + def getFamily(self) -> str: ... + def getLinkPredictionCol(self) -> str: ... + def getLink(self) -> str: ... + def getVariancePower(self) -> float: ... + def getLinkPower(self) -> float: ... + def getOffsetCol(self) -> str: ... 
+ +class GeneralizedLinearRegression( + _JavaRegressor[GeneralizedLinearRegressionModel], + _GeneralizedLinearRegressionParams, + JavaMLWritable, + JavaMLReadable[GeneralizedLinearRegression], +): + def __init__( + self, + *, + labelCol: str = ..., + featuresCol: str = ..., + predictionCol: str = ..., + family: str = ..., + link: Optional[str] = ..., + fitIntercept: bool = ..., + maxIter: int = ..., + tol: float = ..., + regParam: float = ..., + weightCol: Optional[str] = ..., + solver: str = ..., + linkPredictionCol: Optional[str] = ..., + variancePower: float = ..., + linkPower: Optional[float] = ..., + offsetCol: Optional[str] = ..., + aggregationDepth: int = ... + ) -> None: ... + def setParams( + self, + *, + labelCol: str = ..., + featuresCol: str = ..., + predictionCol: str = ..., + family: str = ..., + link: Optional[str] = ..., + fitIntercept: bool = ..., + maxIter: int = ..., + tol: float = ..., + regParam: float = ..., + weightCol: Optional[str] = ..., + solver: str = ..., + linkPredictionCol: Optional[str] = ..., + variancePower: float = ..., + linkPower: Optional[float] = ..., + offsetCol: Optional[str] = ..., + aggregationDepth: int = ... + ) -> GeneralizedLinearRegression: ... + def setFamily(self, value: str) -> GeneralizedLinearRegression: ... + def setLinkPredictionCol(self, value: str) -> GeneralizedLinearRegression: ... + def setLink(self, value: str) -> GeneralizedLinearRegression: ... + def setVariancePower(self, value: float) -> GeneralizedLinearRegression: ... + def setLinkPower(self, value: float) -> GeneralizedLinearRegression: ... + def setOffsetCol(self, value: str) -> GeneralizedLinearRegression: ... + def setMaxIter(self, value: int) -> GeneralizedLinearRegression: ... + def setRegParam(self, value: float) -> GeneralizedLinearRegression: ... + def setTol(self, value: float) -> GeneralizedLinearRegression: ... + def setFitIntercept(self, value: bool) -> GeneralizedLinearRegression: ... + def setWeightCol(self, value: str) -> GeneralizedLinearRegression: ... + def setSolver(self, value: str) -> GeneralizedLinearRegression: ... + def setAggregationDepth(self, value: int) -> GeneralizedLinearRegression: ... + +class GeneralizedLinearRegressionModel( + _JavaRegressionModel[Vector], + _GeneralizedLinearRegressionParams, + JavaMLWritable, + JavaMLReadable[GeneralizedLinearRegressionModel], + HasTrainingSummary[GeneralizedLinearRegressionTrainingSummary], +): + def setLinkPredictionCol(self, value: str) -> GeneralizedLinearRegressionModel: ... + @property + def coefficients(self) -> Vector: ... + @property + def intercept(self) -> float: ... + @property + def summary(self) -> GeneralizedLinearRegressionTrainingSummary: ... + def evaluate(self, dataset: DataFrame) -> GeneralizedLinearRegressionSummary: ... + +class GeneralizedLinearRegressionSummary(JavaWrapper): + @property + def predictions(self) -> DataFrame: ... + @property + def predictionCol(self) -> str: ... + @property + def rank(self) -> int: ... + @property + def degreesOfFreedom(self) -> int: ... + @property + def residualDegreeOfFreedom(self) -> int: ... + @property + def residualDegreeOfFreedomNull(self) -> int: ... + def residuals(self, residualsType: str = ...) -> DataFrame: ... + @property + def nullDeviance(self) -> float: ... + @property + def deviance(self) -> float: ... + @property + def dispersion(self) -> float: ... + @property + def aic(self) -> float: ... + +class GeneralizedLinearRegressionTrainingSummary(GeneralizedLinearRegressionSummary): + @property + def numIterations(self) -> int: ... 
+ @property + def solver(self) -> str: ... + @property + def coefficientStandardErrors(self) -> List[float]: ... + @property + def tValues(self) -> List[float]: ... + @property + def pValues(self) -> List[float]: ... + +class _FactorizationMachinesParams( + _PredictorParams, + HasMaxIter, + HasStepSize, + HasTol, + HasSolver, + HasSeed, + HasFitIntercept, + HasRegParam, + HasWeightCol, +): + factorSize: Param[int] + fitLinear: Param[bool] + miniBatchFraction: Param[float] + initStd: Param[float] + solver: Param[str] + def __init__(self, *args: Any): ... + def getFactorSize(self): ... + def getFitLinear(self): ... + def getMiniBatchFraction(self): ... + def getInitStd(self): ... + +class FMRegressor( + _JavaRegressor[FMRegressionModel], + _FactorizationMachinesParams, + JavaMLWritable, + JavaMLReadable[FMRegressor], +): + factorSize: Param[int] + fitLinear: Param[bool] + miniBatchFraction: Param[float] + initStd: Param[float] + solver: Param[str] + def __init__( + self, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + factorSize: int = ..., + fitIntercept: bool = ..., + fitLinear: bool = ..., + regParam: float = ..., + miniBatchFraction: float = ..., + initStd: float = ..., + maxIter: int = ..., + stepSize: float = ..., + tol: float = ..., + solver: str = ..., + seed: Optional[int] = ..., + ) -> None: ... + def setParams( + self, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + factorSize: int = ..., + fitIntercept: bool = ..., + fitLinear: bool = ..., + regParam: float = ..., + miniBatchFraction: float = ..., + initStd: float = ..., + maxIter: int = ..., + stepSize: float = ..., + tol: float = ..., + solver: str = ..., + seed: Optional[int] = ..., + ) -> FMRegressor: ... + def setFactorSize(self, value: int) -> FMRegressor: ... + def setFitLinear(self, value: bool) -> FMRegressor: ... + def setMiniBatchFraction(self, value: float) -> FMRegressor: ... + def setInitStd(self, value: float) -> FMRegressor: ... + def setMaxIter(self, value: int) -> FMRegressor: ... + def setStepSize(self, value: float) -> FMRegressor: ... + def setTol(self, value: float) -> FMRegressor: ... + def setSolver(self, value: str) -> FMRegressor: ... + def setSeed(self, value: int) -> FMRegressor: ... + def setFitIntercept(self, value: bool) -> FMRegressor: ... + def setRegParam(self, value: float) -> FMRegressor: ... + +class FMRegressionModel( + _JavaRegressionModel, + _FactorizationMachinesParams, + JavaMLWritable, + JavaMLReadable[FMRegressionModel], +): + @property + def intercept(self) -> float: ... + @property + def linear(self) -> Vector: ... + @property + def factors(self) -> Matrix: ... diff --git a/python/pyspark/ml/stat.pyi b/python/pyspark/ml/stat.pyi new file mode 100644 index 0000000000000..83b0f7eacb8f0 --- /dev/null +++ b/python/pyspark/ml/stat.pyi @@ -0,0 +1,89 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Optional + +from pyspark.ml.linalg import Matrix, Vector +from pyspark.ml.wrapper import JavaWrapper +from pyspark.sql.column import Column +from pyspark.sql.dataframe import DataFrame + +from py4j.java_gateway import JavaObject # type: ignore[import] + +class ChiSquareTest: + @staticmethod + def test( + dataset: DataFrame, featuresCol: str, labelCol: str, flatten: bool = ... + ) -> DataFrame: ... + +class Correlation: + @staticmethod + def corr(dataset: DataFrame, column: str, method: str = ...) -> DataFrame: ... + +class KolmogorovSmirnovTest: + @staticmethod + def test( + dataset: DataFrame, sampleCol: str, distName: str, *params: float + ) -> DataFrame: ... + +class Summarizer: + @staticmethod + def mean(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def sum(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def variance(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def std(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def count(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def numNonZeros(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def max(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def min(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def normL1(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def normL2(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def metrics(*metrics: str) -> SummaryBuilder: ... + +class SummaryBuilder(JavaWrapper): + def __init__(self, jSummaryBuilder: JavaObject) -> None: ... + def summary( + self, featuresCol: Column, weightCol: Optional[Column] = ... + ) -> Column: ... + +class MultivariateGaussian: + mean: Vector + cov: Matrix + def __init__(self, mean: Vector, cov: Matrix) -> None: ... + +class ANOVATest: + @staticmethod + def test( + dataset: DataFrame, featuresCol: str, labelCol: str, flatten: bool = ... + ) -> DataFrame: ... + +class FValueTest: + @staticmethod + def test( + dataset: DataFrame, featuresCol: str, labelCol: str, flatten: bool = ... + ) -> DataFrame: ... 
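
A hedged sketch of how the `pyspark.ml.stat` helpers typed above are typically invoked; the local session and toy DataFrame are assumptions for illustration only, not lines of the patch:

```python
# Illustrative sketch: Correlation and Summarizer as typed in stat.pyi above.
from pyspark.sql import SparkSession
from pyspark.ml.linalg import Vectors
from pyspark.ml.stat import Correlation, Summarizer

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame(
    [(Vectors.dense(1.0, 2.0),), (Vectors.dense(3.0, 5.0),), (Vectors.dense(5.0, 6.0),)],
    ["features"],
)

print(Correlation.corr(df, "features", "pearson").head()[0])   # correlation Matrix
summarizer = Summarizer.metrics("mean", "count")               # SummaryBuilder
df.select(summarizer.summary(df.features)).show(truncate=False)
```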
diff --git a/python/pyspark/ml/tests/test_algorithms.py b/python/pyspark/ml/tests/test_algorithms.py index 492e849658f7a..03653c25b4ad4 100644 --- a/python/pyspark/ml/tests/test_algorithms.py +++ b/python/pyspark/ml/tests/test_algorithms.py @@ -333,7 +333,7 @@ def test_linear_regression_with_huber_loss(self): from pyspark.ml.tests.test_algorithms import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/ml/tests/test_base.py b/python/pyspark/ml/tests/test_base.py index cba5369ca2623..d2c0bdfdf8556 100644 --- a/python/pyspark/ml/tests/test_base.py +++ b/python/pyspark/ml/tests/test_base.py @@ -70,7 +70,7 @@ def testDefaultFitMultiple(self): from pyspark.ml.tests.test_base import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/ml/tests/test_evaluation.py b/python/pyspark/ml/tests/test_evaluation.py index 7883df7882769..746605076f86b 100644 --- a/python/pyspark/ml/tests/test_evaluation.py +++ b/python/pyspark/ml/tests/test_evaluation.py @@ -56,7 +56,7 @@ def test_clustering_evaluator_with_cosine_distance(self): from pyspark.ml.tests.test_evaluation import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/ml/tests/test_feature.py b/python/pyspark/ml/tests/test_feature.py index 7fd8c0b669d9a..244110a986138 100644 --- a/python/pyspark/ml/tests/test_feature.py +++ b/python/pyspark/ml/tests/test_feature.py @@ -303,7 +303,7 @@ def test_apply_binary_term_freqs(self): from pyspark.ml.tests.test_feature import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/ml/tests/test_image.py b/python/pyspark/ml/tests/test_image.py index 069ffceb50103..ceecdae971c99 100644 --- a/python/pyspark/ml/tests/test_image.py +++ b/python/pyspark/ml/tests/test_image.py @@ -69,7 +69,7 @@ def test_read_images(self): from pyspark.ml.tests.test_image import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/ml/tests/test_linalg.py b/python/pyspark/ml/tests/test_linalg.py index 60dda82fe0911..18c01ddf88e67 100644 --- a/python/pyspark/ml/tests/test_linalg.py +++ b/python/pyspark/ml/tests/test_linalg.py @@ -381,7 +381,7 @@ def test_infer_schema(self): from pyspark.ml.tests.test_linalg import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/ml/tests/test_param.py b/python/pyspark/ml/tests/test_param.py index abee6d1be5e29..4cddf50f36bdf 100644 --- a/python/pyspark/ml/tests/test_param.py +++ b/python/pyspark/ml/tests/test_param.py @@ -372,7 +372,7 @@ def test_java_params(self): from pyspark.ml.tests.test_param import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: 
ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/ml/tests/test_persistence.py b/python/pyspark/ml/tests/test_persistence.py index 4acf58da21531..826e6cd351d32 100644 --- a/python/pyspark/ml/tests/test_persistence.py +++ b/python/pyspark/ml/tests/test_persistence.py @@ -456,7 +456,7 @@ def test_default_read_write_default_params(self): from pyspark.ml.tests.test_persistence import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/ml/tests/test_pipeline.py b/python/pyspark/ml/tests/test_pipeline.py index 011e6537a8db5..c29b2d3f44679 100644 --- a/python/pyspark/ml/tests/test_pipeline.py +++ b/python/pyspark/ml/tests/test_pipeline.py @@ -62,7 +62,7 @@ def doTransform(pipeline): from pyspark.ml.tests.test_pipeline import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/ml/tests/test_stat.py b/python/pyspark/ml/tests/test_stat.py index 666d0aec58db5..a2403b38873db 100644 --- a/python/pyspark/ml/tests/test_stat.py +++ b/python/pyspark/ml/tests/test_stat.py @@ -43,7 +43,7 @@ def test_chisquaretest(self): from pyspark.ml.tests.test_stat import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/ml/tests/test_training_summary.py b/python/pyspark/ml/tests/test_training_summary.py index cb0effbe2bf2a..7dafdcb3d683b 100644 --- a/python/pyspark/ml/tests/test_training_summary.py +++ b/python/pyspark/ml/tests/test_training_summary.py @@ -445,7 +445,7 @@ def test_kmeans_summary(self): from pyspark.ml.tests.test_training_summary import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/ml/tests/test_tuning.py b/python/pyspark/ml/tests/test_tuning.py index c9163627fdd54..729e46419ae2c 100644 --- a/python/pyspark/ml/tests/test_tuning.py +++ b/python/pyspark/ml/tests/test_tuning.py @@ -864,7 +864,7 @@ def test_copy(self): from pyspark.ml.tests.test_tuning import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/ml/tests/test_wrapper.py b/python/pyspark/ml/tests/test_wrapper.py index e6eef8a7de97a..31475299c7b98 100644 --- a/python/pyspark/ml/tests/test_wrapper.py +++ b/python/pyspark/ml/tests/test_wrapper.py @@ -21,7 +21,9 @@ from pyspark.ml.linalg import DenseVector, Vectors from pyspark.ml.regression import LinearRegression -from pyspark.ml.wrapper import _java2py, _py2java, JavaParams, JavaWrapper +from pyspark.ml.wrapper import ( # type: ignore[attr-defined] + _java2py, _py2java, JavaParams, JavaWrapper +) from pyspark.testing.mllibutils import MLlibTestCase from pyspark.testing.mlutils import SparkSessionTestCase from pyspark.testing.utils import eventually @@ -120,7 +122,7 @@ def test_new_java_array(self): from 
pyspark.ml.tests.test_wrapper import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/ml/tree.pyi b/python/pyspark/ml/tree.pyi new file mode 100644 index 0000000000000..ff6307654c569 --- /dev/null +++ b/python/pyspark/ml/tree.pyi @@ -0,0 +1,112 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import List, Sequence +from pyspark.ml._typing import P, T + +from pyspark.ml.linalg import Vector +from pyspark import since as since # noqa: F401 +from pyspark.ml.common import inherit_doc as inherit_doc # noqa: F401 +from pyspark.ml.param import Param, Params as Params +from pyspark.ml.param.shared import ( # noqa: F401 + HasCheckpointInterval as HasCheckpointInterval, + HasMaxIter as HasMaxIter, + HasSeed as HasSeed, + HasStepSize as HasStepSize, + HasValidationIndicatorCol as HasValidationIndicatorCol, + HasWeightCol as HasWeightCol, + Param as Param, + TypeConverters as TypeConverters, +) +from pyspark.ml.wrapper import JavaPredictionModel as JavaPredictionModel + +class _DecisionTreeModel(JavaPredictionModel[T]): + @property + def numNodes(self) -> int: ... + @property + def depth(self) -> int: ... + @property + def toDebugString(self) -> str: ... + def predictLeaf(self, value: Vector) -> float: ... + +class _DecisionTreeParams(HasCheckpointInterval, HasSeed, HasWeightCol): + leafCol: Param[str] + maxDepth: Param[int] + maxBins: Param[int] + minInstancesPerNode: Param[int] + minWeightFractionPerNode: Param[float] + minInfoGain: Param[float] + maxMemoryInMB: Param[int] + cacheNodeIds: Param[bool] + def __init__(self) -> None: ... + def setLeafCol(self: P, value: str) -> P: ... + def getLeafCol(self) -> str: ... + def getMaxDepth(self) -> int: ... + def getMaxBins(self) -> int: ... + def getMinInstancesPerNode(self) -> int: ... + def getMinInfoGain(self) -> float: ... + def getMaxMemoryInMB(self) -> int: ... + def getCacheNodeIds(self) -> bool: ... + +class _TreeEnsembleModel(JavaPredictionModel[T]): + @property + def trees(self) -> Sequence[_DecisionTreeModel]: ... + @property + def getNumTrees(self) -> int: ... + @property + def treeWeights(self) -> List[float]: ... + @property + def totalNumNodes(self) -> int: ... + @property + def toDebugString(self) -> str: ... + +class _TreeEnsembleParams(_DecisionTreeParams): + subsamplingRate: Param[float] + supportedFeatureSubsetStrategies: List[str] + featureSubsetStrategy: Param[str] + def __init__(self) -> None: ... + def getSubsamplingRate(self) -> float: ... + def getFeatureSubsetStrategy(self) -> str: ... 
+ +class _RandomForestParams(_TreeEnsembleParams): + numTrees: Param[int] + bootstrap: Param[bool] + def __init__(self) -> None: ... + def getNumTrees(self) -> int: ... + def getBootstrap(self) -> bool: ... + +class _GBTParams( + _TreeEnsembleParams, HasMaxIter, HasStepSize, HasValidationIndicatorCol +): + stepSize: Param[float] + validationTol: Param[float] + def getValidationTol(self) -> float: ... + +class _HasVarianceImpurity(Params): + supportedImpurities: List[str] + impurity: Param[str] + def __init__(self) -> None: ... + def getImpurity(self) -> str: ... + +class _TreeClassifierParams(Params): + supportedImpurities: List[str] + impurity: Param[str] + def __init__(self) -> None: ... + def getImpurity(self) -> str: ... + +class _TreeRegressorParams(_HasVarianceImpurity): ... diff --git a/python/pyspark/ml/tuning.pyi b/python/pyspark/ml/tuning.pyi new file mode 100644 index 0000000000000..63cd75f0e1d74 --- /dev/null +++ b/python/pyspark/ml/tuning.pyi @@ -0,0 +1,185 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import Any, List, Optional, Tuple, Type +from pyspark.ml._typing import ParamMap + +from pyspark.ml import Estimator, Model +from pyspark.ml.evaluation import Evaluator +from pyspark.ml.param import Param +from pyspark.ml.param.shared import HasCollectSubModels, HasParallelism, HasSeed +from pyspark.ml.util import MLReader, MLReadable, MLWriter, MLWritable + +class ParamGridBuilder: + def __init__(self) -> None: ... + def addGrid(self, param: Param, values: List[Any]) -> ParamGridBuilder: ... + @overload + def baseOn(self, __args: ParamMap) -> ParamGridBuilder: ... + @overload + def baseOn(self, *args: Tuple[Param, Any]) -> ParamGridBuilder: ... + def build(self) -> List[ParamMap]: ... + +class _ValidatorParams(HasSeed): + estimator: Param[Estimator] + estimatorParamMaps: Param[List[ParamMap]] + evaluator: Param[Evaluator] + def getEstimator(self) -> Estimator: ... + def getEstimatorParamMaps(self) -> List[ParamMap]: ... + def getEvaluator(self) -> Evaluator: ... + +class _CrossValidatorParams(_ValidatorParams): + numFolds: Param[int] + foldCol: Param[str] + def __init__(self, *args: Any): ... + def getNumFolds(self) -> int: ... + def getFoldCol(self) -> str: ... + +class CrossValidator( + Estimator[CrossValidatorModel], + _CrossValidatorParams, + HasParallelism, + HasCollectSubModels, + MLReadable[CrossValidator], + MLWritable, +): + def __init__( + self, + *, + estimator: Optional[Estimator] = ..., + estimatorParamMaps: Optional[List[ParamMap]] = ..., + evaluator: Optional[Evaluator] = ..., + numFolds: int = ..., + seed: Optional[int] = ..., + parallelism: int = ..., + collectSubModels: bool = ..., + foldCol: str = ... + ) -> None: ... 
+ def setParams( + self, + *, + estimator: Optional[Estimator] = ..., + estimatorParamMaps: Optional[List[ParamMap]] = ..., + evaluator: Optional[Evaluator] = ..., + numFolds: int = ..., + seed: Optional[int] = ..., + parallelism: int = ..., + collectSubModels: bool = ..., + foldCol: str = ... + ) -> CrossValidator: ... + def setEstimator(self, value: Estimator) -> CrossValidator: ... + def setEstimatorParamMaps(self, value: List[ParamMap]) -> CrossValidator: ... + def setEvaluator(self, value: Evaluator) -> CrossValidator: ... + def setNumFolds(self, value: int) -> CrossValidator: ... + def setFoldCol(self, value: str) -> CrossValidator: ... + def setSeed(self, value: int) -> CrossValidator: ... + def setParallelism(self, value: int) -> CrossValidator: ... + def setCollectSubModels(self, value: bool) -> CrossValidator: ... + def copy(self, extra: Optional[ParamMap] = ...) -> CrossValidator: ... + def write(self) -> MLWriter: ... + @classmethod + def read(cls: Type[CrossValidator]) -> MLReader: ... + +class CrossValidatorModel( + Model, _CrossValidatorParams, MLReadable[CrossValidatorModel], MLWritable +): + bestModel: Model + avgMetrics: List[float] + subModels: List[List[Model]] + def __init__( + self, + bestModel: Model, + avgMetrics: List[float] = ..., + subModels: Optional[List[List[Model]]] = ..., + ) -> None: ... + def copy(self, extra: Optional[ParamMap] = ...) -> CrossValidatorModel: ... + def write(self) -> MLWriter: ... + @classmethod + def read(cls: Type[CrossValidatorModel]) -> MLReader: ... + +class _TrainValidationSplitParams(_ValidatorParams): + trainRatio: Param[float] + def __init__(self, *args: Any): ... + def getTrainRatio(self) -> float: ... + +class TrainValidationSplit( + Estimator[TrainValidationSplitModel], + _TrainValidationSplitParams, + HasParallelism, + HasCollectSubModels, + MLReadable[TrainValidationSplit], + MLWritable, +): + def __init__( + self, + *, + estimator: Optional[Estimator] = ..., + estimatorParamMaps: Optional[List[ParamMap]] = ..., + evaluator: Optional[Evaluator] = ..., + trainRatio: float = ..., + parallelism: int = ..., + collectSubModels: bool = ..., + seed: Optional[int] = ... + ) -> None: ... + def setParams( + self, + *, + estimator: Optional[Estimator] = ..., + estimatorParamMaps: Optional[List[ParamMap]] = ..., + evaluator: Optional[Evaluator] = ..., + trainRatio: float = ..., + parallelism: int = ..., + collectSubModels: bool = ..., + seed: Optional[int] = ... + ) -> TrainValidationSplit: ... + def setEstimator(self, value: Estimator) -> TrainValidationSplit: ... + def setEstimatorParamMaps(self, value: List[ParamMap]) -> TrainValidationSplit: ... + def setEvaluator(self, value: Evaluator) -> TrainValidationSplit: ... + def setTrainRatio(self, value: float) -> TrainValidationSplit: ... + def setSeed(self, value: int) -> TrainValidationSplit: ... + def setParallelism(self, value: int) -> TrainValidationSplit: ... + def setCollectSubModels(self, value: bool) -> TrainValidationSplit: ... + def copy(self, extra: Optional[ParamMap] = ...) -> TrainValidationSplit: ... + def write(self) -> MLWriter: ... + @classmethod + def read(cls: Type[TrainValidationSplit]) -> MLReader: ... + +class TrainValidationSplitModel( + Model, + _TrainValidationSplitParams, + MLReadable[TrainValidationSplitModel], + MLWritable, +): + bestModel: Model + validationMetrics: List[float] + subModels: List[Model] + def __init__( + self, + bestModel: Model, + validationMetrics: List[float] = ..., + subModels: Optional[List[Model]] = ..., + ) -> None: ... 
+ def setEstimator(self, value: Estimator) -> TrainValidationSplitModel: ... + def setEstimatorParamMaps( + self, value: List[ParamMap] + ) -> TrainValidationSplitModel: ... + def setEvaluator(self, value: Evaluator) -> TrainValidationSplitModel: ... + def copy(self, extra: Optional[ParamMap] = ...) -> TrainValidationSplitModel: ... + def write(self) -> MLWriter: ... + @classmethod + def read(cls: Type[TrainValidationSplitModel]) -> MLReader: ... diff --git a/python/pyspark/ml/util.pyi b/python/pyspark/ml/util.pyi new file mode 100644 index 0000000000000..d0781b2e26ed5 --- /dev/null +++ b/python/pyspark/ml/util.pyi @@ -0,0 +1,128 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, Dict, Generic, Optional, Type, TypeVar, Union + +from pyspark import SparkContext as SparkContext, since as since # noqa: F401 +from pyspark.ml.common import inherit_doc as inherit_doc # noqa: F401 +from pyspark.sql import SparkSession as SparkSession +from pyspark.util import VersionUtils as VersionUtils # noqa: F401 + +S = TypeVar("S") +R = TypeVar("R", bound=MLReadable) + +class Identifiable: + uid: str + def __init__(self) -> None: ... + +class BaseReadWrite: + def __init__(self) -> None: ... + def session(self, sparkSession: SparkSession) -> Union[MLWriter, MLReader]: ... + @property + def sparkSession(self) -> SparkSession: ... + @property + def sc(self) -> SparkContext: ... + +class MLWriter(BaseReadWrite): + shouldOverwrite: bool = ... + def __init__(self) -> None: ... + def save(self, path: str) -> None: ... + def saveImpl(self, path: str) -> None: ... + def overwrite(self) -> MLWriter: ... + +class GeneralMLWriter(MLWriter): + source: str + def format(self, source: str) -> MLWriter: ... + +class JavaMLWriter(MLWriter): + def __init__(self, instance: JavaMLWritable) -> None: ... + def save(self, path: str) -> None: ... + def overwrite(self) -> JavaMLWriter: ... + def option(self, key: str, value: Any) -> JavaMLWriter: ... + def session(self, sparkSession: SparkSession) -> JavaMLWriter: ... + +class GeneralJavaMLWriter(JavaMLWriter): + def __init__(self, instance: MLWritable) -> None: ... + def format(self, source: str) -> GeneralJavaMLWriter: ... + +class MLWritable: + def write(self) -> MLWriter: ... + def save(self, path: str) -> None: ... + +class JavaMLWritable(MLWritable): + def write(self) -> JavaMLWriter: ... + +class GeneralJavaMLWritable(JavaMLWritable): + def write(self) -> GeneralJavaMLWriter: ... + +class MLReader(BaseReadWrite, Generic[R]): + def load(self, path: str) -> R: ... + +class JavaMLReader(MLReader[R]): + def __init__(self, clazz: Type[JavaMLReadable]) -> None: ... + def load(self, path: str) -> R: ... + def session(self, sparkSession: SparkSession) -> JavaMLReader[R]: ... 
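For context (illustrative only, not part of the patch), the `ParamGridBuilder` and `CrossValidator` signatures declared in `tuning.pyi` above are meant to cover usage along these lines; the grid values are arbitrary and `train_df` in the final comment is a hypothetical DataFrame.

```
from pyspark.sql import SparkSession
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.evaluation import BinaryClassificationEvaluator
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder

spark = SparkSession.builder.getOrCreate()

lr = LogisticRegression()
# ParamGridBuilder.build() is annotated to return List[ParamMap].
grid = ParamGridBuilder().addGrid(lr.regParam, [0.0, 0.1]).build()

cv = CrossValidator(
    estimator=lr,
    estimatorParamMaps=grid,
    evaluator=BinaryClassificationEvaluator(),
    numFolds=2,
)
folds: int = cv.getNumFolds()
# cv.fit(train_df) would be typed as returning CrossValidatorModel, whose
# avgMetrics attribute is declared as List[float] in the stub above.
```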
+ +class MLReadable(Generic[R]): + @classmethod + def read(cls: Type[R]) -> MLReader[R]: ... + @classmethod + def load(cls: Type[R], path: str) -> R: ... + +class JavaMLReadable(MLReadable[R]): + @classmethod + def read(cls: Type[R]) -> JavaMLReader[R]: ... + +class DefaultParamsWritable(MLWritable): + def write(self) -> MLWriter: ... + +class DefaultParamsWriter(MLWriter): + instance: DefaultParamsWritable + def __init__(self, instance: DefaultParamsWritable) -> None: ... + def saveImpl(self, path: str) -> None: ... + @staticmethod + def saveMetadata( + instance: DefaultParamsWritable, + path: str, + sc: SparkContext, + extraMetadata: Optional[Dict[str, Any]] = ..., + paramMap: Optional[Dict[str, Any]] = ..., + ) -> None: ... + +class DefaultParamsReadable(MLReadable[R]): + @classmethod + def read(cls: Type[R]) -> MLReader[R]: ... + +class DefaultParamsReader(MLReader[R]): + cls: Type[R] + def __init__(self, cls: Type[MLReadable]) -> None: ... + def load(self, path: str) -> R: ... + @staticmethod + def loadMetadata( + path: str, sc: SparkContext, expectedClassName: str = ... + ) -> Dict[str, Any]: ... + @staticmethod + def getAndSetParams(instance: R, metadata: Dict[str, Any]) -> None: ... + @staticmethod + def loadParamsInstance(path: str, sc: SparkContext) -> R: ... + +class HasTrainingSummary(Generic[S]): + @property + def hasSummary(self) -> bool: ... + @property + def summary(self) -> S: ... diff --git a/python/pyspark/ml/wrapper.pyi b/python/pyspark/ml/wrapper.pyi new file mode 100644 index 0000000000000..830224c177d1e --- /dev/null +++ b/python/pyspark/ml/wrapper.pyi @@ -0,0 +1,48 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import abc +from typing import Any, Optional +from pyspark.ml._typing import P, T, JM, ParamMap + +from pyspark.ml import Estimator, Predictor, PredictionModel, Transformer, Model +from pyspark.ml.base import _PredictorParams +from pyspark.ml.param import Param, Params + +class JavaWrapper: + def __init__(self, java_obj: Optional[Any] = ...) -> None: ... + def __del__(self) -> None: ... + +class JavaParams(JavaWrapper, Params, metaclass=abc.ABCMeta): + def copy(self: P, extra: Optional[ParamMap] = ...) -> P: ... + def clear(self, param: Param) -> None: ... + +class JavaEstimator(JavaParams, Estimator[JM], metaclass=abc.ABCMeta): ... +class JavaTransformer(JavaParams, Transformer, metaclass=abc.ABCMeta): ... + +class JavaModel(JavaTransformer, Model, metaclass=abc.ABCMeta): + def __init__(self, java_model: Optional[Any] = ...) -> None: ... + +class JavaPredictor( + Predictor[JM], JavaEstimator, _PredictorParams, metaclass=abc.ABCMeta +): ... + +class JavaPredictionModel(PredictionModel[T], JavaModel, _PredictorParams): + @property + def numFeatures(self) -> int: ... + def predict(self, value: T) -> float: ... 
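To show what the `util.pyi` read/write generics and the `wrapper.pyi` `JavaPredictionModel[T]` annotations buy in practice, here is a rough sketch (not part of the patch; the save path and the two-point training set are made up) assuming the existing PySpark ML API.

```
from pyspark.sql import SparkSession
from pyspark.ml.linalg import Vectors
from pyspark.ml.regression import LinearRegression, LinearRegressionModel

spark = SparkSession.builder.getOrCreate()
# Hypothetical two-point training set.
df = spark.createDataFrame(
    [(1.0, Vectors.dense(1.0)), (2.0, Vectors.dense(2.0))], ["label", "features"]
)
model = LinearRegression().fit(df)

# JavaPredictionModel[T] declares numFeatures -> int and predict(value: T) -> float.
n_features: int = model.numFeatures
prediction: float = model.predict(Vectors.dense(3.0))

# MLWritable/MLReadable[R]: save() goes through an MLWriter, load() returns R,
# so `reloaded` is inferred as LinearRegressionModel (the path is made up).
model.write().overwrite().save("/tmp/lr-model-example")
reloaded = LinearRegressionModel.load("/tmp/lr-model-example")
```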
diff --git a/python/pyspark/mllib/__init__.pyi b/python/pyspark/mllib/__init__.pyi new file mode 100644 index 0000000000000..83032c4580fc8 --- /dev/null +++ b/python/pyspark/mllib/__init__.pyi @@ -0,0 +1,32 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: This dynamically typed stub was automatically generated by stubgen. + +# Names in __all__ with no definition: +# classification +# clustering +# feature +# fpm +# linalg +# random +# recommendation +# regression +# stat +# tree +# util diff --git a/python/pyspark/mllib/_typing.pyi b/python/pyspark/mllib/_typing.pyi new file mode 100644 index 0000000000000..213a69996b0ad --- /dev/null +++ b/python/pyspark/mllib/_typing.pyi @@ -0,0 +1,23 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import List, Tuple, Union +from pyspark.mllib.linalg import Vector +from numpy import ndarray # noqa: F401 + +VectorLike = Union[Vector, List[float], Tuple[float, ...]] diff --git a/python/pyspark/mllib/classification.pyi b/python/pyspark/mllib/classification.pyi new file mode 100644 index 0000000000000..c51882c87bfc2 --- /dev/null +++ b/python/pyspark/mllib/classification.pyi @@ -0,0 +1,151 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import overload +from typing import Optional, Union + +from pyspark.context import SparkContext +from pyspark.rdd import RDD +from pyspark.mllib._typing import VectorLike +from pyspark.mllib.linalg import Vector +from pyspark.mllib.regression import LabeledPoint, LinearModel, StreamingLinearAlgorithm +from pyspark.mllib.util import Saveable, Loader +from pyspark.streaming.dstream import DStream + +from numpy import float64, ndarray # type: ignore[import] + +class LinearClassificationModel(LinearModel): + def __init__(self, weights: Vector, intercept: float) -> None: ... + def setThreshold(self, value: float) -> None: ... + @property + def threshold(self) -> Optional[float]: ... + def clearThreshold(self) -> None: ... + @overload + def predict(self, test: VectorLike) -> Union[int, float, float64]: ... + @overload + def predict(self, test: RDD[VectorLike]) -> RDD[Union[int, float]]: ... + +class LogisticRegressionModel(LinearClassificationModel): + def __init__( + self, weights: Vector, intercept: float, numFeatures: int, numClasses: int + ) -> None: ... + @property + def numFeatures(self) -> int: ... + @property + def numClasses(self) -> int: ... + @overload + def predict(self, x: VectorLike) -> Union[int, float]: ... + @overload + def predict(self, x: RDD[VectorLike]) -> RDD[Union[int, float]]: ... + def save(self, sc: SparkContext, path: str) -> None: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> LogisticRegressionModel: ... + +class LogisticRegressionWithSGD: + @classmethod + def train( + cls, + data: RDD[LabeledPoint], + iterations: int = ..., + step: float = ..., + miniBatchFraction: float = ..., + initialWeights: Optional[VectorLike] = ..., + regParam: float = ..., + regType: str = ..., + intercept: bool = ..., + validateData: bool = ..., + convergenceTol: float = ..., + ) -> LogisticRegressionModel: ... + +class LogisticRegressionWithLBFGS: + @classmethod + def train( + cls, + data: RDD[LabeledPoint], + iterations: int = ..., + initialWeights: Optional[VectorLike] = ..., + regParam: float = ..., + regType: str = ..., + intercept: bool = ..., + corrections: int = ..., + tolerance: float = ..., + validateData: bool = ..., + numClasses: int = ..., + ) -> LogisticRegressionModel: ... + +class SVMModel(LinearClassificationModel): + def __init__(self, weights: Vector, intercept: float) -> None: ... + @overload + def predict(self, x: VectorLike) -> float64: ... + @overload + def predict(self, x: RDD[VectorLike]) -> RDD[float64]: ... + def save(self, sc: SparkContext, path: str) -> None: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> SVMModel: ... + +class SVMWithSGD: + @classmethod + def train( + cls, + data: RDD[LabeledPoint], + iterations: int = ..., + step: float = ..., + regParam: float = ..., + miniBatchFraction: float = ..., + initialWeights: Optional[VectorLike] = ..., + regType: str = ..., + intercept: bool = ..., + validateData: bool = ..., + convergenceTol: float = ..., + ) -> SVMModel: ... + +class NaiveBayesModel(Saveable, Loader[NaiveBayesModel]): + labels: ndarray + pi: ndarray + theta: ndarray + def __init__(self, labels, pi, theta) -> None: ... + @overload + def predict(self, x: VectorLike) -> float64: ... + @overload + def predict(self, x: RDD[VectorLike]) -> RDD[float64]: ... + def save(self, sc: SparkContext, path: str) -> None: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> NaiveBayesModel: ... + +class NaiveBayes: + @classmethod + def train(cls, data: RDD[VectorLike], lambda_: float = ...) 
-> NaiveBayesModel: ... + +class StreamingLogisticRegressionWithSGD(StreamingLinearAlgorithm): + stepSize: float + numIterations: int + regParam: float + miniBatchFraction: float + convergenceTol: float + def __init__( + self, + stepSize: float = ..., + numIterations: int = ..., + miniBatchFraction: float = ..., + regParam: float = ..., + convergenceTol: float = ..., + ) -> None: ... + def setInitialWeights( + self, initialWeights: VectorLike + ) -> StreamingLogisticRegressionWithSGD: ... + def trainOn(self, dstream: DStream[LabeledPoint]) -> None: ... diff --git a/python/pyspark/mllib/clustering.pyi b/python/pyspark/mllib/clustering.pyi new file mode 100644 index 0000000000000..1c3eba17e201c --- /dev/null +++ b/python/pyspark/mllib/clustering.pyi @@ -0,0 +1,196 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import List, NamedTuple, Optional, Tuple, TypeVar + +import array + +from numpy import float64, int64, ndarray # type: ignore[import] +from py4j.java_gateway import JavaObject # type: ignore[import] + +from pyspark.mllib._typing import VectorLike +from pyspark.context import SparkContext +from pyspark.rdd import RDD +from pyspark.mllib.common import JavaModelWrapper +from pyspark.mllib.stat.distribution import MultivariateGaussian +from pyspark.mllib.util import Saveable, Loader, JavaLoader, JavaSaveable +from pyspark.streaming.dstream import DStream + +T = TypeVar("T") + +class BisectingKMeansModel(JavaModelWrapper): + centers: List[ndarray] + def __init__(self, java_model: JavaObject) -> None: ... + @property + def clusterCenters(self) -> List[ndarray]: ... + @property + def k(self) -> int: ... + @overload + def predict(self, x: VectorLike) -> int: ... + @overload + def predict(self, x: RDD[VectorLike]) -> RDD[int]: ... + @overload + def computeCost(self, x: VectorLike) -> float: ... + @overload + def computeCost(self, x: RDD[VectorLike]) -> float: ... + +class BisectingKMeans: + @classmethod + def train( + self, + rdd: RDD[VectorLike], + k: int = ..., + maxIterations: int = ..., + minDivisibleClusterSize: float = ..., + seed: int = ..., + ) -> BisectingKMeansModel: ... + +class KMeansModel(Saveable, Loader[KMeansModel]): + centers: List[ndarray] + def __init__(self, centers: List[ndarray]) -> None: ... + @property + def clusterCenters(self) -> List[ndarray]: ... + @property + def k(self) -> int: ... + @overload + def predict(self, x: VectorLike) -> int: ... + @overload + def predict(self, x: RDD[VectorLike]) -> RDD[int]: ... + def computeCost(self, rdd: RDD[VectorLike]) -> float: ... + def save(self, sc: SparkContext, path: str) -> None: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> KMeansModel: ... 
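A small usage sketch (illustrative only, not part of the patch) of the overloaded `KMeansModel.predict` signatures declared in `clustering.pyi` above; the sample vectors are arbitrary.

```
from pyspark.sql import SparkSession
from pyspark.mllib.clustering import KMeans
from pyspark.mllib.linalg import Vectors

sc = SparkSession.builder.getOrCreate().sparkContext
# Arbitrary 1-D points forming two obvious clusters.
rdd = sc.parallelize([Vectors.dense(x) for x in (0.0, 0.1, 9.0, 9.1)])
model = KMeans.train(rdd, k=2, maxIterations=10, seed=1)

# The @overload pair on predict distinguishes the two call shapes:
single: int = model.predict(Vectors.dense(0.05))   # VectorLike -> int
bulk = model.predict(rdd)                          # RDD[VectorLike] -> RDD[int]
cost: float = model.computeCost(rdd)
```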
+ +class KMeans: + @classmethod + def train( + cls, + rdd: RDD[VectorLike], + k: int, + maxIterations: int = ..., + initializationMode: str = ..., + seed: Optional[int] = ..., + initializationSteps: int = ..., + epsilon: float = ..., + initialModel: Optional[KMeansModel] = ..., + ) -> KMeansModel: ... + +class GaussianMixtureModel( + JavaModelWrapper, JavaSaveable, JavaLoader[GaussianMixtureModel] +): + @property + def weights(self) -> ndarray: ... + @property + def gaussians(self) -> List[MultivariateGaussian]: ... + @property + def k(self) -> int: ... + @overload + def predict(self, x: VectorLike) -> int64: ... + @overload + def predict(self, x: RDD[VectorLike]) -> RDD[int]: ... + @overload + def predictSoft(self, x: VectorLike) -> ndarray: ... + @overload + def predictSoft(self, x: RDD[VectorLike]) -> RDD[array.array]: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> GaussianMixtureModel: ... + +class GaussianMixture: + @classmethod + def train( + cls, + rdd: RDD[VectorLike], + k: int, + convergenceTol: float = ..., + maxIterations: int = ..., + seed: Optional[int] = ..., + initialModel: Optional[GaussianMixtureModel] = ..., + ) -> GaussianMixtureModel: ... + +class PowerIterationClusteringModel( + JavaModelWrapper, JavaSaveable, JavaLoader[PowerIterationClusteringModel] +): + @property + def k(self) -> int: ... + def assignments(self) -> RDD[PowerIterationClustering.Assignment]: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> PowerIterationClusteringModel: ... + +class PowerIterationClustering: + @classmethod + def train( + cls, + rdd: RDD[Tuple[int, int, float]], + k: int, + maxIterations: int = ..., + initMode: str = ..., + ) -> PowerIterationClusteringModel: ... + class Assignment(NamedTuple("Assignment", [("id", int), ("cluster", int)])): ... + +class StreamingKMeansModel(KMeansModel): + def __init__(self, clusterCenters, clusterWeights) -> None: ... + @property + def clusterWeights(self) -> List[float64]: ... + centers: ndarray + def update( + self, data: RDD[VectorLike], decayFactor: float, timeUnit: str + ) -> StreamingKMeansModel: ... + +class StreamingKMeans: + def __init__( + self, k: int = ..., decayFactor: float = ..., timeUnit: str = ... + ) -> None: ... + def latestModel(self) -> StreamingKMeansModel: ... + def setK(self, k: int) -> StreamingKMeans: ... + def setDecayFactor(self, decayFactor: float) -> StreamingKMeans: ... + def setHalfLife(self, halfLife: float, timeUnit: str) -> StreamingKMeans: ... + def setInitialCenters( + self, centers: List[VectorLike], weights: List[float] + ) -> StreamingKMeans: ... + def setRandomCenters( + self, dim: int, weight: float, seed: int + ) -> StreamingKMeans: ... + def trainOn(self, dstream: DStream[VectorLike]) -> None: ... + def predictOn(self, dstream: DStream[VectorLike]) -> DStream[int]: ... + def predictOnValues( + self, dstream: DStream[Tuple[T, VectorLike]] + ) -> DStream[Tuple[T, int]]: ... + +class LDAModel(JavaModelWrapper, JavaSaveable, Loader[LDAModel]): + def topicsMatrix(self) -> ndarray: ... + def vocabSize(self) -> int: ... + def describeTopics( + self, maxTermsPerTopic: Optional[int] = ... + ) -> List[Tuple[List[int], List[float]]]: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> LDAModel: ... 
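Similarly, a hypothetical sketch (not part of the patch) of the single-vector versus RDD overloads on `GaussianMixtureModel` declared above; the toy data is arbitrary.

```
from pyspark.sql import SparkSession
from pyspark.mllib.clustering import GaussianMixture
from pyspark.mllib.linalg import Vectors

sc = SparkSession.builder.getOrCreate().sparkContext
# Arbitrary 1-D sample with two well-separated groups.
rdd = sc.parallelize([Vectors.dense(x) for x in (-5.0, -4.9, 5.0, 5.1)])
gmm = GaussianMixture.train(rdd, k=2, seed=1)

label = gmm.predict(Vectors.dense(5.05))   # single vector -> int64
labels = gmm.predict(rdd)                  # RDD input -> RDD[int]
weights = gmm.weights                      # ndarray of component weights
```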
+ +class LDA: + @classmethod + def train( + cls, + rdd: RDD[Tuple[int, VectorLike]], + k: int = ..., + maxIterations: int = ..., + docConcentration: float = ..., + topicConcentration: float = ..., + seed: Optional[int] = ..., + checkpointInterval: int = ..., + optimizer: str = ..., + ) -> LDAModel: ... diff --git a/python/pyspark/mllib/common.pyi b/python/pyspark/mllib/common.pyi new file mode 100644 index 0000000000000..1df308b91b5a1 --- /dev/null +++ b/python/pyspark/mllib/common.pyi @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +def callJavaFunc(sc, func, *args): ... +def callMLlibFunc(name, *args): ... + +class JavaModelWrapper: + def __init__(self, java_model) -> None: ... + def __del__(self): ... + def call(self, name, *a): ... + +def inherit_doc(cls): ... diff --git a/python/pyspark/mllib/evaluation.pyi b/python/pyspark/mllib/evaluation.pyi new file mode 100644 index 0000000000000..03583784f0c3b --- /dev/null +++ b/python/pyspark/mllib/evaluation.pyi @@ -0,0 +1,94 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import List, Optional, Tuple, TypeVar +from pyspark.rdd import RDD +from pyspark.mllib.common import JavaModelWrapper +from pyspark.mllib.linalg import Matrix + +T = TypeVar("T") + +class BinaryClassificationMetrics(JavaModelWrapper): + def __init__(self, scoreAndLabels: RDD[Tuple[float, float]]) -> None: ... + @property + def areaUnderROC(self) -> float: ... + @property + def areaUnderPR(self) -> float: ... + def unpersist(self) -> None: ... + +class RegressionMetrics(JavaModelWrapper): + def __init__(self, predictionAndObservations: RDD[Tuple[float, float]]) -> None: ... + @property + def explainedVariance(self) -> float: ... + @property + def meanAbsoluteError(self) -> float: ... + @property + def meanSquaredError(self) -> float: ... + @property + def rootMeanSquaredError(self) -> float: ... + @property + def r2(self) -> float: ... + +class MulticlassMetrics(JavaModelWrapper): + def __init__(self, predictionAndLabels: RDD[Tuple[float, float]]) -> None: ... 
+ def confusionMatrix(self) -> Matrix: ... + def truePositiveRate(self, label: float) -> float: ... + def falsePositiveRate(self, label: float) -> float: ... + def precision(self, label: float = ...) -> float: ... + def recall(self, label: float = ...) -> float: ... + def fMeasure(self, label: float = ..., beta: Optional[float] = ...) -> float: ... + @property + def accuracy(self) -> float: ... + @property + def weightedTruePositiveRate(self) -> float: ... + @property + def weightedFalsePositiveRate(self) -> float: ... + @property + def weightedRecall(self) -> float: ... + @property + def weightedPrecision(self) -> float: ... + def weightedFMeasure(self, beta: Optional[float] = ...) -> float: ... + +class RankingMetrics(JavaModelWrapper): + def __init__(self, predictionAndLabels: RDD[Tuple[List[T], List[T]]]) -> None: ... + def precisionAt(self, k: int) -> float: ... + @property + def meanAveragePrecision(self) -> float: ... + def meanAveragePrecisionAt(self, k: int) -> float: ... + def ndcgAt(self, k: int) -> float: ... + def recallAt(self, k: int) -> float: ... + +class MultilabelMetrics(JavaModelWrapper): + def __init__( + self, predictionAndLabels: RDD[Tuple[List[float], List[float]]] + ) -> None: ... + def precision(self, label: Optional[float] = ...) -> float: ... + def recall(self, label: Optional[float] = ...) -> float: ... + def f1Measure(self, label: Optional[float] = ...) -> float: ... + @property + def microPrecision(self) -> float: ... + @property + def microRecall(self) -> float: ... + @property + def microF1Measure(self) -> float: ... + @property + def hammingLoss(self) -> float: ... + @property + def subsetAccuracy(self) -> float: ... + @property + def accuracy(self) -> float: ... diff --git a/python/pyspark/mllib/feature.pyi b/python/pyspark/mllib/feature.pyi new file mode 100644 index 0000000000000..9ccec36abd6ff --- /dev/null +++ b/python/pyspark/mllib/feature.pyi @@ -0,0 +1,167 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import Iterable, Hashable, List, Tuple + +from pyspark.mllib._typing import VectorLike +from pyspark.context import SparkContext +from pyspark.rdd import RDD +from pyspark.mllib.common import JavaModelWrapper +from pyspark.mllib.linalg import Vector +from pyspark.mllib.regression import LabeledPoint +from pyspark.mllib.util import JavaLoader, JavaSaveable + +from py4j.java_collections import JavaMap # type: ignore[import] + +class VectorTransformer: + @overload + def transform(self, vector: VectorLike) -> Vector: ... + @overload + def transform(self, vector: RDD[VectorLike]) -> RDD[Vector]: ... + +class Normalizer(VectorTransformer): + p: float + def __init__(self, p: float = ...) -> None: ... + @overload + def transform(self, vector: VectorLike) -> Vector: ... 
+ @overload + def transform(self, vector: RDD[VectorLike]) -> RDD[Vector]: ... + +class JavaVectorTransformer(JavaModelWrapper, VectorTransformer): + @overload + def transform(self, vector: VectorLike) -> Vector: ... + @overload + def transform(self, vector: RDD[VectorLike]) -> RDD[Vector]: ... + +class StandardScalerModel(JavaVectorTransformer): + @overload + def transform(self, vector: VectorLike) -> Vector: ... + @overload + def transform(self, vector: RDD[VectorLike]) -> RDD[Vector]: ... + def setWithMean(self, withMean: bool) -> StandardScalerModel: ... + def setWithStd(self, withStd: bool) -> StandardScalerModel: ... + @property + def withStd(self) -> bool: ... + @property + def withMean(self) -> bool: ... + @property + def std(self) -> Vector: ... + @property + def mean(self) -> Vector: ... + +class StandardScaler: + withMean: bool + withStd: bool + def __init__(self, withMean: bool = ..., withStd: bool = ...) -> None: ... + def fit(self, dataset: RDD[VectorLike]) -> StandardScalerModel: ... + +class ChiSqSelectorModel(JavaVectorTransformer): + @overload + def transform(self, vector: VectorLike) -> Vector: ... + @overload + def transform(self, vector: RDD[VectorLike]) -> RDD[Vector]: ... + +class ChiSqSelector: + numTopFeatures: int + selectorType: str + percentile: float + fpr: float + fdr: float + fwe: float + def __init__( + self, + numTopFeatures: int = ..., + selectorType: str = ..., + percentile: float = ..., + fpr: float = ..., + fdr: float = ..., + fwe: float = ..., + ) -> None: ... + def setNumTopFeatures(self, numTopFeatures: int) -> ChiSqSelector: ... + def setPercentile(self, percentile: float) -> ChiSqSelector: ... + def setFpr(self, fpr: float) -> ChiSqSelector: ... + def setFdr(self, fdr: float) -> ChiSqSelector: ... + def setFwe(self, fwe: float) -> ChiSqSelector: ... + def setSelectorType(self, selectorType: str) -> ChiSqSelector: ... + def fit(self, data: RDD[LabeledPoint]) -> ChiSqSelectorModel: ... + +class PCAModel(JavaVectorTransformer): ... + +class PCA: + k: int + def __init__(self, k: int) -> None: ... + def fit(self, data: RDD[VectorLike]) -> PCAModel: ... + +class HashingTF: + numFeatures: int + binary: bool + def __init__(self, numFeatures: int = ...) -> None: ... + def setBinary(self, value: bool) -> HashingTF: ... + def indexOf(self, term: Hashable) -> int: ... + @overload + def transform(self, document: Iterable[Hashable]) -> Vector: ... + @overload + def transform(self, document: RDD[Iterable[Hashable]]) -> RDD[Vector]: ... + +class IDFModel(JavaVectorTransformer): + @overload + def transform(self, x: VectorLike) -> Vector: ... + @overload + def transform(self, x: RDD[VectorLike]) -> RDD[Vector]: ... + def idf(self) -> Vector: ... + def docFreq(self) -> List[int]: ... + def numDocs(self) -> int: ... + +class IDF: + minDocFreq: int + def __init__(self, minDocFreq: int = ...) -> None: ... + def fit(self, dataset: RDD[VectorLike]) -> IDFModel: ... + +class Word2VecModel(JavaVectorTransformer, JavaSaveable, JavaLoader[Word2VecModel]): + def transform(self, word: str) -> Vector: ... # type: ignore + def findSynonyms(self, word: str, num: int) -> Iterable[Tuple[str, float]]: ... + def getVectors(self) -> JavaMap: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> Word2VecModel: ... + +class Word2Vec: + vectorSize: int + learningRate: float + numPartitions: int + numIterations: int + seed: int + minCount: int + windowSize: int + def __init__(self) -> None: ... + def setVectorSize(self, vectorSize: int) -> Word2Vec: ... 
+ def setLearningRate(self, learningRate: float) -> Word2Vec: ... + def setNumPartitions(self, numPartitions: int) -> Word2Vec: ... + def setNumIterations(self, numIterations: int) -> Word2Vec: ... + def setSeed(self, seed: int) -> Word2Vec: ... + def setMinCount(self, minCount: int) -> Word2Vec: ... + def setWindowSize(self, windowSize: int) -> Word2Vec: ... + def fit(self, data: RDD[List[str]]) -> Word2VecModel: ... + +class ElementwiseProduct(VectorTransformer): + scalingVector: Vector + def __init__(self, scalingVector: Vector) -> None: ... + @overload + def transform(self, vector: VectorLike) -> Vector: ... + @overload + def transform(self, vector: RDD[VectorLike]) -> RDD[Vector]: ... diff --git a/python/pyspark/mllib/fpm.pyi b/python/pyspark/mllib/fpm.pyi new file mode 100644 index 0000000000000..880baae1a91a5 --- /dev/null +++ b/python/pyspark/mllib/fpm.pyi @@ -0,0 +1,57 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Generic, List, TypeVar +from pyspark.context import SparkContext +from pyspark.rdd import RDD +from pyspark.mllib.common import JavaModelWrapper +from pyspark.mllib.util import JavaSaveable, JavaLoader + +T = TypeVar("T") + +class FPGrowthModel( + JavaModelWrapper, JavaSaveable, JavaLoader[FPGrowthModel], Generic[T] +): + def freqItemsets(self) -> RDD[FPGrowth.FreqItemset[T]]: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> FPGrowthModel: ... + +class FPGrowth: + @classmethod + def train( + cls, data: RDD[List[T]], minSupport: float = ..., numPartitions: int = ... + ) -> FPGrowthModel[T]: ... + class FreqItemset(Generic[T]): + items = ... # List[T] + freq = ... # int + +class PrefixSpanModel(JavaModelWrapper, Generic[T]): + def freqSequences(self) -> RDD[PrefixSpan.FreqSequence[T]]: ... + +class PrefixSpan: + @classmethod + def train( + cls, + data: RDD[List[List[T]]], + minSupport: float = ..., + maxPatternLength: int = ..., + maxLocalProjDBSize: int = ..., + ) -> PrefixSpanModel[T]: ... + class FreqSequence(tuple, Generic[T]): + sequence: List[T] + freq: int diff --git a/python/pyspark/mllib/linalg/__init__.pyi b/python/pyspark/mllib/linalg/__init__.pyi new file mode 100644 index 0000000000000..c0719c535c8f4 --- /dev/null +++ b/python/pyspark/mllib/linalg/__init__.pyi @@ -0,0 +1,273 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import Any, Dict, Generic, Iterable, List, Optional, Tuple, TypeVar, Union +from pyspark.ml import linalg as newlinalg +from pyspark.sql.types import StructType, UserDefinedType +from numpy import float64, ndarray # type: ignore[import] + +QT = TypeVar("QT") +RT = TypeVar("RT") + +class VectorUDT(UserDefinedType): + @classmethod + def sqlType(cls) -> StructType: ... + @classmethod + def module(cls) -> str: ... + @classmethod + def scalaUDT(cls) -> str: ... + def serialize( + self, obj: Vector + ) -> Tuple[int, Optional[int], Optional[List[int]], List[float]]: ... + def deserialize(self, datum: Any) -> Vector: ... + def simpleString(self) -> str: ... + +class MatrixUDT(UserDefinedType): + @classmethod + def sqlType(cls) -> StructType: ... + @classmethod + def module(cls) -> str: ... + @classmethod + def scalaUDT(cls) -> str: ... + def serialize( + self, obj + ) -> Tuple[ + int, int, int, Optional[List[int]], Optional[List[int]], List[float], bool + ]: ... + def deserialize(self, datum: Any) -> Matrix: ... + def simpleString(self) -> str: ... + +class Vector: + __UDT__: VectorUDT + def toArray(self) -> ndarray: ... + def asML(self) -> newlinalg.Vector: ... + +class DenseVector(Vector): + array: ndarray + @overload + def __init__(self, *elements: float) -> None: ... + @overload + def __init__(self, __arr: bytes) -> None: ... + @overload + def __init__(self, __arr: Iterable[float]) -> None: ... + @staticmethod + def parse(s) -> DenseVector: ... + def __reduce__(self) -> Tuple[type, bytes]: ... + def numNonzeros(self) -> int: ... + def norm(self, p: Union[float, str]) -> float64: ... + def dot(self, other: Iterable[float]) -> float64: ... + def squared_distance(self, other: Iterable[float]) -> float64: ... + def toArray(self) -> ndarray: ... + def asML(self) -> newlinalg.DenseVector: ... + @property + def values(self) -> ndarray: ... + def __getitem__(self, item: int) -> float64: ... + def __len__(self) -> int: ... + def __eq__(self, other: Any) -> bool: ... + def __ne__(self, other: Any) -> bool: ... + def __hash__(self) -> int: ... + def __getattr__(self, item: str) -> Any: ... + def __neg__(self) -> DenseVector: ... + def __add__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __sub__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __mul__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __div__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __truediv__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __mod__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __radd__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __rsub__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __rmul__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __rdiv__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __rtruediv__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... 
+ def __rmod__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + +class SparseVector(Vector): + size: int + indices: ndarray + values: ndarray + @overload + def __init__(self, size: int, *args: Tuple[int, float]) -> None: ... + @overload + def __init__(self, size: int, __indices: bytes, __values: bytes) -> None: ... + @overload + def __init__( + self, size: int, __indices: Iterable[int], __values: Iterable[float] + ) -> None: ... + @overload + def __init__(self, size: int, __pairs: Iterable[Tuple[int, float]]) -> None: ... + @overload + def __init__(self, size: int, __map: Dict[int, float]) -> None: ... + def numNonzeros(self) -> int: ... + def norm(self, p: Union[float, str]) -> float64: ... + def __reduce__(self): ... + @staticmethod + def parse(s: str) -> SparseVector: ... + def dot(self, other: Iterable[float]) -> float64: ... + def squared_distance(self, other: Iterable[float]) -> float64: ... + def toArray(self) -> ndarray: ... + def asML(self) -> newlinalg.SparseVector: ... + def __len__(self) -> int: ... + def __eq__(self, other) -> bool: ... + def __getitem__(self, index: int) -> float64: ... + def __ne__(self, other) -> bool: ... + def __hash__(self) -> int: ... + +class Vectors: + @overload + @staticmethod + def sparse(size: int, *args: Tuple[int, float]) -> SparseVector: ... + @overload + @staticmethod + def sparse(size: int, __indices: bytes, __values: bytes) -> SparseVector: ... + @overload + @staticmethod + def sparse( + size: int, __indices: Iterable[int], __values: Iterable[float] + ) -> SparseVector: ... + @overload + @staticmethod + def sparse(size: int, __pairs: Iterable[Tuple[int, float]]) -> SparseVector: ... + @overload + @staticmethod + def sparse(size: int, __map: Dict[int, float]) -> SparseVector: ... + @overload + @staticmethod + def dense(self, *elements: float) -> DenseVector: ... + @overload + @staticmethod + def dense(self, __arr: bytes) -> DenseVector: ... + @overload + @staticmethod + def dense(self, __arr: Iterable[float]) -> DenseVector: ... + @staticmethod + def fromML(vec: newlinalg.DenseVector) -> DenseVector: ... + @staticmethod + def stringify(vector: Vector) -> str: ... + @staticmethod + def squared_distance(v1: Vector, v2: Vector) -> float64: ... + @staticmethod + def norm(vector: Vector, p: Union[float, str]) -> float64: ... + @staticmethod + def parse(s: str) -> Vector: ... + @staticmethod + def zeros(size: int) -> DenseVector: ... + +class Matrix: + __UDT__: MatrixUDT + numRows: int + numCols: int + isTransposed: bool + def __init__( + self, numRows: int, numCols: int, isTransposed: bool = ... + ) -> None: ... + def toArray(self): ... + def asML(self): ... + +class DenseMatrix(Matrix): + values: Any + @overload + def __init__( + self, numRows: int, numCols: int, values: bytes, isTransposed: bool = ... + ) -> None: ... + @overload + def __init__( + self, + numRows: int, + numCols: int, + values: Iterable[float], + isTransposed: bool = ..., + ) -> None: ... + def __reduce__(self) -> Tuple[type, Tuple[int, int, bytes, int]]: ... + def toArray(self) -> ndarray: ... + def toSparse(self) -> SparseMatrix: ... + def asML(self) -> newlinalg.DenseMatrix: ... + def __getitem__(self, indices: Tuple[int, int]) -> float64: ... + def __eq__(self, other) -> bool: ... + +class SparseMatrix(Matrix): + colPtrs: ndarray + rowIndices: ndarray + values: ndarray + @overload + def __init__( + self, + numRows: int, + numCols: int, + colPtrs: bytes, + rowIndices: bytes, + values: bytes, + isTransposed: bool = ..., + ) -> None: ... 
+ @overload + def __init__( + self, + numRows: int, + numCols: int, + colPtrs: Iterable[int], + rowIndices: Iterable[int], + values: Iterable[float], + isTransposed: bool = ..., + ) -> None: ... + def __reduce__(self) -> Tuple[type, Tuple[int, int, bytes, bytes, bytes, int]]: ... + def __getitem__(self, indices: Tuple[int, int]) -> float64: ... + def toArray(self) -> ndarray: ... + def toDense(self) -> DenseMatrix: ... + def asML(self) -> newlinalg.SparseMatrix: ... + def __eq__(self, other) -> bool: ... + +class Matrices: + @overload + @staticmethod + def dense( + numRows: int, numCols: int, values: bytes, isTransposed: bool = ... + ) -> DenseMatrix: ... + @overload + @staticmethod + def dense( + numRows: int, numCols: int, values: Iterable[float], isTransposed: bool = ... + ) -> DenseMatrix: ... + @overload + @staticmethod + def sparse( + numRows: int, + numCols: int, + colPtrs: bytes, + rowIndices: bytes, + values: bytes, + isTransposed: bool = ..., + ) -> SparseMatrix: ... + @overload + @staticmethod + def sparse( + numRows: int, + numCols: int, + colPtrs: Iterable[int], + rowIndices: Iterable[int], + values: Iterable[float], + isTransposed: bool = ..., + ) -> SparseMatrix: ... + @staticmethod + def fromML(mat: newlinalg.Matrix) -> Matrix: ... + +class QRDecomposition(Generic[QT, RT]): + def __init__(self, Q: QT, R: RT) -> None: ... + @property + def Q(self) -> QT: ... + @property + def R(self) -> RT: ... diff --git a/python/pyspark/mllib/linalg/distributed.pyi b/python/pyspark/mllib/linalg/distributed.pyi new file mode 100644 index 0000000000000..238c4ea32e4e8 --- /dev/null +++ b/python/pyspark/mllib/linalg/distributed.pyi @@ -0,0 +1,147 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Generic, Sequence, Optional, Tuple, TypeVar, Union +from pyspark.rdd import RDD +from pyspark.storagelevel import StorageLevel +from pyspark.mllib.common import JavaModelWrapper +from pyspark.mllib.linalg import Vector, Matrix, QRDecomposition +from pyspark.mllib.stat import MultivariateStatisticalSummary +from numpy import ndarray # noqa: F401 + +VectorLike = Union[Vector, Sequence[Union[float, int]]] + +UT = TypeVar("UT") +VT = TypeVar("VT") + +class DistributedMatrix: + def numRows(self) -> int: ... + def numCols(self) -> int: ... + +class RowMatrix(DistributedMatrix): + def __init__( + self, rows: RDD[Vector], numRows: int = ..., numCols: int = ... + ) -> None: ... + @property + def rows(self) -> RDD[Vector]: ... + def numRows(self) -> int: ... + def numCols(self) -> int: ... + def computeColumnSummaryStatistics(self) -> MultivariateStatisticalSummary: ... + def computeCovariance(self) -> Matrix: ... + def computeGramianMatrix(self) -> Matrix: ... + def columnSimilarities(self, threshold: float = ...) -> CoordinateMatrix: ... 
+ def tallSkinnyQR( + self, computeQ: bool = ... + ) -> QRDecomposition[RowMatrix, Matrix]: ... + def computeSVD( + self, k: int, computeU: bool = ..., rCond: float = ... + ) -> SingularValueDecomposition[RowMatrix, Matrix]: ... + def computePrincipalComponents(self, k: int) -> Matrix: ... + def multiply(self, matrix: Matrix) -> RowMatrix: ... + +class SingularValueDecomposition(JavaModelWrapper, Generic[UT, VT]): + @property + def U(self) -> Optional[UT]: ... + @property + def s(self) -> Vector: ... + @property + def V(self) -> VT: ... + +class IndexedRow: + index: int + vector: VectorLike + def __init__(self, index: int, vector: VectorLike) -> None: ... + +class IndexedRowMatrix(DistributedMatrix): + def __init__( + self, + rows: RDD[Union[Tuple[int, VectorLike], IndexedRow]], + numRows: int = ..., + numCols: int = ..., + ) -> None: ... + @property + def rows(self) -> RDD[IndexedRow]: ... + def numRows(self) -> int: ... + def numCols(self) -> int: ... + def columnSimilarities(self) -> CoordinateMatrix: ... + def computeGramianMatrix(self) -> Matrix: ... + def toRowMatrix(self) -> RowMatrix: ... + def toCoordinateMatrix(self) -> CoordinateMatrix: ... + def toBlockMatrix( + self, rowsPerBlock: int = ..., colsPerBlock: int = ... + ) -> BlockMatrix: ... + def computeSVD( + self, k: int, computeU: bool = ..., rCond: float = ... + ) -> SingularValueDecomposition[IndexedRowMatrix, Matrix]: ... + def multiply(self, matrix: Matrix) -> IndexedRowMatrix: ... + +class MatrixEntry: + i: int + j: int + value: float + def __init__(self, i: int, j: int, value: float) -> None: ... + +class CoordinateMatrix(DistributedMatrix): + def __init__( + self, + entries: RDD[Union[Tuple[int, int, float], MatrixEntry]], + numRows: int = ..., + numCols: int = ..., + ) -> None: ... + @property + def entries(self) -> RDD[MatrixEntry]: ... + def numRows(self) -> int: ... + def numCols(self) -> int: ... + def transpose(self) -> CoordinateMatrix: ... + def toRowMatrix(self) -> RowMatrix: ... + def toIndexedRowMatrix(self) -> IndexedRowMatrix: ... + def toBlockMatrix( + self, rowsPerBlock: int = ..., colsPerBlock: int = ... + ) -> BlockMatrix: ... + +class BlockMatrix(DistributedMatrix): + def __init__( + self, + blocks: RDD[Tuple[Tuple[int, int], Matrix]], + rowsPerBlock: int, + colsPerBlock: int, + numRows: int = ..., + numCols: int = ..., + ) -> None: ... + @property + def blocks(self) -> RDD[Tuple[Tuple[int, int], Matrix]]: ... + @property + def rowsPerBlock(self) -> int: ... + @property + def colsPerBlock(self) -> int: ... + @property + def numRowBlocks(self) -> int: ... + @property + def numColBlocks(self) -> int: ... + def numRows(self) -> int: ... + def numCols(self) -> int: ... + def cache(self) -> BlockMatrix: ... + def persist(self, storageLevel: StorageLevel) -> BlockMatrix: ... + def validate(self) -> None: ... + def add(self, other: BlockMatrix) -> BlockMatrix: ... + def subtract(self, other: BlockMatrix) -> BlockMatrix: ... + def multiply(self, other: BlockMatrix) -> BlockMatrix: ... + def transpose(self) -> BlockMatrix: ... + def toLocalMatrix(self) -> Matrix: ... + def toIndexedRowMatrix(self) -> IndexedRowMatrix: ... + def toCoordinateMatrix(self) -> CoordinateMatrix: ... diff --git a/python/pyspark/mllib/random.pyi b/python/pyspark/mllib/random.pyi new file mode 100644 index 0000000000000..dc5f4701614da --- /dev/null +++ b/python/pyspark/mllib/random.pyi @@ -0,0 +1,126 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Optional +from pyspark.context import SparkContext +from pyspark.rdd import RDD +from pyspark.mllib.linalg import Vector + +class RandomRDDs: + @staticmethod + def uniformRDD( + sc: SparkContext, + size: int, + numPartitions: Optional[int] = ..., + seed: Optional[int] = ..., + ) -> RDD[float]: ... + @staticmethod + def normalRDD( + sc: SparkContext, + size: int, + numPartitions: Optional[int] = ..., + seed: Optional[int] = ..., + ) -> RDD[float]: ... + @staticmethod + def logNormalRDD( + sc: SparkContext, + mean: float, + std: float, + size: int, + numPartitions: Optional[int] = ..., + seed: Optional[int] = ..., + ) -> RDD[float]: ... + @staticmethod + def poissonRDD( + sc: SparkContext, + mean: float, + size: int, + numPartitions: Optional[int] = ..., + seed: Optional[int] = ..., + ) -> RDD[float]: ... + @staticmethod + def exponentialRDD( + sc: SparkContext, + mean: float, + size: int, + numPartitions: Optional[int] = ..., + seed: Optional[int] = ..., + ) -> RDD[float]: ... + @staticmethod + def gammaRDD( + sc: SparkContext, + shape: float, + scale: float, + size: int, + numPartitions: Optional[int] = ..., + seed: Optional[int] = ..., + ) -> RDD[float]: ... + @staticmethod + def uniformVectorRDD( + sc: SparkContext, + numRows: int, + numCols: int, + numPartitions: Optional[int] = ..., + seed: Optional[int] = ..., + ) -> RDD[Vector]: ... + @staticmethod + def normalVectorRDD( + sc: SparkContext, + numRows: int, + numCols: int, + numPartitions: Optional[int] = ..., + seed: Optional[int] = ..., + ) -> RDD[Vector]: ... + @staticmethod + def logNormalVectorRDD( + sc: SparkContext, + mean: float, + std, + numRows: int, + numCols: int, + numPartitions: Optional[int] = ..., + seed: Optional[int] = ..., + ) -> RDD[Vector]: ... + @staticmethod + def poissonVectorRDD( + sc: SparkContext, + mean: float, + numRows: int, + numCols: int, + numPartitions: Optional[int] = ..., + seed: Optional[int] = ..., + ) -> RDD[Vector]: ... + @staticmethod + def exponentialVectorRDD( + sc: SparkContext, + mean: float, + numRows: int, + numCols: int, + numPartitions: Optional[int] = ..., + seed: Optional[int] = ..., + ) -> RDD[Vector]: ... + @staticmethod + def gammaVectorRDD( + sc: SparkContext, + shape: float, + scale: float, + numRows: int, + numCols: int, + numPartitions: Optional[int] = ..., + seed: Optional[int] = ..., + ) -> RDD[Vector]: ... diff --git a/python/pyspark/mllib/recommendation.pyi b/python/pyspark/mllib/recommendation.pyi new file mode 100644 index 0000000000000..e2f15494209e9 --- /dev/null +++ b/python/pyspark/mllib/recommendation.pyi @@ -0,0 +1,75 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import List, Optional, Tuple, Union + +import array +from collections import namedtuple + +from pyspark.context import SparkContext +from pyspark.rdd import RDD +from pyspark.mllib.common import JavaModelWrapper +from pyspark.mllib.util import JavaLoader, JavaSaveable + +class Rating(namedtuple("Rating", ["user", "product", "rating"])): + def __reduce__(self): ... + +class MatrixFactorizationModel( + JavaModelWrapper, JavaSaveable, JavaLoader[MatrixFactorizationModel] +): + def predict(self, user: int, product: int) -> float: ... + def predictAll(self, user_product: RDD[Tuple[int, int]]) -> RDD[Rating]: ... + def userFeatures(self) -> RDD[Tuple[int, array.array]]: ... + def productFeatures(self) -> RDD[Tuple[int, array.array]]: ... + def recommendUsers(self, product: int, num: int) -> List[Rating]: ... + def recommendProducts(self, user: int, num: int) -> List[Rating]: ... + def recommendProductsForUsers( + self, num: int + ) -> RDD[Tuple[int, Tuple[Rating, ...]]]: ... + def recommendUsersForProducts( + self, num: int + ) -> RDD[Tuple[int, Tuple[Rating, ...]]]: ... + @property + def rank(self) -> int: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> MatrixFactorizationModel: ... + +class ALS: + @classmethod + def train( + cls, + ratings: Union[RDD[Rating], RDD[Tuple[int, int, float]]], + rank: int, + iterations: int = ..., + lambda_: float = ..., + blocks: int = ..., + nonnegative: bool = ..., + seed: Optional[int] = ..., + ) -> MatrixFactorizationModel: ... + @classmethod + def trainImplicit( + cls, + ratings: Union[RDD[Rating], RDD[Tuple[int, int, float]]], + rank: int, + iterations: int = ..., + lambda_: float = ..., + blocks: int = ..., + alpha: float = ..., + nonnegative: bool = ..., + seed: Optional[int] = ..., + ) -> MatrixFactorizationModel: ... diff --git a/python/pyspark/mllib/regression.pyi b/python/pyspark/mllib/regression.pyi new file mode 100644 index 0000000000000..0283378b98cf3 --- /dev/null +++ b/python/pyspark/mllib/regression.pyi @@ -0,0 +1,155 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
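A minimal usage sketch for the recommendation stubs above, assuming an already-initialized SparkContext bound to `sc`: with these annotations a type checker should infer `RDD[Rating]` from `predictAll` and `List[Rating]` from `recommendProducts`.

```
from pyspark.mllib.recommendation import ALS, Rating

# `sc` is assumed to be an existing SparkContext (e.g. the pyspark shell one).
ratings = sc.parallelize([Rating(1, 1, 5.0), Rating(1, 2, 1.0), Rating(2, 1, 2.0)])

model = ALS.train(ratings, rank=10, iterations=10, seed=42)

model.predict(2, 2)                                  # float
model.predictAll(sc.parallelize([(1, 2), (2, 1)]))   # RDD[Rating]
model.recommendProducts(1, 2)                        # List[Rating]
```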
+ +from typing import overload +from typing import Iterable, Optional, Tuple, TypeVar +from pyspark.rdd import RDD +from pyspark.mllib._typing import VectorLike +from pyspark.context import SparkContext +from pyspark.mllib.linalg import Vector +from pyspark.mllib.util import Saveable, Loader +from pyspark.streaming.dstream import DStream +from numpy import ndarray # type: ignore[import] + +K = TypeVar("K") + +class LabeledPoint: + label: int + features: Vector + def __init__(self, label: float, features: Iterable[float]) -> None: ... + def __reduce__(self) -> Tuple[type, Tuple[bytes]]: ... + +class LinearModel: + def __init__(self, weights: Vector, intercept: float) -> None: ... + @property + def weights(self) -> Vector: ... + @property + def intercept(self) -> float: ... + +class LinearRegressionModelBase(LinearModel): + @overload + def predict(self, x: Vector) -> float: ... + @overload + def predict(self, x: RDD[Vector]) -> RDD[float]: ... + +class LinearRegressionModel(LinearRegressionModelBase): + def save(self, sc: SparkContext, path: str) -> None: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> LinearRegressionModel: ... + +class LinearRegressionWithSGD: + @classmethod + def train( + cls, + data: RDD[LabeledPoint], + iterations: int = ..., + step: float = ..., + miniBatchFraction: float = ..., + initialWeights: Optional[VectorLike] = ..., + regParam: float = ..., + regType: Optional[str] = ..., + intercept: bool = ..., + validateData: bool = ..., + convergenceTol: float = ..., + ) -> LinearRegressionModel: ... + +class LassoModel(LinearRegressionModelBase): + def save(self, sc: SparkContext, path: str) -> None: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> LassoModel: ... + +class LassoWithSGD: + @classmethod + def train( + cls, + data: RDD[LabeledPoint], + iterations: int = ..., + step: float = ..., + regParam: float = ..., + miniBatchFraction: float = ..., + initialWeights: Optional[VectorLike] = ..., + intercept: bool = ..., + validateData: bool = ..., + convergenceTol: float = ..., + ) -> LassoModel: ... + +class RidgeRegressionModel(LinearRegressionModelBase): + def save(self, sc: SparkContext, path: str) -> None: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> RidgeRegressionModel: ... + +class RidgeRegressionWithSGD: + @classmethod + def train( + cls, + data: RDD[LabeledPoint], + iterations: int = ..., + step: float = ..., + regParam: float = ..., + miniBatchFraction: float = ..., + initialWeights: Optional[VectorLike] = ..., + intercept: bool = ..., + validateData: bool = ..., + convergenceTol: float = ..., + ) -> RidgeRegressionModel: ... + +class IsotonicRegressionModel(Saveable, Loader[IsotonicRegressionModel]): + boundaries: ndarray + predictions: ndarray + isotonic: bool + def __init__( + self, boundaries: ndarray, predictions: ndarray, isotonic: bool + ) -> None: ... + @overload + def predict(self, x: Vector) -> ndarray: ... + @overload + def predict(self, x: RDD[Vector]) -> RDD[ndarray]: ... + def save(self, sc: SparkContext, path: str) -> None: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> IsotonicRegressionModel: ... + +class IsotonicRegression: + @classmethod + def train( + cls, data: RDD[VectorLike], isotonic: bool = ... + ) -> IsotonicRegressionModel: ... + +class StreamingLinearAlgorithm: + def __init__(self, model: LinearModel) -> None: ... + def latestModel(self) -> LinearModel: ... + def predictOn(self, dstream: DStream[VectorLike]) -> DStream[float]: ... 
+ def predictOnValues( + self, dstream: DStream[Tuple[K, VectorLike]] + ) -> DStream[Tuple[K, float]]: ... + +class StreamingLinearRegressionWithSGD(StreamingLinearAlgorithm): + stepSize: float + numIterations: int + miniBatchFraction: float + convergenceTol: float + def __init__( + self, + stepSize: float = ..., + numIterations: int = ..., + miniBatchFraction: float = ..., + convergenceTol: float = ..., + ) -> None: ... + def setInitialWeights( + self, initialWeights: VectorLike + ) -> StreamingLinearRegressionWithSGD: ... + def trainOn(self, dstream: DStream[LabeledPoint]) -> None: ... diff --git a/python/pyspark/mllib/stat/KernelDensity.pyi b/python/pyspark/mllib/stat/KernelDensity.pyi new file mode 100644 index 0000000000000..efc70c9470dbe --- /dev/null +++ b/python/pyspark/mllib/stat/KernelDensity.pyi @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Iterable +from pyspark.rdd import RDD +from numpy import ndarray # type: ignore[import] + +class KernelDensity: + def __init__(self) -> None: ... + def setBandwidth(self, bandwidth: float) -> None: ... + def setSample(self, sample: RDD[float]) -> None: ... + def estimate(self, points: Iterable[float]) -> ndarray: ... diff --git a/python/pyspark/mllib/stat/__init__.pyi b/python/pyspark/mllib/stat/__init__.pyi new file mode 100644 index 0000000000000..bdd080a08cd56 --- /dev/null +++ b/python/pyspark/mllib/stat/__init__.pyi @@ -0,0 +1,29 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
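The KernelDensity stub above mirrors the existing runtime API one-to-one; a small sketch, again assuming an active SparkContext `sc`:

```
from pyspark.mllib.stat import KernelDensity

# `sc` is assumed to be an existing SparkContext.
sample = sc.parallelize([1.0, 2.0, 3.0, 4.0, 5.0])  # RDD[float], matching setSample

kd = KernelDensity()
kd.setSample(sample)
kd.setBandwidth(0.5)
densities = kd.estimate([2.0, 3.5])                  # numpy.ndarray per the stub
```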
+ +from pyspark.mllib.stat.KernelDensity import ( # noqa: F401 + KernelDensity as KernelDensity, +) +from pyspark.mllib.stat._statistics import ( # noqa: F401 + MultivariateStatisticalSummary as MultivariateStatisticalSummary, + Statistics as Statistics, +) +from pyspark.mllib.stat.distribution import ( # noqa: F401 + MultivariateGaussian as MultivariateGaussian, +) +from pyspark.mllib.stat.test import ChiSqTestResult as ChiSqTestResult # noqa: F401 diff --git a/python/pyspark/mllib/stat/_statistics.pyi b/python/pyspark/mllib/stat/_statistics.pyi new file mode 100644 index 0000000000000..4d2701d486881 --- /dev/null +++ b/python/pyspark/mllib/stat/_statistics.pyi @@ -0,0 +1,69 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import List, Optional, overload, Union +from typing_extensions import Literal + +from numpy import ndarray # type: ignore[import] + +from pyspark.mllib.common import JavaModelWrapper +from pyspark.mllib.linalg import Vector, Matrix +from pyspark.mllib.regression import LabeledPoint +from pyspark.mllib.stat.test import ChiSqTestResult, KolmogorovSmirnovTestResult +from pyspark.rdd import RDD + +CorrelationMethod = Union[Literal["spearman"], Literal["pearson"]] + +class MultivariateStatisticalSummary(JavaModelWrapper): + def mean(self) -> ndarray: ... + def variance(self) -> ndarray: ... + def count(self) -> int: ... + def numNonzeros(self) -> ndarray: ... + def max(self) -> ndarray: ... + def min(self) -> ndarray: ... + def normL1(self) -> ndarray: ... + def normL2(self) -> ndarray: ... + +class Statistics: + @staticmethod + def colStats(rdd: RDD[Vector]) -> MultivariateStatisticalSummary: ... + @overload + @staticmethod + def corr( + x: RDD[Vector], *, method: Optional[CorrelationMethod] = ... + ) -> Matrix: ... + @overload + @staticmethod + def corr( + x: RDD[float], y: RDD[float], method: Optional[CorrelationMethod] = ... + ) -> float: ... + @overload + @staticmethod + def chiSqTest(observed: Matrix) -> ChiSqTestResult: ... + @overload + @staticmethod + def chiSqTest( + observed: Vector, expected: Optional[Vector] = ... + ) -> ChiSqTestResult: ... + @overload + @staticmethod + def chiSqTest(observed: RDD[LabeledPoint]) -> List[ChiSqTestResult]: ... + @staticmethod + def kolmogorovSmirnovTest( + data, distName: Literal["norm"] = ..., *params: float + ) -> KolmogorovSmirnovTestResult: ... diff --git a/python/pyspark/mllib/stat/distribution.pyi b/python/pyspark/mllib/stat/distribution.pyi new file mode 100644 index 0000000000000..8bb93f91b07b5 --- /dev/null +++ b/python/pyspark/mllib/stat/distribution.pyi @@ -0,0 +1,25 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import NamedTuple + +from pyspark.mllib.linalg import Vector, Matrix + +class MultivariateGaussian(NamedTuple): + mu: Vector + sigma: Matrix diff --git a/python/pyspark/mllib/stat/test.pyi b/python/pyspark/mllib/stat/test.pyi new file mode 100644 index 0000000000000..a65f8e40e87d8 --- /dev/null +++ b/python/pyspark/mllib/stat/test.pyi @@ -0,0 +1,39 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Generic, Tuple, TypeVar + +from pyspark.mllib.common import JavaModelWrapper + +DF = TypeVar("DF", int, float, Tuple[int, ...], Tuple[float, ...]) + +class TestResult(JavaModelWrapper, Generic[DF]): + @property + def pValue(self) -> float: ... + @property + def degreesOfFreedom(self) -> DF: ... + @property + def statistic(self) -> float: ... + @property + def nullHypothesis(self) -> str: ... + +class ChiSqTestResult(TestResult[int]): + @property + def method(self) -> str: ... + +class KolmogorovSmirnovTestResult(TestResult[int]): ... 
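The `Statistics` overloads in `_statistics.pyi` are what let the checker pick the correct result type for each argument shape. A short sketch, assuming an active SparkContext `sc`:

```
from pyspark.mllib.linalg import Matrices, Vectors
from pyspark.mllib.stat import Statistics

# Goodness-of-fit test: the Vector overload yields a single ChiSqTestResult.
gof = Statistics.chiSqTest(Vectors.dense([4.0, 6.0, 5.0]))
print(gof.method, gof.statistic, gof.pValue, gof.degreesOfFreedom)

# Independence test: the Matrix overload also yields a ChiSqTestResult.
ind = Statistics.chiSqTest(Matrices.dense(2, 2, [1.0, 3.0, 5.0, 7.0]))

# corr on two RDD[float] is typed as float; on a single RDD[Vector] it returns a Matrix.
x = sc.parallelize([1.0, 2.0, 3.0])
y = sc.parallelize([2.0, 4.0, 6.1])
print(Statistics.corr(x, y, method="pearson"))
```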
diff --git a/python/pyspark/mllib/tests/test_algorithms.py b/python/pyspark/mllib/tests/test_algorithms.py index 27a340068a52a..89d09fae5cfbc 100644 --- a/python/pyspark/mllib/tests/test_algorithms.py +++ b/python/pyspark/mllib/tests/test_algorithms.py @@ -295,7 +295,7 @@ def test_fpgrowth(self): from pyspark.mllib.tests.test_algorithms import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/mllib/tests/test_feature.py b/python/pyspark/mllib/tests/test_feature.py index 165c1466ddfa8..7fba83b3ea35f 100644 --- a/python/pyspark/mllib/tests/test_feature.py +++ b/python/pyspark/mllib/tests/test_feature.py @@ -185,7 +185,7 @@ def test_pca(self): from pyspark.mllib.tests.test_feature import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/mllib/tests/test_linalg.py b/python/pyspark/mllib/tests/test_linalg.py index 0e25836599307..a8303ba4341f3 100644 --- a/python/pyspark/mllib/tests/test_linalg.py +++ b/python/pyspark/mllib/tests/test_linalg.py @@ -22,8 +22,10 @@ import pyspark.ml.linalg as newlinalg from pyspark.serializers import PickleSerializer -from pyspark.mllib.linalg import Vector, SparseVector, DenseVector, VectorUDT, _convert_to_vector, \ +from pyspark.mllib.linalg import ( # type: ignore[attr-defined] + Vector, SparseVector, DenseVector, VectorUDT, _convert_to_vector, DenseMatrix, SparseMatrix, Vectors, Matrices, MatrixUDT +) from pyspark.mllib.linalg.distributed import RowMatrix, IndexedRowMatrix from pyspark.mllib.regression import LabeledPoint from pyspark.sql import Row @@ -641,7 +643,7 @@ def test_regression(self): from pyspark.mllib.tests.test_linalg import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/mllib/tests/test_stat.py b/python/pyspark/mllib/tests/test_stat.py index 6ed0589387a46..414106fe51cc8 100644 --- a/python/pyspark/mllib/tests/test_stat.py +++ b/python/pyspark/mllib/tests/test_stat.py @@ -180,7 +180,7 @@ def test_R_implementation_equivalence(self): from pyspark.mllib.tests.test_stat import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/mllib/tests/test_streaming_algorithms.py b/python/pyspark/mllib/tests/test_streaming_algorithms.py index 666f6f4d8628b..b94fb2778d88d 100644 --- a/python/pyspark/mllib/tests/test_streaming_algorithms.py +++ b/python/pyspark/mllib/tests/test_streaming_algorithms.py @@ -469,7 +469,7 @@ def condition(): from pyspark.mllib.tests.test_streaming_algorithms import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/mllib/tests/test_util.py b/python/pyspark/mllib/tests/test_util.py index 12578e417bcdf..2be3f17069fd4 100644 --- a/python/pyspark/mllib/tests/test_util.py +++ b/python/pyspark/mllib/tests/test_util.py @@ -19,7 +19,7 @@ import tempfile import unittest -from 
pyspark.mllib.common import _to_java_object_rdd +from pyspark.mllib.common import _to_java_object_rdd # type: ignore[attr-defined] from pyspark.mllib.util import LinearDataGenerator from pyspark.mllib.util import MLUtils from pyspark.mllib.linalg import SparseVector, DenseVector, Vectors @@ -97,7 +97,7 @@ def test_to_java_object_rdd(self): # SPARK-6660 from pyspark.mllib.tests.test_util import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/mllib/tree.pyi b/python/pyspark/mllib/tree.pyi new file mode 100644 index 0000000000000..511afdeb063d9 --- /dev/null +++ b/python/pyspark/mllib/tree.pyi @@ -0,0 +1,126 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import Dict, Optional, Tuple +from pyspark.mllib._typing import VectorLike +from pyspark.rdd import RDD +from pyspark.mllib.common import JavaModelWrapper +from pyspark.mllib.regression import LabeledPoint +from pyspark.mllib.util import JavaLoader, JavaSaveable + +class TreeEnsembleModel(JavaModelWrapper, JavaSaveable): + @overload + def predict(self, x: VectorLike) -> float: ... + @overload + def predict(self, x: RDD[VectorLike]) -> RDD[VectorLike]: ... + def numTrees(self) -> int: ... + def totalNumNodes(self) -> int: ... + def toDebugString(self) -> str: ... + +class DecisionTreeModel(JavaModelWrapper, JavaSaveable, JavaLoader[DecisionTreeModel]): + @overload + def predict(self, x: VectorLike) -> float: ... + @overload + def predict(self, x: RDD[VectorLike]) -> RDD[VectorLike]: ... + def numNodes(self) -> int: ... + def depth(self) -> int: ... + def toDebugString(self) -> str: ... + +class DecisionTree: + @classmethod + def trainClassifier( + cls, + data: RDD[LabeledPoint], + numClasses: int, + categoricalFeaturesInfo: Dict[int, int], + impurity: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + minInstancesPerNode: int = ..., + minInfoGain: float = ..., + ) -> DecisionTreeModel: ... + @classmethod + def trainRegressor( + cls, + data: RDD[LabeledPoint], + categoricalFeaturesInfo: Dict[int, int], + impurity: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + minInstancesPerNode: int = ..., + minInfoGain: float = ..., + ) -> DecisionTreeModel: ... + +class RandomForestModel(TreeEnsembleModel, JavaLoader[RandomForestModel]): ... + +class RandomForest: + supportedFeatureSubsetStrategies: Tuple[str, ...] 
+ @classmethod + def trainClassifier( + cls, + data: RDD[LabeledPoint], + numClasses: int, + categoricalFeaturesInfo: Dict[int, int], + numTrees: int, + featureSubsetStrategy: str = ..., + impurity: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + seed: Optional[int] = ..., + ) -> RandomForestModel: ... + @classmethod + def trainRegressor( + cls, + data: RDD[LabeledPoint], + categoricalFeaturesInfo: Dict[int, int], + numTrees: int, + featureSubsetStrategy: str = ..., + impurity: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + seed: Optional[int] = ..., + ) -> RandomForestModel: ... + +class GradientBoostedTreesModel( + TreeEnsembleModel, JavaLoader[GradientBoostedTreesModel] +): ... + +class GradientBoostedTrees: + @classmethod + def trainClassifier( + cls, + data: RDD[LabeledPoint], + categoricalFeaturesInfo: Dict[int, int], + loss: str = ..., + numIterations: int = ..., + learningRate: float = ..., + maxDepth: int = ..., + maxBins: int = ..., + ) -> GradientBoostedTreesModel: ... + @classmethod + def trainRegressor( + cls, + data: RDD[LabeledPoint], + categoricalFeaturesInfo: Dict[int, int], + loss: str = ..., + numIterations: int = ..., + learningRate: float = ..., + maxDepth: int = ..., + maxBins: int = ..., + ) -> GradientBoostedTreesModel: ... diff --git a/python/pyspark/mllib/util.pyi b/python/pyspark/mllib/util.pyi new file mode 100644 index 0000000000000..265f765ee263a --- /dev/null +++ b/python/pyspark/mllib/util.pyi @@ -0,0 +1,90 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Generic, List, Optional, TypeVar + +from pyspark.mllib._typing import VectorLike +from pyspark.context import SparkContext +from pyspark.mllib.linalg import Vector +from pyspark.mllib.regression import LabeledPoint +from pyspark.rdd import RDD +from pyspark.sql.dataframe import DataFrame + +T = TypeVar("T") + +class MLUtils: + @staticmethod + def loadLibSVMFile( + sc: SparkContext, + path: str, + numFeatures: int = ..., + minPartitions: Optional[int] = ..., + ) -> RDD[LabeledPoint]: ... + @staticmethod + def saveAsLibSVMFile(data: RDD[LabeledPoint], dir: str) -> None: ... + @staticmethod + def loadLabeledPoints( + sc: SparkContext, path: str, minPartitions: Optional[int] = ... + ) -> RDD[LabeledPoint]: ... + @staticmethod + def appendBias(data: Vector) -> Vector: ... + @staticmethod + def loadVectors(sc: SparkContext, path: str) -> RDD[Vector]: ... + @staticmethod + def convertVectorColumnsToML(dataset: DataFrame, *cols: str) -> DataFrame: ... + @staticmethod + def convertVectorColumnsFromML(dataset: DataFrame, *cols: str) -> DataFrame: ... + @staticmethod + def convertMatrixColumnsToML(dataset: DataFrame, *cols: str) -> DataFrame: ... 
+ @staticmethod + def convertMatrixColumnsFromML(dataset: DataFrame, *cols: str) -> DataFrame: ... + +class Saveable: + def save(self, sc: SparkContext, path: str) -> None: ... + +class JavaSaveable(Saveable): + def save(self, sc: SparkContext, path: str) -> None: ... + +class Loader(Generic[T]): + @classmethod + def load(cls, sc: SparkContext, path: str) -> T: ... + +class JavaLoader(Loader[T]): + @classmethod + def load(cls, sc: SparkContext, path: str) -> T: ... + +class LinearDataGenerator: + @staticmethod + def generateLinearInput( + intercept: float, + weights: VectorLike, + xMean: VectorLike, + xVariance: VectorLike, + nPoints: int, + seed: int, + eps: float, + ) -> List[LabeledPoint]: ... + @staticmethod + def generateLinearRDD( + sc: SparkContext, + nexamples: int, + nfeatures: int, + eps: float, + nParts: int = ..., + intercept: float = ..., + ) -> RDD[LabeledPoint]: ... diff --git a/python/pyspark/profiler.pyi b/python/pyspark/profiler.pyi new file mode 100644 index 0000000000000..7276da529fa17 --- /dev/null +++ b/python/pyspark/profiler.pyi @@ -0,0 +1,56 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, Callable, List, Optional, Tuple, Type + +import pstats + +from pyspark.accumulators import AccumulatorParam +from pyspark.context import SparkContext + +class ProfilerCollector: + profiler_cls: Type[Profiler] + profile_dump_path: Optional[str] + profilers: List[Tuple[int, Profiler, bool]] + def __init__( + self, profiler_cls: Type[Profiler], dump_path: Optional[str] = ... + ) -> None: ... + def new_profiler(self, ctx: SparkContext) -> Profiler: ... + def add_profiler(self, id: int, profiler: Profiler) -> None: ... + def dump_profiles(self, path: str) -> None: ... + def show_profiles(self) -> None: ... + +class Profiler: + def __init__(self, ctx: SparkContext) -> None: ... + def profile(self, func: Callable[[], Any]) -> None: ... + def stats(self) -> pstats.Stats: ... + def show(self, id: int) -> None: ... + def dump(self, id: int, path: str) -> None: ... + +class PStatsParam(AccumulatorParam): + @staticmethod + def zero(value: pstats.Stats) -> None: ... + @staticmethod + def addInPlace( + value1: Optional[pstats.Stats], value2: Optional[pstats.Stats] + ) -> Optional[pstats.Stats]: ... + +class BasicProfiler(Profiler): + def __init__(self, ctx: SparkContext) -> None: ... + def profile(self, func: Callable[[], Any]) -> None: ... + def stats(self) -> pstats.Stats: ... 
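The profiler stubs describe the objects behind `spark.python.profile`; a self-contained sketch of how they are typically exercised (local mode, default `BasicProfiler`):

```
from pyspark import SparkConf, SparkContext

conf = SparkConf().set("spark.python.profile", "true")
sc = SparkContext("local[2]", "profile-demo", conf=conf)

sc.parallelize(range(10000)).map(lambda x: x * x).count()
sc.show_profiles()   # prints the pstats collected by BasicProfiler, per RDD id
sc.stop()
```

A custom `Profiler` subclass can also be passed via `SparkContext(..., profiler_cls=...)`, which is why `ProfilerCollector.profiler_cls` is typed as `Type[Profiler]`.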
diff --git a/python/pyspark/py.typed b/python/pyspark/py.typed new file mode 100644 index 0000000000000..b648ac9233330 --- /dev/null +++ b/python/pyspark/py.typed @@ -0,0 +1 @@ +partial diff --git a/python/pyspark/rdd.pyi b/python/pyspark/rdd.pyi new file mode 100644 index 0000000000000..35c49e952b0cd --- /dev/null +++ b/python/pyspark/rdd.pyi @@ -0,0 +1,479 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import ( + Any, + Callable, + Dict, + Generic, + Hashable, + Iterable, + Iterator, + List, + Optional, + Tuple, + Union, + TypeVar, +) +from typing_extensions import Literal + +from numpy import int32, int64, float32, float64, ndarray # type: ignore[import] + +from pyspark._typing import SupportsOrdering +from pyspark.sql.pandas._typing import ( + PandasScalarUDFType, + PandasScalarIterUDFType, + PandasGroupedMapUDFType, + PandasCogroupedMapUDFType, + PandasGroupedAggUDFType, + PandasMapIterUDFType, +) +import pyspark.context +from pyspark.resultiterable import ResultIterable +from pyspark.serializers import Serializer +from pyspark.storagelevel import StorageLevel +from pyspark.resource.requests import ( # noqa: F401 + ExecutorResourceRequests, + TaskResourceRequests, +) +from pyspark.resource.profile import ResourceProfile +from pyspark.statcounter import StatCounter +from pyspark.sql.dataframe import DataFrame +from pyspark.sql.types import StructType +from pyspark.sql._typing import RowLike +from py4j.java_gateway import JavaObject # type: ignore[import] + +T = TypeVar("T") +U = TypeVar("U") +K = TypeVar("K", bound=Hashable) +V = TypeVar("V") +V1 = TypeVar("V1") +V2 = TypeVar("V2") +V3 = TypeVar("V3") +O = TypeVar("O", bound=SupportsOrdering) +NumberOrArray = TypeVar( + "NumberOrArray", float, int, complex, int32, int64, float32, float64, ndarray +) + +def portable_hash(x: Hashable) -> int: ... + +class PythonEvalType: + NON_UDF: Literal[0] + SQL_BATCHED_UDF: Literal[100] + SQL_SCALAR_PANDAS_UDF: PandasScalarUDFType + SQL_GROUPED_MAP_PANDAS_UDF: PandasGroupedMapUDFType + SQL_GROUPED_AGG_PANDAS_UDF: PandasGroupedAggUDFType + SQL_WINDOW_AGG_PANDAS_UDF: Literal[203] + SQL_SCALAR_PANDAS_ITER_UDF: PandasScalarIterUDFType + SQL_MAP_PANDAS_ITER_UDF: PandasMapIterUDFType + SQL_COGROUPED_MAP_PANDAS_UDF: PandasCogroupedMapUDFType + +class BoundedFloat(float): + def __new__(cls, mean: float, confidence: float, low: float, high: float): ... + +class Partitioner: + numPartitions: int + partitionFunc: Callable[[Any], int] + def __init__(self, numPartitions, partitionFunc) -> None: ... + def __eq__(self, other: Any) -> bool: ... + def __call__(self, k: Any) -> int: ... 
+ +class RDD(Generic[T]): + is_cached: bool + is_checkpointed: bool + ctx: pyspark.context.SparkContext + partitioner: Optional[Partitioner] + def __init__( + self, + jrdd: JavaObject, + ctx: pyspark.context.SparkContext, + jrdd_deserializer: Serializer = ..., + ) -> None: ... + def id(self) -> int: ... + def __getnewargs__(self) -> Any: ... + @property + def context(self) -> pyspark.context.SparkContext: ... + def cache(self) -> RDD[T]: ... + def persist(self, storageLevel: StorageLevel = ...) -> RDD[T]: ... + def unpersist(self, blocking: bool = ...) -> RDD[T]: ... + def checkpoint(self) -> None: ... + def isCheckpointed(self) -> bool: ... + def localCheckpoint(self) -> None: ... + def isLocallyCheckpointed(self) -> bool: ... + def getCheckpointFile(self) -> Optional[str]: ... + def map(self, f: Callable[[T], U], preservesPartitioning: bool = ...) -> RDD[U]: ... + def flatMap( + self, f: Callable[[T], Iterable[U]], preservesPartitioning: bool = ... + ) -> RDD[U]: ... + def mapPartitions( + self, f: Callable[[Iterable[T]], Iterable[U]], preservesPartitioning: bool = ... + ) -> RDD[U]: ... + def mapPartitionsWithIndex( + self, + f: Callable[[int, Iterable[T]], Iterable[U]], + preservesPartitioning: bool = ..., + ) -> RDD[U]: ... + def mapPartitionsWithSplit( + self, + f: Callable[[int, Iterable[T]], Iterable[U]], + preservesPartitioning: bool = ..., + ) -> RDD[U]: ... + def getNumPartitions(self) -> int: ... + def filter(self, f: Callable[[T], bool]) -> RDD[T]: ... + def distinct(self, numPartitions: Optional[int] = ...) -> RDD[T]: ... + def sample( + self, withReplacement: bool, fraction: float, seed: Optional[int] = ... + ) -> RDD[T]: ... + def randomSplit( + self, weights: List[Union[int, float]], seed: Optional[int] = ... + ) -> List[RDD[T]]: ... + def takeSample( + self, withReplacement: bool, num: int, seed: Optional[int] = ... + ) -> List[T]: ... + def union(self, other: RDD[U]) -> RDD[Union[T, U]]: ... + def intersection(self, other: RDD[T]) -> RDD[T]: ... + def __add__(self, other: RDD[T]) -> RDD[T]: ... + @overload + def repartitionAndSortWithinPartitions( + self: RDD[Tuple[O, V]], + numPartitions: Optional[int] = ..., + partitionFunc: Callable[[O], int] = ..., + ascending: bool = ..., + ) -> RDD[Tuple[O, V]]: ... + @overload + def repartitionAndSortWithinPartitions( + self: RDD[Tuple[K, V]], + numPartitions: Optional[int], + partitionFunc: Callable[[K], int], + ascending: bool, + keyfunc: Callable[[K], O], + ) -> RDD[Tuple[K, V]]: ... + @overload + def repartitionAndSortWithinPartitions( + self: RDD[Tuple[K, V]], + numPartitions: Optional[int] = ..., + partitionFunc: Callable[[K], int] = ..., + ascending: bool = ..., + *, + keyfunc: Callable[[K], O] + ) -> RDD[Tuple[K, V]]: ... + @overload + def sortByKey( + self: RDD[Tuple[O, V]], + ascending: bool = ..., + numPartitions: Optional[int] = ..., + ) -> RDD[Tuple[K, V]]: ... + @overload + def sortByKey( + self: RDD[Tuple[K, V]], + ascending: bool, + numPartitions: int, + keyfunc: Callable[[K], O], + ) -> RDD[Tuple[K, V]]: ... + @overload + def sortByKey( + self: RDD[Tuple[K, V]], + ascending: bool = ..., + numPartitions: Optional[int] = ..., + *, + keyfunc: Callable[[K], O] + ) -> RDD[Tuple[K, V]]: ... + def sortBy( + self: RDD[T], + keyfunc: Callable[[T], O], + ascending: bool = ..., + numPartitions: Optional[int] = ..., + ) -> RDD[T]: ... + def glom(self) -> RDD[List[T]]: ... + def cartesian(self, other: RDD[U]) -> RDD[Tuple[T, U]]: ... 
+ def groupBy( + self, + f: Callable[[T], K], + numPartitions: Optional[int] = ..., + partitionFunc: Callable[[K], int] = ..., + ) -> RDD[Tuple[K, Iterable[T]]]: ... + def pipe( + self, command: str, env: Optional[Dict[str, str]] = ..., checkCode: bool = ... + ) -> RDD[str]: ... + def foreach(self, f: Callable[[T], None]) -> None: ... + def foreachPartition(self, f: Callable[[Iterable[T]], None]) -> None: ... + def collect(self) -> List[T]: ... + def collectWithJobGroup( + self, groupId: str, description: str, interruptOnCancel: bool = ... + ) -> List[T]: ... + def reduce(self, f: Callable[[T, T], T]) -> T: ... + def treeReduce(self, f: Callable[[T, T], T], depth: int = ...) -> T: ... + def fold(self, zeroValue: T, op: Callable[[T, T], T]) -> T: ... + def aggregate( + self, zeroValue: U, seqOp: Callable[[U, T], U], combOp: Callable[[U, U], U] + ) -> U: ... + def treeAggregate( + self, + zeroValue: U, + seqOp: Callable[[U, T], U], + combOp: Callable[[U, U], U], + depth: int = ..., + ) -> U: ... + @overload + def max(self: RDD[O]) -> O: ... + @overload + def max(self, key: Callable[[T], O]) -> T: ... + @overload + def min(self: RDD[O]) -> O: ... + @overload + def min(self, key: Callable[[T], O]) -> T: ... + def sum(self: RDD[NumberOrArray]) -> NumberOrArray: ... + def count(self) -> int: ... + def stats(self: RDD[NumberOrArray]) -> StatCounter: ... + def histogram(self, buckets: List[T]) -> Tuple[List[T], List[int]]: ... + def mean(self: RDD[NumberOrArray]) -> NumberOrArray: ... + def variance(self: RDD[NumberOrArray]) -> NumberOrArray: ... + def stdev(self: RDD[NumberOrArray]) -> NumberOrArray: ... + def sampleStdev(self: RDD[NumberOrArray]) -> NumberOrArray: ... + def sampleVariance(self: RDD[NumberOrArray]) -> NumberOrArray: ... + def countByValue(self: RDD[K]) -> Dict[K, int]: ... + @overload + def top(self: RDD[O], num: int) -> List[O]: ... + @overload + def top(self: RDD[T], num: int, key: Callable[[T], O]) -> List[T]: ... + @overload + def takeOrdered(self: RDD[O], num: int) -> List[O]: ... + @overload + def takeOrdered(self: RDD[T], num: int, key: Callable[[T], O]) -> List[T]: ... + def take(self, num: int) -> List[T]: ... + def first(self) -> T: ... + def isEmpty(self) -> bool: ... + def saveAsNewAPIHadoopDataset( + self: RDD[Tuple[K, V]], + conf: Dict[str, str], + keyConverter: Optional[str] = ..., + valueConverter: Optional[str] = ..., + ) -> None: ... + def saveAsNewAPIHadoopFile( + self: RDD[Tuple[K, V]], + path: str, + outputFormatClass: str, + keyClass: Optional[str] = ..., + valueClass: Optional[str] = ..., + keyConverter: Optional[str] = ..., + valueConverter: Optional[str] = ..., + conf: Optional[Dict[str, str]] = ..., + ) -> None: ... + def saveAsHadoopDataset( + self: RDD[Tuple[K, V]], + conf: Dict[str, str], + keyConverter: Optional[str] = ..., + valueConverter: Optional[str] = ..., + ) -> None: ... + def saveAsHadoopFile( + self: RDD[Tuple[K, V]], + path: str, + outputFormatClass: str, + keyClass: Optional[str] = ..., + valueClass: Optional[str] = ..., + keyConverter: Optional[str] = ..., + valueConverter: Optional[str] = ..., + conf: Optional[str] = ..., + compressionCodecClass: Optional[str] = ..., + ) -> None: ... + def saveAsSequenceFile( + self: RDD[Tuple[K, V]], path: str, compressionCodecClass: Optional[str] = ... + ) -> None: ... + def saveAsPickleFile(self, path: str, batchSize: int = ...) -> None: ... + def saveAsTextFile( + self, path: str, compressionCodecClass: Optional[str] = ... + ) -> None: ... 
+ def collectAsMap(self: RDD[Tuple[K, V]]) -> Dict[K, V]: ... + def keys(self: RDD[Tuple[K, V]]) -> RDD[K]: ... + def values(self: RDD[Tuple[K, V]]) -> RDD[V]: ... + def reduceByKey( + self: RDD[Tuple[K, V]], + func: Callable[[V, V], V], + numPartitions: Optional[int] = ..., + partitionFunc: Callable[[K], int] = ..., + ) -> RDD[Tuple[K, V]]: ... + def reduceByKeyLocally( + self: RDD[Tuple[K, V]], func: Callable[[V, V], V] + ) -> Dict[K, V]: ... + def countByKey(self: RDD[Tuple[K, V]]) -> Dict[K, int]: ... + def join( + self: RDD[Tuple[K, V]], + other: RDD[Tuple[K, U]], + numPartitions: Optional[int] = ..., + ) -> RDD[Tuple[K, Tuple[V, U]]]: ... + def leftOuterJoin( + self: RDD[Tuple[K, V]], + other: RDD[Tuple[K, U]], + numPartitions: Optional[int] = ..., + ) -> RDD[Tuple[K, Tuple[V, Optional[U]]]]: ... + def rightOuterJoin( + self: RDD[Tuple[K, V]], + other: RDD[Tuple[K, U]], + numPartitions: Optional[int] = ..., + ) -> RDD[Tuple[K, Tuple[Optional[V], U]]]: ... + def fullOuterJoin( + self: RDD[Tuple[K, V]], + other: RDD[Tuple[K, U]], + numPartitions: Optional[int] = ..., + ) -> RDD[Tuple[K, Tuple[Optional[V], Optional[U]]]]: ... + def partitionBy( + self: RDD[Tuple[K, V]], + numPartitions: int, + partitionFunc: Callable[[K], int] = ..., + ) -> RDD[Tuple[K, V]]: ... + def combineByKey( + self: RDD[Tuple[K, V]], + createCombiner: Callable[[V], U], + mergeValue: Callable[[U, V], U], + mergeCombiners: Callable[[U, U], U], + numPartitions: Optional[int] = ..., + partitionFunc: Callable[[K], int] = ..., + ) -> RDD[Tuple[K, U]]: ... + def aggregateByKey( + self: RDD[Tuple[K, V]], + zeroValue: U, + seqFunc: Callable[[U, V], U], + combFunc: Callable[[U, U], U], + numPartitions: Optional[int] = ..., + partitionFunc: Callable[[K], int] = ..., + ) -> RDD[Tuple[K, U]]: ... + def foldByKey( + self: RDD[Tuple[K, V]], + zeroValue: V, + func: Callable[[V, V], V], + numPartitions: Optional[int] = ..., + partitionFunc: Callable[[K], int] = ..., + ) -> RDD[Tuple[K, V]]: ... + def groupByKey( + self: RDD[Tuple[K, V]], + numPartitions: Optional[int] = ..., + partitionFunc: Callable[[K], int] = ..., + ) -> RDD[Tuple[K, Iterable[V]]]: ... + def flatMapValues( + self: RDD[Tuple[K, V]], f: Callable[[V], Iterable[U]] + ) -> RDD[Tuple[K, U]]: ... + def mapValues(self: RDD[Tuple[K, V]], f: Callable[[V], U]) -> RDD[Tuple[K, U]]: ... + @overload + def groupWith( + self: RDD[Tuple[K, V]], __o: RDD[Tuple[K, V1]] + ) -> RDD[Tuple[K, Tuple[ResultIterable[V], ResultIterable[V1]]]]: ... + @overload + def groupWith( + self: RDD[Tuple[K, V]], __o1: RDD[Tuple[K, V1]], __o2: RDD[Tuple[K, V2]] + ) -> RDD[ + Tuple[K, Tuple[ResultIterable[V], ResultIterable[V1], ResultIterable[V2]]] + ]: ... + @overload + def groupWith( + self: RDD[Tuple[K, V]], + other1: RDD[Tuple[K, V1]], + other2: RDD[Tuple[K, V2]], + other3: RDD[Tuple[K, V3]], + ) -> RDD[ + Tuple[ + K, + Tuple[ + ResultIterable[V], + ResultIterable[V1], + ResultIterable[V2], + ResultIterable[V3], + ], + ] + ]: ... + def cogroup( + self: RDD[Tuple[K, V]], + other: RDD[Tuple[K, U]], + numPartitions: Optional[int] = ..., + ) -> RDD[Tuple[K, Tuple[ResultIterable[V], ResultIterable[U]]]]: ... + def sampleByKey( + self: RDD[Tuple[K, V]], + withReplacement: bool, + fractions: Dict[K, Union[float, int]], + seed: Optional[int] = ..., + ) -> RDD[Tuple[K, V]]: ... + def subtractByKey( + self: RDD[Tuple[K, V]], + other: RDD[Tuple[K, U]], + numPartitions: Optional[int] = ..., + ) -> RDD[Tuple[K, V]]: ... + def subtract( + self: RDD[T], other: RDD[T], numPartitions: Optional[int] = ... 
+ ) -> RDD[T]: ... + def keyBy(self: RDD[T], f: Callable[[T], K]) -> RDD[Tuple[K, T]]: ... + def repartition(self, numPartitions: int) -> RDD[T]: ... + def coalesce(self, numPartitions: int, shuffle: bool = ...) -> RDD[T]: ... + def zip(self, other: RDD[U]) -> RDD[Tuple[T, U]]: ... + def zipWithIndex(self) -> RDD[Tuple[T, int]]: ... + def zipWithUniqueId(self) -> RDD[Tuple[T, int]]: ... + def name(self) -> str: ... + def setName(self, name: str) -> RDD[T]: ... + def toDebugString(self) -> bytes: ... + def getStorageLevel(self) -> StorageLevel: ... + def lookup(self: RDD[Tuple[K, V]], key: K) -> List[V]: ... + def countApprox(self, timeout: int, confidence: float = ...) -> int: ... + def sumApprox( + self: RDD[Union[float, int]], timeout: int, confidence: float = ... + ) -> BoundedFloat: ... + def meanApprox( + self: RDD[Union[float, int]], timeout: int, confidence: float = ... + ) -> BoundedFloat: ... + def countApproxDistinct(self, relativeSD: float = ...) -> int: ... + def toLocalIterator(self, prefetchPartitions: bool = ...) -> Iterator[T]: ... + def barrier(self: RDD[T]) -> RDDBarrier[T]: ... + def withResources(self: RDD[T], profile: ResourceProfile) -> RDD[T]: ... + def getResourceProfile(self) -> Optional[ResourceProfile]: ... + @overload + def toDF( + self: RDD[RowLike], + schema: Optional[List[str]] = ..., + sampleRatio: Optional[float] = ..., + ) -> DataFrame: ... + @overload + def toDF(self: RDD[RowLike], schema: Optional[StructType] = ...) -> DataFrame: ... + +class RDDBarrier(Generic[T]): + rdd: RDD[T] + def __init__(self, rdd: RDD[T]) -> None: ... + def mapPartitions( + self, f: Callable[[Iterable[T]], Iterable[U]], preservesPartitioning: bool = ... + ) -> RDD[U]: ... + def mapPartitionsWithIndex( + self, + f: Callable[[int, Iterable[T]], Iterable[U]], + preservesPartitioning: bool = ..., + ) -> RDD[U]: ... + +class PipelinedRDD(RDD[U], Generic[T, U]): + func: Callable[[T], U] + preservesPartitioning: bool + is_cached: bool + is_checkpointed: bool + ctx: pyspark.context.SparkContext + prev: RDD[T] + partitioner: Optional[Partitioner] + is_barrier: bool + def __init__( + self, + prev: RDD[T], + func: Callable[[Iterable[T]], Iterable[U]], + preservesPartitioning: bool = ..., + isFromBarrier: bool = ..., + ) -> None: ... + def getNumPartitions(self) -> int: ... + def id(self) -> int: ... diff --git a/python/pyspark/rddsampler.pyi b/python/pyspark/rddsampler.pyi new file mode 100644 index 0000000000000..8fbf72d90025c --- /dev/null +++ b/python/pyspark/rddsampler.pyi @@ -0,0 +1,54 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
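Much of the value of `rdd.pyi` comes from the `self: RDD[Tuple[K, V]]` annotations, which confine key/value operations to pair RDDs and let the checker propagate key and value types. A small sketch, assuming an active SparkContext `sc`:

```
from operator import add

# `sc` is assumed to be an existing SparkContext.
pairs = sc.parallelize([("a", 1), ("b", 1), ("a", 2)])  # RDD[Tuple[str, int]]

counts = pairs.reduceByKey(add)       # RDD[Tuple[str, int]]
grouped = pairs.groupByKey()          # RDD[Tuple[str, Iterable[int]]]
as_dict = pairs.collectAsMap()        # Dict[str, int]
top_vals = pairs.values().top(2)      # List[int]
```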
+ +from typing import Any, Dict, Iterator, Optional, Tuple, TypeVar + +T = TypeVar("T") +U = TypeVar("U") +K = TypeVar("K") +V = TypeVar("V") + +class RDDSamplerBase: + def __init__(self, withReplacement: bool, seed: Optional[int] = ...) -> None: ... + def initRandomGenerator(self, split: int) -> None: ... + def getUniformSample(self) -> float: ... + def getPoissonSample(self, mean: float) -> int: ... + def func(self, split: int, iterator: Iterator[Any]) -> Iterator[Any]: ... + +class RDDSampler(RDDSamplerBase): + def __init__( + self, withReplacement: bool, fraction: float, seed: Optional[int] = ... + ) -> None: ... + def func(self, split: int, iterator: Iterator[T]) -> Iterator[T]: ... + +class RDDRangeSampler(RDDSamplerBase): + def __init__( + self, lowerBound: T, upperBound: T, seed: Optional[Any] = ... + ) -> None: ... + def func(self, split: int, iterator: Iterator[T]) -> Iterator[T]: ... + +class RDDStratifiedSampler(RDDSamplerBase): + def __init__( + self, + withReplacement: bool, + fractions: Dict[K, float], + seed: Optional[int] = ..., + ) -> None: ... + def func( + self, split: int, iterator: Iterator[Tuple[K, V]] + ) -> Iterator[Tuple[K, V]]: ... diff --git a/python/pyspark/resource/__init__.pyi b/python/pyspark/resource/__init__.pyi new file mode 100644 index 0000000000000..87a9b53c268ac --- /dev/null +++ b/python/pyspark/resource/__init__.pyi @@ -0,0 +1,31 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyspark.resource.information import ( # noqa: F401 + ResourceInformation as ResourceInformation, +) +from pyspark.resource.profile import ( # noqa: F401 + ResourceProfile as ResourceProfile, + ResourceProfileBuilder as ResourceProfileBuilder, +) +from pyspark.resource.requests import ( # noqa: F401 + ExecutorResourceRequest as ExecutorResourceRequest, + ExecutorResourceRequests as ExecutorResourceRequests, + TaskResourceRequest as TaskResourceRequest, + TaskResourceRequests as TaskResourceRequests, +) diff --git a/python/pyspark/resource/information.pyi b/python/pyspark/resource/information.pyi new file mode 100644 index 0000000000000..7baa6ca8520bd --- /dev/null +++ b/python/pyspark/resource/information.pyi @@ -0,0 +1,26 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any + +class ResourceInformation: + def __init__(self, name: Any, addresses: Any) -> None: ... + @property + def name(self): ... + @property + def addresses(self): ... diff --git a/python/pyspark/resource/profile.pyi b/python/pyspark/resource/profile.pyi new file mode 100644 index 0000000000000..8ce7d93b29e93 --- /dev/null +++ b/python/pyspark/resource/profile.pyi @@ -0,0 +1,51 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyspark.resource.requests import ( # noqa: F401 + ExecutorResourceRequest as ExecutorResourceRequest, + ExecutorResourceRequests as ExecutorResourceRequests, + TaskResourceRequest as TaskResourceRequest, + TaskResourceRequests as TaskResourceRequests, +) +from typing import Any, Optional + +class ResourceProfile: + def __init__( + self, + _java_resource_profile: Optional[Any] = ..., + _exec_req: Any = ..., + _task_req: Any = ..., + ) -> None: ... + @property + def id(self): ... + @property + def taskResources(self): ... + @property + def executorResources(self): ... + +class ResourceProfileBuilder: + def __init__(self) -> None: ... + def require(self, resourceRequest: Any): ... + def clearExecutorResourceRequests(self) -> None: ... + def clearTaskResourceRequests(self) -> None: ... + @property + def taskResources(self): ... + @property + def executorResources(self): ... + @property + def build(self): ... diff --git a/python/pyspark/resource/requests.pyi b/python/pyspark/resource/requests.pyi new file mode 100644 index 0000000000000..f9448d0780409 --- /dev/null +++ b/python/pyspark/resource/requests.pyi @@ -0,0 +1,71 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, Optional + +class ExecutorResourceRequest: + def __init__( + self, + resourceName: Any, + amount: Any, + discoveryScript: str = ..., + vendor: str = ..., + ) -> None: ... + @property + def resourceName(self): ... + @property + def amount(self): ... + @property + def discoveryScript(self): ... + @property + def vendor(self): ... + +class ExecutorResourceRequests: + def __init__( + self, _jvm: Optional[Any] = ..., _requests: Optional[Any] = ... + ) -> None: ... + def memory(self, amount: Any): ... + def memoryOverhead(self, amount: Any): ... + def pysparkMemory(self, amount: Any): ... + def offheapMemory(self, amount: Any): ... + def cores(self, amount: Any): ... + def resource( + self, + resourceName: Any, + amount: Any, + discoveryScript: str = ..., + vendor: str = ..., + ): ... + @property + def requests(self): ... + +class TaskResourceRequest: + def __init__(self, resourceName: Any, amount: Any) -> None: ... + @property + def resourceName(self): ... + @property + def amount(self): ... + +class TaskResourceRequests: + def __init__( + self, _jvm: Optional[Any] = ..., _requests: Optional[Any] = ... + ) -> None: ... + def cpus(self, amount: Any): ... + def resource(self, resourceName: Any, amount: Any): ... + @property + def requests(self): ... diff --git a/python/pyspark/resource/tests/test_resources.py b/python/pyspark/resource/tests/test_resources.py index c2b574c61abc5..6149f1ff7205a 100644 --- a/python/pyspark/resource/tests/test_resources.py +++ b/python/pyspark/resource/tests/test_resources.py @@ -75,7 +75,7 @@ def assert_request_contents(exec_reqs, task_reqs): from pyspark.resource.tests.test_resources import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/resultiterable.pyi b/python/pyspark/resultiterable.pyi new file mode 100644 index 0000000000000..69596ad82c8cc --- /dev/null +++ b/python/pyspark/resultiterable.pyi @@ -0,0 +1,30 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyspark._typing import SizedIterable +from typing import Iterator, TypeVar + +T = TypeVar("T") + +class ResultIterable(SizedIterable[T]): + data: SizedIterable[T] + index: int + maxindex: int + def __init__(self, data: SizedIterable[T]) -> None: ... + def __iter__(self) -> Iterator[T]: ... + def __len__(self) -> int: ... 
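The resource stubs tie into the `RDD.withResources` and `getResourceProfile` declarations earlier in `rdd.pyi`. A hedged sketch (it assumes an existing SparkContext `sc` on a cluster manager that supports stage-level scheduling; outside such a setup Spark may reject custom profiles):

```
from pyspark.resource import (
    ExecutorResourceRequests,
    ResourceProfileBuilder,
    TaskResourceRequests,
)

# Requests are chainable and collected by the builder.
ereq = ExecutorResourceRequests().cores(4).memory("6g")
treq = TaskResourceRequests().cpus(1)

rp = ResourceProfileBuilder().require(ereq).require(treq).build  # `build` is a property
rdd = sc.parallelize(range(100)).withResources(rp)
print(rdd.getResourceProfile())
```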
diff --git a/python/pyspark/serializers.pyi b/python/pyspark/serializers.pyi new file mode 100644 index 0000000000000..26ef17c38d227 --- /dev/null +++ b/python/pyspark/serializers.pyi @@ -0,0 +1,122 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any + +class SpecialLengths: + END_OF_DATA_SECTION: int = ... + PYTHON_EXCEPTION_THROWN: int = ... + TIMING_DATA: int = ... + END_OF_STREAM: int = ... + NULL: int = ... + START_ARROW_STREAM: int = ... + +class Serializer: + def dump_stream(self, iterator: Any, stream: Any) -> None: ... + def load_stream(self, stream: Any) -> None: ... + def __eq__(self, other: Any) -> Any: ... + def __ne__(self, other: Any) -> Any: ... + def __hash__(self) -> Any: ... + +class FramedSerializer(Serializer): + def __init__(self) -> None: ... + def dump_stream(self, iterator: Any, stream: Any) -> None: ... + def load_stream(self, stream: Any) -> None: ... + def dumps(self, obj: Any) -> None: ... + def loads(self, obj: Any) -> None: ... + +class BatchedSerializer(Serializer): + UNLIMITED_BATCH_SIZE: int = ... + UNKNOWN_BATCH_SIZE: int = ... + serializer: Any = ... + batchSize: Any = ... + def __init__(self, serializer: Any, batchSize: Any = ...) -> None: ... + def dump_stream(self, iterator: Any, stream: Any) -> None: ... + def load_stream(self, stream: Any): ... + +class FlattenedValuesSerializer(BatchedSerializer): + def __init__(self, serializer: Any, batchSize: int = ...) -> None: ... + def load_stream(self, stream: Any): ... + +class AutoBatchedSerializer(BatchedSerializer): + bestSize: Any = ... + def __init__(self, serializer: Any, bestSize: Any = ...) -> None: ... + def dump_stream(self, iterator: Any, stream: Any) -> None: ... + +class CartesianDeserializer(Serializer): + key_ser: Any = ... + val_ser: Any = ... + def __init__(self, key_ser: Any, val_ser: Any) -> None: ... + def load_stream(self, stream: Any): ... + +class PairDeserializer(Serializer): + key_ser: Any = ... + val_ser: Any = ... + def __init__(self, key_ser: Any, val_ser: Any) -> None: ... + def load_stream(self, stream: Any): ... + +class NoOpSerializer(FramedSerializer): + def loads(self, obj: Any): ... + def dumps(self, obj: Any): ... + +class PickleSerializer(FramedSerializer): + def dumps(self, obj: Any): ... + def loads(self, obj: Any, encoding: str = ...): ... + +class CloudPickleSerializer(PickleSerializer): + def dumps(self, obj: Any): ... + +class MarshalSerializer(FramedSerializer): + def dumps(self, obj: Any): ... + def loads(self, obj: Any): ... + +class AutoSerializer(FramedSerializer): + def __init__(self) -> None: ... + def dumps(self, obj: Any): ... + def loads(self, obj: Any): ... + +class CompressedSerializer(FramedSerializer): + serializer: Any = ... + def __init__(self, serializer: Any) -> None: ... 
+ def dumps(self, obj: Any): ... + def loads(self, obj: Any): ... + +class UTF8Deserializer(Serializer): + use_unicode: Any = ... + def __init__(self, use_unicode: bool = ...) -> None: ... + def loads(self, stream: Any): ... + def load_stream(self, stream: Any) -> None: ... + +class ChunkedStream: + buffer_size: Any = ... + buffer: Any = ... + current_pos: int = ... + wrapped: Any = ... + def __init__(self, wrapped: Any, buffer_size: Any) -> None: ... + def write(self, bytes: Any) -> None: ... + def close(self) -> None: ... + @property + def closed(self): ... + +def write_with_length(obj: Any, stream: Any): ... +def pack_long(value): ... +def read_int(stream): ... +def read_long(stream): ... +def read_bool(stream): ... +def write_int(value, stream): ... +def write_long(value, stream): ... diff --git a/python/pyspark/shell.pyi b/python/pyspark/shell.pyi new file mode 100644 index 0000000000000..0760309542f8d --- /dev/null +++ b/python/pyspark/shell.pyi @@ -0,0 +1,31 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyspark import SparkConf as SparkConf # noqa: F401 +from pyspark.context import SparkContext as SparkContext +from pyspark.sql import SQLContext as SQLContext, SparkSession as SparkSession +from typing import Any, Callable + +from pyspark.sql.dataframe import DataFrame + +spark: SparkSession +sc: SparkContext +sql: Callable[[str], DataFrame] +sqlContext: SQLContext +sqlCtx: SQLContext +code: Any diff --git a/python/pyspark/shuffle.pyi b/python/pyspark/shuffle.pyi new file mode 100644 index 0000000000000..10648c51dca8f --- /dev/null +++ b/python/pyspark/shuffle.pyi @@ -0,0 +1,109 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from pyspark.serializers import ( # noqa: F401 + AutoBatchedSerializer as AutoBatchedSerializer, + BatchedSerializer as BatchedSerializer, + CompressedSerializer as CompressedSerializer, + FlattenedValuesSerializer as FlattenedValuesSerializer, + PickleSerializer as PickleSerializer, +) +from pyspark.util import fail_on_stopiteration as fail_on_stopiteration # noqa: F401 +from typing import Any, Optional + +process: Any + +def get_used_memory(): ... + +MemoryBytesSpilled: int +DiskBytesSpilled: int + +class Aggregator: + createCombiner: Any = ... + mergeValue: Any = ... + mergeCombiners: Any = ... + def __init__( + self, createCombiner: Any, mergeValue: Any, mergeCombiners: Any + ) -> None: ... + +class SimpleAggregator(Aggregator): + def __init__(self, combiner: Any): ... + +class Merger: + agg: Any = ... + def __init__(self, aggregator: Any) -> None: ... + def mergeValues(self, iterator: Any) -> None: ... + def mergeCombiners(self, iterator: Any) -> None: ... + def items(self) -> None: ... + +class ExternalMerger(Merger): + MAX_TOTAL_PARTITIONS: int = ... + memory_limit: Any = ... + serializer: Any = ... + localdirs: Any = ... + partitions: Any = ... + batch: Any = ... + scale: Any = ... + data: Any = ... + pdata: Any = ... + spills: int = ... + def __init__( + self, + aggregator: Any, + memory_limit: int = ..., + serializer: Optional[Any] = ..., + localdirs: Optional[Any] = ..., + scale: int = ..., + partitions: int = ..., + batch: int = ..., + ) -> None: ... + def mergeValues(self, iterator: Any) -> None: ... + def mergeCombiners(self, iterator: Any, limit: Optional[Any] = ...) -> None: ... + def items(self): ... + +class ExternalSorter: + memory_limit: Any = ... + local_dirs: Any = ... + serializer: Any = ... + def __init__(self, memory_limit: Any, serializer: Optional[Any] = ...) -> None: ... + def sorted(self, iterator: Any, key: Optional[Any] = ..., reverse: bool = ...): ... + +class ExternalList: + LIMIT: int = ... + values: Any = ... + count: Any = ... + def __init__(self, values: Any) -> None: ... + def __iter__(self) -> Any: ... + def __len__(self): ... + def append(self, value: Any) -> None: ... + def __del__(self) -> None: ... + +class ExternalListOfList(ExternalList): + count: Any = ... + def __init__(self, values: Any) -> None: ... + def append(self, value: Any) -> None: ... + def __iter__(self) -> Any: ... + +class GroupByKey: + iterator: Any = ... + def __init__(self, iterator: Any) -> None: ... + def __iter__(self) -> Any: ... + +class ExternalGroupBy(ExternalMerger): + SORT_KEY_LIMIT: int = ... + def flattened_serializer(self): ... diff --git a/python/pyspark/sql/__init__.pyi b/python/pyspark/sql/__init__.pyi new file mode 100644 index 0000000000000..787be5647772e --- /dev/null +++ b/python/pyspark/sql/__init__.pyi @@ -0,0 +1,41 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyspark.sql.catalog import Catalog as Catalog # noqa: F401 +from pyspark.sql.column import Column as Column # noqa: F401 +from pyspark.sql.context import ( # noqa: F401 + HiveContext as HiveContext, + SQLContext as SQLContext, + UDFRegistration as UDFRegistration, +) +from pyspark.sql.dataframe import ( # noqa: F401 + DataFrame as DataFrame, + DataFrameNaFunctions as DataFrameNaFunctions, + DataFrameStatFunctions as DataFrameStatFunctions, +) +from pyspark.sql.group import GroupedData as GroupedData # noqa: F401 +from pyspark.sql.pandas.group_ops import ( # noqa: F401 + PandasCogroupedOps as PandasCogroupedOps, +) +from pyspark.sql.readwriter import ( # noqa: F401 + DataFrameReader as DataFrameReader, + DataFrameWriter as DataFrameWriter, +) +from pyspark.sql.session import SparkSession as SparkSession # noqa: F401 +from pyspark.sql.types import Row as Row # noqa: F401 +from pyspark.sql.window import Window as Window, WindowSpec as WindowSpec # noqa: F401 diff --git a/python/pyspark/sql/_typing.pyi b/python/pyspark/sql/_typing.pyi new file mode 100644 index 0000000000000..799a73204a639 --- /dev/null +++ b/python/pyspark/sql/_typing.pyi @@ -0,0 +1,57 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import ( + Any, + List, + Optional, + Tuple, + TypeVar, + Union, +) +from typing_extensions import Protocol + +import datetime +import decimal + +from pyspark._typing import PrimitiveType +import pyspark.sql.column +import pyspark.sql.types +from pyspark.sql.column import Column + +ColumnOrName = Union[pyspark.sql.column.Column, str] +DecimalLiteral = decimal.Decimal +DateTimeLiteral = Union[datetime.datetime, datetime.date] +LiteralType = PrimitiveType +AtomicDataTypeOrString = Union[pyspark.sql.types.AtomicType, str] +DataTypeOrString = Union[pyspark.sql.types.DataType, str] +OptionalPrimitiveType = Optional[PrimitiveType] + +RowLike = TypeVar("RowLike", List[Any], Tuple[Any, ...], pyspark.sql.types.Row) + +class SupportsOpen(Protocol): + def open(self, partition_id: int, epoch_id: int) -> bool: ... + +class SupportsProcess(Protocol): + def process(self, row: pyspark.sql.types.Row) -> None: ... + +class SupportsClose(Protocol): + def close(self, error: Exception) -> None: ... + +class UserDefinedFunctionLike(Protocol): + def __call__(self, *_: ColumnOrName) -> Column: ... diff --git a/python/pyspark/sql/avro/__init__.pyi b/python/pyspark/sql/avro/__init__.pyi new file mode 100644 index 0000000000000..0d7871da4c100 --- /dev/null +++ b/python/pyspark/sql/avro/__init__.pyi @@ -0,0 +1,22 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: This dynamically typed stub was automatically generated by stubgen. + +# Names in __all__ with no definition: +# functions diff --git a/python/pyspark/sql/avro/functions.pyi b/python/pyspark/sql/avro/functions.pyi new file mode 100644 index 0000000000000..4c2e3814a9e94 --- /dev/null +++ b/python/pyspark/sql/avro/functions.pyi @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Dict + +from pyspark.sql._typing import ColumnOrName +from pyspark.sql.column import Column + +def from_avro( + data: ColumnOrName, jsonFormatSchema: str, options: Dict[str, str] = ... +) -> Column: ... +def to_avro(data: ColumnOrName, jsonFormatSchema: str = ...) -> Column: ... diff --git a/python/pyspark/sql/catalog.pyi b/python/pyspark/sql/catalog.pyi new file mode 100644 index 0000000000000..86263fff63ce8 --- /dev/null +++ b/python/pyspark/sql/catalog.pyi @@ -0,0 +1,63 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import Any, Callable, List, Optional +from pyspark.sql.dataframe import DataFrame +from pyspark.sql.session import SparkSession +from pyspark.sql.types import DataType, StructType +from collections import namedtuple + +Database = namedtuple("Database", "name description locationUri") + +Table = namedtuple("Table", "name database description tableType isTemporary") + +Column = namedtuple("Column", "name description dataType nullable isPartition isBucket") + +Function = namedtuple("Function", "name description className isTemporary") + +class Catalog: + def __init__(self, sparkSession: SparkSession) -> None: ... + def currentDatabase(self) -> str: ... + def setCurrentDatabase(self, dbName: str) -> None: ... + def listDatabases(self) -> List[Database]: ... + def listTables(self, dbName: Optional[str] = ...) -> List[Table]: ... + def listFunctions(self, dbName: Optional[str] = ...) -> List[Function]: ... + def listColumns( + self, tableName: str, dbName: Optional[str] = ... + ) -> List[Column]: ... + def createTable( + self, + tableName: str, + path: Optional[str] = ..., + source: Optional[str] = ..., + schema: Optional[StructType] = ..., + description: Optional[str] = ..., + **options: str + ) -> DataFrame: ... + def dropTempView(self, viewName: str) -> None: ... + def dropGlobalTempView(self, viewName: str) -> None: ... + def registerFunction( + self, name: str, f: Callable[..., Any], returnType: DataType = ... + ) -> None: ... + def isCached(self, tableName: str) -> bool: ... + def cacheTable(self, tableName: str) -> None: ... + def uncacheTable(self, tableName: str) -> None: ... + def clearCache(self) -> None: ... + def refreshTable(self, tableName: str) -> None: ... + def recoverPartitions(self, tableName: str) -> None: ... + def refreshByPath(self, path: str) -> None: ... diff --git a/python/pyspark/sql/column.pyi b/python/pyspark/sql/column.pyi new file mode 100644 index 0000000000000..261fb6e5f3911 --- /dev/null +++ b/python/pyspark/sql/column.pyi @@ -0,0 +1,112 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import Any, Union + +from pyspark.sql._typing import LiteralType, DecimalLiteral, DateTimeLiteral +from pyspark.sql.types import ( # noqa: F401 + DataType, + StructField, + StructType, + IntegerType, + StringType, +) +from pyspark.sql.window import WindowSpec + +from py4j.java_gateway import JavaObject # type: ignore[import] + +class Column: + def __init__(self, JavaObject) -> None: ... + def __neg__(self) -> Column: ... + def __add__(self, other: Union[Column, LiteralType, DecimalLiteral]) -> Column: ... + def __sub__(self, other: Union[Column, LiteralType, DecimalLiteral]) -> Column: ... + def __mul__(self, other: Union[Column, LiteralType, DecimalLiteral]) -> Column: ... 
+ def __div__(self, other: Union[Column, LiteralType, DecimalLiteral]) -> Column: ... + def __truediv__( + self, other: Union[Column, LiteralType, DecimalLiteral] + ) -> Column: ... + def __mod__(self, other: Union[Column, LiteralType, DecimalLiteral]) -> Column: ... + def __radd__(self, other: Union[LiteralType, DecimalLiteral]) -> Column: ... + def __rsub__(self, other: Union[LiteralType, DecimalLiteral]) -> Column: ... + def __rmul__(self, other: Union[LiteralType, DecimalLiteral]) -> Column: ... + def __rdiv__(self, other: Union[LiteralType, DecimalLiteral]) -> Column: ... + def __rtruediv__(self, other: Union[LiteralType, DecimalLiteral]) -> Column: ... + def __rmod__(self, other: Union[bool, int, float, DecimalLiteral]) -> Column: ... + def __pow__(self, other: Union[Column, LiteralType, DecimalLiteral]) -> Column: ... + def __rpow__(self, other: Union[LiteralType, DecimalLiteral]) -> Column: ... + def __eq__(self, other: Union[Column, LiteralType, DateTimeLiteral, DecimalLiteral]) -> Column: ... # type: ignore[override] + def __ne__(self, other: Any) -> Column: ... # type: ignore[override] + def __lt__( + self, other: Union[Column, LiteralType, DateTimeLiteral, DecimalLiteral] + ) -> Column: ... + def __le__( + self, other: Union[Column, LiteralType, DateTimeLiteral, DecimalLiteral] + ) -> Column: ... + def __ge__( + self, other: Union[Column, LiteralType, DateTimeLiteral, DecimalLiteral] + ) -> Column: ... + def __gt__( + self, other: Union[Column, LiteralType, DateTimeLiteral, DecimalLiteral] + ) -> Column: ... + def eqNullSafe( + self, other: Union[Column, LiteralType, DecimalLiteral] + ) -> Column: ... + def __and__(self, other: Column) -> Column: ... + def __or__(self, other: Column) -> Column: ... + def __invert__(self) -> Column: ... + def __rand__(self, other: Column) -> Column: ... + def __ror__(self, other: Column) -> Column: ... + def __contains__(self, other: Any) -> Column: ... + def __getitem__(self, other: Any) -> Column: ... + def bitwiseOR(self, other: Union[Column, int]) -> Column: ... + def bitwiseAND(self, other: Union[Column, int]) -> Column: ... + def bitwiseXOR(self, other: Union[Column, int]) -> Column: ... + def getItem(self, key: Any) -> Column: ... + def getField(self, name: Any) -> Column: ... + def withField(self, fieldName: str, col: Column) -> Column: ... + def __getattr__(self, item: Any) -> Column: ... + def __iter__(self) -> None: ... + def rlike(self, item: str) -> Column: ... + def like(self, item: str) -> Column: ... + def startswith(self, item: Union[str, Column]) -> Column: ... + def endswith(self, item: Union[str, Column]) -> Column: ... + @overload + def substr(self, startPos: int, length: int) -> Column: ... + @overload + def substr(self, startPos: Column, length: Column) -> Column: ... + def __getslice__(self, startPos: int, length: int) -> Column: ... + def isin(self, *cols: Any) -> Column: ... + def asc(self) -> Column: ... + def asc_nulls_first(self) -> Column: ... + def asc_nulls_last(self) -> Column: ... + def desc(self) -> Column: ... + def desc_nulls_first(self) -> Column: ... + def desc_nulls_last(self) -> Column: ... + def isNull(self) -> Column: ... + def isNotNull(self) -> Column: ... + def alias(self, *alias: str, **kwargs: Any) -> Column: ... + def name(self, *alias: str) -> Column: ... + def cast(self, dataType: Union[DataType, str]) -> Column: ... + def astype(self, dataType: Union[DataType, str]) -> Column: ... + def between(self, lowerBound, upperBound) -> Column: ... 
+ def when(self, condition: Column, value: Any) -> Column: ... + def otherwise(self, value: Any) -> Column: ... + def over(self, window: WindowSpec) -> Column: ... + def __nonzero__(self) -> None: ... + def __bool__(self) -> None: ... diff --git a/python/pyspark/sql/conf.pyi b/python/pyspark/sql/conf.pyi new file mode 100644 index 0000000000000..304dfcb3f9e53 --- /dev/null +++ b/python/pyspark/sql/conf.pyi @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Optional +from py4j.java_gateway import JavaObject # type: ignore[import] + +class RuntimeConfig: + def __init__(self, jconf: JavaObject) -> None: ... + def set(self, key: str, value: str) -> str: ... + def get(self, key: str, default: Optional[str] = ...) -> str: ... + def unset(self, key: str) -> None: ... + def isModifiable(self, key: str) -> bool: ... diff --git a/python/pyspark/sql/context.pyi b/python/pyspark/sql/context.pyi new file mode 100644 index 0000000000000..64927b37ac2a9 --- /dev/null +++ b/python/pyspark/sql/context.pyi @@ -0,0 +1,139 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import overload +from typing import Any, Callable, Iterable, List, Optional, Tuple, TypeVar, Union + +from py4j.java_gateway import JavaObject # type: ignore[import] + +from pyspark.sql._typing import ( + DateTimeLiteral, + LiteralType, + DecimalLiteral, + RowLike, +) +from pyspark.sql.pandas._typing import DataFrameLike +from pyspark.context import SparkContext +from pyspark.rdd import RDD +from pyspark.sql.dataframe import DataFrame +from pyspark.sql.session import SparkSession +from pyspark.sql.types import AtomicType, DataType, StructType +from pyspark.sql.udf import UDFRegistration as UDFRegistration +from pyspark.sql.readwriter import DataFrameReader +from pyspark.sql.streaming import DataStreamReader, StreamingQueryManager + +T = TypeVar("T") + +class SQLContext: + sparkSession: SparkSession + def __init__( + self, + sparkContext, + sparkSession: Optional[SparkSession] = ..., + jsqlContext: Optional[JavaObject] = ..., + ) -> None: ... + @classmethod + def getOrCreate(cls: type, sc: SparkContext) -> SQLContext: ... + def newSession(self) -> SQLContext: ... + def setConf(self, key: str, value) -> None: ... + def getConf(self, key: str, defaultValue: Optional[str] = ...) -> str: ... + @property + def udf(self) -> UDFRegistration: ... + def range( + self, + start: int, + end: Optional[int] = ..., + step: int = ..., + numPartitions: Optional[int] = ..., + ) -> DataFrame: ... + def registerFunction( + self, name: str, f: Callable[..., Any], returnType: DataType = ... + ) -> None: ... + def registerJavaFunction( + self, name: str, javaClassName: str, returnType: Optional[DataType] = ... + ) -> None: ... + @overload + def createDataFrame( + self, + data: Union[RDD[RowLike], Iterable[RowLike]], + samplingRatio: Optional[float] = ..., + ) -> DataFrame: ... + @overload + def createDataFrame( + self, + data: Union[RDD[RowLike], Iterable[RowLike]], + schema: Union[List[str], Tuple[str, ...]] = ..., + verifySchema: bool = ..., + ) -> DataFrame: ... + @overload + def createDataFrame( + self, + data: Union[ + RDD[Union[DateTimeLiteral, LiteralType, DecimalLiteral]], + Iterable[Union[DateTimeLiteral, LiteralType, DecimalLiteral]], + ], + schema: Union[AtomicType, str], + verifySchema: bool = ..., + ) -> DataFrame: ... + @overload + def createDataFrame( + self, + data: Union[RDD[RowLike], Iterable[RowLike]], + schema: Union[StructType, str], + verifySchema: bool = ..., + ) -> DataFrame: ... + @overload + def createDataFrame( + self, data: DataFrameLike, samplingRatio: Optional[float] = ... + ) -> DataFrame: ... + @overload + def createDataFrame( + self, + data: DataFrameLike, + schema: Union[StructType, str], + verifySchema: bool = ..., + ) -> DataFrame: ... + def registerDataFrameAsTable(self, df: DataFrame, tableName: str) -> None: ... + def dropTempTable(self, tableName: str) -> None: ... + def createExternalTable( + self, + tableName: str, + path: Optional[str] = ..., + source: Optional[str] = ..., + schema: Optional[StructType] = ..., + **options + ) -> DataFrame: ... + def sql(self, sqlQuery: str) -> DataFrame: ... + def table(self, tableName: str) -> DataFrame: ... + def tables(self, dbName: Optional[str] = ...) -> DataFrame: ... + def tableNames(self, dbName: Optional[str] = ...) -> List[str]: ... + def cacheTable(self, tableName: str) -> None: ... + def uncacheTable(self, tableName: str) -> None: ... + def clearCache(self) -> None: ... + @property + def read(self) -> DataFrameReader: ... + @property + def readStream(self) -> DataStreamReader: ... 
+ @property + def streams(self) -> StreamingQueryManager: ... + +class HiveContext(SQLContext): + def __init__( + self, sparkContext: SparkContext, jhiveContext: Optional[JavaObject] = ... + ) -> None: ... + def refreshTable(self, tableName: str) -> None: ... diff --git a/python/pyspark/sql/dataframe.pyi b/python/pyspark/sql/dataframe.pyi new file mode 100644 index 0000000000000..c498d529d820f --- /dev/null +++ b/python/pyspark/sql/dataframe.pyi @@ -0,0 +1,324 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import ( + Any, + Callable, + Dict, + Iterator, + List, + Optional, + Tuple, + Union, +) + +from py4j.java_gateway import JavaObject # type: ignore[import] + +from pyspark.sql._typing import ColumnOrName, LiteralType, OptionalPrimitiveType +from pyspark.sql.types import ( # noqa: F401 + StructType, + StructField, + StringType, + IntegerType, + Row, +) # noqa: F401 +from pyspark.sql.context import SQLContext +from pyspark.sql.group import GroupedData +from pyspark.sql.readwriter import DataFrameWriter, DataFrameWriterV2 +from pyspark.sql.streaming import DataStreamWriter +from pyspark.sql.column import Column +from pyspark.rdd import RDD +from pyspark.storagelevel import StorageLevel + +from pyspark.sql.pandas.conversion import PandasConversionMixin +from pyspark.sql.pandas.map_ops import PandasMapOpsMixin + +class DataFrame(PandasMapOpsMixin, PandasConversionMixin): + sql_ctx: SQLContext + is_cached: bool + def __init__(self, jdf: JavaObject, sql_ctx: SQLContext) -> None: ... + @property + def rdd(self) -> RDD[Row]: ... + @property + def na(self) -> DataFrameNaFunctions: ... + @property + def stat(self) -> DataFrameStatFunctions: ... + def toJSON(self, use_unicode: bool = ...) -> RDD[str]: ... + def registerTempTable(self, name: str) -> None: ... + def createTempView(self, name: str) -> None: ... + def createOrReplaceTempView(self, name: str) -> None: ... + def createGlobalTempView(self, name: str) -> None: ... + @property + def write(self) -> DataFrameWriter: ... + @property + def writeStream(self) -> DataStreamWriter: ... + @property + def schema(self) -> StructType: ... + def printSchema(self) -> None: ... + def explain( + self, extended: Optional[Union[bool, str]] = ..., mode: Optional[str] = ... + ) -> None: ... + def exceptAll(self, other: DataFrame) -> DataFrame: ... + def isLocal(self) -> bool: ... + @property + def isStreaming(self) -> bool: ... + def show( + self, n: int = ..., truncate: Union[bool, int] = ..., vertical: bool = ... + ) -> None: ... + def checkpoint(self, eager: bool = ...) -> DataFrame: ... + def localCheckpoint(self, eager: bool = ...) -> DataFrame: ... + def withWatermark( + self, eventTime: ColumnOrName, delayThreshold: str + ) -> DataFrame: ... 
+ def hint(self, name: str, *parameters: Any) -> DataFrame: ... + def count(self) -> int: ... + def collect(self) -> List[Row]: ... + def toLocalIterator(self, prefetchPartitions: bool = ...) -> Iterator[Row]: ... + def limit(self, num: int) -> DataFrame: ... + def take(self, num: int) -> List[Row]: ... + def tail(self, num: int) -> List[Row]: ... + def foreach(self, f: Callable[[Row], None]) -> None: ... + def foreachPartition(self, f: Callable[[Iterator[Row]], None]) -> None: ... + def cache(self) -> DataFrame: ... + def persist(self, storageLevel: StorageLevel = ...) -> DataFrame: ... + @property + def storageLevel(self) -> StorageLevel: ... + def unpersist(self, blocking: bool = ...) -> DataFrame: ... + def coalesce(self, numPartitions: int) -> DataFrame: ... + @overload + def repartition(self, numPartitions: int, *cols: ColumnOrName) -> DataFrame: ... + @overload + def repartition(self, *cols: ColumnOrName) -> DataFrame: ... + @overload + def repartitionByRange( + self, numPartitions: int, *cols: ColumnOrName + ) -> DataFrame: ... + @overload + def repartitionByRange(self, *cols: ColumnOrName) -> DataFrame: ... + def distinct(self) -> DataFrame: ... + @overload + def sample(self, fraction: float, seed: Optional[int] = ...) -> DataFrame: ... + @overload + def sample( + self, + withReplacement: Optional[bool], + fraction: float, + seed: Optional[int] = ..., + ) -> DataFrame: ... + def sampleBy( + self, col: str, fractions: Dict[Any, float], seed: Optional[int] = ... + ) -> DataFrame: ... + def randomSplit( + self, weights: List[float], seed: Optional[int] = ... + ) -> List[DataFrame]: ... + @property + def dtypes(self) -> List[Tuple[str, str]]: ... + @property + def columns(self) -> List[str]: ... + def colRegex(self, colName: str) -> Column: ... + def alias(self, alias: str) -> DataFrame: ... + def crossJoin(self, other: DataFrame) -> DataFrame: ... + def join( + self, + other: DataFrame, + on: Optional[Union[str, List[str], Column, List[Column]]] = ..., + how: Optional[str] = ..., + ) -> DataFrame: ... + def sortWithinPartitions( + self, + *cols: Union[str, Column, List[Union[str, Column]]], + ascending: Union[bool, List[bool]] = ... + ) -> DataFrame: ... + def sort( + self, + *cols: Union[str, Column, List[Union[str, Column]]], + ascending: Union[bool, List[bool]] = ... + ) -> DataFrame: ... + def orderBy( + self, + *cols: Union[str, Column, List[Union[str, Column]]], + ascending: Union[bool, List[bool]] = ... + ) -> DataFrame: ... + def describe(self, *cols: Union[str, List[str]]) -> DataFrame: ... + def summary(self, *statistics: str) -> DataFrame: ... + @overload + def head(self) -> Row: ... + @overload + def head(self, n: int) -> List[Row]: ... + def first(self) -> Row: ... + def __getitem__(self, item: Union[int, str, Column, List, Tuple]) -> Column: ... + def __getattr__(self, name: str) -> Column: ... + @overload + def select(self, *cols: ColumnOrName) -> DataFrame: ... + @overload + def select(self, __cols: Union[List[Column], List[str]]) -> DataFrame: ... + @overload + def selectExpr(self, *expr: str) -> DataFrame: ... + @overload + def selectExpr(self, *expr: List[str]) -> DataFrame: ... + def filter(self, condition: ColumnOrName) -> DataFrame: ... + @overload + def groupBy(self, *cols: ColumnOrName) -> GroupedData: ... + @overload + def groupBy(self, __cols: Union[List[Column], List[str]]) -> GroupedData: ... + @overload + def rollup(self, *cols: ColumnOrName) -> GroupedData: ... + @overload + def rollup(self, __cols: Union[List[Column], List[str]]) -> GroupedData: ... 
+ @overload + def cube(self, *cols: ColumnOrName) -> GroupedData: ... + @overload + def cube(self, __cols: Union[List[Column], List[str]]) -> GroupedData: ... + def agg(self, *exprs: Union[Column, Dict[str, str]]) -> DataFrame: ... + def union(self, other: DataFrame) -> DataFrame: ... + def unionAll(self, other: DataFrame) -> DataFrame: ... + def unionByName( + self, other: DataFrame, allowMissingColumns: bool = ... + ) -> DataFrame: ... + def intersect(self, other: DataFrame) -> DataFrame: ... + def intersectAll(self, other: DataFrame) -> DataFrame: ... + def subtract(self, other: DataFrame) -> DataFrame: ... + def dropDuplicates(self, subset: Optional[List[str]] = ...) -> DataFrame: ... + def dropna( + self, + how: str = ..., + thresh: Optional[int] = ..., + subset: Optional[List[str]] = ..., + ) -> DataFrame: ... + @overload + def fillna( + self, + value: LiteralType, + subset: Optional[Union[str, Tuple[str, ...], List[str]]] = ..., + ) -> DataFrame: ... + @overload + def fillna(self, value: Dict[str, LiteralType]) -> DataFrame: ... + @overload + def replace( + self, + to_replace: LiteralType, + value: OptionalPrimitiveType, + subset: Optional[List[str]] = ..., + ) -> DataFrame: ... + @overload + def replace( + self, + to_replace: List[LiteralType], + value: List[OptionalPrimitiveType], + subset: Optional[List[str]] = ..., + ) -> DataFrame: ... + @overload + def replace( + self, + to_replace: Dict[LiteralType, OptionalPrimitiveType], + subset: Optional[List[str]] = ..., + ) -> DataFrame: ... + @overload + def replace( + self, + to_replace: List[LiteralType], + value: OptionalPrimitiveType, + subset: Optional[List[str]] = ..., + ) -> DataFrame: ... + def approxQuantile( + self, col: str, probabilities: List[float], relativeError: float + ) -> List[float]: ... + def corr(self, col1: str, col2: str, method: Optional[str] = ...) -> float: ... + def cov(self, col1: str, col2: str) -> float: ... + def crosstab(self, col1: str, col2: str) -> DataFrame: ... + def freqItems( + self, cols: List[str], support: Optional[float] = ... + ) -> DataFrame: ... + def withColumn(self, colName: str, col: Column) -> DataFrame: ... + def withColumnRenamed(self, existing: str, new: str) -> DataFrame: ... + @overload + def drop(self, cols: ColumnOrName) -> DataFrame: ... + @overload + def drop(self, *cols: str) -> DataFrame: ... + def toDF(self, *cols: ColumnOrName) -> DataFrame: ... + def transform(self, func: Callable[[DataFrame], DataFrame]) -> DataFrame: ... + @overload + def groupby(self, *cols: ColumnOrName) -> GroupedData: ... + @overload + def groupby(self, __cols: Union[List[Column], List[str]]) -> GroupedData: ... + def drop_duplicates(self, subset: Optional[List[str]] = ...) -> DataFrame: ... + def where(self, condition: ColumnOrName) -> DataFrame: ... + def sameSemantics(self, other: DataFrame) -> bool: ... + def semanticHash(self) -> int: ... + def inputFiles(self) -> List[str]: ... + def writeTo(self, table: str) -> DataFrameWriterV2: ... + +class DataFrameNaFunctions: + df: DataFrame + def __init__(self, df: DataFrame) -> None: ... + def drop( + self, + how: str = ..., + thresh: Optional[int] = ..., + subset: Optional[List[str]] = ..., + ) -> DataFrame: ... + @overload + def fill( + self, value: LiteralType, subset: Optional[List[str]] = ... + ) -> DataFrame: ... + @overload + def fill(self, value: Dict[str, LiteralType]) -> DataFrame: ... + @overload + def replace( + self, + to_replace: LiteralType, + value: OptionalPrimitiveType, + subset: Optional[List[str]] = ..., + ) -> DataFrame: ... 
+ @overload + def replace( + self, + to_replace: List[LiteralType], + value: List[OptionalPrimitiveType], + subset: Optional[List[str]] = ..., + ) -> DataFrame: ... + @overload + def replace( + self, + to_replace: Dict[LiteralType, OptionalPrimitiveType], + subset: Optional[List[str]] = ..., + ) -> DataFrame: ... + @overload + def replace( + self, + to_replace: List[LiteralType], + value: OptionalPrimitiveType, + subset: Optional[List[str]] = ..., + ) -> DataFrame: ... + +class DataFrameStatFunctions: + df: DataFrame + def __init__(self, df: DataFrame) -> None: ... + def approxQuantile( + self, col: str, probabilities: List[float], relativeError: float + ) -> List[float]: ... + def corr(self, col1: str, col2: str, method: Optional[str] = ...) -> float: ... + def cov(self, col1: str, col2: str) -> float: ... + def crosstab(self, col1: str, col2: str) -> DataFrame: ... + def freqItems( + self, cols: List[str], support: Optional[float] = ... + ) -> DataFrame: ... + def sampleBy( + self, col: str, fractions: Dict[Any, float], seed: Optional[int] = ... + ) -> DataFrame: ... diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi new file mode 100644 index 0000000000000..3b0b2030178ef --- /dev/null +++ b/python/pyspark/sql/functions.pyi @@ -0,0 +1,343 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import Any, Callable, Dict, List, Optional, Union + +from pyspark.sql._typing import ( + ColumnOrName, + DataTypeOrString, +) +from pyspark.sql.pandas.functions import ( # noqa: F401 + pandas_udf as pandas_udf, + PandasUDFType as PandasUDFType, +) +from pyspark.sql.column import Column +from pyspark.sql.dataframe import DataFrame +from pyspark.sql.types import ( # noqa: F401 + ArrayType, + StringType, + StructType, + DataType, +) +from pyspark.sql.utils import to_str # noqa: F401 + +def approxCountDistinct(col: ColumnOrName, rsd: Optional[float] = ...) -> Column: ... +def approx_count_distinct(col: ColumnOrName, rsd: Optional[float] = ...) -> Column: ... +def broadcast(df: DataFrame) -> DataFrame: ... +def coalesce(*cols: ColumnOrName) -> Column: ... +def corr(col1: ColumnOrName, col2: ColumnOrName) -> Column: ... +def covar_pop(col1: ColumnOrName, col2: ColumnOrName) -> Column: ... +def covar_samp(col1: ColumnOrName, col2: ColumnOrName) -> Column: ... +def countDistinct(col: ColumnOrName, *cols: ColumnOrName) -> Column: ... +def first(col: ColumnOrName, ignorenulls: bool = ...) -> Column: ... +def grouping(col: ColumnOrName) -> Column: ... +def grouping_id(*cols: ColumnOrName) -> Column: ... +def input_file_name() -> Column: ... +def isnan(col: ColumnOrName) -> Column: ... +def isnull(col: ColumnOrName) -> Column: ... +def last(col: ColumnOrName, ignorenulls: bool = ...) -> Column: ... 
+def monotonically_increasing_id() -> Column: ... +def nanvl(col1: ColumnOrName, col2: ColumnOrName) -> Column: ... +def percentile_approx( + col: ColumnOrName, + percentage: Union[Column, float, List[float]], + accuracy: Union[Column, float] = ..., +) -> Column: ... +def rand(seed: Optional[int] = ...) -> Column: ... +def randn(seed: Optional[int] = ...) -> Column: ... +def round(col: ColumnOrName, scale: int = ...) -> Column: ... +def bround(col: ColumnOrName, scale: int = ...) -> Column: ... +def shiftLeft(col: ColumnOrName, numBits: int) -> Column: ... +def shiftRight(col: ColumnOrName, numBits: int) -> Column: ... +def shiftRightUnsigned(col, numBits) -> Column: ... +def spark_partition_id() -> Column: ... +def expr(str: str) -> Column: ... +def struct(*cols: ColumnOrName) -> Column: ... +def greatest(*cols: ColumnOrName) -> Column: ... +def least(*cols: Column) -> Column: ... +def when(condition: Column, value) -> Column: ... +@overload +def log(arg1: ColumnOrName) -> Column: ... +@overload +def log(arg1: float, arg2: ColumnOrName) -> Column: ... +def log2(col: ColumnOrName) -> Column: ... +def conv(col: ColumnOrName, fromBase: int, toBase: int) -> Column: ... +def factorial(col: ColumnOrName) -> Column: ... +def lag( + col: ColumnOrName, offset: int = ..., default: Optional[Any] = ... +) -> Column: ... +def lead( + col: ColumnOrName, offset: int = ..., default: Optional[Any] = ... +) -> Column: ... +def ntile(n: int) -> Column: ... +def current_date() -> Column: ... +def current_timestamp() -> Column: ... +def date_format(date: ColumnOrName, format: str) -> Column: ... +def year(col: ColumnOrName) -> Column: ... +def quarter(col: ColumnOrName) -> Column: ... +def month(col: ColumnOrName) -> Column: ... +def dayofweek(col: ColumnOrName) -> Column: ... +def dayofmonth(col: ColumnOrName) -> Column: ... +def dayofyear(col: ColumnOrName) -> Column: ... +def hour(col: ColumnOrName) -> Column: ... +def minute(col: ColumnOrName) -> Column: ... +def second(col: ColumnOrName) -> Column: ... +def weekofyear(col: ColumnOrName) -> Column: ... +def date_add(start: ColumnOrName, days: int) -> Column: ... +def date_sub(start: ColumnOrName, days: int) -> Column: ... +def datediff(end: ColumnOrName, start: ColumnOrName) -> Column: ... +def add_months(start: ColumnOrName, months: int) -> Column: ... +def months_between( + date1: ColumnOrName, date2: ColumnOrName, roundOff: bool = ... +) -> Column: ... +def to_date(col: ColumnOrName, format: Optional[str] = ...) -> Column: ... +@overload +def to_timestamp(col: ColumnOrName) -> Column: ... +@overload +def to_timestamp(col: ColumnOrName, format: str) -> Column: ... +def trunc(date: ColumnOrName, format: str) -> Column: ... +def date_trunc(format: str, timestamp: ColumnOrName) -> Column: ... +def next_day(date: ColumnOrName, dayOfWeek: str) -> Column: ... +def last_day(date: ColumnOrName) -> Column: ... +def from_unixtime(timestamp: ColumnOrName, format: str = ...) -> Column: ... +def unix_timestamp( + timestamp: Optional[ColumnOrName] = ..., format: str = ... +) -> Column: ... +def from_utc_timestamp(timestamp: ColumnOrName, tz: ColumnOrName) -> Column: ... +def to_utc_timestamp(timestamp: ColumnOrName, tz: ColumnOrName) -> Column: ... +def timestamp_seconds(col: ColumnOrName) -> Column: ... +def window( + timeColumn: ColumnOrName, + windowDuration: str, + slideDuration: Optional[str] = ..., + startTime: Optional[str] = ..., +) -> Column: ... +def crc32(col: ColumnOrName) -> Column: ... +def md5(col: ColumnOrName) -> Column: ... 
+def sha1(col: ColumnOrName) -> Column: ... +def sha2(col: ColumnOrName, numBits: int) -> Column: ... +def hash(*cols: ColumnOrName) -> Column: ... +def xxhash64(*cols: ColumnOrName) -> Column: ... +def concat(*cols: ColumnOrName) -> Column: ... +def concat_ws(sep: str, *cols: ColumnOrName) -> Column: ... +def decode(col: ColumnOrName, charset: str) -> Column: ... +def encode(col: ColumnOrName, charset: str) -> Column: ... +def format_number(col: ColumnOrName, d: int) -> Column: ... +def format_string(format: str, *cols: ColumnOrName) -> Column: ... +def instr(str: ColumnOrName, substr: str) -> Column: ... +def overlay( + src: ColumnOrName, + replace: ColumnOrName, + pos: Union[Column, int], + len: Union[Column, int] = ..., +) -> Column: ... +def substring(str: ColumnOrName, pos: int, len: int) -> Column: ... +def substring_index(str: ColumnOrName, delim: str, count: int) -> Column: ... +def levenshtein(left: ColumnOrName, right: ColumnOrName) -> Column: ... +def locate(substr: str, str: Column, pos: int = ...) -> Column: ... +def lpad(col: Column, len: int, pad: str) -> Column: ... +def rpad(col: Column, len: int, pad: str) -> Column: ... +def repeat(col: Column, n: int) -> Column: ... +def split(str: Column, pattern: str, limit: int = ...) -> Column: ... +def regexp_extract(str: ColumnOrName, pattern: str, idx: int) -> Column: ... +def regexp_replace(str: ColumnOrName, pattern: str, replacement: str) -> Column: ... +def initcap(col: ColumnOrName) -> Column: ... +def soundex(col: ColumnOrName) -> Column: ... +def bin(col: ColumnOrName) -> Column: ... +def hex(col: ColumnOrName) -> Column: ... +def unhex(col: ColumnOrName) -> Column: ... +def length(col: ColumnOrName) -> Column: ... +def translate(srcCol: ColumnOrName, matching: str, replace: str) -> Column: ... +def map_from_arrays(col1: ColumnOrName, col2: ColumnOrName) -> Column: ... +def create_map(*cols: ColumnOrName) -> Column: ... +def array(*cols: ColumnOrName) -> Column: ... +def array_contains(col: ColumnOrName, value: Any) -> Column: ... +def arrays_overlap(a1: ColumnOrName, a2: ColumnOrName) -> Column: ... +def slice(x: ColumnOrName, start: int, length: int) -> Column: ... +def array_join( + col: ColumnOrName, delimiter: str, null_replacement: Optional[str] = ... +) -> Column: ... +def array_position(col: ColumnOrName, value: Any) -> Column: ... +def element_at(col: ColumnOrName, extraction: Any) -> Column: ... +def array_remove(col: ColumnOrName, element: Any) -> Column: ... +def array_distinct(col: ColumnOrName) -> Column: ... +def array_intersect(col1: ColumnOrName, col2: ColumnOrName) -> Column: ... +def array_union(col1: ColumnOrName, col2: ColumnOrName) -> Column: ... +def array_except(col1: ColumnOrName, col2: ColumnOrName) -> Column: ... +def explode(col: ColumnOrName) -> Column: ... +def explode_outer(col: ColumnOrName) -> Column: ... +def posexplode(col: ColumnOrName) -> Column: ... +def posexplode_outer(col: ColumnOrName) -> Column: ... +def get_json_object(col: ColumnOrName, path: str) -> Column: ... +def json_tuple(col: ColumnOrName, *fields: str) -> Column: ... +def from_json( + col: ColumnOrName, + schema: Union[ArrayType, StructType, Column, str], + options: Dict[str, str] = ..., +) -> Column: ... +def to_json(col: ColumnOrName, options: Dict[str, str] = ...) -> Column: ... +def schema_of_json(json: ColumnOrName, options: Dict[str, str] = ...) -> Column: ... +def schema_of_csv(csv: ColumnOrName, options: Dict[str, str] = ...) -> Column: ... +def to_csv(col: ColumnOrName, options: Dict[str, str] = ...) 
-> Column: ... +def size(col: ColumnOrName) -> Column: ... +def array_min(col: ColumnOrName) -> Column: ... +def array_max(col: ColumnOrName) -> Column: ... +def sort_array(col: ColumnOrName, asc: bool = ...) -> Column: ... +def array_sort(col: ColumnOrName) -> Column: ... +def shuffle(col: ColumnOrName) -> Column: ... +def reverse(col: ColumnOrName) -> Column: ... +def flatten(col: ColumnOrName) -> Column: ... +def map_keys(col: ColumnOrName) -> Column: ... +def map_values(col: ColumnOrName) -> Column: ... +def map_entries(col: ColumnOrName) -> Column: ... +def map_from_entries(col: ColumnOrName) -> Column: ... +def array_repeat(col: ColumnOrName, count: Union[Column, int]) -> Column: ... +def arrays_zip(*cols: ColumnOrName) -> Column: ... +def map_concat(*cols: ColumnOrName) -> Column: ... +def sequence( + start: ColumnOrName, stop: ColumnOrName, step: Optional[ColumnOrName] = ... +) -> Column: ... +def from_csv( + col: ColumnOrName, + schema: Union[StructType, Column, str], + options: Dict[str, str] = ..., +) -> Column: ... +@overload +def transform(col: ColumnOrName, f: Callable[[Column], Column]) -> Column: ... +@overload +def transform(col: ColumnOrName, f: Callable[[Column, Column], Column]) -> Column: ... +def exists(col: ColumnOrName, f: Callable[[Column], Column]) -> Column: ... +def forall(col: ColumnOrName, f: Callable[[Column], Column]) -> Column: ... +@overload +def filter(col: ColumnOrName, f: Callable[[Column], Column]) -> Column: ... +@overload +def filter(col: ColumnOrName, f: Callable[[Column, Column], Column]) -> Column: ... +def aggregate( + col: ColumnOrName, + zero: ColumnOrName, + merge: Callable[[Column, Column], Column], + finish: Optional[Callable[[Column], Column]] = ..., +) -> Column: ... +def zip_with( + col1: ColumnOrName, + ColumnOrName: ColumnOrName, + f: Callable[[Column, Column], Column], +) -> Column: ... +def transform_keys( + col: ColumnOrName, f: Callable[[Column, Column], Column] +) -> Column: ... +def transform_values( + col: ColumnOrName, f: Callable[[Column, Column], Column] +) -> Column: ... +def map_filter(col: ColumnOrName, f: Callable[[Column, Column], Column]) -> Column: ... +def map_zip_with( + col1: ColumnOrName, + col2: ColumnOrName, + f: Callable[[Column, Column, Column], Column], +) -> Column: ... +def abs(col: ColumnOrName) -> Column: ... +def acos(col: ColumnOrName) -> Column: ... +def asc(col: ColumnOrName) -> Column: ... +def asc_nulls_first(col: ColumnOrName) -> Column: ... +def asc_nulls_last(col: ColumnOrName) -> Column: ... +def ascii(col: ColumnOrName) -> Column: ... +def asin(col: ColumnOrName) -> Column: ... +def atan(col: ColumnOrName) -> Column: ... +@overload +def atan2(col1: ColumnOrName, col2: ColumnOrName) -> Column: ... +@overload +def atan2(col1: float, col2: ColumnOrName) -> Column: ... +@overload +def atan2(col1: ColumnOrName, col2: float) -> Column: ... +def avg(col: ColumnOrName) -> Column: ... +def base64(col: ColumnOrName) -> Column: ... +def bitwiseNOT(col: ColumnOrName) -> Column: ... +def cbrt(col: ColumnOrName) -> Column: ... +def ceil(col: ColumnOrName) -> Column: ... +def col(col: str) -> Column: ... +def collect_list(col: ColumnOrName) -> Column: ... +def collect_set(col: ColumnOrName) -> Column: ... +def column(col: str) -> Column: ... +def cos(col: ColumnOrName) -> Column: ... +def cosh(col: ColumnOrName) -> Column: ... +def count(col: ColumnOrName) -> Column: ... +def cume_dist() -> Column: ... +def degrees(col: ColumnOrName) -> Column: ... +def dense_rank() -> Column: ... 
+def desc(col: ColumnOrName) -> Column: ... +def desc_nulls_first(col: ColumnOrName) -> Column: ... +def desc_nulls_last(col: ColumnOrName) -> Column: ... +def exp(col: ColumnOrName) -> Column: ... +def expm1(col: ColumnOrName) -> Column: ... +def floor(col: ColumnOrName) -> Column: ... +@overload +def hypot(col1: ColumnOrName, col2: ColumnOrName) -> Column: ... +@overload +def hypot(col1: float, col2: ColumnOrName) -> Column: ... +@overload +def hypot(col1: ColumnOrName, col2: float) -> Column: ... +def kurtosis(col: ColumnOrName) -> Column: ... +def lit(col: Any) -> Column: ... +def log10(col: ColumnOrName) -> Column: ... +def log1p(col: ColumnOrName) -> Column: ... +def lower(col: ColumnOrName) -> Column: ... +def ltrim(col: ColumnOrName) -> Column: ... +def max(col: ColumnOrName) -> Column: ... +def mean(col: ColumnOrName) -> Column: ... +def min(col: ColumnOrName) -> Column: ... +def percent_rank() -> Column: ... +@overload +def pow(col1: ColumnOrName, col2: ColumnOrName) -> Column: ... +@overload +def pow(col1: float, col2: ColumnOrName) -> Column: ... +@overload +def pow(col1: ColumnOrName, col2: float) -> Column: ... +def radians(col: ColumnOrName) -> Column: ... +def rank() -> Column: ... +def rint(col: ColumnOrName) -> Column: ... +def row_number() -> Column: ... +def rtrim(col: ColumnOrName) -> Column: ... +def signum(col: ColumnOrName) -> Column: ... +def sin(col: ColumnOrName) -> Column: ... +def sinh(col: ColumnOrName) -> Column: ... +def skewness(col: ColumnOrName) -> Column: ... +def sqrt(col: ColumnOrName) -> Column: ... +def stddev(col: ColumnOrName) -> Column: ... +def stddev_pop(col: ColumnOrName) -> Column: ... +def stddev_samp(col: ColumnOrName) -> Column: ... +def sum(col: ColumnOrName) -> Column: ... +def sumDistinct(col: ColumnOrName) -> Column: ... +def tan(col: ColumnOrName) -> Column: ... +def tanh(col: ColumnOrName) -> Column: ... +def toDegrees(col: ColumnOrName) -> Column: ... +def toRadians(col: ColumnOrName) -> Column: ... +def trim(col: ColumnOrName) -> Column: ... +def unbase64(col: ColumnOrName) -> Column: ... +def upper(col: ColumnOrName) -> Column: ... +def var_pop(col: ColumnOrName) -> Column: ... +def var_samp(col: ColumnOrName) -> Column: ... +def variance(col: ColumnOrName) -> Column: ... +@overload +def udf( + f: Callable[..., Any], returnType: DataTypeOrString = ... +) -> Callable[..., Column]: ... +@overload +def udf( + f: DataTypeOrString = ..., +) -> Callable[[Callable[..., Any]], Callable[..., Column]]: ... diff --git a/python/pyspark/sql/group.pyi b/python/pyspark/sql/group.pyi new file mode 100644 index 0000000000000..0b0df8c63cfdd --- /dev/null +++ b/python/pyspark/sql/group.pyi @@ -0,0 +1,44 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import overload +from typing import Dict, List, Optional + +from pyspark.sql._typing import LiteralType +from pyspark.sql.context import SQLContext +from pyspark.sql.column import Column +from pyspark.sql.dataframe import DataFrame +from pyspark.sql.pandas.group_ops import PandasGroupedOpsMixin +from py4j.java_gateway import JavaObject # type: ignore[import] + +class GroupedData(PandasGroupedOpsMixin): + sql_ctx: SQLContext + def __init__(self, jgd: JavaObject, df: DataFrame) -> None: ... + @overload + def agg(self, *exprs: Column) -> DataFrame: ... + @overload + def agg(self, __exprs: Dict[str, str]) -> DataFrame: ... + def count(self) -> DataFrame: ... + def mean(self, *cols: str) -> DataFrame: ... + def avg(self, *cols: str) -> DataFrame: ... + def max(self, *cols: str) -> DataFrame: ... + def min(self, *cols: str) -> DataFrame: ... + def sum(self, *cols: str) -> DataFrame: ... + def pivot( + self, pivot_col: str, values: Optional[List[LiteralType]] = ... + ) -> GroupedData: ... diff --git a/python/pyspark/sql/pandas/__init__.pyi b/python/pyspark/sql/pandas/__init__.pyi new file mode 100644 index 0000000000000..217e5db960782 --- /dev/null +++ b/python/pyspark/sql/pandas/__init__.pyi @@ -0,0 +1,17 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/python/pyspark/sql/pandas/_typing/__init__.pyi b/python/pyspark/sql/pandas/_typing/__init__.pyi new file mode 100644 index 0000000000000..dda1b3341b31c --- /dev/null +++ b/python/pyspark/sql/pandas/_typing/__init__.pyi @@ -0,0 +1,338 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import ( + Any, + Callable, + Iterable, + NewType, + Tuple, + Type, + Union, +) +from typing_extensions import Protocol, Literal +from types import FunctionType + +from pyspark.sql._typing import LiteralType +from pyspark.sql.pandas._typing.protocols.frame import DataFrameLike as DataFrameLike +from pyspark.sql.pandas._typing.protocols.series import SeriesLike as SeriesLike + +import pandas.core.frame # type: ignore[import] +import pandas.core.series # type: ignore[import] + +# POC compatibility annotations +PandasDataFrame: Type[DataFrameLike] = pandas.core.frame.DataFrame +PandasSeries: Type[SeriesLike] = pandas.core.series.Series + +DataFrameOrSeriesLike = Union[DataFrameLike, SeriesLike] + +# UDF annotations +PandasScalarUDFType = Literal[200] +PandasScalarIterUDFType = Literal[204] +PandasGroupedMapUDFType = Literal[201] +PandasCogroupedMapUDFType = Literal[206] +PandasGroupedAggUDFType = Literal[202] +PandasMapIterUDFType = Literal[205] + +class PandasVariadicScalarToScalarFunction(Protocol): + def __call__(self, *_: DataFrameOrSeriesLike) -> SeriesLike: ... + +PandasScalarToScalarFunction = Union[ + PandasVariadicScalarToScalarFunction, + Callable[[DataFrameOrSeriesLike], SeriesLike], + Callable[[DataFrameOrSeriesLike, DataFrameOrSeriesLike], SeriesLike], + Callable[ + [DataFrameOrSeriesLike, DataFrameOrSeriesLike, DataFrameOrSeriesLike], + SeriesLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + SeriesLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + SeriesLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + SeriesLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + SeriesLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + SeriesLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + SeriesLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + SeriesLike, + ], +] + +class PandasVariadicScalarToStructFunction(Protocol): + def __call__(self, *_: DataFrameOrSeriesLike) -> DataFrameLike: ... 
+ +PandasScalarToStructFunction = Union[ + PandasVariadicScalarToStructFunction, + Callable[[DataFrameOrSeriesLike], DataFrameLike], + Callable[[DataFrameOrSeriesLike, DataFrameOrSeriesLike], DataFrameLike], + Callable[ + [DataFrameOrSeriesLike, DataFrameOrSeriesLike, DataFrameOrSeriesLike], + DataFrameLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + DataFrameLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + DataFrameLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + DataFrameLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + DataFrameLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + DataFrameLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + DataFrameLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + DataFrameLike, + ], +] + +PandasScalarIterFunction = Callable[ + [Iterable[Union[DataFrameOrSeriesLike, Tuple[DataFrameOrSeriesLike, ...]]]], + Iterable[SeriesLike], +] + +PandasGroupedMapFunction = Union[ + Callable[[DataFrameLike], DataFrameLike], + Callable[[Any, DataFrameLike], DataFrameLike], +] + +class PandasVariadicGroupedAggFunction(Protocol): + def __call__(self, *_: SeriesLike) -> LiteralType: ... 
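The fixed-arity `Union` aliases above enumerate call signatures of one to ten arguments, plus a variadic Protocol, so a type checker can accept user functions of any of those shapes. A hedged sketch of callables matching two of the aliases already defined at this point (function and column choices are invented for illustration):

```
import pandas as pd

# Shape compatible with PandasScalarToScalarFunction: pandas Series in, Series out
def double(s: pd.Series) -> pd.Series:
    return s * 2

# Shape compatible with PandasGroupedMapFunction: one pandas DataFrame in, one out
def normalize(pdf: pd.DataFrame) -> pd.DataFrame:
    # assumes an all-numeric frame
    return (pdf - pdf.mean()) / pdf.std()
```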
+ +PandasGroupedAggFunction = Union[ + Callable[[SeriesLike], LiteralType], + Callable[[SeriesLike, SeriesLike], LiteralType], + Callable[[SeriesLike, SeriesLike, SeriesLike], LiteralType], + Callable[[SeriesLike, SeriesLike, SeriesLike, SeriesLike], LiteralType], + Callable[[SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike], LiteralType], + Callable[ + [SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike], + LiteralType, + ], + Callable[ + [ + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + ], + LiteralType, + ], + Callable[ + [ + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + ], + LiteralType, + ], + Callable[ + [ + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + ], + LiteralType, + ], + Callable[ + [ + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + ], + LiteralType, + ], + PandasVariadicGroupedAggFunction, +] + +PandasMapIterFunction = Callable[[Iterable[DataFrameLike]], Iterable[DataFrameLike]] + +PandasCogroupedMapFunction = Callable[[DataFrameLike, DataFrameLike], DataFrameLike] + +MapIterPandasUserDefinedFunction = NewType( + "MapIterPandasUserDefinedFunction", FunctionType +) +GroupedMapPandasUserDefinedFunction = NewType( + "GroupedMapPandasUserDefinedFunction", FunctionType +) +CogroupedMapPandasUserDefinedFunction = NewType( + "CogroupedMapPandasUserDefinedFunction", FunctionType +) diff --git a/python/pyspark/sql/pandas/_typing/protocols/__init__.pyi b/python/pyspark/sql/pandas/_typing/protocols/__init__.pyi new file mode 100644 index 0000000000000..217e5db960782 --- /dev/null +++ b/python/pyspark/sql/pandas/_typing/protocols/__init__.pyi @@ -0,0 +1,17 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/python/pyspark/sql/pandas/_typing/protocols/frame.pyi b/python/pyspark/sql/pandas/_typing/protocols/frame.pyi new file mode 100644 index 0000000000000..de679ee2cd017 --- /dev/null +++ b/python/pyspark/sql/pandas/_typing/protocols/frame.pyi @@ -0,0 +1,428 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# This Protocol resuses core Pandas annotation. +# Overall pipeline looks as follows +# - Stubgen pandas.core.frame +# - Add Protocol as a base class +# - Replace imports with Any + +import numpy.ma as np # type: ignore[import] +from typing import Any, Hashable, IO, Iterable, List, Optional, Sequence, Tuple, Union +from typing_extensions import Protocol +from .series import SeriesLike + +Axes = Any +Dtype = Any +Index = Any +Renamer = Any +Axis = Any +Level = Any + +class DataFrameLike(Protocol): + def __init__( + self, + data: Any = ..., + index: Optional[Axes] = ..., + columns: Optional[Axes] = ..., + dtype: Optional[Dtype] = ..., + copy: bool = ..., + ) -> None: ... + @property + def axes(self) -> List[Index]: ... + @property + def shape(self) -> Tuple[int, int]: ... + @property + def style(self) -> Any: ... + def items(self) -> Iterable[Tuple[Optional[Hashable], SeriesLike]]: ... + def iteritems(self) -> Iterable[Tuple[Optional[Hashable], SeriesLike]]: ... + def iterrows(self) -> Iterable[Tuple[Optional[Hashable], SeriesLike]]: ... + def itertuples(self, index: bool = ..., name: str = ...): ... + def __len__(self) -> int: ... + def dot(self, other: Any): ... + def __matmul__(self, other: Any): ... + def __rmatmul__(self, other: Any): ... + @classmethod + def from_dict( + cls: Any, data: Any, orient: Any = ..., dtype: Any = ..., columns: Any = ... + ) -> DataFrameLike: ... + def to_numpy(self, dtype: Any = ..., copy: Any = ...) -> np.ndarray: ... + def to_dict(self, orient: str = ..., into: Any = ...): ... + def to_gbq( + self, + destination_table: Any, + project_id: Any = ..., + chunksize: Any = ..., + reauth: Any = ..., + if_exists: Any = ..., + auth_local_webserver: Any = ..., + table_schema: Any = ..., + location: Any = ..., + progress_bar: Any = ..., + credentials: Any = ..., + ) -> None: ... + @classmethod + def from_records( + cls: Any, + data: Any, + index: Any = ..., + exclude: Any = ..., + columns: Any = ..., + coerce_float: Any = ..., + nrows: Any = ..., + ) -> DataFrameLike: ... + def to_records( + self, index: Any = ..., column_dtypes: Any = ..., index_dtypes: Any = ... + ) -> np.recarray: ... + def to_stata( + self, + path: Any, + convert_dates: Optional[Any] = ..., + write_index: bool = ..., + byteorder: Optional[Any] = ..., + time_stamp: Optional[Any] = ..., + data_label: Optional[Any] = ..., + variable_labels: Optional[Any] = ..., + version: int = ..., + convert_strl: Optional[Any] = ..., + ) -> None: ... + def to_feather(self, path: Any) -> None: ... + def to_markdown( + self, buf: Optional[IO[str]] = ..., mode: Optional[str] = ..., **kwargs: Any + ) -> Optional[str]: ... + def to_parquet( + self, + path: Any, + engine: Any = ..., + compression: Any = ..., + index: Any = ..., + partition_cols: Any = ..., + **kwargs: Any + ) -> None: ... 
+ def to_html( + self, + buf: Optional[Any] = ..., + columns: Optional[Any] = ..., + col_space: Optional[Any] = ..., + header: bool = ..., + index: bool = ..., + na_rep: str = ..., + formatters: Optional[Any] = ..., + float_format: Optional[Any] = ..., + sparsify: Optional[Any] = ..., + index_names: bool = ..., + justify: Optional[Any] = ..., + max_rows: Optional[Any] = ..., + max_cols: Optional[Any] = ..., + show_dimensions: bool = ..., + decimal: str = ..., + bold_rows: bool = ..., + classes: Optional[Any] = ..., + escape: bool = ..., + notebook: bool = ..., + border: Optional[Any] = ..., + table_id: Optional[Any] = ..., + render_links: bool = ..., + encoding: Optional[Any] = ..., + ): ... + def info( + self, + verbose: Any = ..., + buf: Any = ..., + max_cols: Any = ..., + memory_usage: Any = ..., + null_counts: Any = ..., + ) -> None: ... + def memory_usage(self, index: Any = ..., deep: Any = ...) -> SeriesLike: ... + def transpose(self, *args: Any, copy: bool = ...) -> DataFrameLike: ... + T: Any = ... + def __getitem__(self, key: Any): ... + def __setitem__(self, key: Any, value: Any): ... + def query(self, expr: Any, inplace: bool = ..., **kwargs: Any): ... + def eval(self, expr: Any, inplace: bool = ..., **kwargs: Any): ... + def select_dtypes( + self, include: Any = ..., exclude: Any = ... + ) -> DataFrameLike: ... + def insert( + self, loc: Any, column: Any, value: Any, allow_duplicates: Any = ... + ) -> None: ... + def assign(self, **kwargs: Any) -> DataFrameLike: ... + def lookup(self, row_labels: Any, col_labels: Any) -> np.ndarray: ... + def align( + self, + other: Any, + join: Any = ..., + axis: Any = ..., + level: Any = ..., + copy: Any = ..., + fill_value: Any = ..., + method: Any = ..., + limit: Any = ..., + fill_axis: Any = ..., + broadcast_axis: Any = ..., + ) -> DataFrameLike: ... + def reindex(self, *args: Any, **kwargs: Any) -> DataFrameLike: ... + def drop( + self, + labels: Optional[Any] = ..., + axis: int = ..., + index: Optional[Any] = ..., + columns: Optional[Any] = ..., + level: Optional[Any] = ..., + inplace: bool = ..., + errors: str = ..., + ): ... + def rename( + self, + mapper: Optional[Renamer] = ..., + *, + index: Optional[Renamer] = ..., + columns: Optional[Renamer] = ..., + axis: Optional[Axis] = ..., + copy: bool = ..., + inplace: bool = ..., + level: Optional[Level] = ..., + errors: str = ... + ) -> Optional[DataFrameLike]: ... + def fillna( + self, + value: Any = ..., + method: Any = ..., + axis: Any = ..., + inplace: Any = ..., + limit: Any = ..., + downcast: Any = ..., + ) -> Optional[DataFrameLike]: ... + def replace( + self, + to_replace: Optional[Any] = ..., + value: Optional[Any] = ..., + inplace: bool = ..., + limit: Optional[Any] = ..., + regex: bool = ..., + method: str = ..., + ): ... + def shift( + self, + periods: Any = ..., + freq: Any = ..., + axis: Any = ..., + fill_value: Any = ..., + ) -> DataFrameLike: ... + def set_index( + self, + keys: Any, + drop: bool = ..., + append: bool = ..., + inplace: bool = ..., + verify_integrity: bool = ..., + ): ... + def reset_index( + self, + level: Optional[Union[Hashable, Sequence[Hashable]]] = ..., + drop: bool = ..., + inplace: bool = ..., + col_level: Hashable = ..., + col_fill: Optional[Hashable] = ..., + ) -> Optional[DataFrameLike]: ... + def isna(self) -> DataFrameLike: ... + def isnull(self) -> DataFrameLike: ... + def notna(self) -> DataFrameLike: ... + def notnull(self) -> DataFrameLike: ... 
+ def dropna( + self, + axis: int = ..., + how: str = ..., + thresh: Optional[Any] = ..., + subset: Optional[Any] = ..., + inplace: bool = ..., + ): ... + def drop_duplicates( + self, + subset: Optional[Union[Hashable, Sequence[Hashable]]] = ..., + keep: Union[str, bool] = ..., + inplace: bool = ..., + ignore_index: bool = ..., + ) -> Optional[DataFrameLike]: ... + def duplicated( + self, + subset: Optional[Union[Hashable, Sequence[Hashable]]] = ..., + keep: Union[str, bool] = ..., + ) -> SeriesLike: ... + def sort_values( + self, + by: Any, + axis: int = ..., + ascending: bool = ..., + inplace: bool = ..., + kind: str = ..., + na_position: str = ..., + ignore_index: bool = ..., + ): ... + def sort_index( + self, + axis: Any = ..., + level: Any = ..., + ascending: Any = ..., + inplace: Any = ..., + kind: Any = ..., + na_position: Any = ..., + sort_remaining: Any = ..., + ignore_index: bool = ..., + ) -> Any: ... + def nlargest(self, n: Any, columns: Any, keep: Any = ...) -> DataFrameLike: ... + def nsmallest(self, n: Any, columns: Any, keep: Any = ...) -> DataFrameLike: ... + def swaplevel( + self, i: Any = ..., j: Any = ..., axis: Any = ... + ) -> DataFrameLike: ... + def reorder_levels(self, order: Any, axis: Any = ...) -> DataFrameLike: ... + def combine( + self, + other: DataFrameLike, + func: Any, + fill_value: Any = ..., + overwrite: Any = ..., + ) -> DataFrameLike: ... + def combine_first(self, other: DataFrameLike) -> DataFrameLike: ... + def update( + self, + other: Any, + join: Any = ..., + overwrite: Any = ..., + filter_func: Any = ..., + errors: Any = ..., + ) -> None: ... + def groupby( + self, + by: Any = ..., + axis: Any = ..., + level: Any = ..., + as_index: bool = ..., + sort: bool = ..., + group_keys: bool = ..., + squeeze: bool = ..., + observed: bool = ..., + ) -> Any: ... + def pivot( + self, index: Any = ..., columns: Any = ..., values: Any = ... + ) -> DataFrameLike: ... + def pivot_table( + self, + values: Any = ..., + index: Any = ..., + columns: Any = ..., + aggfunc: Any = ..., + fill_value: Any = ..., + margins: Any = ..., + dropna: Any = ..., + margins_name: Any = ..., + observed: Any = ..., + ) -> DataFrameLike: ... + def stack(self, level: int = ..., dropna: bool = ...): ... + def explode(self, column: Union[str, Tuple]) -> DataFrameLike: ... + def unstack(self, level: int = ..., fill_value: Optional[Any] = ...): ... + def melt( + self, + id_vars: Any = ..., + value_vars: Any = ..., + var_name: Any = ..., + value_name: Any = ..., + col_level: Any = ..., + ) -> DataFrameLike: ... + def diff(self, periods: Any = ..., axis: Any = ...) -> DataFrameLike: ... + def aggregate(self, func: Any, axis: int = ..., *args: Any, **kwargs: Any): ... + agg: Any = ... + def transform( + self, func: Any, axis: Any = ..., *args: Any, **kwargs: Any + ) -> DataFrameLike: ... + def apply( + self, + func: Any, + axis: int = ..., + raw: bool = ..., + result_type: Optional[Any] = ..., + args: Any = ..., + **kwds: Any + ): ... + def applymap(self, func: Any) -> DataFrameLike: ... + def append( + self, + other: Any, + ignore_index: Any = ..., + verify_integrity: Any = ..., + sort: Any = ..., + ) -> DataFrameLike: ... + def join( + self, + other: Any, + on: Any = ..., + how: Any = ..., + lsuffix: Any = ..., + rsuffix: Any = ..., + sort: Any = ..., + ) -> DataFrameLike: ... 
+ def merge( + self, + right: Any, + how: Any = ..., + on: Any = ..., + left_on: Any = ..., + right_on: Any = ..., + left_index: Any = ..., + right_index: Any = ..., + sort: Any = ..., + suffixes: Any = ..., + copy: Any = ..., + indicator: Any = ..., + validate: Any = ..., + ) -> DataFrameLike: ... + def round( + self, decimals: Any = ..., *args: Any, **kwargs: Any + ) -> DataFrameLike: ... + def corr(self, method: Any = ..., min_periods: Any = ...) -> DataFrameLike: ... + def cov(self, min_periods: Any = ...) -> DataFrameLike: ... + def corrwith( + self, other: Any, axis: Any = ..., drop: Any = ..., method: Any = ... + ) -> SeriesLike: ... + def count( + self, axis: int = ..., level: Optional[Any] = ..., numeric_only: bool = ... + ): ... + def nunique(self, axis: Any = ..., dropna: Any = ...) -> SeriesLike: ... + def idxmin(self, axis: Any = ..., skipna: Any = ...) -> SeriesLike: ... + def idxmax(self, axis: Any = ..., skipna: Any = ...) -> SeriesLike: ... + def mode( + self, axis: Any = ..., numeric_only: Any = ..., dropna: Any = ... + ) -> DataFrameLike: ... + def quantile( + self, + q: float = ..., + axis: int = ..., + numeric_only: bool = ..., + interpolation: str = ..., + ): ... + def to_timestamp( + self, freq: Any = ..., how: Any = ..., axis: Any = ..., copy: Any = ... + ) -> DataFrameLike: ... + def to_period( + self, freq: Any = ..., axis: Any = ..., copy: Any = ... + ) -> DataFrameLike: ... + def isin(self, values: Any) -> DataFrameLike: ... + plot: Any = ... + hist: Any = ... + boxplot: Any = ... + sparse: Any = ... diff --git a/python/pyspark/sql/pandas/_typing/protocols/series.pyi b/python/pyspark/sql/pandas/_typing/protocols/series.pyi new file mode 100644 index 0000000000000..14babb067da0d --- /dev/null +++ b/python/pyspark/sql/pandas/_typing/protocols/series.pyi @@ -0,0 +1,253 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# This Protocol resuses core Pandas annotation. +# Overall pipeline looks as follows +# - Stubgen pandas.core.series +# - Add Protocol as a base class +# - Replace imports with Any + +import numpy as np # type: ignore[import] +from typing import Any, Callable, Hashable, IO, Optional +from typing_extensions import Protocol + +groupby_generic = Any + +class SeriesLike(Protocol): + hasnans: Any = ... + div: Callable[[SeriesLike, Any], SeriesLike] + rdiv: Callable[[SeriesLike, Any], SeriesLike] + def __init__( + self, + data: Optional[Any] = ..., + index: Optional[Any] = ..., + dtype: Optional[Any] = ..., + name: Optional[Any] = ..., + copy: bool = ..., + fastpath: bool = ..., + ) -> None: ... + @property + def dtype(self): ... + @property + def dtypes(self): ... + @property + def name(self) -> Optional[Hashable]: ... + @name.setter + def name(self, value: Optional[Hashable]) -> None: ... 
+ @property + def values(self): ... + def ravel(self, order: str = ...): ... + def __len__(self) -> int: ... + def view(self, dtype: Optional[Any] = ...): ... + def __array_ufunc__( + self, ufunc: Callable, method: str, *inputs: Any, **kwargs: Any + ) -> Any: ... + def __array__(self, dtype: Any = ...) -> np.ndarray: ... + __float__: Any = ... + __long__: Any = ... + __int__: Any = ... + @property + def axes(self): ... + def take( + self, indices: Any, axis: int = ..., is_copy: bool = ..., **kwargs: Any + ): ... + def __getitem__(self, key: Any): ... + def __setitem__(self, key: Any, value: Any) -> None: ... + def repeat(self, repeats: Any, axis: Optional[Any] = ...): ... + index: Any = ... + def reset_index( + self, + level: Optional[Any] = ..., + drop: bool = ..., + name: Optional[Any] = ..., + inplace: bool = ..., + ): ... + def to_string( + self, + buf: Optional[Any] = ..., + na_rep: str = ..., + float_format: Optional[Any] = ..., + header: bool = ..., + index: bool = ..., + length: bool = ..., + dtype: bool = ..., + name: bool = ..., + max_rows: Optional[Any] = ..., + min_rows: Optional[Any] = ..., + ): ... + def to_markdown( + self, buf: Optional[IO[str]] = ..., mode: Optional[str] = ..., **kwargs: Any + ) -> Optional[str]: ... + def items(self): ... + def iteritems(self): ... + def keys(self): ... + def to_dict(self, into: Any = ...): ... + def to_frame(self, name: Optional[Any] = ...): ... + def groupby( + self, + by: Any = ..., + axis: Any = ..., + level: Any = ..., + as_index: bool = ..., + sort: bool = ..., + group_keys: bool = ..., + squeeze: bool = ..., + observed: bool = ..., + ) -> Any: ... + def count(self, level: Optional[Any] = ...): ... + def mode(self, dropna: bool = ...): ... + def unique(self): ... + def drop_duplicates(self, keep: str = ..., inplace: bool = ...): ... + def duplicated(self, keep: str = ...): ... + def idxmin( + self, axis: int = ..., skipna: bool = ..., *args: Any, **kwargs: Any + ): ... + def idxmax( + self, axis: int = ..., skipna: bool = ..., *args: Any, **kwargs: Any + ): ... + def round(self, decimals: int = ..., *args: Any, **kwargs: Any): ... + def quantile(self, q: float = ..., interpolation: str = ...): ... + def corr(self, other: Any, method: str = ..., min_periods: Optional[Any] = ...): ... + def cov(self, other: Any, min_periods: Optional[Any] = ...): ... + def diff(self, periods: int = ...): ... + def autocorr(self, lag: int = ...): ... + def dot(self, other: Any): ... + def __matmul__(self, other: Any): ... + def __rmatmul__(self, other: Any): ... + def searchsorted( + self, value: Any, side: str = ..., sorter: Optional[Any] = ... + ): ... + def append( + self, to_append: Any, ignore_index: bool = ..., verify_integrity: bool = ... + ): ... + def combine(self, other: Any, func: Any, fill_value: Optional[Any] = ...): ... + def combine_first(self, other: Any): ... + def update(self, other: Any) -> None: ... + def sort_values( + self, + axis: int = ..., + ascending: bool = ..., + inplace: bool = ..., + kind: str = ..., + na_position: str = ..., + ignore_index: bool = ..., + ): ... + def sort_index( + self, + axis: Any = ..., + level: Any = ..., + ascending: Any = ..., + inplace: Any = ..., + kind: Any = ..., + na_position: Any = ..., + sort_remaining: Any = ..., + ignore_index: bool = ..., + ) -> Any: ... + def argsort(self, axis: int = ..., kind: str = ..., order: Optional[Any] = ...): ... + def nlargest(self, n: int = ..., keep: str = ...): ... + def nsmallest(self, n: int = ..., keep: str = ...): ... 
+ def swaplevel(self, i: int = ..., j: int = ..., copy: bool = ...): ... + def reorder_levels(self, order: Any): ... + def explode(self) -> SeriesLike: ... + def unstack(self, level: int = ..., fill_value: Optional[Any] = ...): ... + def map(self, arg: Any, na_action: Optional[Any] = ...): ... + def aggregate(self, func: Any, axis: int = ..., *args: Any, **kwargs: Any): ... + agg: Any = ... + def transform(self, func: Any, axis: int = ..., *args: Any, **kwargs: Any): ... + def apply( + self, func: Any, convert_dtype: bool = ..., args: Any = ..., **kwds: Any + ): ... + def align( + self, + other: Any, + join: str = ..., + axis: Optional[Any] = ..., + level: Optional[Any] = ..., + copy: bool = ..., + fill_value: Optional[Any] = ..., + method: Optional[Any] = ..., + limit: Optional[Any] = ..., + fill_axis: int = ..., + broadcast_axis: Optional[Any] = ..., + ): ... + def rename( + self, + index: Optional[Any] = ..., + *, + axis: Optional[Any] = ..., + copy: bool = ..., + inplace: bool = ..., + level: Optional[Any] = ..., + errors: str = ... + ): ... + def reindex(self, index: Optional[Any] = ..., **kwargs: Any): ... + def drop( + self, + labels: Optional[Any] = ..., + axis: int = ..., + index: Optional[Any] = ..., + columns: Optional[Any] = ..., + level: Optional[Any] = ..., + inplace: bool = ..., + errors: str = ..., + ): ... + def fillna( + self, + value: Any = ..., + method: Any = ..., + axis: Any = ..., + inplace: Any = ..., + limit: Any = ..., + downcast: Any = ..., + ) -> Optional[SeriesLike]: ... + def replace( + self, + to_replace: Optional[Any] = ..., + value: Optional[Any] = ..., + inplace: bool = ..., + limit: Optional[Any] = ..., + regex: bool = ..., + method: str = ..., + ): ... + def shift( + self, + periods: int = ..., + freq: Optional[Any] = ..., + axis: int = ..., + fill_value: Optional[Any] = ..., + ): ... + def memory_usage(self, index: bool = ..., deep: bool = ...): ... + def isin(self, values: Any): ... + def between(self, left: Any, right: Any, inclusive: bool = ...): ... + def isna(self): ... + def isnull(self): ... + def notna(self): ... + def notnull(self): ... + def dropna( + self, axis: int = ..., inplace: bool = ..., how: Optional[Any] = ... + ): ... + def to_timestamp( + self, freq: Optional[Any] = ..., how: str = ..., copy: bool = ... + ): ... + def to_period(self, freq: Optional[Any] = ..., copy: bool = ...): ... + str: Any = ... + dt: Any = ... + cat: Any = ... + plot: Any = ... + sparse: Any = ... + hist: Any = ... diff --git a/python/pyspark/sql/pandas/conversion.pyi b/python/pyspark/sql/pandas/conversion.pyi new file mode 100644 index 0000000000000..031852fcc053d --- /dev/null +++ b/python/pyspark/sql/pandas/conversion.pyi @@ -0,0 +1,58 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import overload +from typing import Optional, Union + +from pyspark.sql.pandas._typing import DataFrameLike +from pyspark import since as since # noqa: F401 +from pyspark.rdd import RDD # noqa: F401 +import pyspark.sql.dataframe +from pyspark.sql.pandas.serializers import ( # noqa: F401 + ArrowCollectSerializer as ArrowCollectSerializer, +) +from pyspark.sql.types import ( # noqa: F401 + BooleanType as BooleanType, + ByteType as ByteType, + DataType as DataType, + DoubleType as DoubleType, + FloatType as FloatType, + IntegerType as IntegerType, + IntegralType as IntegralType, + LongType as LongType, + ShortType as ShortType, + StructType as StructType, + TimestampType as TimestampType, +) +from pyspark.traceback_utils import SCCallSiteSync as SCCallSiteSync # noqa: F401 + +class PandasConversionMixin: + def toPandas(self) -> DataFrameLike: ... + +class SparkConversionMixin: + @overload + def createDataFrame( + self, data: DataFrameLike, samplingRatio: Optional[float] = ... + ) -> pyspark.sql.dataframe.DataFrame: ... + @overload + def createDataFrame( + self, + data: DataFrameLike, + schema: Union[StructType, str], + verifySchema: bool = ..., + ) -> pyspark.sql.dataframe.DataFrame: ... diff --git a/python/pyspark/sql/pandas/functions.pyi b/python/pyspark/sql/pandas/functions.pyi new file mode 100644 index 0000000000000..09318e43f8aa1 --- /dev/null +++ b/python/pyspark/sql/pandas/functions.pyi @@ -0,0 +1,176 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import Union, Callable + +from pyspark.sql._typing import ( + AtomicDataTypeOrString, + UserDefinedFunctionLike, +) +from pyspark.sql.pandas._typing import ( + GroupedMapPandasUserDefinedFunction, + MapIterPandasUserDefinedFunction, + CogroupedMapPandasUserDefinedFunction, + PandasCogroupedMapFunction, + PandasCogroupedMapUDFType, + PandasGroupedAggFunction, + PandasGroupedAggUDFType, + PandasGroupedMapFunction, + PandasGroupedMapUDFType, + PandasMapIterFunction, + PandasMapIterUDFType, + PandasScalarIterFunction, + PandasScalarIterUDFType, + PandasScalarToScalarFunction, + PandasScalarToStructFunction, + PandasScalarUDFType, +) + +from pyspark import since as since # noqa: F401 +from pyspark.rdd import PythonEvalType as PythonEvalType # noqa: F401 +from pyspark.sql.types import ArrayType, StructType + +class PandasUDFType: + SCALAR: PandasScalarUDFType + SCALAR_ITER: PandasScalarIterUDFType + GROUPED_MAP: PandasGroupedMapUDFType + GROUPED_AGG: PandasGroupedAggUDFType + +@overload +def pandas_udf( + f: PandasScalarToScalarFunction, + returnType: Union[AtomicDataTypeOrString, ArrayType], + functionType: PandasScalarUDFType, +) -> UserDefinedFunctionLike: ... 
+@overload +def pandas_udf(f: Union[AtomicDataTypeOrString, ArrayType], returnType: PandasScalarUDFType) -> Callable[[PandasScalarToScalarFunction], UserDefinedFunctionLike]: ... # type: ignore[misc] +@overload +def pandas_udf(f: Union[AtomicDataTypeOrString, ArrayType], *, functionType: PandasScalarUDFType) -> Callable[[PandasScalarToScalarFunction], UserDefinedFunctionLike]: ... # type: ignore[misc] +@overload +def pandas_udf(*, returnType: Union[AtomicDataTypeOrString, ArrayType], functionType: PandasScalarUDFType) -> Callable[[PandasScalarToScalarFunction], UserDefinedFunctionLike]: ... # type: ignore[misc] +@overload +def pandas_udf( + f: PandasScalarToStructFunction, + returnType: Union[StructType, str], + functionType: PandasScalarUDFType, +) -> UserDefinedFunctionLike: ... +@overload +def pandas_udf(f: Union[StructType, str], returnType: PandasScalarUDFType) -> Callable[[PandasScalarToStructFunction], UserDefinedFunctionLike]: ... # type: ignore[misc] +@overload +def pandas_udf(f: Union[StructType, str], *, functionType: PandasScalarUDFType) -> Callable[[PandasScalarToStructFunction], UserDefinedFunctionLike]: ... # type: ignore[misc] +@overload +def pandas_udf(*, returnType: Union[StructType, str], functionType: PandasScalarUDFType) -> Callable[[PandasScalarToStructFunction], UserDefinedFunctionLike]: ... # type: ignore[misc] +@overload +def pandas_udf( + f: PandasScalarIterFunction, + returnType: Union[AtomicDataTypeOrString, ArrayType], + functionType: PandasScalarIterUDFType, +) -> UserDefinedFunctionLike: ... +@overload +def pandas_udf( + f: Union[AtomicDataTypeOrString, ArrayType], returnType: PandasScalarIterUDFType +) -> Callable[[PandasScalarIterFunction], UserDefinedFunctionLike]: ... +@overload +def pandas_udf( + *, + returnType: Union[AtomicDataTypeOrString, ArrayType], + functionType: PandasScalarIterUDFType +) -> Callable[[PandasScalarIterFunction], UserDefinedFunctionLike]: ... +@overload +def pandas_udf( + f: Union[AtomicDataTypeOrString, ArrayType], + *, + functionType: PandasScalarIterUDFType +) -> Callable[[PandasScalarIterFunction], UserDefinedFunctionLike]: ... +@overload +def pandas_udf( + f: PandasGroupedMapFunction, + returnType: Union[StructType, str], + functionType: PandasGroupedMapUDFType, +) -> GroupedMapPandasUserDefinedFunction: ... +@overload +def pandas_udf( + f: Union[StructType, str], returnType: PandasGroupedMapUDFType +) -> Callable[[PandasGroupedMapFunction], GroupedMapPandasUserDefinedFunction]: ... +@overload +def pandas_udf( + *, returnType: Union[StructType, str], functionType: PandasGroupedMapUDFType +) -> Callable[[PandasGroupedMapFunction], GroupedMapPandasUserDefinedFunction]: ... +@overload +def pandas_udf( + f: Union[StructType, str], *, functionType: PandasGroupedMapUDFType +) -> Callable[[PandasGroupedMapFunction], GroupedMapPandasUserDefinedFunction]: ... +@overload +def pandas_udf( + f: PandasGroupedAggFunction, + returnType: Union[AtomicDataTypeOrString, ArrayType], + functionType: PandasGroupedAggUDFType, +) -> UserDefinedFunctionLike: ... +@overload +def pandas_udf( + f: Union[AtomicDataTypeOrString, ArrayType], returnType: PandasGroupedAggUDFType +) -> Callable[[PandasGroupedAggFunction], UserDefinedFunctionLike]: ... +@overload +def pandas_udf( + *, + returnType: Union[AtomicDataTypeOrString, ArrayType], + functionType: PandasGroupedAggUDFType +) -> Callable[[PandasGroupedAggFunction], UserDefinedFunctionLike]: ... 
+@overload +def pandas_udf( + f: Union[AtomicDataTypeOrString, ArrayType], + *, + functionType: PandasGroupedAggUDFType +) -> Callable[[PandasGroupedAggFunction], UserDefinedFunctionLike]: ... +@overload +def pandas_udf( + f: PandasMapIterFunction, + returnType: Union[StructType, str], + functionType: PandasMapIterUDFType, +) -> MapIterPandasUserDefinedFunction: ... +@overload +def pandas_udf( + f: Union[StructType, str], returnType: PandasMapIterUDFType +) -> Callable[[PandasMapIterFunction], MapIterPandasUserDefinedFunction]: ... +@overload +def pandas_udf( + *, returnType: Union[StructType, str], functionType: PandasMapIterUDFType +) -> Callable[[PandasMapIterFunction], MapIterPandasUserDefinedFunction]: ... +@overload +def pandas_udf( + f: Union[StructType, str], *, functionType: PandasMapIterUDFType +) -> Callable[[PandasMapIterFunction], MapIterPandasUserDefinedFunction]: ... +@overload +def pandas_udf( + f: PandasCogroupedMapFunction, + returnType: Union[StructType, str], + functionType: PandasCogroupedMapUDFType, +) -> CogroupedMapPandasUserDefinedFunction: ... +@overload +def pandas_udf( + f: Union[StructType, str], returnType: PandasCogroupedMapUDFType +) -> Callable[[PandasCogroupedMapFunction], CogroupedMapPandasUserDefinedFunction]: ... +@overload +def pandas_udf( + *, returnType: Union[StructType, str], functionType: PandasCogroupedMapUDFType +) -> Callable[[PandasCogroupedMapFunction], CogroupedMapPandasUserDefinedFunction]: ... +@overload +def pandas_udf( + f: Union[StructType, str], *, functionType: PandasCogroupedMapUDFType +) -> Callable[[PandasCogroupedMapFunction], CogroupedMapPandasUserDefinedFunction]: ... diff --git a/python/pyspark/sql/pandas/group_ops.pyi b/python/pyspark/sql/pandas/group_ops.pyi new file mode 100644 index 0000000000000..2c543e0dc77b9 --- /dev/null +++ b/python/pyspark/sql/pandas/group_ops.pyi @@ -0,0 +1,49 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Union + +from pyspark.sql.pandas._typing import ( + GroupedMapPandasUserDefinedFunction, + PandasGroupedMapFunction, + PandasCogroupedMapFunction, +) + +from pyspark import since as since # noqa: F401 +from pyspark.rdd import PythonEvalType as PythonEvalType # noqa: F401 +from pyspark.sql.column import Column as Column # noqa: F401 +from pyspark.sql.context import SQLContext +import pyspark.sql.group +from pyspark.sql.dataframe import DataFrame as DataFrame +from pyspark.sql.types import StructType + +class PandasGroupedOpsMixin: + def cogroup(self, other: pyspark.sql.group.GroupedData) -> PandasCogroupedOps: ... + def apply(self, udf: GroupedMapPandasUserDefinedFunction) -> DataFrame: ... + def applyInPandas( + self, func: PandasGroupedMapFunction, schema: Union[StructType, str] + ) -> DataFrame: ... 
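A hedged usage sketch for the `applyInPandas` signature declared just above; the grouping column, data, and result schema are invented for illustration:

```
import pandas as pd
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, 1.0), (1, 2.0), (2, 3.0)], ["id", "v"])

def subtract_mean(pdf: pd.DataFrame) -> pd.DataFrame:
    # Called once per group with a pandas DataFrame; returns a pandas DataFrame
    return pdf.assign(v=pdf["v"] - pdf["v"].mean())

# schema accepts either a StructType or a DDL string, matching Union[StructType, str]
df.groupBy("id").applyInPandas(subtract_mean, schema="id long, v double").show()
```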
+ +class PandasCogroupedOps: + sql_ctx: SQLContext + def __init__( + self, gd1: pyspark.sql.group.GroupedData, gd2: pyspark.sql.group.GroupedData + ) -> None: ... + def applyInPandas( + self, func: PandasCogroupedMapFunction, schema: Union[StructType, str] + ) -> DataFrame: ... diff --git a/python/pyspark/sql/pandas/map_ops.pyi b/python/pyspark/sql/pandas/map_ops.pyi new file mode 100644 index 0000000000000..cab885278c388 --- /dev/null +++ b/python/pyspark/sql/pandas/map_ops.pyi @@ -0,0 +1,30 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Union + +from pyspark.sql.pandas._typing import PandasMapIterFunction +from pyspark import since as since # noqa: F401 +from pyspark.rdd import PythonEvalType as PythonEvalType # noqa: F401 +from pyspark.sql.types import StructType +import pyspark.sql.dataframe + +class PandasMapOpsMixin: + def mapInPandas( + self, udf: PandasMapIterFunction, schema: Union[StructType, str] + ) -> pyspark.sql.dataframe.DataFrame: ... diff --git a/python/pyspark/sql/pandas/serializers.pyi b/python/pyspark/sql/pandas/serializers.pyi new file mode 100644 index 0000000000000..8be3c0dcbc9ad --- /dev/null +++ b/python/pyspark/sql/pandas/serializers.pyi @@ -0,0 +1,65 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyspark.serializers import ( # noqa: F401 + Serializer as Serializer, + UTF8Deserializer as UTF8Deserializer, + read_int as read_int, + write_int as write_int, +) +from typing import Any + +class SpecialLengths: + END_OF_DATA_SECTION: int = ... + PYTHON_EXCEPTION_THROWN: int = ... + TIMING_DATA: int = ... + END_OF_STREAM: int = ... + NULL: int = ... + START_ARROW_STREAM: int = ... + +class ArrowCollectSerializer(Serializer): + serializer: Any = ... + def __init__(self) -> None: ... + def dump_stream(self, iterator: Any, stream: Any): ... + def load_stream(self, stream: Any) -> None: ... + +class ArrowStreamSerializer(Serializer): + def dump_stream(self, iterator: Any, stream: Any) -> None: ... + def load_stream(self, stream: Any) -> None: ... 
+ +class ArrowStreamPandasSerializer(ArrowStreamSerializer): + def __init__( + self, timezone: Any, safecheck: Any, assign_cols_by_name: Any + ) -> None: ... + def arrow_to_pandas(self, arrow_column: Any): ... + def dump_stream(self, iterator: Any, stream: Any) -> None: ... + def load_stream(self, stream: Any) -> None: ... + +class ArrowStreamPandasUDFSerializer(ArrowStreamPandasSerializer): + def __init__( + self, + timezone: Any, + safecheck: Any, + assign_cols_by_name: Any, + df_for_struct: bool = ..., + ) -> None: ... + def arrow_to_pandas(self, arrow_column: Any): ... + def dump_stream(self, iterator: Any, stream: Any): ... + +class CogroupUDFSerializer(ArrowStreamPandasUDFSerializer): + def load_stream(self, stream: Any) -> None: ... diff --git a/python/pyspark/sql/pandas/typehints.pyi b/python/pyspark/sql/pandas/typehints.pyi new file mode 100644 index 0000000000000..eea9c86225332 --- /dev/null +++ b/python/pyspark/sql/pandas/typehints.pyi @@ -0,0 +1,33 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyspark.sql.pandas.utils import ( # noqa: F401 + require_minimum_pandas_version as require_minimum_pandas_version, +) +from typing import Any, Optional + +def infer_eval_type(sig: Any): ... +def check_tuple_annotation( + annotation: Any, parameter_check_func: Optional[Any] = ... +): ... +def check_iterator_annotation( + annotation: Any, parameter_check_func: Optional[Any] = ... +): ... +def check_union_annotation( + annotation: Any, parameter_check_func: Optional[Any] = ... +): ... diff --git a/python/pyspark/sql/pandas/types.pyi b/python/pyspark/sql/pandas/types.pyi new file mode 100644 index 0000000000000..5ae29bd273180 --- /dev/null +++ b/python/pyspark/sql/pandas/types.pyi @@ -0,0 +1,41 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from pyspark.sql.types import ( # noqa: F401 + ArrayType as ArrayType, + BinaryType as BinaryType, + BooleanType as BooleanType, + ByteType as ByteType, + DateType as DateType, + DecimalType as DecimalType, + DoubleType as DoubleType, + FloatType as FloatType, + IntegerType as IntegerType, + LongType as LongType, + ShortType as ShortType, + StringType as StringType, + StructField as StructField, + StructType as StructType, + TimestampType as TimestampType, +) +from typing import Any + +def to_arrow_type(dt: Any): ... +def to_arrow_schema(schema: Any): ... +def from_arrow_type(at: Any): ... +def from_arrow_schema(arrow_schema: Any): ... diff --git a/python/pyspark/sql/pandas/utils.pyi b/python/pyspark/sql/pandas/utils.pyi new file mode 100644 index 0000000000000..e4d315b0ce205 --- /dev/null +++ b/python/pyspark/sql/pandas/utils.pyi @@ -0,0 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +def require_minimum_pandas_version() -> None: ... +def require_minimum_pyarrow_version() -> None: ... diff --git a/python/pyspark/sql/readwriter.pyi b/python/pyspark/sql/readwriter.pyi new file mode 100644 index 0000000000000..a111cbe416c2f --- /dev/null +++ b/python/pyspark/sql/readwriter.pyi @@ -0,0 +1,250 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import Dict, List, Optional, Tuple, Union + +from pyspark.sql._typing import OptionalPrimitiveType +from pyspark.sql.dataframe import DataFrame +from pyspark.rdd import RDD +from pyspark.sql.column import Column +from pyspark.sql.context import SQLContext +from pyspark.sql.types import StructType + +PathOrPaths = Union[str, List[str]] +TupleOrListOfString = Union[List[str], Tuple[str, ...]] + +class OptionUtils: ... + +class DataFrameReader(OptionUtils): + def __init__(self, spark: SQLContext) -> None: ... + def format(self, source: str) -> DataFrameReader: ... + def schema(self, schema: Union[StructType, str]) -> DataFrameReader: ... + def option(self, key: str, value: OptionalPrimitiveType) -> DataFrameReader: ... 
+ def options(self, **options: OptionalPrimitiveType) -> DataFrameReader: ... + def load( + self, + path: Optional[PathOrPaths] = ..., + format: Optional[str] = ..., + schema: Optional[StructType] = ..., + **options: OptionalPrimitiveType + ) -> DataFrame: ... + def json( + self, + path: Union[str, List[str], RDD[str]], + schema: Optional[Union[StructType, str]] = ..., + primitivesAsString: Optional[Union[bool, str]] = ..., + prefersDecimal: Optional[Union[bool, str]] = ..., + allowComments: Optional[Union[bool, str]] = ..., + allowUnquotedFieldNames: Optional[Union[bool, str]] = ..., + allowSingleQuotes: Optional[Union[bool, str]] = ..., + allowNumericLeadingZero: Optional[Union[bool, str]] = ..., + allowBackslashEscapingAnyCharacter: Optional[Union[bool, str]] = ..., + mode: Optional[str] = ..., + columnNameOfCorruptRecord: Optional[str] = ..., + dateFormat: Optional[str] = ..., + timestampFormat: Optional[str] = ..., + multiLine: Optional[Union[bool, str]] = ..., + allowUnquotedControlChars: Optional[Union[bool, str]] = ..., + lineSep: Optional[str] = ..., + samplingRatio: Optional[Union[float, str]] = ..., + dropFieldIfAllNull: Optional[Union[bool, str]] = ..., + encoding: Optional[str] = ..., + locale: Optional[str] = ..., + recursiveFileLookup: Optional[bool] = ..., + ) -> DataFrame: ... + def table(self, tableName: str) -> DataFrame: ... + def parquet(self, *paths: str, **options: OptionalPrimitiveType) -> DataFrame: ... + def text( + self, + paths: PathOrPaths, + wholetext: bool = ..., + lineSep: Optional[str] = ..., + recursiveFileLookup: Optional[bool] = ..., + ) -> DataFrame: ... + def csv( + self, + path: PathOrPaths, + schema: Optional[Union[StructType, str]] = ..., + sep: Optional[str] = ..., + encoding: Optional[str] = ..., + quote: Optional[str] = ..., + escape: Optional[str] = ..., + comment: Optional[str] = ..., + header: Optional[Union[bool, str]] = ..., + inferSchema: Optional[Union[bool, str]] = ..., + ignoreLeadingWhiteSpace: Optional[Union[bool, str]] = ..., + ignoreTrailingWhiteSpace: Optional[Union[bool, str]] = ..., + nullValue: Optional[str] = ..., + nanValue: Optional[str] = ..., + positiveInf: Optional[str] = ..., + negativeInf: Optional[str] = ..., + dateFormat: Optional[str] = ..., + timestampFormat: Optional[str] = ..., + maxColumns: Optional[int] = ..., + maxCharsPerColumn: Optional[int] = ..., + maxMalformedLogPerPartition: Optional[int] = ..., + mode: Optional[str] = ..., + columnNameOfCorruptRecord: Optional[str] = ..., + multiLine: Optional[Union[bool, str]] = ..., + charToEscapeQuoteEscaping: Optional[str] = ..., + samplingRatio: Optional[Union[float, str]] = ..., + enforceSchema: Optional[Union[bool, str]] = ..., + emptyValue: Optional[str] = ..., + locale: Optional[str] = ..., + lineSep: Optional[str] = ..., + ) -> DataFrame: ... + def orc( + self, + path: PathOrPaths, + mergeSchema: Optional[bool] = ..., + recursiveFileLookup: Optional[bool] = ..., + ) -> DataFrame: ... + @overload + def jdbc( + self, url: str, table: str, *, properties: Optional[Dict[str, str]] = ... + ) -> DataFrame: ... + @overload + def jdbc( + self, + url: str, + table: str, + column: str, + lowerBound: int, + upperBound: int, + numPartitions: int, + *, + properties: Optional[Dict[str, str]] = ... + ) -> DataFrame: ... + @overload + def jdbc( + self, + url: str, + table: str, + *, + predicates: List[str], + properties: Optional[Dict[str, str]] = ... + ) -> DataFrame: ... + +class DataFrameWriter(OptionUtils): + def __init__(self, df: DataFrame) -> None: ... 
+ def mode(self, saveMode: str) -> DataFrameWriter: ... + def format(self, source: str) -> DataFrameWriter: ... + def option(self, key: str, value: OptionalPrimitiveType) -> DataFrameWriter: ... + def options(self, **options: OptionalPrimitiveType) -> DataFrameWriter: ... + @overload + def partitionBy(self, *cols: str) -> DataFrameWriter: ... + @overload + def partitionBy(self, __cols: List[str]) -> DataFrameWriter: ... + @overload + def bucketBy(self, numBuckets: int, col: str, *cols: str) -> DataFrameWriter: ... + @overload + def bucketBy( + self, numBuckets: int, col: TupleOrListOfString + ) -> DataFrameWriter: ... + @overload + def sortBy(self, col: str, *cols: str) -> DataFrameWriter: ... + @overload + def sortBy(self, col: TupleOrListOfString) -> DataFrameWriter: ... + def save( + self, + path: Optional[str] = ..., + format: Optional[str] = ..., + mode: Optional[str] = ..., + partitionBy: Optional[List[str]] = ..., + **options: OptionalPrimitiveType + ) -> None: ... + def insertInto(self, tableName: str, overwrite: Optional[bool] = ...) -> None: ... + def saveAsTable( + self, + name: str, + format: Optional[str] = ..., + mode: Optional[str] = ..., + partitionBy: Optional[List[str]] = ..., + **options: OptionalPrimitiveType + ) -> None: ... + def json( + self, + path: str, + mode: Optional[str] = ..., + compression: Optional[str] = ..., + dateFormat: Optional[str] = ..., + timestampFormat: Optional[str] = ..., + lineSep: Optional[str] = ..., + encoding: Optional[str] = ..., + ignoreNullFields: Optional[bool] = ..., + ) -> None: ... + def parquet( + self, + path: str, + mode: Optional[str] = ..., + partitionBy: Optional[List[str]] = ..., + compression: Optional[str] = ..., + ) -> None: ... + def text( + self, path: str, compression: Optional[str] = ..., lineSep: Optional[str] = ... + ) -> None: ... + def csv( + self, + path: str, + mode: Optional[str] = ..., + compression: Optional[str] = ..., + sep: Optional[str] = ..., + quote: Optional[str] = ..., + escape: Optional[str] = ..., + header: Optional[Union[bool, str]] = ..., + nullValue: Optional[str] = ..., + escapeQuotes: Optional[Union[bool, str]] = ..., + quoteAll: Optional[Union[bool, str]] = ..., + dateFormat: Optional[str] = ..., + timestampFormat: Optional[str] = ..., + ignoreLeadingWhiteSpace: Optional[Union[bool, str]] = ..., + ignoreTrailingWhiteSpace: Optional[Union[bool, str]] = ..., + charToEscapeQuoteEscaping: Optional[str] = ..., + encoding: Optional[str] = ..., + emptyValue: Optional[str] = ..., + lineSep: Optional[str] = ..., + ) -> None: ... + def orc( + self, + path: str, + mode: Optional[str] = ..., + partitionBy: Optional[List[str]] = ..., + compression: Optional[str] = ..., + ) -> None: ... + def jdbc( + self, + url: str, + table: str, + mode: Optional[str] = ..., + properties: Optional[Dict[str, str]] = ..., + ) -> None: ... + +class DataFrameWriterV2: + def __init__(self, df: DataFrame, table: str) -> None: ... + def using(self, provider: str) -> DataFrameWriterV2: ... + def option(self, key: str, value: OptionalPrimitiveType) -> DataFrameWriterV2: ... + def options(self, **options: OptionalPrimitiveType) -> DataFrameWriterV2: ... + def tableProperty(self, property: str, value: str) -> DataFrameWriterV2: ... + def partitionedBy(self, col: Column, *cols: Column) -> DataFrameWriterV2: ... + def create(self) -> None: ... + def replace(self) -> None: ... + def createOrReplace(self) -> None: ... + def append(self) -> None: ... + def overwrite(self, condition: Column) -> None: ... 
+ def overwritePartitions(self) -> None: ... diff --git a/python/pyspark/sql/session.pyi b/python/pyspark/sql/session.pyi new file mode 100644 index 0000000000000..17ba8894c1731 --- /dev/null +++ b/python/pyspark/sql/session.pyi @@ -0,0 +1,125 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import Any, Iterable, List, Optional, Tuple, TypeVar, Union + +from py4j.java_gateway import JavaObject # type: ignore[import] + +from pyspark.sql._typing import DateTimeLiteral, LiteralType, DecimalLiteral, RowLike +from pyspark.sql.pandas._typing import DataFrameLike +from pyspark.conf import SparkConf +from pyspark.context import SparkContext +from pyspark.rdd import RDD +from pyspark.sql.catalog import Catalog +from pyspark.sql.conf import RuntimeConfig +from pyspark.sql.dataframe import DataFrame +from pyspark.sql.pandas.conversion import SparkConversionMixin +from pyspark.sql.types import AtomicType, StructType +from pyspark.sql.readwriter import DataFrameReader +from pyspark.sql.streaming import DataStreamReader, StreamingQueryManager +from pyspark.sql.udf import UDFRegistration + +T = TypeVar("T") + +class SparkSession(SparkConversionMixin): + class Builder: + @overload + def config(self, *, conf: SparkConf) -> SparkSession.Builder: ... + @overload + def config(self, key: str, value: Any) -> SparkSession.Builder: ... + def master(self, master: str) -> SparkSession.Builder: ... + def appName(self, name: str) -> SparkSession.Builder: ... + def enableHiveSupport(self) -> SparkSession.Builder: ... + def getOrCreate(self) -> SparkSession: ... + builder: SparkSession.Builder + def __init__( + self, sparkContext: SparkContext, jsparkSession: Optional[JavaObject] = ... + ) -> None: ... + def newSession(self) -> SparkSession: ... + @classmethod + def getActiveSession(cls) -> SparkSession: ... + @property + def sparkContext(self) -> SparkContext: ... + @property + def version(self) -> str: ... + @property + def conf(self) -> RuntimeConfig: ... + @property + def catalog(self) -> Catalog: ... + @property + def udf(self) -> UDFRegistration: ... + def range( + self, + start: int, + end: Optional[int] = ..., + step: int = ..., + numPartitions: Optional[int] = ..., + ) -> DataFrame: ... + @overload + def createDataFrame( + self, + data: Union[RDD[RowLike], Iterable[RowLike]], + samplingRatio: Optional[float] = ..., + ) -> DataFrame: ... + @overload + def createDataFrame( + self, + data: Union[RDD[RowLike], Iterable[RowLike]], + schema: Union[List[str], Tuple[str, ...]] = ..., + verifySchema: bool = ..., + ) -> DataFrame: ... 
+ @overload + def createDataFrame( + self, + data: Union[ + RDD[Union[DateTimeLiteral, LiteralType, DecimalLiteral]], + Iterable[Union[DateTimeLiteral, LiteralType, DecimalLiteral]], + ], + schema: Union[AtomicType, str], + verifySchema: bool = ..., + ) -> DataFrame: ... + @overload + def createDataFrame( + self, + data: Union[RDD[RowLike], Iterable[RowLike]], + schema: Union[StructType, str], + verifySchema: bool = ..., + ) -> DataFrame: ... + @overload + def createDataFrame( + self, data: DataFrameLike, samplingRatio: Optional[float] = ... + ) -> DataFrame: ... + @overload + def createDataFrame( + self, + data: DataFrameLike, + schema: Union[StructType, str], + verifySchema: bool = ..., + ) -> DataFrame: ... + def sql(self, sqlQuery: str) -> DataFrame: ... + def table(self, tableName: str) -> DataFrame: ... + @property + def read(self) -> DataFrameReader: ... + @property + def readStream(self) -> DataStreamReader: ... + @property + def streams(self) -> StreamingQueryManager: ... + def stop(self) -> None: ... + def __enter__(self) -> SparkSession: ... + def __exit__(self, exc_type, exc_val, exc_tb) -> None: ... diff --git a/python/pyspark/sql/streaming.pyi b/python/pyspark/sql/streaming.pyi new file mode 100644 index 0000000000000..22055b2efc06b --- /dev/null +++ b/python/pyspark/sql/streaming.pyi @@ -0,0 +1,179 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import Any, Callable, Dict, List, Optional, Union + +from pyspark.sql._typing import SupportsProcess, OptionalPrimitiveType +from pyspark.sql.context import SQLContext +from pyspark.sql.dataframe import DataFrame +from pyspark.sql.readwriter import OptionUtils +from pyspark.sql.types import Row, StructType +from pyspark.sql.utils import StreamingQueryException + +from py4j.java_gateway import JavaObject # type: ignore[import] + +class StreamingQuery: + def __init__(self, jsq: JavaObject) -> None: ... + @property + def id(self) -> str: ... + @property + def runId(self) -> str: ... + @property + def name(self) -> str: ... + @property + def isActive(self) -> bool: ... + def awaitTermination(self, timeout: Optional[int] = ...) -> Optional[bool]: ... + @property + def status(self) -> Dict[str, Any]: ... + @property + def recentProgress(self) -> List[Dict[str, Any]]: ... + @property + def lastProgress(self) -> Optional[Dict[str, Any]]: ... + def processAllAvailable(self) -> None: ... + def stop(self) -> None: ... + def explain(self, extended: bool = ...) -> None: ... + def exception(self) -> Optional[StreamingQueryException]: ... + +class StreamingQueryManager: + def __init__(self, jsqm: JavaObject) -> None: ... + @property + def active(self) -> List[StreamingQuery]: ... + def get(self, id: str) -> StreamingQuery: ... 
+ def awaitAnyTermination(self, timeout: Optional[int] = ...) -> bool: ... + def resetTerminated(self) -> None: ... + +class DataStreamReader(OptionUtils): + def __init__(self, spark: SQLContext) -> None: ... + def format(self, source: str) -> DataStreamReader: ... + def schema(self, schema: Union[StructType, str]) -> DataStreamReader: ... + def option(self, key: str, value: OptionalPrimitiveType) -> DataStreamReader: ... + def options(self, **options: OptionalPrimitiveType) -> DataStreamReader: ... + def load( + self, + path: Optional[str] = ..., + format: Optional[str] = ..., + schema: Optional[StructType] = ..., + **options: OptionalPrimitiveType + ) -> DataFrame: ... + def json( + self, + path: str, + schema: Optional[Union[StructType, str]] = ..., + primitivesAsString: Optional[Union[bool, str]] = ..., + prefersDecimal: Optional[Union[bool, str]] = ..., + allowComments: Optional[Union[bool, str]] = ..., + allowUnquotedFieldNames: Optional[Union[bool, str]] = ..., + allowSingleQuotes: Optional[Union[bool, str]] = ..., + allowNumericLeadingZero: Optional[Union[bool, str]] = ..., + allowBackslashEscapingAnyCharacter: Optional[Union[bool, str]] = ..., + mode: Optional[str] = ..., + columnNameOfCorruptRecord: Optional[str] = ..., + dateFormat: Optional[str] = ..., + timestampFormat: Optional[str] = ..., + multiLine: Optional[Union[bool, str]] = ..., + allowUnquotedControlChars: Optional[Union[bool, str]] = ..., + lineSep: Optional[str] = ..., + locale: Optional[str] = ..., + dropFieldIfAllNull: Optional[Union[bool, str]] = ..., + encoding: Optional[str] = ..., + recursiveFileLookup: Optional[bool] = ..., + ) -> DataFrame: ... + def orc( + self, + path: str, + mergeSchema: Optional[bool] = ..., + recursiveFileLookup: Optional[bool] = ..., + ) -> DataFrame: ... + def parquet( + self, + path: str, + mergeSchema: Optional[bool] = ..., + recursiveFileLookup: Optional[bool] = ..., + ) -> DataFrame: ... + def text( + self, + path: str, + wholetext: bool = ..., + lineSep: Optional[str] = ..., + recursiveFileLookup: Optional[bool] = ..., + ) -> DataFrame: ... + def csv( + self, + path: str, + schema: Optional[Union[StructType, str]] = ..., + sep: Optional[str] = ..., + encoding: Optional[str] = ..., + quote: Optional[str] = ..., + escape: Optional[str] = ..., + comment: Optional[str] = ..., + header: Optional[Union[bool, str]] = ..., + inferSchema: Optional[Union[bool, str]] = ..., + ignoreLeadingWhiteSpace: Optional[Union[bool, str]] = ..., + ignoreTrailingWhiteSpace: Optional[Union[bool, str]] = ..., + nullValue: Optional[str] = ..., + nanValue: Optional[str] = ..., + positiveInf: Optional[str] = ..., + negativeInf: Optional[str] = ..., + dateFormat: Optional[str] = ..., + timestampFormat: Optional[str] = ..., + maxColumns: Optional[Union[int, str]] = ..., + maxCharsPerColumn: Optional[Union[int, str]] = ..., + mode: Optional[str] = ..., + columnNameOfCorruptRecord: Optional[str] = ..., + multiLine: Optional[Union[bool, str]] = ..., + charToEscapeQuoteEscaping: Optional[Union[bool, str]] = ..., + enforceSchema: Optional[Union[bool, str]] = ..., + emptyValue: Optional[str] = ..., + locale: Optional[str] = ..., + lineSep: Optional[str] = ..., + ) -> DataFrame: ... + +class DataStreamWriter: + def __init__(self, df: DataFrame) -> None: ... + def outputMode(self, outputMode: str) -> DataStreamWriter: ... + def format(self, source: str) -> DataStreamWriter: ... + def option(self, key: str, value: OptionalPrimitiveType) -> DataStreamWriter: ... 
+ def options(self, **options: OptionalPrimitiveType) -> DataStreamWriter: ... + @overload + def partitionBy(self, *cols: str) -> DataStreamWriter: ... + @overload + def partitionBy(self, __cols: List[str]) -> DataStreamWriter: ... + def queryName(self, queryName: str) -> DataStreamWriter: ... + @overload + def trigger(self, processingTime: str) -> DataStreamWriter: ... + @overload + def trigger(self, once: bool) -> DataStreamWriter: ... + @overload + def trigger(self, continuous: bool) -> DataStreamWriter: ... + def start( + self, + path: Optional[str] = ..., + format: Optional[str] = ..., + outputMode: Optional[str] = ..., + partitionBy: Optional[Union[str, List[str]]] = ..., + queryName: Optional[str] = ..., + **options: OptionalPrimitiveType + ) -> StreamingQuery: ... + @overload + def foreach(self, f: Callable[[Row], None]) -> DataStreamWriter: ... + @overload + def foreach(self, f: SupportsProcess) -> DataStreamWriter: ... + def foreachBatch( + self, func: Callable[[DataFrame, int], None] + ) -> DataStreamWriter: ... diff --git a/python/pyspark/sql/tests/test_arrow.py b/python/pyspark/sql/tests/test_arrow.py index fb4f619c8bf63..c6497923d84fb 100644 --- a/python/pyspark/sql/tests/test_arrow.py +++ b/python/pyspark/sql/tests/test_arrow.py @@ -42,7 +42,7 @@ @unittest.skipIf( not have_pandas or not have_pyarrow, - pandas_requirement_message or pyarrow_requirement_message) + pandas_requirement_message or pyarrow_requirement_message) # type: ignore class ArrowTests(ReusedSQLTestCase): @classmethod @@ -465,7 +465,7 @@ def test_createDataFrame_empty_partition(self): @unittest.skipIf( not have_pandas or not have_pyarrow, - pandas_requirement_message or pyarrow_requirement_message) + pandas_requirement_message or pyarrow_requirement_message) # type: ignore class MaxResultArrowTests(unittest.TestCase): # These tests are separate as 'spark.driver.maxResultSize' configuration # is a static configuration to Spark context. 
@@ -500,7 +500,7 @@ def conf(cls): from pyspark.sql.tests.test_arrow import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_catalog.py b/python/pyspark/sql/tests/test_catalog.py index 141b249db0fc6..ca4e427a7db28 100644 --- a/python/pyspark/sql/tests/test_catalog.py +++ b/python/pyspark/sql/tests/test_catalog.py @@ -206,7 +206,7 @@ def test_list_columns(self): from pyspark.sql.tests.test_catalog import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_column.py b/python/pyspark/sql/tests/test_column.py index 8a89e6e9d5599..7e03e2ef3e6d0 100644 --- a/python/pyspark/sql/tests/test_column.py +++ b/python/pyspark/sql/tests/test_column.py @@ -161,7 +161,7 @@ def test_with_field(self): from pyspark.sql.tests.test_column import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_conf.py b/python/pyspark/sql/tests/test_conf.py index dd2e0be85d508..1cc0c1b7562c5 100644 --- a/python/pyspark/sql/tests/test_conf.py +++ b/python/pyspark/sql/tests/test_conf.py @@ -49,7 +49,7 @@ def test_conf(self): from pyspark.sql.tests.test_conf import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_context.py b/python/pyspark/sql/tests/test_context.py index ce22a52dc119e..d506908b784db 100644 --- a/python/pyspark/sql/tests/test_context.py +++ b/python/pyspark/sql/tests/test_context.py @@ -276,7 +276,7 @@ def test_get_or_create(self): from pyspark.sql.tests.test_context import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py index d03939821a176..d941707b8969f 100644 --- a/python/pyspark/sql/tests/test_dataframe.py +++ b/python/pyspark/sql/tests/test_dataframe.py @@ -518,7 +518,7 @@ def _to_pandas(self): df = self.spark.createDataFrame(data, schema) return df.toPandas() - @unittest.skipIf(not have_pandas, pandas_requirement_message) + @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: ignore def test_to_pandas(self): import numpy as np pdf = self._to_pandas() @@ -530,7 +530,7 @@ def test_to_pandas(self): self.assertEquals(types[4], np.object) # datetime.date self.assertEquals(types[5], 'datetime64[ns]') - @unittest.skipIf(not have_pandas, pandas_requirement_message) + @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: ignore def test_to_pandas_with_duplicated_column_names(self): import numpy as np @@ -543,7 +543,7 @@ def test_to_pandas_with_duplicated_column_names(self): self.assertEquals(types.iloc[0], np.int32) self.assertEquals(types.iloc[1], np.int32) - @unittest.skipIf(not have_pandas, pandas_requirement_message) + @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: 
ignore def test_to_pandas_on_cross_join(self): import numpy as np @@ -569,7 +569,7 @@ def test_to_pandas_required_pandas_not_found(self): with self.assertRaisesRegexp(ImportError, 'Pandas >= .* must be installed'): self._to_pandas() - @unittest.skipIf(not have_pandas, pandas_requirement_message) + @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: ignore def test_to_pandas_avoid_astype(self): import numpy as np schema = StructType().add("a", IntegerType()).add("b", StringType())\ @@ -581,7 +581,7 @@ def test_to_pandas_avoid_astype(self): self.assertEquals(types[1], np.object) self.assertEquals(types[2], np.float64) - @unittest.skipIf(not have_pandas, pandas_requirement_message) + @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: ignore def test_to_pandas_from_empty_dataframe(self): with self.sql_conf({"spark.sql.execution.arrow.pyspark.enabled": False}): # SPARK-29188 test that toPandas() on an empty dataframe has the correct dtypes @@ -601,7 +601,7 @@ def test_to_pandas_from_empty_dataframe(self): dtypes_when_empty_df = self.spark.sql(sql).filter("False").toPandas().dtypes self.assertTrue(np.all(dtypes_when_empty_df == dtypes_when_nonempty_df)) - @unittest.skipIf(not have_pandas, pandas_requirement_message) + @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: ignore def test_to_pandas_from_null_dataframe(self): with self.sql_conf({"spark.sql.execution.arrow.pyspark.enabled": False}): # SPARK-29188 test that toPandas() on a dataframe with only nulls has correct dtypes @@ -629,7 +629,7 @@ def test_to_pandas_from_null_dataframe(self): self.assertEqual(types[7], np.object) self.assertTrue(np.can_cast(np.datetime64, types[8])) - @unittest.skipIf(not have_pandas, pandas_requirement_message) + @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: ignore def test_to_pandas_from_mixed_dataframe(self): with self.sql_conf({"spark.sql.execution.arrow.pyspark.enabled": False}): # SPARK-29188 test that toPandas() on a dataframe with some nulls has correct dtypes @@ -657,7 +657,7 @@ def test_create_dataframe_from_array_of_long(self): df = self.spark.createDataFrame(data) self.assertEqual(df.first(), Row(longarray=[-9223372036854775808, 0, 9223372036854775807])) - @unittest.skipIf(not have_pandas, pandas_requirement_message) + @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: ignore def test_create_dataframe_from_pandas_with_timestamp(self): import pandas as pd from datetime import datetime @@ -685,7 +685,7 @@ def test_create_dataframe_required_pandas_not_found(self): self.spark.createDataFrame(pdf) # Regression test for SPARK-23360 - @unittest.skipIf(not have_pandas, pandas_requirement_message) + @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: ignore def test_create_dataframe_from_pandas_with_dst(self): import pandas as pd from pandas.util.testing import assert_frame_equal @@ -889,7 +889,7 @@ def test_query_execution_listener_on_collect(self): @unittest.skipIf( not have_pandas or not have_pyarrow, - pandas_requirement_message or pyarrow_requirement_message) + pandas_requirement_message or pyarrow_requirement_message) # type: ignore def test_query_execution_listener_on_collect_with_arrow(self): with self.sql_conf({"spark.sql.execution.arrow.pyspark.enabled": True}): self.assertFalse( @@ -907,7 +907,7 @@ def test_query_execution_listener_on_collect_with_arrow(self): from pyspark.sql.tests.test_dataframe import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore 
testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_datasources.py b/python/pyspark/sql/tests/test_datasources.py index dfef8f5740050..9425494fb0d90 100644 --- a/python/pyspark/sql/tests/test_datasources.py +++ b/python/pyspark/sql/tests/test_datasources.py @@ -164,7 +164,7 @@ def test_ignore_column_of_all_nulls(self): from pyspark.sql.tests.test_datasources import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 09f5960c6f648..5638cad51b755 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -396,7 +396,7 @@ def test_higher_order_function_failures(self): from pyspark.sql.tests.test_functions import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_group.py b/python/pyspark/sql/tests/test_group.py index 2fab7a08da1da..324c964f4f0cf 100644 --- a/python/pyspark/sql/tests/test_group.py +++ b/python/pyspark/sql/tests/test_group.py @@ -39,7 +39,7 @@ def test_aggregator(self): from pyspark.sql.tests.test_group import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_pandas_cogrouped_map.py b/python/pyspark/sql/tests/test_pandas_cogrouped_map.py index 5013e2d4d6bd9..f9a7dd69b61fb 100644 --- a/python/pyspark/sql/tests/test_pandas_cogrouped_map.py +++ b/python/pyspark/sql/tests/test_pandas_cogrouped_map.py @@ -33,7 +33,7 @@ @unittest.skipIf( not have_pandas or not have_pyarrow, - pandas_requirement_message or pyarrow_requirement_message) + pandas_requirement_message or pyarrow_requirement_message) # type: ignore[arg-type] class CogroupedMapInPandasTests(ReusedSQLTestCase): @property @@ -247,7 +247,7 @@ def merge_pandas(l, r): from pyspark.sql.tests.test_pandas_cogrouped_map import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_pandas_grouped_map.py b/python/pyspark/sql/tests/test_pandas_grouped_map.py index 6eb5355044bb0..81b6d5efb710a 100644 --- a/python/pyspark/sql/tests/test_pandas_grouped_map.py +++ b/python/pyspark/sql/tests/test_pandas_grouped_map.py @@ -41,7 +41,7 @@ @unittest.skipIf( not have_pandas or not have_pyarrow, - pandas_requirement_message or pyarrow_requirement_message) + pandas_requirement_message or pyarrow_requirement_message) # type: ignore[arg-type] class GroupedMapInPandasTests(ReusedSQLTestCase): @property @@ -611,7 +611,7 @@ def my_pandas_udf(pdf): from pyspark.sql.tests.test_pandas_grouped_map import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_pandas_map.py 
b/python/pyspark/sql/tests/test_pandas_map.py index bda370dffbf6a..3ca437f75fc23 100644 --- a/python/pyspark/sql/tests/test_pandas_map.py +++ b/python/pyspark/sql/tests/test_pandas_map.py @@ -27,7 +27,7 @@ @unittest.skipIf( not have_pandas or not have_pyarrow, - pandas_requirement_message or pyarrow_requirement_message) + pandas_requirement_message or pyarrow_requirement_message) # type: ignore[arg-type] class MapInPandasTests(ReusedSQLTestCase): @classmethod @@ -117,7 +117,7 @@ def func(iterator): from pyspark.sql.tests.test_pandas_map import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_pandas_udf.py b/python/pyspark/sql/tests/test_pandas_udf.py index 24b98182b7fcf..cc742fc4267cb 100644 --- a/python/pyspark/sql/tests/test_pandas_udf.py +++ b/python/pyspark/sql/tests/test_pandas_udf.py @@ -28,7 +28,7 @@ @unittest.skipIf( not have_pandas or not have_pyarrow, - pandas_requirement_message or pyarrow_requirement_message) + pandas_requirement_message or pyarrow_requirement_message) # type: ignore[arg-type] class PandasUDFTests(ReusedSQLTestCase): def test_pandas_udf_basic(self): @@ -244,7 +244,7 @@ def udf(column): from pyspark.sql.tests.test_pandas_udf import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py b/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py index f63f52239fdf2..451308927629b 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py +++ b/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py @@ -35,7 +35,7 @@ @unittest.skipIf( not have_pandas or not have_pyarrow, - pandas_requirement_message or pyarrow_requirement_message) + pandas_requirement_message or pyarrow_requirement_message) # type: ignore[arg-type] class GroupedAggPandasUDFTests(ReusedSQLTestCase): @property @@ -514,7 +514,7 @@ def mean(x): from pyspark.sql.tests.test_pandas_udf_grouped_agg import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_pandas_udf_scalar.py b/python/pyspark/sql/tests/test_pandas_udf_scalar.py index 522807b03af70..6d325c9085ce1 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_scalar.py +++ b/python/pyspark/sql/tests/test_pandas_udf_scalar.py @@ -46,7 +46,7 @@ @unittest.skipIf( not have_pandas or not have_pyarrow, - pandas_requirement_message or pyarrow_requirement_message) + pandas_requirement_message or pyarrow_requirement_message) # type: ignore class ScalarPandasUDFTests(ReusedSQLTestCase): @classmethod @@ -1095,7 +1095,7 @@ def f3i(it): self.assertEquals(expected, df1.collect()) # SPARK-24721 - @unittest.skipIf(not test_compiled, test_not_compiled_message) + @unittest.skipIf(not test_compiled, test_not_compiled_message) # type: ignore def test_datasource_with_udf(self): # Same as SQLTests.test_datasource_with_udf, but with Pandas UDF # This needs to a separate test because Arrow dependency is optional @@ -1142,7 +1142,7 @@ def test_datasource_with_udf(self): from pyspark.sql.tests.test_pandas_udf_scalar import * # noqa: F401 try: - import xmlrunner + import xmlrunner # 
type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_pandas_udf_typehints.py b/python/pyspark/sql/tests/test_pandas_udf_typehints.py index 7be81f82808e4..d9717da4d2fbd 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_typehints.py +++ b/python/pyspark/sql/tests/test_pandas_udf_typehints.py @@ -34,7 +34,7 @@ @unittest.skipIf( not have_pandas or not have_pyarrow, - pandas_requirement_message or pyarrow_requirement_message) + pandas_requirement_message or pyarrow_requirement_message) # type: ignore[arg-type] class PandasUDFTypeHintsTests(ReusedSQLTestCase): def test_type_annotation_scalar(self): def func(col: pd.Series) -> pd.Series: @@ -246,7 +246,7 @@ def pandas_plus_one(iter: Iterator[pd.DataFrame]) -> Iterator[pd.DataFrame]: from pyspark.sql.tests.test_pandas_udf_typehints import * # noqa: #401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_pandas_udf_window.py b/python/pyspark/sql/tests/test_pandas_udf_window.py index 6e59255da13a2..5ad2ecd8f85d4 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_window.py +++ b/python/pyspark/sql/tests/test_pandas_udf_window.py @@ -31,7 +31,7 @@ @unittest.skipIf( not have_pandas or not have_pyarrow, - pandas_requirement_message or pyarrow_requirement_message) + pandas_requirement_message or pyarrow_requirement_message) # type: ignore[arg-type] class WindowPandasUDFTests(ReusedSQLTestCase): @property def data(self): @@ -355,7 +355,7 @@ def test_bounded_mixed(self): from pyspark.sql.tests.test_pandas_udf_window import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_readwriter.py b/python/pyspark/sql/tests/test_readwriter.py index 55ffefc43c105..80b4118ae796a 100644 --- a/python/pyspark/sql/tests/test_readwriter.py +++ b/python/pyspark/sql/tests/test_readwriter.py @@ -204,7 +204,7 @@ def test_partitioning_functions(self): from pyspark.sql.tests.test_readwriter import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_serde.py b/python/pyspark/sql/tests/test_serde.py index 35c14e430af50..ce087ff4ce550 100644 --- a/python/pyspark/sql/tests/test_serde.py +++ b/python/pyspark/sql/tests/test_serde.py @@ -142,7 +142,7 @@ def test_bytes_as_binary_type(self): from pyspark.sql.tests.test_serde import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_session.py b/python/pyspark/sql/tests/test_session.py index d10f7bf906c3b..7faeb1857b983 100644 --- a/python/pyspark/sql/tests/test_session.py +++ b/python/pyspark/sql/tests/test_session.py @@ -361,7 +361,7 @@ def test_use_custom_class_for_extensions(self): from pyspark.sql.tests.test_session import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', 
verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_streaming.py b/python/pyspark/sql/tests/test_streaming.py index 21ce04618a904..28a50f9575a0a 100644 --- a/python/pyspark/sql/tests/test_streaming.py +++ b/python/pyspark/sql/tests/test_streaming.py @@ -575,7 +575,7 @@ def collectBatch(df, id): from pyspark.sql.tests.test_streaming import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py index 7256db055fb9c..e85e8a6e6d1ee 100644 --- a/python/pyspark/sql/tests/test_types.py +++ b/python/pyspark/sql/tests/test_types.py @@ -25,12 +25,15 @@ import unittest from pyspark.sql import Row -from pyspark.sql.functions import col, UserDefinedFunction +from pyspark.sql.functions import col +from pyspark.sql.udf import UserDefinedFunction from pyspark.sql.types import ByteType, ShortType, IntegerType, FloatType, DateType, \ TimestampType, MapType, StringType, StructType, StructField, ArrayType, DoubleType, LongType, \ DecimalType, BinaryType, BooleanType, NullType -from pyspark.sql.types import _array_signed_int_typecode_ctype_mappings, _array_type_mappings, \ +from pyspark.sql.types import ( # type: ignore + _array_signed_int_typecode_ctype_mappings, _array_type_mappings, _array_unsigned_int_typecode_ctype_mappings, _infer_type, _make_type_verifier, _merge_type +) from pyspark.testing.sqlutils import ReusedSQLTestCase, ExamplePointUDT, PythonOnlyUDT, \ ExamplePoint, PythonOnlyPoint, MyObject @@ -974,7 +977,7 @@ def test_row_without_field_sorting(self): from pyspark.sql.tests.test_types import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py index ad94bc83cc5be..a7dcbfd32ac1c 100644 --- a/python/pyspark/sql/tests/test_udf.py +++ b/python/pyspark/sql/tests/test_udf.py @@ -23,7 +23,8 @@ from pyspark import SparkContext from pyspark.sql import SparkSession, Column, Row -from pyspark.sql.functions import UserDefinedFunction, udf +from pyspark.sql.functions import udf +from pyspark.sql.udf import UserDefinedFunction from pyspark.sql.types import StringType, IntegerType, BooleanType, DoubleType, LongType, \ ArrayType, StructType, StructField from pyspark.sql.utils import AnalysisException @@ -356,7 +357,7 @@ def test_udf_registration_returns_udf(self): df.select(add_four("id").alias("plus_four")).collect() ) - @unittest.skipIf(not test_compiled, test_not_compiled_message) + @unittest.skipIf(not test_compiled, test_not_compiled_message) # type: ignore def test_register_java_function(self): self.spark.udf.registerJavaFunction( "javaStringLength", "test.org.apache.spark.sql.JavaStringLength", IntegerType()) @@ -373,7 +374,7 @@ def test_register_java_function(self): [value] = self.spark.sql("SELECT javaStringLength3('test')").first() self.assertEqual(value, 4) - @unittest.skipIf(not test_compiled, test_not_compiled_message) + @unittest.skipIf(not test_compiled, test_not_compiled_message) # type: ignore def test_register_java_udaf(self): self.spark.udf.registerJavaUDAF("javaUDAF", "test.org.apache.spark.sql.MyDoubleAvg") df = self.spark.createDataFrame([(1, "a"), (2, "b"), (3, "a")], ["id", "name"]) 
@@ -560,7 +561,7 @@ def test_nonparam_udf_with_aggregate(self): self.assertEqual(rows, [Row(_1=1, _2=2, a=u'const_str')]) # SPARK-24721 - @unittest.skipIf(not test_compiled, test_not_compiled_message) + @unittest.skipIf(not test_compiled, test_not_compiled_message) # type: ignore def test_datasource_with_udf(self): from pyspark.sql.functions import lit, col @@ -699,7 +700,7 @@ def test_udf_init_shouldnt_initialize_context(self): from pyspark.sql.tests.test_udf import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_utils.py b/python/pyspark/sql/tests/test_utils.py index c6e7fcd8ec11a..b08e17208d8af 100644 --- a/python/pyspark/sql/tests/test_utils.py +++ b/python/pyspark/sql/tests/test_utils.py @@ -55,7 +55,7 @@ def test_capture_illegalargument_exception(self): from pyspark.sql.tests.test_utils import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/types.pyi b/python/pyspark/sql/types.pyi new file mode 100644 index 0000000000000..31765e94884d7 --- /dev/null +++ b/python/pyspark/sql/types.pyi @@ -0,0 +1,204 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import Any, Callable, Dict, Iterator, List, Optional, Union, Tuple, TypeVar +import datetime + +T = TypeVar("T") +U = TypeVar("U") + +class DataType: + def __hash__(self) -> int: ... + def __eq__(self, other: Any) -> bool: ... + def __ne__(self, other: Any) -> bool: ... + @classmethod + def typeName(cls) -> str: ... + def simpleString(self) -> str: ... + def jsonValue(self) -> Union[str, Dict[str, Any]]: ... + def json(self) -> str: ... + def needConversion(self) -> bool: ... + def toInternal(self, obj: Any) -> Any: ... + def fromInternal(self, obj: Any) -> Any: ... + +class DataTypeSingleton(type): + def __call__(cls): ... + +class NullType(DataType, metaclass=DataTypeSingleton): ... +class AtomicType(DataType): ... +class NumericType(AtomicType): ... +class IntegralType(NumericType, metaclass=DataTypeSingleton): ... +class FractionalType(NumericType): ... +class StringType(AtomicType, metaclass=DataTypeSingleton): ... +class BinaryType(AtomicType, metaclass=DataTypeSingleton): ... +class BooleanType(AtomicType, metaclass=DataTypeSingleton): ... + +class DateType(AtomicType, metaclass=DataTypeSingleton): + EPOCH_ORDINAL: int + def needConversion(self) -> bool: ... + def toInternal(self, d: datetime.date) -> int: ... + def fromInternal(self, v: int) -> datetime.date: ... 
+ +class TimestampType(AtomicType, metaclass=DataTypeSingleton): + def needConversion(self) -> bool: ... + def toInternal(self, dt: datetime.datetime) -> int: ... + def fromInternal(self, ts: int) -> datetime.datetime: ... + +class DecimalType(FractionalType): + precision: int + scale: int + hasPrecisionInfo: bool + def __init__(self, precision: int = ..., scale: int = ...) -> None: ... + def simpleString(self) -> str: ... + def jsonValue(self) -> str: ... + +class DoubleType(FractionalType, metaclass=DataTypeSingleton): ... +class FloatType(FractionalType, metaclass=DataTypeSingleton): ... + +class ByteType(IntegralType): + def simpleString(self) -> str: ... + +class IntegerType(IntegralType): + def simpleString(self) -> str: ... + +class LongType(IntegralType): + def simpleString(self) -> str: ... + +class ShortType(IntegralType): + def simpleString(self) -> str: ... + +class ArrayType(DataType): + elementType: DataType + containsNull: bool + def __init__(self, elementType=DataType, containsNull: bool = ...) -> None: ... + def simpleString(self): ... + def jsonValue(self) -> Dict[str, Any]: ... + @classmethod + def fromJson(cls, json: Dict[str, Any]) -> ArrayType: ... + def needConversion(self) -> bool: ... + def toInternal(self, obj: List[Optional[T]]) -> List[Optional[T]]: ... + def fromInternal(self, obj: List[Optional[T]]) -> List[Optional[T]]: ... + +class MapType(DataType): + keyType: DataType + valueType: DataType + valueContainsNull: bool + def __init__( + self, keyType: DataType, valueType: DataType, valueContainsNull: bool = ... + ) -> None: ... + def simpleString(self) -> str: ... + def jsonValue(self) -> Dict[str, Any]: ... + @classmethod + def fromJson(cls, json: Dict[str, Any]) -> MapType: ... + def needConversion(self) -> bool: ... + def toInternal(self, obj: Dict[T, Optional[U]]) -> Dict[T, Optional[U]]: ... + def fromInternal(self, obj: Dict[T, Optional[U]]) -> Dict[T, Optional[U]]: ... + +class StructField(DataType): + name: str + dataType: DataType + nullable: bool + metadata: Dict[str, Any] + def __init__( + self, + name: str, + dataType: DataType, + nullable: bool = ..., + metadata: Optional[Dict[str, Any]] = ..., + ) -> None: ... + def simpleString(self) -> str: ... + def jsonValue(self) -> Dict[str, Any]: ... + @classmethod + def fromJson(cls, json: Dict[str, Any]) -> StructField: ... + def needConversion(self) -> bool: ... + def toInternal(self, obj: T) -> T: ... + def fromInternal(self, obj: T) -> T: ... + +class StructType(DataType): + fields: List[StructField] + names: List[str] + def __init__(self, fields: Optional[List[StructField]] = ...) -> None: ... + @overload + def add( + self, + field: str, + data_type: Union[str, DataType], + nullable: bool = ..., + metadata: Optional[Dict[str, Any]] = ..., + ) -> StructType: ... + @overload + def add(self, field: StructField) -> StructType: ... + def __iter__(self) -> Iterator[StructField]: ... + def __len__(self) -> int: ... + def __getitem__(self, key: Union[str, int]) -> StructField: ... + def simpleString(self) -> str: ... + def jsonValue(self) -> Dict[str, Any]: ... + @classmethod + def fromJson(cls, json: Dict[str, Any]) -> StructType: ... + def fieldNames(self) -> List[str]: ... + def needConversion(self) -> bool: ... + def toInternal(self, obj: Tuple) -> Tuple: ... + def fromInternal(self, obj: Tuple) -> Row: ... + +class UserDefinedType(DataType): + @classmethod + def typeName(cls) -> str: ... + @classmethod + def sqlType(cls) -> DataType: ... + @classmethod + def module(cls) -> str: ... 
+ @classmethod + def scalaUDT(cls) -> str: ... + def needConversion(self) -> bool: ... + def toInternal(self, obj: Any) -> Any: ... + def fromInternal(self, obj: Any) -> Any: ... + def serialize(self, obj: Any) -> Any: ... + def deserialize(self, datum: Any) -> Any: ... + def simpleString(self) -> str: ... + def json(self) -> str: ... + def jsonValue(self) -> Dict[str, Any]: ... + @classmethod + def fromJson(cls, json: Dict[str, Any]) -> UserDefinedType: ... + def __eq__(self, other: Any) -> bool: ... + +class Row(tuple): + @overload + def __new__(self, *args: str) -> Row: ... + @overload + def __new__(self, **kwargs: Any) -> Row: ... + @overload + def __init__(self, *args: str) -> None: ... + @overload + def __init__(self, **kwargs: Any) -> None: ... + def asDict(self, recursive: bool = ...) -> Dict[str, Any]: ... + def __contains__(self, item: Any) -> bool: ... + def __call__(self, *args: Any) -> Row: ... + def __getitem__(self, item: Any) -> Any: ... + def __getattr__(self, item: str) -> Any: ... + def __setattr__(self, key: Any, value: Any) -> None: ... + def __reduce__( + self, + ) -> Tuple[Callable[[List[str], List[Any]], Row], Tuple[List[str], Tuple]]: ... + +class DateConverter: + def can_convert(self, obj: Any) -> bool: ... + def convert(self, obj, gateway_client) -> Any: ... + +class DatetimeConverter: + def can_convert(self, obj) -> bool: ... + def convert(self, obj, gateway_client) -> Any: ... diff --git a/python/pyspark/sql/udf.pyi b/python/pyspark/sql/udf.pyi new file mode 100644 index 0000000000000..87c3672780037 --- /dev/null +++ b/python/pyspark/sql/udf.pyi @@ -0,0 +1,57 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, Callable, Optional + +from pyspark.sql._typing import ColumnOrName, DataTypeOrString +from pyspark.sql.column import Column +import pyspark.sql.session + +class UserDefinedFunction: + func: Callable[..., Any] + evalType: int + deterministic: bool + def __init__( + self, + func: Callable[..., Any], + returnType: DataTypeOrString = ..., + name: Optional[str] = ..., + evalType: int = ..., + deterministic: bool = ..., + ) -> None: ... + @property + def returnType(self): ... + def __call__(self, *cols: ColumnOrName) -> Column: ... + def asNondeterministic(self) -> UserDefinedFunction: ... + +class UDFRegistration: + sparkSession: pyspark.sql.session.SparkSession + def __init__(self, sparkSession: pyspark.sql.session.SparkSession) -> None: ... + def register( + self, + name: str, + f: Callable[..., Any], + returnType: Optional[DataTypeOrString] = ..., + ): ... + def registerJavaFunction( + self, + name: str, + javaClassName: str, + returnType: Optional[DataTypeOrString] = ..., + ) -> None: ... + def registerJavaUDAF(self, name: str, javaClassName: str) -> None: ... 
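For context only (not part of the patch): a minimal sketch of how the signatures declared in `types.pyi` and `udf.pyi` above surface to a type checker at call sites. The session name `spark` and the `to_upper` UDF are illustrative, not taken from this commit.

```
# Illustrative only -- assumes a local SparkSession; mypy resolves these
# calls against the stubs added in this patch.
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf
from pyspark.sql.types import StringType, StructField, StructType

spark = SparkSession.builder.appName("stub-demo").getOrCreate()

schema = StructType([StructField("name", StringType(), True)])
df = spark.createDataFrame([("alice",), ("bob",)], schema)

# udf(...) yields a UserDefinedFunction; calling it returns a Column,
# matching UserDefinedFunction.__call__ in udf.pyi.
to_upper = udf(lambda s: s.upper() if s is not None else None, StringType())
df.select(to_upper("name").alias("upper_name")).show()
```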
diff --git a/python/pyspark/sql/utils.pyi b/python/pyspark/sql/utils.pyi new file mode 100644 index 0000000000000..c11e4bed54e7f --- /dev/null +++ b/python/pyspark/sql/utils.pyi @@ -0,0 +1,55 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: This dynamically typed stub was automatically generated by stubgen. + +from pyspark import SparkContext as SparkContext # noqa: F401 +from typing import Any, Optional + +class CapturedException(Exception): + desc: Any = ... + stackTrace: Any = ... + cause: Any = ... + def __init__( + self, desc: Any, stackTrace: Any, cause: Optional[Any] = ... + ) -> None: ... + +class AnalysisException(CapturedException): ... +class ParseException(CapturedException): ... +class IllegalArgumentException(CapturedException): ... +class StreamingQueryException(CapturedException): ... +class QueryExecutionException(CapturedException): ... +class PythonException(CapturedException): ... +class UnknownException(CapturedException): ... + +def convert_exception(e: Any): ... +def capture_sql_exception(f: Any): ... +def install_exception_handler() -> None: ... +def toJArray(gateway: Any, jtype: Any, arr: Any): ... +def require_test_compiled() -> None: ... + +class ForeachBatchFunction: + sql_ctx: Any = ... + func: Any = ... + def __init__(self, sql_ctx: Any, func: Any) -> None: ... + error: Any = ... + def call(self, jdf: Any, batch_id: Any) -> None: ... + class Java: + implements: Any = ... + +def to_str(value: Any): ... diff --git a/python/pyspark/sql/window.pyi b/python/pyspark/sql/window.pyi new file mode 100644 index 0000000000000..4e31d57bec4d0 --- /dev/null +++ b/python/pyspark/sql/window.pyi @@ -0,0 +1,40 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyspark.sql._typing import ColumnOrName +from py4j.java_gateway import JavaObject # type: ignore[import] + +class Window: + unboundedPreceding: int + unboundedFollowing: int + currentRow: int + @staticmethod + def partitionBy(*cols: ColumnOrName) -> WindowSpec: ... 
+ @staticmethod + def orderBy(*cols: ColumnOrName) -> WindowSpec: ... + @staticmethod + def rowsBetween(start: int, end: int) -> WindowSpec: ... + @staticmethod + def rangeBetween(start: int, end: int) -> WindowSpec: ... + +class WindowSpec: + def __init__(self, jspec: JavaObject) -> None: ... + def partitionBy(self, *cols: ColumnOrName) -> WindowSpec: ... + def orderBy(self, *cols: ColumnOrName) -> WindowSpec: ... + def rowsBetween(self, start: int, end: int) -> WindowSpec: ... + def rangeBetween(self, start: int, end: int) -> WindowSpec: ... diff --git a/python/pyspark/statcounter.pyi b/python/pyspark/statcounter.pyi new file mode 100644 index 0000000000000..38e5970501527 --- /dev/null +++ b/python/pyspark/statcounter.pyi @@ -0,0 +1,44 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, Dict, Iterable, Optional, Union + +maximum: Any +minimum: Any +sqrt: Any + +class StatCounter: + n: int + mu: float + m2: float + maxValue: float + minValue: float + def __init__(self, values: Optional[Iterable[float]] = ...) -> None: ... + def merge(self, value: float) -> StatCounter: ... + def mergeStats(self, other: StatCounter) -> StatCounter: ... + def copy(self) -> StatCounter: ... + def count(self) -> int: ... + def mean(self) -> float: ... + def sum(self) -> float: ... + def min(self) -> float: ... + def max(self) -> float: ... + def variance(self) -> float: ... + def sampleVariance(self) -> float: ... + def stdev(self) -> float: ... + def sampleStdev(self) -> float: ... + def asDict(self, sample: bool = ...) -> Dict[str, Union[float, int]]: ... diff --git a/python/pyspark/status.pyi b/python/pyspark/status.pyi new file mode 100644 index 0000000000000..0558e245f49cc --- /dev/null +++ b/python/pyspark/status.pyi @@ -0,0 +1,42 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import List, NamedTuple, Optional +from py4j.java_gateway import JavaArray, JavaObject # type: ignore[import] + +class SparkJobInfo(NamedTuple): + jobId: int + stageIds: JavaArray + status: str + +class SparkStageInfo(NamedTuple): + stageId: int + currentAttemptId: int + name: str + numTasks: int + numActiveTasks: int + numCompletedTasks: int + numFailedTasks: int + +class StatusTracker: + def __init__(self, jtracker: JavaObject) -> None: ... + def getJobIdsForGroup(self, jobGroup: Optional[str] = ...) -> List[int]: ... + def getActiveStageIds(self) -> List[int]: ... + def getActiveJobsIds(self) -> List[int]: ... + def getJobInfo(self, jobId: int) -> SparkJobInfo: ... + def getStageInfo(self, stageId: int) -> SparkStageInfo: ... diff --git a/python/pyspark/storagelevel.pyi b/python/pyspark/storagelevel.pyi new file mode 100644 index 0000000000000..2eb05850bae78 --- /dev/null +++ b/python/pyspark/storagelevel.pyi @@ -0,0 +1,43 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import ClassVar + +class StorageLevel: + DISK_ONLY: ClassVar[StorageLevel] + DISK_ONLY_2: ClassVar[StorageLevel] + MEMORY_ONLY: ClassVar[StorageLevel] + MEMORY_ONLY_2: ClassVar[StorageLevel] + DISK_ONLY_3: ClassVar[StorageLevel] + MEMORY_AND_DISK: ClassVar[StorageLevel] + MEMORY_AND_DISK_2: ClassVar[StorageLevel] + OFF_HEAP: ClassVar[StorageLevel] + + useDisk: bool + useMemory: bool + useOffHeap: bool + deserialized: bool + replication: int + def __init__( + self, + useDisk: bool, + useMemory: bool, + useOffHeap: bool, + deserialized: bool, + replication: int = ..., + ) -> None: ... diff --git a/python/pyspark/streaming/__init__.pyi b/python/pyspark/streaming/__init__.pyi new file mode 100644 index 0000000000000..281c06e51cc60 --- /dev/null +++ b/python/pyspark/streaming/__init__.pyi @@ -0,0 +1,23 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from pyspark.streaming.context import StreamingContext as StreamingContext # noqa: F401 +from pyspark.streaming.dstream import DStream as DStream # noqa: F401 +from pyspark.streaming.listener import ( # noqa: F401 + StreamingListener as StreamingListener, +) diff --git a/python/pyspark/streaming/context.pyi b/python/pyspark/streaming/context.pyi new file mode 100644 index 0000000000000..f4b3dad38f1fb --- /dev/null +++ b/python/pyspark/streaming/context.pyi @@ -0,0 +1,75 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, Callable, List, Optional, TypeVar, Union + +from py4j.java_gateway import JavaObject # type: ignore[import] + +from pyspark.context import SparkContext +from pyspark.rdd import RDD +from pyspark.storagelevel import StorageLevel +from pyspark.streaming.dstream import DStream +from pyspark.streaming.listener import StreamingListener + +T = TypeVar("T") + +class StreamingContext: + def __init__( + self, + sparkContext: SparkContext, + batchDuration: Union[float, int] = ..., + jssc: Optional[JavaObject] = ..., + ) -> None: ... + @classmethod + def getOrCreate( + cls, checkpointPath: str, setupFunc: Callable[[], StreamingContext] + ) -> StreamingContext: ... + @classmethod + def getActive(cls) -> StreamingContext: ... + @classmethod + def getActiveOrCreate( + cls, checkpointPath: str, setupFunc: Callable[[], StreamingContext] + ) -> StreamingContext: ... + @property + def sparkContext(self) -> SparkContext: ... + def start(self) -> None: ... + def awaitTermination(self, timeout: Optional[int] = ...) -> None: ... + def awaitTerminationOrTimeout(self, timeout: int) -> None: ... + def stop( + self, stopSparkContext: bool = ..., stopGraceFully: bool = ... + ) -> None: ... + def remember(self, duration: int) -> None: ... + def checkpoint(self, directory: str) -> None: ... + def socketTextStream( + self, hostname: str, port: int, storageLevel: StorageLevel = ... + ) -> DStream[str]: ... + def textFileStream(self, directory: str) -> DStream[str]: ... + def binaryRecordsStream( + self, directory: str, recordLength: int + ) -> DStream[bytes]: ... + def queueStream( + self, + rdds: List[RDD[T]], + oneAtATime: bool = ..., + default: Optional[RDD[T]] = ..., + ) -> DStream[T]: ... + def transform( + self, dstreams: List[DStream[Any]], transformFunc: Callable[..., RDD[T]] + ) -> DStream[T]: ... + def union(self, *dstreams: DStream[T]) -> DStream[T]: ... + def addStreamingListener(self, streamingListener: StreamingListener) -> None: ... 
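For context only (not part of the patch): a small sketch of the `StreamingContext`/`DStream` calls that `context.pyi` above annotates. The socket host and port are placeholders; the word-count pipeline is a conventional example, not code from this commit.

```
# Illustrative only -- the socket source "localhost:9999" is a placeholder.
from pyspark import SparkContext
from pyspark.streaming import StreamingContext

sc = SparkContext("local[2]", "stub-demo")
ssc = StreamingContext(sc, batchDuration=1)

lines = ssc.socketTextStream("localhost", 9999)    # DStream[str]
counts = (lines.flatMap(lambda line: line.split(" "))
               .map(lambda w: (w, 1))
               .reduceByKey(lambda a, b: a + b))    # DStream[Tuple[str, int]]
counts.pprint()

ssc.start()
ssc.awaitTermination()
```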
diff --git a/python/pyspark/streaming/dstream.pyi b/python/pyspark/streaming/dstream.pyi new file mode 100644 index 0000000000000..bbeea69ee9ac2 --- /dev/null +++ b/python/pyspark/streaming/dstream.pyi @@ -0,0 +1,208 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import ( + Callable, + Generic, + Hashable, + Iterable, + List, + Optional, + Tuple, + TypeVar, + Union, +) +import datetime +from pyspark.rdd import RDD +from pyspark.storagelevel import StorageLevel +import pyspark.streaming.context + +S = TypeVar("S") +T = TypeVar("T") +U = TypeVar("U") +K = TypeVar("K", bound=Hashable) +V = TypeVar("V") + +class DStream(Generic[T]): + is_cached: bool + is_checkpointed: bool + def __init__(self, jdstream, ssc, jrdd_deserializer) -> None: ... + def context(self) -> pyspark.streaming.context.StreamingContext: ... + def count(self) -> DStream[int]: ... + def filter(self, f: Callable[[T], bool]) -> DStream[T]: ... + def flatMap( + self: DStream[T], + f: Callable[[T], Iterable[U]], + preservesPartitioning: bool = ..., + ) -> DStream[U]: ... + def map( + self: DStream[T], f: Callable[[T], U], preservesPartitioning: bool = ... + ) -> DStream[U]: ... + def mapPartitions( + self, f: Callable[[Iterable[T]], Iterable[U]], preservesPartitioning: bool = ... + ) -> DStream[U]: ... + def mapPartitionsWithIndex( + self, + f: Callable[[int, Iterable[T]], Iterable[U]], + preservesPartitioning: bool = ..., + ) -> DStream[U]: ... + def reduce(self, func: Callable[[T, T], T]) -> DStream[T]: ... + def reduceByKey( + self: DStream[Tuple[K, V]], + func: Callable[[V, V], V], + numPartitions: Optional[int] = ..., + ) -> DStream[Tuple[K, V]]: ... + def combineByKey( + self: DStream[Tuple[K, V]], + createCombiner: Callable[[V], U], + mergeValue: Callable[[U, V], U], + mergeCombiners: Callable[[U, U], U], + numPartitions: Optional[int] = ..., + ) -> DStream[Tuple[K, U]]: ... + def partitionBy( + self: DStream[Tuple[K, V]], + numPartitions: int, + partitionFunc: Callable[[K], int] = ..., + ) -> DStream[Tuple[K, V]]: ... + @overload + def foreachRDD(self, func: Callable[[RDD[T]], None]) -> None: ... + @overload + def foreachRDD(self, func: Callable[[datetime.datetime, RDD[T]], None]) -> None: ... + def pprint(self, num: int = ...) -> None: ... + def mapValues( + self: DStream[Tuple[K, V]], f: Callable[[V], U] + ) -> DStream[Tuple[K, U]]: ... + def flatMapValues( + self: DStream[Tuple[K, V]], f: Callable[[V], Iterable[U]] + ) -> DStream[Tuple[K, U]]: ... + def glom(self) -> DStream[List[T]]: ... + def cache(self) -> DStream[T]: ... + def persist(self, storageLevel: StorageLevel) -> DStream[T]: ... + def checkpoint(self, interval: Union[float, int]) -> DStream[T]: ... 
+ def groupByKey( + self: DStream[Tuple[K, V]], numPartitions: Optional[int] = ... + ) -> DStream[Tuple[K, Iterable[V]]]: ... + def countByValue(self) -> DStream[Tuple[T, int]]: ... + def saveAsTextFiles(self, prefix: str, suffix: Optional[str] = ...) -> None: ... + @overload + def transform(self, func: Callable[[RDD[T]], RDD[U]]) -> TransformedDStream[U]: ... + @overload + def transform( + self, func: Callable[[datetime.datetime, RDD[T]], RDD[U]] + ) -> TransformedDStream[U]: ... + @overload + def transformWith( + self, + func: Callable[[RDD[T], RDD[U]], RDD[V]], + other: RDD[U], + keepSerializer: bool = ..., + ) -> DStream[V]: ... + @overload + def transformWith( + self, + func: Callable[[datetime.datetime, RDD[T], RDD[U]], RDD[V]], + other: RDD[U], + keepSerializer: bool = ..., + ) -> DStream[V]: ... + def repartition(self, numPartitions: int) -> DStream[T]: ... + def union(self, other: DStream[U]) -> DStream[Union[T, U]]: ... + def cogroup( + self: DStream[Tuple[K, V]], + other: DStream[Tuple[K, U]], + numPartitions: Optional[int] = ..., + ) -> DStream[Tuple[K, Tuple[List[V], List[U]]]]: ... + def join( + self: DStream[Tuple[K, V]], + other: DStream[Tuple[K, U]], + numPartitions: Optional[int] = ..., + ) -> DStream[Tuple[K, Tuple[V, U]]]: ... + def leftOuterJoin( + self: DStream[Tuple[K, V]], + other: DStream[Tuple[K, U]], + numPartitions: Optional[int] = ..., + ) -> DStream[Tuple[K, Tuple[V, Optional[U]]]]: ... + def rightOuterJoin( + self: DStream[Tuple[K, V]], + other: DStream[Tuple[K, U]], + numPartitions: Optional[int] = ..., + ) -> DStream[Tuple[K, Tuple[Optional[V], U]]]: ... + def fullOuterJoin( + self: DStream[Tuple[K, V]], + other: DStream[Tuple[K, U]], + numPartitions: Optional[int] = ..., + ) -> DStream[Tuple[K, Tuple[Optional[V], Optional[U]]]]: ... + def slice( + self, begin: Union[datetime.datetime, int], end: Union[datetime.datetime, int] + ) -> List[RDD[T]]: ... + def window( + self, windowDuration: int, slideDuration: Optional[int] = ... + ) -> DStream[T]: ... + def reduceByWindow( + self, + reduceFunc: Callable[[T, T], T], + invReduceFunc: Optional[Callable[[T, T], T]], + windowDuration: int, + slideDuration: int, + ) -> DStream[T]: ... + def countByWindow( + self, windowDuration: int, slideDuration: int + ) -> DStream[Tuple[T, int]]: ... + def countByValueAndWindow( + self, + windowDuration: int, + slideDuration: int, + numPartitions: Optional[int] = ..., + ) -> DStream[Tuple[T, int]]: ... + def groupByKeyAndWindow( + self: DStream[Tuple[K, V]], + windowDuration: int, + slideDuration: int, + numPartitions: Optional[int] = ..., + ) -> DStream[Tuple[K, Iterable[V]]]: ... + def reduceByKeyAndWindow( + self: DStream[Tuple[K, V]], + func: Callable[[V, V], V], + invFunc: Optional[Callable[[V, V], V]], + windowDuration: int, + slideDuration: Optional[int] = ..., + numPartitions: Optional[int] = ..., + filterFunc: Optional[Callable[[Tuple[K, V]], bool]] = ..., + ) -> DStream[Tuple[K, V]]: ... + def updateStateByKey( + self: DStream[Tuple[K, V]], + updateFunc: Callable[[Iterable[V], Optional[S]], S], + numPartitions: Optional[int] = ..., + initialRDD: Optional[RDD[Tuple[K, S]]] = ..., + ) -> DStream[Tuple[K, S]]: ... + +class TransformedDStream(DStream[U]): + is_cached: bool + is_checkpointed: bool + func: Callable + prev: DStream + @overload + def __init__( + self: DStream[U], prev: DStream[T], func: Callable[[RDD[T]], RDD[U]] + ) -> None: ... 
+ @overload + def __init__( + self: DStream[U], + prev: DStream[T], + func: Callable[[datetime.datetime, RDD[T]], RDD[U]], + ) -> None: ... diff --git a/python/pyspark/streaming/kinesis.pyi b/python/pyspark/streaming/kinesis.pyi new file mode 100644 index 0000000000000..246fa58ca6da3 --- /dev/null +++ b/python/pyspark/streaming/kinesis.pyi @@ -0,0 +1,46 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: This dynamically typed stub was automatically generated by stubgen. + +from typing import Any, Optional + +def utf8_decoder(s): ... + +class KinesisUtils: + @staticmethod + def createStream( + ssc, + kinesisAppName, + streamName, + endpointUrl, + regionName, + initialPositionInStream, + checkpointInterval, + storageLevel: Any = ..., + awsAccessKeyId: Optional[Any] = ..., + awsSecretKey: Optional[Any] = ..., + decoder: Any = ..., + stsAssumeRoleArn: Optional[Any] = ..., + stsSessionName: Optional[Any] = ..., + stsExternalId: Optional[Any] = ..., + ): ... + +class InitialPositionInStream: + LATEST: Any + TRIM_HORIZON: Any diff --git a/python/pyspark/streaming/listener.pyi b/python/pyspark/streaming/listener.pyi new file mode 100644 index 0000000000000..4033529607cea --- /dev/null +++ b/python/pyspark/streaming/listener.pyi @@ -0,0 +1,35 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: This dynamically typed stub was automatically generated by stubgen. + +from typing import Any + +class StreamingListener: + def __init__(self) -> None: ... + def onStreamingStarted(self, streamingStarted: Any) -> None: ... + def onReceiverStarted(self, receiverStarted: Any) -> None: ... + def onReceiverError(self, receiverError: Any) -> None: ... + def onReceiverStopped(self, receiverStopped: Any) -> None: ... + def onBatchSubmitted(self, batchSubmitted: Any) -> None: ... + def onBatchStarted(self, batchStarted: Any) -> None: ... + def onBatchCompleted(self, batchCompleted: Any) -> None: ... + def onOutputOperationStarted(self, outputOperationStarted: Any) -> None: ... 
+ def onOutputOperationCompleted(self, outputOperationCompleted: Any) -> None: ... + class Java: + implements: Any = ... diff --git a/python/pyspark/streaming/tests/test_context.py b/python/pyspark/streaming/tests/test_context.py index 26f1d24f644ea..b255796cdcdd7 100644 --- a/python/pyspark/streaming/tests/test_context.py +++ b/python/pyspark/streaming/tests/test_context.py @@ -178,7 +178,7 @@ def test_await_termination_or_timeout(self): from pyspark.streaming.tests.test_context import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/streaming/tests/test_dstream.py b/python/pyspark/streaming/tests/test_dstream.py index 00d00b50c9283..ea5353c77b6b2 100644 --- a/python/pyspark/streaming/tests/test_dstream.py +++ b/python/pyspark/streaming/tests/test_dstream.py @@ -647,7 +647,7 @@ def check_output(n): from pyspark.streaming.tests.test_dstream import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/streaming/tests/test_kinesis.py b/python/pyspark/streaming/tests/test_kinesis.py index b39809e2f69c2..70c9a012e7a03 100644 --- a/python/pyspark/streaming/tests/test_kinesis.py +++ b/python/pyspark/streaming/tests/test_kinesis.py @@ -83,7 +83,7 @@ def get_output(_, rdd): from pyspark.streaming.tests.test_kinesis import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/streaming/tests/test_listener.py b/python/pyspark/streaming/tests/test_listener.py index 3970cf6589394..e4dab1bba3a6c 100644 --- a/python/pyspark/streaming/tests/test_listener.py +++ b/python/pyspark/streaming/tests/test_listener.py @@ -152,7 +152,7 @@ def func(dstream): from pyspark.streaming.tests.test_listener import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/streaming/util.pyi b/python/pyspark/streaming/util.pyi new file mode 100644 index 0000000000000..d552eb15f4818 --- /dev/null +++ b/python/pyspark/streaming/util.pyi @@ -0,0 +1,48 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: This dynamically typed stub was automatically generated by stubgen. 
+ +from typing import Any, Optional + +class TransformFunction: + ctx: Any + func: Any + deserializers: Any + rdd_wrap_func: Any + failure: Any + def __init__(self, ctx, func, *deserializers) -> None: ... + def rdd_wrapper(self, func): ... + def call(self, milliseconds, jrdds): ... + def getLastFailure(self): ... + class Java: + implements: Any + +class TransformFunctionSerializer: + ctx: Any + serializer: Any + gateway: Any + failure: Any + def __init__(self, ctx, serializer, gateway: Optional[Any] = ...) -> None: ... + def dumps(self, id): ... + def loads(self, data): ... + def getLastFailure(self): ... + class Java: + implements: Any + +def rddToFileName(prefix, suffix, timestamp): ... diff --git a/python/pyspark/taskcontext.pyi b/python/pyspark/taskcontext.pyi new file mode 100644 index 0000000000000..3415c69f02177 --- /dev/null +++ b/python/pyspark/taskcontext.pyi @@ -0,0 +1,45 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Dict, List +from typing_extensions import Literal +from pyspark.resource.information import ResourceInformation + +class TaskContext: + def __new__(cls) -> TaskContext: ... + @classmethod + def get(cls) -> TaskContext: ... + def stageId(self) -> int: ... + def partitionId(self) -> int: ... + def attemptNumber(self) -> int: ... + def taskAttemptId(self) -> int: ... + def getLocalProperty(self, key: str) -> str: ... + def resources(self) -> Dict[str, ResourceInformation]: ... + +BARRIER_FUNCTION = Literal[1] + +class BarrierTaskContext(TaskContext): + @classmethod + def get(cls) -> BarrierTaskContext: ... + def barrier(self) -> None: ... + def allGather(self, message: str = ...) -> List[str]: ... + def getTaskInfos(self) -> List[BarrierTaskInfo]: ... + +class BarrierTaskInfo: + address: str + def __init__(self, address: str) -> None: ... 
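As with the other stubs, a short illustrative sketch of code the `TaskContext`/`BarrierTaskContext` signatures above describe (assumes a local `SparkContext` with enough cores for the barrier stage; the app name is a placeholder):

```python
# Sketch: reading task metadata inside tasks, typed against the stub above.
from pyspark import SparkContext
from pyspark.taskcontext import BarrierTaskContext, TaskContext

sc = SparkContext("local[4]", "taskcontext-stub-example")  # placeholder app name

def tag_with_partition(rows):
    ctx = TaskContext.get()                 # TaskContext
    return ((ctx.partitionId(), ctx.stageId(), x) for x in rows)

print(sc.parallelize(range(8), 4).mapPartitions(tag_with_partition).collect())

def barrier_stage(rows):
    ctx = BarrierTaskContext.get()          # BarrierTaskContext
    ctx.barrier()                           # wait for every task in the stage
    addresses = [info.address for info in ctx.getTaskInfos()]
    yield (ctx.partitionId(), addresses)

print(sc.parallelize(range(4), 4).barrier().mapPartitions(barrier_stage).collect())
```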
diff --git a/python/pyspark/testing/mlutils.py b/python/pyspark/testing/mlutils.py index a36d0709d8013..a8cf53b31f8c9 100644 --- a/python/pyspark/testing/mlutils.py +++ b/python/pyspark/testing/mlutils.py @@ -20,7 +20,7 @@ from pyspark.ml import Estimator, Model, Transformer, UnaryTransformer from pyspark.ml.param import Param, Params, TypeConverters from pyspark.ml.util import DefaultParamsReadable, DefaultParamsWritable -from pyspark.ml.wrapper import _java2py +from pyspark.ml.wrapper import _java2py # type: ignore from pyspark.sql import DataFrame, SparkSession from pyspark.sql.types import DoubleType from pyspark.testing.utils import ReusedPySparkTestCase as PySparkTestCase @@ -116,7 +116,8 @@ def _transform(self, dataset): class MockUnaryTransformer(UnaryTransformer, DefaultParamsReadable, DefaultParamsWritable): - shift = Param(Params._dummy(), "shift", "The amount by which to shift " + + shift = Param(Params._dummy(), # type: ignore + "shift", "The amount by which to shift " + "data in a DataFrame", typeConverter=TypeConverters.toFloat) diff --git a/python/pyspark/testing/sqlutils.py b/python/pyspark/testing/sqlutils.py index e85cae7dda2c6..a394e8eecc69e 100644 --- a/python/pyspark/testing/sqlutils.py +++ b/python/pyspark/testing/sqlutils.py @@ -147,7 +147,7 @@ class PythonOnlyPoint(ExamplePoint): """ An example class to demonstrate UDT in only Python """ - __UDT__ = PythonOnlyUDT() + __UDT__ = PythonOnlyUDT() # type: ignore class MyObject(object): diff --git a/python/pyspark/testing/streamingutils.py b/python/pyspark/testing/streamingutils.py index a6abc2ef673b7..f6a317e97331c 100644 --- a/python/pyspark/testing/streamingutils.py +++ b/python/pyspark/testing/streamingutils.py @@ -37,7 +37,7 @@ "spark-streaming-kinesis-asl-assembly-", "spark-streaming-kinesis-asl-assembly_") if kinesis_asl_assembly_jar is None: - kinesis_requirement_message = ( + kinesis_requirement_message = ( # type: ignore "Skipping all Kinesis Python tests as the optional Kinesis project was " "not compiled into a JAR. 
To run these tests, " "you need to build Spark with 'build/sbt -Pkinesis-asl assembly/package " @@ -47,7 +47,7 @@ existing_args = os.environ.get("PYSPARK_SUBMIT_ARGS", "pyspark-shell") jars_args = "--jars %s" % kinesis_asl_assembly_jar os.environ["PYSPARK_SUBMIT_ARGS"] = " ".join([jars_args, existing_args]) - kinesis_requirement_message = None + kinesis_requirement_message = None # type: ignore should_test_kinesis = kinesis_requirement_message is None diff --git a/python/pyspark/tests/test_appsubmit.py b/python/pyspark/tests/test_appsubmit.py index 15170b878eb22..3f45bf039d3a9 100644 --- a/python/pyspark/tests/test_appsubmit.py +++ b/python/pyspark/tests/test_appsubmit.py @@ -241,7 +241,7 @@ def test_user_configuration(self): from pyspark.tests.test_appsubmit import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_broadcast.py b/python/pyspark/tests/test_broadcast.py index 543dc98660fde..c35c5a68e4986 100644 --- a/python/pyspark/tests/test_broadcast.py +++ b/python/pyspark/tests/test_broadcast.py @@ -148,7 +148,7 @@ def random_bytes(n): from pyspark.tests.test_broadcast import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_conf.py b/python/pyspark/tests/test_conf.py index 3e80c17f4931c..a8d65b8919777 100644 --- a/python/pyspark/tests/test_conf.py +++ b/python/pyspark/tests/test_conf.py @@ -36,7 +36,7 @@ def test_memory_conf(self): from pyspark.tests.test_conf import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_context.py b/python/pyspark/tests/test_context.py index 9f159f7703950..9b6b74a111288 100644 --- a/python/pyspark/tests/test_context.py +++ b/python/pyspark/tests/test_context.py @@ -93,7 +93,7 @@ def test_add_py_file(self): # this job fails due to `userlibrary` not being on the Python path: # disable logging in log4j temporarily def func(x): - from userlibrary import UserClass + from userlibrary import UserClass # type: ignore return UserClass().hello() with QuietTest(self.sc): self.assertRaises(Exception, self.sc.parallelize(range(2)).map(func).first) @@ -137,7 +137,8 @@ def test_add_egg_file_locally(self): # To ensure that we're actually testing addPyFile's effects, check that # this fails due to `userlibrary` not being on the Python path: def func(): - from userlib import UserClass # noqa: F401 + from userlib import UserClass # type: ignore[import] + UserClass() self.assertRaises(ImportError, func) path = os.path.join(SPARK_HOME, "python/test_support/userlib-0.1.zip") self.sc.addPyFile(path) @@ -147,11 +148,11 @@ def func(): def test_overwrite_system_module(self): self.sc.addPyFile(os.path.join(SPARK_HOME, "python/test_support/SimpleHTTPServer.py")) - import SimpleHTTPServer + import SimpleHTTPServer # type: ignore[import] self.assertEqual("My Server", SimpleHTTPServer.__name__) def func(x): - import SimpleHTTPServer + import SimpleHTTPServer # type: ignore[import] return SimpleHTTPServer.__name__ self.assertEqual(["My Server"], self.sc.parallelize(range(1)).map(func).collect()) @@ -321,7 +322,7 @@ def tearDown(self): 
from pyspark.tests.test_context import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_daemon.py b/python/pyspark/tests/test_daemon.py index b1f8c71c77ba9..c3fd89fef72c2 100644 --- a/python/pyspark/tests/test_daemon.py +++ b/python/pyspark/tests/test_daemon.py @@ -76,7 +76,7 @@ def test_termination_sigterm(self): from pyspark.tests.test_daemon import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_join.py b/python/pyspark/tests/test_join.py index 815c78ef9a8e2..63dd1cfef9a6a 100644 --- a/python/pyspark/tests/test_join.py +++ b/python/pyspark/tests/test_join.py @@ -62,7 +62,7 @@ def test_narrow_dependency_in_join(self): from pyspark.tests.test_join import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_pin_thread.py b/python/pyspark/tests/test_pin_thread.py index efe7d7f6639b1..b612796c963a0 100644 --- a/python/pyspark/tests/test_pin_thread.py +++ b/python/pyspark/tests/test_pin_thread.py @@ -169,7 +169,7 @@ def get_outer_local_prop(): from pyspark.tests.test_pin_thread import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_profiler.py b/python/pyspark/tests/test_profiler.py index ca144cc6e1eb6..de72a547b0844 100644 --- a/python/pyspark/tests/test_profiler.py +++ b/python/pyspark/tests/test_profiler.py @@ -101,7 +101,7 @@ def test_profiler_disabled(self): from pyspark.tests.test_profiler import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_rdd.py b/python/pyspark/tests/test_rdd.py index c154bda00d605..47b8f10a5b05e 100644 --- a/python/pyspark/tests/test_rdd.py +++ b/python/pyspark/tests/test_rdd.py @@ -884,7 +884,7 @@ def run_job(job_group, index): from pyspark.tests.test_rdd import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_rddbarrier.py b/python/pyspark/tests/test_rddbarrier.py index f0a05a23cc4e0..ba2c4b9ba84d4 100644 --- a/python/pyspark/tests/test_rddbarrier.py +++ b/python/pyspark/tests/test_rddbarrier.py @@ -43,7 +43,7 @@ def f(index, iterator): from pyspark.tests.test_rddbarrier import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_readwrite.py b/python/pyspark/tests/test_readwrite.py index adbc343c650a7..145b53a5eaaa1 100644 --- a/python/pyspark/tests/test_readwrite.py +++ b/python/pyspark/tests/test_readwrite.py @@ -307,7 +307,7 @@ def test_malformed_RDD(self): from 
pyspark.tests.test_readwrite import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_serializers.py b/python/pyspark/tests/test_serializers.py index 8eaa9c7d5a8d2..cc7838e595b8a 100644 --- a/python/pyspark/tests/test_serializers.py +++ b/python/pyspark/tests/test_serializers.py @@ -87,7 +87,7 @@ def __getattr__(self, item): def test_pickling_file_handles(self): # to be corrected with SPARK-11160 try: - import xmlrunner # noqa: F401 + import xmlrunner # type: ignore[import] # noqa: F401 except ImportError: ser = CloudPickleSerializer() out1 = sys.stderr @@ -227,7 +227,7 @@ def test_chunked_stream(self): from pyspark.tests.test_serializers import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_shuffle.py b/python/pyspark/tests/test_shuffle.py index 061b93f32c56c..6a245a26b4551 100644 --- a/python/pyspark/tests/test_shuffle.py +++ b/python/pyspark/tests/test_shuffle.py @@ -170,7 +170,7 @@ def test_external_sort_in_rdd(self): from pyspark.tests.test_shuffle import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_taskcontext.py b/python/pyspark/tests/test_taskcontext.py index f5be685643dd5..f0e6672957c13 100644 --- a/python/pyspark/tests/test_taskcontext.py +++ b/python/pyspark/tests/test_taskcontext.py @@ -324,7 +324,7 @@ def tearDown(self): from pyspark.tests.test_taskcontext import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_util.py b/python/pyspark/tests/test_util.py index e853bc322c184..a25c41b296944 100644 --- a/python/pyspark/tests/test_util.py +++ b/python/pyspark/tests/test_util.py @@ -77,7 +77,7 @@ def test_parsing_version_string(self): from pyspark.tests.test_util import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_worker.py b/python/pyspark/tests/test_worker.py index a855eaafc1927..bfaf3a3186cad 100644 --- a/python/pyspark/tests/test_worker.py +++ b/python/pyspark/tests/test_worker.py @@ -202,7 +202,7 @@ def tearDown(self): from pyspark.tests.test_worker import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/traceback_utils.pyi b/python/pyspark/traceback_utils.pyi new file mode 100644 index 0000000000000..33b1b7dc3227f --- /dev/null +++ b/python/pyspark/traceback_utils.pyi @@ -0,0 +1,29 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from collections import namedtuple +from typing import Any + +CallSite = namedtuple("CallSite", "function file linenum") + +def first_spark_call(): ... + +class SCCallSiteSync: + def __init__(self, sc: Any) -> None: ... + def __enter__(self) -> None: ... + def __exit__(self, type: Any, value: Any, tb: Any) -> None: ... diff --git a/python/pyspark/util.pyi b/python/pyspark/util.pyi new file mode 100644 index 0000000000000..023b409831459 --- /dev/null +++ b/python/pyspark/util.pyi @@ -0,0 +1,35 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +from typing import Any, Tuple +from pyspark._typing import F + +import threading + +def print_exec(stream: Any) -> None: ... + +class VersionUtils: + @staticmethod + def majorMinorVersion(sparkVersion: str) -> Tuple[int, int]: ... + +def fail_on_stopiteration(f: F) -> F: ... + +class InheritableThread(threading.Thread): + def __init__(self, target: Any, *args: Any, **kwargs: Any): ... + def __del__(self) -> None: ... diff --git a/python/pyspark/version.pyi b/python/pyspark/version.pyi new file mode 100644 index 0000000000000..444dae62f0c09 --- /dev/null +++ b/python/pyspark/version.pyi @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +__version__: str diff --git a/python/pyspark/worker.pyi b/python/pyspark/worker.pyi new file mode 100644 index 0000000000000..cc264823cc867 --- /dev/null +++ b/python/pyspark/worker.pyi @@ -0,0 +1,73 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyspark import shuffle as shuffle +from pyspark.broadcast import Broadcast as Broadcast +from pyspark.files import SparkFiles as SparkFiles +from pyspark.java_gateway import local_connect_and_auth as local_connect_and_auth +from pyspark.rdd import PythonEvalType as PythonEvalType +from pyspark.resource import ResourceInformation as ResourceInformation +from pyspark.serializers import ( + BatchedSerializer as BatchedSerializer, + PickleSerializer as PickleSerializer, + SpecialLengths as SpecialLengths, + UTF8Deserializer as UTF8Deserializer, + read_bool as read_bool, + read_int as read_int, + read_long as read_long, + write_int as write_int, + write_long as write_long, + write_with_length as write_with_length, +) +from pyspark.sql.pandas.serializers import ( + ArrowStreamPandasUDFSerializer as ArrowStreamPandasUDFSerializer, + CogroupUDFSerializer as CogroupUDFSerializer, +) +from pyspark.sql.pandas.types import to_arrow_type as to_arrow_type +from pyspark.sql.types import StructType as StructType +from pyspark.taskcontext import ( + BarrierTaskContext as BarrierTaskContext, + TaskContext as TaskContext, +) +from pyspark.util import fail_on_stopiteration as fail_on_stopiteration +from typing import Any + +has_resource_module: bool +pickleSer: Any +utf8_deserializer: Any + +def report_times(outfile: Any, boot: Any, init: Any, finish: Any) -> None: ... +def add_path(path: Any) -> None: ... +def read_command(serializer: Any, file: Any): ... +def chain(f: Any, g: Any): ... +def wrap_udf(f: Any, return_type: Any): ... +def wrap_scalar_pandas_udf(f: Any, return_type: Any): ... +def wrap_pandas_iter_udf(f: Any, return_type: Any): ... +def wrap_cogrouped_map_pandas_udf(f: Any, return_type: Any, argspec: Any): ... +def wrap_grouped_map_pandas_udf(f: Any, return_type: Any, argspec: Any): ... +def wrap_grouped_agg_pandas_udf(f: Any, return_type: Any): ... +def wrap_window_agg_pandas_udf( + f: Any, return_type: Any, runner_conf: Any, udf_index: Any +): ... +def wrap_unbounded_window_agg_pandas_udf(f: Any, return_type: Any): ... +def wrap_bounded_window_agg_pandas_udf(f: Any, return_type: Any): ... +def read_single_udf( + pickleSer: Any, infile: Any, eval_type: Any, runner_conf: Any, udf_index: Any +): ... +def read_udfs(pickleSer: Any, infile: Any, eval_type: Any): ... +def main(infile: Any, outfile: Any) -> None: ... 
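`version.pyi` and `util.pyi` above only annotate attributes that already exist, so a checker can verify simple version-handling code statically. A small sketch (illustrative; the printed value depends on the installed build):

```python
# Sketch: static typing of version helpers via the stubs above.
from typing import Tuple

from pyspark.version import __version__   # annotated as `str` in version.pyi
from pyspark.util import VersionUtils     # majorMinorVersion: (str) -> Tuple[int, int]

major_minor: Tuple[int, int] = VersionUtils.majorMinorVersion(__version__)
print(major_minor)                         # e.g. (3, 1) on a 3.1.x build
```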
diff --git a/python/setup.py b/python/setup.py index 7c12b112acd65..206765389335f 100755 --- a/python/setup.py +++ b/python/setup.py @@ -265,7 +265,8 @@ def run(self): 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: Implementation :: CPython', - 'Programming Language :: Python :: Implementation :: PyPy'], + 'Programming Language :: Python :: Implementation :: PyPy', + 'Typing :: Typed'], cmdclass={ 'install': InstallCommand, }, From 688d016c7acc4b9d96d75b40123be9f40b7b2693 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Thu, 24 Sep 2020 14:49:58 +0900 Subject: [PATCH 0101/1009] [SPARK-32982][BUILD] Remove hive-1.2 profiles in PIP installation option ### What changes were proposed in this pull request? This PR removes Hive 1.2 option (and therefore `HIVE_VERSION` environment variable as well). ### Why are the changes needed? Hive 1.2 is a fork version. We shouldn't promote users to use. ### Does this PR introduce _any_ user-facing change? Nope, `HIVE_VERSION` and Hive 1.2 are removed but this is new experimental feature in master only. ### How was this patch tested? Manually tested: ```bash SPARK_VERSION=3.0.1 HADOOP_VERSION=3.2 pip install pyspark-3.1.0.dev0.tar.gz -v SPARK_VERSION=3.0.1 HADOOP_VERSION=2.7 pip install pyspark-3.1.0.dev0.tar.gz -v SPARK_VERSION=3.0.1 HADOOP_VERSION=invalid pip install pyspark-3.1.0.dev0.tar.gz -v ``` Closes #29858 from HyukjinKwon/SPARK-32981. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- dev/create-release/release-build.sh | 2 +- .../docs/source/getting_started/install.rst | 24 +++++-------------- python/pyspark/install.py | 16 ++++--------- python/pyspark/tests/test_install_spark.py | 13 ++++------ python/setup.py | 2 ++ 5 files changed, 18 insertions(+), 39 deletions(-) diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index c47469a2f6d95..c7fee13d39c6b 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -276,7 +276,7 @@ if [[ "$1" == "package" ]]; then # list of packages to be built, so it's ok for things to be missing in BINARY_PKGS_EXTRA. # NOTE: Don't forget to update the valid combinations of distributions at - # 'python/pyspark.install.py' and 'python/docs/source/getting_started/installation.rst' + # 'python/pyspark/install.py' and 'python/docs/source/getting_started/install.rst' # if you're changing them. declare -A BINARY_PKGS_ARGS BINARY_PKGS_ARGS["hadoop3.2"]="-Phadoop-3.2 $HIVE_PROFILES" diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst index d915e9c7349d4..4039698d39958 100644 --- a/python/docs/source/getting_started/install.rst +++ b/python/docs/source/getting_started/install.rst @@ -48,40 +48,28 @@ If you want to install extra dependencies for a specific componenet, you can ins pip install pyspark[sql] -For PySpark with different Hadoop and/or Hive, you can install it by using ``HIVE_VERSION`` and ``HADOOP_VERSION`` environment variables as below: +For PySpark with a different Hadoop version, you can install it by using ``HADOOP_VERSION`` environment variables as below: .. code-block:: bash - HIVE_VERSION=2.3 pip install pyspark HADOOP_VERSION=2.7 pip install pyspark - HIVE_VERSION=1.2 HADOOP_VERSION=2.7 pip install pyspark -The default distribution has built-in Hadoop 3.2 and Hive 2.3. If users specify different versions, the pip installation automatically +The default distribution uses Hadoop 3.2 and Hive 2.3. 
If users specify different versions of Hadoop, the pip installation automatically downloads a different version and use it in PySpark. Downloading it can take a while depending on -the network and the mirror chosen. ``PYSPARK_RELEASE_MIRROR`` can be set to manually choose the mirror -for faster downloading. +the network and the mirror chosen. ``PYSPARK_RELEASE_MIRROR`` can be set to manually choose the mirror for faster downloading. .. code-block:: bash PYSPARK_RELEASE_MIRROR=http://mirror.apache-kr.org HADOOP_VERSION=2.7 pip install -It is recommended to use `-v` option in `pip` to track the installation and download status. +It is recommended to use ``-v`` option in ``pip`` to track the installation and download status. .. code-block:: bash HADOOP_VERSION=2.7 pip install pyspark -v -Supported versions are as below: - -====================================== ====================================== ====================================== -``HADOOP_VERSION`` \\ ``HIVE_VERSION`` 1.2 2.3 (default) -====================================== ====================================== ====================================== -**2.7** O O -**3.2 (default)** X O -**without** X O -====================================== ====================================== ====================================== - -Note that this installation of PySpark with different versions of Hadoop and Hive is experimental. It can change or be removed between minor releases. +Supported versions of Hadoop are ``HADOOP_VERSION=2.7`` and ``HADOOP_VERSION=3.2`` (default). +Note that this installation of PySpark with a different version of Hadoop is experimental. It can change or be removed between minor releases. Using Conda diff --git a/python/pyspark/install.py b/python/pyspark/install.py index 89573577cd994..84dd2c9964563 100644 --- a/python/pyspark/install.py +++ b/python/pyspark/install.py @@ -26,18 +26,13 @@ DEFAULT_HADOOP = "hadoop3.2" DEFAULT_HIVE = "hive2.3" SUPPORTED_HADOOP_VERSIONS = ["hadoop2.7", "hadoop3.2", "without-hadoop"] -SUPPORTED_HIVE_VERSIONS = ["hive1.2", "hive2.3"] +SUPPORTED_HIVE_VERSIONS = ["hive2.3"] UNSUPPORTED_COMBINATIONS = [ - ("without-hadoop", "hive1.2"), - ("hadoop3.2", "hive1.2"), ] def checked_package_name(spark_version, hadoop_version, hive_version): - if hive_version == "hive1.2": - return "%s-bin-%s-%s" % (spark_version, hadoop_version, hive_version) - else: - return "%s-bin-%s" % (spark_version, hadoop_version) + return "%s-bin-%s" % (spark_version, hadoop_version) def checked_versions(spark_version, hadoop_version, hive_version): @@ -48,7 +43,7 @@ def checked_versions(spark_version, hadoop_version, hive_version): :param hadoop_version: Hadoop version. It should be X.X such as '2.7' or 'hadoop2.7'. 'without' and 'without-hadoop' are supported as special keywords for Hadoop free distribution. - :param hive_version: Hive version. It should be X.X such as '1.2' or 'hive1.2'. + :param hive_version: Hive version. It should be X.X such as '2.3' or 'hive2.3'. :return it returns fully-qualified versions of Spark, Hadoop and Hive in a tuple. For example, spark-3.0.0, hadoop3.2 and hive2.3. 
@@ -80,9 +75,6 @@ def checked_versions(spark_version, hadoop_version, hive_version): "one of [%s]" % (hive_version, ", ".join( SUPPORTED_HADOOP_VERSIONS))) - if (hadoop_version, hive_version) in UNSUPPORTED_COMBINATIONS: - raise RuntimeError("Hive 1.2 should only be with Hadoop 2.7.") - return spark_version, hadoop_version, hive_version @@ -95,7 +87,7 @@ def install_spark(dest, spark_version, hadoop_version, hive_version): :param spark_version: Spark version. It should be spark-X.X.X form. :param hadoop_version: Hadoop version. It should be hadoopX.X such as 'hadoop2.7' or 'without-hadoop'. - :param hive_version: Hive version. It should be hiveX.X such as 'hive1.2'. + :param hive_version: Hive version. It should be hiveX.X such as 'hive2.3'. """ package_name = checked_package_name(spark_version, hadoop_version, hive_version) diff --git a/python/pyspark/tests/test_install_spark.py b/python/pyspark/tests/test_install_spark.py index b215cf6b01317..6f9949aa8b2e0 100644 --- a/python/pyspark/tests/test_install_spark.py +++ b/python/pyspark/tests/test_install_spark.py @@ -41,9 +41,6 @@ def test_install_spark(self): self.assertTrue(os.path.exists("%s/RELEASE" % tmp_dir)) def test_package_name(self): - self.assertEqual( - "spark-3.0.0-bin-hadoop3.2-hive1.2", - checked_package_name("spark-3.0.0", "hadoop3.2", "hive1.2")) self.assertEqual( "spark-3.0.0-bin-hadoop3.2", checked_package_name("spark-3.0.0", "hadoop3.2", "hive2.3")) @@ -53,12 +50,12 @@ def test_checked_versions(self): # Positive test cases self.assertEqual( - ("spark-3.0.0", "hadoop2.7", "hive1.2"), - checked_versions("spark-3.0.0", "hadoop2.7", "hive1.2")) + ("spark-3.0.0", "hadoop2.7", "hive2.3"), + checked_versions("spark-3.0.0", "hadoop2.7", "hive2.3")) self.assertEqual( - ("spark-3.0.0", "hadoop2.7", "hive1.2"), - checked_versions("3.0.0", "2.7", "1.2")) + ("spark-3.0.0", "hadoop2.7", "hive2.3"), + checked_versions("3.0.0", "2.7", "2.3")) self.assertEqual( ("spark-2.4.1", "without-hadoop", "hive2.3"), @@ -94,7 +91,7 @@ def test_checked_versions(self): hadoop_version=DEFAULT_HADOOP, hive_version="malformed") - with self.assertRaisesRegex(RuntimeError, "Hive 1.2 should only be with Hadoop 2.7"): + with self.assertRaisesRegex(RuntimeError, "Spark distribution of hive1.2 is not supported"): checked_versions( spark_version=test_version, hadoop_version="hadoop3.2", diff --git a/python/setup.py b/python/setup.py index 206765389335f..8d9cf2ee5459a 100755 --- a/python/setup.py +++ b/python/setup.py @@ -127,6 +127,8 @@ def run(self): if ("HADOOP_VERSION" in os.environ) or ("HIVE_VERSION" in os.environ): # Note that SPARK_VERSION environment is just a testing purpose. + # HIVE_VERSION environment variable is also internal for now in case + # we support another version of Hive in the future. spark_version, hadoop_version, hive_version = install_module.checked_versions( os.environ.get("SPARK_VERSION", VERSION).lower(), os.environ.get("HADOOP_VERSION", install_module.DEFAULT_HADOOP).lower(), From fe6d38d24356df46af7705154070cde536e5ac38 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 24 Sep 2020 08:25:24 -0700 Subject: [PATCH 0102/1009] [SPARK-32987][MESOS] Pass all `mesos` module UTs in Scala 2.13 ### What changes were proposed in this pull request? The main change of this pr is add a manual sort to `defaultConf ++ driverConf` before constructing `--conf` options to ensure options has same order in Scala 2.12 and Scala 2.13. ### Why are the changes needed? We need to support a Scala 2.13 build. 
### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Scala 2.12: Pass the Jenkins or GitHub Action - Scala 2.13: Pass GitHub 2.13 Build Action Do the following: ``` dev/change-scala-version.sh 2.13 mvn clean install -DskipTests -pl resource-managers/mesos -Pscala-2.13 -Pmesos -am mvn test -pl resource-managers/mesos -Pscala-2.13 -Pmesos ``` **Before** ``` Tests: succeeded 106, failed 1, canceled 0, ignored 0, pending 0 *** 1 TESTS FAILED *** ``` **After** ``` Tests: succeeded 107, failed 0, canceled 0, ignored 0, pending 0 All tests passed. ``` Closes #29865 from LuciferYang/SPARK-32987-2. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- .../spark/scheduler/cluster/mesos/MesosClusterScheduler.scala | 2 +- .../scheduler/cluster/mesos/MesosClusterSchedulerSuite.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala index 96f6737894392..39168a5e3c7a5 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala @@ -550,7 +550,7 @@ private[spark] class MesosClusterScheduler( val driverConf = desc.conf.getAll .filter { case (key, _) => !replicatedOptionsExcludeList.contains(key) } .toMap - (defaultConf ++ driverConf).foreach { case (key, value) => + (defaultConf ++ driverConf).toSeq.sortBy(_._1).foreach { case (key, value) => options ++= Seq("--conf", s"${key}=${value}") } options.map(shellEscape) diff --git a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterSchedulerSuite.scala b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterSchedulerSuite.scala index 287c235d5b047..5ff7f99aadb2f 100644 --- a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterSchedulerSuite.scala +++ b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterSchedulerSuite.scala @@ -593,9 +593,9 @@ class MesosClusterSchedulerSuite extends SparkFunSuite with LocalSparkContext wi val expectedCmd = "cd spark-version*; " + "bin/spark-submit --name \"app name\" --master mesos://mesos://localhost:5050 " + "--driver-cores 1.0 --driver-memory 1000M --class Main " + - "--conf spark.executor.uri=s3a://bucket/spark-version.tgz " + "--conf \"another.conf=\\\\value\" " + "--conf \"spark.app.name=app name\" " + + "--conf spark.executor.uri=s3a://bucket/spark-version.tgz " + "../jar " + "\"--a=\\$2\" " + "--b \"x y z\"" From 4ae0f703954cbd837dd96ff453270148b327343b Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 24 Sep 2020 08:32:32 -0700 Subject: [PATCH 0103/1009] [SPARK-32954][YARN][TEST] Add jakarta.servlet-api test dependency to yarn module to avoid UTs badcase ### What changes were proposed in this pull request? 
When I tried to verify that the `resource-managers/yarn` module passed all UTs in Scala 2.13 , I found that there is a issue related to classpath order maybe blocked the UTs because there are more than one `servlet-api` dependency in spark now: - One is `javax.servlet:javax.servlet-api:3.10:compile` config in core/pom.xml, - The other is `jakarta.servlet:jakarta.servlet-api:4.0.3:test` cascaded by `org.glassfish.jersey.test-framework.providers` we can use `mvn dependency:tree` to check it . So when we execute `resource-managers/yarn` module test use ``` mvn clean test -pl resource-managers/yarn -Pyarn ``` or ``` mvn clean test -pl resource-managers/yarn -Pyarn -Pscala-2.13 ``` and if the position of `javax.servlet-api` in the in classpath is before `jakarta.servlet-api`, there are some cases failed in `YarnClusterSuite`, `YarnShuffleIntegrationSuite` and `YarnShuffleAuthSuite`. The failed reason as follow: ``` 20/09/18 19:14:07.486 launcher-proc-1 INFO YarnClusterDriver: Exception in thread "main" java.lang.ExceptionInInitializerError ... 20/09/18 19:14:07.486 launcher-proc-1 INFO YarnClusterDriver: Caused by: java.lang.SecurityException: class "javax.servlet.http.HttpSessionIdListener"'s signer information does not match signer information of other classes in the same package ... ``` ### Why are the changes needed? Avoid UTs error caused by classpath order . ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Scala 2.12: Pass the Jenkins or GitHub Action - Scala 2.13: Pass 2.13 Build GitHub Action and do the following: ``` dev/change-scala-version.sh 2.13 mvn clean install -DskipTests -pl resource-managers/yarn -Pyarn -Pscala-2.13 -am mvn clean test -pl resource-managers/yarn -Pyarn -Pscala-2.13 ``` ``` Tests: succeeded 136, failed 0, canceled 1, ignored 0, pending 0 All tests passed. ``` Closes #29824 from LuciferYang/yarn-tests-deps. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- resource-managers/yarn/pom.xml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index d081be94ba7ae..bc80769be2390 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -88,6 +88,13 @@ hadoop-client + + jakarta.servlet + jakarta.servlet-api + 4.0.3 + test + + com.google.guava From 8ccfbc114e3e8d9fc919bf05602e02a506566e31 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Thu, 24 Sep 2020 10:58:52 -0700 Subject: [PATCH 0104/1009] [SPARK-32381][CORE][SQL] Move and refactor parallel listing & non-location sensitive listing to core ### What changes were proposed in this pull request? This moves and refactors the parallel listing utilities from `InMemoryFileIndex` to Spark core so it can be reused by modules beside SQL. Along the process this also did some cleanups/refactorings: - Created a `HadoopFSUtils` class under core - Moved `InMemoryFileIndex.bulkListLeafFiles` into `HadoopFSUtils.parallelListLeafFiles`. It now depends on a `SparkContext` instead of `SparkSession` in SQL. Also added a few parameters which used to be read from `SparkSession.conf`: `ignoreMissingFiles`, `ignoreLocality`, `parallelismThreshold`, `parallelismMax ` and `filterFun` (for additional filtering support but we may be able to merge this with `filter` parameter in future). - Moved `InMemoryFileIndex.listLeafFiles` into `HadoopFSUtils.listLeafFiles` with similar changes above. ### Why are the changes needed? Currently the locality-aware parallel listing mechanism only applies to `InMemoryFileIndex`. 
By moving this to core, we can potentially reuse the same mechanism for other code paths as well. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Since this is mostly a refactoring, it relies on existing unit tests such as those for `InMemoryFileIndex`. Closes #29471 from sunchao/SPARK-32381. Lead-authored-by: Chao Sun Co-authored-by: Holden Karau Co-authored-by: Chao Sun Signed-off-by: Holden Karau --- .../org/apache/spark/util/HadoopFSUtils.scala | 360 ++++++++++++++++++ .../sql/execution/command/CommandUtils.scala | 2 +- .../datasources/InMemoryFileIndex.scala | 297 +-------------- 3 files changed, 376 insertions(+), 283 deletions(-) create mode 100644 core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala diff --git a/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala b/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala new file mode 100644 index 0000000000000..c0a135e04bac5 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala @@ -0,0 +1,360 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util + +import java.io.FileNotFoundException + +import scala.collection.mutable + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs._ +import org.apache.hadoop.fs.viewfs.ViewFileSystem +import org.apache.hadoop.hdfs.DistributedFileSystem + +import org.apache.spark._ +import org.apache.spark.annotation.Private +import org.apache.spark.internal.Logging +import org.apache.spark.metrics.source.HiveCatalogMetrics + +/** + * Utility functions to simplify and speed-up file listing. + */ +private[spark] object HadoopFSUtils extends Logging { + /** + * Lists a collection of paths recursively. Picks the listing strategy adaptively depending + * on the number of paths to list. + * + * This may only be called on the driver. + * + * @param sc Spark context used to run parallel listing. + * @param paths Input paths to list + * @param hadoopConf Hadoop configuration + * @param filter Path filter used to exclude leaf files from result + * @param isRootLevel Whether the input paths are at the root level, i.e., they are the root + * paths as opposed to nested paths encountered during recursive calls of this. + * @param ignoreMissingFiles Ignore missing files that happen during recursive listing + * (e.g., due to race conditions) + * @param ignoreLocality Whether to fetch data locality info when listing leaf files. If false, + * this will return `FileStatus` without `BlockLocation` info. + * @param parallelismThreshold The threshold to enable parallelism. If the number of input paths + * is smaller than this value, this will fallback to use + * sequential listing. 
+ * @param parallelismMax The maximum parallelism for listing. If the number of input paths is + * larger than this value, parallelism will be throttled to this value + * to avoid generating too many tasks. + * @param filterFun Optional predicate on the leaf files. Files who failed the check will be + * excluded from the results + * @return for each input path, the set of discovered files for the path + */ + def parallelListLeafFiles( + sc: SparkContext, + paths: Seq[Path], + hadoopConf: Configuration, + filter: PathFilter, + isRootLevel: Boolean, + ignoreMissingFiles: Boolean, + ignoreLocality: Boolean, + parallelismThreshold: Int, + parallelismMax: Int, + filterFun: Option[String => Boolean] = None): Seq[(Path, Seq[FileStatus])] = { + + // Short-circuits parallel listing when serial listing is likely to be faster. + if (paths.size <= parallelismThreshold) { + return paths.map { path => + val leafFiles = listLeafFiles( + path, + hadoopConf, + filter, + Some(sc), + ignoreMissingFiles = ignoreMissingFiles, + ignoreLocality = ignoreLocality, + isRootPath = isRootLevel, + parallelismThreshold = parallelismThreshold, + parallelismMax = parallelismMax, + filterFun = filterFun) + (path, leafFiles) + } + } + + logInfo(s"Listing leaf files and directories in parallel under ${paths.length} paths." + + s" The first several paths are: ${paths.take(10).mkString(", ")}.") + HiveCatalogMetrics.incrementParallelListingJobCount(1) + + val serializableConfiguration = new SerializableConfiguration(hadoopConf) + val serializedPaths = paths.map(_.toString) + + // Set the number of parallelism to prevent following file listing from generating many tasks + // in case of large #defaultParallelism. + val numParallelism = Math.min(paths.size, parallelismMax) + + val previousJobDescription = sc.getLocalProperty(SparkContext.SPARK_JOB_DESCRIPTION) + val statusMap = try { + val description = paths.size match { + case 0 => + "Listing leaf files and directories 0 paths" + case 1 => + s"Listing leaf files and directories for 1 path:
    ${paths(0)}" + case s => + s"Listing leaf files and directories for $s paths:
    ${paths(0)}, ..." + } + sc.setJobDescription(description) + sc + .parallelize(serializedPaths, numParallelism) + .mapPartitions { pathStrings => + val hadoopConf = serializableConfiguration.value + pathStrings.map(new Path(_)).toSeq.map { path => + val leafFiles = listLeafFiles( + path = path, + hadoopConf = hadoopConf, + filter = filter, + contextOpt = None, // Can't execute parallel scans on workers + ignoreMissingFiles = ignoreMissingFiles, + ignoreLocality = ignoreLocality, + isRootPath = isRootLevel, + filterFun = filterFun, + parallelismThreshold = Int.MaxValue, + parallelismMax = 0) + (path, leafFiles) + }.iterator + }.map { case (path, statuses) => + val serializableStatuses = statuses.map { status => + // Turn FileStatus into SerializableFileStatus so we can send it back to the driver + val blockLocations = status match { + case f: LocatedFileStatus => + f.getBlockLocations.map { loc => + SerializableBlockLocation( + loc.getNames, + loc.getHosts, + loc.getOffset, + loc.getLength) + } + + case _ => + Array.empty[SerializableBlockLocation] + } + + SerializableFileStatus( + status.getPath.toString, + status.getLen, + status.isDirectory, + status.getReplication, + status.getBlockSize, + status.getModificationTime, + status.getAccessTime, + blockLocations) + } + (path.toString, serializableStatuses) + }.collect() + } finally { + sc.setJobDescription(previousJobDescription) + } + + // turn SerializableFileStatus back to Status + statusMap.map { case (path, serializableStatuses) => + val statuses = serializableStatuses.map { f => + val blockLocations = f.blockLocations.map { loc => + new BlockLocation(loc.names, loc.hosts, loc.offset, loc.length) + } + new LocatedFileStatus( + new FileStatus( + f.length, f.isDir, f.blockReplication, f.blockSize, f.modificationTime, + new Path(f.path)), + blockLocations) + } + (new Path(path), statuses) + } + } + + // scalastyle:off argcount + /** + * Lists a single filesystem path recursively. If a `SparkContext` object is specified, this + * function may launch Spark jobs to parallelize listing based on `parallelismThreshold`. + * + * If sessionOpt is None, this may be called on executors. + * + * @return all children of path that match the specified filter. + */ + private def listLeafFiles( + path: Path, + hadoopConf: Configuration, + filter: PathFilter, + contextOpt: Option[SparkContext], + ignoreMissingFiles: Boolean, + ignoreLocality: Boolean, + isRootPath: Boolean, + filterFun: Option[String => Boolean], + parallelismThreshold: Int, + parallelismMax: Int): Seq[FileStatus] = { + + logTrace(s"Listing $path") + val fs = path.getFileSystem(hadoopConf) + + // Note that statuses only include FileStatus for the files and dirs directly under path, + // and does not include anything else recursively. + val statuses: Array[FileStatus] = try { + fs match { + // DistributedFileSystem overrides listLocatedStatus to make 1 single call to namenode + // to retrieve the file status with the file block location. The reason to still fallback + // to listStatus is because the default implementation would potentially throw a + // FileNotFoundException which is better handled by doing the lookups manually below. 
+ case (_: DistributedFileSystem | _: ViewFileSystem) if !ignoreLocality => + val remoteIter = fs.listLocatedStatus(path) + new Iterator[LocatedFileStatus]() { + def next(): LocatedFileStatus = remoteIter.next + def hasNext(): Boolean = remoteIter.hasNext + }.toArray + case _ => fs.listStatus(path) + } + } catch { + // If we are listing a root path for SQL (e.g. a top level directory of a table), we need to + // ignore FileNotFoundExceptions during this root level of the listing because + // + // (a) certain code paths might construct an InMemoryFileIndex with root paths that + // might not exist (i.e. not all callers are guaranteed to have checked + // path existence prior to constructing InMemoryFileIndex) and, + // (b) we need to ignore deleted root paths during REFRESH TABLE, otherwise we break + // existing behavior and break the ability drop SessionCatalog tables when tables' + // root directories have been deleted (which breaks a number of Spark's own tests). + // + // If we are NOT listing a root path then a FileNotFoundException here means that the + // directory was present in a previous level of file listing but is absent in this + // listing, likely indicating a race condition (e.g. concurrent table overwrite or S3 + // list inconsistency). + // + // The trade-off in supporting existing behaviors / use-cases is that we won't be + // able to detect race conditions involving root paths being deleted during + // InMemoryFileIndex construction. However, it's still a net improvement to detect and + // fail-fast on the non-root cases. For more info see the SPARK-27676 review discussion. + case _: FileNotFoundException if isRootPath || ignoreMissingFiles => + logWarning(s"The directory $path was not found. Was it deleted very recently?") + Array.empty[FileStatus] + } + + def doFilter(statuses: Array[FileStatus]) = filterFun match { + case Some(shouldFilterOut) => + statuses.filterNot(status => shouldFilterOut(status.getPath.getName)) + case None => + statuses + } + + val filteredStatuses = doFilter(statuses) + val allLeafStatuses = { + val (dirs, topLevelFiles) = filteredStatuses.partition(_.isDirectory) + val nestedFiles: Seq[FileStatus] = contextOpt match { + case Some(context) if dirs.size > parallelismThreshold => + parallelListLeafFiles( + context, + dirs.map(_.getPath), + hadoopConf = hadoopConf, + filter = filter, + isRootLevel = false, + ignoreMissingFiles = ignoreMissingFiles, + ignoreLocality = ignoreLocality, + filterFun = filterFun, + parallelismThreshold = parallelismThreshold, + parallelismMax = parallelismMax + ).flatMap(_._2) + case _ => + dirs.flatMap { dir => + listLeafFiles( + path = dir.getPath, + hadoopConf = hadoopConf, + filter = filter, + contextOpt = contextOpt, + ignoreMissingFiles = ignoreMissingFiles, + ignoreLocality = ignoreLocality, + isRootPath = false, + filterFun = filterFun, + parallelismThreshold = parallelismThreshold, + parallelismMax = parallelismMax) + } + } + val allFiles = topLevelFiles ++ nestedFiles + if (filter != null) allFiles.filter(f => filter.accept(f.getPath)) else allFiles + } + + val missingFiles = mutable.ArrayBuffer.empty[String] + val filteredLeafStatuses = doFilter(allLeafStatuses) + val resolvedLeafStatuses = filteredLeafStatuses.flatMap { + case f: LocatedFileStatus => + Some(f) + + // NOTE: + // + // - Although S3/S3A/S3N file system can be quite slow for remote file metadata + // operations, calling `getFileBlockLocations` does no harm here since these file system + // implementations don't actually issue RPC for this 
method. + // + // - Here we are calling `getFileBlockLocations` in a sequential manner, but it should not + // be a big deal since we always use to `parallelListLeafFiles` when the number of + // paths exceeds threshold. + case f if !ignoreLocality => + // The other constructor of LocatedFileStatus will call FileStatus.getPermission(), + // which is very slow on some file system (RawLocalFileSystem, which is launch a + // subprocess and parse the stdout). + try { + val locations = fs.getFileBlockLocations(f, 0, f.getLen).map { loc => + // Store BlockLocation objects to consume less memory + if (loc.getClass == classOf[BlockLocation]) { + loc + } else { + new BlockLocation(loc.getNames, loc.getHosts, loc.getOffset, loc.getLength) + } + } + val lfs = new LocatedFileStatus(f.getLen, f.isDirectory, f.getReplication, f.getBlockSize, + f.getModificationTime, 0, null, null, null, null, f.getPath, locations) + if (f.isSymlink) { + lfs.setSymlink(f.getSymlink) + } + Some(lfs) + } catch { + case _: FileNotFoundException if ignoreMissingFiles => + missingFiles += f.getPath.toString + None + } + + case f => Some(f) + } + + if (missingFiles.nonEmpty) { + logWarning( + s"the following files were missing during file scan:\n ${missingFiles.mkString("\n ")}") + } + + resolvedLeafStatuses + } + // scalastyle:on argcount + + /** A serializable variant of HDFS's BlockLocation. */ + private case class SerializableBlockLocation( + names: Array[String], + hosts: Array[String], + offset: Long, + length: Long) + + /** A serializable variant of HDFS's FileStatus. */ + private case class SerializableFileStatus( + path: String, + length: Long, + isDir: Boolean, + blockReplication: Short, + blockSize: Long, + modificationTime: Long, + accessTime: Long, + blockLocations: Array[SerializableBlockLocation]) +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala index c047be774d99a..8bf7504716f79 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala @@ -163,7 +163,7 @@ object CommandUtils extends Logging { .getConfString("hive.exec.stagingdir", ".hive-staging") val filter = new PathFilterIgnoreNonData(stagingDir) val sizes = InMemoryFileIndex.bulkListLeafFiles(paths.flatten, - sparkSession.sessionState.newHadoopConf(), filter, sparkSession, areRootPaths = true).map { + sparkSession.sessionState.newHadoopConf(), filter, sparkSession, isRootLevel = true).map { case (_, files) => files.map(_.getLen).sum } // the size is 0 where paths(i) is not defined and sizes(i) where it is defined diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala index a488ed16a835a..130894e9bc025 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala @@ -17,23 +17,18 @@ package org.apache.spark.sql.execution.datasources -import java.io.FileNotFoundException - import scala.collection.mutable import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs._ -import org.apache.hadoop.fs.viewfs.ViewFileSystem -import org.apache.hadoop.hdfs.DistributedFileSystem import org.apache.hadoop.mapred.{FileInputFormat, 
JobConf} -import org.apache.spark.SparkContext import org.apache.spark.internal.Logging import org.apache.spark.metrics.source.HiveCatalogMetrics import org.apache.spark.sql.SparkSession import org.apache.spark.sql.execution.streaming.FileStreamSink import org.apache.spark.sql.types.StructType -import org.apache.spark.util.SerializableConfiguration +import org.apache.spark.util.HadoopFSUtils /** @@ -133,7 +128,7 @@ class InMemoryFileIndex( } val filter = FileInputFormat.getInputPathFilter(new JobConf(hadoopConf, this.getClass)) val discovered = InMemoryFileIndex.bulkListLeafFiles( - pathsToFetch.toSeq, hadoopConf, filter, sparkSession, areRootPaths = true) + pathsToFetch.toSeq, hadoopConf, filter, sparkSession, isRootLevel = true) discovered.foreach { case (path, leafFiles) => HiveCatalogMetrics.incrementFilesDiscovered(leafFiles.size) fileStatusCache.putLeafFiles(path, leafFiles.toArray) @@ -147,286 +142,24 @@ class InMemoryFileIndex( object InMemoryFileIndex extends Logging { - /** A serializable variant of HDFS's BlockLocation. */ - private case class SerializableBlockLocation( - names: Array[String], - hosts: Array[String], - offset: Long, - length: Long) - - /** A serializable variant of HDFS's FileStatus. */ - private case class SerializableFileStatus( - path: String, - length: Long, - isDir: Boolean, - blockReplication: Short, - blockSize: Long, - modificationTime: Long, - accessTime: Long, - blockLocations: Array[SerializableBlockLocation]) - - /** - * Lists a collection of paths recursively. Picks the listing strategy adaptively depending - * on the number of paths to list. - * - * This may only be called on the driver. - * - * @return for each input path, the set of discovered files for the path - */ private[sql] def bulkListLeafFiles( paths: Seq[Path], hadoopConf: Configuration, filter: PathFilter, sparkSession: SparkSession, - areRootPaths: Boolean): Seq[(Path, Seq[FileStatus])] = { - - val ignoreMissingFiles = sparkSession.sessionState.conf.ignoreMissingFiles - val ignoreLocality = sparkSession.sessionState.conf.ignoreDataLocality - - // Short-circuits parallel listing when serial listing is likely to be faster. - if (paths.size <= sparkSession.sessionState.conf.parallelPartitionDiscoveryThreshold) { - return paths.map { path => - val leafFiles = listLeafFiles( - path, - hadoopConf, - filter, - Some(sparkSession), - ignoreMissingFiles = ignoreMissingFiles, - ignoreLocality = ignoreLocality, - isRootPath = areRootPaths) - (path, leafFiles) - } - } - - logInfo(s"Listing leaf files and directories in parallel under ${paths.length} paths." + - s" The first several paths are: ${paths.take(10).mkString(", ")}.") - HiveCatalogMetrics.incrementParallelListingJobCount(1) - - val sparkContext = sparkSession.sparkContext - val serializableConfiguration = new SerializableConfiguration(hadoopConf) - val serializedPaths = paths.map(_.toString) - val parallelPartitionDiscoveryParallelism = - sparkSession.sessionState.conf.parallelPartitionDiscoveryParallelism - - // Set the number of parallelism to prevent following file listing from generating many tasks - // in case of large #defaultParallelism. - val numParallelism = Math.min(paths.size, parallelPartitionDiscoveryParallelism) - - val previousJobDescription = sparkContext.getLocalProperty(SparkContext.SPARK_JOB_DESCRIPTION) - val statusMap = try { - val description = paths.size match { - case 0 => - s"Listing leaf files and directories 0 paths" - case 1 => - s"Listing leaf files and directories for 1 path:
    ${paths(0)}" - case s => - s"Listing leaf files and directories for $s paths:
    ${paths(0)}, ..." - } - sparkContext.setJobDescription(description) - sparkContext - .parallelize(serializedPaths, numParallelism) - .mapPartitions { pathStrings => - val hadoopConf = serializableConfiguration.value - pathStrings.map(new Path(_)).toSeq.map { path => - val leafFiles = listLeafFiles( - path, - hadoopConf, - filter, - None, - ignoreMissingFiles = ignoreMissingFiles, - ignoreLocality = ignoreLocality, - isRootPath = areRootPaths) - (path, leafFiles) - }.iterator - }.map { case (path, statuses) => - val serializableStatuses = statuses.map { status => - // Turn FileStatus into SerializableFileStatus so we can send it back to the driver - val blockLocations = status match { - case f: LocatedFileStatus => - f.getBlockLocations.map { loc => - SerializableBlockLocation( - loc.getNames, - loc.getHosts, - loc.getOffset, - loc.getLength) - } - - case _ => - Array.empty[SerializableBlockLocation] - } - - SerializableFileStatus( - status.getPath.toString, - status.getLen, - status.isDirectory, - status.getReplication, - status.getBlockSize, - status.getModificationTime, - status.getAccessTime, - blockLocations) - } - (path.toString, serializableStatuses) - }.collect() - } finally { - sparkContext.setJobDescription(previousJobDescription) - } - - // turn SerializableFileStatus back to Status - statusMap.map { case (path, serializableStatuses) => - val statuses = serializableStatuses.map { f => - val blockLocations = f.blockLocations.map { loc => - new BlockLocation(loc.names, loc.hosts, loc.offset, loc.length) - } - new LocatedFileStatus( - new FileStatus( - f.length, f.isDir, f.blockReplication, f.blockSize, f.modificationTime, - new Path(f.path)), - blockLocations) - } - (new Path(path), statuses) - } - } - - /** - * Lists a single filesystem path recursively. If a SparkSession object is specified, this - * function may launch Spark jobs to parallelize listing. - * - * If sessionOpt is None, this may be called on executors. - * - * @return all children of path that match the specified filter. - */ - private def listLeafFiles( - path: Path, - hadoopConf: Configuration, - filter: PathFilter, - sessionOpt: Option[SparkSession], - ignoreMissingFiles: Boolean, - ignoreLocality: Boolean, - isRootPath: Boolean): Seq[FileStatus] = { - logTrace(s"Listing $path") - val fs = path.getFileSystem(hadoopConf) - - // Note that statuses only include FileStatus for the files and dirs directly under path, - // and does not include anything else recursively. - val statuses: Array[FileStatus] = try { - fs match { - // DistributedFileSystem overrides listLocatedStatus to make 1 single call to namenode - // to retrieve the file status with the file block location. The reason to still fallback - // to listStatus is because the default implementation would potentially throw a - // FileNotFoundException which is better handled by doing the lookups manually below. - case (_: DistributedFileSystem | _: ViewFileSystem) if !ignoreLocality => - val remoteIter = fs.listLocatedStatus(path) - new Iterator[LocatedFileStatus]() { - def next(): LocatedFileStatus = remoteIter.next - def hasNext(): Boolean = remoteIter.hasNext - }.toArray - case _ => fs.listStatus(path) - } - } catch { - // If we are listing a root path (e.g. a top level directory of a table), we need to - // ignore FileNotFoundExceptions during this root level of the listing because - // - // (a) certain code paths might construct an InMemoryFileIndex with root paths that - // might not exist (i.e. 
not all callers are guaranteed to have checked - // path existence prior to constructing InMemoryFileIndex) and, - // (b) we need to ignore deleted root paths during REFRESH TABLE, otherwise we break - // existing behavior and break the ability drop SessionCatalog tables when tables' - // root directories have been deleted (which breaks a number of Spark's own tests). - // - // If we are NOT listing a root path then a FileNotFoundException here means that the - // directory was present in a previous level of file listing but is absent in this - // listing, likely indicating a race condition (e.g. concurrent table overwrite or S3 - // list inconsistency). - // - // The trade-off in supporting existing behaviors / use-cases is that we won't be - // able to detect race conditions involving root paths being deleted during - // InMemoryFileIndex construction. However, it's still a net improvement to detect and - // fail-fast on the non-root cases. For more info see the SPARK-27676 review discussion. - case _: FileNotFoundException if isRootPath || ignoreMissingFiles => - logWarning(s"The directory $path was not found. Was it deleted very recently?") - Array.empty[FileStatus] - } - - val filteredStatuses = statuses.filterNot(status => shouldFilterOut(status.getPath.getName)) - - val allLeafStatuses = { - val (dirs, topLevelFiles) = filteredStatuses.partition(_.isDirectory) - val nestedFiles: Seq[FileStatus] = sessionOpt match { - case Some(session) => - bulkListLeafFiles( - dirs.map(_.getPath), - hadoopConf, - filter, - session, - areRootPaths = false - ).flatMap(_._2) - case _ => - dirs.flatMap { dir => - listLeafFiles( - dir.getPath, - hadoopConf, - filter, - sessionOpt, - ignoreMissingFiles = ignoreMissingFiles, - ignoreLocality = ignoreLocality, - isRootPath = false) - } - } - val allFiles = topLevelFiles ++ nestedFiles - if (filter != null) allFiles.filter(f => filter.accept(f.getPath)) else allFiles - } - - val missingFiles = mutable.ArrayBuffer.empty[String] - val filteredLeafStatuses = allLeafStatuses.filterNot( - status => shouldFilterOut(status.getPath.getName)) - val resolvedLeafStatuses = filteredLeafStatuses.flatMap { - case f: LocatedFileStatus => - Some(f) - - // NOTE: - // - // - Although S3/S3A/S3N file system can be quite slow for remote file metadata - // operations, calling `getFileBlockLocations` does no harm here since these file system - // implementations don't actually issue RPC for this method. - // - // - Here we are calling `getFileBlockLocations` in a sequential manner, but it should not - // be a big deal since we always use to `bulkListLeafFiles` when the number of - // paths exceeds threshold. - case f if !ignoreLocality => - // The other constructor of LocatedFileStatus will call FileStatus.getPermission(), - // which is very slow on some file system (RawLocalFileSystem, which is launch a - // subprocess and parse the stdout). 
- try { - val locations = fs.getFileBlockLocations(f, 0, f.getLen).map { loc => - // Store BlockLocation objects to consume less memory - if (loc.getClass == classOf[BlockLocation]) { - loc - } else { - new BlockLocation(loc.getNames, loc.getHosts, loc.getOffset, loc.getLength) - } - } - val lfs = new LocatedFileStatus(f.getLen, f.isDirectory, f.getReplication, f.getBlockSize, - f.getModificationTime, 0, null, null, null, null, f.getPath, locations) - if (f.isSymlink) { - lfs.setSymlink(f.getSymlink) - } - Some(lfs) - } catch { - case _: FileNotFoundException if ignoreMissingFiles => - missingFiles += f.getPath.toString - None - } - - case f => Some(f) - } - - if (missingFiles.nonEmpty) { - logWarning( - s"the following files were missing during file scan:\n ${missingFiles.mkString("\n ")}") - } - - resolvedLeafStatuses - } + isRootLevel: Boolean): Seq[(Path, Seq[FileStatus])] = { + HadoopFSUtils.parallelListLeafFiles( + sc = sparkSession.sparkContext, + paths = paths, + hadoopConf = hadoopConf, + filter = filter, + isRootLevel = isRootLevel, + ignoreMissingFiles = sparkSession.sessionState.conf.ignoreMissingFiles, + ignoreLocality = sparkSession.sessionState.conf.ignoreDataLocality, + parallelismThreshold = sparkSession.sessionState.conf.parallelPartitionDiscoveryThreshold, + parallelismMax = sparkSession.sessionState.conf.parallelPartitionDiscoveryParallelism, + filterFun = Some(shouldFilterOut)) + } /** Checks if we should filter out this path name. */ def shouldFilterOut(pathName: String): Boolean = { From d7aa3b56e8dbdc5582565ce3427f368edbabc708 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 24 Sep 2020 16:22:08 -0700 Subject: [PATCH 0105/1009] [SPARK-32889][SQL][TESTS][FOLLOWUP] Skip special column names test in Hive 1.2 ### What changes were proposed in this pull request? This PR is a followup of SPARK-32889 in order to ignore the special column names test in `hive-1.2` profile. ### Why are the changes needed? Hive 1.2 is too old to support special column names because it doesn't use Apache ORC. This will recover our `hive-1.2` Jenkins job. - https://amplab.cs.berkeley.edu/jenkins/view/Spark%20QA%20Test%20(Dashboard)/job/spark-master-test-sbt-hadoop-2.7-hive-1.2/ - https://amplab.cs.berkeley.edu/jenkins/view/Spark%20QA%20Test%20(Dashboard)/job/spark-master-test-maven-hadoop-2.7-hive-1.2/ ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the test with Hive 1.2 profile. Closes #29867 from dongjoon-hyun/SPARK-32889-2. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/hive/execution/SQLQuerySuite.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index a69a949e3a3a2..96bca5404831d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -2242,6 +2242,7 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi } test("SPARK-32889: ORC table column name supports special characters") { + assume(HiveUtils.isHive23) // " " "," is not allowed. 
Seq("$", ";", "{", "}", "(", ")", "\n", "\t", "=").foreach { name => val source = "ORC" From e9c98c910aee10efe447dc4fff951e748441d10a Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Fri, 25 Sep 2020 04:29:09 +0000 Subject: [PATCH 0106/1009] [SPARK-32990][SQL] Migrate REFRESH TABLE to use UnresolvedTableOrView to resolve the identifier ### What changes were proposed in this pull request? This PR proposes to migrate `REFRESH TABLE` to use `UnresolvedTableOrView` to resolve the table/view identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). ### Why are the changes needed? The current behavior is not consistent between v1 and v2 commands when resolving a temp view. In v2, the `t` in the following example is resolved to a table: ```scala sql("CREATE TABLE testcat.ns.t (id bigint) USING foo") sql("CREATE TEMPORARY VIEW t AS SELECT 2") sql("USE testcat.ns") sql("REFRESH TABLE t") // 't' is resolved to testcat.ns.t ``` whereas in v1, the `t` is resolved to a temp view: ```scala sql("CREATE DATABASE test") sql("CREATE TABLE spark_catalog.test.t (id bigint) USING csv") sql("CREATE TEMPORARY VIEW t AS SELECT 2") sql("USE spark_catalog.test") sql("REFRESH TABLE t") // 't' is resolved to a temp view ``` ### Does this PR introduce _any_ user-facing change? After this PR, `REFRESH TABLE t` is resolved to a temp view `t` instead of `testcat.ns.t`. ### How was this patch tested? Added a new test Closes #29866 from imback82/refresh_table_consistent. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../catalyst/analysis/ResolveCatalogs.scala | 3 --- .../sql/catalyst/parser/AstBuilder.scala | 4 ++-- .../catalyst/plans/logical/statements.scala | 5 ----- .../catalyst/plans/logical/v2Commands.scala | 6 +++--- .../sql/catalyst/parser/DDLParserSuite.scala | 2 +- .../analysis/ResolveSessionCatalog.scala | 10 ++++++---- .../spark/sql/execution/command/tables.scala | 19 +++++++++++++++++++ .../spark/sql/execution/datasources/ddl.scala | 11 ----------- .../datasources/v2/DataSourceV2Strategy.scala | 4 ++-- .../sql/connector/DataSourceV2SQLSuite.scala | 17 +++++++++++++++++ 10 files changed, 50 insertions(+), 31 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala index a40604045978c..0d0f80be359e7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala @@ -174,9 +174,6 @@ class ResolveCatalogs(val catalogManager: CatalogManager) writeOptions = c.writeOptions, ignoreIfExists = c.ifNotExists) - case RefreshTableStatement(NonSessionCatalogAndTable(catalog, tbl)) => - RefreshTable(catalog.asTableCatalog, tbl.asIdentifier) - case c @ ReplaceTableStatement( NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _) => assertNoNullTypeInSchema(c.tableSchema) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 6682b0575430a..f133235a2636e 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3364,7 +3364,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } /** - * Create a [[RefreshTableStatement]]. + * Create a [[RefreshTable]]. * * For example: * {{{ @@ -3372,7 +3372,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging * }}} */ override def visitRefreshTable(ctx: RefreshTableContext): LogicalPlan = withOrigin(ctx) { - RefreshTableStatement(visitMultipartIdentifier(ctx.multipartIdentifier())) + RefreshTable(UnresolvedTableOrView(visitMultipartIdentifier(ctx.multipartIdentifier()))) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index 19831a7b5ef84..d09e08d105c21 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -429,11 +429,6 @@ case class ShowPartitionsStatement( tableName: Seq[String], partitionSpec: Option[TablePartitionSpec]) extends ParsedStatement -/** - * A REFRESH TABLE statement, as parsed from SQL - */ -case class RefreshTableStatement(tableName: Seq[String]) extends ParsedStatement - /** * A SHOW COLUMNS statement, as parsed from SQL */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 70e03c23fd115..fa0a10c3a5a45 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -463,9 +463,9 @@ case class SetCatalogAndNamespace( /** * The logical plan of the REFRESH TABLE command that works for v2 catalogs. */ -case class RefreshTable( - catalog: TableCatalog, - ident: Identifier) extends Command +case class RefreshTable(child: LogicalPlan) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} /** * The logical plan of the SHOW CURRENT NAMESPACE command that works for v2 catalogs. 
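As a quick illustration of the parser change above (a sketch only, mirroring the expectation added to DDLParserSuite below): `REFRESH TABLE a.b.c` now parses to a `RefreshTable` node wrapping an unresolved identifier instead of a dedicated statement node.

```scala
// Parsed plan for "REFRESH TABLE a.b.c" after this change:
RefreshTable(UnresolvedTableOrView(Seq("a", "b", "c")))
// Previously: RefreshTableStatement(Seq("a", "b", "c"))
```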
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index ac6af4f4e3231..378026b1ce9c6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1665,7 +1665,7 @@ class DDLParserSuite extends AnalysisTest { test("REFRESH TABLE") { comparePlans( parsePlan("REFRESH TABLE a.b.c"), - RefreshTableStatement(Seq("a", "b", "c"))) + RefreshTable(UnresolvedTableOrView(Seq("a", "b", "c")))) } test("show columns") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 8923d5c86e19a..11493ad59a760 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, CatalogV2Util, LookupCatalog, SupportsNamespaces, TableCatalog, TableChange, V1Table} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.command._ -import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource, RefreshTable} +import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource} import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{HIVE_TYPE_STRING, HiveStringType, MetadataBuilder, StructField, StructType} @@ -318,9 +318,11 @@ class ResolveSessionCatalog( ignoreIfExists = c.ifNotExists) } - // v1 REFRESH TABLE supports temp view. - case RefreshTableStatement(TempViewOrV1Table(name)) => - RefreshTable(name.asTableIdentifier) + case RefreshTable(r @ ResolvedTable(_, _, _: V1Table)) if isSessionCatalog(r.catalog) => + RefreshTableCommand(r.identifier.asTableIdentifier) + + case RefreshTable(r: ResolvedView) => + RefreshTableCommand(r.identifier.asTableIdentifier) // For REPLACE TABLE [AS SELECT], we should fail if the catalog is resolved to the // session catalog and the table provider is not v2. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index f94c9712a31cc..e4be2a8d3bb8e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -1381,3 +1381,22 @@ case class ShowCreateTableAsSerdeCommand(table: TableIdentifier) } } } + +/** + * A command to refresh all cached entries associated with the table. + * + * The syntax of using this command in SQL is: + * {{{ + * REFRESH TABLE [db_name.]table_name + * }}} + */ +case class RefreshTableCommand(tableIdent: TableIdentifier) + extends RunnableCommand { + + override def run(sparkSession: SparkSession): Seq[Row] = { + // Refresh the given table's metadata. If this table is cached as an InMemoryRelation, + // drop the original cached version and make the new version cached lazily. 
+ sparkSession.catalog.refreshTable(tableIdent.quotedString) + Seq.empty[Row] + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala index 4022640224424..e455fae4675f4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala @@ -103,17 +103,6 @@ case class CreateTempViewUsing( } } -case class RefreshTable(tableIdent: TableIdentifier) - extends RunnableCommand { - - override def run(sparkSession: SparkSession): Seq[Row] = { - // Refresh the given table's metadata. If this table is cached as an InMemoryRelation, - // drop the original cached version and make the new version cached lazily. - sparkSession.catalog.refreshTable(tableIdent.quotedString) - Seq.empty[Row] - } -} - case class RefreshResource(path: String) extends RunnableCommand { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index fe4f8bc83fcff..c5ddba43a56aa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -127,8 +127,8 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat propsWithOwner, writeOptions, ifNotExists) :: Nil } - case RefreshTable(catalog, ident) => - RefreshTableExec(catalog, ident) :: Nil + case RefreshTable(r: ResolvedTable) => + RefreshTableExec(r.catalog, r.identifier) :: Nil case ReplaceTable(catalog, ident, schema, parts, props, orCreate) => val propsWithOwner = CatalogV2Util.withDefaultOwnership(props) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 0a4ece83717d5..e3782c7409198 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -1729,6 +1729,23 @@ class DataSourceV2SQLSuite } } + test("SPARK-32990: REFRESH TABLE should resolve to a temporary view first") { + withTable("testcat.ns.t") { + withTempView("t") { + sql("CREATE TABLE testcat.ns.t (id bigint) USING foo") + sql("CREATE TEMPORARY VIEW t AS SELECT 2") + sql("USE testcat.ns") + + val testCatalog = catalog("testcat").asTableCatalog.asInstanceOf[InMemoryTableCatalog] + val identifier = Identifier.of(Array("ns"), "t") + + assert(!testCatalog.isTableInvalidated(identifier)) + sql("REFRESH TABLE t") + assert(!testCatalog.isTableInvalidated(identifier)) + } + } + } + test("REPLACE TABLE: v1 table") { val e = intercept[AnalysisException] { sql(s"CREATE OR REPLACE TABLE tbl (a int) USING ${classOf[SimpleScanSource].getName}") From f2fc96667481169affbc20cec95b9fc1c19fc7c3 Mon Sep 17 00:00:00 2001 From: ulysses Date: Thu, 24 Sep 2020 22:16:05 -0700 Subject: [PATCH 0107/1009] [SPARK-32877][SQL][TEST] Add test for Hive UDF complex decimal type ### What changes were proposed in this pull request? Add test to cover Hive UDF whose input contains complex decimal type. Add comment to explain why we can't make `HiveSimpleUDF` extend `ImplicitTypeCasts`. ### Why are the changes needed? 
For better test coverage with Hive which we compatible or not. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Add test. Closes #29863 from ulysses-you/SPARK-32877-test. Authored-by: ulysses Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/hive/hiveUDFs.scala | 5 +++ .../sql/hive/execution/HiveUDFSuite.scala | 31 +++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala index 8ad5cb70d248b..462e67c4ed35c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala @@ -41,6 +41,11 @@ import org.apache.spark.sql.hive.HiveShim._ import org.apache.spark.sql.types._ import org.apache.spark.util.Utils +/** + * Here we cannot extends `ImplicitTypeCasts` to compatible with UDF input data type, the reason is: + * we use children data type to reflect UDF method first and will get exception if it fails so that + * we can never go into `ImplicitTypeCasts`. + */ private[hive] case class HiveSimpleUDF( name: String, funcWrapper: HiveFunctionWrapper, children: Seq[Expression]) extends Expression diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala index 057f2f4ce01be..f5cd4f9f843d8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala @@ -34,6 +34,7 @@ import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.plans.logical.Project import org.apache.spark.sql.execution.command.FunctionsCommand import org.apache.spark.sql.functions.max +import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils @@ -658,6 +659,25 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils { } } + test("SPARK-32877: add test for Hive UDF complex decimal type") { + assume(HiveUtils.isHive23) + withUserDefinedFunction("testArraySum" -> false) { + sql(s"CREATE FUNCTION testArraySum AS '${classOf[ArraySumUDF].getName}'") + checkAnswer( + sql("SELECT testArraySum(array(1, 1.1, 1.2))"), + Seq(Row(3.3))) + + val msg = intercept[AnalysisException] { + sql("SELECT testArraySum(1)") + }.getMessage + assert(msg.contains(s"No handler for UDF/UDAF/UDTF '${classOf[ArraySumUDF].getName}'")) + + val msg2 = intercept[AnalysisException] { + sql("SELECT testArraySum(1, 2)") + }.getMessage + assert(msg2.contains(s"No handler for UDF/UDAF/UDTF '${classOf[ArraySumUDF].getName}'")) + } + } } class TestPair(x: Int, y: Int) extends Writable with Serializable { @@ -741,3 +761,14 @@ class StatelessUDF extends UDF { result } } + +class ArraySumUDF extends UDF { + import scala.collection.JavaConverters._ + def evaluate(values: java.util.List[java.lang.Double]): java.lang.Double = { + var r = 0d + for (v <- values.asScala) { + r += v + } + r + } +} From 9e6882feca0800d5d4f9920886cb5dae73bbe1d4 Mon Sep 17 00:00:00 2001 From: Yuanjian Li Date: Fri, 25 Sep 2020 06:50:24 +0000 Subject: [PATCH 0108/1009] [SPARK-32885][SS] Add DataStreamReader.table API ### What changes were proposed in this pull request? 
This pr aims to add a new `table` API in DataStreamReader, which is similar to the table API in DataFrameReader. ### Why are the changes needed? Users can directly use this API to get a Streaming DataFrame on a table. Below is a simple example: Application 1 for initializing and starting the streaming job: ``` val path = "/home/yuanjian.li/runtime/to_be_deleted" val tblName = "my_table" // Write some data to `my_table` spark.range(3).write.format("parquet").option("path", path).saveAsTable(tblName) // Read the table as a streaming source, write result to destination directory val table = spark.readStream.table(tblName) table.writeStream.format("parquet").option("checkpointLocation", "/home/yuanjian.li/runtime/to_be_deleted_ck").start("/home/yuanjian.li/runtime/to_be_deleted_2") ``` Application 2 for appending new data: ``` // Append new data into the path spark.range(5).write.format("parquet").option("path", "/home/yuanjian.li/runtime/to_be_deleted").mode("append").save() ``` Check result: ``` // The desitination directory should contains all written data spark.read.parquet("/home/yuanjian.li/runtime/to_be_deleted_2").show() ``` ### Does this PR introduce _any_ user-facing change? Yes, a new API added. ### How was this patch tested? New UT added and integrated testing. Closes #29756 from xuanyuanking/SPARK-32885. Authored-by: Yuanjian Li Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 85 +++++-- .../catalyst/analysis/CTESubstitution.scala | 2 +- .../sql/catalyst/analysis/ResolveHints.scala | 4 +- .../sql/catalyst/analysis/unresolved.scala | 11 +- .../sql/catalyst/catalog/interface.scala | 3 +- .../streaming/StreamingRelationV2.scala | 4 +- .../spark/sql/connector/catalog/V1Table.scala | 8 + .../spark/sql/execution/command/views.scala | 2 +- .../datasources/DataSourceStrategy.scala | 41 ++- .../streaming/MicroBatchExecution.scala | 2 +- .../continuous/ContinuousExecution.scala | 2 +- .../sql/execution/streaming/memory.scala | 2 + .../sql/streaming/DataStreamReader.scala | 21 +- .../sql-tests/results/explain-aqe.sql.out | 2 +- .../sql-tests/results/explain.sql.out | 2 +- .../connector/TableCapabilityCheckSuite.scala | 2 + .../test/DataStreamTableAPISuite.scala | 234 ++++++++++++++++++ .../apache/spark/sql/hive/test/TestHive.scala | 2 +- 18 files changed, 391 insertions(+), 38 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 7d591eeea2b79..6e1f371b1a2b5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -36,6 +36,7 @@ import org.apache.spark.sql.catalyst.expressions.objects._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ +import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 import org.apache.spark.sql.catalyst.trees.TreeNodeRef import org.apache.spark.sql.catalyst.util.toPrettySQL import org.apache.spark.sql.connector.catalog._ @@ -846,9 +847,9 @@ class Analyzer( */ object ResolveTempViews extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { - case u @ UnresolvedRelation(ident, _) => - lookupTempView(ident).getOrElse(u) - case i 
@ InsertIntoStatement(UnresolvedRelation(ident, _), _, _, _, _) => + case u @ UnresolvedRelation(ident, _, isStreaming) => + lookupTempView(ident, isStreaming).getOrElse(u) + case i @ InsertIntoStatement(UnresolvedRelation(ident, _, false), _, _, _, _) => lookupTempView(ident) .map(view => i.copy(table = view)) .getOrElse(i) @@ -861,15 +862,22 @@ class Analyzer( lookupTempView(ident).map(_ => ResolvedView(ident.asIdentifier)).getOrElse(u) } - def lookupTempView(identifier: Seq[String]): Option[LogicalPlan] = { + def lookupTempView( + identifier: Seq[String], isStreaming: Boolean = false): Option[LogicalPlan] = { // Permanent View can't refer to temp views, no need to lookup at all. if (isResolvingView) return None - identifier match { + val tmpView = identifier match { case Seq(part1) => v1SessionCatalog.lookupTempView(part1) case Seq(part1, part2) => v1SessionCatalog.lookupGlobalTempView(part1, part2) case _ => None } + + if (isStreaming && tmpView.nonEmpty && !tmpView.get.isStreaming) { + throw new AnalysisException(s"${identifier.quoted} is not a temp view of streaming " + + s"logical plan, please use batch API such as `DataFrameReader.table` to read it.") + } + tmpView } } @@ -895,10 +903,13 @@ class Analyzer( object ResolveTables extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = ResolveTempViews(plan).resolveOperatorsUp { case u: UnresolvedRelation => - lookupV2Relation(u.multipartIdentifier, u.options) - .map { rel => - val ident = rel.identifier.get - SubqueryAlias(rel.catalog.get.name +: ident.namespace :+ ident.name, rel) + lookupV2Relation(u.multipartIdentifier, u.options, u.isStreaming) + .map { relation => + val (catalog, ident) = relation match { + case ds: DataSourceV2Relation => (ds.catalog, ds.identifier.get) + case s: StreamingRelationV2 => (s.catalog, s.identifier.get) + } + SubqueryAlias(catalog.get.name +: ident.namespace :+ ident.name, relation) }.getOrElse(u) case u @ UnresolvedTable(NonSessionCatalogAndIdentifier(catalog, ident)) => @@ -911,8 +922,9 @@ class Analyzer( .map(ResolvedTable(catalog.asTableCatalog, ident, _)) .getOrElse(u) - case i @ InsertIntoStatement(u: UnresolvedRelation, _, _, _, _) if i.query.resolved => - lookupV2Relation(u.multipartIdentifier, u.options) + case i @ InsertIntoStatement(u @ UnresolvedRelation(_, _, false), _, _, _, _) + if i.query.resolved => + lookupV2Relation(u.multipartIdentifier, u.options, false) .map(v2Relation => i.copy(table = v2Relation)) .getOrElse(i) @@ -930,12 +942,18 @@ class Analyzer( */ private def lookupV2Relation( identifier: Seq[String], - options: CaseInsensitiveStringMap): Option[DataSourceV2Relation] = + options: CaseInsensitiveStringMap, + isStreaming: Boolean): Option[LogicalPlan] = expandRelationName(identifier) match { case NonSessionCatalogAndIdentifier(catalog, ident) => CatalogV2Util.loadTable(catalog, ident) match { case Some(table) => - Some(DataSourceV2Relation.create(table, Some(catalog), Some(ident), options)) + if (isStreaming) { + Some(StreamingRelationV2(None, table.name, table, options, + table.schema.toAttributes, Some(catalog), Some(ident), None)) + } else { + Some(DataSourceV2Relation.create(table, Some(catalog), Some(ident), options)) + } case None => None } case _ => None @@ -976,8 +994,8 @@ class Analyzer( def apply(plan: LogicalPlan): LogicalPlan = ResolveTempViews(plan).resolveOperatorsUp { case i @ InsertIntoStatement(table, _, _, _, _) if i.query.resolved => val relation = table match { - case u: UnresolvedRelation => - lookupRelation(u.multipartIdentifier, 
u.options).getOrElse(u) + case u @ UnresolvedRelation(_, _, false) => + lookupRelation(u.multipartIdentifier, u.options, false).getOrElse(u) case other => other } @@ -988,7 +1006,8 @@ class Analyzer( } case u: UnresolvedRelation => - lookupRelation(u.multipartIdentifier, u.options).map(resolveViews).getOrElse(u) + lookupRelation(u.multipartIdentifier, u.options, u.isStreaming) + .map(resolveViews).getOrElse(u) case u @ UnresolvedTable(identifier) => lookupTableOrView(identifier).map { @@ -1020,16 +1039,40 @@ class Analyzer( // 3) If a v1 table is found, create a v1 relation. Otherwise, create a v2 relation. private def lookupRelation( identifier: Seq[String], - options: CaseInsensitiveStringMap): Option[LogicalPlan] = { + options: CaseInsensitiveStringMap, + isStreaming: Boolean): Option[LogicalPlan] = { expandRelationName(identifier) match { case SessionCatalogAndIdentifier(catalog, ident) => lazy val loaded = CatalogV2Util.loadTable(catalog, ident).map { case v1Table: V1Table => - v1SessionCatalog.getRelation(v1Table.v1Table, options) + if (isStreaming) { + if (v1Table.v1Table.tableType == CatalogTableType.VIEW) { + throw new AnalysisException(s"${identifier.quoted} is a permanent view, " + + "which is not supported by streaming reading API such as " + + "`DataStreamReader.table` yet.") + } + SubqueryAlias( + catalog.name +: ident.asMultipartIdentifier, + UnresolvedCatalogRelation(v1Table.v1Table, options, isStreaming = true)) + } else { + v1SessionCatalog.getRelation(v1Table.v1Table, options) + } case table => - SubqueryAlias( - catalog.name +: ident.asMultipartIdentifier, - DataSourceV2Relation.create(table, Some(catalog), Some(ident), options)) + if (isStreaming) { + val v1Fallback = table match { + case withFallback: V2TableWithV1Fallback => + Some(UnresolvedCatalogRelation(withFallback.v1Table, isStreaming = true)) + case _ => None + } + SubqueryAlias( + catalog.name +: ident.asMultipartIdentifier, + StreamingRelationV2(None, table.name, table, options, table.schema.toAttributes, + Some(catalog), Some(ident), v1Fallback)) + } else { + SubqueryAlias( + catalog.name +: ident.asMultipartIdentifier, + DataSourceV2Relation.create(table, Some(catalog), Some(ident), options)) + } } val key = catalog.name +: ident.namespace :+ ident.name AnalysisContext.get.relationCache.get(key).map(_.transform { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala index b177aa8dd0aa7..8d3b04c202962 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala @@ -171,7 +171,7 @@ object CTESubstitution extends Rule[LogicalPlan] { plan: LogicalPlan, cteRelations: Seq[(String, LogicalPlan)]): LogicalPlan = plan resolveOperatorsUp { - case u @ UnresolvedRelation(Seq(table), _) => + case u @ UnresolvedRelation(Seq(table), _, _) => cteRelations.find(r => plan.conf.resolver(r._1, table)).map(_._2).getOrElse(u) case other => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala index 1f0de78b696fd..c0a9414d61f8f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala @@ -105,7 
+105,7 @@ object ResolveHints { val newNode = CurrentOrigin.withOrigin(plan.origin) { plan match { - case ResolvedHint(u @ UnresolvedRelation(ident, _), hint) + case ResolvedHint(u @ UnresolvedRelation(ident, _, _), hint) if matchedIdentifierInHint(ident) => ResolvedHint(u, createHintInfo(hintName).merge(hint, hintErrorHandler)) @@ -113,7 +113,7 @@ object ResolveHints { if matchedIdentifierInHint(extractIdentifier(r)) => ResolvedHint(r, createHintInfo(hintName).merge(hint, hintErrorHandler)) - case UnresolvedRelation(ident, _) if matchedIdentifierInHint(ident) => + case UnresolvedRelation(ident, _, _) if matchedIdentifierInHint(ident) => ResolvedHint(plan, createHintInfo(hintName)) case r: SubqueryAlias if matchedIdentifierInHint(extractIdentifier(r)) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 62000ac0efbb3..49861f9172a2a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -45,7 +45,8 @@ class UnresolvedException[TreeType <: TreeNode[_]](tree: TreeType, function: Str */ case class UnresolvedRelation( multipartIdentifier: Seq[String], - options: CaseInsensitiveStringMap = CaseInsensitiveStringMap.empty()) + options: CaseInsensitiveStringMap = CaseInsensitiveStringMap.empty(), + override val isStreaming: Boolean = false) extends LeafNode with NamedRelation { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ @@ -60,6 +61,14 @@ case class UnresolvedRelation( } object UnresolvedRelation { + def apply( + tableIdentifier: TableIdentifier, + extraOptions: CaseInsensitiveStringMap, + isStreaming: Boolean): UnresolvedRelation = { + UnresolvedRelation( + tableIdentifier.database.toSeq :+ tableIdentifier.table, extraOptions, isStreaming) + } + def apply(tableIdentifier: TableIdentifier): UnresolvedRelation = UnresolvedRelation(tableIdentifier.database.toSeq :+ tableIdentifier.table) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index db01999ab9bb2..9c93691ca3b41 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -643,7 +643,8 @@ object CatalogTypes { */ case class UnresolvedCatalogRelation( tableMeta: CatalogTable, - options: CaseInsensitiveStringMap = CaseInsensitiveStringMap.empty()) extends LeafNode { + options: CaseInsensitiveStringMap = CaseInsensitiveStringMap.empty(), + override val isStreaming: Boolean = false) extends LeafNode { assert(tableMeta.identifier.database.isDefined) override lazy val resolved: Boolean = false override def output: Seq[Attribute] = Nil diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/streaming/StreamingRelationV2.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/streaming/StreamingRelationV2.scala index 92c4926c3a7f9..6a059025a71f6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/streaming/StreamingRelationV2.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/streaming/StreamingRelationV2.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.streaming import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation 
import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics} -import org.apache.spark.sql.connector.catalog.{Table, TableProvider} +import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier, Table, TableProvider} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -37,6 +37,8 @@ case class StreamingRelationV2( table: Table, extraOptions: CaseInsensitiveStringMap, output: Seq[Attribute], + catalog: Option[CatalogPlugin], + identifier: Option[Identifier], v1Relation: Option[LogicalPlan]) extends LeafNode with MultiInstanceRelation { override lazy val resolved = v1Relation.forall(_.resolved) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala index 70fc9689e6087..9aed550ff97c4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala @@ -80,3 +80,11 @@ private[sql] case class V1Table(v1Table: CatalogTable) extends Table { override def toString: String = s"V1Table($name)" } + +/** + * A V2 table with V1 fallback support. This is used to fallback to V1 table when the V2 one + * doesn't implement specific capabilities but V1 already has. + */ +private[sql] trait V2TableWithV1Fallback extends Table { + def v1Table: CatalogTable +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index cc2a4a6b3ca96..94f34a9b39b28 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -174,7 +174,7 @@ case class CreateViewCommand( def verify(child: LogicalPlan) { child.collect { // Disallow creating permanent views based on temporary views. - case UnresolvedRelation(nameParts, _) if catalog.isTempView(nameParts) => + case UnresolvedRelation(nameParts, _, _) if catalog.isTempView(nameParts) => throw new AnalysisException(s"Not allowed to create a permanent view $name by " + s"referencing a temporary view ${nameParts.quoted}. 
" + "Please create a temp view instead by CREATE TEMP VIEW") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index 1f8cfee308033..86e85719272e8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -37,8 +37,12 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.ScanOperation import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoDir, InsertIntoStatement, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 +import org.apache.spark.sql.connector.catalog.SupportsRead +import org.apache.spark.sql.connector.catalog.TableCapability._ import org.apache.spark.sql.execution.{RowDataSourceScanExec, SparkPlan} import org.apache.spark.sql.execution.command._ +import org.apache.spark.sql.execution.streaming.StreamingRelation import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy import org.apache.spark.sql.sources._ @@ -260,19 +264,48 @@ class FindDataSourceTable(sparkSession: SparkSession) extends Rule[LogicalPlan] }) } + private def getStreamingRelation( + table: CatalogTable, + extraOptions: CaseInsensitiveStringMap): StreamingRelation = { + val dsOptions = DataSourceUtils.generateDatasourceOptions(extraOptions, table) + val dataSource = DataSource( + sparkSession, + className = table.provider.get, + userSpecifiedSchema = Some(table.schema), + options = dsOptions) + StreamingRelation(dataSource) + } + + override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { - case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta, options), _, _, _, _) + case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta, options, false), _, _, _, _) if DDLUtils.isDatasourceTable(tableMeta) => i.copy(table = readDataSourceTable(tableMeta, options)) - case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta, _), _, _, _, _) => + case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta, _, false), _, _, _, _) => i.copy(table = DDLUtils.readHiveTable(tableMeta)) - case UnresolvedCatalogRelation(tableMeta, options) if DDLUtils.isDatasourceTable(tableMeta) => + case UnresolvedCatalogRelation(tableMeta, options, false) + if DDLUtils.isDatasourceTable(tableMeta) => readDataSourceTable(tableMeta, options) - case UnresolvedCatalogRelation(tableMeta, _) => + case UnresolvedCatalogRelation(tableMeta, _, false) => DDLUtils.readHiveTable(tableMeta) + + case UnresolvedCatalogRelation(tableMeta, extraOptions, true) => + getStreamingRelation(tableMeta, extraOptions) + + case s @ StreamingRelationV2( + _, _, table, extraOptions, _, _, _, Some(UnresolvedCatalogRelation(tableMeta, _, true))) => + import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ + val v1Relation = getStreamingRelation(tableMeta, extraOptions) + if (table.isInstanceOf[SupportsRead] + && table.supportsAny(MICRO_BATCH_READ, CONTINUOUS_READ)) { + s.copy(v1Relation = Some(v1Relation)) + } else { + // Fallback to V1 relation + v1Relation + } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala index 5a91b24a0803f..aad212cc13486 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -90,7 +90,7 @@ class MicroBatchExecution( StreamingExecutionRelation(source, output)(sparkSession) }) - case s @ StreamingRelationV2(src, srcName, table: SupportsRead, options, output, v1) => + case s @ StreamingRelationV2(src, srcName, table: SupportsRead, options, output, _, _, v1) => val dsStr = if (src.nonEmpty) s"[${src.get}]" else "" val v2Disabled = disabledSources.contains(src.getOrElse(None).getClass.getCanonicalName) if (!v2Disabled && table.supports(TableCapability.MICRO_BATCH_READ)) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index 12198f735c4c3..6eb28d4c66ded 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -65,7 +65,7 @@ class ContinuousExecution( var nextSourceId = 0 import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ val _logicalPlan = analyzedPlan.transform { - case s @ StreamingRelationV2(ds, sourceName, table: SupportsRead, options, output, _) => + case s @ StreamingRelationV2(ds, sourceName, table: SupportsRead, options, output, _, _, _) => val dsStr = if (ds.nonEmpty) s"[${ds.get}]" else "" if (!table.supports(TableCapability.CONTINUOUS_READ)) { throw new UnsupportedOperationException( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala index c6ba0da6ef04d..ee1cb127a3bc5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala @@ -83,6 +83,8 @@ abstract class MemoryStreamBase[A : Encoder](sqlContext: SQLContext) extends Spa new MemoryStreamTable(this), CaseInsensitiveStringMap.empty(), attributes, + None, + None, None) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala index 93a48946fbafc..9bc4acd49a980 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala @@ -24,6 +24,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.annotation.Evolving import org.apache.spark.internal.Logging import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, SparkSession} +import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.connector.catalog.{SupportsRead, TableProvider} @@ -231,7 +232,8 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo Dataset.ofRows( sparkSession, StreamingRelationV2( - Some(provider), source, table, dsOptions, table.schema.toAttributes, v1Relation)) + Some(provider), source, 
table, dsOptions, + table.schema.toAttributes, None, None, v1Relation)) // fallback to v1 // TODO (SPARK-27483): we should move this fallback logic to an analyzer rule. @@ -475,6 +477,23 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo format("parquet").load(path) } + /** + * Define a Streaming DataFrame on a Table. The DataSource corresponding to the table should + * support streaming mode. + * @param tableName The name of the table + * @since 3.1.0 + */ + def table(tableName: String): DataFrame = { + require(tableName != null, "The table name can't be null") + val identifier = sparkSession.sessionState.sqlParser.parseMultipartIdentifier(tableName) + Dataset.ofRows( + sparkSession, + UnresolvedRelation( + identifier, + new CaseInsensitiveStringMap(extraOptions.toMap.asJava), + isStreaming = true)) + } + /** * Loads text files and returns a `DataFrame` whose schema starts with a string column named * "value", and followed by partitioned columns if there are any. diff --git a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out index 5a59ffa03880f..3a850160b43e0 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out @@ -693,7 +693,7 @@ Output: [] Arguments: `default`.`explain_view`, SELECT key, val FROM explain_temp1, false, false, PersistedView (3) UnresolvedRelation -Arguments: [explain_temp1], [] +Arguments: [explain_temp1], [], false (4) Project Arguments: ['key, 'val] diff --git a/sql/core/src/test/resources/sql-tests/results/explain.sql.out b/sql/core/src/test/resources/sql-tests/results/explain.sql.out index f28c408407c3f..6b3b71f85ced2 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain.sql.out @@ -827,7 +827,7 @@ Output: [] Arguments: `default`.`explain_view`, SELECT key, val FROM explain_temp1, false, false, PersistedView (3) UnresolvedRelation -Arguments: [explain_temp1], [] +Arguments: [explain_temp1], [], false (4) Project Arguments: ['key, 'val] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala index 1d016496df2de..2d75a35215866 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala @@ -46,6 +46,8 @@ class TableCapabilityCheckSuite extends AnalysisSuite with SharedSparkSession { table, CaseInsensitiveStringMap.empty(), TableCapabilityCheckSuite.schema.toAttributes, + None, + None, v1Relation) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala new file mode 100644 index 0000000000000..788452dace84b --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala @@ -0,0 +1,234 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.streaming.test + +import java.util + +import scala.collection.JavaConverters._ + +import org.scalatest.BeforeAndAfter + +import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException +import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} +import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 +import org.apache.spark.sql.connector.{FakeV2Provider, InMemoryTableCatalog} +import org.apache.spark.sql.connector.catalog.{Identifier, SupportsRead, Table, TableCapability, V2TableWithV1Fallback} +import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.connector.read.ScanBuilder +import org.apache.spark.sql.execution.streaming.{MemoryStream, MemoryStreamScanBuilder} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.streaming.StreamTest +import org.apache.spark.sql.streaming.sources.FakeScanBuilder +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { + import testImplicits._ + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + + before { + spark.conf.set("spark.sql.catalog.testcat", classOf[InMemoryTableCatalog].getName) + spark.conf.set("spark.sql.catalog.teststream", classOf[InMemoryStreamTableCatalog].getName) + } + + after { + spark.sessionState.catalogManager.reset() + spark.sessionState.conf.clear() + } + + test("table API with file source") { + Seq("parquet", "").foreach { source => + withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> source) { + withTempDir { tempDir => + val tblName = "my_table" + val dir = tempDir.getAbsolutePath + withTable(tblName) { + spark.range(3).write.format("parquet").option("path", dir).saveAsTable(tblName) + + testStream(spark.readStream.table(tblName))( + ProcessAllAvailable(), + CheckAnswer(Row(0), Row(1), Row(2)) + ) + } + } + } + } + } + + test("read non-exist table") { + intercept[AnalysisException] { + spark.readStream.table("non_exist_table") + }.message.contains("Table not found") + } + + test("stream table API with temp view") { + val tblName = "my_table" + val stream = MemoryStream[Int] + withTable(tblName) { + stream.toDF().createOrReplaceTempView(tblName) + + testStream(spark.readStream.table(tblName)) ( + AddData(stream, 1, 2, 3), + CheckLastBatch(1, 2, 3), + AddData(stream, 4, 5), + CheckLastBatch(4, 5) + ) + } + } + + test("stream table API with non-streaming temp view") { + val tblName = "my_table" + withTable(tblName) { + spark.range(3).createOrReplaceTempView(tblName) + intercept[AnalysisException] { + spark.readStream.table(tblName) + }.message.contains("is not a temp view of streaming logical plan") + } + } + + test("read table without streaming capability 
support") { + val tableIdentifer = "testcat.table_name" + + spark.sql(s"CREATE TABLE $tableIdentifer (id bigint, data string) USING foo") + + intercept[AnalysisException] { + spark.readStream.table(tableIdentifer) + }.message.contains("does not support either micro-batch or continuous scan") + } + + test("read table with custom catalog") { + val tblName = "teststream.table_name" + withTable(tblName) { + spark.sql(s"CREATE TABLE $tblName (data int) USING foo") + val stream = MemoryStream[Int] + val testCatalog = spark.sessionState.catalogManager.catalog("teststream").asTableCatalog + val table = testCatalog.loadTable(Identifier.of(Array(), "table_name")) + table.asInstanceOf[InMemoryStreamTable].setStream(stream) + + testStream(spark.readStream.table(tblName)) ( + AddData(stream, 1, 2, 3), + CheckLastBatch(1, 2, 3), + AddData(stream, 4, 5), + CheckLastBatch(4, 5) + ) + } + } + + test("read table with custom catalog & namespace") { + spark.sql("CREATE NAMESPACE teststream.ns") + + val tblName = "teststream.ns.table_name" + withTable(tblName) { + spark.sql(s"CREATE TABLE $tblName (data int) USING foo") + val stream = MemoryStream[Int] + val testCatalog = spark.sessionState.catalogManager.catalog("teststream").asTableCatalog + val table = testCatalog.loadTable(Identifier.of(Array("ns"), "table_name")) + table.asInstanceOf[InMemoryStreamTable].setStream(stream) + + testStream(spark.readStream.table(tblName)) ( + AddData(stream, 1, 2, 3), + CheckLastBatch(1, 2, 3), + AddData(stream, 4, 5), + CheckLastBatch(4, 5) + ) + } + } + + test("fallback to V1 relation") { + val tblName = DataStreamTableAPISuite.V1FallbackTestTableName + spark.conf.set(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION.key, + classOf[InMemoryStreamTableCatalog].getName) + val v2Source = classOf[FakeV2Provider].getName + withTempDir { tempDir => + withTable(tblName) { + spark.sql(s"CREATE TABLE $tblName (data int) USING $v2Source") + + // Check the StreamingRelationV2 has been replaced by StreamingRelation + val plan = spark.readStream.option("path", tempDir.getCanonicalPath).table(tblName) + .queryExecution.analyzed.collectFirst { + case d: StreamingRelationV2 => d + } + assert(plan.isEmpty) + } + } + } +} + +object DataStreamTableAPISuite { + val V1FallbackTestTableName = "fallbackV1Test" +} + +class InMemoryStreamTable(override val name: String) extends Table with SupportsRead { + var stream: MemoryStream[Int] = _ + + def setStream(inputData: MemoryStream[Int]): Unit = stream = inputData + + override def schema(): StructType = stream.fullSchema() + + override def capabilities(): util.Set[TableCapability] = { + Set(TableCapability.MICRO_BATCH_READ, TableCapability.CONTINUOUS_READ).asJava + } + + override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = { + new MemoryStreamScanBuilder(stream) + } +} + +class NonStreamV2Table(override val name: String) + extends Table with SupportsRead with V2TableWithV1Fallback { + override def schema(): StructType = StructType(Nil) + override def capabilities(): util.Set[TableCapability] = Set(TableCapability.BATCH_READ).asJava + override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = new FakeScanBuilder + + override def v1Table: CatalogTable = { + CatalogTable( + identifier = + TableIdentifier(DataStreamTableAPISuite.V1FallbackTestTableName, Some("default")), + tableType = CatalogTableType.MANAGED, + storage = CatalogStorageFormat.empty, + owner = null, + schema = schema(), + provider = Some("parquet")) + } +} + + +class InMemoryStreamTableCatalog extends 
InMemoryTableCatalog { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + + override def createTable( + ident: Identifier, + schema: StructType, + partitions: Array[Transform], + properties: util.Map[String, String]): Table = { + if (tables.containsKey(ident)) { + throw new TableAlreadyExistsException(ident) + } + + val table = if (ident.name() == DataStreamTableAPISuite.V1FallbackTestTableName) { + new NonStreamV2Table(s"$name.${ident.quoted}") + } else { + new InMemoryStreamTable(s"$name.${ident.quoted}") + } + tables.put(ident, table) + namespaces.putIfAbsent(ident.namespace.toList, Map()) + table + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index 497dda4e22213..accfcb8d9deff 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -601,7 +601,7 @@ private[hive] class TestHiveQueryExecution( // Make sure any test tables referenced are loaded. val referencedTables = describedTables ++ - logical.collect { case UnresolvedRelation(ident, _) => ident.asTableIdentifier } + logical.collect { case UnresolvedRelation(ident, _, _) => ident.asTableIdentifier } val resolver = sparkSession.sessionState.conf.resolver val referencedTestTables = referencedTables.flatMap { tbl => val testTableOpt = sparkSession.testTables.keys.find(resolver(_, tbl.table)) From e887c639a766fde0a74e7557d1ad2b2cc4b92f1b Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Fri, 25 Sep 2020 07:27:29 +0000 Subject: [PATCH 0109/1009] [SPARK-32931][SQL] Unevaluable Expressions are not Foldable ### What changes were proposed in this pull request? Unevaluable expressions are not foldable because we don't have an eval for it. This PR is to clean up the code and enforce it. ### Why are the changes needed? Ensure that we will not hit the weird cases that trigger ConstantFolding. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? The existing tests. Closes #29798 from gatorsmile/refactorUneval. 
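As an aside on the invariant this patch enforces, a minimal self-contained sketch may help (the `Expr`, `Uneval`, `Lit`, `Placeholder` and `ConstantFoldingSketch` names below are illustrative toys, not Spark's real `Expression` API): a node that has no usable `eval` must never report `foldable = true`, because a constant-folding pass evaluates every foldable subtree eagerly, so a foldable-but-unevaluable node would make the optimizer itself throw.

```
// Toy model of the rule the patch enforces: unevaluable nodes are never foldable.
trait Expr {
  def foldable: Boolean
  def eval(): Any
}

trait Uneval extends Expr {
  // Mirrors the PR: foldable is fixed to false and eval always fails.
  final override def foldable: Boolean = false
  final override def eval(): Any =
    throw new UnsupportedOperationException(s"Cannot evaluate expression: $this")
}

case class Lit(value: Int) extends Expr {
  override def foldable: Boolean = true
  override def eval(): Any = value
}

case class Placeholder(name: String) extends Uneval

object ConstantFoldingSketch {
  // Folds an expression only when it claims to be foldable.
  def fold(e: Expr): Expr = if (e.foldable) Lit(e.eval().asInstanceOf[Int]) else e

  def main(args: Array[String]): Unit = {
    println(fold(Lit(42)))          // prints Lit(42): safely evaluated and folded
    println(fold(Placeholder("a"))) // prints Placeholder(a): skipped, eval is never called
  }
}
```

Making `foldable` a `final override` in `Unevaluable`, as the patch does, turns this from a per-subclass convention into something the compiler enforces.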
Lead-authored-by: gatorsmile Co-authored-by: Xiao Li Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/analysis/unresolved.scala | 5 ----- .../spark/sql/catalyst/expressions/Expression.scala | 4 +++- .../spark/sql/catalyst/expressions/SortOrder.scala | 3 --- .../catalyst/expressions/aggregate/interfaces.scala | 12 ++++++++---- .../catalyst/expressions/complexTypeCreator.scala | 1 - .../apache/spark/sql/catalyst/expressions/misc.scala | 2 -- .../spark/sql/catalyst/expressions/predicates.scala | 1 - .../sql/catalyst/expressions/windowExpressions.scala | 10 ++++------ .../sql/catalyst/plans/logical/v2Commands.scala | 2 -- 9 files changed, 15 insertions(+), 25 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 49861f9172a2a..9c7d572a12071 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -264,7 +264,6 @@ case class UnresolvedFunction( override def children: Seq[Expression] = arguments ++ filter.toSeq override def dataType: DataType = throw new UnresolvedException(this, "dataType") - override def foldable: Boolean = throw new UnresolvedException(this, "foldable") override def nullable: Boolean = throw new UnresolvedException(this, "nullable") override lazy val resolved = false @@ -452,7 +451,6 @@ case class UnresolvedExtractValue(child: Expression, extraction: Expression) override def right: Expression = extraction override def dataType: DataType = throw new UnresolvedException(this, "dataType") - override def foldable: Boolean = throw new UnresolvedException(this, "foldable") override def nullable: Boolean = throw new UnresolvedException(this, "nullable") override lazy val resolved = false @@ -522,14 +520,12 @@ case class UnresolvedDeserializer(deserializer: Expression, inputAttributes: Seq override def child: Expression = deserializer override def dataType: DataType = throw new UnresolvedException(this, "dataType") - override def foldable: Boolean = throw new UnresolvedException(this, "foldable") override def nullable: Boolean = throw new UnresolvedException(this, "nullable") override lazy val resolved = false } case class GetColumnByOrdinal(ordinal: Int, dataType: DataType) extends LeafExpression with Unevaluable with NonSQLExpression { - override def foldable: Boolean = throw new UnresolvedException(this, "foldable") override def nullable: Boolean = throw new UnresolvedException(this, "nullable") override lazy val resolved = false } @@ -547,7 +543,6 @@ case class GetColumnByOrdinal(ordinal: Int, dataType: DataType) extends LeafExpr case class UnresolvedOrdinal(ordinal: Int) extends LeafExpression with Unevaluable with NonSQLExpression { override def dataType: DataType = throw new UnresolvedException(this, "dataType") - override def foldable: Boolean = throw new UnresolvedException(this, "foldable") override def nullable: Boolean = throw new UnresolvedException(this, "nullable") override lazy val resolved = false } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 18cc648e57d71..ce4aa1c2b7c2f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -298,6 +298,9 @@ abstract class Expression extends TreeNode[Expression] { */ trait Unevaluable extends Expression { + /** Unevaluable is not foldable because we don't have an eval for it. */ + final override def foldable: Boolean = false + final override def eval(input: InternalRow = null): Any = throw new UnsupportedOperationException(s"Cannot evaluate expression: $this") @@ -318,7 +321,6 @@ trait Unevaluable extends Expression { */ trait RuntimeReplaceable extends UnaryExpression with Unevaluable { override def nullable: Boolean = child.nullable - override def foldable: Boolean = child.foldable override def dataType: DataType = child.dataType // As this expression gets replaced at optimization with its `child" expression, // two `RuntimeReplaceable` are considered to be semantically equal if their "child" expressions diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala index 536276b5cb29f..54259e713accd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala @@ -66,9 +66,6 @@ case class SortOrder( sameOrderExpressions: Set[Expression]) extends UnaryExpression with Unevaluable { - /** Sort order is not foldable because we don't have an eval for it. */ - override def foldable: Boolean = false - override def checkInputDataTypes(): TypeCheckResult = { if (RowOrdering.isOrderable(dataType)) { TypeCheckResult.TypeCheckSuccess diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala index 26367cc058bfa..421b8ee2a25b2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.expressions.aggregate import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodegenFallback, ExprCode} import org.apache.spark.sql.types._ /** The mode of an [[AggregateFunction]]. */ @@ -133,7 +133,6 @@ case class AggregateExpression( override def children: Seq[Expression] = aggregateFunction +: filter.toSeq override def dataType: DataType = aggregateFunction.dataType - override def foldable: Boolean = false override def nullable: Boolean = aggregateFunction.nullable @transient @@ -374,8 +373,7 @@ abstract class ImperativeAggregate extends AggregateFunction with CodegenFallbac */ abstract class DeclarativeAggregate extends AggregateFunction - with Serializable - with Unevaluable { + with Serializable { /** * Expressions for initializing empty aggregation buffers. @@ -421,6 +419,12 @@ abstract class DeclarativeAggregate /** Represents this attribute at the input buffer side (the data value is read-only). 
*/ def right: AttributeReference = inputAggBufferAttributes(aggBufferAttributes.indexOf(a)) } + + final override def eval(input: InternalRow = null): Any = + throw new UnsupportedOperationException(s"Cannot evaluate expression: $this") + + final override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = + throw new UnsupportedOperationException(s"Cannot generate code for expression: $this") } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 42e4d3ec6df57..c1471455b58c0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -304,7 +304,6 @@ case class MapFromArrays(left: Expression, right: Expression) */ case object NamePlaceholder extends LeafExpression with Unevaluable { override lazy val resolved: Boolean = false - override def foldable: Boolean = false override def nullable: Boolean = false override def dataType: DataType = StringType override def prettyName: String = "NamePlaceholder" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index 2458a4aaba650..1eec26c8e987a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -113,7 +113,6 @@ case class AssertTrue(child: Expression) extends UnaryExpression with ImplicitCa since = "1.6.0") case class CurrentDatabase() extends LeafExpression with Unevaluable { override def dataType: DataType = StringType - override def foldable: Boolean = true override def nullable: Boolean = false override def prettyName: String = "current_database" } @@ -131,7 +130,6 @@ case class CurrentDatabase() extends LeafExpression with Unevaluable { since = "3.1.0") case class CurrentCatalog() extends LeafExpression with Unevaluable { override def dataType: DataType = StringType - override def foldable: Boolean = true override def nullable: Boolean = false override def prettyName: String = "current_catalog" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index ddc4d8c0d39b6..1f55045dbca74 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -337,7 +337,6 @@ case class InSubquery(values: Seq[Expression], query: ListQuery) override def children: Seq[Expression] = values :+ query override def nullable: Boolean = children.exists(_.nullable) - override def foldable: Boolean = children.forall(_.foldable) override def toString: String = s"$value IN ($query)" override def sql: String = s"(${value.sql} IN (${query.sql}))" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index 1a35a52098f4d..8e3702c157a3c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -50,7 +50,6 @@ case class WindowSpecDefinition( frameSpecification.isInstanceOf[SpecifiedWindowFrame] override def nullable: Boolean = true - override def foldable: Boolean = false override def dataType: DataType = throw new UnsupportedOperationException("dataType") override def checkInputDataTypes(): TypeCheckResult = { @@ -144,7 +143,6 @@ case object RangeFrame extends FrameType { sealed trait SpecialFrameBoundary extends Expression with Unevaluable { override def children: Seq[Expression] = Nil override def dataType: DataType = NullType - override def foldable: Boolean = false override def nullable: Boolean = false } @@ -168,7 +166,6 @@ case object CurrentRow extends SpecialFrameBoundary { sealed trait WindowFrame extends Expression with Unevaluable { override def children: Seq[Expression] = Nil override def dataType: DataType = throw new UnsupportedOperationException("dataType") - override def foldable: Boolean = false override def nullable: Boolean = false } @@ -275,7 +272,6 @@ case class UnresolvedWindowExpression( windowSpec: WindowSpecReference) extends UnaryExpression with Unevaluable { override def dataType: DataType = throw new UnresolvedException(this, "dataType") - override def foldable: Boolean = throw new UnresolvedException(this, "foldable") override def nullable: Boolean = throw new UnresolvedException(this, "nullable") override lazy val resolved = false } @@ -287,7 +283,6 @@ case class WindowExpression( override def children: Seq[Expression] = windowFunction :: windowSpec :: Nil override def dataType: DataType = windowFunction.dataType - override def foldable: Boolean = windowFunction.foldable override def nullable: Boolean = windowFunction.nullable override def toString: String = s"$windowFunction $windowSpec" @@ -370,8 +365,11 @@ abstract class OffsetWindowFunction * OffsetWindowFunction is executed, the input expression and the default expression. Even when * both the input and the default expression are foldable, the result is still not foldable due to * the frame. 
+ * + * Note, the value of foldable is set to false in the trait Unevaluable + * + * override def foldable: Boolean = false */ - override def foldable: Boolean = false override def nullable: Boolean = default == null || default.nullable || input.nullable diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index fa0a10c3a5a45..475eb7d74773d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -348,7 +348,6 @@ case class MergeIntoTable( sealed abstract class MergeAction extends Expression with Unevaluable { def condition: Option[Expression] - override def foldable: Boolean = false override def nullable: Boolean = false override def dataType: DataType = throw new UnresolvedException(this, "nullable") override def children: Seq[Expression] = condition.toSeq @@ -369,7 +368,6 @@ case class InsertAction( } case class Assignment(key: Expression, value: Expression) extends Expression with Unevaluable { - override def foldable: Boolean = false override def nullable: Boolean = false override def dataType: DataType = throw new UnresolvedException(this, "nullable") override def children: Seq[Expression] = key :: value :: Nil From 6c805470a7e8d1f44747dc64c2e49ebd302f9ba4 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 25 Sep 2020 16:36:15 -0700 Subject: [PATCH 0110/1009] [SPARK-32997][K8S] Support dynamic PVC creation and deletion in K8s driver ### What changes were proposed in this pull request? This PR aims to support dynamic PVC creation and deletion in K8s driver. **Configuration** This PR reuses the existing PVC volume configs. ``` spark.kubernetes.driver.volumes.persistentVolumeClaim.spark-local-dir-1.options.claimName=OnDemand spark.kubernetes.driver.volumes.persistentVolumeClaim.spark-local-dir-1.options.storageClass=gp2 spark.kubernetes.driver.volumes.persistentVolumeClaim.spark-local-dir-1.options.sizeLimit=200Gi spark.kubernetes.driver.volumes.persistentVolumeClaim.spark-local-dir-1.mount.path=/data spark.kubernetes.driver.volumes.persistentVolumeClaim.spark-local-dir-1.mount.readOnly=false ``` **PVC** ``` $ kubectl get pvc | grep driver tpcds-d6087874c6705564-driver-pvc-0 Bound pvc-fae914a2-ca5c-4e1e-8aba-54a35357d072 200Gi RWO gp2 12m ``` **Disk** ``` $ k exec -it tpcds-d6087874c6705564-driver -- df -h | grep data /dev/nvme5n1 197G 61M 197G 1% /data ``` ``` $ k exec -it tpcds-d6087874c6705564-driver -- ls -al /data total 28 drwxr-xr-x 5 root root 4096 Sep 25 18:06 . drwxr-xr-x 1 root root 63 Sep 25 18:06 .. drwxr-xr-x 66 root root 4096 Sep 25 18:09 blockmgr-2c9a8cc5-a05c-45fe-a58e-b8f42da88a57 drwx------ 2 root root 16384 Sep 25 18:06 lost+found drwx------ 4 root root 4096 Sep 25 18:07 spark-0448efe7-da2c-4f3a-bd3c-769aadb11dd6 ``` **NOTE** This should be used carefully because Apache Spark doesn't delete driver pod automatically. Since the driver PVC shares the lifecycle of driver pod, it will exist after the job completion until the pod deletion. However, if the users are already using pre-populated PVCs, this isn't a regression at all in terms of the cost. ``` $ k get pod -l spark-role=driver NAME READY STATUS RESTARTS AGE tpcds-d6087874c6705564-driver 0/1 Completed 0 35m ``` ### Why are the changes needed? Like executors, driver also needs larger PVC. 
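For reference, a rough sketch of how the `OnDemand` placeholder in the claim name is expanded (simplified from the `MountVolumesFeatureStep` change below; the literal values `"OnDemand"` and `"-pvc"` for `PVC_ON_DEMAND` and `PVC_POSTFIX`, and the `ClaimNameSketch` object itself, are assumptions inferred from the names shown above, e.g. `tpcds-d6087874c6705564-driver-pvc-0`):

```
// Simplified model of on-demand PVC claim-name expansion; not the real feature step.
object ClaimNameSketch {
  val PvcOnDemand = "OnDemand" // assumed value of PVC_ON_DEMAND
  val PvcPostfix = "-pvc"      // assumed value of PVC_POSTFIX

  // Executors get "<prefix>-exec-<id>-pvc-<i>"; with this patch the driver gets
  // "<prefix>-driver-pvc-<i>" instead of keeping the literal template.
  def claimName(template: String, prefix: String, executorId: Option[String], i: Int): String =
    executorId match {
      case Some(id) => template.replaceAll(PvcOnDemand, s"$prefix-exec-$id$PvcPostfix-$i")
      case None => template.replaceAll(PvcOnDemand, s"$prefix-driver$PvcPostfix-$i")
    }

  def main(args: Array[String]): Unit = {
    println(claimName("OnDemand", "tpcds-d6087874c6705564", None, 0))
    // tpcds-d6087874c6705564-driver-pvc-0
    println(claimName("OnDemand", "tpcds-d6087874c6705564", Some("7"), 0))
    // tpcds-d6087874c6705564-exec-7-pvc-0
  }
}
```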
### Does this PR introduce _any_ user-facing change? Yes. This is a new feature. ### How was this patch tested? Pass the newly added test case. Closes #29873 from dongjoon-hyun/SPARK-32997. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../features/MountVolumesFeatureStep.scala | 38 +++++++++---------- .../MountVolumesFeatureStepSuite.scala | 17 +++++++++ 2 files changed, 35 insertions(+), 20 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala index 788ddeaf51cba..e297656520200 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala @@ -66,32 +66,30 @@ private[spark] class MountVolumesFeatureStep(conf: KubernetesConf) case KubernetesPVCVolumeConf(claimNameTemplate, storageClass, size) => val claimName = conf match { case c: KubernetesExecutorConf => - val claimName = claimNameTemplate + claimNameTemplate .replaceAll(PVC_ON_DEMAND, s"${conf.resourceNamePrefix}-exec-${c.executorId}$PVC_POSTFIX-$i") .replaceAll(ENV_EXECUTOR_ID, c.executorId) - - if (storageClass.isDefined && size.isDefined) { - additionalResources.append(new PersistentVolumeClaimBuilder() - .withKind(PVC) - .withApiVersion("v1") - .withNewMetadata() - .withName(claimName) - .endMetadata() - .withNewSpec() - .withStorageClassName(storageClass.get) - .withAccessModes(PVC_ACCESS_MODE) - .withResources(new ResourceRequirementsBuilder() - .withRequests(Map("storage" -> new Quantity(size.get)).asJava).build()) - .endSpec() - .build()) - } - - claimName - case _ => claimNameTemplate + .replaceAll(PVC_ON_DEMAND, s"${conf.resourceNamePrefix}-driver$PVC_POSTFIX-$i") } + if (storageClass.isDefined && size.isDefined) { + additionalResources.append(new PersistentVolumeClaimBuilder() + .withKind(PVC) + .withApiVersion("v1") + .withNewMetadata() + .withName(claimName) + .endMetadata() + .withNewSpec() + .withStorageClassName(storageClass.get) + .withAccessModes(PVC_ACCESS_MODE) + .withResources(new ResourceRequirementsBuilder() + .withRequests(Map("storage" -> new Quantity(size.get)).asJava).build()) + .endSpec() + .build()) + } + new VolumeBuilder() .withPersistentVolumeClaim( new PersistentVolumeClaimVolumeSource(claimName, spec.mountReadOnly)) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala index df7616271681d..e95af264d09ec 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala @@ -89,6 +89,23 @@ class MountVolumesFeatureStepSuite extends SparkFunSuite { assert(executorPVC.getClaimName === s"pvc-spark-${KubernetesTestConf.EXECUTOR_ID}") } + test("Create and mounts persistentVolumeClaims in driver") { + val volumeConf = KubernetesVolumeSpec( + "testVolume", + "/tmp", + "", + true, + KubernetesPVCVolumeConf("OnDemand") + ) + val kubernetesConf = KubernetesTestConf.createDriverConf(volumes = Seq(volumeConf)) + val step = 
new MountVolumesFeatureStep(kubernetesConf) + val configuredPod = step.configurePod(SparkPod.initialPod()) + + assert(configuredPod.pod.getSpec.getVolumes.size() === 1) + val pvcClaim = configuredPod.pod.getSpec.getVolumes.get(0).getPersistentVolumeClaim + assert(pvcClaim.getClaimName.endsWith("-driver-pvc-0")) + } + test("Create and mount persistentVolumeClaims in executors") { val volumeConf = KubernetesVolumeSpec( "testVolume", From 934a91fcb4de1e5c4b93b58e7452afa4bb4a9586 Mon Sep 17 00:00:00 2001 From: zhengruifeng Date: Sat, 26 Sep 2020 08:16:39 -0500 Subject: [PATCH 0111/1009] [SPARK-21481][ML][FOLLOWUP][TRIVIAL] HashingTF use util.collection.OpenHashMap instead of mutable.HashMap ### What changes were proposed in this pull request? `HashingTF` use `util.collection.OpenHashMap` instead of `mutable.HashMap` ### Why are the changes needed? according to `util.collection.OpenHashMap` 's doc: > This map is about 5X faster than java.util.HashMap, while using much less space overhead. according to performance tests like ([Simple microbenchmarks comparing Scala vs Java mutable map performance ](https://gist.github.com/pchiusano/1423303)), `mutable.HashMap` maybe more inefficient than `java.util.HashMap` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? existing testsuites Closes #29852 from zhengruifeng/hashingtf_opt. Authored-by: zhengruifeng Signed-off-by: Sean Owen --- .../apache/spark/ml/feature/HashingTF.scala | 20 ++++++------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala index d2bb013448aae..f4223bc85943d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala @@ -17,8 +17,6 @@ package org.apache.spark.ml.feature -import scala.collection.mutable - import org.apache.spark.annotation.Since import org.apache.spark.ml.Transformer import org.apache.spark.ml.attribute.AttributeGroup @@ -32,6 +30,7 @@ import org.apache.spark.sql.functions.{col, udf} import org.apache.spark.sql.types.{ArrayType, StructType} import org.apache.spark.util.Utils import org.apache.spark.util.VersionUtils.majorMinorVersion +import org.apache.spark.util.collection.OpenHashMap /** * Maps a sequence of terms to their term frequencies using the hashing trick. 
@@ -91,20 +90,13 @@ class HashingTF @Since("3.0.0") private[ml] ( @Since("2.0.0") override def transform(dataset: Dataset[_]): DataFrame = { val outputSchema = transformSchema(dataset.schema) - val localNumFeatures = $(numFeatures) - val localBinary = $(binary) + val n = $(numFeatures) + val updateFunc = if ($(binary)) (v: Double) => 1.0 else (v: Double) => v + 1.0 val hashUDF = udf { terms: Seq[_] => - val termFrequencies = mutable.HashMap.empty[Int, Double].withDefaultValue(0.0) - terms.foreach { term => - val i = indexOf(term) - if (localBinary) { - termFrequencies(i) = 1.0 - } else { - termFrequencies(i) += 1.0 - } - } - Vectors.sparse(localNumFeatures, termFrequencies.toSeq) + val map = new OpenHashMap[Int, Double]() + terms.foreach { term => map.changeValue(indexOf(term), 1.0, updateFunc) } + Vectors.sparse(n, map.toSeq) } dataset.withColumn($(outputCol), hashUDF(col($(inputCol))), From 9a155d42a3202fbafc48f8b722bbc27cce522e11 Mon Sep 17 00:00:00 2001 From: Kris Mok Date: Sat, 26 Sep 2020 16:03:59 -0700 Subject: [PATCH 0112/1009] [SPARK-32999][SQL] Use Utils.getSimpleName to avoid hitting Malformed class name in TreeNode ### What changes were proposed in this pull request? Use `Utils.getSimpleName` to avoid hitting `Malformed class name` error in `TreeNode`. ### Why are the changes needed? On older JDK versions (e.g. JDK8u), nested Scala classes may trigger `java.lang.Class.getSimpleName` to throw an `java.lang.InternalError: Malformed class name` error. Similar to https://github.com/apache/spark/pull/29050, we should use Spark's `Utils.getSimpleName` utility function in place of `Class.getSimpleName` to avoid hitting the issue. ### Does this PR introduce _any_ user-facing change? Fixes a bug that throws an error when invoking `TreeNode.nodeName`, otherwise no changes. ### How was this patch tested? Added new unit test case in `TreeNodeSuite`. Note that the test case assumes the test code can trigger the expected error, otherwise it'll skip the test safely, for compatibility with newer JDKs. Manually tested on JDK8u and JDK11u and observed expected behavior: - JDK8u: the test case triggers the "Malformed class name" issue and the fix works; - JDK11u: the test case does not trigger the "Malformed class name" issue, and the test case is safely skipped. Closes #29875 from rednaxelafx/spark-32999-getsimplename. 
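A standalone reproducer of the underlying JDK issue, mirroring the nesting used in the new `TreeNodeSuite` test (the `SimpleNameRepro` and `Holder.Inner` names are made up for this sketch; plain Scala, no Spark dependencies). On affected JDK8u builds, `Class.getSimpleName` may throw `java.lang.InternalError: Malformed class name` for a case class nested inside an object that is itself nested inside a class; newer JDKs simply return the simple name:

```
class SimpleNameRepro {
  object Holder {
    case class Inner(x: Int)
  }

  def describe(): String =
    try {
      // May throw InternalError("Malformed class name") on older JDK8u; returns "Inner" elsewhere.
      classOf[Holder.Inner].getSimpleName
    } catch {
      case e: InternalError if e.getMessage.contains("Malformed class name") =>
        "hit the JDK8u getSimpleName bug"
    }
}

object SimpleNameRepro {
  def main(args: Array[String]): Unit = println(new SimpleNameRepro().describe())
}
```

This is why the patch routes `nodeName` and the assertion messages through Spark's `Utils.getSimpleName` helper instead of calling `Class.getSimpleName` directly.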
Authored-by: Kris Mok Signed-off-by: Dongjoon Hyun --- .../spark/sql/catalyst/trees/TreeNode.scala | 9 ++++--- .../sql/catalyst/trees/TreeNodeSuite.scala | 26 +++++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index 8003012f30ca5..1ab7bbdcff697 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -41,6 +41,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.storage.StorageLevel +import org.apache.spark.util.Utils /** Used by [[TreeNode.getNodeNumbered]] when traversing the tree for a given number */ private class MutableInt(var i: Int) @@ -521,11 +522,13 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { mapChildren(_.clone(), forceCopy = true) } + private def simpleClassName: String = Utils.getSimpleName(this.getClass) + /** * Returns the name of this type of TreeNode. Defaults to the class name. * Note that we remove the "Exec" suffix for physical operators here. */ - def nodeName: String = getClass.getSimpleName.replaceAll("Exec$", "") + def nodeName: String = simpleClassName.replaceAll("Exec$", "") /** * The arguments that should be included in the arg string. Defaults to the `productIterator`. @@ -747,7 +750,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { protected def jsonFields: List[JField] = { val fieldNames = getConstructorParameterNames(getClass) val fieldValues = productIterator.toSeq ++ otherCopyArgs - assert(fieldNames.length == fieldValues.length, s"${getClass.getSimpleName} fields: " + + assert(fieldNames.length == fieldValues.length, s"$simpleClassName fields: " + fieldNames.mkString(", ") + s", values: " + fieldValues.mkString(", ")) fieldNames.zip(fieldValues).map { @@ -801,7 +804,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { try { val fieldNames = getConstructorParameterNames(p.getClass) val fieldValues = p.productIterator.toSeq - assert(fieldNames.length == fieldValues.length, s"${getClass.getSimpleName} fields: " + + assert(fieldNames.length == fieldValues.length, s"$simpleClassName fields: " + fieldNames.mkString(", ") + s", values: " + fieldValues.mkString(", ")) ("product-class" -> JString(p.getClass.getName)) :: fieldNames.zip(fieldValues).map { case (name, value) => name -> parseToJson(value) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala index ff51bc0071c80..4ad8475a0113c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala @@ -736,4 +736,30 @@ class TreeNodeSuite extends SparkFunSuite with SQLHelper { assertDifferentInstance(leaf, leafCloned) assert(leaf.child.eq(leafCloned.asInstanceOf[FakeLeafPlan].child)) } + + object MalformedClassObject extends Serializable { + case class MalformedNameExpression(child: Expression) extends TaggingExpression + } + + test("SPARK-32999: TreeNode.nodeName should not throw malformed class name error") { + val testTriggersExpectedError = try { + 
classOf[MalformedClassObject.MalformedNameExpression].getSimpleName + false + } catch { + case ex: java.lang.InternalError if ex.getMessage.contains("Malformed class name") => + true + case ex: Throwable => throw ex + } + // This test case only applies on older JDK versions (e.g. JDK8u), and doesn't trigger the + // issue on newer JDK versions (e.g. JDK11u). + assume(testTriggersExpectedError, "the test case didn't trigger malformed class name error") + + val expr = MalformedClassObject.MalformedNameExpression(Literal(1)) + try { + expr.nodeName + } catch { + case ex: java.lang.InternalError if ex.getMessage.contains("Malformed class name") => + fail("TreeNode.nodeName should not throw malformed class name error") + } + } } From 0c38765b297337c3d80496db09ae7f79d2acf778 Mon Sep 17 00:00:00 2001 From: zhengruifeng Date: Sun, 27 Sep 2020 09:35:05 +0800 Subject: [PATCH 0113/1009] [SPARK-32974][ML] FeatureHasher transform optimization ### What changes were proposed in this pull request? pre-compute the output indices of numerical columns, instead of computing them on each row. ### Why are the changes needed? for a numerical column, its output index is a hash of its `col_name`, we can pre-compute it at first, instead of computing it on each row. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? existing testsuites Closes #29850 from zhengruifeng/hash_opt. Authored-by: zhengruifeng Signed-off-by: zhengruifeng --- .../spark/ml/feature/FeatureHasher.scala | 66 +++++++++++-------- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala index 39862554c5d8d..0bb0b05322873 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala @@ -125,19 +125,24 @@ class FeatureHasher(@Since("2.3.0") override val uid: String) extends Transforme @Since("2.3.0") override def transform(dataset: Dataset[_]): DataFrame = { + val outputSchema = transformSchema(dataset.schema) val hashFunc: Any => Int = FeatureHasher.murmur3Hash + val n = $(numFeatures) val localInputCols = $(inputCols) - val catCols = if (isSet(categoricalCols)) { - $(categoricalCols).toSet - } else { - Set[String]() + + var catCols = dataset.schema(localInputCols.toSet) + .filterNot(_.dataType.isInstanceOf[NumericType]).map(_.name).toArray + if (isSet(categoricalCols)) { + // categoricalCols may contain columns not set in inputCols + catCols = (catCols ++ $(categoricalCols).intersect(localInputCols)).distinct } + val catIndices = catCols.map(c => localInputCols.indexOf(c)) - val outputSchema = transformSchema(dataset.schema) - val realFields = outputSchema.fields.filter { f => - f.dataType.isInstanceOf[NumericType] && !catCols.contains(f.name) - }.map(_.name).toSet + val realCols = (localInputCols.toSet -- catCols).toArray + val realIndices = realCols.map(c => localInputCols.indexOf(c)) + // pre-compute output indices of real columns + val realOutputIndices = realCols.map(c => Utils.nonNegativeMod(hashFunc(c), n)) def getDouble(x: Any): Double = { x match { @@ -151,33 +156,38 @@ class FeatureHasher(@Since("2.3.0") override val uid: String) extends Transforme val hashFeatures = udf { row: Row => val map = new OpenHashMap[Int, Double]() - localInputCols.foreach { colName => - val fieldIndex = row.fieldIndex(colName) - if (!row.isNullAt(fieldIndex)) { - val (rawIdx, value) = if 
(realFields(colName)) { - // numeric values are kept as is, with vector index based on hash of "column_name" - val value = getDouble(row.get(fieldIndex)) - val hash = hashFunc(colName) - (hash, value) - } else { - // string, boolean and numeric values that are in catCols are treated as categorical, - // with an indicator value of 1.0 and vector index based on hash of "column_name=value" - val value = row.get(fieldIndex).toString - val fieldName = s"$colName=$value" - val hash = hashFunc(fieldName) - (hash, 1.0) - } - val idx = Utils.nonNegativeMod(rawIdx, n) + + var i = 0 + while (i < realIndices.length) { + val realIdx = realIndices(i) + if (!row.isNullAt(realIdx)) { + // numeric values are kept as is, with vector index based on hash of "column_name" + val value = getDouble(row.get(realIdx)) + val idx = realOutputIndices(i) map.changeValue(idx, value, v => v + value) } + i += 1 } + + i = 0 + while (i < catIndices.length) { + val catIdx = catIndices(i) + if (!row.isNullAt(catIdx)) { + // string, boolean and numeric values that are in catCols are treated as categorical, + // with an indicator value of 1.0 and vector index based on hash of "column_name=value" + val string = row.get(catIdx).toString + val rawIdx = hashFunc(s"${catCols(i)}=$string") + val idx = Utils.nonNegativeMod(rawIdx, n) + map.changeValue(idx, 1.0, v => v + 1.0) + } + i += 1 + } + Vectors.sparse(n, map.toSeq) } val metadata = outputSchema($(outputCol)).metadata - dataset.select( - col("*"), - hashFeatures(struct($(inputCols).map(col): _*)).as($(outputCol), metadata)) + dataset.withColumn($(outputCol), hashFeatures(struct($(inputCols).map(col): _*)), metadata) } @Since("2.3.0") From c65b64552f947a7eaf4f379edbdce05daa923363 Mon Sep 17 00:00:00 2001 From: zero323 Date: Sun, 27 Sep 2020 16:21:23 +0900 Subject: [PATCH 0114/1009] [SPARK-32714][FOLLOW-UP][PYTHON] Address pyspark.install typing errors ### What changes were proposed in this pull request? This PR adds two `type: ignores`, one in `pyspark.install` and one in related tests. ### Why are the changes needed? To satisfy MyPy type checks. It seems like we originally missed some changes that happened around merge of https://github.com/apache/spark/commit/31a16fbb405a19dc3eb732347e0e1f873b16971d ``` python/pyspark/install.py:30: error: Need type annotation for 'UNSUPPORTED_COMBINATIONS' (hint: "UNSUPPORTED_COMBINATIONS: List[] = ...") [var-annotated] python/pyspark/tests/test_install_spark.py:105: error: Cannot find implementation or library stub for module named 'xmlrunner' [import] python/pyspark/tests/test_install_spark.py:105: note: See https://mypy.readthedocs.io/en/latest/running_mypy.html#missing-imports ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? - Existing tests. - MyPy tests ``` mypy --show-error-code --no-incremental --config python/mypy.ini python/pyspark ``` Closes #29878 from zero323/SPARK-32714-FOLLOW-UP. 
Authored-by: zero323 Signed-off-by: HyukjinKwon --- python/pyspark/install.py | 2 +- python/pyspark/tests/test_install_spark.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyspark/install.py b/python/pyspark/install.py index 84dd2c9964563..2de7b21832abf 100644 --- a/python/pyspark/install.py +++ b/python/pyspark/install.py @@ -27,7 +27,7 @@ DEFAULT_HIVE = "hive2.3" SUPPORTED_HADOOP_VERSIONS = ["hadoop2.7", "hadoop3.2", "without-hadoop"] SUPPORTED_HIVE_VERSIONS = ["hive2.3"] -UNSUPPORTED_COMBINATIONS = [ +UNSUPPORTED_COMBINATIONS = [ # type: ignore ] diff --git a/python/pyspark/tests/test_install_spark.py b/python/pyspark/tests/test_install_spark.py index 6f9949aa8b2e0..f761e0088cd77 100644 --- a/python/pyspark/tests/test_install_spark.py +++ b/python/pyspark/tests/test_install_spark.py @@ -102,7 +102,7 @@ def test_checked_versions(self): from pyspark.tests.test_install_spark import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None From bc77e5b840b2feb18a9c8a61dfe75f421e5b64ca Mon Sep 17 00:00:00 2001 From: zhengruifeng Date: Sun, 27 Sep 2020 10:26:05 -0500 Subject: [PATCH 0115/1009] [SPARK-32973][ML][DOC] FeatureHasher does not check categoricalCols in inputCols ### What changes were proposed in this pull request? 1, update the comment: `Note, the relevant columns must also be set in inputCols` -> `Note, the relevant columns should also be set in inputCols`; 2, add a check, and if there are `categoricalCols` not set in `inputCols`, log.warn it; ### Why are the changes needed? 1, there is no check to make sure `categoricalCols` are all set in `inputCols`, to keep existing behavior, update this comments; ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? repl Closes #29868 from zhengruifeng/feature_hash_cat_doc. Authored-by: zhengruifeng Signed-off-by: Sean Owen --- .../org/apache/spark/ml/feature/FeatureHasher.scala | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala index 0bb0b05322873..f1268bdf6bd89 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala @@ -91,8 +91,8 @@ class FeatureHasher(@Since("2.3.0") override val uid: String) extends Transforme /** * Numeric columns to treat as categorical features. By default only string and boolean * columns are treated as categorical, so this param can be used to explicitly specify the - * numerical columns to treat as categorical. Note, the relevant columns must also be set in - * `inputCols`. + * numerical columns to treat as categorical. Note, the relevant columns should also be set in + * `inputCols`, categorical columns not set in `inputCols` will be listed in a warning. 
* @group param */ @Since("2.3.0") @@ -195,7 +195,14 @@ class FeatureHasher(@Since("2.3.0") override val uid: String) extends Transforme @Since("2.3.0") override def transformSchema(schema: StructType): StructType = { - val fields = schema($(inputCols).toSet) + val localInputCols = $(inputCols).toSet + if (isSet(categoricalCols)) { + val set = $(categoricalCols).filterNot(c => localInputCols.contains(c)) + if (set.nonEmpty) { + log.warn(s"categoricalCols ${set.mkString("[", ",", "]")} do not exist in inputCols") + } + } + val fields = schema(localInputCols) fields.foreach { fieldSchema => val dataType = fieldSchema.dataType val fieldName = fieldSchema.name From bb6d5e7a908dbd0918a9fe50147be7d16a4733f5 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Sun, 27 Sep 2020 10:26:51 -0500 Subject: [PATCH 0116/1009] [SPARK-32972][ML] Pass all UTs of `mllib` module in Scala 2.13 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? The purpose of this pr is to resolve SPARK-32972, total of 51 Scala failed test cases and 3 Java failed test cases were fixed, the main change of this pr as follow: - Specified `Seq` to `scala.collection.Seq` in case match `Seq` scene and `x.asInstanceOf[Seq[T]]` scene - Use `Row.getSeq[T]` instead of `Row.getAs[Seq]` - Manual call `toMap` method to convert `MapView` to `Map` in Scala 2.13 - Change the tol in the last test to 0.75 to pass `RandomForestRegressorSuite#training with sample weights` in Scala 2.13 ### Why are the changes needed? We need to support a Scala 2.13 build. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Scala 2.12: Pass the Jenkins or GitHub Action - Scala 2.13: Pass GitHub 2.13 Build Action Do the follow: ``` dev/change-scala-version.sh 2.13 mvn clean install -DskipTests -pl mllib -Pscala-2.13 -am mvn test -pl mllib -Pscala-2.13 -fn ``` **Before** ``` [ERROR] Errors: [ERROR] JavaVectorIndexerSuite.vectorIndexerAPI:51 » ClassCast scala.collection.conver... [ERROR] JavaWord2VecSuite.testJavaWord2Vec:51 » Spark Job aborted due to stage failure... [ERROR] JavaPrefixSpanSuite.runPrefixSpanSaveLoad:79 » Spark Job aborted due to stage ... Tests: succeeded 1567, failed 51, canceled 0, ignored 7, pending 0 *** 51 TESTS FAILED *** ``` **After** ``` [INFO] Tests run: 122, Failures: 0, Errors: 0, Skipped: 0 Tests: succeeded 1617, failed 0, canceled 0, ignored 7, pending 0 All tests passed. ``` Closes #29857 from LuciferYang/fix-mllib-2. 
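Two of the 2.13 incompatibilities fixed above can be illustrated without Spark at all (the `Scala213CollectionsSketch` object below is only an illustrative name): `Map#mapValues` now returns a lazy `MapView` that has to be materialized with `.toMap`, and values that used to conform to `scala.Seq` may now surface only as `scala.collection.Seq`, so pattern matches and accessors should target the wider type:

```
object Scala213CollectionsSketch {
  def main(args: Array[String]): Unit = {
    // In Scala 2.13, Map#mapValues returns a lazy MapView instead of a strict Map,
    // so .toMap materializes it (this also compiles on 2.12, where mapValues returns a Map).
    val m = Map("a" -> 1, "b" -> 2)
    val doubled: Map[String, Int] = m.mapValues(_ * 2).toMap
    println(doubled)

    // In 2.13, scala.Seq means immutable.Seq, so sequences handed back by generic APIs
    // may only be scala.collection.Seq; match on the wider type to cover both versions.
    def describe(value: Any): String = value match {
      case s: scala.collection.Seq[_] => s"a sequence of ${s.length} elements"
      case other => s"something else: $other"
    }
    println(describe(Seq(1, 2, 3)))
  }
}
```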
Authored-by: yangjie01 Signed-off-by: Sean Owen --- .../src/main/scala/org/apache/spark/ml/feature/IDF.scala | 6 +++--- .../scala/org/apache/spark/ml/feature/MinHashLSH.scala | 2 +- .../scala/org/apache/spark/ml/feature/RFormula.scala | 2 +- .../org/apache/spark/ml/feature/StringIndexer.scala | 5 +++-- .../org/apache/spark/ml/feature/VectorIndexer.scala | 2 +- .../scala/org/apache/spark/ml/feature/Word2Vec.scala | 3 ++- .../main/scala/org/apache/spark/ml/fpm/FPGrowth.scala | 2 +- .../main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala | 2 +- .../apache/spark/mllib/classification/NaiveBayes.scala | 4 ++-- .../scala/org/apache/spark/mllib/fpm/PrefixSpan.scala | 2 +- .../mllib/recommendation/MatrixFactorizationModel.scala | 8 ++++---- .../spark/mllib/tree/model/DecisionTreeModel.scala | 2 +- .../scala/org/apache/spark/ml/clustering/LDASuite.scala | 4 ++-- .../ml/feature/BucketedRandomProjectionLSHSuite.scala | 2 +- .../test/scala/org/apache/spark/ml/feature/LSHTest.scala | 3 ++- .../org/apache/spark/ml/feature/MinHashLSHSuite.scala | 2 +- .../scala/org/apache/spark/ml/feature/NGramSuite.scala | 2 +- .../apache/spark/ml/feature/StopWordsRemoverSuite.scala | 8 +++++--- .../scala/org/apache/spark/ml/fpm/FPGrowthSuite.scala | 2 +- .../spark/ml/regression/RandomForestRegressorSuite.scala | 2 +- .../scala/org/apache/spark/ml/util/MLTestSuite.scala | 2 +- .../org/apache/spark/mllib/feature/Word2VecSuite.scala | 9 ++++++--- 22 files changed, 42 insertions(+), 34 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala index e6f124ef7d666..e451d4daffbc7 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala @@ -215,10 +215,10 @@ object IDFModel extends MLReadable[IDFModel] { val data = sparkSession.read.parquet(dataPath) val model = if (majorVersion(metadata.sparkVersion) >= 3) { - val Row(idf: Vector, df: Seq[_], numDocs: Long) = data.select("idf", "docFreq", "numDocs") - .head() + val Row(idf: Vector, df: scala.collection.Seq[_], numDocs: Long) = + data.select("idf", "docFreq", "numDocs").head() new IDFModel(metadata.uid, new feature.IDFModel(OldVectors.fromML(idf), - df.asInstanceOf[Seq[Long]].toArray, numDocs)) + df.asInstanceOf[scala.collection.Seq[Long]].toArray, numDocs)) } else { val Row(idf: Vector) = MLUtils.convertVectorColumnsToML(data, "idf") .select("idf") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala index be467c654aaa1..12cae13174379 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala @@ -224,7 +224,7 @@ object MinHashLSHModel extends MLReadable[MinHashLSHModel] { val dataPath = new Path(path, "data").toString val data = sparkSession.read.parquet(dataPath).select("randCoefficients").head() - val randCoefficients = data.getAs[Seq[Int]](0).grouped(2) + val randCoefficients = data.getSeq[Int](0).grouped(2) .map(tuple => (tuple(0), tuple(1))).toArray val model = new MinHashLSHModel(metadata.uid, randCoefficients) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala index b8da020017f12..563e1708acdf1 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala +++ 
b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala @@ -449,7 +449,7 @@ object RFormulaModel extends MLReadable[RFormulaModel] { val dataPath = new Path(path, "data").toString val data = sparkSession.read.parquet(dataPath).select("label", "terms", "hasIntercept").head() val label = data.getString(0) - val terms = data.getAs[Seq[Seq[String]]](1) + val terms = data.getSeq[Seq[String]](1) val hasIntercept = data.getBoolean(2) val resolvedRFormula = ResolvedRFormula(label, terms, hasIntercept) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala index ab51fe6e78bd7..0ca88b8e61e29 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala @@ -220,7 +220,8 @@ class StringIndexer @Since("1.4.0") ( val selectedCols = getSelectedCols(dataset, inputCols).map(collect_set(_)) val allLabels = dataset.select(selectedCols: _*) - .collect().toSeq.flatMap(_.toSeq).asInstanceOf[Seq[Seq[String]]] + .collect().toSeq.flatMap(_.toSeq) + .asInstanceOf[scala.collection.Seq[scala.collection.Seq[String]]].toSeq ThreadUtils.parmap(allLabels, "sortingStringLabels", 8) { labels => val sorted = labels.filter(_ != null).sorted if (ascending) { @@ -522,7 +523,7 @@ object StringIndexerModel extends MLReadable[StringIndexerModel] { val data = sparkSession.read.parquet(dataPath) .select("labelsArray") .head() - data.getAs[Seq[Seq[String]]](0).map(_.toArray).toArray + data.getSeq[scala.collection.Seq[String]](0).map(_.toArray).toArray } val model = new StringIndexerModel(metadata.uid, labelsArray) metadata.getAndSetParams(model) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala index b7cf4392cd177..874b421387279 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala @@ -300,7 +300,7 @@ class VectorIndexerModel private[ml] ( /** Java-friendly version of [[categoryMaps]] */ @Since("1.4.0") def javaCategoryMaps: JMap[JInt, JMap[JDouble, JInt]] = { - categoryMaps.mapValues(_.asJava).asJava.asInstanceOf[JMap[JInt, JMap[JDouble, JInt]]] + categoryMaps.mapValues(_.asJava).toMap.asJava.asInstanceOf[JMap[JInt, JMap[JDouble, JInt]]] } /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala index 01db39f9e3921..9b5f5a619e02c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala @@ -169,7 +169,8 @@ final class Word2Vec @Since("1.4.0") ( @Since("2.0.0") override def fit(dataset: Dataset[_]): Word2VecModel = { transformSchema(dataset.schema, logging = true) - val input = dataset.select($(inputCol)).rdd.map(_.getAs[Seq[String]](0)) + val input = + dataset.select($(inputCol)).rdd.map(_.getSeq[String](0)) val wordVectors = new feature.Word2Vec() .setLearningRate($(stepSize)) .setMinCount($(minCount)) diff --git a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala index 7aab4ef62c4d9..8aaa5efdf06c5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala @@ -363,7 +363,7 @@ object FPGrowthModel extends 
MLReadable[FPGrowthModel] { Map.empty[Any, Double] } else { frequentItems.rdd.flatMap { - case Row(items: Seq[_], count: Long) if items.length == 1 => + case Row(items: scala.collection.Seq[_], count: Long) if items.length == 1 => Some(items.head -> count.toDouble / numTrainingRecords) case _ => None }.collectAsMap() diff --git a/mllib/src/main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala b/mllib/src/main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala index c9c049248f70c..10a569a8ff88b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala @@ -146,7 +146,7 @@ final class PrefixSpan(@Since("2.4.0") override val uid: String) extends Params val data = dataset.select(sequenceColParam) val sequences = data.where(col(sequenceColParam).isNotNull).rdd - .map(r => r.getAs[Seq[Seq[Any]]](0).map(_.toArray).toArray) + .map(r => r.getSeq[scala.collection.Seq[Any]](0).map(_.toArray).toArray) val mllibPrefixSpan = new mllibPrefixSpan() .setMinSupport($(minSupport)) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala index 586f622fc47c5..5b13deffcf056 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala @@ -216,7 +216,7 @@ object NaiveBayesModel extends Loader[NaiveBayesModel] { val data = dataArray(0) val labels = data.getAs[Seq[Double]](0).toArray val pi = data.getAs[Seq[Double]](1).toArray - val theta = data.getAs[Seq[Seq[Double]]](2).map(_.toArray).toArray + val theta = data.getSeq[scala.collection.Seq[Double]](2).map(_.toArray).toArray val modelType = data.getString(3) new NaiveBayesModel(labels, pi, theta, modelType) } @@ -260,7 +260,7 @@ object NaiveBayesModel extends Loader[NaiveBayesModel] { val data = dataArray(0) val labels = data.getAs[Seq[Double]](0).toArray val pi = data.getAs[Seq[Double]](1).toArray - val theta = data.getAs[Seq[Seq[Double]]](2).map(_.toArray).toArray + val theta = data.getSeq[scala.collection.Seq[Double]](2).map(_.toArray).toArray new NaiveBayesModel(labels, pi, theta) } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala index de3209c34bf07..cd71aac34c268 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala @@ -683,7 +683,7 @@ object PrefixSpanModel extends Loader[PrefixSpanModel[_]] { def loadImpl[Item: ClassTag](freqSequences: DataFrame, sample: Item): PrefixSpanModel[Item] = { val freqSequencesRDD = freqSequences.select("sequence", "freq").rdd.map { x => - val sequence = x.getAs[Seq[Seq[Item]]](0).map(_.toArray).toArray + val sequence = x.getSeq[scala.collection.Seq[Item]](0).map(_.toArray).toArray val freq = x.getLong(1) new PrefixSpan.FreqSequence(sequence, freq) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala index e5e82d19f1cbd..d79314b9637a8 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala @@ -386,12 +386,12 @@ object MatrixFactorizationModel extends 
Loader[MatrixFactorizationModel] { assert(formatVersion == thisFormatVersion) val rank = (metadata \ "rank").extract[Int] val userFeatures = spark.read.parquet(userPath(path)).rdd.map { - case Row(id: Int, features: Seq[_]) => - (id, features.asInstanceOf[Seq[Double]].toArray) + case Row(id: Int, features: scala.collection.Seq[_]) => + (id, features.asInstanceOf[scala.collection.Seq[Double]].toArray) } val productFeatures = spark.read.parquet(productPath(path)).rdd.map { - case Row(id: Int, features: Seq[_]) => - (id, features.asInstanceOf[Seq[Double]].toArray) + case Row(id: Int, features: scala.collection.Seq[_]) => + (id, features.asInstanceOf[scala.collection.Seq[Double]].toArray) } new MatrixFactorizationModel(rank, userFeatures, productFeatures) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala index 9983ca7dc5e87..cdc998000c2fc 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala @@ -164,7 +164,7 @@ object DecisionTreeModel extends Loader[DecisionTreeModel] with Logging { } def apply(r: Row): SplitData = { - SplitData(r.getInt(0), r.getDouble(1), r.getInt(2), r.getAs[Seq[Double]](3)) + SplitData(r.getInt(0), r.getDouble(1), r.getInt(2), r.getSeq[Double](3)) } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala b/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala index d0898220b80de..e05d76cf70ed3 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala @@ -199,11 +199,11 @@ class LDASuite extends MLTest with DefaultReadWriteTest { assert(topics.count() === k) assert(topics.select("topic").rdd.map(_.getInt(0)).collect().toSet === Range(0, k).toSet) topics.select("termIndices").collect().foreach { case r: Row => - val termIndices = r.getAs[Seq[Int]](0) + val termIndices = r.getSeq[Int](0) assert(termIndices.length === 3 && termIndices.toSet.size === 3) } topics.select("termWeights").collect().foreach { case r: Row => - val termWeights = r.getAs[Seq[Double]](0) + val termWeights = r.getSeq[Double](0) assert(termWeights.length === 3 && termWeights.forall(w => w >= 0.0 && w <= 1.0)) } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSHSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSHSuite.scala index 9b823259b1deb..a7d320e8164b6 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSHSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSHSuite.scala @@ -115,7 +115,7 @@ class BucketedRandomProjectionLSHSuite extends MLTest with DefaultReadWriteTest val brpModel = brp.fit(dataset) testTransformer[Tuple1[Vector]](dataset.toDF(), brpModel, "values") { - case Row(values: Seq[_]) => + case Row(values: scala.collection.Seq[_]) => assert(values.length === brp.getNumHashTables) } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/LSHTest.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/LSHTest.scala index 1d052fbebd92d..93564681994d7 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/LSHTest.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/LSHTest.scala @@ -71,7 +71,8 @@ private[ml] object LSHTest { 
transformedData.schema, model.getOutputCol, DataTypes.createArrayType(new VectorUDT)) // Check output column dimensions - val headHashValue = transformedData.select(outputCol).head().get(0).asInstanceOf[Seq[Vector]] + val headHashValue = + transformedData.select(outputCol).head().get(0).asInstanceOf[scala.collection.Seq[Vector]] assert(headHashValue.length == model.getNumHashTables) // Perform a cross join and label each pair of same_bucket and distance diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/MinHashLSHSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/MinHashLSHSuite.scala index 1c2956cb82908..c99e0fa3f8623 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/MinHashLSHSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/MinHashLSHSuite.scala @@ -182,7 +182,7 @@ class MinHashLSHSuite extends MLTest with DefaultReadWriteTest { val model = new MinHashLSHModel("mh", randCoefficients = Array((1, 0))) model.set(model.inputCol, "keys") testTransformer[Tuple1[Vector]](dataset.toDF(), model, "keys", model.getOutputCol) { - case Row(_: Vector, output: Seq[_]) => + case Row(_: Vector, output: scala.collection.Seq[_]) => assert(output.length === model.randCoefficients.length) // no AND-amplification yet: SPARK-18450, so each hash output is of length 1 output.foreach { diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/NGramSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/NGramSuite.scala index 1483d5df4d224..bf276ceed2097 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/NGramSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/NGramSuite.scala @@ -83,7 +83,7 @@ class NGramSuite extends MLTest with DefaultReadWriteTest { def testNGram(t: NGram, dataFrame: DataFrame): Unit = { testTransformer[(Seq[String], Seq[String])](dataFrame, t, "nGrams", "wantedNGrams") { - case Row(actualNGrams : Seq[_], wantedNGrams: Seq[_]) => + case Row(actualNGrams : scala.collection.Seq[_], wantedNGrams: scala.collection.Seq[_]) => assert(actualNGrams === wantedNGrams) } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala index dc6fb31a1f8e4..eaf91769a08dd 100755 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala @@ -29,7 +29,7 @@ class StopWordsRemoverSuite extends MLTest with DefaultReadWriteTest { def testStopWordsRemover(t: StopWordsRemover, dataFrame: DataFrame): Unit = { testTransformer[(Array[String], Array[String])](dataFrame, t, "filtered", "expected") { - case Row(tokens: Seq[_], wantedTokens: Seq[_]) => + case Row(tokens: scala.collection.Seq[_], wantedTokens: scala.collection.Seq[_]) => assert(tokens === wantedTokens) } } @@ -242,7 +242,8 @@ class StopWordsRemoverSuite extends MLTest with DefaultReadWriteTest { remover.transform(df) .select("filtered1", "expected1", "filtered2", "expected2") .collect().foreach { - case Row(r1: Seq[_], e1: Seq[_], r2: Seq[_], e2: Seq[_]) => + case Row(r1: scala.collection.Seq[_], e1: scala.collection.Seq[_], + r2: scala.collection.Seq[_], e2: scala.collection.Seq[_]) => assert(r1 === e1, s"The result value is not correct after bucketing. 
Expected $e1 but found $r1") assert(r2 === e2, @@ -268,7 +269,8 @@ class StopWordsRemoverSuite extends MLTest with DefaultReadWriteTest { remover.transform(df) .select("filtered1", "expected1", "filtered2", "expected2") .collect().foreach { - case Row(r1: Seq[_], e1: Seq[_], r2: Seq[_], e2: Seq[_]) => + case Row(r1: scala.collection.Seq[_], e1: scala.collection.Seq[_], + r2: scala.collection.Seq[_], e2: scala.collection.Seq[_]) => assert(r1 === e1, s"The result value is not correct after bucketing. Expected $e1 but found $r1") assert(r2 === e2, diff --git a/mllib/src/test/scala/org/apache/spark/ml/fpm/FPGrowthSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/fpm/FPGrowthSuite.scala index d42ced0f8f91b..3d994366b8918 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/fpm/FPGrowthSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/fpm/FPGrowthSuite.scala @@ -121,7 +121,7 @@ class FPGrowthSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul val prediction = model.transform( spark.createDataFrame(Seq(Tuple1(Array("1", "2")))).toDF("items") - ).first().getAs[Seq[String]]("prediction") + ).first().getAs[scala.collection.Seq[String]]("prediction") assert(prediction === Seq("3")) } diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala index 31dc6d379e76c..aeddb5ac7b13e 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala @@ -175,7 +175,7 @@ class RandomForestRegressorSuite extends MLTest with DefaultReadWriteTest{ val testParams = Seq( (50, 5, 1.0, 0.75), (50, 10, 1.0, 0.75), - (50, 10, 0.95, 0.78) + (50, 10, 0.95, 0.75) ) for ((numTrees, maxDepth, subsamplingRate, tol) <- testParams) { diff --git a/mllib/src/test/scala/org/apache/spark/ml/util/MLTestSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/util/MLTestSuite.scala index 20c5b5395f6a4..1732469ccf590 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/util/MLTestSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/util/MLTestSuite.scala @@ -47,7 +47,7 @@ class MLTestSuite extends MLTest { } intercept[Exception] { testTransformerOnStreamData[(Int, String)](data, indexerModel, "id", "indexed") { - rows: Seq[Row] => + rows: scala.collection.Seq[Row] => assert(rows.map(_.getDouble(1)).max === 1.0) } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/feature/Word2VecSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/feature/Word2VecSuite.scala index a679fe43414f2..e4cd492be3d2e 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/feature/Word2VecSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/feature/Word2VecSuite.scala @@ -43,7 +43,8 @@ class Word2VecSuite extends SparkFunSuite with MLlibTestSparkContext { // and a Word2VecMap give the same values. 
val word2VecMap = model.getVectors val newModel = new Word2VecModel(word2VecMap) - assert(newModel.getVectors.mapValues(_.toSeq) === word2VecMap.mapValues(_.toSeq)) + assert(newModel.getVectors.mapValues(_.toSeq).toMap === + word2VecMap.mapValues(_.toSeq).toMap) } test("Word2Vec throws exception when vocabulary is empty") { @@ -102,7 +103,8 @@ class Word2VecSuite extends SparkFunSuite with MLlibTestSparkContext { try { model.save(sc, path) val sameModel = Word2VecModel.load(sc, path) - assert(sameModel.getVectors.mapValues(_.toSeq) === model.getVectors.mapValues(_.toSeq)) + assert(sameModel.getVectors.mapValues(_.toSeq).toMap === + model.getVectors.mapValues(_.toSeq).toMap) } finally { Utils.deleteRecursively(tempDir) } @@ -136,7 +138,8 @@ class Word2VecSuite extends SparkFunSuite with MLlibTestSparkContext { try { model.save(sc, path) val sameModel = Word2VecModel.load(sc, path) - assert(sameModel.getVectors.mapValues(_.toSeq) === model.getVectors.mapValues(_.toSeq)) + assert(sameModel.getVectors.mapValues(_.toSeq).toMap === + model.getVectors.mapValues(_.toSeq).toMap) } catch { case t: Throwable => fail("exception thrown persisting a model " + From f41ba2a2f3b86e485aa0ca1c10a2efe9a7163fb3 Mon Sep 17 00:00:00 2001 From: "tanel.kiis@gmail.com" Date: Mon, 28 Sep 2020 12:22:15 +0900 Subject: [PATCH 0117/1009] [SPARK-32927][SQL] Bitwise OR, AND and XOR should have similar canonicalization rules to boolean OR and AND ### What changes were proposed in this pull request? Add canonicalization rules for commutative bitwise operations. ### Why are the changes needed? Canonical form is used in many other optimization rules. Reduces the number of cases, where plans with identical results are considered to be distinct. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? UT Closes #29794 from tanelk/SPARK-32927. 
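The rule rests on one idea: flatten a chain of a commutative operator and rebuild it in a deterministic operand order, so that semantically identical trees collapse to a single shape. The toy sketch below models that idea with a made-up `Expr` ADT; it is not Spark's `Canonicalize` code, only a minimal standalone illustration of what the new cases in the diff do for the bitwise operators.

```
sealed trait Expr
case class Ref(name: String) extends Expr
case class BitOr(left: Expr, right: Expr) extends Expr

// Gather every operand of a nested BitOr chain.
def flatten(e: Expr): Seq[Expr] = e match {
  case BitOr(l, r) => flatten(l) ++ flatten(r)
  case other       => Seq(other)
}

// Rebuild the chain with operands in a fixed (hashCode-based) order.
def canonicalize(e: Expr): Expr = e match {
  case or: BitOr => flatten(or).sortBy(_.hashCode()).reduce(BitOr)
  case other     => other
}

val e1 = BitOr(Ref("a"), BitOr(Ref("b"), Ref("c")))   // a | (b | c)
val e2 = BitOr(BitOr(Ref("c"), Ref("a")), Ref("b"))   // (c | a) | b
assert(canonicalize(e1) == canonicalize(e2))          // same canonical form
```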
Lead-authored-by: tanel.kiis@gmail.com Co-authored-by: Tanel Kiis Signed-off-by: HyukjinKwon --- .../catalyst/expressions/Canonicalize.scala | 7 +++ .../expressions/CanonicalizeSuite.scala | 47 +++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala index a8031086d82f7..1ecf4372cfb58 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala @@ -80,6 +80,13 @@ object Canonicalize { orderCommutative(a, { case And(l, r) if l.deterministic && r.deterministic => Seq(l, r)}) .reduce(And) + case o: BitwiseOr => + orderCommutative(o, { case BitwiseOr(l, r) => Seq(l, r) }).reduce(BitwiseOr) + case a: BitwiseAnd => + orderCommutative(a, { case BitwiseAnd(l, r) => Seq(l, r) }).reduce(BitwiseAnd) + case x: BitwiseXor => + orderCommutative(x, { case BitwiseXor(l, r) => Seq(l, r) }).reduce(BitwiseXor) + case EqualTo(l, r) if l.hashCode() > r.hashCode() => EqualTo(r, l) case EqualNullSafe(l, r) if l.hashCode() > r.hashCode() => EqualNullSafe(r, l) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala index a043b4cbed1f1..bcbccd93e509f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.expressions import java.util.TimeZone import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.plans.logical.Range import org.apache.spark.sql.types.{IntegerType, LongType, StructField, StructType} @@ -95,4 +96,50 @@ class CanonicalizeSuite extends SparkFunSuite { val castWithTimeZoneId = Cast(literal, LongType, Some(TimeZone.getDefault.getID)) assert(castWithTimeZoneId.semanticEquals(cast)) } + + test("SPARK-32927: Bitwise operations are commutative") { + Seq(BitwiseOr(_, _), BitwiseAnd(_, _), BitwiseXor(_, _)).foreach { f => + val e1 = f('a, f('b, 'c)) + val e2 = f(f('a, 'b), 'c) + val e3 = f('a, f('b, 'a)) + + assert(e1.canonicalized == e2.canonicalized) + assert(e1.canonicalized != e3.canonicalized) + } + } + + test("SPARK-32927: Bitwise operations are commutative for non-deterministic expressions") { + Seq(BitwiseOr(_, _), BitwiseAnd(_, _), BitwiseXor(_, _)).foreach { f => + val e1 = f('a, f(rand(42), 'c)) + val e2 = f(f('a, rand(42)), 'c) + val e3 = f('a, f(rand(42), 'a)) + + assert(e1.canonicalized == e2.canonicalized) + assert(e1.canonicalized != e3.canonicalized) + } + } + + test("SPARK-32927: Bitwise operations are commutative for literal expressions") { + Seq(BitwiseOr(_, _), BitwiseAnd(_, _), BitwiseXor(_, _)).foreach { f => + val e1 = f('a, f(42, 'c)) + val e2 = f(f('a, 42), 'c) + val e3 = f('a, f(42, 'a)) + + assert(e1.canonicalized == e2.canonicalized) + assert(e1.canonicalized != e3.canonicalized) + } + } + + test("SPARK-32927: Bitwise operations are commutative in a complex case") { + Seq(BitwiseOr(_, _), BitwiseAnd(_, _), BitwiseXor(_, _)).foreach { f1 => + Seq(BitwiseOr(_, _), BitwiseAnd(_, _), 
BitwiseXor(_, _)).foreach { f2 => + val e1 = f2(f1('a, f1('b, 'c)), 'a) + val e2 = f2(f1(f1('a, 'b), 'c), 'a) + val e3 = f2(f1('a, f1('b, 'a)), 'a) + + assert(e1.canonicalized == e2.canonicalized) + assert(e1.canonicalized != e3.canonicalized) + } + } + } } From a7f84a0b457ed3e1b854729f132e218a4ae48b21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabian=20H=C3=B6ring?= Date: Mon, 28 Sep 2020 12:30:28 +0900 Subject: [PATCH 0118/1009] [SPARK-32187][PYTHON][DOCS] Doc on Python packaging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This PR proposes to document PySpark specific packaging guidelines. ### Why are the changes needed? To have a single place for PySpark users, and better documentation. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? ``` cd python/docs make clean html ``` Closes #29806 from fhoering/add_doc_python_packaging. Lead-authored-by: Fabian Höring Co-authored-by: Hyukjin Kwon Signed-off-by: HyukjinKwon --- python/docs/source/user_guide/index.rst | 1 + .../source/user_guide/python_packaging.rst | 201 ++++++++++++++++++ 2 files changed, 202 insertions(+) create mode 100644 python/docs/source/user_guide/python_packaging.rst diff --git a/python/docs/source/user_guide/index.rst b/python/docs/source/user_guide/index.rst index c39feace05209..3e535ce16b22e 100644 --- a/python/docs/source/user_guide/index.rst +++ b/python/docs/source/user_guide/index.rst @@ -24,4 +24,5 @@ User Guide :maxdepth: 2 arrow_pandas + python_packaging diff --git a/python/docs/source/user_guide/python_packaging.rst b/python/docs/source/user_guide/python_packaging.rst new file mode 100644 index 0000000000000..ef4d05a8eefea --- /dev/null +++ b/python/docs/source/user_guide/python_packaging.rst @@ -0,0 +1,201 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +========================= +3rd Party Python Packages +========================= + +When you want to run your PySpark application on a cluster such as YARN, Kubernetes, Mesos, etc., you need to make +sure that your code and all used libraries are available on the executors. + +As an example let's say you may want to run the `Pandas UDF's examples `_. +As it uses pyarrow as an underlying implementation we need to make sure to have pyarrow installed on each executor +on the cluster. Otherwise you may get errors such as ``ModuleNotFoundError: No module named 'pyarrow'``. + +Here is the script ``app.py`` from the previous example that will be executed on the cluster: + +.. 
code-block:: python + + import pandas as pd + from pyspark.sql.functions import pandas_udf + from pyspark.sql import SparkSession + + def main(spark): + df = spark.createDataFrame( + [(1, 1.0), (1, 2.0), (2, 3.0), (2, 5.0), (2, 10.0)], + ("id", "v")) + + @pandas_udf("double") + def mean_udf(v: pd.Series) -> float: + return v.mean() + + print(df.groupby("id").agg(mean_udf(df['v'])).collect()) + + + if __name__ == "__main__": + main(SparkSession.builder.getOrCreate()) + + +There are multiple ways to ship the dependencies to the cluster: + +- Using PySpark Native Features +- Using Zipped Virtual Environment +- Using PEX + + +Using PySpark Native Features +----------------------------- + +PySpark allows to upload Python files (``.py``), zipped Python packages (``.zip``), and Egg files (``.egg``) +to the executors by setting the configuration setting ``spark.submit.pyFiles`` or by directly +calling :meth:`pyspark.SparkContext.addPyFile`. + +This is an easy way to ship additional custom Python code to the cluster. You can just add individual files or zip whole +packages and upload them. Using :meth:`pyspark.SparkContext.addPyFile` allows to upload code +even after having started your job. + +Note that it doesn't allow to add packages built as `Wheels `_ and therefore doesn't +allow to include dependencies with native code. + + +Using Zipped Virtual Environment +-------------------------------- + +The idea of zipped environments is to zip your whole `virtual environment `_, +ship it to the cluster, unzip it remotely and target the Python interpreter from inside this zipped environment. Note that this +is currently supported *only for YARN*. + +Zip Virtual Environment +~~~~~~~~~~~~~~~~~~~~~~~ + +You can zip the virtual environment on your own or use tools for doing this: + +* `conda-pack `_ for conda environments +* `venv-pack `_ for virtual environments + +Example with `conda-pack`: + +.. code-block:: bash + + conda create -y -n conda_env -c conda-forge \ + pyspark==3.0.1 pyarrow==0.15.1 pandas==0.25.3 conda-pack==0.4.0 + conda activate conda_env + conda pack -f -o conda_env.tar.gz + +Upload to Spark Executors +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Unzipping will be done by Spark when using target ``--archives`` option in spark-submit +or setting ``spark.yarn.dist.archives`` configuration. + +Example with ``spark-submit``: + +.. code-block:: bash + + export PYSPARK_DRIVER_PYTHON=python + export PYSPARK_PYTHON=./environment/bin/python + spark-submit --master=yarn --deploy-mode client \ + --archives conda_env.tar.gz#environment app.py + +Example using ``SparkSession.builder``: + +.. code-block:: python + + import os + from pyspark.sql import SparkSession + from app import main + + os.environ['PYSPARK_PYTHON'] = "./environment/bin/python" + builder = SparkSession.builder.master("yarn").config( + "spark.yarn.dist.archives", "conda_env.tar.gz#environment") + spark = builder.getOrCreate() + main(spark) + + +Using PEX +--------- + +`PEX `_ is a library for generating ``.pex`` (Python EXecutable) files. +A PEX file is a self-contained executable Python environment. It can be seen as the Python equivalent of Java uber-JARs (a.k.a. fat JARs). + +You need to build the PEX file somewhere with all your requirements and then upload it to each Spark executor. + +Using CLI to Build PEX file +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: bash + + pex pyspark==3.0.1 pyarrow==0.15.1 pandas==0.25.3 -o myarchive.pex + + +Invoking the PEX file will by default invoke the Python interpreter. 
pyarrow, pandas and pyspark will be included in the PEX file. + +.. code-block:: bash + + ./myarchive.pex + Python 3.6.6 (default, Jan 26 2019, 16:53:05) + (InteractiveConsole) + >>> import pyarrow + >>> import pandas + >>> import pyspark + >>> + +This can also be done directly with the Python API. For more information on how to build PEX files, +please refer to `Building .pex files `_ + +Upload to Spark Executors +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The upload can be done by setting ``--files`` option in spark-submit or setting ``spark.files`` configuration (``spark.yarn.dist.files`` on YARN) +and changing the ``PYSPARK_PYTHON`` environment variable to change the Python interpreter to the PEX executable on each executor. + +.. + TODO: we should also document the way on other cluster modes. + +Example with ``spark-submit`` on YARN: + +.. code-block:: bash + + export PYSPARK_DRIVER_PYTHON=python + export PYSPARK_PYTHON=./myarchive.pex + spark-submit --master=yarn --deploy-mode client --files myarchive.pex app.py + +Example using ``SparkSession.builder`` on YARN: + +.. code-block:: python + + import os + from pyspark.sql import SparkSession + from app import main + + os.environ['PYSPARK_PYTHON']="./myarchive.pex" + builder = SparkSession.builder + builder.master("yarn") \ + .config("spark.submit.deployMode", "client") \ + .config("spark.yarn.dist.files", "myarchive.pex") + spark = builder.getOrCreate() + main(spark) + +Notes +~~~~~ + +* The Python interpreter that has been used to generate the PEX file must be available on each executor. PEX doesn't include the Python interpreter. + +* In YARN cluster mode you may also need to set ``PYSPARK_PYTHON`` environment variable on the AppMaster ``--conf spark.yarn.appMasterEnv.PYSPARK_PYTHON=./myarchive.pex``. + +* An end-to-end Docker example for deploying a standalone PySpark with ``SparkSession.builder`` and PEX can be found `here `_ - it uses cluster-pack, a library on top of PEX that automatizes the the intermediate step of having to create & upload the PEX manually. From d15f504a5e8bd8acfb6dc1ee138f7d92ff211396 Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Mon, 28 Sep 2020 14:57:59 +0900 Subject: [PATCH 0119/1009] [SPARK-33011][ML] Promote the stability annotation to Evolving for MLEvent traits/classes ### What changes were proposed in this pull request? This PR proposes to promote the stability annotation to `Evolving` for MLEvent traits/classes. ### Why are the changes needed? The feature is released in Spark 3.0.0 having SPARK-26818 as the last change in Feb. 2020, and haven't changed in Spark 3.0.1. (There's no change more than a half of year.) While we'd better to wait for some minor releases to consider the API as stable, it would worth to promote to Evolving so that we clearly state that we support the API. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Just changed the annotation, no tests required. Closes #29887 from HeartSaVioR/SPARK-33011. 
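For context on what is being promoted: `MLEvent` extends `SparkListenerEvent` (see the diff below), so user code can observe fit/transform/save/load activity with an ordinary `SparkListener`. The snippet below is an illustrative sketch only, not part of this patch; it assumes a live `SparkContext` named `sc`.

```
import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent}
import org.apache.spark.ml.{FitEnd, FitStart, MLEvent}

// Sketch: receive ML events through the regular listener bus callback.
sc.addSparkListener(new SparkListener {
  override def onOtherEvent(event: SparkListenerEvent): Unit = event match {
    case e: FitStart[_] => println(s"fit started: ${e.estimator.uid}")
    case e: FitEnd[_]   => println(s"fit produced model: ${e.model}")
    case _: MLEvent     => // other ML events (transform, save, load, ...)
    case _              => // non-ML scheduler events
  }
})
```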
Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: HyukjinKwon --- .../scala/org/apache/spark/ml/events.scala | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/events.scala b/mllib/src/main/scala/org/apache/spark/ml/events.scala index dc4be4dd9efda..f221183369dfd 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/events.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/events.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml import com.fasterxml.jackson.annotation.JsonIgnore import org.apache.spark.SparkContext -import org.apache.spark.annotation.Unstable +import org.apache.spark.annotation.Evolving import org.apache.spark.internal.Logging import org.apache.spark.ml.util.{MLReader, MLWriter} import org.apache.spark.scheduler.SparkListenerEvent @@ -31,10 +31,8 @@ import org.apache.spark.sql.{DataFrame, Dataset} * after each operation (the event should document this). * * @note This is supported via [[Pipeline]] and [[PipelineModel]]. - * @note This is experimental and unstable. Do not use this unless you fully - * understand what `Unstable` means. */ -@Unstable +@Evolving sealed trait MLEvent extends SparkListenerEvent { // Do not log ML events in event log. It should be revisited to see // how it works with history server. @@ -44,7 +42,7 @@ sealed trait MLEvent extends SparkListenerEvent { /** * Event fired before `Transformer.transform`. */ -@Unstable +@Evolving case class TransformStart() extends MLEvent { @JsonIgnore var transformer: Transformer = _ @JsonIgnore var input: Dataset[_] = _ @@ -53,7 +51,7 @@ case class TransformStart() extends MLEvent { /** * Event fired after `Transformer.transform`. */ -@Unstable +@Evolving case class TransformEnd() extends MLEvent { @JsonIgnore var transformer: Transformer = _ @JsonIgnore var output: Dataset[_] = _ @@ -62,7 +60,7 @@ case class TransformEnd() extends MLEvent { /** * Event fired before `Estimator.fit`. */ -@Unstable +@Evolving case class FitStart[M <: Model[M]]() extends MLEvent { @JsonIgnore var estimator: Estimator[M] = _ @JsonIgnore var dataset: Dataset[_] = _ @@ -71,7 +69,7 @@ case class FitStart[M <: Model[M]]() extends MLEvent { /** * Event fired after `Estimator.fit`. */ -@Unstable +@Evolving case class FitEnd[M <: Model[M]]() extends MLEvent { @JsonIgnore var estimator: Estimator[M] = _ @JsonIgnore var model: M = _ @@ -80,7 +78,7 @@ case class FitEnd[M <: Model[M]]() extends MLEvent { /** * Event fired before `MLReader.load`. */ -@Unstable +@Evolving case class LoadInstanceStart[T](path: String) extends MLEvent { @JsonIgnore var reader: MLReader[T] = _ } @@ -88,7 +86,7 @@ case class LoadInstanceStart[T](path: String) extends MLEvent { /** * Event fired after `MLReader.load`. */ -@Unstable +@Evolving case class LoadInstanceEnd[T]() extends MLEvent { @JsonIgnore var reader: MLReader[T] = _ @JsonIgnore var instance: T = _ @@ -97,7 +95,7 @@ case class LoadInstanceEnd[T]() extends MLEvent { /** * Event fired before `MLWriter.save`. */ -@Unstable +@Evolving case class SaveInstanceStart(path: String) extends MLEvent { @JsonIgnore var writer: MLWriter = _ } @@ -105,7 +103,7 @@ case class SaveInstanceStart(path: String) extends MLEvent { /** * Event fired after `MLWriter.save`. 
*/ -@Unstable +@Evolving case class SaveInstanceEnd(path: String) extends MLEvent { @JsonIgnore var writer: MLWriter = _ } From 173da5bf11daecbd428add1a5e0aedd58a66fadb Mon Sep 17 00:00:00 2001 From: Shruti Gumma Date: Mon, 28 Sep 2020 10:07:36 -0700 Subject: [PATCH 0120/1009] [SPARK-32996][WEB-UI] Handle empty ExecutorMetrics in ExecutorMetricsJsonSerializer ### What changes were proposed in this pull request? When `peakMemoryMetrics` in `ExecutorSummary` is `Option.empty`, then the `ExecutorMetricsJsonSerializer#serialize` method does not execute the `jsonGenerator.writeObject` method. This causes the json to be generated with `peakMemoryMetrics` key added to the serialized string, but no corresponding value. This causes an error to be thrown when it is the next key `attributes` turn to be added to the json: `com.fasterxml.jackson.core.JsonGenerationException: Can not write a field name, expecting a value ` ### Why are the changes needed? At the start of the Spark job, if `peakMemoryMetrics` is `Option.empty`, then it causes a `com.fasterxml.jackson.core.JsonGenerationException` to be thrown when we navigate to the Executors tab in Spark UI. Complete stacktrace: > com.fasterxml.jackson.core.JsonGenerationException: Can not write a field name, expecting a value > at com.fasterxml.jackson.core.JsonGenerator._reportError(JsonGenerator.java:2080) > at com.fasterxml.jackson.core.json.WriterBasedJsonGenerator.writeFieldName(WriterBasedJsonGenerator.java:161) > at com.fasterxml.jackson.databind.ser.BeanPropertyWriter.serializeAsField(BeanPropertyWriter.java:725) > at com.fasterxml.jackson.databind.ser.std.BeanSerializerBase.serializeFields(BeanSerializerBase.java:721) > at com.fasterxml.jackson.databind.ser.BeanSerializer.serialize(BeanSerializer.java:166) > at com.fasterxml.jackson.databind.ser.std.CollectionSerializer.serializeContents(CollectionSerializer.java:145) > at com.fasterxml.jackson.module.scala.ser.IterableSerializer.serializeContents(IterableSerializerModule.scala:26) > at com.fasterxml.jackson.module.scala.ser.IterableSerializer.serializeContents$(IterableSerializerModule.scala:25) > at com.fasterxml.jackson.module.scala.ser.UnresolvedIterableSerializer.serializeContents(IterableSerializerModule.scala:54) > at com.fasterxml.jackson.module.scala.ser.UnresolvedIterableSerializer.serializeContents(IterableSerializerModule.scala:54) > at com.fasterxml.jackson.databind.ser.std.AsArraySerializerBase.serialize(AsArraySerializerBase.java:250) > at com.fasterxml.jackson.databind.ser.DefaultSerializerProvider._serialize(DefaultSerializerProvider.java:480) > at com.fasterxml.jackson.databind.ser.DefaultSerializerProvider.serializeValue(DefaultSerializerProvider.java:319) > at com.fasterxml.jackson.databind.ObjectMapper._configAndWriteValue(ObjectMapper.java:4094) > at com.fasterxml.jackson.databind.ObjectMapper.writeValueAsString(ObjectMapper.java:3404) > at org.apache.spark.ui.exec.ExecutorsPage.allExecutorsDataScript$1(ExecutorsTab.scala:64) > at org.apache.spark.ui.exec.ExecutorsPage.render(ExecutorsTab.scala:76) > at org.apache.spark.ui.WebUI.$anonfun$attachPage$1(WebUI.scala:89) > at org.apache.spark.ui.JettyUtils$$anon$1.doGet(JettyUtils.scala:80) > at javax.servlet.http.HttpServlet.service(HttpServlet.java:687) > at javax.servlet.http.HttpServlet.service(HttpServlet.java:790) > at org.sparkproject.jetty.servlet.ServletHolder.handle(ServletHolder.java:873) > at org.sparkproject.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1623) > at 
org.apache.spark.ui.HttpSecurityFilter.doFilter(HttpSecurityFilter.scala:95) > at org.sparkproject.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1610) > at org.sparkproject.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:540) > at org.sparkproject.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:255) > at org.sparkproject.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1345) > at org.sparkproject.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:203) > at org.sparkproject.jetty.servlet.ServletHandler.doScope(ServletHandler.java:480) > at org.sparkproject.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:201) > at org.sparkproject.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1247) > at org.sparkproject.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:144) > at org.sparkproject.jetty.server.handler.gzip.GzipHandler.handle(GzipHandler.java:753) > at org.sparkproject.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:220) > at org.sparkproject.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132) > at org.sparkproject.jetty.server.Server.handle(Server.java:505) > at org.sparkproject.jetty.server.HttpChannel.handle(HttpChannel.java:370) > at org.sparkproject.jetty.server.HttpConnection.onFillable(HttpConnection.java:267) > at org.sparkproject.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:305) > at org.sparkproject.jetty.io.FillInterest.fillable(FillInterest.java:103) > at org.sparkproject.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:117) > at org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:333) > at org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:310) > at org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:168) > at org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:126) > at org.sparkproject.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:366) > at org.sparkproject.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:698) > at org.sparkproject.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:804) > at java.base/java.lang.Thread.run(Thread.java:834) ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test Closes #29872 from shrutig/SPARK-32996. 
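The failure mode is easier to see on a tiny model: a field-level Jackson serializer has to emit some token for every field it is asked to write, and returning without writing anything leaves the field name dangling in the output stream. The sketch below uses a hypothetical `Metrics`/`Summary` pair rather than the real `ExecutorSummary`, and assumes `jackson-databind` and `jackson-module-scala` are on the classpath (both ship with Spark); it is an illustration of the fix pattern, not this patch's code.

```
import com.fasterxml.jackson.core.JsonGenerator
import com.fasterxml.jackson.databind.{JsonSerializer, ObjectMapper, SerializerProvider}
import com.fasterxml.jackson.databind.annotation.JsonSerialize
import com.fasterxml.jackson.module.scala.DefaultScalaModule

case class Metrics(used: Long)

// Emit an explicit null for None instead of skipping the write entirely,
// otherwise the already-written field name is left without a value.
class OptionMetricsSerializer extends JsonSerializer[Option[Metrics]] {
  override def serialize(
      value: Option[Metrics],
      gen: JsonGenerator,
      provider: SerializerProvider): Unit = value match {
    case Some(m) => gen.writeObject(m)   // "peak":{"used":...}
    case None    => gen.writeNull()      // "peak":null
  }
}

case class Summary(
    id: String,
    @JsonSerialize(using = classOf[OptionMetricsSerializer]) peak: Option[Metrics])

val mapper = new ObjectMapper().registerModule(DefaultScalaModule)
mapper.writeValueAsString(Summary("exec-1", None))              // {"id":"exec-1","peak":null}
mapper.writeValueAsString(Summary("exec-1", Some(Metrics(42)))) // {"id":"exec-1","peak":{"used":42}}
```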
Authored-by: Shruti Gumma Signed-off-by: Liang-Chi Hsieh --- .../org/apache/spark/status/api/v1/api.scala | 16 ++++-- .../status/api/v1/ExecutorSummarySuite.scala | 51 +++++++++++++++++++ 2 files changed, 63 insertions(+), 4 deletions(-) create mode 100644 core/src/test/java/org/apache/spark/status/api/v1/ExecutorSummarySuite.scala diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/api.scala b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala index d207a6023f7f9..5a8cf09e1cba6 100644 --- a/core/src/main/scala/org/apache/spark/status/api/v1/api.scala +++ b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala @@ -136,6 +136,10 @@ private[spark] class ExecutorMetricsJsonDeserializer new TypeReference[Option[Map[String, java.lang.Long]]] {}) metricsMap.map(metrics => new ExecutorMetrics(metrics)) } + + override def getNullValue(ctxt: DeserializationContext): Option[ExecutorMetrics] = { + None + } } /** serializer for peakMemoryMetrics: convert ExecutorMetrics to map with metric name as key */ private[spark] class ExecutorMetricsJsonSerializer @@ -144,11 +148,15 @@ private[spark] class ExecutorMetricsJsonSerializer metrics: Option[ExecutorMetrics], jsonGenerator: JsonGenerator, serializerProvider: SerializerProvider): Unit = { - metrics.foreach { m: ExecutorMetrics => - val metricsMap = ExecutorMetricType.metricToOffset.map { case (metric, _) => - metric -> m.getMetricValue(metric) + if (metrics.isEmpty) { + jsonGenerator.writeNull() + } else { + metrics.foreach { m: ExecutorMetrics => + val metricsMap = ExecutorMetricType.metricToOffset.map { case (metric, _) => + metric -> m.getMetricValue(metric) + } + jsonGenerator.writeObject(metricsMap) } - jsonGenerator.writeObject(metricsMap) } } diff --git a/core/src/test/java/org/apache/spark/status/api/v1/ExecutorSummarySuite.scala b/core/src/test/java/org/apache/spark/status/api/v1/ExecutorSummarySuite.scala new file mode 100644 index 0000000000000..286911bdfc19a --- /dev/null +++ b/core/src/test/java/org/apache/spark/status/api/v1/ExecutorSummarySuite.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.status.api.v1 + +import java.util.Date + +import com.fasterxml.jackson.core.`type`.TypeReference +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule + +import org.apache.spark.SparkFunSuite + +class ExecutorSummarySuite extends SparkFunSuite { + + test("Check ExecutorSummary serialize and deserialize with empty peakMemoryMetrics") { + val mapper = new ObjectMapper().registerModule(DefaultScalaModule) + val executorSummary = new ExecutorSummary("id", "host:port", true, 1, + 10, 10, 1, 1, 1, + 0, 0, 1, 100, + 1, 100, 100, + 10, false, 20, new Date(1600984336352L), + Option.empty, Option.empty, Map(), Option.empty, Set(), Option.empty, Map(), Map(), 1) + val expectedJson = "{\"id\":\"id\",\"hostPort\":\"host:port\",\"isActive\":true," + + "\"rddBlocks\":1,\"memoryUsed\":10,\"diskUsed\":10,\"totalCores\":1,\"maxTasks\":1," + + "\"activeTasks\":1,\"failedTasks\":0,\"completedTasks\":0,\"totalTasks\":1," + + "\"totalDuration\":100,\"totalGCTime\":1,\"totalInputBytes\":100," + + "\"totalShuffleRead\":100,\"totalShuffleWrite\":10,\"isBlacklisted\":false," + + "\"maxMemory\":20,\"addTime\":1600984336352,\"removeTime\":null,\"removeReason\":null," + + "\"executorLogs\":{},\"memoryMetrics\":null,\"blacklistedInStages\":[]," + + "\"peakMemoryMetrics\":null,\"attributes\":{},\"resources\":{},\"resourceProfileId\":1}" + val json = mapper.writeValueAsString(executorSummary) + assert(expectedJson.equals(json)) + val deserializeExecutorSummary = mapper.readValue(json, new TypeReference[ExecutorSummary] {}) + assert(deserializeExecutorSummary.peakMemoryMetrics == None) + } + +} From a53fc9b7ae2b96b302d72170db6572b337ec9894 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Tue, 29 Sep 2020 09:54:43 +0900 Subject: [PATCH 0121/1009] [SPARK-27951][SQL][FOLLOWUP] Improve the window function nth_value ### What changes were proposed in this pull request? https://github.com/apache/spark/pull/29604 supports the ANSI SQL NTH_VALUE. We should override the `prettyName` and `sql`. ### Why are the changes needed? Make the name of nth_value correct. To show the ignoreNulls parameter correctly. ### Does this PR introduce _any_ user-facing change? 'No'. ### How was this patch tested? Jenkins test. Closes #29886 from beliefer/improve-nth_value. 
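For reference, the expression can be exercised with the same query used in `sql-expression-schema.md`; after this change the generated column label and any analysis error text use the `nth_value` spelling instead of the internal `nthvalue` one. A quick spark-shell sketch (assumes a `SparkSession` named `spark`; the exact label text can vary by version):

```
val df = spark.sql(
  """SELECT a, b, nth_value(b, 2) OVER (PARTITION BY a ORDER BY b)
    |FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b)""".stripMargin)

// The window column is now labelled with the nth_value(...) spelling.
df.printSchema()

// With the default running frame, the column is NULL until the frame contains
// at least 2 rows, then holds the second value of the frame.
df.show()
```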
Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: HyukjinKwon --- .../spark/sql/catalyst/expressions/windowExpressions.scala | 4 +++- .../src/test/resources/sql-functions/sql-expression-schema.md | 2 +- .../sql-tests/results/postgreSQL/window_part3.sql.out | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index 8e3702c157a3c..0e15ff2904306 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -669,7 +669,9 @@ case class NthValue(input: Expression, offsetExpr: Expression, ignoreNulls: Bool override lazy val evaluateExpression: AttributeReference = result - override def toString: String = s"$prettyName($input, $offset)${if (ignoreNulls) " ignore nulls"}" + override def prettyName: String = "nth_value" + override def sql: String = + s"$prettyName(${input.sql}, ${offsetExpr.sql})${if (ignoreNulls) " ignore nulls" else ""}" } /** diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 742a2ffee83f7..473204c182a69 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -191,7 +191,7 @@ | org.apache.spark.sql.catalyst.expressions.Not | ! | SELECT ! true | struct<(NOT true):boolean> | | org.apache.spark.sql.catalyst.expressions.Not | not | SELECT not true | struct<(NOT true):boolean> | | org.apache.spark.sql.catalyst.expressions.Now | now | SELECT now() | struct | -| org.apache.spark.sql.catalyst.expressions.NthValue | nth_value | SELECT a, b, nth_value(b, 2) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | +| org.apache.spark.sql.catalyst.expressions.NthValue | nth_value | SELECT a, b, nth_value(b, 2) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | | org.apache.spark.sql.catalyst.expressions.NullIf | nullif | SELECT nullif(2, 2) | struct | | org.apache.spark.sql.catalyst.expressions.Nvl | nvl | SELECT nvl(NULL, array('2')) | struct> | | org.apache.spark.sql.catalyst.expressions.Nvl2 | nvl2 | SELECT nvl2(NULL, 2, 1) | struct | diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out index b63b5601715a8..553432e503d5c 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out @@ -391,7 +391,7 @@ SELECT nth_value(four, 0) OVER (ORDER BY ten), ten, four FROM tenk1 struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'nthvalue(spark_catalog.default.tenk1.`four`, 0)' due to data type mismatch: The 'offset' argument of nth_value must be greater than zero but it is 0.; line 1 pos 7 +cannot resolve 'nth_value(spark_catalog.default.tenk1.`four`, 0)' due to data type mismatch: The 'offset' argument of nth_value must be greater than zero but it is 0.; line 1 pos 7 -- !query From 376ede130149e0fa2029da423f8d9c654b096921 Mon Sep 17 00:00:00 2001 From: HyukjinKwon 
Date: Mon, 28 Sep 2020 21:54:00 -0700 Subject: [PATCH 0122/1009] [SPARK-33021][PYTHON][TESTS] Move functions related test cases into test_functions.py ### What changes were proposed in this pull request? Move functions related test cases from `test_context.py` to `test_functions.py`. ### Why are the changes needed? To group the similar test cases. ### Does this PR introduce _any_ user-facing change? Nope, test-only. ### How was this patch tested? Jenkins and GitHub Actions should test. Closes #29898 from HyukjinKwon/SPARK-33021. Authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- python/pyspark/sql/tests/test_context.py | 101 -------------------- python/pyspark/sql/tests/test_functions.py | 102 ++++++++++++++++++++- 2 files changed, 101 insertions(+), 102 deletions(-) diff --git a/python/pyspark/sql/tests/test_context.py b/python/pyspark/sql/tests/test_context.py index d506908b784db..ff1db31072df9 100644 --- a/python/pyspark/sql/tests/test_context.py +++ b/python/pyspark/sql/tests/test_context.py @@ -26,7 +26,6 @@ from pyspark import SparkContext, SQLContext from pyspark.sql import Row, SparkSession from pyspark.sql.types import StructType, StringType, StructField -from pyspark.sql.window import Window from pyspark.testing.utils import ReusedPySparkTestCase @@ -108,99 +107,6 @@ def test_save_and_load_table(self): shutil.rmtree(tmpPath) - def test_window_functions(self): - df = self.spark.createDataFrame([(1, "1"), (2, "2"), (1, "2"), (1, "2")], ["key", "value"]) - w = Window.partitionBy("value").orderBy("key") - from pyspark.sql import functions as F - sel = df.select(df.value, df.key, - F.max("key").over(w.rowsBetween(0, 1)), - F.min("key").over(w.rowsBetween(0, 1)), - F.count("key").over(w.rowsBetween(float('-inf'), float('inf'))), - F.row_number().over(w), - F.rank().over(w), - F.dense_rank().over(w), - F.ntile(2).over(w)) - rs = sorted(sel.collect()) - expected = [ - ("1", 1, 1, 1, 1, 1, 1, 1, 1), - ("2", 1, 1, 1, 3, 1, 1, 1, 1), - ("2", 1, 2, 1, 3, 2, 1, 1, 1), - ("2", 2, 2, 2, 3, 3, 3, 2, 2) - ] - for r, ex in zip(rs, expected): - self.assertEqual(tuple(r), ex[:len(r)]) - - def test_window_functions_without_partitionBy(self): - df = self.spark.createDataFrame([(1, "1"), (2, "2"), (1, "2"), (1, "2")], ["key", "value"]) - w = Window.orderBy("key", df.value) - from pyspark.sql import functions as F - sel = df.select(df.value, df.key, - F.max("key").over(w.rowsBetween(0, 1)), - F.min("key").over(w.rowsBetween(0, 1)), - F.count("key").over(w.rowsBetween(float('-inf'), float('inf'))), - F.row_number().over(w), - F.rank().over(w), - F.dense_rank().over(w), - F.ntile(2).over(w)) - rs = sorted(sel.collect()) - expected = [ - ("1", 1, 1, 1, 4, 1, 1, 1, 1), - ("2", 1, 1, 1, 4, 2, 2, 2, 1), - ("2", 1, 2, 1, 4, 3, 2, 2, 2), - ("2", 2, 2, 2, 4, 4, 4, 3, 2) - ] - for r, ex in zip(rs, expected): - self.assertEqual(tuple(r), ex[:len(r)]) - - def test_window_functions_cumulative_sum(self): - df = self.spark.createDataFrame([("one", 1), ("two", 2)], ["key", "value"]) - from pyspark.sql import functions as F - - # Test cumulative sum - sel = df.select( - df.key, - F.sum(df.value).over(Window.rowsBetween(Window.unboundedPreceding, 0))) - rs = sorted(sel.collect()) - expected = [("one", 1), ("two", 3)] - for r, ex in zip(rs, expected): - self.assertEqual(tuple(r), ex[:len(r)]) - - # Test boundary values less than JVM's Long.MinValue and make sure we don't overflow - sel = df.select( - df.key, - F.sum(df.value).over(Window.rowsBetween(Window.unboundedPreceding - 1, 0))) - rs = sorted(sel.collect()) - 
expected = [("one", 1), ("two", 3)] - for r, ex in zip(rs, expected): - self.assertEqual(tuple(r), ex[:len(r)]) - - # Test boundary values greater than JVM's Long.MaxValue and make sure we don't overflow - frame_end = Window.unboundedFollowing + 1 - sel = df.select( - df.key, - F.sum(df.value).over(Window.rowsBetween(Window.currentRow, frame_end))) - rs = sorted(sel.collect()) - expected = [("one", 3), ("two", 2)] - for r, ex in zip(rs, expected): - self.assertEqual(tuple(r), ex[:len(r)]) - - def test_collect_functions(self): - df = self.spark.createDataFrame([(1, "1"), (2, "2"), (1, "2"), (1, "2")], ["key", "value"]) - from pyspark.sql import functions - - self.assertEqual( - sorted(df.select(functions.collect_set(df.key).alias('r')).collect()[0].r), - [1, 2]) - self.assertEqual( - sorted(df.select(functions.collect_list(df.key).alias('r')).collect()[0].r), - [1, 1, 1, 2]) - self.assertEqual( - sorted(df.select(functions.collect_set(df.value).alias('r')).collect()[0].r), - ["1", "2"]) - self.assertEqual( - sorted(df.select(functions.collect_list(df.value).alias('r')).collect()[0].r), - ["1", "2", "2", "2"]) - def test_limit_and_take(self): df = self.spark.range(1, 1000, numPartitions=10) @@ -219,13 +125,6 @@ def assert_runs_only_one_job_stage_and_task(job_group_name, f): # Regression test for SPARK-17514: limit(n).collect() should the perform same as take(n) assert_runs_only_one_job_stage_and_task("collect_limit", lambda: df.limit(1).collect()) - def test_datetime_functions(self): - from pyspark.sql import functions - from datetime import date - df = self.spark.range(1).selectExpr("'2017-01-22' as dateCol") - parse_result = df.select(functions.to_date(functions.col("dateCol"))).first() - self.assertEquals(date(2017, 1, 22), parse_result['to_date(dateCol)']) - def test_unbounded_frames(self): from pyspark.sql import functions as F from pyspark.sql import window diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 5638cad51b755..fdc5e247043de 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -19,7 +19,7 @@ from itertools import chain import re -from pyspark.sql import Row +from pyspark.sql import Row, Window from pyspark.sql.functions import udf, input_file_name, col, percentile_approx, lit from pyspark.testing.sqlutils import ReusedSQLTestCase @@ -390,6 +390,106 @@ def test_higher_order_function_failures(self): with self.assertRaises(ValueError): transform(col("foo"), lambda x: 1) + def test_window_functions(self): + df = self.spark.createDataFrame([(1, "1"), (2, "2"), (1, "2"), (1, "2")], ["key", "value"]) + w = Window.partitionBy("value").orderBy("key") + from pyspark.sql import functions as F + sel = df.select(df.value, df.key, + F.max("key").over(w.rowsBetween(0, 1)), + F.min("key").over(w.rowsBetween(0, 1)), + F.count("key").over(w.rowsBetween(float('-inf'), float('inf'))), + F.row_number().over(w), + F.rank().over(w), + F.dense_rank().over(w), + F.ntile(2).over(w)) + rs = sorted(sel.collect()) + expected = [ + ("1", 1, 1, 1, 1, 1, 1, 1, 1), + ("2", 1, 1, 1, 3, 1, 1, 1, 1), + ("2", 1, 2, 1, 3, 2, 1, 1, 1), + ("2", 2, 2, 2, 3, 3, 3, 2, 2) + ] + for r, ex in zip(rs, expected): + self.assertEqual(tuple(r), ex[:len(r)]) + + def test_window_functions_without_partitionBy(self): + df = self.spark.createDataFrame([(1, "1"), (2, "2"), (1, "2"), (1, "2")], ["key", "value"]) + w = Window.orderBy("key", df.value) + from pyspark.sql import functions as F + sel = df.select(df.value, 
df.key, + F.max("key").over(w.rowsBetween(0, 1)), + F.min("key").over(w.rowsBetween(0, 1)), + F.count("key").over(w.rowsBetween(float('-inf'), float('inf'))), + F.row_number().over(w), + F.rank().over(w), + F.dense_rank().over(w), + F.ntile(2).over(w)) + rs = sorted(sel.collect()) + expected = [ + ("1", 1, 1, 1, 4, 1, 1, 1, 1), + ("2", 1, 1, 1, 4, 2, 2, 2, 1), + ("2", 1, 2, 1, 4, 3, 2, 2, 2), + ("2", 2, 2, 2, 4, 4, 4, 3, 2) + ] + for r, ex in zip(rs, expected): + self.assertEqual(tuple(r), ex[:len(r)]) + + def test_window_functions_cumulative_sum(self): + df = self.spark.createDataFrame([("one", 1), ("two", 2)], ["key", "value"]) + from pyspark.sql import functions as F + + # Test cumulative sum + sel = df.select( + df.key, + F.sum(df.value).over(Window.rowsBetween(Window.unboundedPreceding, 0))) + rs = sorted(sel.collect()) + expected = [("one", 1), ("two", 3)] + for r, ex in zip(rs, expected): + self.assertEqual(tuple(r), ex[:len(r)]) + + # Test boundary values less than JVM's Long.MinValue and make sure we don't overflow + sel = df.select( + df.key, + F.sum(df.value).over(Window.rowsBetween(Window.unboundedPreceding - 1, 0))) + rs = sorted(sel.collect()) + expected = [("one", 1), ("two", 3)] + for r, ex in zip(rs, expected): + self.assertEqual(tuple(r), ex[:len(r)]) + + # Test boundary values greater than JVM's Long.MaxValue and make sure we don't overflow + frame_end = Window.unboundedFollowing + 1 + sel = df.select( + df.key, + F.sum(df.value).over(Window.rowsBetween(Window.currentRow, frame_end))) + rs = sorted(sel.collect()) + expected = [("one", 3), ("two", 2)] + for r, ex in zip(rs, expected): + self.assertEqual(tuple(r), ex[:len(r)]) + + def test_collect_functions(self): + df = self.spark.createDataFrame([(1, "1"), (2, "2"), (1, "2"), (1, "2")], ["key", "value"]) + from pyspark.sql import functions + + self.assertEqual( + sorted(df.select(functions.collect_set(df.key).alias('r')).collect()[0].r), + [1, 2]) + self.assertEqual( + sorted(df.select(functions.collect_list(df.key).alias('r')).collect()[0].r), + [1, 1, 1, 2]) + self.assertEqual( + sorted(df.select(functions.collect_set(df.value).alias('r')).collect()[0].r), + ["1", "2"]) + self.assertEqual( + sorted(df.select(functions.collect_list(df.value).alias('r')).collect()[0].r), + ["1", "2", "2", "2"]) + + def test_datetime_functions(self): + from pyspark.sql import functions + from datetime import date + df = self.spark.range(1).selectExpr("'2017-01-22' as dateCol") + parse_result = df.select(functions.to_date(functions.col("dateCol"))).first() + self.assertEquals(date(2017, 1, 22), parse_result['to_date(dateCol)']) + if __name__ == "__main__": import unittest From 68cd5677ae0e3891e6bb4938a64ff98810656ba8 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 29 Sep 2020 05:13:01 +0000 Subject: [PATCH 0123/1009] [SPARK-33015][SQL] Compute the current date only once ### What changes were proposed in this pull request? Compute the current date at the specified time zone using timestamp taken at the start of query evaluation. ### Why are the changes needed? According to the doc for [current_date()](http://spark.apache.org/docs/latest/api/sql/#current_date), the current date should be computed at the start of query evaluation but it can be computed multiple times. As a consequence of that, the function can return different values if the query is executed at the border of two dates. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? 
By existing test suites `ComputeCurrentTimeSuite` and `DateExpressionsSuite`. Closes #29889 from MaxGekk/fix-current_date. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../apache/spark/sql/catalyst/optimizer/finishAnalysis.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala index 6c9bb6db06d86..76b9bd03f216c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala @@ -79,10 +79,10 @@ object ComputeCurrentTime extends Rule[LogicalPlan] { val currentTime = Literal.create(timestamp, timeExpr.dataType) plan transformAllExpressions { - case CurrentDate(Some(timeZoneId)) => + case currentDate @ CurrentDate(Some(timeZoneId)) => currentDates.getOrElseUpdate(timeZoneId, { Literal.create( - LocalDate.now(DateTimeUtils.getZoneId(timeZoneId)), + DateTimeUtils.microsToDays(timestamp, currentDate.zoneId), DateType) }) case CurrentTimestamp() | Now() => currentTime From 6868b405171bfaa8d013bd938dbef6636a8c9845 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Mon, 28 Sep 2020 22:14:28 -0700 Subject: [PATCH 0124/1009] [SPARK-33020][PYTHON] Add nth_value as a PySpark function ### What changes were proposed in this pull request? `nth_value` was added at SPARK-27951. This PR adds the corresponding PySpark API. ### Why are the changes needed? To support the consistent APIs ### Does this PR introduce _any_ user-facing change? Yes, it introduces a new PySpark function API. ### How was this patch tested? Unittest was added. Closes #29899 from HyukjinKwon/SPARK-33020. Authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- python/docs/source/reference/pyspark.sql.rst | 1 + python/pyspark/sql/functions.py | 20 ++++++++++++ python/pyspark/sql/functions.pyi | 3 ++ python/pyspark/sql/tests/test_functions.py | 34 ++++++++++++++++++++ 4 files changed, 58 insertions(+) diff --git a/python/docs/source/reference/pyspark.sql.rst b/python/docs/source/reference/pyspark.sql.rst index e5348c6c6e9aa..692d098c89cdc 100644 --- a/python/docs/source/reference/pyspark.sql.rst +++ b/python/docs/source/reference/pyspark.sql.rst @@ -409,6 +409,7 @@ Functions months_between nanvl next_day + nth_value ntile overlay pandas_udf diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 14d101a65252a..e6c7eb6edb904 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -934,6 +934,26 @@ def lead(col, offset=1, default=None): return Column(sc._jvm.functions.lead(_to_java_column(col), offset, default)) +@since(3.1) +def nth_value(col, offset, ignoreNulls=False): + """ + Window function: returns the value that is the `offset`\\th row of the window frame + (counting from 1), and `null` if the size of window frame is less than `offset` rows. + + It will return the `offset`\\th non-null value it sees when `ignoreNulls` is set to + true. If all values are null, then null is returned. + + This is equivalent to the nth_value function in SQL. 
+ + :param col: name of column or expression + :param offset: number of row to use as the value + :param ignoreNulls: indicates the Nth value should skip null in the + determination of which row to use + """ + sc = SparkContext._active_spark_context + return Column(sc._jvm.functions.nth_value(_to_java_column(col), offset, ignoreNulls)) + + @since(1.4) def ntile(n): """ diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi index 3b0b2030178ef..8efe65205315e 100644 --- a/python/pyspark/sql/functions.pyi +++ b/python/pyspark/sql/functions.pyi @@ -85,6 +85,9 @@ def lag( def lead( col: ColumnOrName, offset: int = ..., default: Optional[Any] = ... ) -> Column: ... +def nth_value( + col: ColumnOrName, offset: int, ignoreNulls: Optional[bool] = ... +) -> Column: ... def ntile(n: int) -> Column: ... def current_date() -> Column: ... def current_timestamp() -> Column: ... diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index fdc5e247043de..8d05ed28b8d4e 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -367,6 +367,40 @@ def test_percentile_approx(self): self.assertListEqual(actual, expected) + def test_nth_value(self): + from pyspark.sql import Window + from pyspark.sql.functions import nth_value + + df = self.spark.createDataFrame([ + ("a", 0, None), + ("a", 1, "x"), + ("a", 2, "y"), + ("a", 3, "z"), + ("a", 4, None), + ("b", 1, None), + ("b", 2, None)], schema=("key", "order", "value")) + w = Window.partitionBy("key").orderBy("order") + + rs = df.select( + df.key, + df.order, + nth_value("value", 2).over(w), + nth_value("value", 2, False).over(w), + nth_value("value", 2, True).over(w)).collect() + + expected = [ + ("a", 0, None, None, None), + ("a", 1, "x", "x", None), + ("a", 2, "x", "x", "y"), + ("a", 3, "x", "x", "y"), + ("a", 4, "x", "x", "y"), + ("b", 1, None, None, None), + ("b", 2, None, None, None) + ] + + for r, ex in zip(sorted(rs), sorted(expected)): + self.assertEqual(tuple(r), ex[:len(r)]) + def test_higher_order_function_failures(self): from pyspark.sql.functions import col, transform From 1b60ff5afea0637f74c5f064642225b35b13b069 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 29 Sep 2020 05:20:12 +0000 Subject: [PATCH 0125/1009] [MINOR][DOCS] Document when `current_date` and `current_timestamp` are evaluated ### What changes were proposed in this pull request? Explicitly document that `current_date` and `current_timestamp` are executed at the start of query evaluation. And all calls of `current_date`/`current_timestamp` within the same query return the same value ### Why are the changes needed? Users could expect that `current_date` and `current_timestamp` return the current date/timestamp at the moment of query execution but in fact the functions are folded by the optimizer at the start of query evaluation: https://github.com/apache/spark/blob/0df8dd60733066076967f0525210bbdb5e12415a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala#L71-L91 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? by running `./dev/scalastyle`. Closes #29892 from MaxGekk/doc-current_date. 
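A minimal spark-shell sketch (not part of the patch) of the behaviour being documented here: both functions are constant-folded once at the start of query evaluation, so every occurrence inside a single query yields the same value, even when the query straddles a date boundary.
```
// d1/d2 and t1/t2 always match within one query, because ComputeCurrentTime
// replaces every occurrence with a single literal before execution.
spark.sql("SELECT current_date() AS d1, current_date() AS d2, " +
  "current_timestamp() AS t1, current_timestamp() AS t2").show(false)
```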
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- R/pkg/R/functions.R | 6 ++++-- python/pyspark/sql/functions.py | 6 ++++-- .../catalyst/expressions/datetimeExpressions.scala | 12 ++++++------ .../main/scala/org/apache/spark/sql/functions.scala | 6 ++++-- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 2d1667f563490..df221de4c7327 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -4382,7 +4382,8 @@ setMethod("date_trunc", }) #' @details -#' \code{current_date}: Returns the current date as a date column. +#' \code{current_date}: Returns the current date at the start of query evaluation as a date column. +#' All calls of current_date within the same query return the same value. #' #' @rdname column_datetime_functions #' @aliases current_date current_date,missing-method @@ -4398,7 +4399,8 @@ setMethod("current_date", }) #' @details -#' \code{current_timestamp}: Returns the current timestamp as a timestamp column. +#' \code{current_timestamp}: Returns the current timestamp at the start of query evaluation as +#' a timestamp column. All calls of current_timestamp within the same query return the same value. #' #' @rdname column_datetime_functions #' @aliases current_timestamp current_timestamp,missing-method diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index e6c7eb6edb904..7007d505d048d 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -975,7 +975,8 @@ def ntile(n): @since(1.5) def current_date(): """ - Returns the current date as a :class:`DateType` column. + Returns the current date at the start of query evaluation as a :class:`DateType` column. + All calls of current_date within the same query return the same value. """ sc = SparkContext._active_spark_context return Column(sc._jvm.functions.current_date()) @@ -983,7 +984,8 @@ def current_date(): def current_timestamp(): """ - Returns the current timestamp as a :class:`TimestampType` column. + Returns the current timestamp at the start of query evaluation as a :class:`TimestampType` + column. All calls of current_timestamp within the same query return the same value. """ sc = SparkContext._active_spark_context return Column(sc._jvm.functions.current_timestamp()) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index e889cfbec990f..571b0be40c6e6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -75,13 +75,12 @@ trait TimestampFormatterHelper extends TimeZoneAwareExpression { /** * Returns the current date at the start of query evaluation. - * All calls of current_date within the same query return the same value. - * * There is no code generation since this expression should get constant folded by the optimizer. */ +// scalastyle:off line.size.limit @ExpressionDescription( usage = """ - _FUNC_() - Returns the current date at the start of query evaluation. + _FUNC_() - Returns the current date at the start of query evaluation. All calls of current_date within the same query return the same value. _FUNC_ - Returns the current date at the start of query evaluation. 
""", @@ -97,6 +96,7 @@ trait TimestampFormatterHelper extends TimeZoneAwareExpression { """, group = "datetime_funcs", since = "1.5.0") +// scalastyle:on line.size.limit case class CurrentDate(timeZoneId: Option[String] = None) extends LeafExpression with TimeZoneAwareExpression with CodegenFallback { @@ -124,13 +124,12 @@ abstract class CurrentTimestampLike() extends LeafExpression with CodegenFallbac /** * Returns the current timestamp at the start of query evaluation. - * All calls of current_timestamp within the same query return the same value. - * * There is no code generation since this expression should get constant folded by the optimizer. */ +// scalastyle:off line.size.limit @ExpressionDescription( usage = """ - _FUNC_() - Returns the current timestamp at the start of query evaluation. + _FUNC_() - Returns the current timestamp at the start of query evaluation. All calls of current_timestamp within the same query return the same value. _FUNC_ - Returns the current timestamp at the start of query evaluation. """, @@ -146,6 +145,7 @@ abstract class CurrentTimestampLike() extends LeafExpression with CodegenFallbac """, group = "datetime_funcs", since = "1.5.0") +// scalastyle:on line.size.limit case class CurrentTimestamp() extends CurrentTimestampLike { override def prettyName: String = "current_timestamp" } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index acf845d6eceaf..2c545fe762b6d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -2760,7 +2760,8 @@ object functions { } /** - * Returns the current date as a date column. + * Returns the current date at the start of query evaluation as a date column. + * All calls of current_date within the same query return the same value. * * @group datetime_funcs * @since 1.5.0 @@ -2768,7 +2769,8 @@ object functions { def current_date(): Column = withExpr { CurrentDate() } /** - * Returns the current timestamp as a timestamp column. + * Returns the current timestamp at the start of query evaluation as a timestamp column. + * All calls of current_timestamp within the same query return the same value. * * @group datetime_funcs * @since 1.5.0 From 202115e7cd0bc2b32c68274e625cded0d628a0c5 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 28 Sep 2020 22:22:47 -0700 Subject: [PATCH 0126/1009] [SPARK-32948][SQL] Optimize to_json and from_json expression chain ### What changes were proposed in this pull request? This patch proposes to optimize from_json + to_json expression chain. ### Why are the changes needed? To optimize json expression chain that could be manually generated or generated automatically during query optimization. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Closes #29828 from viirya/SPARK-32948. 
Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../optimizer/OptimizeJsonExprs.scala | 43 ++++++ .../sql/catalyst/optimizer/Optimizer.scala | 1 + .../optimizer/OptimizeJsonExprsSuite.scala | 144 ++++++++++++++++++ 3 files changed, 188 insertions(+) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala new file mode 100644 index 0000000000000..24df480208220 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.rules.Rule + +/** + * Simplify redundant json related expressions. + */ +object OptimizeJsonExprs extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = plan transform { + case p => p.transformExpressions { + case jsonToStructs @ JsonToStructs(_, options1, + StructsToJson(options2, child, timeZoneId2), timeZoneId1) + if options1.isEmpty && options2.isEmpty && timeZoneId1 == timeZoneId2 && + jsonToStructs.dataType == child.dataType => + // `StructsToJson` only fails when `JacksonGenerator` encounters data types it + // cannot convert to JSON. But `StructsToJson.checkInputDataTypes` already + // verifies its child's data types is convertible to JSON. But in + // `StructsToJson(JsonToStructs(...))` case, we cannot verify input json string + // so `JsonToStructs` might throw error in runtime. Thus we cannot optimize + // this case similarly. 
+ child + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 6033c01a60f47..94970740d8d91 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -111,6 +111,7 @@ abstract class Optimizer(catalogManager: CatalogManager) RemoveNoopOperators, CombineWithFields, SimplifyExtractValueOps, + OptimizeJsonExprs, CombineConcats) ++ extendedOperatorOptimizationRules diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala new file mode 100644 index 0000000000000..90397d4cabee8 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} +import org.apache.spark.sql.catalyst.rules.RuleExecutor +import org.apache.spark.sql.catalyst.util.DateTimeUtils.getZoneId +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types._ + +class OptimizeJsonExprsSuite extends PlanTest with ExpressionEvalHelper { + + object Optimizer extends RuleExecutor[LogicalPlan] { + val batches = Batch("Json optimization", FixedPoint(10), OptimizeJsonExprs) :: Nil + } + + val schema = StructType.fromDDL("a int, b int") + + private val structAtt = 'struct.struct(schema).notNull + + private val testRelation = LocalRelation(structAtt) + + test("SPARK-32948: optimize from_json + to_json") { + val options = Map.empty[String, String] + + val query1 = testRelation + .select(JsonToStructs(schema, options, StructsToJson(options, 'struct)).as("struct")) + val optimized1 = Optimizer.execute(query1.analyze) + + val expected = testRelation.select('struct.as("struct")).analyze + comparePlans(optimized1, expected) + + val query2 = testRelation + .select( + JsonToStructs(schema, options, + StructsToJson(options, + JsonToStructs(schema, options, + StructsToJson(options, 'struct)))).as("struct")) + val optimized2 = Optimizer.execute(query2.analyze) + + comparePlans(optimized2, expected) + } + + test("SPARK-32948: not optimize from_json + to_json if schema is different") { + val options = Map.empty[String, String] + val schema = StructType.fromDDL("a int") + + val query = testRelation + .select(JsonToStructs(schema, options, StructsToJson(options, 'struct)).as("struct")) + val optimized = Optimizer.execute(query.analyze) + + val expected = testRelation.select( + JsonToStructs(schema, options, StructsToJson(options, 'struct)).as("struct")).analyze + comparePlans(optimized, expected) + } + + test("SPARK-32948: if user gives schema with different letter case under case-insensitive") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + val options = Map.empty[String, String] + val schema = StructType.fromDDL("a int, B int") + + val query = testRelation + .select(JsonToStructs(schema, options, StructsToJson(options, 'struct)).as("struct")) + val optimized = Optimizer.execute(query.analyze) + + val expected = testRelation.select( + JsonToStructs(schema, options, StructsToJson(options, 'struct)).as("struct")).analyze + comparePlans(optimized, expected) + } + } + + test("SPARK-32948: not optimize from_json + to_json if nullability is different") { + val options = Map.empty[String, String] + val nonNullSchema = StructType( + StructField("a", IntegerType, false) :: StructField("b", IntegerType, false) :: Nil) + + val structAtt = 'struct.struct(nonNullSchema).notNull + val testRelationWithNonNullAttr = LocalRelation(structAtt) + + val schema = StructType.fromDDL("a int, b int") + + val query = testRelationWithNonNullAttr + .select(JsonToStructs(schema, options, StructsToJson(options, 'struct)).as("struct")) + val optimized = Optimizer.execute(query.analyze) + + val expected = testRelationWithNonNullAttr.select( + JsonToStructs(schema, options, StructsToJson(options, 'struct)).as("struct")).analyze + comparePlans(optimized, expected) + } + + test("SPARK-32948: not optimize from_json + to_json if option is not 
empty") { + val options = Map("testOption" -> "test") + + val query = testRelation + .select(JsonToStructs(schema, options, StructsToJson(options, 'struct)).as("struct")) + val optimized = Optimizer.execute(query.analyze) + + val expected = testRelation.select( + JsonToStructs(schema, options, StructsToJson(options, 'struct)).as("struct")).analyze + comparePlans(optimized, expected) + } + + test("SPARK-32948: not optimize from_json + to_json if timezone is different") { + val options = Map.empty[String, String] + val UTC_OPT = Option("UTC") + val PST = getZoneId("-08:00") + + val query1 = testRelation + .select(JsonToStructs(schema, options, + StructsToJson(options, 'struct, Option(PST.getId)), UTC_OPT).as("struct")) + val optimized1 = Optimizer.execute(query1.analyze) + + val expected1 = testRelation.select( + JsonToStructs(schema, options, + StructsToJson(options, 'struct, Option(PST.getId)), UTC_OPT).as("struct")).analyze + comparePlans(optimized1, expected1) + + val query2 = testRelation + .select(JsonToStructs(schema, options, + StructsToJson(options, 'struct, UTC_OPT), UTC_OPT).as("struct")) + val optimized2 = Optimizer.execute(query2.analyze) + val expected2 = testRelation.select('struct.as("struct")).analyze + comparePlans(optimized2, expected2) + } +} From 90e86f6fac8ac42cf61e523397dc1bcc01871744 Mon Sep 17 00:00:00 2001 From: "tanel.kiis@gmail.com" Date: Tue, 29 Sep 2020 16:51:44 +0900 Subject: [PATCH 0127/1009] [SPARK-32970][SPARK-32019][SQL][TEST] Reduce the runtime of an UT for ### What changes were proposed in this pull request? The UT for SPARK-32019 (#28853) tries to write about 16GB of data do the disk. We must change the value of `spark.sql.files.maxPartitionBytes` to a smaller value do check the correct behavior with less data. By default it is `128MB`. The other parameters in this UT are also changed to smaller values to keep the behavior the same. ### Why are the changes needed? The runtime of this one UT can be over 7 minutes on Jenkins. After the change it is few seconds. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UT Closes #29842 from tanelk/SPARK-32970. 
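An illustrative sketch (not part of the patch) of the knobs the rewritten test exercises; the path below is a placeholder. Read partitions are capped by `spark.sql.files.maxPartitionBytes`, and each file is padded by `spark.sql.files.openCostInBytes` before files are packed together, which is why shrinking both lets the suite cover the same code paths with megabytes instead of gigabytes of data.
```
// Placeholder path; the values mirror the ones the updated test uses.
spark.conf.set("spark.sql.files.maxPartitionBytes", "2MB")
spark.conf.set("spark.sql.files.openCostInBytes", (4 * 1024 * 1024).toString)
spark.read.parquet("/path/to/some/files").rdd.getNumPartitions
```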
Authored-by: tanel.kiis@gmail.com Signed-off-by: HyukjinKwon --- .../datasources/FileSourceStrategySuite.scala | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala index a808546745817..dfd9ba03f5be0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala @@ -549,17 +549,22 @@ class FileSourceStrategySuite extends QueryTest with SharedSparkSession with Pre assert(table.rdd.partitions.length == 3) } - withSQLConf(SQLConf.FILES_MIN_PARTITION_NUM.key -> "16") { - val partitions = (1 to 100).map(i => s"file$i" -> 128 * 1024 * 1024) - val table = createTable(files = partitions) - // partition is limited by filesMaxPartitionBytes(128MB) - assert(table.rdd.partitions.length == 100) - } + withSQLConf( + SQLConf.FILES_MAX_PARTITION_BYTES.key -> "2MB", + SQLConf.FILES_OPEN_COST_IN_BYTES.key -> String.valueOf(4 * 1024 * 1024)) { + + withSQLConf(SQLConf.FILES_MIN_PARTITION_NUM.key -> "8") { + val partitions = (1 to 12).map(i => s"file$i" -> 2 * 1024 * 1024) + val table = createTable(files = partitions) + // partition is limited by filesMaxPartitionBytes(2MB) + assert(table.rdd.partitions.length == 12) + } - withSQLConf(SQLConf.FILES_MIN_PARTITION_NUM.key -> "32") { - val partitions = (1 to 800).map(i => s"file$i" -> 4 * 1024 * 1024) - val table = createTable(files = partitions) - assert(table.rdd.partitions.length == 50) + withSQLConf(SQLConf.FILES_MIN_PARTITION_NUM.key -> "16") { + val partitions = (1 to 12).map(i => s"file$i" -> 4 * 1024 * 1024) + val table = createTable(files = partitions) + assert(table.rdd.partitions.length == 24) + } } } From f167002522d50eefb261c8ba2d66a23b781a38c4 Mon Sep 17 00:00:00 2001 From: Tom van Bussel Date: Tue, 29 Sep 2020 13:05:33 +0200 Subject: [PATCH 0128/1009] [SPARK-32901][CORE] Do not allocate memory while spilling UnsafeExternalSorter ### What changes were proposed in this pull request? This PR changes `UnsafeExternalSorter` to no longer allocate any memory while spilling. In particular it removes the allocation of a new pointer array in `UnsafeInMemorySorter`. Instead the new pointer array is allocated whenever the next record is inserted into the sorter. ### Why are the changes needed? Without this change the `UnsafeExternalSorter` could throw an OOM while spilling. The following sequence of events would have triggered an OOM: 1. `UnsafeExternalSorter` runs out of space in its pointer array and attempts to allocate a new large array to replace the old one. 2. `TaskMemoryManager` tries to allocate the memory backing the new large array using `MemoryManager`, but `MemoryManager` is only willing to return most but not all of the memory requested. 3. `TaskMemoryManager` asks `UnsafeExternalSorter` to spill, which causes `UnsafeExternalSorter` to spill the current run to disk, to free its record pages and to reset its `UnsafeInMemorySorter`. 4. `UnsafeInMemorySorter` frees the old pointer array, and tries to allocate a new small pointer array. 5. 
`TaskMemoryManager` tries to allocate the memory backing the small array using `MemoryManager`, but `MemoryManager` is unwilling to give it any memory, as the `TaskMemoryManager` is still holding on to the memory it got for the new large array. 6. `TaskMemoryManager` again asks `UnsafeExternalSorter` to spill, but this time there is nothing to spill. 7. `UnsafeInMemorySorter` receives less memory than it requested, and causes a `SparkOutOfMemoryError` to be thrown, which causes the current task to fail. With the changes in the PR the following will happen instead: 1. `UnsafeExternalSorter` runs out of space in its pointer array and attempts to allocate a new large array to replace the old one. 2. `TaskMemoryManager` tries to allocate the memory backing the new large array using `MemoryManager`, but `MemoryManager` is only willing to return most but not all of the memory requested. 3. `TaskMemoryManager` asks `UnsafeExternalSorter` to spill, which causes `UnsafeExternalSorter` to spill the current run to disk, to free its record pages and to reset its `UnsafeInMemorySorter`. 4. `UnsafeInMemorySorter` frees the old pointer array. 5. `TaskMemoryManager` returns control to `UnsafeExternalSorter.growPointerArrayIfNecessary` (either by returning the the new large array or by throwing a `SparkOutOfMemoryError`). 6. `UnsafeExternalSorter` either frees the new large array or it ignores the `SparkOutOfMemoryError` depending on what happened in the previous step. 7. `UnsafeExternalSorter` successfully allocates a new small pointer array and operation continues as normal. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Tests were added in `UnsafeExternalSorterSuite` and `UnsafeInMemorySorterSuite`. Closes #29785 from tomvanbussel/SPARK-32901. Authored-by: Tom van Bussel Signed-off-by: herman --- .../unsafe/sort/UnsafeExternalSorter.java | 96 ++++++++++++++----- .../unsafe/sort/UnsafeInMemorySorter.java | 55 +++++------ .../sort/UnsafeExternalSorterSuite.java | 46 ++++----- .../sort/UnsafeInMemorySorterSuite.java | 40 ++++---- .../spark/memory/TestMemoryManager.scala | 8 ++ 5 files changed, 143 insertions(+), 102 deletions(-) diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java index e4a882d609fc2..dda8ed4c239ae 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java @@ -203,6 +203,10 @@ public long spill(long size, MemoryConsumer trigger) throws IOException { } if (inMemSorter == null || inMemSorter.numRecords() <= 0) { + // There could still be some memory allocated when there are no records in the in-memory + // sorter. We will not spill it however, to ensure that we can always process at least one + // record before spilling. See the comments in `allocateMemoryForRecordIfNecessary` for why + // this is necessary. return 0L; } @@ -224,7 +228,7 @@ public long spill(long size, MemoryConsumer trigger) throws IOException { // Note that this is more-or-less going to be a multiple of the page size, so wasted space in // pages will currently be counted as memory spilled even though that space isn't actually // written to disk. This also counts the space needed to store the sorter's pointer array. 
- inMemSorter.reset(); + inMemSorter.freeMemory(); // Reset the in-memory sorter's pointer array only after freeing up the memory pages holding the // records. Otherwise, if the task is over allocated memory, then without freeing the memory // pages, we might not be able to get memory for the pointer array. @@ -325,7 +329,7 @@ public void cleanupResources() { deleteSpillFiles(); freeMemory(); if (inMemSorter != null) { - inMemSorter.free(); + inMemSorter.freeMemory(); inMemSorter = null; } } @@ -339,40 +343,53 @@ public void cleanupResources() { private void growPointerArrayIfNecessary() throws IOException { assert(inMemSorter != null); if (!inMemSorter.hasSpaceForAnotherRecord()) { + if (inMemSorter.numRecords() <= 0) { + // Spilling was triggered just before this method was called. The pointer array was freed + // during the spill, so a new pointer array needs to be allocated here. + LongArray array = allocateArray(inMemSorter.getInitialSize()); + inMemSorter.expandPointerArray(array); + return; + } + long used = inMemSorter.getMemoryUsage(); - LongArray array; + LongArray array = null; try { // could trigger spilling array = allocateArray(used / 8 * 2); } catch (TooLargePageException e) { // The pointer array is too big to fix in a single page, spill. spill(); - return; } catch (SparkOutOfMemoryError e) { - // should have trigger spilling - if (!inMemSorter.hasSpaceForAnotherRecord()) { + if (inMemSorter.numRecords() > 0) { logger.error("Unable to grow the pointer array"); throw e; } - return; + // The new array could not be allocated, but that is not an issue as it is longer needed, + // as all records were spilled. } - // check if spilling is triggered or not - if (inMemSorter.hasSpaceForAnotherRecord()) { - freeArray(array); - } else { - inMemSorter.expandPointerArray(array); + + if (inMemSorter.numRecords() <= 0) { + // Spilling was triggered while trying to allocate the new array. + if (array != null) { + // We succeeded in allocating the new array, but, since all records were spilled, a + // smaller array would also suffice. + freeArray(array); + } + // The pointer array was freed during the spill, so a new pointer array needs to be + // allocated here. + array = allocateArray(inMemSorter.getInitialSize()); } + inMemSorter.expandPointerArray(array); } } /** - * Allocates more memory in order to insert an additional record. This will request additional - * memory from the memory manager and spill if the requested memory can not be obtained. + * Allocates an additional page in order to insert an additional record. This will request + * additional memory from the memory manager and spill if the requested memory can not be + * obtained. * * @param required the required space in the data page, in bytes, including space for storing - * the record size. This must be less than or equal to the page size (records - * that exceed the page size are handled via a different code path which uses - * special overflow pages). + * the record size. */ private void acquireNewPageIfNecessary(int required) { if (currentPage == null || @@ -384,6 +401,37 @@ private void acquireNewPageIfNecessary(int required) { } } + /** + * Allocates more memory in order to insert an additional record. This will request additional + * memory from the memory manager and spill if the requested memory can not be obtained. + * + * @param required the required space in the data page, in bytes, including space for storing + * the record size. 
+ */ + private void allocateMemoryForRecordIfNecessary(int required) throws IOException { + // Step 1: + // Ensure that the pointer array has space for another record. This may cause a spill. + growPointerArrayIfNecessary(); + // Step 2: + // Ensure that the last page has space for another record. This may cause a spill. + acquireNewPageIfNecessary(required); + // Step 3: + // The allocation in step 2 could have caused a spill, which would have freed the pointer + // array allocated in step 1. Therefore we need to check again whether we have to allocate + // a new pointer array. + // + // If the allocation in this step causes a spill event then it will not cause the page + // allocated in the previous step to be freed. The function `spill` only frees memory if at + // least one record has been inserted in the in-memory sorter. This will not be the case if + // we have spilled in the previous step. + // + // If we did not spill in the previous step then `growPointerArrayIfNecessary` will be a + // no-op that does not allocate any memory, and therefore can't cause a spill event. + // + // Thus there is no need to call `acquireNewPageIfNecessary` again after this step. + growPointerArrayIfNecessary(); + } + /** * Write a record to the sorter. */ @@ -398,11 +446,10 @@ public void insertRecord( spill(); } - growPointerArrayIfNecessary(); - int uaoSize = UnsafeAlignedOffset.getUaoSize(); + final int uaoSize = UnsafeAlignedOffset.getUaoSize(); // Need 4 or 8 bytes to store the record length. final int required = length + uaoSize; - acquireNewPageIfNecessary(required); + allocateMemoryForRecordIfNecessary(required); final Object base = currentPage.getBaseObject(); final long recordAddress = taskMemoryManager.encodePageNumberAndOffset(currentPage, pageCursor); @@ -425,10 +472,9 @@ public void insertKVRecord(Object keyBase, long keyOffset, int keyLen, Object valueBase, long valueOffset, int valueLen, long prefix, boolean prefixIsNull) throws IOException { - growPointerArrayIfNecessary(); - int uaoSize = UnsafeAlignedOffset.getUaoSize(); + final int uaoSize = UnsafeAlignedOffset.getUaoSize(); final int required = keyLen + valueLen + (2 * uaoSize); - acquireNewPageIfNecessary(required); + allocateMemoryForRecordIfNecessary(required); final Object base = currentPage.getBaseObject(); final long recordAddress = taskMemoryManager.encodePageNumberAndOffset(currentPage, pageCursor); @@ -572,7 +618,7 @@ public long spill() throws IOException { assert(inMemSorter != null); released += inMemSorter.getMemoryUsage(); totalSortTimeNanos += inMemSorter.getSortTimeNanos(); - inMemSorter.free(); + inMemSorter.freeMemory(); inMemSorter = null; taskContext.taskMetrics().incMemoryBytesSpilled(released); taskContext.taskMetrics().incDiskBytesSpilled(writeMetrics.bytesWritten()); @@ -669,7 +715,7 @@ public UnsafeSorterIterator getIterator(int startIndex) throws IOException { } i += spillWriter.recordsSpilled(); } - if (inMemSorter != null) { + if (inMemSorter != null && inMemSorter.numRecords() > 0) { UnsafeSorterIterator iter = inMemSorter.getSortedIterator(); moveOver(iter, startIndex - i); queue.add(iter); diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java index ff641a24a7b3e..33be899b6b438 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java +++ 
b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java @@ -159,32 +159,26 @@ private int getUsableCapacity() { return (int) (array.size() / (radixSortSupport != null ? 2 : 1.5)); } + public long getInitialSize() { + return initialSize; + } + /** * Free the memory used by pointer array. */ - public void free() { + public void freeMemory() { if (consumer != null) { if (array != null) { consumer.freeArray(array); } - array = null; - } - } - public void reset() { - if (consumer != null) { - consumer.freeArray(array); - // the call to consumer.allocateArray may trigger a spill which in turn access this instance - // and eventually re-enter this method and try to free the array again. by setting the array - // to null and its length to 0 we effectively make the spill code-path a no-op. setting the - // array to null also indicates that it has already been de-allocated which prevents a double - // de-allocation in free(). + // Set the array to null instead of allocating a new array. Allocating an array could have + // triggered another spill and this method already is called from UnsafeExternalSorter when + // spilling. Attempting to allocate while spilling is dangerous, as we could be holding onto + // a large partially complete allocation, which may prevent other memory from being allocated. + // Instead we will allocate the new array when it is necessary. array = null; usableCapacity = 0; - pos = 0; - nullBoundaryPos = 0; - array = consumer.allocateArray(initialSize); - usableCapacity = getUsableCapacity(); } pos = 0; nullBoundaryPos = 0; @@ -217,18 +211,20 @@ public boolean hasSpaceForAnotherRecord() { } public void expandPointerArray(LongArray newArray) { - if (newArray.size() < array.size()) { - // checkstyle.off: RegexpSinglelineJava - throw new SparkOutOfMemoryError("Not enough memory to grow pointer array"); - // checkstyle.on: RegexpSinglelineJava + if (array != null) { + if (newArray.size() < array.size()) { + // checkstyle.off: RegexpSinglelineJava + throw new SparkOutOfMemoryError("Not enough memory to grow pointer array"); + // checkstyle.on: RegexpSinglelineJava + } + Platform.copyMemory( + array.getBaseObject(), + array.getBaseOffset(), + newArray.getBaseObject(), + newArray.getBaseOffset(), + pos * 8L); + consumer.freeArray(array); } - Platform.copyMemory( - array.getBaseObject(), - array.getBaseOffset(), - newArray.getBaseObject(), - newArray.getBaseOffset(), - pos * 8L); - consumer.freeArray(array); array = newArray; usableCapacity = getUsableCapacity(); } @@ -347,6 +343,11 @@ public long getCurrentPageNumber() { * {@code next()} will return the same mutable object. */ public UnsafeSorterIterator getSortedIterator() { + if (numRecords() == 0) { + // `array` might be null, so make sure that it is not accessed by returning early. 
+ return new SortedIterator(0, 0); + } + int offset = 0; long start = System.nanoTime(); if (sortComparator != null) { diff --git a/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java b/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java index a1b66ccfaef03..dc2b4814c8284 100644 --- a/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java +++ b/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java @@ -23,7 +23,6 @@ import java.util.LinkedList; import java.util.UUID; -import org.hamcrest.Matchers; import scala.Tuple2$; import org.junit.After; @@ -38,7 +37,6 @@ import org.apache.spark.executor.TaskMetrics; import org.apache.spark.internal.config.package$; import org.apache.spark.memory.TestMemoryManager; -import org.apache.spark.memory.SparkOutOfMemoryError; import org.apache.spark.memory.TaskMemoryManager; import org.apache.spark.serializer.JavaSerializer; import org.apache.spark.serializer.SerializerInstance; @@ -581,40 +579,28 @@ public void testGetIterator() throws Exception { } @Test - public void testOOMDuringSpill() throws Exception { + public void testNoOOMDuringSpill() throws Exception { final UnsafeExternalSorter sorter = newSorter(); - // we assume that given default configuration, - // the size of the data we insert to the sorter (ints) - // and assuming we shouldn't spill before pointers array is exhausted - // (memory manager is not configured to throw at this point) - // - so this loop runs a reasonable number of iterations (<2000). - // test indeed completed within <30ms (on a quad i7 laptop). - for (int i = 0; sorter.hasSpaceForAnotherRecord(); ++i) { + for (int i = 0; i < 100; i++) { insertNumber(sorter, i); } - // we expect the next insert to attempt growing the pointerssArray first - // allocation is expected to fail, then a spill is triggered which - // attempts another allocation which also fails and we expect to see this - // OOM here. the original code messed with a released array within the - // spill code and ended up with a failed assertion. we also expect the - // location of the OOM to be - // org.apache.spark.util.collection.unsafe.sort.UnsafeInMemorySorter.reset - memoryManager.markconsequentOOM(2); - try { - insertNumber(sorter, 1024); - fail("expected OutOfMmoryError but it seems operation surprisingly succeeded"); - } - // we expect an SparkOutOfMemoryError here, anything else (i.e the original NPE is a failure) - catch (SparkOutOfMemoryError oom){ - String oomStackTrace = Utils.exceptionString(oom); - assertThat("expected SparkOutOfMemoryError in " + - "org.apache.spark.util.collection.unsafe.sort.UnsafeInMemorySorter.reset", - oomStackTrace, - Matchers.containsString( - "org.apache.spark.util.collection.unsafe.sort.UnsafeInMemorySorter.reset")); + + // Check that spilling still succeeds when the task is starved for memory. + memoryManager.markconsequentOOM(Integer.MAX_VALUE); + sorter.spill(); + memoryManager.resetConsequentOOM(); + + // Ensure that records can be appended after spilling, i.e. check that the sorter will allocate + // the new pointer array that it could not allocate while spilling. 
+ for (int i = 0; i < 100; ++i) { + insertNumber(sorter, i); } + + sorter.cleanupResources(); + assertSpillFilesWereCleanedUp(); } + private void verifyIntIterator(UnsafeSorterIterator iter, int start, int end) throws IOException { for (int i = start; i < end; i++) { diff --git a/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorterSuite.java b/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorterSuite.java index 2b8a0602730e1..9d4909ddce792 100644 --- a/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorterSuite.java +++ b/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorterSuite.java @@ -20,6 +20,7 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; +import org.apache.spark.unsafe.array.LongArray; import org.junit.Assert; import org.junit.Test; @@ -27,7 +28,6 @@ import org.apache.spark.SparkConf; import org.apache.spark.memory.TestMemoryConsumer; import org.apache.spark.memory.TestMemoryManager; -import org.apache.spark.memory.SparkOutOfMemoryError; import org.apache.spark.memory.TaskMemoryManager; import org.apache.spark.unsafe.Platform; import org.apache.spark.unsafe.memory.MemoryBlock; @@ -37,7 +37,6 @@ import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.isIn; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; import static org.mockito.Mockito.mock; public class UnsafeInMemorySorterSuite { @@ -147,7 +146,7 @@ public int compare( } @Test - public void freeAfterOOM() { + public void testNoOOMDuringReset() { final SparkConf sparkConf = new SparkConf(); sparkConf.set(package$.MODULE$.MEMORY_OFFHEAP_ENABLED(), false); @@ -156,12 +155,7 @@ public void freeAfterOOM() { final TaskMemoryManager memoryManager = new TaskMemoryManager( testMemoryManager, 0); final TestMemoryConsumer consumer = new TestMemoryConsumer(memoryManager); - final MemoryBlock dataPage = memoryManager.allocatePage(2048, consumer); - final Object baseObject = dataPage.getBaseObject(); - // Write the records into the data page: - long position = dataPage.getBaseOffset(); - final HashPartitioner hashPartitioner = new HashPartitioner(4); // Use integer comparison for comparing prefixes (which are partition ids, in this case) final PrefixComparator prefixComparator = PrefixComparators.LONG; final RecordComparator recordComparator = new RecordComparator() { @@ -179,18 +173,24 @@ public int compare( UnsafeInMemorySorter sorter = new UnsafeInMemorySorter(consumer, memoryManager, recordComparator, prefixComparator, 100, shouldUseRadixSort()); - testMemoryManager.markExecutionAsOutOfMemoryOnce(); - try { - sorter.reset(); - fail("expected SparkOutOfMemoryError but it seems operation surprisingly succeeded"); - } catch (SparkOutOfMemoryError oom) { - // as expected - } - // [SPARK-21907] this failed on NPE at - // org.apache.spark.memory.MemoryConsumer.freeArray(MemoryConsumer.java:108) - sorter.free(); - // simulate a 'back to back' free. - sorter.free(); + // Ensure that the sorter does not OOM while freeing its memory. + testMemoryManager.markconsequentOOM(Integer.MAX_VALUE); + sorter.freeMemory(); + testMemoryManager.resetConsequentOOM(); + Assert.assertFalse(sorter.hasSpaceForAnotherRecord()); + + // Get the sorter in an usable state again by allocating a new pointer array. 
+ LongArray array = consumer.allocateArray(1000); + sorter.expandPointerArray(array); + + // Ensure that it is safe to call freeMemory() multiple times. + testMemoryManager.markconsequentOOM(Integer.MAX_VALUE); + sorter.freeMemory(); + sorter.freeMemory(); + testMemoryManager.resetConsequentOOM(); + Assert.assertFalse(sorter.hasSpaceForAnotherRecord()); + + assertEquals(0L, memoryManager.cleanUpAllAllocatedMemory()); } } diff --git a/core/src/test/scala/org/apache/spark/memory/TestMemoryManager.scala b/core/src/test/scala/org/apache/spark/memory/TestMemoryManager.scala index 60f67699f81be..987f383c9c4fa 100644 --- a/core/src/test/scala/org/apache/spark/memory/TestMemoryManager.scala +++ b/core/src/test/scala/org/apache/spark/memory/TestMemoryManager.scala @@ -119,6 +119,14 @@ class TestMemoryManager(conf: SparkConf) consequentOOM += n } + /** + * Undos the effects of [[markExecutionAsOutOfMemoryOnce]] and [[markconsequentOOM]] and lets + * calls to [[acquireExecutionMemory()]] (if there is enough memory available). + */ + def resetConsequentOOM(): Unit = synchronized { + consequentOOM = 0 + } + def limit(avail: Long): Unit = synchronized { require(avail >= 0) available = avail From 7766fd13c9e7cb72b97fdfee224d3958fbe882a0 Mon Sep 17 00:00:00 2001 From: Akshat Bordia Date: Tue, 29 Sep 2020 08:38:43 -0500 Subject: [PATCH 0129/1009] [MINOR][DOCS] Fixing log message for better clarity Fixing log message for better clarity. Closes #29870 from akshatb1/master. Lead-authored-by: Akshat Bordia Co-authored-by: Akshat Bordia Signed-off-by: Sean Owen --- core/src/main/scala/org/apache/spark/SparkConf.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala index dbd89d646ae54..427e98e616515 100644 --- a/core/src/main/scala/org/apache/spark/SparkConf.scala +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -568,7 +568,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria // If spark.executor.heartbeatInterval bigger than spark.network.timeout, // it will almost always cause ExecutorLostFailure. See SPARK-22754. require(executorTimeoutThresholdMs > executorHeartbeatIntervalMs, "The value of " + - s"${networkTimeout}=${executorTimeoutThresholdMs}ms must be no less than the value of " + + s"${networkTimeout}=${executorTimeoutThresholdMs}ms must be greater than the value of " + s"${EXECUTOR_HEARTBEAT_INTERVAL.key}=${executorHeartbeatIntervalMs}ms.") } From 711d8dd28afd9af92b025f9908534e5f1d575042 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Tue, 29 Sep 2020 16:46:04 +0000 Subject: [PATCH 0130/1009] [SPARK-33018][SQL] Fix estimate statistics issue if child has 0 bytes ### What changes were proposed in this pull request? This pr fix estimate statistics issue if child has 0 bytes. ### Why are the changes needed? The `sizeInBytes` can be `0` when AQE and CBO are enabled(`spark.sql.adaptive.enabled`=true, `spark.sql.cbo.enabled`=true and `spark.sql.cbo.planStats.enabled`=true). This will generate incorrect BroadcastJoin, resulting in Driver OOM. For example: ![SPARK-33018](https://user-images.githubusercontent.com/5399861/94457606-647e3d00-01e7-11eb-85ee-812ae6efe7bb.jpg) ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual test. Closes #29894 from wangyum/SPARK-33018. 
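A standalone sketch (not part of the patch) of the arithmetic behind the fix: the fallback size estimate of a multi-child operator is the product of its children's sizes, so a single child reporting 0 bytes zeroes the whole estimate and can make a large join look broadcastable. Filtering out non-positive sizes, as the fix does, keeps the estimate meaningful.
```
val childSizes = Seq(BigInt(100L * 1024 * 1024), BigInt(0))
childSizes.product                 // 0         -> looks tiny, may be wrongly broadcast
childSizes.filter(_ > 0L).product  // 104857600 -> closer to reality
```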
Authored-by: Yuming Wang Signed-off-by: Wenchen Fan --- .../SizeInBytesOnlyStatsPlanVisitor.scala | 3 ++- .../statsEstimation/JoinEstimationSuite.scala | 22 +++++++++++++++++++ .../StatsEstimationTestBase.scala | 9 +++++--- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala index da36db7ae1f5f..a586988fd3253 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala @@ -53,7 +53,8 @@ object SizeInBytesOnlyStatsPlanVisitor extends LogicalPlanVisitor[Statistics] { */ override def default(p: LogicalPlan): Statistics = p match { case p: LeafNode => p.computeStats() - case _: LogicalPlan => Statistics(sizeInBytes = p.children.map(_.stats.sizeInBytes).product) + case _: LogicalPlan => + Statistics(sizeInBytes = p.children.map(_.stats.sizeInBytes).filter(_ > 0L).product) } override def visitAggregate(p: Aggregate): Statistics = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/JoinEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/JoinEstimationSuite.scala index 6c5a2b247fc23..cdfc863cc0212 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/JoinEstimationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/JoinEstimationSuite.scala @@ -551,4 +551,26 @@ class JoinEstimationSuite extends StatsEstimationTestBase { attributeStats = AttributeMap(Nil)) assert(join.stats == expectedStats) } + + test("SPARK-33018 Fix estimate statistics issue if child has 0 bytes") { + case class MyStatsTestPlan( + outputList: Seq[Attribute], + sizeInBytes: BigInt) extends LeafNode { + override def output: Seq[Attribute] = outputList + override def computeStats(): Statistics = Statistics(sizeInBytes = sizeInBytes) + } + + val left = MyStatsTestPlan( + outputList = Seq("key-1-2", "key-2-4").map(nameToAttr), + sizeInBytes = BigInt(100)) + + val right = MyStatsTestPlan( + outputList = Seq("key-1-2", "key-2-3").map(nameToAttr), + sizeInBytes = BigInt(0)) + + val join = Join(left, right, LeftOuter, + Some(EqualTo(nameToAttr("key-2-4"), nameToAttr("key-2-3"))), JoinHint.NONE) + + assert(join.stats == Statistics(sizeInBytes = 100)) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/StatsEstimationTestBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/StatsEstimationTestBase.scala index 9dceca59f5b87..0a27e31b3c9f6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/StatsEstimationTestBase.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/StatsEstimationTestBase.scala @@ -26,17 +26,20 @@ import org.apache.spark.sql.types.{IntegerType, StringType} trait StatsEstimationTestBase extends SparkFunSuite { - var originalValue: Boolean = false + var originalCBOValue: Boolean = false + var originalPlanStatsValue: Boolean = false override def beforeAll(): Unit = { super.beforeAll() // Enable stats estimation based on CBO. 
- originalValue = SQLConf.get.getConf(SQLConf.CBO_ENABLED) + originalCBOValue = SQLConf.get.getConf(SQLConf.CBO_ENABLED) + originalPlanStatsValue = SQLConf.get.getConf(SQLConf.PLAN_STATS_ENABLED) SQLConf.get.setConf(SQLConf.CBO_ENABLED, true) } override def afterAll(): Unit = { - SQLConf.get.setConf(SQLConf.CBO_ENABLED, originalValue) + SQLConf.get.setConf(SQLConf.CBO_ENABLED, originalCBOValue) + SQLConf.get.setConf(SQLConf.PLAN_STATS_ENABLED, originalPlanStatsValue) super.afterAll() } From cc06266ade5a4eb35089501a3b32736624208d4c Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 29 Sep 2020 12:02:45 -0700 Subject: [PATCH 0131/1009] [SPARK-33019][CORE] Use spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version=1 by default ### What changes were proposed in this pull request? Apache Spark 3.1's default Hadoop profile is `hadoop-3.2`. Instead of having a warning documentation, this PR aims to use a consistent and safer version of Apache Hadoop file output committer algorithm which is `v1`. This will prevent a silent correctness regression during migration from Apache Spark 2.4/3.0 to Apache Spark 3.1.0. Of course, if there is a user-provided configuration, `spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version=2`, that will be used still. ### Why are the changes needed? Apache Spark provides multiple distributions with Hadoop 2.7 and Hadoop 3.2. `spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version` depends on the Hadoop version. Apache Hadoop 3.0 switches the default algorithm from `v1` to `v2` and now there exists a discussion to remove `v2`. We had better provide a consistent default behavior of `v1` across various Spark distributions. - [MAPREDUCE-7282](https://issues.apache.org/jira/browse/MAPREDUCE-7282) MR v2 commit algorithm should be deprecated and not the default ### Does this PR introduce _any_ user-facing change? Yes. This changes the default behavior. Users can override this conf. ### How was this patch tested? Manual. **BEFORE (spark-3.0.1-bin-hadoop3.2)** ```scala scala> sc.version res0: String = 3.0.1 scala> sc.hadoopConfiguration.get("mapreduce.fileoutputcommitter.algorithm.version") res1: String = 2 ``` **AFTER** ```scala scala> sc.hadoopConfiguration.get("mapreduce.fileoutputcommitter.algorithm.version") res0: String = 1 ``` Closes #29895 from dongjoon-hyun/SPARK-DEFAUT-COMMITTER. 
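A short note (not part of the patch) on checking and overriding the new default. The conf is only forced to 1 when the user has not set it, so opting back into v2 remains a one-flag change, e.g. `--conf spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version=2` at submit time; the effective value can be read back from a running session:
```
// Should print "1" by default after this change, or the user-provided override.
sc.hadoopConfiguration.get("mapreduce.fileoutputcommitter.algorithm.version")
```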
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/deploy/SparkHadoopUtil.scala | 3 +++ docs/configuration.md | 10 ++-------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala index 1180501e8c738..6f799a542bc1e 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala @@ -462,6 +462,9 @@ private[spark] object SparkHadoopUtil { for ((key, value) <- conf.getAll if key.startsWith("spark.hadoop.")) { hadoopConf.set(key.substring("spark.hadoop.".length), value) } + if (conf.getOption("spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version").isEmpty) { + hadoopConf.set("mapreduce.fileoutputcommitter.algorithm.version", "1") + } } private def appendSparkHiveConfigs(conf: SparkConf, hadoopConf: Configuration): Unit = { diff --git a/docs/configuration.md b/docs/configuration.md index 8b6ae9d777cce..d825a589dfd31 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1761,16 +1761,10 @@ Apart from these, the following properties are also available, and may be useful spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version - Dependent on environment + 1 The file output committer algorithm version, valid algorithm version number: 1 or 2. - Version 2 may have better performance, but version 1 may handle failures better in certain situations, - as per MAPREDUCE-4815. - The default value depends on the Hadoop version used in an environment: - 1 for Hadoop versions lower than 3.0 - 2 for Hadoop versions 3.0 and higher - It's important to note that this can change back to 1 again in the future once MAPREDUCE-7282 - is fixed and merged. + Note that 2 may cause a correctness issue like MAPREDUCE-7282. 2.2.0 From 3a299aa6480ac22501512cd0310d31a441d7dfdc Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Wed, 30 Sep 2020 21:37:29 +0900 Subject: [PATCH 0132/1009] [SPARK-32741][SQL] Check if the same ExprId refers to the unique attribute in logical plans ### What changes were proposed in this pull request? Some plan transformations (e.g., `RemoveNoopOperators`) implicitly assume the same `ExprId` refers to the unique attribute. But, `RuleExecutor` does not check this integrity between logical plan transformations. So, this PR intends to add this check in `isPlanIntegral` of `Analyzer`/`Optimizer`. This PR comes from the talk with cloud-fan viirya in https://github.com/apache/spark/pull/29485#discussion_r475346278 ### Why are the changes needed? For better logical plan integrity checking. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #29585 from maropu/PlanIntegrityTest. 
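As a rough illustration of what the new check catches — a sketch against the `LogicalPlanIntegrity` helpers introduced in the diff below, using the Catalyst test DSL (it mirrors the added suite rather than prescribing new behavior):

```scala
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.Alias
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlanIntegrity}

val t = LocalRelation('a.int, 'b.int)
val Seq(a, _) = t.output

// A healthy plan: the alias gets a fresh ExprId, so the integrity check passes.
assert(LogicalPlanIntegrity.checkIfExprIdsAreGloballyUnique(
  t.select(Alias(a + 1, "a")())))

// A broken plan of the form `a#1 + 1 AS a#1`: the alias reuses the ExprId of
// its reference, exactly the situation rules like RemoveNoopOperators cannot
// tolerate, so the check fails.
assert(!LogicalPlanIntegrity.checkIfExprIdsAreGloballyUnique(
  t.select(Alias(a + 1, "a")(exprId = a.exprId))))
```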
Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- .../sql/catalyst/analysis/Analyzer.scala | 11 ++- .../sql/catalyst/optimizer/Optimizer.scala | 15 ++-- .../sql/catalyst/optimizer/subquery.scala | 51 ++++++++------ .../catalyst/plans/logical/LogicalPlan.scala | 70 +++++++++++++++++++ .../optimizer/FoldablePropagationSuite.scala | 4 +- .../logical/LogicalPlanIntegritySuite.scala | 51 ++++++++++++++ .../sql/execution/adaptive/AQEOptimizer.scala | 8 ++- .../spark/sql/streaming/StreamSuite.scala | 7 +- 8 files changed, 181 insertions(+), 36 deletions(-) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanIntegritySuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 6e1f371b1a2b5..77a6631b250e8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -48,6 +48,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.{PartitionOverwriteMode, StoreAssignmentPolicy} import org.apache.spark.sql.types._ import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.util.Utils /** * A trivial [[Analyzer]] with a dummy [[SessionCatalog]] and [[EmptyFunctionRegistry]]. @@ -136,6 +137,10 @@ class Analyzer( private val v1SessionCatalog: SessionCatalog = catalogManager.v1SessionCatalog + override protected def isPlanIntegral(plan: LogicalPlan): Boolean = { + !Utils.isTesting || LogicalPlanIntegrity.checkIfExprIdsAreGloballyUnique(plan) + } + override def isView(nameParts: Seq[String]): Boolean = v1SessionCatalog.isView(nameParts) // Only for tests. @@ -2777,8 +2782,8 @@ class Analyzer( // a resolved Aggregate will not have Window Functions. case f @ UnresolvedHaving(condition, a @ Aggregate(groupingExprs, aggregateExprs, child)) if child.resolved && - hasWindowFunction(aggregateExprs) && - a.expressions.forall(_.resolved) => + hasWindowFunction(aggregateExprs) && + a.expressions.forall(_.resolved) => val (windowExpressions, aggregateExpressions) = extract(aggregateExprs) // Create an Aggregate operator to evaluate aggregation functions. val withAggregate = Aggregate(groupingExprs, aggregateExpressions, child) @@ -2795,7 +2800,7 @@ class Analyzer( // Aggregate without Having clause. case a @ Aggregate(groupingExprs, aggregateExprs, child) if hasWindowFunction(aggregateExprs) && - a.expressions.forall(_.resolved) => + a.expressions.forall(_.resolved) => val (windowExpressions, aggregateExpressions) = extract(aggregateExprs) // Create an Aggregate operator to evaluate aggregation functions. 
val withAggregate = Aggregate(groupingExprs, aggregateExpressions, child) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 94970740d8d91..f2360150e47b5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.catalyst.optimizer import scala.collection.mutable import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog} import org.apache.spark.sql.catalyst.expressions._ @@ -44,9 +43,11 @@ abstract class Optimizer(catalogManager: CatalogManager) // Currently we check after the execution of each rule if a plan: // - is still resolved // - only host special expressions in supported operators + // - has globally-unique attribute IDs override protected def isPlanIntegral(plan: LogicalPlan): Boolean = { !Utils.isTesting || (plan.resolved && - plan.find(PlanHelper.specialExpressionsInUnsupportedOperator(_).nonEmpty).isEmpty) + plan.find(PlanHelper.specialExpressionsInUnsupportedOperator(_).nonEmpty).isEmpty && + LogicalPlanIntegrity.checkIfExprIdsAreGloballyUnique(plan)) } override protected val excludedOnceBatches: Set[String] = @@ -1585,14 +1586,14 @@ object ReplaceDistinctWithAggregate extends Rule[LogicalPlan] { * Replaces logical [[Deduplicate]] operator with an [[Aggregate]] operator. */ object ReplaceDeduplicateWithAggregate extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan transform { - case Deduplicate(keys, child) if !child.isStreaming => + def apply(plan: LogicalPlan): LogicalPlan = plan transformUpWithNewOutput { + case d @ Deduplicate(keys, child) if !child.isStreaming => val keyExprIds = keys.map(_.exprId) val aggCols = child.output.map { attr => if (keyExprIds.contains(attr.exprId)) { attr } else { - Alias(new First(attr).toAggregateExpression(), attr.name)(attr.exprId) + Alias(new First(attr).toAggregateExpression(), attr.name)() } } // SPARK-22951: Physical aggregate operators distinguishes global aggregation and grouping @@ -1601,7 +1602,9 @@ object ReplaceDeduplicateWithAggregate extends Rule[LogicalPlan] { // we append a literal when the grouping key list is empty so that the result aggregate // operator is properly treated as a grouping aggregation. val nonemptyKeys = if (keys.isEmpty) Literal(1) :: Nil else keys - Aggregate(nonemptyKeys, aggCols, child) + val newAgg = Aggregate(nonemptyKeys, aggCols, child) + val attrMapping = d.output.zip(newAgg.output) + newAgg -> attrMapping } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala index 7b696912aa465..a168dcd7a83f5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala @@ -338,15 +338,20 @@ object PullupCorrelatedPredicates extends Rule[LogicalPlan] with PredicateHelper object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { /** * Extract all correlated scalar subqueries from an expression. The subqueries are collected using - * the given collector. 
The expression is rewritten and returned. + * the given collector. To avoid the reuse of `exprId`s, this method generates new `exprId` + * for the subqueries and rewrite references in the given `expression`. + * This method returns extracted subqueries and the corresponding `exprId`s and these values + * will be used later in `constructLeftJoins` for building the child plan that + * returns subquery output with the `exprId`s. */ private def extractCorrelatedScalarSubqueries[E <: Expression]( expression: E, - subqueries: ArrayBuffer[ScalarSubquery]): E = { + subqueries: ArrayBuffer[(ScalarSubquery, ExprId)]): E = { val newExpression = expression transform { case s: ScalarSubquery if s.children.nonEmpty => - subqueries += s - s.plan.output.head + val newExprId = NamedExpression.newExprId + subqueries += s -> newExprId + s.plan.output.head.withExprId(newExprId) } newExpression.asInstanceOf[E] } @@ -510,16 +515,16 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { */ private def constructLeftJoins( child: LogicalPlan, - subqueries: ArrayBuffer[ScalarSubquery]): LogicalPlan = { + subqueries: ArrayBuffer[(ScalarSubquery, ExprId)]): LogicalPlan = { subqueries.foldLeft(child) { - case (currentChild, ScalarSubquery(query, conditions, _)) => + case (currentChild, (ScalarSubquery(query, conditions, _), newExprId)) => val origOutput = query.output.head val resultWithZeroTups = evalSubqueryOnZeroTups(query) if (resultWithZeroTups.isEmpty) { // CASE 1: Subquery guaranteed not to have the COUNT bug Project( - currentChild.output :+ origOutput, + currentChild.output :+ Alias(origOutput, origOutput.name)(exprId = newExprId), Join(currentChild, query, LeftOuter, conditions.reduceOption(And), JoinHint.NONE)) } else { // Subquery might have the COUNT bug. Add appropriate corrections. @@ -544,7 +549,7 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { Alias( If(IsNull(alwaysTrueRef), resultWithZeroTups.get, - aggValRef), origOutput.name)(exprId = origOutput.exprId), + aggValRef), origOutput.name)(exprId = newExprId), Join(currentChild, Project(query.output :+ alwaysTrueExpr, query), LeftOuter, conditions.reduceOption(And), JoinHint.NONE)) @@ -571,7 +576,7 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { (IsNull(alwaysTrueRef), resultWithZeroTups.get), (Not(havingNode.get.condition), Literal.create(null, aggValRef.dataType))), aggValRef), - origOutput.name)(exprId = origOutput.exprId) + origOutput.name)(exprId = newExprId) Project( currentChild.output :+ caseExpr, @@ -588,36 +593,42 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { * Rewrite [[Filter]], [[Project]] and [[Aggregate]] plans containing correlated scalar * subqueries. */ - def apply(plan: LogicalPlan): LogicalPlan = plan transform { + def apply(plan: LogicalPlan): LogicalPlan = plan transformUpWithNewOutput { case a @ Aggregate(grouping, expressions, child) => - val subqueries = ArrayBuffer.empty[ScalarSubquery] + val subqueries = ArrayBuffer.empty[(ScalarSubquery, ExprId)] val newExpressions = expressions.map(extractCorrelatedScalarSubqueries(_, subqueries)) if (subqueries.nonEmpty) { // We currently only allow correlated subqueries in an aggregate if they are part of the // grouping expressions. As a result we need to replace all the scalar subqueries in the // grouping expressions by their result. 
val newGrouping = grouping.map { e => - subqueries.find(_.semanticEquals(e)).map(_.plan.output.head).getOrElse(e) + subqueries.find(_._1.semanticEquals(e)).map(_._1.plan.output.head).getOrElse(e) } - Aggregate(newGrouping, newExpressions, constructLeftJoins(child, subqueries)) + val newAgg = Aggregate(newGrouping, newExpressions, constructLeftJoins(child, subqueries)) + val attrMapping = a.output.zip(newAgg.output) + newAgg -> attrMapping } else { - a + a -> Nil } case p @ Project(expressions, child) => - val subqueries = ArrayBuffer.empty[ScalarSubquery] + val subqueries = ArrayBuffer.empty[(ScalarSubquery, ExprId)] val newExpressions = expressions.map(extractCorrelatedScalarSubqueries(_, subqueries)) if (subqueries.nonEmpty) { - Project(newExpressions, constructLeftJoins(child, subqueries)) + val newProj = Project(newExpressions, constructLeftJoins(child, subqueries)) + val attrMapping = p.output.zip(newProj.output) + newProj -> attrMapping } else { - p + p -> Nil } case f @ Filter(condition, child) => - val subqueries = ArrayBuffer.empty[ScalarSubquery] + val subqueries = ArrayBuffer.empty[(ScalarSubquery, ExprId)] val newCondition = extractCorrelatedScalarSubqueries(condition, subqueries) if (subqueries.nonEmpty) { - Project(f.output, Filter(newCondition, constructLeftJoins(child, subqueries))) + val newProj = Project(f.output, Filter(newCondition, constructLeftJoins(child, subqueries))) + val attrMapping = f.output.zip(newProj.output) + newProj -> attrMapping } else { - f + f -> Nil } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 96c550616065a..48dfc5fd57e63 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -203,3 +203,73 @@ abstract class BinaryNode extends LogicalPlan { abstract class OrderPreservingUnaryNode extends UnaryNode { override final def outputOrdering: Seq[SortOrder] = child.outputOrdering } + +object LogicalPlanIntegrity { + + private def canGetOutputAttrs(p: LogicalPlan): Boolean = { + p.resolved && !p.expressions.exists { e => + e.collectFirst { + // We cannot call `output` in plans with a `ScalarSubquery` expr having no column, + // so, we filter out them in advance. + case s: ScalarSubquery if s.plan.schema.fields.isEmpty => true + }.isDefined + } + } + + /** + * Since some logical plans (e.g., `Union`) can build `AttributeReference`s in their `output`, + * this method checks if the same `ExprId` refers to attributes having the same data type + * in plan output. + */ + def hasUniqueExprIdsForOutput(plan: LogicalPlan): Boolean = { + val exprIds = plan.collect { case p if canGetOutputAttrs(p) => + // NOTE: we still need to filter resolved expressions here because the output of + // some resolved logical plans can have unresolved references, + // e.g., outer references in `ExistenceJoin`. + p.output.filter(_.resolved).map { a => (a.exprId, a.dataType) } + }.flatten + + val ignoredExprIds = plan.collect { + // NOTE: `Union` currently reuses input `ExprId`s for output references, but we cannot + // simply modify the code for assigning new `ExprId`s in `Union#output` because + // the modification will make breaking changes (See SPARK-32741(#29585)). + // So, this check just ignores the `exprId`s of `Union` output. 
+ case u: Union if u.resolved => u.output.map(_.exprId) + }.flatten.toSet + + val groupedDataTypesByExprId = exprIds.filterNot { case (exprId, _) => + ignoredExprIds.contains(exprId) + }.groupBy(_._1).values.map(_.distinct) + + groupedDataTypesByExprId.forall(_.length == 1) + } + + /** + * This method checks if reference `ExprId`s are not reused when assigning a new `ExprId`. + * For example, it returns false if plan transformers create an alias having the same `ExprId` + * with one of reference attributes, e.g., `a#1 + 1 AS a#1`. + */ + def checkIfSameExprIdNotReused(plan: LogicalPlan): Boolean = { + plan.collect { case p if p.resolved => + p.expressions.forall { + case a: Alias => + // Even if a plan is resolved, `a.references` can return unresolved references, + // e.g., in `Grouping`/`GroupingID`, so we need to filter out them and + // check if the same `exprId` in `Alias` does not exist + // among reference `exprId`s. + !a.references.filter(_.resolved).map(_.exprId).exists(_ == a.exprId) + case _ => + true + } + }.forall(identity) + } + + /** + * This method checks if the same `ExprId` refers to an unique attribute in a plan tree. + * Some plan transformers (e.g., `RemoveNoopOperators`) rewrite logical + * plans based on this assumption. + */ + def checkIfExprIdsAreGloballyUnique(plan: LogicalPlan): Boolean = { + checkIfSameExprIdNotReused(plan) && hasUniqueExprIdsForOutput(plan) + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala index fe43e8e288673..92e4fa345e2ad 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala @@ -156,8 +156,8 @@ class FoldablePropagationSuite extends PlanTest { val query = expand.where(a1.isNotNull).select(a1, a2).analyze val optimized = Optimize.execute(query) val correctExpand = expand.copy(projections = Seq( - Seq(Literal(null), c2), - Seq(c1, Literal(null)))) + Seq(Literal(null), Literal(2)), + Seq(Literal(1), Literal(null)))) val correctAnswer = correctExpand.where(a1.isNotNull).select(a1, a2).analyze comparePlans(optimized, correctAnswer) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanIntegritySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanIntegritySuite.scala new file mode 100644 index 0000000000000..6f342b8d94379 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanIntegritySuite.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.plans.logical + +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference} +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.types.LongType + +class LogicalPlanIntegritySuite extends PlanTest { + import LogicalPlanIntegrity._ + + case class OutputTestPlan(child: LogicalPlan, output: Seq[Attribute]) extends UnaryNode { + override val analyzed = true + } + + test("Checks if the same `ExprId` refers to a semantically-equal attribute in a plan output") { + val t = LocalRelation('a.int, 'b.int) + assert(hasUniqueExprIdsForOutput(OutputTestPlan(t, t.output))) + assert(!hasUniqueExprIdsForOutput(OutputTestPlan(t, t.output.zipWithIndex.map { + case (a, i) => AttributeReference(s"c$i", LongType)(a.exprId) + }))) + } + + test("Checks if reference ExprIds are not reused when assigning a new ExprId") { + val t = LocalRelation('a.int, 'b.int) + val Seq(a, b) = t.output + assert(checkIfSameExprIdNotReused(t.select(Alias(a + 1, "a")()))) + assert(!checkIfSameExprIdNotReused(t.select(Alias(a + 1, "a")(exprId = a.exprId)))) + assert(checkIfSameExprIdNotReused(t.select(Alias(a + 1, "a")(exprId = b.exprId)))) + assert(checkIfSameExprIdNotReused(t.select(Alias(a + b, "ab")()))) + assert(!checkIfSameExprIdNotReused(t.select(Alias(a + b, "ab")(exprId = a.exprId)))) + assert(!checkIfSameExprIdNotReused(t.select(Alias(a + b, "ab")(exprId = b.exprId)))) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala index c82b264a600ef..0170f8b2f71c2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.adaptive -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, LogicalPlanIntegrity, PlanHelper} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.internal.SQLConf import org.apache.spark.util.Utils @@ -54,4 +54,10 @@ class AQEOptimizer(conf: SQLConf) extends RuleExecutor[LogicalPlan] { } } } + + override protected def isPlanIntegral(plan: LogicalPlan): Boolean = { + !Utils.isTesting || (plan.resolved && + plan.find(PlanHelper.specialExpressionsInUnsupportedOperator(_).nonEmpty).isEmpty && + LogicalPlanIntegrity.checkIfExprIdsAreGloballyUnique(plan)) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala index 9f3ff1a6708e4..8797e5ad64149 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala @@ -36,7 +36,6 @@ import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart} import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.plans.logical.Range import org.apache.spark.sql.catalyst.streaming.{InternalOutputModes, StreamingRelationV2} -import org.apache.spark.sql.catalyst.util.DateTimeConstants.MICROS_PER_MILLIS import 
org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.execution.{LocalLimitExec, SimpleMode, SparkPlan} import org.apache.spark.sql.execution.command.ExplainCommand @@ -47,7 +46,7 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources.StreamSourceProvider import org.apache.spark.sql.streaming.util.{BlockOnStopSourceProvider, StreamManualClock} -import org.apache.spark.sql.types.{IntegerType, StructField, StructType} +import org.apache.spark.sql.types.{IntegerType, LongType, StructField, StructType} import org.apache.spark.util.Utils class StreamSuite extends StreamTest { @@ -1268,7 +1267,7 @@ class StreamSuite extends StreamTest { } abstract class FakeSource extends StreamSourceProvider { - private val fakeSchema = StructType(StructField("a", IntegerType) :: Nil) + private val fakeSchema = StructType(StructField("a", LongType) :: Nil) override def sourceSchema( spark: SQLContext, @@ -1290,7 +1289,7 @@ class FakeDefaultSource extends FakeSource { new Source { private var offset = -1L - override def schema: StructType = StructType(StructField("a", IntegerType) :: Nil) + override def schema: StructType = StructType(StructField("a", LongType) :: Nil) override def getOffset: Option[Offset] = { if (offset >= 10) { From ece8d8e22cf7e3924e44c16f58028c323dc54356 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 30 Sep 2020 09:27:57 -0700 Subject: [PATCH 0133/1009] [SPARK-33006][K8S][DOCS] Add dynamic PVC usage example into K8s doc ### What changes were proposed in this pull request? This updates K8s document to describe new dynamic PVC features. ### Why are the changes needed? This will help the user use the new features easily. ### Does this PR introduce _any_ user-facing change? Yes, but it's a doc updates. ### How was this patch tested? Manual. Screen Shot 2020-09-28 at 3 54 53 PM Screen Shot 2020-09-28 at 3 55 07 PM Closes #29897 from dongjoon-hyun/SPARK-33006. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- docs/running-on-kubernetes.md | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index d0c6012e00aa6..e9c292d21fd47 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -307,7 +307,18 @@ And, the claim name of a `persistentVolumeClaim` with volume name `checkpointpvc spark.kubernetes.driver.volumes.persistentVolumeClaim.checkpointpvc.options.claimName=check-point-pvc-claim ``` -The configuration properties for mounting volumes into the executor pods use prefix `spark.kubernetes.executor.` instead of `spark.kubernetes.driver.`. For a complete list of available options for each supported type of volumes, please refer to the [Spark Properties](#spark-properties) section below. +The configuration properties for mounting volumes into the executor pods use prefix `spark.kubernetes.executor.` instead of `spark.kubernetes.driver.`. + +For example, you can mount a dynamically-created persistent volume claim per executor by using `OnDemand` as a claim name and `storageClass` and `sizeLimit` options like the following. This is useful in case of [Dynamic Allocation](configuration.html#dynamic-allocation). 
+``` +spark.kubernetes.executor.volumes.persistentVolumeClaim.data.options.claimName=OnDemand +spark.kubernetes.executor.volumes.persistentVolumeClaim.data.options.storageClass=gp +spark.kubernetes.executor.volumes.persistentVolumeClaim.data.options.sizeLimit=500Gi +spark.kubernetes.executor.volumes.persistentVolumeClaim.data.mount.path=/data +spark.kubernetes.executor.volumes.persistentVolumeClaim.data.mount.readOnly=false +``` + +For a complete list of available options for each supported type of volumes, please refer to the [Spark Properties](#spark-properties) section below. ## Local Storage @@ -318,6 +329,15 @@ Spark supports using volumes to spill data during shuffles and other operations. --conf spark.kubernetes.driver.volumes.[VolumeType].spark-local-dir-[VolumeName].mount.readOnly=false ``` +Specifically, you can use persistent volume claims if the jobs require large shuffle and sorting operations in executors. + +``` +spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-1.options.claimName=OnDemand +spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-1.options.storageClass=gp +spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-1.options.sizeLimit=500Gi +spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-1.mount.path=/data +spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-1.mount.readOnly=false +``` If no volume is set as local storage, Spark uses temporary scratch space to spill data to disk during shuffles and other operations. When using Kubernetes as the resource manager the pods will be created with an [emptyDir](https://kubernetes.io/docs/concepts/storage/volumes/#emptydir) volume mounted for each directory listed in `spark.local.dir` or the environment variable `SPARK_LOCAL_DIRS` . If no directories are explicitly specified then a default directory is created and configured appropriately. From 3bdbb5546d2517dda6f71613927cc1783c87f319 Mon Sep 17 00:00:00 2001 From: GuoPhilipse <46367746+GuoPhilipse@users.noreply.github.com> Date: Thu, 1 Oct 2020 08:15:53 +0900 Subject: [PATCH 0134/1009] [SPARK-31753][SQL][DOCS][FOLLOW-UP] Add missing keywords in the SQL docs ### What changes were proposed in this pull request? update sql-ref docs, the following key words will be added in this PR. CLUSTERED BY SORTED BY INTO num_buckets BUCKETS ### Why are the changes needed? let more users know the sql key words usage ### Does this PR introduce _any_ user-facing change? No ![image](https://user-images.githubusercontent.com/46367746/94428281-0a6b8080-01c3-11eb-9ff3-899f8da602ca.png) ![image](https://user-images.githubusercontent.com/46367746/94428285-0d667100-01c3-11eb-8a54-90e7641d917b.png) ![image](https://user-images.githubusercontent.com/46367746/94428288-0f303480-01c3-11eb-9e1d-023538aa6e2d.png) ### How was this patch tested? generate html test Closes #29883 from GuoPhilipse/add-sql-missing-keywords. 
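For reference, a hedged sketch of a programmatic counterpart to the documented `CLUSTERED BY` / `SORTED BY` / `INTO num_buckets BUCKETS` clauses, using the `DataFrameWriter` bucketing API (this creates a Spark datasource bucketed table, not a Hive-bucketed one; the table name and a live `SparkSession` named `spark` are assumptions):

```scala
// Roughly equivalent to:
//   CREATE TABLE bucketed_ids (ID BIGINT, NAME STRING)
//     CLUSTERED BY (ID) SORTED BY (ID ASC) INTO 4 BUCKETS STORED AS PARQUET
spark.range(0, 100)
  .selectExpr("id AS ID", "CAST(id AS STRING) AS NAME")
  .write
  .bucketBy(4, "ID")   // CLUSTERED BY (ID) INTO 4 BUCKETS
  .sortBy("ID")        // SORTED BY (ID ASC)
  .format("parquet")
  .saveAsTable("bucketed_ids")
```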
Lead-authored-by: GuoPhilipse <46367746+GuoPhilipse@users.noreply.github.com> Co-authored-by: GuoPhilipse Signed-off-by: Takeshi Yamamuro --- ...-ref-syntax-ddl-create-table-datasource.md | 7 +++- ...-ref-syntax-ddl-create-table-hiveformat.md | 32 +++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/docs/sql-ref-syntax-ddl-create-table-datasource.md b/docs/sql-ref-syntax-ddl-create-table-datasource.md index d334447a91011..ba0516afbbfad 100644 --- a/docs/sql-ref-syntax-ddl-create-table-datasource.md +++ b/docs/sql-ref-syntax-ddl-create-table-datasource.md @@ -67,7 +67,12 @@ as any order. For example, you can write COMMENT table_comment after TBLPROPERTI * **SORTED BY** - Determines the order in which the data is stored in buckets. Default is Ascending order. + Specifies an ordering of bucket columns. Optionally, one can use ASC for an ascending order or DESC for a descending order after any column names in the SORTED BY clause. + If not specified, ASC is assumed by default. + +* **INTO num_buckets BUCKETS** + + Specifies buckets numbers, which is used in `CLUSTERED BY` clause. * **LOCATION** diff --git a/docs/sql-ref-syntax-ddl-create-table-hiveformat.md b/docs/sql-ref-syntax-ddl-create-table-hiveformat.md index 7bf847df98150..3a8c8d5b1160a 100644 --- a/docs/sql-ref-syntax-ddl-create-table-hiveformat.md +++ b/docs/sql-ref-syntax-ddl-create-table-hiveformat.md @@ -31,6 +31,9 @@ CREATE [ EXTERNAL ] TABLE [ IF NOT EXISTS ] table_identifier [ COMMENT table_comment ] [ PARTITIONED BY ( col_name2[:] col_type2 [ COMMENT col_comment2 ], ... ) | ( col_name1, col_name2, ... ) ] + [ CLUSTERED BY ( col_name1, col_name2, ...) + [ SORTED BY ( col_name1 [ ASC | DESC ], col_name2 [ ASC | DESC ], ... ) ] + INTO num_buckets BUCKETS ] [ ROW FORMAT row_format ] [ STORED AS file_format ] [ LOCATION path ] @@ -65,6 +68,21 @@ as any order. For example, you can write COMMENT table_comment after TBLPROPERTI Partitions are created on the table, based on the columns specified. +* **CLUSTERED BY** + + Partitions created on the table will be bucketed into fixed buckets based on the column specified for bucketing. + + **NOTE:** Bucketing is an optimization technique that uses buckets (and bucketing columns) to determine data partitioning and avoid data shuffle. + +* **SORTED BY** + + Specifies an ordering of bucket columns. Optionally, one can use ASC for an ascending order or DESC for a descending order after any column names in the SORTED BY clause. + If not specified, ASC is assumed by default. + +* **INTO num_buckets BUCKETS** + + Specifies buckets numbers, which is used in `CLUSTERED BY` clause. + * **row_format** Use the `SERDE` clause to specify a custom SerDe for one table. Otherwise, use the `DELIMITED` clause to use the native SerDe and specify the delimiter, escape character, null character and so on. 
@@ -203,6 +221,20 @@ CREATE EXTERNAL TABLE family (id INT, name STRING) STORED AS INPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleInputFormat' OUTPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleOutputFormat' LOCATION '/tmp/family/'; + +--Use `CLUSTERED BY` clause to create bucket table without `SORTED BY` +CREATE TABLE clustered_by_test1 (ID INT, AGE STRING) + CLUSTERED BY (ID) + INTO 4 BUCKETS + STORED AS ORC + +--Use `CLUSTERED BY` clause to create bucket table with `SORTED BY` +CREATE TABLE clustered_by_test2 (ID INT, NAME STRING) + PARTITIONED BY (YEAR STRING) + CLUSTERED BY (ID, NAME) + SORTED BY (ID ASC) + INTO 3 BUCKETS + STORED AS PARQUET ``` ### Related Statements From d75222dd1b0cdaaa7c22964e974117923fd069bb Mon Sep 17 00:00:00 2001 From: jlafleche Date: Wed, 30 Sep 2020 19:00:18 -0700 Subject: [PATCH 0135/1009] [SPARK-33012][BUILD][K8S] Upgrade fabric8 to 4.10.3 ### What changes were proposed in this pull request? This PR aims to upgrade `kubernetes-client` library to track fabric8's declared compatibility for k8s 1.18.0: https://github.com/fabric8io/kubernetes-client#compatibility-matrix ### Why are the changes needed? According to fabric8, 4.9.2 is incompatible with k8s 1.18.0. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Not tested yet. Closes #29888 from laflechejonathan/jlf/fabric8Ugprade. Authored-by: jlafleche Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-1.2 | 28 +++++++++++++++---- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 28 +++++++++++++++---- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 28 +++++++++++++++---- resource-managers/kubernetes/core/pom.xml | 2 +- .../kubernetes/integration-tests/pom.xml | 2 +- 5 files changed, 71 insertions(+), 17 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-1.2 b/dev/deps/spark-deps-hadoop-2.7-hive-1.2 index 900ee6d18d06d..fef1a6442cd33 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-1.2 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-1.2 @@ -139,14 +139,31 @@ jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.30//jul-to-slf4j-1.7.30.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client/4.9.2//kubernetes-client-4.9.2.jar -kubernetes-model-common/4.9.2//kubernetes-model-common-4.9.2.jar -kubernetes-model/4.9.2//kubernetes-model-4.9.2.jar +kubernetes-client/4.10.3//kubernetes-client-4.10.3.jar +kubernetes-model-admissionregistration/4.10.3//kubernetes-model-admissionregistration-4.10.3.jar +kubernetes-model-apiextensions/4.10.3//kubernetes-model-apiextensions-4.10.3.jar +kubernetes-model-apps/4.10.3//kubernetes-model-apps-4.10.3.jar +kubernetes-model-autoscaling/4.10.3//kubernetes-model-autoscaling-4.10.3.jar +kubernetes-model-batch/4.10.3//kubernetes-model-batch-4.10.3.jar +kubernetes-model-certificates/4.10.3//kubernetes-model-certificates-4.10.3.jar +kubernetes-model-common/4.10.3//kubernetes-model-common-4.10.3.jar +kubernetes-model-coordination/4.10.3//kubernetes-model-coordination-4.10.3.jar +kubernetes-model-core/4.10.3//kubernetes-model-core-4.10.3.jar +kubernetes-model-discovery/4.10.3//kubernetes-model-discovery-4.10.3.jar +kubernetes-model-events/4.10.3//kubernetes-model-events-4.10.3.jar +kubernetes-model-extensions/4.10.3//kubernetes-model-extensions-4.10.3.jar +kubernetes-model-metrics/4.10.3//kubernetes-model-metrics-4.10.3.jar +kubernetes-model-networking/4.10.3//kubernetes-model-networking-4.10.3.jar +kubernetes-model-policy/4.10.3//kubernetes-model-policy-4.10.3.jar 
+kubernetes-model-rbac/4.10.3//kubernetes-model-rbac-4.10.3.jar +kubernetes-model-scheduling/4.10.3//kubernetes-model-scheduling-4.10.3.jar +kubernetes-model-settings/4.10.3//kubernetes-model-settings-4.10.3.jar +kubernetes-model-storageclass/4.10.3//kubernetes-model-storageclass-4.10.3.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar libthrift/0.12.0//libthrift-0.12.0.jar log4j/1.2.17//log4j-1.2.17.jar -logging-interceptor/3.12.6//logging-interceptor-3.12.6.jar +logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.7.1//lz4-java-1.7.1.jar machinist_2.12/0.6.8//machinist_2.12-0.6.8.jar macro-compat_2.12/1.1.1//macro-compat_2.12-1.1.1.jar @@ -159,9 +176,10 @@ metrics-jvm/4.1.1//metrics-jvm-4.1.1.jar minlog/1.3.0//minlog-1.3.0.jar netty-all/4.1.51.Final//netty-all-4.1.51.Final.jar objenesis/2.6//objenesis-2.6.jar -okhttp/3.12.6//okhttp-3.12.6.jar +okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar +openshift-model/4.10.3//openshift-model-4.10.3.jar orc-core/1.5.10/nohive/orc-core-1.5.10-nohive.jar orc-mapreduce/1.5.10/nohive/orc-mapreduce-1.5.10-nohive.jar orc-shims/1.5.10//orc-shims-1.5.10.jar diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 9e167256236c0..6d1934b46261b 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -154,14 +154,31 @@ jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.30//jul-to-slf4j-1.7.30.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client/4.9.2//kubernetes-client-4.9.2.jar -kubernetes-model-common/4.9.2//kubernetes-model-common-4.9.2.jar -kubernetes-model/4.9.2//kubernetes-model-4.9.2.jar +kubernetes-client/4.10.3//kubernetes-client-4.10.3.jar +kubernetes-model-admissionregistration/4.10.3//kubernetes-model-admissionregistration-4.10.3.jar +kubernetes-model-apiextensions/4.10.3//kubernetes-model-apiextensions-4.10.3.jar +kubernetes-model-apps/4.10.3//kubernetes-model-apps-4.10.3.jar +kubernetes-model-autoscaling/4.10.3//kubernetes-model-autoscaling-4.10.3.jar +kubernetes-model-batch/4.10.3//kubernetes-model-batch-4.10.3.jar +kubernetes-model-certificates/4.10.3//kubernetes-model-certificates-4.10.3.jar +kubernetes-model-common/4.10.3//kubernetes-model-common-4.10.3.jar +kubernetes-model-coordination/4.10.3//kubernetes-model-coordination-4.10.3.jar +kubernetes-model-core/4.10.3//kubernetes-model-core-4.10.3.jar +kubernetes-model-discovery/4.10.3//kubernetes-model-discovery-4.10.3.jar +kubernetes-model-events/4.10.3//kubernetes-model-events-4.10.3.jar +kubernetes-model-extensions/4.10.3//kubernetes-model-extensions-4.10.3.jar +kubernetes-model-metrics/4.10.3//kubernetes-model-metrics-4.10.3.jar +kubernetes-model-networking/4.10.3//kubernetes-model-networking-4.10.3.jar +kubernetes-model-policy/4.10.3//kubernetes-model-policy-4.10.3.jar +kubernetes-model-rbac/4.10.3//kubernetes-model-rbac-4.10.3.jar +kubernetes-model-scheduling/4.10.3//kubernetes-model-scheduling-4.10.3.jar +kubernetes-model-settings/4.10.3//kubernetes-model-settings-4.10.3.jar +kubernetes-model-storageclass/4.10.3//kubernetes-model-storageclass-4.10.3.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar libthrift/0.12.0//libthrift-0.12.0.jar log4j/1.2.17//log4j-1.2.17.jar -logging-interceptor/3.12.6//logging-interceptor-3.12.6.jar +logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.7.1//lz4-java-1.7.1.jar 
machinist_2.12/0.6.8//machinist_2.12-0.6.8.jar macro-compat_2.12/1.1.1//macro-compat_2.12-1.1.1.jar @@ -174,9 +191,10 @@ metrics-jvm/4.1.1//metrics-jvm-4.1.1.jar minlog/1.3.0//minlog-1.3.0.jar netty-all/4.1.51.Final//netty-all-4.1.51.Final.jar objenesis/2.6//objenesis-2.6.jar -okhttp/3.12.6//okhttp-3.12.6.jar +okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar +openshift-model/4.10.3//openshift-model-4.10.3.jar orc-core/1.5.10//orc-core-1.5.10.jar orc-mapreduce/1.5.10//orc-mapreduce-1.5.10.jar orc-shims/1.5.10//orc-shims-1.5.10.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index b44b461014cd7..2e29d831b9e66 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -166,14 +166,31 @@ kerby-pkix/1.0.1//kerby-pkix-1.0.1.jar kerby-util/1.0.1//kerby-util-1.0.1.jar kerby-xdr/1.0.1//kerby-xdr-1.0.1.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client/4.9.2//kubernetes-client-4.9.2.jar -kubernetes-model-common/4.9.2//kubernetes-model-common-4.9.2.jar -kubernetes-model/4.9.2//kubernetes-model-4.9.2.jar +kubernetes-client/4.10.3//kubernetes-client-4.10.3.jar +kubernetes-model-admissionregistration/4.10.3//kubernetes-model-admissionregistration-4.10.3.jar +kubernetes-model-apiextensions/4.10.3//kubernetes-model-apiextensions-4.10.3.jar +kubernetes-model-apps/4.10.3//kubernetes-model-apps-4.10.3.jar +kubernetes-model-autoscaling/4.10.3//kubernetes-model-autoscaling-4.10.3.jar +kubernetes-model-batch/4.10.3//kubernetes-model-batch-4.10.3.jar +kubernetes-model-certificates/4.10.3//kubernetes-model-certificates-4.10.3.jar +kubernetes-model-common/4.10.3//kubernetes-model-common-4.10.3.jar +kubernetes-model-coordination/4.10.3//kubernetes-model-coordination-4.10.3.jar +kubernetes-model-core/4.10.3//kubernetes-model-core-4.10.3.jar +kubernetes-model-discovery/4.10.3//kubernetes-model-discovery-4.10.3.jar +kubernetes-model-events/4.10.3//kubernetes-model-events-4.10.3.jar +kubernetes-model-extensions/4.10.3//kubernetes-model-extensions-4.10.3.jar +kubernetes-model-metrics/4.10.3//kubernetes-model-metrics-4.10.3.jar +kubernetes-model-networking/4.10.3//kubernetes-model-networking-4.10.3.jar +kubernetes-model-policy/4.10.3//kubernetes-model-policy-4.10.3.jar +kubernetes-model-rbac/4.10.3//kubernetes-model-rbac-4.10.3.jar +kubernetes-model-scheduling/4.10.3//kubernetes-model-scheduling-4.10.3.jar +kubernetes-model-settings/4.10.3//kubernetes-model-settings-4.10.3.jar +kubernetes-model-storageclass/4.10.3//kubernetes-model-storageclass-4.10.3.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar libthrift/0.12.0//libthrift-0.12.0.jar log4j/1.2.17//log4j-1.2.17.jar -logging-interceptor/3.12.6//logging-interceptor-3.12.6.jar +logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.7.1//lz4-java-1.7.1.jar machinist_2.12/0.6.8//machinist_2.12-0.6.8.jar macro-compat_2.12/1.1.1//macro-compat_2.12-1.1.1.jar @@ -188,9 +205,10 @@ netty-all/4.1.51.Final//netty-all-4.1.51.Final.jar nimbus-jose-jwt/4.41.1//nimbus-jose-jwt-4.41.1.jar objenesis/2.6//objenesis-2.6.jar okhttp/2.7.5//okhttp-2.7.5.jar -okhttp/3.12.6//okhttp-3.12.6.jar +okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar +openshift-model/4.10.3//openshift-model-4.10.3.jar orc-core/1.5.10//orc-core-1.5.10.jar orc-mapreduce/1.5.10//orc-mapreduce-1.5.10.jar orc-shims/1.5.10//orc-shims-1.5.10.jar diff --git 
a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index c1a7dafb69c46..a4c80f551cdfc 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -30,7 +30,7 @@ kubernetes - 4.9.2 + 4.10.3 diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 4a55ead38aae2..952081030f5f3 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -28,7 +28,7 @@ 1.3.0 - 4.9.2 + 4.10.3 kubernetes-integration-tests From 0b5a379c1fb87aa536ebe9433e501dcf4f80ea60 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Wed, 30 Sep 2020 19:24:50 -0700 Subject: [PATCH 0136/1009] [SPARK-33023][CORE] Judge path of Windows need add condition `Utils.isWindows` ### What changes were proposed in this pull request? according to https://github.com/apache/spark/pull/29881#discussion_r496648397 we need add condition `Utils.isWindows` ### Why are the changes needed? add strict condition of judging path is window path ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? No Closes #29909 from AngersZhuuuu/SPARK-33023. Authored-by: angerszhu Signed-off-by: Dongjoon Hyun --- core/src/main/scala/org/apache/spark/SparkContext.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 409e3065492b0..501e865c4105a 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1899,7 +1899,7 @@ class SparkContext(config: SparkConf) extends Logging { if (path == null || path.isEmpty) { logWarning("null or empty path specified as parameter to addJar") } else { - val key = if (path.contains("\\")) { + val key = if (path.contains("\\") && Utils.isWindows) { // For local paths with backslashes on Windows, URI throws an exception addLocalJarFile(new File(path)) } else { From 28ed3a512ac6fcaafa885eb8092a68fe9e8f5c26 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Wed, 30 Sep 2020 21:30:17 -0700 Subject: [PATCH 0137/1009] [SPARK-32723][WEBUI] Upgrade to jQuery 3.5.1 ### What changes were proposed in this pull request? Upgrade to the latest available version of jQuery (3.5.1). ### Why are the changes needed? There are some CVE-s reported (CVE-2020-11022, CVE-2020-11023) affecting older versions of jQuery. Although Spark UI is read-only and those CVEs doesn't seem to affect Spark, using the latest version of this library can help to handle vulnerability reports of security scans. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual tests and checked the jQuery 3.5 upgrade guide. Closes #29902 from peter-toth/SPARK-32723-upgrade-to-jquery-3.5.1. 
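Regarding the `Utils.isWindows` guard added in the SPARK-33023 hunk above, a small plain-Scala illustration (the sample path is made up) of the reasoning: `java.net.URI` rejects backslashes, yet a backslash is also a legal character in a POSIX file name, so the backslash-based fallback should only trigger when actually running on Windows.

```scala
import java.net.{URI, URISyntaxException}

val windowsStylePath = """C:\tmp\libs\my.jar"""

// URI parsing fails on backslashes, which is why such paths are routed to
// addLocalJarFile(new File(path)) instead of the URI-based branch in addJar.
val parsable =
  try { new URI(windowsStylePath); true }
  catch { case _: URISyntaxException => false }

println(parsable) // false
```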
Authored-by: Peter Toth Signed-off-by: Dongjoon Hyun --- .../resources/org/apache/spark/ui/static/jquery-3.4.1.min.js | 2 -- .../resources/org/apache/spark/ui/static/jquery-3.5.1.min.js | 2 ++ core/src/main/scala/org/apache/spark/ui/UIUtils.scala | 2 +- dev/.rat-excludes | 2 +- docs/_layouts/global.html | 2 +- docs/js/vendor/jquery-3.4.1.min.js | 2 -- docs/js/vendor/jquery-3.5.1.min.js | 2 ++ 7 files changed, 7 insertions(+), 7 deletions(-) delete mode 100644 core/src/main/resources/org/apache/spark/ui/static/jquery-3.4.1.min.js create mode 100644 core/src/main/resources/org/apache/spark/ui/static/jquery-3.5.1.min.js delete mode 100644 docs/js/vendor/jquery-3.4.1.min.js create mode 100644 docs/js/vendor/jquery-3.5.1.min.js diff --git a/core/src/main/resources/org/apache/spark/ui/static/jquery-3.4.1.min.js b/core/src/main/resources/org/apache/spark/ui/static/jquery-3.4.1.min.js deleted file mode 100644 index 07c00cd227da0..0000000000000 --- a/core/src/main/resources/org/apache/spark/ui/static/jquery-3.4.1.min.js +++ /dev/null @@ -1,2 +0,0 @@ -/*! jQuery v3.4.1 | (c) JS Foundation and other contributors | jquery.org/license */ -!function(e,t){"use strict";"object"==typeof module&&"object"==typeof module.exports?module.exports=e.document?t(e,!0):function(e){if(!e.document)throw new Error("jQuery requires a window with a document");return t(e)}:t(e)}("undefined"!=typeof window?window:this,function(C,e){"use strict";var t=[],E=C.document,r=Object.getPrototypeOf,s=t.slice,g=t.concat,u=t.push,i=t.indexOf,n={},o=n.toString,v=n.hasOwnProperty,a=v.toString,l=a.call(Object),y={},m=function(e){return"function"==typeof e&&"number"!=typeof e.nodeType},x=function(e){return null!=e&&e===e.window},c={type:!0,src:!0,nonce:!0,noModule:!0};function b(e,t,n){var r,i,o=(n=n||E).createElement("script");if(o.text=e,t)for(r in c)(i=t[r]||t.getAttribute&&t.getAttribute(r))&&o.setAttribute(r,i);n.head.appendChild(o).parentNode.removeChild(o)}function w(e){return null==e?e+"":"object"==typeof e||"function"==typeof e?n[o.call(e)]||"object":typeof e}var f="3.4.1",k=function(e,t){return new k.fn.init(e,t)},p=/^[\s\uFEFF\xA0]+|[\s\uFEFF\xA0]+$/g;function d(e){var t=!!e&&"length"in e&&e.length,n=w(e);return!m(e)&&!x(e)&&("array"===n||0===t||"number"==typeof t&&0+~]|"+M+")"+M+"*"),U=new RegExp(M+"|>"),X=new RegExp($),V=new RegExp("^"+I+"$"),G={ID:new RegExp("^#("+I+")"),CLASS:new RegExp("^\\.("+I+")"),TAG:new RegExp("^("+I+"|[*])"),ATTR:new RegExp("^"+W),PSEUDO:new RegExp("^"+$),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+M+"*(even|odd|(([+-]|)(\\d*)n|)"+M+"*(?:([+-]|)"+M+"*(\\d+)|))"+M+"*\\)|)","i"),bool:new RegExp("^(?:"+R+")$","i"),needsContext:new RegExp("^"+M+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+M+"*((?:-\\d)?\\d*)"+M+"*\\)|)(?=[^-]|$)","i")},Y=/HTML$/i,Q=/^(?:input|select|textarea|button)$/i,J=/^h\d$/i,K=/^[^{]+\{\s*\[native \w/,Z=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,ee=/[+~]/,te=new RegExp("\\\\([\\da-f]{1,6}"+M+"?|("+M+")|.)","ig"),ne=function(e,t,n){var r="0x"+t-65536;return r!=r||n?t:r<0?String.fromCharCode(r+65536):String.fromCharCode(r>>10|55296,1023&r|56320)},re=/([\0-\x1f\x7f]|^-?\d)|^-$|[^\0-\x1f\x7f-\uFFFF\w-]/g,ie=function(e,t){return t?"\0"===e?"\ufffd":e.slice(0,-1)+"\\"+e.charCodeAt(e.length-1).toString(16)+" 
":"\\"+e},oe=function(){T()},ae=be(function(e){return!0===e.disabled&&"fieldset"===e.nodeName.toLowerCase()},{dir:"parentNode",next:"legend"});try{H.apply(t=O.call(m.childNodes),m.childNodes),t[m.childNodes.length].nodeType}catch(e){H={apply:t.length?function(e,t){L.apply(e,O.call(t))}:function(e,t){var n=e.length,r=0;while(e[n++]=t[r++]);e.length=n-1}}}function se(t,e,n,r){var i,o,a,s,u,l,c,f=e&&e.ownerDocument,p=e?e.nodeType:9;if(n=n||[],"string"!=typeof t||!t||1!==p&&9!==p&&11!==p)return n;if(!r&&((e?e.ownerDocument||e:m)!==C&&T(e),e=e||C,E)){if(11!==p&&(u=Z.exec(t)))if(i=u[1]){if(9===p){if(!(a=e.getElementById(i)))return n;if(a.id===i)return n.push(a),n}else if(f&&(a=f.getElementById(i))&&y(e,a)&&a.id===i)return n.push(a),n}else{if(u[2])return H.apply(n,e.getElementsByTagName(t)),n;if((i=u[3])&&d.getElementsByClassName&&e.getElementsByClassName)return H.apply(n,e.getElementsByClassName(i)),n}if(d.qsa&&!A[t+" "]&&(!v||!v.test(t))&&(1!==p||"object"!==e.nodeName.toLowerCase())){if(c=t,f=e,1===p&&U.test(t)){(s=e.getAttribute("id"))?s=s.replace(re,ie):e.setAttribute("id",s=k),o=(l=h(t)).length;while(o--)l[o]="#"+s+" "+xe(l[o]);c=l.join(","),f=ee.test(t)&&ye(e.parentNode)||e}try{return H.apply(n,f.querySelectorAll(c)),n}catch(e){A(t,!0)}finally{s===k&&e.removeAttribute("id")}}}return g(t.replace(B,"$1"),e,n,r)}function ue(){var r=[];return function e(t,n){return r.push(t+" ")>b.cacheLength&&delete e[r.shift()],e[t+" "]=n}}function le(e){return e[k]=!0,e}function ce(e){var t=C.createElement("fieldset");try{return!!e(t)}catch(e){return!1}finally{t.parentNode&&t.parentNode.removeChild(t),t=null}}function fe(e,t){var n=e.split("|"),r=n.length;while(r--)b.attrHandle[n[r]]=t}function pe(e,t){var n=t&&e,r=n&&1===e.nodeType&&1===t.nodeType&&e.sourceIndex-t.sourceIndex;if(r)return r;if(n)while(n=n.nextSibling)if(n===t)return-1;return e?1:-1}function de(t){return function(e){return"input"===e.nodeName.toLowerCase()&&e.type===t}}function he(n){return function(e){var t=e.nodeName.toLowerCase();return("input"===t||"button"===t)&&e.type===n}}function ge(t){return function(e){return"form"in e?e.parentNode&&!1===e.disabled?"label"in e?"label"in e.parentNode?e.parentNode.disabled===t:e.disabled===t:e.isDisabled===t||e.isDisabled!==!t&&ae(e)===t:e.disabled===t:"label"in e&&e.disabled===t}}function ve(a){return le(function(o){return o=+o,le(function(e,t){var n,r=a([],e.length,o),i=r.length;while(i--)e[n=r[i]]&&(e[n]=!(t[n]=e[n]))})})}function ye(e){return e&&"undefined"!=typeof e.getElementsByTagName&&e}for(e in d=se.support={},i=se.isXML=function(e){var t=e.namespaceURI,n=(e.ownerDocument||e).documentElement;return!Y.test(t||n&&n.nodeName||"HTML")},T=se.setDocument=function(e){var t,n,r=e?e.ownerDocument||e:m;return r!==C&&9===r.nodeType&&r.documentElement&&(a=(C=r).documentElement,E=!i(C),m!==C&&(n=C.defaultView)&&n.top!==n&&(n.addEventListener?n.addEventListener("unload",oe,!1):n.attachEvent&&n.attachEvent("onunload",oe)),d.attributes=ce(function(e){return e.className="i",!e.getAttribute("className")}),d.getElementsByTagName=ce(function(e){return e.appendChild(C.createComment("")),!e.getElementsByTagName("*").length}),d.getElementsByClassName=K.test(C.getElementsByClassName),d.getById=ce(function(e){return a.appendChild(e).id=k,!C.getElementsByName||!C.getElementsByName(k).length}),d.getById?(b.filter.ID=function(e){var t=e.replace(te,ne);return function(e){return e.getAttribute("id")===t}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n=t.getElementById(e);return 
n?[n]:[]}}):(b.filter.ID=function(e){var n=e.replace(te,ne);return function(e){var t="undefined"!=typeof e.getAttributeNode&&e.getAttributeNode("id");return t&&t.value===n}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n,r,i,o=t.getElementById(e);if(o){if((n=o.getAttributeNode("id"))&&n.value===e)return[o];i=t.getElementsByName(e),r=0;while(o=i[r++])if((n=o.getAttributeNode("id"))&&n.value===e)return[o]}return[]}}),b.find.TAG=d.getElementsByTagName?function(e,t){return"undefined"!=typeof t.getElementsByTagName?t.getElementsByTagName(e):d.qsa?t.querySelectorAll(e):void 0}:function(e,t){var n,r=[],i=0,o=t.getElementsByTagName(e);if("*"===e){while(n=o[i++])1===n.nodeType&&r.push(n);return r}return o},b.find.CLASS=d.getElementsByClassName&&function(e,t){if("undefined"!=typeof t.getElementsByClassName&&E)return t.getElementsByClassName(e)},s=[],v=[],(d.qsa=K.test(C.querySelectorAll))&&(ce(function(e){a.appendChild(e).innerHTML="",e.querySelectorAll("[msallowcapture^='']").length&&v.push("[*^$]="+M+"*(?:''|\"\")"),e.querySelectorAll("[selected]").length||v.push("\\["+M+"*(?:value|"+R+")"),e.querySelectorAll("[id~="+k+"-]").length||v.push("~="),e.querySelectorAll(":checked").length||v.push(":checked"),e.querySelectorAll("a#"+k+"+*").length||v.push(".#.+[+~]")}),ce(function(e){e.innerHTML="";var t=C.createElement("input");t.setAttribute("type","hidden"),e.appendChild(t).setAttribute("name","D"),e.querySelectorAll("[name=d]").length&&v.push("name"+M+"*[*^$|!~]?="),2!==e.querySelectorAll(":enabled").length&&v.push(":enabled",":disabled"),a.appendChild(e).disabled=!0,2!==e.querySelectorAll(":disabled").length&&v.push(":enabled",":disabled"),e.querySelectorAll("*,:x"),v.push(",.*:")})),(d.matchesSelector=K.test(c=a.matches||a.webkitMatchesSelector||a.mozMatchesSelector||a.oMatchesSelector||a.msMatchesSelector))&&ce(function(e){d.disconnectedMatch=c.call(e,"*"),c.call(e,"[s!='']:x"),s.push("!=",$)}),v=v.length&&new RegExp(v.join("|")),s=s.length&&new RegExp(s.join("|")),t=K.test(a.compareDocumentPosition),y=t||K.test(a.contains)?function(e,t){var n=9===e.nodeType?e.documentElement:e,r=t&&t.parentNode;return e===r||!(!r||1!==r.nodeType||!(n.contains?n.contains(r):e.compareDocumentPosition&&16&e.compareDocumentPosition(r)))}:function(e,t){if(t)while(t=t.parentNode)if(t===e)return!0;return!1},D=t?function(e,t){if(e===t)return l=!0,0;var n=!e.compareDocumentPosition-!t.compareDocumentPosition;return n||(1&(n=(e.ownerDocument||e)===(t.ownerDocument||t)?e.compareDocumentPosition(t):1)||!d.sortDetached&&t.compareDocumentPosition(e)===n?e===C||e.ownerDocument===m&&y(m,e)?-1:t===C||t.ownerDocument===m&&y(m,t)?1:u?P(u,e)-P(u,t):0:4&n?-1:1)}:function(e,t){if(e===t)return l=!0,0;var n,r=0,i=e.parentNode,o=t.parentNode,a=[e],s=[t];if(!i||!o)return e===C?-1:t===C?1:i?-1:o?1:u?P(u,e)-P(u,t):0;if(i===o)return pe(e,t);n=e;while(n=n.parentNode)a.unshift(n);n=t;while(n=n.parentNode)s.unshift(n);while(a[r]===s[r])r++;return r?pe(a[r],s[r]):a[r]===m?-1:s[r]===m?1:0}),C},se.matches=function(e,t){return se(e,null,null,t)},se.matchesSelector=function(e,t){if((e.ownerDocument||e)!==C&&T(e),d.matchesSelector&&E&&!A[t+" "]&&(!s||!s.test(t))&&(!v||!v.test(t)))try{var n=c.call(e,t);if(n||d.disconnectedMatch||e.document&&11!==e.document.nodeType)return n}catch(e){A(t,!0)}return 0":{dir:"parentNode",first:!0}," ":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(e){return 
e[1]=e[1].replace(te,ne),e[3]=(e[3]||e[4]||e[5]||"").replace(te,ne),"~="===e[2]&&(e[3]=" "+e[3]+" "),e.slice(0,4)},CHILD:function(e){return e[1]=e[1].toLowerCase(),"nth"===e[1].slice(0,3)?(e[3]||se.error(e[0]),e[4]=+(e[4]?e[5]+(e[6]||1):2*("even"===e[3]||"odd"===e[3])),e[5]=+(e[7]+e[8]||"odd"===e[3])):e[3]&&se.error(e[0]),e},PSEUDO:function(e){var t,n=!e[6]&&e[2];return G.CHILD.test(e[0])?null:(e[3]?e[2]=e[4]||e[5]||"":n&&X.test(n)&&(t=h(n,!0))&&(t=n.indexOf(")",n.length-t)-n.length)&&(e[0]=e[0].slice(0,t),e[2]=n.slice(0,t)),e.slice(0,3))}},filter:{TAG:function(e){var t=e.replace(te,ne).toLowerCase();return"*"===e?function(){return!0}:function(e){return e.nodeName&&e.nodeName.toLowerCase()===t}},CLASS:function(e){var t=p[e+" "];return t||(t=new RegExp("(^|"+M+")"+e+"("+M+"|$)"))&&p(e,function(e){return t.test("string"==typeof e.className&&e.className||"undefined"!=typeof e.getAttribute&&e.getAttribute("class")||"")})},ATTR:function(n,r,i){return function(e){var t=se.attr(e,n);return null==t?"!="===r:!r||(t+="","="===r?t===i:"!="===r?t!==i:"^="===r?i&&0===t.indexOf(i):"*="===r?i&&-1:\x20\t\r\n\f]*)[\x20\t\r\n\f]*\/?>(?:<\/\1>|)$/i;function j(e,n,r){return m(n)?k.grep(e,function(e,t){return!!n.call(e,t,e)!==r}):n.nodeType?k.grep(e,function(e){return e===n!==r}):"string"!=typeof n?k.grep(e,function(e){return-1)[^>]*|#([\w-]+))$/;(k.fn.init=function(e,t,n){var r,i;if(!e)return this;if(n=n||q,"string"==typeof e){if(!(r="<"===e[0]&&">"===e[e.length-1]&&3<=e.length?[null,e,null]:L.exec(e))||!r[1]&&t)return!t||t.jquery?(t||n).find(e):this.constructor(t).find(e);if(r[1]){if(t=t instanceof k?t[0]:t,k.merge(this,k.parseHTML(r[1],t&&t.nodeType?t.ownerDocument||t:E,!0)),D.test(r[1])&&k.isPlainObject(t))for(r in t)m(this[r])?this[r](t[r]):this.attr(r,t[r]);return this}return(i=E.getElementById(r[2]))&&(this[0]=i,this.length=1),this}return e.nodeType?(this[0]=e,this.length=1,this):m(e)?void 0!==n.ready?n.ready(e):e(k):k.makeArray(e,this)}).prototype=k.fn,q=k(E);var H=/^(?:parents|prev(?:Until|All))/,O={children:!0,contents:!0,next:!0,prev:!0};function P(e,t){while((e=e[t])&&1!==e.nodeType);return e}k.fn.extend({has:function(e){var t=k(e,this),n=t.length;return this.filter(function(){for(var e=0;e\x20\t\r\n\f]*)/i,he=/^$|^module$|\/(?:java|ecma)script/i,ge={option:[1,""],thead:[1,"","
    "],col:[2,"","
    "],tr:[2,"","
    "],td:[3,"","
    "],_default:[0,"",""]};function ve(e,t){var n;return n="undefined"!=typeof e.getElementsByTagName?e.getElementsByTagName(t||"*"):"undefined"!=typeof e.querySelectorAll?e.querySelectorAll(t||"*"):[],void 0===t||t&&A(e,t)?k.merge([e],n):n}function ye(e,t){for(var n=0,r=e.length;nx",y.noCloneChecked=!!me.cloneNode(!0).lastChild.defaultValue;var Te=/^key/,Ce=/^(?:mouse|pointer|contextmenu|drag|drop)|click/,Ee=/^([^.]*)(?:\.(.+)|)/;function ke(){return!0}function Se(){return!1}function Ne(e,t){return e===function(){try{return E.activeElement}catch(e){}}()==("focus"===t)}function Ae(e,t,n,r,i,o){var a,s;if("object"==typeof t){for(s in"string"!=typeof n&&(r=r||n,n=void 0),t)Ae(e,s,n,r,t[s],o);return e}if(null==r&&null==i?(i=n,r=n=void 0):null==i&&("string"==typeof n?(i=r,r=void 0):(i=r,r=n,n=void 0)),!1===i)i=Se;else if(!i)return e;return 1===o&&(a=i,(i=function(e){return k().off(e),a.apply(this,arguments)}).guid=a.guid||(a.guid=k.guid++)),e.each(function(){k.event.add(this,t,i,r,n)})}function De(e,i,o){o?(Q.set(e,i,!1),k.event.add(e,i,{namespace:!1,handler:function(e){var t,n,r=Q.get(this,i);if(1&e.isTrigger&&this[i]){if(r.length)(k.event.special[i]||{}).delegateType&&e.stopPropagation();else if(r=s.call(arguments),Q.set(this,i,r),t=o(this,i),this[i](),r!==(n=Q.get(this,i))||t?Q.set(this,i,!1):n={},r!==n)return e.stopImmediatePropagation(),e.preventDefault(),n.value}else r.length&&(Q.set(this,i,{value:k.event.trigger(k.extend(r[0],k.Event.prototype),r.slice(1),this)}),e.stopImmediatePropagation())}})):void 0===Q.get(e,i)&&k.event.add(e,i,ke)}k.event={global:{},add:function(t,e,n,r,i){var o,a,s,u,l,c,f,p,d,h,g,v=Q.get(t);if(v){n.handler&&(n=(o=n).handler,i=o.selector),i&&k.find.matchesSelector(ie,i),n.guid||(n.guid=k.guid++),(u=v.events)||(u=v.events={}),(a=v.handle)||(a=v.handle=function(e){return"undefined"!=typeof k&&k.event.triggered!==e.type?k.event.dispatch.apply(t,arguments):void 0}),l=(e=(e||"").match(R)||[""]).length;while(l--)d=g=(s=Ee.exec(e[l])||[])[1],h=(s[2]||"").split(".").sort(),d&&(f=k.event.special[d]||{},d=(i?f.delegateType:f.bindType)||d,f=k.event.special[d]||{},c=k.extend({type:d,origType:g,data:r,handler:n,guid:n.guid,selector:i,needsContext:i&&k.expr.match.needsContext.test(i),namespace:h.join(".")},o),(p=u[d])||((p=u[d]=[]).delegateCount=0,f.setup&&!1!==f.setup.call(t,r,h,a)||t.addEventListener&&t.addEventListener(d,a)),f.add&&(f.add.call(t,c),c.handler.guid||(c.handler.guid=n.guid)),i?p.splice(p.delegateCount++,0,c):p.push(c),k.event.global[d]=!0)}},remove:function(e,t,n,r,i){var o,a,s,u,l,c,f,p,d,h,g,v=Q.hasData(e)&&Q.get(e);if(v&&(u=v.events)){l=(t=(t||"").match(R)||[""]).length;while(l--)if(d=g=(s=Ee.exec(t[l])||[])[1],h=(s[2]||"").split(".").sort(),d){f=k.event.special[d]||{},p=u[d=(r?f.delegateType:f.bindType)||d]||[],s=s[2]&&new RegExp("(^|\\.)"+h.join("\\.(?:.*\\.|)")+"(\\.|$)"),a=o=p.length;while(o--)c=p[o],!i&&g!==c.origType||n&&n.guid!==c.guid||s&&!s.test(c.namespace)||r&&r!==c.selector&&("**"!==r||!c.selector)||(p.splice(o,1),c.selector&&p.delegateCount--,f.remove&&f.remove.call(e,c));a&&!p.length&&(f.teardown&&!1!==f.teardown.call(e,h,v.handle)||k.removeEvent(e,d,v.handle),delete u[d])}else for(d in u)k.event.remove(e,d+t[l],n,r,!0);k.isEmptyObject(u)&&Q.remove(e,"handle events")}},dispatch:function(e){var t,n,r,i,o,a,s=k.event.fix(e),u=new Array(arguments.length),l=(Q.get(this,"events")||{})[s.type]||[],c=k.event.special[s.type]||{};for(u[0]=s,t=1;t\x20\t\r\n\f]*)[^>]*)\/>/gi,qe=/\s*$/g;function Oe(e,t){return 
A(e,"table")&&A(11!==t.nodeType?t:t.firstChild,"tr")&&k(e).children("tbody")[0]||e}function Pe(e){return e.type=(null!==e.getAttribute("type"))+"/"+e.type,e}function Re(e){return"true/"===(e.type||"").slice(0,5)?e.type=e.type.slice(5):e.removeAttribute("type"),e}function Me(e,t){var n,r,i,o,a,s,u,l;if(1===t.nodeType){if(Q.hasData(e)&&(o=Q.access(e),a=Q.set(t,o),l=o.events))for(i in delete a.handle,a.events={},l)for(n=0,r=l[i].length;n")},clone:function(e,t,n){var r,i,o,a,s,u,l,c=e.cloneNode(!0),f=oe(e);if(!(y.noCloneChecked||1!==e.nodeType&&11!==e.nodeType||k.isXMLDoc(e)))for(a=ve(c),r=0,i=(o=ve(e)).length;r").attr(n.scriptAttrs||{}).prop({charset:n.scriptCharset,src:n.url}).on("load error",i=function(e){r.remove(),i=null,e&&t("error"===e.type?404:200,e.type)}),E.head.appendChild(r[0])},abort:function(){i&&i()}}});var Vt,Gt=[],Yt=/(=)\?(?=&|$)|\?\?/;k.ajaxSetup({jsonp:"callback",jsonpCallback:function(){var e=Gt.pop()||k.expando+"_"+kt++;return this[e]=!0,e}}),k.ajaxPrefilter("json jsonp",function(e,t,n){var r,i,o,a=!1!==e.jsonp&&(Yt.test(e.url)?"url":"string"==typeof e.data&&0===(e.contentType||"").indexOf("application/x-www-form-urlencoded")&&Yt.test(e.data)&&"data");if(a||"jsonp"===e.dataTypes[0])return r=e.jsonpCallback=m(e.jsonpCallback)?e.jsonpCallback():e.jsonpCallback,a?e[a]=e[a].replace(Yt,"$1"+r):!1!==e.jsonp&&(e.url+=(St.test(e.url)?"&":"?")+e.jsonp+"="+r),e.converters["script json"]=function(){return o||k.error(r+" was not called"),o[0]},e.dataTypes[0]="json",i=C[r],C[r]=function(){o=arguments},n.always(function(){void 0===i?k(C).removeProp(r):C[r]=i,e[r]&&(e.jsonpCallback=t.jsonpCallback,Gt.push(r)),o&&m(i)&&i(o[0]),o=i=void 0}),"script"}),y.createHTMLDocument=((Vt=E.implementation.createHTMLDocument("").body).innerHTML="

    ",2===Vt.childNodes.length),k.parseHTML=function(e,t,n){return"string"!=typeof e?[]:("boolean"==typeof t&&(n=t,t=!1),t||(y.createHTMLDocument?((r=(t=E.implementation.createHTMLDocument("")).createElement("base")).href=E.location.href,t.head.appendChild(r)):t=E),o=!n&&[],(i=D.exec(e))?[t.createElement(i[1])]:(i=we([e],t,o),o&&o.length&&k(o).remove(),k.merge([],i.childNodes)));var r,i,o},k.fn.load=function(e,t,n){var r,i,o,a=this,s=e.indexOf(" ");return-1").append(k.parseHTML(e)).find(r):e)}).always(n&&function(e,t){a.each(function(){n.apply(this,o||[e.responseText,t,e])})}),this},k.each(["ajaxStart","ajaxStop","ajaxComplete","ajaxError","ajaxSuccess","ajaxSend"],function(e,t){k.fn[t]=function(e){return this.on(t,e)}}),k.expr.pseudos.animated=function(t){return k.grep(k.timers,function(e){return t===e.elem}).length},k.offset={setOffset:function(e,t,n){var r,i,o,a,s,u,l=k.css(e,"position"),c=k(e),f={};"static"===l&&(e.style.position="relative"),s=c.offset(),o=k.css(e,"top"),u=k.css(e,"left"),("absolute"===l||"fixed"===l)&&-1<(o+u).indexOf("auto")?(a=(r=c.position()).top,i=r.left):(a=parseFloat(o)||0,i=parseFloat(u)||0),m(t)&&(t=t.call(e,n,k.extend({},s))),null!=t.top&&(f.top=t.top-s.top+a),null!=t.left&&(f.left=t.left-s.left+i),"using"in t?t.using.call(e,f):c.css(f)}},k.fn.extend({offset:function(t){if(arguments.length)return void 0===t?this:this.each(function(e){k.offset.setOffset(this,t,e)});var e,n,r=this[0];return r?r.getClientRects().length?(e=r.getBoundingClientRect(),n=r.ownerDocument.defaultView,{top:e.top+n.pageYOffset,left:e.left+n.pageXOffset}):{top:0,left:0}:void 0},position:function(){if(this[0]){var e,t,n,r=this[0],i={top:0,left:0};if("fixed"===k.css(r,"position"))t=r.getBoundingClientRect();else{t=this.offset(),n=r.ownerDocument,e=r.offsetParent||n.documentElement;while(e&&(e===n.body||e===n.documentElement)&&"static"===k.css(e,"position"))e=e.parentNode;e&&e!==r&&1===e.nodeType&&((i=k(e).offset()).top+=k.css(e,"borderTopWidth",!0),i.left+=k.css(e,"borderLeftWidth",!0))}return{top:t.top-i.top-k.css(r,"marginTop",!0),left:t.left-i.left-k.css(r,"marginLeft",!0)}}},offsetParent:function(){return this.map(function(){var e=this.offsetParent;while(e&&"static"===k.css(e,"position"))e=e.offsetParent;return e||ie})}}),k.each({scrollLeft:"pageXOffset",scrollTop:"pageYOffset"},function(t,i){var o="pageYOffset"===i;k.fn[t]=function(e){return _(this,function(e,t,n){var r;if(x(e)?r=e:9===e.nodeType&&(r=e.defaultView),void 0===n)return r?r[i]:e[t];r?r.scrollTo(o?r.pageXOffset:n,o?n:r.pageYOffset):e[t]=n},t,e,arguments.length)}}),k.each(["top","left"],function(e,n){k.cssHooks[n]=ze(y.pixelPosition,function(e,t){if(t)return t=_e(e,n),$e.test(t)?k(e).position()[n]+"px":t})}),k.each({Height:"height",Width:"width"},function(a,s){k.each({padding:"inner"+a,content:s,"":"outer"+a},function(r,o){k.fn[o]=function(e,t){var n=arguments.length&&(r||"boolean"!=typeof e),i=r||(!0===e||!0===t?"margin":"border");return _(this,function(e,t,n){var r;return x(e)?0===o.indexOf("outer")?e["inner"+a]:e.document.documentElement["client"+a]:9===e.nodeType?(r=e.documentElement,Math.max(e.body["scroll"+a],r["scroll"+a],e.body["offset"+a],r["offset"+a],r["client"+a])):void 0===n?k.css(e,t,i):k.style(e,t,n,i)},s,n?e:void 0,n)}})}),k.each("blur focus focusin focusout resize scroll click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup contextmenu".split(" "),function(e,n){k.fn[n]=function(e,t){return 0+~]|"+M+")"+M+"*"),U=new RegExp(M+"|>"),X=new 
RegExp(F),V=new RegExp("^"+I+"$"),G={ID:new RegExp("^#("+I+")"),CLASS:new RegExp("^\\.("+I+")"),TAG:new RegExp("^("+I+"|[*])"),ATTR:new RegExp("^"+W),PSEUDO:new RegExp("^"+F),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+M+"*(even|odd|(([+-]|)(\\d*)n|)"+M+"*(?:([+-]|)"+M+"*(\\d+)|))"+M+"*\\)|)","i"),bool:new RegExp("^(?:"+R+")$","i"),needsContext:new RegExp("^"+M+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+M+"*((?:-\\d)?\\d*)"+M+"*\\)|)(?=[^-]|$)","i")},Y=/HTML$/i,Q=/^(?:input|select|textarea|button)$/i,J=/^h\d$/i,K=/^[^{]+\{\s*\[native \w/,Z=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,ee=/[+~]/,te=new RegExp("\\\\[\\da-fA-F]{1,6}"+M+"?|\\\\([^\\r\\n\\f])","g"),ne=function(e,t){var n="0x"+e.slice(1)-65536;return t||(n<0?String.fromCharCode(n+65536):String.fromCharCode(n>>10|55296,1023&n|56320))},re=/([\0-\x1f\x7f]|^-?\d)|^-$|[^\0-\x1f\x7f-\uFFFF\w-]/g,ie=function(e,t){return t?"\0"===e?"\ufffd":e.slice(0,-1)+"\\"+e.charCodeAt(e.length-1).toString(16)+" ":"\\"+e},oe=function(){T()},ae=be(function(e){return!0===e.disabled&&"fieldset"===e.nodeName.toLowerCase()},{dir:"parentNode",next:"legend"});try{H.apply(t=O.call(p.childNodes),p.childNodes),t[p.childNodes.length].nodeType}catch(e){H={apply:t.length?function(e,t){L.apply(e,O.call(t))}:function(e,t){var n=e.length,r=0;while(e[n++]=t[r++]);e.length=n-1}}}function se(t,e,n,r){var i,o,a,s,u,l,c,f=e&&e.ownerDocument,p=e?e.nodeType:9;if(n=n||[],"string"!=typeof t||!t||1!==p&&9!==p&&11!==p)return n;if(!r&&(T(e),e=e||C,E)){if(11!==p&&(u=Z.exec(t)))if(i=u[1]){if(9===p){if(!(a=e.getElementById(i)))return n;if(a.id===i)return n.push(a),n}else if(f&&(a=f.getElementById(i))&&y(e,a)&&a.id===i)return n.push(a),n}else{if(u[2])return H.apply(n,e.getElementsByTagName(t)),n;if((i=u[3])&&d.getElementsByClassName&&e.getElementsByClassName)return H.apply(n,e.getElementsByClassName(i)),n}if(d.qsa&&!N[t+" "]&&(!v||!v.test(t))&&(1!==p||"object"!==e.nodeName.toLowerCase())){if(c=t,f=e,1===p&&(U.test(t)||z.test(t))){(f=ee.test(t)&&ye(e.parentNode)||e)===e&&d.scope||((s=e.getAttribute("id"))?s=s.replace(re,ie):e.setAttribute("id",s=S)),o=(l=h(t)).length;while(o--)l[o]=(s?"#"+s:":scope")+" "+xe(l[o]);c=l.join(",")}try{return H.apply(n,f.querySelectorAll(c)),n}catch(e){N(t,!0)}finally{s===S&&e.removeAttribute("id")}}}return g(t.replace($,"$1"),e,n,r)}function ue(){var r=[];return function e(t,n){return r.push(t+" ")>b.cacheLength&&delete e[r.shift()],e[t+" "]=n}}function le(e){return e[S]=!0,e}function ce(e){var t=C.createElement("fieldset");try{return!!e(t)}catch(e){return!1}finally{t.parentNode&&t.parentNode.removeChild(t),t=null}}function fe(e,t){var n=e.split("|"),r=n.length;while(r--)b.attrHandle[n[r]]=t}function pe(e,t){var n=t&&e,r=n&&1===e.nodeType&&1===t.nodeType&&e.sourceIndex-t.sourceIndex;if(r)return r;if(n)while(n=n.nextSibling)if(n===t)return-1;return e?1:-1}function de(t){return function(e){return"input"===e.nodeName.toLowerCase()&&e.type===t}}function he(n){return function(e){var t=e.nodeName.toLowerCase();return("input"===t||"button"===t)&&e.type===n}}function ge(t){return function(e){return"form"in e?e.parentNode&&!1===e.disabled?"label"in e?"label"in e.parentNode?e.parentNode.disabled===t:e.disabled===t:e.isDisabled===t||e.isDisabled!==!t&&ae(e)===t:e.disabled===t:"label"in e&&e.disabled===t}}function ve(a){return le(function(o){return o=+o,le(function(e,t){var n,r=a([],e.length,o),i=r.length;while(i--)e[n=r[i]]&&(e[n]=!(t[n]=e[n]))})})}function ye(e){return e&&"undefined"!=typeof e.getElementsByTagName&&e}for(e in 
d=se.support={},i=se.isXML=function(e){var t=e.namespaceURI,n=(e.ownerDocument||e).documentElement;return!Y.test(t||n&&n.nodeName||"HTML")},T=se.setDocument=function(e){var t,n,r=e?e.ownerDocument||e:p;return r!=C&&9===r.nodeType&&r.documentElement&&(a=(C=r).documentElement,E=!i(C),p!=C&&(n=C.defaultView)&&n.top!==n&&(n.addEventListener?n.addEventListener("unload",oe,!1):n.attachEvent&&n.attachEvent("onunload",oe)),d.scope=ce(function(e){return a.appendChild(e).appendChild(C.createElement("div")),"undefined"!=typeof e.querySelectorAll&&!e.querySelectorAll(":scope fieldset div").length}),d.attributes=ce(function(e){return e.className="i",!e.getAttribute("className")}),d.getElementsByTagName=ce(function(e){return e.appendChild(C.createComment("")),!e.getElementsByTagName("*").length}),d.getElementsByClassName=K.test(C.getElementsByClassName),d.getById=ce(function(e){return a.appendChild(e).id=S,!C.getElementsByName||!C.getElementsByName(S).length}),d.getById?(b.filter.ID=function(e){var t=e.replace(te,ne);return function(e){return e.getAttribute("id")===t}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n=t.getElementById(e);return n?[n]:[]}}):(b.filter.ID=function(e){var n=e.replace(te,ne);return function(e){var t="undefined"!=typeof e.getAttributeNode&&e.getAttributeNode("id");return t&&t.value===n}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n,r,i,o=t.getElementById(e);if(o){if((n=o.getAttributeNode("id"))&&n.value===e)return[o];i=t.getElementsByName(e),r=0;while(o=i[r++])if((n=o.getAttributeNode("id"))&&n.value===e)return[o]}return[]}}),b.find.TAG=d.getElementsByTagName?function(e,t){return"undefined"!=typeof t.getElementsByTagName?t.getElementsByTagName(e):d.qsa?t.querySelectorAll(e):void 0}:function(e,t){var n,r=[],i=0,o=t.getElementsByTagName(e);if("*"===e){while(n=o[i++])1===n.nodeType&&r.push(n);return r}return o},b.find.CLASS=d.getElementsByClassName&&function(e,t){if("undefined"!=typeof t.getElementsByClassName&&E)return t.getElementsByClassName(e)},s=[],v=[],(d.qsa=K.test(C.querySelectorAll))&&(ce(function(e){var t;a.appendChild(e).innerHTML="",e.querySelectorAll("[msallowcapture^='']").length&&v.push("[*^$]="+M+"*(?:''|\"\")"),e.querySelectorAll("[selected]").length||v.push("\\["+M+"*(?:value|"+R+")"),e.querySelectorAll("[id~="+S+"-]").length||v.push("~="),(t=C.createElement("input")).setAttribute("name",""),e.appendChild(t),e.querySelectorAll("[name='']").length||v.push("\\["+M+"*name"+M+"*="+M+"*(?:''|\"\")"),e.querySelectorAll(":checked").length||v.push(":checked"),e.querySelectorAll("a#"+S+"+*").length||v.push(".#.+[+~]"),e.querySelectorAll("\\\f"),v.push("[\\r\\n\\f]")}),ce(function(e){e.innerHTML="";var t=C.createElement("input");t.setAttribute("type","hidden"),e.appendChild(t).setAttribute("name","D"),e.querySelectorAll("[name=d]").length&&v.push("name"+M+"*[*^$|!~]?="),2!==e.querySelectorAll(":enabled").length&&v.push(":enabled",":disabled"),a.appendChild(e).disabled=!0,2!==e.querySelectorAll(":disabled").length&&v.push(":enabled",":disabled"),e.querySelectorAll("*,:x"),v.push(",.*:")})),(d.matchesSelector=K.test(c=a.matches||a.webkitMatchesSelector||a.mozMatchesSelector||a.oMatchesSelector||a.msMatchesSelector))&&ce(function(e){d.disconnectedMatch=c.call(e,"*"),c.call(e,"[s!='']:x"),s.push("!=",F)}),v=v.length&&new RegExp(v.join("|")),s=s.length&&new RegExp(s.join("|")),t=K.test(a.compareDocumentPosition),y=t||K.test(a.contains)?function(e,t){var n=9===e.nodeType?e.documentElement:e,r=t&&t.parentNode;return 
e===r||!(!r||1!==r.nodeType||!(n.contains?n.contains(r):e.compareDocumentPosition&&16&e.compareDocumentPosition(r)))}:function(e,t){if(t)while(t=t.parentNode)if(t===e)return!0;return!1},D=t?function(e,t){if(e===t)return l=!0,0;var n=!e.compareDocumentPosition-!t.compareDocumentPosition;return n||(1&(n=(e.ownerDocument||e)==(t.ownerDocument||t)?e.compareDocumentPosition(t):1)||!d.sortDetached&&t.compareDocumentPosition(e)===n?e==C||e.ownerDocument==p&&y(p,e)?-1:t==C||t.ownerDocument==p&&y(p,t)?1:u?P(u,e)-P(u,t):0:4&n?-1:1)}:function(e,t){if(e===t)return l=!0,0;var n,r=0,i=e.parentNode,o=t.parentNode,a=[e],s=[t];if(!i||!o)return e==C?-1:t==C?1:i?-1:o?1:u?P(u,e)-P(u,t):0;if(i===o)return pe(e,t);n=e;while(n=n.parentNode)a.unshift(n);n=t;while(n=n.parentNode)s.unshift(n);while(a[r]===s[r])r++;return r?pe(a[r],s[r]):a[r]==p?-1:s[r]==p?1:0}),C},se.matches=function(e,t){return se(e,null,null,t)},se.matchesSelector=function(e,t){if(T(e),d.matchesSelector&&E&&!N[t+" "]&&(!s||!s.test(t))&&(!v||!v.test(t)))try{var n=c.call(e,t);if(n||d.disconnectedMatch||e.document&&11!==e.document.nodeType)return n}catch(e){N(t,!0)}return 0":{dir:"parentNode",first:!0}," ":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(e){return e[1]=e[1].replace(te,ne),e[3]=(e[3]||e[4]||e[5]||"").replace(te,ne),"~="===e[2]&&(e[3]=" "+e[3]+" "),e.slice(0,4)},CHILD:function(e){return e[1]=e[1].toLowerCase(),"nth"===e[1].slice(0,3)?(e[3]||se.error(e[0]),e[4]=+(e[4]?e[5]+(e[6]||1):2*("even"===e[3]||"odd"===e[3])),e[5]=+(e[7]+e[8]||"odd"===e[3])):e[3]&&se.error(e[0]),e},PSEUDO:function(e){var t,n=!e[6]&&e[2];return G.CHILD.test(e[0])?null:(e[3]?e[2]=e[4]||e[5]||"":n&&X.test(n)&&(t=h(n,!0))&&(t=n.indexOf(")",n.length-t)-n.length)&&(e[0]=e[0].slice(0,t),e[2]=n.slice(0,t)),e.slice(0,3))}},filter:{TAG:function(e){var t=e.replace(te,ne).toLowerCase();return"*"===e?function(){return!0}:function(e){return e.nodeName&&e.nodeName.toLowerCase()===t}},CLASS:function(e){var t=m[e+" "];return t||(t=new RegExp("(^|"+M+")"+e+"("+M+"|$)"))&&m(e,function(e){return t.test("string"==typeof e.className&&e.className||"undefined"!=typeof e.getAttribute&&e.getAttribute("class")||"")})},ATTR:function(n,r,i){return function(e){var t=se.attr(e,n);return null==t?"!="===r:!r||(t+="","="===r?t===i:"!="===r?t!==i:"^="===r?i&&0===t.indexOf(i):"*="===r?i&&-1:\x20\t\r\n\f]*)[\x20\t\r\n\f]*\/?>(?:<\/\1>|)$/i;function D(e,n,r){return m(n)?S.grep(e,function(e,t){return!!n.call(e,t,e)!==r}):n.nodeType?S.grep(e,function(e){return e===n!==r}):"string"!=typeof n?S.grep(e,function(e){return-1)[^>]*|#([\w-]+))$/;(S.fn.init=function(e,t,n){var r,i;if(!e)return this;if(n=n||j,"string"==typeof e){if(!(r="<"===e[0]&&">"===e[e.length-1]&&3<=e.length?[null,e,null]:q.exec(e))||!r[1]&&t)return!t||t.jquery?(t||n).find(e):this.constructor(t).find(e);if(r[1]){if(t=t instanceof S?t[0]:t,S.merge(this,S.parseHTML(r[1],t&&t.nodeType?t.ownerDocument||t:E,!0)),N.test(r[1])&&S.isPlainObject(t))for(r in t)m(this[r])?this[r](t[r]):this.attr(r,t[r]);return this}return(i=E.getElementById(r[2]))&&(this[0]=i,this.length=1),this}return e.nodeType?(this[0]=e,this.length=1,this):m(e)?void 0!==n.ready?n.ready(e):e(S):S.makeArray(e,this)}).prototype=S.fn,j=S(E);var L=/^(?:parents|prev(?:Until|All))/,H={children:!0,contents:!0,next:!0,prev:!0};function O(e,t){while((e=e[t])&&1!==e.nodeType);return e}S.fn.extend({has:function(e){var t=S(e,this),n=t.length;return this.filter(function(){for(var 
e=0;e\x20\t\r\n\f]*)/i,he=/^$|^module$|\/(?:java|ecma)script/i;ce=E.createDocumentFragment().appendChild(E.createElement("div")),(fe=E.createElement("input")).setAttribute("type","radio"),fe.setAttribute("checked","checked"),fe.setAttribute("name","t"),ce.appendChild(fe),y.checkClone=ce.cloneNode(!0).cloneNode(!0).lastChild.checked,ce.innerHTML="",y.noCloneChecked=!!ce.cloneNode(!0).lastChild.defaultValue,ce.innerHTML="",y.option=!!ce.lastChild;var ge={thead:[1,"","
    "],col:[2,"","
    "],tr:[2,"","
    "],td:[3,"","
    "],_default:[0,"",""]};function ve(e,t){var n;return n="undefined"!=typeof e.getElementsByTagName?e.getElementsByTagName(t||"*"):"undefined"!=typeof e.querySelectorAll?e.querySelectorAll(t||"*"):[],void 0===t||t&&A(e,t)?S.merge([e],n):n}function ye(e,t){for(var n=0,r=e.length;n",""]);var me=/<|&#?\w+;/;function xe(e,t,n,r,i){for(var o,a,s,u,l,c,f=t.createDocumentFragment(),p=[],d=0,h=e.length;d\s*$/g;function qe(e,t){return A(e,"table")&&A(11!==t.nodeType?t:t.firstChild,"tr")&&S(e).children("tbody")[0]||e}function Le(e){return e.type=(null!==e.getAttribute("type"))+"/"+e.type,e}function He(e){return"true/"===(e.type||"").slice(0,5)?e.type=e.type.slice(5):e.removeAttribute("type"),e}function Oe(e,t){var n,r,i,o,a,s;if(1===t.nodeType){if(Y.hasData(e)&&(s=Y.get(e).events))for(i in Y.remove(t,"handle events"),s)for(n=0,r=s[i].length;n").attr(n.scriptAttrs||{}).prop({charset:n.scriptCharset,src:n.url}).on("load error",i=function(e){r.remove(),i=null,e&&t("error"===e.type?404:200,e.type)}),E.head.appendChild(r[0])},abort:function(){i&&i()}}});var Ut,Xt=[],Vt=/(=)\?(?=&|$)|\?\?/;S.ajaxSetup({jsonp:"callback",jsonpCallback:function(){var e=Xt.pop()||S.expando+"_"+Ct.guid++;return this[e]=!0,e}}),S.ajaxPrefilter("json jsonp",function(e,t,n){var r,i,o,a=!1!==e.jsonp&&(Vt.test(e.url)?"url":"string"==typeof e.data&&0===(e.contentType||"").indexOf("application/x-www-form-urlencoded")&&Vt.test(e.data)&&"data");if(a||"jsonp"===e.dataTypes[0])return r=e.jsonpCallback=m(e.jsonpCallback)?e.jsonpCallback():e.jsonpCallback,a?e[a]=e[a].replace(Vt,"$1"+r):!1!==e.jsonp&&(e.url+=(Et.test(e.url)?"&":"?")+e.jsonp+"="+r),e.converters["script json"]=function(){return o||S.error(r+" was not called"),o[0]},e.dataTypes[0]="json",i=C[r],C[r]=function(){o=arguments},n.always(function(){void 0===i?S(C).removeProp(r):C[r]=i,e[r]&&(e.jsonpCallback=t.jsonpCallback,Xt.push(r)),o&&m(i)&&i(o[0]),o=i=void 0}),"script"}),y.createHTMLDocument=((Ut=E.implementation.createHTMLDocument("").body).innerHTML="
    ",2===Ut.childNodes.length),S.parseHTML=function(e,t,n){return"string"!=typeof e?[]:("boolean"==typeof t&&(n=t,t=!1),t||(y.createHTMLDocument?((r=(t=E.implementation.createHTMLDocument("")).createElement("base")).href=E.location.href,t.head.appendChild(r)):t=E),o=!n&&[],(i=N.exec(e))?[t.createElement(i[1])]:(i=xe([e],t,o),o&&o.length&&S(o).remove(),S.merge([],i.childNodes)));var r,i,o},S.fn.load=function(e,t,n){var r,i,o,a=this,s=e.indexOf(" ");return-1").append(S.parseHTML(e)).find(r):e)}).always(n&&function(e,t){a.each(function(){n.apply(this,o||[e.responseText,t,e])})}),this},S.expr.pseudos.animated=function(t){return S.grep(S.timers,function(e){return t===e.elem}).length},S.offset={setOffset:function(e,t,n){var r,i,o,a,s,u,l=S.css(e,"position"),c=S(e),f={};"static"===l&&(e.style.position="relative"),s=c.offset(),o=S.css(e,"top"),u=S.css(e,"left"),("absolute"===l||"fixed"===l)&&-1<(o+u).indexOf("auto")?(a=(r=c.position()).top,i=r.left):(a=parseFloat(o)||0,i=parseFloat(u)||0),m(t)&&(t=t.call(e,n,S.extend({},s))),null!=t.top&&(f.top=t.top-s.top+a),null!=t.left&&(f.left=t.left-s.left+i),"using"in t?t.using.call(e,f):("number"==typeof f.top&&(f.top+="px"),"number"==typeof f.left&&(f.left+="px"),c.css(f))}},S.fn.extend({offset:function(t){if(arguments.length)return void 0===t?this:this.each(function(e){S.offset.setOffset(this,t,e)});var e,n,r=this[0];return r?r.getClientRects().length?(e=r.getBoundingClientRect(),n=r.ownerDocument.defaultView,{top:e.top+n.pageYOffset,left:e.left+n.pageXOffset}):{top:0,left:0}:void 0},position:function(){if(this[0]){var e,t,n,r=this[0],i={top:0,left:0};if("fixed"===S.css(r,"position"))t=r.getBoundingClientRect();else{t=this.offset(),n=r.ownerDocument,e=r.offsetParent||n.documentElement;while(e&&(e===n.body||e===n.documentElement)&&"static"===S.css(e,"position"))e=e.parentNode;e&&e!==r&&1===e.nodeType&&((i=S(e).offset()).top+=S.css(e,"borderTopWidth",!0),i.left+=S.css(e,"borderLeftWidth",!0))}return{top:t.top-i.top-S.css(r,"marginTop",!0),left:t.left-i.left-S.css(r,"marginLeft",!0)}}},offsetParent:function(){return this.map(function(){var e=this.offsetParent;while(e&&"static"===S.css(e,"position"))e=e.offsetParent;return e||re})}}),S.each({scrollLeft:"pageXOffset",scrollTop:"pageYOffset"},function(t,i){var o="pageYOffset"===i;S.fn[t]=function(e){return $(this,function(e,t,n){var r;if(x(e)?r=e:9===e.nodeType&&(r=e.defaultView),void 0===n)return r?r[i]:e[t];r?r.scrollTo(o?r.pageXOffset:n,o?n:r.pageYOffset):e[t]=n},t,e,arguments.length)}}),S.each(["top","left"],function(e,n){S.cssHooks[n]=$e(y.pixelPosition,function(e,t){if(t)return t=Be(e,n),Me.test(t)?S(e).position()[n]+"px":t})}),S.each({Height:"height",Width:"width"},function(a,s){S.each({padding:"inner"+a,content:s,"":"outer"+a},function(r,o){S.fn[o]=function(e,t){var n=arguments.length&&(r||"boolean"!=typeof e),i=r||(!0===e||!0===t?"margin":"border");return $(this,function(e,t,n){var r;return x(e)?0===o.indexOf("outer")?e["inner"+a]:e.document.documentElement["client"+a]:9===e.nodeType?(r=e.documentElement,Math.max(e.body["scroll"+a],r["scroll"+a],e.body["offset"+a],r["offset"+a],r["client"+a])):void 0===n?S.css(e,t,i):S.style(e,t,n,i)},s,n?e:void 0,n)}})}),S.each(["ajaxStart","ajaxStop","ajaxComplete","ajaxError","ajaxSuccess","ajaxSend"],function(e,t){S.fn[t]=function(e){return this.on(t,e)}}),S.fn.extend({bind:function(e,t,n){return this.on(e,null,t,n)},unbind:function(e,t){return this.off(e,null,t)},delegate:function(e,t,n,r){return this.on(t,e,n,r)},undelegate:function(e,t,n){return 
1===arguments.length?this.off(e,"**"):this.off(t,e||"**",n)},hover:function(e,t){return this.mouseenter(e).mouseleave(t||e)}}),S.each("blur focus focusin focusout resize scroll click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup contextmenu".split(" "),function(e,n){S.fn[n]=function(e,t){return 0 - +
diff --git a/dev/.rat-excludes b/dev/.rat-excludes
index 98786437f7b1c..0e892a927906a 100644
--- a/dev/.rat-excludes
+++ b/dev/.rat-excludes
@@ -25,7 +25,7 @@ bootstrap.bundle.min.js
 bootstrap.bundle.min.js.map
 bootstrap.min.css
 bootstrap.min.css.map
-jquery-3.4.1.min.js
+jquery-3.5.1.min.js
 d3.min.js
 dagre-d3.min.js
 graphlib-dot.min.js
diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html
index d6548f0fa9534..5f6cd7c6b7f20 100755
--- a/docs/_layouts/global.html
+++ b/docs/_layouts/global.html
@@ -168,7 +168,7 @@

    {{ page.title }}

    - + diff --git a/docs/js/vendor/jquery-3.4.1.min.js b/docs/js/vendor/jquery-3.4.1.min.js deleted file mode 100644 index 07c00cd227da0..0000000000000 --- a/docs/js/vendor/jquery-3.4.1.min.js +++ /dev/null @@ -1,2 +0,0 @@ -/*! jQuery v3.4.1 | (c) JS Foundation and other contributors | jquery.org/license */ -!function(e,t){"use strict";"object"==typeof module&&"object"==typeof module.exports?module.exports=e.document?t(e,!0):function(e){if(!e.document)throw new Error("jQuery requires a window with a document");return t(e)}:t(e)}("undefined"!=typeof window?window:this,function(C,e){"use strict";var t=[],E=C.document,r=Object.getPrototypeOf,s=t.slice,g=t.concat,u=t.push,i=t.indexOf,n={},o=n.toString,v=n.hasOwnProperty,a=v.toString,l=a.call(Object),y={},m=function(e){return"function"==typeof e&&"number"!=typeof e.nodeType},x=function(e){return null!=e&&e===e.window},c={type:!0,src:!0,nonce:!0,noModule:!0};function b(e,t,n){var r,i,o=(n=n||E).createElement("script");if(o.text=e,t)for(r in c)(i=t[r]||t.getAttribute&&t.getAttribute(r))&&o.setAttribute(r,i);n.head.appendChild(o).parentNode.removeChild(o)}function w(e){return null==e?e+"":"object"==typeof e||"function"==typeof e?n[o.call(e)]||"object":typeof e}var f="3.4.1",k=function(e,t){return new k.fn.init(e,t)},p=/^[\s\uFEFF\xA0]+|[\s\uFEFF\xA0]+$/g;function d(e){var t=!!e&&"length"in e&&e.length,n=w(e);return!m(e)&&!x(e)&&("array"===n||0===t||"number"==typeof t&&0+~]|"+M+")"+M+"*"),U=new RegExp(M+"|>"),X=new RegExp($),V=new RegExp("^"+I+"$"),G={ID:new RegExp("^#("+I+")"),CLASS:new RegExp("^\\.("+I+")"),TAG:new RegExp("^("+I+"|[*])"),ATTR:new RegExp("^"+W),PSEUDO:new RegExp("^"+$),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+M+"*(even|odd|(([+-]|)(\\d*)n|)"+M+"*(?:([+-]|)"+M+"*(\\d+)|))"+M+"*\\)|)","i"),bool:new RegExp("^(?:"+R+")$","i"),needsContext:new RegExp("^"+M+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+M+"*((?:-\\d)?\\d*)"+M+"*\\)|)(?=[^-]|$)","i")},Y=/HTML$/i,Q=/^(?:input|select|textarea|button)$/i,J=/^h\d$/i,K=/^[^{]+\{\s*\[native \w/,Z=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,ee=/[+~]/,te=new RegExp("\\\\([\\da-f]{1,6}"+M+"?|("+M+")|.)","ig"),ne=function(e,t,n){var r="0x"+t-65536;return r!=r||n?t:r<0?String.fromCharCode(r+65536):String.fromCharCode(r>>10|55296,1023&r|56320)},re=/([\0-\x1f\x7f]|^-?\d)|^-$|[^\0-\x1f\x7f-\uFFFF\w-]/g,ie=function(e,t){return t?"\0"===e?"\ufffd":e.slice(0,-1)+"\\"+e.charCodeAt(e.length-1).toString(16)+" ":"\\"+e},oe=function(){T()},ae=be(function(e){return!0===e.disabled&&"fieldset"===e.nodeName.toLowerCase()},{dir:"parentNode",next:"legend"});try{H.apply(t=O.call(m.childNodes),m.childNodes),t[m.childNodes.length].nodeType}catch(e){H={apply:t.length?function(e,t){L.apply(e,O.call(t))}:function(e,t){var n=e.length,r=0;while(e[n++]=t[r++]);e.length=n-1}}}function se(t,e,n,r){var i,o,a,s,u,l,c,f=e&&e.ownerDocument,p=e?e.nodeType:9;if(n=n||[],"string"!=typeof t||!t||1!==p&&9!==p&&11!==p)return n;if(!r&&((e?e.ownerDocument||e:m)!==C&&T(e),e=e||C,E)){if(11!==p&&(u=Z.exec(t)))if(i=u[1]){if(9===p){if(!(a=e.getElementById(i)))return n;if(a.id===i)return n.push(a),n}else if(f&&(a=f.getElementById(i))&&y(e,a)&&a.id===i)return n.push(a),n}else{if(u[2])return H.apply(n,e.getElementsByTagName(t)),n;if((i=u[3])&&d.getElementsByClassName&&e.getElementsByClassName)return H.apply(n,e.getElementsByClassName(i)),n}if(d.qsa&&!A[t+" 
"]&&(!v||!v.test(t))&&(1!==p||"object"!==e.nodeName.toLowerCase())){if(c=t,f=e,1===p&&U.test(t)){(s=e.getAttribute("id"))?s=s.replace(re,ie):e.setAttribute("id",s=k),o=(l=h(t)).length;while(o--)l[o]="#"+s+" "+xe(l[o]);c=l.join(","),f=ee.test(t)&&ye(e.parentNode)||e}try{return H.apply(n,f.querySelectorAll(c)),n}catch(e){A(t,!0)}finally{s===k&&e.removeAttribute("id")}}}return g(t.replace(B,"$1"),e,n,r)}function ue(){var r=[];return function e(t,n){return r.push(t+" ")>b.cacheLength&&delete e[r.shift()],e[t+" "]=n}}function le(e){return e[k]=!0,e}function ce(e){var t=C.createElement("fieldset");try{return!!e(t)}catch(e){return!1}finally{t.parentNode&&t.parentNode.removeChild(t),t=null}}function fe(e,t){var n=e.split("|"),r=n.length;while(r--)b.attrHandle[n[r]]=t}function pe(e,t){var n=t&&e,r=n&&1===e.nodeType&&1===t.nodeType&&e.sourceIndex-t.sourceIndex;if(r)return r;if(n)while(n=n.nextSibling)if(n===t)return-1;return e?1:-1}function de(t){return function(e){return"input"===e.nodeName.toLowerCase()&&e.type===t}}function he(n){return function(e){var t=e.nodeName.toLowerCase();return("input"===t||"button"===t)&&e.type===n}}function ge(t){return function(e){return"form"in e?e.parentNode&&!1===e.disabled?"label"in e?"label"in e.parentNode?e.parentNode.disabled===t:e.disabled===t:e.isDisabled===t||e.isDisabled!==!t&&ae(e)===t:e.disabled===t:"label"in e&&e.disabled===t}}function ve(a){return le(function(o){return o=+o,le(function(e,t){var n,r=a([],e.length,o),i=r.length;while(i--)e[n=r[i]]&&(e[n]=!(t[n]=e[n]))})})}function ye(e){return e&&"undefined"!=typeof e.getElementsByTagName&&e}for(e in d=se.support={},i=se.isXML=function(e){var t=e.namespaceURI,n=(e.ownerDocument||e).documentElement;return!Y.test(t||n&&n.nodeName||"HTML")},T=se.setDocument=function(e){var t,n,r=e?e.ownerDocument||e:m;return r!==C&&9===r.nodeType&&r.documentElement&&(a=(C=r).documentElement,E=!i(C),m!==C&&(n=C.defaultView)&&n.top!==n&&(n.addEventListener?n.addEventListener("unload",oe,!1):n.attachEvent&&n.attachEvent("onunload",oe)),d.attributes=ce(function(e){return e.className="i",!e.getAttribute("className")}),d.getElementsByTagName=ce(function(e){return e.appendChild(C.createComment("")),!e.getElementsByTagName("*").length}),d.getElementsByClassName=K.test(C.getElementsByClassName),d.getById=ce(function(e){return a.appendChild(e).id=k,!C.getElementsByName||!C.getElementsByName(k).length}),d.getById?(b.filter.ID=function(e){var t=e.replace(te,ne);return function(e){return e.getAttribute("id")===t}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n=t.getElementById(e);return n?[n]:[]}}):(b.filter.ID=function(e){var n=e.replace(te,ne);return function(e){var t="undefined"!=typeof e.getAttributeNode&&e.getAttributeNode("id");return t&&t.value===n}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n,r,i,o=t.getElementById(e);if(o){if((n=o.getAttributeNode("id"))&&n.value===e)return[o];i=t.getElementsByName(e),r=0;while(o=i[r++])if((n=o.getAttributeNode("id"))&&n.value===e)return[o]}return[]}}),b.find.TAG=d.getElementsByTagName?function(e,t){return"undefined"!=typeof t.getElementsByTagName?t.getElementsByTagName(e):d.qsa?t.querySelectorAll(e):void 0}:function(e,t){var n,r=[],i=0,o=t.getElementsByTagName(e);if("*"===e){while(n=o[i++])1===n.nodeType&&r.push(n);return r}return o},b.find.CLASS=d.getElementsByClassName&&function(e,t){if("undefined"!=typeof t.getElementsByClassName&&E)return 
t.getElementsByClassName(e)},s=[],v=[],(d.qsa=K.test(C.querySelectorAll))&&(ce(function(e){a.appendChild(e).innerHTML="",e.querySelectorAll("[msallowcapture^='']").length&&v.push("[*^$]="+M+"*(?:''|\"\")"),e.querySelectorAll("[selected]").length||v.push("\\["+M+"*(?:value|"+R+")"),e.querySelectorAll("[id~="+k+"-]").length||v.push("~="),e.querySelectorAll(":checked").length||v.push(":checked"),e.querySelectorAll("a#"+k+"+*").length||v.push(".#.+[+~]")}),ce(function(e){e.innerHTML="";var t=C.createElement("input");t.setAttribute("type","hidden"),e.appendChild(t).setAttribute("name","D"),e.querySelectorAll("[name=d]").length&&v.push("name"+M+"*[*^$|!~]?="),2!==e.querySelectorAll(":enabled").length&&v.push(":enabled",":disabled"),a.appendChild(e).disabled=!0,2!==e.querySelectorAll(":disabled").length&&v.push(":enabled",":disabled"),e.querySelectorAll("*,:x"),v.push(",.*:")})),(d.matchesSelector=K.test(c=a.matches||a.webkitMatchesSelector||a.mozMatchesSelector||a.oMatchesSelector||a.msMatchesSelector))&&ce(function(e){d.disconnectedMatch=c.call(e,"*"),c.call(e,"[s!='']:x"),s.push("!=",$)}),v=v.length&&new RegExp(v.join("|")),s=s.length&&new RegExp(s.join("|")),t=K.test(a.compareDocumentPosition),y=t||K.test(a.contains)?function(e,t){var n=9===e.nodeType?e.documentElement:e,r=t&&t.parentNode;return e===r||!(!r||1!==r.nodeType||!(n.contains?n.contains(r):e.compareDocumentPosition&&16&e.compareDocumentPosition(r)))}:function(e,t){if(t)while(t=t.parentNode)if(t===e)return!0;return!1},D=t?function(e,t){if(e===t)return l=!0,0;var n=!e.compareDocumentPosition-!t.compareDocumentPosition;return n||(1&(n=(e.ownerDocument||e)===(t.ownerDocument||t)?e.compareDocumentPosition(t):1)||!d.sortDetached&&t.compareDocumentPosition(e)===n?e===C||e.ownerDocument===m&&y(m,e)?-1:t===C||t.ownerDocument===m&&y(m,t)?1:u?P(u,e)-P(u,t):0:4&n?-1:1)}:function(e,t){if(e===t)return l=!0,0;var n,r=0,i=e.parentNode,o=t.parentNode,a=[e],s=[t];if(!i||!o)return e===C?-1:t===C?1:i?-1:o?1:u?P(u,e)-P(u,t):0;if(i===o)return pe(e,t);n=e;while(n=n.parentNode)a.unshift(n);n=t;while(n=n.parentNode)s.unshift(n);while(a[r]===s[r])r++;return r?pe(a[r],s[r]):a[r]===m?-1:s[r]===m?1:0}),C},se.matches=function(e,t){return se(e,null,null,t)},se.matchesSelector=function(e,t){if((e.ownerDocument||e)!==C&&T(e),d.matchesSelector&&E&&!A[t+" "]&&(!s||!s.test(t))&&(!v||!v.test(t)))try{var n=c.call(e,t);if(n||d.disconnectedMatch||e.document&&11!==e.document.nodeType)return n}catch(e){A(t,!0)}return 0":{dir:"parentNode",first:!0}," ":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(e){return e[1]=e[1].replace(te,ne),e[3]=(e[3]||e[4]||e[5]||"").replace(te,ne),"~="===e[2]&&(e[3]=" "+e[3]+" "),e.slice(0,4)},CHILD:function(e){return e[1]=e[1].toLowerCase(),"nth"===e[1].slice(0,3)?(e[3]||se.error(e[0]),e[4]=+(e[4]?e[5]+(e[6]||1):2*("even"===e[3]||"odd"===e[3])),e[5]=+(e[7]+e[8]||"odd"===e[3])):e[3]&&se.error(e[0]),e},PSEUDO:function(e){var t,n=!e[6]&&e[2];return G.CHILD.test(e[0])?null:(e[3]?e[2]=e[4]||e[5]||"":n&&X.test(n)&&(t=h(n,!0))&&(t=n.indexOf(")",n.length-t)-n.length)&&(e[0]=e[0].slice(0,t),e[2]=n.slice(0,t)),e.slice(0,3))}},filter:{TAG:function(e){var t=e.replace(te,ne).toLowerCase();return"*"===e?function(){return!0}:function(e){return e.nodeName&&e.nodeName.toLowerCase()===t}},CLASS:function(e){var t=p[e+" "];return t||(t=new RegExp("(^|"+M+")"+e+"("+M+"|$)"))&&p(e,function(e){return t.test("string"==typeof e.className&&e.className||"undefined"!=typeof 
e.getAttribute&&e.getAttribute("class")||"")})},ATTR:function(n,r,i){return function(e){var t=se.attr(e,n);return null==t?"!="===r:!r||(t+="","="===r?t===i:"!="===r?t!==i:"^="===r?i&&0===t.indexOf(i):"*="===r?i&&-1:\x20\t\r\n\f]*)[\x20\t\r\n\f]*\/?>(?:<\/\1>|)$/i;function j(e,n,r){return m(n)?k.grep(e,function(e,t){return!!n.call(e,t,e)!==r}):n.nodeType?k.grep(e,function(e){return e===n!==r}):"string"!=typeof n?k.grep(e,function(e){return-1)[^>]*|#([\w-]+))$/;(k.fn.init=function(e,t,n){var r,i;if(!e)return this;if(n=n||q,"string"==typeof e){if(!(r="<"===e[0]&&">"===e[e.length-1]&&3<=e.length?[null,e,null]:L.exec(e))||!r[1]&&t)return!t||t.jquery?(t||n).find(e):this.constructor(t).find(e);if(r[1]){if(t=t instanceof k?t[0]:t,k.merge(this,k.parseHTML(r[1],t&&t.nodeType?t.ownerDocument||t:E,!0)),D.test(r[1])&&k.isPlainObject(t))for(r in t)m(this[r])?this[r](t[r]):this.attr(r,t[r]);return this}return(i=E.getElementById(r[2]))&&(this[0]=i,this.length=1),this}return e.nodeType?(this[0]=e,this.length=1,this):m(e)?void 0!==n.ready?n.ready(e):e(k):k.makeArray(e,this)}).prototype=k.fn,q=k(E);var H=/^(?:parents|prev(?:Until|All))/,O={children:!0,contents:!0,next:!0,prev:!0};function P(e,t){while((e=e[t])&&1!==e.nodeType);return e}k.fn.extend({has:function(e){var t=k(e,this),n=t.length;return this.filter(function(){for(var e=0;e\x20\t\r\n\f]*)/i,he=/^$|^module$|\/(?:java|ecma)script/i,ge={option:[1,""],thead:[1,"","
    "],col:[2,"","
    "],tr:[2,"","
    "],td:[3,"","
    "],_default:[0,"",""]};function ve(e,t){var n;return n="undefined"!=typeof e.getElementsByTagName?e.getElementsByTagName(t||"*"):"undefined"!=typeof e.querySelectorAll?e.querySelectorAll(t||"*"):[],void 0===t||t&&A(e,t)?k.merge([e],n):n}function ye(e,t){for(var n=0,r=e.length;nx",y.noCloneChecked=!!me.cloneNode(!0).lastChild.defaultValue;var Te=/^key/,Ce=/^(?:mouse|pointer|contextmenu|drag|drop)|click/,Ee=/^([^.]*)(?:\.(.+)|)/;function ke(){return!0}function Se(){return!1}function Ne(e,t){return e===function(){try{return E.activeElement}catch(e){}}()==("focus"===t)}function Ae(e,t,n,r,i,o){var a,s;if("object"==typeof t){for(s in"string"!=typeof n&&(r=r||n,n=void 0),t)Ae(e,s,n,r,t[s],o);return e}if(null==r&&null==i?(i=n,r=n=void 0):null==i&&("string"==typeof n?(i=r,r=void 0):(i=r,r=n,n=void 0)),!1===i)i=Se;else if(!i)return e;return 1===o&&(a=i,(i=function(e){return k().off(e),a.apply(this,arguments)}).guid=a.guid||(a.guid=k.guid++)),e.each(function(){k.event.add(this,t,i,r,n)})}function De(e,i,o){o?(Q.set(e,i,!1),k.event.add(e,i,{namespace:!1,handler:function(e){var t,n,r=Q.get(this,i);if(1&e.isTrigger&&this[i]){if(r.length)(k.event.special[i]||{}).delegateType&&e.stopPropagation();else if(r=s.call(arguments),Q.set(this,i,r),t=o(this,i),this[i](),r!==(n=Q.get(this,i))||t?Q.set(this,i,!1):n={},r!==n)return e.stopImmediatePropagation(),e.preventDefault(),n.value}else r.length&&(Q.set(this,i,{value:k.event.trigger(k.extend(r[0],k.Event.prototype),r.slice(1),this)}),e.stopImmediatePropagation())}})):void 0===Q.get(e,i)&&k.event.add(e,i,ke)}k.event={global:{},add:function(t,e,n,r,i){var o,a,s,u,l,c,f,p,d,h,g,v=Q.get(t);if(v){n.handler&&(n=(o=n).handler,i=o.selector),i&&k.find.matchesSelector(ie,i),n.guid||(n.guid=k.guid++),(u=v.events)||(u=v.events={}),(a=v.handle)||(a=v.handle=function(e){return"undefined"!=typeof k&&k.event.triggered!==e.type?k.event.dispatch.apply(t,arguments):void 0}),l=(e=(e||"").match(R)||[""]).length;while(l--)d=g=(s=Ee.exec(e[l])||[])[1],h=(s[2]||"").split(".").sort(),d&&(f=k.event.special[d]||{},d=(i?f.delegateType:f.bindType)||d,f=k.event.special[d]||{},c=k.extend({type:d,origType:g,data:r,handler:n,guid:n.guid,selector:i,needsContext:i&&k.expr.match.needsContext.test(i),namespace:h.join(".")},o),(p=u[d])||((p=u[d]=[]).delegateCount=0,f.setup&&!1!==f.setup.call(t,r,h,a)||t.addEventListener&&t.addEventListener(d,a)),f.add&&(f.add.call(t,c),c.handler.guid||(c.handler.guid=n.guid)),i?p.splice(p.delegateCount++,0,c):p.push(c),k.event.global[d]=!0)}},remove:function(e,t,n,r,i){var o,a,s,u,l,c,f,p,d,h,g,v=Q.hasData(e)&&Q.get(e);if(v&&(u=v.events)){l=(t=(t||"").match(R)||[""]).length;while(l--)if(d=g=(s=Ee.exec(t[l])||[])[1],h=(s[2]||"").split(".").sort(),d){f=k.event.special[d]||{},p=u[d=(r?f.delegateType:f.bindType)||d]||[],s=s[2]&&new RegExp("(^|\\.)"+h.join("\\.(?:.*\\.|)")+"(\\.|$)"),a=o=p.length;while(o--)c=p[o],!i&&g!==c.origType||n&&n.guid!==c.guid||s&&!s.test(c.namespace)||r&&r!==c.selector&&("**"!==r||!c.selector)||(p.splice(o,1),c.selector&&p.delegateCount--,f.remove&&f.remove.call(e,c));a&&!p.length&&(f.teardown&&!1!==f.teardown.call(e,h,v.handle)||k.removeEvent(e,d,v.handle),delete u[d])}else for(d in u)k.event.remove(e,d+t[l],n,r,!0);k.isEmptyObject(u)&&Q.remove(e,"handle events")}},dispatch:function(e){var t,n,r,i,o,a,s=k.event.fix(e),u=new Array(arguments.length),l=(Q.get(this,"events")||{})[s.type]||[],c=k.event.special[s.type]||{};for(u[0]=s,t=1;t\x20\t\r\n\f]*)[^>]*)\/>/gi,qe=/\s*$/g;function Oe(e,t){return 
A(e,"table")&&A(11!==t.nodeType?t:t.firstChild,"tr")&&k(e).children("tbody")[0]||e}function Pe(e){return e.type=(null!==e.getAttribute("type"))+"/"+e.type,e}function Re(e){return"true/"===(e.type||"").slice(0,5)?e.type=e.type.slice(5):e.removeAttribute("type"),e}function Me(e,t){var n,r,i,o,a,s,u,l;if(1===t.nodeType){if(Q.hasData(e)&&(o=Q.access(e),a=Q.set(t,o),l=o.events))for(i in delete a.handle,a.events={},l)for(n=0,r=l[i].length;n")},clone:function(e,t,n){var r,i,o,a,s,u,l,c=e.cloneNode(!0),f=oe(e);if(!(y.noCloneChecked||1!==e.nodeType&&11!==e.nodeType||k.isXMLDoc(e)))for(a=ve(c),r=0,i=(o=ve(e)).length;r").attr(n.scriptAttrs||{}).prop({charset:n.scriptCharset,src:n.url}).on("load error",i=function(e){r.remove(),i=null,e&&t("error"===e.type?404:200,e.type)}),E.head.appendChild(r[0])},abort:function(){i&&i()}}});var Vt,Gt=[],Yt=/(=)\?(?=&|$)|\?\?/;k.ajaxSetup({jsonp:"callback",jsonpCallback:function(){var e=Gt.pop()||k.expando+"_"+kt++;return this[e]=!0,e}}),k.ajaxPrefilter("json jsonp",function(e,t,n){var r,i,o,a=!1!==e.jsonp&&(Yt.test(e.url)?"url":"string"==typeof e.data&&0===(e.contentType||"").indexOf("application/x-www-form-urlencoded")&&Yt.test(e.data)&&"data");if(a||"jsonp"===e.dataTypes[0])return r=e.jsonpCallback=m(e.jsonpCallback)?e.jsonpCallback():e.jsonpCallback,a?e[a]=e[a].replace(Yt,"$1"+r):!1!==e.jsonp&&(e.url+=(St.test(e.url)?"&":"?")+e.jsonp+"="+r),e.converters["script json"]=function(){return o||k.error(r+" was not called"),o[0]},e.dataTypes[0]="json",i=C[r],C[r]=function(){o=arguments},n.always(function(){void 0===i?k(C).removeProp(r):C[r]=i,e[r]&&(e.jsonpCallback=t.jsonpCallback,Gt.push(r)),o&&m(i)&&i(o[0]),o=i=void 0}),"script"}),y.createHTMLDocument=((Vt=E.implementation.createHTMLDocument("").body).innerHTML="
    ",2===Vt.childNodes.length),k.parseHTML=function(e,t,n){return"string"!=typeof e?[]:("boolean"==typeof t&&(n=t,t=!1),t||(y.createHTMLDocument?((r=(t=E.implementation.createHTMLDocument("")).createElement("base")).href=E.location.href,t.head.appendChild(r)):t=E),o=!n&&[],(i=D.exec(e))?[t.createElement(i[1])]:(i=we([e],t,o),o&&o.length&&k(o).remove(),k.merge([],i.childNodes)));var r,i,o},k.fn.load=function(e,t,n){var r,i,o,a=this,s=e.indexOf(" ");return-1").append(k.parseHTML(e)).find(r):e)}).always(n&&function(e,t){a.each(function(){n.apply(this,o||[e.responseText,t,e])})}),this},k.each(["ajaxStart","ajaxStop","ajaxComplete","ajaxError","ajaxSuccess","ajaxSend"],function(e,t){k.fn[t]=function(e){return this.on(t,e)}}),k.expr.pseudos.animated=function(t){return k.grep(k.timers,function(e){return t===e.elem}).length},k.offset={setOffset:function(e,t,n){var r,i,o,a,s,u,l=k.css(e,"position"),c=k(e),f={};"static"===l&&(e.style.position="relative"),s=c.offset(),o=k.css(e,"top"),u=k.css(e,"left"),("absolute"===l||"fixed"===l)&&-1<(o+u).indexOf("auto")?(a=(r=c.position()).top,i=r.left):(a=parseFloat(o)||0,i=parseFloat(u)||0),m(t)&&(t=t.call(e,n,k.extend({},s))),null!=t.top&&(f.top=t.top-s.top+a),null!=t.left&&(f.left=t.left-s.left+i),"using"in t?t.using.call(e,f):c.css(f)}},k.fn.extend({offset:function(t){if(arguments.length)return void 0===t?this:this.each(function(e){k.offset.setOffset(this,t,e)});var e,n,r=this[0];return r?r.getClientRects().length?(e=r.getBoundingClientRect(),n=r.ownerDocument.defaultView,{top:e.top+n.pageYOffset,left:e.left+n.pageXOffset}):{top:0,left:0}:void 0},position:function(){if(this[0]){var e,t,n,r=this[0],i={top:0,left:0};if("fixed"===k.css(r,"position"))t=r.getBoundingClientRect();else{t=this.offset(),n=r.ownerDocument,e=r.offsetParent||n.documentElement;while(e&&(e===n.body||e===n.documentElement)&&"static"===k.css(e,"position"))e=e.parentNode;e&&e!==r&&1===e.nodeType&&((i=k(e).offset()).top+=k.css(e,"borderTopWidth",!0),i.left+=k.css(e,"borderLeftWidth",!0))}return{top:t.top-i.top-k.css(r,"marginTop",!0),left:t.left-i.left-k.css(r,"marginLeft",!0)}}},offsetParent:function(){return this.map(function(){var e=this.offsetParent;while(e&&"static"===k.css(e,"position"))e=e.offsetParent;return e||ie})}}),k.each({scrollLeft:"pageXOffset",scrollTop:"pageYOffset"},function(t,i){var o="pageYOffset"===i;k.fn[t]=function(e){return _(this,function(e,t,n){var r;if(x(e)?r=e:9===e.nodeType&&(r=e.defaultView),void 0===n)return r?r[i]:e[t];r?r.scrollTo(o?r.pageXOffset:n,o?n:r.pageYOffset):e[t]=n},t,e,arguments.length)}}),k.each(["top","left"],function(e,n){k.cssHooks[n]=ze(y.pixelPosition,function(e,t){if(t)return t=_e(e,n),$e.test(t)?k(e).position()[n]+"px":t})}),k.each({Height:"height",Width:"width"},function(a,s){k.each({padding:"inner"+a,content:s,"":"outer"+a},function(r,o){k.fn[o]=function(e,t){var n=arguments.length&&(r||"boolean"!=typeof e),i=r||(!0===e||!0===t?"margin":"border");return _(this,function(e,t,n){var r;return x(e)?0===o.indexOf("outer")?e["inner"+a]:e.document.documentElement["client"+a]:9===e.nodeType?(r=e.documentElement,Math.max(e.body["scroll"+a],r["scroll"+a],e.body["offset"+a],r["offset"+a],r["client"+a])):void 0===n?k.css(e,t,i):k.style(e,t,n,i)},s,n?e:void 0,n)}})}),k.each("blur focus focusin focusout resize scroll click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup contextmenu".split(" "),function(e,n){k.fn[n]=function(e,t){return 0+~]|"+M+")"+M+"*"),U=new RegExp(M+"|>"),X=new 
RegExp(F),V=new RegExp("^"+I+"$"),G={ID:new RegExp("^#("+I+")"),CLASS:new RegExp("^\\.("+I+")"),TAG:new RegExp("^("+I+"|[*])"),ATTR:new RegExp("^"+W),PSEUDO:new RegExp("^"+F),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+M+"*(even|odd|(([+-]|)(\\d*)n|)"+M+"*(?:([+-]|)"+M+"*(\\d+)|))"+M+"*\\)|)","i"),bool:new RegExp("^(?:"+R+")$","i"),needsContext:new RegExp("^"+M+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+M+"*((?:-\\d)?\\d*)"+M+"*\\)|)(?=[^-]|$)","i")},Y=/HTML$/i,Q=/^(?:input|select|textarea|button)$/i,J=/^h\d$/i,K=/^[^{]+\{\s*\[native \w/,Z=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,ee=/[+~]/,te=new RegExp("\\\\[\\da-fA-F]{1,6}"+M+"?|\\\\([^\\r\\n\\f])","g"),ne=function(e,t){var n="0x"+e.slice(1)-65536;return t||(n<0?String.fromCharCode(n+65536):String.fromCharCode(n>>10|55296,1023&n|56320))},re=/([\0-\x1f\x7f]|^-?\d)|^-$|[^\0-\x1f\x7f-\uFFFF\w-]/g,ie=function(e,t){return t?"\0"===e?"\ufffd":e.slice(0,-1)+"\\"+e.charCodeAt(e.length-1).toString(16)+" ":"\\"+e},oe=function(){T()},ae=be(function(e){return!0===e.disabled&&"fieldset"===e.nodeName.toLowerCase()},{dir:"parentNode",next:"legend"});try{H.apply(t=O.call(p.childNodes),p.childNodes),t[p.childNodes.length].nodeType}catch(e){H={apply:t.length?function(e,t){L.apply(e,O.call(t))}:function(e,t){var n=e.length,r=0;while(e[n++]=t[r++]);e.length=n-1}}}function se(t,e,n,r){var i,o,a,s,u,l,c,f=e&&e.ownerDocument,p=e?e.nodeType:9;if(n=n||[],"string"!=typeof t||!t||1!==p&&9!==p&&11!==p)return n;if(!r&&(T(e),e=e||C,E)){if(11!==p&&(u=Z.exec(t)))if(i=u[1]){if(9===p){if(!(a=e.getElementById(i)))return n;if(a.id===i)return n.push(a),n}else if(f&&(a=f.getElementById(i))&&y(e,a)&&a.id===i)return n.push(a),n}else{if(u[2])return H.apply(n,e.getElementsByTagName(t)),n;if((i=u[3])&&d.getElementsByClassName&&e.getElementsByClassName)return H.apply(n,e.getElementsByClassName(i)),n}if(d.qsa&&!N[t+" "]&&(!v||!v.test(t))&&(1!==p||"object"!==e.nodeName.toLowerCase())){if(c=t,f=e,1===p&&(U.test(t)||z.test(t))){(f=ee.test(t)&&ye(e.parentNode)||e)===e&&d.scope||((s=e.getAttribute("id"))?s=s.replace(re,ie):e.setAttribute("id",s=S)),o=(l=h(t)).length;while(o--)l[o]=(s?"#"+s:":scope")+" "+xe(l[o]);c=l.join(",")}try{return H.apply(n,f.querySelectorAll(c)),n}catch(e){N(t,!0)}finally{s===S&&e.removeAttribute("id")}}}return g(t.replace($,"$1"),e,n,r)}function ue(){var r=[];return function e(t,n){return r.push(t+" ")>b.cacheLength&&delete e[r.shift()],e[t+" "]=n}}function le(e){return e[S]=!0,e}function ce(e){var t=C.createElement("fieldset");try{return!!e(t)}catch(e){return!1}finally{t.parentNode&&t.parentNode.removeChild(t),t=null}}function fe(e,t){var n=e.split("|"),r=n.length;while(r--)b.attrHandle[n[r]]=t}function pe(e,t){var n=t&&e,r=n&&1===e.nodeType&&1===t.nodeType&&e.sourceIndex-t.sourceIndex;if(r)return r;if(n)while(n=n.nextSibling)if(n===t)return-1;return e?1:-1}function de(t){return function(e){return"input"===e.nodeName.toLowerCase()&&e.type===t}}function he(n){return function(e){var t=e.nodeName.toLowerCase();return("input"===t||"button"===t)&&e.type===n}}function ge(t){return function(e){return"form"in e?e.parentNode&&!1===e.disabled?"label"in e?"label"in e.parentNode?e.parentNode.disabled===t:e.disabled===t:e.isDisabled===t||e.isDisabled!==!t&&ae(e)===t:e.disabled===t:"label"in e&&e.disabled===t}}function ve(a){return le(function(o){return o=+o,le(function(e,t){var n,r=a([],e.length,o),i=r.length;while(i--)e[n=r[i]]&&(e[n]=!(t[n]=e[n]))})})}function ye(e){return e&&"undefined"!=typeof e.getElementsByTagName&&e}for(e in 
d=se.support={},i=se.isXML=function(e){var t=e.namespaceURI,n=(e.ownerDocument||e).documentElement;return!Y.test(t||n&&n.nodeName||"HTML")},T=se.setDocument=function(e){var t,n,r=e?e.ownerDocument||e:p;return r!=C&&9===r.nodeType&&r.documentElement&&(a=(C=r).documentElement,E=!i(C),p!=C&&(n=C.defaultView)&&n.top!==n&&(n.addEventListener?n.addEventListener("unload",oe,!1):n.attachEvent&&n.attachEvent("onunload",oe)),d.scope=ce(function(e){return a.appendChild(e).appendChild(C.createElement("div")),"undefined"!=typeof e.querySelectorAll&&!e.querySelectorAll(":scope fieldset div").length}),d.attributes=ce(function(e){return e.className="i",!e.getAttribute("className")}),d.getElementsByTagName=ce(function(e){return e.appendChild(C.createComment("")),!e.getElementsByTagName("*").length}),d.getElementsByClassName=K.test(C.getElementsByClassName),d.getById=ce(function(e){return a.appendChild(e).id=S,!C.getElementsByName||!C.getElementsByName(S).length}),d.getById?(b.filter.ID=function(e){var t=e.replace(te,ne);return function(e){return e.getAttribute("id")===t}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n=t.getElementById(e);return n?[n]:[]}}):(b.filter.ID=function(e){var n=e.replace(te,ne);return function(e){var t="undefined"!=typeof e.getAttributeNode&&e.getAttributeNode("id");return t&&t.value===n}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n,r,i,o=t.getElementById(e);if(o){if((n=o.getAttributeNode("id"))&&n.value===e)return[o];i=t.getElementsByName(e),r=0;while(o=i[r++])if((n=o.getAttributeNode("id"))&&n.value===e)return[o]}return[]}}),b.find.TAG=d.getElementsByTagName?function(e,t){return"undefined"!=typeof t.getElementsByTagName?t.getElementsByTagName(e):d.qsa?t.querySelectorAll(e):void 0}:function(e,t){var n,r=[],i=0,o=t.getElementsByTagName(e);if("*"===e){while(n=o[i++])1===n.nodeType&&r.push(n);return r}return o},b.find.CLASS=d.getElementsByClassName&&function(e,t){if("undefined"!=typeof t.getElementsByClassName&&E)return t.getElementsByClassName(e)},s=[],v=[],(d.qsa=K.test(C.querySelectorAll))&&(ce(function(e){var t;a.appendChild(e).innerHTML="",e.querySelectorAll("[msallowcapture^='']").length&&v.push("[*^$]="+M+"*(?:''|\"\")"),e.querySelectorAll("[selected]").length||v.push("\\["+M+"*(?:value|"+R+")"),e.querySelectorAll("[id~="+S+"-]").length||v.push("~="),(t=C.createElement("input")).setAttribute("name",""),e.appendChild(t),e.querySelectorAll("[name='']").length||v.push("\\["+M+"*name"+M+"*="+M+"*(?:''|\"\")"),e.querySelectorAll(":checked").length||v.push(":checked"),e.querySelectorAll("a#"+S+"+*").length||v.push(".#.+[+~]"),e.querySelectorAll("\\\f"),v.push("[\\r\\n\\f]")}),ce(function(e){e.innerHTML="";var t=C.createElement("input");t.setAttribute("type","hidden"),e.appendChild(t).setAttribute("name","D"),e.querySelectorAll("[name=d]").length&&v.push("name"+M+"*[*^$|!~]?="),2!==e.querySelectorAll(":enabled").length&&v.push(":enabled",":disabled"),a.appendChild(e).disabled=!0,2!==e.querySelectorAll(":disabled").length&&v.push(":enabled",":disabled"),e.querySelectorAll("*,:x"),v.push(",.*:")})),(d.matchesSelector=K.test(c=a.matches||a.webkitMatchesSelector||a.mozMatchesSelector||a.oMatchesSelector||a.msMatchesSelector))&&ce(function(e){d.disconnectedMatch=c.call(e,"*"),c.call(e,"[s!='']:x"),s.push("!=",F)}),v=v.length&&new RegExp(v.join("|")),s=s.length&&new RegExp(s.join("|")),t=K.test(a.compareDocumentPosition),y=t||K.test(a.contains)?function(e,t){var n=9===e.nodeType?e.documentElement:e,r=t&&t.parentNode;return 
e===r||!(!r||1!==r.nodeType||!(n.contains?n.contains(r):e.compareDocumentPosition&&16&e.compareDocumentPosition(r)))}:function(e,t){if(t)while(t=t.parentNode)if(t===e)return!0;return!1},D=t?function(e,t){if(e===t)return l=!0,0;var n=!e.compareDocumentPosition-!t.compareDocumentPosition;return n||(1&(n=(e.ownerDocument||e)==(t.ownerDocument||t)?e.compareDocumentPosition(t):1)||!d.sortDetached&&t.compareDocumentPosition(e)===n?e==C||e.ownerDocument==p&&y(p,e)?-1:t==C||t.ownerDocument==p&&y(p,t)?1:u?P(u,e)-P(u,t):0:4&n?-1:1)}:function(e,t){if(e===t)return l=!0,0;var n,r=0,i=e.parentNode,o=t.parentNode,a=[e],s=[t];if(!i||!o)return e==C?-1:t==C?1:i?-1:o?1:u?P(u,e)-P(u,t):0;if(i===o)return pe(e,t);n=e;while(n=n.parentNode)a.unshift(n);n=t;while(n=n.parentNode)s.unshift(n);while(a[r]===s[r])r++;return r?pe(a[r],s[r]):a[r]==p?-1:s[r]==p?1:0}),C},se.matches=function(e,t){return se(e,null,null,t)},se.matchesSelector=function(e,t){if(T(e),d.matchesSelector&&E&&!N[t+" "]&&(!s||!s.test(t))&&(!v||!v.test(t)))try{var n=c.call(e,t);if(n||d.disconnectedMatch||e.document&&11!==e.document.nodeType)return n}catch(e){N(t,!0)}return 0":{dir:"parentNode",first:!0}," ":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(e){return e[1]=e[1].replace(te,ne),e[3]=(e[3]||e[4]||e[5]||"").replace(te,ne),"~="===e[2]&&(e[3]=" "+e[3]+" "),e.slice(0,4)},CHILD:function(e){return e[1]=e[1].toLowerCase(),"nth"===e[1].slice(0,3)?(e[3]||se.error(e[0]),e[4]=+(e[4]?e[5]+(e[6]||1):2*("even"===e[3]||"odd"===e[3])),e[5]=+(e[7]+e[8]||"odd"===e[3])):e[3]&&se.error(e[0]),e},PSEUDO:function(e){var t,n=!e[6]&&e[2];return G.CHILD.test(e[0])?null:(e[3]?e[2]=e[4]||e[5]||"":n&&X.test(n)&&(t=h(n,!0))&&(t=n.indexOf(")",n.length-t)-n.length)&&(e[0]=e[0].slice(0,t),e[2]=n.slice(0,t)),e.slice(0,3))}},filter:{TAG:function(e){var t=e.replace(te,ne).toLowerCase();return"*"===e?function(){return!0}:function(e){return e.nodeName&&e.nodeName.toLowerCase()===t}},CLASS:function(e){var t=m[e+" "];return t||(t=new RegExp("(^|"+M+")"+e+"("+M+"|$)"))&&m(e,function(e){return t.test("string"==typeof e.className&&e.className||"undefined"!=typeof e.getAttribute&&e.getAttribute("class")||"")})},ATTR:function(n,r,i){return function(e){var t=se.attr(e,n);return null==t?"!="===r:!r||(t+="","="===r?t===i:"!="===r?t!==i:"^="===r?i&&0===t.indexOf(i):"*="===r?i&&-1:\x20\t\r\n\f]*)[\x20\t\r\n\f]*\/?>(?:<\/\1>|)$/i;function D(e,n,r){return m(n)?S.grep(e,function(e,t){return!!n.call(e,t,e)!==r}):n.nodeType?S.grep(e,function(e){return e===n!==r}):"string"!=typeof n?S.grep(e,function(e){return-1)[^>]*|#([\w-]+))$/;(S.fn.init=function(e,t,n){var r,i;if(!e)return this;if(n=n||j,"string"==typeof e){if(!(r="<"===e[0]&&">"===e[e.length-1]&&3<=e.length?[null,e,null]:q.exec(e))||!r[1]&&t)return!t||t.jquery?(t||n).find(e):this.constructor(t).find(e);if(r[1]){if(t=t instanceof S?t[0]:t,S.merge(this,S.parseHTML(r[1],t&&t.nodeType?t.ownerDocument||t:E,!0)),N.test(r[1])&&S.isPlainObject(t))for(r in t)m(this[r])?this[r](t[r]):this.attr(r,t[r]);return this}return(i=E.getElementById(r[2]))&&(this[0]=i,this.length=1),this}return e.nodeType?(this[0]=e,this.length=1,this):m(e)?void 0!==n.ready?n.ready(e):e(S):S.makeArray(e,this)}).prototype=S.fn,j=S(E);var L=/^(?:parents|prev(?:Until|All))/,H={children:!0,contents:!0,next:!0,prev:!0};function O(e,t){while((e=e[t])&&1!==e.nodeType);return e}S.fn.extend({has:function(e){var t=S(e,this),n=t.length;return this.filter(function(){for(var 
e=0;e\x20\t\r\n\f]*)/i,he=/^$|^module$|\/(?:java|ecma)script/i;ce=E.createDocumentFragment().appendChild(E.createElement("div")),(fe=E.createElement("input")).setAttribute("type","radio"),fe.setAttribute("checked","checked"),fe.setAttribute("name","t"),ce.appendChild(fe),y.checkClone=ce.cloneNode(!0).cloneNode(!0).lastChild.checked,ce.innerHTML="",y.noCloneChecked=!!ce.cloneNode(!0).lastChild.defaultValue,ce.innerHTML="",y.option=!!ce.lastChild;var ge={thead:[1,"","
    "],col:[2,"","
    "],tr:[2,"","
    "],td:[3,"","
    "],_default:[0,"",""]};function ve(e,t){var n;return n="undefined"!=typeof e.getElementsByTagName?e.getElementsByTagName(t||"*"):"undefined"!=typeof e.querySelectorAll?e.querySelectorAll(t||"*"):[],void 0===t||t&&A(e,t)?S.merge([e],n):n}function ye(e,t){for(var n=0,r=e.length;n",""]);var me=/<|&#?\w+;/;function xe(e,t,n,r,i){for(var o,a,s,u,l,c,f=t.createDocumentFragment(),p=[],d=0,h=e.length;d\s*$/g;function qe(e,t){return A(e,"table")&&A(11!==t.nodeType?t:t.firstChild,"tr")&&S(e).children("tbody")[0]||e}function Le(e){return e.type=(null!==e.getAttribute("type"))+"/"+e.type,e}function He(e){return"true/"===(e.type||"").slice(0,5)?e.type=e.type.slice(5):e.removeAttribute("type"),e}function Oe(e,t){var n,r,i,o,a,s;if(1===t.nodeType){if(Y.hasData(e)&&(s=Y.get(e).events))for(i in Y.remove(t,"handle events"),s)for(n=0,r=s[i].length;n").attr(n.scriptAttrs||{}).prop({charset:n.scriptCharset,src:n.url}).on("load error",i=function(e){r.remove(),i=null,e&&t("error"===e.type?404:200,e.type)}),E.head.appendChild(r[0])},abort:function(){i&&i()}}});var Ut,Xt=[],Vt=/(=)\?(?=&|$)|\?\?/;S.ajaxSetup({jsonp:"callback",jsonpCallback:function(){var e=Xt.pop()||S.expando+"_"+Ct.guid++;return this[e]=!0,e}}),S.ajaxPrefilter("json jsonp",function(e,t,n){var r,i,o,a=!1!==e.jsonp&&(Vt.test(e.url)?"url":"string"==typeof e.data&&0===(e.contentType||"").indexOf("application/x-www-form-urlencoded")&&Vt.test(e.data)&&"data");if(a||"jsonp"===e.dataTypes[0])return r=e.jsonpCallback=m(e.jsonpCallback)?e.jsonpCallback():e.jsonpCallback,a?e[a]=e[a].replace(Vt,"$1"+r):!1!==e.jsonp&&(e.url+=(Et.test(e.url)?"&":"?")+e.jsonp+"="+r),e.converters["script json"]=function(){return o||S.error(r+" was not called"),o[0]},e.dataTypes[0]="json",i=C[r],C[r]=function(){o=arguments},n.always(function(){void 0===i?S(C).removeProp(r):C[r]=i,e[r]&&(e.jsonpCallback=t.jsonpCallback,Xt.push(r)),o&&m(i)&&i(o[0]),o=i=void 0}),"script"}),y.createHTMLDocument=((Ut=E.implementation.createHTMLDocument("").body).innerHTML="
    ",2===Ut.childNodes.length),S.parseHTML=function(e,t,n){return"string"!=typeof e?[]:("boolean"==typeof t&&(n=t,t=!1),t||(y.createHTMLDocument?((r=(t=E.implementation.createHTMLDocument("")).createElement("base")).href=E.location.href,t.head.appendChild(r)):t=E),o=!n&&[],(i=N.exec(e))?[t.createElement(i[1])]:(i=xe([e],t,o),o&&o.length&&S(o).remove(),S.merge([],i.childNodes)));var r,i,o},S.fn.load=function(e,t,n){var r,i,o,a=this,s=e.indexOf(" ");return-1").append(S.parseHTML(e)).find(r):e)}).always(n&&function(e,t){a.each(function(){n.apply(this,o||[e.responseText,t,e])})}),this},S.expr.pseudos.animated=function(t){return S.grep(S.timers,function(e){return t===e.elem}).length},S.offset={setOffset:function(e,t,n){var r,i,o,a,s,u,l=S.css(e,"position"),c=S(e),f={};"static"===l&&(e.style.position="relative"),s=c.offset(),o=S.css(e,"top"),u=S.css(e,"left"),("absolute"===l||"fixed"===l)&&-1<(o+u).indexOf("auto")?(a=(r=c.position()).top,i=r.left):(a=parseFloat(o)||0,i=parseFloat(u)||0),m(t)&&(t=t.call(e,n,S.extend({},s))),null!=t.top&&(f.top=t.top-s.top+a),null!=t.left&&(f.left=t.left-s.left+i),"using"in t?t.using.call(e,f):("number"==typeof f.top&&(f.top+="px"),"number"==typeof f.left&&(f.left+="px"),c.css(f))}},S.fn.extend({offset:function(t){if(arguments.length)return void 0===t?this:this.each(function(e){S.offset.setOffset(this,t,e)});var e,n,r=this[0];return r?r.getClientRects().length?(e=r.getBoundingClientRect(),n=r.ownerDocument.defaultView,{top:e.top+n.pageYOffset,left:e.left+n.pageXOffset}):{top:0,left:0}:void 0},position:function(){if(this[0]){var e,t,n,r=this[0],i={top:0,left:0};if("fixed"===S.css(r,"position"))t=r.getBoundingClientRect();else{t=this.offset(),n=r.ownerDocument,e=r.offsetParent||n.documentElement;while(e&&(e===n.body||e===n.documentElement)&&"static"===S.css(e,"position"))e=e.parentNode;e&&e!==r&&1===e.nodeType&&((i=S(e).offset()).top+=S.css(e,"borderTopWidth",!0),i.left+=S.css(e,"borderLeftWidth",!0))}return{top:t.top-i.top-S.css(r,"marginTop",!0),left:t.left-i.left-S.css(r,"marginLeft",!0)}}},offsetParent:function(){return this.map(function(){var e=this.offsetParent;while(e&&"static"===S.css(e,"position"))e=e.offsetParent;return e||re})}}),S.each({scrollLeft:"pageXOffset",scrollTop:"pageYOffset"},function(t,i){var o="pageYOffset"===i;S.fn[t]=function(e){return $(this,function(e,t,n){var r;if(x(e)?r=e:9===e.nodeType&&(r=e.defaultView),void 0===n)return r?r[i]:e[t];r?r.scrollTo(o?r.pageXOffset:n,o?n:r.pageYOffset):e[t]=n},t,e,arguments.length)}}),S.each(["top","left"],function(e,n){S.cssHooks[n]=$e(y.pixelPosition,function(e,t){if(t)return t=Be(e,n),Me.test(t)?S(e).position()[n]+"px":t})}),S.each({Height:"height",Width:"width"},function(a,s){S.each({padding:"inner"+a,content:s,"":"outer"+a},function(r,o){S.fn[o]=function(e,t){var n=arguments.length&&(r||"boolean"!=typeof e),i=r||(!0===e||!0===t?"margin":"border");return $(this,function(e,t,n){var r;return x(e)?0===o.indexOf("outer")?e["inner"+a]:e.document.documentElement["client"+a]:9===e.nodeType?(r=e.documentElement,Math.max(e.body["scroll"+a],r["scroll"+a],e.body["offset"+a],r["offset"+a],r["client"+a])):void 0===n?S.css(e,t,i):S.style(e,t,n,i)},s,n?e:void 0,n)}})}),S.each(["ajaxStart","ajaxStop","ajaxComplete","ajaxError","ajaxSuccess","ajaxSend"],function(e,t){S.fn[t]=function(e){return this.on(t,e)}}),S.fn.extend({bind:function(e,t,n){return this.on(e,null,t,n)},unbind:function(e,t){return this.off(e,null,t)},delegate:function(e,t,n,r){return this.on(t,e,n,r)},undelegate:function(e,t,n){return 
1===arguments.length?this.off(e,"**"):this.off(t,e||"**",n)},hover:function(e,t){return this.mouseenter(e).mouseleave(t||e)}}),S.each("blur focus focusin focusout resize scroll click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup contextmenu".split(" "),function(e,n){S.fn[n]=function(e,t){return 0 Date: Thu, 1 Oct 2020 14:50:32 +0900 Subject: [PATCH 0138/1009] [SPARK-32992][SQL] Map Oracle's ROWID type to StringType in read via JDBC ### What changes were proposed in this pull request? Convert the `ROWID` type in the Oracle JDBC dialect to Catalyst's `StringType`. The doc for Oracle 19c says explicitly that the type must be string: https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Data-Types.html#GUID-AEF1FE4C-2DE5-4BE7-BB53-83AD8F1E34EF ### Why are the changes needed? To avoid the exception showed in https://stackoverflow.com/questions/52244492/spark-jdbc-dataframereader-fails-to-read-oracle-table-with-datatype-as-rowid ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? N/A Closes #29884 from MaxGekk/jdbc-oracle-rowid-string. Authored-by: Max Gekk Signed-off-by: Takeshi Yamamuro --- .../spark/sql/jdbc/OracleIntegrationSuite.scala | 11 +++++++++++ .../org/apache/spark/sql/jdbc/OracleDialect.scala | 6 ++++++ 2 files changed, 17 insertions(+) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala index 9c59023cd8766..ce63d1df6f028 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala @@ -518,4 +518,15 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark """.stripMargin.replaceAll("\n", " ")) assert(sql("select id, d, t from queryOption").collect.toSet == expectedResult) } + + test("SPARK-32992: map Oracle's ROWID type to StringType") { + val rows = spark.read.format("jdbc") + .option("url", jdbcUrl) + .option("query", "SELECT ROWID from datetime") + .load() + .collect() + val types = rows(0).toSeq.map(x => x.getClass.toString) + assert(types(0).equals("class java.lang.String")) + assert(!rows(0).getString(0).isEmpty) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala index 4c0623729e00d..3f12b9acd0fc4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala @@ -64,6 +64,12 @@ private case object OracleDialect extends JdbcDialect { => Some(TimestampType) // Value for Timestamp with Time Zone in Oracle case BINARY_FLOAT => Some(FloatType) // Value for OracleTypes.BINARY_FLOAT case BINARY_DOUBLE => Some(DoubleType) // Value for OracleTypes.BINARY_DOUBLE + // scalastyle:off line.size.limit + // According to the documentation for Oracle Database 19c: + // "Values of the ROWID pseudocolumn are strings representing the address of each row." 
+ // https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Data-Types.html#GUID-AEF1FE4C-2DE5-4BE7-BB53-83AD8F1E34EF + // scalastyle:on line.size.limit + case Types.ROWID => Some(StringType) case _ => None } } From d3dbe1a9076c8a76be0590ca071bfbec6114813b Mon Sep 17 00:00:00 2001 From: iRakson Date: Thu, 1 Oct 2020 20:50:16 +0900 Subject: [PATCH 0139/1009] [SQL][DOC][MINOR] Corrects input table names in the examples of CREATE FUNCTION doc ### What changes were proposed in this pull request? Fix Typo ### Why are the changes needed? To maintain consistency. Correct table name should be used for SELECT command. ### Does this PR introduce _any_ user-facing change? Yes. Now CREATE FUNCTION doc will show the correct name of table. ### How was this patch tested? Manually. Doc changes. Closes #29920 from iRakson/fixTypo. Authored-by: iRakson Signed-off-by: Takeshi Yamamuro --- docs/sql-ref-syntax-ddl-create-function.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/sql-ref-syntax-ddl-create-function.md b/docs/sql-ref-syntax-ddl-create-function.md index aa6c1fad7b56b..dfa4f4f8123d8 100644 --- a/docs/sql-ref-syntax-ddl-create-function.md +++ b/docs/sql-ref-syntax-ddl-create-function.md @@ -112,7 +112,7 @@ SHOW USER FUNCTIONS; +------------------+ -- Invoke the function. Every selected value should be incremented by 10. -SELECT simple_udf(c1) AS function_return_value FROM t1; +SELECT simple_udf(c1) AS function_return_value FROM test; +---------------------+ |function_return_value| +---------------------+ @@ -150,7 +150,7 @@ CREATE OR REPLACE FUNCTION simple_udf AS 'SimpleUdfR' USING JAR '/tmp/SimpleUdfR.jar'; -- Invoke the function. Every selected value should be incremented by 20. -SELECT simple_udf(c1) AS function_return_value FROM t1; +SELECT simple_udf(c1) AS function_return_value FROM test; +---------------------+ |function_return_value| +---------------------+ From 0963fcd848f62b4f2231dfcf67f9beabf927c21e Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 1 Oct 2020 08:37:07 -0500 Subject: [PATCH 0140/1009] [SPARK-33024][SQL] Fix CodeGen fallback issue of UDFSuite in Scala 2.13 ### What changes were proposed in this pull request? After `SPARK-32851` set `CODEGEN_FACTORY_MODE` to `CODEGEN_ONLY` of `sparkConf` in `SharedSparkSessionBase` to construction `SparkSession` in test, the test suite `SPARK-32459: UDF should not fail on WrappedArray` in s.sql.UDFSuite exposed a codegen fallback issue in Scala 2.13 as follow: ``` - SPARK-32459: UDF should not fail on WrappedArray *** FAILED *** Caused by: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 47, Column 99: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 47, Column 99: No applicable constructor/method found for zero actual parameters; candidates are: "public scala.collection.mutable.Builder scala.collection.mutable.ArraySeq$.newBuilder(java.lang.Object)", "public scala.collection.mutable.Builder scala.collection.mutable.ArraySeq$.newBuilder(scala.reflect.ClassTag)", "public abstract scala.collection.mutable.Builder scala.collection.EvidenceIterableFactory.newBuilder(java.lang.Object)" ``` The root cause is `WrappedArray` represent `mutable.ArraySeq` in Scala 2.13 and has a different constructor of `newBuilder` method. The main change of is pr is add Scala 2.13 only code part to deal with `case match WrappedArray` in Scala 2.13. ### Why are the changes needed? 
We need to support a Scala 2.13 build ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Scala 2.12: Pass the Jenkins or GitHub Action - Scala 2.13: All tests passed. Do the following: ``` dev/change-scala-version.sh 2.13 mvn clean install -DskipTests -pl sql/core -Pscala-2.13 -am mvn test -pl sql/core -Pscala-2.13 ``` **Before** ``` Tests: succeeded 8540, failed 1, canceled 1, ignored 52, pending 0 *** 1 TEST FAILED *** ``` **After** ``` Tests: succeeded 8541, failed 0, canceled 1, ignored 52, pending 0 All tests passed. ``` Closes #29903 from LuciferYang/fix-udfsuite. Authored-by: yangjie01 Signed-off-by: Sean Owen --- .../expressions/objects/objects.scala | 53 ++++++++++++++----- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 7933d05c8dba4..9701420e65870 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -22,7 +22,7 @@ import java.lang.reflect.{Method, Modifier} import scala.collection.JavaConverters._ import scala.collection.mutable.{Builder, IndexedSeq, WrappedArray} import scala.reflect.ClassTag -import scala.util.Try +import scala.util.{Properties, Try} import org.apache.spark.{SparkConf, SparkEnv} import org.apache.spark.serializer._ @@ -916,19 +916,44 @@ case class MapObjects private( val (initCollection, addElement, getResult): (String, String => String, String) = customCollectionCls match { case Some(cls) if classOf[WrappedArray[_]].isAssignableFrom(cls) => - // Scala WrappedArray - val getBuilder = s"${cls.getName}$$.MODULE$$.newBuilder()" - val builder = ctx.freshName("collectionBuilder") - ( - s""" - ${classOf[Builder[_, _]].getName} $builder = $getBuilder; - $builder.sizeHint($dataLength); - """, - (genValue: String) => s"$builder.$$plus$$eq($genValue);", - s"(${cls.getName}) ${classOf[WrappedArray[_]].getName}$$." + - s"MODULE$$.make(((${classOf[IndexedSeq[_]].getName})$builder" + - s".result()).toArray(scala.reflect.ClassTag$$.MODULE$$.Object()));" - ) + def doCodeGenForScala212 = { + // WrappedArray in Scala 2.12 + val getBuilder = s"${cls.getName}$$.MODULE$$.newBuilder()" + val builder = ctx.freshName("collectionBuilder") + ( + s""" + ${classOf[Builder[_, _]].getName} $builder = $getBuilder; + $builder.sizeHint($dataLength); + """, + (genValue: String) => s"$builder.$$plus$$eq($genValue);", + s"(${cls.getName}) ${classOf[WrappedArray[_]].getName}$$." 
+ + s"MODULE$$.make(((${classOf[IndexedSeq[_]].getName})$builder" + + s".result()).toArray(scala.reflect.ClassTag$$.MODULE$$.Object()));" + ) + } + + def doCodeGenForScala213 = { + // In Scala 2.13, WrappedArray is mutable.ArraySeq and newBuilder method need + // a ClassTag type construction parameter + val getBuilder = s"${cls.getName}$$.MODULE$$.newBuilder(" + + s"scala.reflect.ClassTag$$.MODULE$$.Object())" + val builder = ctx.freshName("collectionBuilder") + ( + s""" + ${classOf[Builder[_, _]].getName} $builder = $getBuilder; + $builder.sizeHint($dataLength); + """, + (genValue: String) => s"$builder.$$plus$$eq($genValue);", + s"(${cls.getName})$builder.result();" + ) + } + + val scalaVersion = Properties.versionNumberString + if (scalaVersion.startsWith("2.12")) { + doCodeGenForScala212 + } else { + doCodeGenForScala213 + } case Some(cls) if classOf[Seq[_]].isAssignableFrom(cls) || classOf[scala.collection.Set[_]].isAssignableFrom(cls) => // Scala sequence or set From 9c618b33084c8ff6f68e5183e2574ba368fb7758 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 1 Oct 2020 12:41:40 -0700 Subject: [PATCH 0141/1009] [SPARK-33047][BUILD] Upgrade hive-storage-api to 2.7.2 ### What changes were proposed in this pull request? This PR aims to upgrade Apache Hive `hive-storage-api` library from 2.7.1 to 2.7.2. ### Why are the changes needed? [storage-api 2.7.2](https://github.com/apache/hive/commits/rel/storage-release-2.7.2/storage-api) has the following extension and can be used when users uses a provided orc dependency. [HIVE-22959](https://github.com/apache/hive/commit/dade9919d904f8a4bff12a9130c150301a4713ed#diff-ccfc9dd7584117f531322cda3a29f3c3) : Extend storage-api to expose FilterContext [HIVE-23215](https://github.com/apache/hive/commit/361925d2f3675bb9c6566b615a4b53faee335385#diff-ccfc9dd7584117f531322cda3a29f3c3) : Make FilterContext and MutableFilterContext interfaces ### Does this PR introduce _any_ user-facing change? Yes. This is a dependency change. ### How was this patch tested? Pass the existing tests. Closes #29923 from dongjoon-hyun/SPARK-33047. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 6d1934b46261b..7b31bdd98ef26 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -92,7 +92,7 @@ hive-shims-0.23/2.3.7//hive-shims-0.23-2.3.7.jar hive-shims-common/2.3.7//hive-shims-common-2.3.7.jar hive-shims-scheduler/2.3.7//hive-shims-scheduler-2.3.7.jar hive-shims/2.3.7//hive-shims-2.3.7.jar -hive-storage-api/2.7.1//hive-storage-api-2.7.1.jar +hive-storage-api/2.7.2//hive-storage-api-2.7.2.jar hive-vector-code-gen/2.3.7//hive-vector-code-gen-2.3.7.jar hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 2e29d831b9e66..960ea5f836ddf 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -91,7 +91,7 @@ hive-shims-0.23/2.3.7//hive-shims-0.23-2.3.7.jar hive-shims-common/2.3.7//hive-shims-common-2.3.7.jar hive-shims-scheduler/2.3.7//hive-shims-scheduler-2.3.7.jar hive-shims/2.3.7//hive-shims-2.3.7.jar -hive-storage-api/2.7.1//hive-storage-api-2.7.1.jar +hive-storage-api/2.7.2//hive-storage-api-2.7.2.jar hive-vector-code-gen/2.3.7//hive-vector-code-gen-2.3.7.jar hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar diff --git a/pom.xml b/pom.xml index 873daaa6161ee..421d932cef5fa 100644 --- a/pom.xml +++ b/pom.xml @@ -237,7 +237,7 @@ compile compile provided - 2.7.1 + 2.7.2 compile compile compile From e62d24717eb774f1c7adfd0fbe39640b96bc661d Mon Sep 17 00:00:00 2001 From: ulysses Date: Thu, 1 Oct 2020 15:58:01 -0400 Subject: [PATCH 0142/1009] [SPARK-32585][SQL] Support scala enumeration in ScalaReflection ### What changes were proposed in this pull request? Add code in `ScalaReflection` to support scala enumeration and make enumeration type as string type in Spark. ### Why are the changes needed? We support java enum but failed with scala enum, it's better to keep the same behavior. Here is a example. ``` package test object TestEnum extends Enumeration { type TestEnum = Value val E1, E2, E3 = Value } import TestEnum._ case class TestClass(i: Int, e: TestEnum) { } import test._ Seq(TestClass(1, TestEnum.E1)).toDS ``` Before this PR ``` Exception in thread "main" java.lang.UnsupportedOperationException: No Encoder found for test.TestEnum.TestEnum - field (class: "scala.Enumeration.Value", name: "e") - root class: "test.TestClass" at org.apache.spark.sql.catalyst.ScalaReflection$.$anonfun$serializerFor$1(ScalaReflection.scala:567) at scala.reflect.internal.tpe.TypeConstraints$UndoLog.undo(TypeConstraints.scala:69) at org.apache.spark.sql.catalyst.ScalaReflection.cleanUpReflectionObjects(ScalaReflection.scala:882) at org.apache.spark.sql.catalyst.ScalaReflection.cleanUpReflectionObjects$(ScalaReflection.scala:881) ``` After this PR `org.apache.spark.sql.Dataset[test.TestClass] = [i: int, e: string]` ### Does this PR introduce _any_ user-facing change? Yes, user can make case class which include scala enumeration field as dataset. ### How was this patch tested? Add test. Closes #29403 from ulysses-you/SPARK-32585. 
Authored-by: ulysses Signed-off-by: Tathagata Das --- .../spark/sql/catalyst/ScalaReflection.scala | 28 +++++++++++++++++++ .../sql/catalyst/ScalaReflectionSuite.scala | 15 ++++++++++ .../encoders/ExpressionEncoderSuite.scala | 10 ++++++- .../org/apache/spark/sql/DatasetSuite.scala | 15 +++++++++- 4 files changed, 66 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index a9c8b0bf4df2c..c65e181181e83 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -30,6 +30,7 @@ import org.apache.spark.sql.catalyst.expressions.objects._ import org.apache.spark.sql.catalyst.util.{ArrayData, MapData} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} +import org.apache.spark.util.Utils /** @@ -377,6 +378,23 @@ object ScalaReflection extends ScalaReflection { expressions.Literal.create(null, ObjectType(cls)), newInstance ) + + case t if isSubtype(t, localTypeOf[Enumeration#Value]) => + // package example + // object Foo extends Enumeration { + // type Foo = Value + // val E1, E2 = Value + // } + // the fullName of tpe is example.Foo.Foo, but we need example.Foo so that + // we can call example.Foo.withName to deserialize string to enumeration. + val parent = t.asInstanceOf[TypeRef].pre.typeSymbol.asClass + val cls = mirror.runtimeClass(parent) + StaticInvoke( + cls, + ObjectType(getClassFromType(t)), + "withName", + createDeserializerForString(path, false) :: Nil, + returnNullable = false) } } @@ -561,6 +579,14 @@ object ScalaReflection extends ScalaReflection { } createSerializerForObject(inputObject, fields) + case t if isSubtype(t, localTypeOf[Enumeration#Value]) => + createSerializerForString( + Invoke( + inputObject, + "toString", + ObjectType(classOf[java.lang.String]), + returnNullable = false)) + case _ => throw new UnsupportedOperationException( s"No Encoder found for $tpe\n" + walkedTypePath) @@ -738,6 +764,8 @@ object ScalaReflection extends ScalaReflection { val Schema(dataType, nullable) = schemaFor(fieldType) StructField(fieldName, dataType, nullable) }), nullable = true) + case t if isSubtype(t, localTypeOf[Enumeration#Value]) => + Schema(StringType, nullable = true) case other => throw new UnsupportedOperationException(s"Schema for type $other is not supported") } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala index b981a50499bf5..e8c7aed6d72ce 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala @@ -22,6 +22,7 @@ import java.sql.{Date, Timestamp} import scala.reflect.runtime.universe.TypeTag import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.FooEnum.FooEnum import org.apache.spark.sql.catalyst.analysis.UnresolvedExtractValue import org.apache.spark.sql.catalyst.expressions.{CreateNamedStruct, Expression, If, SpecificInternalRow, UpCast} import org.apache.spark.sql.catalyst.expressions.objects.{AssertNotNull, NewInstance} @@ -90,6 +91,13 @@ case class FooWithAnnotation(f1: String @FooAnnotation, f2: Option[String] @FooA case class SpecialCharAsFieldData(`field.1`: 
String, `field 2`: String) +object FooEnum extends Enumeration { + type FooEnum = Value + val E1, E2 = Value +} + +case class FooClassWithEnum(i: Int, e: FooEnum) + object TestingUDT { @SQLUserDefinedType(udt = classOf[NestedStructUDT]) class NestedStruct(val a: Integer, val b: Long, val c: Double) @@ -437,4 +445,11 @@ class ScalaReflectionSuite extends SparkFunSuite { StructField("f2", StringType)))) assert(deserializerFor[FooWithAnnotation].dataType == ObjectType(classOf[FooWithAnnotation])) } + + test("SPARK-32585: Support scala enumeration in ScalaReflection") { + assert(serializerFor[FooClassWithEnum].dataType == StructType(Seq( + StructField("i", IntegerType, false), + StructField("e", StringType, true)))) + assert(deserializerFor[FooClassWithEnum].dataType == ObjectType(classOf[FooClassWithEnum])) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala index 6a094d4aaddae..f2598a925e08e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala @@ -25,7 +25,7 @@ import scala.collection.mutable.ArrayBuffer import scala.reflect.runtime.universe.TypeTag import org.apache.spark.sql.{Encoder, Encoders} -import org.apache.spark.sql.catalyst.{OptionalData, PrimitiveData} +import org.apache.spark.sql.catalyst.{FooClassWithEnum, FooEnum, OptionalData, PrimitiveData} import org.apache.spark.sql.catalyst.analysis.AnalysisTest import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions.AttributeReference @@ -389,6 +389,14 @@ class ExpressionEncoderSuite extends CodegenInterpretedPlanTest with AnalysisTes assert(e.getMessage.contains("tuple with more than 22 elements are not supported")) } + encodeDecodeTest((1, FooEnum.E1), "Tuple with Int and scala Enum") + encodeDecodeTest((null, FooEnum.E1, FooEnum.E2), "Tuple with Null and scala Enum") + encodeDecodeTest(Seq(FooEnum.E1, null), "Seq with scala Enum") + encodeDecodeTest(Map("key" -> FooEnum.E1), "Map with String key and scala Enum") + encodeDecodeTest(Map(FooEnum.E1 -> "value"), "Map with scala Enum key and String value") + encodeDecodeTest(FooClassWithEnum(1, FooEnum.E1), "case class with Int and scala Enum") + encodeDecodeTest(FooEnum.E1, "scala Enum") + // Scala / Java big decimals ---------------------------------------------------------- encodeDecodeTest(BigDecimal(("9" * 20) + "." 
+ "9" * 18), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 4923e8b556907..3c914ae043677 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -25,7 +25,7 @@ import org.scalatest.exceptions.TestFailedException import org.scalatest.prop.TableDrivenPropertyChecks._ import org.apache.spark.{SparkException, TaskContext} -import org.apache.spark.sql.catalyst.ScroogeLikeExample +import org.apache.spark.sql.catalyst.{FooClassWithEnum, FooEnum, ScroogeLikeExample} import org.apache.spark.sql.catalyst.encoders.{OuterScopes, RowEncoder} import org.apache.spark.sql.catalyst.plans.{LeftAnti, LeftSemi} import org.apache.spark.sql.catalyst.util.sideBySide @@ -1926,6 +1926,19 @@ class DatasetSuite extends QueryTest } } } + + test("SPARK-32585: Support scala enumeration in ScalaReflection") { + checkDataset( + Seq(FooClassWithEnum(1, FooEnum.E1), FooClassWithEnum(2, FooEnum.E2)).toDS(), + Seq(FooClassWithEnum(1, FooEnum.E1), FooClassWithEnum(2, FooEnum.E2)): _* + ) + + // test null + checkDataset( + Seq(FooClassWithEnum(1, null), FooClassWithEnum(2, FooEnum.E2)).toDS(), + Seq(FooClassWithEnum(1, null), FooClassWithEnum(2, FooEnum.E2)): _* + ) + } } object AssertExecutionId { From 005999721f103bce653c39a0001cba7f2e04b7c8 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Thu, 1 Oct 2020 18:01:23 -0500 Subject: [PATCH 0143/1009] [SPARK-33046][DOCS] Update how to build doc for Scala 2.13 with sbt ### What changes were proposed in this pull request? This PR fixes the description how to build Spark for Scala 2.13 with sbt. In the current doc, how to build Spark for Scala 2.13 with sbt is described like: ![scala-2 13-build-before](https://user-images.githubusercontent.com/4736016/94816248-80c3e900-0436-11eb-9bc2-99af5786971a.png) But build fails with this command because scala-2.13 profile is not enabled and scala-parallel-collections is absent. ``` [error] /home/kou/work/oss/spark-scala-2.13/core/src/main/scala/org/apache/spark/rdd/UnionRDD.scala:23: object parallel is not a member of package collection ``` The correct command should be: ``` build/sbt -Pspark-2.13 compile ``` ### Why are the changes needed? The build command is wrong. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? I checked that `sbt -Pspark-2.13` is correct with the following command: ``` build/sbt -Dscala.version=2.13.3 -Phive -Phive-thriftserver -Pyarn -Pkubernetes compile ``` I also build the modified doc and checked the generated html: ![spark-scala-2 13-build-doc-after](https://user-images.githubusercontent.com/4736016/94869259-f2745500-047f-11eb-89e5-20816f3ed24d.png) Closes #29921 from sarutak/fix-scala-2.13-build-doc. Authored-by: Kousuke Saruta Signed-off-by: Sean Owen --- docs/building-spark.md | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/docs/building-spark.md b/docs/building-spark.md index 3d12a60e2b974..73c527b7a5ed6 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -265,15 +265,13 @@ Change the major Scala version using (e.g. 2.13): ./dev/change-scala-version.sh 2.13 -For Maven, please enable the profile (e.g. 2.13): +Enable the profile (e.g. 2.13): + # For Maven ./build/mvn -Pscala-2.13 compile -For SBT, specify a complete scala version using (e.g. 
2.13.0): - - ./build/sbt -Dscala.version=2.13.0 - -Otherwise, the sbt-pom-reader plugin will use the `scala.version` specified in the spark-parent pom. + # For sbt + ./build/sbt -Pscala-2.13 compile ## Running Jenkins tests with Github Enterprise From 8657742ec7570c8292ed45629fc61b9791f28796 Mon Sep 17 00:00:00 2001 From: Shruti Gumma Date: Thu, 1 Oct 2020 16:33:19 -0700 Subject: [PATCH 0144/1009] [SPARK-32996][WEB-UI][FOLLOWUP] Move ExecutorSummarySuite to proper path ### What changes were proposed in this pull request? This change updates the test file location in #29872 to proper path. ### Why are the changes needed? ExecutorSummarySuite.scala should be in core/src/test/scala instead of core/src/test/java. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit tests Closes #29926 from shrutig/SPARK-32996. Authored-by: Shruti Gumma Signed-off-by: Liang-Chi Hsieh --- .../org/apache/spark/status/api/v1/ExecutorSummarySuite.scala | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename core/src/test/{java => scala}/org/apache/spark/status/api/v1/ExecutorSummarySuite.scala (100%) diff --git a/core/src/test/java/org/apache/spark/status/api/v1/ExecutorSummarySuite.scala b/core/src/test/scala/org/apache/spark/status/api/v1/ExecutorSummarySuite.scala similarity index 100% rename from core/src/test/java/org/apache/spark/status/api/v1/ExecutorSummarySuite.scala rename to core/src/test/scala/org/apache/spark/status/api/v1/ExecutorSummarySuite.scala From d6f3138352042e33a2291e11c325b8eadb8dd5f2 Mon Sep 17 00:00:00 2001 From: Cheng Su Date: Fri, 2 Oct 2020 09:01:15 +0900 Subject: [PATCH 0145/1009] [SPARK-32859][SQL] Introduce physical rule to decide bucketing dynamically ### What changes were proposed in this pull request? This PR is to add support to decide bucketed table scan dynamically based on actual query plan. Currently bucketing is enabled by default (`spark.sql.sources.bucketing.enabled`=true), so for all bucketed tables in the query plan, we will use bucket table scan (all input files per the bucket will be read by same task). This has the drawback that if the bucket table scan is not benefitting at all (no join/groupby/etc in the query), we don't need to use bucket table scan as it would restrict the # of tasks to be # of buckets and might hurt parallelism. The feature is to add a physical plan rule right after `EnsureRequirements`: The rule goes through plan nodes. For all operators which has "interesting partition" (i.e., require `ClusteredDistribution` or `HashClusteredDistribution`), check if the sub-plan for operator has `Exchange` and bucketed table scan (and only allow certain operators in plan (i.e. `Scan/Filter/Project/Sort/PartialAgg/etc`.), see details in `DisableUnnecessaryBucketedScan.disableBucketWithInterestingPartition`). If yes, disable the bucketed table scan in the sub-plan. In addition, disabling bucketed table scan if there's operator with interesting partition along the sub-plan. Why the algorithm works is that if there's a shuffle between the bucketed table scan and operator with interesting partition, then bucketed table scan partitioning will be destroyed by the shuffle operator in the middle, and we don't need bucketed table scan for sure. The idea of "interesting partition" is inspired from "interesting order" in "Access Path Selection in a Relational Database Management System"(http://www.inf.ed.ac.uk/teaching/courses/adbs/AccessPath.pdf), after discussion with cloud-fan . ### Why are the changes needed? 
To avoid unnecessary bucketed scan in the query, and this is prerequisite for https://github.com/apache/spark/pull/29625 (decide bucketed sorted scan dynamically will be added later in that PR). ### Does this PR introduce _any_ user-facing change? A new config `spark.sql.sources.bucketing.autoBucketedScan.enabled` is introduced which set to false by default (the rule is disabled by default as it can regress cached bucketed table query, see discussion in https://github.com/apache/spark/pull/29804#issuecomment-701151447). User can opt-in/opt-out by enabling/disabling the config, as we found in prod, some users rely on assumption of # of tasks == # of buckets when reading bucket table to precisely control # of tasks. This is a bad assumption but it does happen on our side, so leave a config here to allow them opt-out for the feature. ### How was this patch tested? Added unit tests in `DisableUnnecessaryBucketedScanSuite.scala` Closes #29804 from c21/bucket-rule. Authored-by: Cheng Su Signed-off-by: Takeshi Yamamuro --- .../optimizer/CostBasedJoinReorder.scala | 2 +- .../apache/spark/sql/internal/SQLConf.scala | 13 ++ .../sql/execution/DataSourceScanExec.scala | 37 +-- .../spark/sql/execution/QueryExecution.scala | 3 +- .../DisableUnnecessaryBucketedScan.scala | 161 +++++++++++++ .../apache/spark/sql/DataFrameJoinSuite.scala | 2 +- .../org/apache/spark/sql/SubquerySuite.scala | 2 +- .../DisableUnnecessaryBucketedScanSuite.scala | 221 ++++++++++++++++++ ...saryBucketedScanWithHiveSupportSuite.scala | 31 +++ 9 files changed, 454 insertions(+), 18 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanWithHiveSupportSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala index 8b019f35263f3..45541051a6b13 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala @@ -114,7 +114,7 @@ case class OrderedJoin( /** * Reorder the joins using a dynamic programming algorithm. This implementation is based on the * paper: Access Path Selection in a Relational Database Management System. 
- * http://www.inf.ed.ac.uk/teaching/courses/adbs/AccessPath.pdf + * https://dl.acm.org/doi/10.1145/582095.582099 * * First we put all items (basic joined nodes) into level 0, then we build all two-way joins * at level 1 from plans at level 0 (single items), then build all 3-way joins from plans diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 0d1a3e365c918..18ffc655b2174 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -951,6 +951,17 @@ object SQLConf { .checkValue(_ > 0, "the value of spark.sql.sources.bucketing.maxBuckets must be greater than 0") .createWithDefault(100000) + val AUTO_BUCKETED_SCAN_ENABLED = + buildConf("spark.sql.sources.bucketing.autoBucketedScan.enabled") + .doc("When true, decide whether to do bucketed scan on input tables based on query plan " + + "automatically. Do not use bucketed scan if 1. query does not have operators to utilize " + + "bucketing (e.g. join, group-by, etc), or 2. there's an exchange operator between these " + + s"operators and table scan. Note when '${BUCKETING_ENABLED.key}' is set to " + + "false, this configuration does not take any effect.") + .version("3.1.0") + .booleanConf + .createWithDefault(false) + val CROSS_JOINS_ENABLED = buildConf("spark.sql.crossJoin.enabled") .internal() .doc("When false, we will throw an error if a query contains a cartesian product without " + @@ -3164,6 +3175,8 @@ class SQLConf extends Serializable with Logging { def bucketingMaxBuckets: Int = getConf(SQLConf.BUCKETING_MAX_BUCKETS) + def autoBucketedScanEnabled: Boolean = getConf(SQLConf.AUTO_BUCKETED_SCAN_ENABLED) + def dataFrameSelfJoinAutoResolveAmbiguity: Boolean = getConf(DATAFRAME_SELF_JOIN_AUTO_RESOLVE_AMBIGUITY) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index 1b9ca63ea21d3..45d28ddb42fc3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -156,7 +156,9 @@ case class RowDataSourceScanExec( * @param optionalBucketSet Bucket ids for bucket pruning. * @param optionalNumCoalescedBuckets Number of coalesced buckets. * @param dataFilters Filters on non-partition columns. - * @param tableIdentifier identifier for the table in the metastore. + * @param tableIdentifier Identifier for the table in the metastore. + * @param disableBucketedScan Disable bucketed scan based on physical query plan, see rule + * [[DisableUnnecessaryBucketedScan]] for details. 
*/ case class FileSourceScanExec( @transient relation: HadoopFsRelation, @@ -166,7 +168,8 @@ case class FileSourceScanExec( optionalBucketSet: Option[BitSet], optionalNumCoalescedBuckets: Option[Int], dataFilters: Seq[Expression], - tableIdentifier: Option[TableIdentifier]) + tableIdentifier: Option[TableIdentifier], + disableBucketedScan: Boolean = false) extends DataSourceScanExec { // Note that some vals referring the file-based relation are lazy intentionally @@ -257,7 +260,8 @@ case class FileSourceScanExec( // exposed for testing lazy val bucketedScan: Boolean = { - if (relation.sparkSession.sessionState.conf.bucketingEnabled && relation.bucketSpec.isDefined) { + if (relation.sparkSession.sessionState.conf.bucketingEnabled && relation.bucketSpec.isDefined + && !disableBucketedScan) { val spec = relation.bucketSpec.get val bucketColumns = spec.bucketColumnNames.flatMap(n => toAttribute(n)) bucketColumns.size == spec.bucketColumnNames.size @@ -348,20 +352,23 @@ case class FileSourceScanExec( "DataFilters" -> seqToString(dataFilters), "Location" -> locationDesc) - val withSelectedBucketsCount = relation.bucketSpec.map { spec => - val numSelectedBuckets = optionalBucketSet.map { b => - b.cardinality() + // TODO(SPARK-32986): Add bucketed scan info in explain output of FileSourceScanExec + if (bucketedScan) { + relation.bucketSpec.map { spec => + val numSelectedBuckets = optionalBucketSet.map { b => + b.cardinality() + } getOrElse { + spec.numBuckets + } + metadata + ("SelectedBucketsCount" -> + (s"$numSelectedBuckets out of ${spec.numBuckets}" + + optionalNumCoalescedBuckets.map { b => s" (Coalesced to $b)"}.getOrElse(""))) } getOrElse { - spec.numBuckets + metadata } - metadata + ("SelectedBucketsCount" -> - (s"$numSelectedBuckets out of ${spec.numBuckets}" + - optionalNumCoalescedBuckets.map { b => s" (Coalesced to $b)"}.getOrElse(""))) - } getOrElse { + } else { metadata } - - withSelectedBucketsCount } override def verboseStringWithOperatorId(): String = { @@ -539,6 +546,7 @@ case class FileSourceScanExec( .getOrElse(sys.error(s"Invalid bucket file ${f.filePath}")) } + // TODO(SPARK-32985): Decouple bucket filter pruning and bucketed table scan val prunedFilesGroupedToBuckets = if (optionalBucketSet.isDefined) { val bucketSet = optionalBucketSet.get filesGroupedToBuckets.filter { @@ -624,6 +632,7 @@ case class FileSourceScanExec( optionalBucketSet, optionalNumCoalescedBuckets, QueryPlan.normalizePredicates(dataFilters, output), - None) + None, + disableBucketedScan) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index dca2c5b16e8d5..a056500fa361a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.rules.{PlanChangeLogger, Rule} import org.apache.spark.sql.catalyst.util.StringUtils.PlanStringConcat import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.execution.adaptive.{AdaptiveExecutionContext, InsertAdaptiveSparkPlan} -import org.apache.spark.sql.execution.bucketing.CoalesceBucketsInJoin +import org.apache.spark.sql.execution.bucketing.{CoalesceBucketsInJoin, DisableUnnecessaryBucketedScan} import org.apache.spark.sql.execution.dynamicpruning.PlanDynamicPruningFilters import org.apache.spark.sql.execution.exchange.{EnsureRequirements, ReuseExchange} import 
org.apache.spark.sql.execution.streaming.{IncrementalExecution, OffsetSeqMetadata} @@ -344,6 +344,7 @@ object QueryExecution { PlanSubqueries(sparkSession), RemoveRedundantProjects(sparkSession.sessionState.conf), EnsureRequirements(sparkSession.sessionState.conf), + DisableUnnecessaryBucketedScan(sparkSession.sessionState.conf), ApplyColumnarRulesAndInsertTransitions(sparkSession.sessionState.conf, sparkSession.sessionState.columnarRules), CollapseCodegenStages(sparkSession.sessionState.conf), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala new file mode 100644 index 0000000000000..9b4f898df00b6 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.bucketing + +import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, HashClusteredDistribution} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.execution.{FileSourceScanExec, FilterExec, ProjectExec, SortExec, SparkPlan} +import org.apache.spark.sql.execution.aggregate.BaseAggregateExec +import org.apache.spark.sql.execution.exchange.Exchange +import org.apache.spark.sql.internal.SQLConf + +/** + * Disable unnecessary bucketed table scan based on actual physical query plan. + * NOTE: this rule is designed to be applied right after [[EnsureRequirements]], + * where all [[ShuffleExchangeExec]] and [[SortExec]] have been added to plan properly. + * + * When BUCKETING_ENABLED and AUTO_BUCKETED_SCAN_ENABLED are set to true, go through + * query plan to check where bucketed table scan is unnecessary, and disable bucketed table + * scan if: + * + * 1. The sub-plan from root to bucketed table scan, does not contain + * [[hasInterestingPartition]] operator. + * + * 2. The sub-plan from the nearest downstream [[hasInterestingPartition]] operator + * to the bucketed table scan, contains only [[isAllowedUnaryExecNode]] operators + * and at least one [[Exchange]]. + * + * Examples: + * 1. no [[hasInterestingPartition]] operator: + * Project + * | + * Filter + * | + * Scan(t1: i, j) + * (bucketed on column j, DISABLE bucketed scan) + * + * 2. join: + * SortMergeJoin(t1.i = t2.j) + * / \ + * Sort(i) Sort(j) + * / \ + * Shuffle(i) Scan(t2: i, j) + * / (bucketed on column j, enable bucketed scan) + * Scan(t1: i, j) + * (bucketed on column j, DISABLE bucketed scan) + * + * 3. 
aggregate: + * HashAggregate(i, ..., Final) + * | + * Shuffle(i) + * | + * HashAggregate(i, ..., Partial) + * | + * Filter + * | + * Scan(t1: i, j) + * (bucketed on column j, DISABLE bucketed scan) + * + * The idea of [[hasInterestingPartition]] is inspired from "interesting order" in + * the paper "Access Path Selection in a Relational Database Management System" + * (https://dl.acm.org/doi/10.1145/582095.582099). + */ +case class DisableUnnecessaryBucketedScan(conf: SQLConf) extends Rule[SparkPlan] { + + /** + * Disable bucketed table scan with pre-order traversal of plan. + * + * @param withInterestingPartition The traversed plan has operator with interesting partition. + * @param withExchange The traversed plan has [[Exchange]] operator. + * @param withAllowedNode The traversed plan has only [[isAllowedUnaryExecNode]] operators. + */ + private def disableBucketWithInterestingPartition( + plan: SparkPlan, + withInterestingPartition: Boolean, + withExchange: Boolean, + withAllowedNode: Boolean): SparkPlan = { + plan match { + case p if hasInterestingPartition(p) => + // Operator with interesting partition, propagates `withInterestingPartition` as true + // to its children, and resets `withExchange` and `withAllowedNode`. + p.mapChildren(disableBucketWithInterestingPartition(_, true, false, true)) + case exchange: Exchange => + // Exchange operator propagates `withExchange` as true to its child. + exchange.mapChildren(disableBucketWithInterestingPartition( + _, withInterestingPartition, true, withAllowedNode)) + case scan: FileSourceScanExec => + if (isBucketedScanWithoutFilter(scan)) { + if (!withInterestingPartition || (withExchange && withAllowedNode)) { + scan.copy(disableBucketedScan = true) + } else { + scan + } + } else { + scan + } + case o => + o.mapChildren(disableBucketWithInterestingPartition( + _, + withInterestingPartition, + withExchange, + withAllowedNode && isAllowedUnaryExecNode(o))) + } + } + + private def hasInterestingPartition(plan: SparkPlan): Boolean = { + plan.requiredChildDistribution.exists { + case _: ClusteredDistribution | _: HashClusteredDistribution => true + case _ => false + } + } + + /** + * Check if the operator is allowed single-child operator. + * We may revisit this method later as we probably can + * remove this restriction to allow arbitrary operator between + * bucketed table scan and operator with interesting partition. + */ + private def isAllowedUnaryExecNode(plan: SparkPlan): Boolean = { + plan match { + case _: SortExec | _: ProjectExec | _: FilterExec => true + case partialAgg: BaseAggregateExec => + partialAgg.requiredChildDistributionExpressions.isEmpty + case _ => false + } + } + + private def isBucketedScanWithoutFilter(scan: FileSourceScanExec): Boolean = { + // Do not disable bucketed table scan if it has filter pruning, + // because bucketed table scan is still useful here to save CPU/IO cost with + // only reading selected bucket files. 
+ scan.bucketedScan && scan.optionalBucketSet.isEmpty + } + + def apply(plan: SparkPlan): SparkPlan = { + lazy val hasBucketedScanWithoutFilter = plan.find { + case scan: FileSourceScanExec => isBucketedScanWithoutFilter(scan) + case _ => false + }.isDefined + + if (!conf.bucketingEnabled || !conf.autoBucketedScanEnabled || !hasBucketedScanWithoutFilter) { + plan + } else { + disableBucketWithInterestingPartition(plan, false, false, true) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala index b463a76a74026..14d03a30453ac 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala @@ -348,7 +348,7 @@ class DataFrameJoinSuite extends QueryTest } assert(broadcastExchanges.size == 1) val tables = broadcastExchanges.head.collect { - case FileSourceScanExec(_, _, _, _, _, _, _, Some(tableIdent)) => tableIdent + case FileSourceScanExec(_, _, _, _, _, _, _, Some(tableIdent), _) => tableIdent } assert(tables.size == 1) assert(tables.head === TableIdentifier(table1Name, Some(dbName))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index a21c461e84588..73b23496de515 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -1314,7 +1314,7 @@ class SubquerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark // need to execute the query before we can examine fs.inputRDDs() assert(stripAQEPlan(df.queryExecution.executedPlan) match { case WholeStageCodegenExec(ColumnarToRowExec(InputAdapter( - fs @ FileSourceScanExec(_, _, _, partitionFilters, _, _, _, _)))) => + fs @ FileSourceScanExec(_, _, _, partitionFilters, _, _, _, _, _)))) => partitionFilters.exists(ExecSubqueryExpression.hasSubquery) && fs.inputRDDs().forall( _.asInstanceOf[FileScanRDD].filePartitions.forall( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala new file mode 100644 index 0000000000000..1c258bc0dadb9 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala @@ -0,0 +1,221 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.sources + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.execution.FileSourceScanExec +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} + +class DisableUnnecessaryBucketedScanWithoutHiveSupportSuite + extends DisableUnnecessaryBucketedScanSuite + with SharedSparkSession { + + protected override def beforeAll(): Unit = { + super.beforeAll() + assert(spark.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "in-memory") + } +} + +abstract class DisableUnnecessaryBucketedScanSuite extends QueryTest with SQLTestUtils { + import testImplicits._ + + private lazy val df1 = + (0 until 50).map(i => (i % 5, i % 13, i.toString)).toDF("i", "j", "k").as("df1") + private lazy val df2 = + (0 until 50).map(i => (i % 7, i % 11, i.toString)).toDF("i", "j", "k").as("df2") + + private def checkDisableBucketedScan( + query: String, + expectedNumScanWithAutoScanEnabled: Int, + expectedNumScanWithAutoScanDisabled: Int): Unit = { + + def checkNumBucketedScan(query: String, expectedNumBucketedScan: Int): Unit = { + val plan = sql(query).queryExecution.executedPlan + val bucketedScan = plan.collect { case s: FileSourceScanExec if s.bucketedScan => s } + assert(bucketedScan.length == expectedNumBucketedScan) + } + + withSQLConf(SQLConf.AUTO_BUCKETED_SCAN_ENABLED.key -> "true") { + checkNumBucketedScan(query, expectedNumScanWithAutoScanEnabled) + val result = sql(query).collect() + + withSQLConf(SQLConf.AUTO_BUCKETED_SCAN_ENABLED.key -> "false") { + checkNumBucketedScan(query, expectedNumScanWithAutoScanDisabled) + checkAnswer(sql(query), result) + } + } + } + + test("SPARK-32859: disable unnecessary bucketed table scan - basic test") { + withTable("t1", "t2", "t3") { + df1.write.format("parquet").bucketBy(8, "i").saveAsTable("t1") + df2.write.format("parquet").bucketBy(8, "i").saveAsTable("t2") + df2.write.format("parquet").bucketBy(4, "i").saveAsTable("t3") + + Seq( + // Read bucketed table + ("SELECT * FROM t1", 0, 1), + ("SELECT i FROM t1", 0, 1), + ("SELECT j FROM t1", 0, 0), + // Filter on bucketed column + ("SELECT * FROM t1 WHERE i = 1", 1, 1), + // Filter on non-bucketed column + ("SELECT * FROM t1 WHERE j = 1", 0, 1), + // Join with same buckets + ("SELECT /*+ broadcast(t1)*/ * FROM t1 JOIN t2 ON t1.i = t2.i", 0, 2), + ("SELECT /*+ shuffle_hash(t1)*/ * FROM t1 JOIN t2 ON t1.i = t2.i", 2, 2), + ("SELECT /*+ merge(t1)*/ * FROM t1 JOIN t2 ON t1.i = t2.i", 2, 2), + // Join with different buckets + ("SELECT /*+ broadcast(t1)*/ * FROM t1 JOIN t3 ON t1.i = t3.i", 0, 2), + ("SELECT /*+ shuffle_hash(t1)*/ * FROM t1 JOIN t3 ON t1.i = t3.i", 1, 2), + ("SELECT /*+ merge(t1)*/ * FROM t1 JOIN t3 ON t1.i = t3.i", 1, 2), + // Join on non-bucketed column + ("SELECT /*+ broadcast(t1)*/ * FROM t1 JOIN t2 ON t1.i = t2.j", 0, 2), + ("SELECT /*+ shuffle_hash(t1)*/ * FROM t1 JOIN t2 ON t1.i = t2.j", 1, 2), + ("SELECT /*+ merge(t1)*/ * FROM t1 JOIN t2 ON t1.i = t2.j", 1, 2), + ("SELECT /*+ broadcast(t1)*/ * FROM t1 JOIN t2 ON t1.j = t2.j", 0, 2), + ("SELECT /*+ shuffle_hash(t1)*/ * FROM t1 JOIN t2 ON t1.j = t2.j", 0, 2), + ("SELECT /*+ merge(t1)*/ * FROM t1 JOIN t2 ON t1.j = t2.j", 0, 2), + // Aggregate on bucketed column + ("SELECT SUM(i) FROM t1 GROUP BY i", 1, 1), + // Aggregate on non-bucketed column + ("SELECT SUM(i) FROM t1 GROUP BY j", 0, 1), + ("SELECT j, SUM(i), COUNT(j) FROM t1 GROUP BY j", 0, 1) + ).foreach { case (query, 
numScanWithAutoScanEnabled, numScanWithAutoScanDisabled) => + checkDisableBucketedScan(query, numScanWithAutoScanEnabled, numScanWithAutoScanDisabled) + } + } + } + + test("SPARK-32859: disable unnecessary bucketed table scan - multiple joins test") { + withTable("t1", "t2", "t3") { + df1.write.format("parquet").bucketBy(8, "i").saveAsTable("t1") + df2.write.format("parquet").bucketBy(8, "i").saveAsTable("t2") + df2.write.format("parquet").bucketBy(4, "i").saveAsTable("t3") + + Seq( + // Multiple joins on bucketed columns + (""" + SELECT /*+ broadcast(t1, t3)*/ * FROM t1 JOIN t2 JOIN t3 + ON t1.i = t2.i AND t2.i = t3.i + """.stripMargin, 0, 3), + (""" + SELECT /*+ broadcast(t1) merge(t3)*/ * FROM t1 JOIN t2 JOIN t3 + ON t1.i = t2.i AND t2.i = t3.i + """.stripMargin, 2, 3), + (""" + SELECT /*+ merge(t1) broadcast(t3)*/ * FROM t1 JOIN t2 JOIN t3 + ON t1.i = t2.i AND t2.i = t3.i + """.stripMargin, 2, 3), + (""" + SELECT /*+ merge(t1, t3)*/ * FROM t1 JOIN t2 JOIN t3 + ON t1.i = t2.i AND t2.i = t3.i + """.stripMargin, 2, 3), + // Multiple joins on non-bucketed columns + (""" + SELECT /*+ broadcast(t1, t3)*/ * FROM t1 JOIN t2 JOIN t3 + ON t1.i = t2.j AND t2.j = t3.i + """.stripMargin, 0, 3), + (""" + SELECT /*+ merge(t1, t3)*/ * FROM t1 JOIN t2 JOIN t3 + ON t1.i = t2.j AND t2.j = t3.i + """.stripMargin, 1, 3), + (""" + SELECT /*+ merge(t1, t3)*/ * FROM t1 JOIN t2 JOIN t3 + ON t1.j = t2.j AND t2.j = t3.j + """.stripMargin, 0, 3) + ).foreach { case (query, numScanWithAutoScanEnabled, numScanWithAutoScanDisabled) => + checkDisableBucketedScan(query, numScanWithAutoScanEnabled, numScanWithAutoScanDisabled) + } + } + } + + test("SPARK-32859: disable unnecessary bucketed table scan - multiple bucketed columns test") { + withTable("t1", "t2", "t3") { + df1.write.format("parquet").bucketBy(8, "i", "j").saveAsTable("t1") + df2.write.format("parquet").bucketBy(8, "i", "j").saveAsTable("t2") + df2.write.format("parquet").bucketBy(4, "i", "j").saveAsTable("t3") + + Seq( + // Filter on bucketed columns + ("SELECT * FROM t1 WHERE i = 1", 0, 1), + ("SELECT * FROM t1 WHERE i = 1 AND j = 1", 0, 1), + // Join on bucketed columns + (""" + SELECT /*+ broadcast(t1)*/ * FROM t1 JOIN t2 ON t1.i = t2.i AND t1.j = t2.j + """.stripMargin, 0, 2), + (""" + SELECT /*+ merge(t1)*/ * FROM t1 JOIN t2 ON t1.i = t2.i AND t1.j = t2.j + """.stripMargin, 2, 2), + (""" + SELECT /*+ merge(t1)*/ * FROM t1 JOIN t3 ON t1.i = t3.i AND t1.j = t3.j + """.stripMargin, 1, 2), + ("SELECT /*+ merge(t1)*/ * FROM t1 JOIN t2 ON t1.i = t2.i", 0, 2), + // Aggregate on bucketed columns + ("SELECT i, j, COUNT(*) FROM t1 GROUP BY i, j", 1, 1), + ("SELECT i, COUNT(i) FROM t1 GROUP BY i", 0, 0), + ("SELECT i, COUNT(j) FROM t1 GROUP BY i", 0, 1) + ).foreach { case (query, numScanWithAutoScanEnabled, numScanWithAutoScanDisabled) => + checkDisableBucketedScan(query, numScanWithAutoScanEnabled, numScanWithAutoScanDisabled) + } + } + } + + test("SPARK-32859: disable unnecessary bucketed table scan - other operators test") { + withTable("t1", "t2", "t3") { + df1.write.format("parquet").bucketBy(8, "i").saveAsTable("t1") + df2.write.format("parquet").bucketBy(8, "i").saveAsTable("t2") + df1.write.format("parquet").saveAsTable("t3") + + Seq( + // Operator with interesting partition not in sub-plan + (""" + SELECT t1.i FROM t1 + UNION ALL + (SELECT t2.i FROM t2 GROUP BY t2.i) + """.stripMargin, 1, 2), + // Non-allowed operator in sub-plan + (""" + SELECT COUNT(*) + FROM (SELECT t1.i FROM t1 UNION ALL SELECT t2.i FROM t2) + GROUP BY i + """.stripMargin, 2, 2), + 
// Multiple [[Exchange]] in sub-plan + (""" + SELECT j, SUM(i), COUNT(*) FROM t1 GROUP BY j + DISTRIBUTE BY j + """.stripMargin, 0, 1), + (""" + SELECT j, COUNT(*) + FROM (SELECT i, j FROM t1 DISTRIBUTE BY i, j) + GROUP BY j + """.stripMargin, 0, 1), + // No bucketed table scan in plan + (""" + SELECT j, COUNT(*) + FROM (SELECT t1.j FROM t1 JOIN t3 ON t1.j = t3.j) + GROUP BY j + """.stripMargin, 0, 0) + ).foreach { case (query, numScanWithAutoScanEnabled, numScanWithAutoScanDisabled) => + checkDisableBucketedScan(query, numScanWithAutoScanEnabled, numScanWithAutoScanDisabled) + } + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanWithHiveSupportSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanWithHiveSupportSuite.scala new file mode 100644 index 0000000000000..30eb93cb5c3e8 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanWithHiveSupportSuite.scala @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources + +import org.apache.spark.sql.hive.test.TestHiveSingleton +import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION + +class DisableUnnecessaryBucketedScanWithHiveSupportSuite + extends DisableUnnecessaryBucketedScanSuite + with TestHiveSingleton { + + protected override def beforeAll(): Unit = { + super.beforeAll() + assert(spark.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "hive") + } +} From 991f7e81d46820f6e097fcf92c025689b677491f Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Fri, 2 Oct 2020 13:04:40 +0900 Subject: [PATCH 0146/1009] [SPARK-32001][SQL] Create JDBC authentication provider developer API ### What changes were proposed in this pull request? At the moment only the baked in JDBC connection providers can be used but there is a need to support additional databases and use-cases. In this PR I'm proposing a new developer API name `JdbcConnectionProvider`. To show how an external JDBC connection provider can be implemented I've created an example [here](https://github.com/gaborgsomogyi/spark-jdbc-connection-provider). The PR contains the following changes: * Added connection provider developer API * Made JDBC connection providers constructor to noarg => needed to load them w/ service loader * Connection providers are now loaded w/ service loader * Added tests to load providers independently * Moved `SecurityConfigurationLock` into a central place because other areas will change global JVM security config ### Why are the changes needed? No custom authentication possibility. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? 
* Existing + additional unit tests * Docker integration tests * Tested manually the newly created external JDBC connection provider Closes #29024 from gaborgsomogyi/SPARK-32001. Authored-by: Gabor Somogyi Signed-off-by: HyukjinKwon --- .../security/SecurityConfigurationLock.scala | 24 ++++++ .../sql/jdbc/DB2KrbIntegrationSuite.scala | 2 +- ...ache.spark.sql.jdbc.JdbcConnectionProvider | 6 ++ .../datasources/jdbc/JDBCOptions.scala | 4 +- .../datasources/jdbc/JdbcUtils.scala | 2 +- .../connection/BasicConnectionProvider.scala | 30 +++++-- .../jdbc/connection/ConnectionProvider.scala | 83 ++++++++----------- .../connection/DB2ConnectionProvider.scala | 39 +++++---- .../connection/MSSQLConnectionProvider.scala | 41 +++++---- .../MariaDBConnectionProvider.scala | 18 ++-- .../connection/OracleConnectionProvider.scala | 39 +++++---- .../PostgresConnectionProvider.scala | 17 ++-- .../connection/SecureConnectionProvider.scala | 42 ++++++---- .../sql/jdbc/JdbcConnectionProvider.scala | 58 +++++++++++++ ...ache.spark.sql.jdbc.JdbcConnectionProvider | 1 + .../connection/ConnectionProviderSuite.scala | 39 ++++++--- .../ConnectionProviderSuiteBase.scala | 17 ++-- .../DB2ConnectionProviderSuite.scala | 6 +- ...ntentionallyFaultyConnectionProvider.scala | 33 ++++++++ .../MSSQLConnectionProviderSuite.scala | 42 +++++----- .../MariaDBConnectionProviderSuite.scala | 6 +- .../OracleConnectionProviderSuite.scala | 7 +- .../PostgresConnectionProviderSuite.scala | 16 ++-- 23 files changed, 355 insertions(+), 217 deletions(-) create mode 100644 core/src/main/scala/org/apache/spark/security/SecurityConfigurationLock.scala create mode 100644 sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcConnectionProvider.scala create mode 100644 sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/IntentionallyFaultyConnectionProvider.scala diff --git a/core/src/main/scala/org/apache/spark/security/SecurityConfigurationLock.scala b/core/src/main/scala/org/apache/spark/security/SecurityConfigurationLock.scala new file mode 100644 index 0000000000000..0741a8c1580df --- /dev/null +++ b/core/src/main/scala/org/apache/spark/security/SecurityConfigurationLock.scala @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.security + +/** + * There are cases when global JVM security configuration must be modified. + * In order to avoid race the modification must be synchronized with this. 
+ */ +object SecurityConfigurationLock diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala index fc88985cf2ec7..fa5ce2d106a10 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala @@ -54,7 +54,7 @@ class DB2KrbIntegrationSuite extends DockerKrbJDBCIntegrationSuite { JDBCOptions.JDBC_KEYTAB -> keytabFileName, JDBCOptions.JDBC_PRINCIPAL -> principal )) - new DB2ConnectionProvider(null, options).getAdditionalProperties() + new DB2ConnectionProvider().getAdditionalProperties(options) } override def beforeContainerStart( diff --git a/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider b/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider new file mode 100644 index 0000000000000..6e42517a6d40c --- /dev/null +++ b/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider @@ -0,0 +1,6 @@ +org.apache.spark.sql.execution.datasources.jdbc.connection.BasicConnectionProvider +org.apache.spark.sql.execution.datasources.jdbc.connection.DB2ConnectionProvider +org.apache.spark.sql.execution.datasources.jdbc.connection.MariaDBConnectionProvider +org.apache.spark.sql.execution.datasources.jdbc.connection.MSSQLConnectionProvider +org.apache.spark.sql.execution.datasources.jdbc.connection.PostgresConnectionProvider +org.apache.spark.sql.execution.datasources.jdbc.connection.OracleConnectionProvider \ No newline at end of file diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala index 9e0438c0016bd..e6fff8dbdbd7c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap * Options for the JDBC data source. 
*/ class JDBCOptions( - @transient val parameters: CaseInsensitiveMap[String]) + val parameters: CaseInsensitiveMap[String]) extends Serializable with Logging { import JDBCOptions._ @@ -209,7 +209,7 @@ class JDBCOptions( } class JdbcOptionsInWrite( - @transient override val parameters: CaseInsensitiveMap[String]) + override val parameters: CaseInsensitiveMap[String]) extends JDBCOptions(parameters) { import JDBCOptions._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index 5831c35c7e301..202f2e03b68d8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -63,7 +63,7 @@ object JdbcUtils extends Logging { throw new IllegalStateException( s"Did not find registered driver with class $driverClass") } - val connection = ConnectionProvider.create(driver, options).getConnection() + val connection = ConnectionProvider.create(driver, options.parameters) require(connection != null, s"The driver could not open a JDBC connection. Check the URL: ${options.url}") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala index 16b244cc617ce..a5f04649e6628 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala @@ -18,18 +18,30 @@ package org.apache.spark.sql.execution.datasources.jdbc.connection import java.sql.{Connection, Driver} +import java.util.Properties -import scala.collection.JavaConverters._ - +import org.apache.spark.internal.Logging import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions +import org.apache.spark.sql.jdbc.JdbcConnectionProvider + +private[jdbc] class BasicConnectionProvider extends JdbcConnectionProvider with Logging { + /** + * Additional properties for data connection (Data source property takes precedence). 
+ */ + def getAdditionalProperties(options: JDBCOptions): Properties = new Properties() + + override def canHandle(driver: Driver, options: Map[String, String]): Boolean = { + val jdbcOptions = new JDBCOptions(options) + jdbcOptions.keytab == null || jdbcOptions.principal == null + } -private[jdbc] class BasicConnectionProvider(driver: Driver, options: JDBCOptions) - extends ConnectionProvider { - def getConnection(): Connection = { - val properties = getAdditionalProperties() - options.asConnectionProperties.entrySet().asScala.foreach { e => - properties.put(e.getKey(), e.getValue()) + override def getConnection(driver: Driver, options: Map[String, String]): Connection = { + val jdbcOptions = new JDBCOptions(options) + val properties = getAdditionalProperties(jdbcOptions) + options.foreach { case(k, v) => + properties.put(k, v) } - driver.connect(options.url, properties) + logDebug(s"JDBC connection initiated with URL: ${jdbcOptions.url} and properties: $properties") + driver.connect(jdbcOptions.url, properties) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala index ce45be442ccc3..546756677edce 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala @@ -18,60 +18,45 @@ package org.apache.spark.sql.execution.datasources.jdbc.connection import java.sql.{Connection, Driver} -import java.util.Properties +import java.util.ServiceLoader -import org.apache.spark.internal.Logging -import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions - -/** - * Connection provider which opens connection toward various databases (database specific instance - * needed). If kerberos authentication required then it's the provider's responsibility to set all - * the parameters. - */ -private[jdbc] trait ConnectionProvider { - /** - * Additional properties for data connection (Data source property takes precedence). - */ - def getAdditionalProperties(): Properties = new Properties() +import scala.collection.mutable - /** - * Opens connection toward the database. 
- */ - def getConnection(): Connection -} +import org.apache.spark.internal.Logging +import org.apache.spark.security.SecurityConfigurationLock +import org.apache.spark.sql.jdbc.JdbcConnectionProvider +import org.apache.spark.util.Utils private[jdbc] object ConnectionProvider extends Logging { - def create(driver: Driver, options: JDBCOptions): ConnectionProvider = { - if (options.keytab == null || options.principal == null) { - logDebug("No authentication configuration found, using basic connection provider") - new BasicConnectionProvider(driver, options) - } else { - logDebug("Authentication configuration found, using database specific connection provider") - options.driverClass match { - case PostgresConnectionProvider.driverClass => - logDebug("Postgres connection provider found") - new PostgresConnectionProvider(driver, options) - - case MariaDBConnectionProvider.driverClass => - logDebug("MariaDB connection provider found") - new MariaDBConnectionProvider(driver, options) - - case DB2ConnectionProvider.driverClass => - logDebug("DB2 connection provider found") - new DB2ConnectionProvider(driver, options) - - case MSSQLConnectionProvider.driverClass => - logDebug("MS SQL connection provider found") - new MSSQLConnectionProvider(driver, options) - - case OracleConnectionProvider.driverClass => - logDebug("Oracle connection provider found") - new OracleConnectionProvider(driver, options) - - case _ => - throw new IllegalArgumentException(s"Driver ${options.driverClass} does not support " + - "Kerberos authentication") + private val providers = loadProviders() + + def loadProviders(): Seq[JdbcConnectionProvider] = { + val loader = ServiceLoader.load(classOf[JdbcConnectionProvider], + Utils.getContextOrSparkClassLoader) + val providers = mutable.ArrayBuffer[JdbcConnectionProvider]() + + val iterator = loader.iterator + while (iterator.hasNext) { + try { + val provider = iterator.next + logDebug(s"Loaded built in provider: $provider") + providers += provider + } catch { + case t: Throwable => + logError(s"Failed to load built in provider.", t) } } + // Seems duplicate but it's needed for Scala 2.13 + providers.toSeq + } + + def create(driver: Driver, options: Map[String, String]): Connection = { + val filteredProviders = providers.filter(_.canHandle(driver, options)) + require(filteredProviders.size == 1, + "JDBC connection initiated but not exactly one connection provider found which can handle " + + s"it. 
Found active providers: ${filteredProviders.mkString(", ")}") + SecurityConfigurationLock.synchronized { + filteredProviders.head.getConnection(driver, options) + } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/DB2ConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/DB2ConnectionProvider.scala index 095821cf83890..ca82cdc561bef 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/DB2ConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/DB2ConnectionProvider.scala @@ -25,22 +25,25 @@ import org.apache.hadoop.security.UserGroupInformation import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions -private[sql] class DB2ConnectionProvider(driver: Driver, options: JDBCOptions) - extends SecureConnectionProvider(driver, options) { - override val appEntry: String = "JaasClient" - - override def getConnection(): Connection = { - setAuthenticationConfigIfNeeded() - UserGroupInformation.loginUserFromKeytabAndReturnUGI(options.principal, options.keytab).doAs( - new PrivilegedExceptionAction[Connection]() { - override def run(): Connection = { - DB2ConnectionProvider.super.getConnection() +private[sql] class DB2ConnectionProvider extends SecureConnectionProvider { + override val driverClass = "com.ibm.db2.jcc.DB2Driver" + + override def appEntry(driver: Driver, options: JDBCOptions): String = "JaasClient" + + override def getConnection(driver: Driver, options: Map[String, String]): Connection = { + val jdbcOptions = new JDBCOptions(options) + setAuthenticationConfigIfNeeded(driver, jdbcOptions) + UserGroupInformation.loginUserFromKeytabAndReturnUGI(jdbcOptions.principal, jdbcOptions.keytab) + .doAs( + new PrivilegedExceptionAction[Connection]() { + override def run(): Connection = { + DB2ConnectionProvider.super.getConnection(driver, options) + } } - } - ) + ) } - override def getAdditionalProperties(): Properties = { + override def getAdditionalProperties(options: JDBCOptions): Properties = { val result = new Properties() // 11 is the integer value for kerberos result.put("securityMechanism", new String("11")) @@ -48,14 +51,10 @@ private[sql] class DB2ConnectionProvider(driver: Driver, options: JDBCOptions) result } - override def setAuthenticationConfigIfNeeded(): Unit = SecurityConfigurationLock.synchronized { - val (parent, configEntry) = getConfigWithAppEntry() + override def setAuthenticationConfigIfNeeded(driver: Driver, options: JDBCOptions): Unit = { + val (parent, configEntry) = getConfigWithAppEntry(driver, options) if (configEntry == null || configEntry.isEmpty) { - setAuthenticationConfig(parent) + setAuthenticationConfig(parent, driver, options) } } } - -private[sql] object DB2ConnectionProvider { - val driverClass = "com.ibm.db2.jcc.DB2Driver" -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MSSQLConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MSSQLConnectionProvider.scala index 2950aa9b4db94..4e405b2187e56 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MSSQLConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MSSQLConnectionProvider.scala @@ -25,12 +25,11 @@ import org.apache.hadoop.security.UserGroupInformation import 
org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions -private[sql] class MSSQLConnectionProvider( - driver: Driver, - options: JDBCOptions, - parserMethod: String = "parseAndMergeProperties" - ) extends SecureConnectionProvider(driver, options) { - override val appEntry: String = { +private[sql] class MSSQLConnectionProvider extends SecureConnectionProvider { + override val driverClass = "com.microsoft.sqlserver.jdbc.SQLServerDriver" + val parserMethod: String = "parseAndMergeProperties" + + override def appEntry(driver: Driver, options: JDBCOptions): String = { val configName = "jaasConfigurationName" val appEntryDefault = "SQLJDBCDriver" @@ -58,18 +57,20 @@ private[sql] class MSSQLConnectionProvider( } } - override def getConnection(): Connection = { - setAuthenticationConfigIfNeeded() - UserGroupInformation.loginUserFromKeytabAndReturnUGI(options.principal, options.keytab).doAs( - new PrivilegedExceptionAction[Connection]() { - override def run(): Connection = { - MSSQLConnectionProvider.super.getConnection() + override def getConnection(driver: Driver, options: Map[String, String]): Connection = { + val jdbcOptions = new JDBCOptions(options) + setAuthenticationConfigIfNeeded(driver, jdbcOptions) + UserGroupInformation.loginUserFromKeytabAndReturnUGI(jdbcOptions.principal, jdbcOptions.keytab) + .doAs( + new PrivilegedExceptionAction[Connection]() { + override def run(): Connection = { + MSSQLConnectionProvider.super.getConnection(driver, options) + } } - } - ) + ) } - override def getAdditionalProperties(): Properties = { + override def getAdditionalProperties(options: JDBCOptions): Properties = { val result = new Properties() // These props needed to reach internal kerberos authentication in the JDBC driver result.put("integratedSecurity", "true") @@ -77,8 +78,8 @@ private[sql] class MSSQLConnectionProvider( result } - override def setAuthenticationConfigIfNeeded(): Unit = SecurityConfigurationLock.synchronized { - val (parent, configEntry) = getConfigWithAppEntry() + override def setAuthenticationConfigIfNeeded(driver: Driver, options: JDBCOptions): Unit = { + val (parent, configEntry) = getConfigWithAppEntry(driver, options) /** * Couple of things to mention here (v8.2.2 client): * 1. 
MS SQL supports JAAS application name configuration @@ -87,11 +88,7 @@ private[sql] class MSSQLConnectionProvider( val entryUsesKeytab = configEntry != null && configEntry.exists(_.getOptions().get("useKeyTab") == "true") if (configEntry == null || configEntry.isEmpty || !entryUsesKeytab) { - setAuthenticationConfig(parent) + setAuthenticationConfig(parent, driver, options) } } } - -private[sql] object MSSQLConnectionProvider { - val driverClass = "com.microsoft.sqlserver.jdbc.SQLServerDriver" -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MariaDBConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MariaDBConnectionProvider.scala index 3c0286654a8ec..d5fe13bf0ca19 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MariaDBConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MariaDBConnectionProvider.scala @@ -21,14 +21,14 @@ import java.sql.Driver import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions -private[jdbc] class MariaDBConnectionProvider(driver: Driver, options: JDBCOptions) - extends SecureConnectionProvider(driver, options) { - override val appEntry: String = { +private[jdbc] class MariaDBConnectionProvider extends SecureConnectionProvider { + override val driverClass = "org.mariadb.jdbc.Driver" + + override def appEntry(driver: Driver, options: JDBCOptions): String = "Krb5ConnectorContext" - } - override def setAuthenticationConfigIfNeeded(): Unit = SecurityConfigurationLock.synchronized { - val (parent, configEntry) = getConfigWithAppEntry() + override def setAuthenticationConfigIfNeeded(driver: Driver, options: JDBCOptions): Unit = { + val (parent, configEntry) = getConfigWithAppEntry(driver, options) /** * Couple of things to mention here (v2.5.4 client): * 1. 
MariaDB doesn't support JAAS application name configuration @@ -37,11 +37,7 @@ private[jdbc] class MariaDBConnectionProvider(driver: Driver, options: JDBCOptio val entryUsesKeytab = configEntry != null && configEntry.exists(_.getOptions().get("useKeyTab") == "true") if (configEntry == null || configEntry.isEmpty || !entryUsesKeytab) { - setAuthenticationConfig(parent) + setAuthenticationConfig(parent, driver, options) } } } - -private[sql] object MariaDBConnectionProvider { - val driverClass = "org.mariadb.jdbc.Driver" -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/OracleConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/OracleConnectionProvider.scala index c2b71b35b8128..3defda3871765 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/OracleConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/OracleConnectionProvider.scala @@ -25,22 +25,25 @@ import org.apache.hadoop.security.UserGroupInformation import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions -private[sql] class OracleConnectionProvider(driver: Driver, options: JDBCOptions) - extends SecureConnectionProvider(driver, options) { - override val appEntry: String = "kprb5module" - - override def getConnection(): Connection = { - setAuthenticationConfigIfNeeded() - UserGroupInformation.loginUserFromKeytabAndReturnUGI(options.principal, options.keytab).doAs( - new PrivilegedExceptionAction[Connection]() { - override def run(): Connection = { - OracleConnectionProvider.super.getConnection() +private[sql] class OracleConnectionProvider extends SecureConnectionProvider { + override val driverClass = "oracle.jdbc.OracleDriver" + + override def appEntry(driver: Driver, options: JDBCOptions): String = "kprb5module" + + override def getConnection(driver: Driver, options: Map[String, String]): Connection = { + val jdbcOptions = new JDBCOptions(options) + setAuthenticationConfigIfNeeded(driver, jdbcOptions) + UserGroupInformation.loginUserFromKeytabAndReturnUGI(jdbcOptions.principal, jdbcOptions.keytab) + .doAs( + new PrivilegedExceptionAction[Connection]() { + override def run(): Connection = { + OracleConnectionProvider.super.getConnection(driver, options) + } } - } - ) + ) } - override def getAdditionalProperties(): Properties = { + override def getAdditionalProperties(options: JDBCOptions): Properties = { val result = new Properties() // This prop is needed to turn on kerberos authentication in the JDBC driver. 
// The possible values can be found in AnoServices public interface @@ -49,14 +52,10 @@ private[sql] class OracleConnectionProvider(driver: Driver, options: JDBCOptions result } - override def setAuthenticationConfigIfNeeded(): Unit = SecurityConfigurationLock.synchronized { - val (parent, configEntry) = getConfigWithAppEntry() + override def setAuthenticationConfigIfNeeded(driver: Driver, options: JDBCOptions): Unit = { + val (parent, configEntry) = getConfigWithAppEntry(driver, options) if (configEntry == null || configEntry.isEmpty) { - setAuthenticationConfig(parent) + setAuthenticationConfig(parent, driver, options) } } } - -private[sql] object OracleConnectionProvider { - val driverClass = "oracle.jdbc.OracleDriver" -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/PostgresConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/PostgresConnectionProvider.scala index fa9232e00bd88..dae8aea81f20a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/PostgresConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/PostgresConnectionProvider.scala @@ -22,22 +22,19 @@ import java.util.Properties import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions -private[jdbc] class PostgresConnectionProvider(driver: Driver, options: JDBCOptions) - extends SecureConnectionProvider(driver, options) { - override val appEntry: String = { +private[jdbc] class PostgresConnectionProvider extends SecureConnectionProvider { + override val driverClass = "org.postgresql.Driver" + + override def appEntry(driver: Driver, options: JDBCOptions): String = { val parseURL = driver.getClass.getMethod("parseURL", classOf[String], classOf[Properties]) val properties = parseURL.invoke(driver, options.url, null).asInstanceOf[Properties] properties.getProperty("jaasApplicationName", "pgjdbc") } - override def setAuthenticationConfigIfNeeded(): Unit = SecurityConfigurationLock.synchronized { - val (parent, configEntry) = getConfigWithAppEntry() + override def setAuthenticationConfigIfNeeded(driver: Driver, options: JDBCOptions): Unit = { + val (parent, configEntry) = getConfigWithAppEntry(driver, options) if (configEntry == null || configEntry.isEmpty) { - setAuthenticationConfig(parent) + setAuthenticationConfig(parent, driver, options) } } } - -private[sql] object PostgresConnectionProvider { - val driverClass = "org.postgresql.Driver" -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/SecureConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/SecureConnectionProvider.scala index 24eec63a7244f..80c795957dac8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/SecureConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/SecureConnectionProvider.scala @@ -26,39 +26,49 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions import org.apache.spark.util.SecurityUtils -/** - * Some of the secure connection providers modify global JVM security configuration. - * In order to avoid race the modification must be synchronized with this. 
- */ -private[connection] object SecurityConfigurationLock +private[jdbc] abstract class SecureConnectionProvider extends BasicConnectionProvider with Logging { + /** + * Returns the driver canonical class name which the connection provider supports. + */ + protected val driverClass: String + + override def canHandle(driver: Driver, options: Map[String, String]): Boolean = { + val jdbcOptions = new JDBCOptions(options) + jdbcOptions.keytab != null && jdbcOptions.principal != null && + driverClass.equalsIgnoreCase(jdbcOptions.driverClass) + } -private[jdbc] abstract class SecureConnectionProvider(driver: Driver, options: JDBCOptions) - extends BasicConnectionProvider(driver, options) with Logging { - override def getConnection(): Connection = { - setAuthenticationConfigIfNeeded() - super.getConnection() + override def getConnection(driver: Driver, options: Map[String, String]): Connection = { + val jdbcOptions = new JDBCOptions(options) + setAuthenticationConfigIfNeeded(driver, jdbcOptions) + super.getConnection(driver: Driver, options: Map[String, String]) } /** * Returns JAAS application name. This is sometimes configurable on the JDBC driver level. */ - val appEntry: String + def appEntry(driver: Driver, options: JDBCOptions): String /** * Sets database specific authentication configuration when needed. If configuration already set * then later calls must be no op. When the global JVM security configuration changed then the * related code parts must be synchronized properly. */ - def setAuthenticationConfigIfNeeded(): Unit + def setAuthenticationConfigIfNeeded(driver: Driver, options: JDBCOptions): Unit - protected def getConfigWithAppEntry(): (Configuration, Array[AppConfigurationEntry]) = { + protected def getConfigWithAppEntry( + driver: Driver, + options: JDBCOptions): (Configuration, Array[AppConfigurationEntry]) = { val parent = Configuration.getConfiguration - (parent, parent.getAppConfigurationEntry(appEntry)) + (parent, parent.getAppConfigurationEntry(appEntry(driver, options))) } - protected def setAuthenticationConfig(parent: Configuration) = { + protected def setAuthenticationConfig( + parent: Configuration, + driver: Driver, + options: JDBCOptions) = { val config = new SecureConnectionProvider.JDBCConfiguration( - parent, appEntry, options.keytab, options.principal) + parent, appEntry(driver, options), options.keytab, options.principal) logDebug("Adding database specific security configuration") Configuration.setConfiguration(config) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcConnectionProvider.scala new file mode 100644 index 0000000000000..caf574b0c2284 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcConnectionProvider.scala @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.jdbc + +import java.sql.{Connection, Driver} + +import org.apache.spark.annotation.{DeveloperApi, Unstable} + +/** + * ::DeveloperApi:: + * Connection provider which opens connection toward various databases (database specific instance + * needed). If any authentication required then it's the provider's responsibility to set all + * the parameters. + * Important to mention connection providers within a JVM used from multiple threads so adding + * internal state is not advised. If any state added then it must be synchronized properly. + * + * @since 3.1.0 + */ +@DeveloperApi +@Unstable +abstract class JdbcConnectionProvider { + /** + * Checks if this connection provider instance can handle the connection initiated by the driver. + * There must be exactly one active connection provider which can handle the connection for a + * specific driver. If this requirement doesn't met then `IllegalArgumentException` + * will be thrown by the provider framework. + * + * @param driver Java driver which initiates the connection + * @param options Driver options which initiates the connection + * @return True if the connection provider can handle the driver with the given options. + */ + def canHandle(driver: Driver, options: Map[String, String]): Boolean + + /** + * Opens connection toward the database. Since global JVM security configuration change may needed + * this API is called synchronized by `SecurityConfigurationLock` to avoid race. + * + * @param driver Java driver which initiates the connection + * @param options Driver options which initiates the connection + * @return a `Connection` object that represents a connection to the URL + */ + def getConnection(driver: Driver, options: Map[String, String]): Connection +} diff --git a/sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider b/sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider new file mode 100644 index 0000000000000..afb48e1a3511f --- /dev/null +++ b/sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider @@ -0,0 +1 @@ +org.apache.spark.sql.execution.datasources.jdbc.connection.IntentionallyFaultyConnectionProvider \ No newline at end of file diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuite.scala index ff5fe4f620a1d..a48dbdebea7e9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuite.scala @@ -20,26 +20,43 @@ package org.apache.spark.sql.execution.datasources.jdbc.connection import javax.security.auth.login.Configuration class ConnectionProviderSuite extends ConnectionProviderSuiteBase { + test("All built-in provides must be loaded") { + IntentionallyFaultyConnectionProvider.constructed = false + val providers = ConnectionProvider.loadProviders() + assert(providers.exists(_.isInstanceOf[BasicConnectionProvider])) + assert(providers.exists(_.isInstanceOf[DB2ConnectionProvider])) + assert(providers.exists(_.isInstanceOf[MariaDBConnectionProvider])) + 
assert(providers.exists(_.isInstanceOf[MSSQLConnectionProvider])) + assert(providers.exists(_.isInstanceOf[PostgresConnectionProvider])) + assert(providers.exists(_.isInstanceOf[OracleConnectionProvider])) + assert(IntentionallyFaultyConnectionProvider.constructed) + assert(!providers.exists(_.isInstanceOf[IntentionallyFaultyConnectionProvider])) + assert(providers.size === 6) + } + test("Multiple security configs must be reachable") { Configuration.setConfiguration(null) - val postgresDriver = registerDriver(PostgresConnectionProvider.driverClass) - val postgresProvider = new PostgresConnectionProvider( - postgresDriver, options("jdbc:postgresql://localhost/postgres")) - val db2Driver = registerDriver(DB2ConnectionProvider.driverClass) - val db2Provider = new DB2ConnectionProvider(db2Driver, options("jdbc:db2://localhost/db2")) + val postgresProvider = new PostgresConnectionProvider() + val postgresDriver = registerDriver(postgresProvider.driverClass) + val postgresOptions = options("jdbc:postgresql://localhost/postgres") + val postgresAppEntry = postgresProvider.appEntry(postgresDriver, postgresOptions) + val db2Provider = new DB2ConnectionProvider() + val db2Driver = registerDriver(db2Provider.driverClass) + val db2Options = options("jdbc:db2://localhost/db2") + val db2AppEntry = db2Provider.appEntry(db2Driver, db2Options) // Make sure no authentication for the databases are set val oldConfig = Configuration.getConfiguration - assert(oldConfig.getAppConfigurationEntry(postgresProvider.appEntry) == null) - assert(oldConfig.getAppConfigurationEntry(db2Provider.appEntry) == null) + assert(oldConfig.getAppConfigurationEntry(postgresAppEntry) == null) + assert(oldConfig.getAppConfigurationEntry(db2AppEntry) == null) - postgresProvider.setAuthenticationConfigIfNeeded() - db2Provider.setAuthenticationConfigIfNeeded() + postgresProvider.setAuthenticationConfigIfNeeded(postgresDriver, postgresOptions) + db2Provider.setAuthenticationConfigIfNeeded(db2Driver, db2Options) // Make sure authentication for the databases are set val newConfig = Configuration.getConfiguration assert(oldConfig != newConfig) - assert(newConfig.getAppConfigurationEntry(postgresProvider.appEntry) != null) - assert(newConfig.getAppConfigurationEntry(db2Provider.appEntry) != null) + assert(newConfig.getAppConfigurationEntry(postgresAppEntry) != null) + assert(newConfig.getAppConfigurationEntry(db2AppEntry) != null) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuiteBase.scala index d18a3088c4f2f..be08a3c2f7367 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuiteBase.scala @@ -50,20 +50,25 @@ abstract class ConnectionProviderSuiteBase extends SparkFunSuite with BeforeAndA } } - protected def testSecureConnectionProvider(provider: SecureConnectionProvider): Unit = { + protected def testSecureConnectionProvider( + provider: SecureConnectionProvider, + driver: Driver, + options: JDBCOptions): Unit = { + val providerAppEntry = provider.appEntry(driver, options) + // Make sure no authentication for the database is set - assert(Configuration.getConfiguration.getAppConfigurationEntry(provider.appEntry) == null) + 
assert(Configuration.getConfiguration.getAppConfigurationEntry(providerAppEntry) == null) // Make sure the first call sets authentication properly val savedConfig = Configuration.getConfiguration - provider.setAuthenticationConfigIfNeeded() + provider.setAuthenticationConfigIfNeeded(driver, options) val config = Configuration.getConfiguration assert(savedConfig != config) - val appEntry = config.getAppConfigurationEntry(provider.appEntry) + val appEntry = config.getAppConfigurationEntry(providerAppEntry) assert(appEntry != null) // Make sure a second call is not modifying the existing authentication - provider.setAuthenticationConfigIfNeeded() - assert(config.getAppConfigurationEntry(provider.appEntry) === appEntry) + provider.setAuthenticationConfigIfNeeded(driver, options) + assert(config.getAppConfigurationEntry(providerAppEntry) === appEntry) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/DB2ConnectionProviderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/DB2ConnectionProviderSuite.scala index d656f83e2ebb9..5885af82532d4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/DB2ConnectionProviderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/DB2ConnectionProviderSuite.scala @@ -19,9 +19,9 @@ package org.apache.spark.sql.execution.datasources.jdbc.connection class DB2ConnectionProviderSuite extends ConnectionProviderSuiteBase { test("setAuthenticationConfigIfNeeded must set authentication if not set") { - val driver = registerDriver(DB2ConnectionProvider.driverClass) - val provider = new DB2ConnectionProvider(driver, options("jdbc:db2://localhost/db2")) + val provider = new DB2ConnectionProvider() + val driver = registerDriver(provider.driverClass) - testSecureConnectionProvider(provider) + testSecureConnectionProvider(provider, driver, options("jdbc:db2://localhost/db2")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/IntentionallyFaultyConnectionProvider.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/IntentionallyFaultyConnectionProvider.scala new file mode 100644 index 0000000000000..fbefcb91cccde --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/IntentionallyFaultyConnectionProvider.scala @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources.jdbc.connection + +import java.sql.{Connection, Driver} + +import org.apache.spark.sql.jdbc.JdbcConnectionProvider + +private class IntentionallyFaultyConnectionProvider extends JdbcConnectionProvider { + IntentionallyFaultyConnectionProvider.constructed = true + throw new IllegalArgumentException("Intentional Exception") + override def canHandle(driver: Driver, options: Map[String, String]): Boolean = true + override def getConnection(driver: Driver, options: Map[String, String]): Connection = null +} + +private object IntentionallyFaultyConnectionProvider { + var constructed = false +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MSSQLConnectionProviderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MSSQLConnectionProviderSuite.scala index 249f1e36347ed..a5704e842e018 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MSSQLConnectionProviderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MSSQLConnectionProviderSuite.scala @@ -17,35 +17,35 @@ package org.apache.spark.sql.execution.datasources.jdbc.connection +import java.sql.Driver + +import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions + class MSSQLConnectionProviderSuite extends ConnectionProviderSuiteBase { test("setAuthenticationConfigIfNeeded default parser must set authentication if not set") { - val driver = registerDriver(MSSQLConnectionProvider.driverClass) - val defaultProvider = new MSSQLConnectionProvider( - driver, options("jdbc:sqlserver://localhost/mssql")) - val customProvider = new MSSQLConnectionProvider( - driver, options("jdbc:sqlserver://localhost/mssql;jaasConfigurationName=custommssql")) + val provider = new MSSQLConnectionProvider() + val driver = registerDriver(provider.driverClass) - testProviders(defaultProvider, customProvider) + testProviders(driver, provider, options("jdbc:sqlserver://localhost/mssql"), + options("jdbc:sqlserver://localhost/mssql;jaasConfigurationName=custommssql")) } test("setAuthenticationConfigIfNeeded custom parser must set authentication if not set") { - val parserMethod = "IntentionallyNotExistingMethod" - val driver = registerDriver(MSSQLConnectionProvider.driverClass) - val defaultProvider = new MSSQLConnectionProvider( - driver, options("jdbc:sqlserver://localhost/mssql"), parserMethod) - val customProvider = new MSSQLConnectionProvider( - driver, - options("jdbc:sqlserver://localhost/mssql;jaasConfigurationName=custommssql"), - parserMethod) - - testProviders(defaultProvider, customProvider) + val provider = new MSSQLConnectionProvider() { + override val parserMethod: String = "IntentionallyNotExistingMethod" + } + val driver = registerDriver(provider.driverClass) + + testProviders(driver, provider, options("jdbc:sqlserver://localhost/mssql"), + options("jdbc:sqlserver://localhost/mssql;jaasConfigurationName=custommssql")) } private def testProviders( - defaultProvider: SecureConnectionProvider, - customProvider: SecureConnectionProvider) = { - assert(defaultProvider.appEntry !== customProvider.appEntry) - testSecureConnectionProvider(defaultProvider) - testSecureConnectionProvider(customProvider) + driver: Driver, + provider: SecureConnectionProvider, + defaultOptions: JDBCOptions, + customOptions: JDBCOptions) = { + testSecureConnectionProvider(provider, driver, defaultOptions) + 
testSecureConnectionProvider(provider, driver, customOptions) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MariaDBConnectionProviderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MariaDBConnectionProviderSuite.scala index 70cad2097eb43..f450662fcbe74 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MariaDBConnectionProviderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MariaDBConnectionProviderSuite.scala @@ -19,9 +19,9 @@ package org.apache.spark.sql.execution.datasources.jdbc.connection class MariaDBConnectionProviderSuite extends ConnectionProviderSuiteBase { test("setAuthenticationConfigIfNeeded must set authentication if not set") { - val driver = registerDriver(MariaDBConnectionProvider.driverClass) - val provider = new MariaDBConnectionProvider(driver, options("jdbc:mysql://localhost/mysql")) + val provider = new MariaDBConnectionProvider() + val driver = registerDriver(provider.driverClass) - testSecureConnectionProvider(provider) + testSecureConnectionProvider(provider, driver, options("jdbc:mysql://localhost/mysql")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/OracleConnectionProviderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/OracleConnectionProviderSuite.scala index 13cde32ddbe4e..40e7f1191dccc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/OracleConnectionProviderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/OracleConnectionProviderSuite.scala @@ -19,10 +19,9 @@ package org.apache.spark.sql.execution.datasources.jdbc.connection class OracleConnectionProviderSuite extends ConnectionProviderSuiteBase { test("setAuthenticationConfigIfNeeded must set authentication if not set") { - val driver = registerDriver(OracleConnectionProvider.driverClass) - val provider = new OracleConnectionProvider(driver, - options("jdbc:oracle:thin:@//localhost/xe")) + val provider = new OracleConnectionProvider() + val driver = registerDriver(provider.driverClass) - testSecureConnectionProvider(provider) + testSecureConnectionProvider(provider, driver, options("jdbc:oracle:thin:@//localhost/xe")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/PostgresConnectionProviderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/PostgresConnectionProviderSuite.scala index 8cef7652f9c54..ee43a7d9708c5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/PostgresConnectionProviderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/PostgresConnectionProviderSuite.scala @@ -19,14 +19,14 @@ package org.apache.spark.sql.execution.datasources.jdbc.connection class PostgresConnectionProviderSuite extends ConnectionProviderSuiteBase { test("setAuthenticationConfigIfNeeded must set authentication if not set") { - val driver = registerDriver(PostgresConnectionProvider.driverClass) - val defaultProvider = new PostgresConnectionProvider( - driver, options("jdbc:postgresql://localhost/postgres")) - val customProvider = new PostgresConnectionProvider( - driver, 
options(s"jdbc:postgresql://localhost/postgres?jaasApplicationName=custompgjdbc")) + val provider = new PostgresConnectionProvider() + val defaultOptions = options("jdbc:postgresql://localhost/postgres") + val customOptions = + options(s"jdbc:postgresql://localhost/postgres?jaasApplicationName=custompgjdbc") + val driver = registerDriver(provider.driverClass) - assert(defaultProvider.appEntry !== customProvider.appEntry) - testSecureConnectionProvider(defaultProvider) - testSecureConnectionProvider(customProvider) + assert(provider.appEntry(driver, defaultOptions) !== provider.appEntry(driver, customOptions)) + testSecureConnectionProvider(provider, driver, defaultOptions) + testSecureConnectionProvider(provider, driver, customOptions) } }
From 9996e252ad3ef20760fcbc785e8d3a6f20b6acb5 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 1 Oct 2020 23:01:31 -0700 Subject: [PATCH 0147/1009] [SPARK-33026][SQL] Add numRows to metric of BroadcastExchangeExec
### What changes were proposed in this pull request? This PR adds `numRows` to the metric and runtimeStatistics of `BroadcastExchangeExec`.
### Why are the changes needed? [`JoinEstimation.estimateInnerOuterJoin`](https://github.com/apache/spark/blob/d6a68e0b67ff7de58073c176dd097070e88ac831/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/JoinEstimation.scala#L55-L156) needs the row count. The [ShuffleExchangeExec](https://github.com/apache/spark/blob/1c6dff7b5fc171c190feea0d8f7d323e330d9151/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala#L127) has already added the row count, but `BroadcastExchangeExec` is missing it.
### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #29904 from wangyum/SPARK-33026.
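For illustration only, and not part of the patch itself, a rough sketch of how the new metric can be observed from an executed plan, along the lines of the test added in this commit; it assumes the tables are small enough for a broadcast join under the default threshold and that adaptive query execution is off:

```
import org.apache.spark.sql.execution.exchange.BroadcastExchangeExec

// Two tiny tables joined on `id` should be planned with a broadcast exchange by default.
val df = spark.range(2).join(spark.range(2), "id")
df.collect()

// Pull the numRows metric out of the executed plan. The same value feeds
// runtimeStatistics, which join estimation can use.
val broadcastRowCounts = df.queryExecution.executedPlan.collect {
  case b: BroadcastExchangeExec => b.metrics("numRows").value
}
// Expected to be Seq(2) for this query.
```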
Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- .../exchange/BroadcastExchangeExec.scala | 5 ++++- .../execution/metric/SQLMetricsSuite.scala | 21 ++++++++++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala index 6d8d37022ea42..4b884dfe537e8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala @@ -78,6 +78,7 @@ case class BroadcastExchangeExec( override lazy val metrics = Map( "dataSize" -> SQLMetrics.createSizeMetric(sparkContext, "data size"), + "numRows" -> SQLMetrics.createMetric(sparkContext, "number of rows"), "collectTime" -> SQLMetrics.createTimingMetric(sparkContext, "time to collect"), "buildTime" -> SQLMetrics.createTimingMetric(sparkContext, "time to build"), "broadcastTime" -> SQLMetrics.createTimingMetric(sparkContext, "time to broadcast")) @@ -90,7 +91,8 @@ case class BroadcastExchangeExec( override def runtimeStatistics: Statistics = { val dataSize = metrics("dataSize").value - Statistics(dataSize) + val numRows = metrics("numRows").value + Statistics(dataSize, Some(numRows)) } @transient @@ -118,6 +120,7 @@ case class BroadcastExchangeExec( throw new SparkException( s"Cannot broadcast the table over $MAX_BROADCAST_TABLE_ROWS rows: $numRows rows") } + longMetric("numRows") += numRows val beforeBuild = System.nanoTime() longMetric("collectTime") += NANOSECONDS.toMillis(beforeBuild - beforeCollect) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala index 4e10c27edb0e9..e404e460fe611 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.plans.logical.LocalRelation import org.apache.spark.sql.execution.{FilterExec, RangeExec, SparkPlan, WholeStageCodegenExec} import org.apache.spark.sql.execution.adaptive.DisableAdaptiveExecutionSuite import org.apache.spark.sql.execution.aggregate.HashAggregateExec -import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec +import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ShuffleExchangeExec} import org.apache.spark.sql.execution.joins.ShuffledHashJoinExec import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf @@ -736,4 +736,23 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils Map("dataSize" -> 3200, "shuffleRecordsWritten" -> 100)) testMetricsInSparkPlanOperator(exchanges(1), Map("dataSize" -> 0, "shuffleRecordsWritten" -> 0)) } + + test("Add numRows to metric of BroadcastExchangeExec") { + withSQLConf(SQLConf.AUTO_SIZE_UPDATE_ENABLED.key -> "true") { + withTable("t1", "t2") { + spark.range(2).write.saveAsTable("t1") + spark.range(2).write.saveAsTable("t2") + val df = sql("SELECT t1.* FROM t1 JOIN t2 ON t1.id = t2.id") + df.collect() + val plan = df.queryExecution.executedPlan + + val exchanges = plan.collect { + case s: BroadcastExchangeExec => s + } + + assert(exchanges.size === 1) + testMetricsInSparkPlanOperator(exchanges.head, Map("numRows" -> 2)) + } + } + } } From 
b205be5ff6926454b0afe76e4c3438cfa0f34832 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Fri, 2 Oct 2020 15:12:33 +0900 Subject: [PATCH 0148/1009] [SPARK-33051][INFRA][R] Uses setup-r to install R in GitHub Actions build ### What changes were proposed in this pull request? At SPARK-32493, the R installation was switched to manual installation because setup-r was broken. This seems fixed in the upstream so we should better switch it back. ### Why are the changes needed? To avoid maintaining the installation steps by ourselve. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? GitHub Actions build in this PR should test it. Closes #29931 from HyukjinKwon/recover-r-build. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- .github/workflows/build_and_test.yml | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 17c040323d515..667371dacf5dc 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -168,12 +168,10 @@ jobs: python3.8 -m pip list # SparkR - name: Install R 4.0 + uses: r-lib/actions/setup-r@v1 if: contains(matrix.modules, 'sparkr') - run: | - sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' >> /etc/apt/sources.list" - curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add - sudo apt-get update - sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev + with: + r-version: 4.0 - name: Install R packages if: contains(matrix.modules, 'sparkr') run: | @@ -232,11 +230,9 @@ jobs: # See also https://github.com/sphinx-doc/sphinx/issues/7551. pip3 install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme ipython nbsphinx - name: Install R 4.0 - run: | - sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' >> /etc/apt/sources.list" - curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add - sudo apt-get update - sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev + uses: r-lib/actions/setup-r@v1 + with: + r-version: 4.0 - name: Install R linter dependencies and SparkR run: | sudo apt-get install -y libcurl4-openssl-dev From f7ba95264d38484f57c772e459bffb939c9c718e Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Fri, 2 Oct 2020 15:17:44 +0900 Subject: [PATCH 0149/1009] [SPARK-33048][BUILD] Fix SparkBuild.scala to recognize build settings for Scala 2.13 ### What changes were proposed in this pull request? This PR fixes `SparkBuild.scala` to recognize build settings for Scala 2.13. In `SparkBuild.scala`, a variable `scalaBinaryVersion` is hardcoded as `2.12`. So, an environment variable `SPARK_SCALA_VERSION` is also to be `2.12`. This issue causes some test suites (e.g. `SparkSubmitSuite`) to be error. 
``` ===== TEST OUTPUT FOR o.a.s.deploy.SparkSubmitSuite: 'user classpath first in driver' ===== 20/10/02 08:55:30.234 redirect stderr for command /home/kou/work/oss/spark-scala-2.13/bin/spark-submit INFO Utils: Error: Could not find or load m ain class org.apache.spark.launcher.Main 20/10/02 08:55:30.235 redirect stderr for command /home/kou/work/oss/spark-scala-2.13/bin/spark-submit INFO Utils: /home/kou/work/oss/spark-scala- 2.13/bin/spark-class: line 96: CMD: bad array subscript ``` The reason of this error is that environment variables `SPARK_JARS_DIR` and `LAUNCH_CLASSPATH` is defined in `bin/spark-class` as follows. ``` SPARK_JARS_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION/jars" LAUNCH_CLASSPATH="${SPARK_HOME}/launcher/target/scala-$SPARK_SCALA_VERSION/classes:$LAUNCH_CLASSPATH" ``` ### Why are the changes needed? To build for Scala 2.13 successfully. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Tests for `core` module finish successfully. ``` build/sbt -Pscala-2.13 clean "core/test" ``` Closes #29927 from sarutak/fix-sparkbuild-for-scala-2.13. Authored-by: Kousuke Saruta Signed-off-by: HyukjinKwon --- project/SparkBuild.scala | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 160b3b5e7edb3..6328daec027ef 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -94,21 +94,6 @@ object SparkBuild extends PomBuild { case Some(v) => v.split("(\\s+|,)").filterNot(_.isEmpty).map(_.trim.replaceAll("-P", "")).toSeq } - - // TODO: revisit for Scala 2.13 support - /* - Option(System.getProperty("scala.version")) - .filter(_.startsWith("2.11")) - .foreach { versionString => - System.setProperty("scala-2.11", "true") - } - if (System.getProperty("scala-2.11") == "") { - // To activate scala-2.10 profile, replace empty property value to non-empty value - // in the same way as Maven which handles -Dname as -Dname=true before executes build process. - // see: https://github.com/apache/maven/blob/maven-3.0.4/maven-embedder/src/main/java/org/apache/maven/cli/MavenCli.java#L1082 - System.setProperty("scala-2.11", "true") - } - */ profiles } @@ -965,17 +950,6 @@ object CopyDependencies { object TestSettings { import BuildCommons._ - - // TODO revisit for Scala 2.13 support - private val scalaBinaryVersion = "2.12" - /* - if (System.getProperty("scala-2.11") == "true") { - "2.11" - } else { - "2.12" - } - */ - private val defaultExcludedTags = Seq("org.apache.spark.tags.ChromeUITest") lazy val settings = Seq ( @@ -988,7 +962,7 @@ object TestSettings { (fullClasspath in Test).value.files.map(_.getAbsolutePath) .mkString(File.pathSeparator).stripSuffix(File.pathSeparator), "SPARK_PREPEND_CLASSES" -> "1", - "SPARK_SCALA_VERSION" -> scalaBinaryVersion, + "SPARK_SCALA_VERSION" -> scalaBinaryVersion.value, "SPARK_TESTING" -> "1", "JAVA_HOME" -> sys.env.get("JAVA_HOME").getOrElse(sys.props("java.home"))), javaOptions in Test += s"-Djava.io.tmpdir=$testTempDir", From aa6657981aefae8067672d2c99ca560b6179b723 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 2 Oct 2020 00:06:03 -0700 Subject: [PATCH 0150/1009] [SPARK-33050][BUILD] Upgrade Apache ORC to 1.5.12 ### What changes were proposed in this pull request? This PR aims to upgrade Apache ORC to 1.5.12. ### Why are the changes needed? This brings us the latest bug patches like the followings. 
- ORC-644 nested struct evolution does not respect to orc.force.positional.evolution - ORC-667 Positional mapping for nested struct types should not applied by default ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CI. Closes #29930 from dongjoon-hyun/SPARK-ORC-1.5.12. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-1.2 | 6 +++--- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 6 +++--- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 6 +++--- pom.xml | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-1.2 b/dev/deps/spark-deps-hadoop-2.7-hive-1.2 index fef1a6442cd33..d07b04608328f 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-1.2 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-1.2 @@ -180,9 +180,9 @@ okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar openshift-model/4.10.3//openshift-model-4.10.3.jar -orc-core/1.5.10/nohive/orc-core-1.5.10-nohive.jar -orc-mapreduce/1.5.10/nohive/orc-mapreduce-1.5.10-nohive.jar -orc-shims/1.5.10//orc-shims-1.5.10.jar +orc-core/1.5.12/nohive/orc-core-1.5.12-nohive.jar +orc-mapreduce/1.5.12/nohive/orc-mapreduce-1.5.12-nohive.jar +orc-shims/1.5.12//orc-shims-1.5.12.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 7b31bdd98ef26..979bb1419ce7b 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -195,9 +195,9 @@ okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar openshift-model/4.10.3//openshift-model-4.10.3.jar -orc-core/1.5.10//orc-core-1.5.10.jar -orc-mapreduce/1.5.10//orc-mapreduce-1.5.10.jar -orc-shims/1.5.10//orc-shims-1.5.10.jar +orc-core/1.5.12//orc-core-1.5.12.jar +orc-mapreduce/1.5.12//orc-mapreduce-1.5.12.jar +orc-shims/1.5.12//orc-shims-1.5.12.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 960ea5f836ddf..ebaff6d1977c9 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -209,9 +209,9 @@ okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar openshift-model/4.10.3//openshift-model-4.10.3.jar -orc-core/1.5.10//orc-core-1.5.10.jar -orc-mapreduce/1.5.10//orc-mapreduce-1.5.10.jar -orc-shims/1.5.10//orc-shims-1.5.10.jar +orc-core/1.5.12//orc-core-1.5.12.jar +orc-mapreduce/1.5.12//orc-mapreduce-1.5.12.jar +orc-shims/1.5.12//orc-shims-1.5.12.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar diff --git a/pom.xml b/pom.xml index 421d932cef5fa..5d6b0511ce458 100644 --- a/pom.xml +++ b/pom.xml @@ -136,7 +136,7 @@ 2.6.0 10.12.1.1 1.10.1 - 1.5.10 + 1.5.12 com.twitter 1.6.0 From 9b88aca2954cd931c94a7cc788c3c3f7a33e99b7 Mon Sep 17 00:00:00 2001 From: zero323 Date: Fri, 2 Oct 2020 00:53:17 -0700 Subject: [PATCH 0151/1009] [SPARK-33030][R] Add nth_value to SparkR ### What changes were proposed in this pull request? Adds `nth_value` function to SparkR. ### Why are the changes needed? Feature parity. 
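For reference, a rough sketch of how the equivalent window function is used from the Scala API added under SPARK-27951 (the toy data and column names below are illustrative, not part of this patch):
```
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.{col, nth_value}

val spark = SparkSession.builder().master("local[1]").appName("nth_value-sketch").getOrCreate()
import spark.implicits._

// Toy data: for each group, pick the value found in the 3rd row when ordered by `ord`;
// rows whose window frame has fewer than 3 rows get null.
val df = Seq(("a", 1, 10), ("a", 2, 20), ("a", 3, 30), ("b", 1, 40)).toDF("grp", "ord", "value")
val w = Window.partitionBy(col("grp")).orderBy(col("ord"))
df.withColumn("third_value", nth_value(col("value"), 3).over(w)).show()
```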
The function has been already added to [Scala](https://issues.apache.org/jira/browse/SPARK-27951) and [Python](https://issues.apache.org/jira/browse/SPARK-33020). ### Does this PR introduce _any_ user-facing change? Yes. New function is exposed to R users. ### How was this patch tested? New unit tests. Closes #29905 from zero323/SPARK-33030. Authored-by: zero323 Signed-off-by: Dongjoon Hyun --- R/pkg/NAMESPACE | 1 + R/pkg/R/functions.R | 34 ++++++++++++++++++++++++++- R/pkg/R/generics.R | 4 ++++ R/pkg/tests/fulltests/test_sparkSQL.R | 2 ++ 4 files changed, 40 insertions(+), 1 deletion(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 6d28caff0d56f..4ea05b25ecc9e 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -348,6 +348,7 @@ exportMethods("%<=>%", "negate", "next_day", "not", + "nth_value", "ntile", "otherwise", "over", diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index df221de4c7327..18206f6f67778 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -338,7 +338,8 @@ NULL #' tmp <- mutate(df, dist = over(cume_dist(), ws), dense_rank = over(dense_rank(), ws), #' lag = over(lag(df$mpg), ws), lead = over(lead(df$mpg, 1), ws), #' percent_rank = over(percent_rank(), ws), -#' rank = over(rank(), ws), row_number = over(row_number(), ws)) +#' rank = over(rank(), ws), row_number = over(row_number(), ws), +#' nth_value = over(nth_value(df$mpg, 3), ws)) #' # Get ntile group id (1-4) for hp #' tmp <- mutate(tmp, ntile = over(ntile(4), ws)) #' head(tmp)} @@ -3298,6 +3299,37 @@ setMethod("lead", column(jc) }) +#' @details +#' \code{nth_value}: Window function: returns the value that is the \code{offset}th +#' row of the window frame# (counting from 1), and \code{null} if the size of window +#' frame is less than \code{offset} rows. +#' +#' @param offset a numeric indicating number of row to use as the value +#' @param na.rm a logical which indicates that the Nth value should skip null in the +#' determination of which row to use +#' +#' @rdname column_window_functions +#' @aliases nth_value nth_value,characterOrColumn-method +#' @note nth_value since 3.1.0 +setMethod("nth_value", + signature(x = "characterOrColumn", offset = "numeric"), + function(x, offset, na.rm = FALSE) { + x <- if (is.character(x)) { + column(x) + } else { + x + } + offset <- as.integer(offset) + jc <- callJStatic( + "org.apache.spark.sql.functions", + "nth_value", + x@jc, + offset, + na.rm + ) + column(jc) + }) + #' @details #' \code{ntile}: Returns the ntile group id (from 1 to n inclusive) in an ordered window #' partition. For example, if n is 4, the first quarter of the rows will get value 1, the second diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index a7a9379b927b1..985678679dec8 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -1164,6 +1164,10 @@ setGeneric("months_between", function(y, x, ...) { standardGeneric("months_betwe #' @rdname count setGeneric("n", function(x) { standardGeneric("n") }) +#' @rdname column_window_functions +#' @name NULL +setGeneric("nth_value", function(x, offset, ...) 
{ standardGeneric("nth_value") }) + #' @rdname column_nonaggregate_functions #' @name NULL setGeneric("nanvl", function(y, x) { standardGeneric("nanvl") }) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 1c65dabaf6656..c36620227593d 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1425,6 +1425,8 @@ test_that("column functions", { c25 <- overlay(c1, c2, c3, c3) + overlay(c1, c2, c3) + overlay(c1, c2, 1) + overlay(c1, c2, 3, 4) c26 <- timestamp_seconds(c1) + c27 <- nth_value("x", 1L) + nth_value("y", 2, TRUE) + + nth_value(column("v"), 3) + nth_value(column("z"), 4L, FALSE) # Test if base::is.nan() is exposed expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE)) From 82721ce00b6cf535abd3d9cd66445e452554d15d Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Fri, 2 Oct 2020 22:16:19 +0900 Subject: [PATCH 0152/1009] [SPARK-32741][SQL][FOLLOWUP] Run plan integrity check only for effective plan changes ### What changes were proposed in this pull request? (This is a followup PR of #29585) The PR modified `RuleExecutor#isPlanIntegral` code for checking if a plan has globally-unique attribute IDs, but this check made Jenkins maven test jobs much longer (See [the Dongjoon comment](https://github.com/apache/spark/pull/29585#issuecomment-702461314) and thanks, dongjoon-hyun !). To recover running time for the Jenkins tests, this PR intends to update the code to run plan integrity check only for effective plans. ### Why are the changes needed? To recover running time for Jenkins tests. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #29928 from maropu/PR29585-FOLLOWUP. Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- .../org/apache/spark/sql/catalyst/rules/RuleExecutor.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala index 3bd8fa78ec92c..d5b0884f6ff13 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala @@ -229,7 +229,7 @@ abstract class RuleExecutor[TreeType <: TreeNode[_]] extends Logging { tracker.foreach(_.recordRuleInvocation(rule.ruleName, runTime, effective)) // Run the structural integrity checker against the plan after each rule. - if (!isPlanIntegral(result)) { + if (effective && !isPlanIntegral(result)) { val message = s"After applying rule ${rule.ruleName} in batch ${batch.name}, " + "the structural integrity of the plan is broken." throw new TreeNodeException(result, message, null) From 1299c8a81ddba7f0fd8ff1f9afa223a4bb75f7f9 Mon Sep 17 00:00:00 2001 From: Bo Yang Date: Fri, 2 Oct 2020 20:26:46 -0700 Subject: [PATCH 0153/1009] [SPARK-33037][SHUFFLE] Remove knownManagers to support user's custom shuffle manager plugin ### What changes were proposed in this pull request? Spark has a hardcode list to contain known shuffle managers, which has two values now. It does not contain user's custom shuffle manager which is set through Spark config "spark.shuffle.manager". We hit issue when set "spark.shuffle.manager" with our own shuffle manager plugin (Uber Remote Shuffle Service implementation, https://github.com/uber/RemoteShuffleService). 
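For illustration, such a plugin is enabled through the ordinary `spark.shuffle.manager` setting; a minimal sketch (the class name below is hypothetical) looks like this:
```
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

// Point spark.shuffle.manager at a custom ShuffleManager implementation on the classpath.
val conf = new SparkConf()
  .set("spark.shuffle.manager", "org.example.shuffle.RemoteShuffleManager")
val spark = SparkSession.builder().config(conf).getOrCreate()
```
The sketch only sets the configuration; the point of this change is that such a manager is no longer rejected merely because its class name is absent from the hardcoded list.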
Other users will hit same issue when they implement their own shuffle manager. It is better to remove that knownManagers hardcode list, to support user's custom shuffle manager implementation. ### Why are the changes needed? Spark has shuffle manager API to support custom shuffle manager implementation. The hardcoded known managers list does not consider that shuffle manager config value which could be set by user. Thus better to remove this hardcoded known managers list. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Current Spark unit test already covers the code path. Closes #29916 from boy-uber/knownManagers. Lead-authored-by: Bo Yang Co-authored-by: Bo Yang Signed-off-by: Liang-Chi Hsieh --- .../network/shuffle/ExternalShuffleBlockResolver.java | 8 -------- .../shuffle/ExternalShuffleBlockResolverSuite.java | 9 --------- .../network/shuffle/ExternalShuffleIntegrationSuite.java | 6 +++--- .../scala/org/apache/spark/shuffle/ShuffleManager.scala | 6 +++++- 4 files changed, 8 insertions(+), 21 deletions(-) diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java index a6bcbb8850566..a095bf2723418 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java @@ -92,10 +92,6 @@ public class ExternalShuffleBlockResolver { @VisibleForTesting final DB db; - private final List knownManagers = Arrays.asList( - "org.apache.spark.shuffle.sort.SortShuffleManager", - "org.apache.spark.shuffle.unsafe.UnsafeShuffleManager"); - public ExternalShuffleBlockResolver(TransportConf conf, File registeredExecutorFile) throws IOException { this(conf, registeredExecutorFile, Executors.newSingleThreadExecutor( @@ -148,10 +144,6 @@ public void registerExecutor( ExecutorShuffleInfo executorInfo) { AppExecId fullId = new AppExecId(appId, execId); logger.info("Registered executor {} with {}", fullId, executorInfo); - if (!knownManagers.contains(executorInfo.shuffleManager)) { - throw new UnsupportedOperationException( - "Unsupported shuffle manager of executor: " + executorInfo); - } try { if (db != null) { byte[] key = dbAppExecKey(fullId); diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java index 88bcf43c2371f..04d4bdf92bae7 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java @@ -71,15 +71,6 @@ public void testBadRequests() throws IOException { assertTrue("Bad error message: " + e, e.getMessage().contains("not registered")); } - // Invalid shuffle manager - try { - resolver.registerExecutor("app0", "exec2", dataContext.createExecutorInfo("foobar")); - resolver.getBlockData("app0", "exec2", 1, 1, 0); - fail("Should have failed"); - } catch (UnsupportedOperationException e) { - // pass - } - // Nonexistent shuffle block resolver.registerExecutor("app0", "exec3", dataContext.createExecutorInfo(SORT_MANAGER)); diff --git 
a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleIntegrationSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleIntegrationSuite.java index 9d398e372056b..49d02e5dc6fb4 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleIntegrationSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleIntegrationSuite.java @@ -233,9 +233,9 @@ public void testFetchThreeSort() throws Exception { exec0Fetch.releaseBuffers(); } - @Test (expected = RuntimeException.class) - public void testRegisterInvalidExecutor() throws Exception { - registerExecutor("exec-1", dataContext0.createExecutorInfo("unknown sort manager")); + @Test + public void testRegisterWithCustomShuffleManager() throws Exception { + registerExecutor("exec-1", dataContext0.createExecutorInfo("custom shuffle manager")); } @Test diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala index 400c4526f0114..4e2183451c258 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala @@ -24,8 +24,12 @@ import org.apache.spark.{ShuffleDependency, TaskContext} * and on each executor, based on the spark.shuffle.manager setting. The driver registers shuffles * with it, and executors (or tasks running locally in the driver) can ask to read and write data. * - * NOTE: this will be instantiated by SparkEnv so its constructor can take a SparkConf and + * NOTE: + * 1. This will be instantiated by SparkEnv so its constructor can take a SparkConf and * boolean isDriver as parameters. + * 2. This contains a method ShuffleBlockResolver which interacts with External Shuffle Service + * when it is enabled. Need to pay attention to that, if implementing a custom ShuffleManager, to + * make sure the custom ShuffleManager could co-exist with External Shuffle Service. */ private[spark] trait ShuffleManager { From 5af62a2ec74356ce1a97c1371321b3424b674289 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Sat, 3 Oct 2020 23:37:01 +0900 Subject: [PATCH 0154/1009] [SPARK-33052][SQL][TEST] Make all the database versions up-to-date for integration tests ### What changes were proposed in this pull request? This PR intends to update database versions below for integration tests; - ibmcom/db2:11.5.0.0a => ibmcom/db2:11.5.4.0 in `DB2[Krb]IntegrationSuite` - mysql:5.7.28 => mysql:5.7.31 in `MySQLIntegrationSuite` - postgres:12.0 => postgres:13.0 in `Postgres[Krb]IntegrationSuite` - mariadb:10.4 => mariadb:10.5 in `MariaDBKrbIntegrationSuite` Also, this added environmental variables so that we can test with any database version and all the variables are as follows (see documents in the code for how to use all the variables); - DB2_DOCKER_IMAGE_NAME - MSSQLSERVER_DOCKER_IMAGE_NAME - MYSQL_DOCKER_IMAGE_NAME - POSTGRES_DOCKER_IMAGE_NAME ### Why are the changes needed? To improve tests. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually checked. Closes #29932 from maropu/UpdateIntegrationTests. 
Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- .../src/test/resources/mariadb_docker_entrypoint.sh | 2 +- .../apache/spark/sql/jdbc/DB2IntegrationSuite.scala | 9 ++++++++- .../spark/sql/jdbc/DB2KrbIntegrationSuite.scala | 9 ++++++++- .../spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala | 4 +++- .../spark/sql/jdbc/MsSqlServerIntegrationSuite.scala | 10 +++++++++- .../apache/spark/sql/jdbc/MySQLIntegrationSuite.scala | 11 +++++++++-- .../spark/sql/jdbc/PostgresIntegrationSuite.scala | 9 ++++++++- .../spark/sql/jdbc/PostgresKrbIntegrationSuite.scala | 9 ++++++++- 8 files changed, 54 insertions(+), 9 deletions(-) diff --git a/external/docker-integration-tests/src/test/resources/mariadb_docker_entrypoint.sh b/external/docker-integration-tests/src/test/resources/mariadb_docker_entrypoint.sh index 343bc01651318..97c00a9d81b76 100755 --- a/external/docker-integration-tests/src/test/resources/mariadb_docker_entrypoint.sh +++ b/external/docker-integration-tests/src/test/resources/mariadb_docker_entrypoint.sh @@ -18,7 +18,7 @@ dpkg-divert --add /bin/systemctl && ln -sT /bin/true /bin/systemctl apt update -apt install -y mariadb-plugin-gssapi-server=1:10.4.12+maria~bionic +apt install -y mariadb-plugin-gssapi-server=1:10.5.5+maria~focal echo "gssapi_keytab_path=/docker-entrypoint-initdb.d/mariadb.keytab" >> /etc/mysql/mariadb.conf.d/auth_gssapi.cnf echo "gssapi_principal_name=mariadb/__IP_ADDRESS_REPLACE_ME__@EXAMPLE.COM" >> /etc/mysql/mariadb.conf.d/auth_gssapi.cnf docker-entrypoint.sh mysqld diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala index 02a7ff8f16073..91498493e78e2 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala @@ -25,10 +25,17 @@ import org.apache.spark.sql.Row import org.apache.spark.sql.types.{BooleanType, ByteType, ShortType, StructType} import org.apache.spark.tags.DockerTest +/** + * To run this test suite for a specific version (e.g., ibmcom/db2:11.5.4.0): + * {{{ + * DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0 + * ./build/sbt -Pdocker-integration-tests "test-only *DB2IntegrationSuite" + * }}} + */ @DockerTest class DB2IntegrationSuite extends DockerJDBCIntegrationSuite { override val db = new DatabaseOnDocker { - override val imageName = "ibmcom/db2:11.5.0.0a" + override val imageName = sys.env.getOrElse("DB2_DOCKER_IMAGE_NAME", "ibmcom/db2:11.5.4.0") override val env = Map( "DB2INST1_PASSWORD" -> "rootpass", "LICENSE" -> "accept", diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala index fa5ce2d106a10..7ab544c17a5d8 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala @@ -29,13 +29,20 @@ import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions import org.apache.spark.sql.execution.datasources.jdbc.connection.{DB2ConnectionProvider, SecureConnectionProvider} import org.apache.spark.tags.DockerTest +/** + * To run this test suite for a specific version (e.g., ibmcom/db2:11.5.4.0): + * 
{{{ + * DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0 + * ./build/sbt -Pdocker-integration-tests "test-only *DB2KrbIntegrationSuite" + * }}} + */ @DockerTest class DB2KrbIntegrationSuite extends DockerKrbJDBCIntegrationSuite { override protected val userName = s"db2/$dockerIp" override protected val keytabFileName = "db2.keytab" override val db = new DatabaseOnDocker { - override val imageName = "ibmcom/db2:11.5.0.0a" + override val imageName = sys.env.getOrElse("DB2_DOCKER_IMAGE_NAME", "ibmcom/db2:11.5.4.0") override val env = Map( "DB2INST1_PASSWORD" -> "rootpass", "LICENSE" -> "accept", diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala index 9b9d15517d572..adee2bebe41ce 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala @@ -30,7 +30,9 @@ class MariaDBKrbIntegrationSuite extends DockerKrbJDBCIntegrationSuite { override protected val keytabFileName = "mariadb.keytab" override val db = new DatabaseOnDocker { - override val imageName = "mariadb:10.4" + // If you change `imageName`, you need to update the version of `mariadb-plugin-gssapi-server` + // in `resources/mariadb_docker_entrypoint.sh` accordingly. + override val imageName = "mariadb:10.5" override val env = Map( "MYSQL_ROOT_PASSWORD" -> "rootpass" ) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala index 6c633af1fde84..5d3deff9d2704 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala @@ -24,10 +24,18 @@ import java.util.Properties import org.apache.spark.sql.internal.SQLConf import org.apache.spark.tags.DockerTest +/** + * To run this test suite for a specific version (e.g., 2019-GA-ubuntu-16.04): + * {{{ + * MSSQLSERVER_DOCKER_IMAGE_NAME=2019-GA-ubuntu-16.04 + * ./build/sbt -Pdocker-integration-tests "test-only *MsSqlServerIntegrationSuite" + * }}} + */ @DockerTest class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite { override val db = new DatabaseOnDocker { - override val imageName = "mcr.microsoft.com/mssql/server:2019-GA-ubuntu-16.04" + override val imageName = sys.env.getOrElse("MSSQLSERVER_DOCKER_IMAGE_NAME", + "mcr.microsoft.com/mssql/server:2019-GA-ubuntu-16.04") override val env = Map( "SA_PASSWORD" -> "Sapass123", "ACCEPT_EULA" -> "Y" diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala index 4cbcb59e02de1..4cd27f8b9fff2 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala @@ -21,13 +21,20 @@ import java.math.BigDecimal import java.sql.{Connection, Date, Timestamp} import java.util.Properties -import org.apache.spark.sql.{Row, SaveMode} +import 
org.apache.spark.sql.Row import org.apache.spark.tags.DockerTest +/** + * To run this test suite for a specific version (e.g., mysql:5.7.31): + * {{{ + * MYSQL_DOCKER_IMAGE_NAME=mysql:5.7.31 + * ./build/sbt -Pdocker-integration-tests "test-only *MySQLIntegrationSuite" + * }}} + */ @DockerTest class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { override val db = new DatabaseOnDocker { - override val imageName = "mysql:5.7.28" + override val imageName = sys.env.getOrElse("MYSQL_DOCKER_IMAGE_NAME", "mysql:5.7.31") override val env = Map( "MYSQL_ROOT_PASSWORD" -> "rootpass" ) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala index 36d96a69ec659..ba71c942714da 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala @@ -26,10 +26,17 @@ import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.types.{ArrayType, DecimalType, FloatType, ShortType} import org.apache.spark.tags.DockerTest +/** + * To run this test suite for a specific version (e.g., postgres:13.0): + * {{{ + * POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0 + * ./build/sbt -Pdocker-integration-tests "test-only *PostgresIntegrationSuite" + * }}} + */ @DockerTest class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { override val db = new DatabaseOnDocker { - override val imageName = "postgres:12.0-alpine" + override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:13.0-alpine") override val env = Map( "POSTGRES_PASSWORD" -> "rootpass" ) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala index e94bf3dd588aa..6b215485247d9 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala @@ -24,13 +24,20 @@ import com.spotify.docker.client.messages.{ContainerConfig, HostConfig} import org.apache.spark.sql.execution.datasources.jdbc.connection.SecureConnectionProvider import org.apache.spark.tags.DockerTest +/** + * To run this test suite for a specific version (e.g., postgres:13.0): + * {{{ + * POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0 + * ./build/sbt -Pdocker-integration-tests "test-only *PostgresKrbIntegrationSuite" + * }}} + */ @DockerTest class PostgresKrbIntegrationSuite extends DockerKrbJDBCIntegrationSuite { override protected val userName = s"postgres/$dockerIp" override protected val keytabFileName = "postgres.keytab" override val db = new DatabaseOnDocker { - override val imageName = "postgres:12.0" + override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:13.0") override val env = Map( "POSTGRES_PASSWORD" -> "rootpass" ) From f86171aea43479f54ac2bbbca8f128baa3fc4a8c Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Sat, 3 Oct 2020 13:12:55 -0500 Subject: [PATCH 0155/1009] [SPARK-33043][ML] Handle spark.driver.maxResultSize=0 in RowMatrix heuristic computation ### What changes were proposed in this pull request? 
RowMatrix contains a computation based on spark.driver.maxResultSize. However, when this value is set to 0, the computation fails (log of 0). The fix is simply to correctly handle this setting, which means unlimited result size, by using a tree depth of 1 in the RowMatrix method. ### Why are the changes needed? Simple bug fix to make several Spark ML functions which use RowMatrix run correctly in this case. ### Does this PR introduce _any_ user-facing change? Not other than the bug fix of course. ### How was this patch tested? Existing RowMatrix tests plus a new test. Closes #29925 from srowen/SPARK-33043. Authored-by: Sean Owen Signed-off-by: Sean Owen --- .../mllib/linalg/distributed/RowMatrix.scala | 6 +++++- .../mllib/linalg/distributed/RowMatrixSuite.scala | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index 20e26cee9e0d6..07b9d91c1f59b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -786,11 +786,15 @@ class RowMatrix @Since("1.0.0") ( * Based on the formulae: (numPartitions)^(1/depth) * objectSize <= DriverMaxResultSize * @param aggregatedObjectSizeInBytes the size, in megabytes, of the object being tree aggregated */ - private[spark] def getTreeAggregateIdealDepth(aggregatedObjectSizeInBytes: Long) = { + private[spark] def getTreeAggregateIdealDepth(aggregatedObjectSizeInBytes: Long): Int = { require(aggregatedObjectSizeInBytes > 0, "Cannot compute aggregate depth heuristic based on a zero-size object to aggregate") val maxDriverResultSizeInBytes = rows.conf.get[Long](MAX_RESULT_SIZE) + if (maxDriverResultSizeInBytes <= 0) { + // Unlimited result size, so 1 is OK + return 1 + } require(maxDriverResultSizeInBytes > aggregatedObjectSizeInBytes, s"Cannot aggregate object of size $aggregatedObjectSizeInBytes Bytes, " diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala index 0a4b11935580a..adc4eeef91bb1 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala @@ -25,6 +25,7 @@ import breeze.linalg.{norm => brzNorm, svd => brzSvd, DenseMatrix => BDM, DenseV import breeze.numerics.abs import org.apache.spark.SparkFunSuite +import org.apache.spark.internal.config.MAX_RESULT_SIZE import org.apache.spark.mllib.linalg.{Matrices, Vector, Vectors} import org.apache.spark.mllib.random.RandomRDDs import org.apache.spark.mllib.util.{LocalClusterSparkContext, MLlibTestSparkContext} @@ -121,6 +122,20 @@ class RowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { assert(objectBiggerThanResultSize.getMessage.contains("it's bigger than maxResultSize")) } + test("SPARK-33043: getTreeAggregateIdealDepth with unlimited driver size") { + val originalMaxResultSize = sc.conf.get[Long](MAX_RESULT_SIZE) + sc.conf.set(MAX_RESULT_SIZE, 0L) + try { + val nbPartitions = 100 + val vectors = sc.emptyRDD[Vector] + .repartition(nbPartitions) + val rowMat = new RowMatrix(vectors) + assert(rowMat.getTreeAggregateIdealDepth(700 * 1024 * 1024) === 1) + } finally { + sc.conf.set(MAX_RESULT_SIZE, originalMaxResultSize) + } + } + 
test("similar columns") { val colMags = Vectors.dense(math.sqrt(126), math.sqrt(66), math.sqrt(94)) val expected = BDM( From 9b21fdd731489b529a52cd2074f79dc7293eed3b Mon Sep 17 00:00:00 2001 From: zero323 Date: Sat, 3 Oct 2020 13:50:38 -0700 Subject: [PATCH 0156/1009] [SPARK-32949][FOLLOW-UP][R][SQL] Reindent lines in SparkR timestamp_seconds ### What changes were proposed in this pull request? Re-indent lines of SparkR `timestamp_seconds`. ### Why are the changes needed? Current indentation is not aligned with the opening line. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #29940 from zero323/SPARK-32949-FOLLOW-UP. Authored-by: zero323 Signed-off-by: Dongjoon Hyun --- R/pkg/R/functions.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 18206f6f67778..b216f404a3ca5 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -4451,10 +4451,10 @@ setMethod("current_timestamp", #' @aliases timestamp_seconds timestamp_seconds,Column-method #' @note timestamp_seconds since 3.1.0 setMethod("timestamp_seconds", - signature(x = "Column"), - function(x) { - jc <- callJStatic( - "org.apache.spark.sql.functions", "timestamp_seconds", x@jc - ) - column(jc) - }) + signature(x = "Column"), + function(x) { + jc <- callJStatic( + "org.apache.spark.sql.functions", "timestamp_seconds", x@jc + ) + column(jc) + }) From 37c806af2bd3fb4c1f25e02f4986226e5e8d994d Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 3 Oct 2020 14:55:02 -0700 Subject: [PATCH 0157/1009] [SPARK-32958][SQL] Prune unnecessary columns from JsonToStructs ### What changes were proposed in this pull request? This patch proposes to do column pruning for `JsonToStructs` expression if we only require some fields from it. ### Why are the changes needed? `JsonToStructs` takes a schema parameter used to tell `JacksonParser` what fields are needed to parse. If `JsonToStructs` is followed by `GetStructField`. We can prune the schema to only parse certain field. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test Closes #29900 from viirya/SPARK-32958. Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../optimizer/OptimizeJsonExprs.scala | 16 +++++ .../optimizer/OptimizeJsonExprsSuite.scala | 58 +++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala index 24df480208220..59228904d84b7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala @@ -20,9 +20,14 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.types.{ArrayType, StructType} /** * Simplify redundant json related expressions. + * + * The optimization includes: + * 1. JsonToStructs(StructsToJson(child)) => child. + * 2. Prune unnecessary columns from GetStructField/GetArrayStructFields + JsonToStructs. 
*/ object OptimizeJsonExprs extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan transform { @@ -38,6 +43,17 @@ object OptimizeJsonExprs extends Rule[LogicalPlan] { // so `JsonToStructs` might throw error in runtime. Thus we cannot optimize // this case similarly. child + + case g @ GetStructField(j @ JsonToStructs(schema: StructType, _, _, _), ordinal, _) + if schema.length > 1 => + val prunedSchema = StructType(Seq(schema(ordinal))) + g.copy(child = j.copy(schema = prunedSchema), ordinal = 0) + + case g @ GetArrayStructFields(j @ JsonToStructs(schema: ArrayType, _, _, _), _, _, _, _) + if schema.elementType.asInstanceOf[StructType].length > 1 => + val prunedSchema = ArrayType(StructType(Seq(g.field)), g.containsNull) + g.copy(child = j.copy(schema = prunedSchema), ordinal = 0, numFields = 1) + } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala index 90397d4cabee8..e47a141dfed1f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala @@ -36,8 +36,10 @@ class OptimizeJsonExprsSuite extends PlanTest with ExpressionEvalHelper { val schema = StructType.fromDDL("a int, b int") private val structAtt = 'struct.struct(schema).notNull + private val jsonAttr = 'json.string private val testRelation = LocalRelation(structAtt) + private val testRelation2 = LocalRelation(jsonAttr) test("SPARK-32948: optimize from_json + to_json") { val options = Map.empty[String, String] @@ -141,4 +143,60 @@ class OptimizeJsonExprsSuite extends PlanTest with ExpressionEvalHelper { val expected2 = testRelation.select('struct.as("struct")).analyze comparePlans(optimized2, expected2) } + + test("SPARK-32958: prune unnecessary columns from GetStructField + from_json") { + val options = Map.empty[String, String] + + val query1 = testRelation2 + .select(GetStructField(JsonToStructs(schema, options, 'json), 0)) + val optimized1 = Optimizer.execute(query1.analyze) + + val prunedSchema1 = StructType.fromDDL("a int") + val expected1 = testRelation2 + .select(GetStructField(JsonToStructs(prunedSchema1, options, 'json), 0)).analyze + comparePlans(optimized1, expected1) + + val query2 = testRelation2 + .select(GetStructField(JsonToStructs(schema, options, 'json), 1)) + val optimized2 = Optimizer.execute(query2.analyze) + + val prunedSchema2 = StructType.fromDDL("b int") + val expected2 = testRelation2 + .select(GetStructField(JsonToStructs(prunedSchema2, options, 'json), 0)).analyze + comparePlans(optimized2, expected2) + } + + test("SPARK-32958: prune unnecessary columns from GetArrayStructFields + from_json") { + val options = Map.empty[String, String] + val schema1 = ArrayType(StructType.fromDDL("a int, b int"), containsNull = true) + val field1 = schema1.elementType.asInstanceOf[StructType](0) + + val query1 = testRelation2 + .select(GetArrayStructFields( + JsonToStructs(schema1, options, 'json), field1, 0, 2, true).as("a")) + val optimized1 = Optimizer.execute(query1.analyze) + + val prunedSchema1 = ArrayType(StructType.fromDDL("a int"), containsNull = true) + val expected1 = testRelation2 + .select(GetArrayStructFields( + JsonToStructs(prunedSchema1, options, 'json), field1, 0, 1, true).as("a")).analyze + comparePlans(optimized1, expected1) + + val schema2 = ArrayType( + 
StructType( + StructField("a", IntegerType, false) :: + StructField("b", IntegerType, false) :: Nil), containsNull = false) + val field2 = schema2.elementType.asInstanceOf[StructType](1) + val query2 = testRelation2 + .select(GetArrayStructFields( + JsonToStructs(schema2, options, 'json), field2, 1, 2, false).as("b")) + val optimized2 = Optimizer.execute(query2.analyze) + + val prunedSchema2 = ArrayType( + StructType(StructField("b", IntegerType, false) :: Nil), containsNull = false) + val expected2 = testRelation2 + .select(GetArrayStructFields( + JsonToStructs(prunedSchema2, options, 'json), field2, 0, 1, false).as("b")).analyze + comparePlans(optimized2, expected2) + } } From db420f79cc588dc0f98b906accb34d63a1e4664c Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Sat, 3 Oct 2020 15:14:48 -0700 Subject: [PATCH 0158/1009] [SPARK-33049][CORE] Decommission shuffle block test is flaky ### What changes were proposed in this pull request? Increase the listener bus event queue capacity, and synchronize the addition of modified blocks to the array list. ### Why are the changes needed? This test appears to be flaky in Jenkins (it cannot be reproduced locally). Given that the index file made it through and the index file is only transferred after the data file, the only two reasons I could come up with for an intermittent failure here are the listener bus dropping a message or the two block change messages being received at the same time. ### Does this PR introduce _any_ user-facing change? No (test only). ### How was this patch tested? The tests still pass on my machine, but they did before as well, so we'll need to run this through Jenkins a few times first. Closes #29929 from holdenk/fix-.BlockManagerDecommissionIntegrationSuite. Authored-by: Holden Karau Signed-off-by: Dongjoon Hyun --- .../storage/BlockManagerDecommissionIntegrationSuite.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionIntegrationSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionIntegrationSuite.scala index 094b893cdda2e..dcf313f671d5e 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionIntegrationSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionIntegrationSuite.scala @@ -69,6 +69,8 @@ class BlockManagerDecommissionIntegrationSuite extends SparkFunSuite with LocalS .set(config.STORAGE_DECOMMISSION_ENABLED, true) .set(config.STORAGE_DECOMMISSION_RDD_BLOCKS_ENABLED, persist) .set(config.STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED, shuffle) + // Since we use the bus for testing we don't want to drop any messages + .set(config.LISTENER_BUS_EVENT_QUEUE_CAPACITY, 1000000) // Just replicate blocks quickly during testing, there isn't another // workload we need to worry about.
.set(config.STORAGE_DECOMMISSION_REPLICATION_REATTEMPT_INTERVAL, 10L) @@ -137,7 +139,7 @@ class BlockManagerDecommissionIntegrationSuite extends SparkFunSuite with LocalS taskEndEvents.add(taskEnd) } - override def onBlockUpdated(blockUpdated: SparkListenerBlockUpdated): Unit = { + override def onBlockUpdated(blockUpdated: SparkListenerBlockUpdated): Unit = synchronized { blocksUpdated.append(blockUpdated) } From fab53212cb110a81696cee8546c35095332f6e09 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Sun, 4 Oct 2020 16:11:06 -0700 Subject: [PATCH 0159/1009] [SPARK-33065][TESTS] Expand the stack size of a thread in a test in LocalityPlacementStrategySuite for Java 11 with sbt ### What changes were proposed in this pull request? This PR fixes an issue that a test in `LocalityPlacementStrategySuite` fails with Java 11 due to `StackOverflowError`. ``` [info] - handle large number of containers and tasks (SPARK-18750) *** FAILED *** (170 milliseconds) [info] StackOverflowError should not be thrown; however, got: [info] [info] java.lang.StackOverflowError [info] at java.base/java.util.concurrent.ConcurrentHashMap.putVal(ConcurrentHashMap.java:1012) [info] at java.base/java.util.concurrent.ConcurrentHashMap.putIfAbsent(ConcurrentHashMap.java:1541) [info] at java.base/java.lang.ClassLoader.getClassLoadingLock(ClassLoader.java:668) [info] at java.base/jdk.internal.loader.BuiltinClassLoader.loadClassOrNull(BuiltinClassLoader.java:591) [info] at java.base/jdk.internal.loader.BuiltinClassLoader.loadClass(BuiltinClassLoader.java:579) [info] at java.base/jdk.internal.loader.ClassLoaders$AppClassLoader.loadClass(ClassLoaders.java:178) [info] at java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:522) ``` The solution is to expand the stack size of a thread in the test from 32KB to 256KB. Currently, the stack size is specified as 32KB but the actual stack size can be greater than 32KB. According to the code of Hotspot, the minimum stack size is prefer to the specified size. Java 8: https://hg.openjdk.java.net/jdk8u/jdk8u/hotspot/file/c92ba514724d/src/os/linux/vm/os_linux.cpp#l900 Java 11: https://hg.openjdk.java.net/jdk-updates/jdk11u/file/73edf743a93a/src/hotspot/os/posix/os_posix.cpp#l1555 For Linux on x86_64, the minimum stack size seems to be 224KB and 136KB for Java 8 and Java 11 respectively. So, the actual stack size should be 224KB rather than 32KB for Java 8 on x86_64/Linux. As the test passes for Java 8 but doesn't for Java 11, 224KB is enough while 136KB is not. So I think specifing 256KB is reasonable for the new stack size. ### Why are the changes needed? To pass the test for Java 11. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Following command with Java 11. ``` build/sbt -Pyarn clean package "testOnly org.apache.spark.deploy.yarn.LocalityPlacementStrategySuite" ``` Closes #29943 from sarutak/fix-stack-size. 
Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .../spark/deploy/yarn/LocalityPlacementStrategySuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/LocalityPlacementStrategySuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/LocalityPlacementStrategySuite.scala index 3c9209c292418..d2397504ba140 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/LocalityPlacementStrategySuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/LocalityPlacementStrategySuite.scala @@ -43,7 +43,7 @@ class LocalityPlacementStrategySuite extends SparkFunSuite { } } - val thread = new Thread(new ThreadGroup("test"), runnable, "test-thread", 32 * 1024) + val thread = new Thread(new ThreadGroup("test"), runnable, "test-thread", 256 * 1024) thread.start() thread.join() From 4ab9aa03055d3ad90137efacb2e00eff4ac3fbf1 Mon Sep 17 00:00:00 2001 From: reidy-p Date: Mon, 5 Oct 2020 11:48:28 +0900 Subject: [PATCH 0160/1009] [SPARK-33017][PYTHON] Add getCheckpointDir method to PySpark Context ### What changes were proposed in this pull request? Adding a method to get the checkpoint directory from the PySpark context to match the Scala API ### Why are the changes needed? To make the Scala and Python APIs consistent and remove the need to use the JavaObject ### Does this PR introduce _any_ user-facing change? Yes, there is a new method which makes it easier to get the checkpoint directory directly rather than using the JavaObject #### Previous behaviour: ```python >>> spark.sparkContext.setCheckpointDir('/tmp/spark/checkpoint/') >>> sc._jsc.sc().getCheckpointDir().get() 'file:/tmp/spark/checkpoint/63f7b67c-e5dc-4d11-a70c-33554a71717a' ``` This method returns a confusing Scala error if it has not been set ```python >>> sc._jsc.sc().getCheckpointDir().get() Traceback (most recent call last): File "", line 1, in File "/home/paul/Desktop/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1305, in __call__ File "/home/paul/Desktop/spark/python/pyspark/sql/utils.py", line 111, in deco return f(*a, **kw) File "/home/paul/Desktop/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py", line 328, in get_return_value py4j.protocol.Py4JJavaError: An error occurred while calling o25.get. 
: java.util.NoSuchElementException: None.get at scala.None$.get(Option.scala:529) at scala.None$.get(Option.scala:527) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357) at py4j.Gateway.invoke(Gateway.java:282) at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) at py4j.commands.CallCommand.execute(CallCommand.java:79) at py4j.GatewayConnection.run(GatewayConnection.java:238) at java.lang.Thread.run(Thread.java:748) ``` #### New method: ```python >>> spark.sparkContext.setCheckpointDir('/tmp/spark/checkpoint/') >>> spark.sparkContext.getCheckpointDir() 'file:/tmp/spark/checkpoint/b38aca2e-8ace-44fc-a4c4-f4e36c2da2a7' ``` ``getCheckpointDir()`` returns ``None`` if it has not been set ```python >>> print(spark.sparkContext.getCheckpointDir()) None ``` ### How was this patch tested? Added to existing unit tests. But I'm not sure how to add a test for the case where ``getCheckpointDir()`` should return ``None`` since the existing checkpoint tests set the checkpoint directory in the ``setUp`` method before any tests are run as far as I can tell. Closes #29918 from reidy-p/SPARK-33017. Authored-by: reidy-p Signed-off-by: HyukjinKwon --- python/pyspark/__init__.py | 3 ++- python/pyspark/context.py | 12 +++++++++++- python/pyspark/context.pyi | 1 + python/pyspark/tests/test_context.py | 3 +++ 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py index fb05819e74124..19269e4466507 100644 --- a/python/pyspark/__init__.py +++ b/python/pyspark/__init__.py @@ -50,7 +50,6 @@ import types from pyspark.conf import SparkConf -from pyspark.context import SparkContext from pyspark.rdd import RDD, RDDBarrier from pyspark.files import SparkFiles from pyspark.status import StatusTracker, SparkJobInfo, SparkStageInfo @@ -113,6 +112,8 @@ def wrapper(self, *args, **kwargs): return func(self, **kwargs) return wrapper +# To avoid circular dependencies +from pyspark.context import SparkContext # for back compatibility from pyspark.sql import SQLContext, HiveContext, Row # noqa: F401 diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 55a5657b64055..4213a742a1dc4 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -28,7 +28,7 @@ from py4j.protocol import Py4JError from py4j.java_gateway import is_instance_of -from pyspark import accumulators +from pyspark import accumulators, since from pyspark.accumulators import Accumulator from pyspark.broadcast import Broadcast, BroadcastPickleRegistry from pyspark.conf import SparkConf @@ -956,6 +956,16 @@ def setCheckpointDir(self, dirName): """ self._jsc.sc().setCheckpointDir(dirName) + @since(3.1) + def getCheckpointDir(self): + """ + Return the directory where RDDs are checkpointed. Returns None if no + checkpoint directory has been set. + """ + if not self._jsc.sc().getCheckpointDir().isEmpty(): + return self._jsc.sc().getCheckpointDir().get() + return None + def _getJavaStorageLevel(self, storageLevel): """ Returns a Java StorageLevel based on a pyspark.StorageLevel. 
diff --git a/python/pyspark/context.pyi b/python/pyspark/context.pyi index 76ecf8911471a..2789a38b3be9f 100644 --- a/python/pyspark/context.pyi +++ b/python/pyspark/context.pyi @@ -152,6 +152,7 @@ class SparkContext: def addFile(self, path: str, recursive: bool = ...) -> None: ... def addPyFile(self, path: str) -> None: ... def setCheckpointDir(self, dirName: str) -> None: ... + def getCheckpointDir(self) -> Optional[str]: ... def setJobGroup( self, groupId: str, description: str, interruptOnCancel: bool = ... ) -> None: ... diff --git a/python/pyspark/tests/test_context.py b/python/pyspark/tests/test_context.py index 9b6b74a111288..d86f6c3c1571c 100644 --- a/python/pyspark/tests/test_context.py +++ b/python/pyspark/tests/test_context.py @@ -43,6 +43,7 @@ def test_basic_checkpointing(self): self.assertFalse(flatMappedRDD.isCheckpointed()) self.assertTrue(flatMappedRDD.getCheckpointFile() is None) + self.assertFalse(self.sc.getCheckpointDir() is None) flatMappedRDD.checkpoint() result = flatMappedRDD.collect() @@ -51,6 +52,8 @@ def test_basic_checkpointing(self): self.assertEqual(flatMappedRDD.collect(), result) self.assertEqual("file:" + self.checkpointDir.name, os.path.dirname(os.path.dirname(flatMappedRDD.getCheckpointFile()))) + self.assertEqual(self.sc.getCheckpointDir(), + os.path.dirname(flatMappedRDD.getCheckpointFile())) def test_checkpoint_and_restore(self): parCollection = self.sc.parallelize([1, 2, 3, 4]) From e83d03ca4861a69cd688beacc544b3f6dae32ae0 Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 5 Oct 2020 13:18:12 +0900 Subject: [PATCH 0161/1009] [SPARK-33040][R][ML] Add SparkR wrapper for vector_to_array ### What changes were proposed in this pull request? Add SparkR wrapper for `o.a.s.ml.functions.vector_to_array` ### Why are the changes needed? - Currently ML vectors, including predictions, are almost inaccessible to R users. That's is a serious loss of functionality. - Feature parity. ### Does this PR introduce _any_ user-facing change? Yes, new R function is added. ### How was this patch tested? - New unit tests. - Manual verification. Closes #29917 from zero323/SPARK-33040. Authored-by: zero323 Signed-off-by: HyukjinKwon --- R/pkg/NAMESPACE | 1 + R/pkg/R/functions.R | 33 +++++++++++++++++++++++++++ R/pkg/R/generics.R | 4 ++++ R/pkg/tests/fulltests/test_sparkSQL.R | 3 ++- 4 files changed, 40 insertions(+), 1 deletion(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 4ea05b25ecc9e..25162f3e23b38 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -427,6 +427,7 @@ exportMethods("%<=>%", "variance", "var_pop", "var_samp", + "vector_to_array", "weekofyear", "when", "window", diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index b216f404a3ca5..61ea90efb348d 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -345,6 +345,17 @@ NULL #' head(tmp)} NULL +#' ML functions for Column operations +#' +#' ML functions defined for \code{Column}. +#' +#' @param x Column to compute on. +#' @param ... additional argument(s). +#' @name column_ml_functions +#' @rdname column_ml_functions +#' @family ml functions +NULL + #' @details #' \code{lit}: A new Column is created to represent the literal value. #' If the parameter is a Column, it is returned unchanged. @@ -4458,3 +4469,25 @@ setMethod("timestamp_seconds", ) column(jc) }) + +#' @details +#' \code{vector_to_array} Converts a column of MLlib sparse/dense vectors into +#' a column of dense arrays. +#' +#' @param dtype The data type of the output array. Valid values: "float64" or "float32". 
+#' +#' @rdname column_ml_functions +#' @aliases vector_to_array vector_to_array,Column-method +#' @note vector_to_array since 3.1.0 +setMethod("vector_to_array", + signature(x = "Column"), + function(x, dtype = c("float32", "float64")) { + dtype <- match.arg(dtype) + jc <- callJStatic( + "org.apache.spark.ml.functions", + "vector_to_array", + x@jc, + dtype + ) + column(jc) + }) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 985678679dec8..993fc758adbe5 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -1449,6 +1449,10 @@ setGeneric("var_pop", function(x) { standardGeneric("var_pop") }) #' @name NULL setGeneric("var_samp", function(x) { standardGeneric("var_samp") }) +#' @rdname column_ml_functions +#' @name NULL +setGeneric("vector_to_array", function(x, ...) { standardGeneric("vector_to_array") }) + #' @rdname column_datetime_functions #' @name NULL setGeneric("weekofyear", function(x) { standardGeneric("weekofyear") }) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index c36620227593d..c3b271b1205c5 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1424,7 +1424,8 @@ test_that("column functions", { date_trunc("quarter", c) + current_date() + current_timestamp() c25 <- overlay(c1, c2, c3, c3) + overlay(c1, c2, c3) + overlay(c1, c2, 1) + overlay(c1, c2, 3, 4) - c26 <- timestamp_seconds(c1) + c26 <- timestamp_seconds(c1) + vector_to_array(c) + + vector_to_array(c, "float32") + vector_to_array(c, "float64") c27 <- nth_value("x", 1L) + nth_value("y", 2, TRUE) + nth_value(column("v"), 3) + nth_value(column("z"), 4L, FALSE) From 24f890e8e81ee03fe0d9ce4c8f232784e9fdaccd Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 5 Oct 2020 16:31:17 +0900 Subject: [PATCH 0162/1009] [SPARK-33040][FOLLOW-UP][R] Reorder argument choices and add examples ### What changes were proposed in this pull request? - Reorder choices of `dtype` to match Scala defaults. - Add example to ml_functions. ### Why are the changes needed? As requested: - https://github.com/apache/spark/pull/29917#pullrequestreview-501715344 - https://github.com/apache/spark/pull/29917#pullrequestreview-501716521 ### Does this PR introduce _any_ user-facing change? No (changes to newly added component). ### How was this patch tested? Existing tests. Closes #29944 from zero323/SPARK-33040-FOLLOW-UP. 
Authored-by: zero323 Signed-off-by: HyukjinKwon --- R/pkg/R/functions.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 61ea90efb348d..959edf29e2429 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -354,6 +354,11 @@ NULL #' @name column_ml_functions #' @rdname column_ml_functions #' @family ml functions +#' @examples +#' \dontrun{ +#' df <- read.df("data/mllib/sample_libsvm_data.txt", source = "libsvm") +#' head(select(df, vector_to_array(df$features))) +#' } NULL #' @details @@ -4481,7 +4486,7 @@ setMethod("timestamp_seconds", #' @note vector_to_array since 3.1.0 setMethod("vector_to_array", signature(x = "Column"), - function(x, dtype = c("float32", "float64")) { + function(x, dtype = c("float64", "float32")) { dtype <- match.arg(dtype) jc <- callJStatic( "org.apache.spark.ml.functions", From 0fb2574d4e75fa4a545da1d53357c2359c0bffeb Mon Sep 17 00:00:00 2001 From: Yuning Zhang Date: Mon, 5 Oct 2020 20:25:57 +0900 Subject: [PATCH 0163/1009] [SPARK-33042][SQL][TEST] Add a test case to ensure changes to spark.sql.optimizer.maxIterations take effect at runtime ### What changes were proposed in this pull request? Add a test case to ensure changes to `spark.sql.optimizer.maxIterations` take effect at runtime. ### Why are the changes needed? Currently, there is only one related test case: https://github.com/apache/spark/blob/master/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala#L156 However, this test case only checks the value of the conf can be changed at runtime. It does not check the updated value is actually used by the Optimizer. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? unit test Closes #29919 from yuningzh-db/add_optimizer_test. Authored-by: Yuning Zhang Signed-off-by: HyukjinKwon --- .../catalyst/optimizer/OptimizerSuite.scala | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerSuite.scala diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerSuite.scala new file mode 100644 index 0000000000000..b48555ec2fb28 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerSuite.scala @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.errors.TreeNodeException +import org.apache.spark.sql.catalyst.expressions.{Alias, IntegerLiteral, Literal} +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation, Project} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.internal.SQLConf + +/** + * A dummy optimizer rule for testing that decrements integer literals until 0. + */ +object DecrementLiterals extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan transformExpressions { + case IntegerLiteral(i) if i > 0 => Literal(i - 1) + } +} + +class OptimizerSuite extends PlanTest { + test("Optimizer exceeds max iterations") { + val iterations = 5 + val maxIterationsNotEnough = 3 + val maxIterationsEnough = 10 + val analyzed = Project(Alias(Literal(iterations), "attr")() :: Nil, OneRowRelation()).analyze + + withSQLConf(SQLConf.OPTIMIZER_MAX_ITERATIONS.key -> maxIterationsNotEnough.toString) { + val optimizer = new SimpleTestOptimizer() { + override def defaultBatches: Seq[Batch] = + Batch("test", fixedPoint, + DecrementLiterals) :: Nil + } + + val message1 = intercept[TreeNodeException[LogicalPlan]] { + optimizer.execute(analyzed) + }.getMessage + assert(message1.startsWith(s"Max iterations ($maxIterationsNotEnough) reached for batch " + + s"test, please set '${SQLConf.OPTIMIZER_MAX_ITERATIONS.key}' to a larger value.")) + + withSQLConf(SQLConf.OPTIMIZER_MAX_ITERATIONS.key -> maxIterationsEnough.toString) { + try { + optimizer.execute(analyzed) + } catch { + case ex: TreeNodeException[LogicalPlan] + if ex.getMessage.contains(SQLConf.OPTIMIZER_MAX_ITERATIONS.key) => + fail("optimizer.execute should not reach max iterations.") + } + } + + val message2 = intercept[TreeNodeException[LogicalPlan]] { + optimizer.execute(analyzed) + }.getMessage + assert(message2.startsWith(s"Max iterations ($maxIterationsNotEnough) reached for batch " + + s"test, please set '${SQLConf.OPTIMIZER_MAX_ITERATIONS.key}' to a larger value.")) + } + } +} From 023eb482b23b5d63d2157b3def9926673844e0a3 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Mon, 5 Oct 2020 22:00:42 +0900 Subject: [PATCH 0164/1009] [SPARK-32914][SQL] Avoid constructing dataType multiple times ### What changes were proposed in this pull request? Some expression's data type not a static value. It needs to be constructed a new object when calling `dataType` function. E.g.: `CaseWhen`. We should avoid constructing dataType multiple times because it may be used many times. E.g.: [`HyperLogLogPlusPlus.update`](https://github.com/apache/spark/blob/10edeafc69250afef8c71ed7b3c77992f67aa4ff/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala#L122). ### Why are the changes needed? Improve query performance. 
for example: ```scala spark.range(100000000L).selectExpr("approx_count_distinct(case when id % 400 > 20 then id else 0 end)").show ``` Profiling result: ``` -- Execution profile --- Total samples : 18365 Frame buffer usage : 2.6688% --- 58443254327 ns (31.82%), 5844 samples [ 0] GenericTaskQueueSet, (MemoryType)1>::steal_best_of_2(unsigned int, int*, StarTask&) [ 1] StealTask::do_it(GCTaskManager*, unsigned int) [ 2] GCTaskThread::run() [ 3] java_start(Thread*) [ 4] start_thread --- 6140668667 ns (3.34%), 614 samples [ 0] GenericTaskQueueSet, (MemoryType)1>::peek() [ 1] ParallelTaskTerminator::offer_termination(TerminatorTerminator*) [ 2] StealTask::do_it(GCTaskManager*, unsigned int) [ 3] GCTaskThread::run() [ 4] java_start(Thread*) [ 5] start_thread --- 5679994036 ns (3.09%), 568 samples [ 0] scala.collection.generic.Growable.$plus$plus$eq [ 1] scala.collection.generic.Growable.$plus$plus$eq$ [ 2] scala.collection.mutable.ListBuffer.$plus$plus$eq [ 3] scala.collection.mutable.ListBuffer.$plus$plus$eq [ 4] scala.collection.generic.GenericTraversableTemplate.$anonfun$flatten$1 [ 5] scala.collection.generic.GenericTraversableTemplate$$Lambda$107.411506101.apply [ 6] scala.collection.immutable.List.foreach [ 7] scala.collection.generic.GenericTraversableTemplate.flatten [ 8] scala.collection.generic.GenericTraversableTemplate.flatten$ [ 9] scala.collection.AbstractTraversable.flatten [10] org.apache.spark.internal.config.ConfigEntry.readString [11] org.apache.spark.internal.config.ConfigEntryWithDefault.readFrom [12] org.apache.spark.sql.internal.SQLConf.getConf [13] org.apache.spark.sql.internal.SQLConf.caseSensitiveAnalysis [14] org.apache.spark.sql.types.DataType.sameType [15] org.apache.spark.sql.catalyst.analysis.TypeCoercion$.$anonfun$haveSameType$1 [16] org.apache.spark.sql.catalyst.analysis.TypeCoercion$.$anonfun$haveSameType$1$adapted [17] org.apache.spark.sql.catalyst.analysis.TypeCoercion$$$Lambda$1527.1975399904.apply [18] scala.collection.IndexedSeqOptimized.prefixLengthImpl [19] scala.collection.IndexedSeqOptimized.forall [20] scala.collection.IndexedSeqOptimized.forall$ [21] scala.collection.mutable.ArrayBuffer.forall [22] org.apache.spark.sql.catalyst.analysis.TypeCoercion$.haveSameType [23] org.apache.spark.sql.catalyst.expressions.ComplexTypeMergingExpression.dataTypeCheck [24] org.apache.spark.sql.catalyst.expressions.ComplexTypeMergingExpression.dataTypeCheck$ [25] org.apache.spark.sql.catalyst.expressions.CaseWhen.dataTypeCheck [26] org.apache.spark.sql.catalyst.expressions.ComplexTypeMergingExpression.dataType [27] org.apache.spark.sql.catalyst.expressions.ComplexTypeMergingExpression.dataType$ [28] org.apache.spark.sql.catalyst.expressions.CaseWhen.dataType [29] org.apache.spark.sql.catalyst.expressions.aggregate.HyperLogLogPlusPlus.update [30] org.apache.spark.sql.execution.aggregate.AggregationIterator$$anonfun$1.$anonfun$applyOrElse$2 [31] org.apache.spark.sql.execution.aggregate.AggregationIterator$$anonfun$1.$anonfun$applyOrElse$2$adapted [32] org.apache.spark.sql.execution.aggregate.AggregationIterator$$anonfun$1$$Lambda$1534.1383512673.apply [33] org.apache.spark.sql.execution.aggregate.AggregationIterator.$anonfun$generateProcessRow$7 [34] org.apache.spark.sql.execution.aggregate.AggregationIterator.$anonfun$generateProcessRow$7$adapted [35] org.apache.spark.sql.execution.aggregate.AggregationIterator$$Lambda$1555.725788712.apply ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? 
Manual test and benchmark test: Benchmark code | Before this PR(Milliseconds) | After this PR(Milliseconds) --- | --- | --- spark.range(100000000L).selectExpr("approx_count_distinct(case when id % 400 > 20 then id else 0 end)").collect() | 56462 | 3794 Closes #29790 from wangyum/SPARK-32914. Authored-by: Yuming Wang Signed-off-by: HyukjinKwon --- .../spark/sql/catalyst/expressions/Expression.scala | 4 +++- .../aggregate/ApproximatePercentile.scala | 4 +++- .../catalyst/expressions/collectionOperations.scala | 13 ++++++++++--- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index ce4aa1c2b7c2f..35b192cc5544a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -1060,10 +1060,12 @@ trait ComplexTypeMergingExpression extends Expression { s" The input types found are\n\t${inputTypesForMerging.mkString("\n\t")}") } - override def dataType: DataType = { + private lazy val internalDataType: DataType = { dataTypeCheck inputTypesForMerging.reduceLeft(TypeCoercion.findCommonTypeDifferentOnlyInNullFlags(_, _).get) } + + override def dataType: DataType = internalDataType } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala index 3327f4ccf4461..2a5275e75d4f9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala @@ -187,10 +187,12 @@ case class ApproximatePercentile( override def nullable: Boolean = true // The result type is the same as the input type. 
- override def dataType: DataType = { + private lazy val internalDataType: DataType = { if (returnPercentileArray) ArrayType(child.dataType, false) else child.dataType } + override def dataType: DataType = internalDataType + override def prettyName: String = getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("percentile_approx") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 8555f63df986f..8719b2e065663 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -371,7 +371,7 @@ case class MapEntries(child: Expression) @transient private lazy val childDataType: MapType = child.dataType.asInstanceOf[MapType] - override def dataType: DataType = { + private lazy val internalDataType: DataType = { ArrayType( StructType( StructField("key", childDataType.keyType, false) :: @@ -380,6 +380,8 @@ case class MapEntries(child: Expression) false) } + override def dataType: DataType = internalDataType + override protected def nullSafeEval(input: Any): Any = { val childMap = input.asInstanceOf[MapData] val keys = childMap.keyArray() @@ -3504,13 +3506,16 @@ object ArrayUnion { since = "2.4.0") case class ArrayIntersect(left: Expression, right: Expression) extends ArrayBinaryLike with ComplexTypeMergingExpression { - override def dataType: DataType = { + + private lazy val internalDataType: DataType = { dataTypeCheck ArrayType(elementType, left.dataType.asInstanceOf[ArrayType].containsNull && right.dataType.asInstanceOf[ArrayType].containsNull) } + override def dataType: DataType = internalDataType + @transient lazy val evalIntersect: (ArrayData, ArrayData) => ArrayData = { if (TypeUtils.typeWithProperEquals(elementType)) { (array1, array2) => @@ -3747,11 +3752,13 @@ case class ArrayIntersect(left: Expression, right: Expression) extends ArrayBina case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryLike with ComplexTypeMergingExpression { - override def dataType: DataType = { + private lazy val internalDataType: DataType = { dataTypeCheck left.dataType } + override def dataType: DataType = internalDataType + @transient lazy val evalExcept: (ArrayData, ArrayData) => ArrayData = { if (TypeUtils.typeWithProperEquals(elementType)) { (array1, array2) => From a09747bf326677e212fbc284285cce822571c315 Mon Sep 17 00:00:00 2001 From: gschiavon Date: Mon, 5 Oct 2020 09:02:06 -0700 Subject: [PATCH 0165/1009] [SPARK-33063][K8S] Improve error message for insufficient K8s volume confs ### What changes were proposed in this pull request? Provide error handling when creating kubernetes volumes. Right now they keys are expected to be there and if not it fails with a `key not found` error, but not knowing why do you need that `key`. Also I renamed some tests that didn't indicate the kind of kubernetes volume ### Why are the changes needed? Easier for the users to understand why `spark-submit` command is failing if not providing they right kubernetes volumes properties. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? It was tested with the current tests plus added one more. [Jira ticket](https://issues.apache.org/jira/browse/SPARK-33063) Closes #29941 from Gschiavon/SPARK-33063-provide-error-handling-k8s-volumes. 
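As a minimal, standalone sketch of the fail-fast validation this PR describes (not the actual Spark code, which follows in the diff below), the idea is to look up each required option explicitly and raise an error that names both the missing key and the volume type that needs it. The helper name `requireOption` and the example keys are illustrative only.

```scala
import scala.util.Try

// Look the key up explicitly; if it is absent, fail with a message that says which
// volume type required it instead of a bare "key not found" error.
def requireOption(options: Map[String, String], key: String, volumeType: String): String =
  options.getOrElse(key,
    throw new NoSuchElementException(s"$key is required for $volumeType"))

val opts = Map("test.hostPath.data.options.path" -> "/checkpoints")

// Present key: returns the configured value.
println(requireOption(opts, "test.hostPath.data.options.path", "hostPath"))

// Missing key: the failure message now points at both the key and the volume type.
println(Try(requireOption(opts, "test.persistentVolumeClaim.data.options.claimName",
  "persistentVolumeClaim")))
```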
Authored-by: gschiavon Signed-off-by: Dongjoon Hyun --- .../spark/deploy/k8s/KubernetesVolumeUtils.scala | 10 ++++++++++ .../spark/deploy/k8s/KubernetesVolumeUtilsSuite.scala | 11 +++++++++++ 2 files changed, 21 insertions(+) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtils.scala index 77921f6338c74..b2eacca042794 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtils.scala @@ -67,6 +67,7 @@ private[spark] object KubernetesVolumeUtils { volumeType match { case KUBERNETES_VOLUMES_HOSTPATH_TYPE => val pathKey = s"$volumeType.$volumeName.$KUBERNETES_VOLUMES_OPTIONS_PATH_KEY" + verifyOptionKey(options, pathKey, KUBERNETES_VOLUMES_HOSTPATH_TYPE) KubernetesHostPathVolumeConf(options(pathKey)) case KUBERNETES_VOLUMES_PVC_TYPE => @@ -74,6 +75,7 @@ private[spark] object KubernetesVolumeUtils { val storageClassKey = s"$volumeType.$volumeName.$KUBERNETES_VOLUMES_OPTIONS_CLAIM_STORAGE_CLASS_KEY" val sizeLimitKey = s"$volumeType.$volumeName.$KUBERNETES_VOLUMES_OPTIONS_SIZE_LIMIT_KEY" + verifyOptionKey(options, claimNameKey, KUBERNETES_VOLUMES_PVC_TYPE) KubernetesPVCVolumeConf( options(claimNameKey), options.get(storageClassKey), @@ -87,6 +89,8 @@ private[spark] object KubernetesVolumeUtils { case KUBERNETES_VOLUMES_NFS_TYPE => val pathKey = s"$volumeType.$volumeName.$KUBERNETES_VOLUMES_OPTIONS_PATH_KEY" val serverKey = s"$volumeType.$volumeName.$KUBERNETES_VOLUMES_OPTIONS_SERVER_KEY" + verifyOptionKey(options, pathKey, KUBERNETES_VOLUMES_NFS_TYPE) + verifyOptionKey(options, serverKey, KUBERNETES_VOLUMES_NFS_TYPE) KubernetesNFSVolumeConf( options(pathKey), options(serverKey)) @@ -95,4 +99,10 @@ private[spark] object KubernetesVolumeUtils { throw new IllegalArgumentException(s"Kubernetes Volume type `$volumeType` is not supported") } } + + private def verifyOptionKey(options: Map[String, String], key: String, msg: String): Unit = { + if (!options.isDefinedAt(key)) { + throw new NoSuchElementException(key + s" is required for $msg") + } + } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtilsSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtilsSuite.scala index 6596c5e2ad2e7..349cbd04f6027 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtilsSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtilsSuite.scala @@ -118,6 +118,17 @@ class KubernetesVolumeUtilsSuite extends SparkFunSuite { assert(e.getMessage.contains("hostPath.volumeName.options.path")) } + test("SPARK-33063: Fails on missing option key in persistentVolumeClaim") { + val sparkConf = new SparkConf(false) + sparkConf.set("test.persistentVolumeClaim.volumeName.mount.path", "/path") + sparkConf.set("test.persistentVolumeClaim.volumeName.mount.readOnly", "true") + + val e = intercept[NoSuchElementException] { + KubernetesVolumeUtils.parseVolumesWithPrefix(sparkConf, "test.") + } + assert(e.getMessage.contains("persistentVolumeClaim.volumeName.options.claimName")) + } + test("Parses read-only nfs volumes correctly") { val sparkConf = new SparkConf(false) 
sparkConf.set("test.nfs.volumeName.mount.path", "/path") From 14aeab3b279b1c23cddb86b97afc048c195b9b75 Mon Sep 17 00:00:00 2001 From: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Date: Mon, 5 Oct 2020 09:30:27 -0700 Subject: [PATCH 0166/1009] [SPARK-33038][SQL] Combine AQE initial and current plan string when two plans are the same ### What changes were proposed in this pull request? This PR combines the current plan and the initial plan in the AQE query plan string when the two plans are the same. It also removes the `== Current Plan ==` and `== Initial Plan ==` headers: Before ```scala AdaptiveSparkPlan isFinalPlan=false +- == Current Plan == SortMergeJoin [key#13], [a#23], Inner :- Sort [key#13 ASC NULLS FIRST], false, 0 : +- Exchange hashpartitioning(key#13, 5), true, [id=#94] ... +- == Initial Plan == SortMergeJoin [key#13], [a#23], Inner :- Sort [key#13 ASC NULLS FIRST], false, 0 : +- Exchange hashpartitioning(key#13, 5), true, [id=#94] ... ``` After ```scala AdaptiveSparkPlan isFinalPlan=false +- SortMergeJoin [key#13], [a#23], Inner :- Sort [key#13 ASC NULLS FIRST], false, 0 : +- Exchange hashpartitioning(key#13, 5), true, [id=#94] ... ``` For SQL `EXPLAIN` output: Before ```scala AdaptiveSparkPlan (8) +- == Current Plan == Sort (7) +- Exchange (6) ... +- == Initial Plan == Sort (7) +- Exchange (6) ... ``` After ```scala AdaptiveSparkPlan (8) +- Sort (7) +- Exchange (6) ... ``` ### Why are the changes needed? To simplify the AQE plan string by removing the redundant plan information. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Modified the existing unit test. Closes #29915 from allisonwang-db/aqe-explain. Authored-by: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Signed-off-by: Xiao Li --- .../adaptive/AdaptiveSparkPlanExec.scala | 50 ++++--- .../sql-tests/results/explain-aqe.sql.out | 123 ++---------------- .../adaptive/AdaptiveQueryExecSuite.scala | 4 +- 3 files changed, 47 insertions(+), 130 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index 6c197fedd8c56..0e032569bb8a7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -300,26 +300,40 @@ case class AdaptiveSparkPlanExec( maxFields, printNodeId, indent) - generateTreeStringWithHeader( - if (isFinalPlan) "Final Plan" else "Current Plan", - currentPhysicalPlan, - depth, - lastChildren, - append, - verbose, - maxFields, - printNodeId) - generateTreeStringWithHeader( - "Initial Plan", - initialPlan, - depth, - lastChildren, - append, - verbose, - maxFields, - printNodeId) + if (currentPhysicalPlan.fastEquals(initialPlan)) { + currentPhysicalPlan.generateTreeString( + depth + 1, + lastChildren :+ true, + append, + verbose, + prefix = "", + addSuffix = false, + maxFields, + printNodeId, + indent) + } else { + generateTreeStringWithHeader( + if (isFinalPlan) "Final Plan" else "Current Plan", + currentPhysicalPlan, + depth, + lastChildren, + append, + verbose, + maxFields, + printNodeId) + generateTreeStringWithHeader( + "Initial Plan", + initialPlan, + depth, + lastChildren, + append, + verbose, + maxFields, + printNodeId) + } } + private def generateTreeStringWithHeader( header: String, plan: SparkPlan, diff --git 
a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out index 3a850160b43e0..5435cde050fd1 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out @@ -54,16 +54,7 @@ struct -- !query output == Physical Plan == AdaptiveSparkPlan (8) -+- == Current Plan == - Sort (7) - +- Exchange (6) - +- HashAggregate (5) - +- Exchange (4) - +- HashAggregate (3) - +- Filter (2) - +- Scan parquet default.explain_temp1 (1) -+- == Initial Plan == - Sort (7) ++- Sort (7) +- Exchange (6) +- HashAggregate (5) +- Exchange (4) @@ -126,16 +117,7 @@ struct -- !query output == Physical Plan == AdaptiveSparkPlan (8) -+- == Current Plan == - Project (7) - +- Filter (6) - +- HashAggregate (5) - +- Exchange (4) - +- HashAggregate (3) - +- Filter (2) - +- Scan parquet default.explain_temp1 (1) -+- == Initial Plan == - Project (7) ++- Project (7) +- Filter (6) +- HashAggregate (5) +- Exchange (4) @@ -196,17 +178,7 @@ struct -- !query output == Physical Plan == AdaptiveSparkPlan (9) -+- == Current Plan == - HashAggregate (8) - +- Exchange (7) - +- HashAggregate (6) - +- Union (5) - :- Filter (2) - : +- Scan parquet default.explain_temp1 (1) - +- Filter (4) - +- Scan parquet default.explain_temp1 (3) -+- == Initial Plan == - HashAggregate (8) ++- HashAggregate (8) +- Exchange (7) +- HashAggregate (6) +- Union (5) @@ -274,15 +246,7 @@ struct -- !query output == Physical Plan == AdaptiveSparkPlan (7) -+- == Current Plan == - BroadcastHashJoin Inner BuildRight (6) - :- Filter (2) - : +- Scan parquet default.explain_temp1 (1) - +- BroadcastExchange (5) - +- Filter (4) - +- Scan parquet default.explain_temp2 (3) -+- == Initial Plan == - BroadcastHashJoin Inner BuildRight (6) ++- BroadcastHashJoin Inner BuildRight (6) :- Filter (2) : +- Scan parquet default.explain_temp1 (1) +- BroadcastExchange (5) @@ -337,14 +301,7 @@ struct -- !query output == Physical Plan == AdaptiveSparkPlan (6) -+- == Current Plan == - BroadcastHashJoin LeftOuter BuildRight (5) - :- Scan parquet default.explain_temp1 (1) - +- BroadcastExchange (4) - +- Filter (3) - +- Scan parquet default.explain_temp2 (2) -+- == Initial Plan == - BroadcastHashJoin LeftOuter BuildRight (5) ++- BroadcastHashJoin LeftOuter BuildRight (5) :- Scan parquet default.explain_temp1 (1) +- BroadcastExchange (4) +- Filter (3) @@ -398,11 +355,7 @@ struct -- !query output == Physical Plan == AdaptiveSparkPlan (3) -+- == Current Plan == - Filter (2) - +- Scan parquet default.explain_temp1 (1) -+- == Initial Plan == - Filter (2) ++- Filter (2) +- Scan parquet default.explain_temp1 (1) @@ -438,11 +391,7 @@ struct -- !query output == Physical Plan == AdaptiveSparkPlan (3) -+- == Current Plan == - Filter (2) - +- Scan parquet default.explain_temp1 (1) -+- == Initial Plan == - Filter (2) ++- Filter (2) +- Scan parquet default.explain_temp1 (1) @@ -470,11 +419,7 @@ struct -- !query output == Physical Plan == AdaptiveSparkPlan (3) -+- == Current Plan == - Project (2) - +- Scan parquet default.explain_temp1 (1) -+- == Initial Plan == - Project (2) ++- Project (2) +- Scan parquet default.explain_temp1 (1) @@ -506,15 +451,7 @@ struct -- !query output == Physical Plan == AdaptiveSparkPlan (7) -+- == Current Plan == - BroadcastHashJoin Inner BuildRight (6) - :- Filter (2) - : +- Scan parquet default.explain_temp1 (1) - +- BroadcastExchange (5) - +- Filter (4) - +- Scan parquet default.explain_temp1 (3) -+- == Initial Plan == - 
BroadcastHashJoin Inner BuildRight (6) ++- BroadcastHashJoin Inner BuildRight (6) :- Filter (2) : +- Scan parquet default.explain_temp1 (1) +- BroadcastExchange (5) @@ -572,21 +509,7 @@ struct -- !query output == Physical Plan == AdaptiveSparkPlan (13) -+- == Current Plan == - BroadcastHashJoin Inner BuildRight (12) - :- HashAggregate (5) - : +- Exchange (4) - : +- HashAggregate (3) - : +- Filter (2) - : +- Scan parquet default.explain_temp1 (1) - +- BroadcastExchange (11) - +- HashAggregate (10) - +- Exchange (9) - +- HashAggregate (8) - +- Filter (7) - +- Scan parquet default.explain_temp1 (6) -+- == Initial Plan == - BroadcastHashJoin Inner BuildRight (12) ++- BroadcastHashJoin Inner BuildRight (12) :- HashAggregate (5) : +- Exchange (4) : +- HashAggregate (3) @@ -710,13 +633,7 @@ struct -- !query output == Physical Plan == AdaptiveSparkPlan (5) -+- == Current Plan == - HashAggregate (4) - +- Exchange (3) - +- HashAggregate (2) - +- Scan parquet default.explain_temp1 (1) -+- == Initial Plan == - HashAggregate (4) ++- HashAggregate (4) +- Exchange (3) +- HashAggregate (2) +- Scan parquet default.explain_temp1 (1) @@ -761,13 +678,7 @@ struct -- !query output == Physical Plan == AdaptiveSparkPlan (5) -+- == Current Plan == - ObjectHashAggregate (4) - +- Exchange (3) - +- ObjectHashAggregate (2) - +- Scan parquet default.explain_temp4 (1) -+- == Initial Plan == - ObjectHashAggregate (4) ++- ObjectHashAggregate (4) +- Exchange (3) +- ObjectHashAggregate (2) +- Scan parquet default.explain_temp4 (1) @@ -812,15 +723,7 @@ struct -- !query output == Physical Plan == AdaptiveSparkPlan (7) -+- == Current Plan == - SortAggregate (6) - +- Sort (5) - +- Exchange (4) - +- SortAggregate (3) - +- Sort (2) - +- Scan parquet default.explain_temp4 (1) -+- == Initial Plan == - SortAggregate (6) ++- SortAggregate (6) +- Sort (5) +- Exchange (4) +- SortAggregate (3) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index 8799dbb14ef34..0dfb1d2fd9eda 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -842,8 +842,8 @@ class AdaptiveQueryExecSuite withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { val df = sql("SELECT * FROM testData join testData2 ON key = a where value = '1'") val planBefore = df.queryExecution.executedPlan - assert(planBefore.toString.contains("== Current Plan ==")) - assert(planBefore.toString.contains("== Initial Plan ==")) + assert(!planBefore.toString.contains("== Current Plan ==")) + assert(!planBefore.toString.contains("== Initial Plan ==")) df.collect() val planAfter = df.queryExecution.executedPlan assert(planAfter.toString.contains("== Final Plan ==")) From 008a2ad1f836ff04fafd51a9c94c355ef35f1692 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 5 Oct 2020 15:29:56 -0700 Subject: [PATCH 0167/1009] [SPARK-20202][BUILD][SQL] Remove references to org.spark-project.hive (Hive 1.2.1) ### What changes were proposed in this pull request? As of today, - SPARK-30034 Apache Spark 3.0.0 switched its default Hive execution engine from Hive 1.2 to Hive 2.3. This removes the direct dependency to the forked Hive 1.2.1 in maven repository. - SPARK-32981 Apache Spark 3.1.0(`master` branch) removed Hive 1.2 related artifacts from Apache Spark binary distributions. 
This PR(SPARK-20202) aims to remove the following usage of unofficial Apache Hive fork completely from Apache Spark master for Apache Spark 3.1.0. ``` org.spark-project.hive 1.2.1.spark2 ``` For the forked Hive 1.2.1.spark2 users, Apache Spark 2.4(LTS) and 3.0 (~ 2021.12) will provide it. ### Why are the changes needed? - First, Apache Spark community should not use the unofficial forked release of another Apache project. - Second, Apache Hive 1.2.1 was released at 2015-06-26 and the forked Hive `1.2.1.spark2` exposed many unfixable bugs in Apache because the forked `1.2.1.spark2` is not maintained at all. Apache Hive 2.3.0 was released at 2017-07-19 and it has been used with less number of bugs compared with `1.2.1.spark2`. Many bugs still exist in `hive-1.2` profile and new Apache Spark unit tests are added with `HiveUtils.isHive23` condition so far. ### Does this PR introduce _any_ user-facing change? No. This is a dev-only change. PRBuilder will not accept `[test-hive1.2]` on master and `branch-3.1`. ### How was this patch tested? 1. SBT/Hadoop 3.2/Hive 2.3 (https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/129366) 2. SBT/Hadoop 2.7/Hive 2.3 (https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/129382) 3. SBT/Hadoop 3.2/Hive 1.2 (This has not been supported already due to Hive 1.2 doesn't work with Hadoop 3.2.) 4. SBT/Hadoop 2.7/Hive 1.2 (https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/129383, This is rejected) Closes #29936 from dongjoon-hyun/SPARK-REMOVE-HIVE1. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/run-tests.py | 1 - dev/test-dependencies.sh | 6 +- docs/sql-migration-guide.md | 2 + pom.xml | 25 - sql/core/pom.xml | 3 - .../datasources/orc/OrcColumnVector.java | 0 .../datasources/orc/OrcFilters.scala | 0 .../datasources/orc/OrcShimUtils.scala | 0 .../datasources/orc/OrcFilterSuite.scala | 0 .../datasources/orc/OrcColumnVector.java | 208 - .../datasources/orc/DaysWritable.scala | 79 - .../datasources/orc/OrcFilters.scala | 275 - .../datasources/orc/OrcShimUtils.scala | 66 - .../datasources/orc/OrcFilterSuite.scala | 676 - .../{v2.3 => }/if/TCLIService.thrift | 0 sql/hive-thriftserver/pom.xml | 4 +- .../service/rpc/thrift/TArrayTypeEntry.java | 0 .../service/rpc/thrift/TBinaryColumn.java | 0 .../hive/service/rpc/thrift/TBoolColumn.java | 0 .../hive/service/rpc/thrift/TBoolValue.java | 0 .../hive/service/rpc/thrift/TByteColumn.java | 0 .../hive/service/rpc/thrift/TByteValue.java | 0 .../hive/service/rpc/thrift/TCLIService.java | 0 .../rpc/thrift/TCLIServiceConstants.java | 0 .../rpc/thrift/TCancelDelegationTokenReq.java | 0 .../thrift/TCancelDelegationTokenResp.java | 0 .../rpc/thrift/TCancelOperationReq.java | 0 .../rpc/thrift/TCancelOperationResp.java | 0 .../rpc/thrift/TCloseOperationReq.java | 0 .../rpc/thrift/TCloseOperationResp.java | 0 .../service/rpc/thrift/TCloseSessionReq.java | 0 .../service/rpc/thrift/TCloseSessionResp.java | 0 .../hive/service/rpc/thrift/TColumn.java | 0 .../hive/service/rpc/thrift/TColumnDesc.java | 0 .../hive/service/rpc/thrift/TColumnValue.java | 0 .../service/rpc/thrift/TDoubleColumn.java | 0 .../hive/service/rpc/thrift/TDoubleValue.java | 0 .../rpc/thrift/TExecuteStatementReq.java | 0 .../rpc/thrift/TExecuteStatementResp.java | 0 .../service/rpc/thrift/TFetchOrientation.java | 0 .../service/rpc/thrift/TFetchResultsReq.java | 0 .../service/rpc/thrift/TFetchResultsResp.java | 0 .../service/rpc/thrift/TGetCatalogsReq.java | 0 .../service/rpc/thrift/TGetCatalogsResp.java | 0 
.../service/rpc/thrift/TGetColumnsReq.java | 0 .../service/rpc/thrift/TGetColumnsResp.java | 0 .../rpc/thrift/TGetCrossReferenceReq.java | 0 .../rpc/thrift/TGetCrossReferenceResp.java | 0 .../rpc/thrift/TGetDelegationTokenReq.java | 0 .../rpc/thrift/TGetDelegationTokenResp.java | 0 .../service/rpc/thrift/TGetFunctionsReq.java | 0 .../service/rpc/thrift/TGetFunctionsResp.java | 0 .../hive/service/rpc/thrift/TGetInfoReq.java | 0 .../hive/service/rpc/thrift/TGetInfoResp.java | 0 .../hive/service/rpc/thrift/TGetInfoType.java | 0 .../service/rpc/thrift/TGetInfoValue.java | 0 .../rpc/thrift/TGetOperationStatusReq.java | 0 .../rpc/thrift/TGetOperationStatusResp.java | 0 .../rpc/thrift/TGetPrimaryKeysReq.java | 0 .../rpc/thrift/TGetPrimaryKeysResp.java | 0 .../rpc/thrift/TGetResultSetMetadataReq.java | 0 .../rpc/thrift/TGetResultSetMetadataResp.java | 0 .../service/rpc/thrift/TGetSchemasReq.java | 0 .../service/rpc/thrift/TGetSchemasResp.java | 0 .../service/rpc/thrift/TGetTableTypesReq.java | 0 .../rpc/thrift/TGetTableTypesResp.java | 0 .../service/rpc/thrift/TGetTablesReq.java | 0 .../service/rpc/thrift/TGetTablesResp.java | 0 .../service/rpc/thrift/TGetTypeInfoReq.java | 0 .../service/rpc/thrift/TGetTypeInfoResp.java | 0 .../service/rpc/thrift/THandleIdentifier.java | 0 .../hive/service/rpc/thrift/TI16Column.java | 0 .../hive/service/rpc/thrift/TI16Value.java | 0 .../hive/service/rpc/thrift/TI32Column.java | 0 .../hive/service/rpc/thrift/TI32Value.java | 0 .../hive/service/rpc/thrift/TI64Column.java | 0 .../hive/service/rpc/thrift/TI64Value.java | 0 .../rpc/thrift/TJobExecutionStatus.java | 0 .../service/rpc/thrift/TMapTypeEntry.java | 0 .../service/rpc/thrift/TOpenSessionReq.java | 0 .../service/rpc/thrift/TOpenSessionResp.java | 0 .../service/rpc/thrift/TOperationHandle.java | 0 .../service/rpc/thrift/TOperationState.java | 0 .../service/rpc/thrift/TOperationType.java | 0 .../rpc/thrift/TPrimitiveTypeEntry.java | 0 .../rpc/thrift/TProgressUpdateResp.java | 0 .../service/rpc/thrift/TProtocolVersion.java | 0 .../rpc/thrift/TRenewDelegationTokenReq.java | 0 .../rpc/thrift/TRenewDelegationTokenResp.java | 0 .../apache/hive/service/rpc/thrift/TRow.java | 0 .../hive/service/rpc/thrift/TRowSet.java | 0 .../service/rpc/thrift/TSessionHandle.java | 0 .../hive/service/rpc/thrift/TStatus.java | 0 .../hive/service/rpc/thrift/TStatusCode.java | 0 .../service/rpc/thrift/TStringColumn.java | 0 .../hive/service/rpc/thrift/TStringValue.java | 0 .../service/rpc/thrift/TStructTypeEntry.java | 0 .../hive/service/rpc/thrift/TTableSchema.java | 0 .../hive/service/rpc/thrift/TTypeDesc.java | 0 .../hive/service/rpc/thrift/TTypeEntry.java | 0 .../hive/service/rpc/thrift/TTypeId.java | 0 .../rpc/thrift/TTypeQualifierValue.java | 0 .../service/rpc/thrift/TTypeQualifiers.java | 0 .../service/rpc/thrift/TUnionTypeEntry.java | 0 .../rpc/thrift/TUserDefinedTypeEntry.java | 0 .../apache/hive/service/AbstractService.java | 0 .../apache/hive/service/CompositeService.java | 0 .../org/apache/hive/service/CookieSigner.java | 0 .../hive/service/ServiceOperations.java | 0 .../org/apache/hive/service/ServiceUtils.java | 0 .../hive/service/auth/HiveAuthFactory.java | 0 .../hive/service/auth/HttpAuthUtils.java | 0 .../hive/service/auth/KerberosSaslHelper.java | 0 .../hive/service/auth/PlainSaslHelper.java | 0 .../service/auth/TSetIpAddressProcessor.java | 0 .../apache/hive/service/cli/CLIService.java | 0 .../hive/service/cli/ColumnBasedSet.java | 0 .../hive/service/cli/ColumnDescriptor.java | 0 
.../apache/hive/service/cli/ColumnValue.java | 0 .../hive/service/cli/FetchOrientation.java | 0 .../apache/hive/service/cli/GetInfoType.java | 0 .../apache/hive/service/cli/GetInfoValue.java | 0 .../org/apache/hive/service/cli/Handle.java | 0 .../hive/service/cli/HandleIdentifier.java | 0 .../hive/service/cli/HiveSQLException.java | 0 .../apache/hive/service/cli/ICLIService.java | 0 .../hive/service/cli/OperationHandle.java | 0 .../hive/service/cli/OperationState.java | 0 .../hive/service/cli/OperationType.java | 0 .../apache/hive/service/cli/RowBasedSet.java | 0 .../org/apache/hive/service/cli/RowSet.java | 0 .../hive/service/cli/RowSetFactory.java | 0 .../hive/service/cli/SessionHandle.java | 0 .../apache/hive/service/cli/TableSchema.java | 0 .../hive/service/cli/TypeDescriptor.java | 0 .../hive/service/cli/TypeQualifiers.java | 0 .../operation/ClassicTableTypeMapping.java | 0 .../operation/ExecuteStatementOperation.java | 0 .../cli/operation/GetCatalogsOperation.java | 0 .../cli/operation/GetColumnsOperation.java | 0 .../operation/GetCrossReferenceOperation.java | 0 .../cli/operation/GetFunctionsOperation.java | 0 .../operation/GetPrimaryKeysOperation.java | 0 .../cli/operation/GetSchemasOperation.java | 0 .../cli/operation/GetTableTypesOperation.java | 0 .../cli/operation/GetTablesOperation.java | 0 .../cli/operation/GetTypeInfoOperation.java | 0 .../cli/operation/HiveCommandOperation.java | 0 .../cli/operation/HiveTableTypeMapping.java | 0 .../cli/operation/MetadataOperation.java | 0 .../hive/service/cli/operation/Operation.java | 0 .../cli/operation/OperationManager.java | 0 .../service/cli/operation/SQLOperation.java | 0 .../cli/operation/TableTypeMapping.java | 0 .../hive/service/cli/session/HiveSession.java | 0 .../service/cli/session/HiveSessionBase.java | 0 .../cli/session/HiveSessionHookContext.java | 0 .../session/HiveSessionHookContextImpl.java | 0 .../service/cli/session/HiveSessionImpl.java | 0 .../cli/session/HiveSessionImplwithUGI.java | 0 .../service/cli/session/SessionManager.java | 0 .../cli/thrift/ThriftBinaryCLIService.java | 0 .../service/cli/thrift/ThriftCLIService.java | 0 .../cli/thrift/ThriftCLIServiceClient.java | 0 .../cli/thrift/ThriftHttpCLIService.java | 0 .../service/cli/thrift/ThriftHttpServlet.java | 0 .../hive/service/server/HiveServer2.java | 0 .../server/ThreadWithGarbageCleanup.java | 0 .../thriftserver/ThriftserverShimUtils.scala | 0 .../v1.2/if/TCLIService.thrift | 1173 -- .../service/cli/thrift/TArrayTypeEntry.java | 383 - .../service/cli/thrift/TBinaryColumn.java | 550 - .../hive/service/cli/thrift/TBoolColumn.java | 548 - .../hive/service/cli/thrift/TBoolValue.java | 386 - .../hive/service/cli/thrift/TByteColumn.java | 548 - .../hive/service/cli/thrift/TByteValue.java | 386 - .../hive/service/cli/thrift/TCLIService.java | 15414 ---------------- .../cli/thrift/TCLIServiceConstants.java | 103 - .../cli/thrift/TCancelDelegationTokenReq.java | 491 - .../thrift/TCancelDelegationTokenResp.java | 390 - .../cli/thrift/TCancelOperationReq.java | 390 - .../cli/thrift/TCancelOperationResp.java | 390 - .../cli/thrift/TCloseOperationReq.java | 390 - .../cli/thrift/TCloseOperationResp.java | 390 - .../service/cli/thrift/TCloseSessionReq.java | 390 - .../service/cli/thrift/TCloseSessionResp.java | 390 - .../hive/service/cli/thrift/TColumn.java | 732 - .../hive/service/cli/thrift/TColumnDesc.java | 700 - .../hive/service/cli/thrift/TColumnValue.java | 671 - .../service/cli/thrift/TDoubleColumn.java | 548 - .../hive/service/cli/thrift/TDoubleValue.java | 386 - 
.../cli/thrift/TExecuteStatementReq.java | 769 - .../cli/thrift/TExecuteStatementResp.java | 505 - .../service/cli/thrift/TFetchOrientation.java | 57 - .../service/cli/thrift/TFetchResultsReq.java | 710 - .../service/cli/thrift/TFetchResultsResp.java | 608 - .../service/cli/thrift/TGetCatalogsReq.java | 390 - .../service/cli/thrift/TGetCatalogsResp.java | 505 - .../service/cli/thrift/TGetColumnsReq.java | 818 - .../service/cli/thrift/TGetColumnsResp.java | 505 - .../cli/thrift/TGetDelegationTokenReq.java | 592 - .../cli/thrift/TGetDelegationTokenResp.java | 500 - .../service/cli/thrift/TGetFunctionsReq.java | 707 - .../service/cli/thrift/TGetFunctionsResp.java | 505 - .../hive/service/cli/thrift/TGetInfoReq.java | 503 - .../hive/service/cli/thrift/TGetInfoResp.java | 493 - .../hive/service/cli/thrift/TGetInfoType.java | 180 - .../service/cli/thrift/TGetInfoValue.java | 593 - .../cli/thrift/TGetOperationStatusReq.java | 390 - .../cli/thrift/TGetOperationStatusResp.java | 827 - .../cli/thrift/TGetResultSetMetadataReq.java | 390 - .../cli/thrift/TGetResultSetMetadataResp.java | 505 - .../service/cli/thrift/TGetSchemasReq.java | 606 - .../service/cli/thrift/TGetSchemasResp.java | 505 - .../service/cli/thrift/TGetTableTypesReq.java | 390 - .../cli/thrift/TGetTableTypesResp.java | 505 - .../service/cli/thrift/TGetTablesReq.java | 870 - .../service/cli/thrift/TGetTablesResp.java | 505 - .../service/cli/thrift/TGetTypeInfoReq.java | 390 - .../service/cli/thrift/TGetTypeInfoResp.java | 505 - .../service/cli/thrift/THandleIdentifier.java | 506 - .../hive/service/cli/thrift/TI16Column.java | 548 - .../hive/service/cli/thrift/TI16Value.java | 386 - .../hive/service/cli/thrift/TI32Column.java | 548 - .../hive/service/cli/thrift/TI32Value.java | 386 - .../hive/service/cli/thrift/TI64Column.java | 548 - .../hive/service/cli/thrift/TI64Value.java | 386 - .../service/cli/thrift/TMapTypeEntry.java | 478 - .../service/cli/thrift/TOpenSessionReq.java | 785 - .../service/cli/thrift/TOpenSessionResp.java | 790 - .../service/cli/thrift/TOperationHandle.java | 705 - .../service/cli/thrift/TOperationState.java | 63 - .../service/cli/thrift/TOperationType.java | 66 - .../cli/thrift/TPrimitiveTypeEntry.java | 512 - .../service/cli/thrift/TProtocolVersion.java | 63 - .../cli/thrift/TRenewDelegationTokenReq.java | 491 - .../cli/thrift/TRenewDelegationTokenResp.java | 390 - .../apache/hive/service/cli/thrift/TRow.java | 439 - .../hive/service/cli/thrift/TRowSet.java | 702 - .../service/cli/thrift/TSessionHandle.java | 390 - .../hive/service/cli/thrift/TStatus.java | 874 - .../hive/service/cli/thrift/TStatusCode.java | 54 - .../service/cli/thrift/TStringColumn.java | 548 - .../hive/service/cli/thrift/TStringValue.java | 389 - .../service/cli/thrift/TStructTypeEntry.java | 448 - .../hive/service/cli/thrift/TTableSchema.java | 439 - .../hive/service/cli/thrift/TTypeDesc.java | 439 - .../hive/service/cli/thrift/TTypeEntry.java | 610 - .../hive/service/cli/thrift/TTypeId.java | 105 - .../cli/thrift/TTypeQualifierValue.java | 361 - .../service/cli/thrift/TTypeQualifiers.java | 450 - .../service/cli/thrift/TUnionTypeEntry.java | 448 - .../cli/thrift/TUserDefinedTypeEntry.java | 385 - .../apache/hive/service/AbstractService.java | 184 - .../apache/hive/service/CompositeService.java | 133 - .../org/apache/hive/service/CookieSigner.java | 108 - .../hive/service/ServiceOperations.java | 141 - .../org/apache/hive/service/ServiceUtils.java | 44 - .../hive/service/auth/HiveAuthFactory.java | 419 - 
.../hive/service/auth/HttpAuthUtils.java | 189 - .../hive/service/auth/KerberosSaslHelper.java | 111 - .../hive/service/auth/PlainSaslHelper.java | 154 - .../service/auth/TSetIpAddressProcessor.java | 114 - .../apache/hive/service/cli/CLIService.java | 507 - .../org/apache/hive/service/cli/Column.java | 423 - .../hive/service/cli/ColumnBasedSet.java | 149 - .../hive/service/cli/ColumnDescriptor.java | 99 - .../apache/hive/service/cli/ColumnValue.java | 288 - .../service/cli/EmbeddedCLIServiceClient.java | 208 - .../hive/service/cli/FetchOrientation.java | 54 - .../apache/hive/service/cli/GetInfoType.java | 96 - .../apache/hive/service/cli/GetInfoValue.java | 82 - .../org/apache/hive/service/cli/Handle.java | 78 - .../hive/service/cli/HandleIdentifier.java | 113 - .../hive/service/cli/HiveSQLException.java | 249 - .../apache/hive/service/cli/ICLIService.java | 105 - .../hive/service/cli/OperationHandle.java | 102 - .../hive/service/cli/OperationState.java | 108 - .../hive/service/cli/OperationType.java | 58 - .../hive/service/cli/PatternOrIdentifier.java | 47 - .../apache/hive/service/cli/RowBasedSet.java | 140 - .../org/apache/hive/service/cli/RowSet.java | 38 - .../hive/service/cli/RowSetFactory.java | 41 - .../hive/service/cli/SessionHandle.java | 67 - .../apache/hive/service/cli/TableSchema.java | 102 - .../org/apache/hive/service/cli/Type.java | 349 - .../hive/service/cli/TypeDescriptor.java | 159 - .../hive/service/cli/TypeQualifiers.java | 133 - .../operation/ClassicTableTypeMapping.java | 86 - .../operation/ExecuteStatementOperation.java | 83 - .../cli/operation/GetCatalogsOperation.java | 81 - .../cli/operation/GetColumnsOperation.java | 234 - .../cli/operation/GetFunctionsOperation.java | 147 - .../cli/operation/GetSchemasOperation.java | 96 - .../cli/operation/GetTableTypesOperation.java | 93 - .../cli/operation/GetTablesOperation.java | 135 - .../cli/operation/GetTypeInfoOperation.java | 142 - .../cli/operation/HiveCommandOperation.java | 215 - .../cli/operation/HiveTableTypeMapping.java | 51 - .../cli/operation/MetadataOperation.java | 134 - .../hive/service/cli/operation/Operation.java | 328 - .../cli/operation/OperationManager.java | 284 - .../service/cli/operation/SQLOperation.java | 456 - .../cli/operation/TableTypeMapping.java | 44 - .../hive/service/cli/session/HiveSession.java | 156 - .../service/cli/session/HiveSessionBase.java | 90 - .../service/cli/session/HiveSessionImpl.java | 842 - .../cli/session/HiveSessionImplwithUGI.java | 182 - .../service/cli/session/SessionManager.java | 377 - .../cli/thrift/ThriftBinaryCLIService.java | 121 - .../service/cli/thrift/ThriftCLIService.java | 693 - .../cli/thrift/ThriftCLIServiceClient.java | 440 - .../cli/thrift/ThriftHttpCLIService.java | 194 - .../service/cli/thrift/ThriftHttpServlet.java | 545 - .../hive/service/server/HiveServer2.java | 277 - .../server/ThreadWithGarbageCleanup.java | 77 - .../thriftserver/ThriftserverShimUtils.scala | 77 - ...IntoHiveTableBenchmark-hive1.2-results.txt | 11 - .../sql/hive/client/HiveClientImpl.scala | 3 +- .../InsertIntoHiveTableBenchmark.scala | 7 +- 320 files changed, 7 insertions(+), 69240 deletions(-) rename sql/core/{v2.3 => }/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java (100%) rename sql/core/{v2.3 => }/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala (100%) rename sql/core/{v2.3 => }/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala (100%) rename sql/core/{v2.3 => 
}/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala (100%) delete mode 100644 sql/core/v1.2/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java delete mode 100644 sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/DaysWritable.scala delete mode 100644 sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala delete mode 100644 sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala delete mode 100644 sql/core/v1.2/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala rename sql/hive-thriftserver/{v2.3 => }/if/TCLIService.thrift (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TArrayTypeEntry.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TBinaryColumn.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolColumn.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolValue.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TByteColumn.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TByteValue.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIService.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIServiceConstants.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TColumn.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnDesc.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnValue.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleColumn.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleValue.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchOrientation.java (100%) rename 
sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoType.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoValue.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoResp.java (100%) rename 
sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/THandleIdentifier.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Column.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Value.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Column.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Value.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Column.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Value.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TJobExecutionStatus.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TMapTypeEntry.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationHandle.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationState.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationType.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TPrimitiveTypeEntry.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TProgressUpdateResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TProtocolVersion.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TRow.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TRowSet.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TSessionHandle.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TStatus.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TStatusCode.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TStringColumn.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TStringValue.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TStructTypeEntry.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TTableSchema.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeDesc.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeEntry.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeId.java (100%) rename 
sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifierValue.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifiers.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TUnionTypeEntry.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TUserDefinedTypeEntry.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/AbstractService.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/CompositeService.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/CookieSigner.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/ServiceOperations.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/ServiceUtils.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/CLIService.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/ColumnValue.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/FetchOrientation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/GetInfoType.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/GetInfoValue.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/Handle.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/HandleIdentifier.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/HiveSQLException.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/ICLIService.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/OperationHandle.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/OperationState.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/OperationType.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/RowBasedSet.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/RowSet.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/RowSetFactory.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/SessionHandle.java (100%) rename sql/hive-thriftserver/{v2.3 => 
}/src/main/java/org/apache/hive/service/cli/TableSchema.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/TypeDescriptor.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/TypeQualifiers.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/GetCatalogsOperation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/GetCrossReferenceOperation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/GetPrimaryKeysOperation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/GetTableTypesOperation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/GetTablesOperation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/HiveTableTypeMapping.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/MetadataOperation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/Operation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/TableTypeMapping.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/session/HiveSession.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/session/HiveSessionBase.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContext.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContextImpl.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/session/SessionManager.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java (100%) rename sql/hive-thriftserver/{v2.3 => 
}/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/server/HiveServer2.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala (100%) delete mode 100644 sql/hive-thriftserver/v1.2/if/TCLIService.thrift delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TArrayTypeEntry.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBinaryColumn.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBoolColumn.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBoolValue.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TByteColumn.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TByteValue.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCLIService.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCLIServiceConstants.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelDelegationTokenReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelDelegationTokenResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelOperationReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelOperationResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseOperationReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseOperationResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseSessionReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseSessionResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumn.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumnDesc.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumnValue.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TDoubleColumn.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TDoubleValue.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TExecuteStatementReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TExecuteStatementResp.java delete mode 100644 
sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchOrientation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchResultsReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchResultsResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetCatalogsReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetCatalogsResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetColumnsReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetColumnsResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetDelegationTokenReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetDelegationTokenResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetFunctionsReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetFunctionsResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoType.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoValue.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetOperationStatusReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetOperationStatusResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetResultSetMetadataReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetResultSetMetadataResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetSchemasReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetSchemasResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTableTypesReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTableTypesResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTablesReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTablesResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTypeInfoReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTypeInfoResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/THandleIdentifier.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI16Column.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI16Value.java delete mode 100644 
sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI32Column.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI32Value.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI64Column.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI64Value.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TMapTypeEntry.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOpenSessionReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOpenSessionResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationHandle.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationState.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationType.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TPrimitiveTypeEntry.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TProtocolVersion.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRenewDelegationTokenReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRenewDelegationTokenResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRow.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRowSet.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TSessionHandle.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStatus.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStatusCode.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStringColumn.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStringValue.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStructTypeEntry.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTableSchema.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeDesc.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeEntry.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeId.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeQualifierValue.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeQualifiers.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TUnionTypeEntry.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TUserDefinedTypeEntry.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/AbstractService.java delete mode 100644 
sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/CompositeService.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/CookieSigner.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/ServiceOperations.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/ServiceUtils.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/CLIService.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Column.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnValue.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/EmbeddedCLIServiceClient.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/FetchOrientation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/GetInfoType.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/GetInfoValue.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Handle.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/HandleIdentifier.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/HiveSQLException.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ICLIService.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationHandle.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationState.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationType.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/PatternOrIdentifier.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowBasedSet.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowSet.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowSetFactory.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/SessionHandle.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TableSchema.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Type.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TypeDescriptor.java delete mode 100644 
sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TypeQualifiers.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetCatalogsOperation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTableTypesOperation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTablesOperation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/HiveTableTypeMapping.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/MetadataOperation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/Operation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/TableTypeMapping.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSession.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionBase.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/SessionManager.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/server/HiveServer2.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java delete mode 100644 
sql/hive-thriftserver/v1.2/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala
 delete mode 100644 sql/hive/benchmarks/InsertIntoHiveTableBenchmark-hive1.2-results.txt

diff --git a/dev/run-tests.py b/dev/run-tests.py
index 3e118dcbc160d..48191e9bb024d 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -325,7 +325,6 @@ def get_hive_profiles(hive_version):
     """
 
     sbt_maven_hive_profiles = {
-        "hive1.2": ["-Phive-1.2"],
         "hive2.3": ["-Phive-2.3"],
     }
 
diff --git a/dev/test-dependencies.sh b/dev/test-dependencies.sh
index 129b073d75254..e9e9227d239e1 100755
--- a/dev/test-dependencies.sh
+++ b/dev/test-dependencies.sh
@@ -32,7 +32,6 @@ export LC_ALL=C
 HADOOP_MODULE_PROFILES="-Phive-thriftserver -Pmesos -Pkubernetes -Pyarn -Phive"
 MVN="build/mvn"
 HADOOP_HIVE_PROFILES=(
-    hadoop-2.7-hive-1.2
     hadoop-2.7-hive-2.3
     hadoop-3.2-hive-2.3
 )
@@ -71,12 +70,9 @@ for HADOOP_HIVE_PROFILE in "${HADOOP_HIVE_PROFILES[@]}"; do
   if [[ $HADOOP_HIVE_PROFILE == **hadoop-3.2-hive-2.3** ]]; then
     HADOOP_PROFILE=hadoop-3.2
     HIVE_PROFILE=hive-2.3
-  elif [[ $HADOOP_HIVE_PROFILE == **hadoop-2.7-hive-2.3** ]]; then
-    HADOOP_PROFILE=hadoop-2.7
-    HIVE_PROFILE=hive-2.3
   else
     HADOOP_PROFILE=hadoop-2.7
-    HIVE_PROFILE=hive-1.2
+    HIVE_PROFILE=hive-2.3
   fi
   echo "Performing Maven install for $HADOOP_HIVE_PROFILE"
   $MVN $HADOOP_MODULE_PROFILES -P$HADOOP_PROFILE -P$HIVE_PROFILE jar:jar jar:test-jar install:install clean -q
diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index de60aed7483c7..feff2c7e9f543 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -42,6 +42,8 @@ license: |
 
   - In Spark 3.1, incomplete interval literals, e.g. `INTERVAL '1'`, `INTERVAL '1 DAY 2'` will fail with IllegalArgumentException. In Spark 3.0, they result `NULL`s.
 
+  - In Spark 3.1, we remove the built-in Hive 1.2. You need to migrate your custom SerDes to Hive 2.3. See [HIVE-15167](https://issues.apache.org/jira/browse/HIVE-15167) for more details.
+
 ## Upgrading from Spark SQL 3.0 to 3.0.1
 
   - In Spark 3.0, JSON datasource and JSON function `schema_of_json` infer TimestampType from string values if they match to the pattern defined by the JSON option `timestampFormat`. Since version 3.0.1, the timestamp type inference is disabled by default. Set the JSON option `inferTimestamp` to `true` to enable such type inference.
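For illustration only, a sketch of what a build looks like once the `hive-1.2` profile is gone: the profile names below come from the script change above, while the remaining flags (`-Phive-thriftserver`, `-DskipTests`, the `clean package` goals) are just a typical invocation and are not part of this patch. Both remaining Hadoop combinations now resolve to the `hive-2.3` profile.

```
# Sketch: both remaining HADOOP_HIVE_PROFILE combinations select Hive 2.3.
# Flags other than the -Phadoop-* / -Phive-2.3 profiles are illustrative.
./build/mvn -Phadoop-2.7 -Phive-2.3 -Phive-thriftserver -DskipTests clean package
./build/mvn -Phadoop-3.2 -Phive-2.3 -Phive-thriftserver -DskipTests clean package
```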
diff --git a/pom.xml b/pom.xml index 5d6b0511ce458..b13d5ab81856c 100644 --- a/pom.xml +++ b/pom.xml @@ -2970,13 +2970,9 @@ ${basedir}/src/main/java ${basedir}/src/main/scala - ${basedir}/v${hive.version.short}/src/main/java - ${basedir}/v${hive.version.short}/src/main/scala ${basedir}/src/test/java - ${basedir}/v${hive.version.short}/src/test/java - ${basedir}/v${hive.version.short}/src/test/scala dev/checkstyle.xml ${basedir}/target/checkstyle-output.xml @@ -3148,27 +3144,6 @@ - - hive-1.2 - - org.spark-project.hive - - - 1.2.1.spark2 - - 1.2 - ${hive.deps.scope} - 2.6.0 - provided - provided - provided - provided - provided - nohive - 3.2.10 - - - hive-2.3 diff --git a/sql/core/pom.xml b/sql/core/pom.xml index c2ed4c079d3cf..0f5d3fd55c15d 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -221,8 +221,6 @@ - v${hive.version.short}/src/main/scala - v${hive.version.short}/src/main/java src/main/scala-${scala.binary.version} @@ -235,7 +233,6 @@ - v${hive.version.short}/src/test/scala src/test/gen-java diff --git a/sql/core/v2.3/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java similarity index 100% rename from sql/core/v2.3/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java rename to sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java diff --git a/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala similarity index 100% rename from sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala rename to sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala diff --git a/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala similarity index 100% rename from sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala rename to sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala diff --git a/sql/core/v2.3/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala similarity index 100% rename from sql/core/v2.3/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala rename to sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala diff --git a/sql/core/v1.2/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java b/sql/core/v1.2/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java deleted file mode 100644 index 6601bcb9018f4..0000000000000 --- a/sql/core/v1.2/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java +++ /dev/null @@ -1,208 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.execution.datasources.orc; - -import java.math.BigDecimal; - -import org.apache.orc.storage.ql.exec.vector.*; - -import org.apache.spark.sql.catalyst.util.DateTimeUtils; -import org.apache.spark.sql.catalyst.util.RebaseDateTime; -import org.apache.spark.sql.types.DataType; -import org.apache.spark.sql.types.DateType; -import org.apache.spark.sql.types.Decimal; -import org.apache.spark.sql.types.TimestampType; -import org.apache.spark.sql.vectorized.ColumnarArray; -import org.apache.spark.sql.vectorized.ColumnarMap; -import org.apache.spark.unsafe.types.UTF8String; - -/** - * A column vector class wrapping Hive's ColumnVector. Because Spark ColumnarBatch only accepts - * Spark's vectorized.ColumnVector, this column vector is used to adapt Hive ColumnVector with - * Spark ColumnarVector. - */ -public class OrcColumnVector extends org.apache.spark.sql.vectorized.ColumnVector { - private ColumnVector baseData; - private LongColumnVector longData; - private DoubleColumnVector doubleData; - private BytesColumnVector bytesData; - private DecimalColumnVector decimalData; - private TimestampColumnVector timestampData; - private final boolean isTimestamp; - private final boolean isDate; - - private int batchSize; - - OrcColumnVector(DataType type, ColumnVector vector) { - super(type); - - if (type instanceof TimestampType) { - isTimestamp = true; - } else { - isTimestamp = false; - } - - if (type instanceof DateType) { - isDate = true; - } else { - isDate = false; - } - - baseData = vector; - if (vector instanceof LongColumnVector) { - longData = (LongColumnVector) vector; - } else if (vector instanceof DoubleColumnVector) { - doubleData = (DoubleColumnVector) vector; - } else if (vector instanceof BytesColumnVector) { - bytesData = (BytesColumnVector) vector; - } else if (vector instanceof DecimalColumnVector) { - decimalData = (DecimalColumnVector) vector; - } else if (vector instanceof TimestampColumnVector) { - timestampData = (TimestampColumnVector) vector; - } else { - throw new UnsupportedOperationException(); - } - } - - public void setBatchSize(int batchSize) { - this.batchSize = batchSize; - } - - @Override - public void close() { - - } - - @Override - public boolean hasNull() { - return !baseData.noNulls; - } - - @Override - public int numNulls() { - if (baseData.isRepeating) { - if (baseData.isNull[0]) { - return batchSize; - } else { - return 0; - } - } else if (baseData.noNulls) { - return 0; - } else { - int count = 0; - for (int i = 0; i < batchSize; i++) { - if (baseData.isNull[i]) count++; - } - return count; - } - } - - /* A helper method to get the row index in a column. */ - private int getRowIndex(int rowId) { - return baseData.isRepeating ? 
0 : rowId; - } - - @Override - public boolean isNullAt(int rowId) { - return baseData.isNull[getRowIndex(rowId)]; - } - - @Override - public boolean getBoolean(int rowId) { - return longData.vector[getRowIndex(rowId)] == 1; - } - - @Override - public byte getByte(int rowId) { - return (byte) longData.vector[getRowIndex(rowId)]; - } - - @Override - public short getShort(int rowId) { - return (short) longData.vector[getRowIndex(rowId)]; - } - - @Override - public int getInt(int rowId) { - int value = (int) longData.vector[getRowIndex(rowId)]; - if (isDate) { - return RebaseDateTime.rebaseJulianToGregorianDays(value); - } else { - return value; - } - } - - @Override - public long getLong(int rowId) { - int index = getRowIndex(rowId); - if (isTimestamp) { - return DateTimeUtils.fromJavaTimestamp(timestampData.asScratchTimestamp(index)); - } else { - return longData.vector[index]; - } - } - - @Override - public float getFloat(int rowId) { - return (float) doubleData.vector[getRowIndex(rowId)]; - } - - @Override - public double getDouble(int rowId) { - return doubleData.vector[getRowIndex(rowId)]; - } - - @Override - public Decimal getDecimal(int rowId, int precision, int scale) { - if (isNullAt(rowId)) return null; - BigDecimal data = decimalData.vector[getRowIndex(rowId)].getHiveDecimal().bigDecimalValue(); - return Decimal.apply(data, precision, scale); - } - - @Override - public UTF8String getUTF8String(int rowId) { - if (isNullAt(rowId)) return null; - int index = getRowIndex(rowId); - BytesColumnVector col = bytesData; - return UTF8String.fromBytes(col.vector[index], col.start[index], col.length[index]); - } - - @Override - public byte[] getBinary(int rowId) { - if (isNullAt(rowId)) return null; - int index = getRowIndex(rowId); - byte[] binary = new byte[bytesData.length[index]]; - System.arraycopy(bytesData.vector[index], bytesData.start[index], binary, 0, binary.length); - return binary; - } - - @Override - public ColumnarArray getArray(int rowId) { - throw new UnsupportedOperationException(); - } - - @Override - public ColumnarMap getMap(int rowId) { - throw new UnsupportedOperationException(); - } - - @Override - public org.apache.spark.sql.vectorized.ColumnVector getChild(int ordinal) { - throw new UnsupportedOperationException(); - } -} diff --git a/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/DaysWritable.scala b/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/DaysWritable.scala deleted file mode 100644 index 1dccf0ca1faef..0000000000000 --- a/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/DaysWritable.scala +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.execution.datasources.orc - -import java.io.{DataInput, DataOutput, IOException} -import java.sql.Date - -import org.apache.hadoop.io.WritableUtils -import org.apache.orc.storage.serde2.io.DateWritable - -import org.apache.spark.sql.catalyst.util.RebaseDateTime.{rebaseGregorianToJulianDays, rebaseJulianToGregorianDays} - -/** - * The class accepts/returns days in Gregorian calendar and rebase them - * via conversion to local date in Julian calendar for dates before 1582-10-15 - * in read/write for backward compatibility with Spark 2.4 and earlier versions. - * - * This is a clone of `org.apache.spark.sql.execution.datasources.DaysWritable`. - * The class is cloned because Hive ORC v1.2 uses different `DateWritable`: - * - v1.2: `org.apache.orc.storage.serde2.io.DateWritable` - * - v2.3 and `HiveInspectors`: `org.apache.hadoop.hive.serde2.io.DateWritable` - * - * @param gregorianDays The number of days since the epoch 1970-01-01 in - * Gregorian calendar. - * @param julianDays The number of days since the epoch 1970-01-01 in - * Julian calendar. - */ -class DaysWritable( - var gregorianDays: Int, - var julianDays: Int) - extends DateWritable { - - def this() = this(0, 0) - def this(gregorianDays: Int) = - this(gregorianDays, rebaseGregorianToJulianDays(gregorianDays)) - def this(dateWritable: DateWritable) = { - this( - gregorianDays = dateWritable match { - case daysWritable: DaysWritable => daysWritable.gregorianDays - case dateWritable: DateWritable => - rebaseJulianToGregorianDays(dateWritable.getDays) - }, - julianDays = dateWritable.getDays) - } - - override def getDays: Int = julianDays - override def get(): Date = new Date(DateWritable.daysToMillis(julianDays)) - - override def set(d: Int): Unit = { - gregorianDays = d - julianDays = rebaseGregorianToJulianDays(d) - } - - @throws[IOException] - override def write(out: DataOutput): Unit = { - WritableUtils.writeVInt(out, julianDays) - } - - @throws[IOException] - override def readFields(in: DataInput): Unit = { - julianDays = WritableUtils.readVInt(in) - gregorianDays = rebaseJulianToGregorianDays(julianDays) - } -} diff --git a/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala b/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala deleted file mode 100644 index 0e657bfe66238..0000000000000 --- a/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala +++ /dev/null @@ -1,275 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.execution.datasources.orc - -import java.time.{Instant, LocalDate} - -import org.apache.orc.storage.common.`type`.HiveDecimal -import org.apache.orc.storage.ql.io.sarg.{PredicateLeaf, SearchArgument} -import org.apache.orc.storage.ql.io.sarg.SearchArgument.Builder -import org.apache.orc.storage.ql.io.sarg.SearchArgumentFactory.newBuilder -import org.apache.orc.storage.serde2.io.HiveDecimalWritable - -import org.apache.spark.SparkException -import org.apache.spark.sql.catalyst.util.DateTimeUtils.{instantToMicros, localDateToDays, toJavaDate, toJavaTimestamp} -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.sources.Filter -import org.apache.spark.sql.types._ - -/** - * Helper object for building ORC `SearchArgument`s, which are used for ORC predicate push-down. - * - * Due to limitation of ORC `SearchArgument` builder, we had to implement separate checking and - * conversion passes through the Filter to make sure we only convert predicates that are known - * to be convertible. - * - * An ORC `SearchArgument` must be built in one pass using a single builder. For example, you can't - * build `a = 1` and `b = 2` first, and then combine them into `a = 1 AND b = 2`. This is quite - * different from the cases in Spark SQL or Parquet, where complex filters can be easily built using - * existing simpler ones. - * - * The annoying part is that, `SearchArgument` builder methods like `startAnd()`, `startOr()`, and - * `startNot()` mutate internal state of the builder instance. This forces us to translate all - * convertible filters with a single builder instance. However, if we try to translate a filter - * before checking whether it can be converted or not, we may end up with a builder whose internal - * state is inconsistent in the case of an inconvertible filter. - * - * For example, to convert an `And` filter with builder `b`, we call `b.startAnd()` first, and then - * try to convert its children. Say we convert `left` child successfully, but find that `right` - * child is inconvertible. Alas, `b.startAnd()` call can't be rolled back, and `b` is inconsistent - * now. - * - * The workaround employed here is to trim the Spark filters before trying to convert them. This - * way, we can only do the actual conversion on the part of the Filter that is known to be - * convertible. - * - * P.S.: Hive seems to use `SearchArgument` together with `ExprNodeGenericFuncDesc` only. Usage of - * builder methods mentioned above can only be found in test code, where all tested filters are - * known to be convertible. - */ -private[sql] object OrcFilters extends OrcFiltersBase { - - /** - * Create ORC filter as a SearchArgument instance. - */ - def createFilter(schema: StructType, filters: Seq[Filter]): Option[SearchArgument] = { - val dataTypeMap = OrcFilters.getSearchableTypeMap(schema, SQLConf.get.caseSensitiveAnalysis) - // Combines all convertible filters using `And` to produce a single conjunction - val conjunctionOptional = buildTree(convertibleFilters(schema, dataTypeMap, filters)) - conjunctionOptional.map { conjunction => - // Then tries to build a single ORC `SearchArgument` for the conjunction predicate. - // The input predicate is fully convertible. There should not be any empty result in the - // following recursive method call `buildSearchArgument`. 
- buildSearchArgument(dataTypeMap, conjunction, newBuilder).build() - } - } - - def convertibleFilters( - schema: StructType, - dataTypeMap: Map[String, OrcPrimitiveField], - filters: Seq[Filter]): Seq[Filter] = { - import org.apache.spark.sql.sources._ - - def convertibleFiltersHelper( - filter: Filter, - canPartialPushDown: Boolean): Option[Filter] = filter match { - // At here, it is not safe to just convert one side and remove the other side - // if we do not understand what the parent filters are. - // - // Here is an example used to explain the reason. - // Let's say we have NOT(a = 2 AND b in ('1')) and we do not understand how to - // convert b in ('1'). If we only convert a = 2, we will end up with a filter - // NOT(a = 2), which will generate wrong results. - // - // Pushing one side of AND down is only safe to do at the top level or in the child - // AND before hitting NOT or OR conditions, and in this case, the unsupported predicate - // can be safely removed. - case And(left, right) => - val leftResultOptional = convertibleFiltersHelper(left, canPartialPushDown) - val rightResultOptional = convertibleFiltersHelper(right, canPartialPushDown) - (leftResultOptional, rightResultOptional) match { - case (Some(leftResult), Some(rightResult)) => Some(And(leftResult, rightResult)) - case (Some(leftResult), None) if canPartialPushDown => Some(leftResult) - case (None, Some(rightResult)) if canPartialPushDown => Some(rightResult) - case _ => None - } - - // The Or predicate is convertible when both of its children can be pushed down. - // That is to say, if one/both of the children can be partially pushed down, the Or - // predicate can be partially pushed down as well. - // - // Here is an example used to explain the reason. - // Let's say we have - // (a1 AND a2) OR (b1 AND b2), - // a1 and b1 is convertible, while a2 and b2 is not. - // The predicate can be converted as - // (a1 OR b1) AND (a1 OR b2) AND (a2 OR b1) AND (a2 OR b2) - // As per the logical in And predicate, we can push down (a1 OR b1). - case Or(left, right) => - for { - lhs <- convertibleFiltersHelper(left, canPartialPushDown) - rhs <- convertibleFiltersHelper(right, canPartialPushDown) - } yield Or(lhs, rhs) - case Not(pred) => - val childResultOptional = convertibleFiltersHelper(pred, canPartialPushDown = false) - childResultOptional.map(Not) - case other => - for (_ <- buildLeafSearchArgument(dataTypeMap, other, newBuilder())) yield other - } - filters.flatMap { filter => - convertibleFiltersHelper(filter, true) - } - } - - /** - * Get PredicateLeafType which is corresponding to the given DataType. - */ - def getPredicateLeafType(dataType: DataType): PredicateLeaf.Type = dataType match { - case BooleanType => PredicateLeaf.Type.BOOLEAN - case ByteType | ShortType | IntegerType | LongType => PredicateLeaf.Type.LONG - case FloatType | DoubleType => PredicateLeaf.Type.FLOAT - case StringType => PredicateLeaf.Type.STRING - case DateType => PredicateLeaf.Type.DATE - case TimestampType => PredicateLeaf.Type.TIMESTAMP - case _: DecimalType => PredicateLeaf.Type.DECIMAL - case _ => throw new UnsupportedOperationException(s"DataType: ${dataType.catalogString}") - } - - /** - * Cast literal values for filters. - * - * We need to cast to long because ORC raises exceptions - * at 'checkLiteralType' of SearchArgumentImpl.java. 
- */ - private def castLiteralValue(value: Any, dataType: DataType): Any = dataType match { - case ByteType | ShortType | IntegerType | LongType => - value.asInstanceOf[Number].longValue - case FloatType | DoubleType => - value.asInstanceOf[Number].doubleValue() - case _: DecimalType => - new HiveDecimalWritable(HiveDecimal.create(value.asInstanceOf[java.math.BigDecimal])) - case _: DateType if value.isInstanceOf[LocalDate] => - toJavaDate(localDateToDays(value.asInstanceOf[LocalDate])) - case _: TimestampType if value.isInstanceOf[Instant] => - toJavaTimestamp(instantToMicros(value.asInstanceOf[Instant])) - case _ => value - } - - /** - * Build a SearchArgument and return the builder so far. - * - * @param dataTypeMap a map from the attribute name to its data type. - * @param expression the input predicates, which should be fully convertible to SearchArgument. - * @param builder the input SearchArgument.Builder. - * @return the builder so far. - */ - private def buildSearchArgument( - dataTypeMap: Map[String, OrcPrimitiveField], - expression: Filter, - builder: Builder): Builder = { - import org.apache.spark.sql.sources._ - - expression match { - case And(left, right) => - val lhs = buildSearchArgument(dataTypeMap, left, builder.startAnd()) - val rhs = buildSearchArgument(dataTypeMap, right, lhs) - rhs.end() - - case Or(left, right) => - val lhs = buildSearchArgument(dataTypeMap, left, builder.startOr()) - val rhs = buildSearchArgument(dataTypeMap, right, lhs) - rhs.end() - - case Not(child) => - buildSearchArgument(dataTypeMap, child, builder.startNot()).end() - - case other => - buildLeafSearchArgument(dataTypeMap, other, builder).getOrElse { - throw new SparkException( - "The input filter of OrcFilters.buildSearchArgument should be fully convertible.") - } - } - } - - /** - * Build a SearchArgument for a leaf predicate and return the builder so far. - * - * @param dataTypeMap a map from the attribute name to its data type. - * @param expression the input filter predicates. - * @param builder the input SearchArgument.Builder. - * @return the builder so far. - */ - private def buildLeafSearchArgument( - dataTypeMap: Map[String, OrcPrimitiveField], - expression: Filter, - builder: Builder): Option[Builder] = { - def getType(attribute: String): PredicateLeaf.Type = - getPredicateLeafType(dataTypeMap(attribute).fieldType) - - import org.apache.spark.sql.sources._ - - // NOTE: For all case branches dealing with leaf predicates below, the additional `startAnd()` - // call is mandatory. ORC `SearchArgument` builder requires that all leaf predicates must be - // wrapped by a "parent" predicate (`And`, `Or`, or `Not`). 
- expression match { - case EqualTo(name, value) if dataTypeMap.contains(name) => - val castedValue = castLiteralValue(value, dataTypeMap(name).fieldType) - Some(builder.startAnd() - .equals(dataTypeMap(name).fieldName, getType(name), castedValue).end()) - - case EqualNullSafe(name, value) if dataTypeMap.contains(name) => - val castedValue = castLiteralValue(value, dataTypeMap(name).fieldType) - Some(builder.startAnd() - .nullSafeEquals(dataTypeMap(name).fieldName, getType(name), castedValue).end()) - - case LessThan(name, value) if dataTypeMap.contains(name) => - val castedValue = castLiteralValue(value, dataTypeMap(name).fieldType) - Some(builder.startAnd() - .lessThan(dataTypeMap(name).fieldName, getType(name), castedValue).end()) - - case LessThanOrEqual(name, value) if dataTypeMap.contains(name) => - val castedValue = castLiteralValue(value, dataTypeMap(name).fieldType) - Some(builder.startAnd() - .lessThanEquals(dataTypeMap(name).fieldName, getType(name), castedValue).end()) - - case GreaterThan(name, value) if dataTypeMap.contains(name) => - val castedValue = castLiteralValue(value, dataTypeMap(name).fieldType) - Some(builder.startNot() - .lessThanEquals(dataTypeMap(name).fieldName, getType(name), castedValue).end()) - - case GreaterThanOrEqual(name, value) if dataTypeMap.contains(name) => - val castedValue = castLiteralValue(value, dataTypeMap(name).fieldType) - Some(builder.startNot() - .lessThan(dataTypeMap(name).fieldName, getType(name), castedValue).end()) - - case IsNull(name) if dataTypeMap.contains(name) => - Some(builder.startAnd().isNull(dataTypeMap(name).fieldName, getType(name)).end()) - - case IsNotNull(name) if dataTypeMap.contains(name) => - Some(builder.startNot().isNull(dataTypeMap(name).fieldName, getType(name)).end()) - - case In(name, values) if dataTypeMap.contains(name) => - val castedValues = values.map(v => castLiteralValue(v, dataTypeMap(name).fieldType)) - Some(builder.startAnd().in(dataTypeMap(name).fieldName, getType(name), - castedValues.map(_.asInstanceOf[AnyRef]): _*).end()) - - case _ => None - } - } -} - diff --git a/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala b/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala deleted file mode 100644 index 7fbc1cd205b13..0000000000000 --- a/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.execution.datasources.orc - -import org.apache.orc.storage.common.`type`.HiveDecimal -import org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch -import org.apache.orc.storage.ql.io.sarg.{SearchArgument => OrcSearchArgument} -import org.apache.orc.storage.ql.io.sarg.PredicateLeaf.{Operator => OrcOperator} -import org.apache.orc.storage.serde2.io.{DateWritable, HiveDecimalWritable} - -import org.apache.spark.sql.catalyst.expressions.SpecializedGetters -import org.apache.spark.sql.types.Decimal - -/** - * Various utilities for ORC used to upgrade the built-in Hive. - */ -private[sql] object OrcShimUtils { - - class VectorizedRowBatchWrap(val batch: VectorizedRowBatch) {} - - private[sql] type Operator = OrcOperator - private[sql] type SearchArgument = OrcSearchArgument - - def getGregorianDays(value: Any): Int = { - new DaysWritable(value.asInstanceOf[DateWritable]).gregorianDays - } - - def getDecimal(value: Any): Decimal = { - val decimal = value.asInstanceOf[HiveDecimalWritable].getHiveDecimal() - Decimal(decimal.bigDecimalValue, decimal.precision(), decimal.scale()) - } - - def getDateWritable(reuseObj: Boolean): (SpecializedGetters, Int) => DateWritable = { - if (reuseObj) { - val result = new DaysWritable() - (getter, ordinal) => - result.set(getter.getInt(ordinal)) - result - } else { - (getter: SpecializedGetters, ordinal: Int) => - new DaysWritable(getter.getInt(ordinal)) - } - } - - def getHiveDecimalWritable(precision: Int, scale: Int): - (SpecializedGetters, Int) => HiveDecimalWritable = { - (getter, ordinal) => - val d = getter.getDecimal(ordinal, precision, scale) - new HiveDecimalWritable(HiveDecimal.create(d.toJavaBigDecimal)) - } -} diff --git a/sql/core/v1.2/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala b/sql/core/v1.2/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala deleted file mode 100644 index e159a0588dfff..0000000000000 --- a/sql/core/v1.2/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala +++ /dev/null @@ -1,676 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.execution.datasources.orc - -import java.math.MathContext -import java.nio.charset.StandardCharsets -import java.sql.{Date, Timestamp} - -import scala.collection.JavaConverters._ - -import org.apache.orc.storage.ql.io.sarg.{PredicateLeaf, SearchArgument} -import org.apache.orc.storage.ql.io.sarg.SearchArgumentFactory.newBuilder - -import org.apache.spark.{SparkConf, SparkException} -import org.apache.spark.sql.{AnalysisException, Column, DataFrame, Row} -import org.apache.spark.sql.catalyst.dsl.expressions._ -import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.planning.PhysicalOperation -import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation -import org.apache.spark.sql.execution.datasources.v2.orc.OrcScan -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types._ - -/** - * A test suite that tests Apache ORC filter API based filter pushdown optimization. - * OrcFilterSuite and HiveOrcFilterSuite is logically duplicated to provide the same test coverage. - * The difference are the packages containing 'Predicate' and 'SearchArgument' classes. - * - OrcFilterSuite uses 'org.apache.orc.storage.ql.io.sarg' package. - * - HiveOrcFilterSuite uses 'org.apache.hadoop.hive.ql.io.sarg' package. - */ -class OrcFilterSuite extends OrcTest with SharedSparkSession { - - override protected def sparkConf: SparkConf = - super - .sparkConf - .set(SQLConf.USE_V1_SOURCE_LIST, "") - - protected def checkFilterPredicate( - df: DataFrame, - predicate: Predicate, - checker: (SearchArgument) => Unit): Unit = { - val output = predicate.collect { case a: Attribute => a }.distinct - val query = df - .select(output.map(e => Column(e)): _*) - .where(Column(predicate)) - - query.queryExecution.optimizedPlan match { - case PhysicalOperation(_, filters, DataSourceV2ScanRelation(_, o: OrcScan, _)) => - assert(filters.nonEmpty, "No filter is analyzed from the given query") - assert(o.pushedFilters.nonEmpty, "No filter is pushed down") - val maybeFilter = OrcFilters.createFilter(query.schema, o.pushedFilters) - assert(maybeFilter.isDefined, s"Couldn't generate filter predicate for ${o.pushedFilters}") - checker(maybeFilter.get) - - case _ => - throw new AnalysisException("Can not match OrcTable in the query.") - } - } - - protected def checkFilterPredicate - (predicate: Predicate, filterOperator: PredicateLeaf.Operator) - (implicit df: DataFrame): Unit = { - def checkComparisonOperator(filter: SearchArgument) = { - val operator = filter.getLeaves.asScala - assert(operator.map(_.getOperator).contains(filterOperator)) - } - checkFilterPredicate(df, predicate, checkComparisonOperator) - } - - protected def checkFilterPredicate - (predicate: Predicate, stringExpr: String) - (implicit df: DataFrame): Unit = { - def checkLogicalOperator(filter: SearchArgument) = { - assert(filter.toString == stringExpr) - } - checkFilterPredicate(df, predicate, checkLogicalOperator) - } - - test("filter pushdown - integer") { - withNestedOrcDataFrame((1 to 4).map(i => Tuple1(Option(i)))) { case (inputDF, colName, _) => - implicit val df: DataFrame = inputDF - - val intAttr = df(colName).expr - assert(df(colName).expr.dataType === IntegerType) - - checkFilterPredicate(intAttr.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate(intAttr === 1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(intAttr <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - 
checkFilterPredicate(intAttr < 2, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(intAttr > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(intAttr <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(intAttr >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === intAttr, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> intAttr, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > intAttr, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < intAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= intAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= intAttr, PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - long") { - withNestedOrcDataFrame( - (1 to 4).map(i => Tuple1(Option(i.toLong)))) { case (inputDF, colName, _) => - implicit val df: DataFrame = inputDF - - val longAttr = df(colName).expr - assert(df(colName).expr.dataType === LongType) - - checkFilterPredicate(longAttr.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate(longAttr === 1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(longAttr <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate(longAttr < 2, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(longAttr > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(longAttr <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(longAttr >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === longAttr, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> longAttr, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > longAttr, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < longAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= longAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= longAttr, PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - float") { - withNestedOrcDataFrame( - (1 to 4).map(i => Tuple1(Option(i.toFloat)))) { case (inputDF, colName, _) => - implicit val df: DataFrame = inputDF - - val floatAttr = df(colName).expr - assert(df(colName).expr.dataType === FloatType) - - checkFilterPredicate(floatAttr.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate(floatAttr === 1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(floatAttr <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate(floatAttr < 2, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(floatAttr > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(floatAttr <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(floatAttr >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === floatAttr, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> floatAttr, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > floatAttr, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < floatAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= floatAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= floatAttr, PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - double") { - withNestedOrcDataFrame( - (1 to 4).map(i => Tuple1(Option(i.toDouble)))) { 
case (inputDF, colName, _) => - implicit val df: DataFrame = inputDF - - val doubleAttr = df(colName).expr - assert(df(colName).expr.dataType === DoubleType) - - checkFilterPredicate(doubleAttr.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate(doubleAttr === 1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(doubleAttr <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate(doubleAttr < 2, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(doubleAttr > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(doubleAttr <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(doubleAttr >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === doubleAttr, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> doubleAttr, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > doubleAttr, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < doubleAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= doubleAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= doubleAttr, PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - string") { - withNestedOrcDataFrame((1 to 4).map(i => Tuple1(i.toString))) { case (inputDF, colName, _) => - implicit val df: DataFrame = inputDF - - val strAttr = df(colName).expr - assert(df(colName).expr.dataType === StringType) - - checkFilterPredicate(strAttr.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate(strAttr === "1", PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(strAttr <=> "1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate(strAttr < "2", PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(strAttr > "3", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(strAttr <= "1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(strAttr >= "4", PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal("1") === strAttr, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal("1") <=> strAttr, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal("2") > strAttr, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal("3") < strAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal("1") >= strAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal("4") <= strAttr, PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - boolean") { - withNestedOrcDataFrame( - (true :: false :: Nil).map(b => Tuple1.apply(Option(b)))) { case (inputDF, colName, _) => - implicit val df: DataFrame = inputDF - - val booleanAttr = df(colName).expr - assert(df(colName).expr.dataType === BooleanType) - - checkFilterPredicate(booleanAttr.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate(booleanAttr === true, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(booleanAttr <=> true, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate(booleanAttr < true, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(booleanAttr > false, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(booleanAttr <= false, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(booleanAttr >= false, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(false) === booleanAttr, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(false) <=> booleanAttr, - 
PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(false) > booleanAttr, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(true) < booleanAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(true) >= booleanAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(true) <= booleanAttr, PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - decimal") { - withNestedOrcDataFrame( - (1 to 4).map(i => Tuple1.apply(BigDecimal.valueOf(i)))) { case (inputDF, colName, _) => - implicit val df: DataFrame = inputDF - - val decimalAttr = df(colName).expr - assert(df(colName).expr.dataType === DecimalType(38, 18)) - - checkFilterPredicate(decimalAttr.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate(decimalAttr === BigDecimal.valueOf(1), PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(decimalAttr <=> BigDecimal.valueOf(1), - PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate(decimalAttr < BigDecimal.valueOf(2), PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(decimalAttr > BigDecimal.valueOf(3), - PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(decimalAttr <= BigDecimal.valueOf(1), - PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(decimalAttr >= BigDecimal.valueOf(4), PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate( - Literal(BigDecimal.valueOf(1)) === decimalAttr, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate( - Literal(BigDecimal.valueOf(1)) <=> decimalAttr, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate( - Literal(BigDecimal.valueOf(2)) > decimalAttr, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate( - Literal(BigDecimal.valueOf(3)) < decimalAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate( - Literal(BigDecimal.valueOf(1)) >= decimalAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate( - Literal(BigDecimal.valueOf(4)) <= decimalAttr, PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - timestamp") { - val input = Seq( - "1000-01-01 01:02:03", - "1582-10-01 00:11:22", - "1900-01-01 23:59:59", - "2020-05-25 10:11:12").map(Timestamp.valueOf) - - withOrcFile(input.map(Tuple1(_))) { path => - Seq(false, true).foreach { java8Api => - withSQLConf(SQLConf.DATETIME_JAVA8API_ENABLED.key -> java8Api.toString) { - readFile(path) { implicit df => - val timestamps = input.map(Literal(_)) - checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate($"_1" === timestamps(0), PredicateLeaf.Operator.EQUALS) - checkFilterPredicate($"_1" <=> timestamps(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate($"_1" < timestamps(1), PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate($"_1" > timestamps(2), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" <= timestamps(0), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" >= timestamps(3), PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(timestamps(0)) === $"_1", PredicateLeaf.Operator.EQUALS) - checkFilterPredicate( - Literal(timestamps(0)) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(timestamps(1)) > $"_1", PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate( - Literal(timestamps(2)) < $"_1", - PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate( - Literal(timestamps(0)) >= $"_1", - PredicateLeaf.Operator.LESS_THAN_EQUALS) - 
checkFilterPredicate(Literal(timestamps(3)) <= $"_1", PredicateLeaf.Operator.LESS_THAN) - } - } - } - } - } - - test("filter pushdown - combinations with logical operators") { - withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i)))) { implicit df => - checkFilterPredicate( - $"_1".isNotNull, - "leaf-0 = (IS_NULL _1), expr = (not leaf-0)" - ) - checkFilterPredicate( - $"_1" =!= 1, - "leaf-0 = (IS_NULL _1), leaf-1 = (EQUALS _1 1), expr = (and (not leaf-0) (not leaf-1))" - ) - checkFilterPredicate( - !($"_1" < 4), - "leaf-0 = (IS_NULL _1), leaf-1 = (LESS_THAN _1 4), expr = (and (not leaf-0) (not leaf-1))" - ) - checkFilterPredicate( - $"_1" < 2 || $"_1" > 3, - "leaf-0 = (LESS_THAN _1 2), leaf-1 = (LESS_THAN_EQUALS _1 3), " + - "expr = (or leaf-0 (not leaf-1))" - ) - checkFilterPredicate( - $"_1" < 2 && $"_1" > 3, - "leaf-0 = (IS_NULL _1), leaf-1 = (LESS_THAN _1 2), leaf-2 = (LESS_THAN_EQUALS _1 3), " + - "expr = (and (not leaf-0) leaf-1 (not leaf-2))" - ) - } - } - - test("filter pushdown - date") { - val input = Seq("2017-08-18", "2017-08-19", "2017-08-20", "2017-08-21").map { day => - Date.valueOf(day) - } - withOrcFile(input.map(Tuple1(_))) { path => - Seq(false, true).foreach { java8Api => - withSQLConf(SQLConf.DATETIME_JAVA8API_ENABLED.key -> java8Api.toString) { - readFile(path) { implicit df => - val dates = input.map(Literal(_)) - checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate($"_1" === dates(0), PredicateLeaf.Operator.EQUALS) - checkFilterPredicate($"_1" <=> dates(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate($"_1" < dates(1), PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate($"_1" > dates(2), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" <= dates(0), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" >= dates(3), PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(dates(0) === $"_1", PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(dates(0) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(dates(1) > $"_1", PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(dates(2) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(dates(0) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(dates(3) <= $"_1", PredicateLeaf.Operator.LESS_THAN) - } - } - } - } - } - - test("no filter pushdown - non-supported types") { - implicit class IntToBinary(int: Int) { - def b: Array[Byte] = int.toString.getBytes(StandardCharsets.UTF_8) - } - // ArrayType - withOrcDataFrame((1 to 4).map(i => Tuple1(Array(i)))) { implicit df => - checkNoFilterPredicate($"_1".isNull, noneSupported = true) - } - // BinaryType - withOrcDataFrame((1 to 4).map(i => Tuple1(i.b))) { implicit df => - checkNoFilterPredicate($"_1" <=> 1.b, noneSupported = true) - } - // MapType - withOrcDataFrame((1 to 4).map(i => Tuple1(Map(i -> i)))) { implicit df => - checkNoFilterPredicate($"_1".isNotNull, noneSupported = true) - } - } - - test("SPARK-12218 and SPARK-25699 Converting conjunctions into ORC SearchArguments") { - import org.apache.spark.sql.sources._ - // The `LessThan` should be converted while the `StringContains` shouldn't - val schema = new StructType( - Array( - StructField("a", IntegerType, nullable = true), - StructField("b", StringType, nullable = true))) - assertResult("leaf-0 = (LESS_THAN a 10), expr = leaf-0") { - OrcFilters.createFilter(schema, Array( - LessThan("a", 10), - StringContains("b", "prefix") - )).get.toString - 
} - - // The `LessThan` should be converted while the whole inner `And` shouldn't - assertResult("leaf-0 = (LESS_THAN a 10), expr = leaf-0") { - OrcFilters.createFilter(schema, Array( - LessThan("a", 10), - Not(And( - GreaterThan("a", 1), - StringContains("b", "prefix") - )) - )).get.toString - } - - // Safely remove unsupported `StringContains` predicate and push down `LessThan` - assertResult("leaf-0 = (LESS_THAN a 10), expr = leaf-0") { - OrcFilters.createFilter(schema, Array( - And( - LessThan("a", 10), - StringContains("b", "prefix") - ) - )).get.toString - } - - // Safely remove unsupported `StringContains` predicate, push down `LessThan` and `GreaterThan`. - assertResult("leaf-0 = (LESS_THAN a 10), leaf-1 = (LESS_THAN_EQUALS a 1)," + - " expr = (and leaf-0 (not leaf-1))") { - OrcFilters.createFilter(schema, Array( - And( - And( - LessThan("a", 10), - StringContains("b", "prefix") - ), - GreaterThan("a", 1) - ) - )).get.toString - } - } - - test("SPARK-27699 Converting disjunctions into ORC SearchArguments") { - import org.apache.spark.sql.sources._ - // The `LessThan` should be converted while the `StringContains` shouldn't - val schema = new StructType( - Array( - StructField("a", IntegerType, nullable = true), - StructField("b", StringType, nullable = true))) - - // The predicate `StringContains` predicate is not able to be pushed down. - assertResult("leaf-0 = (LESS_THAN_EQUALS a 10), leaf-1 = (LESS_THAN a 1)," + - " expr = (or (not leaf-0) leaf-1)") { - OrcFilters.createFilter(schema, Array( - Or( - GreaterThan("a", 10), - And( - StringContains("b", "prefix"), - LessThan("a", 1) - ) - ) - )).get.toString - } - - assertResult("leaf-0 = (LESS_THAN_EQUALS a 10), leaf-1 = (LESS_THAN a 1)," + - " expr = (or (not leaf-0) leaf-1)") { - OrcFilters.createFilter(schema, Array( - Or( - And( - GreaterThan("a", 10), - StringContains("b", "foobar") - ), - And( - StringContains("b", "prefix"), - LessThan("a", 1) - ) - ) - )).get.toString - } - - assert(OrcFilters.createFilter(schema, Array( - Or( - StringContains("b", "foobar"), - And( - StringContains("b", "prefix"), - LessThan("a", 1) - ) - ) - )).isEmpty) - } - - test("SPARK-27160: Fix casting of the DecimalType literal") { - import org.apache.spark.sql.sources._ - val schema = StructType(Array(StructField("a", DecimalType(3, 2)))) - assertResult("leaf-0 = (LESS_THAN a 3.14), expr = leaf-0") { - OrcFilters.createFilter(schema, Array( - LessThan( - "a", - new java.math.BigDecimal(3.14, MathContext.DECIMAL64).setScale(2))) - ).get.toString - } - } - - test("SPARK-32622: case sensitivity in predicate pushdown") { - withTempPath { dir => - val count = 10 - val tableName = "spark_32622" - val tableDir1 = dir.getAbsoluteFile + "/table1" - - // Physical ORC files have both `A` and `a` fields. - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - spark.range(count).repartition(count).selectExpr("id - 1 as A", "id as a") - .write.mode("overwrite").orc(tableDir1) - } - - // Metastore table has both `A` and `a` fields too. 
- withTable(tableName) { - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - sql( - s""" - |CREATE TABLE $tableName (A LONG, a LONG) USING ORC LOCATION '$tableDir1' - """.stripMargin) - - checkAnswer(sql(s"select a, A from $tableName"), (0 until count).map(c => Row(c, c - 1))) - - val actual1 = stripSparkFilter(sql(s"select A from $tableName where A < 0")) - assert(actual1.count() == 1) - - val actual2 = stripSparkFilter(sql(s"select A from $tableName where a < 0")) - assert(actual2.count() == 0) - } - - // Exception thrown for ambiguous case. - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { - val e = intercept[AnalysisException] { - sql(s"select a from $tableName where a < 0").collect() - } - assert(e.getMessage.contains( - "Reference 'a' is ambiguous")) - } - } - - // Metastore table has only `A` field. - withTable(tableName) { - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { - sql( - s""" - |CREATE TABLE $tableName (A LONG) USING ORC LOCATION '$tableDir1' - """.stripMargin) - - val e = intercept[SparkException] { - sql(s"select A from $tableName where A < 0").collect() - } - assert(e.getCause.isInstanceOf[RuntimeException] && e.getCause.getMessage.contains( - """Found duplicate field(s) "A": [A, a] in case-insensitive mode""")) - } - } - - // Physical ORC files have only `A` field. - val tableDir2 = dir.getAbsoluteFile + "/table2" - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - spark.range(count).repartition(count).selectExpr("id - 1 as A") - .write.mode("overwrite").orc(tableDir2) - } - - withTable(tableName) { - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { - sql( - s""" - |CREATE TABLE $tableName (a LONG) USING ORC LOCATION '$tableDir2' - """.stripMargin) - - checkAnswer(sql(s"select a from $tableName"), (0 until count).map(c => Row(c - 1))) - - val actual = stripSparkFilter(sql(s"select a from $tableName where a < 0")) - assert(actual.count() == 1) - } - } - - withTable(tableName) { - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - sql( - s""" - |CREATE TABLE $tableName (A LONG) USING ORC LOCATION '$tableDir2' - """.stripMargin) - - checkAnswer(sql(s"select A from $tableName"), (0 until count).map(c => Row(c - 1))) - - val actual = stripSparkFilter(sql(s"select A from $tableName where A < 0")) - assert(actual.count() == 1) - } - } - } - } - - test("SPARK-32646: Case-insensitive field resolution for pushdown when reading ORC") { - import org.apache.spark.sql.sources._ - - def getOrcFilter( - schema: StructType, - filters: Seq[Filter], - caseSensitive: String): Option[SearchArgument] = { - var orcFilter: Option[SearchArgument] = None - withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive) { - orcFilter = - OrcFilters.createFilter(schema, filters) - } - orcFilter - } - - def testFilter( - schema: StructType, - filters: Seq[Filter], - expected: SearchArgument): Unit = { - val caseSensitiveFilters = getOrcFilter(schema, filters, "true") - val caseInsensitiveFilters = getOrcFilter(schema, filters, "false") - - assert(caseSensitiveFilters.isEmpty) - assert(caseInsensitiveFilters.isDefined) - - assert(caseInsensitiveFilters.get.getLeaves().size() > 0) - assert(caseInsensitiveFilters.get.getLeaves().size() == expected.getLeaves().size()) - (0 until expected.getLeaves().size()).foreach { index => - assert(caseInsensitiveFilters.get.getLeaves().get(index) == expected.getLeaves().get(index)) - } - } - - val schema1 = StructType(Seq(StructField("cint", IntegerType))) - testFilter(schema1, Seq(GreaterThan("CINT", 1)), - newBuilder.startNot() - 
.lessThanEquals("cint", OrcFilters.getPredicateLeafType(IntegerType), 1L).`end`().build())
-    testFilter(schema1, Seq(
-      And(GreaterThan("CINT", 1), EqualTo("Cint", 2))),
-      newBuilder.startAnd()
-        .startNot()
-        .lessThanEquals("cint", OrcFilters.getPredicateLeafType(IntegerType), 1L).`end`()
-        .equals("cint", OrcFilters.getPredicateLeafType(IntegerType), 2L)
-        .`end`().build())
-
-    // Nested column case
-    val schema2 = StructType(Seq(StructField("a",
-      StructType(Seq(StructField("cint", IntegerType))))))
-
-    testFilter(schema2, Seq(GreaterThan("A.CINT", 1)),
-      newBuilder.startNot()
-        .lessThanEquals("a.cint", OrcFilters.getPredicateLeafType(IntegerType), 1L).`end`().build())
-    testFilter(schema2, Seq(GreaterThan("a.CINT", 1)),
-      newBuilder.startNot()
-        .lessThanEquals("a.cint", OrcFilters.getPredicateLeafType(IntegerType), 1L).`end`().build())
-    testFilter(schema2, Seq(GreaterThan("A.cint", 1)),
-      newBuilder.startNot()
-        .lessThanEquals("a.cint", OrcFilters.getPredicateLeafType(IntegerType), 1L).`end`().build())
-    testFilter(schema2, Seq(
-      And(GreaterThan("a.CINT", 1), EqualTo("a.Cint", 2))),
-      newBuilder.startAnd()
-        .startNot()
-        .lessThanEquals("a.cint", OrcFilters.getPredicateLeafType(IntegerType), 1L).`end`()
-        .equals("a.cint", OrcFilters.getPredicateLeafType(IntegerType), 2L)
-        .`end`().build())
-  }
-}
-
diff --git a/sql/hive-thriftserver/v2.3/if/TCLIService.thrift b/sql/hive-thriftserver/if/TCLIService.thrift
similarity index 100%
rename from sql/hive-thriftserver/v2.3/if/TCLIService.thrift
rename to sql/hive-thriftserver/if/TCLIService.thrift
diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml
index 5bf20b209aff7..4a96afe9df20a 100644
--- a/sql/hive-thriftserver/pom.xml
+++ b/sql/hive-thriftserver/pom.xml
@@ -146,9 +146,7 @@
-                <source>v${hive.version.short}/src/gen/java</source>
-                <source>v${hive.version.short}/src/main/java</source>
-                <source>v${hive.version.short}/src/main/scala</source>
+                <source>src/gen/java</source>
diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TArrayTypeEntry.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TArrayTypeEntry.java
similarity index 100%
rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TArrayTypeEntry.java
rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TArrayTypeEntry.java
diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TBinaryColumn.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBinaryColumn.java
similarity index 100%
rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TBinaryColumn.java
rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBinaryColumn.java
diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolColumn.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolColumn.java
similarity index 100%
rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolColumn.java
rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolColumn.java
diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolValue.java
similarity index 100%
rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolValue.java
rename to
sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolValue.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TByteColumn.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TByteColumn.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TByteColumn.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TByteColumn.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TByteValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TByteValue.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TByteValue.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TByteValue.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIService.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIService.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIService.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIService.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIServiceConstants.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIServiceConstants.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIServiceConstants.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIServiceConstants.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationResp.java rename to 
sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TColumn.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumn.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TColumn.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumn.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnDesc.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnDesc.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnDesc.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnDesc.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnValue.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnValue.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnValue.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleColumn.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleColumn.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleColumn.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleColumn.java diff --git 
a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleValue.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleValue.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleValue.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchOrientation.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchOrientation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchOrientation.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchOrientation.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsResp.java diff --git 
a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsResp.java rename to 
sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoType.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoType.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoType.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoType.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoValue.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoValue.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoValue.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysResp.java rename to 
sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesResp.java rename to 
sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/THandleIdentifier.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/THandleIdentifier.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/THandleIdentifier.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/THandleIdentifier.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Column.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Column.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Column.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Column.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Value.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Value.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Value.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Value.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Column.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Column.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Column.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Column.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Value.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Value.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Value.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Value.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Column.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Column.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Column.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Column.java diff --git 
a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Value.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Value.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Value.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Value.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TJobExecutionStatus.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TJobExecutionStatus.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TJobExecutionStatus.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TJobExecutionStatus.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TMapTypeEntry.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TMapTypeEntry.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TMapTypeEntry.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TMapTypeEntry.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationHandle.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationHandle.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationHandle.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationHandle.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationState.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationState.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationState.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationState.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationType.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationType.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationType.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationType.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TPrimitiveTypeEntry.java 
b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TPrimitiveTypeEntry.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TPrimitiveTypeEntry.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TPrimitiveTypeEntry.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TProgressUpdateResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TProgressUpdateResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TProgressUpdateResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TProgressUpdateResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TProtocolVersion.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TProtocolVersion.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TProtocolVersion.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TProtocolVersion.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TRow.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRow.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TRow.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRow.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TRowSet.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRowSet.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TRowSet.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRowSet.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TSessionHandle.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TSessionHandle.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TSessionHandle.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TSessionHandle.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStatus.java 
b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStatus.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStatus.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStatus.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStatusCode.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStatusCode.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStatusCode.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStatusCode.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStringColumn.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStringColumn.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStringColumn.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStringColumn.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStringValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStringValue.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStringValue.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStringValue.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStructTypeEntry.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStructTypeEntry.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStructTypeEntry.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStructTypeEntry.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTableSchema.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTableSchema.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTableSchema.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTableSchema.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeDesc.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeDesc.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeDesc.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeDesc.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeEntry.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeEntry.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeEntry.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeEntry.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeId.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeId.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeId.java 
rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeId.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifierValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifierValue.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifierValue.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifierValue.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifiers.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifiers.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifiers.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifiers.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TUnionTypeEntry.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TUnionTypeEntry.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TUnionTypeEntry.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TUnionTypeEntry.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TUserDefinedTypeEntry.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TUserDefinedTypeEntry.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TUserDefinedTypeEntry.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TUserDefinedTypeEntry.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/AbstractService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/AbstractService.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/AbstractService.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/AbstractService.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/CompositeService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/CompositeService.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/CompositeService.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/CompositeService.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/CookieSigner.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/CookieSigner.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/CookieSigner.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/CookieSigner.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/ServiceOperations.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/ServiceOperations.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/ServiceOperations.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/ServiceOperations.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/ServiceUtils.java 
b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/ServiceUtils.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/ServiceUtils.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/ServiceUtils.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/CLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIService.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/CLIService.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIService.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java rename to 
sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/ColumnValue.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnValue.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/ColumnValue.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnValue.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/FetchOrientation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/FetchOrientation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/FetchOrientation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/FetchOrientation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/GetInfoType.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/GetInfoType.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/GetInfoType.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/GetInfoType.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/GetInfoValue.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/GetInfoValue.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/GetInfoValue.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/GetInfoValue.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/Handle.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/Handle.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/Handle.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/Handle.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/HandleIdentifier.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/HandleIdentifier.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/HandleIdentifier.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/HandleIdentifier.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/HiveSQLException.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/HiveSQLException.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/HiveSQLException.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/HiveSQLException.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/ICLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ICLIService.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/ICLIService.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ICLIService.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/OperationHandle.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/OperationHandle.java similarity index 100% rename from 
sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/OperationHandle.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/OperationHandle.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/OperationState.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/OperationState.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/OperationState.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/OperationState.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/OperationType.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/OperationType.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/OperationType.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/OperationType.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/RowBasedSet.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/RowBasedSet.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/RowBasedSet.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/RowBasedSet.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/RowSet.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/RowSet.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/RowSet.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/RowSet.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/RowSetFactory.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/RowSetFactory.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/RowSetFactory.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/RowSetFactory.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/SessionHandle.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/SessionHandle.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/SessionHandle.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/SessionHandle.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/TableSchema.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/TableSchema.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/TableSchema.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/TableSchema.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/TypeDescriptor.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/TypeDescriptor.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/TypeDescriptor.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/TypeDescriptor.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/TypeQualifiers.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/TypeQualifiers.java 
similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/TypeQualifiers.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/TypeQualifiers.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetCatalogsOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetCatalogsOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetCatalogsOperation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetCatalogsOperation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetCrossReferenceOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetCrossReferenceOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetCrossReferenceOperation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetCrossReferenceOperation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetPrimaryKeysOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetPrimaryKeysOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetPrimaryKeysOperation.java rename to 
sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetPrimaryKeysOperation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetTableTypesOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetTableTypesOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetTableTypesOperation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetTableTypesOperation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetTablesOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetTablesOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetTablesOperation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetTablesOperation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/HiveTableTypeMapping.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/HiveTableTypeMapping.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/HiveTableTypeMapping.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/HiveTableTypeMapping.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/MetadataOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/MetadataOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/MetadataOperation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/MetadataOperation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/Operation.java 
b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/Operation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/Operation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/Operation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/TableTypeMapping.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/TableTypeMapping.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/TableTypeMapping.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/TableTypeMapping.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSession.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSession.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSession.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSession.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionBase.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionBase.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionBase.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionBase.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContext.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContext.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContext.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContext.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContextImpl.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContextImpl.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContextImpl.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContextImpl.java diff --git 
a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/SessionManager.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/SessionManager.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/SessionManager.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/SessionManager.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java rename to 
sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/server/HiveServer2.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/server/HiveServer2.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/server/HiveServer2.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/server/HiveServer2.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java diff --git a/sql/hive-thriftserver/v2.3/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala rename to sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala diff --git a/sql/hive-thriftserver/v1.2/if/TCLIService.thrift b/sql/hive-thriftserver/v1.2/if/TCLIService.thrift deleted file mode 100644 index 225e319737811..0000000000000 --- a/sql/hive-thriftserver/v1.2/if/TCLIService.thrift +++ /dev/null @@ -1,1173 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Coding Conventions for this file: -// -// Structs/Enums/Unions -// * Struct, Enum, and Union names begin with a "T", -// and use a capital letter for each new word, with no underscores. -// * All fields should be declared as either optional or required. -// -// Functions -// * Function names start with a capital letter and have a capital letter for -// each new word, with no underscores. -// * Each function should take exactly one parameter, named TFunctionNameReq, -// and should return either void or TFunctionNameResp. This convention allows -// incremental updates. -// -// Services -// * Service names begin with the letter "T", use a capital letter for each -// new word (with no underscores), and end with the word "Service". - -namespace java org.apache.hive.service.cli.thrift -namespace cpp apache.hive.service.cli.thrift - -// List of protocol versions. A new token should be -// added to the end of this list every time a change is made. 
-enum TProtocolVersion { - HIVE_CLI_SERVICE_PROTOCOL_V1, - - // V2 adds support for asynchronous execution - HIVE_CLI_SERVICE_PROTOCOL_V2 - - // V3 add varchar type, primitive type qualifiers - HIVE_CLI_SERVICE_PROTOCOL_V3 - - // V4 add decimal precision/scale, char type - HIVE_CLI_SERVICE_PROTOCOL_V4 - - // V5 adds error details when GetOperationStatus returns in error state - HIVE_CLI_SERVICE_PROTOCOL_V5 - - // V6 uses binary type for binary payload (was string) and uses columnar result set - HIVE_CLI_SERVICE_PROTOCOL_V6 - - // V7 adds support for delegation token based connection - HIVE_CLI_SERVICE_PROTOCOL_V7 - - // V8 adds support for interval types - HIVE_CLI_SERVICE_PROTOCOL_V8 -} - -enum TTypeId { - BOOLEAN_TYPE, - TINYINT_TYPE, - SMALLINT_TYPE, - INT_TYPE, - BIGINT_TYPE, - FLOAT_TYPE, - DOUBLE_TYPE, - STRING_TYPE, - TIMESTAMP_TYPE, - BINARY_TYPE, - ARRAY_TYPE, - MAP_TYPE, - STRUCT_TYPE, - UNION_TYPE, - USER_DEFINED_TYPE, - DECIMAL_TYPE, - NULL_TYPE, - DATE_TYPE, - VARCHAR_TYPE, - CHAR_TYPE, - INTERVAL_YEAR_MONTH_TYPE, - INTERVAL_DAY_TIME_TYPE -} - -const set<TTypeId> PRIMITIVE_TYPES = [ - TTypeId.BOOLEAN_TYPE, - TTypeId.TINYINT_TYPE, - TTypeId.SMALLINT_TYPE, - TTypeId.INT_TYPE, - TTypeId.BIGINT_TYPE, - TTypeId.FLOAT_TYPE, - TTypeId.DOUBLE_TYPE, - TTypeId.STRING_TYPE, - TTypeId.TIMESTAMP_TYPE, - TTypeId.BINARY_TYPE, - TTypeId.DECIMAL_TYPE, - TTypeId.NULL_TYPE, - TTypeId.DATE_TYPE, - TTypeId.VARCHAR_TYPE, - TTypeId.CHAR_TYPE, - TTypeId.INTERVAL_YEAR_MONTH_TYPE, - TTypeId.INTERVAL_DAY_TIME_TYPE -] - -const set<TTypeId> COMPLEX_TYPES = [ - TTypeId.ARRAY_TYPE - TTypeId.MAP_TYPE - TTypeId.STRUCT_TYPE - TTypeId.UNION_TYPE - TTypeId.USER_DEFINED_TYPE -] - -const set<TTypeId> COLLECTION_TYPES = [ - TTypeId.ARRAY_TYPE - TTypeId.MAP_TYPE -] - -const map<TTypeId,string> TYPE_NAMES = { - TTypeId.BOOLEAN_TYPE: "BOOLEAN", - TTypeId.TINYINT_TYPE: "TINYINT", - TTypeId.SMALLINT_TYPE: "SMALLINT", - TTypeId.INT_TYPE: "INT", - TTypeId.BIGINT_TYPE: "BIGINT", - TTypeId.FLOAT_TYPE: "FLOAT", - TTypeId.DOUBLE_TYPE: "DOUBLE", - TTypeId.STRING_TYPE: "STRING", - TTypeId.TIMESTAMP_TYPE: "TIMESTAMP", - TTypeId.BINARY_TYPE: "BINARY", - TTypeId.ARRAY_TYPE: "ARRAY", - TTypeId.MAP_TYPE: "MAP", - TTypeId.STRUCT_TYPE: "STRUCT", - TTypeId.UNION_TYPE: "UNIONTYPE", - TTypeId.DECIMAL_TYPE: "DECIMAL", - TTypeId.NULL_TYPE: "NULL" - TTypeId.DATE_TYPE: "DATE" - TTypeId.VARCHAR_TYPE: "VARCHAR" - TTypeId.CHAR_TYPE: "CHAR" - TTypeId.INTERVAL_YEAR_MONTH_TYPE: "INTERVAL_YEAR_MONTH" - TTypeId.INTERVAL_DAY_TIME_TYPE: "INTERVAL_DAY_TIME" -} - -// Thrift does not support recursively defined types or forward declarations, -// which makes it difficult to represent Hive's nested types. -// To get around these limitations TTypeDesc employs a type list that maps -// integer "pointers" to TTypeEntry objects.
The following examples show -// how different types are represented using this scheme: -// -// "INT": -// TTypeDesc { -// types = [ -// TTypeEntry.primitive_entry { -// type = INT_TYPE -// } -// ] -// } -// -// "ARRAY<INT>": -// TTypeDesc { -// types = [ -// TTypeEntry.array_entry { -// object_type_ptr = 1 -// }, -// TTypeEntry.primitive_entry { -// type = INT_TYPE -// } -// ] -// } -// -// "MAP<INT,STRING>": -// TTypeDesc { -// types = [ -// TTypeEntry.map_entry { -// key_type_ptr = 1 -// value_type_ptr = 2 -// }, -// TTypeEntry.primitive_entry { -// type = INT_TYPE -// }, -// TTypeEntry.primitive_entry { -// type = STRING_TYPE -// } -// ] -// } - -typedef i32 TTypeEntryPtr - -// Valid TTypeQualifiers key names -const string CHARACTER_MAXIMUM_LENGTH = "characterMaximumLength" - -// Type qualifier key name for decimal -const string PRECISION = "precision" -const string SCALE = "scale" - -union TTypeQualifierValue { - 1: optional i32 i32Value - 2: optional string stringValue -} - -// Type qualifiers for primitive type. -struct TTypeQualifiers { - 1: required map<string, TTypeQualifierValue> qualifiers -} - -// Type entry for a primitive type. -struct TPrimitiveTypeEntry { - // The primitive type token. This must satisfy the condition - // that type is in the PRIMITIVE_TYPES set. - 1: required TTypeId type - 2: optional TTypeQualifiers typeQualifiers -} - -// Type entry for an ARRAY type. -struct TArrayTypeEntry { - 1: required TTypeEntryPtr objectTypePtr -} - -// Type entry for a MAP type. -struct TMapTypeEntry { - 1: required TTypeEntryPtr keyTypePtr - 2: required TTypeEntryPtr valueTypePtr -} - -// Type entry for a STRUCT type. -struct TStructTypeEntry { - 1: required map<string, TTypeEntryPtr> nameToTypePtr -} - -// Type entry for a UNIONTYPE type. -struct TUnionTypeEntry { - 1: required map<string, TTypeEntryPtr> nameToTypePtr -} - -struct TUserDefinedTypeEntry { - // The fully qualified name of the class implementing this type. - 1: required string typeClassName -} - -// We use a union here since Thrift does not support inheritance. -union TTypeEntry { - 1: TPrimitiveTypeEntry primitiveEntry - 2: TArrayTypeEntry arrayEntry - 3: TMapTypeEntry mapEntry - 4: TStructTypeEntry structEntry - 5: TUnionTypeEntry unionEntry - 6: TUserDefinedTypeEntry userDefinedTypeEntry -} - -// Type descriptor for columns. -struct TTypeDesc { - // The "top" type is always the first element of the list. - // If the top type is an ARRAY, MAP, STRUCT, or UNIONTYPE - // type, then subsequent elements represent nested types. - 1: required list<TTypeEntry> types -} - -// A result set column descriptor. -struct TColumnDesc { - // The name of the column - 1: required string columnName - - // The type descriptor for this column - 2: required TTypeDesc typeDesc - - // The ordinal position of this column in the schema - 3: required i32 position - - 4: optional string comment -} - -// Metadata used to describe the schema (column names, types, comments) -// of result sets. -struct TTableSchema { - 1: required list<TColumnDesc> columns -} - -// A Boolean column value. -struct TBoolValue { - // NULL if value is unset. - 1: optional bool value -} - -// A Byte column value. -struct TByteValue { - // NULL if value is unset. - 1: optional byte value -} - -// A signed, 16 bit column value.
-struct TI16Value { - // NULL if value is unset - 1: optional i16 value -} - -// A signed, 32 bit column value -struct TI32Value { - // NULL if value is unset - 1: optional i32 value -} - -// A signed 64 bit column value -struct TI64Value { - // NULL if value is unset - 1: optional i64 value -} - -// A floating point 64 bit column value -struct TDoubleValue { - // NULL if value is unset - 1: optional double value -} - -struct TStringValue { - // NULL if value is unset - 1: optional string value -} - -// A single column value in a result set. -// Note that Hive's type system is richer than Thrift's, -// so in some cases we have to map multiple Hive types -// to the same Thrift type. On the client-side this is -// disambiguated by looking at the Schema of the -// result set. -union TColumnValue { - 1: TBoolValue boolVal // BOOLEAN - 2: TByteValue byteVal // TINYINT - 3: TI16Value i16Val // SMALLINT - 4: TI32Value i32Val // INT - 5: TI64Value i64Val // BIGINT, TIMESTAMP - 6: TDoubleValue doubleVal // FLOAT, DOUBLE - 7: TStringValue stringVal // STRING, LIST, MAP, STRUCT, UNIONTYPE, BINARY, DECIMAL, NULL, INTERVAL_YEAR_MONTH, INTERVAL_DAY_TIME -} - -// Represents a row in a rowset. -struct TRow { - 1: required list colVals -} - -struct TBoolColumn { - 1: required list values - 2: required binary nulls -} - -struct TByteColumn { - 1: required list values - 2: required binary nulls -} - -struct TI16Column { - 1: required list values - 2: required binary nulls -} - -struct TI32Column { - 1: required list values - 2: required binary nulls -} - -struct TI64Column { - 1: required list values - 2: required binary nulls -} - -struct TDoubleColumn { - 1: required list values - 2: required binary nulls -} - -struct TStringColumn { - 1: required list values - 2: required binary nulls -} - -struct TBinaryColumn { - 1: required list values - 2: required binary nulls -} - -// Note that Hive's type system is richer than Thrift's, -// so in some cases we have to map multiple Hive types -// to the same Thrift type. On the client-side this is -// disambiguated by looking at the Schema of the -// result set. -union TColumn { - 1: TBoolColumn boolVal // BOOLEAN - 2: TByteColumn byteVal // TINYINT - 3: TI16Column i16Val // SMALLINT - 4: TI32Column i32Val // INT - 5: TI64Column i64Val // BIGINT, TIMESTAMP - 6: TDoubleColumn doubleVal // FLOAT, DOUBLE - 7: TStringColumn stringVal // STRING, LIST, MAP, STRUCT, UNIONTYPE, DECIMAL, NULL - 8: TBinaryColumn binaryVal // BINARY -} - -// Represents a rowset -struct TRowSet { - // The starting row offset of this rowset. - 1: required i64 startRowOffset - 2: required list rows - 3: optional list columns -} - -// The return status code contained in each response. -enum TStatusCode { - SUCCESS_STATUS, - SUCCESS_WITH_INFO_STATUS, - STILL_EXECUTING_STATUS, - ERROR_STATUS, - INVALID_HANDLE_STATUS -} - -// The return status of a remote request -struct TStatus { - 1: required TStatusCode statusCode - - // If status is SUCCESS_WITH_INFO, info_msgs may be populated with - // additional diagnostic information. - 2: optional list infoMessages - - // If status is ERROR, then the following fields may be set - 3: optional string sqlState // as defined in the ISO/IEF CLI specification - 4: optional i32 errorCode // internal error code - 5: optional string errorMessage -} - -// The state of an operation (i.e. a query or other -// asynchronous operation that generates a result set) -// on the server. 
-enum TOperationState { - // The operation has been initialized - INITIALIZED_STATE, - - // The operation is running. In this state the result - // set is not available. - RUNNING_STATE, - - // The operation has completed. When an operation is in - // this state its result set may be fetched. - FINISHED_STATE, - - // The operation was canceled by a client - CANCELED_STATE, - - // The operation was closed by a client - CLOSED_STATE, - - // The operation failed due to an error - ERROR_STATE, - - // The operation is in an unrecognized state - UKNOWN_STATE, - - // The operation is in an pending state - PENDING_STATE, -} - -// A string identifier. This is interpreted literally. -typedef string TIdentifier - -// A search pattern. -// -// Valid search pattern characters: -// '_': Any single character. -// '%': Any sequence of zero or more characters. -// '\': Escape character used to include special characters, -// e.g. '_', '%', '\'. If a '\' precedes a non-special -// character it has no special meaning and is interpreted -// literally. -typedef string TPattern - - -// A search pattern or identifier. Used as input -// parameter for many of the catalog functions. -typedef string TPatternOrIdentifier - -struct THandleIdentifier { - // 16 byte globally unique identifier - // This is the public ID of the handle and - // can be used for reporting. - 1: required binary guid, - - // 16 byte secret generated by the server - // and used to verify that the handle is not - // being hijacked by another user. - 2: required binary secret, -} - -// Client-side handle to persistent -// session information on the server-side. -struct TSessionHandle { - 1: required THandleIdentifier sessionId -} - -// The subtype of an OperationHandle. -enum TOperationType { - EXECUTE_STATEMENT, - GET_TYPE_INFO, - GET_CATALOGS, - GET_SCHEMAS, - GET_TABLES, - GET_TABLE_TYPES, - GET_COLUMNS, - GET_FUNCTIONS, - UNKNOWN, -} - -// Client-side reference to a task running -// asynchronously on the server. -struct TOperationHandle { - 1: required THandleIdentifier operationId - 2: required TOperationType operationType - - // If hasResultSet = TRUE, then this operation - // generates a result set that can be fetched. - // Note that the result set may be empty. - // - // If hasResultSet = FALSE, then this operation - // does not generate a result set, and calling - // GetResultSetMetadata or FetchResults against - // this OperationHandle will generate an error. - 3: required bool hasResultSet - - // For operations that don't generate result sets, - // modifiedRowCount is either: - // - // 1) The number of rows that were modified by - // the DML operation (e.g. number of rows inserted, - // number of rows deleted, etc). - // - // 2) 0 for operations that don't modify or add rows. - // - // 3) < 0 if the operation is capable of modifiying rows, - // but Hive is unable to determine how many rows were - // modified. For example, Hive's LOAD DATA command - // doesn't generate row count information because - // Hive doesn't inspect the data as it is loaded. - // - // modifiedRowCount is unset if the operation generates - // a result set. - 4: optional double modifiedRowCount -} - - -// OpenSession() -// -// Open a session (connection) on the server against -// which operations may be executed. -struct TOpenSessionReq { - // The version of the HiveServer2 protocol that the client is using. - 1: required TProtocolVersion client_protocol = TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V8 - - // Username and password for authentication. 
- // Depending on the authentication scheme being used, - // this information may instead be provided by a lower - // protocol layer, in which case these fields may be - // left unset. - 2: optional string username - 3: optional string password - - // Configuration overlay which is applied when the session is - // first created. - 4: optional map configuration -} - -struct TOpenSessionResp { - 1: required TStatus status - - // The protocol version that the server is using. - 2: required TProtocolVersion serverProtocolVersion = TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V8 - - // Session Handle - 3: optional TSessionHandle sessionHandle - - // The configuration settings for this session. - 4: optional map configuration -} - - -// CloseSession() -// -// Closes the specified session and frees any resources -// currently allocated to that session. Any open -// operations in that session will be canceled. -struct TCloseSessionReq { - 1: required TSessionHandle sessionHandle -} - -struct TCloseSessionResp { - 1: required TStatus status -} - - - -enum TGetInfoType { - CLI_MAX_DRIVER_CONNECTIONS = 0, - CLI_MAX_CONCURRENT_ACTIVITIES = 1, - CLI_DATA_SOURCE_NAME = 2, - CLI_FETCH_DIRECTION = 8, - CLI_SERVER_NAME = 13, - CLI_SEARCH_PATTERN_ESCAPE = 14, - CLI_DBMS_NAME = 17, - CLI_DBMS_VER = 18, - CLI_ACCESSIBLE_TABLES = 19, - CLI_ACCESSIBLE_PROCEDURES = 20, - CLI_CURSOR_COMMIT_BEHAVIOR = 23, - CLI_DATA_SOURCE_READ_ONLY = 25, - CLI_DEFAULT_TXN_ISOLATION = 26, - CLI_IDENTIFIER_CASE = 28, - CLI_IDENTIFIER_QUOTE_CHAR = 29, - CLI_MAX_COLUMN_NAME_LEN = 30, - CLI_MAX_CURSOR_NAME_LEN = 31, - CLI_MAX_SCHEMA_NAME_LEN = 32, - CLI_MAX_CATALOG_NAME_LEN = 34, - CLI_MAX_TABLE_NAME_LEN = 35, - CLI_SCROLL_CONCURRENCY = 43, - CLI_TXN_CAPABLE = 46, - CLI_USER_NAME = 47, - CLI_TXN_ISOLATION_OPTION = 72, - CLI_INTEGRITY = 73, - CLI_GETDATA_EXTENSIONS = 81, - CLI_NULL_COLLATION = 85, - CLI_ALTER_TABLE = 86, - CLI_ORDER_BY_COLUMNS_IN_SELECT = 90, - CLI_SPECIAL_CHARACTERS = 94, - CLI_MAX_COLUMNS_IN_GROUP_BY = 97, - CLI_MAX_COLUMNS_IN_INDEX = 98, - CLI_MAX_COLUMNS_IN_ORDER_BY = 99, - CLI_MAX_COLUMNS_IN_SELECT = 100, - CLI_MAX_COLUMNS_IN_TABLE = 101, - CLI_MAX_INDEX_SIZE = 102, - CLI_MAX_ROW_SIZE = 104, - CLI_MAX_STATEMENT_LEN = 105, - CLI_MAX_TABLES_IN_SELECT = 106, - CLI_MAX_USER_NAME_LEN = 107, - CLI_OJ_CAPABILITIES = 115, - - CLI_XOPEN_CLI_YEAR = 10000, - CLI_CURSOR_SENSITIVITY = 10001, - CLI_DESCRIBE_PARAMETER = 10002, - CLI_CATALOG_NAME = 10003, - CLI_COLLATION_SEQ = 10004, - CLI_MAX_IDENTIFIER_LEN = 10005, -} - -union TGetInfoValue { - 1: string stringValue - 2: i16 smallIntValue - 3: i32 integerBitmask - 4: i32 integerFlag - 5: i32 binaryValue - 6: i64 lenValue -} - -// GetInfo() -// -// This function is based on ODBC's CLIGetInfo() function. -// The function returns general information about the data source -// using the same keys as ODBC. -struct TGetInfoReq { - // The session to run this request against - 1: required TSessionHandle sessionHandle - - 2: required TGetInfoType infoType -} - -struct TGetInfoResp { - 1: required TStatus status - - 2: required TGetInfoValue infoValue -} - - -// ExecuteStatement() -// -// Execute a statement. -// The returned OperationHandle can be used to check on the -// status of the statement, and to fetch results once the -// statement has finished executing. 
-struct TExecuteStatementReq { - // The session to execute the statement against - 1: required TSessionHandle sessionHandle - - // The statement to be executed (DML, DDL, SET, etc) - 2: required string statement - - // Configuration properties that are overlayed on top of the - // the existing session configuration before this statement - // is executed. These properties apply to this statement - // only and will not affect the subsequent state of the Session. - 3: optional map confOverlay - - // Execute asynchronously when runAsync is true - 4: optional bool runAsync = false -} - -struct TExecuteStatementResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - -// GetTypeInfo() -// -// Get information about types supported by the HiveServer instance. -// The information is returned as a result set which can be fetched -// using the OperationHandle provided in the response. -// -// Refer to the documentation for ODBC's CLIGetTypeInfo function for -// the format of the result set. -struct TGetTypeInfoReq { - // The session to run this request against. - 1: required TSessionHandle sessionHandle -} - -struct TGetTypeInfoResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - - -// GetCatalogs() -// -// Returns the list of catalogs (databases) -// Results are ordered by TABLE_CATALOG -// -// Resultset columns : -// col1 -// name: TABLE_CAT -// type: STRING -// desc: Catalog name. NULL if not applicable. -// -struct TGetCatalogsReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle -} - -struct TGetCatalogsResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - - -// GetSchemas() -// -// Retrieves the schema names available in this database. -// The results are ordered by TABLE_CATALOG and TABLE_SCHEM. -// col1 -// name: TABLE_SCHEM -// type: STRING -// desc: schema name -// col2 -// name: TABLE_CATALOG -// type: STRING -// desc: catalog name -struct TGetSchemasReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle - - // Name of the catalog. Must not contain a search pattern. - 2: optional TIdentifier catalogName - - // schema name or pattern - 3: optional TPatternOrIdentifier schemaName -} - -struct TGetSchemasResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - - -// GetTables() -// -// Returns a list of tables with catalog, schema, and table -// type information. The information is returned as a result -// set which can be fetched using the OperationHandle -// provided in the response. -// Results are ordered by TABLE_TYPE, TABLE_CAT, TABLE_SCHEM, and TABLE_NAME -// -// Result Set Columns: -// -// col1 -// name: TABLE_CAT -// type: STRING -// desc: Catalog name. NULL if not applicable. -// -// col2 -// name: TABLE_SCHEM -// type: STRING -// desc: Schema name. -// -// col3 -// name: TABLE_NAME -// type: STRING -// desc: Table name. -// -// col4 -// name: TABLE_TYPE -// type: STRING -// desc: The table type, e.g. "TABLE", "VIEW", etc. -// -// col5 -// name: REMARKS -// type: STRING -// desc: Comments about the table -// -struct TGetTablesReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle - - // Name of the catalog or a search pattern. - 2: optional TPatternOrIdentifier catalogName - - // Name of the schema or a search pattern. - 3: optional TPatternOrIdentifier schemaName - - // Name of the table or a search pattern. 
- 4: optional TPatternOrIdentifier tableName - - // List of table types to match - // e.g. "TABLE", "VIEW", "SYSTEM TABLE", "GLOBAL TEMPORARY", - // "LOCAL TEMPORARY", "ALIAS", "SYNONYM", etc. - 5: optional list tableTypes -} - -struct TGetTablesResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - - -// GetTableTypes() -// -// Returns the table types available in this database. -// The results are ordered by table type. -// -// col1 -// name: TABLE_TYPE -// type: STRING -// desc: Table type name. -struct TGetTableTypesReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle -} - -struct TGetTableTypesResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - - -// GetColumns() -// -// Returns a list of columns in the specified tables. -// The information is returned as a result set which can be fetched -// using the OperationHandle provided in the response. -// Results are ordered by TABLE_CAT, TABLE_SCHEM, TABLE_NAME, -// and ORDINAL_POSITION. -// -// Result Set Columns are the same as those for the ODBC CLIColumns -// function. -// -struct TGetColumnsReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle - - // Name of the catalog. Must not contain a search pattern. - 2: optional TIdentifier catalogName - - // Schema name or search pattern - 3: optional TPatternOrIdentifier schemaName - - // Table name or search pattern - 4: optional TPatternOrIdentifier tableName - - // Column name or search pattern - 5: optional TPatternOrIdentifier columnName -} - -struct TGetColumnsResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - - -// GetFunctions() -// -// Returns a list of functions supported by the data source. The -// behavior of this function matches -// java.sql.DatabaseMetaData.getFunctions() both in terms of -// inputs and outputs. -// -// Result Set Columns: -// -// col1 -// name: FUNCTION_CAT -// type: STRING -// desc: Function catalog (may be null) -// -// col2 -// name: FUNCTION_SCHEM -// type: STRING -// desc: Function schema (may be null) -// -// col3 -// name: FUNCTION_NAME -// type: STRING -// desc: Function name. This is the name used to invoke the function. -// -// col4 -// name: REMARKS -// type: STRING -// desc: Explanatory comment on the function. -// -// col5 -// name: FUNCTION_TYPE -// type: SMALLINT -// desc: Kind of function. One of: -// * functionResultUnknown - Cannot determine if a return value or a table -// will be returned. -// * functionNoTable - Does not a return a table. -// * functionReturnsTable - Returns a table. -// -// col6 -// name: SPECIFIC_NAME -// type: STRING -// desc: The name which uniquely identifies this function within its schema. -// In this case this is the fully qualified class name of the class -// that implements this function. -// -struct TGetFunctionsReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle - - // A catalog name; must match the catalog name as it is stored in the - // database; "" retrieves those without a catalog; null means - // that the catalog name should not be used to narrow the search. - 2: optional TIdentifier catalogName - - // A schema name pattern; must match the schema name as it is stored - // in the database; "" retrieves those without a schema; null means - // that the schema name should not be used to narrow the search. 
- 3: optional TPatternOrIdentifier schemaName - - // A function name pattern; must match the function name as it is stored - // in the database. - 4: required TPatternOrIdentifier functionName -} - -struct TGetFunctionsResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - - -// GetOperationStatus() -// -// Get the status of an operation running on the server. -struct TGetOperationStatusReq { - // Session to run this request against - 1: required TOperationHandle operationHandle -} - -struct TGetOperationStatusResp { - 1: required TStatus status - 2: optional TOperationState operationState - - // If operationState is ERROR_STATE, then the following fields may be set - // sqlState as defined in the ISO/IEF CLI specification - 3: optional string sqlState - - // Internal error code - 4: optional i32 errorCode - - // Error message - 5: optional string errorMessage -} - - -// CancelOperation() -// -// Cancels processing on the specified operation handle and -// frees any resources which were allocated. -struct TCancelOperationReq { - // Operation to cancel - 1: required TOperationHandle operationHandle -} - -struct TCancelOperationResp { - 1: required TStatus status -} - - -// CloseOperation() -// -// Given an operation in the FINISHED, CANCELED, -// or ERROR states, CloseOperation() will free -// all of the resources which were allocated on -// the server to service the operation. -struct TCloseOperationReq { - 1: required TOperationHandle operationHandle -} - -struct TCloseOperationResp { - 1: required TStatus status -} - - -// GetResultSetMetadata() -// -// Retrieves schema information for the specified operation -struct TGetResultSetMetadataReq { - // Operation for which to fetch result set schema information - 1: required TOperationHandle operationHandle -} - -struct TGetResultSetMetadataResp { - 1: required TStatus status - 2: optional TTableSchema schema -} - - -enum TFetchOrientation { - // Get the next rowset. The fetch offset is ignored. - FETCH_NEXT, - - // Get the previous rowset. The fetch offset is ignored. - FETCH_PRIOR, - - // Return the rowset at the given fetch offset relative - // to the current rowset. - // NOT SUPPORTED - FETCH_RELATIVE, - - // Return the rowset at the specified fetch offset. - // NOT SUPPORTED - FETCH_ABSOLUTE, - - // Get the first rowset in the result set. - FETCH_FIRST, - - // Get the last rowset in the result set. - // NOT SUPPORTED - FETCH_LAST -} - -// FetchResults() -// -// Fetch rows from the server corresponding to -// a particular OperationHandle. -struct TFetchResultsReq { - // Operation from which to fetch results. - 1: required TOperationHandle operationHandle - - // The fetch orientation. This must be either - // FETCH_NEXT, FETCH_PRIOR or FETCH_FIRST. Defaults to FETCH_NEXT. - 2: required TFetchOrientation orientation = TFetchOrientation.FETCH_NEXT - - // Max number of rows that should be returned in - // the rowset. - 3: required i64 maxRows - - // The type of a fetch results request. 0 represents Query output. 1 represents Log - 4: optional i16 fetchType = 0 -} - -struct TFetchResultsResp { - 1: required TStatus status - - // TRUE if there are more rows left to fetch from the server. - 2: optional bool hasMoreRows - - // The rowset. This is optional so that we have the - // option in the future of adding alternate formats for - // representing result set data, e.g. delimited strings, - // binary encoded, etc. 
- 3: optional TRowSet results -} - -// GetDelegationToken() -// Retrieve delegation token for the current user -struct TGetDelegationTokenReq { - // session handle - 1: required TSessionHandle sessionHandle - - // userid for the proxy user - 2: required string owner - - // designated renewer userid - 3: required string renewer -} - -struct TGetDelegationTokenResp { - // status of the request - 1: required TStatus status - - // delegation token string - 2: optional string delegationToken -} - -// CancelDelegationToken() -// Cancel the given delegation token -struct TCancelDelegationTokenReq { - // session handle - 1: required TSessionHandle sessionHandle - - // delegation token to cancel - 2: required string delegationToken -} - -struct TCancelDelegationTokenResp { - // status of the request - 1: required TStatus status -} - -// RenewDelegationToken() -// Renew the given delegation token -struct TRenewDelegationTokenReq { - // session handle - 1: required TSessionHandle sessionHandle - - // delegation token to renew - 2: required string delegationToken -} - -struct TRenewDelegationTokenResp { - // status of the request - 1: required TStatus status -} - -service TCLIService { - - TOpenSessionResp OpenSession(1:TOpenSessionReq req); - - TCloseSessionResp CloseSession(1:TCloseSessionReq req); - - TGetInfoResp GetInfo(1:TGetInfoReq req); - - TExecuteStatementResp ExecuteStatement(1:TExecuteStatementReq req); - - TGetTypeInfoResp GetTypeInfo(1:TGetTypeInfoReq req); - - TGetCatalogsResp GetCatalogs(1:TGetCatalogsReq req); - - TGetSchemasResp GetSchemas(1:TGetSchemasReq req); - - TGetTablesResp GetTables(1:TGetTablesReq req); - - TGetTableTypesResp GetTableTypes(1:TGetTableTypesReq req); - - TGetColumnsResp GetColumns(1:TGetColumnsReq req); - - TGetFunctionsResp GetFunctions(1:TGetFunctionsReq req); - - TGetOperationStatusResp GetOperationStatus(1:TGetOperationStatusReq req); - - TCancelOperationResp CancelOperation(1:TCancelOperationReq req); - - TCloseOperationResp CloseOperation(1:TCloseOperationReq req); - - TGetResultSetMetadataResp GetResultSetMetadata(1:TGetResultSetMetadataReq req); - - TFetchResultsResp FetchResults(1:TFetchResultsReq req); - - TGetDelegationTokenResp GetDelegationToken(1:TGetDelegationTokenReq req); - - TCancelDelegationTokenResp CancelDelegationToken(1:TCancelDelegationTokenReq req); - - TRenewDelegationTokenResp RenewDelegationToken(1:TRenewDelegationTokenReq req); -} diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TArrayTypeEntry.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TArrayTypeEntry.java deleted file mode 100644 index 6323d34eac734..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TArrayTypeEntry.java +++ /dev/null @@ -1,383 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; 
-import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TArrayTypeEntry implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TArrayTypeEntry"); - - private static final org.apache.thrift.protocol.TField OBJECT_TYPE_PTR_FIELD_DESC = new org.apache.thrift.protocol.TField("objectTypePtr", org.apache.thrift.protocol.TType.I32, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TArrayTypeEntryStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TArrayTypeEntryTupleSchemeFactory()); - } - - private int objectTypePtr; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OBJECT_TYPE_PTR((short)1, "objectTypePtr"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OBJECT_TYPE_PTR - return OBJECT_TYPE_PTR; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __OBJECTTYPEPTR_ISSET_ID = 0; - private byte __isset_bitfield = 0; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OBJECT_TYPE_PTR, new org.apache.thrift.meta_data.FieldMetaData("objectTypePtr", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32 , "TTypeEntryPtr"))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TArrayTypeEntry.class, metaDataMap); - } - - public TArrayTypeEntry() { - } - - public TArrayTypeEntry( - int objectTypePtr) - { - this(); - this.objectTypePtr = objectTypePtr; - setObjectTypePtrIsSet(true); - } - - /** - * Performs a deep copy on other. 
- */ - public TArrayTypeEntry(TArrayTypeEntry other) { - __isset_bitfield = other.__isset_bitfield; - this.objectTypePtr = other.objectTypePtr; - } - - public TArrayTypeEntry deepCopy() { - return new TArrayTypeEntry(this); - } - - @Override - public void clear() { - setObjectTypePtrIsSet(false); - this.objectTypePtr = 0; - } - - public int getObjectTypePtr() { - return this.objectTypePtr; - } - - public void setObjectTypePtr(int objectTypePtr) { - this.objectTypePtr = objectTypePtr; - setObjectTypePtrIsSet(true); - } - - public void unsetObjectTypePtr() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __OBJECTTYPEPTR_ISSET_ID); - } - - /** Returns true if field objectTypePtr is set (has been assigned a value) and false otherwise */ - public boolean isSetObjectTypePtr() { - return EncodingUtils.testBit(__isset_bitfield, __OBJECTTYPEPTR_ISSET_ID); - } - - public void setObjectTypePtrIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __OBJECTTYPEPTR_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OBJECT_TYPE_PTR: - if (value == null) { - unsetObjectTypePtr(); - } else { - setObjectTypePtr((Integer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OBJECT_TYPE_PTR: - return Integer.valueOf(getObjectTypePtr()); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OBJECT_TYPE_PTR: - return isSetObjectTypePtr(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TArrayTypeEntry) - return this.equals((TArrayTypeEntry)that); - return false; - } - - public boolean equals(TArrayTypeEntry that) { - if (that == null) - return false; - - boolean this_present_objectTypePtr = true; - boolean that_present_objectTypePtr = true; - if (this_present_objectTypePtr || that_present_objectTypePtr) { - if (!(this_present_objectTypePtr && that_present_objectTypePtr)) - return false; - if (this.objectTypePtr != that.objectTypePtr) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_objectTypePtr = true; - builder.append(present_objectTypePtr); - if (present_objectTypePtr) - builder.append(objectTypePtr); - - return builder.toHashCode(); - } - - public int compareTo(TArrayTypeEntry other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TArrayTypeEntry typedOther = (TArrayTypeEntry)other; - - lastComparison = Boolean.valueOf(isSetObjectTypePtr()).compareTo(typedOther.isSetObjectTypePtr()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetObjectTypePtr()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.objectTypePtr, typedOther.objectTypePtr); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - 
public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TArrayTypeEntry("); - boolean first = true; - - sb.append("objectTypePtr:"); - sb.append(this.objectTypePtr); - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetObjectTypePtr()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'objectTypePtr' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TArrayTypeEntryStandardSchemeFactory implements SchemeFactory { - public TArrayTypeEntryStandardScheme getScheme() { - return new TArrayTypeEntryStandardScheme(); - } - } - - private static class TArrayTypeEntryStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TArrayTypeEntry struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OBJECT_TYPE_PTR - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.objectTypePtr = iprot.readI32(); - struct.setObjectTypePtrIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TArrayTypeEntry struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - oprot.writeFieldBegin(OBJECT_TYPE_PTR_FIELD_DESC); - oprot.writeI32(struct.objectTypePtr); - oprot.writeFieldEnd(); - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TArrayTypeEntryTupleSchemeFactory implements SchemeFactory { - public TArrayTypeEntryTupleScheme getScheme() { - return new TArrayTypeEntryTupleScheme(); - } - } - - private static class TArrayTypeEntryTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TArrayTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeI32(struct.objectTypePtr); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TArrayTypeEntry struct) 
throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.objectTypePtr = iprot.readI32(); - struct.setObjectTypePtrIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBinaryColumn.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBinaryColumn.java deleted file mode 100644 index 6b1b054d1acad..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBinaryColumn.java +++ /dev/null @@ -1,550 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TBinaryColumn implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TBinaryColumn"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TBinaryColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TBinaryColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TBinaryColumn.class, metaDataMap); - } - - public TBinaryColumn() { - } - - public TBinaryColumn( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = nulls; - } - - /** - * Performs a deep copy on other. - */ - public TBinaryColumn(TBinaryColumn other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(); - for (ByteBuffer other_element : other.values) { - ByteBuffer temp_binary_element = org.apache.thrift.TBaseHelper.copyBinary(other_element); -; - __this__values.add(temp_binary_element); - } - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); -; - } - } - - public TBinaryColumn deepCopy() { - return new TBinaryColumn(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? null : this.values.iterator(); - } - - public void addToValues(ByteBuffer elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? 
null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return nulls; - } - - public void setNulls(byte[] nulls) { - setNulls(nulls == null ? (ByteBuffer)null : ByteBuffer.wrap(nulls)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = nulls; - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TBinaryColumn) - return this.equals((TBinaryColumn)that); - return false; - } - - public boolean equals(TBinaryColumn that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_values = true && (isSetValues()); - builder.append(present_values); - if (present_values) - builder.append(values); - - boolean present_nulls = true && (isSetNulls()); - builder.append(present_nulls); - if (present_nulls) - builder.append(nulls); - - return builder.toHashCode(); - } - - public int compareTo(TBinaryColumn other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TBinaryColumn typedOther = (TBinaryColumn)other; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(typedOther.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, typedOther.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(typedOther.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, typedOther.nulls); - if 
(lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TBinaryColumn("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TBinaryColumnStandardSchemeFactory implements SchemeFactory { - public TBinaryColumnStandardScheme getScheme() { - return new TBinaryColumnStandardScheme(); - } - } - - private static class TBinaryColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TBinaryColumn struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list110 = iprot.readListBegin(); - struct.values = new ArrayList(_list110.size); - for (int _i111 = 0; _i111 < _list110.size; ++_i111) - { - ByteBuffer _elem112; // optional - _elem112 = iprot.readBinary(); - struct.values.add(_elem112); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, 
schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TBinaryColumn struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, struct.values.size())); - for (ByteBuffer _iter113 : struct.values) - { - oprot.writeBinary(_iter113); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TBinaryColumnTupleSchemeFactory implements SchemeFactory { - public TBinaryColumnTupleScheme getScheme() { - return new TBinaryColumnTupleScheme(); - } - } - - private static class TBinaryColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TBinaryColumn struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (ByteBuffer _iter114 : struct.values) - { - oprot.writeBinary(_iter114); - } - } - oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TBinaryColumn struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list115 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.values = new ArrayList(_list115.size); - for (int _i116 = 0; _i116 < _list115.size; ++_i116) - { - ByteBuffer _elem117; // optional - _elem117 = iprot.readBinary(); - struct.values.add(_elem117); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBoolColumn.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBoolColumn.java deleted file mode 100644 index efd571cfdfbbf..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBoolColumn.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TBoolColumn implements org.apache.thrift.TBase, 
java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TBoolColumn"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TBoolColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TBoolColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TBoolColumn.class, metaDataMap); - } - - public TBoolColumn() { - } - - public TBoolColumn( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = nulls; - } - - /** - * Performs a deep copy on other. 
- */ - public TBoolColumn(TBoolColumn other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(); - for (Boolean other_element : other.values) { - __this__values.add(other_element); - } - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); -; - } - } - - public TBoolColumn deepCopy() { - return new TBoolColumn(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? null : this.values.iterator(); - } - - public void addToValues(boolean elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return nulls; - } - - public void setNulls(byte[] nulls) { - setNulls(nulls == null ? (ByteBuffer)null : ByteBuffer.wrap(nulls)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = nulls; - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TBoolColumn) - return this.equals((TBoolColumn)that); - return false; - } - - public boolean equals(TBoolColumn that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if 
(this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_values = true && (isSetValues()); - builder.append(present_values); - if (present_values) - builder.append(values); - - boolean present_nulls = true && (isSetNulls()); - builder.append(present_nulls); - if (present_nulls) - builder.append(nulls); - - return builder.toHashCode(); - } - - public int compareTo(TBoolColumn other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TBoolColumn typedOther = (TBoolColumn)other; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(typedOther.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, typedOther.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(typedOther.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, typedOther.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TBoolColumn("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TBoolColumnStandardSchemeFactory implements SchemeFactory { - public TBoolColumnStandardScheme getScheme() { - return new TBoolColumnStandardScheme(); - } - } - - private static class TBoolColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TBoolColumn struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list54 = iprot.readListBegin(); - struct.values = new ArrayList(_list54.size); - for (int _i55 = 0; _i55 < _list54.size; ++_i55) - { - boolean _elem56; // optional - _elem56 = iprot.readBool(); - struct.values.add(_elem56); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TBoolColumn struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.BOOL, struct.values.size())); - for (boolean _iter57 : struct.values) - { - oprot.writeBool(_iter57); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TBoolColumnTupleSchemeFactory implements SchemeFactory { - public TBoolColumnTupleScheme getScheme() { - return new TBoolColumnTupleScheme(); - } - } - - private static class TBoolColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TBoolColumn struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (boolean _iter58 : struct.values) - { - oprot.writeBool(_iter58); - } - } - 
oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TBoolColumn struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list59 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.BOOL, iprot.readI32()); - struct.values = new ArrayList(_list59.size); - for (int _i60 = 0; _i60 < _list59.size; ++_i60) - { - boolean _elem61; // optional - _elem61 = iprot.readBool(); - struct.values.add(_elem61); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBoolValue.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBoolValue.java deleted file mode 100644 index c7495ee79e4b5..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBoolValue.java +++ /dev/null @@ -1,386 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TBoolValue implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TBoolValue"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("value", org.apache.thrift.protocol.TType.BOOL, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TBoolValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TBoolValueTupleSchemeFactory()); - } - - private boolean value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __VALUE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TBoolValue.class, metaDataMap); - } - - public TBoolValue() { - } - - /** - * Performs a deep copy on other. - */ - public TBoolValue(TBoolValue other) { - __isset_bitfield = other.__isset_bitfield; - this.value = other.value; - } - - public TBoolValue deepCopy() { - return new TBoolValue(this); - } - - @Override - public void clear() { - setValueIsSet(false); - this.value = false; - } - - public boolean isValue() { - return this.value; - } - - public void setValue(boolean value) { - this.value = value; - setValueIsSet(true); - } - - public void unsetValue() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return EncodingUtils.testBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - public void setValueIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((Boolean)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return Boolean.valueOf(isValue()); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TBoolValue) - return this.equals((TBoolValue)that); - return false; - } - - public boolean equals(TBoolValue that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || that_present_value) 
{ - if (!(this_present_value && that_present_value)) - return false; - if (this.value != that.value) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_value = true && (isSetValue()); - builder.append(present_value); - if (present_value) - builder.append(value); - - return builder.toHashCode(); - } - - public int compareTo(TBoolValue other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TBoolValue typedOther = (TBoolValue)other; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(typedOther.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, typedOther.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TBoolValue("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); - sb.append(this.value); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
- __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TBoolValueStandardSchemeFactory implements SchemeFactory { - public TBoolValueStandardScheme getScheme() { - return new TBoolValueStandardScheme(); - } - } - - private static class TBoolValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TBoolValue struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) { - struct.value = iprot.readBool(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TBoolValue struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.isSetValue()) { - oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeBool(struct.value); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TBoolValueTupleSchemeFactory implements SchemeFactory { - public TBoolValueTupleScheme getScheme() { - return new TBoolValueTupleScheme(); - } - } - - private static class TBoolValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TBoolValue struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeBool(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TBoolValue struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readBool(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TByteColumn.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TByteColumn.java deleted file mode 100644 index 169bfdeab3eea..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TByteColumn.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import 
org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TByteColumn implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TByteColumn"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TByteColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TByteColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BYTE)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TByteColumn.class, metaDataMap); - } - - public TByteColumn() { - } - - public TByteColumn( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = nulls; - } - - /** - * Performs a deep copy on other. - */ - public TByteColumn(TByteColumn other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(); - for (Byte other_element : other.values) { - __this__values.add(other_element); - } - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); -; - } - } - - public TByteColumn deepCopy() { - return new TByteColumn(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? null : this.values.iterator(); - } - - public void addToValues(byte elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return nulls; - } - - public void setNulls(byte[] nulls) { - setNulls(nulls == null ? 
(ByteBuffer)null : ByteBuffer.wrap(nulls)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = nulls; - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TByteColumn) - return this.equals((TByteColumn)that); - return false; - } - - public boolean equals(TByteColumn that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_values = true && (isSetValues()); - builder.append(present_values); - if (present_values) - builder.append(values); - - boolean present_nulls = true && (isSetNulls()); - builder.append(present_nulls); - if (present_nulls) - builder.append(nulls); - - return builder.toHashCode(); - } - - public int compareTo(TByteColumn other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TByteColumn typedOther = (TByteColumn)other; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(typedOther.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, typedOther.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(typedOther.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, typedOther.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - 
public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TByteColumn("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TByteColumnStandardSchemeFactory implements SchemeFactory { - public TByteColumnStandardScheme getScheme() { - return new TByteColumnStandardScheme(); - } - } - - private static class TByteColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TByteColumn struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list62 = iprot.readListBegin(); - struct.values = new ArrayList(_list62.size); - for (int _i63 = 0; _i63 < _list62.size; ++_i63) - { - byte _elem64; // optional - _elem64 = iprot.readByte(); - struct.values.add(_elem64); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TByteColumn struct) throws 
org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.BYTE, struct.values.size())); - for (byte _iter65 : struct.values) - { - oprot.writeByte(_iter65); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TByteColumnTupleSchemeFactory implements SchemeFactory { - public TByteColumnTupleScheme getScheme() { - return new TByteColumnTupleScheme(); - } - } - - private static class TByteColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TByteColumn struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (byte _iter66 : struct.values) - { - oprot.writeByte(_iter66); - } - } - oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TByteColumn struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list67 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.BYTE, iprot.readI32()); - struct.values = new ArrayList(_list67.size); - for (int _i68 = 0; _i68 < _list67.size; ++_i68) - { - byte _elem69; // optional - _elem69 = iprot.readByte(); - struct.values.add(_elem69); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TByteValue.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TByteValue.java deleted file mode 100644 index 23d9693759968..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TByteValue.java +++ /dev/null @@ -1,386 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TByteValue implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TByteValue"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new 
org.apache.thrift.protocol.TField("value", org.apache.thrift.protocol.TType.BYTE, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TByteValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TByteValueTupleSchemeFactory()); - } - - private byte value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __VALUE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BYTE))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TByteValue.class, metaDataMap); - } - - public TByteValue() { - } - - /** - * Performs a deep copy on other. 
- */ - public TByteValue(TByteValue other) { - __isset_bitfield = other.__isset_bitfield; - this.value = other.value; - } - - public TByteValue deepCopy() { - return new TByteValue(this); - } - - @Override - public void clear() { - setValueIsSet(false); - this.value = 0; - } - - public byte getValue() { - return this.value; - } - - public void setValue(byte value) { - this.value = value; - setValueIsSet(true); - } - - public void unsetValue() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return EncodingUtils.testBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - public void setValueIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((Byte)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return Byte.valueOf(getValue()); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TByteValue) - return this.equals((TByteValue)that); - return false; - } - - public boolean equals(TByteValue that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || that_present_value) { - if (!(this_present_value && that_present_value)) - return false; - if (this.value != that.value) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_value = true && (isSetValue()); - builder.append(present_value); - if (present_value) - builder.append(value); - - return builder.toHashCode(); - } - - public int compareTo(TByteValue other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TByteValue typedOther = (TByteValue)other; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(typedOther.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, typedOther.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TByteValue("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); 
- sb.append(this.value); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TByteValueStandardSchemeFactory implements SchemeFactory { - public TByteValueStandardScheme getScheme() { - return new TByteValueStandardScheme(); - } - } - - private static class TByteValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TByteValue struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.BYTE) { - struct.value = iprot.readByte(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TByteValue struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.isSetValue()) { - oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeByte(struct.value); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TByteValueTupleSchemeFactory implements SchemeFactory { - public TByteValueTupleScheme getScheme() { - return new TByteValueTupleScheme(); - } - } - - private static class TByteValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TByteValue struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeByte(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TByteValue struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readByte(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCLIService.java 
b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCLIService.java deleted file mode 100644 index 54851b8d51317..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCLIService.java +++ /dev/null @@ -1,15414 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TCLIService { - - public interface Iface { - - public TOpenSessionResp OpenSession(TOpenSessionReq req) throws org.apache.thrift.TException; - - public TCloseSessionResp CloseSession(TCloseSessionReq req) throws org.apache.thrift.TException; - - public TGetInfoResp GetInfo(TGetInfoReq req) throws org.apache.thrift.TException; - - public TExecuteStatementResp ExecuteStatement(TExecuteStatementReq req) throws org.apache.thrift.TException; - - public TGetTypeInfoResp GetTypeInfo(TGetTypeInfoReq req) throws org.apache.thrift.TException; - - public TGetCatalogsResp GetCatalogs(TGetCatalogsReq req) throws org.apache.thrift.TException; - - public TGetSchemasResp GetSchemas(TGetSchemasReq req) throws org.apache.thrift.TException; - - public TGetTablesResp GetTables(TGetTablesReq req) throws org.apache.thrift.TException; - - public TGetTableTypesResp GetTableTypes(TGetTableTypesReq req) throws org.apache.thrift.TException; - - public TGetColumnsResp GetColumns(TGetColumnsReq req) throws org.apache.thrift.TException; - - public TGetFunctionsResp GetFunctions(TGetFunctionsReq req) throws org.apache.thrift.TException; - - public TGetOperationStatusResp GetOperationStatus(TGetOperationStatusReq req) throws org.apache.thrift.TException; - - public TCancelOperationResp CancelOperation(TCancelOperationReq req) throws org.apache.thrift.TException; - - public TCloseOperationResp CloseOperation(TCloseOperationReq req) throws org.apache.thrift.TException; - - public TGetResultSetMetadataResp GetResultSetMetadata(TGetResultSetMetadataReq req) throws org.apache.thrift.TException; - - public TFetchResultsResp FetchResults(TFetchResultsReq req) throws org.apache.thrift.TException; - - public TGetDelegationTokenResp GetDelegationToken(TGetDelegationTokenReq req) throws org.apache.thrift.TException; - - public TCancelDelegationTokenResp CancelDelegationToken(TCancelDelegationTokenReq req) throws org.apache.thrift.TException; - - public TRenewDelegationTokenResp RenewDelegationToken(TRenewDelegationTokenReq req) throws org.apache.thrift.TException; - - } - - public interface AsyncIface { - - public void OpenSession(TOpenSessionReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public 
void CloseSession(TCloseSessionReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetInfo(TGetInfoReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void ExecuteStatement(TExecuteStatementReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetTypeInfo(TGetTypeInfoReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetCatalogs(TGetCatalogsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetSchemas(TGetSchemasReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetTables(TGetTablesReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetTableTypes(TGetTableTypesReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetColumns(TGetColumnsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetFunctions(TGetFunctionsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetOperationStatus(TGetOperationStatusReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void CancelOperation(TCancelOperationReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void CloseOperation(TCloseOperationReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetResultSetMetadata(TGetResultSetMetadataReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void FetchResults(TFetchResultsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetDelegationToken(TGetDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void CancelDelegationToken(TCancelDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void RenewDelegationToken(TRenewDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - } - - public static class Client extends org.apache.thrift.TServiceClient implements Iface { - public static class Factory implements org.apache.thrift.TServiceClientFactory { - public Factory() {} - public Client getClient(org.apache.thrift.protocol.TProtocol prot) { - return new Client(prot); - } - public Client getClient(org.apache.thrift.protocol.TProtocol iprot, org.apache.thrift.protocol.TProtocol oprot) { - return new Client(iprot, oprot); - } - } - - public Client(org.apache.thrift.protocol.TProtocol prot) - { - super(prot, prot); - } - - public Client(org.apache.thrift.protocol.TProtocol iprot, org.apache.thrift.protocol.TProtocol oprot) { - super(iprot, oprot); - } - - public TOpenSessionResp OpenSession(TOpenSessionReq req) throws org.apache.thrift.TException - { - send_OpenSession(req); - return recv_OpenSession(); - } - - 
public void send_OpenSession(TOpenSessionReq req) throws org.apache.thrift.TException - { - OpenSession_args args = new OpenSession_args(); - args.setReq(req); - sendBase("OpenSession", args); - } - - public TOpenSessionResp recv_OpenSession() throws org.apache.thrift.TException - { - OpenSession_result result = new OpenSession_result(); - receiveBase(result, "OpenSession"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "OpenSession failed: unknown result"); - } - - public TCloseSessionResp CloseSession(TCloseSessionReq req) throws org.apache.thrift.TException - { - send_CloseSession(req); - return recv_CloseSession(); - } - - public void send_CloseSession(TCloseSessionReq req) throws org.apache.thrift.TException - { - CloseSession_args args = new CloseSession_args(); - args.setReq(req); - sendBase("CloseSession", args); - } - - public TCloseSessionResp recv_CloseSession() throws org.apache.thrift.TException - { - CloseSession_result result = new CloseSession_result(); - receiveBase(result, "CloseSession"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "CloseSession failed: unknown result"); - } - - public TGetInfoResp GetInfo(TGetInfoReq req) throws org.apache.thrift.TException - { - send_GetInfo(req); - return recv_GetInfo(); - } - - public void send_GetInfo(TGetInfoReq req) throws org.apache.thrift.TException - { - GetInfo_args args = new GetInfo_args(); - args.setReq(req); - sendBase("GetInfo", args); - } - - public TGetInfoResp recv_GetInfo() throws org.apache.thrift.TException - { - GetInfo_result result = new GetInfo_result(); - receiveBase(result, "GetInfo"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetInfo failed: unknown result"); - } - - public TExecuteStatementResp ExecuteStatement(TExecuteStatementReq req) throws org.apache.thrift.TException - { - send_ExecuteStatement(req); - return recv_ExecuteStatement(); - } - - public void send_ExecuteStatement(TExecuteStatementReq req) throws org.apache.thrift.TException - { - ExecuteStatement_args args = new ExecuteStatement_args(); - args.setReq(req); - sendBase("ExecuteStatement", args); - } - - public TExecuteStatementResp recv_ExecuteStatement() throws org.apache.thrift.TException - { - ExecuteStatement_result result = new ExecuteStatement_result(); - receiveBase(result, "ExecuteStatement"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "ExecuteStatement failed: unknown result"); - } - - public TGetTypeInfoResp GetTypeInfo(TGetTypeInfoReq req) throws org.apache.thrift.TException - { - send_GetTypeInfo(req); - return recv_GetTypeInfo(); - } - - public void send_GetTypeInfo(TGetTypeInfoReq req) throws org.apache.thrift.TException - { - GetTypeInfo_args args = new GetTypeInfo_args(); - args.setReq(req); - sendBase("GetTypeInfo", args); - } - - public TGetTypeInfoResp recv_GetTypeInfo() throws org.apache.thrift.TException - { - GetTypeInfo_result result = new GetTypeInfo_result(); - receiveBase(result, "GetTypeInfo"); - if (result.isSetSuccess()) { - return result.success; - } - throw new 
org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetTypeInfo failed: unknown result"); - } - - public TGetCatalogsResp GetCatalogs(TGetCatalogsReq req) throws org.apache.thrift.TException - { - send_GetCatalogs(req); - return recv_GetCatalogs(); - } - - public void send_GetCatalogs(TGetCatalogsReq req) throws org.apache.thrift.TException - { - GetCatalogs_args args = new GetCatalogs_args(); - args.setReq(req); - sendBase("GetCatalogs", args); - } - - public TGetCatalogsResp recv_GetCatalogs() throws org.apache.thrift.TException - { - GetCatalogs_result result = new GetCatalogs_result(); - receiveBase(result, "GetCatalogs"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetCatalogs failed: unknown result"); - } - - public TGetSchemasResp GetSchemas(TGetSchemasReq req) throws org.apache.thrift.TException - { - send_GetSchemas(req); - return recv_GetSchemas(); - } - - public void send_GetSchemas(TGetSchemasReq req) throws org.apache.thrift.TException - { - GetSchemas_args args = new GetSchemas_args(); - args.setReq(req); - sendBase("GetSchemas", args); - } - - public TGetSchemasResp recv_GetSchemas() throws org.apache.thrift.TException - { - GetSchemas_result result = new GetSchemas_result(); - receiveBase(result, "GetSchemas"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetSchemas failed: unknown result"); - } - - public TGetTablesResp GetTables(TGetTablesReq req) throws org.apache.thrift.TException - { - send_GetTables(req); - return recv_GetTables(); - } - - public void send_GetTables(TGetTablesReq req) throws org.apache.thrift.TException - { - GetTables_args args = new GetTables_args(); - args.setReq(req); - sendBase("GetTables", args); - } - - public TGetTablesResp recv_GetTables() throws org.apache.thrift.TException - { - GetTables_result result = new GetTables_result(); - receiveBase(result, "GetTables"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetTables failed: unknown result"); - } - - public TGetTableTypesResp GetTableTypes(TGetTableTypesReq req) throws org.apache.thrift.TException - { - send_GetTableTypes(req); - return recv_GetTableTypes(); - } - - public void send_GetTableTypes(TGetTableTypesReq req) throws org.apache.thrift.TException - { - GetTableTypes_args args = new GetTableTypes_args(); - args.setReq(req); - sendBase("GetTableTypes", args); - } - - public TGetTableTypesResp recv_GetTableTypes() throws org.apache.thrift.TException - { - GetTableTypes_result result = new GetTableTypes_result(); - receiveBase(result, "GetTableTypes"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetTableTypes failed: unknown result"); - } - - public TGetColumnsResp GetColumns(TGetColumnsReq req) throws org.apache.thrift.TException - { - send_GetColumns(req); - return recv_GetColumns(); - } - - public void send_GetColumns(TGetColumnsReq req) throws org.apache.thrift.TException - { - GetColumns_args args = new GetColumns_args(); - args.setReq(req); - sendBase("GetColumns", args); - } - - public TGetColumnsResp recv_GetColumns() throws 
org.apache.thrift.TException - { - GetColumns_result result = new GetColumns_result(); - receiveBase(result, "GetColumns"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetColumns failed: unknown result"); - } - - public TGetFunctionsResp GetFunctions(TGetFunctionsReq req) throws org.apache.thrift.TException - { - send_GetFunctions(req); - return recv_GetFunctions(); - } - - public void send_GetFunctions(TGetFunctionsReq req) throws org.apache.thrift.TException - { - GetFunctions_args args = new GetFunctions_args(); - args.setReq(req); - sendBase("GetFunctions", args); - } - - public TGetFunctionsResp recv_GetFunctions() throws org.apache.thrift.TException - { - GetFunctions_result result = new GetFunctions_result(); - receiveBase(result, "GetFunctions"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetFunctions failed: unknown result"); - } - - public TGetOperationStatusResp GetOperationStatus(TGetOperationStatusReq req) throws org.apache.thrift.TException - { - send_GetOperationStatus(req); - return recv_GetOperationStatus(); - } - - public void send_GetOperationStatus(TGetOperationStatusReq req) throws org.apache.thrift.TException - { - GetOperationStatus_args args = new GetOperationStatus_args(); - args.setReq(req); - sendBase("GetOperationStatus", args); - } - - public TGetOperationStatusResp recv_GetOperationStatus() throws org.apache.thrift.TException - { - GetOperationStatus_result result = new GetOperationStatus_result(); - receiveBase(result, "GetOperationStatus"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetOperationStatus failed: unknown result"); - } - - public TCancelOperationResp CancelOperation(TCancelOperationReq req) throws org.apache.thrift.TException - { - send_CancelOperation(req); - return recv_CancelOperation(); - } - - public void send_CancelOperation(TCancelOperationReq req) throws org.apache.thrift.TException - { - CancelOperation_args args = new CancelOperation_args(); - args.setReq(req); - sendBase("CancelOperation", args); - } - - public TCancelOperationResp recv_CancelOperation() throws org.apache.thrift.TException - { - CancelOperation_result result = new CancelOperation_result(); - receiveBase(result, "CancelOperation"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "CancelOperation failed: unknown result"); - } - - public TCloseOperationResp CloseOperation(TCloseOperationReq req) throws org.apache.thrift.TException - { - send_CloseOperation(req); - return recv_CloseOperation(); - } - - public void send_CloseOperation(TCloseOperationReq req) throws org.apache.thrift.TException - { - CloseOperation_args args = new CloseOperation_args(); - args.setReq(req); - sendBase("CloseOperation", args); - } - - public TCloseOperationResp recv_CloseOperation() throws org.apache.thrift.TException - { - CloseOperation_result result = new CloseOperation_result(); - receiveBase(result, "CloseOperation"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "CloseOperation 
failed: unknown result"); - } - - public TGetResultSetMetadataResp GetResultSetMetadata(TGetResultSetMetadataReq req) throws org.apache.thrift.TException - { - send_GetResultSetMetadata(req); - return recv_GetResultSetMetadata(); - } - - public void send_GetResultSetMetadata(TGetResultSetMetadataReq req) throws org.apache.thrift.TException - { - GetResultSetMetadata_args args = new GetResultSetMetadata_args(); - args.setReq(req); - sendBase("GetResultSetMetadata", args); - } - - public TGetResultSetMetadataResp recv_GetResultSetMetadata() throws org.apache.thrift.TException - { - GetResultSetMetadata_result result = new GetResultSetMetadata_result(); - receiveBase(result, "GetResultSetMetadata"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetResultSetMetadata failed: unknown result"); - } - - public TFetchResultsResp FetchResults(TFetchResultsReq req) throws org.apache.thrift.TException - { - send_FetchResults(req); - return recv_FetchResults(); - } - - public void send_FetchResults(TFetchResultsReq req) throws org.apache.thrift.TException - { - FetchResults_args args = new FetchResults_args(); - args.setReq(req); - sendBase("FetchResults", args); - } - - public TFetchResultsResp recv_FetchResults() throws org.apache.thrift.TException - { - FetchResults_result result = new FetchResults_result(); - receiveBase(result, "FetchResults"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "FetchResults failed: unknown result"); - } - - public TGetDelegationTokenResp GetDelegationToken(TGetDelegationTokenReq req) throws org.apache.thrift.TException - { - send_GetDelegationToken(req); - return recv_GetDelegationToken(); - } - - public void send_GetDelegationToken(TGetDelegationTokenReq req) throws org.apache.thrift.TException - { - GetDelegationToken_args args = new GetDelegationToken_args(); - args.setReq(req); - sendBase("GetDelegationToken", args); - } - - public TGetDelegationTokenResp recv_GetDelegationToken() throws org.apache.thrift.TException - { - GetDelegationToken_result result = new GetDelegationToken_result(); - receiveBase(result, "GetDelegationToken"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetDelegationToken failed: unknown result"); - } - - public TCancelDelegationTokenResp CancelDelegationToken(TCancelDelegationTokenReq req) throws org.apache.thrift.TException - { - send_CancelDelegationToken(req); - return recv_CancelDelegationToken(); - } - - public void send_CancelDelegationToken(TCancelDelegationTokenReq req) throws org.apache.thrift.TException - { - CancelDelegationToken_args args = new CancelDelegationToken_args(); - args.setReq(req); - sendBase("CancelDelegationToken", args); - } - - public TCancelDelegationTokenResp recv_CancelDelegationToken() throws org.apache.thrift.TException - { - CancelDelegationToken_result result = new CancelDelegationToken_result(); - receiveBase(result, "CancelDelegationToken"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "CancelDelegationToken failed: unknown result"); - } - - public TRenewDelegationTokenResp RenewDelegationToken(TRenewDelegationTokenReq 
req) throws org.apache.thrift.TException - { - send_RenewDelegationToken(req); - return recv_RenewDelegationToken(); - } - - public void send_RenewDelegationToken(TRenewDelegationTokenReq req) throws org.apache.thrift.TException - { - RenewDelegationToken_args args = new RenewDelegationToken_args(); - args.setReq(req); - sendBase("RenewDelegationToken", args); - } - - public TRenewDelegationTokenResp recv_RenewDelegationToken() throws org.apache.thrift.TException - { - RenewDelegationToken_result result = new RenewDelegationToken_result(); - receiveBase(result, "RenewDelegationToken"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "RenewDelegationToken failed: unknown result"); - } - - } - public static class AsyncClient extends org.apache.thrift.async.TAsyncClient implements AsyncIface { - public static class Factory implements org.apache.thrift.async.TAsyncClientFactory { - private org.apache.thrift.async.TAsyncClientManager clientManager; - private org.apache.thrift.protocol.TProtocolFactory protocolFactory; - public Factory(org.apache.thrift.async.TAsyncClientManager clientManager, org.apache.thrift.protocol.TProtocolFactory protocolFactory) { - this.clientManager = clientManager; - this.protocolFactory = protocolFactory; - } - public AsyncClient getAsyncClient(org.apache.thrift.transport.TNonblockingTransport transport) { - return new AsyncClient(protocolFactory, clientManager, transport); - } - } - - public AsyncClient(org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.async.TAsyncClientManager clientManager, org.apache.thrift.transport.TNonblockingTransport transport) { - super(protocolFactory, clientManager, transport); - } - - public void OpenSession(TOpenSessionReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - OpenSession_call method_call = new OpenSession_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class OpenSession_call extends org.apache.thrift.async.TAsyncMethodCall { - private TOpenSessionReq req; - public OpenSession_call(TOpenSessionReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("OpenSession", org.apache.thrift.protocol.TMessageType.CALL, 0)); - OpenSession_args args = new OpenSession_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TOpenSessionResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new 
Client(prot)).recv_OpenSession(); - } - } - - public void CloseSession(TCloseSessionReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - CloseSession_call method_call = new CloseSession_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class CloseSession_call extends org.apache.thrift.async.TAsyncMethodCall { - private TCloseSessionReq req; - public CloseSession_call(TCloseSessionReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("CloseSession", org.apache.thrift.protocol.TMessageType.CALL, 0)); - CloseSession_args args = new CloseSession_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TCloseSessionResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_CloseSession(); - } - } - - public void GetInfo(TGetInfoReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetInfo_call method_call = new GetInfo_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetInfo_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetInfoReq req; - public GetInfo_call(TGetInfoReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetInfo", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetInfo_args args = new GetInfo_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetInfoResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetInfo(); - } - } - 
- public void ExecuteStatement(TExecuteStatementReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - ExecuteStatement_call method_call = new ExecuteStatement_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class ExecuteStatement_call extends org.apache.thrift.async.TAsyncMethodCall { - private TExecuteStatementReq req; - public ExecuteStatement_call(TExecuteStatementReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("ExecuteStatement", org.apache.thrift.protocol.TMessageType.CALL, 0)); - ExecuteStatement_args args = new ExecuteStatement_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TExecuteStatementResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_ExecuteStatement(); - } - } - - public void GetTypeInfo(TGetTypeInfoReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetTypeInfo_call method_call = new GetTypeInfo_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetTypeInfo_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetTypeInfoReq req; - public GetTypeInfo_call(TGetTypeInfoReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetTypeInfo", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetTypeInfo_args args = new GetTypeInfo_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetTypeInfoResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = 
client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetTypeInfo(); - } - } - - public void GetCatalogs(TGetCatalogsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetCatalogs_call method_call = new GetCatalogs_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetCatalogs_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetCatalogsReq req; - public GetCatalogs_call(TGetCatalogsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetCatalogs", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetCatalogs_args args = new GetCatalogs_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetCatalogsResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetCatalogs(); - } - } - - public void GetSchemas(TGetSchemasReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetSchemas_call method_call = new GetSchemas_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetSchemas_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetSchemasReq req; - public GetSchemas_call(TGetSchemasReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetSchemas", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetSchemas_args args = new GetSchemas_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetSchemasResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = 
client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetSchemas(); - } - } - - public void GetTables(TGetTablesReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetTables_call method_call = new GetTables_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetTables_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetTablesReq req; - public GetTables_call(TGetTablesReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetTables", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetTables_args args = new GetTables_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetTablesResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetTables(); - } - } - - public void GetTableTypes(TGetTableTypesReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetTableTypes_call method_call = new GetTableTypes_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetTableTypes_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetTableTypesReq req; - public GetTableTypes_call(TGetTableTypesReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetTableTypes", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetTableTypes_args args = new GetTableTypes_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetTableTypesResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = 
client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetTableTypes(); - } - } - - public void GetColumns(TGetColumnsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetColumns_call method_call = new GetColumns_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetColumns_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetColumnsReq req; - public GetColumns_call(TGetColumnsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetColumns", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetColumns_args args = new GetColumns_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetColumnsResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetColumns(); - } - } - - public void GetFunctions(TGetFunctionsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetFunctions_call method_call = new GetFunctions_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetFunctions_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetFunctionsReq req; - public GetFunctions_call(TGetFunctionsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetFunctions", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetFunctions_args args = new GetFunctions_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetFunctionsResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = 
client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetFunctions(); - } - } - - public void GetOperationStatus(TGetOperationStatusReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetOperationStatus_call method_call = new GetOperationStatus_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetOperationStatus_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetOperationStatusReq req; - public GetOperationStatus_call(TGetOperationStatusReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetOperationStatus", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetOperationStatus_args args = new GetOperationStatus_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetOperationStatusResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetOperationStatus(); - } - } - - public void CancelOperation(TCancelOperationReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - CancelOperation_call method_call = new CancelOperation_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class CancelOperation_call extends org.apache.thrift.async.TAsyncMethodCall { - private TCancelOperationReq req; - public CancelOperation_call(TCancelOperationReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("CancelOperation", org.apache.thrift.protocol.TMessageType.CALL, 0)); - CancelOperation_args args = new CancelOperation_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TCancelOperationResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport 
memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_CancelOperation(); - } - } - - public void CloseOperation(TCloseOperationReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - CloseOperation_call method_call = new CloseOperation_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class CloseOperation_call extends org.apache.thrift.async.TAsyncMethodCall { - private TCloseOperationReq req; - public CloseOperation_call(TCloseOperationReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("CloseOperation", org.apache.thrift.protocol.TMessageType.CALL, 0)); - CloseOperation_args args = new CloseOperation_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TCloseOperationResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_CloseOperation(); - } - } - - public void GetResultSetMetadata(TGetResultSetMetadataReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetResultSetMetadata_call method_call = new GetResultSetMetadata_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetResultSetMetadata_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetResultSetMetadataReq req; - public GetResultSetMetadata_call(TGetResultSetMetadataReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetResultSetMetadata", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetResultSetMetadata_args args = new GetResultSetMetadata_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetResultSetMetadataResp getResult() throws org.apache.thrift.TException { - if (getState() != 
org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetResultSetMetadata(); - } - } - - public void FetchResults(TFetchResultsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - FetchResults_call method_call = new FetchResults_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class FetchResults_call extends org.apache.thrift.async.TAsyncMethodCall { - private TFetchResultsReq req; - public FetchResults_call(TFetchResultsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("FetchResults", org.apache.thrift.protocol.TMessageType.CALL, 0)); - FetchResults_args args = new FetchResults_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TFetchResultsResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_FetchResults(); - } - } - - public void GetDelegationToken(TGetDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetDelegationToken_call method_call = new GetDelegationToken_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetDelegationToken_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetDelegationTokenReq req; - public GetDelegationToken_call(TGetDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetDelegationToken", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetDelegationToken_args args = new GetDelegationToken_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - 
public TGetDelegationTokenResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetDelegationToken(); - } - } - - public void CancelDelegationToken(TCancelDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - CancelDelegationToken_call method_call = new CancelDelegationToken_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class CancelDelegationToken_call extends org.apache.thrift.async.TAsyncMethodCall { - private TCancelDelegationTokenReq req; - public CancelDelegationToken_call(TCancelDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("CancelDelegationToken", org.apache.thrift.protocol.TMessageType.CALL, 0)); - CancelDelegationToken_args args = new CancelDelegationToken_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TCancelDelegationTokenResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_CancelDelegationToken(); - } - } - - public void RenewDelegationToken(TRenewDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - RenewDelegationToken_call method_call = new RenewDelegationToken_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class RenewDelegationToken_call extends org.apache.thrift.async.TAsyncMethodCall { - private TRenewDelegationTokenReq req; - public RenewDelegationToken_call(TRenewDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new 
org.apache.thrift.protocol.TMessage("RenewDelegationToken", org.apache.thrift.protocol.TMessageType.CALL, 0)); - RenewDelegationToken_args args = new RenewDelegationToken_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TRenewDelegationTokenResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_RenewDelegationToken(); - } - } - - } - - public static class Processor extends org.apache.thrift.TBaseProcessor implements org.apache.thrift.TProcessor { - private static final Logger LOGGER = LoggerFactory.getLogger(Processor.class.getName()); - public Processor(I iface) { - super(iface, getProcessMap(new HashMap>())); - } - - protected Processor(I iface, Map> processMap) { - super(iface, getProcessMap(processMap)); - } - - private static Map> getProcessMap(Map> processMap) { - processMap.put("OpenSession", new OpenSession()); - processMap.put("CloseSession", new CloseSession()); - processMap.put("GetInfo", new GetInfo()); - processMap.put("ExecuteStatement", new ExecuteStatement()); - processMap.put("GetTypeInfo", new GetTypeInfo()); - processMap.put("GetCatalogs", new GetCatalogs()); - processMap.put("GetSchemas", new GetSchemas()); - processMap.put("GetTables", new GetTables()); - processMap.put("GetTableTypes", new GetTableTypes()); - processMap.put("GetColumns", new GetColumns()); - processMap.put("GetFunctions", new GetFunctions()); - processMap.put("GetOperationStatus", new GetOperationStatus()); - processMap.put("CancelOperation", new CancelOperation()); - processMap.put("CloseOperation", new CloseOperation()); - processMap.put("GetResultSetMetadata", new GetResultSetMetadata()); - processMap.put("FetchResults", new FetchResults()); - processMap.put("GetDelegationToken", new GetDelegationToken()); - processMap.put("CancelDelegationToken", new CancelDelegationToken()); - processMap.put("RenewDelegationToken", new RenewDelegationToken()); - return processMap; - } - - public static class OpenSession extends org.apache.thrift.ProcessFunction { - public OpenSession() { - super("OpenSession"); - } - - public OpenSession_args getEmptyArgsInstance() { - return new OpenSession_args(); - } - - protected boolean isOneway() { - return false; - } - - public OpenSession_result getResult(I iface, OpenSession_args args) throws org.apache.thrift.TException { - OpenSession_result result = new OpenSession_result(); - result.success = iface.OpenSession(args.req); - return result; - } - } - - public static class CloseSession extends org.apache.thrift.ProcessFunction { - public CloseSession() { - super("CloseSession"); - } - - public CloseSession_args getEmptyArgsInstance() { - return new CloseSession_args(); - } - - protected boolean isOneway() { - return false; - } - - public CloseSession_result getResult(I iface, CloseSession_args args) throws org.apache.thrift.TException { - CloseSession_result result = new CloseSession_result(); - result.success = iface.CloseSession(args.req); - return result; - } - } - - public static class GetInfo extends org.apache.thrift.ProcessFunction { - public GetInfo() { - super("GetInfo"); - } - - public 
GetInfo_args getEmptyArgsInstance() { - return new GetInfo_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetInfo_result getResult(I iface, GetInfo_args args) throws org.apache.thrift.TException { - GetInfo_result result = new GetInfo_result(); - result.success = iface.GetInfo(args.req); - return result; - } - } - - public static class ExecuteStatement extends org.apache.thrift.ProcessFunction { - public ExecuteStatement() { - super("ExecuteStatement"); - } - - public ExecuteStatement_args getEmptyArgsInstance() { - return new ExecuteStatement_args(); - } - - protected boolean isOneway() { - return false; - } - - public ExecuteStatement_result getResult(I iface, ExecuteStatement_args args) throws org.apache.thrift.TException { - ExecuteStatement_result result = new ExecuteStatement_result(); - result.success = iface.ExecuteStatement(args.req); - return result; - } - } - - public static class GetTypeInfo extends org.apache.thrift.ProcessFunction { - public GetTypeInfo() { - super("GetTypeInfo"); - } - - public GetTypeInfo_args getEmptyArgsInstance() { - return new GetTypeInfo_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetTypeInfo_result getResult(I iface, GetTypeInfo_args args) throws org.apache.thrift.TException { - GetTypeInfo_result result = new GetTypeInfo_result(); - result.success = iface.GetTypeInfo(args.req); - return result; - } - } - - public static class GetCatalogs extends org.apache.thrift.ProcessFunction { - public GetCatalogs() { - super("GetCatalogs"); - } - - public GetCatalogs_args getEmptyArgsInstance() { - return new GetCatalogs_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetCatalogs_result getResult(I iface, GetCatalogs_args args) throws org.apache.thrift.TException { - GetCatalogs_result result = new GetCatalogs_result(); - result.success = iface.GetCatalogs(args.req); - return result; - } - } - - public static class GetSchemas extends org.apache.thrift.ProcessFunction { - public GetSchemas() { - super("GetSchemas"); - } - - public GetSchemas_args getEmptyArgsInstance() { - return new GetSchemas_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetSchemas_result getResult(I iface, GetSchemas_args args) throws org.apache.thrift.TException { - GetSchemas_result result = new GetSchemas_result(); - result.success = iface.GetSchemas(args.req); - return result; - } - } - - public static class GetTables extends org.apache.thrift.ProcessFunction { - public GetTables() { - super("GetTables"); - } - - public GetTables_args getEmptyArgsInstance() { - return new GetTables_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetTables_result getResult(I iface, GetTables_args args) throws org.apache.thrift.TException { - GetTables_result result = new GetTables_result(); - result.success = iface.GetTables(args.req); - return result; - } - } - - public static class GetTableTypes extends org.apache.thrift.ProcessFunction { - public GetTableTypes() { - super("GetTableTypes"); - } - - public GetTableTypes_args getEmptyArgsInstance() { - return new GetTableTypes_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetTableTypes_result getResult(I iface, GetTableTypes_args args) throws org.apache.thrift.TException { - GetTableTypes_result result = new GetTableTypes_result(); - result.success = iface.GetTableTypes(args.req); - return result; - } - } - - public static class GetColumns extends 
org.apache.thrift.ProcessFunction { - public GetColumns() { - super("GetColumns"); - } - - public GetColumns_args getEmptyArgsInstance() { - return new GetColumns_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetColumns_result getResult(I iface, GetColumns_args args) throws org.apache.thrift.TException { - GetColumns_result result = new GetColumns_result(); - result.success = iface.GetColumns(args.req); - return result; - } - } - - public static class GetFunctions extends org.apache.thrift.ProcessFunction { - public GetFunctions() { - super("GetFunctions"); - } - - public GetFunctions_args getEmptyArgsInstance() { - return new GetFunctions_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetFunctions_result getResult(I iface, GetFunctions_args args) throws org.apache.thrift.TException { - GetFunctions_result result = new GetFunctions_result(); - result.success = iface.GetFunctions(args.req); - return result; - } - } - - public static class GetOperationStatus extends org.apache.thrift.ProcessFunction { - public GetOperationStatus() { - super("GetOperationStatus"); - } - - public GetOperationStatus_args getEmptyArgsInstance() { - return new GetOperationStatus_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetOperationStatus_result getResult(I iface, GetOperationStatus_args args) throws org.apache.thrift.TException { - GetOperationStatus_result result = new GetOperationStatus_result(); - result.success = iface.GetOperationStatus(args.req); - return result; - } - } - - public static class CancelOperation extends org.apache.thrift.ProcessFunction { - public CancelOperation() { - super("CancelOperation"); - } - - public CancelOperation_args getEmptyArgsInstance() { - return new CancelOperation_args(); - } - - protected boolean isOneway() { - return false; - } - - public CancelOperation_result getResult(I iface, CancelOperation_args args) throws org.apache.thrift.TException { - CancelOperation_result result = new CancelOperation_result(); - result.success = iface.CancelOperation(args.req); - return result; - } - } - - public static class CloseOperation extends org.apache.thrift.ProcessFunction { - public CloseOperation() { - super("CloseOperation"); - } - - public CloseOperation_args getEmptyArgsInstance() { - return new CloseOperation_args(); - } - - protected boolean isOneway() { - return false; - } - - public CloseOperation_result getResult(I iface, CloseOperation_args args) throws org.apache.thrift.TException { - CloseOperation_result result = new CloseOperation_result(); - result.success = iface.CloseOperation(args.req); - return result; - } - } - - public static class GetResultSetMetadata extends org.apache.thrift.ProcessFunction { - public GetResultSetMetadata() { - super("GetResultSetMetadata"); - } - - public GetResultSetMetadata_args getEmptyArgsInstance() { - return new GetResultSetMetadata_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetResultSetMetadata_result getResult(I iface, GetResultSetMetadata_args args) throws org.apache.thrift.TException { - GetResultSetMetadata_result result = new GetResultSetMetadata_result(); - result.success = iface.GetResultSetMetadata(args.req); - return result; - } - } - - public static class FetchResults extends org.apache.thrift.ProcessFunction { - public FetchResults() { - super("FetchResults"); - } - - public FetchResults_args getEmptyArgsInstance() { - return new FetchResults_args(); - } - - protected boolean isOneway() { - 
return false; - } - - public FetchResults_result getResult(I iface, FetchResults_args args) throws org.apache.thrift.TException { - FetchResults_result result = new FetchResults_result(); - result.success = iface.FetchResults(args.req); - return result; - } - } - - public static class GetDelegationToken extends org.apache.thrift.ProcessFunction { - public GetDelegationToken() { - super("GetDelegationToken"); - } - - public GetDelegationToken_args getEmptyArgsInstance() { - return new GetDelegationToken_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetDelegationToken_result getResult(I iface, GetDelegationToken_args args) throws org.apache.thrift.TException { - GetDelegationToken_result result = new GetDelegationToken_result(); - result.success = iface.GetDelegationToken(args.req); - return result; - } - } - - public static class CancelDelegationToken extends org.apache.thrift.ProcessFunction { - public CancelDelegationToken() { - super("CancelDelegationToken"); - } - - public CancelDelegationToken_args getEmptyArgsInstance() { - return new CancelDelegationToken_args(); - } - - protected boolean isOneway() { - return false; - } - - public CancelDelegationToken_result getResult(I iface, CancelDelegationToken_args args) throws org.apache.thrift.TException { - CancelDelegationToken_result result = new CancelDelegationToken_result(); - result.success = iface.CancelDelegationToken(args.req); - return result; - } - } - - public static class RenewDelegationToken extends org.apache.thrift.ProcessFunction { - public RenewDelegationToken() { - super("RenewDelegationToken"); - } - - public RenewDelegationToken_args getEmptyArgsInstance() { - return new RenewDelegationToken_args(); - } - - protected boolean isOneway() { - return false; - } - - public RenewDelegationToken_result getResult(I iface, RenewDelegationToken_args args) throws org.apache.thrift.TException { - RenewDelegationToken_result result = new RenewDelegationToken_result(); - result.success = iface.RenewDelegationToken(args.req); - return result; - } - } - - } - - public static class OpenSession_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("OpenSession_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new OpenSession_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new OpenSession_argsTupleSchemeFactory()); - } - - private TOpenSessionReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOpenSessionReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(OpenSession_args.class, metaDataMap); - } - - public OpenSession_args() { - } - - public OpenSession_args( - TOpenSessionReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public OpenSession_args(OpenSession_args other) { - if (other.isSetReq()) { - this.req = new TOpenSessionReq(other.req); - } - } - - public OpenSession_args deepCopy() { - return new OpenSession_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TOpenSessionReq getReq() { - return this.req; - } - - public void setReq(TOpenSessionReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TOpenSessionReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof OpenSession_args) - return this.equals((OpenSession_args)that); - return false; - } - - public boolean equals(OpenSession_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new 
HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return builder.toHashCode(); - } - - public int compareTo(OpenSession_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - OpenSession_args typedOther = (OpenSession_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("OpenSession_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class OpenSession_argsStandardSchemeFactory implements SchemeFactory { - public OpenSession_argsStandardScheme getScheme() { - return new OpenSession_argsStandardScheme(); - } - } - - private static class OpenSession_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, OpenSession_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TOpenSessionReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, OpenSession_args struct) throws org.apache.thrift.TException { - struct.validate(); - - 
oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class OpenSession_argsTupleSchemeFactory implements SchemeFactory { - public OpenSession_argsTupleScheme getScheme() { - return new OpenSession_argsTupleScheme(); - } - } - - private static class OpenSession_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, OpenSession_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, OpenSession_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TOpenSessionReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class OpenSession_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("OpenSession_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new OpenSession_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new OpenSession_resultTupleSchemeFactory()); - } - - private TOpenSessionResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOpenSessionResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(OpenSession_result.class, metaDataMap); - } - - public OpenSession_result() { - } - - public OpenSession_result( - TOpenSessionResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public OpenSession_result(OpenSession_result other) { - if (other.isSetSuccess()) { - this.success = new TOpenSessionResp(other.success); - } - } - - public OpenSession_result deepCopy() { - return new OpenSession_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TOpenSessionResp getSuccess() { - return this.success; - } - - public void setSuccess(TOpenSessionResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TOpenSessionResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof OpenSession_result) - return this.equals((OpenSession_result)that); - return false; - } - - public boolean equals(OpenSession_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - builder.append(present_success); - if 
(present_success) - builder.append(success); - - return builder.toHashCode(); - } - - public int compareTo(OpenSession_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - OpenSession_result typedOther = (OpenSession_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("OpenSession_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class OpenSession_resultStandardSchemeFactory implements SchemeFactory { - public OpenSession_resultStandardScheme getScheme() { - return new OpenSession_resultStandardScheme(); - } - } - - private static class OpenSession_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, OpenSession_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TOpenSessionResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, OpenSession_result struct) throws org.apache.thrift.TException { - struct.validate(); - - 
oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class OpenSession_resultTupleSchemeFactory implements SchemeFactory { - public OpenSession_resultTupleScheme getScheme() { - return new OpenSession_resultTupleScheme(); - } - } - - private static class OpenSession_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, OpenSession_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, OpenSession_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TOpenSessionResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class CloseSession_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CloseSession_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CloseSession_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CloseSession_argsTupleSchemeFactory()); - } - - private TCloseSessionReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCloseSessionReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CloseSession_args.class, metaDataMap); - } - - public CloseSession_args() { - } - - public CloseSession_args( - TCloseSessionReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public CloseSession_args(CloseSession_args other) { - if (other.isSetReq()) { - this.req = new TCloseSessionReq(other.req); - } - } - - public CloseSession_args deepCopy() { - return new CloseSession_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TCloseSessionReq getReq() { - return this.req; - } - - public void setReq(TCloseSessionReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TCloseSessionReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CloseSession_args) - return this.equals((CloseSession_args)that); - return false; - } - - public boolean equals(CloseSession_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return builder.toHashCode(); - } - - public int compareTo(CloseSession_args other) { - if (!getClass().equals(other.getClass())) { - return 
getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - CloseSession_args typedOther = (CloseSession_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CloseSession_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CloseSession_argsStandardSchemeFactory implements SchemeFactory { - public CloseSession_argsStandardScheme getScheme() { - return new CloseSession_argsStandardScheme(); - } - } - - private static class CloseSession_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CloseSession_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TCloseSessionReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, CloseSession_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class 
CloseSession_argsTupleSchemeFactory implements SchemeFactory { - public CloseSession_argsTupleScheme getScheme() { - return new CloseSession_argsTupleScheme(); - } - } - - private static class CloseSession_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CloseSession_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CloseSession_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TCloseSessionReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class CloseSession_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CloseSession_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CloseSession_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CloseSession_resultTupleSchemeFactory()); - } - - private TCloseSessionResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCloseSessionResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CloseSession_result.class, metaDataMap); - } - - public CloseSession_result() { - } - - public CloseSession_result( - TCloseSessionResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public CloseSession_result(CloseSession_result other) { - if (other.isSetSuccess()) { - this.success = new TCloseSessionResp(other.success); - } - } - - public CloseSession_result deepCopy() { - return new CloseSession_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TCloseSessionResp getSuccess() { - return this.success; - } - - public void setSuccess(TCloseSessionResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TCloseSessionResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CloseSession_result) - return this.equals((CloseSession_result)that); - return false; - } - - public boolean equals(CloseSession_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - builder.append(present_success); - 
if (present_success) - builder.append(success); - - return builder.toHashCode(); - } - - public int compareTo(CloseSession_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - CloseSession_result typedOther = (CloseSession_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CloseSession_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CloseSession_resultStandardSchemeFactory implements SchemeFactory { - public CloseSession_resultStandardScheme getScheme() { - return new CloseSession_resultStandardScheme(); - } - } - - private static class CloseSession_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CloseSession_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TCloseSessionResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, CloseSession_result struct) throws org.apache.thrift.TException { - struct.validate(); - - 
oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class CloseSession_resultTupleSchemeFactory implements SchemeFactory { - public CloseSession_resultTupleScheme getScheme() { - return new CloseSession_resultTupleScheme(); - } - } - - private static class CloseSession_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CloseSession_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CloseSession_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TCloseSessionResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetInfo_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetInfo_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetInfo_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetInfo_argsTupleSchemeFactory()); - } - - private TGetInfoReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetInfoReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetInfo_args.class, metaDataMap); - } - - public GetInfo_args() { - } - - public GetInfo_args( - TGetInfoReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public GetInfo_args(GetInfo_args other) { - if (other.isSetReq()) { - this.req = new TGetInfoReq(other.req); - } - } - - public GetInfo_args deepCopy() { - return new GetInfo_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetInfoReq getReq() { - return this.req; - } - - public void setReq(TGetInfoReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetInfoReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetInfo_args) - return this.equals((GetInfo_args)that); - return false; - } - - public boolean equals(GetInfo_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return builder.toHashCode(); - } - - public int compareTo(GetInfo_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - 
GetInfo_args typedOther = (GetInfo_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetInfo_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetInfo_argsStandardSchemeFactory implements SchemeFactory { - public GetInfo_argsStandardScheme getScheme() { - return new GetInfo_argsStandardScheme(); - } - } - - private static class GetInfo_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetInfo_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetInfoReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetInfo_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetInfo_argsTupleSchemeFactory implements SchemeFactory { - public GetInfo_argsTupleScheme getScheme() { - return new GetInfo_argsTupleScheme(); - } - } - - private static class 
GetInfo_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetInfo_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetInfo_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetInfoReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetInfo_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetInfo_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetInfo_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetInfo_resultTupleSchemeFactory()); - } - - private TGetInfoResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetInfoResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetInfo_result.class, metaDataMap); - } - - public GetInfo_result() { - } - - public GetInfo_result( - TGetInfoResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public GetInfo_result(GetInfo_result other) { - if (other.isSetSuccess()) { - this.success = new TGetInfoResp(other.success); - } - } - - public GetInfo_result deepCopy() { - return new GetInfo_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetInfoResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetInfoResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetInfoResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetInfo_result) - return this.equals((GetInfo_result)that); - return false; - } - - public boolean equals(GetInfo_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - builder.append(present_success); - if (present_success) - builder.append(success); - - return builder.toHashCode(); 
- } - - public int compareTo(GetInfo_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetInfo_result typedOther = (GetInfo_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetInfo_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetInfo_resultStandardSchemeFactory implements SchemeFactory { - public GetInfo_resultStandardScheme getScheme() { - return new GetInfo_resultStandardScheme(); - } - } - - private static class GetInfo_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetInfo_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetInfoResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetInfo_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - 
oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetInfo_resultTupleSchemeFactory implements SchemeFactory { - public GetInfo_resultTupleScheme getScheme() { - return new GetInfo_resultTupleScheme(); - } - } - - private static class GetInfo_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetInfo_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetInfo_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetInfoResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class ExecuteStatement_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("ExecuteStatement_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new ExecuteStatement_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new ExecuteStatement_argsTupleSchemeFactory()); - } - - private TExecuteStatementReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TExecuteStatementReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(ExecuteStatement_args.class, metaDataMap); - } - - public ExecuteStatement_args() { - } - - public ExecuteStatement_args( - TExecuteStatementReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public ExecuteStatement_args(ExecuteStatement_args other) { - if (other.isSetReq()) { - this.req = new TExecuteStatementReq(other.req); - } - } - - public ExecuteStatement_args deepCopy() { - return new ExecuteStatement_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TExecuteStatementReq getReq() { - return this.req; - } - - public void setReq(TExecuteStatementReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TExecuteStatementReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof ExecuteStatement_args) - return this.equals((ExecuteStatement_args)that); - return false; - } - - public boolean equals(ExecuteStatement_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return builder.toHashCode(); - } - - public int compareTo(ExecuteStatement_args other) 
{ - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - ExecuteStatement_args typedOther = (ExecuteStatement_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("ExecuteStatement_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class ExecuteStatement_argsStandardSchemeFactory implements SchemeFactory { - public ExecuteStatement_argsStandardScheme getScheme() { - return new ExecuteStatement_argsStandardScheme(); - } - } - - private static class ExecuteStatement_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, ExecuteStatement_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TExecuteStatementReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, ExecuteStatement_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - 
oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class ExecuteStatement_argsTupleSchemeFactory implements SchemeFactory { - public ExecuteStatement_argsTupleScheme getScheme() { - return new ExecuteStatement_argsTupleScheme(); - } - } - - private static class ExecuteStatement_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, ExecuteStatement_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, ExecuteStatement_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TExecuteStatementReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class ExecuteStatement_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("ExecuteStatement_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new ExecuteStatement_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new ExecuteStatement_resultTupleSchemeFactory()); - } - - private TExecuteStatementResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TExecuteStatementResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(ExecuteStatement_result.class, metaDataMap); - } - - public ExecuteStatement_result() { - } - - public ExecuteStatement_result( - TExecuteStatementResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public ExecuteStatement_result(ExecuteStatement_result other) { - if (other.isSetSuccess()) { - this.success = new TExecuteStatementResp(other.success); - } - } - - public ExecuteStatement_result deepCopy() { - return new ExecuteStatement_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TExecuteStatementResp getSuccess() { - return this.success; - } - - public void setSuccess(TExecuteStatementResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TExecuteStatementResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof ExecuteStatement_result) - return this.equals((ExecuteStatement_result)that); - return false; - } - - public boolean equals(ExecuteStatement_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_success = 
[The remainder of this hunk removes the rest of the Thrift-generated argument/result wrapper classes: the tail of ExecuteStatement_result, then GetTypeInfo_args, GetTypeInfo_result, GetCatalogs_args, GetCatalogs_result, GetSchemas_args, GetSchemas_result, and the beginning of GetTables_args. Each *_args class wraps a single required `req` field (TGetTypeInfoReq, TGetCatalogsReq, TGetSchemasReq, TGetTablesReq) and each *_result class wraps a single `success` field (TExecuteStatementResp, TGetTypeInfoResp, TGetCatalogsResp, TGetSchemasResp). Every one of these classes carries the same generated members: a _Fields enum with findByThriftId/findByThriftIdOrThrow/findByName lookups, a static metaDataMap, constructors plus a deep-copy constructor, deepCopy and clear, getters/setters with unset/isSet helpers, setFieldValue/getFieldValue, equals and hashCode (via HashCodeBuilder), compareTo, fieldForId, read/write entry points, toString, validate, Java-serialization writeObject/readObject wrappers over TCompactProtocol, and StandardScheme/TupleScheme factories with their reader and writer implementations.]
} - - int lastComparison = 0; - GetTables_args typedOther = (GetTables_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetTables_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetTables_argsStandardSchemeFactory implements SchemeFactory { - public GetTables_argsStandardScheme getScheme() { - return new GetTables_argsStandardScheme(); - } - } - - private static class GetTables_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetTables_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetTablesReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetTables_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetTables_argsTupleSchemeFactory implements SchemeFactory { - public GetTables_argsTupleScheme getScheme() { - return new 
GetTables_argsTupleScheme(); - } - } - - private static class GetTables_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetTables_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetTables_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetTablesReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetTables_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetTables_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetTables_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetTables_resultTupleSchemeFactory()); - } - - private TGetTablesResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetTablesResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetTables_result.class, metaDataMap); - } - - public GetTables_result() { - } - - public GetTables_result( - TGetTablesResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public GetTables_result(GetTables_result other) { - if (other.isSetSuccess()) { - this.success = new TGetTablesResp(other.success); - } - } - - public GetTables_result deepCopy() { - return new GetTables_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetTablesResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetTablesResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetTablesResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetTables_result) - return this.equals((GetTables_result)that); - return false; - } - - public boolean equals(GetTables_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - builder.append(present_success); - if (present_success) - builder.append(success); - 
- return builder.toHashCode(); - } - - public int compareTo(GetTables_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetTables_result typedOther = (GetTables_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetTables_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetTables_resultStandardSchemeFactory implements SchemeFactory { - public GetTables_resultStandardScheme getScheme() { - return new GetTables_resultStandardScheme(); - } - } - - private static class GetTables_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetTables_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetTablesResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetTables_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - 
oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetTables_resultTupleSchemeFactory implements SchemeFactory { - public GetTables_resultTupleScheme getScheme() { - return new GetTables_resultTupleScheme(); - } - } - - private static class GetTables_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetTables_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetTables_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetTablesResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetTableTypes_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetTableTypes_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetTableTypes_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetTableTypes_argsTupleSchemeFactory()); - } - - private TGetTableTypesReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetTableTypesReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetTableTypes_args.class, metaDataMap); - } - - public GetTableTypes_args() { - } - - public GetTableTypes_args( - TGetTableTypesReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public GetTableTypes_args(GetTableTypes_args other) { - if (other.isSetReq()) { - this.req = new TGetTableTypesReq(other.req); - } - } - - public GetTableTypes_args deepCopy() { - return new GetTableTypes_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetTableTypesReq getReq() { - return this.req; - } - - public void setReq(TGetTableTypesReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetTableTypesReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetTableTypes_args) - return this.equals((GetTableTypes_args)that); - return false; - } - - public boolean equals(GetTableTypes_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return builder.toHashCode(); - } - - public int compareTo(GetTableTypes_args other) { - if (!getClass().equals(other.getClass())) { - 
return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetTableTypes_args typedOther = (GetTableTypes_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetTableTypes_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetTableTypes_argsStandardSchemeFactory implements SchemeFactory { - public GetTableTypes_argsStandardScheme getScheme() { - return new GetTableTypes_argsStandardScheme(); - } - } - - private static class GetTableTypes_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetTableTypes_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetTableTypesReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetTableTypes_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class 
GetTableTypes_argsTupleSchemeFactory implements SchemeFactory { - public GetTableTypes_argsTupleScheme getScheme() { - return new GetTableTypes_argsTupleScheme(); - } - } - - private static class GetTableTypes_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetTableTypes_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetTableTypes_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetTableTypesReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetTableTypes_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetTableTypes_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetTableTypes_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetTableTypes_resultTupleSchemeFactory()); - } - - private TGetTableTypesResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetTableTypesResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetTableTypes_result.class, metaDataMap); - } - - public GetTableTypes_result() { - } - - public GetTableTypes_result( - TGetTableTypesResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public GetTableTypes_result(GetTableTypes_result other) { - if (other.isSetSuccess()) { - this.success = new TGetTableTypesResp(other.success); - } - } - - public GetTableTypes_result deepCopy() { - return new GetTableTypes_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetTableTypesResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetTableTypesResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetTableTypesResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetTableTypes_result) - return this.equals((GetTableTypes_result)that); - return false; - } - - public boolean equals(GetTableTypes_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - 
builder.append(present_success); - if (present_success) - builder.append(success); - - return builder.toHashCode(); - } - - public int compareTo(GetTableTypes_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetTableTypes_result typedOther = (GetTableTypes_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetTableTypes_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetTableTypes_resultStandardSchemeFactory implements SchemeFactory { - public GetTableTypes_resultStandardScheme getScheme() { - return new GetTableTypes_resultStandardScheme(); - } - } - - private static class GetTableTypes_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetTableTypes_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetTableTypesResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetTableTypes_result struct) throws 
org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetTableTypes_resultTupleSchemeFactory implements SchemeFactory { - public GetTableTypes_resultTupleScheme getScheme() { - return new GetTableTypes_resultTupleScheme(); - } - } - - private static class GetTableTypes_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetTableTypes_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetTableTypes_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetTableTypesResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetColumns_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetColumns_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetColumns_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetColumns_argsTupleSchemeFactory()); - } - - private TGetColumnsReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetColumnsReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetColumns_args.class, metaDataMap); - } - - public GetColumns_args() { - } - - public GetColumns_args( - TGetColumnsReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public GetColumns_args(GetColumns_args other) { - if (other.isSetReq()) { - this.req = new TGetColumnsReq(other.req); - } - } - - public GetColumns_args deepCopy() { - return new GetColumns_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetColumnsReq getReq() { - return this.req; - } - - public void setReq(TGetColumnsReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetColumnsReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetColumns_args) - return this.equals((GetColumns_args)that); - return false; - } - - public boolean equals(GetColumns_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return builder.toHashCode(); - } - - public int compareTo(GetColumns_args other) { - if (!getClass().equals(other.getClass())) { - return 
getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetColumns_args typedOther = (GetColumns_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetColumns_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetColumns_argsStandardSchemeFactory implements SchemeFactory { - public GetColumns_argsStandardScheme getScheme() { - return new GetColumns_argsStandardScheme(); - } - } - - private static class GetColumns_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetColumns_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetColumnsReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetColumns_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetColumns_argsTupleSchemeFactory implements 
SchemeFactory { - public GetColumns_argsTupleScheme getScheme() { - return new GetColumns_argsTupleScheme(); - } - } - - private static class GetColumns_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetColumns_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetColumns_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetColumnsReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetColumns_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetColumns_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetColumns_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetColumns_resultTupleSchemeFactory()); - } - - private TGetColumnsResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetColumnsResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetColumns_result.class, metaDataMap); - } - - public GetColumns_result() { - } - - public GetColumns_result( - TGetColumnsResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public GetColumns_result(GetColumns_result other) { - if (other.isSetSuccess()) { - this.success = new TGetColumnsResp(other.success); - } - } - - public GetColumns_result deepCopy() { - return new GetColumns_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetColumnsResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetColumnsResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetColumnsResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetColumns_result) - return this.equals((GetColumns_result)that); - return false; - } - - public boolean equals(GetColumns_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - builder.append(present_success); - if (present_success) - 
builder.append(success); - - return builder.toHashCode(); - } - - public int compareTo(GetColumns_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetColumns_result typedOther = (GetColumns_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetColumns_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetColumns_resultStandardSchemeFactory implements SchemeFactory { - public GetColumns_resultStandardScheme getScheme() { - return new GetColumns_resultStandardScheme(); - } - } - - private static class GetColumns_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetColumns_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetColumnsResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetColumns_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success 
!= null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetColumns_resultTupleSchemeFactory implements SchemeFactory { - public GetColumns_resultTupleScheme getScheme() { - return new GetColumns_resultTupleScheme(); - } - } - - private static class GetColumns_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetColumns_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetColumns_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetColumnsResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetFunctions_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetFunctions_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetFunctions_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetFunctions_argsTupleSchemeFactory()); - } - - private TGetFunctionsReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetFunctionsReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetFunctions_args.class, metaDataMap); - } - - public GetFunctions_args() { - } - - public GetFunctions_args( - TGetFunctionsReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public GetFunctions_args(GetFunctions_args other) { - if (other.isSetReq()) { - this.req = new TGetFunctionsReq(other.req); - } - } - - public GetFunctions_args deepCopy() { - return new GetFunctions_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetFunctionsReq getReq() { - return this.req; - } - - public void setReq(TGetFunctionsReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetFunctionsReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetFunctions_args) - return this.equals((GetFunctions_args)that); - return false; - } - - public boolean equals(GetFunctions_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return builder.toHashCode(); - } - - public int compareTo(GetFunctions_args other) { - if (!getClass().equals(other.getClass())) { - return 
getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetFunctions_args typedOther = (GetFunctions_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetFunctions_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetFunctions_argsStandardSchemeFactory implements SchemeFactory { - public GetFunctions_argsStandardScheme getScheme() { - return new GetFunctions_argsStandardScheme(); - } - } - - private static class GetFunctions_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetFunctions_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetFunctionsReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetFunctions_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class 
GetFunctions_argsTupleSchemeFactory implements SchemeFactory { - public GetFunctions_argsTupleScheme getScheme() { - return new GetFunctions_argsTupleScheme(); - } - } - - private static class GetFunctions_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetFunctions_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetFunctions_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetFunctionsReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetFunctions_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetFunctions_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetFunctions_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetFunctions_resultTupleSchemeFactory()); - } - - private TGetFunctionsResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetFunctionsResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetFunctions_result.class, metaDataMap); - } - - public GetFunctions_result() { - } - - public GetFunctions_result( - TGetFunctionsResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public GetFunctions_result(GetFunctions_result other) { - if (other.isSetSuccess()) { - this.success = new TGetFunctionsResp(other.success); - } - } - - public GetFunctions_result deepCopy() { - return new GetFunctions_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetFunctionsResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetFunctionsResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetFunctionsResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetFunctions_result) - return this.equals((GetFunctions_result)that); - return false; - } - - public boolean equals(GetFunctions_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - builder.append(present_success); - 
if (present_success) - builder.append(success); - - return builder.toHashCode(); - } - - public int compareTo(GetFunctions_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetFunctions_result typedOther = (GetFunctions_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetFunctions_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetFunctions_resultStandardSchemeFactory implements SchemeFactory { - public GetFunctions_resultStandardScheme getScheme() { - return new GetFunctions_resultStandardScheme(); - } - } - - private static class GetFunctions_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetFunctions_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetFunctionsResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetFunctions_result struct) throws org.apache.thrift.TException { - struct.validate(); - - 
oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetFunctions_resultTupleSchemeFactory implements SchemeFactory { - public GetFunctions_resultTupleScheme getScheme() { - return new GetFunctions_resultTupleScheme(); - } - } - - private static class GetFunctions_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetFunctions_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetFunctions_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetFunctionsResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetOperationStatus_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetOperationStatus_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetOperationStatus_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetOperationStatus_argsTupleSchemeFactory()); - } - - private TGetOperationStatusReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetOperationStatusReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetOperationStatus_args.class, metaDataMap); - } - - public GetOperationStatus_args() { - } - - public GetOperationStatus_args( - TGetOperationStatusReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public GetOperationStatus_args(GetOperationStatus_args other) { - if (other.isSetReq()) { - this.req = new TGetOperationStatusReq(other.req); - } - } - - public GetOperationStatus_args deepCopy() { - return new GetOperationStatus_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetOperationStatusReq getReq() { - return this.req; - } - - public void setReq(TGetOperationStatusReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetOperationStatusReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetOperationStatus_args) - return this.equals((GetOperationStatus_args)that); - return false; - } - - public boolean equals(GetOperationStatus_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return builder.toHashCode(); - } - - public int 
compareTo(GetOperationStatus_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetOperationStatus_args typedOther = (GetOperationStatus_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetOperationStatus_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetOperationStatus_argsStandardSchemeFactory implements SchemeFactory { - public GetOperationStatus_argsStandardScheme getScheme() { - return new GetOperationStatus_argsStandardScheme(); - } - } - - private static class GetOperationStatus_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetOperationStatus_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetOperationStatusReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetOperationStatus_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - 
struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetOperationStatus_argsTupleSchemeFactory implements SchemeFactory { - public GetOperationStatus_argsTupleScheme getScheme() { - return new GetOperationStatus_argsTupleScheme(); - } - } - - private static class GetOperationStatus_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetOperationStatus_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetOperationStatus_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetOperationStatusReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetOperationStatus_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetOperationStatus_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetOperationStatus_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetOperationStatus_resultTupleSchemeFactory()); - } - - private TGetOperationStatusResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetOperationStatusResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetOperationStatus_result.class, metaDataMap); - } - - public GetOperationStatus_result() { - } - - public GetOperationStatus_result( - TGetOperationStatusResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public GetOperationStatus_result(GetOperationStatus_result other) { - if (other.isSetSuccess()) { - this.success = new TGetOperationStatusResp(other.success); - } - } - - public GetOperationStatus_result deepCopy() { - return new GetOperationStatus_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetOperationStatusResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetOperationStatusResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetOperationStatusResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetOperationStatus_result) - return this.equals((GetOperationStatus_result)that); - return false; - } - - public boolean equals(GetOperationStatus_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new 
HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - builder.append(present_success); - if (present_success) - builder.append(success); - - return builder.toHashCode(); - } - - public int compareTo(GetOperationStatus_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetOperationStatus_result typedOther = (GetOperationStatus_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetOperationStatus_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetOperationStatus_resultStandardSchemeFactory implements SchemeFactory { - public GetOperationStatus_resultStandardScheme getScheme() { - return new GetOperationStatus_resultStandardScheme(); - } - } - - private static class GetOperationStatus_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetOperationStatus_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetOperationStatusResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } 
- - public void write(org.apache.thrift.protocol.TProtocol oprot, GetOperationStatus_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetOperationStatus_resultTupleSchemeFactory implements SchemeFactory { - public GetOperationStatus_resultTupleScheme getScheme() { - return new GetOperationStatus_resultTupleScheme(); - } - } - - private static class GetOperationStatus_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetOperationStatus_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetOperationStatus_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetOperationStatusResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class CancelOperation_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CancelOperation_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CancelOperation_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CancelOperation_argsTupleSchemeFactory()); - } - - private TCancelOperationReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCancelOperationReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CancelOperation_args.class, metaDataMap); - } - - public CancelOperation_args() { - } - - public CancelOperation_args( - TCancelOperationReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public CancelOperation_args(CancelOperation_args other) { - if (other.isSetReq()) { - this.req = new TCancelOperationReq(other.req); - } - } - - public CancelOperation_args deepCopy() { - return new CancelOperation_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TCancelOperationReq getReq() { - return this.req; - } - - public void setReq(TCancelOperationReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TCancelOperationReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CancelOperation_args) - return this.equals((CancelOperation_args)that); - return false; - } - - public boolean equals(CancelOperation_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return builder.toHashCode(); - } - - public int compareTo(CancelOperation_args other) { - if 
(!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - CancelOperation_args typedOther = (CancelOperation_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CancelOperation_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CancelOperation_argsStandardSchemeFactory implements SchemeFactory { - public CancelOperation_argsStandardScheme getScheme() { - return new CancelOperation_argsStandardScheme(); - } - } - - private static class CancelOperation_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CancelOperation_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TCancelOperationReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, CancelOperation_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - 
oprot.writeStructEnd(); - } - - } - - private static class CancelOperation_argsTupleSchemeFactory implements SchemeFactory { - public CancelOperation_argsTupleScheme getScheme() { - return new CancelOperation_argsTupleScheme(); - } - } - - private static class CancelOperation_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CancelOperation_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CancelOperation_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TCancelOperationReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class CancelOperation_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CancelOperation_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CancelOperation_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CancelOperation_resultTupleSchemeFactory()); - } - - private TCancelOperationResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCancelOperationResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CancelOperation_result.class, metaDataMap); - } - - public CancelOperation_result() { - } - - public CancelOperation_result( - TCancelOperationResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public CancelOperation_result(CancelOperation_result other) { - if (other.isSetSuccess()) { - this.success = new TCancelOperationResp(other.success); - } - } - - public CancelOperation_result deepCopy() { - return new CancelOperation_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TCancelOperationResp getSuccess() { - return this.success; - } - - public void setSuccess(TCancelOperationResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TCancelOperationResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CancelOperation_result) - return this.equals((CancelOperation_result)that); - return false; - } - - public boolean equals(CancelOperation_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_success = true && 
(isSetSuccess()); - builder.append(present_success); - if (present_success) - builder.append(success); - - return builder.toHashCode(); - } - - public int compareTo(CancelOperation_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - CancelOperation_result typedOther = (CancelOperation_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CancelOperation_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CancelOperation_resultStandardSchemeFactory implements SchemeFactory { - public CancelOperation_resultStandardScheme getScheme() { - return new CancelOperation_resultStandardScheme(); - } - } - - private static class CancelOperation_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CancelOperation_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TCancelOperationResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, 
CancelOperation_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class CancelOperation_resultTupleSchemeFactory implements SchemeFactory { - public CancelOperation_resultTupleScheme getScheme() { - return new CancelOperation_resultTupleScheme(); - } - } - - private static class CancelOperation_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CancelOperation_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CancelOperation_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TCancelOperationResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class CloseOperation_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CloseOperation_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CloseOperation_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CloseOperation_argsTupleSchemeFactory()); - } - - private TCloseOperationReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCloseOperationReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CloseOperation_args.class, metaDataMap); - } - - public CloseOperation_args() { - } - - public CloseOperation_args( - TCloseOperationReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public CloseOperation_args(CloseOperation_args other) { - if (other.isSetReq()) { - this.req = new TCloseOperationReq(other.req); - } - } - - public CloseOperation_args deepCopy() { - return new CloseOperation_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TCloseOperationReq getReq() { - return this.req; - } - - public void setReq(TCloseOperationReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TCloseOperationReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CloseOperation_args) - return this.equals((CloseOperation_args)that); - return false; - } - - public boolean equals(CloseOperation_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return builder.toHashCode(); - } - - public int compareTo(CloseOperation_args other) { - if 
(!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - CloseOperation_args typedOther = (CloseOperation_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CloseOperation_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CloseOperation_argsStandardSchemeFactory implements SchemeFactory { - public CloseOperation_argsStandardScheme getScheme() { - return new CloseOperation_argsStandardScheme(); - } - } - - private static class CloseOperation_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CloseOperation_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TCloseOperationReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, CloseOperation_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); 
- } - - } - - private static class CloseOperation_argsTupleSchemeFactory implements SchemeFactory { - public CloseOperation_argsTupleScheme getScheme() { - return new CloseOperation_argsTupleScheme(); - } - } - - private static class CloseOperation_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CloseOperation_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CloseOperation_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TCloseOperationReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class CloseOperation_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CloseOperation_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CloseOperation_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CloseOperation_resultTupleSchemeFactory()); - } - - private TCloseOperationResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCloseOperationResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CloseOperation_result.class, metaDataMap); - } - - public CloseOperation_result() { - } - - public CloseOperation_result( - TCloseOperationResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public CloseOperation_result(CloseOperation_result other) { - if (other.isSetSuccess()) { - this.success = new TCloseOperationResp(other.success); - } - } - - public CloseOperation_result deepCopy() { - return new CloseOperation_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TCloseOperationResp getSuccess() { - return this.success; - } - - public void setSuccess(TCloseOperationResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TCloseOperationResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CloseOperation_result) - return this.equals((CloseOperation_result)that); - return false; - } - - public boolean equals(CloseOperation_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - 
builder.append(present_success); - if (present_success) - builder.append(success); - - return builder.toHashCode(); - } - - public int compareTo(CloseOperation_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - CloseOperation_result typedOther = (CloseOperation_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CloseOperation_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CloseOperation_resultStandardSchemeFactory implements SchemeFactory { - public CloseOperation_resultStandardScheme getScheme() { - return new CloseOperation_resultStandardScheme(); - } - } - - private static class CloseOperation_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CloseOperation_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TCloseOperationResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, CloseOperation_result struct) throws 
org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class CloseOperation_resultTupleSchemeFactory implements SchemeFactory { - public CloseOperation_resultTupleScheme getScheme() { - return new CloseOperation_resultTupleScheme(); - } - } - - private static class CloseOperation_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CloseOperation_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CloseOperation_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TCloseOperationResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetResultSetMetadata_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetResultSetMetadata_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetResultSetMetadata_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetResultSetMetadata_argsTupleSchemeFactory()); - } - - private TGetResultSetMetadataReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetResultSetMetadataReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetResultSetMetadata_args.class, metaDataMap); - } - - public GetResultSetMetadata_args() { - } - - public GetResultSetMetadata_args( - TGetResultSetMetadataReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public GetResultSetMetadata_args(GetResultSetMetadata_args other) { - if (other.isSetReq()) { - this.req = new TGetResultSetMetadataReq(other.req); - } - } - - public GetResultSetMetadata_args deepCopy() { - return new GetResultSetMetadata_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetResultSetMetadataReq getReq() { - return this.req; - } - - public void setReq(TGetResultSetMetadataReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetResultSetMetadataReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetResultSetMetadata_args) - return this.equals((GetResultSetMetadata_args)that); - return false; - } - - public boolean equals(GetResultSetMetadata_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return 
builder.toHashCode(); - } - - public int compareTo(GetResultSetMetadata_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetResultSetMetadata_args typedOther = (GetResultSetMetadata_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetResultSetMetadata_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetResultSetMetadata_argsStandardSchemeFactory implements SchemeFactory { - public GetResultSetMetadata_argsStandardScheme getScheme() { - return new GetResultSetMetadata_argsStandardScheme(); - } - } - - private static class GetResultSetMetadata_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetResultSetMetadata_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetResultSetMetadataReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetResultSetMetadata_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) 
{ - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetResultSetMetadata_argsTupleSchemeFactory implements SchemeFactory { - public GetResultSetMetadata_argsTupleScheme getScheme() { - return new GetResultSetMetadata_argsTupleScheme(); - } - } - - private static class GetResultSetMetadata_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetResultSetMetadata_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetResultSetMetadata_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetResultSetMetadataReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetResultSetMetadata_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetResultSetMetadata_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetResultSetMetadata_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetResultSetMetadata_resultTupleSchemeFactory()); - } - - private TGetResultSetMetadataResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetResultSetMetadataResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetResultSetMetadata_result.class, metaDataMap); - } - - public GetResultSetMetadata_result() { - } - - public GetResultSetMetadata_result( - TGetResultSetMetadataResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public GetResultSetMetadata_result(GetResultSetMetadata_result other) { - if (other.isSetSuccess()) { - this.success = new TGetResultSetMetadataResp(other.success); - } - } - - public GetResultSetMetadata_result deepCopy() { - return new GetResultSetMetadata_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetResultSetMetadataResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetResultSetMetadataResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetResultSetMetadataResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetResultSetMetadata_result) - return this.equals((GetResultSetMetadata_result)that); - return false; - } - - public boolean equals(GetResultSetMetadata_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder 
builder = new HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - builder.append(present_success); - if (present_success) - builder.append(success); - - return builder.toHashCode(); - } - - public int compareTo(GetResultSetMetadata_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetResultSetMetadata_result typedOther = (GetResultSetMetadata_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetResultSetMetadata_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetResultSetMetadata_resultStandardSchemeFactory implements SchemeFactory { - public GetResultSetMetadata_resultStandardScheme getScheme() { - return new GetResultSetMetadata_resultStandardScheme(); - } - } - - private static class GetResultSetMetadata_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetResultSetMetadata_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetResultSetMetadataResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - 
iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetResultSetMetadata_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetResultSetMetadata_resultTupleSchemeFactory implements SchemeFactory { - public GetResultSetMetadata_resultTupleScheme getScheme() { - return new GetResultSetMetadata_resultTupleScheme(); - } - } - - private static class GetResultSetMetadata_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetResultSetMetadata_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetResultSetMetadata_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetResultSetMetadataResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class FetchResults_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("FetchResults_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new FetchResults_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new FetchResults_argsTupleSchemeFactory()); - } - - private TFetchResultsReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TFetchResultsReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(FetchResults_args.class, metaDataMap); - } - - public FetchResults_args() { - } - - public FetchResults_args( - TFetchResultsReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public FetchResults_args(FetchResults_args other) { - if (other.isSetReq()) { - this.req = new TFetchResultsReq(other.req); - } - } - - public FetchResults_args deepCopy() { - return new FetchResults_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TFetchResultsReq getReq() { - return this.req; - } - - public void setReq(TFetchResultsReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TFetchResultsReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof FetchResults_args) - return this.equals((FetchResults_args)that); - return false; - } - - public boolean equals(FetchResults_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return builder.toHashCode(); - } - - public int compareTo(FetchResults_args other) { - if (!getClass().equals(other.getClass())) { - return 
getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - FetchResults_args typedOther = (FetchResults_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("FetchResults_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class FetchResults_argsStandardSchemeFactory implements SchemeFactory { - public FetchResults_argsStandardScheme getScheme() { - return new FetchResults_argsStandardScheme(); - } - } - - private static class FetchResults_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, FetchResults_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TFetchResultsReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, FetchResults_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class 
FetchResults_argsTupleSchemeFactory implements SchemeFactory { - public FetchResults_argsTupleScheme getScheme() { - return new FetchResults_argsTupleScheme(); - } - } - - private static class FetchResults_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, FetchResults_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, FetchResults_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TFetchResultsReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class FetchResults_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("FetchResults_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new FetchResults_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new FetchResults_resultTupleSchemeFactory()); - } - - private TFetchResultsResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TFetchResultsResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(FetchResults_result.class, metaDataMap); - } - - public FetchResults_result() { - } - - public FetchResults_result( - TFetchResultsResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public FetchResults_result(FetchResults_result other) { - if (other.isSetSuccess()) { - this.success = new TFetchResultsResp(other.success); - } - } - - public FetchResults_result deepCopy() { - return new FetchResults_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TFetchResultsResp getSuccess() { - return this.success; - } - - public void setSuccess(TFetchResultsResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TFetchResultsResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof FetchResults_result) - return this.equals((FetchResults_result)that); - return false; - } - - public boolean equals(FetchResults_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - builder.append(present_success); - 
if (present_success) - builder.append(success); - - return builder.toHashCode(); - } - - public int compareTo(FetchResults_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - FetchResults_result typedOther = (FetchResults_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("FetchResults_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class FetchResults_resultStandardSchemeFactory implements SchemeFactory { - public FetchResults_resultStandardScheme getScheme() { - return new FetchResults_resultStandardScheme(); - } - } - - private static class FetchResults_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, FetchResults_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TFetchResultsResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, FetchResults_result struct) throws org.apache.thrift.TException { - struct.validate(); - - 
oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class FetchResults_resultTupleSchemeFactory implements SchemeFactory { - public FetchResults_resultTupleScheme getScheme() { - return new FetchResults_resultTupleScheme(); - } - } - - private static class FetchResults_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, FetchResults_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, FetchResults_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TFetchResultsResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetDelegationToken_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetDelegationToken_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetDelegationToken_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetDelegationToken_argsTupleSchemeFactory()); - } - - private TGetDelegationTokenReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetDelegationTokenReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetDelegationToken_args.class, metaDataMap); - } - - public GetDelegationToken_args() { - } - - public GetDelegationToken_args( - TGetDelegationTokenReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public GetDelegationToken_args(GetDelegationToken_args other) { - if (other.isSetReq()) { - this.req = new TGetDelegationTokenReq(other.req); - } - } - - public GetDelegationToken_args deepCopy() { - return new GetDelegationToken_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetDelegationTokenReq getReq() { - return this.req; - } - - public void setReq(TGetDelegationTokenReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetDelegationTokenReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetDelegationToken_args) - return this.equals((GetDelegationToken_args)that); - return false; - } - - public boolean equals(GetDelegationToken_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return builder.toHashCode(); - } - - public int 
compareTo(GetDelegationToken_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetDelegationToken_args typedOther = (GetDelegationToken_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetDelegationToken_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetDelegationToken_argsStandardSchemeFactory implements SchemeFactory { - public GetDelegationToken_argsStandardScheme getScheme() { - return new GetDelegationToken_argsStandardScheme(); - } - } - - private static class GetDelegationToken_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetDelegationToken_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetDelegationTokenReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetDelegationToken_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - 
struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetDelegationToken_argsTupleSchemeFactory implements SchemeFactory { - public GetDelegationToken_argsTupleScheme getScheme() { - return new GetDelegationToken_argsTupleScheme(); - } - } - - private static class GetDelegationToken_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetDelegationToken_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetDelegationToken_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetDelegationTokenReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetDelegationToken_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetDelegationToken_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetDelegationToken_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetDelegationToken_resultTupleSchemeFactory()); - } - - private TGetDelegationTokenResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetDelegationTokenResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetDelegationToken_result.class, metaDataMap); - } - - public GetDelegationToken_result() { - } - - public GetDelegationToken_result( - TGetDelegationTokenResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public GetDelegationToken_result(GetDelegationToken_result other) { - if (other.isSetSuccess()) { - this.success = new TGetDelegationTokenResp(other.success); - } - } - - public GetDelegationToken_result deepCopy() { - return new GetDelegationToken_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetDelegationTokenResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetDelegationTokenResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetDelegationTokenResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetDelegationToken_result) - return this.equals((GetDelegationToken_result)that); - return false; - } - - public boolean equals(GetDelegationToken_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new 
HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - builder.append(present_success); - if (present_success) - builder.append(success); - - return builder.toHashCode(); - } - - public int compareTo(GetDelegationToken_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetDelegationToken_result typedOther = (GetDelegationToken_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetDelegationToken_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetDelegationToken_resultStandardSchemeFactory implements SchemeFactory { - public GetDelegationToken_resultStandardScheme getScheme() { - return new GetDelegationToken_resultStandardScheme(); - } - } - - private static class GetDelegationToken_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetDelegationToken_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetDelegationTokenResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } 
- - public void write(org.apache.thrift.protocol.TProtocol oprot, GetDelegationToken_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetDelegationToken_resultTupleSchemeFactory implements SchemeFactory { - public GetDelegationToken_resultTupleScheme getScheme() { - return new GetDelegationToken_resultTupleScheme(); - } - } - - private static class GetDelegationToken_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetDelegationToken_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetDelegationToken_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetDelegationTokenResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class CancelDelegationToken_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CancelDelegationToken_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CancelDelegationToken_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CancelDelegationToken_argsTupleSchemeFactory()); - } - - private TCancelDelegationTokenReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCancelDelegationTokenReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CancelDelegationToken_args.class, metaDataMap); - } - - public CancelDelegationToken_args() { - } - - public CancelDelegationToken_args( - TCancelDelegationTokenReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public CancelDelegationToken_args(CancelDelegationToken_args other) { - if (other.isSetReq()) { - this.req = new TCancelDelegationTokenReq(other.req); - } - } - - public CancelDelegationToken_args deepCopy() { - return new CancelDelegationToken_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TCancelDelegationTokenReq getReq() { - return this.req; - } - - public void setReq(TCancelDelegationTokenReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TCancelDelegationTokenReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CancelDelegationToken_args) - return this.equals((CancelDelegationToken_args)that); - return false; - } - - public boolean equals(CancelDelegationToken_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return 
builder.toHashCode(); - } - - public int compareTo(CancelDelegationToken_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - CancelDelegationToken_args typedOther = (CancelDelegationToken_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CancelDelegationToken_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CancelDelegationToken_argsStandardSchemeFactory implements SchemeFactory { - public CancelDelegationToken_argsStandardScheme getScheme() { - return new CancelDelegationToken_argsStandardScheme(); - } - } - - private static class CancelDelegationToken_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CancelDelegationToken_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TCancelDelegationTokenReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, CancelDelegationToken_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if 
(struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class CancelDelegationToken_argsTupleSchemeFactory implements SchemeFactory { - public CancelDelegationToken_argsTupleScheme getScheme() { - return new CancelDelegationToken_argsTupleScheme(); - } - } - - private static class CancelDelegationToken_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CancelDelegationToken_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CancelDelegationToken_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TCancelDelegationTokenReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class CancelDelegationToken_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CancelDelegationToken_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CancelDelegationToken_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CancelDelegationToken_resultTupleSchemeFactory()); - } - - private TCancelDelegationTokenResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCancelDelegationTokenResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CancelDelegationToken_result.class, metaDataMap); - } - - public CancelDelegationToken_result() { - } - - public CancelDelegationToken_result( - TCancelDelegationTokenResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public CancelDelegationToken_result(CancelDelegationToken_result other) { - if (other.isSetSuccess()) { - this.success = new TCancelDelegationTokenResp(other.success); - } - } - - public CancelDelegationToken_result deepCopy() { - return new CancelDelegationToken_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TCancelDelegationTokenResp getSuccess() { - return this.success; - } - - public void setSuccess(TCancelDelegationTokenResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TCancelDelegationTokenResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CancelDelegationToken_result) - return this.equals((CancelDelegationToken_result)that); - return false; - } - - public boolean equals(CancelDelegationToken_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - 
HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - builder.append(present_success); - if (present_success) - builder.append(success); - - return builder.toHashCode(); - } - - public int compareTo(CancelDelegationToken_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - CancelDelegationToken_result typedOther = (CancelDelegationToken_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CancelDelegationToken_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CancelDelegationToken_resultStandardSchemeFactory implements SchemeFactory { - public CancelDelegationToken_resultStandardScheme getScheme() { - return new CancelDelegationToken_resultStandardScheme(); - } - } - - private static class CancelDelegationToken_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CancelDelegationToken_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TCancelDelegationTokenResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - 
iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, CancelDelegationToken_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class CancelDelegationToken_resultTupleSchemeFactory implements SchemeFactory { - public CancelDelegationToken_resultTupleScheme getScheme() { - return new CancelDelegationToken_resultTupleScheme(); - } - } - - private static class CancelDelegationToken_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CancelDelegationToken_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CancelDelegationToken_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TCancelDelegationTokenResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class RenewDelegationToken_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("RenewDelegationToken_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new RenewDelegationToken_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new RenewDelegationToken_argsTupleSchemeFactory()); - } - - private TRenewDelegationTokenReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TRenewDelegationTokenReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(RenewDelegationToken_args.class, metaDataMap); - } - - public RenewDelegationToken_args() { - } - - public RenewDelegationToken_args( - TRenewDelegationTokenReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public RenewDelegationToken_args(RenewDelegationToken_args other) { - if (other.isSetReq()) { - this.req = new TRenewDelegationTokenReq(other.req); - } - } - - public RenewDelegationToken_args deepCopy() { - return new RenewDelegationToken_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TRenewDelegationTokenReq getReq() { - return this.req; - } - - public void setReq(TRenewDelegationTokenReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TRenewDelegationTokenReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof RenewDelegationToken_args) - return this.equals((RenewDelegationToken_args)that); - return false; - } - - public boolean equals(RenewDelegationToken_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return 
builder.toHashCode(); - } - - public int compareTo(RenewDelegationToken_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - RenewDelegationToken_args typedOther = (RenewDelegationToken_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("RenewDelegationToken_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class RenewDelegationToken_argsStandardSchemeFactory implements SchemeFactory { - public RenewDelegationToken_argsStandardScheme getScheme() { - return new RenewDelegationToken_argsStandardScheme(); - } - } - - private static class RenewDelegationToken_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, RenewDelegationToken_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TRenewDelegationTokenReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, RenewDelegationToken_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) 
{ - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class RenewDelegationToken_argsTupleSchemeFactory implements SchemeFactory { - public RenewDelegationToken_argsTupleScheme getScheme() { - return new RenewDelegationToken_argsTupleScheme(); - } - } - - private static class RenewDelegationToken_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, RenewDelegationToken_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, RenewDelegationToken_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TRenewDelegationTokenReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class RenewDelegationToken_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("RenewDelegationToken_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new RenewDelegationToken_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new RenewDelegationToken_resultTupleSchemeFactory()); - } - - private TRenewDelegationTokenResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TRenewDelegationTokenResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(RenewDelegationToken_result.class, metaDataMap); - } - - public RenewDelegationToken_result() { - } - - public RenewDelegationToken_result( - TRenewDelegationTokenResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public RenewDelegationToken_result(RenewDelegationToken_result other) { - if (other.isSetSuccess()) { - this.success = new TRenewDelegationTokenResp(other.success); - } - } - - public RenewDelegationToken_result deepCopy() { - return new RenewDelegationToken_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TRenewDelegationTokenResp getSuccess() { - return this.success; - } - - public void setSuccess(TRenewDelegationTokenResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TRenewDelegationTokenResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof RenewDelegationToken_result) - return this.equals((RenewDelegationToken_result)that); - return false; - } - - public boolean equals(RenewDelegationToken_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder 
builder = new HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - builder.append(present_success); - if (present_success) - builder.append(success); - - return builder.toHashCode(); - } - - public int compareTo(RenewDelegationToken_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - RenewDelegationToken_result typedOther = (RenewDelegationToken_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("RenewDelegationToken_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class RenewDelegationToken_resultStandardSchemeFactory implements SchemeFactory { - public RenewDelegationToken_resultStandardScheme getScheme() { - return new RenewDelegationToken_resultStandardScheme(); - } - } - - private static class RenewDelegationToken_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, RenewDelegationToken_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TRenewDelegationTokenResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - 
iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, RenewDelegationToken_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class RenewDelegationToken_resultTupleSchemeFactory implements SchemeFactory { - public RenewDelegationToken_resultTupleScheme getScheme() { - return new RenewDelegationToken_resultTupleScheme(); - } - } - - private static class RenewDelegationToken_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, RenewDelegationToken_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, RenewDelegationToken_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TRenewDelegationTokenResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - -} diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCLIServiceConstants.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCLIServiceConstants.java deleted file mode 100644 index 25a38b178428a..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCLIServiceConstants.java +++ /dev/null @@ -1,103 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TCLIServiceConstants { - - public static final Set PRIMITIVE_TYPES = new HashSet(); - static { - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.BOOLEAN_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.TINYINT_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.SMALLINT_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.INT_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.BIGINT_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.FLOAT_TYPE); - 
PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.DOUBLE_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.STRING_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.TIMESTAMP_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.BINARY_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.DECIMAL_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.NULL_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.DATE_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.VARCHAR_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.CHAR_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.INTERVAL_YEAR_MONTH_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.INTERVAL_DAY_TIME_TYPE); - } - - public static final Set COMPLEX_TYPES = new HashSet(); - static { - COMPLEX_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.ARRAY_TYPE); - COMPLEX_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.MAP_TYPE); - COMPLEX_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.STRUCT_TYPE); - COMPLEX_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.UNION_TYPE); - COMPLEX_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.USER_DEFINED_TYPE); - } - - public static final Set COLLECTION_TYPES = new HashSet(); - static { - COLLECTION_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.ARRAY_TYPE); - COLLECTION_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.MAP_TYPE); - } - - public static final Map TYPE_NAMES = new HashMap(); - static { - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.BOOLEAN_TYPE, "BOOLEAN"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.TINYINT_TYPE, "TINYINT"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.SMALLINT_TYPE, "SMALLINT"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.INT_TYPE, "INT"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.BIGINT_TYPE, "BIGINT"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.FLOAT_TYPE, "FLOAT"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.DOUBLE_TYPE, "DOUBLE"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.STRING_TYPE, "STRING"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.TIMESTAMP_TYPE, "TIMESTAMP"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.BINARY_TYPE, "BINARY"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.ARRAY_TYPE, "ARRAY"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.MAP_TYPE, "MAP"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.STRUCT_TYPE, "STRUCT"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.UNION_TYPE, "UNIONTYPE"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.DECIMAL_TYPE, "DECIMAL"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.NULL_TYPE, "NULL"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.DATE_TYPE, "DATE"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.VARCHAR_TYPE, "VARCHAR"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.CHAR_TYPE, "CHAR"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.INTERVAL_YEAR_MONTH_TYPE, "INTERVAL_YEAR_MONTH"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.INTERVAL_DAY_TIME_TYPE, "INTERVAL_DAY_TIME"); - } - - public static final String CHARACTER_MAXIMUM_LENGTH = "characterMaximumLength"; - - 
public static final String PRECISION = "precision"; - - public static final String SCALE = "scale"; - -} diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelDelegationTokenReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelDelegationTokenReq.java deleted file mode 100644 index e23fcdd77a1a4..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelDelegationTokenReq.java +++ /dev/null @@ -1,491 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TCancelDelegationTokenReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCancelDelegationTokenReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField DELEGATION_TOKEN_FIELD_DESC = new org.apache.thrift.protocol.TField("delegationToken", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCancelDelegationTokenReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCancelDelegationTokenReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String delegationToken; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - DELEGATION_TOKEN((short)2, "delegationToken"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // DELEGATION_TOKEN - return DELEGATION_TOKEN; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.DELEGATION_TOKEN, new org.apache.thrift.meta_data.FieldMetaData("delegationToken", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCancelDelegationTokenReq.class, metaDataMap); - } - - public TCancelDelegationTokenReq() { - } - - public TCancelDelegationTokenReq( - TSessionHandle sessionHandle, - String delegationToken) - { - this(); - this.sessionHandle = sessionHandle; - this.delegationToken = delegationToken; - } - - /** - * Performs a deep copy on other. 
- */ - public TCancelDelegationTokenReq(TCancelDelegationTokenReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetDelegationToken()) { - this.delegationToken = other.delegationToken; - } - } - - public TCancelDelegationTokenReq deepCopy() { - return new TCancelDelegationTokenReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.delegationToken = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getDelegationToken() { - return this.delegationToken; - } - - public void setDelegationToken(String delegationToken) { - this.delegationToken = delegationToken; - } - - public void unsetDelegationToken() { - this.delegationToken = null; - } - - /** Returns true if field delegationToken is set (has been assigned a value) and false otherwise */ - public boolean isSetDelegationToken() { - return this.delegationToken != null; - } - - public void setDelegationTokenIsSet(boolean value) { - if (!value) { - this.delegationToken = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case DELEGATION_TOKEN: - if (value == null) { - unsetDelegationToken(); - } else { - setDelegationToken((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case DELEGATION_TOKEN: - return getDelegationToken(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case DELEGATION_TOKEN: - return isSetDelegationToken(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCancelDelegationTokenReq) - return this.equals((TCancelDelegationTokenReq)that); - return false; - } - - public boolean equals(TCancelDelegationTokenReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_delegationToken = true && this.isSetDelegationToken(); - boolean that_present_delegationToken = true && that.isSetDelegationToken(); - if (this_present_delegationToken || that_present_delegationToken) { - if (!(this_present_delegationToken && that_present_delegationToken)) - 
return false; - if (!this.delegationToken.equals(that.delegationToken)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - boolean present_delegationToken = true && (isSetDelegationToken()); - builder.append(present_delegationToken); - if (present_delegationToken) - builder.append(delegationToken); - - return builder.toHashCode(); - } - - public int compareTo(TCancelDelegationTokenReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TCancelDelegationTokenReq typedOther = (TCancelDelegationTokenReq)other; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetDelegationToken()).compareTo(typedOther.isSetDelegationToken()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetDelegationToken()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.delegationToken, typedOther.delegationToken); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCancelDelegationTokenReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (!first) sb.append(", "); - sb.append("delegationToken:"); - if (this.delegationToken == null) { - sb.append("null"); - } else { - sb.append(this.delegationToken); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - if (!isSetDelegationToken()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'delegationToken' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCancelDelegationTokenReqStandardSchemeFactory implements SchemeFactory { - public TCancelDelegationTokenReqStandardScheme getScheme() { - return new TCancelDelegationTokenReqStandardScheme(); - } - } - - private static class TCancelDelegationTokenReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCancelDelegationTokenReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // DELEGATION_TOKEN - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.delegationToken = iprot.readString(); - struct.setDelegationTokenIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCancelDelegationTokenReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.delegationToken != null) { - oprot.writeFieldBegin(DELEGATION_TOKEN_FIELD_DESC); - oprot.writeString(struct.delegationToken); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCancelDelegationTokenReqTupleSchemeFactory implements SchemeFactory { - public TCancelDelegationTokenReqTupleScheme getScheme() { - return new TCancelDelegationTokenReqTupleScheme(); - } - } - - private static class TCancelDelegationTokenReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCancelDelegationTokenReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - oprot.writeString(struct.delegationToken); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCancelDelegationTokenReq struct) throws org.apache.thrift.TException { - 
TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - struct.delegationToken = iprot.readString(); - struct.setDelegationTokenIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelDelegationTokenResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelDelegationTokenResp.java deleted file mode 100644 index 77c9ee77ec59b..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelDelegationTokenResp.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TCancelDelegationTokenResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCancelDelegationTokenResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCancelDelegationTokenRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCancelDelegationTokenRespTupleSchemeFactory()); - } - - private TStatus status; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCancelDelegationTokenResp.class, metaDataMap); - } - - public TCancelDelegationTokenResp() { - } - - public TCancelDelegationTokenResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TCancelDelegationTokenResp(TCancelDelegationTokenResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - } - - public TCancelDelegationTokenResp deepCopy() { - return new TCancelDelegationTokenResp(this); - } - - @Override - public void clear() { - this.status = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCancelDelegationTokenResp) - return this.equals((TCancelDelegationTokenResp)that); - return false; - } - - public boolean equals(TCancelDelegationTokenResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - builder.append(present_status); - if (present_status) - 
builder.append(status); - - return builder.toHashCode(); - } - - public int compareTo(TCancelDelegationTokenResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TCancelDelegationTokenResp typedOther = (TCancelDelegationTokenResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCancelDelegationTokenResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCancelDelegationTokenRespStandardSchemeFactory implements SchemeFactory { - public TCancelDelegationTokenRespStandardScheme getScheme() { - return new TCancelDelegationTokenRespStandardScheme(); - } - } - - private static class TCancelDelegationTokenRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCancelDelegationTokenResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - 
public void write(org.apache.thrift.protocol.TProtocol oprot, TCancelDelegationTokenResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCancelDelegationTokenRespTupleSchemeFactory implements SchemeFactory { - public TCancelDelegationTokenRespTupleScheme getScheme() { - return new TCancelDelegationTokenRespTupleScheme(); - } - } - - private static class TCancelDelegationTokenRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCancelDelegationTokenResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCancelDelegationTokenResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelOperationReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelOperationReq.java deleted file mode 100644 index 45eac48ab12d3..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelOperationReq.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TCancelOperationReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCancelOperationReq"); - - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCancelOperationReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCancelOperationReqTupleSchemeFactory()); - } - - private TOperationHandle operationHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OPERATION_HANDLE((short)1, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCancelOperationReq.class, metaDataMap); - } - - public TCancelOperationReq() { - } - - public TCancelOperationReq( - TOperationHandle operationHandle) - { - this(); - this.operationHandle = operationHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TCancelOperationReq(TCancelOperationReq other) { - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TCancelOperationReq deepCopy() { - return new TCancelOperationReq(this); - } - - @Override - public void clear() { - this.operationHandle = null; - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCancelOperationReq) - return this.equals((TCancelOperationReq)that); - return false; - } - - public boolean equals(TCancelOperationReq that) { - if (that == null) - return false; - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_operationHandle = true && (isSetOperationHandle()); - builder.append(present_operationHandle); - if (present_operationHandle) - builder.append(operationHandle); - - return builder.toHashCode(); - } - - public int compareTo(TCancelOperationReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TCancelOperationReq typedOther = (TCancelOperationReq)other; - - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(typedOther.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, typedOther.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - 
schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCancelOperationReq("); - boolean first = true; - - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetOperationHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCancelOperationReqStandardSchemeFactory implements SchemeFactory { - public TCancelOperationReqStandardScheme getScheme() { - return new TCancelOperationReqStandardScheme(); - } - } - - private static class TCancelOperationReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCancelOperationReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCancelOperationReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.operationHandle != null) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCancelOperationReqTupleSchemeFactory implements SchemeFactory { - public TCancelOperationReqTupleScheme getScheme() { - return new TCancelOperationReqTupleScheme(); - } - } - - private static class TCancelOperationReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCancelOperationReq struct) throws 
org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.operationHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCancelOperationReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelOperationResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelOperationResp.java deleted file mode 100644 index 2a39414d601aa..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelOperationResp.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TCancelOperationResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCancelOperationResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCancelOperationRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCancelOperationRespTupleSchemeFactory()); - } - - private TStatus status; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCancelOperationResp.class, metaDataMap); - } - - public TCancelOperationResp() { - } - - public TCancelOperationResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TCancelOperationResp(TCancelOperationResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - } - - public TCancelOperationResp deepCopy() { - return new TCancelOperationResp(this); - } - - @Override - public void clear() { - this.status = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCancelOperationResp) - return this.equals((TCancelOperationResp)that); - return false; - } - - public boolean equals(TCancelOperationResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - 
return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - builder.append(present_status); - if (present_status) - builder.append(status); - - return builder.toHashCode(); - } - - public int compareTo(TCancelOperationResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TCancelOperationResp typedOther = (TCancelOperationResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCancelOperationResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCancelOperationRespStandardSchemeFactory implements SchemeFactory { - public TCancelOperationRespStandardScheme getScheme() { - return new TCancelOperationRespStandardScheme(); - } - } - - private static class TCancelOperationRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCancelOperationResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCancelOperationResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCancelOperationRespTupleSchemeFactory implements SchemeFactory { - public TCancelOperationRespTupleScheme getScheme() { - return new TCancelOperationRespTupleScheme(); - } - } - - private static class TCancelOperationRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCancelOperationResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCancelOperationResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseOperationReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseOperationReq.java deleted file mode 100644 index 0cbb7ccced073..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseOperationReq.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE 
SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TCloseOperationReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCloseOperationReq"); - - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCloseOperationReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCloseOperationReqTupleSchemeFactory()); - } - - private TOperationHandle operationHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OPERATION_HANDLE((short)1, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCloseOperationReq.class, metaDataMap); - } - - public TCloseOperationReq() { - } - - public TCloseOperationReq( - TOperationHandle operationHandle) - { - this(); - this.operationHandle = operationHandle; - } - - /** - * Performs a deep copy on other. - */ - public TCloseOperationReq(TCloseOperationReq other) { - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TCloseOperationReq deepCopy() { - return new TCloseOperationReq(this); - } - - @Override - public void clear() { - this.operationHandle = null; - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCloseOperationReq) - return this.equals((TCloseOperationReq)that); - return false; - } - - public boolean equals(TCloseOperationReq that) { - if (that == null) - return false; - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) 
- return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_operationHandle = true && (isSetOperationHandle()); - builder.append(present_operationHandle); - if (present_operationHandle) - builder.append(operationHandle); - - return builder.toHashCode(); - } - - public int compareTo(TCloseOperationReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TCloseOperationReq typedOther = (TCloseOperationReq)other; - - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(typedOther.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, typedOther.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCloseOperationReq("); - boolean first = true; - - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetOperationHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationHandle' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCloseOperationReqStandardSchemeFactory implements SchemeFactory { - public TCloseOperationReqStandardScheme getScheme() { - return new TCloseOperationReqStandardScheme(); - } - } - - private static class TCloseOperationReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCloseOperationReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCloseOperationReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.operationHandle != null) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCloseOperationReqTupleSchemeFactory implements SchemeFactory { - public TCloseOperationReqTupleScheme getScheme() { - return new TCloseOperationReqTupleScheme(); - } - } - - private static class TCloseOperationReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCloseOperationReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.operationHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCloseOperationReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseOperationResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseOperationResp.java deleted file mode 100644 index 7334d67173d7b..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseOperationResp.java 
+++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TCloseOperationResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCloseOperationResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCloseOperationRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCloseOperationRespTupleSchemeFactory()); - } - - private TStatus status; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCloseOperationResp.class, metaDataMap); - } - - public TCloseOperationResp() { - } - - public TCloseOperationResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TCloseOperationResp(TCloseOperationResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - } - - public TCloseOperationResp deepCopy() { - return new TCloseOperationResp(this); - } - - @Override - public void clear() { - this.status = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCloseOperationResp) - return this.equals((TCloseOperationResp)that); - return false; - } - - public boolean equals(TCloseOperationResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - builder.append(present_status); - if (present_status) - builder.append(status); - - return builder.toHashCode(); - } - - public int 
compareTo(TCloseOperationResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TCloseOperationResp typedOther = (TCloseOperationResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCloseOperationResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCloseOperationRespStandardSchemeFactory implements SchemeFactory { - public TCloseOperationRespStandardScheme getScheme() { - return new TCloseOperationRespStandardScheme(); - } - } - - private static class TCloseOperationRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCloseOperationResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCloseOperationResp struct) throws org.apache.thrift.TException { - 
struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCloseOperationRespTupleSchemeFactory implements SchemeFactory { - public TCloseOperationRespTupleScheme getScheme() { - return new TCloseOperationRespTupleScheme(); - } - } - - private static class TCloseOperationRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCloseOperationResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCloseOperationResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseSessionReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseSessionReq.java deleted file mode 100644 index 027e8295436b0..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseSessionReq.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TCloseSessionReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCloseSessionReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCloseSessionReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCloseSessionReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCloseSessionReq.class, metaDataMap); - } - - public TCloseSessionReq() { - } - - public TCloseSessionReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TCloseSessionReq(TCloseSessionReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - } - - public TCloseSessionReq deepCopy() { - return new TCloseSessionReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCloseSessionReq) - return this.equals((TCloseSessionReq)that); - return false; - } - - public boolean equals(TCloseSessionReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - return builder.toHashCode(); - } - - public int compareTo(TCloseSessionReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TCloseSessionReq typedOther = (TCloseSessionReq)other; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws 
org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCloseSessionReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCloseSessionReqStandardSchemeFactory implements SchemeFactory { - public TCloseSessionReqStandardScheme getScheme() { - return new TCloseSessionReqStandardScheme(); - } - } - - private static class TCloseSessionReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCloseSessionReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCloseSessionReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCloseSessionReqTupleSchemeFactory implements SchemeFactory { - public TCloseSessionReqTupleScheme getScheme() { - return new TCloseSessionReqTupleScheme(); - } - } - - private static class TCloseSessionReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCloseSessionReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCloseSessionReq struct) 
throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseSessionResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseSessionResp.java deleted file mode 100644 index 168c8fc775e33..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseSessionResp.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TCloseSessionResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCloseSessionResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCloseSessionRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCloseSessionRespTupleSchemeFactory()); - } - - private TStatus status; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCloseSessionResp.class, metaDataMap); - } - - public TCloseSessionResp() { - } - - public TCloseSessionResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TCloseSessionResp(TCloseSessionResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - } - - public TCloseSessionResp deepCopy() { - return new TCloseSessionResp(this); - } - - @Override - public void clear() { - this.status = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCloseSessionResp) - return this.equals((TCloseSessionResp)that); - return false; - } - - public boolean equals(TCloseSessionResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - builder.append(present_status); - if (present_status) - builder.append(status); - - return builder.toHashCode(); - } - - public int 
compareTo(TCloseSessionResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TCloseSessionResp typedOther = (TCloseSessionResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCloseSessionResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCloseSessionRespStandardSchemeFactory implements SchemeFactory { - public TCloseSessionRespStandardScheme getScheme() { - return new TCloseSessionRespStandardScheme(); - } - } - - private static class TCloseSessionRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCloseSessionResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCloseSessionResp struct) throws org.apache.thrift.TException { - struct.validate(); - - 
oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCloseSessionRespTupleSchemeFactory implements SchemeFactory { - public TCloseSessionRespTupleScheme getScheme() { - return new TCloseSessionRespTupleScheme(); - } - } - - private static class TCloseSessionRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCloseSessionResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCloseSessionResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumn.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumn.java deleted file mode 100644 index fc2171dc99e4c..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumn.java +++ /dev/null @@ -1,732 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TColumn extends org.apache.thrift.TUnion { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TColumn"); - private static final org.apache.thrift.protocol.TField BOOL_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("boolVal", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField BYTE_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("byteVal", org.apache.thrift.protocol.TType.STRUCT, (short)2); - private static final org.apache.thrift.protocol.TField I16_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("i16Val", org.apache.thrift.protocol.TType.STRUCT, (short)3); - private static final org.apache.thrift.protocol.TField I32_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("i32Val", org.apache.thrift.protocol.TType.STRUCT, (short)4); - private static final org.apache.thrift.protocol.TField I64_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("i64Val", org.apache.thrift.protocol.TType.STRUCT, (short)5); - private static final org.apache.thrift.protocol.TField DOUBLE_VAL_FIELD_DESC = 
new org.apache.thrift.protocol.TField("doubleVal", org.apache.thrift.protocol.TType.STRUCT, (short)6); - private static final org.apache.thrift.protocol.TField STRING_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("stringVal", org.apache.thrift.protocol.TType.STRUCT, (short)7); - private static final org.apache.thrift.protocol.TField BINARY_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("binaryVal", org.apache.thrift.protocol.TType.STRUCT, (short)8); - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - BOOL_VAL((short)1, "boolVal"), - BYTE_VAL((short)2, "byteVal"), - I16_VAL((short)3, "i16Val"), - I32_VAL((short)4, "i32Val"), - I64_VAL((short)5, "i64Val"), - DOUBLE_VAL((short)6, "doubleVal"), - STRING_VAL((short)7, "stringVal"), - BINARY_VAL((short)8, "binaryVal"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // BOOL_VAL - return BOOL_VAL; - case 2: // BYTE_VAL - return BYTE_VAL; - case 3: // I16_VAL - return I16_VAL; - case 4: // I32_VAL - return I32_VAL; - case 5: // I64_VAL - return I64_VAL; - case 6: // DOUBLE_VAL - return DOUBLE_VAL; - case 7: // STRING_VAL - return STRING_VAL; - case 8: // BINARY_VAL - return BINARY_VAL; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.BOOL_VAL, new org.apache.thrift.meta_data.FieldMetaData("boolVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TBoolColumn.class))); - tmpMap.put(_Fields.BYTE_VAL, new org.apache.thrift.meta_data.FieldMetaData("byteVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TByteColumn.class))); - tmpMap.put(_Fields.I16_VAL, new org.apache.thrift.meta_data.FieldMetaData("i16Val", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TI16Column.class))); - tmpMap.put(_Fields.I32_VAL, new org.apache.thrift.meta_data.FieldMetaData("i32Val", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TI32Column.class))); - tmpMap.put(_Fields.I64_VAL, new org.apache.thrift.meta_data.FieldMetaData("i64Val", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TI64Column.class))); - tmpMap.put(_Fields.DOUBLE_VAL, new org.apache.thrift.meta_data.FieldMetaData("doubleVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TDoubleColumn.class))); - tmpMap.put(_Fields.STRING_VAL, new org.apache.thrift.meta_data.FieldMetaData("stringVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStringColumn.class))); - tmpMap.put(_Fields.BINARY_VAL, new org.apache.thrift.meta_data.FieldMetaData("binaryVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TBinaryColumn.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TColumn.class, metaDataMap); - } - - public TColumn() { - super(); - } - - public TColumn(TColumn._Fields setField, Object value) { - super(setField, value); - } - - public TColumn(TColumn other) { - super(other); - } - public TColumn deepCopy() { - return new TColumn(this); - } - - public static TColumn boolVal(TBoolColumn value) { - TColumn x = new TColumn(); - x.setBoolVal(value); - return x; - } - - public static TColumn byteVal(TByteColumn value) { - TColumn x = new TColumn(); - x.setByteVal(value); - return x; - } - - public static TColumn i16Val(TI16Column value) { - TColumn x = new TColumn(); - x.setI16Val(value); - return x; - } - - public static TColumn i32Val(TI32Column value) { - TColumn x = new TColumn(); - x.setI32Val(value); - return x; - } - - public static TColumn 
i64Val(TI64Column value) { - TColumn x = new TColumn(); - x.setI64Val(value); - return x; - } - - public static TColumn doubleVal(TDoubleColumn value) { - TColumn x = new TColumn(); - x.setDoubleVal(value); - return x; - } - - public static TColumn stringVal(TStringColumn value) { - TColumn x = new TColumn(); - x.setStringVal(value); - return x; - } - - public static TColumn binaryVal(TBinaryColumn value) { - TColumn x = new TColumn(); - x.setBinaryVal(value); - return x; - } - - - @Override - protected void checkType(_Fields setField, Object value) throws ClassCastException { - switch (setField) { - case BOOL_VAL: - if (value instanceof TBoolColumn) { - break; - } - throw new ClassCastException("Was expecting value of type TBoolColumn for field 'boolVal', but got " + value.getClass().getSimpleName()); - case BYTE_VAL: - if (value instanceof TByteColumn) { - break; - } - throw new ClassCastException("Was expecting value of type TByteColumn for field 'byteVal', but got " + value.getClass().getSimpleName()); - case I16_VAL: - if (value instanceof TI16Column) { - break; - } - throw new ClassCastException("Was expecting value of type TI16Column for field 'i16Val', but got " + value.getClass().getSimpleName()); - case I32_VAL: - if (value instanceof TI32Column) { - break; - } - throw new ClassCastException("Was expecting value of type TI32Column for field 'i32Val', but got " + value.getClass().getSimpleName()); - case I64_VAL: - if (value instanceof TI64Column) { - break; - } - throw new ClassCastException("Was expecting value of type TI64Column for field 'i64Val', but got " + value.getClass().getSimpleName()); - case DOUBLE_VAL: - if (value instanceof TDoubleColumn) { - break; - } - throw new ClassCastException("Was expecting value of type TDoubleColumn for field 'doubleVal', but got " + value.getClass().getSimpleName()); - case STRING_VAL: - if (value instanceof TStringColumn) { - break; - } - throw new ClassCastException("Was expecting value of type TStringColumn for field 'stringVal', but got " + value.getClass().getSimpleName()); - case BINARY_VAL: - if (value instanceof TBinaryColumn) { - break; - } - throw new ClassCastException("Was expecting value of type TBinaryColumn for field 'binaryVal', but got " + value.getClass().getSimpleName()); - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected Object standardSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, org.apache.thrift.protocol.TField field) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(field.id); - if (setField != null) { - switch (setField) { - case BOOL_VAL: - if (field.type == BOOL_VAL_FIELD_DESC.type) { - TBoolColumn boolVal; - boolVal = new TBoolColumn(); - boolVal.read(iprot); - return boolVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case BYTE_VAL: - if (field.type == BYTE_VAL_FIELD_DESC.type) { - TByteColumn byteVal; - byteVal = new TByteColumn(); - byteVal.read(iprot); - return byteVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case I16_VAL: - if (field.type == I16_VAL_FIELD_DESC.type) { - TI16Column i16Val; - i16Val = new TI16Column(); - i16Val.read(iprot); - return i16Val; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case I32_VAL: - if (field.type == I32_VAL_FIELD_DESC.type) { - TI32Column i32Val; - i32Val = new TI32Column(); - i32Val.read(iprot); 
- return i32Val; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case I64_VAL: - if (field.type == I64_VAL_FIELD_DESC.type) { - TI64Column i64Val; - i64Val = new TI64Column(); - i64Val.read(iprot); - return i64Val; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case DOUBLE_VAL: - if (field.type == DOUBLE_VAL_FIELD_DESC.type) { - TDoubleColumn doubleVal; - doubleVal = new TDoubleColumn(); - doubleVal.read(iprot); - return doubleVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case STRING_VAL: - if (field.type == STRING_VAL_FIELD_DESC.type) { - TStringColumn stringVal; - stringVal = new TStringColumn(); - stringVal.read(iprot); - return stringVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case BINARY_VAL: - if (field.type == BINARY_VAL_FIELD_DESC.type) { - TBinaryColumn binaryVal; - binaryVal = new TBinaryColumn(); - binaryVal.read(iprot); - return binaryVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - return null; - } - } - - @Override - protected void standardSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case BOOL_VAL: - TBoolColumn boolVal = (TBoolColumn)value_; - boolVal.write(oprot); - return; - case BYTE_VAL: - TByteColumn byteVal = (TByteColumn)value_; - byteVal.write(oprot); - return; - case I16_VAL: - TI16Column i16Val = (TI16Column)value_; - i16Val.write(oprot); - return; - case I32_VAL: - TI32Column i32Val = (TI32Column)value_; - i32Val.write(oprot); - return; - case I64_VAL: - TI64Column i64Val = (TI64Column)value_; - i64Val.write(oprot); - return; - case DOUBLE_VAL: - TDoubleColumn doubleVal = (TDoubleColumn)value_; - doubleVal.write(oprot); - return; - case STRING_VAL: - TStringColumn stringVal = (TStringColumn)value_; - stringVal.write(oprot); - return; - case BINARY_VAL: - TBinaryColumn binaryVal = (TBinaryColumn)value_; - binaryVal.write(oprot); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected Object tupleSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, short fieldID) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(fieldID); - if (setField != null) { - switch (setField) { - case BOOL_VAL: - TBoolColumn boolVal; - boolVal = new TBoolColumn(); - boolVal.read(iprot); - return boolVal; - case BYTE_VAL: - TByteColumn byteVal; - byteVal = new TByteColumn(); - byteVal.read(iprot); - return byteVal; - case I16_VAL: - TI16Column i16Val; - i16Val = new TI16Column(); - i16Val.read(iprot); - return i16Val; - case I32_VAL: - TI32Column i32Val; - i32Val = new TI32Column(); - i32Val.read(iprot); - return i32Val; - case I64_VAL: - TI64Column i64Val; - i64Val = new TI64Column(); - i64Val.read(iprot); - return i64Val; - case DOUBLE_VAL: - TDoubleColumn doubleVal; - doubleVal = new TDoubleColumn(); - doubleVal.read(iprot); - return doubleVal; - case STRING_VAL: - TStringColumn stringVal; - stringVal = new TStringColumn(); - stringVal.read(iprot); - return stringVal; - case BINARY_VAL: - TBinaryColumn binaryVal; - binaryVal = new TBinaryColumn(); - binaryVal.read(iprot); - return 
binaryVal; - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - throw new TProtocolException("Couldn't find a field with field id " + fieldID); - } - } - - @Override - protected void tupleSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case BOOL_VAL: - TBoolColumn boolVal = (TBoolColumn)value_; - boolVal.write(oprot); - return; - case BYTE_VAL: - TByteColumn byteVal = (TByteColumn)value_; - byteVal.write(oprot); - return; - case I16_VAL: - TI16Column i16Val = (TI16Column)value_; - i16Val.write(oprot); - return; - case I32_VAL: - TI32Column i32Val = (TI32Column)value_; - i32Val.write(oprot); - return; - case I64_VAL: - TI64Column i64Val = (TI64Column)value_; - i64Val.write(oprot); - return; - case DOUBLE_VAL: - TDoubleColumn doubleVal = (TDoubleColumn)value_; - doubleVal.write(oprot); - return; - case STRING_VAL: - TStringColumn stringVal = (TStringColumn)value_; - stringVal.write(oprot); - return; - case BINARY_VAL: - TBinaryColumn binaryVal = (TBinaryColumn)value_; - binaryVal.write(oprot); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected org.apache.thrift.protocol.TField getFieldDesc(_Fields setField) { - switch (setField) { - case BOOL_VAL: - return BOOL_VAL_FIELD_DESC; - case BYTE_VAL: - return BYTE_VAL_FIELD_DESC; - case I16_VAL: - return I16_VAL_FIELD_DESC; - case I32_VAL: - return I32_VAL_FIELD_DESC; - case I64_VAL: - return I64_VAL_FIELD_DESC; - case DOUBLE_VAL: - return DOUBLE_VAL_FIELD_DESC; - case STRING_VAL: - return STRING_VAL_FIELD_DESC; - case BINARY_VAL: - return BINARY_VAL_FIELD_DESC; - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected org.apache.thrift.protocol.TStruct getStructDesc() { - return STRUCT_DESC; - } - - @Override - protected _Fields enumForId(short id) { - return _Fields.findByThriftIdOrThrow(id); - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - - public TBoolColumn getBoolVal() { - if (getSetField() == _Fields.BOOL_VAL) { - return (TBoolColumn)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'boolVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setBoolVal(TBoolColumn value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.BOOL_VAL; - value_ = value; - } - - public TByteColumn getByteVal() { - if (getSetField() == _Fields.BYTE_VAL) { - return (TByteColumn)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'byteVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setByteVal(TByteColumn value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.BYTE_VAL; - value_ = value; - } - - public TI16Column getI16Val() { - if (getSetField() == _Fields.I16_VAL) { - return (TI16Column)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'i16Val' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI16Val(TI16Column value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.I16_VAL; - value_ = value; - } - - public TI32Column getI32Val() { - if (getSetField() == _Fields.I32_VAL) { - return (TI32Column)getFieldValue(); 
- } else { - throw new RuntimeException("Cannot get field 'i32Val' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI32Val(TI32Column value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.I32_VAL; - value_ = value; - } - - public TI64Column getI64Val() { - if (getSetField() == _Fields.I64_VAL) { - return (TI64Column)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'i64Val' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI64Val(TI64Column value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.I64_VAL; - value_ = value; - } - - public TDoubleColumn getDoubleVal() { - if (getSetField() == _Fields.DOUBLE_VAL) { - return (TDoubleColumn)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'doubleVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setDoubleVal(TDoubleColumn value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.DOUBLE_VAL; - value_ = value; - } - - public TStringColumn getStringVal() { - if (getSetField() == _Fields.STRING_VAL) { - return (TStringColumn)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'stringVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setStringVal(TStringColumn value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.STRING_VAL; - value_ = value; - } - - public TBinaryColumn getBinaryVal() { - if (getSetField() == _Fields.BINARY_VAL) { - return (TBinaryColumn)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'binaryVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setBinaryVal(TBinaryColumn value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.BINARY_VAL; - value_ = value; - } - - public boolean isSetBoolVal() { - return setField_ == _Fields.BOOL_VAL; - } - - - public boolean isSetByteVal() { - return setField_ == _Fields.BYTE_VAL; - } - - - public boolean isSetI16Val() { - return setField_ == _Fields.I16_VAL; - } - - - public boolean isSetI32Val() { - return setField_ == _Fields.I32_VAL; - } - - - public boolean isSetI64Val() { - return setField_ == _Fields.I64_VAL; - } - - - public boolean isSetDoubleVal() { - return setField_ == _Fields.DOUBLE_VAL; - } - - - public boolean isSetStringVal() { - return setField_ == _Fields.STRING_VAL; - } - - - public boolean isSetBinaryVal() { - return setField_ == _Fields.BINARY_VAL; - } - - - public boolean equals(Object other) { - if (other instanceof TColumn) { - return equals((TColumn)other); - } else { - return false; - } - } - - public boolean equals(TColumn other) { - return other != null && getSetField() == other.getSetField() && getFieldValue().equals(other.getFieldValue()); - } - - @Override - public int compareTo(TColumn other) { - int lastComparison = org.apache.thrift.TBaseHelper.compareTo(getSetField(), other.getSetField()); - if (lastComparison == 0) { - return org.apache.thrift.TBaseHelper.compareTo(getFieldValue(), other.getFieldValue()); - } - return lastComparison; - } - - - @Override - public int hashCode() { - HashCodeBuilder hcb = new HashCodeBuilder(); - hcb.append(this.getClass().getName()); - org.apache.thrift.TFieldIdEnum setField = getSetField(); - if (setField != null) { - 
hcb.append(setField.getThriftFieldId()); - Object value = getFieldValue(); - if (value instanceof org.apache.thrift.TEnum) { - hcb.append(((org.apache.thrift.TEnum)getFieldValue()).getValue()); - } else { - hcb.append(value); - } - } - return hcb.toHashCode(); - } - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - -} diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumnDesc.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumnDesc.java deleted file mode 100644 index 247db6489457f..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumnDesc.java +++ /dev/null @@ -1,700 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TColumnDesc implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TColumnDesc"); - - private static final org.apache.thrift.protocol.TField COLUMN_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("columnName", org.apache.thrift.protocol.TType.STRING, (short)1); - private static final org.apache.thrift.protocol.TField TYPE_DESC_FIELD_DESC = new org.apache.thrift.protocol.TField("typeDesc", org.apache.thrift.protocol.TType.STRUCT, (short)2); - private static final org.apache.thrift.protocol.TField POSITION_FIELD_DESC = new org.apache.thrift.protocol.TField("position", org.apache.thrift.protocol.TType.I32, (short)3); - private static final org.apache.thrift.protocol.TField COMMENT_FIELD_DESC = new org.apache.thrift.protocol.TField("comment", org.apache.thrift.protocol.TType.STRING, (short)4); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TColumnDescStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TColumnDescTupleSchemeFactory()); - } - - private String columnName; // required - private TTypeDesc 
typeDesc; // required - private int position; // required - private String comment; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - COLUMN_NAME((short)1, "columnName"), - TYPE_DESC((short)2, "typeDesc"), - POSITION((short)3, "position"), - COMMENT((short)4, "comment"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // COLUMN_NAME - return COLUMN_NAME; - case 2: // TYPE_DESC - return TYPE_DESC; - case 3: // POSITION - return POSITION; - case 4: // COMMENT - return COMMENT; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __POSITION_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.COMMENT}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.COLUMN_NAME, new org.apache.thrift.meta_data.FieldMetaData("columnName", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.TYPE_DESC, new org.apache.thrift.meta_data.FieldMetaData("typeDesc", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TTypeDesc.class))); - tmpMap.put(_Fields.POSITION, new org.apache.thrift.meta_data.FieldMetaData("position", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.COMMENT, new org.apache.thrift.meta_data.FieldMetaData("comment", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TColumnDesc.class, metaDataMap); - } - - public TColumnDesc() { - } - - public TColumnDesc( - String columnName, - TTypeDesc typeDesc, - int position) - { - this(); - this.columnName = columnName; - this.typeDesc = typeDesc; - this.position = position; - setPositionIsSet(true); - } - - /** - * 
Performs a deep copy on other. - */ - public TColumnDesc(TColumnDesc other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetColumnName()) { - this.columnName = other.columnName; - } - if (other.isSetTypeDesc()) { - this.typeDesc = new TTypeDesc(other.typeDesc); - } - this.position = other.position; - if (other.isSetComment()) { - this.comment = other.comment; - } - } - - public TColumnDesc deepCopy() { - return new TColumnDesc(this); - } - - @Override - public void clear() { - this.columnName = null; - this.typeDesc = null; - setPositionIsSet(false); - this.position = 0; - this.comment = null; - } - - public String getColumnName() { - return this.columnName; - } - - public void setColumnName(String columnName) { - this.columnName = columnName; - } - - public void unsetColumnName() { - this.columnName = null; - } - - /** Returns true if field columnName is set (has been assigned a value) and false otherwise */ - public boolean isSetColumnName() { - return this.columnName != null; - } - - public void setColumnNameIsSet(boolean value) { - if (!value) { - this.columnName = null; - } - } - - public TTypeDesc getTypeDesc() { - return this.typeDesc; - } - - public void setTypeDesc(TTypeDesc typeDesc) { - this.typeDesc = typeDesc; - } - - public void unsetTypeDesc() { - this.typeDesc = null; - } - - /** Returns true if field typeDesc is set (has been assigned a value) and false otherwise */ - public boolean isSetTypeDesc() { - return this.typeDesc != null; - } - - public void setTypeDescIsSet(boolean value) { - if (!value) { - this.typeDesc = null; - } - } - - public int getPosition() { - return this.position; - } - - public void setPosition(int position) { - this.position = position; - setPositionIsSet(true); - } - - public void unsetPosition() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __POSITION_ISSET_ID); - } - - /** Returns true if field position is set (has been assigned a value) and false otherwise */ - public boolean isSetPosition() { - return EncodingUtils.testBit(__isset_bitfield, __POSITION_ISSET_ID); - } - - public void setPositionIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __POSITION_ISSET_ID, value); - } - - public String getComment() { - return this.comment; - } - - public void setComment(String comment) { - this.comment = comment; - } - - public void unsetComment() { - this.comment = null; - } - - /** Returns true if field comment is set (has been assigned a value) and false otherwise */ - public boolean isSetComment() { - return this.comment != null; - } - - public void setCommentIsSet(boolean value) { - if (!value) { - this.comment = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case COLUMN_NAME: - if (value == null) { - unsetColumnName(); - } else { - setColumnName((String)value); - } - break; - - case TYPE_DESC: - if (value == null) { - unsetTypeDesc(); - } else { - setTypeDesc((TTypeDesc)value); - } - break; - - case POSITION: - if (value == null) { - unsetPosition(); - } else { - setPosition((Integer)value); - } - break; - - case COMMENT: - if (value == null) { - unsetComment(); - } else { - setComment((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case COLUMN_NAME: - return getColumnName(); - - case TYPE_DESC: - return getTypeDesc(); - - case POSITION: - return Integer.valueOf(getPosition()); - - case COMMENT: - return getComment(); - - } - throw new IllegalStateException(); - } - - /** 
Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case COLUMN_NAME: - return isSetColumnName(); - case TYPE_DESC: - return isSetTypeDesc(); - case POSITION: - return isSetPosition(); - case COMMENT: - return isSetComment(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TColumnDesc) - return this.equals((TColumnDesc)that); - return false; - } - - public boolean equals(TColumnDesc that) { - if (that == null) - return false; - - boolean this_present_columnName = true && this.isSetColumnName(); - boolean that_present_columnName = true && that.isSetColumnName(); - if (this_present_columnName || that_present_columnName) { - if (!(this_present_columnName && that_present_columnName)) - return false; - if (!this.columnName.equals(that.columnName)) - return false; - } - - boolean this_present_typeDesc = true && this.isSetTypeDesc(); - boolean that_present_typeDesc = true && that.isSetTypeDesc(); - if (this_present_typeDesc || that_present_typeDesc) { - if (!(this_present_typeDesc && that_present_typeDesc)) - return false; - if (!this.typeDesc.equals(that.typeDesc)) - return false; - } - - boolean this_present_position = true; - boolean that_present_position = true; - if (this_present_position || that_present_position) { - if (!(this_present_position && that_present_position)) - return false; - if (this.position != that.position) - return false; - } - - boolean this_present_comment = true && this.isSetComment(); - boolean that_present_comment = true && that.isSetComment(); - if (this_present_comment || that_present_comment) { - if (!(this_present_comment && that_present_comment)) - return false; - if (!this.comment.equals(that.comment)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_columnName = true && (isSetColumnName()); - builder.append(present_columnName); - if (present_columnName) - builder.append(columnName); - - boolean present_typeDesc = true && (isSetTypeDesc()); - builder.append(present_typeDesc); - if (present_typeDesc) - builder.append(typeDesc); - - boolean present_position = true; - builder.append(present_position); - if (present_position) - builder.append(position); - - boolean present_comment = true && (isSetComment()); - builder.append(present_comment); - if (present_comment) - builder.append(comment); - - return builder.toHashCode(); - } - - public int compareTo(TColumnDesc other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TColumnDesc typedOther = (TColumnDesc)other; - - lastComparison = Boolean.valueOf(isSetColumnName()).compareTo(typedOther.isSetColumnName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetColumnName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.columnName, typedOther.columnName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetTypeDesc()).compareTo(typedOther.isSetTypeDesc()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTypeDesc()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.typeDesc, typedOther.typeDesc); - if 
(lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetPosition()).compareTo(typedOther.isSetPosition()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetPosition()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.position, typedOther.position); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetComment()).compareTo(typedOther.isSetComment()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetComment()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.comment, typedOther.comment); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TColumnDesc("); - boolean first = true; - - sb.append("columnName:"); - if (this.columnName == null) { - sb.append("null"); - } else { - sb.append(this.columnName); - } - first = false; - if (!first) sb.append(", "); - sb.append("typeDesc:"); - if (this.typeDesc == null) { - sb.append("null"); - } else { - sb.append(this.typeDesc); - } - first = false; - if (!first) sb.append(", "); - sb.append("position:"); - sb.append(this.position); - first = false; - if (isSetComment()) { - if (!first) sb.append(", "); - sb.append("comment:"); - if (this.comment == null) { - sb.append("null"); - } else { - sb.append(this.comment); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetColumnName()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'columnName' is unset! Struct:" + toString()); - } - - if (!isSetTypeDesc()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'typeDesc' is unset! Struct:" + toString()); - } - - if (!isSetPosition()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'position' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (typeDesc != null) { - typeDesc.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
- __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TColumnDescStandardSchemeFactory implements SchemeFactory { - public TColumnDescStandardScheme getScheme() { - return new TColumnDescStandardScheme(); - } - } - - private static class TColumnDescStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TColumnDesc struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // COLUMN_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.columnName = iprot.readString(); - struct.setColumnNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // TYPE_DESC - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.typeDesc = new TTypeDesc(); - struct.typeDesc.read(iprot); - struct.setTypeDescIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // POSITION - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.position = iprot.readI32(); - struct.setPositionIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // COMMENT - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.comment = iprot.readString(); - struct.setCommentIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TColumnDesc struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.columnName != null) { - oprot.writeFieldBegin(COLUMN_NAME_FIELD_DESC); - oprot.writeString(struct.columnName); - oprot.writeFieldEnd(); - } - if (struct.typeDesc != null) { - oprot.writeFieldBegin(TYPE_DESC_FIELD_DESC); - struct.typeDesc.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldBegin(POSITION_FIELD_DESC); - oprot.writeI32(struct.position); - oprot.writeFieldEnd(); - if (struct.comment != null) { - if (struct.isSetComment()) { - oprot.writeFieldBegin(COMMENT_FIELD_DESC); - oprot.writeString(struct.comment); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TColumnDescTupleSchemeFactory implements SchemeFactory { - public TColumnDescTupleScheme getScheme() { - return new TColumnDescTupleScheme(); - } - } - - private static class TColumnDescTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TColumnDesc struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeString(struct.columnName); - struct.typeDesc.write(oprot); - oprot.writeI32(struct.position); - BitSet optionals = new BitSet(); - if (struct.isSetComment()) { - optionals.set(0); - } - 
oprot.writeBitSet(optionals, 1); - if (struct.isSetComment()) { - oprot.writeString(struct.comment); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TColumnDesc struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.columnName = iprot.readString(); - struct.setColumnNameIsSet(true); - struct.typeDesc = new TTypeDesc(); - struct.typeDesc.read(iprot); - struct.setTypeDescIsSet(true); - struct.position = iprot.readI32(); - struct.setPositionIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.comment = iprot.readString(); - struct.setCommentIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumnValue.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumnValue.java deleted file mode 100644 index 8504c6d608d42..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumnValue.java +++ /dev/null @@ -1,671 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TColumnValue extends org.apache.thrift.TUnion { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TColumnValue"); - private static final org.apache.thrift.protocol.TField BOOL_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("boolVal", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField BYTE_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("byteVal", org.apache.thrift.protocol.TType.STRUCT, (short)2); - private static final org.apache.thrift.protocol.TField I16_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("i16Val", org.apache.thrift.protocol.TType.STRUCT, (short)3); - private static final org.apache.thrift.protocol.TField I32_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("i32Val", org.apache.thrift.protocol.TType.STRUCT, (short)4); - private static final org.apache.thrift.protocol.TField I64_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("i64Val", org.apache.thrift.protocol.TType.STRUCT, (short)5); - private static final org.apache.thrift.protocol.TField DOUBLE_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("doubleVal", org.apache.thrift.protocol.TType.STRUCT, (short)6); - private static final org.apache.thrift.protocol.TField STRING_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("stringVal", org.apache.thrift.protocol.TType.STRUCT, (short)7); - - /** The set 
of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - BOOL_VAL((short)1, "boolVal"), - BYTE_VAL((short)2, "byteVal"), - I16_VAL((short)3, "i16Val"), - I32_VAL((short)4, "i32Val"), - I64_VAL((short)5, "i64Val"), - DOUBLE_VAL((short)6, "doubleVal"), - STRING_VAL((short)7, "stringVal"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // BOOL_VAL - return BOOL_VAL; - case 2: // BYTE_VAL - return BYTE_VAL; - case 3: // I16_VAL - return I16_VAL; - case 4: // I32_VAL - return I32_VAL; - case 5: // I64_VAL - return I64_VAL; - case 6: // DOUBLE_VAL - return DOUBLE_VAL; - case 7: // STRING_VAL - return STRING_VAL; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.BOOL_VAL, new org.apache.thrift.meta_data.FieldMetaData("boolVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TBoolValue.class))); - tmpMap.put(_Fields.BYTE_VAL, new org.apache.thrift.meta_data.FieldMetaData("byteVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TByteValue.class))); - tmpMap.put(_Fields.I16_VAL, new org.apache.thrift.meta_data.FieldMetaData("i16Val", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TI16Value.class))); - tmpMap.put(_Fields.I32_VAL, new org.apache.thrift.meta_data.FieldMetaData("i32Val", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TI32Value.class))); - tmpMap.put(_Fields.I64_VAL, new org.apache.thrift.meta_data.FieldMetaData("i64Val", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TI64Value.class))); - tmpMap.put(_Fields.DOUBLE_VAL, new org.apache.thrift.meta_data.FieldMetaData("doubleVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new 
org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TDoubleValue.class))); - tmpMap.put(_Fields.STRING_VAL, new org.apache.thrift.meta_data.FieldMetaData("stringVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStringValue.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TColumnValue.class, metaDataMap); - } - - public TColumnValue() { - super(); - } - - public TColumnValue(TColumnValue._Fields setField, Object value) { - super(setField, value); - } - - public TColumnValue(TColumnValue other) { - super(other); - } - public TColumnValue deepCopy() { - return new TColumnValue(this); - } - - public static TColumnValue boolVal(TBoolValue value) { - TColumnValue x = new TColumnValue(); - x.setBoolVal(value); - return x; - } - - public static TColumnValue byteVal(TByteValue value) { - TColumnValue x = new TColumnValue(); - x.setByteVal(value); - return x; - } - - public static TColumnValue i16Val(TI16Value value) { - TColumnValue x = new TColumnValue(); - x.setI16Val(value); - return x; - } - - public static TColumnValue i32Val(TI32Value value) { - TColumnValue x = new TColumnValue(); - x.setI32Val(value); - return x; - } - - public static TColumnValue i64Val(TI64Value value) { - TColumnValue x = new TColumnValue(); - x.setI64Val(value); - return x; - } - - public static TColumnValue doubleVal(TDoubleValue value) { - TColumnValue x = new TColumnValue(); - x.setDoubleVal(value); - return x; - } - - public static TColumnValue stringVal(TStringValue value) { - TColumnValue x = new TColumnValue(); - x.setStringVal(value); - return x; - } - - - @Override - protected void checkType(_Fields setField, Object value) throws ClassCastException { - switch (setField) { - case BOOL_VAL: - if (value instanceof TBoolValue) { - break; - } - throw new ClassCastException("Was expecting value of type TBoolValue for field 'boolVal', but got " + value.getClass().getSimpleName()); - case BYTE_VAL: - if (value instanceof TByteValue) { - break; - } - throw new ClassCastException("Was expecting value of type TByteValue for field 'byteVal', but got " + value.getClass().getSimpleName()); - case I16_VAL: - if (value instanceof TI16Value) { - break; - } - throw new ClassCastException("Was expecting value of type TI16Value for field 'i16Val', but got " + value.getClass().getSimpleName()); - case I32_VAL: - if (value instanceof TI32Value) { - break; - } - throw new ClassCastException("Was expecting value of type TI32Value for field 'i32Val', but got " + value.getClass().getSimpleName()); - case I64_VAL: - if (value instanceof TI64Value) { - break; - } - throw new ClassCastException("Was expecting value of type TI64Value for field 'i64Val', but got " + value.getClass().getSimpleName()); - case DOUBLE_VAL: - if (value instanceof TDoubleValue) { - break; - } - throw new ClassCastException("Was expecting value of type TDoubleValue for field 'doubleVal', but got " + value.getClass().getSimpleName()); - case STRING_VAL: - if (value instanceof TStringValue) { - break; - } - throw new ClassCastException("Was expecting value of type TStringValue for field 'stringVal', but got " + value.getClass().getSimpleName()); - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected Object standardSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, 
org.apache.thrift.protocol.TField field) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(field.id); - if (setField != null) { - switch (setField) { - case BOOL_VAL: - if (field.type == BOOL_VAL_FIELD_DESC.type) { - TBoolValue boolVal; - boolVal = new TBoolValue(); - boolVal.read(iprot); - return boolVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case BYTE_VAL: - if (field.type == BYTE_VAL_FIELD_DESC.type) { - TByteValue byteVal; - byteVal = new TByteValue(); - byteVal.read(iprot); - return byteVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case I16_VAL: - if (field.type == I16_VAL_FIELD_DESC.type) { - TI16Value i16Val; - i16Val = new TI16Value(); - i16Val.read(iprot); - return i16Val; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case I32_VAL: - if (field.type == I32_VAL_FIELD_DESC.type) { - TI32Value i32Val; - i32Val = new TI32Value(); - i32Val.read(iprot); - return i32Val; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case I64_VAL: - if (field.type == I64_VAL_FIELD_DESC.type) { - TI64Value i64Val; - i64Val = new TI64Value(); - i64Val.read(iprot); - return i64Val; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case DOUBLE_VAL: - if (field.type == DOUBLE_VAL_FIELD_DESC.type) { - TDoubleValue doubleVal; - doubleVal = new TDoubleValue(); - doubleVal.read(iprot); - return doubleVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case STRING_VAL: - if (field.type == STRING_VAL_FIELD_DESC.type) { - TStringValue stringVal; - stringVal = new TStringValue(); - stringVal.read(iprot); - return stringVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - return null; - } - } - - @Override - protected void standardSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case BOOL_VAL: - TBoolValue boolVal = (TBoolValue)value_; - boolVal.write(oprot); - return; - case BYTE_VAL: - TByteValue byteVal = (TByteValue)value_; - byteVal.write(oprot); - return; - case I16_VAL: - TI16Value i16Val = (TI16Value)value_; - i16Val.write(oprot); - return; - case I32_VAL: - TI32Value i32Val = (TI32Value)value_; - i32Val.write(oprot); - return; - case I64_VAL: - TI64Value i64Val = (TI64Value)value_; - i64Val.write(oprot); - return; - case DOUBLE_VAL: - TDoubleValue doubleVal = (TDoubleValue)value_; - doubleVal.write(oprot); - return; - case STRING_VAL: - TStringValue stringVal = (TStringValue)value_; - stringVal.write(oprot); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected Object tupleSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, short fieldID) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(fieldID); - if (setField != null) { - switch (setField) { - case BOOL_VAL: - TBoolValue boolVal; - boolVal = new TBoolValue(); - boolVal.read(iprot); - return boolVal; - case BYTE_VAL: - TByteValue byteVal; - byteVal = new TByteValue(); - byteVal.read(iprot); - return byteVal; - case 
I16_VAL: - TI16Value i16Val; - i16Val = new TI16Value(); - i16Val.read(iprot); - return i16Val; - case I32_VAL: - TI32Value i32Val; - i32Val = new TI32Value(); - i32Val.read(iprot); - return i32Val; - case I64_VAL: - TI64Value i64Val; - i64Val = new TI64Value(); - i64Val.read(iprot); - return i64Val; - case DOUBLE_VAL: - TDoubleValue doubleVal; - doubleVal = new TDoubleValue(); - doubleVal.read(iprot); - return doubleVal; - case STRING_VAL: - TStringValue stringVal; - stringVal = new TStringValue(); - stringVal.read(iprot); - return stringVal; - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - throw new TProtocolException("Couldn't find a field with field id " + fieldID); - } - } - - @Override - protected void tupleSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case BOOL_VAL: - TBoolValue boolVal = (TBoolValue)value_; - boolVal.write(oprot); - return; - case BYTE_VAL: - TByteValue byteVal = (TByteValue)value_; - byteVal.write(oprot); - return; - case I16_VAL: - TI16Value i16Val = (TI16Value)value_; - i16Val.write(oprot); - return; - case I32_VAL: - TI32Value i32Val = (TI32Value)value_; - i32Val.write(oprot); - return; - case I64_VAL: - TI64Value i64Val = (TI64Value)value_; - i64Val.write(oprot); - return; - case DOUBLE_VAL: - TDoubleValue doubleVal = (TDoubleValue)value_; - doubleVal.write(oprot); - return; - case STRING_VAL: - TStringValue stringVal = (TStringValue)value_; - stringVal.write(oprot); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected org.apache.thrift.protocol.TField getFieldDesc(_Fields setField) { - switch (setField) { - case BOOL_VAL: - return BOOL_VAL_FIELD_DESC; - case BYTE_VAL: - return BYTE_VAL_FIELD_DESC; - case I16_VAL: - return I16_VAL_FIELD_DESC; - case I32_VAL: - return I32_VAL_FIELD_DESC; - case I64_VAL: - return I64_VAL_FIELD_DESC; - case DOUBLE_VAL: - return DOUBLE_VAL_FIELD_DESC; - case STRING_VAL: - return STRING_VAL_FIELD_DESC; - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected org.apache.thrift.protocol.TStruct getStructDesc() { - return STRUCT_DESC; - } - - @Override - protected _Fields enumForId(short id) { - return _Fields.findByThriftIdOrThrow(id); - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - - public TBoolValue getBoolVal() { - if (getSetField() == _Fields.BOOL_VAL) { - return (TBoolValue)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'boolVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setBoolVal(TBoolValue value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.BOOL_VAL; - value_ = value; - } - - public TByteValue getByteVal() { - if (getSetField() == _Fields.BYTE_VAL) { - return (TByteValue)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'byteVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setByteVal(TByteValue value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.BYTE_VAL; - value_ = value; - } - - public TI16Value getI16Val() { - if (getSetField() == _Fields.I16_VAL) { - return (TI16Value)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 
'i16Val' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI16Val(TI16Value value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.I16_VAL; - value_ = value; - } - - public TI32Value getI32Val() { - if (getSetField() == _Fields.I32_VAL) { - return (TI32Value)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'i32Val' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI32Val(TI32Value value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.I32_VAL; - value_ = value; - } - - public TI64Value getI64Val() { - if (getSetField() == _Fields.I64_VAL) { - return (TI64Value)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'i64Val' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI64Val(TI64Value value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.I64_VAL; - value_ = value; - } - - public TDoubleValue getDoubleVal() { - if (getSetField() == _Fields.DOUBLE_VAL) { - return (TDoubleValue)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'doubleVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setDoubleVal(TDoubleValue value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.DOUBLE_VAL; - value_ = value; - } - - public TStringValue getStringVal() { - if (getSetField() == _Fields.STRING_VAL) { - return (TStringValue)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'stringVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setStringVal(TStringValue value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.STRING_VAL; - value_ = value; - } - - public boolean isSetBoolVal() { - return setField_ == _Fields.BOOL_VAL; - } - - - public boolean isSetByteVal() { - return setField_ == _Fields.BYTE_VAL; - } - - - public boolean isSetI16Val() { - return setField_ == _Fields.I16_VAL; - } - - - public boolean isSetI32Val() { - return setField_ == _Fields.I32_VAL; - } - - - public boolean isSetI64Val() { - return setField_ == _Fields.I64_VAL; - } - - - public boolean isSetDoubleVal() { - return setField_ == _Fields.DOUBLE_VAL; - } - - - public boolean isSetStringVal() { - return setField_ == _Fields.STRING_VAL; - } - - - public boolean equals(Object other) { - if (other instanceof TColumnValue) { - return equals((TColumnValue)other); - } else { - return false; - } - } - - public boolean equals(TColumnValue other) { - return other != null && getSetField() == other.getSetField() && getFieldValue().equals(other.getFieldValue()); - } - - @Override - public int compareTo(TColumnValue other) { - int lastComparison = org.apache.thrift.TBaseHelper.compareTo(getSetField(), other.getSetField()); - if (lastComparison == 0) { - return org.apache.thrift.TBaseHelper.compareTo(getFieldValue(), other.getFieldValue()); - } - return lastComparison; - } - - - @Override - public int hashCode() { - HashCodeBuilder hcb = new HashCodeBuilder(); - hcb.append(this.getClass().getName()); - org.apache.thrift.TFieldIdEnum setField = getSetField(); - if (setField != null) { - hcb.append(setField.getThriftFieldId()); - Object value = getFieldValue(); - if (value instanceof org.apache.thrift.TEnum) { - 
hcb.append(((org.apache.thrift.TEnum)getFieldValue()).getValue()); - } else { - hcb.append(value); - } - } - return hcb.toHashCode(); - } - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - -} diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TDoubleColumn.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TDoubleColumn.java deleted file mode 100644 index 4fc54544c1bea..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TDoubleColumn.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TDoubleColumn implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TDoubleColumn"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TDoubleColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TDoubleColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.DOUBLE)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TDoubleColumn.class, metaDataMap); - } - - public TDoubleColumn() { - } - - public TDoubleColumn( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = nulls; - } - - /** - * Performs a deep copy on other. - */ - public TDoubleColumn(TDoubleColumn other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(); - for (Double other_element : other.values) { - __this__values.add(other_element); - } - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); -; - } - } - - public TDoubleColumn deepCopy() { - return new TDoubleColumn(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? 
null : this.values.iterator(); - } - - public void addToValues(double elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return nulls; - } - - public void setNulls(byte[] nulls) { - setNulls(nulls == null ? (ByteBuffer)null : ByteBuffer.wrap(nulls)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = nulls; - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TDoubleColumn) - return this.equals((TDoubleColumn)that); - return false; - } - - public boolean equals(TDoubleColumn that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_values = true && (isSetValues()); - builder.append(present_values); - if (present_values) - builder.append(values); - - boolean present_nulls = true && (isSetNulls()); - builder.append(present_nulls); - if (present_nulls) - builder.append(nulls); - - return builder.toHashCode(); - } - - public int compareTo(TDoubleColumn other) { - if (!getClass().equals(other.getClass())) { - return 
getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TDoubleColumn typedOther = (TDoubleColumn)other; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(typedOther.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, typedOther.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(typedOther.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, typedOther.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TDoubleColumn("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TDoubleColumnStandardSchemeFactory implements SchemeFactory { - public TDoubleColumnStandardScheme getScheme() { - return new TDoubleColumnStandardScheme(); - } - } - - private static class TDoubleColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TDoubleColumn struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list94 = iprot.readListBegin(); - struct.values = new ArrayList(_list94.size); - for (int _i95 = 0; _i95 < _list94.size; ++_i95) - { - double _elem96; // optional - _elem96 = iprot.readDouble(); - struct.values.add(_elem96); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TDoubleColumn struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.DOUBLE, struct.values.size())); - for (double _iter97 : struct.values) - { - oprot.writeDouble(_iter97); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TDoubleColumnTupleSchemeFactory implements SchemeFactory { - public TDoubleColumnTupleScheme getScheme() { - return new TDoubleColumnTupleScheme(); - } - } - - private static class TDoubleColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TDoubleColumn struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (double _iter98 : struct.values) - { - oprot.writeDouble(_iter98); - } - } - 
oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TDoubleColumn struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list99 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.DOUBLE, iprot.readI32()); - struct.values = new ArrayList(_list99.size); - for (int _i100 = 0; _i100 < _list99.size; ++_i100) - { - double _elem101; // optional - _elem101 = iprot.readDouble(); - struct.values.add(_elem101); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TDoubleValue.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TDoubleValue.java deleted file mode 100644 index d21573633ef51..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TDoubleValue.java +++ /dev/null @@ -1,386 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TDoubleValue implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TDoubleValue"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("value", org.apache.thrift.protocol.TType.DOUBLE, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TDoubleValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TDoubleValueTupleSchemeFactory()); - } - - private double value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __VALUE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.DOUBLE))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TDoubleValue.class, metaDataMap); - } - - public TDoubleValue() { - } - - /** - * Performs a deep copy on other. - */ - public TDoubleValue(TDoubleValue other) { - __isset_bitfield = other.__isset_bitfield; - this.value = other.value; - } - - public TDoubleValue deepCopy() { - return new TDoubleValue(this); - } - - @Override - public void clear() { - setValueIsSet(false); - this.value = 0.0; - } - - public double getValue() { - return this.value; - } - - public void setValue(double value) { - this.value = value; - setValueIsSet(true); - } - - public void unsetValue() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return EncodingUtils.testBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - public void setValueIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((Double)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return Double.valueOf(getValue()); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TDoubleValue) - return this.equals((TDoubleValue)that); - return false; - } - - public boolean equals(TDoubleValue that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || 
that_present_value) { - if (!(this_present_value && that_present_value)) - return false; - if (this.value != that.value) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_value = true && (isSetValue()); - builder.append(present_value); - if (present_value) - builder.append(value); - - return builder.toHashCode(); - } - - public int compareTo(TDoubleValue other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TDoubleValue typedOther = (TDoubleValue)other; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(typedOther.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, typedOther.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TDoubleValue("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); - sb.append(this.value); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
- __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TDoubleValueStandardSchemeFactory implements SchemeFactory { - public TDoubleValueStandardScheme getScheme() { - return new TDoubleValueStandardScheme(); - } - } - - private static class TDoubleValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TDoubleValue struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.DOUBLE) { - struct.value = iprot.readDouble(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TDoubleValue struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.isSetValue()) { - oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeDouble(struct.value); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TDoubleValueTupleSchemeFactory implements SchemeFactory { - public TDoubleValueTupleScheme getScheme() { - return new TDoubleValueTupleScheme(); - } - } - - private static class TDoubleValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TDoubleValue struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeDouble(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TDoubleValue struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readDouble(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TExecuteStatementReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TExecuteStatementReq.java deleted file mode 100644 index 4f157ad5a6450..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TExecuteStatementReq.java +++ /dev/null @@ -1,769 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import 
org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TExecuteStatementReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TExecuteStatementReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField STATEMENT_FIELD_DESC = new org.apache.thrift.protocol.TField("statement", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField CONF_OVERLAY_FIELD_DESC = new org.apache.thrift.protocol.TField("confOverlay", org.apache.thrift.protocol.TType.MAP, (short)3); - private static final org.apache.thrift.protocol.TField RUN_ASYNC_FIELD_DESC = new org.apache.thrift.protocol.TField("runAsync", org.apache.thrift.protocol.TType.BOOL, (short)4); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TExecuteStatementReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TExecuteStatementReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String statement; // required - private Map confOverlay; // optional - private boolean runAsync; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - STATEMENT((short)2, "statement"), - CONF_OVERLAY((short)3, "confOverlay"), - RUN_ASYNC((short)4, "runAsync"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // STATEMENT - return STATEMENT; - case 3: // CONF_OVERLAY - return CONF_OVERLAY; - case 4: // RUN_ASYNC - return RUN_ASYNC; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __RUNASYNC_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.CONF_OVERLAY,_Fields.RUN_ASYNC}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.STATEMENT, new org.apache.thrift.meta_data.FieldMetaData("statement", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.CONF_OVERLAY, new org.apache.thrift.meta_data.FieldMetaData("confOverlay", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.MapMetaData(org.apache.thrift.protocol.TType.MAP, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING), - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); - tmpMap.put(_Fields.RUN_ASYNC, new org.apache.thrift.meta_data.FieldMetaData("runAsync", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TExecuteStatementReq.class, metaDataMap); - } - - public TExecuteStatementReq() { - this.runAsync = false; - - } - - public TExecuteStatementReq( - TSessionHandle sessionHandle, - String statement) - { - this(); - this.sessionHandle = sessionHandle; - this.statement = statement; - } - - /** - * Performs a deep copy on other. 
- */ - public TExecuteStatementReq(TExecuteStatementReq other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetStatement()) { - this.statement = other.statement; - } - if (other.isSetConfOverlay()) { - Map __this__confOverlay = new HashMap(); - for (Map.Entry other_element : other.confOverlay.entrySet()) { - - String other_element_key = other_element.getKey(); - String other_element_value = other_element.getValue(); - - String __this__confOverlay_copy_key = other_element_key; - - String __this__confOverlay_copy_value = other_element_value; - - __this__confOverlay.put(__this__confOverlay_copy_key, __this__confOverlay_copy_value); - } - this.confOverlay = __this__confOverlay; - } - this.runAsync = other.runAsync; - } - - public TExecuteStatementReq deepCopy() { - return new TExecuteStatementReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.statement = null; - this.confOverlay = null; - this.runAsync = false; - - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getStatement() { - return this.statement; - } - - public void setStatement(String statement) { - this.statement = statement; - } - - public void unsetStatement() { - this.statement = null; - } - - /** Returns true if field statement is set (has been assigned a value) and false otherwise */ - public boolean isSetStatement() { - return this.statement != null; - } - - public void setStatementIsSet(boolean value) { - if (!value) { - this.statement = null; - } - } - - public int getConfOverlaySize() { - return (this.confOverlay == null) ? 
0 : this.confOverlay.size(); - } - - public void putToConfOverlay(String key, String val) { - if (this.confOverlay == null) { - this.confOverlay = new HashMap(); - } - this.confOverlay.put(key, val); - } - - public Map getConfOverlay() { - return this.confOverlay; - } - - public void setConfOverlay(Map confOverlay) { - this.confOverlay = confOverlay; - } - - public void unsetConfOverlay() { - this.confOverlay = null; - } - - /** Returns true if field confOverlay is set (has been assigned a value) and false otherwise */ - public boolean isSetConfOverlay() { - return this.confOverlay != null; - } - - public void setConfOverlayIsSet(boolean value) { - if (!value) { - this.confOverlay = null; - } - } - - public boolean isRunAsync() { - return this.runAsync; - } - - public void setRunAsync(boolean runAsync) { - this.runAsync = runAsync; - setRunAsyncIsSet(true); - } - - public void unsetRunAsync() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __RUNASYNC_ISSET_ID); - } - - /** Returns true if field runAsync is set (has been assigned a value) and false otherwise */ - public boolean isSetRunAsync() { - return EncodingUtils.testBit(__isset_bitfield, __RUNASYNC_ISSET_ID); - } - - public void setRunAsyncIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __RUNASYNC_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case STATEMENT: - if (value == null) { - unsetStatement(); - } else { - setStatement((String)value); - } - break; - - case CONF_OVERLAY: - if (value == null) { - unsetConfOverlay(); - } else { - setConfOverlay((Map)value); - } - break; - - case RUN_ASYNC: - if (value == null) { - unsetRunAsync(); - } else { - setRunAsync((Boolean)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case STATEMENT: - return getStatement(); - - case CONF_OVERLAY: - return getConfOverlay(); - - case RUN_ASYNC: - return Boolean.valueOf(isRunAsync()); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case STATEMENT: - return isSetStatement(); - case CONF_OVERLAY: - return isSetConfOverlay(); - case RUN_ASYNC: - return isSetRunAsync(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TExecuteStatementReq) - return this.equals((TExecuteStatementReq)that); - return false; - } - - public boolean equals(TExecuteStatementReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_statement = true && this.isSetStatement(); - boolean that_present_statement = true && that.isSetStatement(); - if 
(this_present_statement || that_present_statement) { - if (!(this_present_statement && that_present_statement)) - return false; - if (!this.statement.equals(that.statement)) - return false; - } - - boolean this_present_confOverlay = true && this.isSetConfOverlay(); - boolean that_present_confOverlay = true && that.isSetConfOverlay(); - if (this_present_confOverlay || that_present_confOverlay) { - if (!(this_present_confOverlay && that_present_confOverlay)) - return false; - if (!this.confOverlay.equals(that.confOverlay)) - return false; - } - - boolean this_present_runAsync = true && this.isSetRunAsync(); - boolean that_present_runAsync = true && that.isSetRunAsync(); - if (this_present_runAsync || that_present_runAsync) { - if (!(this_present_runAsync && that_present_runAsync)) - return false; - if (this.runAsync != that.runAsync) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - boolean present_statement = true && (isSetStatement()); - builder.append(present_statement); - if (present_statement) - builder.append(statement); - - boolean present_confOverlay = true && (isSetConfOverlay()); - builder.append(present_confOverlay); - if (present_confOverlay) - builder.append(confOverlay); - - boolean present_runAsync = true && (isSetRunAsync()); - builder.append(present_runAsync); - if (present_runAsync) - builder.append(runAsync); - - return builder.toHashCode(); - } - - public int compareTo(TExecuteStatementReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TExecuteStatementReq typedOther = (TExecuteStatementReq)other; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetStatement()).compareTo(typedOther.isSetStatement()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatement()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.statement, typedOther.statement); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetConfOverlay()).compareTo(typedOther.isSetConfOverlay()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetConfOverlay()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.confOverlay, typedOther.confOverlay); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetRunAsync()).compareTo(typedOther.isSetRunAsync()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetRunAsync()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.runAsync, typedOther.runAsync); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, 
this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TExecuteStatementReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (!first) sb.append(", "); - sb.append("statement:"); - if (this.statement == null) { - sb.append("null"); - } else { - sb.append(this.statement); - } - first = false; - if (isSetConfOverlay()) { - if (!first) sb.append(", "); - sb.append("confOverlay:"); - if (this.confOverlay == null) { - sb.append("null"); - } else { - sb.append(this.confOverlay); - } - first = false; - } - if (isSetRunAsync()) { - if (!first) sb.append(", "); - sb.append("runAsync:"); - sb.append(this.runAsync); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - if (!isSetStatement()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'statement' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
- __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TExecuteStatementReqStandardSchemeFactory implements SchemeFactory { - public TExecuteStatementReqStandardScheme getScheme() { - return new TExecuteStatementReqStandardScheme(); - } - } - - private static class TExecuteStatementReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TExecuteStatementReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // STATEMENT - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.statement = iprot.readString(); - struct.setStatementIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // CONF_OVERLAY - if (schemeField.type == org.apache.thrift.protocol.TType.MAP) { - { - org.apache.thrift.protocol.TMap _map162 = iprot.readMapBegin(); - struct.confOverlay = new HashMap(2*_map162.size); - for (int _i163 = 0; _i163 < _map162.size; ++_i163) - { - String _key164; // required - String _val165; // required - _key164 = iprot.readString(); - _val165 = iprot.readString(); - struct.confOverlay.put(_key164, _val165); - } - iprot.readMapEnd(); - } - struct.setConfOverlayIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // RUN_ASYNC - if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) { - struct.runAsync = iprot.readBool(); - struct.setRunAsyncIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TExecuteStatementReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.statement != null) { - oprot.writeFieldBegin(STATEMENT_FIELD_DESC); - oprot.writeString(struct.statement); - oprot.writeFieldEnd(); - } - if (struct.confOverlay != null) { - if (struct.isSetConfOverlay()) { - oprot.writeFieldBegin(CONF_OVERLAY_FIELD_DESC); - { - oprot.writeMapBegin(new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, struct.confOverlay.size())); - for (Map.Entry _iter166 : struct.confOverlay.entrySet()) - { - oprot.writeString(_iter166.getKey()); - oprot.writeString(_iter166.getValue()); - } - oprot.writeMapEnd(); - } - oprot.writeFieldEnd(); - } - } - if (struct.isSetRunAsync()) { - 
oprot.writeFieldBegin(RUN_ASYNC_FIELD_DESC); - oprot.writeBool(struct.runAsync); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TExecuteStatementReqTupleSchemeFactory implements SchemeFactory { - public TExecuteStatementReqTupleScheme getScheme() { - return new TExecuteStatementReqTupleScheme(); - } - } - - private static class TExecuteStatementReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TExecuteStatementReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - oprot.writeString(struct.statement); - BitSet optionals = new BitSet(); - if (struct.isSetConfOverlay()) { - optionals.set(0); - } - if (struct.isSetRunAsync()) { - optionals.set(1); - } - oprot.writeBitSet(optionals, 2); - if (struct.isSetConfOverlay()) { - { - oprot.writeI32(struct.confOverlay.size()); - for (Map.Entry _iter167 : struct.confOverlay.entrySet()) - { - oprot.writeString(_iter167.getKey()); - oprot.writeString(_iter167.getValue()); - } - } - } - if (struct.isSetRunAsync()) { - oprot.writeBool(struct.runAsync); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TExecuteStatementReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - struct.statement = iprot.readString(); - struct.setStatementIsSet(true); - BitSet incoming = iprot.readBitSet(2); - if (incoming.get(0)) { - { - org.apache.thrift.protocol.TMap _map168 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.confOverlay = new HashMap(2*_map168.size); - for (int _i169 = 0; _i169 < _map168.size; ++_i169) - { - String _key170; // required - String _val171; // required - _key170 = iprot.readString(); - _val171 = iprot.readString(); - struct.confOverlay.put(_key170, _val171); - } - } - struct.setConfOverlayIsSet(true); - } - if (incoming.get(1)) { - struct.runAsync = iprot.readBool(); - struct.setRunAsyncIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TExecuteStatementResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TExecuteStatementResp.java deleted file mode 100644 index fdde51e70f783..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TExecuteStatementResp.java +++ /dev/null @@ -1,505 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import 
java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TExecuteStatementResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TExecuteStatementResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TExecuteStatementRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TExecuteStatementRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TExecuteStatementResp.class, metaDataMap); - } - - public TExecuteStatementResp() { - } - - public TExecuteStatementResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TExecuteStatementResp(TExecuteStatementResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TExecuteStatementResp deepCopy() { - return new TExecuteStatementResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return 
getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TExecuteStatementResp) - return this.equals((TExecuteStatementResp)that); - return false; - } - - public boolean equals(TExecuteStatementResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - builder.append(present_operationHandle); - if (present_operationHandle) - builder.append(operationHandle); - - return builder.toHashCode(); - } - - public int compareTo(TExecuteStatementResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TExecuteStatementResp typedOther = (TExecuteStatementResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(typedOther.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, typedOther.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TExecuteStatementResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - 
sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TExecuteStatementRespStandardSchemeFactory implements SchemeFactory { - public TExecuteStatementRespStandardScheme getScheme() { - return new TExecuteStatementRespStandardScheme(); - } - } - - private static class TExecuteStatementRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TExecuteStatementResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TExecuteStatementResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TExecuteStatementRespTupleSchemeFactory implements SchemeFactory { - public TExecuteStatementRespTupleScheme getScheme() { - return new 
TExecuteStatementRespTupleScheme(); - } - } - - private static class TExecuteStatementRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TExecuteStatementResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TExecuteStatementResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchOrientation.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchOrientation.java deleted file mode 100644 index b2a22effd91af..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchOrientation.java +++ /dev/null @@ -1,57 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - - -import java.util.Map; -import java.util.HashMap; -import org.apache.thrift.TEnum; - -public enum TFetchOrientation implements org.apache.thrift.TEnum { - FETCH_NEXT(0), - FETCH_PRIOR(1), - FETCH_RELATIVE(2), - FETCH_ABSOLUTE(3), - FETCH_FIRST(4), - FETCH_LAST(5); - - private final int value; - - private TFetchOrientation(int value) { - this.value = value; - } - - /** - * Get the integer value of this enum value, as defined in the Thrift IDL. - */ - public int getValue() { - return value; - } - - /** - * Find a the enum type by its integer value, as defined in the Thrift IDL. - * @return null if the value is not found. 
- */
-  public static TFetchOrientation findByValue(int value) {
-    switch (value) {
-      case 0:
-        return FETCH_NEXT;
-      case 1:
-        return FETCH_PRIOR;
-      case 2:
-        return FETCH_RELATIVE;
-      case 3:
-        return FETCH_ABSOLUTE;
-      case 4:
-        return FETCH_FIRST;
-      case 5:
-        return FETCH_LAST;
-      default:
-        return null;
-    }
-  }
-}
diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchResultsReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchResultsReq.java
deleted file mode 100644
index 068711fc44440..0000000000000
--- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchResultsReq.java
+++ /dev/null
@@ -1,710 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.0)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-package org.apache.hive.service.cli.thrift;
-
-import org.apache.commons.lang.builder.HashCodeBuilder;
-import org.apache.thrift.scheme.IScheme;
-import org.apache.thrift.scheme.SchemeFactory;
-import org.apache.thrift.scheme.StandardScheme;
-
-import org.apache.thrift.scheme.TupleScheme;
-import org.apache.thrift.protocol.TTupleProtocol;
-import org.apache.thrift.protocol.TProtocolException;
-import org.apache.thrift.EncodingUtils;
-import org.apache.thrift.TException;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.EnumMap;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.EnumSet;
-import java.util.Collections;
-import java.util.BitSet;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class TFetchResultsReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable {
-  private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TFetchResultsReq");
-
-  private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1);
-  private static final org.apache.thrift.protocol.TField ORIENTATION_FIELD_DESC = new org.apache.thrift.protocol.TField("orientation", org.apache.thrift.protocol.TType.I32, (short)2);
-  private static final org.apache.thrift.protocol.TField MAX_ROWS_FIELD_DESC = new org.apache.thrift.protocol.TField("maxRows", org.apache.thrift.protocol.TType.I64, (short)3);
-  private static final org.apache.thrift.protocol.TField FETCH_TYPE_FIELD_DESC = new org.apache.thrift.protocol.TField("fetchType", org.apache.thrift.protocol.TType.I16, (short)4);
-
-  private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>();
-  static {
-    schemes.put(StandardScheme.class, new TFetchResultsReqStandardSchemeFactory());
-    schemes.put(TupleScheme.class, new TFetchResultsReqTupleSchemeFactory());
-  }
-
-  private TOperationHandle operationHandle; // required
-  private TFetchOrientation orientation; // required
-  private long maxRows; // required
-  private short fetchType; // optional
-
-  /** The set of fields this struct contains, along with convenience methods for finding and manipulating them.
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OPERATION_HANDLE((short)1, "operationHandle"), - /** - * - * @see TFetchOrientation - */ - ORIENTATION((short)2, "orientation"), - MAX_ROWS((short)3, "maxRows"), - FETCH_TYPE((short)4, "fetchType"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OPERATION_HANDLE - return OPERATION_HANDLE; - case 2: // ORIENTATION - return ORIENTATION; - case 3: // MAX_ROWS - return MAX_ROWS; - case 4: // FETCH_TYPE - return FETCH_TYPE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __MAXROWS_ISSET_ID = 0; - private static final int __FETCHTYPE_ISSET_ID = 1; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.FETCH_TYPE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - tmpMap.put(_Fields.ORIENTATION, new org.apache.thrift.meta_data.FieldMetaData("orientation", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TFetchOrientation.class))); - tmpMap.put(_Fields.MAX_ROWS, new org.apache.thrift.meta_data.FieldMetaData("maxRows", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - tmpMap.put(_Fields.FETCH_TYPE, new org.apache.thrift.meta_data.FieldMetaData("fetchType", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I16))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TFetchResultsReq.class, metaDataMap); - } - - public TFetchResultsReq() { - this.orientation = org.apache.hive.service.cli.thrift.TFetchOrientation.FETCH_NEXT; - - this.fetchType = (short)0; - - } - - public TFetchResultsReq( - TOperationHandle operationHandle, - TFetchOrientation orientation, - long maxRows) - { - this(); - 
this.operationHandle = operationHandle; - this.orientation = orientation; - this.maxRows = maxRows; - setMaxRowsIsSet(true); - } - - /** - * Performs a deep copy on other. - */ - public TFetchResultsReq(TFetchResultsReq other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - if (other.isSetOrientation()) { - this.orientation = other.orientation; - } - this.maxRows = other.maxRows; - this.fetchType = other.fetchType; - } - - public TFetchResultsReq deepCopy() { - return new TFetchResultsReq(this); - } - - @Override - public void clear() { - this.operationHandle = null; - this.orientation = org.apache.hive.service.cli.thrift.TFetchOrientation.FETCH_NEXT; - - setMaxRowsIsSet(false); - this.maxRows = 0; - this.fetchType = (short)0; - - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - /** - * - * @see TFetchOrientation - */ - public TFetchOrientation getOrientation() { - return this.orientation; - } - - /** - * - * @see TFetchOrientation - */ - public void setOrientation(TFetchOrientation orientation) { - this.orientation = orientation; - } - - public void unsetOrientation() { - this.orientation = null; - } - - /** Returns true if field orientation is set (has been assigned a value) and false otherwise */ - public boolean isSetOrientation() { - return this.orientation != null; - } - - public void setOrientationIsSet(boolean value) { - if (!value) { - this.orientation = null; - } - } - - public long getMaxRows() { - return this.maxRows; - } - - public void setMaxRows(long maxRows) { - this.maxRows = maxRows; - setMaxRowsIsSet(true); - } - - public void unsetMaxRows() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __MAXROWS_ISSET_ID); - } - - /** Returns true if field maxRows is set (has been assigned a value) and false otherwise */ - public boolean isSetMaxRows() { - return EncodingUtils.testBit(__isset_bitfield, __MAXROWS_ISSET_ID); - } - - public void setMaxRowsIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __MAXROWS_ISSET_ID, value); - } - - public short getFetchType() { - return this.fetchType; - } - - public void setFetchType(short fetchType) { - this.fetchType = fetchType; - setFetchTypeIsSet(true); - } - - public void unsetFetchType() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __FETCHTYPE_ISSET_ID); - } - - /** Returns true if field fetchType is set (has been assigned a value) and false otherwise */ - public boolean isSetFetchType() { - return EncodingUtils.testBit(__isset_bitfield, __FETCHTYPE_ISSET_ID); - } - - public void setFetchTypeIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __FETCHTYPE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - 
case ORIENTATION: - if (value == null) { - unsetOrientation(); - } else { - setOrientation((TFetchOrientation)value); - } - break; - - case MAX_ROWS: - if (value == null) { - unsetMaxRows(); - } else { - setMaxRows((Long)value); - } - break; - - case FETCH_TYPE: - if (value == null) { - unsetFetchType(); - } else { - setFetchType((Short)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OPERATION_HANDLE: - return getOperationHandle(); - - case ORIENTATION: - return getOrientation(); - - case MAX_ROWS: - return Long.valueOf(getMaxRows()); - - case FETCH_TYPE: - return Short.valueOf(getFetchType()); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OPERATION_HANDLE: - return isSetOperationHandle(); - case ORIENTATION: - return isSetOrientation(); - case MAX_ROWS: - return isSetMaxRows(); - case FETCH_TYPE: - return isSetFetchType(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TFetchResultsReq) - return this.equals((TFetchResultsReq)that); - return false; - } - - public boolean equals(TFetchResultsReq that) { - if (that == null) - return false; - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - boolean this_present_orientation = true && this.isSetOrientation(); - boolean that_present_orientation = true && that.isSetOrientation(); - if (this_present_orientation || that_present_orientation) { - if (!(this_present_orientation && that_present_orientation)) - return false; - if (!this.orientation.equals(that.orientation)) - return false; - } - - boolean this_present_maxRows = true; - boolean that_present_maxRows = true; - if (this_present_maxRows || that_present_maxRows) { - if (!(this_present_maxRows && that_present_maxRows)) - return false; - if (this.maxRows != that.maxRows) - return false; - } - - boolean this_present_fetchType = true && this.isSetFetchType(); - boolean that_present_fetchType = true && that.isSetFetchType(); - if (this_present_fetchType || that_present_fetchType) { - if (!(this_present_fetchType && that_present_fetchType)) - return false; - if (this.fetchType != that.fetchType) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_operationHandle = true && (isSetOperationHandle()); - builder.append(present_operationHandle); - if (present_operationHandle) - builder.append(operationHandle); - - boolean present_orientation = true && (isSetOrientation()); - builder.append(present_orientation); - if (present_orientation) - builder.append(orientation.getValue()); - - boolean present_maxRows = true; - builder.append(present_maxRows); - if (present_maxRows) - builder.append(maxRows); - - boolean present_fetchType = true && (isSetFetchType()); - builder.append(present_fetchType); - if (present_fetchType) - builder.append(fetchType); - - return 
builder.toHashCode(); - } - - public int compareTo(TFetchResultsReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TFetchResultsReq typedOther = (TFetchResultsReq)other; - - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(typedOther.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, typedOther.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOrientation()).compareTo(typedOther.isSetOrientation()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOrientation()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.orientation, typedOther.orientation); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetMaxRows()).compareTo(typedOther.isSetMaxRows()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetMaxRows()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.maxRows, typedOther.maxRows); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetFetchType()).compareTo(typedOther.isSetFetchType()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetFetchType()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.fetchType, typedOther.fetchType); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TFetchResultsReq("); - boolean first = true; - - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - if (!first) sb.append(", "); - sb.append("orientation:"); - if (this.orientation == null) { - sb.append("null"); - } else { - sb.append(this.orientation); - } - first = false; - if (!first) sb.append(", "); - sb.append("maxRows:"); - sb.append(this.maxRows); - first = false; - if (isSetFetchType()) { - if (!first) sb.append(", "); - sb.append("fetchType:"); - sb.append(this.fetchType); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetOperationHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationHandle' is unset! Struct:" + toString()); - } - - if (!isSetOrientation()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'orientation' is unset! Struct:" + toString()); - } - - if (!isSetMaxRows()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'maxRows' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TFetchResultsReqStandardSchemeFactory implements SchemeFactory { - public TFetchResultsReqStandardScheme getScheme() { - return new TFetchResultsReqStandardScheme(); - } - } - - private static class TFetchResultsReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TFetchResultsReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // ORIENTATION - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.orientation = TFetchOrientation.findByValue(iprot.readI32()); - struct.setOrientationIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // MAX_ROWS - if (schemeField.type == org.apache.thrift.protocol.TType.I64) { - struct.maxRows = iprot.readI64(); - struct.setMaxRowsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // FETCH_TYPE - if (schemeField.type == org.apache.thrift.protocol.TType.I16) { - struct.fetchType = iprot.readI16(); - struct.setFetchTypeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TFetchResultsReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.operationHandle != null) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.orientation != null) { - oprot.writeFieldBegin(ORIENTATION_FIELD_DESC); - oprot.writeI32(struct.orientation.getValue()); - oprot.writeFieldEnd(); - } - oprot.writeFieldBegin(MAX_ROWS_FIELD_DESC); - oprot.writeI64(struct.maxRows); - oprot.writeFieldEnd(); - if (struct.isSetFetchType()) { - 
oprot.writeFieldBegin(FETCH_TYPE_FIELD_DESC); - oprot.writeI16(struct.fetchType); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TFetchResultsReqTupleSchemeFactory implements SchemeFactory { - public TFetchResultsReqTupleScheme getScheme() { - return new TFetchResultsReqTupleScheme(); - } - } - - private static class TFetchResultsReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TFetchResultsReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.operationHandle.write(oprot); - oprot.writeI32(struct.orientation.getValue()); - oprot.writeI64(struct.maxRows); - BitSet optionals = new BitSet(); - if (struct.isSetFetchType()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetFetchType()) { - oprot.writeI16(struct.fetchType); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TFetchResultsReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - struct.orientation = TFetchOrientation.findByValue(iprot.readI32()); - struct.setOrientationIsSet(true); - struct.maxRows = iprot.readI64(); - struct.setMaxRowsIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.fetchType = iprot.readI16(); - struct.setFetchTypeIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchResultsResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchResultsResp.java deleted file mode 100644 index 19991f1da3eb3..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchResultsResp.java +++ /dev/null @@ -1,608 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TFetchResultsResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TFetchResultsResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField HAS_MORE_ROWS_FIELD_DESC = new 
org.apache.thrift.protocol.TField("hasMoreRows", org.apache.thrift.protocol.TType.BOOL, (short)2); - private static final org.apache.thrift.protocol.TField RESULTS_FIELD_DESC = new org.apache.thrift.protocol.TField("results", org.apache.thrift.protocol.TType.STRUCT, (short)3); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TFetchResultsRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TFetchResultsRespTupleSchemeFactory()); - } - - private TStatus status; // required - private boolean hasMoreRows; // optional - private TRowSet results; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - HAS_MORE_ROWS((short)2, "hasMoreRows"), - RESULTS((short)3, "results"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // HAS_MORE_ROWS - return HAS_MORE_ROWS; - case 3: // RESULTS - return RESULTS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __HASMOREROWS_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.HAS_MORE_ROWS,_Fields.RESULTS}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.HAS_MORE_ROWS, new org.apache.thrift.meta_data.FieldMetaData("hasMoreRows", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL))); - tmpMap.put(_Fields.RESULTS, new org.apache.thrift.meta_data.FieldMetaData("results", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TRowSet.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TFetchResultsResp.class, metaDataMap); - } - - public TFetchResultsResp() { - } - - public TFetchResultsResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TFetchResultsResp(TFetchResultsResp other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - this.hasMoreRows = other.hasMoreRows; - if (other.isSetResults()) { - this.results = new TRowSet(other.results); - } - } - - public TFetchResultsResp deepCopy() { - return new TFetchResultsResp(this); - } - - @Override - public void clear() { - this.status = null; - setHasMoreRowsIsSet(false); - this.hasMoreRows = false; - this.results = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public boolean isHasMoreRows() { - return this.hasMoreRows; - } - - public void setHasMoreRows(boolean hasMoreRows) { - this.hasMoreRows = hasMoreRows; - setHasMoreRowsIsSet(true); - } - - public void unsetHasMoreRows() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __HASMOREROWS_ISSET_ID); - } - - /** Returns true if field hasMoreRows is set (has been assigned a value) and false otherwise */ - public boolean isSetHasMoreRows() { - return EncodingUtils.testBit(__isset_bitfield, __HASMOREROWS_ISSET_ID); - } - - public void setHasMoreRowsIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __HASMOREROWS_ISSET_ID, value); - } - - public TRowSet getResults() { - return this.results; - } - - public void setResults(TRowSet results) { - this.results = results; - } - - public void unsetResults() { - this.results = null; - } - - /** Returns true if field results is set (has been assigned a value) and false otherwise */ - public boolean isSetResults() { - return this.results != null; - } - - public void setResultsIsSet(boolean value) { - if (!value) { - this.results = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case HAS_MORE_ROWS: - if (value == null) { - unsetHasMoreRows(); - } else { - setHasMoreRows((Boolean)value); - } - break; - - case RESULTS: - if (value == null) { - unsetResults(); - } else { - setResults((TRowSet)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case HAS_MORE_ROWS: - return Boolean.valueOf(isHasMoreRows()); - - case RESULTS: - return getResults(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case HAS_MORE_ROWS: - return isSetHasMoreRows(); - case RESULTS: - return isSetResults(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TFetchResultsResp) - return this.equals((TFetchResultsResp)that); - return false; - } - - public boolean equals(TFetchResultsResp that) { - if (that == null) - return false; - - boolean this_present_status = 
true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_hasMoreRows = true && this.isSetHasMoreRows(); - boolean that_present_hasMoreRows = true && that.isSetHasMoreRows(); - if (this_present_hasMoreRows || that_present_hasMoreRows) { - if (!(this_present_hasMoreRows && that_present_hasMoreRows)) - return false; - if (this.hasMoreRows != that.hasMoreRows) - return false; - } - - boolean this_present_results = true && this.isSetResults(); - boolean that_present_results = true && that.isSetResults(); - if (this_present_results || that_present_results) { - if (!(this_present_results && that_present_results)) - return false; - if (!this.results.equals(that.results)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_hasMoreRows = true && (isSetHasMoreRows()); - builder.append(present_hasMoreRows); - if (present_hasMoreRows) - builder.append(hasMoreRows); - - boolean present_results = true && (isSetResults()); - builder.append(present_results); - if (present_results) - builder.append(results); - - return builder.toHashCode(); - } - - public int compareTo(TFetchResultsResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TFetchResultsResp typedOther = (TFetchResultsResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetHasMoreRows()).compareTo(typedOther.isSetHasMoreRows()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetHasMoreRows()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.hasMoreRows, typedOther.hasMoreRows); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetResults()).compareTo(typedOther.isSetResults()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetResults()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.results, typedOther.results); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TFetchResultsResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetHasMoreRows()) { - if (!first) sb.append(", "); - 
sb.append("hasMoreRows:"); - sb.append(this.hasMoreRows); - first = false; - } - if (isSetResults()) { - if (!first) sb.append(", "); - sb.append("results:"); - if (this.results == null) { - sb.append("null"); - } else { - sb.append(this.results); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (results != null) { - results.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TFetchResultsRespStandardSchemeFactory implements SchemeFactory { - public TFetchResultsRespStandardScheme getScheme() { - return new TFetchResultsRespStandardScheme(); - } - } - - private static class TFetchResultsRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TFetchResultsResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // HAS_MORE_ROWS - if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) { - struct.hasMoreRows = iprot.readBool(); - struct.setHasMoreRowsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // RESULTS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.results = new TRowSet(); - struct.results.read(iprot); - struct.setResultsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TFetchResultsResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.isSetHasMoreRows()) { - oprot.writeFieldBegin(HAS_MORE_ROWS_FIELD_DESC); - 
oprot.writeBool(struct.hasMoreRows); - oprot.writeFieldEnd(); - } - if (struct.results != null) { - if (struct.isSetResults()) { - oprot.writeFieldBegin(RESULTS_FIELD_DESC); - struct.results.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TFetchResultsRespTupleSchemeFactory implements SchemeFactory { - public TFetchResultsRespTupleScheme getScheme() { - return new TFetchResultsRespTupleScheme(); - } - } - - private static class TFetchResultsRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TFetchResultsResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetHasMoreRows()) { - optionals.set(0); - } - if (struct.isSetResults()) { - optionals.set(1); - } - oprot.writeBitSet(optionals, 2); - if (struct.isSetHasMoreRows()) { - oprot.writeBool(struct.hasMoreRows); - } - if (struct.isSetResults()) { - struct.results.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TFetchResultsResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(2); - if (incoming.get(0)) { - struct.hasMoreRows = iprot.readBool(); - struct.setHasMoreRowsIsSet(true); - } - if (incoming.get(1)) { - struct.results = new TRowSet(); - struct.results.read(iprot); - struct.setResultsIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetCatalogsReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetCatalogsReq.java deleted file mode 100644 index cfd157f701b26..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetCatalogsReq.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetCatalogsReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetCatalogsReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, 
SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetCatalogsReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetCatalogsReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetCatalogsReq.class, metaDataMap); - } - - public TGetCatalogsReq() { - } - - public TGetCatalogsReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetCatalogsReq(TGetCatalogsReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - } - - public TGetCatalogsReq deepCopy() { - return new TGetCatalogsReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetCatalogsReq) - return this.equals((TGetCatalogsReq)that); - return false; - } - - public boolean equals(TGetCatalogsReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - return builder.toHashCode(); - } - - public int compareTo(TGetCatalogsReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetCatalogsReq typedOther = (TGetCatalogsReq)other; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws 
org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetCatalogsReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetCatalogsReqStandardSchemeFactory implements SchemeFactory { - public TGetCatalogsReqStandardScheme getScheme() { - return new TGetCatalogsReqStandardScheme(); - } - } - - private static class TGetCatalogsReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetCatalogsReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetCatalogsReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetCatalogsReqTupleSchemeFactory implements SchemeFactory { - public TGetCatalogsReqTupleScheme getScheme() { - return new TGetCatalogsReqTupleScheme(); - } - } - - private static class TGetCatalogsReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetCatalogsReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetCatalogsReq struct) throws 
org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetCatalogsResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetCatalogsResp.java deleted file mode 100644 index 1c5a35437d416..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetCatalogsResp.java +++ /dev/null @@ -1,505 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetCatalogsResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetCatalogsResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetCatalogsRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetCatalogsRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetCatalogsResp.class, metaDataMap); - } - - public TGetCatalogsResp() { - } - - public TGetCatalogsResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetCatalogsResp(TGetCatalogsResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetCatalogsResp deepCopy() { - return new TGetCatalogsResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetCatalogsResp) - return this.equals((TGetCatalogsResp)that); - return false; - } - - public boolean equals(TGetCatalogsResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - 
builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - builder.append(present_operationHandle); - if (present_operationHandle) - builder.append(operationHandle); - - return builder.toHashCode(); - } - - public int compareTo(TGetCatalogsResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetCatalogsResp typedOther = (TGetCatalogsResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(typedOther.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, typedOther.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetCatalogsResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetCatalogsRespStandardSchemeFactory implements SchemeFactory { - public TGetCatalogsRespStandardScheme getScheme() { - return new TGetCatalogsRespStandardScheme(); - } - } - - private static class TGetCatalogsRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetCatalogsResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetCatalogsResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetCatalogsRespTupleSchemeFactory implements SchemeFactory { - public TGetCatalogsRespTupleScheme getScheme() { - return new TGetCatalogsRespTupleScheme(); - } - } - - private static class TGetCatalogsRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetCatalogsResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override - public 
void read(org.apache.thrift.protocol.TProtocol prot, TGetCatalogsResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetColumnsReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetColumnsReq.java deleted file mode 100644 index a2c793bd95927..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetColumnsReq.java +++ /dev/null @@ -1,818 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetColumnsReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetColumnsReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField CATALOG_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("catalogName", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField SCHEMA_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("schemaName", org.apache.thrift.protocol.TType.STRING, (short)3); - private static final org.apache.thrift.protocol.TField TABLE_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("tableName", org.apache.thrift.protocol.TType.STRING, (short)4); - private static final org.apache.thrift.protocol.TField COLUMN_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("columnName", org.apache.thrift.protocol.TType.STRING, (short)5); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetColumnsReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetColumnsReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String catalogName; // optional - private String schemaName; // optional - private String tableName; // optional - private String columnName; // optional - - /** The set of fields this struct 
contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - CATALOG_NAME((short)2, "catalogName"), - SCHEMA_NAME((short)3, "schemaName"), - TABLE_NAME((short)4, "tableName"), - COLUMN_NAME((short)5, "columnName"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // CATALOG_NAME - return CATALOG_NAME; - case 3: // SCHEMA_NAME - return SCHEMA_NAME; - case 4: // TABLE_NAME - return TABLE_NAME; - case 5: // COLUMN_NAME - return COLUMN_NAME; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.CATALOG_NAME,_Fields.SCHEMA_NAME,_Fields.TABLE_NAME,_Fields.COLUMN_NAME}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.CATALOG_NAME, new org.apache.thrift.meta_data.FieldMetaData("catalogName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - tmpMap.put(_Fields.SCHEMA_NAME, new org.apache.thrift.meta_data.FieldMetaData("schemaName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - tmpMap.put(_Fields.TABLE_NAME, new org.apache.thrift.meta_data.FieldMetaData("tableName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - tmpMap.put(_Fields.COLUMN_NAME, new org.apache.thrift.meta_data.FieldMetaData("columnName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - 
org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetColumnsReq.class, metaDataMap); - } - - public TGetColumnsReq() { - } - - public TGetColumnsReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. - */ - public TGetColumnsReq(TGetColumnsReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetCatalogName()) { - this.catalogName = other.catalogName; - } - if (other.isSetSchemaName()) { - this.schemaName = other.schemaName; - } - if (other.isSetTableName()) { - this.tableName = other.tableName; - } - if (other.isSetColumnName()) { - this.columnName = other.columnName; - } - } - - public TGetColumnsReq deepCopy() { - return new TGetColumnsReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.catalogName = null; - this.schemaName = null; - this.tableName = null; - this.columnName = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getCatalogName() { - return this.catalogName; - } - - public void setCatalogName(String catalogName) { - this.catalogName = catalogName; - } - - public void unsetCatalogName() { - this.catalogName = null; - } - - /** Returns true if field catalogName is set (has been assigned a value) and false otherwise */ - public boolean isSetCatalogName() { - return this.catalogName != null; - } - - public void setCatalogNameIsSet(boolean value) { - if (!value) { - this.catalogName = null; - } - } - - public String getSchemaName() { - return this.schemaName; - } - - public void setSchemaName(String schemaName) { - this.schemaName = schemaName; - } - - public void unsetSchemaName() { - this.schemaName = null; - } - - /** Returns true if field schemaName is set (has been assigned a value) and false otherwise */ - public boolean isSetSchemaName() { - return this.schemaName != null; - } - - public void setSchemaNameIsSet(boolean value) { - if (!value) { - this.schemaName = null; - } - } - - public String getTableName() { - return this.tableName; - } - - public void setTableName(String tableName) { - this.tableName = tableName; - } - - public void unsetTableName() { - this.tableName = null; - } - - /** Returns true if field tableName is set (has been assigned a value) and false otherwise */ - public boolean isSetTableName() { - return this.tableName != null; - } - - public void setTableNameIsSet(boolean value) { - if (!value) { - this.tableName = null; - } - } - - public String getColumnName() { - return this.columnName; - } - - public void setColumnName(String columnName) { - this.columnName = columnName; - } - - public void unsetColumnName() { - this.columnName = null; - } - - /** Returns true if field columnName is set (has been assigned a value) and false otherwise */ - public boolean isSetColumnName() { - return this.columnName != null; - } - - public void setColumnNameIsSet(boolean value) { - if (!value) { - this.columnName = null; - } - } - - public void 
setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case CATALOG_NAME: - if (value == null) { - unsetCatalogName(); - } else { - setCatalogName((String)value); - } - break; - - case SCHEMA_NAME: - if (value == null) { - unsetSchemaName(); - } else { - setSchemaName((String)value); - } - break; - - case TABLE_NAME: - if (value == null) { - unsetTableName(); - } else { - setTableName((String)value); - } - break; - - case COLUMN_NAME: - if (value == null) { - unsetColumnName(); - } else { - setColumnName((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case CATALOG_NAME: - return getCatalogName(); - - case SCHEMA_NAME: - return getSchemaName(); - - case TABLE_NAME: - return getTableName(); - - case COLUMN_NAME: - return getColumnName(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case CATALOG_NAME: - return isSetCatalogName(); - case SCHEMA_NAME: - return isSetSchemaName(); - case TABLE_NAME: - return isSetTableName(); - case COLUMN_NAME: - return isSetColumnName(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetColumnsReq) - return this.equals((TGetColumnsReq)that); - return false; - } - - public boolean equals(TGetColumnsReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_catalogName = true && this.isSetCatalogName(); - boolean that_present_catalogName = true && that.isSetCatalogName(); - if (this_present_catalogName || that_present_catalogName) { - if (!(this_present_catalogName && that_present_catalogName)) - return false; - if (!this.catalogName.equals(that.catalogName)) - return false; - } - - boolean this_present_schemaName = true && this.isSetSchemaName(); - boolean that_present_schemaName = true && that.isSetSchemaName(); - if (this_present_schemaName || that_present_schemaName) { - if (!(this_present_schemaName && that_present_schemaName)) - return false; - if (!this.schemaName.equals(that.schemaName)) - return false; - } - - boolean this_present_tableName = true && this.isSetTableName(); - boolean that_present_tableName = true && that.isSetTableName(); - if (this_present_tableName || that_present_tableName) { - if (!(this_present_tableName && that_present_tableName)) - return false; - if (!this.tableName.equals(that.tableName)) - return false; - } - - boolean this_present_columnName = true && this.isSetColumnName(); - boolean that_present_columnName = true && that.isSetColumnName(); - if (this_present_columnName || that_present_columnName) { - if (!(this_present_columnName && that_present_columnName)) - return false; - if 
(!this.columnName.equals(that.columnName)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - boolean present_catalogName = true && (isSetCatalogName()); - builder.append(present_catalogName); - if (present_catalogName) - builder.append(catalogName); - - boolean present_schemaName = true && (isSetSchemaName()); - builder.append(present_schemaName); - if (present_schemaName) - builder.append(schemaName); - - boolean present_tableName = true && (isSetTableName()); - builder.append(present_tableName); - if (present_tableName) - builder.append(tableName); - - boolean present_columnName = true && (isSetColumnName()); - builder.append(present_columnName); - if (present_columnName) - builder.append(columnName); - - return builder.toHashCode(); - } - - public int compareTo(TGetColumnsReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetColumnsReq typedOther = (TGetColumnsReq)other; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetCatalogName()).compareTo(typedOther.isSetCatalogName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetCatalogName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.catalogName, typedOther.catalogName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSchemaName()).compareTo(typedOther.isSetSchemaName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSchemaName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.schemaName, typedOther.schemaName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetTableName()).compareTo(typedOther.isSetTableName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTableName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.tableName, typedOther.tableName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetColumnName()).compareTo(typedOther.isSetColumnName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetColumnName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.columnName, typedOther.columnName); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetColumnsReq("); - boolean first = 
true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (isSetCatalogName()) { - if (!first) sb.append(", "); - sb.append("catalogName:"); - if (this.catalogName == null) { - sb.append("null"); - } else { - sb.append(this.catalogName); - } - first = false; - } - if (isSetSchemaName()) { - if (!first) sb.append(", "); - sb.append("schemaName:"); - if (this.schemaName == null) { - sb.append("null"); - } else { - sb.append(this.schemaName); - } - first = false; - } - if (isSetTableName()) { - if (!first) sb.append(", "); - sb.append("tableName:"); - if (this.tableName == null) { - sb.append("null"); - } else { - sb.append(this.tableName); - } - first = false; - } - if (isSetColumnName()) { - if (!first) sb.append(", "); - sb.append("columnName:"); - if (this.columnName == null) { - sb.append("null"); - } else { - sb.append(this.columnName); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetColumnsReqStandardSchemeFactory implements SchemeFactory { - public TGetColumnsReqStandardScheme getScheme() { - return new TGetColumnsReqStandardScheme(); - } - } - - private static class TGetColumnsReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetColumnsReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // CATALOG_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SCHEMA_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // TABLE_NAME - if 
(schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.tableName = iprot.readString(); - struct.setTableNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 5: // COLUMN_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.columnName = iprot.readString(); - struct.setColumnNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetColumnsReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.catalogName != null) { - if (struct.isSetCatalogName()) { - oprot.writeFieldBegin(CATALOG_NAME_FIELD_DESC); - oprot.writeString(struct.catalogName); - oprot.writeFieldEnd(); - } - } - if (struct.schemaName != null) { - if (struct.isSetSchemaName()) { - oprot.writeFieldBegin(SCHEMA_NAME_FIELD_DESC); - oprot.writeString(struct.schemaName); - oprot.writeFieldEnd(); - } - } - if (struct.tableName != null) { - if (struct.isSetTableName()) { - oprot.writeFieldBegin(TABLE_NAME_FIELD_DESC); - oprot.writeString(struct.tableName); - oprot.writeFieldEnd(); - } - } - if (struct.columnName != null) { - if (struct.isSetColumnName()) { - oprot.writeFieldBegin(COLUMN_NAME_FIELD_DESC); - oprot.writeString(struct.columnName); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetColumnsReqTupleSchemeFactory implements SchemeFactory { - public TGetColumnsReqTupleScheme getScheme() { - return new TGetColumnsReqTupleScheme(); - } - } - - private static class TGetColumnsReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetColumnsReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetCatalogName()) { - optionals.set(0); - } - if (struct.isSetSchemaName()) { - optionals.set(1); - } - if (struct.isSetTableName()) { - optionals.set(2); - } - if (struct.isSetColumnName()) { - optionals.set(3); - } - oprot.writeBitSet(optionals, 4); - if (struct.isSetCatalogName()) { - oprot.writeString(struct.catalogName); - } - if (struct.isSetSchemaName()) { - oprot.writeString(struct.schemaName); - } - if (struct.isSetTableName()) { - oprot.writeString(struct.tableName); - } - if (struct.isSetColumnName()) { - oprot.writeString(struct.columnName); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetColumnsReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - BitSet incoming = iprot.readBitSet(4); - if (incoming.get(0)) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } - if (incoming.get(1)) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } - if (incoming.get(2)) { - struct.tableName = 
iprot.readString(); - struct.setTableNameIsSet(true); - } - if (incoming.get(3)) { - struct.columnName = iprot.readString(); - struct.setColumnNameIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetColumnsResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetColumnsResp.java deleted file mode 100644 index d6cf1be6d304b..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetColumnsResp.java +++ /dev/null @@ -1,505 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetColumnsResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetColumnsResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetColumnsRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetColumnsRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetColumnsResp.class, metaDataMap); - } - - public TGetColumnsResp() { - } - - public TGetColumnsResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetColumnsResp(TGetColumnsResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetColumnsResp deepCopy() { - return new TGetColumnsResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetColumnsResp) - return this.equals((TGetColumnsResp)that); - return false; - } - - public boolean equals(TGetColumnsResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - 
builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - builder.append(present_operationHandle); - if (present_operationHandle) - builder.append(operationHandle); - - return builder.toHashCode(); - } - - public int compareTo(TGetColumnsResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetColumnsResp typedOther = (TGetColumnsResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(typedOther.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, typedOther.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetColumnsResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetColumnsRespStandardSchemeFactory implements SchemeFactory { - public TGetColumnsRespStandardScheme getScheme() { - return new TGetColumnsRespStandardScheme(); - } - } - - private static class TGetColumnsRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetColumnsResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetColumnsResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetColumnsRespTupleSchemeFactory implements SchemeFactory { - public TGetColumnsRespTupleScheme getScheme() { - return new TGetColumnsRespTupleScheme(); - } - } - - private static class TGetColumnsRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetColumnsResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override - public void 
read(org.apache.thrift.protocol.TProtocol prot, TGetColumnsResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetDelegationTokenReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetDelegationTokenReq.java deleted file mode 100644 index 6c6bb00e43e43..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetDelegationTokenReq.java +++ /dev/null @@ -1,592 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetDelegationTokenReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetDelegationTokenReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OWNER_FIELD_DESC = new org.apache.thrift.protocol.TField("owner", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField RENEWER_FIELD_DESC = new org.apache.thrift.protocol.TField("renewer", org.apache.thrift.protocol.TType.STRING, (short)3); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetDelegationTokenReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetDelegationTokenReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String owner; // required - private String renewer; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - OWNER((short)2, "owner"), - RENEWER((short)3, "renewer"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // OWNER - return OWNER; - case 3: // RENEWER - return RENEWER; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.OWNER, new org.apache.thrift.meta_data.FieldMetaData("owner", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.RENEWER, new org.apache.thrift.meta_data.FieldMetaData("renewer", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetDelegationTokenReq.class, metaDataMap); - } - - public TGetDelegationTokenReq() { - } - - public TGetDelegationTokenReq( - TSessionHandle sessionHandle, - String owner, - String renewer) - { - this(); - this.sessionHandle = sessionHandle; - this.owner = owner; - this.renewer = renewer; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetDelegationTokenReq(TGetDelegationTokenReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetOwner()) { - this.owner = other.owner; - } - if (other.isSetRenewer()) { - this.renewer = other.renewer; - } - } - - public TGetDelegationTokenReq deepCopy() { - return new TGetDelegationTokenReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.owner = null; - this.renewer = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getOwner() { - return this.owner; - } - - public void setOwner(String owner) { - this.owner = owner; - } - - public void unsetOwner() { - this.owner = null; - } - - /** Returns true if field owner is set (has been assigned a value) and false otherwise */ - public boolean isSetOwner() { - return this.owner != null; - } - - public void setOwnerIsSet(boolean value) { - if (!value) { - this.owner = null; - } - } - - public String getRenewer() { - return this.renewer; - } - - public void setRenewer(String renewer) { - this.renewer = renewer; - } - - public void unsetRenewer() { - this.renewer = null; - } - - /** Returns true if field renewer is set (has been assigned a value) and false otherwise */ - public boolean isSetRenewer() { - return this.renewer != null; - } - - public void setRenewerIsSet(boolean value) { - if (!value) { - this.renewer = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case OWNER: - if (value == null) { - unsetOwner(); - } else { - setOwner((String)value); - } - break; - - case RENEWER: - if (value == null) { - unsetRenewer(); - } else { - setRenewer((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case OWNER: - return getOwner(); - - case RENEWER: - return getRenewer(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case OWNER: - return isSetOwner(); - case RENEWER: - return isSetRenewer(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetDelegationTokenReq) - return this.equals((TGetDelegationTokenReq)that); - return false; - } - - public boolean equals(TGetDelegationTokenReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || 
that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_owner = true && this.isSetOwner(); - boolean that_present_owner = true && that.isSetOwner(); - if (this_present_owner || that_present_owner) { - if (!(this_present_owner && that_present_owner)) - return false; - if (!this.owner.equals(that.owner)) - return false; - } - - boolean this_present_renewer = true && this.isSetRenewer(); - boolean that_present_renewer = true && that.isSetRenewer(); - if (this_present_renewer || that_present_renewer) { - if (!(this_present_renewer && that_present_renewer)) - return false; - if (!this.renewer.equals(that.renewer)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - boolean present_owner = true && (isSetOwner()); - builder.append(present_owner); - if (present_owner) - builder.append(owner); - - boolean present_renewer = true && (isSetRenewer()); - builder.append(present_renewer); - if (present_renewer) - builder.append(renewer); - - return builder.toHashCode(); - } - - public int compareTo(TGetDelegationTokenReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetDelegationTokenReq typedOther = (TGetDelegationTokenReq)other; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOwner()).compareTo(typedOther.isSetOwner()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOwner()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.owner, typedOther.owner); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetRenewer()).compareTo(typedOther.isSetRenewer()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetRenewer()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.renewer, typedOther.renewer); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetDelegationTokenReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (!first) sb.append(", "); - sb.append("owner:"); - if (this.owner == null) { - sb.append("null"); - } else { - sb.append(this.owner); - } - first = 
false; - if (!first) sb.append(", "); - sb.append("renewer:"); - if (this.renewer == null) { - sb.append("null"); - } else { - sb.append(this.renewer); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - if (!isSetOwner()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'owner' is unset! Struct:" + toString()); - } - - if (!isSetRenewer()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'renewer' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetDelegationTokenReqStandardSchemeFactory implements SchemeFactory { - public TGetDelegationTokenReqStandardScheme getScheme() { - return new TGetDelegationTokenReqStandardScheme(); - } - } - - private static class TGetDelegationTokenReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetDelegationTokenReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OWNER - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.owner = iprot.readString(); - struct.setOwnerIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // RENEWER - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.renewer = iprot.readString(); - struct.setRenewerIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetDelegationTokenReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.owner != 
null) { - oprot.writeFieldBegin(OWNER_FIELD_DESC); - oprot.writeString(struct.owner); - oprot.writeFieldEnd(); - } - if (struct.renewer != null) { - oprot.writeFieldBegin(RENEWER_FIELD_DESC); - oprot.writeString(struct.renewer); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetDelegationTokenReqTupleSchemeFactory implements SchemeFactory { - public TGetDelegationTokenReqTupleScheme getScheme() { - return new TGetDelegationTokenReqTupleScheme(); - } - } - - private static class TGetDelegationTokenReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetDelegationTokenReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - oprot.writeString(struct.owner); - oprot.writeString(struct.renewer); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetDelegationTokenReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - struct.owner = iprot.readString(); - struct.setOwnerIsSet(true); - struct.renewer = iprot.readString(); - struct.setRenewerIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetDelegationTokenResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetDelegationTokenResp.java deleted file mode 100644 index d14c5e029a35d..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetDelegationTokenResp.java +++ /dev/null @@ -1,500 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetDelegationTokenResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetDelegationTokenResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField DELEGATION_TOKEN_FIELD_DESC = new org.apache.thrift.protocol.TField("delegationToken", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - 
schemes.put(StandardScheme.class, new TGetDelegationTokenRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetDelegationTokenRespTupleSchemeFactory()); - } - - private TStatus status; // required - private String delegationToken; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - DELEGATION_TOKEN((short)2, "delegationToken"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // DELEGATION_TOKEN - return DELEGATION_TOKEN; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.DELEGATION_TOKEN}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.DELEGATION_TOKEN, new org.apache.thrift.meta_data.FieldMetaData("delegationToken", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetDelegationTokenResp.class, metaDataMap); - } - - public TGetDelegationTokenResp() { - } - - public TGetDelegationTokenResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetDelegationTokenResp(TGetDelegationTokenResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetDelegationToken()) { - this.delegationToken = other.delegationToken; - } - } - - public TGetDelegationTokenResp deepCopy() { - return new TGetDelegationTokenResp(this); - } - - @Override - public void clear() { - this.status = null; - this.delegationToken = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public String getDelegationToken() { - return this.delegationToken; - } - - public void setDelegationToken(String delegationToken) { - this.delegationToken = delegationToken; - } - - public void unsetDelegationToken() { - this.delegationToken = null; - } - - /** Returns true if field delegationToken is set (has been assigned a value) and false otherwise */ - public boolean isSetDelegationToken() { - return this.delegationToken != null; - } - - public void setDelegationTokenIsSet(boolean value) { - if (!value) { - this.delegationToken = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case DELEGATION_TOKEN: - if (value == null) { - unsetDelegationToken(); - } else { - setDelegationToken((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case DELEGATION_TOKEN: - return getDelegationToken(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case DELEGATION_TOKEN: - return isSetDelegationToken(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetDelegationTokenResp) - return this.equals((TGetDelegationTokenResp)that); - return false; - } - - public boolean equals(TGetDelegationTokenResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_delegationToken = true && this.isSetDelegationToken(); - boolean that_present_delegationToken = true && that.isSetDelegationToken(); - if (this_present_delegationToken || that_present_delegationToken) { - if (!(this_present_delegationToken && that_present_delegationToken)) - return false; - if (!this.delegationToken.equals(that.delegationToken)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - 
builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_delegationToken = true && (isSetDelegationToken()); - builder.append(present_delegationToken); - if (present_delegationToken) - builder.append(delegationToken); - - return builder.toHashCode(); - } - - public int compareTo(TGetDelegationTokenResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetDelegationTokenResp typedOther = (TGetDelegationTokenResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetDelegationToken()).compareTo(typedOther.isSetDelegationToken()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetDelegationToken()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.delegationToken, typedOther.delegationToken); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetDelegationTokenResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetDelegationToken()) { - if (!first) sb.append(", "); - sb.append("delegationToken:"); - if (this.delegationToken == null) { - sb.append("null"); - } else { - sb.append(this.delegationToken); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetDelegationTokenRespStandardSchemeFactory implements SchemeFactory { - public TGetDelegationTokenRespStandardScheme getScheme() { - return new TGetDelegationTokenRespStandardScheme(); - } - } - - private static class TGetDelegationTokenRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetDelegationTokenResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // DELEGATION_TOKEN - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.delegationToken = iprot.readString(); - struct.setDelegationTokenIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetDelegationTokenResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.delegationToken != null) { - if (struct.isSetDelegationToken()) { - oprot.writeFieldBegin(DELEGATION_TOKEN_FIELD_DESC); - oprot.writeString(struct.delegationToken); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetDelegationTokenRespTupleSchemeFactory implements SchemeFactory { - public TGetDelegationTokenRespTupleScheme getScheme() { - return new TGetDelegationTokenRespTupleScheme(); - } - } - - private static class TGetDelegationTokenRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetDelegationTokenResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetDelegationToken()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetDelegationToken()) { - oprot.writeString(struct.delegationToken); - } - } - - @Override - public void 
read(org.apache.thrift.protocol.TProtocol prot, TGetDelegationTokenResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.delegationToken = iprot.readString(); - struct.setDelegationTokenIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetFunctionsReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetFunctionsReq.java deleted file mode 100644 index ff45ee0386cb9..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetFunctionsReq.java +++ /dev/null @@ -1,707 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetFunctionsReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetFunctionsReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField CATALOG_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("catalogName", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField SCHEMA_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("schemaName", org.apache.thrift.protocol.TType.STRING, (short)3); - private static final org.apache.thrift.protocol.TField FUNCTION_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("functionName", org.apache.thrift.protocol.TType.STRING, (short)4); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetFunctionsReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetFunctionsReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String catalogName; // optional - private String schemaName; // optional - private String functionName; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - CATALOG_NAME((short)2, "catalogName"), - SCHEMA_NAME((short)3, "schemaName"), - FUNCTION_NAME((short)4, "functionName"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // CATALOG_NAME - return CATALOG_NAME; - case 3: // SCHEMA_NAME - return SCHEMA_NAME; - case 4: // FUNCTION_NAME - return FUNCTION_NAME; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.CATALOG_NAME,_Fields.SCHEMA_NAME}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.CATALOG_NAME, new org.apache.thrift.meta_data.FieldMetaData("catalogName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - tmpMap.put(_Fields.SCHEMA_NAME, new org.apache.thrift.meta_data.FieldMetaData("schemaName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - tmpMap.put(_Fields.FUNCTION_NAME, new org.apache.thrift.meta_data.FieldMetaData("functionName", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetFunctionsReq.class, metaDataMap); - } - - public TGetFunctionsReq() { - } - - public TGetFunctionsReq( - TSessionHandle sessionHandle, - String functionName) - { - this(); - this.sessionHandle = sessionHandle; - this.functionName = functionName; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetFunctionsReq(TGetFunctionsReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetCatalogName()) { - this.catalogName = other.catalogName; - } - if (other.isSetSchemaName()) { - this.schemaName = other.schemaName; - } - if (other.isSetFunctionName()) { - this.functionName = other.functionName; - } - } - - public TGetFunctionsReq deepCopy() { - return new TGetFunctionsReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.catalogName = null; - this.schemaName = null; - this.functionName = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getCatalogName() { - return this.catalogName; - } - - public void setCatalogName(String catalogName) { - this.catalogName = catalogName; - } - - public void unsetCatalogName() { - this.catalogName = null; - } - - /** Returns true if field catalogName is set (has been assigned a value) and false otherwise */ - public boolean isSetCatalogName() { - return this.catalogName != null; - } - - public void setCatalogNameIsSet(boolean value) { - if (!value) { - this.catalogName = null; - } - } - - public String getSchemaName() { - return this.schemaName; - } - - public void setSchemaName(String schemaName) { - this.schemaName = schemaName; - } - - public void unsetSchemaName() { - this.schemaName = null; - } - - /** Returns true if field schemaName is set (has been assigned a value) and false otherwise */ - public boolean isSetSchemaName() { - return this.schemaName != null; - } - - public void setSchemaNameIsSet(boolean value) { - if (!value) { - this.schemaName = null; - } - } - - public String getFunctionName() { - return this.functionName; - } - - public void setFunctionName(String functionName) { - this.functionName = functionName; - } - - public void unsetFunctionName() { - this.functionName = null; - } - - /** Returns true if field functionName is set (has been assigned a value) and false otherwise */ - public boolean isSetFunctionName() { - return this.functionName != null; - } - - public void setFunctionNameIsSet(boolean value) { - if (!value) { - this.functionName = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case CATALOG_NAME: - if (value == null) { - unsetCatalogName(); - } else { - setCatalogName((String)value); - } - break; - - case SCHEMA_NAME: - if (value == null) { - unsetSchemaName(); - } else { - setSchemaName((String)value); - } - break; - - case FUNCTION_NAME: - if (value == null) { - unsetFunctionName(); - } else { - setFunctionName((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case CATALOG_NAME: - return getCatalogName(); - - case SCHEMA_NAME: - return getSchemaName(); - - case FUNCTION_NAME: - 
return getFunctionName(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case CATALOG_NAME: - return isSetCatalogName(); - case SCHEMA_NAME: - return isSetSchemaName(); - case FUNCTION_NAME: - return isSetFunctionName(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetFunctionsReq) - return this.equals((TGetFunctionsReq)that); - return false; - } - - public boolean equals(TGetFunctionsReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_catalogName = true && this.isSetCatalogName(); - boolean that_present_catalogName = true && that.isSetCatalogName(); - if (this_present_catalogName || that_present_catalogName) { - if (!(this_present_catalogName && that_present_catalogName)) - return false; - if (!this.catalogName.equals(that.catalogName)) - return false; - } - - boolean this_present_schemaName = true && this.isSetSchemaName(); - boolean that_present_schemaName = true && that.isSetSchemaName(); - if (this_present_schemaName || that_present_schemaName) { - if (!(this_present_schemaName && that_present_schemaName)) - return false; - if (!this.schemaName.equals(that.schemaName)) - return false; - } - - boolean this_present_functionName = true && this.isSetFunctionName(); - boolean that_present_functionName = true && that.isSetFunctionName(); - if (this_present_functionName || that_present_functionName) { - if (!(this_present_functionName && that_present_functionName)) - return false; - if (!this.functionName.equals(that.functionName)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - boolean present_catalogName = true && (isSetCatalogName()); - builder.append(present_catalogName); - if (present_catalogName) - builder.append(catalogName); - - boolean present_schemaName = true && (isSetSchemaName()); - builder.append(present_schemaName); - if (present_schemaName) - builder.append(schemaName); - - boolean present_functionName = true && (isSetFunctionName()); - builder.append(present_functionName); - if (present_functionName) - builder.append(functionName); - - return builder.toHashCode(); - } - - public int compareTo(TGetFunctionsReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetFunctionsReq typedOther = (TGetFunctionsReq)other; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = 
org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetCatalogName()).compareTo(typedOther.isSetCatalogName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetCatalogName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.catalogName, typedOther.catalogName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSchemaName()).compareTo(typedOther.isSetSchemaName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSchemaName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.schemaName, typedOther.schemaName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetFunctionName()).compareTo(typedOther.isSetFunctionName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetFunctionName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.functionName, typedOther.functionName); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetFunctionsReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (isSetCatalogName()) { - if (!first) sb.append(", "); - sb.append("catalogName:"); - if (this.catalogName == null) { - sb.append("null"); - } else { - sb.append(this.catalogName); - } - first = false; - } - if (isSetSchemaName()) { - if (!first) sb.append(", "); - sb.append("schemaName:"); - if (this.schemaName == null) { - sb.append("null"); - } else { - sb.append(this.schemaName); - } - first = false; - } - if (!first) sb.append(", "); - sb.append("functionName:"); - if (this.functionName == null) { - sb.append("null"); - } else { - sb.append(this.functionName); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - if (!isSetFunctionName()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'functionName' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetFunctionsReqStandardSchemeFactory implements SchemeFactory { - public TGetFunctionsReqStandardScheme getScheme() { - return new TGetFunctionsReqStandardScheme(); - } - } - - private static class TGetFunctionsReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetFunctionsReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // CATALOG_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SCHEMA_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // FUNCTION_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.functionName = iprot.readString(); - struct.setFunctionNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetFunctionsReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.catalogName != null) { - if (struct.isSetCatalogName()) { - oprot.writeFieldBegin(CATALOG_NAME_FIELD_DESC); - oprot.writeString(struct.catalogName); - oprot.writeFieldEnd(); - } - } - if (struct.schemaName != null) { - if (struct.isSetSchemaName()) { - oprot.writeFieldBegin(SCHEMA_NAME_FIELD_DESC); - oprot.writeString(struct.schemaName); - oprot.writeFieldEnd(); - } - } - if (struct.functionName != null) { - oprot.writeFieldBegin(FUNCTION_NAME_FIELD_DESC); - 
oprot.writeString(struct.functionName); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetFunctionsReqTupleSchemeFactory implements SchemeFactory { - public TGetFunctionsReqTupleScheme getScheme() { - return new TGetFunctionsReqTupleScheme(); - } - } - - private static class TGetFunctionsReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetFunctionsReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - oprot.writeString(struct.functionName); - BitSet optionals = new BitSet(); - if (struct.isSetCatalogName()) { - optionals.set(0); - } - if (struct.isSetSchemaName()) { - optionals.set(1); - } - oprot.writeBitSet(optionals, 2); - if (struct.isSetCatalogName()) { - oprot.writeString(struct.catalogName); - } - if (struct.isSetSchemaName()) { - oprot.writeString(struct.schemaName); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetFunctionsReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - struct.functionName = iprot.readString(); - struct.setFunctionNameIsSet(true); - BitSet incoming = iprot.readBitSet(2); - if (incoming.get(0)) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } - if (incoming.get(1)) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetFunctionsResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetFunctionsResp.java deleted file mode 100644 index 3adafdacb54ef..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetFunctionsResp.java +++ /dev/null @@ -1,505 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetFunctionsResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetFunctionsResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField 
OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetFunctionsRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetFunctionsRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetFunctionsResp.class, metaDataMap); - } - - public TGetFunctionsResp() { - } - - public TGetFunctionsResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetFunctionsResp(TGetFunctionsResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetFunctionsResp deepCopy() { - return new TGetFunctionsResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetFunctionsResp) - return this.equals((TGetFunctionsResp)that); - return false; - } - - public boolean equals(TGetFunctionsResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - 
builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - builder.append(present_operationHandle); - if (present_operationHandle) - builder.append(operationHandle); - - return builder.toHashCode(); - } - - public int compareTo(TGetFunctionsResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetFunctionsResp typedOther = (TGetFunctionsResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(typedOther.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, typedOther.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetFunctionsResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetFunctionsRespStandardSchemeFactory implements SchemeFactory { - public TGetFunctionsRespStandardScheme getScheme() { - return new TGetFunctionsRespStandardScheme(); - } - } - - private static class TGetFunctionsRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetFunctionsResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetFunctionsResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetFunctionsRespTupleSchemeFactory implements SchemeFactory { - public TGetFunctionsRespTupleScheme getScheme() { - return new TGetFunctionsRespTupleScheme(); - } - } - - private static class TGetFunctionsRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetFunctionsResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override 
- public void read(org.apache.thrift.protocol.TProtocol prot, TGetFunctionsResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoReq.java deleted file mode 100644 index 0139bf04ec7db..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoReq.java +++ /dev/null @@ -1,503 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetInfoReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetInfoReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField INFO_TYPE_FIELD_DESC = new org.apache.thrift.protocol.TField("infoType", org.apache.thrift.protocol.TType.I32, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetInfoReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetInfoReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private TGetInfoType infoType; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - /** - * - * @see TGetInfoType - */ - INFO_TYPE((short)2, "infoType"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // INFO_TYPE - return INFO_TYPE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.INFO_TYPE, new org.apache.thrift.meta_data.FieldMetaData("infoType", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TGetInfoType.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetInfoReq.class, metaDataMap); - } - - public TGetInfoReq() { - } - - public TGetInfoReq( - TSessionHandle sessionHandle, - TGetInfoType infoType) - { - this(); - this.sessionHandle = sessionHandle; - this.infoType = infoType; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetInfoReq(TGetInfoReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetInfoType()) { - this.infoType = other.infoType; - } - } - - public TGetInfoReq deepCopy() { - return new TGetInfoReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.infoType = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - /** - * - * @see TGetInfoType - */ - public TGetInfoType getInfoType() { - return this.infoType; - } - - /** - * - * @see TGetInfoType - */ - public void setInfoType(TGetInfoType infoType) { - this.infoType = infoType; - } - - public void unsetInfoType() { - this.infoType = null; - } - - /** Returns true if field infoType is set (has been assigned a value) and false otherwise */ - public boolean isSetInfoType() { - return this.infoType != null; - } - - public void setInfoTypeIsSet(boolean value) { - if (!value) { - this.infoType = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case INFO_TYPE: - if (value == null) { - unsetInfoType(); - } else { - setInfoType((TGetInfoType)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case INFO_TYPE: - return getInfoType(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case INFO_TYPE: - return isSetInfoType(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetInfoReq) - return this.equals((TGetInfoReq)that); - return false; - } - - public boolean equals(TGetInfoReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_infoType = true && this.isSetInfoType(); - boolean that_present_infoType = true && that.isSetInfoType(); - if (this_present_infoType || that_present_infoType) { - if (!(this_present_infoType && that_present_infoType)) - return false; - if (!this.infoType.equals(that.infoType)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionHandle = 
true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - boolean present_infoType = true && (isSetInfoType()); - builder.append(present_infoType); - if (present_infoType) - builder.append(infoType.getValue()); - - return builder.toHashCode(); - } - - public int compareTo(TGetInfoReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetInfoReq typedOther = (TGetInfoReq)other; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetInfoType()).compareTo(typedOther.isSetInfoType()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetInfoType()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.infoType, typedOther.infoType); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetInfoReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (!first) sb.append(", "); - sb.append("infoType:"); - if (this.infoType == null) { - sb.append("null"); - } else { - sb.append(this.infoType); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - if (!isSetInfoType()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'infoType' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetInfoReqStandardSchemeFactory implements SchemeFactory { - public TGetInfoReqStandardScheme getScheme() { - return new TGetInfoReqStandardScheme(); - } - } - - private static class TGetInfoReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetInfoReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // INFO_TYPE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.infoType = TGetInfoType.findByValue(iprot.readI32()); - struct.setInfoTypeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetInfoReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.infoType != null) { - oprot.writeFieldBegin(INFO_TYPE_FIELD_DESC); - oprot.writeI32(struct.infoType.getValue()); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetInfoReqTupleSchemeFactory implements SchemeFactory { - public TGetInfoReqTupleScheme getScheme() { - return new TGetInfoReqTupleScheme(); - } - } - - private static class TGetInfoReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetInfoReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - oprot.writeI32(struct.infoType.getValue()); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetInfoReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - struct.infoType = 
TGetInfoType.findByValue(iprot.readI32()); - struct.setInfoTypeIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoResp.java deleted file mode 100644 index 2faaa9211b3ba..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoResp.java +++ /dev/null @@ -1,493 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetInfoResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetInfoResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField INFO_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("infoValue", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetInfoRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetInfoRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TGetInfoValue infoValue; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - INFO_VALUE((short)2, "infoValue"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // INFO_VALUE - return INFO_VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.INFO_VALUE, new org.apache.thrift.meta_data.FieldMetaData("infoValue", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetInfoValue.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetInfoResp.class, metaDataMap); - } - - public TGetInfoResp() { - } - - public TGetInfoResp( - TStatus status, - TGetInfoValue infoValue) - { - this(); - this.status = status; - this.infoValue = infoValue; - } - - /** - * Performs a deep copy on other. - */ - public TGetInfoResp(TGetInfoResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetInfoValue()) { - this.infoValue = new TGetInfoValue(other.infoValue); - } - } - - public TGetInfoResp deepCopy() { - return new TGetInfoResp(this); - } - - @Override - public void clear() { - this.status = null; - this.infoValue = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TGetInfoValue getInfoValue() { - return this.infoValue; - } - - public void setInfoValue(TGetInfoValue infoValue) { - this.infoValue = infoValue; - } - - public void unsetInfoValue() { - this.infoValue = null; - } - - /** Returns true if field infoValue is set (has been assigned a value) and false otherwise */ - public boolean isSetInfoValue() { - return this.infoValue != null; - } - - public void setInfoValueIsSet(boolean value) { - if (!value) { - this.infoValue = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case INFO_VALUE: - if (value == null) { - unsetInfoValue(); - } else { - setInfoValue((TGetInfoValue)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case INFO_VALUE: - return getInfoValue(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { 
- throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case INFO_VALUE: - return isSetInfoValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetInfoResp) - return this.equals((TGetInfoResp)that); - return false; - } - - public boolean equals(TGetInfoResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_infoValue = true && this.isSetInfoValue(); - boolean that_present_infoValue = true && that.isSetInfoValue(); - if (this_present_infoValue || that_present_infoValue) { - if (!(this_present_infoValue && that_present_infoValue)) - return false; - if (!this.infoValue.equals(that.infoValue)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_infoValue = true && (isSetInfoValue()); - builder.append(present_infoValue); - if (present_infoValue) - builder.append(infoValue); - - return builder.toHashCode(); - } - - public int compareTo(TGetInfoResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetInfoResp typedOther = (TGetInfoResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetInfoValue()).compareTo(typedOther.isSetInfoValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetInfoValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.infoValue, typedOther.infoValue); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetInfoResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (!first) sb.append(", "); - sb.append("infoValue:"); - if (this.infoValue == null) { - sb.append("null"); - } else { - sb.append(this.infoValue); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new 
org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - if (!isSetInfoValue()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'infoValue' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetInfoRespStandardSchemeFactory implements SchemeFactory { - public TGetInfoRespStandardScheme getScheme() { - return new TGetInfoRespStandardScheme(); - } - } - - private static class TGetInfoRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetInfoResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // INFO_VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.infoValue = new TGetInfoValue(); - struct.infoValue.read(iprot); - struct.setInfoValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetInfoResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.infoValue != null) { - oprot.writeFieldBegin(INFO_VALUE_FIELD_DESC); - struct.infoValue.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetInfoRespTupleSchemeFactory implements SchemeFactory { - public TGetInfoRespTupleScheme getScheme() { - return new TGetInfoRespTupleScheme(); - } - } - - private static class TGetInfoRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetInfoResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - struct.infoValue.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetInfoResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = 
(TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - struct.infoValue = new TGetInfoValue(); - struct.infoValue.read(iprot); - struct.setInfoValueIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoType.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoType.java deleted file mode 100644 index d9dd62414f001..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoType.java +++ /dev/null @@ -1,180 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - - -import java.util.Map; -import java.util.HashMap; -import org.apache.thrift.TEnum; - -public enum TGetInfoType implements org.apache.thrift.TEnum { - CLI_MAX_DRIVER_CONNECTIONS(0), - CLI_MAX_CONCURRENT_ACTIVITIES(1), - CLI_DATA_SOURCE_NAME(2), - CLI_FETCH_DIRECTION(8), - CLI_SERVER_NAME(13), - CLI_SEARCH_PATTERN_ESCAPE(14), - CLI_DBMS_NAME(17), - CLI_DBMS_VER(18), - CLI_ACCESSIBLE_TABLES(19), - CLI_ACCESSIBLE_PROCEDURES(20), - CLI_CURSOR_COMMIT_BEHAVIOR(23), - CLI_DATA_SOURCE_READ_ONLY(25), - CLI_DEFAULT_TXN_ISOLATION(26), - CLI_IDENTIFIER_CASE(28), - CLI_IDENTIFIER_QUOTE_CHAR(29), - CLI_MAX_COLUMN_NAME_LEN(30), - CLI_MAX_CURSOR_NAME_LEN(31), - CLI_MAX_SCHEMA_NAME_LEN(32), - CLI_MAX_CATALOG_NAME_LEN(34), - CLI_MAX_TABLE_NAME_LEN(35), - CLI_SCROLL_CONCURRENCY(43), - CLI_TXN_CAPABLE(46), - CLI_USER_NAME(47), - CLI_TXN_ISOLATION_OPTION(72), - CLI_INTEGRITY(73), - CLI_GETDATA_EXTENSIONS(81), - CLI_NULL_COLLATION(85), - CLI_ALTER_TABLE(86), - CLI_ORDER_BY_COLUMNS_IN_SELECT(90), - CLI_SPECIAL_CHARACTERS(94), - CLI_MAX_COLUMNS_IN_GROUP_BY(97), - CLI_MAX_COLUMNS_IN_INDEX(98), - CLI_MAX_COLUMNS_IN_ORDER_BY(99), - CLI_MAX_COLUMNS_IN_SELECT(100), - CLI_MAX_COLUMNS_IN_TABLE(101), - CLI_MAX_INDEX_SIZE(102), - CLI_MAX_ROW_SIZE(104), - CLI_MAX_STATEMENT_LEN(105), - CLI_MAX_TABLES_IN_SELECT(106), - CLI_MAX_USER_NAME_LEN(107), - CLI_OJ_CAPABILITIES(115), - CLI_XOPEN_CLI_YEAR(10000), - CLI_CURSOR_SENSITIVITY(10001), - CLI_DESCRIBE_PARAMETER(10002), - CLI_CATALOG_NAME(10003), - CLI_COLLATION_SEQ(10004), - CLI_MAX_IDENTIFIER_LEN(10005); - - private final int value; - - private TGetInfoType(int value) { - this.value = value; - } - - /** - * Get the integer value of this enum value, as defined in the Thrift IDL. - */ - public int getValue() { - return value; - } - - /** - * Find a the enum type by its integer value, as defined in the Thrift IDL. - * @return null if the value is not found. 
- */ - public static TGetInfoType findByValue(int value) { - switch (value) { - case 0: - return CLI_MAX_DRIVER_CONNECTIONS; - case 1: - return CLI_MAX_CONCURRENT_ACTIVITIES; - case 2: - return CLI_DATA_SOURCE_NAME; - case 8: - return CLI_FETCH_DIRECTION; - case 13: - return CLI_SERVER_NAME; - case 14: - return CLI_SEARCH_PATTERN_ESCAPE; - case 17: - return CLI_DBMS_NAME; - case 18: - return CLI_DBMS_VER; - case 19: - return CLI_ACCESSIBLE_TABLES; - case 20: - return CLI_ACCESSIBLE_PROCEDURES; - case 23: - return CLI_CURSOR_COMMIT_BEHAVIOR; - case 25: - return CLI_DATA_SOURCE_READ_ONLY; - case 26: - return CLI_DEFAULT_TXN_ISOLATION; - case 28: - return CLI_IDENTIFIER_CASE; - case 29: - return CLI_IDENTIFIER_QUOTE_CHAR; - case 30: - return CLI_MAX_COLUMN_NAME_LEN; - case 31: - return CLI_MAX_CURSOR_NAME_LEN; - case 32: - return CLI_MAX_SCHEMA_NAME_LEN; - case 34: - return CLI_MAX_CATALOG_NAME_LEN; - case 35: - return CLI_MAX_TABLE_NAME_LEN; - case 43: - return CLI_SCROLL_CONCURRENCY; - case 46: - return CLI_TXN_CAPABLE; - case 47: - return CLI_USER_NAME; - case 72: - return CLI_TXN_ISOLATION_OPTION; - case 73: - return CLI_INTEGRITY; - case 81: - return CLI_GETDATA_EXTENSIONS; - case 85: - return CLI_NULL_COLLATION; - case 86: - return CLI_ALTER_TABLE; - case 90: - return CLI_ORDER_BY_COLUMNS_IN_SELECT; - case 94: - return CLI_SPECIAL_CHARACTERS; - case 97: - return CLI_MAX_COLUMNS_IN_GROUP_BY; - case 98: - return CLI_MAX_COLUMNS_IN_INDEX; - case 99: - return CLI_MAX_COLUMNS_IN_ORDER_BY; - case 100: - return CLI_MAX_COLUMNS_IN_SELECT; - case 101: - return CLI_MAX_COLUMNS_IN_TABLE; - case 102: - return CLI_MAX_INDEX_SIZE; - case 104: - return CLI_MAX_ROW_SIZE; - case 105: - return CLI_MAX_STATEMENT_LEN; - case 106: - return CLI_MAX_TABLES_IN_SELECT; - case 107: - return CLI_MAX_USER_NAME_LEN; - case 115: - return CLI_OJ_CAPABILITIES; - case 10000: - return CLI_XOPEN_CLI_YEAR; - case 10001: - return CLI_CURSOR_SENSITIVITY; - case 10002: - return CLI_DESCRIBE_PARAMETER; - case 10003: - return CLI_CATALOG_NAME; - case 10004: - return CLI_COLLATION_SEQ; - case 10005: - return CLI_MAX_IDENTIFIER_LEN; - default: - return null; - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoValue.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoValue.java deleted file mode 100644 index fe2a211c46309..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoValue.java +++ /dev/null @@ -1,593 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import 
org.slf4j.LoggerFactory; - -public class TGetInfoValue extends org.apache.thrift.TUnion { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetInfoValue"); - private static final org.apache.thrift.protocol.TField STRING_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("stringValue", org.apache.thrift.protocol.TType.STRING, (short)1); - private static final org.apache.thrift.protocol.TField SMALL_INT_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("smallIntValue", org.apache.thrift.protocol.TType.I16, (short)2); - private static final org.apache.thrift.protocol.TField INTEGER_BITMASK_FIELD_DESC = new org.apache.thrift.protocol.TField("integerBitmask", org.apache.thrift.protocol.TType.I32, (short)3); - private static final org.apache.thrift.protocol.TField INTEGER_FLAG_FIELD_DESC = new org.apache.thrift.protocol.TField("integerFlag", org.apache.thrift.protocol.TType.I32, (short)4); - private static final org.apache.thrift.protocol.TField BINARY_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("binaryValue", org.apache.thrift.protocol.TType.I32, (short)5); - private static final org.apache.thrift.protocol.TField LEN_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("lenValue", org.apache.thrift.protocol.TType.I64, (short)6); - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STRING_VALUE((short)1, "stringValue"), - SMALL_INT_VALUE((short)2, "smallIntValue"), - INTEGER_BITMASK((short)3, "integerBitmask"), - INTEGER_FLAG((short)4, "integerFlag"), - BINARY_VALUE((short)5, "binaryValue"), - LEN_VALUE((short)6, "lenValue"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STRING_VALUE - return STRING_VALUE; - case 2: // SMALL_INT_VALUE - return SMALL_INT_VALUE; - case 3: // INTEGER_BITMASK - return INTEGER_BITMASK; - case 4: // INTEGER_FLAG - return INTEGER_FLAG; - case 5: // BINARY_VALUE - return BINARY_VALUE; - case 6: // LEN_VALUE - return LEN_VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STRING_VALUE, new org.apache.thrift.meta_data.FieldMetaData("stringValue", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.SMALL_INT_VALUE, new org.apache.thrift.meta_data.FieldMetaData("smallIntValue", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I16))); - tmpMap.put(_Fields.INTEGER_BITMASK, new org.apache.thrift.meta_data.FieldMetaData("integerBitmask", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.INTEGER_FLAG, new org.apache.thrift.meta_data.FieldMetaData("integerFlag", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.BINARY_VALUE, new org.apache.thrift.meta_data.FieldMetaData("binaryValue", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.LEN_VALUE, new org.apache.thrift.meta_data.FieldMetaData("lenValue", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetInfoValue.class, metaDataMap); - } - - public TGetInfoValue() { - super(); - } - - public TGetInfoValue(TGetInfoValue._Fields setField, Object value) { - super(setField, value); - } - - public TGetInfoValue(TGetInfoValue other) { - super(other); - } - public TGetInfoValue deepCopy() { - return new TGetInfoValue(this); - } - - public static TGetInfoValue stringValue(String value) { - TGetInfoValue x = new TGetInfoValue(); - x.setStringValue(value); - return x; - } - - public static TGetInfoValue smallIntValue(short value) { - TGetInfoValue x = new TGetInfoValue(); - x.setSmallIntValue(value); - return x; - } - - public static TGetInfoValue integerBitmask(int value) { - TGetInfoValue x = new TGetInfoValue(); - x.setIntegerBitmask(value); - return x; - } - - public static TGetInfoValue integerFlag(int value) { - TGetInfoValue x = new TGetInfoValue(); - x.setIntegerFlag(value); - return x; - } - - public static TGetInfoValue binaryValue(int value) { - TGetInfoValue x = new TGetInfoValue(); - x.setBinaryValue(value); - return x; - } - - public static TGetInfoValue lenValue(long value) { - TGetInfoValue x = new TGetInfoValue(); - x.setLenValue(value); - return x; - } - - - @Override - protected void checkType(_Fields setField, Object value) throws ClassCastException { - switch (setField) { - case STRING_VALUE: - if (value instanceof 
String) { - break; - } - throw new ClassCastException("Was expecting value of type String for field 'stringValue', but got " + value.getClass().getSimpleName()); - case SMALL_INT_VALUE: - if (value instanceof Short) { - break; - } - throw new ClassCastException("Was expecting value of type Short for field 'smallIntValue', but got " + value.getClass().getSimpleName()); - case INTEGER_BITMASK: - if (value instanceof Integer) { - break; - } - throw new ClassCastException("Was expecting value of type Integer for field 'integerBitmask', but got " + value.getClass().getSimpleName()); - case INTEGER_FLAG: - if (value instanceof Integer) { - break; - } - throw new ClassCastException("Was expecting value of type Integer for field 'integerFlag', but got " + value.getClass().getSimpleName()); - case BINARY_VALUE: - if (value instanceof Integer) { - break; - } - throw new ClassCastException("Was expecting value of type Integer for field 'binaryValue', but got " + value.getClass().getSimpleName()); - case LEN_VALUE: - if (value instanceof Long) { - break; - } - throw new ClassCastException("Was expecting value of type Long for field 'lenValue', but got " + value.getClass().getSimpleName()); - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected Object standardSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, org.apache.thrift.protocol.TField field) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(field.id); - if (setField != null) { - switch (setField) { - case STRING_VALUE: - if (field.type == STRING_VALUE_FIELD_DESC.type) { - String stringValue; - stringValue = iprot.readString(); - return stringValue; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case SMALL_INT_VALUE: - if (field.type == SMALL_INT_VALUE_FIELD_DESC.type) { - Short smallIntValue; - smallIntValue = iprot.readI16(); - return smallIntValue; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case INTEGER_BITMASK: - if (field.type == INTEGER_BITMASK_FIELD_DESC.type) { - Integer integerBitmask; - integerBitmask = iprot.readI32(); - return integerBitmask; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case INTEGER_FLAG: - if (field.type == INTEGER_FLAG_FIELD_DESC.type) { - Integer integerFlag; - integerFlag = iprot.readI32(); - return integerFlag; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case BINARY_VALUE: - if (field.type == BINARY_VALUE_FIELD_DESC.type) { - Integer binaryValue; - binaryValue = iprot.readI32(); - return binaryValue; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case LEN_VALUE: - if (field.type == LEN_VALUE_FIELD_DESC.type) { - Long lenValue; - lenValue = iprot.readI64(); - return lenValue; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - return null; - } - } - - @Override - protected void standardSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case STRING_VALUE: - String stringValue = (String)value_; - oprot.writeString(stringValue); - return; - case SMALL_INT_VALUE: - Short smallIntValue = (Short)value_; 
- oprot.writeI16(smallIntValue); - return; - case INTEGER_BITMASK: - Integer integerBitmask = (Integer)value_; - oprot.writeI32(integerBitmask); - return; - case INTEGER_FLAG: - Integer integerFlag = (Integer)value_; - oprot.writeI32(integerFlag); - return; - case BINARY_VALUE: - Integer binaryValue = (Integer)value_; - oprot.writeI32(binaryValue); - return; - case LEN_VALUE: - Long lenValue = (Long)value_; - oprot.writeI64(lenValue); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected Object tupleSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, short fieldID) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(fieldID); - if (setField != null) { - switch (setField) { - case STRING_VALUE: - String stringValue; - stringValue = iprot.readString(); - return stringValue; - case SMALL_INT_VALUE: - Short smallIntValue; - smallIntValue = iprot.readI16(); - return smallIntValue; - case INTEGER_BITMASK: - Integer integerBitmask; - integerBitmask = iprot.readI32(); - return integerBitmask; - case INTEGER_FLAG: - Integer integerFlag; - integerFlag = iprot.readI32(); - return integerFlag; - case BINARY_VALUE: - Integer binaryValue; - binaryValue = iprot.readI32(); - return binaryValue; - case LEN_VALUE: - Long lenValue; - lenValue = iprot.readI64(); - return lenValue; - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - throw new TProtocolException("Couldn't find a field with field id " + fieldID); - } - } - - @Override - protected void tupleSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case STRING_VALUE: - String stringValue = (String)value_; - oprot.writeString(stringValue); - return; - case SMALL_INT_VALUE: - Short smallIntValue = (Short)value_; - oprot.writeI16(smallIntValue); - return; - case INTEGER_BITMASK: - Integer integerBitmask = (Integer)value_; - oprot.writeI32(integerBitmask); - return; - case INTEGER_FLAG: - Integer integerFlag = (Integer)value_; - oprot.writeI32(integerFlag); - return; - case BINARY_VALUE: - Integer binaryValue = (Integer)value_; - oprot.writeI32(binaryValue); - return; - case LEN_VALUE: - Long lenValue = (Long)value_; - oprot.writeI64(lenValue); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected org.apache.thrift.protocol.TField getFieldDesc(_Fields setField) { - switch (setField) { - case STRING_VALUE: - return STRING_VALUE_FIELD_DESC; - case SMALL_INT_VALUE: - return SMALL_INT_VALUE_FIELD_DESC; - case INTEGER_BITMASK: - return INTEGER_BITMASK_FIELD_DESC; - case INTEGER_FLAG: - return INTEGER_FLAG_FIELD_DESC; - case BINARY_VALUE: - return BINARY_VALUE_FIELD_DESC; - case LEN_VALUE: - return LEN_VALUE_FIELD_DESC; - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected org.apache.thrift.protocol.TStruct getStructDesc() { - return STRUCT_DESC; - } - - @Override - protected _Fields enumForId(short id) { - return _Fields.findByThriftIdOrThrow(id); - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - - public String getStringValue() { - if (getSetField() == _Fields.STRING_VALUE) { - return (String)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'stringValue' because 
union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setStringValue(String value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.STRING_VALUE; - value_ = value; - } - - public short getSmallIntValue() { - if (getSetField() == _Fields.SMALL_INT_VALUE) { - return (Short)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'smallIntValue' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setSmallIntValue(short value) { - setField_ = _Fields.SMALL_INT_VALUE; - value_ = value; - } - - public int getIntegerBitmask() { - if (getSetField() == _Fields.INTEGER_BITMASK) { - return (Integer)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'integerBitmask' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setIntegerBitmask(int value) { - setField_ = _Fields.INTEGER_BITMASK; - value_ = value; - } - - public int getIntegerFlag() { - if (getSetField() == _Fields.INTEGER_FLAG) { - return (Integer)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'integerFlag' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setIntegerFlag(int value) { - setField_ = _Fields.INTEGER_FLAG; - value_ = value; - } - - public int getBinaryValue() { - if (getSetField() == _Fields.BINARY_VALUE) { - return (Integer)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'binaryValue' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setBinaryValue(int value) { - setField_ = _Fields.BINARY_VALUE; - value_ = value; - } - - public long getLenValue() { - if (getSetField() == _Fields.LEN_VALUE) { - return (Long)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'lenValue' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setLenValue(long value) { - setField_ = _Fields.LEN_VALUE; - value_ = value; - } - - public boolean isSetStringValue() { - return setField_ == _Fields.STRING_VALUE; - } - - - public boolean isSetSmallIntValue() { - return setField_ == _Fields.SMALL_INT_VALUE; - } - - - public boolean isSetIntegerBitmask() { - return setField_ == _Fields.INTEGER_BITMASK; - } - - - public boolean isSetIntegerFlag() { - return setField_ == _Fields.INTEGER_FLAG; - } - - - public boolean isSetBinaryValue() { - return setField_ == _Fields.BINARY_VALUE; - } - - - public boolean isSetLenValue() { - return setField_ == _Fields.LEN_VALUE; - } - - - public boolean equals(Object other) { - if (other instanceof TGetInfoValue) { - return equals((TGetInfoValue)other); - } else { - return false; - } - } - - public boolean equals(TGetInfoValue other) { - return other != null && getSetField() == other.getSetField() && getFieldValue().equals(other.getFieldValue()); - } - - @Override - public int compareTo(TGetInfoValue other) { - int lastComparison = org.apache.thrift.TBaseHelper.compareTo(getSetField(), other.getSetField()); - if (lastComparison == 0) { - return org.apache.thrift.TBaseHelper.compareTo(getFieldValue(), other.getFieldValue()); - } - return lastComparison; - } - - - @Override - public int hashCode() { - HashCodeBuilder hcb = new HashCodeBuilder(); - hcb.append(this.getClass().getName()); - org.apache.thrift.TFieldIdEnum setField = getSetField(); - if (setField != null) { - hcb.append(setField.getThriftFieldId()); - Object value = 
getFieldValue(); - if (value instanceof org.apache.thrift.TEnum) { - hcb.append(((org.apache.thrift.TEnum)getFieldValue()).getValue()); - } else { - hcb.append(value); - } - } - return hcb.toHashCode(); - } - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - -} diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetOperationStatusReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetOperationStatusReq.java deleted file mode 100644 index b88591ea1945b..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetOperationStatusReq.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetOperationStatusReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetOperationStatusReq"); - - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetOperationStatusReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetOperationStatusReqTupleSchemeFactory()); - } - - private TOperationHandle operationHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OPERATION_HANDLE((short)1, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetOperationStatusReq.class, metaDataMap); - } - - public TGetOperationStatusReq() { - } - - public TGetOperationStatusReq( - TOperationHandle operationHandle) - { - this(); - this.operationHandle = operationHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetOperationStatusReq(TGetOperationStatusReq other) { - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetOperationStatusReq deepCopy() { - return new TGetOperationStatusReq(this); - } - - @Override - public void clear() { - this.operationHandle = null; - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetOperationStatusReq) - return this.equals((TGetOperationStatusReq)that); - return false; - } - - public boolean equals(TGetOperationStatusReq that) { - if (that == null) - return false; - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_operationHandle = true && (isSetOperationHandle()); - builder.append(present_operationHandle); - if (present_operationHandle) - builder.append(operationHandle); - - return builder.toHashCode(); - } - - public int compareTo(TGetOperationStatusReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetOperationStatusReq typedOther = (TGetOperationStatusReq)other; - - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(typedOther.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, typedOther.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException 
{ - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetOperationStatusReq("); - boolean first = true; - - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetOperationHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetOperationStatusReqStandardSchemeFactory implements SchemeFactory { - public TGetOperationStatusReqStandardScheme getScheme() { - return new TGetOperationStatusReqStandardScheme(); - } - } - - private static class TGetOperationStatusReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetOperationStatusReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetOperationStatusReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.operationHandle != null) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetOperationStatusReqTupleSchemeFactory implements SchemeFactory { - public TGetOperationStatusReqTupleScheme getScheme() { - return new TGetOperationStatusReqTupleScheme(); - } - } - - private static class TGetOperationStatusReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, 
TGetOperationStatusReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.operationHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetOperationStatusReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetOperationStatusResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetOperationStatusResp.java deleted file mode 100644 index 94ba6bb1146de..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetOperationStatusResp.java +++ /dev/null @@ -1,827 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetOperationStatusResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetOperationStatusResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_STATE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationState", org.apache.thrift.protocol.TType.I32, (short)2); - private static final org.apache.thrift.protocol.TField SQL_STATE_FIELD_DESC = new org.apache.thrift.protocol.TField("sqlState", org.apache.thrift.protocol.TType.STRING, (short)3); - private static final org.apache.thrift.protocol.TField ERROR_CODE_FIELD_DESC = new org.apache.thrift.protocol.TField("errorCode", org.apache.thrift.protocol.TType.I32, (short)4); - private static final org.apache.thrift.protocol.TField ERROR_MESSAGE_FIELD_DESC = new org.apache.thrift.protocol.TField("errorMessage", org.apache.thrift.protocol.TType.STRING, (short)5); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetOperationStatusRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetOperationStatusRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationState operationState; // optional - private String sqlState; // optional - private int errorCode; // optional - private String 
errorMessage; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - /** - * - * @see TOperationState - */ - OPERATION_STATE((short)2, "operationState"), - SQL_STATE((short)3, "sqlState"), - ERROR_CODE((short)4, "errorCode"), - ERROR_MESSAGE((short)5, "errorMessage"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_STATE - return OPERATION_STATE; - case 3: // SQL_STATE - return SQL_STATE; - case 4: // ERROR_CODE - return ERROR_CODE; - case 5: // ERROR_MESSAGE - return ERROR_MESSAGE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __ERRORCODE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.OPERATION_STATE,_Fields.SQL_STATE,_Fields.ERROR_CODE,_Fields.ERROR_MESSAGE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_STATE, new org.apache.thrift.meta_data.FieldMetaData("operationState", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TOperationState.class))); - tmpMap.put(_Fields.SQL_STATE, new org.apache.thrift.meta_data.FieldMetaData("sqlState", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.ERROR_CODE, new org.apache.thrift.meta_data.FieldMetaData("errorCode", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.ERROR_MESSAGE, new org.apache.thrift.meta_data.FieldMetaData("errorMessage", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); 
- metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetOperationStatusResp.class, metaDataMap); - } - - public TGetOperationStatusResp() { - } - - public TGetOperationStatusResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TGetOperationStatusResp(TGetOperationStatusResp other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationState()) { - this.operationState = other.operationState; - } - if (other.isSetSqlState()) { - this.sqlState = other.sqlState; - } - this.errorCode = other.errorCode; - if (other.isSetErrorMessage()) { - this.errorMessage = other.errorMessage; - } - } - - public TGetOperationStatusResp deepCopy() { - return new TGetOperationStatusResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationState = null; - this.sqlState = null; - setErrorCodeIsSet(false); - this.errorCode = 0; - this.errorMessage = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - /** - * - * @see TOperationState - */ - public TOperationState getOperationState() { - return this.operationState; - } - - /** - * - * @see TOperationState - */ - public void setOperationState(TOperationState operationState) { - this.operationState = operationState; - } - - public void unsetOperationState() { - this.operationState = null; - } - - /** Returns true if field operationState is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationState() { - return this.operationState != null; - } - - public void setOperationStateIsSet(boolean value) { - if (!value) { - this.operationState = null; - } - } - - public String getSqlState() { - return this.sqlState; - } - - public void setSqlState(String sqlState) { - this.sqlState = sqlState; - } - - public void unsetSqlState() { - this.sqlState = null; - } - - /** Returns true if field sqlState is set (has been assigned a value) and false otherwise */ - public boolean isSetSqlState() { - return this.sqlState != null; - } - - public void setSqlStateIsSet(boolean value) { - if (!value) { - this.sqlState = null; - } - } - - public int getErrorCode() { - return this.errorCode; - } - - public void setErrorCode(int errorCode) { - this.errorCode = errorCode; - setErrorCodeIsSet(true); - } - - public void unsetErrorCode() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __ERRORCODE_ISSET_ID); - } - - /** Returns true if field errorCode is set (has been assigned a value) and false otherwise */ - public boolean isSetErrorCode() { - return EncodingUtils.testBit(__isset_bitfield, __ERRORCODE_ISSET_ID); - } - - public void setErrorCodeIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __ERRORCODE_ISSET_ID, value); - } - - public String getErrorMessage() { - return this.errorMessage; - } - - public void setErrorMessage(String errorMessage) { - this.errorMessage = errorMessage; - } - - public void unsetErrorMessage() { - this.errorMessage = null; - } - - 
/** Returns true if field errorMessage is set (has been assigned a value) and false otherwise */ - public boolean isSetErrorMessage() { - return this.errorMessage != null; - } - - public void setErrorMessageIsSet(boolean value) { - if (!value) { - this.errorMessage = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_STATE: - if (value == null) { - unsetOperationState(); - } else { - setOperationState((TOperationState)value); - } - break; - - case SQL_STATE: - if (value == null) { - unsetSqlState(); - } else { - setSqlState((String)value); - } - break; - - case ERROR_CODE: - if (value == null) { - unsetErrorCode(); - } else { - setErrorCode((Integer)value); - } - break; - - case ERROR_MESSAGE: - if (value == null) { - unsetErrorMessage(); - } else { - setErrorMessage((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_STATE: - return getOperationState(); - - case SQL_STATE: - return getSqlState(); - - case ERROR_CODE: - return Integer.valueOf(getErrorCode()); - - case ERROR_MESSAGE: - return getErrorMessage(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_STATE: - return isSetOperationState(); - case SQL_STATE: - return isSetSqlState(); - case ERROR_CODE: - return isSetErrorCode(); - case ERROR_MESSAGE: - return isSetErrorMessage(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetOperationStatusResp) - return this.equals((TGetOperationStatusResp)that); - return false; - } - - public boolean equals(TGetOperationStatusResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationState = true && this.isSetOperationState(); - boolean that_present_operationState = true && that.isSetOperationState(); - if (this_present_operationState || that_present_operationState) { - if (!(this_present_operationState && that_present_operationState)) - return false; - if (!this.operationState.equals(that.operationState)) - return false; - } - - boolean this_present_sqlState = true && this.isSetSqlState(); - boolean that_present_sqlState = true && that.isSetSqlState(); - if (this_present_sqlState || that_present_sqlState) { - if (!(this_present_sqlState && that_present_sqlState)) - return false; - if (!this.sqlState.equals(that.sqlState)) - return false; - } - - boolean this_present_errorCode = true && this.isSetErrorCode(); - boolean that_present_errorCode = true && that.isSetErrorCode(); - if (this_present_errorCode || that_present_errorCode) { - if (!(this_present_errorCode && that_present_errorCode)) - return false; - if (this.errorCode != that.errorCode) - return false; - } - - boolean this_present_errorMessage 
= true && this.isSetErrorMessage(); - boolean that_present_errorMessage = true && that.isSetErrorMessage(); - if (this_present_errorMessage || that_present_errorMessage) { - if (!(this_present_errorMessage && that_present_errorMessage)) - return false; - if (!this.errorMessage.equals(that.errorMessage)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_operationState = true && (isSetOperationState()); - builder.append(present_operationState); - if (present_operationState) - builder.append(operationState.getValue()); - - boolean present_sqlState = true && (isSetSqlState()); - builder.append(present_sqlState); - if (present_sqlState) - builder.append(sqlState); - - boolean present_errorCode = true && (isSetErrorCode()); - builder.append(present_errorCode); - if (present_errorCode) - builder.append(errorCode); - - boolean present_errorMessage = true && (isSetErrorMessage()); - builder.append(present_errorMessage); - if (present_errorMessage) - builder.append(errorMessage); - - return builder.toHashCode(); - } - - public int compareTo(TGetOperationStatusResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetOperationStatusResp typedOther = (TGetOperationStatusResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationState()).compareTo(typedOther.isSetOperationState()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationState()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationState, typedOther.operationState); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSqlState()).compareTo(typedOther.isSetSqlState()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSqlState()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sqlState, typedOther.sqlState); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetErrorCode()).compareTo(typedOther.isSetErrorCode()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetErrorCode()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.errorCode, typedOther.errorCode); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetErrorMessage()).compareTo(typedOther.isSetErrorMessage()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetErrorMessage()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.errorMessage, typedOther.errorMessage); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void 
write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetOperationStatusResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationState()) { - if (!first) sb.append(", "); - sb.append("operationState:"); - if (this.operationState == null) { - sb.append("null"); - } else { - sb.append(this.operationState); - } - first = false; - } - if (isSetSqlState()) { - if (!first) sb.append(", "); - sb.append("sqlState:"); - if (this.sqlState == null) { - sb.append("null"); - } else { - sb.append(this.sqlState); - } - first = false; - } - if (isSetErrorCode()) { - if (!first) sb.append(", "); - sb.append("errorCode:"); - sb.append(this.errorCode); - first = false; - } - if (isSetErrorMessage()) { - if (!first) sb.append(", "); - sb.append("errorMessage:"); - if (this.errorMessage == null) { - sb.append("null"); - } else { - sb.append(this.errorMessage); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
- __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetOperationStatusRespStandardSchemeFactory implements SchemeFactory { - public TGetOperationStatusRespStandardScheme getScheme() { - return new TGetOperationStatusRespStandardScheme(); - } - } - - private static class TGetOperationStatusRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetOperationStatusResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_STATE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.operationState = TOperationState.findByValue(iprot.readI32()); - struct.setOperationStateIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SQL_STATE - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.sqlState = iprot.readString(); - struct.setSqlStateIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // ERROR_CODE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.errorCode = iprot.readI32(); - struct.setErrorCodeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 5: // ERROR_MESSAGE - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.errorMessage = iprot.readString(); - struct.setErrorMessageIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetOperationStatusResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationState != null) { - if (struct.isSetOperationState()) { - oprot.writeFieldBegin(OPERATION_STATE_FIELD_DESC); - oprot.writeI32(struct.operationState.getValue()); - oprot.writeFieldEnd(); - } - } - if (struct.sqlState != null) { - if (struct.isSetSqlState()) { - oprot.writeFieldBegin(SQL_STATE_FIELD_DESC); - oprot.writeString(struct.sqlState); - oprot.writeFieldEnd(); - } - } - if (struct.isSetErrorCode()) { - oprot.writeFieldBegin(ERROR_CODE_FIELD_DESC); - oprot.writeI32(struct.errorCode); - oprot.writeFieldEnd(); - } - if (struct.errorMessage != null) { - if (struct.isSetErrorMessage()) { - oprot.writeFieldBegin(ERROR_MESSAGE_FIELD_DESC); - oprot.writeString(struct.errorMessage); - oprot.writeFieldEnd(); - } - } - 
oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetOperationStatusRespTupleSchemeFactory implements SchemeFactory { - public TGetOperationStatusRespTupleScheme getScheme() { - return new TGetOperationStatusRespTupleScheme(); - } - } - - private static class TGetOperationStatusRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetOperationStatusResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationState()) { - optionals.set(0); - } - if (struct.isSetSqlState()) { - optionals.set(1); - } - if (struct.isSetErrorCode()) { - optionals.set(2); - } - if (struct.isSetErrorMessage()) { - optionals.set(3); - } - oprot.writeBitSet(optionals, 4); - if (struct.isSetOperationState()) { - oprot.writeI32(struct.operationState.getValue()); - } - if (struct.isSetSqlState()) { - oprot.writeString(struct.sqlState); - } - if (struct.isSetErrorCode()) { - oprot.writeI32(struct.errorCode); - } - if (struct.isSetErrorMessage()) { - oprot.writeString(struct.errorMessage); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetOperationStatusResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(4); - if (incoming.get(0)) { - struct.operationState = TOperationState.findByValue(iprot.readI32()); - struct.setOperationStateIsSet(true); - } - if (incoming.get(1)) { - struct.sqlState = iprot.readString(); - struct.setSqlStateIsSet(true); - } - if (incoming.get(2)) { - struct.errorCode = iprot.readI32(); - struct.setErrorCodeIsSet(true); - } - if (incoming.get(3)) { - struct.errorMessage = iprot.readString(); - struct.setErrorMessageIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetResultSetMetadataReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetResultSetMetadataReq.java deleted file mode 100644 index 3bf363c958468..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetResultSetMetadataReq.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetResultSetMetadataReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private 
static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetResultSetMetadataReq"); - - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetResultSetMetadataReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetResultSetMetadataReqTupleSchemeFactory()); - } - - private TOperationHandle operationHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OPERATION_HANDLE((short)1, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetResultSetMetadataReq.class, metaDataMap); - } - - public TGetResultSetMetadataReq() { - } - - public TGetResultSetMetadataReq( - TOperationHandle operationHandle) - { - this(); - this.operationHandle = operationHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetResultSetMetadataReq(TGetResultSetMetadataReq other) { - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetResultSetMetadataReq deepCopy() { - return new TGetResultSetMetadataReq(this); - } - - @Override - public void clear() { - this.operationHandle = null; - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetResultSetMetadataReq) - return this.equals((TGetResultSetMetadataReq)that); - return false; - } - - public boolean equals(TGetResultSetMetadataReq that) { - if (that == null) - return false; - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_operationHandle = true && (isSetOperationHandle()); - builder.append(present_operationHandle); - if (present_operationHandle) - builder.append(operationHandle); - - return builder.toHashCode(); - } - - public int compareTo(TGetResultSetMetadataReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetResultSetMetadataReq typedOther = (TGetResultSetMetadataReq)other; - - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(typedOther.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, typedOther.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws 
org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetResultSetMetadataReq("); - boolean first = true; - - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetOperationHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetResultSetMetadataReqStandardSchemeFactory implements SchemeFactory { - public TGetResultSetMetadataReqStandardScheme getScheme() { - return new TGetResultSetMetadataReqStandardScheme(); - } - } - - private static class TGetResultSetMetadataReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetResultSetMetadataReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetResultSetMetadataReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.operationHandle != null) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetResultSetMetadataReqTupleSchemeFactory implements SchemeFactory { - public TGetResultSetMetadataReqTupleScheme getScheme() { - return new TGetResultSetMetadataReqTupleScheme(); - } - } - - private static class TGetResultSetMetadataReqTupleScheme extends TupleScheme { - - @Override - public void 
write(org.apache.thrift.protocol.TProtocol prot, TGetResultSetMetadataReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.operationHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetResultSetMetadataReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetResultSetMetadataResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetResultSetMetadataResp.java deleted file mode 100644 index a9bef9f722c16..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetResultSetMetadataResp.java +++ /dev/null @@ -1,505 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetResultSetMetadataResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetResultSetMetadataResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField SCHEMA_FIELD_DESC = new org.apache.thrift.protocol.TField("schema", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetResultSetMetadataRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetResultSetMetadataRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TTableSchema schema; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - SCHEMA((short)2, "schema"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // SCHEMA - return SCHEMA; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.SCHEMA}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.SCHEMA, new org.apache.thrift.meta_data.FieldMetaData("schema", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TTableSchema.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetResultSetMetadataResp.class, metaDataMap); - } - - public TGetResultSetMetadataResp() { - } - - public TGetResultSetMetadataResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetResultSetMetadataResp(TGetResultSetMetadataResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetSchema()) { - this.schema = new TTableSchema(other.schema); - } - } - - public TGetResultSetMetadataResp deepCopy() { - return new TGetResultSetMetadataResp(this); - } - - @Override - public void clear() { - this.status = null; - this.schema = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TTableSchema getSchema() { - return this.schema; - } - - public void setSchema(TTableSchema schema) { - this.schema = schema; - } - - public void unsetSchema() { - this.schema = null; - } - - /** Returns true if field schema is set (has been assigned a value) and false otherwise */ - public boolean isSetSchema() { - return this.schema != null; - } - - public void setSchemaIsSet(boolean value) { - if (!value) { - this.schema = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case SCHEMA: - if (value == null) { - unsetSchema(); - } else { - setSchema((TTableSchema)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case SCHEMA: - return getSchema(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case SCHEMA: - return isSetSchema(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetResultSetMetadataResp) - return this.equals((TGetResultSetMetadataResp)that); - return false; - } - - public boolean equals(TGetResultSetMetadataResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_schema = true && this.isSetSchema(); - boolean that_present_schema = true && that.isSetSchema(); - if (this_present_schema || that_present_schema) { - if (!(this_present_schema && that_present_schema)) - return false; - if (!this.schema.equals(that.schema)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_schema = true && (isSetSchema()); - builder.append(present_schema); - if (present_schema) - builder.append(schema); - - return builder.toHashCode(); - } - - public int 
compareTo(TGetResultSetMetadataResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetResultSetMetadataResp typedOther = (TGetResultSetMetadataResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSchema()).compareTo(typedOther.isSetSchema()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSchema()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.schema, typedOther.schema); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetResultSetMetadataResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetSchema()) { - if (!first) sb.append(", "); - sb.append("schema:"); - if (this.schema == null) { - sb.append("null"); - } else { - sb.append(this.schema); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (schema != null) { - schema.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetResultSetMetadataRespStandardSchemeFactory implements SchemeFactory { - public TGetResultSetMetadataRespStandardScheme getScheme() { - return new TGetResultSetMetadataRespStandardScheme(); - } - } - - private static class TGetResultSetMetadataRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetResultSetMetadataResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // SCHEMA - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.schema = new TTableSchema(); - struct.schema.read(iprot); - struct.setSchemaIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetResultSetMetadataResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.schema != null) { - if (struct.isSetSchema()) { - oprot.writeFieldBegin(SCHEMA_FIELD_DESC); - struct.schema.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetResultSetMetadataRespTupleSchemeFactory implements SchemeFactory { - public TGetResultSetMetadataRespTupleScheme getScheme() { - return new TGetResultSetMetadataRespTupleScheme(); - } - } - - private static class TGetResultSetMetadataRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetResultSetMetadataResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetSchema()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSchema()) { - struct.schema.write(oprot); - } - } - - @Override - public void 
read(org.apache.thrift.protocol.TProtocol prot, TGetResultSetMetadataResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.schema = new TTableSchema(); - struct.schema.read(iprot); - struct.setSchemaIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetSchemasReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetSchemasReq.java deleted file mode 100644 index c2aadaa49a1e9..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetSchemasReq.java +++ /dev/null @@ -1,606 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetSchemasReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetSchemasReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField CATALOG_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("catalogName", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField SCHEMA_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("schemaName", org.apache.thrift.protocol.TType.STRING, (short)3); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetSchemasReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetSchemasReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String catalogName; // optional - private String schemaName; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - CATALOG_NAME((short)2, "catalogName"), - SCHEMA_NAME((short)3, "schemaName"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // CATALOG_NAME - return CATALOG_NAME; - case 3: // SCHEMA_NAME - return SCHEMA_NAME; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.CATALOG_NAME,_Fields.SCHEMA_NAME}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.CATALOG_NAME, new org.apache.thrift.meta_data.FieldMetaData("catalogName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - tmpMap.put(_Fields.SCHEMA_NAME, new org.apache.thrift.meta_data.FieldMetaData("schemaName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetSchemasReq.class, metaDataMap); - } - - public TGetSchemasReq() { - } - - public TGetSchemasReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetSchemasReq(TGetSchemasReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetCatalogName()) { - this.catalogName = other.catalogName; - } - if (other.isSetSchemaName()) { - this.schemaName = other.schemaName; - } - } - - public TGetSchemasReq deepCopy() { - return new TGetSchemasReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.catalogName = null; - this.schemaName = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getCatalogName() { - return this.catalogName; - } - - public void setCatalogName(String catalogName) { - this.catalogName = catalogName; - } - - public void unsetCatalogName() { - this.catalogName = null; - } - - /** Returns true if field catalogName is set (has been assigned a value) and false otherwise */ - public boolean isSetCatalogName() { - return this.catalogName != null; - } - - public void setCatalogNameIsSet(boolean value) { - if (!value) { - this.catalogName = null; - } - } - - public String getSchemaName() { - return this.schemaName; - } - - public void setSchemaName(String schemaName) { - this.schemaName = schemaName; - } - - public void unsetSchemaName() { - this.schemaName = null; - } - - /** Returns true if field schemaName is set (has been assigned a value) and false otherwise */ - public boolean isSetSchemaName() { - return this.schemaName != null; - } - - public void setSchemaNameIsSet(boolean value) { - if (!value) { - this.schemaName = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case CATALOG_NAME: - if (value == null) { - unsetCatalogName(); - } else { - setCatalogName((String)value); - } - break; - - case SCHEMA_NAME: - if (value == null) { - unsetSchemaName(); - } else { - setSchemaName((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case CATALOG_NAME: - return getCatalogName(); - - case SCHEMA_NAME: - return getSchemaName(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case CATALOG_NAME: - return isSetCatalogName(); - case SCHEMA_NAME: - return isSetSchemaName(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetSchemasReq) - return this.equals((TGetSchemasReq)that); - return false; - } - - public boolean equals(TGetSchemasReq that) { - if (that == null) - return false; - - boolean 
this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_catalogName = true && this.isSetCatalogName(); - boolean that_present_catalogName = true && that.isSetCatalogName(); - if (this_present_catalogName || that_present_catalogName) { - if (!(this_present_catalogName && that_present_catalogName)) - return false; - if (!this.catalogName.equals(that.catalogName)) - return false; - } - - boolean this_present_schemaName = true && this.isSetSchemaName(); - boolean that_present_schemaName = true && that.isSetSchemaName(); - if (this_present_schemaName || that_present_schemaName) { - if (!(this_present_schemaName && that_present_schemaName)) - return false; - if (!this.schemaName.equals(that.schemaName)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - boolean present_catalogName = true && (isSetCatalogName()); - builder.append(present_catalogName); - if (present_catalogName) - builder.append(catalogName); - - boolean present_schemaName = true && (isSetSchemaName()); - builder.append(present_schemaName); - if (present_schemaName) - builder.append(schemaName); - - return builder.toHashCode(); - } - - public int compareTo(TGetSchemasReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetSchemasReq typedOther = (TGetSchemasReq)other; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetCatalogName()).compareTo(typedOther.isSetCatalogName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetCatalogName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.catalogName, typedOther.catalogName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSchemaName()).compareTo(typedOther.isSetSchemaName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSchemaName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.schemaName, typedOther.schemaName); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetSchemasReq("); - boolean 
first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (isSetCatalogName()) { - if (!first) sb.append(", "); - sb.append("catalogName:"); - if (this.catalogName == null) { - sb.append("null"); - } else { - sb.append(this.catalogName); - } - first = false; - } - if (isSetSchemaName()) { - if (!first) sb.append(", "); - sb.append("schemaName:"); - if (this.schemaName == null) { - sb.append("null"); - } else { - sb.append(this.schemaName); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetSchemasReqStandardSchemeFactory implements SchemeFactory { - public TGetSchemasReqStandardScheme getScheme() { - return new TGetSchemasReqStandardScheme(); - } - } - - private static class TGetSchemasReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetSchemasReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // CATALOG_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SCHEMA_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetSchemasReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - 
oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.catalogName != null) { - if (struct.isSetCatalogName()) { - oprot.writeFieldBegin(CATALOG_NAME_FIELD_DESC); - oprot.writeString(struct.catalogName); - oprot.writeFieldEnd(); - } - } - if (struct.schemaName != null) { - if (struct.isSetSchemaName()) { - oprot.writeFieldBegin(SCHEMA_NAME_FIELD_DESC); - oprot.writeString(struct.schemaName); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetSchemasReqTupleSchemeFactory implements SchemeFactory { - public TGetSchemasReqTupleScheme getScheme() { - return new TGetSchemasReqTupleScheme(); - } - } - - private static class TGetSchemasReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetSchemasReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetCatalogName()) { - optionals.set(0); - } - if (struct.isSetSchemaName()) { - optionals.set(1); - } - oprot.writeBitSet(optionals, 2); - if (struct.isSetCatalogName()) { - oprot.writeString(struct.catalogName); - } - if (struct.isSetSchemaName()) { - oprot.writeString(struct.schemaName); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetSchemasReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - BitSet incoming = iprot.readBitSet(2); - if (incoming.get(0)) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } - if (incoming.get(1)) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetSchemasResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetSchemasResp.java deleted file mode 100644 index ac1ea3e7cc7af..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetSchemasResp.java +++ /dev/null @@ -1,505 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetSchemasResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new 
org.apache.thrift.protocol.TStruct("TGetSchemasResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetSchemasRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetSchemasRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetSchemasResp.class, metaDataMap); - } - - public TGetSchemasResp() { - } - - public TGetSchemasResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetSchemasResp(TGetSchemasResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetSchemasResp deepCopy() { - return new TGetSchemasResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetSchemasResp) - return this.equals((TGetSchemasResp)that); - return false; - } - - public boolean equals(TGetSchemasResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - 
builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - builder.append(present_operationHandle); - if (present_operationHandle) - builder.append(operationHandle); - - return builder.toHashCode(); - } - - public int compareTo(TGetSchemasResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetSchemasResp typedOther = (TGetSchemasResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(typedOther.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, typedOther.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetSchemasResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetSchemasRespStandardSchemeFactory implements SchemeFactory { - public TGetSchemasRespStandardScheme getScheme() { - return new TGetSchemasRespStandardScheme(); - } - } - - private static class TGetSchemasRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetSchemasResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetSchemasResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetSchemasRespTupleSchemeFactory implements SchemeFactory { - public TGetSchemasRespTupleScheme getScheme() { - return new TGetSchemasRespTupleScheme(); - } - } - - private static class TGetSchemasRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetSchemasResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override - public void 
read(org.apache.thrift.protocol.TProtocol prot, TGetSchemasResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTableTypesReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTableTypesReq.java deleted file mode 100644 index 6f2c713e0be6a..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTableTypesReq.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetTableTypesReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetTableTypesReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetTableTypesReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetTableTypesReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetTableTypesReq.class, metaDataMap); - } - - public TGetTableTypesReq() { - } - - public TGetTableTypesReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. - */ - public TGetTableTypesReq(TGetTableTypesReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - } - - public TGetTableTypesReq deepCopy() { - return new TGetTableTypesReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetTableTypesReq) - return this.equals((TGetTableTypesReq)that); - return false; - } - - public boolean equals(TGetTableTypesReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && 
this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - return builder.toHashCode(); - } - - public int compareTo(TGetTableTypesReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetTableTypesReq typedOther = (TGetTableTypesReq)other; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetTableTypesReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetTableTypesReqStandardSchemeFactory implements SchemeFactory { - public TGetTableTypesReqStandardScheme getScheme() { - return new TGetTableTypesReqStandardScheme(); - } - } - - private static class TGetTableTypesReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetTableTypesReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetTableTypesReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetTableTypesReqTupleSchemeFactory implements SchemeFactory { - public TGetTableTypesReqTupleScheme getScheme() { - return new TGetTableTypesReqTupleScheme(); - } - } - - private static class TGetTableTypesReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetTableTypesReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetTableTypesReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTableTypesResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTableTypesResp.java deleted file mode 100644 index 6f33fbcf5dadc..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTableTypesResp.java +++ /dev/null @@ -1,505 +0,0 @@ -/** - * 
Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetTableTypesResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetTableTypesResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetTableTypesRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetTableTypesRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetTableTypesResp.class, metaDataMap); - } - - public TGetTableTypesResp() { - } - - public TGetTableTypesResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TGetTableTypesResp(TGetTableTypesResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetTableTypesResp deepCopy() { - return new TGetTableTypesResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new 
IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetTableTypesResp) - return this.equals((TGetTableTypesResp)that); - return false; - } - - public boolean equals(TGetTableTypesResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - builder.append(present_operationHandle); - if (present_operationHandle) - builder.append(operationHandle); - - return builder.toHashCode(); - } - - public int compareTo(TGetTableTypesResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetTableTypesResp typedOther = (TGetTableTypesResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(typedOther.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, typedOther.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetTableTypesResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if 
(isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetTableTypesRespStandardSchemeFactory implements SchemeFactory { - public TGetTableTypesRespStandardScheme getScheme() { - return new TGetTableTypesRespStandardScheme(); - } - } - - private static class TGetTableTypesRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetTableTypesResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetTableTypesResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetTableTypesRespTupleSchemeFactory implements SchemeFactory { - public TGetTableTypesRespTupleScheme getScheme() { - return new TGetTableTypesRespTupleScheme(); - } - } - - private static class 
TGetTableTypesRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetTableTypesResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetTableTypesResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTablesReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTablesReq.java deleted file mode 100644 index c973fcc24cb10..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTablesReq.java +++ /dev/null @@ -1,870 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetTablesReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetTablesReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField CATALOG_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("catalogName", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField SCHEMA_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("schemaName", org.apache.thrift.protocol.TType.STRING, (short)3); - private static final org.apache.thrift.protocol.TField TABLE_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("tableName", org.apache.thrift.protocol.TType.STRING, (short)4); - private static final org.apache.thrift.protocol.TField TABLE_TYPES_FIELD_DESC = new org.apache.thrift.protocol.TField("tableTypes", org.apache.thrift.protocol.TType.LIST, (short)5); - - 
private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetTablesReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetTablesReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String catalogName; // optional - private String schemaName; // optional - private String tableName; // optional - private List tableTypes; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - CATALOG_NAME((short)2, "catalogName"), - SCHEMA_NAME((short)3, "schemaName"), - TABLE_NAME((short)4, "tableName"), - TABLE_TYPES((short)5, "tableTypes"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // CATALOG_NAME - return CATALOG_NAME; - case 3: // SCHEMA_NAME - return SCHEMA_NAME; - case 4: // TABLE_NAME - return TABLE_NAME; - case 5: // TABLE_TYPES - return TABLE_TYPES; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.CATALOG_NAME,_Fields.SCHEMA_NAME,_Fields.TABLE_NAME,_Fields.TABLE_TYPES}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.CATALOG_NAME, new org.apache.thrift.meta_data.FieldMetaData("catalogName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - tmpMap.put(_Fields.SCHEMA_NAME, new org.apache.thrift.meta_data.FieldMetaData("schemaName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - tmpMap.put(_Fields.TABLE_NAME, new org.apache.thrift.meta_data.FieldMetaData("tableName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - tmpMap.put(_Fields.TABLE_TYPES, new org.apache.thrift.meta_data.FieldMetaData("tableTypes", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetTablesReq.class, metaDataMap); - } - - public TGetTablesReq() { - } - - public TGetTablesReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetTablesReq(TGetTablesReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetCatalogName()) { - this.catalogName = other.catalogName; - } - if (other.isSetSchemaName()) { - this.schemaName = other.schemaName; - } - if (other.isSetTableName()) { - this.tableName = other.tableName; - } - if (other.isSetTableTypes()) { - List __this__tableTypes = new ArrayList(); - for (String other_element : other.tableTypes) { - __this__tableTypes.add(other_element); - } - this.tableTypes = __this__tableTypes; - } - } - - public TGetTablesReq deepCopy() { - return new TGetTablesReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.catalogName = null; - this.schemaName = null; - this.tableName = null; - this.tableTypes = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getCatalogName() { - return this.catalogName; - } - - public void setCatalogName(String catalogName) { - this.catalogName = catalogName; - } - - public void unsetCatalogName() { - this.catalogName = null; - } - - /** Returns true if field catalogName is set (has been assigned a value) and false otherwise */ - public boolean isSetCatalogName() { - return this.catalogName != null; - } - - public void setCatalogNameIsSet(boolean value) { - if (!value) { - this.catalogName = null; - } - } - - public String getSchemaName() { - return this.schemaName; - } - - public void setSchemaName(String schemaName) { - this.schemaName = schemaName; - } - - public void unsetSchemaName() { - this.schemaName = null; - } - - /** Returns true if field schemaName is set (has been assigned a value) and false otherwise */ - public boolean isSetSchemaName() { - return this.schemaName != null; - } - - public void setSchemaNameIsSet(boolean value) { - if (!value) { - this.schemaName = null; - } - } - - public String getTableName() { - return this.tableName; - } - - public void setTableName(String tableName) { - this.tableName = tableName; - } - - public void unsetTableName() { - this.tableName = null; - } - - /** Returns true if field tableName is set (has been assigned a value) and false otherwise */ - public boolean isSetTableName() { - return this.tableName != null; - } - - public void setTableNameIsSet(boolean value) { - if (!value) { - this.tableName = null; - } - } - - public int getTableTypesSize() { - return (this.tableTypes == null) ? 0 : this.tableTypes.size(); - } - - public java.util.Iterator getTableTypesIterator() { - return (this.tableTypes == null) ? 
null : this.tableTypes.iterator(); - } - - public void addToTableTypes(String elem) { - if (this.tableTypes == null) { - this.tableTypes = new ArrayList(); - } - this.tableTypes.add(elem); - } - - public List getTableTypes() { - return this.tableTypes; - } - - public void setTableTypes(List tableTypes) { - this.tableTypes = tableTypes; - } - - public void unsetTableTypes() { - this.tableTypes = null; - } - - /** Returns true if field tableTypes is set (has been assigned a value) and false otherwise */ - public boolean isSetTableTypes() { - return this.tableTypes != null; - } - - public void setTableTypesIsSet(boolean value) { - if (!value) { - this.tableTypes = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case CATALOG_NAME: - if (value == null) { - unsetCatalogName(); - } else { - setCatalogName((String)value); - } - break; - - case SCHEMA_NAME: - if (value == null) { - unsetSchemaName(); - } else { - setSchemaName((String)value); - } - break; - - case TABLE_NAME: - if (value == null) { - unsetTableName(); - } else { - setTableName((String)value); - } - break; - - case TABLE_TYPES: - if (value == null) { - unsetTableTypes(); - } else { - setTableTypes((List)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case CATALOG_NAME: - return getCatalogName(); - - case SCHEMA_NAME: - return getSchemaName(); - - case TABLE_NAME: - return getTableName(); - - case TABLE_TYPES: - return getTableTypes(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case CATALOG_NAME: - return isSetCatalogName(); - case SCHEMA_NAME: - return isSetSchemaName(); - case TABLE_NAME: - return isSetTableName(); - case TABLE_TYPES: - return isSetTableTypes(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetTablesReq) - return this.equals((TGetTablesReq)that); - return false; - } - - public boolean equals(TGetTablesReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_catalogName = true && this.isSetCatalogName(); - boolean that_present_catalogName = true && that.isSetCatalogName(); - if (this_present_catalogName || that_present_catalogName) { - if (!(this_present_catalogName && that_present_catalogName)) - return false; - if (!this.catalogName.equals(that.catalogName)) - return false; - } - - boolean this_present_schemaName = true && this.isSetSchemaName(); - boolean that_present_schemaName = true && that.isSetSchemaName(); - if (this_present_schemaName || that_present_schemaName) { - if (!(this_present_schemaName && that_present_schemaName)) - return 
false; - if (!this.schemaName.equals(that.schemaName)) - return false; - } - - boolean this_present_tableName = true && this.isSetTableName(); - boolean that_present_tableName = true && that.isSetTableName(); - if (this_present_tableName || that_present_tableName) { - if (!(this_present_tableName && that_present_tableName)) - return false; - if (!this.tableName.equals(that.tableName)) - return false; - } - - boolean this_present_tableTypes = true && this.isSetTableTypes(); - boolean that_present_tableTypes = true && that.isSetTableTypes(); - if (this_present_tableTypes || that_present_tableTypes) { - if (!(this_present_tableTypes && that_present_tableTypes)) - return false; - if (!this.tableTypes.equals(that.tableTypes)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - boolean present_catalogName = true && (isSetCatalogName()); - builder.append(present_catalogName); - if (present_catalogName) - builder.append(catalogName); - - boolean present_schemaName = true && (isSetSchemaName()); - builder.append(present_schemaName); - if (present_schemaName) - builder.append(schemaName); - - boolean present_tableName = true && (isSetTableName()); - builder.append(present_tableName); - if (present_tableName) - builder.append(tableName); - - boolean present_tableTypes = true && (isSetTableTypes()); - builder.append(present_tableTypes); - if (present_tableTypes) - builder.append(tableTypes); - - return builder.toHashCode(); - } - - public int compareTo(TGetTablesReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetTablesReq typedOther = (TGetTablesReq)other; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetCatalogName()).compareTo(typedOther.isSetCatalogName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetCatalogName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.catalogName, typedOther.catalogName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSchemaName()).compareTo(typedOther.isSetSchemaName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSchemaName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.schemaName, typedOther.schemaName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetTableName()).compareTo(typedOther.isSetTableName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTableName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.tableName, typedOther.tableName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetTableTypes()).compareTo(typedOther.isSetTableTypes()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTableTypes()) { - lastComparison = 
org.apache.thrift.TBaseHelper.compareTo(this.tableTypes, typedOther.tableTypes); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetTablesReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (isSetCatalogName()) { - if (!first) sb.append(", "); - sb.append("catalogName:"); - if (this.catalogName == null) { - sb.append("null"); - } else { - sb.append(this.catalogName); - } - first = false; - } - if (isSetSchemaName()) { - if (!first) sb.append(", "); - sb.append("schemaName:"); - if (this.schemaName == null) { - sb.append("null"); - } else { - sb.append(this.schemaName); - } - first = false; - } - if (isSetTableName()) { - if (!first) sb.append(", "); - sb.append("tableName:"); - if (this.tableName == null) { - sb.append("null"); - } else { - sb.append(this.tableName); - } - first = false; - } - if (isSetTableTypes()) { - if (!first) sb.append(", "); - sb.append("tableTypes:"); - if (this.tableTypes == null) { - sb.append("null"); - } else { - sb.append(this.tableTypes); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetTablesReqStandardSchemeFactory implements SchemeFactory { - public TGetTablesReqStandardScheme getScheme() { - return new TGetTablesReqStandardScheme(); - } - } - - private static class TGetTablesReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetTablesReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // CATALOG_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SCHEMA_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // TABLE_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.tableName = iprot.readString(); - struct.setTableNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 5: // TABLE_TYPES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list172 = iprot.readListBegin(); - struct.tableTypes = new ArrayList(_list172.size); - for (int _i173 = 0; _i173 < _list172.size; ++_i173) - { - String _elem174; // optional - _elem174 = iprot.readString(); - struct.tableTypes.add(_elem174); - } - iprot.readListEnd(); - } - struct.setTableTypesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetTablesReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - 
oprot.writeFieldEnd(); - } - if (struct.catalogName != null) { - if (struct.isSetCatalogName()) { - oprot.writeFieldBegin(CATALOG_NAME_FIELD_DESC); - oprot.writeString(struct.catalogName); - oprot.writeFieldEnd(); - } - } - if (struct.schemaName != null) { - if (struct.isSetSchemaName()) { - oprot.writeFieldBegin(SCHEMA_NAME_FIELD_DESC); - oprot.writeString(struct.schemaName); - oprot.writeFieldEnd(); - } - } - if (struct.tableName != null) { - if (struct.isSetTableName()) { - oprot.writeFieldBegin(TABLE_NAME_FIELD_DESC); - oprot.writeString(struct.tableName); - oprot.writeFieldEnd(); - } - } - if (struct.tableTypes != null) { - if (struct.isSetTableTypes()) { - oprot.writeFieldBegin(TABLE_TYPES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, struct.tableTypes.size())); - for (String _iter175 : struct.tableTypes) - { - oprot.writeString(_iter175); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetTablesReqTupleSchemeFactory implements SchemeFactory { - public TGetTablesReqTupleScheme getScheme() { - return new TGetTablesReqTupleScheme(); - } - } - - private static class TGetTablesReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetTablesReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetCatalogName()) { - optionals.set(0); - } - if (struct.isSetSchemaName()) { - optionals.set(1); - } - if (struct.isSetTableName()) { - optionals.set(2); - } - if (struct.isSetTableTypes()) { - optionals.set(3); - } - oprot.writeBitSet(optionals, 4); - if (struct.isSetCatalogName()) { - oprot.writeString(struct.catalogName); - } - if (struct.isSetSchemaName()) { - oprot.writeString(struct.schemaName); - } - if (struct.isSetTableName()) { - oprot.writeString(struct.tableName); - } - if (struct.isSetTableTypes()) { - { - oprot.writeI32(struct.tableTypes.size()); - for (String _iter176 : struct.tableTypes) - { - oprot.writeString(_iter176); - } - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetTablesReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - BitSet incoming = iprot.readBitSet(4); - if (incoming.get(0)) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } - if (incoming.get(1)) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } - if (incoming.get(2)) { - struct.tableName = iprot.readString(); - struct.setTableNameIsSet(true); - } - if (incoming.get(3)) { - { - org.apache.thrift.protocol.TList _list177 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.tableTypes = new ArrayList(_list177.size); - for (int _i178 = 0; _i178 < _list177.size; ++_i178) - { - String _elem179; // optional - _elem179 = iprot.readString(); - struct.tableTypes.add(_elem179); - } - } - struct.setTableTypesIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTablesResp.java 
b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTablesResp.java deleted file mode 100644 index d526f4478a24e..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTablesResp.java +++ /dev/null @@ -1,505 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetTablesResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetTablesResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetTablesRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetTablesRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetTablesResp.class, metaDataMap); - } - - public TGetTablesResp() { - } - - public TGetTablesResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TGetTablesResp(TGetTablesResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetTablesResp deepCopy() { - return new TGetTablesResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } 
- - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetTablesResp) - return this.equals((TGetTablesResp)that); - return false; - } - - public boolean equals(TGetTablesResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - builder.append(present_operationHandle); - if (present_operationHandle) - builder.append(operationHandle); - - return builder.toHashCode(); - } - - public int compareTo(TGetTablesResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetTablesResp typedOther = (TGetTablesResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(typedOther.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, typedOther.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetTablesResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - 
sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetTablesRespStandardSchemeFactory implements SchemeFactory { - public TGetTablesRespStandardScheme getScheme() { - return new TGetTablesRespStandardScheme(); - } - } - - private static class TGetTablesRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetTablesResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetTablesResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetTablesRespTupleSchemeFactory implements SchemeFactory { - public TGetTablesRespTupleScheme getScheme() { - return new TGetTablesRespTupleScheme(); - } - } - - private static class TGetTablesRespTupleScheme extends TupleScheme { - - @Override - public void 
write(org.apache.thrift.protocol.TProtocol prot, TGetTablesResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetTablesResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTypeInfoReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTypeInfoReq.java deleted file mode 100644 index d40115e83ec45..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTypeInfoReq.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetTypeInfoReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetTypeInfoReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetTypeInfoReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetTypeInfoReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetTypeInfoReq.class, metaDataMap); - } - - public TGetTypeInfoReq() { - } - - public TGetTypeInfoReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetTypeInfoReq(TGetTypeInfoReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - } - - public TGetTypeInfoReq deepCopy() { - return new TGetTypeInfoReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetTypeInfoReq) - return this.equals((TGetTypeInfoReq)that); - return false; - } - - public boolean equals(TGetTypeInfoReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - return builder.toHashCode(); - } - - public int compareTo(TGetTypeInfoReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetTypeInfoReq typedOther = (TGetTypeInfoReq)other; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws 
org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetTypeInfoReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetTypeInfoReqStandardSchemeFactory implements SchemeFactory { - public TGetTypeInfoReqStandardScheme getScheme() { - return new TGetTypeInfoReqStandardScheme(); - } - } - - private static class TGetTypeInfoReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetTypeInfoReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetTypeInfoReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetTypeInfoReqTupleSchemeFactory implements SchemeFactory { - public TGetTypeInfoReqTupleScheme getScheme() { - return new TGetTypeInfoReqTupleScheme(); - } - } - - private static class TGetTypeInfoReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetTypeInfoReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetTypeInfoReq struct) throws 
org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTypeInfoResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTypeInfoResp.java deleted file mode 100644 index 59be1a33b55e2..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTypeInfoResp.java +++ /dev/null @@ -1,505 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetTypeInfoResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetTypeInfoResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetTypeInfoRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetTypeInfoRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetTypeInfoResp.class, metaDataMap); - } - - public TGetTypeInfoResp() { - } - - public TGetTypeInfoResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetTypeInfoResp(TGetTypeInfoResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetTypeInfoResp deepCopy() { - return new TGetTypeInfoResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetTypeInfoResp) - return this.equals((TGetTypeInfoResp)that); - return false; - } - - public boolean equals(TGetTypeInfoResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - 
builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - builder.append(present_operationHandle); - if (present_operationHandle) - builder.append(operationHandle); - - return builder.toHashCode(); - } - - public int compareTo(TGetTypeInfoResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetTypeInfoResp typedOther = (TGetTypeInfoResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(typedOther.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, typedOther.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetTypeInfoResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetTypeInfoRespStandardSchemeFactory implements SchemeFactory { - public TGetTypeInfoRespStandardScheme getScheme() { - return new TGetTypeInfoRespStandardScheme(); - } - } - - private static class TGetTypeInfoRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetTypeInfoResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetTypeInfoResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetTypeInfoRespTupleSchemeFactory implements SchemeFactory { - public TGetTypeInfoRespTupleScheme getScheme() { - return new TGetTypeInfoRespTupleScheme(); - } - } - - private static class TGetTypeInfoRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetTypeInfoResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override - public 
void read(org.apache.thrift.protocol.TProtocol prot, TGetTypeInfoResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/THandleIdentifier.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/THandleIdentifier.java deleted file mode 100644 index 368273c341c7b..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/THandleIdentifier.java +++ /dev/null @@ -1,506 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class THandleIdentifier implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("THandleIdentifier"); - - private static final org.apache.thrift.protocol.TField GUID_FIELD_DESC = new org.apache.thrift.protocol.TField("guid", org.apache.thrift.protocol.TType.STRING, (short)1); - private static final org.apache.thrift.protocol.TField SECRET_FIELD_DESC = new org.apache.thrift.protocol.TField("secret", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new THandleIdentifierStandardSchemeFactory()); - schemes.put(TupleScheme.class, new THandleIdentifierTupleSchemeFactory()); - } - - private ByteBuffer guid; // required - private ByteBuffer secret; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - GUID((short)1, "guid"), - SECRET((short)2, "secret"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // GUID - return GUID; - case 2: // SECRET - return SECRET; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.GUID, new org.apache.thrift.meta_data.FieldMetaData("guid", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - tmpMap.put(_Fields.SECRET, new org.apache.thrift.meta_data.FieldMetaData("secret", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(THandleIdentifier.class, metaDataMap); - } - - public THandleIdentifier() { - } - - public THandleIdentifier( - ByteBuffer guid, - ByteBuffer secret) - { - this(); - this.guid = guid; - this.secret = secret; - } - - /** - * Performs a deep copy on other. - */ - public THandleIdentifier(THandleIdentifier other) { - if (other.isSetGuid()) { - this.guid = org.apache.thrift.TBaseHelper.copyBinary(other.guid); -; - } - if (other.isSetSecret()) { - this.secret = org.apache.thrift.TBaseHelper.copyBinary(other.secret); -; - } - } - - public THandleIdentifier deepCopy() { - return new THandleIdentifier(this); - } - - @Override - public void clear() { - this.guid = null; - this.secret = null; - } - - public byte[] getGuid() { - setGuid(org.apache.thrift.TBaseHelper.rightSize(guid)); - return guid == null ? null : guid.array(); - } - - public ByteBuffer bufferForGuid() { - return guid; - } - - public void setGuid(byte[] guid) { - setGuid(guid == null ? (ByteBuffer)null : ByteBuffer.wrap(guid)); - } - - public void setGuid(ByteBuffer guid) { - this.guid = guid; - } - - public void unsetGuid() { - this.guid = null; - } - - /** Returns true if field guid is set (has been assigned a value) and false otherwise */ - public boolean isSetGuid() { - return this.guid != null; - } - - public void setGuidIsSet(boolean value) { - if (!value) { - this.guid = null; - } - } - - public byte[] getSecret() { - setSecret(org.apache.thrift.TBaseHelper.rightSize(secret)); - return secret == null ? null : secret.array(); - } - - public ByteBuffer bufferForSecret() { - return secret; - } - - public void setSecret(byte[] secret) { - setSecret(secret == null ? 
(ByteBuffer)null : ByteBuffer.wrap(secret)); - } - - public void setSecret(ByteBuffer secret) { - this.secret = secret; - } - - public void unsetSecret() { - this.secret = null; - } - - /** Returns true if field secret is set (has been assigned a value) and false otherwise */ - public boolean isSetSecret() { - return this.secret != null; - } - - public void setSecretIsSet(boolean value) { - if (!value) { - this.secret = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case GUID: - if (value == null) { - unsetGuid(); - } else { - setGuid((ByteBuffer)value); - } - break; - - case SECRET: - if (value == null) { - unsetSecret(); - } else { - setSecret((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case GUID: - return getGuid(); - - case SECRET: - return getSecret(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case GUID: - return isSetGuid(); - case SECRET: - return isSetSecret(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof THandleIdentifier) - return this.equals((THandleIdentifier)that); - return false; - } - - public boolean equals(THandleIdentifier that) { - if (that == null) - return false; - - boolean this_present_guid = true && this.isSetGuid(); - boolean that_present_guid = true && that.isSetGuid(); - if (this_present_guid || that_present_guid) { - if (!(this_present_guid && that_present_guid)) - return false; - if (!this.guid.equals(that.guid)) - return false; - } - - boolean this_present_secret = true && this.isSetSecret(); - boolean that_present_secret = true && that.isSetSecret(); - if (this_present_secret || that_present_secret) { - if (!(this_present_secret && that_present_secret)) - return false; - if (!this.secret.equals(that.secret)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_guid = true && (isSetGuid()); - builder.append(present_guid); - if (present_guid) - builder.append(guid); - - boolean present_secret = true && (isSetSecret()); - builder.append(present_secret); - if (present_secret) - builder.append(secret); - - return builder.toHashCode(); - } - - public int compareTo(THandleIdentifier other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - THandleIdentifier typedOther = (THandleIdentifier)other; - - lastComparison = Boolean.valueOf(isSetGuid()).compareTo(typedOther.isSetGuid()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetGuid()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.guid, typedOther.guid); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSecret()).compareTo(typedOther.isSetSecret()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSecret()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.secret, typedOther.secret); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return 
_Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("THandleIdentifier("); - boolean first = true; - - sb.append("guid:"); - if (this.guid == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.guid, sb); - } - first = false; - if (!first) sb.append(", "); - sb.append("secret:"); - if (this.secret == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.secret, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetGuid()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'guid' is unset! Struct:" + toString()); - } - - if (!isSetSecret()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'secret' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class THandleIdentifierStandardSchemeFactory implements SchemeFactory { - public THandleIdentifierStandardScheme getScheme() { - return new THandleIdentifierStandardScheme(); - } - } - - private static class THandleIdentifierStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, THandleIdentifier struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // GUID - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.guid = iprot.readBinary(); - struct.setGuidIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // SECRET - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.secret = iprot.readBinary(); - struct.setSecretIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, THandleIdentifier struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.guid != null) { - 
oprot.writeFieldBegin(GUID_FIELD_DESC); - oprot.writeBinary(struct.guid); - oprot.writeFieldEnd(); - } - if (struct.secret != null) { - oprot.writeFieldBegin(SECRET_FIELD_DESC); - oprot.writeBinary(struct.secret); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class THandleIdentifierTupleSchemeFactory implements SchemeFactory { - public THandleIdentifierTupleScheme getScheme() { - return new THandleIdentifierTupleScheme(); - } - } - - private static class THandleIdentifierTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, THandleIdentifier struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeBinary(struct.guid); - oprot.writeBinary(struct.secret); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, THandleIdentifier struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.guid = iprot.readBinary(); - struct.setGuidIsSet(true); - struct.secret = iprot.readBinary(); - struct.setSecretIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI16Column.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI16Column.java deleted file mode 100644 index c83663072f877..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI16Column.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TI16Column implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TI16Column"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TI16ColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TI16ColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and 
manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I16)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TI16Column.class, metaDataMap); - } - - public TI16Column() { - } - - public TI16Column( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = nulls; - } - - /** - * Performs a deep copy on other. - */ - public TI16Column(TI16Column other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(); - for (Short other_element : other.values) { - __this__values.add(other_element); - } - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); -; - } - } - - public TI16Column deepCopy() { - return new TI16Column(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? 
null : this.values.iterator(); - } - - public void addToValues(short elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return nulls; - } - - public void setNulls(byte[] nulls) { - setNulls(nulls == null ? (ByteBuffer)null : ByteBuffer.wrap(nulls)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = nulls; - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TI16Column) - return this.equals((TI16Column)that); - return false; - } - - public boolean equals(TI16Column that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_values = true && (isSetValues()); - builder.append(present_values); - if (present_values) - builder.append(values); - - boolean present_nulls = true && (isSetNulls()); - builder.append(present_nulls); - if (present_nulls) - builder.append(nulls); - - return builder.toHashCode(); - } - - public int compareTo(TI16Column other) { - if (!getClass().equals(other.getClass())) { - return 
getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TI16Column typedOther = (TI16Column)other; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(typedOther.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, typedOther.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(typedOther.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, typedOther.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TI16Column("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TI16ColumnStandardSchemeFactory implements SchemeFactory { - public TI16ColumnStandardScheme getScheme() { - return new TI16ColumnStandardScheme(); - } - } - - private static class TI16ColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TI16Column struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list70 = iprot.readListBegin(); - struct.values = new ArrayList(_list70.size); - for (int _i71 = 0; _i71 < _list70.size; ++_i71) - { - short _elem72; // optional - _elem72 = iprot.readI16(); - struct.values.add(_elem72); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TI16Column struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I16, struct.values.size())); - for (short _iter73 : struct.values) - { - oprot.writeI16(_iter73); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TI16ColumnTupleSchemeFactory implements SchemeFactory { - public TI16ColumnTupleScheme getScheme() { - return new TI16ColumnTupleScheme(); - } - } - - private static class TI16ColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TI16Column struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (short _iter74 : struct.values) - { - oprot.writeI16(_iter74); - } - } - oprot.writeBinary(struct.nulls); - } - - 
@Override - public void read(org.apache.thrift.protocol.TProtocol prot, TI16Column struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list75 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I16, iprot.readI32()); - struct.values = new ArrayList(_list75.size); - for (int _i76 = 0; _i76 < _list75.size; ++_i76) - { - short _elem77; // optional - _elem77 = iprot.readI16(); - struct.values.add(_elem77); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI16Value.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI16Value.java deleted file mode 100644 index bb5ae9609de86..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI16Value.java +++ /dev/null @@ -1,386 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TI16Value implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TI16Value"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("value", org.apache.thrift.protocol.TType.I16, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TI16ValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TI16ValueTupleSchemeFactory()); - } - - private short value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __VALUE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I16))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TI16Value.class, metaDataMap); - } - - public TI16Value() { - } - - /** - * Performs a deep copy on other. - */ - public TI16Value(TI16Value other) { - __isset_bitfield = other.__isset_bitfield; - this.value = other.value; - } - - public TI16Value deepCopy() { - return new TI16Value(this); - } - - @Override - public void clear() { - setValueIsSet(false); - this.value = 0; - } - - public short getValue() { - return this.value; - } - - public void setValue(short value) { - this.value = value; - setValueIsSet(true); - } - - public void unsetValue() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return EncodingUtils.testBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - public void setValueIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((Short)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return Short.valueOf(getValue()); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TI16Value) - return this.equals((TI16Value)that); - return false; - } - - public boolean equals(TI16Value that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || that_present_value) { - if 
(!(this_present_value && that_present_value)) - return false; - if (this.value != that.value) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_value = true && (isSetValue()); - builder.append(present_value); - if (present_value) - builder.append(value); - - return builder.toHashCode(); - } - - public int compareTo(TI16Value other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TI16Value typedOther = (TI16Value)other; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(typedOther.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, typedOther.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TI16Value("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); - sb.append(this.value); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
- __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TI16ValueStandardSchemeFactory implements SchemeFactory { - public TI16ValueStandardScheme getScheme() { - return new TI16ValueStandardScheme(); - } - } - - private static class TI16ValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TI16Value struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.I16) { - struct.value = iprot.readI16(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TI16Value struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.isSetValue()) { - oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeI16(struct.value); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TI16ValueTupleSchemeFactory implements SchemeFactory { - public TI16ValueTupleScheme getScheme() { - return new TI16ValueTupleScheme(); - } - } - - private static class TI16ValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TI16Value struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeI16(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TI16Value struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readI16(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI32Column.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI32Column.java deleted file mode 100644 index 6c6c5f35b7c8e..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI32Column.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import 
org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TI32Column implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TI32Column"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TI32ColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TI32ColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TI32Column.class, metaDataMap); - } - - public TI32Column() { - } - - public TI32Column( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = nulls; - } - - /** - * Performs a deep copy on other. - */ - public TI32Column(TI32Column other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(); - for (Integer other_element : other.values) { - __this__values.add(other_element); - } - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); -; - } - } - - public TI32Column deepCopy() { - return new TI32Column(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? null : this.values.iterator(); - } - - public void addToValues(int elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return nulls; - } - - public void setNulls(byte[] nulls) { - setNulls(nulls == null ? 
(ByteBuffer)null : ByteBuffer.wrap(nulls)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = nulls; - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TI32Column) - return this.equals((TI32Column)that); - return false; - } - - public boolean equals(TI32Column that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_values = true && (isSetValues()); - builder.append(present_values); - if (present_values) - builder.append(values); - - boolean present_nulls = true && (isSetNulls()); - builder.append(present_nulls); - if (present_nulls) - builder.append(nulls); - - return builder.toHashCode(); - } - - public int compareTo(TI32Column other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TI32Column typedOther = (TI32Column)other; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(typedOther.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, typedOther.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(typedOther.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, typedOther.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public 
void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TI32Column("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TI32ColumnStandardSchemeFactory implements SchemeFactory { - public TI32ColumnStandardScheme getScheme() { - return new TI32ColumnStandardScheme(); - } - } - - private static class TI32ColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TI32Column struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list78 = iprot.readListBegin(); - struct.values = new ArrayList(_list78.size); - for (int _i79 = 0; _i79 < _list78.size; ++_i79) - { - int _elem80; // optional - _elem80 = iprot.readI32(); - struct.values.add(_elem80); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TI32Column struct) throws org.apache.thrift.TException { - 
struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I32, struct.values.size())); - for (int _iter81 : struct.values) - { - oprot.writeI32(_iter81); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TI32ColumnTupleSchemeFactory implements SchemeFactory { - public TI32ColumnTupleScheme getScheme() { - return new TI32ColumnTupleScheme(); - } - } - - private static class TI32ColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TI32Column struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (int _iter82 : struct.values) - { - oprot.writeI32(_iter82); - } - } - oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TI32Column struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list83 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I32, iprot.readI32()); - struct.values = new ArrayList(_list83.size); - for (int _i84 = 0; _i84 < _list83.size; ++_i84) - { - int _elem85; // optional - _elem85 = iprot.readI32(); - struct.values.add(_elem85); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI32Value.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI32Value.java deleted file mode 100644 index 059408b96c8ce..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI32Value.java +++ /dev/null @@ -1,386 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TI32Value implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TI32Value"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("value", 
org.apache.thrift.protocol.TType.I32, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TI32ValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TI32ValueTupleSchemeFactory()); - } - - private int value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __VALUE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TI32Value.class, metaDataMap); - } - - public TI32Value() { - } - - /** - * Performs a deep copy on other. 
- */ - public TI32Value(TI32Value other) { - __isset_bitfield = other.__isset_bitfield; - this.value = other.value; - } - - public TI32Value deepCopy() { - return new TI32Value(this); - } - - @Override - public void clear() { - setValueIsSet(false); - this.value = 0; - } - - public int getValue() { - return this.value; - } - - public void setValue(int value) { - this.value = value; - setValueIsSet(true); - } - - public void unsetValue() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return EncodingUtils.testBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - public void setValueIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((Integer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return Integer.valueOf(getValue()); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TI32Value) - return this.equals((TI32Value)that); - return false; - } - - public boolean equals(TI32Value that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || that_present_value) { - if (!(this_present_value && that_present_value)) - return false; - if (this.value != that.value) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_value = true && (isSetValue()); - builder.append(present_value); - if (present_value) - builder.append(value); - - return builder.toHashCode(); - } - - public int compareTo(TI32Value other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TI32Value typedOther = (TI32Value)other; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(typedOther.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, typedOther.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TI32Value("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); - 
sb.append(this.value); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TI32ValueStandardSchemeFactory implements SchemeFactory { - public TI32ValueStandardScheme getScheme() { - return new TI32ValueStandardScheme(); - } - } - - private static class TI32ValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TI32Value struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.value = iprot.readI32(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TI32Value struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.isSetValue()) { - oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeI32(struct.value); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TI32ValueTupleSchemeFactory implements SchemeFactory { - public TI32ValueTupleScheme getScheme() { - return new TI32ValueTupleScheme(); - } - } - - private static class TI32ValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TI32Value struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeI32(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TI32Value struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readI32(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI64Column.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI64Column.java 
deleted file mode 100644 index cc383ed089fa4..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI64Column.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TI64Column implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TI64Column"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TI64ColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TI64ColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TI64Column.class, metaDataMap); - } - - public TI64Column() { - } - - public TI64Column( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = nulls; - } - - /** - * Performs a deep copy on other. - */ - public TI64Column(TI64Column other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(); - for (Long other_element : other.values) { - __this__values.add(other_element); - } - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); -; - } - } - - public TI64Column deepCopy() { - return new TI64Column(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? null : this.values.iterator(); - } - - public void addToValues(long elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return nulls; - } - - public void setNulls(byte[] nulls) { - setNulls(nulls == null ? 
(ByteBuffer)null : ByteBuffer.wrap(nulls)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = nulls; - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TI64Column) - return this.equals((TI64Column)that); - return false; - } - - public boolean equals(TI64Column that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_values = true && (isSetValues()); - builder.append(present_values); - if (present_values) - builder.append(values); - - boolean present_nulls = true && (isSetNulls()); - builder.append(present_nulls); - if (present_nulls) - builder.append(nulls); - - return builder.toHashCode(); - } - - public int compareTo(TI64Column other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TI64Column typedOther = (TI64Column)other; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(typedOther.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, typedOther.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(typedOther.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, typedOther.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public 
void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TI64Column("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TI64ColumnStandardSchemeFactory implements SchemeFactory { - public TI64ColumnStandardScheme getScheme() { - return new TI64ColumnStandardScheme(); - } - } - - private static class TI64ColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TI64Column struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list86 = iprot.readListBegin(); - struct.values = new ArrayList(_list86.size); - for (int _i87 = 0; _i87 < _list86.size; ++_i87) - { - long _elem88; // optional - _elem88 = iprot.readI64(); - struct.values.add(_elem88); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TI64Column struct) throws org.apache.thrift.TException { - 
struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I64, struct.values.size())); - for (long _iter89 : struct.values) - { - oprot.writeI64(_iter89); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TI64ColumnTupleSchemeFactory implements SchemeFactory { - public TI64ColumnTupleScheme getScheme() { - return new TI64ColumnTupleScheme(); - } - } - - private static class TI64ColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TI64Column struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (long _iter90 : struct.values) - { - oprot.writeI64(_iter90); - } - } - oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TI64Column struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list91 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I64, iprot.readI32()); - struct.values = new ArrayList(_list91.size); - for (int _i92 = 0; _i92 < _list91.size; ++_i92) - { - long _elem93; // optional - _elem93 = iprot.readI64(); - struct.values.add(_elem93); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI64Value.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI64Value.java deleted file mode 100644 index 9a941cce0c077..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI64Value.java +++ /dev/null @@ -1,386 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TI64Value implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TI64Value"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("value", 
org.apache.thrift.protocol.TType.I64, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TI64ValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TI64ValueTupleSchemeFactory()); - } - - private long value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __VALUE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TI64Value.class, metaDataMap); - } - - public TI64Value() { - } - - /** - * Performs a deep copy on other. 
- */ - public TI64Value(TI64Value other) { - __isset_bitfield = other.__isset_bitfield; - this.value = other.value; - } - - public TI64Value deepCopy() { - return new TI64Value(this); - } - - @Override - public void clear() { - setValueIsSet(false); - this.value = 0; - } - - public long getValue() { - return this.value; - } - - public void setValue(long value) { - this.value = value; - setValueIsSet(true); - } - - public void unsetValue() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return EncodingUtils.testBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - public void setValueIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((Long)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return Long.valueOf(getValue()); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TI64Value) - return this.equals((TI64Value)that); - return false; - } - - public boolean equals(TI64Value that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || that_present_value) { - if (!(this_present_value && that_present_value)) - return false; - if (this.value != that.value) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_value = true && (isSetValue()); - builder.append(present_value); - if (present_value) - builder.append(value); - - return builder.toHashCode(); - } - - public int compareTo(TI64Value other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TI64Value typedOther = (TI64Value)other; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(typedOther.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, typedOther.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TI64Value("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); - 
sb.append(this.value); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TI64ValueStandardSchemeFactory implements SchemeFactory { - public TI64ValueStandardScheme getScheme() { - return new TI64ValueStandardScheme(); - } - } - - private static class TI64ValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TI64Value struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.I64) { - struct.value = iprot.readI64(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TI64Value struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.isSetValue()) { - oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeI64(struct.value); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TI64ValueTupleSchemeFactory implements SchemeFactory { - public TI64ValueTupleScheme getScheme() { - return new TI64ValueTupleScheme(); - } - } - - private static class TI64ValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TI64Value struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeI64(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TI64Value struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readI64(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TMapTypeEntry.java 
b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TMapTypeEntry.java deleted file mode 100644 index 425603cbdecbd..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TMapTypeEntry.java +++ /dev/null @@ -1,478 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TMapTypeEntry implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TMapTypeEntry"); - - private static final org.apache.thrift.protocol.TField KEY_TYPE_PTR_FIELD_DESC = new org.apache.thrift.protocol.TField("keyTypePtr", org.apache.thrift.protocol.TType.I32, (short)1); - private static final org.apache.thrift.protocol.TField VALUE_TYPE_PTR_FIELD_DESC = new org.apache.thrift.protocol.TField("valueTypePtr", org.apache.thrift.protocol.TType.I32, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TMapTypeEntryStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TMapTypeEntryTupleSchemeFactory()); - } - - private int keyTypePtr; // required - private int valueTypePtr; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - KEY_TYPE_PTR((short)1, "keyTypePtr"), - VALUE_TYPE_PTR((short)2, "valueTypePtr"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // KEY_TYPE_PTR - return KEY_TYPE_PTR; - case 2: // VALUE_TYPE_PTR - return VALUE_TYPE_PTR; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __KEYTYPEPTR_ISSET_ID = 0; - private static final int __VALUETYPEPTR_ISSET_ID = 1; - private byte __isset_bitfield = 0; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.KEY_TYPE_PTR, new org.apache.thrift.meta_data.FieldMetaData("keyTypePtr", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32 , "TTypeEntryPtr"))); - tmpMap.put(_Fields.VALUE_TYPE_PTR, new org.apache.thrift.meta_data.FieldMetaData("valueTypePtr", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32 , "TTypeEntryPtr"))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TMapTypeEntry.class, metaDataMap); - } - - public TMapTypeEntry() { - } - - public TMapTypeEntry( - int keyTypePtr, - int valueTypePtr) - { - this(); - this.keyTypePtr = keyTypePtr; - setKeyTypePtrIsSet(true); - this.valueTypePtr = valueTypePtr; - setValueTypePtrIsSet(true); - } - - /** - * Performs a deep copy on other. - */ - public TMapTypeEntry(TMapTypeEntry other) { - __isset_bitfield = other.__isset_bitfield; - this.keyTypePtr = other.keyTypePtr; - this.valueTypePtr = other.valueTypePtr; - } - - public TMapTypeEntry deepCopy() { - return new TMapTypeEntry(this); - } - - @Override - public void clear() { - setKeyTypePtrIsSet(false); - this.keyTypePtr = 0; - setValueTypePtrIsSet(false); - this.valueTypePtr = 0; - } - - public int getKeyTypePtr() { - return this.keyTypePtr; - } - - public void setKeyTypePtr(int keyTypePtr) { - this.keyTypePtr = keyTypePtr; - setKeyTypePtrIsSet(true); - } - - public void unsetKeyTypePtr() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __KEYTYPEPTR_ISSET_ID); - } - - /** Returns true if field keyTypePtr is set (has been assigned a value) and false otherwise */ - public boolean isSetKeyTypePtr() { - return EncodingUtils.testBit(__isset_bitfield, __KEYTYPEPTR_ISSET_ID); - } - - public void setKeyTypePtrIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __KEYTYPEPTR_ISSET_ID, value); - } - - public int getValueTypePtr() { - return this.valueTypePtr; - } - - public void setValueTypePtr(int valueTypePtr) { - this.valueTypePtr = valueTypePtr; - setValueTypePtrIsSet(true); - } - - public void unsetValueTypePtr() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUETYPEPTR_ISSET_ID); - } - - /** Returns true if field valueTypePtr is set (has been assigned a value) and false otherwise */ - public boolean isSetValueTypePtr() { - return EncodingUtils.testBit(__isset_bitfield, __VALUETYPEPTR_ISSET_ID); - } - - public void setValueTypePtrIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUETYPEPTR_ISSET_ID, value); - } - - public void 
setFieldValue(_Fields field, Object value) { - switch (field) { - case KEY_TYPE_PTR: - if (value == null) { - unsetKeyTypePtr(); - } else { - setKeyTypePtr((Integer)value); - } - break; - - case VALUE_TYPE_PTR: - if (value == null) { - unsetValueTypePtr(); - } else { - setValueTypePtr((Integer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case KEY_TYPE_PTR: - return Integer.valueOf(getKeyTypePtr()); - - case VALUE_TYPE_PTR: - return Integer.valueOf(getValueTypePtr()); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case KEY_TYPE_PTR: - return isSetKeyTypePtr(); - case VALUE_TYPE_PTR: - return isSetValueTypePtr(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TMapTypeEntry) - return this.equals((TMapTypeEntry)that); - return false; - } - - public boolean equals(TMapTypeEntry that) { - if (that == null) - return false; - - boolean this_present_keyTypePtr = true; - boolean that_present_keyTypePtr = true; - if (this_present_keyTypePtr || that_present_keyTypePtr) { - if (!(this_present_keyTypePtr && that_present_keyTypePtr)) - return false; - if (this.keyTypePtr != that.keyTypePtr) - return false; - } - - boolean this_present_valueTypePtr = true; - boolean that_present_valueTypePtr = true; - if (this_present_valueTypePtr || that_present_valueTypePtr) { - if (!(this_present_valueTypePtr && that_present_valueTypePtr)) - return false; - if (this.valueTypePtr != that.valueTypePtr) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_keyTypePtr = true; - builder.append(present_keyTypePtr); - if (present_keyTypePtr) - builder.append(keyTypePtr); - - boolean present_valueTypePtr = true; - builder.append(present_valueTypePtr); - if (present_valueTypePtr) - builder.append(valueTypePtr); - - return builder.toHashCode(); - } - - public int compareTo(TMapTypeEntry other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TMapTypeEntry typedOther = (TMapTypeEntry)other; - - lastComparison = Boolean.valueOf(isSetKeyTypePtr()).compareTo(typedOther.isSetKeyTypePtr()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetKeyTypePtr()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.keyTypePtr, typedOther.keyTypePtr); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetValueTypePtr()).compareTo(typedOther.isSetValueTypePtr()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValueTypePtr()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.valueTypePtr, typedOther.valueTypePtr); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws 
org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TMapTypeEntry("); - boolean first = true; - - sb.append("keyTypePtr:"); - sb.append(this.keyTypePtr); - first = false; - if (!first) sb.append(", "); - sb.append("valueTypePtr:"); - sb.append(this.valueTypePtr); - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetKeyTypePtr()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'keyTypePtr' is unset! Struct:" + toString()); - } - - if (!isSetValueTypePtr()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'valueTypePtr' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TMapTypeEntryStandardSchemeFactory implements SchemeFactory { - public TMapTypeEntryStandardScheme getScheme() { - return new TMapTypeEntryStandardScheme(); - } - } - - private static class TMapTypeEntryStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TMapTypeEntry struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // KEY_TYPE_PTR - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.keyTypePtr = iprot.readI32(); - struct.setKeyTypePtrIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // VALUE_TYPE_PTR - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.valueTypePtr = iprot.readI32(); - struct.setValueTypePtrIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TMapTypeEntry struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - oprot.writeFieldBegin(KEY_TYPE_PTR_FIELD_DESC); - oprot.writeI32(struct.keyTypePtr); - oprot.writeFieldEnd(); - oprot.writeFieldBegin(VALUE_TYPE_PTR_FIELD_DESC); - oprot.writeI32(struct.valueTypePtr); - oprot.writeFieldEnd(); - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class 
TMapTypeEntryTupleSchemeFactory implements SchemeFactory { - public TMapTypeEntryTupleScheme getScheme() { - return new TMapTypeEntryTupleScheme(); - } - } - - private static class TMapTypeEntryTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TMapTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeI32(struct.keyTypePtr); - oprot.writeI32(struct.valueTypePtr); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TMapTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.keyTypePtr = iprot.readI32(); - struct.setKeyTypePtrIsSet(true); - struct.valueTypePtr = iprot.readI32(); - struct.setValueTypePtrIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOpenSessionReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOpenSessionReq.java deleted file mode 100644 index c0481615b06d3..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOpenSessionReq.java +++ /dev/null @@ -1,785 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TOpenSessionReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TOpenSessionReq"); - - private static final org.apache.thrift.protocol.TField CLIENT_PROTOCOL_FIELD_DESC = new org.apache.thrift.protocol.TField("client_protocol", org.apache.thrift.protocol.TType.I32, (short)1); - private static final org.apache.thrift.protocol.TField USERNAME_FIELD_DESC = new org.apache.thrift.protocol.TField("username", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField PASSWORD_FIELD_DESC = new org.apache.thrift.protocol.TField("password", org.apache.thrift.protocol.TType.STRING, (short)3); - private static final org.apache.thrift.protocol.TField CONFIGURATION_FIELD_DESC = new org.apache.thrift.protocol.TField("configuration", org.apache.thrift.protocol.TType.MAP, (short)4); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TOpenSessionReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TOpenSessionReqTupleSchemeFactory()); - } - - private TProtocolVersion client_protocol; // required - private String 
username; // optional - private String password; // optional - private Map configuration; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - /** - * - * @see TProtocolVersion - */ - CLIENT_PROTOCOL((short)1, "client_protocol"), - USERNAME((short)2, "username"), - PASSWORD((short)3, "password"), - CONFIGURATION((short)4, "configuration"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // CLIENT_PROTOCOL - return CLIENT_PROTOCOL; - case 2: // USERNAME - return USERNAME; - case 3: // PASSWORD - return PASSWORD; - case 4: // CONFIGURATION - return CONFIGURATION; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.USERNAME,_Fields.PASSWORD,_Fields.CONFIGURATION}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.CLIENT_PROTOCOL, new org.apache.thrift.meta_data.FieldMetaData("client_protocol", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TProtocolVersion.class))); - tmpMap.put(_Fields.USERNAME, new org.apache.thrift.meta_data.FieldMetaData("username", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.PASSWORD, new org.apache.thrift.meta_data.FieldMetaData("password", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.CONFIGURATION, new org.apache.thrift.meta_data.FieldMetaData("configuration", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.MapMetaData(org.apache.thrift.protocol.TType.MAP, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING), - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TOpenSessionReq.class, metaDataMap); - } - - public 
TOpenSessionReq() { - this.client_protocol = org.apache.hive.service.cli.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V8; - - } - - public TOpenSessionReq( - TProtocolVersion client_protocol) - { - this(); - this.client_protocol = client_protocol; - } - - /** - * Performs a deep copy on other. - */ - public TOpenSessionReq(TOpenSessionReq other) { - if (other.isSetClient_protocol()) { - this.client_protocol = other.client_protocol; - } - if (other.isSetUsername()) { - this.username = other.username; - } - if (other.isSetPassword()) { - this.password = other.password; - } - if (other.isSetConfiguration()) { - Map __this__configuration = new HashMap(); - for (Map.Entry other_element : other.configuration.entrySet()) { - - String other_element_key = other_element.getKey(); - String other_element_value = other_element.getValue(); - - String __this__configuration_copy_key = other_element_key; - - String __this__configuration_copy_value = other_element_value; - - __this__configuration.put(__this__configuration_copy_key, __this__configuration_copy_value); - } - this.configuration = __this__configuration; - } - } - - public TOpenSessionReq deepCopy() { - return new TOpenSessionReq(this); - } - - @Override - public void clear() { - this.client_protocol = org.apache.hive.service.cli.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V8; - - this.username = null; - this.password = null; - this.configuration = null; - } - - /** - * - * @see TProtocolVersion - */ - public TProtocolVersion getClient_protocol() { - return this.client_protocol; - } - - /** - * - * @see TProtocolVersion - */ - public void setClient_protocol(TProtocolVersion client_protocol) { - this.client_protocol = client_protocol; - } - - public void unsetClient_protocol() { - this.client_protocol = null; - } - - /** Returns true if field client_protocol is set (has been assigned a value) and false otherwise */ - public boolean isSetClient_protocol() { - return this.client_protocol != null; - } - - public void setClient_protocolIsSet(boolean value) { - if (!value) { - this.client_protocol = null; - } - } - - public String getUsername() { - return this.username; - } - - public void setUsername(String username) { - this.username = username; - } - - public void unsetUsername() { - this.username = null; - } - - /** Returns true if field username is set (has been assigned a value) and false otherwise */ - public boolean isSetUsername() { - return this.username != null; - } - - public void setUsernameIsSet(boolean value) { - if (!value) { - this.username = null; - } - } - - public String getPassword() { - return this.password; - } - - public void setPassword(String password) { - this.password = password; - } - - public void unsetPassword() { - this.password = null; - } - - /** Returns true if field password is set (has been assigned a value) and false otherwise */ - public boolean isSetPassword() { - return this.password != null; - } - - public void setPasswordIsSet(boolean value) { - if (!value) { - this.password = null; - } - } - - public int getConfigurationSize() { - return (this.configuration == null) ? 
0 : this.configuration.size(); - } - - public void putToConfiguration(String key, String val) { - if (this.configuration == null) { - this.configuration = new HashMap(); - } - this.configuration.put(key, val); - } - - public Map getConfiguration() { - return this.configuration; - } - - public void setConfiguration(Map configuration) { - this.configuration = configuration; - } - - public void unsetConfiguration() { - this.configuration = null; - } - - /** Returns true if field configuration is set (has been assigned a value) and false otherwise */ - public boolean isSetConfiguration() { - return this.configuration != null; - } - - public void setConfigurationIsSet(boolean value) { - if (!value) { - this.configuration = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case CLIENT_PROTOCOL: - if (value == null) { - unsetClient_protocol(); - } else { - setClient_protocol((TProtocolVersion)value); - } - break; - - case USERNAME: - if (value == null) { - unsetUsername(); - } else { - setUsername((String)value); - } - break; - - case PASSWORD: - if (value == null) { - unsetPassword(); - } else { - setPassword((String)value); - } - break; - - case CONFIGURATION: - if (value == null) { - unsetConfiguration(); - } else { - setConfiguration((Map)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case CLIENT_PROTOCOL: - return getClient_protocol(); - - case USERNAME: - return getUsername(); - - case PASSWORD: - return getPassword(); - - case CONFIGURATION: - return getConfiguration(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case CLIENT_PROTOCOL: - return isSetClient_protocol(); - case USERNAME: - return isSetUsername(); - case PASSWORD: - return isSetPassword(); - case CONFIGURATION: - return isSetConfiguration(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TOpenSessionReq) - return this.equals((TOpenSessionReq)that); - return false; - } - - public boolean equals(TOpenSessionReq that) { - if (that == null) - return false; - - boolean this_present_client_protocol = true && this.isSetClient_protocol(); - boolean that_present_client_protocol = true && that.isSetClient_protocol(); - if (this_present_client_protocol || that_present_client_protocol) { - if (!(this_present_client_protocol && that_present_client_protocol)) - return false; - if (!this.client_protocol.equals(that.client_protocol)) - return false; - } - - boolean this_present_username = true && this.isSetUsername(); - boolean that_present_username = true && that.isSetUsername(); - if (this_present_username || that_present_username) { - if (!(this_present_username && that_present_username)) - return false; - if (!this.username.equals(that.username)) - return false; - } - - boolean this_present_password = true && this.isSetPassword(); - boolean that_present_password = true && that.isSetPassword(); - if (this_present_password || that_present_password) { - if (!(this_present_password && that_present_password)) - return false; - if (!this.password.equals(that.password)) - return false; - } - - boolean this_present_configuration = true && this.isSetConfiguration(); - boolean that_present_configuration 
= true && that.isSetConfiguration(); - if (this_present_configuration || that_present_configuration) { - if (!(this_present_configuration && that_present_configuration)) - return false; - if (!this.configuration.equals(that.configuration)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_client_protocol = true && (isSetClient_protocol()); - builder.append(present_client_protocol); - if (present_client_protocol) - builder.append(client_protocol.getValue()); - - boolean present_username = true && (isSetUsername()); - builder.append(present_username); - if (present_username) - builder.append(username); - - boolean present_password = true && (isSetPassword()); - builder.append(present_password); - if (present_password) - builder.append(password); - - boolean present_configuration = true && (isSetConfiguration()); - builder.append(present_configuration); - if (present_configuration) - builder.append(configuration); - - return builder.toHashCode(); - } - - public int compareTo(TOpenSessionReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TOpenSessionReq typedOther = (TOpenSessionReq)other; - - lastComparison = Boolean.valueOf(isSetClient_protocol()).compareTo(typedOther.isSetClient_protocol()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetClient_protocol()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.client_protocol, typedOther.client_protocol); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetUsername()).compareTo(typedOther.isSetUsername()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetUsername()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.username, typedOther.username); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetPassword()).compareTo(typedOther.isSetPassword()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetPassword()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.password, typedOther.password); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetConfiguration()).compareTo(typedOther.isSetConfiguration()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetConfiguration()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.configuration, typedOther.configuration); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TOpenSessionReq("); - boolean first = true; - - sb.append("client_protocol:"); - if (this.client_protocol == null) { - sb.append("null"); - } else { - sb.append(this.client_protocol); - } - first = false; - if (isSetUsername()) { - if (!first) sb.append(", "); - sb.append("username:"); - if (this.username == 
null) { - sb.append("null"); - } else { - sb.append(this.username); - } - first = false; - } - if (isSetPassword()) { - if (!first) sb.append(", "); - sb.append("password:"); - if (this.password == null) { - sb.append("null"); - } else { - sb.append(this.password); - } - first = false; - } - if (isSetConfiguration()) { - if (!first) sb.append(", "); - sb.append("configuration:"); - if (this.configuration == null) { - sb.append("null"); - } else { - sb.append(this.configuration); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetClient_protocol()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'client_protocol' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TOpenSessionReqStandardSchemeFactory implements SchemeFactory { - public TOpenSessionReqStandardScheme getScheme() { - return new TOpenSessionReqStandardScheme(); - } - } - - private static class TOpenSessionReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TOpenSessionReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // CLIENT_PROTOCOL - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.client_protocol = TProtocolVersion.findByValue(iprot.readI32()); - struct.setClient_protocolIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // USERNAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.username = iprot.readString(); - struct.setUsernameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // PASSWORD - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.password = iprot.readString(); - struct.setPasswordIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // CONFIGURATION - if (schemeField.type == org.apache.thrift.protocol.TType.MAP) { - { - org.apache.thrift.protocol.TMap _map142 = iprot.readMapBegin(); - struct.configuration = new HashMap(2*_map142.size); - for (int _i143 = 0; _i143 < _map142.size; ++_i143) - { - String _key144; // required - String _val145; // required - _key144 = iprot.readString(); - _val145 = iprot.readString(); - struct.configuration.put(_key144, _val145); - } - iprot.readMapEnd(); - } - struct.setConfigurationIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, 
schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TOpenSessionReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.client_protocol != null) { - oprot.writeFieldBegin(CLIENT_PROTOCOL_FIELD_DESC); - oprot.writeI32(struct.client_protocol.getValue()); - oprot.writeFieldEnd(); - } - if (struct.username != null) { - if (struct.isSetUsername()) { - oprot.writeFieldBegin(USERNAME_FIELD_DESC); - oprot.writeString(struct.username); - oprot.writeFieldEnd(); - } - } - if (struct.password != null) { - if (struct.isSetPassword()) { - oprot.writeFieldBegin(PASSWORD_FIELD_DESC); - oprot.writeString(struct.password); - oprot.writeFieldEnd(); - } - } - if (struct.configuration != null) { - if (struct.isSetConfiguration()) { - oprot.writeFieldBegin(CONFIGURATION_FIELD_DESC); - { - oprot.writeMapBegin(new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, struct.configuration.size())); - for (Map.Entry _iter146 : struct.configuration.entrySet()) - { - oprot.writeString(_iter146.getKey()); - oprot.writeString(_iter146.getValue()); - } - oprot.writeMapEnd(); - } - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TOpenSessionReqTupleSchemeFactory implements SchemeFactory { - public TOpenSessionReqTupleScheme getScheme() { - return new TOpenSessionReqTupleScheme(); - } - } - - private static class TOpenSessionReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TOpenSessionReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeI32(struct.client_protocol.getValue()); - BitSet optionals = new BitSet(); - if (struct.isSetUsername()) { - optionals.set(0); - } - if (struct.isSetPassword()) { - optionals.set(1); - } - if (struct.isSetConfiguration()) { - optionals.set(2); - } - oprot.writeBitSet(optionals, 3); - if (struct.isSetUsername()) { - oprot.writeString(struct.username); - } - if (struct.isSetPassword()) { - oprot.writeString(struct.password); - } - if (struct.isSetConfiguration()) { - { - oprot.writeI32(struct.configuration.size()); - for (Map.Entry _iter147 : struct.configuration.entrySet()) - { - oprot.writeString(_iter147.getKey()); - oprot.writeString(_iter147.getValue()); - } - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TOpenSessionReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.client_protocol = TProtocolVersion.findByValue(iprot.readI32()); - struct.setClient_protocolIsSet(true); - BitSet incoming = iprot.readBitSet(3); - if (incoming.get(0)) { - struct.username = iprot.readString(); - struct.setUsernameIsSet(true); - } - if (incoming.get(1)) { - struct.password = iprot.readString(); - struct.setPasswordIsSet(true); - } - if (incoming.get(2)) { - { - org.apache.thrift.protocol.TMap _map148 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.configuration = new HashMap(2*_map148.size); - for (int _i149 = 0; _i149 < _map148.size; ++_i149) - { - String _key150; // required - String 
_val151; // required - _key150 = iprot.readString(); - _val151 = iprot.readString(); - struct.configuration.put(_key150, _val151); - } - } - struct.setConfigurationIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOpenSessionResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOpenSessionResp.java deleted file mode 100644 index 351f78b2de20c..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOpenSessionResp.java +++ /dev/null @@ -1,790 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TOpenSessionResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TOpenSessionResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField SERVER_PROTOCOL_VERSION_FIELD_DESC = new org.apache.thrift.protocol.TField("serverProtocolVersion", org.apache.thrift.protocol.TType.I32, (short)2); - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)3); - private static final org.apache.thrift.protocol.TField CONFIGURATION_FIELD_DESC = new org.apache.thrift.protocol.TField("configuration", org.apache.thrift.protocol.TType.MAP, (short)4); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TOpenSessionRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TOpenSessionRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TProtocolVersion serverProtocolVersion; // required - private TSessionHandle sessionHandle; // optional - private Map configuration; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - /** - * - * @see TProtocolVersion - */ - SERVER_PROTOCOL_VERSION((short)2, "serverProtocolVersion"), - SESSION_HANDLE((short)3, "sessionHandle"), - CONFIGURATION((short)4, "configuration"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // SERVER_PROTOCOL_VERSION - return SERVER_PROTOCOL_VERSION; - case 3: // SESSION_HANDLE - return SESSION_HANDLE; - case 4: // CONFIGURATION - return CONFIGURATION; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.SESSION_HANDLE,_Fields.CONFIGURATION}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.SERVER_PROTOCOL_VERSION, new org.apache.thrift.meta_data.FieldMetaData("serverProtocolVersion", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TProtocolVersion.class))); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.CONFIGURATION, new org.apache.thrift.meta_data.FieldMetaData("configuration", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.MapMetaData(org.apache.thrift.protocol.TType.MAP, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING), - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TOpenSessionResp.class, metaDataMap); - } - - public TOpenSessionResp() { - this.serverProtocolVersion = org.apache.hive.service.cli.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V8; - 
- } - - public TOpenSessionResp( - TStatus status, - TProtocolVersion serverProtocolVersion) - { - this(); - this.status = status; - this.serverProtocolVersion = serverProtocolVersion; - } - - /** - * Performs a deep copy on other. - */ - public TOpenSessionResp(TOpenSessionResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetServerProtocolVersion()) { - this.serverProtocolVersion = other.serverProtocolVersion; - } - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetConfiguration()) { - Map __this__configuration = new HashMap(); - for (Map.Entry other_element : other.configuration.entrySet()) { - - String other_element_key = other_element.getKey(); - String other_element_value = other_element.getValue(); - - String __this__configuration_copy_key = other_element_key; - - String __this__configuration_copy_value = other_element_value; - - __this__configuration.put(__this__configuration_copy_key, __this__configuration_copy_value); - } - this.configuration = __this__configuration; - } - } - - public TOpenSessionResp deepCopy() { - return new TOpenSessionResp(this); - } - - @Override - public void clear() { - this.status = null; - this.serverProtocolVersion = org.apache.hive.service.cli.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V8; - - this.sessionHandle = null; - this.configuration = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - /** - * - * @see TProtocolVersion - */ - public TProtocolVersion getServerProtocolVersion() { - return this.serverProtocolVersion; - } - - /** - * - * @see TProtocolVersion - */ - public void setServerProtocolVersion(TProtocolVersion serverProtocolVersion) { - this.serverProtocolVersion = serverProtocolVersion; - } - - public void unsetServerProtocolVersion() { - this.serverProtocolVersion = null; - } - - /** Returns true if field serverProtocolVersion is set (has been assigned a value) and false otherwise */ - public boolean isSetServerProtocolVersion() { - return this.serverProtocolVersion != null; - } - - public void setServerProtocolVersionIsSet(boolean value) { - if (!value) { - this.serverProtocolVersion = null; - } - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public int getConfigurationSize() { - return (this.configuration == null) ? 
0 : this.configuration.size(); - } - - public void putToConfiguration(String key, String val) { - if (this.configuration == null) { - this.configuration = new HashMap(); - } - this.configuration.put(key, val); - } - - public Map getConfiguration() { - return this.configuration; - } - - public void setConfiguration(Map configuration) { - this.configuration = configuration; - } - - public void unsetConfiguration() { - this.configuration = null; - } - - /** Returns true if field configuration is set (has been assigned a value) and false otherwise */ - public boolean isSetConfiguration() { - return this.configuration != null; - } - - public void setConfigurationIsSet(boolean value) { - if (!value) { - this.configuration = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case SERVER_PROTOCOL_VERSION: - if (value == null) { - unsetServerProtocolVersion(); - } else { - setServerProtocolVersion((TProtocolVersion)value); - } - break; - - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case CONFIGURATION: - if (value == null) { - unsetConfiguration(); - } else { - setConfiguration((Map)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case SERVER_PROTOCOL_VERSION: - return getServerProtocolVersion(); - - case SESSION_HANDLE: - return getSessionHandle(); - - case CONFIGURATION: - return getConfiguration(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case SERVER_PROTOCOL_VERSION: - return isSetServerProtocolVersion(); - case SESSION_HANDLE: - return isSetSessionHandle(); - case CONFIGURATION: - return isSetConfiguration(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TOpenSessionResp) - return this.equals((TOpenSessionResp)that); - return false; - } - - public boolean equals(TOpenSessionResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_serverProtocolVersion = true && this.isSetServerProtocolVersion(); - boolean that_present_serverProtocolVersion = true && that.isSetServerProtocolVersion(); - if (this_present_serverProtocolVersion || that_present_serverProtocolVersion) { - if (!(this_present_serverProtocolVersion && that_present_serverProtocolVersion)) - return false; - if (!this.serverProtocolVersion.equals(that.serverProtocolVersion)) - return false; - } - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if 
(!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_configuration = true && this.isSetConfiguration(); - boolean that_present_configuration = true && that.isSetConfiguration(); - if (this_present_configuration || that_present_configuration) { - if (!(this_present_configuration && that_present_configuration)) - return false; - if (!this.configuration.equals(that.configuration)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_serverProtocolVersion = true && (isSetServerProtocolVersion()); - builder.append(present_serverProtocolVersion); - if (present_serverProtocolVersion) - builder.append(serverProtocolVersion.getValue()); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - boolean present_configuration = true && (isSetConfiguration()); - builder.append(present_configuration); - if (present_configuration) - builder.append(configuration); - - return builder.toHashCode(); - } - - public int compareTo(TOpenSessionResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TOpenSessionResp typedOther = (TOpenSessionResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetServerProtocolVersion()).compareTo(typedOther.isSetServerProtocolVersion()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetServerProtocolVersion()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.serverProtocolVersion, typedOther.serverProtocolVersion); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetConfiguration()).compareTo(typedOther.isSetConfiguration()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetConfiguration()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.configuration, typedOther.configuration); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TOpenSessionResp("); - boolean first = 
true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (!first) sb.append(", "); - sb.append("serverProtocolVersion:"); - if (this.serverProtocolVersion == null) { - sb.append("null"); - } else { - sb.append(this.serverProtocolVersion); - } - first = false; - if (isSetSessionHandle()) { - if (!first) sb.append(", "); - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - } - if (isSetConfiguration()) { - if (!first) sb.append(", "); - sb.append("configuration:"); - if (this.configuration == null) { - sb.append("null"); - } else { - sb.append(this.configuration); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - if (!isSetServerProtocolVersion()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'serverProtocolVersion' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TOpenSessionRespStandardSchemeFactory implements SchemeFactory { - public TOpenSessionRespStandardScheme getScheme() { - return new TOpenSessionRespStandardScheme(); - } - } - - private static class TOpenSessionRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TOpenSessionResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // SERVER_PROTOCOL_VERSION - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.serverProtocolVersion = TProtocolVersion.findByValue(iprot.readI32()); - struct.setServerProtocolVersionIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - 
org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // CONFIGURATION - if (schemeField.type == org.apache.thrift.protocol.TType.MAP) { - { - org.apache.thrift.protocol.TMap _map152 = iprot.readMapBegin(); - struct.configuration = new HashMap(2*_map152.size); - for (int _i153 = 0; _i153 < _map152.size; ++_i153) - { - String _key154; // required - String _val155; // required - _key154 = iprot.readString(); - _val155 = iprot.readString(); - struct.configuration.put(_key154, _val155); - } - iprot.readMapEnd(); - } - struct.setConfigurationIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TOpenSessionResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.serverProtocolVersion != null) { - oprot.writeFieldBegin(SERVER_PROTOCOL_VERSION_FIELD_DESC); - oprot.writeI32(struct.serverProtocolVersion.getValue()); - oprot.writeFieldEnd(); - } - if (struct.sessionHandle != null) { - if (struct.isSetSessionHandle()) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - if (struct.configuration != null) { - if (struct.isSetConfiguration()) { - oprot.writeFieldBegin(CONFIGURATION_FIELD_DESC); - { - oprot.writeMapBegin(new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, struct.configuration.size())); - for (Map.Entry _iter156 : struct.configuration.entrySet()) - { - oprot.writeString(_iter156.getKey()); - oprot.writeString(_iter156.getValue()); - } - oprot.writeMapEnd(); - } - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TOpenSessionRespTupleSchemeFactory implements SchemeFactory { - public TOpenSessionRespTupleScheme getScheme() { - return new TOpenSessionRespTupleScheme(); - } - } - - private static class TOpenSessionRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TOpenSessionResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - oprot.writeI32(struct.serverProtocolVersion.getValue()); - BitSet optionals = new BitSet(); - if (struct.isSetSessionHandle()) { - optionals.set(0); - } - if (struct.isSetConfiguration()) { - optionals.set(1); - } - oprot.writeBitSet(optionals, 2); - if (struct.isSetSessionHandle()) { - struct.sessionHandle.write(oprot); - } - if (struct.isSetConfiguration()) { - { - oprot.writeI32(struct.configuration.size()); - for (Map.Entry _iter157 : struct.configuration.entrySet()) - { - oprot.writeString(_iter157.getKey()); - oprot.writeString(_iter157.getValue()); - } - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TOpenSessionResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - struct.serverProtocolVersion = 
TProtocolVersion.findByValue(iprot.readI32()); - struct.setServerProtocolVersionIsSet(true); - BitSet incoming = iprot.readBitSet(2); - if (incoming.get(0)) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } - if (incoming.get(1)) { - { - org.apache.thrift.protocol.TMap _map158 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.configuration = new HashMap(2*_map158.size); - for (int _i159 = 0; _i159 < _map158.size; ++_i159) - { - String _key160; // required - String _val161; // required - _key160 = iprot.readString(); - _val161 = iprot.readString(); - struct.configuration.put(_key160, _val161); - } - } - struct.setConfigurationIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationHandle.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationHandle.java deleted file mode 100644 index 8fbd8752eaca6..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationHandle.java +++ /dev/null @@ -1,705 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TOperationHandle implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TOperationHandle"); - - private static final org.apache.thrift.protocol.TField OPERATION_ID_FIELD_DESC = new org.apache.thrift.protocol.TField("operationId", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_TYPE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationType", org.apache.thrift.protocol.TType.I32, (short)2); - private static final org.apache.thrift.protocol.TField HAS_RESULT_SET_FIELD_DESC = new org.apache.thrift.protocol.TField("hasResultSet", org.apache.thrift.protocol.TType.BOOL, (short)3); - private static final org.apache.thrift.protocol.TField MODIFIED_ROW_COUNT_FIELD_DESC = new org.apache.thrift.protocol.TField("modifiedRowCount", org.apache.thrift.protocol.TType.DOUBLE, (short)4); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TOperationHandleStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TOperationHandleTupleSchemeFactory()); - } - - private THandleIdentifier operationId; // 
required - private TOperationType operationType; // required - private boolean hasResultSet; // required - private double modifiedRowCount; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OPERATION_ID((short)1, "operationId"), - /** - * - * @see TOperationType - */ - OPERATION_TYPE((short)2, "operationType"), - HAS_RESULT_SET((short)3, "hasResultSet"), - MODIFIED_ROW_COUNT((short)4, "modifiedRowCount"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OPERATION_ID - return OPERATION_ID; - case 2: // OPERATION_TYPE - return OPERATION_TYPE; - case 3: // HAS_RESULT_SET - return HAS_RESULT_SET; - case 4: // MODIFIED_ROW_COUNT - return MODIFIED_ROW_COUNT; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __HASRESULTSET_ISSET_ID = 0; - private static final int __MODIFIEDROWCOUNT_ISSET_ID = 1; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.MODIFIED_ROW_COUNT}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OPERATION_ID, new org.apache.thrift.meta_data.FieldMetaData("operationId", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, THandleIdentifier.class))); - tmpMap.put(_Fields.OPERATION_TYPE, new org.apache.thrift.meta_data.FieldMetaData("operationType", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TOperationType.class))); - tmpMap.put(_Fields.HAS_RESULT_SET, new org.apache.thrift.meta_data.FieldMetaData("hasResultSet", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL))); - tmpMap.put(_Fields.MODIFIED_ROW_COUNT, new org.apache.thrift.meta_data.FieldMetaData("modifiedRowCount", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.DOUBLE))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - 
org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TOperationHandle.class, metaDataMap); - } - - public TOperationHandle() { - } - - public TOperationHandle( - THandleIdentifier operationId, - TOperationType operationType, - boolean hasResultSet) - { - this(); - this.operationId = operationId; - this.operationType = operationType; - this.hasResultSet = hasResultSet; - setHasResultSetIsSet(true); - } - - /** - * Performs a deep copy on other. - */ - public TOperationHandle(TOperationHandle other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetOperationId()) { - this.operationId = new THandleIdentifier(other.operationId); - } - if (other.isSetOperationType()) { - this.operationType = other.operationType; - } - this.hasResultSet = other.hasResultSet; - this.modifiedRowCount = other.modifiedRowCount; - } - - public TOperationHandle deepCopy() { - return new TOperationHandle(this); - } - - @Override - public void clear() { - this.operationId = null; - this.operationType = null; - setHasResultSetIsSet(false); - this.hasResultSet = false; - setModifiedRowCountIsSet(false); - this.modifiedRowCount = 0.0; - } - - public THandleIdentifier getOperationId() { - return this.operationId; - } - - public void setOperationId(THandleIdentifier operationId) { - this.operationId = operationId; - } - - public void unsetOperationId() { - this.operationId = null; - } - - /** Returns true if field operationId is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationId() { - return this.operationId != null; - } - - public void setOperationIdIsSet(boolean value) { - if (!value) { - this.operationId = null; - } - } - - /** - * - * @see TOperationType - */ - public TOperationType getOperationType() { - return this.operationType; - } - - /** - * - * @see TOperationType - */ - public void setOperationType(TOperationType operationType) { - this.operationType = operationType; - } - - public void unsetOperationType() { - this.operationType = null; - } - - /** Returns true if field operationType is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationType() { - return this.operationType != null; - } - - public void setOperationTypeIsSet(boolean value) { - if (!value) { - this.operationType = null; - } - } - - public boolean isHasResultSet() { - return this.hasResultSet; - } - - public void setHasResultSet(boolean hasResultSet) { - this.hasResultSet = hasResultSet; - setHasResultSetIsSet(true); - } - - public void unsetHasResultSet() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __HASRESULTSET_ISSET_ID); - } - - /** Returns true if field hasResultSet is set (has been assigned a value) and false otherwise */ - public boolean isSetHasResultSet() { - return EncodingUtils.testBit(__isset_bitfield, __HASRESULTSET_ISSET_ID); - } - - public void setHasResultSetIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __HASRESULTSET_ISSET_ID, value); - } - - public double getModifiedRowCount() { - return this.modifiedRowCount; - } - - public void setModifiedRowCount(double modifiedRowCount) { - this.modifiedRowCount = modifiedRowCount; - setModifiedRowCountIsSet(true); - } - - public void unsetModifiedRowCount() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __MODIFIEDROWCOUNT_ISSET_ID); - } - - /** Returns true if field modifiedRowCount is set (has been assigned a value) and false otherwise */ - public boolean isSetModifiedRowCount() { - return 
EncodingUtils.testBit(__isset_bitfield, __MODIFIEDROWCOUNT_ISSET_ID); - } - - public void setModifiedRowCountIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __MODIFIEDROWCOUNT_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OPERATION_ID: - if (value == null) { - unsetOperationId(); - } else { - setOperationId((THandleIdentifier)value); - } - break; - - case OPERATION_TYPE: - if (value == null) { - unsetOperationType(); - } else { - setOperationType((TOperationType)value); - } - break; - - case HAS_RESULT_SET: - if (value == null) { - unsetHasResultSet(); - } else { - setHasResultSet((Boolean)value); - } - break; - - case MODIFIED_ROW_COUNT: - if (value == null) { - unsetModifiedRowCount(); - } else { - setModifiedRowCount((Double)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OPERATION_ID: - return getOperationId(); - - case OPERATION_TYPE: - return getOperationType(); - - case HAS_RESULT_SET: - return Boolean.valueOf(isHasResultSet()); - - case MODIFIED_ROW_COUNT: - return Double.valueOf(getModifiedRowCount()); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OPERATION_ID: - return isSetOperationId(); - case OPERATION_TYPE: - return isSetOperationType(); - case HAS_RESULT_SET: - return isSetHasResultSet(); - case MODIFIED_ROW_COUNT: - return isSetModifiedRowCount(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TOperationHandle) - return this.equals((TOperationHandle)that); - return false; - } - - public boolean equals(TOperationHandle that) { - if (that == null) - return false; - - boolean this_present_operationId = true && this.isSetOperationId(); - boolean that_present_operationId = true && that.isSetOperationId(); - if (this_present_operationId || that_present_operationId) { - if (!(this_present_operationId && that_present_operationId)) - return false; - if (!this.operationId.equals(that.operationId)) - return false; - } - - boolean this_present_operationType = true && this.isSetOperationType(); - boolean that_present_operationType = true && that.isSetOperationType(); - if (this_present_operationType || that_present_operationType) { - if (!(this_present_operationType && that_present_operationType)) - return false; - if (!this.operationType.equals(that.operationType)) - return false; - } - - boolean this_present_hasResultSet = true; - boolean that_present_hasResultSet = true; - if (this_present_hasResultSet || that_present_hasResultSet) { - if (!(this_present_hasResultSet && that_present_hasResultSet)) - return false; - if (this.hasResultSet != that.hasResultSet) - return false; - } - - boolean this_present_modifiedRowCount = true && this.isSetModifiedRowCount(); - boolean that_present_modifiedRowCount = true && that.isSetModifiedRowCount(); - if (this_present_modifiedRowCount || that_present_modifiedRowCount) { - if (!(this_present_modifiedRowCount && that_present_modifiedRowCount)) - return false; - if (this.modifiedRowCount != that.modifiedRowCount) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - 
- boolean present_operationId = true && (isSetOperationId()); - builder.append(present_operationId); - if (present_operationId) - builder.append(operationId); - - boolean present_operationType = true && (isSetOperationType()); - builder.append(present_operationType); - if (present_operationType) - builder.append(operationType.getValue()); - - boolean present_hasResultSet = true; - builder.append(present_hasResultSet); - if (present_hasResultSet) - builder.append(hasResultSet); - - boolean present_modifiedRowCount = true && (isSetModifiedRowCount()); - builder.append(present_modifiedRowCount); - if (present_modifiedRowCount) - builder.append(modifiedRowCount); - - return builder.toHashCode(); - } - - public int compareTo(TOperationHandle other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TOperationHandle typedOther = (TOperationHandle)other; - - lastComparison = Boolean.valueOf(isSetOperationId()).compareTo(typedOther.isSetOperationId()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationId()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationId, typedOther.operationId); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationType()).compareTo(typedOther.isSetOperationType()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationType()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationType, typedOther.operationType); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetHasResultSet()).compareTo(typedOther.isSetHasResultSet()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetHasResultSet()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.hasResultSet, typedOther.hasResultSet); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetModifiedRowCount()).compareTo(typedOther.isSetModifiedRowCount()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetModifiedRowCount()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.modifiedRowCount, typedOther.modifiedRowCount); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TOperationHandle("); - boolean first = true; - - sb.append("operationId:"); - if (this.operationId == null) { - sb.append("null"); - } else { - sb.append(this.operationId); - } - first = false; - if (!first) sb.append(", "); - sb.append("operationType:"); - if (this.operationType == null) { - sb.append("null"); - } else { - sb.append(this.operationType); - } - first = false; - if (!first) sb.append(", "); - sb.append("hasResultSet:"); - sb.append(this.hasResultSet); - first = false; - if (isSetModifiedRowCount()) { - if (!first) sb.append(", "); - sb.append("modifiedRowCount:"); - sb.append(this.modifiedRowCount); - first 
= false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetOperationId()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationId' is unset! Struct:" + toString()); - } - - if (!isSetOperationType()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationType' is unset! Struct:" + toString()); - } - - if (!isSetHasResultSet()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'hasResultSet' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (operationId != null) { - operationId.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TOperationHandleStandardSchemeFactory implements SchemeFactory { - public TOperationHandleStandardScheme getScheme() { - return new TOperationHandleStandardScheme(); - } - } - - private static class TOperationHandleStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TOperationHandle struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OPERATION_ID - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationId = new THandleIdentifier(); - struct.operationId.read(iprot); - struct.setOperationIdIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_TYPE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.operationType = TOperationType.findByValue(iprot.readI32()); - struct.setOperationTypeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // HAS_RESULT_SET - if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) { - struct.hasResultSet = iprot.readBool(); - struct.setHasResultSetIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // MODIFIED_ROW_COUNT - if (schemeField.type == org.apache.thrift.protocol.TType.DOUBLE) { - struct.modifiedRowCount = iprot.readDouble(); - struct.setModifiedRowCountIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void 
write(org.apache.thrift.protocol.TProtocol oprot, TOperationHandle struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.operationId != null) { - oprot.writeFieldBegin(OPERATION_ID_FIELD_DESC); - struct.operationId.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationType != null) { - oprot.writeFieldBegin(OPERATION_TYPE_FIELD_DESC); - oprot.writeI32(struct.operationType.getValue()); - oprot.writeFieldEnd(); - } - oprot.writeFieldBegin(HAS_RESULT_SET_FIELD_DESC); - oprot.writeBool(struct.hasResultSet); - oprot.writeFieldEnd(); - if (struct.isSetModifiedRowCount()) { - oprot.writeFieldBegin(MODIFIED_ROW_COUNT_FIELD_DESC); - oprot.writeDouble(struct.modifiedRowCount); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TOperationHandleTupleSchemeFactory implements SchemeFactory { - public TOperationHandleTupleScheme getScheme() { - return new TOperationHandleTupleScheme(); - } - } - - private static class TOperationHandleTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TOperationHandle struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.operationId.write(oprot); - oprot.writeI32(struct.operationType.getValue()); - oprot.writeBool(struct.hasResultSet); - BitSet optionals = new BitSet(); - if (struct.isSetModifiedRowCount()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetModifiedRowCount()) { - oprot.writeDouble(struct.modifiedRowCount); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TOperationHandle struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.operationId = new THandleIdentifier(); - struct.operationId.read(iprot); - struct.setOperationIdIsSet(true); - struct.operationType = TOperationType.findByValue(iprot.readI32()); - struct.setOperationTypeIsSet(true); - struct.hasResultSet = iprot.readBool(); - struct.setHasResultSetIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.modifiedRowCount = iprot.readDouble(); - struct.setModifiedRowCountIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationState.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationState.java deleted file mode 100644 index 219866223a6b0..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationState.java +++ /dev/null @@ -1,63 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - - -import java.util.Map; -import java.util.HashMap; -import org.apache.thrift.TEnum; - -public enum TOperationState implements org.apache.thrift.TEnum { - INITIALIZED_STATE(0), - RUNNING_STATE(1), - FINISHED_STATE(2), - CANCELED_STATE(3), - CLOSED_STATE(4), - ERROR_STATE(5), - UKNOWN_STATE(6), - PENDING_STATE(7); - - private final int value; - - private TOperationState(int value) { - this.value = value; - } - - /** - * Get the integer value of this enum value, as defined in the Thrift IDL. - */ - public int getValue() { - return value; - } - - /** - * Find a the enum type by its integer value, as defined in the Thrift IDL. 
- * @return null if the value is not found. - */ - public static TOperationState findByValue(int value) { - switch (value) { - case 0: - return INITIALIZED_STATE; - case 1: - return RUNNING_STATE; - case 2: - return FINISHED_STATE; - case 3: - return CANCELED_STATE; - case 4: - return CLOSED_STATE; - case 5: - return ERROR_STATE; - case 6: - return UKNOWN_STATE; - case 7: - return PENDING_STATE; - default: - return null; - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationType.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationType.java deleted file mode 100644 index b6d4b2fab9f96..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationType.java +++ /dev/null @@ -1,66 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - - -import java.util.Map; -import java.util.HashMap; -import org.apache.thrift.TEnum; - -public enum TOperationType implements org.apache.thrift.TEnum { - EXECUTE_STATEMENT(0), - GET_TYPE_INFO(1), - GET_CATALOGS(2), - GET_SCHEMAS(3), - GET_TABLES(4), - GET_TABLE_TYPES(5), - GET_COLUMNS(6), - GET_FUNCTIONS(7), - UNKNOWN(8); - - private final int value; - - private TOperationType(int value) { - this.value = value; - } - - /** - * Get the integer value of this enum value, as defined in the Thrift IDL. - */ - public int getValue() { - return value; - } - - /** - * Find a the enum type by its integer value, as defined in the Thrift IDL. - * @return null if the value is not found. - */ - public static TOperationType findByValue(int value) { - switch (value) { - case 0: - return EXECUTE_STATEMENT; - case 1: - return GET_TYPE_INFO; - case 2: - return GET_CATALOGS; - case 3: - return GET_SCHEMAS; - case 4: - return GET_TABLES; - case 5: - return GET_TABLE_TYPES; - case 6: - return GET_COLUMNS; - case 7: - return GET_FUNCTIONS; - case 8: - return UNKNOWN; - default: - return null; - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TPrimitiveTypeEntry.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TPrimitiveTypeEntry.java deleted file mode 100644 index 9d2abf2b3b084..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TPrimitiveTypeEntry.java +++ /dev/null @@ -1,512 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class 
TPrimitiveTypeEntry implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TPrimitiveTypeEntry"); - - private static final org.apache.thrift.protocol.TField TYPE_FIELD_DESC = new org.apache.thrift.protocol.TField("type", org.apache.thrift.protocol.TType.I32, (short)1); - private static final org.apache.thrift.protocol.TField TYPE_QUALIFIERS_FIELD_DESC = new org.apache.thrift.protocol.TField("typeQualifiers", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TPrimitiveTypeEntryStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TPrimitiveTypeEntryTupleSchemeFactory()); - } - - private TTypeId type; // required - private TTypeQualifiers typeQualifiers; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - /** - * - * @see TTypeId - */ - TYPE((short)1, "type"), - TYPE_QUALIFIERS((short)2, "typeQualifiers"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // TYPE - return TYPE; - case 2: // TYPE_QUALIFIERS - return TYPE_QUALIFIERS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.TYPE_QUALIFIERS}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.TYPE, new org.apache.thrift.meta_data.FieldMetaData("type", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TTypeId.class))); - tmpMap.put(_Fields.TYPE_QUALIFIERS, new org.apache.thrift.meta_data.FieldMetaData("typeQualifiers", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TTypeQualifiers.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TPrimitiveTypeEntry.class, metaDataMap); - } - - public TPrimitiveTypeEntry() { - } - - public TPrimitiveTypeEntry( - TTypeId type) - { - this(); - this.type = type; - } - - /** - * Performs a deep copy on other. - */ - public TPrimitiveTypeEntry(TPrimitiveTypeEntry other) { - if (other.isSetType()) { - this.type = other.type; - } - if (other.isSetTypeQualifiers()) { - this.typeQualifiers = new TTypeQualifiers(other.typeQualifiers); - } - } - - public TPrimitiveTypeEntry deepCopy() { - return new TPrimitiveTypeEntry(this); - } - - @Override - public void clear() { - this.type = null; - this.typeQualifiers = null; - } - - /** - * - * @see TTypeId - */ - public TTypeId getType() { - return this.type; - } - - /** - * - * @see TTypeId - */ - public void setType(TTypeId type) { - this.type = type; - } - - public void unsetType() { - this.type = null; - } - - /** Returns true if field type is set (has been assigned a value) and false otherwise */ - public boolean isSetType() { - return this.type != null; - } - - public void setTypeIsSet(boolean value) { - if (!value) { - this.type = null; - } - } - - public TTypeQualifiers getTypeQualifiers() { - return this.typeQualifiers; - } - - public void setTypeQualifiers(TTypeQualifiers typeQualifiers) { - this.typeQualifiers = typeQualifiers; - } - - public void unsetTypeQualifiers() { - this.typeQualifiers = null; - } - - /** Returns true if field typeQualifiers is set (has been assigned a value) and false otherwise */ - public boolean isSetTypeQualifiers() { - return this.typeQualifiers != null; - } - - public void setTypeQualifiersIsSet(boolean value) { - if (!value) { - this.typeQualifiers = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case TYPE: - if (value == null) { - unsetType(); - } else { - setType((TTypeId)value); - } - break; - - case TYPE_QUALIFIERS: - if (value == null) { - unsetTypeQualifiers(); - } else { - setTypeQualifiers((TTypeQualifiers)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case TYPE: - return getType(); - - case TYPE_QUALIFIERS: - return getTypeQualifiers(); - - } - throw new IllegalStateException(); - } - 
- /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case TYPE: - return isSetType(); - case TYPE_QUALIFIERS: - return isSetTypeQualifiers(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TPrimitiveTypeEntry) - return this.equals((TPrimitiveTypeEntry)that); - return false; - } - - public boolean equals(TPrimitiveTypeEntry that) { - if (that == null) - return false; - - boolean this_present_type = true && this.isSetType(); - boolean that_present_type = true && that.isSetType(); - if (this_present_type || that_present_type) { - if (!(this_present_type && that_present_type)) - return false; - if (!this.type.equals(that.type)) - return false; - } - - boolean this_present_typeQualifiers = true && this.isSetTypeQualifiers(); - boolean that_present_typeQualifiers = true && that.isSetTypeQualifiers(); - if (this_present_typeQualifiers || that_present_typeQualifiers) { - if (!(this_present_typeQualifiers && that_present_typeQualifiers)) - return false; - if (!this.typeQualifiers.equals(that.typeQualifiers)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_type = true && (isSetType()); - builder.append(present_type); - if (present_type) - builder.append(type.getValue()); - - boolean present_typeQualifiers = true && (isSetTypeQualifiers()); - builder.append(present_typeQualifiers); - if (present_typeQualifiers) - builder.append(typeQualifiers); - - return builder.toHashCode(); - } - - public int compareTo(TPrimitiveTypeEntry other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TPrimitiveTypeEntry typedOther = (TPrimitiveTypeEntry)other; - - lastComparison = Boolean.valueOf(isSetType()).compareTo(typedOther.isSetType()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetType()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.type, typedOther.type); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetTypeQualifiers()).compareTo(typedOther.isSetTypeQualifiers()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTypeQualifiers()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.typeQualifiers, typedOther.typeQualifiers); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TPrimitiveTypeEntry("); - boolean first = true; - - sb.append("type:"); - if (this.type == null) { - sb.append("null"); - } else { - sb.append(this.type); - } - first = false; - if (isSetTypeQualifiers()) { - if (!first) sb.append(", "); - sb.append("typeQualifiers:"); - if 
(this.typeQualifiers == null) { - sb.append("null"); - } else { - sb.append(this.typeQualifiers); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetType()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'type' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (typeQualifiers != null) { - typeQualifiers.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TPrimitiveTypeEntryStandardSchemeFactory implements SchemeFactory { - public TPrimitiveTypeEntryStandardScheme getScheme() { - return new TPrimitiveTypeEntryStandardScheme(); - } - } - - private static class TPrimitiveTypeEntryStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TPrimitiveTypeEntry struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // TYPE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.type = TTypeId.findByValue(iprot.readI32()); - struct.setTypeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // TYPE_QUALIFIERS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.typeQualifiers = new TTypeQualifiers(); - struct.typeQualifiers.read(iprot); - struct.setTypeQualifiersIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TPrimitiveTypeEntry struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.type != null) { - oprot.writeFieldBegin(TYPE_FIELD_DESC); - oprot.writeI32(struct.type.getValue()); - oprot.writeFieldEnd(); - } - if (struct.typeQualifiers != null) { - if (struct.isSetTypeQualifiers()) { - oprot.writeFieldBegin(TYPE_QUALIFIERS_FIELD_DESC); - struct.typeQualifiers.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TPrimitiveTypeEntryTupleSchemeFactory implements SchemeFactory { - public TPrimitiveTypeEntryTupleScheme getScheme() { - return new TPrimitiveTypeEntryTupleScheme(); - } - } - - private static class TPrimitiveTypeEntryTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TPrimitiveTypeEntry struct) throws 
org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeI32(struct.type.getValue()); - BitSet optionals = new BitSet(); - if (struct.isSetTypeQualifiers()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetTypeQualifiers()) { - struct.typeQualifiers.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TPrimitiveTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.type = TTypeId.findByValue(iprot.readI32()); - struct.setTypeIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.typeQualifiers = new TTypeQualifiers(); - struct.typeQualifiers.read(iprot); - struct.setTypeQualifiersIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TProtocolVersion.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TProtocolVersion.java deleted file mode 100644 index a4279d29f662e..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TProtocolVersion.java +++ /dev/null @@ -1,63 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - - -import java.util.Map; -import java.util.HashMap; -import org.apache.thrift.TEnum; - -public enum TProtocolVersion implements org.apache.thrift.TEnum { - HIVE_CLI_SERVICE_PROTOCOL_V1(0), - HIVE_CLI_SERVICE_PROTOCOL_V2(1), - HIVE_CLI_SERVICE_PROTOCOL_V3(2), - HIVE_CLI_SERVICE_PROTOCOL_V4(3), - HIVE_CLI_SERVICE_PROTOCOL_V5(4), - HIVE_CLI_SERVICE_PROTOCOL_V6(5), - HIVE_CLI_SERVICE_PROTOCOL_V7(6), - HIVE_CLI_SERVICE_PROTOCOL_V8(7); - - private final int value; - - private TProtocolVersion(int value) { - this.value = value; - } - - /** - * Get the integer value of this enum value, as defined in the Thrift IDL. - */ - public int getValue() { - return value; - } - - /** - * Find a the enum type by its integer value, as defined in the Thrift IDL. - * @return null if the value is not found. 
- */ - public static TProtocolVersion findByValue(int value) { - switch (value) { - case 0: - return HIVE_CLI_SERVICE_PROTOCOL_V1; - case 1: - return HIVE_CLI_SERVICE_PROTOCOL_V2; - case 2: - return HIVE_CLI_SERVICE_PROTOCOL_V3; - case 3: - return HIVE_CLI_SERVICE_PROTOCOL_V4; - case 4: - return HIVE_CLI_SERVICE_PROTOCOL_V5; - case 5: - return HIVE_CLI_SERVICE_PROTOCOL_V6; - case 6: - return HIVE_CLI_SERVICE_PROTOCOL_V7; - case 7: - return HIVE_CLI_SERVICE_PROTOCOL_V8; - default: - return null; - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRenewDelegationTokenReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRenewDelegationTokenReq.java deleted file mode 100644 index a3e39c8cdf321..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRenewDelegationTokenReq.java +++ /dev/null @@ -1,491 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TRenewDelegationTokenReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TRenewDelegationTokenReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField DELEGATION_TOKEN_FIELD_DESC = new org.apache.thrift.protocol.TField("delegationToken", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TRenewDelegationTokenReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TRenewDelegationTokenReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String delegationToken; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - DELEGATION_TOKEN((short)2, "delegationToken"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // DELEGATION_TOKEN - return DELEGATION_TOKEN; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.DELEGATION_TOKEN, new org.apache.thrift.meta_data.FieldMetaData("delegationToken", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TRenewDelegationTokenReq.class, metaDataMap); - } - - public TRenewDelegationTokenReq() { - } - - public TRenewDelegationTokenReq( - TSessionHandle sessionHandle, - String delegationToken) - { - this(); - this.sessionHandle = sessionHandle; - this.delegationToken = delegationToken; - } - - /** - * Performs a deep copy on other. 
- */ - public TRenewDelegationTokenReq(TRenewDelegationTokenReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetDelegationToken()) { - this.delegationToken = other.delegationToken; - } - } - - public TRenewDelegationTokenReq deepCopy() { - return new TRenewDelegationTokenReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.delegationToken = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getDelegationToken() { - return this.delegationToken; - } - - public void setDelegationToken(String delegationToken) { - this.delegationToken = delegationToken; - } - - public void unsetDelegationToken() { - this.delegationToken = null; - } - - /** Returns true if field delegationToken is set (has been assigned a value) and false otherwise */ - public boolean isSetDelegationToken() { - return this.delegationToken != null; - } - - public void setDelegationTokenIsSet(boolean value) { - if (!value) { - this.delegationToken = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case DELEGATION_TOKEN: - if (value == null) { - unsetDelegationToken(); - } else { - setDelegationToken((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case DELEGATION_TOKEN: - return getDelegationToken(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case DELEGATION_TOKEN: - return isSetDelegationToken(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TRenewDelegationTokenReq) - return this.equals((TRenewDelegationTokenReq)that); - return false; - } - - public boolean equals(TRenewDelegationTokenReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_delegationToken = true && this.isSetDelegationToken(); - boolean that_present_delegationToken = true && that.isSetDelegationToken(); - if (this_present_delegationToken || that_present_delegationToken) { - if (!(this_present_delegationToken && that_present_delegationToken)) - return 
false; - if (!this.delegationToken.equals(that.delegationToken)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - boolean present_delegationToken = true && (isSetDelegationToken()); - builder.append(present_delegationToken); - if (present_delegationToken) - builder.append(delegationToken); - - return builder.toHashCode(); - } - - public int compareTo(TRenewDelegationTokenReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TRenewDelegationTokenReq typedOther = (TRenewDelegationTokenReq)other; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetDelegationToken()).compareTo(typedOther.isSetDelegationToken()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetDelegationToken()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.delegationToken, typedOther.delegationToken); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TRenewDelegationTokenReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (!first) sb.append(", "); - sb.append("delegationToken:"); - if (this.delegationToken == null) { - sb.append("null"); - } else { - sb.append(this.delegationToken); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - if (!isSetDelegationToken()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'delegationToken' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TRenewDelegationTokenReqStandardSchemeFactory implements SchemeFactory { - public TRenewDelegationTokenReqStandardScheme getScheme() { - return new TRenewDelegationTokenReqStandardScheme(); - } - } - - private static class TRenewDelegationTokenReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TRenewDelegationTokenReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // DELEGATION_TOKEN - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.delegationToken = iprot.readString(); - struct.setDelegationTokenIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TRenewDelegationTokenReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.delegationToken != null) { - oprot.writeFieldBegin(DELEGATION_TOKEN_FIELD_DESC); - oprot.writeString(struct.delegationToken); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TRenewDelegationTokenReqTupleSchemeFactory implements SchemeFactory { - public TRenewDelegationTokenReqTupleScheme getScheme() { - return new TRenewDelegationTokenReqTupleScheme(); - } - } - - private static class TRenewDelegationTokenReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TRenewDelegationTokenReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - oprot.writeString(struct.delegationToken); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TRenewDelegationTokenReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = 
(TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - struct.delegationToken = iprot.readString(); - struct.setDelegationTokenIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRenewDelegationTokenResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRenewDelegationTokenResp.java deleted file mode 100644 index 5f3eb6c4d4b90..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRenewDelegationTokenResp.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TRenewDelegationTokenResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TRenewDelegationTokenResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TRenewDelegationTokenRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TRenewDelegationTokenRespTupleSchemeFactory()); - } - - private TStatus status; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
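(Not part of the original patch.) The deleted generated classes all follow the same Thrift pattern visible above: required fields are only enforced by `validate()`, and Java serialization is delegated to `TCompactProtocol` in the `writeObject`/`readObject` helpers. A minimal sketch of that round trip for `TRenewDelegationTokenResp`, using only constructors and methods shown in the deleted code; it assumes libthrift 0.9.x on the classpath and a `TStatusCode.SUCCESS_STATUS` constant, which is not shown in this hunk.

```java
// Illustrative sketch only -- mirrors the generated writeObject/readObject helpers.
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

import org.apache.hive.service.cli.thrift.TRenewDelegationTokenResp;
import org.apache.hive.service.cli.thrift.TStatus;
import org.apache.hive.service.cli.thrift.TStatusCode;   // assumed: enum not shown in this hunk
import org.apache.thrift.protocol.TCompactProtocol;
import org.apache.thrift.transport.TIOStreamTransport;

public class RenewRespRoundTrip {
  public static void main(String[] args) throws Exception {
    TRenewDelegationTokenResp resp = new TRenewDelegationTokenResp(
        new TStatus(TStatusCode.SUCCESS_STATUS));
    resp.validate();  // throws TProtocolException if the required 'status' field is unset

    // Serialize with the same protocol/transport pair the generated helper uses.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    resp.write(new TCompactProtocol(new TIOStreamTransport(bytes)));

    // Deserialize into a fresh instance and compare.
    TRenewDelegationTokenResp copy = new TRenewDelegationTokenResp();
    copy.read(new TCompactProtocol(new TIOStreamTransport(
        new ByteArrayInputStream(bytes.toByteArray()))));
    System.out.println(resp.equals(copy));  // true when 'status' survives the round trip
  }
}
```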
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TRenewDelegationTokenResp.class, metaDataMap); - } - - public TRenewDelegationTokenResp() { - } - - public TRenewDelegationTokenResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TRenewDelegationTokenResp(TRenewDelegationTokenResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - } - - public TRenewDelegationTokenResp deepCopy() { - return new TRenewDelegationTokenResp(this); - } - - @Override - public void clear() { - this.status = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TRenewDelegationTokenResp) - return this.equals((TRenewDelegationTokenResp)that); - return false; - } - - public boolean equals(TRenewDelegationTokenResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - builder.append(present_status); - if (present_status) - 
builder.append(status); - - return builder.toHashCode(); - } - - public int compareTo(TRenewDelegationTokenResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TRenewDelegationTokenResp typedOther = (TRenewDelegationTokenResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TRenewDelegationTokenResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TRenewDelegationTokenRespStandardSchemeFactory implements SchemeFactory { - public TRenewDelegationTokenRespStandardScheme getScheme() { - return new TRenewDelegationTokenRespStandardScheme(); - } - } - - private static class TRenewDelegationTokenRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TRenewDelegationTokenResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void 
write(org.apache.thrift.protocol.TProtocol oprot, TRenewDelegationTokenResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TRenewDelegationTokenRespTupleSchemeFactory implements SchemeFactory { - public TRenewDelegationTokenRespTupleScheme getScheme() { - return new TRenewDelegationTokenRespTupleScheme(); - } - } - - private static class TRenewDelegationTokenRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TRenewDelegationTokenResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TRenewDelegationTokenResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRow.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRow.java deleted file mode 100644 index a44cfb08ff01a..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRow.java +++ /dev/null @@ -1,439 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TRow implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TRow"); - - private static final org.apache.thrift.protocol.TField COL_VALS_FIELD_DESC = new org.apache.thrift.protocol.TField("colVals", org.apache.thrift.protocol.TType.LIST, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TRowStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TRowTupleSchemeFactory()); - } - - private List colVals; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - COL_VALS((short)1, "colVals"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // COL_VALS - return COL_VALS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.COL_VALS, new org.apache.thrift.meta_data.FieldMetaData("colVals", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TColumnValue.class)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TRow.class, metaDataMap); - } - - public TRow() { - } - - public TRow( - List colVals) - { - this(); - this.colVals = colVals; - } - - /** - * Performs a deep copy on other. - */ - public TRow(TRow other) { - if (other.isSetColVals()) { - List __this__colVals = new ArrayList(); - for (TColumnValue other_element : other.colVals) { - __this__colVals.add(new TColumnValue(other_element)); - } - this.colVals = __this__colVals; - } - } - - public TRow deepCopy() { - return new TRow(this); - } - - @Override - public void clear() { - this.colVals = null; - } - - public int getColValsSize() { - return (this.colVals == null) ? 0 : this.colVals.size(); - } - - public java.util.Iterator getColValsIterator() { - return (this.colVals == null) ? 
null : this.colVals.iterator(); - } - - public void addToColVals(TColumnValue elem) { - if (this.colVals == null) { - this.colVals = new ArrayList(); - } - this.colVals.add(elem); - } - - public List getColVals() { - return this.colVals; - } - - public void setColVals(List colVals) { - this.colVals = colVals; - } - - public void unsetColVals() { - this.colVals = null; - } - - /** Returns true if field colVals is set (has been assigned a value) and false otherwise */ - public boolean isSetColVals() { - return this.colVals != null; - } - - public void setColValsIsSet(boolean value) { - if (!value) { - this.colVals = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case COL_VALS: - if (value == null) { - unsetColVals(); - } else { - setColVals((List)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case COL_VALS: - return getColVals(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case COL_VALS: - return isSetColVals(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TRow) - return this.equals((TRow)that); - return false; - } - - public boolean equals(TRow that) { - if (that == null) - return false; - - boolean this_present_colVals = true && this.isSetColVals(); - boolean that_present_colVals = true && that.isSetColVals(); - if (this_present_colVals || that_present_colVals) { - if (!(this_present_colVals && that_present_colVals)) - return false; - if (!this.colVals.equals(that.colVals)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_colVals = true && (isSetColVals()); - builder.append(present_colVals); - if (present_colVals) - builder.append(colVals); - - return builder.toHashCode(); - } - - public int compareTo(TRow other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TRow typedOther = (TRow)other; - - lastComparison = Boolean.valueOf(isSetColVals()).compareTo(typedOther.isSetColVals()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetColVals()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.colVals, typedOther.colVals); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TRow("); - boolean first = true; - - sb.append("colVals:"); - if (this.colVals == null) { - sb.append("null"); - } else { - sb.append(this.colVals); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required 
fields - if (!isSetColVals()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'colVals' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TRowStandardSchemeFactory implements SchemeFactory { - public TRowStandardScheme getScheme() { - return new TRowStandardScheme(); - } - } - - private static class TRowStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TRow struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // COL_VALS - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list46 = iprot.readListBegin(); - struct.colVals = new ArrayList(_list46.size); - for (int _i47 = 0; _i47 < _list46.size; ++_i47) - { - TColumnValue _elem48; // optional - _elem48 = new TColumnValue(); - _elem48.read(iprot); - struct.colVals.add(_elem48); - } - iprot.readListEnd(); - } - struct.setColValsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TRow struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.colVals != null) { - oprot.writeFieldBegin(COL_VALS_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, struct.colVals.size())); - for (TColumnValue _iter49 : struct.colVals) - { - _iter49.write(oprot); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TRowTupleSchemeFactory implements SchemeFactory { - public TRowTupleScheme getScheme() { - return new TRowTupleScheme(); - } - } - - private static class TRowTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TRow struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.colVals.size()); - for (TColumnValue _iter50 : struct.colVals) - { - _iter50.write(oprot); - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TRow struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list51 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); 
- struct.colVals = new ArrayList(_list51.size); - for (int _i52 = 0; _i52 < _list51.size; ++_i52) - { - TColumnValue _elem53; // optional - _elem53 = new TColumnValue(); - _elem53.read(iprot); - struct.colVals.add(_elem53); - } - } - struct.setColValsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRowSet.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRowSet.java deleted file mode 100644 index d16c8a4bb32da..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRowSet.java +++ /dev/null @@ -1,702 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TRowSet implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TRowSet"); - - private static final org.apache.thrift.protocol.TField START_ROW_OFFSET_FIELD_DESC = new org.apache.thrift.protocol.TField("startRowOffset", org.apache.thrift.protocol.TType.I64, (short)1); - private static final org.apache.thrift.protocol.TField ROWS_FIELD_DESC = new org.apache.thrift.protocol.TField("rows", org.apache.thrift.protocol.TType.LIST, (short)2); - private static final org.apache.thrift.protocol.TField COLUMNS_FIELD_DESC = new org.apache.thrift.protocol.TField("columns", org.apache.thrift.protocol.TType.LIST, (short)3); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TRowSetStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TRowSetTupleSchemeFactory()); - } - - private long startRowOffset; // required - private List rows; // required - private List columns; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - START_ROW_OFFSET((short)1, "startRowOffset"), - ROWS((short)2, "rows"), - COLUMNS((short)3, "columns"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // START_ROW_OFFSET - return START_ROW_OFFSET; - case 2: // ROWS - return ROWS; - case 3: // COLUMNS - return COLUMNS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __STARTROWOFFSET_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.COLUMNS}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.START_ROW_OFFSET, new org.apache.thrift.meta_data.FieldMetaData("startRowOffset", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - tmpMap.put(_Fields.ROWS, new org.apache.thrift.meta_data.FieldMetaData("rows", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TRow.class)))); - tmpMap.put(_Fields.COLUMNS, new org.apache.thrift.meta_data.FieldMetaData("columns", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TColumn.class)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TRowSet.class, metaDataMap); - } - - public TRowSet() { - } - - public TRowSet( - long startRowOffset, - List rows) - { - this(); - this.startRowOffset = startRowOffset; - setStartRowOffsetIsSet(true); - this.rows = rows; - } - - /** - * Performs a deep copy on other. 
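(Not part of the original patch.) For `TRowSet`, `startRowOffset` and `rows` are REQUIRED while `columns` is OPTIONAL: the setter for the primitive `long` flips an isset bit, `validate()` checks only the required fields, and the tuple scheme later encodes the optional field behind a `BitSet` flag. A minimal sketch under those assumptions, using only methods shown in the deleted code (the fields of `TColumnValue` are not shown here, so it is left empty for illustration).

```java
// Illustrative sketch only -- required vs. optional fields in the generated TRowSet.
import org.apache.hive.service.cli.thrift.TColumnValue;
import org.apache.hive.service.cli.thrift.TRow;
import org.apache.hive.service.cli.thrift.TRowSet;

public class RowSetSketch {
  public static void main(String[] args) throws Exception {
    TRow row = new TRow();
    row.addToColVals(new TColumnValue());   // contents elided; setters not shown in this hunk

    TRowSet rowSet = new TRowSet();
    rowSet.setStartRowOffset(0L);           // also sets the isset bit for the primitive field
    rowSet.addToRows(row);
    rowSet.validate();                      // passes: both required fields are set

    System.out.println(rowSet.isSetColumns());  // false -- optional 'columns' left unset
  }
}
```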
- */ - public TRowSet(TRowSet other) { - __isset_bitfield = other.__isset_bitfield; - this.startRowOffset = other.startRowOffset; - if (other.isSetRows()) { - List __this__rows = new ArrayList(); - for (TRow other_element : other.rows) { - __this__rows.add(new TRow(other_element)); - } - this.rows = __this__rows; - } - if (other.isSetColumns()) { - List __this__columns = new ArrayList(); - for (TColumn other_element : other.columns) { - __this__columns.add(new TColumn(other_element)); - } - this.columns = __this__columns; - } - } - - public TRowSet deepCopy() { - return new TRowSet(this); - } - - @Override - public void clear() { - setStartRowOffsetIsSet(false); - this.startRowOffset = 0; - this.rows = null; - this.columns = null; - } - - public long getStartRowOffset() { - return this.startRowOffset; - } - - public void setStartRowOffset(long startRowOffset) { - this.startRowOffset = startRowOffset; - setStartRowOffsetIsSet(true); - } - - public void unsetStartRowOffset() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __STARTROWOFFSET_ISSET_ID); - } - - /** Returns true if field startRowOffset is set (has been assigned a value) and false otherwise */ - public boolean isSetStartRowOffset() { - return EncodingUtils.testBit(__isset_bitfield, __STARTROWOFFSET_ISSET_ID); - } - - public void setStartRowOffsetIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __STARTROWOFFSET_ISSET_ID, value); - } - - public int getRowsSize() { - return (this.rows == null) ? 0 : this.rows.size(); - } - - public java.util.Iterator getRowsIterator() { - return (this.rows == null) ? null : this.rows.iterator(); - } - - public void addToRows(TRow elem) { - if (this.rows == null) { - this.rows = new ArrayList(); - } - this.rows.add(elem); - } - - public List getRows() { - return this.rows; - } - - public void setRows(List rows) { - this.rows = rows; - } - - public void unsetRows() { - this.rows = null; - } - - /** Returns true if field rows is set (has been assigned a value) and false otherwise */ - public boolean isSetRows() { - return this.rows != null; - } - - public void setRowsIsSet(boolean value) { - if (!value) { - this.rows = null; - } - } - - public int getColumnsSize() { - return (this.columns == null) ? 0 : this.columns.size(); - } - - public java.util.Iterator getColumnsIterator() { - return (this.columns == null) ? 
null : this.columns.iterator(); - } - - public void addToColumns(TColumn elem) { - if (this.columns == null) { - this.columns = new ArrayList(); - } - this.columns.add(elem); - } - - public List getColumns() { - return this.columns; - } - - public void setColumns(List columns) { - this.columns = columns; - } - - public void unsetColumns() { - this.columns = null; - } - - /** Returns true if field columns is set (has been assigned a value) and false otherwise */ - public boolean isSetColumns() { - return this.columns != null; - } - - public void setColumnsIsSet(boolean value) { - if (!value) { - this.columns = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case START_ROW_OFFSET: - if (value == null) { - unsetStartRowOffset(); - } else { - setStartRowOffset((Long)value); - } - break; - - case ROWS: - if (value == null) { - unsetRows(); - } else { - setRows((List)value); - } - break; - - case COLUMNS: - if (value == null) { - unsetColumns(); - } else { - setColumns((List)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case START_ROW_OFFSET: - return Long.valueOf(getStartRowOffset()); - - case ROWS: - return getRows(); - - case COLUMNS: - return getColumns(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case START_ROW_OFFSET: - return isSetStartRowOffset(); - case ROWS: - return isSetRows(); - case COLUMNS: - return isSetColumns(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TRowSet) - return this.equals((TRowSet)that); - return false; - } - - public boolean equals(TRowSet that) { - if (that == null) - return false; - - boolean this_present_startRowOffset = true; - boolean that_present_startRowOffset = true; - if (this_present_startRowOffset || that_present_startRowOffset) { - if (!(this_present_startRowOffset && that_present_startRowOffset)) - return false; - if (this.startRowOffset != that.startRowOffset) - return false; - } - - boolean this_present_rows = true && this.isSetRows(); - boolean that_present_rows = true && that.isSetRows(); - if (this_present_rows || that_present_rows) { - if (!(this_present_rows && that_present_rows)) - return false; - if (!this.rows.equals(that.rows)) - return false; - } - - boolean this_present_columns = true && this.isSetColumns(); - boolean that_present_columns = true && that.isSetColumns(); - if (this_present_columns || that_present_columns) { - if (!(this_present_columns && that_present_columns)) - return false; - if (!this.columns.equals(that.columns)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_startRowOffset = true; - builder.append(present_startRowOffset); - if (present_startRowOffset) - builder.append(startRowOffset); - - boolean present_rows = true && (isSetRows()); - builder.append(present_rows); - if (present_rows) - builder.append(rows); - - boolean present_columns = true && (isSetColumns()); - builder.append(present_columns); - if (present_columns) - builder.append(columns); - - return builder.toHashCode(); - } - - public int compareTo(TRowSet other) { - if 
(!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TRowSet typedOther = (TRowSet)other; - - lastComparison = Boolean.valueOf(isSetStartRowOffset()).compareTo(typedOther.isSetStartRowOffset()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStartRowOffset()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.startRowOffset, typedOther.startRowOffset); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetRows()).compareTo(typedOther.isSetRows()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetRows()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.rows, typedOther.rows); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetColumns()).compareTo(typedOther.isSetColumns()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetColumns()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.columns, typedOther.columns); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TRowSet("); - boolean first = true; - - sb.append("startRowOffset:"); - sb.append(this.startRowOffset); - first = false; - if (!first) sb.append(", "); - sb.append("rows:"); - if (this.rows == null) { - sb.append("null"); - } else { - sb.append(this.rows); - } - first = false; - if (isSetColumns()) { - if (!first) sb.append(", "); - sb.append("columns:"); - if (this.columns == null) { - sb.append("null"); - } else { - sb.append(this.columns); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStartRowOffset()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'startRowOffset' is unset! Struct:" + toString()); - } - - if (!isSetRows()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'rows' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
- __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TRowSetStandardSchemeFactory implements SchemeFactory { - public TRowSetStandardScheme getScheme() { - return new TRowSetStandardScheme(); - } - } - - private static class TRowSetStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TRowSet struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // START_ROW_OFFSET - if (schemeField.type == org.apache.thrift.protocol.TType.I64) { - struct.startRowOffset = iprot.readI64(); - struct.setStartRowOffsetIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // ROWS - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list118 = iprot.readListBegin(); - struct.rows = new ArrayList(_list118.size); - for (int _i119 = 0; _i119 < _list118.size; ++_i119) - { - TRow _elem120; // optional - _elem120 = new TRow(); - _elem120.read(iprot); - struct.rows.add(_elem120); - } - iprot.readListEnd(); - } - struct.setRowsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // COLUMNS - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list121 = iprot.readListBegin(); - struct.columns = new ArrayList(_list121.size); - for (int _i122 = 0; _i122 < _list121.size; ++_i122) - { - TColumn _elem123; // optional - _elem123 = new TColumn(); - _elem123.read(iprot); - struct.columns.add(_elem123); - } - iprot.readListEnd(); - } - struct.setColumnsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TRowSet struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - oprot.writeFieldBegin(START_ROW_OFFSET_FIELD_DESC); - oprot.writeI64(struct.startRowOffset); - oprot.writeFieldEnd(); - if (struct.rows != null) { - oprot.writeFieldBegin(ROWS_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, struct.rows.size())); - for (TRow _iter124 : struct.rows) - { - _iter124.write(oprot); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.columns != null) { - if (struct.isSetColumns()) { - oprot.writeFieldBegin(COLUMNS_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, struct.columns.size())); - for (TColumn _iter125 : struct.columns) - { - _iter125.write(oprot); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TRowSetTupleSchemeFactory implements SchemeFactory { - public TRowSetTupleScheme getScheme() { - return new 
TRowSetTupleScheme(); - } - } - - private static class TRowSetTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TRowSet struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeI64(struct.startRowOffset); - { - oprot.writeI32(struct.rows.size()); - for (TRow _iter126 : struct.rows) - { - _iter126.write(oprot); - } - } - BitSet optionals = new BitSet(); - if (struct.isSetColumns()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetColumns()) { - { - oprot.writeI32(struct.columns.size()); - for (TColumn _iter127 : struct.columns) - { - _iter127.write(oprot); - } - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TRowSet struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.startRowOffset = iprot.readI64(); - struct.setStartRowOffsetIsSet(true); - { - org.apache.thrift.protocol.TList _list128 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); - struct.rows = new ArrayList(_list128.size); - for (int _i129 = 0; _i129 < _list128.size; ++_i129) - { - TRow _elem130; // optional - _elem130 = new TRow(); - _elem130.read(iprot); - struct.rows.add(_elem130); - } - } - struct.setRowsIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - { - org.apache.thrift.protocol.TList _list131 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); - struct.columns = new ArrayList(_list131.size); - for (int _i132 = 0; _i132 < _list131.size; ++_i132) - { - TColumn _elem133; // optional - _elem133 = new TColumn(); - _elem133.read(iprot); - struct.columns.add(_elem133); - } - } - struct.setColumnsIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TSessionHandle.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TSessionHandle.java deleted file mode 100644 index 82c00dd68a98b..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TSessionHandle.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TSessionHandle implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TSessionHandle"); - - private static final org.apache.thrift.protocol.TField 
SESSION_ID_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionId", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TSessionHandleStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TSessionHandleTupleSchemeFactory()); - } - - private THandleIdentifier sessionId; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_ID((short)1, "sessionId"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_ID - return SESSION_ID; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_ID, new org.apache.thrift.meta_data.FieldMetaData("sessionId", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, THandleIdentifier.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TSessionHandle.class, metaDataMap); - } - - public TSessionHandle() { - } - - public TSessionHandle( - THandleIdentifier sessionId) - { - this(); - this.sessionId = sessionId; - } - - /** - * Performs a deep copy on other. 
- */ - public TSessionHandle(TSessionHandle other) { - if (other.isSetSessionId()) { - this.sessionId = new THandleIdentifier(other.sessionId); - } - } - - public TSessionHandle deepCopy() { - return new TSessionHandle(this); - } - - @Override - public void clear() { - this.sessionId = null; - } - - public THandleIdentifier getSessionId() { - return this.sessionId; - } - - public void setSessionId(THandleIdentifier sessionId) { - this.sessionId = sessionId; - } - - public void unsetSessionId() { - this.sessionId = null; - } - - /** Returns true if field sessionId is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionId() { - return this.sessionId != null; - } - - public void setSessionIdIsSet(boolean value) { - if (!value) { - this.sessionId = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_ID: - if (value == null) { - unsetSessionId(); - } else { - setSessionId((THandleIdentifier)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_ID: - return getSessionId(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_ID: - return isSetSessionId(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TSessionHandle) - return this.equals((TSessionHandle)that); - return false; - } - - public boolean equals(TSessionHandle that) { - if (that == null) - return false; - - boolean this_present_sessionId = true && this.isSetSessionId(); - boolean that_present_sessionId = true && that.isSetSessionId(); - if (this_present_sessionId || that_present_sessionId) { - if (!(this_present_sessionId && that_present_sessionId)) - return false; - if (!this.sessionId.equals(that.sessionId)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionId = true && (isSetSessionId()); - builder.append(present_sessionId); - if (present_sessionId) - builder.append(sessionId); - - return builder.toHashCode(); - } - - public int compareTo(TSessionHandle other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TSessionHandle typedOther = (TSessionHandle)other; - - lastComparison = Boolean.valueOf(isSetSessionId()).compareTo(typedOther.isSetSessionId()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionId()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionId, typedOther.sessionId); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new 
StringBuilder("TSessionHandle("); - boolean first = true; - - sb.append("sessionId:"); - if (this.sessionId == null) { - sb.append("null"); - } else { - sb.append(this.sessionId); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionId()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionId' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionId != null) { - sessionId.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TSessionHandleStandardSchemeFactory implements SchemeFactory { - public TSessionHandleStandardScheme getScheme() { - return new TSessionHandleStandardScheme(); - } - } - - private static class TSessionHandleStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TSessionHandle struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_ID - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionId = new THandleIdentifier(); - struct.sessionId.read(iprot); - struct.setSessionIdIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TSessionHandle struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionId != null) { - oprot.writeFieldBegin(SESSION_ID_FIELD_DESC); - struct.sessionId.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TSessionHandleTupleSchemeFactory implements SchemeFactory { - public TSessionHandleTupleScheme getScheme() { - return new TSessionHandleTupleScheme(); - } - } - - private static class TSessionHandleTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TSessionHandle struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionId.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TSessionHandle struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionId = new THandleIdentifier(); - struct.sessionId.read(iprot); - struct.setSessionIdIsSet(true); - } - } - -} - diff --git 
a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStatus.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStatus.java deleted file mode 100644 index 24a746e94965d..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStatus.java +++ /dev/null @@ -1,874 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TStatus implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TStatus"); - - private static final org.apache.thrift.protocol.TField STATUS_CODE_FIELD_DESC = new org.apache.thrift.protocol.TField("statusCode", org.apache.thrift.protocol.TType.I32, (short)1); - private static final org.apache.thrift.protocol.TField INFO_MESSAGES_FIELD_DESC = new org.apache.thrift.protocol.TField("infoMessages", org.apache.thrift.protocol.TType.LIST, (short)2); - private static final org.apache.thrift.protocol.TField SQL_STATE_FIELD_DESC = new org.apache.thrift.protocol.TField("sqlState", org.apache.thrift.protocol.TType.STRING, (short)3); - private static final org.apache.thrift.protocol.TField ERROR_CODE_FIELD_DESC = new org.apache.thrift.protocol.TField("errorCode", org.apache.thrift.protocol.TType.I32, (short)4); - private static final org.apache.thrift.protocol.TField ERROR_MESSAGE_FIELD_DESC = new org.apache.thrift.protocol.TField("errorMessage", org.apache.thrift.protocol.TType.STRING, (short)5); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TStatusStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TStatusTupleSchemeFactory()); - } - - private TStatusCode statusCode; // required - private List infoMessages; // optional - private String sqlState; // optional - private int errorCode; // optional - private String errorMessage; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - /** - * - * @see TStatusCode - */ - STATUS_CODE((short)1, "statusCode"), - INFO_MESSAGES((short)2, "infoMessages"), - SQL_STATE((short)3, "sqlState"), - ERROR_CODE((short)4, "errorCode"), - ERROR_MESSAGE((short)5, "errorMessage"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS_CODE - return STATUS_CODE; - case 2: // INFO_MESSAGES - return INFO_MESSAGES; - case 3: // SQL_STATE - return SQL_STATE; - case 4: // ERROR_CODE - return ERROR_CODE; - case 5: // ERROR_MESSAGE - return ERROR_MESSAGE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __ERRORCODE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.INFO_MESSAGES,_Fields.SQL_STATE,_Fields.ERROR_CODE,_Fields.ERROR_MESSAGE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS_CODE, new org.apache.thrift.meta_data.FieldMetaData("statusCode", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TStatusCode.class))); - tmpMap.put(_Fields.INFO_MESSAGES, new org.apache.thrift.meta_data.FieldMetaData("infoMessages", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); - tmpMap.put(_Fields.SQL_STATE, new org.apache.thrift.meta_data.FieldMetaData("sqlState", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.ERROR_CODE, new org.apache.thrift.meta_data.FieldMetaData("errorCode", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.ERROR_MESSAGE, new org.apache.thrift.meta_data.FieldMetaData("errorMessage", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - 
org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TStatus.class, metaDataMap); - } - - public TStatus() { - } - - public TStatus( - TStatusCode statusCode) - { - this(); - this.statusCode = statusCode; - } - - /** - * Performs a deep copy on other. - */ - public TStatus(TStatus other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetStatusCode()) { - this.statusCode = other.statusCode; - } - if (other.isSetInfoMessages()) { - List __this__infoMessages = new ArrayList(); - for (String other_element : other.infoMessages) { - __this__infoMessages.add(other_element); - } - this.infoMessages = __this__infoMessages; - } - if (other.isSetSqlState()) { - this.sqlState = other.sqlState; - } - this.errorCode = other.errorCode; - if (other.isSetErrorMessage()) { - this.errorMessage = other.errorMessage; - } - } - - public TStatus deepCopy() { - return new TStatus(this); - } - - @Override - public void clear() { - this.statusCode = null; - this.infoMessages = null; - this.sqlState = null; - setErrorCodeIsSet(false); - this.errorCode = 0; - this.errorMessage = null; - } - - /** - * - * @see TStatusCode - */ - public TStatusCode getStatusCode() { - return this.statusCode; - } - - /** - * - * @see TStatusCode - */ - public void setStatusCode(TStatusCode statusCode) { - this.statusCode = statusCode; - } - - public void unsetStatusCode() { - this.statusCode = null; - } - - /** Returns true if field statusCode is set (has been assigned a value) and false otherwise */ - public boolean isSetStatusCode() { - return this.statusCode != null; - } - - public void setStatusCodeIsSet(boolean value) { - if (!value) { - this.statusCode = null; - } - } - - public int getInfoMessagesSize() { - return (this.infoMessages == null) ? 0 : this.infoMessages.size(); - } - - public java.util.Iterator getInfoMessagesIterator() { - return (this.infoMessages == null) ? 
null : this.infoMessages.iterator(); - } - - public void addToInfoMessages(String elem) { - if (this.infoMessages == null) { - this.infoMessages = new ArrayList(); - } - this.infoMessages.add(elem); - } - - public List getInfoMessages() { - return this.infoMessages; - } - - public void setInfoMessages(List infoMessages) { - this.infoMessages = infoMessages; - } - - public void unsetInfoMessages() { - this.infoMessages = null; - } - - /** Returns true if field infoMessages is set (has been assigned a value) and false otherwise */ - public boolean isSetInfoMessages() { - return this.infoMessages != null; - } - - public void setInfoMessagesIsSet(boolean value) { - if (!value) { - this.infoMessages = null; - } - } - - public String getSqlState() { - return this.sqlState; - } - - public void setSqlState(String sqlState) { - this.sqlState = sqlState; - } - - public void unsetSqlState() { - this.sqlState = null; - } - - /** Returns true if field sqlState is set (has been assigned a value) and false otherwise */ - public boolean isSetSqlState() { - return this.sqlState != null; - } - - public void setSqlStateIsSet(boolean value) { - if (!value) { - this.sqlState = null; - } - } - - public int getErrorCode() { - return this.errorCode; - } - - public void setErrorCode(int errorCode) { - this.errorCode = errorCode; - setErrorCodeIsSet(true); - } - - public void unsetErrorCode() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __ERRORCODE_ISSET_ID); - } - - /** Returns true if field errorCode is set (has been assigned a value) and false otherwise */ - public boolean isSetErrorCode() { - return EncodingUtils.testBit(__isset_bitfield, __ERRORCODE_ISSET_ID); - } - - public void setErrorCodeIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __ERRORCODE_ISSET_ID, value); - } - - public String getErrorMessage() { - return this.errorMessage; - } - - public void setErrorMessage(String errorMessage) { - this.errorMessage = errorMessage; - } - - public void unsetErrorMessage() { - this.errorMessage = null; - } - - /** Returns true if field errorMessage is set (has been assigned a value) and false otherwise */ - public boolean isSetErrorMessage() { - return this.errorMessage != null; - } - - public void setErrorMessageIsSet(boolean value) { - if (!value) { - this.errorMessage = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS_CODE: - if (value == null) { - unsetStatusCode(); - } else { - setStatusCode((TStatusCode)value); - } - break; - - case INFO_MESSAGES: - if (value == null) { - unsetInfoMessages(); - } else { - setInfoMessages((List)value); - } - break; - - case SQL_STATE: - if (value == null) { - unsetSqlState(); - } else { - setSqlState((String)value); - } - break; - - case ERROR_CODE: - if (value == null) { - unsetErrorCode(); - } else { - setErrorCode((Integer)value); - } - break; - - case ERROR_MESSAGE: - if (value == null) { - unsetErrorMessage(); - } else { - setErrorMessage((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS_CODE: - return getStatusCode(); - - case INFO_MESSAGES: - return getInfoMessages(); - - case SQL_STATE: - return getSqlState(); - - case ERROR_CODE: - return Integer.valueOf(getErrorCode()); - - case ERROR_MESSAGE: - return getErrorMessage(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - 
public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS_CODE: - return isSetStatusCode(); - case INFO_MESSAGES: - return isSetInfoMessages(); - case SQL_STATE: - return isSetSqlState(); - case ERROR_CODE: - return isSetErrorCode(); - case ERROR_MESSAGE: - return isSetErrorMessage(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TStatus) - return this.equals((TStatus)that); - return false; - } - - public boolean equals(TStatus that) { - if (that == null) - return false; - - boolean this_present_statusCode = true && this.isSetStatusCode(); - boolean that_present_statusCode = true && that.isSetStatusCode(); - if (this_present_statusCode || that_present_statusCode) { - if (!(this_present_statusCode && that_present_statusCode)) - return false; - if (!this.statusCode.equals(that.statusCode)) - return false; - } - - boolean this_present_infoMessages = true && this.isSetInfoMessages(); - boolean that_present_infoMessages = true && that.isSetInfoMessages(); - if (this_present_infoMessages || that_present_infoMessages) { - if (!(this_present_infoMessages && that_present_infoMessages)) - return false; - if (!this.infoMessages.equals(that.infoMessages)) - return false; - } - - boolean this_present_sqlState = true && this.isSetSqlState(); - boolean that_present_sqlState = true && that.isSetSqlState(); - if (this_present_sqlState || that_present_sqlState) { - if (!(this_present_sqlState && that_present_sqlState)) - return false; - if (!this.sqlState.equals(that.sqlState)) - return false; - } - - boolean this_present_errorCode = true && this.isSetErrorCode(); - boolean that_present_errorCode = true && that.isSetErrorCode(); - if (this_present_errorCode || that_present_errorCode) { - if (!(this_present_errorCode && that_present_errorCode)) - return false; - if (this.errorCode != that.errorCode) - return false; - } - - boolean this_present_errorMessage = true && this.isSetErrorMessage(); - boolean that_present_errorMessage = true && that.isSetErrorMessage(); - if (this_present_errorMessage || that_present_errorMessage) { - if (!(this_present_errorMessage && that_present_errorMessage)) - return false; - if (!this.errorMessage.equals(that.errorMessage)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_statusCode = true && (isSetStatusCode()); - builder.append(present_statusCode); - if (present_statusCode) - builder.append(statusCode.getValue()); - - boolean present_infoMessages = true && (isSetInfoMessages()); - builder.append(present_infoMessages); - if (present_infoMessages) - builder.append(infoMessages); - - boolean present_sqlState = true && (isSetSqlState()); - builder.append(present_sqlState); - if (present_sqlState) - builder.append(sqlState); - - boolean present_errorCode = true && (isSetErrorCode()); - builder.append(present_errorCode); - if (present_errorCode) - builder.append(errorCode); - - boolean present_errorMessage = true && (isSetErrorMessage()); - builder.append(present_errorMessage); - if (present_errorMessage) - builder.append(errorMessage); - - return builder.toHashCode(); - } - - public int compareTo(TStatus other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TStatus typedOther = 
(TStatus)other; - - lastComparison = Boolean.valueOf(isSetStatusCode()).compareTo(typedOther.isSetStatusCode()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatusCode()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.statusCode, typedOther.statusCode); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetInfoMessages()).compareTo(typedOther.isSetInfoMessages()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetInfoMessages()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.infoMessages, typedOther.infoMessages); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSqlState()).compareTo(typedOther.isSetSqlState()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSqlState()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sqlState, typedOther.sqlState); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetErrorCode()).compareTo(typedOther.isSetErrorCode()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetErrorCode()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.errorCode, typedOther.errorCode); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetErrorMessage()).compareTo(typedOther.isSetErrorMessage()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetErrorMessage()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.errorMessage, typedOther.errorMessage); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TStatus("); - boolean first = true; - - sb.append("statusCode:"); - if (this.statusCode == null) { - sb.append("null"); - } else { - sb.append(this.statusCode); - } - first = false; - if (isSetInfoMessages()) { - if (!first) sb.append(", "); - sb.append("infoMessages:"); - if (this.infoMessages == null) { - sb.append("null"); - } else { - sb.append(this.infoMessages); - } - first = false; - } - if (isSetSqlState()) { - if (!first) sb.append(", "); - sb.append("sqlState:"); - if (this.sqlState == null) { - sb.append("null"); - } else { - sb.append(this.sqlState); - } - first = false; - } - if (isSetErrorCode()) { - if (!first) sb.append(", "); - sb.append("errorCode:"); - sb.append(this.errorCode); - first = false; - } - if (isSetErrorMessage()) { - if (!first) sb.append(", "); - sb.append("errorMessage:"); - if (this.errorMessage == null) { - sb.append("null"); - } else { - sb.append(this.errorMessage); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatusCode()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'statusCode' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TStatusStandardSchemeFactory implements SchemeFactory { - public TStatusStandardScheme getScheme() { - return new TStatusStandardScheme(); - } - } - - private static class TStatusStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TStatus struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS_CODE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.statusCode = TStatusCode.findByValue(iprot.readI32()); - struct.setStatusCodeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // INFO_MESSAGES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list134 = iprot.readListBegin(); - struct.infoMessages = new ArrayList(_list134.size); - for (int _i135 = 0; _i135 < _list134.size; ++_i135) - { - String _elem136; // optional - _elem136 = iprot.readString(); - struct.infoMessages.add(_elem136); - } - iprot.readListEnd(); - } - struct.setInfoMessagesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SQL_STATE - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.sqlState = iprot.readString(); - struct.setSqlStateIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // ERROR_CODE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.errorCode = iprot.readI32(); - struct.setErrorCodeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 5: // ERROR_MESSAGE - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.errorMessage = iprot.readString(); - struct.setErrorMessageIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TStatus struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.statusCode != null) { - oprot.writeFieldBegin(STATUS_CODE_FIELD_DESC); - 
oprot.writeI32(struct.statusCode.getValue()); - oprot.writeFieldEnd(); - } - if (struct.infoMessages != null) { - if (struct.isSetInfoMessages()) { - oprot.writeFieldBegin(INFO_MESSAGES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, struct.infoMessages.size())); - for (String _iter137 : struct.infoMessages) - { - oprot.writeString(_iter137); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - } - if (struct.sqlState != null) { - if (struct.isSetSqlState()) { - oprot.writeFieldBegin(SQL_STATE_FIELD_DESC); - oprot.writeString(struct.sqlState); - oprot.writeFieldEnd(); - } - } - if (struct.isSetErrorCode()) { - oprot.writeFieldBegin(ERROR_CODE_FIELD_DESC); - oprot.writeI32(struct.errorCode); - oprot.writeFieldEnd(); - } - if (struct.errorMessage != null) { - if (struct.isSetErrorMessage()) { - oprot.writeFieldBegin(ERROR_MESSAGE_FIELD_DESC); - oprot.writeString(struct.errorMessage); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TStatusTupleSchemeFactory implements SchemeFactory { - public TStatusTupleScheme getScheme() { - return new TStatusTupleScheme(); - } - } - - private static class TStatusTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TStatus struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeI32(struct.statusCode.getValue()); - BitSet optionals = new BitSet(); - if (struct.isSetInfoMessages()) { - optionals.set(0); - } - if (struct.isSetSqlState()) { - optionals.set(1); - } - if (struct.isSetErrorCode()) { - optionals.set(2); - } - if (struct.isSetErrorMessage()) { - optionals.set(3); - } - oprot.writeBitSet(optionals, 4); - if (struct.isSetInfoMessages()) { - { - oprot.writeI32(struct.infoMessages.size()); - for (String _iter138 : struct.infoMessages) - { - oprot.writeString(_iter138); - } - } - } - if (struct.isSetSqlState()) { - oprot.writeString(struct.sqlState); - } - if (struct.isSetErrorCode()) { - oprot.writeI32(struct.errorCode); - } - if (struct.isSetErrorMessage()) { - oprot.writeString(struct.errorMessage); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TStatus struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.statusCode = TStatusCode.findByValue(iprot.readI32()); - struct.setStatusCodeIsSet(true); - BitSet incoming = iprot.readBitSet(4); - if (incoming.get(0)) { - { - org.apache.thrift.protocol.TList _list139 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.infoMessages = new ArrayList(_list139.size); - for (int _i140 = 0; _i140 < _list139.size; ++_i140) - { - String _elem141; // optional - _elem141 = iprot.readString(); - struct.infoMessages.add(_elem141); - } - } - struct.setInfoMessagesIsSet(true); - } - if (incoming.get(1)) { - struct.sqlState = iprot.readString(); - struct.setSqlStateIsSet(true); - } - if (incoming.get(2)) { - struct.errorCode = iprot.readI32(); - struct.setErrorCodeIsSet(true); - } - if (incoming.get(3)) { - struct.errorMessage = iprot.readString(); - struct.setErrorMessageIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStatusCode.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStatusCode.java deleted file mode 100644 index 
e7fde45fd131a..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStatusCode.java +++ /dev/null @@ -1,54 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - - -import java.util.Map; -import java.util.HashMap; -import org.apache.thrift.TEnum; - -public enum TStatusCode implements org.apache.thrift.TEnum { - SUCCESS_STATUS(0), - SUCCESS_WITH_INFO_STATUS(1), - STILL_EXECUTING_STATUS(2), - ERROR_STATUS(3), - INVALID_HANDLE_STATUS(4); - - private final int value; - - private TStatusCode(int value) { - this.value = value; - } - - /** - * Get the integer value of this enum value, as defined in the Thrift IDL. - */ - public int getValue() { - return value; - } - - /** - * Find a the enum type by its integer value, as defined in the Thrift IDL. - * @return null if the value is not found. - */ - public static TStatusCode findByValue(int value) { - switch (value) { - case 0: - return SUCCESS_STATUS; - case 1: - return SUCCESS_WITH_INFO_STATUS; - case 2: - return STILL_EXECUTING_STATUS; - case 3: - return ERROR_STATUS; - case 4: - return INVALID_HANDLE_STATUS; - default: - return null; - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStringColumn.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStringColumn.java deleted file mode 100644 index 3dae460c8621d..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStringColumn.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TStringColumn implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TStringColumn"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TStringColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TStringColumnTupleSchemeFactory()); - } - - private List values; // 
required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TStringColumn.class, metaDataMap); - } - - public TStringColumn() { - } - - public TStringColumn( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = nulls; - } - - /** - * Performs a deep copy on other. - */ - public TStringColumn(TStringColumn other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(); - for (String other_element : other.values) { - __this__values.add(other_element); - } - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); -; - } - } - - public TStringColumn deepCopy() { - return new TStringColumn(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? 
null : this.values.iterator(); - } - - public void addToValues(String elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return nulls; - } - - public void setNulls(byte[] nulls) { - setNulls(nulls == null ? (ByteBuffer)null : ByteBuffer.wrap(nulls)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = nulls; - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TStringColumn) - return this.equals((TStringColumn)that); - return false; - } - - public boolean equals(TStringColumn that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_values = true && (isSetValues()); - builder.append(present_values); - if (present_values) - builder.append(values); - - boolean present_nulls = true && (isSetNulls()); - builder.append(present_nulls); - if (present_nulls) - builder.append(nulls); - - return builder.toHashCode(); - } - - public int compareTo(TStringColumn other) { - if (!getClass().equals(other.getClass())) { - return 
getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TStringColumn typedOther = (TStringColumn)other; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(typedOther.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, typedOther.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(typedOther.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, typedOther.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TStringColumn("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TStringColumnStandardSchemeFactory implements SchemeFactory { - public TStringColumnStandardScheme getScheme() { - return new TStringColumnStandardScheme(); - } - } - - private static class TStringColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TStringColumn struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list102 = iprot.readListBegin(); - struct.values = new ArrayList(_list102.size); - for (int _i103 = 0; _i103 < _list102.size; ++_i103) - { - String _elem104; // optional - _elem104 = iprot.readString(); - struct.values.add(_elem104); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TStringColumn struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, struct.values.size())); - for (String _iter105 : struct.values) - { - oprot.writeString(_iter105); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TStringColumnTupleSchemeFactory implements SchemeFactory { - public TStringColumnTupleScheme getScheme() { - return new TStringColumnTupleScheme(); - } - } - - private static class TStringColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TStringColumn struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (String _iter106 : struct.values) - { - 
oprot.writeString(_iter106); - } - } - oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TStringColumn struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list107 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.values = new ArrayList(_list107.size); - for (int _i108 = 0; _i108 < _list107.size; ++_i108) - { - String _elem109; // optional - _elem109 = iprot.readString(); - struct.values.add(_elem109); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStringValue.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStringValue.java deleted file mode 100644 index af7a109775a8b..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStringValue.java +++ /dev/null @@ -1,389 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TStringValue implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TStringValue"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("value", org.apache.thrift.protocol.TType.STRING, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TStringValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TStringValueTupleSchemeFactory()); - } - - private String value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TStringValue.class, metaDataMap); - } - - public TStringValue() { - } - - /** - * Performs a deep copy on other. - */ - public TStringValue(TStringValue other) { - if (other.isSetValue()) { - this.value = other.value; - } - } - - public TStringValue deepCopy() { - return new TStringValue(this); - } - - @Override - public void clear() { - this.value = null; - } - - public String getValue() { - return this.value; - } - - public void setValue(String value) { - this.value = value; - } - - public void unsetValue() { - this.value = null; - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return this.value != null; - } - - public void setValueIsSet(boolean value) { - if (!value) { - this.value = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return getValue(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TStringValue) - return this.equals((TStringValue)that); - return false; - } - - public boolean equals(TStringValue that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || that_present_value) { - if (!(this_present_value && that_present_value)) - return false; - if (!this.value.equals(that.value)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_value = true && 
(isSetValue()); - builder.append(present_value); - if (present_value) - builder.append(value); - - return builder.toHashCode(); - } - - public int compareTo(TStringValue other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TStringValue typedOther = (TStringValue)other; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(typedOther.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, typedOther.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TStringValue("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); - if (this.value == null) { - sb.append("null"); - } else { - sb.append(this.value); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TStringValueStandardSchemeFactory implements SchemeFactory { - public TStringValueStandardScheme getScheme() { - return new TStringValueStandardScheme(); - } - } - - private static class TStringValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TStringValue struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.value = iprot.readString(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TStringValue struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.value != null) { - if (struct.isSetValue()) { - 
oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeString(struct.value); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TStringValueTupleSchemeFactory implements SchemeFactory { - public TStringValueTupleScheme getScheme() { - return new TStringValueTupleScheme(); - } - } - - private static class TStringValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TStringValue struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeString(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TStringValue struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readString(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStructTypeEntry.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStructTypeEntry.java deleted file mode 100644 index 20f5fb6c29073..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStructTypeEntry.java +++ /dev/null @@ -1,448 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TStructTypeEntry implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TStructTypeEntry"); - - private static final org.apache.thrift.protocol.TField NAME_TO_TYPE_PTR_FIELD_DESC = new org.apache.thrift.protocol.TField("nameToTypePtr", org.apache.thrift.protocol.TType.MAP, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TStructTypeEntryStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TStructTypeEntryTupleSchemeFactory()); - } - - private Map nameToTypePtr; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - NAME_TO_TYPE_PTR((short)1, "nameToTypePtr"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // NAME_TO_TYPE_PTR - return NAME_TO_TYPE_PTR; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.NAME_TO_TYPE_PTR, new org.apache.thrift.meta_data.FieldMetaData("nameToTypePtr", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.MapMetaData(org.apache.thrift.protocol.TType.MAP, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING), - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32 , "TTypeEntryPtr")))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TStructTypeEntry.class, metaDataMap); - } - - public TStructTypeEntry() { - } - - public TStructTypeEntry( - Map nameToTypePtr) - { - this(); - this.nameToTypePtr = nameToTypePtr; - } - - /** - * Performs a deep copy on other. - */ - public TStructTypeEntry(TStructTypeEntry other) { - if (other.isSetNameToTypePtr()) { - Map __this__nameToTypePtr = new HashMap(); - for (Map.Entry other_element : other.nameToTypePtr.entrySet()) { - - String other_element_key = other_element.getKey(); - Integer other_element_value = other_element.getValue(); - - String __this__nameToTypePtr_copy_key = other_element_key; - - Integer __this__nameToTypePtr_copy_value = other_element_value; - - __this__nameToTypePtr.put(__this__nameToTypePtr_copy_key, __this__nameToTypePtr_copy_value); - } - this.nameToTypePtr = __this__nameToTypePtr; - } - } - - public TStructTypeEntry deepCopy() { - return new TStructTypeEntry(this); - } - - @Override - public void clear() { - this.nameToTypePtr = null; - } - - public int getNameToTypePtrSize() { - return (this.nameToTypePtr == null) ? 
0 : this.nameToTypePtr.size(); - } - - public void putToNameToTypePtr(String key, int val) { - if (this.nameToTypePtr == null) { - this.nameToTypePtr = new HashMap(); - } - this.nameToTypePtr.put(key, val); - } - - public Map getNameToTypePtr() { - return this.nameToTypePtr; - } - - public void setNameToTypePtr(Map nameToTypePtr) { - this.nameToTypePtr = nameToTypePtr; - } - - public void unsetNameToTypePtr() { - this.nameToTypePtr = null; - } - - /** Returns true if field nameToTypePtr is set (has been assigned a value) and false otherwise */ - public boolean isSetNameToTypePtr() { - return this.nameToTypePtr != null; - } - - public void setNameToTypePtrIsSet(boolean value) { - if (!value) { - this.nameToTypePtr = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case NAME_TO_TYPE_PTR: - if (value == null) { - unsetNameToTypePtr(); - } else { - setNameToTypePtr((Map)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case NAME_TO_TYPE_PTR: - return getNameToTypePtr(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case NAME_TO_TYPE_PTR: - return isSetNameToTypePtr(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TStructTypeEntry) - return this.equals((TStructTypeEntry)that); - return false; - } - - public boolean equals(TStructTypeEntry that) { - if (that == null) - return false; - - boolean this_present_nameToTypePtr = true && this.isSetNameToTypePtr(); - boolean that_present_nameToTypePtr = true && that.isSetNameToTypePtr(); - if (this_present_nameToTypePtr || that_present_nameToTypePtr) { - if (!(this_present_nameToTypePtr && that_present_nameToTypePtr)) - return false; - if (!this.nameToTypePtr.equals(that.nameToTypePtr)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_nameToTypePtr = true && (isSetNameToTypePtr()); - builder.append(present_nameToTypePtr); - if (present_nameToTypePtr) - builder.append(nameToTypePtr); - - return builder.toHashCode(); - } - - public int compareTo(TStructTypeEntry other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TStructTypeEntry typedOther = (TStructTypeEntry)other; - - lastComparison = Boolean.valueOf(isSetNameToTypePtr()).compareTo(typedOther.isSetNameToTypePtr()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNameToTypePtr()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nameToTypePtr, typedOther.nameToTypePtr); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - 
StringBuilder sb = new StringBuilder("TStructTypeEntry("); - boolean first = true; - - sb.append("nameToTypePtr:"); - if (this.nameToTypePtr == null) { - sb.append("null"); - } else { - sb.append(this.nameToTypePtr); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetNameToTypePtr()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nameToTypePtr' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TStructTypeEntryStandardSchemeFactory implements SchemeFactory { - public TStructTypeEntryStandardScheme getScheme() { - return new TStructTypeEntryStandardScheme(); - } - } - - private static class TStructTypeEntryStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TStructTypeEntry struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // NAME_TO_TYPE_PTR - if (schemeField.type == org.apache.thrift.protocol.TType.MAP) { - { - org.apache.thrift.protocol.TMap _map10 = iprot.readMapBegin(); - struct.nameToTypePtr = new HashMap(2*_map10.size); - for (int _i11 = 0; _i11 < _map10.size; ++_i11) - { - String _key12; // required - int _val13; // required - _key12 = iprot.readString(); - _val13 = iprot.readI32(); - struct.nameToTypePtr.put(_key12, _val13); - } - iprot.readMapEnd(); - } - struct.setNameToTypePtrIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TStructTypeEntry struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.nameToTypePtr != null) { - oprot.writeFieldBegin(NAME_TO_TYPE_PTR_FIELD_DESC); - { - oprot.writeMapBegin(new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.I32, struct.nameToTypePtr.size())); - for (Map.Entry _iter14 : struct.nameToTypePtr.entrySet()) - { - oprot.writeString(_iter14.getKey()); - oprot.writeI32(_iter14.getValue()); - } - oprot.writeMapEnd(); - } - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TStructTypeEntryTupleSchemeFactory implements SchemeFactory { - public TStructTypeEntryTupleScheme getScheme() { - return new TStructTypeEntryTupleScheme(); - } - } - - private static class 
TStructTypeEntryTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TStructTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.nameToTypePtr.size()); - for (Map.Entry _iter15 : struct.nameToTypePtr.entrySet()) - { - oprot.writeString(_iter15.getKey()); - oprot.writeI32(_iter15.getValue()); - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TStructTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TMap _map16 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.I32, iprot.readI32()); - struct.nameToTypePtr = new HashMap(2*_map16.size); - for (int _i17 = 0; _i17 < _map16.size; ++_i17) - { - String _key18; // required - int _val19; // required - _key18 = iprot.readString(); - _val19 = iprot.readI32(); - struct.nameToTypePtr.put(_key18, _val19); - } - } - struct.setNameToTypePtrIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTableSchema.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTableSchema.java deleted file mode 100644 index ff5e54db7c16c..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTableSchema.java +++ /dev/null @@ -1,439 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TTableSchema implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TTableSchema"); - - private static final org.apache.thrift.protocol.TField COLUMNS_FIELD_DESC = new org.apache.thrift.protocol.TField("columns", org.apache.thrift.protocol.TType.LIST, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TTableSchemaStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TTableSchemaTupleSchemeFactory()); - } - - private List columns; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - COLUMNS((short)1, "columns"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // COLUMNS - return COLUMNS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.COLUMNS, new org.apache.thrift.meta_data.FieldMetaData("columns", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TColumnDesc.class)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TTableSchema.class, metaDataMap); - } - - public TTableSchema() { - } - - public TTableSchema( - List columns) - { - this(); - this.columns = columns; - } - - /** - * Performs a deep copy on other. - */ - public TTableSchema(TTableSchema other) { - if (other.isSetColumns()) { - List __this__columns = new ArrayList(); - for (TColumnDesc other_element : other.columns) { - __this__columns.add(new TColumnDesc(other_element)); - } - this.columns = __this__columns; - } - } - - public TTableSchema deepCopy() { - return new TTableSchema(this); - } - - @Override - public void clear() { - this.columns = null; - } - - public int getColumnsSize() { - return (this.columns == null) ? 0 : this.columns.size(); - } - - public java.util.Iterator getColumnsIterator() { - return (this.columns == null) ? 
null : this.columns.iterator(); - } - - public void addToColumns(TColumnDesc elem) { - if (this.columns == null) { - this.columns = new ArrayList(); - } - this.columns.add(elem); - } - - public List getColumns() { - return this.columns; - } - - public void setColumns(List columns) { - this.columns = columns; - } - - public void unsetColumns() { - this.columns = null; - } - - /** Returns true if field columns is set (has been assigned a value) and false otherwise */ - public boolean isSetColumns() { - return this.columns != null; - } - - public void setColumnsIsSet(boolean value) { - if (!value) { - this.columns = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case COLUMNS: - if (value == null) { - unsetColumns(); - } else { - setColumns((List)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case COLUMNS: - return getColumns(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case COLUMNS: - return isSetColumns(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TTableSchema) - return this.equals((TTableSchema)that); - return false; - } - - public boolean equals(TTableSchema that) { - if (that == null) - return false; - - boolean this_present_columns = true && this.isSetColumns(); - boolean that_present_columns = true && that.isSetColumns(); - if (this_present_columns || that_present_columns) { - if (!(this_present_columns && that_present_columns)) - return false; - if (!this.columns.equals(that.columns)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_columns = true && (isSetColumns()); - builder.append(present_columns); - if (present_columns) - builder.append(columns); - - return builder.toHashCode(); - } - - public int compareTo(TTableSchema other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TTableSchema typedOther = (TTableSchema)other; - - lastComparison = Boolean.valueOf(isSetColumns()).compareTo(typedOther.isSetColumns()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetColumns()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.columns, typedOther.columns); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TTableSchema("); - boolean first = true; - - sb.append("columns:"); - if (this.columns == null) { - sb.append("null"); - } else { - sb.append(this.columns); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws 
org.apache.thrift.TException { - // check for required fields - if (!isSetColumns()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'columns' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TTableSchemaStandardSchemeFactory implements SchemeFactory { - public TTableSchemaStandardScheme getScheme() { - return new TTableSchemaStandardScheme(); - } - } - - private static class TTableSchemaStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TTableSchema struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // COLUMNS - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list38 = iprot.readListBegin(); - struct.columns = new ArrayList(_list38.size); - for (int _i39 = 0; _i39 < _list38.size; ++_i39) - { - TColumnDesc _elem40; // optional - _elem40 = new TColumnDesc(); - _elem40.read(iprot); - struct.columns.add(_elem40); - } - iprot.readListEnd(); - } - struct.setColumnsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TTableSchema struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.columns != null) { - oprot.writeFieldBegin(COLUMNS_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, struct.columns.size())); - for (TColumnDesc _iter41 : struct.columns) - { - _iter41.write(oprot); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TTableSchemaTupleSchemeFactory implements SchemeFactory { - public TTableSchemaTupleScheme getScheme() { - return new TTableSchemaTupleScheme(); - } - } - - private static class TTableSchemaTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TTableSchema struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.columns.size()); - for (TColumnDesc _iter42 : struct.columns) - { - _iter42.write(oprot); - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TTableSchema struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; 
- { - org.apache.thrift.protocol.TList _list43 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); - struct.columns = new ArrayList(_list43.size); - for (int _i44 = 0; _i44 < _list43.size; ++_i44) - { - TColumnDesc _elem45; // optional - _elem45 = new TColumnDesc(); - _elem45.read(iprot); - struct.columns.add(_elem45); - } - } - struct.setColumnsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeDesc.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeDesc.java deleted file mode 100644 index 251f86a914719..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeDesc.java +++ /dev/null @@ -1,439 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TTypeDesc implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TTypeDesc"); - - private static final org.apache.thrift.protocol.TField TYPES_FIELD_DESC = new org.apache.thrift.protocol.TField("types", org.apache.thrift.protocol.TType.LIST, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TTypeDescStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TTypeDescTupleSchemeFactory()); - } - - private List types; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - TYPES((short)1, "types"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // TYPES - return TYPES; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.TYPES, new org.apache.thrift.meta_data.FieldMetaData("types", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TTypeEntry.class)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TTypeDesc.class, metaDataMap); - } - - public TTypeDesc() { - } - - public TTypeDesc( - List types) - { - this(); - this.types = types; - } - - /** - * Performs a deep copy on other. - */ - public TTypeDesc(TTypeDesc other) { - if (other.isSetTypes()) { - List __this__types = new ArrayList(); - for (TTypeEntry other_element : other.types) { - __this__types.add(new TTypeEntry(other_element)); - } - this.types = __this__types; - } - } - - public TTypeDesc deepCopy() { - return new TTypeDesc(this); - } - - @Override - public void clear() { - this.types = null; - } - - public int getTypesSize() { - return (this.types == null) ? 0 : this.types.size(); - } - - public java.util.Iterator getTypesIterator() { - return (this.types == null) ? 
null : this.types.iterator(); - } - - public void addToTypes(TTypeEntry elem) { - if (this.types == null) { - this.types = new ArrayList(); - } - this.types.add(elem); - } - - public List getTypes() { - return this.types; - } - - public void setTypes(List types) { - this.types = types; - } - - public void unsetTypes() { - this.types = null; - } - - /** Returns true if field types is set (has been assigned a value) and false otherwise */ - public boolean isSetTypes() { - return this.types != null; - } - - public void setTypesIsSet(boolean value) { - if (!value) { - this.types = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case TYPES: - if (value == null) { - unsetTypes(); - } else { - setTypes((List)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case TYPES: - return getTypes(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case TYPES: - return isSetTypes(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TTypeDesc) - return this.equals((TTypeDesc)that); - return false; - } - - public boolean equals(TTypeDesc that) { - if (that == null) - return false; - - boolean this_present_types = true && this.isSetTypes(); - boolean that_present_types = true && that.isSetTypes(); - if (this_present_types || that_present_types) { - if (!(this_present_types && that_present_types)) - return false; - if (!this.types.equals(that.types)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_types = true && (isSetTypes()); - builder.append(present_types); - if (present_types) - builder.append(types); - - return builder.toHashCode(); - } - - public int compareTo(TTypeDesc other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TTypeDesc typedOther = (TTypeDesc)other; - - lastComparison = Boolean.valueOf(isSetTypes()).compareTo(typedOther.isSetTypes()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTypes()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.types, typedOther.types); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TTypeDesc("); - boolean first = true; - - sb.append("types:"); - if (this.types == null) { - sb.append("null"); - } else { - sb.append(this.types); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetTypes()) { - throw new 
org.apache.thrift.protocol.TProtocolException("Required field 'types' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TTypeDescStandardSchemeFactory implements SchemeFactory { - public TTypeDescStandardScheme getScheme() { - return new TTypeDescStandardScheme(); - } - } - - private static class TTypeDescStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TTypeDesc struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // TYPES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list30 = iprot.readListBegin(); - struct.types = new ArrayList(_list30.size); - for (int _i31 = 0; _i31 < _list30.size; ++_i31) - { - TTypeEntry _elem32; // optional - _elem32 = new TTypeEntry(); - _elem32.read(iprot); - struct.types.add(_elem32); - } - iprot.readListEnd(); - } - struct.setTypesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TTypeDesc struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.types != null) { - oprot.writeFieldBegin(TYPES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, struct.types.size())); - for (TTypeEntry _iter33 : struct.types) - { - _iter33.write(oprot); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TTypeDescTupleSchemeFactory implements SchemeFactory { - public TTypeDescTupleScheme getScheme() { - return new TTypeDescTupleScheme(); - } - } - - private static class TTypeDescTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TTypeDesc struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.types.size()); - for (TTypeEntry _iter34 : struct.types) - { - _iter34.write(oprot); - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TTypeDesc struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list35 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); - struct.types = 
new ArrayList(_list35.size); - for (int _i36 = 0; _i36 < _list35.size; ++_i36) - { - TTypeEntry _elem37; // optional - _elem37 = new TTypeEntry(); - _elem37.read(iprot); - struct.types.add(_elem37); - } - } - struct.setTypesIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeEntry.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeEntry.java deleted file mode 100644 index d0d70c1279572..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeEntry.java +++ /dev/null @@ -1,610 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TTypeEntry extends org.apache.thrift.TUnion { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TTypeEntry"); - private static final org.apache.thrift.protocol.TField PRIMITIVE_ENTRY_FIELD_DESC = new org.apache.thrift.protocol.TField("primitiveEntry", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField ARRAY_ENTRY_FIELD_DESC = new org.apache.thrift.protocol.TField("arrayEntry", org.apache.thrift.protocol.TType.STRUCT, (short)2); - private static final org.apache.thrift.protocol.TField MAP_ENTRY_FIELD_DESC = new org.apache.thrift.protocol.TField("mapEntry", org.apache.thrift.protocol.TType.STRUCT, (short)3); - private static final org.apache.thrift.protocol.TField STRUCT_ENTRY_FIELD_DESC = new org.apache.thrift.protocol.TField("structEntry", org.apache.thrift.protocol.TType.STRUCT, (short)4); - private static final org.apache.thrift.protocol.TField UNION_ENTRY_FIELD_DESC = new org.apache.thrift.protocol.TField("unionEntry", org.apache.thrift.protocol.TType.STRUCT, (short)5); - private static final org.apache.thrift.protocol.TField USER_DEFINED_TYPE_ENTRY_FIELD_DESC = new org.apache.thrift.protocol.TField("userDefinedTypeEntry", org.apache.thrift.protocol.TType.STRUCT, (short)6); - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - PRIMITIVE_ENTRY((short)1, "primitiveEntry"), - ARRAY_ENTRY((short)2, "arrayEntry"), - MAP_ENTRY((short)3, "mapEntry"), - STRUCT_ENTRY((short)4, "structEntry"), - UNION_ENTRY((short)5, "unionEntry"), - USER_DEFINED_TYPE_ENTRY((short)6, "userDefinedTypeEntry"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // PRIMITIVE_ENTRY - return PRIMITIVE_ENTRY; - case 2: // ARRAY_ENTRY - return ARRAY_ENTRY; - case 3: // MAP_ENTRY - return MAP_ENTRY; - case 4: // STRUCT_ENTRY - return STRUCT_ENTRY; - case 5: // UNION_ENTRY - return UNION_ENTRY; - case 6: // USER_DEFINED_TYPE_ENTRY - return USER_DEFINED_TYPE_ENTRY; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.PRIMITIVE_ENTRY, new org.apache.thrift.meta_data.FieldMetaData("primitiveEntry", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TPrimitiveTypeEntry.class))); - tmpMap.put(_Fields.ARRAY_ENTRY, new org.apache.thrift.meta_data.FieldMetaData("arrayEntry", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TArrayTypeEntry.class))); - tmpMap.put(_Fields.MAP_ENTRY, new org.apache.thrift.meta_data.FieldMetaData("mapEntry", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TMapTypeEntry.class))); - tmpMap.put(_Fields.STRUCT_ENTRY, new org.apache.thrift.meta_data.FieldMetaData("structEntry", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStructTypeEntry.class))); - tmpMap.put(_Fields.UNION_ENTRY, new org.apache.thrift.meta_data.FieldMetaData("unionEntry", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TUnionTypeEntry.class))); - tmpMap.put(_Fields.USER_DEFINED_TYPE_ENTRY, new org.apache.thrift.meta_data.FieldMetaData("userDefinedTypeEntry", org.apache.thrift.TFieldRequirementType.DEFAULT, - new 
org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TUserDefinedTypeEntry.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TTypeEntry.class, metaDataMap); - } - - public TTypeEntry() { - super(); - } - - public TTypeEntry(TTypeEntry._Fields setField, Object value) { - super(setField, value); - } - - public TTypeEntry(TTypeEntry other) { - super(other); - } - public TTypeEntry deepCopy() { - return new TTypeEntry(this); - } - - public static TTypeEntry primitiveEntry(TPrimitiveTypeEntry value) { - TTypeEntry x = new TTypeEntry(); - x.setPrimitiveEntry(value); - return x; - } - - public static TTypeEntry arrayEntry(TArrayTypeEntry value) { - TTypeEntry x = new TTypeEntry(); - x.setArrayEntry(value); - return x; - } - - public static TTypeEntry mapEntry(TMapTypeEntry value) { - TTypeEntry x = new TTypeEntry(); - x.setMapEntry(value); - return x; - } - - public static TTypeEntry structEntry(TStructTypeEntry value) { - TTypeEntry x = new TTypeEntry(); - x.setStructEntry(value); - return x; - } - - public static TTypeEntry unionEntry(TUnionTypeEntry value) { - TTypeEntry x = new TTypeEntry(); - x.setUnionEntry(value); - return x; - } - - public static TTypeEntry userDefinedTypeEntry(TUserDefinedTypeEntry value) { - TTypeEntry x = new TTypeEntry(); - x.setUserDefinedTypeEntry(value); - return x; - } - - - @Override - protected void checkType(_Fields setField, Object value) throws ClassCastException { - switch (setField) { - case PRIMITIVE_ENTRY: - if (value instanceof TPrimitiveTypeEntry) { - break; - } - throw new ClassCastException("Was expecting value of type TPrimitiveTypeEntry for field 'primitiveEntry', but got " + value.getClass().getSimpleName()); - case ARRAY_ENTRY: - if (value instanceof TArrayTypeEntry) { - break; - } - throw new ClassCastException("Was expecting value of type TArrayTypeEntry for field 'arrayEntry', but got " + value.getClass().getSimpleName()); - case MAP_ENTRY: - if (value instanceof TMapTypeEntry) { - break; - } - throw new ClassCastException("Was expecting value of type TMapTypeEntry for field 'mapEntry', but got " + value.getClass().getSimpleName()); - case STRUCT_ENTRY: - if (value instanceof TStructTypeEntry) { - break; - } - throw new ClassCastException("Was expecting value of type TStructTypeEntry for field 'structEntry', but got " + value.getClass().getSimpleName()); - case UNION_ENTRY: - if (value instanceof TUnionTypeEntry) { - break; - } - throw new ClassCastException("Was expecting value of type TUnionTypeEntry for field 'unionEntry', but got " + value.getClass().getSimpleName()); - case USER_DEFINED_TYPE_ENTRY: - if (value instanceof TUserDefinedTypeEntry) { - break; - } - throw new ClassCastException("Was expecting value of type TUserDefinedTypeEntry for field 'userDefinedTypeEntry', but got " + value.getClass().getSimpleName()); - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected Object standardSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, org.apache.thrift.protocol.TField field) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(field.id); - if (setField != null) { - switch (setField) { - case PRIMITIVE_ENTRY: - if (field.type == PRIMITIVE_ENTRY_FIELD_DESC.type) { - TPrimitiveTypeEntry primitiveEntry; - primitiveEntry = new TPrimitiveTypeEntry(); - primitiveEntry.read(iprot); - return primitiveEntry; - } else { - 
org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case ARRAY_ENTRY: - if (field.type == ARRAY_ENTRY_FIELD_DESC.type) { - TArrayTypeEntry arrayEntry; - arrayEntry = new TArrayTypeEntry(); - arrayEntry.read(iprot); - return arrayEntry; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case MAP_ENTRY: - if (field.type == MAP_ENTRY_FIELD_DESC.type) { - TMapTypeEntry mapEntry; - mapEntry = new TMapTypeEntry(); - mapEntry.read(iprot); - return mapEntry; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case STRUCT_ENTRY: - if (field.type == STRUCT_ENTRY_FIELD_DESC.type) { - TStructTypeEntry structEntry; - structEntry = new TStructTypeEntry(); - structEntry.read(iprot); - return structEntry; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case UNION_ENTRY: - if (field.type == UNION_ENTRY_FIELD_DESC.type) { - TUnionTypeEntry unionEntry; - unionEntry = new TUnionTypeEntry(); - unionEntry.read(iprot); - return unionEntry; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case USER_DEFINED_TYPE_ENTRY: - if (field.type == USER_DEFINED_TYPE_ENTRY_FIELD_DESC.type) { - TUserDefinedTypeEntry userDefinedTypeEntry; - userDefinedTypeEntry = new TUserDefinedTypeEntry(); - userDefinedTypeEntry.read(iprot); - return userDefinedTypeEntry; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - return null; - } - } - - @Override - protected void standardSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case PRIMITIVE_ENTRY: - TPrimitiveTypeEntry primitiveEntry = (TPrimitiveTypeEntry)value_; - primitiveEntry.write(oprot); - return; - case ARRAY_ENTRY: - TArrayTypeEntry arrayEntry = (TArrayTypeEntry)value_; - arrayEntry.write(oprot); - return; - case MAP_ENTRY: - TMapTypeEntry mapEntry = (TMapTypeEntry)value_; - mapEntry.write(oprot); - return; - case STRUCT_ENTRY: - TStructTypeEntry structEntry = (TStructTypeEntry)value_; - structEntry.write(oprot); - return; - case UNION_ENTRY: - TUnionTypeEntry unionEntry = (TUnionTypeEntry)value_; - unionEntry.write(oprot); - return; - case USER_DEFINED_TYPE_ENTRY: - TUserDefinedTypeEntry userDefinedTypeEntry = (TUserDefinedTypeEntry)value_; - userDefinedTypeEntry.write(oprot); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected Object tupleSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, short fieldID) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(fieldID); - if (setField != null) { - switch (setField) { - case PRIMITIVE_ENTRY: - TPrimitiveTypeEntry primitiveEntry; - primitiveEntry = new TPrimitiveTypeEntry(); - primitiveEntry.read(iprot); - return primitiveEntry; - case ARRAY_ENTRY: - TArrayTypeEntry arrayEntry; - arrayEntry = new TArrayTypeEntry(); - arrayEntry.read(iprot); - return arrayEntry; - case MAP_ENTRY: - TMapTypeEntry mapEntry; - mapEntry = new TMapTypeEntry(); - mapEntry.read(iprot); - return mapEntry; - case STRUCT_ENTRY: - TStructTypeEntry structEntry; - structEntry = new TStructTypeEntry(); - structEntry.read(iprot); - return structEntry; - case 
UNION_ENTRY: - TUnionTypeEntry unionEntry; - unionEntry = new TUnionTypeEntry(); - unionEntry.read(iprot); - return unionEntry; - case USER_DEFINED_TYPE_ENTRY: - TUserDefinedTypeEntry userDefinedTypeEntry; - userDefinedTypeEntry = new TUserDefinedTypeEntry(); - userDefinedTypeEntry.read(iprot); - return userDefinedTypeEntry; - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - throw new TProtocolException("Couldn't find a field with field id " + fieldID); - } - } - - @Override - protected void tupleSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case PRIMITIVE_ENTRY: - TPrimitiveTypeEntry primitiveEntry = (TPrimitiveTypeEntry)value_; - primitiveEntry.write(oprot); - return; - case ARRAY_ENTRY: - TArrayTypeEntry arrayEntry = (TArrayTypeEntry)value_; - arrayEntry.write(oprot); - return; - case MAP_ENTRY: - TMapTypeEntry mapEntry = (TMapTypeEntry)value_; - mapEntry.write(oprot); - return; - case STRUCT_ENTRY: - TStructTypeEntry structEntry = (TStructTypeEntry)value_; - structEntry.write(oprot); - return; - case UNION_ENTRY: - TUnionTypeEntry unionEntry = (TUnionTypeEntry)value_; - unionEntry.write(oprot); - return; - case USER_DEFINED_TYPE_ENTRY: - TUserDefinedTypeEntry userDefinedTypeEntry = (TUserDefinedTypeEntry)value_; - userDefinedTypeEntry.write(oprot); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected org.apache.thrift.protocol.TField getFieldDesc(_Fields setField) { - switch (setField) { - case PRIMITIVE_ENTRY: - return PRIMITIVE_ENTRY_FIELD_DESC; - case ARRAY_ENTRY: - return ARRAY_ENTRY_FIELD_DESC; - case MAP_ENTRY: - return MAP_ENTRY_FIELD_DESC; - case STRUCT_ENTRY: - return STRUCT_ENTRY_FIELD_DESC; - case UNION_ENTRY: - return UNION_ENTRY_FIELD_DESC; - case USER_DEFINED_TYPE_ENTRY: - return USER_DEFINED_TYPE_ENTRY_FIELD_DESC; - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected org.apache.thrift.protocol.TStruct getStructDesc() { - return STRUCT_DESC; - } - - @Override - protected _Fields enumForId(short id) { - return _Fields.findByThriftIdOrThrow(id); - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - - public TPrimitiveTypeEntry getPrimitiveEntry() { - if (getSetField() == _Fields.PRIMITIVE_ENTRY) { - return (TPrimitiveTypeEntry)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'primitiveEntry' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setPrimitiveEntry(TPrimitiveTypeEntry value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.PRIMITIVE_ENTRY; - value_ = value; - } - - public TArrayTypeEntry getArrayEntry() { - if (getSetField() == _Fields.ARRAY_ENTRY) { - return (TArrayTypeEntry)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'arrayEntry' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setArrayEntry(TArrayTypeEntry value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.ARRAY_ENTRY; - value_ = value; - } - - public TMapTypeEntry getMapEntry() { - if (getSetField() == _Fields.MAP_ENTRY) { - return (TMapTypeEntry)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'mapEntry' because union is 
currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setMapEntry(TMapTypeEntry value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.MAP_ENTRY; - value_ = value; - } - - public TStructTypeEntry getStructEntry() { - if (getSetField() == _Fields.STRUCT_ENTRY) { - return (TStructTypeEntry)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'structEntry' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setStructEntry(TStructTypeEntry value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.STRUCT_ENTRY; - value_ = value; - } - - public TUnionTypeEntry getUnionEntry() { - if (getSetField() == _Fields.UNION_ENTRY) { - return (TUnionTypeEntry)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'unionEntry' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setUnionEntry(TUnionTypeEntry value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.UNION_ENTRY; - value_ = value; - } - - public TUserDefinedTypeEntry getUserDefinedTypeEntry() { - if (getSetField() == _Fields.USER_DEFINED_TYPE_ENTRY) { - return (TUserDefinedTypeEntry)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'userDefinedTypeEntry' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setUserDefinedTypeEntry(TUserDefinedTypeEntry value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.USER_DEFINED_TYPE_ENTRY; - value_ = value; - } - - public boolean isSetPrimitiveEntry() { - return setField_ == _Fields.PRIMITIVE_ENTRY; - } - - - public boolean isSetArrayEntry() { - return setField_ == _Fields.ARRAY_ENTRY; - } - - - public boolean isSetMapEntry() { - return setField_ == _Fields.MAP_ENTRY; - } - - - public boolean isSetStructEntry() { - return setField_ == _Fields.STRUCT_ENTRY; - } - - - public boolean isSetUnionEntry() { - return setField_ == _Fields.UNION_ENTRY; - } - - - public boolean isSetUserDefinedTypeEntry() { - return setField_ == _Fields.USER_DEFINED_TYPE_ENTRY; - } - - - public boolean equals(Object other) { - if (other instanceof TTypeEntry) { - return equals((TTypeEntry)other); - } else { - return false; - } - } - - public boolean equals(TTypeEntry other) { - return other != null && getSetField() == other.getSetField() && getFieldValue().equals(other.getFieldValue()); - } - - @Override - public int compareTo(TTypeEntry other) { - int lastComparison = org.apache.thrift.TBaseHelper.compareTo(getSetField(), other.getSetField()); - if (lastComparison == 0) { - return org.apache.thrift.TBaseHelper.compareTo(getFieldValue(), other.getFieldValue()); - } - return lastComparison; - } - - - @Override - public int hashCode() { - HashCodeBuilder hcb = new HashCodeBuilder(); - hcb.append(this.getClass().getName()); - org.apache.thrift.TFieldIdEnum setField = getSetField(); - if (setField != null) { - hcb.append(setField.getThriftFieldId()); - Object value = getFieldValue(); - if (value instanceof org.apache.thrift.TEnum) { - hcb.append(((org.apache.thrift.TEnum)getFieldValue()).getValue()); - } else { - hcb.append(value); - } - } - return hcb.toHashCode(); - } - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch 
(org.apache.thrift.TException te) {
-      throw new java.io.IOException(te);
-    }
-  }
-
-
-  private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException {
-    try {
-      read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in)));
-    } catch (org.apache.thrift.TException te) {
-      throw new java.io.IOException(te);
-    }
-  }
-
-
-}
diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeId.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeId.java
deleted file mode 100644
index 40f05894623c0..0000000000000
--- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeId.java
+++ /dev/null
@@ -1,105 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.0)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-package org.apache.hive.service.cli.thrift;
-
-
-import java.util.Map;
-import java.util.HashMap;
-import org.apache.thrift.TEnum;
-
-public enum TTypeId implements org.apache.thrift.TEnum {
-  BOOLEAN_TYPE(0),
-  TINYINT_TYPE(1),
-  SMALLINT_TYPE(2),
-  INT_TYPE(3),
-  BIGINT_TYPE(4),
-  FLOAT_TYPE(5),
-  DOUBLE_TYPE(6),
-  STRING_TYPE(7),
-  TIMESTAMP_TYPE(8),
-  BINARY_TYPE(9),
-  ARRAY_TYPE(10),
-  MAP_TYPE(11),
-  STRUCT_TYPE(12),
-  UNION_TYPE(13),
-  USER_DEFINED_TYPE(14),
-  DECIMAL_TYPE(15),
-  NULL_TYPE(16),
-  DATE_TYPE(17),
-  VARCHAR_TYPE(18),
-  CHAR_TYPE(19),
-  INTERVAL_YEAR_MONTH_TYPE(20),
-  INTERVAL_DAY_TIME_TYPE(21);
-
-  private final int value;
-
-  private TTypeId(int value) {
-    this.value = value;
-  }
-
-  /**
-   * Get the integer value of this enum value, as defined in the Thrift IDL.
-   */
-  public int getValue() {
-    return value;
-  }
-
-  /**
-   * Find a the enum type by its integer value, as defined in the Thrift IDL.
-   * @return null if the value is not found.
-   */
-  public static TTypeId findByValue(int value) {
-    switch (value) {
-      case 0:
-        return BOOLEAN_TYPE;
-      case 1:
-        return TINYINT_TYPE;
-      case 2:
-        return SMALLINT_TYPE;
-      case 3:
-        return INT_TYPE;
-      case 4:
-        return BIGINT_TYPE;
-      case 5:
-        return FLOAT_TYPE;
-      case 6:
-        return DOUBLE_TYPE;
-      case 7:
-        return STRING_TYPE;
-      case 8:
-        return TIMESTAMP_TYPE;
-      case 9:
-        return BINARY_TYPE;
-      case 10:
-        return ARRAY_TYPE;
-      case 11:
-        return MAP_TYPE;
-      case 12:
-        return STRUCT_TYPE;
-      case 13:
-        return UNION_TYPE;
-      case 14:
-        return USER_DEFINED_TYPE;
-      case 15:
-        return DECIMAL_TYPE;
-      case 16:
-        return NULL_TYPE;
-      case 17:
-        return DATE_TYPE;
-      case 18:
-        return VARCHAR_TYPE;
-      case 19:
-        return CHAR_TYPE;
-      case 20:
-        return INTERVAL_YEAR_MONTH_TYPE;
-      case 21:
-        return INTERVAL_DAY_TIME_TYPE;
-      default:
-        return null;
-    }
-  }
-}
diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeQualifierValue.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeQualifierValue.java
deleted file mode 100644
index a3e3829372276..0000000000000
--- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeQualifierValue.java
+++ /dev/null
@@ -1,361 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.0)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-package org.apache.hive.service.cli.thrift;
-
-import org.apache.commons.lang.builder.HashCodeBuilder;
-import org.apache.thrift.scheme.IScheme;
-import org.apache.thrift.scheme.SchemeFactory;
-import org.apache.thrift.scheme.StandardScheme;
-
-import org.apache.thrift.scheme.TupleScheme;
-import org.apache.thrift.protocol.TTupleProtocol;
-import org.apache.thrift.protocol.TProtocolException;
-import org.apache.thrift.EncodingUtils;
-import org.apache.thrift.TException;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.EnumMap;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.EnumSet;
-import java.util.Collections;
-import java.util.BitSet;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class TTypeQualifierValue extends org.apache.thrift.TUnion {
-  private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TTypeQualifierValue");
-  private static final org.apache.thrift.protocol.TField I32_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("i32Value", org.apache.thrift.protocol.TType.I32, (short)1);
-  private static final org.apache.thrift.protocol.TField STRING_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("stringValue", org.apache.thrift.protocol.TType.STRING, (short)2);
-
-  /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
-  public enum _Fields implements org.apache.thrift.TFieldIdEnum {
-    I32_VALUE((short)1, "i32Value"),
-    STRING_VALUE((short)2, "stringValue");
-
-    private static final Map byName = new HashMap();
-
-    static {
-      for (_Fields field : EnumSet.allOf(_Fields.class)) {
-        byName.put(field.getFieldName(), field);
-      }
-    }
-
-    /**
-     * Find the _Fields constant that matches fieldId, or null if its not found.
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // I32_VALUE - return I32_VALUE; - case 2: // STRING_VALUE - return STRING_VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.I32_VALUE, new org.apache.thrift.meta_data.FieldMetaData("i32Value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.STRING_VALUE, new org.apache.thrift.meta_data.FieldMetaData("stringValue", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TTypeQualifierValue.class, metaDataMap); - } - - public TTypeQualifierValue() { - super(); - } - - public TTypeQualifierValue(TTypeQualifierValue._Fields setField, Object value) { - super(setField, value); - } - - public TTypeQualifierValue(TTypeQualifierValue other) { - super(other); - } - public TTypeQualifierValue deepCopy() { - return new TTypeQualifierValue(this); - } - - public static TTypeQualifierValue i32Value(int value) { - TTypeQualifierValue x = new TTypeQualifierValue(); - x.setI32Value(value); - return x; - } - - public static TTypeQualifierValue stringValue(String value) { - TTypeQualifierValue x = new TTypeQualifierValue(); - x.setStringValue(value); - return x; - } - - - @Override - protected void checkType(_Fields setField, Object value) throws ClassCastException { - switch (setField) { - case I32_VALUE: - if (value instanceof Integer) { - break; - } - throw new ClassCastException("Was expecting value of type Integer for field 'i32Value', but got " + value.getClass().getSimpleName()); - case STRING_VALUE: - if (value instanceof String) { - break; - } - throw new ClassCastException("Was expecting value of type String for field 'stringValue', but got " + value.getClass().getSimpleName()); - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected Object standardSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, org.apache.thrift.protocol.TField field) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(field.id); - if (setField != null) { - switch (setField) { - case I32_VALUE: - if (field.type == I32_VALUE_FIELD_DESC.type) { - Integer i32Value; - i32Value = 
iprot.readI32(); - return i32Value; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case STRING_VALUE: - if (field.type == STRING_VALUE_FIELD_DESC.type) { - String stringValue; - stringValue = iprot.readString(); - return stringValue; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - return null; - } - } - - @Override - protected void standardSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case I32_VALUE: - Integer i32Value = (Integer)value_; - oprot.writeI32(i32Value); - return; - case STRING_VALUE: - String stringValue = (String)value_; - oprot.writeString(stringValue); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected Object tupleSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, short fieldID) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(fieldID); - if (setField != null) { - switch (setField) { - case I32_VALUE: - Integer i32Value; - i32Value = iprot.readI32(); - return i32Value; - case STRING_VALUE: - String stringValue; - stringValue = iprot.readString(); - return stringValue; - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - throw new TProtocolException("Couldn't find a field with field id " + fieldID); - } - } - - @Override - protected void tupleSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case I32_VALUE: - Integer i32Value = (Integer)value_; - oprot.writeI32(i32Value); - return; - case STRING_VALUE: - String stringValue = (String)value_; - oprot.writeString(stringValue); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected org.apache.thrift.protocol.TField getFieldDesc(_Fields setField) { - switch (setField) { - case I32_VALUE: - return I32_VALUE_FIELD_DESC; - case STRING_VALUE: - return STRING_VALUE_FIELD_DESC; - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected org.apache.thrift.protocol.TStruct getStructDesc() { - return STRUCT_DESC; - } - - @Override - protected _Fields enumForId(short id) { - return _Fields.findByThriftIdOrThrow(id); - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - - public int getI32Value() { - if (getSetField() == _Fields.I32_VALUE) { - return (Integer)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'i32Value' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI32Value(int value) { - setField_ = _Fields.I32_VALUE; - value_ = value; - } - - public String getStringValue() { - if (getSetField() == _Fields.STRING_VALUE) { - return (String)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'stringValue' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setStringValue(String value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.STRING_VALUE; - value_ = value; - } - - public boolean 
isSetI32Value() { - return setField_ == _Fields.I32_VALUE; - } - - - public boolean isSetStringValue() { - return setField_ == _Fields.STRING_VALUE; - } - - - public boolean equals(Object other) { - if (other instanceof TTypeQualifierValue) { - return equals((TTypeQualifierValue)other); - } else { - return false; - } - } - - public boolean equals(TTypeQualifierValue other) { - return other != null && getSetField() == other.getSetField() && getFieldValue().equals(other.getFieldValue()); - } - - @Override - public int compareTo(TTypeQualifierValue other) { - int lastComparison = org.apache.thrift.TBaseHelper.compareTo(getSetField(), other.getSetField()); - if (lastComparison == 0) { - return org.apache.thrift.TBaseHelper.compareTo(getFieldValue(), other.getFieldValue()); - } - return lastComparison; - } - - - @Override - public int hashCode() { - HashCodeBuilder hcb = new HashCodeBuilder(); - hcb.append(this.getClass().getName()); - org.apache.thrift.TFieldIdEnum setField = getSetField(); - if (setField != null) { - hcb.append(setField.getThriftFieldId()); - Object value = getFieldValue(); - if (value instanceof org.apache.thrift.TEnum) { - hcb.append(((org.apache.thrift.TEnum)getFieldValue()).getValue()); - } else { - hcb.append(value); - } - } - return hcb.toHashCode(); - } - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - -} diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeQualifiers.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeQualifiers.java deleted file mode 100644 index 39355551d3722..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeQualifiers.java +++ /dev/null @@ -1,450 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TTypeQualifiers implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new 
org.apache.thrift.protocol.TStruct("TTypeQualifiers"); - - private static final org.apache.thrift.protocol.TField QUALIFIERS_FIELD_DESC = new org.apache.thrift.protocol.TField("qualifiers", org.apache.thrift.protocol.TType.MAP, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TTypeQualifiersStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TTypeQualifiersTupleSchemeFactory()); - } - - private Map qualifiers; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - QUALIFIERS((short)1, "qualifiers"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // QUALIFIERS - return QUALIFIERS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.QUALIFIERS, new org.apache.thrift.meta_data.FieldMetaData("qualifiers", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.MapMetaData(org.apache.thrift.protocol.TType.MAP, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING), - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TTypeQualifierValue.class)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TTypeQualifiers.class, metaDataMap); - } - - public TTypeQualifiers() { - } - - public TTypeQualifiers( - Map qualifiers) - { - this(); - this.qualifiers = qualifiers; - } - - /** - * Performs a deep copy on other. 
- */ - public TTypeQualifiers(TTypeQualifiers other) { - if (other.isSetQualifiers()) { - Map __this__qualifiers = new HashMap(); - for (Map.Entry other_element : other.qualifiers.entrySet()) { - - String other_element_key = other_element.getKey(); - TTypeQualifierValue other_element_value = other_element.getValue(); - - String __this__qualifiers_copy_key = other_element_key; - - TTypeQualifierValue __this__qualifiers_copy_value = new TTypeQualifierValue(other_element_value); - - __this__qualifiers.put(__this__qualifiers_copy_key, __this__qualifiers_copy_value); - } - this.qualifiers = __this__qualifiers; - } - } - - public TTypeQualifiers deepCopy() { - return new TTypeQualifiers(this); - } - - @Override - public void clear() { - this.qualifiers = null; - } - - public int getQualifiersSize() { - return (this.qualifiers == null) ? 0 : this.qualifiers.size(); - } - - public void putToQualifiers(String key, TTypeQualifierValue val) { - if (this.qualifiers == null) { - this.qualifiers = new HashMap(); - } - this.qualifiers.put(key, val); - } - - public Map getQualifiers() { - return this.qualifiers; - } - - public void setQualifiers(Map qualifiers) { - this.qualifiers = qualifiers; - } - - public void unsetQualifiers() { - this.qualifiers = null; - } - - /** Returns true if field qualifiers is set (has been assigned a value) and false otherwise */ - public boolean isSetQualifiers() { - return this.qualifiers != null; - } - - public void setQualifiersIsSet(boolean value) { - if (!value) { - this.qualifiers = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case QUALIFIERS: - if (value == null) { - unsetQualifiers(); - } else { - setQualifiers((Map)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case QUALIFIERS: - return getQualifiers(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case QUALIFIERS: - return isSetQualifiers(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TTypeQualifiers) - return this.equals((TTypeQualifiers)that); - return false; - } - - public boolean equals(TTypeQualifiers that) { - if (that == null) - return false; - - boolean this_present_qualifiers = true && this.isSetQualifiers(); - boolean that_present_qualifiers = true && that.isSetQualifiers(); - if (this_present_qualifiers || that_present_qualifiers) { - if (!(this_present_qualifiers && that_present_qualifiers)) - return false; - if (!this.qualifiers.equals(that.qualifiers)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_qualifiers = true && (isSetQualifiers()); - builder.append(present_qualifiers); - if (present_qualifiers) - builder.append(qualifiers); - - return builder.toHashCode(); - } - - public int compareTo(TTypeQualifiers other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TTypeQualifiers typedOther = (TTypeQualifiers)other; - - lastComparison = Boolean.valueOf(isSetQualifiers()).compareTo(typedOther.isSetQualifiers()); - if (lastComparison != 0) { - 
return lastComparison; - } - if (isSetQualifiers()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.qualifiers, typedOther.qualifiers); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TTypeQualifiers("); - boolean first = true; - - sb.append("qualifiers:"); - if (this.qualifiers == null) { - sb.append("null"); - } else { - sb.append(this.qualifiers); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetQualifiers()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'qualifiers' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TTypeQualifiersStandardSchemeFactory implements SchemeFactory { - public TTypeQualifiersStandardScheme getScheme() { - return new TTypeQualifiersStandardScheme(); - } - } - - private static class TTypeQualifiersStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TTypeQualifiers struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // QUALIFIERS - if (schemeField.type == org.apache.thrift.protocol.TType.MAP) { - { - org.apache.thrift.protocol.TMap _map0 = iprot.readMapBegin(); - struct.qualifiers = new HashMap(2*_map0.size); - for (int _i1 = 0; _i1 < _map0.size; ++_i1) - { - String _key2; // required - TTypeQualifierValue _val3; // required - _key2 = iprot.readString(); - _val3 = new TTypeQualifierValue(); - _val3.read(iprot); - struct.qualifiers.put(_key2, _val3); - } - iprot.readMapEnd(); - } - struct.setQualifiersIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TTypeQualifiers struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.qualifiers != null) 
{ - oprot.writeFieldBegin(QUALIFIERS_FIELD_DESC); - { - oprot.writeMapBegin(new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRUCT, struct.qualifiers.size())); - for (Map.Entry _iter4 : struct.qualifiers.entrySet()) - { - oprot.writeString(_iter4.getKey()); - _iter4.getValue().write(oprot); - } - oprot.writeMapEnd(); - } - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TTypeQualifiersTupleSchemeFactory implements SchemeFactory { - public TTypeQualifiersTupleScheme getScheme() { - return new TTypeQualifiersTupleScheme(); - } - } - - private static class TTypeQualifiersTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TTypeQualifiers struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.qualifiers.size()); - for (Map.Entry _iter5 : struct.qualifiers.entrySet()) - { - oprot.writeString(_iter5.getKey()); - _iter5.getValue().write(oprot); - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TTypeQualifiers struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TMap _map6 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); - struct.qualifiers = new HashMap(2*_map6.size); - for (int _i7 = 0; _i7 < _map6.size; ++_i7) - { - String _key8; // required - TTypeQualifierValue _val9; // required - _key8 = iprot.readString(); - _val9 = new TTypeQualifierValue(); - _val9.read(iprot); - struct.qualifiers.put(_key8, _val9); - } - } - struct.setQualifiersIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TUnionTypeEntry.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TUnionTypeEntry.java deleted file mode 100644 index 73dd45d3dd01a..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TUnionTypeEntry.java +++ /dev/null @@ -1,448 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TUnionTypeEntry implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TUnionTypeEntry"); - - private static final org.apache.thrift.protocol.TField NAME_TO_TYPE_PTR_FIELD_DESC 
= new org.apache.thrift.protocol.TField("nameToTypePtr", org.apache.thrift.protocol.TType.MAP, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TUnionTypeEntryStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TUnionTypeEntryTupleSchemeFactory()); - } - - private Map nameToTypePtr; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - NAME_TO_TYPE_PTR((short)1, "nameToTypePtr"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // NAME_TO_TYPE_PTR - return NAME_TO_TYPE_PTR; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.NAME_TO_TYPE_PTR, new org.apache.thrift.meta_data.FieldMetaData("nameToTypePtr", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.MapMetaData(org.apache.thrift.protocol.TType.MAP, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING), - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32 , "TTypeEntryPtr")))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TUnionTypeEntry.class, metaDataMap); - } - - public TUnionTypeEntry() { - } - - public TUnionTypeEntry( - Map nameToTypePtr) - { - this(); - this.nameToTypePtr = nameToTypePtr; - } - - /** - * Performs a deep copy on other. 
- */ - public TUnionTypeEntry(TUnionTypeEntry other) { - if (other.isSetNameToTypePtr()) { - Map __this__nameToTypePtr = new HashMap(); - for (Map.Entry other_element : other.nameToTypePtr.entrySet()) { - - String other_element_key = other_element.getKey(); - Integer other_element_value = other_element.getValue(); - - String __this__nameToTypePtr_copy_key = other_element_key; - - Integer __this__nameToTypePtr_copy_value = other_element_value; - - __this__nameToTypePtr.put(__this__nameToTypePtr_copy_key, __this__nameToTypePtr_copy_value); - } - this.nameToTypePtr = __this__nameToTypePtr; - } - } - - public TUnionTypeEntry deepCopy() { - return new TUnionTypeEntry(this); - } - - @Override - public void clear() { - this.nameToTypePtr = null; - } - - public int getNameToTypePtrSize() { - return (this.nameToTypePtr == null) ? 0 : this.nameToTypePtr.size(); - } - - public void putToNameToTypePtr(String key, int val) { - if (this.nameToTypePtr == null) { - this.nameToTypePtr = new HashMap(); - } - this.nameToTypePtr.put(key, val); - } - - public Map getNameToTypePtr() { - return this.nameToTypePtr; - } - - public void setNameToTypePtr(Map nameToTypePtr) { - this.nameToTypePtr = nameToTypePtr; - } - - public void unsetNameToTypePtr() { - this.nameToTypePtr = null; - } - - /** Returns true if field nameToTypePtr is set (has been assigned a value) and false otherwise */ - public boolean isSetNameToTypePtr() { - return this.nameToTypePtr != null; - } - - public void setNameToTypePtrIsSet(boolean value) { - if (!value) { - this.nameToTypePtr = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case NAME_TO_TYPE_PTR: - if (value == null) { - unsetNameToTypePtr(); - } else { - setNameToTypePtr((Map)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case NAME_TO_TYPE_PTR: - return getNameToTypePtr(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case NAME_TO_TYPE_PTR: - return isSetNameToTypePtr(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TUnionTypeEntry) - return this.equals((TUnionTypeEntry)that); - return false; - } - - public boolean equals(TUnionTypeEntry that) { - if (that == null) - return false; - - boolean this_present_nameToTypePtr = true && this.isSetNameToTypePtr(); - boolean that_present_nameToTypePtr = true && that.isSetNameToTypePtr(); - if (this_present_nameToTypePtr || that_present_nameToTypePtr) { - if (!(this_present_nameToTypePtr && that_present_nameToTypePtr)) - return false; - if (!this.nameToTypePtr.equals(that.nameToTypePtr)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_nameToTypePtr = true && (isSetNameToTypePtr()); - builder.append(present_nameToTypePtr); - if (present_nameToTypePtr) - builder.append(nameToTypePtr); - - return builder.toHashCode(); - } - - public int compareTo(TUnionTypeEntry other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TUnionTypeEntry typedOther = (TUnionTypeEntry)other; - - lastComparison = 
Boolean.valueOf(isSetNameToTypePtr()).compareTo(typedOther.isSetNameToTypePtr()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNameToTypePtr()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nameToTypePtr, typedOther.nameToTypePtr); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TUnionTypeEntry("); - boolean first = true; - - sb.append("nameToTypePtr:"); - if (this.nameToTypePtr == null) { - sb.append("null"); - } else { - sb.append(this.nameToTypePtr); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetNameToTypePtr()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nameToTypePtr' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TUnionTypeEntryStandardSchemeFactory implements SchemeFactory { - public TUnionTypeEntryStandardScheme getScheme() { - return new TUnionTypeEntryStandardScheme(); - } - } - - private static class TUnionTypeEntryStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TUnionTypeEntry struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // NAME_TO_TYPE_PTR - if (schemeField.type == org.apache.thrift.protocol.TType.MAP) { - { - org.apache.thrift.protocol.TMap _map20 = iprot.readMapBegin(); - struct.nameToTypePtr = new HashMap(2*_map20.size); - for (int _i21 = 0; _i21 < _map20.size; ++_i21) - { - String _key22; // required - int _val23; // required - _key22 = iprot.readString(); - _val23 = iprot.readI32(); - struct.nameToTypePtr.put(_key22, _val23); - } - iprot.readMapEnd(); - } - struct.setNameToTypePtrIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TUnionTypeEntry struct) throws 
org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.nameToTypePtr != null) { - oprot.writeFieldBegin(NAME_TO_TYPE_PTR_FIELD_DESC); - { - oprot.writeMapBegin(new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.I32, struct.nameToTypePtr.size())); - for (Map.Entry _iter24 : struct.nameToTypePtr.entrySet()) - { - oprot.writeString(_iter24.getKey()); - oprot.writeI32(_iter24.getValue()); - } - oprot.writeMapEnd(); - } - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TUnionTypeEntryTupleSchemeFactory implements SchemeFactory { - public TUnionTypeEntryTupleScheme getScheme() { - return new TUnionTypeEntryTupleScheme(); - } - } - - private static class TUnionTypeEntryTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TUnionTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.nameToTypePtr.size()); - for (Map.Entry _iter25 : struct.nameToTypePtr.entrySet()) - { - oprot.writeString(_iter25.getKey()); - oprot.writeI32(_iter25.getValue()); - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TUnionTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TMap _map26 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.I32, iprot.readI32()); - struct.nameToTypePtr = new HashMap(2*_map26.size); - for (int _i27 = 0; _i27 < _map26.size; ++_i27) - { - String _key28; // required - int _val29; // required - _key28 = iprot.readString(); - _val29 = iprot.readI32(); - struct.nameToTypePtr.put(_key28, _val29); - } - } - struct.setNameToTypePtrIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TUserDefinedTypeEntry.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TUserDefinedTypeEntry.java deleted file mode 100644 index 3a111a2c8c2c6..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TUserDefinedTypeEntry.java +++ /dev/null @@ -1,385 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TUserDefinedTypeEntry implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct 
STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TUserDefinedTypeEntry"); - - private static final org.apache.thrift.protocol.TField TYPE_CLASS_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("typeClassName", org.apache.thrift.protocol.TType.STRING, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TUserDefinedTypeEntryStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TUserDefinedTypeEntryTupleSchemeFactory()); - } - - private String typeClassName; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - TYPE_CLASS_NAME((short)1, "typeClassName"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // TYPE_CLASS_NAME - return TYPE_CLASS_NAME; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.TYPE_CLASS_NAME, new org.apache.thrift.meta_data.FieldMetaData("typeClassName", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TUserDefinedTypeEntry.class, metaDataMap); - } - - public TUserDefinedTypeEntry() { - } - - public TUserDefinedTypeEntry( - String typeClassName) - { - this(); - this.typeClassName = typeClassName; - } - - /** - * Performs a deep copy on other. 
- */ - public TUserDefinedTypeEntry(TUserDefinedTypeEntry other) { - if (other.isSetTypeClassName()) { - this.typeClassName = other.typeClassName; - } - } - - public TUserDefinedTypeEntry deepCopy() { - return new TUserDefinedTypeEntry(this); - } - - @Override - public void clear() { - this.typeClassName = null; - } - - public String getTypeClassName() { - return this.typeClassName; - } - - public void setTypeClassName(String typeClassName) { - this.typeClassName = typeClassName; - } - - public void unsetTypeClassName() { - this.typeClassName = null; - } - - /** Returns true if field typeClassName is set (has been assigned a value) and false otherwise */ - public boolean isSetTypeClassName() { - return this.typeClassName != null; - } - - public void setTypeClassNameIsSet(boolean value) { - if (!value) { - this.typeClassName = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case TYPE_CLASS_NAME: - if (value == null) { - unsetTypeClassName(); - } else { - setTypeClassName((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case TYPE_CLASS_NAME: - return getTypeClassName(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case TYPE_CLASS_NAME: - return isSetTypeClassName(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TUserDefinedTypeEntry) - return this.equals((TUserDefinedTypeEntry)that); - return false; - } - - public boolean equals(TUserDefinedTypeEntry that) { - if (that == null) - return false; - - boolean this_present_typeClassName = true && this.isSetTypeClassName(); - boolean that_present_typeClassName = true && that.isSetTypeClassName(); - if (this_present_typeClassName || that_present_typeClassName) { - if (!(this_present_typeClassName && that_present_typeClassName)) - return false; - if (!this.typeClassName.equals(that.typeClassName)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_typeClassName = true && (isSetTypeClassName()); - builder.append(present_typeClassName); - if (present_typeClassName) - builder.append(typeClassName); - - return builder.toHashCode(); - } - - public int compareTo(TUserDefinedTypeEntry other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TUserDefinedTypeEntry typedOther = (TUserDefinedTypeEntry)other; - - lastComparison = Boolean.valueOf(isSetTypeClassName()).compareTo(typedOther.isSetTypeClassName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTypeClassName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.typeClassName, typedOther.typeClassName); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws 
org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TUserDefinedTypeEntry("); - boolean first = true; - - sb.append("typeClassName:"); - if (this.typeClassName == null) { - sb.append("null"); - } else { - sb.append(this.typeClassName); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetTypeClassName()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'typeClassName' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TUserDefinedTypeEntryStandardSchemeFactory implements SchemeFactory { - public TUserDefinedTypeEntryStandardScheme getScheme() { - return new TUserDefinedTypeEntryStandardScheme(); - } - } - - private static class TUserDefinedTypeEntryStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TUserDefinedTypeEntry struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // TYPE_CLASS_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.typeClassName = iprot.readString(); - struct.setTypeClassNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TUserDefinedTypeEntry struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.typeClassName != null) { - oprot.writeFieldBegin(TYPE_CLASS_NAME_FIELD_DESC); - oprot.writeString(struct.typeClassName); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TUserDefinedTypeEntryTupleSchemeFactory implements SchemeFactory { - public TUserDefinedTypeEntryTupleScheme getScheme() { - return new TUserDefinedTypeEntryTupleScheme(); - } - } - - private static class TUserDefinedTypeEntryTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TUserDefinedTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeString(struct.typeClassName); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TUserDefinedTypeEntry struct) throws 
org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.typeClassName = iprot.readString(); - struct.setTypeClassNameIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/AbstractService.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/AbstractService.java deleted file mode 100644 index 7e557aeccf5b0..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/AbstractService.java +++ /dev/null @@ -1,184 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.conf.HiveConf; - -/** - * AbstractService. - * - */ -public abstract class AbstractService implements Service { - - private static final Log LOG = LogFactory.getLog(AbstractService.class); - - /** - * Service state: initially {@link STATE#NOTINITED}. - */ - private Service.STATE state = STATE.NOTINITED; - - /** - * Service name. - */ - private final String name; - /** - * Service start time. Will be zero until the service is started. - */ - private long startTime; - - /** - * The configuration. Will be null until the service is initialized. - */ - private HiveConf hiveConf; - - /** - * List of state change listeners; it is final to ensure - * that it will never be null. - */ - private final List listeners = - new ArrayList(); - - /** - * Construct the service. 
- * - * @param name - * service name - */ - public AbstractService(String name) { - this.name = name; - } - - @Override - public synchronized Service.STATE getServiceState() { - return state; - } - - /** - * {@inheritDoc} - * - * @throws IllegalStateException - * if the current service state does not permit - * this action - */ - @Override - public synchronized void init(HiveConf hiveConf) { - ensureCurrentState(STATE.NOTINITED); - this.hiveConf = hiveConf; - changeState(STATE.INITED); - LOG.info("Service:" + getName() + " is inited."); - } - - /** - * {@inheritDoc} - * - * @throws IllegalStateException - * if the current service state does not permit - * this action - */ - @Override - public synchronized void start() { - startTime = System.currentTimeMillis(); - ensureCurrentState(STATE.INITED); - changeState(STATE.STARTED); - LOG.info("Service:" + getName() + " is started."); - } - - /** - * {@inheritDoc} - * - * @throws IllegalStateException - * if the current service state does not permit - * this action - */ - @Override - public synchronized void stop() { - if (state == STATE.STOPPED || - state == STATE.INITED || - state == STATE.NOTINITED) { - // already stopped, or else it was never - // started (eg another service failing canceled startup) - return; - } - ensureCurrentState(STATE.STARTED); - changeState(STATE.STOPPED); - LOG.info("Service:" + getName() + " is stopped."); - } - - @Override - public synchronized void register(ServiceStateChangeListener l) { - listeners.add(l); - } - - @Override - public synchronized void unregister(ServiceStateChangeListener l) { - listeners.remove(l); - } - - @Override - public String getName() { - return name; - } - - @Override - public synchronized HiveConf getHiveConf() { - return hiveConf; - } - - @Override - public long getStartTime() { - return startTime; - } - - /** - * Verify that a service is in a given state. - * - * @param currentState - * the desired state - * @throws IllegalStateException - * if the service state is different from - * the desired state - */ - private void ensureCurrentState(Service.STATE currentState) { - ServiceOperations.ensureCurrentState(state, currentState); - } - - /** - * Change to a new state and notify all listeners. - * This is a private method that is only invoked from synchronized methods, - * which avoid having to clone the listener list. It does imply that - * the state change listener methods should be short lived, as they - * will delay the state transition. - * - * @param newState - * new service state - */ - private void changeState(Service.STATE newState) { - state = newState; - // notify listeners - for (ServiceStateChangeListener l : listeners) { - l.stateChanged(this); - } - } - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/CompositeService.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/CompositeService.java deleted file mode 100644 index 897911872b80f..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/CompositeService.java +++ /dev/null @@ -1,133 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.List; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.conf.HiveConf; - -/** - * CompositeService. - * - */ -public class CompositeService extends AbstractService { - - private static final Log LOG = LogFactory.getLog(CompositeService.class); - - private final List serviceList = new ArrayList(); - - public CompositeService(String name) { - super(name); - } - - public Collection getServices() { - return Collections.unmodifiableList(serviceList); - } - - protected synchronized void addService(Service service) { - serviceList.add(service); - } - - protected synchronized boolean removeService(Service service) { - return serviceList.remove(service); - } - - @Override - public synchronized void init(HiveConf hiveConf) { - for (Service service : serviceList) { - service.init(hiveConf); - } - super.init(hiveConf); - } - - @Override - public synchronized void start() { - int i = 0; - try { - for (int n = serviceList.size(); i < n; i++) { - Service service = serviceList.get(i); - service.start(); - } - super.start(); - } catch (Throwable e) { - LOG.error("Error starting services " + getName(), e); - // Note that the state of the failed service is still INITED and not - // STARTED. Even though the last service is not started completely, still - // call stop() on all services including failed service to make sure cleanup - // happens. - stop(i); - throw new ServiceException("Failed to Start " + getName(), e); - } - - } - - @Override - public synchronized void stop() { - if (this.getServiceState() == STATE.STOPPED) { - // The base composite-service is already stopped, don't do anything again. - return; - } - if (serviceList.size() > 0) { - stop(serviceList.size() - 1); - } - super.stop(); - } - - private synchronized void stop(int numOfServicesStarted) { - // stop in reserve order of start - for (int i = numOfServicesStarted; i >= 0; i--) { - Service service = serviceList.get(i); - try { - service.stop(); - } catch (Throwable t) { - LOG.info("Error stopping " + service.getName(), t); - } - } - } - - /** - * JVM Shutdown hook for CompositeService which will stop the given - * CompositeService gracefully in case of JVM shutdown. 
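The CompositeService removed above starts its children in registration order and, when any child fails to start, stops the already-started children (including the one that failed) in reverse order before rethrowing. A minimal standalone sketch of that pattern follows; the names here are illustrative and not taken from the deleted class.

```
import java.util.ArrayList;
import java.util.List;

interface ChildService {
  void start() throws Exception;
  void stop();
}

final class CompositeSketch {
  private final List<ChildService> children = new ArrayList<>();

  void add(ChildService child) {
    children.add(child);
  }

  void startAll() {
    int i = 0;
    try {
      for (int n = children.size(); i < n; i++) {
        children.get(i).start();   // start in registration order
      }
    } catch (Exception e) {
      // Unwind in reverse order, covering the child that failed as well,
      // then surface the original failure; cleanup errors are best-effort.
      for (int j = i; j >= 0; j--) {
        try {
          children.get(j).stop();
        } catch (RuntimeException ignored) {
          // swallow, as the deleted class logs and continues
        }
      }
      throw new RuntimeException("Failed to start composite", e);
    }
  }
}
```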
- */ - public static class CompositeServiceShutdownHook implements Runnable { - - private final CompositeService compositeService; - - public CompositeServiceShutdownHook(CompositeService compositeService) { - this.compositeService = compositeService; - } - - @Override - public void run() { - try { - // Stop the Composite Service - compositeService.stop(); - } catch (Throwable t) { - LOG.info("Error stopping " + compositeService.getName(), t); - } - } - } - - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/CookieSigner.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/CookieSigner.java deleted file mode 100644 index f2a80c9d5ffbc..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/CookieSigner.java +++ /dev/null @@ -1,108 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service; - -import org.apache.commons.codec.binary.Base64; -import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.Log; - -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; - -/** - * The cookie signer generates a signature based on SHA digest - * and appends it to the cookie value generated at the - * server side. It uses SHA digest algorithm to sign and verify signatures. - */ -public class CookieSigner { - private static final String SIGNATURE = "&s="; - private static final String SHA_STRING = "SHA"; - private byte[] secretBytes; - private static final Log LOG = LogFactory.getLog(CookieSigner.class); - - /** - * Constructor - * @param secret Secret Bytes - */ - public CookieSigner(byte[] secret) { - if (secret == null) { - throw new IllegalArgumentException(" NULL Secret Bytes"); - } - this.secretBytes = secret.clone(); - } - - /** - * Sign the cookie given the string token as input. - * @param str Input token - * @return Signed token that can be used to create a cookie - */ - public String signCookie(String str) { - if (str == null || str.isEmpty()) { - throw new IllegalArgumentException("NULL or empty string to sign"); - } - String signature = getSignature(str); - - if (LOG.isDebugEnabled()) { - LOG.debug("Signature generated for " + str + " is " + signature); - } - return str + SIGNATURE + signature; - } - - /** - * Verify a signed string and extracts the original string. 
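The CookieSigner removed above appends "&s=" plus a Base64-encoded SHA digest of the cookie value and the server-side secret, and verification recomputes that digest and compares it to the appended signature. A standalone sketch of the same scheme is shown below, using only JDK classes (java.util.Base64 instead of the commons-codec Base64 the deleted class imports); class and method names are illustrative.

```
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.Base64;

final class CookieSignerSketch {
  private static final String SEPARATOR = "&s=";
  private final byte[] secret;

  CookieSignerSketch(byte[] secret) {
    this.secret = secret.clone();
  }

  String sign(String value) throws Exception {
    return value + SEPARATOR + signature(value);
  }

  String verifyAndExtract(String signed) throws Exception {
    int idx = signed.lastIndexOf(SEPARATOR);
    if (idx < 0) {
      throw new IllegalArgumentException("unsigned value: " + signed);
    }
    String raw = signed.substring(0, idx);
    String expected = signature(raw);
    String actual = signed.substring(idx + SEPARATOR.length());
    // Constant-time comparison of the recomputed and presented signatures.
    if (!MessageDigest.isEqual(actual.getBytes(StandardCharsets.UTF_8),
                               expected.getBytes(StandardCharsets.UTF_8))) {
      throw new IllegalArgumentException("signature mismatch");
    }
    return raw;
  }

  private String signature(String value) throws Exception {
    // "SHA" is the same algorithm name the deleted class requests.
    MessageDigest md = MessageDigest.getInstance("SHA");
    md.update(value.getBytes(StandardCharsets.UTF_8));
    md.update(secret);
    return Base64.getEncoder().encodeToString(md.digest());
  }
}
```

The appended signature never needs to be decoded: the verifier simply re-signs the raw portion and checks that the two strings match.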
- * @param signedStr The already signed string - * @return Raw Value of the string without the signature - */ - public String verifyAndExtract(String signedStr) { - int index = signedStr.lastIndexOf(SIGNATURE); - if (index == -1) { - throw new IllegalArgumentException("Invalid input sign: " + signedStr); - } - String originalSignature = signedStr.substring(index + SIGNATURE.length()); - String rawValue = signedStr.substring(0, index); - String currentSignature = getSignature(rawValue); - - if (LOG.isDebugEnabled()) { - LOG.debug("Signature generated for " + rawValue + " inside verify is " + currentSignature); - } - if (!MessageDigest.isEqual(originalSignature.getBytes(), currentSignature.getBytes())) { - throw new IllegalArgumentException("Invalid sign, original = " + originalSignature + - " current = " + currentSignature); - } - return rawValue; - } - - /** - * Get the signature of the input string based on SHA digest algorithm. - * @param str Input token - * @return Signed String - */ - private String getSignature(String str) { - try { - MessageDigest md = MessageDigest.getInstance(SHA_STRING); - md.update(str.getBytes()); - md.update(secretBytes); - byte[] digest = md.digest(); - return new Base64(0).encodeToString(digest); - } catch (NoSuchAlgorithmException ex) { - throw new RuntimeException("Invalid SHA digest String: " + SHA_STRING + - " " + ex.getMessage(), ex); - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/ServiceOperations.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/ServiceOperations.java deleted file mode 100644 index f16863c1b41aa..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/ServiceOperations.java +++ /dev/null @@ -1,141 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.conf.HiveConf; - -/** - * ServiceOperations. - * - */ -public final class ServiceOperations { - private static final Log LOG = LogFactory.getLog(ServiceOperations.class); - - private ServiceOperations() { - } - - /** - * Verify that a service is in a given state. - * @param state the actual state a service is in - * @param expectedState the desired state - * @throws IllegalStateException if the service state is different from - * the desired state - */ - public static void ensureCurrentState(Service.STATE state, - Service.STATE expectedState) { - if (state != expectedState) { - throw new IllegalStateException("For this operation, the " + - "current service state must be " - + expectedState - + " instead of " + state); - } - } - - /** - * Initialize a service. 
- * - * The service state is checked before the operation begins. - * This process is not thread safe. - * @param service a service that must be in the state - * {@link Service.STATE#NOTINITED} - * @param configuration the configuration to initialize the service with - * @throws RuntimeException on a state change failure - * @throws IllegalStateException if the service is in the wrong state - */ - - public static void init(Service service, HiveConf configuration) { - Service.STATE state = service.getServiceState(); - ensureCurrentState(state, Service.STATE.NOTINITED); - service.init(configuration); - } - - /** - * Start a service. - * - * The service state is checked before the operation begins. - * This process is not thread safe. - * @param service a service that must be in the state - * {@link Service.STATE#INITED} - * @throws RuntimeException on a state change failure - * @throws IllegalStateException if the service is in the wrong state - */ - - public static void start(Service service) { - Service.STATE state = service.getServiceState(); - ensureCurrentState(state, Service.STATE.INITED); - service.start(); - } - - /** - * Initialize then start a service. - * - * The service state is checked before the operation begins. - * This process is not thread safe. - * @param service a service that must be in the state - * {@link Service.STATE#NOTINITED} - * @param configuration the configuration to initialize the service with - * @throws RuntimeException on a state change failure - * @throws IllegalStateException if the service is in the wrong state - */ - public static void deploy(Service service, HiveConf configuration) { - init(service, configuration); - start(service); - } - - /** - * Stop a service. - * - * Do nothing if the service is null or not in a state in which it can be/needs to be stopped. - * - * The service state is checked before the operation begins. - * This process is not thread safe. - * @param service a service or null - */ - public static void stop(Service service) { - if (service != null) { - Service.STATE state = service.getServiceState(); - if (state == Service.STATE.STARTED) { - service.stop(); - } - } - } - - /** - * Stop a service; if it is null do nothing. Exceptions are caught and - * logged at warn level. (but not Throwables). This operation is intended to - * be used in cleanup operations - * - * @param service a service; may be null - * @return any exception that was caught; null if none was. - */ - public static Exception stopQuietly(Service service) { - try { - stop(service); - } catch (Exception e) { - LOG.warn("When stopping the service " + service.getName() - + " : " + e, - e); - return e; - } - return null; - } - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/ServiceUtils.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/ServiceUtils.java deleted file mode 100644 index edb5eff9615bf..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/ServiceUtils.java +++ /dev/null @@ -1,44 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hive.service; - -public class ServiceUtils { - - /* - * Get the index separating the user name from domain name (the user's name up - * to the first '/' or '@'). - * - * @param userName full user name. - * @return index of domain match or -1 if not found - */ - public static int indexOfDomainMatch(String userName) { - if (userName == null) { - return -1; - } - - int idx = userName.indexOf('/'); - int idx2 = userName.indexOf('@'); - int endIdx = Math.min(idx, idx2); // Use the earlier match. - // Unless at least one of '/' or '@' was not found, in - // which case, user the latter match. - if (endIdx == -1) { - endIdx = Math.max(idx, idx2); - } - return endIdx; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java deleted file mode 100644 index 10000f12ab329..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java +++ /dev/null @@ -1,419 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
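Side note on the ServiceUtils.indexOfDomainMatch helper deleted above: Math.min yields -1 whenever either separator is absent, so the fallback to Math.max picks whichever of '/' or '@' exists; only when both are present does the earlier one win (the in-code comment's "user the latter match" reads as a typo for "use"). A small sanity check of that behavior, with the method body copied from the deleted file (run with java -ea):

```
public class IndexOfDomainMatchCheck {
  // Same logic as the deleted ServiceUtils.indexOfDomainMatch.
  static int indexOfDomainMatch(String userName) {
    if (userName == null) {
      return -1;
    }
    int idx = userName.indexOf('/');
    int idx2 = userName.indexOf('@');
    int endIdx = Math.min(idx, idx2);   // earlier match, but -1 if either is missing
    if (endIdx == -1) {
      endIdx = Math.max(idx, idx2);     // fall back to whichever separator exists
    }
    return endIdx;
  }

  public static void main(String[] args) {
    assert indexOfDomainMatch("alice@EXAMPLE.COM") == 5;      // only '@'
    assert indexOfDomainMatch("hive/host@EXAMPLE.COM") == 4;  // '/' comes first
    assert indexOfDomainMatch("alice") == -1;                 // no separator at all
    System.out.println("indexOfDomainMatch behaves as described");
  }
}
```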
- */ -package org.apache.hive.service.auth; - -import java.io.IOException; -import java.lang.reflect.Field; -import java.lang.reflect.Method; -import java.net.InetSocketAddress; -import java.net.UnknownHostException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Objects; - -import javax.net.ssl.SSLServerSocket; -import javax.security.auth.login.LoginException; -import javax.security.sasl.Sasl; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.HiveMetaStore; -import org.apache.hadoop.hive.metastore.HiveMetaStore.HMSHandler; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.shims.HadoopShims.KerberosNameShim; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.hive.thrift.DBTokenStore; -import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge; -import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode; -import org.apache.hadoop.security.SecurityUtil; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.security.authorize.ProxyUsers; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.thrift.ThriftCLIService; -import org.apache.thrift.TProcessorFactory; -import org.apache.thrift.transport.TSSLTransportFactory; -import org.apache.thrift.transport.TServerSocket; -import org.apache.thrift.transport.TSocket; -import org.apache.thrift.transport.TTransport; -import org.apache.thrift.transport.TTransportException; -import org.apache.thrift.transport.TTransportFactory; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * This class helps in some aspects of authentication. It creates the proper Thrift classes for the - * given configuration as well as helps with authenticating requests. 
- */ -public class HiveAuthFactory { - private static final Logger LOG = LoggerFactory.getLogger(HiveAuthFactory.class); - - - public enum AuthTypes { - NOSASL("NOSASL"), - NONE("NONE"), - LDAP("LDAP"), - KERBEROS("KERBEROS"), - CUSTOM("CUSTOM"), - PAM("PAM"); - - private final String authType; - - AuthTypes(String authType) { - this.authType = authType; - } - - public String getAuthName() { - return authType; - } - - } - - private HadoopThriftAuthBridge.Server saslServer; - private String authTypeStr; - private final String transportMode; - private final HiveConf conf; - - public static final String HS2_PROXY_USER = "hive.server2.proxy.user"; - public static final String HS2_CLIENT_TOKEN = "hiveserver2ClientToken"; - - private static Field keytabFile = null; - private static Method getKeytab = null; - static { - Class clz = UserGroupInformation.class; - try { - keytabFile = clz.getDeclaredField("keytabFile"); - keytabFile.setAccessible(true); - } catch (NoSuchFieldException nfe) { - LOG.debug("Cannot find private field \"keytabFile\" in class: " + - UserGroupInformation.class.getCanonicalName(), nfe); - keytabFile = null; - } - - try { - getKeytab = clz.getDeclaredMethod("getKeytab"); - getKeytab.setAccessible(true); - } catch(NoSuchMethodException nme) { - LOG.debug("Cannot find private method \"getKeytab\" in class:" + - UserGroupInformation.class.getCanonicalName(), nme); - getKeytab = null; - } - } - - public HiveAuthFactory(HiveConf conf) throws TTransportException, IOException { - this.conf = conf; - transportMode = conf.getVar(HiveConf.ConfVars.HIVE_SERVER2_TRANSPORT_MODE); - authTypeStr = conf.getVar(HiveConf.ConfVars.HIVE_SERVER2_AUTHENTICATION); - - // In http mode we use NOSASL as the default auth type - if ("http".equalsIgnoreCase(transportMode)) { - if (authTypeStr == null) { - authTypeStr = AuthTypes.NOSASL.getAuthName(); - } - } else { - if (authTypeStr == null) { - authTypeStr = AuthTypes.NONE.getAuthName(); - } - if (authTypeStr.equalsIgnoreCase(AuthTypes.KERBEROS.getAuthName())) { - String principal = conf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL); - String keytab = conf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB); - if (needUgiLogin(UserGroupInformation.getCurrentUser(), - SecurityUtil.getServerPrincipal(principal, "0.0.0.0"), keytab)) { - saslServer = ShimLoader.getHadoopThriftAuthBridge().createServer(principal, keytab); - } else { - // Using the default constructor to avoid unnecessary UGI login. 
- saslServer = new HadoopThriftAuthBridge.Server(); - } - - // start delegation token manager - try { - // rawStore is only necessary for DBTokenStore - Object rawStore = null; - String tokenStoreClass = conf.getVar(HiveConf.ConfVars.METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_CLS); - - if (tokenStoreClass.equals(DBTokenStore.class.getName())) { - HMSHandler baseHandler = new HiveMetaStore.HMSHandler( - "new db based metaserver", conf, true); - rawStore = baseHandler.getMS(); - } - - saslServer.startDelegationTokenSecretManager(conf, rawStore, ServerMode.HIVESERVER2); - } - catch (MetaException|IOException e) { - throw new TTransportException("Failed to start token manager", e); - } - } - } - } - - public Map getSaslProperties() { - Map saslProps = new HashMap(); - SaslQOP saslQOP = SaslQOP.fromString(conf.getVar(ConfVars.HIVE_SERVER2_THRIFT_SASL_QOP)); - saslProps.put(Sasl.QOP, saslQOP.toString()); - saslProps.put(Sasl.SERVER_AUTH, "true"); - return saslProps; - } - - public TTransportFactory getAuthTransFactory() throws LoginException { - TTransportFactory transportFactory; - if (authTypeStr.equalsIgnoreCase(AuthTypes.KERBEROS.getAuthName())) { - try { - transportFactory = saslServer.createTransportFactory(getSaslProperties()); - } catch (TTransportException e) { - throw new LoginException(e.getMessage()); - } - } else if (authTypeStr.equalsIgnoreCase(AuthTypes.NONE.getAuthName())) { - transportFactory = PlainSaslHelper.getPlainTransportFactory(authTypeStr); - } else if (authTypeStr.equalsIgnoreCase(AuthTypes.LDAP.getAuthName())) { - transportFactory = PlainSaslHelper.getPlainTransportFactory(authTypeStr); - } else if (authTypeStr.equalsIgnoreCase(AuthTypes.PAM.getAuthName())) { - transportFactory = PlainSaslHelper.getPlainTransportFactory(authTypeStr); - } else if (authTypeStr.equalsIgnoreCase(AuthTypes.NOSASL.getAuthName())) { - transportFactory = new TTransportFactory(); - } else if (authTypeStr.equalsIgnoreCase(AuthTypes.CUSTOM.getAuthName())) { - transportFactory = PlainSaslHelper.getPlainTransportFactory(authTypeStr); - } else { - throw new LoginException("Unsupported authentication type " + authTypeStr); - } - return transportFactory; - } - - /** - * Returns the thrift processor factory for HiveServer2 running in binary mode - * @param service - * @return - * @throws LoginException - */ - public TProcessorFactory getAuthProcFactory(ThriftCLIService service) throws LoginException { - if (authTypeStr.equalsIgnoreCase(AuthTypes.KERBEROS.getAuthName())) { - return KerberosSaslHelper.getKerberosProcessorFactory(saslServer, service); - } else { - return PlainSaslHelper.getPlainProcessorFactory(service); - } - } - - public String getRemoteUser() { - return saslServer == null ? 
null : saslServer.getRemoteUser(); - } - - public String getIpAddress() { - if (saslServer == null || saslServer.getRemoteAddress() == null) { - return null; - } else { - return saslServer.getRemoteAddress().getHostAddress(); - } - } - - // Perform kerberos login using the hadoop shim API if the configuration is available - public static void loginFromKeytab(HiveConf hiveConf) throws IOException { - String principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL); - String keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB); - if (principal.isEmpty() || keyTabFile.isEmpty()) { - throw new IOException("HiveServer2 Kerberos principal or keytab is not correctly configured"); - } else { - UserGroupInformation.loginUserFromKeytab(SecurityUtil.getServerPrincipal(principal, "0.0.0.0"), keyTabFile); - } - } - - // Perform SPNEGO login using the hadoop shim API if the configuration is available - public static UserGroupInformation loginFromSpnegoKeytabAndReturnUGI(HiveConf hiveConf) - throws IOException { - String principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_PRINCIPAL); - String keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_KEYTAB); - if (principal.isEmpty() || keyTabFile.isEmpty()) { - throw new IOException("HiveServer2 SPNEGO principal or keytab is not correctly configured"); - } else { - return UserGroupInformation.loginUserFromKeytabAndReturnUGI(SecurityUtil.getServerPrincipal(principal, "0.0.0.0"), keyTabFile); - } - } - - public static TTransport getSocketTransport(String host, int port, int loginTimeout) { - return new TSocket(host, port, loginTimeout); - } - - public static TTransport getSSLSocket(String host, int port, int loginTimeout) - throws TTransportException { - return TSSLTransportFactory.getClientSocket(host, port, loginTimeout); - } - - public static TTransport getSSLSocket(String host, int port, int loginTimeout, - String trustStorePath, String trustStorePassWord) throws TTransportException { - TSSLTransportFactory.TSSLTransportParameters params = - new TSSLTransportFactory.TSSLTransportParameters(); - params.setTrustStore(trustStorePath, trustStorePassWord); - params.requireClientAuth(true); - return TSSLTransportFactory.getClientSocket(host, port, loginTimeout, params); - } - - public static TServerSocket getServerSocket(String hiveHost, int portNum) - throws TTransportException { - InetSocketAddress serverAddress; - if (hiveHost == null || hiveHost.isEmpty()) { - // Wildcard bind - serverAddress = new InetSocketAddress(portNum); - } else { - serverAddress = new InetSocketAddress(hiveHost, portNum); - } - return new TServerSocket(serverAddress); - } - - public static TServerSocket getServerSSLSocket(String hiveHost, int portNum, String keyStorePath, - String keyStorePassWord, List sslVersionBlacklist) throws TTransportException, - UnknownHostException { - TSSLTransportFactory.TSSLTransportParameters params = - new TSSLTransportFactory.TSSLTransportParameters(); - params.setKeyStore(keyStorePath, keyStorePassWord); - InetSocketAddress serverAddress; - if (hiveHost == null || hiveHost.isEmpty()) { - // Wildcard bind - serverAddress = new InetSocketAddress(portNum); - } else { - serverAddress = new InetSocketAddress(hiveHost, portNum); - } - TServerSocket thriftServerSocket = - TSSLTransportFactory.getServerSocket(portNum, 0, serverAddress.getAddress(), params); - if (thriftServerSocket.getServerSocket() instanceof SSLServerSocket) { - List sslVersionBlacklistLocal = new ArrayList(); - for (String sslVersion : 
sslVersionBlacklist) { - sslVersionBlacklistLocal.add(sslVersion.trim().toLowerCase(Locale.ROOT)); - } - SSLServerSocket sslServerSocket = (SSLServerSocket) thriftServerSocket.getServerSocket(); - List enabledProtocols = new ArrayList(); - for (String protocol : sslServerSocket.getEnabledProtocols()) { - if (sslVersionBlacklistLocal.contains(protocol.toLowerCase(Locale.ROOT))) { - LOG.debug("Disabling SSL Protocol: " + protocol); - } else { - enabledProtocols.add(protocol); - } - } - sslServerSocket.setEnabledProtocols(enabledProtocols.toArray(new String[0])); - LOG.info("SSL Server Socket Enabled Protocols: " - + Arrays.toString(sslServerSocket.getEnabledProtocols())); - } - return thriftServerSocket; - } - - // retrieve delegation token for the given user - public String getDelegationToken(String owner, String renewer) throws HiveSQLException { - if (saslServer == null) { - throw new HiveSQLException( - "Delegation token only supported over kerberos authentication", "08S01"); - } - - try { - String tokenStr = saslServer.getDelegationTokenWithService(owner, renewer, HS2_CLIENT_TOKEN); - if (tokenStr == null || tokenStr.isEmpty()) { - throw new HiveSQLException( - "Received empty retrieving delegation token for user " + owner, "08S01"); - } - return tokenStr; - } catch (IOException e) { - throw new HiveSQLException( - "Error retrieving delegation token for user " + owner, "08S01", e); - } catch (InterruptedException e) { - throw new HiveSQLException("delegation token retrieval interrupted", "08S01", e); - } - } - - // cancel given delegation token - public void cancelDelegationToken(String delegationToken) throws HiveSQLException { - if (saslServer == null) { - throw new HiveSQLException( - "Delegation token only supported over kerberos authentication", "08S01"); - } - try { - saslServer.cancelDelegationToken(delegationToken); - } catch (IOException e) { - throw new HiveSQLException( - "Error canceling delegation token " + delegationToken, "08S01", e); - } - } - - public void renewDelegationToken(String delegationToken) throws HiveSQLException { - if (saslServer == null) { - throw new HiveSQLException( - "Delegation token only supported over kerberos authentication", "08S01"); - } - try { - saslServer.renewDelegationToken(delegationToken); - } catch (IOException e) { - throw new HiveSQLException( - "Error renewing delegation token " + delegationToken, "08S01", e); - } - } - - public String getUserFromToken(String delegationToken) throws HiveSQLException { - if (saslServer == null) { - throw new HiveSQLException( - "Delegation token only supported over kerberos authentication", "08S01"); - } - try { - return saslServer.getUserFromToken(delegationToken); - } catch (IOException e) { - throw new HiveSQLException( - "Error extracting user from delegation token " + delegationToken, "08S01", e); - } - } - - public static void verifyProxyAccess(String realUser, String proxyUser, String ipAddress, - HiveConf hiveConf) throws HiveSQLException { - try { - UserGroupInformation sessionUgi; - if (UserGroupInformation.isSecurityEnabled()) { - KerberosNameShim kerbName = ShimLoader.getHadoopShims().getKerberosNameShim(realUser); - sessionUgi = UserGroupInformation.createProxyUser( - kerbName.getServiceName(), UserGroupInformation.getLoginUser()); - } else { - sessionUgi = UserGroupInformation.createRemoteUser(realUser); - } - if (!proxyUser.equalsIgnoreCase(realUser)) { - ProxyUsers.refreshSuperUserGroupsConfiguration(hiveConf); - ProxyUsers.authorize(UserGroupInformation.createProxyUser(proxyUser, 
sessionUgi), - ipAddress, hiveConf); - } - } catch (IOException e) { - throw new HiveSQLException( - "Failed to validate proxy privilege of " + realUser + " for " + proxyUser, "08S01", e); - } - } - - public static boolean needUgiLogin(UserGroupInformation ugi, String principal, String keytab) { - return null == ugi || !ugi.hasKerberosCredentials() || !ugi.getUserName().equals(principal) || - !Objects.equals(keytab, getKeytabFromUgi()); - } - - private static String getKeytabFromUgi() { - synchronized (UserGroupInformation.class) { - try { - if (keytabFile != null) { - return (String) keytabFile.get(null); - } else if (getKeytab != null) { - return (String) getKeytab.invoke(UserGroupInformation.getCurrentUser()); - } else { - return null; - } - } catch (Exception e) { - LOG.debug("Fail to get keytabFile path via reflection", e); - return null; - } - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java deleted file mode 100644 index f7375ee707830..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java +++ /dev/null @@ -1,189 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.auth; - -import java.security.AccessControlContext; -import java.security.AccessController; -import java.security.PrivilegedExceptionAction; -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Random; -import java.util.Set; -import java.util.StringTokenizer; - -import javax.security.auth.Subject; - -import org.apache.commons.codec.binary.Base64; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.http.protocol.BasicHttpContext; -import org.apache.http.protocol.HttpContext; -import org.ietf.jgss.GSSContext; -import org.ietf.jgss.GSSManager; -import org.ietf.jgss.GSSName; -import org.ietf.jgss.Oid; - -/** - * Utility functions for HTTP mode authentication. 
- */ -public final class HttpAuthUtils { - public static final String WWW_AUTHENTICATE = "WWW-Authenticate"; - public static final String AUTHORIZATION = "Authorization"; - public static final String BASIC = "Basic"; - public static final String NEGOTIATE = "Negotiate"; - private static final Log LOG = LogFactory.getLog(HttpAuthUtils.class); - private static final String COOKIE_ATTR_SEPARATOR = "&"; - private static final String COOKIE_CLIENT_USER_NAME = "cu"; - private static final String COOKIE_CLIENT_RAND_NUMBER = "rn"; - private static final String COOKIE_KEY_VALUE_SEPARATOR = "="; - private static final Set COOKIE_ATTRIBUTES = - new HashSet(Arrays.asList(COOKIE_CLIENT_USER_NAME, COOKIE_CLIENT_RAND_NUMBER)); - - /** - * @return Stringified Base64 encoded kerberosAuthHeader on success - * @throws Exception - */ - public static String getKerberosServiceTicket(String principal, String host, - String serverHttpUrl, boolean assumeSubject) throws Exception { - String serverPrincipal = - ShimLoader.getHadoopThriftAuthBridge().getServerPrincipal(principal, host); - if (assumeSubject) { - // With this option, we're assuming that the external application, - // using the JDBC driver has done a JAAS kerberos login already - AccessControlContext context = AccessController.getContext(); - Subject subject = Subject.getSubject(context); - if (subject == null) { - throw new Exception("The Subject is not set"); - } - return Subject.doAs(subject, new HttpKerberosClientAction(serverPrincipal, serverHttpUrl)); - } else { - // JAAS login from ticket cache to setup the client UserGroupInformation - UserGroupInformation clientUGI = - ShimLoader.getHadoopThriftAuthBridge().getCurrentUGIWithConf("kerberos"); - return clientUGI.doAs(new HttpKerberosClientAction(serverPrincipal, serverHttpUrl)); - } - } - - /** - * Creates and returns a HS2 cookie token. - * @param clientUserName Client User name. - * @return An unsigned cookie token generated from input parameters. - * The final cookie generated is of the following format : - * {@code cu=&rn=&s=} - */ - public static String createCookieToken(String clientUserName) { - StringBuffer sb = new StringBuffer(); - sb.append(COOKIE_CLIENT_USER_NAME).append(COOKIE_KEY_VALUE_SEPARATOR).append(clientUserName) - .append(COOKIE_ATTR_SEPARATOR); - sb.append(COOKIE_CLIENT_RAND_NUMBER).append(COOKIE_KEY_VALUE_SEPARATOR) - .append((new Random(System.currentTimeMillis())).nextLong()); - return sb.toString(); - } - - /** - * Parses a cookie token to retrieve client user name. - * @param tokenStr Token String. - * @return A valid user name if input is of valid format, else returns null. - */ - public static String getUserNameFromCookieToken(String tokenStr) { - Map map = splitCookieToken(tokenStr); - - if (!map.keySet().equals(COOKIE_ATTRIBUTES)) { - LOG.error("Invalid token with missing attributes " + tokenStr); - return null; - } - return map.get(COOKIE_CLIENT_USER_NAME); - } - - /** - * Splits the cookie token into attributes pairs. - * @param str input token. - * @return a map with the attribute pairs of the token if the input is valid. - * Else, returns null. 
- */ - private static Map splitCookieToken(String tokenStr) { - Map map = new HashMap(); - StringTokenizer st = new StringTokenizer(tokenStr, COOKIE_ATTR_SEPARATOR); - - while (st.hasMoreTokens()) { - String part = st.nextToken(); - int separator = part.indexOf(COOKIE_KEY_VALUE_SEPARATOR); - if (separator == -1) { - LOG.error("Invalid token string " + tokenStr); - return null; - } - String key = part.substring(0, separator); - String value = part.substring(separator + 1); - map.put(key, value); - } - return map; - } - - - private HttpAuthUtils() { - throw new UnsupportedOperationException("Can't initialize class"); - } - - /** - * We'll create an instance of this class within a doAs block so that the client's TGT credentials - * can be read from the Subject - */ - public static class HttpKerberosClientAction implements PrivilegedExceptionAction { - public static final String HTTP_RESPONSE = "HTTP_RESPONSE"; - public static final String SERVER_HTTP_URL = "SERVER_HTTP_URL"; - private final String serverPrincipal; - private final String serverHttpUrl; - private final Base64 base64codec; - private final HttpContext httpContext; - - public HttpKerberosClientAction(String serverPrincipal, String serverHttpUrl) { - this.serverPrincipal = serverPrincipal; - this.serverHttpUrl = serverHttpUrl; - base64codec = new Base64(0); - httpContext = new BasicHttpContext(); - httpContext.setAttribute(SERVER_HTTP_URL, serverHttpUrl); - } - - @Override - public String run() throws Exception { - // This Oid for Kerberos GSS-API mechanism. - Oid mechOid = new Oid("1.2.840.113554.1.2.2"); - // Oid for kerberos principal name - Oid krb5PrincipalOid = new Oid("1.2.840.113554.1.2.2.1"); - GSSManager manager = GSSManager.getInstance(); - // GSS name for server - GSSName serverName = manager.createName(serverPrincipal, krb5PrincipalOid); - // Create a GSSContext for authentication with the service. - // We're passing client credentials as null since we want them to be read from the Subject. - GSSContext gssContext = - manager.createContext(serverName, mechOid, null, GSSContext.DEFAULT_LIFETIME); - gssContext.requestMutualAuth(false); - // Establish context - byte[] inToken = new byte[0]; - byte[] outToken = gssContext.initSecContext(inToken, 0, inToken.length); - gssContext.dispose(); - // Base64 encoded and stringified token for server - return new String(base64codec.encode(outToken)); - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java deleted file mode 100644 index 52eb752f1e026..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java +++ /dev/null @@ -1,111 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
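For context, the HttpAuthUtils methods above produce an unsigned token of the form cu=<user>&rn=<random>, which CookieSigner then extends to cu=<user>&rn=<random>&s=<signature>. A minimal sketch of the create/parse pair (hypothetical names; SecureRandom is substituted for the time-seeded Random in the deleted code, and String.split loosely mirrors the StringTokenizer loop):

```
import java.security.SecureRandom;
import java.util.HashMap;
import java.util.Map;

public class CookieTokenSketch {
  // Mirrors the deleted createCookieToken: "cu=<user>&rn=<random>".
  static String createToken(String clientUserName) {
    return "cu=" + clientUserName + "&rn=" + new SecureRandom().nextLong();
  }

  // Mirrors the deleted splitCookieToken: "k=v&k=v" pairs, null on malformed input.
  static Map<String, String> splitToken(String token) {
    Map<String, String> map = new HashMap<>();
    for (String part : token.split("&")) {
      int sep = part.indexOf('=');
      if (sep == -1) {
        return null;
      }
      map.put(part.substring(0, sep), part.substring(sep + 1));
    }
    return map;
  }

  public static void main(String[] args) {
    String token = createToken("alice");
    System.out.println(token + " -> user=" + splitToken(token).get("cu"));
  }
}
```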
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hive.service.auth; - -import java.io.IOException; -import java.util.Map; -import javax.security.sasl.SaslException; - -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge; -import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server; -import org.apache.hive.service.cli.thrift.TCLIService; -import org.apache.hive.service.cli.thrift.TCLIService.Iface; -import org.apache.hive.service.cli.thrift.ThriftCLIService; -import org.apache.thrift.TProcessor; -import org.apache.thrift.TProcessorFactory; -import org.apache.thrift.transport.TSaslClientTransport; -import org.apache.thrift.transport.TTransport; - -public final class KerberosSaslHelper { - - public static TProcessorFactory getKerberosProcessorFactory(Server saslServer, - ThriftCLIService service) { - return new CLIServiceProcessorFactory(saslServer, service); - } - - public static TTransport getKerberosTransport(String principal, String host, - TTransport underlyingTransport, Map saslProps, boolean assumeSubject) - throws SaslException { - try { - String[] names = principal.split("[/@]"); - if (names.length != 3) { - throw new IllegalArgumentException("Kerberos principal should have 3 parts: " + principal); - } - - if (assumeSubject) { - return createSubjectAssumedTransport(principal, underlyingTransport, saslProps); - } else { - HadoopThriftAuthBridge.Client authBridge = - ShimLoader.getHadoopThriftAuthBridge().createClientWithConf("kerberos"); - return authBridge.createClientTransport(principal, host, "KERBEROS", null, - underlyingTransport, saslProps); - } - } catch (IOException e) { - throw new SaslException("Failed to open client transport", e); - } - } - - public static TTransport createSubjectAssumedTransport(String principal, - TTransport underlyingTransport, Map saslProps) throws IOException { - String[] names = principal.split("[/@]"); - try { - TTransport saslTransport = - new TSaslClientTransport("GSSAPI", null, names[0], names[1], saslProps, null, - underlyingTransport); - return new TSubjectAssumingTransport(saslTransport); - } catch (SaslException se) { - throw new IOException("Could not instantiate SASL transport", se); - } - } - - public static TTransport getTokenTransport(String tokenStr, String host, - TTransport underlyingTransport, Map saslProps) throws SaslException { - HadoopThriftAuthBridge.Client authBridge = - ShimLoader.getHadoopThriftAuthBridge().createClientWithConf("kerberos"); - - try { - return authBridge.createClientTransport(null, host, "DIGEST", tokenStr, underlyingTransport, - saslProps); - } catch (IOException e) { - throw new SaslException("Failed to open client transport", e); - } - } - - private KerberosSaslHelper() { - throw new UnsupportedOperationException("Can't initialize class"); - } - - private static class CLIServiceProcessorFactory extends TProcessorFactory { - - private final ThriftCLIService service; - private final Server saslServer; - - CLIServiceProcessorFactory(Server saslServer, ThriftCLIService service) { - super(null); - this.service = service; - this.saslServer = saslServer; - } - - @Override - public TProcessor getProcessor(TTransport trans) { - TProcessor sqlProcessor = new TCLIService.Processor(service); - return saslServer.wrapNonAssumingProcessor(sqlProcessor); - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java 
b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java deleted file mode 100644 index afc144199f1e8..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java +++ /dev/null @@ -1,154 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hive.service.auth; - -import java.io.IOException; -import java.security.Security; -import java.util.HashMap; -import javax.security.auth.callback.Callback; -import javax.security.auth.callback.CallbackHandler; -import javax.security.auth.callback.NameCallback; -import javax.security.auth.callback.PasswordCallback; -import javax.security.auth.callback.UnsupportedCallbackException; -import javax.security.auth.login.LoginException; -import javax.security.sasl.AuthenticationException; -import javax.security.sasl.AuthorizeCallback; -import javax.security.sasl.SaslException; - -import org.apache.hive.service.auth.AuthenticationProviderFactory.AuthMethods; -import org.apache.hive.service.auth.PlainSaslServer.SaslPlainProvider; -import org.apache.hive.service.cli.thrift.TCLIService.Iface; -import org.apache.hive.service.cli.thrift.ThriftCLIService; -import org.apache.thrift.TProcessor; -import org.apache.thrift.TProcessorFactory; -import org.apache.thrift.transport.TSaslClientTransport; -import org.apache.thrift.transport.TSaslServerTransport; -import org.apache.thrift.transport.TTransport; -import org.apache.thrift.transport.TTransportFactory; - -public final class PlainSaslHelper { - - public static TProcessorFactory getPlainProcessorFactory(ThriftCLIService service) { - return new SQLPlainProcessorFactory(service); - } - - // Register Plain SASL server provider - static { - Security.addProvider(new SaslPlainProvider()); - } - - public static TTransportFactory getPlainTransportFactory(String authTypeStr) - throws LoginException { - TSaslServerTransport.Factory saslFactory = new TSaslServerTransport.Factory(); - try { - saslFactory.addServerDefinition("PLAIN", authTypeStr, null, new HashMap(), - new PlainServerCallbackHandler(authTypeStr)); - } catch (AuthenticationException e) { - throw new LoginException("Error setting callback handler" + e); - } - return saslFactory; - } - - public static TTransport getPlainTransport(String username, String password, - TTransport underlyingTransport) throws SaslException { - return new TSaslClientTransport("PLAIN", null, null, null, new HashMap(), - new PlainCallbackHandler(username, password), underlyingTransport); - } - - private PlainSaslHelper() { - throw new UnsupportedOperationException("Can't initialize class"); - } - - private static final class PlainServerCallbackHandler implements CallbackHandler { - - private final AuthMethods authMethod; - - 
PlainServerCallbackHandler(String authMethodStr) throws AuthenticationException { - authMethod = AuthMethods.getValidAuthMethod(authMethodStr); - } - - @Override - public void handle(Callback[] callbacks) throws IOException, UnsupportedCallbackException { - String username = null; - String password = null; - AuthorizeCallback ac = null; - - for (Callback callback : callbacks) { - if (callback instanceof NameCallback) { - NameCallback nc = (NameCallback) callback; - username = nc.getName(); - } else if (callback instanceof PasswordCallback) { - PasswordCallback pc = (PasswordCallback) callback; - password = new String(pc.getPassword()); - } else if (callback instanceof AuthorizeCallback) { - ac = (AuthorizeCallback) callback; - } else { - throw new UnsupportedCallbackException(callback); - } - } - PasswdAuthenticationProvider provider = - AuthenticationProviderFactory.getAuthenticationProvider(authMethod); - provider.Authenticate(username, password); - if (ac != null) { - ac.setAuthorized(true); - } - } - } - - public static class PlainCallbackHandler implements CallbackHandler { - - private final String username; - private final String password; - - public PlainCallbackHandler(String username, String password) { - this.username = username; - this.password = password; - } - - @Override - public void handle(Callback[] callbacks) throws IOException, UnsupportedCallbackException { - for (Callback callback : callbacks) { - if (callback instanceof NameCallback) { - NameCallback nameCallback = (NameCallback) callback; - nameCallback.setName(username); - } else if (callback instanceof PasswordCallback) { - PasswordCallback passCallback = (PasswordCallback) callback; - passCallback.setPassword(password.toCharArray()); - } else { - throw new UnsupportedCallbackException(callback); - } - } - } - } - - private static final class SQLPlainProcessorFactory extends TProcessorFactory { - - private final ThriftCLIService service; - - SQLPlainProcessorFactory(ThriftCLIService service) { - super(null); - this.service = service; - } - - @Override - public TProcessor getProcessor(TTransport trans) { - return new TSetIpAddressProcessor(service); - } - } - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java deleted file mode 100644 index 9a61ad49942c8..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java +++ /dev/null @@ -1,114 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
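As a standalone illustration of the PLAIN SASL plumbing above: the server-side callback handler collects the user name and password from NameCallback/PasswordCallback, authenticates them, and marks the AuthorizeCallback as authorized. The sketch below keeps that shape but swaps in a hypothetical BiPredicate validator where the deleted code calls Hive's AuthenticationProviderFactory:

```
import java.io.IOException;
import java.util.function.BiPredicate;
import javax.security.auth.callback.Callback;
import javax.security.auth.callback.CallbackHandler;
import javax.security.auth.callback.NameCallback;
import javax.security.auth.callback.PasswordCallback;
import javax.security.auth.callback.UnsupportedCallbackException;
import javax.security.sasl.AuthorizeCallback;

public class PlainServerHandlerSketch implements CallbackHandler {
  // Hypothetical credential check standing in for PasswdAuthenticationProvider.Authenticate.
  private final BiPredicate<String, String> validator;

  public PlainServerHandlerSketch(BiPredicate<String, String> validator) {
    this.validator = validator;
  }

  @Override
  public void handle(Callback[] callbacks) throws IOException, UnsupportedCallbackException {
    String username = null;
    String password = null;
    AuthorizeCallback ac = null;
    for (Callback callback : callbacks) {
      if (callback instanceof NameCallback) {
        username = ((NameCallback) callback).getName();
      } else if (callback instanceof PasswordCallback) {
        password = new String(((PasswordCallback) callback).getPassword());
      } else if (callback instanceof AuthorizeCallback) {
        ac = (AuthorizeCallback) callback;
      } else {
        throw new UnsupportedCallbackException(callback);
      }
    }
    if (!validator.test(username, password)) {
      throw new IOException("Authentication failed for user " + username);
    }
    if (ac != null) {
      ac.setAuthorized(true);  // let the authenticated identity act as the authorization id
    }
  }
}
```

Such a handler would be registered the same way the deleted code registers its own, via TSaslServerTransport.Factory.addServerDefinition("PLAIN", ...).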
- */ - -package org.apache.hive.service.auth; - -import org.apache.hive.service.cli.thrift.TCLIService; -import org.apache.hive.service.cli.thrift.TCLIService.Iface; -import org.apache.thrift.TException; -import org.apache.thrift.protocol.TProtocol; -import org.apache.thrift.transport.TSaslClientTransport; -import org.apache.thrift.transport.TSaslServerTransport; -import org.apache.thrift.transport.TSocket; -import org.apache.thrift.transport.TTransport; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * This class is responsible for setting the ipAddress for operations executed via HiveServer2. - * - * - IP address is only set for operations that calls listeners with hookContext - * - IP address is only set if the underlying transport mechanism is socket - * - * @see org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext - */ -public class TSetIpAddressProcessor extends TCLIService.Processor { - - private static final Logger LOGGER = LoggerFactory.getLogger(TSetIpAddressProcessor.class.getName()); - - public TSetIpAddressProcessor(Iface iface) { - super(iface); - } - - @Override - public boolean process(final TProtocol in, final TProtocol out) throws TException { - setIpAddress(in); - setUserName(in); - try { - return super.process(in, out); - } finally { - THREAD_LOCAL_USER_NAME.remove(); - THREAD_LOCAL_IP_ADDRESS.remove(); - } - } - - private void setUserName(final TProtocol in) { - TTransport transport = in.getTransport(); - if (transport instanceof TSaslServerTransport) { - String userName = ((TSaslServerTransport) transport).getSaslServer().getAuthorizationID(); - THREAD_LOCAL_USER_NAME.set(userName); - } - } - - protected void setIpAddress(final TProtocol in) { - TTransport transport = in.getTransport(); - TSocket tSocket = getUnderlyingSocketFromTransport(transport); - if (tSocket == null) { - LOGGER.warn("Unknown Transport, cannot determine ipAddress"); - } else { - THREAD_LOCAL_IP_ADDRESS.set(tSocket.getSocket().getInetAddress().getHostAddress()); - } - } - - private TSocket getUnderlyingSocketFromTransport(TTransport transport) { - while (transport != null) { - if (transport instanceof TSaslServerTransport) { - transport = ((TSaslServerTransport) transport).getUnderlyingTransport(); - } - if (transport instanceof TSaslClientTransport) { - transport = ((TSaslClientTransport) transport).getUnderlyingTransport(); - } - if (transport instanceof TSocket) { - return (TSocket) transport; - } - } - return null; - } - - private static final ThreadLocal THREAD_LOCAL_IP_ADDRESS = new ThreadLocal() { - @Override - protected synchronized String initialValue() { - return null; - } - }; - - private static final ThreadLocal THREAD_LOCAL_USER_NAME = new ThreadLocal() { - @Override - protected synchronized String initialValue() { - return null; - } - }; - - public static String getUserIpAddress() { - return THREAD_LOCAL_IP_ADDRESS.get(); - } - - public static String getUserName() { - return THREAD_LOCAL_USER_NAME.get(); - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/CLIService.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/CLIService.java deleted file mode 100644 index 791ddcbd2c5b6..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/CLIService.java +++ /dev/null @@ -1,507 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import java.io.IOException; -import java.util.List; -import java.util.Map; -import java.util.concurrent.CancellationException; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; - -import javax.security.auth.login.LoginException; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.shims.Utils; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hive.service.CompositeService; -import org.apache.hive.service.ServiceException; -import org.apache.hive.service.auth.HiveAuthFactory; -import org.apache.hive.service.cli.operation.Operation; -import org.apache.hive.service.cli.session.SessionManager; -import org.apache.hive.service.cli.thrift.TProtocolVersion; -import org.apache.hive.service.server.HiveServer2; - -/** - * CLIService. 
- * - */ -public class CLIService extends CompositeService implements ICLIService { - - public static final TProtocolVersion SERVER_VERSION; - - static { - TProtocolVersion[] protocols = TProtocolVersion.values(); - SERVER_VERSION = protocols[protocols.length - 1]; - } - - private final Log LOG = LogFactory.getLog(CLIService.class.getName()); - - private HiveConf hiveConf; - private SessionManager sessionManager; - private UserGroupInformation serviceUGI; - private UserGroupInformation httpUGI; - // The HiveServer2 instance running this service - private final HiveServer2 hiveServer2; - - public CLIService(HiveServer2 hiveServer2) { - super(CLIService.class.getSimpleName()); - this.hiveServer2 = hiveServer2; - } - - @Override - public synchronized void init(HiveConf hiveConf) { - this.hiveConf = hiveConf; - sessionManager = new SessionManager(hiveServer2); - addService(sessionManager); - // If the hadoop cluster is secure, do a kerberos login for the service from the keytab - if (UserGroupInformation.isSecurityEnabled()) { - try { - HiveAuthFactory.loginFromKeytab(hiveConf); - this.serviceUGI = Utils.getUGI(); - } catch (IOException e) { - throw new ServiceException("Unable to login to kerberos with given principal/keytab", e); - } catch (LoginException e) { - throw new ServiceException("Unable to login to kerberos with given principal/keytab", e); - } - - // Also try creating a UGI object for the SPNego principal - String principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_PRINCIPAL); - String keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_KEYTAB); - if (principal.isEmpty() || keyTabFile.isEmpty()) { - LOG.info("SPNego httpUGI not created, spNegoPrincipal: " + principal + - ", ketabFile: " + keyTabFile); - } else { - try { - this.httpUGI = HiveAuthFactory.loginFromSpnegoKeytabAndReturnUGI(hiveConf); - LOG.info("SPNego httpUGI successfully created."); - } catch (IOException e) { - LOG.warn("SPNego httpUGI creation failed: ", e); - } - } - } - // creates connection to HMS and thus *must* occur after kerberos login above - try { - applyAuthorizationConfigPolicy(hiveConf); - } catch (Exception e) { - throw new RuntimeException("Error applying authorization policy on hive configuration: " - + e.getMessage(), e); - } - setupBlockedUdfs(); - super.init(hiveConf); - } - - private void applyAuthorizationConfigPolicy(HiveConf newHiveConf) throws HiveException, - MetaException { - // authorization setup using SessionState should be revisited eventually, as - // authorization and authentication are not session specific settings - SessionState ss = new SessionState(newHiveConf); - ss.setIsHiveServerQuery(true); - SessionState.start(ss); - ss.applyAuthorizationPolicy(); - } - - private void setupBlockedUdfs() { - FunctionRegistry.setupPermissionsForBuiltinUDFs( - hiveConf.getVar(ConfVars.HIVE_SERVER2_BUILTIN_UDF_WHITELIST), - hiveConf.getVar(ConfVars.HIVE_SERVER2_BUILTIN_UDF_BLACKLIST)); - } - - public UserGroupInformation getServiceUGI() { - return this.serviceUGI; - } - - public UserGroupInformation getHttpUGI() { - return this.httpUGI; - } - - @Override - public synchronized void start() { - super.start(); - // Initialize and test a connection to the metastore - IMetaStoreClient metastoreClient = null; - try { - metastoreClient = new HiveMetaStoreClient(hiveConf); - metastoreClient.getDatabases("default"); - } catch (Exception e) { - throw new ServiceException("Unable to connect to MetaStore!", e); - } - finally { - if (metastoreClient != null) { - metastoreClient.close(); - } - } - 
} - - @Override - public synchronized void stop() { - super.stop(); - } - - /** - * @deprecated Use {@link #openSession(TProtocolVersion, String, String, String, Map)} - */ - @Deprecated - public SessionHandle openSession(TProtocolVersion protocol, String username, String password, - Map configuration) throws HiveSQLException { - SessionHandle sessionHandle = sessionManager.openSession(protocol, username, password, null, configuration, false, null); - LOG.debug(sessionHandle + ": openSession()"); - return sessionHandle; - } - - /** - * @deprecated Use {@link #openSessionWithImpersonation(TProtocolVersion, String, String, String, Map, String)} - */ - @Deprecated - public SessionHandle openSessionWithImpersonation(TProtocolVersion protocol, String username, - String password, Map configuration, String delegationToken) - throws HiveSQLException { - SessionHandle sessionHandle = sessionManager.openSession(protocol, username, password, null, configuration, - true, delegationToken); - LOG.debug(sessionHandle + ": openSessionWithImpersonation()"); - return sessionHandle; - } - - public SessionHandle openSession(TProtocolVersion protocol, String username, String password, String ipAddress, - Map configuration) throws HiveSQLException { - SessionHandle sessionHandle = sessionManager.openSession(protocol, username, password, ipAddress, configuration, false, null); - LOG.debug(sessionHandle + ": openSession()"); - return sessionHandle; - } - - public SessionHandle openSessionWithImpersonation(TProtocolVersion protocol, String username, - String password, String ipAddress, Map configuration, String delegationToken) - throws HiveSQLException { - SessionHandle sessionHandle = sessionManager.openSession(protocol, username, password, ipAddress, configuration, - true, delegationToken); - LOG.debug(sessionHandle + ": openSession()"); - return sessionHandle; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#openSession(java.lang.String, java.lang.String, java.util.Map) - */ - @Override - public SessionHandle openSession(String username, String password, Map configuration) - throws HiveSQLException { - SessionHandle sessionHandle = sessionManager.openSession(SERVER_VERSION, username, password, null, configuration, false, null); - LOG.debug(sessionHandle + ": openSession()"); - return sessionHandle; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#openSession(java.lang.String, java.lang.String, java.util.Map) - */ - @Override - public SessionHandle openSessionWithImpersonation(String username, String password, Map configuration, - String delegationToken) throws HiveSQLException { - SessionHandle sessionHandle = sessionManager.openSession(SERVER_VERSION, username, password, null, configuration, - true, delegationToken); - LOG.debug(sessionHandle + ": openSession()"); - return sessionHandle; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#closeSession(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public void closeSession(SessionHandle sessionHandle) - throws HiveSQLException { - sessionManager.closeSession(sessionHandle); - LOG.debug(sessionHandle + ": closeSession()"); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getInfo(org.apache.hive.service.cli.SessionHandle, java.util.List) - */ - @Override - public GetInfoValue getInfo(SessionHandle sessionHandle, GetInfoType getInfoType) - throws HiveSQLException { - GetInfoValue infoValue = sessionManager.getSession(sessionHandle) - 
.getInfo(getInfoType); - LOG.debug(sessionHandle + ": getInfo()"); - return infoValue; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#executeStatement(org.apache.hive.service.cli.SessionHandle, - * java.lang.String, java.util.Map) - */ - @Override - public OperationHandle executeStatement(SessionHandle sessionHandle, String statement, - Map confOverlay) - throws HiveSQLException { - OperationHandle opHandle = sessionManager.getSession(sessionHandle) - .executeStatement(statement, confOverlay); - LOG.debug(sessionHandle + ": executeStatement()"); - return opHandle; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#executeStatementAsync(org.apache.hive.service.cli.SessionHandle, - * java.lang.String, java.util.Map) - */ - @Override - public OperationHandle executeStatementAsync(SessionHandle sessionHandle, String statement, - Map confOverlay) throws HiveSQLException { - OperationHandle opHandle = sessionManager.getSession(sessionHandle) - .executeStatementAsync(statement, confOverlay); - LOG.debug(sessionHandle + ": executeStatementAsync()"); - return opHandle; - } - - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getTypeInfo(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public OperationHandle getTypeInfo(SessionHandle sessionHandle) - throws HiveSQLException { - OperationHandle opHandle = sessionManager.getSession(sessionHandle) - .getTypeInfo(); - LOG.debug(sessionHandle + ": getTypeInfo()"); - return opHandle; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getCatalogs(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public OperationHandle getCatalogs(SessionHandle sessionHandle) - throws HiveSQLException { - OperationHandle opHandle = sessionManager.getSession(sessionHandle) - .getCatalogs(); - LOG.debug(sessionHandle + ": getCatalogs()"); - return opHandle; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getSchemas(org.apache.hive.service.cli.SessionHandle, java.lang.String, java.lang.String) - */ - @Override - public OperationHandle getSchemas(SessionHandle sessionHandle, - String catalogName, String schemaName) - throws HiveSQLException { - OperationHandle opHandle = sessionManager.getSession(sessionHandle) - .getSchemas(catalogName, schemaName); - LOG.debug(sessionHandle + ": getSchemas()"); - return opHandle; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getTables(org.apache.hive.service.cli.SessionHandle, java.lang.String, java.lang.String, java.lang.String, java.util.List) - */ - @Override - public OperationHandle getTables(SessionHandle sessionHandle, - String catalogName, String schemaName, String tableName, List tableTypes) - throws HiveSQLException { - OperationHandle opHandle = sessionManager.getSession(sessionHandle) - .getTables(catalogName, schemaName, tableName, tableTypes); - LOG.debug(sessionHandle + ": getTables()"); - return opHandle; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getTableTypes(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public OperationHandle getTableTypes(SessionHandle sessionHandle) - throws HiveSQLException { - OperationHandle opHandle = sessionManager.getSession(sessionHandle) - .getTableTypes(); - LOG.debug(sessionHandle + ": getTableTypes()"); - return opHandle; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getColumns(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public 
OperationHandle getColumns(SessionHandle sessionHandle, - String catalogName, String schemaName, String tableName, String columnName) - throws HiveSQLException { - OperationHandle opHandle = sessionManager.getSession(sessionHandle) - .getColumns(catalogName, schemaName, tableName, columnName); - LOG.debug(sessionHandle + ": getColumns()"); - return opHandle; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getFunctions(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public OperationHandle getFunctions(SessionHandle sessionHandle, - String catalogName, String schemaName, String functionName) - throws HiveSQLException { - OperationHandle opHandle = sessionManager.getSession(sessionHandle) - .getFunctions(catalogName, schemaName, functionName); - LOG.debug(sessionHandle + ": getFunctions()"); - return opHandle; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getOperationStatus(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public OperationStatus getOperationStatus(OperationHandle opHandle) - throws HiveSQLException { - Operation operation = sessionManager.getOperationManager().getOperation(opHandle); - /** - * If this is a background operation run asynchronously, - * we block for a configured duration, before we return - * (duration: HIVE_SERVER2_LONG_POLLING_TIMEOUT). - * However, if the background operation is complete, we return immediately. - */ - if (operation.shouldRunAsync()) { - HiveConf conf = operation.getParentSession().getHiveConf(); - long timeout = HiveConf.getTimeVar(conf, - HiveConf.ConfVars.HIVE_SERVER2_LONG_POLLING_TIMEOUT, TimeUnit.MILLISECONDS); - try { - operation.getBackgroundHandle().get(timeout, TimeUnit.MILLISECONDS); - } catch (TimeoutException e) { - // No Op, return to the caller since long polling timeout has expired - LOG.trace(opHandle + ": Long polling timed out"); - } catch (CancellationException e) { - // The background operation thread was cancelled - LOG.trace(opHandle + ": The background operation was cancelled", e); - } catch (ExecutionException e) { - // The background operation thread was aborted - LOG.warn(opHandle + ": The background operation was aborted", e); - } catch (InterruptedException e) { - // No op, this thread was interrupted - // In this case, the call might return sooner than long polling timeout - } - } - OperationStatus opStatus = operation.getStatus(); - LOG.debug(opHandle + ": getOperationStatus()"); - return opStatus; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#cancelOperation(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public void cancelOperation(OperationHandle opHandle) - throws HiveSQLException { - sessionManager.getOperationManager().getOperation(opHandle) - .getParentSession().cancelOperation(opHandle); - LOG.debug(opHandle + ": cancelOperation()"); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#closeOperation(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public void closeOperation(OperationHandle opHandle) - throws HiveSQLException { - sessionManager.getOperationManager().getOperation(opHandle) - .getParentSession().closeOperation(opHandle); - LOG.debug(opHandle + ": closeOperation"); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getResultSetMetadata(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public TableSchema getResultSetMetadata(OperationHandle opHandle) - throws HiveSQLException { - TableSchema 
tableSchema = sessionManager.getOperationManager() - .getOperation(opHandle).getParentSession().getResultSetMetadata(opHandle); - LOG.debug(opHandle + ": getResultSetMetadata()"); - return tableSchema; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#fetchResults(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public RowSet fetchResults(OperationHandle opHandle) - throws HiveSQLException { - return fetchResults(opHandle, Operation.DEFAULT_FETCH_ORIENTATION, - Operation.DEFAULT_FETCH_MAX_ROWS, FetchType.QUERY_OUTPUT); - } - - @Override - public RowSet fetchResults(OperationHandle opHandle, FetchOrientation orientation, - long maxRows, FetchType fetchType) throws HiveSQLException { - RowSet rowSet = sessionManager.getOperationManager().getOperation(opHandle) - .getParentSession().fetchResults(opHandle, orientation, maxRows, fetchType); - LOG.debug(opHandle + ": fetchResults()"); - return rowSet; - } - - // obtain delegation token for the give user from metastore - public synchronized String getDelegationTokenFromMetaStore(String owner) - throws HiveSQLException, UnsupportedOperationException, LoginException, IOException { - if (!hiveConf.getBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL) || - !hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS)) { - throw new UnsupportedOperationException( - "delegation token is can only be obtained for a secure remote metastore"); - } - - try { - Hive.closeCurrent(); - return Hive.get(hiveConf).getDelegationToken(owner, owner); - } catch (HiveException e) { - if (e.getCause() instanceof UnsupportedOperationException) { - throw (UnsupportedOperationException)e.getCause(); - } else { - throw new HiveSQLException("Error connect metastore to setup impersonation", e); - } - } - } - - @Override - public String getDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, - String owner, String renewer) throws HiveSQLException { - String delegationToken = sessionManager.getSession(sessionHandle) - .getDelegationToken(authFactory, owner, renewer); - LOG.info(sessionHandle + ": getDelegationToken()"); - return delegationToken; - } - - @Override - public void cancelDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, - String tokenStr) throws HiveSQLException { - sessionManager.getSession(sessionHandle).cancelDelegationToken(authFactory, tokenStr); - LOG.info(sessionHandle + ": cancelDelegationToken()"); - } - - @Override - public void renewDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, - String tokenStr) throws HiveSQLException { - sessionManager.getSession(sessionHandle).renewDelegationToken(authFactory, tokenStr); - LOG.info(sessionHandle + ": renewDelegationToken()"); - } - - public SessionManager getSessionManager() { - return sessionManager; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Column.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Column.java deleted file mode 100644 index 26d0f718f383a..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Column.java +++ /dev/null @@ -1,423 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import java.nio.ByteBuffer; -import java.util.AbstractList; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.BitSet; -import java.util.List; - -import com.google.common.primitives.Booleans; -import com.google.common.primitives.Bytes; -import com.google.common.primitives.Doubles; -import com.google.common.primitives.Ints; -import com.google.common.primitives.Longs; -import com.google.common.primitives.Shorts; -import org.apache.hive.service.cli.thrift.TBinaryColumn; -import org.apache.hive.service.cli.thrift.TBoolColumn; -import org.apache.hive.service.cli.thrift.TByteColumn; -import org.apache.hive.service.cli.thrift.TColumn; -import org.apache.hive.service.cli.thrift.TDoubleColumn; -import org.apache.hive.service.cli.thrift.TI16Column; -import org.apache.hive.service.cli.thrift.TI32Column; -import org.apache.hive.service.cli.thrift.TI64Column; -import org.apache.hive.service.cli.thrift.TStringColumn; - -/** - * Column. - */ -public class Column extends AbstractList { - - private static final int DEFAULT_SIZE = 100; - - private final Type type; - - private BitSet nulls; - - private int size; - private boolean[] boolVars; - private byte[] byteVars; - private short[] shortVars; - private int[] intVars; - private long[] longVars; - private double[] doubleVars; - private List stringVars; - private List binaryVars; - - public Column(Type type, BitSet nulls, Object values) { - this.type = type; - this.nulls = nulls; - if (type == Type.BOOLEAN_TYPE) { - boolVars = (boolean[]) values; - size = boolVars.length; - } else if (type == Type.TINYINT_TYPE) { - byteVars = (byte[]) values; - size = byteVars.length; - } else if (type == Type.SMALLINT_TYPE) { - shortVars = (short[]) values; - size = shortVars.length; - } else if (type == Type.INT_TYPE) { - intVars = (int[]) values; - size = intVars.length; - } else if (type == Type.BIGINT_TYPE) { - longVars = (long[]) values; - size = longVars.length; - } else if (type == Type.DOUBLE_TYPE) { - doubleVars = (double[]) values; - size = doubleVars.length; - } else if (type == Type.BINARY_TYPE) { - binaryVars = (List) values; - size = binaryVars.size(); - } else if (type == Type.STRING_TYPE) { - stringVars = (List) values; - size = stringVars.size(); - } else { - throw new IllegalStateException("invalid union object"); - } - } - - public Column(Type type) { - nulls = new BitSet(); - switch (type) { - case BOOLEAN_TYPE: - boolVars = new boolean[DEFAULT_SIZE]; - break; - case TINYINT_TYPE: - byteVars = new byte[DEFAULT_SIZE]; - break; - case SMALLINT_TYPE: - shortVars = new short[DEFAULT_SIZE]; - break; - case INT_TYPE: - intVars = new int[DEFAULT_SIZE]; - break; - case BIGINT_TYPE: - longVars = new long[DEFAULT_SIZE]; - break; - case FLOAT_TYPE: - case DOUBLE_TYPE: - type = Type.DOUBLE_TYPE; - doubleVars = new double[DEFAULT_SIZE]; - break; - case BINARY_TYPE: - binaryVars = new ArrayList(); - break; - default: - 
type = Type.STRING_TYPE; - stringVars = new ArrayList(); - } - this.type = type; - } - - public Column(TColumn colValues) { - if (colValues.isSetBoolVal()) { - type = Type.BOOLEAN_TYPE; - nulls = toBitset(colValues.getBoolVal().getNulls()); - boolVars = Booleans.toArray(colValues.getBoolVal().getValues()); - size = boolVars.length; - } else if (colValues.isSetByteVal()) { - type = Type.TINYINT_TYPE; - nulls = toBitset(colValues.getByteVal().getNulls()); - byteVars = Bytes.toArray(colValues.getByteVal().getValues()); - size = byteVars.length; - } else if (colValues.isSetI16Val()) { - type = Type.SMALLINT_TYPE; - nulls = toBitset(colValues.getI16Val().getNulls()); - shortVars = Shorts.toArray(colValues.getI16Val().getValues()); - size = shortVars.length; - } else if (colValues.isSetI32Val()) { - type = Type.INT_TYPE; - nulls = toBitset(colValues.getI32Val().getNulls()); - intVars = Ints.toArray(colValues.getI32Val().getValues()); - size = intVars.length; - } else if (colValues.isSetI64Val()) { - type = Type.BIGINT_TYPE; - nulls = toBitset(colValues.getI64Val().getNulls()); - longVars = Longs.toArray(colValues.getI64Val().getValues()); - size = longVars.length; - } else if (colValues.isSetDoubleVal()) { - type = Type.DOUBLE_TYPE; - nulls = toBitset(colValues.getDoubleVal().getNulls()); - doubleVars = Doubles.toArray(colValues.getDoubleVal().getValues()); - size = doubleVars.length; - } else if (colValues.isSetBinaryVal()) { - type = Type.BINARY_TYPE; - nulls = toBitset(colValues.getBinaryVal().getNulls()); - binaryVars = colValues.getBinaryVal().getValues(); - size = binaryVars.size(); - } else if (colValues.isSetStringVal()) { - type = Type.STRING_TYPE; - nulls = toBitset(colValues.getStringVal().getNulls()); - stringVars = colValues.getStringVal().getValues(); - size = stringVars.size(); - } else { - throw new IllegalStateException("invalid union object"); - } - } - - public Column extractSubset(int start, int end) { - BitSet subNulls = nulls.get(start, end); - if (type == Type.BOOLEAN_TYPE) { - Column subset = new Column(type, subNulls, Arrays.copyOfRange(boolVars, start, end)); - boolVars = Arrays.copyOfRange(boolVars, end, size); - nulls = nulls.get(start, size); - size = boolVars.length; - return subset; - } - if (type == Type.TINYINT_TYPE) { - Column subset = new Column(type, subNulls, Arrays.copyOfRange(byteVars, start, end)); - byteVars = Arrays.copyOfRange(byteVars, end, size); - nulls = nulls.get(start, size); - size = byteVars.length; - return subset; - } - if (type == Type.SMALLINT_TYPE) { - Column subset = new Column(type, subNulls, Arrays.copyOfRange(shortVars, start, end)); - shortVars = Arrays.copyOfRange(shortVars, end, size); - nulls = nulls.get(start, size); - size = shortVars.length; - return subset; - } - if (type == Type.INT_TYPE) { - Column subset = new Column(type, subNulls, Arrays.copyOfRange(intVars, start, end)); - intVars = Arrays.copyOfRange(intVars, end, size); - nulls = nulls.get(start, size); - size = intVars.length; - return subset; - } - if (type == Type.BIGINT_TYPE) { - Column subset = new Column(type, subNulls, Arrays.copyOfRange(longVars, start, end)); - longVars = Arrays.copyOfRange(longVars, end, size); - nulls = nulls.get(start, size); - size = longVars.length; - return subset; - } - if (type == Type.DOUBLE_TYPE) { - Column subset = new Column(type, subNulls, Arrays.copyOfRange(doubleVars, start, end)); - doubleVars = Arrays.copyOfRange(doubleVars, end, size); - nulls = nulls.get(start, size); - size = doubleVars.length; - return subset; - } - if (type 
== Type.BINARY_TYPE) { - Column subset = new Column(type, subNulls, binaryVars.subList(start, end)); - binaryVars = binaryVars.subList(end, binaryVars.size()); - nulls = nulls.get(start, size); - size = binaryVars.size(); - return subset; - } - if (type == Type.STRING_TYPE) { - Column subset = new Column(type, subNulls, stringVars.subList(start, end)); - stringVars = stringVars.subList(end, stringVars.size()); - nulls = nulls.get(start, size); - size = stringVars.size(); - return subset; - } - throw new IllegalStateException("invalid union object"); - } - - private static final byte[] MASKS = new byte[] { - 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, (byte)0x80 - }; - - private static BitSet toBitset(byte[] nulls) { - BitSet bitset = new BitSet(); - int bits = nulls.length * 8; - for (int i = 0; i < bits; i++) { - bitset.set(i, (nulls[i / 8] & MASKS[i % 8]) != 0); - } - return bitset; - } - - private static byte[] toBinary(BitSet bitset) { - byte[] nulls = new byte[1 + (bitset.length() / 8)]; - for (int i = 0; i < bitset.length(); i++) { - nulls[i / 8] |= bitset.get(i) ? MASKS[i % 8] : 0; - } - return nulls; - } - - public Type getType() { - return type; - } - - @Override - public Object get(int index) { - if (nulls.get(index)) { - return null; - } - switch (type) { - case BOOLEAN_TYPE: - return boolVars[index]; - case TINYINT_TYPE: - return byteVars[index]; - case SMALLINT_TYPE: - return shortVars[index]; - case INT_TYPE: - return intVars[index]; - case BIGINT_TYPE: - return longVars[index]; - case DOUBLE_TYPE: - return doubleVars[index]; - case STRING_TYPE: - return stringVars.get(index); - case BINARY_TYPE: - return binaryVars.get(index).array(); - } - return null; - } - - @Override - public int size() { - return size; - } - - public TColumn toTColumn() { - TColumn value = new TColumn(); - ByteBuffer nullMasks = ByteBuffer.wrap(toBinary(nulls)); - switch (type) { - case BOOLEAN_TYPE: - value.setBoolVal(new TBoolColumn(Booleans.asList(Arrays.copyOfRange(boolVars, 0, size)), nullMasks)); - break; - case TINYINT_TYPE: - value.setByteVal(new TByteColumn(Bytes.asList(Arrays.copyOfRange(byteVars, 0, size)), nullMasks)); - break; - case SMALLINT_TYPE: - value.setI16Val(new TI16Column(Shorts.asList(Arrays.copyOfRange(shortVars, 0, size)), nullMasks)); - break; - case INT_TYPE: - value.setI32Val(new TI32Column(Ints.asList(Arrays.copyOfRange(intVars, 0, size)), nullMasks)); - break; - case BIGINT_TYPE: - value.setI64Val(new TI64Column(Longs.asList(Arrays.copyOfRange(longVars, 0, size)), nullMasks)); - break; - case DOUBLE_TYPE: - value.setDoubleVal(new TDoubleColumn(Doubles.asList(Arrays.copyOfRange(doubleVars, 0, size)), nullMasks)); - break; - case STRING_TYPE: - value.setStringVal(new TStringColumn(stringVars, nullMasks)); - break; - case BINARY_TYPE: - value.setBinaryVal(new TBinaryColumn(binaryVars, nullMasks)); - break; - } - return value; - } - - private static final ByteBuffer EMPTY_BINARY = ByteBuffer.allocate(0); - private static final String EMPTY_STRING = ""; - - public void addValue(Type type, Object field) { - switch (type) { - case BOOLEAN_TYPE: - nulls.set(size, field == null); - boolVars()[size] = field == null ? true : (Boolean)field; - break; - case TINYINT_TYPE: - nulls.set(size, field == null); - byteVars()[size] = field == null ? 0 : (Byte) field; - break; - case SMALLINT_TYPE: - nulls.set(size, field == null); - shortVars()[size] = field == null ? 0 : (Short)field; - break; - case INT_TYPE: - nulls.set(size, field == null); - intVars()[size] = field == null ? 
0 : (Integer)field; - break; - case BIGINT_TYPE: - nulls.set(size, field == null); - longVars()[size] = field == null ? 0 : (Long)field; - break; - case FLOAT_TYPE: - nulls.set(size, field == null); - doubleVars()[size] = field == null ? 0 : Double.valueOf(field.toString()); - break; - case DOUBLE_TYPE: - nulls.set(size, field == null); - doubleVars()[size] = field == null ? 0 : (Double)field; - break; - case BINARY_TYPE: - nulls.set(binaryVars.size(), field == null); - binaryVars.add(field == null ? EMPTY_BINARY : ByteBuffer.wrap((byte[])field)); - break; - default: - nulls.set(stringVars.size(), field == null); - stringVars.add(field == null ? EMPTY_STRING : String.valueOf(field)); - break; - } - size++; - } - - private boolean[] boolVars() { - if (boolVars.length == size) { - boolean[] newVars = new boolean[size << 1]; - System.arraycopy(boolVars, 0, newVars, 0, size); - return boolVars = newVars; - } - return boolVars; - } - - private byte[] byteVars() { - if (byteVars.length == size) { - byte[] newVars = new byte[size << 1]; - System.arraycopy(byteVars, 0, newVars, 0, size); - return byteVars = newVars; - } - return byteVars; - } - - private short[] shortVars() { - if (shortVars.length == size) { - short[] newVars = new short[size << 1]; - System.arraycopy(shortVars, 0, newVars, 0, size); - return shortVars = newVars; - } - return shortVars; - } - - private int[] intVars() { - if (intVars.length == size) { - int[] newVars = new int[size << 1]; - System.arraycopy(intVars, 0, newVars, 0, size); - return intVars = newVars; - } - return intVars; - } - - private long[] longVars() { - if (longVars.length == size) { - long[] newVars = new long[size << 1]; - System.arraycopy(longVars, 0, newVars, 0, size); - return longVars = newVars; - } - return longVars; - } - - private double[] doubleVars() { - if (doubleVars.length == size) { - double[] newVars = new double[size << 1]; - System.arraycopy(doubleVars, 0, newVars, 0, size); - return doubleVars = newVars; - } - return doubleVars; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java deleted file mode 100644 index 47a582e2223e4..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java +++ /dev/null @@ -1,149 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import org.apache.hive.service.cli.thrift.TColumn; -import org.apache.hive.service.cli.thrift.TRow; -import org.apache.hive.service.cli.thrift.TRowSet; - -/** - * ColumnBasedSet. 
- */ -public class ColumnBasedSet implements RowSet { - - private long startOffset; - - private final Type[] types; // non-null only for writing (server-side) - private final List columns; - - public ColumnBasedSet(TableSchema schema) { - types = schema.toTypes(); - columns = new ArrayList(); - for (ColumnDescriptor colDesc : schema.getColumnDescriptors()) { - columns.add(new Column(colDesc.getType())); - } - } - - public ColumnBasedSet(TRowSet tRowSet) { - types = null; - columns = new ArrayList(); - for (TColumn tvalue : tRowSet.getColumns()) { - columns.add(new Column(tvalue)); - } - startOffset = tRowSet.getStartRowOffset(); - } - - private ColumnBasedSet(Type[] types, List columns, long startOffset) { - this.types = types; - this.columns = columns; - this.startOffset = startOffset; - } - - @Override - public ColumnBasedSet addRow(Object[] fields) { - for (int i = 0; i < fields.length; i++) { - columns.get(i).addValue(types[i], fields[i]); - } - return this; - } - - public List getColumns() { - return columns; - } - - @Override - public int numColumns() { - return columns.size(); - } - - @Override - public int numRows() { - return columns.isEmpty() ? 0 : columns.get(0).size(); - } - - @Override - public ColumnBasedSet extractSubset(int maxRows) { - int numRows = Math.min(numRows(), maxRows); - - List subset = new ArrayList(); - for (int i = 0; i < columns.size(); i++) { - subset.add(columns.get(i).extractSubset(0, numRows)); - } - ColumnBasedSet result = new ColumnBasedSet(types, subset, startOffset); - startOffset += numRows; - return result; - } - - @Override - public long getStartOffset() { - return startOffset; - } - - @Override - public void setStartOffset(long startOffset) { - this.startOffset = startOffset; - } - - public TRowSet toTRowSet() { - TRowSet tRowSet = new TRowSet(startOffset, new ArrayList()); - for (int i = 0; i < columns.size(); i++) { - tRowSet.addToColumns(columns.get(i).toTColumn()); - } - return tRowSet; - } - - @Override - public Iterator iterator() { - return new Iterator() { - - private int index; - private final Object[] convey = new Object[numColumns()]; - - @Override - public boolean hasNext() { - return index < numRows(); - } - - @Override - public Object[] next() { - for (int i = 0; i < columns.size(); i++) { - convey[i] = columns.get(i).get(index); - } - index++; - return convey; - } - - @Override - public void remove() { - throw new UnsupportedOperationException("remove"); - } - }; - } - - public Object[] fill(int index, Object[] convey) { - for (int i = 0; i < columns.size(); i++) { - convey[i] = columns.get(i).get(index); - } - return convey; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java deleted file mode 100644 index f0bbf14693160..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java +++ /dev/null @@ -1,99 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hive.service.cli.thrift.TColumnDesc; - - -/** - * ColumnDescriptor. - * - */ -public class ColumnDescriptor { - private final String name; - private final String comment; - private final TypeDescriptor type; - // ordinal position of this column in the schema - private final int position; - - public ColumnDescriptor(String name, String comment, TypeDescriptor type, int position) { - this.name = name; - this.comment = comment; - this.type = type; - this.position = position; - } - - public ColumnDescriptor(TColumnDesc tColumnDesc) { - name = tColumnDesc.getColumnName(); - comment = tColumnDesc.getComment(); - type = new TypeDescriptor(tColumnDesc.getTypeDesc()); - position = tColumnDesc.getPosition(); - } - - public ColumnDescriptor(FieldSchema column, int position) { - name = column.getName(); - comment = column.getComment(); - type = new TypeDescriptor(column.getType()); - this.position = position; - } - - public static ColumnDescriptor newPrimitiveColumnDescriptor(String name, String comment, Type type, int position) { - // Current usage looks like it's only for metadata columns, but if that changes then - // this method may need to require a type qualifiers aruments. - return new ColumnDescriptor(name, comment, new TypeDescriptor(type), position); - } - - public String getName() { - return name; - } - - public String getComment() { - return comment; - } - - public TypeDescriptor getTypeDescriptor() { - return type; - } - - public int getOrdinalPosition() { - return position; - } - - public TColumnDesc toTColumnDesc() { - TColumnDesc tColumnDesc = new TColumnDesc(); - tColumnDesc.setColumnName(name); - tColumnDesc.setComment(comment); - tColumnDesc.setTypeDesc(type.toTTypeDesc()); - tColumnDesc.setPosition(position); - return tColumnDesc; - } - - public Type getType() { - return type.getType(); - } - - public boolean isPrimitive() { - return type.getType().isPrimitiveType(); - } - - public String getTypeName() { - return type.getTypeName(); - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnValue.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnValue.java deleted file mode 100644 index 462b93a0f09fe..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnValue.java +++ /dev/null @@ -1,288 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import java.math.BigDecimal; -import java.sql.Date; -import java.sql.Timestamp; - -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; -import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; -import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.apache.hive.service.cli.thrift.TBoolValue; -import org.apache.hive.service.cli.thrift.TByteValue; -import org.apache.hive.service.cli.thrift.TColumnValue; -import org.apache.hive.service.cli.thrift.TDoubleValue; -import org.apache.hive.service.cli.thrift.TI16Value; -import org.apache.hive.service.cli.thrift.TI32Value; -import org.apache.hive.service.cli.thrift.TI64Value; -import org.apache.hive.service.cli.thrift.TStringValue; - -import org.apache.spark.unsafe.types.UTF8String; - -/** - * Protocols before HIVE_CLI_SERVICE_PROTOCOL_V6 (used by RowBasedSet) - * - */ -public class ColumnValue { - - private static TColumnValue booleanValue(Boolean value) { - TBoolValue tBoolValue = new TBoolValue(); - if (value != null) { - tBoolValue.setValue(value); - } - return TColumnValue.boolVal(tBoolValue); - } - - private static TColumnValue byteValue(Byte value) { - TByteValue tByteValue = new TByteValue(); - if (value != null) { - tByteValue.setValue(value); - } - return TColumnValue.byteVal(tByteValue); - } - - private static TColumnValue shortValue(Short value) { - TI16Value tI16Value = new TI16Value(); - if (value != null) { - tI16Value.setValue(value); - } - return TColumnValue.i16Val(tI16Value); - } - - private static TColumnValue intValue(Integer value) { - TI32Value tI32Value = new TI32Value(); - if (value != null) { - tI32Value.setValue(value); - } - return TColumnValue.i32Val(tI32Value); - } - - private static TColumnValue longValue(Long value) { - TI64Value tI64Value = new TI64Value(); - if (value != null) { - tI64Value.setValue(value); - } - return TColumnValue.i64Val(tI64Value); - } - - private static TColumnValue floatValue(Float value) { - TDoubleValue tDoubleValue = new TDoubleValue(); - if (value != null) { - tDoubleValue.setValue(value); - } - return TColumnValue.doubleVal(tDoubleValue); - } - - private static TColumnValue doubleValue(Double value) { - TDoubleValue tDoubleValue = new TDoubleValue(); - if (value != null) { - tDoubleValue.setValue(value); - } - return TColumnValue.doubleVal(tDoubleValue); - } - - private static TColumnValue stringValue(String value) { - TStringValue tStringValue = new TStringValue(); - if (value != null) { - tStringValue.setValue(value); - } - return TColumnValue.stringVal(tStringValue); - } - - private static TColumnValue stringValue(HiveChar value) { - TStringValue tStringValue = new TStringValue(); - if (value != null) { - tStringValue.setValue(value.toString()); - } - return TColumnValue.stringVal(tStringValue); - } - - private static TColumnValue stringValue(HiveVarchar value) { - TStringValue tStringValue = new TStringValue(); - if (value != null) { - tStringValue.setValue(value.toString()); - } - return TColumnValue.stringVal(tStringValue); - } - - private 
static TColumnValue stringValue(HiveIntervalYearMonth value) { - TStringValue tStrValue = new TStringValue(); - if (value != null) { - tStrValue.setValue(value.toString()); - } - return TColumnValue.stringVal(tStrValue); - } - - private static TColumnValue stringValue(HiveIntervalDayTime value) { - TStringValue tStrValue = new TStringValue(); - if (value != null) { - tStrValue.setValue(value.toString()); - } - return TColumnValue.stringVal(tStrValue); - } - - public static TColumnValue toTColumnValue(Type type, Object value) { - switch (type) { - case BOOLEAN_TYPE: - return booleanValue((Boolean)value); - case TINYINT_TYPE: - return byteValue((Byte)value); - case SMALLINT_TYPE: - return shortValue((Short)value); - case INT_TYPE: - return intValue((Integer)value); - case BIGINT_TYPE: - return longValue((Long)value); - case FLOAT_TYPE: - return floatValue((Float)value); - case DOUBLE_TYPE: - return doubleValue((Double)value); - case STRING_TYPE: - return stringValue((String)value); - case CHAR_TYPE: - return stringValue((HiveChar)value); - case VARCHAR_TYPE: - return stringValue((HiveVarchar)value); - case DATE_TYPE: - case TIMESTAMP_TYPE: - // SPARK-31859, SPARK-31861: converted to string already in SparkExecuteStatementOperation - return stringValue((String)value); - case INTERVAL_YEAR_MONTH_TYPE: - return stringValue((HiveIntervalYearMonth) value); - case INTERVAL_DAY_TIME_TYPE: - return stringValue((HiveIntervalDayTime) value); - case DECIMAL_TYPE: - String plainStr = value == null ? null : ((BigDecimal)value).toPlainString(); - return stringValue(plainStr); - case BINARY_TYPE: - String strVal = value == null ? null : UTF8String.fromBytes((byte[])value).toString(); - return stringValue(strVal); - case ARRAY_TYPE: - case MAP_TYPE: - case STRUCT_TYPE: - case UNION_TYPE: - case USER_DEFINED_TYPE: - return stringValue((String)value); - case NULL_TYPE: - return stringValue((String)value); - default: - return null; - } - } - - private static Boolean getBooleanValue(TBoolValue tBoolValue) { - if (tBoolValue.isSetValue()) { - return tBoolValue.isValue(); - } - return null; - } - - private static Byte getByteValue(TByteValue tByteValue) { - if (tByteValue.isSetValue()) { - return tByteValue.getValue(); - } - return null; - } - - private static Short getShortValue(TI16Value tI16Value) { - if (tI16Value.isSetValue()) { - return tI16Value.getValue(); - } - return null; - } - - private static Integer getIntegerValue(TI32Value tI32Value) { - if (tI32Value.isSetValue()) { - return tI32Value.getValue(); - } - return null; - } - - private static Long getLongValue(TI64Value tI64Value) { - if (tI64Value.isSetValue()) { - return tI64Value.getValue(); - } - return null; - } - - private static Double getDoubleValue(TDoubleValue tDoubleValue) { - if (tDoubleValue.isSetValue()) { - return tDoubleValue.getValue(); - } - return null; - } - - private static String getStringValue(TStringValue tStringValue) { - if (tStringValue.isSetValue()) { - return tStringValue.getValue(); - } - return null; - } - - private static Date getDateValue(TStringValue tStringValue) { - if (tStringValue.isSetValue()) { - return Date.valueOf(tStringValue.getValue()); - } - return null; - } - - private static Timestamp getTimestampValue(TStringValue tStringValue) { - if (tStringValue.isSetValue()) { - return Timestamp.valueOf(tStringValue.getValue()); - } - return null; - } - - private static byte[] getBinaryValue(TStringValue tString) { - if (tString.isSetValue()) { - return tString.getValue().getBytes(); - } - return null; - } - - 
private static BigDecimal getBigDecimalValue(TStringValue tStringValue) { - if (tStringValue.isSetValue()) { - return new BigDecimal(tStringValue.getValue()); - } - return null; - } - - public static Object toColumnValue(TColumnValue value) { - TColumnValue._Fields field = value.getSetField(); - switch (field) { - case BOOL_VAL: - return getBooleanValue(value.getBoolVal()); - case BYTE_VAL: - return getByteValue(value.getByteVal()); - case I16_VAL: - return getShortValue(value.getI16Val()); - case I32_VAL: - return getIntegerValue(value.getI32Val()); - case I64_VAL: - return getLongValue(value.getI64Val()); - case DOUBLE_VAL: - return getDoubleValue(value.getDoubleVal()); - case STRING_VAL: - return getStringValue(value.getStringVal()); - } - throw new IllegalArgumentException("never"); - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/EmbeddedCLIServiceClient.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/EmbeddedCLIServiceClient.java deleted file mode 100644 index 9cad5be198c06..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/EmbeddedCLIServiceClient.java +++ /dev/null @@ -1,208 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import java.util.List; -import java.util.Map; - -import org.apache.hive.service.auth.HiveAuthFactory; - - -/** - * EmbeddedCLIServiceClient. 
- * - */ -public class EmbeddedCLIServiceClient extends CLIServiceClient { - private final ICLIService cliService; - - public EmbeddedCLIServiceClient(ICLIService cliService) { - this.cliService = cliService; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#openSession(java.lang.String, java.lang.String, java.util.Map) - */ - @Override - public SessionHandle openSession(String username, String password, - Map configuration) throws HiveSQLException { - return cliService.openSession(username, password, configuration); - } - - @Override - public SessionHandle openSessionWithImpersonation(String username, String password, - Map configuration, String delegationToken) throws HiveSQLException { - throw new HiveSQLException("Impersonated session is not supported in the embedded mode"); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#closeSession(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public void closeSession(SessionHandle sessionHandle) throws HiveSQLException { - cliService.closeSession(sessionHandle); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#getInfo(org.apache.hive.service.cli.SessionHandle, java.util.List) - */ - @Override - public GetInfoValue getInfo(SessionHandle sessionHandle, GetInfoType getInfoType) - throws HiveSQLException { - return cliService.getInfo(sessionHandle, getInfoType); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#executeStatement(org.apache.hive.service.cli.SessionHandle, - * java.lang.String, java.util.Map) - */ - @Override - public OperationHandle executeStatement(SessionHandle sessionHandle, String statement, - Map confOverlay) throws HiveSQLException { - return cliService.executeStatement(sessionHandle, statement, confOverlay); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#executeStatementAsync(org.apache.hive.service.cli.SessionHandle, - * java.lang.String, java.util.Map) - */ - @Override - public OperationHandle executeStatementAsync(SessionHandle sessionHandle, String statement, - Map confOverlay) throws HiveSQLException { - return cliService.executeStatementAsync(sessionHandle, statement, confOverlay); - } - - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#getTypeInfo(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public OperationHandle getTypeInfo(SessionHandle sessionHandle) throws HiveSQLException { - return cliService.getTypeInfo(sessionHandle); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#getCatalogs(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public OperationHandle getCatalogs(SessionHandle sessionHandle) throws HiveSQLException { - return cliService.getCatalogs(sessionHandle); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#getSchemas(org.apache.hive.service.cli.SessionHandle, java.lang.String, java.lang.String) - */ - @Override - public OperationHandle getSchemas(SessionHandle sessionHandle, String catalogName, - String schemaName) throws HiveSQLException { - return cliService.getSchemas(sessionHandle, catalogName, schemaName); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#getTables(org.apache.hive.service.cli.SessionHandle, java.lang.String, java.lang.String, java.lang.String, java.util.List) - */ - @Override - public OperationHandle getTables(SessionHandle sessionHandle, String catalogName, - String schemaName, String 
tableName, List tableTypes) throws HiveSQLException { - return cliService.getTables(sessionHandle, catalogName, schemaName, tableName, tableTypes); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#getTableTypes(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public OperationHandle getTableTypes(SessionHandle sessionHandle) throws HiveSQLException { - return cliService.getTableTypes(sessionHandle); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#getColumns(org.apache.hive.service.cli.SessionHandle, java.lang.String, java.lang.String, java.lang.String, java.lang.String) - */ - @Override - public OperationHandle getColumns(SessionHandle sessionHandle, String catalogName, - String schemaName, String tableName, String columnName) throws HiveSQLException { - return cliService.getColumns(sessionHandle, catalogName, schemaName, tableName, columnName); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#getFunctions(org.apache.hive.service.cli.SessionHandle, java.lang.String) - */ - @Override - public OperationHandle getFunctions(SessionHandle sessionHandle, - String catalogName, String schemaName, String functionName) - throws HiveSQLException { - return cliService.getFunctions(sessionHandle, catalogName, schemaName, functionName); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#getOperationStatus(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public OperationStatus getOperationStatus(OperationHandle opHandle) throws HiveSQLException { - return cliService.getOperationStatus(opHandle); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#cancelOperation(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public void cancelOperation(OperationHandle opHandle) throws HiveSQLException { - cliService.cancelOperation(opHandle); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#closeOperation(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public void closeOperation(OperationHandle opHandle) throws HiveSQLException { - cliService.closeOperation(opHandle); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#getResultSetMetadata(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public TableSchema getResultSetMetadata(OperationHandle opHandle) throws HiveSQLException { - return cliService.getResultSetMetadata(opHandle); - } - - @Override - public RowSet fetchResults(OperationHandle opHandle, FetchOrientation orientation, - long maxRows, FetchType fetchType) throws HiveSQLException { - return cliService.fetchResults(opHandle, orientation, maxRows, fetchType); - } - - - @Override - public String getDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, - String owner, String renewer) throws HiveSQLException { - return cliService.getDelegationToken(sessionHandle, authFactory, owner, renewer); - } - - @Override - public void cancelDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, - String tokenStr) throws HiveSQLException { - cliService.cancelDelegationToken(sessionHandle, authFactory, tokenStr); - } - - @Override - public void renewDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, - String tokenStr) throws HiveSQLException { - cliService.renewDelegationToken(sessionHandle, authFactory, tokenStr); - } -} diff --git 
a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/FetchOrientation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/FetchOrientation.java deleted file mode 100644 index ffa6f2e1f3743..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/FetchOrientation.java +++ /dev/null @@ -1,54 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import org.apache.hive.service.cli.thrift.TFetchOrientation; - -/** - * FetchOrientation. - * - */ -public enum FetchOrientation { - FETCH_NEXT(TFetchOrientation.FETCH_NEXT), - FETCH_PRIOR(TFetchOrientation.FETCH_PRIOR), - FETCH_RELATIVE(TFetchOrientation.FETCH_RELATIVE), - FETCH_ABSOLUTE(TFetchOrientation.FETCH_ABSOLUTE), - FETCH_FIRST(TFetchOrientation.FETCH_FIRST), - FETCH_LAST(TFetchOrientation.FETCH_LAST); - - private TFetchOrientation tFetchOrientation; - - FetchOrientation(TFetchOrientation tFetchOrientation) { - this.tFetchOrientation = tFetchOrientation; - } - - public static FetchOrientation getFetchOrientation(TFetchOrientation tFetchOrientation) { - for (FetchOrientation fetchOrientation : values()) { - if (tFetchOrientation.equals(fetchOrientation.toTFetchOrientation())) { - return fetchOrientation; - } - } - // TODO: Should this really default to FETCH_NEXT? - return FETCH_NEXT; - } - - public TFetchOrientation toTFetchOrientation() { - return tFetchOrientation; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/GetInfoType.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/GetInfoType.java deleted file mode 100644 index 8dd33a88fdeb2..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/GetInfoType.java +++ /dev/null @@ -1,96 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import org.apache.hive.service.cli.thrift.TGetInfoType; - -/** - * GetInfoType. 
- * - */ -public enum GetInfoType { - CLI_MAX_DRIVER_CONNECTIONS(TGetInfoType.CLI_MAX_DRIVER_CONNECTIONS), - CLI_MAX_CONCURRENT_ACTIVITIES(TGetInfoType.CLI_MAX_CONCURRENT_ACTIVITIES), - CLI_DATA_SOURCE_NAME(TGetInfoType.CLI_DATA_SOURCE_NAME), - CLI_FETCH_DIRECTION(TGetInfoType.CLI_FETCH_DIRECTION), - CLI_SERVER_NAME(TGetInfoType.CLI_SERVER_NAME), - CLI_SEARCH_PATTERN_ESCAPE(TGetInfoType.CLI_SEARCH_PATTERN_ESCAPE), - CLI_DBMS_NAME(TGetInfoType.CLI_DBMS_NAME), - CLI_DBMS_VER(TGetInfoType.CLI_DBMS_VER), - CLI_ACCESSIBLE_TABLES(TGetInfoType.CLI_ACCESSIBLE_TABLES), - CLI_ACCESSIBLE_PROCEDURES(TGetInfoType.CLI_ACCESSIBLE_PROCEDURES), - CLI_CURSOR_COMMIT_BEHAVIOR(TGetInfoType.CLI_CURSOR_COMMIT_BEHAVIOR), - CLI_DATA_SOURCE_READ_ONLY(TGetInfoType.CLI_DATA_SOURCE_READ_ONLY), - CLI_DEFAULT_TXN_ISOLATION(TGetInfoType.CLI_DEFAULT_TXN_ISOLATION), - CLI_IDENTIFIER_CASE(TGetInfoType.CLI_IDENTIFIER_CASE), - CLI_IDENTIFIER_QUOTE_CHAR(TGetInfoType.CLI_IDENTIFIER_QUOTE_CHAR), - CLI_MAX_COLUMN_NAME_LEN(TGetInfoType.CLI_MAX_COLUMN_NAME_LEN), - CLI_MAX_CURSOR_NAME_LEN(TGetInfoType.CLI_MAX_CURSOR_NAME_LEN), - CLI_MAX_SCHEMA_NAME_LEN(TGetInfoType.CLI_MAX_SCHEMA_NAME_LEN), - CLI_MAX_CATALOG_NAME_LEN(TGetInfoType.CLI_MAX_CATALOG_NAME_LEN), - CLI_MAX_TABLE_NAME_LEN(TGetInfoType.CLI_MAX_TABLE_NAME_LEN), - CLI_SCROLL_CONCURRENCY(TGetInfoType.CLI_SCROLL_CONCURRENCY), - CLI_TXN_CAPABLE(TGetInfoType.CLI_TXN_CAPABLE), - CLI_USER_NAME(TGetInfoType.CLI_USER_NAME), - CLI_TXN_ISOLATION_OPTION(TGetInfoType.CLI_TXN_ISOLATION_OPTION), - CLI_INTEGRITY(TGetInfoType.CLI_INTEGRITY), - CLI_GETDATA_EXTENSIONS(TGetInfoType.CLI_GETDATA_EXTENSIONS), - CLI_NULL_COLLATION(TGetInfoType.CLI_NULL_COLLATION), - CLI_ALTER_TABLE(TGetInfoType.CLI_ALTER_TABLE), - CLI_ORDER_BY_COLUMNS_IN_SELECT(TGetInfoType.CLI_ORDER_BY_COLUMNS_IN_SELECT), - CLI_SPECIAL_CHARACTERS(TGetInfoType.CLI_SPECIAL_CHARACTERS), - CLI_MAX_COLUMNS_IN_GROUP_BY(TGetInfoType.CLI_MAX_COLUMNS_IN_GROUP_BY), - CLI_MAX_COLUMNS_IN_INDEX(TGetInfoType.CLI_MAX_COLUMNS_IN_INDEX), - CLI_MAX_COLUMNS_IN_ORDER_BY(TGetInfoType.CLI_MAX_COLUMNS_IN_ORDER_BY), - CLI_MAX_COLUMNS_IN_SELECT(TGetInfoType.CLI_MAX_COLUMNS_IN_SELECT), - CLI_MAX_COLUMNS_IN_TABLE(TGetInfoType.CLI_MAX_COLUMNS_IN_TABLE), - CLI_MAX_INDEX_SIZE(TGetInfoType.CLI_MAX_INDEX_SIZE), - CLI_MAX_ROW_SIZE(TGetInfoType.CLI_MAX_ROW_SIZE), - CLI_MAX_STATEMENT_LEN(TGetInfoType.CLI_MAX_STATEMENT_LEN), - CLI_MAX_TABLES_IN_SELECT(TGetInfoType.CLI_MAX_TABLES_IN_SELECT), - CLI_MAX_USER_NAME_LEN(TGetInfoType.CLI_MAX_USER_NAME_LEN), - CLI_OJ_CAPABILITIES(TGetInfoType.CLI_OJ_CAPABILITIES), - - CLI_XOPEN_CLI_YEAR(TGetInfoType.CLI_XOPEN_CLI_YEAR), - CLI_CURSOR_SENSITIVITY(TGetInfoType.CLI_CURSOR_SENSITIVITY), - CLI_DESCRIBE_PARAMETER(TGetInfoType.CLI_DESCRIBE_PARAMETER), - CLI_CATALOG_NAME(TGetInfoType.CLI_CATALOG_NAME), - CLI_COLLATION_SEQ(TGetInfoType.CLI_COLLATION_SEQ), - CLI_MAX_IDENTIFIER_LEN(TGetInfoType.CLI_MAX_IDENTIFIER_LEN); - - private final TGetInfoType tInfoType; - - GetInfoType(TGetInfoType tInfoType) { - this.tInfoType = tInfoType; - } - - public static GetInfoType getGetInfoType(TGetInfoType tGetInfoType) { - for (GetInfoType infoType : values()) { - if (tGetInfoType.equals(infoType.tInfoType)) { - return infoType; - } - } - throw new IllegalArgumentException("Unrecognized Thrift TGetInfoType value: " + tGetInfoType); - } - - public TGetInfoType toTGetInfoType() { - return tInfoType; - } - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/GetInfoValue.java 
b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/GetInfoValue.java deleted file mode 100644 index ba92ff4ab5c11..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/GetInfoValue.java +++ /dev/null @@ -1,82 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import org.apache.hive.service.cli.thrift.TGetInfoValue; - -/** - * GetInfoValue. - * - */ -public class GetInfoValue { - private String stringValue = null; - private short shortValue; - private int intValue; - private long longValue; - - public GetInfoValue(String stringValue) { - this.stringValue = stringValue; - } - - public GetInfoValue(short shortValue) { - this.shortValue = shortValue; - } - - public GetInfoValue(int intValue) { - this.intValue = intValue; - } - - public GetInfoValue(long longValue) { - this.longValue = longValue; - } - - public GetInfoValue(TGetInfoValue tGetInfoValue) { - switch (tGetInfoValue.getSetField()) { - case STRING_VALUE: - stringValue = tGetInfoValue.getStringValue(); - break; - default: - throw new IllegalArgumentException("Unreconigzed TGetInfoValue"); - } - } - - public TGetInfoValue toTGetInfoValue() { - TGetInfoValue tInfoValue = new TGetInfoValue(); - if (stringValue != null) { - tInfoValue.setStringValue(stringValue); - } - return tInfoValue; - } - - public String getStringValue() { - return stringValue; - } - - public short getShortValue() { - return shortValue; - } - - public int getIntValue() { - return intValue; - } - - public long getLongValue() { - return longValue; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Handle.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Handle.java deleted file mode 100644 index cf3427ae20f3c..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Handle.java +++ /dev/null @@ -1,78 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hive.service.cli; - -import org.apache.hive.service.cli.thrift.THandleIdentifier; - - - - -public abstract class Handle { - - private final HandleIdentifier handleId; - - public Handle() { - handleId = new HandleIdentifier(); - } - - public Handle(HandleIdentifier handleId) { - this.handleId = handleId; - } - - public Handle(THandleIdentifier tHandleIdentifier) { - this.handleId = new HandleIdentifier(tHandleIdentifier); - } - - public HandleIdentifier getHandleIdentifier() { - return handleId; - } - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((handleId == null) ? 0 : handleId.hashCode()); - return result; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (!(obj instanceof Handle)) { - return false; - } - Handle other = (Handle) obj; - if (handleId == null) { - if (other.handleId != null) { - return false; - } - } else if (!handleId.equals(other.handleId)) { - return false; - } - return true; - } - - @Override - public abstract String toString(); - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/HandleIdentifier.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/HandleIdentifier.java deleted file mode 100644 index 4dc80da8dc500..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/HandleIdentifier.java +++ /dev/null @@ -1,113 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import java.nio.ByteBuffer; -import java.util.UUID; - -import org.apache.hive.service.cli.thrift.THandleIdentifier; - -/** - * HandleIdentifier. 
- * - */ -public class HandleIdentifier { - private final UUID publicId; - private final UUID secretId; - - public HandleIdentifier() { - publicId = UUID.randomUUID(); - secretId = UUID.randomUUID(); - } - - public HandleIdentifier(UUID publicId, UUID secretId) { - this.publicId = publicId; - this.secretId = secretId; - } - - public HandleIdentifier(THandleIdentifier tHandleId) { - ByteBuffer bb = ByteBuffer.wrap(tHandleId.getGuid()); - this.publicId = new UUID(bb.getLong(), bb.getLong()); - bb = ByteBuffer.wrap(tHandleId.getSecret()); - this.secretId = new UUID(bb.getLong(), bb.getLong()); - } - - public UUID getPublicId() { - return publicId; - } - - public UUID getSecretId() { - return secretId; - } - - public THandleIdentifier toTHandleIdentifier() { - byte[] guid = new byte[16]; - byte[] secret = new byte[16]; - ByteBuffer guidBB = ByteBuffer.wrap(guid); - ByteBuffer secretBB = ByteBuffer.wrap(secret); - guidBB.putLong(publicId.getMostSignificantBits()); - guidBB.putLong(publicId.getLeastSignificantBits()); - secretBB.putLong(secretId.getMostSignificantBits()); - secretBB.putLong(secretId.getLeastSignificantBits()); - return new THandleIdentifier(ByteBuffer.wrap(guid), ByteBuffer.wrap(secret)); - } - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((publicId == null) ? 0 : publicId.hashCode()); - result = prime * result + ((secretId == null) ? 0 : secretId.hashCode()); - return result; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (!(obj instanceof HandleIdentifier)) { - return false; - } - HandleIdentifier other = (HandleIdentifier) obj; - if (publicId == null) { - if (other.publicId != null) { - return false; - } - } else if (!publicId.equals(other.publicId)) { - return false; - } - if (secretId == null) { - if (other.secretId != null) { - return false; - } - } else if (!secretId.equals(other.secretId)) { - return false; - } - return true; - } - - @Override - public String toString() { - return publicId.toString(); - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/HiveSQLException.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/HiveSQLException.java deleted file mode 100644 index 86e57fbf31fe0..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/HiveSQLException.java +++ /dev/null @@ -1,249 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.service.cli; - -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.hive.service.cli.thrift.TStatus; -import org.apache.hive.service.cli.thrift.TStatusCode; - -/** - * HiveSQLException. - * - */ -public class HiveSQLException extends SQLException { - - /** - * - */ - private static final long serialVersionUID = -6095254671958748094L; - - /** - * - */ - public HiveSQLException() { - super(); - } - - /** - * @param reason - */ - public HiveSQLException(String reason) { - super(reason); - } - - /** - * @param cause - */ - public HiveSQLException(Throwable cause) { - super(cause); - } - - /** - * @param reason - * @param sqlState - */ - public HiveSQLException(String reason, String sqlState) { - super(reason, sqlState); - } - - /** - * @param reason - * @param cause - */ - public HiveSQLException(String reason, Throwable cause) { - super(reason, cause); - } - - /** - * @param reason - * @param sqlState - * @param vendorCode - */ - public HiveSQLException(String reason, String sqlState, int vendorCode) { - super(reason, sqlState, vendorCode); - } - - /** - * @param reason - * @param sqlState - * @param cause - */ - public HiveSQLException(String reason, String sqlState, Throwable cause) { - super(reason, sqlState, cause); - } - - /** - * @param reason - * @param sqlState - * @param vendorCode - * @param cause - */ - public HiveSQLException(String reason, String sqlState, int vendorCode, Throwable cause) { - super(reason, sqlState, vendorCode, cause); - } - - public HiveSQLException(TStatus status) { - // TODO: set correct vendorCode field - super(status.getErrorMessage(), status.getSqlState(), status.getErrorCode()); - if (status.getInfoMessages() != null) { - initCause(toCause(status.getInfoMessages())); - } - } - - /** - * Converts current object to a {@link TStatus} object - * @return a {@link TStatus} object - */ - public TStatus toTStatus() { - // TODO: convert sqlState, etc. - TStatus tStatus = new TStatus(TStatusCode.ERROR_STATUS); - tStatus.setSqlState(getSQLState()); - tStatus.setErrorCode(getErrorCode()); - tStatus.setErrorMessage(getMessage()); - tStatus.setInfoMessages(toString(this)); - return tStatus; - } - - /** - * Converts the specified {@link Exception} object into a {@link TStatus} object - * @param e a {@link Exception} object - * @return a {@link TStatus} object - */ - public static TStatus toTStatus(Exception e) { - if (e instanceof HiveSQLException) { - return ((HiveSQLException)e).toTStatus(); - } - TStatus tStatus = new TStatus(TStatusCode.ERROR_STATUS); - tStatus.setErrorMessage(e.getMessage()); - tStatus.setInfoMessages(toString(e)); - return tStatus; - } - - /** - * Converts a {@link Throwable} object into a flattened list of texts including its stack trace - * and the stack traces of the nested causes. - * @param ex a {@link Throwable} object - * @return a flattened list of texts including the {@link Throwable} object's stack trace - * and the stack traces of the nested causes. 
- */ - public static List toString(Throwable ex) { - return toString(ex, null); - } - - private static List toString(Throwable cause, StackTraceElement[] parent) { - StackTraceElement[] trace = cause.getStackTrace(); - int m = trace.length - 1; - if (parent != null) { - int n = parent.length - 1; - while (m >= 0 && n >= 0 && trace[m].equals(parent[n])) { - m--; - n--; - } - } - List detail = enroll(cause, trace, m); - cause = cause.getCause(); - if (cause != null) { - detail.addAll(toString(cause, trace)); - } - return detail; - } - - private static List enroll(Throwable ex, StackTraceElement[] trace, int max) { - List details = new ArrayList(); - StringBuilder builder = new StringBuilder(); - builder.append('*').append(ex.getClass().getName()).append(':'); - builder.append(ex.getMessage()).append(':'); - builder.append(trace.length).append(':').append(max); - details.add(builder.toString()); - for (int i = 0; i <= max; i++) { - builder.setLength(0); - builder.append(trace[i].getClassName()).append(':'); - builder.append(trace[i].getMethodName()).append(':'); - String fileName = trace[i].getFileName(); - builder.append(fileName == null ? "" : fileName).append(':'); - builder.append(trace[i].getLineNumber()); - details.add(builder.toString()); - } - return details; - } - - /** - * Converts a flattened list of texts including the stack trace and the stack - * traces of the nested causes into a {@link Throwable} object. - * @param details a flattened list of texts including the stack trace and the stack - * traces of the nested causes - * @return a {@link Throwable} object - */ - public static Throwable toCause(List details) { - return toStackTrace(details, null, 0); - } - - private static Throwable toStackTrace(List details, StackTraceElement[] parent, int index) { - String detail = details.get(index++); - if (!detail.startsWith("*")) { - return null; // should not be happened. 
ignore remaining - } - int i1 = detail.indexOf(':'); - int i3 = detail.lastIndexOf(':'); - int i2 = detail.substring(0, i3).lastIndexOf(':'); - String exceptionClass = detail.substring(1, i1); - String exceptionMessage = detail.substring(i1 + 1, i2); - Throwable ex = newInstance(exceptionClass, exceptionMessage); - - Integer length = Integer.valueOf(detail.substring(i2 + 1, i3)); - Integer unique = Integer.valueOf(detail.substring(i3 + 1)); - - int i = 0; - StackTraceElement[] trace = new StackTraceElement[length]; - for (; i <= unique; i++) { - detail = details.get(index++); - int j1 = detail.indexOf(':'); - int j3 = detail.lastIndexOf(':'); - int j2 = detail.substring(0, j3).lastIndexOf(':'); - String className = detail.substring(0, j1); - String methodName = detail.substring(j1 + 1, j2); - String fileName = detail.substring(j2 + 1, j3); - if (fileName.isEmpty()) { - fileName = null; - } - int lineNumber = Integer.valueOf(detail.substring(j3 + 1)); - trace[i] = new StackTraceElement(className, methodName, fileName, lineNumber); - } - int common = trace.length - i; - if (common > 0) { - System.arraycopy(parent, parent.length - common, trace, trace.length - common, common); - } - if (details.size() > index) { - ex.initCause(toStackTrace(details, trace, index)); - } - ex.setStackTrace(trace); - return ex; - } - - private static Throwable newInstance(String className, String message) { - try { - return (Throwable)Class.forName(className).getConstructor(String.class).newInstance(message); - } catch (Exception e) { - return new RuntimeException(className + ":" + message); - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ICLIService.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ICLIService.java deleted file mode 100644 index c9cc1f4da56f1..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ICLIService.java +++ /dev/null @@ -1,105 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
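HiveSQLException above ships exception chains over Thrift by flattening them into strings: per exception, a header line `*class:message:frameCount:lastUnsharedFrameIndex`, then one `class:method:file:line` entry for each stack frame not shared with the enclosing exception's trace; toCause/toStackTrace reverse the encoding. A standalone sketch of just the flattening half, under that reading of the code above (class and method names here are illustrative):

```java
import java.util.ArrayList;
import java.util.List;

public class FlattenThrowable {
  // Flatten an exception chain into the header + per-frame format used by
  // HiveSQLException.toString/enroll above, eliding frames shared with the parent trace.
  static List<String> flatten(Throwable cause, StackTraceElement[] parent) {
    StackTraceElement[] trace = cause.getStackTrace();
    int m = trace.length - 1;
    if (parent != null) {
      int n = parent.length - 1;
      while (m >= 0 && n >= 0 && trace[m].equals(parent[n])) {
        m--;
        n--;
      }
    }
    List<String> out = new ArrayList<>();
    out.add("*" + cause.getClass().getName() + ":" + cause.getMessage()
        + ":" + trace.length + ":" + m);
    for (int i = 0; i <= m; i++) {
      StackTraceElement e = trace[i];
      String file = e.getFileName() == null ? "" : e.getFileName();
      out.add(e.getClassName() + ":" + e.getMethodName() + ":" + file + ":" + e.getLineNumber());
    }
    if (cause.getCause() != null) {
      out.addAll(flatten(cause.getCause(), trace));
    }
    return out;
  }

  public static void main(String[] args) {
    Exception e = new RuntimeException("outer", new IllegalStateException("inner"));
    flatten(e, null).forEach(System.out::println);
  }
}
```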
- */ -package org.apache.hive.service.cli; - -import java.util.List; -import java.util.Map; - - - - -import org.apache.hive.service.auth.HiveAuthFactory; - -public interface ICLIService { - - SessionHandle openSession(String username, String password, - Map configuration) - throws HiveSQLException; - - SessionHandle openSessionWithImpersonation(String username, String password, - Map configuration, String delegationToken) - throws HiveSQLException; - - void closeSession(SessionHandle sessionHandle) - throws HiveSQLException; - - GetInfoValue getInfo(SessionHandle sessionHandle, GetInfoType infoType) - throws HiveSQLException; - - OperationHandle executeStatement(SessionHandle sessionHandle, String statement, - Map confOverlay) - throws HiveSQLException; - - OperationHandle executeStatementAsync(SessionHandle sessionHandle, - String statement, Map confOverlay) - throws HiveSQLException; - - OperationHandle getTypeInfo(SessionHandle sessionHandle) - throws HiveSQLException; - - OperationHandle getCatalogs(SessionHandle sessionHandle) - throws HiveSQLException; - - OperationHandle getSchemas(SessionHandle sessionHandle, - String catalogName, String schemaName) - throws HiveSQLException; - - OperationHandle getTables(SessionHandle sessionHandle, - String catalogName, String schemaName, String tableName, List tableTypes) - throws HiveSQLException; - - OperationHandle getTableTypes(SessionHandle sessionHandle) - throws HiveSQLException; - - OperationHandle getColumns(SessionHandle sessionHandle, - String catalogName, String schemaName, String tableName, String columnName) - throws HiveSQLException; - - OperationHandle getFunctions(SessionHandle sessionHandle, - String catalogName, String schemaName, String functionName) - throws HiveSQLException; - - OperationStatus getOperationStatus(OperationHandle opHandle) - throws HiveSQLException; - - void cancelOperation(OperationHandle opHandle) - throws HiveSQLException; - - void closeOperation(OperationHandle opHandle) - throws HiveSQLException; - - TableSchema getResultSetMetadata(OperationHandle opHandle) - throws HiveSQLException; - - RowSet fetchResults(OperationHandle opHandle) - throws HiveSQLException; - - RowSet fetchResults(OperationHandle opHandle, FetchOrientation orientation, - long maxRows, FetchType fetchType) throws HiveSQLException; - - String getDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, - String owner, String renewer) throws HiveSQLException; - - void cancelDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, - String tokenStr) throws HiveSQLException; - - void renewDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, - String tokenStr) throws HiveSQLException; - - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationHandle.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationHandle.java deleted file mode 100644 index 5426e28471239..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationHandle.java +++ /dev/null @@ -1,102 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hive.service.cli; - -import org.apache.hive.service.cli.thrift.TOperationHandle; -import org.apache.hive.service.cli.thrift.TProtocolVersion; - -public class OperationHandle extends Handle { - - private final OperationType opType; - private final TProtocolVersion protocol; - private boolean hasResultSet = false; - - public OperationHandle(OperationType opType, TProtocolVersion protocol) { - super(); - this.opType = opType; - this.protocol = protocol; - } - - // dummy handle for ThriftCLIService - public OperationHandle(TOperationHandle tOperationHandle) { - this(tOperationHandle, TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V1); - } - - public OperationHandle(TOperationHandle tOperationHandle, TProtocolVersion protocol) { - super(tOperationHandle.getOperationId()); - this.opType = OperationType.getOperationType(tOperationHandle.getOperationType()); - this.hasResultSet = tOperationHandle.isHasResultSet(); - this.protocol = protocol; - } - - public OperationType getOperationType() { - return opType; - } - - public void setHasResultSet(boolean hasResultSet) { - this.hasResultSet = hasResultSet; - } - - public boolean hasResultSet() { - return hasResultSet; - } - - public TOperationHandle toTOperationHandle() { - TOperationHandle tOperationHandle = new TOperationHandle(); - tOperationHandle.setOperationId(getHandleIdentifier().toTHandleIdentifier()); - tOperationHandle.setOperationType(opType.toTOperationType()); - tOperationHandle.setHasResultSet(hasResultSet); - return tOperationHandle; - } - - public TProtocolVersion getProtocolVersion() { - return protocol; - } - - @Override - public int hashCode() { - final int prime = 31; - int result = super.hashCode(); - result = prime * result + ((opType == null) ? 0 : opType.hashCode()); - return result; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (!super.equals(obj)) { - return false; - } - if (!(obj instanceof OperationHandle)) { - return false; - } - OperationHandle other = (OperationHandle) obj; - if (opType != other.opType) { - return false; - } - return true; - } - - @Override - public String toString() { - return "OperationHandle [opType=" + opType + ", getHandleIdentifier()=" + getHandleIdentifier() - + "]"; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationState.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationState.java deleted file mode 100644 index 1165180118413..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationState.java +++ /dev/null @@ -1,108 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import org.apache.hive.service.cli.thrift.TOperationState; - -/** - * OperationState. - * - */ -public enum OperationState { - INITIALIZED(TOperationState.INITIALIZED_STATE, false), - RUNNING(TOperationState.RUNNING_STATE, false), - FINISHED(TOperationState.FINISHED_STATE, true), - CANCELED(TOperationState.CANCELED_STATE, true), - CLOSED(TOperationState.CLOSED_STATE, true), - ERROR(TOperationState.ERROR_STATE, true), - UNKNOWN(TOperationState.UKNOWN_STATE, false), - PENDING(TOperationState.PENDING_STATE, false); - - private final TOperationState tOperationState; - private final boolean terminal; - - OperationState(TOperationState tOperationState, boolean terminal) { - this.tOperationState = tOperationState; - this.terminal = terminal; - } - - // must be sync with TOperationState in order - public static OperationState getOperationState(TOperationState tOperationState) { - return OperationState.values()[tOperationState.getValue()]; - } - - public static void validateTransition(OperationState oldState, - OperationState newState) - throws HiveSQLException { - switch (oldState) { - case INITIALIZED: - switch (newState) { - case PENDING: - case RUNNING: - case CANCELED: - case CLOSED: - return; - } - break; - case PENDING: - switch (newState) { - case RUNNING: - case FINISHED: - case CANCELED: - case ERROR: - case CLOSED: - return; - } - break; - case RUNNING: - switch (newState) { - case FINISHED: - case CANCELED: - case ERROR: - case CLOSED: - return; - } - break; - case FINISHED: - case CANCELED: - case ERROR: - if (OperationState.CLOSED.equals(newState)) { - return; - } - break; - default: - // fall-through - } - throw new HiveSQLException("Illegal Operation state transition " + - "from " + oldState + " to " + newState); - } - - public void validateTransition(OperationState newState) - throws HiveSQLException { - validateTransition(this, newState); - } - - public TOperationState toTOperationState() { - return tOperationState; - } - - public boolean isTerminal() { - return terminal; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationType.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationType.java deleted file mode 100644 index 429d9a4c25688..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationType.java +++ /dev/null @@ -1,58 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
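OperationState.validateTransition above encodes the operation lifecycle as nested switches. The same rules can be read as a small transition table; a JDK-only sketch of that reading, not part of this patch (state names follow the enum above, the EnumMap/EnumSet encoding is just one way to express it):

```java
import java.util.EnumMap;
import java.util.EnumSet;
import java.util.Map;

public class LifecycleCheck {
  enum State { INITIALIZED, PENDING, RUNNING, FINISHED, CANCELED, ERROR, CLOSED, UNKNOWN }

  // Allowed transitions, mirroring validateTransition above; states absent from the
  // map (CLOSED, UNKNOWN) allow no further transitions.
  static final Map<State, EnumSet<State>> ALLOWED = new EnumMap<>(State.class);
  static {
    ALLOWED.put(State.INITIALIZED,
        EnumSet.of(State.PENDING, State.RUNNING, State.CANCELED, State.CLOSED));
    ALLOWED.put(State.PENDING,
        EnumSet.of(State.RUNNING, State.FINISHED, State.CANCELED, State.ERROR, State.CLOSED));
    ALLOWED.put(State.RUNNING,
        EnumSet.of(State.FINISHED, State.CANCELED, State.ERROR, State.CLOSED));
    ALLOWED.put(State.FINISHED, EnumSet.of(State.CLOSED));
    ALLOWED.put(State.CANCELED, EnumSet.of(State.CLOSED));
    ALLOWED.put(State.ERROR, EnumSet.of(State.CLOSED));
  }

  static boolean canTransition(State from, State to) {
    return ALLOWED.getOrDefault(from, EnumSet.noneOf(State.class)).contains(to);
  }

  public static void main(String[] args) {
    System.out.println(canTransition(State.RUNNING, State.FINISHED)); // true
    System.out.println(canTransition(State.CLOSED, State.RUNNING));   // false
  }
}
```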
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import org.apache.hive.service.cli.thrift.TOperationType; - -/** - * OperationType. - * - */ -public enum OperationType { - - UNKNOWN_OPERATION(TOperationType.UNKNOWN), - EXECUTE_STATEMENT(TOperationType.EXECUTE_STATEMENT), - GET_TYPE_INFO(TOperationType.GET_TYPE_INFO), - GET_CATALOGS(TOperationType.GET_CATALOGS), - GET_SCHEMAS(TOperationType.GET_SCHEMAS), - GET_TABLES(TOperationType.GET_TABLES), - GET_TABLE_TYPES(TOperationType.GET_TABLE_TYPES), - GET_COLUMNS(TOperationType.GET_COLUMNS), - GET_FUNCTIONS(TOperationType.GET_FUNCTIONS); - - private TOperationType tOperationType; - - OperationType(TOperationType tOpType) { - this.tOperationType = tOpType; - } - - public static OperationType getOperationType(TOperationType tOperationType) { - // TODO: replace this with a Map? - for (OperationType opType : values()) { - if (tOperationType.equals(opType.tOperationType)) { - return opType; - } - } - return OperationType.UNKNOWN_OPERATION; - } - - public TOperationType toTOperationType() { - return tOperationType; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/PatternOrIdentifier.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/PatternOrIdentifier.java deleted file mode 100644 index 6e4d43fd5df63..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/PatternOrIdentifier.java +++ /dev/null @@ -1,47 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -/** - * PatternOrIdentifier. 
- * - */ -public class PatternOrIdentifier { - - boolean isPattern = false; - String text; - - public PatternOrIdentifier(String tpoi) { - text = tpoi; - isPattern = false; - } - - public boolean isPattern() { - return isPattern; - } - - public boolean isIdentifier() { - return !isPattern; - } - - @Override - public String toString() { - return text; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowBasedSet.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowBasedSet.java deleted file mode 100644 index 7452137f077db..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowBasedSet.java +++ /dev/null @@ -1,140 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import org.apache.hive.service.cli.thrift.TColumnValue; -import org.apache.hive.service.cli.thrift.TRow; -import org.apache.hive.service.cli.thrift.TRowSet; - -/** - * RowBasedSet - */ -public class RowBasedSet implements RowSet { - - private long startOffset; - - private final Type[] types; // non-null only for writing (server-side) - private final RemovableList rows; - - public RowBasedSet(TableSchema schema) { - types = schema.toTypes(); - rows = new RemovableList(); - } - - public RowBasedSet(TRowSet tRowSet) { - types = null; - rows = new RemovableList(tRowSet.getRows()); - startOffset = tRowSet.getStartRowOffset(); - } - - private RowBasedSet(Type[] types, List rows, long startOffset) { - this.types = types; - this.rows = new RemovableList(rows); - this.startOffset = startOffset; - } - - @Override - public RowBasedSet addRow(Object[] fields) { - TRow tRow = new TRow(); - for (int i = 0; i < fields.length; i++) { - tRow.addToColVals(ColumnValue.toTColumnValue(types[i], fields[i])); - } - rows.add(tRow); - return this; - } - - @Override - public int numColumns() { - return rows.isEmpty() ? 
0 : rows.get(0).getColVals().size(); - } - - @Override - public int numRows() { - return rows.size(); - } - - public RowBasedSet extractSubset(int maxRows) { - int numRows = Math.min(numRows(), maxRows); - RowBasedSet result = new RowBasedSet(types, rows.subList(0, numRows), startOffset); - rows.removeRange(0, numRows); - startOffset += numRows; - return result; - } - - public long getStartOffset() { - return startOffset; - } - - public void setStartOffset(long startOffset) { - this.startOffset = startOffset; - } - - public int getSize() { - return rows.size(); - } - - public TRowSet toTRowSet() { - TRowSet tRowSet = new TRowSet(); - tRowSet.setStartRowOffset(startOffset); - tRowSet.setRows(new ArrayList(rows)); - return tRowSet; - } - - @Override - public Iterator iterator() { - return new Iterator() { - - final Iterator iterator = rows.iterator(); - final Object[] convey = new Object[numColumns()]; - - @Override - public boolean hasNext() { - return iterator.hasNext(); - } - - @Override - public Object[] next() { - TRow row = iterator.next(); - List values = row.getColVals(); - for (int i = 0; i < values.size(); i++) { - convey[i] = ColumnValue.toColumnValue(values.get(i)); - } - return convey; - } - - @Override - public void remove() { - throw new UnsupportedOperationException("remove"); - } - }; - } - - private static class RemovableList extends ArrayList { - RemovableList() { super(); } - RemovableList(List rows) { super(rows); } - @Override - public void removeRange(int fromIndex, int toIndex) { - super.removeRange(fromIndex, toIndex); - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowSet.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowSet.java deleted file mode 100644 index ab0787e1d389e..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowSet.java +++ /dev/null @@ -1,38 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
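RowBasedSet above pages results by extracting a prefix of the buffered rows, dropping it from the buffer, and advancing a start offset; it subclasses ArrayList solely to expose the protected removeRange. A generic sketch of that pattern, not part of this patch (the element type, class name, and method names are illustrative):

```java
import java.util.ArrayList;
import java.util.List;

public class PagingBuffer<T> {
  // Subclassing ArrayList only to make the protected removeRange callable,
  // as RowBasedSet.RemovableList does above.
  private static class RemovableList<E> extends ArrayList<E> {
    @Override
    public void removeRange(int fromIndex, int toIndex) {
      super.removeRange(fromIndex, toIndex);
    }
  }

  private final RemovableList<T> rows = new RemovableList<>();
  private long startOffset;

  public void add(T row) {
    rows.add(row);
  }

  // Return up to maxRows buffered rows, drop them from the buffer, advance the offset;
  // the sublist is copied before removeRange so the returned page stays valid.
  public List<T> extract(int maxRows) {
    int n = Math.min(rows.size(), maxRows);
    List<T> page = new ArrayList<>(rows.subList(0, n));
    rows.removeRange(0, n);
    startOffset += n;
    return page;
  }

  public long getStartOffset() {
    return startOffset;
  }

  public static void main(String[] args) {
    PagingBuffer<String> buf = new PagingBuffer<>();
    buf.add("a"); buf.add("b"); buf.add("c");
    System.out.println(buf.extract(2));       // [a, b]
    System.out.println(buf.getStartOffset()); // 2
  }
}
```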
- */ - -package org.apache.hive.service.cli; - -import org.apache.hive.service.cli.thrift.TRowSet; - -public interface RowSet extends Iterable { - - RowSet addRow(Object[] fields); - - RowSet extractSubset(int maxRows); - - int numColumns(); - - int numRows(); - - long getStartOffset(); - - void setStartOffset(long startOffset); - - TRowSet toTRowSet(); -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowSetFactory.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowSetFactory.java deleted file mode 100644 index e8f68eaaf9063..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowSetFactory.java +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import org.apache.hive.service.cli.thrift.TProtocolVersion; -import org.apache.hive.service.cli.thrift.TRowSet; - -import static org.apache.hive.service.cli.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6; - -public class RowSetFactory { - - public static RowSet create(TableSchema schema, TProtocolVersion version) { - if (version.getValue() >= HIVE_CLI_SERVICE_PROTOCOL_V6.getValue()) { - return new ColumnBasedSet(schema); - } - return new RowBasedSet(schema); - } - - public static RowSet create(TRowSet results, TProtocolVersion version) { - if (version.getValue() >= HIVE_CLI_SERVICE_PROTOCOL_V6.getValue()) { - return new ColumnBasedSet(results); - } - return new RowBasedSet(results); - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/SessionHandle.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/SessionHandle.java deleted file mode 100644 index 52e0ad4834d8b..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/SessionHandle.java +++ /dev/null @@ -1,67 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.service.cli; - -import java.util.UUID; - -import org.apache.hive.service.cli.thrift.TProtocolVersion; -import org.apache.hive.service.cli.thrift.TSessionHandle; - - -/** - * SessionHandle. - * - */ -public class SessionHandle extends Handle { - - private final TProtocolVersion protocol; - - public SessionHandle(TProtocolVersion protocol) { - this.protocol = protocol; - } - - // dummy handle for ThriftCLIService - public SessionHandle(TSessionHandle tSessionHandle) { - this(tSessionHandle, TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V1); - } - - public SessionHandle(TSessionHandle tSessionHandle, TProtocolVersion protocol) { - super(tSessionHandle.getSessionId()); - this.protocol = protocol; - } - - public UUID getSessionId() { - return getHandleIdentifier().getPublicId(); - } - - public TSessionHandle toTSessionHandle() { - TSessionHandle tSessionHandle = new TSessionHandle(); - tSessionHandle.setSessionId(getHandleIdentifier().toTHandleIdentifier()); - return tSessionHandle; - } - - public TProtocolVersion getProtocolVersion() { - return protocol; - } - - @Override - public String toString() { - return "SessionHandle [" + getHandleIdentifier() + "]"; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TableSchema.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TableSchema.java deleted file mode 100644 index ee019bc737101..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TableSchema.java +++ /dev/null @@ -1,102 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.Schema; -import org.apache.hive.service.cli.thrift.TColumnDesc; -import org.apache.hive.service.cli.thrift.TTableSchema; - -/** - * TableSchema. 
- * - */ -public class TableSchema { - private final List columns = new ArrayList(); - - public TableSchema() { - } - - public TableSchema(int numColumns) { - // TODO: remove this constructor - } - - public TableSchema(TTableSchema tTableSchema) { - for (TColumnDesc tColumnDesc : tTableSchema.getColumns()) { - columns.add(new ColumnDescriptor(tColumnDesc)); - } - } - - public TableSchema(List fieldSchemas) { - int pos = 1; - for (FieldSchema field : fieldSchemas) { - columns.add(new ColumnDescriptor(field, pos++)); - } - } - - public TableSchema(Schema schema) { - this(schema.getFieldSchemas()); - } - - public List getColumnDescriptors() { - return new ArrayList(columns); - } - - public ColumnDescriptor getColumnDescriptorAt(int pos) { - return columns.get(pos); - } - - public int getSize() { - return columns.size(); - } - - public void clear() { - columns.clear(); - } - - - public TTableSchema toTTableSchema() { - TTableSchema tTableSchema = new TTableSchema(); - for (ColumnDescriptor col : columns) { - tTableSchema.addToColumns(col.toTColumnDesc()); - } - return tTableSchema; - } - - public Type[] toTypes() { - Type[] types = new Type[columns.size()]; - for (int i = 0; i < types.length; i++) { - types[i] = columns.get(i).getType(); - } - return types; - } - - public TableSchema addPrimitiveColumn(String columnName, Type columnType, String columnComment) { - columns.add(ColumnDescriptor.newPrimitiveColumnDescriptor(columnName, columnComment, columnType, columns.size() + 1)); - return this; - } - - public TableSchema addStringColumn(String columnName, String columnComment) { - columns.add(ColumnDescriptor.newPrimitiveColumnDescriptor(columnName, columnComment, Type.STRING_TYPE, columns.size() + 1)); - return this; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Type.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Type.java deleted file mode 100644 index 7752ec03a29b7..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Type.java +++ /dev/null @@ -1,349 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import java.sql.DatabaseMetaData; -import java.util.Locale; - -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hive.service.cli.thrift.TTypeId; - -/** - * Type. 
- * - */ -public enum Type { - NULL_TYPE("VOID", - java.sql.Types.NULL, - TTypeId.NULL_TYPE), - BOOLEAN_TYPE("BOOLEAN", - java.sql.Types.BOOLEAN, - TTypeId.BOOLEAN_TYPE), - TINYINT_TYPE("TINYINT", - java.sql.Types.TINYINT, - TTypeId.TINYINT_TYPE), - SMALLINT_TYPE("SMALLINT", - java.sql.Types.SMALLINT, - TTypeId.SMALLINT_TYPE), - INT_TYPE("INT", - java.sql.Types.INTEGER, - TTypeId.INT_TYPE), - BIGINT_TYPE("BIGINT", - java.sql.Types.BIGINT, - TTypeId.BIGINT_TYPE), - FLOAT_TYPE("FLOAT", - java.sql.Types.FLOAT, - TTypeId.FLOAT_TYPE), - DOUBLE_TYPE("DOUBLE", - java.sql.Types.DOUBLE, - TTypeId.DOUBLE_TYPE), - STRING_TYPE("STRING", - java.sql.Types.VARCHAR, - TTypeId.STRING_TYPE), - CHAR_TYPE("CHAR", - java.sql.Types.CHAR, - TTypeId.CHAR_TYPE, - true, false, false), - VARCHAR_TYPE("VARCHAR", - java.sql.Types.VARCHAR, - TTypeId.VARCHAR_TYPE, - true, false, false), - DATE_TYPE("DATE", - java.sql.Types.DATE, - TTypeId.DATE_TYPE), - TIMESTAMP_TYPE("TIMESTAMP", - java.sql.Types.TIMESTAMP, - TTypeId.TIMESTAMP_TYPE), - INTERVAL_YEAR_MONTH_TYPE("INTERVAL_YEAR_MONTH", - java.sql.Types.OTHER, - TTypeId.INTERVAL_YEAR_MONTH_TYPE), - INTERVAL_DAY_TIME_TYPE("INTERVAL_DAY_TIME", - java.sql.Types.OTHER, - TTypeId.INTERVAL_DAY_TIME_TYPE), - BINARY_TYPE("BINARY", - java.sql.Types.BINARY, - TTypeId.BINARY_TYPE), - DECIMAL_TYPE("DECIMAL", - java.sql.Types.DECIMAL, - TTypeId.DECIMAL_TYPE, - true, false, false), - ARRAY_TYPE("ARRAY", - java.sql.Types.ARRAY, - TTypeId.ARRAY_TYPE, - true, true), - MAP_TYPE("MAP", - java.sql.Types.JAVA_OBJECT, - TTypeId.MAP_TYPE, - true, true), - STRUCT_TYPE("STRUCT", - java.sql.Types.STRUCT, - TTypeId.STRUCT_TYPE, - true, false), - UNION_TYPE("UNIONTYPE", - java.sql.Types.OTHER, - TTypeId.UNION_TYPE, - true, false), - USER_DEFINED_TYPE("USER_DEFINED", - java.sql.Types.OTHER, - TTypeId.USER_DEFINED_TYPE, - true, false); - - private final String name; - private final TTypeId tType; - private final int javaSQLType; - private final boolean isQualified; - private final boolean isComplex; - private final boolean isCollection; - - Type(String name, int javaSQLType, TTypeId tType, boolean isQualified, boolean isComplex, boolean isCollection) { - this.name = name; - this.javaSQLType = javaSQLType; - this.tType = tType; - this.isQualified = isQualified; - this.isComplex = isComplex; - this.isCollection = isCollection; - } - - Type(String name, int javaSQLType, TTypeId tType, boolean isComplex, boolean isCollection) { - this(name, javaSQLType, tType, false, isComplex, isCollection); - } - - Type(String name, int javaSqlType, TTypeId tType) { - this(name, javaSqlType, tType, false, false, false); - } - - public boolean isPrimitiveType() { - return !isComplex; - } - - public boolean isQualifiedType() { - return isQualified; - } - - public boolean isComplexType() { - return isComplex; - } - - public boolean isCollectionType() { - return isCollection; - } - - public static Type getType(TTypeId tType) { - for (Type type : values()) { - if (tType.equals(type.tType)) { - return type; - } - } - throw new IllegalArgumentException("Unregonized Thrift TTypeId value: " + tType); - } - - public static Type getType(String name) { - if (name == null) { - throw new IllegalArgumentException("Invalid type name: null"); - } - for (Type type : values()) { - if (name.equalsIgnoreCase(type.name)) { - return type; - } else if (type.isQualifiedType() || type.isComplexType()) { - if (name.toUpperCase(Locale.ROOT).startsWith(type.name)) { - return type; - } - } - } - throw new IllegalArgumentException("Unrecognized type 
name: " + name); - } - - /** - * Radix for this type (typically either 2 or 10) - * Null is returned for data types where this is not applicable. - */ - public Integer getNumPrecRadix() { - if (this.isNumericType()) { - return 10; - } - return null; - } - - /** - * Maximum precision for numeric types. - * Returns null for non-numeric types. - * @return - */ - public Integer getMaxPrecision() { - switch (this) { - case TINYINT_TYPE: - return 3; - case SMALLINT_TYPE: - return 5; - case INT_TYPE: - return 10; - case BIGINT_TYPE: - return 19; - case FLOAT_TYPE: - return 7; - case DOUBLE_TYPE: - return 15; - case DECIMAL_TYPE: - return HiveDecimal.MAX_PRECISION; - default: - return null; - } - } - - public boolean isNumericType() { - switch (this) { - case TINYINT_TYPE: - case SMALLINT_TYPE: - case INT_TYPE: - case BIGINT_TYPE: - case FLOAT_TYPE: - case DOUBLE_TYPE: - case DECIMAL_TYPE: - return true; - default: - return false; - } - } - - /** - * Prefix used to quote a literal of this type (may be null) - */ - public String getLiteralPrefix() { - return null; - } - - /** - * Suffix used to quote a literal of this type (may be null) - * @return - */ - public String getLiteralSuffix() { - return null; - } - - /** - * Can you use NULL for this type? - * @return - * DatabaseMetaData.typeNoNulls - does not allow NULL values - * DatabaseMetaData.typeNullable - allows NULL values - * DatabaseMetaData.typeNullableUnknown - nullability unknown - */ - public Short getNullable() { - // All Hive types are nullable - return DatabaseMetaData.typeNullable; - } - - /** - * Is the type case sensitive? - * @return - */ - public Boolean isCaseSensitive() { - switch (this) { - case STRING_TYPE: - return true; - default: - return false; - } - } - - /** - * Parameters used in creating the type (may be null) - * @return - */ - public String getCreateParams() { - return null; - } - - /** - * Can you use WHERE based on this type? - * @return - * DatabaseMetaData.typePredNone - No support - * DatabaseMetaData.typePredChar - Only support with WHERE .. LIKE - * DatabaseMetaData.typePredBasic - Supported except for WHERE .. LIKE - * DatabaseMetaData.typeSearchable - Supported for all WHERE .. - */ - public Short getSearchable() { - if (isPrimitiveType()) { - return DatabaseMetaData.typeSearchable; - } - return DatabaseMetaData.typePredNone; - } - - /** - * Is this type unsigned? - * @return - */ - public Boolean isUnsignedAttribute() { - if (isNumericType()) { - return false; - } - return true; - } - - /** - * Can this type represent money? - * @return - */ - public Boolean isFixedPrecScale() { - return false; - } - - /** - * Can this type be used for an auto-increment value? - * @return - */ - public Boolean isAutoIncrement() { - return false; - } - - /** - * Localized version of type name (may be null). 
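Type.getType(String) above resolves a type name by case-insensitive exact match, falling back to a prefix match for qualified and complex types so that names such as DECIMAL(10,2) or ARRAY<INT> still resolve to their base type. A trimmed-down, JDK-only sketch of that lookup, not part of this patch (only a few constants are reproduced and the names here are illustrative):

```java
import java.util.Locale;

public class TypeNameLookup {
  enum SimpleType {
    INT("INT", false),
    DECIMAL("DECIMAL", true),   // qualified: may carry (precision, scale)
    ARRAY("ARRAY", true);       // complex: may carry an element type

    final String sqlName;
    final boolean prefixMatch;  // qualified/complex types also match on prefix, as above

    SimpleType(String sqlName, boolean prefixMatch) {
      this.sqlName = sqlName;
      this.prefixMatch = prefixMatch;
    }

    static SimpleType fromName(String typeName) {
      String upper = typeName.toUpperCase(Locale.ROOT);
      for (SimpleType t : values()) {
        if (upper.equals(t.sqlName) || (t.prefixMatch && upper.startsWith(t.sqlName))) {
          return t;
        }
      }
      throw new IllegalArgumentException("Unrecognized type name: " + typeName);
    }
  }

  public static void main(String[] args) {
    System.out.println(SimpleType.fromName("int"));           // INT
    System.out.println(SimpleType.fromName("decimal(10,2)")); // DECIMAL
    System.out.println(SimpleType.fromName("array<int>"));    // ARRAY
  }
}
```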
- * @return - */ - public String getLocalizedName() { - return null; - } - - /** - * Minimum scale supported for this type - * @return - */ - public Short getMinimumScale() { - return 0; - } - - /** - * Maximum scale supported for this type - * @return - */ - public Short getMaximumScale() { - return 0; - } - - public TTypeId toTType() { - return tType; - } - - public int toJavaSQLType() { - return javaSQLType; - } - - public String getName() { - return name; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TypeDescriptor.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TypeDescriptor.java deleted file mode 100644 index b80fd67884add..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TypeDescriptor.java +++ /dev/null @@ -1,159 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import java.util.List; - -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hive.service.cli.thrift.TPrimitiveTypeEntry; -import org.apache.hive.service.cli.thrift.TTypeDesc; -import org.apache.hive.service.cli.thrift.TTypeEntry; - -/** - * TypeDescriptor. 
- * - */ -public class TypeDescriptor { - - private final Type type; - private String typeName = null; - private TypeQualifiers typeQualifiers = null; - - public TypeDescriptor(Type type) { - this.type = type; - } - - public TypeDescriptor(TTypeDesc tTypeDesc) { - List tTypeEntries = tTypeDesc.getTypes(); - TPrimitiveTypeEntry top = tTypeEntries.get(0).getPrimitiveEntry(); - this.type = Type.getType(top.getType()); - if (top.isSetTypeQualifiers()) { - setTypeQualifiers(TypeQualifiers.fromTTypeQualifiers(top.getTypeQualifiers())); - } - } - - public TypeDescriptor(String typeName) { - this.type = Type.getType(typeName); - if (this.type.isComplexType()) { - this.typeName = typeName; - } else if (this.type.isQualifiedType()) { - PrimitiveTypeInfo pti = TypeInfoFactory.getPrimitiveTypeInfo(typeName); - setTypeQualifiers(TypeQualifiers.fromTypeInfo(pti)); - } - } - - public Type getType() { - return type; - } - - public TTypeDesc toTTypeDesc() { - TPrimitiveTypeEntry primitiveEntry = new TPrimitiveTypeEntry(type.toTType()); - if (getTypeQualifiers() != null) { - primitiveEntry.setTypeQualifiers(getTypeQualifiers().toTTypeQualifiers()); - } - TTypeEntry entry = TTypeEntry.primitiveEntry(primitiveEntry); - - TTypeDesc desc = new TTypeDesc(); - desc.addToTypes(entry); - return desc; - } - - public String getTypeName() { - if (typeName != null) { - return typeName; - } else { - return type.getName(); - } - } - - public TypeQualifiers getTypeQualifiers() { - return typeQualifiers; - } - - public void setTypeQualifiers(TypeQualifiers typeQualifiers) { - this.typeQualifiers = typeQualifiers; - } - - /** - * The column size for this type. - * For numeric data this is the maximum precision. - * For character data this is the length in characters. - * For datetime types this is the length in characters of the String representation - * (assuming the maximum allowed precision of the fractional seconds component). - * For binary data this is the length in bytes. - * Null is returned for data types where the column size is not applicable. - */ - public Integer getColumnSize() { - if (type.isNumericType()) { - return getPrecision(); - } - switch (type) { - case STRING_TYPE: - case BINARY_TYPE: - return Integer.MAX_VALUE; - case CHAR_TYPE: - case VARCHAR_TYPE: - return typeQualifiers.getCharacterMaximumLength(); - case DATE_TYPE: - return 10; - case TIMESTAMP_TYPE: - return 29; - default: - return null; - } - } - - /** - * Maximum precision for numeric types. - * Returns null for non-numeric types. - * @return - */ - public Integer getPrecision() { - if (this.type == Type.DECIMAL_TYPE) { - return typeQualifiers.getPrecision(); - } - return this.type.getMaxPrecision(); - } - - /** - * The number of fractional digits for this type. - * Null is returned for data types where this is not applicable. 
- */ - public Integer getDecimalDigits() { - switch (this.type) { - case BOOLEAN_TYPE: - case TINYINT_TYPE: - case SMALLINT_TYPE: - case INT_TYPE: - case BIGINT_TYPE: - return 0; - case FLOAT_TYPE: - return 7; - case DOUBLE_TYPE: - return 15; - case DECIMAL_TYPE: - return typeQualifiers.getScale(); - case TIMESTAMP_TYPE: - return 9; - default: - return null; - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TypeQualifiers.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TypeQualifiers.java deleted file mode 100644 index c6da52c15a2b5..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TypeQualifiers.java +++ /dev/null @@ -1,133 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import java.util.HashMap; -import java.util.Map; - -import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; -import org.apache.hive.service.cli.thrift.TCLIServiceConstants; -import org.apache.hive.service.cli.thrift.TTypeQualifierValue; -import org.apache.hive.service.cli.thrift.TTypeQualifiers; - -/** - * This class holds type qualifier information for a primitive type, - * such as char/varchar length or decimal precision/scale. 
- */ -public class TypeQualifiers { - private Integer characterMaximumLength; - private Integer precision; - private Integer scale; - - public TypeQualifiers() {} - - public Integer getCharacterMaximumLength() { - return characterMaximumLength; - } - public void setCharacterMaximumLength(int characterMaximumLength) { - this.characterMaximumLength = characterMaximumLength; - } - - public TTypeQualifiers toTTypeQualifiers() { - TTypeQualifiers ret = null; - - Map qMap = new HashMap(); - if (getCharacterMaximumLength() != null) { - TTypeQualifierValue val = new TTypeQualifierValue(); - val.setI32Value(getCharacterMaximumLength().intValue()); - qMap.put(TCLIServiceConstants.CHARACTER_MAXIMUM_LENGTH, val); - } - - if (precision != null) { - TTypeQualifierValue val = new TTypeQualifierValue(); - val.setI32Value(precision.intValue()); - qMap.put(TCLIServiceConstants.PRECISION, val); - } - - if (scale != null) { - TTypeQualifierValue val = new TTypeQualifierValue(); - val.setI32Value(scale.intValue()); - qMap.put(TCLIServiceConstants.SCALE, val); - } - - if (qMap.size() > 0) { - ret = new TTypeQualifiers(qMap); - } - - return ret; - } - - public static TypeQualifiers fromTTypeQualifiers(TTypeQualifiers ttq) { - TypeQualifiers ret = null; - if (ttq != null) { - ret = new TypeQualifiers(); - Map tqMap = ttq.getQualifiers(); - - if (tqMap.containsKey(TCLIServiceConstants.CHARACTER_MAXIMUM_LENGTH)) { - ret.setCharacterMaximumLength( - tqMap.get(TCLIServiceConstants.CHARACTER_MAXIMUM_LENGTH).getI32Value()); - } - - if (tqMap.containsKey(TCLIServiceConstants.PRECISION)) { - ret.setPrecision(tqMap.get(TCLIServiceConstants.PRECISION).getI32Value()); - } - - if (tqMap.containsKey(TCLIServiceConstants.SCALE)) { - ret.setScale(tqMap.get(TCLIServiceConstants.SCALE).getI32Value()); - } - } - return ret; - } - - public static TypeQualifiers fromTypeInfo(PrimitiveTypeInfo pti) { - TypeQualifiers result = null; - if (pti instanceof VarcharTypeInfo) { - result = new TypeQualifiers(); - result.setCharacterMaximumLength(((VarcharTypeInfo)pti).getLength()); - } else if (pti instanceof CharTypeInfo) { - result = new TypeQualifiers(); - result.setCharacterMaximumLength(((CharTypeInfo)pti).getLength()); - } else if (pti instanceof DecimalTypeInfo) { - result = new TypeQualifiers(); - result.setPrecision(((DecimalTypeInfo)pti).precision()); - result.setScale(((DecimalTypeInfo)pti).scale()); - } - return result; - } - - public Integer getPrecision() { - return precision; - } - - public void setPrecision(Integer precision) { - this.precision = precision; - } - - public Integer getScale() { - return scale; - } - - public void setScale(Integer scale) { - this.scale = scale; - } - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java deleted file mode 100644 index af36057bdaeca..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java +++ /dev/null @@ -1,86 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
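TypeQualifiers and TypeDescriptor above carry char/varchar length and decimal precision/scale alongside the base type, and getColumnSize/getDecimalDigits surface them as JDBC-style metadata. A small sketch of that mapping for DECIMAL, not part of this patch (plain JDK types; the Thrift qualifier map and the class/method names here are illustrative):

```java
public class DecimalMetadata {
  // Qualifiers carried next to the base type, as TypeQualifiers does above.
  static final class Qualifiers {
    final int precision;
    final int scale;
    Qualifiers(int precision, int scale) {
      this.precision = precision;
      this.scale = scale;
    }
  }

  // For DECIMAL, the column size is the precision and the decimal digits are the scale,
  // mirroring TypeDescriptor.getColumnSize/getDecimalDigits above.
  static int columnSize(Qualifiers q) {
    return q.precision;
  }

  static int decimalDigits(Qualifiers q) {
    return q.scale;
  }

  public static void main(String[] args) {
    Qualifiers decimalCol = new Qualifiers(10, 2); // e.g. a DECIMAL(10,2) column
    System.out.println(columnSize(decimalCol));    // 10
    System.out.println(decimalDigits(decimalCol)); // 2
  }
}
```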
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.operation; - -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - -import org.apache.hadoop.hive.metastore.TableType; - -/** - * ClassicTableTypeMapping. - * Classic table type mapping : - * Managed Table to Table - * External Table to Table - * Virtual View to View - */ -public class ClassicTableTypeMapping implements TableTypeMapping { - - public enum ClassicTableTypes { - TABLE, - VIEW, - } - - private final Map hiveToClientMap = new HashMap(); - private final Map clientToHiveMap = new HashMap(); - - public ClassicTableTypeMapping() { - hiveToClientMap.put(TableType.MANAGED_TABLE.toString(), - ClassicTableTypes.TABLE.toString()); - hiveToClientMap.put(TableType.EXTERNAL_TABLE.toString(), - ClassicTableTypes.TABLE.toString()); - hiveToClientMap.put(TableType.VIRTUAL_VIEW.toString(), - ClassicTableTypes.VIEW.toString()); - - clientToHiveMap.put(ClassicTableTypes.TABLE.toString(), - TableType.MANAGED_TABLE.toString()); - clientToHiveMap.put(ClassicTableTypes.VIEW.toString(), - TableType.VIRTUAL_VIEW.toString()); - } - - @Override - public String mapToHiveType(String clientTypeName) { - if (clientToHiveMap.containsKey(clientTypeName)) { - return clientToHiveMap.get(clientTypeName); - } else { - return clientTypeName; - } - } - - @Override - public String mapToClientType(String hiveTypeName) { - if (hiveToClientMap.containsKey(hiveTypeName)) { - return hiveToClientMap.get(hiveTypeName); - } else { - return hiveTypeName; - } - } - - @Override - public Set getTableTypeNames() { - Set typeNameSet = new HashSet(); - for (ClassicTableTypes typeNames : ClassicTableTypes.values()) { - typeNameSet.add(typeNames.toString()); - } - return typeNameSet; - } - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java deleted file mode 100644 index 6740d3bb59dc3..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java +++ /dev/null @@ -1,83 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
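ClassicTableTypeMapping above folds Hive's MANAGED_TABLE and EXTERNAL_TABLE into a single client-facing TABLE type, so the reverse mapping is necessarily lossy: TABLE maps back to MANAGED_TABLE only, and unknown names pass through unchanged. A compact sketch of that asymmetry, not part of this patch (plain strings stand in for the Hive TableType enum; names here are illustrative):

```java
import java.util.HashMap;
import java.util.Map;

public class TableTypeFold {
  static final Map<String, String> HIVE_TO_CLIENT = new HashMap<>();
  static final Map<String, String> CLIENT_TO_HIVE = new HashMap<>();
  static {
    HIVE_TO_CLIENT.put("MANAGED_TABLE", "TABLE");
    HIVE_TO_CLIENT.put("EXTERNAL_TABLE", "TABLE"); // folded into the same client type
    HIVE_TO_CLIENT.put("VIRTUAL_VIEW", "VIEW");

    CLIENT_TO_HIVE.put("TABLE", "MANAGED_TABLE");  // reverse direction picks one of the two
    CLIENT_TO_HIVE.put("VIEW", "VIRTUAL_VIEW");
  }

  // Unknown names pass through unchanged, as mapToHiveType/mapToClientType do above.
  static String toClient(String hiveType) {
    return HIVE_TO_CLIENT.getOrDefault(hiveType, hiveType);
  }

  static String toHive(String clientType) {
    return CLIENT_TO_HIVE.getOrDefault(clientType, clientType);
  }

  public static void main(String[] args) {
    System.out.println(toClient("EXTERNAL_TABLE")); // TABLE
    System.out.println(toHive("TABLE"));            // MANAGED_TABLE
  }
}
```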
- */ -package org.apache.hive.service.cli.operation; - -import java.sql.SQLException; -import java.util.HashMap; -import java.util.Map; - -import org.apache.hadoop.hive.ql.processors.CommandProcessor; -import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory; -import org.apache.hadoop.hive.ql.session.OperationLog; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationType; -import org.apache.hive.service.cli.session.HiveSession; - -public abstract class ExecuteStatementOperation extends Operation { - protected String statement = null; - protected Map confOverlay = new HashMap(); - - public ExecuteStatementOperation(HiveSession parentSession, String statement, - Map confOverlay, boolean runInBackground) { - super(parentSession, OperationType.EXECUTE_STATEMENT, runInBackground); - this.statement = statement; - setConfOverlay(confOverlay); - } - - public String getStatement() { - return statement; - } - - public static ExecuteStatementOperation newExecuteStatementOperation( - HiveSession parentSession, String statement, Map confOverlay, boolean runAsync) - throws HiveSQLException { - String[] tokens = statement.trim().split("\\s+"); - CommandProcessor processor = null; - try { - processor = CommandProcessorFactory.getForHiveCommand(tokens, parentSession.getHiveConf()); - } catch (SQLException e) { - throw new HiveSQLException(e.getMessage(), e.getSQLState(), e); - } - if (processor == null) { - return new SQLOperation(parentSession, statement, confOverlay, runAsync); - } - return new HiveCommandOperation(parentSession, statement, processor, confOverlay); - } - - protected Map getConfOverlay() { - return confOverlay; - } - - protected void setConfOverlay(Map confOverlay) { - if (confOverlay != null) { - this.confOverlay = confOverlay; - } - } - - protected void registerCurrentOperationLog() { - if (isOperationLogEnabled) { - if (operationLog == null) { - LOG.warn("Failed to get current OperationLog object of Operation: " + - getHandle().getHandleIdentifier()); - isOperationLogEnabled = false; - return; - } - OperationLog.setCurrentOperationLog(operationLog); - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetCatalogsOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetCatalogsOperation.java deleted file mode 100644 index 581d975344060..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetCatalogsOperation.java +++ /dev/null @@ -1,81 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.service.cli.operation; - -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; -import org.apache.hive.service.cli.FetchOrientation; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationState; -import org.apache.hive.service.cli.OperationType; -import org.apache.hive.service.cli.RowSet; -import org.apache.hive.service.cli.RowSetFactory; -import org.apache.hive.service.cli.TableSchema; -import org.apache.hive.service.cli.session.HiveSession; - -/** - * GetCatalogsOperation. - * - */ -public class GetCatalogsOperation extends MetadataOperation { - private static final TableSchema RESULT_SET_SCHEMA = new TableSchema() - .addStringColumn("TABLE_CAT", "Catalog name. NULL if not applicable."); - - protected final RowSet rowSet; - - protected GetCatalogsOperation(HiveSession parentSession) { - super(parentSession, OperationType.GET_CATALOGS); - rowSet = RowSetFactory.create(RESULT_SET_SCHEMA, getProtocolVersion()); - } - - @Override - public void runInternal() throws HiveSQLException { - setState(OperationState.RUNNING); - try { - if (isAuthV2Enabled()) { - authorizeMetaGets(HiveOperationType.GET_CATALOGS, null); - } - setState(OperationState.FINISHED); - } catch (HiveSQLException e) { - setState(OperationState.ERROR); - throw e; - } - - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getResultSetSchema() - */ - @Override - public TableSchema getResultSetSchema() throws HiveSQLException { - return RESULT_SET_SCHEMA; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getNextRowSet(org.apache.hive.service.cli.FetchOrientation, long) - */ - @Override - public RowSet getNextRowSet(FetchOrientation orientation, long maxRows) throws HiveSQLException { - assertState(OperationState.FINISHED); - validateDefaultFetchOrientation(orientation); - if (orientation.equals(FetchOrientation.FETCH_FIRST)) { - rowSet.setStartOffset(0); - } - return rowSet.extractSubset((int)maxRows); - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java deleted file mode 100644 index 96ba4890075ac..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java +++ /dev/null @@ -1,234 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
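GetCatalogsOperation above also shows the fetch contract shared by all of these metadata operations: results are buffered in a RowSet, FETCH_FIRST rewinds it with setStartOffset(0), and each call pages out at most maxRows rows via extractSubset. A client-side paging sketch under the assumption that RowSet exposes numRows() as in the Hive 1.2 interface (the helper and its class name are illustrative):

```java
import org.apache.hive.service.cli.FetchOrientation;
import org.apache.hive.service.cli.HiveSQLException;
import org.apache.hive.service.cli.RowSet;
import org.apache.hive.service.cli.operation.Operation;

public class FetchPagingSketch {
  /**
   * Counts the rows of a finished operation page by page: FETCH_FIRST rewinds
   * the buffered RowSet to offset 0, every later FETCH_NEXT extracts at most
   * maxRows rows, and an empty page marks the end of the result set.
   */
  static int countRows(Operation op, long maxRows) throws HiveSQLException {
    int total = 0;
    RowSet page = op.getNextRowSet(FetchOrientation.FETCH_FIRST, maxRows);
    while (page.numRows() > 0) {
      total += page.numRows();
      page = op.getNextRowSet(FetchOrientation.FETCH_NEXT, maxRows);
    }
    return total;
  }
}
```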
- */ - -package org.apache.hive.service.cli.operation; - -import java.sql.DatabaseMetaData; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.regex.Pattern; - -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivilegeObjectType; -import org.apache.hive.service.cli.ColumnDescriptor; -import org.apache.hive.service.cli.FetchOrientation; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationState; -import org.apache.hive.service.cli.OperationType; -import org.apache.hive.service.cli.RowSet; -import org.apache.hive.service.cli.RowSetFactory; -import org.apache.hive.service.cli.TableSchema; -import org.apache.hive.service.cli.Type; -import org.apache.hive.service.cli.session.HiveSession; - -/** - * GetColumnsOperation. - * - */ -public class GetColumnsOperation extends MetadataOperation { - - protected static final TableSchema RESULT_SET_SCHEMA = new TableSchema() - .addPrimitiveColumn("TABLE_CAT", Type.STRING_TYPE, - "Catalog name. NULL if not applicable") - .addPrimitiveColumn("TABLE_SCHEM", Type.STRING_TYPE, - "Schema name") - .addPrimitiveColumn("TABLE_NAME", Type.STRING_TYPE, - "Table name") - .addPrimitiveColumn("COLUMN_NAME", Type.STRING_TYPE, - "Column name") - .addPrimitiveColumn("DATA_TYPE", Type.INT_TYPE, - "SQL type from java.sql.Types") - .addPrimitiveColumn("TYPE_NAME", Type.STRING_TYPE, - "Data source dependent type name, for a UDT the type name is fully qualified") - .addPrimitiveColumn("COLUMN_SIZE", Type.INT_TYPE, - "Column size. For char or date types this is the maximum number of characters," - + " for numeric or decimal types this is precision.") - .addPrimitiveColumn("BUFFER_LENGTH", Type.TINYINT_TYPE, - "Unused") - .addPrimitiveColumn("DECIMAL_DIGITS", Type.INT_TYPE, - "The number of fractional digits") - .addPrimitiveColumn("NUM_PREC_RADIX", Type.INT_TYPE, - "Radix (typically either 10 or 2)") - .addPrimitiveColumn("NULLABLE", Type.INT_TYPE, - "Is NULL allowed") - .addPrimitiveColumn("REMARKS", Type.STRING_TYPE, - "Comment describing column (may be null)") - .addPrimitiveColumn("COLUMN_DEF", Type.STRING_TYPE, - "Default value (may be null)") - .addPrimitiveColumn("SQL_DATA_TYPE", Type.INT_TYPE, - "Unused") - .addPrimitiveColumn("SQL_DATETIME_SUB", Type.INT_TYPE, - "Unused") - .addPrimitiveColumn("CHAR_OCTET_LENGTH", Type.INT_TYPE, - "For char types the maximum number of bytes in the column") - .addPrimitiveColumn("ORDINAL_POSITION", Type.INT_TYPE, - "Index of column in table (starting at 1)") - .addPrimitiveColumn("IS_NULLABLE", Type.STRING_TYPE, - "\"NO\" means column definitely does not allow NULL values; " - + "\"YES\" means the column might allow NULL values. 
An empty " - + "string means nobody knows.") - .addPrimitiveColumn("SCOPE_CATALOG", Type.STRING_TYPE, - "Catalog of table that is the scope of a reference attribute " - + "(null if DATA_TYPE isn't REF)") - .addPrimitiveColumn("SCOPE_SCHEMA", Type.STRING_TYPE, - "Schema of table that is the scope of a reference attribute " - + "(null if the DATA_TYPE isn't REF)") - .addPrimitiveColumn("SCOPE_TABLE", Type.STRING_TYPE, - "Table name that this the scope of a reference attribure " - + "(null if the DATA_TYPE isn't REF)") - .addPrimitiveColumn("SOURCE_DATA_TYPE", Type.SMALLINT_TYPE, - "Source type of a distinct type or user-generated Ref type, " - + "SQL type from java.sql.Types (null if DATA_TYPE isn't DISTINCT or user-generated REF)") - .addPrimitiveColumn("IS_AUTO_INCREMENT", Type.STRING_TYPE, - "Indicates whether this column is auto incremented."); - - private final String catalogName; - private final String schemaName; - private final String tableName; - private final String columnName; - - protected final RowSet rowSet; - - protected GetColumnsOperation(HiveSession parentSession, String catalogName, String schemaName, - String tableName, String columnName) { - super(parentSession, OperationType.GET_COLUMNS); - this.catalogName = catalogName; - this.schemaName = schemaName; - this.tableName = tableName; - this.columnName = columnName; - this.rowSet = RowSetFactory.create(RESULT_SET_SCHEMA, getProtocolVersion()); - } - - @Override - public void runInternal() throws HiveSQLException { - setState(OperationState.RUNNING); - try { - IMetaStoreClient metastoreClient = getParentSession().getMetaStoreClient(); - String schemaPattern = convertSchemaPattern(schemaName); - String tablePattern = convertIdentifierPattern(tableName, true); - - Pattern columnPattern = null; - if (columnName != null) { - columnPattern = Pattern.compile(convertIdentifierPattern(columnName, false)); - } - - List dbNames = metastoreClient.getDatabases(schemaPattern); - Collections.sort(dbNames); - Map> db2Tabs = new HashMap<>(); - - for (String dbName : dbNames) { - List tableNames = metastoreClient.getTables(dbName, tablePattern); - Collections.sort(tableNames); - db2Tabs.put(dbName, tableNames); - } - - if (isAuthV2Enabled()) { - List privObjs = getPrivObjs(db2Tabs); - String cmdStr = "catalog : " + catalogName + ", schemaPattern : " + schemaName - + ", tablePattern : " + tableName; - authorizeMetaGets(HiveOperationType.GET_COLUMNS, privObjs, cmdStr); - } - - for (Entry> dbTabs : db2Tabs.entrySet()) { - String dbName = dbTabs.getKey(); - List tableNames = dbTabs.getValue(); - for (Table table : metastoreClient.getTableObjectsByName(dbName, tableNames)) { - TableSchema schema = new TableSchema(metastoreClient.getSchema(dbName, table.getTableName())); - for (ColumnDescriptor column : schema.getColumnDescriptors()) { - if (columnPattern != null && !columnPattern.matcher(column.getName()).matches()) { - continue; - } - Object[] rowData = new Object[] { - null, // TABLE_CAT - table.getDbName(), // TABLE_SCHEM - table.getTableName(), // TABLE_NAME - column.getName(), // COLUMN_NAME - column.getType().toJavaSQLType(), // DATA_TYPE - column.getTypeName(), // TYPE_NAME - column.getTypeDescriptor().getColumnSize(), // COLUMN_SIZE - null, // BUFFER_LENGTH, unused - column.getTypeDescriptor().getDecimalDigits(), // DECIMAL_DIGITS - column.getType().getNumPrecRadix(), // NUM_PREC_RADIX - DatabaseMetaData.columnNullable, // NULLABLE - column.getComment(), // REMARKS - null, // COLUMN_DEF - null, // SQL_DATA_TYPE - null, // 
SQL_DATETIME_SUB - null, // CHAR_OCTET_LENGTH - column.getOrdinalPosition(), // ORDINAL_POSITION - "YES", // IS_NULLABLE - null, // SCOPE_CATALOG - null, // SCOPE_SCHEMA - null, // SCOPE_TABLE - null, // SOURCE_DATA_TYPE - "NO", // IS_AUTO_INCREMENT - }; - rowSet.addRow(rowData); - } - } - } - setState(OperationState.FINISHED); - } catch (Exception e) { - setState(OperationState.ERROR); - throw new HiveSQLException(e); - } - - } - - - private List getPrivObjs(Map> db2Tabs) { - List privObjs = new ArrayList<>(); - for (Entry> dbTabs : db2Tabs.entrySet()) { - for (String tabName : dbTabs.getValue()) { - privObjs.add(new HivePrivilegeObject(HivePrivilegeObjectType.TABLE_OR_VIEW, dbTabs.getKey(), - tabName)); - } - } - return privObjs; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getResultSetSchema() - */ - @Override - public TableSchema getResultSetSchema() throws HiveSQLException { - assertState(OperationState.FINISHED); - return RESULT_SET_SCHEMA; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getNextRowSet(org.apache.hive.service.cli.FetchOrientation, long) - */ - @Override - public RowSet getNextRowSet(FetchOrientation orientation, long maxRows) throws HiveSQLException { - assertState(OperationState.FINISHED); - validateDefaultFetchOrientation(orientation); - if (orientation.equals(FetchOrientation.FETCH_FIRST)) { - rowSet.setStartOffset(0); - } - return rowSet.extractSubset((int)maxRows); - } - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java deleted file mode 100644 index 5dec8bdbf45de..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java +++ /dev/null @@ -1,147 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.service.cli.operation; - -import java.sql.DatabaseMetaData; -import java.util.List; -import java.util.Set; - -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hadoop.hive.ql.exec.FunctionInfo; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObjectUtils; -import org.apache.hive.service.cli.CLIServiceUtils; -import org.apache.hive.service.cli.FetchOrientation; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationState; -import org.apache.hive.service.cli.OperationType; -import org.apache.hive.service.cli.RowSet; -import org.apache.hive.service.cli.RowSetFactory; -import org.apache.hive.service.cli.TableSchema; -import org.apache.hive.service.cli.Type; -import org.apache.hive.service.cli.session.HiveSession; -import org.apache.thrift.TException; - -/** - * GetFunctionsOperation. - * - */ -public class GetFunctionsOperation extends MetadataOperation { - private static final TableSchema RESULT_SET_SCHEMA = new TableSchema() - .addPrimitiveColumn("FUNCTION_CAT", Type.STRING_TYPE, - "Function catalog (may be null)") - .addPrimitiveColumn("FUNCTION_SCHEM", Type.STRING_TYPE, - "Function schema (may be null)") - .addPrimitiveColumn("FUNCTION_NAME", Type.STRING_TYPE, - "Function name. This is the name used to invoke the function") - .addPrimitiveColumn("REMARKS", Type.STRING_TYPE, - "Explanatory comment on the function") - .addPrimitiveColumn("FUNCTION_TYPE", Type.INT_TYPE, - "Kind of function.") - .addPrimitiveColumn("SPECIFIC_NAME", Type.STRING_TYPE, - "The name which uniquely identifies this function within its schema"); - - private final String catalogName; - private final String schemaName; - private final String functionName; - - protected final RowSet rowSet; - - public GetFunctionsOperation(HiveSession parentSession, - String catalogName, String schemaName, String functionName) { - super(parentSession, OperationType.GET_FUNCTIONS); - this.catalogName = catalogName; - this.schemaName = schemaName; - this.functionName = functionName; - this.rowSet = RowSetFactory.create(RESULT_SET_SCHEMA, getProtocolVersion()); - } - - @Override - public void runInternal() throws HiveSQLException { - setState(OperationState.RUNNING); - if (isAuthV2Enabled()) { - // get databases for schema pattern - IMetaStoreClient metastoreClient = getParentSession().getMetaStoreClient(); - String schemaPattern = convertSchemaPattern(schemaName); - List matchingDbs; - try { - matchingDbs = metastoreClient.getDatabases(schemaPattern); - } catch (TException e) { - setState(OperationState.ERROR); - throw new HiveSQLException(e); - } - // authorize this call on the schema objects - List privObjs = HivePrivilegeObjectUtils - .getHivePrivDbObjects(matchingDbs); - String cmdStr = "catalog : " + catalogName + ", schemaPattern : " + schemaName; - authorizeMetaGets(HiveOperationType.GET_FUNCTIONS, privObjs, cmdStr); - } - - try { - if ((null == catalogName || "".equals(catalogName)) - && (null == schemaName || "".equals(schemaName))) { - Set functionNames = FunctionRegistry - .getFunctionNames(CLIServiceUtils.patternToRegex(functionName)); - for (String functionName : functionNames) { - FunctionInfo functionInfo = FunctionRegistry.getFunctionInfo(functionName); - Object[] rowData = 
new Object[] { - null, // FUNCTION_CAT - null, // FUNCTION_SCHEM - functionInfo.getDisplayName(), // FUNCTION_NAME - "", // REMARKS - (functionInfo.isGenericUDTF() ? - DatabaseMetaData.functionReturnsTable - : DatabaseMetaData.functionNoTable), // FUNCTION_TYPE - functionInfo.getClass().getCanonicalName() - }; - rowSet.addRow(rowData); - } - } - setState(OperationState.FINISHED); - } catch (Exception e) { - setState(OperationState.ERROR); - throw new HiveSQLException(e); - } - } - - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getResultSetSchema() - */ - @Override - public TableSchema getResultSetSchema() throws HiveSQLException { - assertState(OperationState.FINISHED); - return RESULT_SET_SCHEMA; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getNextRowSet(org.apache.hive.service.cli.FetchOrientation, long) - */ - @Override - public RowSet getNextRowSet(FetchOrientation orientation, long maxRows) throws HiveSQLException { - assertState(OperationState.FINISHED); - validateDefaultFetchOrientation(orientation); - if (orientation.equals(FetchOrientation.FETCH_FIRST)) { - rowSet.setStartOffset(0); - } - return rowSet.extractSubset((int)maxRows); - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java deleted file mode 100644 index 3516bc2ba242c..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java +++ /dev/null @@ -1,96 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.operation; - -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; -import org.apache.hive.service.cli.FetchOrientation; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationState; -import org.apache.hive.service.cli.OperationType; -import org.apache.hive.service.cli.RowSet; -import org.apache.hive.service.cli.RowSetFactory; -import org.apache.hive.service.cli.TableSchema; -import org.apache.hive.service.cli.session.HiveSession; - -/** - * GetSchemasOperation. 
- * - */ -public class GetSchemasOperation extends MetadataOperation { - private final String catalogName; - private final String schemaName; - - private static final TableSchema RESULT_SET_SCHEMA = new TableSchema() - .addStringColumn("TABLE_SCHEM", "Schema name.") - .addStringColumn("TABLE_CATALOG", "Catalog name."); - - protected RowSet rowSet; - - protected GetSchemasOperation(HiveSession parentSession, - String catalogName, String schemaName) { - super(parentSession, OperationType.GET_SCHEMAS); - this.catalogName = catalogName; - this.schemaName = schemaName; - this.rowSet = RowSetFactory.create(RESULT_SET_SCHEMA, getProtocolVersion()); - } - - @Override - public void runInternal() throws HiveSQLException { - setState(OperationState.RUNNING); - if (isAuthV2Enabled()) { - String cmdStr = "catalog : " + catalogName + ", schemaPattern : " + schemaName; - authorizeMetaGets(HiveOperationType.GET_SCHEMAS, null, cmdStr); - } - try { - IMetaStoreClient metastoreClient = getParentSession().getMetaStoreClient(); - String schemaPattern = convertSchemaPattern(schemaName); - for (String dbName : metastoreClient.getDatabases(schemaPattern)) { - rowSet.addRow(new Object[] {dbName, DEFAULT_HIVE_CATALOG}); - } - setState(OperationState.FINISHED); - } catch (Exception e) { - setState(OperationState.ERROR); - throw new HiveSQLException(e); - } - } - - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getResultSetSchema() - */ - @Override - public TableSchema getResultSetSchema() throws HiveSQLException { - assertState(OperationState.FINISHED); - return RESULT_SET_SCHEMA; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getNextRowSet(org.apache.hive.service.cli.FetchOrientation, long) - */ - @Override - public RowSet getNextRowSet(FetchOrientation orientation, long maxRows) throws HiveSQLException { - assertState(OperationState.FINISHED); - validateDefaultFetchOrientation(orientation); - if (orientation.equals(FetchOrientation.FETCH_FIRST)) { - rowSet.setStartOffset(0); - } - return rowSet.extractSubset((int)maxRows); - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTableTypesOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTableTypesOperation.java deleted file mode 100644 index b372f55cedd1c..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTableTypesOperation.java +++ /dev/null @@ -1,93 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.service.cli.operation; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.TableType; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; -import org.apache.hive.service.cli.FetchOrientation; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationState; -import org.apache.hive.service.cli.OperationType; -import org.apache.hive.service.cli.RowSet; -import org.apache.hive.service.cli.RowSetFactory; -import org.apache.hive.service.cli.TableSchema; -import org.apache.hive.service.cli.session.HiveSession; - -/** - * GetTableTypesOperation. - * - */ -public class GetTableTypesOperation extends MetadataOperation { - - protected static TableSchema RESULT_SET_SCHEMA = new TableSchema() - .addStringColumn("TABLE_TYPE", "Table type name."); - - protected final RowSet rowSet; - private final TableTypeMapping tableTypeMapping; - - protected GetTableTypesOperation(HiveSession parentSession) { - super(parentSession, OperationType.GET_TABLE_TYPES); - String tableMappingStr = getParentSession().getHiveConf() - .getVar(HiveConf.ConfVars.HIVE_SERVER2_TABLE_TYPE_MAPPING); - tableTypeMapping = - TableTypeMappingFactory.getTableTypeMapping(tableMappingStr); - rowSet = RowSetFactory.create(RESULT_SET_SCHEMA, getProtocolVersion()); - } - - @Override - public void runInternal() throws HiveSQLException { - setState(OperationState.RUNNING); - if (isAuthV2Enabled()) { - authorizeMetaGets(HiveOperationType.GET_TABLETYPES, null); - } - try { - for (TableType type : TableType.values()) { - rowSet.addRow(new String[] {tableTypeMapping.mapToClientType(type.toString())}); - } - setState(OperationState.FINISHED); - } catch (Exception e) { - setState(OperationState.ERROR); - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getResultSetSchema() - */ - @Override - public TableSchema getResultSetSchema() throws HiveSQLException { - assertState(OperationState.FINISHED); - return RESULT_SET_SCHEMA; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getNextRowSet(org.apache.hive.service.cli.FetchOrientation, long) - */ - @Override - public RowSet getNextRowSet(FetchOrientation orientation, long maxRows) throws HiveSQLException { - assertState(OperationState.FINISHED); - validateDefaultFetchOrientation(orientation); - if (orientation.equals(FetchOrientation.FETCH_FIRST)) { - rowSet.setStartOffset(0); - } - return rowSet.extractSubset((int)maxRows); - } - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTablesOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTablesOperation.java deleted file mode 100644 index 2af17a662a296..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTablesOperation.java +++ /dev/null @@ -1,135 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.operation; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObjectUtils; -import org.apache.hive.service.cli.FetchOrientation; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationState; -import org.apache.hive.service.cli.OperationType; -import org.apache.hive.service.cli.RowSet; -import org.apache.hive.service.cli.RowSetFactory; -import org.apache.hive.service.cli.TableSchema; -import org.apache.hive.service.cli.session.HiveSession; - -/** - * GetTablesOperation. - * - */ -public class GetTablesOperation extends MetadataOperation { - - private final String catalogName; - private final String schemaName; - private final String tableName; - private final List tableTypes = new ArrayList(); - protected final RowSet rowSet; - private final TableTypeMapping tableTypeMapping; - - - private static final TableSchema RESULT_SET_SCHEMA = new TableSchema() - .addStringColumn("TABLE_CAT", "Catalog name. NULL if not applicable.") - .addStringColumn("TABLE_SCHEM", "Schema name.") - .addStringColumn("TABLE_NAME", "Table name.") - .addStringColumn("TABLE_TYPE", "The table type, e.g. 
\"TABLE\", \"VIEW\", etc.") - .addStringColumn("REMARKS", "Comments about the table."); - - protected GetTablesOperation(HiveSession parentSession, - String catalogName, String schemaName, String tableName, - List tableTypes) { - super(parentSession, OperationType.GET_TABLES); - this.catalogName = catalogName; - this.schemaName = schemaName; - this.tableName = tableName; - String tableMappingStr = getParentSession().getHiveConf() - .getVar(HiveConf.ConfVars.HIVE_SERVER2_TABLE_TYPE_MAPPING); - tableTypeMapping = - TableTypeMappingFactory.getTableTypeMapping(tableMappingStr); - if (tableTypes != null) { - this.tableTypes.addAll(tableTypes); - } - this.rowSet = RowSetFactory.create(RESULT_SET_SCHEMA, getProtocolVersion()); - } - - @Override - public void runInternal() throws HiveSQLException { - setState(OperationState.RUNNING); - try { - IMetaStoreClient metastoreClient = getParentSession().getMetaStoreClient(); - String schemaPattern = convertSchemaPattern(schemaName); - List matchingDbs = metastoreClient.getDatabases(schemaPattern); - if(isAuthV2Enabled()){ - List privObjs = HivePrivilegeObjectUtils.getHivePrivDbObjects(matchingDbs); - String cmdStr = "catalog : " + catalogName + ", schemaPattern : " + schemaName; - authorizeMetaGets(HiveOperationType.GET_TABLES, privObjs, cmdStr); - } - - String tablePattern = convertIdentifierPattern(tableName, true); - for (String dbName : metastoreClient.getDatabases(schemaPattern)) { - List tableNames = metastoreClient.getTables(dbName, tablePattern); - for (Table table : metastoreClient.getTableObjectsByName(dbName, tableNames)) { - Object[] rowData = new Object[] { - DEFAULT_HIVE_CATALOG, - table.getDbName(), - table.getTableName(), - tableTypeMapping.mapToClientType(table.getTableType()), - table.getParameters().get("comment") - }; - if (tableTypes.isEmpty() || tableTypes.contains( - tableTypeMapping.mapToClientType(table.getTableType()))) { - rowSet.addRow(rowData); - } - } - } - setState(OperationState.FINISHED); - } catch (Exception e) { - setState(OperationState.ERROR); - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getResultSetSchema() - */ - @Override - public TableSchema getResultSetSchema() throws HiveSQLException { - assertState(OperationState.FINISHED); - return RESULT_SET_SCHEMA; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getNextRowSet(org.apache.hive.service.cli.FetchOrientation, long) - */ - @Override - public RowSet getNextRowSet(FetchOrientation orientation, long maxRows) throws HiveSQLException { - assertState(OperationState.FINISHED); - validateDefaultFetchOrientation(orientation); - if (orientation.equals(FetchOrientation.FETCH_FIRST)) { - rowSet.setStartOffset(0); - } - return rowSet.extractSubset((int)maxRows); - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java deleted file mode 100644 index 3e81f8afbd85f..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java +++ /dev/null @@ -1,142 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.operation; - -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; -import org.apache.hive.service.cli.FetchOrientation; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationState; -import org.apache.hive.service.cli.OperationType; -import org.apache.hive.service.cli.RowSet; -import org.apache.hive.service.cli.RowSetFactory; -import org.apache.hive.service.cli.TableSchema; -import org.apache.hive.service.cli.Type; -import org.apache.hive.service.cli.session.HiveSession; - -/** - * GetTypeInfoOperation. - * - */ -public class GetTypeInfoOperation extends MetadataOperation { - - private static final TableSchema RESULT_SET_SCHEMA = new TableSchema() - .addPrimitiveColumn("TYPE_NAME", Type.STRING_TYPE, - "Type name") - .addPrimitiveColumn("DATA_TYPE", Type.INT_TYPE, - "SQL data type from java.sql.Types") - .addPrimitiveColumn("PRECISION", Type.INT_TYPE, - "Maximum precision") - .addPrimitiveColumn("LITERAL_PREFIX", Type.STRING_TYPE, - "Prefix used to quote a literal (may be null)") - .addPrimitiveColumn("LITERAL_SUFFIX", Type.STRING_TYPE, - "Suffix used to quote a literal (may be null)") - .addPrimitiveColumn("CREATE_PARAMS", Type.STRING_TYPE, - "Parameters used in creating the type (may be null)") - .addPrimitiveColumn("NULLABLE", Type.SMALLINT_TYPE, - "Can you use NULL for this type") - .addPrimitiveColumn("CASE_SENSITIVE", Type.BOOLEAN_TYPE, - "Is it case sensitive") - .addPrimitiveColumn("SEARCHABLE", Type.SMALLINT_TYPE, - "Can you use \"WHERE\" based on this type") - .addPrimitiveColumn("UNSIGNED_ATTRIBUTE", Type.BOOLEAN_TYPE, - "Is it unsigned") - .addPrimitiveColumn("FIXED_PREC_SCALE", Type.BOOLEAN_TYPE, - "Can it be a money value") - .addPrimitiveColumn("AUTO_INCREMENT", Type.BOOLEAN_TYPE, - "Can it be used for an auto-increment value") - .addPrimitiveColumn("LOCAL_TYPE_NAME", Type.STRING_TYPE, - "Localized version of type name (may be null)") - .addPrimitiveColumn("MINIMUM_SCALE", Type.SMALLINT_TYPE, - "Minimum scale supported") - .addPrimitiveColumn("MAXIMUM_SCALE", Type.SMALLINT_TYPE, - "Maximum scale supported") - .addPrimitiveColumn("SQL_DATA_TYPE", Type.INT_TYPE, - "Unused") - .addPrimitiveColumn("SQL_DATETIME_SUB", Type.INT_TYPE, - "Unused") - .addPrimitiveColumn("NUM_PREC_RADIX", Type.INT_TYPE, - "Usually 2 or 10"); - - protected final RowSet rowSet; - - protected GetTypeInfoOperation(HiveSession parentSession) { - super(parentSession, OperationType.GET_TYPE_INFO); - rowSet = RowSetFactory.create(RESULT_SET_SCHEMA, getProtocolVersion()); - } - - @Override - public void runInternal() throws HiveSQLException { - setState(OperationState.RUNNING); - if (isAuthV2Enabled()) { - authorizeMetaGets(HiveOperationType.GET_TYPEINFO, null); - } - try { - for (Type type : Type.values()) { - Object[] rowData = new Object[] { - type.getName(), // TYPE_NAME - type.toJavaSQLType(), // DATA_TYPE - 
type.getMaxPrecision(), // PRECISION - type.getLiteralPrefix(), // LITERAL_PREFIX - type.getLiteralSuffix(), // LITERAL_SUFFIX - type.getCreateParams(), // CREATE_PARAMS - type.getNullable(), // NULLABLE - type.isCaseSensitive(), // CASE_SENSITIVE - type.getSearchable(), // SEARCHABLE - type.isUnsignedAttribute(), // UNSIGNED_ATTRIBUTE - type.isFixedPrecScale(), // FIXED_PREC_SCALE - type.isAutoIncrement(), // AUTO_INCREMENT - type.getLocalizedName(), // LOCAL_TYPE_NAME - type.getMinimumScale(), // MINIMUM_SCALE - type.getMaximumScale(), // MAXIMUM_SCALE - null, // SQL_DATA_TYPE, unused - null, // SQL_DATETIME_SUB, unused - type.getNumPrecRadix() //NUM_PREC_RADIX - }; - rowSet.addRow(rowData); - } - setState(OperationState.FINISHED); - } catch (Exception e) { - setState(OperationState.ERROR); - throw new HiveSQLException(e); - } - } - - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getResultSetSchema() - */ - @Override - public TableSchema getResultSetSchema() throws HiveSQLException { - assertState(OperationState.FINISHED); - return RESULT_SET_SCHEMA; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getNextRowSet(org.apache.hive.service.cli.FetchOrientation, long) - */ - @Override - public RowSet getNextRowSet(FetchOrientation orientation, long maxRows) throws HiveSQLException { - assertState(OperationState.FINISHED); - validateDefaultFetchOrientation(orientation); - if (orientation.equals(FetchOrientation.FETCH_FIRST)) { - rowSet.setStartOffset(0); - } - return rowSet.extractSubset((int)maxRows); - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java deleted file mode 100644 index 5b6e6ad042412..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java +++ /dev/null @@ -1,215 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.service.cli.operation; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.FileReader; -import java.io.IOException; -import java.io.PrintStream; -import java.io.UnsupportedEncodingException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import static java.nio.charset.StandardCharsets.UTF_8; - -import org.apache.hadoop.hive.metastore.api.Schema; -import org.apache.hadoop.hive.ql.processors.CommandProcessor; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.io.IOUtils; -import org.apache.hive.service.cli.FetchOrientation; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationState; -import org.apache.hive.service.cli.RowSet; -import org.apache.hive.service.cli.RowSetFactory; -import org.apache.hive.service.cli.TableSchema; -import org.apache.hive.service.cli.session.HiveSession; - -/** - * Executes a HiveCommand - */ -public class HiveCommandOperation extends ExecuteStatementOperation { - private CommandProcessor commandProcessor; - private TableSchema resultSchema = null; - - /** - * For processors other than Hive queries (Driver), they output to session.out (a temp file) - * first and the fetchOne/fetchN/fetchAll functions get the output from pipeIn. - */ - private BufferedReader resultReader; - - - protected HiveCommandOperation(HiveSession parentSession, String statement, - CommandProcessor commandProcessor, Map confOverlay) { - super(parentSession, statement, confOverlay, false); - this.commandProcessor = commandProcessor; - setupSessionIO(parentSession.getSessionState()); - } - - private void setupSessionIO(SessionState sessionState) { - try { - LOG.info("Putting temp output to file " + sessionState.getTmpOutputFile().toString()); - sessionState.in = null; // hive server's session input stream is not used - // open a per-session file in auto-flush mode for writing temp results - sessionState.out = new PrintStream(new FileOutputStream(sessionState.getTmpOutputFile()), true, UTF_8.name()); - // TODO: for hadoop jobs, progress is printed out to session.err, - // we should find a way to feed back job progress to client - sessionState.err = new PrintStream(System.err, true, UTF_8.name()); - } catch (IOException e) { - LOG.error("Error in creating temp output file ", e); - try { - sessionState.in = null; - sessionState.out = new PrintStream(System.out, true, UTF_8.name()); - sessionState.err = new PrintStream(System.err, true, UTF_8.name()); - } catch (UnsupportedEncodingException ee) { - LOG.error("Error creating PrintStream", e); - ee.printStackTrace(); - sessionState.out = null; - sessionState.err = null; - } - } - } - - - private void tearDownSessionIO() { - IOUtils.cleanup(LOG, parentSession.getSessionState().out); - IOUtils.cleanup(LOG, parentSession.getSessionState().err); - } - - @Override - public void runInternal() throws HiveSQLException { - setState(OperationState.RUNNING); - try { - String command = getStatement().trim(); - String[] tokens = statement.split("\\s"); - String commandArgs = command.substring(tokens[0].length()).trim(); - - CommandProcessorResponse response = commandProcessor.run(commandArgs); - int returnCode = response.getResponseCode(); - if (returnCode != 0) { - throw toSQLException("Error while processing statement", response); - } - Schema schema = 
response.getSchema(); - if (schema != null) { - setHasResultSet(true); - resultSchema = new TableSchema(schema); - } else { - setHasResultSet(false); - resultSchema = new TableSchema(); - } - } catch (HiveSQLException e) { - setState(OperationState.ERROR); - throw e; - } catch (Exception e) { - setState(OperationState.ERROR); - throw new HiveSQLException("Error running query: " + e.toString(), e); - } - setState(OperationState.FINISHED); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.operation.Operation#close() - */ - @Override - public void close() throws HiveSQLException { - setState(OperationState.CLOSED); - tearDownSessionIO(); - cleanTmpFile(); - cleanupOperationLog(); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.operation.Operation#getResultSetSchema() - */ - @Override - public TableSchema getResultSetSchema() throws HiveSQLException { - return resultSchema; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.operation.Operation#getNextRowSet(org.apache.hive.service.cli.FetchOrientation, long) - */ - @Override - public RowSet getNextRowSet(FetchOrientation orientation, long maxRows) throws HiveSQLException { - validateDefaultFetchOrientation(orientation); - if (orientation.equals(FetchOrientation.FETCH_FIRST)) { - resetResultReader(); - } - List rows = readResults((int) maxRows); - RowSet rowSet = RowSetFactory.create(resultSchema, getProtocolVersion()); - - for (String row : rows) { - rowSet.addRow(new String[] {row}); - } - return rowSet; - } - - /** - * Reads the temporary results for non-Hive (non-Driver) commands to the - * resulting List of strings. - * @param nLines number of lines read at once. If it is <= 0, then read all lines. - */ - private List readResults(int nLines) throws HiveSQLException { - if (resultReader == null) { - SessionState sessionState = getParentSession().getSessionState(); - File tmp = sessionState.getTmpOutputFile(); - try { - resultReader = new BufferedReader(new FileReader(tmp)); - } catch (FileNotFoundException e) { - LOG.error("File " + tmp + " not found. ", e); - throw new HiveSQLException(e); - } - } - List results = new ArrayList(); - - for (int i = 0; i < nLines || nLines <= 0; ++i) { - try { - String line = resultReader.readLine(); - if (line == null) { - // reached the end of the result file - break; - } else { - results.add(line); - } - } catch (IOException e) { - LOG.error("Reading temp results encountered an exception: ", e); - throw new HiveSQLException(e); - } - } - return results; - } - - private void cleanTmpFile() { - resetResultReader(); - SessionState sessionState = getParentSession().getSessionState(); - File tmp = sessionState.getTmpOutputFile(); - tmp.delete(); - } - - private void resetResultReader() { - if (resultReader != null) { - IOUtils.cleanup(LOG, resultReader); - resultReader = null; - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/HiveTableTypeMapping.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/HiveTableTypeMapping.java deleted file mode 100644 index b530f217125b8..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/HiveTableTypeMapping.java +++ /dev/null @@ -1,51 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
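HiveCommandOperation.runInternal above strips the leading command word before handing the remainder to the CommandProcessor. A standalone sketch of that tokenization using plain string handling only (no Hive dependencies; the class name is illustrative):

```java
public class CommandTokenizerSketch {
  public static void main(String[] args) {
    String statement = "set hive.exec.parallel=true";

    // Same splitting as runInternal: the first whitespace-delimited token is the
    // command name, the rest of the line is what the CommandProcessor receives.
    String command = statement.trim();
    String[] tokens = statement.split("\\s");
    String commandArgs = command.substring(tokens[0].length()).trim();

    System.out.println(tokens[0]);    // set
    System.out.println(commandArgs);  // hive.exec.parallel=true
  }
}
```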
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.operation; - -import java.util.HashSet; -import java.util.Set; - -import org.apache.hadoop.hive.metastore.TableType; - -/** - * HiveTableTypeMapping. - * Default table type mapping - * - */ -public class HiveTableTypeMapping implements TableTypeMapping { - - @Override - public String mapToHiveType(String clientTypeName) { - return clientTypeName; - } - - @Override - public String mapToClientType(String hiveTypeName) { - return hiveTypeName; - } - - @Override - public Set getTableTypeNames() { - Set typeNameSet = new HashSet(); - for (TableType typeNames : TableType.values()) { - typeNameSet.add(typeNames.toString()); - } - return typeNameSet; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/MetadataOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/MetadataOperation.java deleted file mode 100644 index 6c819876a556d..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/MetadataOperation.java +++ /dev/null @@ -1,134 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.operation; - -import java.util.List; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzContext; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationState; -import org.apache.hive.service.cli.OperationType; -import org.apache.hive.service.cli.TableSchema; -import org.apache.hive.service.cli.session.HiveSession; - -/** - * MetadataOperation. 
- * - */ -public abstract class MetadataOperation extends Operation { - - protected static final String DEFAULT_HIVE_CATALOG = ""; - protected static TableSchema RESULT_SET_SCHEMA; - private static final char SEARCH_STRING_ESCAPE = '\\'; - - protected MetadataOperation(HiveSession parentSession, OperationType opType) { - super(parentSession, opType, false); - setHasResultSet(true); - } - - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#close() - */ - @Override - public void close() throws HiveSQLException { - setState(OperationState.CLOSED); - cleanupOperationLog(); - } - - /** - * Convert wildchars and escape sequence from JDBC format to datanucleous/regex - */ - protected String convertIdentifierPattern(final String pattern, boolean datanucleusFormat) { - if (pattern == null) { - return convertPattern("%", true); - } else { - return convertPattern(pattern, datanucleusFormat); - } - } - - /** - * Convert wildchars and escape sequence of schema pattern from JDBC format to datanucleous/regex - * The schema pattern treats empty string also as wildchar - */ - protected String convertSchemaPattern(final String pattern) { - if ((pattern == null) || pattern.isEmpty()) { - return convertPattern("%", true); - } else { - return convertPattern(pattern, true); - } - } - - /** - * Convert a pattern containing JDBC catalog search wildcards into - * Java regex patterns. - * - * @param pattern input which may contain '%' or '_' wildcard characters, or - * these characters escaped using {@link #getSearchStringEscape()}. - * @return replace %/_ with regex search characters, also handle escaped - * characters. - * - * The datanucleus module expects the wildchar as '*'. The columns search on the - * other hand is done locally inside the hive code and that requires the regex wildchar - * format '.*' This is driven by the datanucleusFormat flag. 
- */ - private String convertPattern(final String pattern, boolean datanucleusFormat) { - String wStr; - if (datanucleusFormat) { - wStr = "*"; - } else { - wStr = ".*"; - } - return pattern - .replaceAll("([^\\\\])%", "$1" + wStr).replaceAll("\\\\%", "%").replaceAll("^%", wStr) - .replaceAll("([^\\\\])_", "$1.").replaceAll("\\\\_", "_").replaceAll("^_", "."); - } - - protected boolean isAuthV2Enabled(){ - SessionState ss = SessionState.get(); - return (ss.isAuthorizationModeV2() && - HiveConf.getBoolVar(ss.getConf(), HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)); - } - - protected void authorizeMetaGets(HiveOperationType opType, List inpObjs) - throws HiveSQLException { - authorizeMetaGets(opType, inpObjs, null); - } - - protected void authorizeMetaGets(HiveOperationType opType, List inpObjs, - String cmdString) throws HiveSQLException { - SessionState ss = SessionState.get(); - HiveAuthzContext.Builder ctxBuilder = new HiveAuthzContext.Builder(); - ctxBuilder.setUserIpAddress(ss.getUserIpAddress()); - ctxBuilder.setCommandString(cmdString); - try { - ss.getAuthorizerV2().checkPrivileges(opType, inpObjs, null, - ctxBuilder.build()); - } catch (HiveAuthzPluginException | HiveAccessControlException e) { - throw new HiveSQLException(e.getMessage(), e); - } - } - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/Operation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/Operation.java deleted file mode 100644 index 4b331423948fa..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/Operation.java +++ /dev/null @@ -1,328 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
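convertPattern above rewrites JDBC search patterns with a chain of replaceAll calls: an unescaped '%' becomes the engine wildcard ('*' for the datanucleus path, '.*' for the local regex path), an unescaped '_' always becomes '.', and backslash-escaped wildcards are unescaped back to literal characters. A standalone sketch that reproduces the conversion chain so the outputs can be checked outside MetadataOperation (the wrapper class name is illustrative):

```java
public class PatternConversionSketch {

  // Reproduction of MetadataOperation.convertPattern: JDBC '%'/'_' wildcards to
  // datanucleus ('*') or Java-regex ('.*') form, honouring '\' escapes.
  static String convertPattern(final String pattern, boolean datanucleusFormat) {
    String wStr = datanucleusFormat ? "*" : ".*";
    return pattern
        .replaceAll("([^\\\\])%", "$1" + wStr).replaceAll("\\\\%", "%").replaceAll("^%", wStr)
        .replaceAll("([^\\\\])_", "$1.").replaceAll("\\\\_", "_").replaceAll("^_", ".");
  }

  public static void main(String[] args) {
    System.out.println(convertPattern("%", true));           // *        (match-all schema/table pattern)
    System.out.println(convertPattern("emp%", true));        // emp*     (datanucleus prefix match)
    System.out.println(convertPattern("emp%", false));       // emp.*    (regex form used for column filtering)
    System.out.println(convertPattern("col_name", false));   // col.name ('_' = any single character)
    System.out.println(convertPattern("col\\_name", false)); // col_name (escaped '_' stays literal)
  }
}
```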
- */ -package org.apache.hive.service.cli.operation; - -import java.io.File; -import java.io.FileNotFoundException; -import java.util.EnumSet; -import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.hive.ql.session.OperationLog; -import org.apache.hive.service.cli.FetchOrientation; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationHandle; -import org.apache.hive.service.cli.OperationState; -import org.apache.hive.service.cli.OperationStatus; -import org.apache.hive.service.cli.OperationType; -import org.apache.hive.service.cli.RowSet; -import org.apache.hive.service.cli.TableSchema; -import org.apache.hive.service.cli.session.HiveSession; -import org.apache.hive.service.cli.thrift.TProtocolVersion; - -public abstract class Operation { - protected final HiveSession parentSession; - private OperationState state = OperationState.INITIALIZED; - private final OperationHandle opHandle; - private HiveConf configuration; - public static final Log LOG = LogFactory.getLog(Operation.class.getName()); - public static final FetchOrientation DEFAULT_FETCH_ORIENTATION = FetchOrientation.FETCH_NEXT; - public static final long DEFAULT_FETCH_MAX_ROWS = 100; - protected boolean hasResultSet; - protected volatile HiveSQLException operationException; - protected final boolean runAsync; - protected volatile Future backgroundHandle; - protected OperationLog operationLog; - protected boolean isOperationLogEnabled; - - private long operationTimeout; - private long lastAccessTime; - - protected static final EnumSet DEFAULT_FETCH_ORIENTATION_SET = - EnumSet.of( - FetchOrientation.FETCH_NEXT, - FetchOrientation.FETCH_FIRST, - FetchOrientation.FETCH_PRIOR); - - protected Operation(HiveSession parentSession, OperationType opType, boolean runInBackground) { - this.parentSession = parentSession; - this.runAsync = runInBackground; - this.opHandle = new OperationHandle(opType, parentSession.getProtocolVersion()); - lastAccessTime = System.currentTimeMillis(); - operationTimeout = HiveConf.getTimeVar(parentSession.getHiveConf(), - HiveConf.ConfVars.HIVE_SERVER2_IDLE_OPERATION_TIMEOUT, TimeUnit.MILLISECONDS); - } - - public Future getBackgroundHandle() { - return backgroundHandle; - } - - protected void setBackgroundHandle(Future backgroundHandle) { - this.backgroundHandle = backgroundHandle; - } - - public boolean shouldRunAsync() { - return runAsync; - } - - public void setConfiguration(HiveConf configuration) { - this.configuration = new HiveConf(configuration); - } - - public HiveConf getConfiguration() { - return new HiveConf(configuration); - } - - public HiveSession getParentSession() { - return parentSession; - } - - public OperationHandle getHandle() { - return opHandle; - } - - public TProtocolVersion getProtocolVersion() { - return opHandle.getProtocolVersion(); - } - - public OperationType getType() { - return opHandle.getOperationType(); - } - - public OperationStatus getStatus() { - return new OperationStatus(state, operationException); - } - - public boolean hasResultSet() { - return hasResultSet; - } - - protected void setHasResultSet(boolean hasResultSet) { - this.hasResultSet = hasResultSet; - opHandle.setHasResultSet(hasResultSet); - } - - public OperationLog getOperationLog() { - return operationLog; - } - - 
protected final OperationState setState(OperationState newState) throws HiveSQLException { - state.validateTransition(newState); - this.state = newState; - this.lastAccessTime = System.currentTimeMillis(); - return this.state; - } - - public boolean isTimedOut(long current) { - if (operationTimeout == 0) { - return false; - } - if (operationTimeout > 0) { - // check only when it's in terminal state - return state.isTerminal() && lastAccessTime + operationTimeout <= current; - } - return lastAccessTime + -operationTimeout <= current; - } - - public long getLastAccessTime() { - return lastAccessTime; - } - - public long getOperationTimeout() { - return operationTimeout; - } - - public void setOperationTimeout(long operationTimeout) { - this.operationTimeout = operationTimeout; - } - - protected void setOperationException(HiveSQLException operationException) { - this.operationException = operationException; - } - - protected final void assertState(OperationState state) throws HiveSQLException { - if (this.state != state) { - throw new HiveSQLException("Expected state " + state + ", but found " + this.state); - } - this.lastAccessTime = System.currentTimeMillis(); - } - - public boolean isRunning() { - return OperationState.RUNNING.equals(state); - } - - public boolean isFinished() { - return OperationState.FINISHED.equals(state); - } - - public boolean isCanceled() { - return OperationState.CANCELED.equals(state); - } - - public boolean isFailed() { - return OperationState.ERROR.equals(state); - } - - protected void createOperationLog() { - if (parentSession.isOperationLogEnabled()) { - File operationLogFile = new File(parentSession.getOperationLogSessionDir(), - opHandle.getHandleIdentifier().toString()); - isOperationLogEnabled = true; - - // create log file - try { - if (operationLogFile.exists()) { - LOG.warn("The operation log file should not exist, but it is already there: " + - operationLogFile.getAbsolutePath()); - operationLogFile.delete(); - } - if (!operationLogFile.createNewFile()) { - // the log file already exists and cannot be deleted. - // If it can be read/written, keep its contents and use it. - if (!operationLogFile.canRead() || !operationLogFile.canWrite()) { - LOG.warn("The already existed operation log file cannot be recreated, " + - "and it cannot be read or written: " + operationLogFile.getAbsolutePath()); - isOperationLogEnabled = false; - return; - } - } - } catch (Exception e) { - LOG.warn("Unable to create operation log file: " + operationLogFile.getAbsolutePath(), e); - isOperationLogEnabled = false; - return; - } - - // create OperationLog object with above log file - try { - operationLog = new OperationLog(opHandle.toString(), operationLogFile, parentSession.getHiveConf()); - } catch (FileNotFoundException e) { - LOG.warn("Unable to instantiate OperationLog object for operation: " + - opHandle, e); - isOperationLogEnabled = false; - return; - } - - // register this operationLog to current thread - OperationLog.setCurrentOperationLog(operationLog); - } - } - - protected void unregisterOperationLog() { - if (isOperationLogEnabled) { - OperationLog.removeCurrentOperationLog(); - } - } - - /** - * Invoked before runInternal(). - * Set up some preconditions, or configurations. - */ - protected void beforeRun() { - createOperationLog(); - } - - /** - * Invoked after runInternal(), even if an exception is thrown in runInternal(). - * Clean up resources, which was set up in beforeRun(). 
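isTimedOut above distinguishes three regimes: a timeout of 0 never expires the operation, a positive timeout only expires operations already in a terminal state, and a negative timeout expires the operation after the absolute value regardless of state. A distilled sketch of that rule with the terminal-state check passed in as a boolean (the real code calls state.isTerminal(); the class and method names are illustrative):

```java
public class OperationTimeoutSketch {

  // Same decision as Operation.isTimedOut, with the terminal-state check supplied by the caller.
  static boolean isTimedOut(long operationTimeout, long lastAccessTime,
                            boolean stateIsTerminal, long current) {
    if (operationTimeout == 0) {
      return false;                                             // never times out
    }
    if (operationTimeout > 0) {
      // positive timeout: only finished/canceled/closed operations expire
      return stateIsTerminal && lastAccessTime + operationTimeout <= current;
    }
    // negative timeout: expires after |timeout| ms no matter what state it is in
    return lastAccessTime + -operationTimeout <= current;
  }

  public static void main(String[] args) {
    long last = 1_000L;
    System.out.println(isTimedOut(0, last, true, 10_000L));       // false
    System.out.println(isTimedOut(5_000, last, false, 10_000L));  // false (still running)
    System.out.println(isTimedOut(5_000, last, true, 10_000L));   // true
    System.out.println(isTimedOut(-5_000, last, false, 10_000L)); // true
  }
}
```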
- */ - protected void afterRun() { - unregisterOperationLog(); - } - - /** - * Implemented by subclass of Operation class to execute specific behaviors. - * @throws HiveSQLException - */ - protected abstract void runInternal() throws HiveSQLException; - - public void run() throws HiveSQLException { - beforeRun(); - try { - runInternal(); - } finally { - afterRun(); - } - } - - protected void cleanupOperationLog() { - if (isOperationLogEnabled) { - if (operationLog == null) { - LOG.error("Operation [ " + opHandle.getHandleIdentifier() + " ] " - + "logging is enabled, but its OperationLog object cannot be found."); - } else { - operationLog.close(); - } - } - } - - // TODO: make this abstract and implement in subclasses. - public void cancel() throws HiveSQLException { - setState(OperationState.CANCELED); - throw new UnsupportedOperationException("SQLOperation.cancel()"); - } - - public void close() throws HiveSQLException { - setState(OperationState.CLOSED); - cleanupOperationLog(); - } - - public abstract TableSchema getResultSetSchema() throws HiveSQLException; - - public abstract RowSet getNextRowSet(FetchOrientation orientation, long maxRows) throws HiveSQLException; - - public RowSet getNextRowSet() throws HiveSQLException { - return getNextRowSet(FetchOrientation.FETCH_NEXT, DEFAULT_FETCH_MAX_ROWS); - } - - /** - * Verify if the given fetch orientation is part of the default orientation types. - * @param orientation - * @throws HiveSQLException - */ - protected void validateDefaultFetchOrientation(FetchOrientation orientation) - throws HiveSQLException { - validateFetchOrientation(orientation, DEFAULT_FETCH_ORIENTATION_SET); - } - - /** - * Verify if the given fetch orientation is part of the supported orientation types. - * @param orientation - * @param supportedOrientations - * @throws HiveSQLException - */ - protected void validateFetchOrientation(FetchOrientation orientation, - EnumSet supportedOrientations) throws HiveSQLException { - if (!supportedOrientations.contains(orientation)) { - throw new HiveSQLException("The fetch type " + orientation.toString() + - " is not supported for this resultset", "HY106"); - } - } - - protected HiveSQLException toSQLException(String prefix, CommandProcessorResponse response) { - HiveSQLException ex = new HiveSQLException(prefix + ": " + response.getErrorMessage(), - response.getSQLState(), response.getResponseCode()); - if (response.getException() != null) { - ex.initCause(response.getException()); - } - return ex; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java deleted file mode 100644 index 92c340a29c107..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java +++ /dev/null @@ -1,284 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.operation; - -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.Schema; -import org.apache.hadoop.hive.ql.session.OperationLog; -import org.apache.hive.service.AbstractService; -import org.apache.hive.service.cli.FetchOrientation; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationHandle; -import org.apache.hive.service.cli.OperationState; -import org.apache.hive.service.cli.OperationStatus; -import org.apache.hive.service.cli.RowSet; -import org.apache.hive.service.cli.RowSetFactory; -import org.apache.hive.service.cli.TableSchema; -import org.apache.hive.service.cli.session.HiveSession; -import org.apache.log4j.Appender; -import org.apache.log4j.Logger; - -/** - * OperationManager. - * - */ -public class OperationManager extends AbstractService { - private final Log LOG = LogFactory.getLog(OperationManager.class.getName()); - - private final Map handleToOperation = - new HashMap(); - - public OperationManager() { - super(OperationManager.class.getSimpleName()); - } - - @Override - public synchronized void init(HiveConf hiveConf) { - if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_ENABLED)) { - initOperationLogCapture(hiveConf.getVar( - HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_LEVEL)); - } else { - LOG.debug("Operation level logging is turned off"); - } - super.init(hiveConf); - } - - @Override - public synchronized void start() { - super.start(); - // TODO - } - - @Override - public synchronized void stop() { - // TODO - super.stop(); - } - - private void initOperationLogCapture(String loggingMode) { - // Register another Appender (with the same layout) that talks to us. 
- Appender ap = new LogDivertAppender(this, OperationLog.getLoggingLevel(loggingMode)); - Logger.getRootLogger().addAppender(ap); - } - - public ExecuteStatementOperation newExecuteStatementOperation(HiveSession parentSession, - String statement, Map confOverlay, boolean runAsync) - throws HiveSQLException { - ExecuteStatementOperation executeStatementOperation = ExecuteStatementOperation - .newExecuteStatementOperation(parentSession, statement, confOverlay, runAsync); - addOperation(executeStatementOperation); - return executeStatementOperation; - } - - public GetTypeInfoOperation newGetTypeInfoOperation(HiveSession parentSession) { - GetTypeInfoOperation operation = new GetTypeInfoOperation(parentSession); - addOperation(operation); - return operation; - } - - public GetCatalogsOperation newGetCatalogsOperation(HiveSession parentSession) { - GetCatalogsOperation operation = new GetCatalogsOperation(parentSession); - addOperation(operation); - return operation; - } - - public GetSchemasOperation newGetSchemasOperation(HiveSession parentSession, - String catalogName, String schemaName) { - GetSchemasOperation operation = new GetSchemasOperation(parentSession, catalogName, schemaName); - addOperation(operation); - return operation; - } - - public MetadataOperation newGetTablesOperation(HiveSession parentSession, - String catalogName, String schemaName, String tableName, - List tableTypes) { - MetadataOperation operation = - new GetTablesOperation(parentSession, catalogName, schemaName, tableName, tableTypes); - addOperation(operation); - return operation; - } - - public GetTableTypesOperation newGetTableTypesOperation(HiveSession parentSession) { - GetTableTypesOperation operation = new GetTableTypesOperation(parentSession); - addOperation(operation); - return operation; - } - - public GetColumnsOperation newGetColumnsOperation(HiveSession parentSession, - String catalogName, String schemaName, String tableName, String columnName) { - GetColumnsOperation operation = new GetColumnsOperation(parentSession, - catalogName, schemaName, tableName, columnName); - addOperation(operation); - return operation; - } - - public GetFunctionsOperation newGetFunctionsOperation(HiveSession parentSession, - String catalogName, String schemaName, String functionName) { - GetFunctionsOperation operation = new GetFunctionsOperation(parentSession, - catalogName, schemaName, functionName); - addOperation(operation); - return operation; - } - - public Operation getOperation(OperationHandle operationHandle) throws HiveSQLException { - Operation operation = getOperationInternal(operationHandle); - if (operation == null) { - throw new HiveSQLException("Invalid OperationHandle: " + operationHandle); - } - return operation; - } - - private synchronized Operation getOperationInternal(OperationHandle operationHandle) { - return handleToOperation.get(operationHandle); - } - - private synchronized Operation removeTimedOutOperation(OperationHandle operationHandle) { - Operation operation = handleToOperation.get(operationHandle); - if (operation != null && operation.isTimedOut(System.currentTimeMillis())) { - handleToOperation.remove(operationHandle); - return operation; - } - return null; - } - - private synchronized void addOperation(Operation operation) { - handleToOperation.put(operation.getHandle(), operation); - } - - private synchronized Operation removeOperation(OperationHandle opHandle) { - return handleToOperation.remove(opHandle); - } - - public OperationStatus getOperationStatus(OperationHandle opHandle) - throws 
HiveSQLException { - return getOperation(opHandle).getStatus(); - } - - public void cancelOperation(OperationHandle opHandle) throws HiveSQLException { - Operation operation = getOperation(opHandle); - OperationState opState = operation.getStatus().getState(); - if (opState == OperationState.CANCELED || - opState == OperationState.CLOSED || - opState == OperationState.FINISHED || - opState == OperationState.ERROR || - opState == OperationState.UNKNOWN) { - // Cancel should be a no-op in either cases - LOG.debug(opHandle + ": Operation is already aborted in state - " + opState); - } - else { - LOG.debug(opHandle + ": Attempting to cancel from state - " + opState); - operation.cancel(); - } - } - - public void closeOperation(OperationHandle opHandle) throws HiveSQLException { - Operation operation = removeOperation(opHandle); - if (operation == null) { - throw new HiveSQLException("Operation does not exist!"); - } - operation.close(); - } - - public TableSchema getOperationResultSetSchema(OperationHandle opHandle) - throws HiveSQLException { - return getOperation(opHandle).getResultSetSchema(); - } - - public RowSet getOperationNextRowSet(OperationHandle opHandle) - throws HiveSQLException { - return getOperation(opHandle).getNextRowSet(); - } - - public RowSet getOperationNextRowSet(OperationHandle opHandle, - FetchOrientation orientation, long maxRows) - throws HiveSQLException { - return getOperation(opHandle).getNextRowSet(orientation, maxRows); - } - - public RowSet getOperationLogRowSet(OperationHandle opHandle, - FetchOrientation orientation, long maxRows) - throws HiveSQLException { - // get the OperationLog object from the operation - OperationLog operationLog = getOperation(opHandle).getOperationLog(); - if (operationLog == null) { - throw new HiveSQLException("Couldn't find log associated with operation handle: " + opHandle); - } - - // read logs - List logs; - try { - logs = operationLog.readOperationLog(isFetchFirst(orientation), maxRows); - } catch (SQLException e) { - throw new HiveSQLException(e.getMessage(), e.getCause()); - } - - - // convert logs to RowSet - TableSchema tableSchema = new TableSchema(getLogSchema()); - RowSet rowSet = RowSetFactory.create(tableSchema, getOperation(opHandle).getProtocolVersion()); - for (String log : logs) { - rowSet.addRow(new String[] {log}); - } - - return rowSet; - } - - private boolean isFetchFirst(FetchOrientation fetchOrientation) { - //TODO: Since OperationLog is moved to package o.a.h.h.ql.session, - // we may add a Enum there and map FetchOrientation to it. 
- if (fetchOrientation.equals(FetchOrientation.FETCH_FIRST)) { - return true; - } - return false; - } - - private Schema getLogSchema() { - Schema schema = new Schema(); - FieldSchema fieldSchema = new FieldSchema(); - fieldSchema.setName("operation_log"); - fieldSchema.setType("string"); - schema.addToFieldSchemas(fieldSchema); - return schema; - } - - public OperationLog getOperationLogByThread() { - return OperationLog.getCurrentOperationLog(); - } - - public List removeExpiredOperations(OperationHandle[] handles) { - List removed = new ArrayList(); - for (OperationHandle handle : handles) { - Operation operation = removeTimedOutOperation(handle); - if (operation != null) { - LOG.warn("Operation " + handle + " is timed-out and will be closed"); - removed.add(operation); - } - } - return removed; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java deleted file mode 100644 index c7726f1fac07a..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java +++ /dev/null @@ -1,456 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.service.cli.operation; - -import java.io.IOException; -import java.io.Serializable; -import java.nio.charset.StandardCharsets; -import java.security.PrivilegedExceptionAction; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.concurrent.Future; -import java.util.concurrent.RejectedExecutionException; - -import org.apache.commons.codec.binary.Base64; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.Schema; -import org.apache.hadoop.hive.ql.CommandNeedRetryException; -import org.apache.hadoop.hive.ql.Driver; -import org.apache.hadoop.hive.ql.exec.ExplainTask; -import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.parse.VariableSubstitution; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.SerDe; -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.hive.serde2.SerDeUtils; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.shims.Utils; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hive.service.cli.FetchOrientation; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationState; -import org.apache.hive.service.cli.RowSet; -import org.apache.hive.service.cli.RowSetFactory; -import org.apache.hive.service.cli.TableSchema; -import org.apache.hive.service.cli.session.HiveSession; -import org.apache.hive.service.server.ThreadWithGarbageCleanup; - -/** - * SQLOperation. - * - */ -public class SQLOperation extends ExecuteStatementOperation { - - private Driver driver = null; - private CommandProcessorResponse response; - private TableSchema resultSchema = null; - private Schema mResultSchema = null; - private SerDe serde = null; - private boolean fetchStarted = false; - - public SQLOperation(HiveSession parentSession, String statement, Map confOverlay, boolean runInBackground) { - // TODO: call setRemoteUser in ExecuteStatementOperation or higher. - super(parentSession, statement, confOverlay, runInBackground); - } - - /*** - * Compile the query and extract metadata - * @param sqlOperationConf - * @throws HiveSQLException - */ - public void prepare(HiveConf sqlOperationConf) throws HiveSQLException { - setState(OperationState.RUNNING); - - try { - driver = new Driver(sqlOperationConf, getParentSession().getUserName()); - - // set the operation handle information in Driver, so that thrift API users - // can use the operation handle they receive, to lookup query information in - // Yarn ATS - String guid64 = Base64.encodeBase64URLSafeString(getHandle().getHandleIdentifier() - .toTHandleIdentifier().getGuid()).trim(); - driver.setOperationId(guid64); - - // In Hive server mode, we are not able to retry in the FetchTask - // case, when calling fetch queries since execute() has returned. 
- // For now, we disable the test attempts. - driver.setTryCount(Integer.MAX_VALUE); - - String subStatement = new VariableSubstitution().substitute(sqlOperationConf, statement); - response = driver.compileAndRespond(subStatement); - if (0 != response.getResponseCode()) { - throw toSQLException("Error while compiling statement", response); - } - - mResultSchema = driver.getSchema(); - - // hasResultSet should be true only if the query has a FetchTask - // "explain" is an exception for now - if(driver.getPlan().getFetchTask() != null) { - //Schema has to be set - if (mResultSchema == null || !mResultSchema.isSetFieldSchemas()) { - throw new HiveSQLException("Error compiling query: Schema and FieldSchema " + - "should be set when query plan has a FetchTask"); - } - resultSchema = new TableSchema(mResultSchema); - setHasResultSet(true); - } else { - setHasResultSet(false); - } - // Set hasResultSet true if the plan has ExplainTask - // TODO explain should use a FetchTask for reading - for (Task task: driver.getPlan().getRootTasks()) { - if (task.getClass() == ExplainTask.class) { - resultSchema = new TableSchema(mResultSchema); - setHasResultSet(true); - break; - } - } - } catch (HiveSQLException e) { - setState(OperationState.ERROR); - throw e; - } catch (Exception e) { - setState(OperationState.ERROR); - throw new HiveSQLException("Error running query: " + e.toString(), e); - } - } - - private void runQuery(HiveConf sqlOperationConf) throws HiveSQLException { - try { - // In Hive server mode, we are not able to retry in the FetchTask - // case, when calling fetch queries since execute() has returned. - // For now, we disable the test attempts. - driver.setTryCount(Integer.MAX_VALUE); - response = driver.run(); - if (0 != response.getResponseCode()) { - throw toSQLException("Error while processing statement", response); - } - } catch (HiveSQLException e) { - // If the operation was cancelled by another thread, - // Driver#run will return a non-zero response code. - // We will simply return if the operation state is CANCELED, - // otherwise throw an exception - if (getStatus().getState() == OperationState.CANCELED) { - return; - } - else { - setState(OperationState.ERROR); - throw e; - } - } catch (Exception e) { - setState(OperationState.ERROR); - throw new HiveSQLException("Error running query: " + e.toString(), e); - } - setState(OperationState.FINISHED); - } - - @Override - public void runInternal() throws HiveSQLException { - setState(OperationState.PENDING); - final HiveConf opConfig = getConfigForOperation(); - prepare(opConfig); - if (!shouldRunAsync()) { - runQuery(opConfig); - } else { - // We'll pass ThreadLocals in the background thread from the foreground (handler) thread - final SessionState parentSessionState = SessionState.get(); - // ThreadLocal Hive object needs to be set in background thread. - // The metastore client in Hive is associated with right user. 
- final Hive parentHive = getSessionHive(); - // Current UGI will get used by metastore when metsatore is in embedded mode - // So this needs to get passed to the new background thread - final UserGroupInformation currentUGI = getCurrentUGI(opConfig); - // Runnable impl to call runInternal asynchronously, - // from a different thread - Runnable backgroundOperation = new Runnable() { - @Override - public void run() { - PrivilegedExceptionAction doAsAction = new PrivilegedExceptionAction() { - @Override - public Object run() throws HiveSQLException { - Hive.set(parentHive); - SessionState.setCurrentSessionState(parentSessionState); - // Set current OperationLog in this async thread for keeping on saving query log. - registerCurrentOperationLog(); - try { - runQuery(opConfig); - } catch (HiveSQLException e) { - setOperationException(e); - LOG.error("Error running hive query: ", e); - } finally { - unregisterOperationLog(); - } - return null; - } - }; - - try { - currentUGI.doAs(doAsAction); - } catch (Exception e) { - setOperationException(new HiveSQLException(e)); - LOG.error("Error running hive query as user : " + currentUGI.getShortUserName(), e); - } - finally { - /** - * We'll cache the ThreadLocal RawStore object for this background thread for an orderly cleanup - * when this thread is garbage collected later. - * @see org.apache.hive.service.server.ThreadWithGarbageCleanup#finalize() - */ - if (ThreadWithGarbageCleanup.currentThread() instanceof ThreadWithGarbageCleanup) { - ThreadWithGarbageCleanup currentThread = - (ThreadWithGarbageCleanup) ThreadWithGarbageCleanup.currentThread(); - currentThread.cacheThreadLocalRawStore(); - } - } - } - }; - try { - // This submit blocks if no background threads are available to run this operation - Future backgroundHandle = - getParentSession().getSessionManager().submitBackgroundOperation(backgroundOperation); - setBackgroundHandle(backgroundHandle); - } catch (RejectedExecutionException rejected) { - setState(OperationState.ERROR); - throw new HiveSQLException("The background threadpool cannot accept" + - " new task for execution, please retry the operation", rejected); - } - } - } - - /** - * Returns the current UGI on the stack - * @param opConfig - * @return UserGroupInformation - * @throws HiveSQLException - */ - private UserGroupInformation getCurrentUGI(HiveConf opConfig) throws HiveSQLException { - try { - return Utils.getUGI(); - } catch (Exception e) { - throw new HiveSQLException("Unable to get current user", e); - } - } - - /** - * Returns the ThreadLocal Hive for the current thread - * @return Hive - * @throws HiveSQLException - */ - private Hive getSessionHive() throws HiveSQLException { - try { - return Hive.get(); - } catch (HiveException e) { - throw new HiveSQLException("Failed to get ThreadLocal Hive object", e); - } - } - - private void cleanup(OperationState state) throws HiveSQLException { - setState(state); - if (shouldRunAsync()) { - Future backgroundHandle = getBackgroundHandle(); - if (backgroundHandle != null) { - backgroundHandle.cancel(true); - } - } - if (driver != null) { - driver.close(); - driver.destroy(); - } - driver = null; - - SessionState ss = SessionState.get(); - if (ss.getTmpOutputFile() != null) { - ss.getTmpOutputFile().delete(); - } - } - - @Override - public void cancel() throws HiveSQLException { - cleanup(OperationState.CANCELED); - } - - @Override - public void close() throws HiveSQLException { - cleanup(OperationState.CLOSED); - cleanupOperationLog(); - } - - @Override - public TableSchema 
getResultSetSchema() throws HiveSQLException { - assertState(OperationState.FINISHED); - if (resultSchema == null) { - resultSchema = new TableSchema(driver.getSchema()); - } - return resultSchema; - } - - private final transient List convey = new ArrayList(); - - @Override - public RowSet getNextRowSet(FetchOrientation orientation, long maxRows) throws HiveSQLException { - validateDefaultFetchOrientation(orientation); - assertState(OperationState.FINISHED); - - RowSet rowSet = RowSetFactory.create(resultSchema, getProtocolVersion()); - - try { - /* if client is requesting fetch-from-start and its not the first time reading from this operation - * then reset the fetch position to beginning - */ - if (orientation.equals(FetchOrientation.FETCH_FIRST) && fetchStarted) { - driver.resetFetch(); - } - fetchStarted = true; - driver.setMaxRows((int) maxRows); - if (driver.getResults(convey)) { - return decode(convey, rowSet); - } - return rowSet; - } catch (IOException e) { - throw new HiveSQLException(e); - } catch (CommandNeedRetryException e) { - throw new HiveSQLException(e); - } catch (Exception e) { - throw new HiveSQLException(e); - } finally { - convey.clear(); - } - } - - private RowSet decode(List rows, RowSet rowSet) throws Exception { - if (driver.isFetchingTable()) { - return prepareFromRow(rows, rowSet); - } - return decodeFromString(rows, rowSet); - } - - // already encoded to thrift-able object in ThriftFormatter - private RowSet prepareFromRow(List rows, RowSet rowSet) throws Exception { - for (Object row : rows) { - rowSet.addRow((Object[]) row); - } - return rowSet; - } - - private RowSet decodeFromString(List rows, RowSet rowSet) - throws SQLException, SerDeException { - getSerDe(); - StructObjectInspector soi = (StructObjectInspector) serde.getObjectInspector(); - List fieldRefs = soi.getAllStructFieldRefs(); - - Object[] deserializedFields = new Object[fieldRefs.size()]; - Object rowObj; - ObjectInspector fieldOI; - - int protocol = getProtocolVersion().getValue(); - for (Object rowString : rows) { - rowObj = serde.deserialize(new BytesWritable(((String)rowString).getBytes(StandardCharsets.UTF_8))); - for (int i = 0; i < fieldRefs.size(); i++) { - StructField fieldRef = fieldRefs.get(i); - fieldOI = fieldRef.getFieldObjectInspector(); - Object fieldData = soi.getStructFieldData(rowObj, fieldRef); - deserializedFields[i] = SerDeUtils.toThriftPayload(fieldData, fieldOI, protocol); - } - rowSet.addRow(deserializedFields); - } - return rowSet; - } - - private SerDe getSerDe() throws SQLException { - if (serde != null) { - return serde; - } - try { - List fieldSchemas = mResultSchema.getFieldSchemas(); - StringBuilder namesSb = new StringBuilder(); - StringBuilder typesSb = new StringBuilder(); - - if (fieldSchemas != null && !fieldSchemas.isEmpty()) { - for (int pos = 0; pos < fieldSchemas.size(); pos++) { - if (pos != 0) { - namesSb.append(","); - typesSb.append(","); - } - namesSb.append(fieldSchemas.get(pos).getName()); - typesSb.append(fieldSchemas.get(pos).getType()); - } - } - String names = namesSb.toString(); - String types = typesSb.toString(); - - serde = new LazySimpleSerDe(); - Properties props = new Properties(); - if (names.length() > 0) { - LOG.debug("Column names: " + names); - props.setProperty(serdeConstants.LIST_COLUMNS, names); - } - if (types.length() > 0) { - LOG.debug("Column types: " + types); - props.setProperty(serdeConstants.LIST_COLUMN_TYPES, types); - } - SerDeUtils.initializeSerDe(serde, new HiveConf(), props, null); - - } catch (Exception ex) { - 
ex.printStackTrace(); - throw new SQLException("Could not create ResultSet: " + ex.getMessage(), ex); - } - return serde; - } - - /** - * If there are query specific settings to overlay, then create a copy of config - * There are two cases we need to clone the session config that's being passed to hive driver - * 1. Async query - - * If the client changes a config setting, that shouldn't reflect in the execution already underway - * 2. confOverlay - - * The query specific settings should only be applied to the query config and not session - * @return new configuration - * @throws HiveSQLException - */ - private HiveConf getConfigForOperation() throws HiveSQLException { - HiveConf sqlOperationConf = getParentSession().getHiveConf(); - if (!getConfOverlay().isEmpty() || shouldRunAsync()) { - // clone the parent session config for this query - sqlOperationConf = new HiveConf(sqlOperationConf); - - // apply overlay query specific settings, if any - for (Map.Entry confEntry : getConfOverlay().entrySet()) { - try { - sqlOperationConf.verifyAndSet(confEntry.getKey(), confEntry.getValue()); - } catch (IllegalArgumentException e) { - throw new HiveSQLException("Error applying statement specific settings", e); - } - } - } - return sqlOperationConf; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/TableTypeMapping.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/TableTypeMapping.java deleted file mode 100644 index e59d19ea6be42..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/TableTypeMapping.java +++ /dev/null @@ -1,44 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.operation; - -import java.util.Set; - - -public interface TableTypeMapping { - /** - * Map client's table type name to hive's table type - * @param clientTypeName - * @return - */ - String mapToHiveType(String clientTypeName); - - /** - * Map hive's table type name to client's table type - * @param hiveTypeName - * @return - */ - String mapToClientType(String hiveTypeName); - - /** - * Get all the table types of this mapping - * @return - */ - Set getTableTypeNames(); -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSession.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSession.java deleted file mode 100644 index 65f9b298bf4f6..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSession.java +++ /dev/null @@ -1,156 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.session; - -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hive.service.auth.HiveAuthFactory; -import org.apache.hive.service.cli.*; - -public interface HiveSession extends HiveSessionBase { - - void open(Map sessionConfMap) throws Exception; - - IMetaStoreClient getMetaStoreClient() throws HiveSQLException; - - /** - * getInfo operation handler - * @param getInfoType - * @return - * @throws HiveSQLException - */ - GetInfoValue getInfo(GetInfoType getInfoType) throws HiveSQLException; - - /** - * execute operation handler - * @param statement - * @param confOverlay - * @return - * @throws HiveSQLException - */ - OperationHandle executeStatement(String statement, - Map confOverlay) throws HiveSQLException; - - /** - * execute operation handler - * @param statement - * @param confOverlay - * @return - * @throws HiveSQLException - */ - OperationHandle executeStatementAsync(String statement, - Map confOverlay) throws HiveSQLException; - - /** - * getTypeInfo operation handler - * @return - * @throws HiveSQLException - */ - OperationHandle getTypeInfo() throws HiveSQLException; - - /** - * getCatalogs operation handler - * @return - * @throws HiveSQLException - */ - OperationHandle getCatalogs() throws HiveSQLException; - - /** - * getSchemas operation handler - * @param catalogName - * @param schemaName - * @return - * @throws HiveSQLException - */ - OperationHandle getSchemas(String catalogName, String schemaName) - throws HiveSQLException; - - /** - * getTables operation handler - * @param catalogName - * @param schemaName - * @param tableName - * @param tableTypes - * @return - * @throws HiveSQLException - */ - OperationHandle getTables(String catalogName, String schemaName, - String tableName, List tableTypes) throws HiveSQLException; - - /** - * getTableTypes operation handler - * @return - * @throws HiveSQLException - */ - OperationHandle getTableTypes() throws HiveSQLException ; - - /** - * getColumns operation handler - * @param catalogName - * @param schemaName - * @param tableName - * @param columnName - * @return - * @throws HiveSQLException - */ - OperationHandle getColumns(String catalogName, String schemaName, - String tableName, String columnName) throws HiveSQLException; - - /** - * getFunctions operation handler - * @param catalogName - * @param schemaName - * @param functionName - * @return - * @throws HiveSQLException - */ - OperationHandle getFunctions(String catalogName, String schemaName, - String functionName) throws HiveSQLException; - - /** - * close the session - * @throws HiveSQLException - */ - void close() throws HiveSQLException; - - void cancelOperation(OperationHandle opHandle) throws HiveSQLException; - - void closeOperation(OperationHandle opHandle) 
throws HiveSQLException; - - TableSchema getResultSetMetadata(OperationHandle opHandle) - throws HiveSQLException; - - RowSet fetchResults(OperationHandle opHandle, FetchOrientation orientation, - long maxRows, FetchType fetchType) throws HiveSQLException; - - String getDelegationToken(HiveAuthFactory authFactory, String owner, - String renewer) throws HiveSQLException; - - void cancelDelegationToken(HiveAuthFactory authFactory, String tokenStr) - throws HiveSQLException; - - void renewDelegationToken(HiveAuthFactory authFactory, String tokenStr) - throws HiveSQLException; - - void closeExpiredOperations(); - - long getNoOperationTime(); -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionBase.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionBase.java deleted file mode 100644 index b72c18b2b2135..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionBase.java +++ /dev/null @@ -1,90 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.session; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hive.service.cli.SessionHandle; -import org.apache.hive.service.cli.operation.OperationManager; -import org.apache.hive.service.cli.thrift.TProtocolVersion; - -import java.io.File; - -/** - * Methods that don't need to be executed under a doAs - * context are here. 
Rest of them in HiveSession interface - */ -public interface HiveSessionBase { - - TProtocolVersion getProtocolVersion(); - - /** - * Set the session manager for the session - * @param sessionManager - */ - void setSessionManager(SessionManager sessionManager); - - /** - * Get the session manager for the session - */ - SessionManager getSessionManager(); - - /** - * Set operation manager for the session - * @param operationManager - */ - void setOperationManager(OperationManager operationManager); - - /** - * Check whether operation logging is enabled and session dir is created successfully - */ - boolean isOperationLogEnabled(); - - /** - * Get the session dir, which is the parent dir of operation logs - * @return a file representing the parent directory of operation logs - */ - File getOperationLogSessionDir(); - - /** - * Set the session dir, which is the parent dir of operation logs - * @param operationLogRootDir the parent dir of the session dir - */ - void setOperationLogSessionDir(File operationLogRootDir); - - SessionHandle getSessionHandle(); - - String getUsername(); - - String getPassword(); - - HiveConf getHiveConf(); - - SessionState getSessionState(); - - String getUserName(); - - void setUserName(String userName); - - String getIpAddress(); - - void setIpAddress(String ipAddress); - - long getLastAccessTime(); -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java deleted file mode 100644 index e3fb54d9f47e9..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java +++ /dev/null @@ -1,842 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.service.cli.session; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import org.apache.commons.io.FileUtils; -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.common.cli.HiveFileProcessor; -import org.apache.hadoop.hive.common.cli.IHiveFileProcessor; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.ql.exec.FetchFormatter; -import org.apache.hadoop.hive.ql.exec.ListSinkOperator; -import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.history.HiveHistory; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.parse.VariableSubstitution; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hive.common.util.HiveVersionInfo; -import org.apache.hive.service.auth.HiveAuthFactory; -import org.apache.hive.service.cli.FetchOrientation; -import org.apache.hive.service.cli.FetchType; -import org.apache.hive.service.cli.GetInfoType; -import org.apache.hive.service.cli.GetInfoValue; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationHandle; -import org.apache.hive.service.cli.RowSet; -import org.apache.hive.service.cli.SessionHandle; -import org.apache.hive.service.cli.TableSchema; -import org.apache.hive.service.cli.operation.ExecuteStatementOperation; -import org.apache.hive.service.cli.operation.GetCatalogsOperation; -import org.apache.hive.service.cli.operation.GetColumnsOperation; -import org.apache.hive.service.cli.operation.GetFunctionsOperation; -import org.apache.hive.service.cli.operation.GetSchemasOperation; -import org.apache.hive.service.cli.operation.GetTableTypesOperation; -import org.apache.hive.service.cli.operation.GetTypeInfoOperation; -import org.apache.hive.service.cli.operation.MetadataOperation; -import org.apache.hive.service.cli.operation.Operation; -import org.apache.hive.service.cli.operation.OperationManager; -import org.apache.hive.service.cli.thrift.TProtocolVersion; -import org.apache.hive.service.server.ThreadWithGarbageCleanup; - -import static org.apache.hadoop.hive.conf.SystemVariables.ENV_PREFIX; -import static org.apache.hadoop.hive.conf.SystemVariables.HIVECONF_PREFIX; -import static org.apache.hadoop.hive.conf.SystemVariables.HIVEVAR_PREFIX; -import static org.apache.hadoop.hive.conf.SystemVariables.METACONF_PREFIX; -import static org.apache.hadoop.hive.conf.SystemVariables.SYSTEM_PREFIX; - -/** - * HiveSession - * - */ -public class HiveSessionImpl implements HiveSession { - private final SessionHandle sessionHandle; - private String username; - private final String password; - private HiveConf hiveConf; - private SessionState sessionState; - private String ipAddress; - private static final String FETCH_WORK_SERDE_CLASS = - "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"; - private static final Log LOG = LogFactory.getLog(HiveSessionImpl.class); - private SessionManager sessionManager; - 
private OperationManager operationManager; - private final Set opHandleSet = new HashSet(); - private boolean isOperationLogEnabled; - private File sessionLogDir; - private volatile long lastAccessTime; - private volatile long lastIdleTime; - - public HiveSessionImpl(TProtocolVersion protocol, String username, String password, - HiveConf serverhiveConf, String ipAddress) { - this.username = username; - this.password = password; - this.sessionHandle = new SessionHandle(protocol); - this.hiveConf = new HiveConf(serverhiveConf); - this.ipAddress = ipAddress; - - try { - // In non-impersonation mode, map scheduler queue to current user - // if fair scheduler is configured. - if (! hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_ENABLE_DOAS) && - hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_MAP_FAIR_SCHEDULER_QUEUE)) { - ShimLoader.getHadoopShims().refreshDefaultQueue(hiveConf, username); - } - } catch (IOException e) { - LOG.warn("Error setting scheduler queue: " + e, e); - } - // Set an explicit session name to control the download directory name - hiveConf.set(ConfVars.HIVESESSIONID.varname, - sessionHandle.getHandleIdentifier().toString()); - // Use thrift transportable formatter - hiveConf.set(ListSinkOperator.OUTPUT_FORMATTER, - FetchFormatter.ThriftFormatter.class.getName()); - hiveConf.setInt(ListSinkOperator.OUTPUT_PROTOCOL, protocol.getValue()); - } - - @Override - /** - * Opens a new HiveServer2 session for the client connection. - * Creates a new SessionState object that will be associated with this HiveServer2 session. - * When the server executes multiple queries in the same session, - * this SessionState object is reused across multiple queries. - * Note that if doAs is true, this call goes through a proxy object, - * which wraps the method logic in a UserGroupInformation#doAs. - * That's why it is important to create SessionState here rather than in the constructor. - */ - public void open(Map sessionConfMap) throws HiveSQLException { - sessionState = new SessionState(hiveConf, username); - sessionState.setUserIpAddress(ipAddress); - sessionState.setIsHiveServerQuery(true); - SessionState.start(sessionState); - try { - sessionState.reloadAuxJars(); - } catch (IOException e) { - String msg = "Failed to load reloadable jar file path: " + e; - LOG.error(msg, e); - throw new HiveSQLException(msg, e); - } - // Process global init file: .hiverc - processGlobalInitFile(); - if (sessionConfMap != null) { - configureSession(sessionConfMap); - } - lastAccessTime = System.currentTimeMillis(); - lastIdleTime = lastAccessTime; - } - - /** - * It is used for processing hiverc file from HiveServer2 side. 
- */ - private class GlobalHivercFileProcessor extends HiveFileProcessor { - @Override - protected BufferedReader loadFile(String fileName) throws IOException { - FileInputStream initStream = null; - BufferedReader bufferedReader = null; - initStream = new FileInputStream(fileName); - bufferedReader = new BufferedReader(new InputStreamReader(initStream)); - return bufferedReader; - } - - @Override - protected int processCmd(String cmd) { - int rc = 0; - String cmd_trimed = cmd.trim(); - try { - executeStatementInternal(cmd_trimed, null, false); - } catch (HiveSQLException e) { - rc = -1; - LOG.warn("Failed to execute HQL command in global .hiverc file.", e); - } - return rc; - } - } - - private void processGlobalInitFile() { - IHiveFileProcessor processor = new GlobalHivercFileProcessor(); - - try { - String hiverc = hiveConf.getVar(ConfVars.HIVE_SERVER2_GLOBAL_INIT_FILE_LOCATION); - if (hiverc != null) { - File hivercFile = new File(hiverc); - if (hivercFile.isDirectory()) { - hivercFile = new File(hivercFile, SessionManager.HIVERCFILE); - } - if (hivercFile.isFile()) { - LOG.info("Running global init file: " + hivercFile); - int rc = processor.processFile(hivercFile.getAbsolutePath()); - if (rc != 0) { - LOG.error("Failed on initializing global .hiverc file"); - } - } else { - LOG.debug("Global init file " + hivercFile + " does not exist"); - } - } - } catch (IOException e) { - LOG.warn("Failed on initializing global .hiverc file", e); - } - } - - private void configureSession(Map sessionConfMap) throws HiveSQLException { - SessionState.setCurrentSessionState(sessionState); - for (Map.Entry entry : sessionConfMap.entrySet()) { - String key = entry.getKey(); - if (key.startsWith("set:")) { - try { - setVariable(key.substring(4), entry.getValue()); - } catch (Exception e) { - throw new HiveSQLException(e); - } - } else if (key.startsWith("use:")) { - SessionState.get().setCurrentDatabase(entry.getValue()); - } else { - hiveConf.verifyAndSet(key, entry.getValue()); - } - } - } - - // Copy from org.apache.hadoop.hive.ql.processors.SetProcessor, only change: - // setConf(varname, propName, varvalue, true) when varname.startsWith(HIVECONF_PREFIX) - public static int setVariable(String varname, String varvalue) throws Exception { - SessionState ss = SessionState.get(); - if (varvalue.contains("\n")){ - ss.err.println("Warning: Value had a \\n character in it."); - } - varname = varname.trim(); - if (varname.startsWith(ENV_PREFIX)){ - ss.err.println("env:* variables can not be set."); - return 1; - } else if (varname.startsWith(SYSTEM_PREFIX)){ - String propName = varname.substring(SYSTEM_PREFIX.length()); - System.getProperties().setProperty(propName, - new VariableSubstitution().substitute(ss.getConf(),varvalue)); - } else if (varname.startsWith(HIVECONF_PREFIX)){ - String propName = varname.substring(HIVECONF_PREFIX.length()); - setConf(varname, propName, varvalue, true); - } else if (varname.startsWith(HIVEVAR_PREFIX)) { - String propName = varname.substring(HIVEVAR_PREFIX.length()); - ss.getHiveVariables().put(propName, - new VariableSubstitution().substitute(ss.getConf(),varvalue)); - } else if (varname.startsWith(METACONF_PREFIX)) { - String propName = varname.substring(METACONF_PREFIX.length()); - Hive hive = Hive.get(ss.getConf()); - hive.setMetaConf(propName, new VariableSubstitution().substitute(ss.getConf(), varvalue)); - } else { - setConf(varname, varname, varvalue, true); - } - return 0; - } - - // returns non-null string for validation fail - private static void setConf(String 
varname, String key, String varvalue, boolean register) - throws IllegalArgumentException { - HiveConf conf = SessionState.get().getConf(); - String value = new VariableSubstitution().substitute(conf, varvalue); - if (conf.getBoolVar(HiveConf.ConfVars.HIVECONFVALIDATION)) { - HiveConf.ConfVars confVars = HiveConf.getConfVars(key); - if (confVars != null) { - if (!confVars.isType(value)) { - StringBuilder message = new StringBuilder(); - message.append("'SET ").append(varname).append('=').append(varvalue); - message.append("' FAILED because ").append(key).append(" expects "); - message.append(confVars.typeString()).append(" type value."); - throw new IllegalArgumentException(message.toString()); - } - String fail = confVars.validate(value); - if (fail != null) { - StringBuilder message = new StringBuilder(); - message.append("'SET ").append(varname).append('=').append(varvalue); - message.append("' FAILED in validation : ").append(fail).append('.'); - throw new IllegalArgumentException(message.toString()); - } - } else if (key.startsWith("hive.")) { - throw new IllegalArgumentException("hive configuration " + key + " does not exists."); - } - } - conf.verifyAndSet(key, value); - if (register) { - SessionState.get().getOverriddenConfigurations().put(key, value); - } - } - - @Override - public void setOperationLogSessionDir(File operationLogRootDir) { - if (!operationLogRootDir.exists()) { - LOG.warn("The operation log root directory is removed, recreating: " + - operationLogRootDir.getAbsolutePath()); - if (!operationLogRootDir.mkdirs()) { - LOG.warn("Unable to create operation log root directory: " + - operationLogRootDir.getAbsolutePath()); - } - } - if (!operationLogRootDir.canWrite()) { - LOG.warn("The operation log root directory is not writable: " + - operationLogRootDir.getAbsolutePath()); - } - sessionLogDir = new File(operationLogRootDir, sessionHandle.getHandleIdentifier().toString()); - isOperationLogEnabled = true; - if (!sessionLogDir.exists()) { - if (!sessionLogDir.mkdir()) { - LOG.warn("Unable to create operation log session directory: " + - sessionLogDir.getAbsolutePath()); - isOperationLogEnabled = false; - } - } - if (isOperationLogEnabled) { - LOG.info("Operation log session directory is created: " + sessionLogDir.getAbsolutePath()); - } - } - - @Override - public boolean isOperationLogEnabled() { - return isOperationLogEnabled; - } - - @Override - public File getOperationLogSessionDir() { - return sessionLogDir; - } - - @Override - public TProtocolVersion getProtocolVersion() { - return sessionHandle.getProtocolVersion(); - } - - @Override - public SessionManager getSessionManager() { - return sessionManager; - } - - @Override - public void setSessionManager(SessionManager sessionManager) { - this.sessionManager = sessionManager; - } - - private OperationManager getOperationManager() { - return operationManager; - } - - @Override - public void setOperationManager(OperationManager operationManager) { - this.operationManager = operationManager; - } - - protected synchronized void acquire(boolean userAccess) { - // Need to make sure that the this HiveServer2's session's SessionState is - // stored in the thread local for the handler thread. - SessionState.setCurrentSessionState(sessionState); - if (userAccess) { - lastAccessTime = System.currentTimeMillis(); - } - } - - /** - * 1. We'll remove the ThreadLocal SessionState as this thread might now serve - * other requests. - * 2. 
We'll cache the ThreadLocal RawStore object for this background thread for an orderly cleanup - * when this thread is garbage collected later. - * @see org.apache.hive.service.server.ThreadWithGarbageCleanup#finalize() - */ - protected synchronized void release(boolean userAccess) { - SessionState.detachSession(); - if (ThreadWithGarbageCleanup.currentThread() instanceof ThreadWithGarbageCleanup) { - ThreadWithGarbageCleanup currentThread = - (ThreadWithGarbageCleanup) ThreadWithGarbageCleanup.currentThread(); - currentThread.cacheThreadLocalRawStore(); - } - if (userAccess) { - lastAccessTime = System.currentTimeMillis(); - } - if (opHandleSet.isEmpty()) { - lastIdleTime = System.currentTimeMillis(); - } else { - lastIdleTime = 0; - } - } - - @Override - public SessionHandle getSessionHandle() { - return sessionHandle; - } - - @Override - public String getUsername() { - return username; - } - - @Override - public String getPassword() { - return password; - } - - @Override - public HiveConf getHiveConf() { - hiveConf.setVar(HiveConf.ConfVars.HIVEFETCHOUTPUTSERDE, FETCH_WORK_SERDE_CLASS); - return hiveConf; - } - - @Override - public IMetaStoreClient getMetaStoreClient() throws HiveSQLException { - try { - return Hive.get(getHiveConf()).getMSC(); - } catch (HiveException e) { - throw new HiveSQLException("Failed to get metastore connection", e); - } catch (MetaException e) { - throw new HiveSQLException("Failed to get metastore connection", e); - } - } - - @Override - public GetInfoValue getInfo(GetInfoType getInfoType) - throws HiveSQLException { - acquire(true); - try { - switch (getInfoType) { - case CLI_SERVER_NAME: - return new GetInfoValue("Hive"); - case CLI_DBMS_NAME: - return new GetInfoValue("Apache Hive"); - case CLI_DBMS_VER: - return new GetInfoValue(HiveVersionInfo.getVersion()); - case CLI_MAX_COLUMN_NAME_LEN: - return new GetInfoValue(128); - case CLI_MAX_SCHEMA_NAME_LEN: - return new GetInfoValue(128); - case CLI_MAX_TABLE_NAME_LEN: - return new GetInfoValue(128); - case CLI_TXN_CAPABLE: - default: - throw new HiveSQLException("Unrecognized GetInfoType value: " + getInfoType.toString()); - } - } finally { - release(true); - } - } - - @Override - public OperationHandle executeStatement(String statement, Map confOverlay) - throws HiveSQLException { - return executeStatementInternal(statement, confOverlay, false); - } - - @Override - public OperationHandle executeStatementAsync(String statement, Map confOverlay) - throws HiveSQLException { - return executeStatementInternal(statement, confOverlay, true); - } - - private OperationHandle executeStatementInternal(String statement, Map confOverlay, - boolean runAsync) - throws HiveSQLException { - acquire(true); - - OperationManager operationManager = getOperationManager(); - ExecuteStatementOperation operation = operationManager - .newExecuteStatementOperation(getSession(), statement, confOverlay, runAsync); - OperationHandle opHandle = operation.getHandle(); - try { - operation.run(); - opHandleSet.add(opHandle); - return opHandle; - } catch (HiveSQLException e) { - // Referring to SQLOperation.java, there is no chance that a HiveSQLException throws and the asyn - // background operation submits to thread pool successfully at the same time. 
So, Cleanup - // opHandle directly when got HiveSQLException - operationManager.closeOperation(opHandle); - throw e; - } finally { - release(true); - } - } - - @Override - public OperationHandle getTypeInfo() - throws HiveSQLException { - acquire(true); - - OperationManager operationManager = getOperationManager(); - GetTypeInfoOperation operation = operationManager.newGetTypeInfoOperation(getSession()); - OperationHandle opHandle = operation.getHandle(); - try { - operation.run(); - opHandleSet.add(opHandle); - return opHandle; - } catch (HiveSQLException e) { - operationManager.closeOperation(opHandle); - throw e; - } finally { - release(true); - } - } - - @Override - public OperationHandle getCatalogs() - throws HiveSQLException { - acquire(true); - - OperationManager operationManager = getOperationManager(); - GetCatalogsOperation operation = operationManager.newGetCatalogsOperation(getSession()); - OperationHandle opHandle = operation.getHandle(); - try { - operation.run(); - opHandleSet.add(opHandle); - return opHandle; - } catch (HiveSQLException e) { - operationManager.closeOperation(opHandle); - throw e; - } finally { - release(true); - } - } - - @Override - public OperationHandle getSchemas(String catalogName, String schemaName) - throws HiveSQLException { - acquire(true); - - OperationManager operationManager = getOperationManager(); - GetSchemasOperation operation = - operationManager.newGetSchemasOperation(getSession(), catalogName, schemaName); - OperationHandle opHandle = operation.getHandle(); - try { - operation.run(); - opHandleSet.add(opHandle); - return opHandle; - } catch (HiveSQLException e) { - operationManager.closeOperation(opHandle); - throw e; - } finally { - release(true); - } - } - - @Override - public OperationHandle getTables(String catalogName, String schemaName, String tableName, - List tableTypes) - throws HiveSQLException { - acquire(true); - - OperationManager operationManager = getOperationManager(); - MetadataOperation operation = - operationManager.newGetTablesOperation(getSession(), catalogName, schemaName, tableName, tableTypes); - OperationHandle opHandle = operation.getHandle(); - try { - operation.run(); - opHandleSet.add(opHandle); - return opHandle; - } catch (HiveSQLException e) { - operationManager.closeOperation(opHandle); - throw e; - } finally { - release(true); - } - } - - @Override - public OperationHandle getTableTypes() - throws HiveSQLException { - acquire(true); - - OperationManager operationManager = getOperationManager(); - GetTableTypesOperation operation = operationManager.newGetTableTypesOperation(getSession()); - OperationHandle opHandle = operation.getHandle(); - try { - operation.run(); - opHandleSet.add(opHandle); - return opHandle; - } catch (HiveSQLException e) { - operationManager.closeOperation(opHandle); - throw e; - } finally { - release(true); - } - } - - @Override - public OperationHandle getColumns(String catalogName, String schemaName, - String tableName, String columnName) throws HiveSQLException { - acquire(true); - String addedJars = Utilities.getResourceFiles(hiveConf, SessionState.ResourceType.JAR); - if (StringUtils.isNotBlank(addedJars)) { - IMetaStoreClient metastoreClient = getSession().getMetaStoreClient(); - metastoreClient.setHiveAddedJars(addedJars); - } - OperationManager operationManager = getOperationManager(); - GetColumnsOperation operation = operationManager.newGetColumnsOperation(getSession(), - catalogName, schemaName, tableName, columnName); - OperationHandle opHandle = operation.getHandle(); 
- try { - operation.run(); - opHandleSet.add(opHandle); - return opHandle; - } catch (HiveSQLException e) { - operationManager.closeOperation(opHandle); - throw e; - } finally { - release(true); - } - } - - @Override - public OperationHandle getFunctions(String catalogName, String schemaName, String functionName) - throws HiveSQLException { - acquire(true); - - OperationManager operationManager = getOperationManager(); - GetFunctionsOperation operation = operationManager - .newGetFunctionsOperation(getSession(), catalogName, schemaName, functionName); - OperationHandle opHandle = operation.getHandle(); - try { - operation.run(); - opHandleSet.add(opHandle); - return opHandle; - } catch (HiveSQLException e) { - operationManager.closeOperation(opHandle); - throw e; - } finally { - release(true); - } - } - - @Override - public void close() throws HiveSQLException { - try { - acquire(true); - // Iterate through the opHandles and close their operations - for (OperationHandle opHandle : opHandleSet) { - try { - operationManager.closeOperation(opHandle); - } catch (Exception e) { - LOG.warn("Exception is thrown closing operation " + opHandle, e); - } - } - opHandleSet.clear(); - // Cleanup session log directory. - cleanupSessionLogDir(); - // Cleanup pipeout file. - cleanupPipeoutFile(); - HiveHistory hiveHist = sessionState.getHiveHistory(); - if (null != hiveHist) { - hiveHist.closeStream(); - } - try { - sessionState.close(); - } finally { - sessionState = null; - } - } catch (IOException ioe) { - throw new HiveSQLException("Failure to close", ioe); - } finally { - if (sessionState != null) { - try { - sessionState.close(); - } catch (Throwable t) { - LOG.warn("Error closing session", t); - } - sessionState = null; - } - release(true); - } - } - - private void cleanupPipeoutFile() { - String lScratchDir = hiveConf.getVar(ConfVars.LOCALSCRATCHDIR); - String sessionID = hiveConf.getVar(ConfVars.HIVESESSIONID); - - File[] fileAry = new File(lScratchDir).listFiles( - (dir, name) -> name.startsWith(sessionID) && name.endsWith(".pipeout")); - - if (fileAry == null) { - LOG.error("Unable to access pipeout files in " + lScratchDir); - } else { - for (File file : fileAry) { - try { - FileUtils.forceDelete(file); - } catch (Exception e) { - LOG.error("Failed to cleanup pipeout file: " + file, e); - } - } - } - } - - private void cleanupSessionLogDir() { - if (isOperationLogEnabled) { - try { - FileUtils.forceDelete(sessionLogDir); - } catch (Exception e) { - LOG.error("Failed to cleanup session log dir: " + sessionHandle, e); - } - } - } - - @Override - public SessionState getSessionState() { - return sessionState; - } - - @Override - public String getUserName() { - return username; - } - - @Override - public void setUserName(String userName) { - this.username = userName; - } - - @Override - public long getLastAccessTime() { - return lastAccessTime; - } - - @Override - public void closeExpiredOperations() { - OperationHandle[] handles = opHandleSet.toArray(new OperationHandle[opHandleSet.size()]); - if (handles.length > 0) { - List operations = operationManager.removeExpiredOperations(handles); - if (!operations.isEmpty()) { - closeTimedOutOperations(operations); - } - } - } - - @Override - public long getNoOperationTime() { - return lastIdleTime > 0 ? 
System.currentTimeMillis() - lastIdleTime : 0; - } - - private void closeTimedOutOperations(List operations) { - acquire(false); - try { - for (Operation operation : operations) { - opHandleSet.remove(operation.getHandle()); - try { - operation.close(); - } catch (Exception e) { - LOG.warn("Exception is thrown closing timed-out operation " + operation.getHandle(), e); - } - } - } finally { - release(false); - } - } - - @Override - public void cancelOperation(OperationHandle opHandle) throws HiveSQLException { - acquire(true); - try { - sessionManager.getOperationManager().cancelOperation(opHandle); - } finally { - release(true); - } - } - - @Override - public void closeOperation(OperationHandle opHandle) throws HiveSQLException { - acquire(true); - try { - operationManager.closeOperation(opHandle); - opHandleSet.remove(opHandle); - } finally { - release(true); - } - } - - @Override - public TableSchema getResultSetMetadata(OperationHandle opHandle) throws HiveSQLException { - acquire(true); - try { - return sessionManager.getOperationManager().getOperationResultSetSchema(opHandle); - } finally { - release(true); - } - } - - @Override - public RowSet fetchResults(OperationHandle opHandle, FetchOrientation orientation, - long maxRows, FetchType fetchType) throws HiveSQLException { - acquire(true); - try { - if (fetchType == FetchType.QUERY_OUTPUT) { - return operationManager.getOperationNextRowSet(opHandle, orientation, maxRows); - } - return operationManager.getOperationLogRowSet(opHandle, orientation, maxRows); - } finally { - release(true); - } - } - - protected HiveSession getSession() { - return this; - } - - @Override - public String getIpAddress() { - return ipAddress; - } - - @Override - public void setIpAddress(String ipAddress) { - this.ipAddress = ipAddress; - } - - @Override - public String getDelegationToken(HiveAuthFactory authFactory, String owner, String renewer) - throws HiveSQLException { - HiveAuthFactory.verifyProxyAccess(getUsername(), owner, getIpAddress(), getHiveConf()); - return authFactory.getDelegationToken(owner, renewer); - } - - @Override - public void cancelDelegationToken(HiveAuthFactory authFactory, String tokenStr) - throws HiveSQLException { - HiveAuthFactory.verifyProxyAccess(getUsername(), getUserFromToken(authFactory, tokenStr), - getIpAddress(), getHiveConf()); - authFactory.cancelDelegationToken(tokenStr); - } - - @Override - public void renewDelegationToken(HiveAuthFactory authFactory, String tokenStr) - throws HiveSQLException { - HiveAuthFactory.verifyProxyAccess(getUsername(), getUserFromToken(authFactory, tokenStr), - getIpAddress(), getHiveConf()); - authFactory.renewDelegationToken(tokenStr); - } - - // extract the real user from the given token string - private String getUserFromToken(HiveAuthFactory authFactory, String tokenStr) throws HiveSQLException { - return authFactory.getUserFromToken(tokenStr); - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java deleted file mode 100644 index 762dbb2faadec..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java +++ /dev/null @@ -1,182 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.session; - -import java.io.IOException; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.shims.Utils; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hive.service.auth.HiveAuthFactory; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.thrift.TProtocolVersion; - -/** - * - * HiveSessionImplwithUGI. - * HiveSession with connecting user's UGI and delegation token if required - */ -public class HiveSessionImplwithUGI extends HiveSessionImpl { - public static final String HS2TOKEN = "HiveServer2ImpersonationToken"; - - private UserGroupInformation sessionUgi = null; - private String delegationTokenStr = null; - private Hive sessionHive = null; - private HiveSession proxySession = null; - static final Log LOG = LogFactory.getLog(HiveSessionImplwithUGI.class); - - public HiveSessionImplwithUGI(TProtocolVersion protocol, String username, String password, - HiveConf hiveConf, String ipAddress, String delegationToken) throws HiveSQLException { - super(protocol, username, password, hiveConf, ipAddress); - setSessionUGI(username); - setDelegationToken(delegationToken); - - // create a new metastore connection for this particular user session - Hive.set(null); - try { - sessionHive = Hive.get(getHiveConf()); - } catch (HiveException e) { - throw new HiveSQLException("Failed to setup metastore connection", e); - } - } - - // setup appropriate UGI for the session - public void setSessionUGI(String owner) throws HiveSQLException { - if (owner == null) { - throw new HiveSQLException("No username provided for impersonation"); - } - if (UserGroupInformation.isSecurityEnabled()) { - try { - sessionUgi = UserGroupInformation.createProxyUser( - owner, UserGroupInformation.getLoginUser()); - } catch (IOException e) { - throw new HiveSQLException("Couldn't setup proxy user", e); - } - } else { - sessionUgi = UserGroupInformation.createRemoteUser(owner); - } - } - - public UserGroupInformation getSessionUgi() { - return this.sessionUgi; - } - - public String getDelegationToken() { - return this.delegationTokenStr; - } - - @Override - protected synchronized void acquire(boolean userAccess) { - super.acquire(userAccess); - // if we have a metastore connection with impersonation, then set it first - if (sessionHive != null) { - Hive.set(sessionHive); - } - } - - /** - * Close the file systems for the session and remove it from the FileSystem cache. 
- * Cancel the session's delegation token and close the metastore connection - */ - @Override - public void close() throws HiveSQLException { - try { - acquire(true); - cancelDelegationToken(); - } finally { - try { - super.close(); - } finally { - try { - FileSystem.closeAllForUGI(sessionUgi); - } catch (IOException ioe) { - throw new HiveSQLException("Could not clean up file-system handles for UGI: " - + sessionUgi, ioe); - } - } - } - } - - /** - * Enable delegation token for the session - * save the token string and set the token.signature in hive conf. The metastore client uses - * this token.signature to determine where to use kerberos or delegation token - * @throws HiveException - * @throws IOException - */ - private void setDelegationToken(String delegationTokenStr) throws HiveSQLException { - this.delegationTokenStr = delegationTokenStr; - if (delegationTokenStr != null) { - getHiveConf().set("hive.metastore.token.signature", HS2TOKEN); - try { - Utils.setTokenStr(sessionUgi, delegationTokenStr, HS2TOKEN); - } catch (IOException e) { - throw new HiveSQLException("Couldn't setup delegation token in the ugi", e); - } - } - } - - // If the session has a delegation token obtained from the metastore, then cancel it - private void cancelDelegationToken() throws HiveSQLException { - if (delegationTokenStr != null) { - try { - Hive.get(getHiveConf()).cancelDelegationToken(delegationTokenStr); - } catch (HiveException e) { - throw new HiveSQLException("Couldn't cancel delegation token", e); - } - // close the metastore connection created with this delegation token - Hive.closeCurrent(); - } - } - - @Override - protected HiveSession getSession() { - assert proxySession != null; - - return proxySession; - } - - public void setProxySession(HiveSession proxySession) { - this.proxySession = proxySession; - } - - @Override - public String getDelegationToken(HiveAuthFactory authFactory, String owner, - String renewer) throws HiveSQLException { - return authFactory.getDelegationToken(owner, renewer); - } - - @Override - public void cancelDelegationToken(HiveAuthFactory authFactory, String tokenStr) - throws HiveSQLException { - authFactory.cancelDelegationToken(tokenStr); - } - - @Override - public void renewDelegationToken(HiveAuthFactory authFactory, String tokenStr) - throws HiveSQLException { - authFactory.renewDelegationToken(tokenStr); - } - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/SessionManager.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/SessionManager.java deleted file mode 100644 index ad6fb3ba37a0e..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/SessionManager.java +++ /dev/null @@ -1,377 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.session; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Date; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.Future; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; - -import org.apache.commons.io.FileUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hive.service.CompositeService; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.SessionHandle; -import org.apache.hive.service.cli.operation.OperationManager; -import org.apache.hive.service.cli.thrift.TProtocolVersion; -import org.apache.hive.service.server.HiveServer2; -import org.apache.hive.service.server.ThreadFactoryWithGarbageCleanup; - -/** - * SessionManager. - * - */ -public class SessionManager extends CompositeService { - - private static final Log LOG = LogFactory.getLog(SessionManager.class); - public static final String HIVERCFILE = ".hiverc"; - private HiveConf hiveConf; - private final Map handleToSession = - new ConcurrentHashMap(); - private final OperationManager operationManager = new OperationManager(); - private ThreadPoolExecutor backgroundOperationPool; - private boolean isOperationLogEnabled; - private File operationLogRootDir; - - private long checkInterval; - private long sessionTimeout; - private boolean checkOperation; - - private volatile boolean shutdown; - // The HiveServer2 instance running this service - private final HiveServer2 hiveServer2; - - public SessionManager(HiveServer2 hiveServer2) { - super(SessionManager.class.getSimpleName()); - this.hiveServer2 = hiveServer2; - } - - @Override - public synchronized void init(HiveConf hiveConf) { - this.hiveConf = hiveConf; - //Create operation log root directory, if operation logging is enabled - if (hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_LOGGING_OPERATION_ENABLED)) { - initOperationLogRootDir(); - } - createBackgroundOperationPool(); - addService(operationManager); - super.init(hiveConf); - } - - private void createBackgroundOperationPool() { - int poolSize = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_ASYNC_EXEC_THREADS); - LOG.info("HiveServer2: Background operation thread pool size: " + poolSize); - int poolQueueSize = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_ASYNC_EXEC_WAIT_QUEUE_SIZE); - LOG.info("HiveServer2: Background operation thread wait queue size: " + poolQueueSize); - long keepAliveTime = HiveConf.getTimeVar( - hiveConf, ConfVars.HIVE_SERVER2_ASYNC_EXEC_KEEPALIVE_TIME, TimeUnit.SECONDS); - LOG.info( - "HiveServer2: Background operation thread keepalive time: " + keepAliveTime + " seconds"); - - // Create a thread pool with #poolSize threads - // Threads terminate when they are idle for more than the keepAliveTime - // A bounded blocking queue is used to queue incoming operations, if #operations > poolSize - String threadPoolName = "HiveServer2-Background-Pool"; - backgroundOperationPool = new ThreadPoolExecutor(poolSize, poolSize, - keepAliveTime, TimeUnit.SECONDS, new LinkedBlockingQueue(poolQueueSize), - new ThreadFactoryWithGarbageCleanup(threadPoolName)); - 
backgroundOperationPool.allowCoreThreadTimeOut(true); - - checkInterval = HiveConf.getTimeVar( - hiveConf, ConfVars.HIVE_SERVER2_SESSION_CHECK_INTERVAL, TimeUnit.MILLISECONDS); - sessionTimeout = HiveConf.getTimeVar( - hiveConf, ConfVars.HIVE_SERVER2_IDLE_SESSION_TIMEOUT, TimeUnit.MILLISECONDS); - checkOperation = HiveConf.getBoolVar(hiveConf, - ConfVars.HIVE_SERVER2_IDLE_SESSION_CHECK_OPERATION); - } - - private void initOperationLogRootDir() { - operationLogRootDir = new File( - hiveConf.getVar(ConfVars.HIVE_SERVER2_LOGGING_OPERATION_LOG_LOCATION)); - isOperationLogEnabled = true; - - if (operationLogRootDir.exists() && !operationLogRootDir.isDirectory()) { - LOG.warn("The operation log root directory exists, but it is not a directory: " + - operationLogRootDir.getAbsolutePath()); - isOperationLogEnabled = false; - } - - if (!operationLogRootDir.exists()) { - if (!operationLogRootDir.mkdirs()) { - LOG.warn("Unable to create operation log root directory: " + - operationLogRootDir.getAbsolutePath()); - isOperationLogEnabled = false; - } - } - - if (isOperationLogEnabled) { - LOG.info("Operation log root directory is created: " + operationLogRootDir.getAbsolutePath()); - try { - FileUtils.forceDeleteOnExit(operationLogRootDir); - } catch (IOException e) { - LOG.warn("Failed to schedule cleanup HS2 operation logging root dir: " + - operationLogRootDir.getAbsolutePath(), e); - } - } - } - - @Override - public synchronized void start() { - super.start(); - if (checkInterval > 0) { - startTimeoutChecker(); - } - } - - private final Object timeoutCheckerLock = new Object(); - - private void startTimeoutChecker() { - final long interval = Math.max(checkInterval, 3000L); // minimum 3 seconds - final Runnable timeoutChecker = new Runnable() { - @Override - public void run() { - sleepFor(interval); - while (!shutdown) { - long current = System.currentTimeMillis(); - for (HiveSession session : new ArrayList(handleToSession.values())) { - if (shutdown) { - break; - } - if (sessionTimeout > 0 && session.getLastAccessTime() + sessionTimeout <= current - && (!checkOperation || session.getNoOperationTime() > sessionTimeout)) { - SessionHandle handle = session.getSessionHandle(); - LOG.warn("Session " + handle + " is Timed-out (last access : " + - new Date(session.getLastAccessTime()) + ") and will be closed"); - try { - closeSession(handle); - } catch (HiveSQLException e) { - LOG.warn("Exception is thrown closing session " + handle, e); - } - } else { - session.closeExpiredOperations(); - } - } - sleepFor(interval); - } - } - - private void sleepFor(long interval) { - synchronized (timeoutCheckerLock) { - try { - timeoutCheckerLock.wait(interval); - } catch (InterruptedException e) { - // Ignore, and break. - } - } - } - }; - backgroundOperationPool.execute(timeoutChecker); - } - - private void shutdownTimeoutChecker() { - shutdown = true; - synchronized (timeoutCheckerLock) { - timeoutCheckerLock.notify(); - } - } - - @Override - public synchronized void stop() { - super.stop(); - shutdownTimeoutChecker(); - if (backgroundOperationPool != null) { - backgroundOperationPool.shutdown(); - long timeout = hiveConf.getTimeVar( - ConfVars.HIVE_SERVER2_ASYNC_EXEC_SHUTDOWN_TIMEOUT, TimeUnit.SECONDS); - try { - backgroundOperationPool.awaitTermination(timeout, TimeUnit.SECONDS); - } catch (InterruptedException e) { - LOG.warn("HIVE_SERVER2_ASYNC_EXEC_SHUTDOWN_TIMEOUT = " + timeout + - " seconds has been exceeded. 
RUNNING background operations will be shut down", e); - } - backgroundOperationPool = null; - } - cleanupLoggingRootDir(); - } - - private void cleanupLoggingRootDir() { - if (isOperationLogEnabled) { - try { - FileUtils.forceDelete(operationLogRootDir); - } catch (Exception e) { - LOG.warn("Failed to cleanup root dir of HS2 logging: " + operationLogRootDir - .getAbsolutePath(), e); - } - } - } - - public SessionHandle openSession(TProtocolVersion protocol, String username, String password, String ipAddress, - Map sessionConf) throws HiveSQLException { - return openSession(protocol, username, password, ipAddress, sessionConf, false, null); - } - - /** - * Opens a new session and creates a session handle. - * The username passed to this method is the effective username. - * If withImpersonation is true (==doAs true) we wrap all the calls in HiveSession - * within a UGI.doAs, where UGI corresponds to the effective user. - * - * Please see {@code org.apache.hive.service.cli.thrift.ThriftCLIService.getUserName()} for - * more details. - * - * @param protocol - * @param username - * @param password - * @param ipAddress - * @param sessionConf - * @param withImpersonation - * @param delegationToken - * @return - * @throws HiveSQLException - */ - public SessionHandle openSession(TProtocolVersion protocol, String username, String password, String ipAddress, - Map sessionConf, boolean withImpersonation, String delegationToken) - throws HiveSQLException { - HiveSession session; - // If doAs is set to true for HiveServer2, we will create a proxy object for the session impl. - // Within the proxy object, we wrap the method call in a UserGroupInformation#doAs - if (withImpersonation) { - HiveSessionImplwithUGI sessionWithUGI = new HiveSessionImplwithUGI(protocol, username, password, - hiveConf, ipAddress, delegationToken); - session = HiveSessionProxy.getProxy(sessionWithUGI, sessionWithUGI.getSessionUgi()); - sessionWithUGI.setProxySession(session); - } else { - session = new HiveSessionImpl(protocol, username, password, hiveConf, ipAddress); - } - session.setSessionManager(this); - session.setOperationManager(operationManager); - try { - session.open(sessionConf); - } catch (Exception e) { - try { - session.close(); - } catch (Throwable t) { - LOG.warn("Error closing session", t); - } - session = null; - throw new HiveSQLException("Failed to open new session: " + e, e); - } - if (isOperationLogEnabled) { - session.setOperationLogSessionDir(operationLogRootDir); - } - handleToSession.put(session.getSessionHandle(), session); - return session.getSessionHandle(); - } - - public void closeSession(SessionHandle sessionHandle) throws HiveSQLException { - HiveSession session = handleToSession.remove(sessionHandle); - if (session == null) { - throw new HiveSQLException("Session does not exist!"); - } - session.close(); - } - - public HiveSession getSession(SessionHandle sessionHandle) throws HiveSQLException { - HiveSession session = handleToSession.get(sessionHandle); - if (session == null) { - throw new HiveSQLException("Invalid SessionHandle: " + sessionHandle); - } - return session; - } - - public OperationManager getOperationManager() { - return operationManager; - } - - private static ThreadLocal threadLocalIpAddress = new ThreadLocal() { - @Override - protected synchronized String initialValue() { - return null; - } - }; - - public static void setIpAddress(String ipAddress) { - threadLocalIpAddress.set(ipAddress); - } - - public static void clearIpAddress() { - threadLocalIpAddress.remove(); - } - - 
public static String getIpAddress() { - return threadLocalIpAddress.get(); - } - - private static ThreadLocal threadLocalUserName = new ThreadLocal(){ - @Override - protected synchronized String initialValue() { - return null; - } - }; - - public static void setUserName(String userName) { - threadLocalUserName.set(userName); - } - - public static void clearUserName() { - threadLocalUserName.remove(); - } - - public static String getUserName() { - return threadLocalUserName.get(); - } - - private static ThreadLocal threadLocalProxyUserName = new ThreadLocal(){ - @Override - protected synchronized String initialValue() { - return null; - } - }; - - public static void setProxyUserName(String userName) { - LOG.debug("setting proxy user name based on query param to: " + userName); - threadLocalProxyUserName.set(userName); - } - - public static String getProxyUserName() { - return threadLocalProxyUserName.get(); - } - - public static void clearProxyUserName() { - threadLocalProxyUserName.remove(); - } - - public Future submitBackgroundOperation(Runnable r) { - return backgroundOperationPool.submit(r); - } - - public int getOpenSessionCount() { - return handleToSession.size(); - } -} - diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java deleted file mode 100644 index 00bdf7e19126e..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java +++ /dev/null @@ -1,121 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.service.cli.thrift; - -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.SynchronousQueue; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hive.service.ServiceException; -import org.apache.hive.service.auth.HiveAuthFactory; -import org.apache.hive.service.cli.CLIService; -import org.apache.hive.service.server.ThreadFactoryWithGarbageCleanup; -import org.apache.thrift.TProcessorFactory; -import org.apache.thrift.protocol.TBinaryProtocol; -import org.apache.thrift.server.TThreadPoolServer; -import org.apache.thrift.transport.TServerSocket; -import org.apache.thrift.transport.TTransportFactory; - - -public class ThriftBinaryCLIService extends ThriftCLIService { - - public ThriftBinaryCLIService(CLIService cliService) { - super(cliService, ThriftBinaryCLIService.class.getSimpleName()); - } - - @Override - protected void initializeServer() { - try { - // Server thread pool - String threadPoolName = "HiveServer2-Handler-Pool"; - ExecutorService executorService = new ThreadPoolExecutor(minWorkerThreads, maxWorkerThreads, - workerKeepAliveTime, TimeUnit.SECONDS, new SynchronousQueue(), - new ThreadFactoryWithGarbageCleanup(threadPoolName)); - - // Thrift configs - hiveAuthFactory = new HiveAuthFactory(hiveConf); - TTransportFactory transportFactory = hiveAuthFactory.getAuthTransFactory(); - TProcessorFactory processorFactory = hiveAuthFactory.getAuthProcFactory(this); - TServerSocket serverSocket = null; - List sslVersionBlacklist = new ArrayList(); - for (String sslVersion : hiveConf.getVar(ConfVars.HIVE_SSL_PROTOCOL_BLACKLIST).split(",")) { - sslVersionBlacklist.add(sslVersion); - } - if (!hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_USE_SSL)) { - serverSocket = HiveAuthFactory.getServerSocket(hiveHost, portNum); - } else { - String keyStorePath = hiveConf.getVar(ConfVars.HIVE_SERVER2_SSL_KEYSTORE_PATH).trim(); - if (keyStorePath.isEmpty()) { - throw new IllegalArgumentException(ConfVars.HIVE_SERVER2_SSL_KEYSTORE_PATH.varname - + " Not configured for SSL connection"); - } - String keyStorePassword = ShimLoader.getHadoopShims().getPassword(hiveConf, - HiveConf.ConfVars.HIVE_SERVER2_SSL_KEYSTORE_PASSWORD.varname); - serverSocket = HiveAuthFactory.getServerSSLSocket(hiveHost, portNum, keyStorePath, - keyStorePassword, sslVersionBlacklist); - } - - // In case HIVE_SERVER2_THRIFT_PORT or hive.server2.thrift.port is configured with 0 which - // represents any free port, we should set it to the actual one - portNum = serverSocket.getServerSocket().getLocalPort(); - - // Server args - int maxMessageSize = hiveConf.getIntVar(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_MAX_MESSAGE_SIZE); - int requestTimeout = (int) hiveConf.getTimeVar( - HiveConf.ConfVars.HIVE_SERVER2_THRIFT_LOGIN_TIMEOUT, TimeUnit.SECONDS); - int beBackoffSlotLength = (int) hiveConf.getTimeVar( - HiveConf.ConfVars.HIVE_SERVER2_THRIFT_LOGIN_BEBACKOFF_SLOT_LENGTH, TimeUnit.MILLISECONDS); - TThreadPoolServer.Args sargs = new TThreadPoolServer.Args(serverSocket) - .processorFactory(processorFactory).transportFactory(transportFactory) - .protocolFactory(new TBinaryProtocol.Factory()) - .inputProtocolFactory(new TBinaryProtocol.Factory(true, true, maxMessageSize, maxMessageSize)) - 
.requestTimeout(requestTimeout).requestTimeoutUnit(TimeUnit.SECONDS) - .beBackoffSlotLength(beBackoffSlotLength).beBackoffSlotLengthUnit(TimeUnit.MILLISECONDS) - .executorService(executorService); - - // TCP Server - server = new TThreadPoolServer(sargs); - server.setServerEventHandler(serverEventHandler); - String msg = "Starting " + ThriftBinaryCLIService.class.getSimpleName() + " on port " - + serverSocket.getServerSocket().getLocalPort() + " with " + minWorkerThreads + "..." + maxWorkerThreads + " worker threads"; - LOG.info(msg); - } catch (Exception t) { - throw new ServiceException("Error initializing " + getName(), t); - } - } - - @Override - public void run() { - try { - server.serve(); - } catch (Throwable t) { - LOG.fatal( - "Error starting HiveServer2: could not start " - + ThriftBinaryCLIService.class.getSimpleName(), t); - System.exit(-1); - } - } - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java deleted file mode 100644 index ff533769b5b84..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java +++ /dev/null @@ -1,693 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.thrift; - -import javax.security.auth.login.LoginException; -import java.io.IOException; -import java.net.InetAddress; -import java.net.UnknownHostException; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.TimeUnit; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hive.service.AbstractService; -import org.apache.hive.service.ServiceException; -import org.apache.hive.service.ServiceUtils; -import org.apache.hive.service.auth.HiveAuthFactory; -import org.apache.hive.service.auth.TSetIpAddressProcessor; -import org.apache.hive.service.cli.*; -import org.apache.hive.service.cli.session.SessionManager; -import org.apache.hive.service.server.HiveServer2; -import org.apache.thrift.TException; -import org.apache.thrift.protocol.TProtocol; -import org.apache.thrift.server.ServerContext; -import org.apache.thrift.server.TServer; -import org.apache.thrift.server.TServerEventHandler; -import org.apache.thrift.transport.TTransport; - -/** - * ThriftCLIService. 
- * - */ -public abstract class ThriftCLIService extends AbstractService implements TCLIService.Iface, Runnable { - - public static final Log LOG = LogFactory.getLog(ThriftCLIService.class.getName()); - - protected CLIService cliService; - private static final TStatus OK_STATUS = new TStatus(TStatusCode.SUCCESS_STATUS); - protected static HiveAuthFactory hiveAuthFactory; - - protected int portNum; - protected InetAddress serverIPAddress; - protected String hiveHost; - protected TServer server; - protected org.eclipse.jetty.server.Server httpServer; - - private boolean isStarted = false; - protected boolean isEmbedded = false; - - protected HiveConf hiveConf; - - protected int minWorkerThreads; - protected int maxWorkerThreads; - protected long workerKeepAliveTime; - - protected TServerEventHandler serverEventHandler; - protected ThreadLocal currentServerContext; - - static class ThriftCLIServerContext implements ServerContext { - private SessionHandle sessionHandle = null; - - public void setSessionHandle(SessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public SessionHandle getSessionHandle() { - return sessionHandle; - } - } - - public ThriftCLIService(CLIService service, String serviceName) { - super(serviceName); - this.cliService = service; - currentServerContext = new ThreadLocal(); - serverEventHandler = new TServerEventHandler() { - @Override - public ServerContext createContext( - TProtocol input, TProtocol output) { - return new ThriftCLIServerContext(); - } - - @Override - public void deleteContext(ServerContext serverContext, - TProtocol input, TProtocol output) { - ThriftCLIServerContext context = (ThriftCLIServerContext)serverContext; - SessionHandle sessionHandle = context.getSessionHandle(); - if (sessionHandle != null) { - LOG.info("Session disconnected without closing properly, close it now"); - try { - cliService.closeSession(sessionHandle); - } catch (HiveSQLException e) { - LOG.warn("Failed to close session: " + e, e); - } - } - } - - @Override - public void preServe() { - } - - @Override - public void processContext(ServerContext serverContext, - TTransport input, TTransport output) { - currentServerContext.set(serverContext); - } - }; - } - - @Override - public synchronized void init(HiveConf hiveConf) { - this.hiveConf = hiveConf; - // Initialize common server configs needed in both binary & http modes - String portString; - hiveHost = System.getenv("HIVE_SERVER2_THRIFT_BIND_HOST"); - if (hiveHost == null) { - hiveHost = hiveConf.getVar(ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST); - } - try { - if (hiveHost != null && !hiveHost.isEmpty()) { - serverIPAddress = InetAddress.getByName(hiveHost); - } else { - serverIPAddress = InetAddress.getLocalHost(); - } - } catch (UnknownHostException e) { - throw new ServiceException(e); - } - // HTTP mode - if (HiveServer2.isHTTPTransportMode(hiveConf)) { - workerKeepAliveTime = - hiveConf.getTimeVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_WORKER_KEEPALIVE_TIME, - TimeUnit.SECONDS); - portString = System.getenv("HIVE_SERVER2_THRIFT_HTTP_PORT"); - if (portString != null) { - portNum = Integer.valueOf(portString); - } else { - portNum = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_PORT); - } - } - // Binary mode - else { - workerKeepAliveTime = - hiveConf.getTimeVar(ConfVars.HIVE_SERVER2_THRIFT_WORKER_KEEPALIVE_TIME, TimeUnit.SECONDS); - portString = System.getenv("HIVE_SERVER2_THRIFT_PORT"); - if (portString != null) { - portNum = Integer.valueOf(portString); - } else { - portNum = 
hiveConf.getIntVar(ConfVars.HIVE_SERVER2_THRIFT_PORT); - } - } - minWorkerThreads = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_THRIFT_MIN_WORKER_THREADS); - maxWorkerThreads = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_THRIFT_MAX_WORKER_THREADS); - super.init(hiveConf); - } - - @Override - public synchronized void start() { - super.start(); - if (!isStarted && !isEmbedded) { - initializeServer(); - new Thread(this).start(); - isStarted = true; - } - } - - @Override - public synchronized void stop() { - if (isStarted && !isEmbedded) { - if(server != null) { - server.stop(); - LOG.info("Thrift server has stopped"); - } - if((httpServer != null) && httpServer.isStarted()) { - try { - httpServer.stop(); - LOG.info("Http server has stopped"); - } catch (Exception e) { - LOG.error("Error stopping Http server: ", e); - } - } - isStarted = false; - } - super.stop(); - } - - public int getPortNumber() { - return portNum; - } - - public InetAddress getServerIPAddress() { - return serverIPAddress; - } - - @Override - public TGetDelegationTokenResp GetDelegationToken(TGetDelegationTokenReq req) - throws TException { - TGetDelegationTokenResp resp = new TGetDelegationTokenResp(); - resp.setStatus(notSupportTokenErrorStatus()); - return resp; - } - - @Override - public TCancelDelegationTokenResp CancelDelegationToken(TCancelDelegationTokenReq req) - throws TException { - TCancelDelegationTokenResp resp = new TCancelDelegationTokenResp(); - resp.setStatus(notSupportTokenErrorStatus()); - return resp; - } - - @Override - public TRenewDelegationTokenResp RenewDelegationToken(TRenewDelegationTokenReq req) - throws TException { - TRenewDelegationTokenResp resp = new TRenewDelegationTokenResp(); - resp.setStatus(notSupportTokenErrorStatus()); - return resp; - } - - private TStatus notSupportTokenErrorStatus() { - TStatus errorStatus = new TStatus(TStatusCode.ERROR_STATUS); - errorStatus.setErrorMessage("Delegation token is not supported"); - return errorStatus; - } - - @Override - public TOpenSessionResp OpenSession(TOpenSessionReq req) throws TException { - LOG.info("Client protocol version: " + req.getClient_protocol()); - TOpenSessionResp resp = new TOpenSessionResp(); - try { - SessionHandle sessionHandle = getSessionHandle(req, resp); - resp.setSessionHandle(sessionHandle.toTSessionHandle()); - // TODO: set real configuration map - resp.setConfiguration(new HashMap()); - resp.setStatus(OK_STATUS); - ThriftCLIServerContext context = - (ThriftCLIServerContext)currentServerContext.get(); - if (context != null) { - context.setSessionHandle(sessionHandle); - } - } catch (Exception e) { - LOG.warn("Error opening session: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - private String getIpAddress() { - String clientIpAddress; - // Http transport mode. - // We set the thread local ip address, in ThriftHttpServlet. - if (cliService.getHiveConf().getVar( - ConfVars.HIVE_SERVER2_TRANSPORT_MODE).equalsIgnoreCase("http")) { - clientIpAddress = SessionManager.getIpAddress(); - } - else { - // Kerberos - if (isKerberosAuthMode()) { - clientIpAddress = hiveAuthFactory.getIpAddress(); - } - // Except kerberos, NOSASL - else { - clientIpAddress = TSetIpAddressProcessor.getUserIpAddress(); - } - } - LOG.debug("Client's IP Address: " + clientIpAddress); - return clientIpAddress; - } - - /** - * Returns the effective username. - * 1. If hive.server2.allow.user.substitution = false: the username of the connecting user - * 2. 
If hive.server2.allow.user.substitution = true: the username of the end user, - * that the connecting user is trying to proxy for. - * This includes a check whether the connecting user is allowed to proxy for the end user. - * @param req - * @return - * @throws HiveSQLException - */ - private String getUserName(TOpenSessionReq req) throws HiveSQLException { - String userName = null; - // Kerberos - if (isKerberosAuthMode()) { - userName = hiveAuthFactory.getRemoteUser(); - } - // Except kerberos, NOSASL - if (userName == null) { - userName = TSetIpAddressProcessor.getUserName(); - } - // Http transport mode. - // We set the thread local username, in ThriftHttpServlet. - if (cliService.getHiveConf().getVar( - ConfVars.HIVE_SERVER2_TRANSPORT_MODE).equalsIgnoreCase("http")) { - userName = SessionManager.getUserName(); - } - if (userName == null) { - userName = req.getUsername(); - } - - userName = getShortName(userName); - String effectiveClientUser = getProxyUser(userName, req.getConfiguration(), getIpAddress()); - LOG.debug("Client's username: " + effectiveClientUser); - return effectiveClientUser; - } - - private String getShortName(String userName) { - String ret = null; - if (userName != null) { - int indexOfDomainMatch = ServiceUtils.indexOfDomainMatch(userName); - ret = (indexOfDomainMatch <= 0) ? userName : - userName.substring(0, indexOfDomainMatch); - } - - return ret; - } - - /** - * Create a session handle - * @param req - * @param res - * @return - * @throws HiveSQLException - * @throws LoginException - * @throws IOException - */ - SessionHandle getSessionHandle(TOpenSessionReq req, TOpenSessionResp res) - throws HiveSQLException, LoginException, IOException { - String userName = getUserName(req); - String ipAddress = getIpAddress(); - TProtocolVersion protocol = getMinVersion(CLIService.SERVER_VERSION, - req.getClient_protocol()); - res.setServerProtocolVersion(protocol); - SessionHandle sessionHandle; - if (cliService.getHiveConf().getBoolVar(ConfVars.HIVE_SERVER2_ENABLE_DOAS) && - (userName != null)) { - String delegationTokenStr = getDelegationToken(userName); - sessionHandle = cliService.openSessionWithImpersonation(protocol, userName, - req.getPassword(), ipAddress, req.getConfiguration(), delegationTokenStr); - } else { - sessionHandle = cliService.openSession(protocol, userName, req.getPassword(), - ipAddress, req.getConfiguration()); - } - return sessionHandle; - } - - - private String getDelegationToken(String userName) - throws HiveSQLException, LoginException, IOException { - if (userName == null || !cliService.getHiveConf().getVar(ConfVars.HIVE_SERVER2_AUTHENTICATION) - .equalsIgnoreCase(HiveAuthFactory.AuthTypes.KERBEROS.toString())) { - return null; - } - try { - return cliService.getDelegationTokenFromMetaStore(userName); - } catch (UnsupportedOperationException e) { - // The delegation token is not applicable in the given deployment mode - } - return null; - } - - private TProtocolVersion getMinVersion(TProtocolVersion... 
versions) { - TProtocolVersion[] values = TProtocolVersion.values(); - int current = values[values.length - 1].getValue(); - for (TProtocolVersion version : versions) { - if (current > version.getValue()) { - current = version.getValue(); - } - } - for (TProtocolVersion version : values) { - if (version.getValue() == current) { - return version; - } - } - throw new IllegalArgumentException("never"); - } - - @Override - public TCloseSessionResp CloseSession(TCloseSessionReq req) throws TException { - TCloseSessionResp resp = new TCloseSessionResp(); - try { - SessionHandle sessionHandle = new SessionHandle(req.getSessionHandle()); - cliService.closeSession(sessionHandle); - resp.setStatus(OK_STATUS); - ThriftCLIServerContext context = - (ThriftCLIServerContext)currentServerContext.get(); - if (context != null) { - context.setSessionHandle(null); - } - } catch (Exception e) { - LOG.warn("Error closing session: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TGetInfoResp GetInfo(TGetInfoReq req) throws TException { - TGetInfoResp resp = new TGetInfoResp(); - try { - GetInfoValue getInfoValue = - cliService.getInfo(new SessionHandle(req.getSessionHandle()), - GetInfoType.getGetInfoType(req.getInfoType())); - resp.setInfoValue(getInfoValue.toTGetInfoValue()); - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error getting info: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TExecuteStatementResp ExecuteStatement(TExecuteStatementReq req) throws TException { - TExecuteStatementResp resp = new TExecuteStatementResp(); - try { - SessionHandle sessionHandle = new SessionHandle(req.getSessionHandle()); - String statement = req.getStatement(); - Map confOverlay = req.getConfOverlay(); - Boolean runAsync = req.isRunAsync(); - OperationHandle operationHandle = runAsync ? 
- cliService.executeStatementAsync(sessionHandle, statement, confOverlay) - : cliService.executeStatement(sessionHandle, statement, confOverlay); - resp.setOperationHandle(operationHandle.toTOperationHandle()); - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error executing statement: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TGetTypeInfoResp GetTypeInfo(TGetTypeInfoReq req) throws TException { - TGetTypeInfoResp resp = new TGetTypeInfoResp(); - try { - OperationHandle operationHandle = cliService.getTypeInfo(new SessionHandle(req.getSessionHandle())); - resp.setOperationHandle(operationHandle.toTOperationHandle()); - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error getting type info: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TGetCatalogsResp GetCatalogs(TGetCatalogsReq req) throws TException { - TGetCatalogsResp resp = new TGetCatalogsResp(); - try { - OperationHandle opHandle = cliService.getCatalogs(new SessionHandle(req.getSessionHandle())); - resp.setOperationHandle(opHandle.toTOperationHandle()); - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error getting catalogs: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TGetSchemasResp GetSchemas(TGetSchemasReq req) throws TException { - TGetSchemasResp resp = new TGetSchemasResp(); - try { - OperationHandle opHandle = cliService.getSchemas( - new SessionHandle(req.getSessionHandle()), req.getCatalogName(), req.getSchemaName()); - resp.setOperationHandle(opHandle.toTOperationHandle()); - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error getting schemas: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TGetTablesResp GetTables(TGetTablesReq req) throws TException { - TGetTablesResp resp = new TGetTablesResp(); - try { - OperationHandle opHandle = cliService - .getTables(new SessionHandle(req.getSessionHandle()), req.getCatalogName(), - req.getSchemaName(), req.getTableName(), req.getTableTypes()); - resp.setOperationHandle(opHandle.toTOperationHandle()); - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error getting tables: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TGetTableTypesResp GetTableTypes(TGetTableTypesReq req) throws TException { - TGetTableTypesResp resp = new TGetTableTypesResp(); - try { - OperationHandle opHandle = cliService.getTableTypes(new SessionHandle(req.getSessionHandle())); - resp.setOperationHandle(opHandle.toTOperationHandle()); - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error getting table types: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TGetColumnsResp GetColumns(TGetColumnsReq req) throws TException { - TGetColumnsResp resp = new TGetColumnsResp(); - try { - OperationHandle opHandle = cliService.getColumns( - new SessionHandle(req.getSessionHandle()), - req.getCatalogName(), - req.getSchemaName(), - req.getTableName(), - req.getColumnName()); - resp.setOperationHandle(opHandle.toTOperationHandle()); - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error getting columns: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TGetFunctionsResp GetFunctions(TGetFunctionsReq req) throws 
TException { - TGetFunctionsResp resp = new TGetFunctionsResp(); - try { - OperationHandle opHandle = cliService.getFunctions( - new SessionHandle(req.getSessionHandle()), req.getCatalogName(), - req.getSchemaName(), req.getFunctionName()); - resp.setOperationHandle(opHandle.toTOperationHandle()); - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error getting functions: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TGetOperationStatusResp GetOperationStatus(TGetOperationStatusReq req) throws TException { - TGetOperationStatusResp resp = new TGetOperationStatusResp(); - try { - OperationStatus operationStatus = cliService.getOperationStatus( - new OperationHandle(req.getOperationHandle())); - resp.setOperationState(operationStatus.getState().toTOperationState()); - HiveSQLException opException = operationStatus.getOperationException(); - if (opException != null) { - resp.setSqlState(opException.getSQLState()); - resp.setErrorCode(opException.getErrorCode()); - resp.setErrorMessage(org.apache.hadoop.util.StringUtils - .stringifyException(opException)); - } - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error getting operation status: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TCancelOperationResp CancelOperation(TCancelOperationReq req) throws TException { - TCancelOperationResp resp = new TCancelOperationResp(); - try { - cliService.cancelOperation(new OperationHandle(req.getOperationHandle())); - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error cancelling operation: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TCloseOperationResp CloseOperation(TCloseOperationReq req) throws TException { - TCloseOperationResp resp = new TCloseOperationResp(); - try { - cliService.closeOperation(new OperationHandle(req.getOperationHandle())); - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error closing operation: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TGetResultSetMetadataResp GetResultSetMetadata(TGetResultSetMetadataReq req) - throws TException { - TGetResultSetMetadataResp resp = new TGetResultSetMetadataResp(); - try { - TableSchema schema = cliService.getResultSetMetadata(new OperationHandle(req.getOperationHandle())); - resp.setSchema(schema.toTTableSchema()); - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error getting result set metadata: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TFetchResultsResp FetchResults(TFetchResultsReq req) throws TException { - TFetchResultsResp resp = new TFetchResultsResp(); - try { - RowSet rowSet = cliService.fetchResults( - new OperationHandle(req.getOperationHandle()), - FetchOrientation.getFetchOrientation(req.getOrientation()), - req.getMaxRows(), - FetchType.getFetchType(req.getFetchType())); - resp.setResults(rowSet.toTRowSet()); - resp.setHasMoreRows(false); - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error fetching results: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - protected abstract void initializeServer(); - - @Override - public abstract void run(); - - /** - * If the proxy user name is provided then check privileges to substitute the user. 
- * @param realUser - * @param sessionConf - * @param ipAddress - * @return - * @throws HiveSQLException - */ - private String getProxyUser(String realUser, Map sessionConf, - String ipAddress) throws HiveSQLException { - String proxyUser = null; - // Http transport mode. - // We set the thread local proxy username, in ThriftHttpServlet. - if (cliService.getHiveConf().getVar( - ConfVars.HIVE_SERVER2_TRANSPORT_MODE).equalsIgnoreCase("http")) { - proxyUser = SessionManager.getProxyUserName(); - LOG.debug("Proxy user from query string: " + proxyUser); - } - - if (proxyUser == null && sessionConf != null && sessionConf.containsKey(HiveAuthFactory.HS2_PROXY_USER)) { - String proxyUserFromThriftBody = sessionConf.get(HiveAuthFactory.HS2_PROXY_USER); - LOG.debug("Proxy user from thrift body: " + proxyUserFromThriftBody); - proxyUser = proxyUserFromThriftBody; - } - - if (proxyUser == null) { - return realUser; - } - - // check whether substitution is allowed - if (!hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_ALLOW_USER_SUBSTITUTION)) { - throw new HiveSQLException("Proxy user substitution is not allowed"); - } - - // If there's no authentication, then directly substitute the user - if (HiveAuthFactory.AuthTypes.NONE.toString() - .equalsIgnoreCase(hiveConf.getVar(ConfVars.HIVE_SERVER2_AUTHENTICATION))) { - return proxyUser; - } - - // Verify proxy user privilege of the realUser for the proxyUser - HiveAuthFactory.verifyProxyAccess(realUser, proxyUser, ipAddress, hiveConf); - LOG.debug("Verified proxy user: " + proxyUser); - return proxyUser; - } - - private boolean isKerberosAuthMode() { - return cliService.getHiveConf().getVar(ConfVars.HIVE_SERVER2_AUTHENTICATION) - .equalsIgnoreCase(HiveAuthFactory.AuthTypes.KERBEROS.toString()); - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java deleted file mode 100644 index 1af45398b895c..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java +++ /dev/null @@ -1,440 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.thrift; - -import java.util.List; -import java.util.Map; - -import org.apache.hive.service.auth.HiveAuthFactory; -import org.apache.hive.service.cli.*; -import org.apache.thrift.TException; - -/** - * ThriftCLIServiceClient. 
- * - */ -public class ThriftCLIServiceClient extends CLIServiceClient { - private final TCLIService.Iface cliService; - - public ThriftCLIServiceClient(TCLIService.Iface cliService) { - this.cliService = cliService; - } - - public void checkStatus(TStatus status) throws HiveSQLException { - if (TStatusCode.ERROR_STATUS.equals(status.getStatusCode())) { - throw new HiveSQLException(status); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#openSession(java.lang.String, java.lang.String, java.util.Map) - */ - @Override - public SessionHandle openSession(String username, String password, - Map configuration) - throws HiveSQLException { - try { - TOpenSessionReq req = new TOpenSessionReq(); - req.setUsername(username); - req.setPassword(password); - req.setConfiguration(configuration); - TOpenSessionResp resp = cliService.OpenSession(req); - checkStatus(resp.getStatus()); - return new SessionHandle(resp.getSessionHandle(), resp.getServerProtocolVersion()); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#closeSession(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public SessionHandle openSessionWithImpersonation(String username, String password, - Map configuration, String delegationToken) throws HiveSQLException { - throw new HiveSQLException("open with impersonation operation is not supported in the client"); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#closeSession(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public void closeSession(SessionHandle sessionHandle) throws HiveSQLException { - try { - TCloseSessionReq req = new TCloseSessionReq(sessionHandle.toTSessionHandle()); - TCloseSessionResp resp = cliService.CloseSession(req); - checkStatus(resp.getStatus()); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getInfo(org.apache.hive.service.cli.SessionHandle, java.util.List) - */ - @Override - public GetInfoValue getInfo(SessionHandle sessionHandle, GetInfoType infoType) - throws HiveSQLException { - try { - // FIXME extract the right info type - TGetInfoReq req = new TGetInfoReq(sessionHandle.toTSessionHandle(), infoType.toTGetInfoType()); - TGetInfoResp resp = cliService.GetInfo(req); - checkStatus(resp.getStatus()); - return new GetInfoValue(resp.getInfoValue()); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#executeStatement(org.apache.hive.service.cli.SessionHandle, java.lang.String, java.util.Map) - */ - @Override - public OperationHandle executeStatement(SessionHandle sessionHandle, String statement, - Map confOverlay) - throws HiveSQLException { - return executeStatementInternal(sessionHandle, statement, confOverlay, false); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#executeStatementAsync(org.apache.hive.service.cli.SessionHandle, java.lang.String, java.util.Map) - */ - @Override - public OperationHandle executeStatementAsync(SessionHandle sessionHandle, String statement, - Map confOverlay) - throws HiveSQLException { - return executeStatementInternal(sessionHandle, statement, confOverlay, true); - } - - private OperationHandle executeStatementInternal(SessionHandle 
sessionHandle, String statement, - Map confOverlay, boolean isAsync) - throws HiveSQLException { - try { - TExecuteStatementReq req = - new TExecuteStatementReq(sessionHandle.toTSessionHandle(), statement); - req.setConfOverlay(confOverlay); - req.setRunAsync(isAsync); - TExecuteStatementResp resp = cliService.ExecuteStatement(req); - checkStatus(resp.getStatus()); - TProtocolVersion protocol = sessionHandle.getProtocolVersion(); - return new OperationHandle(resp.getOperationHandle(), protocol); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getTypeInfo(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public OperationHandle getTypeInfo(SessionHandle sessionHandle) throws HiveSQLException { - try { - TGetTypeInfoReq req = new TGetTypeInfoReq(sessionHandle.toTSessionHandle()); - TGetTypeInfoResp resp = cliService.GetTypeInfo(req); - checkStatus(resp.getStatus()); - TProtocolVersion protocol = sessionHandle.getProtocolVersion(); - return new OperationHandle(resp.getOperationHandle(), protocol); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getCatalogs(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public OperationHandle getCatalogs(SessionHandle sessionHandle) throws HiveSQLException { - try { - TGetCatalogsReq req = new TGetCatalogsReq(sessionHandle.toTSessionHandle()); - TGetCatalogsResp resp = cliService.GetCatalogs(req); - checkStatus(resp.getStatus()); - TProtocolVersion protocol = sessionHandle.getProtocolVersion(); - return new OperationHandle(resp.getOperationHandle(), protocol); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getSchemas(org.apache.hive.service.cli.SessionHandle, java.lang.String, java.lang.String) - */ - @Override - public OperationHandle getSchemas(SessionHandle sessionHandle, String catalogName, - String schemaName) - throws HiveSQLException { - try { - TGetSchemasReq req = new TGetSchemasReq(sessionHandle.toTSessionHandle()); - req.setCatalogName(catalogName); - req.setSchemaName(schemaName); - TGetSchemasResp resp = cliService.GetSchemas(req); - checkStatus(resp.getStatus()); - TProtocolVersion protocol = sessionHandle.getProtocolVersion(); - return new OperationHandle(resp.getOperationHandle(), protocol); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getTables(org.apache.hive.service.cli.SessionHandle, java.lang.String, java.lang.String, java.lang.String, java.util.List) - */ - @Override - public OperationHandle getTables(SessionHandle sessionHandle, String catalogName, - String schemaName, String tableName, List tableTypes) - throws HiveSQLException { - try { - TGetTablesReq req = new TGetTablesReq(sessionHandle.toTSessionHandle()); - req.setTableName(tableName); - req.setTableTypes(tableTypes); - req.setSchemaName(schemaName); - TGetTablesResp resp = cliService.GetTables(req); - checkStatus(resp.getStatus()); - TProtocolVersion protocol = sessionHandle.getProtocolVersion(); - return new OperationHandle(resp.getOperationHandle(), protocol); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) 
{ - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getTableTypes(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public OperationHandle getTableTypes(SessionHandle sessionHandle) throws HiveSQLException { - try { - TGetTableTypesReq req = new TGetTableTypesReq(sessionHandle.toTSessionHandle()); - TGetTableTypesResp resp = cliService.GetTableTypes(req); - checkStatus(resp.getStatus()); - TProtocolVersion protocol = sessionHandle.getProtocolVersion(); - return new OperationHandle(resp.getOperationHandle(), protocol); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getColumns(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public OperationHandle getColumns(SessionHandle sessionHandle, - String catalogName, String schemaName, String tableName, String columnName) - throws HiveSQLException { - try { - TGetColumnsReq req = new TGetColumnsReq(); - req.setSessionHandle(sessionHandle.toTSessionHandle()); - req.setCatalogName(catalogName); - req.setSchemaName(schemaName); - req.setTableName(tableName); - req.setColumnName(columnName); - TGetColumnsResp resp = cliService.GetColumns(req); - checkStatus(resp.getStatus()); - TProtocolVersion protocol = sessionHandle.getProtocolVersion(); - return new OperationHandle(resp.getOperationHandle(), protocol); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getFunctions(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public OperationHandle getFunctions(SessionHandle sessionHandle, - String catalogName, String schemaName, String functionName) throws HiveSQLException { - try { - TGetFunctionsReq req = new TGetFunctionsReq(sessionHandle.toTSessionHandle(), functionName); - req.setCatalogName(catalogName); - req.setSchemaName(schemaName); - TGetFunctionsResp resp = cliService.GetFunctions(req); - checkStatus(resp.getStatus()); - TProtocolVersion protocol = sessionHandle.getProtocolVersion(); - return new OperationHandle(resp.getOperationHandle(), protocol); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getOperationStatus(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public OperationStatus getOperationStatus(OperationHandle opHandle) throws HiveSQLException { - try { - TGetOperationStatusReq req = new TGetOperationStatusReq(opHandle.toTOperationHandle()); - TGetOperationStatusResp resp = cliService.GetOperationStatus(req); - // Checks the status of the RPC call, throws an exception in case of error - checkStatus(resp.getStatus()); - OperationState opState = OperationState.getOperationState(resp.getOperationState()); - HiveSQLException opException = null; - if (opState == OperationState.ERROR) { - opException = new HiveSQLException(resp.getErrorMessage(), resp.getSqlState(), resp.getErrorCode()); - } - return new OperationStatus(opState, opException); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#cancelOperation(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public void cancelOperation(OperationHandle 
opHandle) throws HiveSQLException { - try { - TCancelOperationReq req = new TCancelOperationReq(opHandle.toTOperationHandle()); - TCancelOperationResp resp = cliService.CancelOperation(req); - checkStatus(resp.getStatus()); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#closeOperation(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public void closeOperation(OperationHandle opHandle) - throws HiveSQLException { - try { - TCloseOperationReq req = new TCloseOperationReq(opHandle.toTOperationHandle()); - TCloseOperationResp resp = cliService.CloseOperation(req); - checkStatus(resp.getStatus()); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getResultSetMetadata(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public TableSchema getResultSetMetadata(OperationHandle opHandle) - throws HiveSQLException { - try { - TGetResultSetMetadataReq req = new TGetResultSetMetadataReq(opHandle.toTOperationHandle()); - TGetResultSetMetadataResp resp = cliService.GetResultSetMetadata(req); - checkStatus(resp.getStatus()); - return new TableSchema(resp.getSchema()); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - @Override - public RowSet fetchResults(OperationHandle opHandle, FetchOrientation orientation, long maxRows, - FetchType fetchType) throws HiveSQLException { - try { - TFetchResultsReq req = new TFetchResultsReq(); - req.setOperationHandle(opHandle.toTOperationHandle()); - req.setOrientation(orientation.toTFetchOrientation()); - req.setMaxRows(maxRows); - req.setFetchType(fetchType.toTFetchType()); - TFetchResultsResp resp = cliService.FetchResults(req); - checkStatus(resp.getStatus()); - return RowSetFactory.create(resp.getResults(), opHandle.getProtocolVersion()); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#fetchResults(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public RowSet fetchResults(OperationHandle opHandle) throws HiveSQLException { - // TODO: set the correct default fetch size - return fetchResults(opHandle, FetchOrientation.FETCH_NEXT, 10000, FetchType.QUERY_OUTPUT); - } - - @Override - public String getDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, - String owner, String renewer) throws HiveSQLException { - TGetDelegationTokenReq req = new TGetDelegationTokenReq( - sessionHandle.toTSessionHandle(), owner, renewer); - try { - TGetDelegationTokenResp tokenResp = cliService.GetDelegationToken(req); - checkStatus(tokenResp.getStatus()); - return tokenResp.getDelegationToken(); - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - @Override - public void cancelDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, - String tokenStr) throws HiveSQLException { - TCancelDelegationTokenReq cancelReq = new TCancelDelegationTokenReq( - sessionHandle.toTSessionHandle(), tokenStr); - try { - TCancelDelegationTokenResp cancelResp = - cliService.CancelDelegationToken(cancelReq); - checkStatus(cancelResp.getStatus()); - return; - } catch (TException e) { - throw new HiveSQLException(e); - } - } - - @Override - public void 
renewDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, - String tokenStr) throws HiveSQLException { - TRenewDelegationTokenReq cancelReq = new TRenewDelegationTokenReq( - sessionHandle.toTSessionHandle(), tokenStr); - try { - TRenewDelegationTokenResp renewResp = - cliService.RenewDelegationToken(cancelReq); - checkStatus(renewResp.getStatus()); - return; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java deleted file mode 100644 index bd64c777c1d76..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java +++ /dev/null @@ -1,194 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.thrift; - -import java.util.Arrays; -import java.util.concurrent.SynchronousQueue; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.util.Shell; -import org.apache.hive.service.ServiceException; -import org.apache.hive.service.auth.HiveAuthFactory; -import org.apache.hive.service.cli.CLIService; -import org.apache.hive.service.cli.thrift.TCLIService.Iface; -import org.apache.hive.service.server.ThreadFactoryWithGarbageCleanup; -import org.apache.thrift.TProcessor; -import org.apache.thrift.protocol.TBinaryProtocol; -import org.apache.thrift.protocol.TProtocolFactory; -import org.apache.thrift.server.TServlet; -import org.eclipse.jetty.server.AbstractConnectionFactory; -import org.eclipse.jetty.server.ConnectionFactory; -import org.eclipse.jetty.server.HttpConnectionFactory; -import org.eclipse.jetty.server.ServerConnector; -import org.eclipse.jetty.servlet.ServletContextHandler; -import org.eclipse.jetty.servlet.ServletHolder; -import org.eclipse.jetty.util.ssl.SslContextFactory; -import org.eclipse.jetty.util.thread.ExecutorThreadPool; -import org.eclipse.jetty.util.thread.ScheduledExecutorScheduler; - - -public class ThriftHttpCLIService extends ThriftCLIService { - - public ThriftHttpCLIService(CLIService cliService) { - super(cliService, ThriftHttpCLIService.class.getSimpleName()); - } - - @Override - protected void initializeServer() { - try { - // Server thread pool - // Start with minWorkerThreads, expand till maxWorkerThreads and reject subsequent requests - String threadPoolName = "HiveServer2-HttpHandler-Pool"; - 
ThreadPoolExecutor executorService = new ThreadPoolExecutor(minWorkerThreads, maxWorkerThreads, - workerKeepAliveTime, TimeUnit.SECONDS, new SynchronousQueue(), - new ThreadFactoryWithGarbageCleanup(threadPoolName)); - ExecutorThreadPool threadPool = new ExecutorThreadPool(executorService); - - // HTTP Server - httpServer = new org.eclipse.jetty.server.Server(threadPool); - - // Connector configs - - ConnectionFactory[] connectionFactories; - boolean useSsl = hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_USE_SSL); - String schemeName = useSsl ? "https" : "http"; - // Change connector if SSL is used - if (useSsl) { - String keyStorePath = hiveConf.getVar(ConfVars.HIVE_SERVER2_SSL_KEYSTORE_PATH).trim(); - String keyStorePassword = ShimLoader.getHadoopShims().getPassword(hiveConf, - HiveConf.ConfVars.HIVE_SERVER2_SSL_KEYSTORE_PASSWORD.varname); - if (keyStorePath.isEmpty()) { - throw new IllegalArgumentException(ConfVars.HIVE_SERVER2_SSL_KEYSTORE_PATH.varname - + " Not configured for SSL connection"); - } - SslContextFactory sslContextFactory = new SslContextFactory.Server(); - String[] excludedProtocols = hiveConf.getVar(ConfVars.HIVE_SSL_PROTOCOL_BLACKLIST).split(","); - LOG.info("HTTP Server SSL: adding excluded protocols: " + Arrays.toString(excludedProtocols)); - sslContextFactory.addExcludeProtocols(excludedProtocols); - LOG.info("HTTP Server SSL: SslContextFactory.getExcludeProtocols = " + - Arrays.toString(sslContextFactory.getExcludeProtocols())); - sslContextFactory.setKeyStorePath(keyStorePath); - sslContextFactory.setKeyStorePassword(keyStorePassword); - connectionFactories = AbstractConnectionFactory.getFactories( - sslContextFactory, new HttpConnectionFactory()); - } else { - connectionFactories = new ConnectionFactory[] { new HttpConnectionFactory() }; - } - ServerConnector connector = new ServerConnector( - httpServer, - null, - // Call this full constructor to set this, which forces daemon threads: - new ScheduledExecutorScheduler("HiveServer2-HttpHandler-JettyScheduler", true), - null, - -1, - -1, - connectionFactories); - - connector.setPort(portNum); - // Linux:yes, Windows:no - connector.setReuseAddress(!Shell.WINDOWS); - int maxIdleTime = (int) hiveConf.getTimeVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_MAX_IDLE_TIME, - TimeUnit.MILLISECONDS); - connector.setIdleTimeout(maxIdleTime); - - httpServer.addConnector(connector); - - // Thrift configs - hiveAuthFactory = new HiveAuthFactory(hiveConf); - TProcessor processor = new TCLIService.Processor(this); - TProtocolFactory protocolFactory = new TBinaryProtocol.Factory(); - // Set during the init phase of HiveServer2 if auth mode is kerberos - // UGI for the hive/_HOST (kerberos) principal - UserGroupInformation serviceUGI = cliService.getServiceUGI(); - // UGI for the http/_HOST (SPNego) principal - UserGroupInformation httpUGI = cliService.getHttpUGI(); - String authType = hiveConf.getVar(ConfVars.HIVE_SERVER2_AUTHENTICATION); - TServlet thriftHttpServlet = new ThriftHttpServlet(processor, protocolFactory, authType, - serviceUGI, httpUGI); - - // Context handler - final ServletContextHandler context = new ServletContextHandler( - ServletContextHandler.SESSIONS); - context.setContextPath("/"); - String httpPath = getHttpPath(hiveConf - .getVar(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_HTTP_PATH)); - httpServer.setHandler(context); - context.addServlet(new ServletHolder(thriftHttpServlet), httpPath); - - // TODO: check defaults: maxTimeout, keepalive, maxBodySize, bodyRecieveDuration, etc. 
- // Finally, start the server - httpServer.start(); - // In case HIVE_SERVER2_THRIFT_HTTP_PORT or hive.server2.thrift.http.port is configured with - // 0 which represents any free port, we should set it to the actual one - portNum = connector.getLocalPort(); - String msg = "Started " + ThriftHttpCLIService.class.getSimpleName() + " in " + schemeName - + " mode on port " + connector.getLocalPort()+ " path=" + httpPath + " with " + minWorkerThreads + "..." - + maxWorkerThreads + " worker threads"; - LOG.info(msg); - } catch (Exception t) { - throw new ServiceException("Error initializing " + getName(), t); - } - } - - /** - * Configure Jetty to serve http requests. Example of a client connection URL: - * http://localhost:10000/servlets/thrifths2/ A gateway may cause actual target URL to differ, - * e.g. http://gateway:port/hive2/servlets/thrifths2/ - */ - @Override - public void run() { - try { - httpServer.join(); - } catch (Throwable t) { - LOG.fatal( - "Error starting HiveServer2: could not start " - + ThriftHttpCLIService.class.getSimpleName(), t); - System.exit(-1); - } - } - - /** - * The config parameter can be like "path", "/path", "/path/", "path/*", "/path1/path2/*" and so on. - * httpPath should end up as "/*", "/path/*" or "/path1/../pathN/*" - * @param httpPath - * @return - */ - private String getHttpPath(String httpPath) { - if(httpPath == null || httpPath.equals("")) { - httpPath = "/*"; - } - else { - if(!httpPath.startsWith("/")) { - httpPath = "/" + httpPath; - } - if(httpPath.endsWith("/")) { - httpPath = httpPath + "*"; - } - if(!httpPath.endsWith("/*")) { - httpPath = httpPath + "/*"; - } - } - return httpPath; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java deleted file mode 100644 index e15d2d0566d2b..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java +++ /dev/null @@ -1,545 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.service.cli.thrift; - -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.security.PrivilegedExceptionAction; -import java.util.Map; -import java.util.Random; -import java.util.Set; -import java.util.concurrent.TimeUnit; - -import javax.servlet.ServletException; -import javax.servlet.http.Cookie; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; -import javax.ws.rs.core.NewCookie; - -import org.apache.commons.codec.binary.Base64; -import org.apache.commons.codec.binary.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.shims.HadoopShims.KerberosNameShim; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hive.service.auth.AuthenticationProviderFactory; -import org.apache.hive.service.auth.AuthenticationProviderFactory.AuthMethods; -import org.apache.hive.service.auth.HiveAuthFactory; -import org.apache.hive.service.auth.HttpAuthUtils; -import org.apache.hive.service.auth.HttpAuthenticationException; -import org.apache.hive.service.auth.PasswdAuthenticationProvider; -import org.apache.hive.service.cli.session.SessionManager; -import org.apache.hive.service.CookieSigner; -import org.apache.thrift.TProcessor; -import org.apache.thrift.protocol.TProtocolFactory; -import org.apache.thrift.server.TServlet; -import org.ietf.jgss.GSSContext; -import org.ietf.jgss.GSSCredential; -import org.ietf.jgss.GSSException; -import org.ietf.jgss.GSSManager; -import org.ietf.jgss.GSSName; -import org.ietf.jgss.Oid; - -/** - * - * ThriftHttpServlet - * - */ -public class ThriftHttpServlet extends TServlet { - - private static final long serialVersionUID = 1L; - public static final Log LOG = LogFactory.getLog(ThriftHttpServlet.class.getName()); - private final String authType; - private final UserGroupInformation serviceUGI; - private final UserGroupInformation httpUGI; - private HiveConf hiveConf = new HiveConf(); - - // Class members for cookie based authentication. - private CookieSigner signer; - public static final String AUTH_COOKIE = "hive.server2.auth"; - private static final Random RAN = new Random(); - private boolean isCookieAuthEnabled; - private String cookieDomain; - private String cookiePath; - private int cookieMaxAge; - private boolean isCookieSecure; - private boolean isHttpOnlyCookie; - - public ThriftHttpServlet(TProcessor processor, TProtocolFactory protocolFactory, - String authType, UserGroupInformation serviceUGI, UserGroupInformation httpUGI) { - super(processor, protocolFactory); - this.authType = authType; - this.serviceUGI = serviceUGI; - this.httpUGI = httpUGI; - this.isCookieAuthEnabled = hiveConf.getBoolVar( - ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_AUTH_ENABLED); - // Initialize the cookie based authentication related variables. - if (isCookieAuthEnabled) { - // Generate the signer with secret. 
- String secret = Long.toString(RAN.nextLong()); - LOG.debug("Using the random number as the secret for cookie generation " + secret); - this.signer = new CookieSigner(secret.getBytes()); - this.cookieMaxAge = (int) hiveConf.getTimeVar( - ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_MAX_AGE, TimeUnit.SECONDS); - this.cookieDomain = hiveConf.getVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_DOMAIN); - this.cookiePath = hiveConf.getVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_PATH); - this.isCookieSecure = hiveConf.getBoolVar( - ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_IS_SECURE); - this.isHttpOnlyCookie = hiveConf.getBoolVar( - ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_IS_HTTPONLY); - } - } - - @Override - protected void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - String clientUserName = null; - String clientIpAddress; - boolean requireNewCookie = false; - - try { - // If the cookie based authentication is already enabled, parse the - // request and validate the request cookies. - if (isCookieAuthEnabled) { - clientUserName = validateCookie(request); - requireNewCookie = (clientUserName == null); - if (requireNewCookie) { - LOG.info("Could not validate cookie sent, will try to generate a new cookie"); - } - } - // If the cookie based authentication is not enabled or the request does - // not have a valid cookie, use the kerberos or password based authentication - // depending on the server setup. - if (clientUserName == null) { - // For a kerberos setup - if (isKerberosAuthMode(authType)) { - clientUserName = doKerberosAuth(request); - } - // For password based authentication - else { - clientUserName = doPasswdAuth(request, authType); - } - } - LOG.debug("Client username: " + clientUserName); - - // Set the thread local username to be used for doAs if true - SessionManager.setUserName(clientUserName); - - // find proxy user if any from query param - String doAsQueryParam = getDoAsQueryParam(request.getQueryString()); - if (doAsQueryParam != null) { - SessionManager.setProxyUserName(doAsQueryParam); - } - - clientIpAddress = request.getRemoteAddr(); - LOG.debug("Client IP Address: " + clientIpAddress); - // Set the thread local ip address - SessionManager.setIpAddress(clientIpAddress); - // Generate new cookie and add it to the response - if (requireNewCookie && - !authType.equalsIgnoreCase(HiveAuthFactory.AuthTypes.NOSASL.toString())) { - String cookieToken = HttpAuthUtils.createCookieToken(clientUserName); - Cookie hs2Cookie = createCookie(signer.signCookie(cookieToken)); - - if (isHttpOnlyCookie) { - response.setHeader("SET-COOKIE", getHttpOnlyCookieHeader(hs2Cookie)); - } else { - response.addCookie(hs2Cookie); - } - LOG.info("Cookie added for clientUserName " + clientUserName); - } - super.doPost(request, response); - } - catch (HttpAuthenticationException e) { - LOG.error("Error: ", e); - // Send a 401 to the client - response.setStatus(HttpServletResponse.SC_UNAUTHORIZED); - if(isKerberosAuthMode(authType)) { - response.addHeader(HttpAuthUtils.WWW_AUTHENTICATE, HttpAuthUtils.NEGOTIATE); - } - response.getWriter().println("Authentication Error: " + e.getMessage()); - } - finally { - // Clear the thread locals - SessionManager.clearUserName(); - SessionManager.clearIpAddress(); - SessionManager.clearProxyUserName(); - } - } - - /** - * Retrieves the client name from cookieString. If the cookie does not - * correspond to a valid client, the function returns null. - * @param cookies HTTP Request cookies. 
- * @return Client Username if cookieString has a HS2 Generated cookie that is currently valid. - * Else, returns null. - */ - private String getClientNameFromCookie(Cookie[] cookies) { - // Current Cookie Name, Current Cookie Value - String currName, currValue; - - // Following is the main loop which iterates through all the cookies send by the client. - // The HS2 generated cookies are of the format hive.server2.auth= - // A cookie which is identified as a hiveserver2 generated cookie is validated - // by calling signer.verifyAndExtract(). If the validation passes, send the - // username for which the cookie is validated to the caller. If no client side - // cookie passes the validation, return null to the caller. - for (Cookie currCookie : cookies) { - // Get the cookie name - currName = currCookie.getName(); - if (!currName.equals(AUTH_COOKIE)) { - // Not a HS2 generated cookie, continue. - continue; - } - // If we reached here, we have match for HS2 generated cookie - currValue = currCookie.getValue(); - // Validate the value. - currValue = signer.verifyAndExtract(currValue); - // Retrieve the user name, do the final validation step. - if (currValue != null) { - String userName = HttpAuthUtils.getUserNameFromCookieToken(currValue); - - if (userName == null) { - LOG.warn("Invalid cookie token " + currValue); - continue; - } - //We have found a valid cookie in the client request. - if (LOG.isDebugEnabled()) { - LOG.debug("Validated the cookie for user " + userName); - } - return userName; - } - } - // No valid HS2 generated cookies found, return null - return null; - } - - /** - * Convert cookie array to human readable cookie string - * @param cookies Cookie Array - * @return String containing all the cookies separated by a newline character. - * Each cookie is of the format [key]=[value] - */ - private String toCookieStr(Cookie[] cookies) { - String cookieStr = ""; - - for (Cookie c : cookies) { - cookieStr += c.getName() + "=" + c.getValue() + " ;\n"; - } - return cookieStr; - } - - /** - * Validate the request cookie. This function iterates over the request cookie headers - * and finds a cookie that represents a valid client/server session. If it finds one, it - * returns the client name associated with the session. Else, it returns null. - * @param request The HTTP Servlet Request send by the client - * @return Client Username if the request has valid HS2 cookie, else returns null - * @throws UnsupportedEncodingException - */ - private String validateCookie(HttpServletRequest request) throws UnsupportedEncodingException { - // Find all the valid cookies associated with the request. - Cookie[] cookies = request.getCookies(); - - if (cookies == null) { - if (LOG.isDebugEnabled()) { - LOG.debug("No valid cookies associated with the request " + request); - } - return null; - } - if (LOG.isDebugEnabled()) { - LOG.debug("Received cookies: " + toCookieStr(cookies)); - } - return getClientNameFromCookie(cookies); - } - - /** - * Generate a server side cookie given the cookie value as the input. - * @param str Input string token. - * @return The generated cookie. 
- * @throws UnsupportedEncodingException - */ - private Cookie createCookie(String str) throws UnsupportedEncodingException { - if (LOG.isDebugEnabled()) { - LOG.debug("Cookie name = " + AUTH_COOKIE + " value = " + str); - } - Cookie cookie = new Cookie(AUTH_COOKIE, str); - - cookie.setMaxAge(cookieMaxAge); - if (cookieDomain != null) { - cookie.setDomain(cookieDomain); - } - if (cookiePath != null) { - cookie.setPath(cookiePath); - } - cookie.setSecure(isCookieSecure); - return cookie; - } - - /** - * Generate httponly cookie from HS2 cookie - * @param cookie HS2 generated cookie - * @return The httponly cookie - */ - private static String getHttpOnlyCookieHeader(Cookie cookie) { - NewCookie newCookie = new NewCookie(cookie.getName(), cookie.getValue(), - cookie.getPath(), cookie.getDomain(), cookie.getVersion(), - cookie.getComment(), cookie.getMaxAge(), cookie.getSecure()); - return newCookie + "; HttpOnly"; - } - - /** - * Do the LDAP/PAM authentication - * @param request - * @param authType - * @throws HttpAuthenticationException - */ - private String doPasswdAuth(HttpServletRequest request, String authType) - throws HttpAuthenticationException { - String userName = getUsername(request, authType); - // No-op when authType is NOSASL - if (!authType.equalsIgnoreCase(HiveAuthFactory.AuthTypes.NOSASL.toString())) { - try { - AuthMethods authMethod = AuthMethods.getValidAuthMethod(authType); - PasswdAuthenticationProvider provider = - AuthenticationProviderFactory.getAuthenticationProvider(authMethod); - provider.Authenticate(userName, getPassword(request, authType)); - - } catch (Exception e) { - throw new HttpAuthenticationException(e); - } - } - return userName; - } - - /** - * Do the GSS-API kerberos authentication. - * We already have a logged in subject in the form of serviceUGI, - * which GSS-API will extract information from. - * In case of a SPNego request we use the httpUGI, - * for the authenticating service tickets. - * @param request - * @return - * @throws HttpAuthenticationException - */ - private String doKerberosAuth(HttpServletRequest request) - throws HttpAuthenticationException { - // Try authenticating with the http/_HOST principal - if (httpUGI != null) { - try { - return httpUGI.doAs(new HttpKerberosServerAction(request, httpUGI)); - } catch (Exception e) { - LOG.info("Failed to authenticate with http/_HOST kerberos principal, " + - "trying with hive/_HOST kerberos principal"); - } - } - // Now try with hive/_HOST principal - try { - return serviceUGI.doAs(new HttpKerberosServerAction(request, serviceUGI)); - } catch (Exception e) { - LOG.error("Failed to authenticate with hive/_HOST kerberos principal"); - throw new HttpAuthenticationException(e); - } - - } - - class HttpKerberosServerAction implements PrivilegedExceptionAction { - HttpServletRequest request; - UserGroupInformation serviceUGI; - - HttpKerberosServerAction(HttpServletRequest request, - UserGroupInformation serviceUGI) { - this.request = request; - this.serviceUGI = serviceUGI; - } - - @Override - public String run() throws HttpAuthenticationException { - // Get own Kerberos credentials for accepting connection - GSSManager manager = GSSManager.getInstance(); - GSSContext gssContext = null; - String serverPrincipal = getPrincipalWithoutRealm( - serviceUGI.getUserName()); - try { - // This Oid for Kerberos GSS-API mechanism. - Oid kerberosMechOid = new Oid("1.2.840.113554.1.2.2"); - // Oid for SPNego GSS-API mechanism. 
- Oid spnegoMechOid = new Oid("1.3.6.1.5.5.2"); - // Oid for kerberos principal name - Oid krb5PrincipalOid = new Oid("1.2.840.113554.1.2.2.1"); - - // GSS name for server - GSSName serverName = manager.createName(serverPrincipal, krb5PrincipalOid); - - // GSS credentials for server - GSSCredential serverCreds = manager.createCredential(serverName, - GSSCredential.DEFAULT_LIFETIME, - new Oid[]{kerberosMechOid, spnegoMechOid}, - GSSCredential.ACCEPT_ONLY); - - // Create a GSS context - gssContext = manager.createContext(serverCreds); - // Get service ticket from the authorization header - String serviceTicketBase64 = getAuthHeader(request, authType); - byte[] inToken = Base64.decodeBase64(serviceTicketBase64.getBytes()); - gssContext.acceptSecContext(inToken, 0, inToken.length); - // Authenticate or deny based on its context completion - if (!gssContext.isEstablished()) { - throw new HttpAuthenticationException("Kerberos authentication failed: " + - "unable to establish context with the service ticket " + - "provided by the client."); - } - else { - return getPrincipalWithoutRealmAndHost(gssContext.getSrcName().toString()); - } - } - catch (GSSException e) { - throw new HttpAuthenticationException("Kerberos authentication failed: ", e); - } - finally { - if (gssContext != null) { - try { - gssContext.dispose(); - } catch (GSSException e) { - // No-op - } - } - } - } - - private String getPrincipalWithoutRealm(String fullPrincipal) - throws HttpAuthenticationException { - KerberosNameShim fullKerberosName; - try { - fullKerberosName = ShimLoader.getHadoopShims().getKerberosNameShim(fullPrincipal); - } catch (IOException e) { - throw new HttpAuthenticationException(e); - } - String serviceName = fullKerberosName.getServiceName(); - String hostName = fullKerberosName.getHostName(); - String principalWithoutRealm = serviceName; - if (hostName != null) { - principalWithoutRealm = serviceName + "/" + hostName; - } - return principalWithoutRealm; - } - - private String getPrincipalWithoutRealmAndHost(String fullPrincipal) - throws HttpAuthenticationException { - KerberosNameShim fullKerberosName; - try { - fullKerberosName = ShimLoader.getHadoopShims().getKerberosNameShim(fullPrincipal); - return fullKerberosName.getShortName(); - } catch (IOException e) { - throw new HttpAuthenticationException(e); - } - } - } - - private String getUsername(HttpServletRequest request, String authType) - throws HttpAuthenticationException { - String[] creds = getAuthHeaderTokens(request, authType); - // Username must be present - if (creds[0] == null || creds[0].isEmpty()) { - throw new HttpAuthenticationException("Authorization header received " + - "from the client does not contain username."); - } - return creds[0]; - } - - private String getPassword(HttpServletRequest request, String authType) - throws HttpAuthenticationException { - String[] creds = getAuthHeaderTokens(request, authType); - // Password must be present - if (creds[1] == null || creds[1].isEmpty()) { - throw new HttpAuthenticationException("Authorization header received " + - "from the client does not contain username."); - } - return creds[1]; - } - - private String[] getAuthHeaderTokens(HttpServletRequest request, - String authType) throws HttpAuthenticationException { - String authHeaderBase64 = getAuthHeader(request, authType); - String authHeaderString = StringUtils.newStringUtf8( - Base64.decodeBase64(authHeaderBase64.getBytes())); - String[] creds = authHeaderString.split(":"); - return creds; - } - - /** - * Returns the base64 encoded 
auth header payload - * @param request - * @param authType - * @return - * @throws HttpAuthenticationException - */ - private String getAuthHeader(HttpServletRequest request, String authType) - throws HttpAuthenticationException { - String authHeader = request.getHeader(HttpAuthUtils.AUTHORIZATION); - // Each http request must have an Authorization header - if (authHeader == null || authHeader.isEmpty()) { - throw new HttpAuthenticationException("Authorization header received " + - "from the client is empty."); - } - - String authHeaderBase64String; - int beginIndex; - if (isKerberosAuthMode(authType)) { - beginIndex = (HttpAuthUtils.NEGOTIATE + " ").length(); - } - else { - beginIndex = (HttpAuthUtils.BASIC + " ").length(); - } - authHeaderBase64String = authHeader.substring(beginIndex); - // Authorization header must have a payload - if (authHeaderBase64String == null || authHeaderBase64String.isEmpty()) { - throw new HttpAuthenticationException("Authorization header received " + - "from the client does not contain any data."); - } - return authHeaderBase64String; - } - - private boolean isKerberosAuthMode(String authType) { - return authType.equalsIgnoreCase(HiveAuthFactory.AuthTypes.KERBEROS.toString()); - } - - private static String getDoAsQueryParam(String queryString) { - if (LOG.isDebugEnabled()) { - LOG.debug("URL query string:" + queryString); - } - if (queryString == null) { - return null; - } - Map params = javax.servlet.http.HttpUtils.parseQueryString( queryString ); - Set keySet = params.keySet(); - for (String key: keySet) { - if (key.equalsIgnoreCase("doAs")) { - return params.get(key)[0]; - } - } - return null; - } - -} - - diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/server/HiveServer2.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/server/HiveServer2.java deleted file mode 100644 index 95233996cbbcb..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/server/HiveServer2.java +++ /dev/null @@ -1,277 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.service.server; - -import java.util.Properties; - -import scala.runtime.AbstractFunction0; -import scala.runtime.BoxedUnit; - -import org.apache.commons.cli.GnuParser; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Option; -import org.apache.commons.cli.OptionBuilder; -import org.apache.commons.cli.Options; -import org.apache.commons.cli.ParseException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hive.common.util.HiveStringUtils; -import org.apache.hive.service.CompositeService; -import org.apache.hive.service.cli.CLIService; -import org.apache.hive.service.cli.thrift.ThriftBinaryCLIService; -import org.apache.hive.service.cli.thrift.ThriftCLIService; -import org.apache.hive.service.cli.thrift.ThriftHttpCLIService; - -import org.apache.spark.util.ShutdownHookManager; - -/** - * HiveServer2. - * - */ -public class HiveServer2 extends CompositeService { - private static final Log LOG = LogFactory.getLog(HiveServer2.class); - - private CLIService cliService; - private ThriftCLIService thriftCLIService; - - public HiveServer2() { - super(HiveServer2.class.getSimpleName()); - HiveConf.setLoadHiveServer2Config(true); - } - - @Override - public synchronized void init(HiveConf hiveConf) { - cliService = new CLIService(this); - addService(cliService); - if (isHTTPTransportMode(hiveConf)) { - thriftCLIService = new ThriftHttpCLIService(cliService); - } else { - thriftCLIService = new ThriftBinaryCLIService(cliService); - } - addService(thriftCLIService); - super.init(hiveConf); - - // Add a shutdown hook for catching SIGTERM & SIGINT - // this must be higher than the Hadoop Filesystem priority of 10, - // which the default priority is. 
- // The signature of the callback must match that of a scala () -> Unit - // function - ShutdownHookManager.addShutdownHook( - new AbstractFunction0() { - public BoxedUnit apply() { - try { - LOG.info("Hive Server Shutdown hook invoked"); - stop(); - } catch (Throwable e) { - LOG.warn("Ignoring Exception while stopping Hive Server from shutdown hook", - e); - } - return BoxedUnit.UNIT; - } - }); - } - - public static boolean isHTTPTransportMode(HiveConf hiveConf) { - String transportMode = System.getenv("HIVE_SERVER2_TRANSPORT_MODE"); - if (transportMode == null) { - transportMode = hiveConf.getVar(HiveConf.ConfVars.HIVE_SERVER2_TRANSPORT_MODE); - } - if (transportMode != null && (transportMode.equalsIgnoreCase("http"))) { - return true; - } - return false; - } - - @Override - public synchronized void start() { - super.start(); - } - - @Override - public synchronized void stop() { - LOG.info("Shutting down HiveServer2"); - super.stop(); - } - - private static void startHiveServer2() throws Throwable { - long attempts = 0, maxAttempts = 1; - while (true) { - LOG.info("Starting HiveServer2"); - HiveConf hiveConf = new HiveConf(); - maxAttempts = hiveConf.getLongVar(HiveConf.ConfVars.HIVE_SERVER2_MAX_START_ATTEMPTS); - HiveServer2 server = null; - try { - server = new HiveServer2(); - server.init(hiveConf); - server.start(); - ShimLoader.getHadoopShims().startPauseMonitor(hiveConf); - break; - } catch (Throwable throwable) { - if (server != null) { - try { - server.stop(); - } catch (Throwable t) { - LOG.info("Exception caught when calling stop of HiveServer2 before retrying start", t); - } finally { - server = null; - } - } - if (++attempts >= maxAttempts) { - throw new Error("Max start attempts " + maxAttempts + " exhausted", throwable); - } else { - LOG.warn("Error starting HiveServer2 on attempt " + attempts - + ", will retry in 60 seconds", throwable); - try { - Thread.sleep(60L * 1000L); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } - } - } - } - } - - public static void main(String[] args) { - HiveConf.setLoadHiveServer2Config(true); - ServerOptionsProcessor oproc = new ServerOptionsProcessor("hiveserver2"); - ServerOptionsProcessorResponse oprocResponse = oproc.parse(args); - - HiveStringUtils.startupShutdownMessage(HiveServer2.class, args, LOG); - - // Call the executor which will execute the appropriate command based on the parsed options - oprocResponse.getServerOptionsExecutor().execute(); - } - - /** - * ServerOptionsProcessor. - * Process arguments given to HiveServer2 (-hiveconf property=value) - * Set properties in System properties - * Create an appropriate response object, - * which has executor to execute the appropriate command based on the parsed options. 
- */ - public static class ServerOptionsProcessor { - private final Options options = new Options(); - private org.apache.commons.cli.CommandLine commandLine; - private final String serverName; - private final StringBuilder debugMessage = new StringBuilder(); - - @SuppressWarnings("static-access") - public ServerOptionsProcessor(String serverName) { - this.serverName = serverName; - // -hiveconf x=y - options.addOption(OptionBuilder - .withValueSeparator() - .hasArgs(2) - .withArgName("property=value") - .withLongOpt("hiveconf") - .withDescription("Use value for given property") - .create()); - options.addOption(new Option("H", "help", false, "Print help information")); - } - - public ServerOptionsProcessorResponse parse(String[] argv) { - try { - commandLine = new GnuParser().parse(options, argv); - // Process --hiveconf - // Get hiveconf param values and set the System property values - Properties confProps = commandLine.getOptionProperties("hiveconf"); - for (String propKey : confProps.stringPropertyNames()) { - // save logging message for log4j output latter after log4j initialize properly - debugMessage.append("Setting " + propKey + "=" + confProps.getProperty(propKey) + ";\n"); - System.setProperty(propKey, confProps.getProperty(propKey)); - } - - // Process --help - if (commandLine.hasOption('H')) { - return new ServerOptionsProcessorResponse(new HelpOptionExecutor(serverName, options)); - } - } catch (ParseException e) { - // Error out & exit - we were not able to parse the args successfully - System.err.println("Error starting HiveServer2 with given arguments: "); - System.err.println(e.getMessage()); - System.exit(-1); - } - // Default executor, when no option is specified - return new ServerOptionsProcessorResponse(new StartOptionExecutor()); - } - - StringBuilder getDebugMessage() { - return debugMessage; - } - } - - /** - * The response sent back from {@link ServerOptionsProcessor#parse(String[])} - */ - static class ServerOptionsProcessorResponse { - private final ServerOptionsExecutor serverOptionsExecutor; - - ServerOptionsProcessorResponse(ServerOptionsExecutor serverOptionsExecutor) { - this.serverOptionsExecutor = serverOptionsExecutor; - } - - ServerOptionsExecutor getServerOptionsExecutor() { - return serverOptionsExecutor; - } - } - - /** - * The executor interface for running the appropriate HiveServer2 command based on parsed options - */ - interface ServerOptionsExecutor { - void execute(); - } - - /** - * HelpOptionExecutor: executes the --help option by printing out the usage - */ - static class HelpOptionExecutor implements ServerOptionsExecutor { - private final Options options; - private final String serverName; - - HelpOptionExecutor(String serverName, Options options) { - this.options = options; - this.serverName = serverName; - } - - @Override - public void execute() { - new HelpFormatter().printHelp(serverName, options); - System.exit(0); - } - } - - /** - * StartOptionExecutor: starts HiveServer2. - * This is the default executor, when no option is specified. 
- */ - static class StartOptionExecutor implements ServerOptionsExecutor { - @Override - public void execute() { - try { - startHiveServer2(); - } catch (Throwable t) { - LOG.fatal("Error starting HiveServer2", t); - System.exit(-1); - } - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java deleted file mode 100644 index 8ee98103f7ef7..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java +++ /dev/null @@ -1,77 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package org.apache.hive.service.server; - -import java.util.Map; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.metastore.HiveMetaStore; -import org.apache.hadoop.hive.metastore.RawStore; - -/** - * A HiveServer2 thread used to construct new server threads. - * In particular, this thread ensures an orderly cleanup, - * when killed by its corresponding ExecutorService. - */ -public class ThreadWithGarbageCleanup extends Thread { - private static final Log LOG = LogFactory.getLog(ThreadWithGarbageCleanup.class); - - Map threadRawStoreMap = - ThreadFactoryWithGarbageCleanup.getThreadRawStoreMap(); - - public ThreadWithGarbageCleanup(Runnable runnable) { - super(runnable); - } - - /** - * Add any Thread specific garbage cleanup code here. - * Currently, it shuts down the RawStore object for this thread if it is not null. - */ - @Override - public void finalize() throws Throwable { - cleanRawStore(); - super.finalize(); - } - - private void cleanRawStore() { - Long threadId = this.getId(); - RawStore threadLocalRawStore = threadRawStoreMap.get(threadId); - if (threadLocalRawStore != null) { - LOG.debug("RawStore: " + threadLocalRawStore + ", for the thread: " + - this.getName() + " will be closed now."); - threadLocalRawStore.shutdown(); - threadRawStoreMap.remove(threadId); - } - } - - /** - * Cache the ThreadLocal RawStore object. Called from the corresponding thread. 
- */ - public void cacheThreadLocalRawStore() { - Long threadId = this.getId(); - RawStore threadLocalRawStore = HiveMetaStore.HMSHandler.getRawStore(); - if (threadLocalRawStore != null && !threadRawStoreMap.containsKey(threadId)) { - LOG.debug("Adding RawStore: " + threadLocalRawStore + ", for the thread: " + - this.getName() + " to threadRawStoreMap for future cleanup."); - threadRawStoreMap.put(threadId, threadLocalRawStore); - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala b/sql/hive-thriftserver/v1.2/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala deleted file mode 100644 index 9a28dd6a31e6e..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.hive.thriftserver - -import org.apache.commons.logging.LogFactory -import org.apache.hadoop.hive.ql.session.SessionState -import org.apache.hive.service.cli.{RowSet, RowSetFactory, TableSchema, Type} -import org.apache.hive.service.cli.Type._ -import org.apache.hive.service.cli.thrift.TProtocolVersion._ - -/** - * Various utilities for hive-thriftserver used to upgrade the built-in Hive. 
- */ -private[thriftserver] object ThriftserverShimUtils { - - private[thriftserver] object TOperationType { - val GET_TYPE_INFO = org.apache.hive.service.cli.thrift.TOperationType.GET_TYPE_INFO - } - - private[thriftserver] type TProtocolVersion = org.apache.hive.service.cli.thrift.TProtocolVersion - private[thriftserver] type Client = org.apache.hive.service.cli.thrift.TCLIService.Client - private[thriftserver] type TOpenSessionReq = org.apache.hive.service.cli.thrift.TOpenSessionReq - private[thriftserver] type TGetSchemasReq = org.apache.hive.service.cli.thrift.TGetSchemasReq - private[thriftserver] type TGetTablesReq = org.apache.hive.service.cli.thrift.TGetTablesReq - private[thriftserver] type TGetColumnsReq = org.apache.hive.service.cli.thrift.TGetColumnsReq - private[thriftserver] type TGetInfoReq = org.apache.hive.service.cli.thrift.TGetInfoReq - private[thriftserver] type TExecuteStatementReq = - org.apache.hive.service.cli.thrift.TExecuteStatementReq - private[thriftserver] type THandleIdentifier = - org.apache.hive.service.cli.thrift.THandleIdentifier - private[thriftserver] type TOperationType = org.apache.hive.service.cli.thrift.TOperationType - private[thriftserver] type TOperationHandle = org.apache.hive.service.cli.thrift.TOperationHandle - - private[thriftserver] def getConsole: SessionState.LogHelper = { - val LOG = LogFactory.getLog(classOf[SparkSQLCLIDriver]) - new SessionState.LogHelper(LOG) - } - - private[thriftserver] def resultRowSet( - getResultSetSchema: TableSchema, - getProtocolVersion: TProtocolVersion): RowSet = { - RowSetFactory.create(getResultSetSchema, getProtocolVersion) - } - - private[thriftserver] def supportedType(): Seq[Type] = { - Seq(NULL_TYPE, BOOLEAN_TYPE, STRING_TYPE, BINARY_TYPE, - TINYINT_TYPE, SMALLINT_TYPE, INT_TYPE, BIGINT_TYPE, - FLOAT_TYPE, DOUBLE_TYPE, DECIMAL_TYPE, - DATE_TYPE, TIMESTAMP_TYPE, - ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE) - } - - private[thriftserver] val testedProtocolVersions = Seq( - HIVE_CLI_SERVICE_PROTOCOL_V1, - HIVE_CLI_SERVICE_PROTOCOL_V2, - HIVE_CLI_SERVICE_PROTOCOL_V3, - HIVE_CLI_SERVICE_PROTOCOL_V4, - HIVE_CLI_SERVICE_PROTOCOL_V5, - HIVE_CLI_SERVICE_PROTOCOL_V6, - HIVE_CLI_SERVICE_PROTOCOL_V7, - HIVE_CLI_SERVICE_PROTOCOL_V8) -} diff --git a/sql/hive/benchmarks/InsertIntoHiveTableBenchmark-hive1.2-results.txt b/sql/hive/benchmarks/InsertIntoHiveTableBenchmark-hive1.2-results.txt deleted file mode 100644 index 85884a1aaf739..0000000000000 --- a/sql/hive/benchmarks/InsertIntoHiveTableBenchmark-hive1.2-results.txt +++ /dev/null @@ -1,11 +0,0 @@ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_251-b08 on Mac OS X 10.15.4 -Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz -insert hive table benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -INSERT INTO DYNAMIC 6812 7043 328 0.0 665204.8 1.0X -INSERT INTO HYBRID 817 852 32 0.0 79783.6 8.3X -INSERT INTO STATIC 231 246 21 0.0 22568.2 29.5X -INSERT OVERWRITE DYNAMIC 25947 26671 1024 0.0 2533910.2 0.3X -INSERT OVERWRITE HYBRID 2846 2884 54 0.0 277908.7 2.4X -INSERT OVERWRITE STATIC 232 247 26 0.0 22659.9 29.4X - diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 805bcb2bc3a60..1611a3da8a3da 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -296,8 +296,7 @@ private[hive] class HiveClientImpl( case e: NoClassDefFoundError if HiveUtils.isHive23 && e.getMessage.contains("org/apache/hadoop/hive/serde2/SerDe") => throw new ClassNotFoundException("The SerDe interface removed since Hive 2.3(HIVE-15167)." + - " Please migrate your custom SerDes to Hive 2.3 or build your own Spark with" + - " hive-1.2 profile. See HIVE-15167 for more details.", e) + " Please migrate your custom SerDes to Hive 2.3. See HIVE-15167 for more details.", e) } finally { state.getConf.setClassLoader(originalConfLoader) Thread.currentThread().setContextClassLoader(original) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertIntoHiveTableBenchmark.scala b/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertIntoHiveTableBenchmark.scala index 81eb5e2591f13..da34c54cb36a2 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertIntoHiveTableBenchmark.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertIntoHiveTableBenchmark.scala @@ -28,14 +28,11 @@ import org.apache.spark.sql.hive.test.TestHive * {{{ * 1. without sbt: bin/spark-submit --class * --jars ,, - * --packages org.spark-project.hive:hive-exec:1.2.1.spark2 * - * 2. build/sbt "hive/test:runMain " -Phive-1.2 or - * build/sbt "hive/test:runMain " -Phive-2.3 + * 2. build/sbt "hive/test:runMain " * 3. generate result: * SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "hive/test:runMain " * Results will be written to "benchmarks/InsertIntoHiveTableBenchmark-hive2.3-results.txt". - * 4. -Phive-1.2 does not work for JDK 11 * }}} */ object InsertIntoHiveTableBenchmark extends SqlBasedBenchmark { @@ -136,5 +133,5 @@ object InsertIntoHiveTableBenchmark extends SqlBasedBenchmark { } } - override def suffix: String = if (HiveUtils.isHive23) "-hive2.3" else "-hive1.2" + override def suffix: String = "-hive2.3" } From a0aa8f33a9420feb9228b51a3dfad2e7e86d65a5 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Tue, 6 Oct 2020 09:09:58 +0900 Subject: [PATCH 0168/1009] [SPARK-33069][INFRA] Skip test result report if no JUnit XML files are found ### What changes were proposed in this pull request? This PR proposes to skip test reporting ("Report test results") if no JUnit XML files are found. Currently, we're running and skipping the tests dynamically. For example, - if there are only changes in SparkR at the underlying commit, it runs only the SparkR tests, skips the other tests, and generates JUnit XML files only for the SparkR test cases. - if there are only changes in `docs` at the underlying commit, the build skips all tests except linters and does not generate any JUnit XML files. When the test reporting ("Report test results") job is triggered after the main build ("Build and test ") is finished and no JUnit XML files are found, it reports the case as a failure. See https://github.com/apache/spark/runs/1196184007 as an example. This PR works around it by simply skipping the test report when no JUnit XML files are found. Please see https://github.com/apache/spark/pull/29906#issuecomment-702525542 for more details. ### Why are the changes needed? To avoid false alarms for test results. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? Manually tested in my fork.
Positive case: https://github.com/HyukjinKwon/spark/runs/1208624679?check_suite_focus=true https://github.com/HyukjinKwon/spark/actions/runs/288996327 Negative case: https://github.com/HyukjinKwon/spark/runs/1208229838?check_suite_focus=true https://github.com/HyukjinKwon/spark/actions/runs/289000058 Closes #29946 from HyukjinKwon/test-junit-files. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- .github/workflows/test_report.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/test_report.yml b/.github/workflows/test_report.yml index 93cdb86687261..060a8795b6a77 100644 --- a/.github/workflows/test_report.yml +++ b/.github/workflows/test_report.yml @@ -15,7 +15,16 @@ jobs: github_token: ${{ secrets.GITHUB_TOKEN }} workflow: ${{ github.event.workflow_run.workflow_id }} commit: ${{ github.event.workflow_run.head_commit.id }} + - name: Check if JUnit report XML files exist + run: | + if ls **/target/test-reports/*.xml > /dev/null 2>&1; then + echo '::set-output name=FILE_EXISTS::true' + else + echo '::set-output name=FILE_EXISTS::false' + fi + id: check-junit-file - name: Publish test report + if: steps.check-junit-file.outputs.FILE_EXISTS == 'true' uses: scacap/action-surefire-report@v1 with: check_name: Report test results From 9870cf9c086172a390c80f5ef23aacfe2ce3f2cf Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 6 Oct 2020 13:01:57 +0900 Subject: [PATCH 0169/1009] [SPARK-33067][SQL][TESTS] Add negative checks to JDBC v2 Table Catalog tests ### What changes were proposed in this pull request? Add checks for the cases when JDBC v2 Table Catalog commands fail. ### Why are the changes needed? To improve test coverage. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running `JDBCTableCatalogSuite`. Closes #29945 from MaxGekk/jdbcv2-negative-tests. 
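All of the added negative checks follow one pattern: run a catalog command against a table or namespace that does not exist (or already exists) and assert that the expected exception surfaces. A minimal sketch of that pattern is below; it assumes a test session in which the `h2` JDBC catalog is registered the way the suite in the following diff does in its SparkConf, and the suite and table names here are invented for illustration.

```
import org.apache.spark.sql.{AnalysisException, QueryTest}
import org.apache.spark.sql.catalyst.analysis.NoSuchTableException
import org.apache.spark.sql.test.SharedSparkSession

// Hypothetical suite; it only illustrates the intercept-based negative checks.
class NegativeCatalogCheckSketch extends QueryTest with SharedSparkSession {
  test("commands against a missing table surface a clear error") {
    // Dropping a table that does not exist should raise NoSuchTableException.
    intercept[NoSuchTableException] {
      spark.sql("DROP TABLE h2.test.not_existing_table")
    }
    // Resolving a missing table should fail analysis instead of returning a schema.
    intercept[AnalysisException] {
      spark.table("h2.test.not_existing_table").schema
    }
  }
}
```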
Lead-authored-by: Max Gekk Co-authored-by: Hyukjin Kwon Signed-off-by: HyukjinKwon --- .../v2/jdbc/JDBCTableCatalogSuite.scala | 114 +++++++++++++++++- 1 file changed, 111 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala index b308934ba03c0..bf71f90779b71 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala @@ -20,7 +20,8 @@ import java.sql.{Connection, DriverManager} import java.util.Properties import org.apache.spark.SparkConf -import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -63,6 +64,8 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("show tables") { checkAnswer(sql("SHOW TABLES IN h2.test"), Seq(Row("test", "people"))) + // Check not existing namespace + checkAnswer(sql("SHOW TABLES IN h2.bad_test"), Seq()) } test("drop a table and test whether the table exists") { @@ -72,6 +75,11 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { checkAnswer(sql("SHOW TABLES IN h2.test"), Seq(Row("test", "to_drop"), Row("test", "people"))) sql("DROP TABLE h2.test.to_drop") checkAnswer(sql("SHOW TABLES IN h2.test"), Seq(Row("test", "people"))) + Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => + intercept[NoSuchTableException] { + sql(s"DROP TABLE $table") + } + } } test("rename a table") { @@ -87,6 +95,26 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { sql("SHOW TABLES IN h2.test"), Seq(Row("test", "dst_table"), Row("test", "people"))) } + // Rename not existing table or namespace + Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => + intercept[org.h2.jdbc.JdbcSQLException] { + sql(s"ALTER TABLE $table RENAME TO test.dst_table") + } + } + // Rename to an existing table + withTable("h2.test.dst_table") { + withConnection { conn => + conn.prepareStatement("""CREATE TABLE "test"."dst_table" (id INTEGER)""").executeUpdate() + } + withTable("h2.test.src_table") { + withConnection { conn => + conn.prepareStatement("""CREATE TABLE "test"."src_table" (id INTEGER)""").executeUpdate() + } + intercept[org.h2.jdbc.JdbcSQLException] { + sql("ALTER TABLE h2.test.src_table RENAME TO h2.test.dst_table") + } + } + } } test("load a table") { @@ -95,6 +123,11 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { .add("NAME", StringType) .add("ID", IntegerType) assert(t.schema === expectedSchema) + Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => + intercept[AnalysisException] { + spark.table(s"h2.$table").schema + } + } } test("create a table") { @@ -105,6 +138,15 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { sql("SHOW TABLES IN h2.test"), Seq(Row("test", "people"), Row("test", "new_table"))) } + withTable("h2.test.new_table") { + sql("CREATE TABLE h2.test.new_table(i INT, j STRING) USING _") + intercept[AnalysisException] { + sql("CREATE TABLE 
h2.test.new_table(i INT, j STRING) USING _") + } + } + intercept[org.h2.jdbc.JdbcSQLException] { + sql("CREATE TABLE h2.bad_test.new_table(i INT, j STRING) USING _") + } } test("alter table ... add column") { @@ -121,16 +163,38 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { t = spark.table("h2.test.alt_table") expectedSchema = expectedSchema.add("C3", DoubleType) assert(t.schema === expectedSchema) + // Add already existing column + intercept[AnalysisException] { + sql("ALTER TABLE h2.test.alt_table ADD COLUMNS (C3 DOUBLE)") + } + } + // Add a column to not existing table and namespace + Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => + intercept[AnalysisException] { + sql(s"ALTER TABLE $table ADD COLUMNS (C4 STRING)") + } } } test("alter table ... rename column") { withTable("h2.test.alt_table") { - sql("CREATE TABLE h2.test.alt_table (ID INTEGER) USING _") + sql("CREATE TABLE h2.test.alt_table (ID INTEGER, C0 INTEGER) USING _") sql("ALTER TABLE h2.test.alt_table RENAME COLUMN ID TO C") val t = spark.table("h2.test.alt_table") - val expectedSchema = new StructType().add("C", IntegerType) + val expectedSchema = new StructType() + .add("C", IntegerType) + .add("C0", IntegerType) assert(t.schema === expectedSchema) + // Rename to already existing column + intercept[AnalysisException] { + sql("ALTER TABLE h2.test.alt_table RENAME COLUMN C TO C0") + } + } + // Rename a column in not existing table and namespace + Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => + intercept[AnalysisException] { + sql(s"ALTER TABLE $table RENAME COLUMN ID TO C") + } } } @@ -141,6 +205,16 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { val t = spark.table("h2.test.alt_table") val expectedSchema = new StructType().add("C2", IntegerType) assert(t.schema === expectedSchema) + // Drop not existing column + intercept[AnalysisException] { + sql("ALTER TABLE h2.test.alt_table DROP COLUMN bad_column") + } + } + // Drop a column to not existing table and namespace + Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => + intercept[AnalysisException] { + sql(s"ALTER TABLE $table DROP COLUMN C1") + } } } @@ -151,6 +225,20 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { val t = spark.table("h2.test.alt_table") val expectedSchema = new StructType().add("ID", DoubleType) assert(t.schema === expectedSchema) + // Update not existing column + intercept[AnalysisException] { + sql("ALTER TABLE h2.test.alt_table ALTER COLUMN bad_column TYPE DOUBLE") + } + // Update column to wrong type + intercept[AnalysisException] { + sql("ALTER TABLE h2.test.alt_table ALTER COLUMN id TYPE bad_type") + } + } + // Update column type in not existing table and namespace + Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => + intercept[AnalysisException] { + sql(s"ALTER TABLE $table ALTER COLUMN id TYPE DOUBLE") + } } } @@ -161,6 +249,16 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { val t = spark.table("h2.test.alt_table") val expectedSchema = new StructType().add("ID", IntegerType, nullable = true) assert(t.schema === expectedSchema) + // Update nullability of not existing column + intercept[AnalysisException] { + sql("ALTER TABLE h2.test.alt_table ALTER COLUMN bad_column DROP NOT NULL") + } + } + // Update column nullability in not existing table and namespace + Seq("h2.test.not_existing_table", 
"h2.bad_test.not_existing_table").foreach { table => + intercept[AnalysisException] { + sql(s"ALTER TABLE $table ALTER COLUMN ID DROP NOT NULL") + } } } @@ -171,6 +269,16 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { sql("ALTER TABLE h2.test.alt_table ALTER COLUMN ID COMMENT 'test'") } assert(thrown.getMessage.contains("Unsupported TableChange")) + // Update comment for not existing column + intercept[AnalysisException] { + sql("ALTER TABLE h2.test.alt_table ALTER COLUMN bad_column COMMENT 'test'") + } + } + // Update column comments in not existing table and namespace + Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => + intercept[AnalysisException] { + sql(s"ALTER TABLE $table ALTER COLUMN ID COMMENT 'test'") + } } } } From 4adc2822a3c7b7552b436ffb61d5c134680e56b3 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Tue, 6 Oct 2020 08:32:55 +0000 Subject: [PATCH 0170/1009] [SPARK-33035][SQL] Updates the obsoleted entries of attribute mapping in QueryPlan#transformUpWithNewOutput ### What changes were proposed in this pull request? This PR intends to fix corner-case bugs in the `QueryPlan#transformUpWithNewOutput` that is used to propagate updated `ExprId`s in a bottom-up way. Let's say we have a rule to simply assign new `ExprId`s in a projection list like this; ``` case class TestRule extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan.transformUpWithNewOutput { case p Project(projList, _) => val newPlan = p.copy(projectList = projList.map { _.transform { // Assigns a new `ExprId` for references case a: AttributeReference => Alias(a, a.name)() }}.asInstanceOf[Seq[NamedExpression]]) val attrMapping = p.output.zip(newPlan.output) newPlan -> attrMapping } } ``` Then, this rule is applied into a plan below; ``` (3) Project [a#5, b#6] +- (2) Project [a#5, b#6] +- (1) Project [a#5, b#6] +- LocalRelation , [a#5, b#6] ``` In the first transformation, the rule assigns new `ExprId`s in `(1) Project` (e.g., a#5 AS a#7, b#6 AS b#8). In the second transformation, the rule corrects the input references of `(2) Project` first by using attribute mapping given from `(1) Project` (a#5->a#7 and b#6->b#8) and then assigns new `ExprId`s (e.g., a#7 AS a#9, b#8 AS b#10). But, in the third transformation, the rule fails because it tries to correct the references of `(3) Project` by using incorrect attribute mapping (a#7->a#9 and b#8->b#10) even though the correct one is a#5->a#9 and b#6->b#10. To fix this issue, this PR modified the code to update the attribute mapping entries that are obsoleted by generated entries in a given rule. ### Why are the changes needed? bugfix. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added tests in `QueryPlanSuite`. Closes #29911 from maropu/QueryPlanBug. 
Authored-by: Takeshi Yamamuro Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/plans/QueryPlan.scala | 26 ++++++++++++++----- .../sql/catalyst/plans/QueryPlanSuite.scala | 26 ++++++++++++++++--- 2 files changed, 41 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index a89f055e2ac80..3e8467bab0348 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -201,11 +201,6 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT case (oldAttr, _) => plan.references.contains(oldAttr) } - val (planAfterRule, newAttrMapping) = CurrentOrigin.withOrigin(origin) { - rule.applyOrElse(newPlan, (plan: PlanType) => plan -> Nil) - } - newPlan = planAfterRule - if (attrMappingForCurrentPlan.nonEmpty) { assert(!attrMappingForCurrentPlan.groupBy(_._1.exprId) .exists(_._2.map(_._2.exprId).distinct.length > 1), @@ -222,10 +217,27 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT } } - attrMapping ++= newAttrMapping.filter { + val (planAfterRule, newAttrMapping) = CurrentOrigin.withOrigin(origin) { + rule.applyOrElse(newPlan, (plan: PlanType) => plan -> Nil) + } + + val newValidAttrMapping = newAttrMapping.filter { case (a1, a2) => a1.exprId != a2.exprId } - newPlan -> attrMapping.toSeq + + // Updates the `attrMapping` entries that are obsoleted by generated entries in `rule`. + // For example, `attrMapping` has a mapping entry 'id#1 -> id#2' and `rule` + // generates a new entry 'id#2 -> id#3'. In this case, we need to update + // the corresponding old entry from 'id#1 -> id#2' to '#id#1 -> #id#3'. 
+ val updatedAttrMap = AttributeMap(newValidAttrMapping) + val transferAttrMapping = attrMapping.map { + case (a1, a2) => (a1, updatedAttrMap.getOrElse(a2, a2)) + } + val newOtherAttrMapping = { + val existingAttrMappingSet = transferAttrMapping.map(_._2).toSet + newValidAttrMapping.filterNot { case (_, a) => existingAttrMappingSet.contains(a) } + } + planAfterRule -> (transferAttrMapping ++ newOtherAttrMapping).toSeq } } rewrite(this)._1 diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/QueryPlanSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/QueryPlanSuite.scala index 91ce187f4d270..404c8895c4d11 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/QueryPlanSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/QueryPlanSuite.scala @@ -20,9 +20,11 @@ package org.apache.spark.sql.catalyst.plans import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation -import org.apache.spark.sql.catalyst.dsl.plans -import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, ListQuery, Literal, NamedExpression} -import org.apache.spark.sql.catalyst.plans.logical.{Filter, Project, Union} +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, Expression, ListQuery, Literal, NamedExpression} +import org.apache.spark.sql.catalyst.plans.logical.{Filter, LocalRelation, LogicalPlan, Project, Union} +import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, Origin} import org.apache.spark.sql.types.IntegerType @@ -31,7 +33,7 @@ class QueryPlanSuite extends SparkFunSuite { test("origin remains the same after mapExpressions (SPARK-23823)") { CurrentOrigin.setPosition(0, 0) val column = AttributeReference("column", IntegerType)(NamedExpression.newExprId) - val query = plans.DslLogicalPlan(plans.table("table")).select(column) + val query = DslLogicalPlan(table("table")).select(column) CurrentOrigin.reset() val mappedQuery = query mapExpressions { @@ -83,4 +85,20 @@ class QueryPlanSuite extends SparkFunSuite { assert(countRelationsInPlan == 2) assert(countRelationsInPlanAndSubqueries == 5) } + + test("SPARK-33035: consecutive attribute updates in parent plan nodes") { + val testRule = new Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = plan.transformUpWithNewOutput { + case p @ Project(projList, _) => + // Assigns new `ExprId`s for output references + val newPlan = p.copy(projectList = projList.map { ne => Alias(ne, ne.name)() }) + val attrMapping = p.output.zip(newPlan.output) + newPlan -> attrMapping + } + } + + val t = LocalRelation('a.int, 'b.int) + val plan = t.select($"a", $"b").select($"a", $"b").select($"a", $"b").analyze + assert(testRule(plan).resolved) + } } From 279334797234f5f83abd6879874b389e110920c2 Mon Sep 17 00:00:00 2001 From: "fqaiser94@gmail.com" Date: Tue, 6 Oct 2020 08:53:30 +0000 Subject: [PATCH 0171/1009] [SPARK-32511][SQL] Add dropFields method to Column class ### What changes were proposed in this pull request? 1. Refactored `WithFields` Expression to make it more extensible (now `UpdateFields`). 2. Added a new `dropFields` method to the `Column` class. 
This method should allow users to drop a `StructField` in a `StructType` column (with similar semantics to the `drop` method on `Dataset`). ### Why are the changes needed? Often Spark users have to work with deeply nested data e.g. to fix a data quality issue with an existing `StructField`. To do this with the existing Spark APIs, users have to rebuild the entire struct column. For example, let's say you have the following deeply nested data structure which has a data quality issue (`5` is missing): ``` import org.apache.spark.sql._ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ val data = spark.createDataFrame(sc.parallelize( Seq(Row(Row(Row(1, 2, 3), Row(Row(4, null, 6), Row(7, 8, 9), Row(10, 11, 12)), Row(13, 14, 15))))), StructType(Seq( StructField("a", StructType(Seq( StructField("a", StructType(Seq( StructField("a", IntegerType), StructField("b", IntegerType), StructField("c", IntegerType)))), StructField("b", StructType(Seq( StructField("a", StructType(Seq( StructField("a", IntegerType), StructField("b", IntegerType), StructField("c", IntegerType)))), StructField("b", StructType(Seq( StructField("a", IntegerType), StructField("b", IntegerType), StructField("c", IntegerType)))), StructField("c", StructType(Seq( StructField("a", IntegerType), StructField("b", IntegerType), StructField("c", IntegerType)))) ))), StructField("c", StructType(Seq( StructField("a", IntegerType), StructField("b", IntegerType), StructField("c", IntegerType)))) )))))).cache data.show(false) +---------------------------------+ |a | +---------------------------------+ |[[1, 2, 3], [[4,, 6], [7, 8, 9]]]| +---------------------------------+ ``` Currently, to drop the missing value users would have to do something like this: ``` val result = data.withColumn("a", struct( $"a.a", struct( struct( $"a.b.a.a", $"a.b.a.c" ).as("a"), $"a.b.b", $"a.b.c" ).as("b"), $"a.c" )) result.show(false) +---------------------------------------------------------------+ |a | +---------------------------------------------------------------+ |[[1, 2, 3], [[4, 6], [7, 8, 9], [10, 11, 12]], [13, 14, 15]]| +---------------------------------------------------------------+ ``` As you can see above, with the existing methods users must call the `struct` function and list all fields, including fields they don't want to change. This is not ideal as: >this leads to complex, fragile code that cannot survive schema evolution. [SPARK-16483](https://issues.apache.org/jira/browse/SPARK-16483) In contrast, with the method added in this PR, a user could simply do something like this to get the same result: ``` val result = data.withColumn("a", 'a.dropFields("b.a.b")) result.show(false) +---------------------------------------------------------------+ |a | +---------------------------------------------------------------+ |[[1, 2, 3], [[4, 6], [7, 8, 9], [10, 11, 12]], [13, 14, 15]]| +---------------------------------------------------------------+ ``` This is the second of maybe 3 methods that could be added to the `Column` class to make it easier to manipulate nested data. Other methods under discussion in [SPARK-22231](https://issues.apache.org/jira/browse/SPARK-22231) include `withFieldRenamed`. However, this should be added in a separate PR. ### Does this PR introduce _any_ user-facing change? The documentation for `Column.withField` method has changed to include an additional note about how to write optimized queries when adding multiple nested Column directly. ### How was this patch tested? New unit tests were added. 
Jenkins must pass them. ### Related JIRAs: More discussion on this topic can be found here: - https://issues.apache.org/jira/browse/SPARK-22231 - https://issues.apache.org/jira/browse/SPARK-16483 Closes #29795 from fqaiser94/SPARK-32511-dropFields-second-try. Authored-by: fqaiser94@gmail.com Signed-off-by: Wenchen Fan --- .../expressions/complexTypeCreator.scala | 127 ++- .../sql/catalyst/optimizer/ComplexTypes.scala | 20 +- .../sql/catalyst/optimizer/Optimizer.scala | 6 +- .../{WithFields.scala => UpdateFields.scala} | 16 +- ...e.scala => CombineUpdateFieldsSuite.scala} | 41 +- .../optimizer/complexTypesSuite.scala | 345 ++++++- .../UpdateFieldsBenchmark-results.txt | 26 + .../scala/org/apache/spark/sql/Column.scala | 119 ++- .../spark/sql/ColumnExpressionSuite.scala | 881 ++++++++++++++++-- .../spark/sql/UpdateFieldsBenchmark.scala | 224 +++++ 10 files changed, 1607 insertions(+), 198 deletions(-) rename sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/{WithFields.scala => UpdateFields.scala} (68%) rename sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/{CombineWithFieldsSuite.scala => CombineUpdateFieldsSuite.scala} (65%) create mode 100644 sql/core/benchmarks/UpdateFieldsBenchmark-results.txt create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/UpdateFieldsBenchmark.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index c1471455b58c0..d5b1950e82c56 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -17,8 +17,10 @@ package org.apache.spark.sql.catalyst.expressions +import scala.collection.mutable.ArrayBuffer + import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion} +import org.apache.spark.sql.catalyst.analysis.{Resolver, TypeCheckResult, TypeCoercion} import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.{FUNC_ALIAS, FunctionBuilder} import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ @@ -548,57 +550,114 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E } /** - * Adds/replaces field in struct by name. + * Represents an operation to be applied to the fields of a struct. */ -case class WithFields( - structExpr: Expression, - names: Seq[String], - valExprs: Seq[Expression]) extends Unevaluable { +trait StructFieldsOperation { + + val resolver: Resolver = SQLConf.get.resolver - assert(names.length == valExprs.length) + /** + * Returns an updated list of StructFields and Expressions that will ultimately be used + * as the fields argument for [[StructType]] and as the children argument for + * [[CreateNamedStruct]] respectively inside of [[UpdateFields]]. + */ + def apply(values: Seq[(StructField, Expression)]): Seq[(StructField, Expression)] +} + +/** + * Add or replace a field by name. + * + * We extend [[Unevaluable]] here to ensure that [[UpdateFields]] can include it as part of its + * children, and thereby enable the analyzer to resolve and transform valExpr as necessary. 
+ */ +case class WithField(name: String, valExpr: Expression) + extends Unevaluable with StructFieldsOperation { + + override def apply(values: Seq[(StructField, Expression)]): Seq[(StructField, Expression)] = { + val newFieldExpr = (StructField(name, valExpr.dataType, valExpr.nullable), valExpr) + val result = ArrayBuffer.empty[(StructField, Expression)] + var hasMatch = false + for (existingFieldExpr @ (existingField, _) <- values) { + if (resolver(existingField.name, name)) { + hasMatch = true + result += newFieldExpr + } else { + result += existingFieldExpr + } + } + if (!hasMatch) result += newFieldExpr + result + } + + override def children: Seq[Expression] = valExpr :: Nil + + override def dataType: DataType = throw new IllegalStateException( + "WithField.dataType should not be called.") + + override def nullable: Boolean = throw new IllegalStateException( + "WithField.nullable should not be called.") + + override def prettyName: String = "WithField" +} + +/** + * Drop a field by name. + */ +case class DropField(name: String) extends StructFieldsOperation { + override def apply(values: Seq[(StructField, Expression)]): Seq[(StructField, Expression)] = + values.filterNot { case (field, _) => resolver(field.name, name) } +} + +/** + * Updates fields in a struct. + */ +case class UpdateFields(structExpr: Expression, fieldOps: Seq[StructFieldsOperation]) + extends Unevaluable { override def checkInputDataTypes(): TypeCheckResult = { - if (!structExpr.dataType.isInstanceOf[StructType]) { - TypeCheckResult.TypeCheckFailure( - "struct argument should be struct type, got: " + structExpr.dataType.catalogString) + val dataType = structExpr.dataType + if (!dataType.isInstanceOf[StructType]) { + TypeCheckResult.TypeCheckFailure("struct argument should be struct type, got: " + + dataType.catalogString) + } else if (newExprs.isEmpty) { + TypeCheckResult.TypeCheckFailure("cannot drop all fields in struct") } else { TypeCheckResult.TypeCheckSuccess } } - override def children: Seq[Expression] = structExpr +: valExprs + override def children: Seq[Expression] = structExpr +: fieldOps.collect { + case e: Expression => e + } - override def dataType: StructType = evalExpr.dataType.asInstanceOf[StructType] + override def dataType: StructType = StructType(newFields) override def nullable: Boolean = structExpr.nullable - override def prettyName: String = "with_fields" + override def prettyName: String = "update_fields" - lazy val evalExpr: Expression = { - val existingExprs = structExpr.dataType.asInstanceOf[StructType].fieldNames.zipWithIndex.map { - case (name, i) => (name, GetStructField(KnownNotNull(structExpr), i).asInstanceOf[Expression]) - } + private lazy val newFieldExprs: Seq[(StructField, Expression)] = { + val existingFieldExprs: Seq[(StructField, Expression)] = + structExpr.dataType.asInstanceOf[StructType].fields.zipWithIndex.map { + case (field, i) => (field, GetStructField(structExpr, i)) + } - val addOrReplaceExprs = names.zip(valExprs) - - val resolver = SQLConf.get.resolver - val newExprs = addOrReplaceExprs.foldLeft(existingExprs) { - case (resultExprs, newExpr @ (newExprName, _)) => - if (resultExprs.exists(x => resolver(x._1, newExprName))) { - resultExprs.map { - case (name, _) if resolver(name, newExprName) => newExpr - case x => x - } - } else { - resultExprs :+ newExpr - } - }.flatMap { case (name, expr) => Seq(Literal(name), expr) } + fieldOps.foldLeft(existingFieldExprs)((exprs, op) => op(exprs)) + } + + private lazy val newFields: Seq[StructField] = newFieldExprs.map(_._1) + + 
lazy val newExprs: Seq[Expression] = newFieldExprs.map(_._2) + + lazy val evalExpr: Expression = { + val createNamedStructExpr = CreateNamedStruct(newFieldExprs.flatMap { + case (field, expr) => Seq(Literal(field.name), expr) + }) - val expr = CreateNamedStruct(newExprs) if (structExpr.nullable) { - If(IsNull(structExpr), Literal(null, expr.dataType), expr) + If(IsNull(structExpr), Literal(null, dataType), createNamedStructExpr) } else { - expr + createNamedStructExpr } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala index 2aba4bae397c7..860219e55b052 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.types.StructType /** * Simplify redundant [[CreateNamedStruct]], [[CreateArray]] and [[CreateMap]] expressions. @@ -39,18 +40,13 @@ object SimplifyExtractValueOps extends Rule[LogicalPlan] { // Remove redundant field extraction. case GetStructField(createNamedStruct: CreateNamedStruct, ordinal, _) => createNamedStruct.valExprs(ordinal) - case GetStructField(w @ WithFields(struct, names, valExprs), ordinal, maybeName) => - val name = w.dataType(ordinal).name - val matches = names.zip(valExprs).filter(_._1 == name) - if (matches.nonEmpty) { - // return last matching element as that is the final value for the field being extracted. - // For example, if a user submits a query like this: - // `$"struct_col".withField("b", lit(1)).withField("b", lit(2)).getField("b")` - // we want to return `lit(2)` (and not `lit(1)`). - val expr = matches.last._2 - If(IsNull(struct), Literal(null, expr.dataType), expr) - } else { - GetStructField(struct, ordinal, maybeName) + case GetStructField(u: UpdateFields, ordinal, _)if !u.structExpr.isInstanceOf[UpdateFields] => + val structExpr = u.structExpr + u.newExprs(ordinal) match { + // if the struct itself is null, then any value extracted from it (expr) will be null + // so we don't need to wrap expr in If(IsNull(struct), Literal(null, expr.dataType), expr) + case expr: GetStructField if expr.child.semanticEquals(structExpr) => expr + case expr => If(IsNull(structExpr), Literal(null, expr.dataType), expr) } // Remove redundant array indexing. 
case GetArrayStructFields(CreateArray(elems, useStringTypeWhenEmpty), field, ordinal, _, _) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index f2360150e47b5..5bdaa504a3beb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -110,7 +110,7 @@ abstract class Optimizer(catalogManager: CatalogManager) RemoveRedundantAliases, UnwrapCastInBinaryComparison, RemoveNoopOperators, - CombineWithFields, + CombineUpdateFields, SimplifyExtractValueOps, OptimizeJsonExprs, CombineConcats) ++ @@ -223,7 +223,7 @@ abstract class Optimizer(catalogManager: CatalogManager) RemoveNoopOperators) :+ // This batch must be executed after the `RewriteSubquery` batch, which creates joins. Batch("NormalizeFloatingNumbers", Once, NormalizeFloatingNumbers) :+ - Batch("ReplaceWithFieldsExpression", Once, ReplaceWithFieldsExpression) + Batch("ReplaceUpdateFieldsExpression", Once, ReplaceUpdateFieldsExpression) // remove any batches with no rules. this may happen when subclasses do not add optional rules. batches.filter(_.rules.nonEmpty) @@ -257,7 +257,7 @@ abstract class Optimizer(catalogManager: CatalogManager) RewriteCorrelatedScalarSubquery.ruleName :: RewritePredicateSubquery.ruleName :: NormalizeFloatingNumbers.ruleName :: - ReplaceWithFieldsExpression.ruleName :: Nil + ReplaceUpdateFieldsExpression.ruleName :: Nil /** * Optimize all the subqueries inside expression. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/WithFields.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UpdateFields.scala similarity index 68% rename from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/WithFields.scala rename to sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UpdateFields.scala index 05c90864e4bb0..c7154210e0c62 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/WithFields.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UpdateFields.scala @@ -17,26 +17,26 @@ package org.apache.spark.sql.catalyst.optimizer -import org.apache.spark.sql.catalyst.expressions.WithFields +import org.apache.spark.sql.catalyst.expressions.UpdateFields import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule /** - * Combines all adjacent [[WithFields]] expression into a single [[WithFields]] expression. + * Combines all adjacent [[UpdateFields]] expression into a single [[UpdateFields]] expression. */ -object CombineWithFields extends Rule[LogicalPlan] { +object CombineUpdateFields extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { - case WithFields(WithFields(struct, names1, valExprs1), names2, valExprs2) => - WithFields(struct, names1 ++ names2, valExprs1 ++ valExprs2) + case UpdateFields(UpdateFields(struct, fieldOps1), fieldOps2) => + UpdateFields(struct, fieldOps1 ++ fieldOps2) } } /** - * Replaces [[WithFields]] expression with an evaluable expression. + * Replaces [[UpdateFields]] expression with an evaluable expression. 
*/ -object ReplaceWithFieldsExpression extends Rule[LogicalPlan] { +object ReplaceUpdateFieldsExpression extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { - case w: WithFields => w.evalExpr + case u: UpdateFields => u.evalExpr } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombineWithFieldsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombineUpdateFieldsSuite.scala similarity index 65% rename from sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombineWithFieldsSuite.scala rename to sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombineUpdateFieldsSuite.scala index a3e0bbc57e639..ff9c60a2fa5bd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombineWithFieldsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombineUpdateFieldsSuite.scala @@ -19,56 +19,53 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions.{Alias, Literal, WithFields} +import org.apache.spark.sql.catalyst.expressions.{Alias, Literal, UpdateFields, WithField} import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ -class CombineWithFieldsSuite extends PlanTest { +class CombineUpdateFieldsSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { - val batches = Batch("CombineWithFields", FixedPoint(10), CombineWithFields) :: Nil + val batches = Batch("CombineUpdateFields", FixedPoint(10), CombineUpdateFields) :: Nil } private val testRelation = LocalRelation('a.struct('a1.int)) - test("combines two WithFields") { + test("combines two adjacent UpdateFields Expressions") { val originalQuery = testRelation .select(Alias( - WithFields( - WithFields( + UpdateFields( + UpdateFields( 'a, - Seq("b1"), - Seq(Literal(4))), - Seq("c1"), - Seq(Literal(5))), "out")()) + WithField("b1", Literal(4)) :: Nil), + WithField("c1", Literal(5)) :: Nil), "out")()) val optimized = Optimize.execute(originalQuery.analyze) val correctAnswer = testRelation - .select(Alias(WithFields('a, Seq("b1", "c1"), Seq(Literal(4), Literal(5))), "out")()) + .select(Alias(UpdateFields('a, WithField("b1", Literal(4)) :: WithField("c1", Literal(5)) :: + Nil), "out")()) .analyze comparePlans(optimized, correctAnswer) } - test("combines three WithFields") { + test("combines three adjacent UpdateFields Expressions") { val originalQuery = testRelation .select(Alias( - WithFields( - WithFields( - WithFields( + UpdateFields( + UpdateFields( + UpdateFields( 'a, - Seq("b1"), - Seq(Literal(4))), - Seq("c1"), - Seq(Literal(5))), - Seq("d1"), - Seq(Literal(6))), "out")()) + WithField("b1", Literal(4)) :: Nil), + WithField("c1", Literal(5)) :: Nil), + WithField("d1", Literal(6)) :: Nil), "out")()) val optimized = Optimize.execute(originalQuery.analyze) val correctAnswer = testRelation - .select(Alias(WithFields('a, Seq("b1", "c1", "d1"), Seq(4, 5, 6).map(Literal(_))), "out")()) + .select(Alias(UpdateFields('a, WithField("b1", Literal(4)) :: WithField("c1", Literal(5)) :: + WithField("d1", Literal(6)) :: Nil), "out")()) .analyze comparePlans(optimized, correctAnswer) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala index 00aed6a10cd64..d9cefdaf3fe70 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext import org.apache.spark.sql.catalyst.plans.PlanTest -import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Range} +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, OneRowRelation, Project, Range} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.util.GenericArrayData import org.apache.spark.sql.types._ @@ -37,14 +37,15 @@ class ComplexTypesSuite extends PlanTest with ExpressionEvalHelper { object Optimizer extends RuleExecutor[LogicalPlan] { val batches = Batch("collapse projections", FixedPoint(10), - CollapseProject) :: + CollapseProject) :: Batch("Constant Folding", FixedPoint(10), - NullPropagation, - ConstantFolding, - BooleanSimplification, - SimplifyConditionals, - SimplifyBinaryComparison, - SimplifyExtractValueOps) :: Nil + NullPropagation, + ConstantFolding, + BooleanSimplification, + SimplifyConditionals, + SimplifyBinaryComparison, + CombineUpdateFields, + SimplifyExtractValueOps) :: Nil } private val idAtt = ('id).long.notNull @@ -453,58 +454,182 @@ class ComplexTypesSuite extends PlanTest with ExpressionEvalHelper { checkEvaluation(GetMapValue(mb0, Literal(Array[Byte](3, 4))), null) } - private val structAttr = 'struct1.struct('a.int).withNullability(false) + private val structAttr = 'struct1.struct('a.int, 'b.int).withNullability(false) private val testStructRelation = LocalRelation(structAttr) - private val nullableStructAttr = 'struct1.struct('a.int) + private val nullableStructAttr = 'struct1.struct('a.int, 'b.int) private val testNullableStructRelation = LocalRelation(nullableStructAttr) - test("simplify GetStructField on WithFields that is not changing the attribute being extracted") { - def query(relation: LocalRelation): LogicalPlan = relation.select( - GetStructField(WithFields('struct1, Seq("b"), Seq(Literal(1))), 0, Some("a")) as "outerAttr") + test("simplify GetStructField on basic UpdateFields") { + def check(fieldOps: Seq[StructFieldsOperation], ordinal: Int, expected: Expression): Unit = { + def query(relation: LocalRelation): LogicalPlan = + relation.select(GetStructField(UpdateFields('struct1, fieldOps), ordinal).as("res")) + + checkRule( + query(testStructRelation), + testStructRelation.select(expected.as("res"))) + + checkRule( + query(testNullableStructRelation), + testNullableStructRelation.select((expected match { + case expr: GetStructField => expr + case expr => If(IsNull('struct1), Literal(null, expr.dataType), expr) + }).as("res"))) + } + + // scalastyle:off line.size.limit + + // add attribute, extract an attribute from the original struct + check(WithField("c", Literal(3)) :: Nil, 0, GetStructField('struct1, 0)) + check(WithField("c", Literal(3)) :: Nil, 1, GetStructField('struct1, 1)) + // add attribute, extract added attribute + check(WithField("c", Literal(3)) :: Nil, 2, Literal(3)) + + // replace attribute, extract an attribute from the original struct + check(WithField("a", Literal(1)) :: Nil, 1, GetStructField('struct1, 1)) + 
check(WithField("b", Literal(2)) :: Nil, 0, GetStructField('struct1, 0)) + // replace attribute, extract replaced attribute + check(WithField("a", Literal(1)) :: Nil, 0, Literal(1)) + check(WithField("b", Literal(2)) :: Nil, 1, Literal(2)) + + // add multiple attributes, extract an attribute from the original struct + check(WithField("c", Literal(3)) :: WithField("c", Literal(4)) :: Nil, 0, GetStructField('struct1, 0)) + check(WithField("c", Literal(3)) :: WithField("d", Literal(4)) :: Nil, 0, GetStructField('struct1, 0)) + check(WithField("c", Literal(3)) :: WithField("c", Literal(4)) :: Nil, 1, GetStructField('struct1, 1)) + check(WithField("c", Literal(3)) :: WithField("d", Literal(4)) :: Nil, 1, GetStructField('struct1, 1)) + // add multiple attributes, extract newly added attribute + check(WithField("c", Literal(3)) :: WithField("c", Literal(4)) :: Nil, 2, Literal(4)) + check(WithField("c", Literal(4)) :: WithField("c", Literal(3)) :: Nil, 2, Literal(3)) + check(WithField("c", Literal(3)) :: WithField("d", Literal(4)) :: Nil, 2, Literal(3)) + check(WithField("c", Literal(3)) :: WithField("d", Literal(4)) :: Nil, 3, Literal(4)) + check(WithField("d", Literal(4)) :: WithField("c", Literal(3)) :: Nil, 2, Literal(4)) + check(WithField("d", Literal(4)) :: WithField("c", Literal(3)) :: Nil, 3, Literal(3)) + + // drop attribute, extract an attribute from the original struct + check(DropField("b") :: Nil, 0, GetStructField('struct1, 0)) + check(DropField("a") :: Nil, 0, GetStructField('struct1, 1)) + + // drop attribute, add attribute, extract an attribute from the original struct + check(DropField("b") :: WithField("c", Literal(3)) :: Nil, 0, GetStructField('struct1, 0)) + check(DropField("a") :: WithField("c", Literal(3)) :: Nil, 0, GetStructField('struct1, 1)) + // drop attribute, add attribute, extract added attribute + check(DropField("b") :: WithField("c", Literal(3)) :: Nil, 1, Literal(3)) + check(DropField("a") :: WithField("c", Literal(3)) :: Nil, 1, Literal(3)) + + // add attribute, drop attribute, extract an attribute from the original struct + check(WithField("c", Literal(3)) :: DropField("a") :: Nil, 0, GetStructField('struct1, 1)) + check(WithField("c", Literal(3)) :: DropField("b") :: Nil, 0, GetStructField('struct1, 0)) + // add attribute, drop attribute, extract added attribute + check(WithField("c", Literal(3)) :: DropField("a") :: Nil, 1, Literal(3)) + check(WithField("c", Literal(3)) :: DropField("b") :: Nil, 1, Literal(3)) + + // replace attribute, drop same attribute, extract an attribute from the original struct + check(WithField("b", Literal(3)) :: DropField("b") :: Nil, 0, GetStructField('struct1, 0)) + check(WithField("a", Literal(3)) :: DropField("a") :: Nil, 0, GetStructField('struct1, 1)) + + // add attribute, drop same attribute, extract an attribute from the original struct + check(WithField("c", Literal(3)) :: DropField("c") :: Nil, 0, GetStructField('struct1, 0)) + check(WithField("c", Literal(3)) :: DropField("c") :: Nil, 1, GetStructField('struct1, 1)) + + // replace attribute, drop another attribute, extract added attribute + check(WithField("b", Literal(3)) :: DropField("a") :: Nil, 0, Literal(3)) + check(WithField("a", Literal(3)) :: DropField("b") :: Nil, 0, Literal(3)) + + // drop attribute, add same attribute, extract attribute from the original struct + check(DropField("b") :: WithField("b", Literal(3)) :: Nil, 0, GetStructField('struct1, 0)) + check(DropField("a") :: WithField("a", Literal(3)) :: Nil, 0, GetStructField('struct1, 1)) + // drop 
attribute, add same attribute, extract added attribute + check(DropField("b") :: WithField("b", Literal(3)) :: Nil, 1, Literal(3)) + check(DropField("a") :: WithField("a", Literal(3)) :: Nil, 1, Literal(3)) + + // drop non-existent attribute, add same attribute, extract attribute from the original struct + check(DropField("c") :: WithField("c", Literal(3)) :: Nil, 0, GetStructField('struct1, 0)) + check(DropField("c") :: WithField("c", Literal(3)) :: Nil, 1, GetStructField('struct1, 1)) + // drop non-existent attribute, add same attribute, extract added attribute + check(DropField("c") :: WithField("c", Literal(3)) :: Nil, 2, Literal(3)) + + // scalastyle:on line.size.limit + } + + test("simplify GetStructField that is extracting a field nested inside a struct") { + val struct2 = 'struct2.struct('b.int) + val testStructRelation = LocalRelation(structAttr, struct2) + val testNullableStructRelation = LocalRelation(nullableStructAttr, struct2) + + // if the field being extracted is from the same struct that UpdateFields is modifying, + // we can just return GetStructField in both the non-nullable and nullable struct scenario + + def addFieldFromSameStructAndThenExtractIt(relation: LocalRelation): LogicalPlan = + relation.select(GetStructField( + UpdateFields('struct1, WithField("b", GetStructField('struct1, 0)) :: Nil), 1).as("res")) checkRule( - query(testStructRelation), - testStructRelation.select(GetStructField('struct1, 0, Some("a")) as "outerAttr")) + addFieldFromSameStructAndThenExtractIt(testStructRelation), + testStructRelation.select(GetStructField('struct1, 0).as("res"))) checkRule( - query(testNullableStructRelation), - testNullableStructRelation.select(GetStructField('struct1, 0, Some("a")) as "outerAttr")) - } + addFieldFromSameStructAndThenExtractIt(testNullableStructRelation), + testNullableStructRelation.select(GetStructField('struct1, 0).as("res"))) - test("simplify GetStructField on WithFields that is changing the attribute being extracted") { - def query(relation: LocalRelation): LogicalPlan = relation.select( - GetStructField(WithFields('struct1, Seq("b"), Seq(Literal(1))), 1, Some("b")) as "res") + // if the field being extracted is from a different struct than the one UpdateFields is + // modifying, we must return GetStructField wrapped in If(IsNull(struct), null, GetStructField) + // in the nullable struct scenario + + def addFieldFromAnotherStructAndThenExtractIt(relation: LocalRelation): LogicalPlan = + relation.select(GetStructField( + UpdateFields('struct1, WithField("b", GetStructField('struct2, 0)) :: Nil), 1).as("res")) checkRule( - query(testStructRelation), - testStructRelation.select(Literal(1) as "res")) + addFieldFromAnotherStructAndThenExtractIt(testStructRelation), + testStructRelation.select(GetStructField('struct2, 0).as("res"))) checkRule( - query(testNullableStructRelation), + addFieldFromAnotherStructAndThenExtractIt(testNullableStructRelation), testNullableStructRelation.select( - If(IsNull('struct1), Literal(null, IntegerType), Literal(1)) as "res")) + If(IsNull('struct1), Literal(null, IntegerType), GetStructField('struct2, 0)).as("res"))) } - test( - "simplify GetStructField on WithFields that is changing the attribute being extracted twice") { - def query(relation: LocalRelation): LogicalPlan = relation.select( - GetStructField(WithFields('struct1, Seq("b", "b"), Seq(Literal(1), Literal(2))), 1, Some("b")) - as "outerAtt") + test("simplify GetStructField on nested UpdateFields") { + def query(relation: LocalRelation, ordinal: Int): LogicalPlan = 
{ + val nestedUpdateFields = + UpdateFields( + UpdateFields( + UpdateFields( + UpdateFields( + 'struct1, + WithField("c", Literal(1)) :: Nil), + WithField("d", Literal(2)) :: Nil), + WithField("e", Literal(3)) :: Nil), + WithField("f", Literal(4)) :: Nil) + + relation.select(GetStructField(nestedUpdateFields, ordinal) as "res") + } + + // extract newly added field checkRule( - query(testStructRelation), - testStructRelation.select(Literal(2) as "outerAtt")) + query(testStructRelation, 5), + testStructRelation.select(Literal(4) as "res")) checkRule( - query(testNullableStructRelation), + query(testNullableStructRelation, 5), testNullableStructRelation.select( - If(IsNull('struct1), Literal(null, IntegerType), Literal(2)) as "outerAtt")) + If(IsNull('struct1), Literal(null, IntegerType), Literal(4)) as "res")) + + // extract field from original struct + + checkRule( + query(testStructRelation, 0), + testStructRelation.select(GetStructField('struct1, 0) as "res")) + + checkRule( + query(testNullableStructRelation, 0), + testNullableStructRelation.select(GetStructField('struct1, 0) as "res")) } - test("collapse multiple GetStructField on the same WithFields") { + test("simplify multiple GetStructField on the same UpdateFields") { def query(relation: LocalRelation): LogicalPlan = relation - .select(WithFields('struct1, Seq("b"), Seq(Literal(2))) as "struct2") + .select(UpdateFields('struct1, WithField("b", Literal(2)) :: Nil) as "struct2") .select( GetStructField('struct2, 0, Some("a")) as "struct1A", GetStructField('struct2, 1, Some("b")) as "struct1B") @@ -512,21 +637,21 @@ class ComplexTypesSuite extends PlanTest with ExpressionEvalHelper { checkRule( query(testStructRelation), testStructRelation.select( - GetStructField('struct1, 0, Some("a")) as "struct1A", + GetStructField('struct1, 0) as "struct1A", Literal(2) as "struct1B")) checkRule( query(testNullableStructRelation), testNullableStructRelation.select( - GetStructField('struct1, 0, Some("a")) as "struct1A", + GetStructField('struct1, 0) as "struct1A", If(IsNull('struct1), Literal(null, IntegerType), Literal(2)) as "struct1B")) } - test("collapse multiple GetStructField on different WithFields") { + test("simplify multiple GetStructField on different UpdateFields") { def query(relation: LocalRelation): LogicalPlan = relation .select( - WithFields('struct1, Seq("b"), Seq(Literal(2))) as "struct2", - WithFields('struct1, Seq("b"), Seq(Literal(3))) as "struct3") + UpdateFields('struct1, WithField("b", Literal(2)) :: Nil) as "struct2", + UpdateFields('struct1, WithField("b", Literal(3)) :: Nil) as "struct3") .select( GetStructField('struct2, 0, Some("a")) as "struct2A", GetStructField('struct2, 1, Some("b")) as "struct2B", @@ -537,18 +662,148 @@ class ComplexTypesSuite extends PlanTest with ExpressionEvalHelper { query(testStructRelation), testStructRelation .select( - GetStructField('struct1, 0, Some("a")) as "struct2A", + GetStructField('struct1, 0) as "struct2A", Literal(2) as "struct2B", - GetStructField('struct1, 0, Some("a")) as "struct3A", + GetStructField('struct1, 0) as "struct3A", Literal(3) as "struct3B")) checkRule( query(testNullableStructRelation), testNullableStructRelation .select( - GetStructField('struct1, 0, Some("a")) as "struct2A", + GetStructField('struct1, 0) as "struct2A", If(IsNull('struct1), Literal(null, IntegerType), Literal(2)) as "struct2B", - GetStructField('struct1, 0, Some("a")) as "struct3A", + GetStructField('struct1, 0) as "struct3A", If(IsNull('struct1), Literal(null, IntegerType), Literal(3)) as 
"struct3B")) } + + test("simplify add multiple nested fields to non-nullable struct") { + // this scenario is possible if users add multiple nested columns to a non-nullable struct + // using the Column.withField API in a non-performant way + val structLevel2 = LocalRelation( + 'a1.struct( + 'a2.struct('a3.int.notNull)).notNull) + + val query = { + val addB3toA1A2 = UpdateFields('a1, Seq(WithField("a2", + UpdateFields(GetStructField('a1, 0), Seq(WithField("b3", Literal(2))))))) + + structLevel2.select( + UpdateFields( + addB3toA1A2, + Seq(WithField("a2", UpdateFields( + GetStructField(addB3toA1A2, 0), Seq(WithField("c3", Literal(3))))))).as("a1")) + } + + val expected = structLevel2.select( + UpdateFields('a1, Seq( + // scalastyle:off line.size.limit + WithField("a2", UpdateFields(GetStructField('a1, 0), WithField("b3", 2) :: Nil)), + WithField("a2", UpdateFields(GetStructField('a1, 0), WithField("b3", 2) :: WithField("c3", 3) :: Nil)) + // scalastyle:on line.size.limit + )).as("a1")) + + checkRule(query, expected) + } + + test("simplify add multiple nested fields to nullable struct") { + // this scenario is possible if users add multiple nested columns to a nullable struct + // using the Column.withField API in a non-performant way + val structLevel2 = LocalRelation( + 'a1.struct( + 'a2.struct('a3.int.notNull))) + + val query = { + val addB3toA1A2 = UpdateFields('a1, Seq(WithField("a2", + UpdateFields(GetStructField('a1, 0), Seq(WithField("b3", Literal(2))))))) + + structLevel2.select( + UpdateFields( + addB3toA1A2, + Seq(WithField("a2", UpdateFields( + GetStructField(addB3toA1A2, 0), Seq(WithField("c3", Literal(3))))))).as("a1")) + } + + val expected = { + val repeatedExpr = UpdateFields(GetStructField('a1, 0), WithField("b3", Literal(2)) :: Nil) + val repeatedExprDataType = StructType(Seq( + StructField("a3", IntegerType, nullable = false), + StructField("b3", IntegerType, nullable = false))) + + structLevel2.select( + UpdateFields('a1, Seq( + WithField("a2", repeatedExpr), + WithField("a2", UpdateFields( + If(IsNull('a1), Literal(null, repeatedExprDataType), repeatedExpr), + WithField("c3", Literal(3)) :: Nil)) + )).as("a1")) + } + + checkRule(query, expected) + } + + test("simplify drop multiple nested fields in non-nullable struct") { + // this scenario is possible if users drop multiple nested columns in a non-nullable struct + // using the Column.dropFields API in a non-performant way + val structLevel2 = LocalRelation( + 'a1.struct( + 'a2.struct('a3.int.notNull, 'b3.int.notNull, 'c3.int.notNull).notNull + ).notNull) + + val query = { + val dropA1A2B = UpdateFields('a1, Seq(WithField("a2", UpdateFields( + GetStructField('a1, 0), Seq(DropField("b3")))))) + + structLevel2.select( + UpdateFields( + dropA1A2B, + Seq(WithField("a2", UpdateFields( + GetStructField(dropA1A2B, 0), Seq(DropField("c3")))))).as("a1")) + } + + val expected = structLevel2.select( + UpdateFields('a1, Seq( + WithField("a2", UpdateFields(GetStructField('a1, 0), Seq(DropField("b3")))), + WithField("a2", UpdateFields(GetStructField('a1, 0), Seq(DropField("b3"), DropField("c3")))) + )).as("a1")) + + checkRule(query, expected) + } + + test("simplify drop multiple nested fields in nullable struct") { + // this scenario is possible if users drop multiple nested columns in a nullable struct + // using the Column.dropFields API in a non-performant way + val structLevel2 = LocalRelation( + 'a1.struct( + 'a2.struct('a3.int.notNull, 'b3.int.notNull, 'c3.int.notNull) + )) + + val query = { + val dropA1A2B = UpdateFields('a1, 
Seq(WithField("a2", UpdateFields( + GetStructField('a1, 0), Seq(DropField("b3")))))) + + structLevel2.select( + UpdateFields( + dropA1A2B, + Seq(WithField("a2", UpdateFields( + GetStructField(dropA1A2B, 0), Seq(DropField("c3")))))).as("a1")) + } + + val expected = { + val repeatedExpr = UpdateFields(GetStructField('a1, 0), DropField("b3") :: Nil) + val repeatedExprDataType = StructType(Seq( + StructField("a3", IntegerType, nullable = false), + StructField("c3", IntegerType, nullable = false))) + + structLevel2.select( + UpdateFields('a1, Seq( + WithField("a2", repeatedExpr), + WithField("a2", UpdateFields( + If(IsNull('a1), Literal(null, repeatedExprDataType), repeatedExpr), + DropField("c3") :: Nil)) + )).as("a1")) + } + + checkRule(query, expected) + } } diff --git a/sql/core/benchmarks/UpdateFieldsBenchmark-results.txt b/sql/core/benchmarks/UpdateFieldsBenchmark-results.txt new file mode 100644 index 0000000000000..5feca0e100bb1 --- /dev/null +++ b/sql/core/benchmarks/UpdateFieldsBenchmark-results.txt @@ -0,0 +1,26 @@ +================================================================================================ +Add 2 columns and drop 2 columns at 3 different depths of nesting +================================================================================================ + +OpenJDK 64-Bit Server VM 1.8.0_212-b03 on Mac OS X 10.14.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +Add 2 columns and drop 2 columns at 3 different depths of nesting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------------- +To non-nullable StructTypes using performant method 10 11 2 0.0 Infinity 1.0X +To nullable StructTypes using performant method 9 10 1 0.0 Infinity 1.0X +To non-nullable StructTypes using non-performant method 2457 2464 10 0.0 Infinity 0.0X +To nullable StructTypes using non-performant method 42641 43804 1644 0.0 Infinity 0.0X + + +================================================================================================ +Add 50 columns and drop 50 columns at 100 different depths of nesting +================================================================================================ + +OpenJDK 64-Bit Server VM 1.8.0_212-b03 on Mac OS X 10.14.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +Add 50 columns and drop 50 columns at 100 different depths of nesting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +----------------------------------------------------------------------------------------------------------------------------------------------------- +To non-nullable StructTypes using performant method 4595 4927 470 0.0 Infinity 1.0X +To nullable StructTypes using performant method 5185 5516 468 0.0 Infinity 0.9X + + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index da542c67d9c51..a46d6c0bb2282 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -901,6 +901,23 @@ class Column(val expr: Expression) extends Logging { * // result: org.apache.spark.sql.AnalysisException: Ambiguous reference to fields * }}} * + * This method supports adding/replacing nested fields directly e.g. 
+ * + * {{{ + * val df = sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2)) struct_col") + * df.select($"struct_col".withField("a.c", lit(3)).withField("a.d", lit(4))) + * // result: {"a":{"a":1,"b":2,"c":3,"d":4}} + * }}} + * + * However, if you are going to add/replace multiple nested fields, it is more optimal to extract + * out the nested struct before adding/replacing multiple fields e.g. + * + * {{{ + * val df = sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2)) struct_col") + * df.select($"struct_col".withField("a", $"struct_col.a".withField("c", lit(3)).withField("d", lit(4)))) + * // result: {"a":{"a":1,"b":2,"c":3,"d":4}} + * }}} + * * @group expr_ops * @since 3.1.0 */ @@ -908,32 +925,102 @@ class Column(val expr: Expression) extends Logging { def withField(fieldName: String, col: Column): Column = withExpr { require(fieldName != null, "fieldName cannot be null") require(col != null, "col cannot be null") + updateFieldsHelper(expr, nameParts(fieldName), name => WithField(name, col.expr)) + } - val nameParts = if (fieldName.isEmpty) { + // scalastyle:off line.size.limit + /** + * An expression that drops fields in `StructType` by name. + * This is a no-op if schema doesn't contain field name(s). + * + * {{{ + * val df = sql("SELECT named_struct('a', 1, 'b', 2) struct_col") + * df.select($"struct_col".dropFields("b")) + * // result: {"a":1} + * + * val df = sql("SELECT named_struct('a', 1, 'b', 2) struct_col") + * df.select($"struct_col".dropFields("c")) + * // result: {"a":1,"b":2} + * + * val df = sql("SELECT named_struct('a', 1, 'b', 2, 'c', 3) struct_col") + * df.select($"struct_col".dropFields("b", "c")) + * // result: {"a":1} + * + * val df = sql("SELECT named_struct('a', 1, 'b', 2) struct_col") + * df.select($"struct_col".dropFields("a", "b")) + * // result: org.apache.spark.sql.AnalysisException: cannot resolve 'update_fields(update_fields(`struct_col`))' due to data type mismatch: cannot drop all fields in struct + * + * val df = sql("SELECT CAST(NULL AS struct) struct_col") + * df.select($"struct_col".dropFields("b")) + * // result: null of type struct + * + * val df = sql("SELECT named_struct('a', 1, 'b', 2, 'b', 3) struct_col") + * df.select($"struct_col".dropFields("b")) + * // result: {"a":1} + * + * val df = sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2)) struct_col") + * df.select($"struct_col".dropFields("a.b")) + * // result: {"a":{"a":1}} + * + * val df = sql("SELECT named_struct('a', named_struct('b', 1), 'a', named_struct('c', 2)) struct_col") + * df.select($"struct_col".dropFields("a.c")) + * // result: org.apache.spark.sql.AnalysisException: Ambiguous reference to fields + * }}} + * + * This method supports dropping multiple nested fields directly e.g. + * + * {{{ + * val df = sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2)) struct_col") + * df.select($"struct_col".dropFields("a.b", "a.c")) + * // result: {"a":{"a":1}} + * }}} + * + * However, if you are going to drop multiple nested fields, it is more optimal to extract + * out the nested struct before dropping multiple fields from it e.g. 
+ * + * {{{ + * val df = sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2)) struct_col") + * df.select($"struct_col".withField("a", $"struct_col.a".dropFields("b", "c"))) + * // result: {"a":{"a":1}} + * }}} + * + * @group expr_ops + * @since 3.1.0 + */ + // scalastyle:on line.size.limit + def dropFields(fieldNames: String*): Column = withExpr { + def dropField(structExpr: Expression, fieldName: String): UpdateFields = + updateFieldsHelper(structExpr, nameParts(fieldName), name => DropField(name)) + + fieldNames.tail.foldLeft(dropField(expr, fieldNames.head)) { + (resExpr, fieldName) => dropField(resExpr, fieldName) + } + } + + private def nameParts(fieldName: String): Seq[String] = { + require(fieldName != null, "fieldName cannot be null") + + if (fieldName.isEmpty) { fieldName :: Nil } else { CatalystSqlParser.parseMultipartIdentifier(fieldName) } - withFieldHelper(expr, nameParts, Nil, col.expr) } - private def withFieldHelper( - struct: Expression, + private def updateFieldsHelper( + structExpr: Expression, namePartsRemaining: Seq[String], - namePartsDone: Seq[String], - value: Expression) : WithFields = { - val name = namePartsRemaining.head + valueFunc: String => StructFieldsOperation): UpdateFields = { + + val fieldName = namePartsRemaining.head if (namePartsRemaining.length == 1) { - WithFields(struct, name :: Nil, value :: Nil) + UpdateFields(structExpr, valueFunc(fieldName) :: Nil) } else { - val newNamesRemaining = namePartsRemaining.tail - val newNamesDone = namePartsDone :+ name - val newValue = withFieldHelper( - struct = UnresolvedExtractValue(struct, Literal(name)), - namePartsRemaining = newNamesRemaining, - namePartsDone = newNamesDone, - value = value) - WithFields(struct, name :: Nil, newValue :: Nil) + val newValue = updateFieldsHelper( + structExpr = UnresolvedExtractValue(structExpr, Literal(fieldName)), + namePartsRemaining = namePartsRemaining.tail, + valueFunc = valueFunc) + UpdateFields(structExpr, WithField(fieldName, newValue) :: Nil) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala index 24419968c0472..b11f4c603dfd6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala @@ -24,6 +24,7 @@ import org.apache.hadoop.io.{LongWritable, Text} import org.apache.hadoop.mapreduce.lib.input.{TextInputFormat => NewTextInputFormat} import org.scalatest.matchers.should.Matchers._ +import org.apache.spark.sql.UpdateFieldsBenchmark._ import org.apache.spark.sql.catalyst.expressions.{InSet, Literal, NamedExpression} import org.apache.spark.sql.execution.ProjectExec import org.apache.spark.sql.functions._ @@ -922,11 +923,10 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { assert(inSet.sql === "('a' IN ('a', 'b'))") } - def checkAnswerAndSchema( + def checkAnswer( df: => DataFrame, expectedAnswer: Seq[Row], expectedSchema: StructType): Unit = { - checkAnswer(df, expectedAnswer) assert(df.schema == expectedSchema) } @@ -940,8 +940,8 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { sparkContext.parallelize(Row(Row(1, null, 3)) :: Nil), StructType(Seq(StructField("a", structType, nullable = false)))) - private lazy val nullStructLevel1: DataFrame = spark.createDataFrame( - sparkContext.parallelize(Row(null) :: Nil), + private lazy val nullableStructLevel1: DataFrame = spark.createDataFrame( + 
sparkContext.parallelize(Row(null) :: Row(Row(1, null, 3)) :: Nil), StructType(Seq(StructField("a", structType, nullable = true)))) private lazy val structLevel2: DataFrame = spark.createDataFrame( @@ -951,12 +951,12 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { StructField("a", structType, nullable = false))), nullable = false)))) - private lazy val nullStructLevel2: DataFrame = spark.createDataFrame( - sparkContext.parallelize(Row(Row(null)) :: Nil), + private lazy val nullableStructLevel2: DataFrame = spark.createDataFrame( + sparkContext.parallelize(Row(null) :: Row(Row(null)) :: Row(Row(Row(1, null, 3))) :: Nil), StructType(Seq( StructField("a", StructType(Seq( StructField("a", structType, nullable = true))), - nullable = false)))) + nullable = true)))) private lazy val structLevel3: DataFrame = spark.createDataFrame( sparkContext.parallelize(Row(Row(Row(Row(1, null, 3)))) :: Nil), @@ -1018,7 +1018,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { } test("withField should add field with no name") { - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", $"a".withField("", lit(4))), Row(Row(1, null, 3, 4)) :: Nil, StructType(Seq( @@ -1031,7 +1031,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { } test("withField should add field to struct") { - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", 'a.withField("d", lit(4))), Row(Row(1, null, 3, 4)) :: Nil, StructType(Seq( @@ -1043,10 +1043,10 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) } - test("withField should add field to null struct") { - checkAnswerAndSchema( - nullStructLevel1.withColumn("a", $"a".withField("d", lit(4))), - Row(null) :: Nil, + test("withField should add field to nullable struct") { + checkAnswer( + nullableStructLevel1.withColumn("a", $"a".withField("d", lit(4))), + Row(null) :: Row(Row(1, null, 3, 4)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( StructField("a", IntegerType, nullable = false), @@ -1056,10 +1056,10 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = true)))) } - test("withField should add field to nested null struct") { - checkAnswerAndSchema( - nullStructLevel2.withColumn("a", $"a".withField("a.d", lit(4))), - Row(Row(null)) :: Nil, + test("withField should add field to nested nullable struct") { + checkAnswer( + nullableStructLevel2.withColumn("a", $"a".withField("a.d", lit(4))), + Row(null) :: Row(Row(null)) :: Row(Row(Row(1, null, 3, 4))) :: Nil, StructType( Seq(StructField("a", StructType(Seq( StructField("a", StructType(Seq( @@ -1068,11 +1068,11 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { StructField("c", IntegerType, nullable = false), StructField("d", IntegerType, nullable = false))), nullable = true))), - nullable = false)))) + nullable = true)))) } test("withField should add null field to struct") { - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", 'a.withField("d", lit(null).cast(IntegerType))), Row(Row(1, null, 3, null)) :: Nil, StructType(Seq( @@ -1085,7 +1085,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { } test("withField should add multiple fields to struct") { - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", 'a.withField("d", lit(4)).withField("e", lit(5))), Row(Row(1, null, 3, 4, 5)) :: Nil, StructType(Seq( @@ -1098,12 +1098,26 @@ class ColumnExpressionSuite extends QueryTest with 
SharedSparkSession { nullable = false)))) } + test("withField should add multiple fields to nullable struct") { + checkAnswer( + nullableStructLevel1.withColumn("a", 'a.withField("d", lit(4)).withField("e", lit(5))), + Row(null) :: Row(Row(1, null, 3, 4, 5)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = true), + StructField("c", IntegerType, nullable = false), + StructField("d", IntegerType, nullable = false), + StructField("e", IntegerType, nullable = false))), + nullable = true)))) + } + test("withField should add field to nested struct") { Seq( structLevel2.withColumn("a", 'a.withField("a.d", lit(4))), structLevel2.withColumn("a", 'a.withField("a", $"a.a".withField("d", lit(4)))) ).foreach { df => - checkAnswerAndSchema( + checkAnswer( df, Row(Row(Row(1, null, 3, 4))) :: Nil, StructType( @@ -1118,8 +1132,50 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { } } + test("withField should add multiple fields to nested struct") { + Seq( + col("a").withField("a", $"a.a".withField("d", lit(4)).withField("e", lit(5))), + col("a").withField("a.d", lit(4)).withField("a.e", lit(5)) + ).foreach { column => + checkAnswer( + structLevel2.select(column.as("a")), + Row(Row(Row(1, null, 3, 4, 5))) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = true), + StructField("c", IntegerType, nullable = false), + StructField("d", IntegerType, nullable = false), + StructField("e", IntegerType, nullable = false))), + nullable = false))), + nullable = false)))) + } + } + + test("withField should add multiple fields to nested nullable struct") { + Seq( + col("a").withField("a", $"a.a".withField("d", lit(4)).withField("e", lit(5))), + col("a").withField("a.d", lit(4)).withField("a.e", lit(5)) + ).foreach { column => + checkAnswer( + nullableStructLevel2.select(column.as("a")), + Row(null) :: Row(Row(null)) :: Row(Row(Row(1, null, 3, 4, 5))) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = true), + StructField("c", IntegerType, nullable = false), + StructField("d", IntegerType, nullable = false), + StructField("e", IntegerType, nullable = false))), + nullable = true))), + nullable = true)))) + } + } + test("withField should add field to deeply nested struct") { - checkAnswerAndSchema( + checkAnswer( structLevel3.withColumn("a", 'a.withField("a.a.d", lit(4))), Row(Row(Row(Row(1, null, 3, 4)))) :: Nil, StructType(Seq( @@ -1136,7 +1192,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { } test("withField should replace field in struct") { - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", 'a.withField("b", lit(2))), Row(Row(1, 2, 3)) :: Nil, StructType(Seq( @@ -1147,10 +1203,10 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) } - test("withField should replace field in null struct") { - checkAnswerAndSchema( - nullStructLevel1.withColumn("a", 'a.withField("b", lit("foo"))), - Row(null) :: Nil, + test("withField should replace field in nullable struct") { + checkAnswer( + nullableStructLevel1.withColumn("a", 'a.withField("b", lit("foo"))), + Row(null) :: Row(Row(1, "foo", 3)) :: Nil, StructType(Seq( StructField("a", 
StructType(Seq( StructField("a", IntegerType, nullable = false), @@ -1159,10 +1215,10 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = true)))) } - test("withField should replace field in nested null struct") { - checkAnswerAndSchema( - nullStructLevel2.withColumn("a", $"a".withField("a.b", lit("foo"))), - Row(Row(null)) :: Nil, + test("withField should replace field in nested nullable struct") { + checkAnswer( + nullableStructLevel2.withColumn("a", $"a".withField("a.b", lit("foo"))), + Row(null) :: Row(Row(null)) :: Row(Row(Row(1, "foo", 3))) :: Nil, StructType( Seq(StructField("a", StructType(Seq( StructField("a", StructType(Seq( @@ -1170,11 +1226,11 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { StructField("b", StringType, nullable = false), StructField("c", IntegerType, nullable = false))), nullable = true))), - nullable = false)))) + nullable = true)))) } test("withField should replace field with null value in struct") { - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", 'a.withField("c", lit(null).cast(IntegerType))), Row(Row(1, null, null)) :: Nil, StructType(Seq( @@ -1186,7 +1242,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { } test("withField should replace multiple fields in struct") { - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", 'a.withField("a", lit(10)).withField("b", lit(20))), Row(Row(10, 20, 3)) :: Nil, StructType(Seq( @@ -1197,12 +1253,24 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) } + test("withField should replace multiple fields in nullable struct") { + checkAnswer( + nullableStructLevel1.withColumn("a", 'a.withField("a", lit(10)).withField("b", lit(20))), + Row(null) :: Row(Row(10, 20, 3)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = false), + StructField("c", IntegerType, nullable = false))), + nullable = true)))) + } + test("withField should replace field in nested struct") { Seq( structLevel2.withColumn("a", $"a".withField("a.b", lit(2))), structLevel2.withColumn("a", 'a.withField("a", $"a.a".withField("b", lit(2)))) ).foreach { df => - checkAnswerAndSchema( + checkAnswer( df, Row(Row(Row(1, 2, 3))) :: Nil, StructType(Seq( @@ -1216,8 +1284,46 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { } } + test("withField should replace multiple fields in nested struct") { + Seq( + col("a").withField("a", $"a.a".withField("a", lit(10)).withField("b", lit(20))), + col("a").withField("a.a", lit(10)).withField("a.b", lit(20)) + ).foreach { column => + checkAnswer( + structLevel2.select(column.as("a")), + Row(Row(Row(10, 20, 3))) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = false), + StructField("c", IntegerType, nullable = false))), + nullable = false))), + nullable = false)))) + } + } + + test("withField should replace multiple fields in nested nullable struct") { + Seq( + col("a").withField("a", $"a.a".withField("a", lit(10)).withField("b", lit(20))), + col("a").withField("a.a", lit(10)).withField("a.b", lit(20)) + ).foreach { column => + checkAnswer( + nullableStructLevel2.select(column.as("a")), + Row(null) :: Row(Row(null)) :: Row(Row(Row(10, 20, 3))) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + 
StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = false), + StructField("c", IntegerType, nullable = false))), + nullable = true))), + nullable = true)))) + } + } + test("withField should replace field in deeply nested struct") { - checkAnswerAndSchema( + checkAnswer( structLevel3.withColumn("a", $"a".withField("a.a.b", lit(2))), Row(Row(Row(Row(1, 2, 3)))) :: Nil, StructType(Seq( @@ -1242,7 +1348,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { StructField("b", IntegerType, nullable = false))), nullable = false)))) - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", 'a.withField("b", lit(100))), Row(Row(1, 100, 100)) :: Nil, StructType(Seq( @@ -1254,7 +1360,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { } test("withField should replace fields in struct in given order") { - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", 'a.withField("b", lit(2)).withField("b", lit(20))), Row(Row(1, 20, 3)) :: Nil, StructType(Seq( @@ -1266,7 +1372,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { } test("withField should add field and then replace same field in struct") { - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", 'a.withField("d", lit(4)).withField("d", lit(5))), Row(Row(1, null, 3, 5)) :: Nil, StructType(Seq( @@ -1290,7 +1396,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false))), nullable = false)))) - checkAnswerAndSchema( + checkAnswer( df.withColumn("a", 'a.withField("`a.b`.`e.f`", lit(2))), Row(Row(Row(1, 2, 3))) :: Nil, StructType(Seq( @@ -1317,7 +1423,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should replace field in struct even if casing is different") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { - checkAnswerAndSchema( + checkAnswer( mixedCaseStructLevel1.withColumn("a", 'a.withField("A", lit(2))), Row(Row(2, 1)) :: Nil, StructType(Seq( @@ -1326,7 +1432,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { StructField("B", IntegerType, nullable = false))), nullable = false)))) - checkAnswerAndSchema( + checkAnswer( mixedCaseStructLevel1.withColumn("a", 'a.withField("b", lit(2))), Row(Row(1, 2)) :: Nil, StructType(Seq( @@ -1339,7 +1445,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should add field to struct because casing is different") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - checkAnswerAndSchema( + checkAnswer( mixedCaseStructLevel1.withColumn("a", 'a.withField("A", lit(2))), Row(Row(1, 1, 2)) :: Nil, StructType(Seq( @@ -1349,7 +1455,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { StructField("A", IntegerType, nullable = false))), nullable = false)))) - checkAnswerAndSchema( + checkAnswer( mixedCaseStructLevel1.withColumn("a", 'a.withField("b", lit(2))), Row(Row(1, 1, 2)) :: Nil, StructType(Seq( @@ -1377,7 +1483,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should replace nested field in struct even if casing is different") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { - checkAnswerAndSchema( + checkAnswer( mixedCaseStructLevel2.withColumn("a", 'a.withField("A.a", lit(2))), Row(Row(Row(2, 1), Row(1, 1))) :: Nil, StructType(Seq( @@ -1392,7 +1498,7 @@ class ColumnExpressionSuite extends QueryTest with 
SharedSparkSession { nullable = false))), nullable = false)))) - checkAnswerAndSchema( + checkAnswer( mixedCaseStructLevel2.withColumn("a", 'a.withField("b.a", lit(2))), Row(Row(Row(1, 1), Row(2, 1))) :: Nil, StructType(Seq( @@ -1451,30 +1557,41 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { sql("SELECT named_struct('a', named_struct('b', 1), 'a', named_struct('c', 2)) struct_col") .select($"struct_col".withField("a.c", lit(3))) }.getMessage should include("Ambiguous reference to fields") + + checkAnswer( + sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2)) struct_col") + .select($"struct_col".withField("a.c", lit(3)).withField("a.d", lit(4))), + Row(Row(Row(1, 2, 3, 4)))) + + checkAnswer( + sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2)) struct_col") + .select($"struct_col".withField("a", + $"struct_col.a".withField("c", lit(3)).withField("d", lit(4)))), + Row(Row(Row(1, 2, 3, 4)))) } test("SPARK-32641: extracting field from non-null struct column after withField should return " + "field value") { // extract newly added field - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", $"a".withField("d", lit(4)).getField("d")), Row(4) :: Nil, StructType(Seq(StructField("a", IntegerType, nullable = false)))) // extract newly replaced field - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", $"a".withField("a", lit(4)).getField("a")), Row(4) :: Nil, StructType(Seq(StructField("a", IntegerType, nullable = false)))) // add new field, extract another field from original struct - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", $"a".withField("d", lit(4)).getField("c")), Row(3):: Nil, StructType(Seq(StructField("a", IntegerType, nullable = false)))) // replace field, extract another field from original struct - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", $"a".withField("a", lit(4)).getField("c")), Row(3):: Nil, StructType(Seq(StructField("a", IntegerType, nullable = false)))) @@ -1482,26 +1599,30 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("SPARK-32641: extracting field from null struct column after withField should return " + "null if the original struct was null") { + val nullStructLevel1 = spark.createDataFrame( + sparkContext.parallelize(Row(null) :: Nil), + StructType(Seq(StructField("a", structType, nullable = true)))) + // extract newly added field - checkAnswerAndSchema( + checkAnswer( nullStructLevel1.withColumn("a", $"a".withField("d", lit(4)).getField("d")), Row(null) :: Nil, StructType(Seq(StructField("a", IntegerType, nullable = true)))) // extract newly replaced field - checkAnswerAndSchema( + checkAnswer( nullStructLevel1.withColumn("a", $"a".withField("a", lit(4)).getField("a")), Row(null):: Nil, StructType(Seq(StructField("a", IntegerType, nullable = true)))) // add new field, extract another field from original struct - checkAnswerAndSchema( + checkAnswer( nullStructLevel1.withColumn("a", $"a".withField("d", lit(4)).getField("c")), Row(null):: Nil, StructType(Seq(StructField("a", IntegerType, nullable = true)))) // replace field, extract another field from original struct - checkAnswerAndSchema( + checkAnswer( nullStructLevel1.withColumn("a", $"a".withField("a", lit(4)).getField("c")), Row(null):: Nil, StructType(Seq(StructField("a", IntegerType, nullable = true)))) @@ -1514,27 +1635,671 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { StructType(Seq(StructField("a", structType, nullable = true)))) // 
extract newly added field - checkAnswerAndSchema( + checkAnswer( df.withColumn("a", $"a".withField("d", lit(4)).getField("d")), Row(4) :: Row(null) :: Nil, StructType(Seq(StructField("a", IntegerType, nullable = true)))) // extract newly replaced field - checkAnswerAndSchema( + checkAnswer( df.withColumn("a", $"a".withField("a", lit(4)).getField("a")), Row(4) :: Row(null):: Nil, StructType(Seq(StructField("a", IntegerType, nullable = true)))) // add new field, extract another field from original struct - checkAnswerAndSchema( + checkAnswer( df.withColumn("a", $"a".withField("d", lit(4)).getField("c")), Row(3) :: Row(null):: Nil, StructType(Seq(StructField("a", IntegerType, nullable = true)))) // replace field, extract another field from original struct - checkAnswerAndSchema( + checkAnswer( df.withColumn("a", $"a".withField("a", lit(4)).getField("c")), Row(3) :: Row(null):: Nil, StructType(Seq(StructField("a", IntegerType, nullable = true)))) } + + + test("dropFields should throw an exception if called on a non-StructType column") { + intercept[AnalysisException] { + testData.withColumn("key", $"key".dropFields("a")) + }.getMessage should include("struct argument should be struct type, got: int") + } + + test("dropFields should throw an exception if fieldName argument is null") { + intercept[IllegalArgumentException] { + structLevel1.withColumn("a", $"a".dropFields(null)) + }.getMessage should include("fieldName cannot be null") + } + + test("dropFields should throw an exception if any intermediate structs don't exist") { + intercept[AnalysisException] { + structLevel2.withColumn("a", 'a.dropFields("x.b")) + }.getMessage should include("No such struct field x in a") + + intercept[AnalysisException] { + structLevel3.withColumn("a", 'a.dropFields("a.x.b")) + }.getMessage should include("No such struct field x in a") + } + + test("dropFields should throw an exception if intermediate field is not a struct") { + intercept[AnalysisException] { + structLevel1.withColumn("a", 'a.dropFields("b.a")) + }.getMessage should include("struct argument should be struct type, got: int") + } + + test("dropFields should throw an exception if intermediate field reference is ambiguous") { + intercept[AnalysisException] { + val structLevel2: DataFrame = spark.createDataFrame( + sparkContext.parallelize(Row(Row(Row(1, null, 3), 4)) :: Nil), + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", structType, nullable = false), + StructField("a", structType, nullable = false))), + nullable = false)))) + + structLevel2.withColumn("a", 'a.dropFields("a.b")) + }.getMessage should include("Ambiguous reference to fields") + } + + test("dropFields should drop field in struct") { + checkAnswer( + structLevel1.withColumn("a", 'a.dropFields("b")), + Row(Row(1, 3)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("c", IntegerType, nullable = false))), + nullable = false)))) + } + + test("dropFields should drop field in nullable struct") { + checkAnswer( + nullableStructLevel1.withColumn("a", $"a".dropFields("b")), + Row(null) :: Row(Row(1, 3)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("c", IntegerType, nullable = false))), + nullable = true)))) + } + + test("dropFields should drop multiple fields in struct") { + Seq( + structLevel1.withColumn("a", $"a".dropFields("b", "c")), + structLevel1.withColumn("a", 'a.dropFields("b").dropFields("c")) + 
).foreach { df => + checkAnswer( + df, + Row(Row(1)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false))), + nullable = false)))) + } + } + + test("dropFields should throw an exception if no fields will be left in struct") { + intercept[AnalysisException] { + structLevel1.withColumn("a", 'a.dropFields("a", "b", "c")) + }.getMessage should include("cannot drop all fields in struct") + } + + test("dropFields should drop field with no name in struct") { + val structType = StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("", IntegerType, nullable = false))) + + val structLevel1: DataFrame = spark.createDataFrame( + sparkContext.parallelize(Row(Row(1, 2)) :: Nil), + StructType(Seq(StructField("a", structType, nullable = false)))) + + checkAnswer( + structLevel1.withColumn("a", $"a".dropFields("")), + Row(Row(1)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false))), + nullable = false)))) + } + + test("dropFields should drop field in nested struct") { + checkAnswer( + structLevel2.withColumn("a", 'a.dropFields("a.b")), + Row(Row(Row(1, 3))) :: Nil, + StructType( + Seq(StructField("a", StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("c", IntegerType, nullable = false))), + nullable = false))), + nullable = false)))) + } + + test("dropFields should drop multiple fields in nested struct") { + checkAnswer( + structLevel2.withColumn("a", 'a.dropFields("a.b", "a.c")), + Row(Row(Row(1))) :: Nil, + StructType( + Seq(StructField("a", StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false))), + nullable = false))), + nullable = false)))) + } + + test("dropFields should drop field in nested nullable struct") { + checkAnswer( + nullableStructLevel2.withColumn("a", $"a".dropFields("a.b")), + Row(null) :: Row(Row(null)) :: Row(Row(Row(1, 3))) :: Nil, + StructType( + Seq(StructField("a", StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("c", IntegerType, nullable = false))), + nullable = true))), + nullable = true)))) + } + + test("dropFields should drop multiple fields in nested nullable struct") { + checkAnswer( + nullableStructLevel2.withColumn("a", $"a".dropFields("a.b", "a.c")), + Row(null) :: Row(Row(null)) :: Row(Row(Row(1))) :: Nil, + StructType( + Seq(StructField("a", StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false))), + nullable = true))), + nullable = true)))) + } + + test("dropFields should drop field in deeply nested struct") { + checkAnswer( + structLevel3.withColumn("a", 'a.dropFields("a.a.b")), + Row(Row(Row(Row(1, 3)))) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("c", IntegerType, nullable = false))), + nullable = false))), + nullable = false))), + nullable = false)))) + } + + test("dropFields should drop all fields with given name in struct") { + val structLevel1 = spark.createDataFrame( + sparkContext.parallelize(Row(Row(1, 2, 3)) :: Nil), + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = false))), + nullable = false)))) 
+ + checkAnswer( + structLevel1.withColumn("a", 'a.dropFields("b")), + Row(Row(1)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false))), + nullable = false)))) + } + + test("dropFields should drop field in struct even if casing is different") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + checkAnswer( + mixedCaseStructLevel1.withColumn("a", 'a.dropFields("A")), + Row(Row(1)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("B", IntegerType, nullable = false))), + nullable = false)))) + + checkAnswer( + mixedCaseStructLevel1.withColumn("a", 'a.dropFields("b")), + Row(Row(1)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false))), + nullable = false)))) + } + } + + test("dropFields should not drop field in struct because casing is different") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + checkAnswer( + mixedCaseStructLevel1.withColumn("a", 'a.dropFields("A")), + Row(Row(1, 1)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("B", IntegerType, nullable = false))), + nullable = false)))) + + checkAnswer( + mixedCaseStructLevel1.withColumn("a", 'a.dropFields("b")), + Row(Row(1, 1)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("B", IntegerType, nullable = false))), + nullable = false)))) + } + } + + test("dropFields should drop nested field in struct even if casing is different") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + checkAnswer( + mixedCaseStructLevel2.withColumn("a", 'a.dropFields("A.a")), + Row(Row(Row(1), Row(1, 1))) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("A", StructType(Seq( + StructField("b", IntegerType, nullable = false))), + nullable = false), + StructField("B", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = false))), + nullable = false))), + nullable = false)))) + + checkAnswer( + mixedCaseStructLevel2.withColumn("a", 'a.dropFields("b.a")), + Row(Row(Row(1, 1), Row(1))) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = false))), + nullable = false), + StructField("b", StructType(Seq( + StructField("b", IntegerType, nullable = false))), + nullable = false))), + nullable = false)))) + } + } + + test("dropFields should throw an exception because casing is different") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + intercept[AnalysisException] { + mixedCaseStructLevel2.withColumn("a", 'a.dropFields("A.a")) + }.getMessage should include("No such struct field A in a, B") + + intercept[AnalysisException] { + mixedCaseStructLevel2.withColumn("a", 'a.dropFields("b.a")) + }.getMessage should include("No such struct field b in a, B") + } + } + + test("dropFields should drop only fields that exist") { + checkAnswer( + structLevel1.withColumn("a", 'a.dropFields("d")), + Row(Row(1, null, 3)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = true), + StructField("c", IntegerType, nullable = false))), + nullable = false)))) + + checkAnswer( + structLevel1.withColumn("a", 'a.dropFields("b", "d")), + Row(Row(1, 3)) 
:: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("c", IntegerType, nullable = false))), + nullable = false)))) + + checkAnswer( + structLevel2.withColumn("a", $"a".dropFields("a.b", "a.d")), + Row(Row(Row(1, 3))) :: Nil, + StructType( + Seq(StructField("a", StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("c", IntegerType, nullable = false))), + nullable = false))), + nullable = false)))) + } + + test("dropFields should drop multiple fields at arbitrary levels of nesting in a single call") { + val df: DataFrame = spark.createDataFrame( + sparkContext.parallelize(Row(Row(Row(1, null, 3), 4)) :: Nil), + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", structType, nullable = false), + StructField("b", IntegerType, nullable = false))), + nullable = false)))) + + checkAnswer( + df.withColumn("a", $"a".dropFields("a.b", "b")), + Row(Row(Row(1, 3))) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("c", IntegerType, nullable = false))), nullable = false))), + nullable = false)))) + } + + test("dropFields user-facing examples") { + checkAnswer( + sql("SELECT named_struct('a', 1, 'b', 2) struct_col") + .select($"struct_col".dropFields("b")), + Row(Row(1))) + + checkAnswer( + sql("SELECT named_struct('a', 1, 'b', 2) struct_col") + .select($"struct_col".dropFields("c")), + Row(Row(1, 2))) + + checkAnswer( + sql("SELECT named_struct('a', 1, 'b', 2, 'c', 3) struct_col") + .select($"struct_col".dropFields("b", "c")), + Row(Row(1))) + + intercept[AnalysisException] { + sql("SELECT named_struct('a', 1, 'b', 2) struct_col") + .select($"struct_col".dropFields("a", "b")) + }.getMessage should include("cannot drop all fields in struct") + + checkAnswer( + sql("SELECT CAST(NULL AS struct) struct_col") + .select($"struct_col".dropFields("b")), + Row(null)) + + checkAnswer( + sql("SELECT named_struct('a', 1, 'b', 2, 'b', 3) struct_col") + .select($"struct_col".dropFields("b")), + Row(Row(1))) + + checkAnswer( + sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2)) struct_col") + .select($"struct_col".dropFields("a.b")), + Row(Row(Row(1)))) + + intercept[AnalysisException] { + sql("SELECT named_struct('a', named_struct('b', 1), 'a', named_struct('c', 2)) struct_col") + .select($"struct_col".dropFields("a.c")) + }.getMessage should include("Ambiguous reference to fields") + + checkAnswer( + sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2, 'c', 3)) struct_col") + .select($"struct_col".dropFields("a.b", "a.c")), + Row(Row(Row(1)))) + + checkAnswer( + sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2, 'c', 3)) struct_col") + .select($"struct_col".withField("a", $"struct_col.a".dropFields("b", "c"))), + Row(Row(Row(1)))) + } + + test("should correctly handle different dropField + withField + getField combinations") { + val structType = StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = false))) + + val structLevel1: DataFrame = spark.createDataFrame( + sparkContext.parallelize(Row(Row(1, 2)) :: Nil), + StructType(Seq(StructField("a", structType, nullable = false)))) + + val nullStructLevel1: DataFrame = spark.createDataFrame( + sparkContext.parallelize(Row(null) :: Nil), + StructType(Seq(StructField("a", structType, nullable = true)))) + + val nullableStructLevel1: 
DataFrame = spark.createDataFrame( + sparkContext.parallelize(Row(Row(1, 2)) :: Row(null) :: Nil), + StructType(Seq(StructField("a", structType, nullable = true)))) + + def check( + fieldOps: Column => Column, + getFieldName: String, + expectedValue: Option[Int]): Unit = { + + def query(df: DataFrame): DataFrame = + df.select(fieldOps(col("a")).getField(getFieldName).as("res")) + + checkAnswer( + query(structLevel1), + Row(expectedValue.orNull) :: Nil, + StructType(Seq(StructField("res", IntegerType, nullable = expectedValue.isEmpty)))) + + checkAnswer( + query(nullStructLevel1), + Row(null) :: Nil, + StructType(Seq(StructField("res", IntegerType, nullable = true)))) + + checkAnswer( + query(nullableStructLevel1), + Row(expectedValue.orNull) :: Row(null) :: Nil, + StructType(Seq(StructField("res", IntegerType, nullable = true)))) + } + + // add attribute, extract an attribute from the original struct + check(_.withField("c", lit(3)), "a", Some(1)) + check(_.withField("c", lit(3)), "b", Some(2)) + + // add attribute, extract added attribute + check(_.withField("c", lit(3)), "c", Some(3)) + check(_.withField("c", col("a.a")), "c", Some(1)) + check(_.withField("c", col("a.b")), "c", Some(2)) + check(_.withField("c", lit(null).cast(IntegerType)), "c", None) + + // replace attribute, extract an attribute from the original struct + check(_.withField("b", lit(3)), "a", Some(1)) + check(_.withField("a", lit(3)), "b", Some(2)) + + // replace attribute, extract replaced attribute + check(_.withField("b", lit(3)), "b", Some(3)) + check(_.withField("b", lit(null).cast(IntegerType)), "b", None) + check(_.withField("a", lit(3)), "a", Some(3)) + check(_.withField("a", lit(null).cast(IntegerType)), "a", None) + + // drop attribute, extract an attribute from the original struct + check(_.dropFields("b"), "a", Some(1)) + check(_.dropFields("a"), "b", Some(2)) + + // drop attribute, add attribute, extract an attribute from the original struct + check(_.dropFields("b").withField("c", lit(3)), "a", Some(1)) + check(_.dropFields("a").withField("c", lit(3)), "b", Some(2)) + + // drop attribute, add another attribute, extract added attribute + check(_.dropFields("a").withField("c", lit(3)), "c", Some(3)) + check(_.dropFields("b").withField("c", lit(3)), "c", Some(3)) + + // add attribute, drop attribute, extract an attribute from the original struct + check(_.withField("c", lit(3)).dropFields("a"), "b", Some(2)) + check(_.withField("c", lit(3)).dropFields("b"), "a", Some(1)) + + // add attribute, drop another attribute, extract added attribute + check(_.withField("c", lit(3)).dropFields("a"), "c", Some(3)) + check(_.withField("c", lit(3)).dropFields("b"), "c", Some(3)) + + // replace attribute, drop same attribute, extract an attribute from the original struct + check(_.withField("b", lit(3)).dropFields("b"), "a", Some(1)) + check(_.withField("a", lit(3)).dropFields("a"), "b", Some(2)) + + // add attribute, drop same attribute, extract an attribute from the original struct + check(_.withField("c", lit(3)).dropFields("c"), "a", Some(1)) + check(_.withField("c", lit(3)).dropFields("c"), "b", Some(2)) + + // add attribute, drop another attribute, extract added attribute + check(_.withField("b", lit(3)).dropFields("a"), "b", Some(3)) + check(_.withField("a", lit(3)).dropFields("b"), "a", Some(3)) + check(_.withField("b", lit(null).cast(IntegerType)).dropFields("a"), "b", None) + check(_.withField("a", lit(null).cast(IntegerType)).dropFields("b"), "a", None) + + // drop attribute, add same attribute, extract added 
attribute + check(_.dropFields("b").withField("b", lit(3)), "b", Some(3)) + check(_.dropFields("a").withField("a", lit(3)), "a", Some(3)) + check(_.dropFields("b").withField("b", lit(null).cast(IntegerType)), "b", None) + check(_.dropFields("a").withField("a", lit(null).cast(IntegerType)), "a", None) + check(_.dropFields("c").withField("c", lit(3)), "c", Some(3)) + + // add attribute, drop same attribute, add same attribute again, extract added attribute + check(_.withField("c", lit(3)).dropFields("c").withField("c", lit(4)), "c", Some(4)) + } + + test("should move field up one level of nesting") { + // move a field up one level + checkAnswer( + nullableStructLevel2.select( + col("a").withField("c", col("a.a.c")).dropFields("a.c").as("res")), + Row(null) :: Row(Row(null, null)) :: Row(Row(Row(1, null), 3)) :: Nil, + StructType(Seq( + StructField("res", StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = true))), + nullable = true), + StructField("c", IntegerType, nullable = true))), + nullable = true)))) + + // move a field up one level and then extract it + checkAnswer( + nullableStructLevel2.select( + col("a").withField("c", col("a.a.c")).dropFields("a.c").getField("c").as("res")), + Row(null) :: Row(null) :: Row(3) :: Nil, + StructType(Seq(StructField("res", IntegerType, nullable = true)))) + } + + test("should be able to refer to newly added nested column") { + intercept[AnalysisException] { + structLevel1.select($"a".withField("d", lit(4)).withField("e", $"a.d" + 1).as("a")) + }.getMessage should include("No such struct field d in a, b, c") + + checkAnswer( + structLevel1 + .select($"a".withField("d", lit(4)).as("a")) + .select($"a".withField("e", $"a.d" + 1).as("a")), + Row(Row(1, null, 3, 4, 5)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = true), + StructField("c", IntegerType, nullable = false), + StructField("d", IntegerType, nullable = false), + StructField("e", IntegerType, nullable = false))), + nullable = false)))) + } + + test("should be able to drop newly added nested column") { + Seq( + structLevel1.select($"a".withField("d", lit(4)).dropFields("d").as("a")), + structLevel1 + .select($"a".withField("d", lit(4)).as("a")) + .select($"a".dropFields("d").as("a")) + ).foreach { query => + checkAnswer( + query, + Row(Row(1, null, 3)) :: Nil, + StructType(Seq( + StructField("a", structType, nullable = false)))) + } + } + + test("should still be able to refer to dropped column within the same select statement") { + // we can still access the nested column even after dropping it within the same select statement + checkAnswer( + structLevel1.select($"a".dropFields("c").withField("z", $"a.c").as("a")), + Row(Row(1, null, 3)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = true), + StructField("z", IntegerType, nullable = false))), + nullable = false)))) + + // we can't access the nested column in subsequent select statement after dropping it in a + // previous select statement + intercept[AnalysisException]{ + structLevel1 + .select($"a".dropFields("c").as("a")) + .select($"a".withField("z", $"a.c")).as("a") + }.getMessage should include("No such struct field c in a, b;") + } + + test("nestedDf should generate nested DataFrames") { + checkAnswer( + emptyNestedDf(1, 1, nullable = 
false), + Seq.empty[Row], + StructType(Seq(StructField("nested0Col0", StructType(Seq( + StructField("nested1Col0", IntegerType, nullable = false))), + nullable = false)))) + + checkAnswer( + emptyNestedDf(1, 2, nullable = false), + Seq.empty[Row], + StructType(Seq(StructField("nested0Col0", StructType(Seq( + StructField("nested1Col0", IntegerType, nullable = false), + StructField("nested1Col1", IntegerType, nullable = false))), + nullable = false)))) + + checkAnswer( + emptyNestedDf(2, 1, nullable = false), + Seq.empty[Row], + StructType(Seq(StructField("nested0Col0", StructType(Seq( + StructField("nested1Col0", StructType(Seq( + StructField("nested2Col0", IntegerType, nullable = false))), + nullable = false))), + nullable = false)))) + + checkAnswer( + emptyNestedDf(2, 2, nullable = false), + Seq.empty[Row], + StructType(Seq(StructField("nested0Col0", StructType(Seq( + StructField("nested1Col0", StructType(Seq( + StructField("nested2Col0", IntegerType, nullable = false), + StructField("nested2Col1", IntegerType, nullable = false))), + nullable = false), + StructField("nested1Col1", IntegerType, nullable = false))), + nullable = false)))) + + checkAnswer( + emptyNestedDf(2, 2, nullable = true), + Seq.empty[Row], + StructType(Seq(StructField("nested0Col0", StructType(Seq( + StructField("nested1Col0", StructType(Seq( + StructField("nested2Col0", IntegerType, nullable = false), + StructField("nested2Col1", IntegerType, nullable = false))), + nullable = true), + StructField("nested1Col1", IntegerType, nullable = false))), + nullable = true)))) + } + + Seq(Performant, NonPerformant).foreach { method => + Seq(false, true).foreach { nullable => + test(s"should add and drop 1 column at each depth of nesting using ${method.name} method, " + + s"nullable = $nullable") { + val maxDepth = 3 + + // dataframe with nested*Col0 to nested*Col2 at each depth + val inputDf = emptyNestedDf(maxDepth, 3, nullable) + + // add nested*Col3 and drop nested*Col2 + val modifiedColumn = method( + column = col(nestedColName(0, 0)), + numsToAdd = Seq(3), + numsToDrop = Seq(2), + maxDepth = maxDepth + ).as(nestedColName(0, 0)) + val resultDf = inputDf.select(modifiedColumn) + + // dataframe with nested*Col0, nested*Col1, nested*Col3 at each depth + val expectedDf = { + val colNums = Seq(0, 1, 3) + val nestedColumnDataType = nestedStructType(colNums, nullable, maxDepth) + + spark.createDataFrame( + spark.sparkContext.emptyRDD[Row], + StructType(Seq(StructField(nestedColName(0, 0), nestedColumnDataType, nullable)))) + } + + checkAnswer(resultDf, expectedDf.collect(), expectedDf.schema) + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UpdateFieldsBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/UpdateFieldsBenchmark.scala new file mode 100644 index 0000000000000..28af552fe586b --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/UpdateFieldsBenchmark.scala @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.benchmark.Benchmark +import org.apache.spark.sql.execution.benchmark.SqlBasedBenchmark +import org.apache.spark.sql.functions.{col, lit} +import org.apache.spark.sql.types.{IntegerType, StructField, StructType} + +/** + * Benchmark to measure Spark's performance analyzing and optimizing long UpdateFields chains. + * + * {{{ + * To run this benchmark: + * 1. without sbt: + * bin/spark-submit --class + * 2. with sbt: + * build/sbt "sql/test:runMain " + * 3. generate result: + * SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain " + * Results will be written to "benchmarks/UpdateFieldsBenchmark-results.txt". + * }}} + */ +object UpdateFieldsBenchmark extends SqlBasedBenchmark { + + def nestedColName(d: Int, colNum: Int): String = s"nested${d}Col$colNum" + + def nestedStructType( + colNums: Seq[Int], + nullable: Boolean, + maxDepth: Int, + currDepth: Int = 1): StructType = { + + if (currDepth == maxDepth) { + val fields = colNums.map { colNum => + val name = nestedColName(currDepth, colNum) + StructField(name, IntegerType, nullable = false) + } + StructType(fields) + } else { + val fields = colNums.foldLeft(Seq.empty[StructField]) { + case (structFields, colNum) if colNum == 0 => + val nested = nestedStructType(colNums, nullable, maxDepth, currDepth + 1) + structFields :+ StructField(nestedColName(currDepth, colNum), nested, nullable) + case (structFields, colNum) => + val name = nestedColName(currDepth, colNum) + structFields :+ StructField(name, IntegerType, nullable = false) + } + StructType(fields) + } + } + + /** + * Utility function for generating an empty DataFrame with nested columns. + * + * @param maxDepth: The depth to which to create nested columns. + * @param numColsAtEachDepth: The number of columns to create at each depth. + * @param nullable: This value is used to set the nullability of any StructType columns. 
+ */ + def emptyNestedDf(maxDepth: Int, numColsAtEachDepth: Int, nullable: Boolean): DataFrame = { + require(maxDepth > 0) + require(numColsAtEachDepth > 0) + + val nestedColumnDataType = nestedStructType(0 until numColsAtEachDepth, nullable, maxDepth) + spark.createDataFrame( + spark.sparkContext.emptyRDD[Row], + StructType(Seq(StructField(nestedColName(0, 0), nestedColumnDataType, nullable)))) + } + + trait ModifyNestedColumns { + val name: String + def apply(column: Column, numsToAdd: Seq[Int], numsToDrop: Seq[Int], maxDepth: Int): Column + } + + object Performant extends ModifyNestedColumns { + override val name: String = "performant" + + override def apply( + column: Column, + numsToAdd: Seq[Int], + numsToDrop: Seq[Int], + maxDepth: Int): Column = helper(column, numsToAdd, numsToDrop, maxDepth, 1) + + private def helper( + column: Column, + numsToAdd: Seq[Int], + numsToDrop: Seq[Int], + maxDepth: Int, + currDepth: Int): Column = { + + // drop columns at the current depth + val dropped = if (numsToDrop.nonEmpty) { + column.dropFields(numsToDrop.map(num => nestedColName(currDepth, num)): _*) + } else column + + // add columns at the current depth + val added = numsToAdd.foldLeft(dropped) { + (res, num) => res.withField(nestedColName(currDepth, num), lit(num)) + } + + if (currDepth == maxDepth) { + added + } else { + // add/drop columns at the next depth + val newValue = helper( + column = col((0 to currDepth).map(d => nestedColName(d, 0)).mkString(".")), + numsToAdd = numsToAdd, + numsToDrop = numsToDrop, + currDepth = currDepth + 1, + maxDepth = maxDepth) + added.withField(nestedColName(currDepth, 0), newValue) + } + } + } + + object NonPerformant extends ModifyNestedColumns { + override val name: String = "non-performant" + + override def apply( + column: Column, + numsToAdd: Seq[Int], + numsToDrop: Seq[Int], + maxDepth: Int): Column = { + + val dropped = if (numsToDrop.nonEmpty) { + val colsToDrop = (1 to maxDepth).flatMap { depth => + numsToDrop.map(num => s"${prefix(depth)}${nestedColName(depth, num)}") + } + column.dropFields(colsToDrop: _*) + } else column + + val added = { + val colsToAdd = (1 to maxDepth).flatMap { depth => + numsToAdd.map(num => (s"${prefix(depth)}${nestedColName(depth, num)}", lit(num))) + } + colsToAdd.foldLeft(dropped)((col, add) => col.withField(add._1, add._2)) + } + + added + } + + private def prefix(depth: Int): String = + if (depth == 1) "" + else (1 until depth).map(d => nestedColName(d, 0)).mkString("", ".", ".") + } + + private def updateFieldsBenchmark( + methods: Seq[ModifyNestedColumns], + maxDepth: Int, + initialNumberOfColumns: Int, + numsToAdd: Seq[Int] = Seq.empty, + numsToDrop: Seq[Int] = Seq.empty): Unit = { + + val name = s"Add ${numsToAdd.length} columns and drop ${numsToDrop.length} columns " + + s"at $maxDepth different depths of nesting" + + runBenchmark(name) { + val benchmark = new Benchmark( + name = name, + // The purpose of this benchmark is to ensure Spark is able to analyze and optimize long + // UpdateFields chains quickly so it runs over 0 rows of data. 
+        valuesPerIteration = 0,
+        output = output)
+
+      val nonNullableStructsDf = emptyNestedDf(maxDepth, initialNumberOfColumns, nullable = false)
+      val nullableStructsDf = emptyNestedDf(maxDepth, initialNumberOfColumns, nullable = true)
+
+      methods.foreach { method =>
+        val modifiedColumn = method(
+          column = col(nestedColName(0, 0)),
+          numsToAdd = numsToAdd,
+          numsToDrop = numsToDrop,
+          maxDepth = maxDepth
+        ).as(nestedColName(0, 0))
+
+        benchmark.addCase(s"To non-nullable StructTypes using ${method.name} method") { _ =>
+          nonNullableStructsDf.select(modifiedColumn).queryExecution.optimizedPlan
+        }
+
+        benchmark.addCase(s"To nullable StructTypes using ${method.name} method") { _ =>
+          nullableStructsDf.select(modifiedColumn).queryExecution.optimizedPlan
+        }
+      }
+
+      benchmark.run()
+    }
+  }
+
+  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
+    // This benchmark compares the performant and non-performant methods of writing the same query.
+    // We use small values for maxDepth, numsToAdd, and numsToDrop because the NonPerformant method
+    // scales extremely poorly with the number of nested columns being added/dropped.
+    updateFieldsBenchmark(
+      methods = Seq(Performant, NonPerformant),
+      maxDepth = 3,
+      initialNumberOfColumns = 5,
+      numsToAdd = 5 to 6,
+      numsToDrop = 3 to 4)
+
+    // This benchmark is to show that the performant method of writing a query when we want to add
+    // and drop a large number of nested columns scales nicely.
+    updateFieldsBenchmark(
+      methods = Seq(Performant),
+      maxDepth = 100,
+      initialNumberOfColumns = 51,
+      numsToAdd = 51 to 100,
+      numsToDrop = 1 to 50)
+  }
+}

From ddc7012b3d4cd05c6695378989c9d1a78102bbbd Mon Sep 17 00:00:00 2001
From: angerszhu
Date: Tue, 6 Oct 2020 09:09:19 +0000
Subject: [PATCH 0172/1009] [SPARK-32243][SQL] HiveSessionCatalog call super.makeFunctionExpression should throw earlier when got Spark UDAF Invalid arguments number error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### What changes were proposed in this pull request?
When we create a UDAF using a class that extends `UserDefinedAggregateFunction` and then call that function with Hive support enabled, `HiveSessionCatalog` calls `super.makeFunctionExpression` but swallows the error it throws. For example, if the function needs 2 parameters and we only supply 1, the exception only shows

```
No handler for UDF/UDAF/UDTF xxxxxxxx
```

This is confusing for developers; we should also surface the error thrown by the super method.

For this PR's UT: before the change, the exception thrown is

```
No handler for UDF/UDAF/UDTF 'org.apache.spark.sql.hive.execution.LongProductSum'; line 1 pos 7
```

After this PR, the exception thrown is

```
Spark UDAF Error: Invalid number of arguments for function longProductSum. Expected: 2; Found: 1;
Hive UDF/UDAF/UDTF Error: No handler for UDF/UDAF/UDTF 'org.apache.spark.sql.hive.execution.LongProductSum'; line 1 pos 7
```

### Why are the changes needed?
Show a more detailed error message when a UDAF call fails to resolve.

### Does this PR introduce _any_ user-facing change?
Users will see a more detailed error message when using Spark SQL UDAFs in Hive support mode.

### How was this patch tested?
Added UT.

Closes #29054 from AngersZhuuuu/SPARK-32243.
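For context, a minimal sketch of the scenario this error-message change targets. It assumes a `SparkSession` built with `enableHiveSupport()` and a hypothetical two-argument Spark UDAF class `com.example.udaf.LongProductSum`; the class name and registration below are illustrative and not part of this patch:

```
// Hypothetical class: extends org.apache.spark.sql.expressions.UserDefinedAggregateFunction
// and declares an inputSchema with two LongType columns.
spark.sql("CREATE FUNCTION long_product_sum AS 'com.example.udaf.LongProductSum'")

// Calling it with only one argument is what previously produced just the generic
// "No handler for UDF/UDAF/UDTF ..." message; after this patch the Spark-side
// "Invalid number of arguments" error is reported as well.
spark.sql("SELECT long_product_sum(a) FROM VALUES (1L, 2L) AS t(a, b)").show()
```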
Authored-by: angerszhu Signed-off-by: Wenchen Fan --- .../catalog/InvalidUDFClassException.scala | 28 +++++ .../sql/catalyst/catalog/SessionCatalog.scala | 2 +- .../spark/sql/hive/HiveSessionCatalog.scala | 103 ++++++++++-------- .../sql/hive/execution/HiveUDAFSuite.scala | 14 +++ 4 files changed, 102 insertions(+), 45 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InvalidUDFClassException.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InvalidUDFClassException.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InvalidUDFClassException.scala new file mode 100644 index 0000000000000..bc02efd5113c2 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InvalidUDFClassException.scala @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.catalog + +import org.apache.spark.sql.AnalysisException + +/** + * Thrown when a query failed for invalid function class, usually because a SQL + * function's class does not follow the rules of the UDF/UDAF/UDTF class definition. + */ +class InvalidUDFClassException private[sql](message: String) + extends AnalysisException(message, None, None, None, None) { +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index e9a02c15f7362..4865629329831 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -1335,7 +1335,7 @@ class SessionCatalog( } e } else { - throw new AnalysisException(s"No handler for UDAF '${clazz.getCanonicalName}'. " + + throw new InvalidUDFClassException(s"No handler for UDAF '${clazz.getCanonicalName}'. " + s"Use sparkSession.udf.register(...) 
instead.") } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala index bc7760c982aab..f24834b938a1e 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala @@ -30,7 +30,7 @@ import org.apache.hadoop.hive.ql.udf.generic.{AbstractGenericUDAFResolver, Gener import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.catalyst.analysis.FunctionRegistry -import org.apache.spark.sql.catalyst.catalog.{CatalogFunction, ExternalCatalog, FunctionResourceLoader, GlobalTempViewManager, SessionCatalog} +import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.expressions.{Cast, Expression} import org.apache.spark.sql.catalyst.parser.ParserInterface import org.apache.spark.sql.hive.HiveShim.HiveFunctionWrapper @@ -57,6 +57,56 @@ private[sql] class HiveSessionCatalog( parser, functionResourceLoader) { + private def makeHiveFunctionExpression( + name: String, + clazz: Class[_], + input: Seq[Expression]): Expression = { + var udfExpr: Option[Expression] = None + try { + // When we instantiate hive UDF wrapper class, we may throw exception if the input + // expressions don't satisfy the hive UDF, such as type mismatch, input number + // mismatch, etc. Here we catch the exception and throw AnalysisException instead. + if (classOf[UDF].isAssignableFrom(clazz)) { + udfExpr = Some(HiveSimpleUDF(name, new HiveFunctionWrapper(clazz.getName), input)) + udfExpr.get.dataType // Force it to check input data types. + } else if (classOf[GenericUDF].isAssignableFrom(clazz)) { + udfExpr = Some(HiveGenericUDF(name, new HiveFunctionWrapper(clazz.getName), input)) + udfExpr.get.dataType // Force it to check input data types. + } else if (classOf[AbstractGenericUDAFResolver].isAssignableFrom(clazz)) { + udfExpr = Some(HiveUDAFFunction(name, new HiveFunctionWrapper(clazz.getName), input)) + udfExpr.get.dataType // Force it to check input data types. + } else if (classOf[UDAF].isAssignableFrom(clazz)) { + udfExpr = Some(HiveUDAFFunction( + name, + new HiveFunctionWrapper(clazz.getName), + input, + isUDAFBridgeRequired = true)) + udfExpr.get.dataType // Force it to check input data types. + } else if (classOf[GenericUDTF].isAssignableFrom(clazz)) { + udfExpr = Some(HiveGenericUDTF(name, new HiveFunctionWrapper(clazz.getName), input)) + // Force it to check data types. + udfExpr.get.asInstanceOf[HiveGenericUDTF].elementSchema + } + } catch { + case NonFatal(e) => + val noHandlerMsg = s"No handler for UDF/UDAF/UDTF '${clazz.getCanonicalName}': $e" + val errorMsg = + if (classOf[GenericUDTF].isAssignableFrom(clazz)) { + s"$noHandlerMsg\nPlease make sure your function overrides " + + "`public StructObjectInspector initialize(ObjectInspector[] args)`." + } else { + noHandlerMsg + } + val analysisException = new AnalysisException(errorMsg) + analysisException.setStackTrace(e.getStackTrace) + throw analysisException + } + udfExpr.getOrElse { + throw new InvalidUDFClassException( + s"No handler for UDF/UDAF/UDTF '${clazz.getCanonicalName}'") + } + } + /** * Constructs a [[Expression]] based on the provided class that represents a function. * @@ -69,49 +119,14 @@ private[sql] class HiveSessionCatalog( // Current thread context classloader may not be the one loaded the class. 
Need to switch // context classloader to initialize instance properly. Utils.withContextClassLoader(clazz.getClassLoader) { - Try(super.makeFunctionExpression(name, clazz, input)).getOrElse { - var udfExpr: Option[Expression] = None - try { - // When we instantiate hive UDF wrapper class, we may throw exception if the input - // expressions don't satisfy the hive UDF, such as type mismatch, input number - // mismatch, etc. Here we catch the exception and throw AnalysisException instead. - if (classOf[UDF].isAssignableFrom(clazz)) { - udfExpr = Some(HiveSimpleUDF(name, new HiveFunctionWrapper(clazz.getName), input)) - udfExpr.get.dataType // Force it to check input data types. - } else if (classOf[GenericUDF].isAssignableFrom(clazz)) { - udfExpr = Some(HiveGenericUDF(name, new HiveFunctionWrapper(clazz.getName), input)) - udfExpr.get.dataType // Force it to check input data types. - } else if (classOf[AbstractGenericUDAFResolver].isAssignableFrom(clazz)) { - udfExpr = Some(HiveUDAFFunction(name, new HiveFunctionWrapper(clazz.getName), input)) - udfExpr.get.dataType // Force it to check input data types. - } else if (classOf[UDAF].isAssignableFrom(clazz)) { - udfExpr = Some(HiveUDAFFunction( - name, - new HiveFunctionWrapper(clazz.getName), - input, - isUDAFBridgeRequired = true)) - udfExpr.get.dataType // Force it to check input data types. - } else if (classOf[GenericUDTF].isAssignableFrom(clazz)) { - udfExpr = Some(HiveGenericUDTF(name, new HiveFunctionWrapper(clazz.getName), input)) - udfExpr.get.asInstanceOf[HiveGenericUDTF].elementSchema // Force it to check data types. - } - } catch { - case NonFatal(e) => - val noHandlerMsg = s"No handler for UDF/UDAF/UDTF '${clazz.getCanonicalName}': $e" - val errorMsg = - if (classOf[GenericUDTF].isAssignableFrom(clazz)) { - s"$noHandlerMsg\nPlease make sure your function overrides " + - "`public StructObjectInspector initialize(ObjectInspector[] args)`." - } else { - noHandlerMsg - } - val analysisException = new AnalysisException(errorMsg) - analysisException.setStackTrace(e.getStackTrace) - throw analysisException - } - udfExpr.getOrElse { - throw new AnalysisException(s"No handler for UDF/UDAF/UDTF '${clazz.getCanonicalName}'") - } + try { + super.makeFunctionExpression(name, clazz, input) + } catch { + // If `super.makeFunctionExpression` throw `InvalidUDFClassException`, we construct + // Hive UDF/UDAF/UDTF with function definition. Otherwise, we just throw it earlier. 
+ case _: InvalidUDFClassException => + makeHiveFunctionExpression(name, clazz, input) + case e => throw e } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDAFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDAFSuite.scala index 9e33a8ee4cc5c..ed44dcd8d7a29 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDAFSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDAFSuite.scala @@ -161,6 +161,20 @@ class HiveUDAFSuite extends QueryTest checkAnswer(sql("select histogram_numeric(a,2) from abc where a=3"), Row(null)) } } + + test("SPARK-32243: Spark UDAF Invalid arguments number error should throw earlier") { + // func need two arguments + val functionName = "longProductSum" + val functionClass = "org.apache.spark.sql.hive.execution.LongProductSum" + withUserDefinedFunction(functionName -> true) { + sql(s"CREATE TEMPORARY FUNCTION $functionName AS '$functionClass'") + val e = intercept[AnalysisException] { + sql(s"SELECT $functionName(100)") + }.getMessage + assert(e.contains( + s"Invalid number of arguments for function $functionName. Expected: 2; Found: 1;")) + } + } } /** From 0812d6c17cc4876bb87a9d1fec35ec8c7b2365f0 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Tue, 6 Oct 2020 18:11:24 +0900 Subject: [PATCH 0173/1009] [SPARK-33073][PYTHON] Improve error handling on Pandas to Arrow conversion failures ### What changes were proposed in this pull request? This improves error handling when a failure in conversion from Pandas to Arrow occurs. And fixes tests to be compatible with upcoming Arrow 2.0.0 release. ### Why are the changes needed? Current tests will fail with Arrow 2.0.0 because of a change in error message when the schema is invalid. For these cases, the current error message also includes information on disabling safe conversion config, which is mainly meant for floating point truncation and overflow. The tests have been updated to use a message that is show for past Arrow versions, and upcoming. If the user enters an invalid schema, the error produced by pyarrow is not consistent and either `TypeError` or `ArrowInvalid`, with the latter being caught, and raised as a `RuntimeError` with the extra info. The error handling is improved by: - narrowing the exception type to `TypeError`s, which `ArrowInvalid` is a subclass and what is raised on safe conversion failures. - The exception is only raised with additional information on disabling "spark.sql.execution.pandas.convertToArrowArraySafely" if it is enabled in the first place. - The original exception is chained to better show it to the user. ### Does this PR introduce _any_ user-facing change? Yes, the error re-raised changes from a RuntimeError to a ValueError, which better categorizes this type of error and in-line with the original Arrow error. ### How was this patch tested? Existing tests, using pyarrow 1.0.1 and 2.0.0-snapshot Closes #29951 from BryanCutler/arrow-better-handle-pandas-errors-SPARK-33073. 
Authored-by: Bryan Cutler Signed-off-by: HyukjinKwon --- python/pyspark/sql/pandas/serializers.py | 17 ++++++++++------- python/pyspark/sql/tests/test_arrow.py | 9 +++++---- .../sql/tests/test_pandas_grouped_map.py | 15 ++++++++------- 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/python/pyspark/sql/pandas/serializers.py b/python/pyspark/sql/pandas/serializers.py index 4b91c6a0f8730..63fb8562799e3 100644 --- a/python/pyspark/sql/pandas/serializers.py +++ b/python/pyspark/sql/pandas/serializers.py @@ -153,13 +153,16 @@ def create_array(s, t): s = s.astype(s.dtypes.categories.dtype) try: array = pa.Array.from_pandas(s, mask=mask, type=t, safe=self._safecheck) - except pa.ArrowException as e: - error_msg = "Exception thrown when converting pandas.Series (%s) to Arrow " + \ - "Array (%s). It can be caused by overflows or other unsafe " + \ - "conversions warned by Arrow. Arrow safe type check can be " + \ - "disabled by using SQL config " + \ - "`spark.sql.execution.pandas.convertToArrowArraySafely`." - raise RuntimeError(error_msg % (s.dtype, t), e) + except ValueError as e: + if self._safecheck: + error_msg = "Exception thrown when converting pandas.Series (%s) to " + \ + "Arrow Array (%s). It can be caused by overflows or other " + \ + "unsafe conversions warned by Arrow. Arrow safe type check " + \ + "can be disabled by using SQL config " + \ + "`spark.sql.execution.pandas.convertToArrowArraySafely`." + raise ValueError(error_msg % (s.dtype, t)) from e + else: + raise e return array arrs = [] diff --git a/python/pyspark/sql/tests/test_arrow.py b/python/pyspark/sql/tests/test_arrow.py index c6497923d84fb..55d5e9017b345 100644 --- a/python/pyspark/sql/tests/test_arrow.py +++ b/python/pyspark/sql/tests/test_arrow.py @@ -264,11 +264,12 @@ def test_createDataFrame_with_schema(self): def test_createDataFrame_with_incorrect_schema(self): pdf = self.create_pandas_data_frame() fields = list(self.schema) - fields[0], fields[1] = fields[1], fields[0] # swap str with int + fields[5], fields[6] = fields[6], fields[5] # swap decimal with date wrong_schema = StructType(fields) - with QuietTest(self.sc): - with self.assertRaisesRegexp(Exception, "integer.*required"): - self.spark.createDataFrame(pdf, schema=wrong_schema) + with self.sql_conf({"spark.sql.execution.pandas.convertToArrowArraySafely": False}): + with QuietTest(self.sc): + with self.assertRaisesRegexp(Exception, "[D|d]ecimal.*got.*date"): + self.spark.createDataFrame(pdf, schema=wrong_schema) def test_createDataFrame_with_names(self): pdf = self.create_pandas_data_frame() diff --git a/python/pyspark/sql/tests/test_pandas_grouped_map.py b/python/pyspark/sql/tests/test_pandas_grouped_map.py index 81b6d5efb710a..93e37125eaa33 100644 --- a/python/pyspark/sql/tests/test_pandas_grouped_map.py +++ b/python/pyspark/sql/tests/test_pandas_grouped_map.py @@ -446,15 +446,16 @@ def int_index(pdf): def column_name_typo(pdf): return pd.DataFrame({'iid': pdf.id, 'v': pdf.v}) - @pandas_udf('id long, v int', PandasUDFType.GROUPED_MAP) + @pandas_udf('id long, v decimal', PandasUDFType.GROUPED_MAP) def invalid_positional_types(pdf): - return pd.DataFrame([(u'a', 1.2)]) + return pd.DataFrame([(1, datetime.date(2020, 10, 5))]) - with QuietTest(self.sc): - with self.assertRaisesRegexp(Exception, "KeyError: 'id'"): - grouped_df.apply(column_name_typo).collect() - with self.assertRaisesRegexp(Exception, "an integer is required"): - grouped_df.apply(invalid_positional_types).collect() + with 
self.sql_conf({"spark.sql.execution.pandas.convertToArrowArraySafely": False}): + with QuietTest(self.sc): + with self.assertRaisesRegexp(Exception, "KeyError: 'id'"): + grouped_df.apply(column_name_typo).collect() + with self.assertRaisesRegexp(Exception, "[D|d]ecimal.*got.*date"): + grouped_df.apply(invalid_positional_types).collect() def test_positional_assignment_conf(self): with self.sql_conf({ From b5e4b8c73e10743eef4d35b6e82053a5a065b2ed Mon Sep 17 00:00:00 2001 From: Michael Munday Date: Tue, 6 Oct 2020 08:31:06 -0500 Subject: [PATCH 0174/1009] [SPARK-27428][CORE][TEST] Increase receive buffer size used in StatsdSinkSuite ### What changes were proposed in this pull request? Increase size of socket receive buffer in these tests. ### Why are the changes needed? The socket receive buffer size set in this test was too small for the StatsdSinkSuite tests to run reliably on some systems. For a test in this suite to run reliably the buffer needs to be large enough to hold all the data in the packets being sent in a test along with any additional kernel or protocol overhead. The amount of kernel overhead per packet can vary from system to system but is typically far higher than the protocol overhead. If the receive buffer is too small and fills up then packets are silently dropped. This leads to the test failing with a timeout. If the socket defaults to a larger receive buffer (normally true) then we should keep that size. As well as increasing the minimum buffer size I've also decoupled the datagram packet buffer size from the receive buffer size. The receive buffer should in general be far larger to account for the fact that multiple packets might be buffered, as well as the aforementioned overhead. Any truncated data in individual packets will be picked up by the tests. ### Does this PR introduce _any_ user-facing change? No, this only affects the tests. ### How was this patch tested? Existing tests on IBM Z and x86. Closes #29819 from mundaym/fix-statsd. Authored-by: Michael Munday Signed-off-by: Sean Owen --- .../spark/metrics/sink/StatsdSinkSuite.scala | 29 ++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/metrics/sink/StatsdSinkSuite.scala b/core/src/test/scala/org/apache/spark/metrics/sink/StatsdSinkSuite.scala index 0e21a36071c42..3d4b8c868d6fc 100644 --- a/core/src/test/scala/org/apache/spark/metrics/sink/StatsdSinkSuite.scala +++ b/core/src/test/scala/org/apache/spark/metrics/sink/StatsdSinkSuite.scala @@ -35,12 +35,27 @@ class StatsdSinkSuite extends SparkFunSuite { STATSD_KEY_UNIT -> "seconds", STATSD_KEY_HOST -> "127.0.0.1" ) - private val socketTimeout = 30000 // milliseconds - private val socketBufferSize = 8192 + // The maximum size of a single datagram packet payload. Payloads + // larger than this will be truncated. + private val maxPayloadSize = 256 // bytes + + // The receive buffer must be large enough to hold all inflight + // packets. This includes any kernel and protocol overhead. + // This value was determined experimentally and should be + // increased if timeouts are seen. + private val socketMinRecvBufferSize = 16384 // bytes + private val socketTimeout = 30000 // milliseconds private def withSocketAndSink(testCode: (DatagramSocket, StatsdSink) => Any): Unit = { val socket = new DatagramSocket - socket.setReceiveBufferSize(socketBufferSize) + + // Leave the receive buffer size untouched unless it is too + // small. 
If the receive buffer is too small packets will be + // silently dropped and receive operations will timeout. + if (socket.getReceiveBufferSize() < socketMinRecvBufferSize) { + socket.setReceiveBufferSize(socketMinRecvBufferSize) + } + socket.setSoTimeout(socketTimeout) val props = new Properties defaultProps.foreach(e => props.put(e._1, e._2)) @@ -61,7 +76,7 @@ class StatsdSinkSuite extends SparkFunSuite { sink.registry.register("counter", counter) sink.report() - val p = new DatagramPacket(new Array[Byte](socketBufferSize), socketBufferSize) + val p = new DatagramPacket(new Array[Byte](maxPayloadSize), maxPayloadSize) socket.receive(p) val result = new String(p.getData, 0, p.getLength, UTF_8) @@ -77,7 +92,7 @@ class StatsdSinkSuite extends SparkFunSuite { sink.registry.register("gauge", gauge) sink.report() - val p = new DatagramPacket(new Array[Byte](socketBufferSize), socketBufferSize) + val p = new DatagramPacket(new Array[Byte](maxPayloadSize), maxPayloadSize) socket.receive(p) val result = new String(p.getData, 0, p.getLength, UTF_8) @@ -87,7 +102,7 @@ class StatsdSinkSuite extends SparkFunSuite { test("metrics StatsD sink with Histogram") { withSocketAndSink { (socket, sink) => - val p = new DatagramPacket(new Array[Byte](socketBufferSize), socketBufferSize) + val p = new DatagramPacket(new Array[Byte](maxPayloadSize), maxPayloadSize) val histogram = new Histogram(new UniformReservoir) histogram.update(10) histogram.update(20) @@ -121,7 +136,7 @@ class StatsdSinkSuite extends SparkFunSuite { test("metrics StatsD sink with Timer") { withSocketAndSink { (socket, sink) => - val p = new DatagramPacket(new Array[Byte](socketBufferSize), socketBufferSize) + val p = new DatagramPacket(new Array[Byte](maxPayloadSize), maxPayloadSize) val timer = new Timer() timer.update(1, SECONDS) timer.update(2, SECONDS) From ec6fccb922f721e5a44d89c93f711f44ce9d6592 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 6 Oct 2020 14:33:34 +0000 Subject: [PATCH 0175/1009] [SPARK-32243][SQL][FOLLOWUP] Fix compilation in HiveSessionCatalog Fix a mistake when merging https://github.com/apache/spark/pull/29054 Closes #29955 from cloud-fan/hot-fix. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala index f24834b938a1e..8a248a251820f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala @@ -126,7 +126,7 @@ private[sql] class HiveSessionCatalog( // Hive UDF/UDAF/UDTF with function definition. Otherwise, we just throw it earlier. case _: InvalidUDFClassException => makeHiveFunctionExpression(name, clazz, input) - case e => throw e + case NonFatal(e) => throw e } } } From 17d309dfacd4bdebbcd9609dd24a9e65a1a2b4f5 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 6 Oct 2020 16:01:10 +0000 Subject: [PATCH 0176/1009] [SPARK-32963][SQL] empty string should be consistent for schema name in SparkGetSchemasOperation ### What changes were proposed in this pull request? This PR makes the empty string for schema name pattern match the global temp view as same as it works for other databases. This PR also add new tests to covering different kinds of wildcards to verify the SparkGetSchemasOperation ### Why are the changes needed? 
When the schema name is empty string, it is considered as ".*" and can match all databases in the catalog. But when it can not match the global temp view as it is not converted to ".*" ### Does this PR introduce _any_ user-facing change? yes , JDBC operation like `statement.getConnection.getMetaData..getSchemas(null, "")` now also provides the global temp view in the result set. ### How was this patch tested? new tests Closes #29834 from yaooqinn/SPARK-32963. Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../SparkGetSchemasOperation.scala | 3 +- .../SparkMetadataOperationSuite.scala | 35 ++++++++++++++----- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala index 16fd502048e80..e58357a415545 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala @@ -77,7 +77,8 @@ private[hive] class SparkGetSchemasOperation( val globalTempViewDb = sqlContext.sessionState.catalog.globalTempViewManager.database val databasePattern = Pattern.compile(CLIServiceUtils.patternToRegex(schemaName)) - if (databasePattern.matcher(globalTempViewDb).matches()) { + if (schemaName == null || schemaName.isEmpty || + databasePattern.matcher(globalTempViewDb).matches()) { rowSet.addRow(Array[AnyRef](globalTempViewDb, DEFAULT_HIVE_CATALOG)) } setState(OperationState.FINISHED) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala index 7369dbfcf7a51..818f387f131d6 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.hive.thriftserver import java.sql.{DatabaseMetaData, ResultSet} +import org.apache.hive.service.cli.HiveSQLException + import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.types._ @@ -28,23 +30,40 @@ class SparkMetadataOperationSuite extends HiveThriftJdbcTest { test("Spark's own GetSchemasOperation(SparkGetSchemasOperation)") { def checkResult(rs: ResultSet, dbNames: Seq[String]): Unit = { - for (i <- dbNames.indices) { - assert(rs.next()) - assert(rs.getString("TABLE_SCHEM") === dbNames(i)) + val expected = dbNames.iterator + while(rs.next() || expected.hasNext) { + assert(rs.getString("TABLE_SCHEM") === expected.next) + assert(rs.getString("TABLE_CATALOG").isEmpty) } // Make sure there are no more elements assert(!rs.next()) + assert(!expected.hasNext, "All expected schemas should be visited") } - withDatabase("db1", "db2") { statement => - Seq("CREATE DATABASE db1", "CREATE DATABASE db2").foreach(statement.execute) - + val dbs = Seq("db1", "db2", "db33", "db44") + val dbDflts = Seq("default", "global_temp") + withDatabase(dbs: _*) { statement => + dbs.foreach( db => statement.execute(s"CREATE DATABASE IF NOT EXISTS $db")) val metaData = statement.getConnection.getMetaData - checkResult(metaData.getSchemas(null, "%"), Seq("db1", "db2", "default", "global_temp")) 
+ Seq("", "%", null, ".*", "_*", "_%", ".%") foreach { pattern => + checkResult(metaData.getSchemas(null, pattern), dbs ++ dbDflts) + } + + Seq("db%", "db*") foreach { pattern => + checkResult(metaData.getSchemas(null, pattern), dbs) + } + + Seq("db_", "db.") foreach { pattern => + checkResult(metaData.getSchemas(null, pattern), dbs.take(2)) + } + checkResult(metaData.getSchemas(null, "db1"), Seq("db1")) checkResult(metaData.getSchemas(null, "db_not_exist"), Seq.empty) - checkResult(metaData.getSchemas(null, "db*"), Seq("db1", "db2")) + + val e = intercept[HiveSQLException](metaData.getSchemas(null, "*")) + assert(e.getCause.getMessage === + "Error operating GET_SCHEMAS Dangling meta character '*' near index 0\n*\n^") } } From 3b2a38d73578e8760dbd6c34e427896a8cde00dd Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Tue, 6 Oct 2020 09:40:16 -0700 Subject: [PATCH 0177/1009] [SPARK-32511][SQL][FOLLOWUP] Fix the broken build for Scala 2.13 with Maven ### What changes were proposed in this pull request? This PR fixes the broken build for Scala 2.13 with Maven. https://github.com/apache/spark/pull/29913/checks?check_run_id=1187826966 #29795 was merged though it doesn't successfully finish the build for Scala 2.13 ### Why are the changes needed? To fix the build. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? `build/mvn -Pscala-2.13 -Phive -Phive-thriftserver -DskipTests package` Closes #29954 from sarutak/hotfix-seq. Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .../spark/sql/catalyst/expressions/complexTypeCreator.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index d5b1950e82c56..f6485a51f8fae 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -586,7 +586,7 @@ case class WithField(name: String, valExpr: Expression) } } if (!hasMatch) result += newFieldExpr - result + result.toSeq } override def children: Seq[Expression] = valExpr :: Nil From 0b326d532752fd4e05b08dd16c096f80afe7d727 Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Tue, 6 Oct 2020 14:18:37 -0700 Subject: [PATCH 0178/1009] [SPARK-32857][CORE] Fix flaky o.a.s.s.BarrierTaskContextSuite.throw exception if the number of barrier() calls are not the same on every task ### What changes were proposed in this pull request? Fix the flaky test. ### Why are the changes needed? The test is flaky: `Expected exception org.apache.spark.SparkException to be thrown, but no exception was thrown`. Check the full error stack [here](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/128548/testReport/org.apache.spark.scheduler/BarrierTaskContextSuite/throw_exception_if_the_number_of_barrier___calls_are_not_the_same_on_every_task/). By analyzing the log below, I found that task 0 hadn't reached the second `context.barrier()` when another three tasks already raised the sync timeout exceptions by the first `context.barrier()`. The timeout exceptions were caught by the `try...catch...`. Then, each task started another round barrier sync from the second `context.barrier()` and completed the sync successfully. 
```scala 20/09/10 20:54:48.821 dispatcher-event-loop-10 INFO BarrierCoordinator: Current barrier epoch for Stage 0 (Attempt 0) is 0. 20/09/10 20:54:48.822 dispatcher-event-loop-10 INFO BarrierCoordinator: Barrier sync epoch 0 from Stage 0 (Attempt 0) received update from Task 2, current progress: 1/4. 20/09/10 20:54:48.826 dispatcher-BlockManagerMaster INFO BlockManagerInfo: Added broadcast_0_piece0 in memory on localhost:38420 (size: 2.2 KiB, free: 546.3 MiB) 20/09/10 20:54:48.908 dispatcher-event-loop-12 INFO BarrierCoordinator: Current barrier epoch for Stage 0 (Attempt 0) is 0. 20/09/10 20:54:48.909 dispatcher-event-loop-12 INFO BarrierCoordinator: Barrier sync epoch 0 from Stage 0 (Attempt 0) received update from Task 1, current progress: 2/4. 20/09/10 20:54:48.959 dispatcher-event-loop-11 INFO BarrierCoordinator: Current barrier epoch for Stage 0 (Attempt 0) is 0. 20/09/10 20:54:48.960 dispatcher-event-loop-11 INFO BarrierCoordinator: Barrier sync epoch 0 from Stage 0 (Attempt 0) received update from Task 3, current progress: 3/4. 20/09/10 20:54:49.616 dispatcher-CoarseGrainedScheduler INFO TaskSchedulerImpl: Skip current round of resource offers for barrier stage 0 because the barrier taskSet requires 4 slots, while the total number of available slots is 0. 20/09/10 20:54:49.899 dispatcher-event-loop-15 INFO BarrierCoordinator: Current barrier epoch for Stage 0 (Attempt 0) is 0. 20/09/10 20:54:49.900 dispatcher-event-loop-15 INFO BarrierCoordinator: Barrier sync epoch 0 from Stage 0 (Attempt 0) received update from Task 1, current progress: 1/4. 20/09/10 20:54:49.965 dispatcher-event-loop-13 INFO BarrierCoordinator: Current barrier epoch for Stage 0 (Attempt 0) is 0. 20/09/10 20:54:49.966 dispatcher-event-loop-13 INFO BarrierCoordinator: Barrier sync epoch 0 from Stage 0 (Attempt 0) received update from Task 3, current progress: 2/4. 20/09/10 20:54:50.112 dispatcher-event-loop-16 INFO BarrierCoordinator: Current barrier epoch for Stage 0 (Attempt 0) is 0. 20/09/10 20:54:50.113 dispatcher-event-loop-16 INFO BarrierCoordinator: Barrier sync epoch 0 from Stage 0 (Attempt 0) received update from Task 0, current progress: 3/4. 20/09/10 20:54:50.609 dispatcher-CoarseGrainedScheduler INFO TaskSchedulerImpl: Skip current round of resource offers for barrier stage 0 because the barrier taskSet requires 4 slots, while the total number of available slots is 0. 20/09/10 20:54:50.826 dispatcher-event-loop-17 INFO BarrierCoordinator: Current barrier epoch for Stage 0 (Attempt 0) is 0. 20/09/10 20:54:50.827 dispatcher-event-loop-17 INFO BarrierCoordinator: Barrier sync epoch 0 from Stage 0 (Attempt 0) received update from Task 2, current progress: 4/4. 20/09/10 20:54:50.827 dispatcher-event-loop-17 INFO BarrierCoordinator: Barrier sync epoch 0 from Stage 0 (Attempt 0) received all updates from tasks, finished successfully. ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Updated the test and tested a hundred times without failure(Previously, there could be several failures). Closes #29732 from Ngone51/fix-flaky-throw-exception. 
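To make the mismatched-`barrier()` scenario above concrete, here is a condensed, test-style sketch of the kind of barrier job this suite runs (simplified; the exact partition logic in the real test differs), assuming a `SparkContext` `sc` with at least four slots and ScalaTest's `intercept` in scope:

```scala
import org.apache.spark.{BarrierTaskContext, SparkException}

val rdd = sc.makeRDD(1 to 10, 4).barrier().mapPartitions { iter =>
  val context = BarrierTaskContext.get()
  context.barrier()
  if (context.partitionId() == 0) {
    // Only one task issues a second barrier() call, so the other three tasks never join
    // that sync round and the coordinator gives up after spark.barrier.sync.timeout.
    context.barrier()
  }
  iter
}

val error = intercept[SparkException] { rdd.collect() }.getMessage
assert(error.contains("The coordinator didn't get all barrier sync requests"))
```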
Authored-by: yi.wu Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/scheduler/BarrierTaskContextSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala index d18ca36f1fa60..e4ec62f8efc5b 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala @@ -189,7 +189,7 @@ class BarrierTaskContextSuite extends SparkFunSuite with LocalSparkContext with test("throw exception if the number of barrier() calls are not the same on every task") { initLocalClusterSparkContext() - sc.conf.set("spark.barrier.sync.timeout", "1") + sc.conf.set("spark.barrier.sync.timeout", "5") val rdd = sc.makeRDD(1 to 10, 4) val rdd2 = rdd.barrier().mapPartitions { it => val context = BarrierTaskContext.get() @@ -212,7 +212,7 @@ class BarrierTaskContextSuite extends SparkFunSuite with LocalSparkContext with rdd2.collect() }.getMessage assert(error.contains("The coordinator didn't get all barrier sync requests")) - assert(error.contains("within 1 second(s)")) + assert(error.contains("within 5 second(s)")) } def testBarrierTaskKilled(interruptOnKill: Boolean): Unit = { From 57ed5a829b7dd8c92e5dfb7bb96373c8f464246c Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 6 Oct 2020 16:59:23 -0700 Subject: [PATCH 0179/1009] [SPARK-33007][SQL] Simplify named_struct + get struct field + from_json expression chain ### What changes were proposed in this pull request? This proposes to simplify named_struct + get struct field + from_json expression chain from `struct(from_json.col1, from_json.col2, from_json.col3...)` to `struct(from_json)`. ### Why are the changes needed? Simplify complex expression tree that could be produced by query optimization or user. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Closes #29942 from viirya/SPARK-33007. Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../optimizer/OptimizeJsonExprs.scala | 36 ++++++++++ .../optimizer/OptimizeJsonExprsSuite.scala | 67 +++++++++++++++++++ 2 files changed, 103 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala index 59228904d84b7..fcd5412d66d41 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala @@ -28,10 +28,46 @@ import org.apache.spark.sql.types.{ArrayType, StructType} * The optimization includes: * 1. JsonToStructs(StructsToJson(child)) => child. * 2. Prune unnecessary columns from GetStructField/GetArrayStructFields + JsonToStructs. + * 3. CreateNamedStruct(JsonToStructs(json).col1, JsonToStructs(json).col2, ...) => + * If(IsNull(json), nullStruct, KnownNotNull(JsonToStructs(prunedSchema, ..., json))) + * if JsonToStructs(json) is shared among all fields of CreateNamedStruct. `prunedSchema` + * contains all accessed fields in original CreateNamedStruct. 
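As a usage illustration, here is a sketch (assuming a `SparkSession` named `spark`) of the query shape this rule targets. Note from the rule's premises that it only fires when the struct field names match the JSON field names, all fields come from the same `from_json`, and there are no duplicate field names:

```scala
import spark.implicits._

Seq("""{"a":1, "b":2, "c":123, "d":"test"}""").toDF("json").createOrReplaceTempView("t")

// Both struct entries re-parse the same JSON column; with this rule the optimizer
// should collapse them into a single from_json call with a pruned schema (a, b).
val query = spark.sql("""
  SELECT named_struct(
           'a', from_json(json, 'a INT, b INT, c LONG, d STRING').a,
           'b', from_json(json, 'a INT, b INT, c LONG, d STRING').b) AS s
  FROM t
""")

query.explain(true) // inspect the optimized plan for the single, pruned from_json
```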
*/ object OptimizeJsonExprs extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan transform { case p => p.transformExpressions { + + case c: CreateNamedStruct + // If we create struct from various fields of the same `JsonToStructs`. + if c.valExprs.forall { v => + v.isInstanceOf[GetStructField] && + v.asInstanceOf[GetStructField].child.isInstanceOf[JsonToStructs] && + v.children.head.semanticEquals(c.valExprs.head.children.head) + } => + val jsonToStructs = c.valExprs.map(_.children.head) + val sameFieldName = c.names.zip(c.valExprs).forall { + case (name, valExpr: GetStructField) => + name.toString == valExpr.childSchema(valExpr.ordinal).name + case _ => false + } + + // Although `CreateNamedStruct` allows duplicated field names, e.g. "a int, a int", + // `JsonToStructs` does not support parsing json with duplicated field names. + val duplicateFields = c.names.map(_.toString).distinct.length != c.names.length + + // If we create struct from various fields of the same `JsonToStructs` and we don't + // alias field names and there is no duplicated field in the struct. + if (sameFieldName && !duplicateFields) { + val fromJson = jsonToStructs.head.asInstanceOf[JsonToStructs].copy(schema = c.dataType) + val nullFields = c.children.grouped(2).flatMap { + case Seq(name, value) => Seq(name, Literal(null, value.dataType)) + }.toSeq + + If(IsNull(fromJson.child), c.copy(children = nullFields), KnownNotNull(fromJson)) + } else { + c + } + case jsonToStructs @ JsonToStructs(_, options1, StructsToJson(options2, child, timeZoneId2), timeZoneId1) if options1.isEmpty && options2.isEmpty && timeZoneId1 == timeZoneId2 && diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala index e47a141dfed1f..7d975a1b00466 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala @@ -199,4 +199,71 @@ class OptimizeJsonExprsSuite extends PlanTest with ExpressionEvalHelper { JsonToStructs(prunedSchema2, options, 'json), field2, 0, 1, false).as("b")).analyze comparePlans(optimized2, expected2) } + + test("SPARK-33007: simplify named_struct + from_json") { + val options = Map.empty[String, String] + val schema = StructType.fromDDL("a int, b int, c long, d string") + + val prunedSchema1 = StructType.fromDDL("a int, b int") + val nullStruct = namedStruct("a", Literal(null, IntegerType), "b", Literal(null, IntegerType)) + + val UTC_OPT = Option("UTC") + val json: BoundReference = 'json.string.canBeNull.at(0) + + assertEquivalent( + testRelation2, + namedStruct( + "a", GetStructField(JsonToStructs(schema, options, json, UTC_OPT), 0), + "b", GetStructField(JsonToStructs(schema, options, json, UTC_OPT), 1)).as("struct"), + If(IsNull(json), + nullStruct, + KnownNotNull(JsonToStructs(prunedSchema1, options, json, UTC_OPT))).as("struct")) + + val field1 = StructType.fromDDL("a int") + val field2 = StructType.fromDDL("b int") + + // Skip optimization if `namedStruct` aliases field name. 
+ assertEquivalent( + testRelation2, + namedStruct( + "a1", GetStructField(JsonToStructs(schema, options, json, UTC_OPT), 0), + "b", GetStructField(JsonToStructs(schema, options, json, UTC_OPT), 1)).as("struct"), + namedStruct( + "a1", GetStructField(JsonToStructs(field1, options, json, UTC_OPT), 0), + "b", GetStructField(JsonToStructs(field2, options, json, UTC_OPT), 0)).as("struct")) + + assertEquivalent( + testRelation2, + namedStruct( + "a", GetStructField(JsonToStructs(schema, options, json, UTC_OPT), 0), + "a", GetStructField(JsonToStructs(schema, options, json, UTC_OPT), 0)).as("struct"), + namedStruct( + "a", GetStructField(JsonToStructs(field1, options, json, UTC_OPT), 0), + "a", GetStructField(JsonToStructs(field1, options, json, UTC_OPT), 0)).as("struct")) + + val PST = getZoneId("-08:00") + // Skip optimization if `JsonToStructs`s are not the same. + assertEquivalent( + testRelation2, + namedStruct( + "a", GetStructField(JsonToStructs(schema, options, json, UTC_OPT), 0), + "b", GetStructField(JsonToStructs(schema, options, json, Option(PST.getId)), 1)) + .as("struct"), + namedStruct( + "a", GetStructField(JsonToStructs(field1, options, json, UTC_OPT), 0), + "b", GetStructField(JsonToStructs(field2, options, json, Option(PST.getId)), 0)) + .as("struct")) + } + + private def assertEquivalent(relation: LocalRelation, e1: Expression, e2: Expression): Unit = { + val plan = relation.select(e1).analyze + val actual = Optimizer.execute(plan) + val expected = relation.select(e2).analyze + comparePlans(actual, expected) + + Seq("""{"a":1, "b":2, "c": 123, "d": "test"}""", null).foreach(v => { + val row = create_row(v) + checkEvaluation(e1, e2.eval(row), row) + }) + } } From 584f90c82e8e47cdcaab50f95e6c709f460cd789 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 7 Oct 2020 09:29:30 +0900 Subject: [PATCH 0180/1009] [SPARK-33067][SQL][TESTS][FOLLOWUP] Check error messages in JDBCTableCatalogSuite ### What changes were proposed in this pull request? Get error message from the expected exception, and check that they are reasonable. ### Why are the changes needed? To improve tests by expecting particular error messages. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running `JDBCTableCatalogSuite`. Closes #29957 from MaxGekk/jdbcv2-negative-tests-followup. 
Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- .../v2/jdbc/JDBCTableCatalogSuite.scala | 116 +++++++++++------- 1 file changed, 71 insertions(+), 45 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala index bf71f90779b71..ca86a8f593621 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala @@ -22,6 +22,7 @@ import java.util.Properties import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.analysis.NoSuchTableException +import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -75,10 +76,14 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { checkAnswer(sql("SHOW TABLES IN h2.test"), Seq(Row("test", "to_drop"), Row("test", "people"))) sql("DROP TABLE h2.test.to_drop") checkAnswer(sql("SHOW TABLES IN h2.test"), Seq(Row("test", "people"))) - Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => - intercept[NoSuchTableException] { + Seq( + "h2.test.not_existing_table" -> "Table test.not_existing_table not found", + "h2.bad_test.not_existing_table" -> "Table bad_test.not_existing_table not found" + ).foreach { case (table, expectedMsg) => + val msg = intercept[NoSuchTableException] { sql(s"DROP TABLE $table") - } + }.getMessage + assert(msg.contains(expectedMsg)) } } @@ -96,10 +101,14 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { Seq(Row("test", "dst_table"), Row("test", "people"))) } // Rename not existing table or namespace - Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => - intercept[org.h2.jdbc.JdbcSQLException] { + Seq( + "h2.test.not_existing_table" -> "Table \"not_existing_table\" not found", + "h2.bad_test.not_existing_table" -> "Schema \"bad_test\" not found" + ).foreach { case (table, expectedMsg) => + val msg = intercept[org.h2.jdbc.JdbcSQLException] { sql(s"ALTER TABLE $table RENAME TO test.dst_table") - } + }.getMessage + assert(msg.contains(expectedMsg)) } // Rename to an existing table withTable("h2.test.dst_table") { @@ -110,9 +119,10 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { withConnection { conn => conn.prepareStatement("""CREATE TABLE "test"."src_table" (id INTEGER)""").executeUpdate() } - intercept[org.h2.jdbc.JdbcSQLException] { - sql("ALTER TABLE h2.test.src_table RENAME TO h2.test.dst_table") - } + val msg = intercept[org.h2.jdbc.JdbcSQLException] { + sql("ALTER TABLE h2.test.src_table RENAME TO test.dst_table") + }.getMessage + assert(msg.contains("Table \"dst_table\" already exists")) } } } @@ -124,9 +134,10 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { .add("ID", IntegerType) assert(t.schema === expectedSchema) Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => - intercept[AnalysisException] { - spark.table(s"h2.$table").schema - } + val msg = intercept[AnalysisException] { + spark.table(table).schema + }.getMessage + assert(msg.contains("Table or view not found")) } } @@ -140,13 +151,15 @@ class 
JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { } withTable("h2.test.new_table") { sql("CREATE TABLE h2.test.new_table(i INT, j STRING) USING _") - intercept[AnalysisException] { + val msg = intercept[AnalysisException] { sql("CREATE TABLE h2.test.new_table(i INT, j STRING) USING _") - } + }.getMessage + assert(msg.contains("Table test.new_table already exists")) } - intercept[org.h2.jdbc.JdbcSQLException] { + val msg = intercept[org.h2.jdbc.JdbcSQLException] { sql("CREATE TABLE h2.bad_test.new_table(i INT, j STRING) USING _") - } + }.getMessage + assert(msg.contains("Schema \"bad_test\" not found")) } test("alter table ... add column") { @@ -164,15 +177,17 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { expectedSchema = expectedSchema.add("C3", DoubleType) assert(t.schema === expectedSchema) // Add already existing column - intercept[AnalysisException] { + val msg = intercept[AnalysisException] { sql("ALTER TABLE h2.test.alt_table ADD COLUMNS (C3 DOUBLE)") - } + }.getMessage + assert(msg.contains("Cannot add column, because C3 already exists")) } // Add a column to not existing table and namespace Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => - intercept[AnalysisException] { + val msg = intercept[AnalysisException] { sql(s"ALTER TABLE $table ADD COLUMNS (C4 STRING)") - } + }.getMessage + assert(msg.contains("Table not found")) } } @@ -186,15 +201,17 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { .add("C0", IntegerType) assert(t.schema === expectedSchema) // Rename to already existing column - intercept[AnalysisException] { + val msg = intercept[AnalysisException] { sql("ALTER TABLE h2.test.alt_table RENAME COLUMN C TO C0") - } + }.getMessage + assert(msg.contains("Cannot rename column, because C0 already exists")) } // Rename a column in not existing table and namespace Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => - intercept[AnalysisException] { + val msg = intercept[AnalysisException] { sql(s"ALTER TABLE $table RENAME COLUMN ID TO C") - } + }.getMessage + assert(msg.contains("Table not found")) } } @@ -206,15 +223,17 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { val expectedSchema = new StructType().add("C2", IntegerType) assert(t.schema === expectedSchema) // Drop not existing column - intercept[AnalysisException] { + val msg = intercept[AnalysisException] { sql("ALTER TABLE h2.test.alt_table DROP COLUMN bad_column") - } + }.getMessage + assert(msg.contains("Cannot delete missing field bad_column in test.alt_table schema")) } // Drop a column to not existing table and namespace Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => - intercept[AnalysisException] { + val msg = intercept[AnalysisException] { sql(s"ALTER TABLE $table DROP COLUMN C1") - } + }.getMessage + assert(msg.contains("Table not found")) } } @@ -226,19 +245,22 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { val expectedSchema = new StructType().add("ID", DoubleType) assert(t.schema === expectedSchema) // Update not existing column - intercept[AnalysisException] { + val msg1 = intercept[AnalysisException] { sql("ALTER TABLE h2.test.alt_table ALTER COLUMN bad_column TYPE DOUBLE") - } + }.getMessage + assert(msg1.contains("Cannot update missing field bad_column in test.alt_table schema")) // Update column to wrong type - intercept[AnalysisException] { + val msg2 = 
intercept[ParseException] { sql("ALTER TABLE h2.test.alt_table ALTER COLUMN id TYPE bad_type") - } + }.getMessage + assert(msg2.contains("DataType bad_type is not supported")) } // Update column type in not existing table and namespace Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => - intercept[AnalysisException] { + val msg = intercept[AnalysisException] { sql(s"ALTER TABLE $table ALTER COLUMN id TYPE DOUBLE") - } + }.getMessage + assert(msg.contains("Table not found")) } } @@ -250,35 +272,39 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { val expectedSchema = new StructType().add("ID", IntegerType, nullable = true) assert(t.schema === expectedSchema) // Update nullability of not existing column - intercept[AnalysisException] { + val msg = intercept[AnalysisException] { sql("ALTER TABLE h2.test.alt_table ALTER COLUMN bad_column DROP NOT NULL") - } + }.getMessage + assert(msg.contains("Cannot update missing field bad_column in test.alt_table")) } // Update column nullability in not existing table and namespace Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => - intercept[AnalysisException] { + val msg = intercept[AnalysisException] { sql(s"ALTER TABLE $table ALTER COLUMN ID DROP NOT NULL") - } + }.getMessage + assert(msg.contains("Table not found")) } } test("alter table ... update column comment not supported") { withTable("h2.test.alt_table") { sql("CREATE TABLE h2.test.alt_table (ID INTEGER) USING _") - val thrown = intercept[java.sql.SQLFeatureNotSupportedException] { + val msg1 = intercept[java.sql.SQLFeatureNotSupportedException] { sql("ALTER TABLE h2.test.alt_table ALTER COLUMN ID COMMENT 'test'") - } - assert(thrown.getMessage.contains("Unsupported TableChange")) + }.getMessage + assert(msg1.contains("Unsupported TableChange")) // Update comment for not existing column - intercept[AnalysisException] { + val msg2 = intercept[AnalysisException] { sql("ALTER TABLE h2.test.alt_table ALTER COLUMN bad_column COMMENT 'test'") - } + }.getMessage + assert(msg2.contains("Cannot update missing field bad_column in test.alt_table")) } // Update column comments in not existing table and namespace Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => - intercept[AnalysisException] { + val msg = intercept[AnalysisException] { sql(s"ALTER TABLE $table ALTER COLUMN ID COMMENT 'test'") - } + }.getMessage + assert(msg.contains("Table not found")) } } } From 5ce321dc80a699fa525ca5b69bf2c28e10f8a12a Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 7 Oct 2020 13:00:59 +0900 Subject: [PATCH 0181/1009] [SPARK-33017][PYTHON][DOCS][FOLLOW-UP] Add getCheckpointDir into API documentation ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/29918. We should add it into the documentation as well. ### Why are the changes needed? To show users new APIs. ### Does this PR introduce _any_ user-facing change? Yes, `SparkContext.getCheckpointDir` will be documented. ### How was this patch tested? Manually built the PySpark documentation: ```bash cd python/docs make clean html cd build/html open index.html ``` Closes #29960 from HyukjinKwon/SPARK-33017. 
Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- python/docs/source/reference/pyspark.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/python/docs/source/reference/pyspark.rst b/python/docs/source/reference/pyspark.rst index c13df6ee2d2b4..fc0775eb7f8f5 100644 --- a/python/docs/source/reference/pyspark.rst +++ b/python/docs/source/reference/pyspark.rst @@ -64,6 +64,7 @@ Spark Context APIs SparkContext.defaultParallelism SparkContext.dump_profiles SparkContext.emptyRDD + SparkContext.getCheckpointDir SparkContext.getConf SparkContext.getLocalProperty SparkContext.getOrCreate From aea78d2c8cdf12f4978fa6a69107d096c07c6fec Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 7 Oct 2020 04:48:57 +0000 Subject: [PATCH 0182/1009] [SPARK-33034][SQL] Support ALTER TABLE in JDBC v2 Table Catalog: add, update type and nullability of columns (Oracle dialect) ### What changes were proposed in this pull request? 1. Override the default SQL strings in the Oracle Dialect for: - ALTER TABLE ADD COLUMN - ALTER TABLE UPDATE COLUMN TYPE - ALTER TABLE UPDATE COLUMN NULLABILITY 2. Add new docker integration test suite `jdbc/v2/OracleIntegrationSuite.scala` ### Why are the changes needed? In SPARK-24907, we implemented JDBC v2 Table Catalog but it doesn't support some `ALTER TABLE` at the moment. This PR supports Oracle specific `ALTER TABLE`. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? By running new integration test suite: ``` $ ./build/sbt -Pdocker-integration-tests "test-only *.OracleIntegrationSuite" ``` Closes #29912 from MaxGekk/jdbcv2-oracle-alter-table. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../sql/jdbc/DockerJDBCIntegrationSuite.scala | 15 +- .../sql/jdbc/v2/OracleIntegrationSuite.scala | 152 ++++++++++++++++++ .../datasources/v2/jdbc/JDBCTable.scala | 1 - .../apache/spark/sql/jdbc/JdbcDialects.scala | 25 ++- .../apache/spark/sql/jdbc/OracleDialect.scala | 19 +++ 5 files changed, 200 insertions(+), 12 deletions(-) create mode 100644 external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala index 6d1a22dd22b65..24927da16d50c 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala @@ -98,7 +98,13 @@ abstract class DockerJDBCIntegrationSuite extends SharedSparkSession with Eventu val connectionTimeout = timeout(2.minutes) private var docker: DockerClient = _ - protected var externalPort: Int = _ + // Configure networking (necessary for boot2docker / Docker Machine) + protected lazy val externalPort: Int = { + val sock = new ServerSocket(0) + val port = sock.getLocalPort + sock.close() + port + } private var containerId: String = _ protected var jdbcUrl: String = _ @@ -122,13 +128,6 @@ abstract class DockerJDBCIntegrationSuite extends SharedSparkSession with Eventu log.warn(s"Docker image ${db.imageName} not found; pulling image from registry") docker.pull(db.imageName) } - // Configure networking (necessary for boot2docker / Docker Machine) - externalPort = { - val sock = new ServerSocket(0) - val port = sock.getLocalPort - sock.close() - port - } val hostConfigBuilder = 
HostConfig.builder() .privileged(db.privileged) .networkMode("bridge") diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala new file mode 100644 index 0000000000000..400459c0ea17b --- /dev/null +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.jdbc.v2 + +import java.sql.Connection + +import org.scalatest.time.SpanSugar._ + +import org.apache.spark.SparkConf +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog +import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite} +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types._ +import org.apache.spark.tags.DockerTest + +/** + * The following would be the steps to test this + * 1. Build Oracle database in Docker, please refer below link about how to. + * https://github.com/oracle/docker-images/blob/master/OracleDatabase/SingleInstance/README.md + * 2. export ORACLE_DOCKER_IMAGE_NAME=$ORACLE_DOCKER_IMAGE_NAME + * Pull oracle $ORACLE_DOCKER_IMAGE_NAME image - docker pull $ORACLE_DOCKER_IMAGE_NAME + * 3. Start docker - sudo service docker start + * 4. Run spark test - ./build/sbt -Pdocker-integration-tests + * "test-only org.apache.spark.sql.jdbc.v2.OracleIntegrationSuite" + * + * An actual sequence of commands to run the test is as follows + * + * $ git clone https://github.com/oracle/docker-images.git + * // Head SHA: 3e352a22618070595f823977a0fd1a3a8071a83c + * $ cd docker-images/OracleDatabase/SingleInstance/dockerfiles + * $ ./buildDockerImage.sh -v 18.4.0 -x + * $ export ORACLE_DOCKER_IMAGE_NAME=oracle/database:18.4.0-xe + * $ cd $SPARK_HOME + * $ ./build/sbt -Pdocker-integration-tests + * "test-only org.apache.spark.sql.jdbc.v2.OracleIntegrationSuite" + * + * It has been validated with 18.4.0 Express Edition. 
+ */ +@DockerTest +class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSparkSession { + override val db = new DatabaseOnDocker { + override val imageName = sys.env("ORACLE_DOCKER_IMAGE_NAME") + override val env = Map( + "ORACLE_PWD" -> "oracle" + ) + override val usesIpc = false + override val jdbcPort: Int = 1521 + override def getJdbcUrl(ip: String, port: Int): String = + s"jdbc:oracle:thin:system/oracle@//$ip:$port/xe" + } + + override def sparkConf: SparkConf = super.sparkConf + .set("spark.sql.catalog.oracle", classOf[JDBCTableCatalog].getName) + .set("spark.sql.catalog.oracle.url", db.getJdbcUrl(dockerIp, externalPort)) + + override val connectionTimeout = timeout(7.minutes) + override def dataPreparation(conn: Connection): Unit = {} + + test("SPARK-33034: ALTER TABLE ... add new columns") { + withTable("oracle.alt_table") { + sql("CREATE TABLE oracle.alt_table (ID STRING) USING _") + sql("ALTER TABLE oracle.alt_table ADD COLUMNS (C1 STRING, C2 STRING)") + var t = spark.table("oracle.alt_table") + var expectedSchema = new StructType() + .add("ID", StringType) + .add("C1", StringType) + .add("C2", StringType) + assert(t.schema === expectedSchema) + sql("ALTER TABLE oracle.alt_table ADD COLUMNS (C3 STRING)") + t = spark.table("oracle.alt_table") + expectedSchema = expectedSchema.add("C3", StringType) + assert(t.schema === expectedSchema) + // Add already existing column + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE oracle.alt_table ADD COLUMNS (C3 DOUBLE)") + }.getMessage + assert(msg.contains("Cannot add column, because C3 already exists")) + } + // Add a column to not existing table + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE oracle.not_existing_table ADD COLUMNS (C4 STRING)") + }.getMessage + assert(msg.contains("Table not found")) + } + + test("SPARK-33034: ALTER TABLE ... update column type") { + withTable("oracle.alt_table") { + sql("CREATE TABLE oracle.alt_table (ID INTEGER) USING _") + sql("ALTER TABLE oracle.alt_table ALTER COLUMN id TYPE STRING") + val t = spark.table("oracle.alt_table") + val expectedSchema = new StructType().add("ID", StringType) + assert(t.schema === expectedSchema) + // Update column type from STRING to INTEGER + val msg1 = intercept[AnalysisException] { + sql("ALTER TABLE oracle.alt_table ALTER COLUMN id TYPE INTEGER") + }.getMessage + assert(msg1.contains("Cannot update alt_table field ID: string cannot be cast to int")) + // Update not existing column + val msg2 = intercept[AnalysisException] { + sql("ALTER TABLE oracle.alt_table ALTER COLUMN bad_column TYPE DOUBLE") + }.getMessage + assert(msg2.contains("Cannot update missing field bad_column")) + // Update column to wrong type + val msg3 = intercept[ParseException] { + sql("ALTER TABLE oracle.alt_table ALTER COLUMN id TYPE bad_type") + }.getMessage + assert(msg3.contains("DataType bad_type is not supported")) + } + // Update column type in not existing table + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE oracle.not_existing_table ALTER COLUMN id TYPE DOUBLE") + }.getMessage + assert(msg.contains("Table not found")) + } + + test("SPARK-33034: ALTER TABLE ... 
update column nullability") { + withTable("oracle.alt_table") { + sql("CREATE TABLE oracle.alt_table (ID STRING NOT NULL) USING _") + sql("ALTER TABLE oracle.alt_table ALTER COLUMN ID DROP NOT NULL") + val t = spark.table("oracle.alt_table") + val expectedSchema = new StructType().add("ID", StringType, nullable = true) + assert(t.schema === expectedSchema) + // Update nullability of not existing column + val msg = intercept[AnalysisException] { + sql("ALTER TABLE oracle.alt_table ALTER COLUMN bad_column DROP NOT NULL") + }.getMessage + assert(msg.contains("Cannot update missing field bad_column")) + } + // Update column nullability in not existing table + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE oracle.not_existing_table ALTER COLUMN ID DROP NOT NULL") + }.getMessage + assert(msg.contains("Table not found")) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTable.scala index 55759497bd910..5e11ea66be4c6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTable.scala @@ -30,7 +30,6 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap case class JDBCTable(ident: Identifier, schema: StructType, jdbcOptions: JDBCOptions) extends Table with SupportsRead with SupportsWrite { - assert(ident.namespace().length == 1) override def name(): String = ident.toString diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index cea5a20917532..a01720d1eefc7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -212,7 +212,7 @@ abstract class JdbcDialect extends Serializable { case add: AddColumn if add.fieldNames.length == 1 => val dataType = JdbcUtils.getJdbcType(add.dataType(), this).databaseTypeDefinition val name = add.fieldNames - updateClause += s"ALTER TABLE $tableName ADD COLUMN ${name(0)} $dataType" + updateClause += getAddColumnQuery(tableName, name(0), dataType) case rename: RenameColumn if rename.fieldNames.length == 1 => val name = rename.fieldNames updateClause += s"ALTER TABLE $tableName RENAME COLUMN ${name(0)} TO ${rename.newName}" @@ -223,17 +223,36 @@ abstract class JdbcDialect extends Serializable { val name = updateColumnType.fieldNames val dataType = JdbcUtils.getJdbcType(updateColumnType.newDataType(), this) .databaseTypeDefinition - updateClause += s"ALTER TABLE $tableName ALTER COLUMN ${name(0)} $dataType" + updateClause += getUpdateColumnTypeQuery(tableName, name(0), dataType) case updateNull: UpdateColumnNullability if updateNull.fieldNames.length == 1 => val name = updateNull.fieldNames val nullable = if (updateNull.nullable()) "NULL" else "NOT NULL" - updateClause += s"ALTER TABLE $tableName ALTER COLUMN ${name(0)} SET $nullable" + updateClause += getUpdateColumnNullabilityQuery(tableName, name(0), updateNull.nullable()) case _ => throw new SQLFeatureNotSupportedException(s"Unsupported TableChange $change") } } updateClause.result() } + + def getAddColumnQuery(tableName: String, columnName: String, dataType: String): String = { + s"ALTER TABLE $tableName ADD COLUMN $columnName $dataType" + } + + def getUpdateColumnTypeQuery( + tableName: String, + columnName: String, + 
newDataType: String): String = { + s"ALTER TABLE $tableName ALTER COLUMN $columnName $newDataType" + } + + def getUpdateColumnNullabilityQuery( + tableName: String, + columnName: String, + isNullable: Boolean): String = { + val nullable = if (isNullable) "NULL" else "NOT NULL" + s"ALTER TABLE $tableName ALTER COLUMN $columnName SET $nullable" + } } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala index 3f12b9acd0fc4..128b90a190481 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala @@ -118,4 +118,23 @@ private case object OracleDialect extends JdbcDialect { case _ => s"TRUNCATE TABLE $table" } } + + // see https://docs.oracle.com/cd/B28359_01/server.111/b28286/statements_3001.htm#SQLRF01001 + override def getAddColumnQuery(tableName: String, columnName: String, dataType: String): String = + s"ALTER TABLE $tableName ADD $columnName $dataType" + + // see https://docs.oracle.com/cd/B28359_01/server.111/b28286/statements_3001.htm#SQLRF01001 + override def getUpdateColumnTypeQuery( + tableName: String, + columnName: String, + newDataType: String): String = + s"ALTER TABLE $tableName MODIFY $columnName $newDataType" + + override def getUpdateColumnNullabilityQuery( + tableName: String, + columnName: String, + isNullable: Boolean): String = { + val nullable = if (isNullable) "NULL" else "NOT NULL" + s"ALTER TABLE $tableName MODIFY $columnName $nullable" + } } From 7e99fcd64efa425f3c985df4fe957a3be274a49a Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Wed, 7 Oct 2020 06:33:20 +0000 Subject: [PATCH 0183/1009] [SPARK-33004][SQL] Migrate DESCRIBE column to use UnresolvedTableOrView to resolve the identifier ### What changes were proposed in this pull request? This PR proposes to migrate `DESCRIBE tbl colname` to use `UnresolvedTableOrView` to resolve the table/view identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). ### Why are the changes needed? The current behavior is not consistent between v1 and v2 commands when resolving a temp view. In v2, the `t` in the following example is resolved to a table: ```scala sql("CREATE TABLE testcat.ns.t (id bigint) USING foo") sql("CREATE TEMPORARY VIEW t AS SELECT 2 as i") sql("USE testcat.ns") sql("DESCRIBE t i") // 't' is resolved to testcat.ns.t Describing columns is not supported for v2 tables.; org.apache.spark.sql.AnalysisException: Describing columns is not supported for v2 tables.; ``` whereas in v1, the `t` is resolved to a temp view: ```scala sql("CREATE DATABASE test") sql("CREATE TABLE spark_catalog.test.t (id bigint) USING csv") sql("CREATE TEMPORARY VIEW t AS SELECT 2 as i") sql("USE spark_catalog.test") sql("DESCRIBE t i").show // 't' is resolved to a temp view +---------+----------+ |info_name|info_value| +---------+----------+ | col_name| i| |data_type| int| | comment| NULL| +---------+----------+ ``` ### Does this PR introduce _any_ user-facing change? After this PR, `DESCRIBE t i` is resolved to a temp view `t` instead of `testcat.ns.t`. ### How was this patch tested? 
Added a new test Closes #29880 from imback82/describe_column_consistent. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../catalyst/analysis/ResolveCatalogs.scala | 4 --- .../sql/catalyst/parser/AstBuilder.scala | 6 ++-- ...hema.scala => DescribeCommandSchema.scala} | 10 +++++-- .../catalyst/plans/logical/statements.scala | 8 ------ .../catalyst/plans/logical/v2Commands.scala | 15 ++++++++-- .../sql/catalyst/parser/DDLParserSuite.scala | 28 +++++++++---------- .../analysis/ResolveSessionCatalog.scala | 12 +++++--- .../spark/sql/execution/command/tables.scala | 13 ++------- .../datasources/v2/DataSourceV2Strategy.scala | 3 ++ .../apache/spark/sql/SQLQueryTestSuite.scala | 2 +- .../sql/connector/DataSourceV2SQLSuite.scala | 24 ++++++++++++++++ .../hive/execution/HiveComparisonTest.scala | 2 +- 12 files changed, 78 insertions(+), 49 deletions(-) rename sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/{DescribeTableSchema.scala => DescribeCommandSchema.scala} (74%) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala index 0d0f80be359e7..65ddff8c44ed9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala @@ -142,10 +142,6 @@ class ResolveCatalogs(val catalogManager: CatalogManager) } RenameTable(catalog.asTableCatalog, oldName.asIdentifier, newNameParts.asIdentifier) - case DescribeColumnStatement( - NonSessionCatalogAndTable(catalog, tbl), colNameParts, isExtended) => - throw new AnalysisException("Describing columns is not supported for v2 tables.") - case c @ CreateTableStatement( NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _) => assertNoNullTypeInSchema(c.tableSchema) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index f133235a2636e..f29e7b11e02de 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3183,7 +3183,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } /** - * Create a [[DescribeColumnStatement]] or [[DescribeRelation]] commands. + * Create a [[DescribeColumn]] or [[DescribeRelation]] commands. 
*/ override def visitDescribeRelation(ctx: DescribeRelationContext): LogicalPlan = withOrigin(ctx) { val isExtended = ctx.EXTENDED != null || ctx.FORMATTED != null @@ -3191,8 +3191,8 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging if (ctx.partitionSpec != null) { throw new ParseException("DESC TABLE COLUMN for a specific partition is not supported", ctx) } else { - DescribeColumnStatement( - visitMultipartIdentifier(ctx.multipartIdentifier()), + DescribeColumn( + UnresolvedTableOrView(visitMultipartIdentifier(ctx.multipartIdentifier())), ctx.describeColName.nameParts.asScala.map(_.getText).toSeq, isExtended) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/DescribeTableSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/DescribeCommandSchema.scala similarity index 74% rename from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/DescribeTableSchema.scala rename to sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/DescribeCommandSchema.scala index ff35972b901f9..99d2ea7751959 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/DescribeTableSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/DescribeCommandSchema.scala @@ -18,9 +18,9 @@ package org.apache.spark.sql.catalyst.plans import org.apache.spark.sql.catalyst.expressions.AttributeReference -import org.apache.spark.sql.types.{MetadataBuilder, StringType, StructField, StructType} +import org.apache.spark.sql.types.{MetadataBuilder, StringType} -private[sql] object DescribeTableSchema { +private[sql] object DescribeCommandSchema { def describeTableAttributes(): Seq[AttributeReference] = Seq( AttributeReference("col_name", StringType, nullable = false, new MetadataBuilder().putString("comment", "name of the column").build())(), @@ -28,4 +28,10 @@ private[sql] object DescribeTableSchema { new MetadataBuilder().putString("comment", "data type of the column").build())(), AttributeReference("comment", StringType, nullable = true, new MetadataBuilder().putString("comment", "comment of the column").build())()) + + def describeColumnAttributes(): Seq[AttributeReference] = Seq( + AttributeReference("info_name", StringType, nullable = false, + new MetadataBuilder().putString("comment", "name of the column info").build())(), + AttributeReference("info_value", StringType, nullable = false, + new MetadataBuilder().putString("comment", "value of the column info").build())()) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index d09e08d105c21..d7c097af9120f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -298,14 +298,6 @@ case class DropViewStatement( viewName: Seq[String], ifExists: Boolean) extends ParsedStatement -/** - * A DESCRIBE TABLE tbl_name col_name statement, as parsed from SQL. - */ -case class DescribeColumnStatement( - tableName: Seq[String], - colNameParts: Seq[String], - isExtended: Boolean) extends ParsedStatement - /** * An INSERT INTO statement, as parsed from SQL. 
* diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 475eb7d74773d..50af16ca276e1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.analysis.{NamedRelation, UnresolvedException} import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, Unevaluable} -import org.apache.spark.sql.catalyst.plans.DescribeTableSchema +import org.apache.spark.sql.catalyst.plans.DescribeCommandSchema import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, ColumnChange} import org.apache.spark.sql.connector.expressions.Transform @@ -312,7 +312,18 @@ case class DescribeRelation( partitionSpec: TablePartitionSpec, isExtended: Boolean) extends Command { override def children: Seq[LogicalPlan] = Seq(relation) - override def output: Seq[Attribute] = DescribeTableSchema.describeTableAttributes() + override def output: Seq[Attribute] = DescribeCommandSchema.describeTableAttributes() +} + +/** + * The logical plan of the DESCRIBE relation_name col_name command that works for v2 tables. + */ +case class DescribeColumn( + relation: LogicalPlan, + colNameParts: Seq[String], + isExtended: Boolean) extends Command { + override def children: Seq[LogicalPlan] = Seq(relation) + override def output: Seq[Attribute] = DescribeCommandSchema.describeColumnAttributes() } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 378026b1ce9c6..8b8531b2bb3b1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -780,27 +780,27 @@ class DDLParserSuite extends AnalysisTest { test("describe table column") { comparePlans(parsePlan("DESCRIBE t col"), - DescribeColumnStatement( - Seq("t"), Seq("col"), isExtended = false)) + DescribeColumn( + UnresolvedTableOrView(Seq("t")), Seq("col"), isExtended = false)) comparePlans(parsePlan("DESCRIBE t `abc.xyz`"), - DescribeColumnStatement( - Seq("t"), Seq("abc.xyz"), isExtended = false)) + DescribeColumn( + UnresolvedTableOrView(Seq("t")), Seq("abc.xyz"), isExtended = false)) comparePlans(parsePlan("DESCRIBE t abc.xyz"), - DescribeColumnStatement( - Seq("t"), Seq("abc", "xyz"), isExtended = false)) + DescribeColumn( + UnresolvedTableOrView(Seq("t")), Seq("abc", "xyz"), isExtended = false)) comparePlans(parsePlan("DESCRIBE t `a.b`.`x.y`"), - DescribeColumnStatement( - Seq("t"), Seq("a.b", "x.y"), isExtended = false)) + DescribeColumn( + UnresolvedTableOrView(Seq("t")), Seq("a.b", "x.y"), isExtended = false)) comparePlans(parsePlan("DESCRIBE TABLE t col"), - DescribeColumnStatement( - Seq("t"), Seq("col"), isExtended = false)) + DescribeColumn( + UnresolvedTableOrView(Seq("t")), Seq("col"), isExtended = false)) comparePlans(parsePlan("DESCRIBE TABLE EXTENDED t col"), - DescribeColumnStatement( - Seq("t"), Seq("col"), isExtended = true)) + DescribeColumn( + 
UnresolvedTableOrView(Seq("t")), Seq("col"), isExtended = true)) comparePlans(parsePlan("DESCRIBE TABLE FORMATTED t col"), - DescribeColumnStatement( - Seq("t"), Seq("col"), isExtended = true)) + DescribeColumn( + UnresolvedTableOrView(Seq("t")), Seq("col"), isExtended = true)) val caught = intercept[AnalysisException]( parsePlan("DESCRIBE TABLE t PARTITION (ds='1970-01-01') col")) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 11493ad59a760..24382e07a2966 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -256,16 +256,20 @@ class ResolveSessionCatalog( case RenameTableStatement(TempViewOrV1Table(oldName), newName, isView) => AlterTableRenameCommand(oldName.asTableIdentifier, newName.asTableIdentifier, isView) - case DescribeRelation(ResolvedTable(_, ident, _: V1Table), partitionSpec, isExtended) => + case DescribeRelation(r @ ResolvedTable(_, ident, _: V1Table), partitionSpec, isExtended) + if isSessionCatalog(r.catalog) => DescribeTableCommand(ident.asTableIdentifier, partitionSpec, isExtended) // Use v1 command to describe (temp) view, as v2 catalog doesn't support view yet. case DescribeRelation(ResolvedView(ident), partitionSpec, isExtended) => DescribeTableCommand(ident.asTableIdentifier, partitionSpec, isExtended) - case DescribeColumnStatement(tbl, colNameParts, isExtended) => - val name = parseTempViewOrV1Table(tbl, "Describing columns") - DescribeColumnCommand(name.asTableIdentifier, colNameParts, isExtended) + case DescribeColumn(r @ ResolvedTable(_, _, _: V1Table), colNameParts, isExtended) + if isSessionCatalog(r.catalog) => + DescribeColumnCommand(r.identifier.asTableIdentifier, colNameParts, isExtended) + + case DescribeColumn(ResolvedView(ident), colNameParts, isExtended) => + DescribeColumnCommand(ident.asTableIdentifier, colNameParts, isExtended) // For CREATE TABLE [AS SELECT], we should use the v1 command if the catalog is resolved to the // session catalog and the table provider is not v2. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index e4be2a8d3bb8e..206f952fed0ca 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.CatalogTableType._ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} -import org.apache.spark.sql.catalyst.plans.DescribeTableSchema +import org.apache.spark.sql.catalyst.plans.DescribeCommandSchema import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier, CaseInsensitiveMap} import org.apache.spark.sql.execution.datasources.{DataSource, PartitioningUtils} @@ -597,7 +597,7 @@ case class TruncateTableCommand( } abstract class DescribeCommandBase extends RunnableCommand { - override val output = DescribeTableSchema.describeTableAttributes() + override val output = DescribeCommandSchema.describeTableAttributes() protected def describeSchema( schema: StructType, @@ -760,14 +760,7 @@ case class DescribeColumnCommand( isExtended: Boolean) extends RunnableCommand { - override val output: Seq[Attribute] = { - Seq( - AttributeReference("info_name", StringType, nullable = false, - new MetadataBuilder().putString("comment", "name of the column info").build())(), - AttributeReference("info_value", StringType, nullable = false, - new MetadataBuilder().putString("comment", "value of the column info").build())() - ) - } + override val output: Seq[Attribute] = DescribeCommandSchema.describeColumnAttributes() override def run(sparkSession: SparkSession): Seq[Row] = { val catalog = sparkSession.sessionState.catalog diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index c5ddba43a56aa..3841bd0a66987 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -225,6 +225,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat } DescribeTableExec(desc.output, r.table, isExtended) :: Nil + case DescribeColumn(_: ResolvedTable, _, _) => + throw new AnalysisException("Describing columns is not supported for v2 tables.") + case DropTable(catalog, ident, ifExists) => DropTableExec(catalog, ident, ifExists) :: Nil diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index b7cf0798a9d4b..0bb1f5e20fc5b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -502,7 +502,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper case _: DescribeCommandBase | _: DescribeColumnCommand | _: DescribeRelation - | _: DescribeColumnStatement => true + | _: DescribeColumn => true case PhysicalOperation(_, _, Sort(_, true, _)) => true case _ => plan.children.iterator.exists(isSorted) } diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index e3782c7409198..e3618f1326941 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -168,7 +168,31 @@ class DataSourceV2SQLSuite Array("Provider", "foo", ""), Array(TableCatalog.PROP_OWNER.capitalize, defaultUser, ""), Array("Table Properties", "[bar=baz]", ""))) + } + test("Describe column is not supported for v2 catalog") { + withTable("testcat.tbl") { + spark.sql("CREATE TABLE testcat.tbl (id bigint) USING foo") + val ex = intercept[AnalysisException] { + spark.sql("DESCRIBE testcat.tbl id") + } + assert(ex.message.contains("Describing columns is not supported for v2 tables")) + } + } + + test("SPARK-33004: Describe column should resolve to a temporary view first") { + withTable("testcat.ns.t") { + withTempView("t") { + sql("CREATE TABLE testcat.ns.t (id bigint) USING foo") + sql("CREATE TEMPORARY VIEW t AS SELECT 2 as i") + sql("USE testcat.ns") + checkAnswer( + sql("DESCRIBE t i"), + Seq(Row("col_name", "i"), + Row("data_type", "int"), + Row("comment", "NULL"))) + } + } } test("CreateTable: use v2 plan and session catalog when provider is v2") { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala index 7f198632a1cd6..01cf214574eeb 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala @@ -375,7 +375,7 @@ abstract class HiveComparisonTest extends SparkFunSuite with BeforeAndAfterAll { (!hiveQuery.logical.isInstanceOf[DescribeFunction]) && (!hiveQuery.logical.isInstanceOf[DescribeCommandBase]) && (!hiveQuery.logical.isInstanceOf[DescribeRelation]) && - (!hiveQuery.logical.isInstanceOf[DescribeColumnStatement]) && + (!hiveQuery.logical.isInstanceOf[DescribeColumn]) && preparedHive != catalyst) { val hivePrintOut = s"== HIVE - ${preparedHive.size} row(s) ==" +: preparedHive From 4e1ded67f88ffc869379319758d923aa538554b2 Mon Sep 17 00:00:00 2001 From: itholic Date: Wed, 7 Oct 2020 16:39:25 +0900 Subject: [PATCH 0184/1009] [SPARK-32189][DOCS][PYTHON][FOLLOW-UP] Fixed broken link and typo in PySpark docs ### What changes were proposed in this pull request? This PR is a follow-up of #29781 to fix broken link and typo. Screen Shot 2020-10-07 at 3 56 28 PM Screen Shot 2020-10-07 at 3 55 36 PM ### Why are the changes needed? Current link is not working properly because of wrong path. ### Does this PR introduce _any_ user-facing change? Yes, the link is working properly now. ### How was this patch tested? Manually built the doc. Closes #29963 from itholic/SPARK-32189-FOLLOWUP. Authored-by: itholic Signed-off-by: HyukjinKwon --- python/docs/source/development/debugging.rst | 2 +- python/docs/source/development/setting_ide.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/docs/source/development/debugging.rst b/python/docs/source/development/debugging.rst index c5f3351527f11..bc141a6f44a6f 100644 --- a/python/docs/source/development/debugging.rst +++ b/python/docs/source/development/debugging.rst @@ -35,7 +35,7 @@ with JVM. Profiling and debugging JVM is described at `Useful Developer Tools `__. 
+- If you are running locally, you can directly debug the driver side via using your IDE without the remote debug feature. Setting PySpark with IDEs is documented `here `__. - *There are many other ways of debugging PySpark applications*. For example, you can remotely debug by using the open source `Remote Debugger `_ instead of using PyCharm Professional documented here. diff --git a/python/docs/source/development/setting_ide.rst b/python/docs/source/development/setting_ide.rst index dcb44c1483006..6e8f0148c6eb3 100644 --- a/python/docs/source/development/setting_ide.rst +++ b/python/docs/source/development/setting_ide.rst @@ -50,7 +50,7 @@ Let's go to the path ``python/pyspark/tests`` in PyCharm and try to run the any You might can see the ``KeyError: 'SPARK_HOME'`` because the environment variable has not been set yet. Go **Run -> Edit Configurations**, and set the environment variables as below. -Please make sure to specify your own path for ``SPARK_HOME`` rather than ``/.../spark``. After completing the variable, click **Okay** to apply the changes. +Please make sure to specify your own path for ``SPARK_HOME`` rather than ``/.../spark``. After completing the variable, click **OK** to apply the changes. .. image:: ../../../../docs/img/pycharm-with-pyspark2.png :alt: Setting up SPARK_HOME From 72da6f86cfbdd36dac3fc440c333bc1db1935edd Mon Sep 17 00:00:00 2001 From: zero323 Date: Wed, 7 Oct 2020 19:53:59 +0900 Subject: [PATCH 0185/1009] [SPARK-33002][PYTHON] Remove non-API annotations ### What changes were proposed in this pull request? This PR: - removes annotations for modules which are not part of the public API. - removes `__init__.pyi` files, if no annotations, beyond exports, are present. ### Why are the changes needed? Primarily to reduce maintenance overhead and as requested in the comments to https://github.com/apache/spark/pull/29591 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests and additional MyPy checks: ``` mypy --no-incremental --config python/mypy.ini python/pyspark MYPYPATH=python/ mypy --no-incremental --config python/mypy.ini examples/src/main/python/ml examples/src/main/python/sql examples/src/main/python/sql/streaming ``` Closes #29879 from zero323/SPARK-33002. 
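The trade-off this change removes is easiest to see side by side. The snippet below is a hypothetical, non-Spark example (the module and names `registry.py`, `_handlers`, `_private_cache`, `register` are made up for illustration) of the two styles the patch chooses between: a hand-maintained `.pyi` stub versus typing inline in the module itself, with `# type: ignore` used for values MyPy cannot infer, exactly as `serializers.py` and `shell.py` do in the diff below.

```python
# Hypothetical module `registry.py` (not Spark code), shown both ways.
#
# Stub style, removed for non-API modules by this patch: a sibling `registry.pyi`
# repeats the module's surface and must be kept in sync by hand:
#
#     from typing import Any, Callable, Dict
#     _handlers: Dict[str, Callable[..., Any]]
#     def register(name: str, fn: Callable[..., Any]) -> None: ...
#
# Inline style, kept instead: annotate (or opt out) in place.
from typing import Any, Callable, Dict

_handlers: Dict[str, Callable[..., Any]] = {}  # inline annotation, checked by MyPy

# For dynamic values MyPy cannot infer, opt out inline rather than stubbing.
_private_cache = {}  # type: ignore


def register(name: str, fn: Callable[..., Any]) -> None:
    """Record a handler under ``name``."""
    _handlers[name] = fn
```

Either style satisfies the MyPy checks listed above; inlining avoids maintaining a second copy of every private signature, which is the maintenance overhead the commit message refers to.
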
Authored-by: zero323 Signed-off-by: HyukjinKwon --- python/mypy.ini | 5 +- python/pyspark/_globals.pyi | 27 ----- python/pyspark/accumulators.pyi | 4 +- python/pyspark/broadcast.pyi | 4 +- python/pyspark/daemon.pyi | 29 ----- python/pyspark/find_spark_home.pyi | 17 --- python/pyspark/java_gateway.pyi | 24 ----- python/pyspark/join.pyi | 50 --------- python/pyspark/ml/__init__.pyi | 45 -------- python/pyspark/mllib/__init__.pyi | 32 ------ python/pyspark/rddsampler.pyi | 54 ---------- python/pyspark/resource/__init__.pyi | 31 ------ python/pyspark/serializers.py | 2 +- python/pyspark/serializers.pyi | 122 ---------------------- python/pyspark/shell.py | 4 +- python/pyspark/shell.pyi | 31 ------ python/pyspark/shuffle.pyi | 109 ------------------- python/pyspark/sql/avro/__init__.pyi | 22 ---- python/pyspark/sql/pandas/__init__.pyi | 17 --- python/pyspark/sql/pandas/serializers.pyi | 65 ------------ python/pyspark/sql/pandas/typehints.pyi | 33 ------ python/pyspark/sql/pandas/types.pyi | 41 -------- python/pyspark/sql/pandas/utils.pyi | 20 ---- python/pyspark/sql/utils.pyi | 55 ---------- python/pyspark/streaming/__init__.pyi | 23 ---- python/pyspark/streaming/util.pyi | 48 --------- python/pyspark/traceback_utils.pyi | 29 ----- python/pyspark/util.py | 2 +- python/pyspark/util.pyi | 35 ------- python/pyspark/worker.pyi | 73 ------------- 30 files changed, 14 insertions(+), 1039 deletions(-) delete mode 100644 python/pyspark/_globals.pyi delete mode 100644 python/pyspark/daemon.pyi delete mode 100644 python/pyspark/find_spark_home.pyi delete mode 100644 python/pyspark/java_gateway.pyi delete mode 100644 python/pyspark/join.pyi delete mode 100644 python/pyspark/ml/__init__.pyi delete mode 100644 python/pyspark/mllib/__init__.pyi delete mode 100644 python/pyspark/rddsampler.pyi delete mode 100644 python/pyspark/resource/__init__.pyi delete mode 100644 python/pyspark/serializers.pyi delete mode 100644 python/pyspark/shell.pyi delete mode 100644 python/pyspark/shuffle.pyi delete mode 100644 python/pyspark/sql/avro/__init__.pyi delete mode 100644 python/pyspark/sql/pandas/__init__.pyi delete mode 100644 python/pyspark/sql/pandas/serializers.pyi delete mode 100644 python/pyspark/sql/pandas/typehints.pyi delete mode 100644 python/pyspark/sql/pandas/types.pyi delete mode 100644 python/pyspark/sql/pandas/utils.pyi delete mode 100644 python/pyspark/sql/utils.pyi delete mode 100644 python/pyspark/streaming/__init__.pyi delete mode 100644 python/pyspark/streaming/util.pyi delete mode 100644 python/pyspark/traceback_utils.pyi delete mode 100644 python/pyspark/util.pyi delete mode 100644 python/pyspark/worker.pyi diff --git a/python/mypy.ini b/python/mypy.ini index a9523e622ca0d..4a5368a519097 100644 --- a/python/mypy.ini +++ b/python/mypy.ini @@ -32,5 +32,8 @@ ignore_missing_imports = True [mypy-pandas.*] ignore_missing_imports = True -[mypy-pyarrow] +[mypy-pyarrow.*] +ignore_missing_imports = True + +[mypy-psutil.*] ignore_missing_imports = True diff --git a/python/pyspark/_globals.pyi b/python/pyspark/_globals.pyi deleted file mode 100644 index 9453775621196..0000000000000 --- a/python/pyspark/_globals.pyi +++ /dev/null @@ -1,27 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -from typing import Any - -__ALL__: Any - -class _NoValueType: - def __new__(cls): ... - def __reduce__(self): ... diff --git a/python/pyspark/accumulators.pyi b/python/pyspark/accumulators.pyi index 94f8023d1102b..13a1792cd247d 100644 --- a/python/pyspark/accumulators.pyi +++ b/python/pyspark/accumulators.pyi @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. -from typing import Callable, Generic, Tuple, Type, TypeVar +from typing import Callable, Dict, Generic, Tuple, Type, TypeVar import socketserver.BaseRequestHandler # type: ignore @@ -27,6 +27,8 @@ U = TypeVar("U", bound=SupportsIAdd) import socketserver as SocketServer +_accumulatorRegistry: Dict[int, Accumulator] + class Accumulator(Generic[T]): aid: int accum_param: AccumulatorParam[T] diff --git a/python/pyspark/broadcast.pyi b/python/pyspark/broadcast.pyi index c2ea3c6f7d8b4..4b019a509a003 100644 --- a/python/pyspark/broadcast.pyi +++ b/python/pyspark/broadcast.pyi @@ -17,10 +17,12 @@ # under the License. import threading -from typing import Any, Generic, Optional, TypeVar +from typing import Any, Dict, Generic, Optional, TypeVar T = TypeVar("T") +_broadcastRegistry: Dict[int, Broadcast] + class Broadcast(Generic[T]): def __init__( self, diff --git a/python/pyspark/daemon.pyi b/python/pyspark/daemon.pyi deleted file mode 100644 index dfacf30a9f8a7..0000000000000 --- a/python/pyspark/daemon.pyi +++ /dev/null @@ -1,29 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyspark.serializers import ( # noqa: F401 - UTF8Deserializer as UTF8Deserializer, - read_int as read_int, - write_int as write_int, - write_with_length as write_with_length, -) -from typing import Any - -def compute_real_exit_code(exit_code: Any): ... -def worker(sock: Any, authenticated: Any): ... -def manager() -> None: ... 
diff --git a/python/pyspark/find_spark_home.pyi b/python/pyspark/find_spark_home.pyi deleted file mode 100644 index 217e5db960782..0000000000000 --- a/python/pyspark/find_spark_home.pyi +++ /dev/null @@ -1,17 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. diff --git a/python/pyspark/java_gateway.pyi b/python/pyspark/java_gateway.pyi deleted file mode 100644 index 5b45206dc045c..0000000000000 --- a/python/pyspark/java_gateway.pyi +++ /dev/null @@ -1,24 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyspark.serializers import UTF8Deserializer as UTF8Deserializer, read_int as read_int, write_with_length as write_with_length # type: ignore[attr-defined] -from typing import Any, Optional - -def launch_gateway(conf: Optional[Any] = ..., popen_kwargs: Optional[Any] = ...): ... -def local_connect_and_auth(port: Any, auth_secret: Any): ... -def ensure_callback_server_started(gw: Any) -> None: ... diff --git a/python/pyspark/join.pyi b/python/pyspark/join.pyi deleted file mode 100644 index e89e0fbbcda9b..0000000000000 --- a/python/pyspark/join.pyi +++ /dev/null @@ -1,50 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -from typing import Hashable, Iterable, Optional, Tuple, TypeVar - -from pyspark.resultiterable import ResultIterable -import pyspark.rdd - -K = TypeVar("K", bound=Hashable) -V = TypeVar("V") -U = TypeVar("U") - -def python_join( - rdd: pyspark.rdd.RDD[Tuple[K, V]], - other: pyspark.rdd.RDD[Tuple[K, U]], - numPartitions: int, -) -> pyspark.rdd.RDD[Tuple[K, Tuple[V, U]]]: ... -def python_right_outer_join( - rdd: pyspark.rdd.RDD[Tuple[K, V]], - other: pyspark.rdd.RDD[Tuple[K, U]], - numPartitions: int, -) -> pyspark.rdd.RDD[Tuple[K, Tuple[V, Optional[U]]]]: ... -def python_left_outer_join( - rdd: pyspark.rdd.RDD[Tuple[K, V]], - other: pyspark.rdd.RDD[Tuple[K, U]], - numPartitions: int, -) -> pyspark.rdd.RDD[Tuple[K, Tuple[Optional[V], U]]]: ... -def python_full_outer_join( - rdd: pyspark.rdd.RDD[Tuple[K, V]], - other: pyspark.rdd.RDD[Tuple[K, U]], - numPartitions: int, -) -> pyspark.rdd.RDD[Tuple[K, Tuple[Optional[V], Optional[U]]]]: ... -def python_cogroup( - rdds: Iterable[pyspark.rdd.RDD[Tuple[K, V]]], numPartitions: int -) -> pyspark.rdd.RDD[Tuple[K, Tuple[ResultIterable[V], ...]]]: ... diff --git a/python/pyspark/ml/__init__.pyi b/python/pyspark/ml/__init__.pyi deleted file mode 100644 index 8e3b8a5daeb08..0000000000000 --- a/python/pyspark/ml/__init__.pyi +++ /dev/null @@ -1,45 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyspark.ml import ( # noqa: F401 - classification as classification, - clustering as clustering, - evaluation as evaluation, - feature as feature, - fpm as fpm, - image as image, - linalg as linalg, - param as param, - recommendation as recommendation, - regression as regression, - stat as stat, - tuning as tuning, - util as util, -) -from pyspark.ml.base import ( # noqa: F401 - Estimator as Estimator, - Model as Model, - PredictionModel as PredictionModel, - Predictor as Predictor, - Transformer as Transformer, - UnaryTransformer as UnaryTransformer, -) -from pyspark.ml.pipeline import ( # noqa: F401 - Pipeline as Pipeline, - PipelineModel as PipelineModel, -) diff --git a/python/pyspark/mllib/__init__.pyi b/python/pyspark/mllib/__init__.pyi deleted file mode 100644 index 83032c4580fc8..0000000000000 --- a/python/pyspark/mllib/__init__.pyi +++ /dev/null @@ -1,32 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -# Names in __all__ with no definition: -# classification -# clustering -# feature -# fpm -# linalg -# random -# recommendation -# regression -# stat -# tree -# util diff --git a/python/pyspark/rddsampler.pyi b/python/pyspark/rddsampler.pyi deleted file mode 100644 index 8fbf72d90025c..0000000000000 --- a/python/pyspark/rddsampler.pyi +++ /dev/null @@ -1,54 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Any, Dict, Iterator, Optional, Tuple, TypeVar - -T = TypeVar("T") -U = TypeVar("U") -K = TypeVar("K") -V = TypeVar("V") - -class RDDSamplerBase: - def __init__(self, withReplacement: bool, seed: Optional[int] = ...) -> None: ... - def initRandomGenerator(self, split: int) -> None: ... - def getUniformSample(self) -> float: ... - def getPoissonSample(self, mean: float) -> int: ... - def func(self, split: int, iterator: Iterator[Any]) -> Iterator[Any]: ... - -class RDDSampler(RDDSamplerBase): - def __init__( - self, withReplacement: bool, fraction: float, seed: Optional[int] = ... - ) -> None: ... - def func(self, split: int, iterator: Iterator[T]) -> Iterator[T]: ... - -class RDDRangeSampler(RDDSamplerBase): - def __init__( - self, lowerBound: T, upperBound: T, seed: Optional[Any] = ... - ) -> None: ... - def func(self, split: int, iterator: Iterator[T]) -> Iterator[T]: ... - -class RDDStratifiedSampler(RDDSamplerBase): - def __init__( - self, - withReplacement: bool, - fractions: Dict[K, float], - seed: Optional[int] = ..., - ) -> None: ... - def func( - self, split: int, iterator: Iterator[Tuple[K, V]] - ) -> Iterator[Tuple[K, V]]: ... diff --git a/python/pyspark/resource/__init__.pyi b/python/pyspark/resource/__init__.pyi deleted file mode 100644 index 87a9b53c268ac..0000000000000 --- a/python/pyspark/resource/__init__.pyi +++ /dev/null @@ -1,31 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyspark.resource.information import ( # noqa: F401 - ResourceInformation as ResourceInformation, -) -from pyspark.resource.profile import ( # noqa: F401 - ResourceProfile as ResourceProfile, - ResourceProfileBuilder as ResourceProfileBuilder, -) -from pyspark.resource.requests import ( # noqa: F401 - ExecutorResourceRequest as ExecutorResourceRequest, - ExecutorResourceRequests as ExecutorResourceRequests, - TaskResourceRequest as TaskResourceRequest, - TaskResourceRequests as TaskResourceRequests, -) diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py index 80ce9b8408d4e..e6033dd7505c1 100644 --- a/python/pyspark/serializers.py +++ b/python/pyspark/serializers.py @@ -342,7 +342,7 @@ def dumps(self, obj): # Hack namedtuple, make it picklable -__cls = {} +__cls = {} # type: ignore def _restore(name, fields, value): diff --git a/python/pyspark/serializers.pyi b/python/pyspark/serializers.pyi deleted file mode 100644 index 26ef17c38d227..0000000000000 --- a/python/pyspark/serializers.pyi +++ /dev/null @@ -1,122 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Any - -class SpecialLengths: - END_OF_DATA_SECTION: int = ... - PYTHON_EXCEPTION_THROWN: int = ... - TIMING_DATA: int = ... - END_OF_STREAM: int = ... - NULL: int = ... - START_ARROW_STREAM: int = ... - -class Serializer: - def dump_stream(self, iterator: Any, stream: Any) -> None: ... - def load_stream(self, stream: Any) -> None: ... - def __eq__(self, other: Any) -> Any: ... - def __ne__(self, other: Any) -> Any: ... - def __hash__(self) -> Any: ... - -class FramedSerializer(Serializer): - def __init__(self) -> None: ... - def dump_stream(self, iterator: Any, stream: Any) -> None: ... - def load_stream(self, stream: Any) -> None: ... - def dumps(self, obj: Any) -> None: ... - def loads(self, obj: Any) -> None: ... - -class BatchedSerializer(Serializer): - UNLIMITED_BATCH_SIZE: int = ... - UNKNOWN_BATCH_SIZE: int = ... - serializer: Any = ... - batchSize: Any = ... - def __init__(self, serializer: Any, batchSize: Any = ...) -> None: ... - def dump_stream(self, iterator: Any, stream: Any) -> None: ... - def load_stream(self, stream: Any): ... - -class FlattenedValuesSerializer(BatchedSerializer): - def __init__(self, serializer: Any, batchSize: int = ...) -> None: ... - def load_stream(self, stream: Any): ... 
- -class AutoBatchedSerializer(BatchedSerializer): - bestSize: Any = ... - def __init__(self, serializer: Any, bestSize: Any = ...) -> None: ... - def dump_stream(self, iterator: Any, stream: Any) -> None: ... - -class CartesianDeserializer(Serializer): - key_ser: Any = ... - val_ser: Any = ... - def __init__(self, key_ser: Any, val_ser: Any) -> None: ... - def load_stream(self, stream: Any): ... - -class PairDeserializer(Serializer): - key_ser: Any = ... - val_ser: Any = ... - def __init__(self, key_ser: Any, val_ser: Any) -> None: ... - def load_stream(self, stream: Any): ... - -class NoOpSerializer(FramedSerializer): - def loads(self, obj: Any): ... - def dumps(self, obj: Any): ... - -class PickleSerializer(FramedSerializer): - def dumps(self, obj: Any): ... - def loads(self, obj: Any, encoding: str = ...): ... - -class CloudPickleSerializer(PickleSerializer): - def dumps(self, obj: Any): ... - -class MarshalSerializer(FramedSerializer): - def dumps(self, obj: Any): ... - def loads(self, obj: Any): ... - -class AutoSerializer(FramedSerializer): - def __init__(self) -> None: ... - def dumps(self, obj: Any): ... - def loads(self, obj: Any): ... - -class CompressedSerializer(FramedSerializer): - serializer: Any = ... - def __init__(self, serializer: Any) -> None: ... - def dumps(self, obj: Any): ... - def loads(self, obj: Any): ... - -class UTF8Deserializer(Serializer): - use_unicode: Any = ... - def __init__(self, use_unicode: bool = ...) -> None: ... - def loads(self, stream: Any): ... - def load_stream(self, stream: Any) -> None: ... - -class ChunkedStream: - buffer_size: Any = ... - buffer: Any = ... - current_pos: int = ... - wrapped: Any = ... - def __init__(self, wrapped: Any, buffer_size: Any) -> None: ... - def write(self, bytes: Any) -> None: ... - def close(self) -> None: ... - @property - def closed(self): ... - -def write_with_length(obj: Any, stream: Any): ... -def pack_long(value): ... -def read_int(stream): ... -def read_long(stream): ... -def read_bool(stream): ... -def write_int(value, stream): ... -def write_long(value, stream): ... diff --git a/python/pyspark/shell.py b/python/pyspark/shell.py index cde163bd2d73d..0c6cc1302ff62 100644 --- a/python/pyspark/shell.py +++ b/python/pyspark/shell.py @@ -32,10 +32,10 @@ if os.environ.get("SPARK_EXECUTOR_URI"): SparkContext.setSystemProperty("spark.executor.uri", os.environ["SPARK_EXECUTOR_URI"]) -SparkContext._ensure_initialized() +SparkContext._ensure_initialized() # type: ignore try: - spark = SparkSession._create_shell_session() + spark = SparkSession._create_shell_session() # type: ignore except Exception: import sys import traceback diff --git a/python/pyspark/shell.pyi b/python/pyspark/shell.pyi deleted file mode 100644 index 0760309542f8d..0000000000000 --- a/python/pyspark/shell.pyi +++ /dev/null @@ -1,31 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyspark import SparkConf as SparkConf # noqa: F401 -from pyspark.context import SparkContext as SparkContext -from pyspark.sql import SQLContext as SQLContext, SparkSession as SparkSession -from typing import Any, Callable - -from pyspark.sql.dataframe import DataFrame - -spark: SparkSession -sc: SparkContext -sql: Callable[[str], DataFrame] -sqlContext: SQLContext -sqlCtx: SQLContext -code: Any diff --git a/python/pyspark/shuffle.pyi b/python/pyspark/shuffle.pyi deleted file mode 100644 index 10648c51dca8f..0000000000000 --- a/python/pyspark/shuffle.pyi +++ /dev/null @@ -1,109 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyspark.serializers import ( # noqa: F401 - AutoBatchedSerializer as AutoBatchedSerializer, - BatchedSerializer as BatchedSerializer, - CompressedSerializer as CompressedSerializer, - FlattenedValuesSerializer as FlattenedValuesSerializer, - PickleSerializer as PickleSerializer, -) -from pyspark.util import fail_on_stopiteration as fail_on_stopiteration # noqa: F401 -from typing import Any, Optional - -process: Any - -def get_used_memory(): ... - -MemoryBytesSpilled: int -DiskBytesSpilled: int - -class Aggregator: - createCombiner: Any = ... - mergeValue: Any = ... - mergeCombiners: Any = ... - def __init__( - self, createCombiner: Any, mergeValue: Any, mergeCombiners: Any - ) -> None: ... - -class SimpleAggregator(Aggregator): - def __init__(self, combiner: Any): ... - -class Merger: - agg: Any = ... - def __init__(self, aggregator: Any) -> None: ... - def mergeValues(self, iterator: Any) -> None: ... - def mergeCombiners(self, iterator: Any) -> None: ... - def items(self) -> None: ... - -class ExternalMerger(Merger): - MAX_TOTAL_PARTITIONS: int = ... - memory_limit: Any = ... - serializer: Any = ... - localdirs: Any = ... - partitions: Any = ... - batch: Any = ... - scale: Any = ... - data: Any = ... - pdata: Any = ... - spills: int = ... - def __init__( - self, - aggregator: Any, - memory_limit: int = ..., - serializer: Optional[Any] = ..., - localdirs: Optional[Any] = ..., - scale: int = ..., - partitions: int = ..., - batch: int = ..., - ) -> None: ... - def mergeValues(self, iterator: Any) -> None: ... - def mergeCombiners(self, iterator: Any, limit: Optional[Any] = ...) -> None: ... - def items(self): ... - -class ExternalSorter: - memory_limit: Any = ... - local_dirs: Any = ... - serializer: Any = ... - def __init__(self, memory_limit: Any, serializer: Optional[Any] = ...) -> None: ... - def sorted(self, iterator: Any, key: Optional[Any] = ..., reverse: bool = ...): ... - -class ExternalList: - LIMIT: int = ... - values: Any = ... - count: Any = ... - def __init__(self, values: Any) -> None: ... 
- def __iter__(self) -> Any: ... - def __len__(self): ... - def append(self, value: Any) -> None: ... - def __del__(self) -> None: ... - -class ExternalListOfList(ExternalList): - count: Any = ... - def __init__(self, values: Any) -> None: ... - def append(self, value: Any) -> None: ... - def __iter__(self) -> Any: ... - -class GroupByKey: - iterator: Any = ... - def __init__(self, iterator: Any) -> None: ... - def __iter__(self) -> Any: ... - -class ExternalGroupBy(ExternalMerger): - SORT_KEY_LIMIT: int = ... - def flattened_serializer(self): ... diff --git a/python/pyspark/sql/avro/__init__.pyi b/python/pyspark/sql/avro/__init__.pyi deleted file mode 100644 index 0d7871da4c100..0000000000000 --- a/python/pyspark/sql/avro/__init__.pyi +++ /dev/null @@ -1,22 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -# Names in __all__ with no definition: -# functions diff --git a/python/pyspark/sql/pandas/__init__.pyi b/python/pyspark/sql/pandas/__init__.pyi deleted file mode 100644 index 217e5db960782..0000000000000 --- a/python/pyspark/sql/pandas/__init__.pyi +++ /dev/null @@ -1,17 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. diff --git a/python/pyspark/sql/pandas/serializers.pyi b/python/pyspark/sql/pandas/serializers.pyi deleted file mode 100644 index 8be3c0dcbc9ad..0000000000000 --- a/python/pyspark/sql/pandas/serializers.pyi +++ /dev/null @@ -1,65 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyspark.serializers import ( # noqa: F401 - Serializer as Serializer, - UTF8Deserializer as UTF8Deserializer, - read_int as read_int, - write_int as write_int, -) -from typing import Any - -class SpecialLengths: - END_OF_DATA_SECTION: int = ... - PYTHON_EXCEPTION_THROWN: int = ... - TIMING_DATA: int = ... - END_OF_STREAM: int = ... - NULL: int = ... - START_ARROW_STREAM: int = ... - -class ArrowCollectSerializer(Serializer): - serializer: Any = ... - def __init__(self) -> None: ... - def dump_stream(self, iterator: Any, stream: Any): ... - def load_stream(self, stream: Any) -> None: ... - -class ArrowStreamSerializer(Serializer): - def dump_stream(self, iterator: Any, stream: Any) -> None: ... - def load_stream(self, stream: Any) -> None: ... - -class ArrowStreamPandasSerializer(ArrowStreamSerializer): - def __init__( - self, timezone: Any, safecheck: Any, assign_cols_by_name: Any - ) -> None: ... - def arrow_to_pandas(self, arrow_column: Any): ... - def dump_stream(self, iterator: Any, stream: Any) -> None: ... - def load_stream(self, stream: Any) -> None: ... - -class ArrowStreamPandasUDFSerializer(ArrowStreamPandasSerializer): - def __init__( - self, - timezone: Any, - safecheck: Any, - assign_cols_by_name: Any, - df_for_struct: bool = ..., - ) -> None: ... - def arrow_to_pandas(self, arrow_column: Any): ... - def dump_stream(self, iterator: Any, stream: Any): ... - -class CogroupUDFSerializer(ArrowStreamPandasUDFSerializer): - def load_stream(self, stream: Any) -> None: ... diff --git a/python/pyspark/sql/pandas/typehints.pyi b/python/pyspark/sql/pandas/typehints.pyi deleted file mode 100644 index eea9c86225332..0000000000000 --- a/python/pyspark/sql/pandas/typehints.pyi +++ /dev/null @@ -1,33 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyspark.sql.pandas.utils import ( # noqa: F401 - require_minimum_pandas_version as require_minimum_pandas_version, -) -from typing import Any, Optional - -def infer_eval_type(sig: Any): ... -def check_tuple_annotation( - annotation: Any, parameter_check_func: Optional[Any] = ... -): ... -def check_iterator_annotation( - annotation: Any, parameter_check_func: Optional[Any] = ... -): ... -def check_union_annotation( - annotation: Any, parameter_check_func: Optional[Any] = ... -): ... 
diff --git a/python/pyspark/sql/pandas/types.pyi b/python/pyspark/sql/pandas/types.pyi deleted file mode 100644 index 5ae29bd273180..0000000000000 --- a/python/pyspark/sql/pandas/types.pyi +++ /dev/null @@ -1,41 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyspark.sql.types import ( # noqa: F401 - ArrayType as ArrayType, - BinaryType as BinaryType, - BooleanType as BooleanType, - ByteType as ByteType, - DateType as DateType, - DecimalType as DecimalType, - DoubleType as DoubleType, - FloatType as FloatType, - IntegerType as IntegerType, - LongType as LongType, - ShortType as ShortType, - StringType as StringType, - StructField as StructField, - StructType as StructType, - TimestampType as TimestampType, -) -from typing import Any - -def to_arrow_type(dt: Any): ... -def to_arrow_schema(schema: Any): ... -def from_arrow_type(at: Any): ... -def from_arrow_schema(arrow_schema: Any): ... diff --git a/python/pyspark/sql/pandas/utils.pyi b/python/pyspark/sql/pandas/utils.pyi deleted file mode 100644 index e4d315b0ce205..0000000000000 --- a/python/pyspark/sql/pandas/utils.pyi +++ /dev/null @@ -1,20 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -def require_minimum_pandas_version() -> None: ... -def require_minimum_pyarrow_version() -> None: ... diff --git a/python/pyspark/sql/utils.pyi b/python/pyspark/sql/utils.pyi deleted file mode 100644 index c11e4bed54e7f..0000000000000 --- a/python/pyspark/sql/utils.pyi +++ /dev/null @@ -1,55 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -from pyspark import SparkContext as SparkContext # noqa: F401 -from typing import Any, Optional - -class CapturedException(Exception): - desc: Any = ... - stackTrace: Any = ... - cause: Any = ... - def __init__( - self, desc: Any, stackTrace: Any, cause: Optional[Any] = ... - ) -> None: ... - -class AnalysisException(CapturedException): ... -class ParseException(CapturedException): ... -class IllegalArgumentException(CapturedException): ... -class StreamingQueryException(CapturedException): ... -class QueryExecutionException(CapturedException): ... -class PythonException(CapturedException): ... -class UnknownException(CapturedException): ... - -def convert_exception(e: Any): ... -def capture_sql_exception(f: Any): ... -def install_exception_handler() -> None: ... -def toJArray(gateway: Any, jtype: Any, arr: Any): ... -def require_test_compiled() -> None: ... - -class ForeachBatchFunction: - sql_ctx: Any = ... - func: Any = ... - def __init__(self, sql_ctx: Any, func: Any) -> None: ... - error: Any = ... - def call(self, jdf: Any, batch_id: Any) -> None: ... - class Java: - implements: Any = ... - -def to_str(value: Any): ... diff --git a/python/pyspark/streaming/__init__.pyi b/python/pyspark/streaming/__init__.pyi deleted file mode 100644 index 281c06e51cc60..0000000000000 --- a/python/pyspark/streaming/__init__.pyi +++ /dev/null @@ -1,23 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyspark.streaming.context import StreamingContext as StreamingContext # noqa: F401 -from pyspark.streaming.dstream import DStream as DStream # noqa: F401 -from pyspark.streaming.listener import ( # noqa: F401 - StreamingListener as StreamingListener, -) diff --git a/python/pyspark/streaming/util.pyi b/python/pyspark/streaming/util.pyi deleted file mode 100644 index d552eb15f4818..0000000000000 --- a/python/pyspark/streaming/util.pyi +++ /dev/null @@ -1,48 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -from typing import Any, Optional - -class TransformFunction: - ctx: Any - func: Any - deserializers: Any - rdd_wrap_func: Any - failure: Any - def __init__(self, ctx, func, *deserializers) -> None: ... - def rdd_wrapper(self, func): ... - def call(self, milliseconds, jrdds): ... - def getLastFailure(self): ... - class Java: - implements: Any - -class TransformFunctionSerializer: - ctx: Any - serializer: Any - gateway: Any - failure: Any - def __init__(self, ctx, serializer, gateway: Optional[Any] = ...) -> None: ... - def dumps(self, id): ... - def loads(self, data): ... - def getLastFailure(self): ... - class Java: - implements: Any - -def rddToFileName(prefix, suffix, timestamp): ... diff --git a/python/pyspark/traceback_utils.pyi b/python/pyspark/traceback_utils.pyi deleted file mode 100644 index 33b1b7dc3227f..0000000000000 --- a/python/pyspark/traceback_utils.pyi +++ /dev/null @@ -1,29 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from collections import namedtuple -from typing import Any - -CallSite = namedtuple("CallSite", "function file linenum") - -def first_spark_call(): ... - -class SCCallSiteSync: - def __init__(self, sc: Any) -> None: ... - def __enter__(self) -> None: ... - def __exit__(self, type: Any, value: Any, tb: Any) -> None: ... diff --git a/python/pyspark/util.py b/python/pyspark/util.py index 86e5ab5a01585..d2ca484e8ace6 100644 --- a/python/pyspark/util.py +++ b/python/pyspark/util.py @@ -23,7 +23,7 @@ from py4j.clientserver import ClientServer -__all__ = [] +__all__ = [] # type: ignore def print_exec(stream): diff --git a/python/pyspark/util.pyi b/python/pyspark/util.pyi deleted file mode 100644 index 023b409831459..0000000000000 --- a/python/pyspark/util.pyi +++ /dev/null @@ -1,35 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -from typing import Any, Tuple -from pyspark._typing import F - -import threading - -def print_exec(stream: Any) -> None: ... - -class VersionUtils: - @staticmethod - def majorMinorVersion(sparkVersion: str) -> Tuple[int, int]: ... - -def fail_on_stopiteration(f: F) -> F: ... - -class InheritableThread(threading.Thread): - def __init__(self, target: Any, *args: Any, **kwargs: Any): ... - def __del__(self) -> None: ... diff --git a/python/pyspark/worker.pyi b/python/pyspark/worker.pyi deleted file mode 100644 index cc264823cc867..0000000000000 --- a/python/pyspark/worker.pyi +++ /dev/null @@ -1,73 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyspark import shuffle as shuffle -from pyspark.broadcast import Broadcast as Broadcast -from pyspark.files import SparkFiles as SparkFiles -from pyspark.java_gateway import local_connect_and_auth as local_connect_and_auth -from pyspark.rdd import PythonEvalType as PythonEvalType -from pyspark.resource import ResourceInformation as ResourceInformation -from pyspark.serializers import ( - BatchedSerializer as BatchedSerializer, - PickleSerializer as PickleSerializer, - SpecialLengths as SpecialLengths, - UTF8Deserializer as UTF8Deserializer, - read_bool as read_bool, - read_int as read_int, - read_long as read_long, - write_int as write_int, - write_long as write_long, - write_with_length as write_with_length, -) -from pyspark.sql.pandas.serializers import ( - ArrowStreamPandasUDFSerializer as ArrowStreamPandasUDFSerializer, - CogroupUDFSerializer as CogroupUDFSerializer, -) -from pyspark.sql.pandas.types import to_arrow_type as to_arrow_type -from pyspark.sql.types import StructType as StructType -from pyspark.taskcontext import ( - BarrierTaskContext as BarrierTaskContext, - TaskContext as TaskContext, -) -from pyspark.util import fail_on_stopiteration as fail_on_stopiteration -from typing import Any - -has_resource_module: bool -pickleSer: Any -utf8_deserializer: Any - -def report_times(outfile: Any, boot: Any, init: Any, finish: Any) -> None: ... -def add_path(path: Any) -> None: ... -def read_command(serializer: Any, file: Any): ... -def chain(f: Any, g: Any): ... -def wrap_udf(f: Any, return_type: Any): ... -def wrap_scalar_pandas_udf(f: Any, return_type: Any): ... -def wrap_pandas_iter_udf(f: Any, return_type: Any): ... 
-def wrap_cogrouped_map_pandas_udf(f: Any, return_type: Any, argspec: Any): ... -def wrap_grouped_map_pandas_udf(f: Any, return_type: Any, argspec: Any): ... -def wrap_grouped_agg_pandas_udf(f: Any, return_type: Any): ... -def wrap_window_agg_pandas_udf( - f: Any, return_type: Any, runner_conf: Any, udf_index: Any -): ... -def wrap_unbounded_window_agg_pandas_udf(f: Any, return_type: Any): ... -def wrap_bounded_window_agg_pandas_udf(f: Any, return_type: Any): ... -def read_single_udf( - pickleSer: Any, infile: Any, eval_type: Any, runner_conf: Any, udf_index: Any -): ... -def read_udfs(pickleSer: Any, infile: Any, eval_type: Any): ... -def main(infile: Any, outfile: Any) -> None: ... From 94d648dff5f24b4dea3873fd8e6609b1a099d0a2 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Wed, 7 Oct 2020 20:16:40 +0900 Subject: [PATCH 0186/1009] [SPARK-33036][SQL] Refactor RewriteCorrelatedScalarSubquery code to replace exprIds in a bottom-up manner ### What changes were proposed in this pull request? This PR intends to refactor code in `RewriteCorrelatedScalarSubquery` for replacing `ExprId`s in a bottom-up manner instead of doing in a top-down one. This PR comes from the talk with cloud-fan in https://github.com/apache/spark/pull/29585#discussion_r490371252. ### Why are the changes needed? To improve code. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #29913 from maropu/RefactorRewriteCorrelatedScalarSubquery. Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- .../sql/catalyst/optimizer/subquery.scala | 80 ++++++++++++------- 1 file changed, 51 insertions(+), 29 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala index a168dcd7a83f5..f184253ef0595 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala @@ -338,20 +338,15 @@ object PullupCorrelatedPredicates extends Rule[LogicalPlan] with PredicateHelper object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { /** * Extract all correlated scalar subqueries from an expression. The subqueries are collected using - * the given collector. To avoid the reuse of `exprId`s, this method generates new `exprId` - * for the subqueries and rewrite references in the given `expression`. - * This method returns extracted subqueries and the corresponding `exprId`s and these values - * will be used later in `constructLeftJoins` for building the child plan that - * returns subquery output with the `exprId`s. + * the given collector. The expression is rewritten and returned. */ private def extractCorrelatedScalarSubqueries[E <: Expression]( expression: E, - subqueries: ArrayBuffer[(ScalarSubquery, ExprId)]): E = { + subqueries: ArrayBuffer[ScalarSubquery]): E = { val newExpression = expression transform { case s: ScalarSubquery if s.children.nonEmpty => - val newExprId = NamedExpression.newExprId - subqueries += s -> newExprId - s.plan.output.head.withExprId(newExprId) + subqueries += s + s.plan.output.head } newExpression.asInstanceOf[E] } @@ -512,19 +507,23 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { /** * Construct a new child plan by left joining the given subqueries to a base plan. 
+ * This method returns the child plan and an attribute mapping + * for the updated `ExprId`s of subqueries. If the non-empty mapping returned, + * this rule will rewrite subquery references in a parent plan based on it. */ private def constructLeftJoins( child: LogicalPlan, - subqueries: ArrayBuffer[(ScalarSubquery, ExprId)]): LogicalPlan = { - subqueries.foldLeft(child) { - case (currentChild, (ScalarSubquery(query, conditions, _), newExprId)) => + subqueries: ArrayBuffer[ScalarSubquery]): (LogicalPlan, AttributeMap[Attribute]) = { + val subqueryAttrMapping = ArrayBuffer[(Attribute, Attribute)]() + val newChild = subqueries.foldLeft(child) { + case (currentChild, ScalarSubquery(query, conditions, _)) => val origOutput = query.output.head val resultWithZeroTups = evalSubqueryOnZeroTups(query) if (resultWithZeroTups.isEmpty) { // CASE 1: Subquery guaranteed not to have the COUNT bug Project( - currentChild.output :+ Alias(origOutput, origOutput.name)(exprId = newExprId), + currentChild.output :+ origOutput, Join(currentChild, query, LeftOuter, conditions.reduceOption(And), JoinHint.NONE)) } else { // Subquery might have the COUNT bug. Add appropriate corrections. @@ -544,12 +543,13 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { if (havingNode.isEmpty) { // CASE 2: Subquery with no HAVING clause + val subqueryResultExpr = + Alias(If(IsNull(alwaysTrueRef), + resultWithZeroTups.get, + aggValRef), origOutput.name)() + subqueryAttrMapping += ((origOutput, subqueryResultExpr.toAttribute)) Project( - currentChild.output :+ - Alias( - If(IsNull(alwaysTrueRef), - resultWithZeroTups.get, - aggValRef), origOutput.name)(exprId = newExprId), + currentChild.output :+ subqueryResultExpr, Join(currentChild, Project(query.output :+ alwaysTrueExpr, query), LeftOuter, conditions.reduceOption(And), JoinHint.NONE)) @@ -576,7 +576,9 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { (IsNull(alwaysTrueRef), resultWithZeroTups.get), (Not(havingNode.get.condition), Literal.create(null, aggValRef.dataType))), aggValRef), - origOutput.name)(exprId = newExprId) + origOutput.name)() + + subqueryAttrMapping += ((origOutput, caseExpr.toAttribute)) Project( currentChild.output :+ caseExpr, @@ -587,6 +589,20 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { } } } + (newChild, AttributeMap(subqueryAttrMapping.toSeq)) + } + + private def updateAttrs[E <: Expression]( + exprs: Seq[E], + attrMap: AttributeMap[Attribute]): Seq[E] = { + if (attrMap.nonEmpty) { + val newExprs = exprs.map { _.transform { + case a: AttributeReference => attrMap.getOrElse(a, a) + }} + newExprs.asInstanceOf[Seq[E]] + } else { + exprs + } } /** @@ -595,36 +611,42 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { */ def apply(plan: LogicalPlan): LogicalPlan = plan transformUpWithNewOutput { case a @ Aggregate(grouping, expressions, child) => - val subqueries = ArrayBuffer.empty[(ScalarSubquery, ExprId)] - val newExpressions = expressions.map(extractCorrelatedScalarSubqueries(_, subqueries)) + val subqueries = ArrayBuffer.empty[ScalarSubquery] + val rewriteExprs = expressions.map(extractCorrelatedScalarSubqueries(_, subqueries)) if (subqueries.nonEmpty) { // We currently only allow correlated subqueries in an aggregate if they are part of the // grouping expressions. As a result we need to replace all the scalar subqueries in the // grouping expressions by their result. 
val newGrouping = grouping.map { e => - subqueries.find(_._1.semanticEquals(e)).map(_._1.plan.output.head).getOrElse(e) + subqueries.find(_.semanticEquals(e)).map(_.plan.output.head).getOrElse(e) } - val newAgg = Aggregate(newGrouping, newExpressions, constructLeftJoins(child, subqueries)) + val (newChild, subqueryAttrMapping) = constructLeftJoins(child, subqueries) + val newExprs = updateAttrs(rewriteExprs, subqueryAttrMapping) + val newAgg = Aggregate(newGrouping, newExprs, newChild) val attrMapping = a.output.zip(newAgg.output) newAgg -> attrMapping } else { a -> Nil } case p @ Project(expressions, child) => - val subqueries = ArrayBuffer.empty[(ScalarSubquery, ExprId)] - val newExpressions = expressions.map(extractCorrelatedScalarSubqueries(_, subqueries)) + val subqueries = ArrayBuffer.empty[ScalarSubquery] + val rewriteExprs = expressions.map(extractCorrelatedScalarSubqueries(_, subqueries)) if (subqueries.nonEmpty) { - val newProj = Project(newExpressions, constructLeftJoins(child, subqueries)) + val (newChild, subqueryAttrMapping) = constructLeftJoins(child, subqueries) + val newExprs = updateAttrs(rewriteExprs, subqueryAttrMapping) + val newProj = Project(newExprs, newChild) val attrMapping = p.output.zip(newProj.output) newProj -> attrMapping } else { p -> Nil } case f @ Filter(condition, child) => - val subqueries = ArrayBuffer.empty[(ScalarSubquery, ExprId)] - val newCondition = extractCorrelatedScalarSubqueries(condition, subqueries) + val subqueries = ArrayBuffer.empty[ScalarSubquery] + val rewriteCondition = extractCorrelatedScalarSubqueries(condition, subqueries) if (subqueries.nonEmpty) { - val newProj = Project(f.output, Filter(newCondition, constructLeftJoins(child, subqueries))) + val (newChild, subqueryAttrMapping) = constructLeftJoins(child, subqueries) + val newCondition = updateAttrs(Seq(rewriteCondition), subqueryAttrMapping).head + val newProj = Project(f.output, Filter(newCondition, newChild)) val attrMapping = f.output.zip(newProj.output) newProj -> attrMapping } else { From 3099fd9f9d576c96642c0e66c74797b8882b70bb Mon Sep 17 00:00:00 2001 From: Stijn De Haes Date: Wed, 7 Oct 2020 09:52:00 -0700 Subject: [PATCH 0187/1009] [SPARK-32067][K8S] Use unique ConfigMap name for executor pod template ### What changes were proposed in this pull request? The pod template configmap always had the same name. This PR makes it unique. ### Why are the changes needed? If you scheduled 2 spark jobs they will both use the same configmap name this will result in conflicts. This PR fixes that **BEFORE** ``` $ kubectl get cm --all-namespaces -w | grep podspec podspec-configmap 1 65s ``` **AFTER** ``` $ kubectl get cm --all-namespaces -w | grep podspec aaece65ef82e4a30b7b7800aad600d4f spark-test-app-aac9f37502b2ca55-driver-podspec-conf-map 1 0s ``` This can be seen when running the integration tests ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit tests and the integration tests test if this works Closes #29934 from stijndehaes/bugfix/SPARK-32067-unique-name-for-template-configmap. 
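For illustration, here is a minimal sketch of the naming scheme this patch introduces (the second prefix below is made up for the example; the real code builds the ConfigMap with fabric8 builders, as shown in the diff that follows):

```
// Illustrative sketch only, not the actual PodTemplateConfigMapStep code:
// the ConfigMap name is now derived from the per-application resource name
// prefix instead of the fixed constant "podspec-configmap".
object PodTemplateConfigMapNamingSketch {
  // Matches the updated constant in Constants.scala.
  val POD_TEMPLATE_CONFIGMAP = "driver-podspec-conf-map"

  def configMapName(resourceNamePrefix: String): String =
    s"$resourceNamePrefix-$POD_TEMPLATE_CONFIGMAP"

  def main(args: Array[String]): Unit = {
    // The first prefix comes from the output above; the second is hypothetical.
    val jobA = configMapName("spark-test-app-aac9f37502b2ca55")
    val jobB = configMapName("spark-pi-0123456789abcdef")
    assert(jobA != jobB) // previously both jobs shared the name "podspec-configmap"
    println(jobA) // spark-test-app-aac9f37502b2ca55-driver-podspec-conf-map
    println(jobB)
  }
}
```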
Authored-by: Stijn De Haes Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/deploy/k8s/Constants.scala | 2 +- .../deploy/k8s/features/PodTemplateConfigMapStep.scala | 6 ++++-- .../k8s/features/PodTemplateConfigMapStepSuite.scala | 8 +++++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Constants.scala index c9c5aa606cf55..991205a47f846 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Constants.scala @@ -77,7 +77,7 @@ private[spark] object Constants { val EXECUTOR_POD_SPEC_TEMPLATE_FILE_NAME = "pod-spec-template.yml" val EXECUTOR_POD_SPEC_TEMPLATE_MOUNTPATH = "/opt/spark/pod-template" val POD_TEMPLATE_VOLUME = "pod-template-volume" - val POD_TEMPLATE_CONFIGMAP = "podspec-configmap" + val POD_TEMPLATE_CONFIGMAP = "driver-podspec-conf-map" val POD_TEMPLATE_KEY = "podspec-configmap-key" // Miscellaneous diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStep.scala index 7f41ca43589b6..1040419a4a6e9 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStep.scala @@ -31,6 +31,8 @@ private[spark] class PodTemplateConfigMapStep(conf: KubernetesConf) private val hasTemplate = conf.contains(KUBERNETES_EXECUTOR_PODTEMPLATE_FILE) + private val configmapName = s"${conf.resourceNamePrefix}-$POD_TEMPLATE_CONFIGMAP" + def configurePod(pod: SparkPod): SparkPod = { if (hasTemplate) { val podWithVolume = new PodBuilder(pod.pod) @@ -38,7 +40,7 @@ private[spark] class PodTemplateConfigMapStep(conf: KubernetesConf) .addNewVolume() .withName(POD_TEMPLATE_VOLUME) .withNewConfigMap() - .withName(POD_TEMPLATE_CONFIGMAP) + .withName(configmapName) .addNewItem() .withKey(POD_TEMPLATE_KEY) .withPath(EXECUTOR_POD_SPEC_TEMPLATE_FILE_NAME) @@ -76,7 +78,7 @@ private[spark] class PodTemplateConfigMapStep(conf: KubernetesConf) val podTemplateString = Files.toString(new File(podTemplateFile), StandardCharsets.UTF_8) Seq(new ConfigMapBuilder() .withNewMetadata() - .withName(POD_TEMPLATE_CONFIGMAP) + .withName(configmapName) .endMetadata() .addToData(POD_TEMPLATE_KEY, podTemplateString) .build()) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStepSuite.scala index 051320fa44c5e..1b38fd6a0d2ab 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStepSuite.scala @@ -16,13 +16,14 @@ */ package org.apache.spark.deploy.k8s.features -import java.io.{File, PrintWriter} +import java.io.PrintWriter import java.nio.file.Files import io.fabric8.kubernetes.api.model.ConfigMap import org.apache.spark.{SparkConf, SparkFunSuite} import 
org.apache.spark.deploy.k8s._ +import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.util.Utils class PodTemplateConfigMapStepSuite extends SparkFunSuite { @@ -56,8 +57,9 @@ class PodTemplateConfigMapStepSuite extends SparkFunSuite { assert(configuredPod.pod.getSpec.getVolumes.size() === 1) val volume = configuredPod.pod.getSpec.getVolumes.get(0) + val generatedResourceName = s"${kubernetesConf.resourceNamePrefix}-$POD_TEMPLATE_CONFIGMAP" assert(volume.getName === Constants.POD_TEMPLATE_VOLUME) - assert(volume.getConfigMap.getName === Constants.POD_TEMPLATE_CONFIGMAP) + assert(volume.getConfigMap.getName === generatedResourceName) assert(volume.getConfigMap.getItems.size() === 1) assert(volume.getConfigMap.getItems.get(0).getKey === Constants.POD_TEMPLATE_KEY) assert(volume.getConfigMap.getItems.get(0).getPath === @@ -70,7 +72,7 @@ class PodTemplateConfigMapStepSuite extends SparkFunSuite { val resources = step.getAdditionalKubernetesResources() assert(resources.size === 1) - assert(resources.head.getMetadata.getName === Constants.POD_TEMPLATE_CONFIGMAP) + assert(resources.head.getMetadata.getName === generatedResourceName) assert(resources.head.isInstanceOf[ConfigMap]) val configMap = resources.head.asInstanceOf[ConfigMap] assert(configMap.getData.size() === 1) From a127387a53e1a24e76de83c5a1858fcdbd38c3a2 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 7 Oct 2020 12:27:23 -0700 Subject: [PATCH 0188/1009] [SPARK-33082][SQL] Remove hive-1.2 workaround code ### What changes were proposed in this pull request? This PR removes old Hive-1.2 profile related workaround code. ### Why are the changes needed? To simply the code. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CI. Closes #29961 from dongjoon-hyun/SPARK-HIVE12. 
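The pattern is the same across the files below: each `HiveUtils.isHive23` branch collapses into the Hive 2.3 code path. A minimal sketch of the before/after shape (the version literal is only a stand-in for `HiveVersionInfo.getVersion` in this sketch):

```
// Illustrative sketch only: the shape of the cleanup, not the real HiveUtils.
object HiveVersionSketch {
  // Stand-in for HiveVersionInfo.getVersion; the built-in Hive is 2.3.x now.
  val hiveVersion: String = "2.3.7"

  // Before (removed): val isHive23 = hiveVersion.startsWith("2.3")
  //                   val builtinHiveVersion = if (isHive23) hiveVersion else "1.2.1"
  // After (kept): a single code path.
  val builtinHiveVersion: String = hiveVersion

  def main(args: Array[String]): Unit =
    // Matches the updated "2.3.7" assertions in the Thrift server tests below.
    println(s"built-in Hive version: $builtinHiveVersion")
}
```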
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../BaseScriptTransformationSuite.scala | 12 +---- .../datasources/orc/OrcSourceSuite.scala | 7 ++- .../SparkGetTablesOperation.scala | 6 +-- .../thriftserver/SparkSQLCLIService.scala | 11 +---- .../HiveThriftServer2Suites.scala | 18 ++----- ...arkThriftServerProtocolVersionsSuite.scala | 8 +--- .../execution/HiveCompatibilitySuite.scala | 5 +- .../org/apache/spark/sql/hive/HiveShim.scala | 48 ++++++------------- .../org/apache/spark/sql/hive/HiveUtils.scala | 3 +- .../sql/hive/client/HiveClientImpl.scala | 11 ++--- .../org/apache/spark/sql/hive/hiveUDFs.scala | 18 ++----- .../spark/sql/hive/orc/OrcFilters.scala | 16 +------ .../sql/hive/ClasspathDependenciesSuite.scala | 25 +++------- .../sql/hive/HiveMetastoreCatalogSuite.scala | 18 ++----- .../apache/spark/sql/hive/HiveShimSuite.scala | 12 +---- .../spark/sql/hive/StatisticsSuite.scala | 34 +++---------- .../HiveScriptTransformationSuite.scala | 2 - .../sql/hive/execution/HiveUDFSuite.scala | 1 - .../sql/hive/execution/SQLQuerySuite.scala | 1 - .../sql/hive/orc/HiveOrcFilterSuite.scala | 19 +------- .../sql/hive/orc/HiveOrcQuerySuite.scala | 1 - .../sql/hive/orc/HiveOrcSourceSuite.scala | 10 +--- 22 files changed, 60 insertions(+), 226 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala index 02f447bd14339..c07ea0f12f94e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala @@ -63,16 +63,6 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU uncaughtExceptionHandler.cleanStatus() } - def isHive23OrSpark: Boolean - - // In Hive 1.2, the string representation of a decimal omits trailing zeroes. - // But in Hive 2.3, it is always padded to 18 digits with trailing zeroes if necessary. - val decimalToString: Column => Column = if (isHive23OrSpark) { - c => c.cast("string") - } else { - c => c.cast("decimal(1, 0)").cast("string") - } - def createScriptTransformationExec( input: Seq[Expression], script: String, @@ -142,7 +132,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU 'a.cast("string"), 'b.cast("string"), 'c.cast("string"), - decimalToString('d), + 'd.cast("string"), 'e.cast("string")).collect()) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala index b70fd7476ed98..b6f41ab085fe1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala @@ -120,8 +120,7 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll { } } - protected def testSelectiveDictionaryEncoding(isSelective: Boolean, - isHive23: Boolean = false): Unit = { + protected def testSelectiveDictionaryEncoding(isSelective: Boolean, isHiveOrc: Boolean): Unit = { val tableName = "orcTable" withTempDir { dir => @@ -174,7 +173,7 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll { // Hive 0.11 and RLE v2 is introduced in Hive 0.12 ORC with more improvements. 
// For more details, see https://orc.apache.org/specification/ assert(stripe.getColumns(1).getKind === DICTIONARY_V2) - if (isSelective || isHive23) { + if (isSelective || isHiveOrc) { assert(stripe.getColumns(2).getKind === DIRECT_V2) } else { assert(stripe.getColumns(2).getKind === DICTIONARY_V2) @@ -581,7 +580,7 @@ class OrcSourceSuite extends OrcSuite with SharedSparkSession { } test("Enforce direct encoding column-wise selectively") { - testSelectiveDictionaryEncoding(isSelective = true) + testSelectiveDictionaryEncoding(isSelective = true, isHiveOrc = false) } test("SPARK-11412 read and merge orc schemas in parallel") { diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala index 0d4b9b392f074..bccad865be27a 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala @@ -125,10 +125,6 @@ private[hive] class SparkGetTablesOperation( tableType, comment.getOrElse("")) // Since HIVE-7575(Hive 2.0.0), adds 5 additional columns to the ResultSet of GetTables. - if (HiveUtils.isHive23) { - rowSet.addRow(rowData ++ Array(null, null, null, null, null)) - } else { - rowSet.addRow(rowData) - } + rowSet.addRow(rowData ++ Array(null, null, null, null, null)) } } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala index 984625c76e057..c39d2ecdd7923 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala @@ -113,17 +113,10 @@ private[hive] class SparkSQLCLIService(hiveServer: HiveServer2, sqlContext: SQLC private[thriftserver] trait ReflectedCompositeService { this: AbstractService => - private val logInfo = (msg: String) => if (HiveUtils.isHive23) { - getAncestorField[Logger](this, 3, "LOG").info(msg) - } else { - getAncestorField[Log](this, 3, "LOG").info(msg) - } + private val logInfo = (msg: String) => getAncestorField[Logger](this, 3, "LOG").info(msg) - private val logError = (msg: String, e: Throwable) => if (HiveUtils.isHive23) { + private val logError = (msg: String, e: Throwable) => getAncestorField[Logger](this, 3, "LOG").error(msg, e) - } else { - getAncestorField[Log](this, 3, "LOG").error(msg, e) - } def initCompositeService(hiveConf: HiveConf): Unit = { // Emulating `CompositeService.init(hiveConf)` diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala index ad0f97cae3f8e..27d4c4bc40bec 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala @@ -544,11 +544,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { conf += resultSet.getString(1) -> resultSet.getString(2) } - if (HiveUtils.isHive23) { - assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.7")) - } 
else { - assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("1.2.1")) - } + assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.7")) } } @@ -561,11 +557,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { conf += resultSet.getString(1) -> resultSet.getString(2) } - if (HiveUtils.isHive23) { - assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.7")) - } else { - assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("1.2.1")) - } + assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.7")) } } @@ -643,11 +635,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { val sessionHandle = client.openSession(user, "") val sessionID = sessionHandle.getSessionId - if (HiveUtils.isHive23) { - assert(pipeoutFileList(sessionID).length == 2) - } else { - assert(pipeoutFileList(sessionID).length == 1) - } + assert(pipeoutFileList(sessionID).length == 2) client.closeSession(sessionHandle) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala index fa001b11253f5..d5582077d6170 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala @@ -356,12 +356,8 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { assert(metaData.getColumnName(1) === "NULL") assert(metaData.getColumnTypeName(1) === "void") assert(metaData.getColumnType(1) === java.sql.Types.NULL) - if (HiveUtils.isHive23) { - // For Hive 1.2 the o.a.h.j.JdbcColumn.typeStringToHiveType can not recognize `null` as - // type name. - assert(metaData.getPrecision(1) === 0) - assert(metaData.getScale(1) === 0) - } + assert(metaData.getPrecision(1) === 0) + assert(metaData.getScale(1) === 0) } } diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index b7ea0630dd85f..a685549290f0e 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -1145,11 +1145,8 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { * The set of tests that are believed to be working in catalyst. Tests not on includeList or * excludeList are implicitly marked as ignored. 
*/ - override def includeList: Seq[String] = if (HiveUtils.isHive23) { + override def includeList: Seq[String] = commonIncludeList ++ Seq( "decimal_1_1" ) - } else { - commonIncludeList - } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala index 04a6a8f8aa9a5..1f8ce04270a04 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala @@ -161,46 +161,26 @@ private[hive] object HiveShim { } def deserializePlan[UDFType](is: java.io.InputStream, clazz: Class[_]): UDFType = { - if (HiveUtils.isHive23) { - val borrowKryo = serUtilClass.getMethod("borrowKryo") - val kryo = borrowKryo.invoke(serUtilClass) - val deserializeObjectByKryo = findMethod(serUtilClass, deserializeMethodName, - kryo.getClass.getSuperclass, classOf[InputStream], classOf[Class[_]]) - try { - deserializeObjectByKryo.invoke(null, kryo, is, clazz).asInstanceOf[UDFType] - } finally { - serUtilClass.getMethod("releaseKryo", kryo.getClass.getSuperclass).invoke(null, kryo) - } - } else { - val runtimeSerializationKryo = utilClass.getField("runtimeSerializationKryo") - val threadLocalValue = runtimeSerializationKryo.get(utilClass) - val getMethod = threadLocalValue.getClass.getMethod("get") - val kryo = getMethod.invoke(threadLocalValue) - val deserializeObjectByKryo = findMethod(utilClass, deserializeMethodName, - kryo.getClass, classOf[InputStream], classOf[Class[_]]) + val borrowKryo = serUtilClass.getMethod("borrowKryo") + val kryo = borrowKryo.invoke(serUtilClass) + val deserializeObjectByKryo = findMethod(serUtilClass, deserializeMethodName, + kryo.getClass.getSuperclass, classOf[InputStream], classOf[Class[_]]) + try { deserializeObjectByKryo.invoke(null, kryo, is, clazz).asInstanceOf[UDFType] + } finally { + serUtilClass.getMethod("releaseKryo", kryo.getClass.getSuperclass).invoke(null, kryo) } } def serializePlan(function: AnyRef, out: java.io.OutputStream): Unit = { - if (HiveUtils.isHive23) { - val borrowKryo = serUtilClass.getMethod("borrowKryo") - val kryo = borrowKryo.invoke(serUtilClass) - val serializeObjectByKryo = findMethod(serUtilClass, serializeMethodName, - kryo.getClass.getSuperclass, classOf[Object], classOf[OutputStream]) - try { - serializeObjectByKryo.invoke(null, kryo, function, out) - } finally { - serUtilClass.getMethod("releaseKryo", kryo.getClass.getSuperclass).invoke(null, kryo) - } - } else { - val runtimeSerializationKryo = utilClass.getField("runtimeSerializationKryo") - val threadLocalValue = runtimeSerializationKryo.get(utilClass) - val getMethod = threadLocalValue.getClass.getMethod("get") - val kryo = getMethod.invoke(threadLocalValue) - val serializeObjectByKryo = findMethod(utilClass, serializeMethodName, - kryo.getClass, classOf[Object], classOf[OutputStream]) + val borrowKryo = serUtilClass.getMethod("borrowKryo") + val kryo = borrowKryo.invoke(serUtilClass) + val serializeObjectByKryo = findMethod(serUtilClass, serializeMethodName, + kryo.getClass.getSuperclass, classOf[Object], classOf[OutputStream]) + try { serializeObjectByKryo.invoke(null, kryo, function, out) + } finally { + serUtilClass.getMethod("releaseKryo", kryo.getClass.getSuperclass).invoke(null, kryo) } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index 62ff2db2ecb3c..7d4bf7305546c 100644 --- 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -56,10 +56,9 @@ private[spark] object HiveUtils extends Logging { } private val hiveVersion = HiveVersionInfo.getVersion - val isHive23: Boolean = hiveVersion.startsWith("2.3") /** The version of hive used internally by Spark SQL. */ - val builtinHiveVersion: String = if (isHive23) hiveVersion else "1.2.1" + val builtinHiveVersion: String = hiveVersion val HIVE_METASTORE_VERSION = buildStaticConf("spark.sql.hive.metastore.version") .doc("Version of the Hive metastore. Available options are " + diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 1611a3da8a3da..a78e1cebc588c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -169,9 +169,7 @@ private[hive] class HiveClientImpl( // since HIVE-11878, and ADDJarCommand will add jars to clientLoader.classLoader. // For this reason we cannot load the jars added by ADDJarCommand because of class loader // got changed. We reset it to clientLoader.ClassLoader here. - if (HiveUtils.isHive23) { - state.getConf.setClassLoader(clientLoader.classLoader) - } + state.getConf.setClassLoader(clientLoader.classLoader) SessionState.start(state) state.out = new PrintStream(outputBuffer, true, UTF_8.name()) state.err = new PrintStream(outputBuffer, true, UTF_8.name()) @@ -179,9 +177,7 @@ private[hive] class HiveClientImpl( } /** Returns the configuration for the current session. */ - def conf: HiveConf = if (!HiveUtils.isHive23) { - state.getConf - } else { + def conf: HiveConf = { val hiveConf = state.getConf // Hive changed the default of datanucleus.schema.autoCreateAll from true to false // and hive.metastore.schema.verification from false to true since Hive 2.0. @@ -293,8 +289,7 @@ private[hive] class HiveClientImpl( val ret = try { f } catch { - case e: NoClassDefFoundError - if HiveUtils.isHive23 && e.getMessage.contains("org/apache/hadoop/hive/serde2/SerDe") => + case e: NoClassDefFoundError if e.getMessage.contains("apache/hadoop/hive/serde2/SerDe") => throw new ClassNotFoundException("The SerDe interface removed since Hive 2.3(HIVE-15167)." + " Please migrate your custom SerDes to Hive 2.3. 
See HIVE-15167 for more details.", e) } finally { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala index 462e67c4ed35c..7fccb72fb913b 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala @@ -350,19 +350,11 @@ private[hive] case class HiveUDAFFunction( } val clazz = Utils.classForName(classOf[SimpleGenericUDAFParameterInfo].getName) - if (HiveUtils.isHive23) { - val ctor = clazz.getDeclaredConstructor( - classOf[Array[ObjectInspector]], JBoolean.TYPE, JBoolean.TYPE, JBoolean.TYPE) - val args = Array[AnyRef](inputInspectors, JBoolean.FALSE, JBoolean.FALSE, JBoolean.FALSE) - val parameterInfo = ctor.newInstance(args: _*).asInstanceOf[SimpleGenericUDAFParameterInfo] - resolver.getEvaluator(parameterInfo) - } else { - val ctor = clazz.getDeclaredConstructor( - classOf[Array[ObjectInspector]], JBoolean.TYPE, JBoolean.TYPE) - val args = Array[AnyRef](inputInspectors, JBoolean.FALSE, JBoolean.FALSE) - val parameterInfo = ctor.newInstance(args: _*).asInstanceOf[SimpleGenericUDAFParameterInfo] - resolver.getEvaluator(parameterInfo) - } + val ctor = clazz.getDeclaredConstructor( + classOf[Array[ObjectInspector]], JBoolean.TYPE, JBoolean.TYPE, JBoolean.TYPE) + val args = Array[AnyRef](inputInspectors, JBoolean.FALSE, JBoolean.FALSE, JBoolean.FALSE) + val parameterInfo = ctor.newInstance(args: _*).asInstanceOf[SimpleGenericUDAFParameterInfo] + resolver.getEvaluator(parameterInfo) } private case class HiveEvaluator( diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala index f9c514567c639..ea5c7ca15b065 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala @@ -71,21 +71,7 @@ private[orc] object OrcFilters extends Logging { } def createFilter(schema: StructType, filters: Array[Filter]): Option[SearchArgument] = { - if (HiveUtils.isHive23) { - DatasourceOrcFilters.createFilter(schema, filters).asInstanceOf[Option[SearchArgument]] - } else { - val dataTypeMap = schema.map(f => quoteIfNeeded(f.name) -> f.dataType).toMap - // TODO (SPARK-25557): ORC doesn't support nested predicate pushdown, so they are removed. - val newFilters = filters.filter(!_.containsNestedColumn) - // Combines all convertible filters using `And` to produce a single conjunction - val conjunctionOptional = buildTree(convertibleFilters(schema, dataTypeMap, newFilters)) - conjunctionOptional.map { conjunction => - // Then tries to build a single ORC `SearchArgument` for the conjunction predicate. - // The input predicate is fully convertible. There should not be any empty result in the - // following recursive method call `buildSearchArgument`. 
- buildSearchArgument(dataTypeMap, conjunction, newBuilder).build() - } - } + DatasourceOrcFilters.createFilter(schema, filters).asInstanceOf[Option[SearchArgument]] } def convertibleFilters( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala index a696d6aaff27b..c136c4c9790fd 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala @@ -57,20 +57,12 @@ class ClasspathDependenciesSuite extends SparkFunSuite { } } - test("shaded Protobuf") { - if (HiveUtils.isHive23) { - assertLoads("com.google.protobuf.ServiceException") - } else { - assertLoads("org.apache.hive.com.google.protobuf.ServiceException") - } + test("protobuf") { + assertLoads("com.google.protobuf.ServiceException") } - test("shaded Kryo") { - if (HiveUtils.isHive23) { - assertLoads("com.esotericsoftware.kryo.Kryo") - } else { - assertLoads("org.apache.hive.com.esotericsoftware.kryo.Kryo") - } + test("kryo") { + assertLoads("com.esotericsoftware.kryo.Kryo") } test("hive-common") { @@ -89,12 +81,7 @@ class ClasspathDependenciesSuite extends SparkFunSuite { } test("parquet-hadoop-bundle") { - if (HiveUtils.isHive23) { - assertLoads("org.apache.parquet.hadoop.ParquetOutputFormat") - assertLoads("org.apache.parquet.hadoop.ParquetInputFormat") - } else { - assertLoads("parquet.hadoop.ParquetOutputFormat") - assertLoads("parquet.hadoop.ParquetInputFormat") - } + assertLoads("org.apache.parquet.hadoop.ParquetOutputFormat") + assertLoads("org.apache.parquet.hadoop.ParquetInputFormat") } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala index 95e99c653d6f6..8f71ba3337aa2 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala @@ -206,13 +206,8 @@ class DataSourceWithHiveMetastoreCatalogSuite assert(columns.map(_.dataType) === Seq(DecimalType(10, 3), StringType)) checkAnswer(table("t"), testDF) - if (HiveUtils.isHive23) { - assert(sparkSession.metadataHive.runSqlHive("SELECT * FROM t") === - Seq("1.100\t1", "2.100\t2")) - } else { - assert(sparkSession.metadataHive.runSqlHive("SELECT * FROM t") === - Seq("1.1\t1", "2.1\t2")) - } + assert(sparkSession.metadataHive.runSqlHive("SELECT * FROM t") === + Seq("1.100\t1", "2.100\t2")) } } @@ -244,13 +239,8 @@ class DataSourceWithHiveMetastoreCatalogSuite assert(columns.map(_.dataType) === Seq(DecimalType(10, 3), StringType)) checkAnswer(table("t"), testDF) - if (HiveUtils.isHive23) { - assert(sparkSession.metadataHive.runSqlHive("SELECT * FROM t") === - Seq("1.100\t1", "2.100\t2")) - } else { - assert(sparkSession.metadataHive.runSqlHive("SELECT * FROM t") === - Seq("1.1\t1", "2.1\t2")) - } + assert(sparkSession.metadataHive.runSqlHive("SELECT * FROM t") === + Seq("1.100\t1", "2.100\t2")) } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala index 14d07cdf8db08..54c64a4eeb190 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala @@ -35,18 +35,10 @@ class HiveShimSuite extends 
SparkFunSuite { // test when READ_COLUMN_NAMES_CONF_STR is empty HiveShim.appendReadColumns(conf, ids, names) - if (HiveUtils.isHive23) { - assert(names === ColumnProjectionUtils.getReadColumnNames(conf)) - } else { - assert(names.asJava === ColumnProjectionUtils.getReadColumnNames(conf)) - } + assert(names === ColumnProjectionUtils.getReadColumnNames(conf)) // test when READ_COLUMN_NAMES_CONF_STR is non-empty HiveShim.appendReadColumns(conf, moreIds, moreNames) - if (HiveUtils.isHive23) { - assert((names ++ moreNames) === ColumnProjectionUtils.getReadColumnNames(conf)) - } else { - assert((names ++ moreNames).asJava === ColumnProjectionUtils.getReadColumnNames(conf)) - } + assert((names ++ moreNames) === ColumnProjectionUtils.getReadColumnNames(conf)) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 1f3878ad2925d..52dd2b34a0e95 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -101,14 +101,9 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto .asInstanceOf[HiveTableRelation] val properties = relation.tableMeta.ignoredProperties - if (HiveUtils.isHive23) { - // Since HIVE-6727, Hive fixes table-level stats for external tables are incorrect. - assert(properties("totalSize").toLong == 6) - assert(properties.get("rawDataSize").isEmpty) - } else { - assert(properties("totalSize").toLong <= 0, "external table totalSize must be <= 0") - assert(properties("rawDataSize").toLong <= 0, "external table rawDataSize must be <= 0") - } + // Since HIVE-6727, Hive fixes table-level stats for external tables are incorrect. + assert(properties("totalSize").toLong == 6) + assert(properties.get("rawDataSize").isEmpty) val sizeInBytes = relation.stats.sizeInBytes assert(sizeInBytes === BigInt(file1.length() + file2.length())) @@ -872,25 +867,10 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto assert(totalSize.isDefined && totalSize.get > 0, "totalSize is lost") val numRows = extractStatsPropValues(describeResult, "numRows") - if (HiveUtils.isHive23) { - // Since HIVE-15653(Hive 2.3.0), Hive fixs some ALTER TABLE commands drop table stats. - assert(numRows.isDefined && numRows.get == 500) - val rawDataSize = extractStatsPropValues(describeResult, "rawDataSize") - assert(rawDataSize.isDefined && rawDataSize.get == 5312) - checkTableStats(tabName, hasSizeInBytes = true, expectedRowCounts = Some(500)) - } else { - // ALTER TABLE SET/UNSET TBLPROPERTIES invalidates some Hive specific statistics, but not - // Spark specific statistics. This is triggered by the Hive alterTable API. 
- assert(numRows.isDefined && numRows.get == -1, "numRows is lost") - val rawDataSize = extractStatsPropValues(describeResult, "rawDataSize") - assert(rawDataSize.isDefined && rawDataSize.get == -1, "rawDataSize is lost") - - if (analyzedBySpark) { - checkTableStats(tabName, hasSizeInBytes = true, expectedRowCounts = Some(500)) - } else { - checkTableStats(tabName, hasSizeInBytes = true, expectedRowCounts = None) - } - } + assert(numRows.isDefined && numRows.get == 500) + val rawDataSize = extractStatsPropValues(describeResult, "rawDataSize") + assert(rawDataSize.isDefined && rawDataSize.get == 5312) + checkTableStats(tabName, hasSizeInBytes = true, expectedRowCounts = Some(500)) } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala index bb87246acf4ca..d247f37130776 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala @@ -38,8 +38,6 @@ class HiveScriptTransformationSuite extends BaseScriptTransformationSuite with T import ScriptTransformationIOSchema._ - override def isHive23OrSpark: Boolean = HiveUtils.isHive23 - override def createScriptTransformationExec( input: Seq[Expression], script: String, diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala index f5cd4f9f843d8..dd797b39e0939 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala @@ -660,7 +660,6 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils { } test("SPARK-32877: add test for Hive UDF complex decimal type") { - assume(HiveUtils.isHive23) withUserDefinedFunction("testArraySum" -> false) { sql(s"CREATE FUNCTION testArraySum AS '${classOf[ArraySumUDF].getName}'") checkAnswer( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 96bca5404831d..a69a949e3a3a2 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -2242,7 +2242,6 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi } test("SPARK-32889: ORC table column name supports special characters") { - assume(HiveUtils.isHive23) // " " "," is not allowed. 
Seq("$", ";", "{", "}", "(", ")", "\n", "\t", "=").foreach { name => val source = "ORC" diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala index 5fc41067f661d..deb85f30463ae 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala @@ -81,21 +81,13 @@ class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton { (predicate: Predicate, stringExpr: String) (implicit df: DataFrame): Unit = { def checkLogicalOperator(filter: SearchArgument) = { - if (HiveUtils.isHive23) { - assert(filter.toString == stringExpr.replace("\n", ", ")) - } else { - assert(filter.toString == stringExpr) - } + assert(filter.toString == stringExpr.replace("\n", ", ")) } checkFilterPredicate(df, predicate, checkLogicalOperator) } private def assertResultWithDiffHiveVersion(expected : String)(c : scala.Any) = { - if (HiveUtils.isHive23) { - assertResult(expected.replace("\n", ", "))(c) - } else { - assertResult(expected)(c) - } + assertResult(expected.replace("\n", ", "))(c) } private def checkNoFilterPredicate @@ -354,13 +346,6 @@ class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton { withOrcDataFrame((1 to 4).map(i => Tuple1(i.b))) { implicit df => checkNoFilterPredicate($"_1" <=> 1.b) } - // DateType - if (!HiveUtils.isHive23) { - val stringDate = "2015-01-01" - withOrcDataFrame(Seq(Tuple1(Date.valueOf(stringDate)))) { implicit df => - checkNoFilterPredicate($"_1" === Date.valueOf(stringDate)) - } - } // MapType withOrcDataFrame((1 to 4).map(i => Tuple1(Map(i -> i)))) { implicit df => checkNoFilterPredicate($"_1".isNotNull) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala index 12ee5bea7c2f9..1901ed505197c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala @@ -224,7 +224,6 @@ class HiveOrcQuerySuite extends OrcQueryTest with TestHiveSingleton { } test("SPARK-26437 Can not query decimal type when value is 0") { - assume(HiveUtils.isHive23, "bad test: This bug fixed by HIVE-13083(Hive 2.0.1)") withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "false") { withTable("spark_26437") { sql("CREATE TABLE spark_26437 STORED AS ORCFILE AS SELECT 0.00 AS c1") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala index 91fd8a47339fc..e94e0b39c859c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala @@ -149,12 +149,7 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { test("Check BloomFilter creation") { Seq(true, false).foreach { convertMetastore => withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") { - if (HiveUtils.isHive23) { - testBloomFilterCreation(org.apache.orc.OrcProto.Stream.Kind.BLOOM_FILTER_UTF8) - } else { - // Before ORC-101 - testBloomFilterCreation(org.apache.orc.OrcProto.Stream.Kind.BLOOM_FILTER) - } + testBloomFilterCreation(org.apache.orc.OrcProto.Stream.Kind.BLOOM_FILTER_UTF8) } } } @@ -162,7 +157,7 @@ class HiveOrcSourceSuite extends 
OrcSuite with TestHiveSingleton { test("Enforce direct encoding column-wise selectively") { Seq(true, false).foreach { convertMetastore => withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") { - testSelectiveDictionaryEncoding(isSelective = false, isHive23 = HiveUtils.isHive23) + testSelectiveDictionaryEncoding(isSelective = false, isHiveOrc = true) } } } @@ -322,7 +317,6 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { } test("SPARK-31580: Read a file written before ORC-569") { - assume(HiveUtils.isHive23) // Hive 1.2 doesn't use Apache ORC // Test ORC file came from ORC-621 val df = readResourceOrcFile("test-data/TestStringDictionary.testRowIndex.orc") assert(df.where("str < 'row 001000'").count() === 1000) From 23afc930ae2fb0f3d7fd214324351fc6a0b8253a Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 7 Oct 2020 13:50:15 -0700 Subject: [PATCH 0189/1009] [SPARK-26499][SQL][FOLLOWUP] Print the loading provider exception starting from the INFO level ### What changes were proposed in this pull request? 1. Don't print the exception in the error message while loading a built-in provider. 2. Print the exception starting from the INFO level. Up to the INFO level, the output is: ``` 17:48:32.342 ERROR org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider: Failed to load built in provider. ``` and starting from the INFO level: ``` 17:48:32.342 ERROR org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider: Failed to load built in provider. 17:48:32.342 INFO org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider: Loading of the provider failed with the exception: java.util.ServiceConfigurationError: org.apache.spark.sql.jdbc.JdbcConnectionProvider: Provider org.apache.spark.sql.execution.datasources.jdbc.connection.IntentionallyFaultyConnectionProvider could not be instantiated at java.util.ServiceLoader.fail(ServiceLoader.java:232) at java.util.ServiceLoader.access$100(ServiceLoader.java:185) at java.util.ServiceLoader$LazyIterator.nextService(ServiceLoader.java:384) at java.util.ServiceLoader$LazyIterator.next(ServiceLoader.java:404) at java.util.ServiceLoader$1.next(ServiceLoader.java:480) at org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider$.loadProviders(ConnectionProvider.scala:41) ``` ### Why are the changes needed? To avoid "noise" in logs while running tests. Currently, logs are blown up: ``` org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider: Loading of the provider failed with the exception: java.util.ServiceConfigurationError: org.apache.spark.sql.jdbc.JdbcConnectionProvider: Provider org.apache.spark.sql.execution.datasources.jdbc.connection.IntentionallyFaultyConnectionProvider could not be instantiated at java.util.ServiceLoader.fail(ServiceLoader.java:232) at java.util.ServiceLoader.access$100(ServiceLoader.java:185) at java.util.ServiceLoader$LazyIterator.nextService(ServiceLoader.java:384) at java.util.ServiceLoader$LazyIterator.next(ServiceLoader.java:404) at java.util.ServiceLoader$1.next(ServiceLoader.java:480) at org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider$.loadProviders(ConnectionProvider.scala:41) ... 
at java.lang.Thread.run(Thread.java:748) Caused by: java.lang.IllegalArgumentException: Intentional Exception at org.apache.spark.sql.execution.datasources.jdbc.connection.IntentionallyFaultyConnectionProvider.(IntentionallyFaultyConnectionProvider.scala:26) at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62) at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) at java.lang.reflect.Constructor.newInstance(Constructor.java:423) at java.lang.Class.newInstance(Class.java:442) at java.util.ServiceLoader$LazyIterator.nextService(ServiceLoader.java:380) ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running: ``` $ build/sbt "sql/test:testOnly org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalogSuite" ``` Closes #29968 from MaxGekk/gaborgsomogyi-SPARK-32001-followup. Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../datasources/jdbc/connection/ConnectionProvider.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala index 546756677edce..649a0bda4236c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala @@ -39,11 +39,12 @@ private[jdbc] object ConnectionProvider extends Logging { while (iterator.hasNext) { try { val provider = iterator.next - logDebug(s"Loaded built in provider: $provider") + logDebug(s"Loaded built-in provider: $provider") providers += provider } catch { case t: Throwable => - logError(s"Failed to load built in provider.", t) + logError("Failed to load built-in provider.") + logInfo("Loading of the provider failed with the exception:", t) } } // Seems duplicate but it's needed for Scala 2.13 From 6daa2aeb0164277088396102897b2ea4426b9f1c Mon Sep 17 00:00:00 2001 From: Denis Pyshev Date: Wed, 7 Oct 2020 15:28:00 -0700 Subject: [PATCH 0190/1009] [SPARK-21708][BUILD] Migrate build to sbt 1.x ### What changes were proposed in this pull request? Migrate sbt-launcher URL to download one for sbt 1.x. Update plugins versions where required by sbt update. Change sbt version to be used to latest released at the moment, 1.3.13 Adjust build settings according to plugins and sbt changes. ### Why are the changes needed? Migration to sbt 1.x: 1. enhances dev experience in development 2. updates build plugins to bring there new features/to fix bugs in them 3. enhances build performance on sbt side 4. eases movement to Scala 3 / dotty ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? All existing tests passed, both on Jenkins and via Github Actions, also manually for Scala 2.13 profile. Closes #29286 from gemelen/feature/sbt-1.x. 
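As a quick orientation for the SparkBuild.scala changes below, here is the recurring sbt 0.13 -> 1.x settings pattern this migration applies, shown as a small sketch rather than an excerpt of the patch (the helper name `forkWithTmpDir` and its arguments are illustrative only): sbt 1.x configuration objects such as `ForkOptions` and `PublishConfiguration` are rebuilt with `withX(...)` methods instead of positional constructors or case-class `copy(...)`.

```
// Illustrative sketch of the sbt 1.x builder style adopted throughout SparkBuild.scala.
// Only the withRunJVMOptions call mirrors the patch; the helper and its inputs are made up.
import sbt._

def forkWithTmpDir(base: ForkOptions, groupName: String, baseDir: File): ForkOptions = {
  // sbt 0.13 wrote: base.copy(runJVMOptions = base.runJVMOptions ++ extraOpts)
  // sbt 1.x builds an updated copy through a with* method instead:
  base.withRunJVMOptions(
    base.runJVMOptions ++ Seq(s"-Djava.io.tmpdir=$baseDir/target/tmp/$groupName"))
}
```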
Authored-by: Denis Pyshev Signed-off-by: Dongjoon Hyun --- .sbtopts | 17 ++++ build/sbt-launch-lib.bash | 2 +- project/MimaBuild.scala | 17 ++-- project/MimaExcludes.scala | 30 ++++++ project/SparkBuild.scala | 96 ++++++++++++------- project/build.properties | 2 +- project/plugins.sbt | 30 ++---- .../spark/tools/GenerateMIMAIgnore.scala | 3 +- 8 files changed, 128 insertions(+), 69 deletions(-) create mode 100644 .sbtopts diff --git a/.sbtopts b/.sbtopts new file mode 100644 index 0000000000000..9afbdca6db1c7 --- /dev/null +++ b/.sbtopts @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +-J-Xmx4G +-J-Xss4m diff --git a/build/sbt-launch-lib.bash b/build/sbt-launch-lib.bash index 162bfbf2257c7..423ba3b766e61 100755 --- a/build/sbt-launch-lib.bash +++ b/build/sbt-launch-lib.bash @@ -39,7 +39,7 @@ dlog () { acquire_sbt_jar () { SBT_VERSION=`awk -F "=" '/sbt\.version/ {print $2}' ./project/build.properties` - URL1=https://dl.bintray.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar + URL1=https://repo1.maven.org/maven2/org/scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch-${SBT_VERSION}.jar JAR=build/sbt-launch-${SBT_VERSION}.jar sbt_jar=$JAR diff --git a/project/MimaBuild.scala b/project/MimaBuild.scala index 10c02103aeddb..badcdf34a2ad0 100644 --- a/project/MimaBuild.scala +++ b/project/MimaBuild.scala @@ -22,9 +22,7 @@ import com.typesafe.tools.mima.core._ import com.typesafe.tools.mima.core.MissingClassProblem import com.typesafe.tools.mima.core.MissingTypesProblem import com.typesafe.tools.mima.core.ProblemFilters._ -import com.typesafe.tools.mima.plugin.MimaKeys.{mimaBinaryIssueFilters, mimaPreviousArtifacts} -import com.typesafe.tools.mima.plugin.MimaPlugin.mimaDefaultSettings - +import com.typesafe.tools.mima.plugin.MimaKeys.{mimaBinaryIssueFilters, mimaPreviousArtifacts, mimaFailOnNoPrevious} object MimaBuild { @@ -86,14 +84,17 @@ object MimaBuild { ignoredMembers.flatMap(excludeMember) ++ MimaExcludes.excludes(currentSparkVersion) } - def mimaSettings(sparkHome: File, projectRef: ProjectRef) = { + def mimaSettings(sparkHome: File, projectRef: ProjectRef): Seq[Setting[_]] = { val organization = "org.apache.spark" - val previousSparkVersion = "2.4.0" + val previousSparkVersion = "3.0.0" val project = projectRef.project val fullId = "spark-" + project + "_2.12" - mimaDefaultSettings ++ - Seq(mimaPreviousArtifacts := Set(organization % fullId % previousSparkVersion), - mimaBinaryIssueFilters ++= ignoredABIProblems(sparkHome, version.value)) + + Seq( + mimaFailOnNoPrevious := true, + mimaPreviousArtifacts := Set(organization % fullId % previousSparkVersion), + mimaBinaryIssueFilters ++= ignoredABIProblems(sparkHome, version.value) + ) } } diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 
d32d31daae8e7..98769d951b6ac 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -36,6 +36,36 @@ object MimaExcludes { // Exclude rules for 3.1.x lazy val v31excludes = v30excludes ++ Seq( + // mima plugin update caused new incompatibilities to be detected + // core module + ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.shuffle.sort.io.LocalDiskShuffleMapOutputWriter.commitAllPartitions"), + ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.shuffle.api.ShuffleMapOutputWriter.commitAllPartitions"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.shuffle.api.ShuffleMapOutputWriter.commitAllPartitions"), + // mllib module + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionTrainingSummary.totalIterations"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionTrainingSummary.$init$"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.labels"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.truePositiveRateByLabel"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.falsePositiveRateByLabel"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.precisionByLabel"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.recallByLabel"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.fMeasureByLabel"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.fMeasureByLabel"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.accuracy"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedTruePositiveRate"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedFalsePositiveRate"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedRecall"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedPrecision"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedFMeasure"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedFMeasure"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.roc"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.areaUnderROC"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.pr"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.fMeasureByThreshold"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.precisionByThreshold"), + 
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.recallByThreshold"), + ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.classification.FMClassifier.trainImpl"), + ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.regression.FMRegressor.trainImpl"), // [SPARK-31077] Remove ChiSqSelector dependency on mllib.ChiSqSelectorModel // private constructor ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.ChiSqSelectorModel.this"), diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 6328daec027ef..6929342d2f539 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -28,13 +28,13 @@ import scala.collection.mutable.Stack import sbt._ import sbt.Classpaths.publishTask import sbt.Keys._ -import sbtunidoc.Plugin.UnidocKeys.unidocGenjavadocVersion import com.etsy.sbt.checkstyle.CheckstylePlugin.autoImport._ import com.simplytyped.Antlr4Plugin._ import com.typesafe.sbt.pom.{PomBuild, SbtPomKeys} import com.typesafe.tools.mima.plugin.MimaKeys import org.scalastyle.sbt.ScalastylePlugin.autoImport._ import org.scalastyle.sbt.Tasks +import sbtassembly.AssemblyPlugin.autoImport._ import spray.revolver.RevolverPlugin._ @@ -83,6 +83,8 @@ object BuildCommons { object SparkBuild extends PomBuild { import BuildCommons._ + import sbtunidoc.GenJavadocPlugin + import sbtunidoc.GenJavadocPlugin.autoImport._ import scala.collection.mutable.Map val projectsMap: Map[String, Seq[Setting[_]]] = Map.empty @@ -106,13 +108,10 @@ object SparkBuild extends PomBuild { override val userPropertiesMap = System.getProperties.asScala.toMap lazy val MavenCompile = config("m2r") extend(Compile) - lazy val publishLocalBoth = TaskKey[Unit]("publish-local", "publish local for m2 and ivy") + lazy val publishLocalBoth = TaskKey[Unit]("localPublish", "publish local for m2 and ivy", KeyRanks.ATask) - lazy val sparkGenjavadocSettings: Seq[sbt.Def.Setting[_]] = Seq( - libraryDependencies += compilerPlugin( - "com.typesafe.genjavadoc" %% "genjavadoc-plugin" % unidocGenjavadocVersion.value cross CrossVersion.full), + lazy val sparkGenjavadocSettings: Seq[sbt.Def.Setting[_]] = GenJavadocPlugin.projectSettings ++ Seq( scalacOptions ++= Seq( - "-P:genjavadoc:out=" + (target.value / "java"), "-P:genjavadoc:strictVisibility=true" // hide package private types ) ) @@ -157,7 +156,7 @@ object SparkBuild extends PomBuild { val scalaSourceV = Seq(file(scalaSource.in(config).value.getAbsolutePath)) val configV = (baseDirectory in ThisBuild).value / scalaStyleOnCompileConfig val configUrlV = scalastyleConfigUrl.in(config).value - val streamsV = streams.in(config).value + val streamsV = (streams.in(config).value: @sbtUnchecked) val failOnErrorV = true val failOnWarningV = false val scalastyleTargetV = scalastyleTarget.in(config).value @@ -204,7 +203,6 @@ object SparkBuild extends PomBuild { javaHome := sys.env.get("JAVA_HOME") .orElse(sys.props.get("java.home").map { p => new File(p).getParentFile().getAbsolutePath() }) .map(file), - incOptions := incOptions.value.withNameHashing(true), publishMavenStyle := true, unidocGenjavadocVersion := "0.16", @@ -219,10 +217,12 @@ object SparkBuild extends PomBuild { ), externalResolvers := resolvers.value, otherResolvers := SbtPomKeys.mvnLocalRepository(dotM2 => Seq(Resolver.file("dotM2", dotM2))).value, - publishLocalConfiguration in MavenCompile := - new PublishConfiguration(None, "dotM2", packagedArtifacts.value, Seq(), 
ivyLoggingLevel.value), + publishLocalConfiguration in MavenCompile := PublishConfiguration() + .withResolverName("dotM2") + .withArtifacts(packagedArtifacts.value.toVector) + .withLogging(ivyLoggingLevel.value), publishMavenStyle in MavenCompile := true, - publishLocal in MavenCompile := publishTask(publishLocalConfiguration in MavenCompile, deliverLocal).value, + publishLocal in MavenCompile := publishTask(publishLocalConfiguration in MavenCompile).value, publishLocalBoth := Seq(publishLocal in MavenCompile, publishLocal).dependOn.value, javacOptions in (Compile, doc) ++= { @@ -251,6 +251,8 @@ object SparkBuild extends PomBuild { "-sourcepath", (baseDirectory in ThisBuild).value.getAbsolutePath // Required for relative source links in scaladoc ), + SbtPomKeys.profiles := profiles, + // Remove certain packages from Scaladoc scalacOptions in (Compile, doc) := Seq( "-groups", @@ -273,14 +275,15 @@ object SparkBuild extends PomBuild { val out = streams.value def logProblem(l: (=> String) => Unit, f: File, p: xsbti.Problem) = { - l(f.toString + ":" + p.position.line.fold("")(_ + ":") + " " + p.message) + val jmap = new java.util.function.Function[Integer, String]() {override def apply(i: Integer): String = {i.toString}} + l(f.toString + ":" + p.position.line.map[String](jmap.apply).map(_ + ":").orElse("") + " " + p.message) l(p.position.lineContent) l("") } var failed = 0 - analysis.infos.allInfos.foreach { case (k, i) => - i.reportedProblems foreach { p => + analysis.asInstanceOf[sbt.internal.inc.Analysis].infos.allInfos.foreach { case (k, i) => + i.getReportedProblems foreach { p => val deprecation = p.message.contains("deprecated") if (!deprecation) { @@ -302,7 +305,10 @@ object SparkBuild extends PomBuild { sys.error(s"$failed fatal warnings") } analysis - } + }, + // disable Mima check for all modules, + // to be enabled in specific ones that have previous artifacts + MimaKeys.mimaFailOnNoPrevious := false ) def enable(settings: Seq[Setting[_]])(projectRef: ProjectRef) = { @@ -411,7 +417,7 @@ object SparkBuild extends PomBuild { } ))(assembly) - enable(Seq(sparkShell := sparkShell in LocalProject("assembly")))(spark) + enable(Seq(sparkShell := (sparkShell in LocalProject("assembly")).value))(spark) // TODO: move this to its upstream project. 
override def projectDefinitions(baseDirectory: File): Seq[Project] = { @@ -485,12 +491,12 @@ object SparkParallelTestGrouping { testGrouping in Test := { val tests: Seq[TestDefinition] = (definedTests in Test).value val defaultForkOptions = ForkOptions( - bootJars = Nil, javaHome = javaHome.value, - connectInput = connectInput.value, outputStrategy = outputStrategy.value, - runJVMOptions = (javaOptions in Test).value, + bootJars = Vector.empty[java.io.File], workingDirectory = Some(baseDirectory.value), + runJVMOptions = (javaOptions in Test).value.toVector, + connectInput = connectInput.value, envVars = (envVars in Test).value ) tests.groupBy(test => testNameToTestGroup(test.name)).map { case (groupName, groupTests) => @@ -498,7 +504,7 @@ object SparkParallelTestGrouping { if (groupName == DEFAULT_TEST_GROUP) { defaultForkOptions } else { - defaultForkOptions.copy(runJVMOptions = defaultForkOptions.runJVMOptions ++ + defaultForkOptions.withRunJVMOptions(defaultForkOptions.runJVMOptions ++ Seq(s"-Djava.io.tmpdir=${baseDirectory.value}/target/tmp/$groupName")) } } @@ -512,6 +518,7 @@ object SparkParallelTestGrouping { } object Core { + import scala.sys.process.Process lazy val settings = Seq( resourceGenerators in Compile += Def.task { val buildScript = baseDirectory.value + "/../build/spark-build-info" @@ -557,6 +564,7 @@ object DockerIntegrationTests { */ object KubernetesIntegrationTests { import BuildCommons._ + import scala.sys.process.Process val dockerBuild = TaskKey[Unit]("docker-imgs", "Build the docker images for ITs.") val runITs = TaskKey[Unit]("run-its", "Only run ITs, skip image build.") @@ -634,7 +642,9 @@ object ExcludedDependencies { */ object OldDeps { - lazy val project = Project("oldDeps", file("dev"), settings = oldDepsSettings) + lazy val project = Project("oldDeps", file("dev")) + .settings(oldDepsSettings) + .disablePlugins(com.typesafe.sbt.pom.PomReaderPlugin) lazy val allPreviousArtifactKeys = Def.settingDyn[Seq[Set[ModuleID]]] { SparkBuild.mimaProjects @@ -650,7 +660,10 @@ object OldDeps { } object Catalyst { - lazy val settings = antlr4Settings ++ Seq( + import com.simplytyped.Antlr4Plugin + import com.simplytyped.Antlr4Plugin.autoImport._ + + lazy val settings = Antlr4Plugin.projectSettings ++ Seq( antlr4Version in Antlr4 := SbtPomKeys.effectivePom.value.getProperties.get("antlr4.version").asInstanceOf[String], antlr4PackageName in Antlr4 := Some("org.apache.spark.sql.catalyst.parser"), antlr4GenListener in Antlr4 := true, @@ -660,6 +673,9 @@ object Catalyst { } object SQL { + + import sbtavro.SbtAvro.autoImport._ + lazy val settings = Seq( initialCommands in console := """ @@ -681,8 +697,10 @@ object SQL { |import sqlContext.implicits._ |import sqlContext._ """.stripMargin, - cleanupCommands in console := "sc.stop()" + cleanupCommands in console := "sc.stop()", + Test / avroGenerate := (Compile / avroGenerate).value ) + } object Hive { @@ -721,27 +739,27 @@ object Hive { object Assembly { import sbtassembly.AssemblyUtils._ - import sbtassembly.Plugin._ - import AssemblyKeys._ + import sbtassembly.AssemblyPlugin.autoImport._ val hadoopVersion = taskKey[String]("The version of hadoop that spark is compiled against.") - lazy val settings = assemblySettings ++ Seq( + lazy val settings = baseAssemblySettings ++ Seq( test in assembly := {}, hadoopVersion := { sys.props.get("hadoop.version") .getOrElse(SbtPomKeys.effectivePom.value.getProperties.get("hadoop.version").asInstanceOf[String]) }, - jarName in assembly := { + assemblyJarName in assembly := { + lazy val 
hdpVersion = hadoopVersion.value if (moduleName.value.contains("streaming-kafka-0-10-assembly") || moduleName.value.contains("streaming-kinesis-asl-assembly")) { s"${moduleName.value}-${version.value}.jar" } else { - s"${moduleName.value}-${version.value}-hadoop${hadoopVersion.value}.jar" + s"${moduleName.value}-${version.value}-hadoop${hdpVersion}.jar" } }, - jarName in (Test, assembly) := s"${moduleName.value}-test-${version.value}.jar", - mergeStrategy in assembly := { + assemblyJarName in (Test, assembly) := s"${moduleName.value}-test-${version.value}.jar", + assemblyMergeStrategy in assembly := { case m if m.toLowerCase(Locale.ROOT).endsWith("manifest.mf") => MergeStrategy.discard case m if m.toLowerCase(Locale.ROOT).matches("meta-inf.*\\.sf$") @@ -756,8 +774,7 @@ object Assembly { } object PySparkAssembly { - import sbtassembly.Plugin._ - import AssemblyKeys._ + import sbtassembly.AssemblyPlugin.autoImport._ import java.util.zip.{ZipOutputStream, ZipEntry} lazy val settings = Seq( @@ -807,8 +824,13 @@ object PySparkAssembly { object Unidoc { import BuildCommons._ - import sbtunidoc.Plugin._ - import UnidocKeys._ + import sbtunidoc.BaseUnidocPlugin + import sbtunidoc.JavaUnidocPlugin + import sbtunidoc.ScalaUnidocPlugin + import sbtunidoc.BaseUnidocPlugin.autoImport._ + import sbtunidoc.GenJavadocPlugin.autoImport._ + import sbtunidoc.JavaUnidocPlugin.autoImport._ + import sbtunidoc.ScalaUnidocPlugin.autoImport._ private def ignoreUndocumentedPackages(packages: Seq[Seq[File]]): Seq[Seq[File]] = { packages @@ -838,6 +860,7 @@ object Unidoc { .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/catalog/v2/utils"))) .map(_.filterNot(_.getCanonicalPath.contains("org/apache/hive"))) .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/v2/avro"))) + .map(_.filterNot(_.getCanonicalPath.contains("SSLOptions"))) } private def ignoreClasspaths(classpaths: Seq[Classpath]): Seq[Classpath] = { @@ -848,7 +871,10 @@ object Unidoc { val unidocSourceBase = settingKey[String]("Base URL of source links in Scaladoc.") - lazy val settings = scalaJavaUnidocSettings ++ Seq ( + lazy val settings = BaseUnidocPlugin.projectSettings ++ + ScalaUnidocPlugin.projectSettings ++ + JavaUnidocPlugin.projectSettings ++ + Seq ( publish := {}, unidocProjectFilter in(ScalaUnidoc, unidoc) := diff --git a/project/build.properties b/project/build.properties index 23aa187fb35a7..b1e5e313d853f 100644 --- a/project/build.properties +++ b/project/build.properties @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -sbt.version=0.13.18 +sbt.version=1.3.13 diff --git a/project/plugins.sbt b/project/plugins.sbt index 5f21d8126e48a..da466da9945c1 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -23,8 +23,7 @@ libraryDependencies += "com.puppycrawl.tools" % "checkstyle" % "8.25" // checkstyle uses guava 23.0. 
libraryDependencies += "com.google.guava" % "guava" % "23.0" -// need to make changes to uptake sbt 1.0 support in "com.eed3si9n" % "sbt-assembly" % "1.14.5" -addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.11.2") +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.15.0") addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "5.2.4") @@ -32,19 +31,12 @@ addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.9.2") addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0") -// SPARK-29560 Only sbt-mima-plugin needs this repo -resolvers += Resolver.url("bintray", - new java.net.URL("https://dl.bintray.com/typesafe/sbt-plugins"))(Resolver.defaultIvyPatterns) -addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "0.3.0") +addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "0.8.0") -// sbt 1.0.0 support: https://github.com/AlpineNow/junit_xml_listener/issues/6 -addSbtPlugin("com.alpinenow" % "junit_xml_listener" % "0.5.1") +addSbtPlugin("com.eed3si9n" % "sbt-unidoc" % "0.4.3") -// need to make changes to uptake sbt 1.0 support in "com.eed3si9n" % "sbt-unidoc" % "0.4.1" -addSbtPlugin("com.eed3si9n" % "sbt-unidoc" % "0.3.3") - -// need to make changes to uptake sbt 1.0 support in "com.cavorite" % "sbt-avro-1-7" % "1.1.2" -addSbtPlugin("com.cavorite" % "sbt-avro" % "0.3.2") +addSbtPlugin("com.cavorite" % "sbt-avro" % "2.1.1") +libraryDependencies += "org.apache.avro" % "avro-compiler" % "1.8.2" addSbtPlugin("io.spray" % "sbt-revolver" % "0.9.1") @@ -52,14 +44,6 @@ libraryDependencies += "org.ow2.asm" % "asm" % "7.2" libraryDependencies += "org.ow2.asm" % "asm-commons" % "7.2" -// sbt 1.0.0 support: https://github.com/ihji/sbt-antlr4/issues/14 -addSbtPlugin("com.simplytyped" % "sbt-antlr4" % "0.7.13") - -// Spark uses a custom fork of the sbt-pom-reader plugin which contains a patch to fix issues -// related to test-jar dependencies (https://github.com/sbt/sbt-pom-reader/pull/14). The source for -// this fork is published at https://github.com/JoshRosen/sbt-pom-reader/tree/v1.0.0-spark -// and corresponds to commit b160317fcb0b9d1009635a7c5aa05d0f3be61936 in that repository. -// In the long run, we should try to merge our patch upstream and switch to an upstream version of -// the plugin; this is tracked at SPARK-14401. +addSbtPlugin("com.simplytyped" % "sbt-antlr4" % "0.8.2") -addSbtPlugin("org.spark-project" % "sbt-pom-reader" % "1.0.0-spark") +addSbtPlugin("com.typesafe.sbt" % "sbt-pom-reader" % "2.2.0") diff --git a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala index f9bc499961ad7..a6fee8616df11 100644 --- a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala +++ b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala @@ -24,6 +24,7 @@ import scala.reflect.runtime.universe.runtimeMirror import scala.util.Try import org.clapper.classutil.ClassFinder +import org.objectweb.asm.Opcodes /** * A tool for generating classes to be excluded during binary checking with MIMA. It is expected @@ -146,7 +147,7 @@ object GenerateMIMAIgnore { * and subpackages both from directories and jars present on the classpath. 
*/ private def getClasses(packageName: String): Set[String] = { - val finder = ClassFinder() + val finder = ClassFinder(maybeOverrideAsmVersion = Some(Opcodes.ASM7)) finder .getClasses .map(_.name) From 37e1b0c4a5e999ba420cc6eacb2f5a7100fef029 Mon Sep 17 00:00:00 2001 From: zero323 Date: Thu, 8 Oct 2020 10:32:30 +0900 Subject: [PATCH 0191/1009] [SPARK-33086][PYTHON] Add static annotations for pyspark.resource ### What changes were proposed in this pull request? This PR replaces dynamically generated annotations for following modules: - `pyspark.resource.information` - `pyspark.resource.profile` - `pyspark.resource.requests` ### Why are the changes needed? These modules where not manually annotated in `pyspark-stubs`, but are part of the public API and we should provide more precise annotations. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? MyPy tests: ``` mypy --no-incremental --config python/mypy.ini python/pyspark ``` Closes #29969 from zero323/SPARK-32714-FOLLOW-UP-RESOURCE. Authored-by: zero323 Signed-off-by: HyukjinKwon --- python/pyspark/resource/information.pyi | 8 ++-- python/pyspark/resource/profile.pyi | 31 ++++++++----- python/pyspark/resource/requests.pyi | 60 +++++++++++++++---------- 3 files changed, 60 insertions(+), 39 deletions(-) diff --git a/python/pyspark/resource/information.pyi b/python/pyspark/resource/information.pyi index 7baa6ca8520bd..5474afa659746 100644 --- a/python/pyspark/resource/information.pyi +++ b/python/pyspark/resource/information.pyi @@ -16,11 +16,11 @@ # specific language governing permissions and limitations # under the License. -from typing import Any +from typing import List class ResourceInformation: - def __init__(self, name: Any, addresses: Any) -> None: ... + def __init__(self, name: str, addresses: List[str]) -> None: ... @property - def name(self): ... + def name(self) -> str: ... @property - def addresses(self): ... + def addresses(self) -> List[str]: ... diff --git a/python/pyspark/resource/profile.pyi b/python/pyspark/resource/profile.pyi index 8ce7d93b29e93..d6ea340bb510f 100644 --- a/python/pyspark/resource/profile.pyi +++ b/python/pyspark/resource/profile.pyi @@ -22,30 +22,39 @@ from pyspark.resource.requests import ( # noqa: F401 TaskResourceRequest as TaskResourceRequest, TaskResourceRequests as TaskResourceRequests, ) -from typing import Any, Optional +from typing import overload, Dict, Optional, Union +from py4j.java_gateway import JavaObject # type: ignore[import] class ResourceProfile: + @overload def __init__( self, - _java_resource_profile: Optional[Any] = ..., - _exec_req: Any = ..., - _task_req: Any = ..., + _java_resource_profile: JavaObject, + ) -> None: ... + @overload + def __init__( + self, + _java_resource_profile: None = ..., + _exec_req: Dict[str, ExecutorResourceRequest] = ..., + _task_req: Dict[str, TaskResourceRequest] = ..., ) -> None: ... @property - def id(self): ... + def id(self) -> int: ... @property - def taskResources(self): ... + def taskResources(self) -> Dict[str, TaskResourceRequest]: ... @property - def executorResources(self): ... + def executorResources(self) -> Dict[str, ExecutorResourceRequest]: ... class ResourceProfileBuilder: def __init__(self) -> None: ... - def require(self, resourceRequest: Any): ... + def require( + self, resourceRequest: Union[ExecutorResourceRequest, TaskResourceRequests] + ): ... def clearExecutorResourceRequests(self) -> None: ... def clearTaskResourceRequests(self) -> None: ... @property - def taskResources(self): ... 
+ def taskResources(self) -> Dict[str, TaskResourceRequest]: ... @property - def executorResources(self): ... + def executorResources(self) -> Dict[str, ExecutorResourceRequest]: ... @property - def build(self): ... + def build(self) -> ResourceProfile: ... diff --git a/python/pyspark/resource/requests.pyi b/python/pyspark/resource/requests.pyi index f9448d0780409..6ba14d65eb516 100644 --- a/python/pyspark/resource/requests.pyi +++ b/python/pyspark/resource/requests.pyi @@ -16,56 +16,68 @@ # specific language governing permissions and limitations # under the License. -from typing import Any, Optional +from typing import overload, Dict, Optional + +from py4j.java_gateway import JVMView # type: ignore[import] class ExecutorResourceRequest: def __init__( self, - resourceName: Any, - amount: Any, + resourceName: str, + amount: int, discoveryScript: str = ..., vendor: str = ..., ) -> None: ... @property - def resourceName(self): ... + def resourceName(self) -> str: ... @property - def amount(self): ... + def amount(self) -> int: ... @property - def discoveryScript(self): ... + def discoveryScript(self) -> str: ... @property - def vendor(self): ... + def vendor(self) -> str: ... class ExecutorResourceRequests: + @overload + def __init__(self, _jvm: JVMView) -> None: ... + @overload def __init__( - self, _jvm: Optional[Any] = ..., _requests: Optional[Any] = ... + self, + _jvm: None = ..., + _requests: Optional[Dict[str, ExecutorResourceRequest]] = ..., ) -> None: ... - def memory(self, amount: Any): ... - def memoryOverhead(self, amount: Any): ... - def pysparkMemory(self, amount: Any): ... - def offheapMemory(self, amount: Any): ... - def cores(self, amount: Any): ... + def memory(self, amount: str) -> ExecutorResourceRequests: ... + def memoryOverhead(self, amount: str) -> ExecutorResourceRequests: ... + def pysparkMemory(self, amount: str) -> ExecutorResourceRequests: ... + def offheapMemory(self, amount: str) -> ExecutorResourceRequests: ... + def cores(self, amount: int) -> ExecutorResourceRequests: ... def resource( self, - resourceName: Any, - amount: Any, + resourceName: str, + amount: int, discoveryScript: str = ..., vendor: str = ..., - ): ... + ) -> ExecutorResourceRequests: ... @property - def requests(self): ... + def requests(self) -> Dict[str, ExecutorResourceRequest]: ... class TaskResourceRequest: - def __init__(self, resourceName: Any, amount: Any) -> None: ... + def __init__(self, resourceName: str, amount: float) -> None: ... @property - def resourceName(self): ... + def resourceName(self) -> str: ... @property - def amount(self): ... + def amount(self) -> float: ... class TaskResourceRequests: + @overload + def __init__(self, _jvm: JVMView) -> None: ... + @overload def __init__( - self, _jvm: Optional[Any] = ..., _requests: Optional[Any] = ... + self, + _jvm: None = ..., + _requests: Optional[Dict[str, TaskResourceRequest]] = ..., ) -> None: ... - def cpus(self, amount: Any): ... - def resource(self, resourceName: Any, amount: Any): ... + def cpus(self, amount: int) -> TaskResourceRequests: ... + def resource(self, resourceName: str, amount: float) -> TaskResourceRequests: ... @property - def requests(self): ... + def requests(self) -> Dict[str, TaskResourceRequest]: ... From 473b3ba6aa3ead60c6f3d66c982b7883e39b7ad2 Mon Sep 17 00:00:00 2001 From: zero323 Date: Thu, 8 Oct 2020 10:37:42 +0900 Subject: [PATCH 0192/1009] [SPARK-32511][FOLLOW-UP][SQL][R][PYTHON] Add dropFields to SparkR and PySpark ### What changes were proposed in this pull request? 
This PR adds `dropFields` method to: - PySpark `Column` - SparkR `Column` ### Why are the changes needed? Feature parity. ### Does this PR introduce _any_ user-facing change? No, new API. ### How was this patch tested? - New unit tests. - Manual verification of examples / doctests. - Manual run of MyPy tests Closes #29967 from zero323/SPARK-32511-FOLLOW-UP-PYSPARK-SPARKR. Authored-by: zero323 Signed-off-by: HyukjinKwon --- R/pkg/NAMESPACE | 1 + R/pkg/R/column.R | 69 +++++++++++++++++++++++++ R/pkg/R/generics.R | 3 ++ R/pkg/tests/fulltests/test_sparkSQL.R | 19 ++++++- python/pyspark/sql/column.py | 51 ++++++++++++++++++ python/pyspark/sql/column.pyi | 1 + python/pyspark/sql/tests/test_column.py | 22 ++++++++ 7 files changed, 165 insertions(+), 1 deletion(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 25162f3e23b38..2fadf20da491c 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -272,6 +272,7 @@ exportMethods("%<=>%", "degrees", "dense_rank", "desc", + "dropFields", "element_at", "encode", "endsWith", diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 36d792c647e52..c5fcfaff94029 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -387,3 +387,72 @@ setMethod("withField", jc <- callJMethod(x@jc, "withField", fieldName, col@jc) column(jc) }) + +#' dropFields +#' +#' Drops fields in a struct \code{Column} by name. +#' +#' @param x a Column +#' @param ... names of the fields to be dropped. +#' +#' @rdname dropFields +#' @aliases dropFields dropFields,Column-method +#' @examples +#' \dontrun{ +#' df <- select( +#' createDataFrame(iris), +#' alias( +#' struct( +#' column("Sepal_Width"), column("Sepal_Length"), +#' alias( +#' struct( +#' column("Petal_Width"), column("Petal_Length"), +#' alias( +#' column("Petal_Width") * column("Petal_Length"), +#' "Petal_Product" +#' ) +#' ), +#' "Petal" +#' ) +#' ), +#' "dimensions" +#' ) +#' ) +#' head(withColumn(df, "dimensions", dropFields(df$dimensions, "Petal"))) +#' +#' head( +#' withColumn( +#' df, "dimensions", +#' dropFields(df$dimensions, "Sepal_Width", "Sepal_Length") +#' ) +#' ) +#' +#' # This method supports dropping multiple nested fields directly e.g. +#' head( +#' withColumn( +#' df, "dimensions", +#' dropFields(df$dimensions, "Petal.Petal_Width", "Petal.Petal_Length") +#' ) +#' ) +#' +#' # However, if you are going to add/replace multiple nested fields, +#' # it is preffered to extract out the nested struct before +#' # adding/replacing multiple fields e.g. +#' head( +#' withColumn( +#' df, "dimensions", +#' withField( +#' column("dimensions"), +#' "Petal", +#' dropFields(column("dimensions.Petal"), "Petal_Width", "Petal_Length") +#' ) +#' ) +#' ) +#' } +#' @note dropFields since 3.1.0 +setMethod("dropFields", + signature(x = "Column"), + function(x, ...) { + jc <- callJMethod(x@jc, "dropFields", list(...)) + column(jc) + }) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 993fc758adbe5..b9cf0261adc28 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -732,6 +732,9 @@ setGeneric("%<=>%", function(x, value) { standardGeneric("%<=>%") }) #' @rdname withField setGeneric("withField", function(x, fieldName, col) { standardGeneric("withField") }) +#' @rdname dropFields +setGeneric("dropFields", function(x, ...) 
{ standardGeneric("dropFields") }) + ###################### WindowSpec Methods ########################## #' @rdname partitionBy diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index c3b271b1205c5..2ac3093e77ea8 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1809,7 +1809,7 @@ test_that("column functions", { expect_equal(actual, expected) # Test withField - lines <- c("{\"Person\": {\"name\":\"Bob\", \"age\":24}}") + lines <- c("{\"Person\": {\"name\":\"Bob\", \"age\":24, \"height\": 170}}") jsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp") writeLines(lines, jsonPath) df <- read.df(jsonPath, "json") @@ -1820,6 +1820,23 @@ test_that("column functions", { ) ) expect_equal(result, data.frame(dummy = 42)) + + # Test dropFields + expect_setequal( + colnames(select( + withColumn(df, "Person", dropFields(df$Person, "age")), + column("Person.*") + )), + c("name", "height") + ) + + expect_equal( + colnames(select( + withColumn(df, "Person", dropFields(df$Person, "height", "name")), + column("Person.*") + )), + "age" + ) }) test_that("column binary mathfunctions", { diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py index 0e073d2a5da28..3cf7a033641d8 100644 --- a/python/pyspark/sql/column.py +++ b/python/pyspark/sql/column.py @@ -358,6 +358,57 @@ def withField(self, fieldName, col): return Column(self._jc.withField(fieldName, col._jc)) + @since(3.1) + def dropFields(self, *fieldNames): + """ + An expression that drops fields in :class:`StructType` by name. + + >>> from pyspark.sql import Row + >>> from pyspark.sql.functions import col, lit + >>> df = spark.createDataFrame([ + ... Row(a=Row(b=1, c=2, d=3, e=Row(f=4, g=5, h=6)))]) + >>> df.withColumn('a', df['a'].dropFields('b')).show() + +-----------------+ + | a| + +-----------------+ + |{2, 3, {4, 5, 6}}| + +-----------------+ + + >>> df.withColumn('a', df['a'].dropFields('b', 'c')).show() + +--------------+ + | a| + +--------------+ + |{3, {4, 5, 6}}| + +--------------+ + + This method supports dropping multiple nested fields directly e.g. + + >>> df.withColumn("a", col("a").dropFields("e.g", "e.h")).show() + +--------------+ + | a| + +--------------+ + |{1, 2, 3, {4}}| + +--------------+ + + However, if you are going to add/replace multiple nested fields, + it is preffered to extract out the nested struct before + adding/replacing multiple fields e.g. + + >>> df.select(col("a").withField( + ... "e", col("a.e").dropFields("g", "h")).alias("a") + ... ).show() + +--------------+ + | a| + +--------------+ + |{1, 2, 3, {4}}| + +--------------+ + + """ + sc = SparkContext._active_spark_context + + jc = self._jc.dropFields(_to_seq(sc, fieldNames)) + return Column(jc) + def __getattr__(self, item): if item.startswith("__"): raise AttributeError(item) diff --git a/python/pyspark/sql/column.pyi b/python/pyspark/sql/column.pyi index 261fb6e5f3911..0fbb10053fdbf 100644 --- a/python/pyspark/sql/column.pyi +++ b/python/pyspark/sql/column.pyi @@ -80,6 +80,7 @@ class Column: def getItem(self, key: Any) -> Column: ... def getField(self, name: Any) -> Column: ... def withField(self, fieldName: str, col: Column) -> Column: ... + def dropFields(self, *fieldNames: str) -> Column: ... def __getattr__(self, item: Any) -> Column: ... def __iter__(self) -> None: ... def rlike(self, item: str) -> Column: ... 
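For comparison, a minimal sketch of the pre-existing Scala `Column.dropFields` API that the SparkR wrapper (`callJMethod(x@jc, "dropFields", ...)`) and the PySpark wrapper (`self._jc.dropFields(...)`) above delegate to. The session setup, data, and field names here are assumptions for illustration, not taken from the patch.

```
// Sketch only: Scala-side equivalent of the new SparkR/PySpark dropFields bindings.
// Assumes a local SparkSession; the example data and field names are made up.
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.struct

val spark = SparkSession.builder().master("local[*]").appName("dropFields-sketch").getOrCreate()
import spark.implicits._

// Build a single struct column a with fields b, c, d.
val df = Seq((1, 2, 3)).toDF("b", "c", "d").select(struct($"b", $"c", $"d").as("a"))

// Drop one or more nested fields by name, same semantics as the wrappers above.
df.withColumn("a", $"a".dropFields("b", "c")).show()  // a keeps only field d: {3}

spark.stop()
```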
diff --git a/python/pyspark/sql/tests/test_column.py b/python/pyspark/sql/tests/test_column.py index 7e03e2ef3e6d0..4b4ac3bf9cd6c 100644 --- a/python/pyspark/sql/tests/test_column.py +++ b/python/pyspark/sql/tests/test_column.py @@ -156,6 +156,28 @@ def test_with_field(self): 'fieldName should be a string', lambda: df['a'].withField(col('b'), lit(3))) + def test_drop_fields(self): + df = self.spark.createDataFrame([Row(a=Row(b=1, c=2, d=Row(e=3, f=4)))]) + self.assertIsInstance(df["a"].dropFields("b"), Column) + self.assertIsInstance(df["a"].dropFields("b", "c"), Column) + self.assertIsInstance(df["a"].dropFields("d.e"), Column) + + result = df.select( + df["a"].dropFields("b").alias("a1"), + df["a"].dropFields("d.e").alias("a2"), + ).first().asDict(True) + + self.assertTrue( + "b" not in result["a1"] and + "c" in result["a1"] and + "d" in result["a1"] + ) + + self.assertTrue( + "e" not in result["a2"]["d"] and + "f" in result["a2"]["d"] + ) + if __name__ == "__main__": import unittest from pyspark.sql.tests.test_column import * # noqa: F401 From 39510b0e9b79ca59c073bed2219d35d4b81fb7f1 Mon Sep 17 00:00:00 2001 From: Karen Feng Date: Thu, 8 Oct 2020 12:05:39 +0900 Subject: [PATCH 0193/1009] [SPARK-32793][SQL] Add raise_error function, adds error message parameter to assert_true ## What changes were proposed in this pull request? Adds a SQL function `raise_error` which underlies the refactored `assert_true` function. `assert_true` now also (optionally) accepts a custom error message field. `raise_error` is exposed in SQL, Python, Scala, and R. `assert_true` was previously only exposed in SQL; it is now also exposed in Python, Scala, and R. ### Why are the changes needed? Improves usability of `assert_true` by clarifying error messaging, and adds the useful helper function `raise_error`. ### Does this PR introduce _any_ user-facing change? Yes: - Adds `raise_error` function to the SQL, Python, Scala, and R APIs. - Adds `assert_true` function to the SQL, Python and R APIs. ### How was this patch tested? Adds unit tests in SQL, Python, Scala, and R for `assert_true` and `raise_error`. Closes #29947 from karenfeng/spark-32793. 
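A minimal Scala sketch of how the new variants are expected to be used (illustrative only; the DataFrame, message strings, and generic exception handling below are assumptions, not taken from this patch):

```
// Sketch: exercising the Scala assert_true / raise_error variants added by this PR.
// Assumes a local SparkSession; the data and messages are made up for illustration.
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{assert_true, col, lit, raise_error}

val spark = SparkSession.builder().master("local[*]").appName("raise-error-sketch").getOrCreate()
import spark.implicits._

val df = Seq((0, 1)).toDF("a", "b")

// Condition holds: assert_true evaluates to null and the query succeeds.
df.select(assert_true(col("a") < col("b"), lit("a must be smaller than b"))).collect()

// Condition fails: the custom message is carried by the exception thrown at execution time.
try {
  df.select(assert_true(col("a") > col("b"), lit("a must be greater than b"))).collect()
} catch {
  case e: Exception => println(s"assert_true failed: ${e.getMessage}")
}

// raise_error always fails the query with the given message.
try {
  df.select(raise_error(lit("unconditional error"))).collect()
} catch {
  case e: Exception => println(s"raise_error threw: ${e.getMessage}")
}

spark.stop()
```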
Lead-authored-by: Karen Feng Co-authored-by: Hyukjin Kwon Signed-off-by: HyukjinKwon --- R/pkg/R/functions.R | 49 +++++++++++ R/pkg/R/generics.R | 8 ++ R/pkg/tests/fulltests/test_sparkSQL.R | 18 ++++ python/docs/source/reference/pyspark.sql.rst | 2 + python/pyspark/sql/functions.py | 55 +++++++++++- python/pyspark/sql/functions.pyi | 2 + python/pyspark/sql/tests/test_functions.py | 50 +++++++++++ .../catalyst/analysis/FunctionRegistry.scala | 1 + .../spark/sql/catalyst/expressions/misc.scala | 84 +++++++++++++------ .../expressions/CodeGenerationSuite.scala | 2 +- .../expressions/ExpressionEvalHelper.scala | 6 +- .../expressions/MiscExpressionsSuite.scala | 30 +++---- .../org/apache/spark/sql/functions.scala | 30 +++++++ .../sql-functions/sql-expression-schema.md | 5 +- .../sql-tests/inputs/misc-functions.sql | 12 +++ .../sql-tests/results/misc-functions.sql.out | 81 +++++++++++++++++- .../spark/sql/ColumnExpressionSuite.scala | 51 +++++++++++ .../sql/expressions/ExpressionInfoSuite.scala | 17 +++- 18 files changed, 450 insertions(+), 53 deletions(-) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 959edf29e2429..ce384a64bccaf 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -826,6 +826,55 @@ setMethod("xxhash64", column(jc) }) +#' @details +#' \code{assert_true}: Returns null if the input column is true; throws an exception +#' with the provided error message otherwise. +#' +#' @param errMsg (optional) The error message to be thrown. +#' +#' @rdname column_misc_functions +#' @aliases assert_true assert_true,Column-method +#' @examples +#' \dontrun{ +#' tmp <- mutate(df, v1 = assert_true(df$vs < 2), +#' v2 = assert_true(df$vs < 2, "custom error message"), +#' v3 = assert_true(df$vs < 2, df$vs)) +#' head(tmp)} +#' @note assert_true since 3.1.0 +setMethod("assert_true", + signature(x = "Column"), + function(x, errMsg = NULL) { + jc <- if (is.null(errMsg)) { + callJStatic("org.apache.spark.sql.functions", "assert_true", x@jc) + } else { + if (is.character(errMsg) && length(errMsg) == 1) { + errMsg <- lit(errMsg) + } + callJStatic("org.apache.spark.sql.functions", "assert_true", x@jc, errMsg@jc) + } + column(jc) + }) + +#' @details +#' \code{raise_error}: Throws an exception with the provided error message. +#' +#' @rdname column_misc_functions +#' @aliases raise_error raise_error,characterOrColumn-method +#' @examples +#' \dontrun{ +#' tmp <- mutate(df, v1 = raise_error("error message")) +#' head(tmp)} +#' @note raise_error since 3.1.0 +setMethod("raise_error", + signature(x = "characterOrColumn"), + function(x) { + if (is.character(x) && length(x) == 1) { + x <- lit(x) + } + jc <- callJStatic("org.apache.spark.sql.functions", "raise_error", x@jc) + column(jc) + }) + #' @details #' \code{dayofmonth}: Extracts the day of the month as an integer from a #' given date/timestamp/string. diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index b9cf0261adc28..6b732e594cd3f 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -850,6 +850,10 @@ setGeneric("arrays_zip_with", function(x, y, f) { standardGeneric("arrays_zip_wi #' @name NULL setGeneric("ascii", function(x) { standardGeneric("ascii") }) +#' @rdname column_misc_functions +#' @name NULL +setGeneric("assert_true", function(x, errMsg = NULL) { standardGeneric("assert_true") }) + #' @param x Column to compute on or a GroupedData object. #' @param ... additional argument(s) when \code{x} is a GroupedData object. 
#' @rdname avg @@ -1223,6 +1227,10 @@ setGeneric("posexplode_outer", function(x) { standardGeneric("posexplode_outer") #' @name NULL setGeneric("quarter", function(x) { standardGeneric("quarter") }) +#' @rdname column_misc_functions +#' @name NULL +setGeneric("raise_error", function(x) { standardGeneric("raise_error") }) + #' @rdname column_nonaggregate_functions #' @name NULL setGeneric("rand", function(seed) { standardGeneric("rand") }) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 2ac3093e77ea8..268f5734813ba 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -3945,6 +3945,24 @@ test_that("catalog APIs, listTables, listColumns, listFunctions", { dropTempView("cars") }) +test_that("assert_true, raise_error", { + df <- read.json(jsonPath) + filtered <- filter(df, "age < 20") + + expect_equal(collect(select(filtered, assert_true(filtered$age < 20)))$age, c(NULL)) + expect_equal(collect(select(filtered, assert_true(filtered$age < 20, "error message")))$age, + c(NULL)) + expect_equal(collect(select(filtered, assert_true(filtered$age < 20, filtered$name)))$age, + c(NULL)) + expect_error(collect(select(df, assert_true(df$age < 20))), "is not true!") + expect_error(collect(select(df, assert_true(df$age < 20, "error message"))), + "error message") + expect_error(collect(select(df, assert_true(df$age < 20, df$name))), "Michael") + + expect_error(collect(select(filtered, raise_error("error message"))), "error message") + expect_error(collect(select(filtered, raise_error(filtered$name))), "Justin") +}) + compare_list <- function(list1, list2) { # get testthat to show the diff by first making the 2 lists equal in length expect_equal(length(list1), length(list2)) diff --git a/python/docs/source/reference/pyspark.sql.rst b/python/docs/source/reference/pyspark.sql.rst index 692d098c89cdc..0ed2f1b86ada5 100644 --- a/python/docs/source/reference/pyspark.sql.rst +++ b/python/docs/source/reference/pyspark.sql.rst @@ -292,6 +292,7 @@ Functions asc_nulls_last ascii asin + assert_true atan atan2 avg @@ -420,6 +421,7 @@ Functions pow quarter radians + raise_error rand randn rank diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 7007d505d048d..97146fdb804ab 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -1592,6 +1592,57 @@ def xxhash64(*cols): return Column(jc) +@since(3.1) +def assert_true(col, errMsg=None): + """ + Returns null if the input column is true; throws an exception with the provided error message + otherwise. 
+ + >>> df = spark.createDataFrame([(0,1)], ['a', 'b']) + >>> df.select(assert_true(df.a < df.b).alias('r')).collect() + [Row(r=None)] + >>> df = spark.createDataFrame([(0,1)], ['a', 'b']) + >>> df.select(assert_true(df.a < df.b, df.a).alias('r')).collect() + [Row(r=None)] + >>> df = spark.createDataFrame([(0,1)], ['a', 'b']) + >>> df.select(assert_true(df.a < df.b, 'error').alias('r')).collect() + [Row(r=None)] + """ + sc = SparkContext._active_spark_context + if errMsg is None: + return Column(sc._jvm.functions.assert_true(_to_java_column(col))) + if not isinstance(errMsg, (str, Column)): + raise TypeError( + "errMsg should be a Column or a str, got {}".format(type(errMsg)) + ) + + errMsg = ( + _create_column_from_literal(errMsg) + if isinstance(errMsg, str) + else _to_java_column(errMsg) + ) + return Column(sc._jvm.functions.assert_true(_to_java_column(col), errMsg)) + + +@since(3.1) +def raise_error(errMsg): + """ + Throws an exception with the provided error message. + """ + if not isinstance(errMsg, (str, Column)): + raise TypeError( + "errMsg should be a Column or a str, got {}".format(type(errMsg)) + ) + + sc = SparkContext._active_spark_context + errMsg = ( + _create_column_from_literal(errMsg) + if isinstance(errMsg, str) + else _to_java_column(errMsg) + ) + return Column(sc._jvm.functions.raise_error(errMsg)) + + # ---------------------- String/Binary functions ------------------------------ _string_functions = { @@ -3448,14 +3499,14 @@ def bucket(numBuckets, col): ... ).createOrReplace() .. warning:: - This function can be used only in combinatiion with + This function can be used only in combination with :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` method of the `DataFrameWriterV2`. """ if not isinstance(numBuckets, (int, Column)): raise TypeError( - "numBuckets should be a Column or and int, got {}".format(type(numBuckets)) + "numBuckets should be a Column or an int, got {}".format(type(numBuckets)) ) sc = SparkContext._active_spark_context diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi index 8efe65205315e..6249bca5cef68 100644 --- a/python/pyspark/sql/functions.pyi +++ b/python/pyspark/sql/functions.pyi @@ -137,6 +137,8 @@ def sha1(col: ColumnOrName) -> Column: ... def sha2(col: ColumnOrName, numBits: int) -> Column: ... def hash(*cols: ColumnOrName) -> Column: ... def xxhash64(*cols: ColumnOrName) -> Column: ... +def assert_true(col: ColumnOrName, errMsg: Union[Column, str] = ...): ... +def raise_error(errMsg: Union[Column, str]): ... def concat(*cols: ColumnOrName) -> Column: ... def concat_ws(sep: str, *cols: ColumnOrName) -> Column: ... def decode(col: ColumnOrName, charset: str) -> Column: ... 
diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 8d05ed28b8d4e..26d260fe77b0c 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -19,6 +19,7 @@ from itertools import chain import re +from py4j.protocol import Py4JJavaError from pyspark.sql import Row, Window from pyspark.sql.functions import udf, input_file_name, col, percentile_approx, lit from pyspark.testing.sqlutils import ReusedSQLTestCase @@ -524,6 +525,55 @@ def test_datetime_functions(self): parse_result = df.select(functions.to_date(functions.col("dateCol"))).first() self.assertEquals(date(2017, 1, 22), parse_result['to_date(dateCol)']) + def test_assert_true(self): + from pyspark.sql.functions import assert_true + + df = self.spark.range(3) + + self.assertEquals( + df.select(assert_true(df.id < 3)).toDF("val").collect(), + [Row(val=None), Row(val=None), Row(val=None)], + ) + + with self.assertRaises(Py4JJavaError) as cm: + df.select(assert_true(df.id < 2, 'too big')).toDF("val").collect() + self.assertIn("java.lang.RuntimeException", str(cm.exception)) + self.assertIn("too big", str(cm.exception)) + + with self.assertRaises(Py4JJavaError) as cm: + df.select(assert_true(df.id < 2, df.id * 1e6)).toDF("val").collect() + self.assertIn("java.lang.RuntimeException", str(cm.exception)) + self.assertIn("2000000", str(cm.exception)) + + with self.assertRaises(TypeError) as cm: + df.select(assert_true(df.id < 2, 5)) + self.assertEquals( + "errMsg should be a Column or a str, got ", + str(cm.exception) + ) + + def test_raise_error(self): + from pyspark.sql.functions import raise_error + + df = self.spark.createDataFrame([Row(id="foobar")]) + + with self.assertRaises(Py4JJavaError) as cm: + df.select(raise_error(df.id)).collect() + self.assertIn("java.lang.RuntimeException", str(cm.exception)) + self.assertIn("foobar", str(cm.exception)) + + with self.assertRaises(Py4JJavaError) as cm: + df.select(raise_error("barfoo")).collect() + self.assertIn("java.lang.RuntimeException", str(cm.exception)) + self.assertIn("barfoo", str(cm.exception)) + + with self.assertRaises(TypeError) as cm: + df.select(raise_error(None)) + self.assertEquals( + "errMsg should be a Column or a str, got ", + str(cm.exception) + ) + if __name__ == "__main__": import unittest diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 3fae34cbf00c2..508239077a70e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -479,6 +479,7 @@ object FunctionRegistry { // misc functions expression[AssertTrue]("assert_true"), + expression[RaiseError]("raise_error"), expression[Crc32]("crc32"), expression[Md5]("md5"), expression[Uuid]("uuid"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index 1eec26c8e987a..4e71c8c103889 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -53,51 +53,81 @@ case class PrintToStderr(child: Expression) extends UnaryExpression { } /** - * A function throws an exception if 'condition' is not true. 
+ * Throw with the result of an expression (used for debugging). */ @ExpressionDescription( - usage = "_FUNC_(expr) - Throws an exception if `expr` is not true.", + usage = "_FUNC_(expr) - Throws an exception with `expr`.", examples = """ Examples: - > SELECT _FUNC_(0 < 1); - NULL + > SELECT _FUNC_('custom error message'); + java.lang.RuntimeException + custom error message """, - since = "2.0.0") -case class AssertTrue(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { + since = "3.1.0") +case class RaiseError(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { + override def foldable: Boolean = false override def nullable: Boolean = true - - override def inputTypes: Seq[DataType] = Seq(BooleanType) - override def dataType: DataType = NullType + override def inputTypes: Seq[AbstractDataType] = Seq(StringType) - override def prettyName: String = "assert_true" + override def prettyName: String = "raise_error" - private val errMsg = s"'${child.simpleString(SQLConf.get.maxToStringFields)}' is not true!" - - override def eval(input: InternalRow) : Any = { - val v = child.eval(input) - if (v == null || java.lang.Boolean.FALSE.equals(v)) { - throw new RuntimeException(errMsg) - } else { - null + override def eval(input: InternalRow): Any = { + val value = child.eval(input) + if (value == null) { + throw new RuntimeException() } + throw new RuntimeException(value.toString) } + // if (true) is to avoid codegen compilation exception that statement is unreachable override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval = child.genCode(ctx) + ExprCode( + code = code"""${eval.code} + |if (true) { + | if (${eval.isNull}) { + | throw new RuntimeException(); + | } + | throw new RuntimeException(${eval.value}.toString()); + |}""".stripMargin, + isNull = TrueLiteral, + value = JavaCode.defaultLiteral(dataType) + ) + } +} - // Use unnamed reference that doesn't create a local field here to reduce the number of fields - // because errMsgField is used only when the value is null or false. - val errMsgField = ctx.addReferenceObj("errMsg", errMsg) - ExprCode(code = code"""${eval.code} - |if (${eval.isNull} || !${eval.value}) { - | throw new RuntimeException($errMsgField); - |}""".stripMargin, isNull = TrueLiteral, - value = JavaCode.defaultLiteral(dataType)) +/** + * A function that throws an exception if 'condition' is not true. 
+ */ +@ExpressionDescription( + usage = "_FUNC_(expr) - Throws an exception if `expr` is not true.", + examples = """ + Examples: + > SELECT _FUNC_(0 < 1); + NULL + """, + since = "2.0.0") +case class AssertTrue(left: Expression, right: Expression, child: Expression) + extends RuntimeReplaceable { + + override def prettyName: String = "assert_true" + + def this(left: Expression, right: Expression) = { + this(left, right, If(left, Literal(null), RaiseError(right))) } - override def sql: String = s"assert_true(${child.sql})" + def this(left: Expression) = { + this(left, Literal(s"'${left.simpleString(SQLConf.get.maxToStringFields)}' is not true!")) + } + + override def flatArguments: Iterator[Any] = Iterator(left, right) + override def exprsReplaced: Seq[Expression] = Seq(left, right) +} + +object AssertTrue { + def apply(left: Expression): AssertTrue = new AssertTrue(left) } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala index f1de63adc3d9a..adaabfe4d32bb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala @@ -332,7 +332,7 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper { } test("SPARK-17160: field names are properly escaped by AssertTrue") { - GenerateUnsafeProjection.generate(AssertTrue(Cast(Literal("\""), BooleanType)) :: Nil) + GenerateUnsafeProjection.generate(AssertTrue(Cast(Literal("\""), BooleanType)).child :: Nil) } test("should not apply common subexpression elimination on conditional expressions") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala index 341b26ddf6575..d0b0d04d1f719 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala @@ -164,7 +164,11 @@ trait ExpressionEvalHelper extends ScalaCheckDrivenPropertyChecks with PlanTestB val errMsg = intercept[T] { eval }.getMessage - if (!errMsg.contains(expectedErrMsg)) { + if (errMsg == null) { + if (expectedErrMsg != null) { + fail(s"Expected null error message, but `$errMsg` found") + } + } else if (!errMsg.contains(expectedErrMsg)) { fail(s"Expected error message is `$expectedErrMsg`, but `$errMsg` found") } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscExpressionsSuite.scala index 4b2d153a28cc8..d42081024c1dd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscExpressionsSuite.scala @@ -26,21 +26,21 @@ import org.apache.spark.sql.types._ class MiscExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { - test("assert_true") { - intercept[RuntimeException] { - checkEvaluation(AssertTrue(Literal.create(false, BooleanType)), null) - } - intercept[RuntimeException] { - checkEvaluation(AssertTrue(Cast(Literal(0), BooleanType)), null) - } - intercept[RuntimeException] { - 
checkEvaluation(AssertTrue(Literal.create(null, NullType)), null) - } - intercept[RuntimeException] { - checkEvaluation(AssertTrue(Literal.create(null, BooleanType)), null) - } - checkEvaluation(AssertTrue(Literal.create(true, BooleanType)), null) - checkEvaluation(AssertTrue(Cast(Literal(1), BooleanType)), null) + test("RaiseError") { + checkExceptionInExpression[RuntimeException]( + RaiseError(Literal("error message")), + EmptyRow, + "error message" + ) + + checkExceptionInExpression[RuntimeException]( + RaiseError(Literal.create(null, StringType)), + EmptyRow, + null + ) + + // Expects a string + assert(RaiseError(Literal(5)).checkInputDataTypes().isFailure) } test("uuid") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 2c545fe762b6d..2efe5aae09709 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -2318,6 +2318,36 @@ object functions { new XxHash64(cols.map(_.expr)) } + /** + * Returns null if the condition is true, and throws an exception otherwise. + * + * @group misc_funcs + * @since 3.1.0 + */ + def assert_true(c: Column): Column = withExpr { + new AssertTrue(c.expr) + } + + /** + * Returns null if the condition is true; throws an exception with the error message otherwise. + * + * @group misc_funcs + * @since 3.1.0 + */ + def assert_true(c: Column, e: Column): Column = withExpr { + new AssertTrue(c.expr, e.expr) + } + + /** + * Throws an exception with the provided error message. + * + * @group misc_funcs + * @since 3.1.0 + */ + def raise_error(c: Column): Column = withExpr { + RaiseError(c.expr) + } + ////////////////////////////////////////////////////////////////////////////////////////////// // String functions ////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 473204c182a69..1675fb1cc7c62 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,6 +1,6 @@ ## Summary - - Number of queries: 340 + - Number of queries: 341 - Number of expressions that missing example: 13 - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint,window ## Schema of Built-in Functions @@ -34,7 +34,7 @@ | org.apache.spark.sql.catalyst.expressions.Ascii | ascii | SELECT ascii('222') | struct | | org.apache.spark.sql.catalyst.expressions.Asin | asin | SELECT asin(0) | struct | | org.apache.spark.sql.catalyst.expressions.Asinh | asinh | SELECT asinh(0) | struct | -| org.apache.spark.sql.catalyst.expressions.AssertTrue | assert_true | SELECT assert_true(0 < 1) | struct | +| org.apache.spark.sql.catalyst.expressions.AssertTrue | assert_true | SELECT assert_true(0 < 1) | struct | | org.apache.spark.sql.catalyst.expressions.Atan | atan | SELECT atan(0) | struct | | org.apache.spark.sql.catalyst.expressions.Atan2 | atan2 | SELECT atan2(0, 0) | struct | | org.apache.spark.sql.catalyst.expressions.Atanh | atanh | SELECT atanh(0) | struct | @@ -210,6 +210,7 @@ | org.apache.spark.sql.catalyst.expressions.Pow | power | SELECT power(2, 3) | struct | | org.apache.spark.sql.catalyst.expressions.Quarter | quarter | SELECT quarter('2016-08-31') | struct | | 
org.apache.spark.sql.catalyst.expressions.RLike | rlike | SELECT '%SystemDrive%\Users\John' rlike '%SystemDrive%\\Users.*' | struct<%SystemDrive%UsersJohn RLIKE %SystemDrive%\Users.*:boolean> | +| org.apache.spark.sql.catalyst.expressions.RaiseError | raise_error | SELECT raise_error('custom error message') | struct | | org.apache.spark.sql.catalyst.expressions.Rand | rand | SELECT rand() | struct | | org.apache.spark.sql.catalyst.expressions.Rand | random | SELECT random() | struct | | org.apache.spark.sql.catalyst.expressions.Randn | randn | SELECT randn() | struct | diff --git a/sql/core/src/test/resources/sql-tests/inputs/misc-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/misc-functions.sql index 95f71925e9294..907ff33000d8e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/misc-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/misc-functions.sql @@ -8,3 +8,15 @@ select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2); select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'), typeof(interval '23 days'); select typeof(x'ABCD'), typeof('SPARK'); select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 'b', 'spark')); + +-- Spark-32793: Rewrite AssertTrue with RaiseError +SELECT assert_true(true), assert_true(boolean(1)); +SELECT assert_true(false); +SELECT assert_true(boolean(0)); +SELECT assert_true(null); +SELECT assert_true(boolean(null)); +SELECT assert_true(false, 'custom error message'); + +CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v); +SELECT raise_error('error message'); +SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc; diff --git a/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out index bd8ffb82ee129..bf45ec3d10215 100644 --- a/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 7 +-- Number of queries: 16 -- !query @@ -56,3 +56,82 @@ select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 'b', struct -- !query output array map struct + + +-- !query +SELECT assert_true(true), assert_true(boolean(1)) +-- !query schema +struct +-- !query output +NULL NULL + + +-- !query +SELECT assert_true(false) +-- !query schema +struct<> +-- !query output +java.lang.RuntimeException +'false' is not true! + + +-- !query +SELECT assert_true(boolean(0)) +-- !query schema +struct<> +-- !query output +java.lang.RuntimeException +'cast(0 as boolean)' is not true! + + +-- !query +SELECT assert_true(null) +-- !query schema +struct<> +-- !query output +java.lang.RuntimeException +'null' is not true! + + +-- !query +SELECT assert_true(boolean(null)) +-- !query schema +struct<> +-- !query output +java.lang.RuntimeException +'cast(null as boolean)' is not true! 
+ + +-- !query +SELECT assert_true(false, 'custom error message') +-- !query schema +struct<> +-- !query output +java.lang.RuntimeException +custom error message + + +-- !query +CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT raise_error('error message') +-- !query schema +struct<> +-- !query output +java.lang.RuntimeException +error message + + +-- !query +SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc +-- !query schema +struct<> +-- !query output +java.lang.RuntimeException +too big: 8 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala index b11f4c603dfd6..937de92bcaba6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala @@ -24,6 +24,7 @@ import org.apache.hadoop.io.{LongWritable, Text} import org.apache.hadoop.mapreduce.lib.input.{TextInputFormat => NewTextInputFormat} import org.scalatest.matchers.should.Matchers._ +import org.apache.spark.SparkException import org.apache.spark.sql.UpdateFieldsBenchmark._ import org.apache.spark.sql.catalyst.expressions.{InSet, Literal, NamedExpression} import org.apache.spark.sql.execution.ProjectExec @@ -2302,4 +2303,54 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { } } } + + test("assert_true") { + // assert_true(condition, errMsgCol) + val booleanDf = Seq((true), (false)).toDF("cond") + checkAnswer( + booleanDf.filter("cond = true").select(assert_true($"cond")), + Row(null) :: Nil + ) + val e1 = intercept[SparkException] { + booleanDf.select(assert_true($"cond", lit(null.asInstanceOf[String]))).collect() + } + assert(e1.getCause.isInstanceOf[RuntimeException]) + assert(e1.getCause.getMessage == null) + + val nullDf = Seq(("first row", None), ("second row", Some(true))).toDF("n", "cond") + checkAnswer( + nullDf.filter("cond = true").select(assert_true($"cond", $"cond")), + Row(null) :: Nil + ) + val e2 = intercept[SparkException] { + nullDf.select(assert_true($"cond", $"n")).collect() + } + assert(e2.getCause.isInstanceOf[RuntimeException]) + assert(e2.getCause.getMessage == "first row") + + // assert_true(condition) + val intDf = Seq((0, 1)).toDF("a", "b") + checkAnswer(intDf.select(assert_true($"a" < $"b")), Row(null) :: Nil) + val e3 = intercept[SparkException] { + intDf.select(assert_true($"a" > $"b")).collect() + } + assert(e3.getCause.isInstanceOf[RuntimeException]) + assert(e3.getCause.getMessage == "'('a > 'b)' is not true!") + } + + test("raise_error") { + val strDf = Seq(("hello")).toDF("a") + + val e1 = intercept[SparkException] { + strDf.select(raise_error(lit(null.asInstanceOf[String]))).collect() + } + assert(e1.getCause.isInstanceOf[RuntimeException]) + assert(e1.getCause.getMessage == null) + + val e2 = intercept[SparkException] { + strDf.select(raise_error($"a")).collect() + } + assert(e2.getCause.isInstanceOf[RuntimeException]) + assert(e2.getCause.getMessage == "hello") + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala index f487a30c8dfa3..9f62ff8301ebc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala @@ -163,7 +163,9 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession { "org.apache.spark.sql.catalyst.expressions.InputFileBlockLength", // The example calls methods that return unstable results. "org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection", - "org.apache.spark.sql.catalyst.expressions.SparkVersion") + "org.apache.spark.sql.catalyst.expressions.SparkVersion", + // Throws an error + "org.apache.spark.sql.catalyst.expressions.RaiseError") val parFuncs = new ParVector(spark.sessionState.functionRegistry.listFunction().toVector) parFuncs.foreach { funcId => @@ -197,9 +199,16 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession { val exprTypesToCheck = Seq(classOf[UnaryExpression], classOf[BinaryExpression], classOf[TernaryExpression], classOf[QuaternaryExpression], classOf[SeptenaryExpression]) - // Do not check these expressions, because these expressions extend NullIntolerant - // and override the eval method to avoid evaluating input1 if input2 is 0. - val ignoreSet = Set(classOf[IntegralDivide], classOf[Divide], classOf[Remainder], classOf[Pmod]) + // Do not check these expressions, because these expressions override the eval method + val ignoreSet = Set( + // Extend NullIntolerant and avoid evaluating input1 if input2 is 0 + classOf[IntegralDivide], + classOf[Divide], + classOf[Remainder], + classOf[Pmod], + // Throws an exception, even if input is null + classOf[RaiseError] + ) val candidateExprsToCheck = spark.sessionState.functionRegistry.listFunction() .map(spark.sessionState.catalog.lookupFunctionInfo).map(_.getClassName) From bbc887bf73233b8c65ace05929290c0de4f63de8 Mon Sep 17 00:00:00 2001 From: Yuning Zhang Date: Thu, 8 Oct 2020 12:18:06 +0900 Subject: [PATCH 0194/1009] [SPARK-33089][SQL] make avro format propagate Hadoop config from DS options to underlying HDFS file system ### What changes were proposed in this pull request? In `AvroUtils`'s `inferSchema()`, propagate Hadoop config from DS options to underlying HDFS file system. ### Why are the changes needed? There is a bug that when running: ```scala spark.read.format("avro").options(conf).load(path) ``` The underlying file system will not receive the `conf` options. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? unit test added Closes #29971 from yuningzh-db/avro_options. 
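For illustration, a minimal Scala sketch of the behavior this enables (the path and option values below are placeholders): Hadoop configuration entries passed as data source options now also reach the file system used during schema inference.

```scala
// Placeholder Hadoop key mixed with a regular Avro data source option; before this
// change, inferSchema() built its Hadoop configuration without the supplied options.
val options = Map(
  "fs.defaultFS"    -> "hdfs://namenode:8020",  // illustrative Hadoop configuration key
  "ignoreExtension" -> "true"                   // ordinary Avro data source option
)

val df = spark.read
  .format("avro")
  .options(options)
  .load("/data/events")                         // placeholder path

df.printSchema()
```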
Authored-by: Yuning Zhang Signed-off-by: HyukjinKwon --- .../scala/org/apache/spark/sql/avro/AvroUtils.scala | 2 +- .../scala/org/apache/spark/sql/avro/AvroSuite.scala | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala index 4a38970812f9d..3583b38a01333 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala @@ -43,7 +43,7 @@ private[sql] object AvroUtils extends Logging { spark: SparkSession, options: Map[String, String], files: Seq[FileStatus]): Option[StructType] = { - val conf = spark.sessionState.newHadoopConf() + val conf = spark.sessionState.newHadoopConfWithOptions(options) val parsedOptions = new AvroOptions(options, conf) if (parsedOptions.parameters.contains(ignoreExtensionKey)) { diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index b995a667be2b1..1005a274d0304 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -1802,6 +1802,16 @@ abstract class AvroSuite extends QueryTest with SharedSparkSession with NestedDa assert(version === SPARK_VERSION_SHORT) } } + + test("SPARK-33089: should propagate Hadoop config from DS options to underlying file system") { + withSQLConf( + "fs.file.impl" -> classOf[FakeFileSystemRequiringDSOption].getName, + "fs.file.impl.disable.cache" -> "true") { + val conf = Map("ds_option" -> "value") + val path = "file:" + testAvro.stripPrefix("file:") + spark.read.format("avro").options(conf).load(path) + } + } } class AvroV1Suite extends AvroSuite { From 1c781a4354666bba4329e588a0e9a9fa8980303b Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Thu, 8 Oct 2020 04:58:41 +0000 Subject: [PATCH 0195/1009] [SPARK-32282][SQL] Improve EnsureRequirement.reorderJoinKeys to handle more scenarios such as PartitioningCollection ### What changes were proposed in this pull request? This PR proposes to improve `EnsureRequirement.reorderJoinKeys` to handle the following scenarios: 1. If the keys cannot be reordered to match the left-side `HashPartitioning`, consider the right-side `HashPartitioning`. 2. Handle `PartitioningCollection`, which may contain `HashPartitioning`. ### Why are the changes needed? 1. For the scenario 1), the current behavior matches either the left-side `HashPartitioning` or the right-side `HashPartitioning`. This means that if both sides are `HashPartitioning`, it will try to match only the left side.
The following will not consider the right-side `HashPartitioning`: ``` val df1 = (0 until 10).map(i => (i % 5, i % 13)).toDF("i1", "j1") val df2 = (0 until 10).map(i => (i % 7, i % 11)).toDF("i2", "j2") df1.write.format("parquet").bucketBy(4, "i1", "j1").saveAsTable("t1")df2.write.format("parquet").bucketBy(4, "i2", "j2").saveAsTable("t2") val t1 = spark.table("t1") val t2 = spark.table("t2") val join = t1.join(t2, t1("i1") === t2("j2") && t1("i1") === t2("i2")) join.explain == Physical Plan == *(5) SortMergeJoin [i1#26, i1#26], [j2#31, i2#30], Inner :- *(2) Sort [i1#26 ASC NULLS FIRST, i1#26 ASC NULLS FIRST], false, 0 : +- Exchange hashpartitioning(i1#26, i1#26, 4), true, [id=#69] : +- *(1) Project [i1#26, j1#27] : +- *(1) Filter isnotnull(i1#26) : +- *(1) ColumnarToRow : +- FileScan parquet default.t1[i1#26,j1#27] Batched: true, DataFilters: [isnotnull(i1#26)], Format: Parquet, Location: InMemoryFileIndex[..., PartitionFilters: [], PushedFilters: [IsNotNull(i1)], ReadSchema: struct, SelectedBucketsCount: 4 out of 4 +- *(4) Sort [j2#31 ASC NULLS FIRST, i2#30 ASC NULLS FIRST], false, 0. +- Exchange hashpartitioning(j2#31, i2#30, 4), true, [id=#79]. <===== This can be removed +- *(3) Project [i2#30, j2#31] +- *(3) Filter (((j2#31 = i2#30) AND isnotnull(j2#31)) AND isnotnull(i2#30)) +- *(3) ColumnarToRow +- FileScan parquet default.t2[i2#30,j2#31] Batched: true, DataFilters: [(j2#31 = i2#30), isnotnull(j2#31), isnotnull(i2#30)], Format: Parquet, Location: InMemoryFileIndex[..., PartitionFilters: [], PushedFilters: [IsNotNull(j2), IsNotNull(i2)], ReadSchema: struct, SelectedBucketsCount: 4 out of 4 ``` 2. For the scenario 2), the current behavior does not handle `PartitioningCollection`: ``` val df1 = (0 until 100).map(i => (i % 5, i % 13)).toDF("i1", "j1") val df2 = (0 until 100).map(i => (i % 7, i % 11)).toDF("i2", "j2") val df3 = (0 until 100).map(i => (i % 5, i % 13)).toDF("i3", "j3") val join = df1.join(df2, df1("i1") === df2("i2") && df1("j1") === df2("j2")) // PartitioningCollection val join2 = join.join(df3, join("j1") === df3("j3") && join("i1") === df3("i3")) join2.explain == Physical Plan == *(9) SortMergeJoin [j1#8, i1#7], [j3#30, i3#29], Inner :- *(6) Sort [j1#8 ASC NULLS FIRST, i1#7 ASC NULLS FIRST], false, 0. <===== This can be removed : +- Exchange hashpartitioning(j1#8, i1#7, 5), true, [id=#58] <===== This can be removed : +- *(5) SortMergeJoin [i1#7, j1#8], [i2#18, j2#19], Inner : :- *(2) Sort [i1#7 ASC NULLS FIRST, j1#8 ASC NULLS FIRST], false, 0 : : +- Exchange hashpartitioning(i1#7, j1#8, 5), true, [id=#45] : : +- *(1) Project [_1#2 AS i1#7, _2#3 AS j1#8] : : +- *(1) LocalTableScan [_1#2, _2#3] : +- *(4) Sort [i2#18 ASC NULLS FIRST, j2#19 ASC NULLS FIRST], false, 0 : +- Exchange hashpartitioning(i2#18, j2#19, 5), true, [id=#51] : +- *(3) Project [_1#13 AS i2#18, _2#14 AS j2#19] : +- *(3) LocalTableScan [_1#13, _2#14] +- *(8) Sort [j3#30 ASC NULLS FIRST, i3#29 ASC NULLS FIRST], false, 0 +- Exchange hashpartitioning(j3#30, i3#29, 5), true, [id=#64] +- *(7) Project [_1#24 AS i3#29, _2#25 AS j3#30] +- *(7) LocalTableScan [_1#24, _2#25] ``` ### Does this PR introduce _any_ user-facing change? Yes, now from the above examples, the shuffle/sort nodes pointed by `This can be removed` are now removed: 1. 
Scenario 1): ``` == Physical Plan == *(4) SortMergeJoin [i1#26, i1#26], [i2#30, j2#31], Inner :- *(2) Sort [i1#26 ASC NULLS FIRST, i1#26 ASC NULLS FIRST], false, 0 : +- Exchange hashpartitioning(i1#26, i1#26, 4), true, [id=#67] : +- *(1) Project [i1#26, j1#27] : +- *(1) Filter isnotnull(i1#26) : +- *(1) ColumnarToRow : +- FileScan parquet default.t1[i1#26,j1#27] Batched: true, DataFilters: [isnotnull(i1#26)], Format: Parquet, Location: InMemoryFileIndex[..., PartitionFilters: [], PushedFilters: [IsNotNull(i1)], ReadSchema: struct, SelectedBucketsCount: 4 out of 4 +- *(3) Sort [i2#30 ASC NULLS FIRST, j2#31 ASC NULLS FIRST], false, 0 +- *(3) Project [i2#30, j2#31] +- *(3) Filter (((j2#31 = i2#30) AND isnotnull(j2#31)) AND isnotnull(i2#30)) +- *(3) ColumnarToRow +- FileScan parquet default.t2[i2#30,j2#31] Batched: true, DataFilters: [(j2#31 = i2#30), isnotnull(j2#31), isnotnull(i2#30)], Format: Parquet, Location: InMemoryFileIndex[..., PartitionFilters: [], PushedFilters: [IsNotNull(j2), IsNotNull(i2)], ReadSchema: struct, SelectedBucketsCount: 4 out of 4 ``` 2. Scenario 2): ``` == Physical Plan == *(8) SortMergeJoin [i1#7, j1#8], [i3#29, j3#30], Inner :- *(5) SortMergeJoin [i1#7, j1#8], [i2#18, j2#19], Inner : :- *(2) Sort [i1#7 ASC NULLS FIRST, j1#8 ASC NULLS FIRST], false, 0 : : +- Exchange hashpartitioning(i1#7, j1#8, 5), true, [id=#43] : : +- *(1) Project [_1#2 AS i1#7, _2#3 AS j1#8] : : +- *(1) LocalTableScan [_1#2, _2#3] : +- *(4) Sort [i2#18 ASC NULLS FIRST, j2#19 ASC NULLS FIRST], false, 0 : +- Exchange hashpartitioning(i2#18, j2#19, 5), true, [id=#49] : +- *(3) Project [_1#13 AS i2#18, _2#14 AS j2#19] : +- *(3) LocalTableScan [_1#13, _2#14] +- *(7) Sort [i3#29 ASC NULLS FIRST, j3#30 ASC NULLS FIRST], false, 0 +- Exchange hashpartitioning(i3#29, j3#30, 5), true, [id=#58] +- *(6) Project [_1#24 AS i3#29, _2#25 AS j3#30] +- *(6) LocalTableScan [_1#24, _2#25] ``` ### How was this patch tested? Added tests. Closes #29074 from imback82/reorder_keys. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../exchange/EnsureRequirements.scala | 58 +++++++-- .../exchange/EnsureRequirementsSuite.scala | 122 ++++++++++++++++++ 2 files changed, 168 insertions(+), 12 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala index b176598ed8c2c..3641654b89b76 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala @@ -135,9 +135,14 @@ case class EnsureRequirements(conf: SQLConf) extends Rule[SparkPlan] { leftKeys: IndexedSeq[Expression], rightKeys: IndexedSeq[Expression], expectedOrderOfKeys: Seq[Expression], - currentOrderOfKeys: Seq[Expression]): (Seq[Expression], Seq[Expression]) = { + currentOrderOfKeys: Seq[Expression]): Option[(Seq[Expression], Seq[Expression])] = { if (expectedOrderOfKeys.size != currentOrderOfKeys.size) { - return (leftKeys, rightKeys) + return None + } + + // Check if the current order already satisfies the expected order. + if (expectedOrderOfKeys.zip(currentOrderOfKeys).forall(p => p._1.semanticEquals(p._2))) { + return Some(leftKeys, rightKeys) + } // Build a lookup between an expression and the positions its holds in the current key seq. 
@@ -164,10 +169,10 @@ case class EnsureRequirements(conf: SQLConf) extends Rule[SparkPlan] { rightKeysBuffer += rightKeys(index) case _ => // The expression cannot be found, or we have exhausted all indices for that expression. - return (leftKeys, rightKeys) + return None } } - (leftKeysBuffer.toSeq, rightKeysBuffer.toSeq) + Some(leftKeysBuffer.toSeq, rightKeysBuffer.toSeq) } private def reorderJoinKeys( @@ -176,19 +181,48 @@ case class EnsureRequirements(conf: SQLConf) extends Rule[SparkPlan] { leftPartitioning: Partitioning, rightPartitioning: Partitioning): (Seq[Expression], Seq[Expression]) = { if (leftKeys.forall(_.deterministic) && rightKeys.forall(_.deterministic)) { - (leftPartitioning, rightPartitioning) match { - case (HashPartitioning(leftExpressions, _), _) => - reorder(leftKeys.toIndexedSeq, rightKeys.toIndexedSeq, leftExpressions, leftKeys) - case (_, HashPartitioning(rightExpressions, _)) => - reorder(leftKeys.toIndexedSeq, rightKeys.toIndexedSeq, rightExpressions, rightKeys) - case _ => - (leftKeys, rightKeys) - } + reorderJoinKeysRecursively( + leftKeys, + rightKeys, + Some(leftPartitioning), + Some(rightPartitioning)) + .getOrElse((leftKeys, rightKeys)) } else { (leftKeys, rightKeys) } } + /** + * Recursively reorders the join keys based on partitioning. It starts reordering the + * join keys to match HashPartitioning on either side, followed by PartitioningCollection. + */ + private def reorderJoinKeysRecursively( + leftKeys: Seq[Expression], + rightKeys: Seq[Expression], + leftPartitioning: Option[Partitioning], + rightPartitioning: Option[Partitioning]): Option[(Seq[Expression], Seq[Expression])] = { + (leftPartitioning, rightPartitioning) match { + case (Some(HashPartitioning(leftExpressions, _)), _) => + reorder(leftKeys.toIndexedSeq, rightKeys.toIndexedSeq, leftExpressions, leftKeys) + .orElse(reorderJoinKeysRecursively( + leftKeys, rightKeys, None, rightPartitioning)) + case (_, Some(HashPartitioning(rightExpressions, _))) => + reorder(leftKeys.toIndexedSeq, rightKeys.toIndexedSeq, rightExpressions, rightKeys) + .orElse(reorderJoinKeysRecursively( + leftKeys, rightKeys, leftPartitioning, None)) + case (Some(PartitioningCollection(partitionings)), _) => + partitionings.foldLeft(Option.empty[(Seq[Expression], Seq[Expression])]) { (res, p) => + res.orElse(reorderJoinKeysRecursively(leftKeys, rightKeys, Some(p), rightPartitioning)) + }.orElse(reorderJoinKeysRecursively(leftKeys, rightKeys, None, rightPartitioning)) + case (_, Some(PartitioningCollection(partitionings))) => + partitionings.foldLeft(Option.empty[(Seq[Expression], Seq[Expression])]) { (res, p) => + res.orElse(reorderJoinKeysRecursively(leftKeys, rightKeys, leftPartitioning, Some(p))) + }.orElse(None) + case _ => + None + } + } + /** * When the physical operators are created for JOIN, the ordering of join keys is based on order * in which the join keys appear in the user query. That might not match with the output diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala new file mode 100644 index 0000000000000..38e68cd2512e7 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.exchange + +import org.apache.spark.sql.catalyst.expressions.Literal +import org.apache.spark.sql.catalyst.plans.Inner +import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, PartitioningCollection} +import org.apache.spark.sql.execution.{DummySparkPlan, SortExec} +import org.apache.spark.sql.execution.joins.SortMergeJoinExec +import org.apache.spark.sql.test.SharedSparkSession + +class EnsureRequirementsSuite extends SharedSparkSession { + private val exprA = Literal(1) + private val exprB = Literal(2) + private val exprC = Literal(3) + + test("reorder should handle PartitioningCollection") { + val plan1 = DummySparkPlan( + outputPartitioning = PartitioningCollection(Seq( + HashPartitioning(exprA :: exprB :: Nil, 5), + HashPartitioning(exprA :: Nil, 5)))) + val plan2 = DummySparkPlan() + + // Test PartitioningCollection on the left side of join. + val smjExec1 = SortMergeJoinExec( + exprB :: exprA :: Nil, exprA :: exprB :: Nil, Inner, None, plan1, plan2) + EnsureRequirements(spark.sessionState.conf).apply(smjExec1) match { + case SortMergeJoinExec(leftKeys, rightKeys, _, _, + SortExec(_, _, DummySparkPlan(_, _, _: PartitioningCollection, _, _), _), + SortExec(_, _, ShuffleExchangeExec(_: HashPartitioning, _, _), _), _) => + assert(leftKeys === Seq(exprA, exprB)) + assert(rightKeys === Seq(exprB, exprA)) + case other => fail(other.toString) + } + + // Test PartitioningCollection on the right side of join. + val smjExec2 = SortMergeJoinExec( + exprA :: exprB :: Nil, exprB :: exprA :: Nil, Inner, None, plan2, plan1) + EnsureRequirements(spark.sessionState.conf).apply(smjExec2) match { + case SortMergeJoinExec(leftKeys, rightKeys, _, _, + SortExec(_, _, ShuffleExchangeExec(_: HashPartitioning, _, _), _), + SortExec(_, _, DummySparkPlan(_, _, _: PartitioningCollection, _, _), _), _) => + assert(leftKeys === Seq(exprB, exprA)) + assert(rightKeys === Seq(exprA, exprB)) + case other => fail(other.toString) + } + + // Both sides are PartitioningCollection, but left side cannot be reorderd to match + // and it should fall back to the right side. 
+ val smjExec3 = SortMergeJoinExec( + exprA :: exprC :: Nil, exprB :: exprA :: Nil, Inner, None, plan1, plan1) + EnsureRequirements(spark.sessionState.conf).apply(smjExec3) match { + case SortMergeJoinExec(leftKeys, rightKeys, _, _, + SortExec(_, _, ShuffleExchangeExec(_: HashPartitioning, _, _), _), + SortExec(_, _, DummySparkPlan(_, _, _: PartitioningCollection, _, _), _), _) => + assert(leftKeys === Seq(exprC, exprA)) + assert(rightKeys === Seq(exprA, exprB)) + case other => fail(other.toString) + } + } + + test("reorder should fallback to the other side partitioning") { + val plan1 = DummySparkPlan( + outputPartitioning = HashPartitioning(exprA :: exprB :: exprC :: Nil, 5)) + val plan2 = DummySparkPlan( + outputPartitioning = HashPartitioning(exprB :: exprC :: Nil, 5)) + + // Test fallback to the right side, which has HashPartitioning. + val smjExec1 = SortMergeJoinExec( + exprA :: exprB :: Nil, exprC :: exprB :: Nil, Inner, None, plan1, plan2) + EnsureRequirements(spark.sessionState.conf).apply(smjExec1) match { + case SortMergeJoinExec(leftKeys, rightKeys, _, _, + SortExec(_, _, ShuffleExchangeExec(_: HashPartitioning, _, _), _), + SortExec(_, _, DummySparkPlan(_, _, _: HashPartitioning, _, _), _), _) => + assert(leftKeys === Seq(exprB, exprA)) + assert(rightKeys === Seq(exprB, exprC)) + case other => fail(other.toString) + } + + // Test fallback to the right side, which has PartitioningCollection. + val plan3 = DummySparkPlan( + outputPartitioning = PartitioningCollection(Seq(HashPartitioning(exprB :: exprC :: Nil, 5)))) + val smjExec2 = SortMergeJoinExec( + exprA :: exprB :: Nil, exprC :: exprB :: Nil, Inner, None, plan1, plan3) + EnsureRequirements(spark.sessionState.conf).apply(smjExec2) match { + case SortMergeJoinExec(leftKeys, rightKeys, _, _, + SortExec(_, _, ShuffleExchangeExec(_: HashPartitioning, _, _), _), + SortExec(_, _, DummySparkPlan(_, _, _: PartitioningCollection, _, _), _), _) => + assert(leftKeys === Seq(exprB, exprA)) + assert(rightKeys === Seq(exprB, exprC)) + case other => fail(other.toString) + } + + // The right side has HashPartitioning, so it is matched first, but no reordering match is + // found, and it should fall back to the left side, which has a PartitioningCollection. + val smjExec3 = SortMergeJoinExec( + exprC :: exprB :: Nil, exprA :: exprB :: Nil, Inner, None, plan3, plan1) + EnsureRequirements(spark.sessionState.conf).apply(smjExec3) match { + case SortMergeJoinExec(leftKeys, rightKeys, _, _, + SortExec(_, _, DummySparkPlan(_, _, _: PartitioningCollection, _, _), _), + SortExec(_, _, ShuffleExchangeExec(_: HashPartitioning, _, _), _), _) => + assert(leftKeys === Seq(exprB, exprC)) + assert(rightKeys === Seq(exprB, exprA)) + case other => fail(other.toString) + } + } +} From 7d6e3fb998021b4873f3bee8a8218d2504ed88a0 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 8 Oct 2020 05:28:33 +0000 Subject: [PATCH 0196/1009] [SPARK-33074][SQL] Classify dialect exceptions in JDBC v2 Table Catalog ### What changes were proposed in this pull request? 1. Add new method to the `JdbcDialect` class - `classifyException()`. It converts dialect specific exception to Spark's `AnalysisException` or its sub-classes. 2. Replace H2 exception `org.h2.jdbc.JdbcSQLException` in `JDBCTableCatalogSuite` by `AnalysisException`. 3. Add `H2Dialect` ### Why are the changes needed? Currently JDBC v2 Table Catalog implementation throws dialect specific exception and ignores exceptions defined in the `TableCatalog` interface. 
This PR adds new method for converting dialect specific exception, and assumes that follow up PRs will implement `classifyException()`. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? By running existing test suites `JDBCTableCatalogSuite` and `JDBCV2Suite`. Closes #29952 from MaxGekk/jdbcv2-classify-exception. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../analysis/AlreadyExistException.scala | 3 +- .../analysis/NoSuchItemException.scala | 6 ++- .../v2/jdbc/JDBCTableCatalog.scala | 24 ++++++++-- .../org/apache/spark/sql/jdbc/H2Dialect.scala | 48 +++++++++++++++++++ .../apache/spark/sql/jdbc/JdbcDialects.scala | 12 +++++ .../v2/jdbc/JDBCTableCatalogSuite.scala | 47 ++++++++++-------- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 11 +++-- .../spark/sql/jdbc/JDBCWriteSuite.scala | 37 +++++++------- 8 files changed, 142 insertions(+), 46 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlreadyExistException.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlreadyExistException.scala index bfc3b3d0ac966..c50ba623c27b2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlreadyExistException.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlreadyExistException.scala @@ -37,7 +37,8 @@ class NamespaceAlreadyExistsException(message: String) extends AnalysisException } } -class TableAlreadyExistsException(message: String) extends AnalysisException(message) { +class TableAlreadyExistsException(message: String, cause: Option[Throwable] = None) + extends AnalysisException(message, cause = cause) { def this(db: String, table: String) = { this(s"Table or view '$table' already exists in database '$db'") } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NoSuchItemException.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NoSuchItemException.scala index 88be441d808db..8a1913b40b310 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NoSuchItemException.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NoSuchItemException.scala @@ -32,13 +32,15 @@ import org.apache.spark.sql.types.StructType class NoSuchDatabaseException( val db: String) extends NoSuchNamespaceException(s"Database '$db' not found") -class NoSuchNamespaceException(message: String) extends AnalysisException(message) { +class NoSuchNamespaceException(message: String, cause: Option[Throwable] = None) + extends AnalysisException(message, cause = cause) { def this(namespace: Array[String]) = { this(s"Namespace '${namespace.quoted}' not found") } } -class NoSuchTableException(message: String) extends AnalysisException(message) { +class NoSuchTableException(message: String, cause: Option[Throwable] = None) + extends AnalysisException(message, cause = cause) { def this(db: String, table: String) = { this(s"Table or view '$table' not found in database '$db'") } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala index 41f650d1f2ff5..8edc2fe5585e0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala @@ -70,7 +70,9 @@ class JDBCTableCatalog extends TableCatalog with Logging { checkNamespace(ident.namespace()) val writeOptions = new JdbcOptionsInWrite( options.parameters + (JDBCOptions.JDBC_TABLE_NAME -> getTableName(ident))) - withConnection(JdbcUtils.tableExists(_, writeOptions)) + classifyException(s"Failed table existence check: $ident") { + withConnection(JdbcUtils.tableExists(_, writeOptions)) + } } override def dropTable(ident: Identifier): Boolean = { @@ -88,7 +90,9 @@ class JDBCTableCatalog extends TableCatalog with Logging { override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit = { checkNamespace(oldIdent.namespace()) withConnection { conn => - JdbcUtils.renameTable(conn, getTableName(oldIdent), getTableName(newIdent), options) + classifyException(s"Failed table renaming from $oldIdent to $newIdent") { + JdbcUtils.renameTable(conn, getTableName(oldIdent), getTableName(newIdent), options) + } } } @@ -123,7 +127,9 @@ class JDBCTableCatalog extends TableCatalog with Logging { options.parameters + (JDBCOptions.JDBC_TABLE_NAME -> getTableName(ident))) val caseSensitive = SQLConf.get.caseSensitiveAnalysis withConnection { conn => - JdbcUtils.createTable(conn, getTableName(ident), schema, caseSensitive, writeOptions) + classifyException(s"Failed table creation: $ident") { + JdbcUtils.createTable(conn, getTableName(ident), schema, caseSensitive, writeOptions) + } } JDBCTable(ident, schema, writeOptions) @@ -132,7 +138,9 @@ class JDBCTableCatalog extends TableCatalog with Logging { override def alterTable(ident: Identifier, changes: TableChange*): Table = { checkNamespace(ident.namespace()) withConnection { conn => - JdbcUtils.alterTable(conn, getTableName(ident), changes, options) + classifyException(s"Failed table altering: $ident") { + JdbcUtils.alterTable(conn, getTableName(ident), changes, options) + } loadTable(ident) } } @@ -156,4 +164,12 @@ class JDBCTableCatalog extends TableCatalog with Logging { private def getTableName(ident: Identifier): String = { (ident.namespace() :+ ident.name()).map(dialect.quoteIdentifier).mkString(".") } + + private def classifyException[T](message: String)(f: => T): T = { + try { + f + } catch { + case e: Throwable => throw dialect.classifyException(message, e) + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala new file mode 100644 index 0000000000000..9c727957ffab8 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.jdbc + +import java.sql.SQLException +import java.util.Locale + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.analysis.{NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException} + +private object H2Dialect extends JdbcDialect { + override def canHandle(url: String): Boolean = + url.toLowerCase(Locale.ROOT).startsWith("jdbc:h2") + + override def classifyException(message: String, e: Throwable): AnalysisException = { + if (e.isInstanceOf[SQLException]) { + // Error codes are from https://www.h2database.com/javadoc/org/h2/api/ErrorCode.html + e.asInstanceOf[SQLException].getErrorCode match { + // TABLE_OR_VIEW_ALREADY_EXISTS_1 + case 42101 => + throw new TableAlreadyExistsException(message, cause = Some(e)) + // TABLE_OR_VIEW_NOT_FOUND_1 + case 42102 => + throw new NoSuchTableException(message, cause = Some(e)) + // SCHEMA_NOT_FOUND_1 + case 90079 => + throw new NoSuchNamespaceException(message, cause = Some(e)) + case _ => + } + } + super.classifyException(message, e) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index a01720d1eefc7..5f8d788bc7a22 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -24,6 +24,7 @@ import scala.collection.mutable.ArrayBuilder import org.apache.commons.lang3.StringUtils import org.apache.spark.annotation.{DeveloperApi, Since} +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.connector.catalog.TableChange import org.apache.spark.sql.connector.catalog.TableChange._ import org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils @@ -253,6 +254,16 @@ abstract class JdbcDialect extends Serializable { val nullable = if (isNullable) "NULL" else "NOT NULL" s"ALTER TABLE $tableName ALTER COLUMN $columnName SET $nullable" } + + /** + * Gets a dialect exception, classifies it and wraps it by `AnalysisException`. + * @param message The error message to be placed to the returned exception. + * @param e The dialect specific exception. + * @return `AnalysisException` or its sub-class. + */ + def classifyException(message: String, e: Throwable): AnalysisException = { + new AnalysisException(message, cause = Some(e)) + } } /** @@ -297,6 +308,7 @@ object JdbcDialects { registerDialect(DerbyDialect) registerDialect(OracleDialect) registerDialect(TeradataDialect) + registerDialect(H2Dialect) /** * Fetch the JdbcDialect class corresponding to a given database url. 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala index ca86a8f593621..8fe58e3a0a28a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala @@ -21,7 +21,7 @@ import java.util.Properties import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, QueryTest, Row} -import org.apache.spark.sql.catalyst.analysis.NoSuchTableException +import org.apache.spark.sql.catalyst.analysis.{NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ @@ -101,15 +101,18 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { Seq(Row("test", "dst_table"), Row("test", "people"))) } // Rename not existing table or namespace - Seq( - "h2.test.not_existing_table" -> "Table \"not_existing_table\" not found", - "h2.bad_test.not_existing_table" -> "Schema \"bad_test\" not found" - ).foreach { case (table, expectedMsg) => - val msg = intercept[org.h2.jdbc.JdbcSQLException] { - sql(s"ALTER TABLE $table RENAME TO test.dst_table") - }.getMessage - assert(msg.contains(expectedMsg)) + val exp1 = intercept[NoSuchTableException] { + sql(s"ALTER TABLE h2.test.not_existing_table RENAME TO test.dst_table") + } + assert(exp1.getMessage.contains( + "Failed table renaming from test.not_existing_table to test.dst_table")) + assert(exp1.cause.get.getMessage.contains("Table \"not_existing_table\" not found")) + val exp2 = intercept[NoSuchNamespaceException] { + sql(s"ALTER TABLE h2.bad_test.not_existing_table RENAME TO test.dst_table") } + assert(exp2.getMessage.contains( + "Failed table renaming from bad_test.not_existing_table to test.dst_table")) + assert(exp2.cause.get.getMessage.contains("Schema \"bad_test\" not found")) // Rename to an existing table withTable("h2.test.dst_table") { withConnection { conn => @@ -119,10 +122,12 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { withConnection { conn => conn.prepareStatement("""CREATE TABLE "test"."src_table" (id INTEGER)""").executeUpdate() } - val msg = intercept[org.h2.jdbc.JdbcSQLException] { + val exp = intercept[TableAlreadyExistsException] { sql("ALTER TABLE h2.test.src_table RENAME TO test.dst_table") - }.getMessage - assert(msg.contains("Table \"dst_table\" already exists")) + } + assert(exp.getMessage.contains( + "Failed table renaming from test.src_table to test.dst_table")) + assert(exp.cause.get.getMessage.contains("Table \"dst_table\" already exists")) } } } @@ -156,10 +161,11 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { }.getMessage assert(msg.contains("Table test.new_table already exists")) } - val msg = intercept[org.h2.jdbc.JdbcSQLException] { + val exp = intercept[NoSuchNamespaceException] { sql("CREATE TABLE h2.bad_test.new_table(i INT, j STRING) USING _") - }.getMessage - assert(msg.contains("Schema \"bad_test\" not found")) + } + assert(exp.getMessage.contains("Failed table creation: bad_test.new_table")) + assert(exp.cause.get.getMessage.contains("Schema \"bad_test\" not found")) } test("alter table ... 
add column") { @@ -289,15 +295,16 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("alter table ... update column comment not supported") { withTable("h2.test.alt_table") { sql("CREATE TABLE h2.test.alt_table (ID INTEGER) USING _") - val msg1 = intercept[java.sql.SQLFeatureNotSupportedException] { + val exp = intercept[AnalysisException] { sql("ALTER TABLE h2.test.alt_table ALTER COLUMN ID COMMENT 'test'") - }.getMessage - assert(msg1.contains("Unsupported TableChange")) + } + assert(exp.getMessage.contains("Failed table altering: test.alt_table")) + assert(exp.cause.get.getMessage.contains("Unsupported TableChange")) // Update comment for not existing column - val msg2 = intercept[AnalysisException] { + val msg = intercept[AnalysisException] { sql("ALTER TABLE h2.test.alt_table ALTER COLUMN bad_column COMMENT 'test'") }.getMessage - assert(msg2.contains("Cannot update missing field bad_column in test.alt_table")) + assert(msg.contains("Cannot update missing field bad_column in test.alt_table")) } // Update column comments in not existing table and namespace Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index 7af55550a7736..f0b19071a969b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -770,9 +770,14 @@ class JDBCSuite extends QueryTest } test("Dialect unregister") { - JdbcDialects.registerDialect(testH2Dialect) - JdbcDialects.unregisterDialect(testH2Dialect) - assert(JdbcDialects.get(urlWithUserAndPass) == NoopDialect) + JdbcDialects.unregisterDialect(H2Dialect) + try { + JdbcDialects.registerDialect(testH2Dialect) + JdbcDialects.unregisterDialect(testH2Dialect) + assert(JdbcDialects.get(urlWithUserAndPass) == NoopDialect) + } finally { + JdbcDialects.registerDialect(H2Dialect) + } } test("Aggregated dialects") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala index 3f621e04338a3..fb46c2ff4c0ea 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala @@ -194,24 +194,29 @@ class JDBCWriteSuite extends SharedSparkSession with BeforeAndAfter { } test("Truncate") { - JdbcDialects.registerDialect(testH2Dialect) - val df = spark.createDataFrame(sparkContext.parallelize(arr2x2), schema2) - val df2 = spark.createDataFrame(sparkContext.parallelize(arr1x2), schema2) - val df3 = spark.createDataFrame(sparkContext.parallelize(arr2x3), schema3) - - df.write.jdbc(url1, "TEST.TRUNCATETEST", properties) - df2.write.mode(SaveMode.Overwrite).option("truncate", true) - .jdbc(url1, "TEST.TRUNCATETEST", properties) - assert(1 === spark.read.jdbc(url1, "TEST.TRUNCATETEST", properties).count()) - assert(2 === spark.read.jdbc(url1, "TEST.TRUNCATETEST", properties).collect()(0).length) + JdbcDialects.unregisterDialect(H2Dialect) + try { + JdbcDialects.registerDialect(testH2Dialect) + val df = spark.createDataFrame(sparkContext.parallelize(arr2x2), schema2) + val df2 = spark.createDataFrame(sparkContext.parallelize(arr1x2), schema2) + val df3 = spark.createDataFrame(sparkContext.parallelize(arr2x3), schema3) - val m = intercept[AnalysisException] { - 
df3.write.mode(SaveMode.Overwrite).option("truncate", true) + df.write.jdbc(url1, "TEST.TRUNCATETEST", properties) + df2.write.mode(SaveMode.Overwrite).option("truncate", true) .jdbc(url1, "TEST.TRUNCATETEST", properties) - }.getMessage - assert(m.contains("Column \"seq\" not found")) - assert(0 === spark.read.jdbc(url1, "TEST.TRUNCATETEST", properties).count()) - JdbcDialects.unregisterDialect(testH2Dialect) + assert(1 === spark.read.jdbc(url1, "TEST.TRUNCATETEST", properties).count()) + assert(2 === spark.read.jdbc(url1, "TEST.TRUNCATETEST", properties).collect()(0).length) + + val m = intercept[AnalysisException] { + df3.write.mode(SaveMode.Overwrite).option("truncate", true) + .jdbc(url1, "TEST.TRUNCATETEST", properties) + }.getMessage + assert(m.contains("Column \"seq\" not found")) + assert(0 === spark.read.jdbc(url1, "TEST.TRUNCATETEST", properties).count()) + } finally { + JdbcDialects.unregisterDialect(testH2Dialect) + JdbcDialects.registerDialect(H2Dialect) + } } test("createTableOptions") { From 5effa8ea261ba59214afedc2853d1b248b330ca6 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Thu, 8 Oct 2020 16:29:15 +0900 Subject: [PATCH 0197/1009] [SPARK-33091][SQL] Avoid using map instead of foreach to avoid potential side effect at callers of OrcUtils.readCatalystSchema ### What changes were proposed in this pull request? This is a followup of SPARK-32646; a new JIRA was filed to control the fixed versions properly. When you use `map`, the body might be lazily evaluated and never executed. To avoid this, it is better to use `foreach`. See also SPARK-16694. The current code does not appear to cause any bug for now, but it is best to fix this to avoid potential issues. ### Why are the changes needed? To avoid potential issues from `map` being lazy and not executed. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Ran related tests. CI in this PR should verify. Closes #29974 from HyukjinKwon/SPARK-32646. 
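A minimal illustration (plain Scala, not the Spark code touched by this patch) of why `foreach` is the safer choice for side effects: on a lazy collection such as an `Iterator`, a side-effecting `map` body does not run until the result is consumed, whereas `foreach` runs immediately.

```scala
// map on a lazy collection defers the side effect; nothing is printed because the
// mapped iterator is never consumed.
Iterator(1, 2, 3).map(x => println(s"map saw $x"))

// foreach is eager and exists exactly for side effects; this prints all three lines.
Iterator(1, 2, 3).foreach(x => println(s"foreach saw $x"))
```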
Authored-by: HyukjinKwon Signed-off-by: Takeshi Yamamuro --- .../spark/sql/execution/datasources/orc/OrcFileFormat.scala | 2 +- .../datasources/v2/orc/OrcPartitionReaderFactory.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala index 8e9a566d45971..2671682e18f31 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala @@ -185,7 +185,7 @@ class OrcFileFormat } else { // ORC predicate pushdown if (orcFilterPushDown) { - OrcUtils.readCatalystSchema(filePath, conf, ignoreCorruptFiles).map { fileSchema => + OrcUtils.readCatalystSchema(filePath, conf, ignoreCorruptFiles).foreach { fileSchema => OrcFilters.createFilter(fileSchema, filters).foreach { f => OrcInputFormat.setSearchArgument(conf, f, fileSchema.fieldNames) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala index 1f38128e98fa5..b0ddee0a6b336 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala @@ -69,7 +69,7 @@ case class OrcPartitionReaderFactory( private def pushDownPredicates(filePath: Path, conf: Configuration): Unit = { if (orcFilterPushDown) { - OrcUtils.readCatalystSchema(filePath, conf, ignoreCorruptFiles).map { fileSchema => + OrcUtils.readCatalystSchema(filePath, conf, ignoreCorruptFiles).foreach { fileSchema => OrcFilters.createFilter(fileSchema, filters).foreach { f => OrcInputFormat.setSearchArgument(conf, f, fileSchema.fieldNames) } From 4a47b3e1103170eacf2fb910864c6db22a9a37e6 Mon Sep 17 00:00:00 2001 From: manubatham20 Date: Thu, 8 Oct 2020 07:52:00 -0500 Subject: [PATCH 0198/1009] [DOC][MINOR] pySpark usage - removed repeated keyword causing confusion ### What changes were proposed in this pull request? While explaining pySpark usage, use of repeated synonymous words were causing confusion. Removed "instead of a JAR" word, to keep it more readable. ### Why are the changes needed? To keep the docs more readable and easy to understand. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? No code changes, minor documentation change only. No tests added. Closes #29956 from manubatham20/patch-1. Authored-by: manubatham20 Signed-off-by: Sean Owen --- docs/submitting-applications.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/submitting-applications.md b/docs/submitting-applications.md index b27cf36b863ee..7a0f17f5ad13f 100644 --- a/docs/submitting-applications.md +++ b/docs/submitting-applications.md @@ -76,7 +76,7 @@ locally on your laptop), it is common to use `cluster` mode to minimize network the drivers and the executors. Currently, the standalone mode does not support cluster mode for Python applications. -For Python applications, simply pass a `.py` file in the place of `` instead of a JAR, +For Python applications, simply pass a `.py` file in the place of ``, and add Python `.zip`, `.egg` or `.py` files to the search path with `--py-files`. 
There are a few options available that are specific to the From 4987db8c88b49a0c0d8503b6291455e92e114efa Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 8 Oct 2020 11:50:53 -0700 Subject: [PATCH 0199/1009] [SPARK-33096][K8S] Use LinkedHashMap instead of Map for newlyCreatedExecutors ### What changes were proposed in this pull request? This PR aims to use `LinkedHashMap` instead of `Map` for `newlyCreatedExecutors`. ### Why are the changes needed? This makes log messages (INFO/DEBUG) more readable. This is helpful when `spark.kubernetes.allocation.batch.size` is large and especially when K8s dynamic allocation is used. **BEFORE** ``` 20/10/08 10:24:21 DEBUG ExecutorPodsAllocator: Executor with id 8 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:24:21 DEBUG ExecutorPodsAllocator: Executor with id 2 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:24:21 DEBUG ExecutorPodsAllocator: Executor with id 5 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:24:21 DEBUG ExecutorPodsAllocator: Executor with id 4 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:24:21 DEBUG ExecutorPodsAllocator: Executor with id 7 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:24:21 DEBUG ExecutorPodsAllocator: Executor with id 10 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:24:21 DEBUG ExecutorPodsAllocator: Executor with id 9 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:24:21 DEBUG ExecutorPodsAllocator: Executor with id 3 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:24:21 DEBUG ExecutorPodsAllocator: Executor with id 6 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:24:21 INFO ExecutorPodsAllocator: Deleting 9 excess pod requests (5,10,6,9,2,7,3,8,4). ``` **AFTER** ``` 20/10/08 10:25:17 DEBUG ExecutorPodsAllocator: Executor with id 2 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:25:17 DEBUG ExecutorPodsAllocator: Executor with id 3 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:25:17 DEBUG ExecutorPodsAllocator: Executor with id 4 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:25:17 DEBUG ExecutorPodsAllocator: Executor with id 5 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:25:17 DEBUG ExecutorPodsAllocator: Executor with id 6 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:25:17 DEBUG ExecutorPodsAllocator: Executor with id 7 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:25:17 DEBUG ExecutorPodsAllocator: Executor with id 8 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:25:17 DEBUG ExecutorPodsAllocator: Executor with id 9 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:25:17 DEBUG ExecutorPodsAllocator: Executor with id 10 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:25:17 INFO ExecutorPodsAllocator: Deleting 9 excess pod requests (2,3,4,5,6,7,8,9,10). 
``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CI or `build/sbt -Pkubernetes "kubernetes/test"` Closes #29979 from dongjoon-hyun/SPARK-K8S-LOG. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala index 2bf8685038cf5..774ef34f69e40 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala @@ -67,7 +67,7 @@ private[spark] class ExecutorPodsAllocator( // Executor IDs that have been requested from Kubernetes but have not been detected in any // snapshot yet. Mapped to the timestamp when they were created. - private val newlyCreatedExecutors = mutable.Map.empty[Long, Long] + private val newlyCreatedExecutors = mutable.LinkedHashMap.empty[Long, Long] private val dynamicAllocationEnabled = Utils.isDynamicAllocationEnabled(conf) From c5f6af9f17498bb0ec393c16616f2d99e5d3ee3d Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 8 Oct 2020 11:59:30 -0700 Subject: [PATCH 0200/1009] [SPARK-33094][SQL] Make ORC format propagate Hadoop config from DS options to underlying HDFS file system ### What changes were proposed in this pull request? Propagate ORC options to Hadoop configs in Hive `OrcFileFormat` and in the regular ORC datasource. ### Why are the changes needed? There is a bug that when running: ```scala spark.read.format("orc").options(conf).load(path) ``` The underlying file system will not receive the conf options. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? Added UT to `OrcSourceSuite`. Closes #29976 from MaxGekk/orc-option-propagation. 
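A hedged usage sketch of what the change enables: per-read file-system settings passed as data source options now reach the `FileSystem` that backs the ORC files. The `fs.s3a.*` keys, credentials, and bucket path below are placeholders for illustration, not taken from this patch; `spark` is assumed to be an active `SparkSession`.

```scala
// Illustrative only: credentials and path are placeholders.
val fsOptions = Map(
  "fs.s3a.access.key" -> "<access-key>",
  "fs.s3a.secret.key" -> "<secret-key>")

// Before this change the options reached the ORC reader but not the underlying file system;
// after it, they are merged into the Hadoop configuration used to open the files.
val df = spark.read.format("orc").options(fsOptions).load("s3a://some-bucket/path/to/orc")
```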
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../execution/datasources/orc/OrcUtils.scala | 6 +++--- .../datasources/orc/OrcSourceSuite.scala | 17 ++++++++++++++++- .../spark/sql/hive/orc/OrcFileFormat.scala | 2 +- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala index 264cf8165e13b..623f4f7a54d00 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala @@ -81,10 +81,10 @@ object OrcUtils extends Logging { } } - def readSchema(sparkSession: SparkSession, files: Seq[FileStatus]) + def readSchema(sparkSession: SparkSession, files: Seq[FileStatus], options: Map[String, String]) : Option[StructType] = { val ignoreCorruptFiles = sparkSession.sessionState.conf.ignoreCorruptFiles - val conf = sparkSession.sessionState.newHadoopConf() + val conf = sparkSession.sessionState.newHadoopConfWithOptions(options) files.toIterator.map(file => readSchema(file.getPath, conf, ignoreCorruptFiles)).collectFirst { case Some(schema) => logDebug(s"Reading schema from file $files, got Hive schema string: $schema") @@ -125,7 +125,7 @@ object OrcUtils extends Logging { SchemaMergeUtils.mergeSchemasInParallel( sparkSession, options, files, OrcUtils.readOrcSchemasInParallel) } else { - OrcUtils.readSchema(sparkSession, files) + OrcUtils.readSchema(sparkSession, files, options) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala index b6f41ab085fe1..1242b8c693d64 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala @@ -32,7 +32,7 @@ import org.apache.orc.impl.RecordReaderImpl import org.scalatest.BeforeAndAfterAll import org.apache.spark.{SPARK_VERSION_SHORT, SparkException} -import org.apache.spark.sql.{Row, SPARK_VERSION_METADATA_KEY} +import org.apache.spark.sql.{FakeFileSystemRequiringDSOption, Row, SPARK_VERSION_METADATA_KEY} import org.apache.spark.sql.execution.datasources.SchemaMergeUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession @@ -537,6 +537,21 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll { } } } + + test("SPARK-33094: should propagate Hadoop config from DS options to underlying file system") { + withSQLConf( + "fs.file.impl" -> classOf[FakeFileSystemRequiringDSOption].getName, + "fs.file.impl.disable.cache" -> "true") { + Seq(false, true).foreach { mergeSchema => + withTempPath { dir => + val path = dir.getAbsolutePath + val conf = Map("ds_option" -> "value", "mergeSchema" -> mergeSchema.toString) + spark.range(1).write.options(conf).orc(path) + checkAnswer(spark.read.options(conf).orc(path), Row(0)) + } + } + } + } } class OrcSourceSuite extends OrcSuite with SharedSparkSession { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala index 356b92b4652b3..d1ee1baadcbce 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala +++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala @@ -75,7 +75,7 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable val ignoreCorruptFiles = sparkSession.sessionState.conf.ignoreCorruptFiles OrcFileOperator.readSchema( files.map(_.getPath.toString), - Some(sparkSession.sessionState.newHadoopConf()), + Some(sparkSession.sessionState.newHadoopConfWithOptions(options)), ignoreCorruptFiles ) } From a9077299d769bc9569a15f6500754661111fe9ab Mon Sep 17 00:00:00 2001 From: ulysses Date: Fri, 9 Oct 2020 09:25:22 +0900 Subject: [PATCH 0201/1009] [SPARK-32743][SQL] Add distinct info at UnresolvedFunction toString ### What changes were proposed in this pull request? Add distinct info at `UnresolvedFunction.toString`. ### Why are the changes needed? Make `UnresolvedFunction` info complete. ``` create table test (c1 int, c2 int); explain extended select sum(distinct c1) from test; -- before this pr == Parsed Logical Plan == 'Project [unresolvedalias('sum('c1), None)] +- 'UnresolvedRelation [test] -- after this pr == Parsed Logical Plan == 'Project [unresolvedalias('sum(distinct 'c1), None)] +- 'UnresolvedRelation [test] ``` ### Does this PR introduce _any_ user-facing change? Yes, get distinct info during sql parse. ### How was this patch tested? manual test. Closes #29586 from ulysses-you/SPARK-32743. Authored-by: ulysses Signed-off-by: Takeshi Yamamuro --- .../sql/catalyst/analysis/unresolved.scala | 5 ++- .../sql-tests/inputs/explain-aqe.sql | 1 + .../resources/sql-tests/inputs/explain.sql | 6 ++++ .../sql-tests/results/explain-aqe.sql.out | 33 +++++++++++++++++++ .../sql-tests/results/explain.sql.out | 33 +++++++++++++++++++ 5 files changed, 77 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 9c7d572a12071..efc9e971df72a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -268,7 +268,10 @@ case class UnresolvedFunction( override lazy val resolved = false override def prettyName: String = name.unquotedString - override def toString: String = s"'$name(${children.mkString(", ")})" + override def toString: String = { + val distinct = if (isDistinct) "distinct " else "" + s"'$name($distinct${children.mkString(", ")})" + } } object UnresolvedFunction { diff --git a/sql/core/src/test/resources/sql-tests/inputs/explain-aqe.sql b/sql/core/src/test/resources/sql-tests/inputs/explain-aqe.sql index f4afa2b77a9d7..7aef901da4fb5 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/explain-aqe.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/explain-aqe.sql @@ -1,3 +1,4 @@ --IMPORT explain.sql --SET spark.sql.adaptive.enabled=true +--SET spark.sql.maxMetadataStringLength = 500 diff --git a/sql/core/src/test/resources/sql-tests/inputs/explain.sql b/sql/core/src/test/resources/sql-tests/inputs/explain.sql index 80bf258704c70..fdff1b4eef941 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/explain.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/explain.sql @@ -1,5 +1,6 @@ --SET spark.sql.codegen.wholeStage = true --SET spark.sql.adaptive.enabled = false +--SET spark.sql.maxMetadataStringLength = 500 -- Test tables CREATE table explain_temp1 (key int, val int) USING PARQUET; @@ -9,6 +10,11 @@ CREATE table explain_temp4 (key int, val 
string) USING PARQUET; SET spark.sql.codegen.wholeStage = true; +-- distinct func +EXPLAIN EXTENDED + SELECT sum(distinct val) + FROM explain_temp1; + -- single table EXPLAIN FORMATTED SELECT key, max(val) diff --git a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out index 5435cde050fd1..567e0eabe1805 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out @@ -42,6 +42,39 @@ struct spark.sql.codegen.wholeStage true +-- !query +EXPLAIN EXTENDED + SELECT sum(distinct val) + FROM explain_temp1 +-- !query schema +struct +-- !query output +== Parsed Logical Plan == +'Project [unresolvedalias('sum(distinct 'val), None)] ++- 'UnresolvedRelation [explain_temp1], [], false + +== Analyzed Logical Plan == +sum(DISTINCT val): bigint +Aggregate [sum(distinct cast(val#x as bigint)) AS sum(DISTINCT val)#xL] ++- SubqueryAlias spark_catalog.default.explain_temp1 + +- Relation[key#x,val#x] parquet + +== Optimized Logical Plan == +Aggregate [sum(distinct cast(val#x as bigint)) AS sum(DISTINCT val)#xL] ++- Project [val#x] + +- Relation[key#x,val#x] parquet + +== Physical Plan == +AdaptiveSparkPlan isFinalPlan=false ++- HashAggregate(keys=[], functions=[sum(distinct cast(val#x as bigint)#xL)], output=[sum(DISTINCT val)#xL]) + +- Exchange SinglePartition, true, [id=#x] + +- HashAggregate(keys=[], functions=[partial_sum(distinct cast(val#x as bigint)#xL)], output=[sum#xL]) + +- HashAggregate(keys=[cast(val#x as bigint)#xL], functions=[], output=[cast(val#x as bigint)#xL]) + +- Exchange hashpartitioning(cast(val#x as bigint)#xL, 4), true, [id=#x] + +- HashAggregate(keys=[cast(val#x as bigint) AS cast(val#x as bigint)#xL], functions=[], output=[cast(val#x as bigint)#xL]) + +- FileScan parquet default.explain_temp1[val#x] Batched: true, DataFilters: [], Format: Parquet, Location [not included in comparison]/{warehouse_dir}/explain_temp1], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + + -- !query EXPLAIN FORMATTED SELECT key, max(val) diff --git a/sql/core/src/test/resources/sql-tests/results/explain.sql.out b/sql/core/src/test/resources/sql-tests/results/explain.sql.out index 6b3b71f85ced2..fcd69549f2c6e 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain.sql.out @@ -42,6 +42,39 @@ struct spark.sql.codegen.wholeStage true +-- !query +EXPLAIN EXTENDED + SELECT sum(distinct val) + FROM explain_temp1 +-- !query schema +struct +-- !query output +== Parsed Logical Plan == +'Project [unresolvedalias('sum(distinct 'val), None)] ++- 'UnresolvedRelation [explain_temp1], [], false + +== Analyzed Logical Plan == +sum(DISTINCT val): bigint +Aggregate [sum(distinct cast(val#x as bigint)) AS sum(DISTINCT val)#xL] ++- SubqueryAlias spark_catalog.default.explain_temp1 + +- Relation[key#x,val#x] parquet + +== Optimized Logical Plan == +Aggregate [sum(distinct cast(val#x as bigint)) AS sum(DISTINCT val)#xL] ++- Project [val#x] + +- Relation[key#x,val#x] parquet + +== Physical Plan == +*HashAggregate(keys=[], functions=[sum(distinct cast(val#x as bigint)#xL)], output=[sum(DISTINCT val)#xL]) ++- Exchange SinglePartition, true, [id=#x] + +- *HashAggregate(keys=[], functions=[partial_sum(distinct cast(val#x as bigint)#xL)], output=[sum#xL]) + +- *HashAggregate(keys=[cast(val#x as bigint)#xL], functions=[], output=[cast(val#x as bigint)#xL]) + +- Exchange 
hashpartitioning(cast(val#x as bigint)#xL, 4), true, [id=#x] + +- *HashAggregate(keys=[cast(val#x as bigint) AS cast(val#x as bigint)#xL], functions=[], output=[cast(val#x as bigint)#xL]) + +- *ColumnarToRow + +- FileScan parquet default.explain_temp1[val#x] Batched: true, DataFilters: [], Format: Parquet, Location [not included in comparison]/{warehouse_dir}/explain_temp1], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + + -- !query EXPLAIN FORMATTED SELECT key, max(val) From 3beab8d8a8e2ed5e46e063d5a44face40c5fac90 Mon Sep 17 00:00:00 2001 From: zero323 Date: Fri, 9 Oct 2020 09:50:45 +0900 Subject: [PATCH 0202/1009] [SPARK-32793][FOLLOW-UP] Minor corrections for PySpark annotations and SparkR ### What changes were proposed in this pull request? - Annotated return types of `assert_true` and `raise_error` as discussed [here](https://github.com/apache/spark/pull/29947#pullrequestreview-504495801). - Add `assert_true` and `raise_error` to SparkR NAMESPACE. - Validating message vector size in SparkR as discussed [here](https://github.com/apache/spark/pull/29947#pullrequestreview-504539004). ### Why are the changes needed? As discussed in review for #29947. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? - Existing tests. - Validation of annotations using MyPy Closes #29978 from zero323/SPARK-32793-FOLLOW-UP. Authored-by: zero323 Signed-off-by: HyukjinKwon --- R/pkg/NAMESPACE | 2 ++ R/pkg/R/functions.R | 6 ++++-- python/pyspark/sql/functions.pyi | 4 ++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 2fadf20da491c..a9cca4bf6f6fc 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -230,6 +230,7 @@ exportMethods("%<=>%", "asc", "ascii", "asin", + "assert_true", "atan", "atan2", "avg", @@ -361,6 +362,7 @@ exportMethods("%<=>%", "posexplode_outer", "quarter", "radians", + "raise_error", "rand", "randn", "rank", diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index ce384a64bccaf..bcd798a8c31e2 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -847,7 +847,8 @@ setMethod("assert_true", jc <- if (is.null(errMsg)) { callJStatic("org.apache.spark.sql.functions", "assert_true", x@jc) } else { - if (is.character(errMsg) && length(errMsg) == 1) { + if (is.character(errMsg)) { + stopifnot(length(errMsg) == 1) errMsg <- lit(errMsg) } callJStatic("org.apache.spark.sql.functions", "assert_true", x@jc, errMsg@jc) @@ -868,7 +869,8 @@ setMethod("assert_true", setMethod("raise_error", signature(x = "characterOrColumn"), function(x) { - if (is.character(x) && length(x) == 1) { + if (is.character(x)) { + stopifnot(length(x) == 1) x <- lit(x) } jc <- callJStatic("org.apache.spark.sql.functions", "raise_error", x@jc) diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi index 6249bca5cef68..779a29c086d5a 100644 --- a/python/pyspark/sql/functions.pyi +++ b/python/pyspark/sql/functions.pyi @@ -137,8 +137,8 @@ def sha1(col: ColumnOrName) -> Column: ... def sha2(col: ColumnOrName, numBits: int) -> Column: ... def hash(*cols: ColumnOrName) -> Column: ... def xxhash64(*cols: ColumnOrName) -> Column: ... -def assert_true(col: ColumnOrName, errMsg: Union[Column, str] = ...): ... -def raise_error(errMsg: Union[Column, str]): ... +def assert_true(col: ColumnOrName, errMsg: Union[Column, str] = ...) -> Column: ... +def raise_error(errMsg: Union[Column, str]) -> Column: ... def concat(*cols: ColumnOrName) -> Column: ... 
def concat_ws(sep: str, *cols: ColumnOrName) -> Column: ... def decode(col: ColumnOrName, charset: str) -> Column: ... From 1234c66fa6b6d2c45edb40237788fa3bfdf96cf3 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 9 Oct 2020 02:37:47 -0700 Subject: [PATCH 0203/1009] [SPARK-33101][ML] Make LibSVM format propagate Hadoop config from DS options to underlying HDFS file system ### What changes were proposed in this pull request? Propagate LibSVM options to Hadoop configs in the LibSVM datasource. ### Why are the changes needed? There is a bug that when running: ```scala spark.read.format("libsvm").options(conf).load(path) ``` The underlying file system will not receive the `conf` options. ### Does this PR introduce _any_ user-facing change? Yes. After the changes, for example, users should read files from Azure Data Lake successfully: ```scala def hadoopConf1() = Map[String, String]( s"fs.adl.oauth2.access.token.provider.type" -> "ClientCredential", s"fs.adl.oauth2.client.id" -> dbutils.secrets.get(scope = "...", key = "..."), s"fs.adl.oauth2.credential" -> dbutils.secrets.get(scope = "...", key = "..."), s"fs.adl.oauth2.refresh.url" -> s"https://login.microsoftonline.com/.../oauth2/token") val df = spark.read.format("libsvm").options(hadoopConf1).load("adl://....azuredatalakestore.net/foldersp1/...") ``` and not get the following exception because the settings above are not propagated to the filesystem: ```java java.lang.IllegalArgumentException: No value for fs.adl.oauth2.access.token.provider found in conf file. at ....adl.AdlFileSystem.getNonEmptyVal(AdlFileSystem.java:820) at ....adl.AdlFileSystem.getCustomAccessTokenProvider(AdlFileSystem.java:220) at ....adl.AdlFileSystem.getAccessTokenProvider(AdlFileSystem.java:257) at ....adl.AdlFileSystem.initialize(AdlFileSystem.java:164) at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2669) ``` ### How was this patch tested? Added UT to `LibSVMRelationSuite`. Closes #29984 from MaxGekk/ml-option-propagation. 
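Conceptually, the fix is the same as the ORC one above: the per-read data source options must be overlaid on the Hadoop `Configuration` that is used to create the `FileSystem`. A simplified sketch of that merging idea, not the actual Spark implementation:

```scala
import org.apache.hadoop.conf.Configuration

// Simplified sketch: copy the session's Hadoop settings, then overlay the per-read
// data source options so FileSystem instances created from this conf see them.
def hadoopConfWithOptions(sessionConf: Configuration, options: Map[String, String]): Configuration = {
  val conf = new Configuration(sessionConf) // do not mutate the shared session configuration
  options.foreach { case (k, v) => conf.set(k, v) }
  conf
}
```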
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../spark/ml/source/libsvm/LibSVMRelation.scala | 2 +- .../org/apache/spark/mllib/util/MLUtils.scala | 6 ++++-- .../ml/source/libsvm/LibSVMRelationSuite.scala | 14 ++++++++++++-- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala b/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala index 11be1d85fbead..df64de4b10075 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala @@ -101,7 +101,7 @@ private[libsvm] class LibSVMFileFormat "'numFeatures' option to avoid the extra scan.") val paths = files.map(_.getPath.toString) - val parsed = MLUtils.parseLibSVMFile(sparkSession, paths) + val parsed = MLUtils.parseLibSVMFile(sparkSession, paths, options) MLUtils.computeNumFeatures(parsed) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala index a20949910d25e..832f31323f546 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala @@ -105,13 +105,15 @@ object MLUtils extends Logging { } private[spark] def parseLibSVMFile( - sparkSession: SparkSession, paths: Seq[String]): RDD[(Double, Array[Int], Array[Double])] = { + sparkSession: SparkSession, + paths: Seq[String], + options: Map[String, String]): RDD[(Double, Array[Int], Array[Double])] = { val lines = sparkSession.baseRelationToDataFrame( DataSource.apply( sparkSession, paths = paths, className = classOf[TextFileFormat].getName, - options = Map(DataSource.GLOB_PATHS_KEY -> "false") + options = options ++ Map(DataSource.GLOB_PATHS_KEY -> "false") ).resolveRelation(checkFilesExist = false)) .select("value") diff --git a/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala index 0999892364e2c..cc0ca308cb668 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala @@ -27,12 +27,13 @@ import org.apache.spark.ml.attribute.AttributeGroup import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.ml.linalg.SQLDataTypes.VectorType import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.sql.{Row, SaveMode} +import org.apache.spark.sql.{FakeFileSystemRequiringDSOption, Row, SaveMode} +import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.types.{DoubleType, StructField, StructType} import org.apache.spark.util.Utils -class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext { +class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext with SQLHelper { // Path for dataset var path: String = _ @@ -211,4 +212,13 @@ class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext { assert(v == Vectors.sparse(2, Seq((0, 2.0), (1, 3.0)))) } } + + test("SPARK-33101: should propagate Hadoop config from DS options to underlying file system") { + withSQLConf( + "fs.file.impl" -> classOf[FakeFileSystemRequiringDSOption].getName, + "fs.file.impl.disable.cache" -> "true") { + val df = spark.read.option("ds_option", 
"value").format("libsvm").load(path) + assert(df.columns(0) == "label") + } + } } From e1909c96fbfc3d3f7808f6ddcadec88cc4d11fb9 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 9 Oct 2020 02:50:38 -0700 Subject: [PATCH 0204/1009] [SPARK-33099][K8S] Respect executor idle timeout conf in ExecutorPodsAllocator ### What changes were proposed in this pull request? This PR aims to protect the executor pod request or pending pod during executor idle timeout. ### Why are the changes needed? In case of dynamic allocation, Apache Spark K8s `ExecutorPodsAllocator` cancels the pod requests or pending pods too eagerly. Like the following example, `ExecutorPodsAllocator` received the new total executor adjust request rapidly in two minutes. Sometimes, it's called 3 times in a single second. It repeats `request` and `delete` on that request or pending pod frequently. This PR is reusing `spark.dynamicAllocation.executorIdleTimeout (default: 60s)` to keep the pod request or pending pod. ``` 20/10/08 05:58:08 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 3 20/10/08 05:58:08 INFO ExecutorPodsAllocator: Going to request 3 executors from Kubernetes. 20/10/08 05:58:09 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 3 20/10/08 05:58:43 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 1 20/10/08 05:58:47 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 0 20/10/08 05:59:26 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 3 20/10/08 05:59:30 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 2 20/10/08 05:59:31 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 3 20/10/08 05:59:44 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 2 20/10/08 05:59:44 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 0 20/10/08 05:59:45 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 3 20/10/08 05:59:50 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 2 20/10/08 05:59:50 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 1 20/10/08 05:59:50 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 0 20/10/08 05:59:54 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 3 20/10/08 05:59:54 INFO ExecutorPodsAllocator: Going to request 1 executors from Kubernetes. ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the newly added test case. Closes #29981 from dongjoon-hyun/SPARK-K8S-INITIAL. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../cluster/k8s/ExecutorPodsAllocator.scala | 30 +++++++++-- .../k8s/ExecutorLifecycleTestUtils.scala | 5 ++ .../k8s/ExecutorPodsAllocatorSuite.scala | 53 ++++++++++++++++++- ...ecutorPodsPollingSnapshotSourceSuite.scala | 8 +-- .../k8s/ExecutorPodsSnapshotSuite.scala | 5 +- ...ExecutorPodsWatchSnapshotSourceSuite.scala | 10 ++-- 6 files changed, 97 insertions(+), 14 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala index 774ef34f69e40..5e09de37f2848 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala @@ -16,6 +16,8 @@ */ package org.apache.spark.scheduler.cluster.k8s +import java.time.Instant +import java.time.format.DateTimeParseException import java.util.concurrent.atomic.{AtomicBoolean, AtomicInteger, AtomicLong} import scala.collection.mutable @@ -30,6 +32,7 @@ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.KubernetesConf import org.apache.spark.deploy.k8s.KubernetesUtils.addOwnerReference import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.DYN_ALLOCATION_EXECUTOR_IDLE_TIMEOUT import org.apache.spark.util.{Clock, Utils} private[spark] class ExecutorPodsAllocator( @@ -50,6 +53,8 @@ private[spark] class ExecutorPodsAllocator( private val podCreationTimeout = math.max(podAllocationDelay * 5, 60000) + private val executorIdleTimeout = conf.get(DYN_ALLOCATION_EXECUTOR_IDLE_TIMEOUT) * 1000 + private val namespace = conf.get(KUBERNETES_NAMESPACE) private val kubernetesDriverPodName = conf @@ -87,6 +92,7 @@ private[spark] class ExecutorPodsAllocator( } def setTotalExpectedExecutors(total: Int): Unit = { + logDebug(s"Set totalExpectedExecutors to $total") totalExpectedExecutors.set(total) if (!hasPendingPods.get()) { snapshotsStore.notifySubscribers() @@ -149,7 +155,6 @@ private[spark] class ExecutorPodsAllocator( case (_, PodPending(_)) => true case _ => false } - .map { case (id, _) => id } // Make a local, non-volatile copy of the reference since it's used multiple times. This // is the only method that modifies the list, so this is safe. @@ -173,7 +178,8 @@ private[spark] class ExecutorPodsAllocator( // It's possible that we have outstanding pods that are outdated when dynamic allocation // decides to downscale the application. So check if we can release any pending pods early // instead of waiting for them to time out. Drop them first from the unacknowledged list, - // then from the pending. + // then from the pending. However, in order to prevent too frequent frunctuation, newly + // requested pods are protected during executorIdleTimeout period. // // TODO: with dynamic allocation off, handle edge cases if we end up with more running // executors than expected. 
@@ -181,8 +187,13 @@ private[spark] class ExecutorPodsAllocator( newlyCreatedExecutors.size if (knownPodCount > currentTotalExpectedExecutors) { val excess = knownPodCount - currentTotalExpectedExecutors - val knownPendingToDelete = currentPendingExecutors.take(excess - newlyCreatedExecutors.size) - val toDelete = newlyCreatedExecutors.keys.take(excess).toList ++ knownPendingToDelete + val knownPendingToDelete = currentPendingExecutors + .filter(x => isExecutorIdleTimedOut(x._2, currentTime)) + .map { case (id, _) => id } + .take(excess - newlyCreatedExecutors.size) + val toDelete = newlyCreatedExecutors + .filter(x => currentTime - x._2 > executorIdleTimeout) + .keys.take(excess).toList ++ knownPendingToDelete if (toDelete.nonEmpty) { logInfo(s"Deleting ${toDelete.size} excess pod requests (${toDelete.mkString(",")}).") @@ -268,4 +279,15 @@ private[spark] class ExecutorPodsAllocator( } } } + + private def isExecutorIdleTimedOut(state: ExecutorPodState, currentTime: Long): Boolean = { + try { + val startTime = Instant.parse(state.pod.getStatus.getStartTime).toEpochMilli() + currentTime - startTime > executorIdleTimeout + } catch { + case _: Exception => + logDebug(s"Cannot get startTime of pod ${state.pod}") + true + } + } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala index 2e883623a4b1c..0377e54f3cd76 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala @@ -16,6 +16,8 @@ */ package org.apache.spark.scheduler.cluster.k8s +import java.time.Instant + import io.fabric8.kubernetes.api.model.{ContainerBuilder, Pod, PodBuilder} import org.apache.spark.deploy.k8s.Constants._ @@ -29,6 +31,7 @@ object ExecutorLifecycleTestUtils { new PodBuilder(podWithAttachedContainerForId(executorId)) .editOrNewStatus() .withPhase("failed") + .withStartTime(Instant.now.toString) .addNewContainerStatus() .withName("spark-executor") .withImage("k8s-spark") @@ -59,6 +62,7 @@ object ExecutorLifecycleTestUtils { new PodBuilder(podWithAttachedContainerForId(executorId)) .editOrNewStatus() .withPhase("pending") + .withStartTime(Instant.now.toString) .endStatus() .build() } @@ -67,6 +71,7 @@ object ExecutorLifecycleTestUtils { new PodBuilder(podWithAttachedContainerForId(executorId)) .editOrNewStatus() .withPhase("running") + .withStartTime(Instant.now.toString) .endStatus() .build() } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala index e4b36e46594f6..c1c33b2a0f199 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala @@ -16,6 +16,8 @@ */ package org.apache.spark.scheduler.cluster.k8s +import java.time.Instant + import io.fabric8.kubernetes.api.model.{DoneablePod, Pod, PodBuilder} import io.fabric8.kubernetes.client.KubernetesClient import io.fabric8.kubernetes.client.dsl.PodResource @@ -31,6 +33,7 
@@ import org.apache.spark.deploy.k8s.{KubernetesExecutorConf, KubernetesExecutorSp import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.Fabric8Aliases._ +import org.apache.spark.internal.config.DYN_ALLOCATION_EXECUTOR_IDLE_TIMEOUT import org.apache.spark.scheduler.cluster.k8s.ExecutorLifecycleTestUtils._ import org.apache.spark.util.ManualClock @@ -47,11 +50,14 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite with BeforeAndAfter { .endMetadata() .build() - private val conf = new SparkConf().set(KUBERNETES_DRIVER_POD_NAME, driverPodName) + private val conf = new SparkConf() + .set(KUBERNETES_DRIVER_POD_NAME, driverPodName) + .set(DYN_ALLOCATION_EXECUTOR_IDLE_TIMEOUT.key, "10s") private val podAllocationSize = conf.get(KUBERNETES_ALLOCATION_BATCH_SIZE) private val podAllocationDelay = conf.get(KUBERNETES_ALLOCATION_BATCH_DELAY) private val podCreationTimeout = math.max(podAllocationDelay * 5, 60000L) + private val executorIdleTimeout = conf.get(DYN_ALLOCATION_EXECUTOR_IDLE_TIMEOUT) * 1000 private val secMgr = new SecurityManager(conf) private var waitForExecutorPodsClock: ManualClock = _ @@ -159,6 +165,9 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite with BeforeAndAfter { .withLabelIn(meq(SPARK_EXECUTOR_ID_LABEL), any())) .thenReturn(podOperations) + val startTime = Instant.now.toEpochMilli + waitForExecutorPodsClock.setTime(startTime) + // Target 1 executor, make sure it's requested, even with an empty initial snapshot. podsAllocatorUnderTest.setTotalExpectedExecutors(1) verify(podOperations).create(podWithAttachedContainerForId(1)) @@ -184,6 +193,7 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite with BeforeAndAfter { verify(podOperations, never()).delete() // Scale down to 1. Pending executors (both acknowledged and not) should be deleted. 
+ waitForExecutorPodsClock.advance(executorIdleTimeout * 2) podsAllocatorUnderTest.setTotalExpectedExecutors(1) snapshotsStore.notifySubscribers() verify(podOperations, times(4)).create(any()) @@ -202,6 +212,47 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite with BeforeAndAfter { assert(!podsAllocatorUnderTest.isDeleted("4")) } + test("SPARK-33099: Respect executor idle timeout configuration") { + when(podOperations + .withField("status.phase", "Pending")) + .thenReturn(podOperations) + when(podOperations + .withLabel(SPARK_APP_ID_LABEL, TEST_SPARK_APP_ID)) + .thenReturn(podOperations) + when(podOperations + .withLabel(SPARK_ROLE_LABEL, SPARK_POD_EXECUTOR_ROLE)) + .thenReturn(podOperations) + when(podOperations + .withLabelIn(meq(SPARK_EXECUTOR_ID_LABEL), any())) + .thenReturn(podOperations) + + val startTime = Instant.now.toEpochMilli + waitForExecutorPodsClock.setTime(startTime) + + podsAllocatorUnderTest.setTotalExpectedExecutors(5) + verify(podOperations).create(podWithAttachedContainerForId(1)) + verify(podOperations).create(podWithAttachedContainerForId(2)) + verify(podOperations).create(podWithAttachedContainerForId(3)) + verify(podOperations).create(podWithAttachedContainerForId(4)) + verify(podOperations).create(podWithAttachedContainerForId(5)) + verify(podOperations, times(5)).create(any()) + + snapshotsStore.updatePod(pendingExecutor(1)) + snapshotsStore.updatePod(pendingExecutor(2)) + + // Newly created executors (both acknowledged and not) are protected by executorIdleTimeout + podsAllocatorUnderTest.setTotalExpectedExecutors(0) + snapshotsStore.notifySubscribers() + verify(podOperations, never()).withLabelIn(SPARK_EXECUTOR_ID_LABEL, "1", "2", "3", "4", "5") + verify(podOperations, never()).delete() + + // Newly created executors (both acknowledged and not) are cleaned up. 
+ waitForExecutorPodsClock.advance(executorIdleTimeout * 2) + snapshotsStore.notifySubscribers() + verify(podOperations).withLabelIn(SPARK_EXECUTOR_ID_LABEL, "1", "2", "3", "4", "5") + verify(podOperations).delete() + } + private def executorPodAnswer(): Answer[KubernetesExecutorSpec] = (invocation: InvocationOnMock) => { val k8sConf: KubernetesExecutorConf = invocation.getArgument(0) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsPollingSnapshotSourceSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsPollingSnapshotSourceSuite.scala index 63e43bd40c728..a8e825678d1f5 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsPollingSnapshotSourceSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsPollingSnapshotSourceSuite.scala @@ -77,13 +77,15 @@ class ExecutorPodsPollingSnapshotSourceSuite extends SparkFunSuite with BeforeAn } test("Items returned by the API should be pushed to the event queue") { + val exec1 = runningExecutor(1) + val exec2 = runningExecutor(2) when(activeExecutorPods.list()) .thenReturn(new PodListBuilder() .addToItems( - runningExecutor(1), - runningExecutor(2)) + exec1, + exec2) .build()) pollingExecutor.tick(pollingInterval, TimeUnit.MILLISECONDS) - verify(eventQueue).replaceSnapshot(Seq(runningExecutor(1), runningExecutor(2))) + verify(eventQueue).replaceSnapshot(Seq(exec1, exec2)) } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotSuite.scala index 70e19c904eddb..6ca1733bcd32b 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotSuite.scala @@ -50,11 +50,12 @@ class ExecutorPodsSnapshotSuite extends SparkFunSuite { Map( 0L -> PodPending(originalPods(0)), 1L -> PodSucceeded(succeededExecutor(1)))) - val snapshotWithNewPod = snapshotWithUpdatedPod.withUpdate(pendingExecutor(2)) + val pendingExec = pendingExecutor(2) + val snapshotWithNewPod = snapshotWithUpdatedPod.withUpdate(pendingExec) assert(snapshotWithNewPod.executorPods === Map( 0L -> PodPending(originalPods(0)), 1L -> PodSucceeded(succeededExecutor(1)), - 2L -> PodPending(pendingExecutor(2)))) + 2L -> PodPending(pendingExec))) } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsWatchSnapshotSourceSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsWatchSnapshotSourceSuite.scala index ac1968b4ff810..e35fc83019b8d 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsWatchSnapshotSourceSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsWatchSnapshotSourceSuite.scala @@ -67,9 +67,11 @@ class ExecutorPodsWatchSnapshotSourceSuite extends SparkFunSuite with BeforeAndA } test("Watch events should be pushed to the snapshots store as snapshot updates.") { - watch.getValue.eventReceived(Action.ADDED, runningExecutor(1)) - 
watch.getValue.eventReceived(Action.MODIFIED, runningExecutor(2)) - verify(eventQueue).updatePod(runningExecutor(1)) - verify(eventQueue).updatePod(runningExecutor(2)) + val exec1 = runningExecutor(1) + val exec2 = runningExecutor(2) + watch.getValue.eventReceived(Action.ADDED, exec1) + watch.getValue.eventReceived(Action.MODIFIED, exec2) + verify(eventQueue).updatePod(exec1) + verify(eventQueue).updatePod(exec2) } } From edb140eb5cb7f20af3e2ee7d2f9fb72f3e20e796 Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Fri, 9 Oct 2020 03:01:54 -0700 Subject: [PATCH 0205/1009] [SPARK-32896][SS] Add DataStreamWriter.table API ### What changes were proposed in this pull request? This PR proposes to add `DataStreamWriter.table` to specify the output "table" to write from the streaming query. ### Why are the changes needed? For now, there's no way to write to the table (especially catalog table) even the table is capable to handle streaming write, so even with Spark 3, writing to the catalog table via SS should go through the `DataStreamWriter.format(provider)` and wish the provider can handle it as same as we do with catalog table. With the new API, we can directly point to the catalog table which supports streaming write. Some of usages are covered with tests - simply saying, end users can do the following: ```scala // assuming `testcat` is a custom catalog, and `ns` is a namespace in the catalog spark.sql("CREATE TABLE testcat.ns.table1 (id bigint, data string) USING foo") val query = inputDF .writeStream .table("testcat.ns.table1") .option(...) .start() ``` ### Does this PR introduce _any_ user-facing change? Yes, as this adds a new public API in DataStreamWriter. This doesn't bring backward incompatible change. ### How was this patch tested? New unit tests. Closes #29767 from HeartSaVioR/SPARK-32896. 
Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Dongjoon Hyun --- .../spark/sql/connector/InMemoryTable.scala | 59 ++++++- .../sql/streaming/DataStreamWriter.scala | 138 +++++++++------ .../test/DataStreamTableAPISuite.scala | 162 ++++++++++++++++-- 3 files changed, 299 insertions(+), 60 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala index 616fc72320caf..6a78b9e2bddd0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala @@ -32,6 +32,7 @@ import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.expressions.{BucketTransform, DaysTransform, HoursTransform, IdentityTransform, MonthsTransform, Transform, YearsTransform} import org.apache.spark.sql.connector.read._ import org.apache.spark.sql.connector.write._ +import org.apache.spark.sql.connector.write.streaming.{StreamingDataWriterFactory, StreamingWrite} import org.apache.spark.sql.sources.{And, EqualTo, Filter, IsNotNull} import org.apache.spark.sql.types.{DataType, DateType, StructType, TimestampType} import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -145,6 +146,7 @@ class InMemoryTable( override def capabilities: util.Set[TableCapability] = Set( TableCapability.BATCH_READ, TableCapability.BATCH_WRITE, + TableCapability.STREAMING_WRITE, TableCapability.OVERWRITE_BY_FILTER, TableCapability.OVERWRITE_DYNAMIC, TableCapability.TRUNCATE).asJava @@ -169,26 +171,35 @@ class InMemoryTable( new WriteBuilder with SupportsTruncate with SupportsOverwrite with SupportsDynamicOverwrite { private var writer: BatchWrite = Append + private var streamingWriter: StreamingWrite = StreamingAppend override def truncate(): WriteBuilder = { assert(writer == Append) writer = TruncateAndAppend + streamingWriter = StreamingTruncateAndAppend this } override def overwrite(filters: Array[Filter]): WriteBuilder = { assert(writer == Append) writer = new Overwrite(filters) + streamingWriter = new StreamingNotSupportedOperation(s"overwrite ($filters)") this } override def overwriteDynamicPartitions(): WriteBuilder = { assert(writer == Append) writer = DynamicOverwrite + streamingWriter = new StreamingNotSupportedOperation("overwriteDynamicPartitions") this } override def buildForBatch(): BatchWrite = writer + + override def buildForStreaming(): StreamingWrite = streamingWriter match { + case exc: StreamingNotSupportedOperation => exc.throwsException() + case s => s + } } } @@ -231,6 +242,45 @@ class InMemoryTable( } } + private abstract class TestStreamingWrite extends StreamingWrite { + def createStreamingWriterFactory(info: PhysicalWriteInfo): StreamingDataWriterFactory = { + BufferedRowsWriterFactory + } + + def abort(epochId: Long, messages: Array[WriterCommitMessage]): Unit = {} + } + + private class StreamingNotSupportedOperation(operation: String) extends TestStreamingWrite { + override def createStreamingWriterFactory(info: PhysicalWriteInfo): StreamingDataWriterFactory = + throwsException() + + override def commit(epochId: Long, messages: Array[WriterCommitMessage]): Unit = + throwsException() + + override def abort(epochId: Long, messages: Array[WriterCommitMessage]): Unit = + throwsException() + + def throwsException[T](): T = throw new IllegalStateException("The operation " + + s"${operation} isn't supported for streaming query.") + } + + private object 
StreamingAppend extends TestStreamingWrite { + override def commit(epochId: Long, messages: Array[WriterCommitMessage]): Unit = { + dataMap.synchronized { + withData(messages.map(_.asInstanceOf[BufferedRows])) + } + } + } + + private object StreamingTruncateAndAppend extends TestStreamingWrite { + override def commit(epochId: Long, messages: Array[WriterCommitMessage]): Unit = { + dataMap.synchronized { + dataMap.clear + withData(messages.map(_.asInstanceOf[BufferedRows])) + } + } + } + override def deleteWhere(filters: Array[Filter]): Unit = dataMap.synchronized { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.MultipartIdentifierHelper dataMap --= InMemoryTable.filtersToKeys(dataMap.keys, partCols.map(_.toSeq.quoted), filters) @@ -310,10 +360,17 @@ private class BufferedRowsReader(partition: BufferedRows) extends PartitionReade override def close(): Unit = {} } -private object BufferedRowsWriterFactory extends DataWriterFactory { +private object BufferedRowsWriterFactory extends DataWriterFactory with StreamingDataWriterFactory { override def createWriter(partitionId: Int, taskId: Long): DataWriter[InternalRow] = { new BufferWriter } + + override def createWriter( + partitionId: Int, + taskId: Long, + epochId: Long): DataWriter[InternalRow] = { + new BufferWriter + } } private class BufferWriter extends DataWriter[InternalRow] { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index dda6dec9c4ebc..239b4fc2de374 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -27,7 +27,7 @@ import org.apache.spark.api.java.function.VoidFunction2 import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.streaming.InternalOutputModes import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap -import org.apache.spark.sql.connector.catalog.{SupportsWrite, TableProvider} +import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table, TableProvider} import org.apache.spark.sql.connector.catalog.TableCapability._ import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.DataSource @@ -45,6 +45,7 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap */ @Evolving final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { + import DataStreamWriter._ private val df = ds.toDF() @@ -294,60 +295,75 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { @throws[TimeoutException] def start(): StreamingQuery = startInternal(None) + /** + * Starts the execution of the streaming query, which will continually output results to the given + * table as new data arrives. The returned [[StreamingQuery]] object can be used to interact with + * the stream. 
+ * + * @since 3.1.0 + */ + @throws[TimeoutException] + def saveAsTable(tableName: String): StreamingQuery = { + this.source = SOURCE_NAME_TABLE + this.tableName = tableName + startInternal(None) + } + private def startInternal(path: Option[String]): StreamingQuery = { if (source.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) { throw new AnalysisException("Hive data source can only be used with tables, you can not " + "write files of Hive data source directly.") } - if (source == "memory") { - assertNotPartitioned("memory") + if (source == SOURCE_NAME_TABLE) { + assertNotPartitioned(SOURCE_NAME_TABLE) + + import df.sparkSession.sessionState.analyzer.CatalogAndIdentifier + + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + val originalMultipartIdentifier = df.sparkSession.sessionState.sqlParser + .parseMultipartIdentifier(tableName) + val CatalogAndIdentifier(catalog, identifier) = originalMultipartIdentifier + + // Currently we don't create a logical streaming writer node in logical plan, so cannot rely + // on analyzer to resolve it. Directly lookup only for temp view to provide clearer message. + // TODO (SPARK-27484): we should add the writing node before the plan is analyzed. + if (df.sparkSession.sessionState.catalog.isTempView(originalMultipartIdentifier)) { + throw new AnalysisException(s"Temporary view $tableName doesn't support streaming write") + } + + val tableInstance = catalog.asTableCatalog.loadTable(identifier) + + import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ + val sink = tableInstance match { + case t: SupportsWrite if t.supports(STREAMING_WRITE) => t + case t => throw new AnalysisException(s"Table $tableName doesn't support streaming " + + s"write - $t") + } + + startQuery(sink, extraOptions) + } else if (source == SOURCE_NAME_MEMORY) { + assertNotPartitioned(SOURCE_NAME_MEMORY) if (extraOptions.get("queryName").isEmpty) { throw new AnalysisException("queryName must be specified for memory sink") } val sink = new MemorySink() val resultDf = Dataset.ofRows(df.sparkSession, new MemoryPlan(sink, df.schema.toAttributes)) - val chkpointLoc = extraOptions.get("checkpointLocation") val recoverFromChkpoint = outputMode == OutputMode.Complete() - val query = df.sparkSession.sessionState.streamingQueryManager.startQuery( - extraOptions.get("queryName"), - chkpointLoc, - df, - extraOptions.toMap, - sink, - outputMode, - useTempCheckpointLocation = true, - recoverFromCheckpointLocation = recoverFromChkpoint, - trigger = trigger) + val query = startQuery(sink, extraOptions, recoverFromCheckpoint = recoverFromChkpoint) resultDf.createOrReplaceTempView(query.name) query - } else if (source == "foreach") { - assertNotPartitioned("foreach") + } else if (source == SOURCE_NAME_FOREACH) { + assertNotPartitioned(SOURCE_NAME_FOREACH) val sink = ForeachWriterTable[T](foreachWriter, ds.exprEnc) - df.sparkSession.sessionState.streamingQueryManager.startQuery( - extraOptions.get("queryName"), - extraOptions.get("checkpointLocation"), - df, - extraOptions.toMap, - sink, - outputMode, - useTempCheckpointLocation = true, - trigger = trigger) - } else if (source == "foreachBatch") { - assertNotPartitioned("foreachBatch") + startQuery(sink, extraOptions) + } else if (source == SOURCE_NAME_FOREACH_BATCH) { + assertNotPartitioned(SOURCE_NAME_FOREACH_BATCH) if (trigger.isInstanceOf[ContinuousTrigger]) { - throw new AnalysisException("'foreachBatch' is not supported with continuous trigger") + throw new AnalysisException(s"'$source' is not supported 
with continuous trigger") } val sink = new ForeachBatchSink[T](foreachBatchWriter, ds.exprEnc) - df.sparkSession.sessionState.streamingQueryManager.startQuery( - extraOptions.get("queryName"), - extraOptions.get("checkpointLocation"), - df, - extraOptions.toMap, - sink, - outputMode, - useTempCheckpointLocation = true, - trigger = trigger) + startQuery(sink, extraOptions) } else { val cls = DataSource.lookupDataSource(source, df.sparkSession.sessionState.conf) val disabledSources = df.sparkSession.sqlContext.conf.disabledV2StreamingWriters.split(",") @@ -380,19 +396,28 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { createV1Sink(optionsWithPath) } - df.sparkSession.sessionState.streamingQueryManager.startQuery( - extraOptions.get("queryName"), - extraOptions.get("checkpointLocation"), - df, - optionsWithPath.originalMap, - sink, - outputMode, - useTempCheckpointLocation = source == "console" || source == "noop", - recoverFromCheckpointLocation = true, - trigger = trigger) + startQuery(sink, optionsWithPath) } } + private def startQuery( + sink: Table, + newOptions: CaseInsensitiveMap[String], + recoverFromCheckpoint: Boolean = true): StreamingQuery = { + val useTempCheckpointLocation = SOURCES_ALLOW_ONE_TIME_QUERY.contains(source) + + df.sparkSession.sessionState.streamingQueryManager.startQuery( + newOptions.get("queryName"), + newOptions.get("checkpointLocation"), + df, + newOptions.originalMap, + sink, + outputMode, + useTempCheckpointLocation = useTempCheckpointLocation, + recoverFromCheckpointLocation = recoverFromCheckpoint, + trigger = trigger) + } + private def createV1Sink(optionsWithPath: CaseInsensitiveMap[String]): Sink = { val ds = DataSource( df.sparkSession, @@ -409,7 +434,7 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { * @since 2.0.0 */ def foreach(writer: ForeachWriter[T]): DataStreamWriter[T] = { - this.source = "foreach" + this.source = SOURCE_NAME_FOREACH this.foreachWriter = if (writer != null) { ds.sparkSession.sparkContext.clean(writer) } else { @@ -433,7 +458,7 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { */ @Evolving def foreachBatch(function: (Dataset[T], Long) => Unit): DataStreamWriter[T] = { - this.source = "foreachBatch" + this.source = SOURCE_NAME_FOREACH_BATCH if (function == null) throw new IllegalArgumentException("foreachBatch function cannot be null") this.foreachBatchWriter = function this @@ -485,6 +510,8 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { private var source: String = df.sparkSession.sessionState.conf.defaultDataSourceName + private var tableName: String = null + private var outputMode: OutputMode = OutputMode.Append private var trigger: Trigger = Trigger.ProcessingTime(0L) @@ -497,3 +524,16 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { private var partitioningColumns: Option[Seq[String]] = None } + +object DataStreamWriter { + val SOURCE_NAME_MEMORY = "memory" + val SOURCE_NAME_FOREACH = "foreach" + val SOURCE_NAME_FOREACH_BATCH = "foreachBatch" + val SOURCE_NAME_CONSOLE = "console" + val SOURCE_NAME_TABLE = "table" + val SOURCE_NAME_NOOP = "noop" + + // these writer sources are also used for one-time query, hence allow temp checkpoint location + val SOURCES_ALLOW_ONE_TIME_QUERY = Seq(SOURCE_NAME_MEMORY, SOURCE_NAME_FOREACH, + SOURCE_NAME_FOREACH_BATCH, SOURCE_NAME_CONSOLE, SOURCE_NAME_NOOP) +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala index 788452dace84b..062b1060bc601 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.streaming.test +import java.io.File import java.util import scala.collection.JavaConverters._ @@ -25,10 +26,10 @@ import org.scalatest.BeforeAndAfter import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException +import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 -import org.apache.spark.sql.connector.{FakeV2Provider, InMemoryTableCatalog} +import org.apache.spark.sql.connector.{FakeV2Provider, InMemoryTableCatalog, InMemoryTableSessionCatalog} import org.apache.spark.sql.connector.catalog.{Identifier, SupportsRead, Table, TableCapability, V2TableWithV1Fallback} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.connector.read.ScanBuilder @@ -51,9 +52,10 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { after { spark.sessionState.catalogManager.reset() spark.sessionState.conf.clear() + sqlContext.streams.active.foreach(_.stop()) } - test("table API with file source") { + test("read: table API with file source") { Seq("parquet", "").foreach { source => withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> source) { withTempDir { tempDir => @@ -72,13 +74,13 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { } } - test("read non-exist table") { + test("read: read non-exist table") { intercept[AnalysisException] { spark.readStream.table("non_exist_table") }.message.contains("Table not found") } - test("stream table API with temp view") { + test("read: stream table API with temp view") { val tblName = "my_table" val stream = MemoryStream[Int] withTable(tblName) { @@ -93,7 +95,7 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { } } - test("stream table API with non-streaming temp view") { + test("read: stream table API with non-streaming temp view") { val tblName = "my_table" withTable(tblName) { spark.range(3).createOrReplaceTempView(tblName) @@ -103,7 +105,7 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { } } - test("read table without streaming capability support") { + test("read: read table without streaming capability support") { val tableIdentifer = "testcat.table_name" spark.sql(s"CREATE TABLE $tableIdentifer (id bigint, data string) USING foo") @@ -113,7 +115,7 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { }.message.contains("does not support either micro-batch or continuous scan") } - test("read table with custom catalog") { + test("read: read table with custom catalog") { val tblName = "teststream.table_name" withTable(tblName) { spark.sql(s"CREATE TABLE $tblName (data int) USING foo") @@ -131,7 +133,7 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { } } - test("read table with custom catalog & namespace") { + test("read: read table with custom catalog & namespace") { spark.sql("CREATE NAMESPACE teststream.ns") val 
tblName = "teststream.ns.table_name" @@ -151,7 +153,7 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { } } - test("fallback to V1 relation") { + test("read: fallback to V1 relation") { val tblName = DataStreamTableAPISuite.V1FallbackTestTableName spark.conf.set(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION.key, classOf[InMemoryStreamTableCatalog].getName) @@ -169,6 +171,146 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { } } } + + test("write: write to table with custom catalog & no namespace") { + val tableIdentifier = "testcat.table_name" + + spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING foo") + checkAnswer(spark.table(tableIdentifier), Seq.empty) + + runTestWithStreamAppend(tableIdentifier) + } + + test("write: write to table with custom catalog & namespace") { + spark.sql("CREATE NAMESPACE testcat.ns") + + val tableIdentifier = "testcat.ns.table_name" + + spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING foo") + checkAnswer(spark.table(tableIdentifier), Seq.empty) + + runTestWithStreamAppend(tableIdentifier) + } + + test("write: write to table with default session catalog") { + val v2Source = classOf[FakeV2Provider].getName + spark.conf.set(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION.key, + classOf[InMemoryTableSessionCatalog].getName) + + spark.sql("CREATE NAMESPACE ns") + + val tableIdentifier = "ns.table_name" + spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING $v2Source") + checkAnswer(spark.table(tableIdentifier), Seq.empty) + + runTestWithStreamAppend(tableIdentifier) + } + + test("write: write to non-exist table with custom catalog") { + val tableIdentifier = "testcat.nonexisttable" + spark.sql("CREATE NAMESPACE testcat.ns") + + withTempDir { checkpointDir => + val exc = intercept[NoSuchTableException] { + runStreamQueryAppendMode(tableIdentifier, checkpointDir, Seq.empty, Seq.empty) + } + assert(exc.getMessage.contains("nonexisttable")) + } + } + + test("write: write to file provider based table isn't allowed yet") { + val tableIdentifier = "table_name" + + spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING parquet") + checkAnswer(spark.table(tableIdentifier), Seq.empty) + + withTempDir { checkpointDir => + val exc = intercept[AnalysisException] { + runStreamQueryAppendMode(tableIdentifier, checkpointDir, Seq.empty, Seq.empty) + } + assert(exc.getMessage.contains("doesn't support streaming write")) + } + } + + test("write: write to temporary view isn't allowed yet") { + val tableIdentifier = "testcat.table_name" + val tempViewIdentifier = "temp_view" + + spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING foo") + checkAnswer(spark.table(tableIdentifier), Seq.empty) + + spark.table(tableIdentifier).createOrReplaceTempView(tempViewIdentifier) + + withTempDir { checkpointDir => + val exc = intercept[AnalysisException] { + runStreamQueryAppendMode(tempViewIdentifier, checkpointDir, Seq.empty, Seq.empty) + } + assert(exc.getMessage.contains("doesn't support streaming write")) + } + } + + test("write: write to view shouldn't be allowed") { + val tableIdentifier = "testcat.table_name" + val viewIdentifier = "table_view" + + spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING foo") + checkAnswer(spark.table(tableIdentifier), Seq.empty) + + spark.sql(s"CREATE VIEW $viewIdentifier AS SELECT id, data FROM $tableIdentifier") + + withTempDir { checkpointDir => + val exc = intercept[AnalysisException] { + 
runStreamQueryAppendMode(viewIdentifier, checkpointDir, Seq.empty, Seq.empty) + } + assert(exc.getMessage.contains("doesn't support streaming write")) + } + } + + private def runTestWithStreamAppend(tableIdentifier: String) = { + withTempDir { checkpointDir => + val input1 = Seq((1L, "a"), (2L, "b"), (3L, "c")) + verifyStreamAppend(tableIdentifier, checkpointDir, Seq.empty, input1, input1) + + val input2 = Seq((4L, "d"), (5L, "e"), (6L, "f")) + verifyStreamAppend(tableIdentifier, checkpointDir, Seq(input1), input2, input1 ++ input2) + } + } + + private def runStreamQueryAppendMode( + tableIdentifier: String, + checkpointDir: File, + prevInputs: Seq[Seq[(Long, String)]], + newInputs: Seq[(Long, String)]): Unit = { + val inputData = MemoryStream[(Long, String)] + val inputDF = inputData.toDF().toDF("id", "data") + + prevInputs.foreach { inputsPerBatch => + inputData.addData(inputsPerBatch: _*) + } + + val query = inputDF + .writeStream + .option("checkpointLocation", checkpointDir.getAbsolutePath) + .saveAsTable(tableIdentifier) + + inputData.addData(newInputs: _*) + + query.processAllAvailable() + query.stop() + } + + private def verifyStreamAppend( + tableIdentifier: String, + checkpointDir: File, + prevInputs: Seq[Seq[(Long, String)]], + newInputs: Seq[(Long, String)], + expectedOutputs: Seq[(Long, String)]): Unit = { + runStreamQueryAppendMode(tableIdentifier, checkpointDir, prevInputs, newInputs) + checkAnswer( + spark.table(tableIdentifier), + expectedOutputs.map { case (id, data) => Row(id, data) } + ) + } } object DataStreamTableAPISuite { From 2e07ed30418d45e89d108bc4bc020d2933c20a3a Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Fri, 9 Oct 2020 03:04:26 -0700 Subject: [PATCH 0206/1009] [SPARK-33082][SPARK-20202][BUILD][SQL][FOLLOW-UP] Remove Hive 1.2 workarounds and Hive 1.2 profile in Jenkins script ### What changes were proposed in this pull request? This PR removes the leftover of Hive 1.2 workarounds and Hive 1.2 profile in Jenkins script. - `test-hive1.2` title is not used anymore in Jenkins - Remove some comments related to Hive 1.2 - Remove unused codes in `OrcFilters.scala` Hive - Test `spark.sql.hive.convertMetastoreOrc` disabled case for the tests added at SPARK-19809 and SPARK-22267 ### Why are the changes needed? To remove unused codes & improve test coverage ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? Manually ran the unit tests. Also It will be tested in CI in this PR. Closes #29973 from HyukjinKwon/SPARK-33082-SPARK-20202. 
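As a rough illustration of the extra coverage called out under "How was this patch tested?", the `spark.sql.hive.convertMetastoreOrc` case amounts to running the existing ORC assertions under both settings. A minimal sketch, assuming the usual Hive test helpers (`withSQLConf`, `withTempPath`, `withTable`, `checkAnswer`) available to suites like `HiveOrcQuerySuite`:

```
Seq(true, false).foreach { convertMetastoreOrc =>
  withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> convertMetastoreOrc.toString) {
    withTempPath { dir =>
      val path = dir.getCanonicalPath
      // Write a two-column ORC file, then read it back through a Hive table
      // whose column order is deliberately swapped (the SPARK-22267 scenario).
      Seq(1 -> 2).toDF("c1", "c2").write.orc(path)
      withTable("t") {
        sql(s"CREATE EXTERNAL TABLE t(c2 INT, c1 INT) STORED AS ORC LOCATION '$path'")
        checkAnswer(spark.table("t"), Row(2, 1))
      }
    }
  }
}
```

The actual change in `HiveOrcQuerySuite.scala` further down in this patch follows this shape for the SPARK-22267 test.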
Authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-1.2 | 230 --------- dev/run-tests-jenkins.py | 2 - .../datasources/orc/OrcFilterSuite.scala | 4 - .../datasources/orc/OrcQuerySuite.scala | 1 - .../execution/datasources/orc/OrcTest.scala | 1 - .../spark/sql/hive/orc/OrcFileFormat.scala | 6 +- .../spark/sql/hive/orc/OrcFilters.scala | 248 --------- .../sql/hive/orc/HiveOrcFilterSuite.scala | 469 ------------------ .../sql/hive/orc/HiveOrcQuerySuite.scala | 22 +- 9 files changed, 13 insertions(+), 970 deletions(-) delete mode 100644 dev/deps/spark-deps-hadoop-2.7-hive-1.2 delete mode 100644 sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala delete mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-1.2 b/dev/deps/spark-deps-hadoop-2.7-hive-1.2 deleted file mode 100644 index d07b04608328f..0000000000000 --- a/dev/deps/spark-deps-hadoop-2.7-hive-1.2 +++ /dev/null @@ -1,230 +0,0 @@ -JLargeArrays/1.5//JLargeArrays-1.5.jar -JTransforms/3.1//JTransforms-3.1.jar -JavaEWAH/0.3.2//JavaEWAH-0.3.2.jar -RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar -ST4/4.0.4//ST4-4.0.4.jar -activation/1.1.1//activation-1.1.1.jar -aircompressor/0.10//aircompressor-0.10.jar -algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar -antlr-runtime/3.4//antlr-runtime-3.4.jar -antlr/2.7.7//antlr-2.7.7.jar -antlr4-runtime/4.7.1//antlr4-runtime-4.7.1.jar -aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar -aopalliance/1.0//aopalliance-1.0.jar -apache-log4j-extras/1.2.17//apache-log4j-extras-1.2.17.jar -apacheds-i18n/2.0.0-M15//apacheds-i18n-2.0.0-M15.jar -apacheds-kerberos-codec/2.0.0-M15//apacheds-kerberos-codec-2.0.0-M15.jar -api-asn1-api/1.0.0-M20//api-asn1-api-1.0.0-M20.jar -api-util/1.0.0-M20//api-util-1.0.0-M20.jar -arpack_combined_all/0.1//arpack_combined_all-0.1.jar -arrow-format/1.0.1//arrow-format-1.0.1.jar -arrow-memory-core/1.0.1//arrow-memory-core-1.0.1.jar -arrow-memory-netty/1.0.1//arrow-memory-netty-1.0.1.jar -arrow-vector/1.0.1//arrow-vector-1.0.1.jar -audience-annotations/0.5.0//audience-annotations-0.5.0.jar -automaton/1.11-8//automaton-1.11-8.jar -avro-ipc/1.8.2//avro-ipc-1.8.2.jar -avro-mapred/1.8.2/hadoop2/avro-mapred-1.8.2-hadoop2.jar -avro/1.8.2//avro-1.8.2.jar -bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar -breeze-macros_2.12/1.0//breeze-macros_2.12-1.0.jar -breeze_2.12/1.0//breeze_2.12-1.0.jar -cats-kernel_2.12/2.0.0-M4//cats-kernel_2.12-2.0.0-M4.jar -chill-java/0.9.5//chill-java-0.9.5.jar -chill_2.12/0.9.5//chill_2.12-0.9.5.jar -commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar -commons-cli/1.2//commons-cli-1.2.jar -commons-codec/1.10//commons-codec-1.10.jar -commons-collections/3.2.2//commons-collections-3.2.2.jar -commons-compiler/3.0.16//commons-compiler-3.0.16.jar -commons-compress/1.8.1//commons-compress-1.8.1.jar -commons-configuration/1.6//commons-configuration-1.6.jar -commons-crypto/1.0.0//commons-crypto-1.0.0.jar -commons-dbcp/1.4//commons-dbcp-1.4.jar -commons-digester/1.8//commons-digester-1.8.jar -commons-httpclient/3.1//commons-httpclient-3.1.jar -commons-io/2.4//commons-io-2.4.jar -commons-lang/2.6//commons-lang-2.6.jar -commons-lang3/3.10//commons-lang3-3.10.jar -commons-logging/1.1.3//commons-logging-1.1.3.jar -commons-math3/3.4.1//commons-math3-3.4.1.jar -commons-net/3.1//commons-net-3.1.jar -commons-pool/1.5.4//commons-pool-1.5.4.jar -commons-text/1.6//commons-text-1.6.jar -compress-lzf/1.0.3//compress-lzf-1.0.3.jar 
-core/1.1.2//core-1.1.2.jar -curator-client/2.7.1//curator-client-2.7.1.jar -curator-framework/2.7.1//curator-framework-2.7.1.jar -curator-recipes/2.7.1//curator-recipes-2.7.1.jar -datanucleus-api-jdo/3.2.6//datanucleus-api-jdo-3.2.6.jar -datanucleus-core/3.2.10//datanucleus-core-3.2.10.jar -datanucleus-rdbms/3.2.9//datanucleus-rdbms-3.2.9.jar -derby/10.12.1.1//derby-10.12.1.1.jar -flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar -generex/1.0.2//generex-1.0.2.jar -gson/2.2.4//gson-2.2.4.jar -guava/14.0.1//guava-14.0.1.jar -guice-servlet/3.0//guice-servlet-3.0.jar -guice/3.0//guice-3.0.jar -hadoop-annotations/2.7.4//hadoop-annotations-2.7.4.jar -hadoop-auth/2.7.4//hadoop-auth-2.7.4.jar -hadoop-client/2.7.4//hadoop-client-2.7.4.jar -hadoop-common/2.7.4//hadoop-common-2.7.4.jar -hadoop-hdfs/2.7.4//hadoop-hdfs-2.7.4.jar -hadoop-mapreduce-client-app/2.7.4//hadoop-mapreduce-client-app-2.7.4.jar -hadoop-mapreduce-client-common/2.7.4//hadoop-mapreduce-client-common-2.7.4.jar -hadoop-mapreduce-client-core/2.7.4//hadoop-mapreduce-client-core-2.7.4.jar -hadoop-mapreduce-client-jobclient/2.7.4//hadoop-mapreduce-client-jobclient-2.7.4.jar -hadoop-mapreduce-client-shuffle/2.7.4//hadoop-mapreduce-client-shuffle-2.7.4.jar -hadoop-yarn-api/2.7.4//hadoop-yarn-api-2.7.4.jar -hadoop-yarn-client/2.7.4//hadoop-yarn-client-2.7.4.jar -hadoop-yarn-common/2.7.4//hadoop-yarn-common-2.7.4.jar -hadoop-yarn-server-common/2.7.4//hadoop-yarn-server-common-2.7.4.jar -hadoop-yarn-server-web-proxy/2.7.4//hadoop-yarn-server-web-proxy-2.7.4.jar -hk2-api/2.6.1//hk2-api-2.6.1.jar -hk2-locator/2.6.1//hk2-locator-2.6.1.jar -hk2-utils/2.6.1//hk2-utils-2.6.1.jar -htrace-core/3.1.0-incubating//htrace-core-3.1.0-incubating.jar -httpclient/4.5.6//httpclient-4.5.6.jar -httpcore/4.4.12//httpcore-4.4.12.jar -istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar -ivy/2.4.0//ivy-2.4.0.jar -jackson-annotations/2.10.0//jackson-annotations-2.10.0.jar -jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar -jackson-core/2.10.0//jackson-core-2.10.0.jar -jackson-databind/2.10.0//jackson-databind-2.10.0.jar -jackson-dataformat-yaml/2.10.0//jackson-dataformat-yaml-2.10.0.jar -jackson-datatype-jsr310/2.10.3//jackson-datatype-jsr310-2.10.3.jar -jackson-jaxrs/1.9.13//jackson-jaxrs-1.9.13.jar -jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar -jackson-module-jaxb-annotations/2.10.0//jackson-module-jaxb-annotations-2.10.0.jar -jackson-module-paranamer/2.10.0//jackson-module-paranamer-2.10.0.jar -jackson-module-scala_2.12/2.10.0//jackson-module-scala_2.12-2.10.0.jar -jackson-xc/1.9.13//jackson-xc-1.9.13.jar -jakarta.activation-api/1.2.1//jakarta.activation-api-1.2.1.jar -jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar -jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar -jakarta.validation-api/2.0.2//jakarta.validation-api-2.0.2.jar -jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar -jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar -janino/3.0.16//janino-3.0.16.jar -javassist/3.25.0-GA//javassist-3.25.0-GA.jar -javax.inject/1//javax.inject-1.jar -javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar -javolution/5.5.1//javolution-5.5.1.jar -jaxb-api/2.2.2//jaxb-api-2.2.2.jar -jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar -jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar -jdo-api/3.0.1//jdo-api-3.0.1.jar -jersey-client/2.30//jersey-client-2.30.jar -jersey-common/2.30//jersey-common-2.30.jar -jersey-container-servlet-core/2.30//jersey-container-servlet-core-2.30.jar 
-jersey-container-servlet/2.30//jersey-container-servlet-2.30.jar -jersey-hk2/2.30//jersey-hk2-2.30.jar -jersey-media-jaxb/2.30//jersey-media-jaxb-2.30.jar -jersey-server/2.30//jersey-server-2.30.jar -jetty-sslengine/6.1.26//jetty-sslengine-6.1.26.jar -jetty-util/6.1.26//jetty-util-6.1.26.jar -jetty/6.1.26//jetty-6.1.26.jar -jline/2.14.6//jline-2.14.6.jar -joda-time/2.10.5//joda-time-2.10.5.jar -jodd-core/3.5.2//jodd-core-3.5.2.jar -jpam/1.1//jpam-1.1.jar -json4s-ast_2.12/3.7.0-M5//json4s-ast_2.12-3.7.0-M5.jar -json4s-core_2.12/3.7.0-M5//json4s-core_2.12-3.7.0-M5.jar -json4s-jackson_2.12/3.7.0-M5//json4s-jackson_2.12-3.7.0-M5.jar -json4s-scalap_2.12/3.7.0-M5//json4s-scalap_2.12-3.7.0-M5.jar -jsp-api/2.1//jsp-api-2.1.jar -jsr305/3.0.0//jsr305-3.0.0.jar -jta/1.1//jta-1.1.jar -jul-to-slf4j/1.7.30//jul-to-slf4j-1.7.30.jar -kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client/4.10.3//kubernetes-client-4.10.3.jar -kubernetes-model-admissionregistration/4.10.3//kubernetes-model-admissionregistration-4.10.3.jar -kubernetes-model-apiextensions/4.10.3//kubernetes-model-apiextensions-4.10.3.jar -kubernetes-model-apps/4.10.3//kubernetes-model-apps-4.10.3.jar -kubernetes-model-autoscaling/4.10.3//kubernetes-model-autoscaling-4.10.3.jar -kubernetes-model-batch/4.10.3//kubernetes-model-batch-4.10.3.jar -kubernetes-model-certificates/4.10.3//kubernetes-model-certificates-4.10.3.jar -kubernetes-model-common/4.10.3//kubernetes-model-common-4.10.3.jar -kubernetes-model-coordination/4.10.3//kubernetes-model-coordination-4.10.3.jar -kubernetes-model-core/4.10.3//kubernetes-model-core-4.10.3.jar -kubernetes-model-discovery/4.10.3//kubernetes-model-discovery-4.10.3.jar -kubernetes-model-events/4.10.3//kubernetes-model-events-4.10.3.jar -kubernetes-model-extensions/4.10.3//kubernetes-model-extensions-4.10.3.jar -kubernetes-model-metrics/4.10.3//kubernetes-model-metrics-4.10.3.jar -kubernetes-model-networking/4.10.3//kubernetes-model-networking-4.10.3.jar -kubernetes-model-policy/4.10.3//kubernetes-model-policy-4.10.3.jar -kubernetes-model-rbac/4.10.3//kubernetes-model-rbac-4.10.3.jar -kubernetes-model-scheduling/4.10.3//kubernetes-model-scheduling-4.10.3.jar -kubernetes-model-settings/4.10.3//kubernetes-model-settings-4.10.3.jar -kubernetes-model-storageclass/4.10.3//kubernetes-model-storageclass-4.10.3.jar -leveldbjni-all/1.8//leveldbjni-all-1.8.jar -libfb303/0.9.3//libfb303-0.9.3.jar -libthrift/0.12.0//libthrift-0.12.0.jar -log4j/1.2.17//log4j-1.2.17.jar -logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar -lz4-java/1.7.1//lz4-java-1.7.1.jar -machinist_2.12/0.6.8//machinist_2.12-0.6.8.jar -macro-compat_2.12/1.1.1//macro-compat_2.12-1.1.1.jar -mesos/1.4.0/shaded-protobuf/mesos-1.4.0-shaded-protobuf.jar -metrics-core/4.1.1//metrics-core-4.1.1.jar -metrics-graphite/4.1.1//metrics-graphite-4.1.1.jar -metrics-jmx/4.1.1//metrics-jmx-4.1.1.jar -metrics-json/4.1.1//metrics-json-4.1.1.jar -metrics-jvm/4.1.1//metrics-jvm-4.1.1.jar -minlog/1.3.0//minlog-1.3.0.jar -netty-all/4.1.51.Final//netty-all-4.1.51.Final.jar -objenesis/2.6//objenesis-2.6.jar -okhttp/3.12.12//okhttp-3.12.12.jar -okio/1.14.0//okio-1.14.0.jar -opencsv/2.3//opencsv-2.3.jar -openshift-model/4.10.3//openshift-model-4.10.3.jar -orc-core/1.5.12/nohive/orc-core-1.5.12-nohive.jar -orc-mapreduce/1.5.12/nohive/orc-mapreduce-1.5.12-nohive.jar -orc-shims/1.5.12//orc-shims-1.5.12.jar -oro/2.0.8//oro-2.0.8.jar -osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar -paranamer/2.8//paranamer-2.8.jar 
-parquet-column/1.10.1//parquet-column-1.10.1.jar -parquet-common/1.10.1//parquet-common-1.10.1.jar -parquet-encoding/1.10.1//parquet-encoding-1.10.1.jar -parquet-format/2.4.0//parquet-format-2.4.0.jar -parquet-hadoop-bundle/1.6.0//parquet-hadoop-bundle-1.6.0.jar -parquet-hadoop/1.10.1//parquet-hadoop-1.10.1.jar -parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar -protobuf-java/2.5.0//protobuf-java-2.5.0.jar -py4j/0.10.9//py4j-0.10.9.jar -pyrolite/4.30//pyrolite-4.30.jar -scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar -scala-compiler/2.12.10//scala-compiler-2.12.10.jar -scala-library/2.12.10//scala-library-2.12.10.jar -scala-parser-combinators_2.12/1.1.2//scala-parser-combinators_2.12-1.1.2.jar -scala-reflect/2.12.10//scala-reflect-2.12.10.jar -scala-xml_2.12/1.2.0//scala-xml_2.12-1.2.0.jar -shapeless_2.12/2.3.3//shapeless_2.12-2.3.3.jar -shims/0.9.0//shims-0.9.0.jar -slf4j-api/1.7.30//slf4j-api-1.7.30.jar -slf4j-log4j12/1.7.30//slf4j-log4j12-1.7.30.jar -snakeyaml/1.24//snakeyaml-1.24.jar -snappy-java/1.1.7.5//snappy-java-1.1.7.5.jar -snappy/0.2//snappy-0.2.jar -spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar -spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar -spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar -spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar -stax-api/1.0-2//stax-api-1.0-2.jar -stax-api/1.0.1//stax-api-1.0.1.jar -stream/2.9.6//stream-2.9.6.jar -stringtemplate/3.2.1//stringtemplate-3.2.1.jar -super-csv/2.2.0//super-csv-2.2.0.jar -threeten-extra/1.5.0//threeten-extra-1.5.0.jar -univocity-parsers/2.9.0//univocity-parsers-2.9.0.jar -xbean-asm7-shaded/4.15//xbean-asm7-shaded-4.15.jar -xercesImpl/2.12.0//xercesImpl-2.12.0.jar -xml-apis/1.4.01//xml-apis-1.4.01.jar -xmlenc/0.52//xmlenc-0.52.jar -xz/1.5//xz-1.5.jar -zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar -zookeeper/3.4.14//zookeeper-3.4.14.jar -zstd-jni/1.4.5-4//zstd-jni-1.4.5-4.jar diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py index 4ff5b327e3325..610fb1fd27027 100755 --- a/dev/run-tests-jenkins.py +++ b/dev/run-tests-jenkins.py @@ -175,8 +175,6 @@ def main(): if "test-hadoop3.2" in ghprb_pull_title: os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop3.2" # Switch the Hive profile based on the PR title: - if "test-hive1.2" in ghprb_pull_title: - os.environ["AMPLAB_JENKINS_BUILD_HIVE_PROFILE"] = "hive1.2" if "test-hive2.3" in ghprb_pull_title: os.environ["AMPLAB_JENKINS_BUILD_HIVE_PROFILE"] = "hive2.3" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala index afc83d7c395f0..681ed91afaa12 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala @@ -39,10 +39,6 @@ import org.apache.spark.sql.types._ /** * A test suite that tests Apache ORC filter API based filter pushdown optimization. - * OrcFilterSuite and HiveOrcFilterSuite is logically duplicated to provide the same test coverage. - * The difference are the packages containing 'Predicate' and 'SearchArgument' classes. - * - OrcFilterSuite uses 'org.apache.orc.storage.ql.io.sarg' package. - * - HiveOrcFilterSuite uses 'org.apache.hadoop.hive.ql.io.sarg' package. 
*/ class OrcFilterSuite extends OrcTest with SharedSparkSession { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala index d2970ef1bb63d..ead2c2cf1b70f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala @@ -217,7 +217,6 @@ abstract class OrcQueryTest extends OrcTest { } } - // Hive supports zlib, snappy and none for Hive 1.2.1. test("Compression options for writing to an ORC file (SNAPPY, ZLIB and NONE)") { withTempPath { file => spark.range(0, 10).write diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala index aec61acda5444..4243318ac1dd8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala @@ -46,7 +46,6 @@ import org.apache.spark.sql.internal.SQLConf.ORC_IMPLEMENTATION * -> OrcPartitionDiscoverySuite * -> HiveOrcPartitionDiscoverySuite * -> OrcFilterSuite - * -> HiveOrcFilterSuite */ abstract class OrcTest extends QueryTest with FileBasedDataSourceTest with BeforeAndAfterAll { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala index d1ee1baadcbce..2868bb4ba85d3 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala @@ -47,7 +47,7 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.execution.datasources._ -import org.apache.spark.sql.execution.datasources.orc.OrcOptions +import org.apache.spark.sql.execution.datasources.orc.{OrcFilters, OrcOptions} import org.apache.spark.sql.hive.{HiveInspectors, HiveShim} import org.apache.spark.sql.sources.{Filter, _} import org.apache.spark.sql.types._ @@ -139,7 +139,7 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable if (sparkSession.sessionState.conf.orcFilterPushDown) { // Sets pushed predicates - OrcFilters.createFilter(requiredSchema, filters.toArray).foreach { f => + OrcFilters.createFilter(requiredSchema, filters).foreach { f => hadoopConf.set(OrcFileFormat.SARG_PUSHDOWN, toKryo(f)) hadoopConf.setBoolean(ConfVars.HIVEOPTINDEXFILTER.varname, true) } @@ -296,7 +296,7 @@ private[orc] class OrcOutputWriter( override def close(): Unit = { if (recordWriterInstantiated) { - // Hive 1.2.1 ORC initializes its private `writer` field at the first write. + // Hive ORC initializes its private `writer` field at the first write. 
OrcFileFormat.addSparkVersionMetadata(recordWriter) recordWriter.close(Reporter.NULL) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala deleted file mode 100644 index ea5c7ca15b065..0000000000000 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.hive.orc - -import java.lang.reflect.Method - -import org.apache.hadoop.hive.ql.io.sarg.SearchArgument -import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.Builder -import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory.newBuilder - -import org.apache.spark.SparkException -import org.apache.spark.internal.Logging -import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quoteIfNeeded -import org.apache.spark.sql.execution.datasources.orc.{OrcFilters => DatasourceOrcFilters} -import org.apache.spark.sql.execution.datasources.orc.OrcFilters.buildTree -import org.apache.spark.sql.hive.HiveUtils -import org.apache.spark.sql.sources._ -import org.apache.spark.sql.types._ - -/** - * Helper object for building ORC `SearchArgument`s, which are used for ORC predicate push-down. - * - * Due to limitation of ORC `SearchArgument` builder, we had to end up with a pretty weird double- - * checking pattern when converting `And`/`Or`/`Not` filters. - * - * An ORC `SearchArgument` must be built in one pass using a single builder. For example, you can't - * build `a = 1` and `b = 2` first, and then combine them into `a = 1 AND b = 2`. This is quite - * different from the cases in Spark SQL or Parquet, where complex filters can be easily built using - * existing simpler ones. - * - * The annoying part is that, `SearchArgument` builder methods like `startAnd()`, `startOr()`, and - * `startNot()` mutate internal state of the builder instance. This forces us to translate all - * convertible filters with a single builder instance. However, before actually converting a filter, - * we've no idea whether it can be recognized by ORC or not. Thus, when an inconvertible filter is - * found, we may already end up with a builder whose internal state is inconsistent. - * - * For example, to convert an `And` filter with builder `b`, we call `b.startAnd()` first, and then - * try to convert its children. Say we convert `left` child successfully, but find that `right` - * child is inconvertible. Alas, `b.startAnd()` call can't be rolled back, and `b` is inconsistent - * now. 
- * - * The workaround employed here is that, for `And`/`Or`/`Not`, we first try to convert their - * children with brand new builders, and only do the actual conversion with the right builder - * instance when the children are proven to be convertible. - * - * P.S.: Hive seems to use `SearchArgument` together with `ExprNodeGenericFuncDesc` only. Usage of - * builder methods mentioned above can only be found in test code, where all tested filters are - * known to be convertible. - */ -private[orc] object OrcFilters extends Logging { - - private def findMethod(klass: Class[_], name: String, args: Class[_]*): Method = { - val method = klass.getMethod(name, args: _*) - method.setAccessible(true) - method - } - - def createFilter(schema: StructType, filters: Array[Filter]): Option[SearchArgument] = { - DatasourceOrcFilters.createFilter(schema, filters).asInstanceOf[Option[SearchArgument]] - } - - def convertibleFilters( - schema: StructType, - dataTypeMap: Map[String, DataType], - filters: Seq[Filter]): Seq[Filter] = { - import org.apache.spark.sql.sources._ - - def convertibleFiltersHelper( - filter: Filter, - canPartialPushDown: Boolean): Option[Filter] = filter match { - // At here, it is not safe to just convert one side and remove the other side - // if we do not understand what the parent filters are. - // - // Here is an example used to explain the reason. - // Let's say we have NOT(a = 2 AND b in ('1')) and we do not understand how to - // convert b in ('1'). If we only convert a = 2, we will end up with a filter - // NOT(a = 2), which will generate wrong results. - // - // Pushing one side of AND down is only safe to do at the top level or in the child - // AND before hitting NOT or OR conditions, and in this case, the unsupported predicate - // can be safely removed. - case And(left, right) => - val leftResultOptional = convertibleFiltersHelper(left, canPartialPushDown) - val rightResultOptional = convertibleFiltersHelper(right, canPartialPushDown) - (leftResultOptional, rightResultOptional) match { - case (Some(leftResult), Some(rightResult)) => Some(And(leftResult, rightResult)) - case (Some(leftResult), None) if canPartialPushDown => Some(leftResult) - case (None, Some(rightResult)) if canPartialPushDown => Some(rightResult) - case _ => None - } - - // The Or predicate is convertible when both of its children can be pushed down. - // That is to say, if one/both of the children can be partially pushed down, the Or - // predicate can be partially pushed down as well. - // - // Here is an example used to explain the reason. - // Let's say we have - // (a1 AND a2) OR (b1 AND b2), - // a1 and b1 is convertible, while a2 and b2 is not. - // The predicate can be converted as - // (a1 OR b1) AND (a1 OR b2) AND (a2 OR b1) AND (a2 OR b2) - // As per the logical in And predicate, we can push down (a1 OR b1). - case Or(left, right) => - for { - lhs <- convertibleFiltersHelper(left, canPartialPushDown) - rhs <- convertibleFiltersHelper(right, canPartialPushDown) - } yield Or(lhs, rhs) - case Not(pred) => - val childResultOptional = convertibleFiltersHelper(pred, canPartialPushDown = false) - childResultOptional.map(Not) - case other => - for (_ <- buildLeafSearchArgument(dataTypeMap, other, newBuilder())) yield other - } - filters.flatMap { filter => - convertibleFiltersHelper(filter, true) - } - } - - /** - * Build a SearchArgument and return the builder so far. - * - * @param dataTypeMap a map from the attribute name to its data type. 
- * @param expression the input predicates, which should be fully convertible to SearchArgument. - * @param builder the input SearchArgument.Builder. - * @return the builder so far. - */ - private def buildSearchArgument( - dataTypeMap: Map[String, DataType], - expression: Filter, - builder: Builder): Builder = { - expression match { - case And(left, right) => - val lhs = buildSearchArgument(dataTypeMap, left, builder.startAnd()) - val rhs = buildSearchArgument(dataTypeMap, right, lhs) - rhs.end() - - case Or(left, right) => - val lhs = buildSearchArgument(dataTypeMap, left, builder.startOr()) - val rhs = buildSearchArgument(dataTypeMap, right, lhs) - rhs.end() - - case Not(child) => - buildSearchArgument(dataTypeMap, child, builder.startNot()).end() - - case other => - buildLeafSearchArgument(dataTypeMap, other, builder).getOrElse { - throw new SparkException( - "The input filter of OrcFilters.buildSearchArgument should be fully convertible.") - } - } - } - - /** - * Build a SearchArgument for a leaf predicate and return the builder so far. - * - * @param dataTypeMap a map from the attribute name to its data type. - * @param expression the input filter predicates. - * @param builder the input SearchArgument.Builder. - * @return the builder so far. - */ - private def buildLeafSearchArgument( - dataTypeMap: Map[String, DataType], - expression: Filter, - builder: Builder): Option[Builder] = { - def isSearchableType(dataType: DataType): Boolean = dataType match { - // Only the values in the Spark types below can be recognized by - // the `SearchArgumentImpl.BuilderImpl.boxLiteral()` method. - case ByteType | ShortType | FloatType | DoubleType => true - case IntegerType | LongType | StringType | BooleanType => true - case TimestampType | _: DecimalType => true - case _ => false - } - - import org.apache.spark.sql.sources._ - - // NOTE: For all case branches dealing with leaf predicates below, the additional `startAnd()` - // call is mandatory. ORC `SearchArgument` builder requires that all leaf predicates must be - // wrapped by a "parent" predicate (`And`, `Or`, or `Not`). - expression match { - // NOTE: For all case branches dealing with leaf predicates below, the additional `startAnd()` - // call is mandatory. ORC `SearchArgument` builder requires that all leaf predicates must be - // wrapped by a "parent" predicate (`And`, `Or`, or `Not`). 
- - case EqualTo(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val bd = builder.startAnd() - val method = findMethod(bd.getClass, "equals", classOf[String], classOf[Object]) - Some(method.invoke(bd, attribute, value.asInstanceOf[AnyRef]).asInstanceOf[Builder].end()) - - case EqualNullSafe(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val bd = builder.startAnd() - val method = findMethod(bd.getClass, "nullSafeEquals", classOf[String], classOf[Object]) - Some(method.invoke(bd, attribute, value.asInstanceOf[AnyRef]).asInstanceOf[Builder].end()) - - case LessThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val bd = builder.startAnd() - val method = findMethod(bd.getClass, "lessThan", classOf[String], classOf[Object]) - Some(method.invoke(bd, attribute, value.asInstanceOf[AnyRef]).asInstanceOf[Builder].end()) - - case LessThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val bd = builder.startAnd() - val method = findMethod(bd.getClass, "lessThanEquals", classOf[String], classOf[Object]) - Some(method.invoke(bd, attribute, value.asInstanceOf[AnyRef]).asInstanceOf[Builder].end()) - - case GreaterThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val bd = builder.startNot() - val method = findMethod(bd.getClass, "lessThanEquals", classOf[String], classOf[Object]) - Some(method.invoke(bd, attribute, value.asInstanceOf[AnyRef]).asInstanceOf[Builder].end()) - - case GreaterThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val bd = builder.startNot() - val method = findMethod(bd.getClass, "lessThan", classOf[String], classOf[Object]) - Some(method.invoke(bd, attribute, value.asInstanceOf[AnyRef]).asInstanceOf[Builder].end()) - - case IsNull(attribute) if isSearchableType(dataTypeMap(attribute)) => - val bd = builder.startAnd() - val method = findMethod(bd.getClass, "isNull", classOf[String]) - Some(method.invoke(bd, attribute).asInstanceOf[Builder].end()) - - case IsNotNull(attribute) if isSearchableType(dataTypeMap(attribute)) => - val bd = builder.startNot() - val method = findMethod(bd.getClass, "isNull", classOf[String]) - Some(method.invoke(bd, attribute).asInstanceOf[Builder].end()) - - case In(attribute, values) if isSearchableType(dataTypeMap(attribute)) => - val bd = builder.startAnd() - val method = findMethod(bd.getClass, "in", classOf[String], classOf[Array[Object]]) - Some(method.invoke(bd, attribute, values.map(_.asInstanceOf[AnyRef])) - .asInstanceOf[Builder].end()) - - case _ => None - } - } -} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala deleted file mode 100644 index deb85f30463ae..0000000000000 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala +++ /dev/null @@ -1,469 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.hive.orc - -import java.nio.charset.StandardCharsets -import java.sql.{Date, Timestamp} - -import scala.collection.JavaConverters._ - -import org.apache.hadoop.hive.ql.io.sarg.{PredicateLeaf, SearchArgument} - -import org.apache.spark.sql.{Column, DataFrame} -import org.apache.spark.sql.catalyst.dsl.expressions._ -import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.planning.PhysicalOperation -import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, HadoopFsRelation, LogicalRelation} -import org.apache.spark.sql.execution.datasources.orc.OrcTest -import org.apache.spark.sql.hive.HiveUtils -import org.apache.spark.sql.hive.test.TestHiveSingleton -import org.apache.spark.sql.types._ - -/** - * A test suite that tests Hive ORC filter API based filter pushdown optimization. - */ -class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton { - - override val orcImp: String = "hive" - - private def checkFilterPredicate( - df: DataFrame, - predicate: Predicate, - checker: (SearchArgument) => Unit): Unit = { - val output = predicate.collect { case a: Attribute => a }.distinct - val query = df - .select(output.map(e => Column(e)): _*) - .where(Column(predicate)) - - var maybeRelation: Option[HadoopFsRelation] = None - val maybeAnalyzedPredicate = query.queryExecution.optimizedPlan.collect { - case PhysicalOperation(_, filters, LogicalRelation(orcRelation: HadoopFsRelation, _, _, _)) => - maybeRelation = Some(orcRelation) - filters - }.flatten.reduceLeftOption(_ && _) - assert(maybeAnalyzedPredicate.isDefined, "No filter is analyzed from the given query") - - val (_, selectedFilters, _) = - DataSourceStrategy.selectFilters(maybeRelation.get, maybeAnalyzedPredicate.toSeq) - assert(selectedFilters.nonEmpty, "No filter is pushed down") - - val maybeFilter = OrcFilters.createFilter(query.schema, selectedFilters.toArray) - assert(maybeFilter.isDefined, s"Couldn't generate filter predicate for $selectedFilters") - checker(maybeFilter.get) - } - - private def checkFilterPredicate - (predicate: Predicate, filterOperator: PredicateLeaf.Operator) - (implicit df: DataFrame): Unit = { - def checkComparisonOperator(filter: SearchArgument) = { - val operator = filter.getLeaves.asScala - assert(operator.map(_.getOperator).contains(filterOperator)) - } - checkFilterPredicate(df, predicate, checkComparisonOperator) - } - - private def checkFilterPredicateWithDiffHiveVersion - (predicate: Predicate, stringExpr: String) - (implicit df: DataFrame): Unit = { - def checkLogicalOperator(filter: SearchArgument) = { - assert(filter.toString == stringExpr.replace("\n", ", ")) - } - checkFilterPredicate(df, predicate, checkLogicalOperator) - } - - private def assertResultWithDiffHiveVersion(expected : String)(c : scala.Any) = { - assertResult(expected.replace("\n", ", "))(c) - } - - private def checkNoFilterPredicate - (predicate: Predicate) - (implicit df: DataFrame): Unit = { - val output = predicate.collect { case a: Attribute => a }.distinct - val query = df - .select(output.map(e => Column(e)): _*) - 
.where(Column(predicate)) - - var maybeRelation: Option[HadoopFsRelation] = None - val maybeAnalyzedPredicate = query.queryExecution.optimizedPlan.collect { - case PhysicalOperation(_, filters, LogicalRelation(orcRelation: HadoopFsRelation, _, _, _)) => - maybeRelation = Some(orcRelation) - filters - }.flatten.reduceLeftOption(_ && _) - assert(maybeAnalyzedPredicate.isDefined, "No filter is analyzed from the given query") - - val (_, selectedFilters, _) = - DataSourceStrategy.selectFilters(maybeRelation.get, maybeAnalyzedPredicate.toSeq) - assert(selectedFilters.nonEmpty, "No filter is pushed down") - - val maybeFilter = OrcFilters.createFilter(query.schema, selectedFilters.toArray) - assert(maybeFilter.isEmpty, s"Could generate filter predicate for $selectedFilters") - } - - test("filter pushdown - integer") { - withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i)))) { implicit df => - checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - long") { - withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toLong)))) { implicit df => - checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - float") { - withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toFloat)))) { implicit df => - checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" <= 1, 
PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - double") { - withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toDouble)))) { implicit df => - checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - string") { - withOrcDataFrame((1 to 4).map(i => Tuple1(i.toString))) { implicit df => - checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate($"_1" === "1", PredicateLeaf.Operator.EQUALS) - checkFilterPredicate($"_1" <=> "1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate($"_1" < "2", PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate($"_1" > "3", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" <= "1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" >= "4", PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal("1") === $"_1", PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal("1") <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal("2") > $"_1", PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal("3") < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal("1") >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal("4") <= $"_1", PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - boolean") { - withOrcDataFrame((true :: false :: Nil).map(b => Tuple1.apply(Option(b)))) { implicit df => - checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate($"_1" === true, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate($"_1" <=> true, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate($"_1" < true, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate($"_1" > false, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" <= false, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" >= false, PredicateLeaf.Operator.LESS_THAN) - - 
checkFilterPredicate(Literal(false) === $"_1", PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(false) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(false) > $"_1", PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(true) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(true) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(true) <= $"_1", PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - decimal") { - withOrcDataFrame((1 to 4).map(i => Tuple1.apply(BigDecimal.valueOf(i)))) { implicit df => - checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate($"_1" === BigDecimal.valueOf(1), PredicateLeaf.Operator.EQUALS) - checkFilterPredicate($"_1" <=> BigDecimal.valueOf(1), PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate($"_1" < BigDecimal.valueOf(2), PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate($"_1" > BigDecimal.valueOf(3), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" <= BigDecimal.valueOf(1), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" >= BigDecimal.valueOf(4), PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate( - Literal(BigDecimal.valueOf(1)) === $"_1", PredicateLeaf.Operator.EQUALS) - checkFilterPredicate( - Literal(BigDecimal.valueOf(1)) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate( - Literal(BigDecimal.valueOf(2)) > $"_1", PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate( - Literal(BigDecimal.valueOf(3)) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate( - Literal(BigDecimal.valueOf(1)) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate( - Literal(BigDecimal.valueOf(4)) <= $"_1", PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - timestamp") { - val timeString = "2015-08-20 14:57:00" - val timestamps = (1 to 4).map { i => - val milliseconds = Timestamp.valueOf(timeString).getTime + i * 3600 - new Timestamp(milliseconds) - } - withOrcDataFrame(timestamps.map(Tuple1(_))) { implicit df => - checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate($"_1" === timestamps(0), PredicateLeaf.Operator.EQUALS) - checkFilterPredicate($"_1" <=> timestamps(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate($"_1" < timestamps(1), PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate($"_1" > timestamps(2), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" <= timestamps(0), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" >= timestamps(3), PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(timestamps(0)) === $"_1", PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(timestamps(0)) <=> $"_1", - PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(timestamps(1)) > $"_1", PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(timestamps(2)) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(timestamps(0)) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(timestamps(3)) <= $"_1", PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - combinations with logical operators") { - withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i)))) { implicit df => - // Because `ExpressionTree` is not accessible at Hive 1.2.x, this 
should be checked - // in string form in order to check filter creation including logical operators - // such as `and`, `or` or `not`. So, this function uses `SearchArgument.toString()` - // to produce string expression and then compare it to given string expression below. - // This might have to be changed after Hive version is upgraded. - checkFilterPredicateWithDiffHiveVersion( - $"_1".isNotNull, - """leaf-0 = (IS_NULL _1) - |expr = (not leaf-0)""".stripMargin.trim - ) - checkFilterPredicateWithDiffHiveVersion( - $"_1" =!= 1, - """leaf-0 = (IS_NULL _1) - |leaf-1 = (EQUALS _1 1) - |expr = (and (not leaf-0) (not leaf-1))""".stripMargin.trim - ) - checkFilterPredicateWithDiffHiveVersion( - !($"_1" < 4), - """leaf-0 = (IS_NULL _1) - |leaf-1 = (LESS_THAN _1 4) - |expr = (and (not leaf-0) (not leaf-1))""".stripMargin.trim - ) - checkFilterPredicateWithDiffHiveVersion( - $"_1" < 2 || $"_1" > 3, - """leaf-0 = (LESS_THAN _1 2) - |leaf-1 = (LESS_THAN_EQUALS _1 3) - |expr = (or leaf-0 (not leaf-1))""".stripMargin.trim - ) - checkFilterPredicateWithDiffHiveVersion( - $"_1" < 2 && $"_1" > 3, - """leaf-0 = (IS_NULL _1) - |leaf-1 = (LESS_THAN _1 2) - |leaf-2 = (LESS_THAN_EQUALS _1 3) - |expr = (and (not leaf-0) leaf-1 (not leaf-2))""".stripMargin.trim - ) - } - } - - test("no filter pushdown - non-supported types") { - implicit class IntToBinary(int: Int) { - def b: Array[Byte] = int.toString.getBytes(StandardCharsets.UTF_8) - } - // ArrayType - withOrcDataFrame((1 to 4).map(i => Tuple1(Array(i)))) { implicit df => - checkNoFilterPredicate($"_1".isNull) - } - // BinaryType - withOrcDataFrame((1 to 4).map(i => Tuple1(i.b))) { implicit df => - checkNoFilterPredicate($"_1" <=> 1.b) - } - // MapType - withOrcDataFrame((1 to 4).map(i => Tuple1(Map(i -> i)))) { implicit df => - checkNoFilterPredicate($"_1".isNotNull) - } - } - - test("SPARK-12218 and SPARK-25699 Converting conjunctions into ORC SearchArguments") { - import org.apache.spark.sql.sources._ - // The `LessThan` should be converted while the `StringContains` shouldn't - val schema = new StructType( - Array( - StructField("a", IntegerType, nullable = true), - StructField("b", StringType, nullable = true))) - assertResultWithDiffHiveVersion( - """leaf-0 = (LESS_THAN a 10) - |expr = leaf-0 - """.stripMargin.trim - ) { - OrcFilters.createFilter(schema, Array( - LessThan("a", 10), - StringContains("b", "prefix") - )).get.toString - } - - // The `LessThan` should be converted while the whole inner `And` shouldn't - assertResultWithDiffHiveVersion( - """leaf-0 = (LESS_THAN a 10) - |expr = leaf-0 - """.stripMargin.trim - ) { - OrcFilters.createFilter(schema, Array( - LessThan("a", 10), - Not(And( - GreaterThan("a", 1), - StringContains("b", "prefix") - )) - )).get.toString - } - - // Safely remove unsupported `StringContains` predicate and push down `LessThan` - assertResultWithDiffHiveVersion( - """leaf-0 = (LESS_THAN a 10) - |expr = leaf-0 - """.stripMargin.trim - ) { - OrcFilters.createFilter(schema, Array( - And( - LessThan("a", 10), - StringContains("b", "prefix") - ) - )).get.toString - } - - // Safely remove unsupported `StringContains` predicate, push down `LessThan` and `GreaterThan`. 
- assertResultWithDiffHiveVersion( - """leaf-0 = (LESS_THAN a 10) - |leaf-1 = (LESS_THAN_EQUALS a 1) - |expr = (and leaf-0 (not leaf-1)) - """.stripMargin.trim - ) { - OrcFilters.createFilter(schema, Array( - And( - And( - LessThan("a", 10), - StringContains("b", "prefix") - ), - GreaterThan("a", 1) - ) - )).get.toString - } - } - - test("SPARK-27699 Converting disjunctions into ORC SearchArguments") { - import org.apache.spark.sql.sources._ - // The `LessThan` should be converted while the `StringContains` shouldn't - val schema = new StructType( - Array( - StructField("a", IntegerType, nullable = true), - StructField("b", StringType, nullable = true))) - - // The predicate `StringContains` predicate is not able to be pushed down. - assertResultWithDiffHiveVersion("leaf-0 = (LESS_THAN_EQUALS a 10)\nleaf-1 = (LESS_THAN a 1)\n" + - "expr = (or (not leaf-0) leaf-1)") { - OrcFilters.createFilter(schema, Array( - Or( - GreaterThan("a", 10), - And( - StringContains("b", "prefix"), - LessThan("a", 1) - ) - ) - )).get.toString - } - - assertResultWithDiffHiveVersion("leaf-0 = (LESS_THAN_EQUALS a 10)\nleaf-1 = (LESS_THAN a 1)\n" + - "expr = (or (not leaf-0) leaf-1)") { - OrcFilters.createFilter(schema, Array( - Or( - And( - GreaterThan("a", 10), - StringContains("b", "foobar") - ), - And( - StringContains("b", "prefix"), - LessThan("a", 1) - ) - ) - )).get.toString - } - - assert(OrcFilters.createFilter(schema, Array( - Or( - StringContains("b", "foobar"), - And( - StringContains("b", "prefix"), - LessThan("a", 1) - ) - ) - )).isEmpty) - } -} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala index 1901ed505197c..fcf7febe33121 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala @@ -168,9 +168,6 @@ class HiveOrcQuerySuite extends OrcQueryTest with TestHiveSingleton { } } - // Since Hive 1.2.1 library code path still has this problem, users may hit this - // when spark.sql.hive.convertMetastoreOrc=false. However, after SPARK-22279, - // Apache Spark with the default configuration doesn't hit this bug. test("SPARK-22267 Spark SQL incorrectly reads ORC files when column order is different") { Seq("native", "hive").foreach { orcImpl => withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> orcImpl) { @@ -179,10 +176,12 @@ class HiveOrcQuerySuite extends OrcQueryTest with TestHiveSingleton { Seq(1 -> 2).toDF("c1", "c2").write.orc(path) checkAnswer(spark.read.orc(path), Row(1, 2)) - withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "true") { // default since 2.3.0 - withTable("t") { - sql(s"CREATE EXTERNAL TABLE t(c2 INT, c1 INT) STORED AS ORC LOCATION '$path'") - checkAnswer(spark.table("t"), Row(2, 1)) + Seq(true, false).foreach { convertMetastoreOrc => + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> convertMetastoreOrc.toString) { + withTable("t") { + sql(s"CREATE EXTERNAL TABLE t(c2 INT, c1 INT) STORED AS ORC LOCATION '$path'") + checkAnswer(spark.table("t"), Row(2, 1)) + } } } } @@ -190,9 +189,6 @@ class HiveOrcQuerySuite extends OrcQueryTest with TestHiveSingleton { } } - // Since Hive 1.2.1 library code path still has this problem, users may hit this - // when spark.sql.hive.convertMetastoreOrc=false. However, after SPARK-22279, - // Apache Spark with the default configuration doesn't hit this bug. 
test("SPARK-19809 NullPointerException on zero-size ORC file") { Seq("native", "hive").foreach { orcImpl => withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> orcImpl) { @@ -201,8 +197,10 @@ class HiveOrcQuerySuite extends OrcQueryTest with TestHiveSingleton { sql(s"CREATE TABLE spark_19809(a int) STORED AS ORC LOCATION '$dir'") Files.touch(new File(s"${dir.getCanonicalPath}", "zero.orc")) - withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "true") { // default since 2.3.0 - checkAnswer(spark.table("spark_19809"), Seq.empty) + Seq(true, false).foreach { convertMetastoreOrc => + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> convertMetastoreOrc.toString) { + checkAnswer(spark.table("spark_19809"), Seq.empty) + } } } } From 018811f9747d063a44543ceb265351377f0bc917 Mon Sep 17 00:00:00 2001 From: zero323 Date: Sat, 10 Oct 2020 13:48:26 +0900 Subject: [PATCH 0207/1009] [SPARK-33105][INFRA] Change default R arch from i386 to x64 and parametrize BINPREF ### What changes were proposed in this pull request? - Change default R `arch` from `i386` to `x64`, to match Rtools version. - Parameterize `BINPREF` with `WIN` (https://stackoverflow.com/a/44035904) Reported on dev: http://apache-spark-developers-list.1001551.n3.nabble.com/Broken-rlang-installation-on-AppVeyor-td30294.html ### Why are the changes needed? It seems like update from rlang 0.4.7 to 0.4.8 exposed an issue, where build fails because of incompatible ddl ``` c:/Rtools40/mingw64/bin/../lib/gcc/x86_64-w64-mingw32/8.3.0/../../../../x86_64-w64-mingw32/bin/ld.exe: skipping incompatible C:/R/bin/i386/R.dll when searching for -lR [00:01:52] c:/Rtools40/mingw64/bin/../lib/gcc/x86_64-w64-mingw32/8.3.0/../../../../x86_64-w64-mingw32/bin/ld.exe: skipping incompatible C:/R/bin/i386/R.dll when searching for -lR [00:01:52] c:/Rtools40/mingw64/bin/../lib/gcc/x86_64-w64-mingw32/8.3.0/../../../../x86_64-w64-mingw32/bin/ld.exe: cannot find -lR [00:01:52] collect2.exe: error: ld returned 1 exit status ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #29991 from zero323/APPVEYOR-DEAFAULT-ARCH. Authored-by: zero323 Signed-off-by: HyukjinKwon --- dev/appveyor-install-dependencies.ps1 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/appveyor-install-dependencies.ps1 b/dev/appveyor-install-dependencies.ps1 index e344a7fc23191..fb4cc22de35f4 100644 --- a/dev/appveyor-install-dependencies.ps1 +++ b/dev/appveyor-install-dependencies.ps1 @@ -19,7 +19,7 @@ $CRAN = "https://cloud.r-project.org" Function InstallR { if ( -not(Test-Path Env:\R_ARCH) ) { - $arch = "i386" + $arch = "x64" } Else { $arch = $env:R_ARCH @@ -68,7 +68,7 @@ Function InstallRtools { $gccPath = $env:GCC_PATH } $env:PATH = $RtoolsDrive + '\Rtools40\bin;' + $RtoolsDrive + '\Rtools40\mingw64\bin;' + $RtoolsDrive + '\Rtools40\' + $gccPath + '\bin;' + $env:PATH - $env:BINPREF=$RtoolsDrive + '/Rtools40/mingw64/bin/' + $env:BINPREF=$RtoolsDrive + '/Rtools40/mingw$(WIN)/bin/' } # create tools directory outside of Spark directory From 1e63dcc8f0b267f3d835db5b59c60360c04d9c0a Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Sat, 10 Oct 2020 13:53:09 +0900 Subject: [PATCH 0208/1009] [SPARK-33102][SQL] Use stringToSeq on SQL list typed parameters ### What changes were proposed in this pull request? 
While implementing the JDBC provider disable functionality, it was pointed out [here](https://github.com/apache/spark/pull/29964#discussion_r501786746) that `Utils.stringToSeq` must be used when a string-list-typed SQL parameter is handled. In this PR I've fixed the problematic parameters. ### Why are the changes needed? `Utils.stringToSeq` must be used when a string-list-typed SQL parameter is handled. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing unit tests. Closes #29989 from gaborgsomogyi/SPARK-33102. Authored-by: Gabor Somogyi Signed-off-by: HyukjinKwon --- .../spark/sql/execution/streaming/MicroBatchExecution.scala | 4 ++-- .../org/apache/spark/sql/streaming/DataStreamWriter.scala | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala index aad212cc13486..c485d0f7d8b2d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.execution.datasources.v2.{StreamingDataSourceV2Relat import org.apache.spark.sql.execution.streaming.sources.WriteToMicroBatchDataSource import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.{OutputMode, Trigger} -import org.apache.spark.util.Clock +import org.apache.spark.util.{Clock, Utils} class MicroBatchExecution( sparkSession: SparkSession, @@ -76,7 +76,7 @@ class MicroBatchExecution( // transformation is responsible for replacing attributes with their final values. val disabledSources = - sparkSession.sqlContext.conf.disabledV2StreamingMicroBatchReaders.split(",") + Utils.stringToSeq(sparkSession.sqlContext.conf.disabledV2StreamingMicroBatchReaders) import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ val _logicalPlan = analyzedPlan.transform { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index 239b4fc2de374..2867bf581df81 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -36,6 +36,7 @@ import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.sources._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.util.Utils /** * Interface used to write a streaming `Dataset` to external storage systems (e.g. file systems, @@ -366,7 +367,8 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { startQuery(sink, extraOptions) } else { val cls = DataSource.lookupDataSource(source, df.sparkSession.sessionState.conf) - val disabledSources = df.sparkSession.sqlContext.conf.disabledV2StreamingWriters.split(",") + val disabledSources = + Utils.stringToSeq(df.sparkSession.sqlContext.conf.disabledV2StreamingWriters) val useV1Source = disabledSources.contains(cls.getCanonicalName) || // file source v2 does not support streaming yet. 
classOf[FileDataSourceV2].isAssignableFrom(cls) From dfb7790a9dad8e98bd27001a613b4e13a5eb9d51 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 9 Oct 2020 22:35:12 -0700 Subject: [PATCH 0209/1009] [SPARK-33108][BUILD] Remove sbt-dependency-graph SBT plugin ### What changes were proposed in this pull request? This PR aims to remove the `sbt-dependency-graph` SBT plugin. ### Why are the changes needed? `sbt-dependency-graph` officially doesn't support SBT 1.3.x and is broken due to a `NoSuchMethodError`. This cannot be fixed on the `sbt-dependency-graph` side for SBT 1.3.x - https://github.com/sbt/sbt-dependency-graph > Note: Under sbt >= 1.3.x some features might currently not work as expected or not at all (like dependencyLicenses). ``` $ build/sbt dependencyTree Launching sbt from build/sbt-launch-1.3.13.jar [info] welcome to sbt 1.3.13 (AdoptOpenJDK Java 1.8.0_252) ... [error] java.lang.NoSuchMethodError: sbt.internal.LibraryManagement$.cachedUpdate(Lsbt/librarymanagement/DependencyResolution;Lsbt/librarymanagement/ModuleDescriptor;Lsbt/util/CacheStoreFactory;Ljava/lang/String;Lsbt/librarymanagement/UpdateConfiguration;Lscala/Function1;ZZZLsbt/librarymanagement/UnresolvedWarningConfiguration;Lsbt/librarymanagement/EvictionWarningOptions;ZLsbt/internal/librarymanagement/CompatibilityWarningOptions;Lsbt/util/Logger;)Lsbt/librarymanagement/UpdateReport; ``` **ALTERNATIVES** - One alternative is `coursier`, but it requires the `coursier-based sbt launcher`, which is more intrusive. - https://get-coursier.io/docs/sbt-coursier.html#sbt-13x > you'll have to use the coursier-based sbt launcher, via its custom sbt-extras launcher for example. - Another alternative is moving to `SBT 1.4.0`, which includes `sbt-dependency-graph` as a built-in, but it's still new and would require many changes. So, this PR simply removes the broken plugin. ### Does this PR introduce _any_ user-facing change? No. This is a dev-only change. ### How was this patch tested? Manual. ``` $ build/sbt dependencyTree ... [error] Not a valid command: dependencyTree [error] Not a valid project ID: dependencyTree [error] Not a valid key: dependencyTree (similar: dependencyOverrides, sbtDependency, dependencyResolution) [error] dependencyTree [error] ^ ``` Closes #29997 from dongjoon-hyun/remove_depedencyTree. Lead-authored-by: Dongjoon Hyun Co-authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- project/plugins.sbt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/project/plugins.sbt b/project/plugins.sbt index da466da9945c1..920aa677f9e92 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -27,8 +27,6 @@ addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.15.0") addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "5.2.4") -addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.9.2") - addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0") addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "0.8.0") @@ -47,3 +45,5 @@ libraryDependencies += "org.ow2.asm" % "asm-commons" % "7.2" addSbtPlugin("com.simplytyped" % "sbt-antlr4" % "0.8.2") addSbtPlugin("com.typesafe.sbt" % "sbt-pom-reader" % "2.2.0") + +// TODO(SPARK-33109) Upgrade to SBT 1.4 and support `dependencyTree` back From 7696ca56732166977642a777f1d94cfba67b4151 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Sat, 10 Oct 2020 16:24:50 -0700 Subject: [PATCH 0210/1009] [SPARK-32881][CORE] Catch some race condition errors and log them more clearly ### What changes were proposed in this pull request? 
Decommissioning can run out of time, resulting in race conditions. These race conditions produce confusing error messages but have no negative impact. ### Why are the changes needed? The NPE and missing-element errors in the log can create a misunderstanding. ### Does this PR introduce _any_ user-facing change? Logs change. ### How was this patch tested? Existing tests pass. Closes #29992 from holdenk/SPARK-32881-error-messaging-on-decom-race-messages. Authored-by: Holden Karau Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/MapOutputTracker.scala | 21 +++++++++------- .../storage/BlockManagerMasterEndpoint.scala | 24 ++++++++++++------- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala index 64102ccc05882..c3152d9225107 100644 --- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala +++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala @@ -125,14 +125,19 @@ private class ShuffleStatus(numPartitions: Int) extends Logging { * Update the map output location (e.g. during migration). */ def updateMapOutput(mapId: Long, bmAddress: BlockManagerId): Unit = withWriteLock { - val mapStatusOpt = mapStatuses.find(_.mapId == mapId) - mapStatusOpt match { - case Some(mapStatus) => - logInfo(s"Updating map output for ${mapId} to ${bmAddress}") - mapStatus.updateLocation(bmAddress) - invalidateSerializedMapOutputStatusCache() - case None => - logError(s"Asked to update map output ${mapId} for untracked map status.") + try { + val mapStatusOpt = mapStatuses.find(_.mapId == mapId) + mapStatusOpt match { + case Some(mapStatus) => + logInfo(s"Updating map output for ${mapId} to ${bmAddress}") + mapStatus.updateLocation(bmAddress) + invalidateSerializedMapOutputStatusCache() + case None => + logWarning(s"Asked to update map output ${mapId} for untracked map status.") + } + } catch { + case e: java.lang.NullPointerException => + logWarning(s"Unable to update map output for ${mapId}, status removed in-flight") } } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala index a3d42348befaa..61a88b4f26c00 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala @@ -380,16 +380,22 @@ class BlockManagerMasterEndpoint( * @return Seq of ReplicateBlock */ private def getReplicateInfoForRDDBlocks(blockManagerId: BlockManagerId): Seq[ReplicateBlock] = { - val info = blockManagerInfo(blockManagerId) + try { + val info = blockManagerInfo(blockManagerId) - val rddBlocks = info.blocks.keySet().asScala.filter(_.isRDD) - rddBlocks.map { blockId => - val currentBlockLocations = blockLocations.get(blockId) - val maxReplicas = currentBlockLocations.size + 1 - val remainingLocations = currentBlockLocations.toSeq.filter(bm => bm != blockManagerId) - val replicateMsg = ReplicateBlock(blockId, remainingLocations, maxReplicas) - replicateMsg - }.toSeq + val rddBlocks = info.blocks.keySet().asScala.filter(_.isRDD) + rddBlocks.map { blockId => + val currentBlockLocations = blockLocations.get(blockId) + val maxReplicas = currentBlockLocations.size + 1 + val remainingLocations = currentBlockLocations.toSeq.filter(bm => bm != blockManagerId) + val replicateMsg = ReplicateBlock(blockId, remainingLocations, maxReplicas) + replicateMsg + }.toSeq + } 
catch { + // If the block manager has already exited, nothing to replicate. + case e: java.util.NoSuchElementException => + Seq.empty[ReplicateBlock] + } } // Remove a block from the workers that have it. This can only be used to remove From 5e170140b0374762087b204008da141febaacee3 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sat, 10 Oct 2020 16:41:42 -0700 Subject: [PATCH 0211/1009] [SPARK-33107][SQL] Remove hive-2.3 workaround code ### What changes were proposed in this pull request? This pr remove `hive-2.3` workaround code. ### Why are the changes needed? Make code more clear and readable. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing unit tests. Closes #29996 from wangyum/SPARK-33107. Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- .../SparkExecuteStatementOperation.scala | 3 +- .../SparkGetTypeInfoOperation.scala | 14 +++- .../hive/thriftserver/SparkSQLCLIDriver.scala | 5 +- .../thriftserver/SparkSQLSessionManager.scala | 3 +- .../thriftserver/ThriftserverShimUtils.scala | 80 ------------------- .../GetCatalogsOperationMock.scala | 3 +- .../thriftserver/HiveSessionImplSuite.scala | 3 +- .../HiveThriftServer2Suites.scala | 3 +- .../thriftserver/SharedThriftServer.scala | 3 +- .../SparkExecuteStatementOperationSuite.scala | 3 +- .../SparkMetadataOperationSuite.scala | 2 +- ...arkThriftServerProtocolVersionsSuite.scala | 33 ++++---- .../org/apache/spark/sql/hive/HiveShim.scala | 36 +-------- .../org/apache/spark/sql/hive/HiveUtils.scala | 4 +- .../org/apache/spark/sql/hive/hiveUDFs.scala | 8 +- 15 files changed, 51 insertions(+), 152 deletions(-) delete mode 100644 sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala index d30951f89cf6b..ec2c795e95c83 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala @@ -118,8 +118,7 @@ private[hive] class SparkExecuteStatementOperation( validateDefaultFetchOrientation(order) assertState(OperationState.FINISHED) setHasResultSet(true) - val resultRowSet: RowSet = - ThriftserverShimUtils.resultRowSet(getResultSetSchema, getProtocolVersion) + val resultRowSet: RowSet = RowSetFactory.create(getResultSetSchema, getProtocolVersion, false) // Reset iter when FETCH_FIRST or FETCH_PRIOR if ((order.equals(FetchOrientation.FETCH_FIRST) || diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala index c2568ad4ada0a..26b5f8ad8cee1 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala @@ -20,6 +20,8 @@ package org.apache.spark.sql.hive.thriftserver import java.util.UUID import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType +import org.apache.hadoop.hive.serde2.thrift.Type +import org.apache.hadoop.hive.serde2.thrift.Type._ import 
org.apache.hive.service.cli.OperationState import org.apache.hive.service.cli.operation.GetTypeInfoOperation import org.apache.hive.service.cli.session.HiveSession @@ -61,7 +63,7 @@ private[hive] class SparkGetTypeInfoOperation( parentSession.getUsername) try { - ThriftserverShimUtils.supportedType().foreach(typeInfo => { + SparkGetTypeInfoUtil.supportedType.foreach(typeInfo => { val rowData = Array[AnyRef]( typeInfo.getName, // TYPE_NAME typeInfo.toJavaSQLType.asInstanceOf[AnyRef], // DATA_TYPE @@ -90,3 +92,13 @@ private[hive] class SparkGetTypeInfoOperation( HiveThriftServer2.eventManager.onStatementFinish(statementId) } } + +private[hive] object SparkGetTypeInfoUtil { + val supportedType: Seq[Type] = { + Seq(NULL_TYPE, BOOLEAN_TYPE, STRING_TYPE, BINARY_TYPE, + TINYINT_TYPE, SMALLINT_TYPE, INT_TYPE, BIGINT_TYPE, + FLOAT_TYPE, DOUBLE_TYPE, DECIMAL_TYPE, + DATE_TYPE, TIMESTAMP_TYPE, + ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE) + } +} diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 6676223af4fce..965f28ebe0840 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -37,6 +37,7 @@ import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.security.{Credentials, UserGroupInformation} import org.apache.log4j.Level import org.apache.thrift.transport.TSocket +import org.slf4j.LoggerFactory import sun.misc.{Signal, SignalHandler} import org.apache.spark.SparkConf @@ -307,7 +308,9 @@ private[hive] object SparkSQLCLIDriver extends Logging { private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { private val sessionState = SessionState.get().asInstanceOf[CliSessionState] - private val console = ThriftserverShimUtils.getConsole + private val LOG = LoggerFactory.getLogger(classOf[SparkSQLCLIDriver]) + + private val console = new SessionState.LogHelper(LOG) private val isRemoteMode = { SparkSQLCLIDriver.isRemoteMode(sessionState) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala index 806b6146b2db1..e4559e69e7585 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala @@ -24,6 +24,7 @@ import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.hive.service.cli.SessionHandle import org.apache.hive.service.cli.session.SessionManager +import org.apache.hive.service.rpc.thrift.TProtocolVersion import org.apache.hive.service.server.HiveServer2 import org.apache.spark.sql.SQLContext @@ -45,7 +46,7 @@ private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, sqlContext: } override def openSession( - protocol: ThriftserverShimUtils.TProtocolVersion, + protocol: TProtocolVersion, username: String, passwd: String, ipAddress: String, diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala deleted file mode 100644 index c8ac5226b296e..0000000000000 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.hive.thriftserver - -import org.apache.hadoop.hive.ql.session.SessionState -import org.apache.hadoop.hive.serde2.thrift.Type -import org.apache.hadoop.hive.serde2.thrift.Type._ -import org.apache.hive.service.cli.{RowSet, RowSetFactory, TableSchema} -import org.apache.hive.service.rpc.thrift.TProtocolVersion._ -import org.slf4j.LoggerFactory - -/** - * Various utilities for hive-thriftserver used to upgrade the built-in Hive. - */ -private[thriftserver] object ThriftserverShimUtils { - - private[thriftserver] object TOperationType { - val GET_TYPE_INFO = org.apache.hive.service.rpc.thrift.TOperationType.GET_TYPE_INFO - } - - private[thriftserver] type TProtocolVersion = org.apache.hive.service.rpc.thrift.TProtocolVersion - private[thriftserver] type Client = org.apache.hive.service.rpc.thrift.TCLIService.Client - private[thriftserver] type TOpenSessionReq = org.apache.hive.service.rpc.thrift.TOpenSessionReq - private[thriftserver] type TGetSchemasReq = org.apache.hive.service.rpc.thrift.TGetSchemasReq - private[thriftserver] type TGetTablesReq = org.apache.hive.service.rpc.thrift.TGetTablesReq - private[thriftserver] type TGetColumnsReq = org.apache.hive.service.rpc.thrift.TGetColumnsReq - private[thriftserver] type TGetInfoReq = org.apache.hive.service.rpc.thrift.TGetInfoReq - private[thriftserver] type TExecuteStatementReq = - org.apache.hive.service.rpc.thrift.TExecuteStatementReq - private[thriftserver] type THandleIdentifier = - org.apache.hive.service.rpc.thrift.THandleIdentifier - private[thriftserver] type TOperationType = org.apache.hive.service.rpc.thrift.TOperationType - private[thriftserver] type TOperationHandle = org.apache.hive.service.rpc.thrift.TOperationHandle - - private[thriftserver] def getConsole: SessionState.LogHelper = { - val LOG = LoggerFactory.getLogger(classOf[SparkSQLCLIDriver]) - new SessionState.LogHelper(LOG) - } - - private[thriftserver] def resultRowSet( - getResultSetSchema: TableSchema, - getProtocolVersion: TProtocolVersion): RowSet = { - RowSetFactory.create(getResultSetSchema, getProtocolVersion, false) - } - - private[thriftserver] def supportedType(): Seq[Type] = { - Seq(NULL_TYPE, BOOLEAN_TYPE, STRING_TYPE, BINARY_TYPE, - TINYINT_TYPE, SMALLINT_TYPE, INT_TYPE, BIGINT_TYPE, - FLOAT_TYPE, DOUBLE_TYPE, DECIMAL_TYPE, - DATE_TYPE, TIMESTAMP_TYPE, - ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE) - } - - private[thriftserver] val 
testedProtocolVersions = Seq( - HIVE_CLI_SERVICE_PROTOCOL_V1, - HIVE_CLI_SERVICE_PROTOCOL_V2, - HIVE_CLI_SERVICE_PROTOCOL_V3, - HIVE_CLI_SERVICE_PROTOCOL_V4, - HIVE_CLI_SERVICE_PROTOCOL_V5, - HIVE_CLI_SERVICE_PROTOCOL_V6, - HIVE_CLI_SERVICE_PROTOCOL_V7, - HIVE_CLI_SERVICE_PROTOCOL_V8, - HIVE_CLI_SERVICE_PROTOCOL_V9, - HIVE_CLI_SERVICE_PROTOCOL_V10) -} diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/GetCatalogsOperationMock.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/GetCatalogsOperationMock.scala index 764f1690d5a66..1bc9aaf672c3b 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/GetCatalogsOperationMock.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/GetCatalogsOperationMock.scala @@ -22,8 +22,7 @@ import java.util.UUID import org.apache.hive.service.cli.OperationHandle import org.apache.hive.service.cli.operation.GetCatalogsOperation import org.apache.hive.service.cli.session.HiveSession - -import org.apache.spark.sql.hive.thriftserver.ThriftserverShimUtils.{THandleIdentifier, TOperationHandle, TOperationType} +import org.apache.hive.service.rpc.thrift.{THandleIdentifier, TOperationHandle, TOperationType} class GetCatalogsOperationMock(parentSession: HiveSession) extends GetCatalogsOperation(parentSession) { diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveSessionImplSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveSessionImplSuite.scala index 47db7e34a5a2c..13dc74b92d4b3 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveSessionImplSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveSessionImplSuite.scala @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.conf.HiveConf import org.apache.hive.service.cli.OperationHandle import org.apache.hive.service.cli.operation.{GetCatalogsOperation, Operation, OperationManager} import org.apache.hive.service.cli.session.{HiveSession, HiveSessionImpl, SessionManager} +import org.apache.hive.service.rpc.thrift.TProtocolVersion import org.apache.spark.SparkFunSuite @@ -39,7 +40,7 @@ class HiveSessionImplSuite extends SparkFunSuite { operationManager = new OperationManagerMock() session = new HiveSessionImpl( - ThriftserverShimUtils.testedProtocolVersions.head, + TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V1, "", "", new HiveConf(), diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala index 27d4c4bc40bec..75c00000dee47 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala @@ -37,6 +37,7 @@ import org.apache.hive.jdbc.HiveDriver import org.apache.hive.service.auth.PlainSaslHelper import org.apache.hive.service.cli.{FetchOrientation, FetchType, GetInfoType, RowSet} import org.apache.hive.service.cli.thrift.ThriftCLIServiceClient +import org.apache.hive.service.rpc.thrift.TCLIService.Client import org.apache.thrift.protocol.TBinaryProtocol import org.apache.thrift.transport.TSocket import org.scalatest.BeforeAndAfterAll @@ -67,7 +68,7 @@ class HiveThriftBinaryServerSuite extends 
HiveThriftJdbcTest { val user = System.getProperty("user.name") val transport = PlainSaslHelper.getPlainTransport(user, "anonymous", rawTransport) val protocol = new TBinaryProtocol(transport) - val client = new ThriftCLIServiceClient(new ThriftserverShimUtils.Client(protocol)) + val client = new ThriftCLIServiceClient(new Client(protocol)) transport.open() try f(client) finally transport.close() diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SharedThriftServer.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SharedThriftServer.scala index 5f17607585521..8f61268c838fe 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SharedThriftServer.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SharedThriftServer.scala @@ -31,6 +31,7 @@ import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hive.jdbc.HttpBasicAuthInterceptor import org.apache.hive.service.auth.PlainSaslHelper import org.apache.hive.service.cli.thrift.{ThriftCLIService, ThriftCLIServiceClient} +import org.apache.hive.service.rpc.thrift.TCLIService.Client import org.apache.http.impl.client.HttpClientBuilder import org.apache.thrift.protocol.TBinaryProtocol import org.apache.thrift.transport.{THttpClient, TSocket} @@ -115,7 +116,7 @@ trait SharedThriftServer extends SharedSparkSession { } val protocol = new TBinaryProtocol(transport) - val client = new ThriftCLIServiceClient(new ThriftserverShimUtils.Client(protocol)) + val client = new ThriftCLIServiceClient(new Client(protocol)) transport.open() try f(client) finally transport.close() diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperationSuite.scala index 4c2f29e0bf394..ca1f9a2f74244 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperationSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperationSuite.scala @@ -25,6 +25,7 @@ import scala.concurrent.duration._ import org.apache.hadoop.hive.conf.HiveConf import org.apache.hive.service.cli.OperationState import org.apache.hive.service.cli.session.{HiveSession, HiveSessionImpl} +import org.apache.hive.service.rpc.thrift.TProtocolVersion import org.mockito.Mockito.{doReturn, mock, spy, when, RETURNS_DEEP_STUBS} import org.mockito.invocation.InvocationOnMock @@ -64,7 +65,7 @@ class SparkExecuteStatementOperationSuite extends SparkFunSuite with SharedSpark ).foreach { case (finalState, transition) => test("SPARK-32057 SparkExecuteStatementOperation should not transiently become ERROR " + s"before being set to $finalState") { - val hiveSession = new HiveSessionImpl(ThriftserverShimUtils.testedProtocolVersions.head, + val hiveSession = new HiveSessionImpl(TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V1, "username", "password", new HiveConf, "ip address") hiveSession.open(new util.HashMap) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala index 818f387f131d6..b94d819326d16 100644 --- 
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala @@ -255,7 +255,7 @@ class SparkMetadataOperationSuite extends HiveThriftJdbcTest { withJdbcStatement() { statement => val metaData = statement.getConnection.getMetaData - checkResult(metaData.getTypeInfo, ThriftserverShimUtils.supportedType().map(_.getName)) + checkResult(metaData.getTypeInfo, SparkGetTypeInfoUtil.supportedType.map(_.getName)) } } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala index d5582077d6170..52cf429441d16 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala @@ -23,11 +23,12 @@ import java.util.{List => JList, Properties} import org.apache.hive.jdbc.{HiveConnection, HiveQueryResultSet} import org.apache.hive.service.auth.PlainSaslHelper import org.apache.hive.service.cli.GetInfoType +import org.apache.hive.service.rpc.thrift.{TExecuteStatementReq, TGetInfoReq, TGetTablesReq, TOpenSessionReq, TProtocolVersion} +import org.apache.hive.service.rpc.thrift.TCLIService.Client import org.apache.thrift.protocol.TBinaryProtocol import org.apache.thrift.transport.TSocket import org.apache.spark.sql.catalyst.util.NumberConverter -import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.unsafe.types.UTF8String class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { @@ -35,20 +36,20 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { override def mode: ServerMode.Value = ServerMode.binary def testExecuteStatementWithProtocolVersion( - version: ThriftserverShimUtils.TProtocolVersion, + version: TProtocolVersion, sql: String)(f: HiveQueryResultSet => Unit): Unit = { val rawTransport = new TSocket("localhost", serverPort) val connection = new HiveConnection(s"jdbc:hive2://localhost:$serverPort", new Properties) val user = System.getProperty("user.name") val transport = PlainSaslHelper.getPlainTransport(user, "anonymous", rawTransport) - val client = new ThriftserverShimUtils.Client(new TBinaryProtocol(transport)) + val client = new Client(new TBinaryProtocol(transport)) transport.open() var rs: HiveQueryResultSet = null try { - val clientProtocol = new ThriftserverShimUtils.TOpenSessionReq(version) + val clientProtocol = new TOpenSessionReq(version) val openResp = client.OpenSession(clientProtocol) val sessHandle = openResp.getSessionHandle - val execReq = new ThriftserverShimUtils.TExecuteStatementReq(sessHandle, sql) + val execReq = new TExecuteStatementReq(sessHandle, sql) val execResp = client.ExecuteStatement(execReq) val stmtHandle = execResp.getOperationHandle @@ -73,23 +74,21 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { } } - def testGetInfoWithProtocolVersion(version: ThriftserverShimUtils.TProtocolVersion): Unit = { + def testGetInfoWithProtocolVersion(version: TProtocolVersion): Unit = { val rawTransport = new TSocket("localhost", serverPort) val connection = new HiveConnection(s"jdbc:hive2://localhost:$serverPort", new Properties) val transport = 
PlainSaslHelper.getPlainTransport(user, "anonymous", rawTransport) - val client = new ThriftserverShimUtils.Client(new TBinaryProtocol(transport)) + val client = new Client(new TBinaryProtocol(transport)) transport.open() try { - val clientProtocol = new ThriftserverShimUtils.TOpenSessionReq(version) + val clientProtocol = new TOpenSessionReq(version) val openResp = client.OpenSession(clientProtocol) val sessHandle = openResp.getSessionHandle - val dbVersionReq = - new ThriftserverShimUtils.TGetInfoReq(sessHandle, GetInfoType.CLI_DBMS_VER.toTGetInfoType) + val dbVersionReq = new TGetInfoReq(sessHandle, GetInfoType.CLI_DBMS_VER.toTGetInfoType) val dbVersion = client.GetInfo(dbVersionReq).getInfoValue.getStringValue - val dbNameReq = - new ThriftserverShimUtils.TGetInfoReq(sessHandle, GetInfoType.CLI_DBMS_NAME.toTGetInfoType) + val dbNameReq = new TGetInfoReq(sessHandle, GetInfoType.CLI_DBMS_NAME.toTGetInfoType) val dbName = client.GetInfo(dbNameReq).getInfoValue.getStringValue assert(dbVersion === org.apache.spark.SPARK_VERSION) @@ -102,21 +101,21 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { } def testGetTablesWithProtocolVersion( - version: ThriftserverShimUtils.TProtocolVersion, + version: TProtocolVersion, schema: String, tableNamePattern: String, tableTypes: JList[String])(f: HiveQueryResultSet => Unit): Unit = { val rawTransport = new TSocket("localhost", serverPort) val connection = new HiveConnection(s"jdbc:hive2://localhost:$serverPort", new Properties) val transport = PlainSaslHelper.getPlainTransport(user, "anonymous", rawTransport) - val client = new ThriftserverShimUtils.Client(new TBinaryProtocol(transport)) + val client = new Client(new TBinaryProtocol(transport)) transport.open() var rs: HiveQueryResultSet = null try { - val clientProtocol = new ThriftserverShimUtils.TOpenSessionReq(version) + val clientProtocol = new TOpenSessionReq(version) val openResp = client.OpenSession(clientProtocol) val sessHandle = openResp.getSessionHandle - val getTableReq = new ThriftserverShimUtils.TGetTablesReq(sessHandle) + val getTableReq = new TGetTablesReq(sessHandle) getTableReq.setSchemaName(schema) getTableReq.setTableName(tableNamePattern) getTableReq.setTableTypes(tableTypes) @@ -144,7 +143,7 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { } } - ThriftserverShimUtils.testedProtocolVersions.foreach { version => + TProtocolVersion.values().foreach { version => test(s"$version get byte type") { testExecuteStatementWithProtocolVersion(version, "SELECT cast(1 as byte)") { rs => assert(rs.next()) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala index 1f8ce04270a04..3a53a2a8dadd8 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala @@ -17,18 +17,16 @@ package org.apache.spark.sql.hive -import java.io.{InputStream, OutputStream} -import java.lang.reflect.Method import java.rmi.server.UID import scala.collection.JavaConverters._ import scala.language.implicitConversions -import scala.reflect.ClassTag import com.google.common.base.Objects import org.apache.avro.Schema import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path +import org.apache.hadoop.hive.ql.exec.SerializationUtilities import org.apache.hadoop.hive.ql.exec.UDF import org.apache.hadoop.hive.ql.plan.{FileSinkDesc, TableDesc} import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDFMacro @@ -148,40 +146,12 @@ private[hive] object HiveShim { case _ => false } - private lazy val serUtilClass = - Utils.classForName("org.apache.hadoop.hive.ql.exec.SerializationUtilities") - private lazy val utilClass = Utils.classForName("org.apache.hadoop.hive.ql.exec.Utilities") - private val deserializeMethodName = "deserializeObjectByKryo" - private val serializeMethodName = "serializeObjectByKryo" - - private def findMethod(klass: Class[_], name: String, args: Class[_]*): Method = { - val method = klass.getDeclaredMethod(name, args: _*) - method.setAccessible(true) - method - } - def deserializePlan[UDFType](is: java.io.InputStream, clazz: Class[_]): UDFType = { - val borrowKryo = serUtilClass.getMethod("borrowKryo") - val kryo = borrowKryo.invoke(serUtilClass) - val deserializeObjectByKryo = findMethod(serUtilClass, deserializeMethodName, - kryo.getClass.getSuperclass, classOf[InputStream], classOf[Class[_]]) - try { - deserializeObjectByKryo.invoke(null, kryo, is, clazz).asInstanceOf[UDFType] - } finally { - serUtilClass.getMethod("releaseKryo", kryo.getClass.getSuperclass).invoke(null, kryo) - } + SerializationUtilities.deserializePlan(is, clazz).asInstanceOf[UDFType] } def serializePlan(function: AnyRef, out: java.io.OutputStream): Unit = { - val borrowKryo = serUtilClass.getMethod("borrowKryo") - val kryo = borrowKryo.invoke(serUtilClass) - val serializeObjectByKryo = findMethod(serUtilClass, serializeMethodName, - kryo.getClass.getSuperclass, classOf[Object], classOf[OutputStream]) - try { - serializeObjectByKryo.invoke(null, kryo, function, out) - } finally { - serUtilClass.getMethod("releaseKryo", kryo.getClass.getSuperclass).invoke(null, kryo) - } + SerializationUtilities.serializePlan(function, out) } def writeExternal(out: java.io.ObjectOutput): Unit = { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index 7d4bf7305546c..96c207913d49a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -55,10 +55,8 @@ private[spark] object HiveUtils extends Logging { sc } - private val hiveVersion = HiveVersionInfo.getVersion - /** The version of hive used internally by Spark SQL. */ - val builtinHiveVersion: String = hiveVersion + val builtinHiveVersion: String = HiveVersionInfo.getVersion val HIVE_METASTORE_VERSION = buildStaticConf("spark.sql.hive.metastore.version") .doc("Version of the Hive metastore. 
Available options are " + diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala index 7fccb72fb913b..c7002853bed54 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.hive -import java.lang.{Boolean => JBoolean} import java.nio.ByteBuffer import scala.collection.JavaConverters._ @@ -39,7 +38,6 @@ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.hive.HiveShim._ import org.apache.spark.sql.types._ -import org.apache.spark.util.Utils /** * Here we cannot extends `ImplicitTypeCasts` to compatible with UDF input data type, the reason is: @@ -349,11 +347,7 @@ private[hive] case class HiveUDAFFunction( funcWrapper.createFunction[AbstractGenericUDAFResolver]() } - val clazz = Utils.classForName(classOf[SimpleGenericUDAFParameterInfo].getName) - val ctor = clazz.getDeclaredConstructor( - classOf[Array[ObjectInspector]], JBoolean.TYPE, JBoolean.TYPE, JBoolean.TYPE) - val args = Array[AnyRef](inputInspectors, JBoolean.FALSE, JBoolean.FALSE, JBoolean.FALSE) - val parameterInfo = ctor.newInstance(args: _*).asInstanceOf[SimpleGenericUDAFParameterInfo] + val parameterInfo = new SimpleGenericUDAFParameterInfo(inputInspectors, false, false, false) resolver.getEvaluator(parameterInfo) } From 83f8e13956d5602ff4d37b742da427aa07537c1f Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 12 Oct 2020 10:29:28 +0900 Subject: [PATCH 0212/1009] [SPARK-33086][FOLLOW-UP] Remove unused Optional import from pyspark.resource.profile stub ### What changes were proposed in this pull request? Remove unused `typing.Optional` import from `pyspark.resource.profile` stub. ### Why are the changes needed? Since SPARK-32319 we don't allow unused imports. However, this one slipped both local and CI tests for some reason. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests and mypy check. Closes #30002 from zero323/SPARK-33086-FOLLOWUP. Authored-by: zero323 Signed-off-by: HyukjinKwon --- python/pyspark/resource/profile.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/resource/profile.pyi b/python/pyspark/resource/profile.pyi index d6ea340bb510f..6763baf6590a3 100644 --- a/python/pyspark/resource/profile.pyi +++ b/python/pyspark/resource/profile.pyi @@ -22,7 +22,7 @@ from pyspark.resource.requests import ( # noqa: F401 TaskResourceRequest as TaskResourceRequest, TaskResourceRequests as TaskResourceRequests, ) -from typing import overload, Dict, Optional, Union +from typing import overload, Dict, Union from py4j.java_gateway import JavaObject # type: ignore[import] class ResourceProfile: From c78971b1c7214357a275481a5af468259bcf406f Mon Sep 17 00:00:00 2001 From: Denis Pyshev Date: Sun, 11 Oct 2020 18:51:00 -0700 Subject: [PATCH 0213/1009] [SPARK-33106][BUILD] Fix resolvers clash in SBT ### What changes were proposed in this pull request? Rename manually added resolver for local Ivy repo. Create configuration to publish to local Ivy repo similar to Maven one. Use `publishLocal` to publish both to local Maven and Ivy repos instead of custom task `localPublish` (renamed from `publish-local` of sbt 0.13.x). ### Why are the changes needed? 
There are two resolvers (bootResolvers' "local" and the manually added "local") that point to the same local Ivy repo, but have different configurations, which led to excessive warnings in the logs and, potentially, resolution issues. The changeset fixes that case, which is observable in the sbt output as ``` [warn] Multiple resolvers having different access mechanism configured with same name 'local'. To avoid conflict, Remove duplicate project resolvers (`resolvers`) or rename publishing resolver (`publishTo`). ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Executed `build/sbt`'s `publishLocal` task on an individual module and on the root project. Closes #30006 from gemelen/feature/local-resolvers. Authored-by: Denis Pyshev Signed-off-by: Dongjoon Hyun --- project/SparkBuild.scala | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 6929342d2f539..47369722ba9b2 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -108,7 +108,7 @@ object SparkBuild extends PomBuild { override val userPropertiesMap = System.getProperties.asScala.toMap lazy val MavenCompile = config("m2r") extend(Compile) - lazy val publishLocalBoth = TaskKey[Unit]("localPublish", "publish local for m2 and ivy", KeyRanks.ATask) + lazy val SbtCompile = config("sbt") extend(Compile) lazy val sparkGenjavadocSettings: Seq[sbt.Def.Setting[_]] = GenJavadocPlugin.projectSettings ++ Seq( scalacOptions ++= Seq( @@ -213,7 +213,7 @@ object SparkBuild extends PomBuild { "gcs-maven-central-mirror" at "https://maven-central.storage-download.googleapis.com/maven2/", DefaultMavenRepository, Resolver.mavenLocal, - Resolver.file("local", file(Path.userHome.absolutePath + "/.ivy2/local"))(Resolver.ivyStylePatterns) + Resolver.file("ivyLocal", file(Path.userHome.absolutePath + "/.ivy2/local"))(Resolver.ivyStylePatterns) ), externalResolvers := resolvers.value, otherResolvers := SbtPomKeys.mvnLocalRepository(dotM2 => Seq(Resolver.file("dotM2", dotM2))).value, @@ -221,9 +221,15 @@ object SparkBuild extends PomBuild { .withResolverName("dotM2") .withArtifacts(packagedArtifacts.value.toVector) .withLogging(ivyLoggingLevel.value), + publishLocalConfiguration in SbtCompile := PublishConfiguration() + .withResolverName("ivyLocal") + .withArtifacts(packagedArtifacts.value.toVector) + .withLogging(ivyLoggingLevel.value), publishMavenStyle in MavenCompile := true, + publishMavenStyle in SbtCompile := false, publishLocal in MavenCompile := publishTask(publishLocalConfiguration in MavenCompile).value, - publishLocalBoth := Seq(publishLocal in MavenCompile, publishLocal).dependOn.value, + publishLocal in SbtCompile := publishTask(publishLocalConfiguration in SbtCompile).value, + publishLocal := Seq(publishLocal in MavenCompile, publishLocal in SbtCompile).dependOn.value, javacOptions in (Compile, doc) ++= { val versionParts = System.getProperty("java.version").split("[+.\\-]+", 3) From 50b2a497f37c7a51b34dee1c0cb80910687ad4a2 Mon Sep 17 00:00:00 2001 From: William Hyun Date: Sun, 11 Oct 2020 20:05:40 -0700 Subject: [PATCH 0214/1009] [SPARK-21708][BUILD][FOLLOWUP] Rename hdpVersion to hadoopVersionValue This PR aims to rename hdpVersion to hadoopVersionValue. Use the more general variable name. No. Pass the CI. Closes #30008 from williamhyun/sbt. 
Authored-by: William Hyun Signed-off-by: Dongjoon Hyun --- project/SparkBuild.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 47369722ba9b2..f20a84451c5c5 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -756,12 +756,12 @@ object Assembly { .getOrElse(SbtPomKeys.effectivePom.value.getProperties.get("hadoop.version").asInstanceOf[String]) }, assemblyJarName in assembly := { - lazy val hdpVersion = hadoopVersion.value + lazy val hadoopVersionValue = hadoopVersion.value if (moduleName.value.contains("streaming-kafka-0-10-assembly") || moduleName.value.contains("streaming-kinesis-asl-assembly")) { s"${moduleName.value}-${version.value}.jar" } else { - s"${moduleName.value}-${version.value}-hadoop${hdpVersion}.jar" + s"${moduleName.value}-${version.value}-hadoop${hadoopVersionValue}.jar" } }, assemblyJarName in (Test, assembly) := s"${moduleName.value}-test-${version.value}.jar", From 4af1ac93846a0dfdcc57ec7604ed51d7787bd6fd Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Mon, 12 Oct 2020 12:24:54 +0900 Subject: [PATCH 0215/1009] [SPARK-32047][SQL] Add JDBC connection provider disable possibility ### What changes were proposed in this pull request? At the moment there is no possibility to turn off JDBC authentication providers which exists on the classpath. This can be problematic because service providers are loaded with service loader. In this PR I've added `spark.sql.sources.disabledJdbcConnProviderList` configuration possibility (default: empty). ### Why are the changes needed? No possibility to turn off JDBC authentication providers. ### Does this PR introduce _any_ user-facing change? Yes, it introduces new configuration option. ### How was this patch tested? * Existing + newly added unit tests. * Existing integration tests. Closes #29964 from gaborgsomogyi/SPARK-32047. Authored-by: Gabor Somogyi Signed-off-by: HyukjinKwon --- .../org/apache/spark/sql/internal/SQLConf.scala | 11 +++++++++++ .../jdbc/connection/BasicConnectionProvider.scala | 2 ++ .../jdbc/connection/ConnectionProvider.scala | 7 +++++-- .../jdbc/connection/DB2ConnectionProvider.scala | 2 ++ .../jdbc/connection/MSSQLConnectionProvider.scala | 2 ++ .../connection/MariaDBConnectionProvider.scala | 2 ++ .../connection/OracleConnectionProvider.scala | 2 ++ .../connection/PostgresConnectionProvider.scala | 2 ++ .../spark/sql/jdbc/JdbcConnectionProvider.scala | 6 ++++++ .../jdbc/connection/ConnectionProviderSuite.scala | 15 +++++++++++++-- .../connection/ConnectionProviderSuiteBase.scala | 2 +- .../IntentionallyFaultyConnectionProvider.scala | 1 + 12 files changed, 49 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 18ffc655b2174..8cbdbfe16d2bc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2775,6 +2775,15 @@ object SQLConf { .booleanConf .createWithDefault(false) + val DISABLED_JDBC_CONN_PROVIDER_LIST = + buildConf("spark.sql.sources.disabledJdbcConnProviderList") + .internal() + .doc("Configures a list of JDBC connection providers, which are disabled. 
" + + "The list contains the name of the JDBC connection providers separated by comma.") + .version("3.1.0") + .stringConf + .createWithDefault("") + /** * Holds information about keys that have been deprecated. * @@ -3399,6 +3408,8 @@ class SQLConf extends Serializable with Logging { def truncateTrashEnabled: Boolean = getConf(SQLConf.TRUNCATE_TRASH_ENABLED) + def disabledJdbcConnectionProviders: String = getConf(SQLConf.DISABLED_JDBC_CONN_PROVIDER_LIST) + /** ********************** SQLConf functionality methods ************ */ /** Set Spark SQL configuration properties. */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala index a5f04649e6628..1c0513f982a1e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala @@ -30,6 +30,8 @@ private[jdbc] class BasicConnectionProvider extends JdbcConnectionProvider with */ def getAdditionalProperties(options: JDBCOptions): Properties = new Properties() + override val name: String = "basic" + override def canHandle(driver: Driver, options: Map[String, String]): Boolean = { val jdbcOptions = new JDBCOptions(options) jdbcOptions.keytab == null || jdbcOptions.principal == null diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala index 649a0bda4236c..e81add4df960a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala @@ -24,6 +24,7 @@ import scala.collection.mutable import org.apache.spark.internal.Logging import org.apache.spark.security.SecurityConfigurationLock +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.jdbc.JdbcConnectionProvider import org.apache.spark.util.Utils @@ -47,8 +48,10 @@ private[jdbc] object ConnectionProvider extends Logging { logInfo("Loading of the provider failed with the exception:", t) } } - // Seems duplicate but it's needed for Scala 2.13 - providers.toSeq + + val disabledProviders = Utils.stringToSeq(SQLConf.get.disabledJdbcConnectionProviders) + // toSeq seems duplicate but it's needed for Scala 2.13 + providers.filterNot(p => disabledProviders.contains(p.name)).toSeq } def create(driver: Driver, options: Map[String, String]): Connection = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/DB2ConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/DB2ConnectionProvider.scala index ca82cdc561bef..775c3ae4a533a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/DB2ConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/DB2ConnectionProvider.scala @@ -28,6 +28,8 @@ import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions private[sql] class DB2ConnectionProvider extends SecureConnectionProvider { override val driverClass = "com.ibm.db2.jcc.DB2Driver" + override val 
name: String = "db2" + override def appEntry(driver: Driver, options: JDBCOptions): String = "JaasClient" override def getConnection(driver: Driver, options: Map[String, String]): Connection = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MSSQLConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MSSQLConnectionProvider.scala index 4e405b2187e56..e3d3e1a43d510 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MSSQLConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MSSQLConnectionProvider.scala @@ -29,6 +29,8 @@ private[sql] class MSSQLConnectionProvider extends SecureConnectionProvider { override val driverClass = "com.microsoft.sqlserver.jdbc.SQLServerDriver" val parserMethod: String = "parseAndMergeProperties" + override val name: String = "mssql" + override def appEntry(driver: Driver, options: JDBCOptions): String = { val configName = "jaasConfigurationName" val appEntryDefault = "SQLJDBCDriver" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MariaDBConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MariaDBConnectionProvider.scala index d5fe13bf0ca19..29a08d0b5f269 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MariaDBConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MariaDBConnectionProvider.scala @@ -24,6 +24,8 @@ import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions private[jdbc] class MariaDBConnectionProvider extends SecureConnectionProvider { override val driverClass = "org.mariadb.jdbc.Driver" + override val name: String = "mariadb" + override def appEntry(driver: Driver, options: JDBCOptions): String = "Krb5ConnectorContext" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/OracleConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/OracleConnectionProvider.scala index 3defda3871765..0d43851bb255e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/OracleConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/OracleConnectionProvider.scala @@ -28,6 +28,8 @@ import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions private[sql] class OracleConnectionProvider extends SecureConnectionProvider { override val driverClass = "oracle.jdbc.OracleDriver" + override val name: String = "oracle" + override def appEntry(driver: Driver, options: JDBCOptions): String = "kprb5module" override def getConnection(driver: Driver, options: Map[String, String]): Connection = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/PostgresConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/PostgresConnectionProvider.scala index dae8aea81f20a..f26a11e34dc38 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/PostgresConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/PostgresConnectionProvider.scala @@ -25,6 +25,8 @@ import 
org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions private[jdbc] class PostgresConnectionProvider extends SecureConnectionProvider { override val driverClass = "org.postgresql.Driver" + override val name: String = "postgres" + override def appEntry(driver: Driver, options: JDBCOptions): String = { val parseURL = driver.getClass.getMethod("parseURL", classOf[String], classOf[Properties]) val properties = parseURL.invoke(driver, options.url, null).asInstanceOf[Properties] diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcConnectionProvider.scala index caf574b0c2284..1e8abca197025 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcConnectionProvider.scala @@ -34,6 +34,12 @@ import org.apache.spark.annotation.{DeveloperApi, Unstable} @DeveloperApi @Unstable abstract class JdbcConnectionProvider { + /** + * Name of the service to provide JDBC connections. This name should be unique. Spark will + * internally use this name to differentiate JDBC connection providers. + */ + val name: String + /** * Checks if this connection provider instance can handle the connection initiated by the driver. * There must be exactly one active connection provider which can handle the connection for a diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuite.scala index a48dbdebea7e9..0e9498b2681e2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuite.scala @@ -19,8 +19,11 @@ package org.apache.spark.sql.execution.datasources.jdbc.connection import javax.security.auth.login.Configuration -class ConnectionProviderSuite extends ConnectionProviderSuiteBase { - test("All built-in provides must be loaded") { +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession + +class ConnectionProviderSuite extends ConnectionProviderSuiteBase with SharedSparkSession { + test("All built-in providers must be loaded") { IntentionallyFaultyConnectionProvider.constructed = false val providers = ConnectionProvider.loadProviders() assert(providers.exists(_.isInstanceOf[BasicConnectionProvider])) @@ -34,6 +37,14 @@ class ConnectionProviderSuite extends ConnectionProviderSuiteBase { assert(providers.size === 6) } + test("Disabled provider must not be loaded") { + withSQLConf(SQLConf.DISABLED_JDBC_CONN_PROVIDER_LIST.key -> "db2") { + val providers = ConnectionProvider.loadProviders() + assert(!providers.exists(_.isInstanceOf[DB2ConnectionProvider])) + assert(providers.size === 5) + } + } + test("Multiple security configs must be reachable") { Configuration.setConfiguration(null) val postgresProvider = new PostgresConnectionProvider() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuiteBase.scala index be08a3c2f7367..a299841b3c149 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuiteBase.scala 
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuiteBase.scala @@ -42,7 +42,7 @@ abstract class ConnectionProviderSuiteBase extends SparkFunSuite with BeforeAndA JDBCOptions.JDBC_PRINCIPAL -> "principal" )) - override def afterEach(): Unit = { + protected override def afterEach(): Unit = { try { Configuration.setConfiguration(null) } finally { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/IntentionallyFaultyConnectionProvider.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/IntentionallyFaultyConnectionProvider.scala index fbefcb91cccde..329d79cae62e8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/IntentionallyFaultyConnectionProvider.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/IntentionallyFaultyConnectionProvider.scala @@ -24,6 +24,7 @@ import org.apache.spark.sql.jdbc.JdbcConnectionProvider private class IntentionallyFaultyConnectionProvider extends JdbcConnectionProvider { IntentionallyFaultyConnectionProvider.constructed = true throw new IllegalArgumentException("Intentional Exception") + override val name: String = "IntentionallyFaultyConnectionProvider" override def canHandle(driver: Driver, options: Map[String, String]): Boolean = true override def getConnection(driver: Driver, options: Map[String, String]): Connection = null } From 543d59dfbffadeb4e11f06d6bbf857f21ac03f73 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sun, 11 Oct 2020 21:54:56 -0700 Subject: [PATCH 0216/1009] [SPARK-33107][BUILD][FOLLOW-UP] Remove com.twitter:parquet-hadoop-bundle:1.6.0 and orc.classifier ### What changes were proposed in this pull request? This pr removes `com.twitter:parquet-hadoop-bundle:1.6.0` and `orc.classifier`. ### Why are the changes needed? To make code more clear and readable. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing test. Closes #30005 from wangyum/SPARK-33107. 
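Circling back to the SPARK-32047 change above: the new abstract `name` member is what entries in `spark.sql.sources.disabledJdbcConnProviderList` are matched against. A rough sketch of what a third-party provider looks like after this change (the class, its `name`, and the H2 URL check are invented for illustration; only the overridden members follow the API shown in the diff):

```scala
import java.sql.{Connection, Driver}
import java.util.Properties

import org.apache.spark.sql.jdbc.JdbcConnectionProvider

class MyH2ConnectionProvider extends JdbcConnectionProvider {
  // Unique provider name; listing it (comma separated) in
  // spark.sql.sources.disabledJdbcConnProviderList makes loadProviders() filter it out.
  override val name: String = "myH2"

  override def canHandle(driver: Driver, options: Map[String, String]): Boolean =
    options.get("url").exists(_.startsWith("jdbc:h2:"))

  override def getConnection(driver: Driver, options: Map[String, String]): Connection =
    driver.connect(options("url"), new Properties())
}
```

Disabling it at runtime would then look like `spark.conf.set("spark.sql.sources.disabledJdbcConnProviderList", "myH2")`, keeping in mind that the config is marked internal in the diff.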
Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- examples/pom.xml | 5 ----- pom.xml | 12 ------------ sql/core/pom.xml | 2 -- sql/hive/pom.xml | 5 ----- 4 files changed, 24 deletions(-) diff --git a/examples/pom.xml b/examples/pom.xml index c5bf2409964bb..8b632cef6d44d 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -109,11 +109,6 @@ scopt_${scala.binary.version} 3.7.1 - - ${hive.parquet.group} - parquet-hadoop-bundle - provided - diff --git a/pom.xml b/pom.xml index b13d5ab81856c..7f678ccf1e4e5 100644 --- a/pom.xml +++ b/pom.xml @@ -137,9 +137,6 @@ 10.12.1.1 1.10.1 1.5.12 - - com.twitter - 1.6.0 9.4.28.v20200408 3.1.0 0.9.5 @@ -236,7 +233,6 @@ --> compile compile - provided 2.7.2 compile compile @@ -2169,7 +2165,6 @@ org.apache.orc orc-core ${orc.version} - ${orc.classifier} ${orc.deps.scope} @@ -2194,7 +2189,6 @@ org.apache.orc orc-mapreduce ${orc.version} - ${orc.classifier} ${orc.deps.scope} @@ -2243,12 +2237,6 @@ ${parquet.version} ${parquet.test.deps.scope} - - ${hive.parquet.group} - parquet-hadoop-bundle - ${hive.parquet.version} - ${hive.parquet.scope} - org.codehaus.janino janino diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 0f5d3fd55c15d..3f088e420a9a3 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -88,12 +88,10 @@ org.apache.orc orc-core - ${orc.classifier} org.apache.orc orc-mapreduce - ${orc.classifier} org.apache.hive diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 474c6066ed040..0453094cf8b7b 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -35,11 +35,6 @@ - - - ${hive.parquet.group} - parquet-hadoop-bundle - org.apache.spark spark-core_${scala.binary.version} From 9896288b881788660cfaa3f45e90496105889bde Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 12 Oct 2020 00:27:53 -0700 Subject: [PATCH 0217/1009] [SPARK-33117][BUILD] Update zstd-jni to 1.4.5-6 ### What changes were proposed in this pull request? This PR aims to upgrade ZStandard library for Apache Spark 3.1.0. ### Why are the changes needed? This will bring the latest bug fixes. - https://github.com/luben/zstd-jni/commit/2662fbdc320ce482a24c20b8fcac8b1d5b79fe33 - https://github.com/luben/zstd-jni/commit/bbe140b758be2e0ba64566e16d44cafd6e4ba142 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CI. Closes #30010 from dongjoon-hyun/SPARK-33117. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 979bb1419ce7b..f049ad1f5bb74 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -241,4 +241,4 @@ xmlenc/0.52//xmlenc-0.52.jar xz/1.5//xz-1.5.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar zookeeper/3.4.14//zookeeper-3.4.14.jar -zstd-jni/1.4.5-4//zstd-jni-1.4.5-4.jar +zstd-jni/1.4.5-6//zstd-jni-1.4.5-6.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index ebaff6d1977c9..a4dbeb112473a 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -255,4 +255,4 @@ xbean-asm7-shaded/4.15//xbean-asm7-shaded-4.15.jar xz/1.5//xz-1.5.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar zookeeper/3.4.14//zookeeper-3.4.14.jar -zstd-jni/1.4.5-4//zstd-jni-1.4.5-4.jar +zstd-jni/1.4.5-6//zstd-jni-1.4.5-6.jar diff --git a/pom.xml b/pom.xml index 7f678ccf1e4e5..75b6776cbe470 100644 --- a/pom.xml +++ b/pom.xml @@ -688,7 +688,7 @@ com.github.luben zstd-jni - 1.4.5-4 + 1.4.5-6 com.clearspring.analytics From 78c0967bbe27d3872aa73ff9e6fafb095fd149c1 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 12 Oct 2020 16:54:21 +0900 Subject: [PATCH 0218/1009] [SPARK-33092][SQL] Support subexpression elimination in ProjectExec ### What changes were proposed in this pull request? This patch proposes to add subexpression elimination support into `ProjectExec`. It can be controlled by `spark.sql.subexpressionElimination.enabled` config. Before this change: ```scala val df = spark.read.option("header", true).csv("/tmp/test.csv") df.withColumn("my_map", expr("str_to_map(foo, '&', '=')")).select(col("my_map")("foo"), col("my_map")("bar"), col("my_map")("baz")).debugCodegen ``` L27-40: first `str_to_map`. L68:81: second `str_to_map`. L109-122: third `str_to_map`. ``` /* 024 */ private void project_doConsume_0(InternalRow inputadapter_row_0, UTF8String project_expr_0_0, boolean project_exprIsNull_0_0) throws java.io.IOException { /* 025 */ boolean project_isNull_0 = true; /* 026 */ UTF8String project_value_0 = null; /* 027 */ boolean project_isNull_1 = true; /* 028 */ MapData project_value_1 = null; /* 029 */ /* 030 */ if (!project_exprIsNull_0_0) { /* 031 */ project_isNull_1 = false; // resultCode could change nullability. /* 032 */ /* 033 */ UTF8String[] project_kvs_0 = project_expr_0_0.split(((UTF8String) references[1] /* literal */), -1); /* 034 */ for(UTF8String kvEntry: project_kvs_0) { /* 035 */ UTF8String[] kv = kvEntry.split(((UTF8String) references[2] /* literal */), 2); /* 036 */ ((org.apache.spark.sql.catalyst.util.ArrayBasedMapBuilder) references[0] /* mapBuilder */).put(kv[0], kv.length == 2 ? kv[1] : null); /* 037 */ } /* 038 */ project_value_1 = ((org.apache.spark.sql.catalyst.util.ArrayBasedMapBuilder) references[0] /* mapBuilder */).build(); /* 039 */ /* 040 */ } /* 041 */ if (!project_isNull_1) { /* 042 */ project_isNull_0 = false; // resultCode could change nullability. 
/* 043 */ /* 044 */ final int project_length_0 = project_value_1.numElements(); /* 045 */ final ArrayData project_keys_0 = project_value_1.keyArray(); /* 046 */ final ArrayData project_values_0 = project_value_1.valueArray(); /* 047 */ /* 048 */ int project_index_0 = 0; /* 049 */ boolean project_found_0 = false; /* 050 */ while (project_index_0 < project_length_0 && !project_found_0) { /* 051 */ final UTF8String project_key_0 = project_keys_0.getUTF8String(project_index_0); /* 052 */ if (project_key_0.equals(((UTF8String) references[3] /* literal */))) { /* 053 */ project_found_0 = true; /* 054 */ } else { /* 055 */ project_index_0++; /* 056 */ } /* 057 */ } /* 058 */ /* 059 */ if (!project_found_0 || project_values_0.isNullAt(project_index_0)) { /* 060 */ project_isNull_0 = true; /* 061 */ } else { /* 062 */ project_value_0 = project_values_0.getUTF8String(project_index_0); /* 063 */ } /* 064 */ /* 065 */ } /* 066 */ boolean project_isNull_6 = true; /* 067 */ UTF8String project_value_6 = null; /* 068 */ boolean project_isNull_7 = true; /* 069 */ MapData project_value_7 = null; /* 070 */ /* 071 */ if (!project_exprIsNull_0_0) { /* 072 */ project_isNull_7 = false; // resultCode could change nullability. /* 073 */ /* 074 */ UTF8String[] project_kvs_1 = project_expr_0_0.split(((UTF8String) references[5] /* literal */), -1); /* 075 */ for(UTF8String kvEntry: project_kvs_1) { /* 076 */ UTF8String[] kv = kvEntry.split(((UTF8String) references[6] /* literal */), 2); /* 077 */ ((org.apache.spark.sql.catalyst.util.ArrayBasedMapBuilder) references[4] /* mapBuilder */).put(kv[0], kv.length == 2 ? kv[1] : null); /* 078 */ } /* 079 */ project_value_7 = ((org.apache.spark.sql.catalyst.util.ArrayBasedMapBuilder) references[4] /* mapBuilder */).build(); /* 080 */ /* 081 */ } /* 082 */ if (!project_isNull_7) { /* 083 */ project_isNull_6 = false; // resultCode could change nullability. /* 084 */ /* 085 */ final int project_length_1 = project_value_7.numElements(); /* 086 */ final ArrayData project_keys_1 = project_value_7.keyArray(); /* 087 */ final ArrayData project_values_1 = project_value_7.valueArray(); /* 088 */ /* 089 */ int project_index_1 = 0; /* 090 */ boolean project_found_1 = false; /* 091 */ while (project_index_1 < project_length_1 && !project_found_1) { /* 092 */ final UTF8String project_key_1 = project_keys_1.getUTF8String(project_index_1); /* 093 */ if (project_key_1.equals(((UTF8String) references[7] /* literal */))) { /* 094 */ project_found_1 = true; /* 095 */ } else { /* 096 */ project_index_1++; /* 097 */ } /* 098 */ } /* 099 */ /* 100 */ if (!project_found_1 || project_values_1.isNullAt(project_index_1)) { /* 101 */ project_isNull_6 = true; /* 102 */ } else { /* 103 */ project_value_6 = project_values_1.getUTF8String(project_index_1); /* 104 */ } /* 105 */ /* 106 */ } /* 107 */ boolean project_isNull_12 = true; /* 108 */ UTF8String project_value_12 = null; /* 109 */ boolean project_isNull_13 = true; /* 110 */ MapData project_value_13 = null; /* 111 */ /* 112 */ if (!project_exprIsNull_0_0) { /* 113 */ project_isNull_13 = false; // resultCode could change nullability. /* 114 */ /* 115 */ UTF8String[] project_kvs_2 = project_expr_0_0.split(((UTF8String) references[9] /* literal */), -1); /* 116 */ for(UTF8String kvEntry: project_kvs_2) { /* 117 */ UTF8String[] kv = kvEntry.split(((UTF8String) references[10] /* literal */), 2); /* 118 */ ((org.apache.spark.sql.catalyst.util.ArrayBasedMapBuilder) references[8] /* mapBuilder */).put(kv[0], kv.length == 2 ? 
kv[1] : null); /* 119 */ } /* 120 */ project_value_13 = ((org.apache.spark.sql.catalyst.util.ArrayBasedMapBuilder) references[8] /* mapBuilder */).build(); /* 121 */ /* 122 */ } ... ``` After this change: L27-40 evaluates the common map variable. ``` /* 024 */ private void project_doConsume_0(InternalRow inputadapter_row_0, UTF8String project_expr_0_0, boolean project_exprIsNull_0_0) throws java.io.IOException { /* 025 */ // common sub-expressions /* 026 */ /* 027 */ boolean project_isNull_0 = true; /* 028 */ MapData project_value_0 = null; /* 029 */ /* 030 */ if (!project_exprIsNull_0_0) { /* 031 */ project_isNull_0 = false; // resultCode could change nullability. /* 032 */ /* 033 */ UTF8String[] project_kvs_0 = project_expr_0_0.split(((UTF8String) references[1] /* literal */), -1); /* 034 */ for(UTF8String kvEntry: project_kvs_0) { /* 035 */ UTF8String[] kv = kvEntry.split(((UTF8String) references[2] /* literal */), 2); /* 036 */ ((org.apache.spark.sql.catalyst.util.ArrayBasedMapBuilder) references[0] /* mapBuilder */).put(kv[0], kv.length == 2 ? kv[1] : null); /* 037 */ } /* 038 */ project_value_0 = ((org.apache.spark.sql.catalyst.util.ArrayBasedMapBuilder) references[0] /* mapBuilder */).build(); /* 039 */ /* 040 */ } /* 041 */ /* 042 */ boolean project_isNull_4 = true; /* 043 */ UTF8String project_value_4 = null; /* 044 */ /* 045 */ if (!project_isNull_0) { /* 046 */ project_isNull_4 = false; // resultCode could change nullability. /* 047 */ /* 048 */ final int project_length_0 = project_value_0.numElements(); /* 049 */ final ArrayData project_keys_0 = project_value_0.keyArray(); /* 050 */ final ArrayData project_values_0 = project_value_0.valueArray(); /* 051 */ /* 052 */ int project_index_0 = 0; /* 053 */ boolean project_found_0 = false; /* 054 */ while (project_index_0 < project_length_0 && !project_found_0) { /* 055 */ final UTF8String project_key_0 = project_keys_0.getUTF8String(project_index_0); /* 056 */ if (project_key_0.equals(((UTF8String) references[3] /* literal */))) { /* 057 */ project_found_0 = true; /* 058 */ } else { /* 059 */ project_index_0++; /* 060 */ } /* 061 */ } /* 062 */ /* 063 */ if (!project_found_0 || project_values_0.isNullAt(project_index_0)) { /* 064 */ project_isNull_4 = true; /* 065 */ } else { /* 066 */ project_value_4 = project_values_0.getUTF8String(project_index_0); /* 067 */ } /* 068 */ /* 069 */ } /* 070 */ boolean project_isNull_6 = true; /* 071 */ UTF8String project_value_6 = null; /* 072 */ /* 073 */ if (!project_isNull_0) { /* 074 */ project_isNull_6 = false; // resultCode could change nullability. 
/* 075 */ /* 076 */ final int project_length_1 = project_value_0.numElements(); /* 077 */ final ArrayData project_keys_1 = project_value_0.keyArray(); /* 078 */ final ArrayData project_values_1 = project_value_0.valueArray(); /* 079 */ /* 080 */ int project_index_1 = 0; /* 081 */ boolean project_found_1 = false; /* 082 */ while (project_index_1 < project_length_1 && !project_found_1) { /* 083 */ final UTF8String project_key_1 = project_keys_1.getUTF8String(project_index_1); /* 084 */ if (project_key_1.equals(((UTF8String) references[4] /* literal */))) { /* 085 */ project_found_1 = true; /* 086 */ } else { /* 087 */ project_index_1++; /* 088 */ } /* 089 */ } /* 090 */ /* 091 */ if (!project_found_1 || project_values_1.isNullAt(project_index_1)) { /* 092 */ project_isNull_6 = true; /* 093 */ } else { /* 094 */ project_value_6 = project_values_1.getUTF8String(project_index_1); /* 095 */ } /* 096 */ /* 097 */ } /* 098 */ boolean project_isNull_8 = true; /* 099 */ UTF8String project_value_8 = null; /* 100 */ ... ``` When the code is split into separated method: ``` /* 026 */ private void project_doConsume_0(InternalRow inputadapter_row_0, UTF8String project_expr_0_0, boolean project_exprIsNull_0_0) throws java.io.IOException { /* 027 */ // common sub-expressions /* 028 */ /* 029 */ MapData project_subExprValue_0 = project_subExpr_0(project_exprIsNull_0_0, project_expr_0_0); /* 030 */ ... /* 140 */ private MapData project_subExpr_0(boolean project_exprIsNull_0_0, org.apache.spark.unsafe.types.UTF8String project_expr_0_0) { /* 141 */ boolean project_isNull_0 = true; /* 142 */ MapData project_value_0 = null; /* 143 */ /* 144 */ if (!project_exprIsNull_0_0) { /* 145 */ project_isNull_0 = false; // resultCode could change nullability. /* 146 */ /* 147 */ UTF8String[] project_kvs_0 = project_expr_0_0.split(((UTF8String) references[1] /* literal */), -1); /* 148 */ for(UTF8String kvEntry: project_kvs_0) { /* 149 */ UTF8String[] kv = kvEntry.split(((UTF8String) references[2] /* literal */), 2); /* 150 */ ((org.apache.spark.sql.catalyst.util.ArrayBasedMapBuilder) references[0] /* mapBuilder */).put(kv[0], kv.length == 2 ? kv[1] : null); /* 151 */ } /* 152 */ project_value_0 = ((org.apache.spark.sql.catalyst.util.ArrayBasedMapBuilder) references[0] /* mapBuilder */).build(); /* 153 */ /* 154 */ } /* 155 */ project_subExprIsNull_0 = project_isNull_0; /* 156 */ return project_value_0; /* 157 */ } ``` ### Why are the changes needed? Users occasionally write repeated expression in projection. It is also possibly that query optimizer optimizes a query to evaluate same expression many times in a Project. Currently in ProjectExec, we don't support subexpression elimination in Whole-stage codegen. We can support it to reduce redundant evaluation. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? `spark.sql.subexpressionElimination.enabled` is enabled by default. So that's said we should pass all tests with this change. Closes #29975 from viirya/SPARK-33092. 
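A quick way to reproduce the before/after comparison above from a spark-shell (a sketch: `spark` and its implicits are assumed to be in scope, and the sample input is made up):

```scala
import org.apache.spark.sql.execution.debug._   // brings in debugCodegen()
import org.apache.spark.sql.functions._
import spark.implicits._

val df = Seq("a=1&b=2&c=3").toDF("foo")
val projected = df
  .withColumn("my_map", expr("str_to_map(foo, '&', '=')"))
  .select(col("my_map")("a"), col("my_map")("b"), col("my_map")("c"))

// Enabled by default; with it on, ProjectExec's generated code builds the map once
// per row and reuses it for all three lookups, as in the "after" listing above.
spark.conf.get("spark.sql.subexpressionElimination.enabled")
projected.debugCodegen()
```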
Authored-by: Liang-Chi Hsieh Signed-off-by: Takeshi Yamamuro --- .../expressions/codegen/CodeGenerator.scala | 55 ++++++++++++++----- .../aggregate/HashAggregateExec.scala | 2 +- .../execution/basicPhysicalOperators.scala | 15 ++++- .../sql/connector/DataSourceV2Suite.scala | 2 +- 4 files changed, 56 insertions(+), 18 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 71d36733464f6..9a26c388f59af 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -90,8 +90,13 @@ case class SubExprEliminationState(isNull: ExprValue, value: ExprValue) * @param codes Strings representing the codes that evaluate common subexpressions. * @param states Foreach expression that is participating in subexpression elimination, * the state to use. + * @param exprCodesNeedEvaluate Some expression codes that need to be evaluated before + * calling common subexpressions. */ -case class SubExprCodes(codes: Seq[String], states: Map[Expression, SubExprEliminationState]) +case class SubExprCodes( + codes: Seq[String], + states: Map[Expression, SubExprEliminationState], + exprCodesNeedEvaluate: Seq[ExprCode]) /** * The main information about a new added function. @@ -1044,7 +1049,7 @@ class CodegenContext extends Logging { // Get all the expressions that appear at least twice and set up the state for subexpression // elimination. val commonExprs = equivalentExpressions.getAllEquivalentExprs.filter(_.size > 1) - val commonExprVals = commonExprs.map(_.head.genCode(this)) + lazy val commonExprVals = commonExprs.map(_.head.genCode(this)) lazy val nonSplitExprCode = { commonExprs.zip(commonExprVals).map { case (exprs, eval) => @@ -1055,10 +1060,17 @@ class CodegenContext extends Logging { } } - val codes = if (commonExprVals.map(_.code.length).sum > SQLConf.get.methodSplitThreshold) { - val inputVarsForAllFuncs = commonExprs.map { expr => - getLocalInputVariableValues(this, expr.head).toSeq - } + // For some operators, they do not require all its child's outputs to be evaluated in advance. + // Instead it only early evaluates part of outputs, for example, `ProjectExec` only early + // evaluate the outputs used more than twice. So we need to extract these variables used by + // subexpressions and evaluate them before subexpressions. + val (inputVarsForAllFuncs, exprCodesNeedEvaluate) = commonExprs.map { expr => + val (inputVars, exprCodes) = getLocalInputVariableValues(this, expr.head) + (inputVars.toSeq, exprCodes.toSeq) + }.unzip + + val splitThreshold = SQLConf.get.methodSplitThreshold + val codes = if (commonExprVals.map(_.code.length).sum > splitThreshold) { if (inputVarsForAllFuncs.map(calculateParamLengthFromExprValues).forall(isValidParamLength)) { commonExprs.zipWithIndex.map { case (exprs, i) => val expr = exprs.head @@ -1109,7 +1121,7 @@ class CodegenContext extends Logging { } else { nonSplitExprCode } - SubExprCodes(codes, localSubExprEliminationExprs.toMap) + SubExprCodes(codes, localSubExprEliminationExprs.toMap, exprCodesNeedEvaluate.flatten) } /** @@ -1732,15 +1744,23 @@ object CodeGenerator extends Logging { } /** - * Extracts all the input variables from references and subexpression elimination states - * for a given `expr`. 
This result will be used to split the generated code of - * expressions into multiple functions. + * This methods returns two values in a Tuple. + * + * First value: Extracts all the input variables from references and subexpression + * elimination states for a given `expr`. This result will be used to split the + * generated code of expressions into multiple functions. + * + * Second value: Returns the set of `ExprCodes`s which are necessary codes before + * evaluating subexpressions. */ def getLocalInputVariableValues( ctx: CodegenContext, expr: Expression, - subExprs: Map[Expression, SubExprEliminationState] = Map.empty): Set[VariableValue] = { + subExprs: Map[Expression, SubExprEliminationState] = Map.empty) + : (Set[VariableValue], Set[ExprCode]) = { val argSet = mutable.Set[VariableValue]() + val exprCodesNeedEvaluate = mutable.Set[ExprCode]() + if (ctx.INPUT_ROW != null) { argSet += JavaCode.variable(ctx.INPUT_ROW, classOf[InternalRow]) } @@ -1761,16 +1781,21 @@ object CodeGenerator extends Logging { case ref: BoundReference if ctx.currentVars != null && ctx.currentVars(ref.ordinal) != null => - val ExprCode(_, isNull, value) = ctx.currentVars(ref.ordinal) - collectLocalVariable(value) - collectLocalVariable(isNull) + val exprCode = ctx.currentVars(ref.ordinal) + // If the referred variable is not evaluated yet. + if (exprCode.code != EmptyBlock) { + exprCodesNeedEvaluate += exprCode.copy() + exprCode.code = EmptyBlock + } + collectLocalVariable(exprCode.value) + collectLocalVariable(exprCode.isNull) case e => stack.pushAll(e.children) } } - argSet.toSet + (argSet.toSet, exprCodesNeedEvaluate.toSet) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala index dcb465707a0ed..52d0450afb181 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala @@ -263,7 +263,7 @@ case class HashAggregateExec( } else { val inputVars = aggBufferUpdatingExprs.map { aggExprsForOneFunc => val inputVarsForOneFunc = aggExprsForOneFunc.map( - CodeGenerator.getLocalInputVariableValues(ctx, _, subExprs)).reduce(_ ++ _).toSeq + CodeGenerator.getLocalInputVariableValues(ctx, _, subExprs)._1).reduce(_ ++ _).toSeq val paramLength = CodeGenerator.calculateParamLengthFromExprValues(inputVarsForOneFunc) // Checks if a parameter length for the `aggExprsForOneFunc` does not go over the JVM limit diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala index 1f70fde3f7654..7334ea1e27284 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala @@ -66,10 +66,23 @@ case class ProjectExec(projectList: Seq[NamedExpression], child: SparkPlan) override def doConsume(ctx: CodegenContext, input: Seq[ExprCode], row: ExprCode): String = { val exprs = bindReferences[Expression](projectList, child.output) - val resultVars = exprs.map(_.genCode(ctx)) + val (subExprsCode, resultVars, localValInputs) = if (conf.subexpressionEliminationEnabled) { + // subexpression elimination + val subExprs = ctx.subexpressionEliminationForWholeStageCodegen(exprs) + val genVars = 
ctx.withSubExprEliminationExprs(subExprs.states) { + exprs.map(_.genCode(ctx)) + } + (subExprs.codes.mkString("\n"), genVars, subExprs.exprCodesNeedEvaluate) + } else { + ("", exprs.map(_.genCode(ctx)), Seq.empty) + } + // Evaluation of non-deterministic expressions can't be deferred. val nonDeterministicAttrs = projectList.filterNot(_.deterministic).map(_.toAttribute) s""" + |// common sub-expressions + |${evaluateVariables(localValInputs)} + |$subExprsCode |${evaluateRequiredVariables(output, resultVars, AttributeSet(nonDeterministicAttrs))} |${consume(ctx, resultVars)} """.stripMargin diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala index a9c521eb46499..ec1ac00d08bf8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala @@ -268,7 +268,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS } } // this input data will fail to read middle way. - val input = spark.range(10).select(failingUdf('id).as('i)).select('i, -'i as 'j) + val input = spark.range(15).select(failingUdf('id).as('i)).select('i, -'i as 'j) val e3 = intercept[SparkException] { input.write.format(cls.getName).option("path", path).mode("overwrite").save() } From a0e324460e5d05cc8beeba5b1b0d1887b71254ea Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Mon, 12 Oct 2020 22:54:31 +0900 Subject: [PATCH 0219/1009] [SPARK-32704][SQL][FOLLOWUP] Corrects version values of plan logging configs in SQLConf ### What changes were proposed in this pull request? This PR intends to correct version values (`3.0.0` -> `3.1.0`) of three configs below in `SQLConf`: - spark.sql.planChangeLog.level - spark.sql.planChangeLog.rules - spark.sql.planChangeLog.batches This PR comes from https://github.com/apache/spark/pull/29544#discussion_r503049350. ### Why are the changes needed? Bugfix. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? N/A Closes #30015 from maropu/pr29544-FOLLOWUP. Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- .../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 8cbdbfe16d2bc..99c10b38c53b1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -221,7 +221,7 @@ object SQLConf { .doc("Configures the log level for logging the change from the original plan to the new " + "plan after a rule or batch is applied. The value can be 'trace', 'debug', 'info', " + "'warn', or 'error'. 
The default log level is 'trace'.") - .version("3.0.0") + .version("3.1.0") .stringConf .transform(_.toUpperCase(Locale.ROOT)) .checkValue(logLevel => Set("TRACE", "DEBUG", "INFO", "WARN", "ERROR").contains(logLevel), @@ -233,7 +233,7 @@ object SQLConf { .internal() .doc("Configures a list of rules for logging plan changes, in which the rules are " + "specified by their rule names and separated by comma.") - .version("3.0.0") + .version("3.1.0") .stringConf .createOptional @@ -241,7 +241,7 @@ object SQLConf { .internal() .doc("Configures a list of batches for logging plan changes, in which the batches " + "are specified by their batch names and separated by comma.") - .version("3.0.0") + .version("3.1.0") .stringConf .createOptional From ed2fe8d80635014681ec18b29e33e6ecfaf883d7 Mon Sep 17 00:00:00 2001 From: zhengruifeng Date: Mon, 12 Oct 2020 09:01:03 -0500 Subject: [PATCH 0220/1009] [SPARK-33111][ML] aft transform optimization ### What changes were proposed in this pull request? 1, when `predictionCol` and `quantilesCol` are both set, we only need one prediction for each row: prediction is just the variable `lambda` in `predictQuantiles`; 2, in the computation of variable `quantiles` in `predictQuantiles`, a pre-computed vector `val baseQuantiles = $(quantileProbabilities).map(q => math.exp(math.log(-math.log1p(-q)) * scale))` can be reused for each row; ### Why are the changes needed? avoid redundant computation in transform, like what we did in `ProbabilisticClassificationModel`, `GaussianMixtureModel`, etc ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? existing testsuite Closes #30000 from zhengruifeng/aft_predict_transform_opt. Authored-by: zhengruifeng Signed-off-by: Sean Owen --- .../spark/ml/regression/AFTSurvivalRegression.scala | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala index f301c349a2dc7..595a2f0e742df 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala @@ -421,9 +421,17 @@ class AFTSurvivalRegressionModel private[ml] ( } if (hasQuantilesCol) { - val predictQuantilesUDF = udf { features: Vector => predictQuantiles(features)} + val baseQuantiles = $(quantileProbabilities) + .map(q => math.exp(math.log(-math.log1p(-q)) * scale)) + val lambdaCol = if ($(predictionCol).nonEmpty) { + predictionColumns.head + } else { + udf { features: Vector => predict(features) }.apply(col($(featuresCol))) + } + val predictQuantilesUDF = + udf { lambda: Double => Vectors.dense(baseQuantiles.map(q => q * lambda)) } predictionColNames :+= $(quantilesCol) - predictionColumns :+= predictQuantilesUDF(col($(featuresCol))) + predictionColumns :+= predictQuantilesUDF(lambdaCol) .as($(quantilesCol), outputSchema($(quantilesCol)).metadata) } From b27a287ff293c02dcad0c45cca71a5244664d7f5 Mon Sep 17 00:00:00 2001 From: "xuewei.linxuewei" Date: Mon, 12 Oct 2020 14:48:40 +0000 Subject: [PATCH 0221/1009] [SPARK-33016][SQL] Potential SQLMetrics missed which might cause WEB UI display issue while AQE is on ### What changes were proposed in this pull request? With following scenario when AQE is on, SQLMetrics could be incorrect. 1. Stage A and B are created, and UI updated thru event onAdaptiveExecutionUpdate. 2. Stage A and B are running. 
Subquery in stage A keep updating metrics thru event onAdaptiveSQLMetricUpdate. 3. Stage B completes, while stage A's subquery is still running, updating metrics. 4. Completion of stage B triggers new stage creation and UI update thru event onAdaptiveExecutionUpdate again (just like step 1). So decided to make a trade off of keeping more duplicate SQLMetrics without deleting them when AQE with newPlan updated. ### Why are the changes needed? Make SQLMetrics behavior 100% correct. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Updated SQLAppStatusListenerSuite. Closes #29965 from leanken/leanken-SPARK-33016. Authored-by: xuewei.linxuewei Signed-off-by: Wenchen Fan --- .../sql/execution/ui/SQLAppStatusListener.scala | 4 ++-- .../execution/ui/SQLAppStatusListenerSuite.scala | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala index 175340d2dfaa7..963aec7ca36c5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala @@ -341,7 +341,7 @@ class SQLAppStatusListener( val exec = getOrCreateExecution(executionId) exec.physicalPlanDescription = physicalPlanDescription - exec.metrics = sqlPlanMetrics + exec.metrics ++= sqlPlanMetrics update(exec) } @@ -349,7 +349,7 @@ class SQLAppStatusListener( val SparkListenerSQLAdaptiveSQLMetricUpdates(executionId, sqlPlanMetrics) = event val exec = getOrCreateExecution(executionId) - exec.metrics = exec.metrics ++ sqlPlanMetrics + exec.metrics ++= sqlPlanMetrics update(exec) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala index f49a3a384b450..00f23718a0e9e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala @@ -680,7 +680,7 @@ class SQLAppStatusListenerSuite extends SharedSparkSession with JsonTestUtils assert(sparkPlanInfo.nodeName === "WholeStageCodegen (2)") } - test("SPARK-32615: SQLMetrics validation after sparkPlanInfo updated in AQE") { + test("SPARK-32615,SPARK-33016: SQLMetrics validation after sparkPlanInfo updated in AQE") { val statusStore = createStatusStore() val listener = statusStore.listener.get @@ -755,7 +755,7 @@ class SQLAppStatusListenerSuite extends SharedSparkSession with JsonTestUtils .allNodes.flatMap(_.metrics.map(_.accumulatorId)) // Assume that AQE update sparkPlanInfo with newPlan - // ExecutionMetrics will be replaced using newPlan's SQLMetrics + // ExecutionMetrics will be appended using newPlan's SQLMetrics listener.onOtherEvent(SparkListenerSQLAdaptiveExecutionUpdate( executionId, "test", @@ -770,8 +770,8 @@ class SQLAppStatusListenerSuite extends SharedSparkSession with JsonTestUtils listener.onStageSubmitted(SparkListenerStageSubmitted(createStageInfo(1, 0))) listener.onTaskStart(SparkListenerTaskStart(1, 0, createTaskInfo(0, 0))) - // live metrics will be override, and ExecutionMetrics should be empty as the newPlan updated. - assert(statusStore.executionMetrics(executionId).isEmpty) + // historical metrics will be kept despite of the newPlan updated. 
+ assert(statusStore.executionMetrics(executionId).size == 2) // update new metrics with Id 4 & 5, since 3 is timing metrics, // timing metrics has a complicated string presentation so we don't test it here. @@ -780,9 +780,9 @@ class SQLAppStatusListenerSuite extends SharedSparkSession with JsonTestUtils (0L, 1, 0, createAccumulatorInfos(newMetricsValueMap)) ))) - assert(statusStore.executionMetrics(executionId).size == 2) + assert(statusStore.executionMetrics(executionId).size == 4) statusStore.executionMetrics(executionId).foreach { m => - assert(m._2 == "500") + assert(m._2 == "100" || m._2 == "500") } listener.onTaskEnd(SparkListenerTaskEnd( @@ -802,10 +802,10 @@ class SQLAppStatusListenerSuite extends SharedSparkSession with JsonTestUtils JobSucceeded )) - // aggregateMetrics should ignore metrics from job 0 + // aggregateMetrics should contains all metrics from job 0 and job 1 val aggregateMetrics = listener.liveExecutionMetrics(executionId) if (aggregateMetrics.isDefined) { - oldAccumulatorIds.foreach(id => assert(!aggregateMetrics.get.contains(id))) + assert(aggregateMetrics.get.keySet.size == 4) } listener.onOtherEvent(SparkListenerSQLExecutionEnd( From 819f12ee2fe3cce0c59221c2b02831274c769b23 Mon Sep 17 00:00:00 2001 From: Pablo Date: Mon, 12 Oct 2020 14:18:34 -0700 Subject: [PATCH 0222/1009] [SPARK-33118][SQL] CREATE TEMPORARY TABLE fails with location ### What changes were proposed in this pull request? We have a problem when you use CREATE TEMPORARY TABLE with LOCATION ```scala spark.range(3).write.parquet("/tmp/testspark1") sql("CREATE TEMPORARY TABLE t USING parquet OPTIONS (path '/tmp/testspark1')") sql("CREATE TEMPORARY TABLE t USING parquet LOCATION '/tmp/testspark1'") ``` ```scala org.apache.spark.sql.AnalysisException: Unable to infer schema for Parquet. 
It must be specified manually.; at org.apache.spark.sql.execution.datasources.DataSource.$anonfun$getOrInferFileFormatSchema$12(DataSource.scala:200) at scala.Option.getOrElse(Option.scala:189) at org.apache.spark.sql.execution.datasources.DataSource.getOrInferFileFormatSchema(DataSource.scala:200) at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:408) at org.apache.spark.sql.execution.datasources.CreateTempViewUsing.run(ddl.scala:94) at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70) at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68) at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79) at org.apache.spark.sql.Dataset.$anonfun$logicalPlan$1(Dataset.scala:229) at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3618) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:100) at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64) at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3616) at org.apache.spark.sql.Dataset.(Dataset.scala:229) at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764) at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97) at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:607) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764) at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:602) ``` This bug was introduced by SPARK-30507. sparksqlparser --> visitCreateTable --> visitCreateTableClauses --> cleanTableOptions extract the path from the options but in this case CreateTempViewUsing need the path in the options map. ### Why are the changes needed? To fix the problem ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit testing and manual testing Closes #30014 from planga82/bugfix/SPARK-33118_create_temp_table_location. 
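For completeness, the repro from the description can be turned into a quick spark-shell sanity check after the fix (the path is just the example one from above):

```scala
spark.range(3).write.mode("overwrite").parquet("/tmp/testspark1")

// Both forms now reach CreateTempViewUsing with the path, so neither hits the
// "Unable to infer schema for Parquet" error shown above.
spark.sql("CREATE TEMPORARY TABLE t1 USING parquet OPTIONS (path '/tmp/testspark1')")
spark.sql("CREATE TEMPORARY TABLE t2 USING parquet LOCATION '/tmp/testspark1'")

spark.table("t2").show()
```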
Authored-by: Pablo Signed-off-by: Dongjoon Hyun --- .../apache/spark/sql/execution/SparkSqlParser.scala | 6 ++++-- .../spark/sql/execution/SparkSqlParserSuite.scala | 11 ++++++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 11934c934e316..0a5f4c3ed4bcb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -262,7 +262,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { operationNotAllowed("CREATE TEMPORARY TABLE IF NOT EXISTS", ctx) } - val (_, _, _, options, _, _) = visitCreateTableClauses(ctx.createTableClauses()) + val (_, _, _, options, location, _) = visitCreateTableClauses(ctx.createTableClauses()) val provider = Option(ctx.tableProvider).map(_.multipartIdentifier.getText).getOrElse( throw new ParseException("CREATE TEMPORARY TABLE without a provider is not allowed.", ctx)) val schema = Option(ctx.colTypeList()).map(createSchema) @@ -271,7 +271,9 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { "CREATE TEMPORARY VIEW ... USING ... instead") val table = tableIdentifier(ident, "CREATE TEMPORARY VIEW", ctx) - CreateTempViewUsing(table, schema, replace = false, global = false, provider, options) + val optionsWithLocation = location.map(l => options + ("path" -> l)).getOrElse(options) + CreateTempViewUsing(table, schema, replace = false, global = false, provider, + optionsWithLocation) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index af9088003f3b0..5e6808eeba0f6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, import org.apache.spark.sql.catalyst.expressions.{Ascending, AttributeReference, Concat, SortOrder} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.execution.command._ -import org.apache.spark.sql.execution.datasources.{CreateTable, RefreshResource} +import org.apache.spark.sql.execution.datasources.{CreateTable, CreateTempViewUsing, RefreshResource} import org.apache.spark.sql.internal.{HiveSerDe, SQLConf, StaticSQLConf} import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructType} @@ -160,6 +160,15 @@ class SparkSqlParserSuite extends AnalysisTest { intercept("REFRESH", "Resource paths cannot be empty in REFRESH statements") } + test("SPARK-33118 CREATE TMEPORARY TABLE with LOCATION") { + assertEqual("CREATE TEMPORARY TABLE t USING parquet OPTIONS (path '/data/tmp/testspark1')", + CreateTempViewUsing(TableIdentifier("t", None), None, false, false, "parquet", + Map("path" -> "/data/tmp/testspark1"))) + assertEqual("CREATE TEMPORARY TABLE t USING parquet LOCATION '/data/tmp/testspark1'", + CreateTempViewUsing(TableIdentifier("t", None), None, false, false, "parquet", + Map("path" -> "/data/tmp/testspark1"))) + } + private def createTableUsing( table: String, database: Option[String] = None, From 86d26b46a53acf52b85ac990059be9e5a3ec0318 Mon Sep 17 00:00:00 2001 From: zhengruifeng Date: Tue, 13 Oct 2020 13:09:40 +0800 Subject: [PATCH 
0223/1009] [SPARK-32455][ML][FOLLOW-UP] LogisticRegressionModel prediction optimization - fix incorrect initialization ### What changes were proposed in this pull request? use `lazy array` instead of `var` for auxiliary variables in binary lor ### Why are the changes needed? In https://github.com/apache/spark/pull/29255, I made a mistake: the `private var _threshold` and `_rawThreshold` are initialized by defaut values of `threshold`, that is beacuse: 1, param `threshold` is set default value at first; 2, `_threshold` and `_rawThreshold` are initialized based on the default value; 3, param `threshold` is updated by the value from estimator, by `copyValues` method: ``` if (map.contains(param) && to.hasParam(param.name)) { to.set(param.name, map(param)) } ``` We can update `_threshold` and `_rawThreshold` in `setThreshold` and `setThresholds`, but we can not update them in `set`/`copyValues` so their values are kept until methods `setThreshold` and `setThresholds` are called. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? test in repl Closes #30013 from zhengruifeng/lor_threshold_init. Authored-by: zhengruifeng Signed-off-by: zhengruifeng --- .../classification/LogisticRegression.scala | 26 +++++++++++-------- .../LogisticRegressionSuite.scala | 3 +-- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 4d763cbd29d3c..a43ad466a7c80 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -1100,20 +1100,24 @@ class LogisticRegressionModel private[spark] ( private lazy val _intercept = interceptVector(0) private lazy val _interceptVector = interceptVector.toDense - private var _threshold = Double.NaN - private var _rawThreshold = Double.NaN - - updateBinaryThreshold() + private lazy val _binaryThresholdArray = { + val array = Array(Double.NaN, Double.NaN) + updateBinaryThresholds(array) + array + } + private def _threshold: Double = _binaryThresholdArray(0) + private def _rawThreshold: Double = _binaryThresholdArray(1) - private def updateBinaryThreshold(): Unit = { + private def updateBinaryThresholds(array: Array[Double]): Unit = { if (!isMultinomial) { - _threshold = getThreshold + val _threshold = getThreshold + array(0) = _threshold if (_threshold == 0.0) { - _rawThreshold = Double.NegativeInfinity + array(1) = Double.NegativeInfinity } else if (_threshold == 1.0) { - _rawThreshold = Double.PositiveInfinity + array(1) = Double.PositiveInfinity } else { - _rawThreshold = math.log(_threshold / (1.0 - _threshold)) + array(1) = math.log(_threshold / (1.0 - _threshold)) } } } @@ -1121,7 +1125,7 @@ class LogisticRegressionModel private[spark] ( @Since("1.5.0") override def setThreshold(value: Double): this.type = { super.setThreshold(value) - updateBinaryThreshold() + updateBinaryThresholds(_binaryThresholdArray) this } @@ -1131,7 +1135,7 @@ class LogisticRegressionModel private[spark] ( @Since("1.5.0") override def setThresholds(value: Array[Double]): this.type = { super.setThresholds(value) - updateBinaryThreshold() + updateBinaryThresholds(_binaryThresholdArray) this } diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala 
b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index 56eadff6df078..51a6ae3c7e49b 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -400,10 +400,9 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { } test("thresholds prediction") { - val blr = new LogisticRegression().setFamily("binomial") + val blr = new LogisticRegression().setFamily("binomial").setThreshold(1.0) val binaryModel = blr.fit(smallBinaryDataset) - binaryModel.setThreshold(1.0) testTransformer[(Double, Vector)](smallBinaryDataset.toDF(), binaryModel, "prediction") { row => assert(row.getDouble(0) === 0.0) } From e34f2d8df222056e9c2195dec6138fa1af9ca4e1 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Tue, 13 Oct 2020 17:41:55 +0900 Subject: [PATCH 0224/1009] [SPARK-33119][SQL] ScalarSubquery should returns the first two rows to avoid Driver OOM ### What changes were proposed in this pull request? `ScalarSubquery` should returns the first two rows. ### Why are the changes needed? To avoid Driver OOM. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing test: https://github.com/apache/spark/blob/d6f3138352042e33a2291e11c325b8eadb8dd5f2/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala#L147-L154 Closes #30016 from wangyum/SPARK-33119. Authored-by: Yuming Wang Signed-off-by: HyukjinKwon --- .../main/scala/org/apache/spark/sql/execution/subquery.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala index 1a6b99a455bf7..14cc76f0dbb78 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala @@ -80,7 +80,8 @@ case class ScalarSubquery( @volatile private var updated: Boolean = false def updateResult(): Unit = { - val rows = plan.executeCollect() + // Only return the first two rows as an array to avoid Driver OOM. + val rows = plan.executeTake(2) if (rows.length > 1) { sys.error(s"more than one row returned by a subquery used as an expression:\n$plan") } From 17eebd72097ee65e22cdaddf375e868074251f5a Mon Sep 17 00:00:00 2001 From: "tanel.kiis@gmail.com" Date: Tue, 13 Oct 2020 20:11:04 +0900 Subject: [PATCH 0225/1009] [SPARK-32295][SQL] Add not null and size > 0 filters before inner explode/inline to benefit from predicate pushdown ### What changes were proposed in this pull request? Add `And(IsNotNull(e), GreaterThan(Size(e), Literal(0)))` filter before Explode, PosExplode and Inline, when `outer = false`. Removed unused `InferFiltersFromConstraints` from `operatorOptimizationRuleSet` to avoid confusion that happened during the review process. ### Why are the changes needed? Predicate pushdown will be able to move this new filter down through joins and into data sources for performance improvement. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test Closes #29092 from tanelk/SPARK-32295. 
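To see the filter inferred by SPARK-32295 in a plan, a small sketch (toy data and path, spark-shell assumed): the optimized plan should show a `Filter` carrying the `isnotnull(...)` and `size(...) > 0` predicates below the inner `Generate explode(...)`, from where predicate pushdown can carry them toward the scan.

```scala
import org.apache.spark.sql.functions._
import spark.implicits._

Seq((1, Seq("a", "b")), (2, Seq.empty[String]), (3, null: Seq[String]))
  .toDF("id", "arr")
  .write.mode("overwrite").parquet("/tmp/explode_demo")   // toy path

val df = spark.read.parquet("/tmp/explode_demo")
// Inner (outer = false) explode: null/empty arrays contribute no rows anyway,
// which is exactly why the extra filter is safe to infer.
val exploded = df.select($"id", explode($"arr").as("elem"))
exploded.explain(true)
```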
Lead-authored-by: tanel.kiis@gmail.com Co-authored-by: Tanel Kiis Signed-off-by: Takeshi Yamamuro --- .../sql/catalyst/optimizer/Optimizer.scala | 43 +++++++++-- .../InferFiltersFromGenerateSuite.scala | 75 +++++++++++++++++++ 2 files changed, 113 insertions(+), 5 deletions(-) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromGenerateSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 5bdaa504a3beb..7586bdf4392f5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -79,7 +79,6 @@ abstract class Optimizer(catalogManager: CatalogManager) PushLeftSemiLeftAntiThroughJoin, LimitPushDown, ColumnPruning, - InferFiltersFromConstraints, // Operator combine CollapseRepartition, CollapseProject, @@ -117,14 +116,13 @@ abstract class Optimizer(catalogManager: CatalogManager) extendedOperatorOptimizationRules val operatorOptimizationBatch: Seq[Batch] = { - val rulesWithoutInferFiltersFromConstraints = - operatorOptimizationRuleSet.filterNot(_ == InferFiltersFromConstraints) Batch("Operator Optimization before Inferring Filters", fixedPoint, - rulesWithoutInferFiltersFromConstraints: _*) :: + operatorOptimizationRuleSet: _*) :: Batch("Infer Filters", Once, + InferFiltersFromGenerate, InferFiltersFromConstraints) :: Batch("Operator Optimization after Inferring Filters", fixedPoint, - rulesWithoutInferFiltersFromConstraints: _*) :: + operatorOptimizationRuleSet: _*) :: // Set strategy to Once to avoid pushing filter every time because we do not change the // join condition. Batch("Push extra predicate through join", fixedPoint, @@ -868,6 +866,41 @@ object TransposeWindow extends Rule[LogicalPlan] { } } +/** + * Infers filters from [[Generate]], such that rows that would have been removed + * by this [[Generate]] can be removed earlier - before joins and in data sources. + */ +object InferFiltersFromGenerate extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { + // This rule does not infer filters from foldable expressions to avoid constant filters + // like 'size([1, 2, 3]) > 0'. These do not show up in child's constraints and + // then the idempotence will break. 
+ case generate @ Generate(e, _, _, _, _, _) + if !e.deterministic || e.children.forall(_.foldable) => generate + + case generate @ Generate(g, _, false, _, _, _) if canInferFilters(g) => + // Exclude child's constraints to guarantee idempotency + val inferredFilters = ExpressionSet( + Seq( + GreaterThan(Size(g.children.head), Literal(0)), + IsNotNull(g.children.head) + ) + ) -- generate.child.constraints + + if (inferredFilters.nonEmpty) { + generate.copy(child = Filter(inferredFilters.reduce(And), generate.child)) + } else { + generate + } + } + + private def canInferFilters(g: Generator): Boolean = g match { + case _: ExplodeBase => true + case _: Inline => true + case _ => false + } +} + /** * Generate a list of additional filters from an operator's existing constraint but remove those * that are either already part of the operator's condition or are part of the operator's child diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromGenerateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromGenerateSuite.scala new file mode 100644 index 0000000000000..3f83971aa9821 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromGenerateSuite.scala @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans._ +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.RuleExecutor +import org.apache.spark.sql.types.{IntegerType, StructField, StructType} + +class InferFiltersFromGenerateSuite extends PlanTest { + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = Batch("Infer Filters", Once, InferFiltersFromGenerate) :: Nil + } + + val testRelation = LocalRelation('a.array(StructType(Seq( + StructField("x", IntegerType), + StructField("y", IntegerType) + )))) + + Seq(Explode(_), PosExplode(_), Inline(_)).foreach { f => + val generator = f('a) + test("Infer filters from " + generator) { + val originalQuery = testRelation.generate(generator).analyze + val correctAnswer = testRelation + .where(IsNotNull('a) && Size('a) > 0) + .generate(generator) + .analyze + val optimized = Optimize.execute(originalQuery) + comparePlans(optimized, correctAnswer) + } + + test("Don't infer duplicate filters from " + generator) { + val originalQuery = testRelation + .where(IsNotNull('a) && Size('a) > 0) + .generate(generator) + .analyze + val optimized = Optimize.execute(originalQuery) + comparePlans(optimized, originalQuery) + } + + test("Don't infer filters from outer " + generator) { + val originalQuery = testRelation.generate(generator, outer = true).analyze + val optimized = Optimize.execute(originalQuery) + comparePlans(optimized, originalQuery) + } + + val foldableExplode = f(CreateArray(Seq( + CreateStruct(Seq(Literal(0), Literal(1))), + CreateStruct(Seq(Literal(2), Literal(3))) + ))) + test("Don't infer filters from " + foldableExplode) { + val originalQuery = testRelation.generate(foldableExplode).analyze + val optimized = Optimize.execute(originalQuery) + comparePlans(optimized, originalQuery) + } + } +} From 1b0875b6924b4f29aa3cdecc26f8103fcae3dc55 Mon Sep 17 00:00:00 2001 From: Denis Pyshev Date: Tue, 13 Oct 2020 21:37:26 +0900 Subject: [PATCH 0226/1009] [SPARK-33115][BUILD][DOCS] Fix javadoc errors in `kvstore` and `unsafe` modules ### What changes were proposed in this pull request? Fix Javadoc generation errors in `kvstore` and `unsafe` modules according to error message hints. ### Why are the changes needed? Fixes `doc` task failures which prevented other tasks successful executions (eg `publishLocal` task depends on `doc` task). ### Does this PR introduce _any_ user-facing change? No. Meaning of text in Javadoc is stayed the same. ### How was this patch tested? Run `build/sbt kvstore/Compile/doc`, `build/sbt unsafe/Compile/doc` and `build/sbt doc` without errors. Closes #30007 from gemelen/feature/doc-task-fix. 
Authored-by: Denis Pyshev Signed-off-by: HyukjinKwon --- .../java/org/apache/spark/util/kvstore/InMemoryStore.java | 5 +++-- .../main/java/org/apache/spark/unsafe/types/UTF8String.java | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/InMemoryStore.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/InMemoryStore.java index 42e090bc83ed1..431c7e42774e4 100644 --- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/InMemoryStore.java +++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/InMemoryStore.java @@ -164,8 +164,9 @@ public void clear() { } /** - * An alias class for the type "ConcurrentHashMap, Boolean>", which is used - * as a concurrent hashset for storing natural keys and the boolean value doesn't matter. + * An alias class for the type "{@literal ConcurrentHashMap, Boolean>}", + * which is used as a concurrent hashset for storing natural keys + * and the boolean value doesn't matter. */ private static class NaturalKeys extends ConcurrentHashMap, Boolean> {} diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index 43bd7976c5d33..b8dda22240042 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -563,7 +563,7 @@ public UTF8String trim() { } /** - * Trims whitespaces (<= ASCII 32) from both ends of this string. + * Trims whitespaces ({@literal <=} ASCII 32) from both ends of this string. * * Note that, this method is the same as java's {@link String#trim}, and different from * {@link UTF8String#trim()} which remove only spaces(= ASCII 32) from both ends. From feee8da14bf506cda30506780fbcf0b8723123f9 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Tue, 13 Oct 2020 12:44:20 +0000 Subject: [PATCH 0227/1009] [SPARK-32858][SQL] UnwrapCastInBinaryComparison: support other numeric types ### What changes were proposed in this pull request? In SPARK-24994 we implemented unwrapping cast for **integral types**. This extends it to support **numeric types** such as float/double/decimal, so that filters involving these types can be better pushed down to data sources. Unlike the cases of integral types, conversions between numeric types can result in rounding up or down. Consider the following case: ```sql cast(e as double) < 1.9 ``` Assume the type of `e` is short; since 1.9 is not representable in that type, the cast will either truncate or round. Now suppose the literal is truncated: we cannot convert the expression to: ```sql e < cast(1.9 as short) ``` as in the previous implementation, since if `e` is 1, the original expression evaluates to true, but the converted expression will evaluate to false. To resolve the above, this PR first finds out whether casting from the wider type to the narrower type will result in truncation or rounding, by comparing a _roundtrip value_ derived from **converting the literal first to the narrower type, and then to the wider type**, versus the original literal value. For instance, in the above, we'll first obtain a roundtrip value via the conversion (double) 1.9 -> (short) 1 -> (double) 1.0, and then compare it against 1.9. Now, in the case of truncation, we'd convert the original expression to: ```sql e <= cast(1.9 as short) ``` instead, so that the conversion is also valid when `e` is 1. 
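As a rough sketch of that check (illustrative code modelled on the description above, with simplified names rather than the exact implementation in `UnwrapCastInBinaryComparison`):

```scala
// Minimal sketch, assuming `value` is the literal of the wider `toType` and `fromType`
// is the type of the cast's child expression: cast the literal down, cast it back up,
// and compare the roundtrip value against the original literal.
val narrowed  = Cast(Literal(value), fromType).eval()             // e.g. (double) 1.9 -> (short) 1
val roundTrip = Cast(Literal(narrowed, fromType), toType).eval()  // (short) 1 -> (double) 1.0
val cmp = toType.ordering.asInstanceOf[Ordering[Any]].compare(value, roundTrip)
// cmp == 0: no precision loss, simply move the cast to the literal side
// cmp > 0 : the literal was rounded down (truncated), so `>`/`>=` become `>` and `<`/`<=` become `<=`
// cmp < 0 : the literal was rounded up, so `>`/`>=` become `>=` and `<`/`<=` become `<`
// (equality comparisons become `if(isnull(fromExp), null, false)` in either case)
```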
For more details, please check [this blog post](https://prestosql.io/blog/2019/05/21/optimizing-the-casts-away.html) by Presto which offers a very good explanation on how it works. ### Why are the changes needed? For queries such as: ```sql SELECT * FROM tbl WHERE short_col < 100.5 ``` The predicate `short_col < 100.5` can't be pushed down to data sources because it involves casts. This eliminates the cast so these queries can run more efficiently. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit tests Closes #29792 from sunchao/SPARK-32858. Lead-authored-by: Chao Sun Co-authored-by: Chao Sun Signed-off-by: Wenchen Fan --- .../UnwrapCastInBinaryComparison.scala | 202 +++++++++++------- .../UnwrapCastInBinaryComparisonSuite.scala | 166 +++++++++++--- .../UnwrapCastInComparisonEndToEndSuite.scala | 194 +++++++++++++++++ 3 files changed, 454 insertions(+), 108 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/UnwrapCastInComparisonEndToEndSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala index d0acfe036d443..fe325f00e0baf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala @@ -35,18 +35,32 @@ import org.apache.spark.sql.types._ * to be optimized away later and pushed down to data sources. * * Currently this only handles cases where: - * 1). `fromType` (of `fromExp`) and `toType` are of integral types (i.e., byte, short, int and - * long) + * 1). `fromType` (of `fromExp`) and `toType` are of numeric types (i.e., short, int, float, + * decimal, etc) * 2). `fromType` can be safely coerced to `toType` without precision loss (e.g., short to int, * int to long, but not long to int) * * If the above conditions are satisfied, the rule checks to see if the literal `value` is within * range `(min, max)`, where `min` and `max` are the minimum and maximum value of `fromType`, - * respectively. If this is true then it means we can safely cast `value` to `fromType` and thus + * respectively. If this is true then it means we may safely cast `value` to `fromType` and thus * able to move the cast to the literal side. That is: * * `cast(fromExp, toType) op value` ==> `fromExp op cast(value, fromType)` * + * Note there are some exceptions to the above: if casting from `value` to `fromType` causes + * rounding up or down, the above conversion will no longer be valid. Instead, the rule does the + * following: + * + * if casting `value` to `fromType` causes rounding up: + * - `cast(fromExp, toType) > value` ==> `fromExp >= cast(value, fromType)` + * - `cast(fromExp, toType) >= value` ==> `fromExp >= cast(value, fromType)` + * - `cast(fromExp, toType) === value` ==> if(isnull(fromExp), null, false) + * - `cast(fromExp, toType) <=> value` ==> false (if `fromExp` is deterministic) + * - `cast(fromExp, toType) <= value` ==> `fromExp < cast(value, fromType)` + * - `cast(fromExp, toType) < value` ==> `fromExp < cast(value, fromType)` + * + * Similarly for the case when casting `value` to `fromType` causes rounding down. + * * If the `value` is not within range `(min, max)`, the rule breaks the scenario into different * cases and try to replace each with simpler constructs. 
* @@ -55,8 +69,6 @@ import org.apache.spark.sql.types._ * - `cast(fromExp, toType) >= value` ==> if(isnull(fromExp), null, false) * - `cast(fromExp, toType) === value` ==> if(isnull(fromExp), null, false) * - `cast(fromExp, toType) <=> value` ==> false (if `fromExp` is deterministic) - * - `cast(fromExp, toType) <=> value` ==> cast(fromExp, toType) <=> value (if `fromExp` is - * non-deterministic) * - `cast(fromExp, toType) <= value` ==> if(isnull(fromExp), null, true) * - `cast(fromExp, toType) < value` ==> if(isnull(fromExp), null, true) * @@ -100,12 +112,12 @@ object UnwrapCastInBinaryComparison extends Rule[LogicalPlan] { swap(unwrapCast(swap(exp))) - // In case both sides have integral type, optimize the comparison by removing casts or + // In case both sides have numeric type, optimize the comparison by removing casts or // moving cast to the literal side. case be @ BinaryComparison( - Cast(fromExp, toType: IntegralType, _), Literal(value, literalType)) + Cast(fromExp, toType: NumericType, _), Literal(value, literalType)) if canImplicitlyCast(fromExp, toType, literalType) => - simplifyIntegralComparison(be, fromExp, toType, value) + simplifyNumericComparison(be, fromExp, toType, value) case _ => exp } @@ -116,82 +128,118 @@ object UnwrapCastInBinaryComparison extends Rule[LogicalPlan] { * optimizes the expression by moving the cast to the literal side. Otherwise if result is not * true, this replaces the input binary comparison `exp` with simpler expressions. */ - private def simplifyIntegralComparison( + private def simplifyNumericComparison( exp: BinaryComparison, fromExp: Expression, - toType: IntegralType, + toType: NumericType, value: Any): Expression = { val fromType = fromExp.dataType - val (min, max) = getRange(fromType) - val (minInToType, maxInToType) = { - (Cast(Literal(min), toType).eval(), Cast(Literal(max), toType).eval()) - } val ordering = toType.ordering.asInstanceOf[Ordering[Any]] - val minCmp = ordering.compare(value, minInToType) - val maxCmp = ordering.compare(value, maxInToType) + val range = getRange(fromType) - if (maxCmp > 0) { - exp match { - case EqualTo(_, _) | GreaterThan(_, _) | GreaterThanOrEqual(_, _) => - falseIfNotNull(fromExp) - case LessThan(_, _) | LessThanOrEqual(_, _) => - trueIfNotNull(fromExp) - // make sure the expression is evaluated if it is non-deterministic - case EqualNullSafe(_, _) if exp.deterministic => - FalseLiteral - case _ => exp + if (range.isDefined) { + val (min, max) = range.get + val (minInToType, maxInToType) = { + (Cast(Literal(min), toType).eval(), Cast(Literal(max), toType).eval()) } - } else if (maxCmp == 0) { - exp match { - case GreaterThan(_, _) => - falseIfNotNull(fromExp) - case LessThanOrEqual(_, _) => - trueIfNotNull(fromExp) - case LessThan(_, _) => - Not(EqualTo(fromExp, Literal(max, fromType))) - case GreaterThanOrEqual(_, _) | EqualTo(_, _) => - EqualTo(fromExp, Literal(max, fromType)) - case EqualNullSafe(_, _) => - EqualNullSafe(fromExp, Literal(max, fromType)) - case _ => exp + val minCmp = ordering.compare(value, minInToType) + val maxCmp = ordering.compare(value, maxInToType) + + if (maxCmp >= 0 || minCmp <= 0) { + return if (maxCmp > 0) { + exp match { + case EqualTo(_, _) | GreaterThan(_, _) | GreaterThanOrEqual(_, _) => + falseIfNotNull(fromExp) + case LessThan(_, _) | LessThanOrEqual(_, _) => + trueIfNotNull(fromExp) + // make sure the expression is evaluated if it is non-deterministic + case EqualNullSafe(_, _) if exp.deterministic => + FalseLiteral + case _ => exp + } + } else if (maxCmp == 0) { + 
exp match { + case GreaterThan(_, _) => + falseIfNotNull(fromExp) + case LessThanOrEqual(_, _) => + trueIfNotNull(fromExp) + case LessThan(_, _) => + Not(EqualTo(fromExp, Literal(max, fromType))) + case GreaterThanOrEqual(_, _) | EqualTo(_, _) => + EqualTo(fromExp, Literal(max, fromType)) + case EqualNullSafe(_, _) => + EqualNullSafe(fromExp, Literal(max, fromType)) + case _ => exp + } + } else if (minCmp < 0) { + exp match { + case GreaterThan(_, _) | GreaterThanOrEqual(_, _) => + trueIfNotNull(fromExp) + case LessThan(_, _) | LessThanOrEqual(_, _) | EqualTo(_, _) => + falseIfNotNull(fromExp) + // make sure the expression is evaluated if it is non-deterministic + case EqualNullSafe(_, _) if exp.deterministic => + FalseLiteral + case _ => exp + } + } else { // minCmp == 0 + exp match { + case LessThan(_, _) => + falseIfNotNull(fromExp) + case GreaterThanOrEqual(_, _) => + trueIfNotNull(fromExp) + case GreaterThan(_, _) => + Not(EqualTo(fromExp, Literal(min, fromType))) + case LessThanOrEqual(_, _) | EqualTo(_, _) => + EqualTo(fromExp, Literal(min, fromType)) + case EqualNullSafe(_, _) => + EqualNullSafe(fromExp, Literal(min, fromType)) + case _ => exp + } + } } - } else if (minCmp < 0) { + } + + // When we reach to this point, it means either there is no min/max for the `fromType` (e.g., + // decimal type), or that the literal `value` is within range `(min, max)`. For these, we + // optimize by moving the cast to the literal side. + + val newValue = Cast(Literal(value), fromType).eval() + if (newValue == null) { + // This means the cast failed, for instance, due to the value is not representable in the + // narrower type. In this case we simply return the original expression. + return exp + } + val valueRoundTrip = Cast(Literal(newValue, fromType), toType).eval() + val lit = Literal(newValue, fromType) + val cmp = ordering.compare(value, valueRoundTrip) + if (cmp == 0) { exp match { - case GreaterThan(_, _) | GreaterThanOrEqual(_, _) => - trueIfNotNull(fromExp) - case LessThan(_, _) | LessThanOrEqual(_, _) | EqualTo(_, _) => - falseIfNotNull(fromExp) - // make sure the expression is evaluated if it is non-deterministic - case EqualNullSafe(_, _) if exp.deterministic => - FalseLiteral + case GreaterThan(_, _) => GreaterThan(fromExp, lit) + case GreaterThanOrEqual(_, _) => GreaterThanOrEqual(fromExp, lit) + case EqualTo(_, _) => EqualTo(fromExp, lit) + case EqualNullSafe(_, _) => EqualNullSafe(fromExp, lit) + case LessThan(_, _) => LessThan(fromExp, lit) + case LessThanOrEqual(_, _) => LessThanOrEqual(fromExp, lit) case _ => exp } - } else if (minCmp == 0) { + } else if (cmp < 0) { + // This means the literal value is rounded up after casting to `fromType` exp match { - case LessThan(_, _) => - falseIfNotNull(fromExp) - case GreaterThanOrEqual(_, _) => - trueIfNotNull(fromExp) - case GreaterThan(_, _) => - Not(EqualTo(fromExp, Literal(min, fromType))) - case LessThanOrEqual(_, _) | EqualTo(_, _) => - EqualTo(fromExp, Literal(min, fromType)) - case EqualNullSafe(_, _) => - EqualNullSafe(fromExp, Literal(min, fromType)) + case EqualTo(_, _) => falseIfNotNull(fromExp) + case EqualNullSafe(_, _) if fromExp.deterministic => FalseLiteral + case GreaterThan(_, _) | GreaterThanOrEqual(_, _) => GreaterThanOrEqual(fromExp, lit) + case LessThan(_, _) | LessThanOrEqual(_, _) => LessThan(fromExp, lit) case _ => exp } } else { - // This means `value` is within range `(min, max)`. Optimize this by moving the cast to the - // literal side. 
- val lit = Literal(Cast(Literal(value), fromType).eval(), fromType) + // This means the literal value is rounded down after casting to `fromType` exp match { - case GreaterThan(_, _) => GreaterThan(fromExp, lit) - case GreaterThanOrEqual(_, _) => GreaterThanOrEqual(fromExp, lit) - case EqualTo(_, _) => EqualTo(fromExp, lit) - case EqualNullSafe(_, _) => EqualNullSafe(fromExp, lit) - case LessThan(_, _) => LessThan(fromExp, lit) - case LessThanOrEqual(_, _) => LessThanOrEqual(fromExp, lit) + case EqualTo(_, _) => falseIfNotNull(fromExp) + case EqualNullSafe(_, _) => FalseLiteral + case GreaterThan(_, _) | GreaterThanOrEqual(_, _) => GreaterThan(fromExp, lit) + case LessThan(_, _) | LessThanOrEqual(_, _) => LessThanOrEqual(fromExp, lit) case _ => exp } } @@ -200,7 +248,7 @@ object UnwrapCastInBinaryComparison extends Rule[LogicalPlan] { /** * Check if the input `fromExp` can be safely cast to `toType` without any loss of precision, * i.e., the conversion is injective. Note this only handles the case when both sides are of - * integral type. + * numeric type. */ private def canImplicitlyCast( fromExp: Expression, @@ -208,17 +256,19 @@ object UnwrapCastInBinaryComparison extends Rule[LogicalPlan] { literalType: DataType): Boolean = { toType.sameType(literalType) && !fromExp.foldable && - fromExp.dataType.isInstanceOf[IntegralType] && - toType.isInstanceOf[IntegralType] && + fromExp.dataType.isInstanceOf[NumericType] && + toType.isInstanceOf[NumericType] && Cast.canUpCast(fromExp.dataType, toType) } - private def getRange(dt: DataType): (Any, Any) = dt match { - case ByteType => (Byte.MinValue, Byte.MaxValue) - case ShortType => (Short.MinValue, Short.MaxValue) - case IntegerType => (Int.MinValue, Int.MaxValue) - case LongType => (Long.MinValue, Long.MaxValue) - case other => throw new IllegalArgumentException(s"Unsupported type: ${other.catalogString}") + private[optimizer] def getRange(dt: DataType): Option[(Any, Any)] = dt match { + case ByteType => Some((Byte.MinValue, Byte.MaxValue)) + case ShortType => Some((Short.MinValue, Short.MaxValue)) + case IntegerType => Some((Int.MinValue, Int.MaxValue)) + case LongType => Some((Long.MinValue, Long.MaxValue)) + case FloatType => Some((Float.NegativeInfinity, Float.NaN)) + case DoubleType => Some((Double.NegativeInfinity, Double.NaN)) + case _ => None } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala index 373c1febd2488..0afb166b80ca5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala @@ -36,8 +36,10 @@ class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelp NullPropagation, UnwrapCastInBinaryComparison) :: Nil } - val testRelation: LocalRelation = LocalRelation('a.short, 'b.float) + val testRelation: LocalRelation = LocalRelation('a.short, 'b.float, 'c.decimal(5, 2)) val f: BoundReference = 'a.short.canBeNull.at(0) + val f2: BoundReference = 'b.float.canBeNull.at(1) + val f3: BoundReference = 'c.decimal(5, 2).canBeNull.at(2) test("unwrap casts when literal == max") { val v = Short.MaxValue @@ -47,6 +49,14 @@ class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelp assertEquivalent(castInt(f) <=> v.toInt, f <=> v) 
assertEquivalent(castInt(f) <= v.toInt, trueIfNotNull(f)) assertEquivalent(castInt(f) < v.toInt, f =!= v) + + val d = Float.NaN + assertEquivalent(castDouble(f2) > d.toDouble, falseIfNotNull(f2)) + assertEquivalent(castDouble(f2) >= d.toDouble, f2 === d) + assertEquivalent(castDouble(f2) === d.toDouble, f2 === d) + assertEquivalent(castDouble(f2) <=> d.toDouble, f2 <=> d) + assertEquivalent(castDouble(f2) <= d.toDouble, trueIfNotNull(f2)) + assertEquivalent(castDouble(f2) < d.toDouble, f2 =!= d) } test("unwrap casts when literal > max") { @@ -67,6 +77,23 @@ class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelp assertEquivalent(castInt(f) <=> v.toInt, f <=> v) assertEquivalent(castInt(f) <= v.toInt, f === v) assertEquivalent(castInt(f) < v.toInt, falseIfNotNull(f)) + + val d = Float.NegativeInfinity + assertEquivalent(castDouble(f2) > d.toDouble, f2 =!= d) + assertEquivalent(castDouble(f2) >= d.toDouble, trueIfNotNull(f2)) + assertEquivalent(castDouble(f2) === d.toDouble, f2 === d) + assertEquivalent(castDouble(f2) <=> d.toDouble, f2 <=> d) + assertEquivalent(castDouble(f2) <= d.toDouble, f2 === d) + assertEquivalent(castDouble(f2) < d.toDouble, falseIfNotNull(f2)) + + // Double.NegativeInfinity == Float.NegativeInfinity + val d2 = Double.NegativeInfinity + assertEquivalent(castDouble(f2) > d2, f2 =!= d) + assertEquivalent(castDouble(f2) >= d2, trueIfNotNull(f2)) + assertEquivalent(castDouble(f2) === d2, f2 === d) + assertEquivalent(castDouble(f2) <=> d2, f2 <=> d) + assertEquivalent(castDouble(f2) <= d2, f2 === d) + assertEquivalent(castDouble(f2) < d2, falseIfNotNull(f2)) } test("unwrap casts when literal < min") { @@ -79,13 +106,65 @@ class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelp assertEquivalent(castInt(f) < v, falseIfNotNull(f)) } - test("unwrap casts when literal is within range (min, max)") { - assertEquivalent(castInt(f) > 300, f > 300.toShort) - assertEquivalent(castInt(f) >= 500, f >= 500.toShort) - assertEquivalent(castInt(f) === 32766, f === 32766.toShort) - assertEquivalent(castInt(f) <=> 32766, f <=> 32766.toShort) - assertEquivalent(castInt(f) <= -6000, f <= -6000.toShort) - assertEquivalent(castInt(f) < -32767, f < -32767.toShort) + test("unwrap casts when literal is within range (min, max) or fromType has no range") { + Seq(300, 500, 32766, -6000, -32767).foreach(v => { + assertEquivalent(castInt(f) > v, f > v.toShort) + assertEquivalent(castInt(f) >= v, f >= v.toShort) + assertEquivalent(castInt(f) === v, f === v.toShort) + assertEquivalent(castInt(f) <=> v, f <=> v.toShort) + assertEquivalent(castInt(f) <= v, f <= v.toShort) + assertEquivalent(castInt(f) < v, f < v.toShort) + }) + + Seq(3.14.toFloat.toDouble, -1000.0.toFloat.toDouble, + 20.0.toFloat.toDouble, -2.414.toFloat.toDouble, + Float.MinValue.toDouble, Float.MaxValue.toDouble, Float.PositiveInfinity.toDouble + ).foreach(v => { + assertEquivalent(castDouble(f2) > v, f2 > v.toFloat) + assertEquivalent(castDouble(f2) >= v, f2 >= v.toFloat) + assertEquivalent(castDouble(f2) === v, f2 === v.toFloat) + assertEquivalent(castDouble(f2) <=> v, f2 <=> v.toFloat) + assertEquivalent(castDouble(f2) <= v, f2 <= v.toFloat) + assertEquivalent(castDouble(f2) < v, f2 < v.toFloat) + }) + + Seq(decimal2(100.20), decimal2(-200.50)).foreach(v => { + assertEquivalent(castDecimal2(f3) > v, f3 > decimal(v)) + assertEquivalent(castDecimal2(f3) >= v, f3 >= decimal(v)) + assertEquivalent(castDecimal2(f3) === v, f3 === decimal(v)) + assertEquivalent(castDecimal2(f3) <=> v, f3 <=> 
decimal(v)) + assertEquivalent(castDecimal2(f3) <= v, f3 <= decimal(v)) + assertEquivalent(castDecimal2(f3) < v, f3 < decimal(v)) + }) + } + + test("unwrap cast when literal is within range (min, max) AND has round up or down") { + // Cases for rounding down + var doubleValue = 100.6 + assertEquivalent(castDouble(f) > doubleValue, f > doubleValue.toShort) + assertEquivalent(castDouble(f) >= doubleValue, f > doubleValue.toShort) + assertEquivalent(castDouble(f) === doubleValue, falseIfNotNull(f)) + assertEquivalent(castDouble(f) <=> doubleValue, false) + assertEquivalent(castDouble(f) <= doubleValue, f <= doubleValue.toShort) + assertEquivalent(castDouble(f) < doubleValue, f <= doubleValue.toShort) + + // Cases for rounding up: 3.14 will be rounded to 3.14000010... after casting to float + doubleValue = 3.14 + assertEquivalent(castDouble(f2) > doubleValue, f2 >= doubleValue.toFloat) + assertEquivalent(castDouble(f2) >= doubleValue, f2 >= doubleValue.toFloat) + assertEquivalent(castDouble(f2) === doubleValue, falseIfNotNull(f2)) + assertEquivalent(castDouble(f2) <=> doubleValue, false) + assertEquivalent(castDouble(f2) <= doubleValue, f2 < doubleValue.toFloat) + assertEquivalent(castDouble(f2) < doubleValue, f2 < doubleValue.toFloat) + + // Another case: 400.5678 is rounded up to 400.57 + val decimalValue = decimal2(400.5678) + assertEquivalent(castDecimal2(f3) > decimalValue, f3 >= decimal(decimalValue)) + assertEquivalent(castDecimal2(f3) >= decimalValue, f3 >= decimal(decimalValue)) + assertEquivalent(castDecimal2(f3) === decimalValue, falseIfNotNull(f3)) + assertEquivalent(castDecimal2(f3) <=> decimalValue, false) + assertEquivalent(castDecimal2(f3) <= decimalValue, f3 < decimal(decimalValue)) + assertEquivalent(castDecimal2(f3) < decimalValue, f3 < decimal(decimalValue)) } test("unwrap casts when cast is on rhs") { @@ -100,27 +179,8 @@ class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelp assertEquivalent(Literal(30) <= castInt(f), Literal(30.toShort, ShortType) <= f) } - test("unwrap cast should have no effect when input is not integral type") { - Seq( - castDouble('b) > 42.0, - castDouble('b) >= 42.0, - castDouble('b) === 42.0, - castDouble('b) <=> 42.0, - castDouble('b) <= 42.0, - castDouble('b) < 42.0, - Literal(42.0) > castDouble('b), - Literal(42.0) >= castDouble('b), - Literal(42.0) === castDouble('b), - Literal(42.0) <=> castDouble('b), - Literal(42.0) <= castDouble('b), - Literal(42.0) < castDouble('b) - ).foreach(e => - assertEquivalent(e, e, evaluate = false) - ) - } - - test("unwrap cast should skip when expression is non-deterministic or foldable") { - Seq(positiveInt, negativeInt).foreach (v => { + test("unwrap cast should skip when expression is non-deterministic or foldable") { + Seq(positiveInt, negativeInt).foreach(v => { val e = Cast(First(f, ignoreNulls = true), IntegerType) <=> v assertEquivalent(e, e, evaluate = false) val e2 = Cast(Literal(30.toShort), IntegerType) >= v @@ -139,13 +199,46 @@ class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelp assertEquivalent(castInt(f) < intLit, nullLit) } + test("unwrap casts should skip if downcast failed") { + val decimalValue = decimal2(123456.1234) + assertEquivalent(castDecimal2(f3) === decimalValue, castDecimal2(f3) === decimalValue) + } + test("unwrap cast should skip if cannot coerce type") { assertEquivalent(Cast(f, ByteType) > 100.toByte, Cast(f, ByteType) > 100.toByte) } - private def castInt(e: Expression): Expression = Cast(e, IntegerType) + test("test 
getRange()") { + assert(Some((Byte.MinValue, Byte.MaxValue)) === getRange(ByteType)) + assert(Some((Short.MinValue, Short.MaxValue)) === getRange(ShortType)) + assert(Some((Int.MinValue, Int.MaxValue)) === getRange(IntegerType)) + assert(Some((Long.MinValue, Long.MaxValue)) === getRange(LongType)) + val floatRange = getRange(FloatType) + assert(floatRange.isDefined) + val (floatMin, floatMax) = floatRange.get + assert(floatMin.isInstanceOf[Float]) + assert(floatMin.asInstanceOf[Float].isNegInfinity) + assert(floatMax.isInstanceOf[Float]) + assert(floatMax.asInstanceOf[Float].isNaN) + + val doubleRange = getRange(DoubleType) + assert(doubleRange.isDefined) + val (doubleMin, doubleMax) = doubleRange.get + assert(doubleMin.isInstanceOf[Double]) + assert(doubleMin.asInstanceOf[Double].isNegInfinity) + assert(doubleMax.isInstanceOf[Double]) + assert(doubleMax.asInstanceOf[Double].isNaN) + + assert(getRange(DecimalType(5, 2)).isEmpty) + } + + private def castInt(e: Expression): Expression = Cast(e, IntegerType) private def castDouble(e: Expression): Expression = Cast(e, DoubleType) + private def castDecimal2(e: Expression): Expression = Cast(e, DecimalType(10, 4)) + + private def decimal(v: Decimal): Decimal = Decimal(v.toJavaBigDecimal, 5, 2) + private def decimal2(v: BigDecimal): Decimal = Decimal(v, 10, 4) private def assertEquivalent(e1: Expression, e2: Expression, evaluate: Boolean = true): Unit = { val plan = testRelation.where(e1).analyze @@ -154,8 +247,17 @@ class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelp comparePlans(actual, expected) if (evaluate) { - Seq(100.toShort, -300.toShort, null).foreach(v => { - val row = create_row(v) + Seq( + (100.toShort, 3.14.toFloat, decimal2(100)), + (-300.toShort, 3.1415927.toFloat, decimal2(-3000.50)), + (null, Float.NaN, decimal2(12345.6789)), + (null, null, null), + (Short.MaxValue, Float.PositiveInfinity, decimal2(Short.MaxValue)), + (Short.MinValue, Float.NegativeInfinity, decimal2(Short.MinValue)), + (0.toShort, Float.MaxValue, decimal2(0)), + (0.toShort, Float.MinValue, decimal2(0.01)) + ).foreach(v => { + val row = create_row(v._1, v._2, v._3) checkEvaluation(e1, e2.eval(row), row) }) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UnwrapCastInComparisonEndToEndSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UnwrapCastInComparisonEndToEndSuite.scala new file mode 100644 index 0000000000000..e6f0426428bd4 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/UnwrapCastInComparisonEndToEndSuite.scala @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql + +import org.apache.spark.sql.catalyst.expressions.IntegralLiteralTestUtils.{negativeInt, positiveInt} +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.Decimal + +class UnwrapCastInComparisonEndToEndSuite extends QueryTest with SharedSparkSession { + import testImplicits._ + + val t = "test_table" + + test("cases when literal is max") { + withTable(t) { + Seq[(Integer, java.lang.Short, java.lang.Float)]( + (1, 100.toShort, 3.14.toFloat), + (2, Short.MaxValue, Float.NaN), + (3, Short.MinValue, Float.PositiveInfinity), + (4, 0.toShort, Float.MaxValue), + (5, null, null)) + .toDF("c1", "c2", "c3").write.saveAsTable(t) + val df = spark.table(t) + + val lit = Short.MaxValue.toInt + checkAnswer(df.where(s"c2 > $lit").select("c1"), Seq.empty) + checkAnswer(df.where(s"c2 >= $lit").select("c1"), Row(2)) + checkAnswer(df.where(s"c2 == $lit").select("c1"), Row(2)) + checkAnswer(df.where(s"c2 <=> $lit").select("c1"), Row(2)) + checkAnswer(df.where(s"c2 != $lit").select("c1"), Row(1) :: Row(3) :: Row(4) :: Nil) + checkAnswer(df.where(s"c2 <= $lit").select("c1"), Row(1) :: Row(2) :: Row(3) :: Row(4) :: Nil) + checkAnswer(df.where(s"c2 < $lit").select("c1"), Row(1) :: Row(3) :: Row(4) :: Nil) + + checkAnswer(df.where(s"c3 > double('nan')").select("c1"), Seq.empty) + checkAnswer(df.where(s"c3 >= double('nan')").select("c1"), Row(2)) + checkAnswer(df.where(s"c3 == double('nan')").select("c1"), Row(2)) + checkAnswer(df.where(s"c3 <=> double('nan')").select("c1"), Row(2)) + checkAnswer(df.where(s"c3 != double('nan')").select("c1"), Row(1) :: Row(3) :: Row(4) :: Nil) + checkAnswer(df.where(s"c3 <= double('nan')").select("c1"), + Row(1) :: Row(2) :: Row(3) :: Row(4) :: Nil) + checkAnswer(df.where(s"c3 < double('nan')").select("c1"), Row(1) :: Row(3) :: Row(4) :: Nil) + } + } + + test("cases when literal is > max") { + withTable(t) { + Seq[(Integer, java.lang.Short)]( + (1, 100.toShort), + (2, Short.MaxValue), + (3, null)) + .toDF("c1", "c2").write.saveAsTable(t) + val df = spark.table(t) + val lit = positiveInt + checkAnswer(df.where(s"c2 > $lit").select("c1"), Seq.empty) + checkAnswer(df.where(s"c2 >= $lit").select("c1"), Seq.empty) + checkAnswer(df.where(s"c2 == $lit").select("c1"), Seq.empty) + checkAnswer(df.where(s"c2 <=> $lit").select("c1"), Seq.empty) + checkAnswer(df.where(s"c2 != $lit").select("c1"), Row(1) :: Row(2) :: Nil) + checkAnswer(df.where(s"c2 <= $lit").select("c1"), Row(1) :: Row(2) :: Nil) + checkAnswer(df.where(s"c2 < $lit").select("c1"), Row(1) :: Row(2) :: Nil) + + // No test for float case since NaN is greater than any other numeric value + } + } + + test("cases when literal is min") { + withTable(t) { + Seq[(Integer, java.lang.Short, java.lang.Float)]( + (1, 100.toShort, 3.14.toFloat), + (2, Short.MinValue, Float.NegativeInfinity), + (3, Short.MaxValue, Float.MinValue), + (4, null, null)) + .toDF("c1", "c2", "c3").write.saveAsTable(t) + val df = spark.table(t) + + val lit = Short.MinValue.toInt + checkAnswer(df.where(s"c2 > $lit").select("c1"), Row(1) :: Row(3) :: Nil) + checkAnswer(df.where(s"c2 >= $lit").select("c1"), Row(1) :: Row(2) :: Row(3) :: Nil) + checkAnswer(df.where(s"c2 == $lit").select("c1"), Row(2)) + checkAnswer(df.where(s"c2 <=> $lit").select("c1"), Row(2)) + checkAnswer(df.where(s"c2 != $lit").select("c1"), Row(1) :: Row(3) :: Nil) + checkAnswer(df.where(s"c2 <= $lit").select("c1"), Row(2)) + checkAnswer(df.where(s"c2 < $lit").select("c1"), Seq.empty) + + checkAnswer(df.where(s"c3 > 
double('-inf')").select("c1"), Row(1) :: Row(3) :: Nil) + checkAnswer(df.where(s"c3 >= double('-inf')").select("c1"), Row(1) :: Row(2) :: Row(3) :: Nil) + checkAnswer(df.where(s"c3 == double('-inf')").select("c1"), Row(2)) + checkAnswer(df.where(s"c3 <=> double('-inf')").select("c1"), Row(2)) + checkAnswer(df.where(s"c3 != double('-inf')").select("c1"), Row(1) :: Row(3) :: Nil) + checkAnswer(df.where(s"c3 <= double('-inf')").select("c1"), Row(2) :: Nil) + checkAnswer(df.where(s"c3 < double('-inf')").select("c1"), Seq.empty) + } + } + + test("cases when literal is < min") { + val t = "test_table" + withTable(t) { + Seq[(Integer, java.lang.Short)]( + (1, 100.toShort), + (2, Short.MinValue), + (3, null)) + .toDF("c1", "c2").write.saveAsTable(t) + val df = spark.table(t) + + val lit = negativeInt + checkAnswer(df.where(s"c2 > $lit").select("c1"), Row(1) :: Row(2) :: Nil) + checkAnswer(df.where(s"c2 >= $lit").select("c1"), Row(1) :: Row(2) :: Nil) + checkAnswer(df.where(s"c2 == $lit").select("c1"), Seq.empty) + checkAnswer(df.where(s"c2 <=> $lit").select("c1"), Seq.empty) + checkAnswer(df.where(s"c2 != $lit").select("c1"), Row(1) :: Row(2) :: Nil) + checkAnswer(df.where(s"c2 <= $lit").select("c1"), Seq.empty) + checkAnswer(df.where(s"c2 < $lit").select("c1"), Seq.empty) + } + } + + test("cases when literal is within range (min, max)") { + withTable(t) { + Seq((1, 300.toShort), (2, 500.toShort)).toDF("c1", "c2").write.saveAsTable(t) + val df = spark.table(t) + + checkAnswer(df.where("c2 < 200").select("c1"), Seq.empty) + checkAnswer(df.where("c2 < 400").select("c1"), Row(1) :: Nil) + checkAnswer(df.where("c2 < 600").select("c1"), Row(1) :: Row(2) :: Nil) + + checkAnswer(df.where("c2 <= 100").select("c1"), Seq.empty) + checkAnswer(df.where("c2 <= 300").select("c1"), Row(1) :: Nil) + checkAnswer(df.where("c2 <= 500").select("c1"), Row(1) :: Row(2) :: Nil) + + checkAnswer(df.where("c2 == 100").select("c1"), Seq.empty) + checkAnswer(df.where("c2 == 300").select("c1"), Row(1) :: Nil) + checkAnswer(df.where("c2 == 500").select("c1"), Row(2) :: Nil) + + checkAnswer(df.where("c2 <=> 100").select("c1"), Seq.empty) + checkAnswer(df.where("c2 <=> 300").select("c1"), Row(1) :: Nil) + checkAnswer(df.where("c2 <=> 500").select("c1"), Row(2) :: Nil) + checkAnswer(df.where("c2 <=> null").select("c1"), Seq.empty) + + checkAnswer(df.where("c2 >= 200").select("c1"), Row(1) :: Row(2) :: Nil) + checkAnswer(df.where("c2 >= 400").select("c1"), Row(2) :: Nil) + checkAnswer(df.where("c2 >= 600").select("c1"), Seq.empty) + + checkAnswer(df.where("c2 > 100").select("c1"), Row(1) :: Row(2) :: Nil) + checkAnswer(df.where("c2 > 300").select("c1"), Row(2) :: Nil) + checkAnswer(df.where("c2 > 500").select("c1"), Seq.empty) + } + } + + test("cases when literal is within range (min, max) and has rounding up or down") { + withTable(t) { + Seq((1, 100, 3.14.toFloat, decimal(200.12))) + .toDF("c1", "c2", "c3", "c4").write.saveAsTable(t) + val df = spark.table(t) + + checkAnswer(df.where("c2 > 99.6").select("c1"), Row(1)) + checkAnswer(df.where("c2 > 100.4").select("c1"), Seq.empty) + checkAnswer(df.where("c2 == 100.4").select("c1"), Seq.empty) + checkAnswer(df.where("c2 <=> 100.4").select("c1"), Seq.empty) + checkAnswer(df.where("c2 < 99.6").select("c1"), Seq.empty) + checkAnswer(df.where("c2 < 100.4").select("c1"), Row(1)) + + checkAnswer(df.where("c3 >= 3.14").select("c1"), Row(1)) + // float(3.14) is casted to double(3.140000104904175) + checkAnswer(df.where("c3 >= 3.14000010").select("c1"), Row(1)) + checkAnswer(df.where("c3 == 
3.14").select("c1"), Seq.empty) + checkAnswer(df.where("c3 <=> 3.14").select("c1"), Seq.empty) + checkAnswer(df.where("c3 < 3.14000010").select("c1"), Seq.empty) + checkAnswer(df.where("c3 <= 3.14").select("c1"), Seq.empty) + + checkAnswer(df.where("c4 > cast(200.1199 as decimal(10, 4))").select("c1"), Row(1)) + checkAnswer(df.where("c4 >= cast(200.1201 as decimal(10, 4))").select("c1"), Seq.empty) + checkAnswer(df.where("c4 == cast(200.1156 as decimal(10, 4))").select("c1"), Seq.empty) + checkAnswer(df.where("c4 <=> cast(200.1201 as decimal(10, 4))").select("c1"), Seq.empty) + checkAnswer(df.where("c4 <= cast(200.1201 as decimal(10, 4))").select("c1"), Row(1)) + checkAnswer(df.where("c4 < cast(200.1159 as decimal(10, 4))").select("c1"), Seq.empty) + } + } + + private def decimal(v: BigDecimal): Decimal = Decimal(v, 5, 2) +} From af3e2f7d58507a47e2d767552209c309637a3170 Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Tue, 13 Oct 2020 12:57:54 +0000 Subject: [PATCH 0228/1009] [SPARK-33081][SQL] Support ALTER TABLE in JDBC v2 Table Catalog: update type and nullability of columns (DB2 dialect) ### What changes were proposed in this pull request? - Override the default SQL strings in the DB2 Dialect for: * ALTER TABLE UPDATE COLUMN TYPE * ALTER TABLE UPDATE COLUMN NULLABILITY - Add new docker integration test suite jdbc/v2/DB2IntegrationSuite.scala ### Why are the changes needed? In SPARK-24907, we implemented JDBC v2 Table Catalog but it doesn't support some ALTER TABLE at the moment. This PR supports DB2 specific ALTER TABLE. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? By running new integration test suite: $ ./build/sbt -Pdocker-integration-tests "test-only *.DB2IntegrationSuite" Closes #29972 from huaxingao/db2_docker. Authored-by: Huaxin Gao Signed-off-by: Wenchen Fan --- .../sql/jdbc/v2/DB2IntegrationSuite.scala | 76 ++++++++++++++ .../sql/jdbc/v2/OracleIntegrationSuite.scala | 92 +++-------------- .../apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 98 +++++++++++++++++++ .../sql/catalyst/parser/DDLParserSuite.scala | 7 ++ .../apache/spark/sql/jdbc/DB2Dialect.scala | 20 ++++ 5 files changed, 216 insertions(+), 77 deletions(-) create mode 100644 external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala create mode 100644 external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala new file mode 100644 index 0000000000000..82f9f978c5da2 --- /dev/null +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.jdbc.v2 + +import java.sql.Connection + +import org.apache.spark.SparkConf +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog +import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite} +import org.apache.spark.sql.types._ +import org.apache.spark.tags.DockerTest + +/** + * To run this test suite for a specific version (e.g., ibmcom/db2:11.5.4.0): + * {{{ + * DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0 + * ./build/sbt -Pdocker-integration-tests "test-only *DB2IntegrationSuite" + * }}} + */ +@DockerTest +class DB2IntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { + override val catalogName: String = "db2" + override val db = new DatabaseOnDocker { + override val imageName = sys.env.getOrElse("DB2_DOCKER_IMAGE_NAME", "ibmcom/db2:11.5.4.0") + override val env = Map( + "DB2INST1_PASSWORD" -> "rootpass", + "LICENSE" -> "accept", + "DBNAME" -> "foo", + "ARCHIVE_LOGS" -> "false", + "AUTOCONFIG" -> "false" + ) + override val usesIpc = false + override val jdbcPort: Int = 50000 + override val privileged = true + override def getJdbcUrl(ip: String, port: Int): String = + s"jdbc:db2://$ip:$port/foo:user=db2inst1;password=rootpass;retrieveMessagesFromServerOnGetMessage=true;" //scalastyle:ignore + } + + override def sparkConf: SparkConf = super.sparkConf + .set("spark.sql.catalog.db2", classOf[JDBCTableCatalog].getName) + .set("spark.sql.catalog.db2.url", db.getJdbcUrl(dockerIp, externalPort)) + + override def dataPreparation(conn: Connection): Unit = {} + + override def testUpdateColumnType(tbl: String): Unit = { + sql(s"CREATE TABLE $tbl (ID INTEGER) USING _") + var t = spark.table(tbl) + var expectedSchema = new StructType().add("ID", IntegerType) + assert(t.schema === expectedSchema) + sql(s"ALTER TABLE $tbl ALTER COLUMN id TYPE DOUBLE") + t = spark.table(tbl) + expectedSchema = new StructType().add("ID", DoubleType) + assert(t.schema === expectedSchema) + // Update column type from DOUBLE to STRING + val msg1 = intercept[AnalysisException] { + sql(s"ALTER TABLE $tbl ALTER COLUMN id TYPE VARCHAR(10)") + }.getMessage + assert(msg1.contains("Cannot update alt_table field ID: double cannot be cast to varchar")) + } +} diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala index 400459c0ea17b..1b51d43c1d139 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala @@ -23,10 +23,8 @@ import org.scalatest.time.SpanSugar._ import org.apache.spark.SparkConf import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog import org.apache.spark.sql.jdbc.{DatabaseOnDocker, 
DockerJDBCIntegrationSuite} -import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ import org.apache.spark.tags.DockerTest @@ -54,7 +52,8 @@ import org.apache.spark.tags.DockerTest * It has been validated with 18.4.0 Express Edition. */ @DockerTest -class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSparkSession { +class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { + override val catalogName: String = "oracle" override val db = new DatabaseOnDocker { override val imageName = sys.env("ORACLE_DOCKER_IMAGE_NAME") override val env = Map( @@ -73,80 +72,19 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark override val connectionTimeout = timeout(7.minutes) override def dataPreparation(conn: Connection): Unit = {} - test("SPARK-33034: ALTER TABLE ... add new columns") { - withTable("oracle.alt_table") { - sql("CREATE TABLE oracle.alt_table (ID STRING) USING _") - sql("ALTER TABLE oracle.alt_table ADD COLUMNS (C1 STRING, C2 STRING)") - var t = spark.table("oracle.alt_table") - var expectedSchema = new StructType() - .add("ID", StringType) - .add("C1", StringType) - .add("C2", StringType) - assert(t.schema === expectedSchema) - sql("ALTER TABLE oracle.alt_table ADD COLUMNS (C3 STRING)") - t = spark.table("oracle.alt_table") - expectedSchema = expectedSchema.add("C3", StringType) - assert(t.schema === expectedSchema) - // Add already existing column - val msg = intercept[AnalysisException] { - sql(s"ALTER TABLE oracle.alt_table ADD COLUMNS (C3 DOUBLE)") - }.getMessage - assert(msg.contains("Cannot add column, because C3 already exists")) - } - // Add a column to not existing table - val msg = intercept[AnalysisException] { - sql(s"ALTER TABLE oracle.not_existing_table ADD COLUMNS (C4 STRING)") + override def testUpdateColumnType(tbl: String): Unit = { + sql(s"CREATE TABLE $tbl (ID INTEGER) USING _") + var t = spark.table(tbl) + var expectedSchema = new StructType().add("ID", DecimalType(10, 0)) + assert(t.schema === expectedSchema) + sql(s"ALTER TABLE $tbl ALTER COLUMN id TYPE STRING") + t = spark.table(tbl) + expectedSchema = new StructType().add("ID", StringType) + assert(t.schema === expectedSchema) + // Update column type from STRING to INTEGER + val msg1 = intercept[AnalysisException] { + sql(s"ALTER TABLE $tbl ALTER COLUMN id TYPE INTEGER") }.getMessage - assert(msg.contains("Table not found")) - } - - test("SPARK-33034: ALTER TABLE ... 
update column type") { - withTable("oracle.alt_table") { - sql("CREATE TABLE oracle.alt_table (ID INTEGER) USING _") - sql("ALTER TABLE oracle.alt_table ALTER COLUMN id TYPE STRING") - val t = spark.table("oracle.alt_table") - val expectedSchema = new StructType().add("ID", StringType) - assert(t.schema === expectedSchema) - // Update column type from STRING to INTEGER - val msg1 = intercept[AnalysisException] { - sql("ALTER TABLE oracle.alt_table ALTER COLUMN id TYPE INTEGER") - }.getMessage - assert(msg1.contains("Cannot update alt_table field ID: string cannot be cast to int")) - // Update not existing column - val msg2 = intercept[AnalysisException] { - sql("ALTER TABLE oracle.alt_table ALTER COLUMN bad_column TYPE DOUBLE") - }.getMessage - assert(msg2.contains("Cannot update missing field bad_column")) - // Update column to wrong type - val msg3 = intercept[ParseException] { - sql("ALTER TABLE oracle.alt_table ALTER COLUMN id TYPE bad_type") - }.getMessage - assert(msg3.contains("DataType bad_type is not supported")) - } - // Update column type in not existing table - val msg = intercept[AnalysisException] { - sql(s"ALTER TABLE oracle.not_existing_table ALTER COLUMN id TYPE DOUBLE") - }.getMessage - assert(msg.contains("Table not found")) - } - - test("SPARK-33034: ALTER TABLE ... update column nullability") { - withTable("oracle.alt_table") { - sql("CREATE TABLE oracle.alt_table (ID STRING NOT NULL) USING _") - sql("ALTER TABLE oracle.alt_table ALTER COLUMN ID DROP NOT NULL") - val t = spark.table("oracle.alt_table") - val expectedSchema = new StructType().add("ID", StringType, nullable = true) - assert(t.schema === expectedSchema) - // Update nullability of not existing column - val msg = intercept[AnalysisException] { - sql("ALTER TABLE oracle.alt_table ALTER COLUMN bad_column DROP NOT NULL") - }.getMessage - assert(msg.contains("Cannot update missing field bad_column")) - } - // Update column nullability in not existing table - val msg = intercept[AnalysisException] { - sql(s"ALTER TABLE oracle.not_existing_table ALTER COLUMN ID DROP NOT NULL") - }.getMessage - assert(msg.contains("Table not found")) + assert(msg1.contains("Cannot update alt_table field ID: string cannot be cast to int")) } } diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala new file mode 100644 index 0000000000000..384bcc22f27d8 --- /dev/null +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.jdbc.v2 + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types._ +import org.apache.spark.tags.DockerTest + +@DockerTest +trait V2JDBCTest extends SharedSparkSession { + val catalogName: String + // dialect specific update column type test + def testUpdateColumnType(tbl: String): Unit + + test("SPARK-33034: ALTER TABLE ... add new columns") { + withTable(s"$catalogName.alt_table") { + sql(s"CREATE TABLE $catalogName.alt_table (ID STRING) USING _") + var t = spark.table(s"$catalogName.alt_table") + var expectedSchema = new StructType().add("ID", StringType) + assert(t.schema === expectedSchema) + sql(s"ALTER TABLE $catalogName.alt_table ADD COLUMNS (C1 STRING, C2 STRING)") + t = spark.table(s"$catalogName.alt_table") + expectedSchema = expectedSchema.add("C1", StringType).add("C2", StringType) + assert(t.schema === expectedSchema) + sql(s"ALTER TABLE $catalogName.alt_table ADD COLUMNS (C3 STRING)") + t = spark.table(s"$catalogName.alt_table") + expectedSchema = expectedSchema.add("C3", StringType) + assert(t.schema === expectedSchema) + // Add already existing column + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $catalogName.alt_table ADD COLUMNS (C3 DOUBLE)") + }.getMessage + assert(msg.contains("Cannot add column, because C3 already exists")) + } + // Add a column to not existing table + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $catalogName.not_existing_table ADD COLUMNS (C4 STRING)") + }.getMessage + assert(msg.contains("Table not found")) + } + + test("SPARK-33034: ALTER TABLE ... update column type") { + withTable(s"$catalogName.alt_table") { + testUpdateColumnType(s"$catalogName.alt_table") + // Update not existing column + val msg2 = intercept[AnalysisException] { + sql(s"ALTER TABLE $catalogName.alt_table ALTER COLUMN bad_column TYPE DOUBLE") + }.getMessage + assert(msg2.contains("Cannot update missing field bad_column")) + } + // Update column type in not existing table + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $catalogName.not_existing_table ALTER COLUMN id TYPE DOUBLE") + }.getMessage + assert(msg.contains("Table not found")) + } + + test("SPARK-33034: ALTER TABLE ... 
update column nullability") { + withTable(s"$catalogName.alt_table") { + sql(s"CREATE TABLE $catalogName.alt_table (ID STRING NOT NULL) USING _") + var t = spark.table(s"$catalogName.alt_table") + // nullable is true in the expecteSchema because Spark always sets nullable to true + // regardless of the JDBC metadata https://github.com/apache/spark/pull/18445 + var expectedSchema = new StructType().add("ID", StringType, nullable = true) + assert(t.schema === expectedSchema) + sql(s"ALTER TABLE $catalogName.alt_table ALTER COLUMN ID DROP NOT NULL") + t = spark.table(s"$catalogName.alt_table") + expectedSchema = new StructType().add("ID", StringType, nullable = true) + assert(t.schema === expectedSchema) + // Update nullability of not existing column + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $catalogName.alt_table ALTER COLUMN bad_column DROP NOT NULL") + }.getMessage + assert(msg.contains("Cannot update missing field bad_column")) + } + // Update column nullability in not existing table + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $catalogName.not_existing_table ALTER COLUMN ID DROP NOT NULL") + }.getMessage + assert(msg.contains("Table not found")) + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 8b8531b2bb3b1..621d416c55457 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -591,6 +591,13 @@ class DDLParserSuite extends AnalysisTest { None)) } + test("alter table: update column type invalid type") { + val msg = intercept[ParseException] { + parsePlan("ALTER TABLE table_name ALTER COLUMN a.b.c TYPE bad_type") + }.getMessage + assert(msg.contains("DataType bad_type is not supported")) + } + test("alter table: update column type") { comparePlans( parsePlan("ALTER TABLE table_name CHANGE COLUMN a.b.c TYPE bigint"), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala index 430ca9edab799..908e03726d887 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala @@ -58,4 +58,24 @@ private object DB2Dialect extends JdbcDialect { override def renameTable(oldTable: String, newTable: String): String = { s"RENAME TABLE $oldTable TO $newTable" } + + // scalastyle:off line.size.limit + // See https://www.ibm.com/support/knowledgecenter/en/SSEPGG_11.5.0/com.ibm.db2.luw.sql.ref.doc/doc/r0000888.html + // scalastyle:on line.size.limit + override def getUpdateColumnTypeQuery( + tableName: String, + columnName: String, + newDataType: String): String = + s"ALTER TABLE $tableName ALTER COLUMN $columnName SET DATA TYPE $newDataType" + + // scalastyle:off line.size.limit + // See https://www.ibm.com/support/knowledgecenter/en/SSEPGG_11.5.0/com.ibm.db2.luw.sql.ref.doc/doc/r0000888.html + // scalastyle:on line.size.limit + override def getUpdateColumnNullabilityQuery( + tableName: String, + columnName: String, + isNullable: Boolean): String = { + val nullable = if (isNullable) "DROP NOT NULL" else "SET NOT NULL" + s"ALTER TABLE $tableName ALTER COLUMN $columnName $nullable" + } } From 2b7239edfb02dc74415f6c9e6a675e1ba46ac195 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Tue, 13 Oct 2020 13:12:17 
+0000 Subject: [PATCH 0229/1009] [SPARK-33125][SQL] Improve the error when Lead and Lag are not allowed to specify window frame ### What changes were proposed in this pull request? Except for PostgreSQL, other data sources (for example: Vertica, Oracle, Redshift, MySQL, Presto) do not allow specifying a window frame for the Lead and Lag functions. But the current error message is not clear enough: `Window Frame $f must match the required frame`. This PR uses the following error message instead: `Cannot specify window frame for lead function` ### Why are the changes needed? To make the error message clearer. ### Does this PR introduce _any_ user-facing change? Yes. Users will see a clearer error message. ### How was this patch tested? Jenkins test. Closes #30021 from beliefer/SPARK-33125. Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 3 +++ .../spark/sql/catalyst/analysis/AnalysisErrorSuite.scala | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 77a6631b250e8..337cf1c0bdc50 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2974,6 +2974,9 @@ class Analyzer( */ object ResolveWindowFrame extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions { + case WindowExpression(wf: OffsetWindowFunction, + WindowSpecDefinition(_, _, f: SpecifiedWindowFrame)) if wf.frame != f => + failAnalysis(s"Cannot specify window frame for ${wf.prettyName} function") case WindowExpression(wf: WindowFunction, WindowSpecDefinition(_, _, f: SpecifiedWindowFrame)) if wf.frame != UnspecifiedFrame && wf.frame != f => failAnalysis(s"Window Frame $f must match the required frame ${wf.frame}") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index d3a14e511cdc2..44128c4419951 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -231,7 +231,7 @@ class AnalysisErrorSuite extends AnalysisTest { UnresolvedAttribute("a") :: Nil, SortOrder(UnresolvedAttribute("b"), Ascending) :: Nil, SpecifiedWindowFrame(RangeFrame, Literal(1), Literal(2)))).as("window")), - "window frame" :: "must match the required frame" :: Nil) + "Cannot specify window frame for lead function" :: Nil) errorTest( "the offset of nth_value window function is negative or zero", From dc697a8b598aea922ee6620d87f3ace2f7947231 Mon Sep 17 00:00:00 2001 From: "xuewei.linxuewei" Date: Tue, 13 Oct 2020 13:21:45 +0000 Subject: [PATCH 0230/1009] [SPARK-13860][SQL] Change statistical aggregate function to return null instead of Double.NaN when divideByZero ### What changes were proposed in this pull request? As [SPARK-13860](https://issues.apache.org/jira/browse/SPARK-13860) stated, TPCDS Query 39 returns wrong results using SparkSQL. The root cause is that when stddev_samp is applied to a single-element set, the TPCDS answer expects null, whereas SparkSQL returns Double.NaN, which causes the wrong result.
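For illustration only, a minimal spark-shell style sketch of the behavior this patch targets (the single-row DataFrame and the column name `a` are made up here, and the commented results are expectations under this change rather than captured output):

```
import org.apache.spark.sql.functions.stddev_samp

// A single-element set: the sample standard deviation is undefined.
val df = Seq(1.0).toDF("a")

// New default (spark.sql.legacy.statisticalAggregate = false): expect NULL.
df.agg(stddev_samp($"a")).show()

// Opting back into the legacy behavior should yield Double.NaN again.
spark.conf.set("spark.sql.legacy.statisticalAggregate", true)
df.agg(stddev_samp($"a")).show()
```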
Add an extra legacy config to fall back to the NaN logic, and return null by default to align with the TPCDS standard. ### Why are the changes needed? SQL correctness issue. ### Does this PR introduce any user-facing change? Yes. See the sql-migration-guide: In Spark 3.1, statistical aggregation function includes `std`, `stddev`, `stddev_samp`, `variance`, `var_samp`, `skewness`, `kurtosis`, `covar_samp`, `corr` will return `NULL` instead of `Double.NaN` when `DivideByZero` occurs during expression evaluation, for example, when `stddev_samp` applied on a single element set. In Spark version 3.0 and earlier, it will return `Double.NaN` in such case. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.statisticalAggregate` to `true`. ### How was this patch tested? Updated DataFrameAggregateSuite/DataFrameWindowFunctionsSuite to test both default and legacy behavior. Adjusted DataFrameWindowFunctionsSuite/SQLQueryTestSuite and some R cases to the default return-null behavior. Closes #29983 from leanken/leanken-SPARK-13860. Authored-by: xuewei.linxuewei Signed-off-by: Wenchen Fan --- R/pkg/tests/fulltests/test_sparkSQL.R | 4 +- docs/sql-migration-guide.md | 2 + .../sql/catalyst/analysis/TypeCoercion.scala | 18 +- .../aggregate/CentralMomentAgg.scala | 60 ++++- .../catalyst/expressions/aggregate/Corr.scala | 22 +- .../expressions/aggregate/Covariance.scala | 32 ++- .../apache/spark/sql/internal/SQLConf.scala | 12 + .../postgreSQL/aggregates_part1.sql.out | 4 +- .../results/postgreSQL/window_part4.sql.out | 32 +-- .../native/promoteStrings.sql.out | 8 +- .../postgreSQL/udf-aggregates_part1.sql.out | 4 +- .../sql-tests/results/udf/udf-window.sql.out | 8 +- .../sql-tests/results/window.sql.out | 10 +- .../spark/sql/DataFrameAggregateSuite.scala | 62 +++-- .../sql/DataFrameWindowFunctionsSuite.scala | 252 ++++++++++++------ .../execution/AggregationQuerySuite.scala | 12 +- .../sql/hive/execution/WindowQuerySuite.scala | 9 +- 17 files changed, 385 insertions(+), 166 deletions(-) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 268f5734813ba..077dfc6770d94 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -2147,7 +2147,7 @@ test_that("group by, agg functions", { df3 <- agg(gd, age = "stddev") expect_is(df3, "SparkDataFrame") df3_local <- collect(df3) - expect_true(is.nan(df3_local[df3_local$name == "Andy", ][1, 2])) + expect_true(is.na(df3_local[df3_local$name == "Andy", ][1, 2])) df4 <- agg(gd, sumAge = sum(df$age)) expect_is(df4, "SparkDataFrame") @@ -2178,7 +2178,7 @@ test_that("group by, agg functions", { df7 <- agg(gd2, value = "stddev") df7_local <- collect(df7) expect_true(abs(df7_local[df7_local$name == "ID1", ][1, 2] - 6.928203) < 1e-6) - expect_true(is.nan(df7_local[df7_local$name == "ID2", ][1, 2])) + expect_true(is.na(df7_local[df7_local$name == "ID2", ][1, 2])) mockLines3 <- c("{\"name\":\"Andy\", \"age\":30}", "{\"name\":\"Andy\", \"age\":30}", diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index feff2c7e9f543..c1de58d85d5bf 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -24,6 +24,8 @@ license: | ## Upgrading from Spark SQL 3.0 to 3.1 + - In Spark 3.1, statistical aggregation function includes `std`, `stddev`, `stddev_samp`, `variance`, `var_samp`, `skewness`, `kurtosis`, `covar_samp`, `corr` will return `NULL` instead of `Double.NaN` when `DivideByZero` occurs during expression evaluation, for example, when
`stddev_samp` applied on a single element set. In Spark version 3.0 and earlier, it will return `Double.NaN` in such case. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.statisticalAggregate` to `true`. + - In Spark 3.1, grouping_id() returns long values. In Spark version 3.0 and earlier, this function returns int values. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.integerGroupingId` to `true`. - In Spark 3.1, SQL UI data adopts the `formatted` mode for the query plan explain results. To restore the behavior before Spark 3.1, you can set `spark.sql.ui.explainMode` to `extended`. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index deaa49bf423b1..f72d9be205df3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -450,14 +450,20 @@ object TypeCoercion { case Abs(e @ StringType()) => Abs(Cast(e, DoubleType)) case Sum(e @ StringType()) => Sum(Cast(e, DoubleType)) case Average(e @ StringType()) => Average(Cast(e, DoubleType)) - case StddevPop(e @ StringType()) => StddevPop(Cast(e, DoubleType)) - case StddevSamp(e @ StringType()) => StddevSamp(Cast(e, DoubleType)) + case s @ StddevPop(e @ StringType(), _) => + s.withNewChildren(Seq(Cast(e, DoubleType))) + case s @ StddevSamp(e @ StringType(), _) => + s.withNewChildren(Seq(Cast(e, DoubleType))) case UnaryMinus(e @ StringType()) => UnaryMinus(Cast(e, DoubleType)) case UnaryPositive(e @ StringType()) => UnaryPositive(Cast(e, DoubleType)) - case VariancePop(e @ StringType()) => VariancePop(Cast(e, DoubleType)) - case VarianceSamp(e @ StringType()) => VarianceSamp(Cast(e, DoubleType)) - case Skewness(e @ StringType()) => Skewness(Cast(e, DoubleType)) - case Kurtosis(e @ StringType()) => Kurtosis(Cast(e, DoubleType)) + case v @ VariancePop(e @ StringType(), _) => + v.withNewChildren(Seq(Cast(e, DoubleType))) + case v @ VarianceSamp(e @ StringType(), _) => + v.withNewChildren(Seq(Cast(e, DoubleType))) + case s @ Skewness(e @ StringType(), _) => + s.withNewChildren(Seq(Cast(e, DoubleType))) + case k @ Kurtosis(e @ StringType(), _) => + k.withNewChildren(Seq(Cast(e, DoubleType))) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala index 53759ca3d9165..2cc9adb5aa06e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.expressions.aggregate import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ /** @@ -43,7 +44,7 @@ import org.apache.spark.sql.types._ * * @param child to compute central moments of. 
*/ -abstract class CentralMomentAgg(child: Expression) +abstract class CentralMomentAgg(child: Expression, nullOnDivideByZero: Boolean) extends DeclarativeAggregate with ImplicitCastInputTypes { /** @@ -62,6 +63,13 @@ abstract class CentralMomentAgg(child: Expression) protected val m3 = AttributeReference("m3", DoubleType, nullable = false)() protected val m4 = AttributeReference("m4", DoubleType, nullable = false)() + protected def divideByZeroEvalResult: Expression = { + if (nullOnDivideByZero) Literal.create(null, DoubleType) else Double.NaN + } + + override def stringArgs: Iterator[Any] = + super.stringArgs.filter(_.isInstanceOf[Expression]) + private def trimHigherOrder[T](expressions: Seq[T]) = expressions.take(momentOrder + 1) override val aggBufferAttributes = trimHigherOrder(Seq(n, avg, m2, m3, m4)) @@ -145,7 +153,12 @@ abstract class CentralMomentAgg(child: Expression) group = "agg_funcs", since = "1.6.0") // scalastyle:on line.size.limit -case class StddevPop(child: Expression) extends CentralMomentAgg(child) { +case class StddevPop( + child: Expression, + nullOnDivideByZero: Boolean = !SQLConf.get.legacyStatisticalAggregate) + extends CentralMomentAgg(child, nullOnDivideByZero) { + + def this(child: Expression) = this(child, !SQLConf.get.legacyStatisticalAggregate) override protected def momentOrder = 2 @@ -168,13 +181,18 @@ case class StddevPop(child: Expression) extends CentralMomentAgg(child) { group = "agg_funcs", since = "1.6.0") // scalastyle:on line.size.limit -case class StddevSamp(child: Expression) extends CentralMomentAgg(child) { +case class StddevSamp( + child: Expression, + nullOnDivideByZero: Boolean = !SQLConf.get.legacyStatisticalAggregate) + extends CentralMomentAgg(child, nullOnDivideByZero) { + + def this(child: Expression) = this(child, !SQLConf.get.legacyStatisticalAggregate) override protected def momentOrder = 2 override val evaluateExpression: Expression = { If(n === 0.0, Literal.create(null, DoubleType), - If(n === 1.0, Double.NaN, sqrt(m2 / (n - 1.0)))) + If(n === 1.0, divideByZeroEvalResult, sqrt(m2 / (n - 1.0)))) } override def prettyName: String = @@ -191,7 +209,12 @@ case class StddevSamp(child: Expression) extends CentralMomentAgg(child) { """, group = "agg_funcs", since = "1.6.0") -case class VariancePop(child: Expression) extends CentralMomentAgg(child) { +case class VariancePop( + child: Expression, + nullOnDivideByZero: Boolean = !SQLConf.get.legacyStatisticalAggregate) + extends CentralMomentAgg(child, nullOnDivideByZero) { + + def this(child: Expression) = this(child, !SQLConf.get.legacyStatisticalAggregate) override protected def momentOrder = 2 @@ -212,13 +235,18 @@ case class VariancePop(child: Expression) extends CentralMomentAgg(child) { """, group = "agg_funcs", since = "1.6.0") -case class VarianceSamp(child: Expression) extends CentralMomentAgg(child) { +case class VarianceSamp( + child: Expression, + nullOnDivideByZero: Boolean = !SQLConf.get.legacyStatisticalAggregate) + extends CentralMomentAgg(child, nullOnDivideByZero) { + + def this(child: Expression) = this(child, !SQLConf.get.legacyStatisticalAggregate) override protected def momentOrder = 2 override val evaluateExpression: Expression = { If(n === 0.0, Literal.create(null, DoubleType), - If(n === 1.0, Double.NaN, m2 / (n - 1.0))) + If(n === 1.0, divideByZeroEvalResult, m2 / (n - 1.0))) } override def prettyName: String = getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("var_samp") @@ -235,7 +263,12 @@ case class VarianceSamp(child: Expression) extends 
CentralMomentAgg(child) { """, group = "agg_funcs", since = "1.6.0") -case class Skewness(child: Expression) extends CentralMomentAgg(child) { +case class Skewness( + child: Expression, + nullOnDivideByZero: Boolean = !SQLConf.get.legacyStatisticalAggregate) + extends CentralMomentAgg(child, nullOnDivideByZero) { + + def this(child: Expression) = this(child, !SQLConf.get.legacyStatisticalAggregate) override def prettyName: String = "skewness" @@ -243,7 +276,7 @@ case class Skewness(child: Expression) extends CentralMomentAgg(child) { override val evaluateExpression: Expression = { If(n === 0.0, Literal.create(null, DoubleType), - If(m2 === 0.0, Double.NaN, sqrt(n) * m3 / sqrt(m2 * m2 * m2))) + If(m2 === 0.0, divideByZeroEvalResult, sqrt(n) * m3 / sqrt(m2 * m2 * m2))) } } @@ -258,13 +291,18 @@ case class Skewness(child: Expression) extends CentralMomentAgg(child) { """, group = "agg_funcs", since = "1.6.0") -case class Kurtosis(child: Expression) extends CentralMomentAgg(child) { +case class Kurtosis( + child: Expression, + nullOnDivideByZero: Boolean = !SQLConf.get.legacyStatisticalAggregate) + extends CentralMomentAgg(child, nullOnDivideByZero) { + + def this(child: Expression) = this(child, !SQLConf.get.legacyStatisticalAggregate) override protected def momentOrder = 4 override val evaluateExpression: Expression = { If(n === 0.0, Literal.create(null, DoubleType), - If(m2 === 0.0, Double.NaN, n * m4 / (m2 * m2) - 3.0)) + If(m2 === 0.0, divideByZeroEvalResult, n * m4 / (m2 * m2) - 3.0)) } override def prettyName: String = "kurtosis" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Corr.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Corr.scala index 9ef05bb5d4fec..737e8cd3ffa41 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Corr.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Corr.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions.aggregate import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ /** @@ -28,7 +29,7 @@ import org.apache.spark.sql.types._ * Definition of Pearson correlation can be found at * http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient */ -abstract class PearsonCorrelation(x: Expression, y: Expression) +abstract class PearsonCorrelation(x: Expression, y: Expression, nullOnDivideByZero: Boolean) extends DeclarativeAggregate with ImplicitCastInputTypes { override def children: Seq[Expression] = Seq(x, y) @@ -43,6 +44,13 @@ abstract class PearsonCorrelation(x: Expression, y: Expression) protected val xMk = AttributeReference("xMk", DoubleType, nullable = false)() protected val yMk = AttributeReference("yMk", DoubleType, nullable = false)() + protected def divideByZeroEvalResult: Expression = { + if (nullOnDivideByZero) Literal.create(null, DoubleType) else Double.NaN + } + + override def stringArgs: Iterator[Any] = + super.stringArgs.filter(_.isInstanceOf[Expression]) + override val aggBufferAttributes: Seq[AttributeReference] = Seq(n, xAvg, yAvg, ck, xMk, yMk) override val initialValues: Seq[Expression] = Array.fill(6)(Literal(0.0)) @@ -102,12 +110,18 @@ abstract class PearsonCorrelation(x: Expression, y: Expression) group = "agg_funcs", since = "1.6.0") // scalastyle:on line.size.limit -case class Corr(x: Expression, y: 
Expression) - extends PearsonCorrelation(x, y) { +case class Corr( + x: Expression, + y: Expression, + nullOnDivideByZero: Boolean = !SQLConf.get.legacyStatisticalAggregate) + extends PearsonCorrelation(x, y, nullOnDivideByZero) { + + def this(x: Expression, y: Expression) = + this(x, y, !SQLConf.get.legacyStatisticalAggregate) override val evaluateExpression: Expression = { If(n === 0.0, Literal.create(null, DoubleType), - If(n === 1.0, Double.NaN, ck / sqrt(xMk * yMk))) + If(n === 1.0, divideByZeroEvalResult, ck / sqrt(xMk * yMk))) } override def prettyName: String = "corr" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Covariance.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Covariance.scala index f03c2f2710a04..7c4d6ded6559e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Covariance.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Covariance.scala @@ -19,13 +19,14 @@ package org.apache.spark.sql.catalyst.expressions.aggregate import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ /** * Compute the covariance between two expressions. * When applied on empty data (i.e., count is zero), it returns NULL. */ -abstract class Covariance(x: Expression, y: Expression) +abstract class Covariance(x: Expression, y: Expression, nullOnDivideByZero: Boolean) extends DeclarativeAggregate with ImplicitCastInputTypes { override def children: Seq[Expression] = Seq(x, y) @@ -38,6 +39,13 @@ abstract class Covariance(x: Expression, y: Expression) protected val yAvg = AttributeReference("yAvg", DoubleType, nullable = false)() protected val ck = AttributeReference("ck", DoubleType, nullable = false)() + protected def divideByZeroEvalResult: Expression = { + if (nullOnDivideByZero) Literal.create(null, DoubleType) else Double.NaN + } + + override def stringArgs: Iterator[Any] = + super.stringArgs.filter(_.isInstanceOf[Expression]) + override val aggBufferAttributes: Seq[AttributeReference] = Seq(n, xAvg, yAvg, ck) override val initialValues: Seq[Expression] = Array.fill(4)(Literal(0.0)) @@ -88,7 +96,15 @@ abstract class Covariance(x: Expression, y: Expression) """, group = "agg_funcs", since = "2.0.0") -case class CovPopulation(left: Expression, right: Expression) extends Covariance(left, right) { +case class CovPopulation( + left: Expression, + right: Expression, + nullOnDivideByZero: Boolean = !SQLConf.get.legacyStatisticalAggregate) + extends Covariance(left, right, nullOnDivideByZero) { + + def this(left: Expression, right: Expression) = + this(left, right, !SQLConf.get.legacyStatisticalAggregate) + override val evaluateExpression: Expression = { If(n === 0.0, Literal.create(null, DoubleType), ck / n) } @@ -105,10 +121,18 @@ case class CovPopulation(left: Expression, right: Expression) extends Covariance """, group = "agg_funcs", since = "2.0.0") -case class CovSample(left: Expression, right: Expression) extends Covariance(left, right) { +case class CovSample( + left: Expression, + right: Expression, + nullOnDivideByZero: Boolean = !SQLConf.get.legacyStatisticalAggregate) + extends Covariance(left, right, nullOnDivideByZero) { + + def this(left: Expression, right: Expression) = + this(left, right, !SQLConf.get.legacyStatisticalAggregate) + override val evaluateExpression: Expression = { If(n === 0.0, 
Literal.create(null, DoubleType), - If(n === 1.0, Double.NaN, ck / (n - 1.0))) + If(n === 1.0, divideByZeroEvalResult, ck / (n - 1.0))) } override def prettyName: String = "covar_samp" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 99c10b38c53b1..d4c7dd7f3160c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2342,6 +2342,16 @@ object SQLConf { .booleanConf .createWithDefault(false) + val LEGACY_STATISTICAL_AGGREGATE = + buildConf("spark.sql.legacy.statisticalAggregate") + .internal() + .doc("When set to true, statistical aggregate function returns Double.NaN " + + "if divide by zero occurred during expression evaluation, otherwise, it returns null. " + + "Before version 3.1.0, it returns NaN in divideByZero case by default.") + .version("3.1.0") + .booleanConf + .createWithDefault(false) + val TRUNCATE_TABLE_IGNORE_PERMISSION_ACL = buildConf("spark.sql.truncateTable.ignorePermissionAcl.enabled") .internal() @@ -3364,6 +3374,8 @@ class SQLConf extends Serializable with Logging { def allowNegativeScaleOfDecimalEnabled: Boolean = getConf(SQLConf.LEGACY_ALLOW_NEGATIVE_SCALE_OF_DECIMAL_ENABLED) + def legacyStatisticalAggregate: Boolean = getConf(SQLConf.LEGACY_STATISTICAL_AGGREGATE) + def truncateTableIgnorePermissionAcl: Boolean = getConf(SQLConf.TRUNCATE_TABLE_IGNORE_PERMISSION_ACL) diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part1.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part1.sql.out index f7bba96738eab..212365f92946c 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part1.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part1.sql.out @@ -143,7 +143,7 @@ SELECT var_pop(1.0), var_samp(2.0) -- !query schema struct -- !query output -0.0 NaN +0.0 NULL -- !query @@ -151,7 +151,7 @@ SELECT stddev_pop(CAST(3.0 AS Decimal(38,0))), stddev_samp(CAST(4.0 AS Decimal(3 -- !query schema struct -- !query output -0.0 NaN +0.0 NULL -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out index 4dd4712345a89..f7439d873b4eb 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out @@ -195,7 +195,7 @@ struct -- !query output -NaN +NULL -- !query @@ -2558,7 +2558,7 @@ SELECT var_samp('1') FROM t -- !query schema struct -- !query output -NaN +NULL -- !query @@ -2566,7 +2566,7 @@ SELECT skewness('1') FROM t -- !query schema struct -- !query output -NaN +NULL -- !query @@ -2574,4 +2574,4 @@ SELECT kurtosis('1') FROM t -- !query schema struct -- !query output -NaN +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part1.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part1.sql.out index 76637bf578e6f..a428a7a9c923b 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part1.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part1.sql.out @@ -143,7 +143,7 @@ SELECT udf(var_pop(1.0)), var_samp(udf(2.0)) -- !query schema struct -- !query output -0.0 NaN +0.0 
NULL -- !query @@ -151,7 +151,7 @@ SELECT stddev_pop(udf(CAST(3.0 AS Decimal(38,0)))), stddev_samp(CAST(udf(4.0) AS -- !query schema struct -- !query output -0.0 NaN +0.0 NULL -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-window.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-window.sql.out index a84070535b658..928b9ebb12364 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-window.sql.out @@ -289,13 +289,13 @@ ORDER BY cate, udf(val) struct,collect_set:array,skewness:double,kurtosis:double> -- !query output NULL NULL NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.5 0.0 1 1 NULL NULL 0 NULL NULL NULL NULL [] [] NULL NULL -3 NULL 3 3 3 1 3 3.0 NaN NULL 3 NULL 3 3 3 2 2 1.0 1.0 2 2 0.0 NaN 1 0.0 NaN NaN 0.0 [3] [3] NaN NaN -NULL a NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.25 0.0 1 1 NULL NULL 0 NULL NULL NULL NULL [] [] NaN NaN +3 NULL 3 3 3 1 3 3.0 NULL NULL 3 NULL 3 3 3 2 2 1.0 1.0 2 2 0.0 NULL 1 0.0 NULL NULL 0.0 [3] [3] NULL NULL +NULL a NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.25 0.0 1 1 NULL NULL 0 NULL NULL NULL NULL [] [] NULL NULL 1 a 1 1 1 2 2 1.0 0.0 NULL 1 NULL 1 1 1 2 2 0.75 0.3333333333333333 1 2 0.0 0.0 1 0.0 NULL 0.0 0.0 [1,1] [1] 0.7071067811865476 -1.5 1 a 1 1 1 2 2 1.0 0.0 NULL 1 NULL 1 1 1 2 2 0.75 0.3333333333333333 2 3 0.0 0.0 1 0.0 NULL 0.0 0.0 [1,1] [1] 0.7071067811865476 -1.5 2 a 2 1 1 3 4 1.3333333333333333 0.5773502691896258 NULL 1 NULL 2 2 2 4 3 1.0 1.0 2 4 0.22222222222222224 0.33333333333333337 2 4.772185885555555E8 1.0 0.5773502691896258 0.4714045207910317 [1,1,2] [1,2] 1.1539890888012805 -0.6672217220327235 -1 b 1 1 1 1 1 1.0 NaN 1 1 1 1 1 1 1 1 0.3333333333333333 0.0 1 1 0.0 NaN 1 NULL NULL NaN 0.0 [1] [1] NaN NaN -2 b 2 1 1 2 3 1.5 0.7071067811865476 1 1 1 2 2 2 2 2 0.6666666666666666 0.5 1 2 0.25 0.5 2 0.0 NaN 0.7071067811865476 0.5 [1,2] [1,2] 0.0 -2.0000000000000013 +1 b 1 1 1 1 1 1.0 NULL 1 1 1 1 1 1 1 1 0.3333333333333333 0.0 1 1 0.0 NULL 1 NULL NULL NULL 0.0 [1] [1] NULL NULL +2 b 2 1 1 2 3 1.5 0.7071067811865476 1 1 1 2 2 2 2 2 0.6666666666666666 0.5 1 2 0.25 0.5 2 0.0 NULL 0.7071067811865476 0.5 [1,2] [1,2] 0.0 -2.0000000000000013 3 b 3 1 1 3 6 2.0 1.0 1 1 1 3 3 3 3 3 1.0 1.0 2 3 0.6666666666666666 1.0 3 5.3687091175E8 1.0 1.0 0.816496580927726 [1,2,3] [1,2,3] 0.7057890433107311 -1.4999999999999984 diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out b/sql/core/src/test/resources/sql-tests/results/window.sql.out index a8875fd449bad..028dd7a12d25d 100644 --- a/sql/core/src/test/resources/sql-tests/results/window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 29 +-- Number of queries: 32 -- !query @@ -313,13 +313,13 @@ ORDER BY cate, val struct,collect_set:array,skewness:double,kurtosis:double> -- !query output NULL NULL NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.5 0.0 1 1 NULL NULL 0 NULL NULL NULL NULL [] [] NULL NULL -3 NULL 3 3 3 1 3 3.0 NaN NULL 3 NULL 3 3 3 2 2 1.0 1.0 2 2 0.0 NaN 1 0.0 NaN NaN 0.0 [3] [3] NaN NaN -NULL a NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.25 0.0 1 1 NULL NULL 0 NULL NULL NULL NULL [] [] NaN NaN +3 NULL 3 3 3 1 3 3.0 NULL NULL 3 NULL 3 3 3 2 2 1.0 1.0 2 2 0.0 NULL 1 0.0 NULL NULL 0.0 [3] [3] NULL NULL +NULL a NULL NULL NULL 0 NULL 
NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.25 0.0 1 1 NULL NULL 0 NULL NULL NULL NULL [] [] NULL NULL 1 a 1 1 1 2 2 1.0 0.0 NULL 1 NULL 1 1 1 2 2 0.75 0.3333333333333333 1 2 0.0 0.0 1 0.0 NULL 0.0 0.0 [1,1] [1] 0.7071067811865476 -1.5 1 a 1 1 1 2 2 1.0 0.0 NULL 1 NULL 1 1 1 2 2 0.75 0.3333333333333333 2 3 0.0 0.0 1 0.0 NULL 0.0 0.0 [1,1] [1] 0.7071067811865476 -1.5 2 a 2 1 1 3 4 1.3333333333333333 0.5773502691896258 NULL 1 NULL 2 2 2 4 3 1.0 1.0 2 4 0.22222222222222224 0.33333333333333337 2 4.772185885555555E8 1.0 0.5773502691896258 0.4714045207910317 [1,1,2] [1,2] 1.1539890888012805 -0.6672217220327235 -1 b 1 1 1 1 1 1.0 NaN 1 1 1 1 1 1 1 1 0.3333333333333333 0.0 1 1 0.0 NaN 1 NULL NULL NaN 0.0 [1] [1] NaN NaN -2 b 2 1 1 2 3 1.5 0.7071067811865476 1 1 1 2 2 2 2 2 0.6666666666666666 0.5 1 2 0.25 0.5 2 0.0 NaN 0.7071067811865476 0.5 [1,2] [1,2] 0.0 -2.0000000000000013 +1 b 1 1 1 1 1 1.0 NULL 1 1 1 1 1 1 1 1 0.3333333333333333 0.0 1 1 0.0 NULL 1 NULL NULL NULL 0.0 [1] [1] NULL NULL +2 b 2 1 1 2 3 1.5 0.7071067811865476 1 1 1 2 2 2 2 2 0.6666666666666666 0.5 1 2 0.25 0.5 2 0.0 NULL 0.7071067811865476 0.5 [1,2] [1,2] 0.0 -2.0000000000000013 3 b 3 1 1 3 6 2.0 1.0 1 1 1 3 3 3 3 3 1.0 1.0 2 3 0.6666666666666666 1.0 3 5.3687091175E8 1.0 1.0 0.816496580927726 [1,2,3] [1,2,3] 0.7057890433107311 -1.4999999999999984 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala index 353444b664412..d4e64aa03df0e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -456,25 +456,51 @@ class DataFrameAggregateSuite extends QueryTest } test("zero moments") { - val input = Seq((1, 2)).toDF("a", "b") - checkAnswer( - input.agg(stddev($"a"), stddev_samp($"a"), stddev_pop($"a"), variance($"a"), - var_samp($"a"), var_pop($"a"), skewness($"a"), kurtosis($"a")), - Row(Double.NaN, Double.NaN, 0.0, Double.NaN, Double.NaN, 0.0, - Double.NaN, Double.NaN)) + withSQLConf(SQLConf.LEGACY_STATISTICAL_AGGREGATE.key -> "true") { + val input = Seq((1, 2)).toDF("a", "b") + checkAnswer( + input.agg(stddev($"a"), stddev_samp($"a"), stddev_pop($"a"), variance($"a"), + var_samp($"a"), var_pop($"a"), skewness($"a"), kurtosis($"a")), + Row(Double.NaN, Double.NaN, 0.0, Double.NaN, Double.NaN, 0.0, + Double.NaN, Double.NaN)) - checkAnswer( - input.agg( - expr("stddev(a)"), - expr("stddev_samp(a)"), - expr("stddev_pop(a)"), - expr("variance(a)"), - expr("var_samp(a)"), - expr("var_pop(a)"), - expr("skewness(a)"), - expr("kurtosis(a)")), - Row(Double.NaN, Double.NaN, 0.0, Double.NaN, Double.NaN, 0.0, - Double.NaN, Double.NaN)) + checkAnswer( + input.agg( + expr("stddev(a)"), + expr("stddev_samp(a)"), + expr("stddev_pop(a)"), + expr("variance(a)"), + expr("var_samp(a)"), + expr("var_pop(a)"), + expr("skewness(a)"), + expr("kurtosis(a)")), + Row(Double.NaN, Double.NaN, 0.0, Double.NaN, Double.NaN, 0.0, + Double.NaN, Double.NaN)) + } + } + + test("SPARK-13860: zero moments LEGACY_STATISTICAL_AGGREGATE off") { + withSQLConf(SQLConf.LEGACY_STATISTICAL_AGGREGATE.key -> "false") { + val input = Seq((1, 2)).toDF("a", "b") + checkAnswer( + input.agg(stddev($"a"), stddev_samp($"a"), stddev_pop($"a"), variance($"a"), + var_samp($"a"), var_pop($"a"), skewness($"a"), kurtosis($"a")), + Row(null, null, 0.0, null, null, 0.0, + null, null)) + + checkAnswer( + input.agg( + expr("stddev(a)"), + expr("stddev_samp(a)"), + 
expr("stddev_pop(a)"), + expr("variance(a)"), + expr("var_samp(a)"), + expr("var_pop(a)"), + expr("skewness(a)"), + expr("kurtosis(a)")), + Row(null, null, 0.0, null, null, 0.0, + null, null)) + } } test("null moments") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index c5dcdc44cc64f..616e333033aa9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -94,89 +94,187 @@ class DataFrameWindowFunctionsSuite extends QueryTest } test("corr, covar_pop, stddev_pop functions in specific window") { - val df = Seq( - ("a", "p1", 10.0, 20.0), - ("b", "p1", 20.0, 10.0), - ("c", "p2", 20.0, 20.0), - ("d", "p2", 20.0, 20.0), - ("e", "p3", 0.0, 0.0), - ("f", "p3", 6.0, 12.0), - ("g", "p3", 6.0, 12.0), - ("h", "p3", 8.0, 16.0), - ("i", "p4", 5.0, 5.0)).toDF("key", "partitionId", "value1", "value2") - checkAnswer( - df.select( - $"key", - corr("value1", "value2").over(Window.partitionBy("partitionId") - .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), - covar_pop("value1", "value2") - .over(Window.partitionBy("partitionId") + withSQLConf(SQLConf.LEGACY_STATISTICAL_AGGREGATE.key -> "true") { + val df = Seq( + ("a", "p1", 10.0, 20.0), + ("b", "p1", 20.0, 10.0), + ("c", "p2", 20.0, 20.0), + ("d", "p2", 20.0, 20.0), + ("e", "p3", 0.0, 0.0), + ("f", "p3", 6.0, 12.0), + ("g", "p3", 6.0, 12.0), + ("h", "p3", 8.0, 16.0), + ("i", "p4", 5.0, 5.0)).toDF("key", "partitionId", "value1", "value2") + checkAnswer( + df.select( + $"key", + corr("value1", "value2").over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + covar_pop("value1", "value2") + .over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + var_pop("value1") + .over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + stddev_pop("value1") + .over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + var_pop("value2") + .over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + stddev_pop("value2") + .over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing))), + + // As stddev_pop(expr) = sqrt(var_pop(expr)) + // the "stddev_pop" column can be calculated from the "var_pop" column. 
+ // + // As corr(expr1, expr2) = covar_pop(expr1, expr2) / (stddev_pop(expr1) * stddev_pop(expr2)) + // the "corr" column can be calculated from the "covar_pop" and the two "stddev_pop" columns + Seq( + Row("a", -1.0, -25.0, 25.0, 5.0, 25.0, 5.0), + Row("b", -1.0, -25.0, 25.0, 5.0, 25.0, 5.0), + Row("c", null, 0.0, 0.0, 0.0, 0.0, 0.0), + Row("d", null, 0.0, 0.0, 0.0, 0.0, 0.0), + Row("e", 1.0, 18.0, 9.0, 3.0, 36.0, 6.0), + Row("f", 1.0, 18.0, 9.0, 3.0, 36.0, 6.0), + Row("g", 1.0, 18.0, 9.0, 3.0, 36.0, 6.0), + Row("h", 1.0, 18.0, 9.0, 3.0, 36.0, 6.0), + Row("i", Double.NaN, 0.0, 0.0, 0.0, 0.0, 0.0))) + } + } + + test("SPARK-13860: " + + "corr, covar_pop, stddev_pop functions in specific window " + + "LEGACY_STATISTICAL_AGGREGATE off") { + withSQLConf(SQLConf.LEGACY_STATISTICAL_AGGREGATE.key -> "false") { + val df = Seq( + ("a", "p1", 10.0, 20.0), + ("b", "p1", 20.0, 10.0), + ("c", "p2", 20.0, 20.0), + ("d", "p2", 20.0, 20.0), + ("e", "p3", 0.0, 0.0), + ("f", "p3", 6.0, 12.0), + ("g", "p3", 6.0, 12.0), + ("h", "p3", 8.0, 16.0), + ("i", "p4", 5.0, 5.0)).toDF("key", "partitionId", "value1", "value2") + checkAnswer( + df.select( + $"key", + corr("value1", "value2").over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + covar_pop("value1", "value2") + .over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + var_pop("value1") + .over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + stddev_pop("value1") + .over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + var_pop("value2") + .over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + stddev_pop("value2") + .over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing))), + + // As stddev_pop(expr) = sqrt(var_pop(expr)) + // the "stddev_pop" column can be calculated from the "var_pop" column. 
+ // + // As corr(expr1, expr2) = covar_pop(expr1, expr2) / (stddev_pop(expr1) * stddev_pop(expr2)) + // the "corr" column can be calculated from the "covar_pop" and the two "stddev_pop" columns + Seq( + Row("a", -1.0, -25.0, 25.0, 5.0, 25.0, 5.0), + Row("b", -1.0, -25.0, 25.0, 5.0, 25.0, 5.0), + Row("c", null, 0.0, 0.0, 0.0, 0.0, 0.0), + Row("d", null, 0.0, 0.0, 0.0, 0.0, 0.0), + Row("e", 1.0, 18.0, 9.0, 3.0, 36.0, 6.0), + Row("f", 1.0, 18.0, 9.0, 3.0, 36.0, 6.0), + Row("g", 1.0, 18.0, 9.0, 3.0, 36.0, 6.0), + Row("h", 1.0, 18.0, 9.0, 3.0, 36.0, 6.0), + Row("i", null, 0.0, 0.0, 0.0, 0.0, 0.0))) + } + } + + test("covar_samp, var_samp (variance), stddev_samp (stddev) functions in specific window") { + withSQLConf(SQLConf.LEGACY_STATISTICAL_AGGREGATE.key -> "true") { + val df = Seq( + ("a", "p1", 10.0, 20.0), + ("b", "p1", 20.0, 10.0), + ("c", "p2", 20.0, 20.0), + ("d", "p2", 20.0, 20.0), + ("e", "p3", 0.0, 0.0), + ("f", "p3", 6.0, 12.0), + ("g", "p3", 6.0, 12.0), + ("h", "p3", 8.0, 16.0), + ("i", "p4", 5.0, 5.0)).toDF("key", "partitionId", "value1", "value2") + checkAnswer( + df.select( + $"key", + covar_samp("value1", "value2").over(Window.partitionBy("partitionId") .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), - var_pop("value1") - .over(Window.partitionBy("partitionId") + var_samp("value1").over(Window.partitionBy("partitionId") .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), - stddev_pop("value1") - .over(Window.partitionBy("partitionId") + variance("value1").over(Window.partitionBy("partitionId") .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), - var_pop("value2") - .over(Window.partitionBy("partitionId") + stddev_samp("value1").over(Window.partitionBy("partitionId") .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), - stddev_pop("value2") - .over(Window.partitionBy("partitionId") - .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing))), - - // As stddev_pop(expr) = sqrt(var_pop(expr)) - // the "stddev_pop" column can be calculated from the "var_pop" column. - // - // As corr(expr1, expr2) = covar_pop(expr1, expr2) / (stddev_pop(expr1) * stddev_pop(expr2)) - // the "corr" column can be calculated from the "covar_pop" and the two "stddev_pop" columns. 
- Seq( - Row("a", -1.0, -25.0, 25.0, 5.0, 25.0, 5.0), - Row("b", -1.0, -25.0, 25.0, 5.0, 25.0, 5.0), - Row("c", null, 0.0, 0.0, 0.0, 0.0, 0.0), - Row("d", null, 0.0, 0.0, 0.0, 0.0, 0.0), - Row("e", 1.0, 18.0, 9.0, 3.0, 36.0, 6.0), - Row("f", 1.0, 18.0, 9.0, 3.0, 36.0, 6.0), - Row("g", 1.0, 18.0, 9.0, 3.0, 36.0, 6.0), - Row("h", 1.0, 18.0, 9.0, 3.0, 36.0, 6.0), - Row("i", Double.NaN, 0.0, 0.0, 0.0, 0.0, 0.0))) + stddev("value1").over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)) + ), + Seq( + Row("a", -50.0, 50.0, 50.0, 7.0710678118654755, 7.0710678118654755), + Row("b", -50.0, 50.0, 50.0, 7.0710678118654755, 7.0710678118654755), + Row("c", 0.0, 0.0, 0.0, 0.0, 0.0), + Row("d", 0.0, 0.0, 0.0, 0.0, 0.0), + Row("e", 24.0, 12.0, 12.0, 3.4641016151377544, 3.4641016151377544), + Row("f", 24.0, 12.0, 12.0, 3.4641016151377544, 3.4641016151377544), + Row("g", 24.0, 12.0, 12.0, 3.4641016151377544, 3.4641016151377544), + Row("h", 24.0, 12.0, 12.0, 3.4641016151377544, 3.4641016151377544), + Row("i", Double.NaN, Double.NaN, Double.NaN, Double.NaN, Double.NaN))) + } } - test("covar_samp, var_samp (variance), stddev_samp (stddev) functions in specific window") { - val df = Seq( - ("a", "p1", 10.0, 20.0), - ("b", "p1", 20.0, 10.0), - ("c", "p2", 20.0, 20.0), - ("d", "p2", 20.0, 20.0), - ("e", "p3", 0.0, 0.0), - ("f", "p3", 6.0, 12.0), - ("g", "p3", 6.0, 12.0), - ("h", "p3", 8.0, 16.0), - ("i", "p4", 5.0, 5.0)).toDF("key", "partitionId", "value1", "value2") - checkAnswer( - df.select( - $"key", - covar_samp("value1", "value2").over(Window.partitionBy("partitionId") - .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), - var_samp("value1").over(Window.partitionBy("partitionId") - .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), - variance("value1").over(Window.partitionBy("partitionId") - .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), - stddev_samp("value1").over(Window.partitionBy("partitionId") - .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), - stddev("value1").over(Window.partitionBy("partitionId") - .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)) - ), - Seq( - Row("a", -50.0, 50.0, 50.0, 7.0710678118654755, 7.0710678118654755), - Row("b", -50.0, 50.0, 50.0, 7.0710678118654755, 7.0710678118654755), - Row("c", 0.0, 0.0, 0.0, 0.0, 0.0), - Row("d", 0.0, 0.0, 0.0, 0.0, 0.0), - Row("e", 24.0, 12.0, 12.0, 3.4641016151377544, 3.4641016151377544), - Row("f", 24.0, 12.0, 12.0, 3.4641016151377544, 3.4641016151377544), - Row("g", 24.0, 12.0, 12.0, 3.4641016151377544, 3.4641016151377544), - Row("h", 24.0, 12.0, 12.0, 3.4641016151377544, 3.4641016151377544), - Row("i", Double.NaN, Double.NaN, Double.NaN, Double.NaN, Double.NaN))) + test("SPARK-13860: " + + "covar_samp, var_samp (variance), stddev_samp (stddev) functions in specific window " + + "LEGACY_STATISTICAL_AGGREGATE off") { + withSQLConf(SQLConf.LEGACY_STATISTICAL_AGGREGATE.key -> "false") { + val df = Seq( + ("a", "p1", 10.0, 20.0), + ("b", "p1", 20.0, 10.0), + ("c", "p2", 20.0, 20.0), + ("d", "p2", 20.0, 20.0), + ("e", "p3", 0.0, 0.0), + ("f", "p3", 6.0, 12.0), + ("g", "p3", 6.0, 12.0), + ("h", "p3", 8.0, 16.0), + ("i", "p4", 5.0, 5.0)).toDF("key", "partitionId", "value1", "value2") + checkAnswer( + df.select( + $"key", + covar_samp("value1", "value2").over(Window.partitionBy("partitionId") + 
.orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + var_samp("value1").over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + variance("value1").over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + stddev_samp("value1").over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + stddev("value1").over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)) + ), + Seq( + Row("a", -50.0, 50.0, 50.0, 7.0710678118654755, 7.0710678118654755), + Row("b", -50.0, 50.0, 50.0, 7.0710678118654755, 7.0710678118654755), + Row("c", 0.0, 0.0, 0.0, 0.0, 0.0), + Row("d", 0.0, 0.0, 0.0, 0.0, 0.0), + Row("e", 24.0, 12.0, 12.0, 3.4641016151377544, 3.4641016151377544), + Row("f", 24.0, 12.0, 12.0, 3.4641016151377544, 3.4641016151377544), + Row("g", 24.0, 12.0, 12.0, 3.4641016151377544, 3.4641016151377544), + Row("h", 24.0, 12.0, 12.0, 3.4641016151377544, 3.4641016151377544), + Row("i", null, null, null, null, null))) + } } test("collect_list in ascending ordered window") { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala index 87771eed17b1b..70dcfb05c2ba9 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala @@ -825,7 +825,7 @@ abstract class AggregationQuerySuite extends QueryTest with SQLTestUtils with Te """ |SELECT corr(b, c) FROM covar_tab WHERE a = 3 """.stripMargin), - Row(Double.NaN) :: Nil) + Row(null) :: Nil) checkAnswer( spark.sql( @@ -834,10 +834,10 @@ abstract class AggregationQuerySuite extends QueryTest with SQLTestUtils with Te """.stripMargin), Row(1, null) :: Row(2, null) :: - Row(3, Double.NaN) :: - Row(4, Double.NaN) :: - Row(5, Double.NaN) :: - Row(6, Double.NaN) :: Nil) + Row(3, null) :: + Row(4, null) :: + Row(5, null) :: + Row(6, null) :: Nil) val corr7 = spark.sql("SELECT corr(b, c) FROM covar_tab").collect()(0).getDouble(0) assert(math.abs(corr7 - 0.6633880657639323) < 1e-12) @@ -869,7 +869,7 @@ abstract class AggregationQuerySuite extends QueryTest with SQLTestUtils with Te // one row test val df3 = Seq.tabulate(1)(x => (1 * x, x * x * x - 2)).toDF("a", "b") - checkAnswer(df3.groupBy().agg(covar_samp("a", "b")), Row(Double.NaN)) + checkAnswer(df3.groupBy().agg(covar_samp("a", "b")), Row(null)) checkAnswer(df3.groupBy().agg(covar_pop("a", "b")), Row(0.0)) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala index 15712a18ce751..6bf7bd6cbb90e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala @@ -62,7 +62,6 @@ class WindowQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleto // Moved because: // - Spark uses a different default stddev (sample instead of pop) // - Tiny numerical differences in stddev results. 
- // - Different StdDev behavior when n=1 (NaN instead of 0) checkAnswer(sql(s""" |select p_mfgr,p_name, p_size, |rank() over(distribute by p_mfgr sort by p_name) as r, @@ -88,22 +87,22 @@ class WindowQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleto Row("Manufacturer#1", "almond antique salmon chartreuse burlywood", 6, 4, 3, 0.6666666666666666, 0.6, 2, 4, 11.0, 15.448840301675292, 2, 6, 2), Row("Manufacturer#1", "almond aquamarine burnished black steel", 28, 5, 4, 0.8333333333333334, 0.8, 3, 5, 14.4, 15.388307249337076, 2, 28, 34), Row("Manufacturer#1", "almond aquamarine pink moccasin thistle", 42, 6, 5, 1.0, 1.0, 3, 6, 19.0, 17.787636155487327, 2, 42, 6), - Row("Manufacturer#2", "almond antique violet chocolate turquoise", 14, 1, 1, 0.2, 0.0, 1, 1, 14.0, Double.NaN, 4, 14, 14), + Row("Manufacturer#2", "almond antique violet chocolate turquoise", 14, 1, 1, 0.2, 0.0, 1, 1, 14.0, null, 4, 14, 14), Row("Manufacturer#2", "almond antique violet turquoise frosted", 40, 2, 2, 0.4, 0.25, 1, 2, 27.0, 18.384776310850235, 4, 40, 14), Row("Manufacturer#2", "almond aquamarine midnight light salmon", 2, 3, 3, 0.6, 0.5, 2, 3, 18.666666666666668, 19.42506971244462, 4, 2, 14), Row("Manufacturer#2", "almond aquamarine rose maroon antique", 25, 4, 4, 0.8, 0.75, 2, 4, 20.25, 16.17353805861084, 4, 25, 40), Row("Manufacturer#2", "almond aquamarine sandy cyan gainsboro", 18, 5, 5, 1.0, 1.0, 3, 5, 19.8, 14.042791745233567, 4, 18, 2), - Row("Manufacturer#3", "almond antique chartreuse khaki white", 17, 1, 1, 0.2, 0.0, 1, 1, 17.0,Double.NaN, 2, 17, 17), + Row("Manufacturer#3", "almond antique chartreuse khaki white", 17, 1, 1, 0.2, 0.0, 1, 1, 17.0, null, 2, 17, 17), Row("Manufacturer#3", "almond antique forest lavender goldenrod", 14, 2, 2, 0.4, 0.25, 1, 2, 15.5, 2.1213203435596424, 2, 14, 17), Row("Manufacturer#3", "almond antique metallic orange dim", 19, 3, 3, 0.6, 0.5, 2, 3, 16.666666666666668, 2.516611478423583, 2, 19, 17), Row("Manufacturer#3", "almond antique misty red olive", 1, 4, 4, 0.8, 0.75, 2, 4, 12.75, 8.098353742170895, 2, 1, 14), Row("Manufacturer#3", "almond antique olive coral navajo", 45, 5, 5, 1.0, 1.0, 3, 5, 19.2, 16.037456157383566, 2, 45, 19), - Row("Manufacturer#4", "almond antique gainsboro frosted violet", 10, 1, 1, 0.2, 0.0, 1, 1, 10.0, Double.NaN, 0, 10, 10), + Row("Manufacturer#4", "almond antique gainsboro frosted violet", 10, 1, 1, 0.2, 0.0, 1, 1, 10.0, null, 0, 10, 10), Row("Manufacturer#4", "almond antique violet mint lemon", 39, 2, 2, 0.4, 0.25, 1, 2, 24.5, 20.506096654409877, 0, 39, 10), Row("Manufacturer#4", "almond aquamarine floral ivory bisque", 27, 3, 3, 0.6, 0.5, 2, 3, 25.333333333333332, 14.571661996262929, 0, 27, 10), Row("Manufacturer#4", "almond aquamarine yellow dodger mint", 7, 4, 4, 0.8, 0.75, 2, 4, 20.75, 15.01943185787443, 0, 7, 39), Row("Manufacturer#4", "almond azure aquamarine papaya violet", 12, 5, 5, 1.0, 1.0, 3, 5, 19.0, 13.583077707206124, 0, 12, 27), - Row("Manufacturer#5", "almond antique blue firebrick mint", 31, 1, 1, 0.2, 0.0, 1, 1, 31.0, Double.NaN, 1, 31, 31), + Row("Manufacturer#5", "almond antique blue firebrick mint", 31, 1, 1, 0.2, 0.0, 1, 1, 31.0, null, 1, 31, 31), Row("Manufacturer#5", "almond antique medium spring khaki", 6, 2, 2, 0.4, 0.25, 1, 2, 18.5, 17.67766952966369, 1, 6, 31), Row("Manufacturer#5", "almond antique sky peru orange", 2, 3, 3, 0.6, 0.5, 2, 3, 13.0, 15.716233645501712, 1, 2, 31), Row("Manufacturer#5", "almond aquamarine dodger light gainsboro", 46, 4, 4, 0.8, 0.75, 2, 4, 21.25, 20.902551678363736, 1, 46, 
6), From 304ca1ec93e299ebb32f961eafcaac249a45585c Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Tue, 13 Oct 2020 09:21:06 -0700 Subject: [PATCH 0231/1009] [SPARK-33129][BUILD][DOCS] Updating the build/sbt references to test-only with testOnly for SBT 1.3.x ### What changes were proposed in this pull request? test-only - > testOnly in docs across the project. ### Why are the changes needed? Since the sbt version is updated, the older way or running i.e. `test-only` is no longer valid. ### Does this PR introduce _any_ user-facing change? docs update. ### How was this patch tested? Manually. Closes #30028 from ScrapCodes/fix-build/sbt-sample. Authored-by: Prashant Sharma Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala | 2 +- .../apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala | 2 +- .../spark/sql/jdbc/MsSqlServerIntegrationSuite.scala | 2 +- .../org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala | 2 +- .../apache/spark/sql/jdbc/OracleIntegrationSuite.scala | 4 ++-- .../apache/spark/sql/jdbc/PostgresIntegrationSuite.scala | 2 +- .../spark/sql/jdbc/PostgresKrbIntegrationSuite.scala | 2 +- .../apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala | 4 ++-- .../org/apache/spark/sql/ExpressionsSchemaSuite.scala | 4 ++-- .../scala/org/apache/spark/sql/PlanStabilitySuite.scala | 8 ++++---- .../scala/org/apache/spark/sql/SQLQueryTestSuite.scala | 8 ++++---- .../hive/thriftserver/ThriftServerQueryTestSuite.scala | 4 ++-- 12 files changed, 22 insertions(+), 22 deletions(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala index 91498493e78e2..4b9acd0d39f3f 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala @@ -29,7 +29,7 @@ import org.apache.spark.tags.DockerTest * To run this test suite for a specific version (e.g., ibmcom/db2:11.5.4.0): * {{{ * DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0 - * ./build/sbt -Pdocker-integration-tests "test-only *DB2IntegrationSuite" + * ./build/sbt -Pdocker-integration-tests "testOnly *DB2IntegrationSuite" * }}} */ @DockerTest diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala index 7ab544c17a5d8..9c3a609b98bbe 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala @@ -33,7 +33,7 @@ import org.apache.spark.tags.DockerTest * To run this test suite for a specific version (e.g., ibmcom/db2:11.5.4.0): * {{{ * DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0 - * ./build/sbt -Pdocker-integration-tests "test-only *DB2KrbIntegrationSuite" + * ./build/sbt -Pdocker-integration-tests "testOnly *DB2KrbIntegrationSuite" * }}} */ @DockerTest diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala index 5d3deff9d2704..f1ffc8f0f3dc7 100644 --- 
a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.tags.DockerTest * To run this test suite for a specific version (e.g., 2019-GA-ubuntu-16.04): * {{{ * MSSQLSERVER_DOCKER_IMAGE_NAME=2019-GA-ubuntu-16.04 - * ./build/sbt -Pdocker-integration-tests "test-only *MsSqlServerIntegrationSuite" + * ./build/sbt -Pdocker-integration-tests "testOnly *MsSqlServerIntegrationSuite" * }}} */ @DockerTest diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala index 4cd27f8b9fff2..6f96ab33d0fee 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.tags.DockerTest * To run this test suite for a specific version (e.g., mysql:5.7.31): * {{{ * MYSQL_DOCKER_IMAGE_NAME=mysql:5.7.31 - * ./build/sbt -Pdocker-integration-tests "test-only *MySQLIntegrationSuite" + * ./build/sbt -Pdocker-integration-tests "testOnly *MySQLIntegrationSuite" * }}} */ @DockerTest diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala index ce63d1df6f028..60eb1c055a38e 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala @@ -40,7 +40,7 @@ import org.apache.spark.tags.DockerTest * Pull oracle $ORACLE_DOCKER_IMAGE_NAME image - docker pull $ORACLE_DOCKER_IMAGE_NAME * 3. Start docker - sudo service docker start * 4. Run spark test - ./build/sbt -Pdocker-integration-tests - * "test-only org.apache.spark.sql.jdbc.OracleIntegrationSuite" + * "testOnly org.apache.spark.sql.jdbc.OracleIntegrationSuite" * * An actual sequence of commands to run the test is as follows * @@ -51,7 +51,7 @@ import org.apache.spark.tags.DockerTest * $ export ORACLE_DOCKER_IMAGE_NAME=oracle/database:18.4.0-xe * $ cd $SPARK_HOME * $ ./build/sbt -Pdocker-integration-tests - * "test-only org.apache.spark.sql.jdbc.OracleIntegrationSuite" + * "testOnly org.apache.spark.sql.jdbc.OracleIntegrationSuite" * * It has been validated with 18.4.0 Express Edition. 
*/ diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala index ba71c942714da..de9c0660c51c1 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.tags.DockerTest * To run this test suite for a specific version (e.g., postgres:13.0): * {{{ * POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0 - * ./build/sbt -Pdocker-integration-tests "test-only *PostgresIntegrationSuite" + * ./build/sbt -Pdocker-integration-tests "testOnly *PostgresIntegrationSuite" * }}} */ @DockerTest diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala index 6b215485247d9..984890f22f492 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.tags.DockerTest * To run this test suite for a specific version (e.g., postgres:13.0): * {{{ * POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0 - * ./build/sbt -Pdocker-integration-tests "test-only *PostgresKrbIntegrationSuite" + * ./build/sbt -Pdocker-integration-tests "testOnly *PostgresKrbIntegrationSuite" * }}} */ @DockerTest diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala index 1b51d43c1d139..403f16aac6356 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala @@ -36,7 +36,7 @@ import org.apache.spark.tags.DockerTest * Pull oracle $ORACLE_DOCKER_IMAGE_NAME image - docker pull $ORACLE_DOCKER_IMAGE_NAME * 3. Start docker - sudo service docker start * 4. Run spark test - ./build/sbt -Pdocker-integration-tests - * "test-only org.apache.spark.sql.jdbc.v2.OracleIntegrationSuite" + * "testOnly org.apache.spark.sql.jdbc.v2.OracleIntegrationSuite" * * An actual sequence of commands to run the test is as follows * @@ -47,7 +47,7 @@ import org.apache.spark.tags.DockerTest * $ export ORACLE_DOCKER_IMAGE_NAME=oracle/database:18.4.0-xe * $ cd $SPARK_HOME * $ ./build/sbt -Pdocker-integration-tests - * "test-only org.apache.spark.sql.jdbc.v2.OracleIntegrationSuite" + * "testOnly org.apache.spark.sql.jdbc.v2.OracleIntegrationSuite" * * It has been validated with 18.4.0 Express Edition. 
*/ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala index 37ef04d9cb02f..f3db4d811dd86 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -32,12 +32,12 @@ import org.apache.spark.tags.ExtendedSQLTest * * To run the entire test suite: * {{{ - * build/sbt "sql/test-only *ExpressionsSchemaSuite" + * build/sbt "sql/testOnly *ExpressionsSchemaSuite" * }}} * * To re-generate golden files for entire suite, run: * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/test-only *ExpressionsSchemaSuite" + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *ExpressionsSchemaSuite" * }}} * * For example: diff --git a/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala index f78fc269986b5..c2aee0ad4c9a1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala @@ -51,22 +51,22 @@ import org.apache.spark.tags.ExtendedSQLTest * * To run the entire test suite: * {{{ - * build/sbt "sql/test-only *PlanStability[WithStats]Suite" + * build/sbt "sql/testOnly *PlanStability[WithStats]Suite" * }}} * * To run a single test file upon change: * {{{ - * build/sbt "sql/test-only *PlanStability[WithStats]Suite -- -z (tpcds-v1.4/q49)" + * build/sbt "sql/testOnly *PlanStability[WithStats]Suite -- -z (tpcds-v1.4/q49)" * }}} * * To re-generate golden files for entire suite, run: * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/test-only *PlanStability[WithStats]Suite" + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *PlanStability[WithStats]Suite" * }}} * * To re-generate golden file for a single test, run: * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/test-only *PlanStability[WithStats]Suite -- -z (tpcds-v1.4/q49)" + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *PlanStability[WithStats]Suite -- -z (tpcds-v1.4/q49)" * }}} */ // scalastyle:on line.size.limit diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 0bb1f5e20fc5b..36e55c0994f18 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -49,22 +49,22 @@ import org.apache.spark.util.Utils * * To run the entire test suite: * {{{ - * build/sbt "sql/test-only *SQLQueryTestSuite" + * build/sbt "sql/testOnly *SQLQueryTestSuite" * }}} * * To run a single test file upon change: * {{{ - * build/sbt "~sql/test-only *SQLQueryTestSuite -- -z inline-table.sql" + * build/sbt "~sql/testOnly *SQLQueryTestSuite -- -z inline-table.sql" * }}} * * To re-generate golden files for entire suite, run: * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/test-only *SQLQueryTestSuite" + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *SQLQueryTestSuite" * }}} * * To re-generate golden file for a single test, run: * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/test-only *SQLQueryTestSuite -- -z describe.sql" + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *SQLQueryTestSuite -- -z describe.sql" * }}} * * The format for input files is simple: diff --git 
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala index ecc7ce71d950e..be42497113469 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala @@ -39,12 +39,12 @@ import org.apache.spark.sql.types._ * * To run the entire test suite: * {{{ - * build/sbt "hive-thriftserver/test-only *ThriftServerQueryTestSuite" -Phive-thriftserver + * build/sbt "hive-thriftserver/testOnly *ThriftServerQueryTestSuite" -Phive-thriftserver * }}} * * This test suite won't generate golden files. To re-generate golden files for entire suite, run: * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/test-only *SQLQueryTestSuite" + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *SQLQueryTestSuite" * }}} * * TODO: From 1bfcb51eebf074588ce84cc2143113ab05f07392 Mon Sep 17 00:00:00 2001 From: neko Date: Tue, 13 Oct 2020 09:29:05 -0700 Subject: [PATCH 0232/1009] [SPARK-33132][WEBUI] Make `formatBytes` return `0.0 B` for negative input instead of `NaN` ### What changes were proposed in this pull request? When the bytesRead metric is negative, `formatBytes` in `ui.js` should just return `0.0 B` to avoid a `NaN Undefined` result. ### Why are the changes needed? Strengthen the parameter validation to improve the metric display on the Summary Metrics of the Spark Stage UI. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? It's a small change; tested manually. Closes #30030 from akiyamaneko/formatBytes_NaN. Authored-by: neko Signed-off-by: Dongjoon Hyun --- core/src/main/resources/org/apache/spark/ui/static/utils.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/utils.js b/core/src/main/resources/org/apache/spark/ui/static/utils.js index 4571fc1aec4dd..4cd83332cde5f 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/utils.js +++ b/core/src/main/resources/org/apache/spark/ui/static/utils.js @@ -39,7 +39,7 @@ function formatDuration(milliseconds) { function formatBytes(bytes, type) { if (type !== 'display') return bytes; - if (bytes == 0) return '0.0 B'; + if (bytes <= 0) return '0.0 B'; var k = 1024; var dm = 1; var sizes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']; From 05a62dcada0176301307b0af194b50c383f496ff Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 14 Oct 2020 12:13:54 +0900 Subject: [PATCH 0233/1009] [SPARK-33134][SQL] Return partial results only for root JSON objects ### What changes were proposed in this pull request? In this PR, I propose to restrict the partial result feature to root JSON objects only. The JSON datasource as well as `from_json()` will return `null` for malformed nested JSON objects. ### Why are the changes needed? 1. To not raise an exception to users in the PERMISSIVE mode 2. To fix a regression and to have the same behavior as Spark 2.4.x has 3. The current implementation of partial results is supposed to work only for root (top-level) JSON objects, and is not tested for bad nested complex JSON fields. ### Does this PR introduce _any_ user-facing change? Yes.
Before the changes, the code below: ```scala val pokerhand_raw = Seq("""[{"cards": [19], "playerId": 123456}]""").toDF("events") val event = new StructType().add("playerId", LongType).add("cards", ArrayType(new StructType().add("id", LongType).add("rank", StringType))) val pokerhand_events = pokerhand_raw.select(from_json($"events", ArrayType(event)).as("event")) pokerhand_events.show ``` throws the exception even in the default **PERMISSIVE** mode: ```java java.lang.ClassCastException: java.lang.Long cannot be cast to org.apache.spark.sql.catalyst.util.ArrayData at org.apache.spark.sql.catalyst.expressions.BaseGenericInternalRow.getArray(rows.scala:48) at org.apache.spark.sql.catalyst.expressions.BaseGenericInternalRow.getArray$(rows.scala:48) at org.apache.spark.sql.catalyst.expressions.GenericInternalRow.getArray(rows.scala:195) ``` After the changes: ``` +-----+ |event| +-----+ | null| +-----+ ``` ### How was this patch tested? Added a test to `JsonFunctionsSuite`. Closes #30031 from MaxGekk/json-skip-row-wrong-schema. Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- .../spark/sql/catalyst/json/JacksonParser.scala | 7 ++++--- .../org/apache/spark/sql/JsonFunctionsSuite.scala | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala index 0da2baf24fbcb..bbcff4949ae87 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala @@ -93,7 +93,7 @@ class JacksonParser( new NoopFilters } (parser: JsonParser) => parseJsonToken[Iterable[InternalRow]](parser, st) { - case START_OBJECT => convertObject(parser, st, fieldConverters, jsonFilters) + case START_OBJECT => convertObject(parser, st, fieldConverters, jsonFilters, isRoot = true) // SPARK-3308: support reading top level JSON arrays and take every element // in such an array as a row // @@ -383,7 +383,8 @@ class JacksonParser( parser: JsonParser, schema: StructType, fieldConverters: Array[ValueConverter], - structFilters: StructFilters = new NoopFilters()): Option[InternalRow] = { + structFilters: StructFilters = new NoopFilters(), + isRoot: Boolean = false): Option[InternalRow] = { val row = new GenericInternalRow(schema.length) var badRecordException: Option[Throwable] = None var skipRow = false @@ -397,7 +398,7 @@ class JacksonParser( skipRow = structFilters.skipRow(row, index) } catch { case e: SparkUpgradeException => throw e - case NonFatal(e) => + case NonFatal(e) if isRoot => badRecordException = badRecordException.orElse(Some(e)) parser.skipChildren() } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index 03b48451c7495..5a1a3550d855b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -733,4 +733,18 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { | """.stripMargin) checkAnswer(toDF("yyyy-MM-dd'T'HH:mm:ss.SSSXXX"), toDF("yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]")) } + + test("SPARK-33134: return partial results only for root JSON objects") { + val st = new StructType() + .add("c1", LongType) + .add("c2", ArrayType(new StructType().add("c3", LongType).add("c4", StringType))) + val df1 = 
Seq("""{"c2": [19], "c1": 123456}""").toDF("c0") + checkAnswer(df1.select(from_json($"c0", st)), Row(Row(123456, null))) + val df2 = Seq("""{"data": {"c2": [19], "c1": 123456}}""").toDF("c0") + checkAnswer(df2.select(from_json($"c0", new StructType().add("data", st))), Row(Row(null))) + val df3 = Seq("""[{"c2": [19], "c1": 123456}]""").toDF("c0") + checkAnswer(df3.select(from_json($"c0", ArrayType(st))), Row(null)) + val df4 = Seq("""{"c2": [19]}""").toDF("c0") + checkAnswer(df4.select(from_json($"c0", MapType(StringType, st))), Row(null)) + } } From d8c4a47ea19d18b0aad22263d002267d663c2f66 Mon Sep 17 00:00:00 2001 From: Richard Penney Date: Wed, 14 Oct 2020 08:48:55 -0500 Subject: [PATCH 0234/1009] [SPARK-33061][SQL] Expose inverse hyperbolic trig functions through sql.functions API This patch is a small extension to change-request SPARK-28133, which added inverse hyperbolic functions to the SQL interpreter, but did not include those methods within the Scala `sql.functions._` API. This patch makes `acosh`, `asinh` and `atanh` functions available through the Scala API. Unit-tests have been added to `sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala`. Manual testing has been done via `spark-shell`, using the following recipe: ``` val df = spark.range(0, 11) .toDF("x") .withColumn("x", ($"x" - 5) / 2.0) val hyps = df.withColumn("tanh", tanh($"x")) .withColumn("sinh", sinh($"x")) .withColumn("cosh", cosh($"x")) val invhyps = hyps.withColumn("atanh", atanh($"tanh")) .withColumn("asinh", asinh($"sinh")) .withColumn("acosh", acosh($"cosh")) invhyps.show ``` which produces the following output: ``` +----+--------------------+-------------------+------------------+-------------------+-------------------+------------------+ | x| tanh| sinh| cosh| atanh| asinh| acosh| +----+--------------------+-------------------+------------------+-------------------+-------------------+------------------+ |-2.5| -0.9866142981514303|-6.0502044810397875| 6.132289479663686| -2.500000000000001|-2.4999999999999956| 2.5| |-2.0| -0.9640275800758169| -3.626860407847019|3.7621956910836314|-2.0000000000000004|-1.9999999999999991| 2.0| |-1.5| -0.9051482536448664|-2.1292794550948173| 2.352409615243247|-1.4999999999999998|-1.4999999999999998| 1.5| |-1.0| -0.7615941559557649|-1.1752011936438014| 1.543080634815244| -1.0| -1.0| 1.0| |-0.5|-0.46211715726000974|-0.5210953054937474|1.1276259652063807| -0.5|-0.5000000000000002|0.4999999999999998| | 0.0| 0.0| 0.0| 1.0| 0.0| 0.0| 0.0| | 0.5| 0.46211715726000974| 0.5210953054937474|1.1276259652063807| 0.5| 0.5|0.4999999999999998| | 1.0| 0.7615941559557649| 1.1752011936438014| 1.543080634815244| 1.0| 1.0| 1.0| | 1.5| 0.9051482536448664| 2.1292794550948173| 2.352409615243247| 1.4999999999999998| 1.5| 1.5| | 2.0| 0.9640275800758169| 3.626860407847019|3.7621956910836314| 2.0000000000000004| 2.0| 2.0| | 2.5| 0.9866142981514303| 6.0502044810397875| 6.132289479663686| 2.500000000000001| 2.5| 2.5| +----+--------------------+-------------------+------------------+-------------------+-------------------+------------------+ ``` Closes #29938 from rwpenney/fix/inverse-hyperbolics. 
Authored-by: Richard Penney Signed-off-by: Sean Owen --- .../org/apache/spark/sql/functions.scala | 50 ++++++++++++++++++- .../apache/spark/sql/MathFunctionsSuite.scala | 15 ++++++ 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 2efe5aae09709..21e22d90f0f80 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -1427,6 +1427,22 @@ object functions { */ def acos(columnName: String): Column = acos(Column(columnName)) + /** + * @return inverse hyperbolic cosine of `e` + * + * @group math_funcs + * @since 3.1.0 + */ + def acosh(e: Column): Column = withExpr { Acosh(e.expr) } + + /** + * @return inverse hyperbolic cosine of `columnName` + * + * @group math_funcs + * @since 3.1.0 + */ + def acosh(columnName: String): Column = acosh(Column(columnName)) + /** * @return inverse sine of `e` in radians, as if computed by `java.lang.Math.asin` * @@ -1444,7 +1460,23 @@ object functions { def asin(columnName: String): Column = asin(Column(columnName)) /** - * @return inverse tangent of `e`, as if computed by `java.lang.Math.atan` + * @return inverse hyperbolic sine of `e` + * + * @group math_funcs + * @since 3.1.0 + */ + def asinh(e: Column): Column = withExpr { Asinh(e.expr) } + + /** + * @return inverse hyperbolic sine of `columnName` + * + * @group math_funcs + * @since 3.1.0 + */ + def asinh(columnName: String): Column = asinh(Column(columnName)) + + /** + * @return inverse tangent of `e` as if computed by `java.lang.Math.atan` * * @group math_funcs * @since 1.4.0 @@ -1572,6 +1604,22 @@ object functions { */ def atan2(yValue: Double, xName: String): Column = atan2(yValue, Column(xName)) + /** + * @return inverse hyperbolic tangent of `e` + * + * @group math_funcs + * @since 3.1.0 + */ + def atanh(e: Column): Column = withExpr { Atanh(e.expr) } + + /** + * @return inverse hyperbolic tangent of `columnName` + * + * @group math_funcs + * @since 3.1.0 + */ + def atanh(columnName: String): Column = atanh(Column(columnName)) + /** * An expression that returns the string representation of the binary value of the given long * column. For example, bin("12") returns "1100". 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala index bd86c2ec075b0..cd92976571230 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala @@ -125,6 +125,11 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { testOneToOneMathFunction(sinh, math.sinh) } + test("asinh") { + testOneToOneMathFunction(asinh, + (x: Double) => math.log(x + math.sqrt(x * x + 1)) ) + } + test("cos") { testOneToOneMathFunction(cos, math.cos) } @@ -137,6 +142,11 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { testOneToOneMathFunction(cosh, math.cosh) } + test("acosh") { + testOneToOneMathFunction(acosh, + (x: Double) => math.log(x + math.sqrt(x * x - 1)) ) + } + test("tan") { testOneToOneMathFunction(tan, math.tan) } @@ -149,6 +159,11 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { testOneToOneMathFunction(tanh, math.tanh) } + test("atanh") { + testOneToOneMathFunction(atanh, + (x: Double) => (0.5 * (math.log1p(x) - math.log1p(-x))) ) + } + test("degrees") { testOneToOneMathFunction(degrees, math.toDegrees) checkAnswer( From 8e5cb1d276686ec428e4e6aa1c3cfd6bb99e4e9a Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Wed, 14 Oct 2020 08:30:03 -0700 Subject: [PATCH 0235/1009] [SPARK-33136][SQL] Fix mistakenly swapped parameter in V2WriteCommand.outputResolved ### What changes were proposed in this pull request? This PR proposes to fix a bug on calling `DataType.equalsIgnoreCompatibleNullability` with mistakenly swapped parameters in `V2WriteCommand.outputResolved`. The order of parameters for `DataType.equalsIgnoreCompatibleNullability` are `from` and `to`, which says that the right order of matching variables are `inAttr` and `outAttr`. ### Why are the changes needed? Spark throws AnalysisException due to unresolved operator in v2 write, while the operator is unresolved due to a bug that parameters to call `DataType.equalsIgnoreCompatibleNullability` in `outputResolved` have been swapped. ### Does this PR introduce _any_ user-facing change? Yes, end users no longer suffer on unresolved operator in v2 write if they're trying to write dataframe containing non-nullable complex types against table matching complex types as nullable. ### How was this patch tested? New UT added. Closes #30033 from HeartSaVioR/SPARK-33136. 
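To make the from/to direction concrete: `equalsIgnoreCompatibleNullability(from, to)` asks whether values of type `from` can safely be written into a column of type `to`, so nullability may only be relaxed in that direction. The sketch below uses the types from the new test and is Spark-internal illustration only (the method is not public API), not part of the patch:
```scala
import org.apache.spark.sql.types._

val writeType = ArrayType(LongType, containsNull = false) // incoming data: no null elements
val tableType = ArrayType(LongType, containsNull = true)  // target column: nullable elements

// Correct order (this patch): "can writeType be written into tableType?" -> true
DataType.equalsIgnoreCompatibleNullability(writeType, tableType)

// Swapped order (the bug): "can tableType be written into writeType?" -> false,
// which made outputResolved reject a perfectly valid write
DataType.equalsIgnoreCompatibleNullability(tableType, writeType)
```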
Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Dongjoon Hyun --- .../catalyst/plans/logical/v2Commands.scala | 2 +- .../spark/sql/DataFrameWriterV2Suite.scala | 87 ++++++++++++++++++- 2 files changed, 84 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 50af16ca276e1..272c19b98512b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -45,7 +45,7 @@ trait V2WriteCommand extends Command { case (inAttr, outAttr) => // names and types must match, nullability must be compatible inAttr.name == outAttr.name && - DataType.equalsIgnoreCompatibleNullability(outAttr.dataType, inAttr.dataType) && + DataType.equalsIgnoreCompatibleNullability(inAttr.dataType, outAttr.dataType) && (outAttr.nullable || !inAttr.nullable) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala index 508eefafd0754..ff5c6242987de 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala @@ -23,16 +23,15 @@ import scala.collection.JavaConverters._ import org.scalatest.BeforeAndAfter -import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchTableException, TableAlreadyExistsException} -import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic} +import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NamedRelation, NoSuchTableException, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic, V2WriteCommand} import org.apache.spark.sql.connector.{InMemoryTable, InMemoryTableCatalog} import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} import org.apache.spark.sql.connector.expressions.{BucketTransform, DaysTransform, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, YearsTransform} import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructType} -import org.apache.spark.sql.types.TimestampType +import org.apache.spark.sql.types.{ArrayType, DataType, IntegerType, LongType, MapType, StringType, StructField, StructType, TimestampType} import org.apache.spark.sql.util.QueryExecutionListener import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils @@ -101,6 +100,86 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo assert(v2.catalog.exists(_ == catalogPlugin)) } + case class FakeV2WriteCommand(table: NamedRelation, query: LogicalPlan) extends V2WriteCommand + + test("SPARK-33136 output resolved on complex types for V2 write commands") { + val tableCatalog = catalog("testcat") + + def assertTypeCompatibility(name: String, fromType: DataType, toType: DataType): Unit = { + val fromTableName = s"from_table_$name" + tableCatalog.createTable( + Identifier.of(Array(), 
fromTableName), + StructType(Array(StructField("col", fromType))), + Array.empty, + new java.util.HashMap[String, String]()) + + val toTable = tableCatalog.createTable( + Identifier.of(Array(), s"to_table_$name"), + StructType(Array(StructField("col", toType))), + Array.empty, + new java.util.HashMap[String, String]()) + + val df = spark.table(s"testcat.$fromTableName") + + val relation = DataSourceV2Relation.create(toTable, Some(tableCatalog), None) + val writeCommand = FakeV2WriteCommand(relation, df.queryExecution.analyzed) + + assert(writeCommand.outputResolved, s"Unable to write from type $fromType to type $toType.") + } + + // The major difference between `from` and `to` is that `from` is a complex type + // with non-nullable, whereas `to` is same data type with flipping nullable. + + // nested struct type + val fromStructType = StructType(Array( + StructField("s", StringType), + StructField("i_nonnull", IntegerType, nullable = false), + StructField("st", StructType(Array( + StructField("l", LongType), + StructField("s_nonnull", StringType, nullable = false)))))) + + val toStructType = StructType(Array( + StructField("s", StringType), + StructField("i_nonnull", IntegerType), + StructField("st", StructType(Array( + StructField("l", LongType), + StructField("s_nonnull", StringType)))))) + + assertTypeCompatibility("struct", fromStructType, toStructType) + + // array type + assertTypeCompatibility("array", ArrayType(LongType, containsNull = false), + ArrayType(LongType, containsNull = true)) + + // array type with struct type + val fromArrayWithStructType = ArrayType( + StructType(Array(StructField("s", StringType, nullable = false))), + containsNull = false) + + val toArrayWithStructType = ArrayType( + StructType(Array(StructField("s", StringType))), + containsNull = true) + + assertTypeCompatibility("array_struct", fromArrayWithStructType, toArrayWithStructType) + + // map type + assertTypeCompatibility("map", MapType(IntegerType, StringType, valueContainsNull = false), + MapType(IntegerType, StringType, valueContainsNull = true)) + + // map type with struct type + val fromMapWithStructType = MapType( + IntegerType, + StructType(Array(StructField("s", StringType, nullable = false))), + valueContainsNull = false) + + val toMapWithStructType = MapType( + IntegerType, + StructType(Array(StructField("s", StringType))), + valueContainsNull = true) + + assertTypeCompatibility("map_struct", fromMapWithStructType, toMapWithStructType) + } + test("Append: basic append") { spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") From f3ad32f4b6fc55e89e7fb222ed565ad3e32d47c6 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 14 Oct 2020 16:17:28 +0000 Subject: [PATCH 0236/1009] [SPARK-33026][SQL][FOLLOWUP] metrics name should be numOutputRows ### What changes were proposed in this pull request? Follow the convention and rename the metrics `numRows` to `numOutputRows` ### Why are the changes needed? `FilterExec`, `HashAggregateExec`, etc. all use `numOutputRows` ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing tests Closes #30039 from cloud-fan/minor. 
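For anyone inspecting these metrics programmatically, the lookup key changes accordingly. A minimal spark-shell sketch (illustrative, not part of the patch):
```scala
import org.apache.spark.sql.functions.broadcast
import org.apache.spark.sql.execution.exchange.BroadcastExchangeExec

val df = spark.range(5).join(broadcast(spark.range(5)), "id")
df.collect()

// The broadcast row count is now keyed "numOutputRows", consistent with
// FilterExec, HashAggregateExec, etc.
df.queryExecution.executedPlan.collectFirst { case b: BroadcastExchangeExec => b }
  .foreach(b => println(b.metrics("numOutputRows").value))
```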
Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../sql/execution/exchange/BroadcastExchangeExec.scala | 8 ++++---- .../spark/sql/execution/metric/SQLMetricsSuite.scala | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala index 4b884dfe537e8..0c5fee20385e1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala @@ -78,7 +78,7 @@ case class BroadcastExchangeExec( override lazy val metrics = Map( "dataSize" -> SQLMetrics.createSizeMetric(sparkContext, "data size"), - "numRows" -> SQLMetrics.createMetric(sparkContext, "number of rows"), + "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"), "collectTime" -> SQLMetrics.createTimingMetric(sparkContext, "time to collect"), "buildTime" -> SQLMetrics.createTimingMetric(sparkContext, "time to build"), "broadcastTime" -> SQLMetrics.createTimingMetric(sparkContext, "time to broadcast")) @@ -91,8 +91,8 @@ case class BroadcastExchangeExec( override def runtimeStatistics: Statistics = { val dataSize = metrics("dataSize").value - val numRows = metrics("numRows").value - Statistics(dataSize, Some(numRows)) + val rowCount = metrics("numOutputRows").value + Statistics(dataSize, Some(rowCount)) } @transient @@ -116,11 +116,11 @@ case class BroadcastExchangeExec( val beforeCollect = System.nanoTime() // Use executeCollect/executeCollectIterator to avoid conversion to Scala types val (numRows, input) = child.executeCollectIterator() + longMetric("numOutputRows") += numRows if (numRows >= MAX_BROADCAST_TABLE_ROWS) { throw new SparkException( s"Cannot broadcast the table over $MAX_BROADCAST_TABLE_ROWS rows: $numRows rows") } - longMetric("numRows") += numRows val beforeBuild = System.nanoTime() longMetric("collectTime") += NANOSECONDS.toMillis(beforeBuild - beforeCollect) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala index e404e460fe611..4872906dbfec3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala @@ -751,7 +751,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils } assert(exchanges.size === 1) - testMetricsInSparkPlanOperator(exchanges.head, Map("numRows" -> 2)) + testMetricsInSparkPlanOperator(exchanges.head, Map("numOutputRows" -> 2)) } } } From 9ab0ec4e38e5df0537b38cb0f89e004ad57bec90 Mon Sep 17 00:00:00 2001 From: Adam Binford Date: Thu, 15 Oct 2020 11:59:29 +0900 Subject: [PATCH 0237/1009] [SPARK-33146][CORE] Check for non-fatal errors when loading new applications in SHS ### What changes were proposed in this pull request? Adds an additional check for non-fatal errors when attempting to add a new entry to the history server application listing. ### Why are the changes needed? A bad rolling event log folder (missing appstatus file or no log files) would cause no applications to be loaded by the Spark history server. 
Figuring out why invalid event log folders are created in the first place will be addressed in separate issues, this just lets the history server skip the invalid folder and successfully load all the valid applications. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? New UT Closes #30037 from Kimahriman/bug/rolling-log-crashing-history. Authored-by: Adam Binford Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../deploy/history/FsHistoryProvider.scala | 3 ++ .../history/FsHistoryProviderSuite.scala | 49 +++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index fe8be0b3b20d3..168bd1e68a304 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -538,6 +538,9 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) reader.fileSizeForLastIndex > 0 } catch { case _: FileNotFoundException => false + case NonFatal(e) => + logWarning(s"Error while reading new log ${reader.rootPath}", e) + false } case NonFatal(e) => diff --git a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala index 3f8c875f5a552..1578b908b1b55 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala @@ -1475,6 +1475,55 @@ class FsHistoryProviderSuite extends SparkFunSuite with Matchers with Logging { } } + test("SPARK-33146: don't let one bad rolling log folder prevent loading other applications") { + withTempDir { dir => + val conf = createTestConf(true) + conf.set(HISTORY_LOG_DIR, dir.getAbsolutePath) + val hadoopConf = SparkHadoopUtil.newConfiguration(conf) + val fs = new Path(dir.getAbsolutePath).getFileSystem(hadoopConf) + + val provider = new FsHistoryProvider(conf) + + val writer = new RollingEventLogFilesWriter("app", None, dir.toURI, conf, hadoopConf) + writer.start() + + writeEventsToRollingWriter(writer, Seq( + SparkListenerApplicationStart("app", Some("app"), 0, "user", None), + SparkListenerJobStart(1, 0, Seq.empty)), rollFile = false) + provider.checkForLogs() + provider.cleanLogs() + assert(dir.listFiles().size === 1) + assert(provider.getListing.length === 1) + + // Manually delete the appstatus file to make an invalid rolling event log + val appStatusPath = RollingEventLogFilesWriter.getAppStatusFilePath(new Path(writer.logPath), + "app", None, true) + fs.delete(appStatusPath, false) + provider.checkForLogs() + provider.cleanLogs() + assert(provider.getListing.length === 0) + + // Create a new application + val writer2 = new RollingEventLogFilesWriter("app2", None, dir.toURI, conf, hadoopConf) + writer2.start() + writeEventsToRollingWriter(writer2, Seq( + SparkListenerApplicationStart("app2", Some("app2"), 0, "user", None), + SparkListenerJobStart(1, 0, Seq.empty)), rollFile = false) + + // Both folders exist but only one application found + provider.checkForLogs() + provider.cleanLogs() + assert(provider.getListing.length === 1) + assert(dir.listFiles().size === 2) + + // Make sure a new provider sees the valid application + provider.stop() + val newProvider = new FsHistoryProvider(conf) + newProvider.checkForLogs() + 
assert(newProvider.getListing.length === 1) + } + } + /** * Asks the provider to check for logs and calls a function to perform checks on the updated * app list. Example: From ec34a001ad0ef57a496f29a6523d905128875b17 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 14 Oct 2020 20:48:13 -0700 Subject: [PATCH 0238/1009] [SPARK-33153][SQL][TESTS] Ignore Spark 2.4 in HiveExternalCatalogVersionsSuite on Python 3.8/3.9 ### What changes were proposed in this pull request? This PR aims to ignore Apache Spark 2.4.x distribution in HiveExternalCatalogVersionsSuite if Python version is 3.8 or 3.9. ### Why are the changes needed? Currently, `HiveExternalCatalogVersionsSuite` is broken on the latest OS like `Ubuntu 20.04` because its default Python version is 3.8. PySpark 2.4.x doesn't work on Python 3.8 due to SPARK-29536. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually. ``` $ python3 --version Python 3.8.5 $ build/sbt "hive/testOnly *.HiveExternalCatalogVersionsSuite" ... [info] All tests passed. [info] Passed: Total 1, Failed 0, Errors 0, Passed 1 ``` Closes #30044 from dongjoon-hyun/SPARK-33153. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../src/main/scala/org/apache/spark/TestUtils.scala | 13 +++++++++++++ .../sql/hive/HiveExternalCatalogVersionsSuite.scala | 3 ++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/TestUtils.scala b/core/src/main/scala/org/apache/spark/TestUtils.scala index 6947d1c72f12b..bc3644df0ebb5 100644 --- a/core/src/main/scala/org/apache/spark/TestUtils.scala +++ b/core/src/main/scala/org/apache/spark/TestUtils.scala @@ -255,6 +255,19 @@ private[spark] object TestUtils { attempt.isSuccess && attempt.get == 0 } + def isPythonVersionAtLeast38(): Boolean = { + val attempt = if (Utils.isWindows) { + Try(Process(Seq("cmd.exe", "/C", "python3 --version")) + .run(ProcessLogger(s => s.startsWith("Python 3.8") || s.startsWith("Python 3.9"))) + .exitValue()) + } else { + Try(Process(Seq("sh", "-c", "python3 --version")) + .run(ProcessLogger(s => s.startsWith("Python 3.8") || s.startsWith("Python 3.9"))) + .exitValue()) + } + attempt.isSuccess && attempt.get == 0 + } + /** * Returns the response code from an HTTP(S) URL. */ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index cbfdb7fac88d8..b81b7e8ec0c0f 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -234,7 +234,7 @@ object PROCESS_TABLES extends QueryTest with SQLTestUtils { // Tests the latest version of every release line. val testingVersions: Seq[String] = { import scala.io.Source - try { + val versions: Seq[String] = try { Source.fromURL(s"${releaseMirror}/spark").mkString .split("\n") .filter(_.contains("""
  3. Seq("3.0.1", "2.4.7") // A temporary fallback to use a specific version } + versions.filter(v => v.startsWith("3") || !TestUtils.isPythonVersionAtLeast38()) } protected var spark: SparkSession = _ From 77a8efbc05cb4ecc40dd050c363429e71a9f23c1 Mon Sep 17 00:00:00 2001 From: manuzhang Date: Thu, 15 Oct 2020 05:53:32 +0000 Subject: [PATCH 0239/1009] [SPARK-32932][SQL] Do not use local shuffle reader at final stage on write command ### What changes were proposed in this pull request? Do not use local shuffle reader at final stage if the root node is write command. ### Why are the changes needed? Users usually repartition with partition column on dynamic partition overwrite. AQE could break it by removing physical shuffle with local shuffle reader. That could lead to a large number of output files, even exceeding the file system limit. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Add test. Closes #29797 from manuzhang/spark-32932. Authored-by: manuzhang Signed-off-by: Wenchen Fan --- .../adaptive/AdaptiveSparkPlanExec.scala | 14 ++++- .../adaptive/AdaptiveQueryExecSuite.scala | 51 ++++++++++++++++++- 2 files changed, 63 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index 0e032569bb8a7..5e75e26e6d074 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -36,6 +36,8 @@ import org.apache.spark.sql.catalyst.rules.{PlanChangeLogger, Rule} import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec._ +import org.apache.spark.sql.execution.command.DataWritingCommandExec +import org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec import org.apache.spark.sql.execution.exchange._ import org.apache.spark.sql.execution.ui.{SparkListenerSQLAdaptiveExecutionUpdate, SparkListenerSQLAdaptiveSQLMetricUpdates, SQLPlanMetric} import org.apache.spark.sql.internal.SQLConf @@ -102,6 +104,16 @@ case class AdaptiveSparkPlanExec( OptimizeLocalShuffleReader(conf) ) + private def finalStageOptimizerRules: Seq[Rule[SparkPlan]] = + context.qe.sparkPlan match { + case _: DataWritingCommandExec | _: V2TableWriteExec => + // SPARK-32932: Local shuffle reader could break partitioning that works best + // for the following writing command + queryStageOptimizerRules.filterNot(_.isInstanceOf[OptimizeLocalShuffleReader]) + case _ => + queryStageOptimizerRules + } + // A list of physical optimizer rules to be applied right after a new stage is created. The input // plan to these rules has exchange as its root node. @transient private val postStageCreationRules = Seq( @@ -235,7 +247,7 @@ case class AdaptiveSparkPlanExec( // Run the final plan when there's no more unfinished stages. 
currentPhysicalPlan = applyPhysicalRules( result.newPlan, - queryStageOptimizerRules ++ postStageCreationRules, + finalStageOptimizerRules ++ postStageCreationRules, Some((planChangeLogger, "AQE Final Query Stage Optimization"))) isFinalPlan = true executionId.foreach(onUpdatePlan(_, Seq(currentPhysicalPlan))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index 0dfb1d2fd9eda..38a323b1c057e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -26,15 +26,19 @@ import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent, SparkListe import org.apache.spark.sql.{Dataset, QueryTest, Row, SparkSession, Strategy} import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight} import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan} -import org.apache.spark.sql.execution.{PartialReducerPartitionSpec, ReusedSubqueryExec, ShuffledRowRDD, SparkPlan} +import org.apache.spark.sql.execution.{PartialReducerPartitionSpec, QueryExecution, ReusedSubqueryExec, ShuffledRowRDD, SparkPlan, UnaryExecNode} import org.apache.spark.sql.execution.command.DataWritingCommandExec +import org.apache.spark.sql.execution.datasources.noop.NoopDataSource +import org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, Exchange, ReusedExchangeExec, ShuffleExchangeExec} import org.apache.spark.sql.execution.joins.{BaseJoinExec, BroadcastHashJoinExec, SortMergeJoinExec} import org.apache.spark.sql.execution.ui.SparkListenerSQLAdaptiveExecutionUpdate import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SQLConf.PartitionOverwriteMode import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{IntegerType, StructType} +import org.apache.spark.sql.util.QueryExecutionListener import org.apache.spark.util.Utils class AdaptiveQueryExecSuite @@ -1258,4 +1262,49 @@ class AdaptiveQueryExecSuite } } } + + test("SPARK-32932: Do not use local shuffle reader at final stage on write command") { + withSQLConf(SQLConf.PARTITION_OVERWRITE_MODE.key -> PartitionOverwriteMode.DYNAMIC.toString, + SQLConf.SHUFFLE_PARTITIONS.key -> "5", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { + val data = for ( + i <- 1L to 10L; + j <- 1L to 3L + ) yield (i, j) + + val df = data.toDF("i", "j").repartition($"j") + var noLocalReader: Boolean = false + val listener = new QueryExecutionListener { + override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit = { + qe.executedPlan match { + case plan@(_: DataWritingCommandExec | _: V2TableWriteExec) => + assert(plan.asInstanceOf[UnaryExecNode].child.isInstanceOf[AdaptiveSparkPlanExec]) + noLocalReader = collect(plan) { + case exec: CustomShuffleReaderExec if exec.isLocalReader => exec + }.isEmpty + case _ => // ignore other events + } + } + override def onFailure(funcName: String, qe: QueryExecution, + exception: Exception): Unit = {} + } + spark.listenerManager.register(listener) + + withTable("t") { + df.write.partitionBy("j").saveAsTable("t") + sparkContext.listenerBus.waitUntilEmpty() + assert(noLocalReader) + noLocalReader = false + } + + // Test DataSource v2 + val 
format = classOf[NoopDataSource].getName + df.write.format(format).mode("overwrite").save() + sparkContext.listenerBus.waitUntilEmpty() + assert(noLocalReader) + noLocalReader = false + + spark.listenerManager.unregister(listener) + } + } } From 8e7c39089f885413f5e5e1bdafc2d426291a8719 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 15 Oct 2020 01:51:01 -0700 Subject: [PATCH 0240/1009] [SPARK-33155][K8S] spark.kubernetes.pyspark.pythonVersion allows only '3' ### What changes were proposed in this pull request? This PR makes `spark.kubernetes.pyspark.pythonVersion` allow only `3`. In other words, it will reject `2` for `Python 2`. - [x] Configuration description and check is updated. - [x] Documentation is updated - [x] Unit test cases are updated. - [x] Docker image script is updated. ### Why are the changes needed? After SPARK-32138, Apache Spark 3.1 dropped Python 2 support. ### Does this PR introduce _any_ user-facing change? Yes, but Python 2 support is already dropped officially. ### How was this patch tested? Pass the CI. Closes #30049 from dongjoon-hyun/SPARK-DROP-PYTHON2. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- docs/running-on-kubernetes.md | 2 +- .../main/scala/org/apache/spark/deploy/k8s/Config.scala | 6 +++--- .../k8s/features/DriverCommandFeatureStepSuite.scala | 4 +--- .../docker/src/main/dockerfiles/spark/entrypoint.sh | 7 +------ .../deploy/k8s/integrationtest/DecommissionSuite.scala | 1 - .../deploy/k8s/integrationtest/PythonTestsSuite.scala | 4 +--- .../kubernetes/integration-tests/tests/pyfiles.py | 2 +- 7 files changed, 8 insertions(+), 18 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index e9c292d21fd47..3bd1c410e8433 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1087,7 +1087,7 @@ See the [configuration page](configuration.html) for information on Spark config spark.kubernetes.pyspark.pythonVersion "3" - This sets the major Python version of the docker image used to run the driver and executor containers. Can either be 2 or 3. + This sets the major Python version of the docker image used to run the driver and executor containers. Can be 3. 2.4.0 diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index d6dc56f9d9d1b..00eaff452ba45 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -285,11 +285,11 @@ private[spark] object Config extends Logging { val PYSPARK_MAJOR_PYTHON_VERSION = ConfigBuilder("spark.kubernetes.pyspark.pythonVersion") - .doc("This sets the major Python version. Either 2 or 3. (Python2 or Python3)") + .doc("This sets the major Python version. 
Only 3 is available for Python3.") .version("2.4.0") .stringConf - .checkValue(pv => List("2", "3").contains(pv), - "Ensure that major Python version is either Python2 or Python3") + .checkValue(pv => List("3").contains(pv), + "Ensure that major Python version is Python3") .createWithDefault("3") val KUBERNETES_KERBEROS_KRB5_FILE = diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala index 829943f16beac..6a7366e9c6b7a 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala @@ -43,7 +43,6 @@ class DriverCommandFeatureStepSuite extends SparkFunSuite { test("python resource") { val mainResource = "local:/main.py" val sparkConf = new SparkConf(false) - .set(PYSPARK_MAJOR_PYTHON_VERSION, "2") val spec = applyFeatureStep( PythonMainAppResource(mainResource), conf = sparkConf, @@ -58,7 +57,7 @@ class DriverCommandFeatureStepSuite extends SparkFunSuite { val envs = spec.pod.container.getEnv.asScala .map { env => (env.getName, env.getValue) } .toMap - val expected = Map(ENV_PYSPARK_MAJOR_PYTHON_VERSION -> "2") + val expected = Map(ENV_PYSPARK_MAJOR_PYTHON_VERSION -> "3") assert(envs === expected) } @@ -93,7 +92,6 @@ class DriverCommandFeatureStepSuite extends SparkFunSuite { test("SPARK-25355: python resource args with proxy-user") { val mainResource = "local:/main.py" val sparkConf = new SparkConf(false) - .set(PYSPARK_MAJOR_PYTHON_VERSION, "2") val spec = applyFeatureStep( PythonMainAppResource(mainResource), conf = sparkConf, diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh index 813a70c6e7ec3..d605ae43c024f 100755 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh @@ -44,12 +44,7 @@ if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH" fi -if [ "$PYSPARK_MAJOR_PYTHON_VERSION" == "2" ]; then - pyv="$(python -V 2>&1)" - export PYTHON_VERSION="${pyv:7}" - export PYSPARK_PYTHON="python" - export PYSPARK_DRIVER_PYTHON="python" -elif [ "$PYSPARK_MAJOR_PYTHON_VERSION" == "3" ]; then +if [ "$PYSPARK_MAJOR_PYTHON_VERSION" == "3" ]; then pyv3="$(python3 -V 2>&1)" export PYTHON_VERSION="${pyv3:7}" export PYSPARK_PYTHON="python3" diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala index 6e42819b1779e..fd14b12b112d3 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala @@ -26,7 +26,6 @@ private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite => test("Test basic decommissioning", k8sTestTag) { sparkAppConf .set(config.DECOMMISSION_ENABLED.key, "true") - 
.set("spark.kubernetes.pyspark.pythonVersion", "3") .set("spark.kubernetes.container.image", pyImage) .set(config.STORAGE_DECOMMISSION_ENABLED.key, "true") .set(config.STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED.key, "true") diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PythonTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PythonTestsSuite.scala index b16ccb429074f..bad6f1c1021ba 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PythonTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PythonTestsSuite.scala @@ -35,10 +35,9 @@ private[spark] trait PythonTestsSuite { k8sSuite: KubernetesSuite => isJVM = false) } - test("Run PySpark with Python3 to test a pyfiles example", k8sTestTag) { + test("Run PySpark to test a pyfiles example", k8sTestTag) { sparkAppConf .set("spark.kubernetes.container.image", pyImage) - .set("spark.kubernetes.pyspark.pythonVersion", "3") runSparkApplicationAndVerifyCompletion( appResource = PYSPARK_FILES, mainClass = "", @@ -57,7 +56,6 @@ private[spark] trait PythonTestsSuite { k8sSuite: KubernetesSuite => test("Run PySpark with memory customization", k8sTestTag) { sparkAppConf .set("spark.kubernetes.container.image", pyImage) - .set("spark.kubernetes.pyspark.pythonVersion", "3") .set("spark.kubernetes.memoryOverheadFactor", s"$memOverheadConstant") .set("spark.executor.pyspark.memory", s"${additionalMemory}m") runSparkApplicationAndVerifyCompletion( diff --git a/resource-managers/kubernetes/integration-tests/tests/pyfiles.py b/resource-managers/kubernetes/integration-tests/tests/pyfiles.py index 51c0160554866..73c53be482c03 100644 --- a/resource-managers/kubernetes/integration-tests/tests/pyfiles.py +++ b/resource-managers/kubernetes/integration-tests/tests/pyfiles.py @@ -31,7 +31,7 @@ from py_container_checks import version_check # Begin of Python container checks - version_check(sys.argv[1], 2 if sys.argv[1] == "python" else 3) + version_check(sys.argv[1], 3) # Check python executable at executors spark.udf.register("get_sys_ver", From e85ed8a14c7766ea0fafc32fd9c6ac95c86c8c8f Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 15 Oct 2020 02:24:49 -0700 Subject: [PATCH 0241/1009] [SPARK-33156][INFRA] Upgrade GithubAction image from 18.04 to 20.04 ### What changes were proposed in this pull request? This PR aims to upgrade `Github Action` runner image from `Ubuntu 18.04 (LTS)` to `Ubuntu 20.04 (LTS)`. ### Why are the changes needed? `ubuntu-latest` in `GitHub Action` is still `Ubuntu 18.04 (LTS)`. - https://github.com/actions/virtual-environments#available-environments This upgrade will help Apache Spark 3.1+ preparation for vote and release on the latest OS. This is tested here. - https://github.com/dongjoon-hyun/spark/pull/36 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the `Github Action` in this PR. Closes #30050 from dongjoon-hyun/ubuntu_20.04. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 667371dacf5dc..cd2f01ba7e846 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -17,7 +17,8 @@ jobs: # Build: build Spark and run the tests for specified modules. build: name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})" - runs-on: ubuntu-latest + # Ubuntu 20.04 is the latest LTS. The next LTS is 22.04. + runs-on: ubuntu-20.04 strategy: fail-fast: false matrix: @@ -204,7 +205,7 @@ jobs: # Static analysis, and documentation build lint: name: Linters, licenses, dependencies and documentation generation - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 steps: - name: Checkout Spark repository uses: actions/checkout@v2 @@ -271,7 +272,7 @@ jobs: java11: name: Java 11 build - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 steps: - name: Checkout Spark repository uses: actions/checkout@v2 @@ -296,7 +297,7 @@ jobs: scala-213: name: Scala 2.13 build - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 steps: - name: Checkout Spark repository uses: actions/checkout@v2 From 513b6f5af2b873ca8737fd7f0c42fdfd4fa24292 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Thu, 15 Oct 2020 20:51:20 +0900 Subject: [PATCH 0242/1009] [SPARK-33079][TESTS] Replace the existing Maven job for Scala 2.13 in Github Actions with SBT job ### What changes were proposed in this pull request? SPARK-32926 added a build test to GitHub Action for Scala 2.13 but it's only with Maven. As SPARK-32873 reported, some compilation error happens only with SBT so I think we need to add another build test to GitHub Action for SBT. Unfortunately, we don't have abundant resources for GitHub Actions so instead of just adding the new SBT job, let's replace the existing Maven job with the new SBT job for Scala 2.13. ### Why are the changes needed? To ensure build test passes even with SBT for Scala 2.13. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? GitHub Actions' job. Closes #29958 from sarutak/add-sbt-job-for-scala-2.13. 
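The small Kinesis change bundled below (dropping the empty parameter list from the `isValid` override) is presumably the kind of issue that only shows up on Scala 2.13, which flags overrides whose parameter-list shape differs from the overridden member. A minimal illustration with made-up class names (an assumption about the motivation, not Spark code):
```scala
class Base {
  def isValid: Boolean = true // declared without a parameter list
}

class Child extends Base {
  // Scala 2.13 rejects `override def isValid(): Boolean` here because the
  // empty parameter list no longer matches the parameterless declaration,
  // so the override keeps the same shape:
  override def isValid: Boolean = false
}
```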
Authored-by: Kousuke Saruta Signed-off-by: HyukjinKwon --- .github/workflows/build_and_test.yml | 16 ++++++---------- .../kinesis/KinesisBackedBlockRDD.scala | 2 +- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index cd2f01ba7e846..5f2dc52cc7893 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -301,22 +301,18 @@ jobs: steps: - name: Checkout Spark repository uses: actions/checkout@v2 - - name: Cache Maven local repository + - name: Cache Ivy local repository uses: actions/cache@v2 with: - path: ~/.m2/repository - key: scala-213-maven-${{ hashFiles('**/pom.xml') }} + path: ~/.ivy2/cache + key: scala-213-ivy-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} restore-keys: | - scala-213-maven- + scala-213-ivy- - name: Install Java 11 uses: actions/setup-java@v1 with: java-version: 11 - - name: Build with Maven + - name: Build with SBT run: | - export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" - export MAVEN_CLI_OPTS="--no-transfer-progress" - mkdir -p ~/.m2 ./dev/change-scala-version.sh 2.13 - ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 -Pscala-2.13 install - rm -rf ~/.m2/repository/org/apache/spark + ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Djava.version=11 -Pscala-2.13 compile test:compile diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDD.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDD.scala index 5072b3ae21d87..ab55d545770e9 100644 --- a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDD.scala +++ b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDD.scala @@ -91,7 +91,7 @@ class KinesisBackedBlockRDD[T: ClassTag]( require(_blockIds.length == arrayOfseqNumberRanges.length, "Number of blockIds is not equal to the number of sequence number ranges") - override def isValid(): Boolean = true + override def isValid: Boolean = true override def getPartitions: Array[Partition] = { Array.tabulate(_blockIds.length) { i => From 31f7097ce0d7eade17a96fe01184e62a88fd2bbd Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Thu, 15 Oct 2020 15:33:23 +0000 Subject: [PATCH 0243/1009] [SPARK-32402][SQL][FOLLOW-UP] Use quoted column name for JDBCTableCatalog.alterTable ### What changes were proposed in this pull request? I currently have unquoted column names in alter table, e.g. ```ALTER TABLE "test"."alt_table" DROP COLUMN c1``` should change to quoted column name ```ALTER TABLE "test"."alt_table" DROP COLUMN "c1"``` ### Why are the changes needed? We should always use quoted identifiers in JDBC SQLs, e.g. ```CREATE TABLE "test"."abc" ("col" INTEGER ) ``` or ```INSERT INTO "test"."abc" ("col") VALUES (?)```. Using unquoted column name in alterTable causes problems, for example: ``` sql("CREATE TABLE h2.test.alt_table (c1 INTEGER, c2 INTEGER) USING _") sql("ALTER TABLE h2.test.alt_table DROP COLUMN c1") org.apache.spark.sql.AnalysisException: Failed table altering: test.alt_table; ...... Caused by: org.h2.jdbc.JdbcSQLException: Column "C1" not found; SQL statement: ALTER TABLE "test"."alt_table" DROP COLUMN c1 [42122-195] ``` ### Does this PR introduce _any_ user-facing change? 
No ### How was this patch tested? Existing tests Closes #30041 from huaxingao/alter_table_followup. Authored-by: Huaxin Gao Signed-off-by: Wenchen Fan --- .../apache/spark/sql/jdbc/DB2Dialect.scala | 5 ++-- .../apache/spark/sql/jdbc/JdbcDialects.scala | 25 ++++++++++-------- .../apache/spark/sql/jdbc/OracleDialect.scala | 11 +++++--- .../v2/jdbc/JDBCTableCatalogSuite.scala | 26 +++++++++++-------- 4 files changed, 39 insertions(+), 28 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala index 908e03726d887..0b394db5c8932 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala @@ -66,7 +66,8 @@ private object DB2Dialect extends JdbcDialect { tableName: String, columnName: String, newDataType: String): String = - s"ALTER TABLE $tableName ALTER COLUMN $columnName SET DATA TYPE $newDataType" + s"ALTER TABLE $tableName ALTER COLUMN ${quoteIdentifier(columnName)}" + + s" SET DATA TYPE $newDataType" // scalastyle:off line.size.limit // See https://www.ibm.com/support/knowledgecenter/en/SSEPGG_11.5.0/com.ibm.db2.luw.sql.ref.doc/doc/r0000888.html @@ -76,6 +77,6 @@ private object DB2Dialect extends JdbcDialect { columnName: String, isNullable: Boolean): String = { val nullable = if (isNullable) "DROP NOT NULL" else "SET NOT NULL" - s"ALTER TABLE $tableName ALTER COLUMN $columnName $nullable" + s"ALTER TABLE $tableName ALTER COLUMN ${quoteIdentifier(columnName)} $nullable" } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index 5f8d788bc7a22..e0703195051dc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -200,7 +200,6 @@ abstract class JdbcDialect extends Serializable { /** * Alter an existing table. - * TODO (SPARK-32523): Override this method in the dialects that have different syntax. * * @param tableName The name of the table to be altered. * @param changes Changes to apply to the table. 
@@ -216,10 +215,10 @@ abstract class JdbcDialect extends Serializable { updateClause += getAddColumnQuery(tableName, name(0), dataType) case rename: RenameColumn if rename.fieldNames.length == 1 => val name = rename.fieldNames - updateClause += s"ALTER TABLE $tableName RENAME COLUMN ${name(0)} TO ${rename.newName}" + updateClause += getRenameColumnQuery(tableName, name(0), rename.newName) case delete: DeleteColumn if delete.fieldNames.length == 1 => val name = delete.fieldNames - updateClause += s"ALTER TABLE $tableName DROP COLUMN ${name(0)}" + updateClause += getDeleteColumnQuery(tableName, name(0)) case updateColumnType: UpdateColumnType if updateColumnType.fieldNames.length == 1 => val name = updateColumnType.fieldNames val dataType = JdbcUtils.getJdbcType(updateColumnType.newDataType(), this) @@ -227,7 +226,6 @@ abstract class JdbcDialect extends Serializable { updateClause += getUpdateColumnTypeQuery(tableName, name(0), dataType) case updateNull: UpdateColumnNullability if updateNull.fieldNames.length == 1 => val name = updateNull.fieldNames - val nullable = if (updateNull.nullable()) "NULL" else "NOT NULL" updateClause += getUpdateColumnNullabilityQuery(tableName, name(0), updateNull.nullable()) case _ => throw new SQLFeatureNotSupportedException(s"Unsupported TableChange $change") @@ -236,23 +234,28 @@ abstract class JdbcDialect extends Serializable { updateClause.result() } - def getAddColumnQuery(tableName: String, columnName: String, dataType: String): String = { - s"ALTER TABLE $tableName ADD COLUMN $columnName $dataType" - } + def getAddColumnQuery(tableName: String, columnName: String, dataType: String): String = + s"ALTER TABLE $tableName ADD COLUMN ${quoteIdentifier(columnName)} $dataType" + + def getRenameColumnQuery(tableName: String, columnName: String, newName: String): String = + s"ALTER TABLE $tableName RENAME COLUMN ${quoteIdentifier(columnName)} TO" + + s" ${quoteIdentifier(newName)}" + + def getDeleteColumnQuery(tableName: String, columnName: String): String = + s"ALTER TABLE $tableName DROP COLUMN ${quoteIdentifier(columnName)}" def getUpdateColumnTypeQuery( tableName: String, columnName: String, - newDataType: String): String = { - s"ALTER TABLE $tableName ALTER COLUMN $columnName $newDataType" - } + newDataType: String): String = + s"ALTER TABLE $tableName ALTER COLUMN ${quoteIdentifier(columnName)} $newDataType" def getUpdateColumnNullabilityQuery( tableName: String, columnName: String, isNullable: Boolean): String = { val nullable = if (isNullable) "NULL" else "NOT NULL" - s"ALTER TABLE $tableName ALTER COLUMN $columnName SET $nullable" + s"ALTER TABLE $tableName ALTER COLUMN ${quoteIdentifier(columnName)} SET $nullable" } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala index 128b90a190481..491b6e29ecf2c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala @@ -120,21 +120,24 @@ private case object OracleDialect extends JdbcDialect { } // see https://docs.oracle.com/cd/B28359_01/server.111/b28286/statements_3001.htm#SQLRF01001 - override def getAddColumnQuery(tableName: String, columnName: String, dataType: String): String = - s"ALTER TABLE $tableName ADD $columnName $dataType" + override def getAddColumnQuery( + tableName: String, + columnName: String, + dataType: String): String = + s"ALTER TABLE $tableName ADD ${quoteIdentifier(columnName)} 
$dataType" // see https://docs.oracle.com/cd/B28359_01/server.111/b28286/statements_3001.htm#SQLRF01001 override def getUpdateColumnTypeQuery( tableName: String, columnName: String, newDataType: String): String = - s"ALTER TABLE $tableName MODIFY $columnName $newDataType" + s"ALTER TABLE $tableName MODIFY ${quoteIdentifier(columnName)} $newDataType" override def getUpdateColumnNullabilityQuery( tableName: String, columnName: String, isNullable: Boolean): String = { val nullable = if (isNullable) "NULL" else "NOT NULL" - s"ALTER TABLE $tableName MODIFY $columnName $nullable" + s"ALTER TABLE $tableName MODIFY ${quoteIdentifier(columnName)} $nullable" } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala index 8fe58e3a0a28a..209f5609e447f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala @@ -178,15 +178,15 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { .add("C1", IntegerType) .add("C2", StringType) assert(t.schema === expectedSchema) - sql("ALTER TABLE h2.test.alt_table ADD COLUMNS (C3 DOUBLE)") + sql("ALTER TABLE h2.test.alt_table ADD COLUMNS (c3 DOUBLE)") t = spark.table("h2.test.alt_table") - expectedSchema = expectedSchema.add("C3", DoubleType) + expectedSchema = expectedSchema.add("c3", DoubleType) assert(t.schema === expectedSchema) // Add already existing column val msg = intercept[AnalysisException] { - sql("ALTER TABLE h2.test.alt_table ADD COLUMNS (C3 DOUBLE)") + sql("ALTER TABLE h2.test.alt_table ADD COLUMNS (c3 DOUBLE)") }.getMessage - assert(msg.contains("Cannot add column, because C3 already exists")) + assert(msg.contains("Cannot add column, because c3 already exists")) } // Add a column to not existing table and namespace Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => @@ -199,8 +199,8 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("alter table ... rename column") { withTable("h2.test.alt_table") { - sql("CREATE TABLE h2.test.alt_table (ID INTEGER, C0 INTEGER) USING _") - sql("ALTER TABLE h2.test.alt_table RENAME COLUMN ID TO C") + sql("CREATE TABLE h2.test.alt_table (id INTEGER, C0 INTEGER) USING _") + sql("ALTER TABLE h2.test.alt_table RENAME COLUMN id TO C") val t = spark.table("h2.test.alt_table") val expectedSchema = new StructType() .add("C", IntegerType) @@ -223,8 +223,9 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("alter table ... drop column") { withTable("h2.test.alt_table") { - sql("CREATE TABLE h2.test.alt_table (C1 INTEGER, C2 INTEGER) USING _") + sql("CREATE TABLE h2.test.alt_table (C1 INTEGER, C2 INTEGER, c3 INTEGER) USING _") sql("ALTER TABLE h2.test.alt_table DROP COLUMN C1") + sql("ALTER TABLE h2.test.alt_table DROP COLUMN c3") val t = spark.table("h2.test.alt_table") val expectedSchema = new StructType().add("C2", IntegerType) assert(t.schema === expectedSchema) @@ -245,10 +246,11 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("alter table ... 
update column type") { withTable("h2.test.alt_table") { - sql("CREATE TABLE h2.test.alt_table (ID INTEGER) USING _") + sql("CREATE TABLE h2.test.alt_table (ID INTEGER, deptno INTEGER) USING _") sql("ALTER TABLE h2.test.alt_table ALTER COLUMN id TYPE DOUBLE") + sql("ALTER TABLE h2.test.alt_table ALTER COLUMN deptno TYPE DOUBLE") val t = spark.table("h2.test.alt_table") - val expectedSchema = new StructType().add("ID", DoubleType) + val expectedSchema = new StructType().add("ID", DoubleType).add("deptno", DoubleType) assert(t.schema === expectedSchema) // Update not existing column val msg1 = intercept[AnalysisException] { @@ -272,10 +274,12 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("alter table ... update column nullability") { withTable("h2.test.alt_table") { - sql("CREATE TABLE h2.test.alt_table (ID INTEGER NOT NULL) USING _") + sql("CREATE TABLE h2.test.alt_table (ID INTEGER NOT NULL, deptno INTEGER NOT NULL) USING _") sql("ALTER TABLE h2.test.alt_table ALTER COLUMN ID DROP NOT NULL") + sql("ALTER TABLE h2.test.alt_table ALTER COLUMN deptno DROP NOT NULL") val t = spark.table("h2.test.alt_table") - val expectedSchema = new StructType().add("ID", IntegerType, nullable = true) + val expectedSchema = new StructType() + .add("ID", IntegerType, nullable = true).add("deptno", IntegerType, nullable = true) assert(t.schema === expectedSchema) // Update nullability of not existing column val msg = intercept[AnalysisException] { From b089fe5376d72ccd0a6724ac9aa2386c5a81b06b Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Thu, 15 Oct 2020 09:08:14 -0700 Subject: [PATCH 0244/1009] [SPARK-32247][INFRA] Install and test scipy with PyPy in GitHub Actions ### What changes were proposed in this pull request? This PR proposes to install `scipy` as well in PyPy. It will test several ML specific test cases in PyPy as well. For example, https://github.com/apache/spark/blob/31a16fbb405a19dc3eb732347e0e1f873b16971d/python/pyspark/mllib/tests/test_linalg.py#L487 It was not installed when GitHub Actions build was added because it failed to install for an unknown reason. Seems like it's fixed in the latest scipy. ### Why are the changes needed? To improve test coverage. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? GitHub Actions build in this PR will test it out. Closes #30054 from HyukjinKwon/SPARK-32247. Authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 5f2dc52cc7893..9b7026eeca4c8 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -155,12 +155,11 @@ jobs: - name: Install Python packages (Python 3.6 and PyPy3) if: contains(matrix.modules, 'pyspark') # PyArrow is not supported in PyPy yet, see ARROW-2651. - # TODO(SPARK-32247): scipy installation with PyPy fails for an unknown reason. 
run: | python3.6 -m pip install numpy pyarrow pandas scipy xmlrunner python3.6 -m pip list # PyPy does not have xmlrunner - pypy3 -m pip install numpy pandas + pypy3 -m pip install numpy pandas scipy pypy3 -m pip list - name: Install Python packages (Python 3.8) if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) From 82eea13c7686fb4bfbe8fb4185db81438d2ea884 Mon Sep 17 00:00:00 2001 From: Min Shen Date: Thu, 15 Oct 2020 12:34:52 -0500 Subject: [PATCH 0245/1009] [SPARK-32915][CORE] Network-layer and shuffle RPC layer changes to support push shuffle blocks ### What changes were proposed in this pull request? This is the first patch for SPIP SPARK-30602 for push-based shuffle. Summary of changes: * Introduce a new API in ExternalBlockStoreClient to push blocks to a remote shuffle service. * Leveraging the streaming upload functionality in SPARK-6237, this patch also enables ExternalBlockHandler to delegate the handling of block push requests to MergedShuffleFileManager. * Propose the API for MergedShuffleFileManager, where the core logic on the shuffle service side to handle block push requests is defined. The actual implementation of this API is deferred to a follow-up PR to restrict the size of this one. * Introduce OneForOneBlockPusher to enable pushing blocks to remote shuffle services in the shuffle RPC layer. * New protocols in the shuffle RPC layer to support these functionalities. ### Why are the changes needed? Refer to the SPIP in SPARK-30602. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added unit tests. The reference PR with the consolidated changes covering the complete implementation is also provided in SPARK-30602. We have already verified the functionality and the improved performance as documented in the SPIP doc. Lead-authored-by: Min Shen Co-authored-by: Chandni Singh Co-authored-by: Ye Zhou Closes #29855 from Victsm/SPARK-32915.
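To make the client-side flow concrete, here is a minimal, hypothetical Scala sketch of how the new `pushBlocks` API introduced above could be driven. Only the `pushBlocks` signature and the `BlockFetchingListener` callbacks come from this patch; the helper function, block IDs, and payloads are made-up placeholders, not code from the PR:

```
import java.nio.ByteBuffer

import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer}
import org.apache.spark.network.shuffle.{BlockFetchingListener, ExternalBlockStoreClient}

// Assumes `client` has already been created and initialized for the application.
def pushExample(client: ExternalBlockStoreClient, host: String, port: Int): Unit = {
  // Hypothetical block IDs and dummy payloads; real callers would pass shuffle data buffers.
  val blockIds = Array("shufflePush_0_1_2", "shufflePush_0_1_3")
  val payload = ByteBuffer.wrap(Array[Byte](1, 2, 3))
  val buffers: Array[ManagedBuffer] =
    Array(new NioManagedBuffer(payload), new NioManagedBuffer(payload))

  // Pushes are best-effort: per-block success or failure is reported through the
  // same BlockFetchingListener interface that block fetches use.
  client.pushBlocks(host, port, blockIds, buffers, new BlockFetchingListener {
    override def onBlockFetchSuccess(blockId: String, data: ManagedBuffer): Unit =
      println(s"pushed $blockId")
    override def onBlockFetchFailure(blockId: String, exception: Throwable): Unit =
      println(s"failed to push $blockId: $exception")
  })
}
```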
Lead-authored-by: Min Shen Co-authored-by: Chandni Singh Co-authored-by: Ye Zhou Co-authored-by: Chandni Singh Co-authored-by: Min Shen Signed-off-by: Mridul Muralidharan gmail.com> --- common/network-common/pom.xml | 4 + .../spark/network/protocol/Encoders.java | 63 +++++++ common/network-shuffle/pom.xml | 9 + .../network/shuffle/BlockStoreClient.java | 21 +++ .../spark/network/shuffle/ErrorHandler.java | 85 ++++++++++ .../network/shuffle/ExternalBlockHandler.java | 104 +++++++++++- .../shuffle/ExternalBlockStoreClient.java | 52 +++++- .../network/shuffle/MergedBlockMeta.java | 64 +++++++ .../shuffle/MergedShuffleFileManager.java | 116 +++++++++++++ .../network/shuffle/OneForOneBlockPusher.java | 123 ++++++++++++++ .../network/shuffle/RetryingBlockFetcher.java | 27 ++- .../protocol/BlockTransferMessage.java | 6 +- .../protocol/FinalizeShuffleMerge.java | 84 +++++++++ .../shuffle/protocol/MergeStatuses.java | 118 +++++++++++++ .../shuffle/protocol/PushBlockStream.java | 95 +++++++++++ .../network/shuffle/ErrorHandlerSuite.java | 51 ++++++ .../shuffle/ExternalBlockHandlerSuite.java | 40 ++++- .../shuffle/OneForOneBlockPusherSuite.java | 159 ++++++++++++++++++ .../ExternalShuffleServiceMetricsSuite.scala | 3 +- .../yarn/YarnShuffleServiceMetricsSuite.scala | 2 +- .../yarn/YarnShuffleServiceSuite.scala | 1 + 21 files changed, 1212 insertions(+), 15 deletions(-) create mode 100644 common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java create mode 100644 common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergedBlockMeta.java create mode 100644 common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergedShuffleFileManager.java create mode 100644 common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockPusher.java create mode 100644 common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FinalizeShuffleMerge.java create mode 100644 common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/MergeStatuses.java create mode 100644 common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/PushBlockStream.java create mode 100644 common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ErrorHandlerSuite.java create mode 100644 common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockPusherSuite.java diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 9d5bc9aae0719..d328a7de0a762 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -91,6 +91,10 @@ org.apache.commons commons-crypto + + org.roaringbitmap + RoaringBitmap + diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/Encoders.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/Encoders.java index 490915f6de4b3..4fa191b3917e3 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/Encoders.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/Encoders.java @@ -17,9 +17,11 @@ package org.apache.spark.network.protocol; +import java.io.IOException; import java.nio.charset.StandardCharsets; import io.netty.buffer.ByteBuf; +import org.roaringbitmap.RoaringBitmap; /** Provides a canonical set of Encoders for simple types. 
*/ public class Encoders { @@ -44,6 +46,40 @@ public static String decode(ByteBuf buf) { } } + /** Bitmaps are encoded with their serialization length followed by the serialization bytes. */ + public static class Bitmaps { + public static int encodedLength(RoaringBitmap b) { + // Compress the bitmap before serializing it. Note that since BlockTransferMessage + // needs to invoke encodedLength first to figure out the length for the ByteBuf, it + // guarantees that the bitmap will always be compressed before being serialized. + b.trim(); + b.runOptimize(); + return b.serializedSizeInBytes(); + } + + public static void encode(ByteBuf buf, RoaringBitmap b) { + int encodedLength = b.serializedSizeInBytes(); + // RoaringBitmap requires nio ByteBuffer for serde. We expose the netty ByteBuf as a nio + // ByteBuffer. Here, we need to explicitly manage the index so we can write into the + // ByteBuffer, and the write is reflected in the underneath ByteBuf. + b.serialize(buf.nioBuffer(buf.writerIndex(), encodedLength)); + buf.writerIndex(buf.writerIndex() + encodedLength); + } + + public static RoaringBitmap decode(ByteBuf buf) { + RoaringBitmap bitmap = new RoaringBitmap(); + try { + bitmap.deserialize(buf.nioBuffer()); + // RoaringBitmap deserialize does not advance the reader index of the underlying ByteBuf. + // Manually update the index here. + buf.readerIndex(buf.readerIndex() + bitmap.serializedSizeInBytes()); + } catch (IOException e) { + throw new RuntimeException("Exception while decoding bitmap", e); + } + return bitmap; + } + } + /** Byte arrays are encoded with their length followed by bytes. */ public static class ByteArrays { public static int encodedLength(byte[] arr) { @@ -135,4 +171,31 @@ public static long[] decode(ByteBuf buf) { return longs; } } + + /** Bitmap arrays are encoded with the number of bitmaps followed by per-Bitmap encoding. 
*/ + public static class BitmapArrays { + public static int encodedLength(RoaringBitmap[] bitmaps) { + int totalLength = 4; + for (RoaringBitmap b : bitmaps) { + totalLength += Bitmaps.encodedLength(b); + } + return totalLength; + } + + public static void encode(ByteBuf buf, RoaringBitmap[] bitmaps) { + buf.writeInt(bitmaps.length); + for (RoaringBitmap b : bitmaps) { + Bitmaps.encode(buf, b); + } + } + + public static RoaringBitmap[] decode(ByteBuf buf) { + int numBitmaps = buf.readInt(); + RoaringBitmap[] bitmaps = new RoaringBitmap[numBitmaps]; + for (int i = 0; i < bitmaps.length; i ++) { + bitmaps[i] = Bitmaps.decode(buf); + } + return bitmaps; + } + } } diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 00f1defbb0093..a4a1ff92ef9a0 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -57,6 +57,10 @@ com.google.guava guava + + org.roaringbitmap + RoaringBitmap + @@ -93,6 +97,11 @@ mockito-core test + + commons-io + commons-io + test + diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java index e762bd2071632..37befcd4b67fa 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java @@ -29,6 +29,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.client.RpcResponseCallback; import org.apache.spark.network.client.TransportClient; import org.apache.spark.network.client.TransportClientFactory; @@ -135,4 +136,24 @@ public void onFailure(Throwable t) { hostLocalDirsCompletable.completeExceptionally(e); } } + + /** + * Push a sequence of shuffle blocks in a best-effort manner to a remote node asynchronously. + * These shuffle blocks, along with blocks pushed by other clients, will be merged into + * per-shuffle partition merged shuffle files on the destination node. + * + * @param host the host of the remote node. + * @param port the port of the remote node. + * @param blockIds block ids to be pushed + * @param buffers buffers to be pushed + * @param listener the listener to receive block push status. + */ + public void pushBlocks( + String host, + int port, + String[] blockIds, + ManagedBuffer[] buffers, + BlockFetchingListener listener) { + throw new UnsupportedOperationException(); + } } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java new file mode 100644 index 0000000000000..308b0b7a6b33b --- /dev/null +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.shuffle; + +import java.net.ConnectException; + +import com.google.common.base.Throwables; + +/** + * Plugs into {@link RetryingBlockFetcher} to further control when an exception should be retried + * and logged. + * Note: {@link RetryingBlockFetcher} will delegate the exception to this handler only when + * - remaining retries < max retries + * - exception is an IOException + */ + +public interface ErrorHandler { + + boolean shouldRetryError(Throwable t); + + default boolean shouldLogError(Throwable t) { + return true; + } + + /** + * A no-op error handler instance. + */ + ErrorHandler NOOP_ERROR_HANDLER = t -> true; + + /** + * The error handler for pushing shuffle blocks to remote shuffle services. + */ + class BlockPushErrorHandler implements ErrorHandler { + /** + * String constant used for generating exception messages indicating a block to be merged + * arrives too late on the server side, and also for later checking such exceptions on the + * client side. When we get a block push failure because of the block arrives too late, we + * will not retry pushing the block nor log the exception on the client side. + */ + public static final String TOO_LATE_MESSAGE_SUFFIX = + "received after merged shuffle is finalized"; + + /** + * String constant used for generating exception messages indicating the server couldn't + * append a block after all available attempts due to collision with other blocks belonging + * to the same shuffle partition, and also for later checking such exceptions on the client + * side. When we get a block push failure because of the block couldn't be written due to + * this reason, we will not log the exception on the client side. + */ + public static final String BLOCK_APPEND_COLLISION_DETECTED_MSG_PREFIX = + "Couldn't find an opportunity to write block"; + + @Override + public boolean shouldRetryError(Throwable t) { + // If it is a connection time out or a connection closed exception, no need to retry. 
+ if (t.getCause() != null && t.getCause() instanceof ConnectException) { + return false; + } + // If the block is too late, there is no need to retry it + return !Throwables.getStackTraceAsString(t).contains(TOO_LATE_MESSAGE_SUFFIX); + } + + @Override + public boolean shouldLogError(Throwable t) { + String errorStackTrace = Throwables.getStackTraceAsString(t); + return !errorStackTrace.contains(BLOCK_APPEND_COLLISION_DETECTED_MSG_PREFIX) && + !errorStackTrace.contains(TOO_LATE_MESSAGE_SUFFIX); + } + } +} diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java index 33865a21ea914..321b25305c504 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java @@ -32,6 +32,7 @@ import com.codahale.metrics.Timer; import com.codahale.metrics.Counter; import com.google.common.annotations.VisibleForTesting; +import org.apache.spark.network.client.StreamCallbackWithID; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -61,11 +62,21 @@ public class ExternalBlockHandler extends RpcHandler { final ExternalShuffleBlockResolver blockManager; private final OneForOneStreamManager streamManager; private final ShuffleMetrics metrics; + private final MergedShuffleFileManager mergeManager; public ExternalBlockHandler(TransportConf conf, File registeredExecutorFile) throws IOException { this(new OneForOneStreamManager(), - new ExternalShuffleBlockResolver(conf, registeredExecutorFile)); + new ExternalShuffleBlockResolver(conf, registeredExecutorFile), + new NoOpMergedShuffleFileManager()); + } + + public ExternalBlockHandler( + TransportConf conf, + File registeredExecutorFile, + MergedShuffleFileManager mergeManager) throws IOException { + this(new OneForOneStreamManager(), + new ExternalShuffleBlockResolver(conf, registeredExecutorFile), mergeManager); } @VisibleForTesting @@ -78,9 +89,19 @@ public ExternalShuffleBlockResolver getBlockResolver() { public ExternalBlockHandler( OneForOneStreamManager streamManager, ExternalShuffleBlockResolver blockManager) { + this(streamManager, blockManager, new NoOpMergedShuffleFileManager()); + } + + /** Enables mocking out the StreamManager, BlockManager, and MergeManager. 
*/ + @VisibleForTesting + public ExternalBlockHandler( + OneForOneStreamManager streamManager, + ExternalShuffleBlockResolver blockManager, + MergedShuffleFileManager mergeManager) { this.metrics = new ShuffleMetrics(); this.streamManager = streamManager; this.blockManager = blockManager; + this.mergeManager = mergeManager; } @Override @@ -89,6 +110,21 @@ public void receive(TransportClient client, ByteBuffer message, RpcResponseCallb handleMessage(msgObj, client, callback); } + @Override + public StreamCallbackWithID receiveStream( + TransportClient client, + ByteBuffer messageHeader, + RpcResponseCallback callback) { + BlockTransferMessage msgObj = BlockTransferMessage.Decoder.fromByteBuffer(messageHeader); + if (msgObj instanceof PushBlockStream) { + PushBlockStream message = (PushBlockStream) msgObj; + checkAuth(client, message.appId); + return mergeManager.receiveBlockDataAsStream(message); + } else { + throw new UnsupportedOperationException("Unexpected message with #receiveStream: " + msgObj); + } + } + protected void handleMessage( BlockTransferMessage msgObj, TransportClient client, @@ -139,6 +175,7 @@ protected void handleMessage( RegisterExecutor msg = (RegisterExecutor) msgObj; checkAuth(client, msg.appId); blockManager.registerExecutor(msg.appId, msg.execId, msg.executorInfo); + mergeManager.registerExecutor(msg.appId, msg.executorInfo.localDirs); callback.onSuccess(ByteBuffer.wrap(new byte[0])); } finally { responseDelayContext.stop(); @@ -156,6 +193,20 @@ protected void handleMessage( Map localDirs = blockManager.getLocalDirs(msg.appId, msg.execIds); callback.onSuccess(new LocalDirsForExecutors(localDirs).toByteBuffer()); + } else if (msgObj instanceof FinalizeShuffleMerge) { + final Timer.Context responseDelayContext = + metrics.finalizeShuffleMergeLatencyMillis.time(); + FinalizeShuffleMerge msg = (FinalizeShuffleMerge) msgObj; + try { + checkAuth(client, msg.appId); + MergeStatuses statuses = mergeManager.finalizeShuffleMerge(msg); + callback.onSuccess(statuses.toByteBuffer()); + } catch(IOException e) { + throw new RuntimeException(String.format("Error while finalizing shuffle merge " + + "for application %s shuffle %d", msg.appId, msg.shuffleId), e); + } finally { + responseDelayContext.stop(); + } } else { throw new UnsupportedOperationException("Unexpected message: " + msgObj); } @@ -225,6 +276,8 @@ public class ShuffleMetrics implements MetricSet { private final Timer openBlockRequestLatencyMillis = new Timer(); // Time latency for executor registration latency in ms private final Timer registerExecutorRequestLatencyMillis = new Timer(); + // Time latency for processing finalize shuffle merge request latency in ms + private final Timer finalizeShuffleMergeLatencyMillis = new Timer(); // Block transfer rate in byte per second private final Meter blockTransferRateBytes = new Meter(); // Number of active connections to the shuffle service @@ -236,6 +289,7 @@ public ShuffleMetrics() { allMetrics = new HashMap<>(); allMetrics.put("openBlockRequestLatencyMillis", openBlockRequestLatencyMillis); allMetrics.put("registerExecutorRequestLatencyMillis", registerExecutorRequestLatencyMillis); + allMetrics.put("finalizeShuffleMergeLatencyMillis", finalizeShuffleMergeLatencyMillis); allMetrics.put("blockTransferRateBytes", blockTransferRateBytes); allMetrics.put("registeredExecutorsSize", (Gauge) () -> blockManager.getRegisteredExecutorsSize()); @@ -373,6 +427,54 @@ public ManagedBuffer next() { } } + /** + * Dummy implementation of merged shuffle file manager. 
Suitable for when push-based shuffle + * is not enabled. + */ + private static class NoOpMergedShuffleFileManager implements MergedShuffleFileManager { + + @Override + public StreamCallbackWithID receiveBlockDataAsStream(PushBlockStream msg) { + throw new UnsupportedOperationException("Cannot handle shuffle block merge"); + } + + @Override + public MergeStatuses finalizeShuffleMerge(FinalizeShuffleMerge msg) throws IOException { + throw new UnsupportedOperationException("Cannot handle shuffle block merge"); + } + + @Override + public void registerApplication(String appId, String user) { + // No-op. Do nothing. + } + + @Override + public void registerExecutor(String appId, String[] localDirs) { + // No-Op. Do nothing. + } + + @Override + public void applicationRemoved(String appId, boolean cleanupLocalDirs) { + throw new UnsupportedOperationException("Cannot handle shuffle block merge"); + } + + @Override + public ManagedBuffer getMergedBlockData( + String appId, int shuffleId, int reduceId, int chunkId) { + throw new UnsupportedOperationException("Cannot handle shuffle block merge"); + } + + @Override + public MergedBlockMeta getMergedBlockMeta(String appId, int shuffleId, int reduceId) { + throw new UnsupportedOperationException("Cannot handle shuffle block merge"); + } + + @Override + public String[] getMergedBlockDirs(String appId) { + throw new UnsupportedOperationException("Cannot handle shuffle block merge"); + } + } + @Override public void channelActive(TransportClient client) { metrics.activeConnections.inc(); diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java index 76e23e7c69d2d..eca35ed290467 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java @@ -20,21 +20,24 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.Arrays; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.concurrent.CompletableFuture; import java.util.concurrent.Future; import com.codahale.metrics.MetricSet; import com.google.common.collect.Lists; + +import org.apache.spark.network.TransportContext; +import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.client.RpcResponseCallback; import org.apache.spark.network.client.TransportClient; import org.apache.spark.network.client.TransportClientBootstrap; -import org.apache.spark.network.shuffle.protocol.*; - -import org.apache.spark.network.TransportContext; import org.apache.spark.network.crypto.AuthClientBootstrap; import org.apache.spark.network.sasl.SecretKeyHolder; import org.apache.spark.network.server.NoOpRpcHandler; +import org.apache.spark.network.shuffle.protocol.*; import org.apache.spark.network.util.TransportConf; /** @@ -43,6 +46,8 @@ * (via BlockTransferService), which has the downside of losing the data if we lose the executors. 
*/ public class ExternalBlockStoreClient extends BlockStoreClient { + private static final ErrorHandler PUSH_ERROR_HANDLER = new ErrorHandler.BlockPushErrorHandler(); + private final TransportConf conf; private final boolean authEnabled; private final SecretKeyHolder secretKeyHolder; @@ -90,12 +95,12 @@ public void fetchBlocks( try { int maxRetries = conf.maxIORetries(); RetryingBlockFetcher.BlockFetchStarter blockFetchStarter = - (blockIds1, listener1) -> { + (inputBlockId, inputListener) -> { // Unless this client is closed. if (clientFactory != null) { TransportClient client = clientFactory.createClient(host, port, maxRetries > 0); new OneForOneBlockFetcher(client, appId, execId, - blockIds1, listener1, conf, downloadFileManager).start(); + inputBlockId, inputListener, conf, downloadFileManager).start(); } else { logger.info("This clientFactory was closed. Skipping further block fetch retries."); } @@ -116,6 +121,43 @@ public void fetchBlocks( } } + @Override + public void pushBlocks( + String host, + int port, + String[] blockIds, + ManagedBuffer[] buffers, + BlockFetchingListener listener) { + checkInit(); + assert blockIds.length == buffers.length : "Number of block ids and buffers do not match."; + + Map buffersWithId = new HashMap<>(); + for (int i = 0; i < blockIds.length; i++) { + buffersWithId.put(blockIds[i], buffers[i]); + } + logger.debug("Push {} shuffle blocks to {}:{}", blockIds.length, host, port); + try { + RetryingBlockFetcher.BlockFetchStarter blockPushStarter = + (inputBlockId, inputListener) -> { + TransportClient client = clientFactory.createClient(host, port); + new OneForOneBlockPusher(client, appId, inputBlockId, inputListener, buffersWithId) + .start(); + }; + int maxRetries = conf.maxIORetries(); + if (maxRetries > 0) { + new RetryingBlockFetcher( + conf, blockPushStarter, blockIds, listener, PUSH_ERROR_HANDLER).start(); + } else { + blockPushStarter.createAndStart(blockIds, listener); + } + } catch (Exception e) { + logger.error("Exception while beginning pushBlocks", e); + for (String blockId : blockIds) { + listener.onBlockFetchFailure(blockId, e); + } + } + } + @Override public MetricSet shuffleMetrics() { checkInit(); diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergedBlockMeta.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergedBlockMeta.java new file mode 100644 index 0000000000000..e9d9e53495469 --- /dev/null +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergedBlockMeta.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.network.shuffle; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import com.google.common.base.Preconditions; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import org.roaringbitmap.RoaringBitmap; + +import org.apache.spark.network.buffer.ManagedBuffer; +import org.apache.spark.network.protocol.Encoders; + +/** + * Contains meta information for a merged block. Currently this information constitutes: + * 1. Number of chunks in a merged shuffle block. + * 2. Bitmaps for each chunk in the merged block. A chunk bitmap contains all the mapIds that were + * merged to that merged block chunk. + */ +public class MergedBlockMeta { + private final int numChunks; + private final ManagedBuffer chunksBitmapBuffer; + + public MergedBlockMeta(int numChunks, ManagedBuffer chunksBitmapBuffer) { + this.numChunks = numChunks; + this.chunksBitmapBuffer = Preconditions.checkNotNull(chunksBitmapBuffer); + } + + public int getNumChunks() { + return numChunks; + } + + public ManagedBuffer getChunksBitmapBuffer() { + return chunksBitmapBuffer; + } + + public RoaringBitmap[] readChunkBitmaps() throws IOException { + ByteBuf buf = Unpooled.wrappedBuffer(chunksBitmapBuffer.nioByteBuffer()); + List bitmaps = new ArrayList<>(); + while(buf.isReadable()) { + bitmaps.add(Encoders.Bitmaps.decode(buf)); + } + assert (bitmaps.size() == numChunks); + return bitmaps.toArray(new RoaringBitmap[0]); + } +} diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergedShuffleFileManager.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergedShuffleFileManager.java new file mode 100644 index 0000000000000..ef4dbb2bd0059 --- /dev/null +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergedShuffleFileManager.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.shuffle; + +import java.io.IOException; + +import org.apache.spark.network.buffer.ManagedBuffer; +import org.apache.spark.network.client.StreamCallbackWithID; +import org.apache.spark.network.shuffle.protocol.FinalizeShuffleMerge; +import org.apache.spark.network.shuffle.protocol.MergeStatuses; +import org.apache.spark.network.shuffle.protocol.PushBlockStream; + + +/** + * The MergedShuffleFileManager is used to process push based shuffle when enabled. It works + * along side {@link ExternalBlockHandler} and serves as an RPCHandler for + * {@link org.apache.spark.network.server.RpcHandler#receiveStream}, where it processes the + * remotely pushed streams of shuffle blocks to merge them into merged shuffle files. 
Right + * now, support for push based shuffle is only implemented for external shuffle service in + * YARN mode. + */ +public interface MergedShuffleFileManager { + /** + * Provides the stream callback used to process a remotely pushed block. The callback is + * used by the {@link org.apache.spark.network.client.StreamInterceptor} installed on the + * channel to process the block data in the channel outside of the message frame. + * + * @param msg metadata of the remotely pushed blocks. This is processed inside the message frame + * @return A stream callback to process the block data in streaming fashion as it arrives + */ + StreamCallbackWithID receiveBlockDataAsStream(PushBlockStream msg); + + /** + * Handles the request to finalize shuffle merge for a given shuffle. + * + * @param msg contains appId and shuffleId to uniquely identify a shuffle to be finalized + * @return The statuses of the merged shuffle partitions for the given shuffle on this + * shuffle service + * @throws IOException + */ + MergeStatuses finalizeShuffleMerge(FinalizeShuffleMerge msg) throws IOException; + + /** + * Registers an application when it starts. It also stores the username which is necessary + * for generating the host local directories for merged shuffle files. + * Right now, this is invoked by YarnShuffleService. + * + * @param appId application ID + * @param user username + */ + void registerApplication(String appId, String user); + + /** + * Registers an executor with its local dir list when it starts. This provides the specific path + * so MergedShuffleFileManager knows where to store and look for shuffle data for a + * given application. It is invoked by the RPC call when executor tries to register with the + * local shuffle service. + * + * @param appId application ID + * @param localDirs The list of local dirs that this executor gets granted from NodeManager + */ + void registerExecutor(String appId, String[] localDirs); + + /** + * Invoked when an application finishes. This cleans up any remaining metadata associated with + * this application, and optionally deletes the application specific directory path. + * + * @param appId application ID + * @param cleanupLocalDirs flag indicating whether MergedShuffleFileManager should handle + * deletion of local dirs itself. + */ + void applicationRemoved(String appId, boolean cleanupLocalDirs); + + /** + * Get the buffer for a given merged shuffle chunk when serving merged shuffle to reducers + * + * @param appId application ID + * @param shuffleId shuffle ID + * @param reduceId reducer ID + * @param chunkId merged shuffle file chunk ID + * @return The {@link ManagedBuffer} for the given merged shuffle chunk + */ + ManagedBuffer getMergedBlockData(String appId, int shuffleId, int reduceId, int chunkId); + + /** + * Get the meta information of a merged block. + * + * @param appId application ID + * @param shuffleId shuffle ID + * @param reduceId reducer ID + * @return meta information of a merged block + */ + MergedBlockMeta getMergedBlockMeta(String appId, int shuffleId, int reduceId); + + /** + * Get the local directories which stores the merged shuffle files. 
+ * + * @param appId application ID + */ + String[] getMergedBlockDirs(String appId); +} diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockPusher.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockPusher.java new file mode 100644 index 0000000000000..407b248170a46 --- /dev/null +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockPusher.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.shuffle; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Map; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.spark.network.buffer.ManagedBuffer; +import org.apache.spark.network.buffer.NioManagedBuffer; +import org.apache.spark.network.client.RpcResponseCallback; +import org.apache.spark.network.client.TransportClient; +import org.apache.spark.network.shuffle.protocol.PushBlockStream; + +/** + * Similar to {@link OneForOneBlockFetcher}, but for pushing blocks to remote shuffle service to + * be merged instead of for fetching them from remote shuffle services. This is used by + * ShuffleWriter when the block push process is initiated. The supplied BlockFetchingListener + * is used to handle the success or failure in pushing each blocks. + */ +public class OneForOneBlockPusher { + private static final Logger logger = LoggerFactory.getLogger(OneForOneBlockPusher.class); + private static final ErrorHandler PUSH_ERROR_HANDLER = new ErrorHandler.BlockPushErrorHandler(); + + private final TransportClient client; + private final String appId; + private final String[] blockIds; + private final BlockFetchingListener listener; + private final Map buffers; + + public OneForOneBlockPusher( + TransportClient client, + String appId, + String[] blockIds, + BlockFetchingListener listener, + Map buffers) { + this.client = client; + this.appId = appId; + this.blockIds = blockIds; + this.listener = listener; + this.buffers = buffers; + } + + private class BlockPushCallback implements RpcResponseCallback { + + private int index; + private String blockId; + + BlockPushCallback(int index, String blockId) { + this.index = index; + this.blockId = blockId; + } + + @Override + public void onSuccess(ByteBuffer response) { + // On receipt of a successful block push + listener.onBlockFetchSuccess(blockId, new NioManagedBuffer(ByteBuffer.allocate(0))); + } + + @Override + public void onFailure(Throwable e) { + // Since block push is best effort, i.e., if we encountered a block push failure that's not + // retriable or exceeding the max retires, we should not fail all remaining block pushes. 
+ // The best effort nature makes block push tolerable of a partial completion. Thus, we only + // fail the block that's actually failed. Not that, on the RetryingBlockFetcher side, once + // retry is initiated, it would still invalidate the previous active retry listener, and + // retry all outstanding blocks. We are preventing forwarding unnecessary block push failures + // to the parent listener of the retry listener. The only exceptions would be if the block + // push failure is due to block arriving on the server side after merge finalization, or the + // client fails to establish connection to the server side. In both cases, we would fail all + // remaining blocks. + if (PUSH_ERROR_HANDLER.shouldRetryError(e)) { + String[] targetBlockId = Arrays.copyOfRange(blockIds, index, index + 1); + failRemainingBlocks(targetBlockId, e); + } else { + String[] targetBlockId = Arrays.copyOfRange(blockIds, index, blockIds.length); + failRemainingBlocks(targetBlockId, e); + } + } + } + + private void failRemainingBlocks(String[] failedBlockIds, Throwable e) { + for (String blockId : failedBlockIds) { + try { + listener.onBlockFetchFailure(blockId, e); + } catch (Exception e2) { + logger.error("Error in block push failure callback", e2); + } + } + } + + /** + * Begins the block pushing process, calling the listener with every block pushed. + */ + public void start() { + logger.debug("Start pushing {} blocks", blockIds.length); + for (int i = 0; i < blockIds.length; i++) { + assert buffers.containsKey(blockIds[i]) : "Could not find the block buffer for block " + + blockIds[i]; + ByteBuffer header = new PushBlockStream(appId, blockIds[i], i).toByteBuffer(); + client.uploadStream(new NioManagedBuffer(header), buffers.get(blockIds[i]), + new BlockPushCallback(i, blockIds[i])); + } + } +} diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockFetcher.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockFetcher.java index 6bf3da94030d4..43bde1610e41e 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockFetcher.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockFetcher.java @@ -99,11 +99,14 @@ void createAndStart(String[] blockIds, BlockFetchingListener listener) */ private RetryingBlockFetchListener currentListener; + private final ErrorHandler errorHandler; + public RetryingBlockFetcher( TransportConf conf, RetryingBlockFetcher.BlockFetchStarter fetchStarter, String[] blockIds, - BlockFetchingListener listener) { + BlockFetchingListener listener, + ErrorHandler errorHandler) { this.fetchStarter = fetchStarter; this.listener = listener; this.maxRetries = conf.maxIORetries(); @@ -111,6 +114,15 @@ public RetryingBlockFetcher( this.outstandingBlocksIds = Sets.newLinkedHashSet(); Collections.addAll(outstandingBlocksIds, blockIds); this.currentListener = new RetryingBlockFetchListener(); + this.errorHandler = errorHandler; + } + + public RetryingBlockFetcher( + TransportConf conf, + BlockFetchStarter fetchStarter, + String[] blockIds, + BlockFetchingListener listener) { + this(conf, fetchStarter, blockIds, listener, ErrorHandler.NOOP_ERROR_HANDLER); } /** @@ -178,7 +190,7 @@ private synchronized boolean shouldRetry(Throwable e) { boolean isIOException = e instanceof IOException || (e.getCause() != null && e.getCause() instanceof IOException); boolean hasRemainingRetries = retryCount < maxRetries; - return isIOException && hasRemainingRetries; + 
return isIOException && hasRemainingRetries && errorHandler.shouldRetryError(e); } /** @@ -215,8 +227,15 @@ public void onBlockFetchFailure(String blockId, Throwable exception) { if (shouldRetry(exception)) { initiateRetry(); } else { - logger.error(String.format("Failed to fetch block %s, and will not retry (%s retries)", - blockId, retryCount), exception); + if (errorHandler.shouldLogError(exception)) { + logger.error( + String.format("Failed to fetch block %s, and will not retry (%s retries)", + blockId, retryCount), exception); + } else { + logger.debug( + String.format("Failed to fetch block %s, and will not retry (%s retries)", + blockId, retryCount), exception); + } outstandingBlocksIds.remove(blockId); shouldForwardFailure = true; } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockTransferMessage.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockTransferMessage.java index 89d8dfe8716b8..7f5058124988f 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockTransferMessage.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockTransferMessage.java @@ -47,7 +47,8 @@ public abstract class BlockTransferMessage implements Encodable { public enum Type { OPEN_BLOCKS(0), UPLOAD_BLOCK(1), REGISTER_EXECUTOR(2), STREAM_HANDLE(3), REGISTER_DRIVER(4), HEARTBEAT(5), UPLOAD_BLOCK_STREAM(6), REMOVE_BLOCKS(7), BLOCKS_REMOVED(8), - FETCH_SHUFFLE_BLOCKS(9), GET_LOCAL_DIRS_FOR_EXECUTORS(10), LOCAL_DIRS_FOR_EXECUTORS(11); + FETCH_SHUFFLE_BLOCKS(9), GET_LOCAL_DIRS_FOR_EXECUTORS(10), LOCAL_DIRS_FOR_EXECUTORS(11), + PUSH_BLOCK_STREAM(12), FINALIZE_SHUFFLE_MERGE(13), MERGE_STATUSES(14); private final byte id; @@ -78,6 +79,9 @@ public static BlockTransferMessage fromByteBuffer(ByteBuffer msg) { case 9: return FetchShuffleBlocks.decode(buf); case 10: return GetLocalDirsForExecutors.decode(buf); case 11: return LocalDirsForExecutors.decode(buf); + case 12: return PushBlockStream.decode(buf); + case 13: return FinalizeShuffleMerge.decode(buf); + case 14: return MergeStatuses.decode(buf); default: throw new IllegalArgumentException("Unknown message type: " + type); } } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FinalizeShuffleMerge.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FinalizeShuffleMerge.java new file mode 100644 index 0000000000000..9058575df57ef --- /dev/null +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FinalizeShuffleMerge.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.network.shuffle.protocol; + +import com.google.common.base.Objects; +import io.netty.buffer.ByteBuf; + +import org.apache.spark.network.protocol.Encoders; + +/** + * Request to finalize merge for a given shuffle. + * Returns {@link MergeStatuses} + */ +public class FinalizeShuffleMerge extends BlockTransferMessage { + public final String appId; + public final int shuffleId; + + public FinalizeShuffleMerge( + String appId, + int shuffleId) { + this.appId = appId; + this.shuffleId = shuffleId; + } + + @Override + protected BlockTransferMessage.Type type() { + return Type.FINALIZE_SHUFFLE_MERGE; + } + + @Override + public int hashCode() { + return Objects.hashCode(appId, shuffleId); + } + + @Override + public String toString() { + return Objects.toStringHelper(this) + .add("appId", appId) + .add("shuffleId", shuffleId) + .toString(); + } + + @Override + public boolean equals(Object other) { + if (other != null && other instanceof FinalizeShuffleMerge) { + FinalizeShuffleMerge o = (FinalizeShuffleMerge) other; + return Objects.equal(appId, o.appId) + && shuffleId == o.shuffleId; + } + return false; + } + + @Override + public int encodedLength() { + return Encoders.Strings.encodedLength(appId) + 4; + } + + @Override + public void encode(ByteBuf buf) { + Encoders.Strings.encode(buf, appId); + buf.writeInt(shuffleId); + } + + public static FinalizeShuffleMerge decode(ByteBuf buf) { + String appId = Encoders.Strings.decode(buf); + int shuffleId = buf.readInt(); + return new FinalizeShuffleMerge(appId, shuffleId); + } +} diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/MergeStatuses.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/MergeStatuses.java new file mode 100644 index 0000000000000..f57e8b326e5e2 --- /dev/null +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/MergeStatuses.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.shuffle.protocol; + +import java.util.Arrays; + +import com.google.common.base.Objects; +import io.netty.buffer.ByteBuf; +import org.roaringbitmap.RoaringBitmap; + +import org.apache.spark.network.protocol.Encoders; + +/** + * Result returned by an ExternalShuffleService to the DAGScheduler. This represents the result + * of all the remote shuffle block merge operations performed by an ExternalShuffleService + * for a given shuffle ID. It includes the shuffle ID, an array of bitmaps each representing + * the set of mapper partition blocks that are merged for a given reducer partition, an array + * of reducer IDs, and an array of merged shuffle partition sizes. 
The 3 arrays list information + * about all the reducer partitions merged by the ExternalShuffleService in the same order. + */ +public class MergeStatuses extends BlockTransferMessage { + /** Shuffle ID **/ + public final int shuffleId; + /** + * Array of bitmaps tracking the set of mapper partition blocks merged for each + * reducer partition + */ + public final RoaringBitmap[] bitmaps; + /** Array of reducer IDs **/ + public final int[] reduceIds; + /** + * Array of merged shuffle partition block size. Each represents the total size of all + * merged shuffle partition blocks for one reducer partition. + * **/ + public final long[] sizes; + + public MergeStatuses( + int shuffleId, + RoaringBitmap[] bitmaps, + int[] reduceIds, + long[] sizes) { + this.shuffleId = shuffleId; + this.bitmaps = bitmaps; + this.reduceIds = reduceIds; + this.sizes = sizes; + } + + @Override + protected Type type() { + return Type.MERGE_STATUSES; + } + + @Override + public int hashCode() { + int objectHashCode = Objects.hashCode(shuffleId); + return (objectHashCode * 41 + Arrays.hashCode(reduceIds) * 41 + + Arrays.hashCode(bitmaps) * 41 + Arrays.hashCode(sizes)); + } + + @Override + public String toString() { + return Objects.toStringHelper(this) + .add("shuffleId", shuffleId) + .add("reduceId size", reduceIds.length) + .toString(); + } + + @Override + public boolean equals(Object other) { + if (other != null && other instanceof MergeStatuses) { + MergeStatuses o = (MergeStatuses) other; + return Objects.equal(shuffleId, o.shuffleId) + && Arrays.equals(bitmaps, o.bitmaps) + && Arrays.equals(reduceIds, o.reduceIds) + && Arrays.equals(sizes, o.sizes); + } + return false; + } + + @Override + public int encodedLength() { + return 4 // int + + Encoders.BitmapArrays.encodedLength(bitmaps) + + Encoders.IntArrays.encodedLength(reduceIds) + + Encoders.LongArrays.encodedLength(sizes); + } + + @Override + public void encode(ByteBuf buf) { + buf.writeInt(shuffleId); + Encoders.BitmapArrays.encode(buf, bitmaps); + Encoders.IntArrays.encode(buf, reduceIds); + Encoders.LongArrays.encode(buf, sizes); + } + + public static MergeStatuses decode(ByteBuf buf) { + int shuffleId = buf.readInt(); + RoaringBitmap[] bitmaps = Encoders.BitmapArrays.decode(buf); + int[] reduceIds = Encoders.IntArrays.decode(buf); + long[] sizes = Encoders.LongArrays.decode(buf); + return new MergeStatuses(shuffleId, bitmaps, reduceIds, sizes); + } +} diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/PushBlockStream.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/PushBlockStream.java new file mode 100644 index 0000000000000..7eab5a644783c --- /dev/null +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/PushBlockStream.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.shuffle.protocol; + +import com.google.common.base.Objects; +import io.netty.buffer.ByteBuf; + +import org.apache.spark.network.protocol.Encoders; + +// Needed by ScalaDoc. See SPARK-7726 +import static org.apache.spark.network.shuffle.protocol.BlockTransferMessage.Type; + + +/** + * Request to push a block to a remote shuffle service to be merged in push based shuffle. + * The remote shuffle service will also include this message when responding the push requests. + */ +public class PushBlockStream extends BlockTransferMessage { + public final String appId; + public final String blockId; + // Similar to the chunkIndex in StreamChunkId, indicating the index of a block in a batch of + // blocks to be pushed. + public final int index; + + public PushBlockStream(String appId, String blockId, int index) { + this.appId = appId; + this.blockId = blockId; + this.index = index; + } + + @Override + protected Type type() { + return Type.PUSH_BLOCK_STREAM; + } + + @Override + public int hashCode() { + return Objects.hashCode(appId, blockId, index); + } + + @Override + public String toString() { + return Objects.toStringHelper(this) + .add("appId", appId) + .add("blockId", blockId) + .add("index", index) + .toString(); + } + + @Override + public boolean equals(Object other) { + if (other != null && other instanceof PushBlockStream) { + PushBlockStream o = (PushBlockStream) other; + return Objects.equal(appId, o.appId) + && Objects.equal(blockId, o.blockId) + && index == o.index; + } + return false; + } + + @Override + public int encodedLength() { + return Encoders.Strings.encodedLength(appId) + + Encoders.Strings.encodedLength(blockId) + 4; + } + + @Override + public void encode(ByteBuf buf) { + Encoders.Strings.encode(buf, appId); + Encoders.Strings.encode(buf, blockId); + buf.writeInt(index); + } + + public static PushBlockStream decode(ByteBuf buf) { + String appId = Encoders.Strings.decode(buf); + String blockId = Encoders.Strings.decode(buf); + int index = buf.readInt(); + return new PushBlockStream(appId, blockId, index); + } +} diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ErrorHandlerSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ErrorHandlerSuite.java new file mode 100644 index 0000000000000..992e7762c5a54 --- /dev/null +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ErrorHandlerSuite.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.network.shuffle; + +import java.net.ConnectException; + +import org.junit.Test; + +import static org.junit.Assert.*; + +/** + * Test suite for {@link ErrorHandler} + */ +public class ErrorHandlerSuite { + + @Test + public void testPushErrorRetry() { + ErrorHandler.BlockPushErrorHandler handler = new ErrorHandler.BlockPushErrorHandler(); + assertFalse(handler.shouldRetryError(new RuntimeException(new IllegalArgumentException( + ErrorHandler.BlockPushErrorHandler.TOO_LATE_MESSAGE_SUFFIX)))); + assertFalse(handler.shouldRetryError(new RuntimeException(new ConnectException()))); + assertTrue(handler.shouldRetryError(new RuntimeException(new IllegalArgumentException( + ErrorHandler.BlockPushErrorHandler.BLOCK_APPEND_COLLISION_DETECTED_MSG_PREFIX)))); + assertTrue(handler.shouldRetryError(new Throwable())); + } + + @Test + public void testPushErrorLogging() { + ErrorHandler.BlockPushErrorHandler handler = new ErrorHandler.BlockPushErrorHandler(); + assertFalse(handler.shouldLogError(new RuntimeException(new IllegalArgumentException( + ErrorHandler.BlockPushErrorHandler.TOO_LATE_MESSAGE_SUFFIX)))); + assertFalse(handler.shouldLogError(new RuntimeException(new IllegalArgumentException( + ErrorHandler.BlockPushErrorHandler.BLOCK_APPEND_COLLISION_DETECTED_MSG_PREFIX)))); + assertTrue(handler.shouldLogError(new Throwable())); + } +} diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java index 455351fcf767c..680b8d74a2eea 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java @@ -17,6 +17,7 @@ package org.apache.spark.network.shuffle; +import java.io.IOException; import java.nio.ByteBuffer; import java.util.Iterator; @@ -25,6 +26,7 @@ import org.junit.Before; import org.junit.Test; import org.mockito.ArgumentCaptor; +import org.roaringbitmap.RoaringBitmap; import static org.junit.Assert.*; import static org.mockito.ArgumentMatchers.any; @@ -39,6 +41,8 @@ import org.apache.spark.network.shuffle.protocol.BlockTransferMessage; import org.apache.spark.network.shuffle.protocol.ExecutorShuffleInfo; import org.apache.spark.network.shuffle.protocol.FetchShuffleBlocks; +import org.apache.spark.network.shuffle.protocol.FinalizeShuffleMerge; +import org.apache.spark.network.shuffle.protocol.MergeStatuses; import org.apache.spark.network.shuffle.protocol.OpenBlocks; import org.apache.spark.network.shuffle.protocol.RegisterExecutor; import org.apache.spark.network.shuffle.protocol.StreamHandle; @@ -50,6 +54,7 @@ public class ExternalBlockHandlerSuite { OneForOneStreamManager streamManager; ExternalShuffleBlockResolver blockResolver; RpcHandler handler; + MergedShuffleFileManager mergedShuffleManager; ManagedBuffer[] blockMarkers = { new NioManagedBuffer(ByteBuffer.wrap(new byte[3])), new NioManagedBuffer(ByteBuffer.wrap(new byte[7])) @@ -59,17 +64,20 @@ public class ExternalBlockHandlerSuite { public void beforeEach() { streamManager = mock(OneForOneStreamManager.class); blockResolver = mock(ExternalShuffleBlockResolver.class); - handler = new ExternalBlockHandler(streamManager, blockResolver); + mergedShuffleManager = mock(MergedShuffleFileManager.class); + handler = new ExternalBlockHandler(streamManager, blockResolver, mergedShuffleManager); } @Test public 
void testRegisterExecutor() { RpcResponseCallback callback = mock(RpcResponseCallback.class); - ExecutorShuffleInfo config = new ExecutorShuffleInfo(new String[] {"/a", "/b"}, 16, "sort"); + String[] localDirs = new String[] {"/a", "/b"}; + ExecutorShuffleInfo config = new ExecutorShuffleInfo(localDirs, 16, "sort"); ByteBuffer registerMessage = new RegisterExecutor("app0", "exec1", config).toByteBuffer(); handler.receive(client, registerMessage, callback); verify(blockResolver, times(1)).registerExecutor("app0", "exec1", config); + verify(mergedShuffleManager, times(1)).registerExecutor("app0", localDirs); verify(callback, times(1)).onSuccess(any(ByteBuffer.class)); verify(callback, never()).onFailure(any(Throwable.class)); @@ -222,4 +230,32 @@ public void testBadMessages() { verify(callback, never()).onSuccess(any(ByteBuffer.class)); verify(callback, never()).onFailure(any(Throwable.class)); } + + @Test + public void testFinalizeShuffleMerge() throws IOException { + RpcResponseCallback callback = mock(RpcResponseCallback.class); + + FinalizeShuffleMerge req = new FinalizeShuffleMerge("app0", 0); + RoaringBitmap bitmap = RoaringBitmap.bitmapOf(0, 1, 2); + MergeStatuses statuses = new MergeStatuses(0, new RoaringBitmap[]{bitmap}, + new int[]{3}, new long[]{30}); + when(mergedShuffleManager.finalizeShuffleMerge(req)).thenReturn(statuses); + + ByteBuffer reqBuf = req.toByteBuffer(); + handler.receive(client, reqBuf, callback); + verify(mergedShuffleManager, times(1)).finalizeShuffleMerge(req); + ArgumentCaptor response = ArgumentCaptor.forClass(ByteBuffer.class); + verify(callback, times(1)).onSuccess(response.capture()); + verify(callback, never()).onFailure(any()); + + MergeStatuses mergeStatuses = + (MergeStatuses) BlockTransferMessage.Decoder.fromByteBuffer(response.getValue()); + assertEquals(mergeStatuses, statuses); + + Timer finalizeShuffleMergeLatencyMillis = (Timer) ((ExternalBlockHandler) handler) + .getAllMetrics() + .getMetrics() + .get("finalizeShuffleMergeLatencyMillis"); + assertEquals(1, finalizeShuffleMergeLatencyMillis.getCount()); + } } diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockPusherSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockPusherSuite.java new file mode 100644 index 0000000000000..ebcdba72aa1a8 --- /dev/null +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockPusherSuite.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.network.shuffle; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.Map; + +import com.google.common.collect.Maps; +import io.netty.buffer.Unpooled; +import org.junit.Test; + +import static org.junit.Assert.*; +import static org.mockito.AdditionalMatchers.*; +import static org.mockito.Mockito.*; + +import org.apache.spark.network.buffer.ManagedBuffer; +import org.apache.spark.network.buffer.NettyManagedBuffer; +import org.apache.spark.network.buffer.NioManagedBuffer; +import org.apache.spark.network.client.RpcResponseCallback; +import org.apache.spark.network.client.TransportClient; +import org.apache.spark.network.shuffle.protocol.BlockTransferMessage; +import org.apache.spark.network.shuffle.protocol.PushBlockStream; + + +public class OneForOneBlockPusherSuite { + + @Test + public void testPushOne() { + LinkedHashMap blocks = Maps.newLinkedHashMap(); + blocks.put("shuffle_0_0_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[1]))); + String[] blockIds = blocks.keySet().toArray(new String[blocks.size()]); + + BlockFetchingListener listener = pushBlocks( + blocks, + blockIds, + Arrays.asList(new PushBlockStream("app-id", "shuffle_0_0_0", 0))); + + verify(listener).onBlockFetchSuccess(eq("shuffle_0_0_0"), any()); + } + + @Test + public void testPushThree() { + LinkedHashMap blocks = Maps.newLinkedHashMap(); + blocks.put("b0", new NioManagedBuffer(ByteBuffer.wrap(new byte[12]))); + blocks.put("b1", new NioManagedBuffer(ByteBuffer.wrap(new byte[23]))); + blocks.put("b2", new NettyManagedBuffer(Unpooled.wrappedBuffer(new byte[23]))); + String[] blockIds = blocks.keySet().toArray(new String[blocks.size()]); + + BlockFetchingListener listener = pushBlocks( + blocks, + blockIds, + Arrays.asList(new PushBlockStream("app-id", "b0", 0), + new PushBlockStream("app-id", "b1", 1), + new PushBlockStream("app-id", "b2", 2))); + + for (int i = 0; i < 3; i ++) { + verify(listener, times(1)).onBlockFetchSuccess(eq("b" + i), any()); + } + } + + @Test + public void testServerFailures() { + LinkedHashMap blocks = Maps.newLinkedHashMap(); + blocks.put("b0", new NioManagedBuffer(ByteBuffer.wrap(new byte[12]))); + blocks.put("b1", new NioManagedBuffer(ByteBuffer.wrap(new byte[0]))); + blocks.put("b2", new NioManagedBuffer(ByteBuffer.wrap(new byte[0]))); + String[] blockIds = blocks.keySet().toArray(new String[blocks.size()]); + + BlockFetchingListener listener = pushBlocks( + blocks, + blockIds, + Arrays.asList(new PushBlockStream("app-id", "b0", 0), + new PushBlockStream("app-id", "b1", 1), + new PushBlockStream("app-id", "b2", 2))); + + verify(listener, times(1)).onBlockFetchSuccess(eq("b0"), any()); + verify(listener, times(1)).onBlockFetchFailure(eq("b1"), any()); + verify(listener, times(1)).onBlockFetchFailure(eq("b2"), any()); + } + + @Test + public void testHandlingRetriableFailures() { + LinkedHashMap blocks = Maps.newLinkedHashMap(); + blocks.put("b0", new NioManagedBuffer(ByteBuffer.wrap(new byte[12]))); + blocks.put("b1", null); + blocks.put("b2", new NioManagedBuffer(ByteBuffer.wrap(new byte[0]))); + String[] blockIds = blocks.keySet().toArray(new String[blocks.size()]); + + BlockFetchingListener listener = pushBlocks( + blocks, + blockIds, + Arrays.asList(new PushBlockStream("app-id", "b0", 0), + new PushBlockStream("app-id", "b1", 1), + new PushBlockStream("app-id", "b2", 2))); + + verify(listener, times(1)).onBlockFetchSuccess(eq("b0"), any()); + verify(listener, 
times(0)).onBlockFetchSuccess(not(eq("b0")), any()); + verify(listener, times(0)).onBlockFetchFailure(eq("b0"), any()); + verify(listener, times(1)).onBlockFetchFailure(eq("b1"), any()); + verify(listener, times(2)).onBlockFetchFailure(eq("b2"), any()); + } + + /** + * Begins a push on the given set of blocks by mocking the response from server side. + * If a block is an empty byte, a server side retriable exception will be thrown. + * If a block is null, a non-retriable exception will be thrown. + */ + private static BlockFetchingListener pushBlocks( + LinkedHashMap blocks, + String[] blockIds, + Iterable expectMessages) { + TransportClient client = mock(TransportClient.class); + BlockFetchingListener listener = mock(BlockFetchingListener.class); + OneForOneBlockPusher pusher = + new OneForOneBlockPusher(client, "app-id", blockIds, listener, blocks); + + Iterator> blockIterator = blocks.entrySet().iterator(); + Iterator msgIterator = expectMessages.iterator(); + doAnswer(invocation -> { + ByteBuffer header = ((ManagedBuffer) invocation.getArguments()[0]).nioByteBuffer(); + BlockTransferMessage message = BlockTransferMessage.Decoder.fromByteBuffer(header); + RpcResponseCallback callback = (RpcResponseCallback) invocation.getArguments()[2]; + Map.Entry entry = blockIterator.next(); + ManagedBuffer block = entry.getValue(); + if (block != null && block.nioByteBuffer().capacity() > 0) { + callback.onSuccess(header); + } else if (block != null) { + callback.onFailure(new RuntimeException("Failed " + entry.getKey() + + ErrorHandler.BlockPushErrorHandler.BLOCK_APPEND_COLLISION_DETECTED_MSG_PREFIX)); + } else { + callback.onFailure(new RuntimeException("Quick fail " + entry.getKey() + + ErrorHandler.BlockPushErrorHandler.TOO_LATE_MESSAGE_SUFFIX)); + } + assertEquals(msgIterator.next(), message); + return null; + }).when(client).uploadStream(any(ManagedBuffer.class), any(), any(RpcResponseCallback.class)); + + pusher.start(); + return listener; + } +} diff --git a/core/src/test/scala/org/apache/spark/deploy/ExternalShuffleServiceMetricsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/ExternalShuffleServiceMetricsSuite.scala index d681c13337e0d..ea4d252f0dbae 100644 --- a/core/src/test/scala/org/apache/spark/deploy/ExternalShuffleServiceMetricsSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/ExternalShuffleServiceMetricsSuite.scala @@ -61,7 +61,8 @@ class ExternalShuffleServiceMetricsSuite extends SparkFunSuite { "registeredExecutorsSize", "registerExecutorRequestLatencyMillis", "shuffle-server.usedDirectMemory", - "shuffle-server.usedHeapMemory") + "shuffle-server.usedHeapMemory", + "finalizeShuffleMergeLatencyMillis") ) } } diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceMetricsSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceMetricsSuite.scala index 63ac1af8a9127..9239d891aae3b 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceMetricsSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceMetricsSuite.scala @@ -40,7 +40,7 @@ class YarnShuffleServiceMetricsSuite extends SparkFunSuite with Matchers { val allMetrics = Set( "openBlockRequestLatencyMillis", "registerExecutorRequestLatencyMillis", "blockTransferRateBytes", "registeredExecutorsSize", "numActiveConnections", - "numCaughtExceptions") + "numCaughtExceptions", "finalizeShuffleMergeLatencyMillis") 
metrics.getMetrics.keySet().asScala should be (allMetrics) } diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala index 46e596575533d..a6a302ad5df95 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala @@ -405,6 +405,7 @@ class YarnShuffleServiceSuite extends SparkFunSuite with Matchers with BeforeAnd "openBlockRequestLatencyMillis", "registeredExecutorsSize", "registerExecutorRequestLatencyMillis", + "finalizeShuffleMergeLatencyMillis", "shuffle-server.usedDirectMemory", "shuffle-server.usedHeapMemory" )) From 9e3746469c23fd88f6dacc5082a157ca6970414e Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 15 Oct 2020 12:38:10 -0700 Subject: [PATCH 0246/1009] [SPARK-33078][SQL] Add config for json expression optimization ### What changes were proposed in this pull request? This proposes to add a config for json expression optimization. ### Why are the changes needed? For the new Json expression optimization rules, it is safer if we can disable it using SQL config. ### Does this PR introduce _any_ user-facing change? Yes, users can disable json expression optimization rule. ### How was this patch tested? Unit test Closes #30047 from viirya/SPARK-33078. Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../optimizer/OptimizeJsonExprs.scala | 3 ++- .../apache/spark/sql/internal/SQLConf.scala | 11 ++++++++++ .../optimizer/OptimizeJsonExprsSuite.scala | 21 +++++++++++++++++++ 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala index fcd5412d66d41..ce86d8cdd4999 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{ArrayType, StructType} /** @@ -35,7 +36,7 @@ import org.apache.spark.sql.types.{ArrayType, StructType} */ object OptimizeJsonExprs extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan transform { - case p => p.transformExpressions { + case p if SQLConf.get.jsonExpressionOptimization => p.transformExpressions { case c: CreateNamedStruct // If we create struct from various fields of the same `JsonToStructs`. 
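As a rough illustration of the guard above (a sketch only, not code from this patch), the new flag can be toggled from a session to compare optimized plans for a `from_json(to_json(...))` round trip. It assumes a running `SparkSession` named `spark`; the schema and column names are made up for the example.

```
// Hedged sketch, not part of this patch: toggling the new JSON optimization flag.
// Assumes a SparkSession `spark`; schema and column names are illustrative only.
import org.apache.spark.sql.functions.{col, from_json, struct, to_json}
import org.apache.spark.sql.types.{IntegerType, StringType, StructType}

val schema = new StructType().add("a", IntegerType).add("b", StringType)
val df = spark.range(3).select(
  struct(col("id").cast("int").as("a"), col("id").cast("string").as("b")).as("s"))

// Rule enabled (default): the to_json + from_json round trip is expected to collapse.
spark.conf.set("spark.sql.optimizer.enableJsonExpressionOptimization", "true")
df.select(from_json(to_json(col("s")), schema).as("roundTrip")).explain(true)

// Rule disabled: the JSON serialization/deserialization pair stays in the optimized plan.
spark.conf.set("spark.sql.optimizer.enableJsonExpressionOptimization", "false")
df.select(from_json(to_json(col("s")), schema).as("roundTrip")).explain(true)
```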
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index d4c7dd7f3160c..79d78088f51a0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1458,6 +1458,15 @@ object SQLConf { .booleanConf .createWithDefault(true) + val JSON_EXPRESSION_OPTIMIZATION = + buildConf("spark.sql.optimizer.enableJsonExpressionOptimization") + .doc("Whether to optimize JSON expressions in SQL optimizer. It includes pruning " + + "unnecessary columns from from_json, simplifing from_json + to_json, to_json + " + + "named_struct(from_json.col1, from_json.col2, ....).") + .version("3.1.0") + .booleanConf + .createWithDefault(true) + val FILE_SINK_LOG_DELETION = buildConf("spark.sql.streaming.fileSink.log.deletion") .internal() .doc("Whether to delete the expired log files in file stream sink.") @@ -3232,6 +3241,8 @@ class SQLConf extends Serializable with Logging { def jsonGeneratorIgnoreNullFields: Boolean = getConf(SQLConf.JSON_GENERATOR_IGNORE_NULL_FIELDS) + def jsonExpressionOptimization: Boolean = getConf(SQLConf.JSON_EXPRESSION_OPTIMIZATION) + def parallelFileListingInStatsComputation: Boolean = getConf(SQLConf.PARALLEL_FILE_LISTING_IN_STATS_COMPUTATION) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala index 7d975a1b00466..4129a37eb69a2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala @@ -29,6 +29,15 @@ import org.apache.spark.sql.types._ class OptimizeJsonExprsSuite extends PlanTest with ExpressionEvalHelper { + private var jsonExpressionOptimizeEnabled: Boolean = _ + protected override def beforeAll(): Unit = { + jsonExpressionOptimizeEnabled = SQLConf.get.jsonExpressionOptimization + } + + protected override def afterAll(): Unit = { + SQLConf.get.setConf(SQLConf.JSON_EXPRESSION_OPTIMIZATION, jsonExpressionOptimizeEnabled) + } + object Optimizer extends RuleExecutor[LogicalPlan] { val batches = Batch("Json optimization", FixedPoint(10), OptimizeJsonExprs) :: Nil } @@ -266,4 +275,16 @@ class OptimizeJsonExprsSuite extends PlanTest with ExpressionEvalHelper { checkEvaluation(e1, e2.eval(row), row) }) } + + test("SPARK-33078: disable json optimization") { + withSQLConf(SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> "false") { + val options = Map.empty[String, String] + + val query = testRelation + .select(JsonToStructs(schema, options, StructsToJson(options, 'struct)).as("struct")) + val optimized = Optimizer.execute(query.analyze) + + comparePlans(optimized, query.analyze) + } + } } From ba69d68d91eed2773c56a1cd82043aba42cecea3 Mon Sep 17 00:00:00 2001 From: Denis Pyshev Date: Thu, 15 Oct 2020 14:49:43 -0500 Subject: [PATCH 0247/1009] [SPARK-33080][BUILD] Replace fatal warnings snippet ### What changes were proposed in this pull request? Current solution in build file to enable build failure on compilation warnings with exclusion of deprecation ones is not portable after SBT version 1.3.13 (build import fails with compilation error with SBT 1.4) and could be replaced with more robust and maintainable, especially since Scala 2.13.2 with similar built-in functionality. 
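For illustration only, a minimal sketch of the Scala 2.13.2+ built-in mechanism referred to above, written as hypothetical sbt settings rather than the exact configuration this patch adds (the real changes are in the `SparkBuild.scala` diff below):

```
// Hedged sketch of the built-in warning configuration available since Scala 2.13.2.
// Illustrative sbt settings only; not the precise snippet introduced by this patch.
scalacOptions ++= Seq(
  // report deprecations as verbose warnings, escalate every other warning category to an error
  "-Wconf:cat=deprecation:wv,any:e"
)
```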
Additionally, warnings were fixed to pass the build, with as few changes as possible: warnings in 2.12 compilation fixed in code, warnings in 2.13 compilation covered by configuration to be addressed separately ### Why are the changes needed? Unblocks upgrade to SBT after 1.3.13. Enhances build file maintainability. Allows fine tune of warnings configuration in scope of Scala 2.13 compilation. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? `build/sbt`'s `compile` and `Test/compile` for both Scala 2.12 and 2.13 profiles. Closes #29995 from gemelen/feature/warnings-reporter. Authored-by: Denis Pyshev Signed-off-by: Sean Owen --- .../HostLocalShuffleReadingSuite.scala | 1 + .../spark/storage/BlockManagerSuite.scala | 4 +- project/SparkBuild.scala | 84 ++++++++++--------- .../catalyst/optimizer/OptimizerSuite.scala | 2 +- .../sql/catalyst/util/UnsafeArraySuite.scala | 3 +- .../spark/sql/connector/InMemoryTable.scala | 8 ++ .../sql/streaming/StreamingQuerySuite.scala | 2 +- .../sql/hive/thriftserver/CliSuite.scala | 6 +- 8 files changed, 62 insertions(+), 48 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/shuffle/HostLocalShuffleReadingSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/HostLocalShuffleReadingSuite.scala index 12c40f4462c7c..8f0c4da88feb2 100644 --- a/core/src/test/scala/org/apache/spark/shuffle/HostLocalShuffleReadingSuite.scala +++ b/core/src/test/scala/org/apache/spark/shuffle/HostLocalShuffleReadingSuite.scala @@ -58,6 +58,7 @@ class HostLocalShuffleReadingSuite extends SparkFunSuite with Matchers with Loca val conf = new SparkConf() .set(SHUFFLE_HOST_LOCAL_DISK_READING_ENABLED, true) + import scala.language.existentials val (essStatus, blockStoreClientClass) = if (isESSEnabled) { // LocalSparkCluster will disable the ExternalShuffleService by default. 
Therefore, // we have to manually setup an server which embedded with ExternalBlockHandler to diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index 861c16269583a..5450a4b67c00b 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -240,7 +240,7 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE val driverEndpoint = rpcEnv.setupEndpoint(CoarseGrainedSchedulerBackend.ENDPOINT_NAME, new RpcEndpoint { private val executorSet = mutable.HashSet[String]() - override val rpcEnv: RpcEnv = this.rpcEnv + override val rpcEnv: RpcEnv = BlockManagerSuite.this.rpcEnv override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { case CoarseGrainedClusterMessages.RegisterExecutor(executorId, _, _, _, _, _, _, _) => executorSet += executorId @@ -254,7 +254,7 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE def createAndRegisterBlockManager(timeout: Boolean): BlockManagerId = { val id = if (timeout) "timeout" else "normal" val bmRef = rpcEnv.setupEndpoint(s"bm-$id", new RpcEndpoint { - override val rpcEnv: RpcEnv = this.rpcEnv + override val rpcEnv: RpcEnv = BlockManagerSuite.this.rpcEnv private def reply[T](context: RpcCallContext, response: T): Unit = { if (timeout) { Thread.sleep(conf.getTimeAsMs(Network.RPC_ASK_TIMEOUT.key) + 1000) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index f20a84451c5c5..5f2ef480f8de5 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -28,6 +28,7 @@ import scala.collection.mutable.Stack import sbt._ import sbt.Classpaths.publishTask import sbt.Keys._ +import sbt.librarymanagement.{ VersionNumber, SemanticSelector } import com.etsy.sbt.checkstyle.CheckstylePlugin.autoImport._ import com.simplytyped.Antlr4Plugin._ import com.typesafe.sbt.pom.{PomBuild, SbtPomKeys} @@ -196,7 +197,52 @@ object SparkBuild extends PomBuild { } ) + // Silencer: Scala compiler plugin for warning suppression + // Aim: enable fatal warnings, but supress ones related to using of deprecated APIs + // depends on scala version: + // <2.13 - silencer 1.6.0 and compiler settings to enable fatal warnings + // 2.13.0,2.13.1 - silencer 1.7.1 and compiler settings to enable fatal warnings + // 2.13.2+ - no silencer and configured warnings to achieve the same + lazy val compilerWarningSettings: Seq[sbt.Def.Setting[_]] = Seq( + libraryDependencies ++= { + if (VersionNumber(scalaVersion.value).matchesSemVer(SemanticSelector("<2.13.2"))) { + val silencerVersion = if (scalaBinaryVersion.value == "2.13") "1.7.1" else "1.6.0" + Seq( + "org.scala-lang.modules" %% "scala-collection-compat" % "2.2.0", + compilerPlugin("com.github.ghik" % "silencer-plugin" % silencerVersion cross CrossVersion.full), + "com.github.ghik" % "silencer-lib" % silencerVersion % Provided cross CrossVersion.full + ) + } else { + Seq.empty + } + }, + scalacOptions in Compile ++= { + if (VersionNumber(scalaVersion.value).matchesSemVer(SemanticSelector("<2.13.2"))) { + Seq( + "-Xfatal-warnings", + "-deprecation", + "-P:silencer:globalFilters=.*deprecated.*" //regex to catch deprecation warnings and supress them + ) + } else { + Seq( + // replace -Xfatal-warnings with fine-grained configuration, since 2.13.2 + // verbose warning on deprecation, error on all others + // see `scalac -Wconf:help` for details + 
"-Wconf:cat=deprecation:wv,any:e", + // 2.13-specific warning hits to be muted (as narrowly as possible) and addressed separately + "-Wconf:cat=lint-multiarg-infix:wv", + "-Wconf:cat=other-nullary-override:wv", + "-Wconf:cat=other-match-analysis&site=org.apache.spark.sql.catalyst.catalog.SessionCatalog.lookupFunction.catalogFunction:wv", + "-Wconf:cat=other-pure-statement&site=org.apache.spark.streaming.util.FileBasedWriteAheadLog.readAll.readFile:wv", + "-Wconf:cat=other-pure-statement&site=org.apache.spark.scheduler.OutputCommitCoordinatorSuite..futureAction:wv", + "-Wconf:cat=other-pure-statement&site=org.apache.spark.sql.streaming.sources.StreamingDataSourceV2Suite.testPositiveCase.\\$anonfun:wv" + ) + } + } + ) + lazy val sharedSettings = sparkGenjavadocSettings ++ + compilerWarningSettings ++ (if (sys.env.contains("NOLINT_ON_COMPILE")) Nil else enableScalaStyle) ++ Seq( exportJars in Compile := true, exportJars in Test := false, @@ -274,44 +320,6 @@ object SparkBuild extends PomBuild { if (scalaBinaryVersion.value == "2.12") Seq("-no-java-comments") else Seq.empty }, - // Implements -Xfatal-warnings, ignoring deprecation warnings. - // Code snippet taken from https://issues.scala-lang.org/browse/SI-8410. - compile in Compile := { - val analysis = (compile in Compile).value - val out = streams.value - - def logProblem(l: (=> String) => Unit, f: File, p: xsbti.Problem) = { - val jmap = new java.util.function.Function[Integer, String]() {override def apply(i: Integer): String = {i.toString}} - l(f.toString + ":" + p.position.line.map[String](jmap.apply).map(_ + ":").orElse("") + " " + p.message) - l(p.position.lineContent) - l("") - } - - var failed = 0 - analysis.asInstanceOf[sbt.internal.inc.Analysis].infos.allInfos.foreach { case (k, i) => - i.getReportedProblems foreach { p => - val deprecation = p.message.contains("deprecated") - - if (!deprecation) { - failed = failed + 1 - } - - val printer: (=> String) => Unit = s => if (deprecation) { - out.log.warn(s) - } else { - out.log.error("[warn] " + s) - } - - logProblem(printer, k, p) - - } - } - - if (failed > 0) { - sys.error(s"$failed fatal warnings") - } - analysis - }, // disable Mima check for all modules, // to be enabled in specific ones that have previous artifacts MimaKeys.mimaFailOnNoPrevious := false diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerSuite.scala index b48555ec2fb28..48c62fe2990e9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerSuite.scala @@ -58,7 +58,7 @@ class OptimizerSuite extends PlanTest { try { optimizer.execute(analyzed) } catch { - case ex: TreeNodeException[LogicalPlan] + case ex: TreeNodeException[_] if ex.getMessage.contains(SQLConf.OPTIMIZER_MAX_ITERATIONS.key) => fail("optimizer.execute should not reach max iterations.") } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala index 6d8ef68473778..2e190c6ba6d4b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala @@ -72,9 +72,8 @@ class UnsafeArraySuite extends SparkFunSuite { arrayData } - private def 
toUnsafeArray[T : TypeTag](array: Array[T]): ArrayData = { + private def toUnsafeArray[T: TypeTag](array: Array[T]): ArrayData = { val converted = ExpressionEncoder[Array[T]].createSerializer().apply(array).getArray(0) - assert(converted.isInstanceOf[T]) assert(converted.numElements == array.length) converted } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala index 6a78b9e2bddd0..b0325600e7530 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala @@ -107,6 +107,8 @@ class InMemoryTable( case (micros: Long, TimestampType) => val localDate = DateTimeUtils.microsToInstant(micros).atZone(UTC).toLocalDate ChronoUnit.YEARS.between(EPOCH_LOCAL_DATE, localDate) + case (v, t) => + throw new IllegalArgumentException(s"Match: unsupported argument(s) type - ($v, $t)") } case MonthsTransform(ref) => extractor(ref.fieldNames, schema, row) match { @@ -115,6 +117,8 @@ class InMemoryTable( case (micros: Long, TimestampType) => val localDate = DateTimeUtils.microsToInstant(micros).atZone(UTC).toLocalDate ChronoUnit.MONTHS.between(EPOCH_LOCAL_DATE, localDate) + case (v, t) => + throw new IllegalArgumentException(s"Match: unsupported argument(s) type - ($v, $t)") } case DaysTransform(ref) => extractor(ref.fieldNames, schema, row) match { @@ -122,11 +126,15 @@ class InMemoryTable( days case (micros: Long, TimestampType) => ChronoUnit.DAYS.between(Instant.EPOCH, DateTimeUtils.microsToInstant(micros)) + case (v, t) => + throw new IllegalArgumentException(s"Match: unsupported argument(s) type - ($v, $t)") } case HoursTransform(ref) => extractor(ref.fieldNames, schema, row) match { case (micros: Long, TimestampType) => ChronoUnit.HOURS.between(Instant.EPOCH, DateTimeUtils.microsToInstant(micros)) + case (v, t) => + throw new IllegalArgumentException(s"Match: unsupported argument(s) type - ($v, $t)") } case BucketTransform(numBuckets, ref) => (extractor(ref.fieldNames, schema, row).hashCode() & Integer.MAX_VALUE) % numBuckets diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala index 1f408d55fd811..9c2403dffbb1a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala @@ -705,7 +705,7 @@ class StreamingQuerySuite extends StreamTest with BeforeAndAfter with Logging wi val q2 = startQuery(input(1).toDS.map { i => // Emulate that `StreamingQuery` get captured with normal usage unintentionally. // It should not fail the query. 
- q1 + val q = q1 i }, "stream_serializable_test_2") val q3 = startQuery(input(2).toDS.map { i => diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 2064a99137bf9..f5ce21f2af335 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -98,10 +98,8 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { Seq(answer) } else { // spark-sql echoes the submitted queries - val queryEcho = query.split("\n").toList match { - case firstLine :: tail => - s"spark-sql> $firstLine" :: tail.map(l => s" > $l") - } + val xs = query.split("\n").toList + val queryEcho = s"spark-sql> ${xs.head}" :: xs.tail.map(l => s" > $l") // longer lines sometimes get split in the output, // match the first 60 characters of each query line queryEcho.map(_.take(60)) :+ answer From 81d3a8eeca80e6cef0415c5fd1a8c5b8852962a3 Mon Sep 17 00:00:00 2001 From: Chuliang Xiao Date: Thu, 15 Oct 2020 17:24:22 -0700 Subject: [PATCH 0248/1009] [MINOR][PYTHON] Fix the typo in the docstring of method agg() ### What changes were proposed in this pull request? Change `df.groupBy.agg()` to `df.groupBy().agg()` in the docstring of `agg()` ### Why are the changes needed? Fix typo in a docstring ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? No Closes #30060 from ChuliangXiao/patch-1. Authored-by: Chuliang Xiao Signed-off-by: Dongjoon Hyun --- python/pyspark/sql/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 94a7df33f335e..487135cd2329a 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -1514,7 +1514,7 @@ def cube(self, *cols): @since(1.3) def agg(self, *exprs): """ Aggregate on the entire :class:`DataFrame` without groups - (shorthand for ``df.groupBy.agg()``). + (shorthand for ``df.groupBy().agg()``). >>> df.agg({"age": "max"}).collect() [Row(max(age)=5)] From 9f5eff0ae1fbf526bbb5ae7a6582325279aaa3cd Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 15 Oct 2020 17:58:58 -0700 Subject: [PATCH 0249/1009] [SPARK-33162][INFRA] Use pre-built image at GitHub Action PySpark jobs ### What changes were proposed in this pull request? This PR aims to use `pre-built image` at Github Action PySpark jobs. To isolate the changes, `pyspark` jobs are split from the main job. The docker image is built by the following. | Item | URL | | --------------- | ------------- | | Dockerfile | https://github.com/dongjoon-hyun/ApacheSparkGitHubActionImage/blob/main/Dockerfile | | Builder | https://github.com/dongjoon-hyun/ApacheSparkGitHubActionImage/blob/main/.github/workflows/build.yml | | Image Location | https://hub.docker.com/r/dongjoon/apache-spark-github-action-image | Please note that. 1. The community still will use `build_and_test.yml` to add new features like as we did until now. The `Dockerfile` will be updated regularly. 2. When Apache Spark gets an official docker repository location, we will use it. 3. Also, it's the best if we keep this docker file and builder script at a new Apache Spark dev branch instead of outside GitHub repository. ### Why are the changes needed? Currently, two `pyspark` test jobs take over one and half hour always. 
In total, 3 hours 14 minutes. - https://github.com/apache/spark/runs/1240470628 (1 hour 35 mins) - https://github.com/apache/spark/runs/1240470634 (1 hour 39 mins) This PR will remove the package installation steps which takes 16 minutes and causes flakiness. Note that `Python 3.6 package installation` is not included in the pre-built image and it only takes `20s`. **BEFORE** ![Screen Shot 2020-10-15 at 10 32 17 AM](https://user-images.githubusercontent.com/9700541/96165634-be625080-0ed1-11eb-974b-940c112152e9.png) **AFTER** ![Screen Shot 2020-10-15 at 10 58 17 AM](https://user-images.githubusercontent.com/9700541/96168262-5d3c7c00-0ed5-11eb-83c5-e9dc189a156b.png) In short, `pyspark` GitHub jobs take shorter time. In total, 2 hours 23 minutes (<- 3 hours 14 minutes, previously). - https://github.com/apache/spark/pull/30059/checks?check_run_id=1260512568 (1 hour 18 mins) - https://github.com/apache/spark/pull/30059/checks?check_run_id=1260512582 (1 hour 5 mins) ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the GitHub Action on this PR without `package installation steps`. Closes #30059 from dongjoon-hyun/SPARK-33162. Lead-authored-by: Dongjoon Hyun Co-authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 119 ++++++++++++++++++++------- 1 file changed, 89 insertions(+), 30 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 9b7026eeca4c8..cdbe34129637e 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -42,10 +42,6 @@ jobs: streaming, sql-kafka-0-10, streaming-kafka-0-10, mllib-local, mllib, yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl - - >- - pyspark-sql, pyspark-mllib, pyspark-resource - - >- - pyspark-core, pyspark-streaming, pyspark-ml - >- sparkr # Here, we split Hive and SQL tests into some of slow ones and the rest of them. @@ -128,41 +124,17 @@ jobs: uses: actions/setup-java@v1 with: java-version: ${{ matrix.java }} - # PySpark - - name: Install PyPy3 - # Note that order of Python installations here matters because default python3 is - # overridden by pypy3. - uses: actions/setup-python@v2 - if: contains(matrix.modules, 'pyspark') - with: - python-version: pypy3 - architecture: x64 - - name: Install Python 3.6 - uses: actions/setup-python@v2 - if: contains(matrix.modules, 'pyspark') - with: - python-version: 3.6 - architecture: x64 - name: Install Python 3.8 uses: actions/setup-python@v2 # We should install one Python that is higher then 3+ for SQL and Yarn because: # - SQL component also has Python related tests, for example, IntegratedUDFTestUtils. # - Yarn has a Python specific test too, for example, YarnClusterSuite. - if: contains(matrix.modules, 'yarn') || contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) + if: contains(matrix.modules, 'yarn') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) with: python-version: 3.8 architecture: x64 - - name: Install Python packages (Python 3.6 and PyPy3) - if: contains(matrix.modules, 'pyspark') - # PyArrow is not supported in PyPy yet, see ARROW-2651. 
- run: | - python3.6 -m pip install numpy pyarrow pandas scipy xmlrunner - python3.6 -m pip list - # PyPy does not have xmlrunner - pypy3 -m pip install numpy pandas scipy - pypy3 -m pip list - name: Install Python packages (Python 3.8) - if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) + if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) run: | python3.8 -m pip install numpy pyarrow pandas scipy xmlrunner python3.8 -m pip list @@ -201,6 +173,93 @@ jobs: name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }} path: "**/target/unit-tests.log" + pyspark: + name: "Build modules: ${{ matrix.modules }}" + runs-on: ubuntu-20.04 + container: + image: dongjoon/apache-spark-github-action-image:20201015 + strategy: + fail-fast: false + matrix: + modules: + - >- + pyspark-sql, pyspark-mllib, pyspark-resource + - >- + pyspark-core, pyspark-streaming, pyspark-ml + env: + MODULES_TO_TEST: ${{ matrix.modules }} + HADOOP_PROFILE: hadoop3.2 + HIVE_PROFILE: hive2.3 + # GitHub Actions' default miniconda to use in pip packaging test. + CONDA_PREFIX: /usr/share/miniconda + GITHUB_PREV_SHA: ${{ github.event.before }} + GITHUB_INPUT_BRANCH: ${{ github.event.inputs.target }} + steps: + - name: Checkout Spark repository + uses: actions/checkout@v2 + # In order to fetch changed files + with: + fetch-depth: 0 + - name: Merge dispatched input branch + if: ${{ github.event.inputs.target != '' }} + run: git merge --progress --ff-only origin/${{ github.event.inputs.target }} + # Cache local repositories. Note that GitHub Actions cache has a 2G limit. + - name: Cache Scala, SBT, Maven and Zinc + uses: actions/cache@v2 + with: + path: | + build/apache-maven-* + build/zinc-* + build/scala-* + build/*.jar + key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} + restore-keys: | + build- + - name: Cache Maven local repository + uses: actions/cache@v2 + with: + path: ~/.m2/repository + key: pyspark-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + pyspark-maven- + - name: Cache Ivy local repository + uses: actions/cache@v2 + with: + path: ~/.ivy2/cache + key: pyspark-ivy-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} + restore-keys: | + pyspark-ivy- + - name: Install Python 3.6 + uses: actions/setup-python@v2 + with: + python-version: 3.6 + architecture: x64 + # This step takes much less time (~30s) than other Python versions so it is not included + # in the Docker image being used. There is also a technical issue to install Python 3.6 on + # Ubuntu 20.04. See also SPARK-33162. + - name: Install Python packages (Python 3.6) + run: | + python3.6 -m pip install numpy pyarrow pandas scipy xmlrunner + python3.6 -m pip list + # Run the tests. 
+ - name: Run tests + run: | + mkdir -p ~/.m2 + ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" + rm -rf ~/.m2/repository/org/apache/spark + - name: Upload test results to report + if: always() + uses: actions/upload-artifact@v2 + with: + name: test-results-${{ matrix.modules }}--1.8-hadoop3.2-hive2.3 + path: "**/target/test-reports/*.xml" + - name: Upload unit tests log files + if: failure() + uses: actions/upload-artifact@v2 + with: + name: unit-tests-log-${{ matrix.modules }}--1.8-hadoop3.2-hive2.3 + path: "**/target/unit-tests.log" + # Static analysis, and documentation build lint: name: Linters, licenses, dependencies and documentation generation From 38c05af1d5538fc6ad00cdb57c1a90e90d04e25d Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 16 Oct 2020 10:28:15 +0900 Subject: [PATCH 0250/1009] [SPARK-33163][SQL][TESTS] Check the metadata key 'org.apache.spark.legacyDateTime' in Avro/Parquet files ### What changes were proposed in this pull request? Added a couple tests to `AvroSuite` and to `ParquetIOSuite` to check that the metadata key 'org.apache.spark.legacyDateTime' is written correctly depending on the SQL configs: - spark.sql.legacy.avro.datetimeRebaseModeInWrite - spark.sql.legacy.parquet.datetimeRebaseModeInWrite This is a follow up https://github.com/apache/spark/pull/28137. ### Why are the changes needed? 1. To improve test coverage 2. To make sure that the metadata key is actually saved to Avro/Parquet files ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running the added tests: ``` $ build/sbt "testOnly org.apache.spark.sql.execution.datasources.parquet.ParquetIOSuite" $ build/sbt "avro/test:testOnly org.apache.spark.sql.avro.AvroV1Suite" $ build/sbt "avro/test:testOnly org.apache.spark.sql.avro.AvroV2Suite" ``` Closes #30061 from MaxGekk/parquet-test-metakey. 
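As a rough, manual illustration of what the new tests assert (a sketch only, not part of this patch, assuming a `SparkSession` named `spark` and an illustrative local output path):

```
// Hedged sketch: write a timestamp in LEGACY rebase mode and inspect the Parquet footer
// for the 'org.apache.spark.legacyDateTime' marker. Assumes a SparkSession `spark`;
// the output path is illustrative.
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.parquet.HadoopReadOptions
import org.apache.parquet.hadoop.ParquetFileReader
import org.apache.parquet.hadoop.util.HadoopInputFile
import spark.implicits._

spark.conf.set("spark.sql.legacy.parquet.datetimeRebaseModeInWrite", "LEGACY")
Seq(java.sql.Timestamp.valueOf("2020-10-15 01:02:03")).toDF("ts")
  .repartition(1)
  .write.mode("overwrite").parquet("/tmp/legacy-ts")

val conf = new Configuration()
val dir = new Path("/tmp/legacy-ts")
val partFile = dir.getFileSystem(conf).listStatus(dir)
  .map(_.getPath).filter(_.getName.endsWith(".parquet")).head
val reader = ParquetFileReader.open(
  HadoopInputFile.fromPath(partFile, conf), HadoopReadOptions.builder(conf).build())
try {
  // Expected to contain the key "org.apache.spark.legacyDateTime" in LEGACY mode,
  // and to omit it in CORRECTED or EXCEPTION mode.
  println(reader.getFileMetaData.getKeyValueMetaData)
} finally {
  reader.close()
}
```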
Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- .../org/apache/spark/sql/avro/AvroSuite.scala | 40 ++++++++++++--- .../datasources/parquet/ParquetIOSuite.scala | 51 ++++++++++++++----- 2 files changed, 73 insertions(+), 18 deletions(-) diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index 1005a274d0304..b0f2f8ed09a96 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -1791,15 +1791,19 @@ abstract class AvroSuite extends QueryTest with SharedSparkSession with NestedDa } } + private def checkMetaData(path: java.io.File, key: String, expectedValue: String): Unit = { + val avroFiles = path.listFiles() + .filter(f => f.isFile && !f.getName.startsWith(".") && !f.getName.startsWith("_")) + assert(avroFiles.length === 1) + val reader = DataFileReader.openReader(avroFiles(0), new GenericDatumReader[GenericRecord]()) + val value = reader.asInstanceOf[DataFileReader[_]].getMetaString(key) + assert(value === expectedValue) + } + test("SPARK-31327: Write Spark version into Avro file metadata") { withTempPath { path => spark.range(1).repartition(1).write.format("avro").save(path.getCanonicalPath) - val avroFiles = path.listFiles() - .filter(f => f.isFile && !f.getName.startsWith(".") && !f.getName.startsWith("_")) - assert(avroFiles.length === 1) - val reader = DataFileReader.openReader(avroFiles(0), new GenericDatumReader[GenericRecord]()) - val version = reader.asInstanceOf[DataFileReader[_]].getMetaString(SPARK_VERSION_METADATA_KEY) - assert(version === SPARK_VERSION_SHORT) + checkMetaData(path, SPARK_VERSION_METADATA_KEY, SPARK_VERSION_SHORT) } } @@ -1812,6 +1816,30 @@ abstract class AvroSuite extends QueryTest with SharedSparkSession with NestedDa spark.read.format("avro").options(conf).load(path) } } + + test("SPARK-33163: write the metadata key 'org.apache.spark.legacyDateTime'") { + def saveTs(dir: java.io.File): Unit = { + Seq(Timestamp.valueOf("2020-10-15 01:02:03")).toDF() + .repartition(1) + .write + .format("avro") + .save(dir.getAbsolutePath) + } + withSQLConf(SQLConf.LEGACY_AVRO_REBASE_MODE_IN_WRITE.key -> LEGACY.toString) { + withTempPath { dir => + saveTs(dir) + checkMetaData(dir, SPARK_LEGACY_DATETIME, "") + } + } + Seq(CORRECTED, EXCEPTION).foreach { mode => + withSQLConf(SQLConf.LEGACY_AVRO_REBASE_MODE_IN_WRITE.key -> mode.toString) { + withTempPath { dir => + saveTs(dir) + checkMetaData(dir, SPARK_LEGACY_DATETIME, null) + } + } + } + } } class AvroV1Suite extends AvroSuite { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala index 2dc8a062bb73d..ff406f7bc62de 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala @@ -859,20 +859,24 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession } } - test("Write Spark version into Parquet metadata") { - withTempPath { dir => - val path = dir.getAbsolutePath - spark.range(1).repartition(1).write.parquet(path) - val file = SpecificParquetRecordReaderBase.listDirectory(dir).get(0) - - val conf = new Configuration() - val hadoopInputFile = HadoopInputFile.fromPath(new Path(file), conf) - 
val parquetReadOptions = HadoopReadOptions.builder(conf).build() - val m = ParquetFileReader.open(hadoopInputFile, parquetReadOptions) - val metaData = m.getFileMetaData.getKeyValueMetaData + private def getMetaData(dir: java.io.File): Map[String, String] = { + val file = SpecificParquetRecordReaderBase.listDirectory(dir).get(0) + val conf = new Configuration() + val hadoopInputFile = HadoopInputFile.fromPath(new Path(file), conf) + val parquetReadOptions = HadoopReadOptions.builder(conf).build() + val m = ParquetFileReader.open(hadoopInputFile, parquetReadOptions) + val metadata = try { + m.getFileMetaData.getKeyValueMetaData + } finally { m.close() + } + metadata.asScala.toMap + } - assert(metaData.get(SPARK_VERSION_METADATA_KEY) === SPARK_VERSION_SHORT) + test("Write Spark version into Parquet metadata") { + withTempPath { dir => + spark.range(1).repartition(1).write.parquet(dir.getAbsolutePath) + assert(getMetaData(dir)(SPARK_VERSION_METADATA_KEY) === SPARK_VERSION_SHORT) } } @@ -1109,6 +1113,29 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession } } } + + test("SPARK-33163: write the metadata key 'org.apache.spark.legacyDateTime'") { + def saveTs(dir: java.io.File): Unit = { + Seq(Timestamp.valueOf("2020-10-15 01:02:03")).toDF() + .repartition(1) + .write + .parquet(dir.getAbsolutePath) + } + withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> LEGACY.toString) { + withTempPath { dir => + saveTs(dir) + assert(getMetaData(dir)(SPARK_LEGACY_DATETIME) === "") + } + } + Seq(CORRECTED, EXCEPTION).foreach { mode => + withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> mode.toString) { + withTempPath { dir => + saveTs(dir) + assert(getMetaData(dir).get(SPARK_LEGACY_DATETIME).isEmpty) + } + } + } + } } class JobCommitFailureParquetOutputCommitter(outputPath: Path, context: TaskAttemptContext) From bf594a978812419e5905a47535b50167dbad532f Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Fri, 16 Oct 2020 11:04:35 +0900 Subject: [PATCH 0251/1009] [SPARK-32402][SQL][FOLLOW-UP] Add case sensitivity tests for column resolution in ALTER TABLE ### What changes were proposed in this pull request? Add case sensitivity tests for column resolution in ALTER TABLE ### Why are the changes needed? To make sure `spark.sql.caseSensitive` works for `ResolveAlterTableChanges` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? new test Closes #30063 from huaxingao/caseSensitivity. 
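As a rough illustration of the behaviour the new tests pin down (a sketch only, not part of this patch, assuming the `h2` JDBC catalog configured as in the suite and an existing `test.alt_table` with a lower-case column `c2`):

```
// Hedged sketch: how spark.sql.caseSensitive affects column resolution in ALTER TABLE.
// Assumes the h2 JDBC catalog and the test.alt_table table set up as in the suite below.
spark.conf.set("spark.sql.caseSensitive", "true")
try {
  // Case-sensitive resolution: `C2` does not match a column declared as `c2`, so this is
  // expected to fail analysis with "Cannot rename missing field C2 in test.alt_table schema".
  spark.sql("ALTER TABLE h2.test.alt_table RENAME COLUMN C2 TO c3")
} catch {
  case e: org.apache.spark.sql.AnalysisException => println(e.getMessage)
}

spark.conf.set("spark.sql.caseSensitive", "false")
// Case-insensitive resolution (the default): `C2` resolves to `c2` and the rename succeeds.
spark.sql("ALTER TABLE h2.test.alt_table RENAME COLUMN C2 TO c3")
```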
Authored-by: Huaxin Gao Signed-off-by: HyukjinKwon --- .../v2/jdbc/JDBCTableCatalogSuite.scala | 155 +++++++++++++----- 1 file changed, 114 insertions(+), 41 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala index 209f5609e447f..d99ccf85683ed 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala @@ -23,6 +23,7 @@ import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.analysis.{NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -168,23 +169,24 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { assert(exp.cause.get.getMessage.contains("Schema \"bad_test\" not found")) } - test("alter table ... add column") { - withTable("h2.test.alt_table") { - sql("CREATE TABLE h2.test.alt_table (ID INTEGER) USING _") - sql("ALTER TABLE h2.test.alt_table ADD COLUMNS (C1 INTEGER, C2 STRING)") - var t = spark.table("h2.test.alt_table") + test("ALTER TABLE ... add column") { + val tableName = "h2.test.alt_table" + withTable(tableName) { + sql(s"CREATE TABLE $tableName (ID INTEGER) USING _") + sql(s"ALTER TABLE $tableName ADD COLUMNS (C1 INTEGER, C2 STRING)") + var t = spark.table(tableName) var expectedSchema = new StructType() .add("ID", IntegerType) .add("C1", IntegerType) .add("C2", StringType) assert(t.schema === expectedSchema) - sql("ALTER TABLE h2.test.alt_table ADD COLUMNS (c3 DOUBLE)") - t = spark.table("h2.test.alt_table") + sql(s"ALTER TABLE $tableName ADD COLUMNS (c3 DOUBLE)") + t = spark.table(tableName) expectedSchema = expectedSchema.add("c3", DoubleType) assert(t.schema === expectedSchema) // Add already existing column val msg = intercept[AnalysisException] { - sql("ALTER TABLE h2.test.alt_table ADD COLUMNS (c3 DOUBLE)") + sql(s"ALTER TABLE $tableName ADD COLUMNS (c3 DOUBLE)") }.getMessage assert(msg.contains("Cannot add column, because c3 already exists")) } @@ -197,18 +199,19 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { } } - test("alter table ... rename column") { - withTable("h2.test.alt_table") { - sql("CREATE TABLE h2.test.alt_table (id INTEGER, C0 INTEGER) USING _") - sql("ALTER TABLE h2.test.alt_table RENAME COLUMN id TO C") - val t = spark.table("h2.test.alt_table") + test("ALTER TABLE ... 
rename column") { + val tableName = "h2.test.alt_table" + withTable(tableName) { + sql(s"CREATE TABLE $tableName (id INTEGER, C0 INTEGER) USING _") + sql(s"ALTER TABLE $tableName RENAME COLUMN id TO C") + val t = spark.table(tableName) val expectedSchema = new StructType() .add("C", IntegerType) .add("C0", IntegerType) assert(t.schema === expectedSchema) // Rename to already existing column val msg = intercept[AnalysisException] { - sql("ALTER TABLE h2.test.alt_table RENAME COLUMN C TO C0") + sql(s"ALTER TABLE $tableName RENAME COLUMN C TO C0") }.getMessage assert(msg.contains("Cannot rename column, because C0 already exists")) } @@ -221,17 +224,18 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { } } - test("alter table ... drop column") { - withTable("h2.test.alt_table") { - sql("CREATE TABLE h2.test.alt_table (C1 INTEGER, C2 INTEGER, c3 INTEGER) USING _") - sql("ALTER TABLE h2.test.alt_table DROP COLUMN C1") - sql("ALTER TABLE h2.test.alt_table DROP COLUMN c3") - val t = spark.table("h2.test.alt_table") + test("ALTER TABLE ... drop column") { + val tableName = "h2.test.alt_table" + withTable(tableName) { + sql(s"CREATE TABLE $tableName (C1 INTEGER, C2 INTEGER, c3 INTEGER) USING _") + sql(s"ALTER TABLE $tableName DROP COLUMN C1") + sql(s"ALTER TABLE $tableName DROP COLUMN c3") + val t = spark.table(tableName) val expectedSchema = new StructType().add("C2", IntegerType) assert(t.schema === expectedSchema) // Drop not existing column val msg = intercept[AnalysisException] { - sql("ALTER TABLE h2.test.alt_table DROP COLUMN bad_column") + sql(s"ALTER TABLE $tableName DROP COLUMN bad_column") }.getMessage assert(msg.contains("Cannot delete missing field bad_column in test.alt_table schema")) } @@ -244,22 +248,23 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { } } - test("alter table ... update column type") { - withTable("h2.test.alt_table") { - sql("CREATE TABLE h2.test.alt_table (ID INTEGER, deptno INTEGER) USING _") - sql("ALTER TABLE h2.test.alt_table ALTER COLUMN id TYPE DOUBLE") - sql("ALTER TABLE h2.test.alt_table ALTER COLUMN deptno TYPE DOUBLE") - val t = spark.table("h2.test.alt_table") + test("ALTER TABLE ... update column type") { + val tableName = "h2.test.alt_table" + withTable(tableName) { + sql(s"CREATE TABLE $tableName (ID INTEGER, deptno INTEGER) USING _") + sql(s"ALTER TABLE $tableName ALTER COLUMN id TYPE DOUBLE") + sql(s"ALTER TABLE $tableName ALTER COLUMN deptno TYPE DOUBLE") + val t = spark.table(tableName) val expectedSchema = new StructType().add("ID", DoubleType).add("deptno", DoubleType) assert(t.schema === expectedSchema) // Update not existing column val msg1 = intercept[AnalysisException] { - sql("ALTER TABLE h2.test.alt_table ALTER COLUMN bad_column TYPE DOUBLE") + sql(s"ALTER TABLE $tableName ALTER COLUMN bad_column TYPE DOUBLE") }.getMessage assert(msg1.contains("Cannot update missing field bad_column in test.alt_table schema")) // Update column to wrong type val msg2 = intercept[ParseException] { - sql("ALTER TABLE h2.test.alt_table ALTER COLUMN id TYPE bad_type") + sql(s"ALTER TABLE $tableName ALTER COLUMN id TYPE bad_type") }.getMessage assert(msg2.contains("DataType bad_type is not supported")) } @@ -272,18 +277,19 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { } } - test("alter table ... 
update column nullability") { - withTable("h2.test.alt_table") { - sql("CREATE TABLE h2.test.alt_table (ID INTEGER NOT NULL, deptno INTEGER NOT NULL) USING _") - sql("ALTER TABLE h2.test.alt_table ALTER COLUMN ID DROP NOT NULL") - sql("ALTER TABLE h2.test.alt_table ALTER COLUMN deptno DROP NOT NULL") - val t = spark.table("h2.test.alt_table") + test("ALTER TABLE ... update column nullability") { + val tableName = "h2.test.alt_table" + withTable(tableName) { + sql(s"CREATE TABLE $tableName (ID INTEGER NOT NULL, deptno INTEGER NOT NULL) USING _") + sql(s"ALTER TABLE $tableName ALTER COLUMN ID DROP NOT NULL") + sql(s"ALTER TABLE $tableName ALTER COLUMN deptno DROP NOT NULL") + val t = spark.table(tableName) val expectedSchema = new StructType() .add("ID", IntegerType, nullable = true).add("deptno", IntegerType, nullable = true) assert(t.schema === expectedSchema) // Update nullability of not existing column val msg = intercept[AnalysisException] { - sql("ALTER TABLE h2.test.alt_table ALTER COLUMN bad_column DROP NOT NULL") + sql(s"ALTER TABLE $tableName ALTER COLUMN bad_column DROP NOT NULL") }.getMessage assert(msg.contains("Cannot update missing field bad_column in test.alt_table")) } @@ -296,17 +302,18 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { } } - test("alter table ... update column comment not supported") { - withTable("h2.test.alt_table") { - sql("CREATE TABLE h2.test.alt_table (ID INTEGER) USING _") + test("ALTER TABLE ... update column comment not supported") { + val tableName = "h2.test.alt_table" + withTable(tableName) { + sql(s"CREATE TABLE $tableName (ID INTEGER) USING _") val exp = intercept[AnalysisException] { - sql("ALTER TABLE h2.test.alt_table ALTER COLUMN ID COMMENT 'test'") + sql(s"ALTER TABLE $tableName ALTER COLUMN ID COMMENT 'test'") } assert(exp.getMessage.contains("Failed table altering: test.alt_table")) assert(exp.cause.get.getMessage.contains("Unsupported TableChange")) // Update comment for not existing column val msg = intercept[AnalysisException] { - sql("ALTER TABLE h2.test.alt_table ALTER COLUMN bad_column COMMENT 'test'") + sql(s"ALTER TABLE $tableName ALTER COLUMN bad_column COMMENT 'test'") }.getMessage assert(msg.contains("Cannot update missing field bad_column in test.alt_table")) } @@ -318,4 +325,70 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { assert(msg.contains("Table not found")) } } + + test("ALTER TABLE case sensitivity") { + val tableName = "h2.test.alt_table" + withTable(tableName) { + sql(s"CREATE TABLE $tableName (c1 INTEGER NOT NULL, c2 INTEGER) USING _") + var t = spark.table(tableName) + var expectedSchema = new StructType().add("c1", IntegerType).add("c2", IntegerType) + assert(t.schema === expectedSchema) + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $tableName RENAME COLUMN C2 TO c3") + }.getMessage + assert(msg.contains("Cannot rename missing field C2 in test.alt_table schema")) + } + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + sql(s"ALTER TABLE $tableName RENAME COLUMN C2 TO c3") + expectedSchema = new StructType().add("c1", IntegerType).add("c3", IntegerType) + t = spark.table(tableName) + assert(t.schema === expectedSchema) + } + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $tableName DROP COLUMN C3") + }.getMessage + assert(msg.contains("Cannot delete missing field C3 in test.alt_table schema")) + } + + 
withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + sql(s"ALTER TABLE $tableName DROP COLUMN C3") + expectedSchema = new StructType().add("c1", IntegerType) + t = spark.table(tableName) + assert(t.schema === expectedSchema) + } + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $tableName ALTER COLUMN C1 TYPE DOUBLE") + }.getMessage + assert(msg.contains("Cannot update missing field C1 in test.alt_table schema")) + } + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + sql(s"ALTER TABLE $tableName ALTER COLUMN C1 TYPE DOUBLE") + expectedSchema = new StructType().add("c1", DoubleType) + t = spark.table(tableName) + assert(t.schema === expectedSchema) + } + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $tableName ALTER COLUMN C1 DROP NOT NULL") + }.getMessage + assert(msg.contains("Cannot update missing field C1 in test.alt_table schema")) + } + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + sql(s"ALTER TABLE $tableName ALTER COLUMN C1 DROP NOT NULL") + expectedSchema = new StructType().add("c1", DoubleType, nullable = true) + t = spark.table(tableName) + assert(t.schema === expectedSchema) + } + } + } } From a5c17de24148ac3ef290e091dcf2978e26afa58c Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Fri, 16 Oct 2020 11:39:09 +0900 Subject: [PATCH 0252/1009] [SPARK-33165][SQL][TEST] Remove dependencies(scalatest,scalactic) from Benchmark ### What changes were proposed in this pull request? This PR proposes to remove `assert` from `Benchmark` for making it easier to run benchmark codes via `spark-submit`. ### Why are the changes needed? Since the current `Benchmark` (`master` and `branch-3.0`) has `assert`, we need to pass the proper jars of `scalatest` and `scalactic`; - scalatest-core_2.12-3.2.0.jar - scalatest-compatible-3.2.0.jar - scalactic_2.12-3.0.jar ``` ./bin/spark-submit --jars scalatest-core_2.12-3.2.0.jar,scalatest-compatible-3.2.0.jar,scalactic_2.12-3.0.jar,./sql/catalyst/target/spark-catalyst_2.12-3.1.0-SNAPSHOT-tests.jar,./core/target/spark-core_2.12-3.1.0-SNAPSHOT-tests.jar --class org.apache.spark.sql.execution.benchmark.TPCDSQueryBenchmark ./sql/core/target/spark-sql_2.12-3.1.0-SNAPSHOT-tests.jar --data-location /tmp/tpcds-sf1 ``` This update can make developers submit benchmark codes without these dependencies; ``` ./bin/spark-submit --jars ./sql/catalyst/target/spark-catalyst_2.12-3.1.0-SNAPSHOT-tests.jar,./core/target/spark-core_2.12-3.1.0-SNAPSHOT-tests.jar --class org.apache.spark.sql.execution.benchmark.TPCDSQueryBenchmark ./sql/core/target/spark-sql_2.12-3.1.0-SNAPSHOT-tests.jar --data-location /tmp/tpcds-sf1 ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually checked. Closes #30064 from maropu/RemoveDepInBenchmark. 
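As an aside (not part of the patch): the assertions removed by the diff below resolve to scalatest only because of the `import org.scalatest.Assertions._` in `Benchmark.scala`; a bare `assert` without that import is Scala's built-in `scala.Predef.assert`, which needs no extra jars and is how the follow-up commit later in this series restores the checks. A standalone sketch of the timer pattern involved:

```scala
// Standalone sketch, not the real org.apache.spark.benchmark.Benchmark.Timer:
// the same timing checks written against scala.Predef.assert, which ships with
// the Scala standard library and adds no classpath requirements.
object TimerSketch {
  private var accumulatedTime: Long = 0L
  private var timeStart: Long = 0L

  def startTiming(): Unit = {
    assert(timeStart == 0L, "Already started timing.") // scala.Predef.assert
    timeStart = System.nanoTime
  }

  def stopTiming(): Unit = {
    assert(timeStart != 0L, "Have not started timing.")
    accumulatedTime += System.nanoTime - timeStart
    timeStart = 0L
  }

  def totalTime(): Long = {
    assert(timeStart == 0L, "Have not stopped timing.")
    accumulatedTime
  }
}
```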
Authored-by: Takeshi Yamamuro Signed-off-by: HyukjinKwon --- .../test/scala/org/apache/spark/benchmark/Benchmark.scala | 5 ----- .../spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala | 3 ++- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala b/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala index 72c05a92848ff..0b2f512b947e1 100644 --- a/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala +++ b/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala @@ -26,7 +26,6 @@ import scala.util.Try import org.apache.commons.io.output.TeeOutputStream import org.apache.commons.lang3.SystemUtils -import org.scalatest.Assertions._ import org.apache.spark.util.Utils @@ -162,7 +161,6 @@ private[spark] class Benchmark( // scalastyle:off println(s" Stopped after $i iterations, ${NANOSECONDS.toMillis(runTimes.sum)} ms") // scalastyle:on - assert(runTimes.nonEmpty) val best = runTimes.min val avg = runTimes.sum / runTimes.size val stdev = if (runTimes.size > 1) { @@ -184,18 +182,15 @@ private[spark] object Benchmark { private var timeStart: Long = 0L def startTiming(): Unit = { - assert(timeStart == 0L, "Already started timing.") timeStart = System.nanoTime } def stopTiming(): Unit = { - assert(timeStart != 0L, "Have not started timing.") accumulatedTime += System.nanoTime - timeStart timeStart = 0L } def totalTime(): Long = { - assert(timeStart == 0L, "Have not stopped timing.") accumulatedTime } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala index ad3d79760adf0..7bbf0795eb052 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala @@ -31,7 +31,8 @@ import org.apache.spark.sql.execution.datasources.LogicalRelation * To run this: * {{{ * 1. without sbt: - * bin/spark-submit --class --data-location + * bin/spark-submit --jars , + * --class --data-location * 2. build/sbt "sql/test:runMain --data-location " * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt * "sql/test:runMain --data-location " From 8f4fc22dc460eb05c47e0d61facf116c60b1be37 Mon Sep 17 00:00:00 2001 From: Samuel Souza Date: Thu, 15 Oct 2020 22:12:41 -0500 Subject: [PATCH 0253/1009] [SPARK-33088][CORE] Enhance ExecutorPlugin API to include callbacks on task start and end events ### What changes were proposed in this pull request? Proposing a new set of APIs for ExecutorPlugins, to provide callbacks invoked at the start and end of each task of a job. Not very opinionated on the shape of the API, tried to be as minimal as possible for now. ### Why are the changes needed? Changes described in detail on [SPARK-33088](https://issues.apache.org/jira/browse/SPARK-33088), but mostly this boils down to: 1. This feature was considered when the ExecutorPlugin API was initially introduced in #21923, but never implemented. 2. The use-case which **requires** this feature is to propagate tracing information from the driver to the executor, such that calls from the same job can all be traced. a. Tracing frameworks usually are setup in thread locals, therefore it's important for the setup to happen in the same thread which runs the tasks. b. 
Executors can run tasks from multiple jobs, therefore it's not sufficient to set tracing information at executor startup time -- it needs to happen every time a task starts or ends. ### Does this PR introduce _any_ user-facing change? No. This PR introduces new features for future developers to use. ### How was this patch tested? Unit tests on `PluginContainerSuite`. Closes #29977 from fsamuel-bs/SPARK-33088. Authored-by: Samuel Souza Signed-off-by: Mridul Muralidharan --- .../spark/api/plugin/ExecutorPlugin.java | 42 ++++++++++++++++ .../org/apache/spark/executor/Executor.scala | 32 +++++++----- .../internal/plugin/PluginContainer.scala | 49 ++++++++++++++++++- .../org/apache/spark/scheduler/Task.scala | 6 ++- .../plugin/PluginContainerSuite.scala | 47 ++++++++++++++++++ .../spark/scheduler/TaskContextSuite.scala | 4 +- 6 files changed, 163 insertions(+), 17 deletions(-) diff --git a/core/src/main/java/org/apache/spark/api/plugin/ExecutorPlugin.java b/core/src/main/java/org/apache/spark/api/plugin/ExecutorPlugin.java index 4961308035163..481bf985f1c6c 100644 --- a/core/src/main/java/org/apache/spark/api/plugin/ExecutorPlugin.java +++ b/core/src/main/java/org/apache/spark/api/plugin/ExecutorPlugin.java @@ -19,6 +19,7 @@ import java.util.Map; +import org.apache.spark.TaskFailedReason; import org.apache.spark.annotation.DeveloperApi; /** @@ -54,4 +55,45 @@ default void init(PluginContext ctx, Map<String, String> extraConf) {} */ default void shutdown() {} + /** + * Perform any action before the task is run. + * <p>
+ * This method is invoked from the same thread the task will be executed. + * Task-specific information can be accessed via {@link org.apache.spark.TaskContext#get}. + * <p> + * Plugin authors should avoid expensive operations here, as this method will be called + * on every task, and doing something expensive can significantly slow down a job. + * It is not recommended for a user to call a remote service, for example. + * <p> + * Exceptions thrown from this method do not propagate - they're caught, + * logged, and suppressed. Therefore exceptions when executing this method won't + * make the job fail. + * + * @since 3.1.0 + */ + default void onTaskStart() {} + + /** + * Perform an action after a task completes without exceptions. + * <p>
+ * As {@link #onTaskStart() onTaskStart} exceptions are suppressed, this method + * will still be invoked even if the corresponding {@link #onTaskStart} call for this + * task failed. + * <p> + * Same warnings of {@link #onTaskStart() onTaskStart} apply here. + * + * @since 3.1.0 + */ + default void onTaskSucceeded() {} + + /** + * Perform an action after a task completes with exceptions. + * <p>
    + * Same warnings of {@link #onTaskStart() onTaskStart} apply here. + * + * @param failureReason the exception thrown from the failed task. + * + * @since 3.1.0 + */ + default void onTaskFailed(TaskFailedReason failureReason) {} } diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index 27addd8fc12e2..6653650615192 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -253,7 +253,7 @@ private[spark] class Executor( } def launchTask(context: ExecutorBackend, taskDescription: TaskDescription): Unit = { - val tr = new TaskRunner(context, taskDescription) + val tr = new TaskRunner(context, taskDescription, plugins) runningTasks.put(taskDescription.taskId, tr) threadPool.execute(tr) if (decommissioned) { @@ -332,7 +332,8 @@ private[spark] class Executor( class TaskRunner( execBackend: ExecutorBackend, - private val taskDescription: TaskDescription) + private val taskDescription: TaskDescription, + private val plugins: Option[PluginContainer]) extends Runnable { val taskId = taskDescription.taskId @@ -479,7 +480,8 @@ private[spark] class Executor( taskAttemptId = taskId, attemptNumber = taskDescription.attemptNumber, metricsSystem = env.metricsSystem, - resources = taskDescription.resources) + resources = taskDescription.resources, + plugins = plugins) threwException = false res } { @@ -614,6 +616,7 @@ private[spark] class Executor( executorSource.SUCCEEDED_TASKS.inc(1L) setTaskFinishedAndClearInterruptStatus() + plugins.foreach(_.onTaskSucceeded()) execBackend.statusUpdate(taskId, TaskState.FINISHED, serializedResult) } catch { case t: TaskKilledException => @@ -623,9 +626,9 @@ private[spark] class Executor( // Here and below, put task metric peaks in a WrappedArray to expose them as a Seq // without requiring a copy. 
val metricPeaks = WrappedArray.make(metricsPoller.getTaskMetricPeaks(taskId)) - val serializedTK = ser.serialize( - TaskKilled(t.reason, accUpdates, accums, metricPeaks.toSeq)) - execBackend.statusUpdate(taskId, TaskState.KILLED, serializedTK) + val reason = TaskKilled(t.reason, accUpdates, accums, metricPeaks.toSeq) + plugins.foreach(_.onTaskFailed(reason)) + execBackend.statusUpdate(taskId, TaskState.KILLED, ser.serialize(reason)) case _: InterruptedException | NonFatal(_) if task != null && task.reasonIfKilled.isDefined => @@ -634,9 +637,9 @@ private[spark] class Executor( val (accums, accUpdates) = collectAccumulatorsAndResetStatusOnFailure(taskStartTimeNs) val metricPeaks = WrappedArray.make(metricsPoller.getTaskMetricPeaks(taskId)) - val serializedTK = ser.serialize( - TaskKilled(killReason, accUpdates, accums, metricPeaks.toSeq)) - execBackend.statusUpdate(taskId, TaskState.KILLED, serializedTK) + val reason = TaskKilled(killReason, accUpdates, accums, metricPeaks.toSeq) + plugins.foreach(_.onTaskFailed(reason)) + execBackend.statusUpdate(taskId, TaskState.KILLED, ser.serialize(reason)) case t: Throwable if hasFetchFailure && !Utils.isFatalError(t) => val reason = task.context.fetchFailed.get.toTaskFailedReason @@ -650,11 +653,13 @@ private[spark] class Executor( s"other exception: $t") } setTaskFinishedAndClearInterruptStatus() + plugins.foreach(_.onTaskFailed(reason)) execBackend.statusUpdate(taskId, TaskState.FAILED, ser.serialize(reason)) case CausedBy(cDE: CommitDeniedException) => val reason = cDE.toTaskCommitDeniedReason setTaskFinishedAndClearInterruptStatus() + plugins.foreach(_.onTaskFailed(reason)) execBackend.statusUpdate(taskId, TaskState.KILLED, ser.serialize(reason)) case t: Throwable if env.isStopped => @@ -677,21 +682,22 @@ private[spark] class Executor( val (accums, accUpdates) = collectAccumulatorsAndResetStatusOnFailure(taskStartTimeNs) val metricPeaks = WrappedArray.make(metricsPoller.getTaskMetricPeaks(taskId)) - val serializedTaskEndReason = { + val (taskFailureReason, serializedTaskFailureReason) = { try { val ef = new ExceptionFailure(t, accUpdates).withAccums(accums) .withMetricPeaks(metricPeaks.toSeq) - ser.serialize(ef) + (ef, ser.serialize(ef)) } catch { case _: NotSerializableException => // t is not serializable so just send the stacktrace val ef = new ExceptionFailure(t, accUpdates, false).withAccums(accums) .withMetricPeaks(metricPeaks.toSeq) - ser.serialize(ef) + (ef, ser.serialize(ef)) } } setTaskFinishedAndClearInterruptStatus() - execBackend.statusUpdate(taskId, TaskState.FAILED, serializedTaskEndReason) + plugins.foreach(_.onTaskFailed(taskFailureReason)) + execBackend.statusUpdate(taskId, TaskState.FAILED, serializedTaskFailureReason) } else { logInfo("Not reporting error to driver during JVM shutdown.") } diff --git a/core/src/main/scala/org/apache/spark/internal/plugin/PluginContainer.scala b/core/src/main/scala/org/apache/spark/internal/plugin/PluginContainer.scala index 4eda4767094ad..f78ec250f7173 100644 --- a/core/src/main/scala/org/apache/spark/internal/plugin/PluginContainer.scala +++ b/core/src/main/scala/org/apache/spark/internal/plugin/PluginContainer.scala @@ -20,7 +20,7 @@ package org.apache.spark.internal.plugin import scala.collection.JavaConverters._ import scala.util.{Either, Left, Right} -import org.apache.spark.{SparkContext, SparkEnv} +import org.apache.spark.{SparkContext, SparkEnv, TaskFailedReason} import org.apache.spark.api.plugin._ import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ @@ 
-31,6 +31,9 @@ sealed abstract class PluginContainer { def shutdown(): Unit def registerMetrics(appId: String): Unit + def onTaskStart(): Unit + def onTaskSucceeded(): Unit + def onTaskFailed(failureReason: TaskFailedReason): Unit } @@ -85,6 +88,17 @@ private class DriverPluginContainer( } } + override def onTaskStart(): Unit = { + throw new IllegalStateException("Should not be called for the driver container.") + } + + override def onTaskSucceeded(): Unit = { + throw new IllegalStateException("Should not be called for the driver container.") + } + + override def onTaskFailed(failureReason: TaskFailedReason): Unit = { + throw new IllegalStateException("Should not be called for the driver container.") + } } private class ExecutorPluginContainer( @@ -134,6 +148,39 @@ private class ExecutorPluginContainer( } } } + + override def onTaskStart(): Unit = { + executorPlugins.foreach { case (name, plugin) => + try { + plugin.onTaskStart() + } catch { + case t: Throwable => + logInfo(s"Exception while calling onTaskStart on plugin $name.", t) + } + } + } + + override def onTaskSucceeded(): Unit = { + executorPlugins.foreach { case (name, plugin) => + try { + plugin.onTaskSucceeded() + } catch { + case t: Throwable => + logInfo(s"Exception while calling onTaskSucceeded on plugin $name.", t) + } + } + } + + override def onTaskFailed(failureReason: TaskFailedReason): Unit = { + executorPlugins.foreach { case (name, plugin) => + try { + plugin.onTaskFailed(failureReason) + } catch { + case t: Throwable => + logInfo(s"Exception while calling onTaskFailed on plugin $name.", t) + } + } + } } object PluginContainer { diff --git a/core/src/main/scala/org/apache/spark/scheduler/Task.scala b/core/src/main/scala/org/apache/spark/scheduler/Task.scala index ebc1c05435fee..81f984bb2b511 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/Task.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/Task.scala @@ -23,6 +23,7 @@ import java.util.Properties import org.apache.spark._ import org.apache.spark.executor.TaskMetrics import org.apache.spark.internal.config.APP_CALLER_CONTEXT +import org.apache.spark.internal.plugin.PluginContainer import org.apache.spark.memory.{MemoryMode, TaskMemoryManager} import org.apache.spark.metrics.MetricsSystem import org.apache.spark.rdd.InputFileBlockHolder @@ -82,7 +83,8 @@ private[spark] abstract class Task[T]( taskAttemptId: Long, attemptNumber: Int, metricsSystem: MetricsSystem, - resources: Map[String, ResourceInformation]): T = { + resources: Map[String, ResourceInformation], + plugins: Option[PluginContainer]): T = { SparkEnv.get.blockManager.registerTask(taskAttemptId) // TODO SPARK-24874 Allow create BarrierTaskContext based on partitions, instead of whether // the stage is barrier. 
@@ -123,6 +125,8 @@ private[spark] abstract class Task[T]( Option(taskAttemptId), Option(attemptNumber)).setCurrentContext() + plugins.foreach(_.onTaskStart()) + try { runTask(context) } catch { diff --git a/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala b/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala index 7888796dd55e6..e7fbe5b998a88 100644 --- a/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala +++ b/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala @@ -129,6 +129,38 @@ class PluginContainerSuite extends SparkFunSuite with BeforeAndAfterEach with Lo assert(TestSparkPlugin.driverPlugin != null) } + test("SPARK-33088: executor tasks trigger plugin calls") { + val conf = new SparkConf() + .setAppName(getClass().getName()) + .set(SparkLauncher.SPARK_MASTER, "local[1]") + .set(PLUGINS, Seq(classOf[TestSparkPlugin].getName())) + + sc = new SparkContext(conf) + sc.parallelize(1 to 10, 2).count() + + assert(TestSparkPlugin.executorPlugin.numOnTaskStart == 2) + assert(TestSparkPlugin.executorPlugin.numOnTaskSucceeded == 2) + assert(TestSparkPlugin.executorPlugin.numOnTaskFailed == 0) + } + + test("SPARK-33088: executor failed tasks trigger plugin calls") { + val conf = new SparkConf() + .setAppName(getClass().getName()) + .set(SparkLauncher.SPARK_MASTER, "local[1]") + .set(PLUGINS, Seq(classOf[TestSparkPlugin].getName())) + + sc = new SparkContext(conf) + try { + sc.parallelize(1 to 10, 2).foreach(i => throw new RuntimeException) + } catch { + case t: Throwable => // ignore exception + } + + assert(TestSparkPlugin.executorPlugin.numOnTaskStart == 2) + assert(TestSparkPlugin.executorPlugin.numOnTaskSucceeded == 0) + assert(TestSparkPlugin.executorPlugin.numOnTaskFailed == 2) + } + test("plugin initialization in non-local mode") { val path = Utils.createTempDir() @@ -309,6 +341,10 @@ private class TestDriverPlugin extends DriverPlugin { private class TestExecutorPlugin extends ExecutorPlugin { + var numOnTaskStart: Int = 0 + var numOnTaskSucceeded: Int = 0 + var numOnTaskFailed: Int = 0 + override def init(ctx: PluginContext, extraConf: JMap[String, String]): Unit = { ctx.metricRegistry().register("executorMetric", new Gauge[Int] { override def getValue(): Int = 84 @@ -316,6 +352,17 @@ private class TestExecutorPlugin extends ExecutorPlugin { TestSparkPlugin.executorContext = ctx } + override def onTaskStart(): Unit = { + numOnTaskStart += 1 + } + + override def onTaskSucceeded(): Unit = { + numOnTaskSucceeded += 1 + } + + override def onTaskFailed(failureReason: TaskFailedReason): Unit = { + numOnTaskFailed += 1 + } } private object TestSparkPlugin { diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskContextSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskContextSuite.scala index 394a2a9fbf7cb..8a7ff9eb6dcd3 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskContextSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskContextSuite.scala @@ -70,7 +70,7 @@ class TaskContextSuite extends SparkFunSuite with BeforeAndAfter with LocalSpark 0, 0, taskBinary, rdd.partitions(0), Seq.empty, 0, new Properties, closureSerializer.serialize(TaskMetrics.registered).array()) intercept[RuntimeException] { - task.run(0, 0, null, null) + task.run(0, 0, null, null, Option.empty) } assert(TaskContextSuite.completed) } @@ -92,7 +92,7 @@ class TaskContextSuite extends SparkFunSuite with BeforeAndAfter with LocalSpark 0, 0, taskBinary, 
rdd.partitions(0), Seq.empty, 0, new Properties, closureSerializer.serialize(TaskMetrics.registered).array()) intercept[RuntimeException] { - task.run(0, 0, null, null) + task.run(0, 0, null, null, Option.empty) } assert(TaskContextSuite.lastError.getMessage == "damn error") } From bf52fa83b2a031cfa66bdf00a0710c7d6d2b326b Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Fri, 16 Oct 2020 13:50:57 +0900 Subject: [PATCH 0254/1009] [SPARK-33165][SQL][TESTS][FOLLOW-UP] Use scala.Predef.assert instead ### What changes were proposed in this pull request? This PR proposes to use `scala.Predef.assert` instead of `org.scalatest.Assertions.assert` removed at https://github.com/apache/spark/pull/30064 ### Why are the changes needed? Just to keep the same behaviour. ### Does this PR introduce _any_ user-facing change? No, dev-only ### How was this patch tested? Recover the existing asserts. Closes #30065 from HyukjinKwon/SPARK-33165. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- .../src/test/scala/org/apache/spark/benchmark/Benchmark.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala b/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala index 0b2f512b947e1..5511852ca176e 100644 --- a/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala +++ b/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala @@ -161,6 +161,7 @@ private[spark] class Benchmark( // scalastyle:off println(s" Stopped after $i iterations, ${NANOSECONDS.toMillis(runTimes.sum)} ms") // scalastyle:on + assert(runTimes.nonEmpty) val best = runTimes.min val avg = runTimes.sum / runTimes.size val stdev = if (runTimes.size > 1) { @@ -182,15 +183,18 @@ private[spark] object Benchmark { private var timeStart: Long = 0L def startTiming(): Unit = { + assert(timeStart == 0L, "Already started timing.") timeStart = System.nanoTime } def stopTiming(): Unit = { + assert(timeStart != 0L, "Have not started timing.") accumulatedTime += System.nanoTime - timeStart timeStart = 0L } def totalTime(): Long = { + assert(timeStart == 0L, "Have not stopped timing.") accumulatedTime } } From 306872eefaa9228eaed1e797be11c8c5fa1705cd Mon Sep 17 00:00:00 2001 From: "xuewei.linxuewei" Date: Fri, 16 Oct 2020 06:05:17 +0000 Subject: [PATCH 0255/1009] [SPARK-33139][SQL] protect setActionSession and clearActiveSession ### What changes were proposed in this pull request? This PR is a sub-task of [SPARK-33138](https://issues.apache.org/jira/browse/SPARK-33138). In order to make SQLConf.get reliable and stable, we need to make sure user can't pollute the SQLConf and SparkSession Context via calling setActiveSession and clearActiveSession. Change of the PR: * add legacy config spark.sql.legacy.allowModifyActiveSession to fallback to old behavior if user do need to call these two API. * by default, if user call these two API, it will throw exception * add extra two internal and private API setActiveSessionInternal and clearActiveSessionInternal for current internal usage * change all internal reference to new internal API except for SQLContext.setActive and SQLContext.clearActive ### Why are the changes needed? Make SQLConf.get reliable and stable. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? * Add UT in SparkSessionBuilderSuite to test the legacy config * Existing test Closes #30042 from leanken/leanken-SPARK-33139. 
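A short sketch of the resulting behavior (mirroring the new test in `SparkSessionBuilderSuite` below; the object name is made up):

```scala
import org.apache.spark.sql.SparkSession

// Illustrative only: with this patch, modifying the active session from user
// code throws unless the legacy static conf is enabled at session build time.
object ActiveSessionSketch extends App {
  val spark = SparkSession.builder().master("local[1]").getOrCreate()

  try {
    SparkSession.setActiveSession(spark.newSession()) // deprecated by this patch
  } catch {
    case _: UnsupportedOperationException =>
      println("Modifying the active session is rejected by default.")
  }

  // Opting back into the old behavior requires the static conf, e.g.:
  // SparkSession.builder()
  //   .config("spark.sql.legacy.allowModifyActiveSession", "true")
  //   .getOrCreate()

  spark.stop()
}
```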
Authored-by: xuewei.linxuewei Signed-off-by: Wenchen Fan --- docs/sql-migration-guide.md | 2 + .../kafka010/KafkaMicroBatchSourceSuite.scala | 2 +- .../org/apache/spark/SharedSparkSession.java | 3 +- .../mllib/util/MLlibTestSparkContext.scala | 2 +- python/pyspark/sql/session.py | 15 ++++++-- .../apache/spark/sql/internal/SQLConf.scala | 3 ++ .../spark/sql/internal/StaticSQLConf.scala | 9 +++++ .../org/apache/spark/sql/SparkSession.scala | 26 +++++++++++-- .../spark/sql/execution/SQLExecution.scala | 6 +-- .../spark/sql/execution/SparkPlan.scala | 2 +- .../execution/streaming/StreamExecution.scala | 2 +- .../apache/spark/sql/DeprecatedAPISuite.scala | 6 +-- .../apache/spark/sql/LocalSparkSession.scala | 4 +- .../apache/spark/sql/SQLContextSuite.scala | 2 +- .../org/apache/spark/sql/SQLQuerySuite.scala | 2 +- .../apache/spark/sql/SessionStateSuite.scala | 2 +- .../spark/sql/SparkSessionBuilderSuite.scala | 37 ++++++++++++++----- .../sql/SparkSessionExtensionSuite.scala | 2 +- .../sql/connector/V1WriteFallbackSuite.scala | 4 +- .../CoalesceShufflePartitionsSuite.scala | 4 +- .../adaptive/AdaptiveQueryExecSuite.scala | 4 +- .../state/StateStoreCoordinatorSuite.scala | 2 +- .../streaming/state/StateStoreSuite.scala | 2 +- .../SymmetricHashJoinStateManagerSuite.scala | 2 +- .../sql/streaming/StreamingJoinSuite.scala | 4 +- .../apache/spark/sql/test/SQLTestUtils.scala | 2 +- .../spark/sql/test/SharedSparkSession.scala | 2 +- .../spark/sql/test/TestSQLContext.scala | 2 +- .../hive/thriftserver/SparkOperation.scala | 6 +-- .../apache/spark/sql/hive/test/TestHive.scala | 2 +- 30 files changed, 112 insertions(+), 51 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index c1de58d85d5bf..cc69e78108ffd 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -24,6 +24,8 @@ license: | ## Upgrading from Spark SQL 3.0 to 3.1 + - In Spark 3.1, `SparkSession.setActiveSession` and `SparkSession.clearActiveSession` are deprecated and unsupported, it will throw `UnsupportedOperationException` if called. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.allowModifyActiveSession` to true if you really need to use these APIs. + - In Spark 3.1, statistical aggregation function includes `std`, `stddev`, `stddev_samp`, `variance`, `var_samp`, `skewness`, `kurtosis`, `covar_samp`, `corr` will return `NULL` instead of `Double.NaN` when `DivideByZero` occurs during expression evaluation, for example, when `stddev_samp` applied on a single element set. In Spark version 3.0 and earlier, it will return `Double.NaN` in such case. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.statisticalAggregate` to `true`. - In Spark 3.1, grouping_id() returns long values. In Spark version 3.0 and earlier, this function returns int values. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.integerGroupingId` to `true`. 
diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala index 63659989dec1b..853d201ba7ea5 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala @@ -1189,7 +1189,7 @@ class KafkaMicroBatchV2SourceSuite extends KafkaMicroBatchSourceSuiteBase { numPartitionsGenerated: Int, reusesConsumers: Boolean): Unit = { - SparkSession.setActiveSession(spark) + SparkSession.setActiveSessionInternal(spark) withTempDir { dir => val provider = new KafkaSourceProvider() val options = Map( diff --git a/mllib/src/test/java/org/apache/spark/SharedSparkSession.java b/mllib/src/test/java/org/apache/spark/SharedSparkSession.java index 35a250955b282..49bd0a43a16d6 100644 --- a/mllib/src/test/java/org/apache/spark/SharedSparkSession.java +++ b/mllib/src/test/java/org/apache/spark/SharedSparkSession.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.io.Serializable; +import org.apache.spark.sql.SparkSession$; import org.junit.After; import org.junit.Before; @@ -47,7 +48,7 @@ public void tearDown() { spark = null; } finally { SparkSession.clearDefaultSession(); - SparkSession.clearActiveSession(); + SparkSession$.MODULE$.clearActiveSessionInternal(); } } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala index 5eb128abacdb9..840ca6f8af0b1 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala @@ -48,7 +48,7 @@ trait MLlibTestSparkContext extends TempDirectory { self: Suite => override def afterAll(): Unit = { try { Utils.deleteRecursively(new File(checkpointDir)) - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() if (spark != null) { spark.stop() } diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index 8ca6e41a9b940..e6ab1ea3878f3 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -230,7 +230,10 @@ def __init__(self, sparkContext, jsparkSession=None): SparkSession._instantiatedSession = self SparkSession._activeSession = self self._jvm.SparkSession.setDefaultSession(self._jsparkSession) - self._jvm.SparkSession.setActiveSession(self._jsparkSession) + self._jvm.java.lang.Class.forName("org.apache.spark.sql.SparkSession$")\ + .getDeclaredField("MODULE$")\ + .get(None)\ + .setActiveSessionInternal(self._jsparkSession) def _repr_html_(self): return """ @@ -561,7 +564,10 @@ def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=Tr Py4JJavaError: ... """ SparkSession._activeSession = self - self._jvm.SparkSession.setActiveSession(self._jsparkSession) + self._jvm.java.lang.Class.forName("org.apache.spark.sql.SparkSession$")\ + .getDeclaredField("MODULE$")\ + .get(None)\ + .setActiveSessionInternal(self._jsparkSession) if isinstance(data, DataFrame): raise TypeError("data is already a DataFrame") @@ -683,7 +689,10 @@ def stop(self): self._sc.stop() # We should clean the default session up. See SPARK-23228. 
self._jvm.SparkSession.clearDefaultSession() - self._jvm.SparkSession.clearActiveSession() + self._jvm.java.lang.Class.forName("org.apache.spark.sql.SparkSession$")\ + .getDeclaredField("MODULE$")\ + .get(None)\ + .clearActiveSessionInternal() SparkSession._instantiatedSession = None SparkSession._activeSession = None SQLContext._instantiatedContext = None diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 79d78088f51a0..319387fe854cf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -3414,6 +3414,9 @@ class SQLConf extends Serializable with Logging { def integerGroupingIdEnabled: Boolean = getConf(SQLConf.LEGACY_INTEGER_GROUPING_ID) + def legacyAllowModifyActiveSession: Boolean = + getConf(StaticSQLConf.LEGACY_ALLOW_MODIFY_ACTIVE_SESSION) + def legacyAllowCastNumericToTimestamp: Boolean = getConf(SQLConf.LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala index ca1074fcf6fc0..b9446465e1f79 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala @@ -249,4 +249,13 @@ object StaticSQLConf { .version("3.1.0") .timeConf(TimeUnit.SECONDS) .createWithDefault(-1) + + val LEGACY_ALLOW_MODIFY_ACTIVE_SESSION = + buildStaticConf("spark.sql.legacy.allowModifyActiveSession") + .internal() + .doc("When set to true, user is allowed to use setActiveSession or clearActiveSession " + + "to modify the current active SparkSession, otherwise an exception will be thrown.") + .version("3.1.0") + .booleanConf + .createWithDefault(false) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index 5704414df2d0d..b15d6f981291c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -765,9 +765,9 @@ class SparkSession private( // set and not the default session. This to prevent that we promote the default session to the // active session once we are done. 
val old = SparkSession.activeThreadSession.get() - SparkSession.setActiveSession(this) + SparkSession.setActiveSessionInternal(this) try block finally { - SparkSession.setActiveSession(old) + SparkSession.setActiveSessionInternal(old) } } } @@ -946,7 +946,7 @@ object SparkSession extends Logging { session = new SparkSession(sparkContext, None, None, extensions, options.toMap) setDefaultSession(session) - setActiveSession(session) + setActiveSessionInternal(session) registerContextListener(sparkContext) } @@ -984,7 +984,16 @@ object SparkSession extends Logging { * * @since 2.0.0 */ + @deprecated("This method is deprecated and will be removed in future versions.", "3.1.0") def setActiveSession(session: SparkSession): Unit = { + if (SQLConf.get.legacyAllowModifyActiveSession) { + setActiveSessionInternal(session) + } else { + throw new UnsupportedOperationException("Not allowed to modify active Spark session.") + } + } + + private[sql] def setActiveSessionInternal(session: SparkSession): Unit = { activeThreadSession.set(session) } @@ -994,7 +1003,16 @@ object SparkSession extends Logging { * * @since 2.0.0 */ + @deprecated("This method is deprecated and will be removed in future versions.", "3.1.0") def clearActiveSession(): Unit = { + if (SQLConf.get.legacyAllowModifyActiveSession) { + clearActiveSessionInternal() + } else { + throw new UnsupportedOperationException("Not allowed to modify active Spark session.") + } + } + + private[spark] def clearActiveSessionInternal(): Unit = { activeThreadSession.remove() } @@ -1149,7 +1167,7 @@ object SparkSession extends Logging { | """.stripMargin) session.get.stop() - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() SparkSession.clearDefaultSession() } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala index c62670b227bcc..1465e57743323 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala @@ -180,15 +180,15 @@ object SQLExecution { exec.submit(() => { val originalSession = SparkSession.getActiveSession val originalLocalProps = sc.getLocalProperties - SparkSession.setActiveSession(activeSession) + SparkSession.setActiveSessionInternal(activeSession) sc.setLocalProperties(localProps) val res = body // reset active session and local props. sc.setLocalProperties(originalLocalProps) if (originalSession.nonEmpty) { - SparkSession.setActiveSession(originalSession.get) + SparkSession.setActiveSessionInternal(originalSession.get) } else { - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() } res }) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala index ead8c00031112..42eb131b8e4ce 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala @@ -82,7 +82,7 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ /** Overridden make copy also propagates sqlContext to copied plan. 
*/ override def makeCopy(newArgs: Array[AnyRef]): SparkPlan = { if (sqlContext != null) { - SparkSession.setActiveSession(sqlContext.sparkSession) + SparkSession.setActiveSessionInternal(sqlContext.sparkSession) } super.makeCopy(newArgs) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index aba0463f56cd7..09c0d2148307c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -315,7 +315,7 @@ abstract class StreamExecution( startLatch.countDown() // While active, repeatedly attempt to run batches. - SparkSession.setActiveSession(sparkSession) + SparkSession.setActiveSessionInternal(sparkSession) updateStatusMessage("Initializing sources") // force initialization of the logical plan so that the sources can be created diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DeprecatedAPISuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DeprecatedAPISuite.scala index 25b8849d61248..d27333ec727d0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DeprecatedAPISuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DeprecatedAPISuite.scala @@ -130,10 +130,10 @@ class DeprecatedAPISuite extends QueryTest with SharedSparkSession { test("SQLContext.setActive/clearActive") { val sc = spark.sparkContext val sqlContext = new SQLContext(sc) - SQLContext.setActive(sqlContext) + intercept[UnsupportedOperationException](SQLContext.setActive(sqlContext)) + assert(SparkSession.getActiveSession === Some(spark)) + intercept[UnsupportedOperationException](SQLContext.clearActive()) assert(SparkSession.getActiveSession === Some(spark)) - SQLContext.clearActive() - assert(SparkSession.getActiveSession === None) } test("SQLContext.applySchema") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/LocalSparkSession.scala b/sql/core/src/test/scala/org/apache/spark/sql/LocalSparkSession.scala index 36db95ff8a31b..8fdf55aeae6d4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/LocalSparkSession.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/LocalSparkSession.scala @@ -30,14 +30,14 @@ trait LocalSparkSession extends BeforeAndAfterEach with BeforeAndAfterAll { self override def beforeAll(): Unit = { super.beforeAll() InternalLoggerFactory.setDefaultFactory(Slf4JLoggerFactory.INSTANCE) - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() SparkSession.clearDefaultSession() } override def afterEach(): Unit = { try { LocalSparkSession.stop(spark) - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() SparkSession.clearDefaultSession() spark = null } finally { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala index a1799829932b8..aec124de81049 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala @@ -43,7 +43,7 @@ class SQLContextSuite extends SparkFunSuite with SharedSparkContext { val newSession = sqlContext.newSession() assert(SQLContext.getOrCreate(sc).eq(sqlContext), "SQLContext.getOrCreate after explicitly created SQLContext did not return the context") - SparkSession.setActiveSession(newSession.sparkSession) + 
SparkSession.setActiveSessionInternal(newSession.sparkSession) assert(SQLContext.getOrCreate(sc).eq(newSession), "SQLContext.getOrCreate after explicitly setActive() did not return the active context") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index b86df4db816b3..a002f720a3c4a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -3468,7 +3468,7 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark // problem before the fix. withSQLConf(SQLConf.CODEGEN_FALLBACK.key -> "true") { val cloned = spark.cloneSession() - SparkSession.setActiveSession(cloned) + SparkSession.setActiveSessionInternal(cloned) assert(SQLConf.get.getConf(SQLConf.CODEGEN_FALLBACK) === true) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala index 003f5bc835d5f..2f766a270ad73 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala @@ -48,7 +48,7 @@ class SessionStateSuite extends SparkFunSuite { if (activeSession != null) { activeSession.stop() activeSession = null - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() SparkSession.clearDefaultSession() } } finally { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala index 9da32d02aa723..e1f7b6f455e14 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala @@ -22,7 +22,7 @@ import org.scalatest.BeforeAndAfterEach import org.apache.spark.{SparkConf, SparkContext, SparkException, SparkFunSuite} import org.apache.spark.internal.config.EXECUTOR_ALLOW_SPARK_CONTEXT import org.apache.spark.internal.config.UI.UI_ENABLED -import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} import org.apache.spark.sql.internal.StaticSQLConf._ /** @@ -33,7 +33,7 @@ class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { override def afterEach(): Unit = { // This suite should not interfere with the other test suites. 
SparkSession.getActiveSession.foreach(_.stop()) - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() SparkSession.getDefaultSession.foreach(_.stop()) SparkSession.clearDefaultSession() } @@ -64,7 +64,7 @@ class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { test("get active or default session") { val session = SparkSession.builder().master("local").getOrCreate() assert(SparkSession.active == session) - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() assert(SparkSession.active == session) SparkSession.clearDefaultSession() intercept[IllegalStateException](SparkSession.active) @@ -82,7 +82,7 @@ class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { test("use session from active thread session and propagate config options") { val defaultSession = SparkSession.builder().master("local").getOrCreate() val activeSession = defaultSession.newSession() - SparkSession.setActiveSession(activeSession) + SparkSession.setActiveSessionInternal(activeSession) val session = SparkSession.builder().config("spark-config2", "a").getOrCreate() assert(activeSession != defaultSession) @@ -90,7 +90,7 @@ class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { assert(session.conf.get("spark-config2") == "a") assert(session.sessionState.conf == SQLConf.get) assert(SQLConf.get.getConfString("spark-config2") == "a") - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() assert(SparkSession.builder().getOrCreate() == defaultSession) } @@ -105,7 +105,7 @@ class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { test("create a new session if the active thread session has been stopped") { val activeSession = SparkSession.builder().master("local").getOrCreate() - SparkSession.setActiveSession(activeSession) + SparkSession.setActiveSessionInternal(activeSession) activeSession.stop() val newSession = SparkSession.builder().master("local").getOrCreate() assert(newSession != activeSession) @@ -181,7 +181,7 @@ class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { .master("local") .getOrCreate() val postFirstCreation = context.listenerBus.listeners.size() - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() SparkSession.clearDefaultSession() SparkSession @@ -190,7 +190,7 @@ class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { .master("local") .getOrCreate() val postSecondCreation = context.listenerBus.listeners.size() - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() SparkSession.clearDefaultSession() assert(postFirstCreation == postSecondCreation) } @@ -211,7 +211,7 @@ class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { assert(session1.conf.get(GLOBAL_TEMP_DATABASE) === "globaltempdb-spark-31532") // do not propagate static sql configs to the existing default session - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() val session2 = SparkSession .builder() .config(WAREHOUSE_PATH.key, "SPARK-31532-db") @@ -281,4 +281,23 @@ class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { () } } + + test("SPARK-33139: Test SparkSession.setActiveSession/clearActiveSession") { + Seq(true, false).foreach { allowModifyActiveSession => + val session = SparkSession.builder() + .master("local") + .config(StaticSQLConf.LEGACY_ALLOW_MODIFY_ACTIVE_SESSION.key, allowModifyActiveSession) + 
.getOrCreate() + + val newSession = session.newSession() + if (!allowModifyActiveSession) { + intercept[UnsupportedOperationException](SparkSession.setActiveSession(newSession)) + intercept[UnsupportedOperationException](SparkSession.clearActiveSession()) + } else { + SparkSession.setActiveSession(newSession) + SparkSession.clearActiveSession() + } + session.stop() + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala index e5e8bc6917799..ebe4e8dea97e3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala @@ -51,7 +51,7 @@ class SparkSessionExtensionSuite extends SparkFunSuite { private def stop(spark: SparkSession): Unit = { spark.stop() - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() SparkSession.clearDefaultSession() } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala index 4b52a4cbf4116..0a86a41e86255 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala @@ -130,7 +130,7 @@ class V1WriteFallbackSuite extends QueryTest with SharedSparkSession with Before } test("fallback writes should only analyze plan once") { - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() SparkSession.clearDefaultSession() try { val session = SparkSession.builder() @@ -141,7 +141,7 @@ class V1WriteFallbackSuite extends QueryTest with SharedSparkSession with Before val df = session.createDataFrame(Seq((1, "x"), (2, "y"), (3, "z"))) df.write.mode("append").option("name", "t1").format(v2Format).saveAsTable("test") } finally { - SparkSession.setActiveSession(spark) + SparkSession.setActiveSessionInternal(spark) SparkSession.setDefaultSession(spark) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/CoalesceShufflePartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/CoalesceShufflePartitionsSuite.scala index 22c5b651f7e12..fd55ad69ed386 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/CoalesceShufflePartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/CoalesceShufflePartitionsSuite.scala @@ -38,14 +38,14 @@ class CoalesceShufflePartitionsSuite extends SparkFunSuite with BeforeAndAfterAl originalActiveSparkSession = SparkSession.getActiveSession originalInstantiatedSparkSession = SparkSession.getDefaultSession - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() SparkSession.clearDefaultSession() } override protected def afterAll(): Unit = { try { // Set these states back. 
- originalActiveSparkSession.foreach(ctx => SparkSession.setActiveSession(ctx)) + originalActiveSparkSession.foreach(ctx => SparkSession.setActiveSessionInternal(ctx)) originalInstantiatedSparkSession.foreach(ctx => SparkSession.setDefaultSession(ctx)) } finally { super.afterAll() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index 38a323b1c057e..fc95ab53dade8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -999,9 +999,9 @@ class AdaptiveQueryExecSuite withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { val df = spark.range(10).select(sum('id)) assert(df.queryExecution.executedPlan.isInstanceOf[AdaptiveSparkPlanExec]) - SparkSession.setActiveSession(null) + SparkSession.setActiveSessionInternal(null) checkAnswer(df, Seq(Row(45))) - SparkSession.setActiveSession(spark) // recover the active session. + SparkSession.setActiveSessionInternal(spark) // recover the active session. } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreCoordinatorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreCoordinatorSuite.scala index 7bca225dfdd8f..20e488960fa59 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreCoordinatorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreCoordinatorSuite.scala @@ -121,7 +121,7 @@ class StateStoreCoordinatorSuite extends SparkFunSuite with SharedSparkContext { var coordRef: StateStoreCoordinatorRef = null try { val spark = SparkSession.builder().sparkContext(sc).getOrCreate() - SparkSession.setActiveSession(spark) + SparkSession.setActiveSessionInternal(spark) import spark.implicits._ coordRef = spark.streams.stateStoreCoordinator implicit val sqlContext = spark.sqlContext diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala index 488879938339d..5dbc6723a3ff9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala @@ -567,7 +567,7 @@ class StateStoreSuite extends StateStoreSuiteBase[HDFSBackedStateStoreProvider] try { val checkpointLocation = Utils.createTempDir().getAbsoluteFile val spark = SparkSession.builder().master("local[2]").getOrCreate() - SparkSession.setActiveSession(spark) + SparkSession.setActiveSessionInternal(spark) implicit val sqlContext = spark.sqlContext spark.conf.set(SQLConf.SHUFFLE_PARTITIONS.key, "1") import spark.implicits._ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManagerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManagerSuite.scala index ce1eabeb932fb..5df47e1d5faa0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManagerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManagerSuite.scala @@ -34,7 +34,7 @@ import 
org.apache.spark.sql.types._ class SymmetricHashJoinStateManagerSuite extends StreamTest with BeforeAndAfter { before { - SparkSession.setActiveSession(spark) // set this before force initializing 'joinExec' + SparkSession.setActiveSessionInternal(spark) // set this before force initializing 'joinExec' spark.streams.stateStoreCoordinator // initialize the lazy coordinator } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala index b182727408bbf..b235bf7c3180a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala @@ -45,7 +45,7 @@ import org.apache.spark.util.Utils class StreamingInnerJoinSuite extends StreamTest with StateStoreMetricsTest with BeforeAndAfter { before { - SparkSession.setActiveSession(spark) // set this before force initializing 'joinExec' + SparkSession.setActiveSessionInternal(spark) // set this before force initializing 'joinExec' spark.streams.stateStoreCoordinator // initialize the lazy coordinator } @@ -492,7 +492,7 @@ class StreamingOuterJoinSuite extends StreamTest with StateStoreMetricsTest with import org.apache.spark.sql.functions._ before { - SparkSession.setActiveSession(spark) // set this before force initializing 'joinExec' + SparkSession.setActiveSessionInternal(spark) // set this before force initializing 'joinExec' spark.streams.stateStoreCoordinator // initialize the lazy coordinator } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala index 7be15e9d87004..d15dc8c6bccd5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala @@ -242,7 +242,7 @@ private[sql] trait SQLTestUtilsBase } protected override def withSQLConf(pairs: (String, String)*)(f: => Unit): Unit = { - SparkSession.setActiveSession(spark) + SparkSession.setActiveSessionInternal(spark) super.withSQLConf(pairs: _*)(f) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala index cfc92a780308d..a38b360b79c05 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala @@ -144,7 +144,7 @@ trait SharedSparkSessionBase } } } finally { - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() SparkSession.clearDefaultSession() } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala index ac06e1f41bfb3..a477eed4478e8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala @@ -35,7 +35,7 @@ private[spark] class TestSparkSession(sc: SparkContext) extends SparkSession(sc) } SparkSession.setDefaultSession(this) - SparkSession.setActiveSession(this) + SparkSession.setActiveSessionInternal(this) @transient override lazy val sessionState: SessionState = { diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkOperation.scala 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkOperation.scala index bbfc1b83379aa..be9c024f9ca64 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkOperation.scala @@ -65,7 +65,7 @@ private[hive] trait SparkOperation extends Operation with Logging { try { // Set active SparkSession - SparkSession.setActiveSession(sqlContext.sparkSession) + SparkSession.setActiveSessionInternal(sqlContext.sparkSession) // Set scheduler pool sqlContext.sparkSession.conf.getOption(SQLConf.THRIFTSERVER_POOL.key) match { @@ -81,8 +81,8 @@ private[hive] trait SparkOperation extends Operation with Logging { sqlContext.sparkContext.setLocalProperties(originalProps) originalSession match { - case Some(session) => SparkSession.setActiveSession(session) - case None => SparkSession.clearActiveSession() + case Some(session) => SparkSession.setActiveSessionInternal(session) + case None => SparkSession.clearActiveSessionInternal() } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index accfcb8d9deff..0c601ef798dcc 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -195,7 +195,7 @@ private[hive] class TestHiveSparkSession( } SparkSession.setDefaultSession(this) - SparkSession.setActiveSession(this) + SparkSession.setActiveSessionInternal(this) { // set the metastore temporary configuration val metastoreTempConf = HiveUtils.newTemporaryConfiguration(useInMemoryDerby = false) ++ Map( From b69e0651fee0b8f3ae97ffdca713fb6578b9a0da Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Fri, 16 Oct 2020 11:11:57 +0000 Subject: [PATCH 0256/1009] [SPARK-33126][SQL] Simplify offset window function(Remove direction field) ### What changes were proposed in this pull request? The current `Lead`/`Lag` extends `OffsetWindowFunction`. `OffsetWindowFunction` contains field `direction` and use `direction` to calculates the `boundary`. We can use single literal expression unify the two properties. For example: 3 means `direction` is Asc and `boundary` is 3. -3 means `direction` is Desc and `boundary` is -3. ### Why are the changes needed? Improve the current implement of `Lead`/`Lag`. ### Does this PR introduce _any_ user-facing change? 'No'. ### How was this patch tested? Jenkins test. Closes #30023 from beliefer/SPARK-33126. Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: Wenchen Fan --- .../expressions/windowExpressions.scala | 33 +++++++------------ 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index 0e15ff2904306..bc0b4ac018f9e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -348,16 +348,13 @@ abstract class OffsetWindowFunction /** * (Foldable) expression that contains the number of rows between the current row and the row - * where the input expression is evaluated. + * where the input expression is evaluated. 
If `offset` is a positive integer, it means that + * the direction of the `offset` is from front to back. If it is a negative integer, the direction + * of the `offset` is from back to front. If it is zero, it means that the offset is ignored and + * use current row. */ val offset: Expression - /** - * Direction of the number of rows between the current row and the row where the input expression - * is evaluated. - */ - val direction: SortDirection - override def children: Seq[Expression] = Seq(input, offset, default) /* @@ -373,16 +370,7 @@ abstract class OffsetWindowFunction override def nullable: Boolean = default == null || default.nullable || input.nullable - override lazy val frame: WindowFrame = { - val boundary = direction match { - case Ascending => offset - case Descending => UnaryMinus(offset) match { - case e: Expression if e.foldable => Literal.create(e.eval(EmptyRow), e.dataType) - case o => o - } - } - SpecifiedWindowFrame(RowFrame, boundary, boundary) - } + override lazy val frame: WindowFrame = SpecifiedWindowFrame(RowFrame, offset, offset) override def checkInputDataTypes(): TypeCheckResult = { val check = super.checkInputDataTypes() @@ -444,8 +432,6 @@ case class Lead(input: Expression, offset: Expression, default: Expression) def this(input: Expression) = this(input, Literal(1)) def this() = this(Literal(null)) - - override val direction = Ascending } /** @@ -480,7 +466,7 @@ case class Lead(input: Expression, offset: Expression, default: Expression) since = "2.0.0", group = "window_funcs") // scalastyle:on line.size.limit line.contains.tab -case class Lag(input: Expression, offset: Expression, default: Expression) +case class Lag(input: Expression, inputOffset: Expression, default: Expression) extends OffsetWindowFunction { def this(input: Expression, offset: Expression) = this(input, offset, Literal(null)) @@ -489,7 +475,12 @@ case class Lag(input: Expression, offset: Expression, default: Expression) def this() = this(Literal(null)) - override val direction = Descending + override def children: Seq[Expression] = Seq(input, inputOffset, default) + + override val offset: Expression = UnaryMinus(inputOffset) match { + case e: Expression if e.foldable => Literal.create(e.eval(EmptyRow), e.dataType) + case o => o + } } abstract class AggregateWindowFunction extends DeclarativeAggregate with WindowFunction { From 3ae1520185e2d96d1bdbd08c989f0d48ad3ba578 Mon Sep 17 00:00:00 2001 From: ulysses Date: Fri, 16 Oct 2020 11:26:27 +0000 Subject: [PATCH 0257/1009] [SPARK-33131][SQL] Fix grouping sets with having clause can not resolve qualified col name ### What changes were proposed in this pull request? Correct the resolution of having clause. ### Why are the changes needed? Grouping sets construct new aggregate lost the qualified name of grouping expression. Here is a example: ``` -- Works resolved by `ResolveReferences` select c1 from values (1) as t1(c1) group by grouping sets(t1.c1) having c1 = 1 -- Works because of the extra expression c1 select c1 as c2 from values (1) as t1(c1) group by grouping sets(t1.c1) having t1.c1 = 1 -- Failed select c1 from values (1) as t1(c1) group by grouping sets(t1.c1) having t1.c1 = 1 ``` It wroks with `Aggregate` without grouping sets through `ResolveReferences`, but Grouping sets not works since the exprId has been changed. ### Does this PR introduce _any_ user-facing change? Yes, bug fix. ### How was this patch tested? add test. Closes #30029 from ulysses-you/SPARK-33131. 
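For reference, the failing case above can be reproduced from a spark-shell with a few lines of Scala. This is only an illustrative sketch and assumes an active `SparkSession` named `spark`; before this change the second statement failed to resolve `t1.c1`, and with the qualifier preserved it returns a single row containing 1.

```
// Sketch of the behavior described above, run against an existing SparkSession `spark`.

// Works before and after the fix: the unqualified c1 is resolved by ResolveReferences.
spark.sql(
  "SELECT c1 FROM VALUES (1) as t1(c1) GROUP BY GROUPING SETS(t1.c1) HAVING c1 = 1").show()

// Previously failed to resolve t1.c1 because the grouping-sets rewrite dropped the
// qualifier; with this change it resolves and returns a single row with value 1.
spark.sql(
  "SELECT c1 FROM VALUES (1) as t1(c1) GROUP BY GROUPING SETS(t1.c1) HAVING t1.c1 = 1").show()
```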
Authored-by: ulysses Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 2 +- .../resources/sql-tests/inputs/having.sql | 6 ++++ .../sql-tests/results/having.sql.out | 32 +++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 337cf1c0bdc50..0ba150ec1efb4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -470,7 +470,7 @@ class Analyzer( */ private def constructGroupByAlias(groupByExprs: Seq[Expression]): Seq[Alias] = { groupByExprs.map { - case e: NamedExpression => Alias(e, e.name)() + case e: NamedExpression => Alias(e, e.name)(qualifier = e.qualifier) case other => Alias(other, other.toString)() } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/having.sql b/sql/core/src/test/resources/sql-tests/inputs/having.sql index 3b75be19b5677..2799b1a94d085 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/having.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/having.sql @@ -24,3 +24,9 @@ SELECT SUM(a) AS b, CAST('2020-01-01' AS DATE) AS fake FROM VALUES (1, 10), (2, SELECT SUM(a) AS b FROM VALUES (1, 10), (2, 20) AS T(a, b) GROUP BY GROUPING SETS ((b), (a, b)) HAVING b > 10; SELECT SUM(a) AS b FROM VALUES (1, 10), (2, 20) AS T(a, b) GROUP BY CUBE(a, b) HAVING b > 10; SELECT SUM(a) AS b FROM VALUES (1, 10), (2, 20) AS T(a, b) GROUP BY ROLLUP(a, b) HAVING b > 10; + +-- SPARK-33131: Grouping sets with having clause can not resolve qualified col name. +SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY GROUPING SETS(t.c1) HAVING t.c1 = 1; +SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY CUBE(t.c1) HAVING t.c1 = 1; +SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY ROLLUP(t.c1) HAVING t.c1 = 1; +SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY t.c1 HAVING t.c1 = 1; diff --git a/sql/core/src/test/resources/sql-tests/results/having.sql.out b/sql/core/src/test/resources/sql-tests/results/having.sql.out index 1b3ac7865159f..6508143e6f9fe 100644 --- a/sql/core/src/test/resources/sql-tests/results/having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/having.sql.out @@ -81,3 +81,35 @@ SELECT SUM(a) AS b FROM VALUES (1, 10), (2, 20) AS T(a, b) GROUP BY ROLLUP(a, b) struct -- !query output 2 + + +-- !query +SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY GROUPING SETS(t.c1) HAVING t.c1 = 1 +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY CUBE(t.c1) HAVING t.c1 = 1 +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY ROLLUP(t.c1) HAVING t.c1 = 1 +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY t.c1 HAVING t.c1 = 1 +-- !query schema +struct +-- !query output +1 From e029e891abeb37f383e4d5237edf693c8ad53bed Mon Sep 17 00:00:00 2001 From: neko Date: Fri, 16 Oct 2020 23:13:22 +0800 Subject: [PATCH 0258/1009] [SPARK-33145][WEBUI] Fix when `Succeeded Jobs` has many child url elements,they will extend over the edge of the page ### What changes were proposed in this pull request? In Execution web page, when `Succeeded Job`(or Failed Jobs) has many child url elements,they will extend over the edge of the page. 
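As a simplified sketch of how the fix works (not the exact UI code): the generated job list is wrapped in a `<div>` that now carries a `job-url` class, and a matching CSS rule lets a long run of links wrap instead of overflowing. The `jobLinks` helper and URL pattern below are illustrative assumptions; the real code lives in `ExecutionPage.scala` and `spark-sql-viz.css` as shown in the diff that follows.

```
import scala.xml.Node

// Illustrative sketch only (assumes the scala-xml module is on the classpath, as it is
// for Spark's own UI code). The real page builds links with a jobURL(request, jobId) helper.
def jobLinks(label: String, jobIds: Seq[Int]): Node =
  <div class="job-url">
    {label}
    {jobIds.sorted.map(id => <a href={s"/jobs/job/?id=$id"}>{id.toString}</a>)}
  </div>

// Paired with the new rule in spark-sql-viz.css:
//   .job-url { word-wrap: break-word; }
// a long sequence of job links now breaks across lines instead of extending past the page edge.
```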
### Why are the changes needed? To make the page more friendly. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Munual test result shows as below: ![fixed](https://user-images.githubusercontent.com/52202080/95977319-50734600-0e4b-11eb-93c0-b8deb565bcd8.png) Closes #30035 from akiyamaneko/sql_execution_job_overflow. Authored-by: neko Signed-off-by: Gengliang Wang --- .../apache/spark/sql/execution/ui/static/spark-sql-viz.css | 5 +++++ .../org/apache/spark/sql/execution/ui/ExecutionPage.scala | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.css b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.css index 9a32b79cd070f..dbdbf9fbf57b1 100644 --- a/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.css +++ b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.css @@ -52,3 +52,8 @@ .tooltip-inner { word-wrap:break-word; } + +/* Breaks the long job url list when showing Details for Query in SQL */ +.job-url { + word-wrap: break-word; +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala index 76bc7faf18d01..b15c70a7eba75 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala @@ -45,7 +45,7 @@ class ExecutionPage(parent: SQLTab) extends WebUIPage("execution") with Logging if (jobStatus == status) Some(jobId) else None } if (jobs.nonEmpty) { -
<div>
+      <div class="job-url">
  5. {label} {jobs.toSeq.sorted.map { jobId => {jobId.toString}  From 250730170570140788819765ee7519bd823c173d Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 16 Oct 2020 09:37:54 -0700 Subject: [PATCH 0259/1009] [SPARK-33159][SQL] Use hive-service-rpc as dependency instead of inlining the generated code ### What changes were proposed in this pull request? Hive's `hive-service-rpc` module started since hive-2.1.0 and it contains only the thrift IDL file and the code generated by it. Removing the inlined code will help maintain and upgrade builtin hive versions ### Why are the changes needed? to simply the code. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? passing CI Closes #30055 from yaooqinn/SPARK-33159. Authored-by: Kent Yao Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 1 + dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 1 + pom.xml | 19 +- sql/hive-thriftserver/if/TCLIService.thrift | 1269 -- sql/hive-thriftserver/pom.xml | 24 +- .../service/rpc/thrift/TArrayTypeEntry.java | 387 - .../service/rpc/thrift/TBinaryColumn.java | 548 - .../hive/service/rpc/thrift/TBoolColumn.java | 548 - .../hive/service/rpc/thrift/TBoolValue.java | 390 - .../hive/service/rpc/thrift/TByteColumn.java | 548 - .../hive/service/rpc/thrift/TByteValue.java | 390 - .../hive/service/rpc/thrift/TCLIService.java | 18138 ---------------- .../rpc/thrift/TCLIServiceConstants.java | 106 - .../rpc/thrift/TCancelDelegationTokenReq.java | 495 - .../thrift/TCancelDelegationTokenResp.java | 394 - .../rpc/thrift/TCancelOperationReq.java | 394 - .../rpc/thrift/TCancelOperationResp.java | 394 - .../rpc/thrift/TCloseOperationReq.java | 394 - .../rpc/thrift/TCloseOperationResp.java | 394 - .../service/rpc/thrift/TCloseSessionReq.java | 394 - .../service/rpc/thrift/TCloseSessionResp.java | 394 - .../hive/service/rpc/thrift/TColumn.java | 736 - .../hive/service/rpc/thrift/TColumnDesc.java | 704 - .../hive/service/rpc/thrift/TColumnValue.java | 675 - .../service/rpc/thrift/TDoubleColumn.java | 548 - .../hive/service/rpc/thrift/TDoubleValue.java | 390 - .../rpc/thrift/TExecuteStatementReq.java | 863 - .../rpc/thrift/TExecuteStatementResp.java | 509 - .../service/rpc/thrift/TFetchOrientation.java | 57 - .../service/rpc/thrift/TFetchResultsReq.java | 714 - .../service/rpc/thrift/TFetchResultsResp.java | 612 - .../service/rpc/thrift/TGetCatalogsReq.java | 394 - .../service/rpc/thrift/TGetCatalogsResp.java | 509 - .../service/rpc/thrift/TGetColumnsReq.java | 822 - .../service/rpc/thrift/TGetColumnsResp.java | 509 - .../rpc/thrift/TGetCrossReferenceReq.java | 1034 - .../rpc/thrift/TGetCrossReferenceResp.java | 509 - .../rpc/thrift/TGetDelegationTokenReq.java | 596 - .../rpc/thrift/TGetDelegationTokenResp.java | 504 - .../service/rpc/thrift/TGetFunctionsReq.java | 711 - .../service/rpc/thrift/TGetFunctionsResp.java | 509 - .../hive/service/rpc/thrift/TGetInfoReq.java | 507 - .../hive/service/rpc/thrift/TGetInfoResp.java | 497 - .../hive/service/rpc/thrift/TGetInfoType.java | 180 - .../service/rpc/thrift/TGetInfoValue.java | 597 - .../rpc/thrift/TGetOperationStatusReq.java | 501 - .../rpc/thrift/TGetOperationStatusResp.java | 1342 -- .../rpc/thrift/TGetPrimaryKeysReq.java | 716 - .../rpc/thrift/TGetPrimaryKeysResp.java | 509 - .../rpc/thrift/TGetResultSetMetadataReq.java | 394 - .../rpc/thrift/TGetResultSetMetadataResp.java | 509 - .../service/rpc/thrift/TGetSchemasReq.java | 610 - .../service/rpc/thrift/TGetSchemasResp.java | 509 - 
.../service/rpc/thrift/TGetTableTypesReq.java | 394 - .../rpc/thrift/TGetTableTypesResp.java | 509 - .../service/rpc/thrift/TGetTablesReq.java | 871 - .../service/rpc/thrift/TGetTablesResp.java | 509 - .../service/rpc/thrift/TGetTypeInfoReq.java | 394 - .../service/rpc/thrift/TGetTypeInfoResp.java | 509 - .../service/rpc/thrift/THandleIdentifier.java | 508 - .../hive/service/rpc/thrift/TI16Column.java | 548 - .../hive/service/rpc/thrift/TI16Value.java | 390 - .../hive/service/rpc/thrift/TI32Column.java | 548 - .../hive/service/rpc/thrift/TI32Value.java | 390 - .../hive/service/rpc/thrift/TI64Column.java | 548 - .../hive/service/rpc/thrift/TI64Value.java | 390 - .../rpc/thrift/TJobExecutionStatus.java | 48 - .../service/rpc/thrift/TMapTypeEntry.java | 482 - .../service/rpc/thrift/TOpenSessionReq.java | 778 - .../service/rpc/thrift/TOpenSessionResp.java | 783 - .../service/rpc/thrift/TOperationHandle.java | 709 - .../service/rpc/thrift/TOperationState.java | 66 - .../service/rpc/thrift/TOperationType.java | 66 - .../rpc/thrift/TPrimitiveTypeEntry.java | 516 - .../rpc/thrift/TProgressUpdateResp.java | 1033 - .../service/rpc/thrift/TProtocolVersion.java | 69 - .../rpc/thrift/TRenewDelegationTokenReq.java | 495 - .../rpc/thrift/TRenewDelegationTokenResp.java | 394 - .../apache/hive/service/rpc/thrift/TRow.java | 443 - .../hive/service/rpc/thrift/TRowSet.java | 920 - .../service/rpc/thrift/TSessionHandle.java | 394 - .../hive/service/rpc/thrift/TStatus.java | 875 - .../hive/service/rpc/thrift/TStatusCode.java | 54 - .../service/rpc/thrift/TStringColumn.java | 548 - .../hive/service/rpc/thrift/TStringValue.java | 393 - .../service/rpc/thrift/TStructTypeEntry.java | 452 - .../hive/service/rpc/thrift/TTableSchema.java | 443 - .../hive/service/rpc/thrift/TTypeDesc.java | 443 - .../hive/service/rpc/thrift/TTypeEntry.java | 614 - .../hive/service/rpc/thrift/TTypeId.java | 105 - .../rpc/thrift/TTypeQualifierValue.java | 365 - .../service/rpc/thrift/TTypeQualifiers.java | 454 - .../service/rpc/thrift/TUnionTypeEntry.java | 452 - .../rpc/thrift/TUserDefinedTypeEntry.java | 389 - 94 files changed, 17 insertions(+), 63672 deletions(-) delete mode 100644 sql/hive-thriftserver/if/TCLIService.thrift delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TArrayTypeEntry.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBinaryColumn.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolColumn.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolValue.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TByteColumn.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TByteValue.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIService.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIServiceConstants.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationResp.java 
delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumn.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnDesc.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnValue.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleColumn.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleValue.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchOrientation.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoType.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoValue.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysReq.java delete mode 100644 
sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/THandleIdentifier.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Column.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Value.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Column.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Value.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Column.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Value.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TJobExecutionStatus.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TMapTypeEntry.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationHandle.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationState.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationType.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TPrimitiveTypeEntry.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TProgressUpdateResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TProtocolVersion.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRow.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRowSet.java delete mode 100644 
sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TSessionHandle.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStatus.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStatusCode.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStringColumn.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStringValue.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStructTypeEntry.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTableSchema.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeDesc.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeEntry.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeId.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifierValue.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifiers.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TUnionTypeEntry.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TUserDefinedTypeEntry.java diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index f049ad1f5bb74..c389c885cb0e5 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -88,6 +88,7 @@ hive-jdbc/2.3.7//hive-jdbc-2.3.7.jar hive-llap-common/2.3.7//hive-llap-common-2.3.7.jar hive-metastore/2.3.7//hive-metastore-2.3.7.jar hive-serde/2.3.7//hive-serde-2.3.7.jar +hive-service-rpc/2.3.7//hive-service-rpc-2.3.7.jar hive-shims-0.23/2.3.7//hive-shims-0.23-2.3.7.jar hive-shims-common/2.3.7//hive-shims-common-2.3.7.jar hive-shims-scheduler/2.3.7//hive-shims-scheduler-2.3.7.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index a4dbeb112473a..ed0db42828301 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -87,6 +87,7 @@ hive-jdbc/2.3.7//hive-jdbc-2.3.7.jar hive-llap-common/2.3.7//hive-llap-common-2.3.7.jar hive-metastore/2.3.7//hive-metastore-2.3.7.jar hive-serde/2.3.7//hive-serde-2.3.7.jar +hive-service-rpc/2.3.7//hive-service-rpc-2.3.7.jar hive-shims-0.23/2.3.7//hive-shims-0.23-2.3.7.jar hive-shims-common/2.3.7//hive-shims-common-2.3.7.jar hive-shims-scheduler/2.3.7//hive-shims-scheduler-2.3.7.jar diff --git a/pom.xml b/pom.xml index 75b6776cbe470..96406d9bcef13 100644 --- a/pom.xml +++ b/pom.xml @@ -1533,7 +1533,6 @@ hive-service - ${hive.group} hive-service-rpc @@ -1593,11 +1592,6 @@ ${hive.group} hive-service - - - ${hive.group} - hive-service-rpc - ${hive.group} hive-shims @@ -1852,7 +1846,6 @@ hive-service - ${hive.group} hive-service-rpc @@ -2007,7 +2000,6 @@ - ${hive.group} hive-service-rpc @@ -2029,6 +2021,17 @@ + + ${hive.group} + hive-service-rpc + ${hive.version} + + + * + * + + + net.sf.jpam jpam diff --git a/sql/hive-thriftserver/if/TCLIService.thrift b/sql/hive-thriftserver/if/TCLIService.thrift deleted file mode 100644 index 9026cd25df5b3..0000000000000 --- a/sql/hive-thriftserver/if/TCLIService.thrift +++ /dev/null @@ -1,1269 +0,0 @@ -// Licensed to the Apache 
Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Coding Conventions for this file: -// -// Structs/Enums/Unions -// * Struct, Enum, and Union names begin with a "T", -// and use a capital letter for each new word, with no underscores. -// * All fields should be declared as either optional or required. -// -// Functions -// * Function names start with a capital letter and have a capital letter for -// each new word, with no underscores. -// * Each function should take exactly one parameter, named TFunctionNameReq, -// and should return either void or TFunctionNameResp. This convention allows -// incremental updates. -// -// Services -// * Service names begin with the letter "T", use a capital letter for each -// new word (with no underscores), and end with the word "Service". - -namespace java org.apache.hive.service.rpc.thrift -namespace cpp apache.hive.service.rpc.thrift - -// List of protocol versions. A new token should be -// added to the end of this list every time a change is made. -enum TProtocolVersion { - HIVE_CLI_SERVICE_PROTOCOL_V1, - - // V2 adds support for asynchronous execution - HIVE_CLI_SERVICE_PROTOCOL_V2 - - // V3 add varchar type, primitive type qualifiers - HIVE_CLI_SERVICE_PROTOCOL_V3 - - // V4 add decimal precision/scale, char type - HIVE_CLI_SERVICE_PROTOCOL_V4 - - // V5 adds error details when GetOperationStatus returns in error state - HIVE_CLI_SERVICE_PROTOCOL_V5 - - // V6 uses binary type for binary payload (was string) and uses columnar result set - HIVE_CLI_SERVICE_PROTOCOL_V6 - - // V7 adds support for delegation token based connection - HIVE_CLI_SERVICE_PROTOCOL_V7 - - // V8 adds support for interval types - HIVE_CLI_SERVICE_PROTOCOL_V8 - - // V9 adds support for serializing ResultSets in SerDe - HIVE_CLI_SERVICE_PROTOCOL_V9 - - // V10 adds support for in place updates via GetOperationStatus - HIVE_CLI_SERVICE_PROTOCOL_V10 -} - -enum TTypeId { - BOOLEAN_TYPE, - TINYINT_TYPE, - SMALLINT_TYPE, - INT_TYPE, - BIGINT_TYPE, - FLOAT_TYPE, - DOUBLE_TYPE, - STRING_TYPE, - TIMESTAMP_TYPE, - BINARY_TYPE, - ARRAY_TYPE, - MAP_TYPE, - STRUCT_TYPE, - UNION_TYPE, - USER_DEFINED_TYPE, - DECIMAL_TYPE, - NULL_TYPE, - DATE_TYPE, - VARCHAR_TYPE, - CHAR_TYPE, - INTERVAL_YEAR_MONTH_TYPE, - INTERVAL_DAY_TIME_TYPE -} - -const set PRIMITIVE_TYPES = [ - TTypeId.BOOLEAN_TYPE, - TTypeId.TINYINT_TYPE, - TTypeId.SMALLINT_TYPE, - TTypeId.INT_TYPE, - TTypeId.BIGINT_TYPE, - TTypeId.FLOAT_TYPE, - TTypeId.DOUBLE_TYPE, - TTypeId.STRING_TYPE, - TTypeId.TIMESTAMP_TYPE, - TTypeId.BINARY_TYPE, - TTypeId.DECIMAL_TYPE, - TTypeId.NULL_TYPE, - TTypeId.DATE_TYPE, - TTypeId.VARCHAR_TYPE, - TTypeId.CHAR_TYPE, - TTypeId.INTERVAL_YEAR_MONTH_TYPE, - TTypeId.INTERVAL_DAY_TIME_TYPE -] - -const set COMPLEX_TYPES = [ - TTypeId.ARRAY_TYPE - TTypeId.MAP_TYPE - TTypeId.STRUCT_TYPE - 
TTypeId.UNION_TYPE - TTypeId.USER_DEFINED_TYPE -] - -const set COLLECTION_TYPES = [ - TTypeId.ARRAY_TYPE - TTypeId.MAP_TYPE -] - -const map TYPE_NAMES = { - TTypeId.BOOLEAN_TYPE: "BOOLEAN", - TTypeId.TINYINT_TYPE: "TINYINT", - TTypeId.SMALLINT_TYPE: "SMALLINT", - TTypeId.INT_TYPE: "INT", - TTypeId.BIGINT_TYPE: "BIGINT", - TTypeId.FLOAT_TYPE: "FLOAT", - TTypeId.DOUBLE_TYPE: "DOUBLE", - TTypeId.STRING_TYPE: "STRING", - TTypeId.TIMESTAMP_TYPE: "TIMESTAMP", - TTypeId.BINARY_TYPE: "BINARY", - TTypeId.ARRAY_TYPE: "ARRAY", - TTypeId.MAP_TYPE: "MAP", - TTypeId.STRUCT_TYPE: "STRUCT", - TTypeId.UNION_TYPE: "UNIONTYPE", - TTypeId.DECIMAL_TYPE: "DECIMAL", - TTypeId.NULL_TYPE: "NULL" - TTypeId.DATE_TYPE: "DATE" - TTypeId.VARCHAR_TYPE: "VARCHAR" - TTypeId.CHAR_TYPE: "CHAR" - TTypeId.INTERVAL_YEAR_MONTH_TYPE: "INTERVAL_YEAR_MONTH" - TTypeId.INTERVAL_DAY_TIME_TYPE: "INTERVAL_DAY_TIME" -} - -// Thrift does not support recursively defined types or forward declarations, -// which makes it difficult to represent Hive's nested types. -// To get around these limitations TTypeDesc employs a type list that maps -// integer "pointers" to TTypeEntry objects. The following examples show -// how different types are represented using this scheme: -// -// "INT": -// TTypeDesc { -// types = [ -// TTypeEntry.primitive_entry { -// type = INT_TYPE -// } -// ] -// } -// -// "ARRAY": -// TTypeDesc { -// types = [ -// TTypeEntry.array_entry { -// object_type_ptr = 1 -// }, -// TTypeEntry.primitive_entry { -// type = INT_TYPE -// } -// ] -// } -// -// "MAP": -// TTypeDesc { -// types = [ -// TTypeEntry.map_entry { -// key_type_ptr = 1 -// value_type_ptr = 2 -// }, -// TTypeEntry.primitive_entry { -// type = INT_TYPE -// }, -// TTypeEntry.primitive_entry { -// type = STRING_TYPE -// } -// ] -// } - -typedef i32 TTypeEntryPtr - -// Valid TTypeQualifiers key names -const string CHARACTER_MAXIMUM_LENGTH = "characterMaximumLength" - -// Type qualifier key name for decimal -const string PRECISION = "precision" -const string SCALE = "scale" - -union TTypeQualifierValue { - 1: optional i32 i32Value - 2: optional string stringValue -} - -// Type qualifiers for primitive type. -struct TTypeQualifiers { - 1: required map qualifiers -} - -// Type entry for a primitive type. -struct TPrimitiveTypeEntry { - // The primitive type token. This must satisfy the condition - // that type is in the PRIMITIVE_TYPES set. - 1: required TTypeId type - 2: optional TTypeQualifiers typeQualifiers -} - -// Type entry for an ARRAY type. -struct TArrayTypeEntry { - 1: required TTypeEntryPtr objectTypePtr -} - -// Type entry for a MAP type. -struct TMapTypeEntry { - 1: required TTypeEntryPtr keyTypePtr - 2: required TTypeEntryPtr valueTypePtr -} - -// Type entry for a STRUCT type. -struct TStructTypeEntry { - 1: required map nameToTypePtr -} - -// Type entry for a UNIONTYPE type. -struct TUnionTypeEntry { - 1: required map nameToTypePtr -} - -struct TUserDefinedTypeEntry { - // The fully qualified name of the class implementing this type. - 1: required string typeClassName -} - -// We use a union here since Thrift does not support inheritance. -union TTypeEntry { - 1: TPrimitiveTypeEntry primitiveEntry - 2: TArrayTypeEntry arrayEntry - 3: TMapTypeEntry mapEntry - 4: TStructTypeEntry structEntry - 5: TUnionTypeEntry unionEntry - 6: TUserDefinedTypeEntry userDefinedTypeEntry -} - -// Type descriptor for columns. -struct TTypeDesc { - // The "top" type is always the first element of the list. 
- // If the top type is an ARRAY, MAP, STRUCT, or UNIONTYPE - // type, then subsequent elements represent nested types. - 1: required list types -} - -// A result set column descriptor. -struct TColumnDesc { - // The name of the column - 1: required string columnName - - // The type descriptor for this column - 2: required TTypeDesc typeDesc - - // The ordinal position of this column in the schema - 3: required i32 position - - 4: optional string comment -} - -// Metadata used to describe the schema (column names, types, comments) -// of result sets. -struct TTableSchema { - 1: required list columns -} - -// A Boolean column value. -struct TBoolValue { - // NULL if value is unset. - 1: optional bool value -} - -// A Byte column value. -struct TByteValue { - // NULL if value is unset. - 1: optional byte value -} - -// A signed, 16 bit column value. -struct TI16Value { - // NULL if value is unset - 1: optional i16 value -} - -// A signed, 32 bit column value -struct TI32Value { - // NULL if value is unset - 1: optional i32 value -} - -// A signed 64 bit column value -struct TI64Value { - // NULL if value is unset - 1: optional i64 value -} - -// A floating point 64 bit column value -struct TDoubleValue { - // NULL if value is unset - 1: optional double value -} - -struct TStringValue { - // NULL if value is unset - 1: optional string value -} - -// A single column value in a result set. -// Note that Hive's type system is richer than Thrift's, -// so in some cases we have to map multiple Hive types -// to the same Thrift type. On the client-side this is -// disambiguated by looking at the Schema of the -// result set. -union TColumnValue { - 1: TBoolValue boolVal // BOOLEAN - 2: TByteValue byteVal // TINYINT - 3: TI16Value i16Val // SMALLINT - 4: TI32Value i32Val // INT - 5: TI64Value i64Val // BIGINT, TIMESTAMP - 6: TDoubleValue doubleVal // FLOAT, DOUBLE - 7: TStringValue stringVal // STRING, LIST, MAP, STRUCT, UNIONTYPE, BINARY, DECIMAL, NULL, INTERVAL_YEAR_MONTH, INTERVAL_DAY_TIME -} - -// Represents a row in a rowset. -struct TRow { - 1: required list colVals -} - -struct TBoolColumn { - 1: required list values - 2: required binary nulls -} - -struct TByteColumn { - 1: required list values - 2: required binary nulls -} - -struct TI16Column { - 1: required list values - 2: required binary nulls -} - -struct TI32Column { - 1: required list values - 2: required binary nulls -} - -struct TI64Column { - 1: required list values - 2: required binary nulls -} - -struct TDoubleColumn { - 1: required list values - 2: required binary nulls -} - -struct TStringColumn { - 1: required list values - 2: required binary nulls -} - -struct TBinaryColumn { - 1: required list values - 2: required binary nulls -} - -// Note that Hive's type system is richer than Thrift's, -// so in some cases we have to map multiple Hive types -// to the same Thrift type. On the client-side this is -// disambiguated by looking at the Schema of the -// result set. -union TColumn { - 1: TBoolColumn boolVal // BOOLEAN - 2: TByteColumn byteVal // TINYINT - 3: TI16Column i16Val // SMALLINT - 4: TI32Column i32Val // INT - 5: TI64Column i64Val // BIGINT, TIMESTAMP - 6: TDoubleColumn doubleVal // FLOAT, DOUBLE - 7: TStringColumn stringVal // STRING, LIST, MAP, STRUCT, UNIONTYPE, DECIMAL, NULL - 8: TBinaryColumn binaryVal // BINARY -} - -// Represents a rowset -struct TRowSet { - // The starting row offset of this rowset. 
- 1: required i64 startRowOffset - 2: required list rows - 3: optional list columns - 4: optional binary binaryColumns - 5: optional i32 columnCount -} - -// The return status code contained in each response. -enum TStatusCode { - SUCCESS_STATUS, - SUCCESS_WITH_INFO_STATUS, - STILL_EXECUTING_STATUS, - ERROR_STATUS, - INVALID_HANDLE_STATUS -} - -// The return status of a remote request -struct TStatus { - 1: required TStatusCode statusCode - - // If status is SUCCESS_WITH_INFO, info_msgs may be populated with - // additional diagnostic information. - 2: optional list infoMessages - - // If status is ERROR, then the following fields may be set - 3: optional string sqlState // as defined in the ISO/IEF CLI specification - 4: optional i32 errorCode // internal error code - 5: optional string errorMessage -} - -// The state of an operation (i.e. a query or other -// asynchronous operation that generates a result set) -// on the server. -enum TOperationState { - // The operation has been initialized - INITIALIZED_STATE, - - // The operation is running. In this state the result - // set is not available. - RUNNING_STATE, - - // The operation has completed. When an operation is in - // this state its result set may be fetched. - FINISHED_STATE, - - // The operation was canceled by a client - CANCELED_STATE, - - // The operation was closed by a client - CLOSED_STATE, - - // The operation failed due to an error - ERROR_STATE, - - // The operation is in an unrecognized state - UKNOWN_STATE, - - // The operation is in an pending state - PENDING_STATE, - - // The operation is in an timedout state - TIMEDOUT_STATE, -} - -// A string identifier. This is interpreted literally. -typedef string TIdentifier - -// A search pattern. -// -// Valid search pattern characters: -// '_': Any single character. -// '%': Any sequence of zero or more characters. -// '\': Escape character used to include special characters, -// e.g. '_', '%', '\'. If a '\' precedes a non-special -// character it has no special meaning and is interpreted -// literally. -typedef string TPattern - - -// A search pattern or identifier. Used as input -// parameter for many of the catalog functions. -typedef string TPatternOrIdentifier - -struct THandleIdentifier { - // 16 byte globally unique identifier - // This is the public ID of the handle and - // can be used for reporting. - 1: required binary guid, - - // 16 byte secret generated by the server - // and used to verify that the handle is not - // being hijacked by another user. - 2: required binary secret, -} - -// Client-side handle to persistent -// session information on the server-side. -struct TSessionHandle { - 1: required THandleIdentifier sessionId -} - -// The subtype of an OperationHandle. -enum TOperationType { - EXECUTE_STATEMENT, - GET_TYPE_INFO, - GET_CATALOGS, - GET_SCHEMAS, - GET_TABLES, - GET_TABLE_TYPES, - GET_COLUMNS, - GET_FUNCTIONS, - UNKNOWN, -} - -// Client-side reference to a task running -// asynchronously on the server. -struct TOperationHandle { - 1: required THandleIdentifier operationId - 2: required TOperationType operationType - - // If hasResultSet = TRUE, then this operation - // generates a result set that can be fetched. - // Note that the result set may be empty. - // - // If hasResultSet = FALSE, then this operation - // does not generate a result set, and calling - // GetResultSetMetadata or FetchResults against - // this OperationHandle will generate an error. 
- 3: required bool hasResultSet - - // For operations that don't generate result sets, - // modifiedRowCount is either: - // - // 1) The number of rows that were modified by - // the DML operation (e.g. number of rows inserted, - // number of rows deleted, etc). - // - // 2) 0 for operations that don't modify or add rows. - // - // 3) < 0 if the operation is capable of modifiying rows, - // but Hive is unable to determine how many rows were - // modified. For example, Hive's LOAD DATA command - // doesn't generate row count information because - // Hive doesn't inspect the data as it is loaded. - // - // modifiedRowCount is unset if the operation generates - // a result set. - 4: optional double modifiedRowCount -} - - -// OpenSession() -// -// Open a session (connection) on the server against -// which operations may be executed. -struct TOpenSessionReq { - // The version of the HiveServer2 protocol that the client is using. - 1: required TProtocolVersion client_protocol = TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V10 - - // Username and password for authentication. - // Depending on the authentication scheme being used, - // this information may instead be provided by a lower - // protocol layer, in which case these fields may be - // left unset. - 2: optional string username - 3: optional string password - - // Configuration overlay which is applied when the session is - // first created. - 4: optional map configuration -} - -struct TOpenSessionResp { - 1: required TStatus status - - // The protocol version that the server is using. - 2: required TProtocolVersion serverProtocolVersion = TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V10 - - // Session Handle - 3: optional TSessionHandle sessionHandle - - // The configuration settings for this session. - 4: optional map configuration -} - - -// CloseSession() -// -// Closes the specified session and frees any resources -// currently allocated to that session. Any open -// operations in that session will be canceled. 
-struct TCloseSessionReq { - 1: required TSessionHandle sessionHandle -} - -struct TCloseSessionResp { - 1: required TStatus status -} - - - -enum TGetInfoType { - CLI_MAX_DRIVER_CONNECTIONS = 0, - CLI_MAX_CONCURRENT_ACTIVITIES = 1, - CLI_DATA_SOURCE_NAME = 2, - CLI_FETCH_DIRECTION = 8, - CLI_SERVER_NAME = 13, - CLI_SEARCH_PATTERN_ESCAPE = 14, - CLI_DBMS_NAME = 17, - CLI_DBMS_VER = 18, - CLI_ACCESSIBLE_TABLES = 19, - CLI_ACCESSIBLE_PROCEDURES = 20, - CLI_CURSOR_COMMIT_BEHAVIOR = 23, - CLI_DATA_SOURCE_READ_ONLY = 25, - CLI_DEFAULT_TXN_ISOLATION = 26, - CLI_IDENTIFIER_CASE = 28, - CLI_IDENTIFIER_QUOTE_CHAR = 29, - CLI_MAX_COLUMN_NAME_LEN = 30, - CLI_MAX_CURSOR_NAME_LEN = 31, - CLI_MAX_SCHEMA_NAME_LEN = 32, - CLI_MAX_CATALOG_NAME_LEN = 34, - CLI_MAX_TABLE_NAME_LEN = 35, - CLI_SCROLL_CONCURRENCY = 43, - CLI_TXN_CAPABLE = 46, - CLI_USER_NAME = 47, - CLI_TXN_ISOLATION_OPTION = 72, - CLI_INTEGRITY = 73, - CLI_GETDATA_EXTENSIONS = 81, - CLI_NULL_COLLATION = 85, - CLI_ALTER_TABLE = 86, - CLI_ORDER_BY_COLUMNS_IN_SELECT = 90, - CLI_SPECIAL_CHARACTERS = 94, - CLI_MAX_COLUMNS_IN_GROUP_BY = 97, - CLI_MAX_COLUMNS_IN_INDEX = 98, - CLI_MAX_COLUMNS_IN_ORDER_BY = 99, - CLI_MAX_COLUMNS_IN_SELECT = 100, - CLI_MAX_COLUMNS_IN_TABLE = 101, - CLI_MAX_INDEX_SIZE = 102, - CLI_MAX_ROW_SIZE = 104, - CLI_MAX_STATEMENT_LEN = 105, - CLI_MAX_TABLES_IN_SELECT = 106, - CLI_MAX_USER_NAME_LEN = 107, - CLI_OJ_CAPABILITIES = 115, - - CLI_XOPEN_CLI_YEAR = 10000, - CLI_CURSOR_SENSITIVITY = 10001, - CLI_DESCRIBE_PARAMETER = 10002, - CLI_CATALOG_NAME = 10003, - CLI_COLLATION_SEQ = 10004, - CLI_MAX_IDENTIFIER_LEN = 10005, -} - -union TGetInfoValue { - 1: string stringValue - 2: i16 smallIntValue - 3: i32 integerBitmask - 4: i32 integerFlag - 5: i32 binaryValue - 6: i64 lenValue -} - -// GetInfo() -// -// This function is based on ODBC's CLIGetInfo() function. -// The function returns general information about the data source -// using the same keys as ODBC. -struct TGetInfoReq { - // The sesssion to run this request against - 1: required TSessionHandle sessionHandle - - 2: required TGetInfoType infoType -} - -struct TGetInfoResp { - 1: required TStatus status - - 2: required TGetInfoValue infoValue -} - - -// ExecuteStatement() -// -// Execute a statement. -// The returned OperationHandle can be used to check on the -// status of the statement, and to fetch results once the -// statement has finished executing. -struct TExecuteStatementReq { - // The session to execute the statement against - 1: required TSessionHandle sessionHandle - - // The statement to be executed (DML, DDL, SET, etc) - 2: required string statement - - // Configuration properties that are overlayed on top of the - // the existing session configuration before this statement - // is executed. These properties apply to this statement - // only and will not affect the subsequent state of the Session. - 3: optional map confOverlay - - // Execute asynchronously when runAsync is true - 4: optional bool runAsync = false - - // The number of seconds after which the query will timeout on the server - 5: optional i64 queryTimeout = 0 -} - -struct TExecuteStatementResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - -// GetTypeInfo() -// -// Get information about types supported by the HiveServer instance. -// The information is returned as a result set which can be fetched -// using the OperationHandle provided in the response. -// -// Refer to the documentation for ODBC's CLIGetTypeInfo function for -// the format of the result set. 
-struct TGetTypeInfoReq { - // The session to run this request against. - 1: required TSessionHandle sessionHandle -} - -struct TGetTypeInfoResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - - -// GetCatalogs() -// -// Returns the list of catalogs (databases) -// Results are ordered by TABLE_CATALOG -// -// Resultset columns : -// col1 -// name: TABLE_CAT -// type: STRING -// desc: Catalog name. NULL if not applicable. -// -struct TGetCatalogsReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle -} - -struct TGetCatalogsResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - - -// GetSchemas() -// -// Retrieves the schema names available in this database. -// The results are ordered by TABLE_CATALOG and TABLE_SCHEM. -// col1 -// name: TABLE_SCHEM -// type: STRING -// desc: schema name -// col2 -// name: TABLE_CATALOG -// type: STRING -// desc: catalog name -struct TGetSchemasReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle - - // Name of the catalog. Must not contain a search pattern. - 2: optional TIdentifier catalogName - - // schema name or pattern - 3: optional TPatternOrIdentifier schemaName -} - -struct TGetSchemasResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - - -// GetTables() -// -// Returns a list of tables with catalog, schema, and table -// type information. The information is returned as a result -// set which can be fetched using the OperationHandle -// provided in the response. -// Results are ordered by TABLE_TYPE, TABLE_CAT, TABLE_SCHEM, and TABLE_NAME -// -// Result Set Columns: -// -// col1 -// name: TABLE_CAT -// type: STRING -// desc: Catalog name. NULL if not applicable. -// -// col2 -// name: TABLE_SCHEM -// type: STRING -// desc: Schema name. -// -// col3 -// name: TABLE_NAME -// type: STRING -// desc: Table name. -// -// col4 -// name: TABLE_TYPE -// type: STRING -// desc: The table type, e.g. "TABLE", "VIEW", etc. -// -// col5 -// name: REMARKS -// type: STRING -// desc: Comments about the table -// -struct TGetTablesReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle - - // Name of the catalog or a search pattern. - 2: optional TPatternOrIdentifier catalogName - - // Name of the schema or a search pattern. - 3: optional TPatternOrIdentifier schemaName - - // Name of the table or a search pattern. - 4: optional TPatternOrIdentifier tableName - - // List of table types to match - // e.g. "TABLE", "VIEW", "SYSTEM TABLE", "GLOBAL TEMPORARY", - // "LOCAL TEMPORARY", "ALIAS", "SYNONYM", etc. - 5: optional list tableTypes -} - -struct TGetTablesResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - - -// GetTableTypes() -// -// Returns the table types available in this database. -// The results are ordered by table type. -// -// col1 -// name: TABLE_TYPE -// type: STRING -// desc: Table type name. -struct TGetTableTypesReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle -} - -struct TGetTableTypesResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - - -// GetColumns() -// -// Returns a list of columns in the specified tables. -// The information is returned as a result set which can be fetched -// using the OperationHandle provided in the response. -// Results are ordered by TABLE_CAT, TABLE_SCHEM, TABLE_NAME, -// and ORDINAL_POSITION. 
-// -// Result Set Columns are the same as those for the ODBC CLIColumns -// function. -// -struct TGetColumnsReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle - - // Name of the catalog. Must not contain a search pattern. - 2: optional TIdentifier catalogName - - // Schema name or search pattern - 3: optional TPatternOrIdentifier schemaName - - // Table name or search pattern - 4: optional TPatternOrIdentifier tableName - - // Column name or search pattern - 5: optional TPatternOrIdentifier columnName -} - -struct TGetColumnsResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - - -// GetFunctions() -// -// Returns a list of functions supported by the data source. The -// behavior of this function matches -// java.sql.DatabaseMetaData.getFunctions() both in terms of -// inputs and outputs. -// -// Result Set Columns: -// -// col1 -// name: FUNCTION_CAT -// type: STRING -// desc: Function catalog (may be null) -// -// col2 -// name: FUNCTION_SCHEM -// type: STRING -// desc: Function schema (may be null) -// -// col3 -// name: FUNCTION_NAME -// type: STRING -// desc: Function name. This is the name used to invoke the function. -// -// col4 -// name: REMARKS -// type: STRING -// desc: Explanatory comment on the function. -// -// col5 -// name: FUNCTION_TYPE -// type: SMALLINT -// desc: Kind of function. One of: -// * functionResultUnknown - Cannot determine if a return value or a table -// will be returned. -// * functionNoTable - Does not a return a table. -// * functionReturnsTable - Returns a table. -// -// col6 -// name: SPECIFIC_NAME -// type: STRING -// desc: The name which uniquely identifies this function within its schema. -// In this case this is the fully qualified class name of the class -// that implements this function. -// -struct TGetFunctionsReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle - - // A catalog name; must match the catalog name as it is stored in the - // database; "" retrieves those without a catalog; null means - // that the catalog name should not be used to narrow the search. - 2: optional TIdentifier catalogName - - // A schema name pattern; must match the schema name as it is stored - // in the database; "" retrieves those without a schema; null means - // that the schema name should not be used to narrow the search. - 3: optional TPatternOrIdentifier schemaName - - // A function name pattern; must match the function name as it is stored - // in the database. - 4: required TPatternOrIdentifier functionName -} - -struct TGetFunctionsResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - -struct TGetPrimaryKeysReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle - - // Name of the catalog. - 2: optional TIdentifier catalogName - - // Name of the schema. - 3: optional TIdentifier schemaName - - // Name of the table. - 4: optional TIdentifier tableName -} - -struct TGetPrimaryKeysResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - -struct TGetCrossReferenceReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle - - // Name of the parent catalog. - 2: optional TIdentifier parentCatalogName - - // Name of the parent schema. - 3: optional TIdentifier parentSchemaName - - // Name of the parent table. - 4: optional TIdentifier parentTableName - - // Name of the foreign catalog. 
- 5: optional TIdentifier foreignCatalogName - - // Name of the foreign schema. - 6: optional TIdentifier foreignSchemaName - - // Name of the foreign table. - 7: optional TIdentifier foreignTableName -} - -struct TGetCrossReferenceResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - -// GetOperationStatus() -// -// Get the status of an operation running on the server. -struct TGetOperationStatusReq { - // Session to run this request against - 1: required TOperationHandle operationHandle - // optional arguments to get progress information - 2: optional bool getProgressUpdate -} - -struct TGetOperationStatusResp { - 1: required TStatus status - 2: optional TOperationState operationState - - // If operationState is ERROR_STATE, then the following fields may be set - // sqlState as defined in the ISO/IEF CLI specification - 3: optional string sqlState - - // Internal error code - 4: optional i32 errorCode - - // Error message - 5: optional string errorMessage - - // List of statuses of sub tasks - 6: optional string taskStatus - - // When was the operation started - 7: optional i64 operationStarted - - // When was the operation completed - 8: optional i64 operationCompleted - - // If the operation has the result - 9: optional bool hasResultSet - - 10: optional TProgressUpdateResp progressUpdateResponse - -} - - -// CancelOperation() -// -// Cancels processing on the specified operation handle and -// frees any resources which were allocated. -struct TCancelOperationReq { - // Operation to cancel - 1: required TOperationHandle operationHandle -} - -struct TCancelOperationResp { - 1: required TStatus status -} - - -// CloseOperation() -// -// Given an operation in the FINISHED, CANCELED, -// or ERROR states, CloseOperation() will free -// all of the resources which were allocated on -// the server to service the operation. -struct TCloseOperationReq { - 1: required TOperationHandle operationHandle -} - -struct TCloseOperationResp { - 1: required TStatus status -} - - -// GetResultSetMetadata() -// -// Retrieves schema information for the specified operation -struct TGetResultSetMetadataReq { - // Operation for which to fetch result set schema information - 1: required TOperationHandle operationHandle -} - -struct TGetResultSetMetadataResp { - 1: required TStatus status - 2: optional TTableSchema schema -} - - -enum TFetchOrientation { - // Get the next rowset. The fetch offset is ignored. - FETCH_NEXT, - - // Get the previous rowset. The fetch offset is ignored. - FETCH_PRIOR, - - // Return the rowset at the given fetch offset relative - // to the curren rowset. - // NOT SUPPORTED - FETCH_RELATIVE, - - // Return the rowset at the specified fetch offset. - // NOT SUPPORTED - FETCH_ABSOLUTE, - - // Get the first rowset in the result set. - FETCH_FIRST, - - // Get the last rowset in the result set. - // NOT SUPPORTED - FETCH_LAST -} - -// FetchResults() -// -// Fetch rows from the server corresponding to -// a particular OperationHandle. -struct TFetchResultsReq { - // Operation from which to fetch results. - 1: required TOperationHandle operationHandle - - // The fetch orientation. This must be either - // FETCH_NEXT, FETCH_PRIOR or FETCH_FIRST. Defaults to FETCH_NEXT. - 2: required TFetchOrientation orientation = TFetchOrientation.FETCH_NEXT - - // Max number of rows that should be returned in - // the rowset. - 3: required i64 maxRows - - // The type of a fetch results request. 0 represents Query output. 
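To make the operation-handle lifecycle above concrete, here is a hedged sketch that polls GetOperationStatus() until an operation settles, reads its result schema, and then releases it with CloseOperation(). It is not part of the patch: `client` is again an assumed, already-connected `TCLIService.Client`, and it assumes the TOperationState values (INITIALIZED_STATE, PENDING_STATE, RUNNING_STATE, and the ERROR_STATE case mentioned in the comments) defined earlier in this IDL.

```
import org.apache.hive.service.rpc.thrift.TCLIService;
import org.apache.hive.service.rpc.thrift.TCloseOperationReq;
import org.apache.hive.service.rpc.thrift.TGetOperationStatusReq;
import org.apache.hive.service.rpc.thrift.TGetOperationStatusResp;
import org.apache.hive.service.rpc.thrift.TGetResultSetMetadataReq;
import org.apache.hive.service.rpc.thrift.TGetResultSetMetadataResp;
import org.apache.hive.service.rpc.thrift.TOperationHandle;
import org.apache.hive.service.rpc.thrift.TOperationState;

public final class OperationLifecycleSketch {
  // Polls GetOperationStatus() until the operation leaves its transient states,
  // prints the result schema, then frees the server-side resources.
  static void awaitAndClose(TCLIService.Client client, TOperationHandle op)
      throws org.apache.thrift.TException, InterruptedException {
    TGetOperationStatusResp status;
    do {
      TGetOperationStatusReq req = new TGetOperationStatusReq(op);
      req.setGetProgressUpdate(true);                 // ask for optional progress information
      status = client.GetOperationStatus(req);
      Thread.sleep(100);                              // naive fixed backoff, good enough for a sketch
    } while (status.getOperationState() == TOperationState.INITIALIZED_STATE
        || status.getOperationState() == TOperationState.PENDING_STATE
        || status.getOperationState() == TOperationState.RUNNING_STATE);

    if (status.getOperationState() == TOperationState.ERROR_STATE) {
      // sqlState, errorCode and errorMessage are only populated in the error case.
      throw new RuntimeException(status.getErrorMessage());
    }

    // Schema of the (possibly empty) result set backing this operation.
    TGetResultSetMetadataResp meta =
        client.GetResultSetMetadata(new TGetResultSetMetadataReq(op));
    System.out.println("result columns: " + meta.getSchema().getColumns().size());

    // CloseOperation() frees everything the server allocated for this handle.
    client.CloseOperation(new TCloseOperationReq(op));
  }
}
```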
1 represents Log - 4: optional i16 fetchType = 0 -} - -struct TFetchResultsResp { - 1: required TStatus status - - // TRUE if there are more rows left to fetch from the server. - 2: optional bool hasMoreRows - - // The rowset. This is optional so that we have the - // option in the future of adding alternate formats for - // representing result set data, e.g. delimited strings, - // binary encoded, etc. - 3: optional TRowSet results -} - -// GetDelegationToken() -// Retrieve delegation token for the current user -struct TGetDelegationTokenReq { - // session handle - 1: required TSessionHandle sessionHandle - - // userid for the proxy user - 2: required string owner - - // designated renewer userid - 3: required string renewer -} - -struct TGetDelegationTokenResp { - // status of the request - 1: required TStatus status - - // delegation token string - 2: optional string delegationToken -} - -// CancelDelegationToken() -// Cancel the given delegation token -struct TCancelDelegationTokenReq { - // session handle - 1: required TSessionHandle sessionHandle - - // delegation token to cancel - 2: required string delegationToken -} - -struct TCancelDelegationTokenResp { - // status of the request - 1: required TStatus status -} - -// RenewDelegationToken() -// Renew the given delegation token -struct TRenewDelegationTokenReq { - // session handle - 1: required TSessionHandle sessionHandle - - // delegation token to renew - 2: required string delegationToken -} - -struct TRenewDelegationTokenResp { - // status of the request - 1: required TStatus status -} - -enum TJobExecutionStatus { - IN_PROGRESS, - COMPLETE, - NOT_AVAILABLE -} - -struct TProgressUpdateResp { - 1: required list headerNames - 2: required list> rows - 3: required double progressedPercentage - 4: required TJobExecutionStatus status - 5: required string footerSummary - 6: required i64 startTime -} - -service TCLIService { - - TOpenSessionResp OpenSession(1:TOpenSessionReq req); - - TCloseSessionResp CloseSession(1:TCloseSessionReq req); - - TGetInfoResp GetInfo(1:TGetInfoReq req); - - TExecuteStatementResp ExecuteStatement(1:TExecuteStatementReq req); - - TGetTypeInfoResp GetTypeInfo(1:TGetTypeInfoReq req); - - TGetCatalogsResp GetCatalogs(1:TGetCatalogsReq req); - - TGetSchemasResp GetSchemas(1:TGetSchemasReq req); - - TGetTablesResp GetTables(1:TGetTablesReq req); - - TGetTableTypesResp GetTableTypes(1:TGetTableTypesReq req); - - TGetColumnsResp GetColumns(1:TGetColumnsReq req); - - TGetFunctionsResp GetFunctions(1:TGetFunctionsReq req); - - TGetPrimaryKeysResp GetPrimaryKeys(1:TGetPrimaryKeysReq req); - - TGetCrossReferenceResp GetCrossReference(1:TGetCrossReferenceReq req); - - TGetOperationStatusResp GetOperationStatus(1:TGetOperationStatusReq req); - - TCancelOperationResp CancelOperation(1:TCancelOperationReq req); - - TCloseOperationResp CloseOperation(1:TCloseOperationReq req); - - TGetResultSetMetadataResp GetResultSetMetadata(1:TGetResultSetMetadataReq req); - - TFetchResultsResp FetchResults(1:TFetchResultsReq req); - - TGetDelegationTokenResp GetDelegationToken(1:TGetDelegationTokenReq req); - - TCancelDelegationTokenResp CancelDelegationToken(1:TCancelDelegationTokenReq req); - - TRenewDelegationTokenResp RenewDelegationToken(1:TRenewDelegationTokenReq req); -} diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 4a96afe9df20a..9cd8adb6cb4df 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -77,6 +77,10 @@ ${hive.group} hive-beeline + + ${hive.group} + 
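Before the build changes below, one last illustrative sketch: the fetch loop implied by TFetchResultsReq/TFetchResultsResp and the TCLIService definition above. `client` is again an assumed, already-connected `TCLIService.Client`; the TRowSet accessors follow the standard generated-bean conventions, and hasMoreRows is handled per its documented meaning even though a defensive client may also stop on an empty rowset.

```
import org.apache.hive.service.rpc.thrift.TCLIService;
import org.apache.hive.service.rpc.thrift.TFetchOrientation;
import org.apache.hive.service.rpc.thrift.TFetchResultsReq;
import org.apache.hive.service.rpc.thrift.TFetchResultsResp;
import org.apache.hive.service.rpc.thrift.TOperationHandle;
import org.apache.hive.service.rpc.thrift.TRowSet;

public final class FetchLoopSketch {
  // Drains the result set of a finished operation, one rowset at a time.
  static void drain(TCLIService.Client client, TOperationHandle op)
      throws org.apache.thrift.TException {
    boolean more = true;
    while (more) {
      // FETCH_NEXT is the default orientation; maxRows caps the rowset size.
      TFetchResultsReq req = new TFetchResultsReq(op, TFetchOrientation.FETCH_NEXT, 1000);
      TFetchResultsResp resp = client.FetchResults(req);

      TRowSet rowSet = resp.getResults();
      if (rowSet != null && rowSet.isSetColumns()) {
        // Column-oriented layout: one TColumn per result column.
        System.out.println("fetched " + rowSet.getColumns().size() + " columns");
      }

      // hasMoreRows is optional; a server that never sets it effectively ends the
      // loop after the first batch, so real clients often also stop on an empty rowset.
      more = resp.isSetHasMoreRows() && resp.isHasMoreRows();
    }
  }
}
```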
hive-service-rpc + org.eclipse.jetty jetty-server @@ -133,25 +137,5 @@ target/scala-${scala.binary.version}/classes target/scala-${scala.binary.version}/test-classes - - - org.codehaus.mojo - build-helper-maven-plugin - - - add-source - generate-sources - - add-source - - - - src/gen/java - - - - - - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TArrayTypeEntry.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TArrayTypeEntry.java deleted file mode 100644 index 358e322632144..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TArrayTypeEntry.java +++ /dev/null @@ -1,387 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TArrayTypeEntry implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TArrayTypeEntry"); - - private static final org.apache.thrift.protocol.TField OBJECT_TYPE_PTR_FIELD_DESC = new org.apache.thrift.protocol.TField("objectTypePtr", org.apache.thrift.protocol.TType.I32, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TArrayTypeEntryStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TArrayTypeEntryTupleSchemeFactory()); - } - - private int objectTypePtr; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OBJECT_TYPE_PTR((short)1, "objectTypePtr"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OBJECT_TYPE_PTR - return OBJECT_TYPE_PTR; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __OBJECTTYPEPTR_ISSET_ID = 0; - private byte __isset_bitfield = 0; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OBJECT_TYPE_PTR, new org.apache.thrift.meta_data.FieldMetaData("objectTypePtr", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32 , "TTypeEntryPtr"))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TArrayTypeEntry.class, metaDataMap); - } - - public TArrayTypeEntry() { - } - - public TArrayTypeEntry( - int objectTypePtr) - { - this(); - this.objectTypePtr = objectTypePtr; - setObjectTypePtrIsSet(true); - } - - /** - * Performs a deep copy on other. - */ - public TArrayTypeEntry(TArrayTypeEntry other) { - __isset_bitfield = other.__isset_bitfield; - this.objectTypePtr = other.objectTypePtr; - } - - public TArrayTypeEntry deepCopy() { - return new TArrayTypeEntry(this); - } - - @Override - public void clear() { - setObjectTypePtrIsSet(false); - this.objectTypePtr = 0; - } - - public int getObjectTypePtr() { - return this.objectTypePtr; - } - - public void setObjectTypePtr(int objectTypePtr) { - this.objectTypePtr = objectTypePtr; - setObjectTypePtrIsSet(true); - } - - public void unsetObjectTypePtr() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __OBJECTTYPEPTR_ISSET_ID); - } - - /** Returns true if field objectTypePtr is set (has been assigned a value) and false otherwise */ - public boolean isSetObjectTypePtr() { - return EncodingUtils.testBit(__isset_bitfield, __OBJECTTYPEPTR_ISSET_ID); - } - - public void setObjectTypePtrIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __OBJECTTYPEPTR_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OBJECT_TYPE_PTR: - if (value == null) { - unsetObjectTypePtr(); - } else { - setObjectTypePtr((Integer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OBJECT_TYPE_PTR: - return getObjectTypePtr(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OBJECT_TYPE_PTR: - return isSetObjectTypePtr(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - 
if (that instanceof TArrayTypeEntry) - return this.equals((TArrayTypeEntry)that); - return false; - } - - public boolean equals(TArrayTypeEntry that) { - if (that == null) - return false; - - boolean this_present_objectTypePtr = true; - boolean that_present_objectTypePtr = true; - if (this_present_objectTypePtr || that_present_objectTypePtr) { - if (!(this_present_objectTypePtr && that_present_objectTypePtr)) - return false; - if (this.objectTypePtr != that.objectTypePtr) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_objectTypePtr = true; - list.add(present_objectTypePtr); - if (present_objectTypePtr) - list.add(objectTypePtr); - - return list.hashCode(); - } - - @Override - public int compareTo(TArrayTypeEntry other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetObjectTypePtr()).compareTo(other.isSetObjectTypePtr()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetObjectTypePtr()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.objectTypePtr, other.objectTypePtr); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TArrayTypeEntry("); - boolean first = true; - - sb.append("objectTypePtr:"); - sb.append(this.objectTypePtr); - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetObjectTypePtr()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'objectTypePtr' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
- __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TArrayTypeEntryStandardSchemeFactory implements SchemeFactory { - public TArrayTypeEntryStandardScheme getScheme() { - return new TArrayTypeEntryStandardScheme(); - } - } - - private static class TArrayTypeEntryStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TArrayTypeEntry struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OBJECT_TYPE_PTR - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.objectTypePtr = iprot.readI32(); - struct.setObjectTypePtrIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TArrayTypeEntry struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - oprot.writeFieldBegin(OBJECT_TYPE_PTR_FIELD_DESC); - oprot.writeI32(struct.objectTypePtr); - oprot.writeFieldEnd(); - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TArrayTypeEntryTupleSchemeFactory implements SchemeFactory { - public TArrayTypeEntryTupleScheme getScheme() { - return new TArrayTypeEntryTupleScheme(); - } - } - - private static class TArrayTypeEntryTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TArrayTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeI32(struct.objectTypePtr); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TArrayTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.objectTypePtr = iprot.readI32(); - struct.setObjectTypePtrIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBinaryColumn.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBinaryColumn.java deleted file mode 100644 index a869cee007c0b..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBinaryColumn.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import 
java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TBinaryColumn implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TBinaryColumn"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TBinaryColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TBinaryColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TBinaryColumn.class, metaDataMap); - } - - public TBinaryColumn() { - } - - public TBinaryColumn( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - /** - * Performs a deep copy on other. - */ - public TBinaryColumn(TBinaryColumn other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(other.values); - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); - } - } - - public TBinaryColumn deepCopy() { - return new TBinaryColumn(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? null : this.values.iterator(); - } - - public void addToValues(ByteBuffer elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void setNulls(byte[] nulls) { - this.nulls = nulls == null ? 
(ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(nulls, nulls.length)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TBinaryColumn) - return this.equals((TBinaryColumn)that); - return false; - } - - public boolean equals(TBinaryColumn that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_values = true && (isSetValues()); - list.add(present_values); - if (present_values) - list.add(values); - - boolean present_nulls = true && (isSetNulls()); - list.add(present_nulls); - if (present_nulls) - list.add(nulls); - - return list.hashCode(); - } - - @Override - public int compareTo(TBinaryColumn other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(other.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, other.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(other.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, other.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void 
read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TBinaryColumn("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.values, sb); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TBinaryColumnStandardSchemeFactory implements SchemeFactory { - public TBinaryColumnStandardScheme getScheme() { - return new TBinaryColumnStandardScheme(); - } - } - - private static class TBinaryColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TBinaryColumn struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list110 = iprot.readListBegin(); - struct.values = new ArrayList(_list110.size); - ByteBuffer _elem111; - for (int _i112 = 0; _i112 < _list110.size; ++_i112) - { - _elem111 = iprot.readBinary(); - struct.values.add(_elem111); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, 
TBinaryColumn struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, struct.values.size())); - for (ByteBuffer _iter113 : struct.values) - { - oprot.writeBinary(_iter113); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TBinaryColumnTupleSchemeFactory implements SchemeFactory { - public TBinaryColumnTupleScheme getScheme() { - return new TBinaryColumnTupleScheme(); - } - } - - private static class TBinaryColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TBinaryColumn struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (ByteBuffer _iter114 : struct.values) - { - oprot.writeBinary(_iter114); - } - } - oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TBinaryColumn struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list115 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.values = new ArrayList(_list115.size); - ByteBuffer _elem116; - for (int _i117 = 0; _i117 < _list115.size; ++_i117) - { - _elem116 = iprot.readBinary(); - struct.values.add(_elem116); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolColumn.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolColumn.java deleted file mode 100644 index 9bb636672aa1e..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolColumn.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TBoolColumn implements 
org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TBoolColumn"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TBoolColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TBoolColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TBoolColumn.class, metaDataMap); - } - - public TBoolColumn() { - } - - public TBoolColumn( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - /** - * Performs a deep copy on other. - */ - public TBoolColumn(TBoolColumn other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(other.values); - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); - } - } - - public TBoolColumn deepCopy() { - return new TBoolColumn(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? null : this.values.iterator(); - } - - public void addToValues(boolean elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void setNulls(byte[] nulls) { - this.nulls = nulls == null ? 
(ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(nulls, nulls.length)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TBoolColumn) - return this.equals((TBoolColumn)that); - return false; - } - - public boolean equals(TBoolColumn that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_values = true && (isSetValues()); - list.add(present_values); - if (present_values) - list.add(values); - - boolean present_nulls = true && (isSetNulls()); - list.add(present_nulls); - if (present_nulls) - list.add(nulls); - - return list.hashCode(); - } - - @Override - public int compareTo(TBoolColumn other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(other.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, other.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(other.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, other.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void 
read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TBoolColumn("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TBoolColumnStandardSchemeFactory implements SchemeFactory { - public TBoolColumnStandardScheme getScheme() { - return new TBoolColumnStandardScheme(); - } - } - - private static class TBoolColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TBoolColumn struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list54 = iprot.readListBegin(); - struct.values = new ArrayList(_list54.size); - boolean _elem55; - for (int _i56 = 0; _i56 < _list54.size; ++_i56) - { - _elem55 = iprot.readBool(); - struct.values.add(_elem55); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TBoolColumn struct) throws org.apache.thrift.TException { - 
struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.BOOL, struct.values.size())); - for (boolean _iter57 : struct.values) - { - oprot.writeBool(_iter57); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TBoolColumnTupleSchemeFactory implements SchemeFactory { - public TBoolColumnTupleScheme getScheme() { - return new TBoolColumnTupleScheme(); - } - } - - private static class TBoolColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TBoolColumn struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (boolean _iter58 : struct.values) - { - oprot.writeBool(_iter58); - } - } - oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TBoolColumn struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list59 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.BOOL, iprot.readI32()); - struct.values = new ArrayList(_list59.size); - boolean _elem60; - for (int _i61 = 0; _i61 < _list59.size; ++_i61) - { - _elem60 = iprot.readBool(); - struct.values.add(_elem60); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolValue.java deleted file mode 100644 index 87b3070a89b11..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolValue.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TBoolValue implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final 
org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TBoolValue"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("value", org.apache.thrift.protocol.TType.BOOL, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TBoolValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TBoolValueTupleSchemeFactory()); - } - - private boolean value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __VALUE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TBoolValue.class, metaDataMap); - } - - public TBoolValue() { - } - - /** - * Performs a deep copy on other. 
- */ - public TBoolValue(TBoolValue other) { - __isset_bitfield = other.__isset_bitfield; - this.value = other.value; - } - - public TBoolValue deepCopy() { - return new TBoolValue(this); - } - - @Override - public void clear() { - setValueIsSet(false); - this.value = false; - } - - public boolean isValue() { - return this.value; - } - - public void setValue(boolean value) { - this.value = value; - setValueIsSet(true); - } - - public void unsetValue() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return EncodingUtils.testBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - public void setValueIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((Boolean)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return isValue(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TBoolValue) - return this.equals((TBoolValue)that); - return false; - } - - public boolean equals(TBoolValue that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || that_present_value) { - if (!(this_present_value && that_present_value)) - return false; - if (this.value != that.value) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_value = true && (isSetValue()); - list.add(present_value); - if (present_value) - list.add(value); - - return list.hashCode(); - } - - @Override - public int compareTo(TBoolValue other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(other.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, other.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TBoolValue("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); - sb.append(this.value); - first = false; - } - sb.append(")"); - return 
sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TBoolValueStandardSchemeFactory implements SchemeFactory { - public TBoolValueStandardScheme getScheme() { - return new TBoolValueStandardScheme(); - } - } - - private static class TBoolValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TBoolValue struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) { - struct.value = iprot.readBool(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TBoolValue struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.isSetValue()) { - oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeBool(struct.value); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TBoolValueTupleSchemeFactory implements SchemeFactory { - public TBoolValueTupleScheme getScheme() { - return new TBoolValueTupleScheme(); - } - } - - private static class TBoolValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TBoolValue struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeBool(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TBoolValue struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readBool(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TByteColumn.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TByteColumn.java deleted file mode 100644 index 68b3d3c31eb03..0000000000000 --- 
a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TByteColumn.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TByteColumn implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TByteColumn"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TByteColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TByteColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BYTE)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TByteColumn.class, metaDataMap); - } - - public TByteColumn() { - } - - public TByteColumn( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - /** - * Performs a deep copy on other. - */ - public TByteColumn(TByteColumn other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(other.values); - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); - } - } - - public TByteColumn deepCopy() { - return new TByteColumn(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? null : this.values.iterator(); - } - - public void addToValues(byte elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void setNulls(byte[] nulls) { - this.nulls = nulls == null ? 
(ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(nulls, nulls.length)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TByteColumn) - return this.equals((TByteColumn)that); - return false; - } - - public boolean equals(TByteColumn that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_values = true && (isSetValues()); - list.add(present_values); - if (present_values) - list.add(values); - - boolean present_nulls = true && (isSetNulls()); - list.add(present_nulls); - if (present_nulls) - list.add(nulls); - - return list.hashCode(); - } - - @Override - public int compareTo(TByteColumn other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(other.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, other.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(other.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, other.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void 
read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TByteColumn("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TByteColumnStandardSchemeFactory implements SchemeFactory { - public TByteColumnStandardScheme getScheme() { - return new TByteColumnStandardScheme(); - } - } - - private static class TByteColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TByteColumn struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list62 = iprot.readListBegin(); - struct.values = new ArrayList(_list62.size); - byte _elem63; - for (int _i64 = 0; _i64 < _list62.size; ++_i64) - { - _elem63 = iprot.readByte(); - struct.values.add(_elem63); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TByteColumn struct) throws org.apache.thrift.TException { - 
struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.BYTE, struct.values.size())); - for (byte _iter65 : struct.values) - { - oprot.writeByte(_iter65); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TByteColumnTupleSchemeFactory implements SchemeFactory { - public TByteColumnTupleScheme getScheme() { - return new TByteColumnTupleScheme(); - } - } - - private static class TByteColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TByteColumn struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (byte _iter66 : struct.values) - { - oprot.writeByte(_iter66); - } - } - oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TByteColumn struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list67 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.BYTE, iprot.readI32()); - struct.values = new ArrayList(_list67.size); - byte _elem68; - for (int _i69 = 0; _i69 < _list67.size; ++_i69) - { - _elem68 = iprot.readByte(); - struct.values.add(_elem68); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TByteValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TByteValue.java deleted file mode 100644 index a3d5951335fa7..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TByteValue.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TByteValue implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final 
org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TByteValue"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("value", org.apache.thrift.protocol.TType.BYTE, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TByteValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TByteValueTupleSchemeFactory()); - } - - private byte value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __VALUE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BYTE))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TByteValue.class, metaDataMap); - } - - public TByteValue() { - } - - /** - * Performs a deep copy on other. 
- */ - public TByteValue(TByteValue other) { - __isset_bitfield = other.__isset_bitfield; - this.value = other.value; - } - - public TByteValue deepCopy() { - return new TByteValue(this); - } - - @Override - public void clear() { - setValueIsSet(false); - this.value = 0; - } - - public byte getValue() { - return this.value; - } - - public void setValue(byte value) { - this.value = value; - setValueIsSet(true); - } - - public void unsetValue() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return EncodingUtils.testBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - public void setValueIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((Byte)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return getValue(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TByteValue) - return this.equals((TByteValue)that); - return false; - } - - public boolean equals(TByteValue that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || that_present_value) { - if (!(this_present_value && that_present_value)) - return false; - if (this.value != that.value) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_value = true && (isSetValue()); - list.add(present_value); - if (present_value) - list.add(value); - - return list.hashCode(); - } - - @Override - public int compareTo(TByteValue other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(other.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, other.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TByteValue("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); - sb.append(this.value); - first = false; - } - sb.append(")"); - return sb.toString(); - } - 
- public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TByteValueStandardSchemeFactory implements SchemeFactory { - public TByteValueStandardScheme getScheme() { - return new TByteValueStandardScheme(); - } - } - - private static class TByteValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TByteValue struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.BYTE) { - struct.value = iprot.readByte(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TByteValue struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.isSetValue()) { - oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeByte(struct.value); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TByteValueTupleSchemeFactory implements SchemeFactory { - public TByteValueTupleScheme getScheme() { - return new TByteValueTupleScheme(); - } - } - - private static class TByteValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TByteValue struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeByte(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TByteValue struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readByte(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIService.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIService.java deleted file mode 100644 index 6584c24a0142a..0000000000000 --- 
a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIService.java +++ /dev/null @@ -1,18138 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TCLIService { - - public interface Iface { - - public TOpenSessionResp OpenSession(TOpenSessionReq req) throws org.apache.thrift.TException; - - public TCloseSessionResp CloseSession(TCloseSessionReq req) throws org.apache.thrift.TException; - - public TGetInfoResp GetInfo(TGetInfoReq req) throws org.apache.thrift.TException; - - public TExecuteStatementResp ExecuteStatement(TExecuteStatementReq req) throws org.apache.thrift.TException; - - public TGetTypeInfoResp GetTypeInfo(TGetTypeInfoReq req) throws org.apache.thrift.TException; - - public TGetCatalogsResp GetCatalogs(TGetCatalogsReq req) throws org.apache.thrift.TException; - - public TGetSchemasResp GetSchemas(TGetSchemasReq req) throws org.apache.thrift.TException; - - public TGetTablesResp GetTables(TGetTablesReq req) throws org.apache.thrift.TException; - - public TGetTableTypesResp GetTableTypes(TGetTableTypesReq req) throws org.apache.thrift.TException; - - public TGetColumnsResp GetColumns(TGetColumnsReq req) throws org.apache.thrift.TException; - - public TGetFunctionsResp GetFunctions(TGetFunctionsReq req) throws org.apache.thrift.TException; - - public TGetPrimaryKeysResp GetPrimaryKeys(TGetPrimaryKeysReq req) throws org.apache.thrift.TException; - - public TGetCrossReferenceResp GetCrossReference(TGetCrossReferenceReq req) throws org.apache.thrift.TException; - - public TGetOperationStatusResp GetOperationStatus(TGetOperationStatusReq req) throws org.apache.thrift.TException; - - public TCancelOperationResp CancelOperation(TCancelOperationReq req) throws org.apache.thrift.TException; - - public TCloseOperationResp CloseOperation(TCloseOperationReq req) throws org.apache.thrift.TException; - - public TGetResultSetMetadataResp GetResultSetMetadata(TGetResultSetMetadataReq req) throws org.apache.thrift.TException; - - public TFetchResultsResp FetchResults(TFetchResultsReq req) throws org.apache.thrift.TException; - - public TGetDelegationTokenResp GetDelegationToken(TGetDelegationTokenReq req) throws org.apache.thrift.TException; - - public TCancelDelegationTokenResp CancelDelegationToken(TCancelDelegationTokenReq req) throws org.apache.thrift.TException; - - public TRenewDelegationTokenResp 
RenewDelegationToken(TRenewDelegationTokenReq req) throws org.apache.thrift.TException; - - } - - public interface AsyncIface { - - public void OpenSession(TOpenSessionReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void CloseSession(TCloseSessionReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetInfo(TGetInfoReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void ExecuteStatement(TExecuteStatementReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetTypeInfo(TGetTypeInfoReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetCatalogs(TGetCatalogsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetSchemas(TGetSchemasReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetTables(TGetTablesReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetTableTypes(TGetTableTypesReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetColumns(TGetColumnsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetFunctions(TGetFunctionsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetPrimaryKeys(TGetPrimaryKeysReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetCrossReference(TGetCrossReferenceReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetOperationStatus(TGetOperationStatusReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void CancelOperation(TCancelOperationReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void CloseOperation(TCloseOperationReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetResultSetMetadata(TGetResultSetMetadataReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void FetchResults(TFetchResultsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetDelegationToken(TGetDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void CancelDelegationToken(TCancelDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void RenewDelegationToken(TRenewDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - } - - public static class Client extends org.apache.thrift.TServiceClient implements Iface { - public static class Factory implements org.apache.thrift.TServiceClientFactory { - public Factory() {} - public Client 
getClient(org.apache.thrift.protocol.TProtocol prot) { - return new Client(prot); - } - public Client getClient(org.apache.thrift.protocol.TProtocol iprot, org.apache.thrift.protocol.TProtocol oprot) { - return new Client(iprot, oprot); - } - } - - public Client(org.apache.thrift.protocol.TProtocol prot) - { - super(prot, prot); - } - - public Client(org.apache.thrift.protocol.TProtocol iprot, org.apache.thrift.protocol.TProtocol oprot) { - super(iprot, oprot); - } - - public TOpenSessionResp OpenSession(TOpenSessionReq req) throws org.apache.thrift.TException - { - send_OpenSession(req); - return recv_OpenSession(); - } - - public void send_OpenSession(TOpenSessionReq req) throws org.apache.thrift.TException - { - OpenSession_args args = new OpenSession_args(); - args.setReq(req); - sendBase("OpenSession", args); - } - - public TOpenSessionResp recv_OpenSession() throws org.apache.thrift.TException - { - OpenSession_result result = new OpenSession_result(); - receiveBase(result, "OpenSession"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "OpenSession failed: unknown result"); - } - - public TCloseSessionResp CloseSession(TCloseSessionReq req) throws org.apache.thrift.TException - { - send_CloseSession(req); - return recv_CloseSession(); - } - - public void send_CloseSession(TCloseSessionReq req) throws org.apache.thrift.TException - { - CloseSession_args args = new CloseSession_args(); - args.setReq(req); - sendBase("CloseSession", args); - } - - public TCloseSessionResp recv_CloseSession() throws org.apache.thrift.TException - { - CloseSession_result result = new CloseSession_result(); - receiveBase(result, "CloseSession"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "CloseSession failed: unknown result"); - } - - public TGetInfoResp GetInfo(TGetInfoReq req) throws org.apache.thrift.TException - { - send_GetInfo(req); - return recv_GetInfo(); - } - - public void send_GetInfo(TGetInfoReq req) throws org.apache.thrift.TException - { - GetInfo_args args = new GetInfo_args(); - args.setReq(req); - sendBase("GetInfo", args); - } - - public TGetInfoResp recv_GetInfo() throws org.apache.thrift.TException - { - GetInfo_result result = new GetInfo_result(); - receiveBase(result, "GetInfo"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetInfo failed: unknown result"); - } - - public TExecuteStatementResp ExecuteStatement(TExecuteStatementReq req) throws org.apache.thrift.TException - { - send_ExecuteStatement(req); - return recv_ExecuteStatement(); - } - - public void send_ExecuteStatement(TExecuteStatementReq req) throws org.apache.thrift.TException - { - ExecuteStatement_args args = new ExecuteStatement_args(); - args.setReq(req); - sendBase("ExecuteStatement", args); - } - - public TExecuteStatementResp recv_ExecuteStatement() throws org.apache.thrift.TException - { - ExecuteStatement_result result = new ExecuteStatement_result(); - receiveBase(result, "ExecuteStatement"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "ExecuteStatement failed: unknown result"); - } - - public TGetTypeInfoResp 
GetTypeInfo(TGetTypeInfoReq req) throws org.apache.thrift.TException - { - send_GetTypeInfo(req); - return recv_GetTypeInfo(); - } - - public void send_GetTypeInfo(TGetTypeInfoReq req) throws org.apache.thrift.TException - { - GetTypeInfo_args args = new GetTypeInfo_args(); - args.setReq(req); - sendBase("GetTypeInfo", args); - } - - public TGetTypeInfoResp recv_GetTypeInfo() throws org.apache.thrift.TException - { - GetTypeInfo_result result = new GetTypeInfo_result(); - receiveBase(result, "GetTypeInfo"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetTypeInfo failed: unknown result"); - } - - public TGetCatalogsResp GetCatalogs(TGetCatalogsReq req) throws org.apache.thrift.TException - { - send_GetCatalogs(req); - return recv_GetCatalogs(); - } - - public void send_GetCatalogs(TGetCatalogsReq req) throws org.apache.thrift.TException - { - GetCatalogs_args args = new GetCatalogs_args(); - args.setReq(req); - sendBase("GetCatalogs", args); - } - - public TGetCatalogsResp recv_GetCatalogs() throws org.apache.thrift.TException - { - GetCatalogs_result result = new GetCatalogs_result(); - receiveBase(result, "GetCatalogs"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetCatalogs failed: unknown result"); - } - - public TGetSchemasResp GetSchemas(TGetSchemasReq req) throws org.apache.thrift.TException - { - send_GetSchemas(req); - return recv_GetSchemas(); - } - - public void send_GetSchemas(TGetSchemasReq req) throws org.apache.thrift.TException - { - GetSchemas_args args = new GetSchemas_args(); - args.setReq(req); - sendBase("GetSchemas", args); - } - - public TGetSchemasResp recv_GetSchemas() throws org.apache.thrift.TException - { - GetSchemas_result result = new GetSchemas_result(); - receiveBase(result, "GetSchemas"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetSchemas failed: unknown result"); - } - - public TGetTablesResp GetTables(TGetTablesReq req) throws org.apache.thrift.TException - { - send_GetTables(req); - return recv_GetTables(); - } - - public void send_GetTables(TGetTablesReq req) throws org.apache.thrift.TException - { - GetTables_args args = new GetTables_args(); - args.setReq(req); - sendBase("GetTables", args); - } - - public TGetTablesResp recv_GetTables() throws org.apache.thrift.TException - { - GetTables_result result = new GetTables_result(); - receiveBase(result, "GetTables"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetTables failed: unknown result"); - } - - public TGetTableTypesResp GetTableTypes(TGetTableTypesReq req) throws org.apache.thrift.TException - { - send_GetTableTypes(req); - return recv_GetTableTypes(); - } - - public void send_GetTableTypes(TGetTableTypesReq req) throws org.apache.thrift.TException - { - GetTableTypes_args args = new GetTableTypes_args(); - args.setReq(req); - sendBase("GetTableTypes", args); - } - - public TGetTableTypesResp recv_GetTableTypes() throws org.apache.thrift.TException - { - GetTableTypes_result result = new GetTableTypes_result(); - receiveBase(result, "GetTableTypes"); - if (result.isSetSuccess()) { - return 
result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetTableTypes failed: unknown result"); - } - - public TGetColumnsResp GetColumns(TGetColumnsReq req) throws org.apache.thrift.TException - { - send_GetColumns(req); - return recv_GetColumns(); - } - - public void send_GetColumns(TGetColumnsReq req) throws org.apache.thrift.TException - { - GetColumns_args args = new GetColumns_args(); - args.setReq(req); - sendBase("GetColumns", args); - } - - public TGetColumnsResp recv_GetColumns() throws org.apache.thrift.TException - { - GetColumns_result result = new GetColumns_result(); - receiveBase(result, "GetColumns"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetColumns failed: unknown result"); - } - - public TGetFunctionsResp GetFunctions(TGetFunctionsReq req) throws org.apache.thrift.TException - { - send_GetFunctions(req); - return recv_GetFunctions(); - } - - public void send_GetFunctions(TGetFunctionsReq req) throws org.apache.thrift.TException - { - GetFunctions_args args = new GetFunctions_args(); - args.setReq(req); - sendBase("GetFunctions", args); - } - - public TGetFunctionsResp recv_GetFunctions() throws org.apache.thrift.TException - { - GetFunctions_result result = new GetFunctions_result(); - receiveBase(result, "GetFunctions"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetFunctions failed: unknown result"); - } - - public TGetPrimaryKeysResp GetPrimaryKeys(TGetPrimaryKeysReq req) throws org.apache.thrift.TException - { - send_GetPrimaryKeys(req); - return recv_GetPrimaryKeys(); - } - - public void send_GetPrimaryKeys(TGetPrimaryKeysReq req) throws org.apache.thrift.TException - { - GetPrimaryKeys_args args = new GetPrimaryKeys_args(); - args.setReq(req); - sendBase("GetPrimaryKeys", args); - } - - public TGetPrimaryKeysResp recv_GetPrimaryKeys() throws org.apache.thrift.TException - { - GetPrimaryKeys_result result = new GetPrimaryKeys_result(); - receiveBase(result, "GetPrimaryKeys"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetPrimaryKeys failed: unknown result"); - } - - public TGetCrossReferenceResp GetCrossReference(TGetCrossReferenceReq req) throws org.apache.thrift.TException - { - send_GetCrossReference(req); - return recv_GetCrossReference(); - } - - public void send_GetCrossReference(TGetCrossReferenceReq req) throws org.apache.thrift.TException - { - GetCrossReference_args args = new GetCrossReference_args(); - args.setReq(req); - sendBase("GetCrossReference", args); - } - - public TGetCrossReferenceResp recv_GetCrossReference() throws org.apache.thrift.TException - { - GetCrossReference_result result = new GetCrossReference_result(); - receiveBase(result, "GetCrossReference"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetCrossReference failed: unknown result"); - } - - public TGetOperationStatusResp GetOperationStatus(TGetOperationStatusReq req) throws org.apache.thrift.TException - { - send_GetOperationStatus(req); - return recv_GetOperationStatus(); - } - - public void 
send_GetOperationStatus(TGetOperationStatusReq req) throws org.apache.thrift.TException - { - GetOperationStatus_args args = new GetOperationStatus_args(); - args.setReq(req); - sendBase("GetOperationStatus", args); - } - - public TGetOperationStatusResp recv_GetOperationStatus() throws org.apache.thrift.TException - { - GetOperationStatus_result result = new GetOperationStatus_result(); - receiveBase(result, "GetOperationStatus"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetOperationStatus failed: unknown result"); - } - - public TCancelOperationResp CancelOperation(TCancelOperationReq req) throws org.apache.thrift.TException - { - send_CancelOperation(req); - return recv_CancelOperation(); - } - - public void send_CancelOperation(TCancelOperationReq req) throws org.apache.thrift.TException - { - CancelOperation_args args = new CancelOperation_args(); - args.setReq(req); - sendBase("CancelOperation", args); - } - - public TCancelOperationResp recv_CancelOperation() throws org.apache.thrift.TException - { - CancelOperation_result result = new CancelOperation_result(); - receiveBase(result, "CancelOperation"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "CancelOperation failed: unknown result"); - } - - public TCloseOperationResp CloseOperation(TCloseOperationReq req) throws org.apache.thrift.TException - { - send_CloseOperation(req); - return recv_CloseOperation(); - } - - public void send_CloseOperation(TCloseOperationReq req) throws org.apache.thrift.TException - { - CloseOperation_args args = new CloseOperation_args(); - args.setReq(req); - sendBase("CloseOperation", args); - } - - public TCloseOperationResp recv_CloseOperation() throws org.apache.thrift.TException - { - CloseOperation_result result = new CloseOperation_result(); - receiveBase(result, "CloseOperation"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "CloseOperation failed: unknown result"); - } - - public TGetResultSetMetadataResp GetResultSetMetadata(TGetResultSetMetadataReq req) throws org.apache.thrift.TException - { - send_GetResultSetMetadata(req); - return recv_GetResultSetMetadata(); - } - - public void send_GetResultSetMetadata(TGetResultSetMetadataReq req) throws org.apache.thrift.TException - { - GetResultSetMetadata_args args = new GetResultSetMetadata_args(); - args.setReq(req); - sendBase("GetResultSetMetadata", args); - } - - public TGetResultSetMetadataResp recv_GetResultSetMetadata() throws org.apache.thrift.TException - { - GetResultSetMetadata_result result = new GetResultSetMetadata_result(); - receiveBase(result, "GetResultSetMetadata"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetResultSetMetadata failed: unknown result"); - } - - public TFetchResultsResp FetchResults(TFetchResultsReq req) throws org.apache.thrift.TException - { - send_FetchResults(req); - return recv_FetchResults(); - } - - public void send_FetchResults(TFetchResultsReq req) throws org.apache.thrift.TException - { - FetchResults_args args = new FetchResults_args(); - args.setReq(req); - sendBase("FetchResults", args); - } - - public 
TFetchResultsResp recv_FetchResults() throws org.apache.thrift.TException - { - FetchResults_result result = new FetchResults_result(); - receiveBase(result, "FetchResults"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "FetchResults failed: unknown result"); - } - - public TGetDelegationTokenResp GetDelegationToken(TGetDelegationTokenReq req) throws org.apache.thrift.TException - { - send_GetDelegationToken(req); - return recv_GetDelegationToken(); - } - - public void send_GetDelegationToken(TGetDelegationTokenReq req) throws org.apache.thrift.TException - { - GetDelegationToken_args args = new GetDelegationToken_args(); - args.setReq(req); - sendBase("GetDelegationToken", args); - } - - public TGetDelegationTokenResp recv_GetDelegationToken() throws org.apache.thrift.TException - { - GetDelegationToken_result result = new GetDelegationToken_result(); - receiveBase(result, "GetDelegationToken"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetDelegationToken failed: unknown result"); - } - - public TCancelDelegationTokenResp CancelDelegationToken(TCancelDelegationTokenReq req) throws org.apache.thrift.TException - { - send_CancelDelegationToken(req); - return recv_CancelDelegationToken(); - } - - public void send_CancelDelegationToken(TCancelDelegationTokenReq req) throws org.apache.thrift.TException - { - CancelDelegationToken_args args = new CancelDelegationToken_args(); - args.setReq(req); - sendBase("CancelDelegationToken", args); - } - - public TCancelDelegationTokenResp recv_CancelDelegationToken() throws org.apache.thrift.TException - { - CancelDelegationToken_result result = new CancelDelegationToken_result(); - receiveBase(result, "CancelDelegationToken"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "CancelDelegationToken failed: unknown result"); - } - - public TRenewDelegationTokenResp RenewDelegationToken(TRenewDelegationTokenReq req) throws org.apache.thrift.TException - { - send_RenewDelegationToken(req); - return recv_RenewDelegationToken(); - } - - public void send_RenewDelegationToken(TRenewDelegationTokenReq req) throws org.apache.thrift.TException - { - RenewDelegationToken_args args = new RenewDelegationToken_args(); - args.setReq(req); - sendBase("RenewDelegationToken", args); - } - - public TRenewDelegationTokenResp recv_RenewDelegationToken() throws org.apache.thrift.TException - { - RenewDelegationToken_result result = new RenewDelegationToken_result(); - receiveBase(result, "RenewDelegationToken"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "RenewDelegationToken failed: unknown result"); - } - - } - public static class AsyncClient extends org.apache.thrift.async.TAsyncClient implements AsyncIface { - public static class Factory implements org.apache.thrift.async.TAsyncClientFactory { - private org.apache.thrift.async.TAsyncClientManager clientManager; - private org.apache.thrift.protocol.TProtocolFactory protocolFactory; - public Factory(org.apache.thrift.async.TAsyncClientManager clientManager, org.apache.thrift.protocol.TProtocolFactory protocolFactory) { - this.clientManager 
= clientManager; - this.protocolFactory = protocolFactory; - } - public AsyncClient getAsyncClient(org.apache.thrift.transport.TNonblockingTransport transport) { - return new AsyncClient(protocolFactory, clientManager, transport); - } - } - - public AsyncClient(org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.async.TAsyncClientManager clientManager, org.apache.thrift.transport.TNonblockingTransport transport) { - super(protocolFactory, clientManager, transport); - } - - public void OpenSession(TOpenSessionReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - OpenSession_call method_call = new OpenSession_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class OpenSession_call extends org.apache.thrift.async.TAsyncMethodCall { - private TOpenSessionReq req; - public OpenSession_call(TOpenSessionReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("OpenSession", org.apache.thrift.protocol.TMessageType.CALL, 0)); - OpenSession_args args = new OpenSession_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TOpenSessionResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_OpenSession(); - } - } - - public void CloseSession(TCloseSessionReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - CloseSession_call method_call = new CloseSession_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class CloseSession_call extends org.apache.thrift.async.TAsyncMethodCall { - private TCloseSessionReq req; - public CloseSession_call(TCloseSessionReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("CloseSession", org.apache.thrift.protocol.TMessageType.CALL, 0)); - CloseSession_args args = new CloseSession_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TCloseSessionResp 
getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_CloseSession(); - } - } - - public void GetInfo(TGetInfoReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetInfo_call method_call = new GetInfo_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetInfo_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetInfoReq req; - public GetInfo_call(TGetInfoReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetInfo", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetInfo_args args = new GetInfo_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetInfoResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetInfo(); - } - } - - public void ExecuteStatement(TExecuteStatementReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - ExecuteStatement_call method_call = new ExecuteStatement_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class ExecuteStatement_call extends org.apache.thrift.async.TAsyncMethodCall { - private TExecuteStatementReq req; - public ExecuteStatement_call(TExecuteStatementReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("ExecuteStatement", org.apache.thrift.protocol.TMessageType.CALL, 0)); - ExecuteStatement_args args = new ExecuteStatement_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public 
TExecuteStatementResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_ExecuteStatement(); - } - } - - public void GetTypeInfo(TGetTypeInfoReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetTypeInfo_call method_call = new GetTypeInfo_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetTypeInfo_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetTypeInfoReq req; - public GetTypeInfo_call(TGetTypeInfoReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetTypeInfo", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetTypeInfo_args args = new GetTypeInfo_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetTypeInfoResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetTypeInfo(); - } - } - - public void GetCatalogs(TGetCatalogsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetCatalogs_call method_call = new GetCatalogs_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetCatalogs_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetCatalogsReq req; - public GetCatalogs_call(TGetCatalogsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetCatalogs", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetCatalogs_args args = new GetCatalogs_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - 
public TGetCatalogsResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetCatalogs(); - } - } - - public void GetSchemas(TGetSchemasReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetSchemas_call method_call = new GetSchemas_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetSchemas_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetSchemasReq req; - public GetSchemas_call(TGetSchemasReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetSchemas", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetSchemas_args args = new GetSchemas_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetSchemasResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetSchemas(); - } - } - - public void GetTables(TGetTablesReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetTables_call method_call = new GetTables_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetTables_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetTablesReq req; - public GetTables_call(TGetTablesReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetTables", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetTables_args args = new GetTables_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetTablesResp getResult() 
throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetTables(); - } - } - - public void GetTableTypes(TGetTableTypesReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetTableTypes_call method_call = new GetTableTypes_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetTableTypes_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetTableTypesReq req; - public GetTableTypes_call(TGetTableTypesReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetTableTypes", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetTableTypes_args args = new GetTableTypes_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetTableTypesResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetTableTypes(); - } - } - - public void GetColumns(TGetColumnsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetColumns_call method_call = new GetColumns_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetColumns_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetColumnsReq req; - public GetColumns_call(TGetColumnsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetColumns", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetColumns_args args = new GetColumns_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetColumnsResp 
getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetColumns(); - } - } - - public void GetFunctions(TGetFunctionsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetFunctions_call method_call = new GetFunctions_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetFunctions_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetFunctionsReq req; - public GetFunctions_call(TGetFunctionsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetFunctions", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetFunctions_args args = new GetFunctions_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetFunctionsResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetFunctions(); - } - } - - public void GetPrimaryKeys(TGetPrimaryKeysReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetPrimaryKeys_call method_call = new GetPrimaryKeys_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetPrimaryKeys_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetPrimaryKeysReq req; - public GetPrimaryKeys_call(TGetPrimaryKeysReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetPrimaryKeys", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetPrimaryKeys_args args = new GetPrimaryKeys_args(); - args.setReq(req); - args.write(prot); - 
prot.writeMessageEnd(); - } - - public TGetPrimaryKeysResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetPrimaryKeys(); - } - } - - public void GetCrossReference(TGetCrossReferenceReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetCrossReference_call method_call = new GetCrossReference_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetCrossReference_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetCrossReferenceReq req; - public GetCrossReference_call(TGetCrossReferenceReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetCrossReference", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetCrossReference_args args = new GetCrossReference_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetCrossReferenceResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetCrossReference(); - } - } - - public void GetOperationStatus(TGetOperationStatusReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetOperationStatus_call method_call = new GetOperationStatus_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetOperationStatus_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetOperationStatusReq req; - public GetOperationStatus_call(TGetOperationStatusReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetOperationStatus", 
org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetOperationStatus_args args = new GetOperationStatus_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetOperationStatusResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetOperationStatus(); - } - } - - public void CancelOperation(TCancelOperationReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - CancelOperation_call method_call = new CancelOperation_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class CancelOperation_call extends org.apache.thrift.async.TAsyncMethodCall { - private TCancelOperationReq req; - public CancelOperation_call(TCancelOperationReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("CancelOperation", org.apache.thrift.protocol.TMessageType.CALL, 0)); - CancelOperation_args args = new CancelOperation_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TCancelOperationResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_CancelOperation(); - } - } - - public void CloseOperation(TCloseOperationReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - CloseOperation_call method_call = new CloseOperation_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class CloseOperation_call extends org.apache.thrift.async.TAsyncMethodCall { - private TCloseOperationReq req; - public CloseOperation_call(TCloseOperationReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws 
org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("CloseOperation", org.apache.thrift.protocol.TMessageType.CALL, 0)); - CloseOperation_args args = new CloseOperation_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TCloseOperationResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_CloseOperation(); - } - } - - public void GetResultSetMetadata(TGetResultSetMetadataReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetResultSetMetadata_call method_call = new GetResultSetMetadata_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetResultSetMetadata_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetResultSetMetadataReq req; - public GetResultSetMetadata_call(TGetResultSetMetadataReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetResultSetMetadata", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetResultSetMetadata_args args = new GetResultSetMetadata_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetResultSetMetadataResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetResultSetMetadata(); - } - } - - public void FetchResults(TFetchResultsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - FetchResults_call method_call = new FetchResults_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class FetchResults_call extends org.apache.thrift.async.TAsyncMethodCall { - private TFetchResultsReq req; - public FetchResults_call(TFetchResultsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, 
protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("FetchResults", org.apache.thrift.protocol.TMessageType.CALL, 0)); - FetchResults_args args = new FetchResults_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TFetchResultsResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_FetchResults(); - } - } - - public void GetDelegationToken(TGetDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetDelegationToken_call method_call = new GetDelegationToken_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetDelegationToken_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetDelegationTokenReq req; - public GetDelegationToken_call(TGetDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetDelegationToken", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetDelegationToken_args args = new GetDelegationToken_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetDelegationTokenResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetDelegationToken(); - } - } - - public void CancelDelegationToken(TCancelDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - CancelDelegationToken_call method_call = new CancelDelegationToken_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class CancelDelegationToken_call extends org.apache.thrift.async.TAsyncMethodCall { - private TCancelDelegationTokenReq req; - public CancelDelegationToken_call(TCancelDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, 
org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("CancelDelegationToken", org.apache.thrift.protocol.TMessageType.CALL, 0)); - CancelDelegationToken_args args = new CancelDelegationToken_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TCancelDelegationTokenResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_CancelDelegationToken(); - } - } - - public void RenewDelegationToken(TRenewDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - RenewDelegationToken_call method_call = new RenewDelegationToken_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class RenewDelegationToken_call extends org.apache.thrift.async.TAsyncMethodCall { - private TRenewDelegationTokenReq req; - public RenewDelegationToken_call(TRenewDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("RenewDelegationToken", org.apache.thrift.protocol.TMessageType.CALL, 0)); - RenewDelegationToken_args args = new RenewDelegationToken_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TRenewDelegationTokenResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_RenewDelegationToken(); - } - } - - } - - public static class Processor extends org.apache.thrift.TBaseProcessor implements org.apache.thrift.TProcessor { - private static final Logger LOGGER = LoggerFactory.getLogger(Processor.class.getName()); - public Processor(I iface) { - super(iface, getProcessMap(new HashMap>())); - } - - protected Processor(I iface, Map> processMap) { - super(iface, getProcessMap(processMap)); - } - - private static Map> getProcessMap(Map> processMap) { - 
processMap.put("OpenSession", new OpenSession()); - processMap.put("CloseSession", new CloseSession()); - processMap.put("GetInfo", new GetInfo()); - processMap.put("ExecuteStatement", new ExecuteStatement()); - processMap.put("GetTypeInfo", new GetTypeInfo()); - processMap.put("GetCatalogs", new GetCatalogs()); - processMap.put("GetSchemas", new GetSchemas()); - processMap.put("GetTables", new GetTables()); - processMap.put("GetTableTypes", new GetTableTypes()); - processMap.put("GetColumns", new GetColumns()); - processMap.put("GetFunctions", new GetFunctions()); - processMap.put("GetPrimaryKeys", new GetPrimaryKeys()); - processMap.put("GetCrossReference", new GetCrossReference()); - processMap.put("GetOperationStatus", new GetOperationStatus()); - processMap.put("CancelOperation", new CancelOperation()); - processMap.put("CloseOperation", new CloseOperation()); - processMap.put("GetResultSetMetadata", new GetResultSetMetadata()); - processMap.put("FetchResults", new FetchResults()); - processMap.put("GetDelegationToken", new GetDelegationToken()); - processMap.put("CancelDelegationToken", new CancelDelegationToken()); - processMap.put("RenewDelegationToken", new RenewDelegationToken()); - return processMap; - } - - public static class OpenSession extends org.apache.thrift.ProcessFunction { - public OpenSession() { - super("OpenSession"); - } - - public OpenSession_args getEmptyArgsInstance() { - return new OpenSession_args(); - } - - protected boolean isOneway() { - return false; - } - - public OpenSession_result getResult(I iface, OpenSession_args args) throws org.apache.thrift.TException { - OpenSession_result result = new OpenSession_result(); - result.success = iface.OpenSession(args.req); - return result; - } - } - - public static class CloseSession extends org.apache.thrift.ProcessFunction { - public CloseSession() { - super("CloseSession"); - } - - public CloseSession_args getEmptyArgsInstance() { - return new CloseSession_args(); - } - - protected boolean isOneway() { - return false; - } - - public CloseSession_result getResult(I iface, CloseSession_args args) throws org.apache.thrift.TException { - CloseSession_result result = new CloseSession_result(); - result.success = iface.CloseSession(args.req); - return result; - } - } - - public static class GetInfo extends org.apache.thrift.ProcessFunction { - public GetInfo() { - super("GetInfo"); - } - - public GetInfo_args getEmptyArgsInstance() { - return new GetInfo_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetInfo_result getResult(I iface, GetInfo_args args) throws org.apache.thrift.TException { - GetInfo_result result = new GetInfo_result(); - result.success = iface.GetInfo(args.req); - return result; - } - } - - public static class ExecuteStatement extends org.apache.thrift.ProcessFunction { - public ExecuteStatement() { - super("ExecuteStatement"); - } - - public ExecuteStatement_args getEmptyArgsInstance() { - return new ExecuteStatement_args(); - } - - protected boolean isOneway() { - return false; - } - - public ExecuteStatement_result getResult(I iface, ExecuteStatement_args args) throws org.apache.thrift.TException { - ExecuteStatement_result result = new ExecuteStatement_result(); - result.success = iface.ExecuteStatement(args.req); - return result; - } - } - - public static class GetTypeInfo extends org.apache.thrift.ProcessFunction { - public GetTypeInfo() { - super("GetTypeInfo"); - } - - public GetTypeInfo_args getEmptyArgsInstance() { - return new GetTypeInfo_args(); - } - - 
protected boolean isOneway() { - return false; - } - - public GetTypeInfo_result getResult(I iface, GetTypeInfo_args args) throws org.apache.thrift.TException { - GetTypeInfo_result result = new GetTypeInfo_result(); - result.success = iface.GetTypeInfo(args.req); - return result; - } - } - - public static class GetCatalogs extends org.apache.thrift.ProcessFunction { - public GetCatalogs() { - super("GetCatalogs"); - } - - public GetCatalogs_args getEmptyArgsInstance() { - return new GetCatalogs_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetCatalogs_result getResult(I iface, GetCatalogs_args args) throws org.apache.thrift.TException { - GetCatalogs_result result = new GetCatalogs_result(); - result.success = iface.GetCatalogs(args.req); - return result; - } - } - - public static class GetSchemas extends org.apache.thrift.ProcessFunction { - public GetSchemas() { - super("GetSchemas"); - } - - public GetSchemas_args getEmptyArgsInstance() { - return new GetSchemas_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetSchemas_result getResult(I iface, GetSchemas_args args) throws org.apache.thrift.TException { - GetSchemas_result result = new GetSchemas_result(); - result.success = iface.GetSchemas(args.req); - return result; - } - } - - public static class GetTables extends org.apache.thrift.ProcessFunction { - public GetTables() { - super("GetTables"); - } - - public GetTables_args getEmptyArgsInstance() { - return new GetTables_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetTables_result getResult(I iface, GetTables_args args) throws org.apache.thrift.TException { - GetTables_result result = new GetTables_result(); - result.success = iface.GetTables(args.req); - return result; - } - } - - public static class GetTableTypes extends org.apache.thrift.ProcessFunction { - public GetTableTypes() { - super("GetTableTypes"); - } - - public GetTableTypes_args getEmptyArgsInstance() { - return new GetTableTypes_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetTableTypes_result getResult(I iface, GetTableTypes_args args) throws org.apache.thrift.TException { - GetTableTypes_result result = new GetTableTypes_result(); - result.success = iface.GetTableTypes(args.req); - return result; - } - } - - public static class GetColumns extends org.apache.thrift.ProcessFunction { - public GetColumns() { - super("GetColumns"); - } - - public GetColumns_args getEmptyArgsInstance() { - return new GetColumns_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetColumns_result getResult(I iface, GetColumns_args args) throws org.apache.thrift.TException { - GetColumns_result result = new GetColumns_result(); - result.success = iface.GetColumns(args.req); - return result; - } - } - - public static class GetFunctions extends org.apache.thrift.ProcessFunction { - public GetFunctions() { - super("GetFunctions"); - } - - public GetFunctions_args getEmptyArgsInstance() { - return new GetFunctions_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetFunctions_result getResult(I iface, GetFunctions_args args) throws org.apache.thrift.TException { - GetFunctions_result result = new GetFunctions_result(); - result.success = iface.GetFunctions(args.req); - return result; - } - } - - public static class GetPrimaryKeys extends org.apache.thrift.ProcessFunction { - public GetPrimaryKeys() { - super("GetPrimaryKeys"); - } - - public GetPrimaryKeys_args 
getEmptyArgsInstance() { - return new GetPrimaryKeys_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetPrimaryKeys_result getResult(I iface, GetPrimaryKeys_args args) throws org.apache.thrift.TException { - GetPrimaryKeys_result result = new GetPrimaryKeys_result(); - result.success = iface.GetPrimaryKeys(args.req); - return result; - } - } - - public static class GetCrossReference extends org.apache.thrift.ProcessFunction { - public GetCrossReference() { - super("GetCrossReference"); - } - - public GetCrossReference_args getEmptyArgsInstance() { - return new GetCrossReference_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetCrossReference_result getResult(I iface, GetCrossReference_args args) throws org.apache.thrift.TException { - GetCrossReference_result result = new GetCrossReference_result(); - result.success = iface.GetCrossReference(args.req); - return result; - } - } - - public static class GetOperationStatus extends org.apache.thrift.ProcessFunction { - public GetOperationStatus() { - super("GetOperationStatus"); - } - - public GetOperationStatus_args getEmptyArgsInstance() { - return new GetOperationStatus_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetOperationStatus_result getResult(I iface, GetOperationStatus_args args) throws org.apache.thrift.TException { - GetOperationStatus_result result = new GetOperationStatus_result(); - result.success = iface.GetOperationStatus(args.req); - return result; - } - } - - public static class CancelOperation extends org.apache.thrift.ProcessFunction { - public CancelOperation() { - super("CancelOperation"); - } - - public CancelOperation_args getEmptyArgsInstance() { - return new CancelOperation_args(); - } - - protected boolean isOneway() { - return false; - } - - public CancelOperation_result getResult(I iface, CancelOperation_args args) throws org.apache.thrift.TException { - CancelOperation_result result = new CancelOperation_result(); - result.success = iface.CancelOperation(args.req); - return result; - } - } - - public static class CloseOperation extends org.apache.thrift.ProcessFunction { - public CloseOperation() { - super("CloseOperation"); - } - - public CloseOperation_args getEmptyArgsInstance() { - return new CloseOperation_args(); - } - - protected boolean isOneway() { - return false; - } - - public CloseOperation_result getResult(I iface, CloseOperation_args args) throws org.apache.thrift.TException { - CloseOperation_result result = new CloseOperation_result(); - result.success = iface.CloseOperation(args.req); - return result; - } - } - - public static class GetResultSetMetadata extends org.apache.thrift.ProcessFunction { - public GetResultSetMetadata() { - super("GetResultSetMetadata"); - } - - public GetResultSetMetadata_args getEmptyArgsInstance() { - return new GetResultSetMetadata_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetResultSetMetadata_result getResult(I iface, GetResultSetMetadata_args args) throws org.apache.thrift.TException { - GetResultSetMetadata_result result = new GetResultSetMetadata_result(); - result.success = iface.GetResultSetMetadata(args.req); - return result; - } - } - - public static class FetchResults extends org.apache.thrift.ProcessFunction { - public FetchResults() { - super("FetchResults"); - } - - public FetchResults_args getEmptyArgsInstance() { - return new FetchResults_args(); - } - - protected boolean isOneway() { - return false; - } - - public 
FetchResults_result getResult(I iface, FetchResults_args args) throws org.apache.thrift.TException { - FetchResults_result result = new FetchResults_result(); - result.success = iface.FetchResults(args.req); - return result; - } - } - - public static class GetDelegationToken extends org.apache.thrift.ProcessFunction { - public GetDelegationToken() { - super("GetDelegationToken"); - } - - public GetDelegationToken_args getEmptyArgsInstance() { - return new GetDelegationToken_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetDelegationToken_result getResult(I iface, GetDelegationToken_args args) throws org.apache.thrift.TException { - GetDelegationToken_result result = new GetDelegationToken_result(); - result.success = iface.GetDelegationToken(args.req); - return result; - } - } - - public static class CancelDelegationToken extends org.apache.thrift.ProcessFunction { - public CancelDelegationToken() { - super("CancelDelegationToken"); - } - - public CancelDelegationToken_args getEmptyArgsInstance() { - return new CancelDelegationToken_args(); - } - - protected boolean isOneway() { - return false; - } - - public CancelDelegationToken_result getResult(I iface, CancelDelegationToken_args args) throws org.apache.thrift.TException { - CancelDelegationToken_result result = new CancelDelegationToken_result(); - result.success = iface.CancelDelegationToken(args.req); - return result; - } - } - - public static class RenewDelegationToken extends org.apache.thrift.ProcessFunction { - public RenewDelegationToken() { - super("RenewDelegationToken"); - } - - public RenewDelegationToken_args getEmptyArgsInstance() { - return new RenewDelegationToken_args(); - } - - protected boolean isOneway() { - return false; - } - - public RenewDelegationToken_result getResult(I iface, RenewDelegationToken_args args) throws org.apache.thrift.TException { - RenewDelegationToken_result result = new RenewDelegationToken_result(); - result.success = iface.RenewDelegationToken(args.req); - return result; - } - } - - } - - public static class AsyncProcessor extends org.apache.thrift.TBaseAsyncProcessor { - private static final Logger LOGGER = LoggerFactory.getLogger(AsyncProcessor.class.getName()); - public AsyncProcessor(I iface) { - super(iface, getProcessMap(new HashMap>())); - } - - protected AsyncProcessor(I iface, Map> processMap) { - super(iface, getProcessMap(processMap)); - } - - private static Map> getProcessMap(Map> processMap) { - processMap.put("OpenSession", new OpenSession()); - processMap.put("CloseSession", new CloseSession()); - processMap.put("GetInfo", new GetInfo()); - processMap.put("ExecuteStatement", new ExecuteStatement()); - processMap.put("GetTypeInfo", new GetTypeInfo()); - processMap.put("GetCatalogs", new GetCatalogs()); - processMap.put("GetSchemas", new GetSchemas()); - processMap.put("GetTables", new GetTables()); - processMap.put("GetTableTypes", new GetTableTypes()); - processMap.put("GetColumns", new GetColumns()); - processMap.put("GetFunctions", new GetFunctions()); - processMap.put("GetPrimaryKeys", new GetPrimaryKeys()); - processMap.put("GetCrossReference", new GetCrossReference()); - processMap.put("GetOperationStatus", new GetOperationStatus()); - processMap.put("CancelOperation", new CancelOperation()); - processMap.put("CloseOperation", new CloseOperation()); - processMap.put("GetResultSetMetadata", new GetResultSetMetadata()); - processMap.put("FetchResults", new FetchResults()); - processMap.put("GetDelegationToken", new GetDelegationToken()); - 
processMap.put("CancelDelegationToken", new CancelDelegationToken()); - processMap.put("RenewDelegationToken", new RenewDelegationToken()); - return processMap; - } - - public static class OpenSession extends org.apache.thrift.AsyncProcessFunction { - public OpenSession() { - super("OpenSession"); - } - - public OpenSession_args getEmptyArgsInstance() { - return new OpenSession_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TOpenSessionResp o) { - OpenSession_result result = new OpenSession_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - OpenSession_result result = new OpenSession_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, OpenSession_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.OpenSession(args.req,resultHandler); - } - } - - public static class CloseSession extends org.apache.thrift.AsyncProcessFunction { - public CloseSession() { - super("CloseSession"); - } - - public CloseSession_args getEmptyArgsInstance() { - return new CloseSession_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TCloseSessionResp o) { - CloseSession_result result = new CloseSession_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - CloseSession_result result = new CloseSession_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, CloseSession_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.CloseSession(args.req,resultHandler); - } - } - - public static class GetInfo extends org.apache.thrift.AsyncProcessFunction { - public GetInfo() { - super("GetInfo"); - } - - 
public GetInfo_args getEmptyArgsInstance() { - return new GetInfo_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TGetInfoResp o) { - GetInfo_result result = new GetInfo_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - GetInfo_result result = new GetInfo_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, GetInfo_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.GetInfo(args.req,resultHandler); - } - } - - public static class ExecuteStatement extends org.apache.thrift.AsyncProcessFunction { - public ExecuteStatement() { - super("ExecuteStatement"); - } - - public ExecuteStatement_args getEmptyArgsInstance() { - return new ExecuteStatement_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TExecuteStatementResp o) { - ExecuteStatement_result result = new ExecuteStatement_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - ExecuteStatement_result result = new ExecuteStatement_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, ExecuteStatement_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.ExecuteStatement(args.req,resultHandler); - } - } - - public static class GetTypeInfo extends org.apache.thrift.AsyncProcessFunction { - public GetTypeInfo() { - super("GetTypeInfo"); - } - - public GetTypeInfo_args getEmptyArgsInstance() { - return new GetTypeInfo_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new 
AsyncMethodCallback() { - public void onComplete(TGetTypeInfoResp o) { - GetTypeInfo_result result = new GetTypeInfo_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - GetTypeInfo_result result = new GetTypeInfo_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, GetTypeInfo_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.GetTypeInfo(args.req,resultHandler); - } - } - - public static class GetCatalogs extends org.apache.thrift.AsyncProcessFunction { - public GetCatalogs() { - super("GetCatalogs"); - } - - public GetCatalogs_args getEmptyArgsInstance() { - return new GetCatalogs_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TGetCatalogsResp o) { - GetCatalogs_result result = new GetCatalogs_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - GetCatalogs_result result = new GetCatalogs_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, GetCatalogs_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.GetCatalogs(args.req,resultHandler); - } - } - - public static class GetSchemas extends org.apache.thrift.AsyncProcessFunction { - public GetSchemas() { - super("GetSchemas"); - } - - public GetSchemas_args getEmptyArgsInstance() { - return new GetSchemas_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TGetSchemasResp o) { - GetSchemas_result result = new GetSchemas_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing 
to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - GetSchemas_result result = new GetSchemas_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, GetSchemas_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.GetSchemas(args.req,resultHandler); - } - } - - public static class GetTables extends org.apache.thrift.AsyncProcessFunction { - public GetTables() { - super("GetTables"); - } - - public GetTables_args getEmptyArgsInstance() { - return new GetTables_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TGetTablesResp o) { - GetTables_result result = new GetTables_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - GetTables_result result = new GetTables_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, GetTables_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.GetTables(args.req,resultHandler); - } - } - - public static class GetTableTypes extends org.apache.thrift.AsyncProcessFunction { - public GetTableTypes() { - super("GetTableTypes"); - } - - public GetTableTypes_args getEmptyArgsInstance() { - return new GetTableTypes_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TGetTableTypesResp o) { - GetTableTypes_result result = new GetTableTypes_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - GetTableTypes_result result = new GetTableTypes_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg 
= (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, GetTableTypes_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.GetTableTypes(args.req,resultHandler); - } - } - - public static class GetColumns extends org.apache.thrift.AsyncProcessFunction { - public GetColumns() { - super("GetColumns"); - } - - public GetColumns_args getEmptyArgsInstance() { - return new GetColumns_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TGetColumnsResp o) { - GetColumns_result result = new GetColumns_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - GetColumns_result result = new GetColumns_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, GetColumns_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.GetColumns(args.req,resultHandler); - } - } - - public static class GetFunctions extends org.apache.thrift.AsyncProcessFunction { - public GetFunctions() { - super("GetFunctions"); - } - - public GetFunctions_args getEmptyArgsInstance() { - return new GetFunctions_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TGetFunctionsResp o) { - GetFunctions_result result = new GetFunctions_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - GetFunctions_result result = new GetFunctions_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - 
} - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, GetFunctions_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.GetFunctions(args.req,resultHandler); - } - } - - public static class GetPrimaryKeys extends org.apache.thrift.AsyncProcessFunction { - public GetPrimaryKeys() { - super("GetPrimaryKeys"); - } - - public GetPrimaryKeys_args getEmptyArgsInstance() { - return new GetPrimaryKeys_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TGetPrimaryKeysResp o) { - GetPrimaryKeys_result result = new GetPrimaryKeys_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - GetPrimaryKeys_result result = new GetPrimaryKeys_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, GetPrimaryKeys_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.GetPrimaryKeys(args.req,resultHandler); - } - } - - public static class GetCrossReference extends org.apache.thrift.AsyncProcessFunction { - public GetCrossReference() { - super("GetCrossReference"); - } - - public GetCrossReference_args getEmptyArgsInstance() { - return new GetCrossReference_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TGetCrossReferenceResp o) { - GetCrossReference_result result = new GetCrossReference_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - GetCrossReference_result result = new GetCrossReference_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, GetCrossReference_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) 
throws TException { - iface.GetCrossReference(args.req,resultHandler); - } - } - - public static class GetOperationStatus extends org.apache.thrift.AsyncProcessFunction { - public GetOperationStatus() { - super("GetOperationStatus"); - } - - public GetOperationStatus_args getEmptyArgsInstance() { - return new GetOperationStatus_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TGetOperationStatusResp o) { - GetOperationStatus_result result = new GetOperationStatus_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - GetOperationStatus_result result = new GetOperationStatus_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, GetOperationStatus_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.GetOperationStatus(args.req,resultHandler); - } - } - - public static class CancelOperation extends org.apache.thrift.AsyncProcessFunction { - public CancelOperation() { - super("CancelOperation"); - } - - public CancelOperation_args getEmptyArgsInstance() { - return new CancelOperation_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TCancelOperationResp o) { - CancelOperation_result result = new CancelOperation_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - CancelOperation_result result = new CancelOperation_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, CancelOperation_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.CancelOperation(args.req,resultHandler); - } - } - - public static class CloseOperation extends org.apache.thrift.AsyncProcessFunction { - 
public CloseOperation() { - super("CloseOperation"); - } - - public CloseOperation_args getEmptyArgsInstance() { - return new CloseOperation_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TCloseOperationResp o) { - CloseOperation_result result = new CloseOperation_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - CloseOperation_result result = new CloseOperation_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, CloseOperation_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.CloseOperation(args.req,resultHandler); - } - } - - public static class GetResultSetMetadata extends org.apache.thrift.AsyncProcessFunction { - public GetResultSetMetadata() { - super("GetResultSetMetadata"); - } - - public GetResultSetMetadata_args getEmptyArgsInstance() { - return new GetResultSetMetadata_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TGetResultSetMetadataResp o) { - GetResultSetMetadata_result result = new GetResultSetMetadata_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - GetResultSetMetadata_result result = new GetResultSetMetadata_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, GetResultSetMetadata_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.GetResultSetMetadata(args.req,resultHandler); - } - } - - public static class FetchResults extends org.apache.thrift.AsyncProcessFunction { - public FetchResults() { - super("FetchResults"); - } - - public FetchResults_args getEmptyArgsInstance() { - return new FetchResults_args(); - } - - public 
AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TFetchResultsResp o) { - FetchResults_result result = new FetchResults_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - FetchResults_result result = new FetchResults_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, FetchResults_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.FetchResults(args.req,resultHandler); - } - } - - public static class GetDelegationToken extends org.apache.thrift.AsyncProcessFunction { - public GetDelegationToken() { - super("GetDelegationToken"); - } - - public GetDelegationToken_args getEmptyArgsInstance() { - return new GetDelegationToken_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TGetDelegationTokenResp o) { - GetDelegationToken_result result = new GetDelegationToken_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - GetDelegationToken_result result = new GetDelegationToken_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, GetDelegationToken_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.GetDelegationToken(args.req,resultHandler); - } - } - - public static class CancelDelegationToken extends org.apache.thrift.AsyncProcessFunction { - public CancelDelegationToken() { - super("CancelDelegationToken"); - } - - public CancelDelegationToken_args getEmptyArgsInstance() { - return new CancelDelegationToken_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = 
this; - return new AsyncMethodCallback() { - public void onComplete(TCancelDelegationTokenResp o) { - CancelDelegationToken_result result = new CancelDelegationToken_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - CancelDelegationToken_result result = new CancelDelegationToken_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, CancelDelegationToken_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.CancelDelegationToken(args.req,resultHandler); - } - } - - public static class RenewDelegationToken extends org.apache.thrift.AsyncProcessFunction { - public RenewDelegationToken() { - super("RenewDelegationToken"); - } - - public RenewDelegationToken_args getEmptyArgsInstance() { - return new RenewDelegationToken_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TRenewDelegationTokenResp o) { - RenewDelegationToken_result result = new RenewDelegationToken_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - RenewDelegationToken_result result = new RenewDelegationToken_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, RenewDelegationToken_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.RenewDelegationToken(args.req,resultHandler); - } - } - - } - - public static class OpenSession_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("OpenSession_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { 
- schemes.put(StandardScheme.class, new OpenSession_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new OpenSession_argsTupleSchemeFactory()); - } - - private TOpenSessionReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOpenSessionReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(OpenSession_args.class, metaDataMap); - } - - public OpenSession_args() { - } - - public OpenSession_args( - TOpenSessionReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public OpenSession_args(OpenSession_args other) { - if (other.isSetReq()) { - this.req = new TOpenSessionReq(other.req); - } - } - - public OpenSession_args deepCopy() { - return new OpenSession_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TOpenSessionReq getReq() { - return this.req; - } - - public void setReq(TOpenSessionReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TOpenSessionReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof OpenSession_args) - return this.equals((OpenSession_args)that); - return false; - } - - public boolean equals(OpenSession_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(OpenSession_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("OpenSession_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != 
null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class OpenSession_argsStandardSchemeFactory implements SchemeFactory { - public OpenSession_argsStandardScheme getScheme() { - return new OpenSession_argsStandardScheme(); - } - } - - private static class OpenSession_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, OpenSession_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TOpenSessionReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, OpenSession_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class OpenSession_argsTupleSchemeFactory implements SchemeFactory { - public OpenSession_argsTupleScheme getScheme() { - return new OpenSession_argsTupleScheme(); - } - } - - private static class OpenSession_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, OpenSession_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, OpenSession_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TOpenSessionReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class OpenSession_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("OpenSession_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", 
org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new OpenSession_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new OpenSession_resultTupleSchemeFactory()); - } - - private TOpenSessionResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOpenSessionResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(OpenSession_result.class, metaDataMap); - } - - public OpenSession_result() { - } - - public OpenSession_result( - TOpenSessionResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public OpenSession_result(OpenSession_result other) { - if (other.isSetSuccess()) { - this.success = new TOpenSessionResp(other.success); - } - } - - public OpenSession_result deepCopy() { - return new OpenSession_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TOpenSessionResp getSuccess() { - return this.success; - } - - public void setSuccess(TOpenSessionResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TOpenSessionResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof OpenSession_result) - return this.equals((OpenSession_result)that); - return false; - } - - public boolean equals(OpenSession_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(OpenSession_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("OpenSession_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - 
} - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class OpenSession_resultStandardSchemeFactory implements SchemeFactory { - public OpenSession_resultStandardScheme getScheme() { - return new OpenSession_resultStandardScheme(); - } - } - - private static class OpenSession_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, OpenSession_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TOpenSessionResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, OpenSession_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class OpenSession_resultTupleSchemeFactory implements SchemeFactory { - public OpenSession_resultTupleScheme getScheme() { - return new OpenSession_resultTupleScheme(); - } - } - - private static class OpenSession_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, OpenSession_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, OpenSession_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TOpenSessionResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class CloseSession_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - 
private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CloseSession_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CloseSession_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CloseSession_argsTupleSchemeFactory()); - } - - private TCloseSessionReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCloseSessionReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CloseSession_args.class, metaDataMap); - } - - public CloseSession_args() { - } - - public CloseSession_args( - TCloseSessionReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public CloseSession_args(CloseSession_args other) { - if (other.isSetReq()) { - this.req = new TCloseSessionReq(other.req); - } - } - - public CloseSession_args deepCopy() { - return new CloseSession_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TCloseSessionReq getReq() { - return this.req; - } - - public void setReq(TCloseSessionReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TCloseSessionReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CloseSession_args) - return this.equals((CloseSession_args)that); - return false; - } - - public boolean equals(CloseSession_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(CloseSession_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CloseSession_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity 
- if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CloseSession_argsStandardSchemeFactory implements SchemeFactory { - public CloseSession_argsStandardScheme getScheme() { - return new CloseSession_argsStandardScheme(); - } - } - - private static class CloseSession_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CloseSession_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TCloseSessionReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, CloseSession_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class CloseSession_argsTupleSchemeFactory implements SchemeFactory { - public CloseSession_argsTupleScheme getScheme() { - return new CloseSession_argsTupleScheme(); - } - } - - private static class CloseSession_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CloseSession_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CloseSession_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TCloseSessionReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class CloseSession_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CloseSession_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", 
org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CloseSession_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CloseSession_resultTupleSchemeFactory()); - } - - private TCloseSessionResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCloseSessionResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CloseSession_result.class, metaDataMap); - } - - public CloseSession_result() { - } - - public CloseSession_result( - TCloseSessionResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public CloseSession_result(CloseSession_result other) { - if (other.isSetSuccess()) { - this.success = new TCloseSessionResp(other.success); - } - } - - public CloseSession_result deepCopy() { - return new CloseSession_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TCloseSessionResp getSuccess() { - return this.success; - } - - public void setSuccess(TCloseSessionResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TCloseSessionResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CloseSession_result) - return this.equals((CloseSession_result)that); - return false; - } - - public boolean equals(CloseSession_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(CloseSession_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CloseSession_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - 
sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CloseSession_resultStandardSchemeFactory implements SchemeFactory { - public CloseSession_resultStandardScheme getScheme() { - return new CloseSession_resultStandardScheme(); - } - } - - private static class CloseSession_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CloseSession_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TCloseSessionResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, CloseSession_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class CloseSession_resultTupleSchemeFactory implements SchemeFactory { - public CloseSession_resultTupleScheme getScheme() { - return new CloseSession_resultTupleScheme(); - } - } - - private static class CloseSession_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CloseSession_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CloseSession_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TCloseSessionResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetInfo_args implements org.apache.thrift.TBase, 
java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetInfo_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetInfo_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetInfo_argsTupleSchemeFactory()); - } - - private TGetInfoReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetInfoReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetInfo_args.class, metaDataMap); - } - - public GetInfo_args() { - } - - public GetInfo_args( - TGetInfoReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public GetInfo_args(GetInfo_args other) { - if (other.isSetReq()) { - this.req = new TGetInfoReq(other.req); - } - } - - public GetInfo_args deepCopy() { - return new GetInfo_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetInfoReq getReq() { - return this.req; - } - - public void setReq(TGetInfoReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetInfoReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetInfo_args) - return this.equals((GetInfo_args)that); - return false; - } - - public boolean equals(GetInfo_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(GetInfo_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetInfo_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void 
writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetInfo_argsStandardSchemeFactory implements SchemeFactory { - public GetInfo_argsStandardScheme getScheme() { - return new GetInfo_argsStandardScheme(); - } - } - - private static class GetInfo_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetInfo_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetInfoReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetInfo_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetInfo_argsTupleSchemeFactory implements SchemeFactory { - public GetInfo_argsTupleScheme getScheme() { - return new GetInfo_argsTupleScheme(); - } - } - - private static class GetInfo_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetInfo_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetInfo_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetInfoReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetInfo_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetInfo_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { 
- schemes.put(StandardScheme.class, new GetInfo_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetInfo_resultTupleSchemeFactory()); - } - - private TGetInfoResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetInfoResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetInfo_result.class, metaDataMap); - } - - public GetInfo_result() { - } - - public GetInfo_result( - TGetInfoResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public GetInfo_result(GetInfo_result other) { - if (other.isSetSuccess()) { - this.success = new TGetInfoResp(other.success); - } - } - - public GetInfo_result deepCopy() { - return new GetInfo_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetInfoResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetInfoResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetInfoResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetInfo_result) - return this.equals((GetInfo_result)that); - return false; - } - - public boolean equals(GetInfo_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(GetInfo_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetInfo_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return 
sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetInfo_resultStandardSchemeFactory implements SchemeFactory { - public GetInfo_resultStandardScheme getScheme() { - return new GetInfo_resultStandardScheme(); - } - } - - private static class GetInfo_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetInfo_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetInfoResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetInfo_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetInfo_resultTupleSchemeFactory implements SchemeFactory { - public GetInfo_resultTupleScheme getScheme() { - return new GetInfo_resultTupleScheme(); - } - } - - private static class GetInfo_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetInfo_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetInfo_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetInfoResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class ExecuteStatement_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new 
org.apache.thrift.protocol.TStruct("ExecuteStatement_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new ExecuteStatement_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new ExecuteStatement_argsTupleSchemeFactory()); - } - - private TExecuteStatementReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TExecuteStatementReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(ExecuteStatement_args.class, metaDataMap); - } - - public ExecuteStatement_args() { - } - - public ExecuteStatement_args( - TExecuteStatementReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public ExecuteStatement_args(ExecuteStatement_args other) { - if (other.isSetReq()) { - this.req = new TExecuteStatementReq(other.req); - } - } - - public ExecuteStatement_args deepCopy() { - return new ExecuteStatement_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TExecuteStatementReq getReq() { - return this.req; - } - - public void setReq(TExecuteStatementReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TExecuteStatementReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof ExecuteStatement_args) - return this.equals((ExecuteStatement_args)that); - return false; - } - - public boolean equals(ExecuteStatement_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(ExecuteStatement_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("ExecuteStatement_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for 
required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class ExecuteStatement_argsStandardSchemeFactory implements SchemeFactory { - public ExecuteStatement_argsStandardScheme getScheme() { - return new ExecuteStatement_argsStandardScheme(); - } - } - - private static class ExecuteStatement_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, ExecuteStatement_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TExecuteStatementReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, ExecuteStatement_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class ExecuteStatement_argsTupleSchemeFactory implements SchemeFactory { - public ExecuteStatement_argsTupleScheme getScheme() { - return new ExecuteStatement_argsTupleScheme(); - } - } - - private static class ExecuteStatement_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, ExecuteStatement_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, ExecuteStatement_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TExecuteStatementReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class ExecuteStatement_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("ExecuteStatement_result"); - - private static 
final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new ExecuteStatement_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new ExecuteStatement_resultTupleSchemeFactory()); - } - - private TExecuteStatementResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TExecuteStatementResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(ExecuteStatement_result.class, metaDataMap); - } - - public ExecuteStatement_result() { - } - - public ExecuteStatement_result( - TExecuteStatementResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public ExecuteStatement_result(ExecuteStatement_result other) { - if (other.isSetSuccess()) { - this.success = new TExecuteStatementResp(other.success); - } - } - - public ExecuteStatement_result deepCopy() { - return new ExecuteStatement_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TExecuteStatementResp getSuccess() { - return this.success; - } - - public void setSuccess(TExecuteStatementResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TExecuteStatementResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof ExecuteStatement_result) - return this.equals((ExecuteStatement_result)that); - return false; - } - - public boolean equals(ExecuteStatement_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(ExecuteStatement_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("ExecuteStatement_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == 
null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class ExecuteStatement_resultStandardSchemeFactory implements SchemeFactory { - public ExecuteStatement_resultStandardScheme getScheme() { - return new ExecuteStatement_resultStandardScheme(); - } - } - - private static class ExecuteStatement_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, ExecuteStatement_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TExecuteStatementResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, ExecuteStatement_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class ExecuteStatement_resultTupleSchemeFactory implements SchemeFactory { - public ExecuteStatement_resultTupleScheme getScheme() { - return new ExecuteStatement_resultTupleScheme(); - } - } - - private static class ExecuteStatement_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, ExecuteStatement_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, ExecuteStatement_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TExecuteStatementResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - 
} - } - - } - - public static class GetTypeInfo_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetTypeInfo_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetTypeInfo_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetTypeInfo_argsTupleSchemeFactory()); - } - - private TGetTypeInfoReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetTypeInfoReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetTypeInfo_args.class, metaDataMap); - } - - public GetTypeInfo_args() { - } - - public GetTypeInfo_args( - TGetTypeInfoReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public GetTypeInfo_args(GetTypeInfo_args other) { - if (other.isSetReq()) { - this.req = new TGetTypeInfoReq(other.req); - } - } - - public GetTypeInfo_args deepCopy() { - return new GetTypeInfo_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetTypeInfoReq getReq() { - return this.req; - } - - public void setReq(TGetTypeInfoReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetTypeInfoReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetTypeInfo_args) - return this.equals((GetTypeInfo_args)that); - return false; - } - - public boolean equals(GetTypeInfo_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(GetTypeInfo_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetTypeInfo_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != 
null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetTypeInfo_argsStandardSchemeFactory implements SchemeFactory { - public GetTypeInfo_argsStandardScheme getScheme() { - return new GetTypeInfo_argsStandardScheme(); - } - } - - private static class GetTypeInfo_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetTypeInfo_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetTypeInfoReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetTypeInfo_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetTypeInfo_argsTupleSchemeFactory implements SchemeFactory { - public GetTypeInfo_argsTupleScheme getScheme() { - return new GetTypeInfo_argsTupleScheme(); - } - } - - private static class GetTypeInfo_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetTypeInfo_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetTypeInfo_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetTypeInfoReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetTypeInfo_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetTypeInfo_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", 
org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetTypeInfo_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetTypeInfo_resultTupleSchemeFactory()); - } - - private TGetTypeInfoResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetTypeInfoResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetTypeInfo_result.class, metaDataMap); - } - - public GetTypeInfo_result() { - } - - public GetTypeInfo_result( - TGetTypeInfoResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public GetTypeInfo_result(GetTypeInfo_result other) { - if (other.isSetSuccess()) { - this.success = new TGetTypeInfoResp(other.success); - } - } - - public GetTypeInfo_result deepCopy() { - return new GetTypeInfo_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetTypeInfoResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetTypeInfoResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetTypeInfoResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetTypeInfo_result) - return this.equals((GetTypeInfo_result)that); - return false; - } - - public boolean equals(GetTypeInfo_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(GetTypeInfo_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetTypeInfo_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - 
} - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetTypeInfo_resultStandardSchemeFactory implements SchemeFactory { - public GetTypeInfo_resultStandardScheme getScheme() { - return new GetTypeInfo_resultStandardScheme(); - } - } - - private static class GetTypeInfo_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetTypeInfo_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetTypeInfoResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetTypeInfo_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetTypeInfo_resultTupleSchemeFactory implements SchemeFactory { - public GetTypeInfo_resultTupleScheme getScheme() { - return new GetTypeInfo_resultTupleScheme(); - } - } - - private static class GetTypeInfo_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetTypeInfo_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetTypeInfo_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetTypeInfoResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetCatalogs_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - 
private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetCatalogs_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetCatalogs_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetCatalogs_argsTupleSchemeFactory()); - } - - private TGetCatalogsReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetCatalogsReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetCatalogs_args.class, metaDataMap); - } - - public GetCatalogs_args() { - } - - public GetCatalogs_args( - TGetCatalogsReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public GetCatalogs_args(GetCatalogs_args other) { - if (other.isSetReq()) { - this.req = new TGetCatalogsReq(other.req); - } - } - - public GetCatalogs_args deepCopy() { - return new GetCatalogs_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetCatalogsReq getReq() { - return this.req; - } - - public void setReq(TGetCatalogsReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetCatalogsReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetCatalogs_args) - return this.equals((GetCatalogs_args)that); - return false; - } - - public boolean equals(GetCatalogs_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(GetCatalogs_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetCatalogs_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != 
null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetCatalogs_argsStandardSchemeFactory implements SchemeFactory { - public GetCatalogs_argsStandardScheme getScheme() { - return new GetCatalogs_argsStandardScheme(); - } - } - - private static class GetCatalogs_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetCatalogs_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetCatalogsReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetCatalogs_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetCatalogs_argsTupleSchemeFactory implements SchemeFactory { - public GetCatalogs_argsTupleScheme getScheme() { - return new GetCatalogs_argsTupleScheme(); - } - } - - private static class GetCatalogs_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetCatalogs_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetCatalogs_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetCatalogsReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetCatalogs_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetCatalogs_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", 
org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetCatalogs_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetCatalogs_resultTupleSchemeFactory()); - } - - private TGetCatalogsResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetCatalogsResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetCatalogs_result.class, metaDataMap); - } - - public GetCatalogs_result() { - } - - public GetCatalogs_result( - TGetCatalogsResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public GetCatalogs_result(GetCatalogs_result other) { - if (other.isSetSuccess()) { - this.success = new TGetCatalogsResp(other.success); - } - } - - public GetCatalogs_result deepCopy() { - return new GetCatalogs_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetCatalogsResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetCatalogsResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetCatalogsResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetCatalogs_result) - return this.equals((GetCatalogs_result)that); - return false; - } - - public boolean equals(GetCatalogs_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(GetCatalogs_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetCatalogs_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - 
} - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetCatalogs_resultStandardSchemeFactory implements SchemeFactory { - public GetCatalogs_resultStandardScheme getScheme() { - return new GetCatalogs_resultStandardScheme(); - } - } - - private static class GetCatalogs_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetCatalogs_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetCatalogsResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetCatalogs_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetCatalogs_resultTupleSchemeFactory implements SchemeFactory { - public GetCatalogs_resultTupleScheme getScheme() { - return new GetCatalogs_resultTupleScheme(); - } - } - - private static class GetCatalogs_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetCatalogs_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetCatalogs_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetCatalogsResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetSchemas_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - 
private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetSchemas_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetSchemas_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetSchemas_argsTupleSchemeFactory()); - } - - private TGetSchemasReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetSchemasReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetSchemas_args.class, metaDataMap); - } - - public GetSchemas_args() { - } - - public GetSchemas_args( - TGetSchemasReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public GetSchemas_args(GetSchemas_args other) { - if (other.isSetReq()) { - this.req = new TGetSchemasReq(other.req); - } - } - - public GetSchemas_args deepCopy() { - return new GetSchemas_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetSchemasReq getReq() { - return this.req; - } - - public void setReq(TGetSchemasReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetSchemasReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetSchemas_args) - return this.equals((GetSchemas_args)that); - return false; - } - - public boolean equals(GetSchemas_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(GetSchemas_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetSchemas_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - 
req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetSchemas_argsStandardSchemeFactory implements SchemeFactory { - public GetSchemas_argsStandardScheme getScheme() { - return new GetSchemas_argsStandardScheme(); - } - } - - private static class GetSchemas_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetSchemas_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetSchemasReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetSchemas_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetSchemas_argsTupleSchemeFactory implements SchemeFactory { - public GetSchemas_argsTupleScheme getScheme() { - return new GetSchemas_argsTupleScheme(); - } - } - - private static class GetSchemas_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetSchemas_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetSchemas_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetSchemasReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetSchemas_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetSchemas_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - 
private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetSchemas_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetSchemas_resultTupleSchemeFactory()); - } - - private TGetSchemasResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetSchemasResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetSchemas_result.class, metaDataMap); - } - - public GetSchemas_result() { - } - - public GetSchemas_result( - TGetSchemasResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public GetSchemas_result(GetSchemas_result other) { - if (other.isSetSuccess()) { - this.success = new TGetSchemasResp(other.success); - } - } - - public GetSchemas_result deepCopy() { - return new GetSchemas_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetSchemasResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetSchemasResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetSchemasResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetSchemas_result) - return this.equals((GetSchemas_result)that); - return false; - } - - public boolean equals(GetSchemas_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(GetSchemas_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetSchemas_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = 
false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetSchemas_resultStandardSchemeFactory implements SchemeFactory { - public GetSchemas_resultStandardScheme getScheme() { - return new GetSchemas_resultStandardScheme(); - } - } - - private static class GetSchemas_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetSchemas_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetSchemasResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetSchemas_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetSchemas_resultTupleSchemeFactory implements SchemeFactory { - public GetSchemas_resultTupleScheme getScheme() { - return new GetSchemas_resultTupleScheme(); - } - } - - private static class GetSchemas_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetSchemas_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetSchemas_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetSchemasResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetTables_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final 
org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetTables_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetTables_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetTables_argsTupleSchemeFactory()); - } - - private TGetTablesReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetTablesReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetTables_args.class, metaDataMap); - } - - public GetTables_args() { - } - - public GetTables_args( - TGetTablesReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public GetTables_args(GetTables_args other) { - if (other.isSetReq()) { - this.req = new TGetTablesReq(other.req); - } - } - - public GetTables_args deepCopy() { - return new GetTables_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetTablesReq getReq() { - return this.req; - } - - public void setReq(TGetTablesReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetTablesReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetTables_args) - return this.equals((GetTables_args)that); - return false; - } - - public boolean equals(GetTables_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(GetTables_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetTables_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); 
- } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetTables_argsStandardSchemeFactory implements SchemeFactory { - public GetTables_argsStandardScheme getScheme() { - return new GetTables_argsStandardScheme(); - } - } - - private static class GetTables_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetTables_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetTablesReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetTables_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetTables_argsTupleSchemeFactory implements SchemeFactory { - public GetTables_argsTupleScheme getScheme() { - return new GetTables_argsTupleScheme(); - } - } - - private static class GetTables_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetTables_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetTables_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetTablesReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetTables_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetTables_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, 
SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetTables_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetTables_resultTupleSchemeFactory()); - } - - private TGetTablesResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetTablesResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetTables_result.class, metaDataMap); - } - - public GetTables_result() { - } - - public GetTables_result( - TGetTablesResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public GetTables_result(GetTables_result other) { - if (other.isSetSuccess()) { - this.success = new TGetTablesResp(other.success); - } - } - - public GetTables_result deepCopy() { - return new GetTables_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetTablesResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetTablesResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetTablesResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetTables_result) - return this.equals((GetTables_result)that); - return false; - } - - public boolean equals(GetTables_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(GetTables_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetTables_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - 
sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetTables_resultStandardSchemeFactory implements SchemeFactory { - public GetTables_resultStandardScheme getScheme() { - return new GetTables_resultStandardScheme(); - } - } - - private static class GetTables_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetTables_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetTablesResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetTables_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetTables_resultTupleSchemeFactory implements SchemeFactory { - public GetTables_resultTupleScheme getScheme() { - return new GetTables_resultTupleScheme(); - } - } - - private static class GetTables_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetTables_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetTables_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetTablesResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetTableTypes_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final 
org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetTableTypes_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetTableTypes_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetTableTypes_argsTupleSchemeFactory()); - } - - private TGetTableTypesReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetTableTypesReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetTableTypes_args.class, metaDataMap); - } - - public GetTableTypes_args() { - } - - public GetTableTypes_args( - TGetTableTypesReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public GetTableTypes_args(GetTableTypes_args other) { - if (other.isSetReq()) { - this.req = new TGetTableTypesReq(other.req); - } - } - - public GetTableTypes_args deepCopy() { - return new GetTableTypes_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetTableTypesReq getReq() { - return this.req; - } - - public void setReq(TGetTableTypesReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetTableTypesReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetTableTypes_args) - return this.equals((GetTableTypes_args)that); - return false; - } - - public boolean equals(GetTableTypes_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(GetTableTypes_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetTableTypes_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for 
sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetTableTypes_argsStandardSchemeFactory implements SchemeFactory { - public GetTableTypes_argsStandardScheme getScheme() { - return new GetTableTypes_argsStandardScheme(); - } - } - - private static class GetTableTypes_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetTableTypes_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetTableTypesReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetTableTypes_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetTableTypes_argsTupleSchemeFactory implements SchemeFactory { - public GetTableTypes_argsTupleScheme getScheme() { - return new GetTableTypes_argsTupleScheme(); - } - } - - private static class GetTableTypes_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetTableTypes_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetTableTypes_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetTableTypesReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetTableTypes_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetTableTypes_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new 
org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetTableTypes_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetTableTypes_resultTupleSchemeFactory()); - } - - private TGetTableTypesResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetTableTypesResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetTableTypes_result.class, metaDataMap); - } - - public GetTableTypes_result() { - } - - public GetTableTypes_result( - TGetTableTypesResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public GetTableTypes_result(GetTableTypes_result other) { - if (other.isSetSuccess()) { - this.success = new TGetTableTypesResp(other.success); - } - } - - public GetTableTypes_result deepCopy() { - return new GetTableTypes_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetTableTypesResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetTableTypesResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetTableTypesResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetTableTypes_result) - return this.equals((GetTableTypes_result)that); - return false; - } - - public boolean equals(GetTableTypes_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(GetTableTypes_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetTableTypes_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - 
sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetTableTypes_resultStandardSchemeFactory implements SchemeFactory { - public GetTableTypes_resultStandardScheme getScheme() { - return new GetTableTypes_resultStandardScheme(); - } - } - - private static class GetTableTypes_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetTableTypes_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetTableTypesResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetTableTypes_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetTableTypes_resultTupleSchemeFactory implements SchemeFactory { - public GetTableTypes_resultTupleScheme getScheme() { - return new GetTableTypes_resultTupleScheme(); - } - } - - private static class GetTableTypes_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetTableTypes_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetTableTypes_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetTableTypesResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetColumns_args implements 
org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetColumns_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetColumns_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetColumns_argsTupleSchemeFactory()); - } - - private TGetColumnsReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetColumnsReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetColumns_args.class, metaDataMap); - } - - public GetColumns_args() { - } - - public GetColumns_args( - TGetColumnsReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public GetColumns_args(GetColumns_args other) { - if (other.isSetReq()) { - this.req = new TGetColumnsReq(other.req); - } - } - - public GetColumns_args deepCopy() { - return new GetColumns_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetColumnsReq getReq() { - return this.req; - } - - public void setReq(TGetColumnsReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetColumnsReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetColumns_args) - return this.equals((GetColumns_args)that); - return false; - } - - public boolean equals(GetColumns_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(GetColumns_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetColumns_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - 
req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetColumns_argsStandardSchemeFactory implements SchemeFactory { - public GetColumns_argsStandardScheme getScheme() { - return new GetColumns_argsStandardScheme(); - } - } - - private static class GetColumns_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetColumns_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetColumnsReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetColumns_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetColumns_argsTupleSchemeFactory implements SchemeFactory { - public GetColumns_argsTupleScheme getScheme() { - return new GetColumns_argsTupleScheme(); - } - } - - private static class GetColumns_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetColumns_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetColumns_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetColumnsReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetColumns_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetColumns_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - 
private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetColumns_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetColumns_resultTupleSchemeFactory()); - } - - private TGetColumnsResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetColumnsResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetColumns_result.class, metaDataMap); - } - - public GetColumns_result() { - } - - public GetColumns_result( - TGetColumnsResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public GetColumns_result(GetColumns_result other) { - if (other.isSetSuccess()) { - this.success = new TGetColumnsResp(other.success); - } - } - - public GetColumns_result deepCopy() { - return new GetColumns_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetColumnsResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetColumnsResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetColumnsResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetColumns_result) - return this.equals((GetColumns_result)that); - return false; - } - - public boolean equals(GetColumns_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(GetColumns_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetColumns_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = 
false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetColumns_resultStandardSchemeFactory implements SchemeFactory { - public GetColumns_resultStandardScheme getScheme() { - return new GetColumns_resultStandardScheme(); - } - } - - private static class GetColumns_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetColumns_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetColumnsResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetColumns_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetColumns_resultTupleSchemeFactory implements SchemeFactory { - public GetColumns_resultTupleScheme getScheme() { - return new GetColumns_resultTupleScheme(); - } - } - - private static class GetColumns_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetColumns_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetColumns_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetColumnsResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetFunctions_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final 
org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetFunctions_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetFunctions_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetFunctions_argsTupleSchemeFactory()); - } - - private TGetFunctionsReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetFunctionsReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetFunctions_args.class, metaDataMap); - } - - public GetFunctions_args() { - } - - public GetFunctions_args( - TGetFunctionsReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public GetFunctions_args(GetFunctions_args other) { - if (other.isSetReq()) { - this.req = new TGetFunctionsReq(other.req); - } - } - - public GetFunctions_args deepCopy() { - return new GetFunctions_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetFunctionsReq getReq() { - return this.req; - } - - public void setReq(TGetFunctionsReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetFunctionsReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetFunctions_args) - return this.equals((GetFunctions_args)that); - return false; - } - - public boolean equals(GetFunctions_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(GetFunctions_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetFunctions_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity 
- if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetFunctions_argsStandardSchemeFactory implements SchemeFactory { - public GetFunctions_argsStandardScheme getScheme() { - return new GetFunctions_argsStandardScheme(); - } - } - - private static class GetFunctions_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetFunctions_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetFunctionsReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetFunctions_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetFunctions_argsTupleSchemeFactory implements SchemeFactory { - public GetFunctions_argsTupleScheme getScheme() { - return new GetFunctions_argsTupleScheme(); - } - } - - private static class GetFunctions_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetFunctions_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetFunctions_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetFunctionsReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetFunctions_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetFunctions_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", 
org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetFunctions_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetFunctions_resultTupleSchemeFactory()); - } - - private TGetFunctionsResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetFunctionsResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetFunctions_result.class, metaDataMap); - } - - public GetFunctions_result() { - } - - public GetFunctions_result( - TGetFunctionsResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public GetFunctions_result(GetFunctions_result other) { - if (other.isSetSuccess()) { - this.success = new TGetFunctionsResp(other.success); - } - } - - public GetFunctions_result deepCopy() { - return new GetFunctions_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetFunctionsResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetFunctionsResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetFunctionsResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetFunctions_result) - return this.equals((GetFunctions_result)that); - return false; - } - - public boolean equals(GetFunctions_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(GetFunctions_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetFunctions_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - 
sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetFunctions_resultStandardSchemeFactory implements SchemeFactory { - public GetFunctions_resultStandardScheme getScheme() { - return new GetFunctions_resultStandardScheme(); - } - } - - private static class GetFunctions_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetFunctions_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetFunctionsResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetFunctions_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetFunctions_resultTupleSchemeFactory implements SchemeFactory { - public GetFunctions_resultTupleScheme getScheme() { - return new GetFunctions_resultTupleScheme(); - } - } - - private static class GetFunctions_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetFunctions_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetFunctions_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetFunctionsResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetPrimaryKeys_args implements org.apache.thrift.TBase, 
java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetPrimaryKeys_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetPrimaryKeys_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetPrimaryKeys_argsTupleSchemeFactory()); - } - - private TGetPrimaryKeysReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetPrimaryKeysReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetPrimaryKeys_args.class, metaDataMap); - } - - public GetPrimaryKeys_args() { - } - - public GetPrimaryKeys_args( - TGetPrimaryKeysReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public GetPrimaryKeys_args(GetPrimaryKeys_args other) { - if (other.isSetReq()) { - this.req = new TGetPrimaryKeysReq(other.req); - } - } - - public GetPrimaryKeys_args deepCopy() { - return new GetPrimaryKeys_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetPrimaryKeysReq getReq() { - return this.req; - } - - public void setReq(TGetPrimaryKeysReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetPrimaryKeysReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetPrimaryKeys_args) - return this.equals((GetPrimaryKeys_args)that); - return false; - } - - public boolean equals(GetPrimaryKeys_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(GetPrimaryKeys_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetPrimaryKeys_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // 
check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetPrimaryKeys_argsStandardSchemeFactory implements SchemeFactory { - public GetPrimaryKeys_argsStandardScheme getScheme() { - return new GetPrimaryKeys_argsStandardScheme(); - } - } - - private static class GetPrimaryKeys_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetPrimaryKeys_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetPrimaryKeysReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetPrimaryKeys_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetPrimaryKeys_argsTupleSchemeFactory implements SchemeFactory { - public GetPrimaryKeys_argsTupleScheme getScheme() { - return new GetPrimaryKeys_argsTupleScheme(); - } - } - - private static class GetPrimaryKeys_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetPrimaryKeys_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetPrimaryKeys_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetPrimaryKeysReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetPrimaryKeys_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetPrimaryKeys_result"); - - private static final org.apache.thrift.protocol.TField 
SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetPrimaryKeys_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetPrimaryKeys_resultTupleSchemeFactory()); - } - - private TGetPrimaryKeysResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetPrimaryKeysResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetPrimaryKeys_result.class, metaDataMap); - } - - public GetPrimaryKeys_result() { - } - - public GetPrimaryKeys_result( - TGetPrimaryKeysResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public GetPrimaryKeys_result(GetPrimaryKeys_result other) { - if (other.isSetSuccess()) { - this.success = new TGetPrimaryKeysResp(other.success); - } - } - - public GetPrimaryKeys_result deepCopy() { - return new GetPrimaryKeys_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetPrimaryKeysResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetPrimaryKeysResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetPrimaryKeysResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetPrimaryKeys_result) - return this.equals((GetPrimaryKeys_result)that); - return false; - } - - public boolean equals(GetPrimaryKeys_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(GetPrimaryKeys_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetPrimaryKeys_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); 
- } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetPrimaryKeys_resultStandardSchemeFactory implements SchemeFactory { - public GetPrimaryKeys_resultStandardScheme getScheme() { - return new GetPrimaryKeys_resultStandardScheme(); - } - } - - private static class GetPrimaryKeys_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetPrimaryKeys_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetPrimaryKeysResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetPrimaryKeys_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetPrimaryKeys_resultTupleSchemeFactory implements SchemeFactory { - public GetPrimaryKeys_resultTupleScheme getScheme() { - return new GetPrimaryKeys_resultTupleScheme(); - } - } - - private static class GetPrimaryKeys_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetPrimaryKeys_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetPrimaryKeys_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetPrimaryKeysResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class 
GetCrossReference_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetCrossReference_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetCrossReference_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetCrossReference_argsTupleSchemeFactory()); - } - - private TGetCrossReferenceReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetCrossReferenceReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetCrossReference_args.class, metaDataMap); - } - - public GetCrossReference_args() { - } - - public GetCrossReference_args( - TGetCrossReferenceReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public GetCrossReference_args(GetCrossReference_args other) { - if (other.isSetReq()) { - this.req = new TGetCrossReferenceReq(other.req); - } - } - - public GetCrossReference_args deepCopy() { - return new GetCrossReference_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetCrossReferenceReq getReq() { - return this.req; - } - - public void setReq(TGetCrossReferenceReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetCrossReferenceReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetCrossReference_args) - return this.equals((GetCrossReference_args)that); - return false; - } - - public boolean equals(GetCrossReference_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(GetCrossReference_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetCrossReference_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - 
// check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetCrossReference_argsStandardSchemeFactory implements SchemeFactory { - public GetCrossReference_argsStandardScheme getScheme() { - return new GetCrossReference_argsStandardScheme(); - } - } - - private static class GetCrossReference_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetCrossReference_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetCrossReferenceReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetCrossReference_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetCrossReference_argsTupleSchemeFactory implements SchemeFactory { - public GetCrossReference_argsTupleScheme getScheme() { - return new GetCrossReference_argsTupleScheme(); - } - } - - private static class GetCrossReference_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetCrossReference_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetCrossReference_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetCrossReferenceReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetCrossReference_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new 
org.apache.thrift.protocol.TStruct("GetCrossReference_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetCrossReference_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetCrossReference_resultTupleSchemeFactory()); - } - - private TGetCrossReferenceResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetCrossReferenceResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetCrossReference_result.class, metaDataMap); - } - - public GetCrossReference_result() { - } - - public GetCrossReference_result( - TGetCrossReferenceResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public GetCrossReference_result(GetCrossReference_result other) { - if (other.isSetSuccess()) { - this.success = new TGetCrossReferenceResp(other.success); - } - } - - public GetCrossReference_result deepCopy() { - return new GetCrossReference_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetCrossReferenceResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetCrossReferenceResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetCrossReferenceResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetCrossReference_result) - return this.equals((GetCrossReference_result)that); - return false; - } - - public boolean equals(GetCrossReference_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(GetCrossReference_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetCrossReference_result("); - boolean first = true; - - sb.append("success:"); - if 
(this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetCrossReference_resultStandardSchemeFactory implements SchemeFactory { - public GetCrossReference_resultStandardScheme getScheme() { - return new GetCrossReference_resultStandardScheme(); - } - } - - private static class GetCrossReference_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetCrossReference_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetCrossReferenceResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetCrossReference_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetCrossReference_resultTupleSchemeFactory implements SchemeFactory { - public GetCrossReference_resultTupleScheme getScheme() { - return new GetCrossReference_resultTupleScheme(); - } - } - - private static class GetCrossReference_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetCrossReference_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetCrossReference_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetCrossReferenceResp(); - struct.success.read(iprot); - 
struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetOperationStatus_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetOperationStatus_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetOperationStatus_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetOperationStatus_argsTupleSchemeFactory()); - } - - private TGetOperationStatusReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetOperationStatusReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetOperationStatus_args.class, metaDataMap); - } - - public GetOperationStatus_args() { - } - - public GetOperationStatus_args( - TGetOperationStatusReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public GetOperationStatus_args(GetOperationStatus_args other) { - if (other.isSetReq()) { - this.req = new TGetOperationStatusReq(other.req); - } - } - - public GetOperationStatus_args deepCopy() { - return new GetOperationStatus_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetOperationStatusReq getReq() { - return this.req; - } - - public void setReq(TGetOperationStatusReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetOperationStatusReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetOperationStatus_args) - return this.equals((GetOperationStatus_args)that); - return false; - } - - public boolean equals(GetOperationStatus_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(GetOperationStatus_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetOperationStatus_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws 
org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetOperationStatus_argsStandardSchemeFactory implements SchemeFactory { - public GetOperationStatus_argsStandardScheme getScheme() { - return new GetOperationStatus_argsStandardScheme(); - } - } - - private static class GetOperationStatus_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetOperationStatus_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetOperationStatusReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetOperationStatus_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetOperationStatus_argsTupleSchemeFactory implements SchemeFactory { - public GetOperationStatus_argsTupleScheme getScheme() { - return new GetOperationStatus_argsTupleScheme(); - } - } - - private static class GetOperationStatus_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetOperationStatus_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetOperationStatus_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetOperationStatusReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetOperationStatus_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new 
org.apache.thrift.protocol.TStruct("GetOperationStatus_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetOperationStatus_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetOperationStatus_resultTupleSchemeFactory()); - } - - private TGetOperationStatusResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetOperationStatusResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetOperationStatus_result.class, metaDataMap); - } - - public GetOperationStatus_result() { - } - - public GetOperationStatus_result( - TGetOperationStatusResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public GetOperationStatus_result(GetOperationStatus_result other) { - if (other.isSetSuccess()) { - this.success = new TGetOperationStatusResp(other.success); - } - } - - public GetOperationStatus_result deepCopy() { - return new GetOperationStatus_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetOperationStatusResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetOperationStatusResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetOperationStatusResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetOperationStatus_result) - return this.equals((GetOperationStatus_result)that); - return false; - } - - public boolean equals(GetOperationStatus_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(GetOperationStatus_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetOperationStatus_result("); - boolean first = true; - - sb.append("success:"); 
- if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetOperationStatus_resultStandardSchemeFactory implements SchemeFactory { - public GetOperationStatus_resultStandardScheme getScheme() { - return new GetOperationStatus_resultStandardScheme(); - } - } - - private static class GetOperationStatus_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetOperationStatus_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetOperationStatusResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetOperationStatus_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetOperationStatus_resultTupleSchemeFactory implements SchemeFactory { - public GetOperationStatus_resultTupleScheme getScheme() { - return new GetOperationStatus_resultTupleScheme(); - } - } - - private static class GetOperationStatus_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetOperationStatus_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetOperationStatus_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetOperationStatusResp(); - 
struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class CancelOperation_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CancelOperation_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CancelOperation_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CancelOperation_argsTupleSchemeFactory()); - } - - private TCancelOperationReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCancelOperationReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CancelOperation_args.class, metaDataMap); - } - - public CancelOperation_args() { - } - - public CancelOperation_args( - TCancelOperationReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public CancelOperation_args(CancelOperation_args other) { - if (other.isSetReq()) { - this.req = new TCancelOperationReq(other.req); - } - } - - public CancelOperation_args deepCopy() { - return new CancelOperation_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TCancelOperationReq getReq() { - return this.req; - } - - public void setReq(TCancelOperationReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TCancelOperationReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CancelOperation_args) - return this.equals((CancelOperation_args)that); - return false; - } - - public boolean equals(CancelOperation_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(CancelOperation_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CancelOperation_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required 
fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CancelOperation_argsStandardSchemeFactory implements SchemeFactory { - public CancelOperation_argsStandardScheme getScheme() { - return new CancelOperation_argsStandardScheme(); - } - } - - private static class CancelOperation_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CancelOperation_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TCancelOperationReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, CancelOperation_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class CancelOperation_argsTupleSchemeFactory implements SchemeFactory { - public CancelOperation_argsTupleScheme getScheme() { - return new CancelOperation_argsTupleScheme(); - } - } - - private static class CancelOperation_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CancelOperation_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CancelOperation_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TCancelOperationReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class CancelOperation_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CancelOperation_result"); - - private static final 
org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CancelOperation_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CancelOperation_resultTupleSchemeFactory()); - } - - private TCancelOperationResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCancelOperationResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CancelOperation_result.class, metaDataMap); - } - - public CancelOperation_result() { - } - - public CancelOperation_result( - TCancelOperationResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public CancelOperation_result(CancelOperation_result other) { - if (other.isSetSuccess()) { - this.success = new TCancelOperationResp(other.success); - } - } - - public CancelOperation_result deepCopy() { - return new CancelOperation_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TCancelOperationResp getSuccess() { - return this.success; - } - - public void setSuccess(TCancelOperationResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TCancelOperationResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CancelOperation_result) - return this.equals((CancelOperation_result)that); - return false; - } - - public boolean equals(CancelOperation_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(CancelOperation_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CancelOperation_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - 
sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CancelOperation_resultStandardSchemeFactory implements SchemeFactory { - public CancelOperation_resultStandardScheme getScheme() { - return new CancelOperation_resultStandardScheme(); - } - } - - private static class CancelOperation_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CancelOperation_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TCancelOperationResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, CancelOperation_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class CancelOperation_resultTupleSchemeFactory implements SchemeFactory { - public CancelOperation_resultTupleScheme getScheme() { - return new CancelOperation_resultTupleScheme(); - } - } - - private static class CancelOperation_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CancelOperation_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CancelOperation_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TCancelOperationResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public 
static class CloseOperation_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CloseOperation_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CloseOperation_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CloseOperation_argsTupleSchemeFactory()); - } - - private TCloseOperationReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCloseOperationReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CloseOperation_args.class, metaDataMap); - } - - public CloseOperation_args() { - } - - public CloseOperation_args( - TCloseOperationReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public CloseOperation_args(CloseOperation_args other) { - if (other.isSetReq()) { - this.req = new TCloseOperationReq(other.req); - } - } - - public CloseOperation_args deepCopy() { - return new CloseOperation_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TCloseOperationReq getReq() { - return this.req; - } - - public void setReq(TCloseOperationReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TCloseOperationReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CloseOperation_args) - return this.equals((CloseOperation_args)that); - return false; - } - - public boolean equals(CloseOperation_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(CloseOperation_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CloseOperation_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // 
check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CloseOperation_argsStandardSchemeFactory implements SchemeFactory { - public CloseOperation_argsStandardScheme getScheme() { - return new CloseOperation_argsStandardScheme(); - } - } - - private static class CloseOperation_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CloseOperation_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TCloseOperationReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, CloseOperation_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class CloseOperation_argsTupleSchemeFactory implements SchemeFactory { - public CloseOperation_argsTupleScheme getScheme() { - return new CloseOperation_argsTupleScheme(); - } - } - - private static class CloseOperation_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CloseOperation_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CloseOperation_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TCloseOperationReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class CloseOperation_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CloseOperation_result"); - - private static final org.apache.thrift.protocol.TField 
SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CloseOperation_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CloseOperation_resultTupleSchemeFactory()); - } - - private TCloseOperationResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCloseOperationResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CloseOperation_result.class, metaDataMap); - } - - public CloseOperation_result() { - } - - public CloseOperation_result( - TCloseOperationResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public CloseOperation_result(CloseOperation_result other) { - if (other.isSetSuccess()) { - this.success = new TCloseOperationResp(other.success); - } - } - - public CloseOperation_result deepCopy() { - return new CloseOperation_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TCloseOperationResp getSuccess() { - return this.success; - } - - public void setSuccess(TCloseOperationResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TCloseOperationResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CloseOperation_result) - return this.equals((CloseOperation_result)that); - return false; - } - - public boolean equals(CloseOperation_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(CloseOperation_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CloseOperation_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); 
- } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CloseOperation_resultStandardSchemeFactory implements SchemeFactory { - public CloseOperation_resultStandardScheme getScheme() { - return new CloseOperation_resultStandardScheme(); - } - } - - private static class CloseOperation_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CloseOperation_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TCloseOperationResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, CloseOperation_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class CloseOperation_resultTupleSchemeFactory implements SchemeFactory { - public CloseOperation_resultTupleScheme getScheme() { - return new CloseOperation_resultTupleScheme(); - } - } - - private static class CloseOperation_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CloseOperation_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CloseOperation_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TCloseOperationResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class 
GetResultSetMetadata_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetResultSetMetadata_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetResultSetMetadata_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetResultSetMetadata_argsTupleSchemeFactory()); - } - - private TGetResultSetMetadataReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetResultSetMetadataReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetResultSetMetadata_args.class, metaDataMap); - } - - public GetResultSetMetadata_args() { - } - - public GetResultSetMetadata_args( - TGetResultSetMetadataReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public GetResultSetMetadata_args(GetResultSetMetadata_args other) { - if (other.isSetReq()) { - this.req = new TGetResultSetMetadataReq(other.req); - } - } - - public GetResultSetMetadata_args deepCopy() { - return new GetResultSetMetadata_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetResultSetMetadataReq getReq() { - return this.req; - } - - public void setReq(TGetResultSetMetadataReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetResultSetMetadataReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetResultSetMetadata_args) - return this.equals((GetResultSetMetadata_args)that); - return false; - } - - public boolean equals(GetResultSetMetadata_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(GetResultSetMetadata_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetResultSetMetadata_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() 
throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetResultSetMetadata_argsStandardSchemeFactory implements SchemeFactory { - public GetResultSetMetadata_argsStandardScheme getScheme() { - return new GetResultSetMetadata_argsStandardScheme(); - } - } - - private static class GetResultSetMetadata_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetResultSetMetadata_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetResultSetMetadataReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetResultSetMetadata_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetResultSetMetadata_argsTupleSchemeFactory implements SchemeFactory { - public GetResultSetMetadata_argsTupleScheme getScheme() { - return new GetResultSetMetadata_argsTupleScheme(); - } - } - - private static class GetResultSetMetadata_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetResultSetMetadata_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetResultSetMetadata_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetResultSetMetadataReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetResultSetMetadata_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final 
org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetResultSetMetadata_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetResultSetMetadata_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetResultSetMetadata_resultTupleSchemeFactory()); - } - - private TGetResultSetMetadataResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetResultSetMetadataResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetResultSetMetadata_result.class, metaDataMap); - } - - public GetResultSetMetadata_result() { - } - - public GetResultSetMetadata_result( - TGetResultSetMetadataResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
[Removed generated Thrift code: the remainder of GetResultSetMetadata_result — the deep-copy constructor and deepCopy(), clear(), accessors for the single TGetResultSetMetadataResp success field (get/set/unset/isSet/setSuccessIsSet), setFieldValue/getFieldValue/isSet dispatch over _Fields, equals/hashCode/compareTo, fieldForId, the scheme-dispatching read/write methods, toString, validate, the writeObject/readObject Java-serialization bridge, and the GetResultSetMetadata_resultStandardScheme and GetResultSetMetadata_resultTupleScheme readers and writers.]
[Removed generated code, continued: the tail of GetResultSetMetadata_result's tuple reader, followed by the complete FetchResults_args wrapper — STRUCT_DESC and REQ_FIELD_DESC constants, a single TFetchResultsReq req field, the _Fields enum (REQ = 1) with byName/findByThriftId lookups, metaDataMap registration, constructors and deep copy, req accessors, field dispatch, equals/hashCode/compareTo, toString, validate, the Java-serialization bridge, and the FetchResults_argsStandardScheme / FetchResults_argsTupleScheme serializers.]
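The removed StandardScheme readers all share one shape: read field headers until a STOP marker, dispatch on the field id, and skip anything unrecognised so that an old reader tolerates fields a newer writer might add. The sketch below mimics that loop with plain Java; `FieldHeader`, `readReq`, and the string "type tags" are invented stand-ins, not libthrift types.

```
import java.util.Iterator;
import java.util.List;

// Plain-Java stand-in for the generated StandardScheme read loop:
// walk field headers until STOP, dispatch on field id, skip the rest.
public class StandardSchemeLoopSketch {

  static final class FieldHeader {
    final short id; final String type; final Object value;
    FieldHeader(short id, String type, Object value) { this.id = id; this.type = type; this.value = value; }
  }
  static final FieldHeader STOP = new FieldHeader((short) 0, "STOP", null);

  static String readReq(List<FieldHeader> frame) {
    String req = null;
    Iterator<FieldHeader> it = frame.iterator();
    while (true) {
      FieldHeader f = it.next();
      if (f == STOP) {                    // schemeField.type == TType.STOP
        break;
      }
      switch (f.id) {
        case 1:                           // case 1: // REQ
          if ("STRUCT".equals(f.type)) {
            req = (String) f.value;       // stands in for struct.req.read(iprot)
          }                               // a mismatched type would be skipped
          break;
        default:
          // unknown field id: ignored, as TProtocolUtil.skip(...) does
          break;
      }
    }
    return req;
  }

  public static void main(String[] args) {
    List<FieldHeader> frame = List.of(
        new FieldHeader((short) 1, "STRUCT", "TFetchResultsReq payload"),
        new FieldHeader((short) 99, "I32", Integer.valueOf(42)),  // future field, skipped
        STOP);
    System.out.println("req = " + readReq(frame));
  }
}
```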
[Removed generated code, continued: the complete FetchResults_result wrapper — a single TFetchResultsResp success field at Thrift id 0, with the same _Fields enum, metaDataMap, constructors, deep copy, accessors, equals/hashCode/compareTo, toString, validate, serialization bridge, and FetchResults_resultStandardScheme / FetchResults_resultTupleScheme serializers as the wrappers above.]
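Each of these wrappers also plugs into plain java.io serialization: its private writeObject/readObject hooks re-encode the struct through its own Thrift protocol (a TCompactProtocol over a TIOStreamTransport in the removed code) instead of default field dumping. The sketch below shows the hook shape with a trivial hand-rolled encoding; `SerializationBridgeSketch` and its string field are illustrative assumptions, not the generated class.

```
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;

// Sketch of the writeObject/readObject bridge: intercept Java serialization
// and re-encode the payload by hand, the way the generated code delegates to
// a TCompactProtocol wrapped around the same streams.
public class SerializationBridgeSketch implements Serializable {
  private transient String success;   // stands in for the nested Thrift response struct

  public SerializationBridgeSketch(String success) { this.success = success; }

  private void writeObject(ObjectOutputStream out) throws IOException {
    // stands in for: write(new TCompactProtocol(new TIOStreamTransport(out)))
    out.writeBoolean(success != null);
    if (success != null) {
      out.writeUTF(success);
    }
  }

  private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
    // stands in for: read(new TCompactProtocol(new TIOStreamTransport(in)))
    if (in.readBoolean()) {
      success = in.readUTF();
    }
  }

  public static void main(String[] args) throws Exception {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    ObjectOutputStream oos = new ObjectOutputStream(bytes);
    oos.writeObject(new SerializationBridgeSketch("TFetchResultsResp stand-in"));
    oos.flush();
    ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(bytes.toByteArray()));
    SerializationBridgeSketch copy = (SerializationBridgeSketch) ois.readObject();
    System.out.println(copy.success);   // round-trips through the custom hooks
  }
}
```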
[Removed generated code, continued: the complete GetDelegationToken_args wrapper around a single TGetDelegationTokenReq req field (Thrift id 1), following the same generated layout — _Fields enum, metaDataMap, constructors, deep copy, accessors, field dispatch, equals/hashCode/compareTo, toString, validate, serialization bridge, and GetDelegationToken_argsStandardScheme / GetDelegationToken_argsTupleScheme serializers.]
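The generated value semantics treat "unset" as a state of its own: equals first compares the isSet flags, hashCode hashes a (present, value) list, and compareTo orders an unset wrapper before a set one before comparing the wrapped values. A plain-Java sketch of that contract follows; `WrapperOrderingSketch` is illustrative and a String stands in for the nested Thrift struct (TBaseHelper.compareTo becomes String.compareTo).

```
import java.util.ArrayList;
import java.util.List;

// Sketch of the equals/hashCode/compareTo pattern emitted for a
// single-field wrapper: presence first, then the wrapped value.
public class WrapperOrderingSketch implements Comparable<WrapperOrderingSketch> {
  private final String req;                          // like the generated 'req' field

  public WrapperOrderingSketch(String req) { this.req = req; }
  public boolean isSetReq() { return req != null; }

  @Override public boolean equals(Object that) {
    if (!(that instanceof WrapperOrderingSketch)) return false;
    WrapperOrderingSketch o = (WrapperOrderingSketch) that;
    if (isSetReq() != o.isSetReq()) return false;    // one set, one unset -> not equal
    return !isSetReq() || req.equals(o.req);
  }

  @Override public int hashCode() {
    List<Object> list = new ArrayList<>();           // mirrors the generated list-based hash
    list.add(isSetReq());
    if (isSetReq()) list.add(req);
    return list.hashCode();
  }

  @Override public int compareTo(WrapperOrderingSketch other) {
    int c = Boolean.compare(isSetReq(), other.isSetReq());
    if (c != 0) return c;                            // unset sorts before set
    return isSetReq() ? req.compareTo(other.req) : 0;
  }

  public static void main(String[] args) {
    System.out.println(new WrapperOrderingSketch("a").compareTo(new WrapperOrderingSketch(null))); // 1
    System.out.println(new WrapperOrderingSketch("a").equals(new WrapperOrderingSketch("a")));     // true
  }
}
```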
org.apache.thrift.protocol.TStruct("GetDelegationToken_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetDelegationToken_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetDelegationToken_resultTupleSchemeFactory()); - } - - private TGetDelegationTokenResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetDelegationTokenResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetDelegationToken_result.class, metaDataMap); - } - - public GetDelegationToken_result() { - } - - public GetDelegationToken_result( - TGetDelegationTokenResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public GetDelegationToken_result(GetDelegationToken_result other) { - if (other.isSetSuccess()) { - this.success = new TGetDelegationTokenResp(other.success); - } - } - - public GetDelegationToken_result deepCopy() { - return new GetDelegationToken_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetDelegationTokenResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetDelegationTokenResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetDelegationTokenResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetDelegationToken_result) - return this.equals((GetDelegationToken_result)that); - return false; - } - - public boolean equals(GetDelegationToken_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(GetDelegationToken_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetDelegationToken_result("); - boolean first = true; - - sb.append("success:"); 
- if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetDelegationToken_resultStandardSchemeFactory implements SchemeFactory { - public GetDelegationToken_resultStandardScheme getScheme() { - return new GetDelegationToken_resultStandardScheme(); - } - } - - private static class GetDelegationToken_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetDelegationToken_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetDelegationTokenResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetDelegationToken_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetDelegationToken_resultTupleSchemeFactory implements SchemeFactory { - public GetDelegationToken_resultTupleScheme getScheme() { - return new GetDelegationToken_resultTupleScheme(); - } - } - - private static class GetDelegationToken_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetDelegationToken_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetDelegationToken_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetDelegationTokenResp(); - 
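Every wrapper also repeats the same _Fields bookkeeping — an enum of Thrift field ids and names with lookup by id and by name — which drives setFieldValue/getFieldValue and the metaDataMap registration. The sketch below condenses that pattern into one standalone class; `FieldsEnumSketch` is a hypothetical container, while the REQ = 1 numbering copies the removed *_args wrappers.

```
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;

// Condensed version of the generated _Fields enum: field ids/names plus
// lookup by Thrift id and by name.
public class FieldsEnumSketch {
  public enum _Fields {
    REQ((short) 1, "req");

    private static final Map<String, _Fields> byName = new HashMap<>();
    static {
      for (_Fields field : EnumSet.allOf(_Fields.class)) {
        byName.put(field.getFieldName(), field);
      }
    }

    public static _Fields findByThriftId(int fieldId) {
      switch (fieldId) {
        case 1:  return REQ;
        default: return null;     // unknown id: caller decides whether to skip or throw
      }
    }

    public static _Fields findByThriftIdOrThrow(int fieldId) {
      _Fields f = findByThriftId(fieldId);
      if (f == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!");
      return f;
    }

    public static _Fields findByName(String name) { return byName.get(name); }

    private final short thriftId;
    private final String fieldName;
    _Fields(short thriftId, String fieldName) { this.thriftId = thriftId; this.fieldName = fieldName; }
    public short getThriftFieldId() { return thriftId; }
    public String getFieldName() { return fieldName; }
  }

  public static void main(String[] args) {
    System.out.println(_Fields.findByThriftId(1));   // REQ
    System.out.println(_Fields.findByName("req"));   // REQ
  }
}
```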
[Removed generated code, continued: the close of GetDelegationToken_result's tuple reader and the complete CancelDelegationToken_args wrapper around a single TCancelDelegationTokenReq req field (Thrift id 1), again with the standard generated members and CancelDelegationToken_argsStandardScheme / CancelDelegationToken_argsTupleScheme serializers.]
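One last recurring piece is the copy constructor behind deepCopy(): copying a wrapper clones the nested request/response struct rather than aliasing it, so the copy cannot be mutated through the original. A small sketch of that behaviour follows; `DeepCopySketch` and `NestedReq` are invented stand-ins (NestedReq plays the role of TCancelDelegationTokenReq).

```
// Sketch of the copy-constructor / deepCopy pattern each generated wrapper repeats.
public class DeepCopySketch {
  static class NestedReq {
    String token;
    NestedReq(String token) { this.token = token; }
    NestedReq(NestedReq other) { this.token = other.token; }   // struct-level copy ctor
  }

  private NestedReq req;   // like the generated 'req' field

  public DeepCopySketch() {}
  public DeepCopySketch(DeepCopySketch other) {
    if (other.isSetReq()) {
      this.req = new NestedReq(other.req);   // mirrors new TCancelDelegationTokenReq(other.req)
    }
  }
  public DeepCopySketch deepCopy() { return new DeepCopySketch(this); }
  public boolean isSetReq() { return req != null; }

  public static void main(String[] args) {
    DeepCopySketch a = new DeepCopySketch();
    a.req = new NestedReq("tok");
    DeepCopySketch b = a.deepCopy();
    b.req.token = "changed";
    System.out.println(a.req.token);   // still "tok": the nested struct was copied, not shared
  }
}
```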
[Removed generated code, continued: the CancelDelegationToken_result wrapper around a single TCancelDelegationTokenResp success field (Thrift id 0) — constants, _Fields enum, metaDataMap, constructors, deep copy, accessors, equals/hashCode/compareTo, toString, validate, serialization bridge, and the CancelDelegationToken_resultStandardScheme / CancelDelegationToken_resultTupleScheme serializers; the excerpt breaks off inside the tuple reader's readBitSet check.]
(incoming.get(0)) { - struct.success = new TCancelDelegationTokenResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class RenewDelegationToken_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("RenewDelegationToken_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new RenewDelegationToken_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new RenewDelegationToken_argsTupleSchemeFactory()); - } - - private TRenewDelegationTokenReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TRenewDelegationTokenReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(RenewDelegationToken_args.class, metaDataMap); - } - - public RenewDelegationToken_args() { - } - - public RenewDelegationToken_args( - TRenewDelegationTokenReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public RenewDelegationToken_args(RenewDelegationToken_args other) { - if (other.isSetReq()) { - this.req = new TRenewDelegationTokenReq(other.req); - } - } - - public RenewDelegationToken_args deepCopy() { - return new RenewDelegationToken_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TRenewDelegationTokenReq getReq() { - return this.req; - } - - public void setReq(TRenewDelegationTokenReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TRenewDelegationTokenReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof RenewDelegationToken_args) - return this.equals((RenewDelegationToken_args)that); - return false; - } - - public boolean equals(RenewDelegationToken_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(RenewDelegationToken_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("RenewDelegationToken_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() 
throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class RenewDelegationToken_argsStandardSchemeFactory implements SchemeFactory { - public RenewDelegationToken_argsStandardScheme getScheme() { - return new RenewDelegationToken_argsStandardScheme(); - } - } - - private static class RenewDelegationToken_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, RenewDelegationToken_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TRenewDelegationTokenReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, RenewDelegationToken_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class RenewDelegationToken_argsTupleSchemeFactory implements SchemeFactory { - public RenewDelegationToken_argsTupleScheme getScheme() { - return new RenewDelegationToken_argsTupleScheme(); - } - } - - private static class RenewDelegationToken_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, RenewDelegationToken_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, RenewDelegationToken_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TRenewDelegationTokenReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class RenewDelegationToken_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final 
org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("RenewDelegationToken_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new RenewDelegationToken_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new RenewDelegationToken_resultTupleSchemeFactory()); - } - - private TRenewDelegationTokenResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TRenewDelegationTokenResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(RenewDelegationToken_result.class, metaDataMap); - } - - public RenewDelegationToken_result() { - } - - public RenewDelegationToken_result( - TRenewDelegationTokenResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public RenewDelegationToken_result(RenewDelegationToken_result other) { - if (other.isSetSuccess()) { - this.success = new TRenewDelegationTokenResp(other.success); - } - } - - public RenewDelegationToken_result deepCopy() { - return new RenewDelegationToken_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TRenewDelegationTokenResp getSuccess() { - return this.success; - } - - public void setSuccess(TRenewDelegationTokenResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TRenewDelegationTokenResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof RenewDelegationToken_result) - return this.equals((RenewDelegationToken_result)that); - return false; - } - - public boolean equals(RenewDelegationToken_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(RenewDelegationToken_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("RenewDelegationToken_result("); - boolean first = true; 
- - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class RenewDelegationToken_resultStandardSchemeFactory implements SchemeFactory { - public RenewDelegationToken_resultStandardScheme getScheme() { - return new RenewDelegationToken_resultStandardScheme(); - } - } - - private static class RenewDelegationToken_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, RenewDelegationToken_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TRenewDelegationTokenResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, RenewDelegationToken_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class RenewDelegationToken_resultTupleSchemeFactory implements SchemeFactory { - public RenewDelegationToken_resultTupleScheme getScheme() { - return new RenewDelegationToken_resultTupleScheme(); - } - } - - private static class RenewDelegationToken_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, RenewDelegationToken_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, RenewDelegationToken_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - 
struct.success = new TRenewDelegationTokenResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - -} diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIServiceConstants.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIServiceConstants.java deleted file mode 100644 index 930bed731ed2a..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIServiceConstants.java +++ /dev/null @@ -1,106 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -public class TCLIServiceConstants { - - public static final Set PRIMITIVE_TYPES = new HashSet(); - static { - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.BOOLEAN_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.TINYINT_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.SMALLINT_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.INT_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.BIGINT_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.FLOAT_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.DOUBLE_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.STRING_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.TIMESTAMP_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.BINARY_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.DECIMAL_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.NULL_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.DATE_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.VARCHAR_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.CHAR_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.INTERVAL_YEAR_MONTH_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.INTERVAL_DAY_TIME_TYPE); - } - - public static final Set COMPLEX_TYPES = new HashSet(); - static { - COMPLEX_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.ARRAY_TYPE); - COMPLEX_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.MAP_TYPE); - COMPLEX_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.STRUCT_TYPE); - COMPLEX_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.UNION_TYPE); - 
COMPLEX_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.USER_DEFINED_TYPE); - } - - public static final Set COLLECTION_TYPES = new HashSet(); - static { - COLLECTION_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.ARRAY_TYPE); - COLLECTION_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.MAP_TYPE); - } - - public static final Map TYPE_NAMES = new HashMap(); - static { - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.BOOLEAN_TYPE, "BOOLEAN"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.TINYINT_TYPE, "TINYINT"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.SMALLINT_TYPE, "SMALLINT"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.INT_TYPE, "INT"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.BIGINT_TYPE, "BIGINT"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.FLOAT_TYPE, "FLOAT"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.DOUBLE_TYPE, "DOUBLE"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.STRING_TYPE, "STRING"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.TIMESTAMP_TYPE, "TIMESTAMP"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.BINARY_TYPE, "BINARY"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.ARRAY_TYPE, "ARRAY"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.MAP_TYPE, "MAP"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.STRUCT_TYPE, "STRUCT"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.UNION_TYPE, "UNIONTYPE"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.DECIMAL_TYPE, "DECIMAL"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.NULL_TYPE, "NULL"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.DATE_TYPE, "DATE"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.VARCHAR_TYPE, "VARCHAR"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.CHAR_TYPE, "CHAR"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.INTERVAL_YEAR_MONTH_TYPE, "INTERVAL_YEAR_MONTH"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.INTERVAL_DAY_TIME_TYPE, "INTERVAL_DAY_TIME"); - } - - public static final String CHARACTER_MAXIMUM_LENGTH = "characterMaximumLength"; - - public static final String PRECISION = "precision"; - - public static final String SCALE = "scale"; - -} diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenReq.java deleted file mode 100644 index a7d4e7de1f60d..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenReq.java +++ /dev/null @@ -1,495 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; 
-import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TCancelDelegationTokenReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCancelDelegationTokenReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField DELEGATION_TOKEN_FIELD_DESC = new org.apache.thrift.protocol.TField("delegationToken", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCancelDelegationTokenReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCancelDelegationTokenReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String delegationToken; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - DELEGATION_TOKEN((short)2, "delegationToken"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // DELEGATION_TOKEN - return DELEGATION_TOKEN; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.DELEGATION_TOKEN, new org.apache.thrift.meta_data.FieldMetaData("delegationToken", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCancelDelegationTokenReq.class, metaDataMap); - } - - public TCancelDelegationTokenReq() { - } - - public TCancelDelegationTokenReq( - TSessionHandle sessionHandle, - String delegationToken) - { - this(); - this.sessionHandle = sessionHandle; - this.delegationToken = delegationToken; - } - - /** - * Performs a deep copy on other. - */ - public TCancelDelegationTokenReq(TCancelDelegationTokenReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetDelegationToken()) { - this.delegationToken = other.delegationToken; - } - } - - public TCancelDelegationTokenReq deepCopy() { - return new TCancelDelegationTokenReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.delegationToken = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getDelegationToken() { - return this.delegationToken; - } - - public void setDelegationToken(String delegationToken) { - this.delegationToken = delegationToken; - } - - public void unsetDelegationToken() { - this.delegationToken = null; - } - - /** Returns true if field delegationToken is set (has been assigned a value) and false otherwise */ - public boolean isSetDelegationToken() { - return this.delegationToken != null; - } - - public void setDelegationTokenIsSet(boolean value) { - if (!value) { - this.delegationToken = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case DELEGATION_TOKEN: - if (value == null) { - unsetDelegationToken(); - } else { - setDelegationToken((String)value); 
- } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case DELEGATION_TOKEN: - return getDelegationToken(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case DELEGATION_TOKEN: - return isSetDelegationToken(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCancelDelegationTokenReq) - return this.equals((TCancelDelegationTokenReq)that); - return false; - } - - public boolean equals(TCancelDelegationTokenReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_delegationToken = true && this.isSetDelegationToken(); - boolean that_present_delegationToken = true && that.isSetDelegationToken(); - if (this_present_delegationToken || that_present_delegationToken) { - if (!(this_present_delegationToken && that_present_delegationToken)) - return false; - if (!this.delegationToken.equals(that.delegationToken)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - boolean present_delegationToken = true && (isSetDelegationToken()); - list.add(present_delegationToken); - if (present_delegationToken) - list.add(delegationToken); - - return list.hashCode(); - } - - @Override - public int compareTo(TCancelDelegationTokenReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetDelegationToken()).compareTo(other.isSetDelegationToken()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetDelegationToken()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.delegationToken, other.delegationToken); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - 
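The `read`/`write` methods just above follow the pattern every struct in these generated files uses: a per-struct `schemes` map, filled in a static block, maps a scheme class (`StandardScheme` or `TupleScheme`) to a factory, and `read`/`write` simply dispatch through `schemes.get(protocol.getScheme()).getScheme()`. The sketch below is illustrative only; it is not part of this patch or of the Thrift runtime, and all names in it (`Scheme`, `WireFormat`, `SchemeRegistrySketch`, ...) are hypothetical stand-ins for the corresponding generated machinery.

```java
// Minimal sketch of the scheme-dispatch pattern used by the generated structs.
// Hypothetical names throughout; only the shape of the dispatch is taken from
// the code above (a map from scheme family to serializer, consulted at call time).
import java.util.HashMap;
import java.util.Map;

public class SchemeRegistrySketch {

  // A scheme knows how to serialize one struct for one wire layout.
  interface Scheme<T> {
    void write(StringBuilder out, T value);
  }

  // Stand-in for a Thrift protocol: it advertises which scheme family it wants.
  enum WireFormat { FIELD_TAGGED, TUPLE }

  // Verbose layout: every field carries an explicit tag, like StandardScheme.
  static final class FieldTaggedScheme implements Scheme<String> {
    public void write(StringBuilder out, String value) {
      out.append("field(1):").append(value);
    }
  }

  // Dense layout: fields are written positionally, like TupleScheme.
  static final class TupleLikeScheme implements Scheme<String> {
    public void write(StringBuilder out, String value) {
      out.append(value);
    }
  }

  // Mirrors the per-struct `schemes` map that the static block populates.
  private static final Map<WireFormat, Scheme<String>> SCHEMES = new HashMap<>();
  static {
    SCHEMES.put(WireFormat.FIELD_TAGGED, new FieldTaggedScheme());
    SCHEMES.put(WireFormat.TUPLE, new TupleLikeScheme());
  }

  // Analogue of schemes.get(oprot.getScheme()).getScheme().write(oprot, this).
  static String write(WireFormat format, String value) {
    StringBuilder out = new StringBuilder();
    SCHEMES.get(format).write(out, value);
    return out.toString();
  }

  public static void main(String[] args) {
    System.out.println(write(WireFormat.FIELD_TAGGED, "token")); // field(1):token
    System.out.println(write(WireFormat.TUPLE, "token"));        // token
  }
}
```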
@Override - public String toString() { - StringBuilder sb = new StringBuilder("TCancelDelegationTokenReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (!first) sb.append(", "); - sb.append("delegationToken:"); - if (this.delegationToken == null) { - sb.append("null"); - } else { - sb.append(this.delegationToken); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - if (!isSetDelegationToken()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'delegationToken' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCancelDelegationTokenReqStandardSchemeFactory implements SchemeFactory { - public TCancelDelegationTokenReqStandardScheme getScheme() { - return new TCancelDelegationTokenReqStandardScheme(); - } - } - - private static class TCancelDelegationTokenReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCancelDelegationTokenReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // DELEGATION_TOKEN - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.delegationToken = iprot.readString(); - struct.setDelegationTokenIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCancelDelegationTokenReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.delegationToken != null) { - 
oprot.writeFieldBegin(DELEGATION_TOKEN_FIELD_DESC); - oprot.writeString(struct.delegationToken); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCancelDelegationTokenReqTupleSchemeFactory implements SchemeFactory { - public TCancelDelegationTokenReqTupleScheme getScheme() { - return new TCancelDelegationTokenReqTupleScheme(); - } - } - - private static class TCancelDelegationTokenReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCancelDelegationTokenReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - oprot.writeString(struct.delegationToken); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCancelDelegationTokenReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - struct.delegationToken = iprot.readString(); - struct.setDelegationTokenIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenResp.java deleted file mode 100644 index 611e92ca2af30..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenResp.java +++ /dev/null @@ -1,394 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TCancelDelegationTokenResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCancelDelegationTokenResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCancelDelegationTokenRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCancelDelegationTokenRespTupleSchemeFactory()); - } 
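The tuple schemes in the hunks above come in two flavours: structs whose fields are all required, such as `TCancelDelegationTokenReq`, write their fields positionally with no preamble, while wrappers with optional fields (the `*_args`/`*_result` classes) first write a `BitSet` recording which fields follow, matched by `readBitSet(1)` on the reading side. The following sketch is illustrative only, not part of the patch; it models the wire frame as a plain `List` purely to show the two encodings side by side, and its method names are hypothetical.

```java
// Contrast of the two tuple encodings visible in the generated code:
// all-required structs are written bare; optional fields sit behind a presence BitSet.
import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;

public class TupleEncodingSketch {

  // All-required struct: both values are always present, so write them in order.
  static List<Object> writeAllRequired(String sessionHandle, String delegationToken) {
    List<Object> frame = new ArrayList<>();
    frame.add(sessionHandle);
    frame.add(delegationToken);
    return frame;
  }

  // Optional field: write a presence bitmap first, then only the fields that are set.
  static List<Object> writeWithOptional(String successOrNull) {
    BitSet optionals = new BitSet();
    if (successOrNull != null) {
      optionals.set(0);        // bit 0 <=> the "success" field is present
    }
    List<Object> frame = new ArrayList<>();
    frame.add(optionals);      // the reader consumes this first, like readBitSet(1)
    if (successOrNull != null) {
      frame.add(successOrNull);
    }
    return frame;
  }

  public static void main(String[] args) {
    System.out.println(writeAllRequired("session-1", "token-1")); // [session-1, token-1]
    System.out.println(writeWithOptional(null));                  // [{}]
    System.out.println(writeWithOptional("resp"));                // [{0}, resp]
  }
}
```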
- - private TStatus status; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCancelDelegationTokenResp.class, metaDataMap); - } - - public TCancelDelegationTokenResp() { - } - - public TCancelDelegationTokenResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TCancelDelegationTokenResp(TCancelDelegationTokenResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - } - - public TCancelDelegationTokenResp deepCopy() { - return new TCancelDelegationTokenResp(this); - } - - @Override - public void clear() { - this.status = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCancelDelegationTokenResp) - return this.equals((TCancelDelegationTokenResp)that); - return false; - } - - public boolean equals(TCancelDelegationTokenResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if (present_status) - list.add(status); - - return list.hashCode(); - } - - @Override - public int compareTo(TCancelDelegationTokenResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCancelDelegationTokenResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first 
= false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCancelDelegationTokenRespStandardSchemeFactory implements SchemeFactory { - public TCancelDelegationTokenRespStandardScheme getScheme() { - return new TCancelDelegationTokenRespStandardScheme(); - } - } - - private static class TCancelDelegationTokenRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCancelDelegationTokenResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCancelDelegationTokenResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCancelDelegationTokenRespTupleSchemeFactory implements SchemeFactory { - public TCancelDelegationTokenRespTupleScheme getScheme() { - return new TCancelDelegationTokenRespTupleScheme(); - } - } - - private static class TCancelDelegationTokenRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCancelDelegationTokenResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCancelDelegationTokenResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationReq.java 
b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationReq.java deleted file mode 100644 index 4076c573fafb7..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationReq.java +++ /dev/null @@ -1,394 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TCancelOperationReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCancelOperationReq"); - - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCancelOperationReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCancelOperationReqTupleSchemeFactory()); - } - - private TOperationHandle operationHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OPERATION_HANDLE((short)1, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCancelOperationReq.class, metaDataMap); - } - - public TCancelOperationReq() { - } - - public TCancelOperationReq( - TOperationHandle operationHandle) - { - this(); - this.operationHandle = operationHandle; - } - - /** - * Performs a deep copy on other. - */ - public TCancelOperationReq(TCancelOperationReq other) { - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TCancelOperationReq deepCopy() { - return new TCancelOperationReq(this); - } - - @Override - public void clear() { - this.operationHandle = null; - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCancelOperationReq) - return this.equals((TCancelOperationReq)that); - return false; - } - - public boolean equals(TCancelOperationReq that) { - if (that == null) - return false; - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && 
that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TCancelOperationReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCancelOperationReq("); - boolean first = true; - - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetOperationHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationHandle' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCancelOperationReqStandardSchemeFactory implements SchemeFactory { - public TCancelOperationReqStandardScheme getScheme() { - return new TCancelOperationReqStandardScheme(); - } - } - - private static class TCancelOperationReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCancelOperationReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCancelOperationReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.operationHandle != null) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCancelOperationReqTupleSchemeFactory implements SchemeFactory { - public TCancelOperationReqTupleScheme getScheme() { - return new TCancelOperationReqTupleScheme(); - } - } - - private static class TCancelOperationReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCancelOperationReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.operationHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCancelOperationReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationResp.java deleted file mode 100644 index 7bcc765c85daa..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationResp.java 
+++ /dev/null @@ -1,394 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TCancelOperationResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCancelOperationResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCancelOperationRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCancelOperationRespTupleSchemeFactory()); - } - - private TStatus status; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCancelOperationResp.class, metaDataMap); - } - - public TCancelOperationResp() { - } - - public TCancelOperationResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TCancelOperationResp(TCancelOperationResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - } - - public TCancelOperationResp deepCopy() { - return new TCancelOperationResp(this); - } - - @Override - public void clear() { - this.status = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCancelOperationResp) - return this.equals((TCancelOperationResp)that); - return false; - } - - public boolean equals(TCancelOperationResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if (present_status) - list.add(status); - - return list.hashCode(); - } - - @Override - public int 
compareTo(TCancelOperationResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCancelOperationResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCancelOperationRespStandardSchemeFactory implements SchemeFactory { - public TCancelOperationRespStandardScheme getScheme() { - return new TCancelOperationRespStandardScheme(); - } - } - - private static class TCancelOperationRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCancelOperationResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCancelOperationResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status 
!= null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCancelOperationRespTupleSchemeFactory implements SchemeFactory { - public TCancelOperationRespTupleScheme getScheme() { - return new TCancelOperationRespTupleScheme(); - } - } - - private static class TCancelOperationRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCancelOperationResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCancelOperationResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationReq.java deleted file mode 100644 index 47a6b8329c05b..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationReq.java +++ /dev/null @@ -1,394 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TCloseOperationReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCloseOperationReq"); - - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCloseOperationReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCloseOperationReqTupleSchemeFactory()); - } - - private TOperationHandle operationHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OPERATION_HANDLE((short)1, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCloseOperationReq.class, metaDataMap); - } - - public TCloseOperationReq() { - } - - public TCloseOperationReq( - TOperationHandle operationHandle) - { - this(); - this.operationHandle = operationHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TCloseOperationReq(TCloseOperationReq other) { - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TCloseOperationReq deepCopy() { - return new TCloseOperationReq(this); - } - - @Override - public void clear() { - this.operationHandle = null; - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCloseOperationReq) - return this.equals((TCloseOperationReq)that); - return false; - } - - public boolean equals(TCloseOperationReq that) { - if (that == null) - return false; - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TCloseOperationReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) 
throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCloseOperationReq("); - boolean first = true; - - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetOperationHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCloseOperationReqStandardSchemeFactory implements SchemeFactory { - public TCloseOperationReqStandardScheme getScheme() { - return new TCloseOperationReqStandardScheme(); - } - } - - private static class TCloseOperationReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCloseOperationReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCloseOperationReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.operationHandle != null) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCloseOperationReqTupleSchemeFactory implements SchemeFactory { - public TCloseOperationReqTupleScheme getScheme() { - return new TCloseOperationReqTupleScheme(); - } - } - - private static class TCloseOperationReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCloseOperationReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.operationHandle.write(oprot); - } - - @Override - public void 
read(org.apache.thrift.protocol.TProtocol prot, TCloseOperationReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationResp.java deleted file mode 100644 index 0860a2b1c5bac..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationResp.java +++ /dev/null @@ -1,394 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TCloseOperationResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCloseOperationResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCloseOperationRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCloseOperationRespTupleSchemeFactory()); - } - - private TStatus status; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCloseOperationResp.class, metaDataMap); - } - - public TCloseOperationResp() { - } - - public TCloseOperationResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TCloseOperationResp(TCloseOperationResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - } - - public TCloseOperationResp deepCopy() { - return new TCloseOperationResp(this); - } - - @Override - public void clear() { - this.status = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCloseOperationResp) - return this.equals((TCloseOperationResp)that); - return false; - } - - public boolean equals(TCloseOperationResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return 
false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if (present_status) - list.add(status); - - return list.hashCode(); - } - - @Override - public int compareTo(TCloseOperationResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCloseOperationResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCloseOperationRespStandardSchemeFactory implements SchemeFactory { - public TCloseOperationRespStandardScheme getScheme() { - return new TCloseOperationRespStandardScheme(); - } - } - - private static class TCloseOperationRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCloseOperationResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - 
iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCloseOperationResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCloseOperationRespTupleSchemeFactory implements SchemeFactory { - public TCloseOperationRespTupleScheme getScheme() { - return new TCloseOperationRespTupleScheme(); - } - } - - private static class TCloseOperationRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCloseOperationResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCloseOperationResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionReq.java deleted file mode 100644 index 43ee87f487a67..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionReq.java +++ /dev/null @@ -1,394 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TCloseSessionReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCloseSessionReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCloseSessionReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new 
TCloseSessionReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCloseSessionReq.class, metaDataMap); - } - - public TCloseSessionReq() { - } - - public TCloseSessionReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TCloseSessionReq(TCloseSessionReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - } - - public TCloseSessionReq deepCopy() { - return new TCloseSessionReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCloseSessionReq) - return this.equals((TCloseSessionReq)that); - return false; - } - - public boolean equals(TCloseSessionReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TCloseSessionReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - 
@Override - public String toString() { - StringBuilder sb = new StringBuilder("TCloseSessionReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCloseSessionReqStandardSchemeFactory implements SchemeFactory { - public TCloseSessionReqStandardScheme getScheme() { - return new TCloseSessionReqStandardScheme(); - } - } - - private static class TCloseSessionReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCloseSessionReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCloseSessionReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCloseSessionReqTupleSchemeFactory implements SchemeFactory { - public TCloseSessionReqTupleScheme getScheme() { - return new TCloseSessionReqTupleScheme(); - } - } - - private static class TCloseSessionReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCloseSessionReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCloseSessionReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - 
struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionResp.java deleted file mode 100644 index 38f82ac8d3cd2..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionResp.java +++ /dev/null @@ -1,394 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TCloseSessionResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCloseSessionResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCloseSessionRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCloseSessionRespTupleSchemeFactory()); - } - - private TStatus status; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCloseSessionResp.class, metaDataMap); - } - - public TCloseSessionResp() { - } - - public TCloseSessionResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TCloseSessionResp(TCloseSessionResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - } - - public TCloseSessionResp deepCopy() { - return new TCloseSessionResp(this); - } - - @Override - public void clear() { - this.status = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCloseSessionResp) - return this.equals((TCloseSessionResp)that); - return false; - } - - public boolean equals(TCloseSessionResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if (present_status) - list.add(status); - - return list.hashCode(); - } - - @Override - public int compareTo(TCloseSessionResp other) { - if 
(!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCloseSessionResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCloseSessionRespStandardSchemeFactory implements SchemeFactory { - public TCloseSessionRespStandardScheme getScheme() { - return new TCloseSessionRespStandardScheme(); - } - } - - private static class TCloseSessionRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCloseSessionResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCloseSessionResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - 
struct.status.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCloseSessionRespTupleSchemeFactory implements SchemeFactory { - public TCloseSessionRespTupleScheme getScheme() { - return new TCloseSessionRespTupleScheme(); - } - } - - private static class TCloseSessionRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCloseSessionResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCloseSessionResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumn.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumn.java deleted file mode 100644 index dd79482200961..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumn.java +++ /dev/null @@ -1,736 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -public class TColumn extends org.apache.thrift.TUnion { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TColumn"); - private static final org.apache.thrift.protocol.TField BOOL_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("boolVal", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField BYTE_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("byteVal", org.apache.thrift.protocol.TType.STRUCT, (short)2); - private static final org.apache.thrift.protocol.TField I16_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("i16Val", org.apache.thrift.protocol.TType.STRUCT, (short)3); - private static final org.apache.thrift.protocol.TField I32_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("i32Val", org.apache.thrift.protocol.TType.STRUCT, (short)4); - private static final org.apache.thrift.protocol.TField I64_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("i64Val", org.apache.thrift.protocol.TType.STRUCT, (short)5); - private static final 
org.apache.thrift.protocol.TField DOUBLE_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("doubleVal", org.apache.thrift.protocol.TType.STRUCT, (short)6); - private static final org.apache.thrift.protocol.TField STRING_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("stringVal", org.apache.thrift.protocol.TType.STRUCT, (short)7); - private static final org.apache.thrift.protocol.TField BINARY_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("binaryVal", org.apache.thrift.protocol.TType.STRUCT, (short)8); - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - BOOL_VAL((short)1, "boolVal"), - BYTE_VAL((short)2, "byteVal"), - I16_VAL((short)3, "i16Val"), - I32_VAL((short)4, "i32Val"), - I64_VAL((short)5, "i64Val"), - DOUBLE_VAL((short)6, "doubleVal"), - STRING_VAL((short)7, "stringVal"), - BINARY_VAL((short)8, "binaryVal"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // BOOL_VAL - return BOOL_VAL; - case 2: // BYTE_VAL - return BYTE_VAL; - case 3: // I16_VAL - return I16_VAL; - case 4: // I32_VAL - return I32_VAL; - case 5: // I64_VAL - return I64_VAL; - case 6: // DOUBLE_VAL - return DOUBLE_VAL; - case 7: // STRING_VAL - return STRING_VAL; - case 8: // BINARY_VAL - return BINARY_VAL; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.BOOL_VAL, new org.apache.thrift.meta_data.FieldMetaData("boolVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TBoolColumn.class))); - tmpMap.put(_Fields.BYTE_VAL, new org.apache.thrift.meta_data.FieldMetaData("byteVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TByteColumn.class))); - tmpMap.put(_Fields.I16_VAL, new org.apache.thrift.meta_data.FieldMetaData("i16Val", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TI16Column.class))); - tmpMap.put(_Fields.I32_VAL, new org.apache.thrift.meta_data.FieldMetaData("i32Val", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TI32Column.class))); - tmpMap.put(_Fields.I64_VAL, new org.apache.thrift.meta_data.FieldMetaData("i64Val", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TI64Column.class))); - tmpMap.put(_Fields.DOUBLE_VAL, new org.apache.thrift.meta_data.FieldMetaData("doubleVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TDoubleColumn.class))); - tmpMap.put(_Fields.STRING_VAL, new org.apache.thrift.meta_data.FieldMetaData("stringVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStringColumn.class))); - tmpMap.put(_Fields.BINARY_VAL, new org.apache.thrift.meta_data.FieldMetaData("binaryVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TBinaryColumn.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TColumn.class, metaDataMap); - } - - public TColumn() { - super(); - } - - public TColumn(TColumn._Fields setField, Object value) { - super(setField, value); - } - - public TColumn(TColumn other) { - super(other); - } - public TColumn deepCopy() { - return new TColumn(this); - } - - public static TColumn boolVal(TBoolColumn value) { - TColumn x = new TColumn(); - x.setBoolVal(value); - return x; - } - - public static TColumn byteVal(TByteColumn value) { - TColumn x = new TColumn(); - x.setByteVal(value); - return x; - } - - public static TColumn i16Val(TI16Column value) { - TColumn x = new TColumn(); - x.setI16Val(value); - return x; - } - - public static TColumn i32Val(TI32Column value) { - TColumn x = new TColumn(); - x.setI32Val(value); - return x; - } - - public static TColumn 
i64Val(TI64Column value) { - TColumn x = new TColumn(); - x.setI64Val(value); - return x; - } - - public static TColumn doubleVal(TDoubleColumn value) { - TColumn x = new TColumn(); - x.setDoubleVal(value); - return x; - } - - public static TColumn stringVal(TStringColumn value) { - TColumn x = new TColumn(); - x.setStringVal(value); - return x; - } - - public static TColumn binaryVal(TBinaryColumn value) { - TColumn x = new TColumn(); - x.setBinaryVal(value); - return x; - } - - - @Override - protected void checkType(_Fields setField, Object value) throws ClassCastException { - switch (setField) { - case BOOL_VAL: - if (value instanceof TBoolColumn) { - break; - } - throw new ClassCastException("Was expecting value of type TBoolColumn for field 'boolVal', but got " + value.getClass().getSimpleName()); - case BYTE_VAL: - if (value instanceof TByteColumn) { - break; - } - throw new ClassCastException("Was expecting value of type TByteColumn for field 'byteVal', but got " + value.getClass().getSimpleName()); - case I16_VAL: - if (value instanceof TI16Column) { - break; - } - throw new ClassCastException("Was expecting value of type TI16Column for field 'i16Val', but got " + value.getClass().getSimpleName()); - case I32_VAL: - if (value instanceof TI32Column) { - break; - } - throw new ClassCastException("Was expecting value of type TI32Column for field 'i32Val', but got " + value.getClass().getSimpleName()); - case I64_VAL: - if (value instanceof TI64Column) { - break; - } - throw new ClassCastException("Was expecting value of type TI64Column for field 'i64Val', but got " + value.getClass().getSimpleName()); - case DOUBLE_VAL: - if (value instanceof TDoubleColumn) { - break; - } - throw new ClassCastException("Was expecting value of type TDoubleColumn for field 'doubleVal', but got " + value.getClass().getSimpleName()); - case STRING_VAL: - if (value instanceof TStringColumn) { - break; - } - throw new ClassCastException("Was expecting value of type TStringColumn for field 'stringVal', but got " + value.getClass().getSimpleName()); - case BINARY_VAL: - if (value instanceof TBinaryColumn) { - break; - } - throw new ClassCastException("Was expecting value of type TBinaryColumn for field 'binaryVal', but got " + value.getClass().getSimpleName()); - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected Object standardSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, org.apache.thrift.protocol.TField field) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(field.id); - if (setField != null) { - switch (setField) { - case BOOL_VAL: - if (field.type == BOOL_VAL_FIELD_DESC.type) { - TBoolColumn boolVal; - boolVal = new TBoolColumn(); - boolVal.read(iprot); - return boolVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case BYTE_VAL: - if (field.type == BYTE_VAL_FIELD_DESC.type) { - TByteColumn byteVal; - byteVal = new TByteColumn(); - byteVal.read(iprot); - return byteVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case I16_VAL: - if (field.type == I16_VAL_FIELD_DESC.type) { - TI16Column i16Val; - i16Val = new TI16Column(); - i16Val.read(iprot); - return i16Val; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case I32_VAL: - if (field.type == I32_VAL_FIELD_DESC.type) { - TI32Column i32Val; - i32Val = new TI32Column(); - i32Val.read(iprot); 
- return i32Val; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case I64_VAL: - if (field.type == I64_VAL_FIELD_DESC.type) { - TI64Column i64Val; - i64Val = new TI64Column(); - i64Val.read(iprot); - return i64Val; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case DOUBLE_VAL: - if (field.type == DOUBLE_VAL_FIELD_DESC.type) { - TDoubleColumn doubleVal; - doubleVal = new TDoubleColumn(); - doubleVal.read(iprot); - return doubleVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case STRING_VAL: - if (field.type == STRING_VAL_FIELD_DESC.type) { - TStringColumn stringVal; - stringVal = new TStringColumn(); - stringVal.read(iprot); - return stringVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case BINARY_VAL: - if (field.type == BINARY_VAL_FIELD_DESC.type) { - TBinaryColumn binaryVal; - binaryVal = new TBinaryColumn(); - binaryVal.read(iprot); - return binaryVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - } - - @Override - protected void standardSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case BOOL_VAL: - TBoolColumn boolVal = (TBoolColumn)value_; - boolVal.write(oprot); - return; - case BYTE_VAL: - TByteColumn byteVal = (TByteColumn)value_; - byteVal.write(oprot); - return; - case I16_VAL: - TI16Column i16Val = (TI16Column)value_; - i16Val.write(oprot); - return; - case I32_VAL: - TI32Column i32Val = (TI32Column)value_; - i32Val.write(oprot); - return; - case I64_VAL: - TI64Column i64Val = (TI64Column)value_; - i64Val.write(oprot); - return; - case DOUBLE_VAL: - TDoubleColumn doubleVal = (TDoubleColumn)value_; - doubleVal.write(oprot); - return; - case STRING_VAL: - TStringColumn stringVal = (TStringColumn)value_; - stringVal.write(oprot); - return; - case BINARY_VAL: - TBinaryColumn binaryVal = (TBinaryColumn)value_; - binaryVal.write(oprot); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected Object tupleSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, short fieldID) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(fieldID); - if (setField != null) { - switch (setField) { - case BOOL_VAL: - TBoolColumn boolVal; - boolVal = new TBoolColumn(); - boolVal.read(iprot); - return boolVal; - case BYTE_VAL: - TByteColumn byteVal; - byteVal = new TByteColumn(); - byteVal.read(iprot); - return byteVal; - case I16_VAL: - TI16Column i16Val; - i16Val = new TI16Column(); - i16Val.read(iprot); - return i16Val; - case I32_VAL: - TI32Column i32Val; - i32Val = new TI32Column(); - i32Val.read(iprot); - return i32Val; - case I64_VAL: - TI64Column i64Val; - i64Val = new TI64Column(); - i64Val.read(iprot); - return i64Val; - case DOUBLE_VAL: - TDoubleColumn doubleVal; - doubleVal = new TDoubleColumn(); - doubleVal.read(iprot); - return doubleVal; - case STRING_VAL: - TStringColumn stringVal; - stringVal = new TStringColumn(); - stringVal.read(iprot); - return stringVal; - case BINARY_VAL: - TBinaryColumn binaryVal; - 
binaryVal = new TBinaryColumn(); - binaryVal.read(iprot); - return binaryVal; - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - throw new TProtocolException("Couldn't find a field with field id " + fieldID); - } - } - - @Override - protected void tupleSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case BOOL_VAL: - TBoolColumn boolVal = (TBoolColumn)value_; - boolVal.write(oprot); - return; - case BYTE_VAL: - TByteColumn byteVal = (TByteColumn)value_; - byteVal.write(oprot); - return; - case I16_VAL: - TI16Column i16Val = (TI16Column)value_; - i16Val.write(oprot); - return; - case I32_VAL: - TI32Column i32Val = (TI32Column)value_; - i32Val.write(oprot); - return; - case I64_VAL: - TI64Column i64Val = (TI64Column)value_; - i64Val.write(oprot); - return; - case DOUBLE_VAL: - TDoubleColumn doubleVal = (TDoubleColumn)value_; - doubleVal.write(oprot); - return; - case STRING_VAL: - TStringColumn stringVal = (TStringColumn)value_; - stringVal.write(oprot); - return; - case BINARY_VAL: - TBinaryColumn binaryVal = (TBinaryColumn)value_; - binaryVal.write(oprot); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected org.apache.thrift.protocol.TField getFieldDesc(_Fields setField) { - switch (setField) { - case BOOL_VAL: - return BOOL_VAL_FIELD_DESC; - case BYTE_VAL: - return BYTE_VAL_FIELD_DESC; - case I16_VAL: - return I16_VAL_FIELD_DESC; - case I32_VAL: - return I32_VAL_FIELD_DESC; - case I64_VAL: - return I64_VAL_FIELD_DESC; - case DOUBLE_VAL: - return DOUBLE_VAL_FIELD_DESC; - case STRING_VAL: - return STRING_VAL_FIELD_DESC; - case BINARY_VAL: - return BINARY_VAL_FIELD_DESC; - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected org.apache.thrift.protocol.TStruct getStructDesc() { - return STRUCT_DESC; - } - - @Override - protected _Fields enumForId(short id) { - return _Fields.findByThriftIdOrThrow(id); - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - - public TBoolColumn getBoolVal() { - if (getSetField() == _Fields.BOOL_VAL) { - return (TBoolColumn)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'boolVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setBoolVal(TBoolColumn value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.BOOL_VAL; - value_ = value; - } - - public TByteColumn getByteVal() { - if (getSetField() == _Fields.BYTE_VAL) { - return (TByteColumn)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'byteVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setByteVal(TByteColumn value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.BYTE_VAL; - value_ = value; - } - - public TI16Column getI16Val() { - if (getSetField() == _Fields.I16_VAL) { - return (TI16Column)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'i16Val' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI16Val(TI16Column value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.I16_VAL; - value_ = value; - } - - public TI32Column getI32Val() { - if 
(getSetField() == _Fields.I32_VAL) { - return (TI32Column)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'i32Val' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI32Val(TI32Column value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.I32_VAL; - value_ = value; - } - - public TI64Column getI64Val() { - if (getSetField() == _Fields.I64_VAL) { - return (TI64Column)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'i64Val' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI64Val(TI64Column value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.I64_VAL; - value_ = value; - } - - public TDoubleColumn getDoubleVal() { - if (getSetField() == _Fields.DOUBLE_VAL) { - return (TDoubleColumn)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'doubleVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setDoubleVal(TDoubleColumn value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.DOUBLE_VAL; - value_ = value; - } - - public TStringColumn getStringVal() { - if (getSetField() == _Fields.STRING_VAL) { - return (TStringColumn)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'stringVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setStringVal(TStringColumn value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.STRING_VAL; - value_ = value; - } - - public TBinaryColumn getBinaryVal() { - if (getSetField() == _Fields.BINARY_VAL) { - return (TBinaryColumn)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'binaryVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setBinaryVal(TBinaryColumn value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.BINARY_VAL; - value_ = value; - } - - public boolean isSetBoolVal() { - return setField_ == _Fields.BOOL_VAL; - } - - - public boolean isSetByteVal() { - return setField_ == _Fields.BYTE_VAL; - } - - - public boolean isSetI16Val() { - return setField_ == _Fields.I16_VAL; - } - - - public boolean isSetI32Val() { - return setField_ == _Fields.I32_VAL; - } - - - public boolean isSetI64Val() { - return setField_ == _Fields.I64_VAL; - } - - - public boolean isSetDoubleVal() { - return setField_ == _Fields.DOUBLE_VAL; - } - - - public boolean isSetStringVal() { - return setField_ == _Fields.STRING_VAL; - } - - - public boolean isSetBinaryVal() { - return setField_ == _Fields.BINARY_VAL; - } - - - public boolean equals(Object other) { - if (other instanceof TColumn) { - return equals((TColumn)other); - } else { - return false; - } - } - - public boolean equals(TColumn other) { - return other != null && getSetField() == other.getSetField() && getFieldValue().equals(other.getFieldValue()); - } - - @Override - public int compareTo(TColumn other) { - int lastComparison = org.apache.thrift.TBaseHelper.compareTo(getSetField(), other.getSetField()); - if (lastComparison == 0) { - return org.apache.thrift.TBaseHelper.compareTo(getFieldValue(), other.getFieldValue()); - } - return lastComparison; - } - - - @Override - public int hashCode() { - List list = new ArrayList(); - list.add(this.getClass().getName()); - org.apache.thrift.TFieldIdEnum setField = 
getSetField(); - if (setField != null) { - list.add(setField.getThriftFieldId()); - Object value = getFieldValue(); - if (value instanceof org.apache.thrift.TEnum) { - list.add(((org.apache.thrift.TEnum)getFieldValue()).getValue()); - } else { - list.add(value); - } - } - return list.hashCode(); - } - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - -} diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnDesc.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnDesc.java deleted file mode 100644 index 31472c8f54b94..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnDesc.java +++ /dev/null @@ -1,704 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TColumnDesc implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TColumnDesc"); - - private static final org.apache.thrift.protocol.TField COLUMN_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("columnName", org.apache.thrift.protocol.TType.STRING, (short)1); - private static final org.apache.thrift.protocol.TField TYPE_DESC_FIELD_DESC = new org.apache.thrift.protocol.TField("typeDesc", org.apache.thrift.protocol.TType.STRUCT, (short)2); - private static final org.apache.thrift.protocol.TField POSITION_FIELD_DESC = new org.apache.thrift.protocol.TField("position", org.apache.thrift.protocol.TType.I32, (short)3); - private static final org.apache.thrift.protocol.TField COMMENT_FIELD_DESC = new org.apache.thrift.protocol.TField("comment", org.apache.thrift.protocol.TType.STRING, (short)4); - - private static final Map, SchemeFactory> schemes = new 
HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TColumnDescStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TColumnDescTupleSchemeFactory()); - } - - private String columnName; // required - private TTypeDesc typeDesc; // required - private int position; // required - private String comment; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - COLUMN_NAME((short)1, "columnName"), - TYPE_DESC((short)2, "typeDesc"), - POSITION((short)3, "position"), - COMMENT((short)4, "comment"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // COLUMN_NAME - return COLUMN_NAME; - case 2: // TYPE_DESC - return TYPE_DESC; - case 3: // POSITION - return POSITION; - case 4: // COMMENT - return COMMENT; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __POSITION_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.COMMENT}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.COLUMN_NAME, new org.apache.thrift.meta_data.FieldMetaData("columnName", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.TYPE_DESC, new org.apache.thrift.meta_data.FieldMetaData("typeDesc", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TTypeDesc.class))); - tmpMap.put(_Fields.POSITION, new org.apache.thrift.meta_data.FieldMetaData("position", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.COMMENT, new org.apache.thrift.meta_data.FieldMetaData("comment", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TColumnDesc.class, metaDataMap); 
- } - - public TColumnDesc() { - } - - public TColumnDesc( - String columnName, - TTypeDesc typeDesc, - int position) - { - this(); - this.columnName = columnName; - this.typeDesc = typeDesc; - this.position = position; - setPositionIsSet(true); - } - - /** - * Performs a deep copy on other. - */ - public TColumnDesc(TColumnDesc other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetColumnName()) { - this.columnName = other.columnName; - } - if (other.isSetTypeDesc()) { - this.typeDesc = new TTypeDesc(other.typeDesc); - } - this.position = other.position; - if (other.isSetComment()) { - this.comment = other.comment; - } - } - - public TColumnDesc deepCopy() { - return new TColumnDesc(this); - } - - @Override - public void clear() { - this.columnName = null; - this.typeDesc = null; - setPositionIsSet(false); - this.position = 0; - this.comment = null; - } - - public String getColumnName() { - return this.columnName; - } - - public void setColumnName(String columnName) { - this.columnName = columnName; - } - - public void unsetColumnName() { - this.columnName = null; - } - - /** Returns true if field columnName is set (has been assigned a value) and false otherwise */ - public boolean isSetColumnName() { - return this.columnName != null; - } - - public void setColumnNameIsSet(boolean value) { - if (!value) { - this.columnName = null; - } - } - - public TTypeDesc getTypeDesc() { - return this.typeDesc; - } - - public void setTypeDesc(TTypeDesc typeDesc) { - this.typeDesc = typeDesc; - } - - public void unsetTypeDesc() { - this.typeDesc = null; - } - - /** Returns true if field typeDesc is set (has been assigned a value) and false otherwise */ - public boolean isSetTypeDesc() { - return this.typeDesc != null; - } - - public void setTypeDescIsSet(boolean value) { - if (!value) { - this.typeDesc = null; - } - } - - public int getPosition() { - return this.position; - } - - public void setPosition(int position) { - this.position = position; - setPositionIsSet(true); - } - - public void unsetPosition() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __POSITION_ISSET_ID); - } - - /** Returns true if field position is set (has been assigned a value) and false otherwise */ - public boolean isSetPosition() { - return EncodingUtils.testBit(__isset_bitfield, __POSITION_ISSET_ID); - } - - public void setPositionIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __POSITION_ISSET_ID, value); - } - - public String getComment() { - return this.comment; - } - - public void setComment(String comment) { - this.comment = comment; - } - - public void unsetComment() { - this.comment = null; - } - - /** Returns true if field comment is set (has been assigned a value) and false otherwise */ - public boolean isSetComment() { - return this.comment != null; - } - - public void setCommentIsSet(boolean value) { - if (!value) { - this.comment = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case COLUMN_NAME: - if (value == null) { - unsetColumnName(); - } else { - setColumnName((String)value); - } - break; - - case TYPE_DESC: - if (value == null) { - unsetTypeDesc(); - } else { - setTypeDesc((TTypeDesc)value); - } - break; - - case POSITION: - if (value == null) { - unsetPosition(); - } else { - setPosition((Integer)value); - } - break; - - case COMMENT: - if (value == null) { - unsetComment(); - } else { - setComment((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - 
switch (field) { - case COLUMN_NAME: - return getColumnName(); - - case TYPE_DESC: - return getTypeDesc(); - - case POSITION: - return getPosition(); - - case COMMENT: - return getComment(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case COLUMN_NAME: - return isSetColumnName(); - case TYPE_DESC: - return isSetTypeDesc(); - case POSITION: - return isSetPosition(); - case COMMENT: - return isSetComment(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TColumnDesc) - return this.equals((TColumnDesc)that); - return false; - } - - public boolean equals(TColumnDesc that) { - if (that == null) - return false; - - boolean this_present_columnName = true && this.isSetColumnName(); - boolean that_present_columnName = true && that.isSetColumnName(); - if (this_present_columnName || that_present_columnName) { - if (!(this_present_columnName && that_present_columnName)) - return false; - if (!this.columnName.equals(that.columnName)) - return false; - } - - boolean this_present_typeDesc = true && this.isSetTypeDesc(); - boolean that_present_typeDesc = true && that.isSetTypeDesc(); - if (this_present_typeDesc || that_present_typeDesc) { - if (!(this_present_typeDesc && that_present_typeDesc)) - return false; - if (!this.typeDesc.equals(that.typeDesc)) - return false; - } - - boolean this_present_position = true; - boolean that_present_position = true; - if (this_present_position || that_present_position) { - if (!(this_present_position && that_present_position)) - return false; - if (this.position != that.position) - return false; - } - - boolean this_present_comment = true && this.isSetComment(); - boolean that_present_comment = true && that.isSetComment(); - if (this_present_comment || that_present_comment) { - if (!(this_present_comment && that_present_comment)) - return false; - if (!this.comment.equals(that.comment)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_columnName = true && (isSetColumnName()); - list.add(present_columnName); - if (present_columnName) - list.add(columnName); - - boolean present_typeDesc = true && (isSetTypeDesc()); - list.add(present_typeDesc); - if (present_typeDesc) - list.add(typeDesc); - - boolean present_position = true; - list.add(present_position); - if (present_position) - list.add(position); - - boolean present_comment = true && (isSetComment()); - list.add(present_comment); - if (present_comment) - list.add(comment); - - return list.hashCode(); - } - - @Override - public int compareTo(TColumnDesc other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetColumnName()).compareTo(other.isSetColumnName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetColumnName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.columnName, other.columnName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetTypeDesc()).compareTo(other.isSetTypeDesc()); - if (lastComparison != 0) { - return lastComparison; - } - if 
(isSetTypeDesc()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.typeDesc, other.typeDesc); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetPosition()).compareTo(other.isSetPosition()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetPosition()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.position, other.position); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetComment()).compareTo(other.isSetComment()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetComment()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.comment, other.comment); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TColumnDesc("); - boolean first = true; - - sb.append("columnName:"); - if (this.columnName == null) { - sb.append("null"); - } else { - sb.append(this.columnName); - } - first = false; - if (!first) sb.append(", "); - sb.append("typeDesc:"); - if (this.typeDesc == null) { - sb.append("null"); - } else { - sb.append(this.typeDesc); - } - first = false; - if (!first) sb.append(", "); - sb.append("position:"); - sb.append(this.position); - first = false; - if (isSetComment()) { - if (!first) sb.append(", "); - sb.append("comment:"); - if (this.comment == null) { - sb.append("null"); - } else { - sb.append(this.comment); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetColumnName()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'columnName' is unset! Struct:" + toString()); - } - - if (!isSetTypeDesc()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'typeDesc' is unset! Struct:" + toString()); - } - - if (!isSetPosition()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'position' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (typeDesc != null) { - typeDesc.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
- __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TColumnDescStandardSchemeFactory implements SchemeFactory { - public TColumnDescStandardScheme getScheme() { - return new TColumnDescStandardScheme(); - } - } - - private static class TColumnDescStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TColumnDesc struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // COLUMN_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.columnName = iprot.readString(); - struct.setColumnNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // TYPE_DESC - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.typeDesc = new TTypeDesc(); - struct.typeDesc.read(iprot); - struct.setTypeDescIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // POSITION - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.position = iprot.readI32(); - struct.setPositionIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // COMMENT - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.comment = iprot.readString(); - struct.setCommentIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TColumnDesc struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.columnName != null) { - oprot.writeFieldBegin(COLUMN_NAME_FIELD_DESC); - oprot.writeString(struct.columnName); - oprot.writeFieldEnd(); - } - if (struct.typeDesc != null) { - oprot.writeFieldBegin(TYPE_DESC_FIELD_DESC); - struct.typeDesc.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldBegin(POSITION_FIELD_DESC); - oprot.writeI32(struct.position); - oprot.writeFieldEnd(); - if (struct.comment != null) { - if (struct.isSetComment()) { - oprot.writeFieldBegin(COMMENT_FIELD_DESC); - oprot.writeString(struct.comment); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TColumnDescTupleSchemeFactory implements SchemeFactory { - public TColumnDescTupleScheme getScheme() { - return new TColumnDescTupleScheme(); - } - } - - private static class TColumnDescTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TColumnDesc struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeString(struct.columnName); - struct.typeDesc.write(oprot); - oprot.writeI32(struct.position); - BitSet optionals = new BitSet(); - if (struct.isSetComment()) { - optionals.set(0); - } - 
oprot.writeBitSet(optionals, 1); - if (struct.isSetComment()) { - oprot.writeString(struct.comment); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TColumnDesc struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.columnName = iprot.readString(); - struct.setColumnNameIsSet(true); - struct.typeDesc = new TTypeDesc(); - struct.typeDesc.read(iprot); - struct.setTypeDescIsSet(true); - struct.position = iprot.readI32(); - struct.setPositionIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.comment = iprot.readString(); - struct.setCommentIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnValue.java deleted file mode 100644 index d1cc8e919bc0c..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnValue.java +++ /dev/null @@ -1,675 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -public class TColumnValue extends org.apache.thrift.TUnion { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TColumnValue"); - private static final org.apache.thrift.protocol.TField BOOL_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("boolVal", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField BYTE_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("byteVal", org.apache.thrift.protocol.TType.STRUCT, (short)2); - private static final org.apache.thrift.protocol.TField I16_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("i16Val", org.apache.thrift.protocol.TType.STRUCT, (short)3); - private static final org.apache.thrift.protocol.TField I32_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("i32Val", org.apache.thrift.protocol.TType.STRUCT, (short)4); - private static final org.apache.thrift.protocol.TField I64_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("i64Val", org.apache.thrift.protocol.TType.STRUCT, (short)5); - private static final org.apache.thrift.protocol.TField DOUBLE_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("doubleVal", org.apache.thrift.protocol.TType.STRUCT, (short)6); - private static final org.apache.thrift.protocol.TField 
STRING_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("stringVal", org.apache.thrift.protocol.TType.STRUCT, (short)7); - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - BOOL_VAL((short)1, "boolVal"), - BYTE_VAL((short)2, "byteVal"), - I16_VAL((short)3, "i16Val"), - I32_VAL((short)4, "i32Val"), - I64_VAL((short)5, "i64Val"), - DOUBLE_VAL((short)6, "doubleVal"), - STRING_VAL((short)7, "stringVal"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // BOOL_VAL - return BOOL_VAL; - case 2: // BYTE_VAL - return BYTE_VAL; - case 3: // I16_VAL - return I16_VAL; - case 4: // I32_VAL - return I32_VAL; - case 5: // I64_VAL - return I64_VAL; - case 6: // DOUBLE_VAL - return DOUBLE_VAL; - case 7: // STRING_VAL - return STRING_VAL; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.BOOL_VAL, new org.apache.thrift.meta_data.FieldMetaData("boolVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TBoolValue.class))); - tmpMap.put(_Fields.BYTE_VAL, new org.apache.thrift.meta_data.FieldMetaData("byteVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TByteValue.class))); - tmpMap.put(_Fields.I16_VAL, new org.apache.thrift.meta_data.FieldMetaData("i16Val", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TI16Value.class))); - tmpMap.put(_Fields.I32_VAL, new org.apache.thrift.meta_data.FieldMetaData("i32Val", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TI32Value.class))); - tmpMap.put(_Fields.I64_VAL, new org.apache.thrift.meta_data.FieldMetaData("i64Val", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TI64Value.class))); - tmpMap.put(_Fields.DOUBLE_VAL, new 
org.apache.thrift.meta_data.FieldMetaData("doubleVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TDoubleValue.class))); - tmpMap.put(_Fields.STRING_VAL, new org.apache.thrift.meta_data.FieldMetaData("stringVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStringValue.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TColumnValue.class, metaDataMap); - } - - public TColumnValue() { - super(); - } - - public TColumnValue(TColumnValue._Fields setField, Object value) { - super(setField, value); - } - - public TColumnValue(TColumnValue other) { - super(other); - } - public TColumnValue deepCopy() { - return new TColumnValue(this); - } - - public static TColumnValue boolVal(TBoolValue value) { - TColumnValue x = new TColumnValue(); - x.setBoolVal(value); - return x; - } - - public static TColumnValue byteVal(TByteValue value) { - TColumnValue x = new TColumnValue(); - x.setByteVal(value); - return x; - } - - public static TColumnValue i16Val(TI16Value value) { - TColumnValue x = new TColumnValue(); - x.setI16Val(value); - return x; - } - - public static TColumnValue i32Val(TI32Value value) { - TColumnValue x = new TColumnValue(); - x.setI32Val(value); - return x; - } - - public static TColumnValue i64Val(TI64Value value) { - TColumnValue x = new TColumnValue(); - x.setI64Val(value); - return x; - } - - public static TColumnValue doubleVal(TDoubleValue value) { - TColumnValue x = new TColumnValue(); - x.setDoubleVal(value); - return x; - } - - public static TColumnValue stringVal(TStringValue value) { - TColumnValue x = new TColumnValue(); - x.setStringVal(value); - return x; - } - - - @Override - protected void checkType(_Fields setField, Object value) throws ClassCastException { - switch (setField) { - case BOOL_VAL: - if (value instanceof TBoolValue) { - break; - } - throw new ClassCastException("Was expecting value of type TBoolValue for field 'boolVal', but got " + value.getClass().getSimpleName()); - case BYTE_VAL: - if (value instanceof TByteValue) { - break; - } - throw new ClassCastException("Was expecting value of type TByteValue for field 'byteVal', but got " + value.getClass().getSimpleName()); - case I16_VAL: - if (value instanceof TI16Value) { - break; - } - throw new ClassCastException("Was expecting value of type TI16Value for field 'i16Val', but got " + value.getClass().getSimpleName()); - case I32_VAL: - if (value instanceof TI32Value) { - break; - } - throw new ClassCastException("Was expecting value of type TI32Value for field 'i32Val', but got " + value.getClass().getSimpleName()); - case I64_VAL: - if (value instanceof TI64Value) { - break; - } - throw new ClassCastException("Was expecting value of type TI64Value for field 'i64Val', but got " + value.getClass().getSimpleName()); - case DOUBLE_VAL: - if (value instanceof TDoubleValue) { - break; - } - throw new ClassCastException("Was expecting value of type TDoubleValue for field 'doubleVal', but got " + value.getClass().getSimpleName()); - case STRING_VAL: - if (value instanceof TStringValue) { - break; - } - throw new ClassCastException("Was expecting value of type TStringValue for field 'stringVal', but got " + value.getClass().getSimpleName()); - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - 
protected Object standardSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, org.apache.thrift.protocol.TField field) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(field.id); - if (setField != null) { - switch (setField) { - case BOOL_VAL: - if (field.type == BOOL_VAL_FIELD_DESC.type) { - TBoolValue boolVal; - boolVal = new TBoolValue(); - boolVal.read(iprot); - return boolVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case BYTE_VAL: - if (field.type == BYTE_VAL_FIELD_DESC.type) { - TByteValue byteVal; - byteVal = new TByteValue(); - byteVal.read(iprot); - return byteVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case I16_VAL: - if (field.type == I16_VAL_FIELD_DESC.type) { - TI16Value i16Val; - i16Val = new TI16Value(); - i16Val.read(iprot); - return i16Val; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case I32_VAL: - if (field.type == I32_VAL_FIELD_DESC.type) { - TI32Value i32Val; - i32Val = new TI32Value(); - i32Val.read(iprot); - return i32Val; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case I64_VAL: - if (field.type == I64_VAL_FIELD_DESC.type) { - TI64Value i64Val; - i64Val = new TI64Value(); - i64Val.read(iprot); - return i64Val; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case DOUBLE_VAL: - if (field.type == DOUBLE_VAL_FIELD_DESC.type) { - TDoubleValue doubleVal; - doubleVal = new TDoubleValue(); - doubleVal.read(iprot); - return doubleVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case STRING_VAL: - if (field.type == STRING_VAL_FIELD_DESC.type) { - TStringValue stringVal; - stringVal = new TStringValue(); - stringVal.read(iprot); - return stringVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - } - - @Override - protected void standardSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case BOOL_VAL: - TBoolValue boolVal = (TBoolValue)value_; - boolVal.write(oprot); - return; - case BYTE_VAL: - TByteValue byteVal = (TByteValue)value_; - byteVal.write(oprot); - return; - case I16_VAL: - TI16Value i16Val = (TI16Value)value_; - i16Val.write(oprot); - return; - case I32_VAL: - TI32Value i32Val = (TI32Value)value_; - i32Val.write(oprot); - return; - case I64_VAL: - TI64Value i64Val = (TI64Value)value_; - i64Val.write(oprot); - return; - case DOUBLE_VAL: - TDoubleValue doubleVal = (TDoubleValue)value_; - doubleVal.write(oprot); - return; - case STRING_VAL: - TStringValue stringVal = (TStringValue)value_; - stringVal.write(oprot); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected Object tupleSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, short fieldID) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(fieldID); - if (setField != null) { - switch (setField) { - case BOOL_VAL: - TBoolValue boolVal; - boolVal = new TBoolValue(); - 
boolVal.read(iprot); - return boolVal; - case BYTE_VAL: - TByteValue byteVal; - byteVal = new TByteValue(); - byteVal.read(iprot); - return byteVal; - case I16_VAL: - TI16Value i16Val; - i16Val = new TI16Value(); - i16Val.read(iprot); - return i16Val; - case I32_VAL: - TI32Value i32Val; - i32Val = new TI32Value(); - i32Val.read(iprot); - return i32Val; - case I64_VAL: - TI64Value i64Val; - i64Val = new TI64Value(); - i64Val.read(iprot); - return i64Val; - case DOUBLE_VAL: - TDoubleValue doubleVal; - doubleVal = new TDoubleValue(); - doubleVal.read(iprot); - return doubleVal; - case STRING_VAL: - TStringValue stringVal; - stringVal = new TStringValue(); - stringVal.read(iprot); - return stringVal; - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - throw new TProtocolException("Couldn't find a field with field id " + fieldID); - } - } - - @Override - protected void tupleSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case BOOL_VAL: - TBoolValue boolVal = (TBoolValue)value_; - boolVal.write(oprot); - return; - case BYTE_VAL: - TByteValue byteVal = (TByteValue)value_; - byteVal.write(oprot); - return; - case I16_VAL: - TI16Value i16Val = (TI16Value)value_; - i16Val.write(oprot); - return; - case I32_VAL: - TI32Value i32Val = (TI32Value)value_; - i32Val.write(oprot); - return; - case I64_VAL: - TI64Value i64Val = (TI64Value)value_; - i64Val.write(oprot); - return; - case DOUBLE_VAL: - TDoubleValue doubleVal = (TDoubleValue)value_; - doubleVal.write(oprot); - return; - case STRING_VAL: - TStringValue stringVal = (TStringValue)value_; - stringVal.write(oprot); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected org.apache.thrift.protocol.TField getFieldDesc(_Fields setField) { - switch (setField) { - case BOOL_VAL: - return BOOL_VAL_FIELD_DESC; - case BYTE_VAL: - return BYTE_VAL_FIELD_DESC; - case I16_VAL: - return I16_VAL_FIELD_DESC; - case I32_VAL: - return I32_VAL_FIELD_DESC; - case I64_VAL: - return I64_VAL_FIELD_DESC; - case DOUBLE_VAL: - return DOUBLE_VAL_FIELD_DESC; - case STRING_VAL: - return STRING_VAL_FIELD_DESC; - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected org.apache.thrift.protocol.TStruct getStructDesc() { - return STRUCT_DESC; - } - - @Override - protected _Fields enumForId(short id) { - return _Fields.findByThriftIdOrThrow(id); - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - - public TBoolValue getBoolVal() { - if (getSetField() == _Fields.BOOL_VAL) { - return (TBoolValue)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'boolVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setBoolVal(TBoolValue value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.BOOL_VAL; - value_ = value; - } - - public TByteValue getByteVal() { - if (getSetField() == _Fields.BYTE_VAL) { - return (TByteValue)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'byteVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setByteVal(TByteValue value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.BYTE_VAL; - value_ = value; - } - - public 
TI16Value getI16Val() { - if (getSetField() == _Fields.I16_VAL) { - return (TI16Value)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'i16Val' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI16Val(TI16Value value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.I16_VAL; - value_ = value; - } - - public TI32Value getI32Val() { - if (getSetField() == _Fields.I32_VAL) { - return (TI32Value)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'i32Val' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI32Val(TI32Value value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.I32_VAL; - value_ = value; - } - - public TI64Value getI64Val() { - if (getSetField() == _Fields.I64_VAL) { - return (TI64Value)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'i64Val' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI64Val(TI64Value value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.I64_VAL; - value_ = value; - } - - public TDoubleValue getDoubleVal() { - if (getSetField() == _Fields.DOUBLE_VAL) { - return (TDoubleValue)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'doubleVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setDoubleVal(TDoubleValue value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.DOUBLE_VAL; - value_ = value; - } - - public TStringValue getStringVal() { - if (getSetField() == _Fields.STRING_VAL) { - return (TStringValue)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'stringVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setStringVal(TStringValue value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.STRING_VAL; - value_ = value; - } - - public boolean isSetBoolVal() { - return setField_ == _Fields.BOOL_VAL; - } - - - public boolean isSetByteVal() { - return setField_ == _Fields.BYTE_VAL; - } - - - public boolean isSetI16Val() { - return setField_ == _Fields.I16_VAL; - } - - - public boolean isSetI32Val() { - return setField_ == _Fields.I32_VAL; - } - - - public boolean isSetI64Val() { - return setField_ == _Fields.I64_VAL; - } - - - public boolean isSetDoubleVal() { - return setField_ == _Fields.DOUBLE_VAL; - } - - - public boolean isSetStringVal() { - return setField_ == _Fields.STRING_VAL; - } - - - public boolean equals(Object other) { - if (other instanceof TColumnValue) { - return equals((TColumnValue)other); - } else { - return false; - } - } - - public boolean equals(TColumnValue other) { - return other != null && getSetField() == other.getSetField() && getFieldValue().equals(other.getFieldValue()); - } - - @Override - public int compareTo(TColumnValue other) { - int lastComparison = org.apache.thrift.TBaseHelper.compareTo(getSetField(), other.getSetField()); - if (lastComparison == 0) { - return org.apache.thrift.TBaseHelper.compareTo(getFieldValue(), other.getFieldValue()); - } - return lastComparison; - } - - - @Override - public int hashCode() { - List list = new ArrayList(); - list.add(this.getClass().getName()); - org.apache.thrift.TFieldIdEnum setField = getSetField(); - if (setField != null) { - 
list.add(setField.getThriftFieldId()); - Object value = getFieldValue(); - if (value instanceof org.apache.thrift.TEnum) { - list.add(((org.apache.thrift.TEnum)getFieldValue()).getValue()); - } else { - list.add(value); - } - } - return list.hashCode(); - } - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - -} diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleColumn.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleColumn.java deleted file mode 100644 index f93c9b4f0edc3..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleColumn.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TDoubleColumn implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TDoubleColumn"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TDoubleColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TDoubleColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.DOUBLE)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TDoubleColumn.class, metaDataMap); - } - - public TDoubleColumn() { - } - - public TDoubleColumn( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - /** - * Performs a deep copy on other. - */ - public TDoubleColumn(TDoubleColumn other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(other.values); - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); - } - } - - public TDoubleColumn deepCopy() { - return new TDoubleColumn(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? 
null : this.values.iterator(); - } - - public void addToValues(double elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void setNulls(byte[] nulls) { - this.nulls = nulls == null ? (ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(nulls, nulls.length)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TDoubleColumn) - return this.equals((TDoubleColumn)that); - return false; - } - - public boolean equals(TDoubleColumn that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_values = true && (isSetValues()); - list.add(present_values); - if (present_values) - list.add(values); - - boolean present_nulls = true && (isSetNulls()); - list.add(present_nulls); - if (present_nulls) - list.add(nulls); - - return list.hashCode(); - } - - @Override - public int 
compareTo(TDoubleColumn other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(other.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, other.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(other.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, other.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TDoubleColumn("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TDoubleColumnStandardSchemeFactory implements SchemeFactory { - public TDoubleColumnStandardScheme getScheme() { - return new TDoubleColumnStandardScheme(); - } - } - - private static class TDoubleColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TDoubleColumn struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list94 = iprot.readListBegin(); - struct.values = new ArrayList(_list94.size); - double _elem95; - for (int _i96 = 0; _i96 < _list94.size; ++_i96) - { - _elem95 = iprot.readDouble(); - struct.values.add(_elem95); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TDoubleColumn struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.DOUBLE, struct.values.size())); - for (double _iter97 : struct.values) - { - oprot.writeDouble(_iter97); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TDoubleColumnTupleSchemeFactory implements SchemeFactory { - public TDoubleColumnTupleScheme getScheme() { - return new TDoubleColumnTupleScheme(); - } - } - - private static class TDoubleColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TDoubleColumn struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (double _iter98 : struct.values) - { - oprot.writeDouble(_iter98); - } - } - 
oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TDoubleColumn struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list99 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.DOUBLE, iprot.readI32()); - struct.values = new ArrayList(_list99.size); - double _elem100; - for (int _i101 = 0; _i101 < _list99.size; ++_i101) - { - _elem100 = iprot.readDouble(); - struct.values.add(_elem100); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleValue.java deleted file mode 100644 index 5700355aad94d..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleValue.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TDoubleValue implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TDoubleValue"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("value", org.apache.thrift.protocol.TType.DOUBLE, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TDoubleValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TDoubleValueTupleSchemeFactory()); - } - - private double value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __VALUE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.DOUBLE))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TDoubleValue.class, metaDataMap); - } - - public TDoubleValue() { - } - - /** - * Performs a deep copy on other. 
- */ - public TDoubleValue(TDoubleValue other) { - __isset_bitfield = other.__isset_bitfield; - this.value = other.value; - } - - public TDoubleValue deepCopy() { - return new TDoubleValue(this); - } - - @Override - public void clear() { - setValueIsSet(false); - this.value = 0.0; - } - - public double getValue() { - return this.value; - } - - public void setValue(double value) { - this.value = value; - setValueIsSet(true); - } - - public void unsetValue() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return EncodingUtils.testBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - public void setValueIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((Double)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return getValue(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TDoubleValue) - return this.equals((TDoubleValue)that); - return false; - } - - public boolean equals(TDoubleValue that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || that_present_value) { - if (!(this_present_value && that_present_value)) - return false; - if (this.value != that.value) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_value = true && (isSetValue()); - list.add(present_value); - if (present_value) - list.add(value); - - return list.hashCode(); - } - - @Override - public int compareTo(TDoubleValue other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(other.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, other.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TDoubleValue("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); - sb.append(this.value); - first = false; - } - sb.append(")"); - 
return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TDoubleValueStandardSchemeFactory implements SchemeFactory { - public TDoubleValueStandardScheme getScheme() { - return new TDoubleValueStandardScheme(); - } - } - - private static class TDoubleValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TDoubleValue struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.DOUBLE) { - struct.value = iprot.readDouble(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TDoubleValue struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.isSetValue()) { - oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeDouble(struct.value); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TDoubleValueTupleSchemeFactory implements SchemeFactory { - public TDoubleValueTupleScheme getScheme() { - return new TDoubleValueTupleScheme(); - } - } - - private static class TDoubleValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TDoubleValue struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeDouble(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TDoubleValue struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readDouble(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementReq.java deleted 
file mode 100644 index 1f73cec61af78..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementReq.java +++ /dev/null @@ -1,863 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TExecuteStatementReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TExecuteStatementReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField STATEMENT_FIELD_DESC = new org.apache.thrift.protocol.TField("statement", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField CONF_OVERLAY_FIELD_DESC = new org.apache.thrift.protocol.TField("confOverlay", org.apache.thrift.protocol.TType.MAP, (short)3); - private static final org.apache.thrift.protocol.TField RUN_ASYNC_FIELD_DESC = new org.apache.thrift.protocol.TField("runAsync", org.apache.thrift.protocol.TType.BOOL, (short)4); - private static final org.apache.thrift.protocol.TField QUERY_TIMEOUT_FIELD_DESC = new org.apache.thrift.protocol.TField("queryTimeout", org.apache.thrift.protocol.TType.I64, (short)5); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TExecuteStatementReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TExecuteStatementReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String statement; // required - private Map confOverlay; // optional - private boolean runAsync; // optional - private long queryTimeout; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - STATEMENT((short)2, "statement"), - CONF_OVERLAY((short)3, "confOverlay"), - RUN_ASYNC((short)4, "runAsync"), - QUERY_TIMEOUT((short)5, "queryTimeout"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // STATEMENT - return STATEMENT; - case 3: // CONF_OVERLAY - return CONF_OVERLAY; - case 4: // RUN_ASYNC - return RUN_ASYNC; - case 5: // QUERY_TIMEOUT - return QUERY_TIMEOUT; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __RUNASYNC_ISSET_ID = 0; - private static final int __QUERYTIMEOUT_ISSET_ID = 1; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.CONF_OVERLAY,_Fields.RUN_ASYNC,_Fields.QUERY_TIMEOUT}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.STATEMENT, new org.apache.thrift.meta_data.FieldMetaData("statement", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.CONF_OVERLAY, new org.apache.thrift.meta_data.FieldMetaData("confOverlay", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.MapMetaData(org.apache.thrift.protocol.TType.MAP, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING), - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); - tmpMap.put(_Fields.RUN_ASYNC, new org.apache.thrift.meta_data.FieldMetaData("runAsync", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL))); - tmpMap.put(_Fields.QUERY_TIMEOUT, new org.apache.thrift.meta_data.FieldMetaData("queryTimeout", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new 
org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TExecuteStatementReq.class, metaDataMap); - } - - public TExecuteStatementReq() { - this.runAsync = false; - - this.queryTimeout = 0L; - - } - - public TExecuteStatementReq( - TSessionHandle sessionHandle, - String statement) - { - this(); - this.sessionHandle = sessionHandle; - this.statement = statement; - } - - /** - * Performs a deep copy on other. - */ - public TExecuteStatementReq(TExecuteStatementReq other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetStatement()) { - this.statement = other.statement; - } - if (other.isSetConfOverlay()) { - Map __this__confOverlay = new HashMap(other.confOverlay); - this.confOverlay = __this__confOverlay; - } - this.runAsync = other.runAsync; - this.queryTimeout = other.queryTimeout; - } - - public TExecuteStatementReq deepCopy() { - return new TExecuteStatementReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.statement = null; - this.confOverlay = null; - this.runAsync = false; - - this.queryTimeout = 0L; - - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getStatement() { - return this.statement; - } - - public void setStatement(String statement) { - this.statement = statement; - } - - public void unsetStatement() { - this.statement = null; - } - - /** Returns true if field statement is set (has been assigned a value) and false otherwise */ - public boolean isSetStatement() { - return this.statement != null; - } - - public void setStatementIsSet(boolean value) { - if (!value) { - this.statement = null; - } - } - - public int getConfOverlaySize() { - return (this.confOverlay == null) ? 
0 : this.confOverlay.size(); - } - - public void putToConfOverlay(String key, String val) { - if (this.confOverlay == null) { - this.confOverlay = new HashMap(); - } - this.confOverlay.put(key, val); - } - - public Map getConfOverlay() { - return this.confOverlay; - } - - public void setConfOverlay(Map confOverlay) { - this.confOverlay = confOverlay; - } - - public void unsetConfOverlay() { - this.confOverlay = null; - } - - /** Returns true if field confOverlay is set (has been assigned a value) and false otherwise */ - public boolean isSetConfOverlay() { - return this.confOverlay != null; - } - - public void setConfOverlayIsSet(boolean value) { - if (!value) { - this.confOverlay = null; - } - } - - public boolean isRunAsync() { - return this.runAsync; - } - - public void setRunAsync(boolean runAsync) { - this.runAsync = runAsync; - setRunAsyncIsSet(true); - } - - public void unsetRunAsync() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __RUNASYNC_ISSET_ID); - } - - /** Returns true if field runAsync is set (has been assigned a value) and false otherwise */ - public boolean isSetRunAsync() { - return EncodingUtils.testBit(__isset_bitfield, __RUNASYNC_ISSET_ID); - } - - public void setRunAsyncIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __RUNASYNC_ISSET_ID, value); - } - - public long getQueryTimeout() { - return this.queryTimeout; - } - - public void setQueryTimeout(long queryTimeout) { - this.queryTimeout = queryTimeout; - setQueryTimeoutIsSet(true); - } - - public void unsetQueryTimeout() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __QUERYTIMEOUT_ISSET_ID); - } - - /** Returns true if field queryTimeout is set (has been assigned a value) and false otherwise */ - public boolean isSetQueryTimeout() { - return EncodingUtils.testBit(__isset_bitfield, __QUERYTIMEOUT_ISSET_ID); - } - - public void setQueryTimeoutIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __QUERYTIMEOUT_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case STATEMENT: - if (value == null) { - unsetStatement(); - } else { - setStatement((String)value); - } - break; - - case CONF_OVERLAY: - if (value == null) { - unsetConfOverlay(); - } else { - setConfOverlay((Map)value); - } - break; - - case RUN_ASYNC: - if (value == null) { - unsetRunAsync(); - } else { - setRunAsync((Boolean)value); - } - break; - - case QUERY_TIMEOUT: - if (value == null) { - unsetQueryTimeout(); - } else { - setQueryTimeout((Long)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case STATEMENT: - return getStatement(); - - case CONF_OVERLAY: - return getConfOverlay(); - - case RUN_ASYNC: - return isRunAsync(); - - case QUERY_TIMEOUT: - return getQueryTimeout(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case STATEMENT: - return isSetStatement(); - case CONF_OVERLAY: - return isSetConfOverlay(); - case RUN_ASYNC: - return isSetRunAsync(); - case 
QUERY_TIMEOUT: - return isSetQueryTimeout(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TExecuteStatementReq) - return this.equals((TExecuteStatementReq)that); - return false; - } - - public boolean equals(TExecuteStatementReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_statement = true && this.isSetStatement(); - boolean that_present_statement = true && that.isSetStatement(); - if (this_present_statement || that_present_statement) { - if (!(this_present_statement && that_present_statement)) - return false; - if (!this.statement.equals(that.statement)) - return false; - } - - boolean this_present_confOverlay = true && this.isSetConfOverlay(); - boolean that_present_confOverlay = true && that.isSetConfOverlay(); - if (this_present_confOverlay || that_present_confOverlay) { - if (!(this_present_confOverlay && that_present_confOverlay)) - return false; - if (!this.confOverlay.equals(that.confOverlay)) - return false; - } - - boolean this_present_runAsync = true && this.isSetRunAsync(); - boolean that_present_runAsync = true && that.isSetRunAsync(); - if (this_present_runAsync || that_present_runAsync) { - if (!(this_present_runAsync && that_present_runAsync)) - return false; - if (this.runAsync != that.runAsync) - return false; - } - - boolean this_present_queryTimeout = true && this.isSetQueryTimeout(); - boolean that_present_queryTimeout = true && that.isSetQueryTimeout(); - if (this_present_queryTimeout || that_present_queryTimeout) { - if (!(this_present_queryTimeout && that_present_queryTimeout)) - return false; - if (this.queryTimeout != that.queryTimeout) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - boolean present_statement = true && (isSetStatement()); - list.add(present_statement); - if (present_statement) - list.add(statement); - - boolean present_confOverlay = true && (isSetConfOverlay()); - list.add(present_confOverlay); - if (present_confOverlay) - list.add(confOverlay); - - boolean present_runAsync = true && (isSetRunAsync()); - list.add(present_runAsync); - if (present_runAsync) - list.add(runAsync); - - boolean present_queryTimeout = true && (isSetQueryTimeout()); - list.add(present_queryTimeout); - if (present_queryTimeout) - list.add(queryTimeout); - - return list.hashCode(); - } - - @Override - public int compareTo(TExecuteStatementReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - 
lastComparison = Boolean.valueOf(isSetStatement()).compareTo(other.isSetStatement()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatement()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.statement, other.statement); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetConfOverlay()).compareTo(other.isSetConfOverlay()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetConfOverlay()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.confOverlay, other.confOverlay); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetRunAsync()).compareTo(other.isSetRunAsync()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetRunAsync()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.runAsync, other.runAsync); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetQueryTimeout()).compareTo(other.isSetQueryTimeout()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetQueryTimeout()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.queryTimeout, other.queryTimeout); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TExecuteStatementReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (!first) sb.append(", "); - sb.append("statement:"); - if (this.statement == null) { - sb.append("null"); - } else { - sb.append(this.statement); - } - first = false; - if (isSetConfOverlay()) { - if (!first) sb.append(", "); - sb.append("confOverlay:"); - if (this.confOverlay == null) { - sb.append("null"); - } else { - sb.append(this.confOverlay); - } - first = false; - } - if (isSetRunAsync()) { - if (!first) sb.append(", "); - sb.append("runAsync:"); - sb.append(this.runAsync); - first = false; - } - if (isSetQueryTimeout()) { - if (!first) sb.append(", "); - sb.append("queryTimeout:"); - sb.append(this.queryTimeout); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - if (!isSetStatement()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'statement' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TExecuteStatementReqStandardSchemeFactory implements SchemeFactory { - public TExecuteStatementReqStandardScheme getScheme() { - return new TExecuteStatementReqStandardScheme(); - } - } - - private static class TExecuteStatementReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TExecuteStatementReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // STATEMENT - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.statement = iprot.readString(); - struct.setStatementIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // CONF_OVERLAY - if (schemeField.type == org.apache.thrift.protocol.TType.MAP) { - { - org.apache.thrift.protocol.TMap _map162 = iprot.readMapBegin(); - struct.confOverlay = new HashMap(2*_map162.size); - String _key163; - String _val164; - for (int _i165 = 0; _i165 < _map162.size; ++_i165) - { - _key163 = iprot.readString(); - _val164 = iprot.readString(); - struct.confOverlay.put(_key163, _val164); - } - iprot.readMapEnd(); - } - struct.setConfOverlayIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // RUN_ASYNC - if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) { - struct.runAsync = iprot.readBool(); - struct.setRunAsyncIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 5: // QUERY_TIMEOUT - if (schemeField.type == org.apache.thrift.protocol.TType.I64) { - struct.queryTimeout = iprot.readI64(); - struct.setQueryTimeoutIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TExecuteStatementReq struct) throws 
org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.statement != null) { - oprot.writeFieldBegin(STATEMENT_FIELD_DESC); - oprot.writeString(struct.statement); - oprot.writeFieldEnd(); - } - if (struct.confOverlay != null) { - if (struct.isSetConfOverlay()) { - oprot.writeFieldBegin(CONF_OVERLAY_FIELD_DESC); - { - oprot.writeMapBegin(new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, struct.confOverlay.size())); - for (Map.Entry _iter166 : struct.confOverlay.entrySet()) - { - oprot.writeString(_iter166.getKey()); - oprot.writeString(_iter166.getValue()); - } - oprot.writeMapEnd(); - } - oprot.writeFieldEnd(); - } - } - if (struct.isSetRunAsync()) { - oprot.writeFieldBegin(RUN_ASYNC_FIELD_DESC); - oprot.writeBool(struct.runAsync); - oprot.writeFieldEnd(); - } - if (struct.isSetQueryTimeout()) { - oprot.writeFieldBegin(QUERY_TIMEOUT_FIELD_DESC); - oprot.writeI64(struct.queryTimeout); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TExecuteStatementReqTupleSchemeFactory implements SchemeFactory { - public TExecuteStatementReqTupleScheme getScheme() { - return new TExecuteStatementReqTupleScheme(); - } - } - - private static class TExecuteStatementReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TExecuteStatementReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - oprot.writeString(struct.statement); - BitSet optionals = new BitSet(); - if (struct.isSetConfOverlay()) { - optionals.set(0); - } - if (struct.isSetRunAsync()) { - optionals.set(1); - } - if (struct.isSetQueryTimeout()) { - optionals.set(2); - } - oprot.writeBitSet(optionals, 3); - if (struct.isSetConfOverlay()) { - { - oprot.writeI32(struct.confOverlay.size()); - for (Map.Entry _iter167 : struct.confOverlay.entrySet()) - { - oprot.writeString(_iter167.getKey()); - oprot.writeString(_iter167.getValue()); - } - } - } - if (struct.isSetRunAsync()) { - oprot.writeBool(struct.runAsync); - } - if (struct.isSetQueryTimeout()) { - oprot.writeI64(struct.queryTimeout); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TExecuteStatementReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - struct.statement = iprot.readString(); - struct.setStatementIsSet(true); - BitSet incoming = iprot.readBitSet(3); - if (incoming.get(0)) { - { - org.apache.thrift.protocol.TMap _map168 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.confOverlay = new HashMap(2*_map168.size); - String _key169; - String _val170; - for (int _i171 = 0; _i171 < _map168.size; ++_i171) - { - _key169 = iprot.readString(); - _val170 = iprot.readString(); - struct.confOverlay.put(_key169, _val170); - } - } - struct.setConfOverlayIsSet(true); - } - if (incoming.get(1)) { - struct.runAsync = iprot.readBool(); - struct.setRunAsyncIsSet(true); - } - if (incoming.get(2)) { - struct.queryTimeout = 
iprot.readI64(); - struct.setQueryTimeoutIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementResp.java deleted file mode 100644 index 7101fa5bdb84c..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementResp.java +++ /dev/null @@ -1,509 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TExecuteStatementResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TExecuteStatementResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TExecuteStatementRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TExecuteStatementRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TExecuteStatementResp.class, metaDataMap); - } - - public TExecuteStatementResp() { - } - - public TExecuteStatementResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TExecuteStatementResp(TExecuteStatementResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TExecuteStatementResp deepCopy() { - return new TExecuteStatementResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TExecuteStatementResp) - return this.equals((TExecuteStatementResp)that); - return false; - } - - public boolean equals(TExecuteStatementResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - 
list.add(present_status); - if (present_status) - list.add(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TExecuteStatementResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TExecuteStatementResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TExecuteStatementRespStandardSchemeFactory implements SchemeFactory { - public TExecuteStatementRespStandardScheme getScheme() { - return new TExecuteStatementRespStandardScheme(); - } - } - - private static class TExecuteStatementRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TExecuteStatementResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TExecuteStatementResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TExecuteStatementRespTupleSchemeFactory implements SchemeFactory { - public TExecuteStatementRespTupleScheme getScheme() { - return new TExecuteStatementRespTupleScheme(); - } - } - - private static class TExecuteStatementRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TExecuteStatementResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - 
struct.operationHandle.write(oprot);
-      }
-    }
-
-    @Override
-    public void read(org.apache.thrift.protocol.TProtocol prot, TExecuteStatementResp struct) throws org.apache.thrift.TException {
-      TTupleProtocol iprot = (TTupleProtocol) prot;
-      struct.status = new TStatus();
-      struct.status.read(iprot);
-      struct.setStatusIsSet(true);
-      BitSet incoming = iprot.readBitSet(1);
-      if (incoming.get(0)) {
-        struct.operationHandle = new TOperationHandle();
-        struct.operationHandle.read(iprot);
-        struct.setOperationHandleIsSet(true);
-      }
-    }
-  }
-
-}
-
diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchOrientation.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchOrientation.java
deleted file mode 100644
index 159be45259434..0000000000000
--- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchOrientation.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.3)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-package org.apache.hive.service.rpc.thrift;
-
-
-import java.util.Map;
-import java.util.HashMap;
-import org.apache.thrift.TEnum;
-
-public enum TFetchOrientation implements org.apache.thrift.TEnum {
-  FETCH_NEXT(0),
-  FETCH_PRIOR(1),
-  FETCH_RELATIVE(2),
-  FETCH_ABSOLUTE(3),
-  FETCH_FIRST(4),
-  FETCH_LAST(5);
-
-  private final int value;
-
-  private TFetchOrientation(int value) {
-    this.value = value;
-  }
-
-  /**
-   * Get the integer value of this enum value, as defined in the Thrift IDL.
-   */
-  public int getValue() {
-    return value;
-  }
-
-  /**
-   * Find a the enum type by its integer value, as defined in the Thrift IDL.
-   * @return null if the value is not found.
-   */
-  public static TFetchOrientation findByValue(int value) {
-    switch (value) {
-      case 0:
-        return FETCH_NEXT;
-      case 1:
-        return FETCH_PRIOR;
-      case 2:
-        return FETCH_RELATIVE;
-      case 3:
-        return FETCH_ABSOLUTE;
-      case 4:
-        return FETCH_FIRST;
-      case 5:
-        return FETCH_LAST;
-      default:
-        return null;
-    }
-  }
-}
diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsReq.java
deleted file mode 100644
index 2c93339d0c68b..0000000000000
--- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsReq.java
+++ /dev/null
@@ -1,714 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.3)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-package org.apache.hive.service.rpc.thrift;
-
-import org.apache.thrift.scheme.IScheme;
-import org.apache.thrift.scheme.SchemeFactory;
-import org.apache.thrift.scheme.StandardScheme;
-
-import org.apache.thrift.scheme.TupleScheme;
-import org.apache.thrift.protocol.TTupleProtocol;
-import org.apache.thrift.protocol.TProtocolException;
-import org.apache.thrift.EncodingUtils;
-import org.apache.thrift.TException;
-import org.apache.thrift.async.AsyncMethodCallback;
-import org.apache.thrift.server.AbstractNonblockingServer.*;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.EnumMap;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.EnumSet;
-import java.util.Collections;
-import java.util.BitSet;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import javax.annotation.Generated;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TFetchResultsReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TFetchResultsReq"); - - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField ORIENTATION_FIELD_DESC = new org.apache.thrift.protocol.TField("orientation", org.apache.thrift.protocol.TType.I32, (short)2); - private static final org.apache.thrift.protocol.TField MAX_ROWS_FIELD_DESC = new org.apache.thrift.protocol.TField("maxRows", org.apache.thrift.protocol.TType.I64, (short)3); - private static final org.apache.thrift.protocol.TField FETCH_TYPE_FIELD_DESC = new org.apache.thrift.protocol.TField("fetchType", org.apache.thrift.protocol.TType.I16, (short)4); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TFetchResultsReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TFetchResultsReqTupleSchemeFactory()); - } - - private TOperationHandle operationHandle; // required - private TFetchOrientation orientation; // required - private long maxRows; // required - private short fetchType; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OPERATION_HANDLE((short)1, "operationHandle"), - /** - * - * @see TFetchOrientation - */ - ORIENTATION((short)2, "orientation"), - MAX_ROWS((short)3, "maxRows"), - FETCH_TYPE((short)4, "fetchType"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OPERATION_HANDLE - return OPERATION_HANDLE; - case 2: // ORIENTATION - return ORIENTATION; - case 3: // MAX_ROWS - return MAX_ROWS; - case 4: // FETCH_TYPE - return FETCH_TYPE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __MAXROWS_ISSET_ID = 0; - private static final int __FETCHTYPE_ISSET_ID = 1; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.FETCH_TYPE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - tmpMap.put(_Fields.ORIENTATION, new org.apache.thrift.meta_data.FieldMetaData("orientation", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TFetchOrientation.class))); - tmpMap.put(_Fields.MAX_ROWS, new org.apache.thrift.meta_data.FieldMetaData("maxRows", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - tmpMap.put(_Fields.FETCH_TYPE, new org.apache.thrift.meta_data.FieldMetaData("fetchType", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I16))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TFetchResultsReq.class, metaDataMap); - } - - public TFetchResultsReq() { - this.orientation = org.apache.hive.service.rpc.thrift.TFetchOrientation.FETCH_NEXT; - - this.fetchType = (short)0; - - } - - public TFetchResultsReq( - TOperationHandle operationHandle, - TFetchOrientation orientation, - long maxRows) - { - this(); - this.operationHandle = operationHandle; - this.orientation = orientation; - this.maxRows = maxRows; - setMaxRowsIsSet(true); - } - - /** - * Performs a deep copy on other. 
- */ - public TFetchResultsReq(TFetchResultsReq other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - if (other.isSetOrientation()) { - this.orientation = other.orientation; - } - this.maxRows = other.maxRows; - this.fetchType = other.fetchType; - } - - public TFetchResultsReq deepCopy() { - return new TFetchResultsReq(this); - } - - @Override - public void clear() { - this.operationHandle = null; - this.orientation = org.apache.hive.service.rpc.thrift.TFetchOrientation.FETCH_NEXT; - - setMaxRowsIsSet(false); - this.maxRows = 0; - this.fetchType = (short)0; - - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - /** - * - * @see TFetchOrientation - */ - public TFetchOrientation getOrientation() { - return this.orientation; - } - - /** - * - * @see TFetchOrientation - */ - public void setOrientation(TFetchOrientation orientation) { - this.orientation = orientation; - } - - public void unsetOrientation() { - this.orientation = null; - } - - /** Returns true if field orientation is set (has been assigned a value) and false otherwise */ - public boolean isSetOrientation() { - return this.orientation != null; - } - - public void setOrientationIsSet(boolean value) { - if (!value) { - this.orientation = null; - } - } - - public long getMaxRows() { - return this.maxRows; - } - - public void setMaxRows(long maxRows) { - this.maxRows = maxRows; - setMaxRowsIsSet(true); - } - - public void unsetMaxRows() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __MAXROWS_ISSET_ID); - } - - /** Returns true if field maxRows is set (has been assigned a value) and false otherwise */ - public boolean isSetMaxRows() { - return EncodingUtils.testBit(__isset_bitfield, __MAXROWS_ISSET_ID); - } - - public void setMaxRowsIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __MAXROWS_ISSET_ID, value); - } - - public short getFetchType() { - return this.fetchType; - } - - public void setFetchType(short fetchType) { - this.fetchType = fetchType; - setFetchTypeIsSet(true); - } - - public void unsetFetchType() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __FETCHTYPE_ISSET_ID); - } - - /** Returns true if field fetchType is set (has been assigned a value) and false otherwise */ - public boolean isSetFetchType() { - return EncodingUtils.testBit(__isset_bitfield, __FETCHTYPE_ISSET_ID); - } - - public void setFetchTypeIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __FETCHTYPE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - case ORIENTATION: - if (value == null) { - unsetOrientation(); - } else { - setOrientation((TFetchOrientation)value); - } - break; - - case MAX_ROWS: - if (value == null) { - 
unsetMaxRows(); - } else { - setMaxRows((Long)value); - } - break; - - case FETCH_TYPE: - if (value == null) { - unsetFetchType(); - } else { - setFetchType((Short)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OPERATION_HANDLE: - return getOperationHandle(); - - case ORIENTATION: - return getOrientation(); - - case MAX_ROWS: - return getMaxRows(); - - case FETCH_TYPE: - return getFetchType(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OPERATION_HANDLE: - return isSetOperationHandle(); - case ORIENTATION: - return isSetOrientation(); - case MAX_ROWS: - return isSetMaxRows(); - case FETCH_TYPE: - return isSetFetchType(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TFetchResultsReq) - return this.equals((TFetchResultsReq)that); - return false; - } - - public boolean equals(TFetchResultsReq that) { - if (that == null) - return false; - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - boolean this_present_orientation = true && this.isSetOrientation(); - boolean that_present_orientation = true && that.isSetOrientation(); - if (this_present_orientation || that_present_orientation) { - if (!(this_present_orientation && that_present_orientation)) - return false; - if (!this.orientation.equals(that.orientation)) - return false; - } - - boolean this_present_maxRows = true; - boolean that_present_maxRows = true; - if (this_present_maxRows || that_present_maxRows) { - if (!(this_present_maxRows && that_present_maxRows)) - return false; - if (this.maxRows != that.maxRows) - return false; - } - - boolean this_present_fetchType = true && this.isSetFetchType(); - boolean that_present_fetchType = true && that.isSetFetchType(); - if (this_present_fetchType || that_present_fetchType) { - if (!(this_present_fetchType && that_present_fetchType)) - return false; - if (this.fetchType != that.fetchType) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - boolean present_orientation = true && (isSetOrientation()); - list.add(present_orientation); - if (present_orientation) - list.add(orientation.getValue()); - - boolean present_maxRows = true; - list.add(present_maxRows); - if (present_maxRows) - list.add(maxRows); - - boolean present_fetchType = true && (isSetFetchType()); - list.add(present_fetchType); - if (present_fetchType) - list.add(fetchType); - - return list.hashCode(); - } - - @Override - public int compareTo(TFetchResultsReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = 
Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOrientation()).compareTo(other.isSetOrientation()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOrientation()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.orientation, other.orientation); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetMaxRows()).compareTo(other.isSetMaxRows()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetMaxRows()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.maxRows, other.maxRows); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetFetchType()).compareTo(other.isSetFetchType()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetFetchType()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.fetchType, other.fetchType); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TFetchResultsReq("); - boolean first = true; - - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - if (!first) sb.append(", "); - sb.append("orientation:"); - if (this.orientation == null) { - sb.append("null"); - } else { - sb.append(this.orientation); - } - first = false; - if (!first) sb.append(", "); - sb.append("maxRows:"); - sb.append(this.maxRows); - first = false; - if (isSetFetchType()) { - if (!first) sb.append(", "); - sb.append("fetchType:"); - sb.append(this.fetchType); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetOperationHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationHandle' is unset! Struct:" + toString()); - } - - if (!isSetOrientation()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'orientation' is unset! Struct:" + toString()); - } - - if (!isSetMaxRows()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'maxRows' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TFetchResultsReqStandardSchemeFactory implements SchemeFactory { - public TFetchResultsReqStandardScheme getScheme() { - return new TFetchResultsReqStandardScheme(); - } - } - - private static class TFetchResultsReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TFetchResultsReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // ORIENTATION - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.orientation = org.apache.hive.service.rpc.thrift.TFetchOrientation.findByValue(iprot.readI32()); - struct.setOrientationIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // MAX_ROWS - if (schemeField.type == org.apache.thrift.protocol.TType.I64) { - struct.maxRows = iprot.readI64(); - struct.setMaxRowsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // FETCH_TYPE - if (schemeField.type == org.apache.thrift.protocol.TType.I16) { - struct.fetchType = iprot.readI16(); - struct.setFetchTypeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TFetchResultsReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.operationHandle != null) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.orientation != null) { - oprot.writeFieldBegin(ORIENTATION_FIELD_DESC); - oprot.writeI32(struct.orientation.getValue()); - oprot.writeFieldEnd(); - } - oprot.writeFieldBegin(MAX_ROWS_FIELD_DESC); - oprot.writeI64(struct.maxRows); - oprot.writeFieldEnd(); - if 
(struct.isSetFetchType()) { - oprot.writeFieldBegin(FETCH_TYPE_FIELD_DESC); - oprot.writeI16(struct.fetchType); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TFetchResultsReqTupleSchemeFactory implements SchemeFactory { - public TFetchResultsReqTupleScheme getScheme() { - return new TFetchResultsReqTupleScheme(); - } - } - - private static class TFetchResultsReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TFetchResultsReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.operationHandle.write(oprot); - oprot.writeI32(struct.orientation.getValue()); - oprot.writeI64(struct.maxRows); - BitSet optionals = new BitSet(); - if (struct.isSetFetchType()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetFetchType()) { - oprot.writeI16(struct.fetchType); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TFetchResultsReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - struct.orientation = org.apache.hive.service.rpc.thrift.TFetchOrientation.findByValue(iprot.readI32()); - struct.setOrientationIsSet(true); - struct.maxRows = iprot.readI64(); - struct.setMaxRowsIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.fetchType = iprot.readI16(); - struct.setFetchTypeIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsResp.java deleted file mode 100644 index 8f86cee3ad468..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsResp.java +++ /dev/null @@ -1,612 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TFetchResultsResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TFetchResultsResp"); - - private static final 
org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField HAS_MORE_ROWS_FIELD_DESC = new org.apache.thrift.protocol.TField("hasMoreRows", org.apache.thrift.protocol.TType.BOOL, (short)2); - private static final org.apache.thrift.protocol.TField RESULTS_FIELD_DESC = new org.apache.thrift.protocol.TField("results", org.apache.thrift.protocol.TType.STRUCT, (short)3); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TFetchResultsRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TFetchResultsRespTupleSchemeFactory()); - } - - private TStatus status; // required - private boolean hasMoreRows; // optional - private TRowSet results; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - HAS_MORE_ROWS((short)2, "hasMoreRows"), - RESULTS((short)3, "results"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // HAS_MORE_ROWS - return HAS_MORE_ROWS; - case 3: // RESULTS - return RESULTS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __HASMOREROWS_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.HAS_MORE_ROWS,_Fields.RESULTS}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.HAS_MORE_ROWS, new org.apache.thrift.meta_data.FieldMetaData("hasMoreRows", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL))); - tmpMap.put(_Fields.RESULTS, new org.apache.thrift.meta_data.FieldMetaData("results", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TRowSet.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TFetchResultsResp.class, metaDataMap); - } - - public TFetchResultsResp() { - } - - public TFetchResultsResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TFetchResultsResp(TFetchResultsResp other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - this.hasMoreRows = other.hasMoreRows; - if (other.isSetResults()) { - this.results = new TRowSet(other.results); - } - } - - public TFetchResultsResp deepCopy() { - return new TFetchResultsResp(this); - } - - @Override - public void clear() { - this.status = null; - setHasMoreRowsIsSet(false); - this.hasMoreRows = false; - this.results = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public boolean isHasMoreRows() { - return this.hasMoreRows; - } - - public void setHasMoreRows(boolean hasMoreRows) { - this.hasMoreRows = hasMoreRows; - setHasMoreRowsIsSet(true); - } - - public void unsetHasMoreRows() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __HASMOREROWS_ISSET_ID); - } - - /** Returns true if field hasMoreRows is set (has been assigned a value) and false otherwise */ - public boolean isSetHasMoreRows() { - return EncodingUtils.testBit(__isset_bitfield, __HASMOREROWS_ISSET_ID); - } - - public void setHasMoreRowsIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __HASMOREROWS_ISSET_ID, value); - } - - public TRowSet getResults() { - return this.results; - } - - public void setResults(TRowSet results) { - this.results = results; - } - - public void unsetResults() { - this.results = null; - } - - /** Returns true if field results is set (has been assigned a value) and false otherwise */ - public boolean isSetResults() { - return this.results != null; - } - - public void setResultsIsSet(boolean value) { - if (!value) { - this.results = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case HAS_MORE_ROWS: - if (value == null) { - unsetHasMoreRows(); - } else { - setHasMoreRows((Boolean)value); - } - break; - - case RESULTS: - if (value == null) { - unsetResults(); - } else { - setResults((TRowSet)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case HAS_MORE_ROWS: - return isHasMoreRows(); - - case RESULTS: - return getResults(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case HAS_MORE_ROWS: - return isSetHasMoreRows(); - case RESULTS: - return isSetResults(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TFetchResultsResp) - return this.equals((TFetchResultsResp)that); - return false; - } - - public boolean equals(TFetchResultsResp that) { - if (that == null) - return false; - - boolean this_present_status = true && 
this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_hasMoreRows = true && this.isSetHasMoreRows(); - boolean that_present_hasMoreRows = true && that.isSetHasMoreRows(); - if (this_present_hasMoreRows || that_present_hasMoreRows) { - if (!(this_present_hasMoreRows && that_present_hasMoreRows)) - return false; - if (this.hasMoreRows != that.hasMoreRows) - return false; - } - - boolean this_present_results = true && this.isSetResults(); - boolean that_present_results = true && that.isSetResults(); - if (this_present_results || that_present_results) { - if (!(this_present_results && that_present_results)) - return false; - if (!this.results.equals(that.results)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if (present_status) - list.add(status); - - boolean present_hasMoreRows = true && (isSetHasMoreRows()); - list.add(present_hasMoreRows); - if (present_hasMoreRows) - list.add(hasMoreRows); - - boolean present_results = true && (isSetResults()); - list.add(present_results); - if (present_results) - list.add(results); - - return list.hashCode(); - } - - @Override - public int compareTo(TFetchResultsResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetHasMoreRows()).compareTo(other.isSetHasMoreRows()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetHasMoreRows()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.hasMoreRows, other.hasMoreRows); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetResults()).compareTo(other.isSetResults()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetResults()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.results, other.results); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TFetchResultsResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetHasMoreRows()) { - if (!first) sb.append(", "); - sb.append("hasMoreRows:"); - sb.append(this.hasMoreRows); - first = false; - } - if (isSetResults()) { - if (!first) sb.append(", "); - 
sb.append("results:"); - if (this.results == null) { - sb.append("null"); - } else { - sb.append(this.results); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (results != null) { - results.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TFetchResultsRespStandardSchemeFactory implements SchemeFactory { - public TFetchResultsRespStandardScheme getScheme() { - return new TFetchResultsRespStandardScheme(); - } - } - - private static class TFetchResultsRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TFetchResultsResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // HAS_MORE_ROWS - if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) { - struct.hasMoreRows = iprot.readBool(); - struct.setHasMoreRowsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // RESULTS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.results = new TRowSet(); - struct.results.read(iprot); - struct.setResultsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TFetchResultsResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.isSetHasMoreRows()) { - oprot.writeFieldBegin(HAS_MORE_ROWS_FIELD_DESC); - oprot.writeBool(struct.hasMoreRows); - oprot.writeFieldEnd(); - } - if (struct.results != null) { - if (struct.isSetResults()) { - 
oprot.writeFieldBegin(RESULTS_FIELD_DESC); - struct.results.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TFetchResultsRespTupleSchemeFactory implements SchemeFactory { - public TFetchResultsRespTupleScheme getScheme() { - return new TFetchResultsRespTupleScheme(); - } - } - - private static class TFetchResultsRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TFetchResultsResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetHasMoreRows()) { - optionals.set(0); - } - if (struct.isSetResults()) { - optionals.set(1); - } - oprot.writeBitSet(optionals, 2); - if (struct.isSetHasMoreRows()) { - oprot.writeBool(struct.hasMoreRows); - } - if (struct.isSetResults()) { - struct.results.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TFetchResultsResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(2); - if (incoming.get(0)) { - struct.hasMoreRows = iprot.readBool(); - struct.setHasMoreRowsIsSet(true); - } - if (incoming.get(1)) { - struct.results = new TRowSet(); - struct.results.read(iprot); - struct.setResultsIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsReq.java deleted file mode 100644 index b8a2ca6648069..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsReq.java +++ /dev/null @@ -1,394 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetCatalogsReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetCatalogsReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", 
org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetCatalogsReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetCatalogsReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetCatalogsReq.class, metaDataMap); - } - - public TGetCatalogsReq() { - } - - public TGetCatalogsReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetCatalogsReq(TGetCatalogsReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - } - - public TGetCatalogsReq deepCopy() { - return new TGetCatalogsReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetCatalogsReq) - return this.equals((TGetCatalogsReq)that); - return false; - } - - public boolean equals(TGetCatalogsReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetCatalogsReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - 
@Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetCatalogsReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetCatalogsReqStandardSchemeFactory implements SchemeFactory { - public TGetCatalogsReqStandardScheme getScheme() { - return new TGetCatalogsReqStandardScheme(); - } - } - - private static class TGetCatalogsReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetCatalogsReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetCatalogsReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetCatalogsReqTupleSchemeFactory implements SchemeFactory { - public TGetCatalogsReqTupleScheme getScheme() { - return new TGetCatalogsReqTupleScheme(); - } - } - - private static class TGetCatalogsReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetCatalogsReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetCatalogsReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new 
TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsResp.java deleted file mode 100644 index eeeac9a1f9292..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsResp.java +++ /dev/null @@ -1,509 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetCatalogsResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetCatalogsResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetCatalogsRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetCatalogsRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetCatalogsResp.class, metaDataMap); - } - - public TGetCatalogsResp() { - } - - public TGetCatalogsResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetCatalogsResp(TGetCatalogsResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetCatalogsResp deepCopy() { - return new TGetCatalogsResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetCatalogsResp) - return this.equals((TGetCatalogsResp)that); - return false; - } - - public boolean equals(TGetCatalogsResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if 
(present_status) - list.add(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetCatalogsResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetCatalogsResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetCatalogsRespStandardSchemeFactory implements SchemeFactory { - public TGetCatalogsRespStandardScheme getScheme() { - return new TGetCatalogsRespStandardScheme(); - } - } - - private static class TGetCatalogsRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetCatalogsResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetCatalogsResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetCatalogsRespTupleSchemeFactory implements SchemeFactory { - public TGetCatalogsRespTupleScheme getScheme() { - return new TGetCatalogsRespTupleScheme(); - } - } - - private static class TGetCatalogsRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetCatalogsResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override - public 
void read(org.apache.thrift.protocol.TProtocol prot, TGetCatalogsResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsReq.java deleted file mode 100644 index ba80279294957..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsReq.java +++ /dev/null @@ -1,822 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetColumnsReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetColumnsReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField CATALOG_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("catalogName", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField SCHEMA_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("schemaName", org.apache.thrift.protocol.TType.STRING, (short)3); - private static final org.apache.thrift.protocol.TField TABLE_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("tableName", org.apache.thrift.protocol.TType.STRING, (short)4); - private static final org.apache.thrift.protocol.TField COLUMN_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("columnName", org.apache.thrift.protocol.TType.STRING, (short)5); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetColumnsReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetColumnsReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // 
required - private String catalogName; // optional - private String schemaName; // optional - private String tableName; // optional - private String columnName; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - CATALOG_NAME((short)2, "catalogName"), - SCHEMA_NAME((short)3, "schemaName"), - TABLE_NAME((short)4, "tableName"), - COLUMN_NAME((short)5, "columnName"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // CATALOG_NAME - return CATALOG_NAME; - case 3: // SCHEMA_NAME - return SCHEMA_NAME; - case 4: // TABLE_NAME - return TABLE_NAME; - case 5: // COLUMN_NAME - return COLUMN_NAME; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.CATALOG_NAME,_Fields.SCHEMA_NAME,_Fields.TABLE_NAME,_Fields.COLUMN_NAME}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.CATALOG_NAME, new org.apache.thrift.meta_data.FieldMetaData("catalogName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - tmpMap.put(_Fields.SCHEMA_NAME, new org.apache.thrift.meta_data.FieldMetaData("schemaName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - tmpMap.put(_Fields.TABLE_NAME, new org.apache.thrift.meta_data.FieldMetaData("tableName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - tmpMap.put(_Fields.COLUMN_NAME, new org.apache.thrift.meta_data.FieldMetaData("columnName", 
org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetColumnsReq.class, metaDataMap); - } - - public TGetColumnsReq() { - } - - public TGetColumnsReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. - */ - public TGetColumnsReq(TGetColumnsReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetCatalogName()) { - this.catalogName = other.catalogName; - } - if (other.isSetSchemaName()) { - this.schemaName = other.schemaName; - } - if (other.isSetTableName()) { - this.tableName = other.tableName; - } - if (other.isSetColumnName()) { - this.columnName = other.columnName; - } - } - - public TGetColumnsReq deepCopy() { - return new TGetColumnsReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.catalogName = null; - this.schemaName = null; - this.tableName = null; - this.columnName = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getCatalogName() { - return this.catalogName; - } - - public void setCatalogName(String catalogName) { - this.catalogName = catalogName; - } - - public void unsetCatalogName() { - this.catalogName = null; - } - - /** Returns true if field catalogName is set (has been assigned a value) and false otherwise */ - public boolean isSetCatalogName() { - return this.catalogName != null; - } - - public void setCatalogNameIsSet(boolean value) { - if (!value) { - this.catalogName = null; - } - } - - public String getSchemaName() { - return this.schemaName; - } - - public void setSchemaName(String schemaName) { - this.schemaName = schemaName; - } - - public void unsetSchemaName() { - this.schemaName = null; - } - - /** Returns true if field schemaName is set (has been assigned a value) and false otherwise */ - public boolean isSetSchemaName() { - return this.schemaName != null; - } - - public void setSchemaNameIsSet(boolean value) { - if (!value) { - this.schemaName = null; - } - } - - public String getTableName() { - return this.tableName; - } - - public void setTableName(String tableName) { - this.tableName = tableName; - } - - public void unsetTableName() { - this.tableName = null; - } - - /** Returns true if field tableName is set (has been assigned a value) and false otherwise */ - public boolean isSetTableName() { - return this.tableName != null; - } - - public void setTableNameIsSet(boolean value) { - if (!value) { - this.tableName = null; - } - } - - public String getColumnName() { - return this.columnName; - } - - public void setColumnName(String columnName) { - this.columnName = columnName; - } - - public void unsetColumnName() { - this.columnName = null; - } - - /** Returns true if field columnName is set (has been 
assigned a value) and false otherwise */ - public boolean isSetColumnName() { - return this.columnName != null; - } - - public void setColumnNameIsSet(boolean value) { - if (!value) { - this.columnName = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case CATALOG_NAME: - if (value == null) { - unsetCatalogName(); - } else { - setCatalogName((String)value); - } - break; - - case SCHEMA_NAME: - if (value == null) { - unsetSchemaName(); - } else { - setSchemaName((String)value); - } - break; - - case TABLE_NAME: - if (value == null) { - unsetTableName(); - } else { - setTableName((String)value); - } - break; - - case COLUMN_NAME: - if (value == null) { - unsetColumnName(); - } else { - setColumnName((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case CATALOG_NAME: - return getCatalogName(); - - case SCHEMA_NAME: - return getSchemaName(); - - case TABLE_NAME: - return getTableName(); - - case COLUMN_NAME: - return getColumnName(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case CATALOG_NAME: - return isSetCatalogName(); - case SCHEMA_NAME: - return isSetSchemaName(); - case TABLE_NAME: - return isSetTableName(); - case COLUMN_NAME: - return isSetColumnName(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetColumnsReq) - return this.equals((TGetColumnsReq)that); - return false; - } - - public boolean equals(TGetColumnsReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_catalogName = true && this.isSetCatalogName(); - boolean that_present_catalogName = true && that.isSetCatalogName(); - if (this_present_catalogName || that_present_catalogName) { - if (!(this_present_catalogName && that_present_catalogName)) - return false; - if (!this.catalogName.equals(that.catalogName)) - return false; - } - - boolean this_present_schemaName = true && this.isSetSchemaName(); - boolean that_present_schemaName = true && that.isSetSchemaName(); - if (this_present_schemaName || that_present_schemaName) { - if (!(this_present_schemaName && that_present_schemaName)) - return false; - if (!this.schemaName.equals(that.schemaName)) - return false; - } - - boolean this_present_tableName = true && this.isSetTableName(); - boolean that_present_tableName = true && that.isSetTableName(); - if (this_present_tableName || that_present_tableName) { - if (!(this_present_tableName && that_present_tableName)) - return false; - if (!this.tableName.equals(that.tableName)) - return false; - } - - boolean this_present_columnName = true && 
this.isSetColumnName(); - boolean that_present_columnName = true && that.isSetColumnName(); - if (this_present_columnName || that_present_columnName) { - if (!(this_present_columnName && that_present_columnName)) - return false; - if (!this.columnName.equals(that.columnName)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - boolean present_catalogName = true && (isSetCatalogName()); - list.add(present_catalogName); - if (present_catalogName) - list.add(catalogName); - - boolean present_schemaName = true && (isSetSchemaName()); - list.add(present_schemaName); - if (present_schemaName) - list.add(schemaName); - - boolean present_tableName = true && (isSetTableName()); - list.add(present_tableName); - if (present_tableName) - list.add(tableName); - - boolean present_columnName = true && (isSetColumnName()); - list.add(present_columnName); - if (present_columnName) - list.add(columnName); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetColumnsReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetCatalogName()).compareTo(other.isSetCatalogName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetCatalogName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.catalogName, other.catalogName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSchemaName()).compareTo(other.isSetSchemaName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSchemaName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.schemaName, other.schemaName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetTableName()).compareTo(other.isSetTableName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTableName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.tableName, other.tableName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetColumnName()).compareTo(other.isSetColumnName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetColumnName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.columnName, other.columnName); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb 
= new StringBuilder("TGetColumnsReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (isSetCatalogName()) { - if (!first) sb.append(", "); - sb.append("catalogName:"); - if (this.catalogName == null) { - sb.append("null"); - } else { - sb.append(this.catalogName); - } - first = false; - } - if (isSetSchemaName()) { - if (!first) sb.append(", "); - sb.append("schemaName:"); - if (this.schemaName == null) { - sb.append("null"); - } else { - sb.append(this.schemaName); - } - first = false; - } - if (isSetTableName()) { - if (!first) sb.append(", "); - sb.append("tableName:"); - if (this.tableName == null) { - sb.append("null"); - } else { - sb.append(this.tableName); - } - first = false; - } - if (isSetColumnName()) { - if (!first) sb.append(", "); - sb.append("columnName:"); - if (this.columnName == null) { - sb.append("null"); - } else { - sb.append(this.columnName); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetColumnsReqStandardSchemeFactory implements SchemeFactory { - public TGetColumnsReqStandardScheme getScheme() { - return new TGetColumnsReqStandardScheme(); - } - } - - private static class TGetColumnsReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetColumnsReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // CATALOG_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SCHEMA_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, 
schemeField.type); - } - break; - case 4: // TABLE_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.tableName = iprot.readString(); - struct.setTableNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 5: // COLUMN_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.columnName = iprot.readString(); - struct.setColumnNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetColumnsReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.catalogName != null) { - if (struct.isSetCatalogName()) { - oprot.writeFieldBegin(CATALOG_NAME_FIELD_DESC); - oprot.writeString(struct.catalogName); - oprot.writeFieldEnd(); - } - } - if (struct.schemaName != null) { - if (struct.isSetSchemaName()) { - oprot.writeFieldBegin(SCHEMA_NAME_FIELD_DESC); - oprot.writeString(struct.schemaName); - oprot.writeFieldEnd(); - } - } - if (struct.tableName != null) { - if (struct.isSetTableName()) { - oprot.writeFieldBegin(TABLE_NAME_FIELD_DESC); - oprot.writeString(struct.tableName); - oprot.writeFieldEnd(); - } - } - if (struct.columnName != null) { - if (struct.isSetColumnName()) { - oprot.writeFieldBegin(COLUMN_NAME_FIELD_DESC); - oprot.writeString(struct.columnName); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetColumnsReqTupleSchemeFactory implements SchemeFactory { - public TGetColumnsReqTupleScheme getScheme() { - return new TGetColumnsReqTupleScheme(); - } - } - - private static class TGetColumnsReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetColumnsReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetCatalogName()) { - optionals.set(0); - } - if (struct.isSetSchemaName()) { - optionals.set(1); - } - if (struct.isSetTableName()) { - optionals.set(2); - } - if (struct.isSetColumnName()) { - optionals.set(3); - } - oprot.writeBitSet(optionals, 4); - if (struct.isSetCatalogName()) { - oprot.writeString(struct.catalogName); - } - if (struct.isSetSchemaName()) { - oprot.writeString(struct.schemaName); - } - if (struct.isSetTableName()) { - oprot.writeString(struct.tableName); - } - if (struct.isSetColumnName()) { - oprot.writeString(struct.columnName); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetColumnsReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - BitSet incoming = iprot.readBitSet(4); - if (incoming.get(0)) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } - if (incoming.get(1)) { - struct.schemaName = iprot.readString(); - 
struct.setSchemaNameIsSet(true); - } - if (incoming.get(2)) { - struct.tableName = iprot.readString(); - struct.setTableNameIsSet(true); - } - if (incoming.get(3)) { - struct.columnName = iprot.readString(); - struct.setColumnNameIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsResp.java deleted file mode 100644 index c68aac9042fc1..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsResp.java +++ /dev/null @@ -1,509 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetColumnsResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetColumnsResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetColumnsRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetColumnsRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetColumnsResp.class, metaDataMap); - } - - public TGetColumnsResp() { - } - - public TGetColumnsResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetColumnsResp(TGetColumnsResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetColumnsResp deepCopy() { - return new TGetColumnsResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetColumnsResp) - return this.equals((TGetColumnsResp)that); - return false; - } - - public boolean equals(TGetColumnsResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if 
(present_status) - list.add(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetColumnsResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetColumnsResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetColumnsRespStandardSchemeFactory implements SchemeFactory { - public TGetColumnsRespStandardScheme getScheme() { - return new TGetColumnsRespStandardScheme(); - } - } - - private static class TGetColumnsRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetColumnsResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetColumnsResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetColumnsRespTupleSchemeFactory implements SchemeFactory { - public TGetColumnsRespTupleScheme getScheme() { - return new TGetColumnsRespTupleScheme(); - } - } - - private static class TGetColumnsRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetColumnsResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override - public void 
read(org.apache.thrift.protocol.TProtocol prot, TGetColumnsResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceReq.java deleted file mode 100644 index 972957063b297..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceReq.java +++ /dev/null @@ -1,1034 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetCrossReferenceReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetCrossReferenceReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField PARENT_CATALOG_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("parentCatalogName", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField PARENT_SCHEMA_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("parentSchemaName", org.apache.thrift.protocol.TType.STRING, (short)3); - private static final org.apache.thrift.protocol.TField PARENT_TABLE_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("parentTableName", org.apache.thrift.protocol.TType.STRING, (short)4); - private static final org.apache.thrift.protocol.TField FOREIGN_CATALOG_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("foreignCatalogName", org.apache.thrift.protocol.TType.STRING, (short)5); - private static final org.apache.thrift.protocol.TField FOREIGN_SCHEMA_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("foreignSchemaName", org.apache.thrift.protocol.TType.STRING, (short)6); - private 
static final org.apache.thrift.protocol.TField FOREIGN_TABLE_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("foreignTableName", org.apache.thrift.protocol.TType.STRING, (short)7); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetCrossReferenceReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetCrossReferenceReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String parentCatalogName; // optional - private String parentSchemaName; // optional - private String parentTableName; // optional - private String foreignCatalogName; // optional - private String foreignSchemaName; // optional - private String foreignTableName; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - PARENT_CATALOG_NAME((short)2, "parentCatalogName"), - PARENT_SCHEMA_NAME((short)3, "parentSchemaName"), - PARENT_TABLE_NAME((short)4, "parentTableName"), - FOREIGN_CATALOG_NAME((short)5, "foreignCatalogName"), - FOREIGN_SCHEMA_NAME((short)6, "foreignSchemaName"), - FOREIGN_TABLE_NAME((short)7, "foreignTableName"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // PARENT_CATALOG_NAME - return PARENT_CATALOG_NAME; - case 3: // PARENT_SCHEMA_NAME - return PARENT_SCHEMA_NAME; - case 4: // PARENT_TABLE_NAME - return PARENT_TABLE_NAME; - case 5: // FOREIGN_CATALOG_NAME - return FOREIGN_CATALOG_NAME; - case 6: // FOREIGN_SCHEMA_NAME - return FOREIGN_SCHEMA_NAME; - case 7: // FOREIGN_TABLE_NAME - return FOREIGN_TABLE_NAME; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.PARENT_CATALOG_NAME,_Fields.PARENT_SCHEMA_NAME,_Fields.PARENT_TABLE_NAME,_Fields.FOREIGN_CATALOG_NAME,_Fields.FOREIGN_SCHEMA_NAME,_Fields.FOREIGN_TABLE_NAME}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.PARENT_CATALOG_NAME, new org.apache.thrift.meta_data.FieldMetaData("parentCatalogName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - tmpMap.put(_Fields.PARENT_SCHEMA_NAME, new org.apache.thrift.meta_data.FieldMetaData("parentSchemaName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - tmpMap.put(_Fields.PARENT_TABLE_NAME, new org.apache.thrift.meta_data.FieldMetaData("parentTableName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - tmpMap.put(_Fields.FOREIGN_CATALOG_NAME, new org.apache.thrift.meta_data.FieldMetaData("foreignCatalogName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - tmpMap.put(_Fields.FOREIGN_SCHEMA_NAME, new org.apache.thrift.meta_data.FieldMetaData("foreignSchemaName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - tmpMap.put(_Fields.FOREIGN_TABLE_NAME, new org.apache.thrift.meta_data.FieldMetaData("foreignTableName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetCrossReferenceReq.class, metaDataMap); - } - - public TGetCrossReferenceReq() { - } - - public TGetCrossReferenceReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetCrossReferenceReq(TGetCrossReferenceReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetParentCatalogName()) { - this.parentCatalogName = other.parentCatalogName; - } - if (other.isSetParentSchemaName()) { - this.parentSchemaName = other.parentSchemaName; - } - if (other.isSetParentTableName()) { - this.parentTableName = other.parentTableName; - } - if (other.isSetForeignCatalogName()) { - this.foreignCatalogName = other.foreignCatalogName; - } - if (other.isSetForeignSchemaName()) { - this.foreignSchemaName = other.foreignSchemaName; - } - if (other.isSetForeignTableName()) { - this.foreignTableName = other.foreignTableName; - } - } - - public TGetCrossReferenceReq deepCopy() { - return new TGetCrossReferenceReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.parentCatalogName = null; - this.parentSchemaName = null; - this.parentTableName = null; - this.foreignCatalogName = null; - this.foreignSchemaName = null; - this.foreignTableName = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getParentCatalogName() { - return this.parentCatalogName; - } - - public void setParentCatalogName(String parentCatalogName) { - this.parentCatalogName = parentCatalogName; - } - - public void unsetParentCatalogName() { - this.parentCatalogName = null; - } - - /** Returns true if field parentCatalogName is set (has been assigned a value) and false otherwise */ - public boolean isSetParentCatalogName() { - return this.parentCatalogName != null; - } - - public void setParentCatalogNameIsSet(boolean value) { - if (!value) { - this.parentCatalogName = null; - } - } - - public String getParentSchemaName() { - return this.parentSchemaName; - } - - public void setParentSchemaName(String parentSchemaName) { - this.parentSchemaName = parentSchemaName; - } - - public void unsetParentSchemaName() { - this.parentSchemaName = null; - } - - /** Returns true if field parentSchemaName is set (has been assigned a value) and false otherwise */ - public boolean isSetParentSchemaName() { - return this.parentSchemaName != null; - } - - public void setParentSchemaNameIsSet(boolean value) { - if (!value) { - this.parentSchemaName = null; - } - } - - public String getParentTableName() { - return this.parentTableName; - } - - public void setParentTableName(String parentTableName) { - this.parentTableName = parentTableName; - } - - public void unsetParentTableName() { - this.parentTableName = null; - } - - /** Returns true if field parentTableName is set (has been assigned a value) and false otherwise */ - public boolean isSetParentTableName() { - return this.parentTableName != null; - } - - public void setParentTableNameIsSet(boolean value) { - if (!value) { - this.parentTableName = null; - } - } - - public String getForeignCatalogName() { - return this.foreignCatalogName; - } - - public void setForeignCatalogName(String foreignCatalogName) { - this.foreignCatalogName = 
foreignCatalogName; - } - - public void unsetForeignCatalogName() { - this.foreignCatalogName = null; - } - - /** Returns true if field foreignCatalogName is set (has been assigned a value) and false otherwise */ - public boolean isSetForeignCatalogName() { - return this.foreignCatalogName != null; - } - - public void setForeignCatalogNameIsSet(boolean value) { - if (!value) { - this.foreignCatalogName = null; - } - } - - public String getForeignSchemaName() { - return this.foreignSchemaName; - } - - public void setForeignSchemaName(String foreignSchemaName) { - this.foreignSchemaName = foreignSchemaName; - } - - public void unsetForeignSchemaName() { - this.foreignSchemaName = null; - } - - /** Returns true if field foreignSchemaName is set (has been assigned a value) and false otherwise */ - public boolean isSetForeignSchemaName() { - return this.foreignSchemaName != null; - } - - public void setForeignSchemaNameIsSet(boolean value) { - if (!value) { - this.foreignSchemaName = null; - } - } - - public String getForeignTableName() { - return this.foreignTableName; - } - - public void setForeignTableName(String foreignTableName) { - this.foreignTableName = foreignTableName; - } - - public void unsetForeignTableName() { - this.foreignTableName = null; - } - - /** Returns true if field foreignTableName is set (has been assigned a value) and false otherwise */ - public boolean isSetForeignTableName() { - return this.foreignTableName != null; - } - - public void setForeignTableNameIsSet(boolean value) { - if (!value) { - this.foreignTableName = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case PARENT_CATALOG_NAME: - if (value == null) { - unsetParentCatalogName(); - } else { - setParentCatalogName((String)value); - } - break; - - case PARENT_SCHEMA_NAME: - if (value == null) { - unsetParentSchemaName(); - } else { - setParentSchemaName((String)value); - } - break; - - case PARENT_TABLE_NAME: - if (value == null) { - unsetParentTableName(); - } else { - setParentTableName((String)value); - } - break; - - case FOREIGN_CATALOG_NAME: - if (value == null) { - unsetForeignCatalogName(); - } else { - setForeignCatalogName((String)value); - } - break; - - case FOREIGN_SCHEMA_NAME: - if (value == null) { - unsetForeignSchemaName(); - } else { - setForeignSchemaName((String)value); - } - break; - - case FOREIGN_TABLE_NAME: - if (value == null) { - unsetForeignTableName(); - } else { - setForeignTableName((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case PARENT_CATALOG_NAME: - return getParentCatalogName(); - - case PARENT_SCHEMA_NAME: - return getParentSchemaName(); - - case PARENT_TABLE_NAME: - return getParentTableName(); - - case FOREIGN_CATALOG_NAME: - return getForeignCatalogName(); - - case FOREIGN_SCHEMA_NAME: - return getForeignSchemaName(); - - case FOREIGN_TABLE_NAME: - return getForeignTableName(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case PARENT_CATALOG_NAME: - return 
isSetParentCatalogName(); - case PARENT_SCHEMA_NAME: - return isSetParentSchemaName(); - case PARENT_TABLE_NAME: - return isSetParentTableName(); - case FOREIGN_CATALOG_NAME: - return isSetForeignCatalogName(); - case FOREIGN_SCHEMA_NAME: - return isSetForeignSchemaName(); - case FOREIGN_TABLE_NAME: - return isSetForeignTableName(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetCrossReferenceReq) - return this.equals((TGetCrossReferenceReq)that); - return false; - } - - public boolean equals(TGetCrossReferenceReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_parentCatalogName = true && this.isSetParentCatalogName(); - boolean that_present_parentCatalogName = true && that.isSetParentCatalogName(); - if (this_present_parentCatalogName || that_present_parentCatalogName) { - if (!(this_present_parentCatalogName && that_present_parentCatalogName)) - return false; - if (!this.parentCatalogName.equals(that.parentCatalogName)) - return false; - } - - boolean this_present_parentSchemaName = true && this.isSetParentSchemaName(); - boolean that_present_parentSchemaName = true && that.isSetParentSchemaName(); - if (this_present_parentSchemaName || that_present_parentSchemaName) { - if (!(this_present_parentSchemaName && that_present_parentSchemaName)) - return false; - if (!this.parentSchemaName.equals(that.parentSchemaName)) - return false; - } - - boolean this_present_parentTableName = true && this.isSetParentTableName(); - boolean that_present_parentTableName = true && that.isSetParentTableName(); - if (this_present_parentTableName || that_present_parentTableName) { - if (!(this_present_parentTableName && that_present_parentTableName)) - return false; - if (!this.parentTableName.equals(that.parentTableName)) - return false; - } - - boolean this_present_foreignCatalogName = true && this.isSetForeignCatalogName(); - boolean that_present_foreignCatalogName = true && that.isSetForeignCatalogName(); - if (this_present_foreignCatalogName || that_present_foreignCatalogName) { - if (!(this_present_foreignCatalogName && that_present_foreignCatalogName)) - return false; - if (!this.foreignCatalogName.equals(that.foreignCatalogName)) - return false; - } - - boolean this_present_foreignSchemaName = true && this.isSetForeignSchemaName(); - boolean that_present_foreignSchemaName = true && that.isSetForeignSchemaName(); - if (this_present_foreignSchemaName || that_present_foreignSchemaName) { - if (!(this_present_foreignSchemaName && that_present_foreignSchemaName)) - return false; - if (!this.foreignSchemaName.equals(that.foreignSchemaName)) - return false; - } - - boolean this_present_foreignTableName = true && this.isSetForeignTableName(); - boolean that_present_foreignTableName = true && that.isSetForeignTableName(); - if (this_present_foreignTableName || that_present_foreignTableName) { - if (!(this_present_foreignTableName && that_present_foreignTableName)) - return false; - if (!this.foreignTableName.equals(that.foreignTableName)) - return false; - } - - return true; - } - - @Override - public int 
hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - boolean present_parentCatalogName = true && (isSetParentCatalogName()); - list.add(present_parentCatalogName); - if (present_parentCatalogName) - list.add(parentCatalogName); - - boolean present_parentSchemaName = true && (isSetParentSchemaName()); - list.add(present_parentSchemaName); - if (present_parentSchemaName) - list.add(parentSchemaName); - - boolean present_parentTableName = true && (isSetParentTableName()); - list.add(present_parentTableName); - if (present_parentTableName) - list.add(parentTableName); - - boolean present_foreignCatalogName = true && (isSetForeignCatalogName()); - list.add(present_foreignCatalogName); - if (present_foreignCatalogName) - list.add(foreignCatalogName); - - boolean present_foreignSchemaName = true && (isSetForeignSchemaName()); - list.add(present_foreignSchemaName); - if (present_foreignSchemaName) - list.add(foreignSchemaName); - - boolean present_foreignTableName = true && (isSetForeignTableName()); - list.add(present_foreignTableName); - if (present_foreignTableName) - list.add(foreignTableName); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetCrossReferenceReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetParentCatalogName()).compareTo(other.isSetParentCatalogName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetParentCatalogName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.parentCatalogName, other.parentCatalogName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetParentSchemaName()).compareTo(other.isSetParentSchemaName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetParentSchemaName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.parentSchemaName, other.parentSchemaName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetParentTableName()).compareTo(other.isSetParentTableName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetParentTableName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.parentTableName, other.parentTableName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetForeignCatalogName()).compareTo(other.isSetForeignCatalogName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetForeignCatalogName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.foreignCatalogName, other.foreignCatalogName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetForeignSchemaName()).compareTo(other.isSetForeignSchemaName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetForeignSchemaName()) { - lastComparison = 
org.apache.thrift.TBaseHelper.compareTo(this.foreignSchemaName, other.foreignSchemaName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetForeignTableName()).compareTo(other.isSetForeignTableName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetForeignTableName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.foreignTableName, other.foreignTableName); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetCrossReferenceReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (isSetParentCatalogName()) { - if (!first) sb.append(", "); - sb.append("parentCatalogName:"); - if (this.parentCatalogName == null) { - sb.append("null"); - } else { - sb.append(this.parentCatalogName); - } - first = false; - } - if (isSetParentSchemaName()) { - if (!first) sb.append(", "); - sb.append("parentSchemaName:"); - if (this.parentSchemaName == null) { - sb.append("null"); - } else { - sb.append(this.parentSchemaName); - } - first = false; - } - if (isSetParentTableName()) { - if (!first) sb.append(", "); - sb.append("parentTableName:"); - if (this.parentTableName == null) { - sb.append("null"); - } else { - sb.append(this.parentTableName); - } - first = false; - } - if (isSetForeignCatalogName()) { - if (!first) sb.append(", "); - sb.append("foreignCatalogName:"); - if (this.foreignCatalogName == null) { - sb.append("null"); - } else { - sb.append(this.foreignCatalogName); - } - first = false; - } - if (isSetForeignSchemaName()) { - if (!first) sb.append(", "); - sb.append("foreignSchemaName:"); - if (this.foreignSchemaName == null) { - sb.append("null"); - } else { - sb.append(this.foreignSchemaName); - } - first = false; - } - if (isSetForeignTableName()) { - if (!first) sb.append(", "); - sb.append("foreignTableName:"); - if (this.foreignTableName == null) { - sb.append("null"); - } else { - sb.append(this.foreignTableName); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetCrossReferenceReqStandardSchemeFactory implements SchemeFactory { - public TGetCrossReferenceReqStandardScheme getScheme() { - return new TGetCrossReferenceReqStandardScheme(); - } - } - - private static class TGetCrossReferenceReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetCrossReferenceReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // PARENT_CATALOG_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.parentCatalogName = iprot.readString(); - struct.setParentCatalogNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // PARENT_SCHEMA_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.parentSchemaName = iprot.readString(); - struct.setParentSchemaNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // PARENT_TABLE_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.parentTableName = iprot.readString(); - struct.setParentTableNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 5: // FOREIGN_CATALOG_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.foreignCatalogName = iprot.readString(); - struct.setForeignCatalogNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 6: // FOREIGN_SCHEMA_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.foreignSchemaName = iprot.readString(); - struct.setForeignSchemaNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 7: // FOREIGN_TABLE_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.foreignTableName = iprot.readString(); - struct.setForeignTableNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, 
schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetCrossReferenceReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.parentCatalogName != null) { - if (struct.isSetParentCatalogName()) { - oprot.writeFieldBegin(PARENT_CATALOG_NAME_FIELD_DESC); - oprot.writeString(struct.parentCatalogName); - oprot.writeFieldEnd(); - } - } - if (struct.parentSchemaName != null) { - if (struct.isSetParentSchemaName()) { - oprot.writeFieldBegin(PARENT_SCHEMA_NAME_FIELD_DESC); - oprot.writeString(struct.parentSchemaName); - oprot.writeFieldEnd(); - } - } - if (struct.parentTableName != null) { - if (struct.isSetParentTableName()) { - oprot.writeFieldBegin(PARENT_TABLE_NAME_FIELD_DESC); - oprot.writeString(struct.parentTableName); - oprot.writeFieldEnd(); - } - } - if (struct.foreignCatalogName != null) { - if (struct.isSetForeignCatalogName()) { - oprot.writeFieldBegin(FOREIGN_CATALOG_NAME_FIELD_DESC); - oprot.writeString(struct.foreignCatalogName); - oprot.writeFieldEnd(); - } - } - if (struct.foreignSchemaName != null) { - if (struct.isSetForeignSchemaName()) { - oprot.writeFieldBegin(FOREIGN_SCHEMA_NAME_FIELD_DESC); - oprot.writeString(struct.foreignSchemaName); - oprot.writeFieldEnd(); - } - } - if (struct.foreignTableName != null) { - if (struct.isSetForeignTableName()) { - oprot.writeFieldBegin(FOREIGN_TABLE_NAME_FIELD_DESC); - oprot.writeString(struct.foreignTableName); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetCrossReferenceReqTupleSchemeFactory implements SchemeFactory { - public TGetCrossReferenceReqTupleScheme getScheme() { - return new TGetCrossReferenceReqTupleScheme(); - } - } - - private static class TGetCrossReferenceReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetCrossReferenceReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetParentCatalogName()) { - optionals.set(0); - } - if (struct.isSetParentSchemaName()) { - optionals.set(1); - } - if (struct.isSetParentTableName()) { - optionals.set(2); - } - if (struct.isSetForeignCatalogName()) { - optionals.set(3); - } - if (struct.isSetForeignSchemaName()) { - optionals.set(4); - } - if (struct.isSetForeignTableName()) { - optionals.set(5); - } - oprot.writeBitSet(optionals, 6); - if (struct.isSetParentCatalogName()) { - oprot.writeString(struct.parentCatalogName); - } - if (struct.isSetParentSchemaName()) { - oprot.writeString(struct.parentSchemaName); - } - if (struct.isSetParentTableName()) { - oprot.writeString(struct.parentTableName); - } - if (struct.isSetForeignCatalogName()) { - oprot.writeString(struct.foreignCatalogName); - } - if (struct.isSetForeignSchemaName()) { - oprot.writeString(struct.foreignSchemaName); - } - if (struct.isSetForeignTableName()) { - oprot.writeString(struct.foreignTableName); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetCrossReferenceReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - 
struct.sessionHandle = new TSessionHandle();
-      struct.sessionHandle.read(iprot);
-      struct.setSessionHandleIsSet(true);
-      BitSet incoming = iprot.readBitSet(6);
-      if (incoming.get(0)) {
-        struct.parentCatalogName = iprot.readString();
-        struct.setParentCatalogNameIsSet(true);
-      }
-      if (incoming.get(1)) {
-        struct.parentSchemaName = iprot.readString();
-        struct.setParentSchemaNameIsSet(true);
-      }
-      if (incoming.get(2)) {
-        struct.parentTableName = iprot.readString();
-        struct.setParentTableNameIsSet(true);
-      }
-      if (incoming.get(3)) {
-        struct.foreignCatalogName = iprot.readString();
-        struct.setForeignCatalogNameIsSet(true);
-      }
-      if (incoming.get(4)) {
-        struct.foreignSchemaName = iprot.readString();
-        struct.setForeignSchemaNameIsSet(true);
-      }
-      if (incoming.get(5)) {
-        struct.foreignTableName = iprot.readString();
-        struct.setForeignTableNameIsSet(true);
-      }
-    }
-  }
-
-}
-
diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceResp.java
deleted file mode 100644
index 1bfe6d192df06..0000000000000
--- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceResp.java
+++ /dev/null
@@ -1,509 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.3)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-package org.apache.hive.service.rpc.thrift;
-
-import org.apache.thrift.scheme.IScheme;
-import org.apache.thrift.scheme.SchemeFactory;
-import org.apache.thrift.scheme.StandardScheme;
-
-import org.apache.thrift.scheme.TupleScheme;
-import org.apache.thrift.protocol.TTupleProtocol;
-import org.apache.thrift.protocol.TProtocolException;
-import org.apache.thrift.EncodingUtils;
-import org.apache.thrift.TException;
-import org.apache.thrift.async.AsyncMethodCallback;
-import org.apache.thrift.server.AbstractNonblockingServer.*;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.EnumMap;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.EnumSet;
-import java.util.Collections;
-import java.util.BitSet;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import javax.annotation.Generated;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"})
-@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)")
-public class TGetCrossReferenceResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable {
-  private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetCrossReferenceResp");
-
-  private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1);
-  private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2);
-
-  private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>();
-  static {
-    schemes.put(StandardScheme.class, new TGetCrossReferenceRespStandardSchemeFactory());
-    schemes.put(TupleScheme.class, new TGetCrossReferenceRespTupleSchemeFactory());
-  }
-
-  private TStatus status; // required
-  private TOperationHandle operationHandle; // optional
-
-  /** The set of fields this struct
contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetCrossReferenceResp.class, metaDataMap); - } - - public TGetCrossReferenceResp() { - } - - public TGetCrossReferenceResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetCrossReferenceResp(TGetCrossReferenceResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetCrossReferenceResp deepCopy() { - return new TGetCrossReferenceResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetCrossReferenceResp) - return this.equals((TGetCrossReferenceResp)that); - return false; - } - - public boolean equals(TGetCrossReferenceResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); 
- list.add(present_status); - if (present_status) - list.add(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetCrossReferenceResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetCrossReferenceResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetCrossReferenceRespStandardSchemeFactory implements SchemeFactory { - public TGetCrossReferenceRespStandardScheme getScheme() { - return new TGetCrossReferenceRespStandardScheme(); - } - } - - private static class TGetCrossReferenceRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetCrossReferenceResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetCrossReferenceResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetCrossReferenceRespTupleSchemeFactory implements SchemeFactory { - public TGetCrossReferenceRespTupleScheme getScheme() { - return new TGetCrossReferenceRespTupleScheme(); - } - } - - private static class TGetCrossReferenceRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetCrossReferenceResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - 
struct.operationHandle.write(oprot);
-      }
-    }
-
-    @Override
-    public void read(org.apache.thrift.protocol.TProtocol prot, TGetCrossReferenceResp struct) throws org.apache.thrift.TException {
-      TTupleProtocol iprot = (TTupleProtocol) prot;
-      struct.status = new TStatus();
-      struct.status.read(iprot);
-      struct.setStatusIsSet(true);
-      BitSet incoming = iprot.readBitSet(1);
-      if (incoming.get(0)) {
-        struct.operationHandle = new TOperationHandle();
-        struct.operationHandle.read(iprot);
-        struct.setOperationHandleIsSet(true);
-      }
-    }
-  }
-
-}
-
diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenReq.java
deleted file mode 100644
index e3e28c5860522..0000000000000
--- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenReq.java
+++ /dev/null
@@ -1,596 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.3)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-package org.apache.hive.service.rpc.thrift;
-
-import org.apache.thrift.scheme.IScheme;
-import org.apache.thrift.scheme.SchemeFactory;
-import org.apache.thrift.scheme.StandardScheme;
-
-import org.apache.thrift.scheme.TupleScheme;
-import org.apache.thrift.protocol.TTupleProtocol;
-import org.apache.thrift.protocol.TProtocolException;
-import org.apache.thrift.EncodingUtils;
-import org.apache.thrift.TException;
-import org.apache.thrift.async.AsyncMethodCallback;
-import org.apache.thrift.server.AbstractNonblockingServer.*;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.EnumMap;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.EnumSet;
-import java.util.Collections;
-import java.util.BitSet;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import javax.annotation.Generated;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"})
-@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)")
-public class TGetDelegationTokenReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable {
-  private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetDelegationTokenReq");
-
-  private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1);
-  private static final org.apache.thrift.protocol.TField OWNER_FIELD_DESC = new org.apache.thrift.protocol.TField("owner", org.apache.thrift.protocol.TType.STRING, (short)2);
-  private static final org.apache.thrift.protocol.TField RENEWER_FIELD_DESC = new org.apache.thrift.protocol.TField("renewer", org.apache.thrift.protocol.TType.STRING, (short)3);
-
-  private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>();
-  static {
-    schemes.put(StandardScheme.class, new TGetDelegationTokenReqStandardSchemeFactory());
-    schemes.put(TupleScheme.class, new TGetDelegationTokenReqTupleSchemeFactory());
-  }
-
-  private TSessionHandle sessionHandle; // required
-  private String owner; // required
-  private String renewer; // required
-
-  /** The set of fields this struct contains, along with convenience methods for finding and manipulating them.
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - OWNER((short)2, "owner"), - RENEWER((short)3, "renewer"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // OWNER - return OWNER; - case 3: // RENEWER - return RENEWER; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.OWNER, new org.apache.thrift.meta_data.FieldMetaData("owner", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.RENEWER, new org.apache.thrift.meta_data.FieldMetaData("renewer", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetDelegationTokenReq.class, metaDataMap); - } - - public TGetDelegationTokenReq() { - } - - public TGetDelegationTokenReq( - TSessionHandle sessionHandle, - String owner, - String renewer) - { - this(); - this.sessionHandle = sessionHandle; - this.owner = owner; - this.renewer = renewer; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetDelegationTokenReq(TGetDelegationTokenReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetOwner()) { - this.owner = other.owner; - } - if (other.isSetRenewer()) { - this.renewer = other.renewer; - } - } - - public TGetDelegationTokenReq deepCopy() { - return new TGetDelegationTokenReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.owner = null; - this.renewer = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getOwner() { - return this.owner; - } - - public void setOwner(String owner) { - this.owner = owner; - } - - public void unsetOwner() { - this.owner = null; - } - - /** Returns true if field owner is set (has been assigned a value) and false otherwise */ - public boolean isSetOwner() { - return this.owner != null; - } - - public void setOwnerIsSet(boolean value) { - if (!value) { - this.owner = null; - } - } - - public String getRenewer() { - return this.renewer; - } - - public void setRenewer(String renewer) { - this.renewer = renewer; - } - - public void unsetRenewer() { - this.renewer = null; - } - - /** Returns true if field renewer is set (has been assigned a value) and false otherwise */ - public boolean isSetRenewer() { - return this.renewer != null; - } - - public void setRenewerIsSet(boolean value) { - if (!value) { - this.renewer = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case OWNER: - if (value == null) { - unsetOwner(); - } else { - setOwner((String)value); - } - break; - - case RENEWER: - if (value == null) { - unsetRenewer(); - } else { - setRenewer((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case OWNER: - return getOwner(); - - case RENEWER: - return getRenewer(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case OWNER: - return isSetOwner(); - case RENEWER: - return isSetRenewer(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetDelegationTokenReq) - return this.equals((TGetDelegationTokenReq)that); - return false; - } - - public boolean equals(TGetDelegationTokenReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || 
that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_owner = true && this.isSetOwner(); - boolean that_present_owner = true && that.isSetOwner(); - if (this_present_owner || that_present_owner) { - if (!(this_present_owner && that_present_owner)) - return false; - if (!this.owner.equals(that.owner)) - return false; - } - - boolean this_present_renewer = true && this.isSetRenewer(); - boolean that_present_renewer = true && that.isSetRenewer(); - if (this_present_renewer || that_present_renewer) { - if (!(this_present_renewer && that_present_renewer)) - return false; - if (!this.renewer.equals(that.renewer)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - boolean present_owner = true && (isSetOwner()); - list.add(present_owner); - if (present_owner) - list.add(owner); - - boolean present_renewer = true && (isSetRenewer()); - list.add(present_renewer); - if (present_renewer) - list.add(renewer); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetDelegationTokenReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOwner()).compareTo(other.isSetOwner()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOwner()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.owner, other.owner); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetRenewer()).compareTo(other.isSetRenewer()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetRenewer()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.renewer, other.renewer); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetDelegationTokenReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (!first) sb.append(", "); - sb.append("owner:"); - if (this.owner == null) { - sb.append("null"); - } else { - sb.append(this.owner); - } - first = false; - if (!first) sb.append(", "); - sb.append("renewer:"); - if (this.renewer == null) { - sb.append("null"); - } else { - 
sb.append(this.renewer); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - if (!isSetOwner()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'owner' is unset! Struct:" + toString()); - } - - if (!isSetRenewer()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'renewer' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetDelegationTokenReqStandardSchemeFactory implements SchemeFactory { - public TGetDelegationTokenReqStandardScheme getScheme() { - return new TGetDelegationTokenReqStandardScheme(); - } - } - - private static class TGetDelegationTokenReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetDelegationTokenReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OWNER - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.owner = iprot.readString(); - struct.setOwnerIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // RENEWER - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.renewer = iprot.readString(); - struct.setRenewerIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetDelegationTokenReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.owner != null) { - oprot.writeFieldBegin(OWNER_FIELD_DESC); - oprot.writeString(struct.owner); - oprot.writeFieldEnd(); - } - if 
(struct.renewer != null) {
-        oprot.writeFieldBegin(RENEWER_FIELD_DESC);
-        oprot.writeString(struct.renewer);
-        oprot.writeFieldEnd();
-      }
-      oprot.writeFieldStop();
-      oprot.writeStructEnd();
-    }
-
-  }
-
-  private static class TGetDelegationTokenReqTupleSchemeFactory implements SchemeFactory {
-    public TGetDelegationTokenReqTupleScheme getScheme() {
-      return new TGetDelegationTokenReqTupleScheme();
-    }
-  }
-
-  private static class TGetDelegationTokenReqTupleScheme extends TupleScheme {
-
-    @Override
-    public void write(org.apache.thrift.protocol.TProtocol prot, TGetDelegationTokenReq struct) throws org.apache.thrift.TException {
-      TTupleProtocol oprot = (TTupleProtocol) prot;
-      struct.sessionHandle.write(oprot);
-      oprot.writeString(struct.owner);
-      oprot.writeString(struct.renewer);
-    }
-
-    @Override
-    public void read(org.apache.thrift.protocol.TProtocol prot, TGetDelegationTokenReq struct) throws org.apache.thrift.TException {
-      TTupleProtocol iprot = (TTupleProtocol) prot;
-      struct.sessionHandle = new TSessionHandle();
-      struct.sessionHandle.read(iprot);
-      struct.setSessionHandleIsSet(true);
-      struct.owner = iprot.readString();
-      struct.setOwnerIsSet(true);
-      struct.renewer = iprot.readString();
-      struct.setRenewerIsSet(true);
-    }
-  }
-
-}
-
diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenResp.java
deleted file mode 100644
index 6ef2acbbd9435..0000000000000
--- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenResp.java
+++ /dev/null
@@ -1,504 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.3)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-package org.apache.hive.service.rpc.thrift;
-
-import org.apache.thrift.scheme.IScheme;
-import org.apache.thrift.scheme.SchemeFactory;
-import org.apache.thrift.scheme.StandardScheme;
-
-import org.apache.thrift.scheme.TupleScheme;
-import org.apache.thrift.protocol.TTupleProtocol;
-import org.apache.thrift.protocol.TProtocolException;
-import org.apache.thrift.EncodingUtils;
-import org.apache.thrift.TException;
-import org.apache.thrift.async.AsyncMethodCallback;
-import org.apache.thrift.server.AbstractNonblockingServer.*;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.EnumMap;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.EnumSet;
-import java.util.Collections;
-import java.util.BitSet;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import javax.annotation.Generated;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"})
-@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)")
-public class TGetDelegationTokenResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable {
-  private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetDelegationTokenResp");
-
-  private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1);
-  private static final org.apache.thrift.protocol.TField DELEGATION_TOKEN_FIELD_DESC = new org.apache.thrift.protocol.TField("delegationToken", org.apache.thrift.protocol.TType.STRING, (short)2);
-
-  private static final
Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetDelegationTokenRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetDelegationTokenRespTupleSchemeFactory()); - } - - private TStatus status; // required - private String delegationToken; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - DELEGATION_TOKEN((short)2, "delegationToken"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // DELEGATION_TOKEN - return DELEGATION_TOKEN; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.DELEGATION_TOKEN}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.DELEGATION_TOKEN, new org.apache.thrift.meta_data.FieldMetaData("delegationToken", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetDelegationTokenResp.class, metaDataMap); - } - - public TGetDelegationTokenResp() { - } - - public TGetDelegationTokenResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetDelegationTokenResp(TGetDelegationTokenResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetDelegationToken()) { - this.delegationToken = other.delegationToken; - } - } - - public TGetDelegationTokenResp deepCopy() { - return new TGetDelegationTokenResp(this); - } - - @Override - public void clear() { - this.status = null; - this.delegationToken = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public String getDelegationToken() { - return this.delegationToken; - } - - public void setDelegationToken(String delegationToken) { - this.delegationToken = delegationToken; - } - - public void unsetDelegationToken() { - this.delegationToken = null; - } - - /** Returns true if field delegationToken is set (has been assigned a value) and false otherwise */ - public boolean isSetDelegationToken() { - return this.delegationToken != null; - } - - public void setDelegationTokenIsSet(boolean value) { - if (!value) { - this.delegationToken = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case DELEGATION_TOKEN: - if (value == null) { - unsetDelegationToken(); - } else { - setDelegationToken((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case DELEGATION_TOKEN: - return getDelegationToken(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case DELEGATION_TOKEN: - return isSetDelegationToken(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetDelegationTokenResp) - return this.equals((TGetDelegationTokenResp)that); - return false; - } - - public boolean equals(TGetDelegationTokenResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_delegationToken = true && this.isSetDelegationToken(); - boolean that_present_delegationToken = true && that.isSetDelegationToken(); - if (this_present_delegationToken || that_present_delegationToken) { - if (!(this_present_delegationToken && that_present_delegationToken)) - return false; - if (!this.delegationToken.equals(that.delegationToken)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if 
(present_status) - list.add(status); - - boolean present_delegationToken = true && (isSetDelegationToken()); - list.add(present_delegationToken); - if (present_delegationToken) - list.add(delegationToken); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetDelegationTokenResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetDelegationToken()).compareTo(other.isSetDelegationToken()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetDelegationToken()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.delegationToken, other.delegationToken); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetDelegationTokenResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetDelegationToken()) { - if (!first) sb.append(", "); - sb.append("delegationToken:"); - if (this.delegationToken == null) { - sb.append("null"); - } else { - sb.append(this.delegationToken); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetDelegationTokenRespStandardSchemeFactory implements SchemeFactory { - public TGetDelegationTokenRespStandardScheme getScheme() { - return new TGetDelegationTokenRespStandardScheme(); - } - } - - private static class TGetDelegationTokenRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetDelegationTokenResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // DELEGATION_TOKEN - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.delegationToken = iprot.readString(); - struct.setDelegationTokenIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetDelegationTokenResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.delegationToken != null) { - if (struct.isSetDelegationToken()) { - oprot.writeFieldBegin(DELEGATION_TOKEN_FIELD_DESC); - oprot.writeString(struct.delegationToken); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetDelegationTokenRespTupleSchemeFactory implements SchemeFactory { - public TGetDelegationTokenRespTupleScheme getScheme() { - return new TGetDelegationTokenRespTupleScheme(); - } - } - - private static class TGetDelegationTokenRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetDelegationTokenResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetDelegationToken()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetDelegationToken()) { - oprot.writeString(struct.delegationToken); - } - } - - @Override - public void 
read(org.apache.thrift.protocol.TProtocol prot, TGetDelegationTokenResp struct) throws org.apache.thrift.TException {
-      TTupleProtocol iprot = (TTupleProtocol) prot;
-      struct.status = new TStatus();
-      struct.status.read(iprot);
-      struct.setStatusIsSet(true);
-      BitSet incoming = iprot.readBitSet(1);
-      if (incoming.get(0)) {
-        struct.delegationToken = iprot.readString();
-        struct.setDelegationTokenIsSet(true);
-      }
-    }
-  }
-
-}
-
diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsReq.java
deleted file mode 100644
index ad4f8a5b031e8..0000000000000
--- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsReq.java
+++ /dev/null
@@ -1,711 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.3)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-package org.apache.hive.service.rpc.thrift;
-
-import org.apache.thrift.scheme.IScheme;
-import org.apache.thrift.scheme.SchemeFactory;
-import org.apache.thrift.scheme.StandardScheme;
-
-import org.apache.thrift.scheme.TupleScheme;
-import org.apache.thrift.protocol.TTupleProtocol;
-import org.apache.thrift.protocol.TProtocolException;
-import org.apache.thrift.EncodingUtils;
-import org.apache.thrift.TException;
-import org.apache.thrift.async.AsyncMethodCallback;
-import org.apache.thrift.server.AbstractNonblockingServer.*;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.EnumMap;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.EnumSet;
-import java.util.Collections;
-import java.util.BitSet;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import javax.annotation.Generated;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"})
-@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)")
-public class TGetFunctionsReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable {
-  private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetFunctionsReq");
-
-  private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1);
-  private static final org.apache.thrift.protocol.TField CATALOG_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("catalogName", org.apache.thrift.protocol.TType.STRING, (short)2);
-  private static final org.apache.thrift.protocol.TField SCHEMA_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("schemaName", org.apache.thrift.protocol.TType.STRING, (short)3);
-  private static final org.apache.thrift.protocol.TField FUNCTION_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("functionName", org.apache.thrift.protocol.TType.STRING, (short)4);
-
-  private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>();
-  static {
-    schemes.put(StandardScheme.class, new TGetFunctionsReqStandardSchemeFactory());
-    schemes.put(TupleScheme.class, new TGetFunctionsReqTupleSchemeFactory());
-  }
-
-  private TSessionHandle sessionHandle; // required
-  private String catalogName; // optional
-  private String schemaName; // optional
-  private String functionName; // required
-
-  /** The set of fields this struct contains, along with convenience
methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - CATALOG_NAME((short)2, "catalogName"), - SCHEMA_NAME((short)3, "schemaName"), - FUNCTION_NAME((short)4, "functionName"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // CATALOG_NAME - return CATALOG_NAME; - case 3: // SCHEMA_NAME - return SCHEMA_NAME; - case 4: // FUNCTION_NAME - return FUNCTION_NAME; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.CATALOG_NAME,_Fields.SCHEMA_NAME}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.CATALOG_NAME, new org.apache.thrift.meta_data.FieldMetaData("catalogName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - tmpMap.put(_Fields.SCHEMA_NAME, new org.apache.thrift.meta_data.FieldMetaData("schemaName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - tmpMap.put(_Fields.FUNCTION_NAME, new org.apache.thrift.meta_data.FieldMetaData("functionName", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetFunctionsReq.class, metaDataMap); - } - - public TGetFunctionsReq() { - } - - public TGetFunctionsReq( - TSessionHandle sessionHandle, - String functionName) - { - this(); - this.sessionHandle = sessionHandle; - this.functionName = functionName; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetFunctionsReq(TGetFunctionsReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetCatalogName()) { - this.catalogName = other.catalogName; - } - if (other.isSetSchemaName()) { - this.schemaName = other.schemaName; - } - if (other.isSetFunctionName()) { - this.functionName = other.functionName; - } - } - - public TGetFunctionsReq deepCopy() { - return new TGetFunctionsReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.catalogName = null; - this.schemaName = null; - this.functionName = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getCatalogName() { - return this.catalogName; - } - - public void setCatalogName(String catalogName) { - this.catalogName = catalogName; - } - - public void unsetCatalogName() { - this.catalogName = null; - } - - /** Returns true if field catalogName is set (has been assigned a value) and false otherwise */ - public boolean isSetCatalogName() { - return this.catalogName != null; - } - - public void setCatalogNameIsSet(boolean value) { - if (!value) { - this.catalogName = null; - } - } - - public String getSchemaName() { - return this.schemaName; - } - - public void setSchemaName(String schemaName) { - this.schemaName = schemaName; - } - - public void unsetSchemaName() { - this.schemaName = null; - } - - /** Returns true if field schemaName is set (has been assigned a value) and false otherwise */ - public boolean isSetSchemaName() { - return this.schemaName != null; - } - - public void setSchemaNameIsSet(boolean value) { - if (!value) { - this.schemaName = null; - } - } - - public String getFunctionName() { - return this.functionName; - } - - public void setFunctionName(String functionName) { - this.functionName = functionName; - } - - public void unsetFunctionName() { - this.functionName = null; - } - - /** Returns true if field functionName is set (has been assigned a value) and false otherwise */ - public boolean isSetFunctionName() { - return this.functionName != null; - } - - public void setFunctionNameIsSet(boolean value) { - if (!value) { - this.functionName = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case CATALOG_NAME: - if (value == null) { - unsetCatalogName(); - } else { - setCatalogName((String)value); - } - break; - - case SCHEMA_NAME: - if (value == null) { - unsetSchemaName(); - } else { - setSchemaName((String)value); - } - break; - - case FUNCTION_NAME: - if (value == null) { - unsetFunctionName(); - } else { - setFunctionName((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case CATALOG_NAME: - return getCatalogName(); - - case SCHEMA_NAME: - return getSchemaName(); - - case FUNCTION_NAME: - 
return getFunctionName(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case CATALOG_NAME: - return isSetCatalogName(); - case SCHEMA_NAME: - return isSetSchemaName(); - case FUNCTION_NAME: - return isSetFunctionName(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetFunctionsReq) - return this.equals((TGetFunctionsReq)that); - return false; - } - - public boolean equals(TGetFunctionsReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_catalogName = true && this.isSetCatalogName(); - boolean that_present_catalogName = true && that.isSetCatalogName(); - if (this_present_catalogName || that_present_catalogName) { - if (!(this_present_catalogName && that_present_catalogName)) - return false; - if (!this.catalogName.equals(that.catalogName)) - return false; - } - - boolean this_present_schemaName = true && this.isSetSchemaName(); - boolean that_present_schemaName = true && that.isSetSchemaName(); - if (this_present_schemaName || that_present_schemaName) { - if (!(this_present_schemaName && that_present_schemaName)) - return false; - if (!this.schemaName.equals(that.schemaName)) - return false; - } - - boolean this_present_functionName = true && this.isSetFunctionName(); - boolean that_present_functionName = true && that.isSetFunctionName(); - if (this_present_functionName || that_present_functionName) { - if (!(this_present_functionName && that_present_functionName)) - return false; - if (!this.functionName.equals(that.functionName)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - boolean present_catalogName = true && (isSetCatalogName()); - list.add(present_catalogName); - if (present_catalogName) - list.add(catalogName); - - boolean present_schemaName = true && (isSetSchemaName()); - list.add(present_schemaName); - if (present_schemaName) - list.add(schemaName); - - boolean present_functionName = true && (isSetFunctionName()); - list.add(present_functionName); - if (present_functionName) - list.add(functionName); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetFunctionsReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return 
lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetCatalogName()).compareTo(other.isSetCatalogName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetCatalogName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.catalogName, other.catalogName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSchemaName()).compareTo(other.isSetSchemaName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSchemaName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.schemaName, other.schemaName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetFunctionName()).compareTo(other.isSetFunctionName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetFunctionName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.functionName, other.functionName); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetFunctionsReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (isSetCatalogName()) { - if (!first) sb.append(", "); - sb.append("catalogName:"); - if (this.catalogName == null) { - sb.append("null"); - } else { - sb.append(this.catalogName); - } - first = false; - } - if (isSetSchemaName()) { - if (!first) sb.append(", "); - sb.append("schemaName:"); - if (this.schemaName == null) { - sb.append("null"); - } else { - sb.append(this.schemaName); - } - first = false; - } - if (!first) sb.append(", "); - sb.append("functionName:"); - if (this.functionName == null) { - sb.append("null"); - } else { - sb.append(this.functionName); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - if (!isSetFunctionName()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'functionName' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetFunctionsReqStandardSchemeFactory implements SchemeFactory { - public TGetFunctionsReqStandardScheme getScheme() { - return new TGetFunctionsReqStandardScheme(); - } - } - - private static class TGetFunctionsReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetFunctionsReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // CATALOG_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SCHEMA_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // FUNCTION_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.functionName = iprot.readString(); - struct.setFunctionNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetFunctionsReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.catalogName != null) { - if (struct.isSetCatalogName()) { - oprot.writeFieldBegin(CATALOG_NAME_FIELD_DESC); - oprot.writeString(struct.catalogName); - oprot.writeFieldEnd(); - } - } - if (struct.schemaName != null) { - if (struct.isSetSchemaName()) { - oprot.writeFieldBegin(SCHEMA_NAME_FIELD_DESC); - oprot.writeString(struct.schemaName); - oprot.writeFieldEnd(); - } - } - if (struct.functionName != null) { - oprot.writeFieldBegin(FUNCTION_NAME_FIELD_DESC); - 
oprot.writeString(struct.functionName); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetFunctionsReqTupleSchemeFactory implements SchemeFactory { - public TGetFunctionsReqTupleScheme getScheme() { - return new TGetFunctionsReqTupleScheme(); - } - } - - private static class TGetFunctionsReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetFunctionsReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - oprot.writeString(struct.functionName); - BitSet optionals = new BitSet(); - if (struct.isSetCatalogName()) { - optionals.set(0); - } - if (struct.isSetSchemaName()) { - optionals.set(1); - } - oprot.writeBitSet(optionals, 2); - if (struct.isSetCatalogName()) { - oprot.writeString(struct.catalogName); - } - if (struct.isSetSchemaName()) { - oprot.writeString(struct.schemaName); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetFunctionsReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - struct.functionName = iprot.readString(); - struct.setFunctionNameIsSet(true); - BitSet incoming = iprot.readBitSet(2); - if (incoming.get(0)) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } - if (incoming.get(1)) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsResp.java deleted file mode 100644 index ead37fb91cc2f..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsResp.java +++ /dev/null @@ -1,509 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetFunctionsResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetFunctionsResp"); - - private static 
final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetFunctionsRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetFunctionsRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetFunctionsResp.class, metaDataMap); - } - - public TGetFunctionsResp() { - } - - public TGetFunctionsResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetFunctionsResp(TGetFunctionsResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetFunctionsResp deepCopy() { - return new TGetFunctionsResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetFunctionsResp) - return this.equals((TGetFunctionsResp)that); - return false; - } - - public boolean equals(TGetFunctionsResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if 
(present_status) - list.add(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetFunctionsResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetFunctionsResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetFunctionsRespStandardSchemeFactory implements SchemeFactory { - public TGetFunctionsRespStandardScheme getScheme() { - return new TGetFunctionsRespStandardScheme(); - } - } - - private static class TGetFunctionsRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetFunctionsResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetFunctionsResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetFunctionsRespTupleSchemeFactory implements SchemeFactory { - public TGetFunctionsRespTupleScheme getScheme() { - return new TGetFunctionsRespTupleScheme(); - } - } - - private static class TGetFunctionsRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetFunctionsResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override 
- public void read(org.apache.thrift.protocol.TProtocol prot, TGetFunctionsResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoReq.java deleted file mode 100644 index b319b70e5eba5..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoReq.java +++ /dev/null @@ -1,507 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetInfoReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetInfoReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField INFO_TYPE_FIELD_DESC = new org.apache.thrift.protocol.TField("infoType", org.apache.thrift.protocol.TType.I32, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetInfoReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetInfoReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private TGetInfoType infoType; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - /** - * - * @see TGetInfoType - */ - INFO_TYPE((short)2, "infoType"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // INFO_TYPE - return INFO_TYPE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.INFO_TYPE, new org.apache.thrift.meta_data.FieldMetaData("infoType", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TGetInfoType.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetInfoReq.class, metaDataMap); - } - - public TGetInfoReq() { - } - - public TGetInfoReq( - TSessionHandle sessionHandle, - TGetInfoType infoType) - { - this(); - this.sessionHandle = sessionHandle; - this.infoType = infoType; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetInfoReq(TGetInfoReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetInfoType()) { - this.infoType = other.infoType; - } - } - - public TGetInfoReq deepCopy() { - return new TGetInfoReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.infoType = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - /** - * - * @see TGetInfoType - */ - public TGetInfoType getInfoType() { - return this.infoType; - } - - /** - * - * @see TGetInfoType - */ - public void setInfoType(TGetInfoType infoType) { - this.infoType = infoType; - } - - public void unsetInfoType() { - this.infoType = null; - } - - /** Returns true if field infoType is set (has been assigned a value) and false otherwise */ - public boolean isSetInfoType() { - return this.infoType != null; - } - - public void setInfoTypeIsSet(boolean value) { - if (!value) { - this.infoType = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case INFO_TYPE: - if (value == null) { - unsetInfoType(); - } else { - setInfoType((TGetInfoType)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case INFO_TYPE: - return getInfoType(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case INFO_TYPE: - return isSetInfoType(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetInfoReq) - return this.equals((TGetInfoReq)that); - return false; - } - - public boolean equals(TGetInfoReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_infoType = true && this.isSetInfoType(); - boolean that_present_infoType = true && that.isSetInfoType(); - if (this_present_infoType || that_present_infoType) { - if (!(this_present_infoType && that_present_infoType)) - return false; - if (!this.infoType.equals(that.infoType)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && 
(isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - boolean present_infoType = true && (isSetInfoType()); - list.add(present_infoType); - if (present_infoType) - list.add(infoType.getValue()); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetInfoReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetInfoType()).compareTo(other.isSetInfoType()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetInfoType()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.infoType, other.infoType); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetInfoReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (!first) sb.append(", "); - sb.append("infoType:"); - if (this.infoType == null) { - sb.append("null"); - } else { - sb.append(this.infoType); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - if (!isSetInfoType()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'infoType' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetInfoReqStandardSchemeFactory implements SchemeFactory { - public TGetInfoReqStandardScheme getScheme() { - return new TGetInfoReqStandardScheme(); - } - } - - private static class TGetInfoReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetInfoReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // INFO_TYPE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.infoType = org.apache.hive.service.rpc.thrift.TGetInfoType.findByValue(iprot.readI32()); - struct.setInfoTypeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetInfoReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.infoType != null) { - oprot.writeFieldBegin(INFO_TYPE_FIELD_DESC); - oprot.writeI32(struct.infoType.getValue()); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetInfoReqTupleSchemeFactory implements SchemeFactory { - public TGetInfoReqTupleScheme getScheme() { - return new TGetInfoReqTupleScheme(); - } - } - - private static class TGetInfoReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetInfoReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - oprot.writeI32(struct.infoType.getValue()); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetInfoReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - 
struct.setSessionHandleIsSet(true); - struct.infoType = org.apache.hive.service.rpc.thrift.TGetInfoType.findByValue(iprot.readI32()); - struct.setInfoTypeIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoResp.java deleted file mode 100644 index 9be810b024987..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoResp.java +++ /dev/null @@ -1,497 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetInfoResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetInfoResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField INFO_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("infoValue", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetInfoRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetInfoRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TGetInfoValue infoValue; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - INFO_VALUE((short)2, "infoValue"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // INFO_VALUE - return INFO_VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.INFO_VALUE, new org.apache.thrift.meta_data.FieldMetaData("infoValue", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetInfoValue.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetInfoResp.class, metaDataMap); - } - - public TGetInfoResp() { - } - - public TGetInfoResp( - TStatus status, - TGetInfoValue infoValue) - { - this(); - this.status = status; - this.infoValue = infoValue; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetInfoResp(TGetInfoResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetInfoValue()) { - this.infoValue = new TGetInfoValue(other.infoValue); - } - } - - public TGetInfoResp deepCopy() { - return new TGetInfoResp(this); - } - - @Override - public void clear() { - this.status = null; - this.infoValue = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TGetInfoValue getInfoValue() { - return this.infoValue; - } - - public void setInfoValue(TGetInfoValue infoValue) { - this.infoValue = infoValue; - } - - public void unsetInfoValue() { - this.infoValue = null; - } - - /** Returns true if field infoValue is set (has been assigned a value) and false otherwise */ - public boolean isSetInfoValue() { - return this.infoValue != null; - } - - public void setInfoValueIsSet(boolean value) { - if (!value) { - this.infoValue = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case INFO_VALUE: - if (value == null) { - unsetInfoValue(); - } else { - setInfoValue((TGetInfoValue)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case INFO_VALUE: - return getInfoValue(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case INFO_VALUE: - return isSetInfoValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetInfoResp) - return this.equals((TGetInfoResp)that); - return false; - } - - public boolean equals(TGetInfoResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_infoValue = true && this.isSetInfoValue(); - boolean that_present_infoValue = true && that.isSetInfoValue(); - if (this_present_infoValue || that_present_infoValue) { - if (!(this_present_infoValue && that_present_infoValue)) - return false; - if (!this.infoValue.equals(that.infoValue)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if (present_status) - list.add(status); - - boolean present_infoValue = true && (isSetInfoValue()); - list.add(present_infoValue); - if (present_infoValue) - list.add(infoValue); - - return list.hashCode(); - } - - @Override - public int 
compareTo(TGetInfoResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetInfoValue()).compareTo(other.isSetInfoValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetInfoValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.infoValue, other.infoValue); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetInfoResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (!first) sb.append(", "); - sb.append("infoValue:"); - if (this.infoValue == null) { - sb.append("null"); - } else { - sb.append(this.infoValue); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - if (!isSetInfoValue()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'infoValue' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetInfoRespStandardSchemeFactory implements SchemeFactory { - public TGetInfoRespStandardScheme getScheme() { - return new TGetInfoRespStandardScheme(); - } - } - - private static class TGetInfoRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetInfoResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // INFO_VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.infoValue = new TGetInfoValue(); - struct.infoValue.read(iprot); - struct.setInfoValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetInfoResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.infoValue != null) { - oprot.writeFieldBegin(INFO_VALUE_FIELD_DESC); - struct.infoValue.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetInfoRespTupleSchemeFactory implements SchemeFactory { - public TGetInfoRespTupleScheme getScheme() { - return new TGetInfoRespTupleScheme(); - } - } - - private static class TGetInfoRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetInfoResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - struct.infoValue.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetInfoResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - struct.infoValue = new TGetInfoValue(); - struct.infoValue.read(iprot); - struct.setInfoValueIsSet(true); - } - } - -} - diff 
--git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoType.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoType.java deleted file mode 100644 index 5b219b62656d7..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoType.java +++ /dev/null @@ -1,180 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - - -import java.util.Map; -import java.util.HashMap; -import org.apache.thrift.TEnum; - -public enum TGetInfoType implements org.apache.thrift.TEnum { - CLI_MAX_DRIVER_CONNECTIONS(0), - CLI_MAX_CONCURRENT_ACTIVITIES(1), - CLI_DATA_SOURCE_NAME(2), - CLI_FETCH_DIRECTION(8), - CLI_SERVER_NAME(13), - CLI_SEARCH_PATTERN_ESCAPE(14), - CLI_DBMS_NAME(17), - CLI_DBMS_VER(18), - CLI_ACCESSIBLE_TABLES(19), - CLI_ACCESSIBLE_PROCEDURES(20), - CLI_CURSOR_COMMIT_BEHAVIOR(23), - CLI_DATA_SOURCE_READ_ONLY(25), - CLI_DEFAULT_TXN_ISOLATION(26), - CLI_IDENTIFIER_CASE(28), - CLI_IDENTIFIER_QUOTE_CHAR(29), - CLI_MAX_COLUMN_NAME_LEN(30), - CLI_MAX_CURSOR_NAME_LEN(31), - CLI_MAX_SCHEMA_NAME_LEN(32), - CLI_MAX_CATALOG_NAME_LEN(34), - CLI_MAX_TABLE_NAME_LEN(35), - CLI_SCROLL_CONCURRENCY(43), - CLI_TXN_CAPABLE(46), - CLI_USER_NAME(47), - CLI_TXN_ISOLATION_OPTION(72), - CLI_INTEGRITY(73), - CLI_GETDATA_EXTENSIONS(81), - CLI_NULL_COLLATION(85), - CLI_ALTER_TABLE(86), - CLI_ORDER_BY_COLUMNS_IN_SELECT(90), - CLI_SPECIAL_CHARACTERS(94), - CLI_MAX_COLUMNS_IN_GROUP_BY(97), - CLI_MAX_COLUMNS_IN_INDEX(98), - CLI_MAX_COLUMNS_IN_ORDER_BY(99), - CLI_MAX_COLUMNS_IN_SELECT(100), - CLI_MAX_COLUMNS_IN_TABLE(101), - CLI_MAX_INDEX_SIZE(102), - CLI_MAX_ROW_SIZE(104), - CLI_MAX_STATEMENT_LEN(105), - CLI_MAX_TABLES_IN_SELECT(106), - CLI_MAX_USER_NAME_LEN(107), - CLI_OJ_CAPABILITIES(115), - CLI_XOPEN_CLI_YEAR(10000), - CLI_CURSOR_SENSITIVITY(10001), - CLI_DESCRIBE_PARAMETER(10002), - CLI_CATALOG_NAME(10003), - CLI_COLLATION_SEQ(10004), - CLI_MAX_IDENTIFIER_LEN(10005); - - private final int value; - - private TGetInfoType(int value) { - this.value = value; - } - - /** - * Get the integer value of this enum value, as defined in the Thrift IDL. - */ - public int getValue() { - return value; - } - - /** - * Find a the enum type by its integer value, as defined in the Thrift IDL. - * @return null if the value is not found. 
- */ - public static TGetInfoType findByValue(int value) { - switch (value) { - case 0: - return CLI_MAX_DRIVER_CONNECTIONS; - case 1: - return CLI_MAX_CONCURRENT_ACTIVITIES; - case 2: - return CLI_DATA_SOURCE_NAME; - case 8: - return CLI_FETCH_DIRECTION; - case 13: - return CLI_SERVER_NAME; - case 14: - return CLI_SEARCH_PATTERN_ESCAPE; - case 17: - return CLI_DBMS_NAME; - case 18: - return CLI_DBMS_VER; - case 19: - return CLI_ACCESSIBLE_TABLES; - case 20: - return CLI_ACCESSIBLE_PROCEDURES; - case 23: - return CLI_CURSOR_COMMIT_BEHAVIOR; - case 25: - return CLI_DATA_SOURCE_READ_ONLY; - case 26: - return CLI_DEFAULT_TXN_ISOLATION; - case 28: - return CLI_IDENTIFIER_CASE; - case 29: - return CLI_IDENTIFIER_QUOTE_CHAR; - case 30: - return CLI_MAX_COLUMN_NAME_LEN; - case 31: - return CLI_MAX_CURSOR_NAME_LEN; - case 32: - return CLI_MAX_SCHEMA_NAME_LEN; - case 34: - return CLI_MAX_CATALOG_NAME_LEN; - case 35: - return CLI_MAX_TABLE_NAME_LEN; - case 43: - return CLI_SCROLL_CONCURRENCY; - case 46: - return CLI_TXN_CAPABLE; - case 47: - return CLI_USER_NAME; - case 72: - return CLI_TXN_ISOLATION_OPTION; - case 73: - return CLI_INTEGRITY; - case 81: - return CLI_GETDATA_EXTENSIONS; - case 85: - return CLI_NULL_COLLATION; - case 86: - return CLI_ALTER_TABLE; - case 90: - return CLI_ORDER_BY_COLUMNS_IN_SELECT; - case 94: - return CLI_SPECIAL_CHARACTERS; - case 97: - return CLI_MAX_COLUMNS_IN_GROUP_BY; - case 98: - return CLI_MAX_COLUMNS_IN_INDEX; - case 99: - return CLI_MAX_COLUMNS_IN_ORDER_BY; - case 100: - return CLI_MAX_COLUMNS_IN_SELECT; - case 101: - return CLI_MAX_COLUMNS_IN_TABLE; - case 102: - return CLI_MAX_INDEX_SIZE; - case 104: - return CLI_MAX_ROW_SIZE; - case 105: - return CLI_MAX_STATEMENT_LEN; - case 106: - return CLI_MAX_TABLES_IN_SELECT; - case 107: - return CLI_MAX_USER_NAME_LEN; - case 115: - return CLI_OJ_CAPABILITIES; - case 10000: - return CLI_XOPEN_CLI_YEAR; - case 10001: - return CLI_CURSOR_SENSITIVITY; - case 10002: - return CLI_DESCRIBE_PARAMETER; - case 10003: - return CLI_CATALOG_NAME; - case 10004: - return CLI_COLLATION_SEQ; - case 10005: - return CLI_MAX_IDENTIFIER_LEN; - default: - return null; - } - } -} diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoValue.java deleted file mode 100644 index 8e3045a58e5ac..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoValue.java +++ /dev/null @@ -1,597 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import 
javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -public class TGetInfoValue extends org.apache.thrift.TUnion { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetInfoValue"); - private static final org.apache.thrift.protocol.TField STRING_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("stringValue", org.apache.thrift.protocol.TType.STRING, (short)1); - private static final org.apache.thrift.protocol.TField SMALL_INT_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("smallIntValue", org.apache.thrift.protocol.TType.I16, (short)2); - private static final org.apache.thrift.protocol.TField INTEGER_BITMASK_FIELD_DESC = new org.apache.thrift.protocol.TField("integerBitmask", org.apache.thrift.protocol.TType.I32, (short)3); - private static final org.apache.thrift.protocol.TField INTEGER_FLAG_FIELD_DESC = new org.apache.thrift.protocol.TField("integerFlag", org.apache.thrift.protocol.TType.I32, (short)4); - private static final org.apache.thrift.protocol.TField BINARY_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("binaryValue", org.apache.thrift.protocol.TType.I32, (short)5); - private static final org.apache.thrift.protocol.TField LEN_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("lenValue", org.apache.thrift.protocol.TType.I64, (short)6); - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STRING_VALUE((short)1, "stringValue"), - SMALL_INT_VALUE((short)2, "smallIntValue"), - INTEGER_BITMASK((short)3, "integerBitmask"), - INTEGER_FLAG((short)4, "integerFlag"), - BINARY_VALUE((short)5, "binaryValue"), - LEN_VALUE((short)6, "lenValue"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STRING_VALUE - return STRING_VALUE; - case 2: // SMALL_INT_VALUE - return SMALL_INT_VALUE; - case 3: // INTEGER_BITMASK - return INTEGER_BITMASK; - case 4: // INTEGER_FLAG - return INTEGER_FLAG; - case 5: // BINARY_VALUE - return BINARY_VALUE; - case 6: // LEN_VALUE - return LEN_VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STRING_VALUE, new org.apache.thrift.meta_data.FieldMetaData("stringValue", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.SMALL_INT_VALUE, new org.apache.thrift.meta_data.FieldMetaData("smallIntValue", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I16))); - tmpMap.put(_Fields.INTEGER_BITMASK, new org.apache.thrift.meta_data.FieldMetaData("integerBitmask", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.INTEGER_FLAG, new org.apache.thrift.meta_data.FieldMetaData("integerFlag", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.BINARY_VALUE, new org.apache.thrift.meta_data.FieldMetaData("binaryValue", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.LEN_VALUE, new org.apache.thrift.meta_data.FieldMetaData("lenValue", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetInfoValue.class, metaDataMap); - } - - public TGetInfoValue() { - super(); - } - - public TGetInfoValue(TGetInfoValue._Fields setField, Object value) { - super(setField, value); - } - - public TGetInfoValue(TGetInfoValue other) { - super(other); - } - public TGetInfoValue deepCopy() { - return new TGetInfoValue(this); - } - - public static TGetInfoValue stringValue(String value) { - TGetInfoValue x = new TGetInfoValue(); - x.setStringValue(value); - return x; - } - - public static TGetInfoValue smallIntValue(short value) { - TGetInfoValue x = new TGetInfoValue(); - x.setSmallIntValue(value); - return x; - } - - public static TGetInfoValue integerBitmask(int value) { - TGetInfoValue x = new TGetInfoValue(); - x.setIntegerBitmask(value); - return x; - } - - public static TGetInfoValue integerFlag(int value) { - TGetInfoValue x = new TGetInfoValue(); - x.setIntegerFlag(value); - return x; - } - - public static TGetInfoValue binaryValue(int value) { - TGetInfoValue x = new TGetInfoValue(); - x.setBinaryValue(value); - return x; - } - - public static TGetInfoValue lenValue(long value) { - TGetInfoValue x = new TGetInfoValue(); - x.setLenValue(value); - return x; - } - - - @Override - protected void checkType(_Fields setField, Object value) throws ClassCastException { - switch (setField) { - case STRING_VALUE: - if (value instanceof 
String) { - break; - } - throw new ClassCastException("Was expecting value of type String for field 'stringValue', but got " + value.getClass().getSimpleName()); - case SMALL_INT_VALUE: - if (value instanceof Short) { - break; - } - throw new ClassCastException("Was expecting value of type Short for field 'smallIntValue', but got " + value.getClass().getSimpleName()); - case INTEGER_BITMASK: - if (value instanceof Integer) { - break; - } - throw new ClassCastException("Was expecting value of type Integer for field 'integerBitmask', but got " + value.getClass().getSimpleName()); - case INTEGER_FLAG: - if (value instanceof Integer) { - break; - } - throw new ClassCastException("Was expecting value of type Integer for field 'integerFlag', but got " + value.getClass().getSimpleName()); - case BINARY_VALUE: - if (value instanceof Integer) { - break; - } - throw new ClassCastException("Was expecting value of type Integer for field 'binaryValue', but got " + value.getClass().getSimpleName()); - case LEN_VALUE: - if (value instanceof Long) { - break; - } - throw new ClassCastException("Was expecting value of type Long for field 'lenValue', but got " + value.getClass().getSimpleName()); - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected Object standardSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, org.apache.thrift.protocol.TField field) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(field.id); - if (setField != null) { - switch (setField) { - case STRING_VALUE: - if (field.type == STRING_VALUE_FIELD_DESC.type) { - String stringValue; - stringValue = iprot.readString(); - return stringValue; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case SMALL_INT_VALUE: - if (field.type == SMALL_INT_VALUE_FIELD_DESC.type) { - Short smallIntValue; - smallIntValue = iprot.readI16(); - return smallIntValue; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case INTEGER_BITMASK: - if (field.type == INTEGER_BITMASK_FIELD_DESC.type) { - Integer integerBitmask; - integerBitmask = iprot.readI32(); - return integerBitmask; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case INTEGER_FLAG: - if (field.type == INTEGER_FLAG_FIELD_DESC.type) { - Integer integerFlag; - integerFlag = iprot.readI32(); - return integerFlag; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case BINARY_VALUE: - if (field.type == BINARY_VALUE_FIELD_DESC.type) { - Integer binaryValue; - binaryValue = iprot.readI32(); - return binaryValue; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case LEN_VALUE: - if (field.type == LEN_VALUE_FIELD_DESC.type) { - Long lenValue; - lenValue = iprot.readI64(); - return lenValue; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - } - - @Override - protected void standardSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case STRING_VALUE: - String stringValue = (String)value_; - oprot.writeString(stringValue); - 
return; - case SMALL_INT_VALUE: - Short smallIntValue = (Short)value_; - oprot.writeI16(smallIntValue); - return; - case INTEGER_BITMASK: - Integer integerBitmask = (Integer)value_; - oprot.writeI32(integerBitmask); - return; - case INTEGER_FLAG: - Integer integerFlag = (Integer)value_; - oprot.writeI32(integerFlag); - return; - case BINARY_VALUE: - Integer binaryValue = (Integer)value_; - oprot.writeI32(binaryValue); - return; - case LEN_VALUE: - Long lenValue = (Long)value_; - oprot.writeI64(lenValue); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected Object tupleSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, short fieldID) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(fieldID); - if (setField != null) { - switch (setField) { - case STRING_VALUE: - String stringValue; - stringValue = iprot.readString(); - return stringValue; - case SMALL_INT_VALUE: - Short smallIntValue; - smallIntValue = iprot.readI16(); - return smallIntValue; - case INTEGER_BITMASK: - Integer integerBitmask; - integerBitmask = iprot.readI32(); - return integerBitmask; - case INTEGER_FLAG: - Integer integerFlag; - integerFlag = iprot.readI32(); - return integerFlag; - case BINARY_VALUE: - Integer binaryValue; - binaryValue = iprot.readI32(); - return binaryValue; - case LEN_VALUE: - Long lenValue; - lenValue = iprot.readI64(); - return lenValue; - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - throw new TProtocolException("Couldn't find a field with field id " + fieldID); - } - } - - @Override - protected void tupleSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case STRING_VALUE: - String stringValue = (String)value_; - oprot.writeString(stringValue); - return; - case SMALL_INT_VALUE: - Short smallIntValue = (Short)value_; - oprot.writeI16(smallIntValue); - return; - case INTEGER_BITMASK: - Integer integerBitmask = (Integer)value_; - oprot.writeI32(integerBitmask); - return; - case INTEGER_FLAG: - Integer integerFlag = (Integer)value_; - oprot.writeI32(integerFlag); - return; - case BINARY_VALUE: - Integer binaryValue = (Integer)value_; - oprot.writeI32(binaryValue); - return; - case LEN_VALUE: - Long lenValue = (Long)value_; - oprot.writeI64(lenValue); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected org.apache.thrift.protocol.TField getFieldDesc(_Fields setField) { - switch (setField) { - case STRING_VALUE: - return STRING_VALUE_FIELD_DESC; - case SMALL_INT_VALUE: - return SMALL_INT_VALUE_FIELD_DESC; - case INTEGER_BITMASK: - return INTEGER_BITMASK_FIELD_DESC; - case INTEGER_FLAG: - return INTEGER_FLAG_FIELD_DESC; - case BINARY_VALUE: - return BINARY_VALUE_FIELD_DESC; - case LEN_VALUE: - return LEN_VALUE_FIELD_DESC; - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected org.apache.thrift.protocol.TStruct getStructDesc() { - return STRUCT_DESC; - } - - @Override - protected _Fields enumForId(short id) { - return _Fields.findByThriftIdOrThrow(id); - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - - public String getStringValue() { - if (getSetField() == _Fields.STRING_VALUE) { - return (String)getFieldValue(); - } else { 
- throw new RuntimeException("Cannot get field 'stringValue' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setStringValue(String value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.STRING_VALUE; - value_ = value; - } - - public short getSmallIntValue() { - if (getSetField() == _Fields.SMALL_INT_VALUE) { - return (Short)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'smallIntValue' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setSmallIntValue(short value) { - setField_ = _Fields.SMALL_INT_VALUE; - value_ = value; - } - - public int getIntegerBitmask() { - if (getSetField() == _Fields.INTEGER_BITMASK) { - return (Integer)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'integerBitmask' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setIntegerBitmask(int value) { - setField_ = _Fields.INTEGER_BITMASK; - value_ = value; - } - - public int getIntegerFlag() { - if (getSetField() == _Fields.INTEGER_FLAG) { - return (Integer)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'integerFlag' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setIntegerFlag(int value) { - setField_ = _Fields.INTEGER_FLAG; - value_ = value; - } - - public int getBinaryValue() { - if (getSetField() == _Fields.BINARY_VALUE) { - return (Integer)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'binaryValue' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setBinaryValue(int value) { - setField_ = _Fields.BINARY_VALUE; - value_ = value; - } - - public long getLenValue() { - if (getSetField() == _Fields.LEN_VALUE) { - return (Long)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'lenValue' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setLenValue(long value) { - setField_ = _Fields.LEN_VALUE; - value_ = value; - } - - public boolean isSetStringValue() { - return setField_ == _Fields.STRING_VALUE; - } - - - public boolean isSetSmallIntValue() { - return setField_ == _Fields.SMALL_INT_VALUE; - } - - - public boolean isSetIntegerBitmask() { - return setField_ == _Fields.INTEGER_BITMASK; - } - - - public boolean isSetIntegerFlag() { - return setField_ == _Fields.INTEGER_FLAG; - } - - - public boolean isSetBinaryValue() { - return setField_ == _Fields.BINARY_VALUE; - } - - - public boolean isSetLenValue() { - return setField_ == _Fields.LEN_VALUE; - } - - - public boolean equals(Object other) { - if (other instanceof TGetInfoValue) { - return equals((TGetInfoValue)other); - } else { - return false; - } - } - - public boolean equals(TGetInfoValue other) { - return other != null && getSetField() == other.getSetField() && getFieldValue().equals(other.getFieldValue()); - } - - @Override - public int compareTo(TGetInfoValue other) { - int lastComparison = org.apache.thrift.TBaseHelper.compareTo(getSetField(), other.getSetField()); - if (lastComparison == 0) { - return org.apache.thrift.TBaseHelper.compareTo(getFieldValue(), other.getFieldValue()); - } - return lastComparison; - } - - - @Override - public int hashCode() { - List list = new ArrayList(); - list.add(this.getClass().getName()); - org.apache.thrift.TFieldIdEnum setField = getSetField(); - if (setField != null) { - 
list.add(setField.getThriftFieldId()); - Object value = getFieldValue(); - if (value instanceof org.apache.thrift.TEnum) { - list.add(((org.apache.thrift.TEnum)getFieldValue()).getValue()); - } else { - list.add(value); - } - } - return list.hashCode(); - } - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - -} diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusReq.java deleted file mode 100644 index af31ce2b22819..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusReq.java +++ /dev/null @@ -1,501 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetOperationStatusReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetOperationStatusReq"); - - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField GET_PROGRESS_UPDATE_FIELD_DESC = new org.apache.thrift.protocol.TField("getProgressUpdate", org.apache.thrift.protocol.TType.BOOL, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetOperationStatusReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetOperationStatusReqTupleSchemeFactory()); - } - - private TOperationHandle operationHandle; // required - private boolean getProgressUpdate; // optional - 
- /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OPERATION_HANDLE((short)1, "operationHandle"), - GET_PROGRESS_UPDATE((short)2, "getProgressUpdate"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OPERATION_HANDLE - return OPERATION_HANDLE; - case 2: // GET_PROGRESS_UPDATE - return GET_PROGRESS_UPDATE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __GETPROGRESSUPDATE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.GET_PROGRESS_UPDATE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - tmpMap.put(_Fields.GET_PROGRESS_UPDATE, new org.apache.thrift.meta_data.FieldMetaData("getProgressUpdate", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetOperationStatusReq.class, metaDataMap); - } - - public TGetOperationStatusReq() { - } - - public TGetOperationStatusReq( - TOperationHandle operationHandle) - { - this(); - this.operationHandle = operationHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetOperationStatusReq(TGetOperationStatusReq other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - this.getProgressUpdate = other.getProgressUpdate; - } - - public TGetOperationStatusReq deepCopy() { - return new TGetOperationStatusReq(this); - } - - @Override - public void clear() { - this.operationHandle = null; - setGetProgressUpdateIsSet(false); - this.getProgressUpdate = false; - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public boolean isGetProgressUpdate() { - return this.getProgressUpdate; - } - - public void setGetProgressUpdate(boolean getProgressUpdate) { - this.getProgressUpdate = getProgressUpdate; - setGetProgressUpdateIsSet(true); - } - - public void unsetGetProgressUpdate() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __GETPROGRESSUPDATE_ISSET_ID); - } - - /** Returns true if field getProgressUpdate is set (has been assigned a value) and false otherwise */ - public boolean isSetGetProgressUpdate() { - return EncodingUtils.testBit(__isset_bitfield, __GETPROGRESSUPDATE_ISSET_ID); - } - - public void setGetProgressUpdateIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __GETPROGRESSUPDATE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - case GET_PROGRESS_UPDATE: - if (value == null) { - unsetGetProgressUpdate(); - } else { - setGetProgressUpdate((Boolean)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OPERATION_HANDLE: - return getOperationHandle(); - - case GET_PROGRESS_UPDATE: - return isGetProgressUpdate(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OPERATION_HANDLE: - return isSetOperationHandle(); - case GET_PROGRESS_UPDATE: - return isSetGetProgressUpdate(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetOperationStatusReq) - return this.equals((TGetOperationStatusReq)that); - return false; - } - - public boolean equals(TGetOperationStatusReq that) { - if (that == null) - return false; - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if 
(!this.operationHandle.equals(that.operationHandle)) - return false; - } - - boolean this_present_getProgressUpdate = true && this.isSetGetProgressUpdate(); - boolean that_present_getProgressUpdate = true && that.isSetGetProgressUpdate(); - if (this_present_getProgressUpdate || that_present_getProgressUpdate) { - if (!(this_present_getProgressUpdate && that_present_getProgressUpdate)) - return false; - if (this.getProgressUpdate != that.getProgressUpdate) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - boolean present_getProgressUpdate = true && (isSetGetProgressUpdate()); - list.add(present_getProgressUpdate); - if (present_getProgressUpdate) - list.add(getProgressUpdate); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetOperationStatusReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetGetProgressUpdate()).compareTo(other.isSetGetProgressUpdate()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetGetProgressUpdate()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.getProgressUpdate, other.getProgressUpdate); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetOperationStatusReq("); - boolean first = true; - - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - if (isSetGetProgressUpdate()) { - if (!first) sb.append(", "); - sb.append("getProgressUpdate:"); - sb.append(this.getProgressUpdate); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetOperationHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationHandle' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetOperationStatusReqStandardSchemeFactory implements SchemeFactory { - public TGetOperationStatusReqStandardScheme getScheme() { - return new TGetOperationStatusReqStandardScheme(); - } - } - - private static class TGetOperationStatusReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetOperationStatusReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // GET_PROGRESS_UPDATE - if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) { - struct.getProgressUpdate = iprot.readBool(); - struct.setGetProgressUpdateIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetOperationStatusReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.operationHandle != null) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.isSetGetProgressUpdate()) { - oprot.writeFieldBegin(GET_PROGRESS_UPDATE_FIELD_DESC); - oprot.writeBool(struct.getProgressUpdate); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetOperationStatusReqTupleSchemeFactory implements SchemeFactory { - public TGetOperationStatusReqTupleScheme getScheme() { - return new TGetOperationStatusReqTupleScheme(); - } - } - - private static class TGetOperationStatusReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetOperationStatusReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.operationHandle.write(oprot); - BitSet optionals = new BitSet(); - if 
(struct.isSetGetProgressUpdate()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetGetProgressUpdate()) { - oprot.writeBool(struct.getProgressUpdate); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetOperationStatusReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.getProgressUpdate = iprot.readBool(); - struct.setGetProgressUpdateIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusResp.java deleted file mode 100644 index dbfbb44aa6986..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusResp.java +++ /dev/null @@ -1,1342 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetOperationStatusResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetOperationStatusResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_STATE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationState", org.apache.thrift.protocol.TType.I32, (short)2); - private static final org.apache.thrift.protocol.TField SQL_STATE_FIELD_DESC = new org.apache.thrift.protocol.TField("sqlState", org.apache.thrift.protocol.TType.STRING, (short)3); - private static final org.apache.thrift.protocol.TField ERROR_CODE_FIELD_DESC = new org.apache.thrift.protocol.TField("errorCode", org.apache.thrift.protocol.TType.I32, (short)4); - private static final org.apache.thrift.protocol.TField ERROR_MESSAGE_FIELD_DESC = new org.apache.thrift.protocol.TField("errorMessage", org.apache.thrift.protocol.TType.STRING, (short)5); - private static final 
org.apache.thrift.protocol.TField TASK_STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("taskStatus", org.apache.thrift.protocol.TType.STRING, (short)6); - private static final org.apache.thrift.protocol.TField OPERATION_STARTED_FIELD_DESC = new org.apache.thrift.protocol.TField("operationStarted", org.apache.thrift.protocol.TType.I64, (short)7); - private static final org.apache.thrift.protocol.TField OPERATION_COMPLETED_FIELD_DESC = new org.apache.thrift.protocol.TField("operationCompleted", org.apache.thrift.protocol.TType.I64, (short)8); - private static final org.apache.thrift.protocol.TField HAS_RESULT_SET_FIELD_DESC = new org.apache.thrift.protocol.TField("hasResultSet", org.apache.thrift.protocol.TType.BOOL, (short)9); - private static final org.apache.thrift.protocol.TField PROGRESS_UPDATE_RESPONSE_FIELD_DESC = new org.apache.thrift.protocol.TField("progressUpdateResponse", org.apache.thrift.protocol.TType.STRUCT, (short)10); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetOperationStatusRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetOperationStatusRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationState operationState; // optional - private String sqlState; // optional - private int errorCode; // optional - private String errorMessage; // optional - private String taskStatus; // optional - private long operationStarted; // optional - private long operationCompleted; // optional - private boolean hasResultSet; // optional - private TProgressUpdateResp progressUpdateResponse; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - /** - * - * @see TOperationState - */ - OPERATION_STATE((short)2, "operationState"), - SQL_STATE((short)3, "sqlState"), - ERROR_CODE((short)4, "errorCode"), - ERROR_MESSAGE((short)5, "errorMessage"), - TASK_STATUS((short)6, "taskStatus"), - OPERATION_STARTED((short)7, "operationStarted"), - OPERATION_COMPLETED((short)8, "operationCompleted"), - HAS_RESULT_SET((short)9, "hasResultSet"), - PROGRESS_UPDATE_RESPONSE((short)10, "progressUpdateResponse"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_STATE - return OPERATION_STATE; - case 3: // SQL_STATE - return SQL_STATE; - case 4: // ERROR_CODE - return ERROR_CODE; - case 5: // ERROR_MESSAGE - return ERROR_MESSAGE; - case 6: // TASK_STATUS - return TASK_STATUS; - case 7: // OPERATION_STARTED - return OPERATION_STARTED; - case 8: // OPERATION_COMPLETED - return OPERATION_COMPLETED; - case 9: // HAS_RESULT_SET - return HAS_RESULT_SET; - case 10: // PROGRESS_UPDATE_RESPONSE - return PROGRESS_UPDATE_RESPONSE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __ERRORCODE_ISSET_ID = 0; - private static final int __OPERATIONSTARTED_ISSET_ID = 1; - private static final int __OPERATIONCOMPLETED_ISSET_ID = 2; - private static final int __HASRESULTSET_ISSET_ID = 3; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.OPERATION_STATE,_Fields.SQL_STATE,_Fields.ERROR_CODE,_Fields.ERROR_MESSAGE,_Fields.TASK_STATUS,_Fields.OPERATION_STARTED,_Fields.OPERATION_COMPLETED,_Fields.HAS_RESULT_SET,_Fields.PROGRESS_UPDATE_RESPONSE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_STATE, new org.apache.thrift.meta_data.FieldMetaData("operationState", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TOperationState.class))); - tmpMap.put(_Fields.SQL_STATE, new org.apache.thrift.meta_data.FieldMetaData("sqlState", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.ERROR_CODE, new org.apache.thrift.meta_data.FieldMetaData("errorCode", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.ERROR_MESSAGE, new org.apache.thrift.meta_data.FieldMetaData("errorMessage", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.TASK_STATUS, new org.apache.thrift.meta_data.FieldMetaData("taskStatus", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.OPERATION_STARTED, new org.apache.thrift.meta_data.FieldMetaData("operationStarted", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - tmpMap.put(_Fields.OPERATION_COMPLETED, new org.apache.thrift.meta_data.FieldMetaData("operationCompleted", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - tmpMap.put(_Fields.HAS_RESULT_SET, new 
org.apache.thrift.meta_data.FieldMetaData("hasResultSet", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL))); - tmpMap.put(_Fields.PROGRESS_UPDATE_RESPONSE, new org.apache.thrift.meta_data.FieldMetaData("progressUpdateResponse", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRUCT , "TProgressUpdateResp"))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetOperationStatusResp.class, metaDataMap); - } - - public TGetOperationStatusResp() { - } - - public TGetOperationStatusResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TGetOperationStatusResp(TGetOperationStatusResp other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationState()) { - this.operationState = other.operationState; - } - if (other.isSetSqlState()) { - this.sqlState = other.sqlState; - } - this.errorCode = other.errorCode; - if (other.isSetErrorMessage()) { - this.errorMessage = other.errorMessage; - } - if (other.isSetTaskStatus()) { - this.taskStatus = other.taskStatus; - } - this.operationStarted = other.operationStarted; - this.operationCompleted = other.operationCompleted; - this.hasResultSet = other.hasResultSet; - if (other.isSetProgressUpdateResponse()) { - this.progressUpdateResponse = other.progressUpdateResponse; - } - } - - public TGetOperationStatusResp deepCopy() { - return new TGetOperationStatusResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationState = null; - this.sqlState = null; - setErrorCodeIsSet(false); - this.errorCode = 0; - this.errorMessage = null; - this.taskStatus = null; - setOperationStartedIsSet(false); - this.operationStarted = 0; - setOperationCompletedIsSet(false); - this.operationCompleted = 0; - setHasResultSetIsSet(false); - this.hasResultSet = false; - this.progressUpdateResponse = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - /** - * - * @see TOperationState - */ - public TOperationState getOperationState() { - return this.operationState; - } - - /** - * - * @see TOperationState - */ - public void setOperationState(TOperationState operationState) { - this.operationState = operationState; - } - - public void unsetOperationState() { - this.operationState = null; - } - - /** Returns true if field operationState is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationState() { - return this.operationState != null; - } - - public void setOperationStateIsSet(boolean value) { - if (!value) { - this.operationState = null; - } - } - - public String getSqlState() { - return this.sqlState; - } - - public void setSqlState(String sqlState) { - this.sqlState = sqlState; - } - - public void unsetSqlState() { - this.sqlState = null; - } - - /** Returns true if field sqlState is set (has been assigned a 
value) and false otherwise */ - public boolean isSetSqlState() { - return this.sqlState != null; - } - - public void setSqlStateIsSet(boolean value) { - if (!value) { - this.sqlState = null; - } - } - - public int getErrorCode() { - return this.errorCode; - } - - public void setErrorCode(int errorCode) { - this.errorCode = errorCode; - setErrorCodeIsSet(true); - } - - public void unsetErrorCode() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __ERRORCODE_ISSET_ID); - } - - /** Returns true if field errorCode is set (has been assigned a value) and false otherwise */ - public boolean isSetErrorCode() { - return EncodingUtils.testBit(__isset_bitfield, __ERRORCODE_ISSET_ID); - } - - public void setErrorCodeIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __ERRORCODE_ISSET_ID, value); - } - - public String getErrorMessage() { - return this.errorMessage; - } - - public void setErrorMessage(String errorMessage) { - this.errorMessage = errorMessage; - } - - public void unsetErrorMessage() { - this.errorMessage = null; - } - - /** Returns true if field errorMessage is set (has been assigned a value) and false otherwise */ - public boolean isSetErrorMessage() { - return this.errorMessage != null; - } - - public void setErrorMessageIsSet(boolean value) { - if (!value) { - this.errorMessage = null; - } - } - - public String getTaskStatus() { - return this.taskStatus; - } - - public void setTaskStatus(String taskStatus) { - this.taskStatus = taskStatus; - } - - public void unsetTaskStatus() { - this.taskStatus = null; - } - - /** Returns true if field taskStatus is set (has been assigned a value) and false otherwise */ - public boolean isSetTaskStatus() { - return this.taskStatus != null; - } - - public void setTaskStatusIsSet(boolean value) { - if (!value) { - this.taskStatus = null; - } - } - - public long getOperationStarted() { - return this.operationStarted; - } - - public void setOperationStarted(long operationStarted) { - this.operationStarted = operationStarted; - setOperationStartedIsSet(true); - } - - public void unsetOperationStarted() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __OPERATIONSTARTED_ISSET_ID); - } - - /** Returns true if field operationStarted is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationStarted() { - return EncodingUtils.testBit(__isset_bitfield, __OPERATIONSTARTED_ISSET_ID); - } - - public void setOperationStartedIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __OPERATIONSTARTED_ISSET_ID, value); - } - - public long getOperationCompleted() { - return this.operationCompleted; - } - - public void setOperationCompleted(long operationCompleted) { - this.operationCompleted = operationCompleted; - setOperationCompletedIsSet(true); - } - - public void unsetOperationCompleted() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __OPERATIONCOMPLETED_ISSET_ID); - } - - /** Returns true if field operationCompleted is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationCompleted() { - return EncodingUtils.testBit(__isset_bitfield, __OPERATIONCOMPLETED_ISSET_ID); - } - - public void setOperationCompletedIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __OPERATIONCOMPLETED_ISSET_ID, value); - } - - public boolean isHasResultSet() { - return this.hasResultSet; - } - - public void setHasResultSet(boolean hasResultSet) { - this.hasResultSet = hasResultSet; - 
setHasResultSetIsSet(true); - } - - public void unsetHasResultSet() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __HASRESULTSET_ISSET_ID); - } - - /** Returns true if field hasResultSet is set (has been assigned a value) and false otherwise */ - public boolean isSetHasResultSet() { - return EncodingUtils.testBit(__isset_bitfield, __HASRESULTSET_ISSET_ID); - } - - public void setHasResultSetIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __HASRESULTSET_ISSET_ID, value); - } - - public TProgressUpdateResp getProgressUpdateResponse() { - return this.progressUpdateResponse; - } - - public void setProgressUpdateResponse(TProgressUpdateResp progressUpdateResponse) { - this.progressUpdateResponse = progressUpdateResponse; - } - - public void unsetProgressUpdateResponse() { - this.progressUpdateResponse = null; - } - - /** Returns true if field progressUpdateResponse is set (has been assigned a value) and false otherwise */ - public boolean isSetProgressUpdateResponse() { - return this.progressUpdateResponse != null; - } - - public void setProgressUpdateResponseIsSet(boolean value) { - if (!value) { - this.progressUpdateResponse = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_STATE: - if (value == null) { - unsetOperationState(); - } else { - setOperationState((TOperationState)value); - } - break; - - case SQL_STATE: - if (value == null) { - unsetSqlState(); - } else { - setSqlState((String)value); - } - break; - - case ERROR_CODE: - if (value == null) { - unsetErrorCode(); - } else { - setErrorCode((Integer)value); - } - break; - - case ERROR_MESSAGE: - if (value == null) { - unsetErrorMessage(); - } else { - setErrorMessage((String)value); - } - break; - - case TASK_STATUS: - if (value == null) { - unsetTaskStatus(); - } else { - setTaskStatus((String)value); - } - break; - - case OPERATION_STARTED: - if (value == null) { - unsetOperationStarted(); - } else { - setOperationStarted((Long)value); - } - break; - - case OPERATION_COMPLETED: - if (value == null) { - unsetOperationCompleted(); - } else { - setOperationCompleted((Long)value); - } - break; - - case HAS_RESULT_SET: - if (value == null) { - unsetHasResultSet(); - } else { - setHasResultSet((Boolean)value); - } - break; - - case PROGRESS_UPDATE_RESPONSE: - if (value == null) { - unsetProgressUpdateResponse(); - } else { - setProgressUpdateResponse((TProgressUpdateResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_STATE: - return getOperationState(); - - case SQL_STATE: - return getSqlState(); - - case ERROR_CODE: - return getErrorCode(); - - case ERROR_MESSAGE: - return getErrorMessage(); - - case TASK_STATUS: - return getTaskStatus(); - - case OPERATION_STARTED: - return getOperationStarted(); - - case OPERATION_COMPLETED: - return getOperationCompleted(); - - case HAS_RESULT_SET: - return isHasResultSet(); - - case PROGRESS_UPDATE_RESPONSE: - return getProgressUpdateResponse(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case 
OPERATION_STATE: - return isSetOperationState(); - case SQL_STATE: - return isSetSqlState(); - case ERROR_CODE: - return isSetErrorCode(); - case ERROR_MESSAGE: - return isSetErrorMessage(); - case TASK_STATUS: - return isSetTaskStatus(); - case OPERATION_STARTED: - return isSetOperationStarted(); - case OPERATION_COMPLETED: - return isSetOperationCompleted(); - case HAS_RESULT_SET: - return isSetHasResultSet(); - case PROGRESS_UPDATE_RESPONSE: - return isSetProgressUpdateResponse(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetOperationStatusResp) - return this.equals((TGetOperationStatusResp)that); - return false; - } - - public boolean equals(TGetOperationStatusResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationState = true && this.isSetOperationState(); - boolean that_present_operationState = true && that.isSetOperationState(); - if (this_present_operationState || that_present_operationState) { - if (!(this_present_operationState && that_present_operationState)) - return false; - if (!this.operationState.equals(that.operationState)) - return false; - } - - boolean this_present_sqlState = true && this.isSetSqlState(); - boolean that_present_sqlState = true && that.isSetSqlState(); - if (this_present_sqlState || that_present_sqlState) { - if (!(this_present_sqlState && that_present_sqlState)) - return false; - if (!this.sqlState.equals(that.sqlState)) - return false; - } - - boolean this_present_errorCode = true && this.isSetErrorCode(); - boolean that_present_errorCode = true && that.isSetErrorCode(); - if (this_present_errorCode || that_present_errorCode) { - if (!(this_present_errorCode && that_present_errorCode)) - return false; - if (this.errorCode != that.errorCode) - return false; - } - - boolean this_present_errorMessage = true && this.isSetErrorMessage(); - boolean that_present_errorMessage = true && that.isSetErrorMessage(); - if (this_present_errorMessage || that_present_errorMessage) { - if (!(this_present_errorMessage && that_present_errorMessage)) - return false; - if (!this.errorMessage.equals(that.errorMessage)) - return false; - } - - boolean this_present_taskStatus = true && this.isSetTaskStatus(); - boolean that_present_taskStatus = true && that.isSetTaskStatus(); - if (this_present_taskStatus || that_present_taskStatus) { - if (!(this_present_taskStatus && that_present_taskStatus)) - return false; - if (!this.taskStatus.equals(that.taskStatus)) - return false; - } - - boolean this_present_operationStarted = true && this.isSetOperationStarted(); - boolean that_present_operationStarted = true && that.isSetOperationStarted(); - if (this_present_operationStarted || that_present_operationStarted) { - if (!(this_present_operationStarted && that_present_operationStarted)) - return false; - if (this.operationStarted != that.operationStarted) - return false; - } - - boolean this_present_operationCompleted = true && this.isSetOperationCompleted(); - boolean that_present_operationCompleted = true && that.isSetOperationCompleted(); - if (this_present_operationCompleted || that_present_operationCompleted) { - if 
(!(this_present_operationCompleted && that_present_operationCompleted)) - return false; - if (this.operationCompleted != that.operationCompleted) - return false; - } - - boolean this_present_hasResultSet = true && this.isSetHasResultSet(); - boolean that_present_hasResultSet = true && that.isSetHasResultSet(); - if (this_present_hasResultSet || that_present_hasResultSet) { - if (!(this_present_hasResultSet && that_present_hasResultSet)) - return false; - if (this.hasResultSet != that.hasResultSet) - return false; - } - - boolean this_present_progressUpdateResponse = true && this.isSetProgressUpdateResponse(); - boolean that_present_progressUpdateResponse = true && that.isSetProgressUpdateResponse(); - if (this_present_progressUpdateResponse || that_present_progressUpdateResponse) { - if (!(this_present_progressUpdateResponse && that_present_progressUpdateResponse)) - return false; - if (!this.progressUpdateResponse.equals(that.progressUpdateResponse)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if (present_status) - list.add(status); - - boolean present_operationState = true && (isSetOperationState()); - list.add(present_operationState); - if (present_operationState) - list.add(operationState.getValue()); - - boolean present_sqlState = true && (isSetSqlState()); - list.add(present_sqlState); - if (present_sqlState) - list.add(sqlState); - - boolean present_errorCode = true && (isSetErrorCode()); - list.add(present_errorCode); - if (present_errorCode) - list.add(errorCode); - - boolean present_errorMessage = true && (isSetErrorMessage()); - list.add(present_errorMessage); - if (present_errorMessage) - list.add(errorMessage); - - boolean present_taskStatus = true && (isSetTaskStatus()); - list.add(present_taskStatus); - if (present_taskStatus) - list.add(taskStatus); - - boolean present_operationStarted = true && (isSetOperationStarted()); - list.add(present_operationStarted); - if (present_operationStarted) - list.add(operationStarted); - - boolean present_operationCompleted = true && (isSetOperationCompleted()); - list.add(present_operationCompleted); - if (present_operationCompleted) - list.add(operationCompleted); - - boolean present_hasResultSet = true && (isSetHasResultSet()); - list.add(present_hasResultSet); - if (present_hasResultSet) - list.add(hasResultSet); - - boolean present_progressUpdateResponse = true && (isSetProgressUpdateResponse()); - list.add(present_progressUpdateResponse); - if (present_progressUpdateResponse) - list.add(progressUpdateResponse); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetOperationStatusResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationState()).compareTo(other.isSetOperationState()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationState()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationState, other.operationState); - if (lastComparison != 0) { - return 
lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSqlState()).compareTo(other.isSetSqlState()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSqlState()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sqlState, other.sqlState); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetErrorCode()).compareTo(other.isSetErrorCode()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetErrorCode()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.errorCode, other.errorCode); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetErrorMessage()).compareTo(other.isSetErrorMessage()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetErrorMessage()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.errorMessage, other.errorMessage); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetTaskStatus()).compareTo(other.isSetTaskStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTaskStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.taskStatus, other.taskStatus); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationStarted()).compareTo(other.isSetOperationStarted()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationStarted()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationStarted, other.operationStarted); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationCompleted()).compareTo(other.isSetOperationCompleted()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationCompleted()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationCompleted, other.operationCompleted); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetHasResultSet()).compareTo(other.isSetHasResultSet()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetHasResultSet()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.hasResultSet, other.hasResultSet); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetProgressUpdateResponse()).compareTo(other.isSetProgressUpdateResponse()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetProgressUpdateResponse()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.progressUpdateResponse, other.progressUpdateResponse); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetOperationStatusResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if 
(isSetOperationState()) { - if (!first) sb.append(", "); - sb.append("operationState:"); - if (this.operationState == null) { - sb.append("null"); - } else { - sb.append(this.operationState); - } - first = false; - } - if (isSetSqlState()) { - if (!first) sb.append(", "); - sb.append("sqlState:"); - if (this.sqlState == null) { - sb.append("null"); - } else { - sb.append(this.sqlState); - } - first = false; - } - if (isSetErrorCode()) { - if (!first) sb.append(", "); - sb.append("errorCode:"); - sb.append(this.errorCode); - first = false; - } - if (isSetErrorMessage()) { - if (!first) sb.append(", "); - sb.append("errorMessage:"); - if (this.errorMessage == null) { - sb.append("null"); - } else { - sb.append(this.errorMessage); - } - first = false; - } - if (isSetTaskStatus()) { - if (!first) sb.append(", "); - sb.append("taskStatus:"); - if (this.taskStatus == null) { - sb.append("null"); - } else { - sb.append(this.taskStatus); - } - first = false; - } - if (isSetOperationStarted()) { - if (!first) sb.append(", "); - sb.append("operationStarted:"); - sb.append(this.operationStarted); - first = false; - } - if (isSetOperationCompleted()) { - if (!first) sb.append(", "); - sb.append("operationCompleted:"); - sb.append(this.operationCompleted); - first = false; - } - if (isSetHasResultSet()) { - if (!first) sb.append(", "); - sb.append("hasResultSet:"); - sb.append(this.hasResultSet); - first = false; - } - if (isSetProgressUpdateResponse()) { - if (!first) sb.append(", "); - sb.append("progressUpdateResponse:"); - if (this.progressUpdateResponse == null) { - sb.append("null"); - } else { - sb.append(this.progressUpdateResponse); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
- __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetOperationStatusRespStandardSchemeFactory implements SchemeFactory { - public TGetOperationStatusRespStandardScheme getScheme() { - return new TGetOperationStatusRespStandardScheme(); - } - } - - private static class TGetOperationStatusRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetOperationStatusResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_STATE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.operationState = org.apache.hive.service.rpc.thrift.TOperationState.findByValue(iprot.readI32()); - struct.setOperationStateIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SQL_STATE - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.sqlState = iprot.readString(); - struct.setSqlStateIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // ERROR_CODE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.errorCode = iprot.readI32(); - struct.setErrorCodeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 5: // ERROR_MESSAGE - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.errorMessage = iprot.readString(); - struct.setErrorMessageIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 6: // TASK_STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.taskStatus = iprot.readString(); - struct.setTaskStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 7: // OPERATION_STARTED - if (schemeField.type == org.apache.thrift.protocol.TType.I64) { - struct.operationStarted = iprot.readI64(); - struct.setOperationStartedIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 8: // OPERATION_COMPLETED - if (schemeField.type == org.apache.thrift.protocol.TType.I64) { - struct.operationCompleted = iprot.readI64(); - struct.setOperationCompletedIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 9: // HAS_RESULT_SET - if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) { - struct.hasResultSet = iprot.readBool(); - struct.setHasResultSetIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 10: // PROGRESS_UPDATE_RESPONSE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - 
struct.progressUpdateResponse = new TProgressUpdateResp(); - struct.progressUpdateResponse.read(iprot); - struct.setProgressUpdateResponseIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetOperationStatusResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationState != null) { - if (struct.isSetOperationState()) { - oprot.writeFieldBegin(OPERATION_STATE_FIELD_DESC); - oprot.writeI32(struct.operationState.getValue()); - oprot.writeFieldEnd(); - } - } - if (struct.sqlState != null) { - if (struct.isSetSqlState()) { - oprot.writeFieldBegin(SQL_STATE_FIELD_DESC); - oprot.writeString(struct.sqlState); - oprot.writeFieldEnd(); - } - } - if (struct.isSetErrorCode()) { - oprot.writeFieldBegin(ERROR_CODE_FIELD_DESC); - oprot.writeI32(struct.errorCode); - oprot.writeFieldEnd(); - } - if (struct.errorMessage != null) { - if (struct.isSetErrorMessage()) { - oprot.writeFieldBegin(ERROR_MESSAGE_FIELD_DESC); - oprot.writeString(struct.errorMessage); - oprot.writeFieldEnd(); - } - } - if (struct.taskStatus != null) { - if (struct.isSetTaskStatus()) { - oprot.writeFieldBegin(TASK_STATUS_FIELD_DESC); - oprot.writeString(struct.taskStatus); - oprot.writeFieldEnd(); - } - } - if (struct.isSetOperationStarted()) { - oprot.writeFieldBegin(OPERATION_STARTED_FIELD_DESC); - oprot.writeI64(struct.operationStarted); - oprot.writeFieldEnd(); - } - if (struct.isSetOperationCompleted()) { - oprot.writeFieldBegin(OPERATION_COMPLETED_FIELD_DESC); - oprot.writeI64(struct.operationCompleted); - oprot.writeFieldEnd(); - } - if (struct.isSetHasResultSet()) { - oprot.writeFieldBegin(HAS_RESULT_SET_FIELD_DESC); - oprot.writeBool(struct.hasResultSet); - oprot.writeFieldEnd(); - } - if (struct.progressUpdateResponse != null) { - if (struct.isSetProgressUpdateResponse()) { - oprot.writeFieldBegin(PROGRESS_UPDATE_RESPONSE_FIELD_DESC); - struct.progressUpdateResponse.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetOperationStatusRespTupleSchemeFactory implements SchemeFactory { - public TGetOperationStatusRespTupleScheme getScheme() { - return new TGetOperationStatusRespTupleScheme(); - } - } - - private static class TGetOperationStatusRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetOperationStatusResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationState()) { - optionals.set(0); - } - if (struct.isSetSqlState()) { - optionals.set(1); - } - if (struct.isSetErrorCode()) { - optionals.set(2); - } - if (struct.isSetErrorMessage()) { - optionals.set(3); - } - if (struct.isSetTaskStatus()) { - optionals.set(4); - } - if (struct.isSetOperationStarted()) { - optionals.set(5); - } - if (struct.isSetOperationCompleted()) { - optionals.set(6); - } - if (struct.isSetHasResultSet()) { - optionals.set(7); - } - if (struct.isSetProgressUpdateResponse()) 
{ - optionals.set(8); - } - oprot.writeBitSet(optionals, 9); - if (struct.isSetOperationState()) { - oprot.writeI32(struct.operationState.getValue()); - } - if (struct.isSetSqlState()) { - oprot.writeString(struct.sqlState); - } - if (struct.isSetErrorCode()) { - oprot.writeI32(struct.errorCode); - } - if (struct.isSetErrorMessage()) { - oprot.writeString(struct.errorMessage); - } - if (struct.isSetTaskStatus()) { - oprot.writeString(struct.taskStatus); - } - if (struct.isSetOperationStarted()) { - oprot.writeI64(struct.operationStarted); - } - if (struct.isSetOperationCompleted()) { - oprot.writeI64(struct.operationCompleted); - } - if (struct.isSetHasResultSet()) { - oprot.writeBool(struct.hasResultSet); - } - if (struct.isSetProgressUpdateResponse()) { - struct.progressUpdateResponse.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetOperationStatusResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(9); - if (incoming.get(0)) { - struct.operationState = org.apache.hive.service.rpc.thrift.TOperationState.findByValue(iprot.readI32()); - struct.setOperationStateIsSet(true); - } - if (incoming.get(1)) { - struct.sqlState = iprot.readString(); - struct.setSqlStateIsSet(true); - } - if (incoming.get(2)) { - struct.errorCode = iprot.readI32(); - struct.setErrorCodeIsSet(true); - } - if (incoming.get(3)) { - struct.errorMessage = iprot.readString(); - struct.setErrorMessageIsSet(true); - } - if (incoming.get(4)) { - struct.taskStatus = iprot.readString(); - struct.setTaskStatusIsSet(true); - } - if (incoming.get(5)) { - struct.operationStarted = iprot.readI64(); - struct.setOperationStartedIsSet(true); - } - if (incoming.get(6)) { - struct.operationCompleted = iprot.readI64(); - struct.setOperationCompletedIsSet(true); - } - if (incoming.get(7)) { - struct.hasResultSet = iprot.readBool(); - struct.setHasResultSetIsSet(true); - } - if (incoming.get(8)) { - struct.progressUpdateResponse = new TProgressUpdateResp(); - struct.progressUpdateResponse.read(iprot); - struct.setProgressUpdateResponseIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysReq.java deleted file mode 100644 index 1bec9b51c72d8..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysReq.java +++ /dev/null @@ -1,716 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import 
java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetPrimaryKeysReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetPrimaryKeysReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField CATALOG_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("catalogName", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField SCHEMA_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("schemaName", org.apache.thrift.protocol.TType.STRING, (short)3); - private static final org.apache.thrift.protocol.TField TABLE_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("tableName", org.apache.thrift.protocol.TType.STRING, (short)4); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetPrimaryKeysReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetPrimaryKeysReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String catalogName; // optional - private String schemaName; // optional - private String tableName; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - CATALOG_NAME((short)2, "catalogName"), - SCHEMA_NAME((short)3, "schemaName"), - TABLE_NAME((short)4, "tableName"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // CATALOG_NAME - return CATALOG_NAME; - case 3: // SCHEMA_NAME - return SCHEMA_NAME; - case 4: // TABLE_NAME - return TABLE_NAME; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.CATALOG_NAME,_Fields.SCHEMA_NAME,_Fields.TABLE_NAME}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.CATALOG_NAME, new org.apache.thrift.meta_data.FieldMetaData("catalogName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - tmpMap.put(_Fields.SCHEMA_NAME, new org.apache.thrift.meta_data.FieldMetaData("schemaName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - tmpMap.put(_Fields.TABLE_NAME, new org.apache.thrift.meta_data.FieldMetaData("tableName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetPrimaryKeysReq.class, metaDataMap); - } - - public TGetPrimaryKeysReq() { - } - - public TGetPrimaryKeysReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetPrimaryKeysReq(TGetPrimaryKeysReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetCatalogName()) { - this.catalogName = other.catalogName; - } - if (other.isSetSchemaName()) { - this.schemaName = other.schemaName; - } - if (other.isSetTableName()) { - this.tableName = other.tableName; - } - } - - public TGetPrimaryKeysReq deepCopy() { - return new TGetPrimaryKeysReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.catalogName = null; - this.schemaName = null; - this.tableName = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getCatalogName() { - return this.catalogName; - } - - public void setCatalogName(String catalogName) { - this.catalogName = catalogName; - } - - public void unsetCatalogName() { - this.catalogName = null; - } - - /** Returns true if field catalogName is set (has been assigned a value) and false otherwise */ - public boolean isSetCatalogName() { - return this.catalogName != null; - } - - public void setCatalogNameIsSet(boolean value) { - if (!value) { - this.catalogName = null; - } - } - - public String getSchemaName() { - return this.schemaName; - } - - public void setSchemaName(String schemaName) { - this.schemaName = schemaName; - } - - public void unsetSchemaName() { - this.schemaName = null; - } - - /** Returns true if field schemaName is set (has been assigned a value) and false otherwise */ - public boolean isSetSchemaName() { - return this.schemaName != null; - } - - public void setSchemaNameIsSet(boolean value) { - if (!value) { - this.schemaName = null; - } - } - - public String getTableName() { - return this.tableName; - } - - public void setTableName(String tableName) { - this.tableName = tableName; - } - - public void unsetTableName() { - this.tableName = null; - } - - /** Returns true if field tableName is set (has been assigned a value) and false otherwise */ - public boolean isSetTableName() { - return this.tableName != null; - } - - public void setTableNameIsSet(boolean value) { - if (!value) { - this.tableName = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case CATALOG_NAME: - if (value == null) { - unsetCatalogName(); - } else { - setCatalogName((String)value); - } - break; - - case SCHEMA_NAME: - if (value == null) { - unsetSchemaName(); - } else { - setSchemaName((String)value); - } - break; - - case TABLE_NAME: - if (value == null) { - unsetTableName(); - } else { - setTableName((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case CATALOG_NAME: - return getCatalogName(); - - case SCHEMA_NAME: - return getSchemaName(); - - case TABLE_NAME: - return getTableName(); - - } - throw new 
IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case CATALOG_NAME: - return isSetCatalogName(); - case SCHEMA_NAME: - return isSetSchemaName(); - case TABLE_NAME: - return isSetTableName(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetPrimaryKeysReq) - return this.equals((TGetPrimaryKeysReq)that); - return false; - } - - public boolean equals(TGetPrimaryKeysReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_catalogName = true && this.isSetCatalogName(); - boolean that_present_catalogName = true && that.isSetCatalogName(); - if (this_present_catalogName || that_present_catalogName) { - if (!(this_present_catalogName && that_present_catalogName)) - return false; - if (!this.catalogName.equals(that.catalogName)) - return false; - } - - boolean this_present_schemaName = true && this.isSetSchemaName(); - boolean that_present_schemaName = true && that.isSetSchemaName(); - if (this_present_schemaName || that_present_schemaName) { - if (!(this_present_schemaName && that_present_schemaName)) - return false; - if (!this.schemaName.equals(that.schemaName)) - return false; - } - - boolean this_present_tableName = true && this.isSetTableName(); - boolean that_present_tableName = true && that.isSetTableName(); - if (this_present_tableName || that_present_tableName) { - if (!(this_present_tableName && that_present_tableName)) - return false; - if (!this.tableName.equals(that.tableName)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - boolean present_catalogName = true && (isSetCatalogName()); - list.add(present_catalogName); - if (present_catalogName) - list.add(catalogName); - - boolean present_schemaName = true && (isSetSchemaName()); - list.add(present_schemaName); - if (present_schemaName) - list.add(schemaName); - - boolean present_tableName = true && (isSetTableName()); - list.add(present_tableName); - if (present_tableName) - list.add(tableName); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetPrimaryKeysReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = 
Boolean.valueOf(isSetCatalogName()).compareTo(other.isSetCatalogName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetCatalogName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.catalogName, other.catalogName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSchemaName()).compareTo(other.isSetSchemaName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSchemaName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.schemaName, other.schemaName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetTableName()).compareTo(other.isSetTableName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTableName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.tableName, other.tableName); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetPrimaryKeysReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (isSetCatalogName()) { - if (!first) sb.append(", "); - sb.append("catalogName:"); - if (this.catalogName == null) { - sb.append("null"); - } else { - sb.append(this.catalogName); - } - first = false; - } - if (isSetSchemaName()) { - if (!first) sb.append(", "); - sb.append("schemaName:"); - if (this.schemaName == null) { - sb.append("null"); - } else { - sb.append(this.schemaName); - } - first = false; - } - if (isSetTableName()) { - if (!first) sb.append(", "); - sb.append("tableName:"); - if (this.tableName == null) { - sb.append("null"); - } else { - sb.append(this.tableName); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetPrimaryKeysReqStandardSchemeFactory implements SchemeFactory { - public TGetPrimaryKeysReqStandardScheme getScheme() { - return new TGetPrimaryKeysReqStandardScheme(); - } - } - - private static class TGetPrimaryKeysReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetPrimaryKeysReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // CATALOG_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SCHEMA_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // TABLE_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.tableName = iprot.readString(); - struct.setTableNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetPrimaryKeysReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.catalogName != null) { - if (struct.isSetCatalogName()) { - oprot.writeFieldBegin(CATALOG_NAME_FIELD_DESC); - oprot.writeString(struct.catalogName); - oprot.writeFieldEnd(); - } - } - if (struct.schemaName != null) { - if (struct.isSetSchemaName()) { - oprot.writeFieldBegin(SCHEMA_NAME_FIELD_DESC); - oprot.writeString(struct.schemaName); - oprot.writeFieldEnd(); - } - } - if (struct.tableName != null) { - if (struct.isSetTableName()) { - 
oprot.writeFieldBegin(TABLE_NAME_FIELD_DESC); - oprot.writeString(struct.tableName); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetPrimaryKeysReqTupleSchemeFactory implements SchemeFactory { - public TGetPrimaryKeysReqTupleScheme getScheme() { - return new TGetPrimaryKeysReqTupleScheme(); - } - } - - private static class TGetPrimaryKeysReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetPrimaryKeysReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetCatalogName()) { - optionals.set(0); - } - if (struct.isSetSchemaName()) { - optionals.set(1); - } - if (struct.isSetTableName()) { - optionals.set(2); - } - oprot.writeBitSet(optionals, 3); - if (struct.isSetCatalogName()) { - oprot.writeString(struct.catalogName); - } - if (struct.isSetSchemaName()) { - oprot.writeString(struct.schemaName); - } - if (struct.isSetTableName()) { - oprot.writeString(struct.tableName); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetPrimaryKeysReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - BitSet incoming = iprot.readBitSet(3); - if (incoming.get(0)) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } - if (incoming.get(1)) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } - if (incoming.get(2)) { - struct.tableName = iprot.readString(); - struct.setTableNameIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysResp.java deleted file mode 100644 index 72d9507fe1031..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysResp.java +++ /dev/null @@ -1,509 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetPrimaryKeysResp implements org.apache.thrift.TBase, 
java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetPrimaryKeysResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetPrimaryKeysRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetPrimaryKeysRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetPrimaryKeysResp.class, metaDataMap); - } - - public TGetPrimaryKeysResp() { - } - - public TGetPrimaryKeysResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TGetPrimaryKeysResp(TGetPrimaryKeysResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetPrimaryKeysResp deepCopy() { - return new TGetPrimaryKeysResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return 
getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetPrimaryKeysResp) - return this.equals((TGetPrimaryKeysResp)that); - return false; - } - - public boolean equals(TGetPrimaryKeysResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if (present_status) - list.add(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetPrimaryKeysResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetPrimaryKeysResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - 
if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetPrimaryKeysRespStandardSchemeFactory implements SchemeFactory { - public TGetPrimaryKeysRespStandardScheme getScheme() { - return new TGetPrimaryKeysRespStandardScheme(); - } - } - - private static class TGetPrimaryKeysRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetPrimaryKeysResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetPrimaryKeysResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetPrimaryKeysRespTupleSchemeFactory implements SchemeFactory { - public TGetPrimaryKeysRespTupleScheme getScheme() { - return new TGetPrimaryKeysRespTupleScheme(); - } - } - - private static class TGetPrimaryKeysRespTupleScheme extends TupleScheme { - - @Override - public void 
write(org.apache.thrift.protocol.TProtocol prot, TGetPrimaryKeysResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetPrimaryKeysResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataReq.java deleted file mode 100644 index b94d827de264d..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataReq.java +++ /dev/null @@ -1,394 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetResultSetMetadataReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetResultSetMetadataReq"); - - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetResultSetMetadataReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetResultSetMetadataReqTupleSchemeFactory()); - } - - private TOperationHandle operationHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OPERATION_HANDLE((short)1, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetResultSetMetadataReq.class, metaDataMap); - } - - public TGetResultSetMetadataReq() { - } - - public TGetResultSetMetadataReq( - TOperationHandle operationHandle) - { - this(); - this.operationHandle = operationHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetResultSetMetadataReq(TGetResultSetMetadataReq other) { - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetResultSetMetadataReq deepCopy() { - return new TGetResultSetMetadataReq(this); - } - - @Override - public void clear() { - this.operationHandle = null; - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetResultSetMetadataReq) - return this.equals((TGetResultSetMetadataReq)that); - return false; - } - - public boolean equals(TGetResultSetMetadataReq that) { - if (that == null) - return false; - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetResultSetMetadataReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void 
write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetResultSetMetadataReq("); - boolean first = true; - - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetOperationHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetResultSetMetadataReqStandardSchemeFactory implements SchemeFactory { - public TGetResultSetMetadataReqStandardScheme getScheme() { - return new TGetResultSetMetadataReqStandardScheme(); - } - } - - private static class TGetResultSetMetadataReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetResultSetMetadataReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetResultSetMetadataReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.operationHandle != null) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetResultSetMetadataReqTupleSchemeFactory implements SchemeFactory { - public TGetResultSetMetadataReqTupleScheme getScheme() { - return new TGetResultSetMetadataReqTupleScheme(); - } - } - - private static class TGetResultSetMetadataReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetResultSetMetadataReq struct) throws org.apache.thrift.TException { - 
TTupleProtocol oprot = (TTupleProtocol) prot; - struct.operationHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetResultSetMetadataReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataResp.java deleted file mode 100644 index ae2021ebd5a10..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataResp.java +++ /dev/null @@ -1,509 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetResultSetMetadataResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetResultSetMetadataResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField SCHEMA_FIELD_DESC = new org.apache.thrift.protocol.TField("schema", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetResultSetMetadataRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetResultSetMetadataRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TTableSchema schema; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - SCHEMA((short)2, "schema"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // SCHEMA - return SCHEMA; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.SCHEMA}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.SCHEMA, new org.apache.thrift.meta_data.FieldMetaData("schema", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TTableSchema.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetResultSetMetadataResp.class, metaDataMap); - } - - public TGetResultSetMetadataResp() { - } - - public TGetResultSetMetadataResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetResultSetMetadataResp(TGetResultSetMetadataResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetSchema()) { - this.schema = new TTableSchema(other.schema); - } - } - - public TGetResultSetMetadataResp deepCopy() { - return new TGetResultSetMetadataResp(this); - } - - @Override - public void clear() { - this.status = null; - this.schema = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TTableSchema getSchema() { - return this.schema; - } - - public void setSchema(TTableSchema schema) { - this.schema = schema; - } - - public void unsetSchema() { - this.schema = null; - } - - /** Returns true if field schema is set (has been assigned a value) and false otherwise */ - public boolean isSetSchema() { - return this.schema != null; - } - - public void setSchemaIsSet(boolean value) { - if (!value) { - this.schema = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case SCHEMA: - if (value == null) { - unsetSchema(); - } else { - setSchema((TTableSchema)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case SCHEMA: - return getSchema(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case SCHEMA: - return isSetSchema(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetResultSetMetadataResp) - return this.equals((TGetResultSetMetadataResp)that); - return false; - } - - public boolean equals(TGetResultSetMetadataResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_schema = true && this.isSetSchema(); - boolean that_present_schema = true && that.isSetSchema(); - if (this_present_schema || that_present_schema) { - if (!(this_present_schema && that_present_schema)) - return false; - if (!this.schema.equals(that.schema)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if (present_status) - list.add(status); - - boolean present_schema = true && (isSetSchema()); - list.add(present_schema); - if (present_schema) - list.add(schema); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetResultSetMetadataResp other) { - if 
(!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSchema()).compareTo(other.isSetSchema()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSchema()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.schema, other.schema); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetResultSetMetadataResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetSchema()) { - if (!first) sb.append(", "); - sb.append("schema:"); - if (this.schema == null) { - sb.append("null"); - } else { - sb.append(this.schema); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (schema != null) { - schema.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetResultSetMetadataRespStandardSchemeFactory implements SchemeFactory { - public TGetResultSetMetadataRespStandardScheme getScheme() { - return new TGetResultSetMetadataRespStandardScheme(); - } - } - - private static class TGetResultSetMetadataRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetResultSetMetadataResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // SCHEMA - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.schema = new TTableSchema(); - struct.schema.read(iprot); - struct.setSchemaIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetResultSetMetadataResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.schema != null) { - if (struct.isSetSchema()) { - oprot.writeFieldBegin(SCHEMA_FIELD_DESC); - struct.schema.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetResultSetMetadataRespTupleSchemeFactory implements SchemeFactory { - public TGetResultSetMetadataRespTupleScheme getScheme() { - return new TGetResultSetMetadataRespTupleScheme(); - } - } - - private static class TGetResultSetMetadataRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetResultSetMetadataResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetSchema()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSchema()) { - struct.schema.write(oprot); - } - } - - @Override - public void 
read(org.apache.thrift.protocol.TProtocol prot, TGetResultSetMetadataResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.schema = new TTableSchema(); - struct.schema.read(iprot); - struct.setSchemaIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasReq.java deleted file mode 100644 index 17eed87ae096f..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasReq.java +++ /dev/null @@ -1,610 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetSchemasReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetSchemasReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField CATALOG_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("catalogName", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField SCHEMA_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("schemaName", org.apache.thrift.protocol.TType.STRING, (short)3); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetSchemasReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetSchemasReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String catalogName; // optional - private String schemaName; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - CATALOG_NAME((short)2, "catalogName"), - SCHEMA_NAME((short)3, "schemaName"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // CATALOG_NAME - return CATALOG_NAME; - case 3: // SCHEMA_NAME - return SCHEMA_NAME; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.CATALOG_NAME,_Fields.SCHEMA_NAME}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.CATALOG_NAME, new org.apache.thrift.meta_data.FieldMetaData("catalogName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - tmpMap.put(_Fields.SCHEMA_NAME, new org.apache.thrift.meta_data.FieldMetaData("schemaName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetSchemasReq.class, metaDataMap); - } - - public TGetSchemasReq() { - } - - public TGetSchemasReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetSchemasReq(TGetSchemasReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetCatalogName()) { - this.catalogName = other.catalogName; - } - if (other.isSetSchemaName()) { - this.schemaName = other.schemaName; - } - } - - public TGetSchemasReq deepCopy() { - return new TGetSchemasReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.catalogName = null; - this.schemaName = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getCatalogName() { - return this.catalogName; - } - - public void setCatalogName(String catalogName) { - this.catalogName = catalogName; - } - - public void unsetCatalogName() { - this.catalogName = null; - } - - /** Returns true if field catalogName is set (has been assigned a value) and false otherwise */ - public boolean isSetCatalogName() { - return this.catalogName != null; - } - - public void setCatalogNameIsSet(boolean value) { - if (!value) { - this.catalogName = null; - } - } - - public String getSchemaName() { - return this.schemaName; - } - - public void setSchemaName(String schemaName) { - this.schemaName = schemaName; - } - - public void unsetSchemaName() { - this.schemaName = null; - } - - /** Returns true if field schemaName is set (has been assigned a value) and false otherwise */ - public boolean isSetSchemaName() { - return this.schemaName != null; - } - - public void setSchemaNameIsSet(boolean value) { - if (!value) { - this.schemaName = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case CATALOG_NAME: - if (value == null) { - unsetCatalogName(); - } else { - setCatalogName((String)value); - } - break; - - case SCHEMA_NAME: - if (value == null) { - unsetSchemaName(); - } else { - setSchemaName((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case CATALOG_NAME: - return getCatalogName(); - - case SCHEMA_NAME: - return getSchemaName(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case CATALOG_NAME: - return isSetCatalogName(); - case SCHEMA_NAME: - return isSetSchemaName(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetSchemasReq) - return this.equals((TGetSchemasReq)that); - return false; - } - - public boolean equals(TGetSchemasReq that) { - if (that == null) - return false; - - boolean 
this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_catalogName = true && this.isSetCatalogName(); - boolean that_present_catalogName = true && that.isSetCatalogName(); - if (this_present_catalogName || that_present_catalogName) { - if (!(this_present_catalogName && that_present_catalogName)) - return false; - if (!this.catalogName.equals(that.catalogName)) - return false; - } - - boolean this_present_schemaName = true && this.isSetSchemaName(); - boolean that_present_schemaName = true && that.isSetSchemaName(); - if (this_present_schemaName || that_present_schemaName) { - if (!(this_present_schemaName && that_present_schemaName)) - return false; - if (!this.schemaName.equals(that.schemaName)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - boolean present_catalogName = true && (isSetCatalogName()); - list.add(present_catalogName); - if (present_catalogName) - list.add(catalogName); - - boolean present_schemaName = true && (isSetSchemaName()); - list.add(present_schemaName); - if (present_schemaName) - list.add(schemaName); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetSchemasReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetCatalogName()).compareTo(other.isSetCatalogName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetCatalogName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.catalogName, other.catalogName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSchemaName()).compareTo(other.isSetSchemaName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSchemaName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.schemaName, other.schemaName); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetSchemasReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - 
sb.append(this.sessionHandle); - } - first = false; - if (isSetCatalogName()) { - if (!first) sb.append(", "); - sb.append("catalogName:"); - if (this.catalogName == null) { - sb.append("null"); - } else { - sb.append(this.catalogName); - } - first = false; - } - if (isSetSchemaName()) { - if (!first) sb.append(", "); - sb.append("schemaName:"); - if (this.schemaName == null) { - sb.append("null"); - } else { - sb.append(this.schemaName); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetSchemasReqStandardSchemeFactory implements SchemeFactory { - public TGetSchemasReqStandardScheme getScheme() { - return new TGetSchemasReqStandardScheme(); - } - } - - private static class TGetSchemasReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetSchemasReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // CATALOG_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SCHEMA_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetSchemasReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.catalogName != null) { - if 
(struct.isSetCatalogName()) { - oprot.writeFieldBegin(CATALOG_NAME_FIELD_DESC); - oprot.writeString(struct.catalogName); - oprot.writeFieldEnd(); - } - } - if (struct.schemaName != null) { - if (struct.isSetSchemaName()) { - oprot.writeFieldBegin(SCHEMA_NAME_FIELD_DESC); - oprot.writeString(struct.schemaName); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetSchemasReqTupleSchemeFactory implements SchemeFactory { - public TGetSchemasReqTupleScheme getScheme() { - return new TGetSchemasReqTupleScheme(); - } - } - - private static class TGetSchemasReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetSchemasReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetCatalogName()) { - optionals.set(0); - } - if (struct.isSetSchemaName()) { - optionals.set(1); - } - oprot.writeBitSet(optionals, 2); - if (struct.isSetCatalogName()) { - oprot.writeString(struct.catalogName); - } - if (struct.isSetSchemaName()) { - oprot.writeString(struct.schemaName); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetSchemasReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - BitSet incoming = iprot.readBitSet(2); - if (incoming.get(0)) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } - if (incoming.get(1)) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasResp.java deleted file mode 100644 index e5317f7ff5046..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasResp.java +++ /dev/null @@ -1,509 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetSchemasResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final 
org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetSchemasResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetSchemasRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetSchemasRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetSchemasResp.class, metaDataMap); - } - - public TGetSchemasResp() { - } - - public TGetSchemasResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetSchemasResp(TGetSchemasResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetSchemasResp deepCopy() { - return new TGetSchemasResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetSchemasResp) - return this.equals((TGetSchemasResp)that); - return false; - } - - public boolean equals(TGetSchemasResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if 
(present_status) - list.add(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetSchemasResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetSchemasResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetSchemasRespStandardSchemeFactory implements SchemeFactory { - public TGetSchemasRespStandardScheme getScheme() { - return new TGetSchemasRespStandardScheme(); - } - } - - private static class TGetSchemasRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetSchemasResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetSchemasResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetSchemasRespTupleSchemeFactory implements SchemeFactory { - public TGetSchemasRespTupleScheme getScheme() { - return new TGetSchemasRespTupleScheme(); - } - } - - private static class TGetSchemasRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetSchemasResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override - public void 
read(org.apache.thrift.protocol.TProtocol prot, TGetSchemasResp struct) throws org.apache.thrift.TException {
-      TTupleProtocol iprot = (TTupleProtocol) prot;
-      struct.status = new TStatus();
-      struct.status.read(iprot);
-      struct.setStatusIsSet(true);
-      BitSet incoming = iprot.readBitSet(1);
-      if (incoming.get(0)) {
-        struct.operationHandle = new TOperationHandle();
-        struct.operationHandle.read(iprot);
-        struct.setOperationHandleIsSet(true);
-      }
-    }
-  }
-
-}
-
diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesReq.java
deleted file mode 100644
index c027748a336e6..0000000000000
--- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesReq.java
+++ /dev/null
@@ -1,394 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.3)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-package org.apache.hive.service.rpc.thrift;
-
-import org.apache.thrift.scheme.IScheme;
-import org.apache.thrift.scheme.SchemeFactory;
-import org.apache.thrift.scheme.StandardScheme;
-
-import org.apache.thrift.scheme.TupleScheme;
-import org.apache.thrift.protocol.TTupleProtocol;
-import org.apache.thrift.protocol.TProtocolException;
-import org.apache.thrift.EncodingUtils;
-import org.apache.thrift.TException;
-import org.apache.thrift.async.AsyncMethodCallback;
-import org.apache.thrift.server.AbstractNonblockingServer.*;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.EnumMap;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.EnumSet;
-import java.util.Collections;
-import java.util.BitSet;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import javax.annotation.Generated;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"})
-@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)")
-public class TGetTableTypesReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable {
-  private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetTableTypesReq");
-
-  private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1);
-
-  private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>();
-  static {
-    schemes.put(StandardScheme.class, new TGetTableTypesReqStandardSchemeFactory());
-    schemes.put(TupleScheme.class, new TGetTableTypesReqTupleSchemeFactory());
-  }
-
-  private TSessionHandle sessionHandle; // required
-
-  /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
-  public enum _Fields implements org.apache.thrift.TFieldIdEnum {
-    SESSION_HANDLE((short)1, "sessionHandle");
-
-    private static final Map byName = new HashMap();
-
-    static {
-      for (_Fields field : EnumSet.allOf(_Fields.class)) {
-        byName.put(field.getFieldName(), field);
-      }
-    }
-
-    /**
-     * Find the _Fields constant that matches fieldId, or null if its not found.
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetTableTypesReq.class, metaDataMap); - } - - public TGetTableTypesReq() { - } - - public TGetTableTypesReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetTableTypesReq(TGetTableTypesReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - } - - public TGetTableTypesReq deepCopy() { - return new TGetTableTypesReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetTableTypesReq) - return this.equals((TGetTableTypesReq)that); - return false; - } - - public boolean equals(TGetTableTypesReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetTableTypesReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); 
- } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetTableTypesReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetTableTypesReqStandardSchemeFactory implements SchemeFactory { - public TGetTableTypesReqStandardScheme getScheme() { - return new TGetTableTypesReqStandardScheme(); - } - } - - private static class TGetTableTypesReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetTableTypesReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetTableTypesReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetTableTypesReqTupleSchemeFactory implements SchemeFactory { - public TGetTableTypesReqTupleScheme getScheme() { - return new TGetTableTypesReqTupleScheme(); - } - } - - private static class TGetTableTypesReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetTableTypesReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetTableTypesReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) 
prot;
-      struct.sessionHandle = new TSessionHandle();
-      struct.sessionHandle.read(iprot);
-      struct.setSessionHandleIsSet(true);
-    }
-  }
-
-}
-
diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesResp.java
deleted file mode 100644
index c6ce0d4368fdd..0000000000000
--- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesResp.java
+++ /dev/null
@@ -1,509 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.3)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-package org.apache.hive.service.rpc.thrift;
-
-import org.apache.thrift.scheme.IScheme;
-import org.apache.thrift.scheme.SchemeFactory;
-import org.apache.thrift.scheme.StandardScheme;
-
-import org.apache.thrift.scheme.TupleScheme;
-import org.apache.thrift.protocol.TTupleProtocol;
-import org.apache.thrift.protocol.TProtocolException;
-import org.apache.thrift.EncodingUtils;
-import org.apache.thrift.TException;
-import org.apache.thrift.async.AsyncMethodCallback;
-import org.apache.thrift.server.AbstractNonblockingServer.*;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.EnumMap;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.EnumSet;
-import java.util.Collections;
-import java.util.BitSet;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import javax.annotation.Generated;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"})
-@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)")
-public class TGetTableTypesResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable {
-  private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetTableTypesResp");
-
-  private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1);
-  private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2);
-
-  private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>();
-  static {
-    schemes.put(StandardScheme.class, new TGetTableTypesRespStandardSchemeFactory());
-    schemes.put(TupleScheme.class, new TGetTableTypesRespTupleSchemeFactory());
-  }
-
-  private TStatus status; // required
-  private TOperationHandle operationHandle; // optional
-
-  /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
-  public enum _Fields implements org.apache.thrift.TFieldIdEnum {
-    STATUS((short)1, "status"),
-    OPERATION_HANDLE((short)2, "operationHandle");
-
-    private static final Map byName = new HashMap();
-
-    static {
-      for (_Fields field : EnumSet.allOf(_Fields.class)) {
-        byName.put(field.getFieldName(), field);
-      }
-    }
-
-    /**
-     * Find the _Fields constant that matches fieldId, or null if its not found.
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetTableTypesResp.class, metaDataMap); - } - - public TGetTableTypesResp() { - } - - public TGetTableTypesResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetTableTypesResp(TGetTableTypesResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetTableTypesResp deepCopy() { - return new TGetTableTypesResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetTableTypesResp) - return this.equals((TGetTableTypesResp)that); - return false; - } - - public boolean equals(TGetTableTypesResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); 
- if (present_status) - list.add(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetTableTypesResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetTableTypesResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetTableTypesRespStandardSchemeFactory implements SchemeFactory { - public TGetTableTypesRespStandardScheme getScheme() { - return new TGetTableTypesRespStandardScheme(); - } - } - - private static class TGetTableTypesRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetTableTypesResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetTableTypesResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetTableTypesRespTupleSchemeFactory implements SchemeFactory { - public TGetTableTypesRespTupleScheme getScheme() { - return new TGetTableTypesRespTupleScheme(); - } - } - - private static class TGetTableTypesRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetTableTypesResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - 
- @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetTableTypesResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesReq.java deleted file mode 100644 index 1aa3f946727b6..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesReq.java +++ /dev/null @@ -1,871 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetTablesReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetTablesReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField CATALOG_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("catalogName", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField SCHEMA_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("schemaName", org.apache.thrift.protocol.TType.STRING, (short)3); - private static final org.apache.thrift.protocol.TField TABLE_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("tableName", org.apache.thrift.protocol.TType.STRING, (short)4); - private static final org.apache.thrift.protocol.TField TABLE_TYPES_FIELD_DESC = new org.apache.thrift.protocol.TField("tableTypes", org.apache.thrift.protocol.TType.LIST, (short)5); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetTablesReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetTablesReqTupleSchemeFactory()); - } - - private TSessionHandle 
sessionHandle; // required - private String catalogName; // optional - private String schemaName; // optional - private String tableName; // optional - private List tableTypes; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - CATALOG_NAME((short)2, "catalogName"), - SCHEMA_NAME((short)3, "schemaName"), - TABLE_NAME((short)4, "tableName"), - TABLE_TYPES((short)5, "tableTypes"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // CATALOG_NAME - return CATALOG_NAME; - case 3: // SCHEMA_NAME - return SCHEMA_NAME; - case 4: // TABLE_NAME - return TABLE_NAME; - case 5: // TABLE_TYPES - return TABLE_TYPES; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.CATALOG_NAME,_Fields.SCHEMA_NAME,_Fields.TABLE_NAME,_Fields.TABLE_TYPES}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.CATALOG_NAME, new org.apache.thrift.meta_data.FieldMetaData("catalogName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - tmpMap.put(_Fields.SCHEMA_NAME, new org.apache.thrift.meta_data.FieldMetaData("schemaName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - tmpMap.put(_Fields.TABLE_NAME, new org.apache.thrift.meta_data.FieldMetaData("tableName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - tmpMap.put(_Fields.TABLE_TYPES, new org.apache.thrift.meta_data.FieldMetaData("tableTypes", 
org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetTablesReq.class, metaDataMap); - } - - public TGetTablesReq() { - } - - public TGetTablesReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. - */ - public TGetTablesReq(TGetTablesReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetCatalogName()) { - this.catalogName = other.catalogName; - } - if (other.isSetSchemaName()) { - this.schemaName = other.schemaName; - } - if (other.isSetTableName()) { - this.tableName = other.tableName; - } - if (other.isSetTableTypes()) { - List __this__tableTypes = new ArrayList(other.tableTypes); - this.tableTypes = __this__tableTypes; - } - } - - public TGetTablesReq deepCopy() { - return new TGetTablesReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.catalogName = null; - this.schemaName = null; - this.tableName = null; - this.tableTypes = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getCatalogName() { - return this.catalogName; - } - - public void setCatalogName(String catalogName) { - this.catalogName = catalogName; - } - - public void unsetCatalogName() { - this.catalogName = null; - } - - /** Returns true if field catalogName is set (has been assigned a value) and false otherwise */ - public boolean isSetCatalogName() { - return this.catalogName != null; - } - - public void setCatalogNameIsSet(boolean value) { - if (!value) { - this.catalogName = null; - } - } - - public String getSchemaName() { - return this.schemaName; - } - - public void setSchemaName(String schemaName) { - this.schemaName = schemaName; - } - - public void unsetSchemaName() { - this.schemaName = null; - } - - /** Returns true if field schemaName is set (has been assigned a value) and false otherwise */ - public boolean isSetSchemaName() { - return this.schemaName != null; - } - - public void setSchemaNameIsSet(boolean value) { - if (!value) { - this.schemaName = null; - } - } - - public String getTableName() { - return this.tableName; - } - - public void setTableName(String tableName) { - this.tableName = tableName; - } - - public void unsetTableName() { - this.tableName = null; - } - - /** Returns true if field tableName is set (has been assigned a value) and false otherwise */ - public boolean isSetTableName() { - return this.tableName != null; - } - - public void setTableNameIsSet(boolean value) { - if (!value) { - this.tableName = null; - } - } - - public int getTableTypesSize() { - return (this.tableTypes == null) ? 
0 : this.tableTypes.size(); - } - - public java.util.Iterator getTableTypesIterator() { - return (this.tableTypes == null) ? null : this.tableTypes.iterator(); - } - - public void addToTableTypes(String elem) { - if (this.tableTypes == null) { - this.tableTypes = new ArrayList(); - } - this.tableTypes.add(elem); - } - - public List getTableTypes() { - return this.tableTypes; - } - - public void setTableTypes(List tableTypes) { - this.tableTypes = tableTypes; - } - - public void unsetTableTypes() { - this.tableTypes = null; - } - - /** Returns true if field tableTypes is set (has been assigned a value) and false otherwise */ - public boolean isSetTableTypes() { - return this.tableTypes != null; - } - - public void setTableTypesIsSet(boolean value) { - if (!value) { - this.tableTypes = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case CATALOG_NAME: - if (value == null) { - unsetCatalogName(); - } else { - setCatalogName((String)value); - } - break; - - case SCHEMA_NAME: - if (value == null) { - unsetSchemaName(); - } else { - setSchemaName((String)value); - } - break; - - case TABLE_NAME: - if (value == null) { - unsetTableName(); - } else { - setTableName((String)value); - } - break; - - case TABLE_TYPES: - if (value == null) { - unsetTableTypes(); - } else { - setTableTypes((List)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case CATALOG_NAME: - return getCatalogName(); - - case SCHEMA_NAME: - return getSchemaName(); - - case TABLE_NAME: - return getTableName(); - - case TABLE_TYPES: - return getTableTypes(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case CATALOG_NAME: - return isSetCatalogName(); - case SCHEMA_NAME: - return isSetSchemaName(); - case TABLE_NAME: - return isSetTableName(); - case TABLE_TYPES: - return isSetTableTypes(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetTablesReq) - return this.equals((TGetTablesReq)that); - return false; - } - - public boolean equals(TGetTablesReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_catalogName = true && this.isSetCatalogName(); - boolean that_present_catalogName = true && that.isSetCatalogName(); - if (this_present_catalogName || that_present_catalogName) { - if (!(this_present_catalogName && that_present_catalogName)) - return false; - if (!this.catalogName.equals(that.catalogName)) - return false; - } - - boolean this_present_schemaName = true && this.isSetSchemaName(); - boolean that_present_schemaName = true && that.isSetSchemaName(); - if 
(this_present_schemaName || that_present_schemaName) { - if (!(this_present_schemaName && that_present_schemaName)) - return false; - if (!this.schemaName.equals(that.schemaName)) - return false; - } - - boolean this_present_tableName = true && this.isSetTableName(); - boolean that_present_tableName = true && that.isSetTableName(); - if (this_present_tableName || that_present_tableName) { - if (!(this_present_tableName && that_present_tableName)) - return false; - if (!this.tableName.equals(that.tableName)) - return false; - } - - boolean this_present_tableTypes = true && this.isSetTableTypes(); - boolean that_present_tableTypes = true && that.isSetTableTypes(); - if (this_present_tableTypes || that_present_tableTypes) { - if (!(this_present_tableTypes && that_present_tableTypes)) - return false; - if (!this.tableTypes.equals(that.tableTypes)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - boolean present_catalogName = true && (isSetCatalogName()); - list.add(present_catalogName); - if (present_catalogName) - list.add(catalogName); - - boolean present_schemaName = true && (isSetSchemaName()); - list.add(present_schemaName); - if (present_schemaName) - list.add(schemaName); - - boolean present_tableName = true && (isSetTableName()); - list.add(present_tableName); - if (present_tableName) - list.add(tableName); - - boolean present_tableTypes = true && (isSetTableTypes()); - list.add(present_tableTypes); - if (present_tableTypes) - list.add(tableTypes); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetTablesReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetCatalogName()).compareTo(other.isSetCatalogName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetCatalogName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.catalogName, other.catalogName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSchemaName()).compareTo(other.isSetSchemaName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSchemaName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.schemaName, other.schemaName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetTableName()).compareTo(other.isSetTableName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTableName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.tableName, other.tableName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetTableTypes()).compareTo(other.isSetTableTypes()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTableTypes()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.tableTypes, 
other.tableTypes); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetTablesReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (isSetCatalogName()) { - if (!first) sb.append(", "); - sb.append("catalogName:"); - if (this.catalogName == null) { - sb.append("null"); - } else { - sb.append(this.catalogName); - } - first = false; - } - if (isSetSchemaName()) { - if (!first) sb.append(", "); - sb.append("schemaName:"); - if (this.schemaName == null) { - sb.append("null"); - } else { - sb.append(this.schemaName); - } - first = false; - } - if (isSetTableName()) { - if (!first) sb.append(", "); - sb.append("tableName:"); - if (this.tableName == null) { - sb.append("null"); - } else { - sb.append(this.tableName); - } - first = false; - } - if (isSetTableTypes()) { - if (!first) sb.append(", "); - sb.append("tableTypes:"); - if (this.tableTypes == null) { - sb.append("null"); - } else { - sb.append(this.tableTypes); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetTablesReqStandardSchemeFactory implements SchemeFactory { - public TGetTablesReqStandardScheme getScheme() { - return new TGetTablesReqStandardScheme(); - } - } - - private static class TGetTablesReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetTablesReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // CATALOG_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SCHEMA_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // TABLE_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.tableName = iprot.readString(); - struct.setTableNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 5: // TABLE_TYPES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list172 = iprot.readListBegin(); - struct.tableTypes = new ArrayList(_list172.size); - String _elem173; - for (int _i174 = 0; _i174 < _list172.size; ++_i174) - { - _elem173 = iprot.readString(); - struct.tableTypes.add(_elem173); - } - iprot.readListEnd(); - } - struct.setTableTypesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetTablesReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } 
- if (struct.catalogName != null) { - if (struct.isSetCatalogName()) { - oprot.writeFieldBegin(CATALOG_NAME_FIELD_DESC); - oprot.writeString(struct.catalogName); - oprot.writeFieldEnd(); - } - } - if (struct.schemaName != null) { - if (struct.isSetSchemaName()) { - oprot.writeFieldBegin(SCHEMA_NAME_FIELD_DESC); - oprot.writeString(struct.schemaName); - oprot.writeFieldEnd(); - } - } - if (struct.tableName != null) { - if (struct.isSetTableName()) { - oprot.writeFieldBegin(TABLE_NAME_FIELD_DESC); - oprot.writeString(struct.tableName); - oprot.writeFieldEnd(); - } - } - if (struct.tableTypes != null) { - if (struct.isSetTableTypes()) { - oprot.writeFieldBegin(TABLE_TYPES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, struct.tableTypes.size())); - for (String _iter175 : struct.tableTypes) - { - oprot.writeString(_iter175); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetTablesReqTupleSchemeFactory implements SchemeFactory { - public TGetTablesReqTupleScheme getScheme() { - return new TGetTablesReqTupleScheme(); - } - } - - private static class TGetTablesReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetTablesReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetCatalogName()) { - optionals.set(0); - } - if (struct.isSetSchemaName()) { - optionals.set(1); - } - if (struct.isSetTableName()) { - optionals.set(2); - } - if (struct.isSetTableTypes()) { - optionals.set(3); - } - oprot.writeBitSet(optionals, 4); - if (struct.isSetCatalogName()) { - oprot.writeString(struct.catalogName); - } - if (struct.isSetSchemaName()) { - oprot.writeString(struct.schemaName); - } - if (struct.isSetTableName()) { - oprot.writeString(struct.tableName); - } - if (struct.isSetTableTypes()) { - { - oprot.writeI32(struct.tableTypes.size()); - for (String _iter176 : struct.tableTypes) - { - oprot.writeString(_iter176); - } - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetTablesReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - BitSet incoming = iprot.readBitSet(4); - if (incoming.get(0)) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } - if (incoming.get(1)) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } - if (incoming.get(2)) { - struct.tableName = iprot.readString(); - struct.setTableNameIsSet(true); - } - if (incoming.get(3)) { - { - org.apache.thrift.protocol.TList _list177 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.tableTypes = new ArrayList(_list177.size); - String _elem178; - for (int _i179 = 0; _i179 < _list177.size; ++_i179) - { - _elem178 = iprot.readString(); - struct.tableTypes.add(_elem178); - } - } - struct.setTableTypesIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesResp.java deleted file mode 100644 index 
0b7c3825d35a5..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesResp.java +++ /dev/null @@ -1,509 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetTablesResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetTablesResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetTablesRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetTablesRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetTablesResp.class, metaDataMap); - } - - public TGetTablesResp() { - } - - public TGetTablesResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TGetTablesResp(TGetTablesResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetTablesResp deepCopy() { - return new TGetTablesResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new 
IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetTablesResp) - return this.equals((TGetTablesResp)that); - return false; - } - - public boolean equals(TGetTablesResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if (present_status) - list.add(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetTablesResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetTablesResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else 
{ - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetTablesRespStandardSchemeFactory implements SchemeFactory { - public TGetTablesRespStandardScheme getScheme() { - return new TGetTablesRespStandardScheme(); - } - } - - private static class TGetTablesRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetTablesResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetTablesResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetTablesRespTupleSchemeFactory implements SchemeFactory { - public TGetTablesRespTupleScheme getScheme() { - return new TGetTablesRespTupleScheme(); - } - } - - private static class TGetTablesRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetTablesResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot 
= (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetTablesResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoReq.java deleted file mode 100644 index 2e0ec60e4bc3d..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoReq.java +++ /dev/null @@ -1,394 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetTypeInfoReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetTypeInfoReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetTypeInfoReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetTypeInfoReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetTypeInfoReq.class, metaDataMap); - } - - public TGetTypeInfoReq() { - } - - public TGetTypeInfoReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetTypeInfoReq(TGetTypeInfoReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - } - - public TGetTypeInfoReq deepCopy() { - return new TGetTypeInfoReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetTypeInfoReq) - return this.equals((TGetTypeInfoReq)that); - return false; - } - - public boolean equals(TGetTypeInfoReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetTypeInfoReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - 
@Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetTypeInfoReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetTypeInfoReqStandardSchemeFactory implements SchemeFactory { - public TGetTypeInfoReqStandardScheme getScheme() { - return new TGetTypeInfoReqStandardScheme(); - } - } - - private static class TGetTypeInfoReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetTypeInfoReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetTypeInfoReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetTypeInfoReqTupleSchemeFactory implements SchemeFactory { - public TGetTypeInfoReqTupleScheme getScheme() { - return new TGetTypeInfoReqTupleScheme(); - } - } - - private static class TGetTypeInfoReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetTypeInfoReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetTypeInfoReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new 
TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoResp.java deleted file mode 100644 index cc2910ef29feb..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoResp.java +++ /dev/null @@ -1,509 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetTypeInfoResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetTypeInfoResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetTypeInfoRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetTypeInfoRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetTypeInfoResp.class, metaDataMap); - } - - public TGetTypeInfoResp() { - } - - public TGetTypeInfoResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetTypeInfoResp(TGetTypeInfoResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetTypeInfoResp deepCopy() { - return new TGetTypeInfoResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetTypeInfoResp) - return this.equals((TGetTypeInfoResp)that); - return false; - } - - public boolean equals(TGetTypeInfoResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if 
(present_status) - list.add(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetTypeInfoResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetTypeInfoResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetTypeInfoRespStandardSchemeFactory implements SchemeFactory { - public TGetTypeInfoRespStandardScheme getScheme() { - return new TGetTypeInfoRespStandardScheme(); - } - } - - private static class TGetTypeInfoRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetTypeInfoResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetTypeInfoResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetTypeInfoRespTupleSchemeFactory implements SchemeFactory { - public TGetTypeInfoRespTupleScheme getScheme() { - return new TGetTypeInfoRespTupleScheme(); - } - } - - private static class TGetTypeInfoRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetTypeInfoResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override - public 
void read(org.apache.thrift.protocol.TProtocol prot, TGetTypeInfoResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/THandleIdentifier.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/THandleIdentifier.java deleted file mode 100644 index a3879d830000b..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/THandleIdentifier.java +++ /dev/null @@ -1,508 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class THandleIdentifier implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("THandleIdentifier"); - - private static final org.apache.thrift.protocol.TField GUID_FIELD_DESC = new org.apache.thrift.protocol.TField("guid", org.apache.thrift.protocol.TType.STRING, (short)1); - private static final org.apache.thrift.protocol.TField SECRET_FIELD_DESC = new org.apache.thrift.protocol.TField("secret", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new THandleIdentifierStandardSchemeFactory()); - schemes.put(TupleScheme.class, new THandleIdentifierTupleSchemeFactory()); - } - - private ByteBuffer guid; // required - private ByteBuffer secret; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - GUID((short)1, "guid"), - SECRET((short)2, "secret"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // GUID - return GUID; - case 2: // SECRET - return SECRET; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.GUID, new org.apache.thrift.meta_data.FieldMetaData("guid", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - tmpMap.put(_Fields.SECRET, new org.apache.thrift.meta_data.FieldMetaData("secret", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(THandleIdentifier.class, metaDataMap); - } - - public THandleIdentifier() { - } - - public THandleIdentifier( - ByteBuffer guid, - ByteBuffer secret) - { - this(); - this.guid = org.apache.thrift.TBaseHelper.copyBinary(guid); - this.secret = org.apache.thrift.TBaseHelper.copyBinary(secret); - } - - /** - * Performs a deep copy on other. - */ - public THandleIdentifier(THandleIdentifier other) { - if (other.isSetGuid()) { - this.guid = org.apache.thrift.TBaseHelper.copyBinary(other.guid); - } - if (other.isSetSecret()) { - this.secret = org.apache.thrift.TBaseHelper.copyBinary(other.secret); - } - } - - public THandleIdentifier deepCopy() { - return new THandleIdentifier(this); - } - - @Override - public void clear() { - this.guid = null; - this.secret = null; - } - - public byte[] getGuid() { - setGuid(org.apache.thrift.TBaseHelper.rightSize(guid)); - return guid == null ? null : guid.array(); - } - - public ByteBuffer bufferForGuid() { - return org.apache.thrift.TBaseHelper.copyBinary(guid); - } - - public void setGuid(byte[] guid) { - this.guid = guid == null ? (ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(guid, guid.length)); - } - - public void setGuid(ByteBuffer guid) { - this.guid = org.apache.thrift.TBaseHelper.copyBinary(guid); - } - - public void unsetGuid() { - this.guid = null; - } - - /** Returns true if field guid is set (has been assigned a value) and false otherwise */ - public boolean isSetGuid() { - return this.guid != null; - } - - public void setGuidIsSet(boolean value) { - if (!value) { - this.guid = null; - } - } - - public byte[] getSecret() { - setSecret(org.apache.thrift.TBaseHelper.rightSize(secret)); - return secret == null ? 
null : secret.array(); - } - - public ByteBuffer bufferForSecret() { - return org.apache.thrift.TBaseHelper.copyBinary(secret); - } - - public void setSecret(byte[] secret) { - this.secret = secret == null ? (ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(secret, secret.length)); - } - - public void setSecret(ByteBuffer secret) { - this.secret = org.apache.thrift.TBaseHelper.copyBinary(secret); - } - - public void unsetSecret() { - this.secret = null; - } - - /** Returns true if field secret is set (has been assigned a value) and false otherwise */ - public boolean isSetSecret() { - return this.secret != null; - } - - public void setSecretIsSet(boolean value) { - if (!value) { - this.secret = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case GUID: - if (value == null) { - unsetGuid(); - } else { - setGuid((ByteBuffer)value); - } - break; - - case SECRET: - if (value == null) { - unsetSecret(); - } else { - setSecret((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case GUID: - return getGuid(); - - case SECRET: - return getSecret(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case GUID: - return isSetGuid(); - case SECRET: - return isSetSecret(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof THandleIdentifier) - return this.equals((THandleIdentifier)that); - return false; - } - - public boolean equals(THandleIdentifier that) { - if (that == null) - return false; - - boolean this_present_guid = true && this.isSetGuid(); - boolean that_present_guid = true && that.isSetGuid(); - if (this_present_guid || that_present_guid) { - if (!(this_present_guid && that_present_guid)) - return false; - if (!this.guid.equals(that.guid)) - return false; - } - - boolean this_present_secret = true && this.isSetSecret(); - boolean that_present_secret = true && that.isSetSecret(); - if (this_present_secret || that_present_secret) { - if (!(this_present_secret && that_present_secret)) - return false; - if (!this.secret.equals(that.secret)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_guid = true && (isSetGuid()); - list.add(present_guid); - if (present_guid) - list.add(guid); - - boolean present_secret = true && (isSetSecret()); - list.add(present_secret); - if (present_secret) - list.add(secret); - - return list.hashCode(); - } - - @Override - public int compareTo(THandleIdentifier other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetGuid()).compareTo(other.isSetGuid()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetGuid()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.guid, other.guid); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSecret()).compareTo(other.isSetSecret()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSecret()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.secret, 
other.secret); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("THandleIdentifier("); - boolean first = true; - - sb.append("guid:"); - if (this.guid == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.guid, sb); - } - first = false; - if (!first) sb.append(", "); - sb.append("secret:"); - if (this.secret == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.secret, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetGuid()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'guid' is unset! Struct:" + toString()); - } - - if (!isSetSecret()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'secret' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class THandleIdentifierStandardSchemeFactory implements SchemeFactory { - public THandleIdentifierStandardScheme getScheme() { - return new THandleIdentifierStandardScheme(); - } - } - - private static class THandleIdentifierStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, THandleIdentifier struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // GUID - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.guid = iprot.readBinary(); - struct.setGuidIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // SECRET - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.secret = iprot.readBinary(); - struct.setSecretIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, THandleIdentifier struct) throws 
org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.guid != null) { - oprot.writeFieldBegin(GUID_FIELD_DESC); - oprot.writeBinary(struct.guid); - oprot.writeFieldEnd(); - } - if (struct.secret != null) { - oprot.writeFieldBegin(SECRET_FIELD_DESC); - oprot.writeBinary(struct.secret); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class THandleIdentifierTupleSchemeFactory implements SchemeFactory { - public THandleIdentifierTupleScheme getScheme() { - return new THandleIdentifierTupleScheme(); - } - } - - private static class THandleIdentifierTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, THandleIdentifier struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeBinary(struct.guid); - oprot.writeBinary(struct.secret); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, THandleIdentifier struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.guid = iprot.readBinary(); - struct.setGuidIsSet(true); - struct.secret = iprot.readBinary(); - struct.setSecretIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Column.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Column.java deleted file mode 100644 index 3c44b602b4ff7..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Column.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TI16Column implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TI16Column"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, 
SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TI16ColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TI16ColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I16)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TI16Column.class, metaDataMap); - } - - public TI16Column() { - } - - public TI16Column( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - /** - * Performs a deep copy on other. - */ - public TI16Column(TI16Column other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(other.values); - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); - } - } - - public TI16Column deepCopy() { - return new TI16Column(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 
0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? null : this.values.iterator(); - } - - public void addToValues(short elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void setNulls(byte[] nulls) { - this.nulls = nulls == null ? (ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(nulls, nulls.length)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TI16Column) - return this.equals((TI16Column)that); - return false; - } - - public boolean equals(TI16Column that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_values = true && (isSetValues()); - list.add(present_values); - if (present_values) - list.add(values); - - boolean present_nulls = true && (isSetNulls()); - list.add(present_nulls); - if 
(present_nulls) - list.add(nulls); - - return list.hashCode(); - } - - @Override - public int compareTo(TI16Column other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(other.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, other.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(other.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, other.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TI16Column("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TI16ColumnStandardSchemeFactory implements SchemeFactory { - public TI16ColumnStandardScheme getScheme() { - return new TI16ColumnStandardScheme(); - } - } - - private static class TI16ColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TI16Column struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list70 = iprot.readListBegin(); - struct.values = new ArrayList(_list70.size); - short _elem71; - for (int _i72 = 0; _i72 < _list70.size; ++_i72) - { - _elem71 = iprot.readI16(); - struct.values.add(_elem71); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TI16Column struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I16, struct.values.size())); - for (short _iter73 : struct.values) - { - oprot.writeI16(_iter73); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TI16ColumnTupleSchemeFactory implements SchemeFactory { - public TI16ColumnTupleScheme getScheme() { - return new TI16ColumnTupleScheme(); - } - } - - private static class TI16ColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TI16Column struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (short _iter74 : struct.values) - { - oprot.writeI16(_iter74); - } - } - oprot.writeBinary(struct.nulls); - } - - @Override - public 
void read(org.apache.thrift.protocol.TProtocol prot, TI16Column struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list75 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I16, iprot.readI32()); - struct.values = new ArrayList(_list75.size); - short _elem76; - for (int _i77 = 0; _i77 < _list75.size; ++_i77) - { - _elem76 = iprot.readI16(); - struct.values.add(_elem76); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Value.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Value.java deleted file mode 100644 index 29fb4cb85201d..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Value.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TI16Value implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TI16Value"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("value", org.apache.thrift.protocol.TType.I16, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TI16ValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TI16ValueTupleSchemeFactory()); - } - - private short value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __VALUE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I16))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TI16Value.class, metaDataMap); - } - - public TI16Value() { - } - - /** - * Performs a deep copy on other. 
- */ - public TI16Value(TI16Value other) { - __isset_bitfield = other.__isset_bitfield; - this.value = other.value; - } - - public TI16Value deepCopy() { - return new TI16Value(this); - } - - @Override - public void clear() { - setValueIsSet(false); - this.value = 0; - } - - public short getValue() { - return this.value; - } - - public void setValue(short value) { - this.value = value; - setValueIsSet(true); - } - - public void unsetValue() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return EncodingUtils.testBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - public void setValueIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((Short)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return getValue(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TI16Value) - return this.equals((TI16Value)that); - return false; - } - - public boolean equals(TI16Value that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || that_present_value) { - if (!(this_present_value && that_present_value)) - return false; - if (this.value != that.value) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_value = true && (isSetValue()); - list.add(present_value); - if (present_value) - list.add(value); - - return list.hashCode(); - } - - @Override - public int compareTo(TI16Value other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(other.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, other.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TI16Value("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); - sb.append(this.value); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - 
public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TI16ValueStandardSchemeFactory implements SchemeFactory { - public TI16ValueStandardScheme getScheme() { - return new TI16ValueStandardScheme(); - } - } - - private static class TI16ValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TI16Value struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.I16) { - struct.value = iprot.readI16(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TI16Value struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.isSetValue()) { - oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeI16(struct.value); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TI16ValueTupleSchemeFactory implements SchemeFactory { - public TI16ValueTupleScheme getScheme() { - return new TI16ValueTupleScheme(); - } - } - - private static class TI16ValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TI16Value struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeI16(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TI16Value struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readI16(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Column.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Column.java deleted file mode 100644 index 9834f1ce8f01b..0000000000000 --- 
a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Column.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TI32Column implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TI32Column"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TI32ColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TI32ColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TI32Column.class, metaDataMap); - } - - public TI32Column() { - } - - public TI32Column( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - /** - * Performs a deep copy on other. - */ - public TI32Column(TI32Column other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(other.values); - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); - } - } - - public TI32Column deepCopy() { - return new TI32Column(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? null : this.values.iterator(); - } - - public void addToValues(int elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void setNulls(byte[] nulls) { - this.nulls = nulls == null ? 
(ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(nulls, nulls.length)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TI32Column) - return this.equals((TI32Column)that); - return false; - } - - public boolean equals(TI32Column that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_values = true && (isSetValues()); - list.add(present_values); - if (present_values) - list.add(values); - - boolean present_nulls = true && (isSetNulls()); - list.add(present_nulls); - if (present_nulls) - list.add(nulls); - - return list.hashCode(); - } - - @Override - public int compareTo(TI32Column other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(other.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, other.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(other.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, other.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void 
read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TI32Column("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TI32ColumnStandardSchemeFactory implements SchemeFactory { - public TI32ColumnStandardScheme getScheme() { - return new TI32ColumnStandardScheme(); - } - } - - private static class TI32ColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TI32Column struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list78 = iprot.readListBegin(); - struct.values = new ArrayList(_list78.size); - int _elem79; - for (int _i80 = 0; _i80 < _list78.size; ++_i80) - { - _elem79 = iprot.readI32(); - struct.values.add(_elem79); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TI32Column struct) throws org.apache.thrift.TException { - 
struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I32, struct.values.size())); - for (int _iter81 : struct.values) - { - oprot.writeI32(_iter81); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TI32ColumnTupleSchemeFactory implements SchemeFactory { - public TI32ColumnTupleScheme getScheme() { - return new TI32ColumnTupleScheme(); - } - } - - private static class TI32ColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TI32Column struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (int _iter82 : struct.values) - { - oprot.writeI32(_iter82); - } - } - oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TI32Column struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list83 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I32, iprot.readI32()); - struct.values = new ArrayList(_list83.size); - int _elem84; - for (int _i85 = 0; _i85 < _list83.size; ++_i85) - { - _elem84 = iprot.readI32(); - struct.values.add(_elem84); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Value.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Value.java deleted file mode 100644 index 8a69632b2d76e..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Value.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TI32Value implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = 
new org.apache.thrift.protocol.TStruct("TI32Value"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("value", org.apache.thrift.protocol.TType.I32, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TI32ValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TI32ValueTupleSchemeFactory()); - } - - private int value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __VALUE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TI32Value.class, metaDataMap); - } - - public TI32Value() { - } - - /** - * Performs a deep copy on other. 
- */ - public TI32Value(TI32Value other) { - __isset_bitfield = other.__isset_bitfield; - this.value = other.value; - } - - public TI32Value deepCopy() { - return new TI32Value(this); - } - - @Override - public void clear() { - setValueIsSet(false); - this.value = 0; - } - - public int getValue() { - return this.value; - } - - public void setValue(int value) { - this.value = value; - setValueIsSet(true); - } - - public void unsetValue() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return EncodingUtils.testBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - public void setValueIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((Integer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return getValue(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TI32Value) - return this.equals((TI32Value)that); - return false; - } - - public boolean equals(TI32Value that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || that_present_value) { - if (!(this_present_value && that_present_value)) - return false; - if (this.value != that.value) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_value = true && (isSetValue()); - list.add(present_value); - if (present_value) - list.add(value); - - return list.hashCode(); - } - - @Override - public int compareTo(TI32Value other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(other.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, other.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TI32Value("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); - sb.append(this.value); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - 
public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TI32ValueStandardSchemeFactory implements SchemeFactory { - public TI32ValueStandardScheme getScheme() { - return new TI32ValueStandardScheme(); - } - } - - private static class TI32ValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TI32Value struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.value = iprot.readI32(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TI32Value struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.isSetValue()) { - oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeI32(struct.value); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TI32ValueTupleSchemeFactory implements SchemeFactory { - public TI32ValueTupleScheme getScheme() { - return new TI32ValueTupleScheme(); - } - } - - private static class TI32ValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TI32Value struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeI32(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TI32Value struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readI32(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Column.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Column.java deleted file mode 100644 index cd5ef2d7a9ed9..0000000000000 --- 
a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Column.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TI64Column implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TI64Column"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TI64ColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TI64ColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TI64Column.class, metaDataMap); - } - - public TI64Column() { - } - - public TI64Column( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - /** - * Performs a deep copy on other. - */ - public TI64Column(TI64Column other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(other.values); - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); - } - } - - public TI64Column deepCopy() { - return new TI64Column(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? null : this.values.iterator(); - } - - public void addToValues(long elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void setNulls(byte[] nulls) { - this.nulls = nulls == null ? 
(ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(nulls, nulls.length)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TI64Column) - return this.equals((TI64Column)that); - return false; - } - - public boolean equals(TI64Column that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_values = true && (isSetValues()); - list.add(present_values); - if (present_values) - list.add(values); - - boolean present_nulls = true && (isSetNulls()); - list.add(present_nulls); - if (present_nulls) - list.add(nulls); - - return list.hashCode(); - } - - @Override - public int compareTo(TI64Column other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(other.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, other.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(other.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, other.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void 
read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TI64Column("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TI64ColumnStandardSchemeFactory implements SchemeFactory { - public TI64ColumnStandardScheme getScheme() { - return new TI64ColumnStandardScheme(); - } - } - - private static class TI64ColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TI64Column struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list86 = iprot.readListBegin(); - struct.values = new ArrayList(_list86.size); - long _elem87; - for (int _i88 = 0; _i88 < _list86.size; ++_i88) - { - _elem87 = iprot.readI64(); - struct.values.add(_elem87); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TI64Column struct) throws org.apache.thrift.TException { - 
struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I64, struct.values.size())); - for (long _iter89 : struct.values) - { - oprot.writeI64(_iter89); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TI64ColumnTupleSchemeFactory implements SchemeFactory { - public TI64ColumnTupleScheme getScheme() { - return new TI64ColumnTupleScheme(); - } - } - - private static class TI64ColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TI64Column struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (long _iter90 : struct.values) - { - oprot.writeI64(_iter90); - } - } - oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TI64Column struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list91 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I64, iprot.readI32()); - struct.values = new ArrayList(_list91.size); - long _elem92; - for (int _i93 = 0; _i93 < _list91.size; ++_i93) - { - _elem92 = iprot.readI64(); - struct.values.add(_elem92); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Value.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Value.java deleted file mode 100644 index 393c0bd28610d..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Value.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TI64Value implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct 
STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TI64Value"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("value", org.apache.thrift.protocol.TType.I64, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TI64ValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TI64ValueTupleSchemeFactory()); - } - - private long value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __VALUE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TI64Value.class, metaDataMap); - } - - public TI64Value() { - } - - /** - * Performs a deep copy on other. 
- */ - public TI64Value(TI64Value other) { - __isset_bitfield = other.__isset_bitfield; - this.value = other.value; - } - - public TI64Value deepCopy() { - return new TI64Value(this); - } - - @Override - public void clear() { - setValueIsSet(false); - this.value = 0; - } - - public long getValue() { - return this.value; - } - - public void setValue(long value) { - this.value = value; - setValueIsSet(true); - } - - public void unsetValue() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return EncodingUtils.testBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - public void setValueIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((Long)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return getValue(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TI64Value) - return this.equals((TI64Value)that); - return false; - } - - public boolean equals(TI64Value that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || that_present_value) { - if (!(this_present_value && that_present_value)) - return false; - if (this.value != that.value) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_value = true && (isSetValue()); - list.add(present_value); - if (present_value) - list.add(value); - - return list.hashCode(); - } - - @Override - public int compareTo(TI64Value other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(other.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, other.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TI64Value("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); - sb.append(this.value); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public 
void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TI64ValueStandardSchemeFactory implements SchemeFactory { - public TI64ValueStandardScheme getScheme() { - return new TI64ValueStandardScheme(); - } - } - - private static class TI64ValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TI64Value struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.I64) { - struct.value = iprot.readI64(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TI64Value struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.isSetValue()) { - oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeI64(struct.value); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TI64ValueTupleSchemeFactory implements SchemeFactory { - public TI64ValueTupleScheme getScheme() { - return new TI64ValueTupleScheme(); - } - } - - private static class TI64ValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TI64Value struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeI64(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TI64Value struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readI64(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TJobExecutionStatus.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TJobExecutionStatus.java deleted file mode 100644 index b39f208c1b878..0000000000000 --- 
a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TJobExecutionStatus.java +++ /dev/null @@ -1,48 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - - -import java.util.Map; -import java.util.HashMap; -import org.apache.thrift.TEnum; - -public enum TJobExecutionStatus implements org.apache.thrift.TEnum { - IN_PROGRESS(0), - COMPLETE(1), - NOT_AVAILABLE(2); - - private final int value; - - private TJobExecutionStatus(int value) { - this.value = value; - } - - /** - * Get the integer value of this enum value, as defined in the Thrift IDL. - */ - public int getValue() { - return value; - } - - /** - * Find a the enum type by its integer value, as defined in the Thrift IDL. - * @return null if the value is not found. - */ - public static TJobExecutionStatus findByValue(int value) { - switch (value) { - case 0: - return IN_PROGRESS; - case 1: - return COMPLETE; - case 2: - return NOT_AVAILABLE; - default: - return null; - } - } -} diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TMapTypeEntry.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TMapTypeEntry.java deleted file mode 100644 index 7ebc15c9432be..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TMapTypeEntry.java +++ /dev/null @@ -1,482 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TMapTypeEntry implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TMapTypeEntry"); - - private static final org.apache.thrift.protocol.TField KEY_TYPE_PTR_FIELD_DESC = new org.apache.thrift.protocol.TField("keyTypePtr", org.apache.thrift.protocol.TType.I32, (short)1); - private static final org.apache.thrift.protocol.TField VALUE_TYPE_PTR_FIELD_DESC = new org.apache.thrift.protocol.TField("valueTypePtr", org.apache.thrift.protocol.TType.I32, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TMapTypeEntryStandardSchemeFactory()); - schemes.put(TupleScheme.class, new 
TMapTypeEntryTupleSchemeFactory()); - } - - private int keyTypePtr; // required - private int valueTypePtr; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - KEY_TYPE_PTR((short)1, "keyTypePtr"), - VALUE_TYPE_PTR((short)2, "valueTypePtr"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // KEY_TYPE_PTR - return KEY_TYPE_PTR; - case 2: // VALUE_TYPE_PTR - return VALUE_TYPE_PTR; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __KEYTYPEPTR_ISSET_ID = 0; - private static final int __VALUETYPEPTR_ISSET_ID = 1; - private byte __isset_bitfield = 0; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.KEY_TYPE_PTR, new org.apache.thrift.meta_data.FieldMetaData("keyTypePtr", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32 , "TTypeEntryPtr"))); - tmpMap.put(_Fields.VALUE_TYPE_PTR, new org.apache.thrift.meta_data.FieldMetaData("valueTypePtr", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32 , "TTypeEntryPtr"))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TMapTypeEntry.class, metaDataMap); - } - - public TMapTypeEntry() { - } - - public TMapTypeEntry( - int keyTypePtr, - int valueTypePtr) - { - this(); - this.keyTypePtr = keyTypePtr; - setKeyTypePtrIsSet(true); - this.valueTypePtr = valueTypePtr; - setValueTypePtrIsSet(true); - } - - /** - * Performs a deep copy on other. 
- */ - public TMapTypeEntry(TMapTypeEntry other) { - __isset_bitfield = other.__isset_bitfield; - this.keyTypePtr = other.keyTypePtr; - this.valueTypePtr = other.valueTypePtr; - } - - public TMapTypeEntry deepCopy() { - return new TMapTypeEntry(this); - } - - @Override - public void clear() { - setKeyTypePtrIsSet(false); - this.keyTypePtr = 0; - setValueTypePtrIsSet(false); - this.valueTypePtr = 0; - } - - public int getKeyTypePtr() { - return this.keyTypePtr; - } - - public void setKeyTypePtr(int keyTypePtr) { - this.keyTypePtr = keyTypePtr; - setKeyTypePtrIsSet(true); - } - - public void unsetKeyTypePtr() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __KEYTYPEPTR_ISSET_ID); - } - - /** Returns true if field keyTypePtr is set (has been assigned a value) and false otherwise */ - public boolean isSetKeyTypePtr() { - return EncodingUtils.testBit(__isset_bitfield, __KEYTYPEPTR_ISSET_ID); - } - - public void setKeyTypePtrIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __KEYTYPEPTR_ISSET_ID, value); - } - - public int getValueTypePtr() { - return this.valueTypePtr; - } - - public void setValueTypePtr(int valueTypePtr) { - this.valueTypePtr = valueTypePtr; - setValueTypePtrIsSet(true); - } - - public void unsetValueTypePtr() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUETYPEPTR_ISSET_ID); - } - - /** Returns true if field valueTypePtr is set (has been assigned a value) and false otherwise */ - public boolean isSetValueTypePtr() { - return EncodingUtils.testBit(__isset_bitfield, __VALUETYPEPTR_ISSET_ID); - } - - public void setValueTypePtrIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUETYPEPTR_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case KEY_TYPE_PTR: - if (value == null) { - unsetKeyTypePtr(); - } else { - setKeyTypePtr((Integer)value); - } - break; - - case VALUE_TYPE_PTR: - if (value == null) { - unsetValueTypePtr(); - } else { - setValueTypePtr((Integer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case KEY_TYPE_PTR: - return getKeyTypePtr(); - - case VALUE_TYPE_PTR: - return getValueTypePtr(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case KEY_TYPE_PTR: - return isSetKeyTypePtr(); - case VALUE_TYPE_PTR: - return isSetValueTypePtr(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TMapTypeEntry) - return this.equals((TMapTypeEntry)that); - return false; - } - - public boolean equals(TMapTypeEntry that) { - if (that == null) - return false; - - boolean this_present_keyTypePtr = true; - boolean that_present_keyTypePtr = true; - if (this_present_keyTypePtr || that_present_keyTypePtr) { - if (!(this_present_keyTypePtr && that_present_keyTypePtr)) - return false; - if (this.keyTypePtr != that.keyTypePtr) - return false; - } - - boolean this_present_valueTypePtr = true; - boolean that_present_valueTypePtr = true; - if (this_present_valueTypePtr || that_present_valueTypePtr) { - if (!(this_present_valueTypePtr && that_present_valueTypePtr)) - return false; - if (this.valueTypePtr != that.valueTypePtr) - return 
false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_keyTypePtr = true; - list.add(present_keyTypePtr); - if (present_keyTypePtr) - list.add(keyTypePtr); - - boolean present_valueTypePtr = true; - list.add(present_valueTypePtr); - if (present_valueTypePtr) - list.add(valueTypePtr); - - return list.hashCode(); - } - - @Override - public int compareTo(TMapTypeEntry other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetKeyTypePtr()).compareTo(other.isSetKeyTypePtr()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetKeyTypePtr()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.keyTypePtr, other.keyTypePtr); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetValueTypePtr()).compareTo(other.isSetValueTypePtr()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValueTypePtr()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.valueTypePtr, other.valueTypePtr); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TMapTypeEntry("); - boolean first = true; - - sb.append("keyTypePtr:"); - sb.append(this.keyTypePtr); - first = false; - if (!first) sb.append(", "); - sb.append("valueTypePtr:"); - sb.append(this.valueTypePtr); - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetKeyTypePtr()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'keyTypePtr' is unset! Struct:" + toString()); - } - - if (!isSetValueTypePtr()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'valueTypePtr' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
- __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TMapTypeEntryStandardSchemeFactory implements SchemeFactory { - public TMapTypeEntryStandardScheme getScheme() { - return new TMapTypeEntryStandardScheme(); - } - } - - private static class TMapTypeEntryStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TMapTypeEntry struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // KEY_TYPE_PTR - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.keyTypePtr = iprot.readI32(); - struct.setKeyTypePtrIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // VALUE_TYPE_PTR - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.valueTypePtr = iprot.readI32(); - struct.setValueTypePtrIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TMapTypeEntry struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - oprot.writeFieldBegin(KEY_TYPE_PTR_FIELD_DESC); - oprot.writeI32(struct.keyTypePtr); - oprot.writeFieldEnd(); - oprot.writeFieldBegin(VALUE_TYPE_PTR_FIELD_DESC); - oprot.writeI32(struct.valueTypePtr); - oprot.writeFieldEnd(); - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TMapTypeEntryTupleSchemeFactory implements SchemeFactory { - public TMapTypeEntryTupleScheme getScheme() { - return new TMapTypeEntryTupleScheme(); - } - } - - private static class TMapTypeEntryTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TMapTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeI32(struct.keyTypePtr); - oprot.writeI32(struct.valueTypePtr); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TMapTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.keyTypePtr = iprot.readI32(); - struct.setKeyTypePtrIsSet(true); - struct.valueTypePtr = iprot.readI32(); - struct.setValueTypePtrIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionReq.java deleted file mode 100644 index e47abbb862cf1..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionReq.java +++ /dev/null @@ -1,778 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import 
org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TOpenSessionReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TOpenSessionReq"); - - private static final org.apache.thrift.protocol.TField CLIENT_PROTOCOL_FIELD_DESC = new org.apache.thrift.protocol.TField("client_protocol", org.apache.thrift.protocol.TType.I32, (short)1); - private static final org.apache.thrift.protocol.TField USERNAME_FIELD_DESC = new org.apache.thrift.protocol.TField("username", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField PASSWORD_FIELD_DESC = new org.apache.thrift.protocol.TField("password", org.apache.thrift.protocol.TType.STRING, (short)3); - private static final org.apache.thrift.protocol.TField CONFIGURATION_FIELD_DESC = new org.apache.thrift.protocol.TField("configuration", org.apache.thrift.protocol.TType.MAP, (short)4); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TOpenSessionReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TOpenSessionReqTupleSchemeFactory()); - } - - private TProtocolVersion client_protocol; // required - private String username; // optional - private String password; // optional - private Map configuration; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - /** - * - * @see TProtocolVersion - */ - CLIENT_PROTOCOL((short)1, "client_protocol"), - USERNAME((short)2, "username"), - PASSWORD((short)3, "password"), - CONFIGURATION((short)4, "configuration"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // CLIENT_PROTOCOL - return CLIENT_PROTOCOL; - case 2: // USERNAME - return USERNAME; - case 3: // PASSWORD - return PASSWORD; - case 4: // CONFIGURATION - return CONFIGURATION; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.USERNAME,_Fields.PASSWORD,_Fields.CONFIGURATION}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.CLIENT_PROTOCOL, new org.apache.thrift.meta_data.FieldMetaData("client_protocol", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TProtocolVersion.class))); - tmpMap.put(_Fields.USERNAME, new org.apache.thrift.meta_data.FieldMetaData("username", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.PASSWORD, new org.apache.thrift.meta_data.FieldMetaData("password", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.CONFIGURATION, new org.apache.thrift.meta_data.FieldMetaData("configuration", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.MapMetaData(org.apache.thrift.protocol.TType.MAP, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING), - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TOpenSessionReq.class, metaDataMap); - } - - public TOpenSessionReq() { - this.client_protocol = org.apache.hive.service.rpc.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V10; - - } - - public TOpenSessionReq( - TProtocolVersion client_protocol) - { - this(); - this.client_protocol = client_protocol; - } - - /** - * Performs a deep copy on other. 
- */ - public TOpenSessionReq(TOpenSessionReq other) { - if (other.isSetClient_protocol()) { - this.client_protocol = other.client_protocol; - } - if (other.isSetUsername()) { - this.username = other.username; - } - if (other.isSetPassword()) { - this.password = other.password; - } - if (other.isSetConfiguration()) { - Map __this__configuration = new HashMap(other.configuration); - this.configuration = __this__configuration; - } - } - - public TOpenSessionReq deepCopy() { - return new TOpenSessionReq(this); - } - - @Override - public void clear() { - this.client_protocol = org.apache.hive.service.rpc.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V10; - - this.username = null; - this.password = null; - this.configuration = null; - } - - /** - * - * @see TProtocolVersion - */ - public TProtocolVersion getClient_protocol() { - return this.client_protocol; - } - - /** - * - * @see TProtocolVersion - */ - public void setClient_protocol(TProtocolVersion client_protocol) { - this.client_protocol = client_protocol; - } - - public void unsetClient_protocol() { - this.client_protocol = null; - } - - /** Returns true if field client_protocol is set (has been assigned a value) and false otherwise */ - public boolean isSetClient_protocol() { - return this.client_protocol != null; - } - - public void setClient_protocolIsSet(boolean value) { - if (!value) { - this.client_protocol = null; - } - } - - public String getUsername() { - return this.username; - } - - public void setUsername(String username) { - this.username = username; - } - - public void unsetUsername() { - this.username = null; - } - - /** Returns true if field username is set (has been assigned a value) and false otherwise */ - public boolean isSetUsername() { - return this.username != null; - } - - public void setUsernameIsSet(boolean value) { - if (!value) { - this.username = null; - } - } - - public String getPassword() { - return this.password; - } - - public void setPassword(String password) { - this.password = password; - } - - public void unsetPassword() { - this.password = null; - } - - /** Returns true if field password is set (has been assigned a value) and false otherwise */ - public boolean isSetPassword() { - return this.password != null; - } - - public void setPasswordIsSet(boolean value) { - if (!value) { - this.password = null; - } - } - - public int getConfigurationSize() { - return (this.configuration == null) ? 
0 : this.configuration.size(); - } - - public void putToConfiguration(String key, String val) { - if (this.configuration == null) { - this.configuration = new HashMap(); - } - this.configuration.put(key, val); - } - - public Map getConfiguration() { - return this.configuration; - } - - public void setConfiguration(Map configuration) { - this.configuration = configuration; - } - - public void unsetConfiguration() { - this.configuration = null; - } - - /** Returns true if field configuration is set (has been assigned a value) and false otherwise */ - public boolean isSetConfiguration() { - return this.configuration != null; - } - - public void setConfigurationIsSet(boolean value) { - if (!value) { - this.configuration = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case CLIENT_PROTOCOL: - if (value == null) { - unsetClient_protocol(); - } else { - setClient_protocol((TProtocolVersion)value); - } - break; - - case USERNAME: - if (value == null) { - unsetUsername(); - } else { - setUsername((String)value); - } - break; - - case PASSWORD: - if (value == null) { - unsetPassword(); - } else { - setPassword((String)value); - } - break; - - case CONFIGURATION: - if (value == null) { - unsetConfiguration(); - } else { - setConfiguration((Map)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case CLIENT_PROTOCOL: - return getClient_protocol(); - - case USERNAME: - return getUsername(); - - case PASSWORD: - return getPassword(); - - case CONFIGURATION: - return getConfiguration(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case CLIENT_PROTOCOL: - return isSetClient_protocol(); - case USERNAME: - return isSetUsername(); - case PASSWORD: - return isSetPassword(); - case CONFIGURATION: - return isSetConfiguration(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TOpenSessionReq) - return this.equals((TOpenSessionReq)that); - return false; - } - - public boolean equals(TOpenSessionReq that) { - if (that == null) - return false; - - boolean this_present_client_protocol = true && this.isSetClient_protocol(); - boolean that_present_client_protocol = true && that.isSetClient_protocol(); - if (this_present_client_protocol || that_present_client_protocol) { - if (!(this_present_client_protocol && that_present_client_protocol)) - return false; - if (!this.client_protocol.equals(that.client_protocol)) - return false; - } - - boolean this_present_username = true && this.isSetUsername(); - boolean that_present_username = true && that.isSetUsername(); - if (this_present_username || that_present_username) { - if (!(this_present_username && that_present_username)) - return false; - if (!this.username.equals(that.username)) - return false; - } - - boolean this_present_password = true && this.isSetPassword(); - boolean that_present_password = true && that.isSetPassword(); - if (this_present_password || that_present_password) { - if (!(this_present_password && that_present_password)) - return false; - if (!this.password.equals(that.password)) - return false; - } - - boolean this_present_configuration = true && this.isSetConfiguration(); - boolean that_present_configuration 
= true && that.isSetConfiguration(); - if (this_present_configuration || that_present_configuration) { - if (!(this_present_configuration && that_present_configuration)) - return false; - if (!this.configuration.equals(that.configuration)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_client_protocol = true && (isSetClient_protocol()); - list.add(present_client_protocol); - if (present_client_protocol) - list.add(client_protocol.getValue()); - - boolean present_username = true && (isSetUsername()); - list.add(present_username); - if (present_username) - list.add(username); - - boolean present_password = true && (isSetPassword()); - list.add(present_password); - if (present_password) - list.add(password); - - boolean present_configuration = true && (isSetConfiguration()); - list.add(present_configuration); - if (present_configuration) - list.add(configuration); - - return list.hashCode(); - } - - @Override - public int compareTo(TOpenSessionReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetClient_protocol()).compareTo(other.isSetClient_protocol()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetClient_protocol()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.client_protocol, other.client_protocol); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetUsername()).compareTo(other.isSetUsername()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetUsername()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.username, other.username); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetPassword()).compareTo(other.isSetPassword()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetPassword()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.password, other.password); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetConfiguration()).compareTo(other.isSetConfiguration()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetConfiguration()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.configuration, other.configuration); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TOpenSessionReq("); - boolean first = true; - - sb.append("client_protocol:"); - if (this.client_protocol == null) { - sb.append("null"); - } else { - sb.append(this.client_protocol); - } - first = false; - if (isSetUsername()) { - if (!first) sb.append(", "); - sb.append("username:"); - if (this.username == null) { - sb.append("null"); - } else { - sb.append(this.username); - } - first = false; - } - if (isSetPassword()) { - if (!first) sb.append(", "); - 
sb.append("password:"); - if (this.password == null) { - sb.append("null"); - } else { - sb.append(this.password); - } - first = false; - } - if (isSetConfiguration()) { - if (!first) sb.append(", "); - sb.append("configuration:"); - if (this.configuration == null) { - sb.append("null"); - } else { - sb.append(this.configuration); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetClient_protocol()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'client_protocol' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TOpenSessionReqStandardSchemeFactory implements SchemeFactory { - public TOpenSessionReqStandardScheme getScheme() { - return new TOpenSessionReqStandardScheme(); - } - } - - private static class TOpenSessionReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TOpenSessionReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // CLIENT_PROTOCOL - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.client_protocol = org.apache.hive.service.rpc.thrift.TProtocolVersion.findByValue(iprot.readI32()); - struct.setClient_protocolIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // USERNAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.username = iprot.readString(); - struct.setUsernameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // PASSWORD - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.password = iprot.readString(); - struct.setPasswordIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // CONFIGURATION - if (schemeField.type == org.apache.thrift.protocol.TType.MAP) { - { - org.apache.thrift.protocol.TMap _map142 = iprot.readMapBegin(); - struct.configuration = new HashMap(2*_map142.size); - String _key143; - String _val144; - for (int _i145 = 0; _i145 < _map142.size; ++_i145) - { - _key143 = iprot.readString(); - _val144 = iprot.readString(); - struct.configuration.put(_key143, _val144); - } - iprot.readMapEnd(); - } - struct.setConfigurationIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - 
iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TOpenSessionReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.client_protocol != null) { - oprot.writeFieldBegin(CLIENT_PROTOCOL_FIELD_DESC); - oprot.writeI32(struct.client_protocol.getValue()); - oprot.writeFieldEnd(); - } - if (struct.username != null) { - if (struct.isSetUsername()) { - oprot.writeFieldBegin(USERNAME_FIELD_DESC); - oprot.writeString(struct.username); - oprot.writeFieldEnd(); - } - } - if (struct.password != null) { - if (struct.isSetPassword()) { - oprot.writeFieldBegin(PASSWORD_FIELD_DESC); - oprot.writeString(struct.password); - oprot.writeFieldEnd(); - } - } - if (struct.configuration != null) { - if (struct.isSetConfiguration()) { - oprot.writeFieldBegin(CONFIGURATION_FIELD_DESC); - { - oprot.writeMapBegin(new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, struct.configuration.size())); - for (Map.Entry _iter146 : struct.configuration.entrySet()) - { - oprot.writeString(_iter146.getKey()); - oprot.writeString(_iter146.getValue()); - } - oprot.writeMapEnd(); - } - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TOpenSessionReqTupleSchemeFactory implements SchemeFactory { - public TOpenSessionReqTupleScheme getScheme() { - return new TOpenSessionReqTupleScheme(); - } - } - - private static class TOpenSessionReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TOpenSessionReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeI32(struct.client_protocol.getValue()); - BitSet optionals = new BitSet(); - if (struct.isSetUsername()) { - optionals.set(0); - } - if (struct.isSetPassword()) { - optionals.set(1); - } - if (struct.isSetConfiguration()) { - optionals.set(2); - } - oprot.writeBitSet(optionals, 3); - if (struct.isSetUsername()) { - oprot.writeString(struct.username); - } - if (struct.isSetPassword()) { - oprot.writeString(struct.password); - } - if (struct.isSetConfiguration()) { - { - oprot.writeI32(struct.configuration.size()); - for (Map.Entry _iter147 : struct.configuration.entrySet()) - { - oprot.writeString(_iter147.getKey()); - oprot.writeString(_iter147.getValue()); - } - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TOpenSessionReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.client_protocol = org.apache.hive.service.rpc.thrift.TProtocolVersion.findByValue(iprot.readI32()); - struct.setClient_protocolIsSet(true); - BitSet incoming = iprot.readBitSet(3); - if (incoming.get(0)) { - struct.username = iprot.readString(); - struct.setUsernameIsSet(true); - } - if (incoming.get(1)) { - struct.password = iprot.readString(); - struct.setPasswordIsSet(true); - } - if (incoming.get(2)) { - { - org.apache.thrift.protocol.TMap _map148 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.configuration = new HashMap(2*_map148.size); - String _key149; - String _val150; - for (int _i151 = 0; _i151 < _map148.size; ++_i151) - { - _key149 = iprot.readString(); - _val150 = iprot.readString(); - struct.configuration.put(_key149, _val150); - } - } - 
struct.setConfigurationIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionResp.java deleted file mode 100644 index ee1c87bfd76fa..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionResp.java +++ /dev/null @@ -1,783 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TOpenSessionResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TOpenSessionResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField SERVER_PROTOCOL_VERSION_FIELD_DESC = new org.apache.thrift.protocol.TField("serverProtocolVersion", org.apache.thrift.protocol.TType.I32, (short)2); - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)3); - private static final org.apache.thrift.protocol.TField CONFIGURATION_FIELD_DESC = new org.apache.thrift.protocol.TField("configuration", org.apache.thrift.protocol.TType.MAP, (short)4); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TOpenSessionRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TOpenSessionRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TProtocolVersion serverProtocolVersion; // required - private TSessionHandle sessionHandle; // optional - private Map configuration; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - /** - * - * @see TProtocolVersion - */ - SERVER_PROTOCOL_VERSION((short)2, "serverProtocolVersion"), - SESSION_HANDLE((short)3, "sessionHandle"), - CONFIGURATION((short)4, "configuration"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // SERVER_PROTOCOL_VERSION - return SERVER_PROTOCOL_VERSION; - case 3: // SESSION_HANDLE - return SESSION_HANDLE; - case 4: // CONFIGURATION - return CONFIGURATION; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.SESSION_HANDLE,_Fields.CONFIGURATION}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.SERVER_PROTOCOL_VERSION, new org.apache.thrift.meta_data.FieldMetaData("serverProtocolVersion", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TProtocolVersion.class))); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.CONFIGURATION, new org.apache.thrift.meta_data.FieldMetaData("configuration", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.MapMetaData(org.apache.thrift.protocol.TType.MAP, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING), - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TOpenSessionResp.class, metaDataMap); - } - - public TOpenSessionResp() { - this.serverProtocolVersion = 
org.apache.hive.service.rpc.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V10; - - } - - public TOpenSessionResp( - TStatus status, - TProtocolVersion serverProtocolVersion) - { - this(); - this.status = status; - this.serverProtocolVersion = serverProtocolVersion; - } - - /** - * Performs a deep copy on other. - */ - public TOpenSessionResp(TOpenSessionResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetServerProtocolVersion()) { - this.serverProtocolVersion = other.serverProtocolVersion; - } - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetConfiguration()) { - Map __this__configuration = new HashMap(other.configuration); - this.configuration = __this__configuration; - } - } - - public TOpenSessionResp deepCopy() { - return new TOpenSessionResp(this); - } - - @Override - public void clear() { - this.status = null; - this.serverProtocolVersion = org.apache.hive.service.rpc.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V10; - - this.sessionHandle = null; - this.configuration = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - /** - * - * @see TProtocolVersion - */ - public TProtocolVersion getServerProtocolVersion() { - return this.serverProtocolVersion; - } - - /** - * - * @see TProtocolVersion - */ - public void setServerProtocolVersion(TProtocolVersion serverProtocolVersion) { - this.serverProtocolVersion = serverProtocolVersion; - } - - public void unsetServerProtocolVersion() { - this.serverProtocolVersion = null; - } - - /** Returns true if field serverProtocolVersion is set (has been assigned a value) and false otherwise */ - public boolean isSetServerProtocolVersion() { - return this.serverProtocolVersion != null; - } - - public void setServerProtocolVersionIsSet(boolean value) { - if (!value) { - this.serverProtocolVersion = null; - } - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public int getConfigurationSize() { - return (this.configuration == null) ? 
0 : this.configuration.size(); - } - - public void putToConfiguration(String key, String val) { - if (this.configuration == null) { - this.configuration = new HashMap(); - } - this.configuration.put(key, val); - } - - public Map getConfiguration() { - return this.configuration; - } - - public void setConfiguration(Map configuration) { - this.configuration = configuration; - } - - public void unsetConfiguration() { - this.configuration = null; - } - - /** Returns true if field configuration is set (has been assigned a value) and false otherwise */ - public boolean isSetConfiguration() { - return this.configuration != null; - } - - public void setConfigurationIsSet(boolean value) { - if (!value) { - this.configuration = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case SERVER_PROTOCOL_VERSION: - if (value == null) { - unsetServerProtocolVersion(); - } else { - setServerProtocolVersion((TProtocolVersion)value); - } - break; - - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case CONFIGURATION: - if (value == null) { - unsetConfiguration(); - } else { - setConfiguration((Map)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case SERVER_PROTOCOL_VERSION: - return getServerProtocolVersion(); - - case SESSION_HANDLE: - return getSessionHandle(); - - case CONFIGURATION: - return getConfiguration(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case SERVER_PROTOCOL_VERSION: - return isSetServerProtocolVersion(); - case SESSION_HANDLE: - return isSetSessionHandle(); - case CONFIGURATION: - return isSetConfiguration(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TOpenSessionResp) - return this.equals((TOpenSessionResp)that); - return false; - } - - public boolean equals(TOpenSessionResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_serverProtocolVersion = true && this.isSetServerProtocolVersion(); - boolean that_present_serverProtocolVersion = true && that.isSetServerProtocolVersion(); - if (this_present_serverProtocolVersion || that_present_serverProtocolVersion) { - if (!(this_present_serverProtocolVersion && that_present_serverProtocolVersion)) - return false; - if (!this.serverProtocolVersion.equals(that.serverProtocolVersion)) - return false; - } - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if 
(!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_configuration = true && this.isSetConfiguration(); - boolean that_present_configuration = true && that.isSetConfiguration(); - if (this_present_configuration || that_present_configuration) { - if (!(this_present_configuration && that_present_configuration)) - return false; - if (!this.configuration.equals(that.configuration)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if (present_status) - list.add(status); - - boolean present_serverProtocolVersion = true && (isSetServerProtocolVersion()); - list.add(present_serverProtocolVersion); - if (present_serverProtocolVersion) - list.add(serverProtocolVersion.getValue()); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - boolean present_configuration = true && (isSetConfiguration()); - list.add(present_configuration); - if (present_configuration) - list.add(configuration); - - return list.hashCode(); - } - - @Override - public int compareTo(TOpenSessionResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetServerProtocolVersion()).compareTo(other.isSetServerProtocolVersion()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetServerProtocolVersion()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.serverProtocolVersion, other.serverProtocolVersion); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetConfiguration()).compareTo(other.isSetConfiguration()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetConfiguration()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.configuration, other.configuration); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TOpenSessionResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (!first) 
sb.append(", "); - sb.append("serverProtocolVersion:"); - if (this.serverProtocolVersion == null) { - sb.append("null"); - } else { - sb.append(this.serverProtocolVersion); - } - first = false; - if (isSetSessionHandle()) { - if (!first) sb.append(", "); - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - } - if (isSetConfiguration()) { - if (!first) sb.append(", "); - sb.append("configuration:"); - if (this.configuration == null) { - sb.append("null"); - } else { - sb.append(this.configuration); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - if (!isSetServerProtocolVersion()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'serverProtocolVersion' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TOpenSessionRespStandardSchemeFactory implements SchemeFactory { - public TOpenSessionRespStandardScheme getScheme() { - return new TOpenSessionRespStandardScheme(); - } - } - - private static class TOpenSessionRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TOpenSessionResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // SERVER_PROTOCOL_VERSION - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.serverProtocolVersion = org.apache.hive.service.rpc.thrift.TProtocolVersion.findByValue(iprot.readI32()); - struct.setServerProtocolVersionIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // CONFIGURATION - if (schemeField.type == 
org.apache.thrift.protocol.TType.MAP) { - { - org.apache.thrift.protocol.TMap _map152 = iprot.readMapBegin(); - struct.configuration = new HashMap(2*_map152.size); - String _key153; - String _val154; - for (int _i155 = 0; _i155 < _map152.size; ++_i155) - { - _key153 = iprot.readString(); - _val154 = iprot.readString(); - struct.configuration.put(_key153, _val154); - } - iprot.readMapEnd(); - } - struct.setConfigurationIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TOpenSessionResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.serverProtocolVersion != null) { - oprot.writeFieldBegin(SERVER_PROTOCOL_VERSION_FIELD_DESC); - oprot.writeI32(struct.serverProtocolVersion.getValue()); - oprot.writeFieldEnd(); - } - if (struct.sessionHandle != null) { - if (struct.isSetSessionHandle()) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - if (struct.configuration != null) { - if (struct.isSetConfiguration()) { - oprot.writeFieldBegin(CONFIGURATION_FIELD_DESC); - { - oprot.writeMapBegin(new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, struct.configuration.size())); - for (Map.Entry _iter156 : struct.configuration.entrySet()) - { - oprot.writeString(_iter156.getKey()); - oprot.writeString(_iter156.getValue()); - } - oprot.writeMapEnd(); - } - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TOpenSessionRespTupleSchemeFactory implements SchemeFactory { - public TOpenSessionRespTupleScheme getScheme() { - return new TOpenSessionRespTupleScheme(); - } - } - - private static class TOpenSessionRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TOpenSessionResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - oprot.writeI32(struct.serverProtocolVersion.getValue()); - BitSet optionals = new BitSet(); - if (struct.isSetSessionHandle()) { - optionals.set(0); - } - if (struct.isSetConfiguration()) { - optionals.set(1); - } - oprot.writeBitSet(optionals, 2); - if (struct.isSetSessionHandle()) { - struct.sessionHandle.write(oprot); - } - if (struct.isSetConfiguration()) { - { - oprot.writeI32(struct.configuration.size()); - for (Map.Entry _iter157 : struct.configuration.entrySet()) - { - oprot.writeString(_iter157.getKey()); - oprot.writeString(_iter157.getValue()); - } - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TOpenSessionResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - struct.serverProtocolVersion = org.apache.hive.service.rpc.thrift.TProtocolVersion.findByValue(iprot.readI32()); - struct.setServerProtocolVersionIsSet(true); - BitSet incoming = iprot.readBitSet(2); - if 
(incoming.get(0)) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } - if (incoming.get(1)) { - { - org.apache.thrift.protocol.TMap _map158 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.configuration = new HashMap(2*_map158.size); - String _key159; - String _val160; - for (int _i161 = 0; _i161 < _map158.size; ++_i161) - { - _key159 = iprot.readString(); - _val160 = iprot.readString(); - struct.configuration.put(_key159, _val160); - } - } - struct.setConfigurationIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationHandle.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationHandle.java deleted file mode 100644 index 9eaf2be3ed5ea..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationHandle.java +++ /dev/null @@ -1,709 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TOperationHandle implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TOperationHandle"); - - private static final org.apache.thrift.protocol.TField OPERATION_ID_FIELD_DESC = new org.apache.thrift.protocol.TField("operationId", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_TYPE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationType", org.apache.thrift.protocol.TType.I32, (short)2); - private static final org.apache.thrift.protocol.TField HAS_RESULT_SET_FIELD_DESC = new org.apache.thrift.protocol.TField("hasResultSet", org.apache.thrift.protocol.TType.BOOL, (short)3); - private static final org.apache.thrift.protocol.TField MODIFIED_ROW_COUNT_FIELD_DESC = new org.apache.thrift.protocol.TField("modifiedRowCount", org.apache.thrift.protocol.TType.DOUBLE, (short)4); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TOperationHandleStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TOperationHandleTupleSchemeFactory()); - 
} - - private THandleIdentifier operationId; // required - private TOperationType operationType; // required - private boolean hasResultSet; // required - private double modifiedRowCount; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OPERATION_ID((short)1, "operationId"), - /** - * - * @see TOperationType - */ - OPERATION_TYPE((short)2, "operationType"), - HAS_RESULT_SET((short)3, "hasResultSet"), - MODIFIED_ROW_COUNT((short)4, "modifiedRowCount"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OPERATION_ID - return OPERATION_ID; - case 2: // OPERATION_TYPE - return OPERATION_TYPE; - case 3: // HAS_RESULT_SET - return HAS_RESULT_SET; - case 4: // MODIFIED_ROW_COUNT - return MODIFIED_ROW_COUNT; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __HASRESULTSET_ISSET_ID = 0; - private static final int __MODIFIEDROWCOUNT_ISSET_ID = 1; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.MODIFIED_ROW_COUNT}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OPERATION_ID, new org.apache.thrift.meta_data.FieldMetaData("operationId", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, THandleIdentifier.class))); - tmpMap.put(_Fields.OPERATION_TYPE, new org.apache.thrift.meta_data.FieldMetaData("operationType", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TOperationType.class))); - tmpMap.put(_Fields.HAS_RESULT_SET, new org.apache.thrift.meta_data.FieldMetaData("hasResultSet", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL))); - tmpMap.put(_Fields.MODIFIED_ROW_COUNT, new org.apache.thrift.meta_data.FieldMetaData("modifiedRowCount", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.DOUBLE))); - metaDataMap = 
Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TOperationHandle.class, metaDataMap); - } - - public TOperationHandle() { - } - - public TOperationHandle( - THandleIdentifier operationId, - TOperationType operationType, - boolean hasResultSet) - { - this(); - this.operationId = operationId; - this.operationType = operationType; - this.hasResultSet = hasResultSet; - setHasResultSetIsSet(true); - } - - /** - * Performs a deep copy on other. - */ - public TOperationHandle(TOperationHandle other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetOperationId()) { - this.operationId = new THandleIdentifier(other.operationId); - } - if (other.isSetOperationType()) { - this.operationType = other.operationType; - } - this.hasResultSet = other.hasResultSet; - this.modifiedRowCount = other.modifiedRowCount; - } - - public TOperationHandle deepCopy() { - return new TOperationHandle(this); - } - - @Override - public void clear() { - this.operationId = null; - this.operationType = null; - setHasResultSetIsSet(false); - this.hasResultSet = false; - setModifiedRowCountIsSet(false); - this.modifiedRowCount = 0.0; - } - - public THandleIdentifier getOperationId() { - return this.operationId; - } - - public void setOperationId(THandleIdentifier operationId) { - this.operationId = operationId; - } - - public void unsetOperationId() { - this.operationId = null; - } - - /** Returns true if field operationId is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationId() { - return this.operationId != null; - } - - public void setOperationIdIsSet(boolean value) { - if (!value) { - this.operationId = null; - } - } - - /** - * - * @see TOperationType - */ - public TOperationType getOperationType() { - return this.operationType; - } - - /** - * - * @see TOperationType - */ - public void setOperationType(TOperationType operationType) { - this.operationType = operationType; - } - - public void unsetOperationType() { - this.operationType = null; - } - - /** Returns true if field operationType is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationType() { - return this.operationType != null; - } - - public void setOperationTypeIsSet(boolean value) { - if (!value) { - this.operationType = null; - } - } - - public boolean isHasResultSet() { - return this.hasResultSet; - } - - public void setHasResultSet(boolean hasResultSet) { - this.hasResultSet = hasResultSet; - setHasResultSetIsSet(true); - } - - public void unsetHasResultSet() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __HASRESULTSET_ISSET_ID); - } - - /** Returns true if field hasResultSet is set (has been assigned a value) and false otherwise */ - public boolean isSetHasResultSet() { - return EncodingUtils.testBit(__isset_bitfield, __HASRESULTSET_ISSET_ID); - } - - public void setHasResultSetIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __HASRESULTSET_ISSET_ID, value); - } - - public double getModifiedRowCount() { - return this.modifiedRowCount; - } - - public void setModifiedRowCount(double modifiedRowCount) { - this.modifiedRowCount = modifiedRowCount; - setModifiedRowCountIsSet(true); - } - - public void unsetModifiedRowCount() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __MODIFIEDROWCOUNT_ISSET_ID); - } - - /** Returns true if field modifiedRowCount is set (has been assigned a value) and false otherwise */ - public boolean isSetModifiedRowCount() { - 
return EncodingUtils.testBit(__isset_bitfield, __MODIFIEDROWCOUNT_ISSET_ID); - } - - public void setModifiedRowCountIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __MODIFIEDROWCOUNT_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OPERATION_ID: - if (value == null) { - unsetOperationId(); - } else { - setOperationId((THandleIdentifier)value); - } - break; - - case OPERATION_TYPE: - if (value == null) { - unsetOperationType(); - } else { - setOperationType((TOperationType)value); - } - break; - - case HAS_RESULT_SET: - if (value == null) { - unsetHasResultSet(); - } else { - setHasResultSet((Boolean)value); - } - break; - - case MODIFIED_ROW_COUNT: - if (value == null) { - unsetModifiedRowCount(); - } else { - setModifiedRowCount((Double)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OPERATION_ID: - return getOperationId(); - - case OPERATION_TYPE: - return getOperationType(); - - case HAS_RESULT_SET: - return isHasResultSet(); - - case MODIFIED_ROW_COUNT: - return getModifiedRowCount(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OPERATION_ID: - return isSetOperationId(); - case OPERATION_TYPE: - return isSetOperationType(); - case HAS_RESULT_SET: - return isSetHasResultSet(); - case MODIFIED_ROW_COUNT: - return isSetModifiedRowCount(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TOperationHandle) - return this.equals((TOperationHandle)that); - return false; - } - - public boolean equals(TOperationHandle that) { - if (that == null) - return false; - - boolean this_present_operationId = true && this.isSetOperationId(); - boolean that_present_operationId = true && that.isSetOperationId(); - if (this_present_operationId || that_present_operationId) { - if (!(this_present_operationId && that_present_operationId)) - return false; - if (!this.operationId.equals(that.operationId)) - return false; - } - - boolean this_present_operationType = true && this.isSetOperationType(); - boolean that_present_operationType = true && that.isSetOperationType(); - if (this_present_operationType || that_present_operationType) { - if (!(this_present_operationType && that_present_operationType)) - return false; - if (!this.operationType.equals(that.operationType)) - return false; - } - - boolean this_present_hasResultSet = true; - boolean that_present_hasResultSet = true; - if (this_present_hasResultSet || that_present_hasResultSet) { - if (!(this_present_hasResultSet && that_present_hasResultSet)) - return false; - if (this.hasResultSet != that.hasResultSet) - return false; - } - - boolean this_present_modifiedRowCount = true && this.isSetModifiedRowCount(); - boolean that_present_modifiedRowCount = true && that.isSetModifiedRowCount(); - if (this_present_modifiedRowCount || that_present_modifiedRowCount) { - if (!(this_present_modifiedRowCount && that_present_modifiedRowCount)) - return false; - if (this.modifiedRowCount != that.modifiedRowCount) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_operationId = true && 
(isSetOperationId()); - list.add(present_operationId); - if (present_operationId) - list.add(operationId); - - boolean present_operationType = true && (isSetOperationType()); - list.add(present_operationType); - if (present_operationType) - list.add(operationType.getValue()); - - boolean present_hasResultSet = true; - list.add(present_hasResultSet); - if (present_hasResultSet) - list.add(hasResultSet); - - boolean present_modifiedRowCount = true && (isSetModifiedRowCount()); - list.add(present_modifiedRowCount); - if (present_modifiedRowCount) - list.add(modifiedRowCount); - - return list.hashCode(); - } - - @Override - public int compareTo(TOperationHandle other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetOperationId()).compareTo(other.isSetOperationId()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationId()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationId, other.operationId); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationType()).compareTo(other.isSetOperationType()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationType()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationType, other.operationType); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetHasResultSet()).compareTo(other.isSetHasResultSet()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetHasResultSet()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.hasResultSet, other.hasResultSet); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetModifiedRowCount()).compareTo(other.isSetModifiedRowCount()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetModifiedRowCount()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.modifiedRowCount, other.modifiedRowCount); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TOperationHandle("); - boolean first = true; - - sb.append("operationId:"); - if (this.operationId == null) { - sb.append("null"); - } else { - sb.append(this.operationId); - } - first = false; - if (!first) sb.append(", "); - sb.append("operationType:"); - if (this.operationType == null) { - sb.append("null"); - } else { - sb.append(this.operationType); - } - first = false; - if (!first) sb.append(", "); - sb.append("hasResultSet:"); - sb.append(this.hasResultSet); - first = false; - if (isSetModifiedRowCount()) { - if (!first) sb.append(", "); - sb.append("modifiedRowCount:"); - sb.append(this.modifiedRowCount); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if 
(!isSetOperationId()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationId' is unset! Struct:" + toString()); - } - - if (!isSetOperationType()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationType' is unset! Struct:" + toString()); - } - - if (!isSetHasResultSet()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'hasResultSet' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (operationId != null) { - operationId.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TOperationHandleStandardSchemeFactory implements SchemeFactory { - public TOperationHandleStandardScheme getScheme() { - return new TOperationHandleStandardScheme(); - } - } - - private static class TOperationHandleStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TOperationHandle struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OPERATION_ID - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationId = new THandleIdentifier(); - struct.operationId.read(iprot); - struct.setOperationIdIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_TYPE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.operationType = org.apache.hive.service.rpc.thrift.TOperationType.findByValue(iprot.readI32()); - struct.setOperationTypeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // HAS_RESULT_SET - if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) { - struct.hasResultSet = iprot.readBool(); - struct.setHasResultSetIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // MODIFIED_ROW_COUNT - if (schemeField.type == org.apache.thrift.protocol.TType.DOUBLE) { - struct.modifiedRowCount = iprot.readDouble(); - struct.setModifiedRowCountIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TOperationHandle struct) throws org.apache.thrift.TException { - struct.validate(); - - 
oprot.writeStructBegin(STRUCT_DESC); - if (struct.operationId != null) { - oprot.writeFieldBegin(OPERATION_ID_FIELD_DESC); - struct.operationId.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationType != null) { - oprot.writeFieldBegin(OPERATION_TYPE_FIELD_DESC); - oprot.writeI32(struct.operationType.getValue()); - oprot.writeFieldEnd(); - } - oprot.writeFieldBegin(HAS_RESULT_SET_FIELD_DESC); - oprot.writeBool(struct.hasResultSet); - oprot.writeFieldEnd(); - if (struct.isSetModifiedRowCount()) { - oprot.writeFieldBegin(MODIFIED_ROW_COUNT_FIELD_DESC); - oprot.writeDouble(struct.modifiedRowCount); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TOperationHandleTupleSchemeFactory implements SchemeFactory { - public TOperationHandleTupleScheme getScheme() { - return new TOperationHandleTupleScheme(); - } - } - - private static class TOperationHandleTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TOperationHandle struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.operationId.write(oprot); - oprot.writeI32(struct.operationType.getValue()); - oprot.writeBool(struct.hasResultSet); - BitSet optionals = new BitSet(); - if (struct.isSetModifiedRowCount()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetModifiedRowCount()) { - oprot.writeDouble(struct.modifiedRowCount); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TOperationHandle struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.operationId = new THandleIdentifier(); - struct.operationId.read(iprot); - struct.setOperationIdIsSet(true); - struct.operationType = org.apache.hive.service.rpc.thrift.TOperationType.findByValue(iprot.readI32()); - struct.setOperationTypeIsSet(true); - struct.hasResultSet = iprot.readBool(); - struct.setHasResultSetIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.modifiedRowCount = iprot.readDouble(); - struct.setModifiedRowCountIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationState.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationState.java deleted file mode 100644 index 4390b4b887583..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationState.java +++ /dev/null @@ -1,66 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - - -import java.util.Map; -import java.util.HashMap; -import org.apache.thrift.TEnum; - -public enum TOperationState implements org.apache.thrift.TEnum { - INITIALIZED_STATE(0), - RUNNING_STATE(1), - FINISHED_STATE(2), - CANCELED_STATE(3), - CLOSED_STATE(4), - ERROR_STATE(5), - UKNOWN_STATE(6), - PENDING_STATE(7), - TIMEDOUT_STATE(8); - - private final int value; - - private TOperationState(int value) { - this.value = value; - } - - /** - * Get the integer value of this enum value, as defined in the Thrift IDL. - */ - public int getValue() { - return value; - } - - /** - * Find a the enum type by its integer value, as defined in the Thrift IDL. - * @return null if the value is not found. 
- */
-  public static TOperationState findByValue(int value) {
-    switch (value) {
-      case 0:
-        return INITIALIZED_STATE;
-      case 1:
-        return RUNNING_STATE;
-      case 2:
-        return FINISHED_STATE;
-      case 3:
-        return CANCELED_STATE;
-      case 4:
-        return CLOSED_STATE;
-      case 5:
-        return ERROR_STATE;
-      case 6:
-        return UKNOWN_STATE;
-      case 7:
-        return PENDING_STATE;
-      case 8:
-        return TIMEDOUT_STATE;
-      default:
-        return null;
-    }
-  }
-}
diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationType.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationType.java
deleted file mode 100644
index 08002ad1dc8e8..0000000000000
--- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationType.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.3)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-package org.apache.hive.service.rpc.thrift;
-
-
-import java.util.Map;
-import java.util.HashMap;
-import org.apache.thrift.TEnum;
-
-public enum TOperationType implements org.apache.thrift.TEnum {
-  EXECUTE_STATEMENT(0),
-  GET_TYPE_INFO(1),
-  GET_CATALOGS(2),
-  GET_SCHEMAS(3),
-  GET_TABLES(4),
-  GET_TABLE_TYPES(5),
-  GET_COLUMNS(6),
-  GET_FUNCTIONS(7),
-  UNKNOWN(8);
-
-  private final int value;
-
-  private TOperationType(int value) {
-    this.value = value;
-  }
-
-  /**
-   * Get the integer value of this enum value, as defined in the Thrift IDL.
-   */
-  public int getValue() {
-    return value;
-  }
-
-  /**
-   * Find a the enum type by its integer value, as defined in the Thrift IDL.
-   * @return null if the value is not found.
-   */
-  public static TOperationType findByValue(int value) {
-    switch (value) {
-      case 0:
-        return EXECUTE_STATEMENT;
-      case 1:
-        return GET_TYPE_INFO;
-      case 2:
-        return GET_CATALOGS;
-      case 3:
-        return GET_SCHEMAS;
-      case 4:
-        return GET_TABLES;
-      case 5:
-        return GET_TABLE_TYPES;
-      case 6:
-        return GET_COLUMNS;
-      case 7:
-        return GET_FUNCTIONS;
-      case 8:
-        return UNKNOWN;
-      default:
-        return null;
-    }
-  }
-}
diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TPrimitiveTypeEntry.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TPrimitiveTypeEntry.java
deleted file mode 100644
index 910c90967f614..0000000000000
--- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TPrimitiveTypeEntry.java
+++ /dev/null
@@ -1,516 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.3)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-package org.apache.hive.service.rpc.thrift;
-
-import org.apache.thrift.scheme.IScheme;
-import org.apache.thrift.scheme.SchemeFactory;
-import org.apache.thrift.scheme.StandardScheme;
-
-import org.apache.thrift.scheme.TupleScheme;
-import org.apache.thrift.protocol.TTupleProtocol;
-import org.apache.thrift.protocol.TProtocolException;
-import org.apache.thrift.EncodingUtils;
-import org.apache.thrift.TException;
-import org.apache.thrift.async.AsyncMethodCallback;
-import org.apache.thrift.server.AbstractNonblockingServer.*;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.EnumMap;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.EnumSet;
-import java.util.Collections;
-import java.util.BitSet;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import javax.annotation.Generated;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TPrimitiveTypeEntry implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TPrimitiveTypeEntry"); - - private static final org.apache.thrift.protocol.TField TYPE_FIELD_DESC = new org.apache.thrift.protocol.TField("type", org.apache.thrift.protocol.TType.I32, (short)1); - private static final org.apache.thrift.protocol.TField TYPE_QUALIFIERS_FIELD_DESC = new org.apache.thrift.protocol.TField("typeQualifiers", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TPrimitiveTypeEntryStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TPrimitiveTypeEntryTupleSchemeFactory()); - } - - private TTypeId type; // required - private TTypeQualifiers typeQualifiers; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - /** - * - * @see TTypeId - */ - TYPE((short)1, "type"), - TYPE_QUALIFIERS((short)2, "typeQualifiers"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // TYPE - return TYPE; - case 2: // TYPE_QUALIFIERS - return TYPE_QUALIFIERS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.TYPE_QUALIFIERS}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.TYPE, new org.apache.thrift.meta_data.FieldMetaData("type", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TTypeId.class))); - tmpMap.put(_Fields.TYPE_QUALIFIERS, new org.apache.thrift.meta_data.FieldMetaData("typeQualifiers", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TTypeQualifiers.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TPrimitiveTypeEntry.class, metaDataMap); - } - - public TPrimitiveTypeEntry() { - } - - public TPrimitiveTypeEntry( - TTypeId type) - { - this(); - this.type = type; - } - - /** - * Performs a deep copy on other. - */ - public TPrimitiveTypeEntry(TPrimitiveTypeEntry other) { - if (other.isSetType()) { - this.type = other.type; - } - if (other.isSetTypeQualifiers()) { - this.typeQualifiers = new TTypeQualifiers(other.typeQualifiers); - } - } - - public TPrimitiveTypeEntry deepCopy() { - return new TPrimitiveTypeEntry(this); - } - - @Override - public void clear() { - this.type = null; - this.typeQualifiers = null; - } - - /** - * - * @see TTypeId - */ - public TTypeId getType() { - return this.type; - } - - /** - * - * @see TTypeId - */ - public void setType(TTypeId type) { - this.type = type; - } - - public void unsetType() { - this.type = null; - } - - /** Returns true if field type is set (has been assigned a value) and false otherwise */ - public boolean isSetType() { - return this.type != null; - } - - public void setTypeIsSet(boolean value) { - if (!value) { - this.type = null; - } - } - - public TTypeQualifiers getTypeQualifiers() { - return this.typeQualifiers; - } - - public void setTypeQualifiers(TTypeQualifiers typeQualifiers) { - this.typeQualifiers = typeQualifiers; - } - - public void unsetTypeQualifiers() { - this.typeQualifiers = null; - } - - /** Returns true if field typeQualifiers is set (has been assigned a value) and false otherwise */ - public boolean isSetTypeQualifiers() { - return this.typeQualifiers != null; - } - - public void setTypeQualifiersIsSet(boolean value) { - if (!value) { - this.typeQualifiers = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case TYPE: - if (value == null) { - unsetType(); - } else { - setType((TTypeId)value); - } - break; - - case TYPE_QUALIFIERS: - if (value == null) { - unsetTypeQualifiers(); - } else { - setTypeQualifiers((TTypeQualifiers)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case TYPE: - return getType(); - - case TYPE_QUALIFIERS: - return getTypeQualifiers(); - - } - throw new 
IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case TYPE: - return isSetType(); - case TYPE_QUALIFIERS: - return isSetTypeQualifiers(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TPrimitiveTypeEntry) - return this.equals((TPrimitiveTypeEntry)that); - return false; - } - - public boolean equals(TPrimitiveTypeEntry that) { - if (that == null) - return false; - - boolean this_present_type = true && this.isSetType(); - boolean that_present_type = true && that.isSetType(); - if (this_present_type || that_present_type) { - if (!(this_present_type && that_present_type)) - return false; - if (!this.type.equals(that.type)) - return false; - } - - boolean this_present_typeQualifiers = true && this.isSetTypeQualifiers(); - boolean that_present_typeQualifiers = true && that.isSetTypeQualifiers(); - if (this_present_typeQualifiers || that_present_typeQualifiers) { - if (!(this_present_typeQualifiers && that_present_typeQualifiers)) - return false; - if (!this.typeQualifiers.equals(that.typeQualifiers)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_type = true && (isSetType()); - list.add(present_type); - if (present_type) - list.add(type.getValue()); - - boolean present_typeQualifiers = true && (isSetTypeQualifiers()); - list.add(present_typeQualifiers); - if (present_typeQualifiers) - list.add(typeQualifiers); - - return list.hashCode(); - } - - @Override - public int compareTo(TPrimitiveTypeEntry other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetType()).compareTo(other.isSetType()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetType()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.type, other.type); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetTypeQualifiers()).compareTo(other.isSetTypeQualifiers()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTypeQualifiers()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.typeQualifiers, other.typeQualifiers); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TPrimitiveTypeEntry("); - boolean first = true; - - sb.append("type:"); - if (this.type == null) { - sb.append("null"); - } else { - sb.append(this.type); - } - first = false; - if (isSetTypeQualifiers()) { - if (!first) sb.append(", "); - sb.append("typeQualifiers:"); - if (this.typeQualifiers == null) { - sb.append("null"); - } else { - sb.append(this.typeQualifiers); - 
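The read() and write() methods shown a little above do not serialize the struct directly; they pick an entry from the static schemes map keyed by the protocol's preferred scheme class (StandardScheme for field-tagged protocols, TupleScheme for TTupleProtocol) and delegate to it. A rough, self-contained sketch of that registry-and-delegate pattern; the types and names here are illustrative stand-ins, not the real libthrift API:

```java
import java.util.HashMap;
import java.util.Map;

// One serialization strategy per "scheme"; the struct looks the strategy up
// at write time, much like schemes.get(prot.getScheme()) in the generated code.
interface DemoScheme<T> {
    void write(StringBuilder out, T value);
}

final class DemoStruct {
    // Hypothetical registry keyed by a scheme marker class.
    private static final Map<Class<?>, DemoScheme<DemoStruct>> SCHEMES = new HashMap<>();
    static {
        SCHEMES.put(VerboseScheme.class, (out, v) -> out.append("name:").append(v.name));
        SCHEMES.put(CompactScheme.class, (out, v) -> out.append(v.name));
    }

    final String name;

    DemoStruct(String name) {
        this.name = name;
    }

    // The caller's "protocol" decides which scheme is preferred.
    String write(Class<?> preferredScheme) {
        StringBuilder sb = new StringBuilder();
        SCHEMES.get(preferredScheme).write(sb, this);
        return sb.toString();
    }

    // Marker types standing in for StandardScheme / TupleScheme.
    static final class VerboseScheme {}
    static final class CompactScheme {}
}
```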
} - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetType()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'type' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (typeQualifiers != null) { - typeQualifiers.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TPrimitiveTypeEntryStandardSchemeFactory implements SchemeFactory { - public TPrimitiveTypeEntryStandardScheme getScheme() { - return new TPrimitiveTypeEntryStandardScheme(); - } - } - - private static class TPrimitiveTypeEntryStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TPrimitiveTypeEntry struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // TYPE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.type = org.apache.hive.service.rpc.thrift.TTypeId.findByValue(iprot.readI32()); - struct.setTypeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // TYPE_QUALIFIERS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.typeQualifiers = new TTypeQualifiers(); - struct.typeQualifiers.read(iprot); - struct.setTypeQualifiersIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TPrimitiveTypeEntry struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.type != null) { - oprot.writeFieldBegin(TYPE_FIELD_DESC); - oprot.writeI32(struct.type.getValue()); - oprot.writeFieldEnd(); - } - if (struct.typeQualifiers != null) { - if (struct.isSetTypeQualifiers()) { - oprot.writeFieldBegin(TYPE_QUALIFIERS_FIELD_DESC); - struct.typeQualifiers.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TPrimitiveTypeEntryTupleSchemeFactory implements SchemeFactory { - public TPrimitiveTypeEntryTupleScheme getScheme() { - return new TPrimitiveTypeEntryTupleScheme(); - } - } - - private static class TPrimitiveTypeEntryTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TPrimitiveTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) 
prot; - oprot.writeI32(struct.type.getValue()); - BitSet optionals = new BitSet(); - if (struct.isSetTypeQualifiers()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetTypeQualifiers()) { - struct.typeQualifiers.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TPrimitiveTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.type = org.apache.hive.service.rpc.thrift.TTypeId.findByValue(iprot.readI32()); - struct.setTypeIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.typeQualifiers = new TTypeQualifiers(); - struct.typeQualifiers.read(iprot); - struct.setTypeQualifiersIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TProgressUpdateResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TProgressUpdateResp.java deleted file mode 100644 index ecc413aad4cdc..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TProgressUpdateResp.java +++ /dev/null @@ -1,1033 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TProgressUpdateResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TProgressUpdateResp"); - - private static final org.apache.thrift.protocol.TField HEADER_NAMES_FIELD_DESC = new org.apache.thrift.protocol.TField("headerNames", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField ROWS_FIELD_DESC = new org.apache.thrift.protocol.TField("rows", org.apache.thrift.protocol.TType.LIST, (short)2); - private static final org.apache.thrift.protocol.TField PROGRESSED_PERCENTAGE_FIELD_DESC = new org.apache.thrift.protocol.TField("progressedPercentage", org.apache.thrift.protocol.TType.DOUBLE, (short)3); - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.I32, (short)4); - private static final org.apache.thrift.protocol.TField FOOTER_SUMMARY_FIELD_DESC = new org.apache.thrift.protocol.TField("footerSummary", 
org.apache.thrift.protocol.TType.STRING, (short)5); - private static final org.apache.thrift.protocol.TField START_TIME_FIELD_DESC = new org.apache.thrift.protocol.TField("startTime", org.apache.thrift.protocol.TType.I64, (short)6); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TProgressUpdateRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TProgressUpdateRespTupleSchemeFactory()); - } - - private List headerNames; // required - private List> rows; // required - private double progressedPercentage; // required - private TJobExecutionStatus status; // required - private String footerSummary; // required - private long startTime; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - HEADER_NAMES((short)1, "headerNames"), - ROWS((short)2, "rows"), - PROGRESSED_PERCENTAGE((short)3, "progressedPercentage"), - /** - * - * @see TJobExecutionStatus - */ - STATUS((short)4, "status"), - FOOTER_SUMMARY((short)5, "footerSummary"), - START_TIME((short)6, "startTime"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // HEADER_NAMES - return HEADER_NAMES; - case 2: // ROWS - return ROWS; - case 3: // PROGRESSED_PERCENTAGE - return PROGRESSED_PERCENTAGE; - case 4: // STATUS - return STATUS; - case 5: // FOOTER_SUMMARY - return FOOTER_SUMMARY; - case 6: // START_TIME - return START_TIME; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __PROGRESSEDPERCENTAGE_ISSET_ID = 0; - private static final int __STARTTIME_ISSET_ID = 1; - private byte __isset_bitfield = 0; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.HEADER_NAMES, new org.apache.thrift.meta_data.FieldMetaData("headerNames", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); - tmpMap.put(_Fields.ROWS, new org.apache.thrift.meta_data.FieldMetaData("rows", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))))); - tmpMap.put(_Fields.PROGRESSED_PERCENTAGE, new org.apache.thrift.meta_data.FieldMetaData("progressedPercentage", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.DOUBLE))); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TJobExecutionStatus.class))); - tmpMap.put(_Fields.FOOTER_SUMMARY, new org.apache.thrift.meta_data.FieldMetaData("footerSummary", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.START_TIME, new org.apache.thrift.meta_data.FieldMetaData("startTime", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TProgressUpdateResp.class, metaDataMap); - } - - public TProgressUpdateResp() { - } - - public TProgressUpdateResp( - List headerNames, - List> rows, - double progressedPercentage, - TJobExecutionStatus status, - String footerSummary, - long startTime) - { - this(); - this.headerNames = headerNames; - this.rows = rows; - this.progressedPercentage = progressedPercentage; - setProgressedPercentageIsSet(true); - this.status = status; - this.footerSummary = footerSummary; - this.startTime = startTime; - setStartTimeIsSet(true); - } - - /** - * Performs a deep copy on other. 
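The two primitive fields of TProgressUpdateResp (progressedPercentage and startTime) cannot use null to mark "unset", so the isset id assignments above pack one presence bit per field into the single __isset_bitfield byte, flipped through EncodingUtils in the setters further down. A small stand-alone sketch of that bookkeeping with the bit operations written out (a hypothetical class, not the generated one):

```java
// Minimal illustration of the __isset_bitfield bookkeeping used for primitive
// fields: one presence bit per field, all stored in a single byte.
final class IssetDemo {
    private static final int PROGRESSED_PERCENTAGE_ISSET_ID = 0;
    private static final int START_TIME_ISSET_ID = 1;

    private byte issetBitfield = 0;
    private double progressedPercentage;
    private long startTime;

    void setProgressedPercentage(double value) {
        progressedPercentage = value;
        issetBitfield |= (1 << PROGRESSED_PERCENTAGE_ISSET_ID);   // mark as set
    }

    void unsetStartTime() {
        issetBitfield &= ~(1 << START_TIME_ISSET_ID);             // clear the flag
    }

    boolean isSetProgressedPercentage() {
        return (issetBitfield & (1 << PROGRESSED_PERCENTAGE_ISSET_ID)) != 0;
    }

    boolean isSetStartTime() {
        return (issetBitfield & (1 << START_TIME_ISSET_ID)) != 0;
    }
}
```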
- */ - public TProgressUpdateResp(TProgressUpdateResp other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetHeaderNames()) { - List __this__headerNames = new ArrayList(other.headerNames); - this.headerNames = __this__headerNames; - } - if (other.isSetRows()) { - List> __this__rows = new ArrayList>(other.rows.size()); - for (List other_element : other.rows) { - List __this__rows_copy = new ArrayList(other_element); - __this__rows.add(__this__rows_copy); - } - this.rows = __this__rows; - } - this.progressedPercentage = other.progressedPercentage; - if (other.isSetStatus()) { - this.status = other.status; - } - if (other.isSetFooterSummary()) { - this.footerSummary = other.footerSummary; - } - this.startTime = other.startTime; - } - - public TProgressUpdateResp deepCopy() { - return new TProgressUpdateResp(this); - } - - @Override - public void clear() { - this.headerNames = null; - this.rows = null; - setProgressedPercentageIsSet(false); - this.progressedPercentage = 0.0; - this.status = null; - this.footerSummary = null; - setStartTimeIsSet(false); - this.startTime = 0; - } - - public int getHeaderNamesSize() { - return (this.headerNames == null) ? 0 : this.headerNames.size(); - } - - public java.util.Iterator getHeaderNamesIterator() { - return (this.headerNames == null) ? null : this.headerNames.iterator(); - } - - public void addToHeaderNames(String elem) { - if (this.headerNames == null) { - this.headerNames = new ArrayList(); - } - this.headerNames.add(elem); - } - - public List getHeaderNames() { - return this.headerNames; - } - - public void setHeaderNames(List headerNames) { - this.headerNames = headerNames; - } - - public void unsetHeaderNames() { - this.headerNames = null; - } - - /** Returns true if field headerNames is set (has been assigned a value) and false otherwise */ - public boolean isSetHeaderNames() { - return this.headerNames != null; - } - - public void setHeaderNamesIsSet(boolean value) { - if (!value) { - this.headerNames = null; - } - } - - public int getRowsSize() { - return (this.rows == null) ? 0 : this.rows.size(); - } - - public java.util.Iterator> getRowsIterator() { - return (this.rows == null) ? 
null : this.rows.iterator(); - } - - public void addToRows(List elem) { - if (this.rows == null) { - this.rows = new ArrayList>(); - } - this.rows.add(elem); - } - - public List> getRows() { - return this.rows; - } - - public void setRows(List> rows) { - this.rows = rows; - } - - public void unsetRows() { - this.rows = null; - } - - /** Returns true if field rows is set (has been assigned a value) and false otherwise */ - public boolean isSetRows() { - return this.rows != null; - } - - public void setRowsIsSet(boolean value) { - if (!value) { - this.rows = null; - } - } - - public double getProgressedPercentage() { - return this.progressedPercentage; - } - - public void setProgressedPercentage(double progressedPercentage) { - this.progressedPercentage = progressedPercentage; - setProgressedPercentageIsSet(true); - } - - public void unsetProgressedPercentage() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __PROGRESSEDPERCENTAGE_ISSET_ID); - } - - /** Returns true if field progressedPercentage is set (has been assigned a value) and false otherwise */ - public boolean isSetProgressedPercentage() { - return EncodingUtils.testBit(__isset_bitfield, __PROGRESSEDPERCENTAGE_ISSET_ID); - } - - public void setProgressedPercentageIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __PROGRESSEDPERCENTAGE_ISSET_ID, value); - } - - /** - * - * @see TJobExecutionStatus - */ - public TJobExecutionStatus getStatus() { - return this.status; - } - - /** - * - * @see TJobExecutionStatus - */ - public void setStatus(TJobExecutionStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public String getFooterSummary() { - return this.footerSummary; - } - - public void setFooterSummary(String footerSummary) { - this.footerSummary = footerSummary; - } - - public void unsetFooterSummary() { - this.footerSummary = null; - } - - /** Returns true if field footerSummary is set (has been assigned a value) and false otherwise */ - public boolean isSetFooterSummary() { - return this.footerSummary != null; - } - - public void setFooterSummaryIsSet(boolean value) { - if (!value) { - this.footerSummary = null; - } - } - - public long getStartTime() { - return this.startTime; - } - - public void setStartTime(long startTime) { - this.startTime = startTime; - setStartTimeIsSet(true); - } - - public void unsetStartTime() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __STARTTIME_ISSET_ID); - } - - /** Returns true if field startTime is set (has been assigned a value) and false otherwise */ - public boolean isSetStartTime() { - return EncodingUtils.testBit(__isset_bitfield, __STARTTIME_ISSET_ID); - } - - public void setStartTimeIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __STARTTIME_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case HEADER_NAMES: - if (value == null) { - unsetHeaderNames(); - } else { - setHeaderNames((List)value); - } - break; - - case ROWS: - if (value == null) { - unsetRows(); - } else { - setRows((List>)value); - } - break; - - case PROGRESSED_PERCENTAGE: - if (value == null) { - unsetProgressedPercentage(); - } else { - 
setProgressedPercentage((Double)value); - } - break; - - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TJobExecutionStatus)value); - } - break; - - case FOOTER_SUMMARY: - if (value == null) { - unsetFooterSummary(); - } else { - setFooterSummary((String)value); - } - break; - - case START_TIME: - if (value == null) { - unsetStartTime(); - } else { - setStartTime((Long)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case HEADER_NAMES: - return getHeaderNames(); - - case ROWS: - return getRows(); - - case PROGRESSED_PERCENTAGE: - return getProgressedPercentage(); - - case STATUS: - return getStatus(); - - case FOOTER_SUMMARY: - return getFooterSummary(); - - case START_TIME: - return getStartTime(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case HEADER_NAMES: - return isSetHeaderNames(); - case ROWS: - return isSetRows(); - case PROGRESSED_PERCENTAGE: - return isSetProgressedPercentage(); - case STATUS: - return isSetStatus(); - case FOOTER_SUMMARY: - return isSetFooterSummary(); - case START_TIME: - return isSetStartTime(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TProgressUpdateResp) - return this.equals((TProgressUpdateResp)that); - return false; - } - - public boolean equals(TProgressUpdateResp that) { - if (that == null) - return false; - - boolean this_present_headerNames = true && this.isSetHeaderNames(); - boolean that_present_headerNames = true && that.isSetHeaderNames(); - if (this_present_headerNames || that_present_headerNames) { - if (!(this_present_headerNames && that_present_headerNames)) - return false; - if (!this.headerNames.equals(that.headerNames)) - return false; - } - - boolean this_present_rows = true && this.isSetRows(); - boolean that_present_rows = true && that.isSetRows(); - if (this_present_rows || that_present_rows) { - if (!(this_present_rows && that_present_rows)) - return false; - if (!this.rows.equals(that.rows)) - return false; - } - - boolean this_present_progressedPercentage = true; - boolean that_present_progressedPercentage = true; - if (this_present_progressedPercentage || that_present_progressedPercentage) { - if (!(this_present_progressedPercentage && that_present_progressedPercentage)) - return false; - if (this.progressedPercentage != that.progressedPercentage) - return false; - } - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_footerSummary = true && this.isSetFooterSummary(); - boolean that_present_footerSummary = true && that.isSetFooterSummary(); - if (this_present_footerSummary || that_present_footerSummary) { - if (!(this_present_footerSummary && that_present_footerSummary)) - return false; - if (!this.footerSummary.equals(that.footerSummary)) - return false; - } - - boolean this_present_startTime = true; - boolean that_present_startTime = true; - if (this_present_startTime || that_present_startTime) { - if 
(!(this_present_startTime && that_present_startTime)) - return false; - if (this.startTime != that.startTime) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_headerNames = true && (isSetHeaderNames()); - list.add(present_headerNames); - if (present_headerNames) - list.add(headerNames); - - boolean present_rows = true && (isSetRows()); - list.add(present_rows); - if (present_rows) - list.add(rows); - - boolean present_progressedPercentage = true; - list.add(present_progressedPercentage); - if (present_progressedPercentage) - list.add(progressedPercentage); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if (present_status) - list.add(status.getValue()); - - boolean present_footerSummary = true && (isSetFooterSummary()); - list.add(present_footerSummary); - if (present_footerSummary) - list.add(footerSummary); - - boolean present_startTime = true; - list.add(present_startTime); - if (present_startTime) - list.add(startTime); - - return list.hashCode(); - } - - @Override - public int compareTo(TProgressUpdateResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetHeaderNames()).compareTo(other.isSetHeaderNames()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetHeaderNames()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.headerNames, other.headerNames); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetRows()).compareTo(other.isSetRows()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetRows()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.rows, other.rows); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetProgressedPercentage()).compareTo(other.isSetProgressedPercentage()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetProgressedPercentage()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.progressedPercentage, other.progressedPercentage); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetFooterSummary()).compareTo(other.isSetFooterSummary()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetFooterSummary()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.footerSummary, other.footerSummary); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetStartTime()).compareTo(other.isSetStartTime()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStartTime()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.startTime, other.startTime); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - 
schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TProgressUpdateResp("); - boolean first = true; - - sb.append("headerNames:"); - if (this.headerNames == null) { - sb.append("null"); - } else { - sb.append(this.headerNames); - } - first = false; - if (!first) sb.append(", "); - sb.append("rows:"); - if (this.rows == null) { - sb.append("null"); - } else { - sb.append(this.rows); - } - first = false; - if (!first) sb.append(", "); - sb.append("progressedPercentage:"); - sb.append(this.progressedPercentage); - first = false; - if (!first) sb.append(", "); - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (!first) sb.append(", "); - sb.append("footerSummary:"); - if (this.footerSummary == null) { - sb.append("null"); - } else { - sb.append(this.footerSummary); - } - first = false; - if (!first) sb.append(", "); - sb.append("startTime:"); - sb.append(this.startTime); - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetHeaderNames()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'headerNames' is unset! Struct:" + toString()); - } - - if (!isSetRows()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'rows' is unset! Struct:" + toString()); - } - - if (!isSetProgressedPercentage()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'progressedPercentage' is unset! Struct:" + toString()); - } - - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - if (!isSetFooterSummary()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'footerSummary' is unset! Struct:" + toString()); - } - - if (!isSetStartTime()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'startTime' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
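The private writeObject/readObject methods above are the standard Java serialization hooks; the generated class uses them to funnel java.io serialization through a Thrift protocol and, on the read side, to re-initialize the isset bitfield by hand because deserialization bypasses the no-arg constructor. A simplified sketch of the same hook pattern, where defaultWriteObject/defaultReadObject stand in for the TCompactProtocol round-trip the generated code performs:

```java
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;

// Custom Java-serialization hooks: redirect the wire format and manually
// restore transient bookkeeping, since no constructor or field initializer
// of this class runs during deserialization.
final class SerializationHookDemo implements Serializable {
    private static final long serialVersionUID = 1L;

    private transient byte issetBitfield = 0;
    private long startTime;

    private void writeObject(ObjectOutputStream out) throws IOException {
        out.defaultWriteObject();          // stand-in for writing via a compact protocol
    }

    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
        issetBitfield = 0;                 // reset state the constructor would normally set
        in.defaultReadObject();            // stand-in for reading via a compact protocol
    }
}
```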
- __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TProgressUpdateRespStandardSchemeFactory implements SchemeFactory { - public TProgressUpdateRespStandardScheme getScheme() { - return new TProgressUpdateRespStandardScheme(); - } - } - - private static class TProgressUpdateRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TProgressUpdateResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // HEADER_NAMES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list180 = iprot.readListBegin(); - struct.headerNames = new ArrayList(_list180.size); - String _elem181; - for (int _i182 = 0; _i182 < _list180.size; ++_i182) - { - _elem181 = iprot.readString(); - struct.headerNames.add(_elem181); - } - iprot.readListEnd(); - } - struct.setHeaderNamesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // ROWS - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list183 = iprot.readListBegin(); - struct.rows = new ArrayList>(_list183.size); - List _elem184; - for (int _i185 = 0; _i185 < _list183.size; ++_i185) - { - { - org.apache.thrift.protocol.TList _list186 = iprot.readListBegin(); - _elem184 = new ArrayList(_list186.size); - String _elem187; - for (int _i188 = 0; _i188 < _list186.size; ++_i188) - { - _elem187 = iprot.readString(); - _elem184.add(_elem187); - } - iprot.readListEnd(); - } - struct.rows.add(_elem184); - } - iprot.readListEnd(); - } - struct.setRowsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // PROGRESSED_PERCENTAGE - if (schemeField.type == org.apache.thrift.protocol.TType.DOUBLE) { - struct.progressedPercentage = iprot.readDouble(); - struct.setProgressedPercentageIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.status = org.apache.hive.service.rpc.thrift.TJobExecutionStatus.findByValue(iprot.readI32()); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 5: // FOOTER_SUMMARY - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.footerSummary = iprot.readString(); - struct.setFooterSummaryIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 6: // START_TIME - if (schemeField.type == org.apache.thrift.protocol.TType.I64) { - struct.startTime = iprot.readI64(); - struct.setStartTimeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol 
oprot, TProgressUpdateResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.headerNames != null) { - oprot.writeFieldBegin(HEADER_NAMES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, struct.headerNames.size())); - for (String _iter189 : struct.headerNames) - { - oprot.writeString(_iter189); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.rows != null) { - oprot.writeFieldBegin(ROWS_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.LIST, struct.rows.size())); - for (List _iter190 : struct.rows) - { - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, _iter190.size())); - for (String _iter191 : _iter190) - { - oprot.writeString(_iter191); - } - oprot.writeListEnd(); - } - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - oprot.writeFieldBegin(PROGRESSED_PERCENTAGE_FIELD_DESC); - oprot.writeDouble(struct.progressedPercentage); - oprot.writeFieldEnd(); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - oprot.writeI32(struct.status.getValue()); - oprot.writeFieldEnd(); - } - if (struct.footerSummary != null) { - oprot.writeFieldBegin(FOOTER_SUMMARY_FIELD_DESC); - oprot.writeString(struct.footerSummary); - oprot.writeFieldEnd(); - } - oprot.writeFieldBegin(START_TIME_FIELD_DESC); - oprot.writeI64(struct.startTime); - oprot.writeFieldEnd(); - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TProgressUpdateRespTupleSchemeFactory implements SchemeFactory { - public TProgressUpdateRespTupleScheme getScheme() { - return new TProgressUpdateRespTupleScheme(); - } - } - - private static class TProgressUpdateRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TProgressUpdateResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.headerNames.size()); - for (String _iter192 : struct.headerNames) - { - oprot.writeString(_iter192); - } - } - { - oprot.writeI32(struct.rows.size()); - for (List _iter193 : struct.rows) - { - { - oprot.writeI32(_iter193.size()); - for (String _iter194 : _iter193) - { - oprot.writeString(_iter194); - } - } - } - } - oprot.writeDouble(struct.progressedPercentage); - oprot.writeI32(struct.status.getValue()); - oprot.writeString(struct.footerSummary); - oprot.writeI64(struct.startTime); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TProgressUpdateResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list195 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.headerNames = new ArrayList(_list195.size); - String _elem196; - for (int _i197 = 0; _i197 < _list195.size; ++_i197) - { - _elem196 = iprot.readString(); - struct.headerNames.add(_elem196); - } - } - struct.setHeaderNamesIsSet(true); - { - org.apache.thrift.protocol.TList _list198 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.LIST, iprot.readI32()); - struct.rows = new ArrayList>(_list198.size); - List _elem199; - for (int _i200 = 0; _i200 < _list198.size; ++_i200) - { - { - org.apache.thrift.protocol.TList _list201 = new 
org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - _elem199 = new ArrayList(_list201.size); - String _elem202; - for (int _i203 = 0; _i203 < _list201.size; ++_i203) - { - _elem202 = iprot.readString(); - _elem199.add(_elem202); - } - } - struct.rows.add(_elem199); - } - } - struct.setRowsIsSet(true); - struct.progressedPercentage = iprot.readDouble(); - struct.setProgressedPercentageIsSet(true); - struct.status = org.apache.hive.service.rpc.thrift.TJobExecutionStatus.findByValue(iprot.readI32()); - struct.setStatusIsSet(true); - struct.footerSummary = iprot.readString(); - struct.setFooterSummaryIsSet(true); - struct.startTime = iprot.readI64(); - struct.setStartTimeIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TProtocolVersion.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TProtocolVersion.java deleted file mode 100644 index 18a782513c500..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TProtocolVersion.java +++ /dev/null @@ -1,69 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - - -import java.util.Map; -import java.util.HashMap; -import org.apache.thrift.TEnum; - -public enum TProtocolVersion implements org.apache.thrift.TEnum { - HIVE_CLI_SERVICE_PROTOCOL_V1(0), - HIVE_CLI_SERVICE_PROTOCOL_V2(1), - HIVE_CLI_SERVICE_PROTOCOL_V3(2), - HIVE_CLI_SERVICE_PROTOCOL_V4(3), - HIVE_CLI_SERVICE_PROTOCOL_V5(4), - HIVE_CLI_SERVICE_PROTOCOL_V6(5), - HIVE_CLI_SERVICE_PROTOCOL_V7(6), - HIVE_CLI_SERVICE_PROTOCOL_V8(7), - HIVE_CLI_SERVICE_PROTOCOL_V9(8), - HIVE_CLI_SERVICE_PROTOCOL_V10(9); - - private final int value; - - private TProtocolVersion(int value) { - this.value = value; - } - - /** - * Get the integer value of this enum value, as defined in the Thrift IDL. - */ - public int getValue() { - return value; - } - - /** - * Find a the enum type by its integer value, as defined in the Thrift IDL. - * @return null if the value is not found. 
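TProtocolVersion is a plain int-valued Thrift enum: each HIVE_CLI_SERVICE_PROTOCOL_V* constant carries its IDL value, and findByValue below returns null for integers it does not recognize rather than throwing. An equivalent stand-alone sketch, using a precomputed array instead of the generated switch (the names here are illustrative):

```java
// Illustrative value-to-enum lookup, matching the behavior of the generated
// findByValue switch: unknown values map to null instead of an exception.
enum DemoProtocolVersion {
    V1(0), V2(1), V3(2);

    private static final DemoProtocolVersion[] BY_VALUE =
        new DemoProtocolVersion[values().length];
    static {
        for (DemoProtocolVersion v : values()) {
            BY_VALUE[v.value] = v;
        }
    }

    private final int value;

    DemoProtocolVersion(int value) {
        this.value = value;
    }

    int getValue() {
        return value;
    }

    static DemoProtocolVersion findByValue(int value) {
        return (value >= 0 && value < BY_VALUE.length) ? BY_VALUE[value] : null;
    }
}
```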
- */ - public static TProtocolVersion findByValue(int value) { - switch (value) { - case 0: - return HIVE_CLI_SERVICE_PROTOCOL_V1; - case 1: - return HIVE_CLI_SERVICE_PROTOCOL_V2; - case 2: - return HIVE_CLI_SERVICE_PROTOCOL_V3; - case 3: - return HIVE_CLI_SERVICE_PROTOCOL_V4; - case 4: - return HIVE_CLI_SERVICE_PROTOCOL_V5; - case 5: - return HIVE_CLI_SERVICE_PROTOCOL_V6; - case 6: - return HIVE_CLI_SERVICE_PROTOCOL_V7; - case 7: - return HIVE_CLI_SERVICE_PROTOCOL_V8; - case 8: - return HIVE_CLI_SERVICE_PROTOCOL_V9; - case 9: - return HIVE_CLI_SERVICE_PROTOCOL_V10; - default: - return null; - } - } -} diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenReq.java deleted file mode 100644 index 8957ebc8d2fff..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenReq.java +++ /dev/null @@ -1,495 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TRenewDelegationTokenReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TRenewDelegationTokenReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField DELEGATION_TOKEN_FIELD_DESC = new org.apache.thrift.protocol.TField("delegationToken", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TRenewDelegationTokenReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TRenewDelegationTokenReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String delegationToken; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - DELEGATION_TOKEN((short)2, "delegationToken"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // DELEGATION_TOKEN - return DELEGATION_TOKEN; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.DELEGATION_TOKEN, new org.apache.thrift.meta_data.FieldMetaData("delegationToken", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TRenewDelegationTokenReq.class, metaDataMap); - } - - public TRenewDelegationTokenReq() { - } - - public TRenewDelegationTokenReq( - TSessionHandle sessionHandle, - String delegationToken) - { - this(); - this.sessionHandle = sessionHandle; - this.delegationToken = delegationToken; - } - - /** - * Performs a deep copy on other. 
- */ - public TRenewDelegationTokenReq(TRenewDelegationTokenReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetDelegationToken()) { - this.delegationToken = other.delegationToken; - } - } - - public TRenewDelegationTokenReq deepCopy() { - return new TRenewDelegationTokenReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.delegationToken = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getDelegationToken() { - return this.delegationToken; - } - - public void setDelegationToken(String delegationToken) { - this.delegationToken = delegationToken; - } - - public void unsetDelegationToken() { - this.delegationToken = null; - } - - /** Returns true if field delegationToken is set (has been assigned a value) and false otherwise */ - public boolean isSetDelegationToken() { - return this.delegationToken != null; - } - - public void setDelegationTokenIsSet(boolean value) { - if (!value) { - this.delegationToken = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case DELEGATION_TOKEN: - if (value == null) { - unsetDelegationToken(); - } else { - setDelegationToken((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case DELEGATION_TOKEN: - return getDelegationToken(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case DELEGATION_TOKEN: - return isSetDelegationToken(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TRenewDelegationTokenReq) - return this.equals((TRenewDelegationTokenReq)that); - return false; - } - - public boolean equals(TRenewDelegationTokenReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_delegationToken = true && this.isSetDelegationToken(); - boolean that_present_delegationToken = true && that.isSetDelegationToken(); - if (this_present_delegationToken || that_present_delegationToken) { - if (!(this_present_delegationToken && that_present_delegationToken)) - return 
false; - if (!this.delegationToken.equals(that.delegationToken)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - boolean present_delegationToken = true && (isSetDelegationToken()); - list.add(present_delegationToken); - if (present_delegationToken) - list.add(delegationToken); - - return list.hashCode(); - } - - @Override - public int compareTo(TRenewDelegationTokenReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetDelegationToken()).compareTo(other.isSetDelegationToken()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetDelegationToken()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.delegationToken, other.delegationToken); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TRenewDelegationTokenReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (!first) sb.append(", "); - sb.append("delegationToken:"); - if (this.delegationToken == null) { - sb.append("null"); - } else { - sb.append(this.delegationToken); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - if (!isSetDelegationToken()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'delegationToken' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TRenewDelegationTokenReqStandardSchemeFactory implements SchemeFactory { - public TRenewDelegationTokenReqStandardScheme getScheme() { - return new TRenewDelegationTokenReqStandardScheme(); - } - } - - private static class TRenewDelegationTokenReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TRenewDelegationTokenReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // DELEGATION_TOKEN - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.delegationToken = iprot.readString(); - struct.setDelegationTokenIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TRenewDelegationTokenReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.delegationToken != null) { - oprot.writeFieldBegin(DELEGATION_TOKEN_FIELD_DESC); - oprot.writeString(struct.delegationToken); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TRenewDelegationTokenReqTupleSchemeFactory implements SchemeFactory { - public TRenewDelegationTokenReqTupleScheme getScheme() { - return new TRenewDelegationTokenReqTupleScheme(); - } - } - - private static class TRenewDelegationTokenReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TRenewDelegationTokenReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - oprot.writeString(struct.delegationToken); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TRenewDelegationTokenReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = 
(TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - struct.delegationToken = iprot.readString(); - struct.setDelegationTokenIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenResp.java deleted file mode 100644 index 6f5004ccc38e4..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenResp.java +++ /dev/null @@ -1,394 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TRenewDelegationTokenResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TRenewDelegationTokenResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TRenewDelegationTokenRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TRenewDelegationTokenRespTupleSchemeFactory()); - } - - private TStatus status; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TRenewDelegationTokenResp.class, metaDataMap); - } - - public TRenewDelegationTokenResp() { - } - - public TRenewDelegationTokenResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TRenewDelegationTokenResp(TRenewDelegationTokenResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - } - - public TRenewDelegationTokenResp deepCopy() { - return new TRenewDelegationTokenResp(this); - } - - @Override - public void clear() { - this.status = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TRenewDelegationTokenResp) - return this.equals((TRenewDelegationTokenResp)that); - return false; - } - - public boolean equals(TRenewDelegationTokenResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return 
false; - if (!this.status.equals(that.status)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if (present_status) - list.add(status); - - return list.hashCode(); - } - - @Override - public int compareTo(TRenewDelegationTokenResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TRenewDelegationTokenResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TRenewDelegationTokenRespStandardSchemeFactory implements SchemeFactory { - public TRenewDelegationTokenRespStandardScheme getScheme() { - return new TRenewDelegationTokenRespStandardScheme(); - } - } - - private static class TRenewDelegationTokenRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TRenewDelegationTokenResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TRenewDelegationTokenResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TRenewDelegationTokenRespTupleSchemeFactory implements SchemeFactory { - public TRenewDelegationTokenRespTupleScheme getScheme() { - return new TRenewDelegationTokenRespTupleScheme(); - } - } - - private static class TRenewDelegationTokenRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TRenewDelegationTokenResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TRenewDelegationTokenResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRow.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRow.java deleted file mode 100644 index e95299df97c3a..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRow.java +++ /dev/null @@ -1,443 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU 
ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TRow implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TRow"); - - private static final org.apache.thrift.protocol.TField COL_VALS_FIELD_DESC = new org.apache.thrift.protocol.TField("colVals", org.apache.thrift.protocol.TType.LIST, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TRowStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TRowTupleSchemeFactory()); - } - - private List colVals; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - COL_VALS((short)1, "colVals"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // COL_VALS - return COL_VALS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.COL_VALS, new org.apache.thrift.meta_data.FieldMetaData("colVals", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TColumnValue.class)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TRow.class, metaDataMap); - } - - public TRow() { - } - - public TRow( - List colVals) - { - this(); - this.colVals = colVals; - } - - /** - * Performs a deep copy on other. - */ - public TRow(TRow other) { - if (other.isSetColVals()) { - List __this__colVals = new ArrayList(other.colVals.size()); - for (TColumnValue other_element : other.colVals) { - __this__colVals.add(new TColumnValue(other_element)); - } - this.colVals = __this__colVals; - } - } - - public TRow deepCopy() { - return new TRow(this); - } - - @Override - public void clear() { - this.colVals = null; - } - - public int getColValsSize() { - return (this.colVals == null) ? 0 : this.colVals.size(); - } - - public java.util.Iterator getColValsIterator() { - return (this.colVals == null) ? 
null : this.colVals.iterator(); - } - - public void addToColVals(TColumnValue elem) { - if (this.colVals == null) { - this.colVals = new ArrayList(); - } - this.colVals.add(elem); - } - - public List getColVals() { - return this.colVals; - } - - public void setColVals(List colVals) { - this.colVals = colVals; - } - - public void unsetColVals() { - this.colVals = null; - } - - /** Returns true if field colVals is set (has been assigned a value) and false otherwise */ - public boolean isSetColVals() { - return this.colVals != null; - } - - public void setColValsIsSet(boolean value) { - if (!value) { - this.colVals = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case COL_VALS: - if (value == null) { - unsetColVals(); - } else { - setColVals((List)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case COL_VALS: - return getColVals(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case COL_VALS: - return isSetColVals(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TRow) - return this.equals((TRow)that); - return false; - } - - public boolean equals(TRow that) { - if (that == null) - return false; - - boolean this_present_colVals = true && this.isSetColVals(); - boolean that_present_colVals = true && that.isSetColVals(); - if (this_present_colVals || that_present_colVals) { - if (!(this_present_colVals && that_present_colVals)) - return false; - if (!this.colVals.equals(that.colVals)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_colVals = true && (isSetColVals()); - list.add(present_colVals); - if (present_colVals) - list.add(colVals); - - return list.hashCode(); - } - - @Override - public int compareTo(TRow other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetColVals()).compareTo(other.isSetColVals()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetColVals()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.colVals, other.colVals); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TRow("); - boolean first = true; - - sb.append("colVals:"); - if (this.colVals == null) { - sb.append("null"); - } else { - sb.append(this.colVals); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetColVals()) { - throw new 
org.apache.thrift.protocol.TProtocolException("Required field 'colVals' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TRowStandardSchemeFactory implements SchemeFactory { - public TRowStandardScheme getScheme() { - return new TRowStandardScheme(); - } - } - - private static class TRowStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TRow struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // COL_VALS - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list46 = iprot.readListBegin(); - struct.colVals = new ArrayList(_list46.size); - TColumnValue _elem47; - for (int _i48 = 0; _i48 < _list46.size; ++_i48) - { - _elem47 = new TColumnValue(); - _elem47.read(iprot); - struct.colVals.add(_elem47); - } - iprot.readListEnd(); - } - struct.setColValsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TRow struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.colVals != null) { - oprot.writeFieldBegin(COL_VALS_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, struct.colVals.size())); - for (TColumnValue _iter49 : struct.colVals) - { - _iter49.write(oprot); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TRowTupleSchemeFactory implements SchemeFactory { - public TRowTupleScheme getScheme() { - return new TRowTupleScheme(); - } - } - - private static class TRowTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TRow struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.colVals.size()); - for (TColumnValue _iter50 : struct.colVals) - { - _iter50.write(oprot); - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TRow struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list51 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); - struct.colVals = new ArrayList(_list51.size); - 
TColumnValue _elem52; - for (int _i53 = 0; _i53 < _list51.size; ++_i53) - { - _elem52 = new TColumnValue(); - _elem52.read(iprot); - struct.colVals.add(_elem52); - } - } - struct.setColValsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRowSet.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRowSet.java deleted file mode 100644 index da3d9d3ca8820..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRowSet.java +++ /dev/null @@ -1,920 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TRowSet implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TRowSet"); - - private static final org.apache.thrift.protocol.TField START_ROW_OFFSET_FIELD_DESC = new org.apache.thrift.protocol.TField("startRowOffset", org.apache.thrift.protocol.TType.I64, (short)1); - private static final org.apache.thrift.protocol.TField ROWS_FIELD_DESC = new org.apache.thrift.protocol.TField("rows", org.apache.thrift.protocol.TType.LIST, (short)2); - private static final org.apache.thrift.protocol.TField COLUMNS_FIELD_DESC = new org.apache.thrift.protocol.TField("columns", org.apache.thrift.protocol.TType.LIST, (short)3); - private static final org.apache.thrift.protocol.TField BINARY_COLUMNS_FIELD_DESC = new org.apache.thrift.protocol.TField("binaryColumns", org.apache.thrift.protocol.TType.STRING, (short)4); - private static final org.apache.thrift.protocol.TField COLUMN_COUNT_FIELD_DESC = new org.apache.thrift.protocol.TField("columnCount", org.apache.thrift.protocol.TType.I32, (short)5); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TRowSetStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TRowSetTupleSchemeFactory()); - } - - private long startRowOffset; // required - private List rows; // required - private List columns; // optional - private ByteBuffer binaryColumns; // optional - private int columnCount; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - START_ROW_OFFSET((short)1, "startRowOffset"), - ROWS((short)2, "rows"), - COLUMNS((short)3, "columns"), - BINARY_COLUMNS((short)4, "binaryColumns"), - COLUMN_COUNT((short)5, "columnCount"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // START_ROW_OFFSET - return START_ROW_OFFSET; - case 2: // ROWS - return ROWS; - case 3: // COLUMNS - return COLUMNS; - case 4: // BINARY_COLUMNS - return BINARY_COLUMNS; - case 5: // COLUMN_COUNT - return COLUMN_COUNT; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __STARTROWOFFSET_ISSET_ID = 0; - private static final int __COLUMNCOUNT_ISSET_ID = 1; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.COLUMNS,_Fields.BINARY_COLUMNS,_Fields.COLUMN_COUNT}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.START_ROW_OFFSET, new org.apache.thrift.meta_data.FieldMetaData("startRowOffset", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - tmpMap.put(_Fields.ROWS, new org.apache.thrift.meta_data.FieldMetaData("rows", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TRow.class)))); - tmpMap.put(_Fields.COLUMNS, new org.apache.thrift.meta_data.FieldMetaData("columns", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TColumn.class)))); - tmpMap.put(_Fields.BINARY_COLUMNS, new org.apache.thrift.meta_data.FieldMetaData("binaryColumns", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - tmpMap.put(_Fields.COLUMN_COUNT, new org.apache.thrift.meta_data.FieldMetaData("columnCount", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new 
org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TRowSet.class, metaDataMap); - } - - public TRowSet() { - } - - public TRowSet( - long startRowOffset, - List rows) - { - this(); - this.startRowOffset = startRowOffset; - setStartRowOffsetIsSet(true); - this.rows = rows; - } - - /** - * Performs a deep copy on other. - */ - public TRowSet(TRowSet other) { - __isset_bitfield = other.__isset_bitfield; - this.startRowOffset = other.startRowOffset; - if (other.isSetRows()) { - List __this__rows = new ArrayList(other.rows.size()); - for (TRow other_element : other.rows) { - __this__rows.add(new TRow(other_element)); - } - this.rows = __this__rows; - } - if (other.isSetColumns()) { - List __this__columns = new ArrayList(other.columns.size()); - for (TColumn other_element : other.columns) { - __this__columns.add(new TColumn(other_element)); - } - this.columns = __this__columns; - } - if (other.isSetBinaryColumns()) { - this.binaryColumns = org.apache.thrift.TBaseHelper.copyBinary(other.binaryColumns); - } - this.columnCount = other.columnCount; - } - - public TRowSet deepCopy() { - return new TRowSet(this); - } - - @Override - public void clear() { - setStartRowOffsetIsSet(false); - this.startRowOffset = 0; - this.rows = null; - this.columns = null; - this.binaryColumns = null; - setColumnCountIsSet(false); - this.columnCount = 0; - } - - public long getStartRowOffset() { - return this.startRowOffset; - } - - public void setStartRowOffset(long startRowOffset) { - this.startRowOffset = startRowOffset; - setStartRowOffsetIsSet(true); - } - - public void unsetStartRowOffset() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __STARTROWOFFSET_ISSET_ID); - } - - /** Returns true if field startRowOffset is set (has been assigned a value) and false otherwise */ - public boolean isSetStartRowOffset() { - return EncodingUtils.testBit(__isset_bitfield, __STARTROWOFFSET_ISSET_ID); - } - - public void setStartRowOffsetIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __STARTROWOFFSET_ISSET_ID, value); - } - - public int getRowsSize() { - return (this.rows == null) ? 0 : this.rows.size(); - } - - public java.util.Iterator getRowsIterator() { - return (this.rows == null) ? null : this.rows.iterator(); - } - - public void addToRows(TRow elem) { - if (this.rows == null) { - this.rows = new ArrayList(); - } - this.rows.add(elem); - } - - public List getRows() { - return this.rows; - } - - public void setRows(List rows) { - this.rows = rows; - } - - public void unsetRows() { - this.rows = null; - } - - /** Returns true if field rows is set (has been assigned a value) and false otherwise */ - public boolean isSetRows() { - return this.rows != null; - } - - public void setRowsIsSet(boolean value) { - if (!value) { - this.rows = null; - } - } - - public int getColumnsSize() { - return (this.columns == null) ? 0 : this.columns.size(); - } - - public java.util.Iterator getColumnsIterator() { - return (this.columns == null) ? 
null : this.columns.iterator(); - } - - public void addToColumns(TColumn elem) { - if (this.columns == null) { - this.columns = new ArrayList(); - } - this.columns.add(elem); - } - - public List getColumns() { - return this.columns; - } - - public void setColumns(List columns) { - this.columns = columns; - } - - public void unsetColumns() { - this.columns = null; - } - - /** Returns true if field columns is set (has been assigned a value) and false otherwise */ - public boolean isSetColumns() { - return this.columns != null; - } - - public void setColumnsIsSet(boolean value) { - if (!value) { - this.columns = null; - } - } - - public byte[] getBinaryColumns() { - setBinaryColumns(org.apache.thrift.TBaseHelper.rightSize(binaryColumns)); - return binaryColumns == null ? null : binaryColumns.array(); - } - - public ByteBuffer bufferForBinaryColumns() { - return org.apache.thrift.TBaseHelper.copyBinary(binaryColumns); - } - - public void setBinaryColumns(byte[] binaryColumns) { - this.binaryColumns = binaryColumns == null ? (ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(binaryColumns, binaryColumns.length)); - } - - public void setBinaryColumns(ByteBuffer binaryColumns) { - this.binaryColumns = org.apache.thrift.TBaseHelper.copyBinary(binaryColumns); - } - - public void unsetBinaryColumns() { - this.binaryColumns = null; - } - - /** Returns true if field binaryColumns is set (has been assigned a value) and false otherwise */ - public boolean isSetBinaryColumns() { - return this.binaryColumns != null; - } - - public void setBinaryColumnsIsSet(boolean value) { - if (!value) { - this.binaryColumns = null; - } - } - - public int getColumnCount() { - return this.columnCount; - } - - public void setColumnCount(int columnCount) { - this.columnCount = columnCount; - setColumnCountIsSet(true); - } - - public void unsetColumnCount() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __COLUMNCOUNT_ISSET_ID); - } - - /** Returns true if field columnCount is set (has been assigned a value) and false otherwise */ - public boolean isSetColumnCount() { - return EncodingUtils.testBit(__isset_bitfield, __COLUMNCOUNT_ISSET_ID); - } - - public void setColumnCountIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __COLUMNCOUNT_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case START_ROW_OFFSET: - if (value == null) { - unsetStartRowOffset(); - } else { - setStartRowOffset((Long)value); - } - break; - - case ROWS: - if (value == null) { - unsetRows(); - } else { - setRows((List)value); - } - break; - - case COLUMNS: - if (value == null) { - unsetColumns(); - } else { - setColumns((List)value); - } - break; - - case BINARY_COLUMNS: - if (value == null) { - unsetBinaryColumns(); - } else { - setBinaryColumns((ByteBuffer)value); - } - break; - - case COLUMN_COUNT: - if (value == null) { - unsetColumnCount(); - } else { - setColumnCount((Integer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case START_ROW_OFFSET: - return getStartRowOffset(); - - case ROWS: - return getRows(); - - case COLUMNS: - return getColumns(); - - case BINARY_COLUMNS: - return getBinaryColumns(); - - case COLUMN_COUNT: - return getColumnCount(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new 
IllegalArgumentException(); - } - - switch (field) { - case START_ROW_OFFSET: - return isSetStartRowOffset(); - case ROWS: - return isSetRows(); - case COLUMNS: - return isSetColumns(); - case BINARY_COLUMNS: - return isSetBinaryColumns(); - case COLUMN_COUNT: - return isSetColumnCount(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TRowSet) - return this.equals((TRowSet)that); - return false; - } - - public boolean equals(TRowSet that) { - if (that == null) - return false; - - boolean this_present_startRowOffset = true; - boolean that_present_startRowOffset = true; - if (this_present_startRowOffset || that_present_startRowOffset) { - if (!(this_present_startRowOffset && that_present_startRowOffset)) - return false; - if (this.startRowOffset != that.startRowOffset) - return false; - } - - boolean this_present_rows = true && this.isSetRows(); - boolean that_present_rows = true && that.isSetRows(); - if (this_present_rows || that_present_rows) { - if (!(this_present_rows && that_present_rows)) - return false; - if (!this.rows.equals(that.rows)) - return false; - } - - boolean this_present_columns = true && this.isSetColumns(); - boolean that_present_columns = true && that.isSetColumns(); - if (this_present_columns || that_present_columns) { - if (!(this_present_columns && that_present_columns)) - return false; - if (!this.columns.equals(that.columns)) - return false; - } - - boolean this_present_binaryColumns = true && this.isSetBinaryColumns(); - boolean that_present_binaryColumns = true && that.isSetBinaryColumns(); - if (this_present_binaryColumns || that_present_binaryColumns) { - if (!(this_present_binaryColumns && that_present_binaryColumns)) - return false; - if (!this.binaryColumns.equals(that.binaryColumns)) - return false; - } - - boolean this_present_columnCount = true && this.isSetColumnCount(); - boolean that_present_columnCount = true && that.isSetColumnCount(); - if (this_present_columnCount || that_present_columnCount) { - if (!(this_present_columnCount && that_present_columnCount)) - return false; - if (this.columnCount != that.columnCount) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_startRowOffset = true; - list.add(present_startRowOffset); - if (present_startRowOffset) - list.add(startRowOffset); - - boolean present_rows = true && (isSetRows()); - list.add(present_rows); - if (present_rows) - list.add(rows); - - boolean present_columns = true && (isSetColumns()); - list.add(present_columns); - if (present_columns) - list.add(columns); - - boolean present_binaryColumns = true && (isSetBinaryColumns()); - list.add(present_binaryColumns); - if (present_binaryColumns) - list.add(binaryColumns); - - boolean present_columnCount = true && (isSetColumnCount()); - list.add(present_columnCount); - if (present_columnCount) - list.add(columnCount); - - return list.hashCode(); - } - - @Override - public int compareTo(TRowSet other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStartRowOffset()).compareTo(other.isSetStartRowOffset()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStartRowOffset()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.startRowOffset, other.startRowOffset); - if (lastComparison != 0) 
{ - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetRows()).compareTo(other.isSetRows()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetRows()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.rows, other.rows); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetColumns()).compareTo(other.isSetColumns()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetColumns()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.columns, other.columns); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetBinaryColumns()).compareTo(other.isSetBinaryColumns()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetBinaryColumns()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.binaryColumns, other.binaryColumns); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetColumnCount()).compareTo(other.isSetColumnCount()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetColumnCount()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.columnCount, other.columnCount); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TRowSet("); - boolean first = true; - - sb.append("startRowOffset:"); - sb.append(this.startRowOffset); - first = false; - if (!first) sb.append(", "); - sb.append("rows:"); - if (this.rows == null) { - sb.append("null"); - } else { - sb.append(this.rows); - } - first = false; - if (isSetColumns()) { - if (!first) sb.append(", "); - sb.append("columns:"); - if (this.columns == null) { - sb.append("null"); - } else { - sb.append(this.columns); - } - first = false; - } - if (isSetBinaryColumns()) { - if (!first) sb.append(", "); - sb.append("binaryColumns:"); - if (this.binaryColumns == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.binaryColumns, sb); - } - first = false; - } - if (isSetColumnCount()) { - if (!first) sb.append(", "); - sb.append("columnCount:"); - sb.append(this.columnCount); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStartRowOffset()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'startRowOffset' is unset! Struct:" + toString()); - } - - if (!isSetRows()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'rows' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TRowSetStandardSchemeFactory implements SchemeFactory { - public TRowSetStandardScheme getScheme() { - return new TRowSetStandardScheme(); - } - } - - private static class TRowSetStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TRowSet struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // START_ROW_OFFSET - if (schemeField.type == org.apache.thrift.protocol.TType.I64) { - struct.startRowOffset = iprot.readI64(); - struct.setStartRowOffsetIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // ROWS - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list118 = iprot.readListBegin(); - struct.rows = new ArrayList(_list118.size); - TRow _elem119; - for (int _i120 = 0; _i120 < _list118.size; ++_i120) - { - _elem119 = new TRow(); - _elem119.read(iprot); - struct.rows.add(_elem119); - } - iprot.readListEnd(); - } - struct.setRowsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // COLUMNS - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list121 = iprot.readListBegin(); - struct.columns = new ArrayList(_list121.size); - TColumn _elem122; - for (int _i123 = 0; _i123 < _list121.size; ++_i123) - { - _elem122 = new TColumn(); - _elem122.read(iprot); - struct.columns.add(_elem122); - } - iprot.readListEnd(); - } - struct.setColumnsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // BINARY_COLUMNS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.binaryColumns = iprot.readBinary(); - struct.setBinaryColumnsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 5: // COLUMN_COUNT - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.columnCount = iprot.readI32(); - struct.setColumnCountIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol 
oprot, TRowSet struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - oprot.writeFieldBegin(START_ROW_OFFSET_FIELD_DESC); - oprot.writeI64(struct.startRowOffset); - oprot.writeFieldEnd(); - if (struct.rows != null) { - oprot.writeFieldBegin(ROWS_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, struct.rows.size())); - for (TRow _iter124 : struct.rows) - { - _iter124.write(oprot); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.columns != null) { - if (struct.isSetColumns()) { - oprot.writeFieldBegin(COLUMNS_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, struct.columns.size())); - for (TColumn _iter125 : struct.columns) - { - _iter125.write(oprot); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - } - if (struct.binaryColumns != null) { - if (struct.isSetBinaryColumns()) { - oprot.writeFieldBegin(BINARY_COLUMNS_FIELD_DESC); - oprot.writeBinary(struct.binaryColumns); - oprot.writeFieldEnd(); - } - } - if (struct.isSetColumnCount()) { - oprot.writeFieldBegin(COLUMN_COUNT_FIELD_DESC); - oprot.writeI32(struct.columnCount); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TRowSetTupleSchemeFactory implements SchemeFactory { - public TRowSetTupleScheme getScheme() { - return new TRowSetTupleScheme(); - } - } - - private static class TRowSetTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TRowSet struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeI64(struct.startRowOffset); - { - oprot.writeI32(struct.rows.size()); - for (TRow _iter126 : struct.rows) - { - _iter126.write(oprot); - } - } - BitSet optionals = new BitSet(); - if (struct.isSetColumns()) { - optionals.set(0); - } - if (struct.isSetBinaryColumns()) { - optionals.set(1); - } - if (struct.isSetColumnCount()) { - optionals.set(2); - } - oprot.writeBitSet(optionals, 3); - if (struct.isSetColumns()) { - { - oprot.writeI32(struct.columns.size()); - for (TColumn _iter127 : struct.columns) - { - _iter127.write(oprot); - } - } - } - if (struct.isSetBinaryColumns()) { - oprot.writeBinary(struct.binaryColumns); - } - if (struct.isSetColumnCount()) { - oprot.writeI32(struct.columnCount); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TRowSet struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.startRowOffset = iprot.readI64(); - struct.setStartRowOffsetIsSet(true); - { - org.apache.thrift.protocol.TList _list128 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); - struct.rows = new ArrayList(_list128.size); - TRow _elem129; - for (int _i130 = 0; _i130 < _list128.size; ++_i130) - { - _elem129 = new TRow(); - _elem129.read(iprot); - struct.rows.add(_elem129); - } - } - struct.setRowsIsSet(true); - BitSet incoming = iprot.readBitSet(3); - if (incoming.get(0)) { - { - org.apache.thrift.protocol.TList _list131 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); - struct.columns = new ArrayList(_list131.size); - TColumn _elem132; - for (int _i133 = 0; _i133 < _list131.size; ++_i133) - { - _elem132 = new TColumn(); - _elem132.read(iprot); - 
struct.columns.add(_elem132); - } - } - struct.setColumnsIsSet(true); - } - if (incoming.get(1)) { - struct.binaryColumns = iprot.readBinary(); - struct.setBinaryColumnsIsSet(true); - } - if (incoming.get(2)) { - struct.columnCount = iprot.readI32(); - struct.setColumnCountIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TSessionHandle.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TSessionHandle.java deleted file mode 100644 index b5cb6e7b15aa6..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TSessionHandle.java +++ /dev/null @@ -1,394 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TSessionHandle implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TSessionHandle"); - - private static final org.apache.thrift.protocol.TField SESSION_ID_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionId", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TSessionHandleStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TSessionHandleTupleSchemeFactory()); - } - - private THandleIdentifier sessionId; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_ID((short)1, "sessionId"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_ID - return SESSION_ID; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_ID, new org.apache.thrift.meta_data.FieldMetaData("sessionId", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, THandleIdentifier.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TSessionHandle.class, metaDataMap); - } - - public TSessionHandle() { - } - - public TSessionHandle( - THandleIdentifier sessionId) - { - this(); - this.sessionId = sessionId; - } - - /** - * Performs a deep copy on other. - */ - public TSessionHandle(TSessionHandle other) { - if (other.isSetSessionId()) { - this.sessionId = new THandleIdentifier(other.sessionId); - } - } - - public TSessionHandle deepCopy() { - return new TSessionHandle(this); - } - - @Override - public void clear() { - this.sessionId = null; - } - - public THandleIdentifier getSessionId() { - return this.sessionId; - } - - public void setSessionId(THandleIdentifier sessionId) { - this.sessionId = sessionId; - } - - public void unsetSessionId() { - this.sessionId = null; - } - - /** Returns true if field sessionId is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionId() { - return this.sessionId != null; - } - - public void setSessionIdIsSet(boolean value) { - if (!value) { - this.sessionId = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_ID: - if (value == null) { - unsetSessionId(); - } else { - setSessionId((THandleIdentifier)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_ID: - return getSessionId(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_ID: - return isSetSessionId(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TSessionHandle) - return this.equals((TSessionHandle)that); - return false; - } - - public boolean equals(TSessionHandle that) { - if (that == null) - return false; - - boolean this_present_sessionId = true && this.isSetSessionId(); - boolean that_present_sessionId = true && that.isSetSessionId(); - if (this_present_sessionId || that_present_sessionId) { - 
if (!(this_present_sessionId && that_present_sessionId)) - return false; - if (!this.sessionId.equals(that.sessionId)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionId = true && (isSetSessionId()); - list.add(present_sessionId); - if (present_sessionId) - list.add(sessionId); - - return list.hashCode(); - } - - @Override - public int compareTo(TSessionHandle other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionId()).compareTo(other.isSetSessionId()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionId()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionId, other.sessionId); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TSessionHandle("); - boolean first = true; - - sb.append("sessionId:"); - if (this.sessionId == null) { - sb.append("null"); - } else { - sb.append(this.sessionId); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionId()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionId' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionId != null) { - sessionId.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TSessionHandleStandardSchemeFactory implements SchemeFactory { - public TSessionHandleStandardScheme getScheme() { - return new TSessionHandleStandardScheme(); - } - } - - private static class TSessionHandleStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TSessionHandle struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_ID - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionId = new THandleIdentifier(); - struct.sessionId.read(iprot); - struct.setSessionIdIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TSessionHandle struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionId != null) { - oprot.writeFieldBegin(SESSION_ID_FIELD_DESC); - struct.sessionId.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TSessionHandleTupleSchemeFactory implements SchemeFactory { - public TSessionHandleTupleScheme getScheme() { - return new TSessionHandleTupleScheme(); - } - } - - private static class TSessionHandleTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TSessionHandle struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionId.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TSessionHandle struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionId = new THandleIdentifier(); - struct.sessionId.read(iprot); - struct.setSessionIdIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStatus.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStatus.java deleted file mode 100644 index 50f4531b0a209..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStatus.java +++ /dev/null @@ -1,875 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ 
-package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TStatus implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TStatus"); - - private static final org.apache.thrift.protocol.TField STATUS_CODE_FIELD_DESC = new org.apache.thrift.protocol.TField("statusCode", org.apache.thrift.protocol.TType.I32, (short)1); - private static final org.apache.thrift.protocol.TField INFO_MESSAGES_FIELD_DESC = new org.apache.thrift.protocol.TField("infoMessages", org.apache.thrift.protocol.TType.LIST, (short)2); - private static final org.apache.thrift.protocol.TField SQL_STATE_FIELD_DESC = new org.apache.thrift.protocol.TField("sqlState", org.apache.thrift.protocol.TType.STRING, (short)3); - private static final org.apache.thrift.protocol.TField ERROR_CODE_FIELD_DESC = new org.apache.thrift.protocol.TField("errorCode", org.apache.thrift.protocol.TType.I32, (short)4); - private static final org.apache.thrift.protocol.TField ERROR_MESSAGE_FIELD_DESC = new org.apache.thrift.protocol.TField("errorMessage", org.apache.thrift.protocol.TType.STRING, (short)5); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TStatusStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TStatusTupleSchemeFactory()); - } - - private TStatusCode statusCode; // required - private List infoMessages; // optional - private String sqlState; // optional - private int errorCode; // optional - private String errorMessage; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - /** - * - * @see TStatusCode - */ - STATUS_CODE((short)1, "statusCode"), - INFO_MESSAGES((short)2, "infoMessages"), - SQL_STATE((short)3, "sqlState"), - ERROR_CODE((short)4, "errorCode"), - ERROR_MESSAGE((short)5, "errorMessage"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS_CODE - return STATUS_CODE; - case 2: // INFO_MESSAGES - return INFO_MESSAGES; - case 3: // SQL_STATE - return SQL_STATE; - case 4: // ERROR_CODE - return ERROR_CODE; - case 5: // ERROR_MESSAGE - return ERROR_MESSAGE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __ERRORCODE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.INFO_MESSAGES,_Fields.SQL_STATE,_Fields.ERROR_CODE,_Fields.ERROR_MESSAGE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS_CODE, new org.apache.thrift.meta_data.FieldMetaData("statusCode", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TStatusCode.class))); - tmpMap.put(_Fields.INFO_MESSAGES, new org.apache.thrift.meta_data.FieldMetaData("infoMessages", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); - tmpMap.put(_Fields.SQL_STATE, new org.apache.thrift.meta_data.FieldMetaData("sqlState", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.ERROR_CODE, new org.apache.thrift.meta_data.FieldMetaData("errorCode", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.ERROR_MESSAGE, new org.apache.thrift.meta_data.FieldMetaData("errorMessage", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TStatus.class, metaDataMap); - } - - public TStatus() { - } - - public TStatus( - TStatusCode statusCode) - { - this(); - this.statusCode = statusCode; - } - - /** - * Performs a deep copy on other. 
- */ - public TStatus(TStatus other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetStatusCode()) { - this.statusCode = other.statusCode; - } - if (other.isSetInfoMessages()) { - List __this__infoMessages = new ArrayList(other.infoMessages); - this.infoMessages = __this__infoMessages; - } - if (other.isSetSqlState()) { - this.sqlState = other.sqlState; - } - this.errorCode = other.errorCode; - if (other.isSetErrorMessage()) { - this.errorMessage = other.errorMessage; - } - } - - public TStatus deepCopy() { - return new TStatus(this); - } - - @Override - public void clear() { - this.statusCode = null; - this.infoMessages = null; - this.sqlState = null; - setErrorCodeIsSet(false); - this.errorCode = 0; - this.errorMessage = null; - } - - /** - * - * @see TStatusCode - */ - public TStatusCode getStatusCode() { - return this.statusCode; - } - - /** - * - * @see TStatusCode - */ - public void setStatusCode(TStatusCode statusCode) { - this.statusCode = statusCode; - } - - public void unsetStatusCode() { - this.statusCode = null; - } - - /** Returns true if field statusCode is set (has been assigned a value) and false otherwise */ - public boolean isSetStatusCode() { - return this.statusCode != null; - } - - public void setStatusCodeIsSet(boolean value) { - if (!value) { - this.statusCode = null; - } - } - - public int getInfoMessagesSize() { - return (this.infoMessages == null) ? 0 : this.infoMessages.size(); - } - - public java.util.Iterator getInfoMessagesIterator() { - return (this.infoMessages == null) ? null : this.infoMessages.iterator(); - } - - public void addToInfoMessages(String elem) { - if (this.infoMessages == null) { - this.infoMessages = new ArrayList(); - } - this.infoMessages.add(elem); - } - - public List getInfoMessages() { - return this.infoMessages; - } - - public void setInfoMessages(List infoMessages) { - this.infoMessages = infoMessages; - } - - public void unsetInfoMessages() { - this.infoMessages = null; - } - - /** Returns true if field infoMessages is set (has been assigned a value) and false otherwise */ - public boolean isSetInfoMessages() { - return this.infoMessages != null; - } - - public void setInfoMessagesIsSet(boolean value) { - if (!value) { - this.infoMessages = null; - } - } - - public String getSqlState() { - return this.sqlState; - } - - public void setSqlState(String sqlState) { - this.sqlState = sqlState; - } - - public void unsetSqlState() { - this.sqlState = null; - } - - /** Returns true if field sqlState is set (has been assigned a value) and false otherwise */ - public boolean isSetSqlState() { - return this.sqlState != null; - } - - public void setSqlStateIsSet(boolean value) { - if (!value) { - this.sqlState = null; - } - } - - public int getErrorCode() { - return this.errorCode; - } - - public void setErrorCode(int errorCode) { - this.errorCode = errorCode; - setErrorCodeIsSet(true); - } - - public void unsetErrorCode() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __ERRORCODE_ISSET_ID); - } - - /** Returns true if field errorCode is set (has been assigned a value) and false otherwise */ - public boolean isSetErrorCode() { - return EncodingUtils.testBit(__isset_bitfield, __ERRORCODE_ISSET_ID); - } - - public void setErrorCodeIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __ERRORCODE_ISSET_ID, value); - } - - public String getErrorMessage() { - return this.errorMessage; - } - - public void setErrorMessage(String errorMessage) { - this.errorMessage = errorMessage; - } - - 
public void unsetErrorMessage() { - this.errorMessage = null; - } - - /** Returns true if field errorMessage is set (has been assigned a value) and false otherwise */ - public boolean isSetErrorMessage() { - return this.errorMessage != null; - } - - public void setErrorMessageIsSet(boolean value) { - if (!value) { - this.errorMessage = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS_CODE: - if (value == null) { - unsetStatusCode(); - } else { - setStatusCode((TStatusCode)value); - } - break; - - case INFO_MESSAGES: - if (value == null) { - unsetInfoMessages(); - } else { - setInfoMessages((List)value); - } - break; - - case SQL_STATE: - if (value == null) { - unsetSqlState(); - } else { - setSqlState((String)value); - } - break; - - case ERROR_CODE: - if (value == null) { - unsetErrorCode(); - } else { - setErrorCode((Integer)value); - } - break; - - case ERROR_MESSAGE: - if (value == null) { - unsetErrorMessage(); - } else { - setErrorMessage((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS_CODE: - return getStatusCode(); - - case INFO_MESSAGES: - return getInfoMessages(); - - case SQL_STATE: - return getSqlState(); - - case ERROR_CODE: - return getErrorCode(); - - case ERROR_MESSAGE: - return getErrorMessage(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS_CODE: - return isSetStatusCode(); - case INFO_MESSAGES: - return isSetInfoMessages(); - case SQL_STATE: - return isSetSqlState(); - case ERROR_CODE: - return isSetErrorCode(); - case ERROR_MESSAGE: - return isSetErrorMessage(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TStatus) - return this.equals((TStatus)that); - return false; - } - - public boolean equals(TStatus that) { - if (that == null) - return false; - - boolean this_present_statusCode = true && this.isSetStatusCode(); - boolean that_present_statusCode = true && that.isSetStatusCode(); - if (this_present_statusCode || that_present_statusCode) { - if (!(this_present_statusCode && that_present_statusCode)) - return false; - if (!this.statusCode.equals(that.statusCode)) - return false; - } - - boolean this_present_infoMessages = true && this.isSetInfoMessages(); - boolean that_present_infoMessages = true && that.isSetInfoMessages(); - if (this_present_infoMessages || that_present_infoMessages) { - if (!(this_present_infoMessages && that_present_infoMessages)) - return false; - if (!this.infoMessages.equals(that.infoMessages)) - return false; - } - - boolean this_present_sqlState = true && this.isSetSqlState(); - boolean that_present_sqlState = true && that.isSetSqlState(); - if (this_present_sqlState || that_present_sqlState) { - if (!(this_present_sqlState && that_present_sqlState)) - return false; - if (!this.sqlState.equals(that.sqlState)) - return false; - } - - boolean this_present_errorCode = true && this.isSetErrorCode(); - boolean that_present_errorCode = true && that.isSetErrorCode(); - if (this_present_errorCode || that_present_errorCode) { - if (!(this_present_errorCode && that_present_errorCode)) - return false; - if (this.errorCode != that.errorCode) - return false; - } - - 
boolean this_present_errorMessage = true && this.isSetErrorMessage(); - boolean that_present_errorMessage = true && that.isSetErrorMessage(); - if (this_present_errorMessage || that_present_errorMessage) { - if (!(this_present_errorMessage && that_present_errorMessage)) - return false; - if (!this.errorMessage.equals(that.errorMessage)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_statusCode = true && (isSetStatusCode()); - list.add(present_statusCode); - if (present_statusCode) - list.add(statusCode.getValue()); - - boolean present_infoMessages = true && (isSetInfoMessages()); - list.add(present_infoMessages); - if (present_infoMessages) - list.add(infoMessages); - - boolean present_sqlState = true && (isSetSqlState()); - list.add(present_sqlState); - if (present_sqlState) - list.add(sqlState); - - boolean present_errorCode = true && (isSetErrorCode()); - list.add(present_errorCode); - if (present_errorCode) - list.add(errorCode); - - boolean present_errorMessage = true && (isSetErrorMessage()); - list.add(present_errorMessage); - if (present_errorMessage) - list.add(errorMessage); - - return list.hashCode(); - } - - @Override - public int compareTo(TStatus other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatusCode()).compareTo(other.isSetStatusCode()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatusCode()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.statusCode, other.statusCode); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetInfoMessages()).compareTo(other.isSetInfoMessages()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetInfoMessages()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.infoMessages, other.infoMessages); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSqlState()).compareTo(other.isSetSqlState()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSqlState()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sqlState, other.sqlState); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetErrorCode()).compareTo(other.isSetErrorCode()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetErrorCode()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.errorCode, other.errorCode); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetErrorMessage()).compareTo(other.isSetErrorMessage()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetErrorMessage()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.errorMessage, other.errorMessage); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - 
public String toString() { - StringBuilder sb = new StringBuilder("TStatus("); - boolean first = true; - - sb.append("statusCode:"); - if (this.statusCode == null) { - sb.append("null"); - } else { - sb.append(this.statusCode); - } - first = false; - if (isSetInfoMessages()) { - if (!first) sb.append(", "); - sb.append("infoMessages:"); - if (this.infoMessages == null) { - sb.append("null"); - } else { - sb.append(this.infoMessages); - } - first = false; - } - if (isSetSqlState()) { - if (!first) sb.append(", "); - sb.append("sqlState:"); - if (this.sqlState == null) { - sb.append("null"); - } else { - sb.append(this.sqlState); - } - first = false; - } - if (isSetErrorCode()) { - if (!first) sb.append(", "); - sb.append("errorCode:"); - sb.append(this.errorCode); - first = false; - } - if (isSetErrorMessage()) { - if (!first) sb.append(", "); - sb.append("errorMessage:"); - if (this.errorMessage == null) { - sb.append("null"); - } else { - sb.append(this.errorMessage); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatusCode()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'statusCode' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
- __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TStatusStandardSchemeFactory implements SchemeFactory { - public TStatusStandardScheme getScheme() { - return new TStatusStandardScheme(); - } - } - - private static class TStatusStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TStatus struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS_CODE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.statusCode = org.apache.hive.service.rpc.thrift.TStatusCode.findByValue(iprot.readI32()); - struct.setStatusCodeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // INFO_MESSAGES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list134 = iprot.readListBegin(); - struct.infoMessages = new ArrayList(_list134.size); - String _elem135; - for (int _i136 = 0; _i136 < _list134.size; ++_i136) - { - _elem135 = iprot.readString(); - struct.infoMessages.add(_elem135); - } - iprot.readListEnd(); - } - struct.setInfoMessagesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SQL_STATE - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.sqlState = iprot.readString(); - struct.setSqlStateIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // ERROR_CODE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.errorCode = iprot.readI32(); - struct.setErrorCodeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 5: // ERROR_MESSAGE - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.errorMessage = iprot.readString(); - struct.setErrorMessageIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TStatus struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.statusCode != null) { - oprot.writeFieldBegin(STATUS_CODE_FIELD_DESC); - oprot.writeI32(struct.statusCode.getValue()); - oprot.writeFieldEnd(); - } - if (struct.infoMessages != null) { - if (struct.isSetInfoMessages()) { - oprot.writeFieldBegin(INFO_MESSAGES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, struct.infoMessages.size())); - for (String _iter137 : struct.infoMessages) - { - oprot.writeString(_iter137); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - } - if (struct.sqlState != null) { - if (struct.isSetSqlState()) { - oprot.writeFieldBegin(SQL_STATE_FIELD_DESC); - oprot.writeString(struct.sqlState); 
- oprot.writeFieldEnd(); - } - } - if (struct.isSetErrorCode()) { - oprot.writeFieldBegin(ERROR_CODE_FIELD_DESC); - oprot.writeI32(struct.errorCode); - oprot.writeFieldEnd(); - } - if (struct.errorMessage != null) { - if (struct.isSetErrorMessage()) { - oprot.writeFieldBegin(ERROR_MESSAGE_FIELD_DESC); - oprot.writeString(struct.errorMessage); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TStatusTupleSchemeFactory implements SchemeFactory { - public TStatusTupleScheme getScheme() { - return new TStatusTupleScheme(); - } - } - - private static class TStatusTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TStatus struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeI32(struct.statusCode.getValue()); - BitSet optionals = new BitSet(); - if (struct.isSetInfoMessages()) { - optionals.set(0); - } - if (struct.isSetSqlState()) { - optionals.set(1); - } - if (struct.isSetErrorCode()) { - optionals.set(2); - } - if (struct.isSetErrorMessage()) { - optionals.set(3); - } - oprot.writeBitSet(optionals, 4); - if (struct.isSetInfoMessages()) { - { - oprot.writeI32(struct.infoMessages.size()); - for (String _iter138 : struct.infoMessages) - { - oprot.writeString(_iter138); - } - } - } - if (struct.isSetSqlState()) { - oprot.writeString(struct.sqlState); - } - if (struct.isSetErrorCode()) { - oprot.writeI32(struct.errorCode); - } - if (struct.isSetErrorMessage()) { - oprot.writeString(struct.errorMessage); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TStatus struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.statusCode = org.apache.hive.service.rpc.thrift.TStatusCode.findByValue(iprot.readI32()); - struct.setStatusCodeIsSet(true); - BitSet incoming = iprot.readBitSet(4); - if (incoming.get(0)) { - { - org.apache.thrift.protocol.TList _list139 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.infoMessages = new ArrayList(_list139.size); - String _elem140; - for (int _i141 = 0; _i141 < _list139.size; ++_i141) - { - _elem140 = iprot.readString(); - struct.infoMessages.add(_elem140); - } - } - struct.setInfoMessagesIsSet(true); - } - if (incoming.get(1)) { - struct.sqlState = iprot.readString(); - struct.setSqlStateIsSet(true); - } - if (incoming.get(2)) { - struct.errorCode = iprot.readI32(); - struct.setErrorCodeIsSet(true); - } - if (incoming.get(3)) { - struct.errorMessage = iprot.readString(); - struct.setErrorMessageIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStatusCode.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStatusCode.java deleted file mode 100644 index fbf14184fa9a8..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStatusCode.java +++ /dev/null @@ -1,54 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - - -import java.util.Map; -import java.util.HashMap; -import org.apache.thrift.TEnum; - -public enum TStatusCode implements org.apache.thrift.TEnum { - SUCCESS_STATUS(0), - SUCCESS_WITH_INFO_STATUS(1), - STILL_EXECUTING_STATUS(2), - ERROR_STATUS(3), - INVALID_HANDLE_STATUS(4); - 
- private final int value; - - private TStatusCode(int value) { - this.value = value; - } - - /** - * Get the integer value of this enum value, as defined in the Thrift IDL. - */ - public int getValue() { - return value; - } - - /** - * Find a the enum type by its integer value, as defined in the Thrift IDL. - * @return null if the value is not found. - */ - public static TStatusCode findByValue(int value) { - switch (value) { - case 0: - return SUCCESS_STATUS; - case 1: - return SUCCESS_WITH_INFO_STATUS; - case 2: - return STILL_EXECUTING_STATUS; - case 3: - return ERROR_STATUS; - case 4: - return INVALID_HANDLE_STATUS; - default: - return null; - } - } -} diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStringColumn.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStringColumn.java deleted file mode 100644 index c83a1fd0de3c2..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStringColumn.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TStringColumn implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TStringColumn"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TStringColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TStringColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TStringColumn.class, metaDataMap); - } - - public TStringColumn() { - } - - public TStringColumn( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - /** - * Performs a deep copy on other. - */ - public TStringColumn(TStringColumn other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(other.values); - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); - } - } - - public TStringColumn deepCopy() { - return new TStringColumn(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? 
null : this.values.iterator(); - } - - public void addToValues(String elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void setNulls(byte[] nulls) { - this.nulls = nulls == null ? (ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(nulls, nulls.length)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TStringColumn) - return this.equals((TStringColumn)that); - return false; - } - - public boolean equals(TStringColumn that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_values = true && (isSetValues()); - list.add(present_values); - if (present_values) - list.add(values); - - boolean present_nulls = true && (isSetNulls()); - list.add(present_nulls); - if (present_nulls) - list.add(nulls); - - return list.hashCode(); - } - - @Override - public int 
compareTo(TStringColumn other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(other.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, other.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(other.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, other.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TStringColumn("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TStringColumnStandardSchemeFactory implements SchemeFactory { - public TStringColumnStandardScheme getScheme() { - return new TStringColumnStandardScheme(); - } - } - - private static class TStringColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TStringColumn struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list102 = iprot.readListBegin(); - struct.values = new ArrayList(_list102.size); - String _elem103; - for (int _i104 = 0; _i104 < _list102.size; ++_i104) - { - _elem103 = iprot.readString(); - struct.values.add(_elem103); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TStringColumn struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, struct.values.size())); - for (String _iter105 : struct.values) - { - oprot.writeString(_iter105); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TStringColumnTupleSchemeFactory implements SchemeFactory { - public TStringColumnTupleScheme getScheme() { - return new TStringColumnTupleScheme(); - } - } - - private static class TStringColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TStringColumn struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (String _iter106 : struct.values) - { - oprot.writeString(_iter106); - } - } - 
oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TStringColumn struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list107 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.values = new ArrayList(_list107.size); - String _elem108; - for (int _i109 = 0; _i109 < _list107.size; ++_i109) - { - _elem108 = iprot.readString(); - struct.values.add(_elem108); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStringValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStringValue.java deleted file mode 100644 index 13874e5516632..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStringValue.java +++ /dev/null @@ -1,393 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TStringValue implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TStringValue"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("value", org.apache.thrift.protocol.TType.STRING, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TStringValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TStringValueTupleSchemeFactory()); - } - - private String value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TStringValue.class, metaDataMap); - } - - public TStringValue() { - } - - /** - * Performs a deep copy on other. - */ - public TStringValue(TStringValue other) { - if (other.isSetValue()) { - this.value = other.value; - } - } - - public TStringValue deepCopy() { - return new TStringValue(this); - } - - @Override - public void clear() { - this.value = null; - } - - public String getValue() { - return this.value; - } - - public void setValue(String value) { - this.value = value; - } - - public void unsetValue() { - this.value = null; - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return this.value != null; - } - - public void setValueIsSet(boolean value) { - if (!value) { - this.value = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return getValue(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TStringValue) - return this.equals((TStringValue)that); - return false; - } - - public boolean equals(TStringValue that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || that_present_value) { - if 
(!(this_present_value && that_present_value)) - return false; - if (!this.value.equals(that.value)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_value = true && (isSetValue()); - list.add(present_value); - if (present_value) - list.add(value); - - return list.hashCode(); - } - - @Override - public int compareTo(TStringValue other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(other.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, other.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TStringValue("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); - if (this.value == null) { - sb.append("null"); - } else { - sb.append(this.value); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TStringValueStandardSchemeFactory implements SchemeFactory { - public TStringValueStandardScheme getScheme() { - return new TStringValueStandardScheme(); - } - } - - private static class TStringValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TStringValue struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.value = iprot.readString(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TStringValue 
struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.value != null) { - if (struct.isSetValue()) { - oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeString(struct.value); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TStringValueTupleSchemeFactory implements SchemeFactory { - public TStringValueTupleScheme getScheme() { - return new TStringValueTupleScheme(); - } - } - - private static class TStringValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TStringValue struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeString(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TStringValue struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readString(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStructTypeEntry.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStructTypeEntry.java deleted file mode 100644 index 6c2c4f5dd2ddf..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStructTypeEntry.java +++ /dev/null @@ -1,452 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TStructTypeEntry implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TStructTypeEntry"); - - private static final org.apache.thrift.protocol.TField NAME_TO_TYPE_PTR_FIELD_DESC = new org.apache.thrift.protocol.TField("nameToTypePtr", org.apache.thrift.protocol.TType.MAP, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TStructTypeEntryStandardSchemeFactory()); - 
schemes.put(TupleScheme.class, new TStructTypeEntryTupleSchemeFactory()); - } - - private Map nameToTypePtr; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - NAME_TO_TYPE_PTR((short)1, "nameToTypePtr"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // NAME_TO_TYPE_PTR - return NAME_TO_TYPE_PTR; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.NAME_TO_TYPE_PTR, new org.apache.thrift.meta_data.FieldMetaData("nameToTypePtr", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.MapMetaData(org.apache.thrift.protocol.TType.MAP, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING), - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32 , "TTypeEntryPtr")))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TStructTypeEntry.class, metaDataMap); - } - - public TStructTypeEntry() { - } - - public TStructTypeEntry( - Map nameToTypePtr) - { - this(); - this.nameToTypePtr = nameToTypePtr; - } - - /** - * Performs a deep copy on other. - */ - public TStructTypeEntry(TStructTypeEntry other) { - if (other.isSetNameToTypePtr()) { - Map __this__nameToTypePtr = new HashMap(other.nameToTypePtr.size()); - for (Map.Entry other_element : other.nameToTypePtr.entrySet()) { - - String other_element_key = other_element.getKey(); - Integer other_element_value = other_element.getValue(); - - String __this__nameToTypePtr_copy_key = other_element_key; - - Integer __this__nameToTypePtr_copy_value = other_element_value; - - __this__nameToTypePtr.put(__this__nameToTypePtr_copy_key, __this__nameToTypePtr_copy_value); - } - this.nameToTypePtr = __this__nameToTypePtr; - } - } - - public TStructTypeEntry deepCopy() { - return new TStructTypeEntry(this); - } - - @Override - public void clear() { - this.nameToTypePtr = null; - } - - public int getNameToTypePtrSize() { - return (this.nameToTypePtr == null) ? 
0 : this.nameToTypePtr.size(); - } - - public void putToNameToTypePtr(String key, int val) { - if (this.nameToTypePtr == null) { - this.nameToTypePtr = new HashMap(); - } - this.nameToTypePtr.put(key, val); - } - - public Map getNameToTypePtr() { - return this.nameToTypePtr; - } - - public void setNameToTypePtr(Map nameToTypePtr) { - this.nameToTypePtr = nameToTypePtr; - } - - public void unsetNameToTypePtr() { - this.nameToTypePtr = null; - } - - /** Returns true if field nameToTypePtr is set (has been assigned a value) and false otherwise */ - public boolean isSetNameToTypePtr() { - return this.nameToTypePtr != null; - } - - public void setNameToTypePtrIsSet(boolean value) { - if (!value) { - this.nameToTypePtr = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case NAME_TO_TYPE_PTR: - if (value == null) { - unsetNameToTypePtr(); - } else { - setNameToTypePtr((Map)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case NAME_TO_TYPE_PTR: - return getNameToTypePtr(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case NAME_TO_TYPE_PTR: - return isSetNameToTypePtr(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TStructTypeEntry) - return this.equals((TStructTypeEntry)that); - return false; - } - - public boolean equals(TStructTypeEntry that) { - if (that == null) - return false; - - boolean this_present_nameToTypePtr = true && this.isSetNameToTypePtr(); - boolean that_present_nameToTypePtr = true && that.isSetNameToTypePtr(); - if (this_present_nameToTypePtr || that_present_nameToTypePtr) { - if (!(this_present_nameToTypePtr && that_present_nameToTypePtr)) - return false; - if (!this.nameToTypePtr.equals(that.nameToTypePtr)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_nameToTypePtr = true && (isSetNameToTypePtr()); - list.add(present_nameToTypePtr); - if (present_nameToTypePtr) - list.add(nameToTypePtr); - - return list.hashCode(); - } - - @Override - public int compareTo(TStructTypeEntry other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetNameToTypePtr()).compareTo(other.isSetNameToTypePtr()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNameToTypePtr()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nameToTypePtr, other.nameToTypePtr); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TStructTypeEntry("); - boolean first = true; - - 
sb.append("nameToTypePtr:"); - if (this.nameToTypePtr == null) { - sb.append("null"); - } else { - sb.append(this.nameToTypePtr); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetNameToTypePtr()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nameToTypePtr' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TStructTypeEntryStandardSchemeFactory implements SchemeFactory { - public TStructTypeEntryStandardScheme getScheme() { - return new TStructTypeEntryStandardScheme(); - } - } - - private static class TStructTypeEntryStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TStructTypeEntry struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // NAME_TO_TYPE_PTR - if (schemeField.type == org.apache.thrift.protocol.TType.MAP) { - { - org.apache.thrift.protocol.TMap _map10 = iprot.readMapBegin(); - struct.nameToTypePtr = new HashMap(2*_map10.size); - String _key11; - int _val12; - for (int _i13 = 0; _i13 < _map10.size; ++_i13) - { - _key11 = iprot.readString(); - _val12 = iprot.readI32(); - struct.nameToTypePtr.put(_key11, _val12); - } - iprot.readMapEnd(); - } - struct.setNameToTypePtrIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TStructTypeEntry struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.nameToTypePtr != null) { - oprot.writeFieldBegin(NAME_TO_TYPE_PTR_FIELD_DESC); - { - oprot.writeMapBegin(new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.I32, struct.nameToTypePtr.size())); - for (Map.Entry _iter14 : struct.nameToTypePtr.entrySet()) - { - oprot.writeString(_iter14.getKey()); - oprot.writeI32(_iter14.getValue()); - } - oprot.writeMapEnd(); - } - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TStructTypeEntryTupleSchemeFactory implements SchemeFactory { - public TStructTypeEntryTupleScheme getScheme() { - return new TStructTypeEntryTupleScheme(); - } - } - - private static class TStructTypeEntryTupleScheme extends TupleScheme { - - @Override - public void 
write(org.apache.thrift.protocol.TProtocol prot, TStructTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.nameToTypePtr.size()); - for (Map.Entry _iter15 : struct.nameToTypePtr.entrySet()) - { - oprot.writeString(_iter15.getKey()); - oprot.writeI32(_iter15.getValue()); - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TStructTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TMap _map16 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.I32, iprot.readI32()); - struct.nameToTypePtr = new HashMap(2*_map16.size); - String _key17; - int _val18; - for (int _i19 = 0; _i19 < _map16.size; ++_i19) - { - _key17 = iprot.readString(); - _val18 = iprot.readI32(); - struct.nameToTypePtr.put(_key17, _val18); - } - } - struct.setNameToTypePtrIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTableSchema.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTableSchema.java deleted file mode 100644 index 007b1603546ac..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTableSchema.java +++ /dev/null @@ -1,443 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TTableSchema implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TTableSchema"); - - private static final org.apache.thrift.protocol.TField COLUMNS_FIELD_DESC = new org.apache.thrift.protocol.TField("columns", org.apache.thrift.protocol.TType.LIST, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TTableSchemaStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TTableSchemaTupleSchemeFactory()); - } - - private List columns; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - COLUMNS((short)1, "columns"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // COLUMNS - return COLUMNS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.COLUMNS, new org.apache.thrift.meta_data.FieldMetaData("columns", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TColumnDesc.class)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TTableSchema.class, metaDataMap); - } - - public TTableSchema() { - } - - public TTableSchema( - List columns) - { - this(); - this.columns = columns; - } - - /** - * Performs a deep copy on other. - */ - public TTableSchema(TTableSchema other) { - if (other.isSetColumns()) { - List __this__columns = new ArrayList(other.columns.size()); - for (TColumnDesc other_element : other.columns) { - __this__columns.add(new TColumnDesc(other_element)); - } - this.columns = __this__columns; - } - } - - public TTableSchema deepCopy() { - return new TTableSchema(this); - } - - @Override - public void clear() { - this.columns = null; - } - - public int getColumnsSize() { - return (this.columns == null) ? 0 : this.columns.size(); - } - - public java.util.Iterator getColumnsIterator() { - return (this.columns == null) ? 
null : this.columns.iterator(); - } - - public void addToColumns(TColumnDesc elem) { - if (this.columns == null) { - this.columns = new ArrayList(); - } - this.columns.add(elem); - } - - public List getColumns() { - return this.columns; - } - - public void setColumns(List columns) { - this.columns = columns; - } - - public void unsetColumns() { - this.columns = null; - } - - /** Returns true if field columns is set (has been assigned a value) and false otherwise */ - public boolean isSetColumns() { - return this.columns != null; - } - - public void setColumnsIsSet(boolean value) { - if (!value) { - this.columns = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case COLUMNS: - if (value == null) { - unsetColumns(); - } else { - setColumns((List)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case COLUMNS: - return getColumns(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case COLUMNS: - return isSetColumns(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TTableSchema) - return this.equals((TTableSchema)that); - return false; - } - - public boolean equals(TTableSchema that) { - if (that == null) - return false; - - boolean this_present_columns = true && this.isSetColumns(); - boolean that_present_columns = true && that.isSetColumns(); - if (this_present_columns || that_present_columns) { - if (!(this_present_columns && that_present_columns)) - return false; - if (!this.columns.equals(that.columns)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_columns = true && (isSetColumns()); - list.add(present_columns); - if (present_columns) - list.add(columns); - - return list.hashCode(); - } - - @Override - public int compareTo(TTableSchema other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetColumns()).compareTo(other.isSetColumns()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetColumns()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.columns, other.columns); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TTableSchema("); - boolean first = true; - - sb.append("columns:"); - if (this.columns == null) { - sb.append("null"); - } else { - sb.append(this.columns); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetColumns()) { 
- throw new org.apache.thrift.protocol.TProtocolException("Required field 'columns' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TTableSchemaStandardSchemeFactory implements SchemeFactory { - public TTableSchemaStandardScheme getScheme() { - return new TTableSchemaStandardScheme(); - } - } - - private static class TTableSchemaStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TTableSchema struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // COLUMNS - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list38 = iprot.readListBegin(); - struct.columns = new ArrayList(_list38.size); - TColumnDesc _elem39; - for (int _i40 = 0; _i40 < _list38.size; ++_i40) - { - _elem39 = new TColumnDesc(); - _elem39.read(iprot); - struct.columns.add(_elem39); - } - iprot.readListEnd(); - } - struct.setColumnsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TTableSchema struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.columns != null) { - oprot.writeFieldBegin(COLUMNS_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, struct.columns.size())); - for (TColumnDesc _iter41 : struct.columns) - { - _iter41.write(oprot); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TTableSchemaTupleSchemeFactory implements SchemeFactory { - public TTableSchemaTupleScheme getScheme() { - return new TTableSchemaTupleScheme(); - } - } - - private static class TTableSchemaTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TTableSchema struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.columns.size()); - for (TColumnDesc _iter42 : struct.columns) - { - _iter42.write(oprot); - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TTableSchema struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list43 = new 
org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); - struct.columns = new ArrayList(_list43.size); - TColumnDesc _elem44; - for (int _i45 = 0; _i45 < _list43.size; ++_i45) - { - _elem44 = new TColumnDesc(); - _elem44.read(iprot); - struct.columns.add(_elem44); - } - } - struct.setColumnsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeDesc.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeDesc.java deleted file mode 100644 index 055a14d06a2d6..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeDesc.java +++ /dev/null @@ -1,443 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TTypeDesc implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TTypeDesc"); - - private static final org.apache.thrift.protocol.TField TYPES_FIELD_DESC = new org.apache.thrift.protocol.TField("types", org.apache.thrift.protocol.TType.LIST, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TTypeDescStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TTypeDescTupleSchemeFactory()); - } - - private List types; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - TYPES((short)1, "types"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // TYPES - return TYPES; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.TYPES, new org.apache.thrift.meta_data.FieldMetaData("types", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TTypeEntry.class)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TTypeDesc.class, metaDataMap); - } - - public TTypeDesc() { - } - - public TTypeDesc( - List types) - { - this(); - this.types = types; - } - - /** - * Performs a deep copy on other. - */ - public TTypeDesc(TTypeDesc other) { - if (other.isSetTypes()) { - List __this__types = new ArrayList(other.types.size()); - for (TTypeEntry other_element : other.types) { - __this__types.add(new TTypeEntry(other_element)); - } - this.types = __this__types; - } - } - - public TTypeDesc deepCopy() { - return new TTypeDesc(this); - } - - @Override - public void clear() { - this.types = null; - } - - public int getTypesSize() { - return (this.types == null) ? 0 : this.types.size(); - } - - public java.util.Iterator getTypesIterator() { - return (this.types == null) ? 
null : this.types.iterator(); - } - - public void addToTypes(TTypeEntry elem) { - if (this.types == null) { - this.types = new ArrayList(); - } - this.types.add(elem); - } - - public List getTypes() { - return this.types; - } - - public void setTypes(List types) { - this.types = types; - } - - public void unsetTypes() { - this.types = null; - } - - /** Returns true if field types is set (has been assigned a value) and false otherwise */ - public boolean isSetTypes() { - return this.types != null; - } - - public void setTypesIsSet(boolean value) { - if (!value) { - this.types = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case TYPES: - if (value == null) { - unsetTypes(); - } else { - setTypes((List)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case TYPES: - return getTypes(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case TYPES: - return isSetTypes(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TTypeDesc) - return this.equals((TTypeDesc)that); - return false; - } - - public boolean equals(TTypeDesc that) { - if (that == null) - return false; - - boolean this_present_types = true && this.isSetTypes(); - boolean that_present_types = true && that.isSetTypes(); - if (this_present_types || that_present_types) { - if (!(this_present_types && that_present_types)) - return false; - if (!this.types.equals(that.types)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_types = true && (isSetTypes()); - list.add(present_types); - if (present_types) - list.add(types); - - return list.hashCode(); - } - - @Override - public int compareTo(TTypeDesc other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetTypes()).compareTo(other.isSetTypes()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTypes()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.types, other.types); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TTypeDesc("); - boolean first = true; - - sb.append("types:"); - if (this.types == null) { - sb.append("null"); - } else { - sb.append(this.types); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetTypes()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'types' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TTypeDescStandardSchemeFactory implements SchemeFactory { - public TTypeDescStandardScheme getScheme() { - return new TTypeDescStandardScheme(); - } - } - - private static class TTypeDescStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TTypeDesc struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // TYPES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list30 = iprot.readListBegin(); - struct.types = new ArrayList(_list30.size); - TTypeEntry _elem31; - for (int _i32 = 0; _i32 < _list30.size; ++_i32) - { - _elem31 = new TTypeEntry(); - _elem31.read(iprot); - struct.types.add(_elem31); - } - iprot.readListEnd(); - } - struct.setTypesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TTypeDesc struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.types != null) { - oprot.writeFieldBegin(TYPES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, struct.types.size())); - for (TTypeEntry _iter33 : struct.types) - { - _iter33.write(oprot); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TTypeDescTupleSchemeFactory implements SchemeFactory { - public TTypeDescTupleScheme getScheme() { - return new TTypeDescTupleScheme(); - } - } - - private static class TTypeDescTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TTypeDesc struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.types.size()); - for (TTypeEntry _iter34 : struct.types) - { - _iter34.write(oprot); - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TTypeDesc struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list35 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); - struct.types = new ArrayList(_list35.size); - TTypeEntry _elem36; - for (int _i37 = 0; _i37 < 
_list35.size; ++_i37) - { - _elem36 = new TTypeEntry(); - _elem36.read(iprot); - struct.types.add(_elem36); - } - } - struct.setTypesIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeEntry.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeEntry.java deleted file mode 100644 index b609151b8fbee..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeEntry.java +++ /dev/null @@ -1,614 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -public class TTypeEntry extends org.apache.thrift.TUnion { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TTypeEntry"); - private static final org.apache.thrift.protocol.TField PRIMITIVE_ENTRY_FIELD_DESC = new org.apache.thrift.protocol.TField("primitiveEntry", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField ARRAY_ENTRY_FIELD_DESC = new org.apache.thrift.protocol.TField("arrayEntry", org.apache.thrift.protocol.TType.STRUCT, (short)2); - private static final org.apache.thrift.protocol.TField MAP_ENTRY_FIELD_DESC = new org.apache.thrift.protocol.TField("mapEntry", org.apache.thrift.protocol.TType.STRUCT, (short)3); - private static final org.apache.thrift.protocol.TField STRUCT_ENTRY_FIELD_DESC = new org.apache.thrift.protocol.TField("structEntry", org.apache.thrift.protocol.TType.STRUCT, (short)4); - private static final org.apache.thrift.protocol.TField UNION_ENTRY_FIELD_DESC = new org.apache.thrift.protocol.TField("unionEntry", org.apache.thrift.protocol.TType.STRUCT, (short)5); - private static final org.apache.thrift.protocol.TField USER_DEFINED_TYPE_ENTRY_FIELD_DESC = new org.apache.thrift.protocol.TField("userDefinedTypeEntry", org.apache.thrift.protocol.TType.STRUCT, (short)6); - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - PRIMITIVE_ENTRY((short)1, "primitiveEntry"), - ARRAY_ENTRY((short)2, "arrayEntry"), - MAP_ENTRY((short)3, "mapEntry"), - STRUCT_ENTRY((short)4, "structEntry"), - UNION_ENTRY((short)5, "unionEntry"), - USER_DEFINED_TYPE_ENTRY((short)6, "userDefinedTypeEntry"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // PRIMITIVE_ENTRY - return PRIMITIVE_ENTRY; - case 2: // ARRAY_ENTRY - return ARRAY_ENTRY; - case 3: // MAP_ENTRY - return MAP_ENTRY; - case 4: // STRUCT_ENTRY - return STRUCT_ENTRY; - case 5: // UNION_ENTRY - return UNION_ENTRY; - case 6: // USER_DEFINED_TYPE_ENTRY - return USER_DEFINED_TYPE_ENTRY; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.PRIMITIVE_ENTRY, new org.apache.thrift.meta_data.FieldMetaData("primitiveEntry", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TPrimitiveTypeEntry.class))); - tmpMap.put(_Fields.ARRAY_ENTRY, new org.apache.thrift.meta_data.FieldMetaData("arrayEntry", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TArrayTypeEntry.class))); - tmpMap.put(_Fields.MAP_ENTRY, new org.apache.thrift.meta_data.FieldMetaData("mapEntry", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TMapTypeEntry.class))); - tmpMap.put(_Fields.STRUCT_ENTRY, new org.apache.thrift.meta_data.FieldMetaData("structEntry", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStructTypeEntry.class))); - tmpMap.put(_Fields.UNION_ENTRY, new org.apache.thrift.meta_data.FieldMetaData("unionEntry", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TUnionTypeEntry.class))); - tmpMap.put(_Fields.USER_DEFINED_TYPE_ENTRY, new org.apache.thrift.meta_data.FieldMetaData("userDefinedTypeEntry", org.apache.thrift.TFieldRequirementType.DEFAULT, - new 
org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TUserDefinedTypeEntry.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TTypeEntry.class, metaDataMap); - } - - public TTypeEntry() { - super(); - } - - public TTypeEntry(TTypeEntry._Fields setField, Object value) { - super(setField, value); - } - - public TTypeEntry(TTypeEntry other) { - super(other); - } - public TTypeEntry deepCopy() { - return new TTypeEntry(this); - } - - public static TTypeEntry primitiveEntry(TPrimitiveTypeEntry value) { - TTypeEntry x = new TTypeEntry(); - x.setPrimitiveEntry(value); - return x; - } - - public static TTypeEntry arrayEntry(TArrayTypeEntry value) { - TTypeEntry x = new TTypeEntry(); - x.setArrayEntry(value); - return x; - } - - public static TTypeEntry mapEntry(TMapTypeEntry value) { - TTypeEntry x = new TTypeEntry(); - x.setMapEntry(value); - return x; - } - - public static TTypeEntry structEntry(TStructTypeEntry value) { - TTypeEntry x = new TTypeEntry(); - x.setStructEntry(value); - return x; - } - - public static TTypeEntry unionEntry(TUnionTypeEntry value) { - TTypeEntry x = new TTypeEntry(); - x.setUnionEntry(value); - return x; - } - - public static TTypeEntry userDefinedTypeEntry(TUserDefinedTypeEntry value) { - TTypeEntry x = new TTypeEntry(); - x.setUserDefinedTypeEntry(value); - return x; - } - - - @Override - protected void checkType(_Fields setField, Object value) throws ClassCastException { - switch (setField) { - case PRIMITIVE_ENTRY: - if (value instanceof TPrimitiveTypeEntry) { - break; - } - throw new ClassCastException("Was expecting value of type TPrimitiveTypeEntry for field 'primitiveEntry', but got " + value.getClass().getSimpleName()); - case ARRAY_ENTRY: - if (value instanceof TArrayTypeEntry) { - break; - } - throw new ClassCastException("Was expecting value of type TArrayTypeEntry for field 'arrayEntry', but got " + value.getClass().getSimpleName()); - case MAP_ENTRY: - if (value instanceof TMapTypeEntry) { - break; - } - throw new ClassCastException("Was expecting value of type TMapTypeEntry for field 'mapEntry', but got " + value.getClass().getSimpleName()); - case STRUCT_ENTRY: - if (value instanceof TStructTypeEntry) { - break; - } - throw new ClassCastException("Was expecting value of type TStructTypeEntry for field 'structEntry', but got " + value.getClass().getSimpleName()); - case UNION_ENTRY: - if (value instanceof TUnionTypeEntry) { - break; - } - throw new ClassCastException("Was expecting value of type TUnionTypeEntry for field 'unionEntry', but got " + value.getClass().getSimpleName()); - case USER_DEFINED_TYPE_ENTRY: - if (value instanceof TUserDefinedTypeEntry) { - break; - } - throw new ClassCastException("Was expecting value of type TUserDefinedTypeEntry for field 'userDefinedTypeEntry', but got " + value.getClass().getSimpleName()); - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected Object standardSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, org.apache.thrift.protocol.TField field) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(field.id); - if (setField != null) { - switch (setField) { - case PRIMITIVE_ENTRY: - if (field.type == PRIMITIVE_ENTRY_FIELD_DESC.type) { - TPrimitiveTypeEntry primitiveEntry; - primitiveEntry = new TPrimitiveTypeEntry(); - primitiveEntry.read(iprot); - return primitiveEntry; - } else { - 
org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case ARRAY_ENTRY: - if (field.type == ARRAY_ENTRY_FIELD_DESC.type) { - TArrayTypeEntry arrayEntry; - arrayEntry = new TArrayTypeEntry(); - arrayEntry.read(iprot); - return arrayEntry; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case MAP_ENTRY: - if (field.type == MAP_ENTRY_FIELD_DESC.type) { - TMapTypeEntry mapEntry; - mapEntry = new TMapTypeEntry(); - mapEntry.read(iprot); - return mapEntry; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case STRUCT_ENTRY: - if (field.type == STRUCT_ENTRY_FIELD_DESC.type) { - TStructTypeEntry structEntry; - structEntry = new TStructTypeEntry(); - structEntry.read(iprot); - return structEntry; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case UNION_ENTRY: - if (field.type == UNION_ENTRY_FIELD_DESC.type) { - TUnionTypeEntry unionEntry; - unionEntry = new TUnionTypeEntry(); - unionEntry.read(iprot); - return unionEntry; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case USER_DEFINED_TYPE_ENTRY: - if (field.type == USER_DEFINED_TYPE_ENTRY_FIELD_DESC.type) { - TUserDefinedTypeEntry userDefinedTypeEntry; - userDefinedTypeEntry = new TUserDefinedTypeEntry(); - userDefinedTypeEntry.read(iprot); - return userDefinedTypeEntry; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - } - - @Override - protected void standardSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case PRIMITIVE_ENTRY: - TPrimitiveTypeEntry primitiveEntry = (TPrimitiveTypeEntry)value_; - primitiveEntry.write(oprot); - return; - case ARRAY_ENTRY: - TArrayTypeEntry arrayEntry = (TArrayTypeEntry)value_; - arrayEntry.write(oprot); - return; - case MAP_ENTRY: - TMapTypeEntry mapEntry = (TMapTypeEntry)value_; - mapEntry.write(oprot); - return; - case STRUCT_ENTRY: - TStructTypeEntry structEntry = (TStructTypeEntry)value_; - structEntry.write(oprot); - return; - case UNION_ENTRY: - TUnionTypeEntry unionEntry = (TUnionTypeEntry)value_; - unionEntry.write(oprot); - return; - case USER_DEFINED_TYPE_ENTRY: - TUserDefinedTypeEntry userDefinedTypeEntry = (TUserDefinedTypeEntry)value_; - userDefinedTypeEntry.write(oprot); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected Object tupleSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, short fieldID) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(fieldID); - if (setField != null) { - switch (setField) { - case PRIMITIVE_ENTRY: - TPrimitiveTypeEntry primitiveEntry; - primitiveEntry = new TPrimitiveTypeEntry(); - primitiveEntry.read(iprot); - return primitiveEntry; - case ARRAY_ENTRY: - TArrayTypeEntry arrayEntry; - arrayEntry = new TArrayTypeEntry(); - arrayEntry.read(iprot); - return arrayEntry; - case MAP_ENTRY: - TMapTypeEntry mapEntry; - mapEntry = new TMapTypeEntry(); - mapEntry.read(iprot); - return mapEntry; - case STRUCT_ENTRY: - TStructTypeEntry structEntry; - structEntry = new 
TStructTypeEntry(); - structEntry.read(iprot); - return structEntry; - case UNION_ENTRY: - TUnionTypeEntry unionEntry; - unionEntry = new TUnionTypeEntry(); - unionEntry.read(iprot); - return unionEntry; - case USER_DEFINED_TYPE_ENTRY: - TUserDefinedTypeEntry userDefinedTypeEntry; - userDefinedTypeEntry = new TUserDefinedTypeEntry(); - userDefinedTypeEntry.read(iprot); - return userDefinedTypeEntry; - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - throw new TProtocolException("Couldn't find a field with field id " + fieldID); - } - } - - @Override - protected void tupleSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case PRIMITIVE_ENTRY: - TPrimitiveTypeEntry primitiveEntry = (TPrimitiveTypeEntry)value_; - primitiveEntry.write(oprot); - return; - case ARRAY_ENTRY: - TArrayTypeEntry arrayEntry = (TArrayTypeEntry)value_; - arrayEntry.write(oprot); - return; - case MAP_ENTRY: - TMapTypeEntry mapEntry = (TMapTypeEntry)value_; - mapEntry.write(oprot); - return; - case STRUCT_ENTRY: - TStructTypeEntry structEntry = (TStructTypeEntry)value_; - structEntry.write(oprot); - return; - case UNION_ENTRY: - TUnionTypeEntry unionEntry = (TUnionTypeEntry)value_; - unionEntry.write(oprot); - return; - case USER_DEFINED_TYPE_ENTRY: - TUserDefinedTypeEntry userDefinedTypeEntry = (TUserDefinedTypeEntry)value_; - userDefinedTypeEntry.write(oprot); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected org.apache.thrift.protocol.TField getFieldDesc(_Fields setField) { - switch (setField) { - case PRIMITIVE_ENTRY: - return PRIMITIVE_ENTRY_FIELD_DESC; - case ARRAY_ENTRY: - return ARRAY_ENTRY_FIELD_DESC; - case MAP_ENTRY: - return MAP_ENTRY_FIELD_DESC; - case STRUCT_ENTRY: - return STRUCT_ENTRY_FIELD_DESC; - case UNION_ENTRY: - return UNION_ENTRY_FIELD_DESC; - case USER_DEFINED_TYPE_ENTRY: - return USER_DEFINED_TYPE_ENTRY_FIELD_DESC; - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected org.apache.thrift.protocol.TStruct getStructDesc() { - return STRUCT_DESC; - } - - @Override - protected _Fields enumForId(short id) { - return _Fields.findByThriftIdOrThrow(id); - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - - public TPrimitiveTypeEntry getPrimitiveEntry() { - if (getSetField() == _Fields.PRIMITIVE_ENTRY) { - return (TPrimitiveTypeEntry)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'primitiveEntry' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setPrimitiveEntry(TPrimitiveTypeEntry value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.PRIMITIVE_ENTRY; - value_ = value; - } - - public TArrayTypeEntry getArrayEntry() { - if (getSetField() == _Fields.ARRAY_ENTRY) { - return (TArrayTypeEntry)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'arrayEntry' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setArrayEntry(TArrayTypeEntry value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.ARRAY_ENTRY; - value_ = value; - } - - public TMapTypeEntry getMapEntry() { - if (getSetField() == _Fields.MAP_ENTRY) { - return (TMapTypeEntry)getFieldValue(); - } else 
{ - throw new RuntimeException("Cannot get field 'mapEntry' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setMapEntry(TMapTypeEntry value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.MAP_ENTRY; - value_ = value; - } - - public TStructTypeEntry getStructEntry() { - if (getSetField() == _Fields.STRUCT_ENTRY) { - return (TStructTypeEntry)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'structEntry' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setStructEntry(TStructTypeEntry value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.STRUCT_ENTRY; - value_ = value; - } - - public TUnionTypeEntry getUnionEntry() { - if (getSetField() == _Fields.UNION_ENTRY) { - return (TUnionTypeEntry)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'unionEntry' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setUnionEntry(TUnionTypeEntry value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.UNION_ENTRY; - value_ = value; - } - - public TUserDefinedTypeEntry getUserDefinedTypeEntry() { - if (getSetField() == _Fields.USER_DEFINED_TYPE_ENTRY) { - return (TUserDefinedTypeEntry)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'userDefinedTypeEntry' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setUserDefinedTypeEntry(TUserDefinedTypeEntry value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.USER_DEFINED_TYPE_ENTRY; - value_ = value; - } - - public boolean isSetPrimitiveEntry() { - return setField_ == _Fields.PRIMITIVE_ENTRY; - } - - - public boolean isSetArrayEntry() { - return setField_ == _Fields.ARRAY_ENTRY; - } - - - public boolean isSetMapEntry() { - return setField_ == _Fields.MAP_ENTRY; - } - - - public boolean isSetStructEntry() { - return setField_ == _Fields.STRUCT_ENTRY; - } - - - public boolean isSetUnionEntry() { - return setField_ == _Fields.UNION_ENTRY; - } - - - public boolean isSetUserDefinedTypeEntry() { - return setField_ == _Fields.USER_DEFINED_TYPE_ENTRY; - } - - - public boolean equals(Object other) { - if (other instanceof TTypeEntry) { - return equals((TTypeEntry)other); - } else { - return false; - } - } - - public boolean equals(TTypeEntry other) { - return other != null && getSetField() == other.getSetField() && getFieldValue().equals(other.getFieldValue()); - } - - @Override - public int compareTo(TTypeEntry other) { - int lastComparison = org.apache.thrift.TBaseHelper.compareTo(getSetField(), other.getSetField()); - if (lastComparison == 0) { - return org.apache.thrift.TBaseHelper.compareTo(getFieldValue(), other.getFieldValue()); - } - return lastComparison; - } - - - @Override - public int hashCode() { - List list = new ArrayList(); - list.add(this.getClass().getName()); - org.apache.thrift.TFieldIdEnum setField = getSetField(); - if (setField != null) { - list.add(setField.getThriftFieldId()); - Object value = getFieldValue(); - if (value instanceof org.apache.thrift.TEnum) { - list.add(((org.apache.thrift.TEnum)getFieldValue()).getValue()); - } else { - list.add(value); - } - } - return list.hashCode(); - } - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new 
org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - -} diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeId.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeId.java deleted file mode 100644 index a3735ebf3ec07..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeId.java +++ /dev/null @@ -1,105 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - - -import java.util.Map; -import java.util.HashMap; -import org.apache.thrift.TEnum; - -public enum TTypeId implements org.apache.thrift.TEnum { - BOOLEAN_TYPE(0), - TINYINT_TYPE(1), - SMALLINT_TYPE(2), - INT_TYPE(3), - BIGINT_TYPE(4), - FLOAT_TYPE(5), - DOUBLE_TYPE(6), - STRING_TYPE(7), - TIMESTAMP_TYPE(8), - BINARY_TYPE(9), - ARRAY_TYPE(10), - MAP_TYPE(11), - STRUCT_TYPE(12), - UNION_TYPE(13), - USER_DEFINED_TYPE(14), - DECIMAL_TYPE(15), - NULL_TYPE(16), - DATE_TYPE(17), - VARCHAR_TYPE(18), - CHAR_TYPE(19), - INTERVAL_YEAR_MONTH_TYPE(20), - INTERVAL_DAY_TIME_TYPE(21); - - private final int value; - - private TTypeId(int value) { - this.value = value; - } - - /** - * Get the integer value of this enum value, as defined in the Thrift IDL. - */ - public int getValue() { - return value; - } - - /** - * Find a the enum type by its integer value, as defined in the Thrift IDL. - * @return null if the value is not found. 
- */ - public static TTypeId findByValue(int value) { - switch (value) { - case 0: - return BOOLEAN_TYPE; - case 1: - return TINYINT_TYPE; - case 2: - return SMALLINT_TYPE; - case 3: - return INT_TYPE; - case 4: - return BIGINT_TYPE; - case 5: - return FLOAT_TYPE; - case 6: - return DOUBLE_TYPE; - case 7: - return STRING_TYPE; - case 8: - return TIMESTAMP_TYPE; - case 9: - return BINARY_TYPE; - case 10: - return ARRAY_TYPE; - case 11: - return MAP_TYPE; - case 12: - return STRUCT_TYPE; - case 13: - return UNION_TYPE; - case 14: - return USER_DEFINED_TYPE; - case 15: - return DECIMAL_TYPE; - case 16: - return NULL_TYPE; - case 17: - return DATE_TYPE; - case 18: - return VARCHAR_TYPE; - case 19: - return CHAR_TYPE; - case 20: - return INTERVAL_YEAR_MONTH_TYPE; - case 21: - return INTERVAL_DAY_TIME_TYPE; - default: - return null; - } - } -} diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifierValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifierValue.java deleted file mode 100644 index 1720c0e9a72c2..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifierValue.java +++ /dev/null @@ -1,365 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -public class TTypeQualifierValue extends org.apache.thrift.TUnion { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TTypeQualifierValue"); - private static final org.apache.thrift.protocol.TField I32_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("i32Value", org.apache.thrift.protocol.TType.I32, (short)1); - private static final org.apache.thrift.protocol.TField STRING_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("stringValue", org.apache.thrift.protocol.TType.STRING, (short)2); - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - I32_VALUE((short)1, "i32Value"), - STRING_VALUE((short)2, "stringValue"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // I32_VALUE - return I32_VALUE; - case 2: // STRING_VALUE - return STRING_VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.I32_VALUE, new org.apache.thrift.meta_data.FieldMetaData("i32Value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.STRING_VALUE, new org.apache.thrift.meta_data.FieldMetaData("stringValue", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TTypeQualifierValue.class, metaDataMap); - } - - public TTypeQualifierValue() { - super(); - } - - public TTypeQualifierValue(TTypeQualifierValue._Fields setField, Object value) { - super(setField, value); - } - - public TTypeQualifierValue(TTypeQualifierValue other) { - super(other); - } - public TTypeQualifierValue deepCopy() { - return new TTypeQualifierValue(this); - } - - public static TTypeQualifierValue i32Value(int value) { - TTypeQualifierValue x = new TTypeQualifierValue(); - x.setI32Value(value); - return x; - } - - public static TTypeQualifierValue stringValue(String value) { - TTypeQualifierValue x = new TTypeQualifierValue(); - x.setStringValue(value); - return x; - } - - - @Override - protected void checkType(_Fields setField, Object value) throws ClassCastException { - switch (setField) { - case I32_VALUE: - if (value instanceof Integer) { - break; - } - throw new ClassCastException("Was expecting value of type Integer for field 'i32Value', but got " + value.getClass().getSimpleName()); - case STRING_VALUE: - if (value instanceof String) { - break; - } - throw new ClassCastException("Was expecting value of type String for field 'stringValue', but got " + value.getClass().getSimpleName()); - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected Object standardSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, org.apache.thrift.protocol.TField field) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(field.id); - if (setField != null) { - switch (setField) { - case I32_VALUE: - if (field.type == I32_VALUE_FIELD_DESC.type) { - Integer i32Value; - i32Value = 
iprot.readI32(); - return i32Value; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case STRING_VALUE: - if (field.type == STRING_VALUE_FIELD_DESC.type) { - String stringValue; - stringValue = iprot.readString(); - return stringValue; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - } - - @Override - protected void standardSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case I32_VALUE: - Integer i32Value = (Integer)value_; - oprot.writeI32(i32Value); - return; - case STRING_VALUE: - String stringValue = (String)value_; - oprot.writeString(stringValue); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected Object tupleSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, short fieldID) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(fieldID); - if (setField != null) { - switch (setField) { - case I32_VALUE: - Integer i32Value; - i32Value = iprot.readI32(); - return i32Value; - case STRING_VALUE: - String stringValue; - stringValue = iprot.readString(); - return stringValue; - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - throw new TProtocolException("Couldn't find a field with field id " + fieldID); - } - } - - @Override - protected void tupleSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case I32_VALUE: - Integer i32Value = (Integer)value_; - oprot.writeI32(i32Value); - return; - case STRING_VALUE: - String stringValue = (String)value_; - oprot.writeString(stringValue); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected org.apache.thrift.protocol.TField getFieldDesc(_Fields setField) { - switch (setField) { - case I32_VALUE: - return I32_VALUE_FIELD_DESC; - case STRING_VALUE: - return STRING_VALUE_FIELD_DESC; - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected org.apache.thrift.protocol.TStruct getStructDesc() { - return STRUCT_DESC; - } - - @Override - protected _Fields enumForId(short id) { - return _Fields.findByThriftIdOrThrow(id); - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - - public int getI32Value() { - if (getSetField() == _Fields.I32_VALUE) { - return (Integer)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'i32Value' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI32Value(int value) { - setField_ = _Fields.I32_VALUE; - value_ = value; - } - - public String getStringValue() { - if (getSetField() == _Fields.STRING_VALUE) { - return (String)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'stringValue' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setStringValue(String value) { - if (value == null) throw new NullPointerException(); - setField_ = 
_Fields.STRING_VALUE; - value_ = value; - } - - public boolean isSetI32Value() { - return setField_ == _Fields.I32_VALUE; - } - - - public boolean isSetStringValue() { - return setField_ == _Fields.STRING_VALUE; - } - - - public boolean equals(Object other) { - if (other instanceof TTypeQualifierValue) { - return equals((TTypeQualifierValue)other); - } else { - return false; - } - } - - public boolean equals(TTypeQualifierValue other) { - return other != null && getSetField() == other.getSetField() && getFieldValue().equals(other.getFieldValue()); - } - - @Override - public int compareTo(TTypeQualifierValue other) { - int lastComparison = org.apache.thrift.TBaseHelper.compareTo(getSetField(), other.getSetField()); - if (lastComparison == 0) { - return org.apache.thrift.TBaseHelper.compareTo(getFieldValue(), other.getFieldValue()); - } - return lastComparison; - } - - - @Override - public int hashCode() { - List list = new ArrayList(); - list.add(this.getClass().getName()); - org.apache.thrift.TFieldIdEnum setField = getSetField(); - if (setField != null) { - list.add(setField.getThriftFieldId()); - Object value = getFieldValue(); - if (value instanceof org.apache.thrift.TEnum) { - list.add(((org.apache.thrift.TEnum)getFieldValue()).getValue()); - } else { - list.add(value); - } - } - return list.hashCode(); - } - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - -} diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifiers.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifiers.java deleted file mode 100644 index f46d2ceb79caa..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifiers.java +++ /dev/null @@ -1,454 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift 
Compiler (0.9.3)") -public class TTypeQualifiers implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TTypeQualifiers"); - - private static final org.apache.thrift.protocol.TField QUALIFIERS_FIELD_DESC = new org.apache.thrift.protocol.TField("qualifiers", org.apache.thrift.protocol.TType.MAP, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TTypeQualifiersStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TTypeQualifiersTupleSchemeFactory()); - } - - private Map qualifiers; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - QUALIFIERS((short)1, "qualifiers"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // QUALIFIERS - return QUALIFIERS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.QUALIFIERS, new org.apache.thrift.meta_data.FieldMetaData("qualifiers", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.MapMetaData(org.apache.thrift.protocol.TType.MAP, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING), - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TTypeQualifierValue.class)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TTypeQualifiers.class, metaDataMap); - } - - public TTypeQualifiers() { - } - - public TTypeQualifiers( - Map qualifiers) - { - this(); - this.qualifiers = qualifiers; - } - - /** - * Performs a deep copy on other. 
- */ - public TTypeQualifiers(TTypeQualifiers other) { - if (other.isSetQualifiers()) { - Map __this__qualifiers = new HashMap(other.qualifiers.size()); - for (Map.Entry other_element : other.qualifiers.entrySet()) { - - String other_element_key = other_element.getKey(); - TTypeQualifierValue other_element_value = other_element.getValue(); - - String __this__qualifiers_copy_key = other_element_key; - - TTypeQualifierValue __this__qualifiers_copy_value = new TTypeQualifierValue(other_element_value); - - __this__qualifiers.put(__this__qualifiers_copy_key, __this__qualifiers_copy_value); - } - this.qualifiers = __this__qualifiers; - } - } - - public TTypeQualifiers deepCopy() { - return new TTypeQualifiers(this); - } - - @Override - public void clear() { - this.qualifiers = null; - } - - public int getQualifiersSize() { - return (this.qualifiers == null) ? 0 : this.qualifiers.size(); - } - - public void putToQualifiers(String key, TTypeQualifierValue val) { - if (this.qualifiers == null) { - this.qualifiers = new HashMap(); - } - this.qualifiers.put(key, val); - } - - public Map getQualifiers() { - return this.qualifiers; - } - - public void setQualifiers(Map qualifiers) { - this.qualifiers = qualifiers; - } - - public void unsetQualifiers() { - this.qualifiers = null; - } - - /** Returns true if field qualifiers is set (has been assigned a value) and false otherwise */ - public boolean isSetQualifiers() { - return this.qualifiers != null; - } - - public void setQualifiersIsSet(boolean value) { - if (!value) { - this.qualifiers = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case QUALIFIERS: - if (value == null) { - unsetQualifiers(); - } else { - setQualifiers((Map)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case QUALIFIERS: - return getQualifiers(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case QUALIFIERS: - return isSetQualifiers(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TTypeQualifiers) - return this.equals((TTypeQualifiers)that); - return false; - } - - public boolean equals(TTypeQualifiers that) { - if (that == null) - return false; - - boolean this_present_qualifiers = true && this.isSetQualifiers(); - boolean that_present_qualifiers = true && that.isSetQualifiers(); - if (this_present_qualifiers || that_present_qualifiers) { - if (!(this_present_qualifiers && that_present_qualifiers)) - return false; - if (!this.qualifiers.equals(that.qualifiers)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_qualifiers = true && (isSetQualifiers()); - list.add(present_qualifiers); - if (present_qualifiers) - list.add(qualifiers); - - return list.hashCode(); - } - - @Override - public int compareTo(TTypeQualifiers other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetQualifiers()).compareTo(other.isSetQualifiers()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetQualifiers()) { - 
lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.qualifiers, other.qualifiers); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TTypeQualifiers("); - boolean first = true; - - sb.append("qualifiers:"); - if (this.qualifiers == null) { - sb.append("null"); - } else { - sb.append(this.qualifiers); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetQualifiers()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'qualifiers' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TTypeQualifiersStandardSchemeFactory implements SchemeFactory { - public TTypeQualifiersStandardScheme getScheme() { - return new TTypeQualifiersStandardScheme(); - } - } - - private static class TTypeQualifiersStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TTypeQualifiers struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // QUALIFIERS - if (schemeField.type == org.apache.thrift.protocol.TType.MAP) { - { - org.apache.thrift.protocol.TMap _map0 = iprot.readMapBegin(); - struct.qualifiers = new HashMap(2*_map0.size); - String _key1; - TTypeQualifierValue _val2; - for (int _i3 = 0; _i3 < _map0.size; ++_i3) - { - _key1 = iprot.readString(); - _val2 = new TTypeQualifierValue(); - _val2.read(iprot); - struct.qualifiers.put(_key1, _val2); - } - iprot.readMapEnd(); - } - struct.setQualifiersIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TTypeQualifiers struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.qualifiers != null) { - oprot.writeFieldBegin(QUALIFIERS_FIELD_DESC); - { - oprot.writeMapBegin(new 
org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRUCT, struct.qualifiers.size())); - for (Map.Entry _iter4 : struct.qualifiers.entrySet()) - { - oprot.writeString(_iter4.getKey()); - _iter4.getValue().write(oprot); - } - oprot.writeMapEnd(); - } - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TTypeQualifiersTupleSchemeFactory implements SchemeFactory { - public TTypeQualifiersTupleScheme getScheme() { - return new TTypeQualifiersTupleScheme(); - } - } - - private static class TTypeQualifiersTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TTypeQualifiers struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.qualifiers.size()); - for (Map.Entry _iter5 : struct.qualifiers.entrySet()) - { - oprot.writeString(_iter5.getKey()); - _iter5.getValue().write(oprot); - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TTypeQualifiers struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TMap _map6 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); - struct.qualifiers = new HashMap(2*_map6.size); - String _key7; - TTypeQualifierValue _val8; - for (int _i9 = 0; _i9 < _map6.size; ++_i9) - { - _key7 = iprot.readString(); - _val8 = new TTypeQualifierValue(); - _val8.read(iprot); - struct.qualifiers.put(_key7, _val8); - } - } - struct.setQualifiersIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TUnionTypeEntry.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TUnionTypeEntry.java deleted file mode 100644 index d53f74cb8eff1..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TUnionTypeEntry.java +++ /dev/null @@ -1,452 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TUnionTypeEntry implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new 
org.apache.thrift.protocol.TStruct("TUnionTypeEntry"); - - private static final org.apache.thrift.protocol.TField NAME_TO_TYPE_PTR_FIELD_DESC = new org.apache.thrift.protocol.TField("nameToTypePtr", org.apache.thrift.protocol.TType.MAP, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TUnionTypeEntryStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TUnionTypeEntryTupleSchemeFactory()); - } - - private Map nameToTypePtr; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - NAME_TO_TYPE_PTR((short)1, "nameToTypePtr"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // NAME_TO_TYPE_PTR - return NAME_TO_TYPE_PTR; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.NAME_TO_TYPE_PTR, new org.apache.thrift.meta_data.FieldMetaData("nameToTypePtr", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.MapMetaData(org.apache.thrift.protocol.TType.MAP, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING), - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32 , "TTypeEntryPtr")))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TUnionTypeEntry.class, metaDataMap); - } - - public TUnionTypeEntry() { - } - - public TUnionTypeEntry( - Map nameToTypePtr) - { - this(); - this.nameToTypePtr = nameToTypePtr; - } - - /** - * Performs a deep copy on other. 
- */ - public TUnionTypeEntry(TUnionTypeEntry other) { - if (other.isSetNameToTypePtr()) { - Map __this__nameToTypePtr = new HashMap(other.nameToTypePtr.size()); - for (Map.Entry other_element : other.nameToTypePtr.entrySet()) { - - String other_element_key = other_element.getKey(); - Integer other_element_value = other_element.getValue(); - - String __this__nameToTypePtr_copy_key = other_element_key; - - Integer __this__nameToTypePtr_copy_value = other_element_value; - - __this__nameToTypePtr.put(__this__nameToTypePtr_copy_key, __this__nameToTypePtr_copy_value); - } - this.nameToTypePtr = __this__nameToTypePtr; - } - } - - public TUnionTypeEntry deepCopy() { - return new TUnionTypeEntry(this); - } - - @Override - public void clear() { - this.nameToTypePtr = null; - } - - public int getNameToTypePtrSize() { - return (this.nameToTypePtr == null) ? 0 : this.nameToTypePtr.size(); - } - - public void putToNameToTypePtr(String key, int val) { - if (this.nameToTypePtr == null) { - this.nameToTypePtr = new HashMap(); - } - this.nameToTypePtr.put(key, val); - } - - public Map getNameToTypePtr() { - return this.nameToTypePtr; - } - - public void setNameToTypePtr(Map nameToTypePtr) { - this.nameToTypePtr = nameToTypePtr; - } - - public void unsetNameToTypePtr() { - this.nameToTypePtr = null; - } - - /** Returns true if field nameToTypePtr is set (has been assigned a value) and false otherwise */ - public boolean isSetNameToTypePtr() { - return this.nameToTypePtr != null; - } - - public void setNameToTypePtrIsSet(boolean value) { - if (!value) { - this.nameToTypePtr = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case NAME_TO_TYPE_PTR: - if (value == null) { - unsetNameToTypePtr(); - } else { - setNameToTypePtr((Map)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case NAME_TO_TYPE_PTR: - return getNameToTypePtr(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case NAME_TO_TYPE_PTR: - return isSetNameToTypePtr(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TUnionTypeEntry) - return this.equals((TUnionTypeEntry)that); - return false; - } - - public boolean equals(TUnionTypeEntry that) { - if (that == null) - return false; - - boolean this_present_nameToTypePtr = true && this.isSetNameToTypePtr(); - boolean that_present_nameToTypePtr = true && that.isSetNameToTypePtr(); - if (this_present_nameToTypePtr || that_present_nameToTypePtr) { - if (!(this_present_nameToTypePtr && that_present_nameToTypePtr)) - return false; - if (!this.nameToTypePtr.equals(that.nameToTypePtr)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_nameToTypePtr = true && (isSetNameToTypePtr()); - list.add(present_nameToTypePtr); - if (present_nameToTypePtr) - list.add(nameToTypePtr); - - return list.hashCode(); - } - - @Override - public int compareTo(TUnionTypeEntry other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = 
Boolean.valueOf(isSetNameToTypePtr()).compareTo(other.isSetNameToTypePtr()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNameToTypePtr()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nameToTypePtr, other.nameToTypePtr); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TUnionTypeEntry("); - boolean first = true; - - sb.append("nameToTypePtr:"); - if (this.nameToTypePtr == null) { - sb.append("null"); - } else { - sb.append(this.nameToTypePtr); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetNameToTypePtr()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nameToTypePtr' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TUnionTypeEntryStandardSchemeFactory implements SchemeFactory { - public TUnionTypeEntryStandardScheme getScheme() { - return new TUnionTypeEntryStandardScheme(); - } - } - - private static class TUnionTypeEntryStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TUnionTypeEntry struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // NAME_TO_TYPE_PTR - if (schemeField.type == org.apache.thrift.protocol.TType.MAP) { - { - org.apache.thrift.protocol.TMap _map20 = iprot.readMapBegin(); - struct.nameToTypePtr = new HashMap(2*_map20.size); - String _key21; - int _val22; - for (int _i23 = 0; _i23 < _map20.size; ++_i23) - { - _key21 = iprot.readString(); - _val22 = iprot.readI32(); - struct.nameToTypePtr.put(_key21, _val22); - } - iprot.readMapEnd(); - } - struct.setNameToTypePtrIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TUnionTypeEntry struct) throws org.apache.thrift.TException { - 
struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.nameToTypePtr != null) { - oprot.writeFieldBegin(NAME_TO_TYPE_PTR_FIELD_DESC); - { - oprot.writeMapBegin(new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.I32, struct.nameToTypePtr.size())); - for (Map.Entry _iter24 : struct.nameToTypePtr.entrySet()) - { - oprot.writeString(_iter24.getKey()); - oprot.writeI32(_iter24.getValue()); - } - oprot.writeMapEnd(); - } - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TUnionTypeEntryTupleSchemeFactory implements SchemeFactory { - public TUnionTypeEntryTupleScheme getScheme() { - return new TUnionTypeEntryTupleScheme(); - } - } - - private static class TUnionTypeEntryTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TUnionTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.nameToTypePtr.size()); - for (Map.Entry _iter25 : struct.nameToTypePtr.entrySet()) - { - oprot.writeString(_iter25.getKey()); - oprot.writeI32(_iter25.getValue()); - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TUnionTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TMap _map26 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.I32, iprot.readI32()); - struct.nameToTypePtr = new HashMap(2*_map26.size); - String _key27; - int _val28; - for (int _i29 = 0; _i29 < _map26.size; ++_i29) - { - _key27 = iprot.readString(); - _val28 = iprot.readI32(); - struct.nameToTypePtr.put(_key27, _val28); - } - } - struct.setNameToTypePtrIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TUserDefinedTypeEntry.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TUserDefinedTypeEntry.java deleted file mode 100644 index b80c4dd5c6302..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TUserDefinedTypeEntry.java +++ /dev/null @@ -1,389 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class 
TUserDefinedTypeEntry implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TUserDefinedTypeEntry"); - - private static final org.apache.thrift.protocol.TField TYPE_CLASS_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("typeClassName", org.apache.thrift.protocol.TType.STRING, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TUserDefinedTypeEntryStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TUserDefinedTypeEntryTupleSchemeFactory()); - } - - private String typeClassName; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - TYPE_CLASS_NAME((short)1, "typeClassName"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // TYPE_CLASS_NAME - return TYPE_CLASS_NAME; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.TYPE_CLASS_NAME, new org.apache.thrift.meta_data.FieldMetaData("typeClassName", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TUserDefinedTypeEntry.class, metaDataMap); - } - - public TUserDefinedTypeEntry() { - } - - public TUserDefinedTypeEntry( - String typeClassName) - { - this(); - this.typeClassName = typeClassName; - } - - /** - * Performs a deep copy on other. 
- */ - public TUserDefinedTypeEntry(TUserDefinedTypeEntry other) { - if (other.isSetTypeClassName()) { - this.typeClassName = other.typeClassName; - } - } - - public TUserDefinedTypeEntry deepCopy() { - return new TUserDefinedTypeEntry(this); - } - - @Override - public void clear() { - this.typeClassName = null; - } - - public String getTypeClassName() { - return this.typeClassName; - } - - public void setTypeClassName(String typeClassName) { - this.typeClassName = typeClassName; - } - - public void unsetTypeClassName() { - this.typeClassName = null; - } - - /** Returns true if field typeClassName is set (has been assigned a value) and false otherwise */ - public boolean isSetTypeClassName() { - return this.typeClassName != null; - } - - public void setTypeClassNameIsSet(boolean value) { - if (!value) { - this.typeClassName = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case TYPE_CLASS_NAME: - if (value == null) { - unsetTypeClassName(); - } else { - setTypeClassName((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case TYPE_CLASS_NAME: - return getTypeClassName(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case TYPE_CLASS_NAME: - return isSetTypeClassName(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TUserDefinedTypeEntry) - return this.equals((TUserDefinedTypeEntry)that); - return false; - } - - public boolean equals(TUserDefinedTypeEntry that) { - if (that == null) - return false; - - boolean this_present_typeClassName = true && this.isSetTypeClassName(); - boolean that_present_typeClassName = true && that.isSetTypeClassName(); - if (this_present_typeClassName || that_present_typeClassName) { - if (!(this_present_typeClassName && that_present_typeClassName)) - return false; - if (!this.typeClassName.equals(that.typeClassName)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_typeClassName = true && (isSetTypeClassName()); - list.add(present_typeClassName); - if (present_typeClassName) - list.add(typeClassName); - - return list.hashCode(); - } - - @Override - public int compareTo(TUserDefinedTypeEntry other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetTypeClassName()).compareTo(other.isSetTypeClassName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTypeClassName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.typeClassName, other.typeClassName); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - 
@Override - public String toString() { - StringBuilder sb = new StringBuilder("TUserDefinedTypeEntry("); - boolean first = true; - - sb.append("typeClassName:"); - if (this.typeClassName == null) { - sb.append("null"); - } else { - sb.append(this.typeClassName); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetTypeClassName()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'typeClassName' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TUserDefinedTypeEntryStandardSchemeFactory implements SchemeFactory { - public TUserDefinedTypeEntryStandardScheme getScheme() { - return new TUserDefinedTypeEntryStandardScheme(); - } - } - - private static class TUserDefinedTypeEntryStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TUserDefinedTypeEntry struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // TYPE_CLASS_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.typeClassName = iprot.readString(); - struct.setTypeClassNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TUserDefinedTypeEntry struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.typeClassName != null) { - oprot.writeFieldBegin(TYPE_CLASS_NAME_FIELD_DESC); - oprot.writeString(struct.typeClassName); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TUserDefinedTypeEntryTupleSchemeFactory implements SchemeFactory { - public TUserDefinedTypeEntryTupleScheme getScheme() { - return new TUserDefinedTypeEntryTupleScheme(); - } - } - - private static class TUserDefinedTypeEntryTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TUserDefinedTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeString(struct.typeClassName); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TUserDefinedTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.typeClassName = 
iprot.readString(); - struct.setTypeClassNameIsSet(true); - } - } - -} - From ab0bad9544367727fc017a9a43e4c5bf86da0445 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 16 Oct 2020 12:52:45 -0700 Subject: [PATCH 0260/1009] [SPARK-33171][INFRA] Mark ParquetV*FilterSuite/ParquetV*SchemaPruningSuite as ExtendedSQLTest ### What changes were proposed in this pull request? This PR aims to mark ParquetV1FilterSuite and ParquetV2FilterSuite as `ExtendedSQLTest`. - ParquetV1FilterSuite/ParquetV2FilterSuite - ParquetV1SchemaPruningSuite/ParquetV2SchemaPruningSuite ### Why are the changes needed? Currently, `sql - other tests` is the longest job. This PR will move the above tests to `sql - slow tests` job. **BEFORE** - https://github.com/apache/spark/runs/1264150802 (1 hour 37 minutes) **AFTER** - https://github.com/apache/spark/pull/30068/checks?check_run_id=1265879896 (1 hour 21 minutes) ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the Github Action with the reduced time. Closes #30068 from dongjoon-hyun/MOVE3. Lead-authored-by: Dongjoon Hyun Co-authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../sql/execution/datasources/parquet/ParquetFilterSuite.scala | 3 +++ .../datasources/parquet/ParquetSchemaPruningSuite.scala | 3 +++ 2 files changed, 6 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala index 5689b9d05d7bb..763f9315bfc5b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -45,6 +45,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.ParquetOutputTimestampType import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ +import org.apache.spark.tags.ExtendedSQLTest import org.apache.spark.util.{AccumulatorContext, AccumulatorV2} /** @@ -1571,6 +1572,7 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared } } +@ExtendedSQLTest class ParquetV1FilterSuite extends ParquetFilterSuite { override protected def sparkConf: SparkConf = super @@ -1650,6 +1652,7 @@ class ParquetV1FilterSuite extends ParquetFilterSuite { } } +@ExtendedSQLTest class ParquetV2FilterSuite extends ParquetFilterSuite { // TODO: enable Parquet V2 write path after file source V2 writers are workable. 
override protected def sparkConf: SparkConf = diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala index c64e95078e916..cab93bd96fff4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.execution.datasources.SchemaPruningSuite import org.apache.spark.sql.execution.datasources.v2.BatchScanExec import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetScan import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.tags.ExtendedSQLTest abstract class ParquetSchemaPruningSuite extends SchemaPruningSuite with AdaptiveSparkPlanHelper { override protected val dataSourceName: String = "parquet" @@ -33,6 +34,7 @@ abstract class ParquetSchemaPruningSuite extends SchemaPruningSuite with Adaptiv } +@ExtendedSQLTest class ParquetV1SchemaPruningSuite extends ParquetSchemaPruningSuite { override protected def sparkConf: SparkConf = super @@ -40,6 +42,7 @@ class ParquetV1SchemaPruningSuite extends ParquetSchemaPruningSuite { .set(SQLConf.USE_V1_SOURCE_LIST, "parquet") } +@ExtendedSQLTest class ParquetV2SchemaPruningSuite extends ParquetSchemaPruningSuite { // TODO: enable Parquet V2 write path after file source V2 writers are workable. override protected def sparkConf: SparkConf = From acb79f52db6f2b7e84fda005e3a38ea2aa3fc5ce Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 16 Oct 2020 14:27:27 -0700 Subject: [PATCH 0261/1009] [MINOR][SQL] Re-use `binaryToSQLTimestamp()` in `ParquetRowConverter` ### What changes were proposed in this pull request? The function `binaryToSQLTimestamp()` is used by Parquet Vectorized reader. Parquet MR reader has similar code for de-serialization of INT96 timestamps. In this PR, I propose to de-duplicate code and re-use `binaryToSQLTimestamp()`. ### Why are the changes needed? This should improve maintenance, and should allow to avoid errors while changing Vectorized and regular parquet readers. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By existing test suites, for instance `ParquetIOSuite`. Closes #30069 from MaxGekk/int96-common-serde. 
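For readers who want the decoding spelled out: the logic being de-duplicated boils down to the INT96 layout handled below. This is only a sketch reconstructed from the converter code removed in this patch (a 12-byte binary holding nanos-of-day followed by a Julian day, little-endian); `int96ToMicros` is a hypothetical standalone name, not the signature of `ParquetRowConverter.binaryToSQLTimestamp` itself.

```scala
import java.nio.ByteOrder

import org.apache.parquet.io.api.Binary
import org.apache.spark.sql.catalyst.util.DateTimeUtils

// Sketch of the shared INT96 decoding: 8 bytes of nanos-of-day, then a 4-byte
// Julian day, both little-endian, combined into microseconds since the epoch.
def int96ToMicros(value: Binary): Long = {
  assert(value.length() == 12,
    s"Timestamps (with nanoseconds) are expected in 12-byte binaries, got ${value.length()} bytes.")
  val buf = value.toByteBuffer.order(ByteOrder.LITTLE_ENDIAN)
  val timeOfDayNanos = buf.getLong
  val julianDay = buf.getInt
  DateTimeUtils.fromJulianDay(julianDay, timeOfDayNanos)
}
```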
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../datasources/parquet/ParquetRowConverter.scala | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala index 9a010d7192081..e0008ed16d56d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala @@ -300,15 +300,7 @@ private[parquet] class ParquetRowConverter( new ParquetPrimitiveConverter(updater) { // Converts nanosecond timestamps stored as INT96 override def addBinary(value: Binary): Unit = { - assert( - value.length() == 12, - "Timestamps (with nanoseconds) are expected to be stored in 12-byte long binaries, " + - s"but got a ${value.length()}-byte binary.") - - val buf = value.toByteBuffer.order(ByteOrder.LITTLE_ENDIAN) - val timeOfDayNanos = buf.getLong - val julianDay = buf.getInt - val rawTime = DateTimeUtils.fromJulianDay(julianDay, timeOfDayNanos) + val rawTime = ParquetRowConverter.binaryToSQLTimestamp(value) val adjTime = convertTz.map(DateTimeUtils.convertTz(rawTime, _, ZoneOffset.UTC)) .getOrElse(rawTime) updater.setLong(adjTime)

From ce6180c8c3b67a09b32735a2f5f9154d7d9aa14e Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Fri, 16 Oct 2020 14:47:46 -0700 Subject: [PATCH 0262/1009] [SPARK-33154][CORE][K8S] Handle cleaned shuffles during migration

### What changes were proposed in this pull request? If a block is removed between its discovery and its transfer, we short-circuit that block: it is removed from the list to transfer and the count of transferred blocks is incremented. This is complicated since both RPC errors and local read errors may be reported with the same exception class.

### Why are the changes needed? Slow shuffle refreshes could waste time when decommissioning has already finished. Decommissioning might avoid transferring some blocks to an otherwise live host which is marked as "full" if a deleted block fails to transfer to that host.

### Does this PR introduce _any_ user-facing change? No.

### How was this patch tested? New unit and integration tests.

Closes #30046 from holdenk/handle-cleaned-shuffles-during0migration.
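The disambiguation described above works roughly like this: when an upload throws an `IOException`, the migrator re-resolves the shuffle block locally; if nothing resolves any more, the block was deleted and is skipped, otherwise the error is rethrown. The following is only a minimal, self-contained sketch of that pattern; `resolveBlocks` and `upload` are hypothetical stand-ins for the resolver and block transfer service used in the actual change.

```scala
import java.io.IOException

// Hedged sketch of the short-circuit pattern used during block migration.
// `resolveBlocks` stands in for the migratable resolver (an empty result means the
// block no longer exists locally) and `upload` for the block transfer call.
def migrateOrSkip[B](
    block: B,
    resolveBlocks: B => Seq[Array[Byte]],
    upload: Array[Byte] => Unit): Unit = {
  try {
    resolveBlocks(block).foreach(upload)
  } catch {
    case e: IOException =>
      // An IOException can mean "peer failed" or "file deleted under us"; re-resolving
      // locally disambiguates. Nothing left to resolve => the block was cleaned up
      // (e.g. by the shuffle TTL cleaner), so skip it instead of failing the migration.
      if (resolveBlocks(block).isEmpty) {
        println(s"Skipping $block, block deleted.") // stands in for logWarning
      } else {
        throw e
      }
  }
}
```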
Authored-by: Holden Karau Signed-off-by: Dongjoon Hyun --- .../spark/network/BlockTransferService.scala | 1 + .../shuffle/IndexShuffleBlockResolver.scala | 39 +++++--- .../storage/BlockManagerDecommissioner.scala | 52 ++++++---- .../BlockManagerDecommissionUnitSuite.scala | 99 +++++++++++++++++-- .../integrationtest/DecommissionSuite.scala | 71 +++++++++++++ .../k8s/integrationtest/ProcessUtils.scala | 4 +- .../integration-tests/tests/autoscale.py | 49 +++++++++ .../tests/decommissioning.py | 2 +- .../tests/decommissioning_cleanup.py | 59 +++++++++++ 9 files changed, 334 insertions(+), 42 deletions(-) create mode 100644 resource-managers/kubernetes/integration-tests/tests/autoscale.py create mode 100644 resource-managers/kubernetes/integration-tests/tests/decommissioning_cleanup.py diff --git a/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala index 98129b62b53df..c7f5a97e35612 100644 --- a/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala +++ b/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala @@ -110,6 +110,7 @@ abstract class BlockTransferService extends BlockStoreClient { * This method is similar to [[uploadBlock]], except this one blocks the thread * until the upload finishes. */ + @throws[java.io.IOException] def uploadBlockSync( hostname: String, port: Int, diff --git a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala index a019a3382d5b2..9496918760298 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala @@ -225,19 +225,32 @@ private[spark] class IndexShuffleBlockResolver( * Get the index & data block for migration. 
*/ def getMigrationBlocks(shuffleBlockInfo: ShuffleBlockInfo): List[(BlockId, ManagedBuffer)] = { - val shuffleId = shuffleBlockInfo.shuffleId - val mapId = shuffleBlockInfo.mapId - // Load the index block - val indexFile = getIndexFile(shuffleId, mapId) - val indexBlockId = ShuffleIndexBlockId(shuffleId, mapId, NOOP_REDUCE_ID) - val indexFileSize = indexFile.length() - val indexBlockData = new FileSegmentManagedBuffer(transportConf, indexFile, 0, indexFileSize) - - // Load the data block - val dataFile = getDataFile(shuffleId, mapId) - val dataBlockId = ShuffleDataBlockId(shuffleId, mapId, NOOP_REDUCE_ID) - val dataBlockData = new FileSegmentManagedBuffer(transportConf, dataFile, 0, dataFile.length()) - List((indexBlockId, indexBlockData), (dataBlockId, dataBlockData)) + try { + val shuffleId = shuffleBlockInfo.shuffleId + val mapId = shuffleBlockInfo.mapId + // Load the index block + val indexFile = getIndexFile(shuffleId, mapId) + val indexBlockId = ShuffleIndexBlockId(shuffleId, mapId, NOOP_REDUCE_ID) + val indexFileSize = indexFile.length() + val indexBlockData = new FileSegmentManagedBuffer( + transportConf, indexFile, 0, indexFileSize) + + // Load the data block + val dataFile = getDataFile(shuffleId, mapId) + val dataBlockId = ShuffleDataBlockId(shuffleId, mapId, NOOP_REDUCE_ID) + val dataBlockData = new FileSegmentManagedBuffer( + transportConf, dataFile, 0, dataFile.length()) + + // Make sure the files exist + assert(indexFile.exists() && dataFile.exists()) + + List((indexBlockId, indexBlockData), (dataBlockId, dataBlockData)) + } catch { + case e: Exception => // If we can't load the blocks ignore them. + logWarning(s"Failed to resolve shuffle block ${shuffleBlockInfo}, skipping migration" + + "this is expected to occure if a block is removed after decommissioning has started.") + List.empty[(BlockId, ManagedBuffer)] + } } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala index f0a8e47aa3200..3377b357a9231 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala @@ -17,6 +17,7 @@ package org.apache.spark.storage +import java.io.IOException import java.util.concurrent.ExecutorService import java.util.concurrent.atomic.AtomicInteger @@ -82,23 +83,38 @@ private[storage] class BlockManagerDecommissioner( Thread.sleep(SLEEP_TIME_SECS * 1000L) case Some((shuffleBlockInfo, retryCount)) => if (retryCount < maxReplicationFailuresForDecommission) { - logInfo(s"Trying to migrate shuffle ${shuffleBlockInfo} to ${peer}") - val blocks = - bm.migratableResolver.getMigrationBlocks(shuffleBlockInfo) + logDebug(s"Trying to migrate shuffle ${shuffleBlockInfo} to ${peer}") + val blocks = bm.migratableResolver.getMigrationBlocks(shuffleBlockInfo) logDebug(s"Got migration sub-blocks ${blocks}") - blocks.foreach { case (blockId, buffer) => - logDebug(s"Migrating sub-block ${blockId}") - bm.blockTransferService.uploadBlockSync( - peer.host, - peer.port, - peer.executorId, - blockId, - buffer, - StorageLevel.DISK_ONLY, - null)// class tag, we don't need for shuffle - logDebug(s"Migrated sub block ${blockId}") + + // Migrate the components of the blocks. 
+ try { + blocks.foreach { case (blockId, buffer) => + logDebug(s"Migrating sub-block ${blockId}") + bm.blockTransferService.uploadBlockSync( + peer.host, + peer.port, + peer.executorId, + blockId, + buffer, + StorageLevel.DISK_ONLY, + null)// class tag, we don't need for shuffle + logDebug(s"Migrated sub block ${blockId}") + } + logDebug(s"Migrated ${shuffleBlockInfo} to ${peer}") + } catch { + case e: IOException => + // If a block got deleted before netty opened the file handle, then trying to + // load the blocks now will fail. This is most likely to occur if we start + // migrating blocks and then the shuffle TTL cleaner kicks in. However this + // could also happen with manually managed shuffles or a GC event on the driver + // a no longer referenced RDD with shuffle files. + if (bm.migratableResolver.getMigrationBlocks(shuffleBlockInfo).isEmpty) { + logWarning(s"Skipping block ${shuffleBlockInfo}, block deleted.") + } else { + throw e + } } - logDebug(s"Migrated ${shuffleBlockInfo} to ${peer}") } else { logError(s"Skipping block ${shuffleBlockInfo} because it has failed ${retryCount}") } @@ -121,11 +137,11 @@ private[storage] class BlockManagerDecommissioner( } // Shuffles which are either in queue for migrations or migrated - private val migratingShuffles = mutable.HashSet[ShuffleBlockInfo]() + protected[storage] val migratingShuffles = mutable.HashSet[ShuffleBlockInfo]() // Shuffles which have migrated. This used to know when we are "done", being done can change // if a new shuffle file is created by a running task. - private val numMigratedShuffles = new AtomicInteger(0) + private[storage] val numMigratedShuffles = new AtomicInteger(0) // Shuffles which are queued for migration & number of retries so far. // Visible in storage for testing. @@ -225,7 +241,7 @@ private[storage] class BlockManagerDecommissioner( // Update the queue of shuffles to be migrated logInfo("Offloading shuffle blocks") val localShuffles = bm.migratableResolver.getStoredShuffles().toSet - val newShufflesToMigrate = localShuffles.diff(migratingShuffles).toSeq + val newShufflesToMigrate = (localShuffles.diff(migratingShuffles)).toSeq shufflesToMigrate.addAll(newShufflesToMigrate.map(x => (x, 0)).asJava) migratingShuffles ++= newShufflesToMigrate diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionUnitSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionUnitSuite.scala index 74ad8bd2bcf9d..a87fc1835f6b5 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionUnitSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionUnitSuite.scala @@ -63,9 +63,14 @@ class BlockManagerDecommissionUnitSuite extends SparkFunSuite with Matchers { * a constant Long.MaxValue timestamp. 
*/ private def validateDecommissionTimestamps(conf: SparkConf, bm: BlockManager, - migratableShuffleBlockResolver: MigratableResolver, fail: Boolean = false) = { + fail: Boolean = false) = { // Verify the decommissioning manager timestamps and status val bmDecomManager = new BlockManagerDecommissioner(conf, bm) + validateDecommissionTimestampsOnManager(bmDecomManager, fail) + } + + private def validateDecommissionTimestampsOnManager(bmDecomManager: BlockManagerDecommissioner, + fail: Boolean = false, numShuffles: Option[Int] = None) = { var previousTime: Option[Long] = None try { bmDecomManager.start() @@ -85,6 +90,9 @@ class BlockManagerDecommissionUnitSuite extends SparkFunSuite with Matchers { // If we expect migration to fail we should get the max value quickly. assert(currentTime === Long.MaxValue) } + numShuffles.foreach { s => + assert(bmDecomManager.numMigratedShuffles.get() === s) + } } if (!fail) { // Wait 5 seconds and assert times keep moving forward. @@ -110,7 +118,7 @@ class BlockManagerDecommissionUnitSuite extends SparkFunSuite with Matchers { .thenReturn(Seq(BlockManagerId("exec2", "host2", 12345))) // Verify the decom manager handles this correctly - validateDecommissionTimestamps(sparkConf, bm, migratableShuffleBlockResolver) + validateDecommissionTimestamps(sparkConf, bm) } test("block decom manager with no migrations configured") { @@ -128,8 +136,7 @@ class BlockManagerDecommissionUnitSuite extends SparkFunSuite with Matchers { .set(config.STORAGE_DECOMMISSION_RDD_BLOCKS_ENABLED, false) .set(config.STORAGE_DECOMMISSION_REPLICATION_REATTEMPT_INTERVAL, 10L) // Verify the decom manager handles this correctly - validateDecommissionTimestamps(badConf, bm, migratableShuffleBlockResolver, - fail = true) + validateDecommissionTimestamps(badConf, bm, fail = true) } test("block decom manager with no peers") { @@ -144,8 +151,7 @@ class BlockManagerDecommissionUnitSuite extends SparkFunSuite with Matchers { .thenReturn(Seq()) // Verify the decom manager handles this correctly - validateDecommissionTimestamps(sparkConf, bm, migratableShuffleBlockResolver, - fail = true) + validateDecommissionTimestamps(sparkConf, bm, fail = true) } @@ -161,7 +167,83 @@ class BlockManagerDecommissionUnitSuite extends SparkFunSuite with Matchers { .thenReturn(Seq(BlockManagerId("exec2", "host2", 12345))) // Verify the decom manager handles this correctly - validateDecommissionTimestamps(sparkConf, bm, migratableShuffleBlockResolver) + validateDecommissionTimestamps(sparkConf, bm) + } + + test("block decom manager does not re-add removed shuffle files") { + // Set up the mocks so we return one shuffle block + val bm = mock(classOf[BlockManager]) + val migratableShuffleBlockResolver = mock(classOf[MigratableResolver]) + registerShuffleBlocks(migratableShuffleBlockResolver, Set()) + when(bm.migratableResolver).thenReturn(migratableShuffleBlockResolver) + when(bm.getMigratableRDDBlocks()) + .thenReturn(Seq()) + when(bm.getPeers(mc.any())) + .thenReturn(Seq(BlockManagerId("exec2", "host2", 12345))) + val bmDecomManager = new BlockManagerDecommissioner(sparkConf, bm) + bmDecomManager.migratingShuffles += ShuffleBlockInfo(10, 10) + + validateDecommissionTimestampsOnManager(bmDecomManager) + } + + test("block decom manager handles IO failures") { + // Set up the mocks so we return one shuffle block + val bm = mock(classOf[BlockManager]) + val migratableShuffleBlockResolver = mock(classOf[MigratableResolver]) + registerShuffleBlocks(migratableShuffleBlockResolver, Set((1, 1L, 1))) + 
when(bm.migratableResolver).thenReturn(migratableShuffleBlockResolver) + when(bm.getMigratableRDDBlocks()) + .thenReturn(Seq()) + when(bm.getPeers(mc.any())) + .thenReturn(Seq(BlockManagerId("exec2", "host2", 12345))) + + val blockTransferService = mock(classOf[BlockTransferService]) + // Simulate an ambiguous IO error (e.g. block could be gone, connection failed, etc.) + when(blockTransferService.uploadBlockSync( + mc.any(), mc.any(), mc.any(), mc.any(), mc.any(), mc.any(), mc.isNull())).thenThrow( + new java.io.IOException("boop") + ) + + when(bm.blockTransferService).thenReturn(blockTransferService) + + // Verify the decom manager handles this correctly + val bmDecomManager = new BlockManagerDecommissioner(sparkConf, bm) + validateDecommissionTimestampsOnManager(bmDecomManager, fail = false) + } + + test("block decom manager short circuits removed blocks") { + // Set up the mocks so we return one shuffle block + val bm = mock(classOf[BlockManager]) + val migratableShuffleBlockResolver = mock(classOf[MigratableResolver]) + // First call get blocks, then empty list simulating a delete. + when(migratableShuffleBlockResolver.getStoredShuffles()) + .thenReturn(Seq(ShuffleBlockInfo(1, 1))) + .thenReturn(Seq()) + when(migratableShuffleBlockResolver.getMigrationBlocks(mc.any())) + .thenReturn(List( + (ShuffleIndexBlockId(1, 1, 1), mock(classOf[ManagedBuffer])), + (ShuffleDataBlockId(1, 1, 1), mock(classOf[ManagedBuffer])))) + .thenReturn(List()) + + when(bm.migratableResolver).thenReturn(migratableShuffleBlockResolver) + when(bm.getMigratableRDDBlocks()) + .thenReturn(Seq()) + when(bm.getPeers(mc.any())) + .thenReturn(Seq(BlockManagerId("exec2", "host2", 12345))) + + val blockTransferService = mock(classOf[BlockTransferService]) + // Simulate an ambiguous IO error (e.g. block could be gone, connection failed, etc.) + when(blockTransferService.uploadBlockSync( + mc.any(), mc.any(), mc.any(), mc.any(), mc.any(), mc.any(), mc.isNull())).thenThrow( + new java.io.IOException("boop") + ) + + when(bm.blockTransferService).thenReturn(blockTransferService) + + // Verify the decom manager handles this correctly + val bmDecomManager = new BlockManagerDecommissioner(sparkConf, bm) + validateDecommissionTimestampsOnManager(bmDecomManager, fail = false, + numShuffles = Some(1)) } test("test shuffle and cached rdd migration without any error") { @@ -192,7 +274,8 @@ class BlockManagerDecommissionUnitSuite extends SparkFunSuite with Matchers { // We don't check that all blocks are migrated because out mock is always returning an RDD. 
eventually(timeout(100.second), interval(10.milliseconds)) { - assert(bmDecomManager.shufflesToMigrate.isEmpty == true) + assert(bmDecomManager.shufflesToMigrate.isEmpty === true) + assert(bmDecomManager.numMigratedShuffles.get() === 1) verify(bm, least(1)).replicateBlock( mc.eq(storedBlockId1), mc.any(), mc.any(), mc.eq(Some(3))) verify(blockTransferService, times(2)) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala index fd14b12b112d3..cdde8411d8b7b 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala @@ -53,9 +53,80 @@ private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite => executorPatience = None, decommissioningTest = true) } + + test("Test basic decommissioning with shuffle cleanup", k8sTestTag) { + sparkAppConf + .set(config.DECOMMISSION_ENABLED.key, "true") + .set("spark.kubernetes.container.image", pyImage) + .set(config.STORAGE_DECOMMISSION_ENABLED.key, "true") + .set(config.STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED.key, "true") + .set(config.STORAGE_DECOMMISSION_RDD_BLOCKS_ENABLED.key, "true") + .set(config.DYN_ALLOCATION_SHUFFLE_TRACKING_ENABLED.key, "true") + .set(config.DYN_ALLOCATION_SHUFFLE_TRACKING_TIMEOUT.key, "400") + // Ensure we have somewhere to migrate our data too + .set("spark.executor.instances", "3") + // The default of 30 seconds is fine, but for testing we just want to get this done fast. + .set("spark.storage.decommission.replicationReattemptInterval", "1") + + runSparkApplicationAndVerifyCompletion( + appResource = PYSPARK_DECOMISSIONING_CLEANUP, + mainClass = "", + expectedLogOnCompletion = Seq( + "Finished waiting, stopping Spark", + "Received decommission executor message", + "Acknowledged decommissioning block manager", + ": Executor decommission."), + appArgs = Array.empty[String], + driverPodChecker = doBasicDriverPyPodCheck, + executorPodChecker = doBasicExecutorPyPodCheck, + appLocator = appLocator, + isJVM = false, + pyFiles = None, + executorPatience = None, + decommissioningTest = true) + } + + test("Test decommissioning with dynamic allocation & shuffle cleanups", k8sTestTag) { + sparkAppConf + .set(config.DECOMMISSION_ENABLED.key, "true") + .set("spark.kubernetes.container.image", pyImage) + .set(config.STORAGE_DECOMMISSION_ENABLED.key, "true") + .set(config.STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED.key, "true") + .set(config.STORAGE_DECOMMISSION_RDD_BLOCKS_ENABLED.key, "true") + .set(config.DYN_ALLOCATION_SHUFFLE_TRACKING_ENABLED.key, "true") + .set(config.DYN_ALLOCATION_SHUFFLE_TRACKING_TIMEOUT.key, "30") + .set(config.DYN_ALLOCATION_CACHED_EXECUTOR_IDLE_TIMEOUT.key, "30") + .set(config.DYN_ALLOCATION_EXECUTOR_IDLE_TIMEOUT.key, "5") + .set(config.DYN_ALLOCATION_MIN_EXECUTORS.key, "1") + .set(config.DYN_ALLOCATION_INITIAL_EXECUTORS.key, "2") + .set(config.DYN_ALLOCATION_ENABLED.key, "true") + // The default of 30 seconds is fine, but for testing we just want to get this done fast. 
+ .set("spark.storage.decommission.replicationReattemptInterval", "1") + + var execLogs: String = "" + + runSparkApplicationAndVerifyCompletion( + appResource = PYSPARK_SCALE, + mainClass = "", + expectedLogOnCompletion = Seq( + "Finished waiting, stopping Spark", + "Received decommission executor message", + "Acknowledged decommissioning block manager", + ": Executor decommission."), + appArgs = Array.empty[String], + driverPodChecker = doBasicDriverPyPodCheck, + executorPodChecker = doBasicExecutorPyPodCheck, + appLocator = appLocator, + isJVM = false, + pyFiles = None, + executorPatience = None, + decommissioningTest = false) + } } private[spark] object DecommissionSuite { val TEST_LOCAL_PYSPARK: String = "local:///opt/spark/tests/" val PYSPARK_DECOMISSIONING: String = TEST_LOCAL_PYSPARK + "decommissioning.py" + val PYSPARK_DECOMISSIONING_CLEANUP: String = TEST_LOCAL_PYSPARK + "decommissioning_cleanup.py" + val PYSPARK_SCALE: String = TEST_LOCAL_PYSPARK + "autoscale.py" } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ProcessUtils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ProcessUtils.scala index cce842ce62f01..a1ecd48e747ea 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ProcessUtils.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ProcessUtils.scala @@ -32,7 +32,7 @@ object ProcessUtils extends Logging { def executeProcess( fullCommand: Array[String], timeout: Long, - dumpErrors: Boolean = false): Seq[String] = { + dumpErrors: Boolean = true): Seq[String] = { val pb = new ProcessBuilder().command(fullCommand: _*) pb.redirectErrorStream(true) val proc = pb.start() @@ -45,7 +45,7 @@ object ProcessUtils extends Logging { assert(proc.waitFor(timeout, TimeUnit.SECONDS), s"Timed out while executing ${fullCommand.mkString(" ")}") assert(proc.exitValue == 0, - s"Failed to execute ${fullCommand.mkString(" ")}" + + s"Failed to execute -- ${fullCommand.mkString(" ")} --" + s"${if (dumpErrors) "\n" + outputLines.mkString("\n")}") outputLines.toSeq } diff --git a/resource-managers/kubernetes/integration-tests/tests/autoscale.py b/resource-managers/kubernetes/integration-tests/tests/autoscale.py new file mode 100644 index 0000000000000..809b698fcdd8c --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/tests/autoscale.py @@ -0,0 +1,49 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import sys +import time + +from pyspark.sql import SparkSession + + +if __name__ == "__main__": + """ + Usage: autoscale + """ + print("Starting autoscale test") + spark = SparkSession \ + .builder \ + .appName("AutoScale") \ + .getOrCreate() + sc = spark._sc + + initialRdd = sc.parallelize(range(100), 5) + # Trigger a shuffle so there are shuffle blocks to migrate + rdd = initialRdd.map(lambda x: (x, x)).groupByKey() + rdd.collect() + numCores = sc._jsc.sc().getExecutorMemoryStatus().size() + print("Have " + str(numCores)) + print("Waiting for dynamic alloc") + time.sleep(150) + print("Finished waiting!") + rdd.count() + rdd.collect() + print("Finished waiting, stopping Spark.") + spark.stop() + print("Done, exiting Python") + sys.exit(0) diff --git a/resource-managers/kubernetes/integration-tests/tests/decommissioning.py b/resource-managers/kubernetes/integration-tests/tests/decommissioning.py index 5fcad083b007c..0880e8ab275b3 100644 --- a/resource-managers/kubernetes/integration-tests/tests/decommissioning.py +++ b/resource-managers/kubernetes/integration-tests/tests/decommissioning.py @@ -28,7 +28,7 @@ print("Starting decom test") spark = SparkSession \ .builder \ - .appName("PyMemoryTest") \ + .appName("DecomTest") \ .getOrCreate() sc = spark._sc acc = sc.accumulator(0) diff --git a/resource-managers/kubernetes/integration-tests/tests/decommissioning_cleanup.py b/resource-managers/kubernetes/integration-tests/tests/decommissioning_cleanup.py new file mode 100644 index 0000000000000..8af558ee5214e --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/tests/decommissioning_cleanup.py @@ -0,0 +1,59 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import sys +import time + +from pyspark.sql import SparkSession + + +if __name__ == "__main__": + """ + Usage: decommissioning + """ + print("Starting decom test") + spark = SparkSession \ + .builder \ + .appName("DecomTest") \ + .getOrCreate() + sc = spark._sc + acc = sc.accumulator(0) + + def addToAcc(x): + acc.add(1) + return x + + initialRdd = sc.parallelize(range(100), 5) + accRdd = initialRdd.map(addToAcc) + # Trigger a shuffle so there are shuffle blocks to migrate + rdd = accRdd.map(lambda x: (x, x)).groupByKey() + # Make enough shuffle files to increase the chance of the race condition. 
+ for i in range(1, 2): + shuffleRdd = sc.parallelize(range(1, 10), 5).map(lambda x: (x, x)).groupByKey() + shuffleRdd.collect() + rdd.collect() + print("1st accumulator value is: " + str(acc.value)) + print("Waiting to give nodes time to finish migration, decom exec 1.") + print("...") + time.sleep(30) + rdd.count() + rdd.collect() + print("Final accumulator value is: " + str(acc.value)) + print("Finished waiting, stopping Spark.") + spark.stop() + print("Done, exiting Python") + sys.exit(0) From e574fcd23021cc94f043981d84287f3bb1308b5f Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 16 Oct 2020 14:48:14 -0700 Subject: [PATCH 0263/1009] [SPARK-32376][SQL] Make unionByName null-filling behavior work with struct columns ### What changes were proposed in this pull request? SPARK-29358 added support for `unionByName` to work when the two datasets didn't necessarily have the same schema, but it does not work with nested columns like structs. This patch adds the support to work with struct columns. The behavior before this PR: ```scala scala> val df1 = spark.range(1).selectExpr("id c0", "named_struct('c', id + 1, 'b', id + 2, 'a', id + 3) c1") scala> val df2 = spark.range(1).selectExpr("id c0", "named_struct('c', id + 1, 'b', id + 2) c1") scala> df1.unionByName(df2, true).printSchema org.apache.spark.sql.AnalysisException: Union can only be performed on tables with the compatible column types. struct <> struct at the second column of the second table;; 'Union false, false :- Project [id#0L AS c0#2L, named_struct(c, (id#0L + cast(1 as bigint)), b, (id#0L + cast(2 as bigint)), a, (id#0L + cast(3 as bigint))) AS c1#3] : +- Range (0, 1, step=1, splits=Some(12)) +- Project [c0#8L, c1#9] +- Project [id#6L AS c0#8L, named_struct(c, (id#6L + cast(1 as bigint)), b, (id#6L + cast(2 as bigint))) AS c1#9] +- Range (0, 1, step=1, splits=Some(12)) ``` The behavior after this PR: ```scala scala> df1.unionByName(df2, true).printSchema root |-- c0: long (nullable = false) |-- c1: struct (nullable = false) | |-- a: long (nullable = true) | |-- b: long (nullable = false) | |-- c: long (nullable = false) scala> df1.unionByName(df2, true).show() +---+-------------+ | c0| c1| +---+-------------+ | 0| {3, 2, 1}| | 0|{ null, 2, 1}| +---+-------------+ ``` ### Why are the changes needed? The `allowMissingColumns` of `unionByName` is a feature allowing merging different schema from two datasets when unioning them together. Nested column support makes the feature more general and flexible for usage. ### Does this PR introduce _any_ user-facing change? Yes, after this change users can union two datasets with different schema with different structs. ### How was this patch tested? Unit tests. Closes #29587 from viirya/SPARK-32376. 
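As a pointer for reviewers, the missing-field discovery underlying this behavior is the new `StructType.findMissingFields` helper added in this patch. The snippet below only illustrates its intended contract under made-up schemas; the new `StructTypeSuite` cases in the diff are the authoritative examples.

```scala
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.StructType

val source = StructType.fromDDL("c1 INT, c2 STRUCT<c3: INT>")
val target = StructType.fromDDL("c1 INT, c2 STRUCT<c3: INT, c4: STRING>")

// Expected to report the nested field present in `target` but absent from `source`,
// i.e. roughly "c2 STRUCT<c4: STRING>". Structs nested inside array or map types are
// intentionally not traversed.
val missing: Option[StructType] =
  StructType.findMissingFields(source, target, SQLConf.get.resolver)
```

When missing fields do get added on one side of the union, the struct fields on both sides are sorted by name so that the two children end up with an identical schema.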
Authored-by: Liang-Chi Hsieh Signed-off-by: Liang-Chi Hsieh --- .../sql/catalyst/analysis/ResolveUnion.scala | 192 +++++++++++++++++- .../expressions/complexTypeCreator.scala | 52 ++++- .../expressions/complexTypeExtractors.scala | 4 +- .../apache/spark/sql/types/StructType.scala | 35 ++++ .../spark/sql/types/StructTypeSuite.scala | 96 ++++++++- .../scala/org/apache/spark/sql/Column.scala | 36 +--- .../scala/org/apache/spark/sql/Dataset.scala | 6 + .../sql/DataFrameSetOperationsSuite.scala | 181 +++++++++++++++++ 8 files changed, 555 insertions(+), 47 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveUnion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveUnion.scala index 693a5a4e75443..c1a9c9d3d9bab 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveUnion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveUnion.scala @@ -17,29 +17,188 @@ package org.apache.spark.sql.catalyst.analysis +import scala.collection.mutable + import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.expressions.{Alias, Literal} +import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.optimizer.CombineUnions import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, Union} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils +import org.apache.spark.unsafe.types.UTF8String /** * Resolves different children of Union to a common set of columns. */ object ResolveUnion extends Rule[LogicalPlan] { - private def unionTwoSides( + /** + * This method sorts columns recursively in a struct expression based on column names. + */ + private def sortStructFields(expr: Expression): Expression = { + val existingExprs = expr.dataType.asInstanceOf[StructType].fieldNames.zipWithIndex.map { + case (name, i) => + val fieldExpr = GetStructField(KnownNotNull(expr), i) + if (fieldExpr.dataType.isInstanceOf[StructType]) { + (name, sortStructFields(fieldExpr)) + } else { + (name, fieldExpr) + } + }.sortBy(_._1).flatMap(pair => Seq(Literal(pair._1), pair._2)) + + val newExpr = CreateNamedStruct(existingExprs) + if (expr.nullable) { + If(IsNull(expr), Literal(null, newExpr.dataType), newExpr) + } else { + newExpr + } + } + + /** + * Assumes input expressions are field expression of `CreateNamedStruct`. This method + * sorts the expressions based on field names. + */ + private def sortFieldExprs(fieldExprs: Seq[Expression]): Seq[Expression] = { + fieldExprs.grouped(2).map { e => + Seq(e.head, e.last) + }.toSeq.sortBy { pair => + assert(pair.head.isInstanceOf[Literal]) + pair.head.eval().asInstanceOf[UTF8String].toString + }.flatten + } + + /** + * This helper method sorts fields in a `UpdateFields` expression by field name. 
+ */ + private def sortStructFieldsInWithFields(expr: Expression): Expression = expr transformUp { + case u: UpdateFields if u.resolved => + u.evalExpr match { + case i @ If(IsNull(_), _, CreateNamedStruct(fieldExprs)) => + val sorted = sortFieldExprs(fieldExprs) + val newStruct = CreateNamedStruct(sorted) + i.copy(trueValue = Literal(null, newStruct.dataType), falseValue = newStruct) + case CreateNamedStruct(fieldExprs) => + val sorted = sortFieldExprs(fieldExprs) + val newStruct = CreateNamedStruct(sorted) + newStruct + case other => + throw new IllegalStateException(s"`UpdateFields` has incorrect expression: $other. " + + "Please file a bug report with this error message, stack trace, and the query.") + } + } + + def simplifyWithFields(expr: Expression): Expression = { + expr.transformUp { + case UpdateFields(UpdateFields(struct, fieldOps1), fieldOps2) => + UpdateFields(struct, fieldOps1 ++ fieldOps2) + } + } + + /** + * Adds missing fields recursively into given `col` expression, based on the target `StructType`. + * This is called by `compareAndAddFields` when we find two struct columns with same name but + * different nested fields. This method will find out the missing nested fields from `col` to + * `target` struct and add these missing nested fields. Currently we don't support finding out + * missing nested fields of struct nested in array or struct nested in map. + */ + private def addFields(col: NamedExpression, target: StructType): Expression = { + assert(col.dataType.isInstanceOf[StructType], "Only support StructType.") + + val resolver = SQLConf.get.resolver + val missingFieldsOpt = + StructType.findMissingFields(col.dataType.asInstanceOf[StructType], target, resolver) + + // We need to sort columns in result, because we might add another column in other side. + // E.g., we want to union two structs "a int, b long" and "a int, c string". + // If we don't sort, we will have "a int, b long, c string" and + // "a int, c string, b long", which are not compatible. + if (missingFieldsOpt.isEmpty) { + sortStructFields(col) + } else { + missingFieldsOpt.map { s => + val struct = addFieldsInto(col, s.fields) + // Combines `WithFields`s to reduce expression tree. + val reducedStruct = simplifyWithFields(struct) + val sorted = sortStructFieldsInWithFields(reducedStruct) + sorted + }.get + } + } + + /** + * Adds missing fields recursively into given `col` expression. The missing fields are given + * in `fields`. For example, given `col` as "z struct, x int", and `fields` is + * "z struct, w string". This method will add a nested `z.w` field and a top-level + * `w` field to `col` and fill null values for them. Note that because we might also add missing + * fields at other side of Union, we must make sure corresponding attributes at two sides have + * same field order in structs, so when we adding missing fields, we will sort the fields based on + * field names. So the data type of returned expression will be + * "w string, x int, z struct". + */ + private def addFieldsInto( + col: Expression, + fields: Seq[StructField]): Expression = { + fields.foldLeft(col) { case (currCol, field) => + field.dataType match { + case st: StructType => + val resolver = SQLConf.get.resolver + val colField = currCol.dataType.asInstanceOf[StructType] + .find(f => resolver(f.name, field.name)) + if (colField.isEmpty) { + // The whole struct is missing. Add a null. 
+ UpdateFields(currCol, field.name, Literal(null, st)) + } else { + UpdateFields(currCol, field.name, + addFieldsInto(ExtractValue(currCol, Literal(field.name), resolver), st.fields)) + } + case dt => + UpdateFields(currCol, field.name, Literal(null, dt)) + } + } + } + + /** + * This method will compare right to left plan's outputs. If there is one struct attribute + * at right side has same name with left side struct attribute, but two structs are not the + * same data type, i.e., some missing (nested) fields at right struct attribute, then this + * method will try to add missing (nested) fields into the right attribute with null values. + */ + private def compareAndAddFields( left: LogicalPlan, right: LogicalPlan, - allowMissingCol: Boolean): LogicalPlan = { + allowMissingCol: Boolean): (Seq[NamedExpression], Seq[NamedExpression]) = { val resolver = SQLConf.get.resolver val leftOutputAttrs = left.output val rightOutputAttrs = right.output - // Builds a project list for `right` based on `left` output names + val aliased = mutable.ArrayBuffer.empty[Attribute] + val rightProjectList = leftOutputAttrs.map { lattr => - rightOutputAttrs.find { rattr => resolver(lattr.name, rattr.name) }.getOrElse { + val found = rightOutputAttrs.find { rattr => resolver(lattr.name, rattr.name) } + if (found.isDefined) { + val foundAttr = found.get + val foundDt = foundAttr.dataType + (foundDt, lattr.dataType) match { + case (source: StructType, target: StructType) + if allowMissingCol && !source.sameType(target) => + // Having an output with same name, but different struct type. + // We need to add missing fields. Note that if there are deeply nested structs such as + // nested struct of array in struct, we don't support to add missing deeply nested field + // like that. We will sort columns in the struct expression to make sure two sides of + // union have consistent schema. + aliased += foundAttr + Alias(addFields(foundAttr, target), foundAttr.name)() + case _ => + // We don't need/try to add missing fields if: + // 1. The attributes of left and right side are the same struct type + // 2. The attributes are not struct types. They might be primitive types, or array, map + // types. We don't support adding missing fields of nested structs in array or map + // types now. + // 3. `allowMissingCol` is disabled. + foundAttr + } + } else { if (allowMissingCol) { Alias(Literal(null, lattr.dataType), lattr.name)() } else { @@ -50,18 +209,29 @@ object ResolveUnion extends Rule[LogicalPlan] { } } + (rightProjectList, aliased.toSeq) + } + + private def unionTwoSides( + left: LogicalPlan, + right: LogicalPlan, + allowMissingCol: Boolean): LogicalPlan = { + val rightOutputAttrs = right.output + + // Builds a project list for `right` based on `left` output names + val (rightProjectList, aliased) = compareAndAddFields(left, right, allowMissingCol) + // Delegates failure checks to `CheckAnalysis` - val notFoundAttrs = rightOutputAttrs.diff(rightProjectList) + val notFoundAttrs = rightOutputAttrs.diff(rightProjectList ++ aliased) val rightChild = Project(rightProjectList ++ notFoundAttrs, right) // Builds a project for `logicalPlan` based on `right` output names, if allowing // missing columns. val leftChild = if (allowMissingCol) { - val missingAttrs = notFoundAttrs.map { attr => - Alias(Literal(null, attr.dataType), attr.name)() - } - if (missingAttrs.nonEmpty) { - Project(leftOutputAttrs ++ missingAttrs, left) + // Add missing (nested) fields to left plan. 
+ val (leftProjectList, _) = compareAndAddFields(rightChild, left, allowMissingCol) + if (leftProjectList.map(_.toAttribute) != left.output) { + Project(leftProjectList, left) } else { left } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index f6485a51f8fae..3958cfd0af2a3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -20,10 +20,11 @@ package org.apache.spark.sql.catalyst.expressions import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.{Resolver, TypeCheckResult, TypeCoercion} +import org.apache.spark.sql.catalyst.analysis.{Resolver, TypeCheckResult, TypeCoercion, UnresolvedExtractValue} import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.{FUNC_ALIAS, FunctionBuilder} import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -661,3 +662,52 @@ case class UpdateFields(structExpr: Expression, fieldOps: Seq[StructFieldsOperat } } } + +object UpdateFields { + private def nameParts(fieldName: String): Seq[String] = { + require(fieldName != null, "fieldName cannot be null") + + if (fieldName.isEmpty) { + fieldName :: Nil + } else { + CatalystSqlParser.parseMultipartIdentifier(fieldName) + } + } + + /** + * Adds/replaces field of `StructType` into `col` expression by name. + */ + def apply(col: Expression, fieldName: String, expr: Expression): UpdateFields = { + updateFieldsHelper(col, nameParts(fieldName), name => WithField(name, expr)) + } + + /** + * Drops fields of `StructType` in `col` expression by name. 
+ */ + def apply(col: Expression, fieldName: String): UpdateFields = { + updateFieldsHelper(col, nameParts(fieldName), name => DropField(name)) + } + + private def updateFieldsHelper( + structExpr: Expression, + namePartsRemaining: Seq[String], + valueFunc: String => StructFieldsOperation) : UpdateFields = { + val fieldName = namePartsRemaining.head + if (namePartsRemaining.length == 1) { + UpdateFields(structExpr, valueFunc(fieldName) :: Nil) + } else { + val newStruct = if (structExpr.resolved) { + val resolver = SQLConf.get.resolver + ExtractValue(structExpr, Literal(fieldName), resolver) + } else { + UnresolvedExtractValue(structExpr, Literal(fieldName)) + } + + val newValue = updateFieldsHelper( + structExpr = newStruct, + namePartsRemaining = namePartsRemaining.tail, + valueFunc = valueFunc) + UpdateFields(structExpr, WithField(fieldName, newValue) :: Nil) + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala index 89ff4facd25a9..60afe140960cc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala @@ -116,8 +116,10 @@ case class GetStructField(child: Expression, ordinal: Int, name: Option[String] s"$child.${name.getOrElse(fieldName)}" } + def extractFieldName: String = name.getOrElse(childSchema(ordinal).name) + override def sql: String = - child.sql + s".${quoteIdentifier(name.getOrElse(childSchema(ordinal).name))}" + child.sql + s".${quoteIdentifier(extractFieldName)}" protected override def nullSafeEval(input: Any): Any = input.asInstanceOf[InternalRow].get(ordinal, childSchema(ordinal).dataType) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala index b14fb04cc4539..c5e76c160ff46 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala @@ -641,4 +641,39 @@ object StructType extends AbstractDataType { fields.foreach(s => map.put(s.name, s)) map } + + /** + * Returns a `StructType` that contains missing fields recursively from `source` to `target`. + * Note that this doesn't support looking into array type and map type recursively. + */ + def findMissingFields( + source: StructType, + target: StructType, + resolver: Resolver): Option[StructType] = { + def bothStructType(dt1: DataType, dt2: DataType): Boolean = + dt1.isInstanceOf[StructType] && dt2.isInstanceOf[StructType] + + val newFields = mutable.ArrayBuffer.empty[StructField] + + target.fields.foreach { field => + val found = source.fields.find(f => resolver(field.name, f.name)) + if (found.isEmpty) { + // Found a missing field in `source`. + newFields += field + } else if (bothStructType(found.get.dataType, field.dataType) && + !found.get.dataType.sameType(field.dataType)) { + // Found a field with same name, but different data type. 
+ findMissingFields(found.get.dataType.asInstanceOf[StructType], + field.dataType.asInstanceOf[StructType], resolver).map { missingType => + newFields += found.get.copy(dataType = missingType) + } + } + } + + if (newFields.isEmpty) { + None + } else { + Some(StructType(newFields.toSeq)) + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala index 6824a64badc10..645e65f06508d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala @@ -18,9 +18,11 @@ package org.apache.spark.sql.types import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.plans.SQLHelper +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType.fromDDL -class StructTypeSuite extends SparkFunSuite { +class StructTypeSuite extends SparkFunSuite with SQLHelper { private val s = StructType.fromDDL("a INT, b STRING") @@ -103,4 +105,96 @@ class StructTypeSuite extends SparkFunSuite { val interval = "`a` INTERVAL" assert(fromDDL(interval).toDDL === interval) } + + test("find missing (nested) fields") { + val schema = StructType.fromDDL("c1 INT, c2 STRUCT>") + val resolver = SQLConf.get.resolver + + val source1 = StructType.fromDDL("c1 INT") + val missing1 = StructType.fromDDL("c2 STRUCT>") + assert(StructType.findMissingFields(source1, schema, resolver) + .exists(_.sameType(missing1))) + + val source2 = StructType.fromDDL("c1 INT, c3 STRING") + val missing2 = StructType.fromDDL("c2 STRUCT>") + assert(StructType.findMissingFields(source2, schema, resolver) + .exists(_.sameType(missing2))) + + val source3 = StructType.fromDDL("c1 INT, c2 STRUCT") + val missing3 = StructType.fromDDL("c2 STRUCT>") + assert(StructType.findMissingFields(source3, schema, resolver) + .exists(_.sameType(missing3))) + + val source4 = StructType.fromDDL("c1 INT, c2 STRUCT>") + val missing4 = StructType.fromDDL("c2 STRUCT>") + assert(StructType.findMissingFields(source4, schema, resolver) + .exists(_.sameType(missing4))) + } + + test("find missing (nested) fields: array and map") { + val resolver = SQLConf.get.resolver + + val schemaWithArray = StructType.fromDDL("c1 INT, c2 ARRAY>") + val source5 = StructType.fromDDL("c1 INT") + val missing5 = StructType.fromDDL("c2 ARRAY>") + assert( + StructType.findMissingFields(source5, schemaWithArray, resolver) + .exists(_.sameType(missing5))) + + val schemaWithMap1 = StructType.fromDDL( + "c1 INT, c2 MAP, STRING>, c3 LONG") + val source6 = StructType.fromDDL("c1 INT, c3 LONG") + val missing6 = StructType.fromDDL("c2 MAP, STRING>") + assert( + StructType.findMissingFields(source6, schemaWithMap1, resolver) + .exists(_.sameType(missing6))) + + val schemaWithMap2 = StructType.fromDDL( + "c1 INT, c2 MAP>, c3 STRING") + val source7 = StructType.fromDDL("c1 INT, c3 STRING") + val missing7 = StructType.fromDDL("c2 MAP>") + assert( + StructType.findMissingFields(source7, schemaWithMap2, resolver) + .exists(_.sameType(missing7))) + + // Unsupported: nested struct in array, map + val source8 = StructType.fromDDL("c1 INT, c2 ARRAY>") + // `findMissingFields` doesn't support looking into nested struct in array type. 
+ assert(StructType.findMissingFields(source8, schemaWithArray, resolver).isEmpty) + + val source9 = StructType.fromDDL("c1 INT, c2 MAP, STRING>, c3 LONG") + // `findMissingFields` doesn't support looking into nested struct in map type. + assert(StructType.findMissingFields(source9, schemaWithMap1, resolver).isEmpty) + + val source10 = StructType.fromDDL("c1 INT, c2 MAP>, c3 STRING") + // `findMissingFields` doesn't support looking into nested struct in map type. + assert(StructType.findMissingFields(source10, schemaWithMap2, resolver).isEmpty) + } + + test("find missing (nested) fields: case sensitive cases") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val schema = StructType.fromDDL("c1 INT, c2 STRUCT>") + val resolver = SQLConf.get.resolver + + val source1 = StructType.fromDDL("c1 INT, C2 LONG") + val missing1 = StructType.fromDDL("c2 STRUCT>") + assert(StructType.findMissingFields(source1, schema, resolver) + .exists(_.sameType(missing1))) + + val source2 = StructType.fromDDL("c2 LONG") + val missing2 = StructType.fromDDL("c1 INT") + assert(StructType.findMissingFields(source2, schema, resolver) + .exists(_.sameType(missing2))) + + val source3 = StructType.fromDDL("c1 INT, c2 STRUCT>") + val missing3 = StructType.fromDDL("c2 STRUCT>") + assert(StructType.findMissingFields(source3, schema, resolver) + .exists(_.sameType(missing3))) + + val source4 = StructType.fromDDL("c1 INT, c2 STRUCT>") + val missing4 = StructType.fromDDL("c2 STRUCT>") + assert(StructType.findMissingFields(source4, schema, resolver) + .exists(_.sameType(missing4))) + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index a46d6c0bb2282..30792c9bacd53 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -925,7 +925,7 @@ class Column(val expr: Expression) extends Logging { def withField(fieldName: String, col: Column): Column = withExpr { require(fieldName != null, "fieldName cannot be null") require(col != null, "col cannot be null") - updateFieldsHelper(expr, nameParts(fieldName), name => WithField(name, col.expr)) + UpdateFields(expr, fieldName, col.expr) } // scalastyle:off line.size.limit @@ -989,38 +989,8 @@ class Column(val expr: Expression) extends Logging { */ // scalastyle:on line.size.limit def dropFields(fieldNames: String*): Column = withExpr { - def dropField(structExpr: Expression, fieldName: String): UpdateFields = - updateFieldsHelper(structExpr, nameParts(fieldName), name => DropField(name)) - - fieldNames.tail.foldLeft(dropField(expr, fieldNames.head)) { - (resExpr, fieldName) => dropField(resExpr, fieldName) - } - } - - private def nameParts(fieldName: String): Seq[String] = { - require(fieldName != null, "fieldName cannot be null") - - if (fieldName.isEmpty) { - fieldName :: Nil - } else { - CatalystSqlParser.parseMultipartIdentifier(fieldName) - } - } - - private def updateFieldsHelper( - structExpr: Expression, - namePartsRemaining: Seq[String], - valueFunc: String => StructFieldsOperation): UpdateFields = { - - val fieldName = namePartsRemaining.head - if (namePartsRemaining.length == 1) { - UpdateFields(structExpr, valueFunc(fieldName) :: Nil) - } else { - val newValue = updateFieldsHelper( - structExpr = UnresolvedExtractValue(structExpr, Literal(fieldName)), - namePartsRemaining = namePartsRemaining.tail, - valueFunc = valueFunc) - UpdateFields(structExpr, WithField(fieldName, newValue) :: Nil) + 
fieldNames.tail.foldLeft(UpdateFields(expr, fieldNames.head)) { + (resExpr, fieldName) => UpdateFields(resExpr, fieldName) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 87b9aea80c823..3d431d6ff13a9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -2067,6 +2067,12 @@ class Dataset[T] private[sql]( * // +----+----+----+----+ * }}} * + * Note that `allowMissingColumns` supports nested column in struct types. Missing nested columns + * of struct columns with same name will also be filled with null values. This currently does not + * support nested columns in array and map types. Note that if there is any missing nested columns + * to be filled, in order to make consistent schema between two sides of union, the nested fields + * of structs will be sorted after merging schema. + * * @group typedrel * @since 3.1.0 */ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala index e72b8ce860b28..5f28dc60962ba 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala @@ -536,4 +536,185 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession { assert(union2.schema.fieldNames === Array("a", "B", "C", "c")) } } + + test("SPARK-32376: Make unionByName null-filling behavior work with struct columns - simple") { + val df1 = Seq(((1, 2, 3), 0), ((2, 3, 4), 1), ((3, 4, 5), 2)).toDF("a", "idx") + val df2 = Seq(((3, 4), 0), ((1, 2), 1), ((2, 3), 2)).toDF("a", "idx") + val df3 = Seq(((100, 101, 102, 103), 0), ((110, 111, 112, 113), 1), ((120, 121, 122, 123), 2)) + .toDF("a", "idx") + + var unionDf = df1.unionByName(df2, true) + + checkAnswer(unionDf, + Row(Row(1, 2, 3), 0) :: Row(Row(2, 3, 4), 1) :: Row(Row(3, 4, 5), 2) :: + Row(Row(3, 4, null), 0) :: Row(Row(1, 2, null), 1) :: Row(Row(2, 3, null), 2) :: Nil + ) + + assert(unionDf.schema.toDDL == "`a` STRUCT<`_1`: INT, `_2`: INT, `_3`: INT>,`idx` INT") + + unionDf = df1.unionByName(df2, true).unionByName(df3, true) + + checkAnswer(unionDf, + Row(Row(1, 2, 3, null), 0) :: + Row(Row(2, 3, 4, null), 1) :: + Row(Row(3, 4, 5, null), 2) :: // df1 + Row(Row(3, 4, null, null), 0) :: + Row(Row(1, 2, null, null), 1) :: + Row(Row(2, 3, null, null), 2) :: // df2 + Row(Row(100, 101, 102, 103), 0) :: + Row(Row(110, 111, 112, 113), 1) :: + Row(Row(120, 121, 122, 123), 2) :: Nil // df3 + ) + assert(unionDf.schema.toDDL == + "`a` STRUCT<`_1`: INT, `_2`: INT, `_3`: INT, `_4`: INT>,`idx` INT") + } + + test("SPARK-32376: Make unionByName null-filling behavior work with struct columns - nested") { + val df1 = Seq((0, UnionClass1a(0, 1L, UnionClass2(1, "2")))).toDF("id", "a") + val df2 = Seq((1, UnionClass1b(1, 2L, UnionClass3(2, 3L)))).toDF("id", "a") + + val expectedSchema = "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " + + "`nested`: STRUCT<`a`: INT, `b`: BIGINT, `c`: STRING>>" + + var unionDf = df1.unionByName(df2, true) + checkAnswer(unionDf, + Row(0, Row(0, 1, Row(1, null, "2"))) :: + Row(1, Row(1, 2, Row(2, 3L, null))) :: Nil) + assert(unionDf.schema.toDDL == expectedSchema) + + unionDf = df2.unionByName(df1, true) + checkAnswer(unionDf, + Row(1, Row(1, 2, Row(2, 3L, null))) :: + Row(0, Row(0, 1, Row(1, null, "2"))) :: Nil) + 
assert(unionDf.schema.toDDL == expectedSchema) + + val df3 = Seq((2, UnionClass1b(2, 3L, null))).toDF("id", "a") + unionDf = df1.unionByName(df3, true) + checkAnswer(unionDf, + Row(0, Row(0, 1, Row(1, null, "2"))) :: + Row(2, Row(2, 3, null)) :: Nil) + assert(unionDf.schema.toDDL == expectedSchema) + } + + test("SPARK-32376: Make unionByName null-filling behavior work with struct columns" + + " - case-sensitive cases") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val df1 = Seq((0, UnionClass1a(0, 1L, UnionClass2(1, "2")))).toDF("id", "a") + val df2 = Seq((1, UnionClass1c(1, 2L, UnionClass4(2, 3L)))).toDF("id", "a") + + var unionDf = df1.unionByName(df2, true) + checkAnswer(unionDf, + Row(0, Row(0, 1, Row(null, 1, null, "2"))) :: + Row(1, Row(1, 2, Row(2, null, 3L, null))) :: Nil) + assert(unionDf.schema.toDDL == + "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " + + "`nested`: STRUCT<`A`: INT, `a`: INT, `b`: BIGINT, `c`: STRING>>") + + unionDf = df2.unionByName(df1, true) + checkAnswer(unionDf, + Row(1, Row(1, 2, Row(2, null, 3L, null))) :: + Row(0, Row(0, 1, Row(null, 1, null, "2"))) :: Nil) + assert(unionDf.schema.toDDL == + "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " + + "`nested`: STRUCT<`A`: INT, `a`: INT, `b`: BIGINT, `c`: STRING>>") + + val df3 = Seq((2, UnionClass1b(2, 3L, UnionClass3(4, 5L)))).toDF("id", "a") + unionDf = df2.unionByName(df3, true) + checkAnswer(unionDf, + Row(1, Row(1, 2, Row(2, null, 3L))) :: + Row(2, Row(2, 3, Row(null, 4, 5L))) :: Nil) + assert(unionDf.schema.toDDL == + "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " + + "`nested`: STRUCT<`A`: INT, `a`: INT, `b`: BIGINT>>") + } + } + + test("SPARK-32376: Make unionByName null-filling behavior work with struct columns - edge case") { + val nestedStructType1 = StructType(Seq( + StructField("b", StringType))) + val nestedStructValues1 = Row("b") + + val nestedStructType2 = StructType(Seq( + StructField("b", StringType), + StructField("a", StringType))) + val nestedStructValues2 = Row("b", "a") + + val df1: DataFrame = spark.createDataFrame( + sparkContext.parallelize(Row(nestedStructValues1) :: Nil), + StructType(Seq(StructField("topLevelCol", nestedStructType1)))) + + val df2: DataFrame = spark.createDataFrame( + sparkContext.parallelize(Row(nestedStructValues2) :: Nil), + StructType(Seq(StructField("topLevelCol", nestedStructType2)))) + + val union = df1.unionByName(df2, allowMissingColumns = true) + checkAnswer(union, Row(Row(null, "b")) :: Row(Row("a", "b")) :: Nil) + assert(union.schema.toDDL == "`topLevelCol` STRUCT<`a`: STRING, `b`: STRING>") + } + + test("SPARK-32376: Make unionByName null-filling behavior work with struct columns - deep expr") { + def nestedDf(depth: Int, numColsAtEachDepth: Int): DataFrame = { + val initialNestedStructType = StructType( + (0 to numColsAtEachDepth).map(i => + StructField(s"nested${depth}Col$i", IntegerType, nullable = false)) + ) + val initialNestedValues = Row(0 to numColsAtEachDepth: _*) + + var depthCounter = depth - 1 + var structType = initialNestedStructType + var struct = initialNestedValues + while (depthCounter != 0) { + struct = Row((struct +: (1 to numColsAtEachDepth)): _*) + structType = StructType( + StructField(s"nested${depthCounter}Col0", structType, nullable = false) +: + (1 to numColsAtEachDepth).map(i => + StructField(s"nested${depthCounter}Col$i", IntegerType, nullable = false)) + ) + depthCounter -= 1 + } + + val df: DataFrame = spark.createDataFrame( + sparkContext.parallelize(Row(struct) :: Nil), + StructType(Seq(StructField("nested0Col0", 
structType)))) + + df + } + + val df1 = nestedDf(depth = 10, numColsAtEachDepth = 1) + val df2 = nestedDf(depth = 10, numColsAtEachDepth = 20) + val union = df1.unionByName(df2, allowMissingColumns = true) + // scalastyle:off + val row1 = Row(Row(Row(Row(Row(Row(Row(Row(Row(Row( + Row(0, 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null), + 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null), + 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null), + 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null), + 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null), + 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null), + 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null), + 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null), + 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null), + 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null)) + val row2 = Row(Row(Row(Row(Row(Row(Row(Row(Row(Row( + Row(0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2, 20, 3, 4, 5, 6, 7, 8, 9), + 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2, 20, 3, 4, 5, 6, 7, 8, 9), + 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2, 20, 3, 4, 5, 6, 7, 8, 9), + 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2, 20, 3, 4, 5, 6, 7, 8, 9), + 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2, 20, 3, 4, 5, 6, 7, 8, 9), + 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2, 20, 3, 4, 5, 6, 7, 8, 9), + 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2, 20, 3, 4, 5, 6, 7, 8, 9), + 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2, 20, 3, 4, 5, 6, 7, 8, 9), + 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2, 20, 3, 4, 5, 6, 7, 8, 9), + 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2, 20, 3, 4, 5, 6, 7, 8, 9)) + // scalastyle:on + checkAnswer(union, row1 :: row2 :: Nil) + } } + +case class UnionClass1a(a: Int, b: Long, nested: UnionClass2) +case class UnionClass1b(a: Int, b: Long, nested: UnionClass3) +case class UnionClass1c(a: Int, b: Long, nested: UnionClass4) + +case class UnionClass2(a: Int, c: String) +case class UnionClass3(a: Int, b: Long) +case class UnionClass4(A: Int, b: Long) From 0411def0b1da22d3c15fe7c85f6296da41d5d2cd Mon Sep 17 00:00:00 2001 From: Denis Pyshev Date: Fri, 16 Oct 2020 16:32:09 -0700 Subject: [PATCH 0264/1009] [SPARK-33109][BUILD] Upgrade to sbt 1.4.0 ### What changes were proposed in this pull request? Upgrade sbt to release 1.4.0 ### Why are the changes needed? Bring built-in `dependencyTree` instead of removed `sbt-dependency-graph` plugin, that doesn't work with sbt used in build. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Should pass all the tests. Closes #30070 from gemelen/feature/sbt-1.4. 
Authored-by: Denis Pyshev Signed-off-by: Dongjoon Hyun --- project/build.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/build.properties b/project/build.properties index b1e5e313d853f..e391883fbbc2d 100644 --- a/project/build.properties +++ b/project/build.properties @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -sbt.version=1.3.13 +sbt.version=1.4.0 From 911dcd39838eab742f1993776cada57e1c0f6f6c Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 16 Oct 2020 21:23:21 -0700 Subject: [PATCH 0265/1009] [SPARK-33173][CORE][TESTS] Use `eventually` to check `numOnTaskFailed` in PluginContainerSuite ### What changes were proposed in this pull request? This PR aims to use `eventually` to fix the flakiness of the test case `SPARK-33088: executor failed tasks trigger plugin calls`. ### Why are the changes needed? The test case checks like the following. ```scala assert(TestSparkPlugin.executorPlugin.numOnTaskStart == 2) assert(TestSparkPlugin.executorPlugin.numOnTaskSucceeded == 0) assert(TestSparkPlugin.executorPlugin.numOnTaskFailed == 2) ``` Although first and second passed, the third can fail. - https://amplab.cs.berkeley.edu/jenkins/view/Spark%20QA%20Test%20(Dashboard)/job/spark-master-test-maven-hadoop-3.2-hive-2.3-jdk-11/lastCompletedBuild/testReport/org.apache.spark.internal.plugin/PluginContainerSuite/SPARK_33088__executor_failed_tasks_trigger_plugin_calls/ - https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/129919/testReport/ ``` sbt.ForkMain$ForkError: org.scalatest.exceptions.TestFailedException: 1 did not equal 2 at org.scalatest.Assertions.newAssertionFailedException(Assertions.scala:472) at org.scalatest.Assertions.newAssertionFailedException$(Assertions.scala:471) at org.scalatest.Assertions$.newAssertionFailedException(Assertions.scala:1231) at org.scalatest.Assertions$AssertionsHelper.macroAssert(Assertions.scala:1295) at org.apache.spark.internal.plugin.PluginContainerSuite.$anonfun$new$8(PluginContainerSuite.scala:161) ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? This only improves the robustness. Closes #30072 from dongjoon-hyun/SPARK-33173. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../spark/internal/plugin/PluginContainerSuite.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala b/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala index e7fbe5b998a88..15966e2744491 100644 --- a/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala +++ b/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala @@ -156,9 +156,11 @@ class PluginContainerSuite extends SparkFunSuite with BeforeAndAfterEach with Lo case t: Throwable => // ignore exception } - assert(TestSparkPlugin.executorPlugin.numOnTaskStart == 2) - assert(TestSparkPlugin.executorPlugin.numOnTaskSucceeded == 0) - assert(TestSparkPlugin.executorPlugin.numOnTaskFailed == 2) + eventually(timeout(10.seconds), interval(100.millis)) { + assert(TestSparkPlugin.executorPlugin.numOnTaskStart == 2) + assert(TestSparkPlugin.executorPlugin.numOnTaskSucceeded == 0) + assert(TestSparkPlugin.executorPlugin.numOnTaskFailed == 2) + } } test("plugin initialization in non-local mode") { From 2c4599db4b5de8f7b86af4b4c4b4a43b80e82d1a Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 16 Oct 2020 22:18:12 -0700 Subject: [PATCH 0266/1009] [MINOR][SS][DOCS] Update Structured Streaming guide doc and update code typo ### What changes were proposed in this pull request? This is a minor change to update structured-streaming-programming-guide and typos in code. ### Why are the changes needed? Keep the user-facing document correct and updated. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit tests. Closes #30074 from viirya/ss-minor. Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- docs/structured-streaming-programming-guide.md | 6 ++++-- .../analysis/UnsupportedOperationChecker.scala | 10 +++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md index f137915eaa57c..ccd6f41f5c664 100644 --- a/docs/structured-streaming-programming-guide.md +++ b/docs/structured-streaming-programming-guide.md @@ -1763,7 +1763,9 @@ Here is the compatibility matrix. Queries with mapGroupsWithState Update - + + Aggregations not allowed in a query with mapGroupsWithState. + Queries with flatMapGroupsWithState @@ -1777,7 +1779,7 @@ Here is the compatibility matrix. Update operation mode Update - Aggregations not allowed after flatMapGroupsWithState. + Aggregations not allowed in a query with flatMapGroupsWithState. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala index 46f178f3a9ce2..44e8602ba7e81 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala @@ -212,11 +212,11 @@ object UnsupportedOperationChecker extends Logging { case m: FlatMapGroupsWithState if m.isStreaming => // Check compatibility with output modes and aggregations in query - val aggsAfterFlatMapGroups = collectStreamingAggregates(plan) + val aggsInQuery = collectStreamingAggregates(plan) if (m.isMapGroupsWithState) { // check mapGroupsWithState // allowed only in update query output mode and without aggregation - if (aggsAfterFlatMapGroups.nonEmpty) { + if (aggsInQuery.nonEmpty) { throwError( "mapGroupsWithState is not supported with aggregation " + "on a streaming DataFrame/Dataset") @@ -225,8 +225,8 @@ object UnsupportedOperationChecker extends Logging { "mapGroupsWithState is not supported with " + s"$outputMode output mode on a streaming DataFrame/Dataset") } - } else { // check latMapGroupsWithState - if (aggsAfterFlatMapGroups.isEmpty) { + } else { // check flatMapGroupsWithState + if (aggsInQuery.isEmpty) { // flatMapGroupsWithState without aggregation: operation's output mode must // match query output mode m.outputMode match { @@ -252,7 +252,7 @@ object UnsupportedOperationChecker extends Logging { } else if (collectStreamingAggregates(m).nonEmpty) { throwError( "flatMapGroupsWithState in append mode is not supported after " + - s"aggregation on a streaming DataFrame/Dataset") + "aggregation on a streaming DataFrame/Dataset") } } } From 3010e9044e068216d7a7a9ec510453ecbb159f95 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 17 Oct 2020 21:02:25 -0700 Subject: [PATCH 0267/1009] [SPARK-33170][SQL] Add SQL config to control fast-fail behavior in FileFormatWriter ### What changes were proposed in this pull request? This patch proposes to add a config we can control fast-fail behavior in FileFormatWriter and set it false by default. ### Why are the changes needed? In SPARK-29649, we catch `FileAlreadyExistsException` in `FileFormatWriter` and fail fast for the task set to prevent task retry. Due to latest discussion, it is important to be able to keep original behavior that is to retry tasks even `FileAlreadyExistsException` is thrown, because `FileAlreadyExistsException` could be recoverable in some cases. We are going to add a config we can control this behavior and set it false for fast-fail by default. ### Does this PR introduce _any_ user-facing change? Yes. By default the task in FileFormatWriter will retry even if `FileAlreadyExistsException` is thrown. This is the behavior before Spark 3.0. User can control fast-fail behavior by enabling it. ### How was this patch tested? Unit test. Closes #30073 from viirya/SPARK-33170. 
Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../apache/spark/sql/internal/SQLConf.scala | 16 ++++++++++ .../datasources/FileFormatWriter.scala | 3 +- .../spark/sql/sources/InsertSuite.scala | 32 ++++++++++++------- 3 files changed, 38 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 319387fe854cf..0497c9b7e80b8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2052,6 +2052,20 @@ object SQLConf { .stringConf .createWithDefault("") + val FASTFAIL_ON_FILEFORMAT_OUTPUT = + buildConf("spark.sql.execution.fastFailOnFileFormatOutput") + .internal() + .doc("Whether to fast fail task execution when writing output to FileFormat datasource. " + + "If this is enabled, in `FileFormatWriter` we will catch `FileAlreadyExistsException` " + + "and fast fail output task without further task retry. Only enabling this if you know " + + "the `FileAlreadyExistsException` of the output task is unrecoverable, i.e., further " + + "task attempts won't be able to success. If the `FileAlreadyExistsException` might be " + + "recoverable, you should keep this as disabled and let Spark to retry output tasks. " + + "This is disabled by default.") + .version("3.0.2") + .booleanConf + .createWithDefault(false) + object PartitionOverwriteMode extends Enumeration { val STATIC, DYNAMIC = Value } @@ -3336,6 +3350,8 @@ class SQLConf extends Serializable with Logging { def disabledV2StreamingMicroBatchReaders: String = getConf(DISABLED_V2_STREAMING_MICROBATCH_READERS) + def fastFailFileFormatOutput: Boolean = getConf(FASTFAIL_ON_FILEFORMAT_OUTPUT) + def concatBinaryAsString: Boolean = getConf(CONCAT_BINARY_AS_STRING) def eltOutputAsString: Boolean = getConf(ELT_OUTPUT_AS_STRING) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala index 219c778b9164a..abb88ae73cabf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala @@ -39,6 +39,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCo import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils} import org.apache.spark.sql.execution.{ProjectExec, SortExec, SparkPlan, SQLExecution} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StringType import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.{SerializableConfiguration, Utils} @@ -283,7 +284,7 @@ object FileFormatWriter extends Logging { } catch { case e: FetchFailedException => throw e - case f: FileAlreadyExistsException => + case f: FileAlreadyExistsException if SQLConf.get.fastFailFileFormatOutput => // If any output file to write already exists, it does not make sense to re-run this task. // We throw the exception and let Executor throw ExceptionFailure to abort the job. 
throw new TaskOutputFileAlreadyExistException(f) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index 32c4fb60b8c54..9b5466e8a68f1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -826,21 +826,29 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { } test("Stop task set if FileAlreadyExistsException was thrown") { - withSQLConf("fs.file.impl" -> classOf[FileExistingTestFileSystem].getName, - "fs.file.impl.disable.cache" -> "true") { - withTable("t") { - sql( - """ - |CREATE TABLE t(i INT, part1 INT) USING PARQUET - |PARTITIONED BY (part1) + Seq(true, false).foreach { fastFail => + withSQLConf("fs.file.impl" -> classOf[FileExistingTestFileSystem].getName, + "fs.file.impl.disable.cache" -> "true", + SQLConf.FASTFAIL_ON_FILEFORMAT_OUTPUT.key -> fastFail.toString) { + withTable("t") { + sql( + """ + |CREATE TABLE t(i INT, part1 INT) USING PARQUET + |PARTITIONED BY (part1) """.stripMargin) - val df = Seq((1, 1)).toDF("i", "part1") - val err = intercept[SparkException] { - df.write.mode("overwrite").format("parquet").insertInto("t") + val df = Seq((1, 1)).toDF("i", "part1") + val err = intercept[SparkException] { + df.write.mode("overwrite").format("parquet").insertInto("t") + } + + if (fastFail) { + assert(err.getCause.getMessage.contains("can not write to output file: " + + "org.apache.hadoop.fs.FileAlreadyExistsException")) + } else { + assert(err.getCause.getMessage.contains("Task failed while writing rows")) + } } - assert(err.getCause.getMessage.contains("can not write to output file: " + - "org.apache.hadoop.fs.FileAlreadyExistsException")) } } } From 7766a6fb5f66c6b339909ae25d7f01769f580b18 Mon Sep 17 00:00:00 2001 From: Keiji Yoshida Date: Sun, 18 Oct 2020 16:47:04 +0900 Subject: [PATCH 0268/1009] [MINOR][DOCS][EXAMPLE] Fix the Python manual_load_options_csv example ### What changes were proposed in this pull request? This pull request changes the `sep` parameter's value from `:` to `;` in the example of `examples/src/main/python/sql/datasource.py`. This code snippet is shown on the Spark SQL Guide documentation. The `sep` parameter's value should be `;` since the data in https://github.com/apache/spark/blob/master/examples/src/main/resources/people.csv is separated by `;`. ### Why are the changes needed? To fix the example code so that it can be executed properly. ### Does this PR introduce _any_ user-facing change? Yes. This code snippet is shown on the Spark SQL Guide documentation: https://spark.apache.org/docs/latest/sql-data-sources-load-save-functions.html#manually-specifying-options ### How was this patch tested? By building the documentation and checking the Spark SQL Guide documentation manually in the local environment. Closes #30082 from kjmrknsn/fix-example-python-datasource. 
Authored-by: Keiji Yoshida Signed-off-by: HyukjinKwon --- examples/src/main/python/sql/datasource.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/src/main/python/sql/datasource.py b/examples/src/main/python/sql/datasource.py index 94a41a7e5e7b4..eecd8c2d84788 100644 --- a/examples/src/main/python/sql/datasource.py +++ b/examples/src/main/python/sql/datasource.py @@ -94,7 +94,7 @@ def basic_datasource_example(spark): # $example on:manual_load_options_csv$ df = spark.read.load("examples/src/main/resources/people.csv", - format="csv", sep=":", inferSchema="true", header="true") + format="csv", sep=";", inferSchema="true", header="true") # $example off:manual_load_options_csv$ # $example on:manual_save_options_orc$ From d2f328aba6f1d218425fe5d41bdec66dcaa33c85 Mon Sep 17 00:00:00 2001 From: Keiji Yoshida Date: Sun, 18 Oct 2020 17:13:55 +0900 Subject: [PATCH 0269/1009] [MINOR][DOCS] Fix the link to the pickle module page in RDD Programming Guide ### What changes were proposed in this pull request? This pull request changes the link to the pickle module page from https://docs.python.org/2/library/pickle.html to https://docs.python.org/3/library/pickle.html in RDD Programming Guide. ### Why are the changes needed? Since Python 2 is no longer supported and it is preferable to refer to the pickle module page of Python 3. ### Does this PR introduce _any_ user-facing change? Yes. Before: the `Pickle` link's destination page was https://docs.python.org/2/library/pickle.html After: the `Pickle` link's destination page is https://docs.python.org/3/library/pickle.html ### How was this patch tested? By building the documentation site and check the link's destination page is changed correctly in the local environment. Closes #30081 from kjmrknsn/docs-fix-pickle-link. Authored-by: Keiji Yoshida Signed-off-by: HyukjinKwon --- docs/rdd-programming-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/rdd-programming-guide.md b/docs/rdd-programming-guide.md index b48540dc09ece..acc682b27681b 100644 --- a/docs/rdd-programming-guide.md +++ b/docs/rdd-programming-guide.md @@ -1254,7 +1254,7 @@ storage levels is: -**Note:** *In Python, stored objects will always be serialized with the [Pickle](https://docs.python.org/2/library/pickle.html) library, +**Note:** *In Python, stored objects will always be serialized with the [Pickle](https://docs.python.org/3/library/pickle.html) library, so it does not matter whether you choose a serialized level. The available storage levels in Python include `MEMORY_ONLY`, `MEMORY_ONLY_2`, `MEMORY_AND_DISK`, `MEMORY_AND_DISK_2`, `DISK_ONLY`, `DISK_ONLY_2`, and `DISK_ONLY_3`.* From 20b7b923abc2266cf280b8623d6b5b9b277177ec Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 18 Oct 2020 09:21:07 -0700 Subject: [PATCH 0270/1009] [SPARK-33176][K8S] Use 11-jre-slim as default in K8s Dockerfile ### What changes were proposed in this pull request? This PR aims to use `openjdk:11-jre-slim` as default in K8s Dockerfile. ### Why are the changes needed? Although Apache Spark supports both Java8/Java11, there is a difference. 1. Java8-built distribution can run both Java8/Java11 2. Java11-built distribution can run on Java11, but not Java8. In short, we had better use Java11 in Dockerfile to embrace both cases without any issues. ### Does this PR introduce _any_ user-facing change? Yes. This will remove the change of user frustration when they build with JDK11 and build the image without overriding Java base image. 
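As a minimal sketch of that override path (illustrative only: it assumes the `-b` build-arg option of `bin/docker-image-tool.sh` and relies on the `java_image_tag` build arg visible in the Dockerfile diff below; the repository and tag names are placeholders), a Java 8 base image can still be selected explicitly at image build time:

```
# Build the Spark image on top of a JRE 8 base image instead of the new default
./bin/docker-image-tool.sh -r <repo> -t <tag> -b java_image_tag=8-jre-slim build
```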
### How was this patch tested? Pass the K8s IT. Closes #30083 from dongjoon-hyun/SPARK-33176. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../kubernetes/docker/src/main/dockerfiles/spark/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile index a13fe67c9eb72..8c3db7e243d8b 100644 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # -ARG java_image_tag=8-jre-slim +ARG java_image_tag=11-jre-slim FROM openjdk:${java_image_tag} From ad99f14b4277616b681c91778eba4d9184f8eecf Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Sun, 18 Oct 2020 09:24:44 -0700 Subject: [PATCH 0271/1009] [SPARK-33109][BUILD][FOLLOW-UP] Remove the obsolete comment about bringing sbt-dependency-graph back ### What changes were proposed in this pull request? This PR proposes to remove an obsolete comment about adding the `sbt-dependency-graph` back in SBT plugins. ### Why are the changes needed? sbt-dependency-graph is now built-in from SBT 1.4.0, see https://github.com/sbt/sbt/releases/tag/v1.4.0. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually tested `./build/sbt dependencyTree`. Closes #30085 from HyukjinKwon/SPARK-33109. Authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- project/plugins.sbt | 2 -- 1 file changed, 2 deletions(-) diff --git a/project/plugins.sbt b/project/plugins.sbt index 920aa677f9e92..c33a96772d5a1 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -45,5 +45,3 @@ libraryDependencies += "org.ow2.asm" % "asm-commons" % "7.2" addSbtPlugin("com.simplytyped" % "sbt-antlr4" % "0.8.2") addSbtPlugin("com.typesafe.sbt" % "sbt-pom-reader" % "2.2.0") - -// TODO(SPARK-33109) Upgrade to SBT 1.4 and support `dependencyTree` back From 97605cd1269987ed5ba3013a5f8497375ce8913e Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 18 Oct 2020 09:59:50 -0700 Subject: [PATCH 0272/1009] [SPARK-33175][K8S] Detect duplicated mountPaths and fail at Spark side ### What changes were proposed in this pull request? This PR aims to detect duplicate `mountPath`s and stop the job. ### Why are the changes needed? If there is a conflict on `mountPath`, the pod is created and repeats the following error messages and keeps running. Spark job should not keep running and wasting the cluster resources. We had better fail at Spark side. ``` $ k get pod -l 'spark-role in (driver,executor)' NAME READY STATUS RESTARTS AGE tpcds 1/1 Running 0 33m ``` ``` 20/10/18 05:09:26 WARN ExecutorPodsSnapshotsStoreImpl: Exception when notifying snapshot subscriber. io.fabric8.kubernetes.client.KubernetesClientException: Failure executing: POST at: ... Message: Pod "tpcds-exec-1" is invalid: spec.containers[0].volumeMounts[1].mountPath: Invalid value: "/data1": must be unique. ... ``` **AFTER THIS PR** The job will stop with the following error message instead of keeping running. ``` 20/10/18 06:58:45 ERROR ExecutorPodsSnapshotsStoreImpl: Going to stop due to IllegalArgumentException java.lang.IllegalArgumentException: requirement failed: Found duplicated mountPath: `/data1` ``` ### Does this PR introduce _any_ user-facing change? Yes, but this is a bug fix. 
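As an illustrative sketch of how such a conflict arises (assuming the standard `spark.kubernetes.executor.volumes.*` configuration scheme; the volume names `hpVolume` and `checkpointVolume` simply mirror the new test case below), a submission that maps two executor volumes onto the same path now fails fast with the `IllegalArgumentException` shown above instead of leaving the job running:

```
--conf spark.kubernetes.executor.volumes.hostPath.hpVolume.mount.path=/data \
--conf spark.kubernetes.executor.volumes.hostPath.hpVolume.options.path=/hostPath/tmp \
--conf spark.kubernetes.executor.volumes.persistentVolumeClaim.checkpointVolume.mount.path=/data \
--conf spark.kubernetes.executor.volumes.persistentVolumeClaim.checkpointVolume.options.claimName=pvcClaim
```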
### How was this patch tested? Pass the CI with the newly added test case. Closes #30084 from dongjoon-hyun/SPARK-33175-2. Lead-authored-by: Dongjoon Hyun Co-authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../features/MountVolumesFeatureStep.scala | 5 ++++ .../k8s/ExecutorPodsSnapshotsStoreImpl.scala | 3 +++ .../MountVolumesFeatureStepSuite.scala | 25 +++++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala index e297656520200..c66756fd69116 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala @@ -49,6 +49,11 @@ private[spark] class MountVolumesFeatureStep(conf: KubernetesConf) private def constructVolumes( volumeSpecs: Iterable[KubernetesVolumeSpec] ): Iterable[(VolumeMount, Volume)] = { + val duplicateMountPaths = volumeSpecs.map(_.mountPath).toSeq.groupBy(identity).collect { + case (x, ys) if ys.length > 1 => s"'$x'" + } + require(duplicateMountPaths.isEmpty, + s"Found duplicated mountPath: ${duplicateMountPaths.mkString(", ")}") volumeSpecs.zipWithIndex.map { case (spec, i) => val volumeMount = new VolumeMountBuilder() .withMountPath(spec.mountPath) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotsStoreImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotsStoreImpl.scala index 5c192c690eba5..3f2cb485bbb31 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotsStoreImpl.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotsStoreImpl.scala @@ -133,6 +133,9 @@ private[spark] class ExecutorPodsSnapshotsStoreImpl(subscribersExecutor: Schedul snapshotsBuffer.drainTo(snapshots) onNewSnapshots(snapshots.asScala.toSeq) } catch { + case e: IllegalArgumentException => + logError("Going to stop due to IllegalArgumentException", e) + System.exit(1) case NonFatal(e) => logWarning("Exception when notifying snapshot subscriber.", e) } finally { lock.unlock() diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala index e95af264d09ec..bbb89fd0a1c24 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala @@ -236,6 +236,31 @@ class MountVolumesFeatureStepSuite extends SparkFunSuite { assert(configuredPod.container.getVolumeMounts.size() === 2) } + test("mountPath should be unique") { + val hpVolumeConf = KubernetesVolumeSpec( + "hpVolume", + "/data", + "", + false, + KubernetesHostPathVolumeConf("/hostPath/tmp") + ) + val pvcVolumeConf = KubernetesVolumeSpec( + "checkpointVolume", + "/data", + "", + true, + KubernetesPVCVolumeConf("pvcClaim") + ) + val kubernetesConf = 
KubernetesTestConf.createDriverConf( + volumes = Seq(hpVolumeConf, pvcVolumeConf)) + + val step = new MountVolumesFeatureStep(kubernetesConf) + val m = intercept[IllegalArgumentException] { + step.configurePod(SparkPod.initialPod()) + }.getMessage + assert(m.contains("Found duplicated mountPath: '/data'")) + } + test("Mounts subpath on emptyDir") { val volumeConf = KubernetesVolumeSpec( "testVolume", From ce498943d23e1660ba2b724e8831739f3b8a0bbf Mon Sep 17 00:00:00 2001 From: "tanel.kiis@gmail.com" Date: Mon, 19 Oct 2020 09:50:59 +0900 Subject: [PATCH 0273/1009] [SPARK-33177][SQL] CollectList and CollectSet should not be nullable ### What changes were proposed in this pull request? Mark `CollectList` and `CollectSet` as non-nullable. ### Why are the changes needed? `CollectList` and `CollectSet` SQL expressions never return null value. Marking them as non-nullable can have some performance benefits, because some optimizer rules apply only to non-nullable expressions ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Did not find any existing tests on the nullability of aggregate functions. Closes #30087 from tanelk/SPARK-33177_collect. Authored-by: tanel.kiis@gmail.com Signed-off-by: HyukjinKwon --- .../spark/sql/catalyst/expressions/aggregate/collect.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala index 0a3d87623be8b..f95f44c808092 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala @@ -39,7 +39,7 @@ abstract class Collect[T <: Growable[Any] with Iterable[Any]] extends TypedImper override def children: Seq[Expression] = child :: Nil - override def nullable: Boolean = true + override def nullable: Boolean = false override def dataType: DataType = ArrayType(child.dataType, false) From f8277d3aa308d267ff0423f85ffd884480cedf59 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Sun, 18 Oct 2020 19:02:21 -0700 Subject: [PATCH 0274/1009] [SPARK-32069][CORE][SQL] Improve error message on reading unexpected directory ### What changes were proposed in this pull request? Improve error message on reading unexpected directory ### Why are the changes needed? Improve error message on reading unexpected directory ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Ut Closes #30027 from AngersZhuuuu/SPARK-32069. 
Authored-by: angerszhu Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/rdd/HadoopRDD.scala | 4 ++++ .../sql/hive/HiveParquetSourceSuite.scala | 16 +++++++++---- .../hive/execution/HiveTableScanSuite.scala | 23 +++++++++++++++++++ 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala index d5f21112c0c9e..5fc0b4f736d55 100644 --- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala @@ -232,6 +232,10 @@ class HadoopRDD[K, V]( logWarning(s"${jobConf.get(FileInputFormat.INPUT_DIR)} doesn't exist and no" + s" partitions returned from this path.", e) Array.empty[Partition] + case e: IOException if e.getMessage.startsWith("Not a file:") => + val path = e.getMessage.split(":").map(_.trim).apply(2) + throw new IOException(s"Path: ${path} is a directory, which is not supported by the " + + s"record reader when `mapreduce.input.fileinputformat.input.dir.recursive` is false.") } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala index b557fe73f1154..86fc32cd8ca63 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala @@ -230,6 +230,12 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest { withTempPath { path => withTable("parq_tbl1", "parq_tbl2", "parq_tbl3", "tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") { + + def checkErrorMsg(path: String): String = { + s"Path: ${path} is a directory, which is not supported by the record reader " + + s"when `mapreduce.input.fileinputformat.input.dir.recursive` is false." 
+ } + val parquetTblStatement1 = s""" |CREATE EXTERNAL TABLE parq_tbl1( @@ -287,7 +293,7 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest { val msg = intercept[IOException] { sql("SELECT * FROM tbl1").show() }.getMessage - assert(msg.contains("Not a file:")) + assert(msg.contains(checkErrorMsg(s"$path/l1"))) } val l1DirStatement = @@ -305,7 +311,7 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest { val msg = intercept[IOException] { sql("SELECT * FROM tbl2").show() }.getMessage - assert(msg.contains("Not a file:")) + assert(msg.contains(checkErrorMsg(s"$path/l1/l2"))) } val l2DirStatement = @@ -323,7 +329,7 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest { val msg = intercept[IOException] { sql("SELECT * FROM tbl3").show() }.getMessage - assert(msg.contains("Not a file:")) + assert(msg.contains(checkErrorMsg(s"$path/l1/l2/l3"))) } val wildcardTopDirStatement = @@ -341,7 +347,7 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest { val msg = intercept[IOException] { sql("SELECT * FROM tbl4").show() }.getMessage - assert(msg.contains("Not a file:")) + assert(msg.contains(checkErrorMsg(s"$path/l1/l2"))) } val wildcardL1DirStatement = @@ -359,7 +365,7 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest { val msg = intercept[IOException] { sql("SELECT * FROM tbl5").show() }.getMessage - assert(msg.contains("Not a file:")) + assert(msg.contains(checkErrorMsg(s"$path/l1/l2/l3"))) } val wildcardL2DirStatement = diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala index bdccfccbc5bdb..ba6dbb01d5901 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.hive.execution +import java.io.{File, IOException} + import org.apache.spark.sql.Row import org.apache.spark.sql.functions.col import org.apache.spark.sql.hive.HiveUtils @@ -248,6 +250,27 @@ class HiveTableScanSuite extends HiveComparisonTest with SQLTestUtils with TestH } } + test("SPARK-32069: Improve error message on reading unexpected directory") { + withTable("t") { + withTempDir { f => + sql(s"CREATE TABLE t(i LONG) LOCATION '${f.getAbsolutePath}'") + sql("INSERT INTO t VALUES(1)") + val dir = new File(f.getCanonicalPath + "/data") + dir.mkdir() + sql("set mapreduce.input.fileinputformat.input.dir.recursive=true") + assert(sql("select * from t").collect().head.getLong(0) == 1) + sql("set mapreduce.input.fileinputformat.input.dir.recursive=false") + val e = intercept[IOException] { + sql("SELECT * FROM t").collect() + } + assert(e.getMessage.contains(s"Path: ${dir.getAbsoluteFile} is a directory, " + + s"which is not supported by the record reader " + + s"when `mapreduce.input.fileinputformat.input.dir.recursive` is false.")) + dir.delete() + } + } + } + private def getHiveTableScanExec(query: String): HiveTableScanExec = { sql(query).queryExecution.sparkPlan.collectFirst { case p: HiveTableScanExec => p From e6c53c2c1b538d6272df4d1ca294d04f8b49bd6c Mon Sep 17 00:00:00 2001 From: William Hyun Date: Mon, 19 Oct 2020 14:13:37 +0900 Subject: [PATCH 0275/1009] [SPARK-33123][INFRA] Ignore GitHub only changes in Amplab Jenkins build ### What changes were proposed in this pull request? This PR aims to ignore GitHub only changes in Amplab Jenkins build. 
### Why are the changes needed? This will save server resources. ### Does this PR introduce _any_ user-facing change? No, this is a dev-only change. ### How was this patch tested? Manually. I used the following doctest during testing and removed it at the clean-up. E2E tests: ``` cd dev cat test.py ``` ```python import importlib runtests = importlib.import_module("run-tests") print([x.name for x in runtests.determine_modules_for_files([".github/workflows/build_and_test.yml"])]) ``` ```python $ GITHUB_ACTIONS=1 python test.py ['root'] $ python test.py [] ``` Unittests: ```bash $ GITHUN_ACTIONS=1 python3 -m doctest dev/run-tests.py $ python3 -m doctest dev/run-tests.py ``` Closes #30020 from williamhyun/SPARK-33123. Lead-authored-by: William Hyun Co-authored-by: Hyukjin Kwon Signed-off-by: HyukjinKwon --- dev/run-tests.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index 48191e9bb024d..662ac2d6311dd 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -42,7 +42,8 @@ def determine_modules_for_files(filenames): """ Given a list of filenames, return the set of modules that contain those files. If a file is not associated with a more specific submodule, then this method will consider that - file to belong to the 'root' module. GitHub Action and Appveyor files are ignored. + file to belong to the 'root' module. `.github` directory is counted only in GitHub Actions, + and `appveyor.yml` is always ignored because this file is dedicated only to AppVeyor builds. >>> sorted(x.name for x in determine_modules_for_files(["python/pyspark/a.py", "sql/core/foo"])) ['pyspark-core', 'sql'] @@ -55,6 +56,8 @@ def determine_modules_for_files(filenames): for filename in filenames: if filename in ("appveyor.yml",): continue + if ("GITHUB_ACTIONS" not in os.environ) and filename.startswith(".github"): + continue matched_at_least_one_module = False for module in modules.all_modules: if module.contains_file(filename): From 53783e706dde943adee978a8eeee95a6f60687bd Mon Sep 17 00:00:00 2001 From: William Hyun Date: Mon, 19 Oct 2020 15:54:52 +0900 Subject: [PATCH 0276/1009] [SPARK-33179][TESTS] Switch default Hadoop profile in run-tests.py ### What changes were proposed in this pull request? This PR aims to switch the default Hadoop profile from `hadoop2.7` to `hadoop3.2` in `dev/run-tests.py` when it's running in local or GitHub Action environments. ### Why are the changes needed? The default Hadoop version is 3.2. We had better be consistent. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually. **BEFORE** ``` % dev/run-tests.py Cannot install SparkR as R was not found in PATH [info] Using build tool sbt with Hadoop profile hadoop2.7 and Hive profile hive2.3 under environment local ``` **AFTER** ``` % dev/run-tests.py Cannot install SparkR as R was not found in PATH [info] Using build tool sbt with Hadoop profile hadoop3.2 and Hive profile hive2.3 under environment local ``` Closes #30090 from williamhyun/SPARK-33179. Authored-by: William Hyun Signed-off-by: HyukjinKwon --- dev/run-tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index 662ac2d6311dd..5bdbc0ffb850c 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -638,7 +638,7 @@ def main(): else: # else we're running locally or Github Actions. 
build_tool = "sbt" - hadoop_version = os.environ.get("HADOOP_PROFILE", "hadoop2.7") + hadoop_version = os.environ.get("HADOOP_PROFILE", "hadoop3.2") hive_version = os.environ.get("HIVE_PROFILE", "hive2.3") if "GITHUB_ACTIONS" in os.environ: test_env = "github_actions" From 388e067a909516a9a509399fe17d79ce1fb54d31 Mon Sep 17 00:00:00 2001 From: "xuewei.linxuewei" Date: Mon, 19 Oct 2020 16:40:48 +0900 Subject: [PATCH 0277/1009] [SPARK-33139][SQL][FOLLOW-UP] Avoid using reflect call on session.py ### What changes were proposed in this pull request? In [SPARK-33139](https://github.com/apache/spark/pull/30042), I was using reflect "Class.forName" in python code to invoke method in SparkSession which is not recommended. using getattr to access "SparkSession$.Module$" instead. ### Why are the changes needed? Code refine. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Existing tests. Closes #30092 from leanken/leanken-SPARK-33139-followup. Authored-by: xuewei.linxuewei Signed-off-by: HyukjinKwon --- python/pyspark/sql/session.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index e6ab1ea3878f3..d724b76e3bfc3 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -230,9 +230,7 @@ def __init__(self, sparkContext, jsparkSession=None): SparkSession._instantiatedSession = self SparkSession._activeSession = self self._jvm.SparkSession.setDefaultSession(self._jsparkSession) - self._jvm.java.lang.Class.forName("org.apache.spark.sql.SparkSession$")\ - .getDeclaredField("MODULE$")\ - .get(None)\ + getattr(getattr(self._jvm, "SparkSession$"), "MODULE$")\ .setActiveSessionInternal(self._jsparkSession) def _repr_html_(self): @@ -564,9 +562,7 @@ def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=Tr Py4JJavaError: ... """ SparkSession._activeSession = self - self._jvm.java.lang.Class.forName("org.apache.spark.sql.SparkSession$")\ - .getDeclaredField("MODULE$")\ - .get(None)\ + getattr(getattr(self._jvm, "SparkSession$"), "MODULE$")\ .setActiveSessionInternal(self._jsparkSession) if isinstance(data, DataFrame): raise TypeError("data is already a DataFrame") @@ -689,10 +685,7 @@ def stop(self): self._sc.stop() # We should clean the default session up. See SPARK-23228. self._jvm.SparkSession.clearDefaultSession() - self._jvm.java.lang.Class.forName("org.apache.spark.sql.SparkSession$")\ - .getDeclaredField("MODULE$")\ - .get(None)\ - .clearActiveSessionInternal() + getattr(getattr(self._jvm, "SparkSession$"), "MODULE$").clearActiveSessionInternal() SparkSession._instantiatedSession = None SparkSession._activeSession = None SQLContext._instantiatedContext = None From a7a8dae4836f455a26ba6cb3c7d733775b6af0f6 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Mon, 19 Oct 2020 17:13:47 +0900 Subject: [PATCH 0278/1009] Revert "[SPARK-33069][INFRA] Skip test result report if no JUnit XML files are found" This reverts commit a0aa8f33a9420feb9228b51a3dfad2e7e86d65a5. 
--- .github/workflows/test_report.yml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/.github/workflows/test_report.yml b/.github/workflows/test_report.yml index 060a8795b6a77..93cdb86687261 100644 --- a/.github/workflows/test_report.yml +++ b/.github/workflows/test_report.yml @@ -15,16 +15,7 @@ jobs: github_token: ${{ secrets.GITHUB_TOKEN }} workflow: ${{ github.event.workflow_run.workflow_id }} commit: ${{ github.event.workflow_run.head_commit.id }} - - name: Check if JUnit report XML files exist - run: | - if ls **/target/test-reports/*.xml > /dev/null 2>&1; then - echo '::set-output name=FILE_EXISTS::true' - else - echo '::set-output name=FILE_EXISTS::false' - fi - id: check-junit-file - name: Publish test report - if: steps.check-junit-file.outputs.FILE_EXISTS == 'true' uses: scacap/action-surefire-report@v1 with: check_name: Report test results From 26b13c70c312147e42db27cd986e970115a55cdd Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 19 Oct 2020 17:47:49 +0900 Subject: [PATCH 0279/1009] [SPARK-33169][SQL][TESTS] Check propagation of datasource options to underlying file system for built-in file-based datasources ### What changes were proposed in this pull request? 1. Add the common trait `CommonFileDataSourceSuite` with tests that can be executed for all built-in file-based datasources. 2. Add a test `CommonFileDataSourceSuite` to check that datasource options are propagated to underlying file systems as Hadoop configs. 3. Mix `CommonFileDataSourceSuite` to `AvroSuite`, `OrcSourceSuite`, `TextSuite`, `JsonSuite`, CSVSuite` and to `ParquetFileFormatSuite`. 4. Remove duplicated tests from `AvroSuite` and from `OrcSourceSuite`. ### Why are the changes needed? To improve test coverage and test all built-in file-based datasources. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running the affected test suites. Closes #30067 from MaxGekk/ds-options-common-test. 
Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- .../org/apache/spark/sql/avro/AvroSuite.scala | 20 +++--- .../source/libsvm/LibSVMRelationSuite.scala | 28 +++++---- .../spark/sql/FileBasedDataSourceSuite.scala | 16 ----- .../CommonFileDataSourceSuite.scala | 62 +++++++++++++++++++ .../execution/datasources/csv/CSVSuite.scala | 10 ++- .../datasources/json/JsonSuite.scala | 11 +++- .../datasources/orc/OrcSourceSuite.scala | 23 ++----- .../parquet/ParquetFileFormatSuite.scala | 25 +++++++- .../datasources/text/TextSuite.scala | 5 +- 9 files changed, 136 insertions(+), 64 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/CommonFileDataSourceSuite.scala diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index b0f2f8ed09a96..52cab880ab897 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -40,7 +40,7 @@ import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.Filter import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{withDefaultTimeZone, LA, UTC} import org.apache.spark.sql.execution.{FormattedMode, SparkPlan} -import org.apache.spark.sql.execution.datasources.{DataSource, FilePartition} +import org.apache.spark.sql.execution.datasources.{CommonFileDataSourceSuite, DataSource, FilePartition} import org.apache.spark.sql.execution.datasources.v2.BatchScanExec import org.apache.spark.sql.functions.col import org.apache.spark.sql.internal.SQLConf @@ -50,9 +50,15 @@ import org.apache.spark.sql.types._ import org.apache.spark.sql.v2.avro.AvroScan import org.apache.spark.util.Utils -abstract class AvroSuite extends QueryTest with SharedSparkSession with NestedDataSourceSuiteBase { +abstract class AvroSuite + extends QueryTest + with SharedSparkSession + with CommonFileDataSourceSuite + with NestedDataSourceSuiteBase { + import testImplicits._ + override protected def dataSourceFormat = "avro" override val nestedDataSources = Seq("avro") val episodesAvro = testFile("episodes.avro") val testAvro = testFile("test.avro") @@ -1807,16 +1813,6 @@ abstract class AvroSuite extends QueryTest with SharedSparkSession with NestedDa } } - test("SPARK-33089: should propagate Hadoop config from DS options to underlying file system") { - withSQLConf( - "fs.file.impl" -> classOf[FakeFileSystemRequiringDSOption].getName, - "fs.file.impl.disable.cache" -> "true") { - val conf = Map("ds_option" -> "value") - val path = "file:" + testAvro.stripPrefix("file:") - spark.read.format("avro").options(conf).load(path) - } - } - test("SPARK-33163: write the metadata key 'org.apache.spark.legacyDateTime'") { def saveTs(dir: java.io.File): Unit = { Seq(Timestamp.valueOf("2020-10-15 01:02:03")).toDF() diff --git a/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala index cc0ca308cb668..a456409cfe3bc 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala @@ -27,13 +27,26 @@ import org.apache.spark.ml.attribute.AttributeGroup import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.ml.linalg.SQLDataTypes.VectorType 
import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.sql.{FakeFileSystemRequiringDSOption, Row, SaveMode} -import org.apache.spark.sql.catalyst.plans.SQLHelper +import org.apache.spark.sql.{Row, SaveMode} +import org.apache.spark.sql.execution.datasources.CommonFileDataSourceSuite import org.apache.spark.sql.types.{DoubleType, StructField, StructType} import org.apache.spark.util.Utils +class LibSVMRelationSuite + extends SparkFunSuite + with MLlibTestSparkContext + with CommonFileDataSourceSuite { + + override protected def dataSourceFormat = "libsvm" + override protected def inputDataset = { + val rawData = new java.util.ArrayList[Row]() + rawData.add(Row(1.0, Vectors.sparse(1, Seq((0, 1.0))))) + val struct = new StructType() + .add("labelFoo", DoubleType, false) + .add("featuresBar", VectorType, false) + spark.createDataFrame(rawData, struct) + } -class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext with SQLHelper { // Path for dataset var path: String = _ @@ -212,13 +225,4 @@ class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext with assert(v == Vectors.sparse(2, Seq((0, 2.0), (1, 3.0)))) } } - - test("SPARK-33101: should propagate Hadoop config from DS options to underlying file system") { - withSQLConf( - "fs.file.impl" -> classOf[FakeFileSystemRequiringDSOption].getName, - "fs.file.impl.disable.cache" -> "true") { - val df = spark.read.option("ds_option", "value").format("libsvm").load(path) - assert(df.columns(0) == "label") - } - } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala index 77e07e5550f35..b27c1145181bd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -842,22 +842,6 @@ class FileBasedDataSourceSuite extends QueryTest } } - test("SPARK-31935: Hadoop file system config should be effective in data source options") { - Seq("parquet", "").foreach { format => - withSQLConf( - SQLConf.USE_V1_SOURCE_LIST.key -> format, - "fs.file.impl" -> classOf[FakeFileSystemRequiringDSOption].getName, - "fs.file.impl.disable.cache" -> "true") { - withTempDir { dir => - val path = "file:" + dir.getCanonicalPath.stripPrefix("file:") - spark.range(10).write.option("ds_option", "value").mode("overwrite").parquet(path) - checkAnswer( - spark.read.option("ds_option", "value").parquet(path), spark.range(10).toDF()) - } - } - } - } - test("SPARK-31116: Select nested schema with case insensitive mode") { // This test case failed at only Parquet. ORC is added for test coverage parity. Seq("orc", "parquet").foreach { format => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/CommonFileDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/CommonFileDataSourceSuite.scala new file mode 100644 index 0000000000000..b7d0a7fc306ad --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/CommonFileDataSourceSuite.scala @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources + +import org.scalatest.funsuite.AnyFunSuite + +import org.apache.spark.sql.{Dataset, Encoders, FakeFileSystemRequiringDSOption, SparkSession} +import org.apache.spark.sql.catalyst.plans.SQLHelper + +/** + * The trait contains tests for all file-based data sources. + * The tests that are not applicable to all file-based data sources should be placed to + * [[org.apache.spark.sql.FileBasedDataSourceSuite]]. + */ +trait CommonFileDataSourceSuite extends SQLHelper { self: AnyFunSuite => + + protected def spark: SparkSession + protected def dataSourceFormat: String + protected def inputDataset: Dataset[_] = spark.createDataset(Seq("abc"))(Encoders.STRING) + + test(s"Propagate Hadoop configs from $dataSourceFormat options to underlying file system") { + withSQLConf( + "fs.file.impl" -> classOf[FakeFileSystemRequiringDSOption].getName, + "fs.file.impl.disable.cache" -> "true") { + Seq(false, true).foreach { mergeSchema => + withTempPath { dir => + val path = dir.getAbsolutePath + val conf = Map("ds_option" -> "value", "mergeSchema" -> mergeSchema.toString) + inputDataset + .write + .options(conf) + .format(dataSourceFormat) + .save(path) + Seq(path, "file:" + path.stripPrefix("file:")).foreach { p => + val readback = spark + .read + .options(conf) + .format(dataSourceFormat) + .load(p) + // Checks that read doesn't throw the exception from `FakeFileSystemRequiringDSOption` + readback.write.mode("overwrite").format("noop").save() + } + } + } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 066259075d6bf..a236814fdcdcd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -36,13 +36,21 @@ import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.{SparkConf, SparkException, TestUtils} import org.apache.spark.sql.{AnalysisException, Column, DataFrame, QueryTest, Row} import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.execution.datasources.CommonFileDataSourceSuite import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ -abstract class CSVSuite extends QueryTest with SharedSparkSession with TestCsvData { +abstract class CSVSuite + extends QueryTest + with SharedSparkSession + with TestCsvData + with CommonFileDataSourceSuite { + import testImplicits._ + override protected def dataSourceFormat = "csv" + private val carsFile = "test-data/cars.csv" private val carsMalformedFile = "test-data/cars-malformed.csv" private val carsFile8859 = "test-data/cars_iso-8859-1.csv" diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index d9270024d5b28..76e05a2ed6ed7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.{functions => F, _} import org.apache.spark.sql.catalyst.json._ import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.execution.ExternalRDD -import org.apache.spark.sql.execution.datasources.{DataSource, InMemoryFileIndex, NoopCache} +import org.apache.spark.sql.execution.datasources.{CommonFileDataSourceSuite, DataSource, InMemoryFileIndex, NoopCache} import org.apache.spark.sql.execution.datasources.v2.json.JsonScanBuilder import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession @@ -49,9 +49,16 @@ class TestFileFilter extends PathFilter { override def accept(path: Path): Boolean = path.getParent.getName != "p=2" } -abstract class JsonSuite extends QueryTest with SharedSparkSession with TestJsonData { +abstract class JsonSuite + extends QueryTest + with SharedSparkSession + with TestJsonData + with CommonFileDataSourceSuite { + import testImplicits._ + override protected def dataSourceFormat = "json" + test("Type promotion") { def checkTypePromotion(expected: Any, actual: Any): Unit = { assert(expected.getClass == actual.getClass, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala index 1242b8c693d64..4c489bdcc649e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala @@ -32,8 +32,8 @@ import org.apache.orc.impl.RecordReaderImpl import org.scalatest.BeforeAndAfterAll import org.apache.spark.{SPARK_VERSION_SHORT, SparkException} -import org.apache.spark.sql.{FakeFileSystemRequiringDSOption, Row, SPARK_VERSION_METADATA_KEY} -import org.apache.spark.sql.execution.datasources.SchemaMergeUtils +import org.apache.spark.sql.{Row, SPARK_VERSION_METADATA_KEY} +import org.apache.spark.sql.execution.datasources.{CommonFileDataSourceSuite, SchemaMergeUtils} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{LongType, StructField, StructType} @@ -41,9 +41,11 @@ import org.apache.spark.util.Utils case class OrcData(intField: Int, stringField: String) -abstract class OrcSuite extends OrcTest with BeforeAndAfterAll { +abstract class OrcSuite extends OrcTest with BeforeAndAfterAll with CommonFileDataSourceSuite { import testImplicits._ + override protected def dataSourceFormat = "orc" + var orcTableDir: File = null var orcTableAsDir: File = null @@ -537,21 +539,6 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll { } } } - - test("SPARK-33094: should propagate Hadoop config from DS options to underlying file system") { - withSQLConf( - "fs.file.impl" -> classOf[FakeFileSystemRequiringDSOption].getName, - "fs.file.impl.disable.cache" -> "true") { - Seq(false, true).foreach { mergeSchema => - withTempPath { dir => - val path = dir.getAbsolutePath - val conf = Map("ds_option" -> "value", "mergeSchema" -> 
mergeSchema.toString) - spark.range(1).write.options(conf).orc(path) - checkAnswer(spark.read.options(conf).orc(path), Row(0)) - } - } - } - } } class OrcSourceSuite extends OrcSuite with SharedSparkSession { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormatSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormatSuite.scala index e65f4d12bf7f2..c52b57eb31e4d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormatSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormatSuite.scala @@ -19,12 +19,19 @@ package org.apache.spark.sql.execution.datasources.parquet import org.apache.hadoop.fs.{FileSystem, Path} -import org.apache.spark.SparkException +import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.execution.datasources.CommonFileDataSourceSuite import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession -class ParquetFileFormatSuite extends QueryTest with ParquetTest with SharedSparkSession { +abstract class ParquetFileFormatSuite + extends QueryTest + with ParquetTest + with SharedSparkSession + with CommonFileDataSourceSuite { + + override protected def dataSourceFormat = "parquet" test("read parquet footers in parallel") { def testReadFooters(ignoreCorruptFiles: Boolean): Unit = { @@ -57,3 +64,17 @@ class ParquetFileFormatSuite extends QueryTest with ParquetTest with SharedSpark assert(exception.getMessage().contains("Could not read footer for file")) } } + +class ParquetFileFormatV1Suite extends ParquetFileFormatSuite { + override protected def sparkConf: SparkConf = + super + .sparkConf + .set(SQLConf.USE_V1_SOURCE_LIST, "parquet") +} + +class ParquetFileFormatV2Suite extends ParquetFileFormatSuite { + override protected def sparkConf: SparkConf = + super + .sparkConf + .set(SQLConf.USE_V1_SOURCE_LIST, "") +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala index 7e97994476694..1eb32ed285799 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala @@ -26,14 +26,17 @@ import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.{SparkConf, TestUtils} import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row, SaveMode} +import org.apache.spark.sql.execution.datasources.CommonFileDataSourceSuite import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{StringType, StructType} import org.apache.spark.util.Utils -abstract class TextSuite extends QueryTest with SharedSparkSession { +abstract class TextSuite extends QueryTest with SharedSparkSession with CommonFileDataSourceSuite { import testImplicits._ + override protected def dataSourceFormat = "text" + test("reading text file") { verifyFrame(spark.read.format("text").load(testFile)) } From 66c5e0132209a5a94f9d7efb5e895f143b0ef53b Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 19 Oct 2020 10:35:34 -0700 Subject: [PATCH 0280/1009] [SPARK-32941][SQL] Optimize UpdateFields expression chain and put the rule early in Analysis phase ### What changes were 
proposed in this pull request? This patch proposes to add more optimization to `UpdateFields` expression chain. And optimize `UpdateFields` early in analysis phase. ### Why are the changes needed? `UpdateFields` can manipulate complex nested data, but using `UpdateFields` can easily create inefficient expression chain. We should optimize it further. Because when manipulating deeply nested schema, the `UpdateFields` expression tree could be too complex to analyze, this change optimizes `UpdateFields` early in analysis phase. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Closes #29812 from viirya/SPARK-32941. Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/analysis/Analyzer.scala | 6 ++ .../sql/catalyst/analysis/ResolveUnion.scala | 11 +--- .../sql/catalyst/optimizer/ComplexTypes.scala | 7 +- .../sql/catalyst/optimizer/Optimizer.scala | 2 +- .../sql/catalyst/optimizer/UpdateFields.scala | 57 +++++++++++++++-- ...te.scala => OptimizeWithFieldsSuite.scala} | 64 +++++++++++++++++-- .../optimizer/complexTypesSuite.scala | 6 +- 7 files changed, 129 insertions(+), 24 deletions(-) rename sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/{CombineUpdateFieldsSuite.scala => OptimizeWithFieldsSuite.scala} (51%) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 0ba150ec1efb4..4264627e0d9bd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -33,6 +33,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.SubExprUtils._ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.expressions.objects._ +import org.apache.spark.sql.catalyst.optimizer.OptimizeUpdateFields import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ @@ -207,6 +208,11 @@ class Analyzer( lazy val batches: Seq[Batch] = Seq( Batch("Substitution", fixedPoint, + // This rule optimizes `UpdateFields` expression chains so looks more like optimization rule. + // However, when manipulating deeply nested schema, `UpdateFields` expression tree could be + // very complex and make analysis impossible. Thus we need to optimize `UpdateFields` early + // at the beginning of analysis. 
+ OptimizeUpdateFields, CTESubstitution, WindowsSubstitution, EliminateUnions, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveUnion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveUnion.scala index c1a9c9d3d9bab..b08e116642ece 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveUnion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveUnion.scala @@ -21,7 +21,7 @@ import scala.collection.mutable import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.optimizer.CombineUnions +import org.apache.spark.sql.catalyst.optimizer.{CombineUnions, OptimizeUpdateFields} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, Union} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.internal.SQLConf @@ -88,13 +88,6 @@ object ResolveUnion extends Rule[LogicalPlan] { } } - def simplifyWithFields(expr: Expression): Expression = { - expr.transformUp { - case UpdateFields(UpdateFields(struct, fieldOps1), fieldOps2) => - UpdateFields(struct, fieldOps1 ++ fieldOps2) - } - } - /** * Adds missing fields recursively into given `col` expression, based on the target `StructType`. * This is called by `compareAndAddFields` when we find two struct columns with same name but @@ -119,7 +112,7 @@ object ResolveUnion extends Rule[LogicalPlan] { missingFieldsOpt.map { s => val struct = addFieldsInto(col, s.fields) // Combines `WithFields`s to reduce expression tree. - val reducedStruct = simplifyWithFields(struct) + val reducedStruct = struct.transformUp(OptimizeUpdateFields.optimizeUpdateFields) val sorted = sortStructFieldsInWithFields(reducedStruct) sorted }.get diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala index 860219e55b052..2ac8f62b67b3d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala @@ -46,7 +46,12 @@ object SimplifyExtractValueOps extends Rule[LogicalPlan] { // if the struct itself is null, then any value extracted from it (expr) will be null // so we don't need to wrap expr in If(IsNull(struct), Literal(null, expr.dataType), expr) case expr: GetStructField if expr.child.semanticEquals(structExpr) => expr - case expr => If(IsNull(structExpr), Literal(null, expr.dataType), expr) + case expr => + if (structExpr.nullable) { + If(IsNull(structExpr), Literal(null, expr.dataType), expr) + } else { + expr + } } // Remove redundant array indexing. 
case GetArrayStructFields(CreateArray(elems, useStringTypeWhenEmpty), field, ordinal, _, _) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 7586bdf4392f5..3e9a97419682d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -109,7 +109,7 @@ abstract class Optimizer(catalogManager: CatalogManager) RemoveRedundantAliases, UnwrapCastInBinaryComparison, RemoveNoopOperators, - CombineUpdateFields, + OptimizeUpdateFields, SimplifyExtractValueOps, OptimizeJsonExprs, CombineConcats) ++ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UpdateFields.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UpdateFields.scala index c7154210e0c62..465d2efe2775c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UpdateFields.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UpdateFields.scala @@ -17,19 +17,68 @@ package org.apache.spark.sql.catalyst.optimizer -import org.apache.spark.sql.catalyst.expressions.UpdateFields +import java.util.Locale + +import scala.collection.mutable + +import org.apache.spark.sql.catalyst.expressions.{Expression, UpdateFields, WithField} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.internal.SQLConf /** - * Combines all adjacent [[UpdateFields]] expression into a single [[UpdateFields]] expression. + * Optimizes [[UpdateFields]] expression chains. */ -object CombineUpdateFields extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { +object OptimizeUpdateFields extends Rule[LogicalPlan] { + private def canOptimize(names: Seq[String]): Boolean = { + if (SQLConf.get.caseSensitiveAnalysis) { + names.distinct.length != names.length + } else { + names.map(_.toLowerCase(Locale.ROOT)).distinct.length != names.length + } + } + + val optimizeUpdateFields: PartialFunction[Expression, Expression] = { + case UpdateFields(structExpr, fieldOps) + if fieldOps.forall(_.isInstanceOf[WithField]) && + canOptimize(fieldOps.map(_.asInstanceOf[WithField].name)) => + val caseSensitive = SQLConf.get.caseSensitiveAnalysis + + val withFields = fieldOps.map(_.asInstanceOf[WithField]) + val names = withFields.map(_.name) + val values = withFields.map(_.valExpr) + + val newNames = mutable.ArrayBuffer.empty[String] + val newValues = mutable.ArrayBuffer.empty[Expression] + + if (caseSensitive) { + names.zip(values).reverse.foreach { case (name, value) => + if (!newNames.contains(name)) { + newNames += name + newValues += value + } + } + } else { + val nameSet = mutable.HashSet.empty[String] + names.zip(values).reverse.foreach { case (name, value) => + val lowercaseName = name.toLowerCase(Locale.ROOT) + if (!nameSet.contains(lowercaseName)) { + newNames += name + newValues += value + nameSet += lowercaseName + } + } + } + + val newWithFields = newNames.reverse.zip(newValues.reverse).map(p => WithField(p._1, p._2)) + UpdateFields(structExpr, newWithFields.toSeq) + case UpdateFields(UpdateFields(struct, fieldOps1), fieldOps2) => UpdateFields(struct, fieldOps1 ++ fieldOps2) } + + def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions(optimizeUpdateFields) } /** diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombineUpdateFieldsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeWithFieldsSuite.scala similarity index 51% rename from sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombineUpdateFieldsSuite.scala rename to sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeWithFieldsSuite.scala index ff9c60a2fa5bd..b093b39cc4b88 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombineUpdateFieldsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeWithFieldsSuite.scala @@ -19,19 +19,21 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions.{Alias, Literal, UpdateFields, WithField} +import org.apache.spark.sql.catalyst.expressions.{Alias, GetStructField, Literal, UpdateFields, WithField} import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ +import org.apache.spark.sql.internal.SQLConf - -class CombineUpdateFieldsSuite extends PlanTest { +class OptimizeWithFieldsSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { - val batches = Batch("CombineUpdateFields", FixedPoint(10), CombineUpdateFields) :: Nil + val batches = Batch("OptimizeUpdateFields", FixedPoint(10), + OptimizeUpdateFields, SimplifyExtractValueOps) :: Nil } private val testRelation = LocalRelation('a.struct('a1.int)) + private val testRelation2 = LocalRelation('a.struct('a1.int).notNull) test("combines two adjacent UpdateFields Expressions") { val originalQuery = testRelation @@ -70,4 +72,58 @@ class CombineUpdateFieldsSuite extends PlanTest { comparePlans(optimized, correctAnswer) } + + test("SPARK-32941: optimize WithFields followed by GetStructField") { + val originalQuery = testRelation2 + .select(Alias( + GetStructField(UpdateFields('a, + WithField("b1", Literal(4)) :: Nil), 1), "out")()) + + val optimized = Optimize.execute(originalQuery.analyze) + val correctAnswer = testRelation2 + .select(Alias(Literal(4), "out")()) + .analyze + + comparePlans(optimized, correctAnswer) + } + + test("SPARK-32941: optimize WithFields chain - case insensitive") { + val originalQuery = testRelation + .select( + Alias(UpdateFields('a, + WithField("b1", Literal(4)) :: WithField("b1", Literal(5)) :: Nil), "out1")(), + Alias(UpdateFields('a, + WithField("b1", Literal(4)) :: WithField("B1", Literal(5)) :: Nil), "out2")()) + + val optimized = Optimize.execute(originalQuery.analyze) + val correctAnswer = testRelation + .select( + Alias(UpdateFields('a, WithField("b1", Literal(5)) :: Nil), "out1")(), + Alias(UpdateFields('a, WithField("B1", Literal(5)) :: Nil), "out2")()) + .analyze + + comparePlans(optimized, correctAnswer) + } + + test("SPARK-32941: optimize WithFields chain - case sensitive") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val originalQuery = testRelation + .select( + Alias(UpdateFields('a, + WithField("b1", Literal(4)) :: WithField("b1", Literal(5)) :: Nil), "out1")(), + Alias(UpdateFields('a, + WithField("b1", Literal(4)) :: WithField("B1", Literal(5)) :: Nil), "out2")()) + + val optimized = Optimize.execute(originalQuery.analyze) + val correctAnswer = testRelation + .select( + Alias(UpdateFields('a, WithField("b1", Literal(5)) :: Nil), 
"out1")(), + Alias( + UpdateFields('a, + WithField("b1", Literal(4)) :: WithField("B1", Literal(5)) :: Nil), "out2")()) + .analyze + + comparePlans(optimized, correctAnswer) + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala index d9cefdaf3fe70..9878969959bfd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala @@ -44,7 +44,7 @@ class ComplexTypesSuite extends PlanTest with ExpressionEvalHelper { BooleanSimplification, SimplifyConditionals, SimplifyBinaryComparison, - CombineUpdateFields, + OptimizeUpdateFields, SimplifyExtractValueOps) :: Nil } @@ -698,7 +698,6 @@ class ComplexTypesSuite extends PlanTest with ExpressionEvalHelper { val expected = structLevel2.select( UpdateFields('a1, Seq( // scalastyle:off line.size.limit - WithField("a2", UpdateFields(GetStructField('a1, 0), WithField("b3", 2) :: Nil)), WithField("a2", UpdateFields(GetStructField('a1, 0), WithField("b3", 2) :: WithField("c3", 3) :: Nil)) // scalastyle:on line.size.limit )).as("a1")) @@ -732,7 +731,6 @@ class ComplexTypesSuite extends PlanTest with ExpressionEvalHelper { structLevel2.select( UpdateFields('a1, Seq( - WithField("a2", repeatedExpr), WithField("a2", UpdateFields( If(IsNull('a1), Literal(null, repeatedExprDataType), repeatedExpr), WithField("c3", Literal(3)) :: Nil)) @@ -763,7 +761,6 @@ class ComplexTypesSuite extends PlanTest with ExpressionEvalHelper { val expected = structLevel2.select( UpdateFields('a1, Seq( - WithField("a2", UpdateFields(GetStructField('a1, 0), Seq(DropField("b3")))), WithField("a2", UpdateFields(GetStructField('a1, 0), Seq(DropField("b3"), DropField("c3")))) )).as("a1")) @@ -797,7 +794,6 @@ class ComplexTypesSuite extends PlanTest with ExpressionEvalHelper { structLevel2.select( UpdateFields('a1, Seq( - WithField("a2", repeatedExpr), WithField("a2", UpdateFields( If(IsNull('a1), Literal(null, repeatedExprDataType), repeatedExpr), DropField("c3") :: Nil)) From 6ad75cda1eb9704ca1fd1539ea80454d66681965 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Mon, 19 Oct 2020 12:50:01 -0700 Subject: [PATCH 0281/1009] [SPARK-17333][PYSPARK] Enable mypy ### What changes were proposed in this pull request? Add MyPy to the CI. Once this is installed on the CI: https://issues.apache.org/jira/browse/SPARK-32797?jql=project%20%3D%20SPARK%20AND%20text%20~%20mypy this wil automatically check the types. ### Why are the changes needed? We should check if the types are still correct on the CI. ``` MacBook-Pro-van-Fokko:spark fokkodriesprong$ ./dev/lint-python starting python compilation test... python compilation succeeded. starting pycodestyle test... pycodestyle checks passed. starting flake8 test... flake8 checks passed. starting mypy test... mypy checks passed. The sphinx-build command was not found. Skipping Sphinx build for now. all lint-python tests passed! ``` ### Does this PR introduce _any_ user-facing change? No :) ### How was this patch tested? By running `./dev/lint-python` locally. Closes #30088 from Fokko/SPARK-17333. 
Authored-by: Fokko Driesprong Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 2 +- .gitignore | 1 + dev/lint-python | 27 ++++++++++++++++++++++++++- 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index cdbe34129637e..762e22f24cc2b 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -287,7 +287,7 @@ jobs: run: | # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. # See also https://github.com/sphinx-doc/sphinx/issues/7551. - pip3 install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme ipython nbsphinx + pip3 install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme ipython nbsphinx mypy - name: Install R 4.0 uses: r-lib/actions/setup-r@v1 with: diff --git a/.gitignore b/.gitignore index 2e4f77ad6fb42..82414d1fa35bf 100644 --- a/.gitignore +++ b/.gitignore @@ -68,6 +68,7 @@ python/docs/source/reference/api/ python/test_coverage/coverage_data python/test_coverage/htmlcov python/pyspark/python +.mypy_cache/ reports/ scalastyle-on-compile.generated.xml scalastyle-output.xml diff --git a/dev/lint-python b/dev/lint-python index 21949e5d8e4e7..62664818dc106 100755 --- a/dev/lint-python +++ b/dev/lint-python @@ -18,7 +18,7 @@ # define test binaries + versions FLAKE8_BUILD="flake8" MINIMUM_FLAKE8="3.5.0" - +MYPY_BUILD="mypy" PYCODESTYLE_BUILD="pycodestyle" MINIMUM_PYCODESTYLE="2.6.0" @@ -122,6 +122,30 @@ function pycodestyle_test { fi } +function mypy_test { + local MYPY_REPORT= + local MYPY_STATUS= + + if ! hash "$MYPY_BUILD" 2> /dev/null; then + echo "The $MYPY_BUILD command was not found. Skipping for now." + return + fi + + echo "starting $MYPY_BUILD test..." + MYPY_REPORT=$( ($MYPY_BUILD --config-file python/mypy.ini python/pyspark) 2>&1) + MYPY_STATUS=$? + + if [ "$MYPY_STATUS" -ne 0 ]; then + echo "mypy checks failed:" + echo "$MYPY_REPORT" + echo "$MYPY_STATUS" + exit "$MYPY_STATUS" + else + echo "mypy checks passed." + echo + fi +} + function flake8_test { local FLAKE8_VERSION= local EXPECTED_FLAKE8= @@ -246,6 +270,7 @@ PYTHON_SOURCE="$(find . -name "*.py")" compile_python_test "$PYTHON_SOURCE" pycodestyle_test "$PYTHON_SOURCE" flake8_test +mypy_test sphinx_test echo From f65a24412b6691ecdb4254e70d6e7abc846edb66 Mon Sep 17 00:00:00 2001 From: liaoaoyuan97 Date: Tue, 20 Oct 2020 10:23:58 +0900 Subject: [PATCH 0282/1009] [SPARK-33181][SQL][DOCS] Document Load Table Directly from File in SQL Select Reference ### What changes were proposed in this pull request? Add the link to the feature: "Run SQL on files directly" to SQL reference documentation page ### Why are the changes needed? To make SQL Reference complete ### Does this PR introduce _any_ user-facing change? yes. Previously, reading in sql from file directly is not included in the documentation: https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select.html, not listed in from_items. The new link is added to the select statement documentation, like the below: ![image](https://user-images.githubusercontent.com/16770242/96517999-c34f3900-121e-11eb-8d56-c4ba0432855e.png) ![image](https://user-images.githubusercontent.com/16770242/96518808-8126f700-1220-11eb-8c98-fb398eee0330.png) ### How was this patch tested? Manually built and tested Closes #30095 from liaoaoyuan97/master. 
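For readers following the new link, a minimal sketch of the "Run SQL on files directly" feature it points to (the file path below is a placeholder, not part of this patch): ``` // Query a Parquet file directly by qualifying the path with its format, // as documented on the newly linked page. Any existing Parquet file works. val df = spark.sql("SELECT * FROM parquet.`/tmp/example-data.parquet`") df.show() ```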
Authored-by: liaoaoyuan97 Signed-off-by: HyukjinKwon --- docs/sql-ref-syntax-qry-select.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/sql-ref-syntax-qry-select.md b/docs/sql-ref-syntax-qry-select.md index 453737a2f36b8..655766d4c6d22 100644 --- a/docs/sql-ref-syntax-qry-select.md +++ b/docs/sql-ref-syntax-qry-select.md @@ -85,6 +85,7 @@ SELECT [ hints , ... ] [ ALL | DISTINCT ] { named_expression [ , ... ] } * [Table-value function](sql-ref-syntax-qry-select-tvf.html) * [Inline table](sql-ref-syntax-qry-select-inline-table.html) * Subquery + * [File](sql-data-sources-load-save-functions.html#run-sql-on-files-directly) * **PIVOT** From 35133901f79209bd5e6e3e17531095d0ecae737d Mon Sep 17 00:00:00 2001 From: Nan Zhu Date: Tue, 20 Oct 2020 11:13:16 +0900 Subject: [PATCH 0283/1009] [SPARK-32351][SQL] Show partially pushed down partition filters in explain() ### What changes were proposed in this pull request? Currently, actual non-dynamic partition pruning is executed in the optimizer phase (PruneFileSourcePartitions) if an input relation has a catalog file index. The current code assumes the same partition filters are generated again in FileSourceStrategy and passed into FileSourceScanExec. FileSourceScanExec uses the partition filters when listing files, but these non-dynamic partition filters do nothing because unnecessary partitions are already pruned in advance, so the filters are mainly used for explain output in this case. If a WHERE clause has DNF-ed predicates, FileSourceStrategy cannot extract the same filters with PruneFileSourcePartitions and then PartitionFilters is not shown in explain output. This patch proposes to extract partition filters in FileSourceStrategy and HiveStrategy with `extractPredicatesWithinOutputSet` added in https://github.com/apache/spark/pull/29101/files#diff-6be42cfa3c62a7536b1eb1d6447c073c again, then It will show the partially pushed down partition filter in explain(). ### Why are the changes needed? 
Without the patch, the explained plan is inconsistent with what is actually executed. Without the change, the explained plans of `"SELECT * FROM t WHERE p = '1' OR (p = '2' AND i = 1)"` for data source and Hive tables look like the following, respectively (the pushed-down partition filters are missing): ``` == Physical Plan == *(1) Filter ((p#21 = 1) OR ((p#21 = 2) AND (i#20 = 1))) +- *(1) ColumnarToRow +- FileScan parquet default.t[i#20,p#21] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex[file:/Users/nanzhu/code/spark/sql/hive/target/tmp/hive_execution_test_group/war..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct ``` ``` == Physical Plan == *(1) Filter ((p#33 = 1) OR ((p#33 = 2) AND (i#32 = 1))) +- Scan hive default.t [i#32, p#33], HiveTableRelation [`default`.`t`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, Data Cols: [i#32], Partition Cols: [p#33], Pruned Partitions: [(p=1), (p=2)]] ``` With the change, the plans look like the following (the actually executed partition filters are shown): ``` == Physical Plan == *(1) Filter ((p#21 = 1) OR ((p#21 = 2) AND (i#20 = 1))) +- *(1) ColumnarToRow +- FileScan parquet default.t[i#20,p#21] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex[file:/Users/nanzhu/code/spark/sql/hive/target/tmp/hive_execution_test_group/war..., PartitionFilters: [((p#21 = 1) OR (p#21 = 2))], PushedFilters: [], ReadSchema: struct ``` ``` == Physical Plan == *(1) Filter ((p#37 = 1) OR ((p#37 = 2) AND (i#36 = 1))) +- Scan hive default.t [i#36, p#37], HiveTableRelation [`default`.`t`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, Data Cols: [i#36], Partition Cols: [p#37], Pruned Partitions: [(p=1), (p=2)]], [((p#37 = 1) OR (p#37 = 2))] ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Closes #29831 from CodingCat/SPARK-32351.
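For reference, a minimal sketch that reproduces the plans above; only the query and the column names `i`/`p` come from this description, the table setup is illustrative: ``` // Create a small partitioned data source table and inspect the plan. // With this change, PartitionFilters should contain the partially pushed-down // predicate ((p = '1') OR (p = '2')) extracted from the DNF-ed WHERE clause. spark.sql("CREATE TABLE t (i INT, p STRING) USING parquet PARTITIONED BY (p)") spark.sql("INSERT INTO t VALUES (1, '1'), (1, '2'), (2, '3')") spark.sql("SELECT * FROM t WHERE p = '1' OR (p = '2' AND i = 1)").explain() ```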
Lead-authored-by: Nan Zhu Co-authored-by: Nan Zhu Signed-off-by: HyukjinKwon --- .../datasources/DataSourceStrategy.scala | 17 +++++- .../datasources/FileSourceStrategy.scala | 12 ++-- .../spark/sql/hive/HiveStrategies.scala | 17 +++--- .../PruneHiveTablePartitionsSuite.scala | 17 +++++- .../execution/PrunePartitionSuiteBase.scala | 59 +++++++++++++++---- 5 files changed, 93 insertions(+), 29 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index 86e85719272e8..9d666fc3a063e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -42,6 +42,7 @@ import org.apache.spark.sql.connector.catalog.SupportsRead import org.apache.spark.sql.connector.catalog.TableCapability._ import org.apache.spark.sql.execution.{RowDataSourceScanExec, SparkPlan} import org.apache.spark.sql.execution.command._ +import org.apache.spark.sql.execution.datasources.FileSourceStrategy.{extractPredicatesWithinOutputSet, logInfo} import org.apache.spark.sql.execution.streaming.StreamingRelation import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy @@ -467,7 +468,7 @@ case class DataSourceStrategy(conf: SQLConf) extends Strategy with Logging with } } -object DataSourceStrategy { +object DataSourceStrategy extends PredicateHelper { /** * The attribute name may differ from the one in the schema if the query analyzer * is case insensitive. We should change attribute names to match the ones in the schema, @@ -484,6 +485,20 @@ object DataSourceStrategy { } } + def getPushedDownFilters( + partitionColumns: Seq[Expression], + normalizedFilters: Seq[Expression]): ExpressionSet = { + if (partitionColumns.isEmpty) { + ExpressionSet(Nil) + } else { + val partitionSet = AttributeSet(partitionColumns) + val predicates = ExpressionSet(normalizedFilters + .flatMap(extractPredicatesWithinOutputSet(_, partitionSet))) + logInfo(s"Pruning directories with: ${predicates.mkString(",")}") + predicates + } + } + private def translateLeafNodeFilter( predicate: Expression, pushableColumn: PushableColumnBase): Option[Filter] = predicate match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala index 7928f6e0f9324..1191f99cc98a2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala @@ -154,15 +154,11 @@ object FileSourceStrategy extends Strategy with PredicateHelper with Logging { l.resolve( fsRelation.partitionSchema, fsRelation.sparkSession.sessionState.analyzer.resolver) val partitionSet = AttributeSet(partitionColumns) - val partitionKeyFilters = if (partitionColumns.isEmpty) { - ExpressionSet(Nil) - } else { - val predicates = ExpressionSet(normalizedFilters - .filter(_.references.subsetOf(partitionSet))) - logInfo(s"Pruning directories with: ${predicates.mkString(",")}") - predicates - } + // this partitionKeyFilters should be the same with the ones being executed in + // PruneFileSourcePartitions + val partitionKeyFilters = DataSourceStrategy.getPushedDownFilters(partitionColumns, + 
normalizedFilters) // subquery expressions are filtered out because they can't be used to prune buckets or pushed // down as data filters, yet they would be executed diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 97e1dee5913a4..2ace96583d9cc 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.catalog.CatalogV2Util.assertNoNullTypeInSchema import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.command.{CreateTableCommand, DDLUtils} -import org.apache.spark.sql.execution.datasources.CreateTable +import org.apache.spark.sql.execution.datasources.{CreateTable, DataSourceStrategy} import org.apache.spark.sql.hive.execution._ import org.apache.spark.sql.hive.execution.HiveScriptTransformationExec import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} @@ -256,20 +256,21 @@ private[hive] trait HiveStrategies { */ object HiveTableScans extends Strategy { def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { - case ScanOperation(projectList, predicates, relation: HiveTableRelation) => + case ScanOperation(projectList, filters, relation: HiveTableRelation) => // Filter out all predicates that only deal with partition keys, these are given to the // hive table scan operator to be used for partition pruning. val partitionKeyIds = AttributeSet(relation.partitionCols) - val (pruningPredicates, otherPredicates) = predicates.partition { predicate => - !predicate.references.isEmpty && - predicate.references.subsetOf(partitionKeyIds) - } + val normalizedFilters = DataSourceStrategy.normalizeExprs( + filters.filter(_.deterministic), relation.output) + + val partitionKeyFilters = DataSourceStrategy.getPushedDownFilters(relation.partitionCols, + normalizedFilters) pruneFilterProject( projectList, - otherPredicates, + filters.filter(f => f.references.isEmpty || !f.references.subsetOf(partitionKeyIds)), identity[Seq[Expression]], - HiveTableScanExec(_, relation, pruningPredicates)(sparkSession)) :: Nil + HiveTableScanExec(_, relation, partitionKeyFilters.toSeq)(sparkSession)) :: Nil case _ => Nil } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitionsSuite.scala index 06aea084330fa..018df35403be5 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitionsSuite.scala @@ -75,7 +75,22 @@ class PruneHiveTablePartitionsSuite extends PrunePartitionSuiteBase { } val scale = 20 val predicate = (1 to scale).map(i => s"(p0 = '$i' AND p1 = '$i')").mkString(" OR ") - assertPrunedPartitions(s"SELECT * FROM t WHERE $predicate", scale) + val expectedStr = { + // left + "(((((((`p0` = 1) && (`p1` = 1)) || ((`p0` = 2) && (`p1` = 2))) ||" + + " ((`p0` = 3) && (`p1` = 3))) || (((`p0` = 4) && (`p1` = 4)) ||" + + " ((`p0` = 5) && (`p1` = 5)))) || (((((`p0` = 6) && (`p1` = 6)) ||" + + " ((`p0` = 7) && (`p1` = 7))) || ((`p0` = 8) && (`p1` = 8))) ||" + + " (((`p0` = 9) && (`p1` = 9)) || ((`p0` = 10) && (`p1` = 10))))) ||" + + // right + " ((((((`p0` = 11) && (`p1` = 11)) || ((`p0` = 12) && (`p1` = 12))) 
||" + + " ((`p0` = 13) && (`p1` = 13))) || (((`p0` = 14) && (`p1` = 14)) ||" + + " ((`p0` = 15) && (`p1` = 15)))) || (((((`p0` = 16) && (`p1` = 16)) ||" + + " ((`p0` = 17) && (`p1` = 17))) || ((`p0` = 18) && (`p1` = 18))) ||" + + " (((`p0` = 19) && (`p1` = 19)) || ((`p0` = 20) && (`p1` = 20))))))" + } + assertPrunedPartitions(s"SELECT * FROM t WHERE $predicate", scale, + expectedStr) } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala index d088061cdc6e5..993a730524f6f 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala @@ -18,7 +18,8 @@ package org.apache.spark.sql.hive.execution import org.apache.spark.sql.QueryTest -import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, BinaryOperator, EqualTo, Expression, IsNotNull, Literal} +import org.apache.spark.sql.execution.{FileSourceScanExec, SparkPlan} import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.test.SQLTestUtils @@ -46,30 +47,66 @@ abstract class PrunePartitionSuiteBase extends QueryTest with SQLTestUtils with } assertPrunedPartitions( - "SELECT * FROM t WHERE p = '1' OR (p = '2' AND i = 1)", 2) + "SELECT * FROM t WHERE p = '1' OR (p = '2' AND i = 1)", 2, + "((`p` = '1') || (`p` = '2'))") assertPrunedPartitions( - "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (i = 1 OR p = '2')", 4) + "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (i = 1 OR p = '2')", 4, + "") assertPrunedPartitions( - "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (p = '3' AND i = 3 )", 2) + "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (p = '3' AND i = 3 )", 2, + "((`p` = '1') || (`p` = '3'))") assertPrunedPartitions( - "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (p = '2' OR p = '3')", 3) + "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (p = '2' OR p = '3')", 3, + "((`p` = '1') || ((`p` = '2') || (`p` = '3')))") assertPrunedPartitions( - "SELECT * FROM t", 4) + "SELECT * FROM t", 4, + "") assertPrunedPartitions( - "SELECT * FROM t WHERE p = '1' AND i = 2", 1) + "SELECT * FROM t WHERE p = '1' AND i = 2", 1, + "(`p` = '1')") assertPrunedPartitions( """ |SELECT i, COUNT(1) FROM ( |SELECT * FROM t WHERE p = '1' OR (p = '2' AND i = 1) |) tmp GROUP BY i - """.stripMargin, 2) + """.stripMargin, 2, "((`p` = '1') || (`p` = '2'))") } } } - protected def assertPrunedPartitions(query: String, expected: Long): Unit = { - val plan = sql(query).queryExecution.sparkPlan - assert(getScanExecPartitionSize(plan) == expected) + private def getCleanStringRepresentation(exp: Expression): String = exp match { + case attr: AttributeReference => + attr.sql.replaceAll("spark_catalog.default.t.", "") + case l: Literal => + l.sql + case e: BinaryOperator => + s"(${getCleanStringRepresentation(e.left)} ${e.symbol} " + + s"${getCleanStringRepresentation(e.right)})" + } + + protected def assertPrunedPartitions( + query: String, + expectedPartitionCount: Long, + expectedPushedDownFilters: String): Unit = { + val qe = sql(query).queryExecution + val plan = qe.sparkPlan + assert(getScanExecPartitionSize(plan) == expectedPartitionCount) + + val pushedDownPartitionFilters = qe.executedPlan.collectFirst { + case scan: FileSourceScanExec => scan.partitionFilters + case scan: HiveTableScanExec => 
scan.partitionPruningPred + }.map(exps => exps.filterNot(e => e.isInstanceOf[IsNotNull])) + val pushedFilters = pushedDownPartitionFilters.map(filters => { + filters.foldLeft("")((currentStr, exp) => { + if (currentStr == "") { + s"${getCleanStringRepresentation(exp)}" + } else { + s"$currentStr AND ${getCleanStringRepresentation(exp)}" + } + }) + }) + + assert(pushedFilters == Some(expectedPushedDownFilters)) } protected def getScanExecPartitionSize(plan: SparkPlan): Long From a44e008de3ae5aecad9e0f1a7af6a1e8b0d97f4e Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 20 Oct 2020 14:58:59 +0900 Subject: [PATCH 0284/1009] [SPARK-33160][SQL] Allow saving/loading INT96 in parquet w/o rebasing ### What changes were proposed in this pull request? 1. Add the SQL config `spark.sql.legacy.parquet.int96RebaseModeInWrite` to control timestamp rebasing when saving timestamps as INT96. It supports the same set of values as `spark.sql.legacy.parquet.datetimeRebaseModeInWrite` but the default value is `LEGACY` to preserve backward compatibility with Spark <= 3.0. 2. Write the metadata key `org.apache.spark.int96NoRebase` to parquet files if the files are saved with `spark.sql.legacy.parquet.int96RebaseModeInWrite` set to a value other than `LEGACY`. 3. Add the SQL config `spark.sql.legacy.parquet.int96RebaseModeInRead` to control loading INT96 timestamps when parquet metadata doesn't have enough info (the `org.apache.spark.int96NoRebase` tag) about the parquet writer - whether INT96 was written by a Proleptic Gregorian system or a Julian one. 4. Modified Vectorized and Parquet-mr Readers to support loading/saving INT96 timestamps w/o rebasing depending on the SQL config and the metadata tag: - **No rebasing** in testing when the SQL config `spark.test.forceNoRebase` is set to `true` - **No rebasing** if parquet metadata contains the tag `org.apache.spark.int96NoRebase`. This is the case when parquet files are saved by Spark >= 3.1 with `spark.sql.legacy.parquet.int96RebaseModeInWrite` set to `CORRECTED`, or saved by other systems with the tag `org.apache.spark.int96NoRebase`. - **With rebasing** if parquet files were saved by Spark (any version) without the metadata tag `org.apache.spark.int96NoRebase`. - Rebasing depends on the SQL config `spark.sql.legacy.parquet.int96RebaseModeInRead` if there are no metadata tags `org.apache.spark.version` and `org.apache.spark.int96NoRebase`. New SQL configs are added instead of re-using the existing `spark.sql.legacy.parquet.datetimeRebaseModeInWrite` and `spark.sql.legacy.parquet.datetimeRebaseModeInRead` for the following reasons: - To allow users to have different modes for INT96 and for TIMESTAMP_MICROS (MILLIS). For example, users might want to save INT96 as LEGACY but TIMESTAMP_MICROS as CORRECTED. - To have different modes for INT96 and DATE in load (or in save). - To be backward compatible with Spark 2.4. For now, `spark.sql.legacy.parquet.datetimeRebaseModeInWrite/Read` are set to `EXCEPTION` by default. ### Why are the changes needed? 1. The Parquet spec says that INT96 must be stored as Julian days (see https://github.com/apache/parquet-format/pull/49). This doesn't mean that a reader (or a writer) is based on the Julian calendar. So, rebasing from the Proleptic Gregorian to the Julian calendar may not be needed. 2. Rebasing from/to the Julian calendar can lose information because dates in one calendar don't exist in the other.
For example, 1582-10-04..1582-10-15 exist in the Proleptic Gregorian calendar but not in the hybrid calendar (Julian + Gregorian), and vice versa, the Julian date 1000-02-29 doesn't exist in the Proleptic Gregorian calendar. We should allow users to save timestamps without losing such dates (rebasing shifts such dates to the next valid date). 3. It would also make Spark compatible with other systems such as Impala and newer versions of Hive that write Proleptic Gregorian-based INT96 timestamps. ### Does this PR introduce _any_ user-facing change? It can when `spark.sql.legacy.parquet.int96RebaseModeInWrite` is set to a value other than the default `LEGACY`. ### How was this patch tested? - Added a test to check the metadata key `org.apache.spark.int96NoRebase` - By `ParquetIOSuite` Closes #30056 from MaxGekk/parquet-rebase-int96.
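A minimal sketch of exercising the write-side config added here; the config keys and the metadata key come from this patch, while the path, the session setup, and the use of `spark.sql.parquet.outputTimestampType` are illustrative: ``` // Write INT96 timestamps without rebasing them to the hybrid Julian calendar. spark.conf.set("spark.sql.parquet.outputTimestampType", "INT96") spark.conf.set("spark.sql.legacy.parquet.int96RebaseModeInWrite", "CORRECTED") import spark.implicits._ Seq(java.sql.Timestamp.valueOf("1001-01-01 01:02:03.123")) .toDF("ts") .write.mode("overwrite").parquet("/tmp/int96_no_rebase") // Files written this way carry the `org.apache.spark.int96NoRebase` metadata // key, so they are read back without rebasing regardless of // `spark.sql.legacy.parquet.int96RebaseModeInRead`. spark.read.parquet("/tmp/int96_no_rebase").show(false) ```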
*/ def toJulianDay(micros: Long): (Int, Long) = { - val julianUs = rebaseGregorianToJulianMicros(micros) + JULIAN_DAY_OF_EPOCH * MICROS_PER_DAY + val julianUs = micros + JULIAN_DAY_OF_EPOCH * MICROS_PER_DAY val days = julianUs / MICROS_PER_DAY val us = julianUs % MICROS_PER_DAY (days.toInt, MICROSECONDS.toNanos(us)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 0497c9b7e80b8..3648615a1eaee 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2654,6 +2654,20 @@ object SQLConf { .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString) + val LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE = + buildConf("spark.sql.legacy.parquet.int96RebaseModeInWrite") + .internal() + .doc("When LEGACY, which is the default, Spark will rebase INT96 timestamps from " + + "Proleptic Gregorian calendar to the legacy hybrid (Julian + Gregorian) calendar when " + + "writing Parquet files. When CORRECTED, Spark will not do rebase and write the timestamps" + + " as it is. When EXCEPTION, Spark will fail the writing if it sees ancient timestamps " + + "that are ambiguous between the two calendars.") + .version("3.1.0") + .stringConf + .transform(_.toUpperCase(Locale.ROOT)) + .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) + .createWithDefault(LegacyBehaviorPolicy.LEGACY.toString) + val LEGACY_PARQUET_REBASE_MODE_IN_READ = buildConf("spark.sql.legacy.parquet.datetimeRebaseModeInRead") .internal() @@ -2669,6 +2683,21 @@ object SQLConf { .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString) + val LEGACY_PARQUET_INT96_REBASE_MODE_IN_READ = + buildConf("spark.sql.legacy.parquet.int96RebaseModeInRead") + .internal() + .doc("When LEGACY, which is the default, Spark will rebase INT96 timestamps from " + + "the legacy hybrid (Julian + Gregorian) calendar to Proleptic Gregorian calendar when " + + "reading Parquet files. When CORRECTED, Spark will not do rebase and read the timestamps " + + "as it is. When EXCEPTION, Spark will fail the reading if it sees ancient timestamps " + + "that are ambiguous between the two calendars. 
This config is only effective if the " + + "writer info (like Spark, Hive) of the Parquet files is unknown.") + .version("3.1.0") + .stringConf + .transform(_.toUpperCase(Locale.ROOT)) + .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) + .createWithDefault(LegacyBehaviorPolicy.LEGACY.toString) + val LEGACY_AVRO_REBASE_MODE_IN_WRITE = buildConf("spark.sql.legacy.avro.datetimeRebaseModeInWrite") .internal() diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index fe761f672c041..7bbdf44d78c3c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.catalyst.util.DateTimeConstants._ import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import org.apache.spark.sql.catalyst.util.DateTimeUtils._ +import org.apache.spark.sql.catalyst.util.RebaseDateTime.rebaseJulianToGregorianMicros import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { @@ -70,17 +71,17 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { } test("us and julian day") { - val (d, ns) = toJulianDay(0) + val (d, ns) = toJulianDay(RebaseDateTime.rebaseGregorianToJulianMicros(0)) assert(d === JULIAN_DAY_OF_EPOCH) assert(ns === 0) - assert(fromJulianDay(d, ns) == 0L) + assert(rebaseJulianToGregorianMicros(fromJulianDay(d, ns)) == 0L) Seq(Timestamp.valueOf("2015-06-11 10:10:10.100"), Timestamp.valueOf("2015-06-11 20:10:10.100"), Timestamp.valueOf("1900-06-11 20:10:10.100")).foreach { t => - val (d, ns) = toJulianDay(fromJavaTimestamp(t)) + val (d, ns) = toJulianDay(RebaseDateTime.rebaseGregorianToJulianMicros(fromJavaTimestamp(t))) assert(ns > 0) - val t1 = toJavaTimestamp(fromJulianDay(d, ns)) + val t1 = toJavaTimestamp(rebaseJulianToGregorianMicros(fromJulianDay(d, ns))) assert(t.equals(t1)) } } diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java index 3e409ab9a50a1..1b8b18d4d8735 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java @@ -104,13 +104,15 @@ public class VectorizedColumnReader { private final ZoneId convertTz; private static final ZoneId UTC = ZoneOffset.UTC; private final String datetimeRebaseMode; + private final String int96RebaseMode; public VectorizedColumnReader( ColumnDescriptor descriptor, OriginalType originalType, PageReader pageReader, ZoneId convertTz, - String datetimeRebaseMode) throws IOException { + String datetimeRebaseMode, + String int96RebaseMode) throws IOException { this.descriptor = descriptor; this.pageReader = pageReader; this.convertTz = convertTz; @@ -136,6 +138,9 @@ public VectorizedColumnReader( assert "LEGACY".equals(datetimeRebaseMode) || "EXCEPTION".equals(datetimeRebaseMode) || "CORRECTED".equals(datetimeRebaseMode); this.datetimeRebaseMode = datetimeRebaseMode; + assert "LEGACY".equals(int96RebaseMode) || "EXCEPTION".equals(int96RebaseMode) || + 
"CORRECTED".equals(int96RebaseMode); + this.int96RebaseMode = int96RebaseMode; } /** @@ -189,10 +194,13 @@ static int rebaseDays(int julianDays, final boolean failIfRebase) { } } - static long rebaseMicros(long julianMicros, final boolean failIfRebase) { + private static long rebaseTimestamp( + long julianMicros, + final boolean failIfRebase, + final String format) { if (failIfRebase) { if (julianMicros < RebaseDateTime.lastSwitchJulianTs()) { - throw DataSourceUtils.newRebaseExceptionInRead("Parquet"); + throw DataSourceUtils.newRebaseExceptionInRead(format); } else { return julianMicros; } @@ -201,6 +209,14 @@ static long rebaseMicros(long julianMicros, final boolean failIfRebase) { } } + static long rebaseMicros(long julianMicros, final boolean failIfRebase) { + return rebaseTimestamp(julianMicros, failIfRebase, "Parquet"); + } + + static long rebaseInt96(long julianMicros, final boolean failIfRebase) { + return rebaseTimestamp(julianMicros, failIfRebase, "Parquet INT96"); + } + /** * Reads `total` values from this columnReader into column. */ @@ -399,20 +415,44 @@ private void decodeDictionaryIds( break; case INT96: if (column.dataType() == DataTypes.TimestampType) { + final boolean failIfRebase = "EXCEPTION".equals(int96RebaseMode); if (!shouldConvertTimestamps()) { - for (int i = rowId; i < rowId + num; ++i) { - if (!column.isNullAt(i)) { - Binary v = dictionary.decodeToBinary(dictionaryIds.getDictId(i)); - column.putLong(i, ParquetRowConverter.binaryToSQLTimestamp(v)); + if ("CORRECTED".equals(int96RebaseMode)) { + for (int i = rowId; i < rowId + num; ++i) { + if (!column.isNullAt(i)) { + Binary v = dictionary.decodeToBinary(dictionaryIds.getDictId(i)); + column.putLong(i, ParquetRowConverter.binaryToSQLTimestamp(v)); + } + } + } else { + for (int i = rowId; i < rowId + num; ++i) { + if (!column.isNullAt(i)) { + Binary v = dictionary.decodeToBinary(dictionaryIds.getDictId(i)); + long julianMicros = ParquetRowConverter.binaryToSQLTimestamp(v); + long gregorianMicros = rebaseInt96(julianMicros, failIfRebase); + column.putLong(i, gregorianMicros); + } } } } else { - for (int i = rowId; i < rowId + num; ++i) { - if (!column.isNullAt(i)) { - Binary v = dictionary.decodeToBinary(dictionaryIds.getDictId(i)); - long rawTime = ParquetRowConverter.binaryToSQLTimestamp(v); - long adjTime = DateTimeUtils.convertTz(rawTime, convertTz, UTC); - column.putLong(i, adjTime); + if ("CORRECTED".equals(int96RebaseMode)) { + for (int i = rowId; i < rowId + num; ++i) { + if (!column.isNullAt(i)) { + Binary v = dictionary.decodeToBinary(dictionaryIds.getDictId(i)); + long gregorianMicros = ParquetRowConverter.binaryToSQLTimestamp(v); + long adjTime = DateTimeUtils.convertTz(gregorianMicros, convertTz, UTC); + column.putLong(i, adjTime); + } + } + } else { + for (int i = rowId; i < rowId + num; ++i) { + if (!column.isNullAt(i)) { + Binary v = dictionary.decodeToBinary(dictionaryIds.getDictId(i)); + long julianMicros = ParquetRowConverter.binaryToSQLTimestamp(v); + long gregorianMicros = rebaseInt96(julianMicros, failIfRebase); + long adjTime = DateTimeUtils.convertTz(gregorianMicros, convertTz, UTC); + column.putLong(i, adjTime); + } } } } @@ -577,25 +617,53 @@ private void readBinaryBatch(int rowId, int num, WritableColumnVector column) th || DecimalType.isByteArrayDecimalType(column.dataType())) { defColumn.readBinarys(num, column, rowId, maxDefLevel, data); } else if (column.dataType() == DataTypes.TimestampType) { + final boolean failIfRebase = "EXCEPTION".equals(int96RebaseMode); if 
(!shouldConvertTimestamps()) { - for (int i = 0; i < num; i++) { - if (defColumn.readInteger() == maxDefLevel) { - // Read 12 bytes for INT96 - long rawTime = ParquetRowConverter.binaryToSQLTimestamp(data.readBinary(12)); - column.putLong(rowId + i, rawTime); - } else { - column.putNull(rowId + i); + if ("CORRECTED".equals(int96RebaseMode)) { + for (int i = 0; i < num; i++) { + if (defColumn.readInteger() == maxDefLevel) { + // Read 12 bytes for INT96 + long gregorianMicros = ParquetRowConverter.binaryToSQLTimestamp(data.readBinary(12)); + column.putLong(rowId + i, gregorianMicros); + } else { + column.putNull(rowId + i); + } + } + } else { + for (int i = 0; i < num; i++) { + if (defColumn.readInteger() == maxDefLevel) { + // Read 12 bytes for INT96 + long julianMicros = ParquetRowConverter.binaryToSQLTimestamp(data.readBinary(12)); + long gregorianMicros = rebaseInt96(julianMicros, failIfRebase); + column.putLong(rowId + i, gregorianMicros); + } else { + column.putNull(rowId + i); + } } } } else { - for (int i = 0; i < num; i++) { - if (defColumn.readInteger() == maxDefLevel) { - // Read 12 bytes for INT96 - long rawTime = ParquetRowConverter.binaryToSQLTimestamp(data.readBinary(12)); - long adjTime = DateTimeUtils.convertTz(rawTime, convertTz, UTC); - column.putLong(rowId + i, adjTime); - } else { - column.putNull(rowId + i); + if ("CORRECTED".equals(int96RebaseMode)) { + for (int i = 0; i < num; i++) { + if (defColumn.readInteger() == maxDefLevel) { + // Read 12 bytes for INT96 + long gregorianMicros = ParquetRowConverter.binaryToSQLTimestamp(data.readBinary(12)); + long adjTime = DateTimeUtils.convertTz(gregorianMicros, convertTz, UTC); + column.putLong(rowId + i, adjTime); + } else { + column.putNull(rowId + i); + } + } + } else { + for (int i = 0; i < num; i++) { + if (defColumn.readInteger() == maxDefLevel) { + // Read 12 bytes for INT96 + long julianMicros = ParquetRowConverter.binaryToSQLTimestamp(data.readBinary(12)); + long gregorianMicros = rebaseInt96(julianMicros, failIfRebase); + long adjTime = DateTimeUtils.convertTz(gregorianMicros, convertTz, UTC); + column.putLong(rowId + i, adjTime); + } else { + column.putNull(rowId + i); + } } } } diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java index b40cc154d76fe..9d38a74a2956a 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java @@ -93,6 +93,11 @@ public class VectorizedParquetRecordReader extends SpecificParquetRecordReaderBa */ private final String datetimeRebaseMode; + /** + * The mode of rebasing INT96 timestamp from Julian to Proleptic Gregorian calendar. + */ + private final String int96RebaseMode; + /** * columnBatch object that is used for batch decoding. This is created on first use and triggers * batched decoding. 
It is not valid to interleave calls to the batched interface with the row @@ -122,16 +127,21 @@ public class VectorizedParquetRecordReader extends SpecificParquetRecordReaderBa private final MemoryMode MEMORY_MODE; public VectorizedParquetRecordReader( - ZoneId convertTz, String datetimeRebaseMode, boolean useOffHeap, int capacity) { + ZoneId convertTz, + String datetimeRebaseMode, + String int96RebaseMode, + boolean useOffHeap, + int capacity) { this.convertTz = convertTz; this.datetimeRebaseMode = datetimeRebaseMode; + this.int96RebaseMode = int96RebaseMode; MEMORY_MODE = useOffHeap ? MemoryMode.OFF_HEAP : MemoryMode.ON_HEAP; this.capacity = capacity; } // For test only. public VectorizedParquetRecordReader(boolean useOffHeap, int capacity) { - this(null, "CORRECTED", useOffHeap, capacity); + this(null, "CORRECTED", "LEGACY", useOffHeap, capacity); } /** @@ -320,8 +330,13 @@ private void checkEndOfRowGroup() throws IOException { columnReaders = new VectorizedColumnReader[columns.size()]; for (int i = 0; i < columns.size(); ++i) { if (missingColumns[i]) continue; - columnReaders[i] = new VectorizedColumnReader(columns.get(i), types.get(i).getOriginalType(), - pages.getPageReader(columns.get(i)), convertTz, datetimeRebaseMode); + columnReaders[i] = new VectorizedColumnReader( + columns.get(i), + types.get(i).getOriginalType(), + pages.getPageReader(columns.get(i)), + convertTz, + datetimeRebaseMode, + int96RebaseMode); } totalCountLoadedSoFar += pages.getRowCount(); } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala index b4308a872bb39..f8068a634977b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala @@ -26,7 +26,7 @@ import org.json4s.NoTypeHints import org.json4s.jackson.Serialization import org.apache.spark.SparkUpgradeException -import org.apache.spark.sql.{SPARK_LEGACY_DATETIME, SPARK_VERSION_METADATA_KEY} +import org.apache.spark.sql.{SPARK_INT96_NO_REBASE, SPARK_LEGACY_DATETIME, SPARK_VERSION_METADATA_KEY} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogUtils} import org.apache.spark.sql.catalyst.util.RebaseDateTime @@ -111,13 +111,26 @@ object DataSourceUtils { }.getOrElse(LegacyBehaviorPolicy.withName(modeByConfig)) } - def newRebaseExceptionInRead(format: String): SparkUpgradeException = { - val config = if (format == "Parquet") { - SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ.key - } else if (format == "Avro") { - SQLConf.LEGACY_AVRO_REBASE_MODE_IN_READ.key + def int96RebaseMode( + lookupFileMeta: String => String, + modeByConfig: String): LegacyBehaviorPolicy.Value = { + if (Utils.isTesting && SQLConf.get.getConfString("spark.test.forceNoRebase", "") == "true") { + LegacyBehaviorPolicy.CORRECTED + } else if (lookupFileMeta(SPARK_INT96_NO_REBASE) != null) { + LegacyBehaviorPolicy.CORRECTED + } else if (lookupFileMeta(SPARK_VERSION_METADATA_KEY) != null) { + LegacyBehaviorPolicy.LEGACY } else { - throw new IllegalStateException("unrecognized format " + format) + LegacyBehaviorPolicy.withName(modeByConfig) + } + } + + def newRebaseExceptionInRead(format: String): SparkUpgradeException = { + val config = format match { + case "Parquet INT96" => SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_READ.key + case "Parquet" => 
SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ.key + case "Avro" => SQLConf.LEGACY_AVRO_REBASE_MODE_IN_READ.key + case _ => throw new IllegalStateException("unrecognized format " + format) } new SparkUpgradeException("3.0", "reading dates before 1582-10-15 or timestamps before " + s"1900-01-01T00:00:00Z from $format files can be ambiguous, as the files may be written by " + @@ -129,12 +142,11 @@ object DataSourceUtils { } def newRebaseExceptionInWrite(format: String): SparkUpgradeException = { - val config = if (format == "Parquet") { - SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key - } else if (format == "Avro") { - SQLConf.LEGACY_AVRO_REBASE_MODE_IN_WRITE.key - } else { - throw new IllegalStateException("unrecognized format " + format) + val config = format match { + case "Parquet INT96" => SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key + case "Parquet" => SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key + case "Avro" => SQLConf.LEGACY_AVRO_REBASE_MODE_IN_WRITE.key + case _ => throw new IllegalStateException("unrecognized format " + format) } new SparkUpgradeException("3.0", "writing dates before 1582-10-15 or timestamps before " + s"1900-01-01T00:00:00Z into $format files can be dangerous, as the files may be read by " + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala index 68f49f9442579..95f19f9dcee64 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala @@ -303,6 +303,9 @@ class ParquetFileFormat val datetimeRebaseMode = DataSourceUtils.datetimeRebaseMode( footerFileMetaData.getKeyValueMetaData.get, SQLConf.get.getConf(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ)) + val int96RebaseMode = DataSourceUtils.int96RebaseMode( + footerFileMetaData.getKeyValueMetaData.get, + SQLConf.get.getConf(SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_READ)) val attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0) val hadoopAttemptContext = @@ -318,6 +321,7 @@ class ParquetFileFormat val vectorizedReader = new VectorizedParquetRecordReader( convertTz.orNull, datetimeRebaseMode.toString, + int96RebaseMode.toString, enableOffHeapColumnVector && taskContext.isDefined, capacity) val iter = new RecordReaderIterator(vectorizedReader) @@ -336,7 +340,10 @@ class ParquetFileFormat logDebug(s"Falling back to parquet-mr") // ParquetRecordReader returns InternalRow val readSupport = new ParquetReadSupport( - convertTz, enableVectorizedReader = false, datetimeRebaseMode) + convertTz, + enableVectorizedReader = false, + datetimeRebaseMode, + int96RebaseMode) val reader = if (pushed.isDefined && enableRecordFilter) { val parquetFilter = FilterCompat.get(pushed.get, null) new ParquetRecordReader[InternalRow](readSupport, parquetFilter) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala index a30d1c26b3b2d..e74872da0829d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala @@ -54,7 +54,8 @@ import org.apache.spark.sql.types._ class ParquetReadSupport( 
val convertTz: Option[ZoneId], enableVectorizedReader: Boolean, - datetimeRebaseMode: LegacyBehaviorPolicy.Value) + datetimeRebaseMode: LegacyBehaviorPolicy.Value, + int96RebaseMode: LegacyBehaviorPolicy.Value) extends ReadSupport[InternalRow] with Logging { private var catalystRequestedSchema: StructType = _ @@ -62,7 +63,11 @@ class ParquetReadSupport( // We need a zero-arg constructor for SpecificParquetRecordReaderBase. But that is only // used in the vectorized reader, where we get the convertTz/rebaseDateTime value directly, // and the values here are ignored. - this(None, enableVectorizedReader = true, datetimeRebaseMode = LegacyBehaviorPolicy.CORRECTED) + this( + None, + enableVectorizedReader = true, + datetimeRebaseMode = LegacyBehaviorPolicy.CORRECTED, + int96RebaseMode = LegacyBehaviorPolicy.LEGACY) } /** @@ -131,7 +136,8 @@ class ParquetReadSupport( ParquetReadSupport.expandUDT(catalystRequestedSchema), new ParquetToSparkSchemaConverter(conf), convertTz, - datetimeRebaseMode) + datetimeRebaseMode, + int96RebaseMode) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala index bb528d548b6ef..80763ef019b01 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala @@ -35,17 +35,26 @@ import org.apache.spark.sql.types.StructType * @param convertTz the optional time zone to convert to int96 data * @param datetimeRebaseMode the mode of rebasing date/timestamp from Julian to Proleptic Gregorian * calendar + * @param int96RebaseMode the mode of rebasing INT96 timestamp from Julian to Proleptic Gregorian + * calendar */ private[parquet] class ParquetRecordMaterializer( parquetSchema: MessageType, catalystSchema: StructType, schemaConverter: ParquetToSparkSchemaConverter, convertTz: Option[ZoneId], - datetimeRebaseMode: LegacyBehaviorPolicy.Value) + datetimeRebaseMode: LegacyBehaviorPolicy.Value, + int96RebaseMode: LegacyBehaviorPolicy.Value) extends RecordMaterializer[InternalRow] { private val rootConverter = new ParquetRowConverter( - schemaConverter, parquetSchema, catalystSchema, convertTz, datetimeRebaseMode, NoopUpdater) + schemaConverter, + parquetSchema, + catalystSchema, + convertTz, + datetimeRebaseMode, + int96RebaseMode, + NoopUpdater) override def getCurrentRecord: InternalRow = rootConverter.currentRecord diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala index e0008ed16d56d..6ef56af927129 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala @@ -123,6 +123,8 @@ private[parquet] class ParquetPrimitiveConverter(val updater: ParentContainerUpd * @param convertTz the optional time zone to convert to int96 data * @param datetimeRebaseMode the mode of rebasing date/timestamp from Julian to Proleptic Gregorian * calendar + * @param int96RebaseMode the mode of rebasing INT96 timestamp from Julian to Proleptic Gregorian + * calendar * @param updater An updater which propagates converted field values to the 
parent container */ private[parquet] class ParquetRowConverter( @@ -131,6 +133,7 @@ private[parquet] class ParquetRowConverter( catalystType: StructType, convertTz: Option[ZoneId], datetimeRebaseMode: LegacyBehaviorPolicy.Value, + int96RebaseMode: LegacyBehaviorPolicy.Value, updater: ParentContainerUpdater) extends ParquetGroupConverter(updater) with Logging { @@ -187,6 +190,9 @@ private[parquet] class ParquetRowConverter( private val timestampRebaseFunc = DataSourceUtils.creteTimestampRebaseFuncInRead( datetimeRebaseMode, "Parquet") + private val int96RebaseFunc = DataSourceUtils.creteTimestampRebaseFuncInRead( + int96RebaseMode, "Parquet INT96") + // Converters for each field. private[this] val fieldConverters: Array[Converter with HasParentContainerUpdater] = { // (SPARK-31116) Use case insensitive map if spark.sql.caseSensitive is false @@ -300,9 +306,10 @@ private[parquet] class ParquetRowConverter( new ParquetPrimitiveConverter(updater) { // Converts nanosecond timestamps stored as INT96 override def addBinary(value: Binary): Unit = { - val rawTime = ParquetRowConverter.binaryToSQLTimestamp(value) - val adjTime = convertTz.map(DateTimeUtils.convertTz(rawTime, _, ZoneOffset.UTC)) - .getOrElse(rawTime) + val julianMicros = ParquetRowConverter.binaryToSQLTimestamp(value) + val gregorianMicros = int96RebaseFunc(julianMicros) + val adjTime = convertTz.map(DateTimeUtils.convertTz(gregorianMicros, _, ZoneOffset.UTC)) + .getOrElse(gregorianMicros) updater.setLong(adjTime) } } @@ -363,6 +370,7 @@ private[parquet] class ParquetRowConverter( t, convertTz, datetimeRebaseMode, + int96RebaseMode, wrappedUpdater) case t => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala index 6c333671d59cb..b538c2f2493d0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala @@ -31,7 +31,7 @@ import org.apache.parquet.io.api.{Binary, RecordConsumer} import org.apache.spark.SPARK_VERSION_SHORT import org.apache.spark.internal.Logging -import org.apache.spark.sql.{SPARK_LEGACY_DATETIME, SPARK_VERSION_METADATA_KEY} +import org.apache.spark.sql.{SPARK_INT96_NO_REBASE, SPARK_LEGACY_DATETIME, SPARK_VERSION_METADATA_KEY} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.SpecializedGetters import org.apache.spark.sql.catalyst.util.DateTimeUtils @@ -88,6 +88,12 @@ class ParquetWriteSupport extends WriteSupport[InternalRow] with Logging { private val timestampRebaseFunc = DataSourceUtils.creteTimestampRebaseFuncInWrite( datetimeRebaseMode, "Parquet") + private val int96RebaseMode = LegacyBehaviorPolicy.withName( + SQLConf.get.getConf(SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE)) + + private val int96RebaseFunc = DataSourceUtils.creteTimestampRebaseFuncInWrite( + int96RebaseMode, "Parquet INT96") + override def init(configuration: Configuration): WriteContext = { val schemaString = configuration.get(ParquetWriteSupport.SPARK_ROW_SCHEMA) this.schema = StructType.fromString(schemaString) @@ -115,6 +121,12 @@ class ParquetWriteSupport extends WriteSupport[InternalRow] with Logging { } else { None } + } ++ { + if (int96RebaseMode == LegacyBehaviorPolicy.LEGACY) { + None + } else { + Some(SPARK_INT96_NO_REBASE -> "") + } } logInfo( @@ -193,7 +205,8 
@@ class ParquetWriteSupport extends WriteSupport[InternalRow] with Logging { outputTimestampType match { case SQLConf.ParquetOutputTimestampType.INT96 => (row: SpecializedGetters, ordinal: Int) => - val (julianDay, timeOfDayNanos) = DateTimeUtils.toJulianDay(row.getLong(ordinal)) + val micros = int96RebaseFunc(row.getLong(ordinal)) + val (julianDay, timeOfDayNanos) = DateTimeUtils.toJulianDay(micros) val buf = ByteBuffer.wrap(timestampBuffer) buf.order(ByteOrder.LITTLE_ENDIAN).putLong(timeOfDayNanos).putInt(julianDay) recordConsumer.addBinary(Binary.fromReusedByteArray(timestampBuffer)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala index 3b482b0c8ab62..e4d5e9b2d9f6d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala @@ -119,6 +119,7 @@ case class ParquetPartitionReaderFactory( buildReaderFunc: ( ParquetInputSplit, InternalRow, TaskAttemptContextImpl, Option[FilterPredicate], Option[ZoneId], + LegacyBehaviorPolicy.Value, LegacyBehaviorPolicy.Value) => RecordReader[Void, T]): RecordReader[Void, T] = { val conf = broadcastedConf.value.value @@ -174,8 +175,17 @@ case class ParquetPartitionReaderFactory( val datetimeRebaseMode = DataSourceUtils.datetimeRebaseMode( footerFileMetaData.getKeyValueMetaData.get, SQLConf.get.getConf(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ)) + val int96RebaseMode = DataSourceUtils.int96RebaseMode( + footerFileMetaData.getKeyValueMetaData.get, + SQLConf.get.getConf(SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_READ)) val reader = buildReaderFunc( - split, file.partitionValues, hadoopAttemptContext, pushed, convertTz, datetimeRebaseMode) + split, + file.partitionValues, + hadoopAttemptContext, + pushed, + convertTz, + datetimeRebaseMode, + int96RebaseMode) reader.initialize(split, hadoopAttemptContext) reader } @@ -190,12 +200,16 @@ case class ParquetPartitionReaderFactory( hadoopAttemptContext: TaskAttemptContextImpl, pushed: Option[FilterPredicate], convertTz: Option[ZoneId], - datetimeRebaseMode: LegacyBehaviorPolicy.Value): RecordReader[Void, InternalRow] = { + datetimeRebaseMode: LegacyBehaviorPolicy.Value, + int96RebaseMode: LegacyBehaviorPolicy.Value): RecordReader[Void, InternalRow] = { logDebug(s"Falling back to parquet-mr") val taskContext = Option(TaskContext.get()) // ParquetRecordReader returns InternalRow val readSupport = new ParquetReadSupport( - convertTz, enableVectorizedReader = false, datetimeRebaseMode) + convertTz, + enableVectorizedReader = false, + datetimeRebaseMode, + int96RebaseMode) val reader = if (pushed.isDefined && enableRecordFilter) { val parquetFilter = FilterCompat.get(pushed.get, null) new ParquetRecordReader[InternalRow](readSupport, parquetFilter) @@ -221,11 +235,13 @@ case class ParquetPartitionReaderFactory( hadoopAttemptContext: TaskAttemptContextImpl, pushed: Option[FilterPredicate], convertTz: Option[ZoneId], - datetimeRebaseMode: LegacyBehaviorPolicy.Value): VectorizedParquetRecordReader = { + datetimeRebaseMode: LegacyBehaviorPolicy.Value, + int96RebaseMode: LegacyBehaviorPolicy.Value): VectorizedParquetRecordReader = { val taskContext = Option(TaskContext.get()) val vectorizedReader = new VectorizedParquetRecordReader( convertTz.orNull, 
datetimeRebaseMode.toString, + int96RebaseMode.toString, enableOffHeapColumnVector && taskContext.isDefined, capacity) val iter = new RecordReaderIterator(vectorizedReader) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/package.scala index c0397010acba3..011be6d69c576 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/package.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/package.scala @@ -60,4 +60,10 @@ package object sql { * values. */ private[sql] val SPARK_LEGACY_DATETIME = "org.apache.spark.legacyDateTime" + + /** + * Parquet file metadata key to indicate that the file with INT96 column type was written + * without rebasing. + */ + private[sql] val SPARK_INT96_NO_REBASE = "org.apache.spark.int96NoRebase" } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala index ff406f7bc62de..214f36a2df713 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala @@ -951,7 +951,9 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession rowFunc: Int => (String, String), toJavaType: String => T, checkDefaultLegacyRead: String => Unit, - tsOutputType: String = "TIMESTAMP_MICROS"): Unit = { + tsOutputType: String = "TIMESTAMP_MICROS", + inWriteConf: String = SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key, + inReadConf: String = SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ.key): Unit = { withTempPaths(2) { paths => paths.foreach(_.delete()) val path2_4 = getResourceParquetFilePath("test-data/" + fileName) @@ -962,18 +964,20 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession withSQLConf(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key -> tsOutputType) { checkDefaultLegacyRead(path2_4) // By default we should fail to write ancient datetime values. - val e = intercept[SparkException](df.write.parquet(path3_0)) - assert(e.getCause.getCause.getCause.isInstanceOf[SparkUpgradeException]) - withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> CORRECTED.toString) { + if (tsOutputType != "INT96") { + val e = intercept[SparkException](df.write.parquet(path3_0)) + assert(e.getCause.getCause.getCause.isInstanceOf[SparkUpgradeException]) + } + withSQLConf(inWriteConf -> CORRECTED.toString) { df.write.mode("overwrite").parquet(path3_0) } - withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> LEGACY.toString) { + withSQLConf(inWriteConf -> LEGACY.toString) { df.write.parquet(path3_0_rebase) } } // For Parquet files written by Spark 3.0, we know the writer info and don't need the // config to guide the rebase behavior. - withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ.key -> LEGACY.toString) { + withSQLConf(inReadConf -> LEGACY.toString) { checkAnswer( spark.read.format("parquet").load(path2_4, path3_0, path3_0_rebase), (0 until N).flatMap { i => @@ -1015,15 +1019,22 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession java.sql.Timestamp.valueOf, checkDefaultRead, tsOutputType = "TIMESTAMP_MILLIS") - // INT96 is a legacy timestamp format and we always rebase the seconds for it. 
+ } + } + Seq( + "2_4_5" -> successInRead _, + "2_4_6" -> successInRead _).foreach { case (version, checkDefaultRead) => + withAllParquetReaders { Seq("plain", "dict").foreach { enc => - checkAnswer(readResourceParquetFile( - s"test-data/before_1582_timestamp_int96_${enc}_v$version.snappy.parquet"), - Seq.tabulate(N) { i => - Row( - java.sql.Timestamp.valueOf("1001-01-01 01:02:03.123456"), - java.sql.Timestamp.valueOf(s"1001-01-0${i + 1} 01:02:03.123456")) - }) + checkReadMixedFiles( + s"before_1582_timestamp_int96_${enc}_v$version.snappy.parquet", + "timestamp", + (i: Int) => ("1001-01-01 01:02:03.123456", s"1001-01-0${i + 1} 01:02:03.123456"), + java.sql.Timestamp.valueOf, + checkDefaultRead, + tsOutputType = "INT96", + inWriteConf = SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key, + inReadConf = SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_READ.key) } } } @@ -1033,15 +1044,31 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession val N = 8 Seq(false, true).foreach { dictionaryEncoding => Seq( - ("TIMESTAMP_MILLIS", "1001-01-01 01:02:03.123", "1001-01-07 01:09:05.123"), - ("TIMESTAMP_MICROS", "1001-01-01 01:02:03.123456", "1001-01-07 01:09:05.123456"), - ("INT96", "1001-01-01 01:02:03.123456", "1001-01-01 01:02:03.123456") - ).foreach { case (outType, tsStr, nonRebased) => + ( + "TIMESTAMP_MILLIS", + "1001-01-01 01:02:03.123", + "1001-01-07 01:09:05.123", + SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key, + SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ.key), + ( + "TIMESTAMP_MICROS", + "1001-01-01 01:02:03.123456", + "1001-01-07 01:09:05.123456", + SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key, + SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ.key), + ( + "INT96", + "1001-01-01 01:02:03.123456", + "1001-01-07 01:09:05.123456", + SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key, + SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_READ.key + ) + ).foreach { case (outType, tsStr, nonRebased, inWriteConf, inReadConf) => withClue(s"output type $outType") { withSQLConf(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key -> outType) { withTempPath { dir => val path = dir.getAbsolutePath - withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> LEGACY.toString) { + withSQLConf(inWriteConf -> LEGACY.toString) { Seq.tabulate(N)(_ => tsStr).toDF("tsS") .select($"tsS".cast("timestamp").as("ts")) .repartition(1) @@ -1054,8 +1081,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession // The file metadata indicates if it needs rebase or not, so we can always get the // correct result regardless of the "rebase mode" config. 
Seq(LEGACY, CORRECTED, EXCEPTION).foreach { mode => - withSQLConf( - SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ.key -> mode.toString) { + withSQLConf(inReadConf -> mode.toString) { checkAnswer( spark.read.parquet(path), Seq.tabulate(N)(_ => Row(Timestamp.valueOf(tsStr)))) @@ -1136,6 +1162,30 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession } } } + + test("SPARK-33160: write the metadata key 'org.apache.spark.int96NoRebase'") { + def saveTs(dir: java.io.File): Unit = { + Seq(Timestamp.valueOf("1000-01-01 01:02:03")).toDF() + .repartition(1) + .write + .parquet(dir.getAbsolutePath) + } + withSQLConf(SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key -> LEGACY.toString) { + withTempPath { dir => + saveTs(dir) + assert(getMetaData(dir).get(SPARK_INT96_NO_REBASE).isEmpty) + } + } + withSQLConf(SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key -> CORRECTED.toString) { + withTempPath { dir => + saveTs(dir) + assert(getMetaData(dir)(SPARK_INT96_NO_REBASE) === "") + } + } + withSQLConf(SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key -> EXCEPTION.toString) { + withTempPath { dir => intercept[SparkException] { saveTs(dir) } } + } + } } class JobCommitFailureParquetOutputCommitter(outputPath: Path, context: TaskAttemptContext) From fbb68436203627186e4070cac674707283c9dcc2 Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Tue, 20 Oct 2020 15:14:38 +0900 Subject: [PATCH 0285/1009] [SPARK-32229][SQL] Fix PostgresConnectionProvider and MSSQLConnectionProvider by accessing wrapped driver ### What changes were proposed in this pull request? Postgres and MSSQL connection providers are not able to get custom `appEntry` because under some circumstances the driver is wrapped with `DriverWrapper`. Such case is not handled in the mentioned providers. In this PR I've added this edge case handling by passing unwrapped `Driver` from `JdbcUtils`. ### Why are the changes needed? `DriverWrapper` is not considered. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing + additional unit tests. Closes #30024 from gaborgsomogyi/SPARK-32229. 
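For readers skimming the diff below, a minimal self-contained sketch of the lookup-and-unwrap idea follows. The `Wrapper` trait and `resolve` helper are hypothetical stand-ins for Spark's internal `DriverWrapper` and the new `DriverRegistry.get`, and `registered` stands in for `DriverManager.getDrivers`; the authoritative change is the diff itself.

```scala
import java.sql.Driver

// Hypothetical stand-in for Spark's internal DriverWrapper.
trait Wrapper { def wrapped: Driver }

// When a driver is registered through a wrapper, matching on the registered object's
// class name never hits the user-supplied driver class, so the lookup must unwrap first.
def resolve(registered: Seq[AnyRef], className: String): Option[Driver] =
  registered.collectFirst {
    case w: Wrapper if w.wrapped.getClass.getCanonicalName == className => w.wrapped
    case d: Driver if d.getClass.getCanonicalName == className => d
  }
```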
Authored-by: Gabor Somogyi Signed-off-by: Takeshi Yamamuro --- .../datasources/jdbc/DriverRegistry.scala | 11 +++++++ .../datasources/jdbc/JdbcUtils.scala | 12 ++----- .../jdbc/DriverRegistrySuite.scala | 29 ++++++++++++++++ .../jdbc/connection/TestDriver.scala | 33 +++++++++++++++++++ 4 files changed, 75 insertions(+), 10 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/DriverRegistrySuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/TestDriver.scala diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/DriverRegistry.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/DriverRegistry.scala index 530d836d9fde3..3444d03beff5d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/DriverRegistry.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/DriverRegistry.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.datasources.jdbc import java.sql.{Driver, DriverManager} +import scala.collection.JavaConverters._ import scala.collection.mutable import org.apache.spark.internal.Logging @@ -58,5 +59,15 @@ object DriverRegistry extends Logging { } } } + + def get(className: String): Driver = { + DriverManager.getDrivers.asScala.collectFirst { + case d: DriverWrapper if d.wrapped.getClass.getCanonicalName == className => d.wrapped + case d if d.getClass.getCanonicalName == className => d + }.getOrElse { + throw new IllegalStateException( + s"Did not find registered driver with class $className") + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index 202f2e03b68d8..24e380e3be3e1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -17,10 +17,9 @@ package org.apache.spark.sql.execution.datasources.jdbc -import java.sql.{Connection, Driver, DriverManager, JDBCType, PreparedStatement, ResultSet, ResultSetMetaData, SQLException, SQLFeatureNotSupportedException} +import java.sql.{Connection, Driver, JDBCType, PreparedStatement, ResultSet, ResultSetMetaData, SQLException, SQLFeatureNotSupportedException} import java.util.Locale -import scala.collection.JavaConverters._ import scala.util.Try import scala.util.control.NonFatal @@ -56,17 +55,10 @@ object JdbcUtils extends Logging { val driverClass: String = options.driverClass () => { DriverRegistry.register(driverClass) - val driver: Driver = DriverManager.getDrivers.asScala.collectFirst { - case d: DriverWrapper if d.wrapped.getClass.getCanonicalName == driverClass => d - case d if d.getClass.getCanonicalName == driverClass => d - }.getOrElse { - throw new IllegalStateException( - s"Did not find registered driver with class $driverClass") - } + val driver: Driver = DriverRegistry.get(driverClass) val connection = ConnectionProvider.create(driver, options.parameters) require(connection != null, s"The driver could not open a JDBC connection. 
Check the URL: ${options.url}") - connection } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/DriverRegistrySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/DriverRegistrySuite.scala new file mode 100644 index 0000000000000..51dbdacb5e0fe --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/DriverRegistrySuite.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.jdbc + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.execution.datasources.jdbc.connection.TestDriver + +class DriverRegistrySuite extends SparkFunSuite { + test("SPARK-32229: get must give back wrapped driver if wrapped") { + val className = classOf[TestDriver].getName + DriverRegistry.register(className) + assert(DriverRegistry.get(className).isInstanceOf[TestDriver]) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/TestDriver.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/TestDriver.scala new file mode 100644 index 0000000000000..6b57a95ed458b --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/TestDriver.scala @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources.jdbc.connection + +import java.sql.{Connection, Driver, DriverPropertyInfo} +import java.util.Properties +import java.util.logging.Logger + +private[jdbc] class TestDriver() extends Driver { + override def connect(url: String, info: Properties): Connection = null + override def acceptsURL(url: String): Boolean = false + override def getPropertyInfo(url: String, info: Properties): Array[DriverPropertyInfo] = + Array.empty + override def getMajorVersion: Int = 0 + override def getMinorVersion: Int = 0 + override def jdbcCompliant(): Boolean = false + override def getParentLogger: Logger = null +} From eb9966b70055a67dd02451c78ec205d913a38a42 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Tue, 20 Oct 2020 17:35:09 +0900 Subject: [PATCH 0286/1009] [SPARK-33190][INFRA][TESTS] Set upper bound of PyArrow version in GitHub Actions ### What changes were proposed in this pull request? PyArrow is uploaded into PyPI today (https://pypi.org/project/pyarrow/), and some tests fail with PyArrow 2.0.0+: ``` ====================================================================== ERROR [0.774s]: test_grouped_over_window_with_key (pyspark.sql.tests.test_pandas_grouped_map.GroupedMapInPandasTests) ---------------------------------------------------------------------- Traceback (most recent call last): File "/__w/spark/spark/python/pyspark/sql/tests/test_pandas_grouped_map.py", line 595, in test_grouped_over_window_with_key .select('id', 'result').collect() File "/__w/spark/spark/python/pyspark/sql/dataframe.py", line 588, in collect sock_info = self._jdf.collectToPython() File "/__w/spark/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1305, in __call__ answer, self.gateway_client, self.target_id, self.name) File "/__w/spark/spark/python/pyspark/sql/utils.py", line 117, in deco raise converted from None pyspark.sql.utils.PythonException: An exception was thrown from the Python worker. Please see the stack trace below. 
Traceback (most recent call last): File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 601, in main process() File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 593, in process serializer.dump_stream(out_iter, outfile) File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 255, in dump_stream return ArrowStreamSerializer.dump_stream(self, init_stream_yield_batches(), stream) File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 81, in dump_stream for batch in iterator: File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 248, in init_stream_yield_batches for series in iterator: File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 426, in mapper return f(keys, vals) File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 170, in return lambda k, v: [(wrapped(k, v), to_arrow_type(return_type))] File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 158, in wrapped result = f(key, pd.concat(value_series, axis=1)) File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/util.py", line 68, in wrapper return f(*args, **kwargs) File "/__w/spark/spark/python/pyspark/sql/tests/test_pandas_grouped_map.py", line 590, in f "{} != {}".format(expected_key[i][1], window_range) AssertionError: {'start': datetime.datetime(2018, 3, 15, 0, 0), 'end': datetime.datetime(2018, 3, 20, 0, 0)} != {'start': datetime.datetime(2018, 3, 15, 0, 0, tzinfo=), 'end': datetime.datetime(2018, 3, 20, 0, 0, tzinfo=)} ``` https://github.com/apache/spark/runs/1278917457 This PR proposes to set the upper bound of PyArrow in GitHub Actions build. This should be removed when we properly support PyArrow 2.0.0+ (SPARK-33189). ### Why are the changes needed? To make build pass. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? GitHub Actions in this build will test it out. Closes #30098 from HyukjinKwon/hot-fix-test. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- .github/workflows/build_and_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 762e22f24cc2b..a1c99fd21a466 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -136,7 +136,7 @@ jobs: - name: Install Python packages (Python 3.8) if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) run: | - python3.8 -m pip install numpy pyarrow pandas scipy xmlrunner + python3.8 -m pip install numpy 'pyarrow<2.0.0' pandas scipy xmlrunner python3.8 -m pip list # SparkR - name: Install R 4.0 @@ -239,7 +239,7 @@ jobs: # Ubuntu 20.04. See also SPARK-33162. - name: Install Python packages (Python 3.6) run: | - python3.6 -m pip install numpy pyarrow pandas scipy xmlrunner + python3.6 -m pip install numpy 'pyarrow<2.0.0' pandas scipy xmlrunner python3.6 -m pip list # Run the tests. - name: Run tests From 2cfd215dc4fb1ff6865644fec8284ba93dcddd5c Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 21 Oct 2020 00:31:58 +0900 Subject: [PATCH 0287/1009] [SPARK-33191][YARN][TESTS] Fix PySpark test cases in YarnClusterSuite ### What changes were proposed in this pull request? 
This PR proposes to fix: ``` org.apache.spark.deploy.yarn.YarnClusterSuite.run Python application in yarn-client mode org.apache.spark.deploy.yarn.YarnClusterSuite.run Python application in yarn-cluster mode org.apache.spark.deploy.yarn.YarnClusterSuite.run Python application in yarn-cluster mode using spark.yarn.appMasterEnv to override local envvar ``` it currently fails as below: ``` 20/10/16 19:20:36 WARN TaskSetManager: Lost task 0.0 in stage 0.0 (TID 0) (amp-jenkins-worker-03.amp executor 1): org.apache.spark.SparkException: Error from python worker: Traceback (most recent call last): File "/usr/lib64/python2.6/runpy.py", line 104, in _run_module_as_main loader, code, fname = _get_module_details(mod_name) File "/usr/lib64/python2.6/runpy.py", line 79, in _get_module_details loader = get_loader(mod_name) File "/usr/lib64/python2.6/pkgutil.py", line 456, in get_loader return find_loader(fullname) File "/usr/lib64/python2.6/pkgutil.py", line 466, in find_loader for importer in iter_importers(fullname): File "/usr/lib64/python2.6/pkgutil.py", line 422, in iter_importers __import__(pkg) File "/home/jenkins/workspace/SparkPullRequestBuilder2/python/pyspark/__init__.py", line 53, in from pyspark.rdd import RDD, RDDBarrier File "/home/jenkins/workspace/SparkPullRequestBuilder2/python/pyspark/rdd.py", line 34, in from pyspark.java_gateway import local_connect_and_auth File "/home/jenkins/workspace/SparkPullRequestBuilder2/python/pyspark/java_gateway.py", line 29, in from py4j.java_gateway import java_import, JavaGateway, JavaObject, GatewayParameters File "/home/jenkins/workspace/SparkPullRequestBuilder2/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 60 PY4J_TRUE = {"yes", "y", "t", "true"} ^ SyntaxError: invalid syntax ``` I think this was broken when Python 2 was dropped but was not caught because this specific test does not run when there's no change in YARN codes. See also https://github.com/apache/spark/pull/29843#issuecomment-712540024 The root cause seems like the paths are different, see https://github.com/apache/spark/pull/29843#pullrequestreview-502595199. I _think_ Jenkins uses a different Python executable via Anaconda and the executor side does not know where it is for some reasons. This PR proposes to fix it just by explicitly specifying the absolute path for Python executable so the tests should pass in any environment. ### Why are the changes needed? To make tests pass. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? This issue looks specific to Jenkins. It should run the tests on Jenkins. Closes #30099 from HyukjinKwon/SPARK-33191. 
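To make the fix easier to follow, here is a simplified sketch of resolving an executable name to an absolute path by scanning `PATH`, in the spirit of `dev/sparktestsupport/shellutils.py`. It is illustrative only; the helper actually added by this patch (`TestUtils.getAbsolutePathFromExecutable`, shown in the diff below) additionally handles the Windows `.exe` suffix and quoted `PATH` entries.

```scala
import java.io.File
import java.nio.file.{Files, Paths}

// Return the first regular, executable file named `executable` found on PATH.
def findOnPath(executable: String): Option[String] =
  sys.env.getOrElse("PATH", "")
    .split(File.pathSeparator)
    .iterator
    .map(dir => Paths.get(dir, executable))
    .find(p => Files.isRegularFile(p) && Files.isExecutable(p))
    .map(_.toAbsolutePath.toString)

// e.g. findOnPath("python3") might yield Some("/usr/bin/python3") on a typical Linux box.
```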
Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- .../scala/org/apache/spark/TestUtils.scala | 22 ++++++++++++++++++- .../spark/deploy/yarn/YarnClusterSuite.scala | 16 +++++++++++--- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/TestUtils.scala b/core/src/main/scala/org/apache/spark/TestUtils.scala index bc3644df0ebb5..9632d6c691085 100644 --- a/core/src/main/scala/org/apache/spark/TestUtils.scala +++ b/core/src/main/scala/org/apache/spark/TestUtils.scala @@ -20,13 +20,14 @@ package org.apache.spark import java.io.{ByteArrayInputStream, File, FileInputStream, FileOutputStream} import java.net.{HttpURLConnection, URI, URL} import java.nio.charset.StandardCharsets -import java.nio.file.{Files => JavaFiles} +import java.nio.file.{Files => JavaFiles, Paths} import java.nio.file.attribute.PosixFilePermission.{OWNER_EXECUTE, OWNER_READ, OWNER_WRITE} import java.security.SecureRandom import java.security.cert.X509Certificate import java.util.{Arrays, EnumSet, Locale, Properties} import java.util.concurrent.{TimeoutException, TimeUnit} import java.util.jar.{JarEntry, JarOutputStream, Manifest} +import java.util.regex.Pattern import javax.net.ssl._ import javax.tools.{JavaFileObject, SimpleJavaFileObject, ToolProvider} @@ -37,6 +38,7 @@ import scala.sys.process.{Process, ProcessLogger} import scala.util.Try import com.google.common.io.{ByteStreams, Files} +import org.apache.commons.lang3.StringUtils import org.apache.log4j.PropertyConfigurator import org.json4s.JsonAST.JValue import org.json4s.jackson.JsonMethods.{compact, render} @@ -268,6 +270,24 @@ private[spark] object TestUtils { attempt.isSuccess && attempt.get == 0 } + /** + * Get the absolute path from the executable. This implementation was borrowed from + * `spark/dev/sparktestsupport/shellutils.py`. + */ + def getAbsolutePathFromExecutable(executable: String): Option[String] = { + val command = if (Utils.isWindows) s"$executable.exe" else executable + if (command.split(File.separator, 2).length == 1 && + JavaFiles.isRegularFile(Paths.get(command)) && + JavaFiles.isExecutable(Paths.get(command))) { + Some(Paths.get(command).toAbsolutePath.toString) + } else { + sys.env("PATH").split(Pattern.quote(File.pathSeparator)) + .map(path => Paths.get(s"${StringUtils.strip(path, "\"")}${File.separator}$command")) + .find(p => JavaFiles.isRegularFile(p) && JavaFiles.isExecutable(p)) + .map(_.toString) + } + } + /** * Returns the response code from an HTTP(S) URL. */ diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala index 14438bc141056..cf754cca315f0 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala @@ -52,6 +52,13 @@ import org.apache.spark.util.{Utils, YarnContainerInfoHelper} @ExtendedYarnTest class YarnClusterSuite extends BaseYarnClusterSuite { + private val pythonExecutablePath = { + // To make sure to use the same Python executable. 
+ val maybePath = TestUtils.getAbsolutePathFromExecutable("python3") + assert(maybePath.isDefined) + maybePath.get + } + override def newYarnConfig(): YarnConfiguration = new YarnConfiguration() private val TEST_PYFILE = """ @@ -175,9 +182,9 @@ class YarnClusterSuite extends BaseYarnClusterSuite { clientMode = false, extraConf = Map( "spark.yarn.appMasterEnv.PYSPARK_DRIVER_PYTHON" - -> sys.env.getOrElse("PYSPARK_DRIVER_PYTHON", "python"), + -> sys.env.getOrElse("PYSPARK_DRIVER_PYTHON", pythonExecutablePath), "spark.yarn.appMasterEnv.PYSPARK_PYTHON" - -> sys.env.getOrElse("PYSPARK_PYTHON", "python")), + -> sys.env.getOrElse("PYSPARK_PYTHON", pythonExecutablePath)), extraEnv = Map( "PYSPARK_DRIVER_PYTHON" -> "not python", "PYSPARK_PYTHON" -> "not python")) @@ -275,7 +282,10 @@ class YarnClusterSuite extends BaseYarnClusterSuite { s"$sparkHome/python") val extraEnvVars = Map( "PYSPARK_ARCHIVES_PATH" -> pythonPath.map("local:" + _).mkString(File.pathSeparator), - "PYTHONPATH" -> pythonPath.mkString(File.pathSeparator)) ++ extraEnv + "PYTHONPATH" -> pythonPath.mkString(File.pathSeparator), + "PYSPARK_DRIVER_PYTHON" -> pythonExecutablePath, + "PYSPARK_PYTHON" -> pythonExecutablePath + ) ++ extraEnv val moduleDir = { val subdir = new File(tempDir, "pyModules") From 46ad325e56abd95c0ffdbe64aad78582da8c725d Mon Sep 17 00:00:00 2001 From: Keiji Yoshida Date: Wed, 21 Oct 2020 00:36:45 +0900 Subject: [PATCH 0288/1009] [MINOR][DOCS] Fix the description about to_avro and from_avro functions ### What changes were proposed in this pull request? This pull request changes the description about `to_avro` and `from_avro` functions to include Python as a supported language as the functions have been supported in Python since Apache Spark 3.0.0 [[SPARK-26856](https://issues.apache.org/jira/browse/SPARK-26856)]. ### Why are the changes needed? Same as above. ### Does this PR introduce _any_ user-facing change? Yes. The description changed by this pull request is on https://spark.apache.org/docs/latest/sql-data-sources-avro.html#to_avro-and-from_avro. ### How was this patch tested? Tested manually by building and checking the document in the local environment. Closes #30105 from kjmrknsn/fix-docs-sql-data-sources-avro. Authored-by: Keiji Yoshida Signed-off-by: HyukjinKwon --- docs/sql-data-sources-avro.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sql-data-sources-avro.md b/docs/sql-data-sources-avro.md index d926ae7703268..69b165ed28bae 100644 --- a/docs/sql-data-sources-avro.md +++ b/docs/sql-data-sources-avro.md @@ -88,7 +88,7 @@ Kafka key-value record will be augmented with some metadata, such as the ingesti * If the "value" field that contains your data is in Avro, you could use `from_avro()` to extract your data, enrich it, clean it, and then push it downstream to Kafka again or write it out to a file. * `to_avro()` can be used to turn structs into Avro records. This method is particularly useful when you would like to re-encode multiple columns into a single one when writing data out to Kafka. -Both functions are currently only available in Scala and Java. +Both functions are currently only available in Scala, Java, and Python.
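As an illustrative aside (not part of the patch), the following Scala sketch shows the two functions the page documents. It assumes a SparkSession with the external `spark-avro` module on the classpath and a hypothetical DataFrame `df` holding Avro-encoded bytes in a binary column named `value`; the schema string is made up for the example.

```scala
import org.apache.spark.sql.avro.functions.{from_avro, to_avro}
import org.apache.spark.sql.functions.col

// Avro schema of the payload, in JSON form (illustrative).
val jsonFormatSchema =
  """{"type":"record","name":"User","fields":[{"name":"name","type":"string"}]}"""

// Decode the binary Avro column into a struct, then re-encode the struct back to Avro.
val decoded = df.select(from_avro(col("value"), jsonFormatSchema).as("user"))
val reencoded = decoded.select(to_avro(col("user")).as("value"))
```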
    From c824db2d8b154acf51637844f5f268e988bd0081 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 20 Oct 2020 14:55:08 -0700 Subject: [PATCH 0289/1009] [MINOR][CORE] Improve log message during storage decommission ### What changes were proposed in this pull request? This PR aims to improve the log message for better analysis. ### Why are the changes needed? Good logs are crucial always. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual review. Closes #30109 from dongjoon-hyun/k8s_log. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../spark/storage/BlockManagerDecommissioner.scala | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala index 3377b357a9231..66df72921acb2 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala @@ -83,9 +83,10 @@ private[storage] class BlockManagerDecommissioner( Thread.sleep(SLEEP_TIME_SECS * 1000L) case Some((shuffleBlockInfo, retryCount)) => if (retryCount < maxReplicationFailuresForDecommission) { - logDebug(s"Trying to migrate shuffle ${shuffleBlockInfo} to ${peer}") + logInfo(s"Trying to migrate shuffle ${shuffleBlockInfo} to ${peer} " + + "($retryCount / $maxReplicationFailuresForDecommission)") val blocks = bm.migratableResolver.getMigrationBlocks(shuffleBlockInfo) - logDebug(s"Got migration sub-blocks ${blocks}") + logInfo(s"Got migration sub-blocks ${blocks}") // Migrate the components of the blocks. try { @@ -101,7 +102,7 @@ private[storage] class BlockManagerDecommissioner( null)// class tag, we don't need for shuffle logDebug(s"Migrated sub block ${blockId}") } - logDebug(s"Migrated ${shuffleBlockInfo} to ${peer}") + logInfo(s"Migrated ${shuffleBlockInfo} to ${peer}") } catch { case e: IOException => // If a block got deleted before netty opened the file handle, then trying to @@ -244,6 +245,8 @@ private[storage] class BlockManagerDecommissioner( val newShufflesToMigrate = (localShuffles.diff(migratingShuffles)).toSeq shufflesToMigrate.addAll(newShufflesToMigrate.map(x => (x, 0)).asJava) migratingShuffles ++= newShufflesToMigrate + logInfo(s"${newShufflesToMigrate.size} of ${localShuffles.size} local shuffles " + + "are added. In total, ${migratingShuffles.size} shuffles are remained.") // Update the threads doing migrations val livePeerSet = bm.getPeers(false).toSet From 385d5db9413a7f23c8a4c2d802541e88ce3a4633 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 20 Oct 2020 15:02:36 -0700 Subject: [PATCH 0290/1009] [SPARK-33198][CORE] getMigrationBlocks should not fail at missing files ### What changes were proposed in this pull request? This PR aims to fix `getMigrationBlocks` error handling and to add test coverage. 1. `getMigrationBlocks` should not fail at indexFile only case. 2. `assert` causes `java.lang.AssertionError` which is not an `Exception`. ### Why are the changes needed? To handle the exception correctly. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CI with the newly added test case. Closes #30110 from dongjoon-hyun/SPARK-33198. 
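A small illustration of the second point (not patch code): `assert` raises `java.lang.AssertionError`, which extends `Error` rather than `Exception`, so the existing `catch { case _: Exception => ... }` block could not swallow it. The method and values below are hypothetical.

```scala
def loadOrSkip(indexExists: Boolean): List[String] =
  try {
    assert(indexExists)       // throws AssertionError when indexExists is false
    List("index", "data")
  } catch {
    case _: Exception => Nil  // never reached for an assertion failure
  }

// loadOrSkip(false) propagates AssertionError instead of returning Nil, which is why
// the patch below replaces the assert with an explicit check that throws
// FileNotFoundException (an Exception) when the index file is gone.
```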
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../shuffle/IndexShuffleBlockResolver.scala | 19 ++++++++++++------- .../sort/IndexShuffleBlockResolverSuite.scala | 5 +++++ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala index 9496918760298..525b8fd3f6923 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala @@ -241,14 +241,19 @@ private[spark] class IndexShuffleBlockResolver( val dataBlockData = new FileSegmentManagedBuffer( transportConf, dataFile, 0, dataFile.length()) - // Make sure the files exist - assert(indexFile.exists() && dataFile.exists()) - - List((indexBlockId, indexBlockData), (dataBlockId, dataBlockData)) + // Make sure the index exist. + if (!indexFile.exists()) { + throw new FileNotFoundException("Index file is deleted already.") + } + if (dataFile.exists()) { + List((indexBlockId, indexBlockData), (dataBlockId, dataBlockData)) + } else { + List((indexBlockId, indexBlockData)) + } } catch { - case e: Exception => // If we can't load the blocks ignore them. - logWarning(s"Failed to resolve shuffle block ${shuffleBlockInfo}, skipping migration" + - "this is expected to occure if a block is removed after decommissioning has started.") + case _: Exception => // If we can't load the blocks ignore them. + logWarning(s"Failed to resolve shuffle block ${shuffleBlockInfo}, skipping migration. " + + "This is expected to occur if a block is removed after decommissioning has started.") List.empty[(BlockId, ManagedBuffer)] } } diff --git a/core/src/test/scala/org/apache/spark/shuffle/sort/IndexShuffleBlockResolverSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/sort/IndexShuffleBlockResolverSuite.scala index 725a1d90557a2..91260d01eb8b6 100644 --- a/core/src/test/scala/org/apache/spark/shuffle/sort/IndexShuffleBlockResolverSuite.scala +++ b/core/src/test/scala/org/apache/spark/shuffle/sort/IndexShuffleBlockResolverSuite.scala @@ -156,4 +156,9 @@ class IndexShuffleBlockResolverSuite extends SparkFunSuite with BeforeAndAfterEa indexIn2.close() } } + + test("SPARK-33198 getMigrationBlocks should not fail at missing files") { + val resolver = new IndexShuffleBlockResolver(conf, blockManager) + assert(resolver.getMigrationBlocks(ShuffleBlockInfo(Int.MaxValue, Long.MaxValue)).isEmpty) + } } From 47a6568265525002021c1e5cfa4330f5b1a91469 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Wed, 21 Oct 2020 09:13:33 +0900 Subject: [PATCH 0291/1009] [SPARK-33189][PYTHON][TESTS] Add env var to tests for legacy nested timestamps in pyarrow ### What changes were proposed in this pull request? Add an environment variable `PYARROW_IGNORE_TIMEZONE` to pyspark tests in run-tests.py to use legacy nested timestamp behavior. This means that when converting arrow to pandas, nested timestamps with timezones will have the timezone localized during conversion. ### Why are the changes needed? The default behavior was changed in PyArrow 2.0.0 to propagate timezone information. Using the environment variable enables testing with newer versions of pyarrow until the issue can be fixed in SPARK-32285. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests Closes #30111 from BryanCutler/arrow-enable-legacy-nested-timestamps-SPARK-33189. 
Authored-by: Bryan Cutler Signed-off-by: HyukjinKwon --- .github/workflows/build_and_test.yml | 4 ++-- python/run-tests.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index a1c99fd21a466..27607a799d038 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -136,7 +136,7 @@ jobs: - name: Install Python packages (Python 3.8) if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) run: | - python3.8 -m pip install numpy 'pyarrow<2.0.0' pandas scipy xmlrunner + python3.8 -m pip install numpy 'pyarrow<3.0.0' pandas scipy xmlrunner python3.8 -m pip list # SparkR - name: Install R 4.0 @@ -239,7 +239,7 @@ jobs: # Ubuntu 20.04. See also SPARK-33162. - name: Install Python packages (Python 3.6) run: | - python3.6 -m pip install numpy 'pyarrow<2.0.0' pandas scipy xmlrunner + python3.6 -m pip install numpy 'pyarrow<3.0.0' pandas scipy xmlrunner python3.6 -m pip list # Run the tests. - name: Run tests diff --git a/python/run-tests.py b/python/run-tests.py index ad2e90edad2bc..712f38fb81b83 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -72,6 +72,8 @@ def run_individual_python_test(target_dir, test_name, pyspark_python): 'SPARK_PREPEND_CLASSES': '1', 'PYSPARK_PYTHON': which(pyspark_python), 'PYSPARK_DRIVER_PYTHON': which(pyspark_python), + # Preserve legacy nested timezone behavior for pyarrow>=2, remove after SPARK-32285 + 'PYARROW_IGNORE_TIMEZONE': '1', }) # Create a unique temp directory under 'target/' for each run. The TMPDIR variable is From dcb08204339e2291727be8e1a206e272652f9ae4 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Wed, 21 Oct 2020 15:51:16 +0900 Subject: [PATCH 0292/1009] [SPARK-32785][SQL][DOCS][FOLLOWUP] Update migaration guide for incomplete interval literals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Address comments https://github.com/apache/spark/pull/29635#discussion_r507241899 to improve migration guide ### Why are the changes needed? improve migration guide ### Does this PR introduce _any_ user-facing change? NO,only doc update ### How was this patch tested? passing GitHub action Closes #30113 from yaooqinn/SPARK-32785-F. Authored-by: Kent Yao Signed-off-by: Takeshi Yamamuro --- docs/sql-migration-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index cc69e78108ffd..5612e4f1453f1 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -44,7 +44,7 @@ license: | - In Spark 3.1, `path` option cannot coexist when the following methods are called with path parameter(s): `DataFrameReader.load()`, `DataFrameWriter.save()`, `DataStreamReader.load()`, or `DataStreamWriter.start()`. In addition, `paths` option cannot coexist for `DataFrameReader.load()`. For example, `spark.read.format("csv").option("path", "/tmp").load("/tmp2")` or `spark.read.option("path", "/tmp").csv("/tmp2")` will throw `org.apache.spark.sql.AnalysisException`. In Spark version 3.0 and below, `path` option is overwritten if one path parameter is passed to above methods; `path` option is added to the overall paths if multiple path parameters are passed to `DataFrameReader.load()`. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.pathOptionBehavior.enabled` to `true`. 
- - In Spark 3.1, incomplete interval literals, e.g. `INTERVAL '1'`, `INTERVAL '1 DAY 2'` will fail with IllegalArgumentException. In Spark 3.0, they result `NULL`s. + - In Spark 3.1, `IllegalArgumentException` is returned for the incomplete interval literals, e.g. `INTERVAL '1'`, `INTERVAL '1 DAY 2'`, which are invalid. In Spark 3.0, these literals result in `NULL`s. - In Spark 3.1, we remove the built-in Hive 1.2. You need to migrate your custom SerDes to Hive 2.3. See [HIVE-15167](https://issues.apache.org/jira/browse/HIVE-15167) for more details. From 618695b78fe93ae6506650ecfbebe807a43c5f0c Mon Sep 17 00:00:00 2001 From: zhengruifeng Date: Wed, 21 Oct 2020 08:49:25 -0500 Subject: [PATCH 0293/1009] [SPARK-33111][ML][FOLLOW-UP] aft transform optimization - predictQuantiles ### What changes were proposed in this pull request? 1, optimize `predictQuantiles` by pre-computing an auxiliary var. ### Why are the changes needed? In https://github.com/apache/spark/pull/30000, I optimized the `transform` method. I find that we can also optimize `predictQuantiles` by pre-computing an auxiliary var. It is about 56% faster than existing impl. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? existing testsuites Closes #30034 from zhengruifeng/aft_quantiles_opt. Authored-by: zhengruifeng Signed-off-by: Sean Owen --- .../ml/regression/AFTSurvivalRegression.scala | 42 +++++++++++-------- .../AFTSurvivalRegressionSuite.scala | 2 +- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala index 595a2f0e742df..3870a71a91a20 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala @@ -383,22 +383,32 @@ class AFTSurvivalRegressionModel private[ml] ( /** @group setParam */ @Since("1.6.0") - def setQuantileProbabilities(value: Array[Double]): this.type = set(quantileProbabilities, value) + def setQuantileProbabilities(value: Array[Double]): this.type = { + set(quantileProbabilities, value) + _quantiles(0) = $(quantileProbabilities).map(q => math.exp(math.log(-math.log1p(-q)) * scale)) + this + } /** @group setParam */ @Since("1.6.0") def setQuantilesCol(value: String): this.type = set(quantilesCol, value) + private lazy val _quantiles = { + Array($(quantileProbabilities).map(q => math.exp(math.log(-math.log1p(-q)) * scale))) + } + + private def lambda2Quantiles(lambda: Double): Vector = { + val quantiles = _quantiles(0).clone() + var i = 0 + while (i < quantiles.length) { quantiles(i) *= lambda; i += 1 } + Vectors.dense(quantiles) + } + @Since("2.0.0") def predictQuantiles(features: Vector): Vector = { // scale parameter for the Weibull distribution of lifetime - val lambda = math.exp(BLAS.dot(coefficients, features) + intercept) - // shape parameter for the Weibull distribution of lifetime - val k = 1 / scale - val quantiles = $(quantileProbabilities).map { - q => lambda * math.exp(math.log(-math.log1p(-q)) / k) - } - Vectors.dense(quantiles) + val lambda = predict(features) + lambda2Quantiles(lambda) } @Since("2.0.0") @@ -414,24 +424,20 @@ class AFTSurvivalRegressionModel private[ml] ( var predictionColumns = Seq.empty[Column] if ($(predictionCol).nonEmpty) { - val predictUDF = udf { features: Vector => predict(features) } + val predCol = udf(predict _).apply(col($(featuresCol))) 
predictionColNames :+= $(predictionCol) - predictionColumns :+= predictUDF(col($(featuresCol))) + predictionColumns :+= predCol .as($(predictionCol), outputSchema($(predictionCol)).metadata) } if (hasQuantilesCol) { - val baseQuantiles = $(quantileProbabilities) - .map(q => math.exp(math.log(-math.log1p(-q)) * scale)) - val lambdaCol = if ($(predictionCol).nonEmpty) { - predictionColumns.head + val quanCol = if ($(predictionCol).nonEmpty) { + udf(lambda2Quantiles _).apply(predictionColumns.head) } else { - udf { features: Vector => predict(features) }.apply(col($(featuresCol))) + udf(predictQuantiles _).apply(col($(featuresCol))) } - val predictQuantilesUDF = - udf { lambda: Double => Vectors.dense(baseQuantiles.map(q => q * lambda)) } predictionColNames :+= $(quantilesCol) - predictionColumns :+= predictQuantilesUDF(lambdaCol) + predictionColumns :+= quanCol .as($(quantilesCol), outputSchema($(quantilesCol)).metadata) } diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala index a66143ab12e49..63ccfa3834624 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala @@ -130,9 +130,9 @@ class AFTSurvivalRegressionSuite extends MLTest with DefaultReadWriteTest { test("aft survival regression with univariate") { val quantileProbabilities = Array(0.1, 0.5, 0.9) val trainer = new AFTSurvivalRegression() - .setQuantileProbabilities(quantileProbabilities) .setQuantilesCol("quantiles") val model = trainer.fit(datasetUnivariate) + model.setQuantileProbabilities(quantileProbabilities) /* Using the following R code to load the data and train the model using survival package. From 1b7367ccd7cdcbfc9ff9a3893693a3261a5eb7c1 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Wed, 21 Oct 2020 13:04:39 -0700 Subject: [PATCH 0294/1009] [SPARK-33205][BUILD] Bump snappy-java version to 1.1.8 ### What changes were proposed in this pull request? This PR intends to upgrade snappy-java from 1.1.7.5 to 1.1.8. ### Why are the changes needed? For performance improvements; the released `snappy-java` bundles the latest `Snappy` v1.1.8 binaries with small performance improvements. - snappy-java release note: https://github.com/xerial/snappy-java/releases/tag/1.1.8 - snappy release note: https://github.com/google/snappy/releases/tag/1.1.8 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? GA tests. Closes #30120 from maropu/Snappy1.1.8. 
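For anyone who wants to double-check a dependency bump like this locally, the following spark-shell snippet is a minimal sketch (not part of this patch; it assumes the standard `spark`/`sc` shell bindings) that reports which snappy-java build the driver actually loaded and which compression codec Spark is configured to use:

```scala
// Minimal sketch, not from this patch: confirm the snappy-java build on the driver classpath.
// getImplementationVersion reads the jar manifest and may be null for locally built jars,
// hence the defensive Option handling.
val snappyJavaVersion =
  Option(classOf[org.xerial.snappy.Snappy].getPackage)
    .flatMap(p => Option(p.getImplementationVersion))
println(s"snappy-java on the driver: ${snappyJavaVersion.getOrElse("unknown")}")

// The bumped library is only exercised for block/shuffle compression if the codec is snappy
// (Spark's default codec is lz4).
println(s"spark.io.compression.codec = ${sc.getConf.get("spark.io.compression.codec", "lz4")}")
```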
Authored-by: Takeshi Yamamuro Signed-off-by: Liang-Chi Hsieh --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index c389c885cb0e5..e365559ed8cbf 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -222,7 +222,7 @@ shims/0.9.0//shims-0.9.0.jar slf4j-api/1.7.30//slf4j-api-1.7.30.jar slf4j-log4j12/1.7.30//slf4j-log4j12-1.7.30.jar snakeyaml/1.24//snakeyaml-1.24.jar -snappy-java/1.1.7.5//snappy-java-1.1.7.5.jar +snappy-java/1.1.8//snappy-java-1.1.8.jar spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index ed0db42828301..0c050d62db3da 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -237,7 +237,7 @@ shims/0.9.0//shims-0.9.0.jar slf4j-api/1.7.30//slf4j-api-1.7.30.jar slf4j-log4j12/1.7.30//slf4j-log4j12-1.7.30.jar snakeyaml/1.24//snakeyaml-1.24.jar -snappy-java/1.1.7.5//snappy-java-1.1.7.5.jar +snappy-java/1.1.8//snappy-java-1.1.8.jar spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar diff --git a/pom.xml b/pom.xml index 96406d9bcef13..2fd002e91751f 100644 --- a/pom.xml +++ b/pom.xml @@ -170,7 +170,7 @@ true 1.9.13 2.10.0 - 1.1.7.5 + 1.1.8 1.1.2 1.10 2.5 From 7aed81d4926c8f13ffb38f7ff90162b15c876016 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 21 Oct 2020 14:37:56 -0700 Subject: [PATCH 0295/1009] [SPARK-33202][CORE] Fix BlockManagerDecommissioner to return the correct migration status ### What changes were proposed in this pull request? This PR changes `<` into `>` in the following to fix data loss during storage migrations. ```scala // If we found any new shuffles to migrate or otherwise have not migrated everything. - newShufflesToMigrate.nonEmpty || migratingShuffles.size < numMigratedShuffles.get() + newShufflesToMigrate.nonEmpty || migratingShuffles.size > numMigratedShuffles.get() ``` ### Why are the changes needed? `refreshOffloadingShuffleBlocks` should return `true` when the migration is still on-going. Since `migratingShuffles` is defined like the following, `migratingShuffles.size > numMigratedShuffles.get()` means the migration is not finished. ```scala // Shuffles which are either in queue for migrations or migrated protected[storage] val migratingShuffles = mutable.HashSet[ShuffleBlockInfo]() ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CI with the updated test cases. Closes #30116 from dongjoon-hyun/SPARK-33202. 
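To make the corrected condition concrete, here is a small self-contained Scala sketch; the names mirror the fields quoted above, but these are illustrative stand-ins rather than the actual Spark classes:

```scala
// Self-contained illustration of the fixed check; stand-in definitions, not Spark code.
import java.util.concurrent.atomic.AtomicInteger
import scala.collection.mutable

object MigrationStatusSketch {
  final case class ShuffleBlockInfo(shuffleId: Int, mapId: Long)

  // Shuffles which are either in queue for migration or already migrated.
  val migratingShuffles = mutable.HashSet[ShuffleBlockInfo]()
  // Counts only shuffles whose migration has finished.
  val numMigratedShuffles = new AtomicInteger(0)

  // Mirrors the corrected predicate: migration is still on-going while more shuffles
  // have been enqueued than have finished migrating.
  def stillMigrating(newShufflesToMigrate: Seq[ShuffleBlockInfo]): Boolean =
    newShufflesToMigrate.nonEmpty || migratingShuffles.size > numMigratedShuffles.get()

  def main(args: Array[String]): Unit = {
    migratingShuffles += ShuffleBlockInfo(10, 10)
    println(stillMigrating(Nil)) // true: 1 enqueued, 0 finished
    numMigratedShuffles.incrementAndGet()
    println(stillMigrating(Nil)) // false: everything enqueued has finished
  }
}
```

With the old `<` comparison, `stillMigrating(Nil)` would have returned `false` in the first state as well, reporting an in-flight migration as already finished.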
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../spark/storage/BlockManagerDecommissioner.scala | 7 ++++--- .../storage/BlockManagerDecommissionUnitSuite.scala | 12 ++++++------ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala index 66df72921acb2..89d12406365dc 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala @@ -84,7 +84,7 @@ private[storage] class BlockManagerDecommissioner( case Some((shuffleBlockInfo, retryCount)) => if (retryCount < maxReplicationFailuresForDecommission) { logInfo(s"Trying to migrate shuffle ${shuffleBlockInfo} to ${peer} " + - "($retryCount / $maxReplicationFailuresForDecommission)") + s"($retryCount / $maxReplicationFailuresForDecommission)") val blocks = bm.migratableResolver.getMigrationBlocks(shuffleBlockInfo) logInfo(s"Got migration sub-blocks ${blocks}") @@ -130,6 +130,7 @@ private[storage] class BlockManagerDecommissioner( case Some((shuffleMap, retryCount)) => logError(s"Error during migration, adding ${shuffleMap} back to migration queue", e) shufflesToMigrate.add((shuffleMap, retryCount + 1)) + running = false case None => logError(s"Error while waiting for block to migrate", e) } @@ -246,7 +247,7 @@ private[storage] class BlockManagerDecommissioner( shufflesToMigrate.addAll(newShufflesToMigrate.map(x => (x, 0)).asJava) migratingShuffles ++= newShufflesToMigrate logInfo(s"${newShufflesToMigrate.size} of ${localShuffles.size} local shuffles " + - "are added. In total, ${migratingShuffles.size} shuffles are remained.") + s"are added. In total, ${migratingShuffles.size} shuffles are remained.") // Update the threads doing migrations val livePeerSet = bm.getPeers(false).toSet @@ -268,7 +269,7 @@ private[storage] class BlockManagerDecommissioner( stoppedShuffle = true } // If we found any new shuffles to migrate or otherwise have not migrated everything. - newShufflesToMigrate.nonEmpty || migratingShuffles.size < numMigratedShuffles.get() + newShufflesToMigrate.nonEmpty || migratingShuffles.size > numMigratedShuffles.get() } /** diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionUnitSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionUnitSuite.scala index a87fc1835f6b5..b7ac378b4c6cd 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionUnitSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionUnitSuite.scala @@ -63,20 +63,20 @@ class BlockManagerDecommissionUnitSuite extends SparkFunSuite with Matchers { * a constant Long.MaxValue timestamp. 
*/ private def validateDecommissionTimestamps(conf: SparkConf, bm: BlockManager, - fail: Boolean = false) = { + fail: Boolean = false, assertDone: Boolean = true) = { // Verify the decommissioning manager timestamps and status val bmDecomManager = new BlockManagerDecommissioner(conf, bm) - validateDecommissionTimestampsOnManager(bmDecomManager, fail) + validateDecommissionTimestampsOnManager(bmDecomManager, fail, assertDone) } private def validateDecommissionTimestampsOnManager(bmDecomManager: BlockManagerDecommissioner, - fail: Boolean = false, numShuffles: Option[Int] = None) = { + fail: Boolean = false, assertDone: Boolean = true, numShuffles: Option[Int] = None) = { var previousTime: Option[Long] = None try { bmDecomManager.start() eventually(timeout(100.second), interval(10.milliseconds)) { val (currentTime, done) = bmDecomManager.lastMigrationInfo() - assert(done) + assert(!assertDone || done) // Make sure the time stamp starts moving forward. if (!fail) { previousTime match { @@ -98,7 +98,7 @@ class BlockManagerDecommissionUnitSuite extends SparkFunSuite with Matchers { // Wait 5 seconds and assert times keep moving forward. Thread.sleep(5000) val (currentTime, done) = bmDecomManager.lastMigrationInfo() - assert(done && currentTime > previousTime.get) + assert((!assertDone || done) && currentTime > previousTime.get) } } finally { bmDecomManager.stop() @@ -183,7 +183,7 @@ class BlockManagerDecommissionUnitSuite extends SparkFunSuite with Matchers { val bmDecomManager = new BlockManagerDecommissioner(sparkConf, bm) bmDecomManager.migratingShuffles += ShuffleBlockInfo(10, 10) - validateDecommissionTimestampsOnManager(bmDecomManager) + validateDecommissionTimestampsOnManager(bmDecomManager, fail = false, assertDone = false) } test("block decom manager handles IO failures") { From 66005a323625fc8c7346d28e9a8c52f91ae8d1a0 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 21 Oct 2020 14:46:47 -0700 Subject: [PATCH 0296/1009] [SPARK-31964][PYTHON][FOLLOW-UP] Use is_categorical_dtype instead of deprecated is_categorical ### What changes were proposed in this pull request? This PR is a small followup of https://github.com/apache/spark/pull/28793 and proposes to use `is_categorical_dtype` instead of deprecated `is_categorical`. `is_categorical_dtype` exists from minimum pandas version we support (https://github.com/pandas-dev/pandas/blob/v0.23.2/pandas/core/dtypes/api.py), and `is_categorical` was deprecated from pandas 1.1.0 (https://github.com/pandas-dev/pandas/commit/87a1cc21cab751c16fda4e6f0a95988a8d90462b). ### Why are the changes needed? To avoid using deprecated APIs, and remove warnings. ### Does this PR introduce _any_ user-facing change? Yes, it will remove warnings that says `is_categorical` is deprecated. ### How was this patch tested? By running any pandas UDF with pandas 1.1.0+: ```python import pandas as pd from pyspark.sql.functions import pandas_udf def func(x: pd.Series) -> pd.Series: return x spark.range(10).select(pandas_udf(func, "long")("id")).show() ``` Before: ``` /.../python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py:151: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead ... ``` After: ``` ... ``` Closes #30114 from HyukjinKwon/replace-deprecated-is_categorical. 
Authored-by: HyukjinKwon Signed-off-by: Bryan Cutler --- python/pyspark/sql/pandas/serializers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/pandas/serializers.py b/python/pyspark/sql/pandas/serializers.py index 63fb8562799e3..09c7cf1b312bc 100644 --- a/python/pyspark/sql/pandas/serializers.py +++ b/python/pyspark/sql/pandas/serializers.py @@ -136,7 +136,7 @@ def _create_batch(self, series): import pandas as pd import pyarrow as pa from pyspark.sql.pandas.types import _check_series_convert_timestamps_internal - from pandas.api.types import is_categorical + from pandas.api.types import is_categorical_dtype # Make input conform to [(series1, type1), (series2, type2), ...] if not isinstance(series, (list, tuple)) or \ (len(series) == 2 and isinstance(series[1], pa.DataType)): @@ -148,7 +148,7 @@ def create_array(s, t): # Ensure timestamp series are in expected form for Spark internal representation if t is not None and pa.types.is_timestamp(t): s = _check_series_convert_timestamps_internal(s, self._timezone) - elif is_categorical(s.dtype): + elif is_categorical_dtype(s.dtype): # Note: This can be removed once minimum pyarrow version is >= 0.16.1 s = s.astype(s.dtypes.categories.dtype) try: From bbf2d6f6df0011c3035d829a56b035a2b094295c Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 22 Oct 2020 10:03:41 +0900 Subject: [PATCH 0297/1009] [SPARK-33160][SQL][FOLLOWUP] Update benchmarks of INT96 type rebasing ### What changes were proposed in this pull request? 1. Turn off/on the SQL config `spark.sql.legacy.parquet.int96RebaseModeInWrite` which was added by https://github.com/apache/spark/pull/30056 in `DateTimeRebaseBenchmark`. The parquet readers should infer correct rebasing mode automatically from metadata. 2. Regenerate benchmark results of `DateTimeRebaseBenchmark` in the environment: | Item | Description | | ---- | ----| | Region | us-west-2 (Oregon) | | Instance | r3.xlarge (spot instance) | | AMI | ami-06f2f779464715dc5 (ubuntu/images/hvm-ssd/ubuntu-bionic-18.04-amd64-server-20190722.1) | | Java | OpenJDK8/11 installed by`sudo add-apt-repository ppa:openjdk-r/ppa` & `sudo apt install openjdk-11-jdk`| ### Why are the changes needed? To have up-to-date info about INT96 performance which is the default type for Catalyst's timestamp type. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By updating benchmark results: ``` $ SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain org.apache.spark.sql.execution.benchmark.DateTimeRebaseBenchmark" ``` Closes #30118 from MaxGekk/int96-rebase-benchmark. 
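For a quick manual check of the same write path outside the benchmark, the sketch below (not part of this patch; it assumes a spark-shell session with the usual `spark` binding and an arbitrary scratch path) writes an ancient timestamp as INT96 under an explicit rebase mode and reads it back:

```scala
// Sketch only: write one pre-Gregorian-switch timestamp as INT96 with an explicit
// rebase mode, then read it back. The config keys are the ones discussed in this
// patch series; "/tmp/int96_rebase_demo" is just a scratch location.
spark.conf.set("spark.sql.parquet.outputTimestampType", "INT96")
spark.conf.set("spark.sql.legacy.parquet.int96RebaseModeInWrite", "CORRECTED")

spark.sql("SELECT timestamp'1001-01-01 01:02:03' AS ts")
  .write.mode("overwrite").parquet("/tmp/int96_rebase_demo")

// Per the note above, the readers infer the rebase mode from the file metadata,
// so no spark.sql.legacy.parquet.int96RebaseModeInRead setting is needed here.
spark.read.parquet("/tmp/int96_rebase_demo").show(false)
```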
Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- .../DateTimeRebaseBenchmark-jdk11-results.txt | 206 +++++++++--------- .../DateTimeRebaseBenchmark-results.txt | 206 +++++++++--------- .../benchmark/DateTimeRebaseBenchmark.scala | 3 +- 3 files changed, 208 insertions(+), 207 deletions(-) diff --git a/sql/core/benchmarks/DateTimeRebaseBenchmark-jdk11-results.txt b/sql/core/benchmarks/DateTimeRebaseBenchmark-jdk11-results.txt index 05896a4d69b47..74b19f2eef6a8 100644 --- a/sql/core/benchmarks/DateTimeRebaseBenchmark-jdk11-results.txt +++ b/sql/core/benchmarks/DateTimeRebaseBenchmark-jdk11-results.txt @@ -2,153 +2,153 @@ Rebasing dates/timestamps in Parquet datasource ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save DATE to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, noop 20023 20023 0 5.0 200.2 1.0X -before 1582, noop 10729 10729 0 9.3 107.3 1.9X -after 1582, rebase EXCEPTION 31834 31834 0 3.1 318.3 0.6X -after 1582, rebase LEGACY 31997 31997 0 3.1 320.0 0.6X -after 1582, rebase CORRECTED 31712 31712 0 3.2 317.1 0.6X -before 1582, rebase LEGACY 23663 23663 0 4.2 236.6 0.8X -before 1582, rebase CORRECTED 22749 22749 0 4.4 227.5 0.9X - -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +after 1582, noop 21041 21041 0 4.8 210.4 1.0X +before 1582, noop 11202 11202 0 8.9 112.0 1.9X +after 1582, rebase EXCEPTION 32810 32810 0 3.0 328.1 0.6X +after 1582, rebase LEGACY 32530 32530 0 3.1 325.3 0.6X +after 1582, rebase CORRECTED 32849 32849 0 3.0 328.5 0.6X +before 1582, rebase LEGACY 23537 23537 0 4.2 235.4 0.9X +before 1582, rebase CORRECTED 22870 22870 0 4.4 228.7 0.9X + +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load DATE from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, vec off, rebase EXCEPTION 12984 13262 257 7.7 129.8 1.0X -after 1582, vec off, rebase LEGACY 13278 13330 50 7.5 132.8 1.0X -after 1582, vec off, rebase CORRECTED 13202 13255 50 7.6 132.0 1.0X -after 1582, vec on, rebase EXCEPTION 3823 3853 40 26.2 38.2 3.4X -after 1582, vec on, rebase LEGACY 3846 3876 27 26.0 38.5 3.4X -after 1582, vec on, rebase CORRECTED 3775 3838 62 26.5 37.7 3.4X -before 1582, vec off, rebase LEGACY 13671 13692 26 7.3 136.7 0.9X -before 1582, vec off, rebase CORRECTED 13387 13476 106 7.5 133.9 1.0X -before 1582, vec on, rebase LEGACY 4477 4484 7 22.3 44.8 2.9X -before 1582, vec on, rebase CORRECTED 3729 3773 50 26.8 37.3 3.5X - -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +after 1582, vec off, rebase EXCEPTION 13114 13225 104 7.6 131.1 1.0X +after 1582, vec off, rebase LEGACY 13175 13189 15 7.6 131.8 1.0X +after 1582, vec off, rebase CORRECTED 13080 13115 34 7.6 130.8 1.0X +after 1582, vec on, rebase EXCEPTION 3698 3726 29 27.0 37.0 3.5X +after 1582, vec on, rebase LEGACY 3730 3745 17 26.8 37.3 3.5X +after 1582, vec on, rebase CORRECTED 3714 3758 75 
26.9 37.1 3.5X +before 1582, vec off, rebase LEGACY 13519 13575 63 7.4 135.2 1.0X +before 1582, vec off, rebase CORRECTED 13210 13309 108 7.6 132.1 1.0X +before 1582, vec on, rebase LEGACY 4459 4488 44 22.4 44.6 2.9X +before 1582, vec on, rebase CORRECTED 3661 3718 88 27.3 36.6 3.6X + +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save TIMESTAMP_INT96 to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 3020 3020 0 33.1 30.2 1.0X -before 1900, noop 3013 3013 0 33.2 30.1 1.0X -after 1900, rebase EXCEPTION 28796 28796 0 3.5 288.0 0.1X -after 1900, rebase LEGACY 28869 28869 0 3.5 288.7 0.1X -after 1900, rebase CORRECTED 28522 28522 0 3.5 285.2 0.1X -before 1900, rebase LEGACY 30594 30594 0 3.3 305.9 0.1X -before 1900, rebase CORRECTED 30743 30743 0 3.3 307.4 0.1X - -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +after 1900, noop 2900 2900 0 34.5 29.0 1.0X +before 1900, noop 2848 2848 0 35.1 28.5 1.0X +after 1900, rebase EXCEPTION 27623 27623 0 3.6 276.2 0.1X +after 1900, rebase LEGACY 27305 27305 0 3.7 273.0 0.1X +after 1900, rebase CORRECTED 27715 27715 0 3.6 277.2 0.1X +before 1900, rebase LEGACY 30911 30911 0 3.2 309.1 0.1X +before 1900, rebase CORRECTED 27944 27944 0 3.6 279.4 0.1X + +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load TIMESTAMP_INT96 from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off, rebase EXCEPTION 19325 19468 135 5.2 193.3 1.0X -after 1900, vec off, rebase LEGACY 19568 19602 30 5.1 195.7 1.0X -after 1900, vec off, rebase CORRECTED 19532 19538 6 5.1 195.3 1.0X -after 1900, vec on, rebase EXCEPTION 9884 9990 94 10.1 98.8 2.0X -after 1900, vec on, rebase LEGACY 9933 9985 49 10.1 99.3 1.9X -after 1900, vec on, rebase CORRECTED 9967 10043 76 10.0 99.7 1.9X -before 1900, vec off, rebase LEGACY 24162 24198 37 4.1 241.6 0.8X -before 1900, vec off, rebase CORRECTED 24034 24056 20 4.2 240.3 0.8X -before 1900, vec on, rebase LEGACY 12548 12625 72 8.0 125.5 1.5X -before 1900, vec on, rebase CORRECTED 12580 12660 115 7.9 125.8 1.5X - -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +after 1900, vec off, rebase EXCEPTION 16853 16885 41 5.9 168.5 1.0X +after 1900, vec off, rebase LEGACY 16804 16816 21 6.0 168.0 1.0X +after 1900, vec off, rebase CORRECTED 16985 17020 58 5.9 169.9 1.0X +after 1900, vec on, rebase EXCEPTION 7044 7063 19 14.2 70.4 2.4X +after 1900, vec on, rebase LEGACY 7183 7255 94 13.9 71.8 2.3X +after 1900, vec on, rebase CORRECTED 7047 7137 86 14.2 70.5 2.4X +before 1900, vec off, rebase LEGACY 20371 20458 81 4.9 203.7 0.8X +before 1900, vec off, rebase CORRECTED 17484 17541 54 5.7 174.8 1.0X +before 1900, vec on, rebase LEGACY 10284 10327 45 9.7 102.8 1.6X +before 1900, vec on, rebase CORRECTED 7044 7073 37 14.2 70.4 2.4X + +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save TIMESTAMP_MICROS to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 3159 3159 0 31.7 31.6 1.0X -before 1900, noop 3038 3038 0 32.9 30.4 1.0X -after 1900, rebase EXCEPTION 16885 16885 0 5.9 168.8 0.2X -after 1900, rebase LEGACY 17171 17171 0 5.8 171.7 0.2X -after 1900, rebase CORRECTED 17353 17353 0 5.8 173.5 0.2X -before 1900, rebase LEGACY 20579 20579 0 4.9 205.8 0.2X -before 1900, rebase CORRECTED 17544 17544 0 5.7 175.4 0.2X - -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +after 1900, noop 2848 2848 0 35.1 28.5 1.0X +before 1900, noop 2855 2855 0 35.0 28.6 1.0X +after 1900, rebase EXCEPTION 15622 15622 0 6.4 156.2 0.2X +after 1900, rebase LEGACY 16148 16148 0 6.2 161.5 0.2X +after 1900, rebase CORRECTED 16946 16946 0 5.9 169.5 0.2X +before 1900, rebase LEGACY 19486 19486 0 5.1 194.9 0.1X +before 1900, rebase CORRECTED 17029 17029 0 5.9 170.3 0.2X + +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load TIMESTAMP_MICROS from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off, rebase EXCEPTION 16304 16345 58 6.1 163.0 1.0X -after 1900, vec off, rebase LEGACY 16503 16585 75 6.1 165.0 1.0X -after 1900, vec off, rebase CORRECTED 16413 16463 44 6.1 164.1 1.0X -after 1900, vec on, rebase EXCEPTION 5017 5034 29 19.9 50.2 3.2X -after 1900, vec on, rebase LEGACY 5060 5094 30 19.8 50.6 3.2X -after 1900, vec on, rebase CORRECTED 4969 4971 1 20.1 49.7 3.3X -before 1900, vec off, rebase LEGACY 19767 20001 203 5.1 197.7 0.8X -before 1900, vec off, rebase CORRECTED 16421 16465 38 6.1 164.2 1.0X -before 1900, vec on, rebase LEGACY 8535 8608 64 11.7 85.4 1.9X -before 1900, vec on, rebase CORRECTED 5044 5077 32 19.8 50.4 3.2X - -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +after 1900, vec off, rebase EXCEPTION 15785 15848 56 6.3 157.9 1.0X +after 1900, vec off, rebase LEGACY 15935 15954 17 6.3 159.3 1.0X +after 1900, vec off, rebase CORRECTED 15976 16046 62 6.3 159.8 1.0X +after 1900, vec on, rebase EXCEPTION 4925 4941 20 20.3 49.3 3.2X +after 1900, vec on, rebase LEGACY 5033 5041 11 19.9 50.3 3.1X +after 1900, vec on, rebase CORRECTED 4946 4972 29 20.2 49.5 3.2X +before 1900, vec off, rebase LEGACY 18619 18782 176 5.4 186.2 0.8X +before 1900, vec off, rebase CORRECTED 15956 16018 56 6.3 159.6 1.0X +before 1900, vec on, rebase LEGACY 8461 8472 14 11.8 84.6 1.9X +before 1900, vec on, rebase CORRECTED 4953 4962 12 20.2 49.5 3.2X + +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save TIMESTAMP_MILLIS to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2995 2995 0 33.4 29.9 1.0X -before 1900, noop 2981 2981 0 33.5 29.8 1.0X -after 1900, rebase EXCEPTION 16196 16196 0 6.2 162.0 0.2X -after 1900, rebase LEGACY 16550 16550 0 6.0 165.5 0.2X -after 1900, rebase CORRECTED 16908 16908 0 5.9 169.1 0.2X -before 1900, rebase LEGACY 20087 20087 0 5.0 200.9 0.1X -before 1900, rebase CORRECTED 17171 17171 0 5.8 171.7 0.2X - -OpenJDK 64-Bit Server VM 
11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +after 1900, noop 3019 3019 0 33.1 30.2 1.0X +before 1900, noop 2896 2896 0 34.5 29.0 1.0X +after 1900, rebase EXCEPTION 15525 15525 0 6.4 155.2 0.2X +after 1900, rebase LEGACY 15903 15903 0 6.3 159.0 0.2X +after 1900, rebase CORRECTED 16468 16468 0 6.1 164.7 0.2X +before 1900, rebase LEGACY 19620 19620 0 5.1 196.2 0.2X +before 1900, rebase CORRECTED 16470 16470 0 6.1 164.7 0.2X + +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load TIMESTAMP_MILLIS from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off, rebase EXCEPTION 16688 16787 88 6.0 166.9 1.0X -after 1900, vec off, rebase LEGACY 17383 17462 73 5.8 173.8 1.0X -after 1900, vec off, rebase CORRECTED 17317 17329 11 5.8 173.2 1.0X -after 1900, vec on, rebase EXCEPTION 6342 6348 6 15.8 63.4 2.6X -after 1900, vec on, rebase LEGACY 6500 6521 18 15.4 65.0 2.6X -after 1900, vec on, rebase CORRECTED 6164 6172 11 16.2 61.6 2.7X -before 1900, vec off, rebase LEGACY 20575 20665 81 4.9 205.7 0.8X -before 1900, vec off, rebase CORRECTED 17239 17290 61 5.8 172.4 1.0X -before 1900, vec on, rebase LEGACY 9310 9373 60 10.7 93.1 1.8X -before 1900, vec on, rebase CORRECTED 6091 6105 16 16.4 60.9 2.7X +after 1900, vec off, rebase EXCEPTION 16329 16357 26 6.1 163.3 1.0X +after 1900, vec off, rebase LEGACY 16609 16659 51 6.0 166.1 1.0X +after 1900, vec off, rebase CORRECTED 16659 16765 91 6.0 166.6 1.0X +after 1900, vec on, rebase EXCEPTION 6132 6162 28 16.3 61.3 2.7X +after 1900, vec on, rebase LEGACY 6344 6397 61 15.8 63.4 2.6X +after 1900, vec on, rebase CORRECTED 6023 6024 2 16.6 60.2 2.7X +before 1900, vec off, rebase LEGACY 19611 19626 13 5.1 196.1 0.8X +before 1900, vec off, rebase CORRECTED 16765 16784 19 6.0 167.7 1.0X +before 1900, vec on, rebase LEGACY 9136 9158 19 10.9 91.4 1.8X +before 1900, vec on, rebase CORRECTED 6023 6042 30 16.6 60.2 2.7X ================================================================================================ Rebasing dates/timestamps in ORC datasource ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save DATE to ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, noop 19583 19583 0 5.1 195.8 1.0X -before 1582, noop 10711 10711 0 9.3 107.1 1.8X -after 1582 27864 27864 0 3.6 278.6 0.7X -before 1582 19648 19648 0 5.1 196.5 1.0X +after 1582, noop 20934 20934 0 4.8 209.3 1.0X +before 1582, noop 11098 11098 0 9.0 111.0 1.9X +after 1582 29249 29249 0 3.4 292.5 0.7X +before 1582 20059 20059 0 5.0 200.6 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load DATE from ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -after 1582, vec off 10383 10560 192 9.6 103.8 1.0X -after 1582, vec on 3844 3864 33 26.0 38.4 2.7X -before 1582, vec off 10867 10916 48 9.2 108.7 1.0X -before 1582, vec on 4158 4170 12 24.0 41.6 2.5X +after 1582, vec off 10751 10802 56 9.3 107.5 1.0X +after 1582, vec on 3815 3870 62 26.2 38.1 2.8X +before 1582, vec off 11144 11174 37 9.0 111.4 1.0X +before 1582, vec on 4120 4126 8 24.3 41.2 2.6X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save TIMESTAMP to ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2989 2989 0 33.5 29.9 1.0X -before 1900, noop 3000 3000 0 33.3 30.0 1.0X -after 1900 19426 19426 0 5.1 194.3 0.2X -before 1900 23282 23282 0 4.3 232.8 0.1X +after 1900, noop 2858 2858 0 35.0 28.6 1.0X +before 1900, noop 2859 2859 0 35.0 28.6 1.0X +after 1900 17098 17098 0 5.8 171.0 0.2X +before 1900 20639 20639 0 4.8 206.4 0.1X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load TIMESTAMP from ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off 12089 12102 15 8.3 120.9 1.0X -after 1900, vec on 5210 5325 100 19.2 52.1 2.3X -before 1900, vec off 15320 15373 46 6.5 153.2 0.8X -before 1900, vec on 7937 7970 48 12.6 79.4 1.5X +after 1900, vec off 12292 12318 23 8.1 122.9 1.0X +after 1900, vec on 5198 5271 95 19.2 52.0 2.4X +before 1900, vec off 15108 15145 53 6.6 151.1 0.8X +before 1900, vec on 8085 8277 245 12.4 80.8 1.5X diff --git a/sql/core/benchmarks/DateTimeRebaseBenchmark-results.txt b/sql/core/benchmarks/DateTimeRebaseBenchmark-results.txt index 3e94d6c6fcfa7..07b156a62e2ec 100644 --- a/sql/core/benchmarks/DateTimeRebaseBenchmark-results.txt +++ b/sql/core/benchmarks/DateTimeRebaseBenchmark-results.txt @@ -2,153 +2,153 @@ Rebasing dates/timestamps in Parquet datasource ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save DATE to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, noop 23300 23300 0 4.3 233.0 1.0X -before 1582, noop 10585 10585 0 9.4 105.9 2.2X -after 1582, rebase EXCEPTION 35215 35215 0 2.8 352.1 0.7X -after 1582, rebase LEGACY 34927 34927 0 2.9 349.3 0.7X -after 1582, rebase CORRECTED 35479 35479 0 2.8 354.8 0.7X -before 1582, rebase LEGACY 22767 22767 0 4.4 227.7 1.0X -before 1582, rebase CORRECTED 22527 22527 0 4.4 225.3 1.0X - -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +after 1582, noop 22736 22736 0 4.4 227.4 1.0X +before 1582, noop 
10512 10512 0 9.5 105.1 2.2X +after 1582, rebase EXCEPTION 35759 35759 0 2.8 357.6 0.6X +after 1582, rebase LEGACY 36229 36229 0 2.8 362.3 0.6X +after 1582, rebase CORRECTED 35489 35489 0 2.8 354.9 0.6X +before 1582, rebase LEGACY 23514 23514 0 4.3 235.1 1.0X +before 1582, rebase CORRECTED 23234 23234 0 4.3 232.3 1.0X + +OpenJDK 64-Bit Server VM 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load DATE from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, vec off, rebase EXCEPTION 13480 13577 94 7.4 134.8 1.0X -after 1582, vec off, rebase LEGACY 13466 13586 118 7.4 134.7 1.0X -after 1582, vec off, rebase CORRECTED 13526 13558 41 7.4 135.3 1.0X -after 1582, vec on, rebase EXCEPTION 3759 3778 28 26.6 37.6 3.6X -after 1582, vec on, rebase LEGACY 3957 4004 57 25.3 39.6 3.4X -after 1582, vec on, rebase CORRECTED 3739 3755 25 26.7 37.4 3.6X -before 1582, vec off, rebase LEGACY 13986 14038 67 7.1 139.9 1.0X -before 1582, vec off, rebase CORRECTED 13453 13491 49 7.4 134.5 1.0X -before 1582, vec on, rebase LEGACY 4716 4724 10 21.2 47.2 2.9X -before 1582, vec on, rebase CORRECTED 3701 3750 50 27.0 37.0 3.6X - -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +after 1582, vec off, rebase EXCEPTION 13036 13121 85 7.7 130.4 1.0X +after 1582, vec off, rebase LEGACY 13567 13631 55 7.4 135.7 1.0X +after 1582, vec off, rebase CORRECTED 13476 13498 28 7.4 134.8 1.0X +after 1582, vec on, rebase EXCEPTION 3676 3679 3 27.2 36.8 3.5X +after 1582, vec on, rebase LEGACY 3842 3863 19 26.0 38.4 3.4X +after 1582, vec on, rebase CORRECTED 3706 3756 69 27.0 37.1 3.5X +before 1582, vec off, rebase LEGACY 13781 13832 68 7.3 137.8 0.9X +before 1582, vec off, rebase CORRECTED 13414 13445 28 7.5 134.1 1.0X +before 1582, vec on, rebase LEGACY 4774 4788 14 20.9 47.7 2.7X +before 1582, vec on, rebase CORRECTED 3650 3691 38 27.4 36.5 3.6X + +OpenJDK 64-Bit Server VM 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save TIMESTAMP_INT96 to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2790 2790 0 35.8 27.9 1.0X -before 1900, noop 2812 2812 0 35.6 28.1 1.0X -after 1900, rebase EXCEPTION 24789 24789 0 4.0 247.9 0.1X -after 1900, rebase LEGACY 24539 24539 0 4.1 245.4 0.1X -after 1900, rebase CORRECTED 24543 24543 0 4.1 245.4 0.1X -before 1900, rebase LEGACY 30496 30496 0 3.3 305.0 0.1X -before 1900, rebase CORRECTED 30428 30428 0 3.3 304.3 0.1X - -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +after 1900, noop 2696 2696 0 37.1 27.0 1.0X +before 1900, noop 2687 2687 0 37.2 26.9 1.0X +after 1900, rebase EXCEPTION 29085 29085 0 3.4 290.9 0.1X +after 1900, rebase LEGACY 29789 29789 0 3.4 297.9 0.1X +after 1900, rebase CORRECTED 29563 29563 0 3.4 295.6 0.1X +before 1900, rebase LEGACY 34033 34033 0 2.9 340.3 0.1X +before 1900, rebase CORRECTED 29687 29687 0 3.4 296.9 0.1X + +OpenJDK 64-Bit Server VM 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load TIMESTAMP_INT96 from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off, rebase EXCEPTION 17106 17192 75 5.8 171.1 1.0X -after 1900, vec off, rebase LEGACY 17273 17337 55 5.8 172.7 1.0X -after 1900, vec off, rebase CORRECTED 17073 17215 128 5.9 170.7 1.0X -after 1900, vec on, rebase EXCEPTION 8903 8976 117 11.2 89.0 1.9X -after 1900, vec on, rebase LEGACY 8793 8876 84 11.4 87.9 1.9X -after 1900, vec on, rebase CORRECTED 8820 8878 53 11.3 88.2 1.9X -before 1900, vec off, rebase LEGACY 20997 21069 82 4.8 210.0 0.8X -before 1900, vec off, rebase CORRECTED 20874 20946 90 4.8 208.7 0.8X -before 1900, vec on, rebase LEGACY 12024 12090 58 8.3 120.2 1.4X -before 1900, vec on, rebase CORRECTED 12020 12069 64 8.3 120.2 1.4X - -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +after 1900, vec off, rebase EXCEPTION 16623 16711 78 6.0 166.2 1.0X +after 1900, vec off, rebase LEGACY 16525 16641 103 6.1 165.3 1.0X +after 1900, vec off, rebase CORRECTED 16698 16847 133 6.0 167.0 1.0X +after 1900, vec on, rebase EXCEPTION 8614 8723 97 11.6 86.1 1.9X +after 1900, vec on, rebase LEGACY 9790 9812 20 10.2 97.9 1.7X +after 1900, vec on, rebase CORRECTED 8607 8671 73 11.6 86.1 1.9X +before 1900, vec off, rebase LEGACY 21389 21553 142 4.7 213.9 0.8X +before 1900, vec off, rebase CORRECTED 17539 17545 6 5.7 175.4 0.9X +before 1900, vec on, rebase LEGACY 13594 13627 40 7.4 135.9 1.2X +before 1900, vec on, rebase CORRECTED 8620 8666 73 11.6 86.2 1.9X + +OpenJDK 64-Bit Server VM 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save TIMESTAMP_MICROS to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2939 2939 0 34.0 29.4 1.0X -before 1900, noop 2917 2917 0 34.3 29.2 1.0X -after 1900, rebase EXCEPTION 15954 15954 0 6.3 159.5 0.2X -after 1900, rebase LEGACY 16402 16402 0 6.1 164.0 0.2X -after 1900, rebase CORRECTED 16541 16541 0 6.0 165.4 0.2X -before 1900, rebase LEGACY 20500 20500 0 4.9 205.0 0.1X -before 1900, rebase CORRECTED 16764 16764 0 6.0 167.6 0.2X - -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +after 1900, noop 2755 2755 0 36.3 27.5 1.0X +before 1900, noop 2819 2819 0 35.5 28.2 1.0X +after 1900, rebase EXCEPTION 16742 16742 0 6.0 167.4 0.2X +after 1900, rebase LEGACY 16978 16978 0 5.9 169.8 0.2X +after 1900, rebase CORRECTED 17508 17508 0 5.7 175.1 0.2X +before 1900, rebase LEGACY 21961 21961 0 4.6 219.6 0.1X +before 1900, rebase CORRECTED 17770 17770 0 5.6 177.7 0.2X + +OpenJDK 64-Bit Server VM 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load TIMESTAMP_MICROS from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off, rebase EXCEPTION 15607 15655 81 6.4 156.1 1.0X -after 1900, vec off, rebase LEGACY 15616 15676 54 6.4 156.2 1.0X -after 1900, vec off, rebase CORRECTED 15634 15732 108 6.4 156.3 1.0X -after 1900, vec on, rebase EXCEPTION 5041 5057 16 19.8 50.4 3.1X -after 1900, vec on, rebase LEGACY 5516 5539 29 18.1 55.2 2.8X -after 1900, vec on, rebase CORRECTED 5087 5104 28 19.7 50.9 3.1X -before 1900, vec off, rebase LEGACY 
19262 19338 79 5.2 192.6 0.8X -before 1900, vec off, rebase CORRECTED 15718 15755 53 6.4 157.2 1.0X -before 1900, vec on, rebase LEGACY 10147 10240 114 9.9 101.5 1.5X -before 1900, vec on, rebase CORRECTED 5062 5080 21 19.8 50.6 3.1X - -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +after 1900, vec off, rebase EXCEPTION 15311 15405 82 6.5 153.1 1.0X +after 1900, vec off, rebase LEGACY 15501 15578 73 6.5 155.0 1.0X +after 1900, vec off, rebase CORRECTED 15331 15472 123 6.5 153.3 1.0X +after 1900, vec on, rebase EXCEPTION 4976 5008 38 20.1 49.8 3.1X +after 1900, vec on, rebase LEGACY 5366 5443 67 18.6 53.7 2.9X +after 1900, vec on, rebase CORRECTED 4977 4982 9 20.1 49.8 3.1X +before 1900, vec off, rebase LEGACY 19205 19281 65 5.2 192.1 0.8X +before 1900, vec off, rebase CORRECTED 15458 15490 28 6.5 154.6 1.0X +before 1900, vec on, rebase LEGACY 9878 9933 79 10.1 98.8 1.5X +before 1900, vec on, rebase CORRECTED 4886 4961 66 20.5 48.9 3.1X + +OpenJDK 64-Bit Server VM 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save TIMESTAMP_MILLIS to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2915 2915 0 34.3 29.2 1.0X -before 1900, noop 2894 2894 0 34.6 28.9 1.0X -after 1900, rebase EXCEPTION 15545 15545 0 6.4 155.4 0.2X -after 1900, rebase LEGACY 15840 15840 0 6.3 158.4 0.2X -after 1900, rebase CORRECTED 16324 16324 0 6.1 163.2 0.2X -before 1900, rebase LEGACY 20359 20359 0 4.9 203.6 0.1X -before 1900, rebase CORRECTED 16292 16292 0 6.1 162.9 0.2X - -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +after 1900, noop 2836 2836 0 35.3 28.4 1.0X +before 1900, noop 2813 2813 0 35.6 28.1 1.0X +after 1900, rebase EXCEPTION 16549 16549 0 6.0 165.5 0.2X +after 1900, rebase LEGACY 16296 16296 0 6.1 163.0 0.2X +after 1900, rebase CORRECTED 16913 16913 0 5.9 169.1 0.2X +before 1900, rebase LEGACY 21150 21150 0 4.7 211.5 0.1X +before 1900, rebase CORRECTED 17090 17090 0 5.9 170.9 0.2X + +OpenJDK 64-Bit Server VM 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load TIMESTAMP_MILLIS from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off, rebase EXCEPTION 15857 16015 223 6.3 158.6 1.0X -after 1900, vec off, rebase LEGACY 16174 16231 63 6.2 161.7 1.0X -after 1900, vec off, rebase CORRECTED 16353 16400 67 6.1 163.5 1.0X -after 1900, vec on, rebase EXCEPTION 6449 6459 9 15.5 64.5 2.5X -after 1900, vec on, rebase LEGACY 7028 7035 6 14.2 70.3 2.3X -after 1900, vec on, rebase CORRECTED 6585 6623 37 15.2 65.8 2.4X -before 1900, vec off, rebase LEGACY 19929 20027 95 5.0 199.3 0.8X -before 1900, vec off, rebase CORRECTED 16401 16451 49 6.1 164.0 1.0X -before 1900, vec on, rebase LEGACY 10517 10563 40 9.5 105.2 1.5X -before 1900, vec on, rebase CORRECTED 6659 6675 26 15.0 66.6 2.4X +after 1900, vec off, rebase EXCEPTION 15706 15823 132 6.4 157.1 1.0X +after 1900, vec off, rebase LEGACY 16100 16194 88 6.2 161.0 1.0X +after 1900, vec off, rebase CORRECTED 16227 16282 81 6.2 162.3 1.0X +after 1900, vec on, rebase EXCEPTION 6383 6404 26 15.7 63.8 2.5X +after 1900, vec on, rebase LEGACY 6994 7006 15 14.3 69.9 2.2X 
+after 1900, vec on, rebase CORRECTED 6580 6597 15 15.2 65.8 2.4X +before 1900, vec off, rebase LEGACY 19601 19674 82 5.1 196.0 0.8X +before 1900, vec off, rebase CORRECTED 16188 16215 25 6.2 161.9 1.0X +before 1900, vec on, rebase LEGACY 10305 10360 51 9.7 103.1 1.5X +before 1900, vec on, rebase CORRECTED 6573 6600 28 15.2 65.7 2.4X ================================================================================================ Rebasing dates/timestamps in ORC datasource ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save DATE to ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, noop 22782 22782 0 4.4 227.8 1.0X -before 1582, noop 10555 10555 0 9.5 105.6 2.2X -after 1582 31497 31497 0 3.2 315.0 0.7X -before 1582 19803 19803 0 5.0 198.0 1.2X +after 1582, noop 22766 22766 0 4.4 227.7 1.0X +before 1582, noop 10535 10535 0 9.5 105.3 2.2X +after 1582 31037 31037 0 3.2 310.4 0.7X +before 1582 19755 19755 0 5.1 197.6 1.2X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load DATE from ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, vec off 10180 10214 44 9.8 101.8 1.0X -after 1582, vec on 3785 3804 24 26.4 37.8 2.7X -before 1582, vec off 10537 10582 39 9.5 105.4 1.0X -before 1582, vec on 4117 4146 25 24.3 41.2 2.5X +after 1582, vec off 11137 11165 37 9.0 111.4 1.0X +after 1582, vec on 3701 3734 51 27.0 37.0 3.0X +before 1582, vec off 11379 11409 50 8.8 113.8 1.0X +before 1582, vec on 4110 4160 57 24.3 41.1 2.7X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save TIMESTAMP to ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2853 2853 0 35.1 28.5 1.0X -before 1900, noop 2999 2999 0 33.3 30.0 1.0X -after 1900 16757 16757 0 6.0 167.6 0.2X -before 1900 21542 21542 0 4.6 215.4 0.1X +after 1900, noop 2830 2830 0 35.3 28.3 1.0X +before 1900, noop 2867 2867 0 34.9 28.7 1.0X +after 1900 17867 17867 0 5.6 178.7 0.2X +before 1900 21555 21555 0 4.6 215.6 0.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load TIMESTAMP from ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off 12212 12254 39 8.2 122.1 1.0X -after 1900, vec on 5369 5390 35 18.6 53.7 2.3X -before 1900, vec off 15661 15705 73 6.4 156.6 0.8X -before 1900, vec on 8720 
8744 29 11.5 87.2 1.4X +after 1900, vec off 12245 12269 24 8.2 122.5 1.0X +after 1900, vec on 5258 5303 63 19.0 52.6 2.3X +before 1900, vec off 15698 15777 119 6.4 157.0 0.8X +before 1900, vec on 8568 8674 138 11.7 85.7 1.4X diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeRebaseBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeRebaseBenchmark.scala index 7caaa5376db7f..bc94d1f235800 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeRebaseBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeRebaseBenchmark.scala @@ -165,7 +165,8 @@ object DateTimeRebaseBenchmark extends SqlBasedBenchmark { benchmark.addCase(caseName(modernDates, dateTime, Some(mode)), 1) { _ => withSQLConf( SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key -> getOutputType(dateTime), - SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> mode.toString) { + SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> mode.toString, + SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key -> mode.toString) { genDF(rowsNum, dateTime, modernDates) .write .mode("overwrite") From 4a33cd928df4739e69ae9530aae23964e470d2f8 Mon Sep 17 00:00:00 2001 From: Alessandro Patti Date: Wed, 21 Oct 2020 18:14:21 -0700 Subject: [PATCH 0298/1009] [SPARK-33203][PYTHON][TEST] Fix tests failing with rounding errors ### What changes were proposed in this pull request? Increase tolerance for two tests that fail in some environments and fail in others (flaky? Pass/fail is constant within the same environment) ### Why are the changes needed? The tests `pyspark.ml.recommendation` and `pyspark.ml.tests.test_algorithms` fail with ``` File "/home/jenkins/python/pyspark/ml/tests/test_algorithms.py", line 96, in test_raw_and_probability_prediction self.assertTrue(np.allclose(result.rawPrediction, expected_rawPrediction, atol=1)) AssertionError: False is not true ``` ``` File "/home/jenkins/python/pyspark/ml/recommendation.py", line 256, in _main_.ALS Failed example: predictions[0] Expected: Row(user=0, item=2, newPrediction=0.6929101347923279) Got: Row(user=0, item=2, newPrediction=0.6929104924201965) ... ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? This path changes a test target. Just executed the tests to verify they pass. Closes #30104 from AlessandroPatti/apatti/rounding-errors. Authored-by: Alessandro Patti Signed-off-by: Dongjoon Hyun --- python/pyspark/ml/recommendation.py | 6 +++--- python/pyspark/ml/tests/test_algorithms.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/pyspark/ml/recommendation.py b/python/pyspark/ml/recommendation.py index 4f39c5abec785..4ef38534444cd 100644 --- a/python/pyspark/ml/recommendation.py +++ b/python/pyspark/ml/recommendation.py @@ -254,11 +254,11 @@ class ALS(JavaEstimator, _ALSParams, JavaMLWritable, JavaMLReadable): >>> test = spark.createDataFrame([(0, 2), (1, 0), (2, 0)], ["user", "item"]) >>> predictions = sorted(model.transform(test).collect(), key=lambda r: r[0]) >>> predictions[0] - Row(user=0, item=2, newPrediction=0.6929101347923279) + Row(user=0, item=2, newPrediction=0.692910...) >>> predictions[1] - Row(user=1, item=0, newPrediction=3.47356915473938) + Row(user=1, item=0, newPrediction=3.473569...) >>> predictions[2] - Row(user=2, item=0, newPrediction=-0.8991986513137817) + Row(user=2, item=0, newPrediction=-0.899198...) 
>>> user_recs = model.recommendForAllUsers(3) >>> user_recs.where(user_recs.user == 0)\ .select("recommendations.item", "recommendations.rating").collect() diff --git a/python/pyspark/ml/tests/test_algorithms.py b/python/pyspark/ml/tests/test_algorithms.py index 03653c25b4ad4..f8b61b7c57919 100644 --- a/python/pyspark/ml/tests/test_algorithms.py +++ b/python/pyspark/ml/tests/test_algorithms.py @@ -86,7 +86,7 @@ def test_raw_and_probability_prediction(self): expected_rawPrediction = [-11.6081922998, -8.15827998691, 22.17757045] self.assertTrue(result.prediction, expected_prediction) self.assertTrue(np.allclose(result.probability, expected_probability, atol=1E-4)) - self.assertTrue(np.allclose(result.rawPrediction, expected_rawPrediction, atol=1)) + self.assertTrue(np.allclose(result.rawPrediction, expected_rawPrediction, rtol=0.1)) class OneVsRestTests(SparkSessionTestCase): From ba13b94f6b2b477a93c0849c1fc776ffd5f1a0e6 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 22 Oct 2020 03:04:29 +0000 Subject: [PATCH 0299/1009] [SPARK-33210][SQL] Set the rebasing mode for parquet INT96 type to `EXCEPTION` by default ### What changes were proposed in this pull request? 1. Set the default value for the SQL configs `spark.sql.legacy.parquet.int96RebaseModeInWrite` and `spark.sql.legacy.parquet.int96RebaseModeInRead` to `EXCEPTION`. 2. Update the SQL migration guide. ### Why are the changes needed? Current default value `LEGACY` may lead to shifting timestamps in read or in write. We should leave the decision about rebasing to users. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? By existing test suites like `ParquetIOSuite`. Closes #30121 from MaxGekk/int96-exception-by-default. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- docs/sql-migration-guide.md | 2 ++ .../scala/org/apache/spark/sql/internal/SQLConf.scala | 4 ++-- .../datasources/parquet/ParquetFilterSuite.scala | 3 ++- .../execution/datasources/parquet/ParquetIOSuite.scala | 2 +- .../org/apache/spark/sql/hive/StatisticsSuite.scala | 9 +++++---- .../apache/spark/sql/sources/HadoopFsRelationTest.scala | 1 + 6 files changed, 13 insertions(+), 8 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 5612e4f1453f1..124b04fb2bede 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -47,6 +47,8 @@ license: | - In Spark 3.1, `IllegalArgumentException` is returned for the incomplete interval literals, e.g. `INTERVAL '1'`, `INTERVAL '1 DAY 2'`, which are invalid. In Spark 3.0, these literals result in `NULL`s. - In Spark 3.1, we remove the built-in Hive 1.2. You need to migrate your custom SerDes to Hive 2.3. See [HIVE-15167](https://issues.apache.org/jira/browse/HIVE-15167) for more details. + + - In Spark 3.1, loading and saving of timestamps from/to parquet files fails if the timestamps are before 1900-01-01 00:00:00Z, and loaded (saved) as the INT96 type. In Spark 3.0, the actions don't fail but might lead to shifting of the input timestamps due to rebasing from/to Julian to/from Proleptic Gregorian calendar. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.parquet.int96RebaseModeInRead` or/and `spark.sql.legacy.parquet.int96RebaseModeInWrite` to `LEGACY`. 
## Upgrading from Spark SQL 3.0 to 3.0.1 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 3648615a1eaee..65d976958ffdd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2666,7 +2666,7 @@ object SQLConf { .stringConf .transform(_.toUpperCase(Locale.ROOT)) .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) - .createWithDefault(LegacyBehaviorPolicy.LEGACY.toString) + .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString) val LEGACY_PARQUET_REBASE_MODE_IN_READ = buildConf("spark.sql.legacy.parquet.datetimeRebaseModeInRead") @@ -2696,7 +2696,7 @@ object SQLConf { .stringConf .transform(_.toUpperCase(Locale.ROOT)) .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) - .createWithDefault(LegacyBehaviorPolicy.LEGACY.toString) + .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString) val LEGACY_AVRO_REBASE_MODE_IN_WRITE = buildConf("spark.sql.legacy.avro.datetimeRebaseModeInWrite") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala index 763f9315bfc5b..24a1ba124e56b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -586,7 +586,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared Seq(true, false).foreach { java8Api => withSQLConf( SQLConf.DATETIME_JAVA8API_ENABLED.key -> java8Api.toString, - SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> "CORRECTED") { + SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> "CORRECTED", + SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key -> "CORRECTED") { // spark.sql.parquet.outputTimestampType = TIMESTAMP_MILLIS val millisData = Seq( "1000-06-14 08:28:53.123", diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala index 214f36a2df713..dac4e950a7823 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala @@ -1022,7 +1022,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession } } Seq( - "2_4_5" -> successInRead _, + "2_4_5" -> failInRead _, "2_4_6" -> successInRead _).foreach { case (version, checkDefaultRead) => withAllParquetReaders { Seq("plain", "dict").foreach { enc => diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 52dd2b34a0e95..db0e93787338e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -1513,26 +1513,27 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto Seq(tbl, ext_tbl).foreach { tblName => sql(s"INSERT INTO $tblName VALUES (1, 'a', '2019-12-13')") + val expectedSize = 636 // analyze table sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS NOSCAN") 
var tableStats = getTableStats(tblName) - assert(tableStats.sizeInBytes == 601) + assert(tableStats.sizeInBytes == expectedSize) assert(tableStats.rowCount.isEmpty) sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS") tableStats = getTableStats(tblName) - assert(tableStats.sizeInBytes == 601) + assert(tableStats.sizeInBytes == expectedSize) assert(tableStats.rowCount.get == 1) // analyze a single partition sql(s"ANALYZE TABLE $tblName PARTITION (ds='2019-12-13') COMPUTE STATISTICS NOSCAN") var partStats = getPartitionStats(tblName, Map("ds" -> "2019-12-13")) - assert(partStats.sizeInBytes == 601) + assert(partStats.sizeInBytes == expectedSize) assert(partStats.rowCount.isEmpty) sql(s"ANALYZE TABLE $tblName PARTITION (ds='2019-12-13') COMPUTE STATISTICS") partStats = getPartitionStats(tblName, Map("ds" -> "2019-12-13")) - assert(partStats.sizeInBytes == 601) + assert(partStats.sizeInBytes == expectedSize) assert(partStats.rowCount.get == 1) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala index cbea74103343e..b65a00457c72c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala @@ -155,6 +155,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes withSQLConf( SQLConf.DATETIME_JAVA8API_ENABLED.key -> java8Api.toString, SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> CORRECTED.toString, + SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key -> CORRECTED.toString, SQLConf.LEGACY_AVRO_REBASE_MODE_IN_WRITE.key -> CORRECTED.toString) { val dataGenerator = RandomDataGenerator.forType( dataType = dataType, From cb3fa6c9368e64184a5f7b19688181d11de9511c Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Thu, 22 Oct 2020 03:21:34 +0000 Subject: [PATCH 0300/1009] [SPARK-33212][BUILD] Move to shaded clients for Hadoop 3.x profile ### What changes were proposed in this pull request? This switches Spark to use shaded Hadoop clients, namely hadoop-client-api and hadoop-client-runtime, for Hadoop 3.x. For Hadoop 2.7, we'll still use the same modules such as hadoop-client. To keep the default Hadoop profile as hadoop-3.2, this defines the following Maven properties: ``` hadoop-client-api.artifact hadoop-client-runtime.artifact hadoop-client-minicluster.artifact ``` which default to: ``` hadoop-client-api hadoop-client-runtime hadoop-client-minicluster ``` but all switch to `hadoop-client` when the Hadoop profile is hadoop-2.7. A side effect of this is that we'll import the same dependency multiple times, so the Maven enforcer rule `banDuplicatePomDependencyVersions` has to be disabled. Besides the above, there are the following changes: - explicitly added a few dependencies which are imported via transitive dependencies from Hadoop jars, but are removed from the shaded client jars. - removed the use of `ProxyUriUtils.getPath` from `ApplicationMaster`, which is a server-side/private API. - modified `IsolatedClientLoader` to exclude `hadoop-auth` jars when Hadoop version is 3.x. This change should only matter when we're not sharing Hadoop classes with Spark (which is _mostly_ relevant in tests). ### Why are the changes needed? This serves two purposes: - to unblock Spark from upgrading to Hadoop 3.2.2/3.3.0+.
Latest Hadoop versions have upgraded to use Guava 27+ and in order to adopt the latest Hadoop versions in Spark, we'll need to resolve the Guava conflicts. This takes the approach by switching to shaded client jars provided by Hadoop. - avoid pulling 3rd party dependencies from Hadoop and avoid potential future conflicts. ### Does this PR introduce _any_ user-facing change? When people use Spark with `hadoop-provided` option, they should make sure class path contains `hadoop-client-api` and `hadoop-client-runtime` jars. In addition, they may need to make sure these jars appear before other Hadoop jars in the order. Otherwise, classes may be loaded from the other non-shaded Hadoop jars and cause potential conflicts. ### How was this patch tested? Relying on existing tests. Closes #29843 from sunchao/SPARK-29250. Authored-by: Chao Sun Signed-off-by: DB Tsai --- common/network-yarn/pom.xml | 8 ++- core/pom.xml | 16 +++++- .../org/apache/spark/deploy/SparkSubmit.scala | 8 ++- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 3 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 52 +---------------- external/kafka-0-10-assembly/pom.xml | 8 ++- external/kafka-0-10-sql/pom.xml | 4 ++ external/kafka-0-10-token-provider/pom.xml | 5 ++ external/kinesis-asl-assembly/pom.xml | 8 ++- hadoop-cloud/pom.xml | 7 ++- launcher/pom.xml | 9 ++- pom.xml | 57 +++++++++++++++---- resource-managers/yarn/pom.xml | 53 +++++++++++------ .../spark/deploy/yarn/ApplicationMaster.scala | 6 +- .../deploy/yarn/BaseYarnClusterSuite.scala | 10 ++++ sql/catalyst/pom.xml | 4 ++ sql/hive/pom.xml | 5 ++ .../hive/client/IsolatedClientLoader.scala | 19 ++++++- 18 files changed, 186 insertions(+), 96 deletions(-) diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 0225db81925c5..9938e5d769e12 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -65,7 +65,13 @@ org.apache.hadoop - hadoop-client + ${hadoop-client-api.artifact} + ${hadoop.version} + + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.version} org.slf4j diff --git a/core/pom.xml b/core/pom.xml index 14b217d7fb22e..7a56c4ca3c638 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -66,7 +66,13 @@ org.apache.hadoop - hadoop-client + ${hadoop-client-api.artifact} + ${hadoop.version} + + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.version} org.apache.spark @@ -177,6 +183,14 @@ org.apache.commons commons-text + + commons-io + commons-io + + + commons-collections + commons-collections + com.google.code.findbugs jsr305 diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 8363d570d7320..93370f5dae72e 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -1182,10 +1182,12 @@ private[spark] object SparkSubmitUtils { def resolveDependencyPaths( artifacts: Array[AnyRef], cacheDirectory: File): String = { - artifacts.map { artifactInfo => - val artifact = artifactInfo.asInstanceOf[Artifact].getModuleRevisionId + artifacts.map { ai => + val artifactInfo = ai.asInstanceOf[Artifact] + val artifact = artifactInfo.getModuleRevisionId + val testSuffix = if (artifactInfo.getType == "test-jar") "-tests" else "" cacheDirectory.getAbsolutePath + File.separator + - s"${artifact.getOrganisation}_${artifact.getName}-${artifact.getRevision}.jar" + s"${artifact.getOrganisation}_${artifact.getName}-${artifact.getRevision}${testSuffix}.jar" 
}.mkString(",") } diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index e365559ed8cbf..b0b215a316df2 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -127,7 +127,7 @@ javax.inject/1//javax.inject-1.jar javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar javolution/5.5.1//javolution-5.5.1.jar -jaxb-api/2.2.2//jaxb-api-2.2.2.jar +jaxb-api/2.2.11//jaxb-api-2.2.11.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar jdo-api/3.0.1//jdo-api-3.0.1.jar @@ -227,7 +227,6 @@ spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar -stax-api/1.0-2//stax-api-1.0-2.jar stax-api/1.0.1//stax-api-1.0.1.jar stream/2.9.6//stream-2.9.6.jar super-csv/2.2.0//super-csv-2.2.0.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 0c050d62db3da..b64c7989a4e02 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -3,14 +3,12 @@ JLargeArrays/1.5//JLargeArrays-1.5.jar JTransforms/3.1//JTransforms-3.1.jar RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar ST4/4.0.4//ST4-4.0.4.jar -accessors-smart/1.2//accessors-smart-1.2.jar activation/1.1.1//activation-1.1.1.jar aircompressor/0.10//aircompressor-0.10.jar algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar antlr4-runtime/4.7.1//antlr4-runtime-4.7.1.jar aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar -aopalliance/1.0//aopalliance-1.0.jar arpack_combined_all/0.1//arpack_combined_all-0.1.jar arrow-format/1.0.1//arrow-format-1.0.1.jar arrow-memory-core/1.0.1//arrow-memory-core-1.0.1.jar @@ -27,15 +25,12 @@ breeze_2.12/1.0//breeze_2.12-1.0.jar cats-kernel_2.12/2.0.0-M4//cats-kernel_2.12-2.0.0-M4.jar chill-java/0.9.5//chill-java-0.9.5.jar chill_2.12/0.9.5//chill_2.12-0.9.5.jar -commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar commons-cli/1.2//commons-cli-1.2.jar commons-codec/1.10//commons-codec-1.10.jar commons-collections/3.2.2//commons-collections-3.2.2.jar commons-compiler/3.0.16//commons-compiler-3.0.16.jar commons-compress/1.8.1//commons-compress-1.8.1.jar -commons-configuration2/2.1.1//commons-configuration2-2.1.1.jar commons-crypto/1.0.0//commons-crypto-1.0.0.jar -commons-daemon/1.0.13//commons-daemon-1.0.13.jar commons-dbcp/1.4//commons-dbcp-1.4.jar commons-httpclient/3.1//commons-httpclient-3.1.jar commons-io/2.5//commons-io-2.5.jar @@ -55,30 +50,13 @@ datanucleus-api-jdo/4.2.4//datanucleus-api-jdo-4.2.4.jar datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar datanucleus-rdbms/4.1.19//datanucleus-rdbms-4.1.19.jar derby/10.12.1.1//derby-10.12.1.1.jar -dnsjava/2.1.7//dnsjava-2.1.7.jar dropwizard-metrics-hadoop-metrics2-reporter/0.1.2//dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar -ehcache/3.3.1//ehcache-3.3.1.jar flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar generex/1.0.2//generex-1.0.2.jar -geronimo-jcache_1.0_spec/1.0-alpha-1//geronimo-jcache_1.0_spec-1.0-alpha-1.jar gson/2.2.4//gson-2.2.4.jar guava/14.0.1//guava-14.0.1.jar -guice-servlet/4.0//guice-servlet-4.0.jar -guice/4.0//guice-4.0.jar -hadoop-annotations/3.2.0//hadoop-annotations-3.2.0.jar -hadoop-auth/3.2.0//hadoop-auth-3.2.0.jar -hadoop-client/3.2.0//hadoop-client-3.2.0.jar 
-hadoop-common/3.2.0//hadoop-common-3.2.0.jar -hadoop-hdfs-client/3.2.0//hadoop-hdfs-client-3.2.0.jar -hadoop-mapreduce-client-common/3.2.0//hadoop-mapreduce-client-common-3.2.0.jar -hadoop-mapreduce-client-core/3.2.0//hadoop-mapreduce-client-core-3.2.0.jar -hadoop-mapreduce-client-jobclient/3.2.0//hadoop-mapreduce-client-jobclient-3.2.0.jar -hadoop-yarn-api/3.2.0//hadoop-yarn-api-3.2.0.jar -hadoop-yarn-client/3.2.0//hadoop-yarn-client-3.2.0.jar -hadoop-yarn-common/3.2.0//hadoop-yarn-common-3.2.0.jar -hadoop-yarn-registry/3.2.0//hadoop-yarn-registry-3.2.0.jar -hadoop-yarn-server-common/3.2.0//hadoop-yarn-server-common-3.2.0.jar -hadoop-yarn-server-web-proxy/3.2.0//hadoop-yarn-server-web-proxy-3.2.0.jar +hadoop-client-api/3.2.0//hadoop-client-api-3.2.0.jar +hadoop-client-runtime/3.2.0//hadoop-client-runtime-3.2.0.jar hive-beeline/2.3.7//hive-beeline-2.3.7.jar hive-cli/2.3.7//hive-cli-2.3.7.jar hive-common/2.3.7//hive-common-2.3.7.jar @@ -108,8 +86,6 @@ jackson-core/2.10.0//jackson-core-2.10.0.jar jackson-databind/2.10.0//jackson-databind-2.10.0.jar jackson-dataformat-yaml/2.10.0//jackson-dataformat-yaml-2.10.0.jar jackson-datatype-jsr310/2.10.3//jackson-datatype-jsr310-2.10.3.jar -jackson-jaxrs-base/2.9.5//jackson-jaxrs-base-2.9.5.jar -jackson-jaxrs-json-provider/2.9.5//jackson-jaxrs-json-provider-2.9.5.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar jackson-module-jaxb-annotations/2.10.0//jackson-module-jaxb-annotations-2.10.0.jar jackson-module-paranamer/2.10.0//jackson-module-paranamer-2.10.0.jar @@ -122,13 +98,11 @@ jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar janino/3.0.16//janino-3.0.16.jar javassist/3.25.0-GA//javassist-3.25.0-GA.jar -javax.inject/1//javax.inject-1.jar javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar javolution/5.5.1//javolution-5.5.1.jar jaxb-api/2.2.11//jaxb-api-2.2.11.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar -jcip-annotations/1.0-1//jcip-annotations-1.0-1.jar jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar jdo-api/3.0.1//jdo-api-3.0.1.jar jersey-client/2.30//jersey-client-2.30.jar @@ -142,30 +116,14 @@ jline/2.14.6//jline-2.14.6.jar joda-time/2.10.5//joda-time-2.10.5.jar jodd-core/3.5.2//jodd-core-3.5.2.jar jpam/1.1//jpam-1.1.jar -json-smart/2.3//json-smart-2.3.jar json/1.8//json-1.8.jar json4s-ast_2.12/3.7.0-M5//json4s-ast_2.12-3.7.0-M5.jar json4s-core_2.12/3.7.0-M5//json4s-core_2.12-3.7.0-M5.jar json4s-jackson_2.12/3.7.0-M5//json4s-jackson_2.12-3.7.0-M5.jar json4s-scalap_2.12/3.7.0-M5//json4s-scalap_2.12-3.7.0-M5.jar -jsp-api/2.1//jsp-api-2.1.jar jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.30//jul-to-slf4j-1.7.30.jar -kerb-admin/1.0.1//kerb-admin-1.0.1.jar -kerb-client/1.0.1//kerb-client-1.0.1.jar -kerb-common/1.0.1//kerb-common-1.0.1.jar -kerb-core/1.0.1//kerb-core-1.0.1.jar -kerb-crypto/1.0.1//kerb-crypto-1.0.1.jar -kerb-identity/1.0.1//kerb-identity-1.0.1.jar -kerb-server/1.0.1//kerb-server-1.0.1.jar -kerb-simplekdc/1.0.1//kerb-simplekdc-1.0.1.jar -kerb-util/1.0.1//kerb-util-1.0.1.jar -kerby-asn1/1.0.1//kerby-asn1-1.0.1.jar -kerby-config/1.0.1//kerby-config-1.0.1.jar -kerby-pkix/1.0.1//kerby-pkix-1.0.1.jar -kerby-util/1.0.1//kerby-util-1.0.1.jar -kerby-xdr/1.0.1//kerby-xdr-1.0.1.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar kubernetes-client/4.10.3//kubernetes-client-4.10.3.jar kubernetes-model-admissionregistration/4.10.3//kubernetes-model-admissionregistration-4.10.3.jar @@ -203,9 +161,7 @@ 
metrics-json/4.1.1//metrics-json-4.1.1.jar metrics-jvm/4.1.1//metrics-jvm-4.1.1.jar minlog/1.3.0//minlog-1.3.0.jar netty-all/4.1.51.Final//netty-all-4.1.51.Final.jar -nimbus-jose-jwt/4.41.1//nimbus-jose-jwt-4.41.1.jar objenesis/2.6//objenesis-2.6.jar -okhttp/2.7.5//okhttp-2.7.5.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar @@ -225,7 +181,6 @@ parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar protobuf-java/2.5.0//protobuf-java-2.5.0.jar py4j/0.10.9//py4j-0.10.9.jar pyrolite/4.30//pyrolite-4.30.jar -re2j/1.1//re2j-1.1.jar scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar scala-compiler/2.12.10//scala-compiler-2.12.10.jar scala-library/2.12.10//scala-library-2.12.10.jar @@ -243,15 +198,12 @@ spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar stax-api/1.0.1//stax-api-1.0.1.jar -stax2-api/3.1.4//stax2-api-3.1.4.jar stream/2.9.6//stream-2.9.6.jar super-csv/2.2.0//super-csv-2.2.0.jar threeten-extra/1.5.0//threeten-extra-1.5.0.jar -token-provider/1.0.1//token-provider-1.0.1.jar transaction-api/1.1//transaction-api-1.1.jar univocity-parsers/2.9.0//univocity-parsers-2.9.0.jar velocity/1.5//velocity-1.5.jar -woodstox-core/5.0.3//woodstox-core-5.0.3.jar xbean-asm7-shaded/4.15//xbean-asm7-shaded-4.15.jar xz/1.5//xz-1.5.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index d9d9fb7f55c77..b1e306c499385 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -71,9 +71,15 @@ org.apache.hadoop - hadoop-client + ${hadoop-client-api.artifact} + ${hadoop.version} provided + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.version} + org.apache.avro avro-mapred diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 95a99ac88412e..06a6bef005e69 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -79,6 +79,10 @@ kafka-clients ${kafka.version} + + com.google.code.findbugs + jsr305 + org.apache.commons commons-pool2 diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index 941946f30e96f..1b0d6d322917f 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -58,6 +58,11 @@ mockito-core test + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.deps.scope} + org.apache.spark spark-tags_${scala.binary.version} diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 76ee5bb7b2f85..5a49358a84241 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -91,9 +91,15 @@ org.apache.hadoop - hadoop-client + ${hadoop-client-api.artifact} + ${hadoop.version} provided + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.version} + org.apache.avro avro-ipc diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 8689e0b8a9ea8..a5642a5a68fe4 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -58,10 +58,15 @@ org.apache.hadoop - hadoop-client + ${hadoop-client-api.artifact} ${hadoop.version} provided + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.version} + org.apache.hadoop - hadoop-client + ${hadoop-client-api.artifact} + ${hadoop.version} + test + + + org.apache.hadoop + 
${hadoop-client-runtime.artifact} + ${hadoop.version} test diff --git a/pom.xml b/pom.xml index 2fd002e91751f..8b2130f2d9f56 100644 --- a/pom.xml +++ b/pom.xml @@ -243,6 +243,15 @@ compile test + + hadoop-client-api + hadoop-client-runtime + hadoop-client-minicluster + + + org.apache.hadoop + hadoop-client-api + ${hadoop.version} + ${hadoop.deps.scope} + + + org.apache.hadoop + hadoop-client-runtime + ${hadoop.version} + runtime + + + org.apache.hadoop + hadoop-client-minicluster + ${yarn.version} + test + + org.apache.hadoop hadoop-client @@ -1632,6 +1666,14 @@ org.apache.ant ant + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-auth + org.apache.zookeeper zookeeper @@ -2396,17 +2438,6 @@ - - enforce-no-duplicate-dependencies - - enforce - - - - - - - @@ -2866,6 +2897,7 @@ maven-shade-plugin false + false org.spark-project.spark:unused @@ -3127,6 +3159,9 @@ 2.7.4 2.7.1 2.4 + hadoop-client + hadoop-client + hadoop-client diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index bc80769be2390..da715c6bdc59f 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -40,6 +40,33 @@ true + + hadoop-2.7 + + + org.apache.hadoop + hadoop-yarn-api + + + org.apache.hadoop + hadoop-yarn-common + + + org.apache.hadoop + hadoop-yarn-server-web-proxy + + + org.apache.hadoop + hadoop-yarn-client + + + org.apache.hadoop + hadoop-yarn-server-tests + tests + test + + + @@ -69,23 +96,20 @@ org.apache.hadoop - hadoop-yarn-api - - - org.apache.hadoop - hadoop-yarn-common - - - org.apache.hadoop - hadoop-yarn-server-web-proxy + ${hadoop-client-api.artifact} + ${hadoop.version} org.apache.hadoop - hadoop-yarn-client + ${hadoop-client-runtime.artifact} + ${hadoop.version} + ${hadoop.deps.scope} org.apache.hadoop - hadoop-client + ${hadoop-client-minicluster.artifact} + ${hadoop.version} + test @@ -142,13 +166,6 @@ test - - org.apache.hadoop - hadoop-yarn-server-tests - tests - test - - org.mockito mockito-core diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 5f632fbb259ff..9b99e8ff9265c 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.yarn import java.io.{File, IOException} import java.lang.reflect.{InvocationTargetException, Modifier} -import java.net.{URI, URL} +import java.net.{URI, URL, URLEncoder} import java.security.PrivilegedExceptionAction import java.util.concurrent.{TimeoutException, TimeUnit} @@ -36,7 +36,6 @@ import org.apache.hadoop.yarn.api._ import org.apache.hadoop.yarn.api.records._ import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException -import org.apache.hadoop.yarn.server.webproxy.ProxyUriUtils import org.apache.hadoop.yarn.util.{ConverterUtils, Records} import org.apache.spark._ @@ -308,7 +307,8 @@ private[spark] class ApplicationMaster( // The client-mode AM doesn't listen for incoming connections, so report an invalid port. 
registerAM(Utils.localHostName, -1, sparkConf, sparkConf.getOption("spark.driver.appUIAddress"), appAttemptId) - addAmIpFilter(Some(driverRef), ProxyUriUtils.getPath(appAttemptId.getApplicationId)) + val encodedAppId = URLEncoder.encode(appAttemptId.getApplicationId.toString, "UTF-8") + addAmIpFilter(Some(driverRef), s"/proxy/$encodedAppId") createAllocator(driverRef, sparkConf, clientRpcEnv, appAttemptId, cachedResourcesConf) reporterThread.join() } catch { diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala index 20f5339c46fef..a813b9913f23b 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala @@ -80,6 +80,16 @@ abstract class BaseYarnClusterSuite yarnConf.set("yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage", "100.0") + // capacity-scheduler.xml is missing in hadoop-client-minicluster so this is a workaround + yarnConf.set("yarn.scheduler.capacity.root.queues", "default") + yarnConf.setInt("yarn.scheduler.capacity.root.default.capacity", 100) + yarnConf.setFloat("yarn.scheduler.capacity.root.default.user-limit-factor", 1) + yarnConf.setInt("yarn.scheduler.capacity.root.default.maximum-capacity", 100) + yarnConf.set("yarn.scheduler.capacity.root.default.state", "RUNNING") + yarnConf.set("yarn.scheduler.capacity.root.default.acl_submit_applications", "*") + yarnConf.set("yarn.scheduler.capacity.root.default.acl_administer_queue", "*") + yarnConf.setInt("yarn.scheduler.capacity.node-locality-delay", -1) + yarnCluster = new MiniYARNCluster(getClass().getName(), 1, 1, 1) yarnCluster.init(yarnConf) yarnCluster.start() diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 6b79eb722fcdd..af976fa1fa983 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -104,6 +104,10 @@ org.antlr antlr4-runtime + + javax.xml.bind + jaxb-api + commons-codec commons-codec diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 0453094cf8b7b..4fca6264c0594 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -162,6 +162,11 @@ org.datanucleus datanucleus-core + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.deps.scope} + org.apache.thrift libthrift diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 42a0ec0253b85..f9946fe8e0616 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -118,11 +118,24 @@ private[hive] object IsolatedClientLoader extends Logging { hadoopVersion: String, ivyPath: Option[String], remoteRepos: String): Seq[URL] = { + val hadoopJarNames = if (hadoopVersion.startsWith("3")) { + Seq(s"org.apache.hadoop:hadoop-client-api:$hadoopVersion", + s"org.apache.hadoop:hadoop-client-runtime:$hadoopVersion") + } else { + Seq(s"org.apache.hadoop:hadoop-client:$hadoopVersion") + } val hiveArtifacts = version.extraDeps ++ Seq("hive-metastore", "hive-exec", "hive-common", "hive-serde") .map(a => s"org.apache.hive:$a:${version.fullVersion}") ++ - Seq("com.google.guava:guava:14.0.1", - s"org.apache.hadoop:hadoop-client:$hadoopVersion") + 
Seq("com.google.guava:guava:14.0.1") ++ hadoopJarNames + + val extraExclusions = if (hadoopVersion.startsWith("3")) { + // this introduced from lower version of Hive could conflict with jars in Hadoop 3.2+, so + // exclude here in favor of the ones in Hadoop 3.2+ + Seq("org.apache.hadoop:hadoop-auth") + } else { + Seq.empty + } val classpath = quietly { SparkSubmitUtils.resolveMavenCoordinates( @@ -130,7 +143,7 @@ private[hive] object IsolatedClientLoader extends Logging { SparkSubmitUtils.buildIvySettings( Some(remoteRepos), ivyPath), - exclusions = version.exclusions) + exclusions = version.exclusions ++ extraExclusions) } val allFiles = classpath.split(",").map(new File(_)).toSet From eb33bcb4b2db2a13b3da783e58feb8852e04637b Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Thu, 22 Oct 2020 07:59:49 +0000 Subject: [PATCH 0301/1009] [SPARK-30796][SQL] Add parameter position for REGEXP_REPLACE ### What changes were proposed in this pull request? `REGEXP_REPLACE` could replace all substrings of string that match regexp with replacement string. But `REGEXP_REPLACE` lost some flexibility. such as: converts camel case strings to a string containing lower case words separated by an underscore: AddressLine1 -> address_line_1 If we support the parameter position, we can do like this(e.g. Oracle): ``` WITH strings as ( SELECT 'AddressLine1' s FROM dual union all SELECT 'ZipCode' s FROM dual union all SELECT 'Country' s FROM dual ) SELECT s "STRING", lower(regexp_replace(s, '([A-Z0-9])', '_\1', 2)) "MODIFIED_STRING" FROM strings; ``` The output: ``` STRING MODIFIED_STRING -------------------- -------------------- AddressLine1 address_line_1 ZipCode zip_code Country country ``` There are some mainstream database support the syntax. **Oracle** https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/REGEXP_REPLACE.html#GUID-EA80A33C-441A-4692-A959-273B5A224490 **Vertica** https://www.vertica.com/docs/9.2.x/HTML/Content/Authoring/SQLReferenceManual/Functions/RegularExpressions/REGEXP_REPLACE.htm?zoom_highlight=regexp_replace **Redshift** https://docs.aws.amazon.com/redshift/latest/dg/REGEXP_REPLACE.html ### Why are the changes needed? The parameter position for `REGEXP_REPLACE` is very useful. ### Does this PR introduce _any_ user-facing change? 'Yes'. ### How was this patch tested? Jenkins test. Closes #29891 from beliefer/add-position-for-regex_replace. 
Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: Wenchen Fan --- .../expressions/regexpExpressions.scala | 101 ++++++++++++++---- .../expressions/RegexpExpressionsSuite.scala | 14 +++ .../sql-functions/sql-expression-schema.md | 2 +- .../sql-tests/inputs/regexp-functions.sql | 12 +++ .../results/regexp-functions.sql.out | 84 ++++++++++++++- 5 files changed, 188 insertions(+), 25 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index 8eb7f463e049c..c9dd7c7acddde 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -24,6 +24,8 @@ import scala.collection.mutable.ArrayBuffer import org.apache.commons.text.StringEscapeUtils +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess} import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.util.{GenericArrayData, StringUtils} @@ -318,7 +320,24 @@ case class StringSplit(str: Expression, regex: Expression, limit: Expression) */ // scalastyle:off line.size.limit @ExpressionDescription( - usage = "_FUNC_(str, regexp, rep) - Replaces all substrings of `str` that match `regexp` with `rep`.", + usage = "_FUNC_(str, regexp, rep[, position]) - Replaces all substrings of `str` that match `regexp` with `rep`.", + arguments = """ + Arguments: + * str - a string expression to search for a regular expression pattern match. + * regexp - a string representing a regular expression. The regex string should be a + Java regular expression. + + Since Spark 2.0, string literals (including regex patterns) are unescaped in our SQL + parser. For example, to match "\abc", a regular expression for `regexp` can be + "^\\abc$". + + There is a SQL config 'spark.sql.parser.escapedStringLiterals' that can be used to + fallback to the Spark 1.6 behavior regarding string literal parsing. For example, + if the config is enabled, the `regexp` that can match "\abc" is "^\abc$". + * rep - a string expression to replace matched substrings. + * position - a positive integer literal that indicates the position within `str` to begin searching. + The default is 1. If position is greater than the number of characters in `str`, the result is `str`. 
+ """, examples = """ Examples: > SELECT _FUNC_('100-200', '(\\d+)', 'num'); @@ -326,8 +345,24 @@ case class StringSplit(str: Expression, regex: Expression, limit: Expression) """, since = "1.5.0") // scalastyle:on line.size.limit -case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expression) - extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant { +case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expression, pos: Expression) + extends QuaternaryExpression with ImplicitCastInputTypes with NullIntolerant { + + def this(subject: Expression, regexp: Expression, rep: Expression) = + this(subject, regexp, rep, Literal(1)) + + override def checkInputDataTypes(): TypeCheckResult = { + if (!pos.foldable) { + return TypeCheckFailure(s"Position expression must be foldable, but got $pos") + } + + val posEval = pos.eval() + if (posEval == null || posEval.asInstanceOf[Int] > 0) { + TypeCheckSuccess + } else { + TypeCheckFailure(s"Position expression must be positive, but got: $posEval") + } + } // last regex in string, we will update the pattern iff regexp value changed. @transient private var lastRegex: UTF8String = _ @@ -339,7 +374,7 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio // result buffer write by Matcher @transient private lazy val result: StringBuffer = new StringBuffer - override def nullSafeEval(s: Any, p: Any, r: Any): Any = { + override def nullSafeEval(s: Any, p: Any, r: Any, i: Any): Any = { if (!p.equals(lastRegex)) { // regex value changed lastRegex = p.asInstanceOf[UTF8String].clone() @@ -350,20 +385,26 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio lastReplacementInUTF8 = r.asInstanceOf[UTF8String].clone() lastReplacement = lastReplacementInUTF8.toString } - val m = pattern.matcher(s.toString()) - result.delete(0, result.length()) - - while (m.find) { - m.appendReplacement(result, lastReplacement) + val source = s.toString() + val position = i.asInstanceOf[Int] - 1 + if (position < source.length) { + val m = pattern.matcher(source) + m.region(position, source.length) + result.delete(0, result.length()) + while (m.find) { + m.appendReplacement(result, lastReplacement) + } + m.appendTail(result) + UTF8String.fromString(result.toString) + } else { + s } - m.appendTail(result) - - UTF8String.fromString(result.toString) } override def dataType: DataType = StringType - override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType, StringType) - override def children: Seq[Expression] = subject :: regexp :: rep :: Nil + override def inputTypes: Seq[AbstractDataType] = + Seq(StringType, StringType, StringType, IntegerType) + override def children: Seq[Expression] = subject :: regexp :: rep :: pos :: Nil override def prettyName: String = "regexp_replace" override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { @@ -373,6 +414,8 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio val classNameStringBuffer = classOf[java.lang.StringBuffer].getCanonicalName val matcher = ctx.freshName("matcher") + val source = ctx.freshName("source") + val position = ctx.freshName("position") val termLastRegex = ctx.addMutableState("UTF8String", "lastRegex") val termPattern = ctx.addMutableState(classNamePattern, "pattern") @@ -385,7 +428,7 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio "" } - nullSafeCodeGen(ctx, ev, (subject, regexp, rep) => { + nullSafeCodeGen(ctx, 
ev, (subject, regexp, rep, pos) => { s""" if (!$regexp.equals($termLastRegex)) { // regex value changed @@ -397,21 +440,33 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio $termLastReplacementInUTF8 = $rep.clone(); $termLastReplacement = $termLastReplacementInUTF8.toString(); } - $classNameStringBuffer $termResult = new $classNameStringBuffer(); - java.util.regex.Matcher $matcher = $termPattern.matcher($subject.toString()); - - while ($matcher.find()) { - $matcher.appendReplacement($termResult, $termLastReplacement); + String $source = $subject.toString(); + int $position = $pos - 1; + if ($position < $source.length()) { + $classNameStringBuffer $termResult = new $classNameStringBuffer(); + java.util.regex.Matcher $matcher = $termPattern.matcher($source); + $matcher.region($position, $source.length()); + + while ($matcher.find()) { + $matcher.appendReplacement($termResult, $termLastReplacement); + } + $matcher.appendTail($termResult); + ${ev.value} = UTF8String.fromString($termResult.toString()); + $termResult = null; + } else { + ${ev.value} = $subject; } - $matcher.appendTail($termResult); - ${ev.value} = UTF8String.fromString($termResult.toString()); - $termResult = null; $setEvNotNull """ }) } } +object RegExpReplace { + def apply(subject: Expression, regexp: Expression, rep: Expression): RegExpReplace = + new RegExpReplace(subject, regexp, rep) +} + object RegExpExtractBase { def checkGroupIndex(groupCount: Int, groupIndex: Int): Unit = { if (groupIndex < 0) { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala index 205dc10efc8a8..77a32a735f76d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala @@ -253,6 +253,20 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(expr, null, row4) checkEvaluation(expr, null, row5) checkEvaluation(expr, null, row6) + // test position + val exprWithPos = RegExpReplace(s, p, r, 4) + checkEvaluation(exprWithPos, "100-num", row1) + checkEvaluation(exprWithPos, "100-###", row2) + checkEvaluation(exprWithPos, "100###200", row3) + checkEvaluation(exprWithPos, null, row4) + checkEvaluation(exprWithPos, null, row5) + checkEvaluation(exprWithPos, null, row6) + val exprWithLargePos = RegExpReplace(s, p, r, 7) + checkEvaluation(exprWithLargePos, "100-20num", row1) + checkEvaluation(exprWithLargePos, "100-20###", row2) + val exprWithExceedLength = RegExpReplace(s, p, r, 8) + checkEvaluation(exprWithExceedLength, "100-200", row1) + checkEvaluation(exprWithExceedLength, "100-200", row2) val nonNullExpr = RegExpReplace(Literal("100-200"), Literal("(\\d+)"), Literal("num")) checkEvaluation(nonNullExpr, "num-num", row1) diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 1675fb1cc7c62..da83df4994d8d 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -217,7 +217,7 @@ | org.apache.spark.sql.catalyst.expressions.Rank | rank | SELECT a, b, rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | | 
org.apache.spark.sql.catalyst.expressions.RegExpExtract | regexp_extract | SELECT regexp_extract('100-200', '(\\d+)-(\\d+)', 1) | struct | | org.apache.spark.sql.catalyst.expressions.RegExpExtractAll | regexp_extract_all | SELECT regexp_extract_all('100-200, 300-400', '(\\d+)-(\\d+)', 1) | struct> | -| org.apache.spark.sql.catalyst.expressions.RegExpReplace | regexp_replace | SELECT regexp_replace('100-200', '(\\d+)', 'num') | struct | +| org.apache.spark.sql.catalyst.expressions.RegExpReplace | regexp_replace | SELECT regexp_replace('100-200', '(\\d+)', 'num') | struct | | org.apache.spark.sql.catalyst.expressions.Remainder | % | SELECT 2 % 1.8 | struct<(CAST(CAST(2 AS DECIMAL(1,0)) AS DECIMAL(2,1)) % CAST(1.8 AS DECIMAL(2,1))):decimal(2,1)> | | org.apache.spark.sql.catalyst.expressions.Remainder | mod | SELECT 2 % 1.8 | struct<(CAST(CAST(2 AS DECIMAL(1,0)) AS DECIMAL(2,1)) % CAST(1.8 AS DECIMAL(2,1))):decimal(2,1)> | | org.apache.spark.sql.catalyst.expressions.Reverse | reverse | SELECT reverse('Spark SQL') | struct | diff --git a/sql/core/src/test/resources/sql-tests/inputs/regexp-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/regexp-functions.sql index 7128dee0a00d7..3f3eaaae9ee4e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/regexp-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/regexp-functions.sql @@ -31,3 +31,15 @@ SELECT regexp_extract_all('1a 2b 14m', '(\\d+)([a-z]+)', 3); SELECT regexp_extract_all('1a 2b 14m', '(\\d+)([a-z]+)', -1); SELECT regexp_extract_all('1a 2b 14m', '(\\d+)?([a-z]+)', 1); SELECT regexp_extract_all('a 2b 14m', '(\\d+)?([a-z]+)', 1); + +-- regexp_replace +SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something'); +SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something', -2); +SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something', 0); +SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something', 1); +SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something', 2); +SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something', 8); +SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', 26); +SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', 27); +SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', 30); +SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', null); \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out index 2eef926f63e37..8d471a5bb1c87 100644 --- a/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 30 +-- Number of queries: 37 -- !query @@ -252,3 +252,85 @@ SELECT regexp_extract_all('a 2b 14m', '(\\d+)?([a-z]+)', 1) struct> -- !query output ["","2","14"] + + +-- !query +SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something') +-- !query schema +struct +-- !query output +something, something, and wise + + +-- !query +SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something', -2) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something', -2)' due to data type mismatch: 
Position expression must be positive, but got: -2; line 1 pos 7 + + +-- !query +SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something', 0) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something', 0)' due to data type mismatch: Position expression must be positive, but got: 0; line 1 pos 7 + + +-- !query +SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something', 1) +-- !query schema +struct +-- !query output +something, something, and wise + + +-- !query +SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something', 2) +-- !query schema +struct +-- !query output +hsomething, something, and wise + + +-- !query +SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something', 8) +-- !query schema +struct +-- !query output +healthy, something, and wise + + +-- !query +SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', 26) +-- !query schema +struct +-- !query output +healthy, wealthy, and wissomething + + +-- !query +SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', 27) +-- !query schema +struct +-- !query output +healthy, wealthy, and wise + + +-- !query +SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', 30) +-- !query schema +struct +-- !query output +healthy, wealthy, and wise + + +-- !query +SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', null) +-- !query schema +struct +-- !query output +NULL \ No newline at end of file From a908b67502164d5b1409aca912dac7042e825586 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 22 Oct 2020 01:10:24 -0700 Subject: [PATCH 0302/1009] [SPARK-33218][CORE] Update misleading log messages for removed shuffle blocks ### What changes were proposed in this pull request? This updates the misleading log messages for removed shuffle block during migration. ### Why are the changes needed? 1. For the deleted shuffle blocks, `IndexShuffleBlockResolver` shows users WARN message saying `skipping migration`. However, `BlockManagerDecommissioner` shows users INFO message including `Migrated ShuffleBlockInfo(...)` inconsistently. Technically, we didn't migrated. We should not show `Migrated` message in this case. ``` INFO BlockManagerDecommissioner: Trying to migrate shuffle ShuffleBlockInfo(109,18924) to BlockManagerId(...) (2 / 3) WARN IndexShuffleBlockResolver: Failed to resolve shuffle block ShuffleBlockInfo(109,18924), skipping migration. This is expected to occur if a block is removed after decommissioning has started. INFO BlockManagerDecommissioner: Got migration sub-blocks List() ... INFO BlockManagerDecommissioner: Migrated ShuffleBlockInfo(109,18924) to BlockManagerId(...) ``` 2. In addition, if the shuffle file is deleted while the information is in the queue, the above messages are repeated multiple times, `spark.storage.decommission.maxReplicationFailuresPerBlock`. We had better use one line instead of the group of messages for that case. ``` INFO BlockManagerDecommissioner: Trying to migrate shuffle ShuffleBlockInfo(109,18924) to BlockManagerId(...) (0 / 3) ... INFO BlockManagerDecommissioner: Trying to migrate shuffle ShuffleBlockInfo(109,18924) to BlockManagerId(...) (1 / 3) ... INFO BlockManagerDecommissioner: Trying to migrate shuffle ShuffleBlockInfo(109,18924) to BlockManagerId(...) (2 / 3) ``` 3. Skipping or not is a role of `BlockManagerDecommissioner` class. 
`IndexShuffleBlockResolver.getMigrationBlocks` is used twice differently like the following. We had better inform users at `BlockManagerDecommissioner` once. - At the beginning, to get the sub-blocks. - In case of `IOException`, to determine whether ignoring it or re-throwing. And, `BlockManagerDecommissioner` shows WARN message (`Skipping block ...`) again. ### Does this PR introduce _any_ user-facing change? No. This is an update for log message info to be consistent. ### How was this patch tested? Manually. Closes #30129 from dongjoon-hyun/SPARK-33218. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../shuffle/IndexShuffleBlockResolver.scala | 2 +- .../storage/BlockManagerDecommissioner.scala | 64 ++++++++++--------- 2 files changed, 35 insertions(+), 31 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala index 525b8fd3f6923..e5df27c0d3c7a 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala @@ -252,7 +252,7 @@ private[spark] class IndexShuffleBlockResolver( } } catch { case _: Exception => // If we can't load the blocks ignore them. - logWarning(s"Failed to resolve shuffle block ${shuffleBlockInfo}, skipping migration. " + + logWarning(s"Failed to resolve shuffle block ${shuffleBlockInfo}. " + "This is expected to occur if a block is removed after decommissioning has started.") List.empty[(BlockId, ManagedBuffer)] } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala index 89d12406365dc..d1e89418a4897 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala @@ -83,38 +83,42 @@ private[storage] class BlockManagerDecommissioner( Thread.sleep(SLEEP_TIME_SECS * 1000L) case Some((shuffleBlockInfo, retryCount)) => if (retryCount < maxReplicationFailuresForDecommission) { - logInfo(s"Trying to migrate shuffle ${shuffleBlockInfo} to ${peer} " + - s"($retryCount / $maxReplicationFailuresForDecommission)") val blocks = bm.migratableResolver.getMigrationBlocks(shuffleBlockInfo) - logInfo(s"Got migration sub-blocks ${blocks}") - - // Migrate the components of the blocks. - try { - blocks.foreach { case (blockId, buffer) => - logDebug(s"Migrating sub-block ${blockId}") - bm.blockTransferService.uploadBlockSync( - peer.host, - peer.port, - peer.executorId, - blockId, - buffer, - StorageLevel.DISK_ONLY, - null)// class tag, we don't need for shuffle - logDebug(s"Migrated sub block ${blockId}") - } - logInfo(s"Migrated ${shuffleBlockInfo} to ${peer}") - } catch { - case e: IOException => - // If a block got deleted before netty opened the file handle, then trying to - // load the blocks now will fail. This is most likely to occur if we start - // migrating blocks and then the shuffle TTL cleaner kicks in. However this - // could also happen with manually managed shuffles or a GC event on the driver - // a no longer referenced RDD with shuffle files. 
- if (bm.migratableResolver.getMigrationBlocks(shuffleBlockInfo).isEmpty) { - logWarning(s"Skipping block ${shuffleBlockInfo}, block deleted.") - } else { - throw e + if (blocks.isEmpty) { + logInfo(s"Ignore empty shuffle block $shuffleBlockInfo") + } else { + logInfo(s"Got migration sub-blocks ${blocks}") + logInfo(s"Trying to migrate shuffle ${shuffleBlockInfo} to ${peer} " + + s"($retryCount / $maxReplicationFailuresForDecommission)") + + // Migrate the components of the blocks. + try { + blocks.foreach { case (blockId, buffer) => + logDebug(s"Migrating sub-block ${blockId}") + bm.blockTransferService.uploadBlockSync( + peer.host, + peer.port, + peer.executorId, + blockId, + buffer, + StorageLevel.DISK_ONLY, + null) // class tag, we don't need for shuffle + logDebug(s"Migrated sub block ${blockId}") } + logInfo(s"Migrated ${shuffleBlockInfo} to ${peer}") + } catch { + case e: IOException => + // If a block got deleted before netty opened the file handle, then trying to + // load the blocks now will fail. This is most likely to occur if we start + // migrating blocks and then the shuffle TTL cleaner kicks in. However this + // could also happen with manually managed shuffles or a GC event on the + // driver a no longer referenced RDD with shuffle files. + if (bm.migratableResolver.getMigrationBlocks(shuffleBlockInfo).isEmpty) { + logWarning(s"Skipping block ${shuffleBlockInfo}, block deleted.") + } else { + throw e + } + } } } else { logError(s"Skipping block ${shuffleBlockInfo} because it has failed ${retryCount}") From d9ee33cfb95e1f05878e498c93c5cc65ce449f0e Mon Sep 17 00:00:00 2001 From: Xuedong Luan Date: Thu, 22 Oct 2020 17:23:10 +0900 Subject: [PATCH 0303/1009] [SPARK-26533][SQL] Support query auto timeout cancel on thriftserver ### What changes were proposed in this pull request? Support query auto cancelling when running too long on thriftserver. This is the rework of #28991 and the credit should be the original author, leoluan2009. Closes #28991 ### Why are the changes needed? For some cases, we use thriftserver as long-running applications. Some times we want all the query need not to run more than given time. In these cases, we can enable auto cancel for time-consumed query.Which can let us release resources for other queries to run. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added tests. Closes #29933 from maropu/pr28991. 
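For illustration, a minimal JDBC usage sketch of the timeout behaviour described above (the host, port, and query are placeholders borrowed from the tests added below, not values mandated by this patch): a per-statement timeout set via `java.sql.Statement.setQueryTimeout` and the global `spark.sql.thriftServer.queryTimeout` config can both cancel a long-running query, and when both are positive the smaller value takes effect.

```
import java.sql.{DriverManager, SQLException}

// Minimal sketch assuming a Thrift Server is reachable at localhost:10000 and the
// Hive JDBC driver is on the classpath; adjust the URL and query for your setup.
object QueryTimeoutSketch {
  def main(args: Array[String]): Unit = {
    val conn = DriverManager.getConnection("jdbc:hive2://localhost:10000/default")
    val stmt = conn.createStatement()
    try {
      // Global timeout for the session, in seconds (0 disables it).
      stmt.execute("SET spark.sql.thriftServer.queryTimeout=30")
      // Per-statement timeout; since 5 < 30, the smaller value applies.
      stmt.setQueryTimeout(5)
      try {
        stmt.executeQuery("SELECT java_method('java.lang.Thread', 'sleep', 10000L)")
      } catch {
        case e: SQLException =>
          // The long-running query is cancelled and reported as timed out.
          println(s"Cancelled: ${e.getMessage}")
      }
    } finally {
      stmt.close()
      conn.close()
    }
  }
}
```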
Lead-authored-by: Xuedong Luan Co-authored-by: Takeshi Yamamuro Co-authored-by: Luan Signed-off-by: Takeshi Yamamuro --- .../apache/spark/sql/internal/SQLConf.scala | 11 ++++ .../cli/operation/OperationManager.java | 4 +- .../service/cli/operation/SQLOperation.java | 7 ++- .../hive/thriftserver/HiveThriftServer2.scala | 2 +- .../SparkExecuteStatementOperation.scala | 45 ++++++++++++++- .../server/SparkSQLOperationManager.scala | 5 +- .../ui/HiveThriftServer2AppStatusStore.scala | 1 + .../ui/HiveThriftServer2EventManager.scala | 7 +++ .../ui/HiveThriftServer2Listener.scala | 10 ++++ .../HiveThriftServer2Suites.scala | 55 ++++++++++++++++++- .../SparkExecuteStatementOperationSuite.scala | 3 +- .../ui/HiveThriftServer2ListenerSuite.scala | 1 + 12 files changed, 140 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 65d976958ffdd..dad59ba0e7327 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -893,6 +893,17 @@ object SQLConf { .booleanConf .createWithDefault(false) + val THRIFTSERVER_QUERY_TIMEOUT = + buildConf("spark.sql.thriftServer.queryTimeout") + .doc("Set a query duration timeout in seconds in Thrift Server. If the timeout is set to " + + "a positive value, a running query will be cancelled automatically when the timeout is " + + "exceeded, otherwise the query continues to run till completion. If timeout values are " + + "set for each statement via `java.sql.Statement.setQueryTimeout` and they are smaller " + + "than this configuration value, they take precedence.") + .version("3.1.0") + .timeConf(TimeUnit.SECONDS) + .createWithDefault(0L) + val THRIFTSERVER_UI_STATEMENT_LIMIT = buildConf("spark.sql.thriftserver.ui.retainedStatements") .doc("The number of SQL statements kept in the JDBC/ODBC web UI history.") diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java index 75edc5763ce44..3df842d2b4af9 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java @@ -97,7 +97,8 @@ public ExecuteStatementOperation newExecuteStatementOperation(HiveSession parent public ExecuteStatementOperation newExecuteStatementOperation(HiveSession parentSession, String statement, Map confOverlay, boolean runAsync, long queryTimeout) throws HiveSQLException { - return newExecuteStatementOperation(parentSession, statement, confOverlay, runAsync); + return newExecuteStatementOperation(parentSession, statement, confOverlay, runAsync, + queryTimeout); } public GetTypeInfoOperation newGetTypeInfoOperation(HiveSession parentSession) { @@ -207,6 +208,7 @@ public void cancelOperation(OperationHandle opHandle) throws HiveSQLException { Operation operation = getOperation(opHandle); OperationState opState = operation.getStatus().getState(); if (opState == OperationState.CANCELED || + opState == OperationState.TIMEDOUT || opState == OperationState.CLOSED || opState == OperationState.FINISHED || opState == OperationState.ERROR || diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java 
b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java index e2ac1ea78c1ab..894793152f409 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java @@ -155,11 +155,12 @@ private void runQuery(HiveConf sqlOperationConf) throws HiveSQLException { throw toSQLException("Error while processing statement", response); } } catch (HiveSQLException e) { - // If the operation was cancelled by another thread, + // If the operation was cancelled by another thread or timed out, // Driver#run will return a non-zero response code. - // We will simply return if the operation state is CANCELED, + // We will simply return if the operation state is CANCELED or TIMEDOUT, // otherwise throw an exception - if (getStatus().getState() == OperationState.CANCELED) { + if (getStatus().getState() == OperationState.CANCELED || + getStatus().getState() == OperationState.TIMEDOUT) { return; } else { diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala index 4e6729faced43..a1f2d62a0b72c 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala @@ -116,7 +116,7 @@ object HiveThriftServer2 extends Logging { } private[thriftserver] object ExecutionState extends Enumeration { - val STARTED, COMPILED, CANCELED, FAILED, FINISHED, CLOSED = Value + val STARTED, COMPILED, CANCELED, TIMEDOUT, FAILED, FINISHED, CLOSED = Value type ExecutionState = Value } } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala index ec2c795e95c83..bc8cc16746a30 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.hive.thriftserver import java.security.PrivilegedExceptionAction import java.util.{Arrays, Map => JMap} -import java.util.concurrent.RejectedExecutionException +import java.util.concurrent.{Executors, RejectedExecutionException, TimeUnit} import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer @@ -45,11 +45,24 @@ private[hive] class SparkExecuteStatementOperation( parentSession: HiveSession, statement: String, confOverlay: JMap[String, String], - runInBackground: Boolean = true) + runInBackground: Boolean = true, + queryTimeout: Long) extends ExecuteStatementOperation(parentSession, statement, confOverlay, runInBackground) with SparkOperation with Logging { + // If a timeout value `queryTimeout` is specified by users and it is smaller than + // a global timeout value, we use the user-specified value. + // This code follows the Hive timeout behaviour (See #29933 for details). 
+ private val timeout = { + val globalTimeout = sqlContext.conf.getConf(SQLConf.THRIFTSERVER_QUERY_TIMEOUT) + if (globalTimeout > 0 && (queryTimeout <= 0 || globalTimeout < queryTimeout)) { + globalTimeout + } else { + queryTimeout + } + } + private var result: DataFrame = _ // We cache the returned rows to get iterators again in case the user wants to use FETCH_FIRST. @@ -200,6 +213,23 @@ private[hive] class SparkExecuteStatementOperation( parentSession.getUsername) setHasResultSet(true) // avoid no resultset for async run + if (timeout > 0) { + val timeoutExecutor = Executors.newSingleThreadScheduledExecutor() + timeoutExecutor.schedule(new Runnable { + override def run(): Unit = { + try { + timeoutCancel() + } catch { + case NonFatal(e) => + setOperationException(new HiveSQLException(e)) + logError(s"Error cancelling the query after timeout: $timeout seconds") + } finally { + timeoutExecutor.shutdown() + } + } + }, timeout, TimeUnit.SECONDS) + } + if (!runInBackground) { execute() } else { @@ -328,6 +358,17 @@ private[hive] class SparkExecuteStatementOperation( } } + def timeoutCancel(): Unit = { + synchronized { + if (!getStatus.getState.isTerminal) { + logInfo(s"Query with $statementId timed out after $timeout seconds") + setState(OperationState.TIMEDOUT) + cleanup() + HiveThriftServer2.eventManager.onStatementTimeout(statementId) + } + } + } + override def cancel(): Unit = { synchronized { if (!getStatus.getState.isTerminal) { diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala index bc9c13eb0d4f8..ba42eefed2a22 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala @@ -44,14 +44,15 @@ private[thriftserver] class SparkSQLOperationManager() parentSession: HiveSession, statement: String, confOverlay: JMap[String, String], - async: Boolean): ExecuteStatementOperation = synchronized { + async: Boolean, + queryTimeout: Long): ExecuteStatementOperation = synchronized { val sqlContext = sessionToContexts.get(parentSession.getSessionHandle) require(sqlContext != null, s"Session handle: ${parentSession.getSessionHandle} has not been" + s" initialized or had already closed.") val conf = sqlContext.sessionState.conf val runInBackground = async && conf.getConf(HiveUtils.HIVE_THRIFT_SERVER_ASYNC) val operation = new SparkExecuteStatementOperation( - sqlContext, parentSession, statement, confOverlay, runInBackground) + sqlContext, parentSession, statement, confOverlay, runInBackground, queryTimeout) handleToOperation.put(operation.getHandle, operation) logDebug(s"Created Operation for $statement with session=$parentSession, " + s"runInBackground=$runInBackground") diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2AppStatusStore.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2AppStatusStore.scala index 5cb78f6e64650..8bd8f29a4b9ec 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2AppStatusStore.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2AppStatusStore.scala @@ -119,6 +119,7 @@ 
private[thriftserver] class ExecutionInfo( def isExecutionActive: Boolean = { !(state == ExecutionState.FAILED || state == ExecutionState.CANCELED || + state == ExecutionState.TIMEDOUT || state == ExecutionState.CLOSED) } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2EventManager.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2EventManager.scala index fa04c67896a69..202fdf33c0dd9 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2EventManager.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2EventManager.scala @@ -57,6 +57,10 @@ private[thriftserver] class HiveThriftServer2EventManager(sc: SparkContext) { postLiveListenerBus(SparkListenerThriftServerOperationCanceled(id, System.currentTimeMillis())) } + def onStatementTimeout(id: String): Unit = { + postLiveListenerBus(SparkListenerThriftServerOperationTimeout(id, System.currentTimeMillis())) + } + def onStatementError(id: String, errorMsg: String, errorTrace: String): Unit = { postLiveListenerBus(SparkListenerThriftServerOperationError(id, errorMsg, errorTrace, System.currentTimeMillis())) @@ -96,6 +100,9 @@ private[thriftserver] case class SparkListenerThriftServerOperationParsed( private[thriftserver] case class SparkListenerThriftServerOperationCanceled( id: String, finishTime: Long) extends SparkListenerEvent +private[thriftserver] case class SparkListenerThriftServerOperationTimeout( + id: String, finishTime: Long) extends SparkListenerEvent + private[thriftserver] case class SparkListenerThriftServerOperationError( id: String, errorMsg: String, diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2Listener.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2Listener.scala index 6b7e5ee611417..4cf672e3d9d9e 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2Listener.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2Listener.scala @@ -119,6 +119,7 @@ private[thriftserver] class HiveThriftServer2Listener( case e: SparkListenerThriftServerOperationStart => onOperationStart(e) case e: SparkListenerThriftServerOperationParsed => onOperationParsed(e) case e: SparkListenerThriftServerOperationCanceled => onOperationCanceled(e) + case e: SparkListenerThriftServerOperationTimeout => onOperationTimeout(e) case e: SparkListenerThriftServerOperationError => onOperationError(e) case e: SparkListenerThriftServerOperationFinish => onOperationFinished(e) case e: SparkListenerThriftServerOperationClosed => onOperationClosed(e) @@ -181,6 +182,15 @@ private[thriftserver] class HiveThriftServer2Listener( case None => logWarning(s"onOperationCanceled called with unknown operation id: ${e.id}") } + private def onOperationTimeout(e: SparkListenerThriftServerOperationTimeout): Unit = + Option(executionList.get(e.id)) match { + case Some(executionData) => + executionData.finishTimestamp = e.finishTime + executionData.state = ExecutionState.TIMEDOUT + updateLiveStore(executionData) + case None => logWarning(s"onOperationCanceled called with unknown operation id: ${e.id}") + } + private def onOperationError(e: SparkListenerThriftServerOperationError): Unit = Option(executionList.get(e.id)) match { case 
Some(executionData) => diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala index 75c00000dee47..7cc60bb505089 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala @@ -46,6 +46,7 @@ import org.apache.spark.{SparkException, SparkFunSuite} import org.apache.spark.internal.Logging import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.test.HiveTestJars +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.HIVE_THRIFT_SERVER_SINGLESESSION import org.apache.spark.sql.test.ProcessTestUtils.ProcessOutputCapturer import org.apache.spark.util.{ThreadUtils, Utils} @@ -285,7 +286,6 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { } test("test multiple session") { - import org.apache.spark.sql.internal.SQLConf var defaultV1: String = null var defaultV2: String = null var data: ArrayBuffer[Int] = null @@ -880,6 +880,59 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { assert(rs.getString(1) === expected.toString) } } + + test("SPARK-26533: Support query auto timeout cancel on thriftserver - setQueryTimeout") { + withJdbcStatement() { statement => + statement.setQueryTimeout(1) + val e = intercept[SQLException] { + statement.execute("select java_method('java.lang.Thread', 'sleep', 10000L)") + }.getMessage + assert(e.contains("Query timed out after")) + + statement.setQueryTimeout(0) + val rs1 = statement.executeQuery( + "select 'test', java_method('java.lang.Thread', 'sleep', 3000L)") + rs1.next() + assert(rs1.getString(1) == "test") + + statement.setQueryTimeout(-1) + val rs2 = statement.executeQuery( + "select 'test', java_method('java.lang.Thread', 'sleep', 3000L)") + rs2.next() + assert(rs2.getString(1) == "test") + } + } + + test("SPARK-26533: Support query auto timeout cancel on thriftserver - SQLConf") { + withJdbcStatement() { statement => + statement.execute(s"SET ${SQLConf.THRIFTSERVER_QUERY_TIMEOUT.key}=1") + val e1 = intercept[SQLException] { + statement.execute("select java_method('java.lang.Thread', 'sleep', 10000L)") + }.getMessage + assert(e1.contains("Query timed out after")) + + statement.execute(s"SET ${SQLConf.THRIFTSERVER_QUERY_TIMEOUT.key}=0") + val rs = statement.executeQuery( + "select 'test', java_method('java.lang.Thread', 'sleep', 3000L)") + rs.next() + assert(rs.getString(1) == "test") + + // Uses a smaller timeout value of a config value and an a user-specified one + statement.execute(s"SET ${SQLConf.THRIFTSERVER_QUERY_TIMEOUT.key}=1") + statement.setQueryTimeout(30) + val e2 = intercept[SQLException] { + statement.execute("select java_method('java.lang.Thread', 'sleep', 10000L)") + }.getMessage + assert(e2.contains("Query timed out after")) + + statement.execute(s"SET ${SQLConf.THRIFTSERVER_QUERY_TIMEOUT.key}=30") + statement.setQueryTimeout(1) + val e3 = intercept[SQLException] { + statement.execute("select java_method('java.lang.Thread', 'sleep', 10000L)") + }.getMessage + assert(e3.contains("Query timed out after")) + } + } } class SingleSessionSuite extends HiveThriftJdbcTest { diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperationSuite.scala 
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperationSuite.scala index ca1f9a2f74244..c8bb6d9ee0821 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperationSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperationSuite.scala @@ -61,6 +61,7 @@ class SparkExecuteStatementOperationSuite extends SparkFunSuite with SharedSpark Seq( (OperationState.CANCELED, (_: SparkExecuteStatementOperation).cancel()), + (OperationState.TIMEDOUT, (_: SparkExecuteStatementOperation).timeoutCancel()), (OperationState.CLOSED, (_: SparkExecuteStatementOperation).close()) ).foreach { case (finalState, transition) => test("SPARK-32057 SparkExecuteStatementOperation should not transiently become ERROR " + @@ -109,7 +110,7 @@ class SparkExecuteStatementOperationSuite extends SparkFunSuite with SharedSpark signal: Semaphore, finalState: OperationState) extends SparkExecuteStatementOperation(sqlContext, hiveSession, statement, - new util.HashMap, false) { + new util.HashMap, false, 0) { override def cleanup(): Unit = { super.cleanup() diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2ListenerSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2ListenerSuite.scala index 9a9f574153a0a..3f0538dd1c943 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2ListenerSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2ListenerSuite.scala @@ -151,6 +151,7 @@ class HiveThriftServer2ListenerSuite extends SparkFunSuite with BeforeAndAfter { "stmt", "groupId", 0)) listener.onOtherEvent(SparkListenerThriftServerOperationParsed(unknownOperation, "query")) listener.onOtherEvent(SparkListenerThriftServerOperationCanceled(unknownOperation, 0)) + listener.onOtherEvent(SparkListenerThriftServerOperationTimeout(unknownOperation, 0)) listener.onOtherEvent(SparkListenerThriftServerOperationError(unknownOperation, "msg", "trace", 0)) listener.onOtherEvent(SparkListenerThriftServerOperationFinish(unknownOperation, 0)) From 8cae7f88b011939473fc9a6373012e23398bbc07 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Thu, 22 Oct 2020 13:51:42 +0000 Subject: [PATCH 0304/1009] [SPARK-33095][SQL] Support ALTER TABLE in JDBC v2 Table Catalog: add, update type and nullability of columns (MySQL dialect) ### What changes were proposed in this pull request? Override the default SQL strings for: ALTER TABLE UPDATE COLUMN TYPE ALTER TABLE UPDATE COLUMN NULLABILITY in the following MySQL JDBC dialect according to official documentation. Write MySQL integration tests for JDBC. ### Why are the changes needed? Improved code coverage and support mysql dialect for jdbc. ### Does this PR introduce _any_ user-facing change? Yes, Support ALTER TABLE in JDBC v2 Table Catalog: add, update type and nullability of columns (MySQL dialect) ### How was this patch tested? Added tests. Closes #30025 from ScrapCodes/mysql-dialect. 
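For illustration, a minimal sketch (with a made-up JDBC URL, table name, and column type) of how the two overridden dialect hooks behave once this change is in place:
```
// Minimal sketch: URL/table/column names below are made up for illustration.
import org.apache.spark.sql.jdbc.JdbcDialects

val mysql = JdbcDialects.get("jdbc:mysql://localhost:3306/test")

// Column type changes are rewritten to MySQL's MODIFY COLUMN form:
mysql.getUpdateColumnTypeQuery("alt_table", "ID", "VARCHAR(255)")
// => ALTER TABLE alt_table MODIFY COLUMN `ID` VARCHAR(255)

// Changing nullability alone needs the full column definition in MySQL, which the
// dialect does not have, so it fails fast instead of emitting broken SQL:
// mysql.getUpdateColumnNullabilityQuery("alt_table", "ID", isNullable = true)
// => throws java.sql.SQLFeatureNotSupportedException("UpdateColumnNullability is not supported")
```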
Authored-by: Prashant Sharma Signed-off-by: Wenchen Fan --- .../sql/jdbc/v2/MySQLIntegrationSuite.scala | 89 +++++++++++++++++++ .../apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 34 +++---- .../apache/spark/sql/jdbc/MySQLDialect.scala | 24 ++++- 3 files changed, 131 insertions(+), 16 deletions(-) create mode 100644 external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala new file mode 100644 index 0000000000000..ec958cd55c943 --- /dev/null +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.jdbc.v2 + +import java.sql.{Connection, SQLFeatureNotSupportedException} + +import org.scalatest.time.SpanSugar._ + +import org.apache.spark.SparkConf +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog +import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite} +import org.apache.spark.sql.types._ +import org.apache.spark.tags.DockerTest + +/** + * + * To run this test suite for a specific version (e.g., mysql:5.7.31): + * {{{ + * MYSQL_DOCKER_IMAGE_NAME=mysql:5.7.31 + * ./build/sbt -Pdocker-integration-tests "testOnly *v2*MySQLIntegrationSuite" + * + * }}} + * + */ +@DockerTest +class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { + override val catalogName: String = "mysql" + override val db = new DatabaseOnDocker { + override val imageName = sys.env.getOrElse("MYSQL_DOCKER_IMAGE_NAME", "mysql:5.7.31") + override val env = Map( + "MYSQL_ROOT_PASSWORD" -> "rootpass" + ) + override val usesIpc = false + override val jdbcPort: Int = 3306 + + override def getJdbcUrl(ip: String, port: Int): String = + s"jdbc:mysql://$ip:$port/mysql?user=root&password=rootpass" + } + + override def sparkConf: SparkConf = super.sparkConf + .set("spark.sql.catalog.mysql", classOf[JDBCTableCatalog].getName) + .set("spark.sql.catalog.mysql.url", db.getJdbcUrl(dockerIp, externalPort)) + + override val connectionTimeout = timeout(7.minutes) + + override def dataPreparation(conn: Connection): Unit = {} + + override def testUpdateColumnType(tbl: String): Unit = { + sql(s"CREATE TABLE $tbl (ID INTEGER) USING _") + var t = spark.table(tbl) + var expectedSchema = new StructType().add("ID", IntegerType) + assert(t.schema === expectedSchema) + sql(s"ALTER TABLE $tbl ALTER COLUMN id TYPE STRING") + t = spark.table(tbl) + expectedSchema = new 
StructType().add("ID", StringType) + assert(t.schema === expectedSchema) + // Update column type from STRING to INTEGER + val msg1 = intercept[AnalysisException] { + sql(s"ALTER TABLE $tbl ALTER COLUMN id TYPE INTEGER") + }.getMessage + assert(msg1.contains("Cannot update alt_table field ID: string cannot be cast to int")) + } + + override def testUpdateColumnNullability(tbl: String): Unit = { + sql("CREATE TABLE mysql.alt_table (ID STRING NOT NULL) USING _") + // Update nullability is unsupported for mysql db. + val msg = intercept[AnalysisException] { + sql("ALTER TABLE mysql.alt_table ALTER COLUMN ID DROP NOT NULL") + }.getCause.asInstanceOf[SQLFeatureNotSupportedException].getMessage + + assert(msg.contains("UpdateColumnNullability is not supported")) + } +} diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index 384bcc22f27d8..942c6237fd358 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -28,6 +28,24 @@ trait V2JDBCTest extends SharedSparkSession { // dialect specific update column type test def testUpdateColumnType(tbl: String): Unit + def testUpdateColumnNullability(tbl: String): Unit = { + sql(s"CREATE TABLE $catalogName.alt_table (ID STRING NOT NULL) USING _") + var t = spark.table(s"$catalogName.alt_table") + // nullable is true in the expecteSchema because Spark always sets nullable to true + // regardless of the JDBC metadata https://github.com/apache/spark/pull/18445 + var expectedSchema = new StructType().add("ID", StringType, nullable = true) + assert(t.schema === expectedSchema) + sql(s"ALTER TABLE $catalogName.alt_table ALTER COLUMN ID DROP NOT NULL") + t = spark.table(s"$catalogName.alt_table") + expectedSchema = new StructType().add("ID", StringType, nullable = true) + assert(t.schema === expectedSchema) + // Update nullability of not existing column + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $catalogName.alt_table ALTER COLUMN bad_column DROP NOT NULL") + }.getMessage + assert(msg.contains("Cannot update missing field bad_column")) + } + test("SPARK-33034: ALTER TABLE ... add new columns") { withTable(s"$catalogName.alt_table") { sql(s"CREATE TABLE $catalogName.alt_table (ID STRING) USING _") @@ -73,21 +91,7 @@ trait V2JDBCTest extends SharedSparkSession { test("SPARK-33034: ALTER TABLE ... 
update column nullability") { withTable(s"$catalogName.alt_table") { - sql(s"CREATE TABLE $catalogName.alt_table (ID STRING NOT NULL) USING _") - var t = spark.table(s"$catalogName.alt_table") - // nullable is true in the expecteSchema because Spark always sets nullable to true - // regardless of the JDBC metadata https://github.com/apache/spark/pull/18445 - var expectedSchema = new StructType().add("ID", StringType, nullable = true) - assert(t.schema === expectedSchema) - sql(s"ALTER TABLE $catalogName.alt_table ALTER COLUMN ID DROP NOT NULL") - t = spark.table(s"$catalogName.alt_table") - expectedSchema = new StructType().add("ID", StringType, nullable = true) - assert(t.schema === expectedSchema) - // Update nullability of not existing column - val msg = intercept[AnalysisException] { - sql(s"ALTER TABLE $catalogName.alt_table ALTER COLUMN bad_column DROP NOT NULL") - }.getMessage - assert(msg.contains("Cannot update missing field bad_column")) + testUpdateColumnNullability(s"$catalogName.alt_table") } // Update column nullability in not existing table val msg = intercept[AnalysisException] { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala index 24b31b14d9427..a516e9e76ef31 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.jdbc -import java.sql.Types +import java.sql.{SQLFeatureNotSupportedException, Types} import java.util.Locale import org.apache.spark.sql.types.{BooleanType, DataType, LongType, MetadataBuilder} @@ -48,4 +48,26 @@ private case object MySQLDialect extends JdbcDialect { } override def isCascadingTruncateTable(): Option[Boolean] = Some(false) + + // See https://dev.mysql.com/doc/refman/8.0/en/alter-table.html + override def getUpdateColumnTypeQuery( + tableName: String, + columnName: String, + newDataType: String): String = { + s"ALTER TABLE $tableName MODIFY COLUMN ${quoteIdentifier(columnName)} $newDataType" + } + + // See https://dev.mysql.com/doc/refman/8.0/en/alter-table.html + // require to have column data type to change the column nullability + // ALTER TABLE tbl_name MODIFY [COLUMN] col_name column_definition + // column_definition: + // data_type [NOT NULL | NULL] + // e.g. ALTER TABLE t1 MODIFY b INT NOT NULL; + // We don't have column data type here, so throw Exception for now + override def getUpdateColumnNullabilityQuery( + tableName: String, + columnName: String, + isNullable: Boolean): String = { + throw new SQLFeatureNotSupportedException(s"UpdateColumnNullability is not supported") + } } From a1629b4a5790dce1a57e2c2bad9e04c627b88d29 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Thu, 22 Oct 2020 13:53:01 +0000 Subject: [PATCH 0305/1009] [SPARK-32852][SQL] spark.sql.hive.metastore.jars support HDFS location ### What changes were proposed in this pull request? Support `spark.sql.hive.metastore.jars` use HDFS location. When user need to use path to set hive metastore jars, you should set `spark.sql.hive.metasstore.jars=path` and set real path in `spark.sql.hive.metastore.jars.path` since we use `File.pathSeperator` to split path, but `FIle.pathSeparator` is `:` in unix, it will split hdfs location `hdfs://nameservice/xx`. So add new config `spark.sql.hive.metastore.jars.path` to set comma separated paths. To keep both two way supported ### Why are the changes needed? 
All spark app can fetch internal version hive jars in HDFS location, not need distribute to all node. ### Does this PR introduce _any_ user-facing change? User can use HDFS location to store hive metastore jars ### How was this patch tested? Manuel tested. Closes #29881 from AngersZhuuuu/SPARK-32852. Authored-by: angerszhu Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/hive/HiveUtils.scala | 92 ++++++++++++++++--- 1 file changed, 77 insertions(+), 15 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index 96c207913d49a..399f8911ef679 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -40,6 +40,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.execution.command.DDLUtils +import org.apache.spark.sql.execution.datasources.DataSource import org.apache.spark.sql.hive.client._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf._ @@ -77,7 +78,7 @@ private[spark] object HiveUtils extends Logging { val HIVE_METASTORE_JARS = buildStaticConf("spark.sql.hive.metastore.jars") .doc(s""" | Location of the jars that should be used to instantiate the HiveMetastoreClient. - | This property can be one of three options: " + | This property can be one of four options: " | 1. "builtin" | Use Hive ${builtinHiveVersion}, which is bundled with the Spark assembly when | -Phive is enabled. When this option is chosen, @@ -85,12 +86,32 @@ private[spark] object HiveUtils extends Logging { | ${builtinHiveVersion} or not defined. | 2. "maven" | Use Hive jars of specified version downloaded from Maven repositories. - | 3. A classpath in the standard format for both Hive and Hadoop. + | 3. "path" + | Use Hive jars configured by `spark.sql.hive.metastore.jars.path` + | in comma separated format. Support both local or remote paths. + | 4. A classpath in the standard format for both Hive and Hadoop. """.stripMargin) .version("1.4.0") .stringConf .createWithDefault("builtin") + val HIVE_METASTORE_JARS_PATH = buildStaticConf("spark.sql.hive.metastore.jars.path") + .doc(s"Comma separated URL of Hive jars, support both local and remote paths," + + s"Such as: " + + s" 1. file://path/to/jar/xxx.jar\n" + + s" 2. hdfs://nameservice/path/to/jar/xxx.jar\n" + + s" 3. /path/to/jar/ (path without URI scheme follow conf `fs.defaultFS`'s URI schema)\n" + + s" 4. [http/https/ftp]://path/to/jar/xxx.jar\n" + + s"Notice: `http/https/ftp` doesn't support wildcard, but other URLs support" + + s"nested path wildcard, Such as: " + + s" 1. file://path/to/jar/*, file://path/to/jar/*/*\n" + + s" 2. hdfs://nameservice/path/to/jar/*, hdfs://nameservice/path/to/jar/*/*\n" + + s"When ${HIVE_METASTORE_JARS.key} is set to `path`, we will use Hive jars configured by this") + .version("3.1.0") + .stringConf + .toSequence + .createWithDefault(Nil) + val CONVERT_METASTORE_PARQUET = buildConf("spark.sql.hive.convertMetastoreParquet") .doc("When set to true, the built-in Parquet reader and writer are used to process " + "parquet tables created by using the HiveQL syntax, instead of Hive serde.") @@ -175,6 +196,7 @@ private[spark] object HiveUtils extends Logging { * The location of the jars that should be used to instantiate the HiveMetastoreClient. 
This * property can be one of three options: * - a classpath in the standard format for both hive and hadoop. + * - path - attempt to discover the jars with paths configured by `HIVE_METASTORE_JARS_PATH`. * - builtin - attempt to discover the jars that were used to load Spark SQL and use those. This * option is only valid when using the execution version of Hive. * - maven - download the correct version of hive on demand from maven. @@ -183,6 +205,13 @@ private[spark] object HiveUtils extends Logging { conf.getConf(HIVE_METASTORE_JARS) } + /** + * Hive jars paths, only work when `HIVE_METASTORE_JARS` is `path`. + */ + private def hiveMetastoreJarsPath(conf: SQLConf): Seq[String] = { + conf.getConf(HIVE_METASTORE_JARS_PATH) + } + /** * A comma separated list of class prefixes that should be loaded using the classloader that * is shared between Spark SQL and a specific version of Hive. An example of classes that should @@ -333,6 +362,20 @@ private[spark] object HiveUtils extends Logging { val hiveMetastoreBarrierPrefixes = HiveUtils.hiveMetastoreBarrierPrefixes(sqlConf) val metaVersion = IsolatedClientLoader.hiveVersion(hiveMetastoreVersion) + def addLocalHiveJars(file: File): Seq[URL] = { + if (file.getName == "*") { + val files = file.getParentFile.listFiles() + if (files == null) { + logWarning(s"Hive jar path '${file.getPath}' does not exist.") + Nil + } else { + files.filter(_.getName.toLowerCase(Locale.ROOT).endsWith(".jar")).map(_.toURL).toSeq + } + } else { + file.toURL :: Nil + } + } + val isolatedLoader = if (hiveMetastoreJars == "builtin") { if (builtinHiveVersion != hiveMetastoreVersion) { throw new IllegalArgumentException( @@ -393,24 +436,43 @@ private[spark] object HiveUtils extends Logging { config = configurations, barrierPrefixes = hiveMetastoreBarrierPrefixes, sharedPrefixes = hiveMetastoreSharedPrefixes) + } else if (hiveMetastoreJars == "path") { + // Convert to files and expand any directories. + val jars = + HiveUtils.hiveMetastoreJarsPath(sqlConf) + .flatMap { + case path if path.contains("\\") && Utils.isWindows => + addLocalHiveJars(new File(path)) + case path => + DataSource.checkAndGlobPathIfNecessary( + pathStrings = Seq(path), + hadoopConf = hadoopConf, + checkEmptyGlobPath = true, + checkFilesExist = false, + enableGlobbing = true + ).map(_.toUri.toURL) + } + + logInfo( + s"Initializing HiveMetastoreConnection version $hiveMetastoreVersion " + + s"using path: ${jars.mkString(";")}") + new IsolatedClientLoader( + version = metaVersion, + sparkConf = conf, + hadoopConf = hadoopConf, + execJars = jars.toSeq, + config = configurations, + isolationOn = true, + barrierPrefixes = hiveMetastoreBarrierPrefixes, + sharedPrefixes = hiveMetastoreSharedPrefixes) } else { // Convert to files and expand any directories. 
val jars = hiveMetastoreJars .split(File.pathSeparator) - .flatMap { - case path if new File(path).getName == "*" => - val files = new File(path).getParentFile.listFiles() - if (files == null) { - logWarning(s"Hive jar path '$path' does not exist.") - Nil - } else { - files.filter(_.getName.toLowerCase(Locale.ROOT).endsWith(".jar")).toSeq - } - case path => - new File(path) :: Nil - } - .map(_.toURI.toURL) + .flatMap { path => + addLocalHiveJars(new File(path)) + } logInfo( s"Initializing HiveMetastoreConnection version $hiveMetastoreVersion " + From b38f3a5557b45503e0f8d67bc77c5d390a67a42f Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 22 Oct 2020 14:01:07 +0000 Subject: [PATCH 0306/1009] [SPARK-32978][SQL] Make sure the number of dynamic part metric is correct ### What changes were proposed in this pull request? The purpose of this pr is to resolve SPARK-32978. The main reason of bad case describe in SPARK-32978 is the `BasicWriteTaskStatsTracker` directly reports the new added partition number of each task, which makes it impossible to remove duplicate data in driver side. The main of this pr is change to report partitionValues to driver and remove duplicate data at driver side to make sure the number of dynamic part metric is correct. ### Why are the changes needed? The the number of dynamic part metric we display on the UI should be correct. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add a new test case refer to described in SPARK-32978 Closes #30026 from LuciferYang/SPARK-32978. Authored-by: yangjie01 Signed-off-by: Wenchen Fan --- ...namicPartitionsBenchmark-jdk11-results.txt | 8 ++ ...WithDynamicPartitionsBenchmark-results.txt | 8 ++ .../datasources/BasicWriteStatsTracker.scala | 16 +-- ...tTableWithDynamicPartitionsBenchmark.scala | 103 ++++++++++++++++++ ...BasicWriteJobStatsTrackerMetricSuite.scala | 59 ++++++++++ 5 files changed, 187 insertions(+), 7 deletions(-) create mode 100644 sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-jdk11-results.txt create mode 100644 sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-results.txt create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertTableWithDynamicPartitionsBenchmark.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/BasicWriteJobStatsTrackerMetricSuite.scala diff --git a/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-jdk11-results.txt b/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-jdk11-results.txt new file mode 100644 index 0000000000000..12fe0e1f5a7ce --- /dev/null +++ b/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-jdk11-results.txt @@ -0,0 +1,8 @@ +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.7 +Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz +dynamic insert table benchmark, totalRows = 200000: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +---------------------------------------------------------------------------------------------------------------------------------- +one partition column, 100 partitions 16396 16688 413 0.0 81978.3 1.0X +two partition columns, 500 partitions 50356 50924 804 0.0 251777.9 0.3X +three partition columns, 2000 partitions 144342 144850 718 0.0 721710.9 0.1X + diff --git a/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-results.txt b/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-results.txt new file mode 100644 index 0000000000000..c042d74091a3b 
--- /dev/null +++ b/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-results.txt @@ -0,0 +1,8 @@ +OpenJDK 64-Bit Server VM 1.8.0_232-b18 on Mac OS X 10.15.7 +Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz +dynamic insert table benchmark, totalRows = 200000: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +---------------------------------------------------------------------------------------------------------------------------------- +one partition column, 100 partitions 23370 23588 309 0.0 116848.3 1.0X +two partition columns, 500 partitions 37686 38079 555 0.0 188432.2 0.6X +three partition columns, 2000 partitions 112489 113049 792 0.0 562446.1 0.2X + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/BasicWriteStatsTracker.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/BasicWriteStatsTracker.scala index b71c2d12f02b8..6babbb465a3fb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/BasicWriteStatsTracker.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/BasicWriteStatsTracker.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.datasources import java.io.FileNotFoundException +import scala.collection.mutable + import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path @@ -35,7 +37,7 @@ import org.apache.spark.util.SerializableConfiguration * These were first introduced in https://github.com/apache/spark/pull/18159 (SPARK-20703). */ case class BasicWriteTaskStats( - numPartitions: Int, + partitions: Seq[InternalRow], numFiles: Int, numBytes: Long, numRows: Long) @@ -48,7 +50,7 @@ case class BasicWriteTaskStats( class BasicWriteTaskStatsTracker(hadoopConf: Configuration) extends WriteTaskStatsTracker with Logging { - private[this] var numPartitions: Int = 0 + private[this] val partitions: mutable.ArrayBuffer[InternalRow] = mutable.ArrayBuffer.empty private[this] var numFiles: Int = 0 private[this] var submittedFiles: Int = 0 private[this] var numBytes: Long = 0L @@ -76,7 +78,7 @@ class BasicWriteTaskStatsTracker(hadoopConf: Configuration) override def newPartition(partitionValues: InternalRow): Unit = { - numPartitions += 1 + partitions.append(partitionValues) } override def newBucket(bucketId: Int): Unit = { @@ -117,7 +119,7 @@ class BasicWriteTaskStatsTracker(hadoopConf: Configuration) "This could be due to the output format not writing empty files, " + "or files being not immediately visible in the filesystem.") } - BasicWriteTaskStats(numPartitions, numFiles, numBytes, numRows) + BasicWriteTaskStats(partitions.toSeq, numFiles, numBytes, numRows) } } @@ -139,7 +141,7 @@ class BasicWriteJobStatsTracker( override def processStats(stats: Seq[WriteTaskStats]): Unit = { val sparkContext = SparkContext.getActive.get - var numPartitions: Long = 0L + var partitionsSet: mutable.Set[InternalRow] = mutable.HashSet.empty var numFiles: Long = 0L var totalNumBytes: Long = 0L var totalNumOutput: Long = 0L @@ -147,7 +149,7 @@ class BasicWriteJobStatsTracker( val basicStats = stats.map(_.asInstanceOf[BasicWriteTaskStats]) basicStats.foreach { summary => - numPartitions += summary.numPartitions + partitionsSet ++= summary.partitions numFiles += summary.numFiles totalNumBytes += summary.numBytes totalNumOutput += summary.numRows @@ -156,7 +158,7 @@ class BasicWriteJobStatsTracker( metrics(BasicWriteJobStatsTracker.NUM_FILES_KEY).add(numFiles) metrics(BasicWriteJobStatsTracker.NUM_OUTPUT_BYTES_KEY).add(totalNumBytes) 
metrics(BasicWriteJobStatsTracker.NUM_OUTPUT_ROWS_KEY).add(totalNumOutput) - metrics(BasicWriteJobStatsTracker.NUM_PARTS_KEY).add(numPartitions) + metrics(BasicWriteJobStatsTracker.NUM_PARTS_KEY).add(partitionsSet.size) val executionId = sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY) SQLMetrics.postDriverMetricUpdates(sparkContext, executionId, metrics.values.toList) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertTableWithDynamicPartitionsBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertTableWithDynamicPartitionsBenchmark.scala new file mode 100644 index 0000000000000..81a29cefd0045 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertTableWithDynamicPartitionsBenchmark.scala @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.benchmark + +import org.apache.spark.benchmark.Benchmark + +/** + * Benchmark to measure insert into table with dynamic partition columns. + * To run this benchmark: + * {{{ + * 1. without sbt: bin/spark-submit --class + * 2. build/sbt "sql/test:runMain " + * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain " + * Results will be written to + * "benchmarks/InsertTableWithDynamicPartitionsBenchmark-results.txt". 
+ * }}} + */ +object InsertTableWithDynamicPartitionsBenchmark extends DataSourceWriteBenchmark { + + def prepareSourceTableAndGetTotalRows(numberRows: Long, sourceTable: String, + part1Step: Int, part2Step: Int, part3Step: Int): Long = { + val dataFrame = spark.range(0, numberRows, 1, 4) + val dataFrame1 = spark.range(0, numberRows, part1Step, 4) + val dataFrame2 = spark.range(0, numberRows, part2Step, 4) + val dataFrame3 = spark.range(0, numberRows, part3Step, 4) + + val data = dataFrame.join(dataFrame1).join(dataFrame2).join(dataFrame3) + .toDF("id", "part1", "part2", "part3") + data.write.saveAsTable(sourceTable) + data.count() + } + + def writeOnePartitionColumnTable(tableName: String, + partitionNumber: Long, benchmark: Benchmark): Unit = { + spark.sql(s"create table $tableName(i bigint, part bigint) " + + "using parquet partitioned by (part)") + benchmark.addCase(s"one partition column, $partitionNumber partitions") { _ => + spark.sql(s"insert overwrite table $tableName partition(part) " + + "select id, part1 as part from sourceTable") + } + } + + def writeTwoPartitionColumnTable(tableName: String, + partitionNumber: Long, benchmark: Benchmark): Unit = { + spark.sql(s"create table $tableName(i bigint, part1 bigint, part2 bigint) " + + "using parquet partitioned by (part1, part2)") + benchmark.addCase(s"two partition columns, $partitionNumber partitions") { _ => + spark.sql(s"insert overwrite table $tableName partition(part1, part2) " + + "select id, part1, part2 from sourceTable") + } + } + + def writeThreePartitionColumnTable(tableName: String, + partitionNumber: Long, benchmark: Benchmark): Unit = { + spark.sql(s"create table $tableName(i bigint, part1 bigint, part2 bigint, part3 bigint) " + + "using parquet partitioned by (part1, part2, part3)") + benchmark.addCase(s"three partition columns, $partitionNumber partitions") { _ => + spark.sql(s"insert overwrite table $tableName partition(part1, part2, part3) " + + "select id, part1, part2, part3 from sourceTable") + } + } + + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + val sourceTable = "sourceTable" + val onePartColTable = "onePartColTable" + val twoPartColTable = "twoPartColTable" + val threePartColTable = "threePartColTable" + val numberRows = 100L + val part1Step = 1 + val part2Step = 20 + val part3Step = 25 + val part1Number = numberRows / part1Step + val part2Number = numberRows / part2Step * part1Number + val part3Number = numberRows / part3Step * part2Number + + withTable(sourceTable, onePartColTable, twoPartColTable, threePartColTable) { + val totalRows = + prepareSourceTableAndGetTotalRows(numberRows, sourceTable, part1Step, part2Step, part3Step) + val benchmark = + new Benchmark(s"dynamic insert table benchmark, totalRows = $totalRows", + totalRows, output = output) + writeOnePartitionColumnTable(onePartColTable, part1Number, benchmark) + writeTwoPartitionColumnTable(twoPartColTable, part2Number, benchmark) + writeThreePartitionColumnTable(threePartColTable, part3Number, benchmark) + benchmark.run() + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/BasicWriteJobStatsTrackerMetricSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/BasicWriteJobStatsTrackerMetricSuite.scala new file mode 100644 index 0000000000000..3e58c225d8c7a --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/BasicWriteJobStatsTrackerMetricSuite.scala @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) 
under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.{LocalSparkSession, SparkSession} + +class BasicWriteJobStatsTrackerMetricSuite extends SparkFunSuite with LocalSparkSession { + + test("SPARK-32978: make sure the number of dynamic part metric is correct") { + try { + val partitions = "50" + spark = SparkSession.builder().master("local[4]").getOrCreate() + val statusStore = spark.sharedState.statusStore + + spark.sql("create table dynamic_partition(i bigint, part bigint) " + + "using parquet partitioned by (part)") + val oldExecutionsSize = statusStore.executionsList().size + spark.sql("insert overwrite table dynamic_partition partition(part) " + + s"select id, id % $partitions as part from range(10000)") + + // Wait for listener to finish computing the metrics for the executions. + while (statusStore.executionsList().size - oldExecutionsSize < 1 || + statusStore.executionsList().last.metricValues == null) { + Thread.sleep(100) + } + + // There should be 2 SQLExecutionUIData in executionsList and the 2nd item is we need, + // but the executionId is indeterminate in maven test, + // so the `statusStore.execution(executionId)` API is not used. + assert(statusStore.executionsCount() == 2) + val executionData = statusStore.executionsList()(1) + val accumulatorIdOpt = + executionData.metrics.find(_.name == "number of dynamic part").map(_.accumulatorId) + assert(accumulatorIdOpt.isDefined) + val numPartsOpt = executionData.metricValues.get(accumulatorIdOpt.get) + assert(numPartsOpt.isDefined && numPartsOpt.get == partitions) + + } finally { + spark.sql("drop table if exists dynamic_partition") + spark.stop() + } + } +} From a03d77d32696f5a33770e9bee654acde904da7d4 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 22 Oct 2020 15:57:03 +0000 Subject: [PATCH 0307/1009] [SPARK-33160][SQL][FOLLOWUP] Replace the parquet metadata key `org.apache.spark.int96NoRebase` by `org.apache.spark.legacyINT96` ### What changes were proposed in this pull request? 1. Replace the metadata key `org.apache.spark.int96NoRebase` by `org.apache.spark.legacyINT96`. 2. Change the condition when new key should be saved to parquet metadata: it should be saved when the SQL config `spark.sql.legacy.parquet.int96RebaseModeInWrite` is set to `LEGACY`. 3. Change handling the metadata key in read: - If there is no the key in parquet metadata, take the rebase mode from the SQL config: `spark.sql.legacy.parquet.int96RebaseModeInRead` - If parquet files were saved by Spark < 3.1.0, use the `LEGACY` rebasing mode for INT96 type. - For files written by Spark >= 3.1.0, if the `org.apache.spark.legacyINT96` presents in metadata, perform rebasing otherwise don't. ### Why are the changes needed? 
- To not increase parquet size by default when `spark.sql.legacy.parquet.int96RebaseModeInWrite` is `EXCEPTION` after https://github.com/apache/spark/pull/30121. - To have the implementation similar to `org.apache.spark.legacyDateTime` - To minimise impact on other subsystems that are based on file sizes like gathering statistics. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Modified test in `ParquetIOSuite` Closes #30132 from MaxGekk/int96-flip-metadata-rebase-key. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../datasources/DataSourceUtils.scala | 22 ++++++++++++------- .../parquet/ParquetWriteSupport.scala | 6 ++--- .../scala/org/apache/spark/sql/package.scala | 4 ++-- .../datasources/parquet/ParquetIOSuite.scala | 16 +++++++++----- .../spark/sql/hive/StatisticsSuite.scala | 2 +- 5 files changed, 31 insertions(+), 19 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala index f8068a634977b..b54747a25d5a3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala @@ -26,7 +26,7 @@ import org.json4s.NoTypeHints import org.json4s.jackson.Serialization import org.apache.spark.SparkUpgradeException -import org.apache.spark.sql.{SPARK_INT96_NO_REBASE, SPARK_LEGACY_DATETIME, SPARK_VERSION_METADATA_KEY} +import org.apache.spark.sql.{SPARK_LEGACY_DATETIME, SPARK_LEGACY_INT96, SPARK_VERSION_METADATA_KEY} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogUtils} import org.apache.spark.sql.catalyst.util.RebaseDateTime @@ -115,14 +115,20 @@ object DataSourceUtils { lookupFileMeta: String => String, modeByConfig: String): LegacyBehaviorPolicy.Value = { if (Utils.isTesting && SQLConf.get.getConfString("spark.test.forceNoRebase", "") == "true") { - LegacyBehaviorPolicy.CORRECTED - } else if (lookupFileMeta(SPARK_INT96_NO_REBASE) != null) { - LegacyBehaviorPolicy.CORRECTED - } else if (lookupFileMeta(SPARK_VERSION_METADATA_KEY) != null) { - LegacyBehaviorPolicy.LEGACY - } else { - LegacyBehaviorPolicy.withName(modeByConfig) + return LegacyBehaviorPolicy.CORRECTED } + // If there is no version, we return the mode specified by the config. + Option(lookupFileMeta(SPARK_VERSION_METADATA_KEY)).map { version => + // Files written by Spark 3.0 and earlier follow the legacy hybrid calendar and we need to + // rebase the INT96 timestamp values. + // Files written by Spark 3.1 and latter may also need the rebase if they were written with + // the "LEGACY" rebase mode. 
+ if (version < "3.1.0" || lookupFileMeta(SPARK_LEGACY_INT96) != null) { + LegacyBehaviorPolicy.LEGACY + } else { + LegacyBehaviorPolicy.CORRECTED + } + }.getOrElse(LegacyBehaviorPolicy.withName(modeByConfig)) } def newRebaseExceptionInRead(format: String): SparkUpgradeException = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala index b538c2f2493d0..26074719364a4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala @@ -31,7 +31,7 @@ import org.apache.parquet.io.api.{Binary, RecordConsumer} import org.apache.spark.SPARK_VERSION_SHORT import org.apache.spark.internal.Logging -import org.apache.spark.sql.{SPARK_INT96_NO_REBASE, SPARK_LEGACY_DATETIME, SPARK_VERSION_METADATA_KEY} +import org.apache.spark.sql.{SPARK_LEGACY_DATETIME, SPARK_LEGACY_INT96, SPARK_VERSION_METADATA_KEY} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.SpecializedGetters import org.apache.spark.sql.catalyst.util.DateTimeUtils @@ -123,9 +123,9 @@ class ParquetWriteSupport extends WriteSupport[InternalRow] with Logging { } } ++ { if (int96RebaseMode == LegacyBehaviorPolicy.LEGACY) { - None + Some(SPARK_LEGACY_INT96 -> "") } else { - Some(SPARK_INT96_NO_REBASE -> "") + None } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/package.scala index 011be6d69c576..022fecf1ae412 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/package.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/package.scala @@ -63,7 +63,7 @@ package object sql { /** * Parquet file metadata key to indicate that the file with INT96 column type was written - * without rebasing. + * with rebasing. 
*/ - private[sql] val SPARK_INT96_NO_REBASE = "org.apache.spark.int96NoRebase" + private[sql] val SPARK_LEGACY_INT96 = "org.apache.spark.legacyINT96" } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala index dac4e950a7823..34bdef7bdb402 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala @@ -1163,9 +1163,9 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession } } - test("SPARK-33160: write the metadata key 'org.apache.spark.int96NoRebase'") { - def saveTs(dir: java.io.File): Unit = { - Seq(Timestamp.valueOf("1000-01-01 01:02:03")).toDF() + test("SPARK-33160: write the metadata key 'org.apache.spark.legacyINT96'") { + def saveTs(dir: java.io.File, ts: String = "1000-01-01 01:02:03"): Unit = { + Seq(Timestamp.valueOf(ts)).toDF() .repartition(1) .write .parquet(dir.getAbsolutePath) @@ -1173,18 +1173,24 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession withSQLConf(SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key -> LEGACY.toString) { withTempPath { dir => saveTs(dir) - assert(getMetaData(dir).get(SPARK_INT96_NO_REBASE).isEmpty) + assert(getMetaData(dir)(SPARK_LEGACY_INT96) === "") } } withSQLConf(SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key -> CORRECTED.toString) { withTempPath { dir => saveTs(dir) - assert(getMetaData(dir)(SPARK_INT96_NO_REBASE) === "") + assert(getMetaData(dir).get(SPARK_LEGACY_INT96).isEmpty) } } withSQLConf(SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key -> EXCEPTION.toString) { withTempPath { dir => intercept[SparkException] { saveTs(dir) } } } + withSQLConf(SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key -> EXCEPTION.toString) { + withTempPath { dir => + saveTs(dir, "2020-10-22 01:02:03") + assert(getMetaData(dir).get(SPARK_LEGACY_INT96).isEmpty) + } + } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index db0e93787338e..7d5a200606356 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -1513,7 +1513,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto Seq(tbl, ext_tbl).foreach { tblName => sql(s"INSERT INTO $tblName VALUES (1, 'a', '2019-12-13')") - val expectedSize = 636 + val expectedSize = 601 // analyze table sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS NOSCAN") var tableStats = getTableStats(tblName) From 3819d39607392aa968595e3d97b84fedf83d08d9 Mon Sep 17 00:00:00 2001 From: Ankit Srivastava Date: Thu, 22 Oct 2020 16:35:55 -0700 Subject: [PATCH 0308/1009] [SPARK-32998][BUILD] Add ability to override default remote repos with internal one ### What changes were proposed in this pull request? - Building spark internally in orgs where access to outside internet is not allowed takes a long time because unsuccessful attempts are made to download artifacts from repositories which are not accessible. The unsuccessful attempts unnecessarily add significant amount of time to the build. I have seen a difference of up-to 1hr for some runs. 
- Adding 1 environment variables that should be present that the start of the build and if they exist, override the default repos defined in the code and scripts. envVariables: - DEFAULT_ARTIFACT_REPOSITORY=https://artifacts.internal.com/libs-release/ ### Why are the changes needed? To allow orgs to build spark internally without relying on external repositories for artifact downloads. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Multiple builds with and without env variables set. Closes #29874 from ankits/SPARK-32998. Authored-by: Ankit Srivastava Signed-off-by: Dongjoon Hyun --- build/sbt-launch-lib.bash | 6 +++++- .../main/scala/org/apache/spark/deploy/SparkSubmit.scala | 5 ++++- .../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 3 ++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/build/sbt-launch-lib.bash b/build/sbt-launch-lib.bash index 423ba3b766e61..1d79989f3c3c3 100755 --- a/build/sbt-launch-lib.bash +++ b/build/sbt-launch-lib.bash @@ -39,7 +39,11 @@ dlog () { acquire_sbt_jar () { SBT_VERSION=`awk -F "=" '/sbt\.version/ {print $2}' ./project/build.properties` - URL1=https://repo1.maven.org/maven2/org/scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch-${SBT_VERSION}.jar + # DEFAULT_ARTIFACT_REPOSITORY env variable can be used to only fetch + # artifacts from internal repos only. + # Ex: + # DEFAULT_ARTIFACT_REPOSITORY=https://artifacts.internal.com/libs-release/ + URL1=${DEFAULT_ARTIFACT_REPOSITORY:-https://repo1.maven.org/maven2/}org/scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch-${SBT_VERSION}.jar JAR=build/sbt-launch-${SBT_VERSION}.jar sbt_jar=$JAR diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 93370f5dae72e..9a316e8c5b5a9 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -1160,13 +1160,16 @@ private[spark] object SparkSubmitUtils { val br: IBiblioResolver = new IBiblioResolver br.setM2compatible(true) br.setUsepoms(true) + val defaultInternalRepo : Option[String] = sys.env.get("DEFAULT_ARTIFACT_REPOSITORY") + br.setRoot(defaultInternalRepo.getOrElse("https://repo1.maven.org/maven2/")) br.setName("central") cr.add(br) val sp: IBiblioResolver = new IBiblioResolver sp.setM2compatible(true) sp.setUsepoms(true) - sp.setRoot("https://dl.bintray.com/spark-packages/maven") + sp.setRoot(sys.env.getOrElse( + "DEFAULT_ARTIFACT_REPOSITORY", "https://dl.bintray.com/spark-packages/maven")) sp.setName("spark-packages") cr.add(sp) cr diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index dad59ba0e7327..952785b9a3e65 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2566,7 +2566,8 @@ object SQLConf { .version("3.0.0") .stringConf .createWithDefault( - "https://maven-central.storage-download.googleapis.com/maven2/") + sys.env.getOrElse("DEFAULT_ARTIFACT_REPOSITORY", + "https://maven-central.storage-download.googleapis.com/maven2/")) val LEGACY_FROM_DAYTIME_STRING = buildConf("spark.sql.legacy.fromDayTimeString.enabled") From 87b32f65ef907707a5f76777ecd4570a8c34eedd Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 23 Oct 2020 13:35:46 +0900 Subject: [PATCH 0309/1009] [MINOR][DOCS][TESTS] Fix 
PLAN_CHANGE_LOG_LEVEL document ### What changes were proposed in this pull request? The validation error message for the `PLAN_CHANGE_LOG_LEVEL` config refers to the wrong key (`spark.sql.optimizer.planChangeLog.level`); this fixes it to `spark.sql.planChangeLog.level`. ### Why are the changes needed? Fix the wrong config key in the error message. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Only doc change. Closes #30136 from viirya/minor-sqlconf. Authored-by: Liang-Chi Hsieh Signed-off-by: HyukjinKwon --- .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2 +- .../spark/sql/catalyst/optimizer/OptimizerLoggingSuite.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 952785b9a3e65..35ef24c1c3ba6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -225,7 +225,7 @@ object SQLConf { .stringConf .transform(_.toUpperCase(Locale.ROOT)) .checkValue(logLevel => Set("TRACE", "DEBUG", "INFO", "WARN", "ERROR").contains(logLevel), - "Invalid value for 'spark.sql.optimizer.planChangeLog.level'. Valid values are " + + "Invalid value for 'spark.sql.planChangeLog.level'. Valid values are " + "'trace', 'debug', 'info', 'warn' and 'error'.") .createWithDefault("trace") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerLoggingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerLoggingSuite.scala index 68c5e2e2f7694..1187950c04240 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerLoggingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerLoggingSuite.scala @@ -100,7 +100,7 @@ class OptimizerLoggingSuite extends PlanTest { withSQLConf(SQLConf.PLAN_CHANGE_LOG_LEVEL.key -> level) {} } assert(error.getMessage.contains( - "Invalid value for 'spark.sql.optimizer.planChangeLog.level'.")) + "Invalid value for 'spark.sql.planChangeLog.level'.")) } } From edeecada665e4974bb9e0f125dc30d71bd0a54ee Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Fri, 23 Oct 2020 13:58:44 +0900 Subject: [PATCH 0310/1009] [SPARK-32850][CORE][K8S] Simplify the RPC message flow of decommission ### What changes were proposed in this pull request? This PR cleans up the RPC message flow among the multiple decommission use cases. It includes the following changes: * Keep `Worker`'s decommission status consistent between the case where decommission starts from `Worker` and the case where decommission starts from the `MasterWebUI`: send `DecommissionWorker` from `Master` to `Worker` in the latter case. * Change from two-way communication to one-way communication when notifying decommission between driver and executor: it is unnecessary for the executor to acknowledge the decommission status to the driver since the decommission request comes from the driver, and the same holds in the reverse direction. * Send only one message instead of two (`DecommissionSelf`/`DecommissionBlockManager`) when decommissioning the executor: the executor and the `BlockManager` are in the same JVM. * Clean up the surrounding code. ### Why are the changes needed? Before: [diagram of the RPC message flow before this change, omitted] After: [diagram of the RPC message flow after this change, omitted] (Note: the diagrams only count RPC calls that need to go through the network; local RPC calls are not counted.)
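To make the one-way vs. two-way distinction concrete, a toy sketch (deliberately not Spark's real RPC classes; the endpoint and message names are invented) of the two notification styles:
```
// Toy model only: invented names, no Spark dependency.
object DecommissionFlowSketch {
  case object DecommissionExecutor

  class Endpoint(name: String) {
    // Old style (two-way): the sender blocks until the receiver acknowledges.
    def askSync(msg: Any): Boolean = {
      println(s"$name handles $msg and sends an acknowledgement back")
      true
    }
    // New style (one-way): fire-and-forget, no acknowledgement travels back.
    def send(msg: Any): Unit =
      println(s"$name handles $msg; the sender does not wait")
  }

  def main(args: Array[String]): Unit = {
    val executor = new Endpoint("executor")
    val acked = executor.askSync(DecommissionExecutor) // extra round trip over the network
    executor.send(DecommissionExecutor)                // single one-way message
    println(s"ack from old-style flow: $acked")
  }
}
```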
After this change, We reduced 6 original RPC calls and added one more RPC call for keeping the consistent decommission status for the Worker. And the RPC flow becomes more clear. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Updated existing tests. Closes #29817 from Ngone51/simplify-decommission-rpc. Authored-by: yi.wu Signed-off-by: HyukjinKwon --- .../spark/ExecutorAllocationClient.scala | 19 +++- .../spark/ExecutorAllocationManager.scala | 5 +- .../apache/spark/deploy/DeployMessage.scala | 32 ++++++- .../apache/spark/deploy/master/Master.scala | 21 +++-- .../apache/spark/deploy/worker/Worker.scala | 29 ++++-- .../CoarseGrainedExecutorBackend.scala | 70 ++++++++------ .../cluster/CoarseGrainedClusterMessage.scala | 16 +++- .../CoarseGrainedSchedulerBackend.scala | 93 +++++++------------ .../cluster/StandaloneSchedulerBackend.scala | 7 +- .../apache/spark/storage/BlockManager.scala | 9 +- .../storage/BlockManagerMasterEndpoint.scala | 25 ++--- .../storage/BlockManagerStorageEndpoint.scala | 2 +- .../deploy/DecommissionWorkerSuite.scala | 4 +- .../spark/deploy/client/AppClientSuite.scala | 8 +- .../spark/deploy/master/MasterSuite.scala | 18 ++-- .../scheduler/WorkerDecommissionSuite.scala | 7 +- ...kManagerDecommissionIntegrationSuite.scala | 40 ++++++++ .../integrationtest/DecommissionSuite.scala | 12 +-- .../ExecutorAllocationManagerSuite.scala | 6 +- 19 files changed, 257 insertions(+), 166 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala index ce47f3fd32203..cdba1c44034c0 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala @@ -91,11 +91,13 @@ private[spark] trait ExecutorAllocationClient { * @param executorsAndDecomInfo identifiers of executors & decom info. * @param adjustTargetNumExecutors whether the target number of executors will be adjusted down * after these executors have been decommissioned. + * @param triggeredByExecutor whether the decommission is triggered at executor. * @return the ids of the executors acknowledged by the cluster manager to be removed. */ def decommissionExecutors( - executorsAndDecomInfo: Array[(String, ExecutorDecommissionInfo)], - adjustTargetNumExecutors: Boolean): Seq[String] = { + executorsAndDecomInfo: Array[(String, ExecutorDecommissionInfo)], + adjustTargetNumExecutors: Boolean, + triggeredByExecutor: Boolean): Seq[String] = { killExecutors(executorsAndDecomInfo.map(_._1), adjustTargetNumExecutors, countFailures = false) @@ -109,14 +111,21 @@ private[spark] trait ExecutorAllocationClient { * @param executorId identifiers of executor to decommission * @param decommissionInfo information about the decommission (reason, host loss) * @param adjustTargetNumExecutors if we should adjust the target number of executors. + * @param triggeredByExecutor whether the decommission is triggered at executor. + * (TODO: add a new type like `ExecutorDecommissionInfo` for the + * case where executor is decommissioned at executor first, so we + * don't need this extra parameter.) * @return whether the request is acknowledged by the cluster manager. 
*/ - final def decommissionExecutor(executorId: String, + final def decommissionExecutor( + executorId: String, decommissionInfo: ExecutorDecommissionInfo, - adjustTargetNumExecutors: Boolean): Boolean = { + adjustTargetNumExecutors: Boolean, + triggeredByExecutor: Boolean = false): Boolean = { val decommissionedExecutors = decommissionExecutors( Array((executorId, decommissionInfo)), - adjustTargetNumExecutors = adjustTargetNumExecutors) + adjustTargetNumExecutors = adjustTargetNumExecutors, + triggeredByExecutor = triggeredByExecutor) decommissionedExecutors.nonEmpty && decommissionedExecutors(0).equals(executorId) } diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index 596508a2cf8c8..1dd64df106bc2 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -580,7 +580,10 @@ private[spark] class ExecutorAllocationManager( if (decommissionEnabled) { val executorIdsWithoutHostLoss = executorIdsToBeRemoved.toSeq.map( id => (id, ExecutorDecommissionInfo("spark scale down"))).toArray - client.decommissionExecutors(executorIdsWithoutHostLoss, adjustTargetNumExecutors = false) + client.decommissionExecutors( + executorIdsWithoutHostLoss, + adjustTargetNumExecutors = false, + triggeredByExecutor = false) } else { client.killExecutors(executorIdsToBeRemoved.toSeq, adjustTargetNumExecutors = false, countFailures = false, force = false) diff --git a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala index 83f373d526e90..d5b5375d64f4d 100644 --- a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala @@ -61,13 +61,35 @@ private[deploy] object DeployMessages { } /** + * An internal message that used by Master itself, in order to handle the + * `DecommissionWorkersOnHosts` request from `MasterWebUI` asynchronously. + * @param ids A collection of Worker ids, which should be decommissioned. + */ + case class DecommissionWorkers(ids: Seq[String]) extends DeployMessage + + /** + * A message that sent from Master to Worker to decommission the Worker. + * It's used for the case where decommission is triggered at MasterWebUI. + * + * Note that decommission a Worker will cause all the executors on that Worker + * to be decommissioned as well. + */ + object DecommissionWorker extends DeployMessage + + /** + * A message that sent by the Worker to itself when it receives PWR signal, + * indicating the Worker starts to decommission. + */ + object WorkerSigPWRReceived extends DeployMessage + + /** + * A message sent from Worker to Master to tell Master that the Worker has started + * decommissioning. It's used for the case where decommission is triggered at Worker. 
+ * * @param id the worker id - * @param worker the worker endpoint ref + * @param workerRef the worker endpoint ref */ - case class WorkerDecommission( - id: String, - worker: RpcEndpointRef) - extends DeployMessage + case class WorkerDecommissioning(id: String, workerRef: RpcEndpointRef) extends DeployMessage case class ExecutorStateChanged( appId: String, diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index 48516cdf83291..ceeb01149f5db 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -245,8 +245,7 @@ private[deploy] class Master( logError("Leadership has been revoked -- master shutting down.") System.exit(0) - case WorkerDecommission(id, workerRef) => - logInfo("Recording worker %s decommissioning".format(id)) + case WorkerDecommissioning(id, workerRef) => if (state == RecoveryState.STANDBY) { workerRef.send(MasterInStandby) } else { @@ -254,6 +253,19 @@ private[deploy] class Master( idToWorker.get(id).foreach(decommissionWorker) } + case DecommissionWorkers(ids) => + // The caller has already checked the state when handling DecommissionWorkersOnHosts, + // so it should not be the STANDBY + assert(state != RecoveryState.STANDBY) + ids.foreach ( id => + // We use foreach since get gives us an option and we can skip the failures. + idToWorker.get(id).foreach { w => + decommissionWorker(w) + // Also send a message to the worker node to notify. + w.endpoint.send(DecommissionWorker) + } + ) + case RegisterWorker( id, workerHost, workerPort, workerRef, cores, memory, workerWebUiUrl, masterAddress, resources) => @@ -891,10 +903,7 @@ private[deploy] class Master( logInfo(s"Decommissioning the workers with host:ports ${workersToRemoveHostPorts}") // The workers are removed async to avoid blocking the receive loop for the entire batch - workersToRemove.foreach(wi => { - logInfo(s"Sending the worker decommission to ${wi.id} and ${wi.endpoint}") - self.send(WorkerDecommission(wi.id, wi.endpoint)) - }) + self.send(DecommissionWorkers(workersToRemove.map(_.id).toSeq)) // Return the count of workers actually removed workersToRemove.size diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala index 7649bc37c30b6..0660dbdafd605 100755 --- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -70,7 +70,10 @@ private[deploy] class Worker( if (conf.get(config.DECOMMISSION_ENABLED)) { logInfo("Registering SIGPWR handler to trigger decommissioning.") SignalUtils.register("PWR", "Failed to register SIGPWR handler - " + - "disabling worker decommission feature.")(decommissionSelf) + "disabling worker decommission feature.") { + self.send(WorkerSigPWRReceived) + true + } } else { logInfo("Worker decommissioning not enabled, SIGPWR will result in exiting.") } @@ -137,7 +140,8 @@ private[deploy] class Worker( private var registered = false private var connected = false private var decommissioned = false - private val workerId = generateWorkerId() + // expose for test + private[spark] val workerId = generateWorkerId() private val sparkHome = if (sys.props.contains(IS_TESTING.key)) { assert(sys.props.contains("spark.test.home"), "spark.test.home is not set!") @@ -668,8 +672,14 @@ private[deploy] class Worker( finishedApps += id 
maybeCleanupApplication(id) - case WorkerDecommission(_, _) => + case DecommissionWorker => + decommissionSelf() + + case WorkerSigPWRReceived => decommissionSelf() + // Tell the Master that we are starting decommissioning + // so it stops trying to launch executor/driver on us + sendToMaster(WorkerDecommissioning(workerId, self)) } override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { @@ -768,16 +778,15 @@ private[deploy] class Worker( } } - private[deploy] def decommissionSelf(): Boolean = { - if (conf.get(config.DECOMMISSION_ENABLED)) { - logDebug("Decommissioning self") + private[deploy] def decommissionSelf(): Unit = { + if (conf.get(config.DECOMMISSION_ENABLED) && !decommissioned) { decommissioned = true - sendToMaster(WorkerDecommission(workerId, self)) + logInfo(s"Decommission worker $workerId.") + } else if (decommissioned) { + logWarning(s"Worker $workerId already started decommissioning.") } else { - logWarning("Asked to decommission self, but decommissioning not enabled") + logWarning(s"Receive decommission request, but decommission feature is disabled.") } - // Return true since can be called as a signal handler - true } private[worker] def handleDriverStateChanged(driverStateChanged: DriverStateChanged): Unit = { diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index 48045bafe6e3f..b2bc6b3b68007 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -40,7 +40,7 @@ import org.apache.spark.resource.ResourceProfile import org.apache.spark.resource.ResourceProfile._ import org.apache.spark.resource.ResourceUtils._ import org.apache.spark.rpc._ -import org.apache.spark.scheduler.{ExecutorDecommissionInfo, ExecutorLossReason, TaskDescription} +import org.apache.spark.scheduler.{ExecutorLossReason, TaskDescription} import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._ import org.apache.spark.serializer.SerializerInstance import org.apache.spark.util.{ChildFirstURLClassLoader, MutableURLClassLoader, SignalUtils, ThreadUtils, Utils} @@ -79,12 +79,14 @@ private[spark] class CoarseGrainedExecutorBackend( */ private[executor] val taskResources = new mutable.HashMap[Long, Map[String, ResourceInformation]] - @volatile private var decommissioned = false + private var decommissioned = false override def onStart(): Unit = { - logInfo("Registering PWR handler.") - SignalUtils.register("PWR", "Failed to register SIGPWR handler - " + - "disabling decommission feature.")(decommissionSelf) + if (env.conf.get(DECOMMISSION_ENABLED)) { + logInfo("Registering PWR handler to trigger decommissioning.") + SignalUtils.register("PWR", "Failed to register SIGPWR handler - " + + "disabling executor decommission feature.") (self.askSync[Boolean](ExecutorSigPWRReceived)) + } logInfo("Connecting to driver: " + driverUrl) try { @@ -166,17 +168,6 @@ private[spark] class CoarseGrainedExecutorBackend( if (executor == null) { exitExecutor(1, "Received LaunchTask command but executor was null") } else { - if (decommissioned) { - val msg = "Asked to launch a task while decommissioned." 
- logError(msg) - driver match { - case Some(endpoint) => - logInfo("Sending DecommissionExecutor to driver.") - endpoint.send(DecommissionExecutor(executorId, ExecutorDecommissionInfo(msg))) - case _ => - logError("No registered driver to send Decommission to.") - } - } val taskDesc = TaskDescription.decode(data.value) logInfo("Got assigned task " + taskDesc.taskId) taskResources(taskDesc.taskId) = taskDesc.resources @@ -213,11 +204,31 @@ private[spark] class CoarseGrainedExecutorBackend( logInfo(s"Received tokens of ${tokenBytes.length} bytes") SparkHadoopUtil.get.addDelegationTokens(tokenBytes, env.conf) - case DecommissionSelf => - logInfo("Received decommission self") + case DecommissionExecutor => decommissionSelf() } + override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { + case ExecutorSigPWRReceived => + var driverNotified = false + try { + driver.foreach { driverRef => + // Tell driver that we are starting decommissioning so it stops trying to schedule us + driverNotified = driverRef.askSync[Boolean](ExecutorDecommissioning(executorId)) + if (driverNotified) decommissionSelf() + } + } catch { + case e: Exception => + if (driverNotified) { + logError("Fail to decommission self (but driver has been notified).", e) + } else { + logError("Fail to tell driver that we are starting decommissioning", e) + } + decommissioned = false + } + context.reply(decommissioned) + } + override def onDisconnected(remoteAddress: RpcAddress): Unit = { if (stopping.get()) { logInfo(s"Driver from $remoteAddress disconnected during shutdown") @@ -264,17 +275,20 @@ private[spark] class CoarseGrainedExecutorBackend( System.exit(code) } - private def decommissionSelf(): Boolean = { - val msg = "Decommissioning self w/sync" + private def decommissionSelf(): Unit = { + if (!env.conf.get(DECOMMISSION_ENABLED)) { + logWarning(s"Receive decommission request, but decommission feature is disabled.") + return + } else if (decommissioned) { + logWarning(s"Executor $executorId already started decommissioning.") + return + } + val msg = s"Decommission executor $executorId." 
logInfo(msg) try { decommissioned = true - // Tell master we are are decommissioned so it stops trying to schedule us - if (driver.nonEmpty) { - driver.get.askSync[Boolean](DecommissionExecutor( - executorId, ExecutorDecommissionInfo(msg))) - } else { - logError("No driver to message decommissioning.") + if (env.conf.get(STORAGE_DECOMMISSION_ENABLED)) { + env.blockManager.decommissionBlockManager() } if (executor != null) { executor.decommission() @@ -333,12 +347,10 @@ private[spark] class CoarseGrainedExecutorBackend( shutdownThread.start() logInfo("Will exit when finished decommissioning") - // Return true since we are handling a signal - true } catch { case e: Exception => + decommissioned = false logError("Unexpected error while decommissioning self", e) - false } } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala index 7242ab7786061..d1b0e798c51be 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala @@ -95,8 +95,17 @@ private[spark] object CoarseGrainedClusterMessages { case class RemoveExecutor(executorId: String, reason: ExecutorLossReason) extends CoarseGrainedClusterMessage - case class DecommissionExecutor(executorId: String, decommissionInfo: ExecutorDecommissionInfo) - extends CoarseGrainedClusterMessage + // A message that sent from executor to driver to tell driver that the executor has started + // decommissioning. It's used for the case where decommission is triggered at executor (e.g., K8S) + case class ExecutorDecommissioning(executorId: String) extends CoarseGrainedClusterMessage + + // A message that sent from driver to executor to decommission that executor. + // It's used for Standalone's cases, where decommission is triggered at MasterWebUI or Worker. + object DecommissionExecutor extends CoarseGrainedClusterMessage + + // A message that sent to the executor itself when it receives PWR signal, + // indicating the executor starts to decommission. + object ExecutorSigPWRReceived extends CoarseGrainedClusterMessage case class RemoveWorker(workerId: String, host: String, message: String) extends CoarseGrainedClusterMessage @@ -136,7 +145,4 @@ private[spark] object CoarseGrainedClusterMessages { // The message to check if `CoarseGrainedSchedulerBackend` thinks the executor is alive or not. case class IsExecutorAlive(executorId: String) extends CoarseGrainedClusterMessage - - // Used to ask an executor to decommission itself. 
(Can be an internal message) - case object DecommissionSelf extends CoarseGrainedClusterMessage } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala index 37ea648d80048..1d2689034f1ff 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala @@ -191,11 +191,6 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp executorDataMap.get(executorId).foreach(_.executorEndpoint.send(StopExecutor)) removeExecutor(executorId, reason) - // Do not change this code without running the K8s integration suites - case DecommissionExecutor(executorId, decommissionInfo) => - logError(s"Received decommission executor message ${executorId}: $decommissionInfo") - decommissionExecutor(executorId, decommissionInfo, adjustTargetNumExecutors = false) - case RemoveWorker(workerId, host, message) => removeWorker(workerId, host, message) @@ -274,10 +269,14 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp context.reply(true) // Do not change this code without running the K8s integration suites - case DecommissionExecutor(executorId, decommissionInfo) => - logError(s"Received decommission executor message ${executorId}: ${decommissionInfo}.") - context.reply(decommissionExecutor(executorId, decommissionInfo, - adjustTargetNumExecutors = false)) + case ExecutorDecommissioning(executorId) => + logWarning(s"Received executor $executorId decommissioned message") + context.reply( + decommissionExecutor( + executorId, + ExecutorDecommissionInfo(s"Executor $executorId is decommissioned."), + adjustTargetNumExecutors = false, + triggeredByExecutor = true)) case RetrieveSparkAppConfig(resourceProfileId) => val rp = scheduler.sc.resourceProfileManager.resourceProfileFromId(resourceProfileId) @@ -465,72 +464,50 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp * @param executorsAndDecomInfo Identifiers of executors & decommission info. * @param adjustTargetNumExecutors whether the target number of executors will be adjusted down * after these executors have been decommissioned. + * @param triggeredByExecutor whether the decommission is triggered at executor. * @return the ids of the executors acknowledged by the cluster manager to be removed. */ override def decommissionExecutors( executorsAndDecomInfo: Array[(String, ExecutorDecommissionInfo)], - adjustTargetNumExecutors: Boolean): Seq[String] = { - + adjustTargetNumExecutors: Boolean, + triggeredByExecutor: Boolean): Seq[String] = withLock { // Do not change this code without running the K8s integration suites - val executorsToDecommission = executorsAndDecomInfo.filter { case (executorId, decomInfo) => - CoarseGrainedSchedulerBackend.this.synchronized { - // Only bother decommissioning executors which are alive. - if (isExecutorActive(executorId)) { - executorsPendingDecommission(executorId) = decomInfo.workerHost - true - } else { - false - } + val executorsToDecommission = executorsAndDecomInfo.flatMap { case (executorId, decomInfo) => + // Only bother decommissioning executors which are alive. 
+ if (isExecutorActive(executorId)) { + scheduler.executorDecommission(executorId, decomInfo) + executorsPendingDecommission(executorId) = decomInfo.workerHost + Some(executorId) + } else { + None } } + logInfo(s"Decommission executors: ${executorsToDecommission.mkString(", ")}") // If we don't want to replace the executors we are decommissioning if (adjustTargetNumExecutors) { - adjustExecutors(executorsToDecommission.map(_._1)) + adjustExecutors(executorsToDecommission) } - executorsToDecommission.filter { case (executorId, decomInfo) => - doDecommission(executorId, decomInfo) - }.map(_._1) - } - - // Do not change this code without running the K8s integration suites - private def doDecommission(executorId: String, - decomInfo: ExecutorDecommissionInfo): Boolean = { - - logInfo(s"Asking executor $executorId to decommissioning.") - scheduler.executorDecommission(executorId, decomInfo) - // Send decommission message to the executor (it could have originated on the executor - // but not necessarily). - CoarseGrainedSchedulerBackend.this.synchronized { - executorDataMap.get(executorId) match { - case Some(executorInfo) => - executorInfo.executorEndpoint.send(DecommissionSelf) - case None => - // Ignoring the executor since it is not registered. - logWarning(s"Attempted to decommission unknown executor $executorId.") - return false + // Mark those corresponding BlockManagers as decommissioned first before we sending + // decommission notification to executors. So, it's less likely to lead to the race + // condition where `getPeer` request from the decommissioned executor comes first + // before the BlockManagers are marked as decommissioned. + // Note that marking BlockManager as decommissioned doesn't need depend on + // `spark.storage.decommission.enabled`. Because it's meaningless to save more blocks + // for the BlockManager since the executor will be shutdown soon. 
+ scheduler.sc.env.blockManager.master.decommissionBlockManagers(executorsToDecommission) + + if (!triggeredByExecutor) { + executorsToDecommission.foreach { executorId => + logInfo(s"Notify executor $executorId to decommissioning.") + executorDataMap(executorId).executorEndpoint.send(DecommissionExecutor) } } - logInfo(s"Asked executor $executorId to decommission.") - - if (conf.get(STORAGE_DECOMMISSION_ENABLED)) { - try { - logInfo(s"Asking block manager corresponding to executor $executorId to decommission.") - scheduler.sc.env.blockManager.master.decommissionBlockManagers(Seq(executorId)) - } catch { - case e: Exception => - logError("Unexpected error during block manager " + - s"decommissioning for executor $executorId: ${e.toString}", e) - return false - } - logInfo(s"Acknowledged decommissioning block manager corresponding to $executorId.") - } - true + executorsToDecommission } - override def start(): Unit = { if (UserGroupInformation.isSecurityEnabled()) { delegationTokenManager = createTokenManager() diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala index 34b03dfec9e80..b9ac8d2ba2784 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala @@ -178,9 +178,12 @@ private[spark] class StandaloneSchedulerBackend( } override def executorDecommissioned(fullId: String, decommissionInfo: ExecutorDecommissionInfo) { - logInfo("Asked to decommission executor") + logInfo(s"Asked to decommission executor $fullId") val execId = fullId.split("/")(1) - decommissionExecutors(Array((execId, decommissionInfo)), adjustTargetNumExecutors = false) + decommissionExecutors( + Array((execId, decommissionInfo)), + adjustTargetNumExecutors = false, + triggeredByExecutor = false) logInfo("Executor %s decommissioned: %s".format(fullId, decommissionInfo)) } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index ff0f38a2479b0..3909c02c5bb1f 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -56,7 +56,7 @@ import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.serializer.{SerializerInstance, SerializerManager} import org.apache.spark.shuffle.{MigratableResolver, ShuffleManager, ShuffleWriteMetricsReporter} import org.apache.spark.shuffle.{ShuffleManager, ShuffleWriteMetricsReporter} -import org.apache.spark.storage.BlockManagerMessages.ReplicateBlock +import org.apache.spark.storage.BlockManagerMessages.{DecommissionBlockManager, ReplicateBlock} import org.apache.spark.storage.memory._ import org.apache.spark.unsafe.Platform import org.apache.spark.util._ @@ -243,8 +243,9 @@ private[spark] class BlockManager( private var blockReplicationPolicy: BlockReplicationPolicy = _ + // visible for test // This is volatile since if it's defined we should not accept remote blocks. - @volatile private var decommissioner: Option[BlockManagerDecommissioner] = None + @volatile private[spark] var decommissioner: Option[BlockManagerDecommissioner] = None // A DownloadFileManager used to track all the files of remote blocks which are above the // specified memory threshold. Files will be deleted automatically based on weak reference. 
@@ -1809,7 +1810,9 @@ private[spark] class BlockManager( blocksToRemove.size } - def decommissionBlockManager(): Unit = synchronized { + def decommissionBlockManager(): Unit = storageEndpoint.ask(DecommissionBlockManager) + + private[spark] def decommissionSelf(): Unit = synchronized { decommissioner match { case None => logInfo("Starting block manager decommissioning process...") diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala index 61a88b4f26c00..569d7d32284bc 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala @@ -163,8 +163,14 @@ class BlockManagerMasterEndpoint( context.reply(true) case DecommissionBlockManagers(executorIds) => - val bmIds = executorIds.flatMap(blockManagerIdByExecutor.get) - decommissionBlockManagers(bmIds) + // Mark corresponding BlockManagers as being decommissioning by adding them to + // decommissioningBlockManagerSet, so they won't be used to replicate or migrate blocks. + // Note that BlockManagerStorageEndpoint will be notified about decommissioning when the + // executor is notified(see BlockManager.decommissionSelf), so we don't need to send the + // notification here. + val bms = executorIds.flatMap(blockManagerIdByExecutor.get) + logInfo(s"Mark BlockManagers (${bms.mkString(", ")}) as being decommissioning.") + decommissioningBlockManagerSet ++= bms context.reply(true) case GetReplicateInfoForRDDBlocks(blockManagerId) => @@ -359,21 +365,6 @@ class BlockManagerMasterEndpoint( blockManagerIdByExecutor.get(execId).foreach(removeBlockManager) } - /** - * Decommission the given Seq of blockmanagers - * - Adds these block managers to decommissioningBlockManagerSet Set - * - Sends the DecommissionBlockManager message to each of the [[BlockManagerReplicaEndpoint]] - */ - def decommissionBlockManagers(blockManagerIds: Seq[BlockManagerId]): Future[Seq[Unit]] = { - val newBlockManagersToDecommission = blockManagerIds.toSet.diff(decommissioningBlockManagerSet) - val futures = newBlockManagersToDecommission.map { blockManagerId => - decommissioningBlockManagerSet.add(blockManagerId) - val info = blockManagerInfo(blockManagerId) - info.storageEndpoint.ask[Unit](DecommissionBlockManager) - } - Future.sequence{ futures.toSeq } - } - /** * Returns a Seq of ReplicateBlock for each RDD block stored by given blockManagerId * @param blockManagerId - block manager id for which ReplicateBlock info is needed diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala index a69bebc23c661..54a72568b18fa 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala @@ -62,7 +62,7 @@ class BlockManagerStorageEndpoint( } case DecommissionBlockManager => - context.reply(blockManager.decommissionBlockManager()) + context.reply(blockManager.decommissionSelf()) case RemoveBroadcast(broadcastId, _) => doAsync[Int]("removing broadcast " + broadcastId, context) { diff --git a/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala index 9c5e460854053..abe5b7a71ca63 100644 --- 
a/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala @@ -28,7 +28,7 @@ import org.scalatest.BeforeAndAfterEach import org.scalatest.concurrent.Eventually._ import org.apache.spark._ -import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState, WorkerDecommission} +import org.apache.spark.deploy.DeployMessages.{DecommissionWorkers, MasterStateResponse, RequestMasterState} import org.apache.spark.deploy.master.{ApplicationInfo, Master, WorkerInfo} import org.apache.spark.deploy.worker.Worker import org.apache.spark.internal.{config, Logging} @@ -414,7 +414,7 @@ class DecommissionWorkerSuite def decommissionWorkerOnMaster(workerInfo: WorkerInfo, reason: String): Unit = { logInfo(s"Trying to decommission worker ${workerInfo.id} for reason `$reason`") - master.self.send(WorkerDecommission(workerInfo.id, workerInfo.endpoint)) + master.self.send(DecommissionWorkers(Seq(workerInfo.id))) } def killWorkerAfterTimeout(workerInfo: WorkerInfo, secondsToWait: Int): Unit = { diff --git a/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala b/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala index fe88822bb46b5..93c0aa000e207 100644 --- a/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala @@ -27,7 +27,7 @@ import org.scalatest.concurrent.{Eventually, ScalaFutures} import org.apache.spark._ import org.apache.spark.deploy.{ApplicationDescription, Command} -import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState} +import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState, WorkerDecommissioning} import org.apache.spark.deploy.master.{ApplicationInfo, Master} import org.apache.spark.deploy.worker.Worker import org.apache.spark.internal.{config, Logging} @@ -122,7 +122,11 @@ class AppClientSuite // Send a decommission self to all the workers // Note: normally the worker would send this on their own. - workers.foreach(worker => worker.decommissionSelf()) + workers.foreach { worker => + worker.decommissionSelf() + // send the notice to Master to tell the decommission of Workers + master.self.send(WorkerDecommissioning(worker.workerId, worker.self)) + } // Decommissioning is async. 
eventually(timeout(1.seconds), interval(10.millis)) { diff --git a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala index 8f19fb5cc80bd..3329300b64d13 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala @@ -72,6 +72,7 @@ class MockWorker(master: RpcEndpointRef, conf: SparkConf = new SparkConf) extend }) } + var decommissioned = false var appDesc = DeployTestUtils.createAppDesc() val drivers = mutable.HashSet[String]() val driverResources = new mutable.HashMap[String, Map[String, Set[String]]] @@ -96,6 +97,8 @@ class MockWorker(master: RpcEndpointRef, conf: SparkConf = new SparkConf) extend case None => } driverIdToAppId.remove(driverId) + case DecommissionWorker => + decommissioned = true } } @@ -742,9 +745,9 @@ class MasterSuite extends SparkFunSuite hostnames: Seq[String]): Unit = { val conf = new SparkConf() val master = makeAliveMaster(conf) - val workerRegs = (1 to numWorkers).map{idx => + val workers = (1 to numWorkers).map { idx => val worker = new MockWorker(master.self, conf) - worker.rpcEnv.setupEndpoint("worker", worker) + worker.rpcEnv.setupEndpoint(s"worker-$idx", worker) val workerReg = RegisterWorker( worker.id, "localhost", @@ -755,14 +758,14 @@ class MasterSuite extends SparkFunSuite "http://localhost:8080", RpcAddress("localhost", 10000)) master.self.send(workerReg) - workerReg + worker } eventually(timeout(10.seconds)) { val masterState = master.self.askSync[MasterStateResponse](RequestMasterState) assert(masterState.workers.length === numWorkers) assert(masterState.workers.forall(_.state == WorkerState.ALIVE)) - assert(masterState.workers.map(_.id).toSet == workerRegs.map(_.id).toSet) + assert(masterState.workers.map(_.id).toSet == workers.map(_.id).toSet) } val decomWorkersCount = master.self.askSync[Integer](DecommissionWorkersOnHosts(hostnames)) @@ -773,8 +776,11 @@ class MasterSuite extends SparkFunSuite eventually(timeout(30.seconds)) { val masterState = master.self.askSync[MasterStateResponse](RequestMasterState) assert(masterState.workers.length === numWorkers) - val workersActuallyDecomed = masterState.workers.count(_.state == WorkerState.DECOMMISSIONED) - assert(workersActuallyDecomed === numWorkersExpectedToDecom) + val workersActuallyDecomed = masterState.workers + .filter(_.state == WorkerState.DECOMMISSIONED).map(_.id) + val decommissionedWorkers = workers.filter(w => workersActuallyDecomed.contains(w.id)) + assert(workersActuallyDecomed.length === numWorkersExpectedToDecom) + assert(decommissionedWorkers.forall(_.decommissioned)) } // Decommissioning a worker again should return the same answer since we want this call to be diff --git a/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala index 83bb66efdac9e..4a92cbcb85847 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala @@ -31,7 +31,7 @@ import org.apache.spark.util.{RpcUtils, SerializableBuffer, ThreadUtils} class WorkerDecommissionSuite extends SparkFunSuite with LocalSparkContext { override def beforeEach(): Unit = { - val conf = new SparkConf().setAppName("test").setMaster("local") + val conf = new SparkConf().setAppName("test") .set(config.DECOMMISSION_ENABLED, true) sc = new 
SparkContext("local-cluster[2, 1, 1024]", "test", conf) @@ -78,7 +78,10 @@ class WorkerDecommissionSuite extends SparkFunSuite with LocalSparkContext { val execs = sched.getExecutorIds() // Make the executors decommission, finish, exit, and not be replaced. val execsAndDecomInfo = execs.map((_, ExecutorDecommissionInfo("", None))).toArray - sched.decommissionExecutors(execsAndDecomInfo, adjustTargetNumExecutors = true) + sched.decommissionExecutors( + execsAndDecomInfo, + adjustTargetNumExecutors = true, + triggeredByExecutor = false) val asyncCountResult = ThreadUtils.awaitResult(asyncCount, 20.seconds) assert(asyncCountResult === 10) } diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionIntegrationSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionIntegrationSuite.scala index dcf313f671d5e..bb685cd353ddc 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionIntegrationSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionIntegrationSuite.scala @@ -40,6 +40,46 @@ class BlockManagerDecommissionIntegrationSuite extends SparkFunSuite with LocalS val TaskEnded = "TASK_ENDED" val JobEnded = "JOB_ENDED" + Seq(false, true).foreach { isEnabled => + test(s"SPARK-32850: BlockManager decommission should respect the configuration " + + s"(enabled=${isEnabled})") { + val conf = new SparkConf() + .setAppName("test-blockmanager-decommissioner") + .setMaster("local-cluster[2, 1, 1024]") + .set(config.DECOMMISSION_ENABLED, true) + .set(config.STORAGE_DECOMMISSION_ENABLED, isEnabled) + sc = new SparkContext(conf) + TestUtils.waitUntilExecutorsUp(sc, 2, 6000) + val executors = sc.getExecutorIds().toArray + val decommissionListener = new SparkListener { + override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = { + // ensure Tasks launched at executors before they're marked as decommissioned by driver + Thread.sleep(3000) + sc.schedulerBackend.asInstanceOf[StandaloneSchedulerBackend] + .decommissionExecutors( + executors.map { id => (id, ExecutorDecommissionInfo("test")) }, + true, + false) + } + } + sc.addSparkListener(decommissionListener) + + val decommissionStatus: Seq[Boolean] = sc.parallelize(1 to 100, 2).mapPartitions { _ => + val startTime = System.currentTimeMillis() + while (SparkEnv.get.blockManager.decommissioner.isEmpty && + // wait at most 6 seconds for BlockManager to start to decommission (if enabled) + System.currentTimeMillis() - startTime < 6000) { + Thread.sleep(300) + } + val blockManagerDecommissionStatus = + if (SparkEnv.get.blockManager.decommissioner.isEmpty) false else true + Iterator.single(blockManagerDecommissionStatus) + }.collect() + assert(decommissionStatus.forall(_ == isEnabled)) + sc.removeSparkListener(decommissionListener) + } + } + testRetry(s"verify that an already running task which is going to cache data succeeds " + s"on a decommissioned executor after task start") { runDecomTest(true, false, TaskStarted) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala index cdde8411d8b7b..9d7db04bb72b0 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala +++ 
b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala @@ -40,9 +40,7 @@ private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite => mainClass = "", expectedLogOnCompletion = Seq( "Finished waiting, stopping Spark", - "Received decommission executor message", - "Acknowledged decommissioning block manager", - ": Executor decommission.", + "Decommission executors", "Final accumulator value is: 100"), appArgs = Array.empty[String], driverPodChecker = doBasicDriverPyPodCheck, @@ -73,9 +71,7 @@ private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite => mainClass = "", expectedLogOnCompletion = Seq( "Finished waiting, stopping Spark", - "Received decommission executor message", - "Acknowledged decommissioning block manager", - ": Executor decommission."), + "Decommission executors"), appArgs = Array.empty[String], driverPodChecker = doBasicDriverPyPodCheck, executorPodChecker = doBasicExecutorPyPodCheck, @@ -110,9 +106,7 @@ private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite => mainClass = "", expectedLogOnCompletion = Seq( "Finished waiting, stopping Spark", - "Received decommission executor message", - "Acknowledged decommissioning block manager", - ": Executor decommission."), + "Decommission executors"), appArgs = Array.empty[String], driverPodChecker = doBasicDriverPyPodCheck, executorPodChecker = doBasicExecutorPyPodCheck, diff --git a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala index f1870718c6730..293498ae5c37b 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.streaming.scheduler -import org.mockito.ArgumentMatchers.{eq => meq} +import org.mockito.ArgumentMatchers.{any, eq => meq} import org.mockito.Mockito.{never, reset, times, verify, when} import org.scalatest.{BeforeAndAfterEach, PrivateMethodTester} import org.scalatest.concurrent.Eventually.{eventually, timeout} @@ -101,12 +101,12 @@ class ExecutorAllocationManagerSuite extends TestSuiteBase val decomInfo = ExecutorDecommissionInfo("spark scale down", None) if (decommissioning) { verify(allocationClient, times(1)).decommissionExecutor( - meq(expectedExec.get), meq(decomInfo), meq(true)) + meq(expectedExec.get), meq(decomInfo), meq(true), any()) verify(allocationClient, never).killExecutor(meq(expectedExec.get)) } else { verify(allocationClient, times(1)).killExecutor(meq(expectedExec.get)) verify(allocationClient, never).decommissionExecutor( - meq(expectedExec.get), meq(decomInfo), meq(true)) + meq(expectedExec.get), meq(decomInfo), meq(true), any()) } } else { if (decommissioning) { From e21bb710e5473831ca7f1aba6081a217067789a8 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 23 Oct 2020 05:52:38 +0000 Subject: [PATCH 0311/1009] [SPARK-32991][SQL] Use conf in shared state as the original configuraion for RESET MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? 
#### case the case here covers the static and dynamic SQL configs behavior in `sharedState` and `sessionState`, and the specially handled config `spark.sql.warehouse.dir` the case can be found here - https://github.com/yaooqinn/sugar/blob/master/src/main/scala/com/netease/mammut/spark/training/sql/WarehouseSCBeforeSS.scala ```scala import java.lang.reflect.Field import org.apache.spark.sql.SparkSession import org.apache.spark.{SparkConf, SparkContext} object WarehouseSCBeforeSS extends App { val wh = "spark.sql.warehouse.dir" val td = "spark.sql.globalTempDatabase" val custom = "spark.sql.custom" val conf = new SparkConf() .setMaster("local") .setAppName("SPARK-32991") .set(wh, "./data1") .set(td, "bob") val sc = new SparkContext(conf) val spark = SparkSession.builder() .config(wh, "./data2") .config(td, "alice") .config(custom, "kyao") .getOrCreate() val confField: Field = spark.sharedState.getClass.getDeclaredField("conf") confField.setAccessible(true) private val shared: SparkConf = confField.get(spark.sharedState).asInstanceOf[SparkConf] println() println(s"=====> SharedState: $wh=${shared.get(wh)}") println(s"=====> SharedState: $td=${shared.get(td)}") println(s"=====> SharedState: $custom=${shared.get(custom, "")}") println(s"=====> SessionState: $wh=${spark.conf.get(wh)}") println(s"=====> SessionState: $td=${spark.conf.get(td)}") println(s"=====> SessionState: $custom=${spark.conf.get(custom, "")}") val spark2 = SparkSession.builder().config(td, "fred").getOrCreate() println(s"=====> SessionState 2: $wh=${spark2.conf.get(wh)}") println(s"=====> SessionState 2: $td=${spark2.conf.get(td)}") println(s"=====> SessionState 2: $custom=${spark2.conf.get(custom, "")}") SparkSession.setActiveSession(spark) spark.sql("RESET") println(s"=====> SessionState RESET: $wh=${spark.conf.get(wh)}") println(s"=====> SessionState RESET: $td=${spark.conf.get(td)}") println(s"=====> SessionState RESET: $custom=${spark.conf.get(custom, "")}") val spark3 = SparkSession.builder().getOrCreate() println(s"=====> SessionState 3: $wh=${spark2.conf.get(wh)}") println(s"=====> SessionState 3: $td=${spark2.conf.get(td)}") println(s"=====> SessionState 3: $custom=${spark2.conf.get(custom, "")}") } ``` #### outputs and analysis ``` // 1. Make the cloned spark conf in shared state respect the warehouse dir from the 1st SparkSession //=====> SharedState: spark.sql.warehouse.dir=./data1 // 2. ⏬ //=====> SharedState: spark.sql.globalTempDatabase=alice //=====> SharedState: spark.sql.custom=kyao //=====> SessionState: spark.sql.warehouse.dir=./data2 //=====> SessionState: spark.sql.globalTempDatabase=alice //=====> SessionState: spark.sql.custom=kyao //=====> SessionState 2: spark.sql.warehouse.dir=./data2 //=====> SessionState 2: spark.sql.globalTempDatabase=alice //=====> SessionState 2: spark.sql.custom=kyao // 2'.🔼 OK until here // 3. Make the below 3 ones respect the cloned spark conf in shared state with issue 1 fixed //=====> SessionState RESET: spark.sql.warehouse.dir=./data1 //=====> SessionState RESET: spark.sql.globalTempDatabase=bob //=====> SessionState RESET: spark.sql.custom= // 4. Then the SparkSessions created after RESET will be corrected. //=====> SessionState 3: spark.sql.warehouse.dir=./data1 //=====> SessionState 3: spark.sql.globalTempDatabase=bob //=====> SessionState 3: spark.sql.custom= ``` In this PR, we gather all valid config to the cloned conf of `sharedState` during being constructed, well, actually only `spark.sql.warehouse.dir` is missing. 
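Concretely, the fix boils down to two small pieces, shown here as condensed, illustrative excerpts of the `SharedState` and `ResetCommand` changes in this patch rather than complete code:

```scala
// In SharedState.loadHiveConfFile: prefer the warehouse dir passed in the session's
// initial options, then fall back to the SparkContext conf.
val sparkWarehouseOption =
  initialConfigs.get(WAREHOUSE_PATH.key).orElse(sparkConf.getOption(WAREHOUSE_PATH.key))

// In ResetCommand.run: reset against the conf captured in the shared state,
// not the raw SparkContext conf.
val defaults = sparkSession.sharedState.conf
```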
Then we use this conf as defaults for `RESET` Command. `SparkSession.clearActiveSession/clearDefaultSession` will make the shared state invisible and unsharable. They will be internal only soon (confirmed with Wenchen), so cases with them called will not be a problem. ### Why are the changes needed? bugfix for programming API to call RESET while users creating SparkContext first and config SparkSession later. ### Does this PR introduce _any_ user-facing change? yes, before this change when you use programming API and call RESET, all configs will be reset to SparkContext.conf, now they go to SparkSession.sharedState.conf ### How was this patch tested? new tests Closes #30045 from yaooqinn/SPARK-32991. Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../sql/execution/command/SetCommand.scala | 2 +- .../spark/sql/internal/SharedState.scala | 15 ++++-- .../spark/sql/SparkSessionBuilderSuite.scala | 54 +++++++++++++++++++ .../spark/sql/hive/HiveSharedStateSuite.scala | 46 +++++++++++----- 4 files changed, 99 insertions(+), 18 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala index fd89e361fe3d1..61ee6d7f4a299 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala @@ -172,7 +172,7 @@ object SetCommand { case class ResetCommand(config: Option[String]) extends RunnableCommand with IgnoreCachedData { override def run(sparkSession: SparkSession): Seq[Row] = { - val defaults = sparkSession.sparkContext.conf + val defaults = sparkSession.sharedState.conf config match { case Some(key) => sparkSession.conf.unset(key) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala index ce4385d88f1e9..1acdc4bd5f0e3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala @@ -55,10 +55,11 @@ private[sql] class SharedState( SharedState.setFsUrlStreamHandlerFactory(sparkContext.conf, sparkContext.hadoopConfiguration) - private val (conf, hadoopConf) = { + private[sql] val (conf, hadoopConf) = { // Load hive-site.xml into hadoopConf and determine the warehouse path which will be set into // both spark conf and hadoop conf avoiding be affected by any SparkSession level options - SharedState.loadHiveConfFile(sparkContext.conf, sparkContext.hadoopConfiguration) + SharedState.loadHiveConfFile( + sparkContext.conf, sparkContext.hadoopConfiguration, initialConfigs) val confClone = sparkContext.conf.clone() val hadoopConfClone = new Configuration(sparkContext.hadoopConfiguration) // If `SparkSession` is instantiated using an existing `SparkContext` instance and no existing @@ -227,7 +228,8 @@ object SharedState extends Logging { */ def loadHiveConfFile( sparkConf: SparkConf, - hadoopConf: Configuration): Unit = { + hadoopConf: Configuration, + initialConfigs: scala.collection.Map[String, String] = Map.empty): Unit = { val hiveWarehouseKey = "hive.metastore.warehouse.dir" val configFile = Utils.getContextOrSparkClassLoader.getResource("hive-site.xml") if (configFile != null) { @@ -238,11 +240,13 @@ object SharedState extends Logging { hadoopConf.setIfUnset(entry.getKey, entry.getValue) } } + val sparkWarehouseOption = + 
initialConfigs.get(WAREHOUSE_PATH.key).orElse(sparkConf.getOption(WAREHOUSE_PATH.key)) // hive.metastore.warehouse.dir only stay in hadoopConf sparkConf.remove(hiveWarehouseKey) // Set the Hive metastore warehouse path to the one we use val hiveWarehouseDir = hadoopConf.get(hiveWarehouseKey) - val warehousePath = if (hiveWarehouseDir != null && !sparkConf.contains(WAREHOUSE_PATH.key)) { + val warehousePath = if (hiveWarehouseDir != null && sparkWarehouseOption.isEmpty) { // If hive.metastore.warehouse.dir is set and spark.sql.warehouse.dir is not set, // we will respect the value of hive.metastore.warehouse.dir. sparkConf.set(WAREHOUSE_PATH.key, hiveWarehouseDir) @@ -254,9 +258,10 @@ object SharedState extends Logging { // the value of spark.sql.warehouse.dir. // When neither spark.sql.warehouse.dir nor hive.metastore.warehouse.dir is set // we will set hive.metastore.warehouse.dir to the default value of spark.sql.warehouse.dir. - val sparkWarehouseDir = sparkConf.get(WAREHOUSE_PATH) + val sparkWarehouseDir = sparkWarehouseOption.getOrElse(WAREHOUSE_PATH.defaultValueString) logInfo(s"Setting $hiveWarehouseKey ('$hiveWarehouseDir') to the value of " + s"${WAREHOUSE_PATH.key} ('$sparkWarehouseDir').") + sparkConf.set(WAREHOUSE_PATH.key, sparkWarehouseDir) hadoopConf.set(hiveWarehouseKey, sparkWarehouseDir) sparkWarehouseDir } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala index e1f7b6f455e14..23695af0f59c1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala @@ -300,4 +300,58 @@ class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { session.stop() } } + + test("SPARK-32991: Use conf in shared state as the original configuration for RESET") { + val wh = "spark.sql.warehouse.dir" + val td = "spark.sql.globalTempDatabase" + val custom = "spark.sql.custom" + + val conf = new SparkConf() + .setMaster("local") + .setAppName("SPARK-32991") + .set(wh, "./data1") + .set(td, "bob") + + val sc = new SparkContext(conf) + + val spark = SparkSession.builder() + .config(wh, "./data2") + .config(td, "alice") + .config(custom, "kyao") + .getOrCreate() + + // When creating the first session like above, we will update the shared spark conf to the + // newly specified values + val sharedWH = spark.sharedState.conf.get(wh) + val sharedTD = spark.sharedState.conf.get(td) + val sharedCustom = spark.sharedState.conf.get(custom) + assert(sharedWH === "./data2", + "The warehouse dir in shared state should be determined by the 1st created spark session") + assert(sharedTD === "alice", + "Static sql configs in shared state should be determined by the 1st created spark session") + assert(sharedCustom === "kyao", + "Dynamic sql configs in shared state should be determined by the 1st created spark session") + + assert(spark.conf.get(wh) === sharedWH, + "The warehouse dir in session conf and shared state conf should be consistent") + assert(spark.conf.get(td) === sharedTD, + "Static sql configs in session conf and shared state conf should be consistent") + assert(spark.conf.get(custom) === sharedCustom, + "Dynamic sql configs in session conf and shared state conf should be consistent before" + + " setting to new ones") + + spark.sql("RESET") + + assert(spark.conf.get(wh) === sharedWH, + "The warehouse dir in shared state should be respect after RESET") + 
assert(spark.conf.get(td) === sharedTD, + "Static sql configs in shared state should be respect after RESET") + assert(spark.conf.get(custom) === sharedCustom, + "Dynamic sql configs in shared state should be respect after RESET") + + val spark2 = SparkSession.builder().getOrCreate() + assert(spark2.conf.get(wh) === sharedWH) + assert(spark2.conf.get(td) === sharedTD) + assert(spark2.conf.get(custom) === sharedCustom) + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSharedStateSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSharedStateSuite.scala index 78535b094b83d..d2d4546ea18ea 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSharedStateSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSharedStateSuite.scala @@ -20,35 +20,46 @@ package org.apache.spark.sql.hive import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} -import org.apache.spark.sql.internal.SharedState +import org.apache.spark.sql.SparkSession import org.apache.spark.sql.internal.StaticSQLConf._ import org.apache.spark.util.Utils class HiveSharedStateSuite extends SparkFunSuite { + override def beforeEach(): Unit = { + SparkSession.clearActiveSessionInternal() + SparkSession.clearDefaultSession() + super.beforeEach() + } + test("initial configs should be passed to SharedState but not SparkContext") { val conf = new SparkConf().setMaster("local").setAppName("SharedState Test") val sc = SparkContext.getOrCreate(conf) + val wareHouseDir = Utils.createTempDir().toString val invalidPath = "invalid/path" val metastorePath = Utils.createTempDir() val tmpDb = "tmp_db" // The initial configs used to generate SharedState, none of these should affect the global - // shared SparkContext's configurations. Especially, all these configs are passed to the cloned - // confs inside SharedState except metastore warehouse dir. + // shared SparkContext's configurations, except spark.sql.warehouse.dir. + // Especially, all these configs are passed to the cloned confs inside SharedState for sharing + // cross sessions. 
val initialConfigs = Map("spark.foo" -> "bar", - WAREHOUSE_PATH.key -> invalidPath, - ConfVars.METASTOREWAREHOUSE.varname -> invalidPath, + WAREHOUSE_PATH.key -> wareHouseDir, + ConfVars.METASTOREWAREHOUSE.varname -> wareHouseDir, CATALOG_IMPLEMENTATION.key -> "hive", ConfVars.METASTORECONNECTURLKEY.varname -> s"jdbc:derby:;databaseName=$metastorePath/metastore_db;create=true", GLOBAL_TEMP_DATABASE.key -> tmpDb) - val state = new SharedState(sc, initialConfigs) - assert(sc.conf.get(WAREHOUSE_PATH.key) !== invalidPath, - "warehouse conf in session options can't affect application wide spark conf") - assert(sc.hadoopConfiguration.get(ConfVars.METASTOREWAREHOUSE.varname) !== invalidPath, - "warehouse conf in session options can't affect application wide hadoop conf") + val builder = SparkSession.builder() + initialConfigs.foreach { case (k, v) => builder.config(k, v) } + val ss = builder.getOrCreate() + val state = ss.sharedState + assert(sc.conf.get(WAREHOUSE_PATH.key) === wareHouseDir, + "initial warehouse conf in session options can affect application wide spark conf") + assert(sc.hadoopConfiguration.get(ConfVars.METASTOREWAREHOUSE.varname) === wareHouseDir, + "initial warehouse conf in session options can affect application wide hadoop conf") assert(!state.sparkContext.conf.contains("spark.foo"), "static spark conf should not be affected by session") @@ -57,9 +68,20 @@ class HiveSharedStateSuite extends SparkFunSuite { val client = state.externalCatalog.unwrapped.asInstanceOf[HiveExternalCatalog].client assert(client.getConf("spark.foo", "") === "bar", "session level conf should be passed to catalog") - assert(client.getConf(ConfVars.METASTOREWAREHOUSE.varname, invalidPath) !== invalidPath, - "session level conf should be passed to catalog except warehouse dir") + assert(client.getConf(ConfVars.METASTOREWAREHOUSE.varname, "") === wareHouseDir, + "session level conf should be passed to catalog") assert(state.globalTempViewManager.database === tmpDb) + + val ss2 = + builder.config("spark.foo", "bar2222").config(WAREHOUSE_PATH.key, invalidPath).getOrCreate() + + assert(ss2.sparkContext.conf.get(WAREHOUSE_PATH.key) !== invalidPath, + "warehouse conf in session options can't affect application wide spark conf") + assert(ss2.sparkContext.hadoopConfiguration.get(ConfVars.METASTOREWAREHOUSE.varname) !== + invalidPath, "warehouse conf in session options can't affect application wide hadoop conf") + assert(ss.conf.get("spark.foo") === "bar2222", "session level conf should be passed to catalog") + assert(ss.conf.get(WAREHOUSE_PATH) !== invalidPath, + "session level conf should be passed to catalog") } } From 5e5b48d9a8a65c23d5abd0ea973e9d515731f17e Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 22 Oct 2020 22:53:24 -0700 Subject: [PATCH 0312/1009] [SPARK-33226][BUILD] Upgrade to SBT 1.4.1 ### What changes were proposed in this pull request? This PR aims to upgrade SBT from 1.4.0 to 1.4.1. ### Why are the changes needed? SBT 1.4.1 is a maintenance release at 1.4.x line. There are many bug fixes already. - https://github.com/sbt/sbt/releases/tag/v1.4.1 (Released on 2020-10-19) ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CI and check [the Jenkins log](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/130185/testReport). 
``` ======================================================================== Building Spark ======================================================================== [info] Building Spark using SBT with these arguments: -Phadoop-3.2 -Phive-2.3 -Phive -Pspark-ganglia-lgpl -Pkinesis-asl -Pyarn -Phadoop-cloud -Phive-thriftserver -Pkubernetes -Pmesos test:package streaming-kinesis-asl-assembly/assembly Using /usr/java/jdk1.8.0_191 as default JAVA_HOME. Note, this will be overridden by -java-home if it is set. Attempting to fetch sbt Launching sbt from build/sbt-launch-1.4.1.jar [info] [launcher] getting org.scala-sbt sbt 1.4.1 (this may take some time)... downloading https://repo1.maven.org/maven2/org/scala-sbt/sbt/1.4.1/sbt-1.4.1.jar ... ``` Closes #30137 from dongjoon-hyun/SBT_1.4.1. Authored-by: Dongjoon Hyun Signed-off-by: Liang-Chi Hsieh --- project/build.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/build.properties b/project/build.properties index e391883fbbc2d..d70d98448e4ca 100644 --- a/project/build.properties +++ b/project/build.properties @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -sbt.version=1.4.0 +sbt.version=1.4.1 From 10bd42cd475eea8d5e5689e770e4773cebf62374 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Fri, 23 Oct 2020 19:19:02 +0900 Subject: [PATCH 0313/1009] [SPARK-33104][BUILD] Exclude 'org.apache.hadoop:hadoop-yarn-server-resourcemanager:jar:tests' ### What changes were proposed in this pull request? This PR proposes to exclude `org.apache.hadoop:hadoop-yarn-server-resourcemanager:jar:tests` from `hadoop-yarn-server-tests` when we use Hadoop 2 profile. For some reasons, after SBT 1.3 upgrade at SPARK-21708, SBT starts to pull the dependencies of 'hadoop-yarn-server-tests' with 'tests' classifier: ``` org/apache/hadoop/hadoop-common/2.7.4/hadoop-common-2.7.4-tests.jar org/apache/hadoop/hadoop-yarn-common/2.7.4/hadoop-yarn-common-2.7.4-tests.jar org/apache/hadoop/hadoop-yarn-server-resourcemanager/2.7.4/hadoop-yarn-server-resourcemanager-2.7.4-tests.jar ``` these were not pulled before the upgrade. This specific `hadoop-yarn-server-resourcemanager-2.7.4-tests.jar` causes the problem (SPARK-33104) 1. When the test case creates the Hadoop configuration here, https://github.com/apache/spark/blob/cc06266ade5a4eb35089501a3b32736624208d4c/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala#L122 2. Such jars above have higher precedence in the class path, instead of the specified custom `core-site.xml` in the test: https://github.com/apache/spark/blob/e93b8f02cd706bedc47c9b55a73f632fe9e61ec3/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala#L1375 3. Later, `core-site.xml` in the jar is picked instead in Hadoop's `Configuration`: Before this fix: ``` jar:file:/.../https/maven-central.storage-download.googleapis.com/maven2/org/apache/hadoop/ hadoop-yarn-server-resourcemanager/2.7.4/hadoop-yarn-server-resourcemanager-2.7.4-tests.jar!/core-site.xml ``` After this fix: ``` file:/.../spark/resource-managers/yarn/target/org.apache.spark.deploy.yarn.YarnClusterSuite/ org.apache.spark.deploy.yarn.YarnClusterSuite-localDir-nm-0_0/ usercache/.../filecache/10/__spark_conf__.zip/__hadoop_conf__/core-site.xml ``` 4. 
the `core-site.xml` in the jar of course does not contain: https://github.com/apache/spark/blob/2cfd215dc4fb1ff6865644fec8284ba93dcddd5c/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala#L133-L141 and the specific test fails. This PR uses some kind of hacky approach. It was excluded from 'hadoop-yarn-server-tests' with 'tests' classifier, and then added back as a proper dependency (when Hadoop 2 profile is used). In this way, SBT does not pull `hadoop-yarn-server-resourcemanager` with `tests` classifier anymore. ### Why are the changes needed? To make the build pass. This is a blocker. ### Does this PR introduce _any_ user-facing change? No, test-only. ### How was this patch tested? Manually tested and debugged: ```bash build/sbt clean "yarn/testOnly *.YarnClusterSuite -- -z SparkHadoopUtil" -Pyarn -Phadoop-2.7 -Phive -Phive-2.3 ``` Closes #30133 from HyukjinKwon/SPARK-33104. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- pom.xml | 16 ++++++++++++++++ resource-managers/yarn/pom.xml | 9 +++++++++ 2 files changed, 25 insertions(+) diff --git a/pom.xml b/pom.xml index 8b2130f2d9f56..2c6f458ee25fd 100644 --- a/pom.xml +++ b/pom.xml @@ -1399,8 +1399,24 @@ com.sun.jersey.contribs * + + org.apache.hadoop + hadoop-yarn-server-resourcemanager + + + + org.apache.hadoop + hadoop-yarn-server-resourcemanager + ${yarn.version} + test + org.apache.hadoop hadoop-yarn-server-web-proxy diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index da715c6bdc59f..f6d6ddccc99c3 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -65,6 +65,15 @@ tests test + + + org.apache.hadoop + hadoop-yarn-server-resourcemanager + test + From 82d500a05cb81019107376e5a9e7d1d3d27ff808 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 23 Oct 2020 13:34:33 -0700 Subject: [PATCH 0314/1009] [SPARK-33193][SQL][TEST] Hive ThriftServer JDBC Database MetaData API Behavior Auditing ### What changes were proposed in this pull request? Add a test case to audit all JDBC metadata behaviors to check and prevent potential APIs silent changing from both the upstream hive-jdbc module or the Spark thrift server side. Forked from my kyuubi project here https://github.com/yaooqinn/kyuubi/blob/master/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/SparkOperationSuite.scala ### Why are the changes needed? Make the SparkThriftServer safer to evolve. ### Does this PR introduce _any_ user-facing change? dev only ### How was this patch tested? new tests Closes #30101 from yaooqinn/SPARK-33193. 
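For illustration only (not part of this patch), here is a minimal client-side sketch of how these DatabaseMetaData calls reach the Spark Thrift Server through the Hive JDBC driver. The connection URL, port, and credentials below are assumptions; the expected values in the comments mirror the assertions added in the test suite below.

```scala
// Hedged sketch: probe a few DatabaseMetaData methods over Hive JDBC.
// Assumes hive-jdbc is on the classpath and a Spark Thrift Server listens on localhost:10000.
import java.sql.{DriverManager, SQLFeatureNotSupportedException}

object MetadataProbe {
  def main(args: Array[String]): Unit = {
    Class.forName("org.apache.hive.jdbc.HiveDriver")
    val conn = DriverManager.getConnection("jdbc:hive2://localhost:10000/default", "anonymous", "")
    try {
      val metaData = conn.getMetaData
      // Calls answered by the server, as asserted in the "Method supported" test below.
      println(metaData.getDatabaseProductName)    // expected: "Spark SQL"
      println(metaData.getDatabaseProductVersion) // expected: the Spark version string
      // Calls rejected on the hive-jdbc client side, as in the "Method not supported" test.
      try {
        metaData.getURL
      } catch {
        case e: SQLFeatureNotSupportedException => println(e.getMessage) // "Method not supported"
      }
    } finally {
      conn.close()
    }
  }
}
```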
Authored-by: Kent Yao Signed-off-by: Dongjoon Hyun --- .../SparkMetadataOperationSuite.scala | 206 +++++++++++++++++- 1 file changed, 205 insertions(+), 1 deletion(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala index b94d819326d16..b413b46adcaa1 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala @@ -17,12 +17,15 @@ package org.apache.spark.sql.hive.thriftserver -import java.sql.{DatabaseMetaData, ResultSet} +import java.sql.{DatabaseMetaData, ResultSet, SQLFeatureNotSupportedException} +import org.apache.hive.common.util.HiveVersionInfo import org.apache.hive.service.cli.HiveSQLException +import org.apache.spark.SPARK_VERSION import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.types._ +import org.apache.spark.util.VersionUtils class SparkMetadataOperationSuite extends HiveThriftJdbcTest { @@ -396,4 +399,205 @@ class SparkMetadataOperationSuite extends HiveThriftJdbcTest { } } } + + test("Hive ThriftServer JDBC Database MetaData API Auditing - Method not supported") { + // These APIs belong to the upstream Apache Hive's hive-jdbc artifact where defines the hive + // behavior. Users can also use it to interact with Spark ThriftServer directly. Some behaviors + // are not fully consistent with Spark e.g. we support correlated subqueries but the hive-jdbc + // now fail directly at client side. There is nothing we can do but accept the current + // condition and highlight the difference and make it perspective in future changes both from + // upstream and inside Spark. 
+ withJdbcStatement() { statement => + val metaData = statement.getConnection.getMetaData + Seq( + () => metaData.allProceduresAreCallable, + () => metaData.getURL, + () => metaData.getUserName, + () => metaData.isReadOnly, + () => metaData.nullsAreSortedHigh, + () => metaData.nullsAreSortedLow, + () => metaData.nullsAreSortedAtStart, + () => metaData.nullsAreSortedAtEnd, + () => metaData.usesLocalFiles, + () => metaData.usesLocalFilePerTable, + () => metaData.supportsMixedCaseIdentifiers, + () => metaData.supportsMixedCaseQuotedIdentifiers, + () => metaData.storesUpperCaseIdentifiers, + () => metaData.storesUpperCaseQuotedIdentifiers, + () => metaData.storesLowerCaseIdentifiers, + () => metaData.storesLowerCaseQuotedIdentifiers, + () => metaData.storesMixedCaseIdentifiers, + () => metaData.storesMixedCaseQuotedIdentifiers, + () => metaData.getSQLKeywords, + () => metaData.nullPlusNonNullIsNull, + () => metaData.supportsConvert, + () => metaData.supportsTableCorrelationNames, + () => metaData.supportsDifferentTableCorrelationNames, + () => metaData.supportsExpressionsInOrderBy, + () => metaData.supportsOrderByUnrelated, + () => metaData.supportsGroupByUnrelated, + () => metaData.supportsGroupByBeyondSelect, + () => metaData.supportsLikeEscapeClause, + () => metaData.supportsMultipleTransactions, + () => metaData.supportsMinimumSQLGrammar, + () => metaData.supportsCoreSQLGrammar, + () => metaData.supportsExtendedSQLGrammar, + () => metaData.supportsANSI92EntryLevelSQL, + () => metaData.supportsANSI92IntermediateSQL, + () => metaData.supportsANSI92FullSQL, + () => metaData.supportsIntegrityEnhancementFacility, + () => metaData.isCatalogAtStart, + () => metaData.supportsSubqueriesInComparisons, + () => metaData.supportsSubqueriesInExists, + () => metaData.supportsSubqueriesInIns, + () => metaData.supportsSubqueriesInQuantifieds, + // Spark support this, see https://issues.apache.org/jira/browse/SPARK-18455 + () => metaData.supportsCorrelatedSubqueries, + () => metaData.supportsOpenCursorsAcrossCommit, + () => metaData.supportsOpenCursorsAcrossRollback, + () => metaData.supportsOpenStatementsAcrossCommit, + () => metaData.supportsOpenStatementsAcrossRollback, + () => metaData.getMaxBinaryLiteralLength, + () => metaData.getMaxCharLiteralLength, + () => metaData.getMaxColumnsInGroupBy, + () => metaData.getMaxColumnsInIndex, + () => metaData.getMaxColumnsInOrderBy, + () => metaData.getMaxColumnsInSelect, + () => metaData.getMaxColumnsInTable, + () => metaData.getMaxConnections, + () => metaData.getMaxCursorNameLength, + () => metaData.getMaxIndexLength, + () => metaData.getMaxSchemaNameLength, + () => metaData.getMaxProcedureNameLength, + () => metaData.getMaxCatalogNameLength, + () => metaData.getMaxRowSize, + () => metaData.doesMaxRowSizeIncludeBlobs, + () => metaData.getMaxStatementLength, + () => metaData.getMaxStatements, + () => metaData.getMaxTableNameLength, + () => metaData.getMaxTablesInSelect, + () => metaData.getMaxUserNameLength, + () => metaData.supportsTransactionIsolationLevel(1), + () => metaData.supportsDataDefinitionAndDataManipulationTransactions, + () => metaData.supportsDataManipulationTransactionsOnly, + () => metaData.dataDefinitionCausesTransactionCommit, + () => metaData.dataDefinitionIgnoredInTransactions, + () => metaData.getColumnPrivileges("", "%", "%", "%"), + () => metaData.getTablePrivileges("", "%", "%"), + () => metaData.getBestRowIdentifier("", "%", "%", 0, true), + () => metaData.getVersionColumns("", "%", "%"), + () => metaData.getExportedKeys("", "default", 
""), + () => metaData.supportsResultSetConcurrency(ResultSet.TYPE_FORWARD_ONLY, 2), + () => metaData.ownUpdatesAreVisible(ResultSet.TYPE_FORWARD_ONLY), + () => metaData.ownDeletesAreVisible(ResultSet.TYPE_FORWARD_ONLY), + () => metaData.ownInsertsAreVisible(ResultSet.TYPE_FORWARD_ONLY), + () => metaData.othersUpdatesAreVisible(ResultSet.TYPE_FORWARD_ONLY), + () => metaData.othersDeletesAreVisible(ResultSet.TYPE_FORWARD_ONLY), + () => metaData.othersInsertsAreVisible(ResultSet.TYPE_FORWARD_ONLY), + () => metaData.updatesAreDetected(ResultSet.TYPE_FORWARD_ONLY), + () => metaData.deletesAreDetected(ResultSet.TYPE_FORWARD_ONLY), + () => metaData.insertsAreDetected(ResultSet.TYPE_FORWARD_ONLY), + () => metaData.supportsNamedParameters, + () => metaData.supportsMultipleOpenResults, + () => metaData.supportsGetGeneratedKeys, + () => metaData.getSuperTypes("", "%", "%"), + () => metaData.getSuperTables("", "%", "%"), + () => metaData.getAttributes("", "%", "%", "%"), + () => metaData.getResultSetHoldability, + () => metaData.locatorsUpdateCopy, + () => metaData.supportsStatementPooling, + () => metaData.getRowIdLifetime, + () => metaData.supportsStoredFunctionsUsingCallSyntax, + () => metaData.autoCommitFailureClosesAllResultSets, + () => metaData.getClientInfoProperties, + () => metaData.getFunctionColumns("", "%", "%", "%"), + () => metaData.getPseudoColumns("", "%", "%", "%"), + () => metaData.generatedKeyAlwaysReturned).foreach { func => + val e = intercept[SQLFeatureNotSupportedException](func()) + assert(e.getMessage === "Method not supported") + } + } + } + + test("Hive ThriftServer JDBC Database MetaData API Auditing - Method supported") { + // These APIs belong to the upstream Apache Hive's hive-jdbc artifact where defines the hive + // behavior. Users can also use it to interact with Spark ThriftServer directly. Some behaviors + // are not fully consistent with Spark e.g. we can work with multiple catalogs. + // There is nothing we can do but accept the current condition and highlight the difference + // and make it perspective in future changes both from upstream and inside Spark. 
+ withJdbcStatement() { statement => + val metaData = statement.getConnection.getMetaData + assert(metaData.allTablesAreSelectable) + assert(metaData.getDatabaseProductName === "Spark SQL") + assert(metaData.getDatabaseProductVersion === SPARK_VERSION) + assert(metaData.getDriverName === "Hive JDBC") + assert(metaData.getDriverVersion === HiveVersionInfo.getVersion) + assert(metaData.getDatabaseMajorVersion === VersionUtils.majorVersion(SPARK_VERSION)) + assert(metaData.getDatabaseMinorVersion === VersionUtils.minorVersion(SPARK_VERSION)) + assert(metaData.getIdentifierQuoteString === " ", + "This method returns a space \" \" if identifier quoting is not supported") + assert(metaData.getNumericFunctions === "") + assert(metaData.getStringFunctions === "") + assert(metaData.getSystemFunctions === "") + assert(metaData.getTimeDateFunctions === "") + assert(metaData.getSearchStringEscape === "\\") + assert(metaData.getExtraNameCharacters === "") + assert(metaData.supportsAlterTableWithAddColumn()) + assert(!metaData.supportsAlterTableWithDropColumn()) + assert(metaData.supportsColumnAliasing()) + assert(metaData.supportsGroupBy) + assert(!metaData.supportsMultipleResultSets) + assert(!metaData.supportsNonNullableColumns) + assert(metaData.supportsOuterJoins) + assert(metaData.supportsFullOuterJoins) + assert(metaData.supportsLimitedOuterJoins) + assert(metaData.getSchemaTerm === "database") + assert(metaData.getProcedureTerm === "UDF") + assert(metaData.getCatalogTerm === "instance") + assert(metaData.getCatalogSeparator === ".") + assert(metaData.supportsSchemasInDataManipulation) + assert(!metaData.supportsSchemasInProcedureCalls) + assert(metaData.supportsSchemasInTableDefinitions) + assert(!metaData.supportsSchemasInIndexDefinitions) + assert(!metaData.supportsSchemasInPrivilegeDefinitions) + // This is actually supported, but hive jdbc package return false + assert(!metaData.supportsCatalogsInDataManipulation) + assert(!metaData.supportsCatalogsInProcedureCalls) + // This is actually supported, but hive jdbc package return false + assert(!metaData.supportsCatalogsInTableDefinitions) + assert(!metaData.supportsCatalogsInIndexDefinitions) + assert(!metaData.supportsCatalogsInPrivilegeDefinitions) + assert(!metaData.supportsPositionedDelete) + assert(!metaData.supportsPositionedUpdate) + assert(!metaData.supportsSelectForUpdate) + assert(!metaData.supportsStoredProcedures) + // This is actually supported, but hive jdbc package return false + assert(!metaData.supportsUnion) + assert(metaData.supportsUnionAll) + assert(metaData.getMaxColumnNameLength === 128) + assert(metaData.getDefaultTransactionIsolation === java.sql.Connection.TRANSACTION_NONE) + assert(!metaData.supportsTransactions) + assert(!metaData.getProcedureColumns("", "%", "%", "%").next()) + assert(!metaData.getImportedKeys("", "default", "").next()) + + // TODO: SPARK-33219 Disable GetPrimaryKeys and GetCrossReference APIs explicitly + // for Spark ThriftServer + assert(!metaData.getPrimaryKeys("", "default", "").next()) + assert(!metaData.getCrossReference("", "default", "src", "", "default", "src2").next()) + + assert(!metaData.getIndexInfo("", "default", "src", true, true).next()) + assert(metaData.supportsResultSetType(ResultSet.TYPE_FORWARD_ONLY)) + assert(metaData.supportsResultSetType(ResultSet.TYPE_SCROLL_INSENSITIVE)) + assert(metaData.supportsResultSetType(ResultSet.TYPE_SCROLL_SENSITIVE)) + assert(!metaData.supportsBatchUpdates) + assert(!metaData.getUDTs(",", "%", "%", null).next()) + 
assert(!metaData.supportsSavepoints) + assert(!metaData.supportsResultSetHoldability(ResultSet.HOLD_CURSORS_OVER_COMMIT)) + assert(metaData.getJDBCMajorVersion === 3) + assert(metaData.getJDBCMinorVersion === 0) + assert(metaData.getSQLStateType === DatabaseMetaData.sqlStateSQL) + assert(metaData.getMaxLogicalLobSize === 0) + assert(!metaData.supportsRefCursors) + } + } } From d7f15b025b16c99768516cfb7fd96ab2e6ee1c2b Mon Sep 17 00:00:00 2001 From: zero323 Date: Sat, 24 Oct 2020 10:00:04 +0900 Subject: [PATCH 0315/1009] [SPARK-33003][PYTHON][DOCS] Add type hints guidelines to the documentation ### What changes were proposed in this pull request? Add type hints guidelines to developer docs. ### Why are the changes needed? Since it is a new and still somewhat evolving feature, we should provided clear guidelines for potential contributors. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Closes #30094 from zero323/SPARK-33003. Authored-by: zero323 Signed-off-by: HyukjinKwon --- .../docs/source/development/contributing.rst | 45 ++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/python/docs/source/development/contributing.rst b/python/docs/source/development/contributing.rst index cb4e47a6b4197..2b62c953e0786 100644 --- a/python/docs/source/development/contributing.rst +++ b/python/docs/source/development/contributing.rst @@ -77,6 +77,50 @@ There are a couple of additional notes to keep in mind when contributing to code * Be Pythonic. * APIs are matched with Scala and Java sides in general. * PySpark specific APIs can still be considered as long as they are Pythonic and do not conflict with other existent APIs, for example, decorator usage of UDFs. +* If you extend or modify public API, please adjust corresponding type hints. See `Contributing and Maintaining Type Hints`_ for details. + +Contributing and Maintaining Type Hints +---------------------------------------- + +PySpark type hints are provided using stub files, placed in the same directory as the annotated module, with exception to ``# type: ignore`` in modules which don't have their own stubs (tests, examples and non-public API). +As a rule of thumb, only public API is annotated. + +Annotations should, when possible: + +* Reflect expectations of the underlying JVM API, to help avoid type related failures outside Python interpreter. +* In case of conflict between too broad (``Any``) and too narrow argument annotations, prefer the latter as one, as long as it is covering most of the typical use cases. +* Indicate nonsensical combinations of arguments using ``@overload`` annotations. For example, to indicate that ``*Col`` and ``*Cols`` arguments are mutually exclusive: + + .. code-block:: python + + @overload + def __init__( + self, + *, + threshold: float = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + @overload + def __init__( + self, + *, + thresholds: Optional[List[float]] = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ... + ) -> None: ... + +* Be compatible with the current stable MyPy release. + + +Complex supporting type definitions, should be placed in dedicated ``_typing.pyi`` stubs. See for example `pyspark.sql._typing.pyi `_. + +Annotations can be validated using ``dev/lint-python`` script or by invoking mypy directly: + +.. code-block:: bash + + mypy --config python/mypy.ini python/pyspark + Code Style Guide @@ -90,4 +134,3 @@ the APIs were inspired by Java. 
PySpark also follows `camelCase` for exposed API There is an exception ``functions.py`` that uses `snake_case`. It was in order to make APIs SQL (and Python) friendly. PySpark leverages linters such as `pycodestyle `_ and `flake8 `_, which ``dev/lint-python`` runs. Therefore, make sure to run that script to double check. - From f65952772702f0a8772c93b79f562f35c337f5a5 Mon Sep 17 00:00:00 2001 From: Shiqi Sun Date: Sat, 24 Oct 2020 09:55:57 -0700 Subject: [PATCH 0316/1009] [SPARK-30821][K8S] Handle executor failure with multiple containers Handle executor failure with multiple containers Added a spark property spark.kubernetes.executor.checkAllContainers, with default being false. When it's true, the executor snapshot will take all containers in the executor into consideration when deciding whether the executor is in "Running" state, if the pod restart policy is "Never". Also, added the new spark property to the doc. ### What changes were proposed in this pull request? Checking of all containers in the executor pod when reporting executor status, if the `spark.kubernetes.executor.checkAllContainers` property is set to true. ### Why are the changes needed? Currently, a pod remains "running" as long as there is at least one running container. This prevents Spark from noticing when a container has failed in an executor pod with multiple containers. With this change, user can configure the behavior to be different. Namely, if any container in the executor pod has failed, either the executor process or one of its sidecars, the pod is considered to be failed, and it will be rescheduled. ### Does this PR introduce _any_ user-facing change? Yes, new spark property added. User is now able to choose whether to turn on this feature using the `spark.kubernetes.executor.checkAllContainers` property. ### How was this patch tested? Unit test was added and all passed. I tried to run integration test by following the instruction [here](https://spark.apache.org/developer-tools.html) (section "Testing K8S") and also [here](https://github.com/apache/spark/blob/master/resource-managers/kubernetes/integration-tests/README.md), but I wasn't able to run it smoothly as it fails to talk with minikube cluster. Maybe it's because my minikube version is too new (I'm using v1.13.1)...? Since I've been trying it for two days and still can't make it work, I decided to submit this PR and hopefully the Jenkins test will pass. Closes #29924 from huskysun/exec-sidecar-failure. Authored-by: Shiqi Sun Signed-off-by: Holden Karau --- docs/running-on-kubernetes.md | 8 +++ .../org/apache/spark/deploy/k8s/Config.scala | 8 +++ .../cluster/k8s/ExecutorPodsSnapshot.scala | 16 +++++- .../k8s/KubernetesClusterManager.scala | 3 + ...erministicExecutorPodsSnapshotsStore.scala | 2 + .../k8s/ExecutorLifecycleTestUtils.scala | 32 ++++++++++- .../k8s/ExecutorPodsSnapshotSuite.scala | 56 +++++++++++++------ .../k8s/ExecutorPodsSnapshotsStoreSuite.scala | 1 + 8 files changed, 108 insertions(+), 18 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 3bd1c410e8433..4714e3517f16e 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1181,6 +1181,14 @@ See the [configuration page](configuration.html) for information on Spark config 3.0.0 + + spark.kubernetes.executor.checkAllContainers + false + + Specify whether executor pods should be check all containers (including sidecars) or only the executor container when determining the pod status. 
+ + 3.1.0 + spark.kubernetes.submission.connectionTimeout 10000 diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index 00eaff452ba45..d399f66b45981 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -417,6 +417,14 @@ private[spark] object Config extends Logging { .stringConf .createOptional + val KUBERNETES_EXECUTOR_CHECK_ALL_CONTAINERS = + ConfigBuilder("spark.kubernetes.executor.checkAllContainers") + .doc("If set to true, all containers in the executor pod will be checked when reporting" + + "executor status.") + .version("3.1.0") + .booleanConf + .createWithDefault(false) + val KUBERNETES_DRIVER_LABEL_PREFIX = "spark.kubernetes.driver.label." val KUBERNETES_DRIVER_ANNOTATION_PREFIX = "spark.kubernetes.driver.annotation." val KUBERNETES_DRIVER_SERVICE_ANNOTATION_PREFIX = "spark.kubernetes.driver.service.annotation." diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala index 30030ab539048..be75311bc3d4a 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala @@ -18,6 +18,7 @@ package org.apache.spark.scheduler.cluster.k8s import java.util.Locale +import io.fabric8.kubernetes.api.model.ContainerStateTerminated import io.fabric8.kubernetes.api.model.Pod import org.apache.spark.deploy.k8s.Constants._ @@ -37,6 +38,7 @@ private[spark] case class ExecutorPodsSnapshot(executorPods: Map[Long, ExecutorP } object ExecutorPodsSnapshot extends Logging { + private var shouldCheckAllContainers: Boolean = _ def apply(executorPods: Seq[Pod]): ExecutorPodsSnapshot = { ExecutorPodsSnapshot(toStatesByExecutorId(executorPods)) @@ -44,6 +46,10 @@ object ExecutorPodsSnapshot extends Logging { def apply(): ExecutorPodsSnapshot = ExecutorPodsSnapshot(Map.empty[Long, ExecutorPodState]) + def setShouldCheckAllContainers(watchAllContainers: Boolean): Unit = { + shouldCheckAllContainers = watchAllContainers + } + private def toStatesByExecutorId(executorPods: Seq[Pod]): Map[Long, ExecutorPodState] = { executorPods.map { pod => (pod.getMetadata.getLabels.get(SPARK_EXECUTOR_ID_LABEL).toLong, toState(pod)) @@ -59,7 +65,15 @@ object ExecutorPodsSnapshot extends Logging { case "pending" => PodPending(pod) case "running" => - PodRunning(pod) + if (shouldCheckAllContainers && + "Never" == pod.getSpec.getRestartPolicy && + pod.getStatus.getContainerStatuses.stream + .map[ContainerStateTerminated](cs => cs.getState.getTerminated) + .anyMatch(t => t != null && t.getExitCode != 0)) { + PodFailed(pod) + } else { + PodRunning(pod) + } case "failed" => PodFailed(pod) case "succeeded" => diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala index fcaa3687b14b4..cc5c2f4b6325d 100644 --- 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala @@ -95,10 +95,13 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager wit val schedulerExecutorService = ThreadUtils.newDaemonSingleThreadScheduledExecutor( "kubernetes-executor-maintenance") + ExecutorPodsSnapshot.setShouldCheckAllContainers( + sc.conf.get(KUBERNETES_EXECUTOR_CHECK_ALL_CONTAINERS)) val subscribersExecutor = ThreadUtils .newDaemonThreadPoolScheduledExecutor( "kubernetes-executor-snapshots-subscribers", 2) val snapshotsStore = new ExecutorPodsSnapshotsStoreImpl(subscribersExecutor) + val removedExecutorsCache = CacheBuilder.newBuilder() .expireAfterWrite(3, TimeUnit.MINUTES) .build[java.lang.Long, java.lang.Long]() diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/DeterministicExecutorPodsSnapshotsStore.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/DeterministicExecutorPodsSnapshotsStore.scala index 6dc052a5e6894..6e989316310e6 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/DeterministicExecutorPodsSnapshotsStore.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/DeterministicExecutorPodsSnapshotsStore.scala @@ -21,6 +21,8 @@ import scala.collection.mutable class DeterministicExecutorPodsSnapshotsStore extends ExecutorPodsSnapshotsStore { + ExecutorPodsSnapshot.setShouldCheckAllContainers(false) + private val snapshotsBuffer = mutable.Buffer.empty[ExecutorPodsSnapshot] private val subscribers = mutable.Buffer.empty[Seq[ExecutorPodsSnapshot] => Unit] diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala index 0377e54f3cd76..62c79e6f7cba5 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala @@ -76,6 +76,33 @@ object ExecutorLifecycleTestUtils { .build() } + /** + * [SPARK-30821] + * This creates a pod with one container in running state and one container in failed + * state (terminated with non-zero exit code). This pod is used for unit-testing the + * spark.kubernetes.executor.checkAllContainers Spark Conf. 
+ */ + def runningExecutorWithFailedContainer(executorId: Long): Pod = { + new PodBuilder(podWithAttachedContainerForId(executorId)) + .editOrNewStatus() + .withPhase("running") + .addNewContainerStatus() + .withNewState() + .withNewTerminated() + .withExitCode(1) + .endTerminated() + .endState() + .endContainerStatus() + .addNewContainerStatus() + .withNewState() + .withNewRunning() + .endRunning() + .endState() + .endContainerStatus() + .endStatus() + .build() + } + def succeededExecutor(executorId: Long): Pod = { new PodBuilder(podWithAttachedContainerForId(executorId)) .editOrNewStatus() @@ -117,7 +144,10 @@ object ExecutorLifecycleTestUtils { .addToLabels(SPARK_APP_ID_LABEL, TEST_SPARK_APP_ID) .addToLabels(SPARK_ROLE_LABEL, SPARK_POD_EXECUTOR_ROLE) .addToLabels(SPARK_EXECUTOR_ID_LABEL, executorId.toString) - .endMetadata() + .endMetadata() + .editOrNewSpec() + .withRestartPolicy("Never") + .endSpec() .build() val container = new ContainerBuilder() .withName("spark-executor") diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotSuite.scala index 6ca1733bcd32b..ad12461bfaf8c 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotSuite.scala @@ -16,31 +16,55 @@ */ package org.apache.spark.scheduler.cluster.k8s +import io.fabric8.kubernetes.api.model.Pod + import org.apache.spark.SparkFunSuite import org.apache.spark.scheduler.cluster.k8s.ExecutorLifecycleTestUtils._ class ExecutorPodsSnapshotSuite extends SparkFunSuite { + def testCase(pod: Pod, state: Pod => ExecutorPodState): (Pod, ExecutorPodState) = + (pod, state(pod)) + + def doTest(testCases: Seq[(Pod, ExecutorPodState)]): Unit = { + val snapshot = ExecutorPodsSnapshot(testCases.map(_._1)) + for (((_, state), i) <- testCases.zipWithIndex) { + assertResult(state.getClass.getName, s"executor ID $i") { + snapshot.executorPods(i).getClass.getName + } + } + } + test("States are interpreted correctly from pod metadata.") { - val pods = Seq( - pendingExecutor(0), - runningExecutor(1), - succeededExecutor(2), - failedExecutorWithoutDeletion(3), - deletedExecutor(4), - unknownExecutor(5)) - val snapshot = ExecutorPodsSnapshot(pods) - assert(snapshot.executorPods === - Map( - 0L -> PodPending(pods(0)), - 1L -> PodRunning(pods(1)), - 2L -> PodSucceeded(pods(2)), - 3L -> PodFailed(pods(3)), - 4L -> PodDeleted(pods(4)), - 5L -> PodUnknown(pods(5)))) + ExecutorPodsSnapshot.setShouldCheckAllContainers(false) + val testCases = Seq( + testCase(pendingExecutor(0), PodPending), + testCase(runningExecutor(1), PodRunning), + testCase(succeededExecutor(2), PodSucceeded), + testCase(failedExecutorWithoutDeletion(3), PodFailed), + testCase(deletedExecutor(4), PodDeleted), + testCase(unknownExecutor(5), PodUnknown) + ) + doTest(testCases) + } + + test("SPARK-30821: States are interpreted correctly from pod metadata" + + " when configured to check all containers.") { + ExecutorPodsSnapshot.setShouldCheckAllContainers(true) + val testCases = Seq( + testCase(pendingExecutor(0), PodPending), + testCase(runningExecutor(1), PodRunning), + testCase(runningExecutorWithFailedContainer(2), PodFailed), + testCase(succeededExecutor(3), PodSucceeded), + 
testCase(failedExecutorWithoutDeletion(4), PodFailed), + testCase(deletedExecutor(5), PodDeleted), + testCase(unknownExecutor(6), PodUnknown) + ) + doTest(testCases) } test("Updates add new pods for non-matching ids and edit existing pods for matching ids") { + ExecutorPodsSnapshot.setShouldCheckAllContainers(false) val originalPods = Seq( pendingExecutor(0), runningExecutor(1)) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotsStoreSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotsStoreSuite.scala index cf54b3c4eb329..614c198bd9caf 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotsStoreSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotsStoreSuite.scala @@ -35,6 +35,7 @@ class ExecutorPodsSnapshotsStoreSuite extends SparkFunSuite with BeforeAndAfter before { eventBufferScheduler = new DeterministicScheduler() eventQueueUnderTest = new ExecutorPodsSnapshotsStoreImpl(eventBufferScheduler) + ExecutorPodsSnapshot.setShouldCheckAllContainers(false) } test("Subscribers get notified of events periodically.") { From 0c66a88d1d1336c9f3b474622315254952cbd56e Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Sat, 24 Oct 2020 15:36:41 -0700 Subject: [PATCH 0317/1009] [SPARK-29438][SS][FOLLOWUP] Add regression tests for Streaming Aggregation and flatMapGroupsWithState ### What changes were proposed in this pull request? This patch adds new UTs to prevent SPARK-29438 for streaming aggregation as well as flatMapGroupsWithState, as we agree about the review comment quote here: https://github.com/apache/spark/pull/26162#issuecomment-576929692 > LGTM for this PR. But on a additional note, this is a very subtle and easy-to-make bug with TaskContext.getPartitionId. I wonder if this bug is present in any other stateful operation. I wonder if this bug is present in any other stateful operation. Can you please verify how partitionId is used in the other stateful operations? For now they're not broken, but even better if we have UTs to prevent the case for the future. ### Why are the changes needed? New UTs will prevent streaming aggregation and flatMapGroupsWithState to be broken in future where it is placed on the right side of UNION and the number of partition is changing on the left side of UNION. Please refer SPARK-29438 for more details. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Added UTs. Closes #27333 from HeartSaVioR/SPARK-29438-add-regression-test. 
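To make the quoted concern concrete, here is a small self-contained sketch (illustration only, not part of this patch; the data and slice counts are made up) showing how a union shifts `TaskContext.partitionId()` on its right side while the index passed to `mapPartitionsWithIndex` stays local to that RDD. This is why stateful operators must not derive their state store partition from the task's partition ID.

```scala
// Hedged sketch of the partition ID shift under UNION (illustration only).
import org.apache.spark.TaskContext
import org.apache.spark.sql.SparkSession

object PartitionIdShift {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[4]").appName("partition-id-shift").getOrCreate()
    val sc = spark.sparkContext

    val left = sc.parallelize(Seq("l1", "l2", "l3"), numSlices = 3)
    val right = sc.parallelize(Seq("r1", "r2"), numSlices = 2)
      .mapPartitionsWithIndex { (localIndex, iter) =>
        // localIndex is the partition index within this RDD; TaskContext.getPartitionId()
        // is the ID of the task's partition in the final (unioned) RDD.
        iter.map(v => s"$v localIndex=$localIndex taskPartitionId=${TaskContext.getPartitionId()}")
      }

    left.union(right).collect().foreach(println)
    // Expected under these assumptions: the right-side rows report localIndex 0 and 1 but
    // taskPartitionId 3 and 4, because the union's first three partitions belong to `left`.
    spark.stop()
  }
}
```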
Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Liang-Chi Hsieh --- .../FlatMapGroupsWithStateSuite.scala | 52 ++++++++++++++++++- .../streaming/StreamingAggregationSuite.scala | 45 +++++++++++++++- .../StreamingDeduplicationSuite.scala | 42 +++++++++++++++ 3 files changed, 137 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala index e2887e78b0508..2efd715b7731c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala @@ -26,7 +26,7 @@ import org.scalatest.exceptions.TestFailedException import org.apache.spark.SparkException import org.apache.spark.api.java.function.FlatMapGroupsWithStateFunction -import org.apache.spark.sql.Encoder +import org.apache.spark.sql.{DataFrame, Encoder} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, UnsafeProjection, UnsafeRow} import org.apache.spark.sql.catalyst.plans.logical.FlatMapGroupsWithState @@ -1020,6 +1020,56 @@ class FlatMapGroupsWithStateSuite extends StateStoreMetricsTest { spark.createDataset(Seq(("a", 2), ("b", 1))).toDF) } + testWithAllStateVersions("SPARK-29438: ensure UNION doesn't lead (flat)MapGroupsWithState" + + " to use shifted partition IDs") { + val stateFunc = (key: String, values: Iterator[String], state: GroupState[RunningCount]) => { + val count = state.getOption.map(_.count).getOrElse(0L) + values.size + state.update(RunningCount(count)) + (key, count.toString) + } + + def constructUnionDf(desiredPartitionsForInput1: Int) + : (MemoryStream[String], MemoryStream[String], DataFrame) = { + val input1 = MemoryStream[String](desiredPartitionsForInput1) + val input2 = MemoryStream[String] + val df1 = input1.toDF() + .select($"value", $"value") + val df2 = input2.toDS() + .groupByKey(x => x) + .mapGroupsWithState(stateFunc) // Types = State: MyState, Out: (Str, Str) + .toDF() + + // Unioned DF would have columns as (String, String) + (input1, input2, df1.union(df2)) + } + + withTempDir { checkpointDir => + val (input1, input2, unionDf) = constructUnionDf(2) + testStream(unionDf, Update)( + StartStream(checkpointLocation = checkpointDir.getAbsolutePath), + MultiAddData(input1, "input1-a")(input2, "input2-a"), + CheckNewAnswer(("input1-a", "input1-a"), ("input2-a", "1")), + StopStream + ) + + // We're restoring the query with different number of partitions in left side of UNION, + // which may lead right side of union to have mismatched partition IDs (e.g. if it relies on + // TaskContext.partitionId()). This test will verify (flat)MapGroupsWithState doesn't have + // such issue. 
+ + val (newInput1, newInput2, newUnionDf) = constructUnionDf(3) + + newInput1.addData("input1-a") + newInput2.addData("input2-a") + + testStream(newUnionDf, Update)( + StartStream(checkpointLocation = checkpointDir.getAbsolutePath), + MultiAddData(newInput1, "input1-a")(newInput2, "input2-a", "input2-b"), + CheckNewAnswer(("input1-a", "input1-a"), ("input2-a", "2"), ("input2-b", "1")) + ) + } + } + testQuietly("StateStore.abort on task failure handling") { val stateFunc = (key: String, values: Iterator[String], state: GroupState[RunningCount]) => { if (FlatMapGroupsWithStateSuite.failInTask) throw new Exception("expected failure") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala index cb69460ca1580..4a57cc27b1d59 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala @@ -27,7 +27,7 @@ import org.scalatest.Assertions import org.apache.spark.{SparkEnv, SparkException} import org.apache.spark.rdd.BlockRDD -import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, SparkSession} +import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, Row, SparkSession} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.plans.logical.Aggregate import org.apache.spark.sql.catalyst.util.DateTimeConstants._ @@ -337,6 +337,49 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { ) } + testWithAllStateVersions("SPARK-29438: ensure UNION doesn't lead streaming aggregation to use" + + " shifted partition IDs") { + def constructUnionDf(desiredPartitionsForInput1: Int) + : (MemoryStream[Int], MemoryStream[Int], DataFrame) = { + val input1 = MemoryStream[Int](desiredPartitionsForInput1) + val input2 = MemoryStream[Int] + val df1 = input1.toDF() + .select($"value", $"value" + 1) + val df2 = input2.toDF() + .groupBy($"value") + .agg(count("*")) + + // Unioned DF would have columns as (Int, Int) + (input1, input2, df1.union(df2)) + } + + withTempDir { checkpointDir => + val (input1, input2, unionDf) = constructUnionDf(2) + testStream(unionDf, Update)( + StartStream(checkpointLocation = checkpointDir.getAbsolutePath), + MultiAddData(input1, 11, 12)(input2, 21, 22), + CheckNewAnswer(Row(11, 12), Row(12, 13), Row(21, 1), Row(22, 1)), + StopStream + ) + + // We're restoring the query with different number of partitions in left side of UNION, + // which may lead right side of union to have mismatched partition IDs (e.g. if it relies on + // TaskContext.partitionId()). This test will verify streaming aggregation doesn't have + // such issue. 
+ + val (newInput1, newInput2, newUnionDf) = constructUnionDf(3) + + newInput1.addData(11, 12) + newInput2.addData(21, 22) + + testStream(newUnionDf, Update)( + StartStream(checkpointLocation = checkpointDir.getAbsolutePath), + MultiAddData(newInput1, 13, 14)(newInput2, 22, 23), + CheckNewAnswer(Row(13, 14), Row(14, 15), Row(22, 2), Row(23, 1)) + ) + } + } + testQuietlyWithAllStateVersions("midbatch failure") { val inputData = MemoryStream[Int] FailureSingleton.firstTime = true diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala index 1f346aac8d2c2..e1505acf3ecda 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.streaming import org.scalatest.BeforeAndAfterAll +import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, HashPartitioning, SinglePartition} import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ import org.apache.spark.sql.execution.streaming.{MemoryStream, StreamingDeduplicateExec} @@ -294,4 +295,45 @@ class StreamingDeduplicationSuite extends StateStoreMetricsTest { testWithFlag(true) testWithFlag(false) } + + test("SPARK-29438: ensure UNION doesn't lead streaming deduplication to use" + + " shifted partition IDs") { + def constructUnionDf(desiredPartitionsForInput1: Int) + : (MemoryStream[Int], MemoryStream[Int], DataFrame) = { + val input1 = MemoryStream[Int](desiredPartitionsForInput1) + val input2 = MemoryStream[Int] + val df1 = input1.toDF().select($"value") + val df2 = input2.toDF().dropDuplicates("value") + + // Unioned DF would have columns as (Int) + (input1, input2, df1.union(df2)) + } + + withTempDir { checkpointDir => + val (input1, input2, unionDf) = constructUnionDf(2) + testStream(unionDf, Append)( + StartStream(checkpointLocation = checkpointDir.getAbsolutePath), + MultiAddData(input1, 11, 12)(input2, 21, 22), + CheckNewAnswer(11, 12, 21, 22), + StopStream + ) + + // We're restoring the query with different number of partitions in left side of UNION, + // which may lead right side of union to have mismatched partition IDs (e.g. if it relies on + // TaskContext.partitionId()). This test will verify streaming deduplication doesn't have + // such issue. + + val (newInput1, newInput2, newUnionDf) = constructUnionDf(3) + + newInput1.addData(11, 12) + newInput2.addData(21, 22) + + testStream(newUnionDf, Append)( + StartStream(checkpointLocation = checkpointDir.getAbsolutePath), + MultiAddData(newInput1, 13, 14)(newInput2, 22, 23), + CheckNewAnswer(13, 14, 23) + ) + } + } + } From 87b498462b82fce02dd50286887092cf7858d2e8 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Sun, 25 Oct 2020 16:15:55 -0700 Subject: [PATCH 0318/1009] [SPARK-33228][SQL] Don't uncache data when replacing a view having the same logical plan ### What changes were proposed in this pull request? SPARK-30494's updated the `CreateViewCommand` code to implicitly drop cache when replacing an existing view. But, this change drops cache even when replacing a view having the same logical plan. 
A sequence of queries to reproduce this is as follows:
```
// Spark v2.4.6+
scala> val df = spark.range(1).selectExpr("id a", "id b")
scala> df.cache()
scala> df.explain()
== Physical Plan ==
*(1) ColumnarToRow
+- InMemoryTableScan [a#2L, b#3L]
      +- InMemoryRelation [a#2L, b#3L], StorageLevel(disk, memory, deserialized, 1 replicas)
            +- *(1) Project [id#0L AS a#2L, id#0L AS b#3L]
               +- *(1) Range (0, 1, step=1, splits=4)

scala> df.createOrReplaceTempView("t")
scala> sql("select * from t").explain()
== Physical Plan ==
*(1) ColumnarToRow
+- InMemoryTableScan [a#2L, b#3L]
      +- InMemoryRelation [a#2L, b#3L], StorageLevel(disk, memory, deserialized, 1 replicas)
            +- *(1) Project [id#0L AS a#2L, id#0L AS b#3L]
               +- *(1) Range (0, 1, step=1, splits=4)

// If one re-runs the same query `df.createOrReplaceTempView("t")`, the cache is swept away
scala> df.createOrReplaceTempView("t")
scala> sql("select * from t").explain()
== Physical Plan ==
*(1) Project [id#0L AS a#2L, id#0L AS b#3L]
+- *(1) Range (0, 1, step=1, splits=4)

// Until v2.4.6
scala> val df = spark.range(1).selectExpr("id a", "id b")
scala> df.cache()
scala> df.createOrReplaceTempView("t")
scala> sql("select * from t").explain()
20/10/23 22:33:42 WARN ObjectStore: Failed to get database global_temp, returning NoSuchObjectException
== Physical Plan ==
*(1) InMemoryTableScan [a#2L, b#3L]
   +- InMemoryRelation [a#2L, b#3L], StorageLevel(disk, memory, deserialized, 1 replicas)
         +- *(1) Project [id#0L AS a#2L, id#0L AS b#3L]
            +- *(1) Range (0, 1, step=1, splits=4)

scala> df.createOrReplaceTempView("t")
scala> sql("select * from t").explain()
== Physical Plan ==
*(1) InMemoryTableScan [a#2L, b#3L]
   +- InMemoryRelation [a#2L, b#3L], StorageLevel(disk, memory, deserialized, 1 replicas)
         +- *(1) Project [id#0L AS a#2L, id#0L AS b#3L]
            +- *(1) Range (0, 1, step=1, splits=4)
```

### Why are the changes needed?

Bug fix.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Added tests.

Closes #30140 from maropu/FixBugInReplaceView.
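As a quick user-level check of the fixed behavior, a hedged sketch follows (it assumes a spark-shell session where `spark` is predefined; it is not the test added by this patch):

```scala
// The cache registered for df should survive re-registering the same plan under the same name.
val df = spark.range(1).selectExpr("id a", "id b")
df.cache()
df.createOrReplaceTempView("t")
assert(spark.catalog.isCached("t"))  // cached after the first registration
df.createOrReplaceTempView("t")      // replace with the very same logical plan
assert(spark.catalog.isCached("t"))  // with this change, the cache entry is kept
```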
Authored-by: Takeshi Yamamuro Signed-off-by: Dongjoon Hyun --- .../spark/sql/execution/command/views.scala | 10 ++++---- .../apache/spark/sql/CachedTableSuite.scala | 24 +++++++++++++++++++ 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index 94f34a9b39b28..bcc0e1fd82d7a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -110,17 +110,19 @@ case class CreateViewCommand( verifyTemporaryObjectsNotExists(catalog) if (viewType == LocalTempView) { - if (replace && catalog.getTempView(name.table).isDefined) { - logDebug(s"Try to uncache ${name.quotedString} before replacing.") + if (replace && catalog.getTempView(name.table).isDefined && + !catalog.getTempView(name.table).get.sameResult(child)) { + logInfo(s"Try to uncache ${name.quotedString} before replacing.") CommandUtils.uncacheTableOrView(sparkSession, name.quotedString) } val aliasedPlan = aliasPlan(sparkSession, analyzedPlan) catalog.createTempView(name.table, aliasedPlan, overrideIfExists = replace) } else if (viewType == GlobalTempView) { - if (replace && catalog.getGlobalTempView(name.table).isDefined) { + if (replace && catalog.getGlobalTempView(name.table).isDefined && + !catalog.getGlobalTempView(name.table).get.sameResult(child)) { val db = sparkSession.sessionState.conf.getConf(StaticSQLConf.GLOBAL_TEMP_DATABASE) val globalTempView = TableIdentifier(name.table, Option(db)) - logDebug(s"Try to uncache ${globalTempView.quotedString} before replacing.") + logInfo(s"Try to uncache ${globalTempView.quotedString} before replacing.") CommandUtils.uncacheTableOrView(sparkSession, globalTempView.quotedString) } val aliasedPlan = aliasPlan(sparkSession, analyzedPlan) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala index 20f2a7f947b81..adc725ed9b062 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala @@ -1184,4 +1184,28 @@ class CachedTableSuite extends QueryTest with SQLTestUtils assert(spark.sharedState.cacheManager.isEmpty) } } + + test("SPARK-33228: Don't uncache data when replacing an existing view having the same plan") { + withTempView("tempView") { + spark.catalog.clearCache() + val df = spark.range(1).selectExpr("id a", "id b") + df.cache() + assert(spark.sharedState.cacheManager.lookupCachedData(df).isDefined) + df.createOrReplaceTempView("tempView") + assert(spark.sharedState.cacheManager.lookupCachedData(df).isDefined) + df.createOrReplaceTempView("tempView") + assert(spark.sharedState.cacheManager.lookupCachedData(df).isDefined) + } + + withTempView("tempGlobalTempView") { + spark.catalog.clearCache() + val df = spark.range(1).selectExpr("id a", "id b") + df.cache() + assert(spark.sharedState.cacheManager.lookupCachedData(df).isDefined) + df.createOrReplaceGlobalTempView("tempGlobalTempView") + assert(spark.sharedState.cacheManager.lookupCachedData(df).isDefined) + df.createOrReplaceGlobalTempView("tempGlobalTempView") + assert(spark.sharedState.cacheManager.lookupCachedData(df).isDefined) + } + } } From ce0ebf5f023b1d2230bbd4b9ffad294edef3bca7 Mon Sep 17 00:00:00 2001 From: Emi Date: Sun, 25 Oct 2020 17:06:06 -0700 Subject: [PATCH 0319/1009] 
[SPARK-33234][INFRA] Generates SHA-512 using shasum

### What changes were proposed in this pull request?

I am generating the SHA-512 using the standard `shasum`, which also has better output compared to GPG.

### Why are the changes needed?

This makes the hash much easier to verify for users that don't have GPG. A user with GPG can check the keys, but a user without GPG will have a hard time validating the SHA-512 based on the 'pretty printed' format. Apache Spark is the only project where I've seen this format. Most other Apache projects have a one-line hash file.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

This patch assumes the build system has shasum (it should, but I can't test this).

Closes #30123 from emilianbold/master.

Authored-by: Emi
Signed-off-by: Dongjoon Hyun
---
 dev/create-release/release-build.sh | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh
index c7fee13d39c6b..240f4c8dfd371 100755
--- a/dev/create-release/release-build.sh
+++ b/dev/create-release/release-build.sh
@@ -182,8 +182,7 @@ if [[ "$1" == "package" ]]; then
   tar cvzf spark-$SPARK_VERSION.tgz --exclude spark-$SPARK_VERSION/.git spark-$SPARK_VERSION
   echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --armour --output spark-$SPARK_VERSION.tgz.asc \
     --detach-sig spark-$SPARK_VERSION.tgz
-  echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --print-md \
-    SHA512 spark-$SPARK_VERSION.tgz > spark-$SPARK_VERSION.tgz.sha512
+  shasum -a 512 spark-$SPARK_VERSION.tgz > spark-$SPARK_VERSION.tgz.sha512
   rm -rf spark-$SPARK_VERSION

 ZINC_PORT=3035

From 56ab60fb7ae37ca64d668bc4a1f18216cc7186fd Mon Sep 17 00:00:00 2001
From: angerszhu
Date: Mon, 26 Oct 2020 11:20:29 +0900
Subject: [PATCH 0320/1009] [SPARK-32388][SQL] TRANSFORM with schema-less mode
 should keep the same with hive

### What changes were proposed in this pull request?

In the current Spark script transformation with Hive serde mode, the schema-less case produces a result that is different from Hive's. This PR keeps the result the same as Hive's script transform with serde.

#### Hive script transform with serde in schema-less mode

```
hive> create table t (c0 int, c1 int, c2 int);
hive> INSERT INTO t VALUES (1, 1, 1);
hive> INSERT INTO t VALUES (2, 2, 2);
hive> CREATE VIEW v AS SELECT TRANSFORM(c0, c1, c2) USING 'cat' FROM t;

hive> DESCRIBE v;
key      string
value    string

hive> SELECT * FROM v;
1  1  1
2  2  2

hive> SELECT key FROM v;
1
2

hive> SELECT value FROM v;
1  1
2  2
```

#### Spark script transform with Hive serde in schema-less mode

```
hive> create table t (c0 int, c1 int, c2 int);
hive> INSERT INTO t VALUES (1, 1, 1);
hive> INSERT INTO t VALUES (2, 2, 2);
hive> CREATE VIEW v AS SELECT TRANSFORM(c0, c1, c2) USING 'cat' FROM t;

hive> SELECT * FROM v;
1  1
2  2
```

**No serde mode in hive (ROW FORMAT DELIMITED)**

![image](https://user-images.githubusercontent.com/46485123/90088770-55841e00-dd52-11ea-92dd-7fe52d93f0b3.png)
Authored-by: angerszhu Signed-off-by: HyukjinKwon --- .../BaseScriptTransformationExec.scala | 11 +- .../spark/sql/execution/SparkSqlParser.scala | 4 +- .../BaseScriptTransformationSuite.scala | 40 ++++- .../HiveScriptTransformationSuite.scala | 159 ++++++++++++++++-- 4 files changed, 189 insertions(+), 25 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala index c5107645f46f8..74e5aa716ad67 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala @@ -111,15 +111,14 @@ trait BaseScriptTransformationExec extends UnaryExecNode { .zip(outputFieldWriters) .map { case (data, writer) => writer(data) }) } else { - // In schema less mode, hive default serde will choose first two output column as output - // if output column size less then 2, it will throw ArrayIndexOutOfBoundsException. - // Here we change spark's behavior same as hive's default serde. - // But in hive, TRANSFORM with schema less behavior like origin spark, we will fix this - // to keep spark and hive behavior same in SPARK-32388 + // In schema less mode, hive will choose first two output column as output. + // If output column size less then 2, it will return NULL for columns with missing values. + // Here we split row string and choose first 2 values, if values's size less then 2, + // we pad NULL value until 2 to make behavior same with hive. val kvWriter = CatalystTypeConverters.createToCatalystConverter(StringType) prevLine: String => new GenericInternalRow( - prevLine.split(outputRowFormat).slice(0, 2) + prevLine.split(outputRowFormat).slice(0, 2).padTo(2, null) .map(kvWriter)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 0a5f4c3ed4bcb..f46526d419158 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -785,7 +785,9 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { // Use default (serde) format. 
val name = conf.getConfString("hive.script.serde", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") - val props = Seq("field.delim" -> "\t") + val props = Seq( + "field.delim" -> "\t", + "serialization.last.column.takes.rest" -> "true") val recordHandler = Option(conf.getConfString(configKey, defaultConfigValue)) (Nil, Option(name), props, recordHandler) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala index c07ea0f12f94e..e6029400997a2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala @@ -137,10 +137,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU } } - test("SPARK-25990: TRANSFORM should handle schema less correctly (no serde)") { - assume(TestUtils.testCommandAvailable("python")) - val scriptFilePath = copyAndGetResourceFile("test_script.py", ".py").getAbsoluteFile - + test("SPARK-32388: TRANSFORM should handle schema less correctly (no serde)") { withTempView("v") { val df = Seq( (1, "1", 1.0, BigDecimal(1.0), new Timestamp(1)), @@ -157,7 +154,24 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU df.col("c").expr, df.col("d").expr, df.col("e").expr), - script = s"python $scriptFilePath", + script = "cat", + output = Seq( + AttributeReference("key", StringType)(), + AttributeReference("value", StringType)()), + child = child, + ioschema = defaultIOSchema.copy(schemaLess = true) + ), + df.select( + 'a.cast("string").as("key"), + 'b.cast("string").as("value")).collect()) + + checkAnswer( + df, + (child: SparkPlan) => createScriptTransformationExec( + input = Seq( + df.col("a").expr, + df.col("b").expr), + script = "cat", output = Seq( AttributeReference("key", StringType)(), AttributeReference("value", StringType)()), @@ -167,6 +181,22 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU df.select( 'a.cast("string").as("key"), 'b.cast("string").as("value")).collect()) + + checkAnswer( + df, + (child: SparkPlan) => createScriptTransformationExec( + input = Seq( + df.col("a").expr), + script = "cat", + output = Seq( + AttributeReference("key", StringType)(), + AttributeReference("value", StringType)()), + child = child, + ioschema = defaultIOSchema.copy(schemaLess = true) + ), + df.select( + 'a.cast("string").as("key"), + lit(null)).collect()) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala index d247f37130776..0af0563715e12 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala @@ -156,10 +156,7 @@ class HiveScriptTransformationSuite extends BaseScriptTransformationSuite with T assert(uncaughtExceptionHandler.exception.isEmpty) } - test("SPARK-25990: TRANSFORM should handle schema less correctly (hive serde)") { - assume(TestUtils.testCommandAvailable("python")) - val scriptFilePath = copyAndGetResourceFile("test_script.py", ".py").getAbsolutePath - + test("SPARK-32388: TRANSFORM should handle schema less correctly (hive serde)") { withTempView("v") { val df = Seq( (1, "1", 
1.0, BigDecimal(1.0), new Timestamp(1)), @@ -168,21 +165,157 @@ class HiveScriptTransformationSuite extends BaseScriptTransformationSuite with T ).toDF("a", "b", "c", "d", "e") // Note column d's data type is Decimal(38, 18) df.createTempView("v") - val query = sql( - s""" - |SELECT TRANSFORM(a, b, c, d, e) - |USING 'python ${scriptFilePath}' - |FROM v - """.stripMargin) + // In hive default serde mode, if we don't define output schema, + // when output column size > 2 and don't specify serde, + // it will choose take rest columns in second column as output schema + // (key: String, value: String) + checkAnswer( + sql( + s""" + |SELECT TRANSFORM(a, b, c, d, e) + | USING 'cat' + |FROM v + """.stripMargin), + identity, + df.select( + 'a.cast("string").as("key"), + concat_ws("\t", + 'b.cast("string"), + 'c.cast("string"), + decimalToString('d), + 'e.cast("string")).as("value")).collect()) + + // In hive default serde mode, if we don't define output schema, + // when output column size > 2 and just specify serde, + // it will choose take rest columns in second column as output schema + // (key: String, value: String) + checkAnswer( + sql( + s""" + |SELECT TRANSFORM(a, b, c, d, e) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t' + | ) + | USING 'cat' + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t' + | ) + |FROM v + """.stripMargin), + identity, + df.select( + 'a.cast("string").as("key"), + 'b.cast("string").as("value")).collect()) + + + // In hive default serde mode, if we don't define output schema, + // when output column size > 2 and specify serde with + // 'serialization.last.column.takes.rest=true', + // it will choose take rest columns in second column as output schema + // (key: String, value: String) + checkAnswer( + sql( + s""" + |SELECT TRANSFORM(a, b, c, d, e) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'true' + | ) + | USING 'cat' + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'true' + | ) + |FROM v + """.stripMargin), + identity, + df.select( + 'a.cast("string").as("key"), + concat_ws("\t", + 'b.cast("string"), + 'c.cast("string"), + decimalToString('d), + 'e.cast("string")).as("value")).collect()) + + // In hive default serde mode, if we don't define output schema, + // when output column size > 2 and specify serde + // with 'serialization.last.column.takes.rest=false', + // it will choose first two column as output schema (key: String, value: String) + checkAnswer( + sql( + s""" + |SELECT TRANSFORM(a, b, c, d, e) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'false' + | ) + | USING 'cat' + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'false' + | ) + |FROM v + """.stripMargin), + identity, + df.select( + 'a.cast("string").as("key"), + 'b.cast("string").as("value")).collect()) - // In hive default serde mode, if we don't define output schema, it will choose first - // two column as output schema (key: String, value: String) + // In 
hive default serde mode, if we don't define output schema, + // when output column size = 2 and specify serde, it will these two column as + // output schema (key: String, value: String) checkAnswer( - query, + sql( + s""" + |SELECT TRANSFORM(a, b) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'true' + | ) + | USING 'cat' + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'true' + | ) + |FROM v + """.stripMargin), identity, df.select( 'a.cast("string").as("key"), 'b.cast("string").as("value")).collect()) + + // In hive default serde mode, if we don't define output schema, + // when output column size < 2 and specify serde, it will return null for deficiency + // output schema (key: String, value: String) + checkAnswer( + sql( + s""" + |SELECT TRANSFORM(a) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'true' + | ) + | USING 'cat' + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'true' + | ) + |FROM v + """.stripMargin), + identity, + df.select( + 'a.cast("string").as("key"), + lit(null)).collect()) } } From 369cc614f369f9fd9be5b13a3f047a261c8e8d90 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Mon, 26 Oct 2020 11:38:48 +0900 Subject: [PATCH 0321/1009] Revert "[SPARK-32388][SQL] TRANSFORM with schema-less mode should keep the same with hive" This reverts commit 56ab60fb7ae37ca64d668bc4a1f18216cc7186fd. --- .../BaseScriptTransformationExec.scala | 11 +- .../spark/sql/execution/SparkSqlParser.scala | 4 +- .../BaseScriptTransformationSuite.scala | 40 +---- .../HiveScriptTransformationSuite.scala | 159 ++---------------- 4 files changed, 25 insertions(+), 189 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala index 74e5aa716ad67..c5107645f46f8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala @@ -111,14 +111,15 @@ trait BaseScriptTransformationExec extends UnaryExecNode { .zip(outputFieldWriters) .map { case (data, writer) => writer(data) }) } else { - // In schema less mode, hive will choose first two output column as output. - // If output column size less then 2, it will return NULL for columns with missing values. - // Here we split row string and choose first 2 values, if values's size less then 2, - // we pad NULL value until 2 to make behavior same with hive. + // In schema less mode, hive default serde will choose first two output column as output + // if output column size less then 2, it will throw ArrayIndexOutOfBoundsException. + // Here we change spark's behavior same as hive's default serde. 
+ // But in hive, TRANSFORM with schema less behavior like origin spark, we will fix this + // to keep spark and hive behavior same in SPARK-32388 val kvWriter = CatalystTypeConverters.createToCatalystConverter(StringType) prevLine: String => new GenericInternalRow( - prevLine.split(outputRowFormat).slice(0, 2).padTo(2, null) + prevLine.split(outputRowFormat).slice(0, 2) .map(kvWriter)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index f46526d419158..0a5f4c3ed4bcb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -785,9 +785,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { // Use default (serde) format. val name = conf.getConfString("hive.script.serde", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") - val props = Seq( - "field.delim" -> "\t", - "serialization.last.column.takes.rest" -> "true") + val props = Seq("field.delim" -> "\t") val recordHandler = Option(conf.getConfString(configKey, defaultConfigValue)) (Nil, Option(name), props, recordHandler) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala index e6029400997a2..c07ea0f12f94e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala @@ -137,7 +137,10 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU } } - test("SPARK-32388: TRANSFORM should handle schema less correctly (no serde)") { + test("SPARK-25990: TRANSFORM should handle schema less correctly (no serde)") { + assume(TestUtils.testCommandAvailable("python")) + val scriptFilePath = copyAndGetResourceFile("test_script.py", ".py").getAbsoluteFile + withTempView("v") { val df = Seq( (1, "1", 1.0, BigDecimal(1.0), new Timestamp(1)), @@ -154,24 +157,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU df.col("c").expr, df.col("d").expr, df.col("e").expr), - script = "cat", - output = Seq( - AttributeReference("key", StringType)(), - AttributeReference("value", StringType)()), - child = child, - ioschema = defaultIOSchema.copy(schemaLess = true) - ), - df.select( - 'a.cast("string").as("key"), - 'b.cast("string").as("value")).collect()) - - checkAnswer( - df, - (child: SparkPlan) => createScriptTransformationExec( - input = Seq( - df.col("a").expr, - df.col("b").expr), - script = "cat", + script = s"python $scriptFilePath", output = Seq( AttributeReference("key", StringType)(), AttributeReference("value", StringType)()), @@ -181,22 +167,6 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU df.select( 'a.cast("string").as("key"), 'b.cast("string").as("value")).collect()) - - checkAnswer( - df, - (child: SparkPlan) => createScriptTransformationExec( - input = Seq( - df.col("a").expr), - script = "cat", - output = Seq( - AttributeReference("key", StringType)(), - AttributeReference("value", StringType)()), - child = child, - ioschema = defaultIOSchema.copy(schemaLess = true) - ), - df.select( - 'a.cast("string").as("key"), - lit(null)).collect()) } } diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala index 0af0563715e12..d247f37130776 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala @@ -156,7 +156,10 @@ class HiveScriptTransformationSuite extends BaseScriptTransformationSuite with T assert(uncaughtExceptionHandler.exception.isEmpty) } - test("SPARK-32388: TRANSFORM should handle schema less correctly (hive serde)") { + test("SPARK-25990: TRANSFORM should handle schema less correctly (hive serde)") { + assume(TestUtils.testCommandAvailable("python")) + val scriptFilePath = copyAndGetResourceFile("test_script.py", ".py").getAbsolutePath + withTempView("v") { val df = Seq( (1, "1", 1.0, BigDecimal(1.0), new Timestamp(1)), @@ -165,157 +168,21 @@ class HiveScriptTransformationSuite extends BaseScriptTransformationSuite with T ).toDF("a", "b", "c", "d", "e") // Note column d's data type is Decimal(38, 18) df.createTempView("v") - // In hive default serde mode, if we don't define output schema, - // when output column size > 2 and don't specify serde, - // it will choose take rest columns in second column as output schema - // (key: String, value: String) - checkAnswer( - sql( - s""" - |SELECT TRANSFORM(a, b, c, d, e) - | USING 'cat' - |FROM v - """.stripMargin), - identity, - df.select( - 'a.cast("string").as("key"), - concat_ws("\t", - 'b.cast("string"), - 'c.cast("string"), - decimalToString('d), - 'e.cast("string")).as("value")).collect()) - - // In hive default serde mode, if we don't define output schema, - // when output column size > 2 and just specify serde, - // it will choose take rest columns in second column as output schema - // (key: String, value: String) - checkAnswer( - sql( - s""" - |SELECT TRANSFORM(a, b, c, d, e) - | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - | WITH SERDEPROPERTIES ( - | 'field.delim' = '\t' - | ) - | USING 'cat' - | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - | WITH SERDEPROPERTIES ( - | 'field.delim' = '\t' - | ) - |FROM v - """.stripMargin), - identity, - df.select( - 'a.cast("string").as("key"), - 'b.cast("string").as("value")).collect()) - - - // In hive default serde mode, if we don't define output schema, - // when output column size > 2 and specify serde with - // 'serialization.last.column.takes.rest=true', - // it will choose take rest columns in second column as output schema - // (key: String, value: String) - checkAnswer( - sql( - s""" - |SELECT TRANSFORM(a, b, c, d, e) - | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - | WITH SERDEPROPERTIES ( - | 'field.delim' = '\t', - | 'serialization.last.column.takes.rest' = 'true' - | ) - | USING 'cat' - | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - | WITH SERDEPROPERTIES ( - | 'field.delim' = '\t', - | 'serialization.last.column.takes.rest' = 'true' - | ) - |FROM v - """.stripMargin), - identity, - df.select( - 'a.cast("string").as("key"), - concat_ws("\t", - 'b.cast("string"), - 'c.cast("string"), - decimalToString('d), - 'e.cast("string")).as("value")).collect()) - - // In hive default serde mode, if we don't define output schema, - // when output column size > 2 and specify serde - // with 'serialization.last.column.takes.rest=false', - // 
it will choose first two column as output schema (key: String, value: String) - checkAnswer( - sql( - s""" - |SELECT TRANSFORM(a, b, c, d, e) - | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - | WITH SERDEPROPERTIES ( - | 'field.delim' = '\t', - | 'serialization.last.column.takes.rest' = 'false' - | ) - | USING 'cat' - | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - | WITH SERDEPROPERTIES ( - | 'field.delim' = '\t', - | 'serialization.last.column.takes.rest' = 'false' - | ) - |FROM v - """.stripMargin), - identity, - df.select( - 'a.cast("string").as("key"), - 'b.cast("string").as("value")).collect()) + val query = sql( + s""" + |SELECT TRANSFORM(a, b, c, d, e) + |USING 'python ${scriptFilePath}' + |FROM v + """.stripMargin) - // In hive default serde mode, if we don't define output schema, - // when output column size = 2 and specify serde, it will these two column as - // output schema (key: String, value: String) + // In hive default serde mode, if we don't define output schema, it will choose first + // two column as output schema (key: String, value: String) checkAnswer( - sql( - s""" - |SELECT TRANSFORM(a, b) - | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - | WITH SERDEPROPERTIES ( - | 'field.delim' = '\t', - | 'serialization.last.column.takes.rest' = 'true' - | ) - | USING 'cat' - | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - | WITH SERDEPROPERTIES ( - | 'field.delim' = '\t', - | 'serialization.last.column.takes.rest' = 'true' - | ) - |FROM v - """.stripMargin), + query, identity, df.select( 'a.cast("string").as("key"), 'b.cast("string").as("value")).collect()) - - // In hive default serde mode, if we don't define output schema, - // when output column size < 2 and specify serde, it will return null for deficiency - // output schema (key: String, value: String) - checkAnswer( - sql( - s""" - |SELECT TRANSFORM(a) - | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - | WITH SERDEPROPERTIES ( - | 'field.delim' = '\t', - | 'serialization.last.column.takes.rest' = 'true' - | ) - | USING 'cat' - | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - | WITH SERDEPROPERTIES ( - | 'field.delim' = '\t', - | 'serialization.last.column.takes.rest' = 'true' - | ) - |FROM v - """.stripMargin), - identity, - df.select( - 'a.cast("string").as("key"), - lit(null)).collect()) } } From d87a0bb2caa6804d59130c41a4c005acb2e4aad2 Mon Sep 17 00:00:00 2001 From: Cheng Su Date: Mon, 26 Oct 2020 13:33:06 +0900 Subject: [PATCH 0322/1009] [SPARK-32862][SS] Left semi stream-stream join ### What changes were proposed in this pull request? This is to support left semi join in stream-stream join. The implementation of left semi join is (mostly in `StreamingSymmetricHashJoinExec` and `SymmetricHashJoinStateManager`): * For left side input row, check if there's a match on right side state store. * if there's a match, output the left side row, but do not put the row in left side state store (no need to put in state store). * if there's no match, output nothing, but put the row in left side state store (with "matched" field to set to false in state store). * For right side input row, check if there's a match on left side state store. * For all matched left rows in state store, output the rows with "matched" field as false. Set all left rows with "matched" field to be true. Only output the left side rows matched for the first time to guarantee left semi join semantics. 
* State store eviction: evict rows from left/right side state store below watermark, same as inner join. Note a followup optimization can be to evict matched left side rows from state store earlier, even when the rows are still above watermark. However this needs more change in `SymmetricHashJoinStateManager`, so will leave this as a followup. ### Why are the changes needed? Current stream-stream join supports inner, left outer and right outer join (https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala#L166 ). We do see internally a lot of users are using left semi stream-stream join (not spark structured streaming), e.g. I want to get the ad impression (join left side) which has click (joint right side), but I don't care how many clicks per ad (left semi semantics). ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added unit tests in `UnsupportedOperationChecker.scala` and `StreamingJoinSuite.scala`. Closes #30076 from c21/stream-join. Authored-by: Cheng Su Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../UnsupportedOperationChecker.scala | 15 +- .../sql/catalyst/expressions/JoinedRow.scala | 10 + .../analysis/UnsupportedOperationsSuite.scala | 66 ++- .../StreamingSymmetricHashJoinExec.scala | 121 +++-- .../state/SymmetricHashJoinStateManager.scala | 11 +- .../sql/streaming/StreamingJoinSuite.scala | 502 +++++++++++++----- 6 files changed, 545 insertions(+), 180 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala index 44e8602ba7e81..809323455652e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala @@ -291,17 +291,17 @@ object UnsupportedOperationChecker extends Logging { throwError("Full outer joins with streaming DataFrames/Datasets are not supported") } - case LeftSemi | LeftAnti => + case LeftAnti => if (right.isStreaming) { - throwError("Left semi/anti joins with a streaming DataFrame/Dataset " + + throwError("Left anti joins with a streaming DataFrame/Dataset " + "on the right are not supported") } - // We support streaming left outer joins with static on the right always, and with - // stream on both sides under the appropriate conditions. - case LeftOuter => + // We support streaming left outer and left semi joins with static on the right always, + // and with stream on both sides under the appropriate conditions. 
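(Editorial sketch, not part of the patch: what "the appropriate conditions" look like from the user side, following the ad-impression/click motivation in the commit message — both inputs watermarked and an event-time range in the join condition. The source, column names, and the active `SparkSession` named `spark` below are assumed for illustration only.)

```scala
import org.apache.spark.sql.functions.expr

// Illustrative inputs: impressions on the left, clicks on the right.
val impressions = spark.readStream.format("rate").load()
  .selectExpr("value AS adId", "timestamp AS impressionTime")
val clicks = spark.readStream.format("rate").load()
  .selectExpr("value AS clickAdId", "timestamp AS clickTime")

// Emit each impression at most once, as soon as a matching click arrives.
val matchedImpressions = impressions
  .withWatermark("impressionTime", "10 seconds")
  .join(
    clicks.withWatermark("clickTime", "10 seconds"),
    expr("adId = clickAdId AND " +
      "clickTime BETWEEN impressionTime AND impressionTime + interval 1 minute"),
    "left_semi")
```

As the new tests below spell out, such a stream-stream left semi query is supported only in Append output mode; Update and Complete are rejected.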
+ case LeftOuter | LeftSemi => if (!left.isStreaming && right.isStreaming) { - throwError("Left outer join with a streaming DataFrame/Dataset " + + throwError(s"$joinType join with a streaming DataFrame/Dataset " + "on the right and a static DataFrame/Dataset on the left is not supported") } else if (left.isStreaming && right.isStreaming) { val watermarkInJoinKeys = StreamingJoinHelper.isWatermarkInJoinKeys(subPlan) @@ -311,7 +311,8 @@ object UnsupportedOperationChecker extends Logging { left.outputSet, right.outputSet, condition, Some(1000000)).isDefined if (!watermarkInJoinKeys && !hasValidWatermarkRange) { - throwError("Stream-stream outer join between two streaming DataFrame/Datasets " + + throwError( + s"Stream-stream $joinType join between two streaming DataFrame/Datasets " + "is not supported without a watermark in the join keys, or a watermark on " + "the nullable side and an appropriate range condition") } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/JoinedRow.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/JoinedRow.scala index 7770684a5b399..86871223d66ad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/JoinedRow.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/JoinedRow.scala @@ -55,6 +55,16 @@ class JoinedRow extends InternalRow { this } + /** Gets this JoinedRow's left base row. */ + def getLeft: InternalRow = { + row1 + } + + /** Gets this JoinedRow's right base row. */ + def getRight: InternalRow = { + row2 + } + override def toSeq(fieldTypes: Seq[DataType]): Seq[Any] = { assert(fieldTypes.length == row1.numFields + row2.numFields) val (left, right) = fieldTypes.splitAt(row1.numFields) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala index 3ec6fdeedd4b8..b9943a9744985 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala @@ -490,7 +490,69 @@ class UnsupportedOperationsSuite extends SparkFunSuite { _.join(_, joinType = LeftSemi), streamStreamSupported = false, batchStreamSupported = false, - expectedMsg = "left semi/anti joins") + expectedMsg = "LeftSemi join") + + // Left semi joins: update and complete mode not allowed + assertNotSupportedInStreamingPlan( + "left semi join with stream-stream relations and update mode", + streamRelation.join(streamRelation, joinType = LeftSemi, + condition = Some(attribute === attribute)), + OutputMode.Update(), + Seq("is not supported in Update output mode")) + assertNotSupportedInStreamingPlan( + "left semi join with stream-stream relations and complete mode", + Aggregate(Nil, aggExprs("d"), streamRelation.join(streamRelation, joinType = LeftSemi, + condition = Some(attribute === attribute))), + OutputMode.Complete(), + Seq("is not supported in Complete output mode")) + + // Left semi joins: stream-stream allowed with join on watermark attribute + // Note that the attribute need not be watermarked on both sides. 
+ assertSupportedInStreamingPlan( + "left semi join with stream-stream relations and join on attribute with left watermark", + streamRelation.join(streamRelation, joinType = LeftSemi, + condition = Some(attributeWithWatermark === attribute)), + OutputMode.Append()) + assertSupportedInStreamingPlan( + "left semi join with stream-stream relations and join on attribute with right watermark", + streamRelation.join(streamRelation, joinType = LeftSemi, + condition = Some(attribute === attributeWithWatermark)), + OutputMode.Append()) + assertNotSupportedInStreamingPlan( + "left semi join with stream-stream relations and join on non-watermarked attribute", + streamRelation.join(streamRelation, joinType = LeftSemi, + condition = Some(attribute === attribute)), + OutputMode.Append(), + Seq("without a watermark in the join keys")) + + // Left semi joins: stream-stream allowed with range condition yielding state value watermark + assertSupportedInStreamingPlan( + "left semi join with stream-stream relations and state value watermark", { + val leftRelation = streamRelation + val rightTimeWithWatermark = + AttributeReference("b", IntegerType)().withMetadata(watermarkMetadata) + val rightRelation = new TestStreamingRelation(rightTimeWithWatermark) + leftRelation.join( + rightRelation, + joinType = LeftSemi, + condition = Some(attribute > rightTimeWithWatermark + 10)) + }, + OutputMode.Append()) + + // Left semi joins: stream-stream not allowed with insufficient range condition + assertNotSupportedInStreamingPlan( + "left semi join with stream-stream relations and state value watermark", { + val leftRelation = streamRelation + val rightTimeWithWatermark = + AttributeReference("b", IntegerType)().withMetadata(watermarkMetadata) + val rightRelation = new TestStreamingRelation(rightTimeWithWatermark) + leftRelation.join( + rightRelation, + joinType = LeftSemi, + condition = Some(attribute < rightTimeWithWatermark + 10)) + }, + OutputMode.Append(), + Seq("appropriate range condition")) // Left anti joins: stream-* not allowed testBinaryOperationInStreamingPlan( @@ -498,7 +560,7 @@ class UnsupportedOperationsSuite extends SparkFunSuite { _.join(_, joinType = LeftAnti), streamStreamSupported = false, batchStreamSupported = false, - expectedMsg = "left semi/anti joins") + expectedMsg = "Left anti join") // Right outer joins: stream-* not allowed testBinaryOperationInStreamingPlan( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala index a52f5f4ac94ae..8b69205530769 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala @@ -152,7 +152,8 @@ case class StreamingSymmetricHashJoinExec( } if (stateFormatVersion < 2 && joinType != Inner) { - throw new IllegalArgumentException("The query is using stream-stream outer join with state" + + throw new IllegalArgumentException( + s"The query is using stream-stream $joinType join with state" + s" format version ${stateFormatVersion} - correctness issue is discovered. Please discard" + " the checkpoint and rerun the query. 
See SPARK-26154 for more details.") } @@ -165,7 +166,7 @@ case class StreamingSymmetricHashJoinExec( } require( - joinType == Inner || joinType == LeftOuter || joinType == RightOuter, + joinType == Inner || joinType == LeftOuter || joinType == RightOuter || joinType == LeftSemi, errorMessageForJoinType) require(leftKeys.map(_.dataType) == rightKeys.map(_.dataType)) @@ -185,6 +186,7 @@ case class StreamingSymmetricHashJoinExec( case _: InnerLike => left.output ++ right.output case LeftOuter => left.output ++ right.output.map(_.withNullability(true)) case RightOuter => left.output.map(_.withNullability(true)) ++ right.output + case LeftSemi => left.output case _ => throwBadJoinTypeException() } @@ -193,6 +195,7 @@ case class StreamingSymmetricHashJoinExec( PartitioningCollection(Seq(left.outputPartitioning, right.outputPartitioning)) case LeftOuter => left.outputPartitioning case RightOuter => right.outputPartitioning + case LeftSemi => left.outputPartitioning case _ => throwBadJoinTypeException() } @@ -246,14 +249,21 @@ case class StreamingSymmetricHashJoinExec( // Join one side input using the other side's buffered/state rows. Here is how it is done. // - // - `leftSideJoiner.storeAndJoinWithOtherSide(rightSideJoiner)` generates all rows from - // matching new left input with stored right input, and also stores all the left input + // - `leftSideJoiner.storeAndJoinWithOtherSide(rightSideJoiner)` + // - Inner, Left Outer, Right Outer Join: generates all rows from matching new left input + // with stored right input, and also stores all the left input. + // - Left Semi Join: generates all new left input rows from matching new left input with + // stored right input, and also stores all the non-matched left input. // - // - `rightSideJoiner.storeAndJoinWithOtherSide(leftSideJoiner)` generates all rows from - // matching new right input with stored left input, and also stores all the right input. - // It also generates all rows from matching new left input with new right input, since - // the new left input has become stored by that point. This tiny asymmetry is necessary - // to avoid duplication. + // - `rightSideJoiner.storeAndJoinWithOtherSide(leftSideJoiner)` + // - Inner, Left Outer, Right Outer Join: generates all rows from matching new right input + // with stored left input, and also stores all the right input. + // It also generates all rows from matching new left input with new right input, since + // the new left input has become stored by that point. This tiny asymmetry is necessary + // to avoid duplication. + // - Left Semi Join: generates all stored left input rows, from matching new right input + // with stored left input, and also stores all the right input. Note only first-time + // matched left input rows will be generated, this is to guarantee left semi semantics. val leftOutputIter = leftSideJoiner.storeAndJoinWithOtherSide(rightSideJoiner) { (input: InternalRow, matched: InternalRow) => joinedRow.withLeft(input).withRight(matched) } @@ -261,22 +271,21 @@ case class StreamingSymmetricHashJoinExec( (input: InternalRow, matched: InternalRow) => joinedRow.withLeft(matched).withRight(input) } - // We need to save the time that the inner join output iterator completes, since outer join - // output counts as both update and removal time. 
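(Editorial sketch, not part of the patch: a tiny in-memory simulation of the per-side behaviour described in the `storeAndJoinWithOtherSide` comments above. The real operator keeps this state in `SymmetricHashJoinStateManager` and evicts it by watermark; both are omitted here, and all names are illustrative.)

```scala
import scala.collection.mutable

final case class LeftEntry(value: String, var matched: Boolean)

val leftState  = mutable.Map.empty[Int, mutable.Buffer[LeftEntry]] // key -> buffered left rows (matched = already emitted)
val rightState = mutable.Map.empty[Int, Int]                       // key -> count of buffered right rows

def onLeftRow(key: Int, value: String): Seq[String] =
  if (rightState.getOrElse(key, 0) > 0) {
    Seq(value) // match exists: emit the left row now and do not buffer it
  } else {
    leftState.getOrElseUpdate(key, mutable.Buffer.empty[LeftEntry]) += LeftEntry(value, matched = false)
    Seq.empty  // no match yet: buffer as unmatched, emit nothing
  }

def onRightRow(key: Int): Seq[String] = {
  rightState.update(key, rightState.getOrElse(key, 0) + 1) // the right row is always buffered
  val firstTimeMatches = leftState.getOrElse(key, mutable.Buffer.empty[LeftEntry]).filterNot(_.matched)
  firstTimeMatches.foreach(_.matched = true)               // each buffered left row is emitted at most once
  firstTimeMatches.map(_.value).toSeq
}
```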
- var innerOutputCompletionTimeNs: Long = 0 - def onInnerOutputCompletion = { - innerOutputCompletionTimeNs = System.nanoTime + // We need to save the time that the one side hash join output iterator completes, since + // other join output counts as both update and removal time. + var hashJoinOutputCompletionTimeNs: Long = 0 + def onHashJoinOutputCompletion(): Unit = { + hashJoinOutputCompletionTimeNs = System.nanoTime } - // This is the iterator which produces the inner join rows. For outer joins, this will be - // prepended to a second iterator producing outer join rows; for inner joins, this is the full - // output. - val innerOutputIter = CompletionIterator[InternalRow, Iterator[InternalRow]]( - (leftOutputIter ++ rightOutputIter), onInnerOutputCompletion) - + // This is the iterator which produces the inner and left semi join rows. For other joins, + // this will be prepended to a second iterator producing other rows; for inner and left semi + // joins, this is the full output. + val hashJoinOutputIter = CompletionIterator[InternalRow, Iterator[InternalRow]]( + leftOutputIter ++ rightOutputIter, onHashJoinOutputCompletion()) val outputIter: Iterator[InternalRow] = joinType match { - case Inner => - innerOutputIter + case Inner | LeftSemi => + hashJoinOutputIter case LeftOuter => // We generate the outer join input by: // * Getting an iterator over the rows that have aged out on the left side. These rows are @@ -311,7 +320,7 @@ case class StreamingSymmetricHashJoinExec( } }.map(pair => joinedRow.withLeft(pair.value).withRight(nullRight)) - innerOutputIter ++ outerOutputIter + hashJoinOutputIter ++ outerOutputIter case RightOuter => // See comments for left outer case. def matchesWithLeftSideState(rightKeyValue: UnsafeRowPair) = { @@ -330,11 +339,15 @@ case class StreamingSymmetricHashJoinExec( } }.map(pair => joinedRow.withLeft(nullLeft).withRight(pair.value)) - innerOutputIter ++ outerOutputIter + hashJoinOutputIter ++ outerOutputIter case _ => throwBadJoinTypeException() } - val outputProjection = UnsafeProjection.create(left.output ++ right.output, output) + val outputProjection = if (joinType == LeftSemi) { + UnsafeProjection.create(output, output) + } else { + UnsafeProjection.create(left.output ++ right.output, output) + } val outputIterWithMetrics = outputIter.map { row => numOutputRows += 1 outputProjection(row) @@ -345,24 +358,28 @@ case class StreamingSymmetricHashJoinExec( // All processing time counts as update time. allUpdatesTimeMs += math.max(NANOSECONDS.toMillis(System.nanoTime - updateStartTimeNs), 0) - // Processing time between inner output completion and here comes from the outer portion of a - // join, and thus counts as removal time as we remove old state from one side while iterating. - if (innerOutputCompletionTimeNs != 0) { + // Processing time between one side hash join output completion and here comes from the + // outer portion of a join, and thus counts as removal time as we remove old state from + // one side while iterating. + if (hashJoinOutputCompletionTimeNs != 0) { allRemovalsTimeMs += - math.max(NANOSECONDS.toMillis(System.nanoTime - innerOutputCompletionTimeNs), 0) + math.max(NANOSECONDS.toMillis(System.nanoTime - hashJoinOutputCompletionTimeNs), 0) } allRemovalsTimeMs += timeTakenMs { // Remove any remaining state rows which aren't needed because they're below the watermark. // - // For inner joins, we have to remove unnecessary state rows from both sides if possible. 
+ // For inner and left semi joins, we have to remove unnecessary state rows from both sides + // if possible. + // // For outer joins, we have already removed unnecessary state rows from the outer side // (e.g., left side for left outer join) while generating the outer "null" outputs. Now, we // have to remove unnecessary state rows from the other side (e.g., right side for the left // outer join) if possible. In all cases, nothing needs to be outputted, hence the removal // needs to be done greedily by immediately consuming the returned iterator. val cleanupIter = joinType match { - case Inner => leftSideJoiner.removeOldState() ++ rightSideJoiner.removeOldState() + case Inner | LeftSemi => + leftSideJoiner.removeOldState() ++ rightSideJoiner.removeOldState() case LeftOuter => rightSideJoiner.removeOldState() case RightOuter => leftSideJoiner.removeOldState() case _ => throwBadJoinTypeException() @@ -481,6 +498,26 @@ case class StreamingSymmetricHashJoinExec( case _ => (_: InternalRow) => Iterator.empty } + val excludeRowsAlreadyMatched = joinType == LeftSemi && joinSide == RightSide + + val generateOutputIter: (InternalRow, Iterator[JoinedRow]) => Iterator[InternalRow] = + joinSide match { + case LeftSide if joinType == LeftSemi => + (input: InternalRow, joinedRowIter: Iterator[JoinedRow]) => + // For left side of left semi join, generate one left row if there is matched + // rows from right side. Otherwise, generate nothing. + if (joinedRowIter.nonEmpty) { + Iterator.single(input) + } else { + Iterator.empty + } + case RightSide if joinType == LeftSemi => + (_: InternalRow, joinedRowIter: Iterator[JoinedRow]) => + // For right side of left semi join, generate matched left rows only. + joinedRowIter.map(_.getLeft) + case _ => (_: InternalRow, joinedRowIter: Iterator[JoinedRow]) => joinedRowIter + } + nonLateRows.flatMap { row => val thisRow = row.asInstanceOf[UnsafeRow] // If this row fails the pre join filter, that means it can never satisfy the full join @@ -489,8 +526,12 @@ case class StreamingSymmetricHashJoinExec( // the case of inner join). if (preJoinFilter(thisRow)) { val key = keyGenerator(thisRow) - val outputIter: Iterator[JoinedRow] = otherSideJoiner.joinStateManager - .getJoinedRows(key, thatRow => generateJoinedRow(thisRow, thatRow), postJoinFilter) + val joinedRowIter: Iterator[JoinedRow] = otherSideJoiner.joinStateManager.getJoinedRows( + key, + thatRow => generateJoinedRow(thisRow, thatRow), + postJoinFilter, + excludeRowsAlreadyMatched) + val outputIter = generateOutputIter(thisRow, joinedRowIter) new AddingProcessedRowToStateCompletionIterator(key, thisRow, outputIter) } else { generateFilteredJoinedRow(thisRow) @@ -501,13 +542,19 @@ case class StreamingSymmetricHashJoinExec( private class AddingProcessedRowToStateCompletionIterator( key: UnsafeRow, thisRow: UnsafeRow, - subIter: Iterator[JoinedRow]) - extends CompletionIterator[JoinedRow, Iterator[JoinedRow]](subIter) { + subIter: Iterator[InternalRow]) + extends CompletionIterator[InternalRow, Iterator[InternalRow]](subIter) { + private val iteratorNotEmpty: Boolean = super.hasNext override def completion(): Unit = { - val shouldAddToState = // add only if both removal predicates do not match - !stateKeyWatermarkPredicateFunc(key) && !stateValueWatermarkPredicateFunc(thisRow) + val isLeftSemiWithMatch = + joinType == LeftSemi && joinSide == LeftSide && iteratorNotEmpty + // Add to state store only if both removal predicates do not match, + // and the row is not matched for left side of left semi join. 
+ val shouldAddToState = + !stateKeyWatermarkPredicateFunc(key) && !stateValueWatermarkPredicateFunc(thisRow) && + !isLeftSemiWithMatch if (shouldAddToState) { joinStateManager.append(key, thisRow, matched = iteratorNotEmpty) updatedStateRowsCount += 1 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala index 2aa2a18b9eaf4..3fae3979757fe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala @@ -99,13 +99,20 @@ class SymmetricHashJoinStateManager( /** * Get all the matched values for given join condition, with marking matched. * This method is designed to mark joined rows properly without exposing internal index of row. + * + * @param excludeRowsAlreadyMatched Do not join with rows already matched previously. + * This is used for right side of left semi join in + * [[StreamingSymmetricHashJoinExec]] only. */ def getJoinedRows( key: UnsafeRow, generateJoinedRow: InternalRow => JoinedRow, - predicate: JoinedRow => Boolean): Iterator[JoinedRow] = { + predicate: JoinedRow => Boolean, + excludeRowsAlreadyMatched: Boolean = false): Iterator[JoinedRow] = { val numValues = keyToNumValues.get(key) - keyWithIndexToValue.getAll(key, numValues).map { keyIdxToValue => + keyWithIndexToValue.getAll(key, numValues).filterNot { keyIdxToValue => + excludeRowsAlreadyMatched && keyIdxToValue.matched + }.map { keyIdxToValue => val joinedRow = generateJoinedRow(keyIdxToValue.value) if (predicate(joinedRow)) { if (!keyIdxToValue.matched) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala index b235bf7c3180a..91d1f5de3f211 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala @@ -41,18 +41,174 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ import org.apache.spark.util.Utils +abstract class StreamingJoinSuite + extends StreamTest with StateStoreMetricsTest with BeforeAndAfter { -class StreamingInnerJoinSuite extends StreamTest with StateStoreMetricsTest with BeforeAndAfter { + import testImplicits._ before { - SparkSession.setActiveSessionInternal(spark) // set this before force initializing 'joinExec' - spark.streams.stateStoreCoordinator // initialize the lazy coordinator + SparkSession.setActiveSessionInternal(spark) // set this before force initializing 'joinExec' + spark.streams.stateStoreCoordinator // initialize the lazy coordinator } after { StateStore.stop() } + protected def setupStream(prefix: String, multiplier: Int): (MemoryStream[Int], DataFrame) = { + val input = MemoryStream[Int] + val df = input.toDF + .select( + 'value as "key", + timestamp_seconds($"value") as s"${prefix}Time", + ('value * multiplier) as s"${prefix}Value") + .withWatermark(s"${prefix}Time", "10 seconds") + + (input, df) + } + + protected def setupWindowedJoin(joinType: String) + : (MemoryStream[Int], MemoryStream[Int], DataFrame) = { + + val (input1, df1) = setupStream("left", 2) + val (input2, df2) = setupStream("right", 3) + val windowed1 = df1.select('key, window('leftTime, "10 second"), 'leftValue) 
+ val windowed2 = df2.select('key, window('rightTime, "10 second"), 'rightValue) + val joined = windowed1.join(windowed2, Seq("key", "window"), joinType) + val select = if (joinType == "left_semi") { + joined.select('key, $"window.end".cast("long"), 'leftValue) + } else { + joined.select('key, $"window.end".cast("long"), 'leftValue, 'rightValue) + } + + (input1, input2, select) + } + + protected def setupWindowedJoinWithLeftCondition(joinType: String) + : (MemoryStream[Int], MemoryStream[Int], DataFrame) = { + + val (leftInput, df1) = setupStream("left", 2) + val (rightInput, df2) = setupStream("right", 3) + // Use different schemas to ensure the null row is being generated from the correct side. + val left = df1.select('key, window('leftTime, "10 second"), 'leftValue) + val right = df2.select('key, window('rightTime, "10 second"), 'rightValue.cast("string")) + + val joined = left.join( + right, + left("key") === right("key") + && left("window") === right("window") + && 'leftValue > 4, + joinType) + + val select = if (joinType == "left_semi") { + joined.select(left("key"), left("window.end").cast("long"), 'leftValue) + } else if (joinType == "left_outer") { + joined.select(left("key"), left("window.end").cast("long"), 'leftValue, 'rightValue) + } else if (joinType == "right_outer") { + joined.select(right("key"), right("window.end").cast("long"), 'leftValue, 'rightValue) + } else { + joined + } + + (leftInput, rightInput, select) + } + + protected def setupWindowedJoinWithRightCondition(joinType: String) + : (MemoryStream[Int], MemoryStream[Int], DataFrame) = { + + val (leftInput, df1) = setupStream("left", 2) + val (rightInput, df2) = setupStream("right", 3) + // Use different schemas to ensure the null row is being generated from the correct side. 
+ val left = df1.select('key, window('leftTime, "10 second"), 'leftValue) + val right = df2.select('key, window('rightTime, "10 second"), 'rightValue.cast("string")) + + val joined = left.join( + right, + left("key") === right("key") + && left("window") === right("window") + && 'rightValue.cast("int") > 7, + joinType) + + val select = if (joinType == "left_semi") { + joined.select(left("key"), left("window.end").cast("long"), 'leftValue) + } else if (joinType == "left_outer") { + joined.select(left("key"), left("window.end").cast("long"), 'leftValue, 'rightValue) + } else if (joinType == "right_outer") { + joined.select(right("key"), right("window.end").cast("long"), 'leftValue, 'rightValue) + } else { + joined + } + + (leftInput, rightInput, select) + } + + protected def setupWindowedJoinWithRangeCondition(joinType: String) + : (MemoryStream[(Int, Int)], MemoryStream[(Int, Int)], DataFrame) = { + + val leftInput = MemoryStream[(Int, Int)] + val rightInput = MemoryStream[(Int, Int)] + + val df1 = leftInput.toDF.toDF("leftKey", "time") + .select('leftKey, timestamp_seconds($"time") as "leftTime", ('leftKey * 2) as "leftValue") + .withWatermark("leftTime", "10 seconds") + + val df2 = rightInput.toDF.toDF("rightKey", "time") + .select('rightKey, timestamp_seconds($"time") as "rightTime", + ('rightKey * 3) as "rightValue") + .withWatermark("rightTime", "10 seconds") + + val joined = + df1.join( + df2, + expr("leftKey = rightKey AND " + + "leftTime BETWEEN rightTime - interval 5 seconds AND rightTime + interval 5 seconds"), + joinType) + + val select = if (joinType == "left_semi") { + joined.select('leftKey, 'leftTime.cast("int")) + } else { + joined.select('leftKey, 'rightKey, 'leftTime.cast("int"), 'rightTime.cast("int")) + } + + (leftInput, rightInput, select) + } + + protected def setupWindowedSelfJoin(joinType: String) + : (MemoryStream[(Int, Long)], DataFrame) = { + + val inputStream = MemoryStream[(Int, Long)] + + val df = inputStream.toDS() + .select(col("_1").as("value"), timestamp_seconds($"_2").as("timestamp")) + + val leftStream = df.select(col("value").as("leftId"), col("timestamp").as("leftTime")) + + val rightStream = df + // Introduce misses for ease of debugging + .where(col("value") % 2 === 0) + .select(col("value").as("rightId"), col("timestamp").as("rightTime")) + + val joined = leftStream + .withWatermark("leftTime", "5 seconds") + .join( + rightStream.withWatermark("rightTime", "5 seconds"), + expr("leftId = rightId AND rightTime >= leftTime AND " + + "rightTime <= leftTime + interval 5 seconds"), + joinType) + + val select = if (joinType == "left_semi") { + joined.select(col("leftId"), col("leftTime").cast("int")) + } else { + joined.select(col("leftId"), col("leftTime").cast("int"), + col("rightId"), col("rightTime").cast("int")) + } + + (inputStream, select) + } +} + +class StreamingInnerJoinSuite extends StreamingJoinSuite { + import testImplicits._ test("stream stream inner join on non-time column") { val input1 = MemoryStream[Int] @@ -486,58 +642,13 @@ class StreamingInnerJoinSuite extends StreamTest with StateStoreMetricsTest with } -class StreamingOuterJoinSuite extends StreamTest with StateStoreMetricsTest with BeforeAndAfter { +class StreamingOuterJoinSuite extends StreamingJoinSuite { import testImplicits._ import org.apache.spark.sql.functions._ - before { - SparkSession.setActiveSessionInternal(spark) // set this before force initializing 'joinExec' - spark.streams.stateStoreCoordinator // initialize the lazy coordinator - } - - after { - StateStore.stop() - } 
- - private def setupStream(prefix: String, multiplier: Int): (MemoryStream[Int], DataFrame) = { - val input = MemoryStream[Int] - val df = input.toDF - .select( - 'value as "key", - timestamp_seconds($"value") as s"${prefix}Time", - ('value * multiplier) as s"${prefix}Value") - .withWatermark(s"${prefix}Time", "10 seconds") - - return (input, df) - } - - private def setupWindowedJoin(joinType: String): - (MemoryStream[Int], MemoryStream[Int], DataFrame) = { - val (input1, df1) = setupStream("left", 2) - val (input2, df2) = setupStream("right", 3) - val windowed1 = df1.select('key, window('leftTime, "10 second"), 'leftValue) - val windowed2 = df2.select('key, window('rightTime, "10 second"), 'rightValue) - val joined = windowed1.join(windowed2, Seq("key", "window"), joinType) - .select('key, $"window.end".cast("long"), 'leftValue, 'rightValue) - - (input1, input2, joined) - } - test("left outer early state exclusion on left") { - val (leftInput, df1) = setupStream("left", 2) - val (rightInput, df2) = setupStream("right", 3) - // Use different schemas to ensure the null row is being generated from the correct side. - val left = df1.select('key, window('leftTime, "10 second"), 'leftValue) - val right = df2.select('key, window('rightTime, "10 second"), 'rightValue.cast("string")) - - val joined = left.join( - right, - left("key") === right("key") - && left("window") === right("window") - && 'leftValue > 4, - "left_outer") - .select(left("key"), left("window.end").cast("long"), 'leftValue, 'rightValue) + val (leftInput, rightInput, joined) = setupWindowedJoinWithLeftCondition("left_outer") testStream(joined)( MultiAddData(leftInput, 1, 2, 3)(rightInput, 3, 4, 5), @@ -554,19 +665,7 @@ class StreamingOuterJoinSuite extends StreamTest with StateStoreMetricsTest with } test("left outer early state exclusion on right") { - val (leftInput, df1) = setupStream("left", 2) - val (rightInput, df2) = setupStream("right", 3) - // Use different schemas to ensure the null row is being generated from the correct side. - val left = df1.select('key, window('leftTime, "10 second"), 'leftValue) - val right = df2.select('key, window('rightTime, "10 second"), 'rightValue.cast("string")) - - val joined = left.join( - right, - left("key") === right("key") - && left("window") === right("window") - && 'rightValue.cast("int") > 7, - "left_outer") - .select(left("key"), left("window.end").cast("long"), 'leftValue, 'rightValue) + val (leftInput, rightInput, joined) = setupWindowedJoinWithRightCondition("left_outer") testStream(joined)( MultiAddData(leftInput, 3, 4, 5)(rightInput, 1, 2, 3), @@ -583,19 +682,7 @@ class StreamingOuterJoinSuite extends StreamTest with StateStoreMetricsTest with } test("right outer early state exclusion on left") { - val (leftInput, df1) = setupStream("left", 2) - val (rightInput, df2) = setupStream("right", 3) - // Use different schemas to ensure the null row is being generated from the correct side. 
- val left = df1.select('key, window('leftTime, "10 second"), 'leftValue) - val right = df2.select('key, window('rightTime, "10 second"), 'rightValue.cast("string")) - - val joined = left.join( - right, - left("key") === right("key") - && left("window") === right("window") - && 'leftValue > 4, - "right_outer") - .select(right("key"), right("window.end").cast("long"), 'leftValue, 'rightValue) + val (leftInput, rightInput, joined) = setupWindowedJoinWithLeftCondition("right_outer") testStream(joined)( MultiAddData(leftInput, 1, 2, 3)(rightInput, 3, 4, 5), @@ -612,19 +699,7 @@ class StreamingOuterJoinSuite extends StreamTest with StateStoreMetricsTest with } test("right outer early state exclusion on right") { - val (leftInput, df1) = setupStream("left", 2) - val (rightInput, df2) = setupStream("right", 3) - // Use different schemas to ensure the null row is being generated from the correct side. - val left = df1.select('key, window('leftTime, "10 second"), 'leftValue) - val right = df2.select('key, window('rightTime, "10 second"), 'rightValue.cast("string")) - - val joined = left.join( - right, - left("key") === right("key") - && left("window") === right("window") - && 'rightValue.cast("int") > 7, - "right_outer") - .select(right("key"), right("window.end").cast("long"), 'leftValue, 'rightValue) + val (leftInput, rightInput, joined) = setupWindowedJoinWithRightCondition("right_outer") testStream(joined)( MultiAddData(leftInput, 3, 4, 5)(rightInput, 1, 2, 3), @@ -681,27 +756,8 @@ class StreamingOuterJoinSuite extends StreamTest with StateStoreMetricsTest with ("right_outer", Row(null, 2, null, 5)) ).foreach { case (joinType: String, outerResult) => test(s"${joinType.replaceAllLiterally("_", " ")} with watermark range condition") { - import org.apache.spark.sql.functions._ - - val leftInput = MemoryStream[(Int, Int)] - val rightInput = MemoryStream[(Int, Int)] - - val df1 = leftInput.toDF.toDF("leftKey", "time") - .select('leftKey, timestamp_seconds($"time") as "leftTime", ('leftKey * 2) as "leftValue") - .withWatermark("leftTime", "10 seconds") - - val df2 = rightInput.toDF.toDF("rightKey", "time") - .select('rightKey, timestamp_seconds($"time") as "rightTime", - ('rightKey * 3) as "rightValue") - .withWatermark("rightTime", "10 seconds") - - val joined = - df1.join( - df2, - expr("leftKey = rightKey AND " + - "leftTime BETWEEN rightTime - interval 5 seconds AND rightTime + interval 5 seconds"), - joinType) - .select('leftKey, 'rightKey, 'leftTime.cast("int"), 'rightTime.cast("int")) + val (leftInput, rightInput, joined) = setupWindowedJoinWithRangeCondition(joinType) + testStream(joined)( AddData(leftInput, (1, 5), (3, 5)), CheckAnswer(), @@ -780,27 +836,7 @@ class StreamingOuterJoinSuite extends StreamTest with StateStoreMetricsTest with } test("SPARK-26187 self left outer join should not return outer nulls for already matched rows") { - val inputStream = MemoryStream[(Int, Long)] - - val df = inputStream.toDS() - .select(col("_1").as("value"), timestamp_seconds($"_2").as("timestamp")) - - val leftStream = df.select(col("value").as("leftId"), col("timestamp").as("leftTime")) - - val rightStream = df - // Introduce misses for ease of debugging - .where(col("value") % 2 === 0) - .select(col("value").as("rightId"), col("timestamp").as("rightTime")) - - val query = leftStream - .withWatermark("leftTime", "5 seconds") - .join( - rightStream.withWatermark("rightTime", "5 seconds"), - expr("leftId = rightId AND rightTime >= leftTime AND " + - "rightTime <= leftTime + interval 5 seconds"), - 
joinType = "leftOuter") - .select(col("leftId"), col("leftTime").cast("int"), - col("rightId"), col("rightTime").cast("int")) + val (inputStream, query) = setupWindowedSelfJoin("left_outer") testStream(query)( AddData(inputStream, (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L)), @@ -938,7 +974,7 @@ class StreamingOuterJoinSuite extends StreamTest with StateStoreMetricsTest with throw writer.exception.get } assert(e.getMessage.toLowerCase(Locale.ROOT) - .contains("the query is using stream-stream outer join with state format version 1")) + .contains("the query is using stream-stream leftouter join with state format version 1")) } test("SPARK-29438: ensure UNION doesn't lead stream-stream join to use shifted partition IDs") { @@ -1041,3 +1077,205 @@ class StreamingOuterJoinSuite extends StreamTest with StateStoreMetricsTest with ) } } + +class StreamingLeftSemiJoinSuite extends StreamingJoinSuite { + + import testImplicits._ + + test("windowed left semi join") { + val (leftInput, rightInput, joined) = setupWindowedJoin("left_semi") + + testStream(joined)( + MultiAddData(leftInput, 1, 2, 3, 4, 5)(rightInput, 3, 4, 5, 6, 7), + CheckNewAnswer(Row(3, 10, 6), Row(4, 10, 8), Row(5, 10, 10)), + // states + // left: 1, 2, 3, 4 ,5 + // right: 3, 4, 5, 6, 7 + assertNumStateRows(total = 10, updated = 10), + MultiAddData(leftInput, 21)(rightInput, 22), + // Watermark = 11, should remove rows having window=[0,10]. + CheckNewAnswer(), + // states + // left: 21 + // right: 22 + // + // states evicted + // left: 1, 2, 3, 4 ,5 (below watermark) + // right: 3, 4, 5, 6, 7 (below watermark) + assertNumStateRows(total = 2, updated = 2), + AddData(leftInput, 22), + CheckNewAnswer(Row(22, 30, 44)), + // Unlike inner/outer joins, given left input row matches with right input row, + // we don't buffer the matched left input row to the state store. + // + // states + // left: 21 + // right: 22 + assertNumStateRows(total = 2, updated = 0), + StopStream, + StartStream(), + + AddData(leftInput, 1), + // Row not add as 1 < state key watermark = 12. + CheckNewAnswer(), + // states + // left: 21 + // right: 22 + assertNumStateRows(total = 2, updated = 0, droppedByWatermark = 1), + AddData(rightInput, 5), + // Row not add as 5 < state key watermark = 12. + CheckNewAnswer(), + // states + // left: 21 + // right: 22 + assertNumStateRows(total = 2, updated = 0, droppedByWatermark = 1) + ) + } + + test("left semi early state exclusion on left") { + val (leftInput, rightInput, joined) = setupWindowedJoinWithLeftCondition("left_semi") + + testStream(joined)( + MultiAddData(leftInput, 1, 2, 3)(rightInput, 3, 4, 5), + // The left rows with leftValue <= 4 should not generate their semi join rows and + // not get added to the state. + CheckNewAnswer(Row(3, 10, 6)), + // states + // left: 3 + // right: 3, 4, 5 + assertNumStateRows(total = 4, updated = 4), + // We shouldn't get more semi join rows when the watermark advances. 
+ MultiAddData(leftInput, 20)(rightInput, 21), + CheckNewAnswer(), + // states + // left: 20 + // right: 21 + // + // states evicted + // left: 3 (below watermark) + // right: 3, 4, 5 (below watermark) + assertNumStateRows(total = 2, updated = 2), + AddData(rightInput, 20), + CheckNewAnswer((20, 30, 40)), + // states + // left: 20 + // right: 21, 20 + assertNumStateRows(total = 3, updated = 1) + ) + } + + test("left semi early state exclusion on right") { + val (leftInput, rightInput, joined) = setupWindowedJoinWithRightCondition("left_semi") + + testStream(joined)( + MultiAddData(leftInput, 3, 4, 5)(rightInput, 1, 2, 3), + // The right rows with rightValue <= 7 should never be added to the state. + // The right row with rightValue = 9 > 7, hence joined and added to state. + CheckNewAnswer(Row(3, 10, 6)), + // states + // left: 3, 4, 5 + // right: 3 + assertNumStateRows(total = 4, updated = 4), + // We shouldn't get more semi join rows when the watermark advances. + MultiAddData(leftInput, 20)(rightInput, 21), + CheckNewAnswer(), + // states + // left: 20 + // right: 21 + // + // states evicted + // left: 3, 4, 5 (below watermark) + // right: 3 (below watermark) + assertNumStateRows(total = 2, updated = 2), + AddData(rightInput, 20), + CheckNewAnswer((20, 30, 40)), + // states + // left: 20 + // right: 21, 20 + assertNumStateRows(total = 3, updated = 1) + ) + } + + test("left semi join with watermark range condition") { + val (leftInput, rightInput, joined) = setupWindowedJoinWithRangeCondition("left_semi") + + testStream(joined)( + AddData(leftInput, (1, 5), (3, 5)), + CheckNewAnswer(), + // states + // left: (1, 5), (3, 5) + // right: nothing + assertNumStateRows(total = 2, updated = 2), + AddData(rightInput, (1, 10), (2, 5)), + // Match left row in the state. + CheckNewAnswer((1, 5)), + // states + // left: (1, 5), (3, 5) + // right: (1, 10), (2, 5) + assertNumStateRows(total = 4, updated = 2), + AddData(rightInput, (1, 9)), + // No match as left row is already matched. + CheckNewAnswer(), + // states + // left: (1, 5), (3, 5) + // right: (1, 10), (2, 5), (1, 9) + assertNumStateRows(total = 5, updated = 1), + // Increase event time watermark to 20s by adding data with time = 30s on both inputs. + AddData(leftInput, (1, 7), (1, 30)), + CheckNewAnswer((1, 7)), + // states + // left: (1, 5), (3, 5), (1, 30) + // right: (1, 10), (2, 5), (1, 9) + assertNumStateRows(total = 6, updated = 1), + // Watermark = 30 - 10 = 20, no matched row. 
+ AddData(rightInput, (0, 30)), + CheckNewAnswer(), + // states + // left: (1, 30) + // right: (0, 30) + // + // states evicted + // left: (1, 5), (3, 5) (below watermark = 20) + // right: (1, 10), (2, 5), (1, 9) (below watermark = 20) + assertNumStateRows(total = 2, updated = 1) + ) + } + + test("self left semi join") { + val (inputStream, query) = setupWindowedSelfJoin("left_semi") + + testStream(query)( + AddData(inputStream, (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L)), + CheckNewAnswer((2, 2), (4, 4)), + // batch 1 - global watermark = 0 + // states + // left: (2, 2L), (4, 4L) + // (left rows with value % 2 != 0 is filtered per [[PushPredicateThroughJoin]]) + // right: (2, 2L), (4, 4L) + // (right rows with value % 2 != 0 is filtered per [[PushPredicateThroughJoin]]) + assertNumStateRows(total = 4, updated = 4), + AddData(inputStream, (6, 6L), (7, 7L), (8, 8L), (9, 9L), (10, 10L)), + CheckNewAnswer((6, 6), (8, 8), (10, 10)), + // batch 2 - global watermark = 5 + // states + // left: (2, 2L), (4, 4L), (6, 6L), (8, 8L), (10, 10L) + // right: (6, 6L), (8, 8L), (10, 10L) + // + // states evicted + // left: nothing (it waits for 5 seconds more than watermark due to join condition) + // right: (2, 2L), (4, 4L) + assertNumStateRows(total = 8, updated = 6), + AddData(inputStream, (11, 11L), (12, 12L), (13, 13L), (14, 14L), (15, 15L)), + CheckNewAnswer((12, 12), (14, 14)), + // batch 3 - global watermark = 9 + // states + // left: (4, 4L), (6, 6L), (8, 8L), (10, 10L), (12, 12L), (14, 14L) + // right: (10, 10L), (12, 12L), (14, 14L) + // + // states evicted + // left: (2, 2L) + // right: (6, 6L), (8, 8L) + assertNumStateRows(total = 9, updated = 4) + ) + } +} From a21945ce6c725896d19647891d1f9fa9ef74bd87 Mon Sep 17 00:00:00 2001 From: Yuning Zhang Date: Mon, 26 Oct 2020 16:19:06 +0900 Subject: [PATCH 0323/1009] [SPARK-33197][SQL] Make changes to spark.sql.analyzer.maxIterations take effect at runtime ### What changes were proposed in this pull request? Make changes to `spark.sql.analyzer.maxIterations` take effect at runtime. ### Why are the changes needed? `spark.sql.analyzer.maxIterations` is not a static conf. However, before this patch, changing `spark.sql.analyzer.maxIterations` at runtime does not take effect. ### Does this PR introduce _any_ user-facing change? Yes. Before this patch, changing `spark.sql.analyzer.maxIterations` at runtime does not take effect. ### How was this patch tested? modified unit test Closes #30108 from yuningzh-db/dynamic-analyzer-max-iterations. 
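A sketch of the resulting behavior (illustration only, not part of the patch): because the analyzer now reads `conf.analyzerMaxIterations` each time the fixed point is built, updating the conf on a live session affects the next analysis run, e.g. in spark-shell:

```
spark.conf.set("spark.sql.analyzer.maxIterations", "5")
spark.sql("SELECT 1").collect()   // analysis now runs with the lower limit
spark.conf.set("spark.sql.analyzer.maxIterations", "100")
spark.sql("SELECT 1").collect()   // subsequent queries pick up the new value
```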
Authored-by: Yuning Zhang Signed-off-by: Takeshi Yamamuro --- .../sql/catalyst/analysis/Analyzer.scala | 16 +++----- .../sql/catalyst/analysis/AnalysisSuite.scala | 41 +++++++++++++++++++ 2 files changed, 46 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 4264627e0d9bd..457c41c39a196 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -132,8 +132,7 @@ object AnalysisContext { */ class Analyzer( override val catalogManager: CatalogManager, - conf: SQLConf, - maxIterations: Int) + conf: SQLConf) extends RuleExecutor[LogicalPlan] with CheckAnalysis with LookupCatalog { private val v1SessionCatalog: SessionCatalog = catalogManager.v1SessionCatalog @@ -148,12 +147,7 @@ class Analyzer( def this(catalog: SessionCatalog, conf: SQLConf) = { this( new CatalogManager(conf, FakeV2SessionCatalog, catalog), - conf, - conf.analyzerMaxIterations) - } - - def this(catalogManager: CatalogManager, conf: SQLConf) = { - this(catalogManager, conf, conf.analyzerMaxIterations) + conf) } def executeAndCheck(plan: LogicalPlan, tracker: QueryPlanningTracker): LogicalPlan = { @@ -188,9 +182,9 @@ class Analyzer( * If the plan cannot be resolved within maxIterations, analyzer will throw exception to inform * user to increase the value of SQLConf.ANALYZER_MAX_ITERATIONS. */ - protected val fixedPoint = + protected def fixedPoint = FixedPoint( - maxIterations, + conf.analyzerMaxIterations, errorOnExceed = true, maxIterationsSetting = SQLConf.ANALYZER_MAX_ITERATIONS.key) @@ -206,7 +200,7 @@ class Analyzer( */ val postHocResolutionRules: Seq[Rule[LogicalPlan]] = Nil - lazy val batches: Seq[Batch] = Seq( + override def batches: Seq[Batch] = Seq( Batch("Substitution", fixedPoint, // This rule optimizes `UpdateFields` expression chains so looks more like optimization rule. // However, when manipulating deeply nested schema, `UpdateFields` expression tree could be diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 3a5c4b9769685..4f51b77d8ece0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -926,4 +926,45 @@ class AnalysisSuite extends AnalysisTest with Matchers { ) assertAnalysisSuccess(plan) } + + test("SPARK-33197: Make sure changes to ANALYZER_MAX_ITERATIONS take effect at runtime") { + // RuleExecutor only throw exception or log warning when the rule is supposed to run + // more than once. 
+ val maxIterations = 2 + val maxIterationsEnough = 5 + withSQLConf(SQLConf.ANALYZER_MAX_ITERATIONS.key -> maxIterations.toString) { + val conf = SQLConf.get + val testAnalyzer = new Analyzer( + new SessionCatalog(new InMemoryCatalog, FunctionRegistry.builtin, conf), conf) + + val plan = testRelation2.select( + $"a" / Literal(2) as "div1", + $"a" / $"b" as "div2", + $"a" / $"c" as "div3", + $"a" / $"d" as "div4", + $"e" / $"e" as "div5") + + val message1 = intercept[TreeNodeException[LogicalPlan]] { + testAnalyzer.execute(plan) + }.getMessage + assert(message1.startsWith(s"Max iterations ($maxIterations) reached for batch Resolution, " + + s"please set '${SQLConf.ANALYZER_MAX_ITERATIONS.key}' to a larger value.")) + + withSQLConf(SQLConf.ANALYZER_MAX_ITERATIONS.key -> maxIterationsEnough.toString) { + try { + testAnalyzer.execute(plan) + } catch { + case ex: TreeNodeException[_] + if ex.getMessage.contains(SQLConf.ANALYZER_MAX_ITERATIONS.key) => + fail("analyzer.execute should not reach max iterations.") + } + } + + val message2 = intercept[TreeNodeException[LogicalPlan]] { + testAnalyzer.execute(plan) + }.getMessage + assert(message2.startsWith(s"Max iterations ($maxIterations) reached for batch Resolution, " + + s"please set '${SQLConf.ANALYZER_MAX_ITERATIONS.key}' to a larger value.")) + } + } } From 850adeb0fd188cc3cb6319758d58a12554cb6149 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 26 Oct 2020 01:50:23 -0700 Subject: [PATCH 0324/1009] [SPARK-33239][INFRA] Use pre-built image at GitHub Action SparkR job ### What changes were proposed in this pull request? This PR aims to use a pre-built image for Github Action SparkR job. ### Why are the changes needed? This will reduce the execution time and the flakiness. **BEFORE (21 minutes 39 seconds)** ![Screen Shot 2020-10-16 at 1 24 43 PM](https://user-images.githubusercontent.com/9700541/96305593-fbeada80-0fb2-11eb-9b8e-86d8abaad9ef.png) ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the GitHub Action `sparkr` job in this PR. Closes #30066 from dongjoon-hyun/SPARKR. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 77 ++++++++++++++++++++++------ 1 file changed, 61 insertions(+), 16 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 27607a799d038..5b06485b9959e 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -42,8 +42,6 @@ jobs: streaming, sql-kafka-0-10, streaming-kafka-0-10, mllib-local, mllib, yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl - - >- - sparkr # Here, we split Hive and SQL tests into some of slow ones and the rest of them. included-tags: [""] excluded-tags: [""] @@ -138,20 +136,6 @@ jobs: run: | python3.8 -m pip install numpy 'pyarrow<3.0.0' pandas scipy xmlrunner python3.8 -m pip list - # SparkR - - name: Install R 4.0 - uses: r-lib/actions/setup-r@v1 - if: contains(matrix.modules, 'sparkr') - with: - r-version: 4.0 - - name: Install R packages - if: contains(matrix.modules, 'sparkr') - run: | - # qpdf is required to reduce the size of PDFs to make CRAN check pass. See SPARK-32497. - sudo apt-get install -y libcurl4-openssl-dev qpdf - sudo Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'devtools', 'e1071', 'survival', 'arrow', 'roxygen2'), repos='https://cloud.r-project.org/')" - # Show installed packages in R. 
- sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]' # Run the tests. - name: Run tests run: | @@ -260,6 +244,67 @@ jobs: name: unit-tests-log-${{ matrix.modules }}--1.8-hadoop3.2-hive2.3 path: "**/target/unit-tests.log" + sparkr: + name: Build modules - sparkr + runs-on: ubuntu-20.04 + container: + image: dongjoon/apache-spark-github-action-image:20201025 + env: + HADOOP_PROFILE: hadoop3.2 + HIVE_PROFILE: hive2.3 + GITHUB_PREV_SHA: ${{ github.event.before }} + GITHUB_INPUT_BRANCH: ${{ github.event.inputs.target }} + steps: + - name: Checkout Spark repository + uses: actions/checkout@v2 + # In order to fetch changed files + with: + fetch-depth: 0 + - name: Merge dispatched input branch + if: ${{ github.event.inputs.target != '' }} + run: git merge --progress --ff-only origin/${{ github.event.inputs.target }} + # Cache local repositories. Note that GitHub Actions cache has a 2G limit. + - name: Cache Scala, SBT, Maven and Zinc + uses: actions/cache@v2 + with: + path: | + build/apache-maven-* + build/zinc-* + build/scala-* + build/*.jar + key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} + restore-keys: | + build- + - name: Cache Maven local repository + uses: actions/cache@v2 + with: + path: ~/.m2/repository + key: sparkr-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + sparkr-maven- + - name: Cache Ivy local repository + uses: actions/cache@v2 + with: + path: ~/.ivy2/cache + key: sparkr-ivy-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} + restore-keys: | + sparkr-ivy- + - name: Run tests + run: | + mkdir -p ~/.m2 + # The followings are also used by `r-lib/actions/setup-r` to avoid + # R issues at docker environment + export TZ=UTC + export _R_CHECK_SYSTEM_CLOCK_=FALSE + ./dev/run-tests --parallelism 2 --modules sparkr + rm -rf ~/.m2/repository/org/apache/spark + - name: Upload test results to report + if: always() + uses: actions/upload-artifact@v2 + with: + name: test-results-sparkr--1.8-hadoop3.2-hive2.3 + path: "**/target/test-reports/*.xml" + # Static analysis, and documentation build lint: name: Linters, licenses, dependencies and documentation generation From 1042d49bf9d7bb5162215e981e2f8e98164b2aff Mon Sep 17 00:00:00 2001 From: Cheng Su Date: Mon, 26 Oct 2020 20:23:24 +0900 Subject: [PATCH 0325/1009] [SPARK-33075][SQL] Enable auto bucketed scan by default (disable only for cached query) ### What changes were proposed in this pull request? This PR is to enable auto bucketed table scan by default, with exception to only disable for cached query (similar to AQE). The reason why disabling auto scan for cached query is that, the cached query output partitioning can be leveraged later to avoid shuffle and sort when doing join and aggregate. ### Why are the changes needed? Enable auto bucketed table scan by default is useful as it can optimize query automatically under the hood, without users interaction. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added unit test for cached query in `DisableUnnecessaryBucketedScanSuite.scala`. Also change a bunch of unit tests which should disable auto bucketed scan to make them work. Closes #30138 from c21/enable-auto-bucket. 
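For illustration only (a sketch, not taken from the patch; it assumes the `withSQLConf` test helper used in the touched suites and an existing bucketed table, here called `bucketed_table`), code that still needs the old behavior can disable the optimization locally:

```
withSQLConf(SQLConf.AUTO_BUCKETED_SCAN_ENABLED.key -> "false") {
  // plans built inside this scope skip the automatic bucketed scan optimization
  val plan = spark.table("bucketed_table").queryExecution.executedPlan
}
```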
Authored-by: Cheng Su Signed-off-by: Takeshi Yamamuro --- .../apache/spark/sql/internal/SQLConf.scala | 2 +- .../org/apache/spark/sql/SparkSession.scala | 21 +++++++++++- .../spark/sql/execution/CacheManager.scala | 29 +++++++++++----- .../adaptive/AdaptiveSparkPlanHelper.scala | 16 --------- .../datasources/FileSourceStrategySuite.scala | 28 ++++++++------- .../execution/joins/BroadcastJoinSuite.scala | 34 ++++++++++--------- .../spark/sql/sources/BucketedReadSuite.scala | 34 ++++++++++--------- .../DisableUnnecessaryBucketedScanSuite.scala | 23 +++++++++++++ 8 files changed, 116 insertions(+), 71 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 35ef24c1c3ba6..3024398399962 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -971,7 +971,7 @@ object SQLConf { "false, this configuration does not take any effect.") .version("3.1.0") .booleanConf - .createWithDefault(false) + .createWithDefault(true) val CROSS_JOINS_ENABLED = buildConf("spark.sql.crossJoin.enabled") .internal() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index b15d6f981291c..b33557dbfdb27 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -29,7 +29,7 @@ import org.apache.spark.{SPARK_VERSION, SparkConf, SparkContext, TaskContext} import org.apache.spark.annotation.{DeveloperApi, Experimental, Stable, Unstable} import org.apache.spark.api.java.JavaRDD import org.apache.spark.internal.Logging -import org.apache.spark.internal.config.EXECUTOR_ALLOW_SPARK_CONTEXT +import org.apache.spark.internal.config.{ConfigEntry, EXECUTOR_ALLOW_SPARK_CONTEXT} import org.apache.spark.rdd.RDD import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationEnd} import org.apache.spark.sql.catalog.Catalog @@ -1077,6 +1077,25 @@ object SparkSession extends Logging { throw new IllegalStateException("No active or default Spark session found"))) } + /** + * Returns a cloned SparkSession with all specified configurations disabled, or + * the original SparkSession if all configurations are already disabled. 
+ */ + private[sql] def getOrCloneSessionWithConfigsOff( + session: SparkSession, + configurations: Seq[ConfigEntry[Boolean]]): SparkSession = { + val configsEnabled = configurations.filter(session.sessionState.conf.getConf(_)) + if (configsEnabled.isEmpty) { + session + } else { + val newSession = session.cloneSession() + configsEnabled.foreach(conf => { + newSession.sessionState.conf.setConf(conf, false) + }) + newSession + } + } + //////////////////////////////////////////////////////////////////////////////////////// // Private methods from now on //////////////////////////////////////////////////////////////////////////////////////// diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index 7201026b11b6b..5f72d6005a8dd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -22,6 +22,7 @@ import scala.collection.immutable.IndexedSeq import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.ConfigEntry import org.apache.spark.sql.{Dataset, SparkSession} import org.apache.spark.sql.catalyst.expressions.{Attribute, SubqueryExpression} import org.apache.spark.sql.catalyst.optimizer.EliminateResolvedHint @@ -31,6 +32,7 @@ import org.apache.spark.sql.execution.columnar.{DefaultCachedBatchSerializer, In import org.apache.spark.sql.execution.command.CommandUtils import org.apache.spark.sql.execution.datasources.{FileIndex, HadoopFsRelation, LogicalRelation} import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, FileTable} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.storage.StorageLevel import org.apache.spark.storage.StorageLevel.MEMORY_AND_DISK @@ -55,6 +57,17 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { @transient @volatile private var cachedData = IndexedSeq[CachedData]() + /** + * Configurations needs to be turned off, to avoid regression for cached query, so that the + * outputPartitioning of the underlying cached query plan can be leveraged later. + * Configurations include: + * 1. AQE + * 2. Automatic bucketed table scan + */ + private val forceDisableConfigs: Seq[ConfigEntry[Boolean]] = Seq( + SQLConf.ADAPTIVE_EXECUTION_ENABLED, + SQLConf.AUTO_BUCKETED_SCAN_ENABLED) + /** Clears all cached tables. */ def clearCache(): Unit = this.synchronized { cachedData.foreach(_.cachedRepresentation.cacheBuilder.clearCache()) @@ -79,10 +92,10 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { if (lookupCachedData(planToCache).nonEmpty) { logWarning("Asked to cache already cached data.") } else { - // Turn off AQE so that the outputPartitioning of the underlying plan can be leveraged. 
- val sessionWithAqeOff = getOrCloneSessionWithAqeOff(query.sparkSession) - val inMemoryRelation = sessionWithAqeOff.withActive { - val qe = sessionWithAqeOff.sessionState.executePlan(planToCache) + val sessionWithConfigsOff = SparkSession.getOrCloneSessionWithConfigsOff( + query.sparkSession, forceDisableConfigs) + val inMemoryRelation = sessionWithConfigsOff.withActive { + val qe = sessionWithConfigsOff.sessionState.executePlan(planToCache) InMemoryRelation( storageLevel, qe, @@ -188,10 +201,10 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { } needToRecache.map { cd => cd.cachedRepresentation.cacheBuilder.clearCache() - // Turn off AQE so that the outputPartitioning of the underlying plan can be leveraged. - val sessionWithAqeOff = getOrCloneSessionWithAqeOff(spark) - val newCache = sessionWithAqeOff.withActive { - val qe = sessionWithAqeOff.sessionState.executePlan(cd.plan) + val sessionWithConfigsOff = SparkSession.getOrCloneSessionWithConfigsOff( + spark, forceDisableConfigs) + val newCache = sessionWithConfigsOff.withActive { + val qe = sessionWithConfigsOff.sessionState.executePlan(cd.plan) InMemoryRelation(cd.cachedRepresentation.cacheBuilder, qe) } val recomputedPlan = cd.copy(cachedRepresentation = newCache) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanHelper.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanHelper.scala index 8d7a2c95081c4..6ba375910a4eb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanHelper.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanHelper.scala @@ -17,9 +17,7 @@ package org.apache.spark.sql.execution.adaptive -import org.apache.spark.sql.SparkSession import org.apache.spark.sql.execution.SparkPlan -import org.apache.spark.sql.internal.SQLConf /** * This class provides utility methods related to tree traversal of an [[AdaptiveSparkPlanExec]] @@ -137,18 +135,4 @@ trait AdaptiveSparkPlanHelper { case a: AdaptiveSparkPlanExec => a.executedPlan case other => other } - - /** - * Returns a cloned [[SparkSession]] with adaptive execution disabled, or the original - * [[SparkSession]] if its adaptive execution is already disabled. 
- */ - def getOrCloneSessionWithAqeOff[T](session: SparkSession): SparkSession = { - if (!session.sessionState.conf.adaptiveExecutionEnabled) { - session - } else { - val newSession = session.cloneSession() - newSession.sessionState.conf.setConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED, false) - newSession - } - } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala index dfd9ba03f5be0..50f32126e5dec 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala @@ -262,20 +262,22 @@ class FileSourceStrategySuite extends QueryTest with SharedSparkSession with Pre "p1=2/file7_0000" -> 1), buckets = 3) - // No partition pruning - checkScan(table) { partitions => - assert(partitions.size == 3) - assert(partitions(0).files.size == 5) - assert(partitions(1).files.size == 0) - assert(partitions(2).files.size == 2) - } + withSQLConf(SQLConf.AUTO_BUCKETED_SCAN_ENABLED.key -> "false") { + // No partition pruning + checkScan(table) { partitions => + assert(partitions.size == 3) + assert(partitions(0).files.size == 5) + assert(partitions(1).files.size == 0) + assert(partitions(2).files.size == 2) + } - // With partition pruning - checkScan(table.where("p1=2")) { partitions => - assert(partitions.size == 3) - assert(partitions(0).files.size == 3) - assert(partitions(1).files.size == 0) - assert(partitions(2).files.size == 1) + // With partition pruning + checkScan(table.where("p1=2")) { partitions => + assert(partitions.size == 3) + assert(partitions(0).files.size == 3) + assert(partitions(1).files.size == 0) + assert(partitions(2).files.size == 1) + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala index 7ff945f5cbfb4..b6d1baf6e7902 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala @@ -432,22 +432,24 @@ abstract class BroadcastJoinSuiteBase extends QueryTest with SQLTestUtils // join1 is a broadcast join where df2 is broadcasted. Note that output partitioning on the // streamed side (t1) is HashPartitioning (bucketed files). val join1 = t1.join(df2, t1("i1") === df2("i2") && t1("j1") === df2("j2")) - val plan1 = join1.queryExecution.executedPlan - assert(collect(plan1) { case e: ShuffleExchangeExec => e }.isEmpty) - val broadcastJoins = collect(plan1) { case b: BroadcastHashJoinExec => b } - assert(broadcastJoins.size == 1) - assert(broadcastJoins(0).outputPartitioning.isInstanceOf[PartitioningCollection]) - val p = broadcastJoins(0).outputPartitioning.asInstanceOf[PartitioningCollection] - assert(p.partitionings.size == 4) - // Verify all the combinations of output partitioning. 
- Seq(Seq(t1("i1"), t1("j1")), - Seq(t1("i1"), df2("j2")), - Seq(df2("i2"), t1("j1")), - Seq(df2("i2"), df2("j2"))).foreach { expected => - val expectedExpressions = expected.map(_.expr) - assert(p.partitionings.exists { - case h: HashPartitioning => expressionsEqual(h.expressions, expectedExpressions) - }) + withSQLConf(SQLConf.AUTO_BUCKETED_SCAN_ENABLED.key -> "false") { + val plan1 = join1.queryExecution.executedPlan + assert(collect(plan1) { case e: ShuffleExchangeExec => e }.isEmpty) + val broadcastJoins = collect(plan1) { case b: BroadcastHashJoinExec => b } + assert(broadcastJoins.size == 1) + assert(broadcastJoins(0).outputPartitioning.isInstanceOf[PartitioningCollection]) + val p = broadcastJoins(0).outputPartitioning.asInstanceOf[PartitioningCollection] + assert(p.partitionings.size == 4) + // Verify all the combinations of output partitioning. + Seq(Seq(t1("i1"), t1("j1")), + Seq(t1("i1"), df2("j2")), + Seq(df2("i2"), t1("j1")), + Seq(df2("i2"), df2("j2"))).foreach { expected => + val expectedExpressions = expected.map(_.expr) + assert(p.partitionings.exists { + case h: HashPartitioning => expressionsEqual(h.expressions, expectedExpressions) + }) + } } // Join on the column from the broadcasted side (i2, j2) and make sure output partitioning diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala index f8276b143c1e6..a188e4d9d6d90 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala @@ -81,22 +81,24 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils { .bucketBy(8, "j", "k") .saveAsTable("bucketed_table") - val bucketValue = Random.nextInt(maxI) - val table = spark.table("bucketed_table").filter($"i" === bucketValue) - val query = table.queryExecution - val output = query.analyzed.output - val rdd = query.toRdd - - assert(rdd.partitions.length == 8) - - val attrs = table.select("j", "k").queryExecution.analyzed.output - val checkBucketId = rdd.mapPartitionsWithIndex((index, rows) => { - val getBucketId = UnsafeProjection.create( - HashPartitioning(attrs, 8).partitionIdExpression :: Nil, - output) - rows.map(row => getBucketId(row).getInt(0) -> index) - }) - checkBucketId.collect().foreach(r => assert(r._1 == r._2)) + withSQLConf(SQLConf.AUTO_BUCKETED_SCAN_ENABLED.key -> "false") { + val bucketValue = Random.nextInt(maxI) + val table = spark.table("bucketed_table").filter($"i" === bucketValue) + val query = table.queryExecution + val output = query.analyzed.output + val rdd = query.toRdd + + assert(rdd.partitions.length == 8) + + val attrs = table.select("j", "k").queryExecution.analyzed.output + val checkBucketId = rdd.mapPartitionsWithIndex((index, rows) => { + val getBucketId = UnsafeProjection.create( + HashPartitioning(attrs, 8).partitionIdExpression :: Nil, + output) + rows.map(row => getBucketId(row).getInt(0) -> index) + }) + checkBucketId.collect().foreach(r => assert(r._1 == r._2)) + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala index 1c258bc0dadb9..70b74aed40eca 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala @@ -18,7 +18,10 @@ package org.apache.spark.sql.sources import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.expressions.AttributeReference +import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning import org.apache.spark.sql.execution.FileSourceScanExec +import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} @@ -218,4 +221,24 @@ abstract class DisableUnnecessaryBucketedScanSuite extends QueryTest with SQLTes } } } + + test("SPARK-33075: not disable bucketed table scan for cached query") { + withTable("t1") { + withSQLConf(SQLConf.AUTO_BUCKETED_SCAN_ENABLED.key -> "true") { + df1.write.format("parquet").bucketBy(8, "i").saveAsTable("t1") + spark.catalog.cacheTable("t1") + assertCached(spark.table("t1")) + + // Verify cached bucketed table scan not disabled + val partitioning = spark.table("t1").queryExecution.executedPlan + .outputPartitioning + assert(partitioning match { + case HashPartitioning(Seq(column: AttributeReference), 8) if column.name == "i" => true + case _ => false + }) + val aggregateQueryPlan = sql("SELECT SUM(i) FROM t1 GROUP BY i").queryExecution.executedPlan + assert(aggregateQueryPlan.find(_.isInstanceOf[ShuffleExchangeExec]).isEmpty) + } + } + } } From 11bbb130df7b083f42acf0207531efe3912d89eb Mon Sep 17 00:00:00 2001 From: neko Date: Mon, 26 Oct 2020 20:41:56 +0800 Subject: [PATCH 0326/1009] [SPARK-33204][UI] The 'Event Timeline' area cannot be opened when a spark application has some failed jobs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? The page returned by /jobs in Spark UI will store the detail information of each job in javascript like this: ```javascript { 'className': 'executor added', 'group': 'executors', 'start': new Date(1602834008978), 'content': '
    Executor 3 added
' } ``` If an application has a failed job, the failure reason corresponding to the job will be stored in the `content` field in the javascript. If the failure reason contains the character **'**, the javascript code will throw an exception and the `event timeline` URL will stop responding. The following is an example of the broken json: ```javascript { 'className': 'executor removed', 'group': 'executors', 'start': new Date(1602925908654), 'content': '
    Executor 2 removed
' } ``` So we need to consider this special case: if the returned job info contains the character **'**, it needs to be escaped. ### Why are the changes needed? Ensure that the UI page can function normally ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? This PR only fixes an exception in a special case; the manual test result is as follows: ![fixed](https://user-images.githubusercontent.com/52202080/96711638-74490580-13d0-11eb-93e0-b44d9ed5da5c.gif) Closes #30119 from akiyamaneko/timeline_view_cannot_open. Authored-by: neko Signed-off-by: Gengliang Wang --- core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala | 3 ++- core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala index 4e76ea289ede6..5f5a08fe0e574 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala @@ -147,7 +147,8 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We | 'Removed at ${UIUtils.formatDate(removeTime)}' + | '${ e.removeReason.map { reason => - s"""
    Reason: ${reason.replace("\n", " ")}""" + s"""
    Reason: ${StringEscapeUtils.escapeEcmaScript( + reason.replace("\n", " "))}""" }.getOrElse("") }"' + | 'data-html="true">Executor ${e.id} removed
    ' diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala index df239d6d0e187..19eccc5209b8e 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala @@ -127,7 +127,8 @@ private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIP | 'Removed at ${UIUtils.formatDate(removeTime)}' + | '${ e.removeReason.map { reason => - s"""
    Reason: ${reason.replace("\n", " ")}""" + s"""
    Reason: ${StringEscapeUtils.escapeEcmaScript( + reason.replace("\n", " "))}""" }.getOrElse("") }"' + | 'data-html="true">Executor ${e.id} removed
    ' From 02fa19f102122f06e4358cf86c5e903fda28b289 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Mon, 26 Oct 2020 12:31:05 -0700 Subject: [PATCH 0327/1009] [SPARK-33230][SQL] Hadoop committers to get unique job ID in "spark.sql.sources.writeJobUUID" ### What changes were proposed in this pull request? This reinstates the old option `spark.sql.sources.write.jobUUID` to set a unique jobId in the jobconf so that hadoop MR committers have a unique ID which is (a) consistent across tasks and workers and (b) not brittle compared to generated-timestamp job IDs. The latter matches that of what JobID requires, but as they are generated per-thread, may not always be unique within a cluster. ### Why are the changes needed? If a committer (e.g s3a staging committer) uses job-attempt-ID as a unique ID then any two jobs started within the same second have the same ID, so can clash. ### Does this PR introduce _any_ user-facing change? Good Q. It is "developer-facing" in the context of anyone writing a committer. But it reinstates a property which was in Spark 1.x and "went away" ### How was this patch tested? Testing: no test here. You'd have to create a new committer which extracted the value in both job and task(s) and verified consistency. That is possible (with a task output whose records contained the UUID), but it would be pretty convoluted and a high maintenance cost. Because it's trying to address a race condition, it's hard to regenerate the problem downstream and so verify a fix in a test run...I'll just look at the logs to see what temporary dir is being used in the cluster FS and verify it's a UUID Closes #30141 from steveloughran/SPARK-33230-jobId. Authored-by: Steve Loughran Signed-off-by: Dongjoon Hyun --- .../spark/sql/execution/datasources/FileFormatWriter.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala index abb88ae73cabf..a71aeb47872ce 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala @@ -133,7 +133,7 @@ object FileFormatWriter extends Logging { fileFormat.prepareWrite(sparkSession, job, caseInsensitiveOptions, dataSchema) val description = new WriteJobDescription( - uuid = UUID.randomUUID().toString, + uuid = UUID.randomUUID.toString, serializableHadoopConf = new SerializableConfiguration(job.getConfiguration), outputWriterFactory = outputWriterFactory, allColumns = outputSpec.outputColumns, @@ -164,6 +164,10 @@ object FileFormatWriter extends Logging { SQLExecution.checkSQLExecutionId(sparkSession) + // propagate the decription UUID into the jobs, so that committers + // get an ID guaranteed to be unique. + job.getConfiguration.set("spark.sql.sources.writeJobUUID", description.uuid) + // This call shouldn't be put into the `try` block below because it only initializes and // prepares the job, any exception thrown from here shouldn't cause abortJob() to be called. committer.setupJob(job) From afa6aee4f5ea270db5331e48ad08e0b176cdd2a0 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 26 Oct 2020 15:29:12 -0700 Subject: [PATCH 0328/1009] [SPARK-33237][K8S][TESTS] Use default Hadoop-3.2 profile from K8s IT Jenkins job ### What changes were proposed in this pull request? 
This PR aims to use `hadoop-3.2` profile in K8s IT Jenkins jobs. - [x] Switch the default value of `HADOOP_PROFILE` from `hadoop-2.7` to `hadoop-3.2`. - [x] Remove `-Phadoop2.7` from Jenkins K8s IT job. - https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/configure **BEFORE** ``` ./dev/make-distribution.sh --name ${DATE}-${REVISION} --r --pip --tgz -DzincPort=${ZINC_PORT} \ -Phadoop-2.7 -Pkubernetes -Pkinesis-asl -Phive -Phive-thriftserver ``` **AFTER** ``` ./dev/make-distribution.sh --name ${DATE}-${REVISION} --r --pip --tgz -DzincPort=${ZINC_PORT} \ -Pkubernetes -Pkinesis-asl -Phive -Phive-thriftserver ``` ### Why are the changes needed? Since Apache Spark 3.1.0, Hadoop 3 is the default. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Check the Jenkins K8s IT log and result. - https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/34899/ ``` + /home/jenkins/workspace/SparkPullRequestBuilder-K8s/build/mvn clean package -DskipTests -DzincPort=4021 -Pkubernetes -Pkinesis-asl -Phive -Phive-thriftserver Using `mvn` from path: /home/jenkins/tools/hudson.tasks.Maven_MavenInstallation/Maven_3.6.3/bin/mvn [INFO] Scanning for projects... [INFO] ------------------------------------------------------------------------ [INFO] Reactor Build Order: [INFO] ``` Closes #30153 from dongjoon-hyun/SPARK-33237. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../integration-tests/dev/dev-run-integration-tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/integration-tests/dev/dev-run-integration-tests.sh b/resource-managers/kubernetes/integration-tests/dev/dev-run-integration-tests.sh index 9c03a97ef15d5..b72a4f74918ba 100755 --- a/resource-managers/kubernetes/integration-tests/dev/dev-run-integration-tests.sh +++ b/resource-managers/kubernetes/integration-tests/dev/dev-run-integration-tests.sh @@ -35,7 +35,7 @@ CONTEXT= INCLUDE_TAGS="k8s" EXCLUDE_TAGS= JAVA_VERSION="8" -HADOOP_PROFILE="hadoop-2.7" +HADOOP_PROFILE="hadoop-3.2" MVN="$TEST_ROOT_DIR/build/mvn" SCALA_VERSION=$("$MVN" help:evaluate -Dexpression=scala.binary.version 2>/dev/null\ From e43cd8ccef153ed504200c9f52966cb6a96e73bf Mon Sep 17 00:00:00 2001 From: angerszhu Date: Tue, 27 Oct 2020 09:25:53 +0900 Subject: [PATCH 0329/1009] [SPARK-32388][SQL] TRANSFORM with schema-less mode should keep the same with hive ### What changes were proposed in this pull request? In current Spark script transformation with hive serde mode, in case of schema less, result is different with hive. This pr to keep result same with hive script transform serde. #### Hive Scrip Transform with serde in schemaless ``` hive> create table t (c0 int, c1 int, c2 int); hive> INSERT INTO t VALUES (1, 1, 1); hive> INSERT INTO t VALUES (2, 2, 2); hive> CREATE VIEW v AS SELECT TRANSFORM(c0, c1, c2) USING 'cat' FROM t; hive> DESCRIBE v; key string value string hive> SELECT * FROM v; 1 1 1 2 2 2 hive> SELECT key FROM v; 1 2 hive> SELECT value FROM v; 1 1 2 2 ``` #### Spark script transform with hive serde in schema less. ``` hive> create table t (c0 int, c1 int, c2 int); hive> INSERT INTO t VALUES (1, 1, 1); hive> INSERT INTO t VALUES (2, 2, 2); hive> CREATE VIEW v AS SELECT TRANSFORM(c0, c1, c2) USING 'cat' FROM t; hive> SELECT * FROM v; 1 1 2 2 ``` **No serde mode in hive (ROW FORMATTED DELIMITED)** ![image](https://user-images.githubusercontent.com/46485123/90088770-55841e00-dd52-11ea-92dd-7fe52d93f0b3.png) ### Why are the changes needed? 
Keep same behavior with hive script transform ### Does this PR introduce _any_ user-facing change? Before this pr with hive serde script transform ``` select transform(*) USING 'cat' from ( select 1, 2, 3, 4 ) tmp key value 1 2 ``` After ``` select transform(*) USING 'cat' from ( select 1, 2, 3, 4 ) tmp key value 1 2 3 4 ``` ### How was this patch tested? UT Closes #29421 from AngersZhuuuu/SPARK-32388. Authored-by: angerszhu Signed-off-by: HyukjinKwon --- .../BaseScriptTransformationExec.scala | 11 +- .../spark/sql/execution/SparkSqlParser.scala | 4 +- .../BaseScriptTransformationSuite.scala | 40 ++++- .../HiveScriptTransformationSuite.scala | 159 ++++++++++++++++-- 4 files changed, 189 insertions(+), 25 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala index c5107645f46f8..74e5aa716ad67 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala @@ -111,15 +111,14 @@ trait BaseScriptTransformationExec extends UnaryExecNode { .zip(outputFieldWriters) .map { case (data, writer) => writer(data) }) } else { - // In schema less mode, hive default serde will choose first two output column as output - // if output column size less then 2, it will throw ArrayIndexOutOfBoundsException. - // Here we change spark's behavior same as hive's default serde. - // But in hive, TRANSFORM with schema less behavior like origin spark, we will fix this - // to keep spark and hive behavior same in SPARK-32388 + // In schema less mode, hive will choose first two output column as output. + // If output column size less then 2, it will return NULL for columns with missing values. + // Here we split row string and choose first 2 values, if values's size less then 2, + // we pad NULL value until 2 to make behavior same with hive. val kvWriter = CatalystTypeConverters.createToCatalystConverter(StringType) prevLine: String => new GenericInternalRow( - prevLine.split(outputRowFormat).slice(0, 2) + prevLine.split(outputRowFormat).slice(0, 2).padTo(2, null) .map(kvWriter)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 0a5f4c3ed4bcb..f46526d419158 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -785,7 +785,9 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { // Use default (serde) format. 
val name = conf.getConfString("hive.script.serde", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") - val props = Seq("field.delim" -> "\t") + val props = Seq( + "field.delim" -> "\t", + "serialization.last.column.takes.rest" -> "true") val recordHandler = Option(conf.getConfString(configKey, defaultConfigValue)) (Nil, Option(name), props, recordHandler) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala index c07ea0f12f94e..e6029400997a2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala @@ -137,10 +137,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU } } - test("SPARK-25990: TRANSFORM should handle schema less correctly (no serde)") { - assume(TestUtils.testCommandAvailable("python")) - val scriptFilePath = copyAndGetResourceFile("test_script.py", ".py").getAbsoluteFile - + test("SPARK-32388: TRANSFORM should handle schema less correctly (no serde)") { withTempView("v") { val df = Seq( (1, "1", 1.0, BigDecimal(1.0), new Timestamp(1)), @@ -157,7 +154,24 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU df.col("c").expr, df.col("d").expr, df.col("e").expr), - script = s"python $scriptFilePath", + script = "cat", + output = Seq( + AttributeReference("key", StringType)(), + AttributeReference("value", StringType)()), + child = child, + ioschema = defaultIOSchema.copy(schemaLess = true) + ), + df.select( + 'a.cast("string").as("key"), + 'b.cast("string").as("value")).collect()) + + checkAnswer( + df, + (child: SparkPlan) => createScriptTransformationExec( + input = Seq( + df.col("a").expr, + df.col("b").expr), + script = "cat", output = Seq( AttributeReference("key", StringType)(), AttributeReference("value", StringType)()), @@ -167,6 +181,22 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU df.select( 'a.cast("string").as("key"), 'b.cast("string").as("value")).collect()) + + checkAnswer( + df, + (child: SparkPlan) => createScriptTransformationExec( + input = Seq( + df.col("a").expr), + script = "cat", + output = Seq( + AttributeReference("key", StringType)(), + AttributeReference("value", StringType)()), + child = child, + ioschema = defaultIOSchema.copy(schemaLess = true) + ), + df.select( + 'a.cast("string").as("key"), + lit(null)).collect()) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala index d247f37130776..a8b10fc94d880 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala @@ -156,10 +156,7 @@ class HiveScriptTransformationSuite extends BaseScriptTransformationSuite with T assert(uncaughtExceptionHandler.exception.isEmpty) } - test("SPARK-25990: TRANSFORM should handle schema less correctly (hive serde)") { - assume(TestUtils.testCommandAvailable("python")) - val scriptFilePath = copyAndGetResourceFile("test_script.py", ".py").getAbsolutePath - + test("SPARK-32388: TRANSFORM should handle schema less correctly (hive serde)") { withTempView("v") { val df = Seq( (1, "1", 
1.0, BigDecimal(1.0), new Timestamp(1)), @@ -168,21 +165,157 @@ class HiveScriptTransformationSuite extends BaseScriptTransformationSuite with T ).toDF("a", "b", "c", "d", "e") // Note column d's data type is Decimal(38, 18) df.createTempView("v") - val query = sql( - s""" - |SELECT TRANSFORM(a, b, c, d, e) - |USING 'python ${scriptFilePath}' - |FROM v - """.stripMargin) + // In hive default serde mode, if we don't define output schema, + // when output column size > 2 and don't specify serde, + // it will choose take rest columns in second column as output schema + // (key: String, value: String) + checkAnswer( + sql( + s""" + |SELECT TRANSFORM(a, b, c, d, e) + | USING 'cat' + |FROM v + """.stripMargin), + identity, + df.select( + 'a.cast("string").as("key"), + concat_ws("\t", + 'b.cast("string"), + 'c.cast("string"), + 'd.cast("string"), + 'e.cast("string")).as("value")).collect()) + + // In hive default serde mode, if we don't define output schema, + // when output column size > 2 and just specify serde, + // it will choose take rest columns in second column as output schema + // (key: String, value: String) + checkAnswer( + sql( + s""" + |SELECT TRANSFORM(a, b, c, d, e) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t' + | ) + | USING 'cat' + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t' + | ) + |FROM v + """.stripMargin), + identity, + df.select( + 'a.cast("string").as("key"), + 'b.cast("string").as("value")).collect()) + + + // In hive default serde mode, if we don't define output schema, + // when output column size > 2 and specify serde with + // 'serialization.last.column.takes.rest=true', + // it will choose take rest columns in second column as output schema + // (key: String, value: String) + checkAnswer( + sql( + s""" + |SELECT TRANSFORM(a, b, c, d, e) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'true' + | ) + | USING 'cat' + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'true' + | ) + |FROM v + """.stripMargin), + identity, + df.select( + 'a.cast("string").as("key"), + concat_ws("\t", + 'b.cast("string"), + 'c.cast("string"), + 'd.cast("string"), + 'e.cast("string")).as("value")).collect()) + + // In hive default serde mode, if we don't define output schema, + // when output column size > 2 and specify serde + // with 'serialization.last.column.takes.rest=false', + // it will choose first two column as output schema (key: String, value: String) + checkAnswer( + sql( + s""" + |SELECT TRANSFORM(a, b, c, d, e) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'false' + | ) + | USING 'cat' + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'false' + | ) + |FROM v + """.stripMargin), + identity, + df.select( + 'a.cast("string").as("key"), + 'b.cast("string").as("value")).collect()) - // In hive default serde mode, if we don't define output schema, it will choose first - // two column as output schema (key: String, value: String) + // In hive 
default serde mode, if we don't define output schema, + // when output column size = 2 and specify serde, it will these two column as + // output schema (key: String, value: String) checkAnswer( - query, + sql( + s""" + |SELECT TRANSFORM(a, b) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'true' + | ) + | USING 'cat' + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'true' + | ) + |FROM v + """.stripMargin), identity, df.select( 'a.cast("string").as("key"), 'b.cast("string").as("value")).collect()) + + // In hive default serde mode, if we don't define output schema, + // when output column size < 2 and specify serde, it will return null for deficiency + // output schema (key: String, value: String) + checkAnswer( + sql( + s""" + |SELECT TRANSFORM(a) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'true' + | ) + | USING 'cat' + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'true' + | ) + |FROM v + """.stripMargin), + identity, + df.select( + 'a.cast("string").as("key"), + lit(null)).collect()) } } From 7cdc921bc07c3d627a8fcbc81cd9c320bda0b873 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Tue, 27 Oct 2020 09:52:09 +0900 Subject: [PATCH 0330/1009] [SPARK-32188][PYTHON][DOCS][FOLLOW-UP] Document Column APIs in API reference ### What changes were proposed in this pull request? This PR proposes to document the APIs in `Column` as well in API reference of PySpark documentation. ### Why are the changes needed? To document common APIs in PySpark. ### Does this PR introduce _any_ user-facing change? Yes, `Column.*` will be shown in API reference page. ### How was this patch tested? Manually tested via `cd python` and `make clean html`. Closes #30150 from HyukjinKwon/SPARK-32188. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- python/docs/source/reference/pyspark.sql.rst | 39 ++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/python/docs/source/reference/pyspark.sql.rst b/python/docs/source/reference/pyspark.sql.rst index 0ed2f1b86ada5..f067b5500c1f4 100644 --- a/python/docs/source/reference/pyspark.sql.rst +++ b/python/docs/source/reference/pyspark.sql.rst @@ -217,6 +217,45 @@ DataFrame APIs DataFrameStatFunctions.freqItems DataFrameStatFunctions.sampleBy +Column APIs +----------- + +.. currentmodule:: pyspark.sql + +.. 
autosummary:: + :toctree: api/ + + Column.alias + Column.asc + Column.asc_nulls_first + Column.asc_nulls_last + Column.astype + Column.between + Column.bitwiseAND + Column.bitwiseOR + Column.bitwiseXOR + Column.cast + Column.contains + Column.desc + Column.desc_nulls_first + Column.desc_nulls_last + Column.dropFields + Column.endswith + Column.eqNullSafe + Column.getField + Column.getItem + Column.isNotNull + Column.isNull + Column.isin + Column.like + Column.name + Column.otherwise + Column.over + Column.rlike + Column.startswith + Column.substr + Column.when + Column.withField Data Types ---------- From 4e6a310f8062102ea6a022fb21171f896c8296ae Mon Sep 17 00:00:00 2001 From: zero323 Date: Tue, 27 Oct 2020 11:05:53 +0900 Subject: [PATCH 0331/1009] [SPARK-32084][PYTHON][SQL] Expand dictionary functions ### What changes were proposed in this pull request? - [x] Expand dictionary definitions into standalone functions. - [x] Fix annotations for ordering functions. ### Why are the changes needed? To simplify further maintenance of docstrings. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #30143 from zero323/SPARK-32084. Authored-by: zero323 Signed-off-by: HyukjinKwon --- python/pyspark/sql/functions.py | 870 +++++++++++++++------ python/pyspark/sql/functions.pyi | 12 +- python/pyspark/sql/tests/test_functions.py | 10 +- 3 files changed, 626 insertions(+), 266 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 97146fdb804ab..22941ab6f1157 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -24,8 +24,7 @@ from pyspark import since, SparkContext from pyspark.rdd import PythonEvalType -from pyspark.sql.column import Column, _to_java_column, _to_seq, _create_column_from_literal, \ - _create_column_from_name +from pyspark.sql.column import Column, _to_java_column, _to_seq, _create_column_from_literal from pyspark.sql.dataframe import DataFrame from pyspark.sql.types import StringType, DataType # Keep UserDefinedFunction import for backwards compatible import; moved in SPARK-22409 @@ -42,154 +41,457 @@ # since it requires to make every single overridden definition. -def _create_function(name, doc=""): - """Create a PySpark function by its name""" - def _(col): - sc = SparkContext._active_spark_context - jc = getattr(sc._jvm.functions, name)(col._jc if isinstance(col, Column) else col) - return Column(jc) - _.__name__ = name - _.__doc__ = doc - return _ +def _get_get_jvm_function(name, sc): + """ + Retrieves JVM function identified by name from + Java gateway associated with sc. + """ + return getattr(sc._jvm.functions, name) -def _create_function_over_column(name, doc=""): - """Similar with `_create_function` but creates a PySpark function that takes a column - (as string as well). This is mainly for PySpark functions to take strings as - column names. +def _invoke_function(name, *args): + """ + Invokes JVM function identified by name with args + and wraps the result with :class:`Column`. 
""" - def _(col): - sc = SparkContext._active_spark_context - jc = getattr(sc._jvm.functions, name)(_to_java_column(col)) - return Column(jc) - _.__name__ = name - _.__doc__ = doc - return _ + jf = _get_get_jvm_function(name, SparkContext._active_spark_context) + return Column(jf(*args)) -def _wrap_deprecated_function(func, message): - """ Wrap the deprecated function to print out deprecation warnings""" - def _(col): - warnings.warn(message, DeprecationWarning) - return func(col) - return functools.wraps(func)(_) +def _invoke_function_over_column(name, col): + """ + Invokes unary JVM function identified by name + and wraps the result with :class:`Column`. + """ + return _invoke_function(name, _to_java_column(col)) -def _create_binary_mathfunction(name, doc=""): - """ Create a binary mathfunction by name""" - def _(col1, col2): - sc = SparkContext._active_spark_context +def _invoke_binary_math_function(name, col1, col2): + """ + Invokes binary JVM math function identified by name + and wraps the result with :class:`Column`. + """ + return _invoke_function( + name, # For legacy reasons, the arguments here can be implicitly converted into floats, # if they are not columns or strings. - if isinstance(col1, Column): - arg1 = col1._jc - elif isinstance(col1, str): - arg1 = _create_column_from_name(col1) - else: - arg1 = float(col1) - - if isinstance(col2, Column): - arg2 = col2._jc - elif isinstance(col2, str): - arg2 = _create_column_from_name(col2) - else: - arg2 = float(col2) - - jc = getattr(sc._jvm.functions, name)(arg1, arg2) - return Column(jc) - _.__name__ = name - _.__doc__ = doc - return _ - - -def _create_window_function(name, doc=''): - """ Create a window function by name """ - def _(): - sc = SparkContext._active_spark_context - jc = getattr(sc._jvm.functions, name)() - return Column(jc) - _.__name__ = name - _.__doc__ = 'Window function: ' + doc - return _ + _to_java_column(col1) if isinstance(col1, (str, Column)) else float(col1), + _to_java_column(col2) if isinstance(col2, (str, Column)) else float(col2) + ) def _options_to_str(options): return {key: to_str(value) for (key, value) in options.items()} -_lit_doc = """ + +@since(1.3) +def lit(col): + """ Creates a :class:`Column` of literal value. 
>>> df.select(lit(5).alias('height')).withColumn('spark_user', lit(True)).take(1) [Row(height=5, spark_user=True)] """ -_functions = { - 'lit': _lit_doc, - 'col': 'Returns a :class:`Column` based on the given column name.', - 'column': 'Returns a :class:`Column` based on the given column name.', - 'asc': 'Returns a sort expression based on the ascending order of the given column name.', - 'desc': 'Returns a sort expression based on the descending order of the given column name.', -} - -_functions_over_column = { - 'sqrt': 'Computes the square root of the specified float value.', - 'abs': 'Computes the absolute value.', - - 'max': 'Aggregate function: returns the maximum value of the expression in a group.', - 'min': 'Aggregate function: returns the minimum value of the expression in a group.', - 'count': 'Aggregate function: returns the number of items in a group.', - 'sum': 'Aggregate function: returns the sum of all values in the expression.', - 'avg': 'Aggregate function: returns the average of the values in a group.', - 'mean': 'Aggregate function: returns the average of the values in a group.', - 'sumDistinct': 'Aggregate function: returns the sum of distinct values in the expression.', -} - -_functions_1_4_over_column = { - # unary math functions - 'acos': ':return: inverse cosine of `col`, as if computed by `java.lang.Math.acos()`', - 'asin': ':return: inverse sine of `col`, as if computed by `java.lang.Math.asin()`', - 'atan': ':return: inverse tangent of `col`, as if computed by `java.lang.Math.atan()`', - 'cbrt': 'Computes the cube-root of the given value.', - 'ceil': 'Computes the ceiling of the given value.', - 'cos': """:param col: angle in radians - :return: cosine of the angle, as if computed by `java.lang.Math.cos()`.""", - 'cosh': """:param col: hyperbolic angle - :return: hyperbolic cosine of the angle, as if computed by `java.lang.Math.cosh()`""", - 'exp': 'Computes the exponential of the given value.', - 'expm1': 'Computes the exponential of the given value minus one.', - 'floor': 'Computes the floor of the given value.', - 'log': 'Computes the natural logarithm of the given value.', - 'log10': 'Computes the logarithm of the given value in Base 10.', - 'log1p': 'Computes the natural logarithm of the given value plus one.', - 'rint': 'Returns the double value that is closest in value to the argument and' + - ' is equal to a mathematical integer.', - 'signum': 'Computes the signum of the given value.', - 'sin': """:param col: angle in radians - :return: sine of the angle, as if computed by `java.lang.Math.sin()`""", - 'sinh': """:param col: hyperbolic angle - :return: hyperbolic sine of the given value, - as if computed by `java.lang.Math.sinh()`""", - 'tan': """:param col: angle in radians - :return: tangent of the given value, as if computed by `java.lang.Math.tan()`""", - 'tanh': """:param col: hyperbolic angle - :return: hyperbolic tangent of the given value, - as if computed by `java.lang.Math.tanh()`""", - 'toDegrees': '.. note:: Deprecated in 2.1, use :func:`degrees` instead.', - 'toRadians': '.. 
note:: Deprecated in 2.1, use :func:`radians` instead.', - 'bitwiseNOT': 'Computes bitwise not.', -} - -_functions_2_4 = { - 'asc_nulls_first': 'Returns a sort expression based on the ascending order of the given' + - ' column name, and null values return before non-null values.', - 'asc_nulls_last': 'Returns a sort expression based on the ascending order of the given' + - ' column name, and null values appear after non-null values.', - 'desc_nulls_first': 'Returns a sort expression based on the descending order of the given' + - ' column name, and null values appear before non-null values.', - 'desc_nulls_last': 'Returns a sort expression based on the descending order of the given' + - ' column name, and null values appear after non-null values', -} - -_collect_list_doc = """ + return col if isinstance(col, Column) else _invoke_function("lit", col) + + +@since(1.3) +def col(col): + """ + Returns a :class:`Column` based on the given column name.' + """ + return _invoke_function("col", col) + + +@since(1.3) +def column(col): + """ + Returns a :class:`Column` based on the given column name.' + """ + return col(col) + + +@since(1.3) +def asc(col): + """ + Returns a sort expression based on the ascending order of the given column name. + """ + return _invoke_function("asc", col) + + +@since(1.3) +def desc(col): + """ + Returns a sort expression based on the descending order of the given column name. + """ + return _invoke_function("desc", col) + + +@since(1.3) +def sqrt(col): + """ + Computes the square root of the specified float value. + """ + return _invoke_function_over_column("sqrt", col) + + +@since(1.3) +def abs(col): + """ + Computes the absolute value. + """ + return _invoke_function_over_column("abs", col) + + +@since(1.3) +def max(col): + """ + Aggregate function: returns the maximum value of the expression in a group. + """ + return _invoke_function_over_column("max", col) + + +@since(1.3) +def min(col): + """ + Aggregate function: returns the minimum value of the expression in a group. + """ + return _invoke_function_over_column("min", col) + + +@since(1.3) +def count(col): + """ + Aggregate function: returns the number of items in a group. + """ + return _invoke_function_over_column("count", col) + + +@since(1.3) +def sum(col): + """ + Aggregate function: returns the sum of all values in the expression. + """ + return _invoke_function_over_column("sum", col) + + +@since(1.3) +def avg(col): + """ + Aggregate function: returns the average of the values in a group. + """ + return _invoke_function_over_column("avg", col) + + +@since(1.3) +def mean(col): + """ + Aggregate function: returns the average of the values in a group. + """ + return _invoke_function_over_column("mean", col) + + +@since(1.3) +def sumDistinct(col): + """ + Aggregate function: returns the sum of distinct values in the expression. + """ + return _invoke_function_over_column("sumDistinct", col) + + +@since(1.4) +def acos(col): + """ + :return: inverse cosine of `col`, as if computed by `java.lang.Math.acos()` + """ + return _invoke_function_over_column("acos", col) + + +@since(1.4) +def asin(col): + """ + :return: inverse sine of `col`, as if computed by `java.lang.Math.asin()` + """ + return _invoke_function_over_column("asin", col) + + +@since(1.4) +def atan(col): + """ + :return: inverse tangent of `col`, as if computed by `java.lang.Math.atan()` + """ + return _invoke_function_over_column("atan", col) + + +@since(1.4) +def cbrt(col): + """ + Computes the cube-root of the given value. 
+ """ + return _invoke_function_over_column("cbrt", col) + + +@since(1.4) +def ceil(col): + """ + Computes the ceiling of the given value. + """ + return _invoke_function_over_column("ceil", col) + + +@since(1.4) +def cos(col): + """ + :param col: angle in radians + :return: cosine of the angle, as if computed by `java.lang.Math.cos()`. + """ + return _invoke_function_over_column("cos", col) + + +@since(1.4) +def cosh(col): + """ + :param col: hyperbolic angle + :return: hyperbolic cosine of the angle, as if computed by `java.lang.Math.cosh()` + """ + return _invoke_function_over_column("cosh", col) + + +@since(1.4) +def exp(col): + """ + Computes the exponential of the given value. + """ + return _invoke_function_over_column("exp", col) + + +@since(1.4) +def expm1(col): + """ + Computes the exponential of the given value minus one. + """ + return _invoke_function_over_column("expm1", col) + + +@since(1.4) +def floor(col): + """ + Computes the floor of the given value. + """ + return _invoke_function_over_column("floor", col) + + +@since(1.4) +def log(col): + """ + Computes the natural logarithm of the given value. + """ + return _invoke_function_over_column("log", col) + + +@since(1.4) +def log10(col): + """ + Computes the logarithm of the given value in Base 10. + """ + return _invoke_function_over_column("log10", col) + + +@since(1.4) +def log1p(col): + """ + Computes the natural logarithm of the given value plus one. + """ + return _invoke_function_over_column("log1p", col) + + +@since(1.4) +def rint(col): + """ + Returns the double value that is closest in value to the argument and + is equal to a mathematical integer. + """ + return _invoke_function_over_column("rint", col) + + +@since(1.4) +def signum(col): + """ + Computes the signum of the given value. + """ + return _invoke_function_over_column("signum", col) + + +@since(1.4) +def sin(col): + """ + :param col: angle in radians + :return: sine of the angle, as if computed by `java.lang.Math.sin()` + """ + return _invoke_function_over_column("sin", col) + + +@since(1.4) +def sinh(col): + """ + :param col: hyperbolic angle + :return: hyperbolic sine of the given value, + as if computed by `java.lang.Math.sinh()` + """ + return _invoke_function_over_column("sinh", col) + + +@since(1.4) +def tan(col): + """ + :param col: angle in radians + :return: tangent of the given value, as if computed by `java.lang.Math.tan()` + """ + return _invoke_function_over_column("tan", col) + + +@since(1.4) +def tanh(col): + """ + :param col: hyperbolic angle + :return: hyperbolic tangent of the given value + as if computed by `java.lang.Math.tanh()` + """ + return _invoke_function_over_column("tanh", col) + + +@since(1.4) +def toDegrees(col): + """ + .. note:: Deprecated in 2.1, use :func:`degrees` instead. + """ + warnings.warn("Deprecated in 2.1, use degrees instead.", DeprecationWarning) + return degrees(col) + + +@since(1.4) +def toRadians(col): + """ + .. note:: Deprecated in 2.1, use :func:`radians` instead. + """ + warnings.warn("Deprecated in 2.1, use radians instead.", DeprecationWarning) + return radians(col) + + +@since(1.4) +def bitwiseNOT(col): + """ + Computes bitwise not. + """ + return _invoke_function_over_column("bitwiseNOT", col) + + +@since(2.4) +def asc_nulls_first(col): + """ + Returns a sort expression based on the ascending order of the given + column name, and null values return before non-null values. 
+ """ + return _invoke_function("asc_nulls_first", col) + + +@since(2.4) +def asc_nulls_last(col): + """ + Returns a sort expression based on the ascending order of the given + column name, and null values appear after non-null values. + """ + return _invoke_function("asc_nulls_last", col) + + +@since(2.4) +def desc_nulls_first(col): + """ + Returns a sort expression based on the descending order of the given + column name, and null values appear before non-null values. + """ + return _invoke_function("desc_nulls_first", col) + + +@since(2.4) +def desc_nulls_last(col): + """ + Returns a sort expression based on the descending order of the given + column name, and null values appear after non-null values. + """ + return _invoke_function("desc_nulls_last", col) + + +@since(1.6) +def stddev(col): + """ + Aggregate function: alias for stddev_samp. + """ + return _invoke_function_over_column("stddev", col) + + +@since(1.6) +def stddev_samp(col): + """ + Aggregate function: returns the unbiased sample standard deviation of + the expression in a group. + """ + return _invoke_function_over_column("stddev_samp", col) + + +@since(1.6) +def stddev_pop(col): + """ + Aggregate function: returns population standard deviation of + the expression in a group. + """ + return _invoke_function_over_column("stddev_pop", col) + + +@since(1.6) +def variance(col): + """ + Aggregate function: alias for var_samp + """ + return _invoke_function_over_column("variance", col) + + +@since(1.6) +def var_samp(col): + """ + Aggregate function: returns the unbiased sample variance of + the values in a group. + """ + return _invoke_function_over_column("var_samp", col) + + +@since(1.6) +def var_pop(col): + """ + Aggregate function: returns the population variance of the values in a group. + """ + return _invoke_function_over_column("var_pop", col) + + +@since(1.6) +def skewness(col): + """ + Aggregate function: returns the skewness of the values in a group. + """ + return _invoke_function_over_column("skewness", col) + + +@since(1.6) +def kurtosis(col): + """ + Aggregate function: returns the kurtosis of the values in a group. + """ + return _invoke_function_over_column("kurtosis", col) + + +@since(1.6) +def collect_list(col): + """ Aggregate function: returns a list of objects with duplicates. .. note:: The function is non-deterministic because the order of collected results depends @@ -199,7 +501,12 @@ def _options_to_str(options): >>> df2.agg(collect_list('age')).collect() [Row(collect_list(age)=[2, 5, 5])] """ -_collect_set_doc = """ + return _invoke_function_over_column("collect_list", col) + + +@since(1.6) +def collect_set(col): + """ Aggregate function: returns a set of objects with duplicate elements eliminated. .. 
note:: The function is non-deterministic because the order of collected results depends @@ -209,111 +516,118 @@ def _options_to_str(options): >>> df2.agg(collect_set('age')).collect() [Row(collect_set(age)=[5, 2])] """ -_functions_1_6_over_column = { - # unary math functions - 'stddev': 'Aggregate function: alias for stddev_samp.', - 'stddev_samp': 'Aggregate function: returns the unbiased sample standard deviation of' + - ' the expression in a group.', - 'stddev_pop': 'Aggregate function: returns population standard deviation of' + - ' the expression in a group.', - 'variance': 'Aggregate function: alias for var_samp.', - 'var_samp': 'Aggregate function: returns the unbiased sample variance of' + - ' the values in a group.', - 'var_pop': 'Aggregate function: returns the population variance of the values in a group.', - 'skewness': 'Aggregate function: returns the skewness of the values in a group.', - 'kurtosis': 'Aggregate function: returns the kurtosis of the values in a group.', - 'collect_list': _collect_list_doc, - 'collect_set': _collect_set_doc -} - -_functions_2_1_over_column = { - # unary math functions - 'degrees': """ - Converts an angle measured in radians to an approximately equivalent angle - measured in degrees. - - :param col: angle in radians - :return: angle in degrees, as if computed by `java.lang.Math.toDegrees()` - """, - 'radians': """ - Converts an angle measured in degrees to an approximately equivalent angle - measured in radians. - - :param col: angle in degrees - :return: angle in radians, as if computed by `java.lang.Math.toRadians()` - """, -} - -# math functions that take two arguments as input -_binary_mathfunctions = { - 'atan2': """ - :param col1: coordinate on y-axis - :param col2: coordinate on x-axis - :return: the `theta` component of the point - (`r`, `theta`) - in polar coordinates that corresponds to the point - (`x`, `y`) in Cartesian coordinates, - as if computed by `java.lang.Math.atan2()` - """, - 'hypot': 'Computes ``sqrt(a^2 + b^2)`` without intermediate overflow or underflow.', - 'pow': 'Returns the value of the first argument raised to the power of the second argument.', -} - -_window_functions = { - 'row_number': - """returns a sequential number starting at 1 within a window partition.""", - 'dense_rank': - """returns the rank of rows within a window partition, without any gaps. - - The difference between rank and dense_rank is that dense_rank leaves no gaps in ranking - sequence when there are ties. That is, if you were ranking a competition using dense_rank - and had three people tie for second place, you would say that all three were in second - place and that the next person came in third. Rank would give me sequential numbers, making - the person that came in third place (after the ties) would register as coming in fifth. - - This is equivalent to the DENSE_RANK function in SQL.""", - 'rank': - """returns the rank of rows within a window partition. - - The difference between rank and dense_rank is that dense_rank leaves no gaps in ranking - sequence when there are ties. That is, if you were ranking a competition using dense_rank - and had three people tie for second place, you would say that all three were in second - place and that the next person came in third. Rank would give me sequential numbers, making - the person that came in third place (after the ties) would register as coming in fifth. 
- - This is equivalent to the RANK function in SQL.""", - 'cume_dist': - """returns the cumulative distribution of values within a window partition, - i.e. the fraction of rows that are below the current row.""", - 'percent_rank': - """returns the relative rank (i.e. percentile) of rows within a window partition.""", -} - -# Wraps deprecated functions (keys) with the messages (values). -_functions_deprecated = { - 'toDegrees': 'Deprecated in 2.1, use degrees instead.', - 'toRadians': 'Deprecated in 2.1, use radians instead.', -} - -for _name, _doc in _functions.items(): - globals()[_name] = since(1.3)(_create_function(_name, _doc)) -for _name, _doc in _functions_over_column.items(): - globals()[_name] = since(1.3)(_create_function_over_column(_name, _doc)) -for _name, _doc in _functions_1_4_over_column.items(): - globals()[_name] = since(1.4)(_create_function_over_column(_name, _doc)) -for _name, _doc in _binary_mathfunctions.items(): - globals()[_name] = since(1.4)(_create_binary_mathfunction(_name, _doc)) -for _name, _doc in _window_functions.items(): - globals()[_name] = since(1.6)(_create_window_function(_name, _doc)) -for _name, _doc in _functions_1_6_over_column.items(): - globals()[_name] = since(1.6)(_create_function_over_column(_name, _doc)) -for _name, _doc in _functions_2_1_over_column.items(): - globals()[_name] = since(2.1)(_create_function_over_column(_name, _doc)) -for _name, _message in _functions_deprecated.items(): - globals()[_name] = _wrap_deprecated_function(globals()[_name], _message) -for _name, _doc in _functions_2_4.items(): - globals()[_name] = since(2.4)(_create_function(_name, _doc)) -del _name, _doc + return _invoke_function_over_column("collect_set", col) + + +@since(2.1) +def degrees(col): + """ + Converts an angle measured in radians to an approximately equivalent angle + measured in degrees. + + :param col: angle in radians + :return: angle in degrees, as if computed by `java.lang.Math.toDegrees()` + """ + return _invoke_function_over_column("degrees", col) + + +@since(2.1) +def radians(col): + """ + Converts an angle measured in degrees to an approximately equivalent angle + measured in radians. + + :param col: angle in degrees + :return: angle in radians, as if computed by `java.lang.Math.toRadians()` + """ + return _invoke_function_over_column("radians", col) + + +@since(1.4) +def atan2(col1, col2): + """ + :param col1: coordinate on y-axis + :param col2: coordinate on x-axis + :return: the `theta` component of the point + (`r`, `theta`) + in polar coordinates that corresponds to the point + (`x`, `y`) in Cartesian coordinates, + as if computed by `java.lang.Math.atan2()` + """ + return _invoke_binary_math_function("atan2", col1, col2) + + +@since(1.4) +def hypot(col1, col2): + """ + Computes ``sqrt(a^2 + b^2)`` without intermediate overflow or underflow. + """ + return _invoke_binary_math_function("hypot", col1, col2) + + +@since(1.4) +def pow(col1, col2): + """ + Returns the value of the first argument raised to the power of the second argument. + """ + return _invoke_binary_math_function("pow", col1, col2) + + +@since(1.6) +def row_number(): + """ + Window function: returns a sequential number starting at 1 within a window partition. + """ + return _invoke_function("row_number") + + +@since(1.6) +def dense_rank(): + """ + Window function: returns the rank of rows within a window partition, without any gaps. + + The difference between rank and dense_rank is that dense_rank leaves no gaps in ranking + sequence when there are ties. 
That is, if you were ranking a competition using dense_rank + and had three people tie for second place, you would say that all three were in second + place and that the next person came in third. Rank would give me sequential numbers, making + the person that came in third place (after the ties) would register as coming in fifth. + + This is equivalent to the DENSE_RANK function in SQL. + """ + return _invoke_function("dense_rank") + + +@since(1.6) +def rank(): + """ + Window function: returns the rank of rows within a window partition. + + The difference between rank and dense_rank is that dense_rank leaves no gaps in ranking + sequence when there are ties. That is, if you were ranking a competition using dense_rank + and had three people tie for second place, you would say that all three were in second + place and that the next person came in third. Rank would give me sequential numbers, making + the person that came in third place (after the ties) would register as coming in fifth. + + This is equivalent to the RANK function in SQL. + """ + return _invoke_function("rank") + + +@since(1.6) +def cume_dist(): + """ + Window function: returns the cumulative distribution of values within a window partition, + i.e. the fraction of rows that are below the current row. + """ + return _invoke_function("cume_dist") + + +@since(1.6) +def percent_rank(): + """ + Window function: returns the relative rank (i.e. percentile) of rows within a window partition. + """ + return _invoke_function("percent_rank") @since(1.3) @@ -1645,21 +1959,68 @@ def raise_error(errMsg): # ---------------------- String/Binary functions ------------------------------ -_string_functions = { - 'upper': 'Converts a string expression to upper case.', - 'lower': 'Converts a string expression to lower case.', - 'ascii': 'Computes the numeric value of the first character of the string column.', - 'base64': 'Computes the BASE64 encoding of a binary column and returns it as a string column.', - 'unbase64': 'Decodes a BASE64 encoded string column and returns it as a binary column.', - 'ltrim': 'Trim the spaces from left end for the specified string value.', - 'rtrim': 'Trim the spaces from right end for the specified string value.', - 'trim': 'Trim the spaces from both ends for the specified string column.', -} +@since(1.5) +def upper(col): + """ + Converts a string expression to upper case. + """ + return _invoke_function_over_column("upper", col) + + +@since(1.5) +def lower(col): + """ + Converts a string expression to lower case. + """ + return _invoke_function_over_column("lower", col) + +@since(1.5) +def ascii(col): + """ + Computes the numeric value of the first character of the string column. + """ + return _invoke_function_over_column("ascii", col) -for _name, _doc in _string_functions.items(): - globals()[_name] = since(1.5)(_create_function_over_column(_name, _doc)) -del _name, _doc + +@since(1.5) +def base64(col): + """ + Computes the BASE64 encoding of a binary column and returns it as a string column. + """ + return _invoke_function_over_column("base64", col) + + +@since(1.5) +def unbase64(col): + """ + Decodes a BASE64 encoded string column and returns it as a binary column. + """ + return _invoke_function_over_column("unbase64", col) + + +@since(1.5) +def ltrim(col): + """ + Trim the spaces from left end for the specified string value. + """ + return _invoke_function_over_column("ltrim", col) + + +@since(1.5) +def rtrim(col): + """ + Trim the spaces from right end for the specified string value. 
+ """ + return _invoke_function_over_column("rtrim", col) + + +@since(1.5) +def trim(col): + """ + Trim the spaces from both ends for the specified string column. + """ + return _invoke_function_over_column("trim", col) @since(1.5) @@ -2231,7 +2592,7 @@ def element_at(col, extraction): """ sc = SparkContext._active_spark_context return Column(sc._jvm.functions.element_at( - _to_java_column(col), lit(extraction)._jc)) # noqa: F821 'lit' is dynamically defined. + _to_java_column(col), lit(extraction)._jc)) @since(2.4) @@ -3607,13 +3968,6 @@ def udf(f=None, returnType=StringType()): evalType=PythonEvalType.SQL_BATCHED_UDF) -ignored_fns = ['map', 'since'] -__all__ = [k for k, v in globals().items() - if not k.startswith('_') and k[0].islower() and callable(v) and k not in ignored_fns] -__all__ += ["PandasUDFType"] -__all__.sort() - - def _test(): import doctest from pyspark.sql import Row, SparkSession diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi index 779a29c086d5a..1d048efcc3ca5 100644 --- a/python/pyspark/sql/functions.pyi +++ b/python/pyspark/sql/functions.pyi @@ -258,9 +258,9 @@ def map_zip_with( ) -> Column: ... def abs(col: ColumnOrName) -> Column: ... def acos(col: ColumnOrName) -> Column: ... -def asc(col: ColumnOrName) -> Column: ... -def asc_nulls_first(col: ColumnOrName) -> Column: ... -def asc_nulls_last(col: ColumnOrName) -> Column: ... +def asc(col: str) -> Column: ... +def asc_nulls_first(col: str) -> Column: ... +def asc_nulls_last(col: str) -> Column: ... def ascii(col: ColumnOrName) -> Column: ... def asin(col: ColumnOrName) -> Column: ... def atan(col: ColumnOrName) -> Column: ... @@ -285,9 +285,9 @@ def count(col: ColumnOrName) -> Column: ... def cume_dist() -> Column: ... def degrees(col: ColumnOrName) -> Column: ... def dense_rank() -> Column: ... -def desc(col: ColumnOrName) -> Column: ... -def desc_nulls_first(col: ColumnOrName) -> Column: ... -def desc_nulls_last(col: ColumnOrName) -> Column: ... +def desc(col: str) -> Column: ... +def desc_nulls_first(col: str) -> Column: ... +def desc_nulls_last(col: str) -> Column: ... def exp(col: ColumnOrName) -> Column: ... def expm1(col: ColumnOrName) -> Column: ... def floor(col: ColumnOrName) -> Column: ... diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 26d260fe77b0c..cc77b8d5dfe3e 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -161,14 +161,20 @@ def test_rand_functions(self): def test_string_functions(self): from pyspark.sql import functions - from pyspark.sql.functions import col, lit, _string_functions + from pyspark.sql.functions import col, lit + string_functions = [ + "upper", "lower", "ascii", + "base64", "unbase64", + "ltrim", "rtrim", "trim" + ] + df = self.spark.createDataFrame([['nick']], schema=['name']) self.assertRaisesRegexp( TypeError, "must be the same type", lambda: df.select(col('name').substr(0, lit(1)))) - for name in _string_functions.keys(): + for name in string_functions: self.assertEqual( df.select(getattr(functions, name)("name")).first()[0], df.select(getattr(functions, name)(col("name"))).first()[0]) From 9818f079aa00a390c1cbd267022f42e05db6d67b Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Tue, 27 Oct 2020 14:03:57 +0900 Subject: [PATCH 0332/1009] [SPARK-33243][PYTHON][BUILD] Add numpydoc into documentation dependency ### What changes were proposed in this pull request? 
This PR proposes to initiate the migration to NumPy documentation style (from reST style) in PySpark docstrings. This PR also adds one migration example of `SparkContext`. - **Before:** ... ![Screen Shot 2020-10-26 at 7 02 05 PM](https://user-images.githubusercontent.com/6477701/97161090-a8ea0200-17c0-11eb-8204-0e70d18fc571.png) ... ![Screen Shot 2020-10-26 at 7 02 09 PM](https://user-images.githubusercontent.com/6477701/97161100-aab3c580-17c0-11eb-92ad-f5ad4441ce16.png) ... - **After:** ... ![Screen Shot 2020-10-26 at 7 24 08 PM](https://user-images.githubusercontent.com/6477701/97161219-d636b000-17c0-11eb-80ab-d17a570ecb4b.png) ... See also https://numpydoc.readthedocs.io/en/latest/format.html ### Why are the changes needed? There are many reasons for switching to NumPy documentation style. 1. Arguably reST style doesn't fit well when the docstring grows large because it provides (arguably) less structures and syntax. 2. NumPy documentation style provides a better human readable docstring format. For example, notebook users often just do `help(...)` by `pydoc`. 3. NumPy documentation style is pretty commonly used in data science libraries, for example, pandas, numpy, Dask, Koalas, matplotlib, ... Using NumPy documentation style can give users a consistent documentation style. ### Does this PR introduce _any_ user-facing change? The dependency itself doesn't change anything user-facing. The documentation change in `SparkContext` does, as shown above. ### How was this patch tested? Manually tested via running `cd python` and `make clean html`. Closes #30149 from HyukjinKwon/SPARK-33243. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- .github/workflows/build_and_test.yml | 4 +- dev/create-release/spark-rm/Dockerfile | 2 +- dev/lint-python | 9 ++ dev/requirements.txt | 1 + docs/README.md | 2 +- .../source/_templates/autosummary/class.rst | 38 ++++++++ .../{ => autosummary}/class_with_docs.rst | 0 python/docs/source/conf.py | 5 +- python/docs/source/reference/pyspark.ml.rst | 28 +++--- .../docs/source/reference/pyspark.mllib.rst | 26 +++--- python/docs/source/reference/pyspark.sql.rst | 2 +- python/pyspark/context.py | 93 +++++++++++-------- 12 files changed, 137 insertions(+), 73 deletions(-) create mode 100644 python/docs/source/_templates/autosummary/class.rst rename python/docs/source/_templates/{ => autosummary}/class_with_docs.rst (100%) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 5b06485b9959e..55c578e15724a 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -332,7 +332,7 @@ jobs: run: | # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. # See also https://github.com/sphinx-doc/sphinx/issues/7551. - pip3 install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme ipython nbsphinx mypy + pip3 install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme ipython nbsphinx mypy numpydoc - name: Install R 4.0 uses: r-lib/actions/setup-r@v1 with: @@ -353,7 +353,7 @@ jobs: sudo apt-get install -y libcurl4-openssl-dev pandoc # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. # See also https://github.com/sphinx-doc/sphinx/issues/7551. 
- pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme ipython nbsphinx + pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme ipython nbsphinx numpydoc gem install jekyll jekyll-redirect-from rouge sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')" - name: Scala linter diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile index 4e007a5eeb93a..6b32f10490719 100644 --- a/dev/create-release/spark-rm/Dockerfile +++ b/dev/create-release/spark-rm/Dockerfile @@ -36,7 +36,7 @@ ARG APT_INSTALL="apt-get install --no-install-recommends -y" # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. # See also https://github.com/sphinx-doc/sphinx/issues/7551. # We should use the latest Sphinx version once this is fixed. -ARG PIP_PKGS="sphinx==3.0.4 mkdocs==1.0.4 numpy==1.18.1 pydata_sphinx_theme==0.3.1 ipython==7.16.1 nbsphinx==0.7.1" +ARG PIP_PKGS="sphinx==3.0.4 mkdocs==1.0.4 numpy==1.18.1 pydata_sphinx_theme==0.3.1 ipython==7.16.1 nbsphinx==0.7.1 numpydoc==1.1.0" ARG GEM_PKGS="jekyll:4.0.0 jekyll-redirect-from:0.16.0 rouge:3.15.0" # Install extra needed repos and refresh. diff --git a/dev/lint-python b/dev/lint-python index 62664818dc106..2c244e0c0b297 100755 --- a/dev/lint-python +++ b/dev/lint-python @@ -126,6 +126,7 @@ function mypy_test { local MYPY_REPORT= local MYPY_STATUS= + # TODO(SPARK-32797): Install mypy on the Jenkins CI workers if ! hash "$MYPY_BUILD" 2> /dev/null; then echo "The $MYPY_BUILD command was not found. Skipping for now." return @@ -236,6 +237,14 @@ function sphinx_test { return fi + # TODO(SPARK-33242): Install numpydoc in Jenkins machines + PYTHON_HAS_NUMPYDOC=$("$PYTHON_EXECUTABLE" -c 'import importlib.util; print(importlib.util.find_spec("numpydoc") is not None)') + if [[ "$PYTHON_HAS_NUMPYDOC" == "False" ]]; then + echo "$PYTHON_EXECUTABLE does not have numpydoc installed. Skipping Sphinx build for now." + echo + return + fi + echo "starting $SPHINX_BUILD tests..." pushd python/docs &> /dev/null make clean &> /dev/null diff --git a/dev/requirements.txt b/dev/requirements.txt index b11f24fdbd4b2..c1546c8b8d4d3 100644 --- a/dev/requirements.txt +++ b/dev/requirements.txt @@ -6,3 +6,4 @@ sphinx pydata_sphinx_theme ipython nbsphinx +numpydoc diff --git a/docs/README.md b/docs/README.md index 09982c1301163..af51dca6180a9 100644 --- a/docs/README.md +++ b/docs/README.md @@ -63,7 +63,7 @@ See also https://github.com/sphinx-doc/sphinx/issues/7551. --> ```sh -$ sudo pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme ipython nbsphinx +$ sudo pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme ipython nbsphinx numpydoc ``` ## Generating the Documentation HTML diff --git a/python/docs/source/_templates/autosummary/class.rst b/python/docs/source/_templates/autosummary/class.rst new file mode 100644 index 0000000000000..d794f797ee2ad --- /dev/null +++ b/python/docs/source/_templates/autosummary/class.rst @@ -0,0 +1,38 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. 
Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. Workaround to avoud documenting __init__. + +{% extends "!autosummary/class.rst" %} + +{% if '__init__' in methods %} +{% set caught_result = methods.remove('__init__') %} +{% endif %} + +{% block methods %} +{% if methods %} + + .. rubric:: Methods + + .. autosummary:: + {% for item in methods %} + ~{{ name }}.{{ item }} + {%- endfor %} + +{% endif %} +{% endblock %} + diff --git a/python/docs/source/_templates/class_with_docs.rst b/python/docs/source/_templates/autosummary/class_with_docs.rst similarity index 100% rename from python/docs/source/_templates/class_with_docs.rst rename to python/docs/source/_templates/autosummary/class_with_docs.rst diff --git a/python/docs/source/conf.py b/python/docs/source/conf.py index 9d87bbe27df2a..a1bcd3d502a97 100644 --- a/python/docs/source/conf.py +++ b/python/docs/source/conf.py @@ -47,9 +47,12 @@ 'sphinx.ext.autosummary', 'nbsphinx', # Converts Jupyter Notebook to reStructuredText files for Sphinx. # For ipython directive in reStructuredText files. It is generated by the notebook. - 'IPython.sphinxext.ipython_console_highlighting' + 'IPython.sphinxext.ipython_console_highlighting', + 'numpydoc', # handle NumPy documentation formatted docstrings. ] +numpydoc_show_class_members = False + # Links used globally in the RST files. # These are defined here to allow link substitutions dynamically. rst_epilog = """ diff --git a/python/docs/source/reference/pyspark.ml.rst b/python/docs/source/reference/pyspark.ml.rst index 00d0e44e92715..5fafe5899f20b 100644 --- a/python/docs/source/reference/pyspark.ml.rst +++ b/python/docs/source/reference/pyspark.ml.rst @@ -25,7 +25,7 @@ ML Pipeline APIs .. currentmodule:: pyspark.ml .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ Transformer @@ -44,7 +44,7 @@ Parameters .. currentmodule:: pyspark.ml.param .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ Param @@ -58,7 +58,7 @@ Feature .. currentmodule:: pyspark.ml.feature .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ ANOVASelector @@ -126,7 +126,7 @@ Classification .. currentmodule:: pyspark.ml.classification .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ LinearSVC @@ -169,7 +169,7 @@ Clustering .. currentmodule:: pyspark.ml.clustering .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ BisectingKMeans @@ -205,7 +205,7 @@ Vector and Matrix .. currentmodule:: pyspark.ml.linalg .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ Vector @@ -224,7 +224,7 @@ Recommendation .. currentmodule:: pyspark.ml.recommendation .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ ALS @@ -237,7 +237,7 @@ Regression .. currentmodule:: pyspark.ml.regression .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ AFTSurvivalRegression @@ -268,7 +268,7 @@ Statistics .. currentmodule:: pyspark.ml.stat .. 
autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ ANOVATest @@ -287,7 +287,7 @@ Tuning .. currentmodule:: pyspark.ml.tuning .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ ParamGridBuilder @@ -303,7 +303,7 @@ Evaluation .. currentmodule:: pyspark.ml.evaluation .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ Evaluator @@ -321,7 +321,7 @@ Frequency Pattern Mining .. currentmodule:: pyspark.ml.fpm .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ FPGrowth @@ -335,7 +335,7 @@ Image .. currentmodule:: pyspark.ml.image .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ ImageSchema @@ -348,7 +348,7 @@ Utilities .. currentmodule:: pyspark.ml.util .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ BaseReadWrite diff --git a/python/docs/source/reference/pyspark.mllib.rst b/python/docs/source/reference/pyspark.mllib.rst index 1251b1df752c7..acc834c065ac3 100644 --- a/python/docs/source/reference/pyspark.mllib.rst +++ b/python/docs/source/reference/pyspark.mllib.rst @@ -25,7 +25,7 @@ Classification .. currentmodule:: pyspark.mllib.classification .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ LogisticRegressionModel @@ -44,7 +44,7 @@ Clustering .. currentmodule:: pyspark.mllib.clustering .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ @@ -68,7 +68,7 @@ Evaluation .. currentmodule:: pyspark.mllib.evaluation .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ BinaryClassificationMetrics @@ -83,7 +83,7 @@ Feature .. currentmodule:: pyspark.mllib.feature .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ Normalizer @@ -105,7 +105,7 @@ Frequency Pattern Mining .. currentmodule:: pyspark.mllib.fpm .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ FPGrowth @@ -120,7 +120,7 @@ Vector and Matrix .. currentmodule:: pyspark.mllib.linalg .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ Vector @@ -140,7 +140,7 @@ Distributed Representation .. currentmodule:: pyspark.mllib.linalg.distributed .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ BlockMatrix @@ -159,7 +159,7 @@ Random .. currentmodule:: pyspark.mllib.random .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ RandomRDDs @@ -171,7 +171,7 @@ Recommendation .. currentmodule:: pyspark.mllib.recommendation .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ MatrixFactorizationModel @@ -185,7 +185,7 @@ Regression .. currentmodule:: pyspark.mllib.regression .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ LabeledPoint @@ -208,7 +208,7 @@ Statistics .. currentmodule:: pyspark.mllib.stat .. 
autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ Statistics @@ -224,7 +224,7 @@ Tree .. currentmodule:: pyspark.mllib.tree .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ DecisionTreeModel @@ -241,7 +241,7 @@ Utilities .. currentmodule:: pyspark.mllib.util .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ JavaLoader diff --git a/python/docs/source/reference/pyspark.sql.rst b/python/docs/source/reference/pyspark.sql.rst index f067b5500c1f4..3f903fe8c7acd 100644 --- a/python/docs/source/reference/pyspark.sql.rst +++ b/python/docs/source/reference/pyspark.sql.rst @@ -263,7 +263,7 @@ Data Types .. currentmodule:: pyspark.sql.types .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ ArrayType diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 4213a742a1dc4..3f1643e2d21ac 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -63,12 +63,59 @@ class SparkContext(object): connection to a Spark cluster, and can be used to create :class:`RDD` and broadcast variables on that cluster. - .. note:: Only one :class:`SparkContext` should be active per JVM. You must `stop()` - the active :class:`SparkContext` before creating a new one. - - .. note:: :class:`SparkContext` instance is not supported to share across multiple - processes out of the box, and PySpark does not guarantee multi-processing execution. - Use threads instead for concurrent processing purpose. + When you create a new SparkContext, at least the master and app name should + be set, either through the named parameters here or through `conf`. + + Parameters + ---------- + master : str, optional + Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]). + appName : str, optional + A name for your job, to display on the cluster web UI. + sparkHome : str, optional + Location where Spark is installed on cluster nodes. + pyFiles : str, optional + Collection of .zip or .py files to send to the cluster + and add to PYTHONPATH. These can be paths on the local file + system or HDFS, HTTP, HTTPS, or FTP URLs. + environment : dict, optional + A dictionary of environment variables to set on + worker nodes. + batchSize : int, optional + The number of Python objects represented as a single + Java object. Set 1 to disable batching, 0 to automatically choose + the batch size based on object sizes, or -1 to use an unlimited + batch size + serializer : :class:`pyspark.serializers.Serializer`, optional + The serializer for RDDs. + conf : dict, optional + A :class:`SparkConf` object setting Spark properties. + gateway : optional + Use an existing gateway and JVM, otherwise a new JVM + will be instantiated. This is only used internally. + jsc : optional + The JavaSparkContext instance. This is only used internally. + profiler_cls : :class:`pyspark.profiler.Profiler`, optional + A class of custom Profiler used to do profiling + (default is :class:`pyspark.profiler.BasicProfiler`). + + Notes + ----- + Only one :class:`SparkContext` should be active per JVM. You must `stop()` + the active :class:`SparkContext` before creating a new one. + + :class:`SparkContext` instance is not supported to share across multiple + processes out of the box, and PySpark does not guarantee multi-processing execution. + Use threads instead for concurrent processing purpose. 
+ + Examples + -------- + >>> from pyspark.context import SparkContext + >>> sc = SparkContext('local', 'test') + >>> sc2 = SparkContext('local', 'test2') # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + ValueError:... """ _gateway = None @@ -83,40 +130,6 @@ class SparkContext(object): def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None, environment=None, batchSize=0, serializer=PickleSerializer(), conf=None, gateway=None, jsc=None, profiler_cls=BasicProfiler): - """ - Create a new SparkContext. At least the master and app name should be set, - either through the named parameters here or through `conf`. - - :param master: Cluster URL to connect to - (e.g. mesos://host:port, spark://host:port, local[4]). - :param appName: A name for your job, to display on the cluster web UI. - :param sparkHome: Location where Spark is installed on cluster nodes. - :param pyFiles: Collection of .zip or .py files to send to the cluster - and add to PYTHONPATH. These can be paths on the local file - system or HDFS, HTTP, HTTPS, or FTP URLs. - :param environment: A dictionary of environment variables to set on - worker nodes. - :param batchSize: The number of Python objects represented as a single - Java object. Set 1 to disable batching, 0 to automatically choose - the batch size based on object sizes, or -1 to use an unlimited - batch size - :param serializer: The serializer for RDDs. - :param conf: A :class:`SparkConf` object setting Spark properties. - :param gateway: Use an existing gateway and JVM, otherwise a new JVM - will be instantiated. - :param jsc: The JavaSparkContext instance (optional). - :param profiler_cls: A class of custom Profiler used to do profiling - (default is pyspark.profiler.BasicProfiler). - - - >>> from pyspark.context import SparkContext - >>> sc = SparkContext('local', 'test') - - >>> sc2 = SparkContext('local', 'test2') # doctest: +IGNORE_EXCEPTION_DETAIL - Traceback (most recent call last): - ... - ValueError:... - """ if (conf is None or conf.get("spark.executor.allowSparkContext", "false").lower() != "true"): # In order to prevent SparkContext from being created in executors. From 4b0e23e646b579b852056ffc87164b16adef5a09 Mon Sep 17 00:00:00 2001 From: Baohe Zhang Date: Tue, 27 Oct 2020 14:28:20 +0900 Subject: [PATCH 0333/1009] [SPARK-33215][WEBUI] Speed up event log download by skipping UI rebuild ### What changes were proposed in this pull request? This patch separates the view permission checks from the getAppUi in FsHistoryServerProvider, thus enabling SHS to do view permissions check of a given attempt for a given user without rebuilding the UI. This is achieved by adding a method "checkUIViewPermissions(appId: String, attemptId: Option[String], user: String): Boolean" to many layers of history server components. Currently, this feature is useful for event log download. ### Why are the changes needed? Right now, when we want to download the event logs from the spark history server, SHS will need to parse entire the event log to rebuild UI, and this is just for view permission checks. UI rebuilding is a time-consuming and memory-intensive task, especially for large logs. However, this process is unnecessary for event log download. With this patch, UI rebuild can be skipped when downloading event logs from the history server. Thus the time of downloading a GB scale event log can be reduced from several minutes to several seconds, and the memory consumption of UI rebuilding can be avoided. 
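For illustration, the permission check added here can be exercised directly against the provider, much like the new `FsHistoryProviderSuite` test further down; the application/attempt IDs, user name, and log directory below are made up, and the provider APIs are `private[history]`, so a sketch like this only compiles inside the `org.apache.spark.deploy.history` package:

```
import org.apache.spark.SparkConf

// Minimal sketch, assuming the log directory already contains the given application.
val conf = new SparkConf().set("spark.history.fs.logDirectory", "/tmp/spark-events")
val provider = new FsHistoryProvider(conf)
provider.checkForLogs()  // scan and index the event logs
// ACL check only: no event log replay and no SparkUI rebuild.
val allowed: Boolean =
  provider.checkUIViewPermissions("app-20201027123456-0001", Some("1"), "alice")
provider.stop()
```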
### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added test cases to confirm the view permission checks work properly and download event logs won't trigger UI loading. Also did some manual tests to verify the download speed can be drastically improved and the authentication works properly. Closes #30126 from baohe-zhang/bypass_ui_rebuild_for_log_download. Authored-by: Baohe Zhang Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../history/ApplicationHistoryProvider.scala | 7 +++ .../deploy/history/FsHistoryProvider.scala | 34 ++++++++---- .../spark/deploy/history/HistoryServer.scala | 5 ++ .../spark/status/api/v1/ApiRootResource.scala | 15 ++++++ .../api/v1/OneApplicationResource.scala | 9 ++-- .../scala/org/apache/spark/ui/SparkUI.scala | 5 ++ .../history/FsHistoryProviderSuite.scala | 54 ++++++++++++++++++- .../deploy/history/HistoryServerSuite.scala | 18 +++++++ 8 files changed, 132 insertions(+), 15 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala index 472b52957ed7f..f3f7db6bb0aba 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala @@ -150,4 +150,11 @@ private[history] abstract class ApplicationHistoryProvider { */ def onUIDetached(appId: String, attemptId: Option[String], ui: SparkUI): Unit = { } + /** + * Returns true if the given user has permission to view the UI of the given attempt. + * + * @throws NoSuchElementException if the given attempt doesn't exist + */ + def checkUIViewPermissions(appId: String, attemptId: Option[String], user: String): Boolean + } diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index 168bd1e68a304..400c82c1f9e63 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -359,15 +359,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) } val conf = this.conf.clone() - val secManager = new SecurityManager(conf) - - secManager.setAcls(historyUiAclsEnable) - // make sure to set admin acls before view acls so they are properly picked up - secManager.setAdminAcls(historyUiAdminAcls ++ stringToSeq(attempt.adminAcls.getOrElse(""))) - secManager.setViewAcls(attempt.info.sparkUser, stringToSeq(attempt.viewAcls.getOrElse(""))) - secManager.setAdminAclsGroups(historyUiAdminAclsGroups ++ - stringToSeq(attempt.adminAclsGroups.getOrElse(""))) - secManager.setViewAclsGroups(stringToSeq(attempt.viewAclsGroups.getOrElse(""))) + val secManager = createSecurityManager(conf, attempt) val kvstore = try { diskManager match { @@ -461,6 +453,17 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) } } + override def checkUIViewPermissions(appId: String, attemptId: Option[String], + user: String): Boolean = { + val app = load(appId) + val attempt = app.attempts.find(_.info.attemptId == attemptId).orNull + if (attempt == null) { + throw new NoSuchElementException() + } + val secManager = createSecurityManager(this.conf.clone(), attempt) + secManager.checkUIViewPermissions(user) + } + /** * Builds the application list based on the current contents of the log directory. 
* Tries to reuse as much of the data already in memory as possible, by not reading @@ -1376,6 +1379,19 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) endProcessing(rootPath) } } + + private def createSecurityManager(conf: SparkConf, + attempt: AttemptInfoWrapper): SecurityManager = { + val secManager = new SecurityManager(conf) + secManager.setAcls(historyUiAclsEnable) + // make sure to set admin acls before view acls so they are properly picked up + secManager.setAdminAcls(historyUiAdminAcls ++ stringToSeq(attempt.adminAcls.getOrElse(""))) + secManager.setViewAcls(attempt.info.sparkUser, stringToSeq(attempt.viewAcls.getOrElse(""))) + secManager.setAdminAclsGroups(historyUiAdminAclsGroups ++ + stringToSeq(attempt.adminAclsGroups.getOrElse(""))) + secManager.setViewAclsGroups(stringToSeq(attempt.viewAclsGroups.getOrElse(""))) + secManager + } } private[history] object FsHistoryProvider { diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala index ca21a8056d1b5..bb13f34818a62 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala @@ -128,6 +128,11 @@ class HistoryServer( appCache.withSparkUI(appId, attemptId)(fn) } + override def checkUIViewPermissions(appId: String, attemptId: Option[String], + user: String): Boolean = { + provider.checkUIViewPermissions(appId, attemptId, user) + } + initialize() /** diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/ApiRootResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/ApiRootResource.scala index 83f76db7e89da..cc21c1488f67c 100644 --- a/core/src/main/scala/org/apache/spark/status/api/v1/ApiRootResource.scala +++ b/core/src/main/scala/org/apache/spark/status/api/v1/ApiRootResource.scala @@ -95,6 +95,8 @@ private[spark] trait UIRoot { .build() } def securityManager: SecurityManager + + def checkUIViewPermissions(appId: String, attemptId: Option[String], user: String): Boolean } private[v1] object UIRootFromServletContext { @@ -145,6 +147,19 @@ private[v1] trait BaseAppResource extends ApiRequestContext { throw new NotFoundException(s"no such app: $appKey") } } + + protected def checkUIViewPermissions(): Unit = { + try { + val user = httpRequest.getRemoteUser() + if (!uiRoot.checkUIViewPermissions(appId, Option(attemptId), user)) { + throw new ForbiddenException(raw"""user "$user" is not authorized""") + } + } catch { + case _: NoSuchElementException => + val appKey = Option(attemptId).map(appId + "/" + _).getOrElse(appId) + throw new NotFoundException(s"no such app: $appKey") + } + } } private[v1] class ForbiddenException(msg: String) extends WebApplicationException( diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala index 536a1fcd59cd0..fb64ff5e60247 100644 --- a/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala +++ b/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala @@ -115,15 +115,14 @@ private[v1] class AbstractApplicationResource extends BaseAppResource { @Path("logs") @Produces(Array(MediaType.APPLICATION_OCTET_STREAM)) def getEventLogs(): Response = { - // Retrieve the UI for the application just to do access permission checks. 
For backwards - // compatibility, this code also tries with attemptId "1" if the UI without an attempt ID does - // not exist. + // For backwards compatibility, this code also tries with attemptId "1" if the UI + // without an attempt ID does not exist. try { - withUI { _ => } + checkUIViewPermissions() } catch { case _: NotFoundException if attemptId == null => attemptId = "1" - withUI { _ => } + checkUIViewPermissions() attemptId = null } diff --git a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala index 8ae9828c3fee1..b1769a8a9c9ee 100644 --- a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala @@ -110,6 +110,11 @@ private[spark] class SparkUI private ( } } + override def checkUIViewPermissions(appId: String, attemptId: Option[String], + user: String): Boolean = { + securityManager.checkUIViewPermissions(user) + } + def getApplicationInfoList: Iterator[ApplicationInfo] = { Iterator(new ApplicationInfo( id = appId, diff --git a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala index 1578b908b1b55..0b0754be2f56f 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala @@ -44,7 +44,7 @@ import org.apache.spark.deploy.history.EventLogTestHelper._ import org.apache.spark.internal.Logging import org.apache.spark.internal.config.DRIVER_LOG_DFS_DIR import org.apache.spark.internal.config.History._ -import org.apache.spark.internal.config.UI.{ADMIN_ACLS, ADMIN_ACLS_GROUPS, USER_GROUPS_MAPPING} +import org.apache.spark.internal.config.UI.{ADMIN_ACLS, ADMIN_ACLS_GROUPS, UI_VIEW_ACLS, UI_VIEW_ACLS_GROUPS, USER_GROUPS_MAPPING} import org.apache.spark.io._ import org.apache.spark.scheduler._ import org.apache.spark.scheduler.cluster.ExecutorInfo @@ -1524,6 +1524,58 @@ class FsHistoryProviderSuite extends SparkFunSuite with Matchers with Logging { } } + test("SPARK-33215: check ui view permissions without retrieving ui") { + val conf = createTestConf() + .set(HISTORY_SERVER_UI_ACLS_ENABLE, true) + .set(HISTORY_SERVER_UI_ADMIN_ACLS, Seq("user1", "user2")) + .set(HISTORY_SERVER_UI_ADMIN_ACLS_GROUPS, Seq("group1")) + .set(USER_GROUPS_MAPPING, classOf[TestGroupsMappingProvider].getName) + + val provider = new FsHistoryProvider(conf) + val log = newLogFile("app1", Some("attempt1"), inProgress = false) + writeFile(log, None, + SparkListenerApplicationStart("app1", Some("app1"), System.currentTimeMillis(), + "test", Some("attempt1")), + SparkListenerEnvironmentUpdate(Map( + "Spark Properties" -> List((UI_VIEW_ACLS.key, "user"), (UI_VIEW_ACLS_GROUPS.key, "group")), + "Hadoop Properties" -> Seq.empty, + "JVM Information" -> Seq.empty, + "System Properties" -> Seq.empty, + "Classpath Entries" -> Seq.empty + )), + SparkListenerApplicationEnd(System.currentTimeMillis())) + + provider.checkForLogs() + + // attempt2 doesn't exist + intercept[NoSuchElementException] { + provider.checkUIViewPermissions("app1", Some("attempt2"), "user1") + } + // app2 doesn't exist + intercept[NoSuchElementException] { + provider.checkUIViewPermissions("app2", Some("attempt1"), "user1") + } + + // user1 and user2 are admins + assert(provider.checkUIViewPermissions("app1", Some("attempt1"), "user1")) + assert(provider.checkUIViewPermissions("app1", Some("attempt1"), "user2")) + 
// user3 is a member of admin group "group1" + assert(provider.checkUIViewPermissions("app1", Some("attempt1"), "user3")) + // test is the app owner + assert(provider.checkUIViewPermissions("app1", Some("attempt1"), "test")) + // user is in the app's view acls + assert(provider.checkUIViewPermissions("app1", Some("attempt1"), "user")) + // user5 is a member of the app's view acls group "group" + assert(provider.checkUIViewPermissions("app1", Some("attempt1"), "user5")) + + // abc, user6, user7 don't have permissions + assert(!provider.checkUIViewPermissions("app1", Some("attempt1"), "abc")) + assert(!provider.checkUIViewPermissions("app1", Some("attempt1"), "user6")) + assert(!provider.checkUIViewPermissions("app1", Some("attempt1"), "user7")) + + provider.stop() + } + /** * Asks the provider to check for logs and calls a function to perform checks on the updated * app list. Example: diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala index 51e38f9cdcd2d..e4c23d3d1b1c3 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala @@ -584,6 +584,24 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers } } + test("SPARK-33215: speed up event log download by skipping UI rebuild") { + val appId = "local-1430917381535" + + stop() + init() + + val port = server.boundPort + val testUrls = Seq( + s"http://localhost:$port/api/v1/applications/$appId/logs", + s"http://localhost:$port/api/v1/applications/$appId/1/logs", + s"http://localhost:$port/api/v1/applications/$appId/2/logs") + + testUrls.foreach { url => + TestUtils.httpResponseCode(new URL(url)) + } + assert(server.cacheMetrics.loadCount.getCount === 0, "downloading event log shouldn't load ui") + } + test("access history application defaults to the last attempt id") { def getRedirectUrl(url: URL): (Int, String) = { From 537a49fc0966b0b289b67ac9c6ea20093165b0da Mon Sep 17 00:00:00 2001 From: "xuewei.linxuewei" Date: Tue, 27 Oct 2020 12:40:57 +0000 Subject: [PATCH 0334/1009] [SPARK-33140][SQL] remove SQLConf and SparkSession in all sub-class of Rule[QueryPlan] ### What changes were proposed in this pull request? Since Issue [SPARK-33139](https://issues.apache.org/jira/browse/SPARK-33139) has been done, and SQLConf.get and SparkSession.active are more reliable. We are trying to refine the existing code usage of passing SQLConf and SparkSession into sub-class of Rule[QueryPlan]. In this PR. * remove SQLConf from ctor-parameter of all sub-class of Rule[QueryPlan]. * using SQLConf.get to replace the original SQLConf instance. * remove SparkSession from ctor-parameter of all sub-class of Rule[QueryPlan]. * using SparkSession.active to replace the original SparkSession instance. ### Why are the changes needed? Code refine. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Existing test Closes #30097 from leanken/leanken-SPARK-33140. 
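As a hedged sketch of the pattern this change moves the rules toward (the rule name and the conf flag it reads are invented for illustration; only `SQLConf.get`, `SparkSession.active`, and the `Rule` base class come from Spark):

```
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.internal.SQLConf

// Before: class MyCustomRule(conf: SQLConf) extends Rule[LogicalPlan] { ... }
// After: no constructor parameter; the active conf is read when the rule runs.
object MyCustomRule extends Rule[LogicalPlan] {
  override def apply(plan: LogicalPlan): LogicalPlan = {
    // SQLConf.get resolves to the SQLConf of the active session on the current thread;
    // SparkSession.active plays the same role where a SparkSession used to be injected.
    if (SQLConf.get.caseSensitiveAnalysis) plan else plan  // no-op body, illustration only
  }
}
```

The diff below applies exactly this substitution to rules such as `ResolveJoinStrategyHints` and `ResolveCoalesceHints`, which become plain objects.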
Authored-by: xuewei.linxuewei Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 18 ++--- .../sql/catalyst/analysis/ResolveHints.scala | 14 ++-- .../analysis/ResolveInlineTables.scala | 3 +- .../SubstituteUnresolvedOrdinals.scala | 3 +- .../sql/catalyst/analysis/TypeCoercion.scala | 18 ++--- .../analysis/higherOrderFunctions.scala | 2 +- .../catalyst/analysis/timeZoneAnalysis.scala | 2 +- .../optimizer/CostBasedJoinReorder.scala | 2 - .../spark/sql/catalyst/rules/Rule.scala | 3 + .../sql/catalyst/analysis/AnalysisSuite.scala | 6 +- .../sql/catalyst/analysis/AnalysisTest.scala | 76 +++++++++---------- .../analysis/DataSourceV2AnalysisSuite.scala | 46 +++++++---- .../ResolveGroupingAnalyticsSuite.scala | 2 +- .../analysis/ResolveInlineTablesSuite.scala | 26 +++---- .../ResolveLambdaVariablesSuite.scala | 2 +- .../ResolvedUuidExpressionsSuite.scala | 2 +- .../SubstituteUnresolvedOrdinalsSuite.scala | 20 +++-- .../catalyst/analysis/TypeCoercionSuite.scala | 10 +-- .../expressions/ExpressionEvalHelper.scala | 2 +- .../expressions/ObjectExpressionsSuite.scala | 2 +- .../expressions/SelectedFieldSuite.scala | 2 +- .../optimizer/AggregateOptimizeSuite.scala | 22 +++--- .../optimizer/EliminateSortsSuite.scala | 36 ++++----- .../analysis/ResolveSessionCatalog.scala | 1 - .../apache/spark/sql/execution/Columnar.scala | 2 - .../spark/sql/execution/QueryExecution.scala | 21 +++-- .../execution/RemoveRedundantProjects.scala | 2 +- .../sql/execution/WholeStageCodegenExec.scala | 1 - .../sql/execution/adaptive/AQEOptimizer.scala | 2 +- .../adaptive/AdaptiveSparkPlanExec.scala | 18 ++--- .../adaptive/CoalesceShufflePartitions.scala | 6 +- .../adaptive/DemoteBroadcastHashJoin.scala | 2 +- .../adaptive/InsertAdaptiveSparkPlan.scala | 2 - .../adaptive/OptimizeLocalShuffleReader.scala | 8 +- .../adaptive/OptimizeSkewedJoin.scala | 4 +- .../adaptive/ReuseAdaptiveSubquery.scala | 1 - .../analysis/DetectAmbiguousSelfJoin.scala | 2 +- .../bucketing/CoalesceBucketsInJoin.scala | 2 +- .../DisableUnnecessaryBucketedScan.scala | 2 +- .../datasources/DataSourceStrategy.scala | 10 +-- .../datasources/FallBackFileSourceV2.scala | 4 +- .../sql/execution/datasources/rules.scala | 31 ++++---- .../PlanDynamicPruningFilters.scala | 7 +- .../exchange/EnsureRequirements.scala | 2 +- .../sql/execution/exchange/Exchange.scala | 2 +- .../apache/spark/sql/execution/subquery.scala | 8 +- .../internal/BaseSessionStateBuilder.scala | 16 ++-- .../V2CommandsCaseSensitivitySuite.scala | 2 +- .../sql/execution/ColumnarRulesSuite.scala | 4 +- .../spark/sql/execution/PlannerSuite.scala | 24 +++--- .../RemoveRedundantProjectsSuite.scala | 2 +- .../CoalesceBucketsInJoinSuite.scala | 2 +- .../command/PlanResolutionSuite.scala | 4 +- .../exchange/EnsureRequirementsSuite.scala | 12 +-- .../execution/joins/BroadcastJoinSuite.scala | 6 +- .../execution/joins/ExistenceJoinSuite.scala | 20 ++--- .../sql/execution/joins/InnerJoinSuite.scala | 6 +- .../sql/execution/joins/OuterJoinSuite.scala | 4 +- .../sql/sources/DataSourceAnalysisSuite.scala | 40 +++++----- .../sql/hive/HiveSessionStateBuilder.scala | 24 +++--- .../spark/sql/hive/HiveStrategies.scala | 12 ++- .../execution/PruneHiveTablePartitions.scala | 11 ++- .../PruneHiveTablePartitionsSuite.scala | 2 +- 63 files changed, 327 insertions(+), 323 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 457c41c39a196..39816f499944b 
100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -210,12 +210,12 @@ class Analyzer( CTESubstitution, WindowsSubstitution, EliminateUnions, - new SubstituteUnresolvedOrdinals(conf)), + SubstituteUnresolvedOrdinals), Batch("Disable Hints", Once, - new ResolveHints.DisableHints(conf)), + new ResolveHints.DisableHints), Batch("Hints", fixedPoint, - new ResolveHints.ResolveJoinStrategyHints(conf), - new ResolveHints.ResolveCoalesceHints(conf)), + ResolveHints.ResolveJoinStrategyHints, + ResolveHints.ResolveCoalesceHints), Batch("Simple Sanity Check", Once, LookupFunctions), Batch("Resolution", fixedPoint, @@ -249,19 +249,19 @@ class Analyzer( GlobalAggregates :: ResolveAggregateFunctions :: TimeWindowing :: - ResolveInlineTables(conf) :: + ResolveInlineTables :: ResolveHigherOrderFunctions(v1SessionCatalog) :: - ResolveLambdaVariables(conf) :: - ResolveTimeZone(conf) :: + ResolveLambdaVariables :: + ResolveTimeZone :: ResolveRandomSeed :: ResolveBinaryArithmetic :: ResolveUnion :: - TypeCoercion.typeCoercionRules(conf) ++ + TypeCoercion.typeCoercionRules ++ extendedResolutionRules : _*), Batch("Post-Hoc Resolution", Once, postHocResolutionRules: _*), Batch("Normalize Alter Table", Once, ResolveAlterTableChanges), Batch("Remove Unresolved Hints", Once, - new ResolveHints.RemoveAllHints(conf)), + new ResolveHints.RemoveAllHints), Batch("Nondeterministic", Once, PullOutNondeterministic), Batch("UDF", Once, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala index c0a9414d61f8f..f1706c11e92ec 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala @@ -50,7 +50,7 @@ object ResolveHints { * * This rule must happen before common table expressions. */ - class ResolveJoinStrategyHints(conf: SQLConf) extends Rule[LogicalPlan] { + object ResolveJoinStrategyHints extends Rule[LogicalPlan] { private val STRATEGY_HINT_NAMES = JoinStrategyHint.strategies.flatMap(_.hintAliases) private val hintErrorHandler = conf.hintErrorHandler @@ -171,7 +171,9 @@ object ResolveHints { /** * COALESCE Hint accepts names "COALESCE", "REPARTITION", and "REPARTITION_BY_RANGE". */ - class ResolveCoalesceHints(conf: SQLConf) extends Rule[LogicalPlan] { + object ResolveCoalesceHints extends Rule[LogicalPlan] { + + val COALESCE_HINT_NAMES: Set[String] = Set("COALESCE", "REPARTITION", "REPARTITION_BY_RANGE") /** * This function handles hints for "COALESCE" and "REPARTITION". @@ -260,15 +262,11 @@ object ResolveHints { } } - object ResolveCoalesceHints { - val COALESCE_HINT_NAMES: Set[String] = Set("COALESCE", "REPARTITION", "REPARTITION_BY_RANGE") - } - /** * Removes all the hints, used to remove invalid hints provided by the user. * This must be executed after all the other hint rules are executed. */ - class RemoveAllHints(conf: SQLConf) extends Rule[LogicalPlan] { + class RemoveAllHints extends Rule[LogicalPlan] { private val hintErrorHandler = conf.hintErrorHandler @@ -284,7 +282,7 @@ object ResolveHints { * This is executed at the very beginning of the Analyzer to disable * the hint functionality. 
*/ - class DisableHints(conf: SQLConf) extends RemoveAllHints(conf: SQLConf) { + class DisableHints extends RemoveAllHints { override def apply(plan: LogicalPlan): LogicalPlan = { if (conf.getConf(SQLConf.DISABLE_HINTS)) super.apply(plan) else plan } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala index 4edfe507a7580..ab735c74ced9d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala @@ -22,13 +22,12 @@ import scala.util.control.NonFatal import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{StructField, StructType} /** * An analyzer rule that replaces [[UnresolvedInlineTable]] with [[LocalRelation]]. */ -case class ResolveInlineTables(conf: SQLConf) extends Rule[LogicalPlan] with CastSupport { +object ResolveInlineTables extends Rule[LogicalPlan] with CastSupport { override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { case table: UnresolvedInlineTable if table.expressionsResolved => validateInputDimension(table) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala index 860d20f897690..1e7480a69e40f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala @@ -21,13 +21,12 @@ import org.apache.spark.sql.catalyst.expressions.{Expression, Literal, SortOrder import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan, Sort} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.CurrentOrigin.withOrigin -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.IntegerType /** * Replaces ordinal in 'order by' or 'group by' with UnresolvedOrdinal expression. 
*/ -class SubstituteUnresolvedOrdinals(conf: SQLConf) extends Rule[LogicalPlan] { +object SubstituteUnresolvedOrdinals extends Rule[LogicalPlan] { private def isIntLiteral(e: Expression) = e match { case Literal(_, IntegerType) => true case _ => false diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index f72d9be205df3..b69cb6091f02c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -47,16 +47,16 @@ import org.apache.spark.sql.types._ */ object TypeCoercion { - def typeCoercionRules(conf: SQLConf): List[Rule[LogicalPlan]] = - InConversion(conf) :: + def typeCoercionRules: List[Rule[LogicalPlan]] = + InConversion :: WidenSetOperationTypes :: - PromoteStrings(conf) :: + PromoteStrings :: DecimalPrecision :: BooleanEquality :: FunctionArgumentConversion :: - ConcatCoercion(conf) :: + ConcatCoercion :: MapZipWithCoercion :: - EltCoercion(conf) :: + EltCoercion :: CaseWhenCoercion :: IfCoercion :: StackCoercion :: @@ -414,7 +414,7 @@ object TypeCoercion { /** * Promotes strings that appear in arithmetic expressions. */ - case class PromoteStrings(conf: SQLConf) extends TypeCoercionRule { + object PromoteStrings extends TypeCoercionRule { private def castExpr(expr: Expression, targetType: DataType): Expression = { (expr.dataType, targetType) match { case (NullType, dt) => Literal.create(null, targetType) @@ -481,7 +481,7 @@ object TypeCoercion { * operator type is found the original expression will be returned and an * Analysis Exception will be raised at the type checking phase. */ - case class InConversion(conf: SQLConf) extends TypeCoercionRule { + object InConversion extends TypeCoercionRule { override protected def coerceTypes( plan: LogicalPlan): LogicalPlan = plan resolveExpressions { // Skip nodes who's children have not been resolved yet. @@ -786,7 +786,7 @@ object TypeCoercion { * If `spark.sql.function.concatBinaryAsString` is false and all children types are binary, * the expected types are binary. Otherwise, the expected ones are strings. */ - case class ConcatCoercion(conf: SQLConf) extends TypeCoercionRule { + object ConcatCoercion extends TypeCoercionRule { override protected def coerceTypes(plan: LogicalPlan): LogicalPlan = { plan resolveOperators { case p => @@ -834,7 +834,7 @@ object TypeCoercion { * If `spark.sql.function.eltOutputAsString` is false and all children types are binary, * the expected types are binary. Otherwise, the expected ones are strings. 
*/ - case class EltCoercion(conf: SQLConf) extends TypeCoercionRule { + object EltCoercion extends TypeCoercionRule { override protected def coerceTypes(plan: LogicalPlan): LogicalPlan = { plan resolveOperators { case p => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala index 11f94762d43e0..e10af3d5cc68d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala @@ -70,7 +70,7 @@ case class ResolveHigherOrderFunctions(catalog: SessionCatalog) extends Rule[Log * be a lambda function defined in an outer scope, or a attribute in produced by the plan's * child. If names are duplicate, the name defined in the most inner scope is used. */ -case class ResolveLambdaVariables(conf: SQLConf) extends Rule[LogicalPlan] { +object ResolveLambdaVariables extends Rule[LogicalPlan] { type LambdaVariableMap = Map[String, NamedExpression] diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/timeZoneAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/timeZoneAnalysis.scala index a27aa845bf0ae..d8062744a4264 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/timeZoneAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/timeZoneAnalysis.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.types.DataType * Replace [[TimeZoneAwareExpression]] without timezone id by its copy with session local * time zone. */ -case class ResolveTimeZone(conf: SQLConf) extends Rule[LogicalPlan] { +object ResolveTimeZone extends Rule[LogicalPlan] { private val transformTimeZoneExprs: PartialFunction[Expression, Expression] = { case e: TimeZoneAwareExpression if e.timeZoneId.isEmpty => e.withTimeZone(conf.sessionLocalTimeZone) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala index 45541051a6b13..11b675e75869e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala @@ -34,8 +34,6 @@ import org.apache.spark.sql.internal.SQLConf */ object CostBasedJoinReorder extends Rule[LogicalPlan] with PredicateHelper { - private def conf = SQLConf.get - def apply(plan: LogicalPlan): LogicalPlan = { if (!conf.cboEnabled || !conf.joinReorderEnabled) { plan diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/Rule.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/Rule.scala index 7eb72724d7663..a774217ecc832 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/Rule.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/Rule.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.rules import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.trees.TreeNode +import org.apache.spark.sql.internal.SQLConf abstract class Rule[TreeType <: TreeNode[_]] extends Logging { @@ -29,4 +30,6 @@ abstract class Rule[TreeType <: TreeNode[_]] extends Logging { } def apply(plan: TreeType): TreeType + + def conf: SQLConf = SQLConf.get } 
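The test-suite changes below follow the same idea: instead of constructing a rule with a modified `SQLConf` copy, they wrap the assertion in `withSQLConf` so the object rule observes the setting through `SQLConf.get`. A minimal sketch of that pattern, assuming a hypothetical suite (the suite name, relation, and test title are illustrative, not part of this PR):

```
import org.apache.spark.sql.catalyst.analysis.SubstituteUnresolvedOrdinals
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.LocalRelation
import org.apache.spark.sql.internal.SQLConf

class ExampleOrdinalSuite extends PlanTest {
  private val testRelation = LocalRelation('a.int, 'b.int) // hypothetical relation

  test("ordinal substitution can be toggled through the session conf") {
    val plan = testRelation.orderBy(Literal(1).asc)
    // Previously: new SubstituteUnresolvedOrdinals(conf.copy(SQLConf.ORDER_BY_ORDINAL -> false))
    // Now: flip the flag with withSQLConf and call the object rule directly.
    withSQLConf(SQLConf.ORDER_BY_ORDINAL.key -> "false") {
      comparePlans(SubstituteUnresolvedOrdinals(plan), plan)
    }
  }
}
```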
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 4f51b77d8ece0..37dcee1e59ee8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -197,7 +197,7 @@ class AnalysisSuite extends AnalysisTest with Matchers { } test("divide should be casted into fractional types") { - val plan = caseInsensitiveAnalyzer.execute( + val plan = getAnalyzer.execute( testRelation2.select( $"a" / Literal(2) as "div1", $"a" / $"b" as "div2", @@ -258,13 +258,13 @@ class AnalysisSuite extends AnalysisTest with Matchers { CreateStruct(Seq(att1, ((att1.as("aa")) + 1).as("a_plus_1"))).as("col"), att1 ) - val prevPlan = getAnalyzer(true).execute(plan) + val prevPlan = getAnalyzer.execute(plan) plan = prevPlan.select(CreateArray(Seq( CreateStruct(Seq(att1, (att1 + 1).as("a_plus_1"))).as("col1"), /** alias should be eliminated by [[CleanupAliases]] */ "col".attr.as("col2") )).as("arr")) - plan = getAnalyzer(true).execute(plan) + plan = getAnalyzer.execute(plan) val expectedPlan = prevPlan.select( CreateArray(Seq( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala index 4473c20b2cca6..8c14ffffa17a5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala @@ -31,13 +31,9 @@ import org.apache.spark.sql.internal.SQLConf trait AnalysisTest extends PlanTest { - protected lazy val caseSensitiveAnalyzer = makeAnalyzer(caseSensitive = true) - protected lazy val caseInsensitiveAnalyzer = makeAnalyzer(caseSensitive = false) - protected def extendedAnalysisRules: Seq[Rule[LogicalPlan]] = Nil - private def makeAnalyzer(caseSensitive: Boolean): Analyzer = { - val conf = new SQLConf().copy(SQLConf.CASE_SENSITIVE -> caseSensitive) + protected def getAnalyzer: Analyzer = { val catalog = new SessionCatalog(new InMemoryCatalog, FunctionRegistry.builtin, conf) catalog.createDatabase( CatalogDatabase("default", "", new URI("loc"), Map.empty), @@ -52,17 +48,15 @@ trait AnalysisTest extends PlanTest { } } - protected def getAnalyzer(caseSensitive: Boolean) = { - if (caseSensitive) caseSensitiveAnalyzer else caseInsensitiveAnalyzer - } - protected def checkAnalysis( inputPlan: LogicalPlan, expectedPlan: LogicalPlan, caseSensitive: Boolean = true): Unit = { - val analyzer = getAnalyzer(caseSensitive) - val actualPlan = analyzer.executeAndCheck(inputPlan, new QueryPlanningTracker) - comparePlans(actualPlan, expectedPlan) + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + val analyzer = getAnalyzer + val actualPlan = analyzer.executeAndCheck(inputPlan, new QueryPlanningTracker) + comparePlans(actualPlan, expectedPlan) + } } protected override def comparePlans( @@ -76,18 +70,20 @@ trait AnalysisTest extends PlanTest { protected def assertAnalysisSuccess( inputPlan: LogicalPlan, caseSensitive: Boolean = true): Unit = { - val analyzer = getAnalyzer(caseSensitive) - val analysisAttempt = analyzer.execute(inputPlan) - try analyzer.checkAnalysis(analysisAttempt) catch { - case a: AnalysisException => - fail( - s""" - |Failed to Analyze Plan - |$inputPlan - | - |Partial Analysis - 
|$analysisAttempt - """.stripMargin, a) + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + val analyzer = getAnalyzer + val analysisAttempt = analyzer.execute(inputPlan) + try analyzer.checkAnalysis(analysisAttempt) catch { + case a: AnalysisException => + fail( + s""" + |Failed to Analyze Plan + |$inputPlan + | + |Partial Analysis + |$analysisAttempt + """.stripMargin, a) + } } } @@ -95,22 +91,24 @@ trait AnalysisTest extends PlanTest { inputPlan: LogicalPlan, expectedErrors: Seq[String], caseSensitive: Boolean = true): Unit = { - val analyzer = getAnalyzer(caseSensitive) - val e = intercept[AnalysisException] { - analyzer.checkAnalysis(analyzer.execute(inputPlan)) - } + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + val analyzer = getAnalyzer + val e = intercept[AnalysisException] { + analyzer.checkAnalysis(analyzer.execute(inputPlan)) + } - if (!expectedErrors.map(_.toLowerCase(Locale.ROOT)).forall( - e.getMessage.toLowerCase(Locale.ROOT).contains)) { - fail( - s"""Exception message should contain the following substrings: - | - | ${expectedErrors.mkString("\n ")} - | - |Actual exception message: - | - | ${e.getMessage} - """.stripMargin) + if (!expectedErrors.map(_.toLowerCase(Locale.ROOT)).forall( + e.getMessage.toLowerCase(Locale.ROOT).contains)) { + fail( + s"""Exception message should contain the following substrings: + | + | ${expectedErrors.mkString("\n ")} + | + |Actual exception message: + | + | ${e.getMessage} + """.stripMargin) + } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala index e466d558db1ef..7a2320f931da3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala @@ -114,28 +114,50 @@ case class TestRelationAcceptAnySchema(output: Seq[AttributeReference]) } abstract class DataSourceV2ANSIAnalysisSuite extends DataSourceV2AnalysisBaseSuite { - override def getSQLConf(caseSensitive: Boolean): SQLConf = - super.getSQLConf(caseSensitive) - .copy(SQLConf.STORE_ASSIGNMENT_POLICY -> StoreAssignmentPolicy.ANSI) - // For Ansi store assignment policy, expression `AnsiCast` is used instead of `Cast`. 
override def checkAnalysis( inputPlan: LogicalPlan, expectedPlan: LogicalPlan, - caseSensitive: Boolean): Unit = { + caseSensitive: Boolean = true): Unit = { val expectedPlanWithAnsiCast = expectedPlan transformAllExpressions { case c: Cast => AnsiCast(c.child, c.dataType, c.timeZoneId) case other => other } - super.checkAnalysis(inputPlan, expectedPlanWithAnsiCast, caseSensitive) + + withSQLConf(SQLConf.STORE_ASSIGNMENT_POLICY.key -> StoreAssignmentPolicy.ANSI.toString) { + super.checkAnalysis(inputPlan, expectedPlanWithAnsiCast, caseSensitive) + } + } + + override def assertAnalysisError( + inputPlan: LogicalPlan, + expectedErrors: Seq[String], + caseSensitive: Boolean = true): Unit = { + withSQLConf(SQLConf.STORE_ASSIGNMENT_POLICY.key -> StoreAssignmentPolicy.ANSI.toString) { + super.assertAnalysisError(inputPlan, expectedErrors, caseSensitive) + } } } abstract class DataSourceV2StrictAnalysisSuite extends DataSourceV2AnalysisBaseSuite { - override def getSQLConf(caseSensitive: Boolean): SQLConf = - super.getSQLConf(caseSensitive) - .copy(SQLConf.STORE_ASSIGNMENT_POLICY -> StoreAssignmentPolicy.STRICT) + override def checkAnalysis( + inputPlan: LogicalPlan, + expectedPlan: LogicalPlan, + caseSensitive: Boolean = true): Unit = { + withSQLConf(SQLConf.STORE_ASSIGNMENT_POLICY.key -> StoreAssignmentPolicy.STRICT.toString) { + super.checkAnalysis(inputPlan, expectedPlan, caseSensitive) + } + } + + override def assertAnalysisError( + inputPlan: LogicalPlan, + expectedErrors: Seq[String], + caseSensitive: Boolean = true): Unit = { + withSQLConf(SQLConf.STORE_ASSIGNMENT_POLICY.key -> StoreAssignmentPolicy.STRICT.toString) { + super.assertAnalysisError(inputPlan, expectedErrors, caseSensitive) + } + } test("byName: fail canWrite check") { val parsedPlan = byName(table, widerTable) @@ -200,11 +222,7 @@ abstract class DataSourceV2StrictAnalysisSuite extends DataSourceV2AnalysisBaseS abstract class DataSourceV2AnalysisBaseSuite extends AnalysisTest { - protected def getSQLConf(caseSensitive: Boolean): SQLConf = - new SQLConf().copy(SQLConf.CASE_SENSITIVE -> caseSensitive) - - override def getAnalyzer(caseSensitive: Boolean): Analyzer = { - val conf = getSQLConf(caseSensitive) + override def getAnalyzer: Analyzer = { val catalog = new SessionCatalog(new InMemoryCatalog, FunctionRegistry.builtin, conf) catalog.createDatabase( CatalogDatabase("default", "", new URI("loc"), Map.empty), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala index 7284a6a30ef7e..249e7a49a0a90 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala @@ -110,7 +110,7 @@ class ResolveGroupingAnalyticsSuite extends AnalysisTest { Seq(UnresolvedAlias(Multiply(unresolved_a, Literal(2))), unresolved_b, UnresolvedAlias(count(unresolved_c)))) - val resultPlan = getAnalyzer(true).executeAndCheck(originalPlan2, new QueryPlanningTracker) + val resultPlan = getAnalyzer.executeAndCheck(originalPlan2, new QueryPlanningTracker) val gExpressions = resultPlan.asInstanceOf[Aggregate].groupingExpressions assert(gExpressions.size == 3) val firstGroupingExprAttrName = diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTablesSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTablesSuite.scala index 9e99c8e11cdfe..16d23153c1c53 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTablesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTablesSuite.scala @@ -35,53 +35,53 @@ class ResolveInlineTablesSuite extends AnalysisTest with BeforeAndAfter { private def lit(v: Any): Literal = Literal(v) test("validate inputs are foldable") { - ResolveInlineTables(conf).validateInputEvaluable( + ResolveInlineTables.validateInputEvaluable( UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(lit(1))))) // nondeterministic (rand) should not work intercept[AnalysisException] { - ResolveInlineTables(conf).validateInputEvaluable( + ResolveInlineTables.validateInputEvaluable( UnresolvedInlineTable(Seq("c1"), Seq(Seq(Rand(1))))) } // aggregate should not work intercept[AnalysisException] { - ResolveInlineTables(conf).validateInputEvaluable( + ResolveInlineTables.validateInputEvaluable( UnresolvedInlineTable(Seq("c1"), Seq(Seq(Count(lit(1)))))) } // unresolved attribute should not work intercept[AnalysisException] { - ResolveInlineTables(conf).validateInputEvaluable( + ResolveInlineTables.validateInputEvaluable( UnresolvedInlineTable(Seq("c1"), Seq(Seq(UnresolvedAttribute("A"))))) } } test("validate input dimensions") { - ResolveInlineTables(conf).validateInputDimension( + ResolveInlineTables.validateInputDimension( UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(2))))) // num alias != data dimension intercept[AnalysisException] { - ResolveInlineTables(conf).validateInputDimension( + ResolveInlineTables.validateInputDimension( UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(lit(1)), Seq(lit(2))))) } // num alias == data dimension, but data themselves are inconsistent intercept[AnalysisException] { - ResolveInlineTables(conf).validateInputDimension( + ResolveInlineTables.validateInputDimension( UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(21), lit(22))))) } } test("do not fire the rule if not all expressions are resolved") { val table = UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(UnresolvedAttribute("A")))) - assert(ResolveInlineTables(conf)(table) == table) + assert(ResolveInlineTables(table) == table) } test("convert") { val table = UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(2L)))) - val converted = ResolveInlineTables(conf).convert(table) + val converted = ResolveInlineTables.convert(table) assert(converted.output.map(_.dataType) == Seq(LongType)) assert(converted.data.size == 2) @@ -92,8 +92,8 @@ class ResolveInlineTablesSuite extends AnalysisTest with BeforeAndAfter { test("convert TimeZoneAwareExpression") { val table = UnresolvedInlineTable(Seq("c1"), Seq(Seq(Cast(lit("1991-12-06 00:00:00.0"), TimestampType)))) - val withTimeZone = ResolveTimeZone(conf).apply(table) - val LocalRelation(output, data, _) = ResolveInlineTables(conf).apply(withTimeZone) + val withTimeZone = ResolveTimeZone.apply(table) + val LocalRelation(output, data, _) = ResolveInlineTables.apply(withTimeZone) val correct = Cast(lit("1991-12-06 00:00:00.0"), TimestampType) .withTimeZone(conf.sessionLocalTimeZone).eval().asInstanceOf[Long] assert(output.map(_.dataType) == Seq(TimestampType)) @@ -103,11 +103,11 @@ class ResolveInlineTablesSuite extends AnalysisTest with BeforeAndAfter { test("nullability inference in convert") { val table1 = UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(2L)))) - val 
converted1 = ResolveInlineTables(conf).convert(table1) + val converted1 = ResolveInlineTables.convert(table1) assert(!converted1.schema.fields(0).nullable) val table2 = UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(Literal(null, NullType)))) - val converted2 = ResolveInlineTables(conf).convert(table2) + val converted2 = ResolveInlineTables.convert(table2) assert(converted2.schema.fields(0).nullable) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveLambdaVariablesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveLambdaVariablesSuite.scala index a5847ba7c522d..b9233a27f3d7a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveLambdaVariablesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveLambdaVariablesSuite.scala @@ -32,7 +32,7 @@ class ResolveLambdaVariablesSuite extends PlanTest { import org.apache.spark.sql.catalyst.dsl.plans._ object Analyzer extends RuleExecutor[LogicalPlan] { - val batches = Batch("Resolution", FixedPoint(4), ResolveLambdaVariables(conf)) :: Nil + val batches = Batch("Resolution", FixedPoint(4), ResolveLambdaVariables) :: Nil } private val key = 'key.int diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolvedUuidExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolvedUuidExpressionsSuite.scala index 64bd07534b19b..5ddfa9f2191e0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolvedUuidExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolvedUuidExpressionsSuite.scala @@ -36,7 +36,7 @@ class ResolvedUuidExpressionsSuite extends AnalysisTest { private lazy val uuid1Ref = uuid1.toAttribute private val tracker = new QueryPlanningTracker - private val analyzer = getAnalyzer(caseSensitive = true) + private val analyzer = getAnalyzer private def getUuidExpressions(plan: LogicalPlan): Seq[Uuid] = { plan.flatMap { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala index 2331346f325aa..c0312282c76c8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala @@ -36,31 +36,35 @@ class SubstituteUnresolvedOrdinalsSuite extends AnalysisTest { // Tests order by ordinal, apply single rule. val plan = testRelation2.orderBy(Literal(1).asc, Literal(2).asc) comparePlans( - new SubstituteUnresolvedOrdinals(conf).apply(plan), + SubstituteUnresolvedOrdinals.apply(plan), testRelation2.orderBy(UnresolvedOrdinal(1).asc, UnresolvedOrdinal(2).asc)) // Tests order by ordinal, do full analysis checkAnalysis(plan, testRelation2.orderBy(a.asc, b.asc)) // order by ordinal can be turned off by config - comparePlans( - new SubstituteUnresolvedOrdinals(conf.copy(SQLConf.ORDER_BY_ORDINAL -> false)).apply(plan), - testRelation2.orderBy(Literal(1).asc, Literal(2).asc)) + withSQLConf(SQLConf.ORDER_BY_ORDINAL.key -> "false") { + comparePlans( + SubstituteUnresolvedOrdinals.apply(plan), + testRelation2.orderBy(Literal(1).asc, Literal(2).asc)) + } } test("group by ordinal") { // Tests group by ordinal, apply single rule. 
val plan2 = testRelation2.groupBy(Literal(1), Literal(2))('a, 'b) comparePlans( - new SubstituteUnresolvedOrdinals(conf).apply(plan2), + SubstituteUnresolvedOrdinals.apply(plan2), testRelation2.groupBy(UnresolvedOrdinal(1), UnresolvedOrdinal(2))('a, 'b)) // Tests group by ordinal, do full analysis checkAnalysis(plan2, testRelation2.groupBy(a, b)(a, b)) // group by ordinal can be turned off by config - comparePlans( - new SubstituteUnresolvedOrdinals(conf.copy(SQLConf.GROUP_BY_ORDINAL -> false)).apply(plan2), - testRelation2.groupBy(Literal(1), Literal(2))('a, 'b)) + withSQLConf(SQLConf.GROUP_BY_ORDINAL.key -> "false") { + comparePlans( + SubstituteUnresolvedOrdinals.apply(plan2), + testRelation2.groupBy(Literal(1), Literal(2))('a, 'b)) + } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala index 7b80de908fa08..1e5bc271ab270 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala @@ -1103,7 +1103,7 @@ class TypeCoercionSuite extends AnalysisTest { } test("type coercion for Concat") { - val rule = TypeCoercion.ConcatCoercion(conf) + val rule = TypeCoercion.ConcatCoercion ruleTest(rule, Concat(Seq(Literal("ab"), Literal("cde"))), @@ -1157,7 +1157,7 @@ class TypeCoercionSuite extends AnalysisTest { } test("type coercion for Elt") { - val rule = TypeCoercion.EltCoercion(conf) + val rule = TypeCoercion.EltCoercion ruleTest(rule, Elt(Seq(Literal(1), Literal("ab"), Literal("cde"))), @@ -1284,7 +1284,7 @@ class TypeCoercionSuite extends AnalysisTest { } } - private val timeZoneResolver = ResolveTimeZone(new SQLConf) + private val timeZoneResolver = ResolveTimeZone private def widenSetOperationTypes(plan: LogicalPlan): LogicalPlan = { timeZoneResolver(TypeCoercion.WidenSetOperationTypes(plan)) @@ -1437,7 +1437,7 @@ class TypeCoercionSuite extends AnalysisTest { */ test("make sure rules do not fire early") { // InConversion - val inConversion = TypeCoercion.InConversion(conf) + val inConversion = TypeCoercion.InConversion ruleTest(inConversion, In(UnresolvedAttribute("a"), Seq(Literal(1))), In(UnresolvedAttribute("a"), Seq(Literal(1))) @@ -1481,7 +1481,7 @@ class TypeCoercionSuite extends AnalysisTest { } test("binary comparison with string promotion") { - val rule = TypeCoercion.PromoteStrings(conf) + val rule = TypeCoercion.PromoteStrings ruleTest(rule, GreaterThan(Literal("123"), Literal(1)), GreaterThan(Cast(Literal("123"), IntegerType), Literal(1))) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala index d0b0d04d1f719..60ab98eeb410a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala @@ -74,7 +74,7 @@ trait ExpressionEvalHelper extends ScalaCheckDrivenPropertyChecks with PlanTestB private def prepareEvaluation(expression: Expression): Expression = { val serializer = new JavaSerializer(new SparkConf()).newInstance - val resolver = ResolveTimeZone(new SQLConf) + val resolver = ResolveTimeZone val expr = resolver.resolveTimeZones(expression) assert(expr.resolved) 
serializer.deserialize(serializer.serialize(expr)) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala index 02c5c9ab89088..ff33324c3bb18 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala @@ -270,7 +270,7 @@ class ObjectExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { private def checkObjectExprEvaluation( expression: => Expression, expected: Any, inputRow: InternalRow = EmptyRow): Unit = { val serializer = new JavaSerializer(new SparkConf()).newInstance - val resolver = ResolveTimeZone(new SQLConf) + val resolver = ResolveTimeZone val expr = resolver.resolveTimeZones(serializer.deserialize(serializer.serialize(expression))) checkEvaluationWithoutCodegen(expr, expected, inputRow) checkEvaluationWithMutableProjection(expr, expected, inputRow) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SelectedFieldSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SelectedFieldSuite.scala index 76d6890cc8f6f..cf5463be1faa1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SelectedFieldSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SelectedFieldSuite.scala @@ -534,7 +534,7 @@ class SelectedFieldSuite extends AnalysisTest { private def unapplySelect(expr: String, relation: LocalRelation) = { val parsedExpr = parseAsCatalystExpression(Seq(expr)).head val select = relation.select(parsedExpr) - val analyzed = caseSensitiveAnalyzer.execute(select) + val analyzed = getAnalyzer.execute(select) SelectedField.unapply(analyzed.expressions.head) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala index f8ddc93597070..8984bad479a6b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala @@ -17,21 +17,16 @@ package org.apache.spark.sql.catalyst.optimizer -import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry} -import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog} +import org.apache.spark.sql.catalyst.analysis.AnalysisTest import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions.Literal -import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.{CASE_SENSITIVE, GROUP_BY_ORDINAL} -class AggregateOptimizeSuite extends PlanTest { - override val conf = new SQLConf().copy(CASE_SENSITIVE -> false, GROUP_BY_ORDINAL -> false) - val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf) - val analyzer = new Analyzer(catalog, conf) +class AggregateOptimizeSuite extends AnalysisTest { + val analyzer = getAnalyzer object Optimize extends 
RuleExecutor[LogicalPlan] { val batches = Batch("Aggregate", FixedPoint(100), @@ -51,11 +46,14 @@ class AggregateOptimizeSuite extends PlanTest { } test("do not remove all grouping expressions if they are all literals") { - val query = testRelation.groupBy(Literal("1"), Literal(1) + Literal(2))(sum('b)) - val optimized = Optimize.execute(analyzer.execute(query)) - val correctAnswer = analyzer.execute(testRelation.groupBy(Literal(0))(sum('b))) + withSQLConf(CASE_SENSITIVE.key -> "false", GROUP_BY_ORDINAL.key -> "false") { + val analyzer = getAnalyzer + val query = testRelation.groupBy(Literal("1"), Literal(1) + Literal(2))(sum('b)) + val optimized = Optimize.execute(analyzer.execute(query)) + val correctAnswer = analyzer.execute(testRelation.groupBy(Literal(0))(sum('b))) - comparePlans(optimized, correctAnswer) + comparePlans(optimized, correctAnswer) + } } test("Remove aliased literals") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala index 265f0a9936759..cc351e365113d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala @@ -18,8 +18,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.api.python.PythonEvalType -import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry} -import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog} +import org.apache.spark.sql.catalyst.analysis.AnalysisTest import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder @@ -27,14 +26,11 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.{CASE_SENSITIVE, ORDER_BY_ORDINAL} import org.apache.spark.sql.types.IntegerType -class EliminateSortsSuite extends PlanTest { - override val conf = new SQLConf().copy(CASE_SENSITIVE -> true, ORDER_BY_ORDINAL -> false) - val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf) - val analyzer = new Analyzer(catalog, conf) +class EliminateSortsSuite extends AnalysisTest { + val analyzer = getAnalyzer object Optimize extends RuleExecutor[LogicalPlan] { val batches = @@ -66,23 +62,29 @@ class EliminateSortsSuite extends PlanTest { } test("All the SortOrder are no-op") { - val x = testRelation + withSQLConf(CASE_SENSITIVE.key -> "true", ORDER_BY_ORDINAL.key -> "false") { + val x = testRelation + val analyzer = getAnalyzer - val query = x.orderBy(SortOrder(3, Ascending), SortOrder(-1, Ascending)) - val optimized = Optimize.execute(analyzer.execute(query)) - val correctAnswer = analyzer.execute(x) + val query = x.orderBy(SortOrder(3, Ascending), SortOrder(-1, Ascending)) + val optimized = Optimize.execute(analyzer.execute(query)) + val correctAnswer = analyzer.execute(x) - comparePlans(optimized, correctAnswer) + comparePlans(optimized, correctAnswer) + } } test("Partial order-by clauses contain no-op SortOrder") { - val x = testRelation + withSQLConf(CASE_SENSITIVE.key -> "true", ORDER_BY_ORDINAL.key -> "false") { + val x = testRelation + val analyzer = 
getAnalyzer - val query = x.orderBy(SortOrder(3, Ascending), 'a.asc) - val optimized = Optimize.execute(analyzer.execute(query)) - val correctAnswer = analyzer.execute(x.orderBy('a.asc)) + val query = x.orderBy(SortOrder(3, Ascending), 'a.asc) + val optimized = Optimize.execute(analyzer.execute(query)) + val correctAnswer = analyzer.execute(x.orderBy('a.asc)) - comparePlans(optimized, correctAnswer) + comparePlans(optimized, correctAnswer) + } } test("Remove no-op alias") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 24382e07a2966..c4fd84cd978d4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -38,7 +38,6 @@ import org.apache.spark.sql.types.{HIVE_TYPE_STRING, HiveStringType, MetadataBui */ class ResolveSessionCatalog( val catalogManager: CatalogManager, - conf: SQLConf, isTempView: Seq[String] => Boolean, isTempFunction: String => Boolean) extends Rule[LogicalPlan] with LookupCatalog { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala index e47ec9ab9b61b..8d542792a0e28 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala @@ -29,7 +29,6 @@ import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} import org.apache.spark.sql.execution.vectorized.{OffHeapColumnVector, OnHeapColumnVector, WritableColumnVector} -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.sql.vectorized.{ColumnarBatch, ColumnVector} @@ -494,7 +493,6 @@ case class RowToColumnarExec(child: SparkPlan) extends RowToColumnarTransition { * to/from columnar formatted data. */ case class ApplyColumnarRulesAndInsertTransitions( - conf: SQLConf, columnarRules: Seq[ColumnarRule]) extends Rule[SparkPlan] { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index a056500fa361a..c37e1e92c8576 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -339,17 +339,16 @@ object QueryExecution { // as the original plan is hidden behind `AdaptiveSparkPlanExec`. 
adaptiveExecutionRule.toSeq ++ Seq( - CoalesceBucketsInJoin(sparkSession.sessionState.conf), - PlanDynamicPruningFilters(sparkSession), - PlanSubqueries(sparkSession), - RemoveRedundantProjects(sparkSession.sessionState.conf), - EnsureRequirements(sparkSession.sessionState.conf), - DisableUnnecessaryBucketedScan(sparkSession.sessionState.conf), - ApplyColumnarRulesAndInsertTransitions(sparkSession.sessionState.conf, - sparkSession.sessionState.columnarRules), - CollapseCodegenStages(sparkSession.sessionState.conf), - ReuseExchange(sparkSession.sessionState.conf), - ReuseSubquery(sparkSession.sessionState.conf) + CoalesceBucketsInJoin, + PlanDynamicPruningFilters, + PlanSubqueries, + RemoveRedundantProjects, + EnsureRequirements, + DisableUnnecessaryBucketedScan, + ApplyColumnarRulesAndInsertTransitions(sparkSession.sessionState.columnarRules), + CollapseCodegenStages(), + ReuseExchange, + ReuseSubquery ) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala index 2bcf86edbea37..8746cc6f650d7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.internal.SQLConf * optimization to prune data. During physical planning, redundant project nodes can be removed * to simplify the query plan. */ -case class RemoveRedundantProjects(conf: SQLConf) extends Rule[SparkPlan] { +object RemoveRedundantProjects extends Rule[SparkPlan] { def apply(plan: SparkPlan): SparkPlan = { if (!conf.getConf(SQLConf.REMOVE_REDUNDANT_PROJECTS_ENABLED)) { plan diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index bcd31c4c1d775..a8905ca530005 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -873,7 +873,6 @@ case class WholeStageCodegenExec(child: SparkPlan)(val codegenStageId: Int) * failed to generate/compile code. 
*/ case class CollapseCodegenStages( - conf: SQLConf, codegenStageCounter: AtomicInteger = new AtomicInteger(0)) extends Rule[SparkPlan] { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala index 0170f8b2f71c2..04b8ade8ac629 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala @@ -28,7 +28,7 @@ import org.apache.spark.util.Utils class AQEOptimizer(conf: SQLConf) extends RuleExecutor[LogicalPlan] { private val defaultBatches = Seq( Batch("Demote BroadcastHashJoin", Once, - DemoteBroadcastHashJoin(conf)), + DemoteBroadcastHashJoin), Batch("Eliminate Join to Empty Relation", Once, EliminateJoinToEmptyRelation) ) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index 5e75e26e6d074..d30e16276b9f3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -82,8 +82,8 @@ case class AdaptiveSparkPlanExec( // The logical plan optimizer for re-optimizing the current logical plan. @transient private val optimizer = new AQEOptimizer(conf) - @transient private val removeRedundantProjects = RemoveRedundantProjects(conf) - @transient private val ensureRequirements = EnsureRequirements(conf) + @transient private val removeRedundantProjects = RemoveRedundantProjects + @transient private val ensureRequirements = EnsureRequirements // A list of physical plan rules to be applied before creation of query stages. The physical // plan should reach a final status of query stages (i.e., no more addition or removal of @@ -96,12 +96,12 @@ case class AdaptiveSparkPlanExec( // A list of physical optimizer rules to be applied to a new stage before its execution. These // optimizations should be stage-independent. @transient private val queryStageOptimizerRules: Seq[Rule[SparkPlan]] = Seq( - ReuseAdaptiveSubquery(conf, context.subqueryCache), - CoalesceShufflePartitions(context.session), + ReuseAdaptiveSubquery(context.subqueryCache), + CoalesceShufflePartitions, // The following two rules need to make use of 'CustomShuffleReaderExec.partitionSpecs' // added by `CoalesceShufflePartitions`. So they must be executed after it. - OptimizeSkewedJoin(conf), - OptimizeLocalShuffleReader(conf) + OptimizeSkewedJoin, + OptimizeLocalShuffleReader ) private def finalStageOptimizerRules: Seq[Rule[SparkPlan]] = @@ -109,7 +109,7 @@ case class AdaptiveSparkPlanExec( case _: DataWritingCommandExec | _: V2TableWriteExec => // SPARK-32932: Local shuffle reader could break partitioning that works best // for the following writing command - queryStageOptimizerRules.filterNot(_.isInstanceOf[OptimizeLocalShuffleReader]) + queryStageOptimizerRules.filterNot(_ == OptimizeLocalShuffleReader) case _ => queryStageOptimizerRules } @@ -117,8 +117,8 @@ case class AdaptiveSparkPlanExec( // A list of physical optimizer rules to be applied right after a new stage is created. The input // plan to these rules has exchange as its root node. 
@transient private val postStageCreationRules = Seq( - ApplyColumnarRulesAndInsertTransitions(conf, context.session.sessionState.columnarRules), - CollapseCodegenStages(conf) + ApplyColumnarRulesAndInsertTransitions(context.session.sessionState.columnarRules), + CollapseCodegenStages() ) @transient private val costEvaluator = SimpleCostEvaluator diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala index 84c65df31a7c5..ecf908a737442 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala @@ -26,9 +26,7 @@ import org.apache.spark.sql.internal.SQLConf * A rule to coalesce the shuffle partitions based on the map output statistics, which can * avoid many small reduce tasks that hurt performance. */ -case class CoalesceShufflePartitions(session: SparkSession) extends Rule[SparkPlan] { - private def conf = session.sessionState.conf - +object CoalesceShufflePartitions extends Rule[SparkPlan] { override def apply(plan: SparkPlan): SparkPlan = { if (!conf.coalesceShufflePartitionsEnabled) { return plan @@ -65,7 +63,7 @@ case class CoalesceShufflePartitions(session: SparkSession) extends Rule[SparkPl // We fall back to Spark default parallelism if the minimum number of coalesced partitions // is not set, so to avoid perf regressions compared to no coalescing. val minPartitionNum = conf.getConf(SQLConf.COALESCE_PARTITIONS_MIN_PARTITION_NUM) - .getOrElse(session.sparkContext.defaultParallelism) + .getOrElse(SparkSession.active.sparkContext.defaultParallelism) val partitionSpecs = ShufflePartitionsUtil.coalescePartitions( validMetrics.toArray, advisoryTargetSize = conf.getConf(SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/DemoteBroadcastHashJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/DemoteBroadcastHashJoin.scala index aba83b1337109..011acbf1b22a4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/DemoteBroadcastHashJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/DemoteBroadcastHashJoin.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.internal.SQLConf * This optimization rule detects a join child that has a high ratio of empty partitions and * adds a no-broadcast-hash-join hint to avoid it being broadcast. 
*/ -case class DemoteBroadcastHashJoin(conf: SQLConf) extends Rule[LogicalPlan] { +object DemoteBroadcastHashJoin extends Rule[LogicalPlan] { private def shouldDemote(plan: LogicalPlan): Boolean = plan match { case LogicalQueryStage(_, stage: ShuffleQueryStageExec) if stage.resultOption.get().isDefined diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala index 754225dd3fe95..f8478f860b2d5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala @@ -39,8 +39,6 @@ import org.apache.spark.sql.internal.SQLConf case class InsertAdaptiveSparkPlan( adaptiveExecutionContext: AdaptiveExecutionContext) extends Rule[SparkPlan] { - private val conf = adaptiveExecutionContext.session.sessionState.conf - override def apply(plan: SparkPlan): SparkPlan = applyInternal(plan, false) private def applyInternal(plan: SparkPlan, isSubquery: Boolean): SparkPlan = plan match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeLocalShuffleReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeLocalShuffleReader.scala index 7bb9265e1717a..8db2827beaf43 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeLocalShuffleReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeLocalShuffleReader.scala @@ -33,10 +33,9 @@ import org.apache.spark.sql.internal.SQLConf * then run `EnsureRequirements` to check whether additional shuffle introduced. * If introduced, we will revert all the local readers. */ -case class OptimizeLocalShuffleReader(conf: SQLConf) extends Rule[SparkPlan] { - import OptimizeLocalShuffleReader._ +object OptimizeLocalShuffleReader extends Rule[SparkPlan] { - private val ensureRequirements = EnsureRequirements(conf) + private val ensureRequirements = EnsureRequirements // The build side is a broadcast query stage which should have been optimized using local reader // already. So we only need to deal with probe side here. @@ -118,9 +117,6 @@ case class OptimizeLocalShuffleReader(conf: SQLConf) extends Rule[SparkPlan] { createProbeSideLocalReader(s) } } -} - -object OptimizeLocalShuffleReader { object BroadcastJoinWithShuffleLeft { def unapply(plan: SparkPlan): Option[(SparkPlan, BuildSide)] = plan match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala index a85b188727ba4..582d586c59358 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala @@ -53,9 +53,9 @@ import org.apache.spark.sql.internal.SQLConf * Note that, when this rule is enabled, it also coalesces non-skewed partitions like * `CoalesceShufflePartitions` does. 
*/ -case class OptimizeSkewedJoin(conf: SQLConf) extends Rule[SparkPlan] { +object OptimizeSkewedJoin extends Rule[SparkPlan] { - private val ensureRequirements = EnsureRequirements(conf) + private val ensureRequirements = EnsureRequirements private val supportedJoinTypes = Inner :: Cross :: LeftSemi :: LeftAnti :: LeftOuter :: RightOuter :: Nil diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ReuseAdaptiveSubquery.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ReuseAdaptiveSubquery.scala index 432f7e204791b..c3c7358641fcb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ReuseAdaptiveSubquery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ReuseAdaptiveSubquery.scala @@ -24,7 +24,6 @@ import org.apache.spark.sql.execution.{BaseSubqueryExec, ExecSubqueryExpression, import org.apache.spark.sql.internal.SQLConf case class ReuseAdaptiveSubquery( - conf: SQLConf, reuseMap: TrieMap[SparkPlan, BaseSubqueryExec]) extends Rule[SparkPlan] { def apply(plan: SparkPlan): SparkPlan = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/DetectAmbiguousSelfJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/DetectAmbiguousSelfJoin.scala index 136f7c47f5341..ef657ba35455f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/DetectAmbiguousSelfJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/DetectAmbiguousSelfJoin.scala @@ -40,7 +40,7 @@ import org.apache.spark.sql.internal.SQLConf * Note that, this rule removes all the Dataset id related metadata from `AttributeReference`, so * that they don't exist after analyzer. */ -class DetectAmbiguousSelfJoin(conf: SQLConf) extends Rule[LogicalPlan] { +object DetectAmbiguousSelfJoin extends Rule[LogicalPlan] { // Dataset column reference is an `AttributeReference` with 2 special metadata. private def isColumnReference(a: AttributeReference): Boolean = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoin.scala index 22f308f331449..40a2a7a2359e0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoin.scala @@ -38,7 +38,7 @@ import org.apache.spark.sql.internal.SQLConf * - The ratio of the number of buckets is less than the value set in * COALESCE_BUCKETS_IN_JOIN_MAX_BUCKET_RATIO. */ -case class CoalesceBucketsInJoin(conf: SQLConf) extends Rule[SparkPlan] { +object CoalesceBucketsInJoin extends Rule[SparkPlan] { private def updateNumCoalescedBucketsInScan( plan: SparkPlan, numCoalescedBuckets: Int): SparkPlan = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala index 9b4f898df00b6..2bbd5f5d969dc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala @@ -75,7 +75,7 @@ import org.apache.spark.sql.internal.SQLConf * the paper "Access Path Selection in a Relational Database Management System" * (https://dl.acm.org/doi/10.1145/582095.582099). 
*/ -case class DisableUnnecessaryBucketedScan(conf: SQLConf) extends Rule[SparkPlan] { +object DisableUnnecessaryBucketedScan extends Rule[SparkPlan] { /** * Disable bucketed table scan with pre-order traversal of plan. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index 9d666fc3a063e..02dd4e549f93b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -58,7 +58,7 @@ import org.apache.spark.unsafe.types.UTF8String * Note that, this rule must be run after `PreprocessTableCreation` and * `PreprocessTableInsertion`. */ -case class DataSourceAnalysis(conf: SQLConf) extends Rule[LogicalPlan] with CastSupport { +object DataSourceAnalysis extends Rule[LogicalPlan] with CastSupport { def resolver: Resolver = conf.resolver @@ -243,16 +243,16 @@ case class DataSourceAnalysis(conf: SQLConf) extends Rule[LogicalPlan] with Cast * TODO: we should remove the special handling for hive tables after completely making hive as a * data source. */ -class FindDataSourceTable(sparkSession: SparkSession) extends Rule[LogicalPlan] { +object FindDataSourceTable extends Rule[LogicalPlan] { private def readDataSourceTable( table: CatalogTable, extraOptions: CaseInsensitiveStringMap): LogicalPlan = { val qualifiedTableName = QualifiedTableName(table.database, table.identifier.table) - val catalog = sparkSession.sessionState.catalog + val catalog = SparkSession.active.sessionState.catalog val dsOptions = DataSourceUtils.generateDatasourceOptions(extraOptions, table) catalog.getCachedPlan(qualifiedTableName, () => { val dataSource = DataSource( - sparkSession, + SparkSession.active, // In older version(prior to 2.1) of Spark, the table schema can be empty and should be // inferred at runtime. We should still support it. userSpecifiedSchema = if (table.schema.isEmpty) None else Some(table.schema), @@ -270,7 +270,7 @@ class FindDataSourceTable(sparkSession: SparkSession) extends Rule[LogicalPlan] extraOptions: CaseInsensitiveStringMap): StreamingRelation = { val dsOptions = DataSourceUtils.generateDatasourceOptions(extraOptions, table) val dataSource = DataSource( - sparkSession, + SparkSession.active, className = table.provider.get, userSpecifiedSchema = Some(table.schema), options = dsOptions) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala index 28a63c26604ec..0244175f1a1bd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, File * This is a temporary hack for making current data source V2 work. It should be * removed when Catalog support of file data source v2 is finished. 
*/ -class FallBackFileSourceV2(sparkSession: SparkSession) extends Rule[LogicalPlan] { +object FallBackFileSourceV2 extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { case i @ InsertIntoStatement(d @ DataSourceV2Relation(table: FileTable, _, _, _, _), _, _, _, _) => @@ -42,7 +42,7 @@ class FallBackFileSourceV2(sparkSession: SparkSession) extends Rule[LogicalPlan] table.schema, None, v1FileFormat, - d.options.asScala.toMap)(sparkSession) + d.options.asScala.toMap)(SparkSession.active) i.copy(table = LogicalRelation(relation)) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index 5fb1a4d249070..5c46a36cf91f8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -38,16 +38,16 @@ import org.apache.spark.sql.util.SchemaUtils /** * Replaces [[UnresolvedRelation]]s if the plan is for direct query on files. */ -class ResolveSQLOnFile(sparkSession: SparkSession) extends Rule[LogicalPlan] { +object ResolveSQLOnFile extends Rule[LogicalPlan] { private def maybeSQLFile(u: UnresolvedRelation): Boolean = { - sparkSession.sessionState.conf.runSQLonFile && u.multipartIdentifier.size == 2 + conf.runSQLonFile && u.multipartIdentifier.size == 2 } def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { case u: UnresolvedRelation if maybeSQLFile(u) => try { val dataSource = DataSource( - sparkSession, + SparkSession.active, paths = u.multipartIdentifier.last :: Nil, className = u.multipartIdentifier.head) @@ -73,9 +73,9 @@ class ResolveSQLOnFile(sparkSession: SparkSession) extends Rule[LogicalPlan] { /** * Preprocess [[CreateTable]], to do some normalization and checking. */ -case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[LogicalPlan] { +object PreprocessTableCreation extends Rule[LogicalPlan] { // catalog is a def and not a val/lazy val as the latter would introduce a circular reference - private def catalog = sparkSession.sessionState.catalog + private def catalog = SparkSession.active.sessionState.catalog def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { // When we CREATE TABLE without specifying the table schema, we should fail the query if @@ -112,7 +112,6 @@ case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[Logi } // Check if the specified data source match the data source of the existing table. 
- val conf = sparkSession.sessionState.conf val existingProvider = DataSource.lookupDataSource(existingTable.provider.get, conf) val specifiedProvider = DataSource.lookupDataSource(tableDesc.provider.get, conf) // TODO: Check that options from the resolved relation match the relation that we are @@ -140,7 +139,7 @@ case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[Logi s"(${query.schema.catalogString})") } - val resolver = sparkSession.sessionState.conf.resolver + val resolver = conf.resolver val tableCols = existingTable.schema.map(_.name) // As we are inserting into an existing table, we should respect the existing schema and @@ -245,7 +244,7 @@ case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[Logi val schema = create.tableSchema val partitioning = create.partitioning val identifier = create.tableName - val isCaseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis + val isCaseSensitive = conf.caseSensitiveAnalysis // Check that columns are not duplicated in the schema val flattenedSchema = SchemaUtils.explodeNestedFieldNames(schema) SchemaUtils.checkColumnNameDuplication( @@ -266,7 +265,7 @@ case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[Logi create } else { // Resolve and normalize partition columns as necessary - val resolver = sparkSession.sessionState.conf.resolver + val resolver = conf.resolver val normalizedPartitions = partitioning.map { case transform: RewritableTransform => val rewritten = transform.references().map { ref => @@ -291,7 +290,7 @@ case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[Logi SchemaUtils.checkSchemaColumnNameDuplication( schema, "in the table definition of " + table.identifier, - sparkSession.sessionState.conf.caseSensitiveAnalysis) + conf.caseSensitiveAnalysis) assertNoNullTypeInSchema(schema) @@ -317,12 +316,12 @@ case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[Logi tableName = table.identifier.unquotedString, tableCols = schema.map(_.name), partCols = table.partitionColumnNames, - resolver = sparkSession.sessionState.conf.resolver) + resolver = conf.resolver) SchemaUtils.checkColumnNameDuplication( normalizedPartitionCols, "in the partition schema", - sparkSession.sessionState.conf.resolver) + conf.resolver) if (schema.nonEmpty && normalizedPartitionCols.length == schema.length) { if (DDLUtils.isHiveTable(table)) { @@ -351,16 +350,16 @@ case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[Logi tableName = table.identifier.unquotedString, tableCols = schema.map(_.name), bucketSpec = bucketSpec, - resolver = sparkSession.sessionState.conf.resolver) + resolver = conf.resolver) SchemaUtils.checkColumnNameDuplication( normalizedBucketSpec.bucketColumnNames, "in the bucket definition", - sparkSession.sessionState.conf.resolver) + conf.resolver) SchemaUtils.checkColumnNameDuplication( normalizedBucketSpec.sortColumnNames, "in the sort definition", - sparkSession.sessionState.conf.resolver) + conf.resolver) normalizedBucketSpec.sortColumnNames.map(schema(_)).map(_.dataType).foreach { case dt if RowOrdering.isOrderable(dt) => // OK @@ -382,7 +381,7 @@ case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[Logi * table. It also does data type casting and field renaming, to make sure that the columns to be * inserted have the correct data type and fields have the correct names. 
*/ -case class PreprocessTableInsertion(conf: SQLConf) extends Rule[LogicalPlan] { +object PreprocessTableInsertion extends Rule[LogicalPlan] { private def preprocess( insert: InsertIntoStatement, tblName: String, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala index 6973f55e8dca0..e1e996a857521 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala @@ -34,8 +34,7 @@ import org.apache.spark.sql.internal.SQLConf * results of broadcast. For joins that are not planned as broadcast hash joins we keep * the fallback mechanism with subquery duplicate. */ -case class PlanDynamicPruningFilters(sparkSession: SparkSession) - extends Rule[SparkPlan] with PredicateHelper { +object PlanDynamicPruningFilters extends Rule[SparkPlan] with PredicateHelper { /** * Identify the shape in which keys of a given plan are broadcasted. @@ -54,7 +53,7 @@ case class PlanDynamicPruningFilters(sparkSession: SparkSession) case DynamicPruningSubquery( value, buildPlan, buildKeys, broadcastKeyIndex, onlyInBroadcast, exprId) => val sparkPlan = QueryExecution.createSparkPlan( - sparkSession, sparkSession.sessionState.planner, buildPlan) + SparkSession.active, SparkSession.active.sessionState.planner, buildPlan) // Using `sparkPlan` is a little hacky as it is based on the assumption that this rule is // the first to be applied (apart from `InsertAdaptiveSparkPlan`). val canReuseExchange = SQLConf.get.exchangeReuseEnabled && buildKeys.nonEmpty && @@ -67,7 +66,7 @@ case class PlanDynamicPruningFilters(sparkSession: SparkSession) }.isDefined if (canReuseExchange) { - val executedPlan = QueryExecution.prepareExecutedPlan(sparkSession, sparkPlan) + val executedPlan = QueryExecution.prepareExecutedPlan(SparkSession.active, sparkPlan) val mode = broadcastMode(buildKeys, executedPlan.output) // plan a broadcast exchange of the build side of the join val exchange = BroadcastExchangeExec(mode, executedPlan) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala index 3641654b89b76..cf38fee055ca5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.internal.SQLConf * each operator by inserting [[ShuffleExchangeExec]] Operators where required. Also ensure that * the input partition ordering requirements are met. 
*/ -case class EnsureRequirements(conf: SQLConf) extends Rule[SparkPlan] { +object EnsureRequirements extends Rule[SparkPlan] { private def ensureDistributionAndOrdering(operator: SparkPlan): SparkPlan = { val requiredChildDistributions: Seq[Distribution] = operator.requiredChildDistribution diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala index c4062879c2727..aeaf59b7f0f4a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala @@ -100,7 +100,7 @@ case class ReusedExchangeExec(override val output: Seq[Attribute], child: Exchan * Find out duplicated exchanges in the spark plan, then use the same exchange for all the * references. */ -case class ReuseExchange(conf: SQLConf) extends Rule[SparkPlan] { +object ReuseExchange extends Rule[SparkPlan] { def apply(plan: SparkPlan): SparkPlan = { if (!conf.exchangeReuseEnabled) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala index 14cc76f0dbb78..7cf9af67aaa36 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala @@ -172,11 +172,11 @@ case class InSubqueryExec( /** * Plans subqueries that are present in the given [[SparkPlan]]. */ -case class PlanSubqueries(sparkSession: SparkSession) extends Rule[SparkPlan] { +object PlanSubqueries extends Rule[SparkPlan] { def apply(plan: SparkPlan): SparkPlan = { plan.transformAllExpressions { case subquery: expressions.ScalarSubquery => - val executedPlan = QueryExecution.prepareExecutedPlan(sparkSession, subquery.plan) + val executedPlan = QueryExecution.prepareExecutedPlan(SparkSession.active, subquery.plan) ScalarSubquery( SubqueryExec(s"scalar-subquery#${subquery.exprId.id}", executedPlan), subquery.exprId) @@ -190,7 +190,7 @@ case class PlanSubqueries(sparkSession: SparkSession) extends Rule[SparkPlan] { } ) } - val executedPlan = QueryExecution.prepareExecutedPlan(sparkSession, query) + val executedPlan = QueryExecution.prepareExecutedPlan(SparkSession.active, query) InSubqueryExec(expr, SubqueryExec(s"subquery#${exprId.id}", executedPlan), exprId) } } @@ -200,7 +200,7 @@ case class PlanSubqueries(sparkSession: SparkSession) extends Rule[SparkPlan] { * Find out duplicated subqueries in the spark plan, then use the same subquery result for all the * references. 
*/ -case class ReuseSubquery(conf: SQLConf) extends Rule[SparkPlan] { +object ReuseSubquery extends Rule[SparkPlan] { def apply(plan: SparkPlan): SparkPlan = { if (!conf.subqueryReuseEnabled) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index 4ca1ac863addc..3cef9f9df0daa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -177,19 +177,19 @@ abstract class BaseSessionStateBuilder( */ protected def analyzer: Analyzer = new Analyzer(catalogManager, conf) { override val extendedResolutionRules: Seq[Rule[LogicalPlan]] = - new FindDataSourceTable(session) +: - new ResolveSQLOnFile(session) +: - new FallBackFileSourceV2(session) +: + FindDataSourceTable +: + ResolveSQLOnFile +: + FallBackFileSourceV2 +: ResolveEncodersInScalaAgg +: new ResolveSessionCatalog( - catalogManager, conf, catalog.isTempView, catalog.isTempFunction) +: + catalogManager, catalog.isTempView, catalog.isTempFunction) +: customResolutionRules override val postHocResolutionRules: Seq[Rule[LogicalPlan]] = - new DetectAmbiguousSelfJoin(conf) +: - PreprocessTableCreation(session) +: - PreprocessTableInsertion(conf) +: - DataSourceAnalysis(conf) +: + DetectAmbiguousSelfJoin +: + PreprocessTableCreation +: + PreprocessTableInsertion +: + DataSourceAnalysis +: customPostHocResolutionRules override val extendedCheckRules: Seq[LogicalPlan => Unit] = diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala index dd95ceb59bdc4..e5f46eb9b1098 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala @@ -34,7 +34,7 @@ class V2CommandsCaseSensitivitySuite extends SharedSparkSession with AnalysisTes import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ override protected def extendedAnalysisRules: Seq[Rule[LogicalPlan]] = { - Seq(PreprocessTableCreation(spark)) + Seq(PreprocessTableCreation) } test("CreateTableAsSelect: using top level field for partitioning") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ColumnarRulesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ColumnarRulesSuite.scala index d5d534eb5f878..dd2790040b9e8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ColumnarRulesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ColumnarRulesSuite.scala @@ -27,7 +27,7 @@ class ColumnarRulesSuite extends PlanTest with SharedSparkSession { test("Idempotency of columnar rules - RowToColumnar/ColumnarToRow") { val rules = ApplyColumnarRulesAndInsertTransitions( - spark.sessionState.conf, spark.sessionState.columnarRules) + spark.sessionState.columnarRules) val plan = UnaryOp(UnaryOp(LeafOp(false), true), false) val expected = @@ -40,7 +40,7 @@ class ColumnarRulesSuite extends PlanTest with SharedSparkSession { test("Idempotency of columnar rules - ColumnarToRow/RowToColumnar") { val rules = ApplyColumnarRulesAndInsertTransitions( - spark.sessionState.conf, spark.sessionState.columnarRules) + spark.sessionState.columnarRules) val plan = UnaryOp(UnaryOp(LeafOp(true), false), true) val 
expected = ColumnarToRowExec( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index ca52e51c87ea7..048466b3d8637 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -342,7 +342,7 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { requiredChildDistribution = Seq(distribution, distribution), requiredChildOrdering = Seq(Seq.empty, Seq.empty) ) - val outputPlan = EnsureRequirements(spark.sessionState.conf).apply(inputPlan) + val outputPlan = EnsureRequirements.apply(inputPlan) assertDistributionRequirementsAreSatisfied(outputPlan) } @@ -360,7 +360,7 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { requiredChildDistribution = Seq(distribution, distribution), requiredChildOrdering = Seq(Seq.empty, Seq.empty) ) - val outputPlan = EnsureRequirements(spark.sessionState.conf).apply(inputPlan) + val outputPlan = EnsureRequirements.apply(inputPlan) assertDistributionRequirementsAreSatisfied(outputPlan) if (outputPlan.collect { case e: ShuffleExchangeExec => true }.isEmpty) { fail(s"Exchange should have been added:\n$outputPlan") @@ -380,7 +380,7 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { requiredChildDistribution = Seq(distribution, distribution), requiredChildOrdering = Seq(Seq.empty, Seq.empty) ) - val outputPlan = EnsureRequirements(spark.sessionState.conf).apply(inputPlan) + val outputPlan = EnsureRequirements.apply(inputPlan) assertDistributionRequirementsAreSatisfied(outputPlan) if (outputPlan.collect { case e: ShuffleExchangeExec => true }.nonEmpty) { fail(s"Exchange should not have been added:\n$outputPlan") @@ -403,7 +403,7 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { requiredChildDistribution = Seq(distribution, distribution), requiredChildOrdering = Seq(outputOrdering, outputOrdering) ) - val outputPlan = EnsureRequirements(spark.sessionState.conf).apply(inputPlan) + val outputPlan = EnsureRequirements.apply(inputPlan) assertDistributionRequirementsAreSatisfied(outputPlan) if (outputPlan.collect { case e: ShuffleExchangeExec => true }.nonEmpty) { fail(s"No Exchanges should have been added:\n$outputPlan") @@ -418,7 +418,7 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { val inputPlan = ShuffleExchangeExec( partitioning, DummySparkPlan(outputPartitioning = partitioning)) - val outputPlan = EnsureRequirements(spark.sessionState.conf).apply(inputPlan) + val outputPlan = EnsureRequirements.apply(inputPlan) assertDistributionRequirementsAreSatisfied(outputPlan) if (outputPlan.collect { case e: ShuffleExchangeExec => true }.size == 2) { fail(s"Topmost Exchange should have been eliminated:\n$outputPlan") @@ -433,7 +433,7 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { val inputPlan = ShuffleExchangeExec( partitioning, DummySparkPlan(outputPartitioning = partitioning)) - val outputPlan = EnsureRequirements(spark.sessionState.conf).apply(inputPlan) + val outputPlan = EnsureRequirements.apply(inputPlan) assertDistributionRequirementsAreSatisfied(outputPlan) if (outputPlan.collect { case e: ShuffleExchangeExec => true }.size == 1) { fail(s"Topmost Exchange should not have been eliminated:\n$outputPlan") @@ -451,7 +451,7 @@ class PlannerSuite extends SharedSparkSession with 
AdaptiveSparkPlanHelper { requiredChildDistribution = Seq(distribution), requiredChildOrdering = Seq(Seq.empty)) - val outputPlan = EnsureRequirements(spark.sessionState.conf).apply(inputPlan) + val outputPlan = EnsureRequirements.apply(inputPlan) val shuffle = outputPlan.collect { case e: ShuffleExchangeExec => e } assert(shuffle.size === 1) assert(shuffle.head.outputPartitioning === finalPartitioning) @@ -476,7 +476,7 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { shuffle, shuffle) - val outputPlan = ReuseExchange(spark.sessionState.conf).apply(inputPlan) + val outputPlan = ReuseExchange.apply(inputPlan) if (outputPlan.collect { case e: ReusedExchangeExec => true }.size != 1) { fail(s"Should re-use the shuffle:\n$outputPlan") } @@ -493,7 +493,7 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { ShuffleExchangeExec(finalPartitioning, inputPlan), ShuffleExchangeExec(finalPartitioning, inputPlan)) - val outputPlan2 = ReuseExchange(spark.sessionState.conf).apply(inputPlan2) + val outputPlan2 = ReuseExchange.apply(inputPlan2) if (outputPlan2.collect { case e: ReusedExchangeExec => true }.size != 2) { fail(s"Should re-use the two shuffles:\n$outputPlan2") } @@ -530,7 +530,7 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { requiredChildOrdering = Seq(requiredOrdering), requiredChildDistribution = Seq(UnspecifiedDistribution) ) - val outputPlan = EnsureRequirements(spark.sessionState.conf).apply(inputPlan) + val outputPlan = EnsureRequirements.apply(inputPlan) assertDistributionRequirementsAreSatisfied(outputPlan) if (shouldHaveSort) { if (outputPlan.collect { case s: SortExec => true }.isEmpty) { @@ -691,7 +691,7 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { val smjExec = SortMergeJoinExec( exprA :: exprA :: Nil, exprB :: exprC :: Nil, Inner, None, plan1, plan2) - val outputPlan = EnsureRequirements(spark.sessionState.conf).apply(smjExec) + val outputPlan = EnsureRequirements.apply(smjExec) outputPlan match { case SortMergeJoinExec(leftKeys, rightKeys, _, _, _, _, _) => assert(leftKeys == Seq(exprA, exprA)) @@ -711,7 +711,7 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { condition = None, left = plan1, right = plan2) - val outputPlan = EnsureRequirements(spark.sessionState.conf).apply(smjExec) + val outputPlan = EnsureRequirements.apply(smjExec) outputPlan match { case SortMergeJoinExec(leftKeys, rightKeys, _, _, SortExec(_, _, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala index 930935f077665..2de9d21abca82 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala @@ -141,7 +141,7 @@ abstract class RemoveRedundantProjectsSuiteBase } // Re-apply remove redundant project rule. - val rule = RemoveRedundantProjects(spark.sessionState.conf) + val rule = RemoveRedundantProjects val newExecutedPlan = rule.apply(newPlan) // The manually added ProjectExec node shouldn't be removed. 
assert(collectWithSubqueries(newExecutedPlan) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoinSuite.scala index 89aee37a4246f..63964665fc81c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoinSuite.scala @@ -99,7 +99,7 @@ class CoalesceBucketsInJoinSuite extends SQLTestUtils with SharedSparkSession { s.leftKeys, s.rightKeys, Inner, BuildLeft, None, lScan, rScan) } - val plan = CoalesceBucketsInJoin(spark.sessionState.conf)(join) + val plan = CoalesceBucketsInJoin(join) def verify(expected: Option[Int], subPlan: SparkPlan): Unit = { val coalesced = subPlan.collect { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 2d6a5da6d67f7..8782295e5d33b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -155,10 +155,10 @@ class PlanResolutionSuite extends AnalysisTest { // TODO: run the analyzer directly. val rules = Seq( CTESubstitution, - ResolveInlineTables(conf), + ResolveInlineTables, analyzer.ResolveRelations, new ResolveCatalogs(catalogManager), - new ResolveSessionCatalog(catalogManager, conf, _ == Seq("v"), _ => false), + new ResolveSessionCatalog(catalogManager, _ == Seq("v"), _ => false), analyzer.ResolveTables, analyzer.ResolveReferences, analyzer.ResolveSubqueryColumnAliases, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala index 38e68cd2512e7..296cbc3f3ad52 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala @@ -39,7 +39,7 @@ class EnsureRequirementsSuite extends SharedSparkSession { // Test PartitioningCollection on the left side of join. val smjExec1 = SortMergeJoinExec( exprB :: exprA :: Nil, exprA :: exprB :: Nil, Inner, None, plan1, plan2) - EnsureRequirements(spark.sessionState.conf).apply(smjExec1) match { + EnsureRequirements.apply(smjExec1) match { case SortMergeJoinExec(leftKeys, rightKeys, _, _, SortExec(_, _, DummySparkPlan(_, _, _: PartitioningCollection, _, _), _), SortExec(_, _, ShuffleExchangeExec(_: HashPartitioning, _, _), _), _) => @@ -51,7 +51,7 @@ class EnsureRequirementsSuite extends SharedSparkSession { // Test PartitioningCollection on the right side of join. val smjExec2 = SortMergeJoinExec( exprA :: exprB :: Nil, exprB :: exprA :: Nil, Inner, None, plan2, plan1) - EnsureRequirements(spark.sessionState.conf).apply(smjExec2) match { + EnsureRequirements.apply(smjExec2) match { case SortMergeJoinExec(leftKeys, rightKeys, _, _, SortExec(_, _, ShuffleExchangeExec(_: HashPartitioning, _, _), _), SortExec(_, _, DummySparkPlan(_, _, _: PartitioningCollection, _, _), _), _) => @@ -64,7 +64,7 @@ class EnsureRequirementsSuite extends SharedSparkSession { // and it should fall back to the right side. 
val smjExec3 = SortMergeJoinExec( exprA :: exprC :: Nil, exprB :: exprA :: Nil, Inner, None, plan1, plan1) - EnsureRequirements(spark.sessionState.conf).apply(smjExec3) match { + EnsureRequirements.apply(smjExec3) match { case SortMergeJoinExec(leftKeys, rightKeys, _, _, SortExec(_, _, ShuffleExchangeExec(_: HashPartitioning, _, _), _), SortExec(_, _, DummySparkPlan(_, _, _: PartitioningCollection, _, _), _), _) => @@ -83,7 +83,7 @@ class EnsureRequirementsSuite extends SharedSparkSession { // Test fallback to the right side, which has HashPartitioning. val smjExec1 = SortMergeJoinExec( exprA :: exprB :: Nil, exprC :: exprB :: Nil, Inner, None, plan1, plan2) - EnsureRequirements(spark.sessionState.conf).apply(smjExec1) match { + EnsureRequirements.apply(smjExec1) match { case SortMergeJoinExec(leftKeys, rightKeys, _, _, SortExec(_, _, ShuffleExchangeExec(_: HashPartitioning, _, _), _), SortExec(_, _, DummySparkPlan(_, _, _: HashPartitioning, _, _), _), _) => @@ -97,7 +97,7 @@ class EnsureRequirementsSuite extends SharedSparkSession { outputPartitioning = PartitioningCollection(Seq(HashPartitioning(exprB :: exprC :: Nil, 5)))) val smjExec2 = SortMergeJoinExec( exprA :: exprB :: Nil, exprC :: exprB :: Nil, Inner, None, plan1, plan3) - EnsureRequirements(spark.sessionState.conf).apply(smjExec2) match { + EnsureRequirements.apply(smjExec2) match { case SortMergeJoinExec(leftKeys, rightKeys, _, _, SortExec(_, _, ShuffleExchangeExec(_: HashPartitioning, _, _), _), SortExec(_, _, DummySparkPlan(_, _, _: PartitioningCollection, _, _), _), _) => @@ -110,7 +110,7 @@ class EnsureRequirementsSuite extends SharedSparkSession { // found, and it should fall back to the left side, which has a PartitioningCollection. val smjExec3 = SortMergeJoinExec( exprC :: exprB :: Nil, exprA :: exprB :: Nil, Inner, None, plan3, plan1) - EnsureRequirements(spark.sessionState.conf).apply(smjExec3) match { + EnsureRequirements.apply(smjExec3) match { case SortMergeJoinExec(leftKeys, rightKeys, _, _, SortExec(_, _, DummySparkPlan(_, _, _: PartitioningCollection, _, _), _), SortExec(_, _, ShuffleExchangeExec(_: HashPartitioning, _, _), _), _) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala index b6d1baf6e7902..044e9ace6243f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala @@ -91,7 +91,7 @@ abstract class BroadcastJoinSuiteBase extends QueryTest with SQLTestUtils } else { df1.join(df2, joinExpression, joinType) } - val plan = EnsureRequirements(spark.sessionState.conf).apply(df3.queryExecution.sparkPlan) + val plan = EnsureRequirements.apply(df3.queryExecution.sparkPlan) assert(plan.collect { case p: T => p }.size === 1) plan } @@ -171,7 +171,7 @@ abstract class BroadcastJoinSuiteBase extends QueryTest with SQLTestUtils val df4 = Seq((1, "5"), (2, "5")).toDF("key", "value") val df5 = df4.join(df3, Seq("key"), "inner") - val plan = EnsureRequirements(spark.sessionState.conf).apply(df5.queryExecution.sparkPlan) + val plan = EnsureRequirements.apply(df5.queryExecution.sparkPlan) assert(plan.collect { case p: BroadcastHashJoinExec => p }.size === 1) assert(plan.collect { case p: SortMergeJoinExec => p }.size === 1) @@ -182,7 +182,7 @@ abstract class BroadcastJoinSuiteBase extends QueryTest with SQLTestUtils val df1 = Seq((1, "4"), (2, 
"2")).toDF("key", "value") val joined = df1.join(df, Seq("key"), "inner") - val plan = EnsureRequirements(spark.sessionState.conf).apply(joined.queryExecution.sparkPlan) + val plan = EnsureRequirements.apply(joined.queryExecution.sparkPlan) assert(plan.collect { case p: BroadcastHashJoinExec => p }.size === 1) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/ExistenceJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/ExistenceJoinSuite.scala index e8ac09fdb634e..fcbc0da9d5551 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/ExistenceJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/ExistenceJoinSuite.scala @@ -107,13 +107,13 @@ class ExistenceJoinSuite extends SparkPlanTest with SharedSparkSession { extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _, _) => withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") { checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => - EnsureRequirements(left.sqlContext.sessionState.conf).apply( + EnsureRequirements.apply( ShuffledHashJoinExec( leftKeys, rightKeys, joinType, BuildRight, boundCondition, left, right)), expectedAnswer, sortAnswers = true) checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => - EnsureRequirements(left.sqlContext.sessionState.conf).apply( + EnsureRequirements.apply( createLeftSemiPlusJoin(ShuffledHashJoinExec( leftKeys, rightKeys, leftSemiPlus, BuildRight, boundCondition, left, right))), expectedAnswer, @@ -126,13 +126,13 @@ class ExistenceJoinSuite extends SparkPlanTest with SharedSparkSession { extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _, _) => withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") { checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => - EnsureRequirements(left.sqlContext.sessionState.conf).apply( + EnsureRequirements.apply( BroadcastHashJoinExec( leftKeys, rightKeys, joinType, BuildRight, boundCondition, left, right)), expectedAnswer, sortAnswers = true) checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => - EnsureRequirements(left.sqlContext.sessionState.conf).apply( + EnsureRequirements.apply( createLeftSemiPlusJoin(BroadcastHashJoinExec( leftKeys, rightKeys, leftSemiPlus, BuildRight, boundCondition, left, right))), expectedAnswer, @@ -145,12 +145,12 @@ class ExistenceJoinSuite extends SparkPlanTest with SharedSparkSession { extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _, _) => withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") { checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => - EnsureRequirements(left.sqlContext.sessionState.conf).apply( + EnsureRequirements.apply( SortMergeJoinExec(leftKeys, rightKeys, joinType, boundCondition, left, right)), expectedAnswer, sortAnswers = true) checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => - EnsureRequirements(left.sqlContext.sessionState.conf).apply( + EnsureRequirements.apply( createLeftSemiPlusJoin(SortMergeJoinExec( leftKeys, rightKeys, leftSemiPlus, boundCondition, left, right))), expectedAnswer, @@ -162,12 +162,12 @@ class ExistenceJoinSuite extends SparkPlanTest with SharedSparkSession { test(s"$testName using BroadcastNestedLoopJoin build left") { withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") { checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => - EnsureRequirements(left.sqlContext.sessionState.conf).apply( + 
EnsureRequirements.apply( BroadcastNestedLoopJoinExec(left, right, BuildLeft, joinType, Some(condition))), expectedAnswer, sortAnswers = true) checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => - EnsureRequirements(left.sqlContext.sessionState.conf).apply( + EnsureRequirements.apply( createLeftSemiPlusJoin(BroadcastNestedLoopJoinExec( left, right, BuildLeft, leftSemiPlus, Some(condition)))), expectedAnswer, @@ -178,12 +178,12 @@ class ExistenceJoinSuite extends SparkPlanTest with SharedSparkSession { test(s"$testName using BroadcastNestedLoopJoin build right") { withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") { checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => - EnsureRequirements(left.sqlContext.sessionState.conf).apply( + EnsureRequirements.apply( BroadcastNestedLoopJoinExec(left, right, BuildRight, joinType, Some(condition))), expectedAnswer, sortAnswers = true) checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => - EnsureRequirements(left.sqlContext.sessionState.conf).apply( + EnsureRequirements.apply( createLeftSemiPlusJoin(BroadcastNestedLoopJoinExec( left, right, BuildRight, leftSemiPlus, Some(condition)))), expectedAnswer, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/InnerJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/InnerJoinSuite.scala index 44ab3f7d023d3..f476c15f59983 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/InnerJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/InnerJoinSuite.scala @@ -101,7 +101,7 @@ class InnerJoinSuite extends SparkPlanTest with SharedSparkSession { boundCondition, leftPlan, rightPlan) - EnsureRequirements(spark.sessionState.conf).apply(broadcastJoin) + EnsureRequirements.apply(broadcastJoin) } def makeShuffledHashJoin( @@ -115,7 +115,7 @@ class InnerJoinSuite extends SparkPlanTest with SharedSparkSession { side, None, leftPlan, rightPlan) val filteredJoin = boundCondition.map(FilterExec(_, shuffledHashJoin)).getOrElse(shuffledHashJoin) - EnsureRequirements(spark.sessionState.conf).apply(filteredJoin) + EnsureRequirements.apply(filteredJoin) } def makeSortMergeJoin( @@ -126,7 +126,7 @@ class InnerJoinSuite extends SparkPlanTest with SharedSparkSession { rightPlan: SparkPlan) = { val sortMergeJoin = joins.SortMergeJoinExec(leftKeys, rightKeys, Inner, boundCondition, leftPlan, rightPlan) - EnsureRequirements(spark.sessionState.conf).apply(sortMergeJoin) + EnsureRequirements.apply(sortMergeJoin) } testWithWholeStageCodegenOnAndOff(s"$testName using BroadcastHashJoin (build=left)") { _ => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/OuterJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/OuterJoinSuite.scala index a466e05816ad8..9f7e0a14f6a5c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/OuterJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/OuterJoinSuite.scala @@ -110,7 +110,7 @@ class OuterJoinSuite extends SparkPlanTest with SharedSparkSession { withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") { val buildSide = if (joinType == LeftOuter) BuildRight else BuildLeft checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => - EnsureRequirements(spark.sessionState.conf).apply( + EnsureRequirements.apply( ShuffledHashJoinExec( leftKeys, rightKeys, joinType, buildSide, boundCondition, left, right)), expectedAnswer.map(Row.fromTuple), @@ 
-143,7 +143,7 @@ class OuterJoinSuite extends SparkPlanTest with SharedSparkSession { extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _, _) => withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") { checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => - EnsureRequirements(spark.sessionState.conf).apply( + EnsureRequirements.apply( SortMergeJoinExec(leftKeys, rightKeys, joinType, boundCondition, left, right)), expectedAnswer.map(Row.fromTuple), sortAnswers = true) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceAnalysisSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceAnalysisSuite.scala index a6c50904d395b..81ce979ef0b62 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceAnalysisSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceAnalysisSuite.scala @@ -23,12 +23,13 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.{Alias, AnsiCast, Attribute, Cast, Expression, Literal} +import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.execution.datasources.DataSourceAnalysis import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy import org.apache.spark.sql.types.{DataType, IntegerType, StructType} -class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll { +class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll with SQLHelper { private var targetAttributes: Seq[Attribute] = _ private var targetPartitionSchema: StructType = _ @@ -51,19 +52,26 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll { } Seq(true, false).foreach { caseSensitive => - val conf = new SQLConf().copy(SQLConf.CASE_SENSITIVE -> caseSensitive) + def testRule(testName: String, caseSensitive: Boolean)(func: => Unit): Unit = { + test(s"$testName (caseSensitive: $caseSensitive)") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + func + } + } + } + def cast(e: Expression, dt: DataType): Expression = { - conf.storeAssignmentPolicy match { + SQLConf.get.storeAssignmentPolicy match { case StoreAssignmentPolicy.ANSI | StoreAssignmentPolicy.STRICT => - AnsiCast(e, dt, Option(conf.sessionLocalTimeZone)) + AnsiCast(e, dt, Option(SQLConf.get.sessionLocalTimeZone)) case _ => - Cast(e, dt, Option(conf.sessionLocalTimeZone)) + Cast(e, dt, Option(SQLConf.get.sessionLocalTimeZone)) } } - val rule = DataSourceAnalysis(conf) - test( - s"convertStaticPartitions only handle INSERT having at least static partitions " + - s"(caseSensitive: $caseSensitive)") { + val rule = DataSourceAnalysis + testRule( + "convertStaticPartitions only handle INSERT having at least static partitions", + caseSensitive) { intercept[AssertionError] { rule.convertStaticPartitions( sourceAttributes = Seq('e.int, 'f.int), @@ -73,7 +81,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll { } } - test(s"Missing columns (caseSensitive: $caseSensitive)") { + testRule("Missing columns", caseSensitive) { // Missing columns. 
intercept[AnalysisException] { rule.convertStaticPartitions( @@ -84,7 +92,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll { } } - test(s"Missing partitioning columns (caseSensitive: $caseSensitive)") { + testRule("Missing partitioning columns", caseSensitive) { // Missing partitioning columns. intercept[AnalysisException] { rule.convertStaticPartitions( @@ -113,7 +121,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll { } } - test(s"Wrong partitioning columns (caseSensitive: $caseSensitive)") { + testRule("Wrong partitioning columns", caseSensitive) { // Wrong partitioning columns. intercept[AnalysisException] { rule.convertStaticPartitions( @@ -144,9 +152,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll { } } - test( - s"Static partitions need to appear before dynamic partitions" + - s" (caseSensitive: $caseSensitive)") { + testRule("Static partitions need to appear before dynamic partitions", caseSensitive) { // Static partitions need to appear before dynamic partitions. intercept[AnalysisException] { rule.convertStaticPartitions( @@ -157,7 +163,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll { } } - test(s"All static partitions (caseSensitive: $caseSensitive)") { + testRule("All static partitions", caseSensitive) { if (!caseSensitive) { val nonPartitionedAttributes = Seq('e.int, 'f.int) val expected = nonPartitionedAttributes ++ @@ -195,7 +201,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll { } } - test(s"Static partition and dynamic partition (caseSensitive: $caseSensitive)") { + testRule("Static partition and dynamic partition", caseSensitive) { val nonPartitionedAttributes = Seq('e.int, 'f.int) val dynamicPartitionAttributes = Seq('g.int) val expected = diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala index b9135733856a5..345f0288de4b1 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala @@ -75,22 +75,22 @@ class HiveSessionStateBuilder( */ override protected def analyzer: Analyzer = new Analyzer(catalogManager, conf) { override val extendedResolutionRules: Seq[Rule[LogicalPlan]] = - new ResolveHiveSerdeTable(session) +: - new FindDataSourceTable(session) +: - new ResolveSQLOnFile(session) +: - new FallBackFileSourceV2(session) +: + ResolveHiveSerdeTable +: + FindDataSourceTable +: + ResolveSQLOnFile +: + FallBackFileSourceV2 +: ResolveEncodersInScalaAgg +: new ResolveSessionCatalog( - catalogManager, conf, catalog.isTempView, catalog.isTempFunction) +: + catalogManager, catalog.isTempView, catalog.isTempFunction) +: customResolutionRules override val postHocResolutionRules: Seq[Rule[LogicalPlan]] = - new DetectAmbiguousSelfJoin(conf) +: - new DetermineTableStats(session) +: - RelationConversions(conf, catalog) +: - PreprocessTableCreation(session) +: - PreprocessTableInsertion(conf) +: - DataSourceAnalysis(conf) +: + DetectAmbiguousSelfJoin +: + DetermineTableStats +: + RelationConversions(catalog) +: + PreprocessTableCreation +: + PreprocessTableInsertion +: + DataSourceAnalysis +: HiveAnalysis +: customPostHocResolutionRules @@ -103,7 +103,7 @@ class HiveSessionStateBuilder( } override def customEarlyScanPushDownRules: Seq[Rule[LogicalPlan]] = - Seq(new 
PruneHiveTablePartitions(session)) + Seq(PruneHiveTablePartitions) /** * Planner that takes into account Hive-specific strategies. diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 2ace96583d9cc..f91f78616abf5 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -41,7 +41,7 @@ import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} * Determine the database, serde/format and schema of the Hive serde table, according to the storage * properties. */ -class ResolveHiveSerdeTable(session: SparkSession) extends Rule[LogicalPlan] { +object ResolveHiveSerdeTable extends Rule[LogicalPlan] { private def determineHiveSerde(table: CatalogTable): CatalogTable = { if (table.storage.serde.nonEmpty) { table @@ -50,7 +50,7 @@ class ResolveHiveSerdeTable(session: SparkSession) extends Rule[LogicalPlan] { throw new AnalysisException("Creating bucketed Hive serde table is not supported yet.") } - val defaultStorage = HiveSerDe.getDefaultStorage(session.sessionState.conf) + val defaultStorage = HiveSerDe.getDefaultStorage(conf) val options = new HiveOptions(table.storage.properties) val fileStorage = if (options.fileFormat.isDefined) { @@ -90,7 +90,7 @@ class ResolveHiveSerdeTable(session: SparkSession) extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { case c @ CreateTable(t, _, query) if DDLUtils.isHiveTable(t) => // Finds the database name if the name does not exist. - val dbName = t.identifier.database.getOrElse(session.catalog.currentDatabase) + val dbName = t.identifier.database.getOrElse(SparkSession.active.catalog.currentDatabase) val table = t.copy(identifier = t.identifier.copy(database = Some(dbName))) // Determines the serde/format of Hive tables @@ -113,16 +113,15 @@ class ResolveHiveSerdeTable(session: SparkSession) extends Rule[LogicalPlan] { } } -class DetermineTableStats(session: SparkSession) extends Rule[LogicalPlan] { +object DetermineTableStats extends Rule[LogicalPlan] { private def hiveTableWithStats(relation: HiveTableRelation): HiveTableRelation = { val table = relation.tableMeta val partitionCols = relation.partitionCols - val conf = session.sessionState.conf // For partitioned tables, the partition directory may be outside of the table directory. // Which is expensive to get table size. Please see how we implemented it in the AnalyzeTable. val sizeInBytes = if (conf.fallBackToHdfsForStatsEnabled && partitionCols.isEmpty) { try { - val hadoopConf = session.sessionState.newHadoopConf() + val hadoopConf = SparkSession.active.sessionState.newHadoopConf() val tablePath = new Path(table.location) val fs: FileSystem = tablePath.getFileSystem(hadoopConf) fs.getContentSummary(tablePath).getLength @@ -191,7 +190,6 @@ object HiveAnalysis extends Rule[LogicalPlan] { * `PreprocessTableCreation`, `PreprocessTableInsertion`, `DataSourceAnalysis` and `HiveAnalysis`. 
*/ case class RelationConversions( - conf: SQLConf, sessionCatalog: HiveSessionCatalog) extends Rule[LogicalPlan] { private def isConvertible(relation: HiveTableRelation): Boolean = { isConvertible(relation.tableMeta) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitions.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitions.scala index f6aff10cbc147..50ced7870d9ed 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitions.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitions.scala @@ -27,7 +27,6 @@ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.datasources.DataSourceStrategy -import org.apache.spark.sql.internal.SQLConf /** * Prune hive table partitions using partition filters on [[HiveTableRelation]]. The pruned @@ -40,10 +39,10 @@ import org.apache.spark.sql.internal.SQLConf * * TODO: merge this with PruneFileSourcePartitions after we completely make hive as a data source. */ -private[sql] class PruneHiveTablePartitions(session: SparkSession) - extends Rule[LogicalPlan] with CastSupport with PredicateHelper { +private[sql] class PruneHiveTablePartitions - override val conf: SQLConf = session.sessionState.conf +private[sql] object PruneHiveTablePartitions + extends Rule[LogicalPlan] with CastSupport with PredicateHelper { /** * Extract the partition filters from the filters on the table. @@ -65,11 +64,11 @@ private[sql] class PruneHiveTablePartitions(session: SparkSession) relation: HiveTableRelation, partitionFilters: ExpressionSet): Seq[CatalogTablePartition] = { if (conf.metastorePartitionPruning) { - session.sessionState.catalog.listPartitionsByFilter( + SparkSession.active.sessionState.catalog.listPartitionsByFilter( relation.tableMeta.identifier, partitionFilters.toSeq) } else { ExternalCatalogUtils.prunePartitionsByFilter(relation.tableMeta, - session.sessionState.catalog.listPartitions(relation.tableMeta.identifier), + SparkSession.active.sessionState.catalog.listPartitions(relation.tableMeta.identifier), partitionFilters.toSeq, conf.sessionLocalTimeZone) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitionsSuite.scala index 018df35403be5..6b35928067b50 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitionsSuite.scala @@ -29,7 +29,7 @@ class PruneHiveTablePartitionsSuite extends PrunePartitionSuiteBase { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("PruneHiveTablePartitions", Once, - EliminateSubqueryAliases, new PruneHiveTablePartitions(spark)) :: Nil + EliminateSubqueryAliases, PruneHiveTablePartitions) :: Nil } test("SPARK-15616: statistics pruned after going through PruneHiveTablePartitions") { From 281f99c70b2fab2839495638d07acc1e534e5ad6 Mon Sep 17 00:00:00 2001 From: "tanel.kiis@gmail.com" Date: Tue, 27 Oct 2020 22:53:05 +0900 Subject: [PATCH 0335/1009] [SPARK-33225][SQL] Extract AliasHelper trait ### What changes were proposed in this pull request? Extract methods related to handling Aliases to a trait. 
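For reference, a rule can now mix in the trait instead of calling `CleanupAliases.trimNonTopLevelAliases(_).asInstanceOf[NamedExpression]` at each call site. Below is a minimal sketch of the intended usage; the rule `MyCleanupRule` and its transformation are hypothetical and only illustrate the trait, while the helper names come from the `AliasHelper` added in this patch:

```scala
import org.apache.spark.sql.catalyst.expressions.{AliasHelper, NamedExpression}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project}
import org.apache.spark.sql.catalyst.rules.Rule

// Hypothetical rule, for illustration only: mixing in AliasHelper gives direct
// access to trimAliases / trimNonTopLevelAliases / getAliasMap / replaceAlias.
object MyCleanupRule extends Rule[LogicalPlan] with AliasHelper {
  override def apply(plan: LogicalPlan): LogicalPlan = plan transform {
    case Project(projectList, child) =>
      // trimNonTopLevelAliases is generic in the trait, so the previous
      // asInstanceOf[NamedExpression] casts at the callers are no longer needed.
      val cleanedProjectList: Seq[NamedExpression] = projectList.map(trimNonTopLevelAliases)
      Project(cleanedProjectList, child)
  }
}
```

Since `PredicateHelper` now extends `AliasHelper`, rules that already mix in `PredicateHelper` (such as the pushdown rules touched in this patch) pick up these helpers without further changes.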
### Why are the changes needed? Avoid code duplication ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UTs cover this Closes #30134 from tanelk/SPARK-33225_aliasHelper. Lead-authored-by: tanel.kiis@gmail.com Co-authored-by: Tanel Kiis Signed-off-by: Takeshi Yamamuro --- .../sql/catalyst/analysis/Analyzer.scala | 43 ++------ .../catalyst/expressions/AliasHelper.scala | 100 ++++++++++++++++++ .../sql/catalyst/expressions/predicates.scala | 31 ++---- .../sql/catalyst/optimizer/Optimizer.scala | 46 +------- .../optimizer/PushDownLeftSemiAntiJoin.scala | 4 +- .../sql/catalyst/optimizer/subquery.scala | 4 +- 6 files changed, 125 insertions(+), 103 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 39816f499944b..52c96f4a8f014 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2193,7 +2193,7 @@ class Analyzer( * those in a HAVING clause or ORDER BY clause. These expressions are pushed down to the * underlying aggregate operator and then projected away after the original operator. */ - object ResolveAggregateFunctions extends Rule[LogicalPlan] { + object ResolveAggregateFunctions extends Rule[LogicalPlan] with AliasHelper { def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { // Resolve aggregate with having clause to Filter(..., Aggregate()). Note, to avoid wrongly // resolve the having condition expression, here we skip resolving it in ResolveReferences @@ -2226,8 +2226,7 @@ class Analyzer( // Aggregate. checkAnalysis(resolvedAggregate) - val originalAggExprs = aggregate.aggregateExpressions.map( - CleanupAliases.trimNonTopLevelAliases(_).asInstanceOf[NamedExpression]) + val originalAggExprs = aggregate.aggregateExpressions.map(trimNonTopLevelAliases) // If the ordering expression is same with original aggregate expression, we don't need // to push down this ordering expression and can reference the original aggregate @@ -2370,7 +2369,7 @@ class Analyzer( case _ => false }.isDefined } } - CleanupAliases.trimNonTopLevelAliases(expr) match { + trimNonTopLevelAliases(expr) match { case UnresolvedAlias(g: Generator, _) => hasInnerGenerator(g) case Alias(g: Generator, _) => hasInnerGenerator(g) case MultiAlias(g: Generator, _) => hasInnerGenerator(g) @@ -2440,7 +2439,7 @@ class Analyzer( val projectExprs = Array.ofDim[NamedExpression](aggList.length) val newAggList = aggList - .map(CleanupAliases.trimNonTopLevelAliases(_).asInstanceOf[NamedExpression]) + .map(trimNonTopLevelAliases) .zipWithIndex .flatMap { case (AliasedGenerator(generator, names, outer), idx) => @@ -2483,7 +2482,7 @@ class Analyzer( var resolvedGenerator: Generate = null val newProjectList = projectList - .map(CleanupAliases.trimNonTopLevelAliases(_).asInstanceOf[NamedExpression]) + .map(trimNonTopLevelAliases) .flatMap { case AliasedGenerator(generator, names, outer) if generator.childrenResolved => // It's a sanity check, this should not happen as the previous case will throw @@ -3495,45 +3494,23 @@ object EliminateUnions extends Rule[LogicalPlan] { * are not in its `children`, e.g. `RuntimeReplaceable`, the transformation for Aliases in this * rule can't work for those parameters. 
*/ -object CleanupAliases extends Rule[LogicalPlan] { - def trimAliases(e: Expression): Expression = { - e.transformDown { - case Alias(child, _) => child - case MultiAlias(child, _) => child - } - } - - def trimNonTopLevelAliases(e: Expression): Expression = e match { - case a: Alias => - a.copy(child = trimAliases(a.child))( - exprId = a.exprId, - qualifier = a.qualifier, - explicitMetadata = Some(a.metadata)) - case a: MultiAlias => - a.copy(child = trimAliases(a.child)) - case other => trimAliases(other) - } - +object CleanupAliases extends Rule[LogicalPlan] with AliasHelper { override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { case Project(projectList, child) => - val cleanedProjectList = - projectList.map(trimNonTopLevelAliases(_).asInstanceOf[NamedExpression]) + val cleanedProjectList = projectList.map(trimNonTopLevelAliases) Project(cleanedProjectList, child) case Aggregate(grouping, aggs, child) => - val cleanedAggs = aggs.map(trimNonTopLevelAliases(_).asInstanceOf[NamedExpression]) + val cleanedAggs = aggs.map(trimNonTopLevelAliases) Aggregate(grouping.map(trimAliases), cleanedAggs, child) case Window(windowExprs, partitionSpec, orderSpec, child) => - val cleanedWindowExprs = - windowExprs.map(e => trimNonTopLevelAliases(e).asInstanceOf[NamedExpression]) + val cleanedWindowExprs = windowExprs.map(trimNonTopLevelAliases) Window(cleanedWindowExprs, partitionSpec.map(trimAliases), orderSpec.map(trimAliases(_).asInstanceOf[SortOrder]), child) case CollectMetrics(name, metrics, child) => - val cleanedMetrics = metrics.map { - e => trimNonTopLevelAliases(e).asInstanceOf[NamedExpression] - } + val cleanedMetrics = metrics.map(trimNonTopLevelAliases) CollectMetrics(name, cleanedMetrics, child) // Operators that operate on objects should only have expressions from encoders, which should diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala new file mode 100644 index 0000000000000..ec47875754a6f --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions + +import org.apache.spark.sql.catalyst.analysis.MultiAlias +import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression +import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Project} + +/** + * Helper methods for collecting and replacing aliases. + */ +trait AliasHelper { + + protected def getAliasMap(plan: Project): AttributeMap[Alias] = { + // Create a map of Aliases to their values from the child projection. 
+ // e.g., 'SELECT a + b AS c, d ...' produces Map(c -> Alias(a + b, c)). + getAliasMap(plan.projectList) + } + + protected def getAliasMap(plan: Aggregate): AttributeMap[Alias] = { + // Find all the aliased expressions in the aggregate list that don't include any actual + // AggregateExpression or PythonUDF, and create a map from the alias to the expression + val aliasMap = plan.aggregateExpressions.collect { + case a: Alias if a.child.find(e => e.isInstanceOf[AggregateExpression] || + PythonUDF.isGroupedAggPandasUDF(e)).isEmpty => + (a.toAttribute, a) + } + AttributeMap(aliasMap) + } + + protected def getAliasMap(exprs: Seq[NamedExpression]): AttributeMap[Alias] = { + // Create a map of Aliases to their values from the child projection. + // e.g., 'SELECT a + b AS c, d ...' produces Map(c -> Alias(a + b, c)). + AttributeMap(exprs.collect { case a: Alias => (a.toAttribute, a) }) + } + + /** + * Replace all attributes, that reference an alias, with the aliased expression + */ + protected def replaceAlias( + expr: Expression, + aliasMap: AttributeMap[Alias]): Expression = { + // Use transformUp to prevent infinite recursion when the replacement expression + // redefines the same ExprId, + trimAliases(expr.transformUp { + case a: Attribute => aliasMap.getOrElse(a, a) + }) + } + + /** + * Replace all attributes, that reference an alias, with the aliased expression, + * but keep the name of the outmost attribute. + */ + protected def replaceAliasButKeepName( + expr: NamedExpression, + aliasMap: AttributeMap[Alias]): NamedExpression = { + // Use transformUp to prevent infinite recursion when the replacement expression + // redefines the same ExprId, + trimNonTopLevelAliases(expr.transformUp { + case a: Attribute => aliasMap.getOrElse(a, a) + }).asInstanceOf[NamedExpression] + } + + protected def trimAliases(e: Expression): Expression = { + e.transformDown { + case Alias(child, _) => child + case MultiAlias(child, _) => child + } + } + + protected def trimNonTopLevelAliases[T <: Expression](e: T): T = { + val res = e match { + case a: Alias => + a.copy(child = trimAliases(a.child))( + exprId = a.exprId, + qualifier = a.qualifier, + explicitMetadata = Some(a.metadata)) + case a: MultiAlias => + a.copy(child = trimAliases(a.child)) + case other => trimAliases(other) + } + + res.asInstanceOf[T] + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 1f55045dbca74..f440534745ba1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -97,7 +97,7 @@ object Predicate extends CodeGeneratorWithInterpretedFallback[Expression, BasePr } } -trait PredicateHelper extends Logging { +trait PredicateHelper extends AliasHelper with Logging { protected def splitConjunctivePredicates(condition: Expression): Seq[Expression] = { condition match { case And(cond1, cond2) => @@ -117,18 +117,13 @@ trait PredicateHelper extends Logging { plan: LogicalPlan): Option[(Expression, LogicalPlan)] = { plan match { - case Project(projectList, child) => - val aliases = AttributeMap(projectList.collect { - case a @ Alias(child, _) => (a.toAttribute, child) - }) - findExpressionAndTrackLineageDown(replaceAlias(exp, aliases), child) + case p: Project => + val aliases = getAliasMap(p) + findExpressionAndTrackLineageDown(replaceAlias(exp, aliases), 
p.child) // we can unwrap only if there are row projections, and no aggregation operation - case Aggregate(_, aggregateExpressions, child) => - val aliasMap = AttributeMap(aggregateExpressions.collect { - case a: Alias if a.child.find(_.isInstanceOf[AggregateExpression]).isEmpty => - (a.toAttribute, a.child) - }) - findExpressionAndTrackLineageDown(replaceAlias(exp, aliasMap), child) + case a: Aggregate => + val aliasMap = getAliasMap(a) + findExpressionAndTrackLineageDown(replaceAlias(exp, aliasMap), a.child) case l: LeafNode if exp.references.subsetOf(l.outputSet) => Some((exp, l)) case other => @@ -150,18 +145,6 @@ trait PredicateHelper extends Logging { } } - // Substitute any known alias from a map. - protected def replaceAlias( - condition: Expression, - aliases: AttributeMap[Expression]): Expression = { - // Use transformUp to prevent infinite recursion when the replacement expression - // redefines the same ExprId, - condition.transformUp { - case a: Attribute => - aliases.getOrElse(a, a) - } - } - /** * Returns true if `expr` can be evaluated using only the output of `plan`. This method * can be used to determine when it is acceptable to move expression evaluation within a query diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 3e9a97419682d..f3f64031843e0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -729,7 +729,7 @@ object ColumnPruning extends Rule[LogicalPlan] { * and the upper project consists of the same number of columns which is equal or aliasing. * `GlobalLimit(LocalLimit)` pattern is also considered. */ -object CollapseProject extends Rule[LogicalPlan] { +object CollapseProject extends Rule[LogicalPlan] with AliasHelper { def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { case p1 @ Project(_, p2: Project) => @@ -758,17 +758,9 @@ object CollapseProject extends Rule[LogicalPlan] { s.copy(child = p2.copy(projectList = buildCleanedProjectList(l1, p2.projectList))) } - private def collectAliases(projectList: Seq[NamedExpression]): AttributeMap[Alias] = { - AttributeMap(projectList.collect { - case a: Alias => a.toAttribute -> a - }) - } - private def haveCommonNonDeterministicOutput( upper: Seq[NamedExpression], lower: Seq[NamedExpression]): Boolean = { - // Create a map of Aliases to their values from the lower projection. - // e.g., 'SELECT ... FROM (SELECT a + b AS c, d ...)' produces Map(c -> Alias(a + b, c)). - val aliases = collectAliases(lower) + val aliases = getAliasMap(lower) // Collapse upper and lower Projects if and only if their overlapped expressions are all // deterministic. @@ -780,21 +772,8 @@ object CollapseProject extends Rule[LogicalPlan] { private def buildCleanedProjectList( upper: Seq[NamedExpression], lower: Seq[NamedExpression]): Seq[NamedExpression] = { - // Create a map of Aliases to their values from the lower projection. - // e.g., 'SELECT ... FROM (SELECT a + b AS c, d ...)' produces Map(c -> Alias(a + b, c)). - val aliases = collectAliases(lower) - - // Substitute any attributes that are produced by the lower projection, so that we safely - // eliminate it. - // e.g., 'SELECT c + 1 FROM (SELECT a + b AS C ...' produces 'SELECT a + b + 1 ...' - // Use transformUp to prevent infinite recursion. 
- val rewrittenUpper = upper.map(_.transformUp { - case a: Attribute => aliases.getOrElse(a, a) - }) - // collapse upper and lower Projects may introduce unnecessary Aliases, trim them here. - rewrittenUpper.map { p => - CleanupAliases.trimNonTopLevelAliases(p).asInstanceOf[NamedExpression] - } + val aliases = getAliasMap(lower) + upper.map(replaceAliasButKeepName(_, aliases)) } private def isRenaming(list1: Seq[NamedExpression], list2: Seq[NamedExpression]): Boolean = { @@ -1271,23 +1250,6 @@ object PushPredicateThroughNonJoin extends Rule[LogicalPlan] with PredicateHelpe } } - def getAliasMap(plan: Project): AttributeMap[Expression] = { - // Create a map of Aliases to their values from the child projection. - // e.g., 'SELECT a + b AS c, d ...' produces Map(c -> a + b). - AttributeMap(plan.projectList.collect { case a: Alias => (a.toAttribute, a.child) }) - } - - def getAliasMap(plan: Aggregate): AttributeMap[Expression] = { - // Find all the aliased expressions in the aggregate list that don't include any actual - // AggregateExpression or PythonUDF, and create a map from the alias to the expression - val aliasMap = plan.aggregateExpressions.collect { - case a: Alias if a.child.find(e => e.isInstanceOf[AggregateExpression] || - PythonUDF.isGroupedAggPandasUDF(e)).isEmpty => - (a.toAttribute, a.child) - } - AttributeMap(aliasMap) - } - def canPushThrough(p: UnaryNode): Boolean = p match { // Note that some operators (e.g. project, aggregate, union) are being handled separately // (earlier in this rule). diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushDownLeftSemiAntiJoin.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushDownLeftSemiAntiJoin.scala index 606db85fcdea6..50fe0192d6f26 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushDownLeftSemiAntiJoin.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushDownLeftSemiAntiJoin.scala @@ -42,7 +42,7 @@ object PushDownLeftSemiAntiJoin extends Rule[LogicalPlan] with PredicateHelper { // No join condition, just push down the Join below Project p.copy(child = Join(gChild, rightOp, joinType, joinCond, hint)) } else { - val aliasMap = PushPredicateThroughNonJoin.getAliasMap(p) + val aliasMap = getAliasMap(p) val newJoinCond = if (aliasMap.nonEmpty) { Option(replaceAlias(joinCond.get, aliasMap)) } else { @@ -55,7 +55,7 @@ object PushDownLeftSemiAntiJoin extends Rule[LogicalPlan] with PredicateHelper { case join @ Join(agg: Aggregate, rightOp, LeftSemiOrAnti(_), _, _) if agg.aggregateExpressions.forall(_.deterministic) && agg.groupingExpressions.nonEmpty && !agg.aggregateExpressions.exists(ScalarSubquery.hasCorrelatedScalarSubquery) => - val aliasMap = PushPredicateThroughNonJoin.getAliasMap(agg) + val aliasMap = getAliasMap(agg) val canPushDownPredicate = (predicate: Expression) => { val replaced = replaceAlias(predicate, aliasMap) predicate.references.nonEmpty && diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala index f184253ef0595..cb076f6e35184 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala @@ -335,7 +335,7 @@ object PullupCorrelatedPredicates extends Rule[LogicalPlan] with PredicateHelper /** * This rule rewrites correlated [[ScalarSubquery]] 
expressions into LEFT OUTER joins. */ -object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { +object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] with AliasHelper { /** * Extract all correlated scalar subqueries from an expression. The subqueries are collected using * the given collector. The expression is rewritten and returned. @@ -357,7 +357,7 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { */ private def tryEvalExpr(expr: Expression): Expression = { // Removes Alias over given expression, because Alias is not foldable. - if (!CleanupAliases.trimAliases(expr).foldable) { + if (!trimAliases(expr).foldable) { // SPARK-28441: Some expressions, like PythonUDF, can't be statically evaluated. // Needs to evaluate them on query runtime. expr From f284218dae23bf91e72e221943188cdb85e13dac Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Tue, 27 Oct 2020 15:04:53 +0000 Subject: [PATCH 0336/1009] [SPARK-33137][SQL] Support ALTER TABLE in JDBC v2 Table Catalog: update type and nullability of columns (Postgres dialect) ### What changes were proposed in this pull request? Override the default SQL strings in Postgres Dialect for: - ALTER TABLE UPDATE COLUMN TYPE - ALTER TABLE UPDATE COLUMN NULLABILITY Add new docker integration test suite `jdbc/v2/PostgreSQLIntegrationSuite.scala` ### Why are the changes needed? supports Postgres specific ALTER TABLE syntax. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add new test `PostgreSQLIntegrationSuite` Closes #30089 from huaxingao/postgres_docker. Authored-by: Huaxin Gao Signed-off-by: Wenchen Fan --- .../sql/jdbc/v2/DB2IntegrationSuite.scala | 2 +- .../jdbc/v2/PostgresIntegrationSuite.scala | 69 +++++++++++++++++++ .../apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 21 ++++++ .../spark/sql/jdbc/PostgresDialect.scala | 16 +++++ 4 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala index 82f9f978c5da2..5c1442283aaed 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.tags.DockerTest * To run this test suite for a specific version (e.g., ibmcom/db2:11.5.4.0): * {{{ * DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0 - * ./build/sbt -Pdocker-integration-tests "test-only *DB2IntegrationSuite" + * ./build/sbt -Pdocker-integration-tests "testOnly *v2.DB2IntegrationSuite" * }}} */ @DockerTest diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala new file mode 100644 index 0000000000000..45994a5093748 --- /dev/null +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.jdbc.v2 + +import java.sql.Connection + +import org.apache.spark.SparkConf +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog +import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite} +import org.apache.spark.sql.types._ +import org.apache.spark.tags.DockerTest + +/** + * To run this test suite for a specific version (e.g., postgres:13.0): + * {{{ + * POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0 + * ./build/sbt -Pdocker-integration-tests "testOnly *v2.PostgresIntegrationSuite" + * }}} + */ +@DockerTest +class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { + override val catalogName: String = "postgresql" + override val db = new DatabaseOnDocker { + override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:13.0-alpine") + override val env = Map( + "POSTGRES_PASSWORD" -> "rootpass" + ) + override val usesIpc = false + override val jdbcPort = 5432 + override def getJdbcUrl(ip: String, port: Int): String = + s"jdbc:postgresql://$ip:$port/postgres?user=postgres&password=rootpass" + } + override def sparkConf: SparkConf = super.sparkConf + .set("spark.sql.catalog.postgresql", classOf[JDBCTableCatalog].getName) + .set("spark.sql.catalog.postgresql.url", db.getJdbcUrl(dockerIp, externalPort)) + override def dataPreparation(conn: Connection): Unit = {} + + override def testUpdateColumnType(tbl: String): Unit = { + sql(s"CREATE TABLE $tbl (ID INTEGER) USING _") + var t = spark.table(tbl) + var expectedSchema = new StructType().add("ID", IntegerType) + assert(t.schema === expectedSchema) + sql(s"ALTER TABLE $tbl ALTER COLUMN id TYPE STRING") + t = spark.table(tbl) + expectedSchema = new StructType().add("ID", StringType) + assert(t.schema === expectedSchema) + // Update column type from STRING to INTEGER + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $tbl ALTER COLUMN id TYPE INTEGER") + }.getMessage + assert(msg.contains("Cannot update alt_table field ID: string cannot be cast to int")) + } +} diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index 942c6237fd358..8419db7784e88 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -73,6 +73,27 @@ trait V2JDBCTest extends SharedSparkSession { assert(msg.contains("Table not found")) } + test("SPARK-33034: ALTER TABLE ... 
drop column") { + withTable(s"$catalogName.alt_table") { + sql(s"CREATE TABLE $catalogName.alt_table (C1 INTEGER, C2 STRING, c3 INTEGER) USING _") + sql(s"ALTER TABLE $catalogName.alt_table DROP COLUMN C1") + sql(s"ALTER TABLE $catalogName.alt_table DROP COLUMN c3") + val t = spark.table(s"$catalogName.alt_table") + val expectedSchema = new StructType().add("C2", StringType) + assert(t.schema === expectedSchema) + // Drop not existing column + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $catalogName.alt_table DROP COLUMN bad_column") + }.getMessage + assert(msg.contains("Cannot delete missing field bad_column in alt_table schema")) + } + // Drop a column from a not existing table + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $catalogName.not_existing_table DROP COLUMN C1") + }.getMessage + assert(msg.contains("Table not found")) + } + test("SPARK-33034: ALTER TABLE ... update column type") { withTable(s"$catalogName.alt_table") { testUpdateColumnType(s"$catalogName.alt_table") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala index a1ce25a0464c3..ee8cbed1ff7a1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala @@ -126,4 +126,20 @@ private object PostgresDialect extends JdbcDialect { } } + // See https://www.postgresql.org/docs/12/sql-altertable.html + override def getUpdateColumnTypeQuery( + tableName: String, + columnName: String, + newDataType: String): String = { + s"ALTER TABLE $tableName ALTER COLUMN ${quoteIdentifier(columnName)} TYPE $newDataType" + } + + // See https://www.postgresql.org/docs/12/sql-altertable.html + override def getUpdateColumnNullabilityQuery( + tableName: String, + columnName: String, + isNullable: Boolean): String = { + val nullable = if (isNullable) "DROP NOT NULL" else "SET NOT NULL" + s"ALTER TABLE $tableName ALTER COLUMN ${quoteIdentifier(columnName)} $nullable" + } } From 98f0a219915dc9ed696602b9bfad82d9cf6c4113 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Tue, 27 Oct 2020 11:54:08 -0700 Subject: [PATCH 0337/1009] [SPARK-33231][SPARK-33262][CORE] Make pod allocation executor timeouts configurable & allow scheduling with pending pods ### What changes were proposed in this pull request? Make pod allocation executor timeouts configurable. Keep all known pods in mind when allocating executors to avoid over-allocating if the pending time is much higher than the allocation interval. This PR increases the default wait time to 600s from the current 60s. Since nodes can now remain "pending" for long periods of time, we allow additional batches to be scheduled during pending allocation but keep the total number of pods in account. ### Why are the changes needed? The current executor timeouts do not match that of all real world clusters especially under load. While this can be worked around by increasing the allocation batch delay, that will decrease the speed at which the total number of executors will be able to be requested. The increase in default timeout is needed to handle real-world testing environments I've encountered on moderately busy clusters and K8s clusters with their own underlying dynamic scale-up of hardware (e.g. GKE, EKS, etc.) ### Does this PR introduce _any_ user-facing change? Yes new configuration property ### How was this patch tested? 
Updated existing test to use the timeout from the new configuration property. Verified test failed without the update. Closes #30155 from holdenk/SPARK-33231-make-pod-creation-timeout-configurable. Authored-by: Holden Karau Signed-off-by: Dongjoon Hyun --- .../main/scala/org/apache/spark/deploy/k8s/Config.scala | 8 ++++++++ .../scheduler/cluster/k8s/ExecutorPodsAllocator.scala | 9 +++++---- .../cluster/k8s/ExecutorPodsAllocatorSuite.scala | 4 +++- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index d399f66b45981..e3af1ccc24f1c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -227,6 +227,14 @@ private[spark] object Config extends Logging { .checkValue(value => value > 0, "Allocation batch delay must be a positive time value.") .createWithDefaultString("1s") + val KUBERNETES_ALLOCATION_EXECUTOR_TIMEOUT = + ConfigBuilder("spark.kubernetes.allocation.executor.timeout") + .doc("Time to wait before considering a pending executor timedout.") + .version("3.1.0") + .timeConf(TimeUnit.MILLISECONDS) + .checkValue(value => value > 0, "Allocation executor timeout must be a positive time value.") + .createWithDefaultString("600s") + val KUBERNETES_EXECUTOR_LOST_REASON_CHECK_MAX_ATTEMPTS = ConfigBuilder("spark.kubernetes.executor.lostCheck.maxAttempts") .doc("Maximum number of attempts allowed for checking the reason of an executor loss " + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala index 5e09de37f2848..4e8ca47b8dd02 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala @@ -51,7 +51,9 @@ private[spark] class ExecutorPodsAllocator( private val podAllocationDelay = conf.get(KUBERNETES_ALLOCATION_BATCH_DELAY) - private val podCreationTimeout = math.max(podAllocationDelay * 5, 60000) + private val podCreationTimeout = math.max( + podAllocationDelay * 5, + conf.get(KUBERNETES_ALLOCATION_EXECUTOR_TIMEOUT)) private val executorIdleTimeout = conf.get(DYN_ALLOCATION_EXECUTOR_IDLE_TIMEOUT) * 1000 @@ -214,10 +216,9 @@ private[spark] class ExecutorPodsAllocator( } if (newlyCreatedExecutors.isEmpty - && currentPendingExecutors.isEmpty - && currentRunningCount < currentTotalExpectedExecutors) { + && knownPodCount < currentTotalExpectedExecutors) { val numExecutorsToAllocate = math.min( - currentTotalExpectedExecutors - currentRunningCount, podAllocationSize) + currentTotalExpectedExecutors - knownPodCount, podAllocationSize) logInfo(s"Going to request $numExecutorsToAllocate executors from Kubernetes.") for ( _ <- 0 until numExecutorsToAllocate) { val newExecutorId = EXECUTOR_ID_COUNTER.incrementAndGet() diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala index 
c1c33b2a0f199..84c07bc588b06 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala @@ -56,8 +56,10 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite with BeforeAndAfter { private val podAllocationSize = conf.get(KUBERNETES_ALLOCATION_BATCH_SIZE) private val podAllocationDelay = conf.get(KUBERNETES_ALLOCATION_BATCH_DELAY) - private val podCreationTimeout = math.max(podAllocationDelay * 5, 60000L) private val executorIdleTimeout = conf.get(DYN_ALLOCATION_EXECUTOR_IDLE_TIMEOUT) * 1000 + private val podCreationTimeout = math.max(podAllocationDelay * 5, + conf.get(KUBERNETES_ALLOCATION_EXECUTOR_TIMEOUT)) + private val secMgr = new SecurityManager(conf) private var waitForExecutorPodsClock: ManualClock = _ From 3f2a2b5fe6ada37ef86f00737387e6cf2496df74 Mon Sep 17 00:00:00 2001 From: Ankur Dave Date: Tue, 27 Oct 2020 13:20:22 -0700 Subject: [PATCH 0338/1009] [SPARK-33260][SQL] Fix incorrect results from SortExec when sortOrder is Stream ### What changes were proposed in this pull request? The following query produces incorrect results. The query has two essential features: (1) it contains a string aggregate, resulting in a `SortExec` node, and (2) it contains a duplicate grouping key, causing `RemoveRepetitionFromGroupExpressions` to produce a sort order stored as a `Stream`. ```sql SELECT bigint_col_1, bigint_col_9, MAX(CAST(bigint_col_1 AS string)) FROM table_4 GROUP BY bigint_col_1, bigint_col_9, bigint_col_9 ``` When the sort order is stored as a `Stream`, the line `ordering.map(_.child.genCode(ctx))` in `GenerateOrdering#createOrderKeys()` produces unpredictable side effects to `ctx`. This is because `genCode(ctx)` modifies `ctx`. When ordering is a `Stream`, the modifications will not happen immediately as intended, but will instead occur lazily when the returned `Stream` is used later. Similar bugs have occurred at least three times in the past: https://issues.apache.org/jira/browse/SPARK-24500, https://issues.apache.org/jira/browse/SPARK-25767, https://issues.apache.org/jira/browse/SPARK-26680. The fix is to check if `ordering` is a `Stream` and force the modifications to happen immediately if so. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added a unit test for `SortExec` where `sortOrder` is a `Stream`. The test previously failed and now passes. Closes #30160 from ankurdave/SPARK-33260. 
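For reference, the deferred side effects at the heart of the bug can be seen with a minimal standalone sketch (plain Scala 2.12 `Stream`, independent of Spark; nothing beyond the standard library is assumed):

```scala
var sideEffects = 0
val mapped = Stream(1, 2, 3).map { i => sideEffects += 1; i * 10 }
// Stream.map only evaluates the head eagerly; the remaining side effects are
// deferred until the Stream is actually traversed.
assert(sideEffects == 1)
mapped.toList // forces the tail
assert(sideEffects == 3)

// Converting to a strict collection first (as the fix does via toIndexedSeq)
// runs all side effects up front.
var eager = 0
Stream(1, 2, 3).toIndexedSeq.map { i => eager += 1; i * 10 }
assert(eager == 3)
```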
Authored-by: Ankur Dave Signed-off-by: Dongjoon Hyun --- .../expressions/codegen/GenerateOrdering.scala | 4 +++- .../org/apache/spark/sql/execution/SortSuite.scala | 13 +++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index 63bd59e7628b2..5d00519d27c53 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -71,7 +71,9 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], BaseOrdering] with ctx.INPUT_ROW = row // to use INPUT_ROW we must make sure currentVars is null ctx.currentVars = null - ordering.map(_.child.genCode(ctx)) + // SPARK-33260: To avoid unpredictable modifications to `ctx` when `ordering` is a Stream, we + // use `toIndexedSeq` to make the transformation eager. + ordering.toIndexedSeq.map(_.child.genCode(ctx)) } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala index 7654a9d982059..6a4f3f62641f8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala @@ -97,6 +97,19 @@ class SortSuite extends SparkPlanTest with SharedSparkSession { } } + test("SPARK-33260: sort order is a Stream") { + val input = Seq( + ("Hello", 4, 2.0), + ("Hello", 1, 1.0), + ("World", 8, 3.0) + ) + checkAnswer( + input.toDF("a", "b", "c"), + (child: SparkPlan) => SortExec(Stream('a.asc, 'b.asc, 'c.asc), global = true, child = child), + input.sortBy(t => (t._1, t._2, t._3)).map(Row.fromTuple), + sortAnswers = false) + } + // Test sorting on different data types for ( dataType <- DataTypeTestUtils.atomicTypes ++ Set(NullType); From 7d11d972c356140d21909c6a62cdb8d813bd015e Mon Sep 17 00:00:00 2001 From: Stuart White Date: Wed, 28 Oct 2020 08:36:14 +0900 Subject: [PATCH 0339/1009] [SPARK-33246][SQL][DOCS] Correct documentation for null semantics of "NULL AND False" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? The documentation of the Spark SQL null semantics states that "NULL AND False" yields NULL. This is incorrect. "NULL AND False" yields False. ``` Seq[(java.lang.Boolean, java.lang.Boolean)]( (null, false) ) .toDF("left_operand", "right_operand") .withColumn("AND", 'left_operand && 'right_operand) .show(truncate = false) +------------+-------------+-----+ |left_operand|right_operand|AND | +------------+-------------+-----+ |null |false |false| +------------+-------------+-----+ ``` I propose the documentation be updated to reflect that "NULL AND False" yields False. This contribution is my original work and I license it to the project under the project’s open source license. ### Why are the changes needed? This change improves the accuracy of the documentation. ### Does this PR introduce _any_ user-facing change? Yes. This PR introduces a fix to the documentation. ### How was this patch tested? Since this is only a documentation change, no tests were added. Closes #30161 from stwhit/SPARK-33246. 
Authored-by: Stuart White Signed-off-by: Takeshi Yamamuro --- docs/sql-ref-null-semantics.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sql-ref-null-semantics.md b/docs/sql-ref-null-semantics.md index fb5d2a312d0e1..3c12e7a28b64e 100644 --- a/docs/sql-ref-null-semantics.md +++ b/docs/sql-ref-null-semantics.md @@ -125,7 +125,7 @@ The following tables illustrate the behavior of logical operators when one or bo |True|NULL|True|NULL| |False|NULL|NULL|False| |NULL|True|True|NULL| -|NULL|False|NULL|NULL| +|NULL|False|NULL|False| |NULL|NULL|NULL|NULL| |operand|NOT| From ea709d67486dd6329977df6c3ed7a443b835dd48 Mon Sep 17 00:00:00 2001 From: zero323 Date: Wed, 28 Oct 2020 09:46:13 +0900 Subject: [PATCH 0340/1009] [SPARK-33258][R][SQL] Add asc_nulls_* and desc_nulls_* methods to SparkR ### What changes were proposed in this pull request? This PR adds the following `Column` methods to R API: - asc_nulls_first - asc_nulls_last - desc_nulls_first - desc_nulls_last ### Why are the changes needed? Feature parity. ### Does this PR introduce _any_ user-facing change? No, new methods. ### How was this patch tested? New unit tests. Closes #30159 from zero323/SPARK-33258. Authored-by: zero323 Signed-off-by: HyukjinKwon --- R/pkg/NAMESPACE | 4 ++++ R/pkg/R/column.R | 6 +++++- R/pkg/R/generics.R | 12 ++++++++++++ R/pkg/tests/fulltests/test_sparkSQL.R | 2 ++ 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index a9cca4bf6f6fc..404a6968ea429 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -228,6 +228,8 @@ exportMethods("%<=>%", "arrays_zip", "arrays_zip_with", "asc", + "asc_nulls_first", + "asc_nulls_last", "ascii", "asin", "assert_true", @@ -273,6 +275,8 @@ exportMethods("%<=>%", "degrees", "dense_rank", "desc", + "desc_nulls_first", + "desc_nulls_last", "dropFields", "element_at", "encode", diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index c5fcfaff94029..835178990b485 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -67,7 +67,11 @@ operators <- list( # we can not override `&&` and `||`, so use `&` and `|` instead "&" = "and", "|" = "or", "^" = "pow" ) -column_functions1 <- c("asc", "desc", "isNaN", "isNull", "isNotNull") +column_functions1 <- c( + "asc", "asc_nulls_first", "asc_nulls_last", + "desc", "desc_nulls_first", "desc_nulls_last", + "isNaN", "isNull", "isNotNull" +) column_functions2 <- c("like", "rlike", "getField", "getItem", "contains") createOperator <- function(op) { diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 6b732e594cd3f..e372ae27e315a 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -675,6 +675,12 @@ setGeneric("broadcast", function(x) { standardGeneric("broadcast") }) #' @rdname columnfunctions setGeneric("asc", function(x) { standardGeneric("asc") }) +#' @rdname columnfunctions +setGeneric("asc_nulls_first", function(x) { standardGeneric("asc_nulls_first") }) + +#' @rdname columnfunctions +setGeneric("asc_nulls_last", function(x) { standardGeneric("asc_nulls_last") }) + #' @rdname between setGeneric("between", function(x, bounds) { standardGeneric("between") }) @@ -689,6 +695,12 @@ setGeneric("contains", function(x, ...) 
{ standardGeneric("contains") }) #' @rdname columnfunctions setGeneric("desc", function(x) { standardGeneric("desc") }) +#' @rdname columnfunctions +setGeneric("desc_nulls_first", function(x) { standardGeneric("desc_nulls_first") }) + +#' @rdname columnfunctions +setGeneric("desc_nulls_last", function(x) { standardGeneric("desc_nulls_last") }) + #' @rdname endsWith setGeneric("endsWith", function(x, suffix) { standardGeneric("endsWith") }) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 077dfc6770d94..685e6e672bdf9 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1428,6 +1428,8 @@ test_that("column functions", { vector_to_array(c, "float32") + vector_to_array(c, "float64") c27 <- nth_value("x", 1L) + nth_value("y", 2, TRUE) + nth_value(column("v"), 3) + nth_value(column("z"), 4L, FALSE) + c28 <- asc_nulls_first(c1) + asc_nulls_last(c1) + + desc_nulls_first(c1) + desc_nulls_last(c1) # Test if base::is.nan() is exposed expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE)) From c2bea045e3628081bca1ba752669a5bc009ebd00 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Wed, 28 Oct 2020 11:21:35 +0900 Subject: [PATCH 0341/1009] [SPARK-33264][SQL][DOCS] Add a dedicated page for SQL-on-file in SQL documents ### What changes were proposed in this pull request? This PR intends to add a dedicated page for SQL-on-file in SQL documents. This comes from the comment: https://github.com/apache/spark/pull/30095/files#r508965149 ### Why are the changes needed? For better documentations. ### Does this PR introduce _any_ user-facing change? Screen Shot 2020-10-28 at 9 56 59 ### How was this patch tested? N/A Closes #30165 from maropu/DocForFile. Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- docs/_data/menu-sql.yaml | 2 + docs/sql-ref-syntax-qry-select-file.md | 76 ++++++++++++++++++++++++++ docs/sql-ref-syntax-qry-select.md | 3 +- docs/sql-ref-syntax-qry.md | 1 + docs/sql-ref-syntax.md | 1 + 5 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 docs/sql-ref-syntax-qry-select-file.md diff --git a/docs/_data/menu-sql.yaml b/docs/_data/menu-sql.yaml index 63f6b4a0a204b..2207bd6a17656 100644 --- a/docs/_data/menu-sql.yaml +++ b/docs/_data/menu-sql.yaml @@ -175,6 +175,8 @@ url: sql-ref-syntax-qry-select-hints.html - text: Inline Table url: sql-ref-syntax-qry-select-inline-table.html + - text: File + url: sql-ref-syntax-qry-select-file.html - text: JOIN url: sql-ref-syntax-qry-select-join.html - text: LIKE Predicate diff --git a/docs/sql-ref-syntax-qry-select-file.md b/docs/sql-ref-syntax-qry-select-file.md new file mode 100644 index 0000000000000..c3dc406ee79e6 --- /dev/null +++ b/docs/sql-ref-syntax-qry-select-file.md @@ -0,0 +1,76 @@ +--- +layout: global +title: File +displayTitle: File +license: | + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +--- + +### Description + +You can query a file with a specified format directly with SQL. + +### Syntax + +```sql +file_format.`file_path` +``` + +### Parameters + +* **file_format** + + Specifies a file format for a given file path, could be TEXTFILE, ORC, PARQUET, etc. + +* **file_path** + + Specifies a file path with a given format. + +### Examples + +```sql +-- PARQUET file +SELECT * FROM parquet.`examples/src/main/resources/users.parquet`; ++------+--------------+----------------+ +| name|favorite_color|favorite_numbers| ++------+--------------+----------------+ +|Alyssa| null| [3, 9, 15, 20]| +| Ben| red| []| ++------+--------------+----------------+ + +-- ORC file +SELECT * FROM orc.`examples/src/main/resources/users.orc`; ++------+--------------+----------------+ +| name|favorite_color|favorite_numbers| ++------+--------------+----------------+ +|Alyssa| null| [3, 9, 15, 20]| +| Ben| red| []| ++------+--------------+----------------+ + +-- JSON file +SELECT * FROM json.`examples/src/main/resources/people.json`; ++----+-------+ +| age| name| ++----+-------+ +|null|Michael| +| 30| Andy| +| 19| Justin| ++----+-------+ +``` + +### Related Statements + +* [SELECT](sql-ref-syntax-qry-select.html) diff --git a/docs/sql-ref-syntax-qry-select.md b/docs/sql-ref-syntax-qry-select.md index 655766d4c6d22..bac7c2bc6a06d 100644 --- a/docs/sql-ref-syntax-qry-select.md +++ b/docs/sql-ref-syntax-qry-select.md @@ -85,7 +85,7 @@ SELECT [ hints , ... ] [ ALL | DISTINCT ] { named_expression [ , ... ] } * [Table-value function](sql-ref-syntax-qry-select-tvf.html) * [Inline table](sql-ref-syntax-qry-select-inline-table.html) * Subquery - * [File](sql-data-sources-load-save-functions.html#run-sql-on-files-directly) + * [File](sql-ref-syntax-qry-select-file.html) * **PIVOT** @@ -165,6 +165,7 @@ SELECT [ hints , ... ] [ ALL | DISTINCT ] { named_expression [ , ... ] } * [Common Table Expression](sql-ref-syntax-qry-select-cte.html) * [Hints](sql-ref-syntax-qry-select-hints.html) * [Inline Table](sql-ref-syntax-qry-select-inline-table.html) +* [File](sql-ref-syntax-qry-select-file.html) * [JOIN](sql-ref-syntax-qry-select-join.html) * [LIKE Predicate](sql-ref-syntax-qry-select-like.html) * [Set Operators](sql-ref-syntax-qry-select-setops.html) diff --git a/docs/sql-ref-syntax-qry.md b/docs/sql-ref-syntax-qry.md index d55ea43d15036..6751b90e12443 100644 --- a/docs/sql-ref-syntax-qry.md +++ b/docs/sql-ref-syntax-qry.md @@ -39,6 +39,7 @@ ability to generate logical and physical plan for a given query using * [Common Table Expression](sql-ref-syntax-qry-select-cte.html) * [Hints](sql-ref-syntax-qry-select-hints.html) * [Inline Table](sql-ref-syntax-qry-select-inline-table.html) + * [File](sql-ref-syntax-qry-select-file.html) * [JOIN](sql-ref-syntax-qry-select-join.html) * [LIKE Predicate](sql-ref-syntax-qry-select-like.html) * [Set Operators](sql-ref-syntax-qry-select-setops.html) diff --git a/docs/sql-ref-syntax.md b/docs/sql-ref-syntax.md index 4e58abb2a8596..f3d35b57d90cd 100644 --- a/docs/sql-ref-syntax.md +++ b/docs/sql-ref-syntax.md @@ -56,6 +56,7 @@ Spark SQL is Apache Spark's module for working with structured data. 
The SQL Syn
 * [HAVING Clause](sql-ref-syntax-qry-select-having.html)
 * [Hints](sql-ref-syntax-qry-select-hints.html)
 * [Inline Table](sql-ref-syntax-qry-select-inline-table.html)
+ * [File](sql-ref-syntax-qry-select-file.html)
 * [JOIN](sql-ref-syntax-qry-select-join.html)
 * [LIKE Predicate](sql-ref-syntax-qry-select-like.html)
 * [LIMIT Clause](sql-ref-syntax-qry-select-limit.html)

From fcf8aa59b5025dde9b4af36953146894659967e2 Mon Sep 17 00:00:00 2001
From: "Jungtaek Lim (HeartSaVioR)"
Date: Wed, 28 Oct 2020 03:31:11 +0000
Subject: [PATCH 0342/1009] [SPARK-33240][SQL] Fail fast when fails to instantiate configured v2 session catalog

### What changes were proposed in this pull request?

This patch changes the behavior to fail fast when Spark cannot instantiate the configured v2 session catalog.

### Why are the changes needed?

The current behavior works against the intention of end users: if they configure a session catalog that Spark fails to initialize, Spark swallows the error, only logs the error message, and silently falls back to the default catalog implementation. This follows the consensus on the [discussion thread](https://lists.apache.org/thread.html/rdfa22a5ebdc4ac66e2c5c8ff0cd9d750e8a1690cd6fb456d119c2400%40%3Cdev.spark.apache.org%3E) in the dev mailing list.

### Does this PR introduce _any_ user-facing change?

Yes. After this PR, Spark fails immediately if it cannot instantiate the configured session catalog.

### How was this patch tested?

New UT added.

Closes #30147 from HeartSaVioR/SPARK-33240.

Authored-by: Jungtaek Lim (HeartSaVioR)
Signed-off-by: Wenchen Fan
---
 .../sql/connector/catalog/CatalogManager.scala  | 12 ++----------
 .../connector/SupportsCatalogOptionsSuite.scala | 17 +++++++++++++++++
 2 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala
index c6d21540f27d5..8e8cd786b70c3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala
@@ -18,7 +18,6 @@ package org.apache.spark.sql.connector.catalog
 import scala.collection.mutable
-import scala.util.control.NonFatal
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException
@@ -82,15 +81,8 @@ class CatalogManager(
   * in the fallback configuration, spark.sql.sources.write.useV1SourceList
   */
  private[sql] def v2SessionCatalog: CatalogPlugin = {
-    conf.getConf(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION).map { customV2SessionCatalog =>
-      try {
-        catalogs.getOrElseUpdate(SESSION_CATALOG_NAME, loadV2SessionCatalog())
-      } catch {
-        case NonFatal(_) =>
-          logError(
-            "Fail to instantiate the custom v2 session catalog: " + customV2SessionCatalog)
-          defaultSessionCatalog
-      }
+    conf.getConf(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION).map { _ =>
+      catalogs.getOrElseUpdate(SESSION_CATALOG_NAME, loadV2SessionCatalog())
     }.getOrElse(defaultSessionCatalog)
   }

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala
index 550bec7505422..eacdb9e2fcd7b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala @@ -22,6 +22,7 @@ import scala.util.Try import org.scalatest.BeforeAndAfter +import org.apache.spark.SparkException import org.apache.spark.sql.{DataFrame, QueryTest, SaveMode} import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression} @@ -254,6 +255,22 @@ class SupportsCatalogOptionsSuite extends QueryTest with SharedSparkSession with } } + test("SPARK-33240: fail the query when instantiation on session catalog fails") { + try { + spark.sessionState.catalogManager.reset() + spark.conf.set( + V2_SESSION_CATALOG_IMPLEMENTATION.key, "InvalidCatalogClass") + val e = intercept[SparkException] { + sql(s"create table t1 (id bigint) using $format") + } + + assert(e.getMessage.contains("Cannot find catalog plugin class")) + assert(e.getMessage.contains("InvalidCatalogClass")) + } finally { + spark.sessionState.catalogManager.reset() + } + } + private def checkV2Identifiers( plan: LogicalPlan, identifier: String = "t1", From 528160f0014206eaceb01ae0f3ad316bfbdc6885 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Wed, 28 Oct 2020 05:44:55 +0000 Subject: [PATCH 0343/1009] [SPARK-33174][SQL] Migrate DROP TABLE to use UnresolvedTableOrView to resolve the identifier ### What changes were proposed in this pull request? This PR proposes to migrate `DROP TABLE` to use `UnresolvedTableOrView` to resolve the table/view identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). ### Why are the changes needed? The current behavior is not consistent between v1 and v2 commands when resolving a temp view. In v2, the `t` in the following example is resolved to a table: ```scala sql("CREATE TABLE testcat.ns.t (id bigint) USING foo") sql("CREATE TEMPORARY VIEW t AS SELECT 2") sql("USE testcat.ns") sql("DROP TABLE t") // 't' is resolved to testcat.ns.t ``` whereas in v1, the `t` is resolved to a temp view: ```scala sql("CREATE DATABASE test") sql("CREATE TABLE spark_catalog.test.t (id bigint) USING csv") sql("CREATE TEMPORARY VIEW t AS SELECT 2") sql("USE spark_catalog.test") sql("DROP TABLE t") // 't' is resolved to a temp view ``` ### Does this PR introduce _any_ user-facing change? After this PR, for v2, `DROP TABLE t` is resolved to a temp view `t` instead of `testcat.ns.t`, consistent with v1 behavior. ### How was this patch tested? Added a new test Closes #30079 from imback82/drop_table_consistent. 
Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 13 +++++--- .../catalyst/analysis/ResolveCatalogs.scala | 3 -- .../analysis/ResolveNoopDropTable.scala | 33 +++++++++++++++++++ .../catalyst/analysis/v2ResolutionPlans.scala | 2 +- .../sql/catalyst/parser/AstBuilder.scala | 7 ++-- .../catalyst/plans/logical/statements.scala | 8 ----- .../catalyst/plans/logical/v2Commands.scala | 13 ++++++-- .../sql/catalyst/parser/DDLParserSuite.scala | 23 +++++++++---- .../analysis/ResolveSessionCatalog.scala | 16 ++++++--- .../datasources/v2/DataSourceV2Strategy.scala | 9 +++-- .../sql/connector/DataSourceV2SQLSuite.scala | 29 ++++++++++++++-- .../connector/TestV2SessionCatalogBase.scala | 17 ++++++++-- .../command/PlanResolutionSuite.scala | 16 ++++----- .../v2/jdbc/JDBCTableCatalogSuite.scala | 6 ++-- 14 files changed, 142 insertions(+), 53 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveNoopDropTable.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 52c96f4a8f014..61c077fd12aa2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -258,7 +258,9 @@ class Analyzer( ResolveUnion :: TypeCoercion.typeCoercionRules ++ extendedResolutionRules : _*), - Batch("Post-Hoc Resolution", Once, postHocResolutionRules: _*), + Batch("Post-Hoc Resolution", Once, + Seq(ResolveNoopDropTable) ++ + postHocResolutionRules: _*), Batch("Normalize Alter Table", Once, ResolveAlterTableChanges), Batch("Remove Unresolved Hints", Once, new ResolveHints.RemoveAllHints), @@ -864,7 +866,9 @@ class Analyzer( } u case u @ UnresolvedTableOrView(ident) => - lookupTempView(ident).map(_ => ResolvedView(ident.asIdentifier)).getOrElse(u) + lookupTempView(ident) + .map(_ => ResolvedView(ident.asIdentifier, isTemp = true)) + .getOrElse(u) } def lookupTempView( @@ -1017,7 +1021,8 @@ class Analyzer( case u @ UnresolvedTable(identifier) => lookupTableOrView(identifier).map { case v: ResolvedView => - u.failAnalysis(s"${v.identifier.quoted} is a view not table.") + val viewStr = if (v.isTemp) "temp view" else "view" + u.failAnalysis(s"${v.identifier.quoted} is a $viewStr not table.") case table => table }.getOrElse(u) @@ -1030,7 +1035,7 @@ class Analyzer( case SessionCatalogAndIdentifier(catalog, ident) => CatalogV2Util.loadTable(catalog, ident).map { case v1Table: V1Table if v1Table.v1Table.tableType == CatalogTableType.VIEW => - ResolvedView(ident) + ResolvedView(ident, isTemp = false) case table => ResolvedTable(catalog.asTableCatalog, ident, table) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala index 65ddff8c44ed9..d3bb72badeb13 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala @@ -198,9 +198,6 @@ class ResolveCatalogs(val catalogManager: CatalogManager) writeOptions = c.writeOptions, orCreate = c.orCreate) - case DropTableStatement(NonSessionCatalogAndTable(catalog, tbl), ifExists, _) => - DropTable(catalog.asTableCatalog, tbl.asIdentifier, ifExists) - case 
DropViewStatement(NonSessionCatalogAndTable(catalog, viewName), _) => throw new AnalysisException( s"Can not specify catalog `${catalog.name}` for view ${viewName.quoted} " + diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveNoopDropTable.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveNoopDropTable.scala new file mode 100644 index 0000000000000..f9da9174f85e6 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveNoopDropTable.scala @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis + +import org.apache.spark.sql.catalyst.plans.logical.{DropTable, LogicalPlan, NoopDropTable} +import org.apache.spark.sql.catalyst.rules.Rule + +/** + * A rule for handling [[DropTable]] logical plan when the table or temp view is not resolved. + * If "ifExists" flag is set to true, the plan is resolved to [[NoopDropTable]], + * which is a no-op command. + */ +object ResolveNoopDropTable extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { + case DropTable(u: UnresolvedTableOrView, ifExists, _) if ifExists => + NoopDropTable(u.multipartIdentifier) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala index a16763f2cf943..1344d78838e1c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala @@ -81,7 +81,7 @@ case class ResolvedTable(catalog: TableCatalog, identifier: Identifier, table: T */ // TODO: create a generic representation for temp view, v1 view and v2 view, after we add view // support to v2 catalog. For now we only need the identifier to fallback to v1 command. -case class ResolvedView(identifier: Identifier) extends LeafNode { +case class ResolvedView(identifier: Identifier, isTemp: Boolean) extends LeafNode { override def output: Seq[Attribute] = Nil } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index f29e7b11e02de..f28375c8d7a4a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2878,11 +2878,12 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } /** - * Create a [[DropTableStatement]] command. + * Create a [[DropTable]] command. 
*/ override def visitDropTable(ctx: DropTableContext): LogicalPlan = withOrigin(ctx) { - DropTableStatement( - visitMultipartIdentifier(ctx.multipartIdentifier()), + // DROP TABLE works with either a table or a temporary view. + DropTable( + UnresolvedTableOrView(visitMultipartIdentifier(ctx.multipartIdentifier())), ctx.EXISTS != null, ctx.PURGE != null) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index d7c097af9120f..3a534b2eb8ceb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -283,14 +283,6 @@ case class RenameTableStatement( newName: Seq[String], isView: Boolean) extends ParsedStatement -/** - * A DROP TABLE statement, as parsed from SQL. - */ -case class DropTableStatement( - tableName: Seq[String], - ifExists: Boolean, - purge: Boolean) extends ParsedStatement - /** * A DROP VIEW statement, as parsed from SQL. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 272c19b98512b..96cb096ff97c9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -388,9 +388,16 @@ case class Assignment(key: Expression, value: Expression) extends Expression wit * The logical plan of the DROP TABLE command that works for v2 tables. */ case class DropTable( - catalog: TableCatalog, - ident: Identifier, - ifExists: Boolean) extends Command + child: LogicalPlan, + ifExists: Boolean, + purge: Boolean) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} + +/** + * The logical plan for handling non-existing table for DROP TABLE command. + */ +case class NoopDropTable(multipartIdentifier: Seq[String]) extends Command /** * The logical plan of the ALTER TABLE command that works for v2 tables. 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 621d416c55457..a81f9e16083d6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -381,19 +381,28 @@ class DDLParserSuite extends AnalysisTest { test("drop table") { parseCompare("DROP TABLE testcat.ns1.ns2.tbl", - DropTableStatement(Seq("testcat", "ns1", "ns2", "tbl"), ifExists = false, purge = false)) + DropTable( + UnresolvedTableOrView(Seq("testcat", "ns1", "ns2", "tbl")), + ifExists = false, + purge = false)) parseCompare(s"DROP TABLE db.tab", - DropTableStatement(Seq("db", "tab"), ifExists = false, purge = false)) + DropTable( + UnresolvedTableOrView(Seq("db", "tab")), ifExists = false, purge = false)) parseCompare(s"DROP TABLE IF EXISTS db.tab", - DropTableStatement(Seq("db", "tab"), ifExists = true, purge = false)) + DropTable( + UnresolvedTableOrView(Seq("db", "tab")), ifExists = true, purge = false)) parseCompare(s"DROP TABLE tab", - DropTableStatement(Seq("tab"), ifExists = false, purge = false)) + DropTable( + UnresolvedTableOrView(Seq("tab")), ifExists = false, purge = false)) parseCompare(s"DROP TABLE IF EXISTS tab", - DropTableStatement(Seq("tab"), ifExists = true, purge = false)) + DropTable( + UnresolvedTableOrView(Seq("tab")), ifExists = true, purge = false)) parseCompare(s"DROP TABLE tab PURGE", - DropTableStatement(Seq("tab"), ifExists = false, purge = true)) + DropTable( + UnresolvedTableOrView(Seq("tab")), ifExists = false, purge = true)) parseCompare(s"DROP TABLE IF EXISTS tab PURGE", - DropTableStatement(Seq("tab"), ifExists = true, purge = true)) + DropTable( + UnresolvedTableOrView(Seq("tab")), ifExists = true, purge = true)) } test("drop view") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index c4fd84cd978d4..f35eb41fe2ce1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -260,14 +260,14 @@ class ResolveSessionCatalog( DescribeTableCommand(ident.asTableIdentifier, partitionSpec, isExtended) // Use v1 command to describe (temp) view, as v2 catalog doesn't support view yet. 
- case DescribeRelation(ResolvedView(ident), partitionSpec, isExtended) => + case DescribeRelation(ResolvedView(ident, _), partitionSpec, isExtended) => DescribeTableCommand(ident.asTableIdentifier, partitionSpec, isExtended) case DescribeColumn(r @ ResolvedTable(_, _, _: V1Table), colNameParts, isExtended) if isSessionCatalog(r.catalog) => DescribeColumnCommand(r.identifier.asTableIdentifier, colNameParts, isExtended) - case DescribeColumn(ResolvedView(ident), colNameParts, isExtended) => + case DescribeColumn(ResolvedView(ident, _), colNameParts, isExtended) => DescribeColumnCommand(ident.asTableIdentifier, colNameParts, isExtended) // For CREATE TABLE [AS SELECT], we should use the v1 command if the catalog is resolved to the @@ -367,9 +367,17 @@ class ResolveSessionCatalog( orCreate = c.orCreate) } + case DropTable( + r @ ResolvedTable(_, _, _: V1Table), ifExists, purge) if isSessionCatalog(r.catalog) => + DropTableCommand(r.identifier.asTableIdentifier, ifExists, isView = false, purge = purge) + // v1 DROP TABLE supports temp view. - case DropTableStatement(TempViewOrV1Table(name), ifExists, purge) => - DropTableCommand(name.asTableIdentifier, ifExists, isView = false, purge = purge) + case DropTable(r: ResolvedView, ifExists, purge) => + if (!r.isTemp) { + throw new AnalysisException( + "Cannot drop a view with DROP TABLE. Please use DROP VIEW instead") + } + DropTableCommand(r.identifier.asTableIdentifier, ifExists, isView = false, purge = purge) // v1 DROP TABLE supports temp view. case DropViewStatement(TempViewOrV1Table(name), ifExists) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 3841bd0a66987..81a36dee58389 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.connector.catalog.{CatalogV2Util, StagingTableCatalog, SupportsNamespaces, TableCapability, TableCatalog, TableChange} import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, MicroBatchStream} -import org.apache.spark.sql.execution.{FilterExec, LeafExecNode, ProjectExec, RowDataSourceScanExec, SparkPlan} +import org.apache.spark.sql.execution.{FilterExec, LeafExecNode, LocalTableScanExec, ProjectExec, RowDataSourceScanExec, SparkPlan} import org.apache.spark.sql.execution.datasources.DataSourceStrategy import org.apache.spark.sql.execution.streaming.continuous.{WriteToContinuousDataSource, WriteToContinuousDataSourceExec} import org.apache.spark.sql.sources.{BaseRelation, TableScan} @@ -228,8 +228,11 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case DescribeColumn(_: ResolvedTable, _, _) => throw new AnalysisException("Describing columns is not supported for v2 tables.") - case DropTable(catalog, ident, ifExists) => - DropTableExec(catalog, ident, ifExists) :: Nil + case DropTable(r: ResolvedTable, ifExists, _) => + DropTableExec(r.catalog, r.identifier, ifExists) :: Nil + + case NoopDropTable(multipartIdentifier) => + LocalTableScanExec(Nil, Nil) :: Nil case AlterTable(catalog, ident, _, changes) => AlterTableExec(catalog, ident, changes) :: Nil diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index e3618f1326941..298c07059ff44 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -744,10 +744,33 @@ class DataSourceV2SQLSuite } test("DropTable: if exists") { - intercept[NoSuchTableException] { - sql(s"DROP TABLE testcat.db.notbl") + val ex = intercept[AnalysisException] { + sql("DROP TABLE testcat.db.notbl") + } + assert(ex.getMessage.contains("Table or view not found: testcat.db.notbl")) + sql("DROP TABLE IF EXISTS testcat.db.notbl") + } + + test("SPARK-33174: DROP TABLE should resolve to a temporary view first") { + withTable("testcat.ns.t") { + withTempView("t") { + sql("CREATE TABLE testcat.ns.t (id bigint) USING foo") + sql("CREATE TEMPORARY VIEW t AS SELECT 2") + sql("USE testcat.ns") + + // Check the temporary view 't' exists. + runShowTablesSql( + "SHOW TABLES FROM spark_catalog.default LIKE 't'", + Seq(Row("", "t", true)), + expectV2Catalog = false) + sql("DROP TABLE t") + // Verify that the temporary view 't' is resolved first and dropped. + runShowTablesSql( + "SHOW TABLES FROM spark_catalog.default LIKE 't'", + Nil, + expectV2Catalog = false) + } } - sql(s"DROP TABLE IF EXISTS testcat.db.notbl") } test("Relation: basic") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/TestV2SessionCatalogBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/TestV2SessionCatalogBase.scala index 4e741ff35c29f..f57edb9eb220c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/TestV2SessionCatalogBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/TestV2SessionCatalogBase.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.connector import java.util import java.util.concurrent.ConcurrentHashMap +import java.util.concurrent.atomic.AtomicBoolean import scala.collection.JavaConverters._ @@ -36,6 +37,13 @@ private[connector] trait TestV2SessionCatalogBase[T <: Table] extends Delegating protected val tables: util.Map[Identifier, T] = new ConcurrentHashMap[Identifier, T]() + private val tableCreated: AtomicBoolean = new AtomicBoolean(false) + + private def addTable(ident: Identifier, table: T): Unit = { + tableCreated.set(true) + tables.put(ident, table) + } + protected def newTable( name: String, schema: StructType, @@ -51,7 +59,7 @@ private[connector] trait TestV2SessionCatalogBase[T <: Table] extends Delegating case v1Table: V1Table if v1Table.v1Table.tableType == CatalogTableType.VIEW => v1Table case t => val table = newTable(t.name(), t.schema(), t.partitioning(), t.properties()) - tables.put(ident, table) + addTable(ident, table) table } } @@ -64,7 +72,7 @@ private[connector] trait TestV2SessionCatalogBase[T <: Table] extends Delegating properties: util.Map[String, String]): Table = { val created = super.createTable(ident, schema, partitions, properties) val t = newTable(created.name(), schema, partitions, properties) - tables.put(ident, t) + addTable(ident, t) t } @@ -74,8 +82,11 @@ private[connector] trait TestV2SessionCatalogBase[T <: Table] extends Delegating } def clearTables(): Unit = { - assert(!tables.isEmpty, "Tables were empty, maybe didn't use the session catalog code path?") + assert( + tableCreated.get, + "Tables are not created, maybe didn't use the session catalog code path?") 
tables.keySet().asScala.foreach(super.dropTable) tables.clear() + tableCreated.set(false) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 8782295e5d33b..d5820b016736a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -630,10 +630,10 @@ class PlanResolutionSuite extends AnalysisTest { } test("drop table") { - val tableName1 = "db.tab" - val tableIdent1 = TableIdentifier("tab", Option("db")) - val tableName2 = "tab" - val tableIdent2 = TableIdentifier("tab", Some("default")) + val tableName1 = "db.v1Table" + val tableIdent1 = TableIdentifier("v1Table", Option("db")) + val tableName2 = "v1Table" + val tableIdent2 = TableIdentifier("v1Table", Some("default")) parseResolveCompare(s"DROP TABLE $tableName1", DropTableCommand(tableIdent1, ifExists = false, isView = false, purge = false)) @@ -656,13 +656,13 @@ class PlanResolutionSuite extends AnalysisTest { val tableIdent2 = Identifier.of(Array.empty, "tab") parseResolveCompare(s"DROP TABLE $tableName1", - DropTable(testCat, tableIdent1, ifExists = false)) + DropTable(ResolvedTable(testCat, tableIdent1, table), ifExists = false, purge = false)) parseResolveCompare(s"DROP TABLE IF EXISTS $tableName1", - DropTable(testCat, tableIdent1, ifExists = true)) + DropTable(ResolvedTable(testCat, tableIdent1, table), ifExists = true, purge = false)) parseResolveCompare(s"DROP TABLE $tableName2", - DropTable(testCat, tableIdent2, ifExists = false)) + DropTable(ResolvedTable(testCat, tableIdent2, table), ifExists = false, purge = false)) parseResolveCompare(s"DROP TABLE IF EXISTS $tableName2", - DropTable(testCat, tableIdent2, ifExists = true)) + DropTable(ResolvedTable(testCat, tableIdent2, table), ifExists = true, purge = false)) } test("drop view") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala index d99ccf85683ed..51316b464ab34 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala @@ -78,10 +78,10 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { sql("DROP TABLE h2.test.to_drop") checkAnswer(sql("SHOW TABLES IN h2.test"), Seq(Row("test", "people"))) Seq( - "h2.test.not_existing_table" -> "Table test.not_existing_table not found", - "h2.bad_test.not_existing_table" -> "Table bad_test.not_existing_table not found" + "h2.test.not_existing_table" -> "Table or view not found: h2.test.not_existing_table", + "h2.bad_test.not_existing_table" -> "Table or view not found: h2.bad_test.not_existing_table" ).foreach { case (table, expectedMsg) => - val msg = intercept[NoSuchTableException] { + val msg = intercept[AnalysisException] { sql(s"DROP TABLE $table") }.getMessage assert(msg.contains(expectedMsg)) From 9fb45361fd00b046e04748e1a1c8add3fa09f01c Mon Sep 17 00:00:00 2001 From: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Date: Wed, 28 Oct 2020 05:51:47 +0000 Subject: [PATCH 0344/1009] [SPARK-33183][SQL] Fix Optimizer rule EliminateSorts and add a physical rule to remove redundant 
sorts ### What changes were proposed in this pull request? This PR aims to fix a correctness bug in the optimizer rule `EliminateSorts`. It also adds a new physical rule to remove redundant sorts that cannot be eliminated in the Optimizer rule after the bugfix. ### Why are the changes needed? A global sort should not be eliminated even if its child is ordered since we don't know if its child ordering is global or local. For example, in the following scenario, the first sort shouldn't be removed because it has a stronger guarantee than the second sort even if the sort orders are the same for both sorts. ``` Sort(orders, global = True, ...) Sort(orders, global = False, ...) ``` Since there is no straightforward way to identify whether a node's output ordering is local or global, we should not remove a global sort even if its child is already ordered. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? Unit tests Closes #30093 from allisonwang-db/fix-sort. Authored-by: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/Optimizer.scala | 16 +- .../apache/spark/sql/internal/SQLConf.scala | 7 + .../optimizer/EliminateSortsSuite.scala | 102 +++++++++++-- .../spark/sql/execution/QueryExecution.scala | 1 + .../sql/execution/RemoveRedundantSorts.scala | 46 ++++++ .../adaptive/AdaptiveSparkPlanExec.scala | 2 + .../spark/sql/execution/PlannerSuite.scala | 13 -- .../execution/RemoveRedundantSortsSuite.scala | 144 ++++++++++++++++++ 8 files changed, 303 insertions(+), 28 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantSorts.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index f3f64031843e0..9519a56c2817a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -1020,7 +1020,7 @@ object CombineFilters extends Rule[LogicalPlan] with PredicateHelper { * Note that changes in the final output ordering may affect the file size (SPARK-32318). 
* This rule handles the following cases: * 1) if the sort order is empty or the sort order does not have any reference - * 2) if the child is already sorted + * 2) if the Sort operator is a local sort and the child is already sorted * 3) if there is another Sort operator separated by 0...n Project, Filter, Repartition or * RepartitionByExpression (with deterministic expressions) operators * 4) if the Sort operator is within Join separated by 0...n Project, Filter, Repartition or @@ -1031,12 +1031,18 @@ object CombineFilters extends Rule[LogicalPlan] with PredicateHelper { * function is order irrelevant */ object EliminateSorts extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan transform { + def apply(plan: LogicalPlan): LogicalPlan = plan transform applyLocally + + private val applyLocally: PartialFunction[LogicalPlan, LogicalPlan] = { case s @ Sort(orders, _, child) if orders.isEmpty || orders.exists(_.child.foldable) => val newOrders = orders.filterNot(_.child.foldable) - if (newOrders.isEmpty) child else s.copy(order = newOrders) - case Sort(orders, true, child) if SortOrder.orderingSatisfies(child.outputOrdering, orders) => - child + if (newOrders.isEmpty) { + applyLocally.lift(child).getOrElse(child) + } else { + s.copy(order = newOrders) + } + case Sort(orders, false, child) if SortOrder.orderingSatisfies(child.outputOrdering, orders) => + applyLocally.lift(child).getOrElse(child) case s @ Sort(_, _, child) => s.copy(child = recursiveRemoveSort(child)) case j @ Join(originLeft, originRight, _, cond, _) if cond.forall(_.deterministic) => j.copy(left = recursiveRemoveSort(originLeft), right = recursiveRemoveSort(originRight)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 3024398399962..d84dfcc8f3086 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1253,6 +1253,13 @@ object SQLConf { .booleanConf .createWithDefault(true) + val REMOVE_REDUNDANT_SORTS_ENABLED = buildConf("spark.sql.execution.removeRedundantSorts") + .internal() + .doc("Whether to remove redundant physical sort node") + .version("3.1.0") + .booleanConf + .createWithDefault(true) + val STATE_STORE_PROVIDER_CLASS = buildConf("spark.sql.streaming.stateStore.providerClass") .internal() diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala index cc351e365113d..62deebd930752 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala @@ -99,12 +99,34 @@ class EliminateSortsSuite extends AnalysisTest { comparePlans(optimized, correctAnswer) } - test("remove redundant order by") { + test("SPARK-33183: remove consecutive no-op sorts") { + val plan = testRelation.orderBy().orderBy().orderBy() + val optimized = Optimize.execute(plan.analyze) + val correctAnswer = testRelation.analyze + comparePlans(optimized, correctAnswer) + } + + test("SPARK-33183: remove redundant sort by") { val orderedPlan = testRelation.select('a, 'b).orderBy('a.asc, 'b.desc_nullsFirst) - val unnecessaryReordered = orderedPlan.limit(2).select('a).orderBy('a.asc, 'b.desc_nullsFirst) + val unnecessaryReordered = 
orderedPlan.limit(2).select('a).sortBy('a.asc, 'b.desc_nullsFirst) val optimized = Optimize.execute(unnecessaryReordered.analyze) val correctAnswer = orderedPlan.limit(2).select('a).analyze - comparePlans(Optimize.execute(optimized), correctAnswer) + comparePlans(optimized, correctAnswer) + } + + test("SPARK-33183: remove all redundant local sorts") { + val orderedPlan = testRelation.sortBy('a.asc).orderBy('a.asc).sortBy('a.asc) + val optimized = Optimize.execute(orderedPlan.analyze) + val correctAnswer = testRelation.orderBy('a.asc).analyze + comparePlans(optimized, correctAnswer) + } + + test("SPARK-33183: should not remove global sort") { + val orderedPlan = testRelation.select('a, 'b).orderBy('a.asc, 'b.desc_nullsFirst) + val reordered = orderedPlan.limit(2).select('a).orderBy('a.asc, 'b.desc_nullsFirst) + val optimized = Optimize.execute(reordered.analyze) + val correctAnswer = reordered.analyze + comparePlans(optimized, correctAnswer) } test("do not remove sort if the order is different") { @@ -115,22 +137,39 @@ class EliminateSortsSuite extends AnalysisTest { comparePlans(optimized, correctAnswer) } - test("filters don't affect order") { + test("SPARK-33183: remove top level local sort with filter operators") { val orderedPlan = testRelation.select('a, 'b).orderBy('a.asc, 'b.desc) - val filteredAndReordered = orderedPlan.where('a > Literal(10)).orderBy('a.asc, 'b.desc) + val filteredAndReordered = orderedPlan.where('a > Literal(10)).sortBy('a.asc, 'b.desc) val optimized = Optimize.execute(filteredAndReordered.analyze) val correctAnswer = orderedPlan.where('a > Literal(10)).analyze comparePlans(optimized, correctAnswer) } - test("limits don't affect order") { + test("SPARK-33183: keep top level global sort with filter operators") { + val projectPlan = testRelation.select('a, 'b) + val orderedPlan = projectPlan.orderBy('a.asc, 'b.desc) + val filteredAndReordered = orderedPlan.where('a > Literal(10)).orderBy('a.asc, 'b.desc) + val optimized = Optimize.execute(filteredAndReordered.analyze) + val correctAnswer = projectPlan.where('a > Literal(10)).orderBy('a.asc, 'b.desc).analyze + comparePlans(optimized, correctAnswer) + } + + test("SPARK-33183: limits should not affect order for local sort") { val orderedPlan = testRelation.select('a, 'b).orderBy('a.asc, 'b.desc) - val filteredAndReordered = orderedPlan.limit(Literal(10)).orderBy('a.asc, 'b.desc) + val filteredAndReordered = orderedPlan.limit(Literal(10)).sortBy('a.asc, 'b.desc) val optimized = Optimize.execute(filteredAndReordered.analyze) val correctAnswer = orderedPlan.limit(Literal(10)).analyze comparePlans(optimized, correctAnswer) } + test("SPARK-33183: should not remove global sort with limit operators") { + val orderedPlan = testRelation.select('a, 'b).orderBy('a.asc, 'b.desc) + val filteredAndReordered = orderedPlan.limit(Literal(10)).orderBy('a.asc, 'b.desc) + val optimized = Optimize.execute(filteredAndReordered.analyze) + val correctAnswer = filteredAndReordered.analyze + comparePlans(optimized, correctAnswer) + } + test("different sorts are not simplified if limit is in between") { val orderedPlan = testRelation.select('a, 'b).orderBy('b.desc).limit(Literal(10)) .orderBy('a.asc) @@ -139,11 +178,11 @@ class EliminateSortsSuite extends AnalysisTest { comparePlans(optimized, correctAnswer) } - test("range is already sorted") { + test("SPARK-33183: should not remove global sort with range operator") { val inputPlan = Range(1L, 1000L, 1, 10) val orderedPlan = inputPlan.orderBy('id.asc) val optimized = 
Optimize.execute(orderedPlan.analyze) - val correctAnswer = inputPlan.analyze + val correctAnswer = orderedPlan.analyze comparePlans(optimized, correctAnswer) val reversedPlan = inputPlan.orderBy('id.desc) @@ -154,10 +193,18 @@ class EliminateSortsSuite extends AnalysisTest { val negativeStepInputPlan = Range(10L, 1L, -1, 10) val negativeStepOrderedPlan = negativeStepInputPlan.orderBy('id.desc) val negativeStepOptimized = Optimize.execute(negativeStepOrderedPlan.analyze) - val negativeStepCorrectAnswer = negativeStepInputPlan.analyze + val negativeStepCorrectAnswer = negativeStepOrderedPlan.analyze comparePlans(negativeStepOptimized, negativeStepCorrectAnswer) } + test("SPARK-33183: remove local sort with range operator") { + val inputPlan = Range(1L, 1000L, 1, 10) + val orderedPlan = inputPlan.sortBy('id.asc) + val optimized = Optimize.execute(orderedPlan.analyze) + val correctAnswer = inputPlan.analyze + comparePlans(optimized, correctAnswer) + } + test("sort should not be removed when there is a node which doesn't guarantee any order") { val orderedPlan = testRelation.select('a, 'b) val groupedAndResorted = orderedPlan.groupBy('a)(sum('a)).orderBy('a.asc) @@ -333,4 +380,39 @@ class EliminateSortsSuite extends AnalysisTest { val correctAnswer = PushDownOptimizer.execute(noOrderByPlan.analyze) comparePlans(optimized, correctAnswer) } + + test("SPARK-33183: remove consecutive global sorts with the same ordering") { + Seq( + (testRelation.orderBy('a.asc).orderBy('a.asc), testRelation.orderBy('a.asc)), + (testRelation.orderBy('a.asc, 'b.desc).orderBy('a.asc), testRelation.orderBy('a.asc)) + ).foreach { case (ordered, answer) => + val optimized = Optimize.execute(ordered.analyze) + comparePlans(optimized, answer.analyze) + } + } + + test("SPARK-33183: remove consecutive local sorts with the same ordering") { + val orderedPlan = testRelation.sortBy('a.asc).sortBy('a.asc).sortBy('a.asc) + val optimized = Optimize.execute(orderedPlan.analyze) + val correctAnswer = testRelation.sortBy('a.asc).analyze + comparePlans(optimized, correctAnswer) + } + + test("SPARK-33183: remove consecutive local sorts with different ordering") { + val orderedPlan = testRelation.sortBy('b.asc).sortBy('a.desc).sortBy('a.asc) + val optimized = Optimize.execute(orderedPlan.analyze) + val correctAnswer = testRelation.sortBy('a.asc).analyze + comparePlans(optimized, correctAnswer) + } + + test("SPARK-33183: should keep global sort when child is a local sort with the same ordering") { + val correctAnswer = testRelation.orderBy('a.asc).analyze + Seq( + testRelation.sortBy('a.asc).orderBy('a.asc), + testRelation.orderBy('a.asc).sortBy('a.asc).orderBy('a.asc) + ).foreach { ordered => + val optimized = Optimize.execute(ordered.analyze) + comparePlans(optimized, correctAnswer) + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index c37e1e92c8576..b998430c1602d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -343,6 +343,7 @@ object QueryExecution { PlanDynamicPruningFilters, PlanSubqueries, RemoveRedundantProjects, + RemoveRedundantSorts, EnsureRequirements, DisableUnnecessaryBucketedScan, ApplyColumnarRulesAndInsertTransitions(sparkSession.sessionState.columnarRules), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantSorts.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantSorts.scala new file mode 100644 index 0000000000000..87c08ec865fe9 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantSorts.scala @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution + +import org.apache.spark.sql.catalyst.expressions.SortOrder +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.internal.SQLConf + +/** + * Remove redundant SortExec node from the spark plan. A sort node is redundant when + * its child satisfies both its sort orders and its required child distribution. Note + * this rule differs from the Optimizer rule EliminateSorts in that this rule also checks + * if the child satisfies the required distribution so that it is safe to remove not only a + * local sort but also a global sort when its child already satisfies required sort orders. + */ +object RemoveRedundantSorts extends Rule[SparkPlan] { + def apply(plan: SparkPlan): SparkPlan = { + if (!conf.getConf(SQLConf.REMOVE_REDUNDANT_SORTS_ENABLED)) { + plan + } else { + removeSorts(plan) + } + } + + private def removeSorts(plan: SparkPlan): SparkPlan = plan transform { + case s @ SortExec(orders, _, child, _) + if SortOrder.orderingSatisfies(child.outputOrdering, orders) && + child.outputPartitioning.satisfies(s.requiredChildDistribution.head) => + child + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index d30e16276b9f3..a4a58dfe1de53 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -83,6 +83,7 @@ case class AdaptiveSparkPlanExec( @transient private val optimizer = new AQEOptimizer(conf) @transient private val removeRedundantProjects = RemoveRedundantProjects + @transient private val removeRedundantSorts = RemoveRedundantSorts @transient private val ensureRequirements = EnsureRequirements // A list of physical plan rules to be applied before creation of query stages. The physical @@ -90,6 +91,7 @@ case class AdaptiveSparkPlanExec( // Exchange nodes) after running these rules. 
private def queryStagePreparationRules: Seq[Rule[SparkPlan]] = Seq( removeRedundantProjects, + removeRedundantSorts, ensureRequirements ) ++ context.session.sessionState.queryStagePrepRules diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index 048466b3d8637..be29acb6d3a7c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -234,19 +234,6 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } } - test("SPARK-23375: Cached sorted data doesn't need to be re-sorted") { - val query = testData.select('key, 'value).sort('key.desc).cache() - assert(query.queryExecution.optimizedPlan.isInstanceOf[InMemoryRelation]) - val resorted = query.sort('key.desc) - assert(resorted.queryExecution.optimizedPlan.collect { case s: Sort => s}.isEmpty) - assert(resorted.select('key).collect().map(_.getInt(0)).toSeq == - (1 to 100).reverse) - // with a different order, the sort is needed - val sortedAsc = query.sort('key) - assert(sortedAsc.queryExecution.optimizedPlan.collect { case s: Sort => s}.size == 1) - assert(sortedAsc.select('key).collect().map(_.getInt(0)).toSeq == (1 to 100)) - } - test("PartitioningCollection") { withTempView("normal", "small", "tiny") { testData.createOrReplaceTempView("normal") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala new file mode 100644 index 0000000000000..54c5a33441900 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution + +import org.apache.spark.sql.{DataFrame, QueryTest} +import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanHelper, DisableAdaptiveExecutionSuite, EnableAdaptiveExecutionSuite} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession + + +abstract class RemoveRedundantSortsSuiteBase + extends QueryTest + with SharedSparkSession + with AdaptiveSparkPlanHelper { + import testImplicits._ + + private def checkNumSorts(df: DataFrame, count: Int): Unit = { + val plan = df.queryExecution.executedPlan + assert(collectWithSubqueries(plan) { case s: SortExec => s }.length == count) + } + + private def checkSorts(query: String, enabledCount: Int, disabledCount: Int): Unit = { + withSQLConf(SQLConf.REMOVE_REDUNDANT_SORTS_ENABLED.key -> "true") { + val df = sql(query) + checkNumSorts(df, enabledCount) + val result = df.collect() + withSQLConf(SQLConf.REMOVE_REDUNDANT_SORTS_ENABLED.key -> "false") { + val df = sql(query) + checkNumSorts(df, disabledCount) + checkAnswer(df, result) + } + } + } + + test("remove redundant sorts with limit") { + withTempView("t") { + spark.range(100).select('id as "key").createOrReplaceTempView("t") + val query = + """ + |SELECT key FROM + | (SELECT key FROM t WHERE key > 10 ORDER BY key DESC LIMIT 10) + |ORDER BY key DESC + |""".stripMargin + checkSorts(query, 0, 1) + } + } + + test("remove redundant sorts with broadcast hash join") { + withTempView("t1", "t2") { + spark.range(1000).select('id as "key").createOrReplaceTempView("t1") + spark.range(1000).select('id as "key").createOrReplaceTempView("t2") + + val queryTemplate = """ + |SELECT /*+ BROADCAST(%s) */ t1.key FROM + | (SELECT key FROM t1 WHERE key > 10 ORDER BY key DESC LIMIT 10) t1 + |JOIN + | (SELECT key FROM t2 WHERE key > 50 ORDER BY key DESC LIMIT 100) t2 + |ON t1.key = t2.key + |ORDER BY %s + """.stripMargin + + // No sort should be removed since the stream side (t2) order DESC + // does not satisfy the required sort order ASC. + val buildLeftOrderByRightAsc = queryTemplate.format("t1", "t2.key ASC") + checkSorts(buildLeftOrderByRightAsc, 1, 1) + + // The top sort node should be removed since the stream side (t2) order DESC already + // satisfies the required sort order DESC. + val buildLeftOrderByRightDesc = queryTemplate.format("t1", "t2.key DESC") + checkSorts(buildLeftOrderByRightDesc, 0, 1) + + // No sort should be removed since the sort ordering from broadcast-hash join is based + // on the stream side (t2) and the required sort order is from t1. + val buildLeftOrderByLeftDesc = queryTemplate.format("t1", "t1.key DESC") + checkSorts(buildLeftOrderByLeftDesc, 1, 1) + + // The top sort node should be removed since the stream side (t1) order DESC already + // satisfies the required sort order DESC. 
+ val buildRightOrderByLeftDesc = queryTemplate.format("t2", "t1.key DESC") + checkSorts(buildRightOrderByLeftDesc, 0, 1) + } + } + + test("remove redundant sorts with sort merge join") { + withTempView("t1", "t2") { + spark.range(1000).select('id as "key").createOrReplaceTempView("t1") + spark.range(1000).select('id as "key").createOrReplaceTempView("t2") + val query = """ + |SELECT /*+ MERGE(t1) */ t1.key FROM + | (SELECT key FROM t1 WHERE key > 10 ORDER BY key DESC LIMIT 10) t1 + |JOIN + | (SELECT key FROM t2 WHERE key > 50 ORDER BY key DESC LIMIT 100) t2 + |ON t1.key = t2.key + |ORDER BY t1.key + """.stripMargin + + val queryAsc = query + " ASC" + checkSorts(queryAsc, 2, 3) + + // The top level sort should not be removed since the child output ordering is ASC and + // the required ordering is DESC. + val queryDesc = query + " DESC" + checkSorts(queryDesc, 3, 3) + } + } + + test("cached sorted data doesn't need to be re-sorted") { + withSQLConf(SQLConf.REMOVE_REDUNDANT_SORTS_ENABLED.key -> "true") { + val df = spark.range(1000).select('id as "key").sort('key.desc).cache() + val resorted = df.sort('key.desc) + val sortedAsc = df.sort('key.asc) + checkNumSorts(df, 0) + checkNumSorts(resorted, 0) + checkNumSorts(sortedAsc, 1) + val result = resorted.collect() + withSQLConf(SQLConf.REMOVE_REDUNDANT_SORTS_ENABLED.key -> "false") { + val resorted = df.sort('key.desc) + checkNumSorts(resorted, 1) + checkAnswer(resorted, result) + } + } + } +} + +class RemoveRedundantSortsSuite extends RemoveRedundantSortsSuiteBase + with DisableAdaptiveExecutionSuite + +class RemoveRedundantSortsSuiteAE extends RemoveRedundantSortsSuiteBase + with EnableAdaptiveExecutionSuite From 3c3ad5f7c00f6f68bc659d4cf7020fa944b7bc69 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Wed, 28 Oct 2020 06:40:23 +0000 Subject: [PATCH 0345/1009] [SPARK-32934][SQL] Improve the performance for NTH_VALUE and refactor the OffsetWindowFunction ### What changes were proposed in this pull request? Spark SQL supports some window functions like `NTH_VALUE`. If we specify a window frame like `UNBOUNDED PRECEDING AND CURRENT ROW` or `UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING`, we can eliminate some calculations. For example, if we execute the SQL shown below: ``` SELECT NTH_VALUE(col, 2) OVER(ORDER BY rank ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM tab; ``` the output for rows whose row number is greater than 1 is a fixed value, and it is null otherwise. So we only need to calculate the value once and check whether the row number is less than 2. The `UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING` case is even simpler. ### Why are the changes needed? To improve the performance of `NTH_VALUE`, `FIRST_VALUE` and `LAST_VALUE`. ### Does this PR introduce _any_ user-facing change? 'No'. ### How was this patch tested? Jenkins test. Closes #29800 from beliefer/optimize-nth_value.
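For illustration, a minimal sketch of the two frame shapes this optimization targets, mirroring the queries added to `window.sql` in this patch (the `basic_pays` table is the test fixture used there; per the `WindowExecBase` change, only ROW frames starting at UNBOUNDED PRECEDING with `ignoreNulls` disabled take the new code paths):
```
-- Planned with UnboundedPrecedingOffsetWindowFunctionFrame: once the row number reaches the
-- offset (2 here), every later row reuses the row selected in prepare(); earlier rows get null.
SELECT
  employee_name,
  salary,
  nth_value(employee_name, 2) OVER (
    ORDER BY salary DESC
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) second_highest_salary
FROM basic_pays;

-- Planned with UnboundedOffsetWindowFunctionFrame: the value is the same for every row in the
-- partition, so it is computed once in prepare() and write() does no per-row work.
SELECT
  employee_name,
  salary,
  nth_value(employee_name, 2) OVER (
    ORDER BY salary DESC
    ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) second_highest_salary
FROM basic_pays;
```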
Lead-authored-by: gengjiaan Co-authored-by: beliefer Co-authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 2 +- .../sql/catalyst/analysis/CheckAnalysis.scala | 5 +- .../expressions/windowExpressions.scala | 86 +++++++----- .../sql/execution/window/WindowExec.scala | 8 +- .../sql/execution/window/WindowExecBase.scala | 43 +++++- .../window/WindowFunctionFrame.scala | 123 ++++++++++++++++-- .../resources/sql-tests/inputs/window.sql | 30 +++++ .../sql-tests/results/window.sql.out | 98 +++++++++++++- .../sql/DataFrameWindowFunctionsSuite.scala | 17 +-- 9 files changed, 353 insertions(+), 59 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 61c077fd12aa2..c2116a2b8f471 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2978,7 +2978,7 @@ class Analyzer( */ object ResolveWindowFrame extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions { - case WindowExpression(wf: OffsetWindowFunction, + case WindowExpression(wf: FrameLessOffsetWindowFunction, WindowSpecDefinition(_, _, f: SpecifiedWindowFrame)) if wf.frame != f => failAnalysis(s"Cannot specify window frame for ${wf.prettyName} function") case WindowExpression(wf: WindowFunction, WindowSpecDefinition(_, _, f: SpecifiedWindowFrame)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 351be32ee438e..d261f26072bcc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -166,7 +166,7 @@ trait CheckAnalysis extends PredicateHelper { case w @ WindowExpression(AggregateExpression(_, _, true, _, _), _) => failAnalysis(s"Distinct window functions are not supported: $w") - case w @ WindowExpression(_: OffsetWindowFunction, + case w @ WindowExpression(_: FrameLessOffsetWindowFunction, WindowSpecDefinition(_, order, frame: SpecifiedWindowFrame)) if order.isEmpty || !frame.isOffset => failAnalysis("An offset window function can only be evaluated in an ordered " + @@ -176,7 +176,8 @@ trait CheckAnalysis extends PredicateHelper { // Only allow window functions with an aggregate expression or an offset window // function or a Pandas window UDF. 
e match { - case _: AggregateExpression | _: OffsetWindowFunction | _: AggregateWindowFunction => + case _: AggregateExpression | _: FrameLessOffsetWindowFunction | + _: AggregateWindowFunction => w case f: PythonUDF if PythonUDF.isWindowPandasUDF(f) => w diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index bc0b4ac018f9e..168585dc3de00 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -327,25 +327,14 @@ object WindowFunctionType { } } - -/** - * An offset window function is a window function that returns the value of the input column offset - * by a number of rows within the partition. For instance: an OffsetWindowfunction for value x with - * offset -2, will get the value of x 2 rows back in the partition. - */ -abstract class OffsetWindowFunction - extends Expression with WindowFunction with Unevaluable with ImplicitCastInputTypes { +trait OffsetWindowSpec extends Expression { /** * Input expression to evaluate against a row which a number of rows below or above (depending on - * the value and sign of the offset) the current row. + * the value and sign of the offset) the starting row (current row if isRelative=true, or the + * first row of the window frame otherwise). */ val input: Expression - /** - * Default result value for the function when the `offset`th row does not exist. - */ - val default: Expression - /** * (Foldable) expression that contains the number of rows between the current row and the row * where the input expression is evaluated. If `offset` is a positive integer, it means that @@ -355,6 +344,36 @@ abstract class OffsetWindowFunction */ val offset: Expression + /** + * Default result value for the function when the `offset`th row does not exist. + */ + val default: Expression + + /** + * An optional specification that indicates the offset window function should skip null values in + * the determination of which row to use. + */ + val ignoreNulls: Boolean + + /** + * Whether the offset is starts with the current row. If `isRelative` is true, `offset` means + * the offset is start with the current row. otherwise, the offset is starts with the first + * row of the entire window frame. + */ + val isRelative: Boolean + + lazy val fakeFrame = SpecifiedWindowFrame(RowFrame, offset, offset) +} + +/** + * A frameless offset window function is a window function that cannot specify window frame and + * returns the value of the input column offset by a number of rows within the partition. + * For instance: a FrameLessOffsetWindowFunction for value x with offset -2, will get the value of + * x 2 rows back in the partition. 
+ */ +abstract class FrameLessOffsetWindowFunction + extends WindowFunction with OffsetWindowSpec with Unevaluable with ImplicitCastInputTypes { + override def children: Seq[Expression] = Seq(input, offset, default) /* @@ -370,7 +389,11 @@ abstract class OffsetWindowFunction override def nullable: Boolean = default == null || default.nullable || input.nullable - override lazy val frame: WindowFrame = SpecifiedWindowFrame(RowFrame, offset, offset) + override val ignoreNulls = false + + override val isRelative = true + + override lazy val frame: WindowFrame = fakeFrame override def checkInputDataTypes(): TypeCheckResult = { val check = super.checkInputDataTypes() @@ -425,7 +448,7 @@ abstract class OffsetWindowFunction group = "window_funcs") // scalastyle:on line.size.limit line.contains.tab case class Lead(input: Expression, offset: Expression, default: Expression) - extends OffsetWindowFunction { + extends FrameLessOffsetWindowFunction { def this(input: Expression, offset: Expression) = this(input, offset, Literal(null)) @@ -467,7 +490,7 @@ case class Lead(input: Expression, offset: Expression, default: Expression) group = "window_funcs") // scalastyle:on line.size.limit line.contains.tab case class Lag(input: Expression, inputOffset: Expression, default: Expression) - extends OffsetWindowFunction { + extends FrameLessOffsetWindowFunction { def this(input: Expression, offset: Expression) = this(input, offset, Literal(null)) @@ -579,7 +602,6 @@ case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction { } // scalastyle:off line.size.limit line.contains.tab - @ExpressionDescription( usage = """ _FUNC_(input[, offset]) - Returns the value of `input` at the row that is the `offset`th row @@ -607,12 +629,16 @@ case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction { since = "3.1.0", group = "window_funcs") // scalastyle:on line.size.limit line.contains.tab -case class NthValue(input: Expression, offsetExpr: Expression, ignoreNulls: Boolean) - extends AggregateWindowFunction with ImplicitCastInputTypes { +case class NthValue(input: Expression, offset: Expression, ignoreNulls: Boolean) + extends AggregateWindowFunction with OffsetWindowSpec with ImplicitCastInputTypes { def this(child: Expression, offset: Expression) = this(child, offset, false) - override def children: Seq[Expression] = input :: offsetExpr :: Nil + override lazy val default = Literal.create(null, input.dataType) + + override val isRelative = false + + override def children: Seq[Expression] = input :: offset :: Nil override val frame: WindowFrame = UnspecifiedFrame @@ -624,35 +650,35 @@ case class NthValue(input: Expression, offsetExpr: Expression, ignoreNulls: Bool val check = super.checkInputDataTypes() if (check.isFailure) { check - } else if (!offsetExpr.foldable) { - TypeCheckFailure(s"Offset expression '$offsetExpr' must be a literal.") - } else if (offset <= 0) { + } else if (!offset.foldable) { + TypeCheckFailure(s"Offset expression '$offset' must be a literal.") + } else if (offsetVal <= 0) { TypeCheckFailure( - s"The 'offset' argument of nth_value must be greater than zero but it is $offset.") + s"The 'offset' argument of nth_value must be greater than zero but it is $offsetVal.") } else { TypeCheckSuccess } } - private lazy val offset = offsetExpr.eval().asInstanceOf[Int].toLong + private lazy val offsetVal = offset.eval().asInstanceOf[Int].toLong private lazy val result = AttributeReference("result", input.dataType)() private lazy val count = AttributeReference("count", LongType)() 
override lazy val aggBufferAttributes: Seq[AttributeReference] = result :: count :: Nil override lazy val initialValues: Seq[Literal] = Seq( - /* result = */ Literal.create(null, input.dataType), + /* result = */ default, /* count = */ Literal(1L) ) override lazy val updateExpressions: Seq[Expression] = { if (ignoreNulls) { Seq( - /* result = */ If(count === offset && input.isNotNull, input, result), + /* result = */ If(count === offsetVal && input.isNotNull, input, result), /* count = */ If(input.isNull, count, count + 1L) ) } else { Seq( - /* result = */ If(count === offset, input, result), + /* result = */ If(count === offsetVal, input, result), /* count = */ count + 1L ) } @@ -662,7 +688,7 @@ case class NthValue(input: Expression, offsetExpr: Expression, ignoreNulls: Bool override def prettyName: String = "nth_value" override def sql: String = - s"$prettyName(${input.sql}, ${offsetExpr.sql})${if (ignoreNulls) " ignore nulls" else ""}" + s"$prettyName(${input.sql}, ${offset.sql})${if (ignoreNulls) " ignore nulls" else ""}" } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala index eaca55df08d06..439c31a47fd3b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala @@ -57,8 +57,12 @@ import org.apache.spark.sql.types.{CalendarIntervalType, DateType, IntegerType, * 3. CURRENT ROW AND 1 FOLLOWING * 4. 1 PRECEDING AND 1 FOLLOWING * 5. 1 FOLLOWING AND 2 FOLLOWING - * - Offset frame: The frame consist of one row, which is an offset number of rows away from the - * current row. Only [[OffsetWindowFunction]]s can be processed in an offset frame. + * - Offset frame: The frame consist of one row, which is an offset number of rows. There are three + * implement of offset frame. + * 1. [[FrameLessOffsetWindowFunction]] returns the value of the input column offset by a number + * of rows according to the current row. + * 2. [[UnboundedOffsetWindowFunctionFrame]] and [[UnboundedPrecedingOffsetWindowFunctionFrame]] + * returns the value of the input column offset by a number of rows within the frame. * * Different frame boundaries can be used in Growing, Shrinking and Moving frames. 
A frame * boundary can be either Row or Range based: diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala index ed055bb801ae5..f0b99c1522aa1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala @@ -136,8 +136,15 @@ trait WindowExecBase extends UnaryExecNode { val frame = spec.frameSpecification.asInstanceOf[SpecifiedWindowFrame] function match { case AggregateExpression(f, _, _, _, _) => collect("AGGREGATE", frame, e, f) + case f: FrameLessOffsetWindowFunction => collect("FRAME_LESS_OFFSET", frame, e, f) + case f: OffsetWindowSpec if !f.ignoreNulls && + frame.frameType == RowFrame && frame.lower == UnboundedPreceding => + frame.upper match { + case UnboundedFollowing => collect("UNBOUNDED_OFFSET", f.fakeFrame, e, f) + case CurrentRow => collect("UNBOUNDED_PRECEDING_OFFSET", f.fakeFrame, e, f) + case _ => collect("AGGREGATE", frame, e, f) + } case f: AggregateWindowFunction => collect("AGGREGATE", frame, e, f) - case f: OffsetWindowFunction => collect("OFFSET", frame, e, f) case f: PythonUDF => collect("AGGREGATE", frame, e, f) case f => sys.error(s"Unsupported window function: $f") } @@ -171,18 +178,42 @@ trait WindowExecBase extends UnaryExecNode { // Create the factory to produce WindowFunctionFrame. val factory = key match { - // Offset Frame - case ("OFFSET", _, IntegerLiteral(offset), _) => + // Frameless offset Frame + case ("FRAME_LESS_OFFSET", _, IntegerLiteral(offset), _) => target: InternalRow => - new OffsetWindowFunctionFrame( + new FrameLessOffsetWindowFunctionFrame( target, ordinal, - // OFFSET frame functions are guaranteed be OffsetWindowFunctions. - functions.map(_.asInstanceOf[OffsetWindowFunction]), + // OFFSET frame functions are guaranteed be OffsetWindowSpec. + functions.map(_.asInstanceOf[OffsetWindowSpec]), child.output, (expressions, schema) => MutableProjection.create(expressions, schema), offset) + case ("UNBOUNDED_OFFSET", _, IntegerLiteral(offset), _) => + target: InternalRow => { + new UnboundedOffsetWindowFunctionFrame( + target, + ordinal, + // OFFSET frame functions are guaranteed be OffsetWindowSpec. + functions.map(_.asInstanceOf[OffsetWindowSpec]), + child.output, + (expressions, schema) => + MutableProjection.create(expressions, schema), + offset) + } + case ("UNBOUNDED_PRECEDING_OFFSET", _, IntegerLiteral(offset), _) => + target: InternalRow => { + new UnboundedPrecedingOffsetWindowFunctionFrame( + target, + ordinal, + // OFFSET frame functions are guaranteed be OffsetWindowSpec. + functions.map(_.asInstanceOf[OffsetWindowSpec]), + child.output, + (expressions, schema) => + MutableProjection.create(expressions, schema), + offset) + } // Entire Partition Frame. case ("AGGREGATE", _, UnboundedPreceding, UnboundedFollowing) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala index dc1b919feefe4..e8a83f9772d35 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala @@ -77,31 +77,31 @@ object WindowFunctionFrame { * @param newMutableProjection function used to create the projection. 
* @param offset by which rows get moved within a partition. */ -final class OffsetWindowFunctionFrame( +abstract class OffsetWindowFunctionFrameBase( target: InternalRow, ordinal: Int, - expressions: Array[OffsetWindowFunction], + expressions: Array[OffsetWindowSpec], inputSchema: Seq[Attribute], newMutableProjection: (Seq[Expression], Seq[Attribute]) => MutableProjection, offset: Int) extends WindowFunctionFrame { /** Rows of the partition currently being processed. */ - private[this] var input: ExternalAppendOnlyUnsafeRowArray = null + protected var input: ExternalAppendOnlyUnsafeRowArray = null /** * An iterator over the [[input]] */ - private[this] var inputIterator: Iterator[UnsafeRow] = _ + protected var inputIterator: Iterator[UnsafeRow] = _ /** Index of the input row currently used for output. */ - private[this] var inputIndex = 0 + protected var inputIndex = 0 /** * Create the projection used when the offset row exists. * Please note that this project always respect null input values (like PostgreSQL). */ - private[this] val projection = { + protected val projection = { // Collect the expressions and bind them. val inputAttrs = inputSchema.map(_.withNullability(true)) val boundExpressions = Seq.fill(ordinal)(NoOp) ++ bindReferences( @@ -112,7 +112,7 @@ final class OffsetWindowFunctionFrame( } /** Create the projection used when the offset row DOES NOT exists. */ - private[this] val fillDefaultValue = { + protected val fillDefaultValue = { // Collect the expressions and bind them. val inputAttrs: AttributeSeq = inputSchema.map(_.withNullability(true)) val boundExpressions = Seq.fill(ordinal)(NoOp) ++ expressions.toSeq.map { e => @@ -129,6 +129,28 @@ final class OffsetWindowFunctionFrame( newMutableProjection(boundExpressions, Nil).target(target) } + override def currentLowerBound(): Int = throw new UnsupportedOperationException() + + override def currentUpperBound(): Int = throw new UnsupportedOperationException() +} + +/** + * The frameless offset window frame is an internal window frame just used to optimize the + * performance for the window function that returns the value of the input column offset + * by a number of rows according to the current row. The internal window frame is not a popular + * window frame cannot be specified and used directly by the users. This window frame + * calculates frames containing LEAD/LAG statements. + */ +class FrameLessOffsetWindowFunctionFrame( + target: InternalRow, + ordinal: Int, + expressions: Array[OffsetWindowSpec], + inputSchema: Seq[Attribute], + newMutableProjection: (Seq[Expression], Seq[Attribute]) => MutableProjection, + offset: Int) + extends OffsetWindowFunctionFrameBase( + target, ordinal, expressions, inputSchema, newMutableProjection, offset) { + override def prepare(rows: ExternalAppendOnlyUnsafeRowArray): Unit = { input = rows inputIterator = input.generateIterator() @@ -151,10 +173,93 @@ final class OffsetWindowFunctionFrame( } inputIndex += 1 } +} - override def currentLowerBound(): Int = throw new UnsupportedOperationException() +/** + * The unbounded offset window frame is an internal window frame just used to optimize the + * performance for the window function that returns the value of the input column offset + * by a number of rows within the frame and has specified ROWS BETWEEN UNBOUNDED PRECEDING + * AND UNBOUNDED FOLLOWING. The internal window frame is not a popular window frame cannot be + * specified and used directly by the users. 
+ * The unbounded offset window frame calculates frames containing NTH_VALUE statements. + * The unbounded offset window frame return the same value for all rows in the window partition. + */ +class UnboundedOffsetWindowFunctionFrame( + target: InternalRow, + ordinal: Int, + expressions: Array[OffsetWindowSpec], + inputSchema: Seq[Attribute], + newMutableProjection: (Seq[Expression], Seq[Attribute]) => MutableProjection, + offset: Int) + extends OffsetWindowFunctionFrameBase( + target, ordinal, expressions, inputSchema, newMutableProjection, offset) { - override def currentUpperBound(): Int = throw new UnsupportedOperationException() + override def prepare(rows: ExternalAppendOnlyUnsafeRowArray): Unit = { + input = rows + if (offset > input.length) { + fillDefaultValue(EmptyRow) + } else { + inputIterator = input.generateIterator() + // drain the first few rows if offset is larger than one + inputIndex = 0 + while (inputIndex < offset - 1) { + if (inputIterator.hasNext) inputIterator.next() + inputIndex += 1 + } + val r = WindowFunctionFrame.getNextOrNull(inputIterator) + projection(r) + } + } + + override def write(index: Int, current: InternalRow): Unit = { + // The results are the same for each row in the partition, and have been evaluated in prepare. + // Don't need to recalculate here. + } +} + +/** + * The unbounded preceding offset window frame is an internal window frame just used to optimize + * the performance for the window function that returns the value of the input column offset + * by a number of rows within the frame and has specified ROWS BETWEEN UNBOUNDED PRECEDING + * AND CURRENT ROW. The internal window frame is not a popular window frame cannot be specified + * and used directly by the users. + * The unbounded preceding offset window frame calculates frames containing NTH_VALUE statements. + * The unbounded preceding offset window frame return the same value for rows which index + * (starting from 1) equal to or greater than offset in the window partition. 
+ */ +class UnboundedPrecedingOffsetWindowFunctionFrame( + target: InternalRow, + ordinal: Int, + expressions: Array[OffsetWindowSpec], + inputSchema: Seq[Attribute], + newMutableProjection: (Seq[Expression], Seq[Attribute]) => MutableProjection, + offset: Int) + extends OffsetWindowFunctionFrameBase( + target, ordinal, expressions, inputSchema, newMutableProjection, offset) { + + var selectedRow: UnsafeRow = null + + override def prepare(rows: ExternalAppendOnlyUnsafeRowArray): Unit = { + input = rows + inputIterator = input.generateIterator() + // drain the first few rows if offset is larger than one + inputIndex = 0 + while (inputIndex < offset - 1) { + if (inputIterator.hasNext) inputIterator.next() + inputIndex += 1 + } + if (inputIndex < input.length) { + selectedRow = WindowFunctionFrame.getNextOrNull(inputIterator) + } + } + + override def write(index: Int, current: InternalRow): Unit = { + if (index >= inputIndex && selectedRow != null) { + projection(selectedRow) + } else { + fillDefaultValue(EmptyRow) + } + } } /** diff --git a/sql/core/src/test/resources/sql-tests/inputs/window.sql b/sql/core/src/test/resources/sql-tests/inputs/window.sql index 5de6db210ce36..c1be5fb27e6fa 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/window.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/window.sql @@ -165,6 +165,16 @@ FROM basic_pays ORDER BY salary DESC; +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC; + SELECT employee_name, salary, @@ -205,6 +215,26 @@ FROM basic_pays ORDER BY salary DESC; +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC; + +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC; + SELECT employee_name, department, diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out b/sql/core/src/test/resources/sql-tests/results/window.sql.out index 028dd7a12d25d..f6506a77e239c 100644 --- a/sql/core/src/test/resources/sql-tests/results/window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 32 +-- Number of queries: 35 -- !query @@ -479,6 +479,38 @@ Anthony Bow 6627 Gerard Bondur Leslie Thompson 5186 Gerard Bondur +-- !query +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC +-- !query schema +struct +-- !query output +Larry Bott 11798 NULL +Gerard Bondur 11472 Gerard Bondur +Pamela Castillo 11303 Gerard Bondur +Barry Jones 10586 Gerard Bondur +George Vanauf 10563 Gerard Bondur +Loui Bondur 10449 Gerard Bondur +Mary Patterson 9998 Gerard Bondur +Steve Patterson 9441 Gerard Bondur +Julie Firrelli 9181 Gerard Bondur +Jeff Firrelli 8992 Gerard Bondur +William Patterson 8870 Gerard Bondur +Diane Murphy 8435 Gerard Bondur +Leslie Jennings 8113 Gerard Bondur +Gerard Hernandez 6949 Gerard Bondur +Foon Yue Tseng 6660 Gerard Bondur +Anthony Bow 6627 Gerard Bondur +Leslie Thompson 5186 Gerard Bondur + + -- !query 
SELECT employee_name, @@ -607,6 +639,70 @@ Anthony Bow 6627 Gerard Bondur Leslie Thompson 5186 Gerard Bondur +-- !query +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC +-- !query schema +struct +-- !query output +Larry Bott 11798 Gerard Bondur +Gerard Bondur 11472 Gerard Bondur +Pamela Castillo 11303 Gerard Bondur +Barry Jones 10586 Gerard Bondur +George Vanauf 10563 Gerard Bondur +Loui Bondur 10449 Gerard Bondur +Mary Patterson 9998 Gerard Bondur +Steve Patterson 9441 Gerard Bondur +Julie Firrelli 9181 Gerard Bondur +Jeff Firrelli 8992 Gerard Bondur +William Patterson 8870 Gerard Bondur +Diane Murphy 8435 Gerard Bondur +Leslie Jennings 8113 Gerard Bondur +Gerard Hernandez 6949 Gerard Bondur +Foon Yue Tseng 6660 Gerard Bondur +Anthony Bow 6627 Gerard Bondur +Leslie Thompson 5186 Gerard Bondur + + +-- !query +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC +-- !query schema +struct +-- !query output +Larry Bott 11798 Gerard Bondur +Gerard Bondur 11472 Gerard Bondur +Pamela Castillo 11303 Gerard Bondur +Barry Jones 10586 Gerard Bondur +George Vanauf 10563 Gerard Bondur +Loui Bondur 10449 Gerard Bondur +Mary Patterson 9998 Gerard Bondur +Steve Patterson 9441 Gerard Bondur +Julie Firrelli 9181 Gerard Bondur +Jeff Firrelli 8992 Gerard Bondur +William Patterson 8870 Gerard Bondur +Diane Murphy 8435 Gerard Bondur +Leslie Jennings 8113 Gerard Bondur +Gerard Hernandez 6949 Gerard Bondur +Foon Yue Tseng 6660 Gerard Bondur +Anthony Bow 6627 Gerard Bondur +Leslie Thompson 5186 Gerard Bondur + + -- !query SELECT employee_name, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index 616e333033aa9..207b2963f0b3b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -657,15 +657,16 @@ class DataFrameWindowFunctionsSuite extends QueryTest $"order", nth_value($"value", 2).over(window), nth_value($"value", 2, ignoreNulls = false).over(window), - nth_value($"value", 2, ignoreNulls = true).over(window)), + nth_value($"value", 2, ignoreNulls = true).over(window), + nth_value($"value", 3, ignoreNulls = false).over(window)), Seq( - Row("a", 0, null, null, null), - Row("a", 1, "x", "x", null), - Row("a", 2, "x", "x", "y"), - Row("a", 3, "x", "x", "y"), - Row("a", 4, "x", "x", "y"), - Row("b", 1, null, null, null), - Row("b", 2, null, null, null))) + Row("a", 0, null, null, null, null), + Row("a", 1, "x", "x", null, null), + Row("a", 2, "x", "x", "y", "y"), + Row("a", 3, "x", "x", "y", "y"), + Row("a", 4, "x", "x", "y", "y"), + Row("b", 1, null, null, null, null), + Row("b", 2, null, null, null, null))) } test("nth_value on descending ordered window") { From 2b8fe6d9ae2fe31d1545da98003f931ee1aa11d5 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Wed, 28 Oct 2020 21:32:09 +0900 Subject: [PATCH 0346/1009] [SPARK-33269][INFRA] Ignore ".bsp/" directory in Git ### What changes were proposed in this pull request? After SBT upgrade into 1.4.0 and above. 
there is always a ".bsp" directory after sbt starts: https://github.com/sbt/sbt/releases/tag/v1.4.0 This PR is to put the directory into `.gitignore`. ### Why are the changes needed? The ".bsp" directory shows up as an untracked file in git during development, which is annoying. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manual local test Closes #30171 from gengliangwang/ignoreBSP. Authored-by: Gengliang Wang Signed-off-by: HyukjinKwon --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 82414d1fa35bf..9c145fba1bee9 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ *.swp *~ .DS_Store +.bsp/ .cache .classpath .ensime From b26ae98407c6c017a4061c0c420f48685ddd6163 Mon Sep 17 00:00:00 2001 From: "zky.zhoukeyong" Date: Wed, 28 Oct 2020 13:17:28 +0000 Subject: [PATCH 0347/1009] [SPARK-33208][SQL] Update the document of SparkSession#sql Change-Id: I82db1f9e8f667573aa3a03e05152cbed0ea7686b ### What changes were proposed in this pull request? Update the document of SparkSession#sql, mentioning that this API eagerly runs DDL/DML commands, but not for SELECT queries. ### Why are the changes needed? To clarify the behavior of SparkSession#sql. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Not needed. Closes #30168 from waitinfuture/master. Authored-by: zky.zhoukeyong Signed-off-by: Wenchen Fan --- sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala | 4 ++-- .../src/main/scala/org/apache/spark/sql/SparkSession.scala | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 68ce82d5badda..7cf0b6bb70364 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -641,8 +641,8 @@ class SQLContext private[sql](val sparkSession: SparkSession) } /** - * Executes a SQL query using Spark, returning the result as a `DataFrame`. The dialect that is - * used for SQL parsing can be configured with 'spark.sql.dialect'. + * Executes a SQL query using Spark, returning the result as a `DataFrame`. + * This API eagerly runs DDL/DML commands, but not for SELECT queries. * * @group basic * @since 1.3.0 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index b33557dbfdb27..c4aadfb1d66bd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -600,7 +600,7 @@ class SparkSession private( /** * Executes a SQL query using Spark, returning the result as a `DataFrame`. - * The dialect that is used for SQL parsing can be configured with 'spark.sql.dialect'. + * This API eagerly runs DDL/DML commands, but not for SELECT queries. * * @since 2.0.0 */ From a6216e2446b6befc3f6d6b370e694421aadda9dd Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Wed, 28 Oct 2020 08:33:02 -0700 Subject: [PATCH 0348/1009] [SPARK-33268][SQL][PYTHON] Fix bugs for casting data from/to PythonUserDefinedType ### What changes were proposed in this pull request? This PR intends to fix bugs for casting data from/to PythonUserDefinedType.
A sequence of queries to reproduce this issue is as follows: ``` >>> from pyspark.sql import Row >>> from pyspark.sql.functions import col >>> from pyspark.sql.types import * >>> from pyspark.testing.sqlutils import * >>> >>> row = Row(point=ExamplePoint(1.0, 2.0)) >>> df = spark.createDataFrame([row]) >>> df.select(col("point").cast(PythonOnlyUDT())) Traceback (most recent call last): File "", line 1, in File "/Users/maropu/Repositories/spark/spark-master/python/pyspark/sql/dataframe.py", line 1402, in select jdf = self._jdf.select(self._jcols(*cols)) File "/Users/maropu/Repositories/spark/spark-master/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1305, in __call__ File "/Users/maropu/Repositories/spark/spark-master/python/pyspark/sql/utils.py", line 111, in deco return f(*a, **kw) File "/Users/maropu/Repositories/spark/spark-master/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py", line 328, in get_return_value py4j.protocol.Py4JJavaError: An error occurred while calling o44.select. : java.lang.NullPointerException at org.apache.spark.sql.types.UserDefinedType.acceptsType(UserDefinedType.scala:84) at org.apache.spark.sql.catalyst.expressions.Cast$.canCast(Cast.scala:96) at org.apache.spark.sql.catalyst.expressions.CastBase.checkInputDataTypes(Cast.scala:267) at org.apache.spark.sql.catalyst.expressions.CastBase.resolved$lzycompute(Cast.scala:290) at org.apache.spark.sql.catalyst.expressions.CastBase.resolved(Cast.scala:290) ``` The root cause of this issue is that, since `PythonUserDefinedType#userClass` is always null, `isAssignableFrom` in `UserDefinedType#acceptsType` throws a NullPointerException. To fix it, this PR defines `acceptsType` in `PythonUserDefinedType` and filters out the null case in `UserDefinedType#acceptsType`. ### Why are the changes needed? Bug fixes. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added tests. Closes #30169 from maropu/FixPythonUDTCast.
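Before reading the diff, the shape of the fix can be summarized with a small standalone sketch; the method and parameter names below are illustrative only and are not part of the patch:

```scala
// Minimal sketch of the null guard described above: a Python-only UDT has no JVM user class
// (its userClass is null), so both classes must be checked before calling isAssignableFrom,
// otherwise the call site throws a NullPointerException.
def acceptsUserClass(thisUserClass: Class[_], otherUserClass: Class[_]): Boolean =
  if (thisUserClass != null && otherUserClass != null) {
    thisUserClass.isAssignableFrom(otherUserClass)
  } else {
    false
  }
```

The actual patch additionally overrides `acceptsType` in `PythonUserDefinedType` so that two Python-only UDTs are compared by their `pyUDT` values instead.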
Authored-by: Takeshi Yamamuro Signed-off-by: Dongjoon Hyun --- python/pyspark/sql/tests/test_types.py | 9 +++++++++ .../org/apache/spark/sql/types/UserDefinedType.scala | 9 +++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py index e85e8a6e6d1ee..6b5c1ad6c4e46 100644 --- a/python/pyspark/sql/tests/test_types.py +++ b/python/pyspark/sql/tests/test_types.py @@ -27,6 +27,7 @@ from pyspark.sql import Row from pyspark.sql.functions import col from pyspark.sql.udf import UserDefinedFunction +from pyspark.sql.utils import AnalysisException from pyspark.sql.types import ByteType, ShortType, IntegerType, FloatType, DateType, \ TimestampType, MapType, StringType, StructType, StructField, ArrayType, DoubleType, LongType, \ DecimalType, BinaryType, BooleanType, NullType @@ -441,6 +442,14 @@ def test_cast_to_string_with_udt(self): result = df.select(col('point').cast('string'), col('pypoint').cast('string')).head() self.assertEqual(result, Row(point=u'(1.0, 2.0)', pypoint=u'[3.0, 4.0]')) + def test_cast_to_udt_with_udt(self): + from pyspark.sql.functions import col + row = Row(point=ExamplePoint(1.0, 2.0), python_only_point=PythonOnlyPoint(1.0, 2.0)) + df = self.spark.createDataFrame([row]) + self.assertRaises(AnalysisException, lambda: df.select(col("point").cast(PythonOnlyUDT()))) + self.assertRaises(AnalysisException, + lambda: df.select(col("python_only_point").cast(ExamplePointUDT()))) + def test_struct_type(self): struct1 = StructType().add("f1", StringType(), True).add("f2", StringType(), True, None) struct2 = StructType([StructField("f1", StringType(), True), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala index 592ce03606d4b..689c30f6b7fa9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala @@ -78,8 +78,8 @@ abstract class UserDefinedType[UserType >: Null] extends DataType with Serializa */ override private[spark] def asNullable: UserDefinedType[UserType] = this - override private[sql] def acceptsType(dataType: DataType) = dataType match { - case other: UserDefinedType[_] => + override private[sql] def acceptsType(dataType: DataType): Boolean = dataType match { + case other: UserDefinedType[_] if this.userClass != null && other.userClass != null => this.getClass == other.getClass || this.userClass.isAssignableFrom(other.userClass) case _ => false @@ -131,6 +131,11 @@ private[sql] class PythonUserDefinedType( ("sqlType" -> sqlType.jsonValue) } + override private[sql] def acceptsType(dataType: DataType): Boolean = dataType match { + case other: PythonUserDefinedType => pyUDT == other.pyUDT + case _ => false + } + override def equals(other: Any): Boolean = other match { case that: PythonUserDefinedType => pyUDT == that.pyUDT case _ => false From a744fea3be12f1a53ab553040b95da730210bc88 Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Wed, 28 Oct 2020 10:00:29 -0700 Subject: [PATCH 0349/1009] [SPARK-33267][SQL] Fix NPE issue on 'In' filter when one of values contains null ### What changes were proposed in this pull request? This PR proposes to fix the NPE issue on `In` filter when one of values contain null. In real case, you can trigger this issue when you try to push down the filter with `in (..., null)` against V2 source table. 
`DataSourceStrategy` caches the mapping (filter instance -> expression) in HashMap, which leverages hash code on the key, hence it could trigger the NPE issue. ### Why are the changes needed? This is an obvious bug as `In` filter doesn't care about null value when calculating hash code. ### Does this PR introduce _any_ user-facing change? Yes, previously the query with having `null` in "in" condition against data source V2 source table supporting push down filter failed with NPE, whereas after the PR the query will not fail. ### How was this patch tested? UT added. The new UT fails without the PR and passes with the PR. Closes #30170 from HeartSaVioR/SPARK-33267. Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/sql/sources/filters.scala | 2 +- .../apache/spark/sql/connector/DataSourceV2Suite.scala | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala index 7533793253513..2b44a3a861ed9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala @@ -164,7 +164,7 @@ case class In(attribute: String, values: Array[Any]) extends Filter { var h = attribute.hashCode values.foreach { v => h *= 41 - h += v.hashCode() + h += (if (v != null) v.hashCode() else 0) } h } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala index ec1ac00d08bf8..ce28e615702db 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala @@ -413,6 +413,16 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS } } } + + test("SPARK-33267: push down with condition 'in (..., null)' should not throw NPE") { + Seq(classOf[AdvancedDataSourceV2], classOf[JavaAdvancedDataSourceV2]).foreach { cls => + withClue(cls.getName) { + val df = spark.read.format(cls.getName).load() + // before SPARK-33267 below query just threw NPE + df.select('i).where("i in (1, null)").collect() + } + } + } } From 2639ad43cb8357db235e7fc9ce24930cca7f2525 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 29 Oct 2020 07:37:16 +0900 Subject: [PATCH 0350/1009] [SPARK-33272][SQL] prune the attributes mapping in QueryPlan.transformUpWithNewOutput ### What changes were proposed in this pull request? For complex query plans, `QueryPlan.transformUpWithNewOutput` will keep accumulating the attributes mapping to be propagated, which may hurt performance. This PR prunes the attributes mapping before propagating. ### Why are the changes needed? A simple perf improvement. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? existing tests Closes #30173 from cloud-fan/bug. 
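The pruning idea itself is small and can be shown in isolation; the following is a minimal Scala sketch with illustrative stand-in types (none of these names come from the patch):

```scala
// Stand-in for a Catalyst attribute; only its identity matters for this sketch.
case class Attr(name: String, exprId: Long)

// Keep only the mapping entries whose new attribute is still part of the current node's
// output, because only those can be referenced and rewritten by parent plan nodes.
def pruneMapping(mapping: Seq[(Attr, Attr)], output: Set[Attr]): Seq[(Attr, Attr)] =
  mapping.filter { case (_, newAttr) => output.contains(newAttr) }

val oldA = Attr("a", 1); val newA = Attr("a", 2)
val oldB = Attr("b", 3); val newB = Attr("b", 4)
// Only oldA -> newA survives, since newA appears in the node's output.
assert(pruneMapping(Seq(oldA -> newA, oldB -> newB), Set(newA)) == Seq(oldA -> newA))
```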
Authored-by: Wenchen Fan Signed-off-by: Takeshi Yamamuro --- .../spark/sql/catalyst/plans/QueryPlan.scala | 17 +++++++++++++++-- .../catalyst/plans/logical/AnalysisHelper.scala | 2 +- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index 3e8467bab0348..b1884eac27f73 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -180,10 +180,14 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT * rewrite attribute references in the parent nodes. * @param skipCond a boolean condition to indicate if we can skip transforming a plan node to save * time. + * @param canGetOutput a boolean condition to indicate if we can get the output of a plan node + * to prune the attributes mapping to be propagated. The default value is true + * as only unresolved logical plan can't get output. */ def transformUpWithNewOutput( rule: PartialFunction[PlanType, (PlanType, Seq[(Attribute, Attribute)])], - skipCond: PlanType => Boolean = _ => false): PlanType = { + skipCond: PlanType => Boolean = _ => false, + canGetOutput: PlanType => Boolean = _ => true): PlanType = { def rewrite(plan: PlanType): (PlanType, Seq[(Attribute, Attribute)]) = { if (skipCond(plan)) { plan -> Nil @@ -237,7 +241,16 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT val existingAttrMappingSet = transferAttrMapping.map(_._2).toSet newValidAttrMapping.filterNot { case (_, a) => existingAttrMappingSet.contains(a) } } - planAfterRule -> (transferAttrMapping ++ newOtherAttrMapping).toSeq + val resultAttrMapping = if (canGetOutput(plan)) { + // We propagate the attributes mapping to the parent plan node to update attributes, so + // the `newAttr` must be part of this plan's output. + (transferAttrMapping ++ newOtherAttrMapping).filter { + case (_, newAttr) => planAfterRule.outputSet.contains(newAttr) + } + } else { + transferAttrMapping ++ newOtherAttrMapping + } + planAfterRule -> resultAttrMapping.toSeq } } rewrite(this)._1 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala index 30447db1acc04..d8d18b46bcc74 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala @@ -127,7 +127,7 @@ trait AnalysisHelper extends QueryPlan[LogicalPlan] { self: LogicalPlan => rule: PartialFunction[LogicalPlan, (LogicalPlan, Seq[(Attribute, Attribute)])]) : LogicalPlan = { if (!analyzed) { - transformUpWithNewOutput(rule, skipCond = _.analyzed) + transformUpWithNewOutput(rule, skipCond = _.analyzed, canGetOutput = _.resolved) } else { self } From c592ae6ed81cf381512749c43ed688411ef1b431 Mon Sep 17 00:00:00 2001 From: Nathan Wreggit Date: Thu, 29 Oct 2020 10:28:50 +0900 Subject: [PATCH 0351/1009] [SQL][MINOR] Update from_unixtime doc ### What changes were proposed in this pull request? This PR fixes from_unixtime documentation to show that fmt is optional parameter. ### Does this PR introduce _any_ user-facing change? Yes, documentation update. 
**Before change:** ![image](https://user-images.githubusercontent.com/4176173/97497659-18c6cc80-1928-11eb-93d8-453ef627ac7c.png) **After change:** ![image](https://user-images.githubusercontent.com/4176173/97496153-c5537f00-1925-11eb-8102-457e85e019d5.png) ### How was this patch tested? Style check using: ./dev/run-tests Manual check and screenshotting with: ./sql/create-docs.sh Manual verification of behavior with latest spark-sql binary. Closes #30176 from Obbay2/from_unixtime_doc. Authored-by: Nathan Wreggit Signed-off-by: HyukjinKwon --- .../sql/catalyst/expressions/datetimeExpressions.scala | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 571b0be40c6e6..223d0e661ed3e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -895,17 +895,20 @@ abstract class UnixTime extends ToTimestamp { */ // scalastyle:off line.size.limit @ExpressionDescription( - usage = "_FUNC_(unix_time, fmt) - Returns `unix_time` in the specified `fmt`.", + usage = "_FUNC_(unix_time[, fmt]) - Returns `unix_time` in the specified `fmt`.", arguments = """ Arguments: * unix_time - UNIX Timestamp to be converted to the provided format. * fmt - Date/time format pattern to follow. See Datetime Patterns - for valid date and time format patterns. + for valid date and time format patterns. The 'yyyy-MM-dd HH:mm:ss' pattern is used if omitted. """, examples = """ Examples: > SELECT _FUNC_(0, 'yyyy-MM-dd HH:mm:ss'); 1969-12-31 16:00:00 + + > SELECT _FUNC_(0); + 1969-12-31 16:00:00 """, group = "datetime_funcs", since = "1.5.0") From 9d5e48ea95d1c3017a51ff69584f32a18901b2b5 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 29 Oct 2020 10:30:41 +0900 Subject: [PATCH 0352/1009] [SPARK-33270][SQL] Return SQL schema instead of Catalog string from the `SchemaOfJson` expression ### What changes were proposed in this pull request? Return schema in SQL format instead of Catalog string from the `SchemaOfJson` expression. ### Why are the changes needed? In some cases, `from_json()` cannot parse schemas returned by `schema_of_json`, for instance, when JSON fields have spaces (gaps). Such fields will be quoted after the changes, and can be parsed by `from_json()`. Here is the example: ```scala val in = Seq("""{"a b": 1}""").toDS() in.select(from_json('value, schema_of_json("""{"a b": 100}""")) as "parsed") ``` raises the exception: ``` == SQL == struct ------^^^ at org.apache.spark.sql.catalyst.parser.ParseException.withCommand(ParseDriver.scala:263) at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parse(ParseDriver.scala:130) at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parseTableSchema(ParseDriver.scala:76) at org.apache.spark.sql.types.DataType$.fromDDL(DataType.scala:131) at org.apache.spark.sql.catalyst.expressions.ExprUtils$.evalTypeExpr(ExprUtils.scala:33) at org.apache.spark.sql.catalyst.expressions.JsonToStructs.(jsonExpressions.scala:537) at org.apache.spark.sql.functions$.from_json(functions.scala:4141) ``` ### Does this PR introduce _any_ user-facing change? Yes. For example, `schema_of_json` for the input `{"col":0}`. Before: `struct` After: `STRUCT<`col`: BIGINT>` ### How was this patch tested? 
By existing test suites `JsonFunctionsSuite` and `JsonExpressionsSuite`. Closes #30172 from MaxGekk/schema_of_json-sql-schema. Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- R/pkg/tests/fulltests/test_sparkSQL.R | 4 ++-- docs/sql-migration-guide.md | 2 ++ python/pyspark/sql/functions.py | 4 ++-- .../expressions/jsonExpressions.scala | 6 +++--- .../expressions/JsonExpressionsSuite.scala | 8 ++++---- .../sql-tests/results/json-functions.sql.out | 6 +++--- .../apache/spark/sql/JsonFunctionsSuite.scala | 19 +++++++++++++------ 7 files changed, 29 insertions(+), 20 deletions(-) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 685e6e672bdf9..22bd4133d46a8 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1717,9 +1717,9 @@ test_that("column functions", { df <- as.DataFrame(list(list("col" = "1"))) c <- collect(select(df, schema_of_json('{"name":"Bob"}'))) - expect_equal(c[[1]], "struct") + expect_equal(c[[1]], "STRUCT<`name`: STRING>") c <- collect(select(df, schema_of_json(lit('{"name":"Bob"}')))) - expect_equal(c[[1]], "struct") + expect_equal(c[[1]], "STRUCT<`name`: STRING>") # Test to_json() supports arrays of primitive types and arrays df <- sql("SELECT array(19, 42, 70) as age") diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 124b04fb2bede..ee82d9ac4724b 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -49,6 +49,8 @@ license: | - In Spark 3.1, we remove the built-in Hive 1.2. You need to migrate your custom SerDes to Hive 2.3. See [HIVE-15167](https://issues.apache.org/jira/browse/HIVE-15167) for more details. - In Spark 3.1, loading and saving of timestamps from/to parquet files fails if the timestamps are before 1900-01-01 00:00:00Z, and loaded (saved) as the INT96 type. In Spark 3.0, the actions don't fail but might lead to shifting of the input timestamps due to rebasing from/to Julian to/from Proleptic Gregorian calendar. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.parquet.int96RebaseModeInRead` or/and `spark.sql.legacy.parquet.int96RebaseModeInWrite` to `LEGACY`. + + - In Spark 3.1, the `schema_of_json` function returns the schema in the SQL format in which field names are quoted. In Spark 3.0, the function returns a catalog string without field quoting and in lower case. 
## Upgrading from Spark SQL 3.0 to 3.0.1 diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 22941ab6f1157..68639ff7b6320 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -2937,10 +2937,10 @@ def schema_of_json(json, options={}): >>> df = spark.range(1) >>> df.select(schema_of_json(lit('{"a": 0}')).alias("json")).collect() - [Row(json='struct')] + [Row(json='STRUCT<`a`: BIGINT>')] >>> schema = schema_of_json('{a: 1}', {'allowUnquotedFieldNames':'true'}) >>> df.select(schema.alias("json")).collect() - [Row(json='struct')] + [Row(json='STRUCT<`a`: BIGINT>')] """ if isinstance(json, str): col = _create_column_from_literal(json) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index ef02d2db97a3f..39d9eb5a36964 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -741,9 +741,9 @@ case class StructsToJson( examples = """ Examples: > SELECT _FUNC_('[{"col":0}]'); - array> + ARRAY> > SELECT _FUNC_('[{"col":01}]', map('allowNumericLeadingZeros', 'true')); - array> + ARRAY> """, group = "json_funcs", since = "2.4.0") @@ -801,7 +801,7 @@ case class SchemaOfJson( } } - UTF8String.fromString(dt.catalogString) + UTF8String.fromString(dt.sql) } override def prettyName: String = "schema_of_json" diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala index 6f062dcc9a1ce..b3666936e5855 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala @@ -735,17 +735,17 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with test("SPARK-24709: infer schema of json strings") { checkEvaluation(new SchemaOfJson(Literal.create("""{"col":0}""")), - "struct") + "STRUCT<`col`: BIGINT>") checkEvaluation( new SchemaOfJson(Literal.create("""{"col0":["a"], "col1": {"col2": "b"}}""")), - "struct,col1:struct>") + "STRUCT<`col0`: ARRAY, `col1`: STRUCT<`col2`: STRING>>") } test("infer schema of JSON strings by using options") { checkEvaluation( new SchemaOfJson(Literal.create("""{"col":01}"""), CreateMap(Seq(Literal.create("allowNumericLeadingZeros"), Literal.create("true")))), - "struct") + "STRUCT<`col`: BIGINT>") } test("parse date with locale") { @@ -810,7 +810,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with } Seq("en-US", "ko-KR", "ru-RU", "de-DE").foreach { - checkDecimalInfer(_, """struct""") + checkDecimalInfer(_, """STRUCT<`d`: DECIMAL(7,3)>""") } } diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out index 34a329627f5dd..3cc45890cf089 100644 --- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out @@ -213,7 +213,7 @@ select schema_of_json('{"c1":0, "c2":[1]}') -- !query schema struct -- !query output -struct> +STRUCT<`c1`: BIGINT, `c2`: ARRAY> -- !query @@ -352,7 +352,7 @@ select 
schema_of_json('{"c1":1}', map('primitivesAsString', 'true')) -- !query schema struct -- !query output -struct +STRUCT<`c1`: STRING> -- !query @@ -360,7 +360,7 @@ select schema_of_json('{"c1":01, "c2":0.1}', map('allowNumericLeadingZeros', 'tr -- !query schema struct -- !query output -struct +STRUCT<`c1`: BIGINT, `c2`: DECIMAL(1,1)> -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index 5a1a3550d855b..e2a9cf536d154 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -411,7 +411,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { test("infers schemas using options") { val df = spark.range(1) .select(schema_of_json(lit("{a:1}"), Map("allowUnquotedFieldNames" -> "true").asJava)) - checkAnswer(df, Seq(Row("struct"))) + checkAnswer(df, Seq(Row("STRUCT<`a`: BIGINT>"))) } test("from_json - array of primitive types") { @@ -684,14 +684,14 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { val input = regexp_replace(lit("""{"item_id": 1, "item_price": 0.1}"""), "item_", "") checkAnswer( spark.range(1).select(schema_of_json(input)), - Seq(Row("struct"))) + Seq(Row("STRUCT<`id`: BIGINT, `price`: DOUBLE>"))) } test("SPARK-31065: schema_of_json - null and empty strings as strings") { Seq("""{"id": null}""", """{"id": ""}""").foreach { input => checkAnswer( spark.range(1).select(schema_of_json(input)), - Seq(Row("struct"))) + Seq(Row("STRUCT<`id`: STRING>"))) } } @@ -703,7 +703,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { schema_of_json( lit("""{"id": "a", "drop": {"drop": null}}"""), options.asJava)), - Seq(Row("struct"))) + Seq(Row("STRUCT<`id`: STRING>"))) // Array of structs checkAnswer( @@ -711,7 +711,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { schema_of_json( lit("""[{"id": "a", "drop": {"drop": null}}]"""), options.asJava)), - Seq(Row("array>"))) + Seq(Row("ARRAY>"))) // Other types are not affected. checkAnswer( @@ -719,7 +719,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { schema_of_json( lit("""null"""), options.asJava)), - Seq(Row("string"))) + Seq(Row("STRING"))) } test("optional datetime parser does not affect json time formatting") { @@ -747,4 +747,11 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { val df4 = Seq("""{"c2": [19]}""").toDF("c0") checkAnswer(df4.select(from_json($"c0", MapType(StringType, st))), Row(null)) } + + test("SPARK-33270: infers schema for JSON field with spaces and pass them to from_json") { + val in = Seq("""{"a b": 1}""").toDS() + val out = in.select(from_json('value, schema_of_json("""{"a b": 100}""")) as "parsed") + val expected = new StructType().add("parsed", new StructType().add("a b", LongType)) + assert(out.schema == expected) + } } From b409025641133fe7f352de4beaa2c0b037be3f56 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 29 Oct 2020 21:02:10 +0900 Subject: [PATCH 0353/1009] [SPARK-33281][SQL] Return SQL schema instead of Catalog string from the `SchemaOfCsv` expression ### What changes were proposed in this pull request? Return schema in SQL format instead of Catalog string from the SchemaOfCsv expression. ### Why are the changes needed? To unify output of the `schema_of_json()` and `schema_of_csv()`. ### Does this PR introduce _any_ user-facing change? 
Yes, they can but `schema_of_csv()` is usually used in combination with `from_csv()`, so, the format of schema shouldn't be much matter. Before: ``` > SELECT schema_of_csv('1,abc'); struct<_c0:int,_c1:string> ``` After: ``` > SELECT schema_of_csv('1,abc'); STRUCT<`_c0`: INT, `_c1`: STRING> ``` ### How was this patch tested? By existing test suites `CsvFunctionsSuite` and `CsvExpressionsSuite`. Closes #30180 from MaxGekk/schema_of_csv-sql-schema. Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- R/pkg/tests/fulltests/test_sparkSQL.R | 4 ++-- docs/sql-migration-guide.md | 2 +- python/pyspark/sql/functions.py | 4 ++-- .../spark/sql/catalyst/expressions/csvExpressions.scala | 4 ++-- .../sql/catalyst/expressions/CsvExpressionsSuite.scala | 4 ++-- .../resources/sql-tests/results/csv-functions.sql.out | 2 +- .../scala/org/apache/spark/sql/CsvFunctionsSuite.scala | 8 ++++---- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 22bd4133d46a8..3a0d359e2ae79 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1682,9 +1682,9 @@ test_that("column functions", { df <- as.DataFrame(list(list("col" = "1"))) c <- collect(select(df, schema_of_csv("Amsterdam,2018"))) - expect_equal(c[[1]], "struct<_c0:string,_c1:int>") + expect_equal(c[[1]], "STRUCT<`_c0`: STRING, `_c1`: INT>") c <- collect(select(df, schema_of_csv(lit("Amsterdam,2018")))) - expect_equal(c[[1]], "struct<_c0:string,_c1:int>") + expect_equal(c[[1]], "STRUCT<`_c0`: STRING, `_c1`: INT>") # Test to_json(), from_json(), schema_of_json() df <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people") diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index ee82d9ac4724b..fdc764a93424b 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -50,7 +50,7 @@ license: | - In Spark 3.1, loading and saving of timestamps from/to parquet files fails if the timestamps are before 1900-01-01 00:00:00Z, and loaded (saved) as the INT96 type. In Spark 3.0, the actions don't fail but might lead to shifting of the input timestamps due to rebasing from/to Julian to/from Proleptic Gregorian calendar. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.parquet.int96RebaseModeInRead` or/and `spark.sql.legacy.parquet.int96RebaseModeInWrite` to `LEGACY`. - - In Spark 3.1, the `schema_of_json` function returns the schema in the SQL format in which field names are quoted. In Spark 3.0, the function returns a catalog string without field quoting and in lower case. + - In Spark 3.1, the `schema_of_json` and `schema_of_csv` functions return the schema in the SQL format in which field names are quoted. In Spark 3.0, the function returns a catalog string without field quoting and in lower case. 
## Upgrading from Spark SQL 3.0 to 3.0.1 diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 68639ff7b6320..69fdf220f19fe 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -2964,9 +2964,9 @@ def schema_of_csv(csv, options={}): >>> df = spark.range(1) >>> df.select(schema_of_csv(lit('1|a'), {'sep':'|'}).alias("csv")).collect() - [Row(csv='struct<_c0:int,_c1:string>')] + [Row(csv='STRUCT<`_c0`: INT, `_c1`: STRING>')] >>> df.select(schema_of_csv('1|a', {'sep':'|'}).alias("csv")).collect() - [Row(csv='struct<_c0:int,_c1:string>')] + [Row(csv='STRUCT<`_c0`: INT, `_c1`: STRING>')] """ if isinstance(csv, str): col = _create_column_from_literal(csv) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala index f9ccf3c8c811f..6fad272aa4557 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala @@ -144,7 +144,7 @@ case class CsvToStructs( examples = """ Examples: > SELECT _FUNC_('1,abc'); - struct<_c0:int,_c1:string> + STRUCT<`_c0`: INT, `_c1`: STRING> """, since = "3.0.0") case class SchemaOfCsv( @@ -186,7 +186,7 @@ case class SchemaOfCsv( val inferSchema = new CSVInferSchema(parsedOptions) val fieldTypes = inferSchema.inferRowType(startType, row) val st = StructType(inferSchema.toStructFields(fieldTypes, header)) - UTF8String.fromString(st.catalogString) + UTF8String.fromString(st.sql) } override def prettyName: String = "schema_of_csv" diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala index 4a19add23fc58..7945974a1f3dc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala @@ -158,13 +158,13 @@ class CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with P } test("infer schema of CSV strings") { - checkEvaluation(new SchemaOfCsv(Literal.create("1,abc")), "struct<_c0:int,_c1:string>") + checkEvaluation(new SchemaOfCsv(Literal.create("1,abc")), "STRUCT<`_c0`: INT, `_c1`: STRING>") } test("infer schema of CSV strings by using options") { checkEvaluation( new SchemaOfCsv(Literal.create("1|abc"), Map("delimiter" -> "|")), - "struct<_c0:int,_c1:string>") + "STRUCT<`_c0`: INT, `_c1`: STRING>") } test("to_csv - struct") { diff --git a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out index 1e3173172a528..7ba3f712363fe 100644 --- a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out @@ -82,7 +82,7 @@ select schema_of_csv('1|abc', map('delimiter', '|')) -- !query schema struct -- !query output -struct<_c0:int,_c1:string> +STRUCT<`_c0`: INT, `_c1`: STRING> -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala index 800e294cca8c4..abccaf19084b2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala @@ -80,16 +80,16 @@ class CsvFunctionsSuite extends QueryTest with SharedSparkSession { test("schema_of_csv - infers schemas") { checkAnswer( spark.range(1).select(schema_of_csv(lit("0.1,1"))), - Seq(Row("struct<_c0:double,_c1:int>"))) + Seq(Row("STRUCT<`_c0`: DOUBLE, `_c1`: INT>"))) checkAnswer( spark.range(1).select(schema_of_csv("0.1,1")), - Seq(Row("struct<_c0:double,_c1:int>"))) + Seq(Row("STRUCT<`_c0`: DOUBLE, `_c1`: INT>"))) } test("schema_of_csv - infers schemas using options") { val df = spark.range(1) .select(schema_of_csv(lit("0.1 1"), Map("sep" -> " ").asJava)) - checkAnswer(df, Seq(Row("struct<_c0:double,_c1:int>"))) + checkAnswer(df, Seq(Row("STRUCT<`_c0`: DOUBLE, `_c1`: INT>"))) } test("to_csv - struct") { @@ -236,7 +236,7 @@ class CsvFunctionsSuite extends QueryTest with SharedSparkSession { val input = concat_ws(",", lit(0.1), lit(1)) checkAnswer( spark.range(1).select(schema_of_csv(input)), - Seq(Row("struct<_c0:double,_c1:int>"))) + Seq(Row("STRUCT<`_c0`: DOUBLE, `_c1`: INT>"))) } test("optional datetime parser does not affect csv time formatting") { From 056b62264b024c83840f2bf23f4bb9cabd13e136 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 29 Oct 2020 07:44:44 -0700 Subject: [PATCH 0354/1009] [SPARK-33263][SS] Configurable StateStore compression codec ### What changes were proposed in this pull request? This patch proposes to make StateStore compression codec configurable. ### Why are the changes needed? Currently the compression codec of StateStore is not configurable and hard-coded to be lz4. It is better if we can follow Spark other modules to configure the compression codec of StateStore. For example, we can choose zstd codec and zstd is configurable with different compression level. ### Does this PR introduce _any_ user-facing change? Yes, after this change users can config different codec for StateStore. ### How was this patch tested? Unit test. Closes #30162 from viirya/SPARK-33263. 
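As a usage note, the new option is an ordinary SQL conf; here is a minimal spark-shell sketch (assuming a `spark` session is in scope) of switching the codec before starting a streaming query:

```scala
// Use zstd instead of the default lz4 for StateStore delta and snapshot files.
// The chosen value is recorded in the query's checkpoint (offset log), so it should be
// set before the query is started for the first time.
spark.conf.set("spark.sql.streaming.stateStore.compression.codec", "zstd")
```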
Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../apache/spark/sql/internal/SQLConf.scala | 12 +++ .../sql/execution/streaming/OffsetSeq.scala | 10 ++- .../state/HDFSBackedStateStoreProvider.scala | 8 +- .../streaming/state/StateStoreConf.scala | 3 + .../commits/0 | 2 + .../metadata | 1 + .../offsets/0 | 3 + .../state/0/0/1.delta | Bin 0 -> 85 bytes .../state/StateStoreCompatibilitySuite.scala | 84 ++++++++++++++++++ .../streaming/state/StateStoreSuite.scala | 13 +-- 10 files changed, 125 insertions(+), 11 deletions(-) create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/commits/0 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/metadata create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/offsets/0 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/state/0/0/1.delta create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreCompatibilitySuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index d84dfcc8f3086..21357a492e39e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1331,6 +1331,16 @@ object SQLConf { .intConf .createWithDefault(2) + val STATE_STORE_COMPRESSION_CODEC = + buildConf("spark.sql.streaming.stateStore.compression.codec") + .internal() + .doc("The codec used to compress delta and snapshot files generated by StateStore. " + + "By default, Spark provides four codecs: lz4, lzf, snappy, and zstd. You can also " + + "use fully qualified class names to specify the codec. Default codec is lz4.") + .version("3.1.0") + .stringConf + .createWithDefault("lz4") + val STREAMING_AGGREGATION_STATE_FORMAT_VERSION = buildConf("spark.sql.streaming.aggregation.stateFormatVersion") .internal() @@ -3089,6 +3099,8 @@ class SQLConf extends Serializable with Logging { def maxBatchesToRetainInMemory: Int = getConf(MAX_BATCHES_TO_RETAIN_IN_MEMORY) + def stateStoreCompressionCodec: String = getConf(STATE_STORE_COMPRESSION_CODEC) + def parquetFilterPushDown: Boolean = getConf(PARQUET_FILTER_PUSHDOWN_ENABLED) def parquetFilterPushDownDate: Boolean = getConf(PARQUET_FILTER_PUSHDOWN_DATE_ENABLED) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala index 1c59464268444..7d7ec76467836 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala @@ -89,10 +89,15 @@ case class OffsetSeqMetadata( object OffsetSeqMetadata extends Logging { private implicit val format = Serialization.formats(NoTypeHints) + /** + * These configs are related to streaming query execution and should not be changed across + * batches of a streaming query. The values of these configs are persisted into the offset + * log in the checkpoint position. 
+ */ private val relevantSQLConfs = Seq( SHUFFLE_PARTITIONS, STATE_STORE_PROVIDER_CLASS, STREAMING_MULTIPLE_WATERMARK_POLICY, FLATMAPGROUPSWITHSTATE_STATE_FORMAT_VERSION, STREAMING_AGGREGATION_STATE_FORMAT_VERSION, - STREAMING_JOIN_STATE_FORMAT_VERSION) + STREAMING_JOIN_STATE_FORMAT_VERSION, STATE_STORE_COMPRESSION_CODEC) /** * Default values of relevant configurations that are used for backward compatibility. @@ -111,7 +116,8 @@ object OffsetSeqMetadata extends Logging { STREAMING_AGGREGATION_STATE_FORMAT_VERSION.key -> StreamingAggregationStateManager.legacyVersion.toString, STREAMING_JOIN_STATE_FORMAT_VERSION.key -> - SymmetricHashJoinStateManager.legacyVersion.toString + SymmetricHashJoinStateManager.legacyVersion.toString, + STATE_STORE_COMPRESSION_CODEC.key -> "lz4" ) def apply(json: String): OffsetSeqMetadata = Serialization.read[OffsetSeqMetadata](json) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala index 31618922e44cf..0a25d51666321 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala @@ -33,7 +33,7 @@ import org.apache.hadoop.fs._ import org.apache.spark.{SparkConf, SparkEnv} import org.apache.spark.internal.Logging -import org.apache.spark.io.LZ4CompressionCodec +import org.apache.spark.io.CompressionCodec import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.sql.execution.streaming.CheckpointFileManager import org.apache.spark.sql.execution.streaming.CheckpointFileManager.CancellableFSDataOutputStream @@ -696,12 +696,14 @@ private[state] class HDFSBackedStateStoreProvider extends StateStoreProvider wit } private def compressStream(outputStream: DataOutputStream): DataOutputStream = { - val compressed = new LZ4CompressionCodec(sparkConf).compressedOutputStream(outputStream) + val compressed = CompressionCodec.createCodec(sparkConf, storeConf.compressionCodec) + .compressedOutputStream(outputStream) new DataOutputStream(compressed) } private def decompressStream(inputStream: DataInputStream): DataInputStream = { - val compressed = new LZ4CompressionCodec(sparkConf).compressedInputStream(inputStream) + val compressed = CompressionCodec.createCodec(sparkConf, storeConf.compressionCodec) + .compressedInputStream(inputStream) new DataInputStream(compressed) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala index 84d0b76ac9158..11043bc81ae3f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala @@ -52,6 +52,9 @@ class StateStoreConf( val formatValidationCheckValue: Boolean = extraOptions.getOrElse(StateStoreConf.FORMAT_VALIDATION_CHECK_VALUE_CONFIG, "true") == "true" + /** The compression codec used to compress delta and snapshot files. */ + val compressionCodec: String = sqlConf.stateStoreCompressionCodec + /** * Additional configurations related to state store. 
This will capture all configs in * SQLConf that start with `spark.sql.streaming.stateStore.` and extraOptions for a specific diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/commits/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/commits/0 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/commits/0 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/metadata new file mode 100644 index 0000000000000..df5937f800382 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/metadata @@ -0,0 +1 @@ +{"id":"6bcf6671-d23e-4ad8-824f-98aa5924ce6d"} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/offsets/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/offsets/0 new file mode 100644 index 0000000000000..d12f52147dd6a --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/offsets/0 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1603918440918,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"1"}} +0 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/state/0/0/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/state/0/0/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..8de7bc89a5de82e3aa620251416c3b72f55d5936 GIT binary patch literal 85 zcmeZ?GI7euPtI1=W?*2b0b;cUSscMYT7ZF(L70()fnS7ySCC1Wfsu)U--dxpkQvP6 T(O?K*_zwg=Q2nAnEpPw;QR5Aq literal 0 HcmV?d00001 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreCompatibilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreCompatibilitySuite.scala new file mode 100644 index 0000000000000..b189de8d2a21e --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreCompatibilitySuite.scala @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.streaming.state + +import java.io.File + +import org.apache.commons.io.FileUtils + +import org.apache.spark.SparkFunSuite +import org.apache.spark.io.CompressionCodec +import org.apache.spark.sql.catalyst.plans.PlanTestBase +import org.apache.spark.sql.catalyst.streaming.InternalOutputModes.Update +import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.functions.count +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.streaming.StreamTest +import org.apache.spark.util.Utils + +class StateStoreCompatibilitySuite extends StreamTest with StateStoreCodecsTest { + testWithAllCodec( + "SPARK-33263: Recovery from checkpoint before codec config introduced") { + val resourceUri = this.getClass.getResource( + "/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/").toURI + val checkpointDir = Utils.createTempDir().getCanonicalFile + FileUtils.copyDirectory(new File(resourceUri), checkpointDir) + + import testImplicits._ + + val inputData = MemoryStream[Int] + val aggregated = inputData.toDF().groupBy("value").agg(count("*")) + inputData.addData(1, 2, 3) + + /** + * Note: The checkpoint was generated using the following input in Spark version 3.0.0: + * AddData(inputData, 1, 2, 3) + */ + + testStream(aggregated, Update)( + StartStream( + checkpointLocation = checkpointDir.getAbsolutePath, + additionalConfs = Map(SQLConf.SHUFFLE_PARTITIONS.key -> "1")), + AddData(inputData, 1, 2), + CheckNewAnswer((1, 2), (2, 2)) + ) + } +} + +trait StateStoreCodecsTest extends SparkFunSuite with PlanTestBase { + private val codecsInShortName = + CompressionCodec.ALL_COMPRESSION_CODECS.map { c => CompressionCodec.getShortName(c) } + + protected def testWithAllCodec(name: String)(func: => Any): Unit = { + codecsInShortName.foreach { codecShortName => + test(s"$name - with codec $codecShortName") { + withSQLConf(SQLConf.STATE_STORE_COMPRESSION_CODEC.key -> codecShortName) { + func + } + } + } + + CompressionCodec.ALL_COMPRESSION_CODECS.foreach { codecShortName => + test(s"$name - with codec $codecShortName") { + withSQLConf(SQLConf.STATE_STORE_COMPRESSION_CODEC.key -> codecShortName) { + func + } + } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala index 5dbc6723a3ff9..9dc6c0a760d7e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala @@ -767,6 +767,7 @@ class StateStoreSuite extends StateStoreSuiteBase[HDFSBackedStateStoreProvider] sqlConf.setConf(SQLConf.STATE_STORE_MIN_DELTAS_FOR_SNAPSHOT, minDeltasForSnapshot) sqlConf.setConf(SQLConf.MAX_BATCHES_TO_RETAIN_IN_MEMORY, numOfVersToRetainInMemory) sqlConf.setConf(SQLConf.MIN_BATCHES_TO_RETAIN, 2) + sqlConf.setConf(SQLConf.STATE_STORE_COMPRESSION_CODEC, SQLConf.get.stateStoreCompressionCodec) val provider = new 
HDFSBackedStateStoreProvider() provider.init( StateStoreId(dir, opId, partition), @@ -815,10 +816,10 @@ class StateStoreSuite extends StateStoreSuiteBase[HDFSBackedStateStoreProvider] } abstract class StateStoreSuiteBase[ProviderClass <: StateStoreProvider] - extends SparkFunSuite { + extends StateStoreCodecsTest { import StateStoreTestsHelper._ - test("get, put, remove, commit, and all data iterator") { + testWithAllCodec("get, put, remove, commit, and all data iterator") { val provider = newStoreProvider() // Verify state before starting a new set of updates @@ -870,7 +871,7 @@ abstract class StateStoreSuiteBase[ProviderClass <: StateStoreProvider] assert(getData(provider, version = 1) === Set("b" -> 2)) } - test("removing while iterating") { + testWithAllCodec("removing while iterating") { val provider = newStoreProvider() // Verify state before starting a new set of updates @@ -892,7 +893,7 @@ abstract class StateStoreSuiteBase[ProviderClass <: StateStoreProvider] assert(get(store, "b") === None) } - test("abort") { + testWithAllCodec("abort") { val provider = newStoreProvider() val store = provider.getStore(0) put(store, "a", 1) @@ -905,7 +906,7 @@ abstract class StateStoreSuiteBase[ProviderClass <: StateStoreProvider] store1.abort() } - test("getStore with invalid versions") { + testWithAllCodec("getStore with invalid versions") { val provider = newStoreProvider() def checkInvalidVersion(version: Int): Unit = { @@ -939,7 +940,7 @@ abstract class StateStoreSuiteBase[ProviderClass <: StateStoreProvider] checkInvalidVersion(3) } - test("two concurrent StateStores - one for read-only and one for read-write") { + testWithAllCodec("two concurrent StateStores - one for read-only and one for read-write") { // During Streaming Aggregation, we have two StateStores per task, one used as read-only in // `StateStoreRestoreExec`, and one read-write used in `StateStoreSaveExec`. `StateStore.abort` // will be called for these StateStores if they haven't committed their results. We need to From fa6311731be8643f047d2a85faf16e82300883b0 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 29 Oct 2020 08:00:23 -0700 Subject: [PATCH 0355/1009] [SPARK-33283][CORE] Remove useless externalBlockStoreSize from RDDInfo ### What changes were proposed in this pull request? "external block store" API was removed after SPARK-12667, `externalBlockStoreSize` in `RDDInfo` looks like always 0 and useless. So this pr just to remove this useless variable. ### Why are the changes needed? remove useless variable. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass the Jenkins or GitHub Action Closes #30179 from LuciferYang/SPARK-12667-FOLLOWUP. 
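For context, a minimal spark-shell sketch (illustrative only, not taken from this PR) of where the removed field used to be visible: the storage line printed by `RDD.toDebugString` for a cached RDD now reports only memory and disk sizes.

```
// Minimal sketch, not from the PR itself: the storage line of a cached RDD's debug string
// now reads " CachedPartitions: 4; MemorySize: ...; DiskSize: ..." with no
// ExternalBlockStoreSize entry.
val rdd = spark.sparkContext.parallelize(1 to 1000, 4).cache()
rdd.count()                 // materialize the cache
println(rdd.toDebugString)  // lineage plus the storage info line shown above
```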
Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- core/src/main/scala/org/apache/spark/rdd/RDD.scala | 5 ++--- .../apache/spark/storage/BlockManagerMasterEndpoint.scala | 1 - core/src/main/scala/org/apache/spark/storage/RDDInfo.scala | 1 - 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index 6095042de7f0c..15b00a4496da6 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -1919,9 +1919,8 @@ abstract class RDD[T: ClassTag]( val persistence = if (storageLevel != StorageLevel.NONE) storageLevel.description else "" val storageInfo = rdd.context.getRDDStorageInfo(_.id == rdd.id).map(info => - " CachedPartitions: %d; MemorySize: %s; ExternalBlockStoreSize: %s; DiskSize: %s".format( - info.numCachedPartitions, bytesToString(info.memSize), - bytesToString(info.externalBlockStoreSize), bytesToString(info.diskSize))) + " CachedPartitions: %d; MemorySize: %s; DiskSize: %s".format( + info.numCachedPartitions, bytesToString(info.memSize), bytesToString(info.diskSize))) s"$rdd [$persistence]" +: storageInfo } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala index 569d7d32284bc..b8c5cbd121861 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala @@ -737,7 +737,6 @@ private[spark] class BlockManagerInfo( if (storageLevel.isValid) { /* isValid means it is either stored in-memory or on-disk. * The memSize here indicates the data size in or dropped from memory, - * externalBlockStoreSize here indicates the data size in or dropped from externalBlockStore, * and the diskSize here indicates the data size in or dropped to disk. * They can be both larger than 0, when a block is dropped from memory to disk. * Therefore, a safe way to set BlockStatus is to set its info in accurate modes. */ diff --git a/core/src/main/scala/org/apache/spark/storage/RDDInfo.scala b/core/src/main/scala/org/apache/spark/storage/RDDInfo.scala index 27a4d4b64175e..f3575c4e43eb0 100644 --- a/core/src/main/scala/org/apache/spark/storage/RDDInfo.scala +++ b/core/src/main/scala/org/apache/spark/storage/RDDInfo.scala @@ -38,7 +38,6 @@ class RDDInfo( var numCachedPartitions = 0 var memSize = 0L var diskSize = 0L - var externalBlockStoreSize = 0L def isCached: Boolean = (memSize + diskSize > 0) && numCachedPartitions > 0 From cbd3fdea62dab73fc4a96702de8fd1f07722da66 Mon Sep 17 00:00:00 2001 From: luluorta Date: Thu, 29 Oct 2020 16:44:17 +0000 Subject: [PATCH 0356/1009] [SPARK-33008][SQL] Division by zero on divide-like operations returns incorrect result ### What changes were proposed in this pull request? In ANSI mode, when a division by zero occurs performing a divide-like operation (Divide, IntegralDivide, Remainder or Pmod), we are returning an incorrect value. Instead, we should throw an exception, as stated in the SQL standard. ### Why are the changes needed? Result corrupt. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? added UT + existing UTs (improved) Closes #29882 from luluorta/SPARK-33008. 
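To make the behavior change concrete, here is a rough spark-shell sketch (illustrative only, not taken from this PR's description) of the four divide-like operators under ANSI mode after this change:

```
// Rough sketch assuming a spark-shell session; spark.sql.ansi.enabled turns on ANSI mode.
spark.conf.set("spark.sql.ansi.enabled", true)
spark.sql("SELECT 1 / 0").collect()       // Divide: java.lang.ArithmeticException: divide by zero
spark.sql("SELECT 1 div 0").collect()     // IntegralDivide: same exception
spark.sql("SELECT 1 % 0").collect()       // Remainder: same exception
spark.sql("SELECT pmod(1, 0)").collect()  // Pmod: same exception

// With ANSI mode off, the non-ANSI behavior is unchanged: all four return NULL.
spark.conf.set("spark.sql.ansi.enabled", false)
spark.sql("SELECT 1 / 0").collect()       // Array([null])
```

The same "divide by zero" error is raised from both the interpreted and the generated-code paths, so interpreted evaluation and codegen stay consistent.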
Authored-by: luluorta Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 22 +-- .../catalyst/analysis/DecimalPrecision.scala | 40 ++-- .../analysis/StreamingJoinHelper.scala | 8 +- .../sql/catalyst/analysis/TypeCoercion.scala | 10 +- .../catalyst/expressions/Canonicalize.scala | 7 +- .../expressions/aggregate/Average.scala | 6 +- .../sql/catalyst/expressions/arithmetic.scala | 159 +++++++++++----- .../expressions/bitwiseExpressions.scala | 6 + .../expressions/intervalExpressions.scala | 12 +- .../expressions/windowExpressions.scala | 2 +- .../sql/catalyst/optimizer/expressions.scala | 17 +- .../ArithmeticExpressionSuite.scala | 174 +++++++++++++----- .../expressions/ExpressionEvalHelper.scala | 15 ++ .../sql-tests/inputs/postgreSQL/case.sql | 2 +- .../inputs/postgreSQL/select_having.sql | 1 + .../inputs/udf/postgreSQL/udf-case.sql | 2 +- .../udf/postgreSQL/udf-select_having.sql | 1 + .../sql-tests/results/postgreSQL/case.sql.out | 18 +- .../sql-tests/results/postgreSQL/int8.sql.out | 15 +- .../results/postgreSQL/numeric.sql.out | 8 +- .../results/postgreSQL/select_having.sql.out | 5 +- .../results/udf/postgreSQL/udf-case.sql.out | 18 +- .../udf/postgreSQL/udf-select_having.sql.out | 5 +- .../sql/SparkSessionExtensionSuite.scala | 7 +- 24 files changed, 379 insertions(+), 181 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index c2116a2b8f471..10fe5314b0ef9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -307,35 +307,35 @@ class Analyzer( object ResolveBinaryArithmetic extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { case p: LogicalPlan => p.transformExpressionsUp { - case a @ Add(l, r) if a.childrenResolved => (l.dataType, r.dataType) match { + case a @ Add(l, r, f) if a.childrenResolved => (l.dataType, r.dataType) match { case (CalendarIntervalType, CalendarIntervalType) => a - case (DateType, CalendarIntervalType) => DateAddInterval(l, r) + case (DateType, CalendarIntervalType) => DateAddInterval(l, r, ansiEnabled = f) case (_, CalendarIntervalType) => Cast(TimeAdd(l, r), l.dataType) - case (CalendarIntervalType, DateType) => DateAddInterval(r, l) + case (CalendarIntervalType, DateType) => DateAddInterval(r, l, ansiEnabled = f) case (CalendarIntervalType, _) => Cast(TimeAdd(r, l), r.dataType) case (DateType, dt) if dt != StringType => DateAdd(l, r) case (dt, DateType) if dt != StringType => DateAdd(r, l) case _ => a } - case s @ Subtract(l, r) if s.childrenResolved => (l.dataType, r.dataType) match { + case s @ Subtract(l, r, f) if s.childrenResolved => (l.dataType, r.dataType) match { case (CalendarIntervalType, CalendarIntervalType) => s case (DateType, CalendarIntervalType) => - DatetimeSub(l, r, DateAddInterval(l, UnaryMinus(r))) + DatetimeSub(l, r, DateAddInterval(l, UnaryMinus(r, f), ansiEnabled = f)) case (_, CalendarIntervalType) => - Cast(DatetimeSub(l, r, TimeAdd(l, UnaryMinus(r))), l.dataType) + Cast(DatetimeSub(l, r, TimeAdd(l, UnaryMinus(r, f))), l.dataType) case (TimestampType, _) => SubtractTimestamps(l, r) case (_, TimestampType) => SubtractTimestamps(l, r) case (_, DateType) => SubtractDates(l, r) case (DateType, dt) if dt != StringType => DateSub(l, r) case _ => s } - case m @ Multiply(l, r) if m.childrenResolved 
=> (l.dataType, r.dataType) match { - case (CalendarIntervalType, _) => MultiplyInterval(l, r) - case (_, CalendarIntervalType) => MultiplyInterval(r, l) + case m @ Multiply(l, r, f) if m.childrenResolved => (l.dataType, r.dataType) match { + case (CalendarIntervalType, _) => MultiplyInterval(l, r, f) + case (_, CalendarIntervalType) => MultiplyInterval(r, l, f) case _ => m } - case d @ Divide(l, r) if d.childrenResolved => (l.dataType, r.dataType) match { - case (CalendarIntervalType, _) => DivideInterval(l, r) + case d @ Divide(l, r, f) if d.childrenResolved => (l.dataType, r.dataType) match { + case (CalendarIntervalType, _) => DivideInterval(l, r, f) case _ => d } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala index f2d607e5b737c..6eed152e6dd77 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala @@ -98,7 +98,7 @@ object DecimalPrecision extends TypeCoercionRule { // Skip nodes who is already promoted case e: BinaryArithmetic if e.left.isInstanceOf[PromotePrecision] => e - case Add(e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2)) => + case a @ Add(e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2), _) => val resultScale = max(s1, s2) val resultType = if (SQLConf.get.decimalOperationsAllowPrecisionLoss) { DecimalType.adjustPrecisionScale(max(p1 - s1, p2 - s2) + resultScale + 1, @@ -106,10 +106,12 @@ object DecimalPrecision extends TypeCoercionRule { } else { DecimalType.bounded(max(p1 - s1, p2 - s2) + resultScale + 1, resultScale) } - CheckOverflow(Add(promotePrecision(e1, resultType), promotePrecision(e2, resultType)), + CheckOverflow( + a.withNewChildren(Seq(promotePrecision(e1, resultType), promotePrecision(e2, resultType))), resultType, nullOnOverflow) - case Subtract(e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2)) => + case s @ Subtract(e1 @ DecimalType.Expression(p1, s1), + e2 @ DecimalType.Expression(p2, s2), _) => val resultScale = max(s1, s2) val resultType = if (SQLConf.get.decimalOperationsAllowPrecisionLoss) { DecimalType.adjustPrecisionScale(max(p1 - s1, p2 - s2) + resultScale + 1, @@ -117,20 +119,23 @@ object DecimalPrecision extends TypeCoercionRule { } else { DecimalType.bounded(max(p1 - s1, p2 - s2) + resultScale + 1, resultScale) } - CheckOverflow(Subtract(promotePrecision(e1, resultType), promotePrecision(e2, resultType)), + CheckOverflow( + s.withNewChildren(Seq(promotePrecision(e1, resultType), promotePrecision(e2, resultType))), resultType, nullOnOverflow) - case Multiply(e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2)) => + case m @ Multiply( + e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2), _) => val resultType = if (SQLConf.get.decimalOperationsAllowPrecisionLoss) { DecimalType.adjustPrecisionScale(p1 + p2 + 1, s1 + s2) } else { DecimalType.bounded(p1 + p2 + 1, s1 + s2) } val widerType = widerDecimalType(p1, s1, p2, s2) - CheckOverflow(Multiply(promotePrecision(e1, widerType), promotePrecision(e2, widerType)), + CheckOverflow( + m.withNewChildren(Seq(promotePrecision(e1, widerType), promotePrecision(e2, widerType))), resultType, nullOnOverflow) - case Divide(e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2)) => + case d @ 
Divide(e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2), _) => val resultType = if (SQLConf.get.decimalOperationsAllowPrecisionLoss) { // Precision: p1 - s1 + s2 + max(6, s1 + p2 + 1) // Scale: max(6, s1 + p2 + 1) @@ -149,10 +154,12 @@ object DecimalPrecision extends TypeCoercionRule { DecimalType.bounded(intDig + decDig, decDig) } val widerType = widerDecimalType(p1, s1, p2, s2) - CheckOverflow(Divide(promotePrecision(e1, widerType), promotePrecision(e2, widerType)), + CheckOverflow( + d.withNewChildren(Seq(promotePrecision(e1, widerType), promotePrecision(e2, widerType))), resultType, nullOnOverflow) - case Remainder(e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2)) => + case r @ Remainder( + e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2), _) => val resultType = if (SQLConf.get.decimalOperationsAllowPrecisionLoss) { DecimalType.adjustPrecisionScale(min(p1 - s1, p2 - s2) + max(s1, s2), max(s1, s2)) } else { @@ -160,10 +167,11 @@ object DecimalPrecision extends TypeCoercionRule { } // resultType may have lower precision, so we cast them into wider type first. val widerType = widerDecimalType(p1, s1, p2, s2) - CheckOverflow(Remainder(promotePrecision(e1, widerType), promotePrecision(e2, widerType)), + CheckOverflow( + r.withNewChildren(Seq(promotePrecision(e1, widerType), promotePrecision(e2, widerType))), resultType, nullOnOverflow) - case Pmod(e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2)) => + case p @ Pmod(e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2), _) => val resultType = if (SQLConf.get.decimalOperationsAllowPrecisionLoss) { DecimalType.adjustPrecisionScale(min(p1 - s1, p2 - s2) + max(s1, s2), max(s1, s2)) } else { @@ -171,15 +179,15 @@ object DecimalPrecision extends TypeCoercionRule { } // resultType may have lower precision, so we cast them into wider type first. 
val widerType = widerDecimalType(p1, s1, p2, s2) - CheckOverflow(Pmod(promotePrecision(e1, widerType), promotePrecision(e2, widerType)), + CheckOverflow( + p.withNewChildren(Seq(promotePrecision(e1, widerType), promotePrecision(e2, widerType))), resultType, nullOnOverflow) case expr @ IntegralDivide( - e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2)) => + e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2), _) => val widerType = widerDecimalType(p1, s1, p2, s2) - val promotedExpr = IntegralDivide( - promotePrecision(e1, widerType), - promotePrecision(e2, widerType)) + val promotedExpr = expr.withNewChildren( + Seq(promotePrecision(e1, widerType), promotePrecision(e2, widerType))) if (expr.dataType.isInstanceOf[DecimalType]) { // This follows division rule val intDig = p1 - s1 + s2 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala index 6a2ff4b91e68d..cddc3a44f4d9d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala @@ -189,7 +189,7 @@ object StreamingJoinHelper extends PredicateHelper with Logging { if attributesWithEventWatermark.contains(a) && metadata.contains(delayKey) => Multiply(Literal(eventWatermark.get.toDouble), Literal(1000.0)) } - }.reduceLeft(Add) + }.reduceLeft(Add(_, _)) // Calculate the constraint value logInfo(s"Final expression to evaluate constraint:\t$exprWithWatermarkSubstituted") @@ -226,14 +226,14 @@ object StreamingJoinHelper extends PredicateHelper with Logging { */ def collect(expr: Expression, negate: Boolean): Seq[Expression] = { expr match { - case Add(left, right) => + case Add(left, right, _) => collect(left, negate) ++ collect(right, negate) - case Subtract(left, right) => + case Subtract(left, right, _) => collect(left, negate) ++ collect(right, !negate) case TimeAdd(left, right, _) => collect(left, negate) ++ collect(right, negate) case DatetimeSub(_, _, child) => collect(child, negate) - case UnaryMinus(child) => + case UnaryMinus(child, _) => collect(child, !negate) case CheckOverflow(child, _, _) => collect(child, negate) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index b69cb6091f02c..becdef8b9c603 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -454,7 +454,7 @@ object TypeCoercion { s.withNewChildren(Seq(Cast(e, DoubleType))) case s @ StddevSamp(e @ StringType(), _) => s.withNewChildren(Seq(Cast(e, DoubleType))) - case UnaryMinus(e @ StringType()) => UnaryMinus(Cast(e, DoubleType)) + case m @ UnaryMinus(e @ StringType(), _) => m.withNewChildren(Seq(Cast(e, DoubleType))) case UnaryPositive(e @ StringType()) => UnaryPositive(Cast(e, DoubleType)) case v @ VariancePop(e @ StringType(), _) => v.withNewChildren(Seq(Cast(e, DoubleType))) @@ -698,8 +698,8 @@ object TypeCoercion { // Decimal and Double remain the same case d: Divide if d.dataType == DoubleType => d case d: Divide if d.dataType.isInstanceOf[DecimalType] => d - case Divide(left, right) if isNumericOrNull(left) && isNumericOrNull(right) => - Divide(Cast(left, 
DoubleType), Cast(right, DoubleType)) + case d @ Divide(left, right, _) if isNumericOrNull(left) && isNumericOrNull(right) => + d.withNewChildren(Seq(Cast(left, DoubleType), Cast(right, DoubleType))) } private def isNumericOrNull(ex: Expression): Boolean = { @@ -715,8 +715,8 @@ object TypeCoercion { object IntegralDivision extends TypeCoercionRule { override protected def coerceTypes(plan: LogicalPlan): LogicalPlan = plan resolveExpressions { case e if !e.childrenResolved => e - case d @ IntegralDivide(left, right) => - IntegralDivide(mayCastToLong(left), mayCastToLong(right)) + case d @ IntegralDivide(left, right, _) => + d.withNewChildren(Seq(mayCastToLong(left), mayCastToLong(right))) } private def mayCastToLong(expr: Expression): Expression = expr.dataType match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala index 1ecf4372cfb58..ae201359a762c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala @@ -70,8 +70,11 @@ object Canonicalize { /** Rearrange expressions that are commutative or associative. */ private def expressionReorder(e: Expression): Expression = e match { - case a: Add => orderCommutative(a, { case Add(l, r) => Seq(l, r) }).reduce(Add) - case m: Multiply => orderCommutative(m, { case Multiply(l, r) => Seq(l, r) }).reduce(Multiply) + // TODO: do not reorder consecutive `Add`s or `Multiply`s with different `failOnError` flags + case a @ Add(_, _, f) => + orderCommutative(a, { case Add(l, r, _) => Seq(l, r) }).reduce(Add(_, _, f)) + case m @ Multiply(_, _, f) => + orderCommutative(m, { case Multiply(l, r, _) => Seq(l, r) }).reduce(Multiply(_, _, f)) case o: Or => orderCommutative(o, { case Or(l, r) if l.deterministic && r.deterministic => Seq(l, r) }) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala index d3ce1f8d331ab..13f38ac7c9ae5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala @@ -77,11 +77,13 @@ case class Average(child: Expression) extends DeclarativeAggregate with Implicit ) // If all input are nulls, count will be 0 and we will get null after the division. + // We can't directly use `/` as it throws an exception under ansi mode. 
override lazy val evaluateExpression = child.dataType match { case _: DecimalType => - DecimalPrecision.decimalAndDecimal(sum / count.cast(DecimalType.LongDecimal)).cast(resultType) + DecimalPrecision.decimalAndDecimal( + Divide(sum, count.cast(DecimalType.LongDecimal), failOnError = false)).cast(resultType) case _ => - sum.cast(resultType) / count.cast(resultType) + Divide(sum.cast(resultType), count.cast(resultType), failOnError = false) } override lazy val updateExpressions: Seq[Expression] = Seq( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index f25fd9b672e8b..c69edccc696bb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -34,9 +34,12 @@ import org.apache.spark.unsafe.types.CalendarInterval -1 """, since = "1.0.0") -case class UnaryMinus(child: Expression) extends UnaryExpression - with ExpectsInputTypes with NullIntolerant { - private val checkOverflow = SQLConf.get.ansiEnabled +case class UnaryMinus( + child: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) + extends UnaryExpression with ExpectsInputTypes with NullIntolerant { + + def this(child: Expression) = this(child, SQLConf.get.ansiEnabled) override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection.NumericAndInterval) @@ -44,11 +47,11 @@ case class UnaryMinus(child: Expression) extends UnaryExpression override def toString: String = s"-$child" - private lazy val numeric = TypeUtils.getNumeric(dataType, checkOverflow) + private lazy val numeric = TypeUtils.getNumeric(dataType, failOnError) override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = dataType match { case _: DecimalType => defineCodeGen(ctx, ev, c => s"$c.unary_$$minus()") - case ByteType | ShortType if checkOverflow => + case ByteType | ShortType if failOnError => nullSafeCodeGen(ctx, ev, eval => { val javaBoxedType = CodeGenerator.boxedType(dataType) val javaType = CodeGenerator.javaType(dataType) @@ -61,7 +64,7 @@ case class UnaryMinus(child: Expression) extends UnaryExpression |${ev.value} = ($javaType)(-($originValue)); """.stripMargin }) - case IntegerType | LongType if checkOverflow => + case IntegerType | LongType if failOnError => nullSafeCodeGen(ctx, ev, eval => { val mathClass = classOf[Math].getName s"${ev.value} = $mathClass.negateExact($eval);" @@ -76,12 +79,12 @@ case class UnaryMinus(child: Expression) extends UnaryExpression """}) case _: CalendarIntervalType => val iu = IntervalUtils.getClass.getCanonicalName.stripSuffix("$") - val method = if (checkOverflow) "negateExact" else "negate" + val method = if (failOnError) "negateExact" else "negate" defineCodeGen(ctx, ev, c => s"$iu.$method($c)") } protected override def nullSafeEval(input: Any): Any = dataType match { - case CalendarIntervalType if checkOverflow => + case CalendarIntervalType if failOnError => IntervalUtils.negateExact(input.asInstanceOf[CalendarInterval]) case CalendarIntervalType => IntervalUtils.negate(input.asInstanceOf[CalendarInterval]) case _ => numeric.negate(input) @@ -104,7 +107,8 @@ case class UnaryMinus(child: Expression) extends UnaryExpression """, since = "1.5.0") case class UnaryPositive(child: Expression) - extends UnaryExpression with ExpectsInputTypes with NullIntolerant { + extends UnaryExpression with ExpectsInputTypes with NullIntolerant { + 
override def prettyName: String = "positive" override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection.NumericAndInterval) @@ -131,7 +135,7 @@ case class UnaryPositive(child: Expression) """, since = "1.2.0") case class Abs(child: Expression) - extends UnaryExpression with ExpectsInputTypes with NullIntolerant { + extends UnaryExpression with ExpectsInputTypes with NullIntolerant { override def inputTypes: Seq[AbstractDataType] = Seq(NumericType) @@ -151,7 +155,7 @@ case class Abs(child: Expression) abstract class BinaryArithmetic extends BinaryOperator with NullIntolerant { - protected val checkOverflow = SQLConf.get.ansiEnabled + protected val failOnError: Boolean override def dataType: DataType = left.dataType @@ -181,7 +185,7 @@ abstract class BinaryArithmetic extends BinaryOperator with NullIntolerant { case ByteType | ShortType => nullSafeCodeGen(ctx, ev, (eval1, eval2) => { val tmpResult = ctx.freshName("tmpResult") - val overflowCheck = if (checkOverflow) { + val overflowCheck = if (failOnError) { val javaType = CodeGenerator.boxedType(dataType) s""" |if ($tmpResult < $javaType.MIN_VALUE || $tmpResult > $javaType.MAX_VALUE) { @@ -199,7 +203,7 @@ abstract class BinaryArithmetic extends BinaryOperator with NullIntolerant { }) case IntegerType | LongType => nullSafeCodeGen(ctx, ev, (eval1, eval2) => { - val operation = if (checkOverflow && exactMathMethod.isDefined) { + val operation = if (failOnError && exactMathMethod.isDefined) { val mathClass = classOf[Math].getName s"$mathClass.${exactMathMethod.get}($eval1, $eval2)" } else { @@ -233,7 +237,12 @@ object BinaryArithmetic { 3 """, since = "1.0.0") -case class Add(left: Expression, right: Expression) extends BinaryArithmetic { +case class Add( + left: Expression, + right: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends BinaryArithmetic { + + def this(left: Expression, right: Expression) = this(left, right, SQLConf.get.ansiEnabled) override def inputType: AbstractDataType = TypeCollection.NumericAndInterval @@ -241,12 +250,12 @@ case class Add(left: Expression, right: Expression) extends BinaryArithmetic { override def decimalMethod: String = "$plus" - override def calendarIntervalMethod: String = if (checkOverflow) "addExact" else "add" + override def calendarIntervalMethod: String = if (failOnError) "addExact" else "add" - private lazy val numeric = TypeUtils.getNumeric(dataType, checkOverflow) + private lazy val numeric = TypeUtils.getNumeric(dataType, failOnError) protected override def nullSafeEval(input1: Any, input2: Any): Any = dataType match { - case CalendarIntervalType if checkOverflow => + case CalendarIntervalType if failOnError => IntervalUtils.addExact( input1.asInstanceOf[CalendarInterval], input2.asInstanceOf[CalendarInterval]) case CalendarIntervalType => @@ -266,7 +275,12 @@ case class Add(left: Expression, right: Expression) extends BinaryArithmetic { 1 """, since = "1.0.0") -case class Subtract(left: Expression, right: Expression) extends BinaryArithmetic { +case class Subtract( + left: Expression, + right: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends BinaryArithmetic { + + def this(left: Expression, right: Expression) = this(left, right, SQLConf.get.ansiEnabled) override def inputType: AbstractDataType = TypeCollection.NumericAndInterval @@ -274,12 +288,12 @@ case class Subtract(left: Expression, right: Expression) extends BinaryArithmeti override def decimalMethod: String = "$minus" - override def calendarIntervalMethod: String = if (checkOverflow) 
"subtractExact" else "subtract" + override def calendarIntervalMethod: String = if (failOnError) "subtractExact" else "subtract" - private lazy val numeric = TypeUtils.getNumeric(dataType, checkOverflow) + private lazy val numeric = TypeUtils.getNumeric(dataType, failOnError) protected override def nullSafeEval(input1: Any, input2: Any): Any = dataType match { - case CalendarIntervalType if checkOverflow => + case CalendarIntervalType if failOnError => IntervalUtils.subtractExact( input1.asInstanceOf[CalendarInterval], input2.asInstanceOf[CalendarInterval]) case CalendarIntervalType => @@ -299,14 +313,19 @@ case class Subtract(left: Expression, right: Expression) extends BinaryArithmeti 6 """, since = "1.0.0") -case class Multiply(left: Expression, right: Expression) extends BinaryArithmetic { +case class Multiply( + left: Expression, + right: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends BinaryArithmetic { + + def this(left: Expression, right: Expression) = this(left, right, SQLConf.get.ansiEnabled) override def inputType: AbstractDataType = NumericType override def symbol: String = "*" override def decimalMethod: String = "$times" - private lazy val numeric = TypeUtils.getNumeric(dataType, checkOverflow) + private lazy val numeric = TypeUtils.getNumeric(dataType, failOnError) protected override def nullSafeEval(input1: Any, input2: Any): Any = numeric.times(input1, input2) @@ -320,15 +339,25 @@ trait DivModLike extends BinaryArithmetic { override def nullable: Boolean = true + private lazy val isZero: Any => Boolean = right.dataType match { + case _: DecimalType => x => x.asInstanceOf[Decimal].isZero + case _ => x => x == 0 + } + final override def eval(input: InternalRow): Any = { + // evaluate right first as we have a chance to skip left if right is 0 val input2 = right.eval(input) - if (input2 == null || input2 == 0) { + if (input2 == null || (!failOnError && isZero(input2))) { null } else { val input1 = left.eval(input) if (input1 == null) { null } else { + if (isZero(input2)) { + // when we reach here, failOnError must bet true. + throw new ArithmeticException("divide by zero") + } evalOperation(input1, input2) } } @@ -337,7 +366,7 @@ trait DivModLike extends BinaryArithmetic { def evalOperation(left: Any, right: Any): Any /** - * Special case handling due to division/remainder by 0 => null. + * Special case handling due to division/remainder by 0 => null or ArithmeticException. 
*/ override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval1 = left.genCode(ctx) @@ -354,29 +383,42 @@ trait DivModLike extends BinaryArithmetic { } else { s"($javaType)(${eval1.value} $symbol ${eval2.value})" } + // evaluate right first as we have a chance to skip left if right is 0 if (!left.nullable && !right.nullable) { + val divByZero = if (failOnError) { + "throw new ArithmeticException(\"divide by zero\");" + } else { + s"${ev.isNull} = true;" + } ev.copy(code = code""" ${eval2.code} boolean ${ev.isNull} = false; $javaType ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; if ($isZero) { - ${ev.isNull} = true; + $divByZero } else { ${eval1.code} ${ev.value} = $operation; }""") } else { + val nullOnErrorCondition = if (failOnError) "" else s" || $isZero" + val failOnErrorBranch = if (failOnError) { + s"""if ($isZero) throw new ArithmeticException("divide by zero");""" + } else { + "" + } ev.copy(code = code""" ${eval2.code} boolean ${ev.isNull} = false; $javaType ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; - if (${eval2.isNull} || $isZero) { + if (${eval2.isNull}$nullOnErrorCondition) { ${ev.isNull} = true; } else { ${eval1.code} if (${eval1.isNull}) { ${ev.isNull} = true; } else { + $failOnErrorBranch ${ev.value} = $operation; } }""") @@ -396,7 +438,12 @@ trait DivModLike extends BinaryArithmetic { """, since = "1.0.0") // scalastyle:on line.size.limit -case class Divide(left: Expression, right: Expression) extends DivModLike { +case class Divide( + left: Expression, + right: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends DivModLike { + + def this(left: Expression, right: Expression) = this(left, right, SQLConf.get.ansiEnabled) override def inputType: AbstractDataType = TypeCollection(DoubleType, DecimalType) @@ -422,7 +469,10 @@ case class Divide(left: Expression, right: Expression) extends DivModLike { // scalastyle:on line.size.limit case class IntegralDivide( left: Expression, - right: Expression) extends DivModLike { + right: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends DivModLike { + + def this(left: Expression, right: Expression) = this(left, right, SQLConf.get.ansiEnabled) override def inputType: AbstractDataType = TypeCollection(LongType, DecimalType) @@ -453,12 +503,6 @@ case class IntegralDivide( override def evalOperation(left: Any, right: Any): Any = div(left, right) } -object IntegralDivide { - def apply(left: Expression, right: Expression): IntegralDivide = { - new IntegralDivide(left, right) - } -} - @ExpressionDescription( usage = "expr1 _FUNC_ expr2 - Returns the remainder after `expr1`/`expr2`.", examples = """ @@ -469,7 +513,12 @@ object IntegralDivide { 0.2 """, since = "1.0.0") -case class Remainder(left: Expression, right: Expression) extends DivModLike { +case class Remainder( + left: Expression, + right: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends DivModLike { + + def this(left: Expression, right: Expression) = this(left, right, SQLConf.get.ansiEnabled) override def inputType: AbstractDataType = NumericType @@ -517,7 +566,12 @@ case class Remainder(left: Expression, right: Expression) extends DivModLike { 2 """, since = "1.5.0") -case class Pmod(left: Expression, right: Expression) extends BinaryArithmetic { +case class Pmod( + left: Expression, + right: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends BinaryArithmetic { + + def this(left: Expression, right: Expression) = this(left, right, SQLConf.get.ansiEnabled) override def 
toString: String = s"pmod($left, $right)" @@ -530,15 +584,25 @@ case class Pmod(left: Expression, right: Expression) extends BinaryArithmetic { override def nullable: Boolean = true - override def eval(input: InternalRow): Any = { + private lazy val isZero: Any => Boolean = right.dataType match { + case _: DecimalType => x => x.asInstanceOf[Decimal].isZero + case _ => x => x == 0 + } + + final override def eval(input: InternalRow): Any = { + // evaluate right first as we have a chance to skip left if right is 0 val input2 = right.eval(input) - if (input2 == null || input2 == 0) { + if (input2 == null || (!failOnError && isZero(input2))) { null } else { val input1 = left.eval(input) if (input1 == null) { null } else { + if (isZero(input2)) { + // when we reach here, failOnError must bet true. + throw new ArithmeticException("divide by zero") + } input1 match { case i: Integer => pmod(i, input2.asInstanceOf[java.lang.Integer]) case l: Long => pmod(l, input2.asInstanceOf[java.lang.Long]) @@ -595,29 +659,42 @@ case class Pmod(left: Expression, right: Expression) extends BinaryArithmetic { """ } + // evaluate right first as we have a chance to skip left if right is 0 if (!left.nullable && !right.nullable) { + val divByZero = if (failOnError) { + "throw new ArithmeticException(\"divide by zero\");" + } else { + s"${ev.isNull} = true;" + } ev.copy(code = code""" ${eval2.code} boolean ${ev.isNull} = false; $javaType ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; if ($isZero) { - ${ev.isNull} = true; + $divByZero } else { ${eval1.code} $result }""") } else { + val nullOnErrorCondition = if (failOnError) "" else s" || $isZero" + val failOnErrorBranch = if (failOnError) { + s"""if ($isZero) throw new ArithmeticException("divide by zero");""" + } else { + "" + } ev.copy(code = code""" ${eval2.code} boolean ${ev.isNull} = false; $javaType ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; - if (${eval2.isNull} || $isZero) { + if (${eval2.isNull}$nullOnErrorCondition) { ${ev.isNull} = true; } else { ${eval1.code} if (${eval1.isNull}) { ${ev.isNull} = true; } else { + $failOnErrorBranch $result } }""") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala index aa3993dccd1c5..33ce60875c600 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala @@ -36,6 +36,8 @@ import org.apache.spark.sql.types._ since = "1.4.0") case class BitwiseAnd(left: Expression, right: Expression) extends BinaryArithmetic { + protected override val failOnError: Boolean = false + override def inputType: AbstractDataType = IntegralType override def symbol: String = "&" @@ -69,6 +71,8 @@ case class BitwiseAnd(left: Expression, right: Expression) extends BinaryArithme since = "1.4.0") case class BitwiseOr(left: Expression, right: Expression) extends BinaryArithmetic { + protected override val failOnError: Boolean = false + override def inputType: AbstractDataType = IntegralType override def symbol: String = "|" @@ -102,6 +106,8 @@ case class BitwiseOr(left: Expression, right: Expression) extends BinaryArithmet since = "1.4.0") case class BitwiseXor(left: Expression, right: Expression) extends BinaryArithmetic { + protected override val failOnError: Boolean = false + override def inputType: AbstractDataType = IntegralType 
override def symbol: String = "^" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala index db3b2a38fece0..8b92c619df626 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala @@ -109,25 +109,25 @@ abstract class IntervalNumOperation( case class MultiplyInterval( interval: Expression, num: Expression, - checkOverflow: Boolean = SQLConf.get.ansiEnabled) + failOnError: Boolean = SQLConf.get.ansiEnabled) extends IntervalNumOperation(interval, num) { override protected val operation: (CalendarInterval, Double) => CalendarInterval = - if (checkOverflow) multiplyExact else multiply + if (failOnError) multiplyExact else multiply - override protected def operationName: String = if (checkOverflow) "multiplyExact" else "multiply" + override protected def operationName: String = if (failOnError) "multiplyExact" else "multiply" } case class DivideInterval( interval: Expression, num: Expression, - checkOverflow: Boolean = SQLConf.get.ansiEnabled) + failOnError: Boolean = SQLConf.get.ansiEnabled) extends IntervalNumOperation(interval, num) { override protected val operation: (CalendarInterval, Double) => CalendarInterval = - if (checkOverflow) divideExact else divide + if (failOnError) divideExact else divide - override protected def operationName: String = if (checkOverflow) "divideExact" else "divide" + override protected def operationName: String = if (failOnError) "divideExact" else "divide" } // scalastyle:off line.size.limit diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index 168585dc3de00..1a57afa8d9aae 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -235,7 +235,7 @@ case class SpecifiedWindowFrame( private def boundarySql(expr: Expression): String = expr match { case e: SpecialFrameBoundary => e.sql - case UnaryMinus(n) => n.sql + " PRECEDING" + case UnaryMinus(n, _) => n.sql + " PRECEDING" case e: Expression => e.sql + " FOLLOWING" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 0e7a39c54050e..55a45f4410b34 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -177,7 +177,7 @@ object ReorderAssociativeOperator extends Rule[LogicalPlan] { private def flattenAdd( expression: Expression, groupSet: ExpressionSet): Seq[Expression] = expression match { - case expr @ Add(l, r) if !groupSet.contains(expr) => + case expr @ Add(l, r, _) if !groupSet.contains(expr) => flattenAdd(l, groupSet) ++ flattenAdd(r, groupSet) case other => other :: Nil } @@ -185,7 +185,7 @@ object ReorderAssociativeOperator extends Rule[LogicalPlan] { private def flattenMultiply( expression: Expression, groupSet: ExpressionSet): Seq[Expression] = expression match { - case expr @ Multiply(l, r) if !groupSet.contains(expr) => + case 
expr @ Multiply(l, r, _) if !groupSet.contains(expr) => flattenMultiply(l, groupSet) ++ flattenMultiply(r, groupSet) case other => other :: Nil } @@ -201,23 +201,24 @@ object ReorderAssociativeOperator extends Rule[LogicalPlan] { // We have to respect aggregate expressions which exists in grouping expressions when plan // is an Aggregate operator, otherwise the optimized expression could not be derived from // grouping expressions. + // TODO: do not reorder consecutive `Add`s or `Multiply`s with different `failOnError` flags val groupingExpressionSet = collectGroupingExpressions(q) q transformExpressionsDown { - case a: Add if a.deterministic && a.dataType.isInstanceOf[IntegralType] => + case a @ Add(_, _, f) if a.deterministic && a.dataType.isInstanceOf[IntegralType] => val (foldables, others) = flattenAdd(a, groupingExpressionSet).partition(_.foldable) if (foldables.size > 1) { - val foldableExpr = foldables.reduce((x, y) => Add(x, y)) + val foldableExpr = foldables.reduce((x, y) => Add(x, y, f)) val c = Literal.create(foldableExpr.eval(EmptyRow), a.dataType) - if (others.isEmpty) c else Add(others.reduce((x, y) => Add(x, y)), c) + if (others.isEmpty) c else Add(others.reduce((x, y) => Add(x, y, f)), c, f) } else { a } - case m: Multiply if m.deterministic && m.dataType.isInstanceOf[IntegralType] => + case m @ Multiply(_, _, f) if m.deterministic && m.dataType.isInstanceOf[IntegralType] => val (foldables, others) = flattenMultiply(m, groupingExpressionSet).partition(_.foldable) if (foldables.size > 1) { - val foldableExpr = foldables.reduce((x, y) => Multiply(x, y)) + val foldableExpr = foldables.reduce((x, y) => Multiply(x, y, f)) val c = Literal.create(foldableExpr.eval(EmptyRow), m.dataType) - if (others.isEmpty) c else Multiply(others.reduce((x, y) => Multiply(x, y)), c) + if (others.isEmpty) c else Multiply(others.reduce((x, y) => Multiply(x, y, f)), c, f) } else { m } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala index f05598aeb5353..14dd04afebe28 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala @@ -60,10 +60,10 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(Add(positiveIntLit, negativeIntLit), -1) checkEvaluation(Add(positiveLongLit, negativeLongLit), -1L) - Seq("true", "false").foreach { checkOverflow => - withSQLConf(SQLConf.ANSI_ENABLED.key -> checkOverflow) { + Seq("true", "false").foreach { failOnError => + withSQLConf(SQLConf.ANSI_ENABLED.key -> failOnError) { DataTypeTestUtils.numericAndInterval.foreach { tpe => - checkConsistencyBetweenInterpretedAndCodegenAllowingException(Add, tpe, tpe) + checkConsistencyBetweenInterpretedAndCodegenAllowingException(Add(_, _), tpe, tpe) } } } @@ -103,8 +103,12 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(UnaryMinus(positiveLongLit), - positiveLong) checkEvaluation(UnaryMinus(negativeLongLit), - negativeLong) - DataTypeTestUtils.numericAndInterval.foreach { tpe => - checkConsistencyBetweenInterpretedAndCodegen(UnaryMinus, tpe) + Seq("true", "false").foreach { failOnError => + withSQLConf(SQLConf.ANSI_ENABLED.key -> failOnError) { + DataTypeTestUtils.numericAndInterval.foreach { tpe => + 
checkConsistencyBetweenInterpretedAndCodegenAllowingException(UnaryMinus(_), tpe) + } + } } } @@ -121,10 +125,10 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(Subtract(positiveIntLit, negativeIntLit), positiveInt - negativeInt) checkEvaluation(Subtract(positiveLongLit, negativeLongLit), positiveLong - negativeLong) - Seq("true", "false").foreach { checkOverflow => - withSQLConf(SQLConf.ANSI_ENABLED.key -> checkOverflow) { + Seq("true", "false").foreach { failOnError => + withSQLConf(SQLConf.ANSI_ENABLED.key -> failOnError) { DataTypeTestUtils.numericAndInterval.foreach { tpe => - checkConsistencyBetweenInterpretedAndCodegenAllowingException(Subtract, tpe, tpe) + checkConsistencyBetweenInterpretedAndCodegenAllowingException(Subtract(_, _), tpe, tpe) } } } @@ -143,10 +147,10 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(Multiply(positiveIntLit, negativeIntLit), positiveInt * negativeInt) checkEvaluation(Multiply(positiveLongLit, negativeLongLit), positiveLong * negativeLong) - Seq("true", "false").foreach { checkOverflow => - withSQLConf(SQLConf.ANSI_ENABLED.key -> checkOverflow) { + Seq("true", "false").foreach { failOnError => + withSQLConf(SQLConf.ANSI_ENABLED.key -> failOnError) { DataTypeTestUtils.numericTypeWithoutDecimal.foreach { tpe => - checkConsistencyBetweenInterpretedAndCodegenAllowingException(Multiply, tpe, tpe) + checkConsistencyBetweenInterpretedAndCodegenAllowingException(Multiply(_, _), tpe, tpe) } } } @@ -161,21 +165,45 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper testDecimalAndDoubleType { convert => val left = Literal(convert(2)) val right = Literal(convert(1)) - val dataType = left.dataType checkEvaluation(Divide(left, right), convert(2)) - checkEvaluation(Divide(Literal.create(null, dataType), right), null) + checkEvaluation(Divide(Literal.create(null, left.dataType), right), null) checkEvaluation(Divide(left, Literal.create(null, right.dataType)), null) checkEvaluation(Divide(left, Literal(convert(0))), null) // divide by zero } - Seq(DoubleType, DecimalType.SYSTEM_DEFAULT).foreach { tpe => - checkConsistencyBetweenInterpretedAndCodegen(Divide, tpe, tpe) + Seq("true", "false").foreach { failOnError => + withSQLConf(SQLConf.ANSI_ENABLED.key -> failOnError) { + Seq(DoubleType, DecimalType.SYSTEM_DEFAULT).foreach { tpe => + checkConsistencyBetweenInterpretedAndCodegenAllowingException(Divide(_, _), tpe, tpe) + } + } } } - test("/ (Divide) for Long type") { - checkEvaluation(IntegralDivide(Literal(1.toLong), Literal(2.toLong)), 0L) + private def testDecimalAndLongType(testFunc: (Int => Any) => Unit): Unit = { + testFunc(_.toLong) + testFunc(Decimal(_)) + } + + test("/ (Divide) for Long and Decimal type") { + testDecimalAndLongType { convert => + val left = Literal(convert(1)) + val right = Literal(convert(2)) + checkEvaluation(IntegralDivide(left, right), 0L) + checkEvaluation(IntegralDivide(Literal.create(null, left.dataType), right), null) + checkEvaluation(IntegralDivide(left, Literal.create(null, right.dataType)), null) + checkEvaluation(IntegralDivide(left, Literal(convert(0))), null) // divide by zero + } checkEvaluation(IntegralDivide(positiveLongLit, negativeLongLit), 0L) + + Seq("true", "false").foreach { failOnError => + withSQLConf(SQLConf.ANSI_ENABLED.key -> failOnError) { + Seq(LongType, DecimalType.SYSTEM_DEFAULT).foreach { tpe => + checkConsistencyBetweenInterpretedAndCodegenAllowingException( + IntegralDivide(_, 
_), tpe, tpe) + } + } + } } test("% (Remainder)") { @@ -194,8 +222,12 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(Remainder(positiveLongLit, positiveLongLit), 0L) checkEvaluation(Remainder(negativeLongLit, negativeLongLit), 0L) - DataTypeTestUtils.numericTypeWithoutDecimal.foreach { tpe => - checkConsistencyBetweenInterpretedAndCodegen(Remainder, tpe, tpe) + Seq("true", "false").foreach { failOnError => + withSQLConf(SQLConf.ANSI_ENABLED.key -> failOnError) { + DataTypeTestUtils.numericTypeWithoutDecimal.foreach { tpe => + checkConsistencyBetweenInterpretedAndCodegenAllowingException(Remainder(_, _), tpe, tpe) + } + } } } @@ -248,12 +280,13 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(Pmod(positiveInt, negativeInt), positiveInt) checkEvaluation(Pmod(positiveLong, negativeLong), positiveLong) - // mod by 0 - checkEvaluation(Pmod(Literal(-7), Literal(0)), null) - checkEvaluation(Pmod(Literal(7.2D), Literal(0D)), null) - checkEvaluation(Pmod(Literal(7.2F), Literal(0F)), null) - checkEvaluation(Pmod(Literal(2.toByte), Literal(0.toByte)), null) - checkEvaluation(Pmod(positiveShort, 0.toShort), null) + Seq("true", "false").foreach { failOnError => + withSQLConf(SQLConf.ANSI_ENABLED.key -> failOnError) { + DataTypeTestUtils.numericTypeWithoutDecimal.foreach { tpe => + checkConsistencyBetweenInterpretedAndCodegenAllowingException(Pmod(_, _), tpe, tpe) + } + } + } } test("function least") { @@ -408,18 +441,24 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper test("SPARK-24598: overflow on long returns wrong result") { val maxLongLiteral = Literal(Long.MaxValue) val minLongLiteral = Literal(Long.MinValue) - val e1 = Add(maxLongLiteral, Literal(1L)) - val e2 = Subtract(maxLongLiteral, Literal(-1L)) - val e3 = Multiply(maxLongLiteral, Literal(2L)) - val e4 = Add(minLongLiteral, minLongLiteral) - val e5 = Subtract(minLongLiteral, maxLongLiteral) - val e6 = Multiply(minLongLiteral, minLongLiteral) withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + val e1 = Add(maxLongLiteral, Literal(1L)) + val e2 = Subtract(maxLongLiteral, Literal(-1L)) + val e3 = Multiply(maxLongLiteral, Literal(2L)) + val e4 = Add(minLongLiteral, minLongLiteral) + val e5 = Subtract(minLongLiteral, maxLongLiteral) + val e6 = Multiply(minLongLiteral, minLongLiteral) Seq(e1, e2, e3, e4, e5, e6).foreach { e => checkExceptionInExpression[ArithmeticException](e, "overflow") } } withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + val e1 = Add(maxLongLiteral, Literal(1L)) + val e2 = Subtract(maxLongLiteral, Literal(-1L)) + val e3 = Multiply(maxLongLiteral, Literal(2L)) + val e4 = Add(minLongLiteral, minLongLiteral) + val e5 = Subtract(minLongLiteral, maxLongLiteral) + val e6 = Multiply(minLongLiteral, minLongLiteral) checkEvaluation(e1, Long.MinValue) checkEvaluation(e2, Long.MinValue) checkEvaluation(e3, -2L) @@ -432,18 +471,24 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper test("SPARK-24598: overflow on integer returns wrong result") { val maxIntLiteral = Literal(Int.MaxValue) val minIntLiteral = Literal(Int.MinValue) - val e1 = Add(maxIntLiteral, Literal(1)) - val e2 = Subtract(maxIntLiteral, Literal(-1)) - val e3 = Multiply(maxIntLiteral, Literal(2)) - val e4 = Add(minIntLiteral, minIntLiteral) - val e5 = Subtract(minIntLiteral, maxIntLiteral) - val e6 = Multiply(minIntLiteral, minIntLiteral) withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + val e1 = 
Add(maxIntLiteral, Literal(1)) + val e2 = Subtract(maxIntLiteral, Literal(-1)) + val e3 = Multiply(maxIntLiteral, Literal(2)) + val e4 = Add(minIntLiteral, minIntLiteral) + val e5 = Subtract(minIntLiteral, maxIntLiteral) + val e6 = Multiply(minIntLiteral, minIntLiteral) Seq(e1, e2, e3, e4, e5, e6).foreach { e => checkExceptionInExpression[ArithmeticException](e, "overflow") } } withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + val e1 = Add(maxIntLiteral, Literal(1)) + val e2 = Subtract(maxIntLiteral, Literal(-1)) + val e3 = Multiply(maxIntLiteral, Literal(2)) + val e4 = Add(minIntLiteral, minIntLiteral) + val e5 = Subtract(minIntLiteral, maxIntLiteral) + val e6 = Multiply(minIntLiteral, minIntLiteral) checkEvaluation(e1, Int.MinValue) checkEvaluation(e2, Int.MinValue) checkEvaluation(e3, -2) @@ -456,18 +501,24 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper test("SPARK-24598: overflow on short returns wrong result") { val maxShortLiteral = Literal(Short.MaxValue) val minShortLiteral = Literal(Short.MinValue) - val e1 = Add(maxShortLiteral, Literal(1.toShort)) - val e2 = Subtract(maxShortLiteral, Literal((-1).toShort)) - val e3 = Multiply(maxShortLiteral, Literal(2.toShort)) - val e4 = Add(minShortLiteral, minShortLiteral) - val e5 = Subtract(minShortLiteral, maxShortLiteral) - val e6 = Multiply(minShortLiteral, minShortLiteral) withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + val e1 = Add(maxShortLiteral, Literal(1.toShort)) + val e2 = Subtract(maxShortLiteral, Literal((-1).toShort)) + val e3 = Multiply(maxShortLiteral, Literal(2.toShort)) + val e4 = Add(minShortLiteral, minShortLiteral) + val e5 = Subtract(minShortLiteral, maxShortLiteral) + val e6 = Multiply(minShortLiteral, minShortLiteral) Seq(e1, e2, e3, e4, e5, e6).foreach { e => checkExceptionInExpression[ArithmeticException](e, "overflow") } } withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + val e1 = Add(maxShortLiteral, Literal(1.toShort)) + val e2 = Subtract(maxShortLiteral, Literal((-1).toShort)) + val e3 = Multiply(maxShortLiteral, Literal(2.toShort)) + val e4 = Add(minShortLiteral, minShortLiteral) + val e5 = Subtract(minShortLiteral, maxShortLiteral) + val e6 = Multiply(minShortLiteral, minShortLiteral) checkEvaluation(e1, Short.MinValue) checkEvaluation(e2, Short.MinValue) checkEvaluation(e3, (-2).toShort) @@ -480,18 +531,24 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper test("SPARK-24598: overflow on byte returns wrong result") { val maxByteLiteral = Literal(Byte.MaxValue) val minByteLiteral = Literal(Byte.MinValue) - val e1 = Add(maxByteLiteral, Literal(1.toByte)) - val e2 = Subtract(maxByteLiteral, Literal((-1).toByte)) - val e3 = Multiply(maxByteLiteral, Literal(2.toByte)) - val e4 = Add(minByteLiteral, minByteLiteral) - val e5 = Subtract(minByteLiteral, maxByteLiteral) - val e6 = Multiply(minByteLiteral, minByteLiteral) withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + val e1 = Add(maxByteLiteral, Literal(1.toByte)) + val e2 = Subtract(maxByteLiteral, Literal((-1).toByte)) + val e3 = Multiply(maxByteLiteral, Literal(2.toByte)) + val e4 = Add(minByteLiteral, minByteLiteral) + val e5 = Subtract(minByteLiteral, maxByteLiteral) + val e6 = Multiply(minByteLiteral, minByteLiteral) Seq(e1, e2, e3, e4, e5, e6).foreach { e => checkExceptionInExpression[ArithmeticException](e, "overflow") } } withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + val e1 = Add(maxByteLiteral, Literal(1.toByte)) + val e2 = Subtract(maxByteLiteral, Literal((-1).toByte)) 
+ val e3 = Multiply(maxByteLiteral, Literal(2.toByte)) + val e4 = Add(minByteLiteral, minByteLiteral) + val e5 = Subtract(minByteLiteral, maxByteLiteral) + val e6 = Multiply(minByteLiteral, minByteLiteral) checkEvaluation(e1, Byte.MinValue) checkEvaluation(e2, Byte.MinValue) checkEvaluation(e3, (-2).toByte) @@ -500,4 +557,23 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(e6, 0.toByte) } } + + test("SPARK-33008: division by zero on divide-like operations returns incorrect result") { + withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + val operators: Seq[((Expression, Expression) => Expression, ((Int => Any) => Unit) => Unit)] = + Seq((Divide(_, _), testDecimalAndDoubleType), + (IntegralDivide(_, _), testDecimalAndLongType), + (Remainder(_, _), testNumericDataTypes), + (Pmod(_, _), testNumericDataTypes)) + operators.foreach { case (operator, testTypesFn) => + testTypesFn { convert => + val one = Literal(convert(1)) + val zero = Literal(convert(0)) + checkEvaluation(operator(Literal.create(null, one.dataType), zero), null) + checkEvaluation(operator(one, Literal.create(null, zero.dataType)), null) + checkExceptionInExpression[ArithmeticException](operator(one, zero), "divide by zero") + } + } + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala index 60ab98eeb410a..842c8f3243f2a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala @@ -349,6 +349,21 @@ trait ExpressionEvalHelper extends ScalaCheckDrivenPropertyChecks with PlanTestB } } + /** + * Test evaluation results between Interpreted mode and Codegen mode, making sure we have + * consistent result regardless of the evaluation method we use. If an exception is thrown, + * it checks that both modes throw the same exception. + * + * This method test against unary expressions by feeding them arbitrary literals of `dataType`. + */ + def checkConsistencyBetweenInterpretedAndCodegenAllowingException( + c: Expression => Expression, + dataType: DataType): Unit = { + forAll (LiteralGenerator.randomGen(dataType)) { (l: Literal) => + cmpInterpretWithCodegen(EmptyRow, c(l), true) + } + } + /** * Test evaluation results between Interpreted mode and Codegen mode, making sure we have * consistent result regardless of the evaluation method we use. 
diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/case.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/case.sql index 6d9c44c67a96b..b39ccb85fb366 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/case.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/case.sql @@ -65,11 +65,11 @@ SELECT '7' AS `None`, CASE WHEN rand() < 0 THEN 1 END AS `NULL on no matches`; +-- [SPARK-33008] Spark SQL throws an exception -- Constant-expression folding shouldn't evaluate unreachable subexpressions SELECT CASE WHEN 1=0 THEN 1/0 WHEN 1=1 THEN 1 ELSE 2/0 END; SELECT CASE 1 WHEN 0 THEN 1/0 WHEN 1 THEN 1 ELSE 2/0 END; --- [SPARK-27923] PostgreSQL throws an exception but Spark SQL is NULL -- However we do not currently suppress folding of potentially -- reachable subexpressions SELECT CASE WHEN i > 100 THEN 1/0 ELSE 0 END FROM case_tbl; diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/select_having.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/select_having.sql index 2edde8df08047..0efe0877e9b3e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/select_having.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/select_having.sql @@ -49,6 +49,7 @@ SELECT 1 AS one FROM test_having HAVING a > 1; SELECT 1 AS one FROM test_having HAVING 1 > 2; SELECT 1 AS one FROM test_having HAVING 1 < 2; +-- [SPARK-33008] Spark SQL throws an exception -- and just to prove that we aren't scanning the table: SELECT 1 AS one FROM test_having WHERE 1/a = 1 HAVING 1 < 2; diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-case.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-case.sql index 8fa3c0a6dfec9..5322c1b502439 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-case.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-case.sql @@ -67,11 +67,11 @@ SELECT '7' AS `None`, CASE WHEN rand() < udf(0) THEN 1 END AS `NULL on no matches`; +-- [SPARK-33008] Spark SQL throws an exception -- Constant-expression folding shouldn't evaluate unreachable subexpressions SELECT CASE WHEN udf(1=0) THEN 1/0 WHEN 1=1 THEN 1 ELSE 2/0 END; SELECT CASE 1 WHEN 0 THEN 1/udf(0) WHEN 1 THEN 1 ELSE 2/0 END; --- [SPARK-27923] PostgreSQL throws an exception but Spark SQL is NULL -- However we do not currently suppress folding of potentially -- reachable subexpressions SELECT CASE WHEN i > 100 THEN udf(1/0) ELSE udf(0) END FROM case_tbl; diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-select_having.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-select_having.sql index 412d45b49a184..76c0b198aa439 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-select_having.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-select_having.sql @@ -51,6 +51,7 @@ SELECT 1 AS one FROM test_having HAVING udf(a) > 1; SELECT 1 AS one FROM test_having HAVING udf(udf(1) > udf(2)); SELECT 1 AS one FROM test_having HAVING udf(udf(1) < udf(2)); +-- [SPARK-33008] Spark SQL throws an exception -- and just to prove that we aren't scanning the table: SELECT 1 AS one FROM test_having WHERE 1/udf(a) = 1 HAVING 1 < 2; diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/case.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/case.sql.out index 1b002c3f48ae2..0006768dbcb0f 100644 --- 
a/sql/core/src/test/resources/sql-tests/results/postgreSQL/case.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/case.sql.out @@ -176,28 +176,28 @@ struct -- !query SELECT CASE WHEN 1=0 THEN 1/0 WHEN 1=1 THEN 1 ELSE 2/0 END -- !query schema -struct +struct<> -- !query output -1.0 +java.lang.ArithmeticException +divide by zero -- !query SELECT CASE 1 WHEN 0 THEN 1/0 WHEN 1 THEN 1 ELSE 2/0 END -- !query schema -struct +struct<> -- !query output -1.0 +java.lang.ArithmeticException +divide by zero -- !query SELECT CASE WHEN i > 100 THEN 1/0 ELSE 0 END FROM case_tbl -- !query schema -struct 100) THEN (CAST(1 AS DOUBLE) / CAST(0 AS DOUBLE)) ELSE CAST(0 AS DOUBLE) END:double> +struct<> -- !query output -0.0 -0.0 -0.0 -0.0 +java.lang.ArithmeticException +divide by zero -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out index 18b0c821ae70f..6f98e2f9eeee7 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out @@ -569,25 +569,28 @@ struct -- !query select bigint('9223372036854775800') / bigint('0') -- !query schema -struct<(CAST(CAST(9223372036854775800 AS BIGINT) AS DOUBLE) / CAST(CAST(0 AS BIGINT) AS DOUBLE)):double> +struct<> -- !query output -NULL +java.lang.ArithmeticException +divide by zero -- !query select bigint('-9223372036854775808') / smallint('0') -- !query schema -struct<(CAST(CAST(-9223372036854775808 AS BIGINT) AS DOUBLE) / CAST(CAST(0 AS SMALLINT) AS DOUBLE)):double> +struct<> -- !query output -NULL +java.lang.ArithmeticException +divide by zero -- !query select smallint('100') / bigint('0') -- !query schema -struct<(CAST(CAST(100 AS SMALLINT) AS DOUBLE) / CAST(CAST(0 AS BIGINT) AS DOUBLE)):double> +struct<> -- !query output -NULL +java.lang.ArithmeticException +divide by zero -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out index d97853d5fc6d0..fc2961a072e9f 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out @@ -4673,7 +4673,7 @@ struct<(CAST(CAST(999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) div -- !query select mod(cast(999999999999999999999 as decimal(38, 0)),1000000000000000000000) -- !query schema -struct<(CAST(CAST(999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) % CAST(1000000000000000000000 AS DECIMAL(38,0))):decimal(22,0)> +struct -- !query output 999999999999999999999 @@ -4689,7 +4689,7 @@ struct<(CAST(CAST(-9999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) di -- !query select mod(cast(-9999999999999999999999 as decimal(38, 0)),1000000000000000000000) -- !query schema -struct<(CAST(CAST(-9999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) % CAST(1000000000000000000000 AS DECIMAL(38,0))):decimal(22,0)> +struct -- !query output -999999999999999999999 @@ -4697,7 +4697,7 @@ struct<(CAST(CAST(-9999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) % -- !query select div(cast(-9999999999999999999999 as decimal(38, 0)),1000000000000000000000)*1000000000000000000000 + mod(cast(-9999999999999999999999 as decimal(38, 0)),1000000000000000000000) -- !query schema -struct<(CAST((CAST(CAST((CAST(CAST(-9999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) div 
CAST(1000000000000000000000 AS DECIMAL(38,0))) AS DECIMAL(20,0)) AS DECIMAL(22,0)) * CAST(1000000000000000000000 AS DECIMAL(22,0))) AS DECIMAL(38,0)) + CAST((CAST(CAST(-9999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) % CAST(1000000000000000000000 AS DECIMAL(38,0))) AS DECIMAL(38,0))):decimal(38,0)> +struct<(CAST((CAST(CAST((CAST(CAST(-9999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) div CAST(1000000000000000000000 AS DECIMAL(38,0))) AS DECIMAL(20,0)) AS DECIMAL(22,0)) * CAST(1000000000000000000000 AS DECIMAL(22,0))) AS DECIMAL(38,0)) + CAST(mod(CAST(CAST(-9999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)), CAST(1000000000000000000000 AS DECIMAL(38,0))) AS DECIMAL(38,0))):decimal(38,0)> -- !query output -9999999999999999999999 @@ -4705,7 +4705,7 @@ struct<(CAST((CAST(CAST((CAST(CAST(-9999999999999999999999 AS DECIMAL(38,0)) AS -- !query select mod (70.0,70) -- !query schema -struct<(CAST(70.0 AS DECIMAL(3,1)) % CAST(CAST(70 AS DECIMAL(2,0)) AS DECIMAL(3,1))):decimal(3,1)> +struct -- !query output 0.0 diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out index d8d33d92a7cc4..e4b7f3b1f5e88 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out @@ -174,9 +174,10 @@ struct -- !query SELECT 1 AS one FROM test_having WHERE 1/a = 1 HAVING 1 < 2 -- !query schema -struct +struct<> -- !query output -1 +java.lang.ArithmeticException +divide by zero -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-case.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-case.sql.out index 6c733e916d734..2f31d2684ca22 100755 --- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-case.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-case.sql.out @@ -176,28 +176,28 @@ struct -- !query SELECT CASE WHEN udf(1=0) THEN 1/0 WHEN 1=1 THEN 1 ELSE 2/0 END -- !query schema -struct +struct<> -- !query output -1.0 +java.lang.ArithmeticException +divide by zero -- !query SELECT CASE 1 WHEN 0 THEN 1/udf(0) WHEN 1 THEN 1 ELSE 2/0 END -- !query schema -struct +struct<> -- !query output -1.0 +java.lang.ArithmeticException +divide by zero -- !query SELECT CASE WHEN i > 100 THEN udf(1/0) ELSE udf(0) END FROM case_tbl -- !query schema -struct 100) THEN CAST(udf(ansi_cast((ansi_cast(1 as double) / ansi_cast(0 as double)) as string)) AS DOUBLE) ELSE CAST(CAST(udf(ansi_cast(0 as string)) AS INT) AS DOUBLE) END:double> +struct<> -- !query output -0.0 -0.0 -0.0 -0.0 +java.lang.ArithmeticException +divide by zero -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out index 50b6e60086747..89fc36a0da827 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out @@ -174,9 +174,10 @@ struct -- !query SELECT 1 AS one FROM test_having WHERE 1/udf(a) = 1 HAVING 1 < 2 -- !query schema -struct +struct<> -- !query output -1 +java.lang.ArithmeticException +divide by zero -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala index ebe4e8dea97e3..cc88f9ad3da40 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala @@ -643,8 +643,11 @@ class ColumnarProjectExec(projectList: Seq[NamedExpression], child: SparkPlan) * A version of add that supports columnar processing for longs. This version is broken * on purpose so it adds the numbers plus 1 so that the tests can show that it was replaced. */ -class BrokenColumnarAdd(left: ColumnarExpression, right: ColumnarExpression) - extends Add(left, right) with ColumnarExpression { +class BrokenColumnarAdd( + left: ColumnarExpression, + right: ColumnarExpression, + failOnError: Boolean = false) + extends Add(left, right, failOnError) with ColumnarExpression { override def supportsColumnar(): Boolean = left.supportsColumnar && right.supportsColumnar From 838791bf0b8290143001fe8f94b1fbbd53a181d2 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 29 Oct 2020 19:10:01 -0700 Subject: [PATCH 0357/1009] [SPARK-33292][SQL] Make Literal ArrayBasedMapData string representation disambiguous ### What changes were proposed in this pull request? This PR aims to wrap `ArrayBasedMapData` literal representation with `map(...)`. ### Why are the changes needed? Literal ArrayBasedMapData has inconsistent string representation from `LogicalPlan` to `Optimized Logical Plan/Physical Plan`. Also, the representation at `Optimized Logical Plan` and `Physical Plan` is ambiguous like `[1 AS a#0, keys: [key1], values: [value1] AS b#1]`. **BEFORE** ```scala scala> spark.version res0: String = 2.4.7 scala> sql("SELECT 1 a, map('key1', 'value1') b").explain(true) == Parsed Logical Plan == 'Project [1 AS a#0, 'map(key1, value1) AS b#1] +- OneRowRelation == Analyzed Logical Plan == a: int, b: map Project [1 AS a#0, map(key1, value1) AS b#1] +- OneRowRelation == Optimized Logical Plan == Project [1 AS a#0, keys: [key1], values: [value1] AS b#1] +- OneRowRelation == Physical Plan == *(1) Project [1 AS a#0, keys: [key1], values: [value1] AS b#1] +- Scan OneRowRelation[] ``` **AFTER** ```scala scala> spark.version res0: String = 3.1.0-SNAPSHOT scala> sql("SELECT 1 a, map('key1', 'value1') b").explain(true) == Parsed Logical Plan == 'Project [1 AS a#4, 'map(key1, value1) AS b#5] +- OneRowRelation == Analyzed Logical Plan == a: int, b: map Project [1 AS a#4, map(key1, value1) AS b#5] +- OneRowRelation == Optimized Logical Plan == Project [1 AS a#4, map(keys: [key1], values: [value1]) AS b#5] +- OneRowRelation == Physical Plan == *(1) Project [1 AS a#4, map(keys: [key1], values: [value1]) AS b#5] +- *(1) Scan OneRowRelation[] ``` ### Does this PR introduce _any_ user-facing change? Yes. This changes the query plan's string representation in `explain` command and UI. However, this is a bug fix. ### How was this patch tested? Pass the CI with the newly added test case. Closes #30190 from dongjoon-hyun/SPARK-33292. 
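A quick way to see the new rendering directly, outside of `explain`, is to print the literal itself. This is only a sketch (it assumes the spark-catalyst classes are on the classpath) and simply exercises the `Literal.toString` path covered by the assertion added to `LiteralExpressionSuite` below:

```scala
// Sketch only: shows the string representation this patch changes.
import org.apache.spark.sql.catalyst.expressions.Literal

val mapLiteral = Literal.create(Map("a" -> 1))

// Before the patch this printed the bare ArrayBasedMapData payload,
// "keys: [a], values: [1]"; with the added `case d: ArrayBasedMapData`
// branch it prints "map(keys: [a], values: [1])".
println(mapLiteral.toString)
```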
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/catalyst/expressions/literals.scala | 1 + .../spark/sql/catalyst/expressions/LiteralExpressionSuite.scala | 1 + 2 files changed, 2 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala index 213a58a3244e2..9e96ab8a9b6ca 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala @@ -297,6 +297,7 @@ case class Literal (value: Any, dataType: DataType) extends LeafExpression { override def toString: String = value match { case null => "null" case binary: Array[Byte] => s"0x" + DatatypeConverter.printHexBinary(binary) + case d: ArrayBasedMapData => s"map(${d.toString})" case other => other.toString } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala index 4714635a3370b..bb86135021b91 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala @@ -209,6 +209,7 @@ class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { } checkMapLiteral(Map("a" -> 1, "b" -> 2, "c" -> 3)) checkMapLiteral(Map("1" -> 1.0, "2" -> 2.0, "3" -> 3.0)) + assert(Literal.create(Map("a" -> 1)).toString === "map(keys: [a], values: [1])") } test("struct") { From 343e0bb3adae465547e1423ea79f07d0e79adee7 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 30 Oct 2020 11:18:47 +0900 Subject: [PATCH 0358/1009] [SPARK-33286][SQL] Improve the error message about schema parsing by `from_json/from_csv` # What changes were proposed in this pull request? In the PR, I propose to improve the error message from `from_json`/`from_csv` by combining errors from all schema parsers: - DataType.fromJson (except CSV) - CatalystSqlParser.parseDataType - CatalystSqlParser.parseTableSchema Before the changes, `from_json` does not show error messages from the first parser in the chain that could mislead users. ### Why are the changes needed? Currently, `from_json` outputs the error message from the fallback schema parser which can confuse end-users. For example: ```scala val invalidJsonSchema = """{"fields": [{"a":123}], "type": "struct"}""" df.select(from_json($"json", invalidJsonSchema, Map.empty[String, String])).show() ``` The JSON schema has an issue in `{"a":123}` but the error message doesn't point it out: ``` mismatched input '{' expecting {'ADD', 'AFTER', ...}(line 1, pos 0) == SQL == {"fields": [{"a":123}], "type": "struct"} ^^^ org.apache.spark.sql.catalyst.parser.ParseException: mismatched input '{' expecting {'ADD', 'AFTER', ... }(line 1, pos 0) == SQL == {"fields": [{"a":123}], "type": "struct"} ^^^ ``` ### Does this PR introduce _any_ user-facing change? Yes, after the changes for the example above: ``` Cannot parse the schema in JSON format: Failed to convert the JSON string '{"a":123}' to a field. 
Failed fallback parsing: Cannot parse the data type: mismatched input '{' expecting {'ADD', 'AFTER', ...}(line 1, pos 0) == SQL == {"fields": [{"a":123}], "type": "struct"} ^^^ Failed fallback parsing: mismatched input '{' expecting {'ADD', 'AFTER', ...}(line 1, pos 0) == SQL == {"fields": [{"a":123}], "type": "struct"} ^^^ ``` ### How was this patch tested? - By existing tests suites like `JsonFunctionsSuite` and `JsonExpressionsSuite`. - Add new test to `JsonFunctionsSuite`. - Re-gen results for `json-functions.sql`. Closes #30183 from MaxGekk/fromDDL-error-msg. Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- .../org/apache/spark/sql/types/DataType.scala | 38 +++++++++++++++++-- .../org/apache/spark/sql/functions.scala | 12 +++--- .../sql-tests/results/csv-functions.sql.out | 9 ++++- .../sql-tests/results/json-functions.sql.out | 9 ++++- .../apache/spark/sql/JsonFunctionsSuite.scala | 21 ++++++++++ 5 files changed, 78 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala index 3f70b7647f195..043c88f88843c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala @@ -28,9 +28,10 @@ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ import org.apache.spark.annotation.Stable +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.Resolver import org.apache.spark.sql.catalyst.expressions.{Cast, Expression} -import org.apache.spark.sql.catalyst.parser.CatalystSqlParser +import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException} import org.apache.spark.sql.catalyst.util.DataTypeJsonUtils.{DataTypeJsonDeserializer, DataTypeJsonSerializer} import org.apache.spark.sql.catalyst.util.StringUtils.StringConcat import org.apache.spark.sql.internal.SQLConf @@ -125,10 +126,41 @@ object DataType { private val FIXED_DECIMAL = """decimal\(\s*(\d+)\s*,\s*(\-?\d+)\s*\)""".r def fromDDL(ddl: String): DataType = { + parseTypeWithFallback( + ddl, + CatalystSqlParser.parseDataType, + "Cannot parse the data type: ", + fallbackParser = CatalystSqlParser.parseTableSchema) + } + + /** + * Parses data type from a string with schema. It calls `parser` for `schema`. + * If it fails, calls `fallbackParser`. If the fallback function fails too, combines error message + * from `parser` and `fallbackParser`. + * + * @param schema The schema string to parse by `parser` or `fallbackParser`. + * @param parser The function that should be invoke firstly. + * @param errorMsg The error message for `parser`. + * @param fallbackParser The function that is called when `parser` fails. + * @return The data type parsed from the `schema` schema. 
+ */ + def parseTypeWithFallback( + schema: String, + parser: String => DataType, + errorMsg: String, + fallbackParser: String => DataType): DataType = { try { - CatalystSqlParser.parseDataType(ddl) + parser(schema) } catch { - case NonFatal(_) => CatalystSqlParser.parseTableSchema(ddl) + case NonFatal(e1) => + try { + fallbackParser(schema) + } catch { + case NonFatal(e2) => + throw new AnalysisException( + message = s"$errorMsg${e1.getMessage}\nFailed fallback parsing: ${e2.getMessage}", + cause = Some(e1.getCause)) + } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 21e22d90f0f80..ffa97c20c397c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -21,7 +21,6 @@ import scala.collection.JavaConverters._ import scala.language.implicitConversions import scala.reflect.runtime.universe.{typeTag, TypeTag} import scala.util.Try -import scala.util.control.NonFatal import org.apache.spark.annotation.Stable import org.apache.spark.sql.api.java._ @@ -36,6 +35,7 @@ import org.apache.spark.sql.execution.SparkSqlParser import org.apache.spark.sql.expressions.{Aggregator, SparkUserDefinedFunction, UserDefinedAggregator, UserDefinedFunction} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ +import org.apache.spark.sql.types.DataType.parseTypeWithFallback import org.apache.spark.util.Utils /** @@ -4101,11 +4101,11 @@ object functions { * @since 2.3.0 */ def from_json(e: Column, schema: String, options: Map[String, String]): Column = { - val dataType = try { - DataType.fromJson(schema) - } catch { - case NonFatal(_) => DataType.fromDDL(schema) - } + val dataType = parseTypeWithFallback( + schema, + DataType.fromJson, + "Cannot parse the schema in JSON format: ", + fallbackParser = DataType.fromDDL) from_json(e, dataType, options) } diff --git a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out index 7ba3f712363fe..ed2341f71a1b0 100644 --- a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out @@ -33,13 +33,20 @@ select from_csv('1', 'a InvalidType') struct<> -- !query output org.apache.spark.sql.AnalysisException +Cannot parse the data type: +extraneous input 'InvalidType' expecting (line 1, pos 2) +== SQL == +a InvalidType +--^^^ + +Failed fallback parsing: DataType invalidtype is not supported.(line 1, pos 2) == SQL == a InvalidType --^^^ -; line 1 pos 7 +;; line 1 pos 7 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out index 3cc45890cf089..838e4607d0324 100644 --- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out @@ -124,13 +124,20 @@ select from_json('{"a":1}', 'a InvalidType') struct<> -- !query output org.apache.spark.sql.AnalysisException +Cannot parse the data type: +extraneous input 'InvalidType' expecting (line 1, pos 2) +== SQL == +a InvalidType +--^^^ + +Failed fallback parsing: DataType invalidtype is not supported.(line 1, pos 2) == SQL == a InvalidType --^^^ -; line 1 pos 7 +;; line 1 pos 7 -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index e2a9cf536d154..2e515ee92bceb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -754,4 +754,25 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { val expected = new StructType().add("parsed", new StructType().add("a b", LongType)) assert(out.schema == expected) } + + test("SPARK-33286: from_json - combined error messages") { + val df = Seq("""{"a":1}""").toDF("json") + val invalidJsonSchema = """{"fields": [{"a":123}], "type": "struct"}""" + val errMsg1 = intercept[AnalysisException] { + df.select(from_json($"json", invalidJsonSchema, Map.empty[String, String])).collect() + }.getMessage + assert(errMsg1.contains("""Failed to convert the JSON string '{"a":123}' to a field""")) + + val invalidDataType = "MAP<INT, cow>" + val errMsg2 = intercept[AnalysisException] { + df.select(from_json($"json", invalidDataType, Map.empty[String, String])).collect() + }.getMessage + assert(errMsg2.contains("DataType cow is not supported")) + + val invalidTableSchema = "x INT, a cow" + val errMsg3 = intercept[AnalysisException] { + df.select(from_json($"json", invalidTableSchema, Map.empty[String, String])).collect() + }.getMessage + assert(errMsg3.contains("DataType cow is not supported")) + } }

From 0c943cd2fbc6f2d25588991613abf469ace0153e Mon Sep 17 00:00:00 2001 From: angerszhu Date: Fri, 30 Oct 2020 14:11:25 +0900 Subject: [PATCH 0359/1009] [SPARK-33248][SQL] Add a configuration to control the legacy behavior of whether to pad null values when the value size is less than the schema size

### What changes were proposed in this pull request? Add a configuration to control the legacy behavior of whether to pad null values when the value size is less than the schema size. We can't decide whether the old behavior is a bug, and some users need the behavior to stay the same as Hive's.

### Why are the changes needed? Provides a compatibility choice between the historical behavior and Hive's.

### Does this PR introduce _any_ user-facing change? No

### How was this patch tested? Existing UTs.

Closes #30156 from AngersZhuuuu/SPARK-33284. Lead-authored-by: angerszhu Co-authored-by: AngersZhuuuu Signed-off-by: HyukjinKwon --- docs/sql-migration-guide.md | 2 ++ .../org/apache/spark/sql/internal/SQLConf.scala | 15 +++++++++++++++ .../execution/BaseScriptTransformationExec.scala | 10 ++++++++-- 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index fdc764a93424b..319e72172d597 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -51,6 +51,8 @@ license: | - In Spark 3.1, loading and saving of timestamps from/to parquet files fails if the timestamps are before 1900-01-01 00:00:00Z, and loaded (saved) as the INT96 type. In Spark 3.0, the actions don't fail but might lead to shifting of the input timestamps due to rebasing from/to Julian to/from Proleptic Gregorian calendar. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.parquet.int96RebaseModeInRead` or/and `spark.sql.legacy.parquet.int96RebaseModeInWrite` to `LEGACY`. - In Spark 3.1, the `schema_of_json` and `schema_of_csv` functions return the schema in the SQL format in which field names are quoted. In Spark 3.0, the function returns a catalog string without field quoting and in lower case. 
+ + - In Spark 3.1, when `spark.sql.legacy.transformationPadNullWhenValueLessThenSchema` is true, Spark will pad NULL value when script transformation's output value size less then schema size in default-serde mode(script transformation with row format of `ROW FORMAT DELIMITED`). If false, Spark will keep original behavior to throw `ArrayIndexOutOfBoundsException`. ## Upgrading from Spark SQL 3.0 to 3.0.1 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 21357a492e39e..8825f4f96378d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2765,6 +2765,18 @@ object SQLConf { .checkValue(_ > 0, "The timeout value must be positive") .createWithDefault(10L) + val LEGACY_SCRIPT_TRANSFORM_PAD_NULL = + buildConf("spark.sql.legacy.transformationPadNullWhenValueLessThenSchema") + .internal() + .doc("Whether pad null value when transformation output's value size less then " + + "schema size in default-serde mode(script transformation with row format of " + + "`ROW FORMAT DELIMITED`)." + + "When true, Spark will pad NULL value to keep same behavior with hive." + + "When false, Spark keep original behavior to throw `ArrayIndexOutOfBoundsException`") + .version("3.1.0") + .booleanConf + .createWithDefault(true) + val LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP = buildConf("spark.sql.legacy.allowCastNumericToTimestamp") .internal() @@ -3493,6 +3505,9 @@ class SQLConf extends Serializable with Logging { def legacyAllowModifyActiveSession: Boolean = getConf(StaticSQLConf.LEGACY_ALLOW_MODIFY_ACTIVE_SESSION) + def legacyPadNullWhenValueLessThenSchema: Boolean = + getConf(SQLConf.LEGACY_SCRIPT_TRANSFORM_PAD_NULL) + def legacyAllowCastNumericToTimestamp: Boolean = getConf(SQLConf.LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala index 74e5aa716ad67..f2cddc7ba7290 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala @@ -104,10 +104,16 @@ trait BaseScriptTransformationExec extends UnaryExecNode { val reader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8)) val outputRowFormat = ioschema.outputRowFormatMap("TOK_TABLEROWFORMATFIELD") + + val padNull = if (conf.legacyPadNullWhenValueLessThenSchema) { + (arr: Array[String], size: Int) => arr.padTo(size, null) + } else { + (arr: Array[String], size: Int) => arr + } val processRowWithoutSerde = if (!ioschema.schemaLess) { prevLine: String => new GenericInternalRow( - prevLine.split(outputRowFormat).padTo(outputFieldWriters.size, null) + padNull(prevLine.split(outputRowFormat), outputFieldWriters.size) .zip(outputFieldWriters) .map { case (data, writer) => writer(data) }) } else { @@ -118,7 +124,7 @@ trait BaseScriptTransformationExec extends UnaryExecNode { val kvWriter = CatalystTypeConverters.createToCatalystConverter(StringType) prevLine: String => new GenericInternalRow( - prevLine.split(outputRowFormat).slice(0, 2).padTo(2, null) + padNull(prevLine.split(outputRowFormat).slice(0, 2), 2) .map(kvWriter)) } From d59f6a709586ff0d1bfbfda50c4e4cf17d5a50ff Mon Sep 17 00:00:00 2001 From: ulysses 
Date: Fri, 30 Oct 2020 08:18:10 +0000 Subject: [PATCH 0360/1009] [SPARK-33294][SQL] Add query resolved check before analyze InsertIntoDir ### What changes were proposed in this pull request? Add `query.resolved` before analyze `InsertIntoDir`. ### Why are the changes needed? For better error msg. ``` INSERT OVERWRITE DIRECTORY '/tmp/file' USING PARQUET SELECT * FROM ( SELECT c3 FROM ( SELECT c1, c2 from values(1,2) t(c1, c2) ) ) ``` Before this PR, we get such error msg ``` org.apache.spark.sql.catalyst.analysis.UnresolvedException: Invalid call to toAttribute on unresolved object, tree: * at org.apache.spark.sql.catalyst.analysis.Star.toAttribute(unresolved.scala:244) at org.apache.spark.sql.catalyst.plans.logical.Project$$anonfun$output$1.apply(basicLogicalOperators.scala:52) at org.apache.spark.sql.catalyst.plans.logical.Project$$anonfun$output$1.apply(basicLogicalOperators.scala:52) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) at scala.collection.immutable.List.foreach(List.scala:392) ``` ### Does this PR introduce _any_ user-facing change? Yes, error msg changed. ### How was this patch tested? New test. Closes #30197 from ulysses-you/SPARK-33294. Authored-by: ulysses Signed-off-by: Wenchen Fan --- .../datasources/DataSourceStrategy.scala | 4 ++-- .../apache/spark/sql/sources/InsertSuite.scala | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index 02dd4e549f93b..b1600a639a9bf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -163,8 +163,8 @@ object DataSourceAnalysis extends Rule[LogicalPlan] with CastSupport { InsertIntoDataSourceCommand(l, query, overwrite) case InsertIntoDir(_, storage, provider, query, overwrite) - if provider.isDefined && provider.get.toLowerCase(Locale.ROOT) != DDLUtils.HIVE_PROVIDER => - + if query.resolved && provider.isDefined && + provider.get.toLowerCase(Locale.ROOT) != DDLUtils.HIVE_PROVIDER => val outputPath = new Path(storage.locationUri.get) if (overwrite) DDLUtils.verifyNotReadPath(query, outputPath) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index 9b5466e8a68f1..4686a0c69de63 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -896,6 +896,23 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { sql("INSERT INTO TABLE insertTable PARTITION(part1='1', part2) SELECT 1 ,'' AS part2") } } + + test("SPARK-33294: Add query resolved check before analyze InsertIntoDir") { + withTempPath { path => + val msg = intercept[AnalysisException] { + sql( + s""" + |INSERT OVERWRITE DIRECTORY '${path.getAbsolutePath}' USING PARQUET + |SELECT * FROM ( + | SELECT c3 FROM ( + | SELECT c1, c2 from values(1,2) t(c1, c2) + | ) + |) + """.stripMargin) + }.getMessage + assert(msg.contains("cannot resolve '`c3`' given input columns")) + } + } } class FileExistingTestFileSystem extends RawLocalFileSystem { From 3af1651e50be3bc2e441be8827441f87d34d99cc 
Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Fri, 30 Oct 2020 17:53:30 +0900 Subject: [PATCH 0361/1009] [SPARK-33297][BUILD] Switch to use flat class loader strategy in SBT ### What changes were proposed in this pull request? This PR proposes to switch the class loader strategy from `ScalaLibrary` to `Flat` (see https://www.scala-sbt.org/1.x/docs/In-Process-Classloaders.html): https://github.com/apache/spark/runs/1314691686 ``` Error: java.util.MissingResourceException: Can't find bundle for base name org.scalactic.ScalacticBundle, locale en Error: at java.util.ResourceBundle.throwMissingResourceException(ResourceBundle.java:1581) Error: at java.util.ResourceBundle.getBundleImpl(ResourceBundle.java:1396) Error: at java.util.ResourceBundle.getBundle(ResourceBundle.java:782) Error: at org.scalactic.Resources$.resourceBundle$lzycompute(Resources.scala:8) Error: at org.scalactic.Resources$.resourceBundle(Resources.scala:8) Error: at org.scalactic.Resources$.pleaseDefineScalacticFillFilePathnameEnvVar(Resources.scala:256) Error: at org.scalactic.source.PositionMacro$PositionMacroImpl.apply(PositionMacro.scala:65) Error: at org.scalactic.source.PositionMacro$.genPosition(PositionMacro.scala:85) Error: at sun.reflect.GeneratedMethodAccessor34.invoke(Unknown Source) Error: at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) Error: at java.lang.reflect.Method.invoke(Method.java:498) ``` See also https://github.com/sbt/sbt/issues/5736 ### Why are the changes needed? To make the build unflaky. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? GitHub Actions build in this test. Closes #30198 from HyukjinKwon/SPARK-33297. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- project/SparkBuild.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 5f2ef480f8de5..55c87fcb3aaa2 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -322,7 +322,11 @@ object SparkBuild extends PomBuild { // disable Mima check for all modules, // to be enabled in specific ones that have previous artifacts - MimaKeys.mimaFailOnNoPrevious := false + MimaKeys.mimaFailOnNoPrevious := false, + + // To prevent intermittent compliation failures, see also SPARK-33297 + // Apparently we can remove this when we use JDK 11. + Test / classLoaderLayeringStrategy := ClassLoaderLayeringStrategy.Flat ) def enable(settings: Seq[Setting[_]])(projectRef: ProjectRef) = { From 7c897c1216dd23e4a973bd82063a88ea9a6f7ca5 Mon Sep 17 00:00:00 2001 From: Dmitry Sabanin Date: Fri, 30 Oct 2020 11:14:42 -0700 Subject: [PATCH 0362/1009] [MINOR][CORE][DOCS] Fix typo in "spark.storage.decommission.shuffleBlocks.enabled" description ### What changes were proposed in this pull request? Small typo fix in the description of `spark.storage.decommission.shuffleBlocks.enabled` property. Closes #30208 from dsabanin/patch-1. 
Authored-by: Dmitry Sabanin Signed-off-by: Dongjoon Hyun --- .../main/scala/org/apache/spark/internal/config/package.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 9a7039a9cfe93..491395c3cbcde 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -423,7 +423,7 @@ package object config { private[spark] val STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED = ConfigBuilder("spark.storage.decommission.shuffleBlocks.enabled") .doc("Whether to transfer shuffle blocks during block manager decommissioning. Requires " + - "a migratable shuffle resolver (like sort based shuffe)") + "a migratable shuffle resolver (like sort based shuffle)") .version("3.1.0") .booleanConf .createWithDefault(false) From 491a0fb08b0c57a99894a0b33c5814854db8de3d Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Fri, 30 Oct 2020 11:26:30 -0700 Subject: [PATCH 0363/1009] [SPARK-33262][K8S][FOLLOWUP] Verify pod allocation does not stall ### What changes were proposed in this pull request? Add a test that pending executor does not stall pod allocation. ### Why are the changes needed? Better test coverage ### Does this PR introduce _any_ user-facing change? Test only change. ### How was this patch tested? New test passes. Closes #30205 from holdenk/verify-pod-allocation-does-not-stall. Authored-by: Holden Karau Signed-off-by: Dongjoon Hyun --- .../k8s/ExecutorPodsAllocatorSuite.scala | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala index 84c07bc588b06..37f9caef656d0 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala @@ -255,6 +255,40 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite with BeforeAndAfter { verify(podOperations).delete() } + test("SPARK-33262: pod allocator does not stall with pending pods") { + when(podOperations + .withLabel(SPARK_APP_ID_LABEL, TEST_SPARK_APP_ID)) + .thenReturn(podOperations) + when(podOperations + .withLabel(SPARK_ROLE_LABEL, SPARK_POD_EXECUTOR_ROLE)) + .thenReturn(podOperations) + when(podOperations + .withLabelIn(SPARK_EXECUTOR_ID_LABEL, "1")) + .thenReturn(labeledPods) + when(podOperations + .withLabelIn(SPARK_EXECUTOR_ID_LABEL, "2", "3", "4", "5", "6")) + .thenReturn(podOperations) + + podsAllocatorUnderTest.setTotalExpectedExecutors(6) + // Initial request of pods + verify(podOperations).create(podWithAttachedContainerForId(1)) + verify(podOperations).create(podWithAttachedContainerForId(2)) + verify(podOperations).create(podWithAttachedContainerForId(3)) + verify(podOperations).create(podWithAttachedContainerForId(4)) + verify(podOperations).create(podWithAttachedContainerForId(5)) + // 4 come up, 1 pending + snapshotsStore.updatePod(pendingExecutor(1)) + snapshotsStore.updatePod(runningExecutor(2)) + snapshotsStore.updatePod(runningExecutor(3)) + snapshotsStore.updatePod(runningExecutor(4)) + snapshotsStore.updatePod(runningExecutor(5)) + // We move 
forward one allocation cycle + waitForExecutorPodsClock.setTime(podAllocationDelay + 1) + snapshotsStore.notifySubscribers() + // We request pod 6 + verify(podOperations).create(podWithAttachedContainerForId(6)) + } + private def executorPodAnswer(): Answer[KubernetesExecutorSpec] = (invocation: InvocationOnMock) => { val k8sConf: KubernetesExecutorConf = invocation.getArgument(0)

From 72ad9dcd5d484a8dd64c08889de85ef9de2a6077 Mon Sep 17 00:00:00 2001 From: Thomas Graves Date: Fri, 30 Oct 2020 17:16:53 -0500 Subject: [PATCH 0364/1009] [SPARK-32037][CORE] Rename blacklisting feature

### What changes were proposed in this pull request? This PR renames the blacklisting feature. I ended up using "excludeOnFailure" or "excluded" in most cases, but there is a mix. I renamed BlacklistTracker to HealthTracker, but for TaskSetBlacklist, HealthTracker didn't make sense to me since it's not the health of the taskset itself but rather a record of what the taskset has excluded, so I renamed it to TaskSetExcludeList. Everywhere else I tried to follow the context, and in most cases "excluded" made sense. It made more sense to me than "blocked" since you are basically excluding those executors and nodes from having tasks scheduled on them; they can be unexcluded later after timeouts and such. For the configs, I changed the names to use excludeOnFailure, which I thought explained it. I unfortunately couldn't get rid of some of the old names because they are part of the event listener and history files. To keep backwards compatibility I kept the events and some of the parsing so that the history server still reads older history files properly. It is not forward compatible, though: a new application writes the "Excluded" events, so an older history server won't properly read and display them as blacklisted. A few of the files below show up as deleted and recreated even though I did a git mv on them; I'm not sure why.

### Why are the changes needed? To get rid of problematic language.

### Does this PR introduce _any_ user-facing change? Config names change, but the old configs still work and are deprecated.

### How was this patch tested? Updated tests, and also manually tested the UI changes and the history server reading older versions of history files and vice versa.

Closes #29906 from tgravescs/SPARK-32037. 
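For users migrating configurations after this rename, a small illustrative sketch follows. The keys are the ones added and deprecated by this patch; the `SparkConf` snippet itself is not part of the change:

```scala
import org.apache.spark.SparkConf

// Sketch only: the new excludeOnFailure keys introduced by this patch.
val conf = new SparkConf()
  .set("spark.excludeOnFailure.enabled", "true")
  .set("spark.excludeOnFailure.task.maxTaskAttemptsPerExecutor", "1")
  .set("spark.excludeOnFailure.timeout", "1h")

// The old spark.blacklist.* keys are still accepted (they are registered as
// alternatives) but log a deprecation warning pointing at the new names, e.g.
// "Please use spark.excludeOnFailure.enabled".
// conf.set("spark.blacklist.enabled", "true")
```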
Lead-authored-by: Thomas Graves Co-authored-by: Thomas Graves Signed-off-by: Thomas Graves --- .../apache/spark/SparkFirehoseListener.java | 38 ++ .../ui/static/executorspage-template.html | 4 +- .../apache/spark/ui/static/executorspage.js | 28 +- .../org/apache/spark/ui/static/stagepage.js | 2 +- .../spark/ui/static/stagespage-template.html | 4 +- .../spark/ExecutorAllocationManager.scala | 12 +- .../scala/org/apache/spark/SparkConf.scala | 28 +- .../org/apache/spark/TaskEndReason.scala | 9 +- .../history/BasicEventFilterBuilder.scala | 2 + .../history/HistoryAppStatusStore.scala | 2 +- .../spark/internal/config/package.scala | 75 ++- .../spark/scheduler/BlacklistTracker.scala | 477 -------------- .../apache/spark/scheduler/DAGScheduler.scala | 4 +- .../scheduler/EventLoggingListener.scala | 26 + .../scheduler/ExecutorFailuresInTaskSet.scala | 2 +- .../spark/scheduler/HealthTracker.scala | 491 ++++++++++++++ .../spark/scheduler/SparkListener.scala | 117 +++- .../spark/scheduler/SparkListenerBus.scala | 12 + .../spark/scheduler/TaskSchedulerImpl.scala | 59 +- ...acklist.scala => TaskSetExcludeList.scala} | 78 ++- .../spark/scheduler/TaskSetManager.scala | 77 +-- .../cluster/CoarseGrainedClusterMessage.scala | 2 +- .../CoarseGrainedSchedulerBackend.scala | 25 +- .../spark/status/AppStatusListener.scala | 131 +++- .../apache/spark/status/AppStatusSource.scala | 16 + .../org/apache/spark/status/LiveEntity.scala | 21 +- .../org/apache/spark/status/api/v1/api.scala | 10 +- .../scala/org/apache/spark/ui/ToolTips.scala | 3 - ...cludeOnFailure_for_stage_expectation.json} | 6 +- ...OnFailure_node_for_stage_expectation.json} | 15 +- .../executor_list_json_expectation.json | 4 +- ...ith_executor_metrics_json_expectation.json | 16 +- .../executor_memory_usage_expectation.json | 20 +- ...or_node_excludeOnFailure_expectation.json} | 20 +- ...udeOnFailure_unexcluding_expectation.json} | 20 +- ...utor_resource_information_expectation.json | 12 +- .../one_stage_attempt_json_expectation.json | 3 +- .../one_stage_json_expectation.json | 3 +- ...age_with_accumulable_json_expectation.json | 3 +- .../stage_with_peak_metrics_expectation.json | 6 +- .../ExecutorAllocationManagerSuite.scala | 6 +- .../apache/spark/HeartbeatReceiverSuite.scala | 2 +- .../StandaloneDynamicAllocationSuite.scala | 8 +- .../history/BasicEventFilterSuite.scala | 10 +- .../history/EventLogFileCompactorSuite.scala | 12 +- .../deploy/history/HistoryServerSuite.scala | 9 +- .../scheduler/BlacklistTrackerSuite.scala | 608 ----------------- .../CoarseGrainedSchedulerBackendSuite.scala | 2 +- ...la => HealthTrackerIntegrationSuite.scala} | 24 +- .../spark/scheduler/HealthTrackerSuite.scala | 615 ++++++++++++++++++ .../scheduler/TaskSchedulerImplSuite.scala | 301 ++++----- .../scheduler/TaskSetBlacklistSuite.scala | 287 -------- .../scheduler/TaskSetExcludelistSuite.scala | 310 +++++++++ .../spark/scheduler/TaskSetManagerSuite.scala | 82 +-- .../KryoSerializerDistributedSuite.scala | 2 +- .../spark/status/AppStatusListenerSuite.scala | 43 +- .../status/api/v1/ExecutorSummarySuite.scala | 6 +- .../apache/spark/util/JsonProtocolSuite.scala | 68 +- docs/configuration.md | 64 +- docs/monitoring.md | 6 +- docs/running-on-yarn.md | 8 +- .../KubernetesClusterSchedulerBackend.scala | 2 +- .../MesosCoarseGrainedSchedulerBackend.scala | 6 +- ...osCoarseGrainedSchedulerBackendSuite.scala | 2 +- .../spark/deploy/yarn/ApplicationMaster.scala | 6 +- .../spark/deploy/yarn/YarnAllocator.scala | 20 +- ...a => YarnAllocatorNodeHealthTracker.scala} | 85 +-- 
.../org/apache/spark/deploy/yarn/config.scala | 11 +- .../cluster/YarnSchedulerBackend.scala | 8 +- ... => YarnAllocatorHealthTrackerSuite.scala} | 90 +-- .../deploy/yarn/YarnAllocatorSuite.scala | 29 +- .../cluster/YarnSchedulerBackendSuite.scala | 17 +- 72 files changed, 2557 insertions(+), 2075 deletions(-) delete mode 100644 core/src/main/scala/org/apache/spark/scheduler/BlacklistTracker.scala create mode 100644 core/src/main/scala/org/apache/spark/scheduler/HealthTracker.scala rename core/src/main/scala/org/apache/spark/scheduler/{TaskSetBlacklist.scala => TaskSetExcludeList.scala} (63%) rename core/src/test/resources/HistoryServerExpectations/{blacklisting_for_stage_expectation.json => excludeOnFailure_for_stage_expectation.json} (99%) rename core/src/test/resources/HistoryServerExpectations/{blacklisting_node_for_stage_expectation.json => excludeOnFailure_node_for_stage_expectation.json} (98%) rename core/src/test/resources/HistoryServerExpectations/{executor_node_blacklisting_expectation.json => executor_node_excludeOnFailure_expectation.json} (92%) rename core/src/test/resources/HistoryServerExpectations/{executor_node_blacklisting_unblacklisting_expectation.json => executor_node_excludeOnFailure_unexcluding_expectation.json} (90%) delete mode 100644 core/src/test/scala/org/apache/spark/scheduler/BlacklistTrackerSuite.scala rename core/src/test/scala/org/apache/spark/scheduler/{BlacklistIntegrationSuite.scala => HealthTrackerIntegrationSuite.scala} (86%) create mode 100644 core/src/test/scala/org/apache/spark/scheduler/HealthTrackerSuite.scala delete mode 100644 core/src/test/scala/org/apache/spark/scheduler/TaskSetBlacklistSuite.scala create mode 100644 core/src/test/scala/org/apache/spark/scheduler/TaskSetExcludelistSuite.scala rename resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/{YarnAllocatorBlacklistTracker.scala => YarnAllocatorNodeHealthTracker.scala} (63%) rename resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/{YarnAllocatorBlacklistTrackerSuite.scala => YarnAllocatorHealthTrackerSuite.scala} (54%) diff --git a/core/src/main/java/org/apache/spark/SparkFirehoseListener.java b/core/src/main/java/org/apache/spark/SparkFirehoseListener.java index c0e72b57d48bd..7cb2455affe48 100644 --- a/core/src/main/java/org/apache/spark/SparkFirehoseListener.java +++ b/core/src/main/java/org/apache/spark/SparkFirehoseListener.java @@ -17,6 +17,7 @@ package org.apache.spark; +import org.apache.spark.annotation.DeveloperApi; import org.apache.spark.scheduler.*; /** @@ -27,7 +28,11 @@ * new methods to SparkListener: forgetting to add a method will result in a compilation error (if * this was a concrete Scala class, default implementations of new event handlers would be inherited * from the SparkListener trait). + * + * Please note until Spark 3.1.0 this was missing the DevelopApi annotation, this needs to be + * taken into account if changing this API before a major release. 
*/ +@DeveloperApi public class SparkFirehoseListener implements SparkListenerInterface { public void onEvent(SparkListenerEvent event) { } @@ -124,34 +129,67 @@ public final void onExecutorBlacklisted(SparkListenerExecutorBlacklisted executo onEvent(executorBlacklisted); } + @Override + public final void onExecutorExcluded(SparkListenerExecutorExcluded executorExcluded) { + onEvent(executorExcluded); + } + @Override public void onExecutorBlacklistedForStage( SparkListenerExecutorBlacklistedForStage executorBlacklistedForStage) { onEvent(executorBlacklistedForStage); } + @Override + public void onExecutorExcludedForStage( + SparkListenerExecutorExcludedForStage executorExcludedForStage) { + onEvent(executorExcludedForStage); + } + @Override public void onNodeBlacklistedForStage( SparkListenerNodeBlacklistedForStage nodeBlacklistedForStage) { onEvent(nodeBlacklistedForStage); } + @Override + public void onNodeExcludedForStage( + SparkListenerNodeExcludedForStage nodeExcludedForStage) { + onEvent(nodeExcludedForStage); + } + @Override public final void onExecutorUnblacklisted( SparkListenerExecutorUnblacklisted executorUnblacklisted) { onEvent(executorUnblacklisted); } + @Override + public final void onExecutorUnexcluded( + SparkListenerExecutorUnexcluded executorUnexcluded) { + onEvent(executorUnexcluded); + } + @Override public final void onNodeBlacklisted(SparkListenerNodeBlacklisted nodeBlacklisted) { onEvent(nodeBlacklisted); } + @Override + public final void onNodeExcluded(SparkListenerNodeExcluded nodeExcluded) { + onEvent(nodeExcluded); + } + @Override public final void onNodeUnblacklisted(SparkListenerNodeUnblacklisted nodeUnblacklisted) { onEvent(nodeUnblacklisted); } + @Override + public final void onNodeUnexcluded(SparkListenerNodeUnexcluded nodeUnexcluded) { + onEvent(nodeUnexcluded); + } + @Override public void onBlockUpdated(SparkListenerBlockUpdated blockUpdated) { onEvent(blockUpdated); diff --git a/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html b/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html index 0729dfe1cef72..5e835c053eb6c 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html +++ b/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html @@ -56,8 +56,8 @@
    Summary
    - Blacklisted + title="Number of executors excluded by the scheduler due to task failures."> + Excluded diff --git a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js index 520edb9cc3e34..d4eaea9103771 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js @@ -26,15 +26,15 @@ function getThreadDumpEnabled() { } function formatStatus(status, type, row) { - if (row.isBlacklisted) { - return "Blacklisted"; + if (row.isExcluded) { + return "Excluded"; } if (status) { - if (row.blacklistedInStages.length == 0) { + if (row.excludedInStages.length == 0) { return "Active" } - return "Active (Blacklisted in Stages: [" + row.blacklistedInStages.join(", ") + "])"; + return "Active (Excluded in Stages: [" + row.excludedInStages.join(", ") + "])"; } return "Dead" } @@ -168,7 +168,7 @@ $(document).ready(function () { var allTotalInputBytes = 0; var allTotalShuffleRead = 0; var allTotalShuffleWrite = 0; - var allTotalBlacklisted = 0; + var allTotalExcluded = 0; var activeExecCnt = 0; var activeRDDBlocks = 0; @@ -190,7 +190,7 @@ $(document).ready(function () { var activeTotalInputBytes = 0; var activeTotalShuffleRead = 0; var activeTotalShuffleWrite = 0; - var activeTotalBlacklisted = 0; + var activeTotalExcluded = 0; var deadExecCnt = 0; var deadRDDBlocks = 0; @@ -212,7 +212,7 @@ $(document).ready(function () { var deadTotalInputBytes = 0; var deadTotalShuffleRead = 0; var deadTotalShuffleWrite = 0; - var deadTotalBlacklisted = 0; + var deadTotalExcluded = 0; response.forEach(function (exec) { var memoryMetrics = { @@ -246,7 +246,7 @@ $(document).ready(function () { allTotalInputBytes += exec.totalInputBytes; allTotalShuffleRead += exec.totalShuffleRead; allTotalShuffleWrite += exec.totalShuffleWrite; - allTotalBlacklisted += exec.isBlacklisted ? 1 : 0; + allTotalExcluded += exec.isExcluded ? 1 : 0; if (exec.isActive) { activeExecCnt += 1; activeRDDBlocks += exec.rddBlocks; @@ -268,7 +268,7 @@ $(document).ready(function () { activeTotalInputBytes += exec.totalInputBytes; activeTotalShuffleRead += exec.totalShuffleRead; activeTotalShuffleWrite += exec.totalShuffleWrite; - activeTotalBlacklisted += exec.isBlacklisted ? 1 : 0; + activeTotalExcluded += exec.isExcluded ? 1 : 0; } else { deadExecCnt += 1; deadRDDBlocks += exec.rddBlocks; @@ -290,7 +290,7 @@ $(document).ready(function () { deadTotalInputBytes += exec.totalInputBytes; deadTotalShuffleRead += exec.totalShuffleRead; deadTotalShuffleWrite += exec.totalShuffleWrite; - deadTotalBlacklisted += exec.isBlacklisted ? 1 : 0; + deadTotalExcluded += exec.isExcluded ? 1 : 0; // todo - TEST BACKWARDS compatibility history? 
} }); @@ -315,7 +315,7 @@ $(document).ready(function () { "allTotalInputBytes": allTotalInputBytes, "allTotalShuffleRead": allTotalShuffleRead, "allTotalShuffleWrite": allTotalShuffleWrite, - "allTotalBlacklisted": allTotalBlacklisted + "allTotalExcluded": allTotalExcluded }; var activeSummary = { "execCnt": ( "Active(" + activeExecCnt + ")"), @@ -338,7 +338,7 @@ $(document).ready(function () { "allTotalInputBytes": activeTotalInputBytes, "allTotalShuffleRead": activeTotalShuffleRead, "allTotalShuffleWrite": activeTotalShuffleWrite, - "allTotalBlacklisted": activeTotalBlacklisted + "allTotalExcluded": activeTotalExcluded }; var deadSummary = { "execCnt": ( "Dead(" + deadExecCnt + ")" ), @@ -361,7 +361,7 @@ $(document).ready(function () { "allTotalInputBytes": deadTotalInputBytes, "allTotalShuffleRead": deadTotalShuffleRead, "allTotalShuffleWrite": deadTotalShuffleWrite, - "allTotalBlacklisted": deadTotalBlacklisted + "allTotalExcluded": deadTotalExcluded }; var data = {executors: response, "execSummary": [activeSummary, deadSummary, totalSummary]}; @@ -547,7 +547,7 @@ $(document).ready(function () { {data: 'allTotalInputBytes', render: formatBytes}, {data: 'allTotalShuffleRead', render: formatBytes}, {data: 'allTotalShuffleWrite', render: formatBytes}, - {data: 'allTotalBlacklisted'} + {data: 'allTotalExcluded'} ], "paging": false, "searching": false, diff --git a/core/src/main/resources/org/apache/spark/ui/static/stagepage.js b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js index 93b37c296271b..ee1115868f69b 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/stagepage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js @@ -433,7 +433,7 @@ $(document).ready(function () { {data : "failedTasks"}, {data : "killedTasks"}, {data : "succeededTasks"}, - {data : "isBlacklistedForStage"}, + {data : "isExcludedForStage"}, { data : function (row, type) { return row.inputRecords != 0 ? formatBytes(row.inputBytes, type) + " / " + row.inputRecords : ""; diff --git a/core/src/main/resources/org/apache/spark/ui/static/stagespage-template.html b/core/src/main/resources/org/apache/spark/ui/static/stagespage-template.html index 77ea70e4ad966..9b40d0dc4a230 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/stagespage-template.html +++ b/core/src/main/resources/org/apache/spark/ui/static/stagespage-template.html @@ -50,8 +50,8 @@
    Aggregated Metrics by Executor
    Succeeded Tasks - Blacklisted + title="Shows if this executor has been excluded by the scheduler due to task failures."> + Excluded Input Size / Records Output Size / Records diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index 1dd64df106bc2..e445f188e1eed 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -312,8 +312,8 @@ private[spark] class ExecutorAllocationManager( if (unschedulableTaskSets > 0) { // Request additional executors to account for task sets having tasks that are unschedulable - // due to blacklisting when the active executor count has already reached the max needed - // which we would normally get. + // due to executors excluded for failures when the active executor count has already reached + // the max needed which we would normally get. val maxNeededForUnschedulables = math.ceil(unschedulableTaskSets * executorAllocationRatio / tasksPerExecutor).toInt math.max(maxNeededWithSpeculationLocalityOffset, @@ -662,10 +662,10 @@ private[spark] class ExecutorAllocationManager( private val resourceProfileIdToStageAttempt = new mutable.HashMap[Int, mutable.Set[StageAttempt]] - // Keep track of unschedulable task sets due to blacklisting. This is a Set of StageAttempt's - // because we'll only take the last unschedulable task in a taskset although there can be more. - // This is done in order to avoid costly loops in the scheduling. - // Check TaskSetManager#getCompletelyBlacklistedTaskIfAny for more details. + // Keep track of unschedulable task sets because of executor/node exclusions from too many task + // failures. This is a Set of StageAttempt's because we'll only take the last unschedulable task + // in a taskset although there can be more. This is done in order to avoid costly loops in the + // scheduling. Check TaskSetManager#getCompletelyExcludedTaskIfAny for more details. private val unschedulableTaskSets = new mutable.HashSet[StageAttempt] // stageAttempt to tuple (the number of task with locality preferences, a map where each pair diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala index 427e98e616515..5f37a1abb1909 100644 --- a/core/src/main/scala/org/apache/spark/SparkConf.scala +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -603,7 +603,7 @@ private[spark] object SparkConf extends Logging { "are no longer accepted. To specify the equivalent now, one may use '64k'."), DeprecatedConfig("spark.rpc", "2.0", "Not used anymore."), DeprecatedConfig("spark.scheduler.executorTaskBlacklistTime", "2.1.0", - "Please use the new blacklisting options, spark.blacklist.*"), + "Please use the new excludedOnFailure options, spark.excludeOnFailure.*"), DeprecatedConfig("spark.yarn.am.port", "2.0.0", "Not used anymore"), DeprecatedConfig("spark.executor.port", "2.0.0", "Not used anymore"), DeprecatedConfig("spark.shuffle.service.index.cache.entries", "2.3.0", @@ -612,7 +612,31 @@ private[spark] object SparkConf extends Logging { DeprecatedConfig("spark.yarn.credentials.file.retention.days", "2.4.0", "Not used anymore."), DeprecatedConfig("spark.yarn.services", "3.0.0", "Feature no longer available."), DeprecatedConfig("spark.executor.plugins", "3.0.0", - "Feature replaced with new plugin API. See Monitoring documentation.") + "Feature replaced with new plugin API. 
See Monitoring documentation."), + DeprecatedConfig("spark.blacklist.enabled", "3.1.0", + "Please use spark.excludeOnFailure.enabled"), + DeprecatedConfig("spark.blacklist.task.maxTaskAttemptsPerExecutor", "3.1.0", + "Please use spark.excludeOnFailure.task.maxTaskAttemptsPerExecutor"), + DeprecatedConfig("spark.blacklist.task.maxTaskAttemptsPerNode", "3.1.0", + "Please use spark.excludeOnFailure.task.maxTaskAttemptsPerNode"), + DeprecatedConfig("spark.blacklist.application.maxFailedTasksPerExecutor", "3.1.0", + "Please use spark.excludeOnFailure.application.maxFailedTasksPerExecutor"), + DeprecatedConfig("spark.blacklist.stage.maxFailedTasksPerExecutor", "3.1.0", + "Please use spark.excludeOnFailure.stage.maxFailedTasksPerExecutor"), + DeprecatedConfig("spark.blacklist.application.maxFailedExecutorsPerNode", "3.1.0", + "Please use spark.excludeOnFailure.application.maxFailedExecutorsPerNode"), + DeprecatedConfig("spark.blacklist.stage.maxFailedExecutorsPerNode", "3.1.0", + "Please use spark.excludeOnFailure.stage.maxFailedExecutorsPerNode"), + DeprecatedConfig("spark.blacklist.timeout", "3.1.0", + "Please use spark.excludeOnFailure.timeout"), + DeprecatedConfig("spark.blacklist.application.fetchFailure.enabled", "3.1.0", + "Please use spark.excludeOnFailure.application.fetchFailure.enabled"), + DeprecatedConfig("spark.scheduler.blacklist.unschedulableTaskSetTimeout", "3.1.0", + "Please use spark.scheduler.excludeOnFailure.unschedulableTaskSetTimeout"), + DeprecatedConfig("spark.blacklist.killBlacklistedExecutors", "3.1.0", + "Please use spark.excludeOnFailure.killExcludedExecutors"), + DeprecatedConfig("spark.yarn.blacklist.executor.launch.blacklisting.enabled", "3.1.0", + "Please use spark.yarn.executor.launch.excludeOnFailure.enabled") ) Map(configs.map { cfg => (cfg.key -> cfg) } : _*) diff --git a/core/src/main/scala/org/apache/spark/TaskEndReason.scala b/core/src/main/scala/org/apache/spark/TaskEndReason.scala index 6606d317e7b86..b304eb97fbdf6 100644 --- a/core/src/main/scala/org/apache/spark/TaskEndReason.scala +++ b/core/src/main/scala/org/apache/spark/TaskEndReason.scala @@ -98,10 +98,11 @@ case class FetchFailed( /** * Fetch failures lead to a different failure handling path: (1) we don't abort the stage after * 4 task failures, instead we immediately go back to the stage which generated the map output, - * and regenerate the missing data. (2) we don't count fetch failures for blacklisting, since - * presumably its not the fault of the executor where the task ran, but the executor which - * stored the data. This is especially important because we might rack up a bunch of - * fetch-failures in rapid succession, on all nodes of the cluster, due to one bad node. + * and regenerate the missing data. (2) we don't count fetch failures from executors excluded + * due to too many task failures, since presumably its not the fault of the executor where + * the task ran, but the executor which stored the data. This is especially important because + * we might rack up a bunch of fetch-failures in rapid succession, on all nodes of the cluster, + * due to one bad node. 
*/ override def countTowardsTaskFailures: Boolean = false } diff --git a/core/src/main/scala/org/apache/spark/deploy/history/BasicEventFilterBuilder.scala b/core/src/main/scala/org/apache/spark/deploy/history/BasicEventFilterBuilder.scala index b18bf2665d6ce..c659d32d16314 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/BasicEventFilterBuilder.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/BasicEventFilterBuilder.scala @@ -160,6 +160,8 @@ private[spark] class BasicEventFilter( case e: SparkListenerExecutorRemoved => liveExecutors.contains(e.executorId) case e: SparkListenerExecutorBlacklisted => liveExecutors.contains(e.executorId) case e: SparkListenerExecutorUnblacklisted => liveExecutors.contains(e.executorId) + case e: SparkListenerExecutorExcluded => liveExecutors.contains(e.executorId) + case e: SparkListenerExecutorUnexcluded => liveExecutors.contains(e.executorId) case e: SparkListenerStageExecutorMetrics => liveExecutors.contains(e.execId) case e: SparkListenerBlockManagerAdded => acceptBlockManagerEvent(e.blockManagerId) case e: SparkListenerBlockManagerRemoved => acceptBlockManagerEvent(e.blockManagerId) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryAppStatusStore.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryAppStatusStore.scala index 7973652b3e254..ac0f102d81a6a 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryAppStatusStore.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryAppStatusStore.scala @@ -73,7 +73,7 @@ private[spark] class HistoryAppStatusStore( source.totalShuffleWrite, source.isBlacklisted, source.maxMemory, source.addTime, source.removeTime, source.removeReason, newExecutorLogs, source.memoryMetrics, source.blacklistedInStages, source.peakMemoryMetrics, source.attributes, source.resources, - source.resourceProfileId) + source.resourceProfileId, source.isExcluded, source.excludedInStages) } } diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 491395c3cbcde..6239ef0491a6f 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -722,74 +722,83 @@ package object config { .booleanConf .createWithDefault(true) - // Blacklist confs - private[spark] val BLACKLIST_ENABLED = - ConfigBuilder("spark.blacklist.enabled") - .version("2.1.0") + private[spark] val EXCLUDE_ON_FAILURE_ENABLED = + ConfigBuilder("spark.excludeOnFailure.enabled") + .version("3.1.0") + .withAlternative("spark.blacklist.enabled") .booleanConf .createOptional private[spark] val MAX_TASK_ATTEMPTS_PER_EXECUTOR = - ConfigBuilder("spark.blacklist.task.maxTaskAttemptsPerExecutor") - .version("2.1.0") + ConfigBuilder("spark.excludeOnFailure.task.maxTaskAttemptsPerExecutor") + .version("3.1.0") + .withAlternative("spark.blacklist.task.maxTaskAttemptsPerExecutor") .intConf .createWithDefault(1) private[spark] val MAX_TASK_ATTEMPTS_PER_NODE = - ConfigBuilder("spark.blacklist.task.maxTaskAttemptsPerNode") - .version("2.1.0") + ConfigBuilder("spark.excludeOnFailure.task.maxTaskAttemptsPerNode") + .version("3.1.0") + .withAlternative("spark.blacklist.task.maxTaskAttemptsPerNode") .intConf .createWithDefault(2) private[spark] val MAX_FAILURES_PER_EXEC = - ConfigBuilder("spark.blacklist.application.maxFailedTasksPerExecutor") - .version("2.2.0") + 
ConfigBuilder("spark.excludeOnFailure.application.maxFailedTasksPerExecutor") + .version("3.1.0") + .withAlternative("spark.blacklist.application.maxFailedTasksPerExecutor") .intConf .createWithDefault(2) private[spark] val MAX_FAILURES_PER_EXEC_STAGE = - ConfigBuilder("spark.blacklist.stage.maxFailedTasksPerExecutor") - .version("2.1.0") + ConfigBuilder("spark.excludeOnFailure.stage.maxFailedTasksPerExecutor") + .version("3.1.0") + .withAlternative("spark.blacklist.stage.maxFailedTasksPerExecutor") .intConf .createWithDefault(2) private[spark] val MAX_FAILED_EXEC_PER_NODE = - ConfigBuilder("spark.blacklist.application.maxFailedExecutorsPerNode") - .version("2.2.0") + ConfigBuilder("spark.excludeOnFailure.application.maxFailedExecutorsPerNode") + .version("3.1.0") + .withAlternative("spark.blacklist.application.maxFailedExecutorsPerNode") .intConf .createWithDefault(2) private[spark] val MAX_FAILED_EXEC_PER_NODE_STAGE = - ConfigBuilder("spark.blacklist.stage.maxFailedExecutorsPerNode") - .version("2.1.0") + ConfigBuilder("spark.excludeOnFailure.stage.maxFailedExecutorsPerNode") + .version("3.1.0") + .withAlternative("spark.blacklist.stage.maxFailedExecutorsPerNode") .intConf .createWithDefault(2) - private[spark] val BLACKLIST_TIMEOUT_CONF = - ConfigBuilder("spark.blacklist.timeout") - .version("2.1.0") + private[spark] val EXCLUDE_ON_FAILURE_TIMEOUT_CONF = + ConfigBuilder("spark.excludeOnFailure.timeout") + .version("3.1.0") + .withAlternative("spark.blacklist.timeout") .timeConf(TimeUnit.MILLISECONDS) .createOptional - private[spark] val BLACKLIST_KILL_ENABLED = - ConfigBuilder("spark.blacklist.killBlacklistedExecutors") - .version("2.2.0") + private[spark] val EXCLUDE_ON_FAILURE_KILL_ENABLED = + ConfigBuilder("spark.excludeOnFailure.killExcludedExecutors") + .version("3.1.0") + .withAlternative("spark.blacklist.killBlacklistedExecutors") .booleanConf .createWithDefault(false) - private[spark] val BLACKLIST_LEGACY_TIMEOUT_CONF = - ConfigBuilder("spark.scheduler.executorTaskBlacklistTime") + private[spark] val EXCLUDE_ON_FAILURE_LEGACY_TIMEOUT_CONF = + ConfigBuilder("spark.scheduler.executorTaskExcludeOnFailureTime") .internal() - .version("1.0.0") + .version("3.1.0") + .withAlternative("spark.scheduler.executorTaskBlacklistTime") .timeConf(TimeUnit.MILLISECONDS) .createOptional - private[spark] val BLACKLIST_FETCH_FAILURE_ENABLED = - ConfigBuilder("spark.blacklist.application.fetchFailure.enabled") - .version("2.3.0") + private[spark] val EXCLUDE_ON_FAILURE_FETCH_FAILURE_ENABLED = + ConfigBuilder("spark.excludeOnFailure.application.fetchFailure.enabled") + .version("3.1.0") + .withAlternative("spark.blacklist.application.fetchFailure.enabled") .booleanConf .createWithDefault(false) - // End blacklist confs private[spark] val UNREGISTER_OUTPUT_ON_HOST_ON_FETCH_FAILURE = ConfigBuilder("spark.files.fetchFailure.unRegisterOutputOnHost") @@ -1453,10 +1462,12 @@ package object config { .createWithDefaultString("365d") private[spark] val UNSCHEDULABLE_TASKSET_TIMEOUT = - ConfigBuilder("spark.scheduler.blacklist.unschedulableTaskSetTimeout") + ConfigBuilder("spark.scheduler.excludeOnFailure.unschedulableTaskSetTimeout") .doc("The timeout in seconds to wait to acquire a new executor and schedule a task " + - "before aborting a TaskSet which is unschedulable because of being completely blacklisted.") - .version("2.4.1") + "before aborting a TaskSet which is unschedulable because all executors are " + + "excluded due to failures.") + .version("3.1.0") + 
.withAlternative("spark.scheduler.blacklist.unschedulableTaskSetTimeout") .timeConf(TimeUnit.SECONDS) .checkValue(v => v >= 0, "The value should be a non negative time value.") .createWithDefault(120) diff --git a/core/src/main/scala/org/apache/spark/scheduler/BlacklistTracker.scala b/core/src/main/scala/org/apache/spark/scheduler/BlacklistTracker.scala deleted file mode 100644 index 9e524c52267be..0000000000000 --- a/core/src/main/scala/org/apache/spark/scheduler/BlacklistTracker.scala +++ /dev/null @@ -1,477 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.scheduler - -import java.util.concurrent.atomic.AtomicReference - -import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} - -import org.apache.spark.{ExecutorAllocationClient, SparkConf, SparkContext} -import org.apache.spark.internal.Logging -import org.apache.spark.internal.config -import org.apache.spark.util.{Clock, SystemClock, Utils} - -/** - * BlacklistTracker is designed to track problematic executors and nodes. It supports blacklisting - * executors and nodes across an entire application (with a periodic expiry). TaskSetManagers add - * additional blacklisting of executors and nodes for individual tasks and stages which works in - * concert with the blacklisting here. - * - * The tracker needs to deal with a variety of workloads, eg.: - * - * * bad user code -- this may lead to many task failures, but that should not count against - * individual executors - * * many small stages -- this may prevent a bad executor for having many failures within one - * stage, but still many failures over the entire application - * * "flaky" executors -- they don't fail every task, but are still faulty enough to merit - * blacklisting - * - * See the design doc on SPARK-8425 for a more in-depth discussion. - * - * THREADING: As with most helpers of TaskSchedulerImpl, this is not thread-safe. Though it is - * called by multiple threads, callers must already have a lock on the TaskSchedulerImpl. The - * one exception is [[nodeBlacklist()]], which can be called without holding a lock. 
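
The renamed configs above keep the old `spark.blacklist.*` keys wired in through `withAlternative`, so a job that still sets a legacy key keeps working and only triggers a deprecation warning. A minimal sketch of the migration from the user side, assuming a placeholder app name; only the key names come from this patch:

```
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

// Exclusion settings are scheduler configs, so set them before the context starts.
val conf = new SparkConf()
  .setAppName("exclude-on-failure-demo")                              // placeholder name
  .set("spark.excludeOnFailure.enabled", "true")                      // new key
  .set("spark.excludeOnFailure.task.maxTaskAttemptsPerExecutor", "1") // new key
  // Legacy key: still resolved through the withAlternative fallback registered above,
  // but SparkConf now reports it as deprecated.
  .set("spark.blacklist.timeout", "1h")

val spark = SparkSession.builder().config(conf).getOrCreate()
```
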
- */ -private[scheduler] class BlacklistTracker ( - private val listenerBus: LiveListenerBus, - conf: SparkConf, - allocationClient: Option[ExecutorAllocationClient], - clock: Clock = new SystemClock()) extends Logging { - - def this(sc: SparkContext, allocationClient: Option[ExecutorAllocationClient]) = { - this(sc.listenerBus, sc.conf, allocationClient) - } - - BlacklistTracker.validateBlacklistConfs(conf) - private val MAX_FAILURES_PER_EXEC = conf.get(config.MAX_FAILURES_PER_EXEC) - private val MAX_FAILED_EXEC_PER_NODE = conf.get(config.MAX_FAILED_EXEC_PER_NODE) - val BLACKLIST_TIMEOUT_MILLIS = BlacklistTracker.getBlacklistTimeout(conf) - private val BLACKLIST_FETCH_FAILURE_ENABLED = conf.get(config.BLACKLIST_FETCH_FAILURE_ENABLED) - - /** - * A map from executorId to information on task failures. Tracks the time of each task failure, - * so that we can avoid blacklisting executors due to failures that are very far apart. We do not - * actively remove from this as soon as tasks hit their timeouts, to avoid the time it would take - * to do so. But it will not grow too large, because as soon as an executor gets too many - * failures, we blacklist the executor and remove its entry here. - */ - private val executorIdToFailureList = new HashMap[String, ExecutorFailureList]() - val executorIdToBlacklistStatus = new HashMap[String, BlacklistedExecutor]() - val nodeIdToBlacklistExpiryTime = new HashMap[String, Long]() - /** - * An immutable copy of the set of nodes that are currently blacklisted. Kept in an - * AtomicReference to make [[nodeBlacklist()]] thread-safe. - */ - private val _nodeBlacklist = new AtomicReference[Set[String]](Set()) - /** - * Time when the next blacklist will expire. Used as a - * shortcut to avoid iterating over all entries in the blacklist when none will have expired. - */ - var nextExpiryTime: Long = Long.MaxValue - /** - * Mapping from nodes to all of the executors that have been blacklisted on that node. We do *not* - * remove from this when executors are removed from spark, so we can track when we get multiple - * successive blacklisted executors on one node. Nonetheless, it will not grow too large because - * there cannot be many blacklisted executors on one node, before we stop requesting more - * executors on that node, and we clean up the list of blacklisted executors once an executor has - * been blacklisted for BLACKLIST_TIMEOUT_MILLIS. - */ - val nodeToBlacklistedExecs = new HashMap[String, HashSet[String]]() - - /** - * Un-blacklists executors and nodes that have been blacklisted for at least - * BLACKLIST_TIMEOUT_MILLIS - */ - def applyBlacklistTimeout(): Unit = { - val now = clock.getTimeMillis() - // quickly check if we've got anything to expire from blacklist -- if not, avoid doing any work - if (now > nextExpiryTime) { - // Apply the timeout to blacklisted nodes and executors - val execsToUnblacklist = executorIdToBlacklistStatus.filter(_._2.expiryTime < now).keys - if (execsToUnblacklist.nonEmpty) { - // Un-blacklist any executors that have been blacklisted longer than the blacklist timeout. 
- logInfo(s"Removing executors $execsToUnblacklist from blacklist because the blacklist " + - s"for those executors has timed out") - execsToUnblacklist.foreach { exec => - val status = executorIdToBlacklistStatus.remove(exec).get - val failedExecsOnNode = nodeToBlacklistedExecs(status.node) - listenerBus.post(SparkListenerExecutorUnblacklisted(now, exec)) - failedExecsOnNode.remove(exec) - if (failedExecsOnNode.isEmpty) { - nodeToBlacklistedExecs.remove(status.node) - } - } - } - val nodesToUnblacklist = nodeIdToBlacklistExpiryTime.filter(_._2 < now).keys - if (nodesToUnblacklist.nonEmpty) { - // Un-blacklist any nodes that have been blacklisted longer than the blacklist timeout. - logInfo(s"Removing nodes $nodesToUnblacklist from blacklist because the blacklist " + - s"has timed out") - nodesToUnblacklist.foreach { node => - nodeIdToBlacklistExpiryTime.remove(node) - listenerBus.post(SparkListenerNodeUnblacklisted(now, node)) - } - _nodeBlacklist.set(nodeIdToBlacklistExpiryTime.keySet.toSet) - } - updateNextExpiryTime() - } - } - - private def updateNextExpiryTime(): Unit = { - val execMinExpiry = if (executorIdToBlacklistStatus.nonEmpty) { - executorIdToBlacklistStatus.map{_._2.expiryTime}.min - } else { - Long.MaxValue - } - val nodeMinExpiry = if (nodeIdToBlacklistExpiryTime.nonEmpty) { - nodeIdToBlacklistExpiryTime.values.min - } else { - Long.MaxValue - } - nextExpiryTime = math.min(execMinExpiry, nodeMinExpiry) - } - - private def killExecutor(exec: String, msg: String): Unit = { - allocationClient match { - case Some(a) => - logInfo(msg) - a.killExecutors(Seq(exec), adjustTargetNumExecutors = false, countFailures = false, - force = true) - case None => - logInfo(s"Not attempting to kill blacklisted executor id $exec " + - s"since allocation client is not defined.") - } - } - - private def killBlacklistedExecutor(exec: String): Unit = { - if (conf.get(config.BLACKLIST_KILL_ENABLED)) { - killExecutor(exec, - s"Killing blacklisted executor id $exec since ${config.BLACKLIST_KILL_ENABLED.key} is set.") - } - } - - private[scheduler] def killBlacklistedIdleExecutor(exec: String): Unit = { - killExecutor(exec, - s"Killing blacklisted idle executor id $exec because of task unschedulability and trying " + - "to acquire a new executor.") - } - - private def killExecutorsOnBlacklistedNode(node: String): Unit = { - if (conf.get(config.BLACKLIST_KILL_ENABLED)) { - allocationClient match { - case Some(a) => - logInfo(s"Killing all executors on blacklisted host $node " + - s"since ${config.BLACKLIST_KILL_ENABLED.key} is set.") - if (a.killExecutorsOnHost(node) == false) { - logError(s"Killing executors on node $node failed.") - } - case None => - logWarning(s"Not attempting to kill executors on blacklisted host $node " + - s"since allocation client is not defined.") - } - } - } - - def updateBlacklistForFetchFailure(host: String, exec: String): Unit = { - if (BLACKLIST_FETCH_FAILURE_ENABLED) { - // If we blacklist on fetch failures, we are implicitly saying that we believe the failure is - // non-transient, and can't be recovered from (even if this is the first fetch failure, - // stage is retried after just one failure, so we don't always get a chance to collect - // multiple fetch failures). - // If the external shuffle-service is on, then every other executor on this node would - // be suffering from the same issue, so we should blacklist (and potentially kill) all - // of them immediately. 
- - val now = clock.getTimeMillis() - val expiryTimeForNewBlacklists = now + BLACKLIST_TIMEOUT_MILLIS - - if (conf.get(config.SHUFFLE_SERVICE_ENABLED)) { - if (!nodeIdToBlacklistExpiryTime.contains(host)) { - logInfo(s"blacklisting node $host due to fetch failure of external shuffle service") - - nodeIdToBlacklistExpiryTime.put(host, expiryTimeForNewBlacklists) - listenerBus.post(SparkListenerNodeBlacklisted(now, host, 1)) - _nodeBlacklist.set(nodeIdToBlacklistExpiryTime.keySet.toSet) - killExecutorsOnBlacklistedNode(host) - updateNextExpiryTime() - } - } else if (!executorIdToBlacklistStatus.contains(exec)) { - logInfo(s"Blacklisting executor $exec due to fetch failure") - - executorIdToBlacklistStatus.put(exec, BlacklistedExecutor(host, expiryTimeForNewBlacklists)) - // We hardcoded number of failure tasks to 1 for fetch failure, because there's no - // reattempt for such failure. - listenerBus.post(SparkListenerExecutorBlacklisted(now, exec, 1)) - updateNextExpiryTime() - killBlacklistedExecutor(exec) - - val blacklistedExecsOnNode = nodeToBlacklistedExecs.getOrElseUpdate(host, HashSet[String]()) - blacklistedExecsOnNode += exec - } - } - } - - def updateBlacklistForSuccessfulTaskSet( - stageId: Int, - stageAttemptId: Int, - failuresByExec: HashMap[String, ExecutorFailuresInTaskSet]): Unit = { - // if any tasks failed, we count them towards the overall failure count for the executor at - // this point. - val now = clock.getTimeMillis() - failuresByExec.foreach { case (exec, failuresInTaskSet) => - val appFailuresOnExecutor = - executorIdToFailureList.getOrElseUpdate(exec, new ExecutorFailureList) - appFailuresOnExecutor.addFailures(stageId, stageAttemptId, failuresInTaskSet) - appFailuresOnExecutor.dropFailuresWithTimeoutBefore(now) - val newTotal = appFailuresOnExecutor.numUniqueTaskFailures - - val expiryTimeForNewBlacklists = now + BLACKLIST_TIMEOUT_MILLIS - // If this pushes the total number of failures over the threshold, blacklist the executor. - // If its already blacklisted, we avoid "re-blacklisting" (which can happen if there were - // other tasks already running in another taskset when it got blacklisted), because it makes - // some of the logic around expiry times a little more confusing. But it also wouldn't be a - // problem to re-blacklist, with a later expiry time. - if (newTotal >= MAX_FAILURES_PER_EXEC && !executorIdToBlacklistStatus.contains(exec)) { - logInfo(s"Blacklisting executor id: $exec because it has $newTotal" + - s" task failures in successful task sets") - val node = failuresInTaskSet.node - executorIdToBlacklistStatus.put(exec, BlacklistedExecutor(node, expiryTimeForNewBlacklists)) - listenerBus.post(SparkListenerExecutorBlacklisted(now, exec, newTotal)) - executorIdToFailureList.remove(exec) - updateNextExpiryTime() - killBlacklistedExecutor(exec) - - // In addition to blacklisting the executor, we also update the data for failures on the - // node, and potentially put the entire node into a blacklist as well. - val blacklistedExecsOnNode = nodeToBlacklistedExecs.getOrElseUpdate(node, HashSet[String]()) - blacklistedExecsOnNode += exec - // If the node is already in the blacklist, we avoid adding it again with a later expiry - // time. 
- if (blacklistedExecsOnNode.size >= MAX_FAILED_EXEC_PER_NODE && - !nodeIdToBlacklistExpiryTime.contains(node)) { - logInfo(s"Blacklisting node $node because it has ${blacklistedExecsOnNode.size} " + - s"executors blacklisted: ${blacklistedExecsOnNode}") - nodeIdToBlacklistExpiryTime.put(node, expiryTimeForNewBlacklists) - listenerBus.post(SparkListenerNodeBlacklisted(now, node, blacklistedExecsOnNode.size)) - _nodeBlacklist.set(nodeIdToBlacklistExpiryTime.keySet.toSet) - killExecutorsOnBlacklistedNode(node) - } - } - } - } - - def isExecutorBlacklisted(executorId: String): Boolean = { - executorIdToBlacklistStatus.contains(executorId) - } - - /** - * Get the full set of nodes that are blacklisted. Unlike other methods in this class, this *IS* - * thread-safe -- no lock required on a taskScheduler. - */ - def nodeBlacklist(): Set[String] = { - _nodeBlacklist.get() - } - - def isNodeBlacklisted(node: String): Boolean = { - nodeIdToBlacklistExpiryTime.contains(node) - } - - def handleRemovedExecutor(executorId: String): Unit = { - // We intentionally do not clean up executors that are already blacklisted in - // nodeToBlacklistedExecs, so that if another executor on the same node gets blacklisted, we can - // blacklist the entire node. We also can't clean up executorIdToBlacklistStatus, so we can - // eventually remove the executor after the timeout. Despite not clearing those structures - // here, we don't expect they will grow too big since you won't get too many executors on one - // node, and the timeout will clear it up periodically in any case. - executorIdToFailureList -= executorId - } - - - /** - * Tracks all failures for one executor (that have not passed the timeout). - * - * In general we actually expect this to be extremely small, since it won't contain more than the - * maximum number of task failures before an executor is failed (default 2). - */ - private[scheduler] final class ExecutorFailureList extends Logging { - - private case class TaskId(stage: Int, stageAttempt: Int, taskIndex: Int) - - /** - * All failures on this executor in successful task sets. - */ - private var failuresAndExpiryTimes = ArrayBuffer[(TaskId, Long)]() - /** - * As an optimization, we track the min expiry time over all entries in failuresAndExpiryTimes - * so its quick to tell if there are any failures with expiry before the current time. - */ - private var minExpiryTime = Long.MaxValue - - def addFailures( - stage: Int, - stageAttempt: Int, - failuresInTaskSet: ExecutorFailuresInTaskSet): Unit = { - failuresInTaskSet.taskToFailureCountAndFailureTime.foreach { - case (taskIdx, (_, failureTime)) => - val expiryTime = failureTime + BLACKLIST_TIMEOUT_MILLIS - failuresAndExpiryTimes += ((TaskId(stage, stageAttempt, taskIdx), expiryTime)) - if (expiryTime < minExpiryTime) { - minExpiryTime = expiryTime - } - } - } - - /** - * The number of unique tasks that failed on this executor. Only counts failures within the - * timeout, and in successful tasksets. - */ - def numUniqueTaskFailures: Int = failuresAndExpiryTimes.size - - def isEmpty: Boolean = failuresAndExpiryTimes.isEmpty - - /** - * Apply the timeout to individual tasks. This is to prevent one-off failures that are very - * spread out in time (and likely have nothing to do with problems on the executor) from - * triggering blacklisting. However, note that we do *not* remove executors and nodes from - * the blacklist as we expire individual task failures -- each have their own timeout. 
Eg., - * suppose: - * * timeout = 10, maxFailuresPerExec = 2 - * * Task 1 fails on exec 1 at time 0 - * * Task 2 fails on exec 1 at time 5 - * --> exec 1 is blacklisted from time 5 - 15. - * This is to simplify the implementation, as well as keep the behavior easier to understand - * for the end user. - */ - def dropFailuresWithTimeoutBefore(dropBefore: Long): Unit = { - if (minExpiryTime < dropBefore) { - var newMinExpiry = Long.MaxValue - val newFailures = new ArrayBuffer[(TaskId, Long)] - failuresAndExpiryTimes.foreach { case (task, expiryTime) => - if (expiryTime >= dropBefore) { - newFailures += ((task, expiryTime)) - if (expiryTime < newMinExpiry) { - newMinExpiry = expiryTime - } - } - } - failuresAndExpiryTimes = newFailures - minExpiryTime = newMinExpiry - } - } - - override def toString(): String = { - s"failures = $failuresAndExpiryTimes" - } - } - -} - -private[spark] object BlacklistTracker extends Logging { - - private val DEFAULT_TIMEOUT = "1h" - - /** - * Returns true if the blacklist is enabled, based on checking the configuration in the following - * order: - * 1. Is it specifically enabled or disabled? - * 2. Is it enabled via the legacy timeout conf? - * 3. Default is off - */ - def isBlacklistEnabled(conf: SparkConf): Boolean = { - conf.get(config.BLACKLIST_ENABLED) match { - case Some(enabled) => - enabled - case None => - // if they've got a non-zero setting for the legacy conf, always enable the blacklist, - // otherwise, use the default. - val legacyKey = config.BLACKLIST_LEGACY_TIMEOUT_CONF.key - conf.get(config.BLACKLIST_LEGACY_TIMEOUT_CONF).exists { legacyTimeout => - if (legacyTimeout == 0) { - logWarning(s"Turning off blacklisting due to legacy configuration: $legacyKey == 0") - false - } else { - logWarning(s"Turning on blacklisting due to legacy configuration: $legacyKey > 0") - true - } - } - } - } - - def getBlacklistTimeout(conf: SparkConf): Long = { - conf.get(config.BLACKLIST_TIMEOUT_CONF).getOrElse { - conf.get(config.BLACKLIST_LEGACY_TIMEOUT_CONF).getOrElse { - Utils.timeStringAsMs(DEFAULT_TIMEOUT) - } - } - } - - /** - * Verify that blacklist configurations are consistent; if not, throw an exception. Should only - * be called if blacklisting is enabled. - * - * The configuration for the blacklist is expected to adhere to a few invariants. Default - * values follow these rules of course, but users may unwittingly change one configuration - * without making the corresponding adjustment elsewhere. This ensures we fail-fast when - * there are such misconfigurations. - */ - def validateBlacklistConfs(conf: SparkConf): Unit = { - - def mustBePos(k: String, v: String): Unit = { - throw new IllegalArgumentException(s"$k was $v, but must be > 0.") - } - - Seq( - config.MAX_TASK_ATTEMPTS_PER_EXECUTOR, - config.MAX_TASK_ATTEMPTS_PER_NODE, - config.MAX_FAILURES_PER_EXEC_STAGE, - config.MAX_FAILED_EXEC_PER_NODE_STAGE, - config.MAX_FAILURES_PER_EXEC, - config.MAX_FAILED_EXEC_PER_NODE - ).foreach { config => - val v = conf.get(config) - if (v <= 0) { - mustBePos(config.key, v.toString) - } - } - - val timeout = getBlacklistTimeout(conf) - if (timeout <= 0) { - // first, figure out where the timeout came from, to include the right conf in the message. 
- conf.get(config.BLACKLIST_TIMEOUT_CONF) match { - case Some(t) => - mustBePos(config.BLACKLIST_TIMEOUT_CONF.key, timeout.toString) - case None => - mustBePos(config.BLACKLIST_LEGACY_TIMEOUT_CONF.key, timeout.toString) - } - } - - val maxTaskFailures = conf.get(config.TASK_MAX_FAILURES) - val maxNodeAttempts = conf.get(config.MAX_TASK_ATTEMPTS_PER_NODE) - - if (maxNodeAttempts >= maxTaskFailures) { - throw new IllegalArgumentException(s"${config.MAX_TASK_ATTEMPTS_PER_NODE.key} " + - s"( = ${maxNodeAttempts}) was >= ${config.TASK_MAX_FAILURES.key} " + - s"( = ${maxTaskFailures} ). Though blacklisting is enabled, with this configuration, " + - s"Spark will not be robust to one bad node. Decrease " + - s"${config.MAX_TASK_ATTEMPTS_PER_NODE.key}, increase ${config.TASK_MAX_FAILURES.key}, " + - s"or disable blacklisting with ${config.BLACKLIST_ENABLED.key}") - } - } -} - -private final case class BlacklistedExecutor(node: String, expiryTime: Long) diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index 080e0e7f1552f..13b766e654832 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -333,8 +333,8 @@ private[spark] class DAGScheduler( } /** - * Called by the TaskSetManager when a taskset becomes unschedulable due to blacklisting and - * dynamic allocation is enabled. + * Called by the TaskSetManager when a taskset becomes unschedulable due to executors being + * excluded because of too many task failures and dynamic allocation is enabled. */ def unschedulableTaskSetAdded( stageId: Int, diff --git a/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala b/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala index b2e9a0b2a04e8..1fda03f732636 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala @@ -191,27 +191,53 @@ private[spark] class EventLoggingListener( logEvent(event, flushLogger = true) } + override def onExecutorExcluded(event: SparkListenerExecutorExcluded): Unit = { + logEvent(event, flushLogger = true) + } + override def onExecutorBlacklistedForStage( event: SparkListenerExecutorBlacklistedForStage): Unit = { logEvent(event, flushLogger = true) } + override def onExecutorExcludedForStage( + event: SparkListenerExecutorExcludedForStage): Unit = { + logEvent(event, flushLogger = true) + } + override def onNodeBlacklistedForStage(event: SparkListenerNodeBlacklistedForStage): Unit = { logEvent(event, flushLogger = true) } + override def onNodeExcludedForStage(event: SparkListenerNodeExcludedForStage): Unit = { + logEvent(event, flushLogger = true) + } + override def onExecutorUnblacklisted(event: SparkListenerExecutorUnblacklisted): Unit = { logEvent(event, flushLogger = true) } + override def onExecutorUnexcluded(event: SparkListenerExecutorUnexcluded): Unit = { + logEvent(event, flushLogger = true) + } + + override def onNodeBlacklisted(event: SparkListenerNodeBlacklisted): Unit = { logEvent(event, flushLogger = true) } + override def onNodeExcluded(event: SparkListenerNodeExcluded): Unit = { + logEvent(event, flushLogger = true) + } + override def onNodeUnblacklisted(event: SparkListenerNodeUnblacklisted): Unit = { logEvent(event, flushLogger = true) } + override def onNodeUnexcluded(event: SparkListenerNodeUnexcluded): Unit = { + 
logEvent(event, flushLogger = true) + } + override def onBlockUpdated(event: SparkListenerBlockUpdated): Unit = { if (shouldLogBlockUpdates) { logEvent(event, flushLogger = true) diff --git a/core/src/main/scala/org/apache/spark/scheduler/ExecutorFailuresInTaskSet.scala b/core/src/main/scala/org/apache/spark/scheduler/ExecutorFailuresInTaskSet.scala index 70553d8be28b5..f27c1560f8272 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/ExecutorFailuresInTaskSet.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ExecutorFailuresInTaskSet.scala @@ -19,7 +19,7 @@ package org.apache.spark.scheduler import scala.collection.mutable.HashMap /** - * Small helper for tracking failed tasks for blacklisting purposes. Info on all failures on one + * Small helper for tracking failed tasks for exclusion purposes. Info on all failures on one * executor, within one task set. */ private[scheduler] class ExecutorFailuresInTaskSet(val node: String) { diff --git a/core/src/main/scala/org/apache/spark/scheduler/HealthTracker.scala b/core/src/main/scala/org/apache/spark/scheduler/HealthTracker.scala new file mode 100644 index 0000000000000..9bbacea94bf68 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/scheduler/HealthTracker.scala @@ -0,0 +1,491 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.scheduler + +import java.util.concurrent.atomic.AtomicReference + +import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} + +import org.apache.spark.{ExecutorAllocationClient, SparkConf, SparkContext} +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config +import org.apache.spark.util.{Clock, SystemClock, Utils} + +/** + * HealthTracker is designed to track problematic executors and nodes. It supports excluding + * executors and nodes across an entire application (with a periodic expiry). TaskSetManagers add + * additional logic for exclusion of executors and nodes for individual tasks and stages which + * works in concert with the logic here. + * + * The tracker needs to deal with a variety of workloads, eg.: + * + * * bad user code -- this may lead to many task failures, but that should not count against + * individual executors + * * many small stages -- this may prevent a bad executor for having many failures within one + * stage, but still many failures over the entire application + * * "flaky" executors -- they don't fail every task, but are still faulty enough to merit + * excluding + * + * See the design doc on SPARK-8425 for a more in-depth discussion. Note SPARK-32037 renamed + * the feature. + * + * THREADING: As with most helpers of TaskSchedulerImpl, this is not thread-safe. 
Though it is + * called by multiple threads, callers must already have a lock on the TaskSchedulerImpl. The + * one exception is [[excludedNodeList()]], which can be called without holding a lock. + */ +private[scheduler] class HealthTracker ( + private val listenerBus: LiveListenerBus, + conf: SparkConf, + allocationClient: Option[ExecutorAllocationClient], + clock: Clock = new SystemClock()) extends Logging { + + def this(sc: SparkContext, allocationClient: Option[ExecutorAllocationClient]) = { + this(sc.listenerBus, sc.conf, allocationClient) + } + + HealthTracker.validateExcludeOnFailureConfs(conf) + private val MAX_FAILURES_PER_EXEC = conf.get(config.MAX_FAILURES_PER_EXEC) + private val MAX_FAILED_EXEC_PER_NODE = conf.get(config.MAX_FAILED_EXEC_PER_NODE) + val EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS = HealthTracker.getExludeOnFailureTimeout(conf) + private val EXCLUDE_FETCH_FAILURE_ENABLED = + conf.get(config.EXCLUDE_ON_FAILURE_FETCH_FAILURE_ENABLED) + + /** + * A map from executorId to information on task failures. Tracks the time of each task failure, + * so that we can avoid excluding executors due to failures that are very far apart. We do not + * actively remove from this as soon as tasks hit their timeouts, to avoid the time it would take + * to do so. But it will not grow too large, because as soon as an executor gets too many + * failures, we exclude the executor and remove its entry here. + */ + private val executorIdToFailureList = new HashMap[String, ExecutorFailureList]() + val executorIdToExcludedStatus = new HashMap[String, ExcludedExecutor]() + val nodeIdToExcludedExpiryTime = new HashMap[String, Long]() + /** + * An immutable copy of the set of nodes that are currently excluded. Kept in an + * AtomicReference to make [[excludedNodeList()]] thread-safe. + */ + private val _excludedNodeList = new AtomicReference[Set[String]](Set()) + /** + * Time when the next excluded node will expire. Used as a shortcut to + * avoid iterating over all entries in the excludedNodeList when none will have expired. + */ + var nextExpiryTime: Long = Long.MaxValue + /** + * Mapping from nodes to all of the executors that have been excluded on that node. We do *not* + * remove from this when executors are removed from spark, so we can track when we get multiple + * successive excluded executors on one node. Nonetheless, it will not grow too large because + * there cannot be many excluded executors on one node, before we stop requesting more + * executors on that node, and we clean up the list of exluded executors once an executor has + * been excluded for EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS. + */ + val nodeToExcludedExecs = new HashMap[String, HashSet[String]]() + + /** + * Include executors and nodes that have been excluded for at least + * EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS + */ + def applyExcludeOnFailureTimeout(): Unit = { + val now = clock.getTimeMillis() + // quickly check if we've got anything to expire that is excluded -- if not, + // avoid doing any work + if (now > nextExpiryTime) { + // Apply the timeout to excluded nodes and executors + val execsToInclude = executorIdToExcludedStatus.filter(_._2.expiryTime < now).keys + if (execsToInclude.nonEmpty) { + // Include any executors that have been exluded longer than the excludeOnFailure timeout. 
+ logInfo(s"Removing executors $execsToInclude from exclude list because the " + s"exclusion for those executors has timed out") + execsToInclude.foreach { exec => + val status = executorIdToExcludedStatus.remove(exec).get + val failedExecsOnNode = nodeToExcludedExecs(status.node) + // post both to keep backwards compatibility + listenerBus.post(SparkListenerExecutorUnblacklisted(now, exec)) + listenerBus.post(SparkListenerExecutorUnexcluded(now, exec)) + failedExecsOnNode.remove(exec) + if (failedExecsOnNode.isEmpty) { + nodeToExcludedExecs.remove(status.node) + } + } + } + val nodesToInclude = nodeIdToExcludedExpiryTime.filter(_._2 < now).keys + if (nodesToInclude.nonEmpty) { + // Include any nodes that have been excluded longer than the excludeOnFailure timeout. + logInfo(s"Removing nodes $nodesToInclude from exclude list because the " + + s"exclusion for those nodes has timed out") + nodesToInclude.foreach { node => + nodeIdToExcludedExpiryTime.remove(node) + // post both to keep backwards compatibility + listenerBus.post(SparkListenerNodeUnblacklisted(now, node)) + listenerBus.post(SparkListenerNodeUnexcluded(now, node)) + } + _excludedNodeList.set(nodeIdToExcludedExpiryTime.keySet.toSet) + } + updateNextExpiryTime() + } + } + + private def updateNextExpiryTime(): Unit = { + val execMinExpiry = if (executorIdToExcludedStatus.nonEmpty) { + executorIdToExcludedStatus.map{_._2.expiryTime}.min + } else { + Long.MaxValue + } + val nodeMinExpiry = if (nodeIdToExcludedExpiryTime.nonEmpty) { + nodeIdToExcludedExpiryTime.values.min + } else { + Long.MaxValue + } + nextExpiryTime = math.min(execMinExpiry, nodeMinExpiry) + } + + private def killExecutor(exec: String, msg: String): Unit = { + allocationClient match { + case Some(a) => + logInfo(msg) + a.killExecutors(Seq(exec), adjustTargetNumExecutors = false, countFailures = false, + force = true) + case None => + logInfo(s"Not attempting to kill excluded executor id $exec " + + s"since allocation client is not defined.") + } + } + + private def killExcludedExecutor(exec: String): Unit = { + if (conf.get(config.EXCLUDE_ON_FAILURE_KILL_ENABLED)) { + killExecutor(exec, s"Killing excluded executor id $exec since " + + s"${config.EXCLUDE_ON_FAILURE_KILL_ENABLED.key} is set.") + } + } + + private[scheduler] def killExcludedIdleExecutor(exec: String): Unit = { + killExecutor(exec, + s"Killing excluded idle executor id $exec because of task unschedulability and trying " + + "to acquire a new executor.") + } + + private def killExecutorsOnExcludedNode(node: String): Unit = { + if (conf.get(config.EXCLUDE_ON_FAILURE_KILL_ENABLED)) { + allocationClient match { + case Some(a) => + logInfo(s"Killing all executors on excluded host $node " + + s"since ${config.EXCLUDE_ON_FAILURE_KILL_ENABLED.key} is set.") + if (a.killExecutorsOnHost(node) == false) { + logError(s"Killing executors on node $node failed.") + } + case None => + logWarning(s"Not attempting to kill executors on excluded host $node " + + s"since allocation client is not defined.") + } + } + } + + def updateExcludedForFetchFailure(host: String, exec: String): Unit = { + if (EXCLUDE_FETCH_FAILURE_ENABLED) { + // If we exclude on fetch failures, we are implicitly saying that we believe the failure is + // non-transient, and can't be recovered from (even if this is the first fetch failure, + // stage is retried after just one failure, so we don't always get a chance to collect + // multiple fetch failures). 
+ // If the external shuffle-service is on, then every other executor on this node would + // be suffering from the same issue, so we should exclude (and potentially kill) all + // of them immediately. + + val now = clock.getTimeMillis() + val expiryTimeForNewExcludes = now + EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS + + if (conf.get(config.SHUFFLE_SERVICE_ENABLED)) { + if (!nodeIdToExcludedExpiryTime.contains(host)) { + logInfo(s"excluding node $host due to fetch failure of external shuffle service") + + nodeIdToExcludedExpiryTime.put(host, expiryTimeForNewExcludes) + // post both to keep backwards compatibility + listenerBus.post(SparkListenerNodeBlacklisted(now, host, 1)) + listenerBus.post(SparkListenerNodeExcluded(now, host, 1)) + _excludedNodeList.set(nodeIdToExcludedExpiryTime.keySet.toSet) + killExecutorsOnExcludedNode(host) + updateNextExpiryTime() + } + } else if (!executorIdToExcludedStatus.contains(exec)) { + logInfo(s"Excluding executor $exec due to fetch failure") + + executorIdToExcludedStatus.put(exec, ExcludedExecutor(host, expiryTimeForNewExcludes)) + // We hardcoded number of failure tasks to 1 for fetch failure, because there's no + // reattempt for such failure. + // post both to keep backwards compatibility + listenerBus.post(SparkListenerExecutorBlacklisted(now, exec, 1)) + listenerBus.post(SparkListenerExecutorExcluded(now, exec, 1)) + updateNextExpiryTime() + killExcludedExecutor(exec) + + val excludedExecsOnNode = nodeToExcludedExecs.getOrElseUpdate(host, HashSet[String]()) + excludedExecsOnNode += exec + } + } + } + + def updateExcludedForSuccessfulTaskSet( + stageId: Int, + stageAttemptId: Int, + failuresByExec: HashMap[String, ExecutorFailuresInTaskSet]): Unit = { + // if any tasks failed, we count them towards the overall failure count for the executor at + // this point. + val now = clock.getTimeMillis() + failuresByExec.foreach { case (exec, failuresInTaskSet) => + val appFailuresOnExecutor = + executorIdToFailureList.getOrElseUpdate(exec, new ExecutorFailureList) + appFailuresOnExecutor.addFailures(stageId, stageAttemptId, failuresInTaskSet) + appFailuresOnExecutor.dropFailuresWithTimeoutBefore(now) + val newTotal = appFailuresOnExecutor.numUniqueTaskFailures + + val expiryTimeForNewExcludes = now + EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS + // If this pushes the total number of failures over the threshold, exclude the executor. + // If its already excluded, we avoid "re-excluding" (which can happen if there were + // other tasks already running in another taskset when it got excluded), because it makes + // some of the logic around expiry times a little more confusing. But it also wouldn't be a + // problem to re-exclude, with a later expiry time. + if (newTotal >= MAX_FAILURES_PER_EXEC && !executorIdToExcludedStatus.contains(exec)) { + logInfo(s"Excluding executor id: $exec because it has $newTotal" + + s" task failures in successful task sets") + val node = failuresInTaskSet.node + executorIdToExcludedStatus.put(exec, ExcludedExecutor(node, expiryTimeForNewExcludes)) + // post both to keep backwards compatibility + listenerBus.post(SparkListenerExecutorBlacklisted(now, exec, newTotal)) + listenerBus.post(SparkListenerExecutorExcluded(now, exec, newTotal)) + executorIdToFailureList.remove(exec) + updateNextExpiryTime() + killExcludedExecutor(exec) + + // In addition to excluding the executor, we also update the data for failures on the + // node, and potentially exclude the entire node as well. 
+ val excludedExecsOnNode = nodeToExcludedExecs.getOrElseUpdate(node, HashSet[String]()) + excludedExecsOnNode += exec + // If the node is already excluded, we avoid adding it again with a later expiry + // time. + if (excludedExecsOnNode.size >= MAX_FAILED_EXEC_PER_NODE && + !nodeIdToExcludedExpiryTime.contains(node)) { + logInfo(s"Excluding node $node because it has ${excludedExecsOnNode.size} " + + s"executors excluded: ${excludedExecsOnNode}") + nodeIdToExcludedExpiryTime.put(node, expiryTimeForNewExcludes) + // post both to keep backwards compatibility + listenerBus.post(SparkListenerNodeBlacklisted(now, node, excludedExecsOnNode.size)) + listenerBus.post(SparkListenerNodeExcluded(now, node, excludedExecsOnNode.size)) + _excludedNodeList.set(nodeIdToExcludedExpiryTime.keySet.toSet) + killExecutorsOnExcludedNode(node) + } + } + } + } + + def isExecutorExcluded(executorId: String): Boolean = { + executorIdToExcludedStatus.contains(executorId) + } + + /** + * Get the full set of nodes that are excluded. Unlike other methods in this class, this *IS* + * thread-safe -- no lock required on a taskScheduler. + */ + def excludedNodeList(): Set[String] = { + _excludedNodeList.get() + } + + def isNodeExcluded(node: String): Boolean = { + nodeIdToExcludedExpiryTime.contains(node) + } + + def handleRemovedExecutor(executorId: String): Unit = { + // We intentionally do not clean up executors that are already excluded in + // nodeToExcludedExecs, so that if another executor on the same node gets excluded, we can + // exclude the entire node. We also can't clean up executorIdToExcludedStatus, so we can + // eventually remove the executor after the timeout. Despite not clearing those structures + // here, we don't expect they will grow too big since you won't get too many executors on one + // node, and the timeout will clear it up periodically in any case. + executorIdToFailureList -= executorId + } + + /** + * Tracks all failures for one executor (that have not passed the timeout). + * + * In general we actually expect this to be extremely small, since it won't contain more than the + * maximum number of task failures before an executor is failed (default 2). + */ + private[scheduler] final class ExecutorFailureList extends Logging { + + private case class TaskId(stage: Int, stageAttempt: Int, taskIndex: Int) + + /** + * All failures on this executor in successful task sets. + */ + private var failuresAndExpiryTimes = ArrayBuffer[(TaskId, Long)]() + /** + * As an optimization, we track the min expiry time over all entries in failuresAndExpiryTimes + * so its quick to tell if there are any failures with expiry before the current time. + */ + private var minExpiryTime = Long.MaxValue + + def addFailures( + stage: Int, + stageAttempt: Int, + failuresInTaskSet: ExecutorFailuresInTaskSet): Unit = { + failuresInTaskSet.taskToFailureCountAndFailureTime.foreach { + case (taskIdx, (_, failureTime)) => + val expiryTime = failureTime + EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS + failuresAndExpiryTimes += ((TaskId(stage, stageAttempt, taskIdx), expiryTime)) + if (expiryTime < minExpiryTime) { + minExpiryTime = expiryTime + } + } + } + + /** + * The number of unique tasks that failed on this executor. Only counts failures within the + * timeout, and in successful tasksets. + */ + def numUniqueTaskFailures: Int = failuresAndExpiryTimes.size + + def isEmpty: Boolean = failuresAndExpiryTimes.isEmpty + + /** + * Apply the timeout to individual tasks. 
This is to prevent one-off failures that are very + * spread out in time (and likely have nothing to do with problems on the executor) from + * triggering exlusion. However, note that we do *not* remove executors and nodes from + * being excluded as we expire individual task failures -- each have their own timeout. Eg., + * suppose: + * * timeout = 10, maxFailuresPerExec = 2 + * * Task 1 fails on exec 1 at time 0 + * * Task 2 fails on exec 1 at time 5 + * --> exec 1 is excluded from time 5 - 15. + * This is to simplify the implementation, as well as keep the behavior easier to understand + * for the end user. + */ + def dropFailuresWithTimeoutBefore(dropBefore: Long): Unit = { + if (minExpiryTime < dropBefore) { + var newMinExpiry = Long.MaxValue + val newFailures = new ArrayBuffer[(TaskId, Long)] + failuresAndExpiryTimes.foreach { case (task, expiryTime) => + if (expiryTime >= dropBefore) { + newFailures += ((task, expiryTime)) + if (expiryTime < newMinExpiry) { + newMinExpiry = expiryTime + } + } + } + failuresAndExpiryTimes = newFailures + minExpiryTime = newMinExpiry + } + } + + override def toString(): String = { + s"failures = $failuresAndExpiryTimes" + } + } + +} + +private[spark] object HealthTracker extends Logging { + + private val DEFAULT_TIMEOUT = "1h" + + /** + * Returns true if the excludeOnFailure is enabled, based on checking the configuration + * in the following order: + * 1. Is it specifically enabled or disabled? + * 2. Is it enabled via the legacy timeout conf? + * 3. Default is off + */ + def isExcludeOnFailureEnabled(conf: SparkConf): Boolean = { + conf.get(config.EXCLUDE_ON_FAILURE_ENABLED) match { + case Some(enabled) => + enabled + case None => + // if they've got a non-zero setting for the legacy conf, always enable it, + // otherwise, use the default. + val legacyKey = config.EXCLUDE_ON_FAILURE_LEGACY_TIMEOUT_CONF.key + conf.get(config.EXCLUDE_ON_FAILURE_LEGACY_TIMEOUT_CONF).exists { legacyTimeout => + if (legacyTimeout == 0) { + logWarning(s"Turning off excludeOnFailure due to legacy configuration: $legacyKey == 0") + false + } else { + logWarning(s"Turning on excludeOnFailure due to legacy configuration: $legacyKey > 0") + true + } + } + } + } + + def getExludeOnFailureTimeout(conf: SparkConf): Long = { + conf.get(config.EXCLUDE_ON_FAILURE_TIMEOUT_CONF).getOrElse { + conf.get(config.EXCLUDE_ON_FAILURE_LEGACY_TIMEOUT_CONF).getOrElse { + Utils.timeStringAsMs(DEFAULT_TIMEOUT) + } + } + } + + /** + * Verify that exclude on failure configurations are consistent; if not, throw an exception. + * Should only be called if excludeOnFailure is enabled. + * + * The configuration is expected to adhere to a few invariants. Default values + * follow these rules of course, but users may unwittingly change one configuration + * without making the corresponding adjustment elsewhere. This ensures we fail-fast when + * there are such misconfigurations. 
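
To make the invariant described above concrete, here is a hypothetical misconfiguration that the validation below would reject; the key names come from this patch, but the values are made up for illustration:

```
import org.apache.spark.SparkConf

// With 4 allowed attempts per node and only 4 task failures allowed overall,
// a single bad node could exhaust every attempt before exclusion kicks in,
// so validation fails fast with an IllegalArgumentException at scheduler start-up.
val badConf = new SparkConf()
  .set("spark.excludeOnFailure.enabled", "true")
  .set("spark.task.maxFailures", "4")
  .set("spark.excludeOnFailure.task.maxTaskAttemptsPerNode", "4")
```
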
+ */ + def validateExcludeOnFailureConfs(conf: SparkConf): Unit = { + + def mustBePos(k: String, v: String): Unit = { + throw new IllegalArgumentException(s"$k was $v, but must be > 0.") + } + + Seq( + config.MAX_TASK_ATTEMPTS_PER_EXECUTOR, + config.MAX_TASK_ATTEMPTS_PER_NODE, + config.MAX_FAILURES_PER_EXEC_STAGE, + config.MAX_FAILED_EXEC_PER_NODE_STAGE, + config.MAX_FAILURES_PER_EXEC, + config.MAX_FAILED_EXEC_PER_NODE + ).foreach { config => + val v = conf.get(config) + if (v <= 0) { + mustBePos(config.key, v.toString) + } + } + + val timeout = getExludeOnFailureTimeout(conf) + if (timeout <= 0) { + // first, figure out where the timeout came from, to include the right conf in the message. + conf.get(config.EXCLUDE_ON_FAILURE_TIMEOUT_CONF) match { + case Some(t) => + mustBePos(config.EXCLUDE_ON_FAILURE_TIMEOUT_CONF.key, timeout.toString) + case None => + mustBePos(config.EXCLUDE_ON_FAILURE_LEGACY_TIMEOUT_CONF.key, timeout.toString) + } + } + + val maxTaskFailures = conf.get(config.TASK_MAX_FAILURES) + val maxNodeAttempts = conf.get(config.MAX_TASK_ATTEMPTS_PER_NODE) + + if (maxNodeAttempts >= maxTaskFailures) { + throw new IllegalArgumentException(s"${config.MAX_TASK_ATTEMPTS_PER_NODE.key} " + + s"( = ${maxNodeAttempts}) was >= ${config.TASK_MAX_FAILURES.key} " + + s"( = ${maxTaskFailures} ). Though excludeOnFailure is enabled, with this configuration, " + + s"Spark will not be robust to one bad node. Decrease " + + s"${config.MAX_TASK_ATTEMPTS_PER_NODE.key}, increase ${config.TASK_MAX_FAILURES.key}, " + + s"or disable excludeOnFailure with ${config.EXCLUDE_ON_FAILURE_ENABLED.key}") + } + } +} + +private final case class ExcludedExecutor(node: String, expiryTime: Long) diff --git a/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala b/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala index 8119215b8b74f..3fcb35b604ef6 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala @@ -118,12 +118,21 @@ case class SparkListenerExecutorRemoved(time: Long, executorId: String, reason: extends SparkListenerEvent @DeveloperApi +@deprecated("use SparkListenerExecutorExcluded instead", "3.1.0") case class SparkListenerExecutorBlacklisted( time: Long, executorId: String, taskFailures: Int) extends SparkListenerEvent +@DeveloperApi +case class SparkListenerExecutorExcluded( + time: Long, + executorId: String, + taskFailures: Int) + extends SparkListenerEvent + +@deprecated("use SparkListenerExecutorExcludedForStage instead", "3.1.0") @DeveloperApi case class SparkListenerExecutorBlacklistedForStage( time: Long, @@ -133,6 +142,17 @@ case class SparkListenerExecutorBlacklistedForStage( stageAttemptId: Int) extends SparkListenerEvent + +@DeveloperApi +case class SparkListenerExecutorExcludedForStage( + time: Long, + executorId: String, + taskFailures: Int, + stageId: Int, + stageAttemptId: Int) + extends SparkListenerEvent + +@deprecated("use SparkListenerNodeExcludedForStage instead", "3.1.0") @DeveloperApi case class SparkListenerNodeBlacklistedForStage( time: Long, @@ -142,10 +162,27 @@ case class SparkListenerNodeBlacklistedForStage( stageAttemptId: Int) extends SparkListenerEvent + +@DeveloperApi +case class SparkListenerNodeExcludedForStage( + time: Long, + hostId: String, + executorFailures: Int, + stageId: Int, + stageAttemptId: Int) + extends SparkListenerEvent + +@deprecated("use SparkListenerExecutorUnexcluded instead", "3.1.0") @DeveloperApi case class 
SparkListenerExecutorUnblacklisted(time: Long, executorId: String) extends SparkListenerEvent + +@DeveloperApi +case class SparkListenerExecutorUnexcluded(time: Long, executorId: String) + extends SparkListenerEvent + +@deprecated("use SparkListenerNodeExcluded instead", "3.1.0") @DeveloperApi case class SparkListenerNodeBlacklisted( time: Long, @@ -153,10 +190,23 @@ case class SparkListenerNodeBlacklisted( executorFailures: Int) extends SparkListenerEvent + +@DeveloperApi +case class SparkListenerNodeExcluded( + time: Long, + hostId: String, + executorFailures: Int) + extends SparkListenerEvent + +@deprecated("use SparkListenerNodeUnexcluded instead", "3.1.0") @DeveloperApi case class SparkListenerNodeUnblacklisted(time: Long, hostId: String) extends SparkListenerEvent +@DeveloperApi +case class SparkListenerNodeUnexcluded(time: Long, hostId: String) + extends SparkListenerEvent + @DeveloperApi case class SparkListenerUnschedulableTaskSetAdded( stageId: Int, @@ -319,38 +369,75 @@ private[spark] trait SparkListenerInterface { def onExecutorRemoved(executorRemoved: SparkListenerExecutorRemoved): Unit /** - * Called when the driver blacklists an executor for a Spark application. + * Called when the driver excludes an executor for a Spark application. */ + @deprecated("use onExecutorExcluded instead", "3.1.0") def onExecutorBlacklisted(executorBlacklisted: SparkListenerExecutorBlacklisted): Unit /** - * Called when the driver blacklists an executor for a stage. + * Called when the driver excludes an executor for a Spark application. */ + def onExecutorExcluded(executorExcluded: SparkListenerExecutorExcluded): Unit + + /** + * Called when the driver excludes an executor for a stage. + */ + @deprecated("use onExecutorExcludedForStage instead", "3.1.0") def onExecutorBlacklistedForStage( executorBlacklistedForStage: SparkListenerExecutorBlacklistedForStage): Unit /** - * Called when the driver blacklists a node for a stage. + * Called when the driver excludes an executor for a stage. + */ + def onExecutorExcludedForStage( + executorExcludedForStage: SparkListenerExecutorExcludedForStage): Unit + + /** + * Called when the driver excludes a node for a stage. */ + @deprecated("use onNodeExcludedForStage instead", "3.1.0") def onNodeBlacklistedForStage(nodeBlacklistedForStage: SparkListenerNodeBlacklistedForStage): Unit /** - * Called when the driver re-enables a previously blacklisted executor. + * Called when the driver excludes a node for a stage. + */ + def onNodeExcludedForStage(nodeExcludedForStage: SparkListenerNodeExcludedForStage): Unit + + /** + * Called when the driver re-enables a previously excluded executor. */ + @deprecated("use onExecutorUnexcluded instead", "3.1.0") def onExecutorUnblacklisted(executorUnblacklisted: SparkListenerExecutorUnblacklisted): Unit /** - * Called when the driver blacklists a node for a Spark application. + * Called when the driver re-enables a previously excluded executor. + */ + def onExecutorUnexcluded(executorUnexcluded: SparkListenerExecutorUnexcluded): Unit + + /** + * Called when the driver excludes a node for a Spark application. */ + @deprecated("use onNodeExcluded instead", "3.1.0") def onNodeBlacklisted(nodeBlacklisted: SparkListenerNodeBlacklisted): Unit /** - * Called when the driver re-enables a previously blacklisted node. + * Called when the driver excludes a node for a Spark application. */ + def onNodeExcluded(nodeExcluded: SparkListenerNodeExcluded): Unit + + /** + * Called when the driver re-enables a previously excluded node. 
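
For listener authors, the new `*Excluded` events are drop-in replacements for the deprecated `*Blacklisted` ones, and the tracker posts both variants for backwards compatibility. A minimal sketch of a listener written against the new callbacks; the log output format is illustrative only:

```
import org.apache.spark.scheduler.{SparkListener, SparkListenerExecutorExcluded,
  SparkListenerExecutorUnexcluded, SparkListenerNodeExcluded, SparkListenerNodeUnexcluded}

class ExclusionLoggingListener extends SparkListener {
  override def onExecutorExcluded(e: SparkListenerExecutorExcluded): Unit =
    println(s"executor ${e.executorId} excluded after ${e.taskFailures} task failures")

  override def onExecutorUnexcluded(e: SparkListenerExecutorUnexcluded): Unit =
    println(s"executor ${e.executorId} is schedulable again")

  override def onNodeExcluded(e: SparkListenerNodeExcluded): Unit =
    println(s"node ${e.hostId} excluded (${e.executorFailures} executors failed on it)")

  override def onNodeUnexcluded(e: SparkListenerNodeUnexcluded): Unit =
    println(s"node ${e.hostId} is schedulable again")
}

// Registered like any other listener, e.g. sc.addSparkListener(new ExclusionLoggingListener)
```
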
+ */ + @deprecated("use onNodeUnexcluded instead", "3.1.0") def onNodeUnblacklisted(nodeUnblacklisted: SparkListenerNodeUnblacklisted): Unit /** - * Called when a taskset becomes unschedulable due to blacklisting and dynamic allocation + * Called when the driver re-enables a previously excluded node. + */ + def onNodeUnexcluded(nodeUnexcluded: SparkListenerNodeUnexcluded): Unit + + /** + * Called when a taskset becomes unschedulable due to exludeOnFailure and dynamic allocation * is enabled. */ def onUnschedulableTaskSetAdded( @@ -433,21 +520,33 @@ abstract class SparkListener extends SparkListenerInterface { override def onExecutorBlacklisted( executorBlacklisted: SparkListenerExecutorBlacklisted): Unit = { } + override def onExecutorExcluded( + executorExcluded: SparkListenerExecutorExcluded): Unit = { } - def onExecutorBlacklistedForStage( + override def onExecutorBlacklistedForStage( executorBlacklistedForStage: SparkListenerExecutorBlacklistedForStage): Unit = { } + override def onExecutorExcludedForStage( + executorExcludedForStage: SparkListenerExecutorExcludedForStage): Unit = { } - def onNodeBlacklistedForStage( + override def onNodeBlacklistedForStage( nodeBlacklistedForStage: SparkListenerNodeBlacklistedForStage): Unit = { } + override def onNodeExcludedForStage( + nodeExcludedForStage: SparkListenerNodeExcludedForStage): Unit = { } override def onExecutorUnblacklisted( executorUnblacklisted: SparkListenerExecutorUnblacklisted): Unit = { } + override def onExecutorUnexcluded( + executorUnexcluded: SparkListenerExecutorUnexcluded): Unit = { } override def onNodeBlacklisted( nodeBlacklisted: SparkListenerNodeBlacklisted): Unit = { } + override def onNodeExcluded( + nodeExcluded: SparkListenerNodeExcluded): Unit = { } override def onNodeUnblacklisted( nodeUnblacklisted: SparkListenerNodeUnblacklisted): Unit = { } + override def onNodeUnexcluded( + nodeUnexcluded: SparkListenerNodeUnexcluded): Unit = { } override def onUnschedulableTaskSetAdded( unschedulableTaskSetAdded: SparkListenerUnschedulableTaskSetAdded): Unit = { } diff --git a/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala b/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala index 13e65f4291fd0..ec0c0cf3cf82b 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala @@ -75,6 +75,18 @@ private[spark] trait SparkListenerBus listener.onNodeBlacklisted(nodeBlacklisted) case nodeUnblacklisted: SparkListenerNodeUnblacklisted => listener.onNodeUnblacklisted(nodeUnblacklisted) + case executorExcludedForStage: SparkListenerExecutorExcludedForStage => + listener.onExecutorExcludedForStage(executorExcludedForStage) + case nodeExcludedForStage: SparkListenerNodeExcludedForStage => + listener.onNodeExcludedForStage(nodeExcludedForStage) + case executorExcluded: SparkListenerExecutorExcluded => + listener.onExecutorExcluded(executorExcluded) + case executorUnexcluded: SparkListenerExecutorUnexcluded => + listener.onExecutorUnexcluded(executorUnexcluded) + case nodeExcluded: SparkListenerNodeExcluded => + listener.onNodeExcluded(nodeExcluded) + case nodeUnexcluded: SparkListenerNodeUnexcluded => + listener.onNodeUnexcluded(nodeUnexcluded) case blockUpdated: SparkListenerBlockUpdated => listener.onBlockUpdated(blockUpdated) case speculativeTaskSubmitted: SparkListenerSpeculativeTaskSubmitted => diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala 
b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala index 2fcf13d5268f8..57e219999b0d0 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala @@ -91,9 +91,9 @@ private[spark] class TaskSchedulerImpl( this(sc, sc.conf.get(config.TASK_MAX_FAILURES)) } - // Lazily initializing blacklistTrackerOpt to avoid getting empty ExecutorAllocationClient, + // Lazily initializing healthTrackerOpt to avoid getting empty ExecutorAllocationClient, // because ExecutorAllocationClient is created after this TaskSchedulerImpl. - private[scheduler] lazy val blacklistTrackerOpt = maybeCreateBlacklistTracker(sc) + private[scheduler] lazy val healthTrackerOpt = maybeCreateHealthTracker(sc) val conf = sc.conf @@ -281,7 +281,7 @@ private[spark] class TaskSchedulerImpl( private[scheduler] def createTaskSetManager( taskSet: TaskSet, maxTaskFailures: Int): TaskSetManager = { - new TaskSetManager(this, taskSet, maxTaskFailures, blacklistTrackerOpt, clock) + new TaskSetManager(this, taskSet, maxTaskFailures, healthTrackerOpt, clock) } override def cancelTasks(stageId: Int, interruptThread: Boolean): Unit = synchronized { @@ -381,7 +381,7 @@ private[spark] class TaskSchedulerImpl( : (Boolean, Option[TaskLocality]) = { var noDelayScheduleRejects = true var minLaunchedLocality: Option[TaskLocality] = None - // nodes and executors that are blacklisted for the entire application have already been + // nodes and executors that are excluded for the entire application have already been // filtered out by this point for (i <- 0 until shuffledOffers.size) { val execId = shuffledOffers(i).executorId @@ -515,15 +515,15 @@ private[spark] class TaskSchedulerImpl( hostsByRack.getOrElseUpdate(rack, new HashSet[String]()) += host } - // Before making any offers, remove any nodes from the blacklist whose blacklist has expired. Do + // Before making any offers, include any nodes whose expireOnFailure timeout has expired. Do // this here to avoid a separate thread and added synchronization overhead, and also because - // updating the blacklist is only relevant when task offers are being made. - blacklistTrackerOpt.foreach(_.applyBlacklistTimeout()) + // updating the excluded executors and nodes is only relevant when task offers are being made. + healthTrackerOpt.foreach(_.applyExcludeOnFailureTimeout()) - val filteredOffers = blacklistTrackerOpt.map { blacklistTracker => + val filteredOffers = healthTrackerOpt.map { healthTracker => offers.filter { offer => - !blacklistTracker.isNodeBlacklisted(offer.host) && - !blacklistTracker.isExecutorBlacklisted(offer.executorId) + !healthTracker.isNodeExcluded(offer.host) && + !healthTracker.isExecutorExcluded(offer.executorId) } }.getOrElse(offers) @@ -602,15 +602,15 @@ private[spark] class TaskSchedulerImpl( } if (!launchedAnyTask) { - taskSet.getCompletelyBlacklistedTaskIfAny(hostToExecutors).foreach { taskIndex => - // If the taskSet is unschedulable we try to find an existing idle blacklisted + taskSet.getCompletelyExcludedTaskIfAny(hostToExecutors).foreach { taskIndex => + // If the taskSet is unschedulable we try to find an existing idle excluded // executor and kill the idle executor and kick off an abortTimer which if it doesn't // schedule a task within the the timeout will abort the taskSet if we were unable to // schedule any task from the taskSet. // Note 1: We keep track of schedulability on a per taskSet basis rather than on a per // task basis. 
// Note 2: The taskSet can still be aborted when there are more than one idle - // blacklisted executors and dynamic allocation is on. This can happen when a killed + // excluded executors and dynamic allocation is on. This can happen when a killed // idle executor isn't replaced in time by ExecutorAllocationManager as it relies on // pending tasks and doesn't kill executors on idle timeouts, resulting in the abort // timer to expire and abort the taskSet. @@ -621,7 +621,7 @@ private[spark] class TaskSchedulerImpl( executorIdToRunningTaskIds.find(x => !isExecutorBusy(x._1)) match { case Some ((executorId, _)) => if (!unschedulableTaskSetToExpiryTime.contains(taskSet)) { - blacklistTrackerOpt.foreach(blt => blt.killBlacklistedIdleExecutor(executorId)) + healthTrackerOpt.foreach(blt => blt.killExcludedIdleExecutor(executorId)) updateUnschedulableTaskSetTimeoutAndStartAbortTimer(taskSet, taskIndex) } case None => @@ -638,18 +638,19 @@ private[spark] class TaskSchedulerImpl( } } else { // Abort Immediately - logInfo("Cannot schedule any task because of complete blacklisting. No idle" + - s" executors can be found to kill. Aborting stage ${taskSet.stageId}.") - taskSet.abortSinceCompletelyBlacklisted(taskIndex) + logInfo("Cannot schedule any task because all executors excluded from " + + "failures. No idle executors can be found to kill. Aborting stage " + + s"${taskSet.stageId}.") + taskSet.abortSinceCompletelyExcludedOnFailure(taskIndex) } } } } else { - // We want to defer killing any taskSets as long as we have a non blacklisted executor + // We want to defer killing any taskSets as long as we have a non excluded executor // which can be used to schedule a task from any active taskSets. This ensures that the // job can make progress. // Note: It is theoretically possible that a taskSet never gets scheduled on a - // non-blacklisted executor and the abort timer doesn't kick in because of a constant + // non-excluded executor and the abort timer doesn't kick in because of a constant // submission of new TaskSets. See the PR for more details. if (unschedulableTaskSetToExpiryTime.nonEmpty) { logInfo("Clearing the expiry times for all unschedulable taskSets as a task was " + @@ -710,7 +711,7 @@ private[spark] class TaskSchedulerImpl( val timeout = conf.get(config.UNSCHEDULABLE_TASKSET_TIMEOUT) * 1000 unschedulableTaskSetToExpiryTime(taskSet) = clock.getTimeMillis() + timeout logInfo(s"Waiting for $timeout ms for completely " + - s"blacklisted task to be schedulable again before aborting stage ${taskSet.stageId}.") + s"excluded task to be schedulable again before aborting stage ${taskSet.stageId}.") abortTimer.schedule( createUnschedulableTaskSetAbortTimer(taskSet, taskIndex), timeout) } @@ -722,9 +723,9 @@ private[spark] class TaskSchedulerImpl( override def run(): Unit = TaskSchedulerImpl.this.synchronized { if (unschedulableTaskSetToExpiryTime.contains(taskSet) && unschedulableTaskSetToExpiryTime(taskSet) <= clock.getTimeMillis()) { - logInfo("Cannot schedule any task because of complete blacklisting. " + + logInfo("Cannot schedule any task because all executors excluded due to failures. " + s"Wait time for scheduling expired. 
Aborting stage ${taskSet.stageId}.") - taskSet.abortSinceCompletelyBlacklisted(taskIndex) + taskSet.abortSinceCompletelyExcludedOnFailure(taskIndex) } else { this.cancel() } @@ -1019,7 +1020,7 @@ private[spark] class TaskSchedulerImpl( executorIdToHost -= executorId rootPool.executorLost(executorId, host, reason) } - blacklistTrackerOpt.foreach(_.handleRemovedExecutor(executorId)) + healthTrackerOpt.foreach(_.handleRemovedExecutor(executorId)) } def executorAdded(execId: String, host: String): Unit = { @@ -1060,11 +1061,11 @@ private[spark] class TaskSchedulerImpl( } /** - * Get a snapshot of the currently blacklisted nodes for the entire application. This is + * Get a snapshot of the currently excluded nodes for the entire application. This is * thread-safe -- it can be called without a lock on the TaskScheduler. */ - def nodeBlacklist(): Set[String] = { - blacklistTrackerOpt.map(_.nodeBlacklist()).getOrElse(Set.empty) + def excludedNodes(): Set[String] = { + healthTrackerOpt.map(_.excludedNodeList()).getOrElse(Set.empty) } /** @@ -1223,13 +1224,13 @@ private[spark] object TaskSchedulerImpl { retval.toList } - private def maybeCreateBlacklistTracker(sc: SparkContext): Option[BlacklistTracker] = { - if (BlacklistTracker.isBlacklistEnabled(sc.conf)) { + private def maybeCreateHealthTracker(sc: SparkContext): Option[HealthTracker] = { + if (HealthTracker.isExcludeOnFailureEnabled(sc.conf)) { val executorAllocClient: Option[ExecutorAllocationClient] = sc.schedulerBackend match { case b: ExecutorAllocationClient => Some(b) case _ => None } - Some(new BlacklistTracker(sc, executorAllocClient)) + Some(new HealthTracker(sc, executorAllocClient)) } else { None } diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetBlacklist.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetExcludeList.scala similarity index 63% rename from core/src/main/scala/org/apache/spark/scheduler/TaskSetBlacklist.scala rename to core/src/main/scala/org/apache/spark/scheduler/TaskSetExcludeList.scala index 4df2889089ee9..d8c46db166fc5 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetBlacklist.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetExcludeList.scala @@ -24,19 +24,19 @@ import org.apache.spark.internal.config import org.apache.spark.util.Clock /** - * Handles blacklisting executors and nodes within a taskset. This includes blacklisting specific - * (task, executor) / (task, nodes) pairs, and also completely blacklisting executors and nodes + * Handles excluding executors and nodes within a taskset. This includes excluding specific + * (task, executor) / (task, nodes) pairs, and also completely excluding executors and nodes * for the entire taskset. * - * It also must store sufficient information in task failures for application level blacklisting, - * which is handled by [[BlacklistTracker]]. Note that BlacklistTracker does not know anything + * It also must store sufficient information in task failures for application level exclusion, + * which is handled by [[HealthTracker]]. Note that HealthTracker does not know anything * about task failures until a taskset completes successfully. * * THREADING: This class is a helper to [[TaskSetManager]]; as with the methods in * [[TaskSetManager]] this class is designed only to be called from code with a lock on the * TaskScheduler (e.g. its event handlers). It should not be called from other threads. 
*/ -private[scheduler] class TaskSetBlacklist( +private[scheduler] class TaskSetExcludelist( private val listenerBus: LiveListenerBus, val conf: SparkConf, val stageId: Int, @@ -49,9 +49,9 @@ private[scheduler] class TaskSetBlacklist( private val MAX_FAILED_EXEC_PER_NODE_STAGE = conf.get(config.MAX_FAILED_EXEC_PER_NODE_STAGE) /** - * A map from each executor to the task failures on that executor. This is used for blacklisting - * within this taskset, and it is also relayed onto [[BlacklistTracker]] for app-level - * blacklisting if this taskset completes successfully. + * A map from each executor to the task failures on that executor. This is used for exclusion + * within this taskset, and it is also relayed onto [[HealthTracker]] for app-level + * exclusion if this taskset completes successfully. */ val execToFailures = new HashMap[String, ExecutorFailuresInTaskSet]() @@ -61,9 +61,9 @@ private[scheduler] class TaskSetBlacklist( * node -> execs mapping in the usual case when there aren't any failures). */ private val nodeToExecsWithFailures = new HashMap[String, HashSet[String]]() - private val nodeToBlacklistedTaskIndexes = new HashMap[String, HashSet[Int]]() - private val blacklistedExecs = new HashSet[String]() - private val blacklistedNodes = new HashSet[String]() + private val nodeToExcludedTaskIndexes = new HashMap[String, HashSet[Int]]() + private val excludedExecs = new HashSet[String]() + private val excludedNodes = new HashSet[String]() private var latestFailureReason: String = null @@ -75,36 +75,36 @@ private[scheduler] class TaskSetBlacklist( } /** - * Return true if this executor is blacklisted for the given task. This does *not* - * need to return true if the executor is blacklisted for the entire stage, or blacklisted + * Return true if this executor is excluded for the given task. This does *not* + * need to return true if the executor is excluded for the entire stage, or excluded * for the entire application. That is to keep this method as fast as possible in the inner-loop * of the scheduler, where those filters will have already been applied. */ - def isExecutorBlacklistedForTask(executorId: String, index: Int): Boolean = { + def isExecutorExcludedForTask(executorId: String, index: Int): Boolean = { execToFailures.get(executorId).exists { execFailures => execFailures.getNumTaskFailures(index) >= MAX_TASK_ATTEMPTS_PER_EXECUTOR } } - def isNodeBlacklistedForTask(node: String, index: Int): Boolean = { - nodeToBlacklistedTaskIndexes.get(node).exists(_.contains(index)) + def isNodeExcludedForTask(node: String, index: Int): Boolean = { + nodeToExcludedTaskIndexes.get(node).exists(_.contains(index)) } /** - * Return true if this executor is blacklisted for the given stage. Completely ignores whether - * the executor is blacklisted for the entire application (or anything to do with the node the + * Return true if this executor is excluded for the given stage. Completely ignores whether + * the executor is excluded for the entire application (or anything to do with the node the * executor is on). That is to keep this method as fast as possible in the inner-loop of the * scheduler, where those filters will already have been applied. 
*/ - def isExecutorBlacklistedForTaskSet(executorId: String): Boolean = { - blacklistedExecs.contains(executorId) + def isExecutorExcludedForTaskSet(executorId: String): Boolean = { + excludedExecs.contains(executorId) } - def isNodeBlacklistedForTaskSet(node: String): Boolean = { - blacklistedNodes.contains(node) + def isNodeExcludedForTaskSet(node: String): Boolean = { + excludedNodes.contains(node) } - private[scheduler] def updateBlacklistForFailedTask( + private[scheduler] def updateExcludedForFailedTask( host: String, exec: String, index: Int, @@ -114,7 +114,7 @@ private[scheduler] class TaskSetBlacklist( execFailures.updateWithFailure(index, clock.getTimeMillis()) // check if this task has also failed on other executors on the same host -- if its gone - // over the limit, blacklist this task from the entire host. + // over the limit, exclude this task from the entire host. val execsWithFailuresOnNode = nodeToExecsWithFailures.getOrElseUpdate(host, new HashSet()) execsWithFailuresOnNode += exec val failuresOnHost = execsWithFailuresOnNode.toIterator.flatMap { exec => @@ -127,27 +127,35 @@ } }.sum if (failuresOnHost >= MAX_TASK_ATTEMPTS_PER_NODE) { - nodeToBlacklistedTaskIndexes.getOrElseUpdate(host, new HashSet()) += index + nodeToExcludedTaskIndexes.getOrElseUpdate(host, new HashSet()) += index } - // Check if enough tasks have failed on the executor to blacklist it for the entire stage. + // Check if enough tasks have failed on the executor to exclude it for the entire stage. val numFailures = execFailures.numUniqueTasksWithFailures if (numFailures >= MAX_FAILURES_PER_EXEC_STAGE) { - if (blacklistedExecs.add(exec)) { - logInfo(s"Blacklisting executor ${exec} for stage $stageId") - // This executor has been pushed into the blacklist for this stage. Let's check if it - // pushes the whole node into the blacklist. - val blacklistedExecutorsOnNode = - execsWithFailuresOnNode.filter(blacklistedExecs.contains(_)) + if (excludedExecs.add(exec)) { + logInfo(s"Excluding executor ${exec} for stage $stageId") + // This executor has been excluded for this stage. Let's check whether + // the whole node should be excluded as well. 
+ val excludedExecutorsOnNode = + execsWithFailuresOnNode.filter(excludedExecs.contains(_)) val now = clock.getTimeMillis() + // SparkListenerExecutorBlacklistedForStage is deprecated but post both events + // to keep backward compatibility listenerBus.post( SparkListenerExecutorBlacklistedForStage(now, exec, numFailures, stageId, stageAttemptId)) - val numFailExec = blacklistedExecutorsOnNode.size + listenerBus.post( + SparkListenerExecutorExcludedForStage(now, exec, numFailures, stageId, stageAttemptId)) + val numFailExec = excludedExecutorsOnNode.size if (numFailExec >= MAX_FAILED_EXEC_PER_NODE_STAGE) { - if (blacklistedNodes.add(host)) { - logInfo(s"Blacklisting ${host} for stage $stageId") + if (excludedNodes.add(host)) { + logInfo(s"Excluding ${host} for stage $stageId") + // SparkListenerNodeBlacklistedForStage is deprecated but post both events + // to keep backward compatibility listenerBus.post( SparkListenerNodeBlacklistedForStage(now, host, numFailExec, stageId, stageAttemptId)) + listenerBus.post( + SparkListenerNodeExcludedForStage(now, host, numFailExec, stageId, stageAttemptId)) } } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index 78fd412ef154c..0cfa76583bfbb 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -55,7 +55,7 @@ private[spark] class TaskSetManager( sched: TaskSchedulerImpl, val taskSet: TaskSet, val maxTaskFailures: Int, - blacklistTracker: Option[BlacklistTracker] = None, + healthTracker: Option[HealthTracker] = None, clock: Clock = new SystemClock()) extends Schedulable with Logging { private val conf = sched.sc.conf @@ -130,9 +130,9 @@ private[spark] class TaskSetManager( private var totalResultSize = 0L private var calculatedTasks = 0 - private[scheduler] val taskSetBlacklistHelperOpt: Option[TaskSetBlacklist] = { - blacklistTracker.map { _ => - new TaskSetBlacklist(sched.sc.listenerBus, conf, stageId, taskSet.stageAttemptId, clock) + private[scheduler] val taskSetExcludelistHelperOpt: Option[TaskSetExcludelist] = { + healthTracker.map { _ => + new TaskSetExcludelist(sched.sc.listenerBus, conf, stageId, taskSet.stageAttemptId, clock) } } @@ -294,7 +294,7 @@ private[spark] class TaskSetManager( while (indexOffset > 0) { indexOffset -= 1 val index = list(indexOffset) - if (!isTaskBlacklistedOnExecOrNode(index, execId, host) && + if (!isTaskExcludededOnExecOrNode(index, execId, host) && !(speculative && hasAttemptOnHost(index, host))) { // This should almost always be list.trimEnd(1) to remove tail list.remove(indexOffset) @@ -317,10 +317,10 @@ private[spark] class TaskSetManager( taskAttempts(taskIndex).exists(_.host == host) } - private def isTaskBlacklistedOnExecOrNode(index: Int, execId: String, host: String): Boolean = { - taskSetBlacklistHelperOpt.exists { blacklist => - blacklist.isNodeBlacklistedForTask(host, index) || - blacklist.isExecutorBlacklistedForTask(execId, index) + private def isTaskExcludededOnExecOrNode(index: Int, execId: String, host: String): Boolean = { + taskSetExcludelistHelperOpt.exists { excludeList => + excludeList.isNodeExcludedForTask(host, index) || + excludeList.isExecutorExcludedForTask(execId, index) } } @@ -421,11 +421,11 @@ private[spark] class TaskSetManager( taskResourceAssignments: Map[String, ResourceInformation] = Map.empty) : (Option[TaskDescription], Boolean) = { - val offerBlacklisted = 
taskSetBlacklistHelperOpt.exists { blacklist => - blacklist.isNodeBlacklistedForTaskSet(host) || - blacklist.isExecutorBlacklistedForTaskSet(execId) + val offerExcluded = taskSetExcludelistHelperOpt.exists { excludeList => + excludeList.isNodeExcludedForTaskSet(host) || + excludeList.isExecutorExcludedForTaskSet(execId) } - if (!isZombie && !offerBlacklisted) { + if (!isZombie && !offerExcluded) { val curTime = clock.getTimeMillis() var allowedLocality = maxLocality @@ -518,10 +518,10 @@ private[spark] class TaskSetManager( if (isZombie && runningTasks == 0) { sched.taskSetFinished(this) if (tasksSuccessful == numTasks) { - blacklistTracker.foreach(_.updateBlacklistForSuccessfulTaskSet( + healthTracker.foreach(_.updateExcludedForSuccessfulTaskSet( taskSet.stageId, taskSet.stageAttemptId, - taskSetBlacklistHelperOpt.get.execToFailures)) + taskSetExcludelistHelperOpt.get.execToFailures)) } } } @@ -606,12 +606,13 @@ private[spark] class TaskSetManager( } /** - * Check whether the given task set has been blacklisted to the point that it can't run anywhere. + * Check whether the given task set has been excluded to the point that it can't run anywhere. * * It is possible that this taskset has become impossible to schedule *anywhere* due to the - * blacklist. The most common scenario would be if there are fewer executors than - * spark.task.maxFailures. We need to detect this so we can avoid the job from being hung. - * We try to acquire new executor/s by killing an existing idle blacklisted executor. + * failures that lead executors being excluded from the ones we can run on. The most common + * scenario would be if there are fewer executors than spark.task.maxFailures. + * We need to detect this so we can avoid the job from being hung. We try to acquire new + * executor/s by killing an existing idle excluded executor. * * There's a tradeoff here: we could make sure all tasks in the task set are schedulable, but that * would add extra time to each iteration of the scheduling loop. Here, we take the approach of @@ -620,12 +621,12 @@ private[spark] class TaskSetManager( * method is faster in the typical case. In the worst case, this method can take * O(maxTaskFailures + numTasks) time, but it will be faster when there haven't been any task * failures (this is because the method picks one unscheduled task, and then iterates through each - * executor until it finds one that the task isn't blacklisted on). + * executor until it finds one that the task isn't excluded on). */ - private[scheduler] def getCompletelyBlacklistedTaskIfAny( + private[scheduler] def getCompletelyExcludedTaskIfAny( hostToExecutors: HashMap[String, HashSet[String]]): Option[Int] = { - taskSetBlacklistHelperOpt.flatMap { taskSetBlacklist => - val appBlacklist = blacklistTracker.get + taskSetExcludelistHelperOpt.flatMap { taskSetExcludelist => + val appHealthTracker = healthTracker.get // Only look for unschedulable tasks when at least one executor has registered. Otherwise, // task sets will be (unnecessarily) aborted in cases when no executors have registered yet. if (hostToExecutors.nonEmpty) { @@ -651,18 +652,18 @@ private[spark] class TaskSetManager( // when that unschedulable task is the last task remaining. 
hostToExecutors.forall { case (host, execsOnHost) => // Check if the task can run on the node - val nodeBlacklisted = - appBlacklist.isNodeBlacklisted(host) || - taskSetBlacklist.isNodeBlacklistedForTaskSet(host) || - taskSetBlacklist.isNodeBlacklistedForTask(host, indexInTaskSet) - if (nodeBlacklisted) { + val nodeExcluded = + appHealthTracker.isNodeExcluded(host) || + taskSetExcludelist.isNodeExcludedForTaskSet(host) || + taskSetExcludelist.isNodeExcludedForTask(host, indexInTaskSet) + if (nodeExcluded) { true } else { // Check if the task can run on any of the executors execsOnHost.forall { exec => - appBlacklist.isExecutorBlacklisted(exec) || - taskSetBlacklist.isExecutorBlacklistedForTaskSet(exec) || - taskSetBlacklist.isExecutorBlacklistedForTask(exec, indexInTaskSet) + appHealthTracker.isExecutorExcluded(exec) || + taskSetExcludelist.isExecutorExcludedForTaskSet(exec) || + taskSetExcludelist.isExecutorExcludedForTask(exec, indexInTaskSet) } } } @@ -673,16 +674,16 @@ private[spark] class TaskSetManager( } } - private[scheduler] def abortSinceCompletelyBlacklisted(indexInTaskSet: Int): Unit = { - taskSetBlacklistHelperOpt.foreach { taskSetBlacklist => + private[scheduler] def abortSinceCompletelyExcludedOnFailure(indexInTaskSet: Int): Unit = { + taskSetExcludelistHelperOpt.foreach { taskSetExcludelist => val partition = tasks(indexInTaskSet).partitionId abort(s""" |Aborting $taskSet because task $indexInTaskSet (partition $partition) - |cannot run anywhere due to node and executor blacklist. + |cannot run anywhere due to node and executor excludeOnFailure. |Most recent failure: - |${taskSetBlacklist.getLatestFailureReason} + |${taskSetExcludelist.getLatestFailureReason} | - |Blacklisting behavior can be configured via spark.blacklist.*. + |ExcludeOnFailure behavior can be configured via spark.excludeOnFailure.*. |""".stripMargin) } } @@ -821,7 +822,7 @@ private[spark] class TaskSetManager( isZombie = true if (fetchFailed.bmAddress != null) { - blacklistTracker.foreach(_.updateBlacklistForFetchFailure( + healthTracker.foreach(_.updateExcludedForFetchFailure( fetchFailed.bmAddress.host, fetchFailed.bmAddress.executorId)) } @@ -899,7 +900,7 @@ private[spark] class TaskSetManager( if (!isZombie && reason.countTowardsTaskFailures) { assert (null != failureReason) - taskSetBlacklistHelperOpt.foreach(_.updateBlacklistForFailedTask( + taskSetExcludelistHelperOpt.foreach(_.updateExcludedForFailedTask( info.host, info.executorId, index, failureReason)) numFailures(index) += 1 if (numFailures(index) >= maxTaskFailures) { diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala index d1b0e798c51be..eda1cb52d4abc 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala @@ -131,7 +131,7 @@ private[spark] object CoarseGrainedClusterMessages { resourceProfileToTotalExecs: Map[ResourceProfile, Int], numLocalityAwareTasksPerResourceProfileId: Map[Int, Int], hostToLocalTaskCount: Map[Int, Map[String, Int]], - nodeBlacklist: Set[String]) + excludedNodes: Set[String]) extends CoarseGrainedClusterMessage // Check if an executor was force-killed but for a reason unrelated to the running tasks. 
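The listener hunks earlier in this patch only rename the events and callbacks; as a quick orientation for code that consumes them, here is a minimal sketch (not part of the patch) of a listener written against the new API. The class name, the println logging, and the register helper are illustrative assumptions; the event classes, their fields, and SparkContext.addSparkListener come from the surrounding Spark code.

```
import org.apache.spark.SparkContext
import org.apache.spark.scheduler.{SparkListener, SparkListenerExecutorExcluded, SparkListenerNodeExcluded}

// Hypothetical listener, used only to illustrate the renamed application-level callbacks.
class ExclusionLoggingListener extends SparkListener {

  // Invoked when the driver excludes an executor for the entire application.
  override def onExecutorExcluded(event: SparkListenerExecutorExcluded): Unit = {
    println(s"Executor ${event.executorId} excluded after ${event.taskFailures} task failures")
  }

  // Invoked when the driver excludes a node for the entire application.
  override def onNodeExcluded(event: SparkListenerNodeExcluded): Unit = {
    println(s"Node ${event.hostId} excluded after ${event.executorFailures} executor failures")
  }
}

object ExclusionLoggingListener {
  // Attach the listener to a running SparkContext; addSparkListener is the standard hook.
  def register(sc: SparkContext): Unit = sc.addSparkListener(new ExclusionLoggingListener())
}
```

Existing listeners that override onExecutorBlacklisted or onNodeBlacklisted keep compiling, since those callbacks remain (deprecated) and both the old and the new events are posted for backward compatibility, as noted in the TaskSetExcludelist changes above.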
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala index 1d2689034f1ff..2bd0b4cc4b7d0 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala @@ -209,13 +209,14 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp attributes, resources, resourceProfileId) => if (executorDataMap.contains(executorId)) { context.sendFailure(new IllegalStateException(s"Duplicate executor ID: $executorId")) - } else if (scheduler.nodeBlacklist.contains(hostname) || - isBlacklisted(executorId, hostname)) { - // If the cluster manager gives us an executor on a blacklisted node (because it - // already started allocating those resources before we informed it of our blacklist, - // or if it ignored our blacklist), then we reject that executor immediately. - logInfo(s"Rejecting $executorId as it has been blacklisted.") - context.sendFailure(new IllegalStateException(s"Executor is blacklisted: $executorId")) + } else if (scheduler.excludedNodes.contains(hostname) || + isExecutorExcluded(executorId, hostname)) { + // If the cluster manager gives us an executor on an excluded node (because it + // already started allocating those resources before we informed it of our exclusion, + // or if it ignored our exclusion), then we reject that executor immediately. + logInfo(s"Rejecting $executorId as it has been excluded.") + context.sendFailure( + new IllegalStateException(s"Executor is excluded due to failures: $executorId")) } else { // If the executor's rpc env is not listening for incoming connections, `hostPort` // will be null, and the client connection should be used to contact the executor. @@ -852,7 +853,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp final override def killExecutorsOnHost(host: String): Boolean = { logInfo(s"Requesting to kill any and all executors on host ${host}") // A potential race exists if a new executor attempts to register on a host - // that is on the blacklist and is no no longer valid. To avoid this race, + // that is on the exclude list and is no no longer valid. To avoid this race, // all executor registration and killing happens in the event loop. This way, either // an executor will fail to register, or will be killed when all executors on a host // are killed. @@ -884,13 +885,13 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp protected def currentDelegationTokens: Array[Byte] = delegationTokens.get() /** - * Checks whether the executor is blacklisted. This is called when the executor tries to - * register with the scheduler, and will deny registration if this method returns true. + * Checks whether the executor is excluded due to failure(s). This is called when the executor + * tries to register with the scheduler, and will deny registration if this method returns true. * - * This is in addition to the blacklist kept by the task scheduler, so custom implementations + * This is in addition to the exclude list kept by the task scheduler, so custom implementations * don't need to check there. 
*/ - protected def isBlacklisted(executorId: String, hostname: String): Boolean = false + protected def isExecutorExcluded(executorId: String, hostname: String): Boolean = false // SPARK-27112: We need to ensure that there is ordering of lock acquisition // between TaskSchedulerImpl and CoarseGrainedSchedulerBackend objects in order to fix diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala index 7ae9117137caa..5b0c1dc389af0 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala @@ -283,82 +283,141 @@ private[spark] class AppStatusListener( } } + // Note, the blacklisted functions are left here for backwards compatibility to allow + // new history server to properly read and display older event logs. override def onExecutorBlacklisted(event: SparkListenerExecutorBlacklisted): Unit = { - updateBlackListStatus(event.executorId, true) + updateExecExclusionStatus(event.executorId, true) + } + + override def onExecutorExcluded(event: SparkListenerExecutorExcluded): Unit = { + updateExecExclusionStatus(event.executorId, true) } override def onExecutorBlacklistedForStage( event: SparkListenerExecutorBlacklistedForStage): Unit = { - val now = System.nanoTime() + updateExclusionStatusForStage(event.stageId, event.stageAttemptId, event.executorId) + } - Option(liveStages.get((event.stageId, event.stageAttemptId))).foreach { stage => - setStageBlackListStatus(stage, now, event.executorId) - } - liveExecutors.get(event.executorId).foreach { exec => - addBlackListedStageTo(exec, event.stageId, now) - } + override def onExecutorExcludedForStage( + event: SparkListenerExecutorExcludedForStage): Unit = { + updateExclusionStatusForStage(event.stageId, event.stageAttemptId, event.executorId) } override def onNodeBlacklistedForStage(event: SparkListenerNodeBlacklistedForStage): Unit = { - val now = System.nanoTime() + updateNodeExclusionStatusForStage(event.stageId, event.stageAttemptId, event.hostId) + } - // Implicitly blacklist every available executor for the stage associated with this node - Option(liveStages.get((event.stageId, event.stageAttemptId))).foreach { stage => - val executorIds = liveExecutors.values.filter(_.host == event.hostId).map(_.executorId).toSeq - setStageBlackListStatus(stage, now, executorIds: _*) - } - liveExecutors.values.filter(_.hostname == event.hostId).foreach { exec => - addBlackListedStageTo(exec, event.stageId, now) - } + override def onNodeExcludedForStage(event: SparkListenerNodeExcludedForStage): Unit = { + updateNodeExclusionStatusForStage(event.stageId, event.stageAttemptId, event.hostId) } - private def addBlackListedStageTo(exec: LiveExecutor, stageId: Int, now: Long): Unit = { - exec.blacklistedInStages += stageId + private def addExcludedStageTo(exec: LiveExecutor, stageId: Int, now: Long): Unit = { + exec.excludedInStages += stageId liveUpdate(exec, now) } private def setStageBlackListStatus(stage: LiveStage, now: Long, executorIds: String*): Unit = { executorIds.foreach { executorId => val executorStageSummary = stage.executorSummary(executorId) - executorStageSummary.isBlacklisted = true + executorStageSummary.isExcluded = true maybeUpdate(executorStageSummary, now) } - stage.blackListedExecutors ++= executorIds + stage.excludedExecutors ++= executorIds + maybeUpdate(stage, now) + } + + private def setStageExcludedStatus(stage: LiveStage, now: Long, executorIds: 
String*): Unit = { + executorIds.foreach { executorId => + val executorStageSummary = stage.executorSummary(executorId) + executorStageSummary.isExcluded = true + maybeUpdate(executorStageSummary, now) + } + stage.excludedExecutors ++= executorIds maybeUpdate(stage, now) } override def onExecutorUnblacklisted(event: SparkListenerExecutorUnblacklisted): Unit = { - updateBlackListStatus(event.executorId, false) + updateExecExclusionStatus(event.executorId, false) + } + + override def onExecutorUnexcluded(event: SparkListenerExecutorUnexcluded): Unit = { + updateExecExclusionStatus(event.executorId, false) } override def onNodeBlacklisted(event: SparkListenerNodeBlacklisted): Unit = { - updateNodeBlackList(event.hostId, true) + updateNodeExcluded(event.hostId, true) + } + + override def onNodeExcluded(event: SparkListenerNodeExcluded): Unit = { + updateNodeExcluded(event.hostId, true) } override def onNodeUnblacklisted(event: SparkListenerNodeUnblacklisted): Unit = { - updateNodeBlackList(event.hostId, false) + updateNodeExcluded(event.hostId, false) + } + + override def onNodeUnexcluded(event: SparkListenerNodeUnexcluded): Unit = { + updateNodeExcluded(event.hostId, false) } - private def updateBlackListStatus(execId: String, blacklisted: Boolean): Unit = { + private def updateNodeExclusionStatusForStage(stageId: Int, stageAttemptId: Int, + hostId: String): Unit = { + val now = System.nanoTime() + + // Implicitly exclude every available executor for the stage associated with this node + Option(liveStages.get((stageId, stageAttemptId))).foreach { stage => + val executorIds = liveExecutors.values.filter(_.host == hostId).map(_.executorId).toSeq + setStageExcludedStatus(stage, now, executorIds: _*) + } + liveExecutors.values.filter(_.hostname == hostId).foreach { exec => + addExcludedStageTo(exec, stageId, now) + } + } + + private def updateExclusionStatusForStage(stageId: Int, stageAttemptId: Int, + execId: String): Unit = { + val now = System.nanoTime() + + Option(liveStages.get((stageId, stageAttemptId))).foreach { stage => + setStageExcludedStatus(stage, now, execId) + } + liveExecutors.get(execId).foreach { exec => + addExcludedStageTo(exec, stageId, now) + } + } + + private def updateExecExclusionStatus(execId: String, excluded: Boolean): Unit = { liveExecutors.get(execId).foreach { exec => - exec.isBlacklisted = blacklisted - if (blacklisted) { + updateExecExclusionStatus(exec, excluded, System.nanoTime()) + } + } + + private def updateExecExclusionStatus(exec: LiveExecutor, excluded: Boolean, now: Long): Unit = { + // Since we are sending both blacklisted and excluded events for backwards compatibility + // we need to protect against double counting so don't increment if already in + // that state. Also protects against executor being excluded and then node being + // separately excluded which could result in this being called twice for same + // executor. + if (exec.isExcluded != excluded) { + if (excluded) { appStatusSource.foreach(_.BLACKLISTED_EXECUTORS.inc()) + appStatusSource.foreach(_.EXCLUDED_EXECUTORS.inc()) } else { appStatusSource.foreach(_.UNBLACKLISTED_EXECUTORS.inc()) + appStatusSource.foreach(_.UNEXCLUDED_EXECUTORS.inc()) } - liveUpdate(exec, System.nanoTime()) + exec.isExcluded = excluded + liveUpdate(exec, now) } } - private def updateNodeBlackList(host: String, blacklisted: Boolean): Unit = { + private def updateNodeExcluded(host: String, excluded: Boolean): Unit = { val now = System.nanoTime() - // Implicitly (un)blacklist every executor associated with the node. 
+ // Implicitly (un)exclude every executor associated with the node. liveExecutors.values.foreach { exec => if (exec.hostname == host) { - exec.isBlacklisted = blacklisted - liveUpdate(exec, now) + updateExecExclusionStatus(exec, excluded, now) } } } @@ -759,10 +818,10 @@ private[spark] class AppStatusListener( update(pool, now) } - val executorIdsForStage = stage.blackListedExecutors + val executorIdsForStage = stage.excludedExecutors executorIdsForStage.foreach { executorId => liveExecutors.get(executorId).foreach { exec => - removeBlackListedStageFrom(exec, event.stageInfo.stageId, now) + removeExcludedStageFrom(exec, event.stageInfo.stageId, now) } } @@ -782,8 +841,8 @@ private[spark] class AppStatusListener( deadExecutors.retain((execId, exec) => isExecutorActiveForLiveStages(exec)) } - private def removeBlackListedStageFrom(exec: LiveExecutor, stageId: Int, now: Long) = { - exec.blacklistedInStages -= stageId + private def removeExcludedStageFrom(exec: LiveExecutor, stageId: Int, now: Long) = { + exec.excludedInStages -= stageId liveUpdate(exec, now) } diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusSource.scala b/core/src/main/scala/org/apache/spark/status/AppStatusSource.scala index 20f171bd3c375..d19744db089ba 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusSource.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusSource.scala @@ -59,9 +59,25 @@ private[spark] class AppStatusSource extends Source { val SKIPPED_TASKS = getCounter("tasks", "skippedTasks") + // This is the count of how many executors have been blacklisted at the application level, + // does not include stage level blacklisting. + // this is private but user visible from metrics so just deprecate + @deprecated("use excludedExecutors instead", "3.1.0") val BLACKLISTED_EXECUTORS = getCounter("tasks", "blackListedExecutors") + // This is the count of how many executors have been unblacklisted at the application level, + // does not include stage level unblacklisting. + @deprecated("use unexcludedExecutors instead", "3.1.0") val UNBLACKLISTED_EXECUTORS = getCounter("tasks", "unblackListedExecutors") + + // This is the count of how many executors have been excluded at the application level, + // does not include stage level exclusion. + val EXCLUDED_EXECUTORS = getCounter("tasks", "excludedExecutors") + + // This is the count of how many executors have been unexcluded at the application level, + // does not include stage level unexclusion. 
+ val UNEXCLUDED_EXECUTORS = getCounter("tasks", "unexcludedExecutors") + } private[spark] object AppStatusSource { diff --git a/core/src/main/scala/org/apache/spark/status/LiveEntity.scala b/core/src/main/scala/org/apache/spark/status/LiveEntity.scala index 0fadd330a01ad..38f1f25f2fcaa 100644 --- a/core/src/main/scala/org/apache/spark/status/LiveEntity.scala +++ b/core/src/main/scala/org/apache/spark/status/LiveEntity.scala @@ -286,8 +286,8 @@ private[spark] class LiveExecutor(val executorId: String, _addTime: Long) extend var totalInputBytes = 0L var totalShuffleRead = 0L var totalShuffleWrite = 0L - var isBlacklisted = false - var blacklistedInStages: Set[Int] = TreeSet() + var isExcluded = false + var excludedInStages: Set[Int] = TreeSet() var executorLogs = Map[String, String]() var attributes = Map[String, String]() @@ -334,18 +334,20 @@ private[spark] class LiveExecutor(val executorId: String, _addTime: Long) extend totalInputBytes, totalShuffleRead, totalShuffleWrite, - isBlacklisted, + isExcluded, maxMemory, addTime, Option(removeTime), Option(removeReason), executorLogs, memoryMetrics, - blacklistedInStages, + excludedInStages, Some(peakExecutorMetrics).filter(_.isSet), attributes, resources, - resourceProfileId) + resourceProfileId, + isExcluded, + excludedInStages) new ExecutorSummaryWrapper(info) } } @@ -361,7 +363,7 @@ private class LiveExecutorStageSummary( var succeededTasks = 0 var failedTasks = 0 var killedTasks = 0 - var isBlacklisted = false + var isExcluded = false var metrics = createMetrics(default = 0L) @@ -383,8 +385,9 @@ private class LiveExecutorStageSummary( metrics.shuffleWriteMetrics.recordsWritten, metrics.memoryBytesSpilled, metrics.diskBytesSpilled, - isBlacklisted, - Some(peakExecutorMetrics).filter(_.isSet)) + isExcluded, + Some(peakExecutorMetrics).filter(_.isSet), + isExcluded) new ExecutorStageSummaryWrapper(stageId, attemptId, executorId, info) } @@ -421,7 +424,7 @@ private class LiveStage extends LiveEntity { val activeTasksPerExecutor = new HashMap[String, Int]().withDefaultValue(0) - var blackListedExecutors = new HashSet[String]() + var excludedExecutors = new HashSet[String]() val peakExecutorMetrics = new ExecutorMetrics() diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/api.scala b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala index 5a8cf09e1cba6..96f5b7b5cf27e 100644 --- a/core/src/main/scala/org/apache/spark/status/api/v1/api.scala +++ b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala @@ -82,10 +82,12 @@ class ExecutorStageSummary private[spark]( val shuffleWriteRecords : Long, val memoryBytesSpilled : Long, val diskBytesSpilled : Long, + @deprecated("use isExcludedForStage instead", "3.1.0") val isBlacklistedForStage: Boolean, @JsonSerialize(using = classOf[ExecutorMetricsJsonSerializer]) @JsonDeserialize(using = classOf[ExecutorMetricsJsonDeserializer]) - val peakMemoryMetrics: Option[ExecutorMetrics]) + val peakMemoryMetrics: Option[ExecutorMetrics], + val isExcludedForStage: Boolean) class ExecutorSummary private[spark]( val id: String, @@ -105,6 +107,7 @@ class ExecutorSummary private[spark]( val totalInputBytes: Long, val totalShuffleRead: Long, val totalShuffleWrite: Long, + @deprecated("use isExcluded instead", "3.1.0") val isBlacklisted: Boolean, val maxMemory: Long, val addTime: Date, @@ -112,13 +115,16 @@ class ExecutorSummary private[spark]( val removeReason: Option[String], val executorLogs: Map[String, String], val memoryMetrics: Option[MemoryMetrics], + @deprecated("use 
excludedInStages instead", "3.1.0") val blacklistedInStages: Set[Int], @JsonSerialize(using = classOf[ExecutorMetricsJsonSerializer]) @JsonDeserialize(using = classOf[ExecutorMetricsJsonDeserializer]) val peakMemoryMetrics: Option[ExecutorMetrics], val attributes: Map[String, String], val resources: Map[String, ResourceInformation], - val resourceProfileId: Int) + val resourceProfileId: Int, + val isExcluded: Boolean, + val excludedInStages: Set[Int]) class MemoryMetrics private[spark]( val usedOnHeapStorageMemory: Long, diff --git a/core/src/main/scala/org/apache/spark/ui/ToolTips.scala b/core/src/main/scala/org/apache/spark/ui/ToolTips.scala index aefd001e573f9..a7c42b86468b2 100644 --- a/core/src/main/scala/org/apache/spark/ui/ToolTips.scala +++ b/core/src/main/scala/org/apache/spark/ui/ToolTips.scala @@ -91,9 +91,6 @@ private[spark] object ToolTips { val TASK_TIME = "Shaded red when garbage collection (GC) time is over 10% of task time" - val BLACKLISTED = - "Shows if this executor has been blacklisted by the scheduler due to task failures." - val APPLICATION_EXECUTOR_LIMIT = """Maximum number of executors that this application will use. This limit is finite only when dynamic allocation is enabled. The number of granted executors may exceed the limit diff --git a/core/src/test/resources/HistoryServerExpectations/blacklisting_for_stage_expectation.json b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_for_stage_expectation.json similarity index 99% rename from core/src/test/resources/HistoryServerExpectations/blacklisting_for_stage_expectation.json rename to core/src/test/resources/HistoryServerExpectations/excludeOnFailure_for_stage_expectation.json index 0d197eab0e25d..a69940fa5a1a5 100644 --- a/core/src/test/resources/HistoryServerExpectations/blacklisting_for_stage_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_for_stage_expectation.json @@ -697,7 +697,8 @@ "shuffleWriteRecords" : 0, "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, - "isBlacklistedForStage" : true + "isBlacklistedForStage" : true, + "isExcludedForStage" : true }, "1" : { "taskTime" : 708, @@ -714,7 +715,8 @@ "shuffleWriteRecords" : 10, "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, - "isBlacklistedForStage" : false + "isBlacklistedForStage" : false, + "isExcludedForStage" : false } }, "killedTasksSummary" : { }, diff --git a/core/src/test/resources/HistoryServerExpectations/blacklisting_node_for_stage_expectation.json b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_node_for_stage_expectation.json similarity index 98% rename from core/src/test/resources/HistoryServerExpectations/blacklisting_node_for_stage_expectation.json rename to core/src/test/resources/HistoryServerExpectations/excludeOnFailure_node_for_stage_expectation.json index 24d73faa45021..bda9caedbbe81 100644 --- a/core/src/test/resources/HistoryServerExpectations/blacklisting_node_for_stage_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_node_for_stage_expectation.json @@ -805,7 +805,8 @@ "shuffleWriteRecords" : 0, "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, - "isBlacklistedForStage" : true + "isBlacklistedForStage" : true, + "isExcludedForStage" : true }, "5" : { "taskTime" : 1579, @@ -822,7 +823,8 @@ "shuffleWriteRecords" : 0, "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, - "isBlacklistedForStage" : true + "isBlacklistedForStage" : true, + "isExcludedForStage" : true }, "1" : { "taskTime" : 2411, @@ -839,7 +841,8 @@ 
"shuffleWriteRecords" : 12, "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, - "isBlacklistedForStage" : false + "isBlacklistedForStage" : false, + "isExcludedForStage" : false }, "2" : { "taskTime" : 2446, @@ -856,7 +859,8 @@ "shuffleWriteRecords" : 15, "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, - "isBlacklistedForStage" : false + "isBlacklistedForStage" : false, + "isExcludedForStage" : false }, "3" : { "taskTime" : 1774, @@ -873,7 +877,8 @@ "shuffleWriteRecords" : 3, "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, - "isBlacklistedForStage" : true + "isBlacklistedForStage" : true, + "isExcludedForStage" : true } }, "killedTasksSummary" : { }, diff --git a/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json index 67425676a62d6..c18a2e31dff3c 100644 --- a/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json @@ -23,5 +23,7 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : false, + "excludedInStages" : [ ] } ] diff --git a/core/src/test/resources/HistoryServerExpectations/executor_list_with_executor_metrics_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_list_with_executor_metrics_json_expectation.json index d052a27385f66..bf3e93f3d3783 100644 --- a/core/src/test/resources/HistoryServerExpectations/executor_list_with_executor_metrics_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/executor_list_with_executor_metrics_json_expectation.json @@ -51,7 +51,9 @@ }, "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : false, + "excludedInStages" : [ ] }, { "id" : "3", "hostPort" : "test-3.vpc.company.com:37641", @@ -118,7 +120,9 @@ "CONTAINER_ID" : "container_1553914137147_0018_01_000004" }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : false, + "excludedInStages" : [ ] }, { "id" : "2", "hostPort" : "test-4.vpc.company.com:33179", @@ -185,7 +189,9 @@ "CONTAINER_ID" : "container_1553914137147_0018_01_000003" }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : false, + "excludedInStages" : [ ] }, { "id" : "1", "hostPort" : "test-2.vpc.company.com:43764", @@ -252,5 +258,7 @@ "CONTAINER_ID" : "container_1553914137147_0018_01_000002" }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : false, + "excludedInStages" : [ ] } ] diff --git a/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json index 91574ca8266b2..9adda275b5609 100644 --- a/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json @@ -29,7 +29,9 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : true, + "excludedInStages" : [ ] }, { "id" : "3", "hostPort" : "172.22.0.167:51485", @@ -64,7 +66,9 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + 
"isExcluded" : true, + "excludedInStages" : [ ] } ,{ "id" : "2", "hostPort" : "172.22.0.167:51487", @@ -99,7 +103,9 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : true, + "excludedInStages" : [ ] }, { "id" : "1", "hostPort" : "172.22.0.167:51490", @@ -134,7 +140,9 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : true, + "excludedInStages" : [ ] }, { "id" : "0", "hostPort" : "172.22.0.167:51491", @@ -169,5 +177,7 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : true, + "excludedInStages" : [ ] } ] diff --git a/core/src/test/resources/HistoryServerExpectations/executor_node_blacklisting_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_expectation.json similarity index 92% rename from core/src/test/resources/HistoryServerExpectations/executor_node_blacklisting_expectation.json rename to core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_expectation.json index f14b9a5085a42..65bd309c1025e 100644 --- a/core/src/test/resources/HistoryServerExpectations/executor_node_blacklisting_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_expectation.json @@ -29,7 +29,9 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : true, + "excludedInStages" : [ ] }, { "id" : "3", "hostPort" : "172.22.0.167:51485", @@ -64,7 +66,9 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : true, + "excludedInStages" : [ ] }, { "id" : "2", "hostPort" : "172.22.0.167:51487", @@ -99,7 +103,9 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : true, + "excludedInStages" : [ ] }, { "id" : "1", "hostPort" : "172.22.0.167:51490", @@ -134,7 +140,9 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : true, + "excludedInStages" : [ ] }, { "id" : "0", "hostPort" : "172.22.0.167:51491", @@ -169,5 +177,7 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : true, + "excludedInStages" : [ ] } ] diff --git a/core/src/test/resources/HistoryServerExpectations/executor_node_blacklisting_unblacklisting_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_unexcluding_expectation.json similarity index 90% rename from core/src/test/resources/HistoryServerExpectations/executor_node_blacklisting_unblacklisting_expectation.json rename to core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_unexcluding_expectation.json index 3645387317ca1..46e8f81d0e245 100644 --- a/core/src/test/resources/HistoryServerExpectations/executor_node_blacklisting_unblacklisting_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_unexcluding_expectation.json @@ -23,7 +23,9 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : 
false, + "excludedInStages" : [ ] }, { "id" : "3", "hostPort" : "172.22.0.111:64543", @@ -52,7 +54,9 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : false, + "excludedInStages" : [ ] }, { "id" : "2", "hostPort" : "172.22.0.111:64539", @@ -81,7 +85,9 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : false, + "excludedInStages" : [ ] }, { "id" : "1", "hostPort" : "172.22.0.111:64541", @@ -110,7 +116,9 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : false, + "excludedInStages" : [ ] }, { "id" : "0", "hostPort" : "172.22.0.111:64540", @@ -139,5 +147,7 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : false, + "excludedInStages" : [ ] } ] diff --git a/core/src/test/resources/HistoryServerExpectations/executor_resource_information_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_resource_information_expectation.json index 165389cf25027..53ae9a0c7909e 100644 --- a/core/src/test/resources/HistoryServerExpectations/executor_resource_information_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/executor_resource_information_expectation.json @@ -29,7 +29,9 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : false, + "excludedInStages" : [ ] }, { "id" : "2", "hostPort" : "tomg-test:46005", @@ -79,7 +81,9 @@ "addresses" : [ "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12" ] } }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : false, + "excludedInStages" : [ ] }, { "id" : "1", "hostPort" : "tomg-test:44873", @@ -129,5 +133,7 @@ "addresses" : [ "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12" ] } }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : false, + "excludedInStages" : [ ] } ] diff --git a/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json index 3db7d551b6130..41e54c68858ad 100644 --- a/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json @@ -459,7 +459,8 @@ "shuffleWriteRecords" : 0, "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, - "isBlacklistedForStage" : false + "isBlacklistedForStage" : false, + "isExcludedForStage" : false } }, "killedTasksSummary" : { }, diff --git a/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json index 8ef3769c1ca6b..7a6685a609523 100644 --- a/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json @@ -459,7 +459,8 @@ "shuffleWriteRecords" : 0, "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, - "isBlacklistedForStage" : false + "isBlacklistedForStage" : false, + "isExcludedForStage" : false } }, "killedTasksSummary" : { }, diff --git 
a/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json index 3b5476ae8b160..066b6a4f884a7 100644 --- a/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json @@ -503,7 +503,8 @@ "shuffleWriteRecords" : 0, "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, - "isBlacklistedForStage" : false + "isBlacklistedForStage" : false, + "isExcludedForStage" : false } }, "killedTasksSummary" : { }, diff --git a/core/src/test/resources/HistoryServerExpectations/stage_with_peak_metrics_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_with_peak_metrics_expectation.json index 373510d23058e..20a958073245a 100644 --- a/core/src/test/resources/HistoryServerExpectations/stage_with_peak_metrics_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/stage_with_peak_metrics_expectation.json @@ -929,7 +929,8 @@ "MinorGCTime" : 0, "MajorGCCount" : 0, "MajorGCTime" : 0 - } + }, + "isExcludedForStage" : false }, "driver" : { "taskTime" : 0, @@ -968,7 +969,8 @@ "MinorGCTime" : 115, "MajorGCCount" : 4, "MajorGCTime" : 339 - } + }, + "isExcludedForStage" : false } }, "killedTasksSummary" : { }, diff --git a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala index 6a38bba5dd0e5..d1edb80e40b21 100644 --- a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala @@ -524,7 +524,7 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { assert(numExecutorsTarget(manager, defaultProfile.id) === 1) assert(maxNumExecutorsNeededPerResourceProfile(manager, defaultProfile) == 1) - // Stage 0 becomes unschedulable due to blacklisting + // Stage 0 becomes unschedulable due to excludeOnFailure post(SparkListenerUnschedulableTaskSetAdded(0, 0)) clock.advance(1000) manager invokePrivate _updateAndSyncNumExecutorsTarget(clock.nanoTime()) @@ -580,7 +580,7 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { post(SparkListenerTaskEnd(0, 0, null, Success, t2Info, new ExecutorMetrics, null)) post(SparkListenerStageCompleted(createStageInfo(0, 2))) - // Stage 1 and 2 becomes unschedulable now due to blacklisting + // Stage 1 and 2 becomes unschedulable now due to excludeOnFailure post(SparkListenerUnschedulableTaskSetAdded(1, 0)) post(SparkListenerUnschedulableTaskSetAdded(2, 0)) @@ -637,7 +637,7 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { (0 to 3).foreach { i => assert(removeExecutorDefaultProfile(manager, i.toString)) } (0 to 3).foreach { i => onExecutorRemoved(manager, i.toString) } - // Now due to blacklisting, the task becomes unschedulable + // Now due to executor being excluded, the task becomes unschedulable post(SparkListenerUnschedulableTaskSetAdded(0, 0)) clock.advance(1000) manager invokePrivate _updateAndSyncNumExecutorsTarget(clock.nanoTime()) diff --git a/core/src/test/scala/org/apache/spark/HeartbeatReceiverSuite.scala b/core/src/test/scala/org/apache/spark/HeartbeatReceiverSuite.scala index a2e70b23a3e5d..c9d43f517afba 100644 --- a/core/src/test/scala/org/apache/spark/HeartbeatReceiverSuite.scala +++ b/core/src/test/scala/org/apache/spark/HeartbeatReceiverSuite.scala @@ -76,7 
+76,7 @@ class HeartbeatReceiverSuite sc = spy(new SparkContext(conf)) scheduler = mock(classOf[TaskSchedulerImpl]) when(sc.taskScheduler).thenReturn(scheduler) - when(scheduler.nodeBlacklist).thenReturn(Predef.Set[String]()) + when(scheduler.excludedNodes).thenReturn(Predef.Set[String]()) when(scheduler.sc).thenReturn(sc) heartbeatReceiverClock = new ManualClock heartbeatReceiver = new HeartbeatReceiver(sc, heartbeatReceiverClock) diff --git a/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala b/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala index c7c3ad27675fa..e1d4eff0a62cb 100644 --- a/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala @@ -497,19 +497,19 @@ class StandaloneDynamicAllocationSuite } } - test("executor registration on a blacklisted host must fail") { + test("executor registration on a excluded host must fail") { // The context isn't really used by the test, but it helps with creating a test scheduler, // since CoarseGrainedSchedulerBackend makes a lot of calls to the context instance. - sc = new SparkContext(appConf.set(config.BLACKLIST_ENABLED.key, "true")) + sc = new SparkContext(appConf.set(config.EXCLUDE_ON_FAILURE_ENABLED.key, "true")) val endpointRef = mock(classOf[RpcEndpointRef]) val mockAddress = mock(classOf[RpcAddress]) when(endpointRef.address).thenReturn(mockAddress) - val message = RegisterExecutor("one", endpointRef, "blacklisted-host", 10, Map.empty, + val message = RegisterExecutor("one", endpointRef, "excluded-host", 10, Map.empty, Map.empty, Map.empty, ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) val taskScheduler = mock(classOf[TaskSchedulerImpl]) - when(taskScheduler.nodeBlacklist()).thenReturn(Set("blacklisted-host")) + when(taskScheduler.excludedNodes()).thenReturn(Set("excluded-host")) when(taskScheduler.resourceOffers(any(), any[Boolean])).thenReturn(Nil) when(taskScheduler.sc).thenReturn(sc) diff --git a/core/src/test/scala/org/apache/spark/deploy/history/BasicEventFilterSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/BasicEventFilterSuite.scala index 2da40dccba53e..5d40a0610eb6c 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/BasicEventFilterSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/BasicEventFilterSuite.scala @@ -135,6 +135,8 @@ class BasicEventFilterSuite extends SparkFunSuite { SparkListenerStageExecutorMetrics(1.toString, 0, 0, new ExecutorMetrics))) assert(Some(false) === acceptFn(SparkListenerExecutorBlacklisted(0, 1.toString, 1))) assert(Some(false) === acceptFn(SparkListenerExecutorUnblacklisted(0, 1.toString))) + assert(Some(false) === acceptFn(SparkListenerExecutorExcluded(0, 1.toString, 1))) + assert(Some(false) === acceptFn(SparkListenerExecutorUnexcluded(0, 1.toString))) assert(Some(false) === acceptFn(createExecutorRemovedEvent(1))) val bmId = BlockManagerId(1.toString, "host1", 1) assert(Some(false) === acceptFn(SparkListenerBlockManagerAdded(0, bmId, 1))) @@ -148,6 +150,10 @@ class BasicEventFilterSuite extends SparkFunSuite { SparkListenerStageExecutorMetrics(2.toString, 0, 0, new ExecutorMetrics))) assert(Some(true) === acceptFn(SparkListenerExecutorBlacklisted(0, 2.toString, 1))) assert(Some(true) === acceptFn(SparkListenerExecutorUnblacklisted(0, 2.toString))) + assert(None === acceptFn(SparkListenerNodeBlacklisted(0, "host1", 1))) + assert(None === 
acceptFn(SparkListenerNodeUnblacklisted(0, "host1"))) + assert(Some(true) === acceptFn(SparkListenerExecutorExcluded(0, 2.toString, 1))) + assert(Some(true) === acceptFn(SparkListenerExecutorUnexcluded(0, 2.toString))) assert(Some(true) === acceptFn(createExecutorRemovedEvent(2))) val bmId2 = BlockManagerId(2.toString, "host1", 1) assert(Some(true) === acceptFn(SparkListenerBlockManagerAdded(0, bmId2, 1))) @@ -164,8 +170,8 @@ class BasicEventFilterSuite extends SparkFunSuite { assert(None === acceptFn(SparkListenerEnvironmentUpdate(Map.empty))) assert(None === acceptFn(SparkListenerApplicationStart("1", Some("1"), 0, "user", None))) assert(None === acceptFn(SparkListenerApplicationEnd(1))) - assert(None === acceptFn(SparkListenerNodeBlacklisted(0, "host1", 1))) - assert(None === acceptFn(SparkListenerNodeUnblacklisted(0, "host1"))) + assert(None === acceptFn(SparkListenerNodeExcluded(0, "host1", 1))) + assert(None === acceptFn(SparkListenerNodeUnexcluded(0, "host1"))) assert(None === acceptFn(SparkListenerLogStart("testVersion"))) } diff --git a/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileCompactorSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileCompactorSuite.scala index 2a914023ec821..ac39f022d5ca6 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileCompactorSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileCompactorSuite.scala @@ -219,10 +219,10 @@ class EventLogFileCompactorSuite extends SparkFunSuite { override def acceptFn(): PartialFunction[SparkListenerEvent, Boolean] = { case _: SparkListenerApplicationEnd => true case _: SparkListenerEnvironmentUpdate => true - case _: SparkListenerNodeBlacklisted => true + case _: SparkListenerNodeExcluded => true case _: SparkListenerBlockManagerAdded => false case _: SparkListenerApplicationStart => false - case _: SparkListenerNodeUnblacklisted => false + case _: SparkListenerNodeUnexcluded => false } override def statistics(): Option[EventFilter.FilterStatistics] = None @@ -254,11 +254,11 @@ class EventLogFileCompactorSuite extends SparkFunSuite { // filterApplicationStart: Some(false) & Some(false) => filter out writeEventToWriter(writer, SparkListenerApplicationStart("app", None, 0, "user", None)) - // filterNodeBlacklisted: None & Some(true) => filter in - expectedLines += writeEventToWriter(writer, SparkListenerNodeBlacklisted(0, "host1", 1)) + // filterNodeExcluded: None & Some(true) => filter in + expectedLines += writeEventToWriter(writer, SparkListenerNodeExcluded(0, "host1", 1)) - // filterNodeUnblacklisted: None & Some(false) => filter out - writeEventToWriter(writer, SparkListenerNodeUnblacklisted(0, "host1")) + // filterNodeUnexcluded: None & Some(false) => filter out + writeEventToWriter(writer, SparkListenerNodeUnexcluded(0, "host1")) // other events: None & None => filter in expectedLines += writeEventToWriter(writer, SparkListenerUnpersistRDD(0)) diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala index e4c23d3d1b1c3..08b2118065521 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala @@ -169,12 +169,13 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers "applications/local-1426533911241/1/stages/0/0/taskList", "stage task list from multi-attempt app json(2)" 
-> "applications/local-1426533911241/2/stages/0/0/taskList", - "blacklisting for stage" -> "applications/app-20180109111548-0000/stages/0/0", - "blacklisting node for stage" -> "applications/application_1516285256255_0012/stages/0/0", + "excludeOnFailure for stage" -> "applications/app-20180109111548-0000/stages/0/0", + "excludeOnFailure node for stage" -> "applications/application_1516285256255_0012/stages/0/0", "rdd list storage json" -> "applications/local-1422981780767/storage/rdd", - "executor node blacklisting" -> "applications/app-20161116163331-0000/executors", - "executor node blacklisting unblacklisting" -> "applications/app-20161115172038-0000/executors", + "executor node excludeOnFailure" -> "applications/app-20161116163331-0000/executors", + "executor node excludeOnFailure unexcluding" -> + "applications/app-20161115172038-0000/executors", "executor memory usage" -> "applications/app-20161116163331-0000/executors", "executor resource information" -> "applications/application_1555004656427_0144/executors", "multiple resource profiles" -> "applications/application_1578436911597_0052/environment", diff --git a/core/src/test/scala/org/apache/spark/scheduler/BlacklistTrackerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/BlacklistTrackerSuite.scala deleted file mode 100644 index a1671a58f0d9b..0000000000000 --- a/core/src/test/scala/org/apache/spark/scheduler/BlacklistTrackerSuite.scala +++ /dev/null @@ -1,608 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.scheduler - -import org.mockito.ArgumentMatchers.any -import org.mockito.Mockito.{never, verify, when} -import org.mockito.invocation.InvocationOnMock -import org.scalatest.BeforeAndAfterEach -import org.scalatestplus.mockito.MockitoSugar - -import org.apache.spark._ -import org.apache.spark.internal.config -import org.apache.spark.util.ManualClock - -class BlacklistTrackerSuite extends SparkFunSuite with BeforeAndAfterEach with MockitoSugar - with LocalSparkContext { - - private val clock = new ManualClock(0) - - private var blacklist: BlacklistTracker = _ - private var listenerBusMock: LiveListenerBus = _ - private var scheduler: TaskSchedulerImpl = _ - private var conf: SparkConf = _ - - override def beforeEach(): Unit = { - conf = new SparkConf().setAppName("test").setMaster("local") - .set(config.BLACKLIST_ENABLED.key, "true") - scheduler = mockTaskSchedWithConf(conf) - - clock.setTime(0) - - listenerBusMock = mock[LiveListenerBus] - blacklist = new BlacklistTracker(listenerBusMock, conf, None, clock) - } - - override def afterEach(): Unit = { - if (blacklist != null) { - blacklist = null - } - if (scheduler != null) { - scheduler.stop() - scheduler = null - } - super.afterEach() - } - - // All executors and hosts used in tests should be in this set, so that [[assertEquivalentToSet]] - // works. Its OK if its got extraneous entries - val allExecutorAndHostIds = { - (('A' to 'Z')++ (1 to 100).map(_.toString)) - .flatMap{ suffix => - Seq(s"host$suffix", s"host-$suffix") - } - }.toSet - - /** - * Its easier to write our tests as if we could directly look at the sets of nodes & executors in - * the blacklist. However the api doesn't expose a set, so this is a simple way to test - * something similar, since we know the universe of values that might appear in these sets. - */ - def assertEquivalentToSet(f: String => Boolean, expected: Set[String]): Unit = { - allExecutorAndHostIds.foreach { id => - val actual = f(id) - val exp = expected.contains(id) - assert(actual === exp, raw"""for string "$id" """) - } - } - - def mockTaskSchedWithConf(conf: SparkConf): TaskSchedulerImpl = { - sc = new SparkContext(conf) - val scheduler = mock[TaskSchedulerImpl] - when(scheduler.sc).thenReturn(sc) - when(scheduler.mapOutputTracker).thenReturn( - SparkEnv.get.mapOutputTracker.asInstanceOf[MapOutputTrackerMaster]) - scheduler - } - - def createTaskSetBlacklist(stageId: Int = 0): TaskSetBlacklist = { - new TaskSetBlacklist(listenerBusMock, conf, stageId, stageAttemptId = 0, clock = clock) - } - - test("executors can be blacklisted with only a few failures per stage") { - // For many different stages, executor 1 fails a task, then executor 2 succeeds the task, - // and then the task set is done. Not enough failures to blacklist the executor *within* - // any particular taskset, but we still blacklist the executor overall eventually. - // Also, we intentionally have a mix of task successes and failures -- there are even some - // successes after the executor is blacklisted. The idea here is those tasks get scheduled - // before the executor is blacklisted. We might get successes after blacklisting (because the - // executor might be flaky but not totally broken). But successes should not unblacklist the - // executor. 
- val failuresUntilBlacklisted = conf.get(config.MAX_FAILURES_PER_EXEC) - var failuresSoFar = 0 - (0 until failuresUntilBlacklisted * 10).foreach { stageId => - val taskSetBlacklist = createTaskSetBlacklist(stageId) - if (stageId % 2 == 0) { - // fail one task in every other taskset - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "1", index = 0, failureReason = "testing") - failuresSoFar += 1 - } - blacklist.updateBlacklistForSuccessfulTaskSet(stageId, 0, taskSetBlacklist.execToFailures) - assert(failuresSoFar == stageId / 2 + 1) - if (failuresSoFar < failuresUntilBlacklisted) { - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set()) - } else { - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set("1")) - verify(listenerBusMock).post( - SparkListenerExecutorBlacklisted(0, "1", failuresUntilBlacklisted)) - } - } - } - - // If an executor has many task failures, but the task set ends up failing, it shouldn't be - // counted against the executor. - test("executors aren't blacklisted as a result of tasks in failed task sets") { - val failuresUntilBlacklisted = conf.get(config.MAX_FAILURES_PER_EXEC) - // for many different stages, executor 1 fails a task, and then the taskSet fails. - (0 until failuresUntilBlacklisted * 10).foreach { stage => - val taskSetBlacklist = createTaskSetBlacklist(stage) - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "1", index = 0, failureReason = "testing") - } - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set()) - } - - Seq(true, false).foreach { succeedTaskSet => - val label = if (succeedTaskSet) "success" else "failure" - test(s"stage blacklist updates correctly on stage $label") { - // Within one taskset, an executor fails a few times, so it's blacklisted for the taskset. - // But if the taskset fails, we shouldn't blacklist the executor after the stage. - val taskSetBlacklist = createTaskSetBlacklist(0) - // We trigger enough failures for both the taskset blacklist, and the application blacklist. - val numFailures = math.max(conf.get(config.MAX_FAILURES_PER_EXEC), - conf.get(config.MAX_FAILURES_PER_EXEC_STAGE)) - (0 until numFailures).foreach { index => - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "1", index = index, failureReason = "testing") - } - assert(taskSetBlacklist.isExecutorBlacklistedForTaskSet("1")) - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set()) - if (succeedTaskSet) { - // The task set succeeded elsewhere, so we should count those failures against our executor, - // and it should be blacklisted for the entire application. - blacklist.updateBlacklistForSuccessfulTaskSet(0, 0, taskSetBlacklist.execToFailures) - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set("1")) - verify(listenerBusMock).post(SparkListenerExecutorBlacklisted(0, "1", numFailures)) - } else { - // The task set failed, so we don't count these failures against the executor for other - // stages. - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set()) - } - } - } - - test("blacklisted executors and nodes get recovered with time") { - val taskSetBlacklist0 = createTaskSetBlacklist(stageId = 0) - // Fail 4 tasks in one task set on executor 1, so that executor gets blacklisted for the whole - // application. 
- (0 until 4).foreach { partition => - taskSetBlacklist0.updateBlacklistForFailedTask( - "hostA", exec = "1", index = partition, failureReason = "testing") - } - blacklist.updateBlacklistForSuccessfulTaskSet(0, 0, taskSetBlacklist0.execToFailures) - assert(blacklist.nodeBlacklist() === Set()) - assertEquivalentToSet(blacklist.isNodeBlacklisted(_), Set()) - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set("1")) - verify(listenerBusMock).post(SparkListenerExecutorBlacklisted(0, "1", 4)) - - val taskSetBlacklist1 = createTaskSetBlacklist(stageId = 1) - // Fail 4 tasks in one task set on executor 2, so that executor gets blacklisted for the whole - // application. Since that's the second executor that is blacklisted on the same node, we also - // blacklist that node. - (0 until 4).foreach { partition => - taskSetBlacklist1.updateBlacklistForFailedTask( - "hostA", exec = "2", index = partition, failureReason = "testing") - } - blacklist.updateBlacklistForSuccessfulTaskSet(0, 0, taskSetBlacklist1.execToFailures) - assert(blacklist.nodeBlacklist() === Set("hostA")) - assertEquivalentToSet(blacklist.isNodeBlacklisted(_), Set("hostA")) - verify(listenerBusMock).post(SparkListenerNodeBlacklisted(0, "hostA", 2)) - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set("1", "2")) - verify(listenerBusMock).post(SparkListenerExecutorBlacklisted(0, "2", 4)) - - // Advance the clock and then make sure hostA and executors 1 and 2 have been removed from the - // blacklist. - val timeout = blacklist.BLACKLIST_TIMEOUT_MILLIS + 1 - clock.advance(timeout) - blacklist.applyBlacklistTimeout() - assert(blacklist.nodeBlacklist() === Set()) - assertEquivalentToSet(blacklist.isNodeBlacklisted(_), Set()) - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set()) - verify(listenerBusMock).post(SparkListenerExecutorUnblacklisted(timeout, "2")) - verify(listenerBusMock).post(SparkListenerExecutorUnblacklisted(timeout, "1")) - verify(listenerBusMock).post(SparkListenerNodeUnblacklisted(timeout, "hostA")) - - // Fail one more task, but executor isn't put back into blacklist since the count of failures - // on that executor should have been reset to 0. - val taskSetBlacklist2 = createTaskSetBlacklist(stageId = 2) - taskSetBlacklist2.updateBlacklistForFailedTask( - "hostA", exec = "1", index = 0, failureReason = "testing") - blacklist.updateBlacklistForSuccessfulTaskSet(2, 0, taskSetBlacklist2.execToFailures) - assert(blacklist.nodeBlacklist() === Set()) - assertEquivalentToSet(blacklist.isNodeBlacklisted(_), Set()) - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set()) - } - - test("blacklist can handle lost executors") { - // The blacklist should still work if an executor is killed completely. We should still - // be able to blacklist the entire node. - val taskSetBlacklist0 = createTaskSetBlacklist(stageId = 0) - // Lets say that executor 1 dies completely. We get some task failures, but - // the taskset then finishes successfully (elsewhere). 
- (0 until 4).foreach { partition => - taskSetBlacklist0.updateBlacklistForFailedTask( - "hostA", exec = "1", index = partition, failureReason = "testing") - } - blacklist.handleRemovedExecutor("1") - blacklist.updateBlacklistForSuccessfulTaskSet( - stageId = 0, - stageAttemptId = 0, - taskSetBlacklist0.execToFailures) - assert(blacklist.isExecutorBlacklisted("1")) - verify(listenerBusMock).post(SparkListenerExecutorBlacklisted(0, "1", 4)) - val t1 = blacklist.BLACKLIST_TIMEOUT_MILLIS / 2 - clock.advance(t1) - - // Now another executor gets spun up on that host, but it also dies. - val taskSetBlacklist1 = createTaskSetBlacklist(stageId = 1) - (0 until 4).foreach { partition => - taskSetBlacklist1.updateBlacklistForFailedTask( - "hostA", exec = "2", index = partition, failureReason = "testing") - } - blacklist.handleRemovedExecutor("2") - blacklist.updateBlacklistForSuccessfulTaskSet( - stageId = 1, - stageAttemptId = 0, - taskSetBlacklist1.execToFailures) - // We've now had two bad executors on the hostA, so we should blacklist the entire node. - assert(blacklist.isExecutorBlacklisted("1")) - assert(blacklist.isExecutorBlacklisted("2")) - verify(listenerBusMock).post(SparkListenerExecutorBlacklisted(t1, "2", 4)) - assert(blacklist.isNodeBlacklisted("hostA")) - verify(listenerBusMock).post(SparkListenerNodeBlacklisted(t1, "hostA", 2)) - - // Advance the clock so that executor 1 should no longer be explicitly blacklisted, but - // everything else should still be blacklisted. - val t2 = blacklist.BLACKLIST_TIMEOUT_MILLIS / 2 + 1 - clock.advance(t2) - blacklist.applyBlacklistTimeout() - assert(!blacklist.isExecutorBlacklisted("1")) - verify(listenerBusMock).post(SparkListenerExecutorUnblacklisted(t1 + t2, "1")) - assert(blacklist.isExecutorBlacklisted("2")) - assert(blacklist.isNodeBlacklisted("hostA")) - // make sure we don't leak memory - assert(!blacklist.executorIdToBlacklistStatus.contains("1")) - assert(!blacklist.nodeToBlacklistedExecs("hostA").contains("1")) - // Advance the timeout again so now hostA should be removed from the blacklist. - clock.advance(t1) - blacklist.applyBlacklistTimeout() - assert(!blacklist.nodeIdToBlacklistExpiryTime.contains("hostA")) - verify(listenerBusMock).post(SparkListenerNodeUnblacklisted(t1 + t2 + t1, "hostA")) - // Even though unblacklisting a node implicitly unblacklists all of its executors, - // there will be no SparkListenerExecutorUnblacklisted sent here. - } - - test("task failures expire with time") { - // Verifies that 2 failures within the timeout period cause an executor to be blacklisted, but - // if task failures are spaced out by more than the timeout period, the first failure is timed - // out, and the executor isn't blacklisted. - var stageId = 0 - - def failOneTaskInTaskSet(exec: String): Unit = { - val taskSetBlacklist = createTaskSetBlacklist(stageId = stageId) - taskSetBlacklist.updateBlacklistForFailedTask("host-" + exec, exec, 0, "testing") - blacklist.updateBlacklistForSuccessfulTaskSet(stageId, 0, taskSetBlacklist.execToFailures) - stageId += 1 - } - - failOneTaskInTaskSet(exec = "1") - // We have one sporadic failure on exec 2, but that's it. Later checks ensure that we never - // blacklist executor 2 despite this one failure. - failOneTaskInTaskSet(exec = "2") - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set()) - assert(blacklist.nextExpiryTime === Long.MaxValue) - - // We advance the clock past the expiry time. 
- clock.advance(blacklist.BLACKLIST_TIMEOUT_MILLIS + 1) - val t0 = clock.getTimeMillis() - blacklist.applyBlacklistTimeout() - assert(blacklist.nextExpiryTime === Long.MaxValue) - failOneTaskInTaskSet(exec = "1") - - // Because the 2nd failure on executor 1 happened past the expiry time, nothing should have been - // blacklisted. - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set()) - - // Now we add one more failure, within the timeout, and it should be counted. - clock.setTime(t0 + blacklist.BLACKLIST_TIMEOUT_MILLIS - 1) - val t1 = clock.getTimeMillis() - failOneTaskInTaskSet(exec = "1") - blacklist.applyBlacklistTimeout() - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set("1")) - verify(listenerBusMock).post(SparkListenerExecutorBlacklisted(t1, "1", 2)) - assert(blacklist.nextExpiryTime === t1 + blacklist.BLACKLIST_TIMEOUT_MILLIS) - - // Add failures on executor 3, make sure it gets put on the blacklist. - clock.setTime(t1 + blacklist.BLACKLIST_TIMEOUT_MILLIS - 1) - val t2 = clock.getTimeMillis() - failOneTaskInTaskSet(exec = "3") - failOneTaskInTaskSet(exec = "3") - blacklist.applyBlacklistTimeout() - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set("1", "3")) - verify(listenerBusMock).post(SparkListenerExecutorBlacklisted(t2, "3", 2)) - assert(blacklist.nextExpiryTime === t1 + blacklist.BLACKLIST_TIMEOUT_MILLIS) - - // Now we go past the timeout for executor 1, so it should be dropped from the blacklist. - clock.setTime(t1 + blacklist.BLACKLIST_TIMEOUT_MILLIS + 1) - blacklist.applyBlacklistTimeout() - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set("3")) - verify(listenerBusMock).post(SparkListenerExecutorUnblacklisted(clock.getTimeMillis(), "1")) - assert(blacklist.nextExpiryTime === t2 + blacklist.BLACKLIST_TIMEOUT_MILLIS) - - // Make sure that we update correctly when we go from having blacklisted executors to - // just having tasks with timeouts. - clock.setTime(t2 + blacklist.BLACKLIST_TIMEOUT_MILLIS - 1) - failOneTaskInTaskSet(exec = "4") - blacklist.applyBlacklistTimeout() - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set("3")) - assert(blacklist.nextExpiryTime === t2 + blacklist.BLACKLIST_TIMEOUT_MILLIS) - - clock.setTime(t2 + blacklist.BLACKLIST_TIMEOUT_MILLIS + 1) - blacklist.applyBlacklistTimeout() - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set()) - verify(listenerBusMock).post(SparkListenerExecutorUnblacklisted(clock.getTimeMillis(), "3")) - // we've got one task failure still, but we don't bother setting nextExpiryTime to it, to - // avoid wasting time checking for expiry of individual task failures. - assert(blacklist.nextExpiryTime === Long.MaxValue) - } - - test("task failure timeout works as expected for long-running tasksets") { - // This ensures that we don't trigger spurious blacklisting for long tasksets, when the taskset - // finishes long after the task failures. We create two tasksets, each with one failure. - // Individually they shouldn't cause any blacklisting since there is only one failure. - // Furthermore, we space the failures out so far that even when both tasksets have completed, - // we still don't trigger any blacklisting. 
- val taskSetBlacklist1 = createTaskSetBlacklist(stageId = 1) - val taskSetBlacklist2 = createTaskSetBlacklist(stageId = 2) - // Taskset1 has one failure immediately - taskSetBlacklist1.updateBlacklistForFailedTask("host-1", "1", 0, "testing") - // Then we have a *long* delay, much longer than the timeout, before any other failures or - // taskset completion - clock.advance(blacklist.BLACKLIST_TIMEOUT_MILLIS * 5) - // After the long delay, we have one failure on taskset 2, on the same executor - taskSetBlacklist2.updateBlacklistForFailedTask("host-1", "1", 0, "testing") - // Finally, we complete both tasksets. Its important here to complete taskset2 *first*. We - // want to make sure that when taskset 1 finishes, even though we've now got two task failures, - // we realize that the task failure we just added was well before the timeout. - clock.advance(1) - blacklist.updateBlacklistForSuccessfulTaskSet(stageId = 2, 0, taskSetBlacklist2.execToFailures) - clock.advance(1) - blacklist.updateBlacklistForSuccessfulTaskSet(stageId = 1, 0, taskSetBlacklist1.execToFailures) - - // Make sure nothing was blacklisted - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set()) - } - - test("only blacklist nodes for the application when enough executors have failed on that " + - "specific host") { - // we blacklist executors on two different hosts -- make sure that doesn't lead to any - // node blacklisting - val taskSetBlacklist0 = createTaskSetBlacklist(stageId = 0) - taskSetBlacklist0.updateBlacklistForFailedTask( - "hostA", exec = "1", index = 0, failureReason = "testing") - taskSetBlacklist0.updateBlacklistForFailedTask( - "hostA", exec = "1", index = 1, failureReason = "testing") - blacklist.updateBlacklistForSuccessfulTaskSet(0, 0, taskSetBlacklist0.execToFailures) - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set("1")) - verify(listenerBusMock).post(SparkListenerExecutorBlacklisted(0, "1", 2)) - assertEquivalentToSet(blacklist.isNodeBlacklisted(_), Set()) - - val taskSetBlacklist1 = createTaskSetBlacklist(stageId = 1) - taskSetBlacklist1.updateBlacklistForFailedTask( - "hostB", exec = "2", index = 0, failureReason = "testing") - taskSetBlacklist1.updateBlacklistForFailedTask( - "hostB", exec = "2", index = 1, failureReason = "testing") - blacklist.updateBlacklistForSuccessfulTaskSet(1, 0, taskSetBlacklist1.execToFailures) - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set("1", "2")) - verify(listenerBusMock).post(SparkListenerExecutorBlacklisted(0, "2", 2)) - assertEquivalentToSet(blacklist.isNodeBlacklisted(_), Set()) - - // Finally, blacklist another executor on the same node as the original blacklisted executor, - // and make sure this time we *do* blacklist the node. 
- val taskSetBlacklist2 = createTaskSetBlacklist(stageId = 0) - taskSetBlacklist2.updateBlacklistForFailedTask( - "hostA", exec = "3", index = 0, failureReason = "testing") - taskSetBlacklist2.updateBlacklistForFailedTask( - "hostA", exec = "3", index = 1, failureReason = "testing") - blacklist.updateBlacklistForSuccessfulTaskSet(0, 0, taskSetBlacklist2.execToFailures) - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set("1", "2", "3")) - verify(listenerBusMock).post(SparkListenerExecutorBlacklisted(0, "3", 2)) - assertEquivalentToSet(blacklist.isNodeBlacklisted(_), Set("hostA")) - verify(listenerBusMock).post(SparkListenerNodeBlacklisted(0, "hostA", 2)) - } - - test("blacklist still respects legacy configs") { - val conf = new SparkConf().setMaster("local") - assert(!BlacklistTracker.isBlacklistEnabled(conf)) - conf.set(config.BLACKLIST_LEGACY_TIMEOUT_CONF, 5000L) - assert(BlacklistTracker.isBlacklistEnabled(conf)) - assert(5000 === BlacklistTracker.getBlacklistTimeout(conf)) - // the new conf takes precedence, though - conf.set(config.BLACKLIST_TIMEOUT_CONF, 1000L) - assert(1000 === BlacklistTracker.getBlacklistTimeout(conf)) - - // if you explicitly set the legacy conf to 0, that also would disable blacklisting - conf.set(config.BLACKLIST_LEGACY_TIMEOUT_CONF, 0L) - assert(!BlacklistTracker.isBlacklistEnabled(conf)) - // but again, the new conf takes precedence - conf.set(config.BLACKLIST_ENABLED, true) - assert(BlacklistTracker.isBlacklistEnabled(conf)) - assert(1000 === BlacklistTracker.getBlacklistTimeout(conf)) - } - - test("check blacklist configuration invariants") { - val conf = new SparkConf().setMaster("yarn").set(config.SUBMIT_DEPLOY_MODE, "cluster") - Seq( - (2, 2), - (2, 3) - ).foreach { case (maxTaskFailures, maxNodeAttempts) => - conf.set(config.TASK_MAX_FAILURES, maxTaskFailures) - conf.set(config.MAX_TASK_ATTEMPTS_PER_NODE.key, maxNodeAttempts.toString) - val excMsg = intercept[IllegalArgumentException] { - BlacklistTracker.validateBlacklistConfs(conf) - }.getMessage() - assert(excMsg === s"${config.MAX_TASK_ATTEMPTS_PER_NODE.key} " + - s"( = ${maxNodeAttempts}) was >= ${config.TASK_MAX_FAILURES.key} " + - s"( = ${maxTaskFailures} ). Though blacklisting is enabled, with this configuration, " + - s"Spark will not be robust to one bad node. 
Decrease " + - s"${config.MAX_TASK_ATTEMPTS_PER_NODE.key}, increase ${config.TASK_MAX_FAILURES.key}, " + - s"or disable blacklisting with ${config.BLACKLIST_ENABLED.key}") - } - - conf.remove(config.TASK_MAX_FAILURES) - conf.remove(config.MAX_TASK_ATTEMPTS_PER_NODE) - - Seq( - config.MAX_TASK_ATTEMPTS_PER_EXECUTOR, - config.MAX_TASK_ATTEMPTS_PER_NODE, - config.MAX_FAILURES_PER_EXEC_STAGE, - config.MAX_FAILED_EXEC_PER_NODE_STAGE, - config.MAX_FAILURES_PER_EXEC, - config.MAX_FAILED_EXEC_PER_NODE, - config.BLACKLIST_TIMEOUT_CONF - ).foreach { config => - conf.set(config.key, "0") - val excMsg = intercept[IllegalArgumentException] { - BlacklistTracker.validateBlacklistConfs(conf) - }.getMessage() - assert(excMsg.contains(s"${config.key} was 0, but must be > 0.")) - conf.remove(config) - } - } - - test("blacklisting kills executors, configured by BLACKLIST_KILL_ENABLED") { - val allocationClientMock = mock[ExecutorAllocationClient] - when(allocationClientMock.killExecutors(any(), any(), any(), any())).thenReturn(Seq("called")) - when(allocationClientMock.killExecutorsOnHost("hostA")).thenAnswer { (_: InvocationOnMock) => - // To avoid a race between blacklisting and killing, it is important that the nodeBlacklist - // is updated before we ask the executor allocation client to kill all the executors - // on a particular host. - if (blacklist.nodeBlacklist.contains("hostA")) { - true - } else { - throw new IllegalStateException("hostA should be on the blacklist") - } - } - blacklist = new BlacklistTracker(listenerBusMock, conf, Some(allocationClientMock), clock) - - // Disable auto-kill. Blacklist an executor and make sure killExecutors is not called. - conf.set(config.BLACKLIST_KILL_ENABLED, false) - - val taskSetBlacklist0 = createTaskSetBlacklist(stageId = 0) - // Fail 4 tasks in one task set on executor 1, so that executor gets blacklisted for the whole - // application. - (0 until 4).foreach { partition => - taskSetBlacklist0.updateBlacklistForFailedTask( - "hostA", exec = "1", index = partition, failureReason = "testing") - } - blacklist.updateBlacklistForSuccessfulTaskSet(0, 0, taskSetBlacklist0.execToFailures) - - verify(allocationClientMock, never).killExecutor(any()) - - val taskSetBlacklist1 = createTaskSetBlacklist(stageId = 1) - // Fail 4 tasks in one task set on executor 2, so that executor gets blacklisted for the whole - // application. Since that's the second executor that is blacklisted on the same node, we also - // blacklist that node. - (0 until 4).foreach { partition => - taskSetBlacklist1.updateBlacklistForFailedTask( - "hostA", exec = "2", index = partition, failureReason = "testing") - } - blacklist.updateBlacklistForSuccessfulTaskSet(0, 0, taskSetBlacklist1.execToFailures) - - verify(allocationClientMock, never).killExecutors(any(), any(), any(), any()) - verify(allocationClientMock, never).killExecutorsOnHost(any()) - - // Enable auto-kill. Blacklist an executor and make sure killExecutors is called. - conf.set(config.BLACKLIST_KILL_ENABLED, true) - blacklist = new BlacklistTracker(listenerBusMock, conf, Some(allocationClientMock), clock) - - val taskSetBlacklist2 = createTaskSetBlacklist(stageId = 0) - // Fail 4 tasks in one task set on executor 1, so that executor gets blacklisted for the whole - // application. 
- (0 until 4).foreach { partition => - taskSetBlacklist2.updateBlacklistForFailedTask( - "hostA", exec = "1", index = partition, failureReason = "testing") - } - blacklist.updateBlacklistForSuccessfulTaskSet(0, 0, taskSetBlacklist2.execToFailures) - - verify(allocationClientMock).killExecutors(Seq("1"), false, false, true) - - val taskSetBlacklist3 = createTaskSetBlacklist(stageId = 1) - // Fail 4 tasks in one task set on executor 2, so that executor gets blacklisted for the whole - // application. Since that's the second executor that is blacklisted on the same node, we also - // blacklist that node. - (0 until 4).foreach { partition => - taskSetBlacklist3.updateBlacklistForFailedTask( - "hostA", exec = "2", index = partition, failureReason = "testing") - } - blacklist.updateBlacklistForSuccessfulTaskSet(0, 0, taskSetBlacklist3.execToFailures) - - verify(allocationClientMock).killExecutors(Seq("2"), false, false, true) - verify(allocationClientMock).killExecutorsOnHost("hostA") - } - - test("fetch failure blacklisting kills executors, configured by BLACKLIST_KILL_ENABLED") { - val allocationClientMock = mock[ExecutorAllocationClient] - when(allocationClientMock.killExecutors(any(), any(), any(), any())).thenReturn(Seq("called")) - when(allocationClientMock.killExecutorsOnHost("hostA")).thenAnswer { (_: InvocationOnMock) => - // To avoid a race between blacklisting and killing, it is important that the nodeBlacklist - // is updated before we ask the executor allocation client to kill all the executors - // on a particular host. - if (blacklist.nodeBlacklist.contains("hostA")) { - true - } else { - throw new IllegalStateException("hostA should be on the blacklist") - } - } - - conf.set(config.BLACKLIST_FETCH_FAILURE_ENABLED, true) - blacklist = new BlacklistTracker(listenerBusMock, conf, Some(allocationClientMock), clock) - - // Disable auto-kill. Blacklist an executor and make sure killExecutors is not called. - conf.set(config.BLACKLIST_KILL_ENABLED, false) - blacklist.updateBlacklistForFetchFailure("hostA", exec = "1") - - verify(allocationClientMock, never).killExecutors(any(), any(), any(), any()) - verify(allocationClientMock, never).killExecutorsOnHost(any()) - - assert(blacklist.nodeToBlacklistedExecs.contains("hostA")) - assert(blacklist.nodeToBlacklistedExecs("hostA").contains("1")) - - // Enable auto-kill. Blacklist an executor and make sure killExecutors is called. - conf.set(config.BLACKLIST_KILL_ENABLED, true) - blacklist = new BlacklistTracker(listenerBusMock, conf, Some(allocationClientMock), clock) - clock.advance(1000) - blacklist.updateBlacklistForFetchFailure("hostA", exec = "1") - - verify(allocationClientMock).killExecutors(Seq("1"), false, false, true) - verify(allocationClientMock, never).killExecutorsOnHost(any()) - - assert(blacklist.executorIdToBlacklistStatus.contains("1")) - assert(blacklist.executorIdToBlacklistStatus("1").node === "hostA") - assert(blacklist.executorIdToBlacklistStatus("1").expiryTime === - 1000 + blacklist.BLACKLIST_TIMEOUT_MILLIS) - assert(blacklist.nextExpiryTime === 1000 + blacklist.BLACKLIST_TIMEOUT_MILLIS) - assert(blacklist.nodeIdToBlacklistExpiryTime.isEmpty) - assert(blacklist.nodeToBlacklistedExecs.contains("hostA")) - assert(blacklist.nodeToBlacklistedExecs("hostA").contains("1")) - - // Enable external shuffle service to see if all the executors on this node will be killed. 
- conf.set(config.SHUFFLE_SERVICE_ENABLED, true) - clock.advance(1000) - blacklist.updateBlacklistForFetchFailure("hostA", exec = "2") - - verify(allocationClientMock, never).killExecutors(Seq("2"), true, true) - verify(allocationClientMock).killExecutorsOnHost("hostA") - - assert(blacklist.nodeIdToBlacklistExpiryTime.contains("hostA")) - assert(blacklist.nodeIdToBlacklistExpiryTime("hostA") === - 2000 + blacklist.BLACKLIST_TIMEOUT_MILLIS) - assert(blacklist.nextExpiryTime === 1000 + blacklist.BLACKLIST_TIMEOUT_MILLIS) - } -} diff --git a/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala index d648293fdbe06..47e37fc55cefe 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala @@ -300,7 +300,7 @@ private class CSMockExternalClusterManager extends ExternalClusterManager { when(ts.applicationId()).thenReturn("appid1") when(ts.applicationAttemptId()).thenReturn(Some("attempt1")) when(ts.schedulingMode).thenReturn(SchedulingMode.FIFO) - when(ts.nodeBlacklist()).thenReturn(Set.empty[String]) + when(ts.excludedNodes()).thenReturn(Set.empty[String]) ts } diff --git a/core/src/test/scala/org/apache/spark/scheduler/BlacklistIntegrationSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/HealthTrackerIntegrationSuite.scala similarity index 86% rename from core/src/test/scala/org/apache/spark/scheduler/BlacklistIntegrationSuite.scala rename to core/src/test/scala/org/apache/spark/scheduler/HealthTrackerIntegrationSuite.scala index 246d4b2f56ec9..29a8f4be8b72b 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/BlacklistIntegrationSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/HealthTrackerIntegrationSuite.scala @@ -20,7 +20,7 @@ import org.apache.spark._ import org.apache.spark.internal.config import org.apache.spark.internal.config.Tests._ -class BlacklistIntegrationSuite extends SchedulerIntegrationSuite[MultiExecutorMockBackend]{ +class HealthTrackerIntegrationSuite extends SchedulerIntegrationSuite[MultiExecutorMockBackend]{ val badHost = "host-0" @@ -40,9 +40,9 @@ class BlacklistIntegrationSuite extends SchedulerIntegrationSuite[MultiExecutorM // Test demonstrating the issue -- without a config change, the scheduler keeps scheduling // according to locality preferences, and so the job fails - testScheduler("If preferred node is bad, without blacklist job will fail", + testScheduler("If preferred node is bad, without excludeOnFailure job will fail", extraConfs = Seq( - config.BLACKLIST_ENABLED.key -> "false" + config.EXCLUDE_ON_FAILURE_ENABLED.key -> "false" )) { val rdd = new MockRDDWithLocalityPrefs(sc, 10, Nil, badHost) withBackend(badHostBackend _) { @@ -55,19 +55,19 @@ class BlacklistIntegrationSuite extends SchedulerIntegrationSuite[MultiExecutorM testScheduler( "With default settings, job can succeed despite multiple bad executors on node", extraConfs = Seq( - config.BLACKLIST_ENABLED.key -> "true", + config.EXCLUDE_ON_FAILURE_ENABLED.key -> "true", config.TASK_MAX_FAILURES.key -> "4", TEST_N_HOSTS.key -> "2", TEST_N_EXECUTORS_HOST.key -> "5", TEST_N_CORES_EXECUTOR.key -> "10" ) ) { - // To reliably reproduce the failure that would occur without blacklisting, we have to use 1 + // To reliably reproduce the failure that would occur without exludeOnFailure, we have to use 1 // task. 
That way, we ensure this 1 task gets rotated through enough bad executors on the host // to fail the taskSet, before we have a bunch of different tasks fail in the executors so we - // blacklist them. - // But the point here is -- without blacklisting, we would never schedule anything on the good - // host-1 before we hit too many failures trying our preferred host-0. + // exclude them. + // But the point here is -- without excludeOnFailure, we would never schedule anything on the + // good host-1 before we hit too many failures trying our preferred host-0. val rdd = new MockRDDWithLocalityPrefs(sc, 1, Nil, badHost) withBackend(badHostBackend _) { val jobFuture = submit(rdd, (0 until 1).toArray) @@ -76,12 +76,12 @@ class BlacklistIntegrationSuite extends SchedulerIntegrationSuite[MultiExecutorM assertDataStructuresEmpty(noFailure = true) } - // Here we run with the blacklist on, and the default config takes care of having this + // Here we run with the excludeOnFailure on, and the default config takes care of having this // robust to one bad node. testScheduler( "Bad node with multiple executors, job will still succeed with the right confs", extraConfs = Seq( - config.BLACKLIST_ENABLED.key -> "true", + config.EXCLUDE_ON_FAILURE_ENABLED.key -> "true", // just to avoid this test taking too long config.LOCALITY_WAIT.key -> "10ms" ) @@ -100,7 +100,7 @@ class BlacklistIntegrationSuite extends SchedulerIntegrationSuite[MultiExecutorM testScheduler( "SPARK-15865 Progress with fewer executors than maxTaskFailures", extraConfs = Seq( - config.BLACKLIST_ENABLED.key -> "true", + config.EXCLUDE_ON_FAILURE_ENABLED.key -> "true", TEST_N_HOSTS.key -> "2", TEST_N_EXECUTORS_HOST.key -> "1", TEST_N_CORES_EXECUTOR.key -> "1", @@ -116,7 +116,7 @@ class BlacklistIntegrationSuite extends SchedulerIntegrationSuite[MultiExecutorM awaitJobTermination(jobFuture, duration) val pattern = ( s"""|Aborting TaskSet 0.0 because task .* - |cannot run anywhere due to node and executor blacklist""".stripMargin).r + |cannot run anywhere due to node and executor excludeOnFailure""".stripMargin).r assert(pattern.findFirstIn(failure.getMessage).isDefined, s"Couldn't find $pattern in ${failure.getMessage()}") } diff --git a/core/src/test/scala/org/apache/spark/scheduler/HealthTrackerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/HealthTrackerSuite.scala new file mode 100644 index 0000000000000..7ecc1f51ce236 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/scheduler/HealthTrackerSuite.scala @@ -0,0 +1,615 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.scheduler + +import org.mockito.ArgumentMatchers.any +import org.mockito.Mockito.{never, verify, when} +import org.mockito.invocation.InvocationOnMock +import org.scalatest.BeforeAndAfterEach +import org.scalatestplus.mockito.MockitoSugar + +import org.apache.spark._ +import org.apache.spark.internal.config +import org.apache.spark.util.ManualClock + +class HealthTrackerSuite extends SparkFunSuite with BeforeAndAfterEach with MockitoSugar + with LocalSparkContext { + + private val clock = new ManualClock(0) + + private var healthTracker: HealthTracker = _ + private var listenerBusMock: LiveListenerBus = _ + private var scheduler: TaskSchedulerImpl = _ + private var conf: SparkConf = _ + + override def beforeEach(): Unit = { + conf = new SparkConf().setAppName("test").setMaster("local") + .set(config.EXCLUDE_ON_FAILURE_ENABLED.key, "true") + scheduler = mockTaskSchedWithConf(conf) + + clock.setTime(0) + + listenerBusMock = mock[LiveListenerBus] + healthTracker = new HealthTracker(listenerBusMock, conf, None, clock) + } + + override def afterEach(): Unit = { + if (healthTracker != null) { + healthTracker = null + } + if (scheduler != null) { + scheduler.stop() + scheduler = null + } + super.afterEach() + } + + // All executors and hosts used in tests should be in this set, so that [[assertEquivalentToSet]] + // works. Its OK if its got extraneous entries + val allExecutorAndHostIds = { + (('A' to 'Z')++ (1 to 100).map(_.toString)) + .flatMap{ suffix => + Seq(s"host$suffix", s"host-$suffix") + } + }.toSet + + /** + * Its easier to write our tests as if we could directly look at the sets of nodes & executors in + * the exclude. However the api doesn't expose a set, so this is a simple way to test + * something similar, since we know the universe of values that might appear in these sets. + */ + def assertEquivalentToSet(f: String => Boolean, expected: Set[String]): Unit = { + allExecutorAndHostIds.foreach { id => + val actual = f(id) + val exp = expected.contains(id) + assert(actual === exp, raw"""for string "$id" """) + } + } + + def mockTaskSchedWithConf(conf: SparkConf): TaskSchedulerImpl = { + sc = new SparkContext(conf) + val scheduler = mock[TaskSchedulerImpl] + when(scheduler.sc).thenReturn(sc) + when(scheduler.mapOutputTracker).thenReturn( + SparkEnv.get.mapOutputTracker.asInstanceOf[MapOutputTrackerMaster]) + scheduler + } + + def createTaskSetExcludelist(stageId: Int = 0): TaskSetExcludelist = { + new TaskSetExcludelist(listenerBusMock, conf, stageId, stageAttemptId = 0, clock = clock) + } + + test("executors can be excluded with only a few failures per stage") { + // For many different stages, executor 1 fails a task, then executor 2 succeeds the task, + // and then the task set is done. Not enough failures to exclude the executor *within* + // any particular taskset, but we still exclude the executor overall eventually. + // Also, we intentionally have a mix of task successes and failures -- there are even some + // successes after the executor is excluded. The idea here is those tasks get scheduled + // before the executor is excluded. We might get successes after excluding (because the + // executor might be flaky but not totally broken). But successes should not unexclude the + // executor. 
+ val failuresUntilExcludeed = conf.get(config.MAX_FAILURES_PER_EXEC) + var failuresSoFar = 0 + (0 until failuresUntilExcludeed * 10).foreach { stageId => + val taskSetExclude = createTaskSetExcludelist(stageId) + if (stageId % 2 == 0) { + // fail one task in every other taskset + taskSetExclude.updateExcludedForFailedTask( + "hostA", exec = "1", index = 0, failureReason = "testing") + failuresSoFar += 1 + } + healthTracker.updateExcludedForSuccessfulTaskSet(stageId, 0, taskSetExclude.execToFailures) + assert(failuresSoFar == stageId / 2 + 1) + if (failuresSoFar < failuresUntilExcludeed) { + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set()) + } else { + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set("1")) + verify(listenerBusMock).post( + SparkListenerExecutorExcluded(0, "1", failuresUntilExcludeed)) + verify(listenerBusMock).post( + SparkListenerExecutorBlacklisted(0, "1", failuresUntilExcludeed)) + } + } + } + + // If an executor has many task failures, but the task set ends up failing, it shouldn't be + // counted against the executor. + test("executors aren't excluded as a result of tasks in failed task sets") { + val failuresUntilExcludeed = conf.get(config.MAX_FAILURES_PER_EXEC) + // for many different stages, executor 1 fails a task, and then the taskSet fails. + (0 until failuresUntilExcludeed * 10).foreach { stage => + val taskSetExclude = createTaskSetExcludelist(stage) + taskSetExclude.updateExcludedForFailedTask( + "hostA", exec = "1", index = 0, failureReason = "testing") + } + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set()) + } + + Seq(true, false).foreach { succeedTaskSet => + val label = if (succeedTaskSet) "success" else "failure" + test(s"stage exclude updates correctly on stage $label") { + // Within one taskset, an executor fails a few times, so it's excluded for the taskset. + // But if the taskset fails, we shouldn't exclude the executor after the stage. + val taskSetExclude = createTaskSetExcludelist(0) + // We trigger enough failures for both the taskset exclude, and the application exclude. + val numFailures = math.max(conf.get(config.MAX_FAILURES_PER_EXEC), + conf.get(config.MAX_FAILURES_PER_EXEC_STAGE)) + (0 until numFailures).foreach { index => + taskSetExclude.updateExcludedForFailedTask( + "hostA", exec = "1", index = index, failureReason = "testing") + } + assert(taskSetExclude.isExecutorExcludedForTaskSet("1")) + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set()) + if (succeedTaskSet) { + // The task set succeeded elsewhere, so we should count those failures against our executor, + // and it should be excluded for the entire application. + healthTracker.updateExcludedForSuccessfulTaskSet(0, 0, taskSetExclude.execToFailures) + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set("1")) + verify(listenerBusMock).post(SparkListenerExecutorExcluded(0, "1", numFailures)) + } else { + // The task set failed, so we don't count these failures against the executor for other + // stages. + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set()) + } + } + } + + test("excluded executors and nodes get recovered with time") { + val taskSetExclude0 = createTaskSetExcludelist(stageId = 0) + // Fail 4 tasks in one task set on executor 1, so that executor gets excluded for the whole + // application. 
+ (0 until 4).foreach { partition => + taskSetExclude0.updateExcludedForFailedTask( + "hostA", exec = "1", index = partition, failureReason = "testing") + } + healthTracker.updateExcludedForSuccessfulTaskSet(0, 0, taskSetExclude0.execToFailures) + assert(healthTracker.excludedNodeList() === Set()) + assertEquivalentToSet(healthTracker.isNodeExcluded(_), Set()) + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set("1")) + verify(listenerBusMock).post(SparkListenerExecutorExcluded(0, "1", 4)) + verify(listenerBusMock).post(SparkListenerExecutorBlacklisted(0, "1", 4)) + + val taskSetExclude1 = createTaskSetExcludelist(stageId = 1) + // Fail 4 tasks in one task set on executor 2, so that executor gets excluded for the whole + // application. Since that's the second executor that is excluded on the same node, we also + // exclude that node. + (0 until 4).foreach { partition => + taskSetExclude1.updateExcludedForFailedTask( + "hostA", exec = "2", index = partition, failureReason = "testing") + } + healthTracker.updateExcludedForSuccessfulTaskSet(0, 0, taskSetExclude1.execToFailures) + assert(healthTracker.excludedNodeList() === Set("hostA")) + assertEquivalentToSet(healthTracker.isNodeExcluded(_), Set("hostA")) + verify(listenerBusMock).post(SparkListenerNodeExcluded(0, "hostA", 2)) + verify(listenerBusMock).post(SparkListenerNodeBlacklisted(0, "hostA", 2)) + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set("1", "2")) + verify(listenerBusMock).post(SparkListenerExecutorExcluded(0, "2", 4)) + verify(listenerBusMock).post(SparkListenerExecutorBlacklisted(0, "2", 4)) + + // Advance the clock and then make sure hostA and executors 1 and 2 have been removed from the + // exclude. + val timeout = healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS + 1 + clock.advance(timeout) + healthTracker.applyExcludeOnFailureTimeout() + assert(healthTracker.excludedNodeList() === Set()) + assertEquivalentToSet(healthTracker.isNodeExcluded(_), Set()) + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set()) + verify(listenerBusMock).post(SparkListenerExecutorUnexcluded(timeout, "2")) + verify(listenerBusMock).post(SparkListenerExecutorUnexcluded(timeout, "1")) + verify(listenerBusMock).post(SparkListenerExecutorUnblacklisted(timeout, "2")) + verify(listenerBusMock).post(SparkListenerExecutorUnblacklisted(timeout, "1")) + verify(listenerBusMock).post(SparkListenerNodeUnexcluded(timeout, "hostA")) + + // Fail one more task, but executor isn't put back into exclude since the count of failures + // on that executor should have been reset to 0. + val taskSetExclude2 = createTaskSetExcludelist(stageId = 2) + taskSetExclude2.updateExcludedForFailedTask( + "hostA", exec = "1", index = 0, failureReason = "testing") + healthTracker.updateExcludedForSuccessfulTaskSet(2, 0, taskSetExclude2.execToFailures) + assert(healthTracker.excludedNodeList() === Set()) + assertEquivalentToSet(healthTracker.isNodeExcluded(_), Set()) + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set()) + } + + test("exclude can handle lost executors") { + // The exclude should still work if an executor is killed completely. We should still + // be able to exclude the entire node. + val taskSetExclude0 = createTaskSetExcludelist(stageId = 0) + // Lets say that executor 1 dies completely. We get some task failures, but + // the taskset then finishes successfully (elsewhere). 
+ (0 until 4).foreach { partition => + taskSetExclude0.updateExcludedForFailedTask( + "hostA", exec = "1", index = partition, failureReason = "testing") + } + healthTracker.handleRemovedExecutor("1") + healthTracker.updateExcludedForSuccessfulTaskSet( + stageId = 0, + stageAttemptId = 0, + taskSetExclude0.execToFailures) + assert(healthTracker.isExecutorExcluded("1")) + verify(listenerBusMock).post(SparkListenerExecutorExcluded(0, "1", 4)) + val t1 = healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS / 2 + clock.advance(t1) + + // Now another executor gets spun up on that host, but it also dies. + val taskSetExclude1 = createTaskSetExcludelist(stageId = 1) + (0 until 4).foreach { partition => + taskSetExclude1.updateExcludedForFailedTask( + "hostA", exec = "2", index = partition, failureReason = "testing") + } + healthTracker.handleRemovedExecutor("2") + healthTracker.updateExcludedForSuccessfulTaskSet( + stageId = 1, + stageAttemptId = 0, + taskSetExclude1.execToFailures) + // We've now had two bad executors on the hostA, so we should exclude the entire node. + assert(healthTracker.isExecutorExcluded("1")) + assert(healthTracker.isExecutorExcluded("2")) + verify(listenerBusMock).post(SparkListenerExecutorExcluded(t1, "2", 4)) + assert(healthTracker.isNodeExcluded("hostA")) + verify(listenerBusMock).post(SparkListenerNodeExcluded(t1, "hostA", 2)) + + // Advance the clock so that executor 1 should no longer be explicitly excluded, but + // everything else should still be excluded. + val t2 = healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS / 2 + 1 + clock.advance(t2) + healthTracker.applyExcludeOnFailureTimeout() + assert(!healthTracker.isExecutorExcluded("1")) + verify(listenerBusMock).post(SparkListenerExecutorUnexcluded(t1 + t2, "1")) + assert(healthTracker.isExecutorExcluded("2")) + assert(healthTracker.isNodeExcluded("hostA")) + // make sure we don't leak memory + assert(!healthTracker.executorIdToExcludedStatus.contains("1")) + assert(!healthTracker.nodeToExcludedExecs("hostA").contains("1")) + // Advance the timeout again so now hostA should be removed from the exclude. + clock.advance(t1) + healthTracker.applyExcludeOnFailureTimeout() + assert(!healthTracker.nodeIdToExcludedExpiryTime.contains("hostA")) + verify(listenerBusMock).post(SparkListenerNodeUnexcluded(t1 + t2 + t1, "hostA")) + // Even though unexcluding a node implicitly unexcludes all of its executors, + // there will be no SparkListenerExecutorUnexcluded sent here. + } + + test("task failures expire with time") { + // Verifies that 2 failures within the timeout period cause an executor to be excluded, but + // if task failures are spaced out by more than the timeout period, the first failure is timed + // out, and the executor isn't excluded. + var stageId = 0 + + def failOneTaskInTaskSet(exec: String): Unit = { + val taskSetExclude = createTaskSetExcludelist(stageId = stageId) + taskSetExclude.updateExcludedForFailedTask("host-" + exec, exec, 0, "testing") + healthTracker.updateExcludedForSuccessfulTaskSet(stageId, 0, taskSetExclude.execToFailures) + stageId += 1 + } + + failOneTaskInTaskSet(exec = "1") + // We have one sporadic failure on exec 2, but that's it. Later checks ensure that we never + // exclude executor 2 despite this one failure. + failOneTaskInTaskSet(exec = "2") + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set()) + assert(healthTracker.nextExpiryTime === Long.MaxValue) + + // We advance the clock past the expiry time. 
+ clock.advance(healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS + 1) + val t0 = clock.getTimeMillis() + healthTracker.applyExcludeOnFailureTimeout() + assert(healthTracker.nextExpiryTime === Long.MaxValue) + failOneTaskInTaskSet(exec = "1") + + // Because the 2nd failure on executor 1 happened past the expiry time, nothing should have been + // excluded. + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set()) + + // Now we add one more failure, within the timeout, and it should be counted. + clock.setTime(t0 + healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS - 1) + val t1 = clock.getTimeMillis() + failOneTaskInTaskSet(exec = "1") + healthTracker.applyExcludeOnFailureTimeout() + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set("1")) + verify(listenerBusMock).post(SparkListenerExecutorExcluded(t1, "1", 2)) + assert(healthTracker.nextExpiryTime === t1 + healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS) + + // Add failures on executor 3, make sure it gets put on the exclude. + clock.setTime(t1 + healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS - 1) + val t2 = clock.getTimeMillis() + failOneTaskInTaskSet(exec = "3") + failOneTaskInTaskSet(exec = "3") + healthTracker.applyExcludeOnFailureTimeout() + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set("1", "3")) + verify(listenerBusMock).post(SparkListenerExecutorExcluded(t2, "3", 2)) + assert(healthTracker.nextExpiryTime === t1 + healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS) + + // Now we go past the timeout for executor 1, so it should be dropped from the exclude. + clock.setTime(t1 + healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS + 1) + healthTracker.applyExcludeOnFailureTimeout() + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set("3")) + verify(listenerBusMock).post(SparkListenerExecutorUnexcluded(clock.getTimeMillis(), "1")) + assert(healthTracker.nextExpiryTime === t2 + healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS) + + // Make sure that we update correctly when we go from having excluded executors to + // just having tasks with timeouts. + clock.setTime(t2 + healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS - 1) + failOneTaskInTaskSet(exec = "4") + healthTracker.applyExcludeOnFailureTimeout() + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set("3")) + assert(healthTracker.nextExpiryTime === t2 + healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS) + + clock.setTime(t2 + healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS + 1) + healthTracker.applyExcludeOnFailureTimeout() + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set()) + verify(listenerBusMock).post(SparkListenerExecutorUnexcluded(clock.getTimeMillis(), "3")) + // we've got one task failure still, but we don't bother setting nextExpiryTime to it, to + // avoid wasting time checking for expiry of individual task failures. + assert(healthTracker.nextExpiryTime === Long.MaxValue) + } + + test("task failure timeout works as expected for long-running tasksets") { + // This ensures that we don't trigger spurious excluding for long tasksets, when the taskset + // finishes long after the task failures. We create two tasksets, each with one failure. + // Individually they shouldn't cause any excluding since there is only one failure. + // Furthermore, we space the failures out so far that even when both tasksets have completed, + // we still don't trigger any excluding. 
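+    // Task failures are timestamped when they occur, not when their taskset completes, so a
+    // failure recorded long ago should already have aged out by the time its taskset finishes.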
+ val taskSetExclude1 = createTaskSetExcludelist(stageId = 1) + val taskSetExclude2 = createTaskSetExcludelist(stageId = 2) + // Taskset1 has one failure immediately + taskSetExclude1.updateExcludedForFailedTask("host-1", "1", 0, "testing") + // Then we have a *long* delay, much longer than the timeout, before any other failures or + // taskset completion + clock.advance(healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS * 5) + // After the long delay, we have one failure on taskset 2, on the same executor + taskSetExclude2.updateExcludedForFailedTask("host-1", "1", 0, "testing") + // Finally, we complete both tasksets. Its important here to complete taskset2 *first*. We + // want to make sure that when taskset 1 finishes, even though we've now got two task failures, + // we realize that the task failure we just added was well before the timeout. + clock.advance(1) + healthTracker.updateExcludedForSuccessfulTaskSet(stageId = 2, 0, taskSetExclude2.execToFailures) + clock.advance(1) + healthTracker.updateExcludedForSuccessfulTaskSet(stageId = 1, 0, taskSetExclude1.execToFailures) + + // Make sure nothing was excluded + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set()) + } + + test("only exclude nodes for the application when enough executors have failed on that " + + "specific host") { + // we exclude executors on two different hosts -- make sure that doesn't lead to any + // node excluding + val taskSetExclude0 = createTaskSetExcludelist(stageId = 0) + taskSetExclude0.updateExcludedForFailedTask( + "hostA", exec = "1", index = 0, failureReason = "testing") + taskSetExclude0.updateExcludedForFailedTask( + "hostA", exec = "1", index = 1, failureReason = "testing") + healthTracker.updateExcludedForSuccessfulTaskSet(0, 0, taskSetExclude0.execToFailures) + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set("1")) + verify(listenerBusMock).post(SparkListenerExecutorExcluded(0, "1", 2)) + assertEquivalentToSet(healthTracker.isNodeExcluded(_), Set()) + + val taskSetExclude1 = createTaskSetExcludelist(stageId = 1) + taskSetExclude1.updateExcludedForFailedTask( + "hostB", exec = "2", index = 0, failureReason = "testing") + taskSetExclude1.updateExcludedForFailedTask( + "hostB", exec = "2", index = 1, failureReason = "testing") + healthTracker.updateExcludedForSuccessfulTaskSet(1, 0, taskSetExclude1.execToFailures) + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set("1", "2")) + verify(listenerBusMock).post(SparkListenerExecutorExcluded(0, "2", 2)) + assertEquivalentToSet(healthTracker.isNodeExcluded(_), Set()) + + // Finally, exclude another executor on the same node as the original excluded executor, + // and make sure this time we *do* exclude the node. 
+ val taskSetExclude2 = createTaskSetExcludelist(stageId = 0) + taskSetExclude2.updateExcludedForFailedTask( + "hostA", exec = "3", index = 0, failureReason = "testing") + taskSetExclude2.updateExcludedForFailedTask( + "hostA", exec = "3", index = 1, failureReason = "testing") + healthTracker.updateExcludedForSuccessfulTaskSet(0, 0, taskSetExclude2.execToFailures) + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set("1", "2", "3")) + verify(listenerBusMock).post(SparkListenerExecutorExcluded(0, "3", 2)) + assertEquivalentToSet(healthTracker.isNodeExcluded(_), Set("hostA")) + verify(listenerBusMock).post(SparkListenerNodeExcluded(0, "hostA", 2)) + } + + test("exclude still respects legacy configs") { + val conf = new SparkConf().setMaster("local") + assert(!HealthTracker.isExcludeOnFailureEnabled(conf)) + conf.set(config.EXCLUDE_ON_FAILURE_LEGACY_TIMEOUT_CONF, 5000L) + assert(HealthTracker.isExcludeOnFailureEnabled(conf)) + assert(5000 === HealthTracker.getExludeOnFailureTimeout(conf)) + // the new conf takes precedence, though + conf.set(config.EXCLUDE_ON_FAILURE_TIMEOUT_CONF, 1000L) + assert(1000 === HealthTracker.getExludeOnFailureTimeout(conf)) + + // if you explicitly set the legacy conf to 0, that also would disable excluding + conf.set(config.EXCLUDE_ON_FAILURE_LEGACY_TIMEOUT_CONF, 0L) + assert(!HealthTracker.isExcludeOnFailureEnabled(conf)) + // but again, the new conf takes precedence + conf.set(config.EXCLUDE_ON_FAILURE_ENABLED, true) + assert(HealthTracker.isExcludeOnFailureEnabled(conf)) + assert(1000 === HealthTracker.getExludeOnFailureTimeout(conf)) + } + + test("check exclude configuration invariants") { + val conf = new SparkConf().setMaster("yarn").set(config.SUBMIT_DEPLOY_MODE, "cluster") + Seq( + (2, 2), + (2, 3) + ).foreach { case (maxTaskFailures, maxNodeAttempts) => + conf.set(config.TASK_MAX_FAILURES, maxTaskFailures) + conf.set(config.MAX_TASK_ATTEMPTS_PER_NODE.key, maxNodeAttempts.toString) + val excMsg = intercept[IllegalArgumentException] { + HealthTracker.validateExcludeOnFailureConfs(conf) + }.getMessage() + assert(excMsg === s"${config.MAX_TASK_ATTEMPTS_PER_NODE.key} " + + s"( = ${maxNodeAttempts}) was >= ${config.TASK_MAX_FAILURES.key} " + + s"( = ${maxTaskFailures} ). Though excludeOnFailure is enabled, with this " + + s"configuration, Spark will not be robust to one bad node. 
Decrease " + + s"${config.MAX_TASK_ATTEMPTS_PER_NODE.key}, increase ${config.TASK_MAX_FAILURES.key}, " + + s"or disable excludeOnFailure with ${config.EXCLUDE_ON_FAILURE_ENABLED.key}") + } + + conf.remove(config.TASK_MAX_FAILURES) + conf.remove(config.MAX_TASK_ATTEMPTS_PER_NODE) + + Seq( + config.MAX_TASK_ATTEMPTS_PER_EXECUTOR, + config.MAX_TASK_ATTEMPTS_PER_NODE, + config.MAX_FAILURES_PER_EXEC_STAGE, + config.MAX_FAILED_EXEC_PER_NODE_STAGE, + config.MAX_FAILURES_PER_EXEC, + config.MAX_FAILED_EXEC_PER_NODE, + config.EXCLUDE_ON_FAILURE_TIMEOUT_CONF + ).foreach { config => + conf.set(config.key, "0") + val excMsg = intercept[IllegalArgumentException] { + HealthTracker.validateExcludeOnFailureConfs(conf) + }.getMessage() + assert(excMsg.contains(s"${config.key} was 0, but must be > 0.")) + conf.remove(config) + } + } + + test("excluding kills executors, configured by EXCLUDE_ON_FAILURE_KILL_ENABLED") { + val allocationClientMock = mock[ExecutorAllocationClient] + when(allocationClientMock.killExecutors(any(), any(), any(), any())).thenReturn(Seq("called")) + when(allocationClientMock.killExecutorsOnHost("hostA")).thenAnswer { (_: InvocationOnMock) => + // To avoid a race between excluding and killing, it is important that the nodeExclude + // is updated before we ask the executor allocation client to kill all the executors + // on a particular host. + if (healthTracker.excludedNodeList().contains("hostA")) { + true + } else { + throw new IllegalStateException("hostA should be on the exclude") + } + } + healthTracker = new HealthTracker(listenerBusMock, conf, Some(allocationClientMock), clock) + + // Disable auto-kill. Exclude an executor and make sure killExecutors is not called. + conf.set(config.EXCLUDE_ON_FAILURE_KILL_ENABLED, false) + + val taskSetExclude0 = createTaskSetExcludelist(stageId = 0) + // Fail 4 tasks in one task set on executor 1, so that executor gets excluded for the whole + // application. + (0 until 4).foreach { partition => + taskSetExclude0.updateExcludedForFailedTask( + "hostA", exec = "1", index = partition, failureReason = "testing") + } + healthTracker.updateExcludedForSuccessfulTaskSet(0, 0, taskSetExclude0.execToFailures) + + verify(allocationClientMock, never).killExecutor(any()) + + val taskSetExclude1 = createTaskSetExcludelist(stageId = 1) + // Fail 4 tasks in one task set on executor 2, so that executor gets excluded for the whole + // application. Since that's the second executor that is excluded on the same node, we also + // exclude that node. + (0 until 4).foreach { partition => + taskSetExclude1.updateExcludedForFailedTask( + "hostA", exec = "2", index = partition, failureReason = "testing") + } + healthTracker.updateExcludedForSuccessfulTaskSet(0, 0, taskSetExclude1.execToFailures) + + verify(allocationClientMock, never).killExecutors(any(), any(), any(), any()) + verify(allocationClientMock, never).killExecutorsOnHost(any()) + + // Enable auto-kill. Exclude an executor and make sure killExecutors is called. + conf.set(config.EXCLUDE_ON_FAILURE_KILL_ENABLED, true) + healthTracker = new HealthTracker(listenerBusMock, conf, Some(allocationClientMock), clock) + + val taskSetExclude2 = createTaskSetExcludelist(stageId = 0) + // Fail 4 tasks in one task set on executor 1, so that executor gets excluded for the whole + // application. 
+ (0 until 4).foreach { partition => + taskSetExclude2.updateExcludedForFailedTask( + "hostA", exec = "1", index = partition, failureReason = "testing") + } + healthTracker.updateExcludedForSuccessfulTaskSet(0, 0, taskSetExclude2.execToFailures) + + verify(allocationClientMock).killExecutors(Seq("1"), false, false, true) + + val taskSetExclude3 = createTaskSetExcludelist(stageId = 1) + // Fail 4 tasks in one task set on executor 2, so that executor gets excluded for the whole + // application. Since that's the second executor that is excluded on the same node, we also + // exclude that node. + (0 until 4).foreach { partition => + taskSetExclude3.updateExcludedForFailedTask( + "hostA", exec = "2", index = partition, failureReason = "testing") + } + healthTracker.updateExcludedForSuccessfulTaskSet(0, 0, taskSetExclude3.execToFailures) + + verify(allocationClientMock).killExecutors(Seq("2"), false, false, true) + verify(allocationClientMock).killExecutorsOnHost("hostA") + } + + test("fetch failure excluding kills executors, configured by EXCLUDE_ON_FAILURE_KILL_ENABLED") { + val allocationClientMock = mock[ExecutorAllocationClient] + when(allocationClientMock.killExecutors(any(), any(), any(), any())).thenReturn(Seq("called")) + when(allocationClientMock.killExecutorsOnHost("hostA")).thenAnswer { (_: InvocationOnMock) => + // To avoid a race between excluding and killing, it is important that the nodeExclude + // is updated before we ask the executor allocation client to kill all the executors + // on a particular host. + if (healthTracker.excludedNodeList().contains("hostA")) { + true + } else { + throw new IllegalStateException("hostA should be on the exclude") + } + } + + conf.set(config.EXCLUDE_ON_FAILURE_FETCH_FAILURE_ENABLED, true) + healthTracker = new HealthTracker(listenerBusMock, conf, Some(allocationClientMock), clock) + + // Disable auto-kill. Exclude an executor and make sure killExecutors is not called. + conf.set(config.EXCLUDE_ON_FAILURE_KILL_ENABLED, false) + healthTracker.updateExcludedForFetchFailure("hostA", exec = "1") + + verify(allocationClientMock, never).killExecutors(any(), any(), any(), any()) + verify(allocationClientMock, never).killExecutorsOnHost(any()) + + assert(healthTracker.nodeToExcludedExecs.contains("hostA")) + assert(healthTracker.nodeToExcludedExecs("hostA").contains("1")) + + // Enable auto-kill. Exclude an executor and make sure killExecutors is called. + conf.set(config.EXCLUDE_ON_FAILURE_KILL_ENABLED, true) + healthTracker = new HealthTracker(listenerBusMock, conf, Some(allocationClientMock), clock) + clock.advance(1000) + healthTracker.updateExcludedForFetchFailure("hostA", exec = "1") + + verify(allocationClientMock).killExecutors(Seq("1"), false, false, true) + verify(allocationClientMock, never).killExecutorsOnHost(any()) + + assert(healthTracker.executorIdToExcludedStatus.contains("1")) + assert(healthTracker.executorIdToExcludedStatus("1").node === "hostA") + assert(healthTracker.executorIdToExcludedStatus("1").expiryTime === + 1000 + healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS) + assert(healthTracker.nextExpiryTime === 1000 + healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS) + assert(healthTracker.nodeIdToExcludedExpiryTime.isEmpty) + assert(healthTracker.nodeToExcludedExecs.contains("hostA")) + assert(healthTracker.nodeToExcludedExecs("hostA").contains("1")) + + // Enable external shuffle service to see if all the executors on this node will be killed. 
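+    // With the external shuffle service enabled, shuffle data is served by the host rather than
+    // by individual executors, so a fetch failure implicates the whole node: the node is excluded
+    // and every executor on it is killed.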
+ conf.set(config.SHUFFLE_SERVICE_ENABLED, true) + clock.advance(1000) + healthTracker.updateExcludedForFetchFailure("hostA", exec = "2") + + verify(allocationClientMock, never).killExecutors(Seq("2"), true, true) + verify(allocationClientMock).killExecutorsOnHost("hostA") + + assert(healthTracker.nodeIdToExcludedExpiryTime.contains("hostA")) + assert(healthTracker.nodeIdToExcludedExpiryTime("hostA") === + 2000 + healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS) + assert(healthTracker.nextExpiryTime === 1000 + healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS) + } +} diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala index f29eb70eb3628..0c60c42c054cf 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala @@ -51,11 +51,11 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B var failedTaskSetReason: String = null var failedTaskSet = false - var blacklist: BlacklistTracker = null + var healthTracker: HealthTracker = null var taskScheduler: TaskSchedulerImpl = null var dagScheduler: DAGScheduler = null - val stageToMockTaskSetBlacklist = new HashMap[Int, TaskSetBlacklist]() + val stageToMockTaskSetExcludelist = new HashMap[Int, TaskSetExcludelist]() val stageToMockTaskSetManager = new HashMap[Int, TaskSetManager]() override def beforeEach(): Unit = { @@ -63,7 +63,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B failedTaskSet = false failedTaskSetException = None failedTaskSetReason = null - stageToMockTaskSetBlacklist.clear() + stageToMockTaskSetExcludelist.clear() stageToMockTaskSetManager.clear() } @@ -95,10 +95,10 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B setupHelper() } - def setupSchedulerWithMockTaskSetBlacklist(confs: (String, String)*): TaskSchedulerImpl = { - blacklist = mock[BlacklistTracker] + def setupSchedulerWithMockTaskSetExcludelist(confs: (String, String)*): TaskSchedulerImpl = { + healthTracker = mock[HealthTracker] val conf = new SparkConf().setMaster("local").setAppName("TaskSchedulerImplSuite") - conf.set(config.BLACKLIST_ENABLED, true) + conf.set(config.EXCLUDE_ON_FAILURE_ENABLED, true) confs.foreach { case (k, v) => conf.set(k, v) } sc = new SparkContext(conf) @@ -106,16 +106,16 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B new TaskSchedulerImpl(sc, sc.conf.get(config.TASK_MAX_FAILURES)) { override def createTaskSetManager(taskSet: TaskSet, maxFailures: Int): TaskSetManager = { val tsm = super.createTaskSetManager(taskSet, maxFailures) - // we need to create a spied tsm just so we can set the TaskSetBlacklist + // we need to create a spied tsm just so we can set the TaskSetExcludelist val tsmSpy = spy(tsm) - val taskSetBlacklist = mock[TaskSetBlacklist] - when(tsmSpy.taskSetBlacklistHelperOpt).thenReturn(Some(taskSetBlacklist)) + val taskSetExcludelist = mock[TaskSetExcludelist] + when(tsmSpy.taskSetExcludelistHelperOpt).thenReturn(Some(taskSetExcludelist)) stageToMockTaskSetManager(taskSet.stageId) = tsmSpy - stageToMockTaskSetBlacklist(taskSet.stageId) = taskSetBlacklist + stageToMockTaskSetExcludelist(taskSet.stageId) = taskSetExcludelist tsmSpy } - override private[scheduler] lazy val blacklistTrackerOpt = Some(blacklist) + override private[scheduler] lazy val healthTrackerOpt = 
Some(healthTracker) } setupHelper() } @@ -230,7 +230,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B sc.conf.get(config.TASK_MAX_FAILURES), clock = clock) { override def createTaskSetManager(taskSet: TaskSet, maxTaskFailures: Int): TaskSetManager = { - new TaskSetManager(this, taskSet, maxTaskFailures, blacklistTrackerOpt, clock) + new TaskSetManager(this, taskSet, maxTaskFailures, healthTrackerOpt, clock) } override def shuffleOffers(offers: IndexedSeq[WorkerOffer]): IndexedSeq[WorkerOffer] = { // Don't shuffle the offers around for this test. Instead, we'll just pass in all @@ -678,22 +678,22 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B assert(!failedTaskSet) } - test("scheduled tasks obey task and stage blacklists") { - taskScheduler = setupSchedulerWithMockTaskSetBlacklist() + test("scheduled tasks obey task and stage excludelist") { + taskScheduler = setupSchedulerWithMockTaskSetExcludelist() (0 to 2).foreach {stageId => val taskSet = FakeTask.createTaskSet(numTasks = 2, stageId = stageId, stageAttemptId = 0) taskScheduler.submitTasks(taskSet) } - // Setup our mock blacklist: - // * stage 0 is blacklisted on node "host1" - // * stage 1 is blacklisted on executor "executor3" - // * stage 0, partition 0 is blacklisted on executor 0 - // (mocked methods default to returning false, ie. no blacklisting) - when(stageToMockTaskSetBlacklist(0).isNodeBlacklistedForTaskSet("host1")).thenReturn(true) - when(stageToMockTaskSetBlacklist(1).isExecutorBlacklistedForTaskSet("executor3")) + // Setup our mock excludelist: + // * stage 0 is excluded on node "host1" + // * stage 1 is excluded on executor "executor3" + // * stage 0, partition 0 is excluded on executor 0 + // (mocked methods default to returning false, ie. no excluding) + when(stageToMockTaskSetExcludelist(0).isNodeExcludedForTaskSet("host1")).thenReturn(true) + when(stageToMockTaskSetExcludelist(1).isExecutorExcludedForTaskSet("executor3")) .thenReturn(true) - when(stageToMockTaskSetBlacklist(0).isExecutorBlacklistedForTask("executor0", 0)) + when(stageToMockTaskSetExcludelist(0).isExecutorExcludedForTask("executor0", 0)) .thenReturn(true) val offers = IndexedSeq( @@ -705,21 +705,21 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B val firstTaskAttempts = taskScheduler.resourceOffers(offers).flatten // We should schedule all tasks. assert(firstTaskAttempts.size === 6) - // Whenever we schedule a task, we must consult the node and executor blacklist. (The test + // Whenever we schedule a task, we must consult the node and executor excludelist. (The test // doesn't check exactly what checks are made because the offers get shuffled.) 
(0 to 2).foreach { stageId => - verify(stageToMockTaskSetBlacklist(stageId), atLeast(1)) - .isNodeBlacklistedForTaskSet(anyString()) - verify(stageToMockTaskSetBlacklist(stageId), atLeast(1)) - .isExecutorBlacklistedForTaskSet(anyString()) + verify(stageToMockTaskSetExcludelist(stageId), atLeast(1)) + .isNodeExcludedForTaskSet(anyString()) + verify(stageToMockTaskSetExcludelist(stageId), atLeast(1)) + .isExecutorExcludedForTaskSet(anyString()) } def tasksForStage(stageId: Int): Seq[TaskDescription] = { firstTaskAttempts.filter{_.name.contains(s"stage $stageId")} } tasksForStage(0).foreach { task => - // executors 1 & 2 blacklisted for node - // executor 0 blacklisted just for partition 0 + // executors 1 & 2 excluded for node + // executor 0 excluded just for partition 0 if (task.index == 0) { assert(task.executorId === "executor3") } else { @@ -727,12 +727,12 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B } } tasksForStage(1).foreach { task => - // executor 3 blacklisted + // executor 3 excluded assert("executor3" != task.executorId) } // no restrictions on stage 2 - // Finally, just make sure that we can still complete tasks as usual with blacklisting + // Finally, just make sure that we can still complete tasks as usual with exclusion // in effect. Finish each of the tasksets -- taskset 0 & 1 complete successfully, taskset 2 // fails. (0 to 2).foreach { stageId => @@ -770,23 +770,23 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B } // the tasksSets complete, so the tracker should be notified of the successful ones - verify(blacklist, times(1)).updateBlacklistForSuccessfulTaskSet( + verify(healthTracker, times(1)).updateExcludedForSuccessfulTaskSet( stageId = 0, stageAttemptId = 0, - failuresByExec = stageToMockTaskSetBlacklist(0).execToFailures) - verify(blacklist, times(1)).updateBlacklistForSuccessfulTaskSet( + failuresByExec = stageToMockTaskSetExcludelist(0).execToFailures) + verify(healthTracker, times(1)).updateExcludedForSuccessfulTaskSet( stageId = 1, stageAttemptId = 0, - failuresByExec = stageToMockTaskSetBlacklist(1).execToFailures) + failuresByExec = stageToMockTaskSetExcludelist(1).execToFailures) // but we shouldn't update for the failed taskset - verify(blacklist, never).updateBlacklistForSuccessfulTaskSet( + verify(healthTracker, never).updateExcludedForSuccessfulTaskSet( stageId = meq(2), stageAttemptId = anyInt(), failuresByExec = any()) } - test("scheduled tasks obey node and executor blacklists") { - taskScheduler = setupSchedulerWithMockTaskSetBlacklist() + test("scheduled tasks obey node and executor excludelists") { + taskScheduler = setupSchedulerWithMockTaskSetExcludelist() (0 to 2).foreach { stageId => val taskSet = FakeTask.createTaskSet(numTasks = 2, stageId = stageId, stageAttemptId = 0) taskScheduler.submitTasks(taskSet) @@ -800,13 +800,13 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B new WorkerOffer("executor4", "host3", 1) ) - // setup our mock blacklist: - // host1, executor0 & executor3 are completely blacklisted + // setup our mock excludelist: + // host1, executor0 & executor3 are completely excluded // This covers everything *except* one core on executor4 / host3, so that everything is still // schedulable. 
- when(blacklist.isNodeBlacklisted("host1")).thenReturn(true) - when(blacklist.isExecutorBlacklisted("executor0")).thenReturn(true) - when(blacklist.isExecutorBlacklisted("executor3")).thenReturn(true) + when(healthTracker.isNodeExcluded("host1")).thenReturn(true) + when(healthTracker.isExecutorExcluded("executor0")).thenReturn(true) + when(healthTracker.isExecutorExcluded("executor3")).thenReturn(true) val stageToTsm = (0 to 2).map { stageId => val tsm = taskScheduler.taskSetManagerForAttempt(stageId, 0).get @@ -818,12 +818,12 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B assert(firstTaskAttempts.size === 1) assert(firstTaskAttempts.head.executorId === "executor4") ('0' until '2').foreach { hostNum => - verify(blacklist, atLeast(1)).isNodeBlacklisted("host" + hostNum) + verify(healthTracker, atLeast(1)).isNodeExcluded("host" + hostNum) } } - test("abort stage when all executors are blacklisted and we cannot acquire new executor") { - taskScheduler = setupSchedulerWithMockTaskSetBlacklist() + test("abort stage when all executors are excluded and we cannot acquire new executor") { + taskScheduler = setupSchedulerWithMockTaskSetExcludelist() val taskSet = FakeTask.createTaskSet(numTasks = 10) taskScheduler.submitTasks(taskSet) val tsm = stageToMockTaskSetManager(0) @@ -836,11 +836,11 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B WorkerOffer("executor3", "host1", 2) )) - // now say our blacklist updates to blacklist a bunch of resources, but *not* everything - when(blacklist.isNodeBlacklisted("host1")).thenReturn(true) - when(blacklist.isExecutorBlacklisted("executor0")).thenReturn(true) + // now say our health tracker updates to exclude a bunch of resources, but *not* everything + when(healthTracker.isNodeExcluded("host1")).thenReturn(true) + when(healthTracker.isExecutorExcluded("executor0")).thenReturn(true) - // make an offer on the blacklisted resources. We won't schedule anything, but also won't + // make an offer on the excluded resources. 
We won't schedule anything, but also won't // abort yet, since we know of other resources that work assert(taskScheduler.resourceOffers(IndexedSeq( WorkerOffer("executor0", "host0", 2), @@ -848,9 +848,9 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B )).flatten.size === 0) assert(!tsm.isZombie) - // now update the blacklist so that everything really is blacklisted - when(blacklist.isExecutorBlacklisted("executor1")).thenReturn(true) - when(blacklist.isExecutorBlacklisted("executor2")).thenReturn(true) + // now update the health tracker so that everything really is excluded + when(healthTracker.isExecutorExcluded("executor1")).thenReturn(true) + when(healthTracker.isExecutorExcluded("executor2")).thenReturn(true) assert(taskScheduler.resourceOffers(IndexedSeq( WorkerOffer("executor0", "host0", 2), WorkerOffer("executor3", "host1", 2) @@ -859,10 +859,10 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B verify(tsm).abort(anyString(), any()) } - test("SPARK-22148 abort timer should kick in when task is completely blacklisted & no new " + + test("SPARK-22148 abort timer should kick in when task is completely excluded & no new " + "executor can be acquired") { // set the abort timer to fail immediately - taskScheduler = setupSchedulerWithMockTaskSetBlacklist( + taskScheduler = setupSchedulerWithMockTaskSetExcludelist( config.UNSCHEDULABLE_TASKSET_TIMEOUT.key -> "0") // We have only 1 task remaining with 1 executor @@ -878,10 +878,10 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B // Fail the running task val failedTask = firstTaskAttempts.find(_.executorId == "executor0").get failTask(failedTask.taskId, TaskState.FAILED, UnknownReason, tsm) - when(tsm.taskSetBlacklistHelperOpt.get.isExecutorBlacklistedForTask( + when(tsm.taskSetExcludelistHelperOpt.get.isExecutorExcludedForTask( "executor0", failedTask.index)).thenReturn(true) - // make an offer on the blacklisted executor. We won't schedule anything, and set the abort + // make an offer on the excluded executor. We won't schedule anything, and set the abort // timer to kick in immediately assert(taskScheduler.resourceOffers(IndexedSeq( WorkerOffer("executor0", "host0", 1) @@ -894,7 +894,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B } test("SPARK-22148 try to acquire a new executor when task is unschedulable with 1 executor") { - taskScheduler = setupSchedulerWithMockTaskSetBlacklist( + taskScheduler = setupSchedulerWithMockTaskSetExcludelist( config.UNSCHEDULABLE_TASKSET_TIMEOUT.key -> "10") // We have only 1 task remaining with 1 executor @@ -910,11 +910,11 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B // Fail the running task val failedTask = firstTaskAttempts.head failTask(failedTask.taskId, TaskState.FAILED, UnknownReason, tsm) - when(tsm.taskSetBlacklistHelperOpt.get.isExecutorBlacklistedForTask( + when(tsm.taskSetExcludelistHelperOpt.get.isExecutorExcludedForTask( "executor0", failedTask.index)).thenReturn(true) - // make an offer on the blacklisted executor. We won't schedule anything, and set the abort - // timer to expire if no new executors could be acquired. We kill the existing idle blacklisted + // make an offer on the excluded executor. We won't schedule anything, and set the abort + // timer to expire if no new executors could be acquired. We kill the existing idle excluded // executor and try to acquire a new one. 
assert(taskScheduler.resourceOffers(IndexedSeq( WorkerOffer("executor0", "host0", 1) @@ -930,12 +930,12 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B assert(!tsm.isZombie) } - // This is to test a scenario where we have two taskSets completely blacklisted and on acquiring + // This is to test a scenario where we have two taskSets completely excluded and on acquiring // a new executor we don't want the abort timer for the second taskSet to expire and abort the job test("SPARK-22148 abort timer should clear unschedulableTaskSetToExpiryTime for all TaskSets") { - taskScheduler = setupSchedulerWithMockTaskSetBlacklist() + taskScheduler = setupSchedulerWithMockTaskSetExcludelist() - // We have 2 taskSets with 1 task remaining in each with 1 executor completely blacklisted + // We have 2 taskSets with 1 task remaining in each with 1 executor completely excluded val taskSet1 = FakeTask.createTaskSet(numTasks = 1, stageId = 0, stageAttemptId = 0) taskScheduler.submitTasks(taskSet1) val taskSet2 = FakeTask.createTaskSet(numTasks = 1, stageId = 1, stageAttemptId = 0) @@ -952,7 +952,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B // Fail the running task val failedTask = firstTaskAttempts.head failTask(failedTask.taskId, TaskState.FAILED, UnknownReason, tsm) - when(tsm.taskSetBlacklistHelperOpt.get.isExecutorBlacklistedForTask( + when(tsm.taskSetExcludelistHelperOpt.get.isExecutorExcludedForTask( "executor0", failedTask.index)).thenReturn(true) // make an offer. We will schedule the task from the second taskSet. Since a task was scheduled @@ -966,10 +966,10 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B val tsm2 = stageToMockTaskSetManager(1) val failedTask2 = secondTaskAttempts.head failTask(failedTask2.taskId, TaskState.FAILED, UnknownReason, tsm2) - when(tsm2.taskSetBlacklistHelperOpt.get.isExecutorBlacklistedForTask( + when(tsm2.taskSetExcludelistHelperOpt.get.isExecutorExcludedForTask( "executor0", failedTask2.index)).thenReturn(true) - // make an offer on the blacklisted executor. We won't schedule anything, and set the abort + // make an offer on the excluded executor. We won't schedule anything, and set the abort // timer for taskSet1 and taskSet2 assert(taskScheduler.resourceOffers(IndexedSeq( WorkerOffer("executor0", "host0", 1) @@ -991,9 +991,9 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B // this test is to check that we don't abort a taskSet which is not being scheduled on other // executors as it is waiting on locality timeout and not being aborted because it is still not - // completely blacklisted. - test("SPARK-22148 Ensure we don't abort the taskSet if we haven't been completely blacklisted") { - taskScheduler = setupSchedulerWithMockTaskSetBlacklist( + // completely excluded. 
+ test("SPARK-22148 Ensure we don't abort the taskSet if we haven't been completely excluded") { + taskScheduler = setupSchedulerWithMockTaskSetExcludelist( config.UNSCHEDULABLE_TASKSET_TIMEOUT.key -> "0", // This is to avoid any potential flakiness in the test because of large pauses in jenkins config.LOCALITY_WAIT.key -> "30s" @@ -1014,7 +1014,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B // Fail the running task val failedTask = taskAttempts.head failTask(failedTask.taskId, TaskState.FAILED, UnknownReason, tsm) - when(tsm.taskSetBlacklistHelperOpt.get.isExecutorBlacklistedForTask( + when(tsm.taskSetExcludelistHelperOpt.get.isExecutorExcludedForTask( "executor0", failedTask.index)).thenReturn(true) // make an offer but we won't schedule anything yet as scheduler locality is still PROCESS_LOCAL @@ -1027,10 +1027,10 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B assert(!tsm.isZombie) } - test("SPARK-31418 abort timer should kick in when task is completely blacklisted &" + + test("SPARK-31418 abort timer should kick in when task is completely excluded &" + "allocation manager could not acquire a new executor before the timeout") { // set the abort timer to fail immediately - taskScheduler = setupSchedulerWithMockTaskSetBlacklist( + taskScheduler = setupSchedulerWithMockTaskSetExcludelist( config.UNSCHEDULABLE_TASKSET_TIMEOUT.key -> "0", config.DYN_ALLOCATION_ENABLED.key -> "true") @@ -1044,14 +1044,14 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B // Fail the running task failTask(0, TaskState.FAILED, UnknownReason, tsm) - when(tsm.taskSetBlacklistHelperOpt.get.isExecutorBlacklistedForTask( + when(tsm.taskSetExcludelistHelperOpt.get.isExecutorExcludedForTask( "executor0", 0)).thenReturn(true) // If the executor is busy, then dynamic allocation should kick in and try - // to acquire additional executors to schedule the blacklisted task + // to acquire additional executors to schedule the excluded task assert(taskScheduler.isExecutorBusy("executor0")) - // make an offer on the blacklisted executor. We won't schedule anything, and set the abort + // make an offer on the excluded executor. We won't schedule anything, and set the abort // timer to kick in immediately assert(taskScheduler.resourceOffers(IndexedSeq( WorkerOffer("executor0", "host0", 1) @@ -1064,31 +1064,31 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B } /** - * Helper for performance tests. Takes the explicitly blacklisted nodes and executors; verifies - * that the blacklists are used efficiently to ensure scheduling is not O(numPendingTasks). + * Helper for performance tests. Takes the explicitly excluded nodes and executors; verifies + * that the excluded are used efficiently to ensure scheduling is not O(numPendingTasks). * Creates 1 offer on executor[1-3]. Executor1 & 2 are on host1, executor3 is on host2. Passed * in nodes and executors should be on that list. */ - private def testBlacklistPerformance( + private def testExcludelistPerformance( testName: String, - nodeBlacklist: Seq[String], - execBlacklist: Seq[String]): Unit = { + nodeExcludelist: Seq[String], + execExcludelist: Seq[String]): Unit = { // Because scheduling involves shuffling the order of offers around, we run this test a few // times to cover more possibilities. There are only 3 offers, which means 6 permutations, // so 10 iterations is pretty good. 
(0 until 10).foreach { testItr => test(s"$testName: iteration $testItr") { - // When an executor or node is blacklisted, we want to make sure that we don't try - // scheduling each pending task, one by one, to discover they are all blacklisted. This is + // When an executor or node is excluded, we want to make sure that we don't try + // scheduling each pending task, one by one, to discover they are all excluded. This is // important for performance -- if we did check each task one-by-one, then responding to a // resource offer (which is usually O(1)-ish) would become O(numPendingTasks), which would // slow down scheduler throughput and slow down scheduling even on healthy executors. // Here, we check a proxy for the runtime -- we make sure the scheduling is short-circuited - // at the node or executor blacklist, so we never check the per-task blacklist. We also - // make sure we don't check the node & executor blacklist for the entire taskset + // at the node or executor excludelist, so we never check the per-task excludelist. We also + // make sure we don't check the node & executor excludelist for the entire taskset // O(numPendingTasks) times. - taskScheduler = setupSchedulerWithMockTaskSetBlacklist() + taskScheduler = setupSchedulerWithMockTaskSetExcludelist() // we schedule 500 tasks so we can clearly distinguish anything that is O(numPendingTasks) val taskSet = FakeTask.createTaskSet(numTasks = 500, stageId = 0, stageAttemptId = 0) taskScheduler.submitTasks(taskSet) @@ -1098,91 +1098,92 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B new WorkerOffer("executor2", "host1", 1), new WorkerOffer("executor3", "host2", 1) ) - // We should check the node & exec blacklists, but only O(numOffers), not O(numPendingTasks) - // times. In the worst case, after shuffling, we offer our blacklisted resource first, and - // then offer other resources which do get used. The taskset blacklist is consulted - // repeatedly as we offer resources to the taskset -- each iteration either schedules - // something, or it terminates that locality level, so the maximum number of checks is - // numCores + numLocalityLevels + // We should check the node & exec excludelists, but only O(numOffers), + // not O(numPendingTasks) times. In the worst case, after shuffling, + // we offer our excluded resource first, and then offer other resources + // which do get used. The taskset excludelist is consulted repeatedly + // as we offer resources to the taskset -- each iteration either schedules + // something, or it terminates that locality level, so the maximum number of + // checks is numCores + numLocalityLevels val numCoresOnAllOffers = offers.map(_.cores).sum val numLocalityLevels = TaskLocality.values.size - val maxBlacklistChecks = numCoresOnAllOffers + numLocalityLevels + val maxExcludelistChecks = numCoresOnAllOffers + numLocalityLevels - // Setup the blacklist - nodeBlacklist.foreach { node => - when(stageToMockTaskSetBlacklist(0).isNodeBlacklistedForTaskSet(node)).thenReturn(true) + // Setup the excludelist + nodeExcludelist.foreach { node => + when(stageToMockTaskSetExcludelist(0).isNodeExcludedForTaskSet(node)).thenReturn(true) } - execBlacklist.foreach { exec => - when(stageToMockTaskSetBlacklist(0).isExecutorBlacklistedForTaskSet(exec)) + execExcludelist.foreach { exec => + when(stageToMockTaskSetExcludelist(0).isExecutorExcludedForTaskSet(exec)) .thenReturn(true) } - // Figure out which nodes have any effective blacklisting on them. 
This means all nodes - // that are explicitly blacklisted, plus those that have *any* executors blacklisted. - val nodesForBlacklistedExecutors = offers.filter { offer => - execBlacklist.contains(offer.executorId) + // Figure out which nodes have any effective exclusions on them. This means all nodes + // that are explicitly excluded, plus those that have *any* executors excluded. + val nodesForExcludedExecutors = offers.filter { offer => + execExcludelist.contains(offer.executorId) }.map(_.host).distinct - val nodesWithAnyBlacklisting = (nodeBlacklist ++ nodesForBlacklistedExecutors).toSet - // Similarly, figure out which executors have any blacklisting. This means all executors - // that are explicitly blacklisted, plus all executors on nodes that are blacklisted. - val execsForBlacklistedNodes = offers.filter { offer => - nodeBlacklist.contains(offer.host) + val nodesWithAnyExclusions = (nodeExcludelist ++ nodesForExcludedExecutors).toSet + // Similarly, figure out which executors have any exclusions. This means all executors + // that are explicitly excluded, plus all executors on nodes that are excluded. + val execsForExcludedNodes = offers.filter { offer => + nodeExcludelist.contains(offer.host) }.map(_.executorId).toSeq - val executorsWithAnyBlacklisting = (execBlacklist ++ execsForBlacklistedNodes).toSet + val executorsWithAnyExclusions = (execExcludelist ++ execsForExcludedNodes).toSet // Schedule a taskset, and make sure our test setup is correct -- we are able to schedule - // a task on all executors that aren't blacklisted (whether that executor is a explicitly - // blacklisted, or implicitly blacklisted via the node blacklist). + // a task on all executors that aren't excluded (whether that executor is a explicitly + // excluded, or implicitly excluded via the node excludeOnFailures). val firstTaskAttempts = taskScheduler.resourceOffers(offers).flatten - assert(firstTaskAttempts.size === offers.size - executorsWithAnyBlacklisting.size) + assert(firstTaskAttempts.size === offers.size - executorsWithAnyExclusions.size) - // Now check that we haven't made too many calls to any of the blacklist methods. - // We should be checking our node blacklist, but it should be within the bound we defined + // Now check that we haven't made too many calls to any of the excludelist methods. + // We should be checking our node excludelist, but it should be within the bound we defined // above. - verify(stageToMockTaskSetBlacklist(0), atMost(maxBlacklistChecks)) - .isNodeBlacklistedForTaskSet(anyString()) - // We shouldn't ever consult the per-task blacklist for the nodes that have been blacklisted - // for the entire taskset, since the taskset level blacklisting should prevent scheduling + verify(stageToMockTaskSetExcludelist(0), atMost(maxExcludelistChecks)) + .isNodeExcludedForTaskSet(anyString()) + // We shouldn't ever consult the per-task excludelist for the nodes that have been excluded + // for the entire taskset, since the taskset level exclusions should prevent scheduling // from ever looking at specific tasks. - nodesWithAnyBlacklisting.foreach { node => - verify(stageToMockTaskSetBlacklist(0), never) - .isNodeBlacklistedForTask(meq(node), anyInt()) + nodesWithAnyExclusions.foreach { node => + verify(stageToMockTaskSetExcludelist(0), never) + .isNodeExcludedForTask(meq(node), anyInt()) } - executorsWithAnyBlacklisting.foreach { exec => - // We should be checking our executor blacklist, but it should be within the bound defined - // above. 
Its possible that this will be significantly fewer calls, maybe even 0, if - // there is also a node-blacklist which takes effect first. But this assert is all we - // need to avoid an O(numPendingTask) slowdown. - verify(stageToMockTaskSetBlacklist(0), atMost(maxBlacklistChecks)) - .isExecutorBlacklistedForTaskSet(exec) - // We shouldn't ever consult the per-task blacklist for executors that have been - // blacklisted for the entire taskset, since the taskset level blacklisting should prevent + executorsWithAnyExclusions.foreach { exec => + // We should be checking our executor excludelist, but it should be within the bound + // defined above. Its possible that this will be significantly fewer calls, maybe even + // 0, if there is also a node-excludelist which takes effect first. But this assert is + // all we need to avoid an O(numPendingTask) slowdown. + verify(stageToMockTaskSetExcludelist(0), atMost(maxExcludelistChecks)) + .isExecutorExcludedForTaskSet(exec) + // We shouldn't ever consult the per-task excludelist for executors that have been + // excluded for the entire taskset, since the taskset level exclusions should prevent // scheduling from ever looking at specific tasks. - verify(stageToMockTaskSetBlacklist(0), never) - .isExecutorBlacklistedForTask(meq(exec), anyInt()) + verify(stageToMockTaskSetExcludelist(0), never) + .isExecutorExcludedForTask(meq(exec), anyInt()) } } } } - testBlacklistPerformance( - testName = "Blacklisted node for entire task set prevents per-task blacklist checks", - nodeBlacklist = Seq("host1"), - execBlacklist = Seq()) + testExcludelistPerformance( + testName = "Excluded node for entire task set prevents per-task exclusion checks", + nodeExcludelist = Seq("host1"), + execExcludelist = Seq()) - testBlacklistPerformance( - testName = "Blacklisted executor for entire task set prevents per-task blacklist checks", - nodeBlacklist = Seq(), - execBlacklist = Seq("executor3") + testExcludelistPerformance( + testName = "Excluded executor for entire task set prevents per-task exclusion checks", + nodeExcludelist = Seq(), + execExcludelist = Seq("executor3") ) test("abort stage if executor loss results in unschedulability from previously failed tasks") { - // Make sure we can detect when a taskset becomes unschedulable from a blacklisting. This + // Make sure we can detect when a taskset becomes unschedulable from excludeOnFailure. This // test explores a particular corner case -- you may have one task fail, but still be // schedulable on another executor. However, that executor may fail later on, leaving the // first task with no place to run. val taskScheduler = setupScheduler( - config.BLACKLIST_ENABLED.key -> "true" + config.EXCLUDE_ON_FAILURE_ENABLED.key -> "true" ) val taskSet = FakeTask.createTaskSet(2) @@ -1215,7 +1216,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B assert(nextTaskAttempts.head.index != failedTask.index) // Now we should definitely realize that our task set is unschedulable, because the only - // task left can't be scheduled on any executors due to the blacklist. + // task left can't be scheduled on any executors due to the excludelist. 
taskScheduler.resourceOffers(IndexedSeq(new WorkerOffer("executor0", "host0", 1))) sc.listenerBus.waitUntilEmpty(100000) assert(tsm.isZombie) @@ -1223,11 +1224,11 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B val idx = failedTask.index assert(failedTaskSetReason === s""" |Aborting $taskSet because task $idx (partition $idx) - |cannot run anywhere due to node and executor blacklist. + |cannot run anywhere due to node and executor excludeOnFailure. |Most recent failure: - |${tsm.taskSetBlacklistHelperOpt.get.getLatestFailureReason} + |${tsm.taskSetExcludelistHelperOpt.get.getLatestFailureReason} | - |Blacklisting behavior can be configured via spark.blacklist.*. + |ExcludeOnFailure behavior can be configured via spark.excludeOnFailure.*. |""".stripMargin) } @@ -1238,7 +1239,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B // available and not bail on the job val taskScheduler = setupScheduler( - config.BLACKLIST_ENABLED.key -> "true" + config.EXCLUDE_ON_FAILURE_ENABLED.key -> "true" ) val taskSet = FakeTask.createTaskSet(2, (0 until 2).map { _ => Seq(TaskLocation("host0")) }: _*) @@ -1306,7 +1307,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B assert(taskScheduler.getExecutorsAliveOnHost("host1") === Some(Set("executor1", "executor3"))) } - test("scheduler checks for executors that can be expired from blacklist") { + test("scheduler checks for executors that can be expired from excludeOnFailure") { taskScheduler = setupScheduler() taskScheduler.submitTasks(FakeTask.createTaskSet(1, stageId = 0, stageAttemptId = 0)) @@ -1314,7 +1315,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B new WorkerOffer("executor0", "host0", 1) )).flatten - verify(blacklist).applyBlacklistTimeout() + verify(healthTracker).applyExcludeOnFailureTimeout() } test("if an executor is lost then the state for its running tasks is cleaned up (SPARK-18553)") { @@ -1400,7 +1401,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B offers } override def createTaskSetManager(taskSet: TaskSet, maxTaskFailures: Int): TaskSetManager = { - new TaskSetManager(this, taskSet, maxTaskFailures, blacklistTrackerOpt, clock) + new TaskSetManager(this, taskSet, maxTaskFailures, healthTrackerOpt, clock) } } // Need to initialize a DAGScheduler for the taskScheduler to use for callbacks. @@ -1440,7 +1441,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B val clock = new ManualClock() val taskScheduler = new TaskSchedulerImpl(sc) { override def createTaskSetManager(taskSet: TaskSet, maxTaskFailures: Int): TaskSetManager = { - new TaskSetManager(this, taskSet, maxTaskFailures, blacklistTrackerOpt, clock) + new TaskSetManager(this, taskSet, maxTaskFailures, healthTrackerOpt, clock) } } // Need to initialize a DAGScheduler for the taskScheduler to use for callbacks. diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSetBlacklistSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSetBlacklistSuite.scala deleted file mode 100644 index ed97a4c206ca3..0000000000000 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSetBlacklistSuite.scala +++ /dev/null @@ -1,287 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.scheduler - -import org.mockito.ArgumentMatchers.isA -import org.mockito.Mockito.{never, verify} -import org.scalatest.BeforeAndAfterEach -import org.scalatestplus.mockito.MockitoSugar - -import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.internal.config -import org.apache.spark.util.ManualClock - -class TaskSetBlacklistSuite extends SparkFunSuite with BeforeAndAfterEach with MockitoSugar { - - private var listenerBusMock: LiveListenerBus = _ - - override def beforeEach(): Unit = { - listenerBusMock = mock[LiveListenerBus] - super.beforeEach() - } - - test("Blacklisting tasks, executors, and nodes") { - val conf = new SparkConf().setAppName("test").setMaster("local") - .set(config.BLACKLIST_ENABLED.key, "true") - val clock = new ManualClock - val attemptId = 0 - val taskSetBlacklist = new TaskSetBlacklist( - listenerBusMock, conf, stageId = 0, stageAttemptId = attemptId, clock = clock) - - clock.setTime(0) - // We will mark task 0 & 1 failed on both executor 1 & 2. - // We should blacklist all executors on that host, for all tasks for the stage. Note the API - // will return false for isExecutorBacklistedForTaskSet even when the node is blacklisted, so - // the executor is implicitly blacklisted (this makes sense with how the scheduler uses the - // blacklist) - - // First, mark task 0 as failed on exec1. - // task 0 should be blacklisted on exec1, and nowhere else - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "exec1", index = 0, failureReason = "testing") - for { - executor <- (1 to 4).map(_.toString) - index <- 0 until 10 - } { - val shouldBeBlacklisted = (executor == "exec1" && index == 0) - assert(taskSetBlacklist.isExecutorBlacklistedForTask(executor, index) === shouldBeBlacklisted) - } - - assert(!taskSetBlacklist.isExecutorBlacklistedForTaskSet("exec1")) - verify(listenerBusMock, never()) - .post(isA(classOf[SparkListenerExecutorBlacklistedForStage])) - - assert(!taskSetBlacklist.isNodeBlacklistedForTaskSet("hostA")) - verify(listenerBusMock, never()) - .post(isA(classOf[SparkListenerNodeBlacklistedForStage])) - - // Mark task 1 failed on exec1 -- this pushes the executor into the blacklist - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "exec1", index = 1, failureReason = "testing") - - assert(taskSetBlacklist.isExecutorBlacklistedForTaskSet("exec1")) - verify(listenerBusMock).post( - SparkListenerExecutorBlacklistedForStage(0, "exec1", 2, 0, attemptId)) - - assert(!taskSetBlacklist.isNodeBlacklistedForTaskSet("hostA")) - verify(listenerBusMock, never()) - .post(isA(classOf[SparkListenerNodeBlacklistedForStage])) - - // Mark one task as failed on exec2 -- not enough for any further blacklisting yet. 
- taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "exec2", index = 0, failureReason = "testing") - assert(taskSetBlacklist.isExecutorBlacklistedForTaskSet("exec1")) - - assert(!taskSetBlacklist.isExecutorBlacklistedForTaskSet("exec2")) - - assert(!taskSetBlacklist.isNodeBlacklistedForTaskSet("hostA")) - verify(listenerBusMock, never()) - .post(isA(classOf[SparkListenerNodeBlacklistedForStage])) - - // Mark another task as failed on exec2 -- now we blacklist exec2, which also leads to - // blacklisting the entire node. - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "exec2", index = 1, failureReason = "testing") - - assert(taskSetBlacklist.isExecutorBlacklistedForTaskSet("exec1")) - - assert(taskSetBlacklist.isExecutorBlacklistedForTaskSet("exec2")) - verify(listenerBusMock).post( - SparkListenerExecutorBlacklistedForStage(0, "exec2", 2, 0, attemptId)) - - assert(taskSetBlacklist.isNodeBlacklistedForTaskSet("hostA")) - verify(listenerBusMock).post( - SparkListenerNodeBlacklistedForStage(0, "hostA", 2, 0, attemptId)) - - // Make sure the blacklist has the correct per-task && per-executor responses, over a wider - // range of inputs. - for { - executor <- (1 to 4).map(e => s"exec$e") - index <- 0 until 10 - } { - withClue(s"exec = $executor; index = $index") { - val badExec = (executor == "exec1" || executor == "exec2") - val badIndex = (index == 0 || index == 1) - assert( - // this ignores whether the executor is blacklisted entirely for the taskset -- that is - // intentional, it keeps it fast and is sufficient for usage in the scheduler. - taskSetBlacklist.isExecutorBlacklistedForTask(executor, index) === (badExec && badIndex)) - assert(taskSetBlacklist.isExecutorBlacklistedForTaskSet(executor) === badExec) - if (badExec) { - verify(listenerBusMock).post( - SparkListenerExecutorBlacklistedForStage(0, executor, 2, 0, attemptId)) - } - } - } - assert(taskSetBlacklist.isNodeBlacklistedForTaskSet("hostA")) - val execToFailures = taskSetBlacklist.execToFailures - assert(execToFailures.keySet === Set("exec1", "exec2")) - - Seq("exec1", "exec2").foreach { exec => - assert( - execToFailures(exec).taskToFailureCountAndFailureTime === Map( - 0 -> ((1, 0)), - 1 -> ((1, 0)) - ) - ) - } - } - - test("multiple attempts for the same task count once") { - // Make sure that for blacklisting tasks, the node counts task attempts, not executors. But for - // stage-level blacklisting, we count unique tasks. The reason for this difference is, with - // task-attempt blacklisting, we want to make it easy to configure so that you ensure a node - // is blacklisted before the taskset is completely aborted because of spark.task.maxFailures. - // But with stage-blacklisting, we want to make sure we're not just counting one bad task - // that has failed many times. 
- - val conf = new SparkConf().setMaster("local").setAppName("test") - .set(config.MAX_TASK_ATTEMPTS_PER_EXECUTOR, 2) - .set(config.MAX_TASK_ATTEMPTS_PER_NODE, 3) - .set(config.MAX_FAILURES_PER_EXEC_STAGE, 2) - .set(config.MAX_FAILED_EXEC_PER_NODE_STAGE, 3) - val clock = new ManualClock - - val attemptId = 0 - val taskSetBlacklist = new TaskSetBlacklist( - listenerBusMock, conf, stageId = 0, stageAttemptId = attemptId, clock = clock) - - var time = 0 - clock.setTime(time) - // Fail a task twice on hostA, exec:1 - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "1", index = 0, failureReason = "testing") - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "1", index = 0, failureReason = "testing") - assert(taskSetBlacklist.isExecutorBlacklistedForTask("1", 0)) - assert(!taskSetBlacklist.isNodeBlacklistedForTask("hostA", 0)) - - assert(!taskSetBlacklist.isExecutorBlacklistedForTaskSet("1")) - verify(listenerBusMock, never()).post( - SparkListenerExecutorBlacklistedForStage(time, "1", 2, 0, attemptId)) - - assert(!taskSetBlacklist.isNodeBlacklistedForTaskSet("hostA")) - verify(listenerBusMock, never()).post( - SparkListenerNodeBlacklistedForStage(time, "hostA", 2, 0, attemptId)) - - // Fail the same task once more on hostA, exec:2 - time += 1 - clock.setTime(time) - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "2", index = 0, failureReason = "testing") - assert(taskSetBlacklist.isNodeBlacklistedForTask("hostA", 0)) - - assert(!taskSetBlacklist.isExecutorBlacklistedForTaskSet("2")) - verify(listenerBusMock, never()).post( - SparkListenerExecutorBlacklistedForStage(time, "2", 2, 0, attemptId)) - - assert(!taskSetBlacklist.isNodeBlacklistedForTaskSet("hostA")) - verify(listenerBusMock, never()).post( - SparkListenerNodeBlacklistedForStage(time, "hostA", 2, 0, attemptId)) - - // Fail another task on hostA, exec:1. Now that executor has failures on two different tasks, - // so its blacklisted - time += 1 - clock.setTime(time) - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "1", index = 1, failureReason = "testing") - - assert(taskSetBlacklist.isExecutorBlacklistedForTaskSet("1")) - verify(listenerBusMock) - .post(SparkListenerExecutorBlacklistedForStage(time, "1", 2, 0, attemptId)) - - assert(!taskSetBlacklist.isNodeBlacklistedForTaskSet("hostA")) - verify(listenerBusMock, never()) - .post(isA(classOf[SparkListenerNodeBlacklistedForStage])) - - // Fail a third task on hostA, exec:2, so that exec is blacklisted for the whole task set - time += 1 - clock.setTime(time) - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "2", index = 2, failureReason = "testing") - - assert(taskSetBlacklist.isExecutorBlacklistedForTaskSet("2")) - verify(listenerBusMock) - .post(SparkListenerExecutorBlacklistedForStage(time, "2", 2, 0, attemptId)) - - assert(!taskSetBlacklist.isNodeBlacklistedForTaskSet("hostA")) - verify(listenerBusMock, never()) - .post(isA(classOf[SparkListenerNodeBlacklistedForStage])) - - // Fail a fourth & fifth task on hostA, exec:3. Now we've got three executors that are - // blacklisted for the taskset, so blacklist the whole node. 
- time += 1 - clock.setTime(time) - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "3", index = 3, failureReason = "testing") - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "3", index = 4, failureReason = "testing") - - assert(taskSetBlacklist.isExecutorBlacklistedForTaskSet("3")) - verify(listenerBusMock) - .post(SparkListenerExecutorBlacklistedForStage(time, "3", 2, 0, attemptId)) - - assert(taskSetBlacklist.isNodeBlacklistedForTaskSet("hostA")) - verify(listenerBusMock).post( - SparkListenerNodeBlacklistedForStage(time, "hostA", 3, 0, attemptId)) - } - - test("only blacklist nodes for the task set when all the blacklisted executors are all on " + - "same host") { - // we blacklist executors on two different hosts within one taskSet -- make sure that doesn't - // lead to any node blacklisting - val conf = new SparkConf().setAppName("test").setMaster("local") - .set(config.BLACKLIST_ENABLED.key, "true") - val clock = new ManualClock - - val attemptId = 0 - val taskSetBlacklist = new TaskSetBlacklist( - listenerBusMock, conf, stageId = 0, stageAttemptId = attemptId, clock = clock) - var time = 0 - clock.setTime(time) - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "1", index = 0, failureReason = "testing") - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "1", index = 1, failureReason = "testing") - - assert(taskSetBlacklist.isExecutorBlacklistedForTaskSet("1")) - verify(listenerBusMock) - .post(SparkListenerExecutorBlacklistedForStage(time, "1", 2, 0, attemptId)) - - assert(!taskSetBlacklist.isNodeBlacklistedForTaskSet("hostA")) - verify(listenerBusMock, never()).post( - SparkListenerNodeBlacklistedForStage(time, "hostA", 2, 0, attemptId)) - - time += 1 - clock.setTime(time) - taskSetBlacklist.updateBlacklistForFailedTask( - "hostB", exec = "2", index = 0, failureReason = "testing") - taskSetBlacklist.updateBlacklistForFailedTask( - "hostB", exec = "2", index = 1, failureReason = "testing") - assert(taskSetBlacklist.isExecutorBlacklistedForTaskSet("1")) - - assert(taskSetBlacklist.isExecutorBlacklistedForTaskSet("2")) - verify(listenerBusMock) - .post(SparkListenerExecutorBlacklistedForStage(time, "2", 2, 0, attemptId)) - - assert(!taskSetBlacklist.isNodeBlacklistedForTaskSet("hostA")) - assert(!taskSetBlacklist.isNodeBlacklistedForTaskSet("hostB")) - verify(listenerBusMock, never()) - .post(isA(classOf[SparkListenerNodeBlacklistedForStage])) - } - -} diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSetExcludelistSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSetExcludelistSuite.scala new file mode 100644 index 0000000000000..d20768d7cd12b --- /dev/null +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSetExcludelistSuite.scala @@ -0,0 +1,310 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.scheduler + +import org.mockito.ArgumentMatchers.isA +import org.mockito.Mockito.{never, verify} +import org.scalatest.BeforeAndAfterEach +import org.scalatestplus.mockito.MockitoSugar + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.internal.config +import org.apache.spark.util.ManualClock + +class TaskSetExcludelistSuite extends SparkFunSuite with BeforeAndAfterEach with MockitoSugar { + + private var listenerBusMock: LiveListenerBus = _ + + override def beforeEach(): Unit = { + listenerBusMock = mock[LiveListenerBus] + super.beforeEach() + } + + test("Excluding tasks, executors, and nodes") { + val conf = new SparkConf().setAppName("test").setMaster("local") + .set(config.EXCLUDE_ON_FAILURE_ENABLED.key, "true") + val clock = new ManualClock + val attemptId = 0 + val taskSetExcludelist = new TaskSetExcludelist( + listenerBusMock, conf, stageId = 0, stageAttemptId = attemptId, clock = clock) + + clock.setTime(0) + // We will mark task 0 & 1 failed on both executor 1 & 2. + // We should exclude all executors on that host, for all tasks for the stage. Note the API + // will return false for isExecutorBacklistedForTaskSet even when the node is excluded, so + // the executor is implicitly excluded (this makes sense with how the scheduler uses the + // exclude) + + // First, mark task 0 as failed on exec1. + // task 0 should be excluded on exec1, and nowhere else + taskSetExcludelist.updateExcludedForFailedTask( + "hostA", exec = "exec1", index = 0, failureReason = "testing") + for { + executor <- (1 to 4).map(_.toString) + index <- 0 until 10 + } { + val shouldBeExcluded = (executor == "exec1" && index == 0) + assert(taskSetExcludelist.isExecutorExcludedForTask(executor, index) === shouldBeExcluded) + } + + assert(!taskSetExcludelist.isExecutorExcludedForTaskSet("exec1")) + verify(listenerBusMock, never()) + .post(isA(classOf[SparkListenerExecutorExcludedForStage])) + verify(listenerBusMock, never()) + .post(isA(classOf[SparkListenerExecutorBlacklistedForStage])) + + assert(!taskSetExcludelist.isNodeExcludedForTaskSet("hostA")) + verify(listenerBusMock, never()) + .post(isA(classOf[SparkListenerNodeExcludedForStage])) + + // Mark task 1 failed on exec1 -- this pushes the executor into the exclude + taskSetExcludelist.updateExcludedForFailedTask( + "hostA", exec = "exec1", index = 1, failureReason = "testing") + + assert(taskSetExcludelist.isExecutorExcludedForTaskSet("exec1")) + verify(listenerBusMock).post( + SparkListenerExecutorExcludedForStage(0, "exec1", 2, 0, attemptId)) + verify(listenerBusMock).post( + SparkListenerExecutorBlacklistedForStage(0, "exec1", 2, 0, attemptId)) + + + assert(!taskSetExcludelist.isNodeExcludedForTaskSet("hostA")) + verify(listenerBusMock, never()) + .post(isA(classOf[SparkListenerNodeExcludedForStage])) + verify(listenerBusMock, never()) + .post(isA(classOf[SparkListenerNodeBlacklistedForStage])) + + // Mark one task as failed on exec2 -- not enough for any further excluding yet. 
+ taskSetExcludelist.updateExcludedForFailedTask( + "hostA", exec = "exec2", index = 0, failureReason = "testing") + assert(taskSetExcludelist.isExecutorExcludedForTaskSet("exec1")) + + assert(!taskSetExcludelist.isExecutorExcludedForTaskSet("exec2")) + + assert(!taskSetExcludelist.isNodeExcludedForTaskSet("hostA")) + verify(listenerBusMock, never()) + .post(isA(classOf[SparkListenerNodeExcludedForStage])) + verify(listenerBusMock, never()) + .post(isA(classOf[SparkListenerNodeBlacklistedForStage])) + + // Mark another task as failed on exec2 -- now we exclude exec2, which also leads to + // excluding the entire node. + taskSetExcludelist.updateExcludedForFailedTask( + "hostA", exec = "exec2", index = 1, failureReason = "testing") + + assert(taskSetExcludelist.isExecutorExcludedForTaskSet("exec1")) + + assert(taskSetExcludelist.isExecutorExcludedForTaskSet("exec2")) + verify(listenerBusMock).post( + SparkListenerExecutorExcludedForStage(0, "exec2", 2, 0, attemptId)) + verify(listenerBusMock).post( + SparkListenerExecutorBlacklistedForStage(0, "exec2", 2, 0, attemptId)) + + assert(taskSetExcludelist.isNodeExcludedForTaskSet("hostA")) + verify(listenerBusMock).post( + SparkListenerNodeExcludedForStage(0, "hostA", 2, 0, attemptId)) + verify(listenerBusMock).post( + SparkListenerNodeBlacklistedForStage(0, "hostA", 2, 0, attemptId)) + + // Make sure the exclude has the correct per-task && per-executor responses, over a wider + // range of inputs. + for { + executor <- (1 to 4).map(e => s"exec$e") + index <- 0 until 10 + } { + withClue(s"exec = $executor; index = $index") { + val badExec = (executor == "exec1" || executor == "exec2") + val badIndex = (index == 0 || index == 1) + assert( + // this ignores whether the executor is excluded entirely for the taskset -- that is + // intentional, it keeps it fast and is sufficient for usage in the scheduler. + taskSetExcludelist.isExecutorExcludedForTask(executor, index) === (badExec && badIndex)) + assert(taskSetExcludelist.isExecutorExcludedForTaskSet(executor) === badExec) + if (badExec) { + verify(listenerBusMock).post( + SparkListenerExecutorExcludedForStage(0, executor, 2, 0, attemptId)) + verify(listenerBusMock).post( + SparkListenerExecutorBlacklistedForStage(0, executor, 2, 0, attemptId)) + } + } + } + assert(taskSetExcludelist.isNodeExcludedForTaskSet("hostA")) + val execToFailures = taskSetExcludelist.execToFailures + assert(execToFailures.keySet === Set("exec1", "exec2")) + + Seq("exec1", "exec2").foreach { exec => + assert( + execToFailures(exec).taskToFailureCountAndFailureTime === Map( + 0 -> ((1, 0)), + 1 -> ((1, 0)) + ) + ) + } + } + + test("multiple attempts for the same task count once") { + // Make sure that for excluding tasks, the node counts task attempts, not executors. But for + // stage-level excluding, we count unique tasks. The reason for this difference is, with + // task-attempt excluding, we want to make it easy to configure so that you ensure a node + // is excluded before the taskset is completely aborted because of spark.task.maxFailures. + // But with stage-excluding, we want to make sure we're not just counting one bad task + // that has failed many times. 
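For context, the task- and stage-level thresholds that this test exercises through the internal config constants map to the user-facing `spark.excludeOnFailure.*` keys documented in the `docs/configuration.md` changes later in this patch. A minimal, hypothetical sketch of setting them on a `SparkConf` (the object name and the values are made up for illustration, not recommendations):

```
import org.apache.spark.SparkConf

object ExcludeOnFailureConfSketch {
  def main(args: Array[String]): Unit = {
    // Example values only; the defaults are listed in docs/configuration.md.
    val conf = new SparkConf()
      .setAppName("exclude-on-failure-sketch")
      .set("spark.excludeOnFailure.enabled", "true")
      // Task-level limits count attempts, per executor and per node.
      .set("spark.excludeOnFailure.task.maxTaskAttemptsPerExecutor", "1")
      .set("spark.excludeOnFailure.task.maxTaskAttemptsPerNode", "2")
      // Stage-level limits count distinct failed tasks per executor and
      // failed executors per node.
      .set("spark.excludeOnFailure.stage.maxFailedTasksPerExecutor", "2")
      .set("spark.excludeOnFailure.stage.maxFailedExecutorsPerNode", "2")
    println(conf.toDebugString)
  }
}
```

These are the same limits the test sets via `config.MAX_TASK_ATTEMPTS_PER_EXECUTOR`, `config.MAX_TASK_ATTEMPTS_PER_NODE`, `config.MAX_FAILURES_PER_EXEC_STAGE` and `config.MAX_FAILED_EXEC_PER_NODE_STAGE` below.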
+ + val conf = new SparkConf().setMaster("local").setAppName("test") + .set(config.MAX_TASK_ATTEMPTS_PER_EXECUTOR, 2) + .set(config.MAX_TASK_ATTEMPTS_PER_NODE, 3) + .set(config.MAX_FAILURES_PER_EXEC_STAGE, 2) + .set(config.MAX_FAILED_EXEC_PER_NODE_STAGE, 3) + val clock = new ManualClock + + val attemptId = 0 + val taskSetExcludlist = new TaskSetExcludelist( + listenerBusMock, conf, stageId = 0, stageAttemptId = attemptId, clock = clock) + + var time = 0 + clock.setTime(time) + // Fail a task twice on hostA, exec:1 + taskSetExcludlist.updateExcludedForFailedTask( + "hostA", exec = "1", index = 0, failureReason = "testing") + taskSetExcludlist.updateExcludedForFailedTask( + "hostA", exec = "1", index = 0, failureReason = "testing") + assert(taskSetExcludlist.isExecutorExcludedForTask("1", 0)) + assert(!taskSetExcludlist.isNodeExcludedForTask("hostA", 0)) + + assert(!taskSetExcludlist.isExecutorExcludedForTaskSet("1")) + verify(listenerBusMock, never()).post( + SparkListenerExecutorExcludedForStage(time, "1", 2, 0, attemptId)) + + assert(!taskSetExcludlist.isNodeExcludedForTaskSet("hostA")) + verify(listenerBusMock, never()).post( + SparkListenerNodeExcludedForStage(time, "hostA", 2, 0, attemptId)) + + // Fail the same task once more on hostA, exec:2 + time += 1 + clock.setTime(time) + taskSetExcludlist.updateExcludedForFailedTask( + "hostA", exec = "2", index = 0, failureReason = "testing") + assert(taskSetExcludlist.isNodeExcludedForTask("hostA", 0)) + + assert(!taskSetExcludlist.isExecutorExcludedForTaskSet("2")) + verify(listenerBusMock, never()).post( + SparkListenerExecutorExcludedForStage(time, "2", 2, 0, attemptId)) + + assert(!taskSetExcludlist.isNodeExcludedForTaskSet("hostA")) + verify(listenerBusMock, never()).post( + SparkListenerNodeExcludedForStage(time, "hostA", 2, 0, attemptId)) + + // Fail another task on hostA, exec:1. Now that executor has failures on two different tasks, + // so its excluded + time += 1 + clock.setTime(time) + taskSetExcludlist.updateExcludedForFailedTask( + "hostA", exec = "1", index = 1, failureReason = "testing") + + assert(taskSetExcludlist.isExecutorExcludedForTaskSet("1")) + verify(listenerBusMock) + .post(SparkListenerExecutorExcludedForStage(time, "1", 2, 0, attemptId)) + + assert(!taskSetExcludlist.isNodeExcludedForTaskSet("hostA")) + verify(listenerBusMock, never()) + .post(isA(classOf[SparkListenerNodeExcludedForStage])) + + // Fail a third task on hostA, exec:2, so that exec is excluded for the whole task set + time += 1 + clock.setTime(time) + taskSetExcludlist.updateExcludedForFailedTask( + "hostA", exec = "2", index = 2, failureReason = "testing") + + assert(taskSetExcludlist.isExecutorExcludedForTaskSet("2")) + verify(listenerBusMock) + .post(SparkListenerExecutorExcludedForStage(time, "2", 2, 0, attemptId)) + + assert(!taskSetExcludlist.isNodeExcludedForTaskSet("hostA")) + verify(listenerBusMock, never()) + .post(isA(classOf[SparkListenerNodeExcludedForStage])) + + // Fail a fourth & fifth task on hostA, exec:3. Now we've got three executors that are + // excluded for the taskset, so exclude the whole node. 
+ time += 1 + clock.setTime(time) + taskSetExcludlist.updateExcludedForFailedTask( + "hostA", exec = "3", index = 3, failureReason = "testing") + taskSetExcludlist.updateExcludedForFailedTask( + "hostA", exec = "3", index = 4, failureReason = "testing") + + assert(taskSetExcludlist.isExecutorExcludedForTaskSet("3")) + verify(listenerBusMock) + .post(SparkListenerExecutorExcludedForStage(time, "3", 2, 0, attemptId)) + + assert(taskSetExcludlist.isNodeExcludedForTaskSet("hostA")) + verify(listenerBusMock).post( + SparkListenerNodeExcludedForStage(time, "hostA", 3, 0, attemptId)) + } + + test("only exclude nodes for the task set when all the excluded executors are all on " + + "same host") { + // we exclude executors on two different hosts within one taskSet -- make sure that doesn't + // lead to any node excluding + val conf = new SparkConf().setAppName("test").setMaster("local") + .set(config.EXCLUDE_ON_FAILURE_ENABLED.key, "true") + val clock = new ManualClock + + val attemptId = 0 + val taskSetExcludlist = new TaskSetExcludelist( + listenerBusMock, conf, stageId = 0, stageAttemptId = attemptId, clock = clock) + var time = 0 + clock.setTime(time) + taskSetExcludlist.updateExcludedForFailedTask( + "hostA", exec = "1", index = 0, failureReason = "testing") + taskSetExcludlist.updateExcludedForFailedTask( + "hostA", exec = "1", index = 1, failureReason = "testing") + + assert(taskSetExcludlist.isExecutorExcludedForTaskSet("1")) + verify(listenerBusMock) + .post(SparkListenerExecutorExcludedForStage(time, "1", 2, 0, attemptId)) + verify(listenerBusMock) + .post(SparkListenerExecutorBlacklistedForStage(time, "1", 2, 0, attemptId)) + + assert(!taskSetExcludlist.isNodeExcludedForTaskSet("hostA")) + verify(listenerBusMock, never()).post( + SparkListenerNodeExcludedForStage(time, "hostA", 2, 0, attemptId)) + verify(listenerBusMock, never()).post( + SparkListenerNodeBlacklistedForStage(time, "hostA", 2, 0, attemptId)) + + time += 1 + clock.setTime(time) + taskSetExcludlist.updateExcludedForFailedTask( + "hostB", exec = "2", index = 0, failureReason = "testing") + taskSetExcludlist.updateExcludedForFailedTask( + "hostB", exec = "2", index = 1, failureReason = "testing") + assert(taskSetExcludlist.isExecutorExcludedForTaskSet("1")) + + assert(taskSetExcludlist.isExecutorExcludedForTaskSet("2")) + verify(listenerBusMock) + .post(SparkListenerExecutorExcludedForStage(time, "2", 2, 0, attemptId)) + verify(listenerBusMock) + .post(SparkListenerExecutorBlacklistedForStage(time, "2", 2, 0, attemptId)) + + assert(!taskSetExcludlist.isNodeExcludedForTaskSet("hostA")) + assert(!taskSetExcludlist.isNodeExcludedForTaskSet("hostB")) + verify(listenerBusMock, never()) + .post(isA(classOf[SparkListenerNodeExcludedForStage])) + verify(listenerBusMock, never()) + .post(isA(classOf[SparkListenerNodeBlacklistedForStage])) + } + +} diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala index c389fd2ffa8b1..e01e278f60205 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala @@ -382,14 +382,14 @@ class TaskSetManagerSuite assert(delayReject === false) manager.isZombie = false - // offers not accepted due to blacklisting are not delay schedule rejects + // offers not accepted due to excludelist are not delay schedule rejects val tsmSpy = spy(manager) - val blacklist = mock(classOf[TaskSetBlacklist]) - 
when(tsmSpy.taskSetBlacklistHelperOpt).thenReturn(Some(blacklist)) - when(blacklist.isNodeBlacklistedForTaskSet(any())).thenReturn(true) - val (blacklistTask, blackListReject) = tsmSpy.resourceOffer("exec2", "host2", ANY) - assert(blacklistTask.isEmpty) - assert(blackListReject === false) + val excludelist = mock(classOf[TaskSetExcludelist]) + when(tsmSpy.taskSetExcludelistHelperOpt).thenReturn(Some(excludelist)) + when(excludelist.isNodeExcludedForTaskSet(any())).thenReturn(true) + val (task, taskReject) = tsmSpy.resourceOffer("exec2", "host2", ANY) + assert(task.isEmpty) + assert(taskReject === false) // After another delay, we can go ahead and launch that task non-locally assert(manager.resourceOffer("exec2", "host2", ANY)._1.get.index === 3) @@ -479,11 +479,11 @@ class TaskSetManagerSuite } } - test("executors should be blacklisted after task failure, in spite of locality preferences") { + test("executors should be excluded after task failure, in spite of locality preferences") { val rescheduleDelay = 300L val conf = new SparkConf(). - set(config.BLACKLIST_ENABLED, true). - set(config.BLACKLIST_TIMEOUT_CONF, rescheduleDelay). + set(config.EXCLUDE_ON_FAILURE_ENABLED, true). + set(config.EXCLUDE_ON_FAILURE_TIMEOUT_CONF, rescheduleDelay). // don't wait to jump locality levels in this test set(config.LOCALITY_WAIT.key, "0") @@ -495,11 +495,11 @@ class TaskSetManagerSuite val taskSet = FakeTask.createTaskSet(1, Seq(TaskLocation("host1", "exec1"))) val clock = new ManualClock clock.advance(1) - // We don't directly use the application blacklist, but its presence triggers blacklisting + // We don't directly use the application excludelist, but its presence triggers exclusion // within the taskset. val mockListenerBus = mock(classOf[LiveListenerBus]) - val blacklistTrackerOpt = Some(new BlacklistTracker(mockListenerBus, conf, None, clock)) - val manager = new TaskSetManager(sched, taskSet, 4, blacklistTrackerOpt, clock) + val healthTrackerOpt = Some(new HealthTracker(mockListenerBus, conf, None, clock)) + val manager = new TaskSetManager(sched, taskSet, 4, healthTrackerOpt, clock) { val offerResult = manager.resourceOffer("exec1", "host1", PROCESS_LOCAL)._1 @@ -512,7 +512,7 @@ class TaskSetManagerSuite manager.handleFailedTask(offerResult.get.taskId, TaskState.FINISHED, TaskResultLost) assert(!sched.taskSetsFailed.contains(taskSet.id)) - // Ensure scheduling on exec1 fails after failure 1 due to blacklist + // Ensure scheduling on exec1 fails after failure 1 due to executor being excluded assert(manager.resourceOffer("exec1", "host1", PROCESS_LOCAL)._1.isEmpty) assert(manager.resourceOffer("exec1", "host1", NODE_LOCAL)._1.isEmpty) assert(manager.resourceOffer("exec1", "host1", RACK_LOCAL)._1.isEmpty) @@ -532,7 +532,7 @@ class TaskSetManagerSuite manager.handleFailedTask(offerResult.get.taskId, TaskState.FINISHED, TaskResultLost) assert(!sched.taskSetsFailed.contains(taskSet.id)) - // Ensure scheduling on exec1.1 fails after failure 2 due to blacklist + // Ensure scheduling on exec1.1 fails after failure 2 due to executor being excluded assert(manager.resourceOffer("exec1.1", "host1", NODE_LOCAL)._1.isEmpty) } @@ -548,12 +548,12 @@ class TaskSetManagerSuite manager.handleFailedTask(offerResult.get.taskId, TaskState.FINISHED, TaskResultLost) assert(!sched.taskSetsFailed.contains(taskSet.id)) - // Ensure scheduling on exec2 fails after failure 3 due to blacklist + // Ensure scheduling on exec2 fails after failure 3 due to executor being excluded assert(manager.resourceOffer("exec2", "host2", 
ANY)._1.isEmpty) } - // Despite advancing beyond the time for expiring executors from within the blacklist, - // we *never* expire from *within* the stage blacklist + // Despite advancing beyond the time for expiring executors from within the excludelist, + // we *never* expire from *within* the stage excludelist clock.advance(rescheduleDelay) { @@ -1358,20 +1358,20 @@ class TaskSetManagerSuite assert(manager3.name === "TaskSet_1.1") } - test("don't update blacklist for shuffle-fetch failures, preemption, denied commits, " + + test("don't update excludelist for shuffle-fetch failures, preemption, denied commits, " + "or killed tasks") { // Setup a taskset, and fail some tasks for a fetch failure, preemption, denied commit, // and killed task. val conf = new SparkConf(). - set(config.BLACKLIST_ENABLED, true) + set(config.EXCLUDE_ON_FAILURE_ENABLED, true) sc = new SparkContext("local", "test", conf) sched = new FakeTaskScheduler(sc, ("exec1", "host1"), ("exec2", "host2")) val taskSet = FakeTask.createTaskSet(4) val tsm = new TaskSetManager(sched, taskSet, 4) - // we need a spy so we can attach our mock blacklist + // we need a spy so we can attach our mock excludelist val tsmSpy = spy(tsm) - val blacklist = mock(classOf[TaskSetBlacklist]) - when(tsmSpy.taskSetBlacklistHelperOpt).thenReturn(Some(blacklist)) + val excludelist = mock(classOf[TaskSetExcludelist]) + when(tsmSpy.taskSetExcludelistHelperOpt).thenReturn(Some(excludelist)) // make some offers to our taskset, to get tasks we will fail val taskDescs = Seq( @@ -1392,23 +1392,23 @@ class TaskSetManagerSuite TaskCommitDenied(0, 2, 0)) tsmSpy.handleFailedTask(taskDescs(3).taskId, TaskState.KILLED, TaskKilled("test")) - // Make sure that the blacklist ignored all of the task failures above, since they aren't + // Make sure that the excludelist ignored all of the task failures above, since they aren't // the fault of the executor where the task was running. - verify(blacklist, never()) - .updateBlacklistForFailedTask(anyString(), anyString(), anyInt(), anyString()) + verify(excludelist, never()) + .updateExcludedForFailedTask(anyString(), anyString(), anyInt(), anyString()) } - test("update application blacklist for shuffle-fetch") { + test("update application healthTracker for shuffle-fetch") { // Setup a taskset, and fail some one task for fetch failure. 
val conf = new SparkConf() - .set(config.BLACKLIST_ENABLED, true) + .set(config.EXCLUDE_ON_FAILURE_ENABLED, true) .set(config.SHUFFLE_SERVICE_ENABLED, true) - .set(config.BLACKLIST_FETCH_FAILURE_ENABLED, true) + .set(config.EXCLUDE_ON_FAILURE_FETCH_FAILURE_ENABLED, true) sc = new SparkContext("local", "test", conf) sched = new FakeTaskScheduler(sc, ("exec1", "host1"), ("exec2", "host2")) val taskSet = FakeTask.createTaskSet(4) - val blacklistTracker = new BlacklistTracker(sc, None) - val tsm = new TaskSetManager(sched, taskSet, 4, Some(blacklistTracker)) + val healthTracker = new HealthTracker(sc, None) + val tsm = new TaskSetManager(sched, taskSet, 4, Some(healthTracker)) // make some offers to our taskset, to get tasks we will fail val taskDescs = Seq( @@ -1420,22 +1420,22 @@ class TaskSetManagerSuite } assert(taskDescs.size === 4) - assert(!blacklistTracker.isExecutorBlacklisted(taskDescs(0).executorId)) - assert(!blacklistTracker.isNodeBlacklisted("host1")) + assert(!healthTracker.isExecutorExcluded(taskDescs(0).executorId)) + assert(!healthTracker.isNodeExcluded("host1")) // Fail the task with fetch failure tsm.handleFailedTask(taskDescs(0).taskId, TaskState.FAILED, FetchFailed(BlockManagerId(taskDescs(0).executorId, "host1", 12345), 0, 0L, 0, 0, "ignored")) - assert(blacklistTracker.isNodeBlacklisted("host1")) + assert(healthTracker.isNodeExcluded("host1")) } - test("update blacklist before adding pending task to avoid race condition") { - // When a task fails, it should apply the blacklist policy prior to + test("update healthTracker before adding pending task to avoid race condition") { + // When a task fails, it should apply the excludeOnFailure policy prior to // retrying the task otherwise there's a race condition where run on // the same executor that it was intended to be black listed from. val conf = new SparkConf(). - set(config.BLACKLIST_ENABLED, true) + set(config.EXCLUDE_ON_FAILURE_ENABLED, true) // Create a task with two executors. sc = new SparkContext("local", "test", conf) @@ -1448,8 +1448,8 @@ class TaskSetManagerSuite val clock = new ManualClock val mockListenerBus = mock(classOf[LiveListenerBus]) - val blacklistTracker = new BlacklistTracker(mockListenerBus, conf, None, clock) - val taskSetManager = new TaskSetManager(sched, taskSet, 1, Some(blacklistTracker)) + val healthTracker = new HealthTracker(mockListenerBus, conf, None, clock) + val taskSetManager = new TaskSetManager(sched, taskSet, 1, Some(healthTracker)) val taskSetManagerSpy = spy(taskSetManager) val taskDesc = taskSetManagerSpy.resourceOffer(exec, host, TaskLocality.ANY)._1 @@ -1458,8 +1458,8 @@ class TaskSetManagerSuite when(taskSetManagerSpy.addPendingTask(anyInt(), anyBoolean(), anyBoolean())).thenAnswer( (invocationOnMock: InvocationOnMock) => { val task: Int = invocationOnMock.getArgument(0) - assert(taskSetManager.taskSetBlacklistHelperOpt.get. - isExecutorBlacklistedForTask(exec, task)) + assert(taskSetManager.taskSetExcludelistHelperOpt.get. 
+ isExecutorExcludedForTask(exec, task)) } ) diff --git a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerDistributedSuite.scala b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerDistributedSuite.scala index 397fdce8ae6e3..4acb4bbc779c3 100644 --- a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerDistributedSuite.scala +++ b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerDistributedSuite.scala @@ -31,7 +31,7 @@ class KryoSerializerDistributedSuite extends SparkFunSuite with LocalSparkContex .set(config.SERIALIZER, "org.apache.spark.serializer.KryoSerializer") .set(config.Kryo.KRYO_USER_REGISTRATORS, Seq(classOf[AppJarRegistrator].getName)) .set(config.TASK_MAX_FAILURES, 1) - .set(config.BLACKLIST_ENABLED, false) + .set(config.EXCLUDE_ON_FAILURE_ENABLED, false) val jar = TestUtils.createJarWithClasses(List(AppJarRegistrator.customClassName)) conf.setJars(List(jar.getPath)) diff --git a/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala b/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala index d5829c352be9b..6ca1109791c35 100644 --- a/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala @@ -256,9 +256,9 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { } } - // Blacklisting executor for stage + // Excluding executor for stage time += 1 - listener.onExecutorBlacklistedForStage(SparkListenerExecutorBlacklistedForStage( + listener.onExecutorExcludedForStage(SparkListenerExecutorExcludedForStage( time = time, executorId = execIds.head, taskFailures = 2, @@ -273,18 +273,21 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { assert(executorStageSummaryWrappers.nonEmpty) executorStageSummaryWrappers.foreach { exec => - // only the first executor is expected to be blacklisted - val expectedBlacklistedFlag = exec.executorId == execIds.head - assert(exec.info.isBlacklistedForStage === expectedBlacklistedFlag) + // only the first executor is expected to be excluded + val expectedExcludedFlag = exec.executorId == execIds.head + assert(exec.info.isBlacklistedForStage === expectedExcludedFlag) + assert(exec.info.isExcludedForStage === expectedExcludedFlag) } check[ExecutorSummaryWrapper](execIds.head) { exec => assert(exec.info.blacklistedInStages === Set(stages.head.stageId)) + assert(exec.info.excludedInStages === Set(stages.head.stageId)) + } - // Blacklisting node for stage + // Excluding node for stage time += 1 - listener.onNodeBlacklistedForStage(SparkListenerNodeBlacklistedForStage( + listener.onNodeExcludedForStage(SparkListenerNodeExcludedForStage( time = time, hostId = "2.example.com", // this is where the second executor is hosted executorFailures = 1, @@ -299,8 +302,10 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { assert(executorStageSummaryWrappersForNode.nonEmpty) executorStageSummaryWrappersForNode.foreach { exec => - // both executor is expected to be blacklisted + // both executor is expected to be excluded assert(exec.info.isBlacklistedForStage) + assert(exec.info.isExcludedForStage) + } // Fail one of the tasks, re-start it. @@ -450,6 +455,7 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { check[ExecutorSummaryWrapper](execIds.head) { exec => assert(exec.info.blacklistedInStages === Set()) + assert(exec.info.excludedInStages === Set()) } // Submit stage 2. 
@@ -466,9 +472,9 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { assert(stage.info.submissionTime === Some(new Date(stages.last.submissionTime.get))) } - // Blacklisting node for stage + // Excluding node for stage time += 1 - listener.onNodeBlacklistedForStage(SparkListenerNodeBlacklistedForStage( + listener.onNodeExcludedForStage(SparkListenerNodeExcludedForStage( time = time, hostId = "1.example.com", executorFailures = 1, @@ -477,6 +483,7 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { check[ExecutorSummaryWrapper](execIds.head) { exec => assert(exec.info.blacklistedInStages === Set(stages.last.stageId)) + assert(exec.info.excludedInStages === Set(stages.last.stageId)) } // Start and fail all tasks of stage 2. @@ -628,30 +635,34 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { assert(job.info.numSkippedTasks === s1Tasks.size) } - // Blacklist an executor. + // Exclude an executor. time += 1 - listener.onExecutorBlacklisted(SparkListenerExecutorBlacklisted(time, "1", 42)) + listener.onExecutorExcluded(SparkListenerExecutorExcluded(time, "1", 42)) check[ExecutorSummaryWrapper]("1") { exec => assert(exec.info.isBlacklisted) + assert(exec.info.isExcluded) } time += 1 - listener.onExecutorUnblacklisted(SparkListenerExecutorUnblacklisted(time, "1")) + listener.onExecutorUnexcluded(SparkListenerExecutorUnexcluded(time, "1")) check[ExecutorSummaryWrapper]("1") { exec => assert(!exec.info.isBlacklisted) + assert(!exec.info.isExcluded) } - // Blacklist a node. + // Exclude a node. time += 1 - listener.onNodeBlacklisted(SparkListenerNodeBlacklisted(time, "1.example.com", 2)) + listener.onNodeExcluded(SparkListenerNodeExcluded(time, "1.example.com", 2)) check[ExecutorSummaryWrapper]("1") { exec => assert(exec.info.isBlacklisted) + assert(exec.info.isExcluded) } time += 1 - listener.onNodeUnblacklisted(SparkListenerNodeUnblacklisted(time, "1.example.com")) + listener.onNodeUnexcluded(SparkListenerNodeUnexcluded(time, "1.example.com")) check[ExecutorSummaryWrapper]("1") { exec => assert(!exec.info.isBlacklisted) + assert(!exec.info.isExcluded) } // Stop executors. 
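The suite above drives the new exclusion events through the listener bus via `onExecutorExcluded`, `onNodeExcluded` and their counterparts. A minimal, hypothetical sketch of an application-side `SparkListener` consuming `SparkListenerExecutorExcluded` and `SparkListenerNodeExcluded`; the class and object names are made up, and the event fields match the JSON fixtures in `JsonProtocolSuite` below:

```
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.scheduler.{SparkListener, SparkListenerExecutorExcluded, SparkListenerNodeExcluded}

// Prints exclusion decisions as they arrive on the listener bus.
class ExclusionLoggingListener extends SparkListener {
  override def onExecutorExcluded(event: SparkListenerExecutorExcluded): Unit = {
    println(s"Executor ${event.executorId} excluded at ${event.time} " +
      s"after ${event.taskFailures} task failures")
  }

  override def onNodeExcluded(event: SparkListenerNodeExcluded): Unit = {
    println(s"Node ${event.hostId} excluded at ${event.time} " +
      s"after ${event.executorFailures} executor failures")
  }
}

object ExclusionListenerSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("exclusion-listener-sketch")
      .setMaster("local[2]")
      .set("spark.excludeOnFailure.enabled", "true")
    val sc = new SparkContext(conf)
    sc.addSparkListener(new ExclusionLoggingListener)
    // ... run jobs here; the callbacks fire only if exclusion actually triggers.
    sc.stop()
  }
}
```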
diff --git a/core/src/test/scala/org/apache/spark/status/api/v1/ExecutorSummarySuite.scala b/core/src/test/scala/org/apache/spark/status/api/v1/ExecutorSummarySuite.scala index 286911bdfc19a..541a7821a51fb 100644 --- a/core/src/test/scala/org/apache/spark/status/api/v1/ExecutorSummarySuite.scala +++ b/core/src/test/scala/org/apache/spark/status/api/v1/ExecutorSummarySuite.scala @@ -33,7 +33,8 @@ class ExecutorSummarySuite extends SparkFunSuite { 0, 0, 1, 100, 1, 100, 100, 10, false, 20, new Date(1600984336352L), - Option.empty, Option.empty, Map(), Option.empty, Set(), Option.empty, Map(), Map(), 1) + Option.empty, Option.empty, Map(), Option.empty, Set(), Option.empty, Map(), Map(), 1, + false, Set()) val expectedJson = "{\"id\":\"id\",\"hostPort\":\"host:port\",\"isActive\":true," + "\"rddBlocks\":1,\"memoryUsed\":10,\"diskUsed\":10,\"totalCores\":1,\"maxTasks\":1," + "\"activeTasks\":1,\"failedTasks\":0,\"completedTasks\":0,\"totalTasks\":1," + @@ -41,7 +42,8 @@ class ExecutorSummarySuite extends SparkFunSuite { "\"totalShuffleRead\":100,\"totalShuffleWrite\":10,\"isBlacklisted\":false," + "\"maxMemory\":20,\"addTime\":1600984336352,\"removeTime\":null,\"removeReason\":null," + "\"executorLogs\":{},\"memoryMetrics\":null,\"blacklistedInStages\":[]," + - "\"peakMemoryMetrics\":null,\"attributes\":{},\"resources\":{},\"resourceProfileId\":1}" + "\"peakMemoryMetrics\":null,\"attributes\":{},\"resources\":{},\"resourceProfileId\":1," + + "\"isExcluded\":false,\"excludedInStages\":[]}" val json = mapper.writeValueAsString(executorSummary) assert(expectedJson.equals(json)) val deserializeExecutorSummary = mapper.readValue(json, new TypeReference[ExecutorSummary] {}) diff --git a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala index 2ae51f425dcb5..4cd1fc19f1484 100644 --- a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala @@ -94,12 +94,18 @@ class JsonProtocolSuite extends SparkFunSuite { val executorAdded = SparkListenerExecutorAdded(executorAddedTime, "exec1", new ExecutorInfo("Hostee.awesome.com", 11, logUrlMap, attributes, resources.toMap, 4)) val executorRemoved = SparkListenerExecutorRemoved(executorRemovedTime, "exec2", "test reason") - val executorBlacklisted = SparkListenerExecutorBlacklisted(executorBlacklistedTime, "exec1", 22) + val executorBlacklisted = SparkListenerExecutorBlacklisted(executorExcludedTime, "exec1", 22) val executorUnblacklisted = - SparkListenerExecutorUnblacklisted(executorUnblacklistedTime, "exec1") - val nodeBlacklisted = SparkListenerNodeBlacklisted(nodeBlacklistedTime, "node1", 33) + SparkListenerExecutorUnblacklisted(executorUnexcludedTime, "exec1") + val nodeBlacklisted = SparkListenerNodeBlacklisted(nodeExcludedTime, "node1", 33) + val executorExcluded = SparkListenerExecutorExcluded(executorExcludedTime, "exec1", 22) + val executorUnexcluded = + SparkListenerExecutorUnexcluded(executorUnexcludedTime, "exec1") + val nodeExcluded = SparkListenerNodeExcluded(nodeExcludedTime, "node1", 33) val nodeUnblacklisted = - SparkListenerNodeUnblacklisted(nodeUnblacklistedTime, "node1") + SparkListenerNodeUnblacklisted(nodeUnexcludedTime, "node1") + val nodeUnexcluded = + SparkListenerNodeUnexcluded(nodeUnexcludedTime, "node1") val executorMetricsUpdate = { // Use custom accum ID for determinism val accumUpdates = @@ -147,8 +153,12 @@ class JsonProtocolSuite extends SparkFunSuite { 
testEvent(executorRemoved, executorRemovedJsonString) testEvent(executorBlacklisted, executorBlacklistedJsonString) testEvent(executorUnblacklisted, executorUnblacklistedJsonString) + testEvent(executorExcluded, executorExcludedJsonString) + testEvent(executorUnexcluded, executorUnexcludedJsonString) testEvent(nodeBlacklisted, nodeBlacklistedJsonString) testEvent(nodeUnblacklisted, nodeUnblacklistedJsonString) + testEvent(nodeExcluded, nodeExcludedJsonString) + testEvent(nodeUnexcluded, nodeUnexcludedJsonString) testEvent(executorMetricsUpdate, executorMetricsUpdateJsonString) testEvent(blockUpdated, blockUpdatedJsonString) testEvent(stageExecutorMetrics, stageExecutorMetricsJsonString) @@ -598,10 +608,10 @@ private[spark] object JsonProtocolSuite extends Assertions { private val jobCompletionTime = 1421191296660L private val executorAddedTime = 1421458410000L private val executorRemovedTime = 1421458922000L - private val executorBlacklistedTime = 1421458932000L - private val executorUnblacklistedTime = 1421458942000L - private val nodeBlacklistedTime = 1421458952000L - private val nodeUnblacklistedTime = 1421458962000L + private val executorExcludedTime = 1421458932000L + private val executorUnexcludedTime = 1421458942000L + private val nodeExcludedTime = 1421458952000L + private val nodeUnexcludedTime = 1421458962000L private def testEvent(event: SparkListenerEvent, jsonString: String): Unit = { val actualJsonString = compact(render(JsonProtocol.sparkEventToJson(event))) @@ -2415,36 +2425,70 @@ private[spark] object JsonProtocolSuite extends Assertions { s""" |{ | "Event" : "org.apache.spark.scheduler.SparkListenerExecutorBlacklisted", - | "time" : ${executorBlacklistedTime}, + | "time" : ${executorExcludedTime}, | "executorId" : "exec1", | "taskFailures" : 22 |} """.stripMargin + private val executorExcludedJsonString = + s""" + |{ + | "Event" : "org.apache.spark.scheduler.SparkListenerExecutorExcluded", + | "time" : ${executorExcludedTime}, + | "executorId" : "exec1", + | "taskFailures" : 22 + |} + """.stripMargin private val executorUnblacklistedJsonString = s""" |{ | "Event" : "org.apache.spark.scheduler.SparkListenerExecutorUnblacklisted", - | "time" : ${executorUnblacklistedTime}, + | "time" : ${executorUnexcludedTime}, | "executorId" : "exec1" |} """.stripMargin + private val executorUnexcludedJsonString = + s""" + |{ + | "Event" : "org.apache.spark.scheduler.SparkListenerExecutorUnexcluded", + | "time" : ${executorUnexcludedTime}, + | "executorId" : "exec1" + |} + """.stripMargin private val nodeBlacklistedJsonString = s""" |{ | "Event" : "org.apache.spark.scheduler.SparkListenerNodeBlacklisted", - | "time" : ${nodeBlacklistedTime}, + | "time" : ${nodeExcludedTime}, | "hostId" : "node1", | "executorFailures" : 33 |} """.stripMargin + private val nodeExcludedJsonString = + s""" + |{ + | "Event" : "org.apache.spark.scheduler.SparkListenerNodeExcluded", + | "time" : ${nodeExcludedTime}, + | "hostId" : "node1", + | "executorFailures" : 33 + |} + """.stripMargin private val nodeUnblacklistedJsonString = s""" |{ | "Event" : "org.apache.spark.scheduler.SparkListenerNodeUnblacklisted", - | "time" : ${nodeUnblacklistedTime}, + | "time" : ${nodeUnexcludedTime}, | "hostId" : "node1" |} """.stripMargin + private val nodeUnexcludedJsonString = + s""" + |{ + | "Event" : "org.apache.spark.scheduler.SparkListenerNodeUnexcluded", + | "time" : ${nodeUnexcludedTime}, + | "hostId" : "node1" + |} + """.stripMargin private val resourceProfileJsonString = """ |{ diff --git a/docs/configuration.md 
b/docs/configuration.md index d825a589dfd31..232ea4079d436 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -2146,113 +2146,113 @@ Apart from these, the following properties are also available, and may be useful 3.1.0 - spark.scheduler.blacklist.unschedulableTaskSetTimeout + spark.scheduler.excludeOnFailure.unschedulableTaskSetTimeout 120s The timeout in seconds to wait to acquire a new executor and schedule a task before aborting a - TaskSet which is unschedulable because of being completely blacklisted. + TaskSet which is unschedulable because all executors are excluded due to task failures. 2.4.1 - spark.blacklist.enabled + spark.excludeOnFailure.enabled false - If set to "true", prevent Spark from scheduling tasks on executors that have been blacklisted - due to too many task failures. The blacklisting algorithm can be further controlled by the - other "spark.blacklist" configuration options. + If set to "true", prevent Spark from scheduling tasks on executors that have been excluded + due to too many task failures. The algorithm used to exclude executors and nodes can be further + controlled by the other "spark.excludeOnFailure" configuration options. 2.1.0 - spark.blacklist.timeout + spark.excludeOnFailure.timeout 1h - (Experimental) How long a node or executor is blacklisted for the entire application, before it - is unconditionally removed from the blacklist to attempt running new tasks. + (Experimental) How long a node or executor is excluded for the entire application, before it + is unconditionally removed from the excludelist to attempt running new tasks. 2.1.0 - spark.blacklist.task.maxTaskAttemptsPerExecutor + spark.excludeOnFailure.task.maxTaskAttemptsPerExecutor 1 (Experimental) For a given task, how many times it can be retried on one executor before the - executor is blacklisted for that task. + executor is excluded for that task. 2.1.0 - spark.blacklist.task.maxTaskAttemptsPerNode + spark.excludeOnFailure.task.maxTaskAttemptsPerNode 2 (Experimental) For a given task, how many times it can be retried on one node, before the entire - node is blacklisted for that task. + node is excluded for that task. 2.1.0 - spark.blacklist.stage.maxFailedTasksPerExecutor + spark.excludeOnFailure.stage.maxFailedTasksPerExecutor 2 (Experimental) How many different tasks must fail on one executor, within one stage, before the - executor is blacklisted for that stage. + executor is excluded for that stage. 2.1.0 - spark.blacklist.stage.maxFailedExecutorsPerNode + spark.excludeOnFailure.stage.maxFailedExecutorsPerNode 2 - (Experimental) How many different executors are marked as blacklisted for a given stage, before + (Experimental) How many different executors are marked as excluded for a given stage, before the entire node is marked as failed for the stage. 2.1.0 - spark.blacklist.application.maxFailedTasksPerExecutor + spark.excludeOnFailure.application.maxFailedTasksPerExecutor 2 (Experimental) How many different tasks must fail on one executor, in successful task sets, - before the executor is blacklisted for the entire application. Blacklisted executors will + before the executor is excluded for the entire application. Excluded executors will be automatically added back to the pool of available resources after the timeout specified by - spark.blacklist.timeout. Note that with dynamic allocation, though, the executors + spark.excludeOnFailure.timeout. Note that with dynamic allocation, though, the executors may get marked as idle and be reclaimed by the cluster manager.
2.2.0 - spark.blacklist.application.maxFailedExecutorsPerNode + spark.excludeOnFailure.application.maxFailedExecutorsPerNode 2 - (Experimental) How many different executors must be blacklisted for the entire application, - before the node is blacklisted for the entire application. Blacklisted nodes will + (Experimental) How many different executors must be excluded for the entire application, + before the node is excluded for the entire application. Excluded nodes will be automatically added back to the pool of available resources after the timeout specified by - spark.blacklist.timeout. Note that with dynamic allocation, though, the executors - on the node may get marked as idle and be reclaimed by the cluster manager. + spark.excludeOnFailure.timeout. Note that with dynamic allocation, though, the + executors on the node may get marked as idle and be reclaimed by the cluster manager. 2.2.0 - spark.blacklist.killBlacklistedExecutors + spark.excludeOnFailure.killExcludedExecutors false (Experimental) If set to "true", allow Spark to automatically kill the executors - when they are blacklisted on fetch failure or blacklisted for the entire application, - as controlled by spark.blacklist.application.*. Note that, when an entire node is added - to the blacklist, all of the executors on that node will be killed. + when they are excluded on fetch failure or excluded for the entire application, + as controlled by spark.excludeOnFailure.application.*. Note that, when an entire node is added + to the excludelist, all of the executors on that node will be killed. 2.2.0 - spark.blacklist.application.fetchFailure.enabled + spark.excludeOnFailure.application.fetchFailure.enabled false - (Experimental) If set to "true", Spark will blacklist the executor immediately when a fetch + (Experimental) If set to "true", Spark will exclude the executor immediately when a fetch failure happens. If external shuffle service is enabled, then the whole node will be - blacklisted. + excluded. 2.3.0 diff --git a/docs/monitoring.md b/docs/monitoring.md index 97948f6fac4d9..3513fed7b3d78 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -1125,12 +1125,14 @@ This is the component with the largest amount of instrumented metrics - stages.failedStages.count - stages.skippedStages.count - stages.completedStages.count - - tasks.blackListedExecutors.count + - tasks.blackListedExecutors.count // deprecated, use excludedExecutors instead + - tasks.excludedExecutors.count - tasks.completedTasks.count - tasks.failedTasks.count - tasks.killedTasks.count - tasks.skippedTasks.count - - tasks.unblackListedExecutors.count + - tasks.unblackListedExecutors.count // deprecated, use unexcludedExecutors instead + - tasks.unexcludedExecutors.count - jobs.succeededJobs - jobs.failedJobs - jobDuration diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 6f7aaf2baeccd..5e8eb48093c8a 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -551,12 +551,12 @@ To use a custom metrics.properties for the application master and executors, upd 2.0.0 - spark.yarn.blacklist.executor.launch.blacklisting.enabled + spark.yarn.executor.launch.excludeOnFailure.enabled false - Flag to enable blacklisting of nodes having YARN resource allocation problems. - The error limit for blacklisting can be configured by - spark.blacklist.application.maxFailedExecutorsPerNode. + Flag to enable exclusion of nodes having YARN resource allocation problems.
+ The error limit for excluding can be configured by + spark.excludeOnFailure.application.maxFailedExecutorsPerNode. 2.4.0 diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala index 5655ef50d214f..4ea22ebd93eef 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala @@ -185,7 +185,7 @@ private[spark] class KubernetesClusterSchedulerBackend( Some(new HadoopDelegationTokenManager(conf, sc.hadoopConfiguration, driverEndpoint)) } - override protected def isBlacklisted(executorId: String, hostname: String): Boolean = { + override protected def isExecutorExcluded(executorId: String, hostname: String): Boolean = { podAllocator.isDeleted(executorId) } diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala index 32cd50298bc6c..bbe1ff495d8a6 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala @@ -63,7 +63,7 @@ private[spark] class MesosCoarseGrainedSchedulerBackend( with MesosScheduler with MesosSchedulerUtils { - // Blacklist a agent after this many failures + // Exclude an agent after this many failures private val MAX_AGENT_FAILURES = 2 private val maxCoresOption = conf.get(config.CORES_MAX) @@ -667,12 +667,12 @@ private[spark] class MesosCoarseGrainedSchedulerBackend( totalGpusAcquired -= gpus gpusByTaskId -= taskId } - // If it was a failure, mark the agent as failed for blacklisting purposes + // If it was a failure, mark the agent as failed for excluding purposes if (TaskState.isFailed(state)) { agent.taskFailures += 1 if (agent.taskFailures >= MAX_AGENT_FAILURES) { - logInfo(s"Blacklisting Mesos agent $agentId due to too many failures; " + + logInfo(s"Excluding Mesos agent $agentId due to too many failures; " + "is Spark installed on it?") } } diff --git a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackendSuite.scala b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackendSuite.scala index 4d7f6441020b7..2b7272a490376 100644 --- a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackendSuite.scala +++ b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackendSuite.scala @@ -833,7 +833,7 @@ class MesosCoarseGrainedSchedulerBackendSuite extends SparkFunSuite when(driver.start()).thenReturn(Protos.Status.DRIVER_RUNNING) taskScheduler = mock[TaskSchedulerImpl] - when(taskScheduler.nodeBlacklist).thenReturn(Set[String]()) + when(taskScheduler.excludedNodes).thenReturn(Set[String]()) when(taskScheduler.sc).thenReturn(sc) externalShuffleClient = mock[MesosExternalBlockStoreClient] diff --git 
a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 9b99e8ff9265c..e23773229c560 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -567,10 +567,10 @@ private[spark] class ApplicationMaster( finish(FinalApplicationStatus.FAILED, ApplicationMaster.EXIT_MAX_EXECUTOR_FAILURES, s"Max number of executor failures ($maxNumExecutorFailures) reached") - } else if (allocator.isAllNodeBlacklisted) { + } else if (allocator.isAllNodeExcluded) { finish(FinalApplicationStatus.FAILED, ApplicationMaster.EXIT_MAX_EXECUTOR_FAILURES, - "Due to executor failures all available nodes are blacklisted") + "Due to executor failures all available nodes are excluded") } else { logDebug("Sending progress") allocator.allocateResources() @@ -792,7 +792,7 @@ private[spark] class ApplicationMaster( r.resourceProfileToTotalExecs, r.numLocalityAwareTasksPerResourceProfileId, r.hostToLocalTaskCount, - r.nodeBlacklist)) { + r.excludedNodes)) { resetAllocatorInterval() } context.reply(true) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index adbbbc01a0bd5..ef01a2ad95483 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -159,8 +159,8 @@ private[yarn] class YarnAllocator( private[spark] val failureTracker = new FailureTracker(sparkConf, clock) - private val allocatorBlacklistTracker = - new YarnAllocatorBlacklistTracker(sparkConf, amClient, failureTracker) + private val allocatorNodeHealthTracker = + new YarnAllocatorNodeHealthTracker(sparkConf, amClient, failureTracker) // Executor memory in MiB. protected val executorMemory = sparkConf.get(EXECUTOR_MEMORY).toInt @@ -238,7 +238,7 @@ private[yarn] class YarnAllocator( def getNumExecutorsFailed: Int = failureTracker.numFailedExecutors - def isAllNodeBlacklisted: Boolean = allocatorBlacklistTracker.isAllNodeBlacklisted + def isAllNodeExcluded: Boolean = allocatorNodeHealthTracker.isAllNodeExcluded /** * A sequence of pending container requests that have not yet been fulfilled. @@ -358,15 +358,15 @@ private[yarn] class YarnAllocator( * placement hint. * @param hostToLocalTaskCount a map of preferred hostname to possible task counts for each * ResourceProfile id to be used as container placement hint. - * @param nodeBlacklist blacklisted nodes, which is passed in to avoid allocating new containers - * on them. It will be used to update the application master's blacklist. + * @param excludedNodes excluded nodes, which is passed in to avoid allocating new containers + * on them. It will be used to update the applications excluded node list. * @return Whether the new requested total is different than the old value. 
*/ def requestTotalExecutorsWithPreferredLocalities( resourceProfileToTotalExecs: Map[ResourceProfile, Int], numLocalityAwareTasksPerResourceProfileId: Map[Int, Int], hostToLocalTaskCountPerResourceProfileId: Map[Int, Map[String, Int]], - nodeBlacklist: Set[String]): Boolean = synchronized { + excludedNodes: Set[String]): Boolean = synchronized { this.numLocalityAwareTasksPerResourceProfileId = numLocalityAwareTasksPerResourceProfileId this.hostToLocalTaskCountPerResourceProfileId = hostToLocalTaskCountPerResourceProfileId @@ -377,7 +377,7 @@ private[yarn] class YarnAllocator( logInfo(s"Driver requested a total number of $numExecs executor(s) " + s"for resource profile id: ${rp.id}.") targetNumExecutorsPerResourceProfileId(rp.id) = numExecs - allocatorBlacklistTracker.setSchedulerBlacklistedNodes(nodeBlacklist) + allocatorNodeHealthTracker.setSchedulerExcludedNodes(excludedNodes) true } else { false @@ -416,7 +416,7 @@ private[yarn] class YarnAllocator( val allocateResponse = amClient.allocate(progressIndicator) val allocatedContainers = allocateResponse.getAllocatedContainers() - allocatorBlacklistTracker.setNumClusterNodes(allocateResponse.getNumClusterNodes) + allocatorNodeHealthTracker.setNumClusterNodes(allocateResponse.getNumClusterNodes) if (allocatedContainers.size > 0) { logDebug(("Allocated containers: %d. Current executor count: %d. " + @@ -827,7 +827,7 @@ private[yarn] class YarnAllocator( s"$diag Consider boosting ${EXECUTOR_MEMORY_OVERHEAD.key}." (true, message) case other_exit_status => - // SPARK-26269: follow YARN's blacklisting behaviour(see https://github + // SPARK-26269: follow YARN's behaviour(see https://github // .com/apache/hadoop/blob/228156cfd1b474988bc4fedfbf7edddc87db41e3/had // oop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/ap // ache/hadoop/yarn/util/Apps.java#L273 for details) @@ -837,7 +837,7 @@ private[yarn] class YarnAllocator( s". Diagnostics: ${completedContainer.getDiagnostics}.") } else { // completed container from a bad node - allocatorBlacklistTracker.handleResourceAllocationFailure(hostOpt) + allocatorNodeHealthTracker.handleResourceAllocationFailure(hostOpt) (true, s"Container from a bad node: $containerId$onHostStr" + s". Exit status: ${completedContainer.getExitStatus}" + s". Diagnostics: ${completedContainer.getDiagnostics}.") diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTracker.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorNodeHealthTracker.scala similarity index 63% rename from resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTracker.scala rename to resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorNodeHealthTracker.scala index 339d3715a7316..de9e190361428 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTracker.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorNodeHealthTracker.scala @@ -27,42 +27,43 @@ import org.apache.spark.SparkConf import org.apache.spark.deploy.yarn.config._ import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ -import org.apache.spark.scheduler.BlacklistTracker +import org.apache.spark.scheduler.HealthTracker import org.apache.spark.util.{Clock, SystemClock} /** - * YarnAllocatorBlacklistTracker is responsible for tracking the blacklisted nodes - * and synchronizing the node list to YARN. 
+ * YarnAllocatorNodeHealthTracker is responsible for tracking the health of nodes + * and synchronizing the node list to YARN as to which nodes are excluded. * - * Blacklisted nodes are coming from two different sources: + * Excluding nodes are coming from two different sources: * *
 * <ul>
- *   <li> from the scheduler as task level blacklisted nodes
+ *   <li> from the scheduler as task level excluded nodes
 *   <li> from this class (tracked here) as YARN resource allocation problems
 * </ul>
    * * The reason to realize this logic here (and not in the driver) is to avoid possible delays - * between synchronizing the blacklisted nodes with YARN and resource allocations. + * between synchronizing the excluded nodes with YARN and resource allocations. */ -private[spark] class YarnAllocatorBlacklistTracker( +private[spark] class YarnAllocatorNodeHealthTracker( sparkConf: SparkConf, amClient: AMRMClient[ContainerRequest], failureTracker: FailureTracker) extends Logging { - private val blacklistTimeoutMillis = BlacklistTracker.getBlacklistTimeout(sparkConf) + private val excludeOnFailureTimeoutMillis = HealthTracker.getExludeOnFailureTimeout(sparkConf) - private val launchBlacklistEnabled = sparkConf.get(YARN_EXECUTOR_LAUNCH_BLACKLIST_ENABLED) + private val launchExcludeOnFailureEnabled = + sparkConf.get(YARN_EXECUTOR_LAUNCH_EXCLUDE_ON_FAILURE_ENABLED) private val maxFailuresPerHost = sparkConf.get(MAX_FAILED_EXEC_PER_NODE) private val excludeNodes = sparkConf.get(YARN_EXCLUDE_NODES).toSet - private val allocatorBlacklist = new HashMap[String, Long]() + private val allocatorExcludedNodeList = new HashMap[String, Long]() - private var currentBlacklistedYarnNodes = Set.empty[String] + private var currentExcludededYarnNodes = Set.empty[String] - private var schedulerBlacklist = Set.empty[String] + private var schedulerExcludedNodeList = Set.empty[String] private var numClusterNodes = Int.MaxValue @@ -72,72 +73,76 @@ private[spark] class YarnAllocatorBlacklistTracker( def handleResourceAllocationFailure(hostOpt: Option[String]): Unit = { hostOpt match { - case Some(hostname) if launchBlacklistEnabled => - // failures on an already blacklisted nodes are not even tracked. + case Some(hostname) if launchExcludeOnFailureEnabled => + // failures on an already excluded node are not even tracked. 
// otherwise, such failures could shutdown the application // as resource requests are asynchronous // and a late failure response could exceed MAX_EXECUTOR_FAILURES - if (!schedulerBlacklist.contains(hostname) && - !allocatorBlacklist.contains(hostname)) { + if (!schedulerExcludedNodeList.contains(hostname) && + !allocatorExcludedNodeList.contains(hostname)) { failureTracker.registerFailureOnHost(hostname) - updateAllocationBlacklistedNodes(hostname) + updateAllocationExcludedNodes(hostname) } case _ => failureTracker.registerExecutorFailure() } } - private def updateAllocationBlacklistedNodes(hostname: String): Unit = { + private def updateAllocationExcludedNodes(hostname: String): Unit = { val failuresOnHost = failureTracker.numFailuresOnHost(hostname) if (failuresOnHost > maxFailuresPerHost) { - logInfo(s"blacklisting $hostname as YARN allocation failed $failuresOnHost times") - allocatorBlacklist.put( + logInfo(s"excluding $hostname as YARN allocation failed $failuresOnHost times") + allocatorExcludedNodeList.put( hostname, - failureTracker.clock.getTimeMillis() + blacklistTimeoutMillis) - refreshBlacklistedNodes() + failureTracker.clock.getTimeMillis() + excludeOnFailureTimeoutMillis) + refreshExcludedNodes() } } - def setSchedulerBlacklistedNodes(schedulerBlacklistedNodesWithExpiry: Set[String]): Unit = { - this.schedulerBlacklist = schedulerBlacklistedNodesWithExpiry - refreshBlacklistedNodes() + def setSchedulerExcludedNodes(schedulerExcludedNodesWithExpiry: Set[String]): Unit = { + this.schedulerExcludedNodeList = schedulerExcludedNodesWithExpiry + refreshExcludedNodes() } - def isAllNodeBlacklisted: Boolean = { + def isAllNodeExcluded: Boolean = { if (numClusterNodes <= 0) { logWarning("No available nodes reported, please check Resource Manager.") false } else { - currentBlacklistedYarnNodes.size >= numClusterNodes + currentExcludededYarnNodes.size >= numClusterNodes } } - private def refreshBlacklistedNodes(): Unit = { - removeExpiredYarnBlacklistedNodes() - val allBlacklistedNodes = excludeNodes ++ schedulerBlacklist ++ allocatorBlacklist.keySet - synchronizeBlacklistedNodeWithYarn(allBlacklistedNodes) + private def refreshExcludedNodes(): Unit = { + removeExpiredYarnExcludedNodes() + val allExcludedNodes = + excludeNodes ++ schedulerExcludedNodeList ++ allocatorExcludedNodeList.keySet + synchronizeExcludedNodesWithYarn(allExcludedNodes) } - private def synchronizeBlacklistedNodeWithYarn(nodesToBlacklist: Set[String]): Unit = { - // Update blacklist information to YARN ResourceManager for this application, + private def synchronizeExcludedNodesWithYarn(nodesToExclude: Set[String]): Unit = { + // Update YARN with the nodes that are excluded for this application, // in order to avoid allocating new Containers on the problematic nodes. 
- val additions = (nodesToBlacklist -- currentBlacklistedYarnNodes).toList.sorted - val removals = (currentBlacklistedYarnNodes -- nodesToBlacklist).toList.sorted + val additions = (nodesToExclude -- currentExcludededYarnNodes).toList.sorted + val removals = (currentExcludededYarnNodes -- nodesToExclude).toList.sorted if (additions.nonEmpty) { - logInfo(s"adding nodes to YARN application master's blacklist: $additions") + logInfo(s"adding nodes to YARN application master's excluded node list: $additions") } if (removals.nonEmpty) { - logInfo(s"removing nodes from YARN application master's blacklist: $removals") + logInfo(s"removing nodes from YARN application master's excluded node list: $removals") } if (additions.nonEmpty || removals.nonEmpty) { + // Note YARNs api for excluding nodes is updateBlacklist. + // TODO - We need to update once Hadoop changes - + // https://issues.apache.org/jira/browse/HADOOP-17169 amClient.updateBlacklist(additions.asJava, removals.asJava) } - currentBlacklistedYarnNodes = nodesToBlacklist + currentExcludededYarnNodes = nodesToExclude } - private def removeExpiredYarnBlacklistedNodes(): Unit = { + private def removeExpiredYarnExcludedNodes(): Unit = { val now = failureTracker.clock.getTimeMillis() - allocatorBlacklist.retain { (_, expiryTime) => expiryTime > now } + allocatorExcludedNodeList.retain { (_, expiryTime) => expiryTime > now } } } diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala index 1b0bf295db499..f2e838f6270c9 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala @@ -379,14 +379,15 @@ package object config extends Logging { .stringConf .createOptional - /* YARN allocator-level blacklisting related config entries. */ - private[spark] val YARN_EXECUTOR_LAUNCH_BLACKLIST_ENABLED = - ConfigBuilder("spark.yarn.blacklist.executor.launch.blacklisting.enabled") - .version("2.4.0") + /* YARN allocator-level excludeOnFailure related config entries. */ + private[spark] val YARN_EXECUTOR_LAUNCH_EXCLUDE_ON_FAILURE_ENABLED = + ConfigBuilder("spark.yarn.executor.launch.excludeOnFailure.enabled") + .version("3.1.0") + .withAlternative("spark.yarn.blacklist.executor.launch.blacklisting.enabled") .booleanConf .createWithDefault(false) - /* Initially blacklisted YARN nodes. */ + /* Initially excluded YARN nodes. 
*/ private[spark] val YARN_EXCLUDE_NODES = ConfigBuilder("spark.yarn.exclude.nodes") .version("3.0.0") .stringConf diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala index 3f2e8846e85b3..b42bdb9816600 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala @@ -132,13 +132,13 @@ private[spark] abstract class YarnSchedulerBackend( private[cluster] def prepareRequestExecutors( resourceProfileToTotalExecs: Map[ResourceProfile, Int]): RequestExecutors = { - val nodeBlacklist: Set[String] = scheduler.nodeBlacklist() - // For locality preferences, ignore preferences for nodes that are blacklisted + val excludedNodes: Set[String] = scheduler.excludedNodes() + // For locality preferences, ignore preferences for nodes that are excluded val filteredRPHostToLocalTaskCount = rpHostToLocalTaskCount.map { case (rpid, v) => - (rpid, v.filter { case (host, count) => !nodeBlacklist.contains(host) }) + (rpid, v.filter { case (host, count) => !excludedNodes.contains(host) }) } RequestExecutors(resourceProfileToTotalExecs, numLocalityAwareTasksPerResourceProfileId, - filteredRPHostToLocalTaskCount, nodeBlacklist) + filteredRPHostToLocalTaskCount, excludedNodes) } /** diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTrackerSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorHealthTrackerSuite.scala similarity index 54% rename from resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTrackerSuite.scala rename to resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorHealthTrackerSuite.scala index 97615f5c936b0..c2fd5ff316592 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTrackerSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorHealthTrackerSuite.scala @@ -26,14 +26,14 @@ import org.scalatest.BeforeAndAfterEach import org.scalatest.matchers.must.Matchers import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.deploy.yarn.config.{YARN_EXCLUDE_NODES, YARN_EXECUTOR_LAUNCH_BLACKLIST_ENABLED} -import org.apache.spark.internal.config.{BLACKLIST_TIMEOUT_CONF, MAX_FAILED_EXEC_PER_NODE} +import org.apache.spark.deploy.yarn.config.{YARN_EXCLUDE_NODES, YARN_EXECUTOR_LAUNCH_EXCLUDE_ON_FAILURE_ENABLED} +import org.apache.spark.internal.config.{EXCLUDE_ON_FAILURE_TIMEOUT_CONF, MAX_FAILED_EXEC_PER_NODE} import org.apache.spark.util.ManualClock -class YarnAllocatorBlacklistTrackerSuite extends SparkFunSuite with Matchers +class YarnAllocatorHealthTrackerSuite extends SparkFunSuite with Matchers with BeforeAndAfterEach { - val BLACKLIST_TIMEOUT = 100L + val EXCLUDE_TIMEOUT = 100L val MAX_FAILED_EXEC_PER_NODE_VALUE = 2 var sparkConf: SparkConf = _ @@ -42,117 +42,117 @@ class YarnAllocatorBlacklistTrackerSuite extends SparkFunSuite with Matchers override def beforeEach(): Unit = { sparkConf = new SparkConf() - sparkConf.set(BLACKLIST_TIMEOUT_CONF, BLACKLIST_TIMEOUT) - sparkConf.set(YARN_EXECUTOR_LAUNCH_BLACKLIST_ENABLED, true) + sparkConf.set(EXCLUDE_ON_FAILURE_TIMEOUT_CONF, EXCLUDE_TIMEOUT) + 
sparkConf.set(YARN_EXECUTOR_LAUNCH_EXCLUDE_ON_FAILURE_ENABLED, true) sparkConf.set(MAX_FAILED_EXEC_PER_NODE, MAX_FAILED_EXEC_PER_NODE_VALUE) clock = new ManualClock() amClientMock = mock(classOf[AMRMClient[ContainerRequest]]) super.beforeEach() } - private def createYarnAllocatorBlacklistTracker( - sparkConf: SparkConf = sparkConf): YarnAllocatorBlacklistTracker = { + private def createYarnAllocatorHealthTracker( + sparkConf: SparkConf = sparkConf): YarnAllocatorNodeHealthTracker = { val failureTracker = new FailureTracker(sparkConf, clock) - val yarnBlacklistTracker = - new YarnAllocatorBlacklistTracker(sparkConf, amClientMock, failureTracker) - yarnBlacklistTracker.setNumClusterNodes(4) - yarnBlacklistTracker + val yarnHealthTracker = + new YarnAllocatorNodeHealthTracker(sparkConf, amClientMock, failureTracker) + yarnHealthTracker.setNumClusterNodes(4) + yarnHealthTracker } - test("expiring its own blacklisted nodes") { - val yarnBlacklistTracker = createYarnAllocatorBlacklistTracker() + test("expiring its own excluded nodes") { + val yarnHealthTracker = createYarnAllocatorHealthTracker() (1 to MAX_FAILED_EXEC_PER_NODE_VALUE).foreach { _ => { - yarnBlacklistTracker.handleResourceAllocationFailure(Some("host")) - // host should not be blacklisted at these failures as MAX_FAILED_EXEC_PER_NODE is 2 + yarnHealthTracker.handleResourceAllocationFailure(Some("host")) + // host should not be excluded at these failures as MAX_FAILED_EXEC_PER_NODE is 2 verify(amClientMock, never()) .updateBlacklist(Arrays.asList("host"), Collections.emptyList()) } } - yarnBlacklistTracker.handleResourceAllocationFailure(Some("host")) - // the third failure on the host triggers the blacklisting + yarnHealthTracker.handleResourceAllocationFailure(Some("host")) + // the third failure on the host triggers the exclusion verify(amClientMock).updateBlacklist(Arrays.asList("host"), Collections.emptyList()) - clock.advance(BLACKLIST_TIMEOUT) + clock.advance(EXCLUDE_TIMEOUT) - // trigger synchronisation of blacklisted nodes with YARN - yarnBlacklistTracker.setSchedulerBlacklistedNodes(Set()) + // trigger synchronisation of excluded nodes with YARN + yarnHealthTracker.setSchedulerExcludedNodes(Set()) verify(amClientMock).updateBlacklist(Collections.emptyList(), Arrays.asList("host")) } - test("not handling the expiry of scheduler blacklisted nodes") { - val yarnBlacklistTracker = createYarnAllocatorBlacklistTracker() + test("not handling the expiry of scheduler excluded nodes") { + val yarnHealthTracker = createYarnAllocatorHealthTracker() - yarnBlacklistTracker.setSchedulerBlacklistedNodes(Set("host1", "host2")) + yarnHealthTracker.setSchedulerExcludedNodes(Set("host1", "host2")) verify(amClientMock) .updateBlacklist(Arrays.asList("host1", "host2"), Collections.emptyList()) // advance timer more then host1, host2 expiry time clock.advance(200L) - // expired blacklisted nodes (simulating a resource request) - yarnBlacklistTracker.setSchedulerBlacklistedNodes(Set("host1", "host2")) - // no change is communicated to YARN regarding the blacklisting + // expired excluded nodes (simulating a resource request) + yarnHealthTracker.setSchedulerExcludedNodes(Set("host1", "host2")) + // no change is communicated to YARN regarding the exclusion verify(amClientMock, times(0)).updateBlacklist(Collections.emptyList(), Collections.emptyList()) } - test("combining scheduler and allocation blacklist") { + test("combining scheduler and allocation excluded node list") { sparkConf.set(YARN_EXCLUDE_NODES, Seq("initial1", "initial2")) - val 
yarnBlacklistTracker = createYarnAllocatorBlacklistTracker(sparkConf) - yarnBlacklistTracker.setSchedulerBlacklistedNodes(Set()) + val yarnHealthTracker = createYarnAllocatorHealthTracker(sparkConf) + yarnHealthTracker.setSchedulerExcludedNodes(Set()) - // initial1 and initial2 is added as blacklisted nodes at the very first updateBlacklist call + // initial1 and initial2 is added as excluded nodes at the very first updateBlacklist call // and they are never removed verify(amClientMock) .updateBlacklist(Arrays.asList("initial1", "initial2"), Collections.emptyList()) (1 to MAX_FAILED_EXEC_PER_NODE_VALUE).foreach { _ => { - yarnBlacklistTracker.handleResourceAllocationFailure(Some("host1")) - // host1 should not be blacklisted at these failures as MAX_FAILED_EXEC_PER_NODE is 2 + yarnHealthTracker.handleResourceAllocationFailure(Some("host1")) + // host1 should not be excluded at these failures as MAX_FAILED_EXEC_PER_NODE is 2 verify(amClientMock, never()) .updateBlacklist(Arrays.asList("host1"), Collections.emptyList()) } } - // as this is the third failure on host1 the node will be blacklisted - yarnBlacklistTracker.handleResourceAllocationFailure(Some("host1")) + // as this is the third failure on host1 the node will be excluded + yarnHealthTracker.handleResourceAllocationFailure(Some("host1")) verify(amClientMock) .updateBlacklist(Arrays.asList("host1"), Collections.emptyList()) - yarnBlacklistTracker.setSchedulerBlacklistedNodes(Set("host2", "host3")) + yarnHealthTracker.setSchedulerExcludedNodes(Set("host2", "host3")) verify(amClientMock) .updateBlacklist(Arrays.asList("host2", "host3"), Collections.emptyList()) clock.advance(10L) - yarnBlacklistTracker.setSchedulerBlacklistedNodes(Set("host3", "host4")) + yarnHealthTracker.setSchedulerExcludedNodes(Set("host3", "host4")) verify(amClientMock) .updateBlacklist(Arrays.asList("host4"), Arrays.asList("host2")) } - test("blacklist all available nodes") { - val yarnBlacklistTracker = createYarnAllocatorBlacklistTracker() - yarnBlacklistTracker.setSchedulerBlacklistedNodes(Set("host1", "host2", "host3")) + test("exclude all available nodes") { + val yarnHealthTracker = createYarnAllocatorHealthTracker() + yarnHealthTracker.setSchedulerExcludedNodes(Set("host1", "host2", "host3")) verify(amClientMock) .updateBlacklist(Arrays.asList("host1", "host2", "host3"), Collections.emptyList()) clock.advance(60L) (1 to MAX_FAILED_EXEC_PER_NODE_VALUE).foreach { _ => { - yarnBlacklistTracker.handleResourceAllocationFailure(Some("host4")) - // host4 should not be blacklisted at these failures as MAX_FAILED_EXEC_PER_NODE is 2 + yarnHealthTracker.handleResourceAllocationFailure(Some("host4")) + // host4 should not be excluded at these failures as MAX_FAILED_EXEC_PER_NODE is 2 verify(amClientMock, never()) .updateBlacklist(Arrays.asList("host4"), Collections.emptyList()) } } - // the third failure on the host triggers the blacklisting - yarnBlacklistTracker.handleResourceAllocationFailure(Some("host4")) + // the third failure on the host triggers the exclusion + yarnHealthTracker.handleResourceAllocationFailure(Some("host4")) verify(amClientMock).updateBlacklist(Arrays.asList("host4"), Collections.emptyList()) - assert(yarnBlacklistTracker.isAllNodeBlacklisted) + assert(yarnHealthTracker.isAllNodeExcluded) } } diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala index 63e2b97e0ecab..6b5c72ad7f7aa 100644 --- 
a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala @@ -523,9 +523,10 @@ class YarnAllocatorSuite extends SparkFunSuite with Matchers with BeforeAndAfter handler.getNumUnexpectedContainerRelease should be (2) } - test("blacklisted nodes reflected in amClient requests") { - // Internally we track the set of blacklisted nodes, but yarn wants us to send *changes* - // to the blacklist. This makes sure we are sending the right updates. + test("excluded nodes reflected in amClient requests") { + // Internally we track the set of excluded nodes, but yarn wants us to send *changes* + // to it. Note the YARN api uses the term blacklist for excluded nodes. + // This makes sure we are sending the right updates. val mockAmClient = mock(classOf[AMRMClient[ContainerRequest]]) val (handler, _) = createAllocator(4, mockAmClient) val resourceProfileToTotalExecs = mutable.HashMap(defaultRP -> 1) @@ -534,14 +535,14 @@ class YarnAllocatorSuite extends SparkFunSuite with Matchers with BeforeAndAfter numLocalityAwareTasksPerResourceProfileId.toMap, Map(), Set("hostA")) verify(mockAmClient).updateBlacklist(Seq("hostA").asJava, Seq[String]().asJava) - val blacklistedNodes = Set( + val excludedNodes = Set( "hostA", "hostB" ) resourceProfileToTotalExecs(defaultRP) = 2 handler.requestTotalExecutorsWithPreferredLocalities(resourceProfileToTotalExecs.toMap, - numLocalityAwareTasksPerResourceProfileId.toMap, Map(), blacklistedNodes) + numLocalityAwareTasksPerResourceProfileId.toMap, Map(), excludedNodes) verify(mockAmClient).updateBlacklist(Seq("hostB").asJava, Seq[String]().asJava) resourceProfileToTotalExecs(defaultRP) = 3 handler.requestTotalExecutorsWithPreferredLocalities(resourceProfileToTotalExecs.toMap, @@ -592,7 +593,7 @@ class YarnAllocatorSuite extends SparkFunSuite with Matchers with BeforeAndAfter handler.getNumExecutorsFailed should be (0) } - test("SPARK-26269: YarnAllocator should have same blacklist behaviour with YARN") { + test("SPARK-26269: YarnAllocator should have same excludeOnFailure behaviour with YARN") { val rmClientSpy = spy(rmClient) val maxExecutors = 11 @@ -600,7 +601,7 @@ class YarnAllocatorSuite extends SparkFunSuite with Matchers with BeforeAndAfter maxExecutors, rmClientSpy, Map( - YARN_EXECUTOR_LAUNCH_BLACKLIST_ENABLED.key -> "true", + YARN_EXECUTOR_LAUNCH_EXCLUDE_ON_FAILURE_ENABLED.key -> "true", MAX_FAILED_EXEC_PER_NODE.key -> "0")) handler.updateResourceRequests() @@ -608,7 +609,7 @@ class YarnAllocatorSuite extends SparkFunSuite with Matchers with BeforeAndAfter val ids = 0 to maxExecutors val containers = createContainers(hosts, ids) - val nonBlacklistedStatuses = Seq( + val nonExcludedStatuses = Seq( ContainerExitStatus.SUCCESS, ContainerExitStatus.PREEMPTED, ContainerExitStatus.KILLED_EXCEEDED_VMEM, @@ -619,24 +620,24 @@ class YarnAllocatorSuite extends SparkFunSuite with Matchers with BeforeAndAfter ContainerExitStatus.ABORTED, ContainerExitStatus.DISKS_FAILED) - val nonBlacklistedContainerStatuses = nonBlacklistedStatuses.zipWithIndex.map { + val nonExcludedContainerStatuses = nonExcludedStatuses.zipWithIndex.map { case (exitStatus, idx) => createContainerStatus(containers(idx).getId, exitStatus) } - val BLACKLISTED_EXIT_CODE = 1 - val blacklistedStatuses = Seq(ContainerExitStatus.INVALID, BLACKLISTED_EXIT_CODE) + val EXCLUDED_EXIT_CODE = 1 + val excludedStatuses = Seq(ContainerExitStatus.INVALID, EXCLUDED_EXIT_CODE) - val 
blacklistedContainerStatuses = blacklistedStatuses.zip(9 until maxExecutors).map { + val excludedContainerStatuses = excludedStatuses.zip(9 until maxExecutors).map { case (exitStatus, idx) => createContainerStatus(containers(idx).getId, exitStatus) } handler.handleAllocatedContainers(containers.slice(0, 9)) - handler.processCompletedContainers(nonBlacklistedContainerStatuses) + handler.processCompletedContainers(nonExcludedContainerStatuses) verify(rmClientSpy, never()) .updateBlacklist(hosts.slice(0, 9).asJava, Collections.emptyList()) handler.handleAllocatedContainers(containers.slice(9, 11)) - handler.processCompletedContainers(blacklistedContainerStatuses) + handler.processCompletedContainers(excludedContainerStatuses) verify(rmClientSpy) .updateBlacklist(hosts.slice(9, 10).asJava, Collections.emptyList()) verify(rmClientSpy) diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackendSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackendSuite.scala index 9003c2f630975..7959bb55d7ffc 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackendSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackendSuite.scala @@ -44,9 +44,10 @@ class YarnSchedulerBackendSuite extends SparkFunSuite with MockitoSugar with Loc } private class TestTaskSchedulerImpl(sc: SparkContext) extends TaskSchedulerImpl(sc) { - val blacklistedNodes = new AtomicReference[Set[String]]() - def setNodeBlacklist(nodeBlacklist: Set[String]): Unit = blacklistedNodes.set(nodeBlacklist) - override def nodeBlacklist(): Set[String] = blacklistedNodes.get() + val excludedNodesList = new AtomicReference[Set[String]]() + def setNodeExcludeList(nodeExcludeList: Set[String]): Unit = + excludedNodesList.set(nodeExcludeList) + override def excludedNodes(): Set[String] = excludedNodesList.get() } private class TestYarnSchedulerBackend(scheduler: TaskSchedulerImpl, sc: SparkContext) @@ -56,7 +57,7 @@ class YarnSchedulerBackendSuite extends SparkFunSuite with MockitoSugar with Loc } } - test("RequestExecutors reflects node blacklist and is serializable") { + test("RequestExecutors reflects node excludelist and is serializable") { sc = new SparkContext("local", "YarnSchedulerBackendSuite") // Subclassing the TaskSchedulerImpl here instead of using Mockito. For details see SPARK-26891. 
val sched = new TestTaskSchedulerImpl(sc) @@ -65,7 +66,7 @@ class YarnSchedulerBackendSuite extends SparkFunSuite with MockitoSugar with Loc val ser = new JavaSerializer(sc.conf).newInstance() val defaultResourceProf = ResourceProfile.getOrCreateDefaultProfile(sc.getConf) for { - blacklist <- IndexedSeq(Set[String](), Set("a", "b", "c")) + excludelist <- IndexedSeq(Set[String](), Set("a", "b", "c")) numRequested <- 0 until 10 hostToLocalCount <- IndexedSeq( Map(defaultResourceProf.id -> Map.empty[String, Int]), @@ -73,14 +74,14 @@ class YarnSchedulerBackendSuite extends SparkFunSuite with MockitoSugar with Loc ) } { yarnSchedulerBackendExtended.setHostToLocalTaskCount(hostToLocalCount) - sched.setNodeBlacklist(blacklist) + sched.setNodeExcludeList(excludelist) val request = Map(defaultResourceProf -> numRequested) val req = yarnSchedulerBackendExtended.prepareRequestExecutors(request) assert(req.resourceProfileToTotalExecs(defaultResourceProf) === numRequested) - assert(req.nodeBlacklist === blacklist) + assert(req.excludedNodes === excludelist) val hosts = req.hostToLocalTaskCount(ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID).keySet - assert(hosts.intersect(blacklist).isEmpty) + assert(hosts.intersect(excludelist).isEmpty) // Serialize to make sure serialization doesn't throw an error ser.serialize(req) } From 32b78d3795d5c4fd533b0267647977ed4f02ee49 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Sat, 31 Oct 2020 09:49:18 -0700 Subject: [PATCH 0365/1009] [SPARK-33290][SQL] REFRESH TABLE should invalidate cache even though the table itself may not be cached ### What changes were proposed in this pull request? In `CatalogImpl.refreshTable`, this moves the `uncacheQuery` call out of the condition `if (cache.nonEmpty)` so that it will be called whether the table itself is cached or not. ### Why are the changes needed? In the case like the following: ```sql CREATE TABLE t ...; CREATE VIEW t1 AS SELECT * FROM t; REFRESH TABLE t; ``` If the table `t` is refreshed, the view `t1` which is depending on `t` will not be invalidated. This could lead to incorrect result and is similar to [SPARK-19765](https://issues.apache.org/jira/browse/SPARK-19765). On the other hand, if we have: ```sql CREATE TABLE t ...; CACHE TABLE t; CREATE VIEW t1 AS SELECT * FROM t; REFRESH TABLE t; ``` Then the view `t1` will be refreshed. The behavior is somewhat inconsistent. ### Does this PR introduce _any_ user-facing change? Yes, with the change any cache that are depending on the table refreshed will be invalidated with the change. Previously this only happens if the table itself is cached. ### How was this patch tested? Added a new UT for the case. Closes #30187 from sunchao/SPARK-33290. Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../spark/sql/internal/CatalogImpl.scala | 12 ++++-- .../apache/spark/sql/CachedTableSuite.scala | 42 +++++++++++++++++++ 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala index 795775dd07561..3e216415c2815 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala @@ -504,6 +504,9 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { * If this table is cached as an InMemoryRelation, drop the original cached version and make the * new version cached lazily. 
* + * In addition, refreshing a table also invalidate all caches that have reference to the table + * in a cascading manner. This is to prevent incorrect result from the otherwise staled caches. + * * @group cachemgmt * @since 2.0.0 */ @@ -524,14 +527,17 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { // If this table is cached as an InMemoryRelation, drop the original // cached version and make the new version cached lazily. val cache = sparkSession.sharedState.cacheManager.lookupCachedData(table) + + // uncache the logical plan. + // note this is a no-op for the table itself if it's not cached, but will invalidate all + // caches referencing this table. + sparkSession.sharedState.cacheManager.uncacheQuery(table, cascade = true) + if (cache.nonEmpty) { // save the cache name and cache level for recreation val cacheName = cache.get.cachedRepresentation.cacheBuilder.tableName val cacheLevel = cache.get.cachedRepresentation.cacheBuilder.storageLevel - // uncache the logical plan. - sparkSession.sharedState.cacheManager.uncacheQuery(table, cascade = true) - // recache with the same name and cache level. sparkSession.sharedState.cacheManager.cacheQuery(table, cacheName, cacheLevel) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala index adc725ed9b062..6313370476c93 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala @@ -1208,4 +1208,46 @@ class CachedTableSuite extends QueryTest with SQLTestUtils assert(spark.sharedState.cacheManager.lookupCachedData(df).isDefined) } } + + test("SPARK-33290: REFRESH TABLE should invalidate all caches referencing the table") { + withTable("t") { + withTempPath { path => + withTempView("tempView1", "tempView2") { + Seq((1 -> "a")).toDF("i", "j").write.parquet(path.getCanonicalPath) + sql(s"CREATE TABLE t USING parquet LOCATION '${path.toURI}'") + sql("CREATE TEMPORARY VIEW tempView1 AS SELECT * FROM t") + sql("CACHE TABLE tempView2 AS SELECT i FROM tempView1") + checkAnswer(sql("SELECT * FROM tempView1"), Seq(Row(1, "a"))) + checkAnswer(sql("SELECT * FROM tempView2"), Seq(Row(1))) + + Utils.deleteRecursively(path) + sql("REFRESH TABLE tempView1") + checkAnswer(sql("SELECT * FROM tempView1"), Seq.empty) + checkAnswer(sql("SELECT * FROM tempView2"), Seq.empty) + } + } + } + } + + test("SPARK-33290: querying temporary view after REFRESH TABLE fails with FNFE") { + withTable("t") { + withTempPath { path => + withTempView("tempView1") { + Seq((1 -> "a")).toDF("i", "j").write.parquet(path.getCanonicalPath) + sql(s"CREATE TABLE t USING parquet LOCATION '${path.toURI}'") + sql("CREATE TEMPORARY VIEW tempView1 AS SELECT * FROM t") + checkAnswer(sql("SELECT * FROM tempView1"), Seq(Row(1, "a"))) + + Utils.deleteRecursively(path) + sql("REFRESH TABLE t") + checkAnswer(sql("SELECT * FROM t"), Seq.empty) + val exception = intercept[Exception] { + checkAnswer(sql("SELECT * FROM tempView1"), Seq.empty) + } + assert(exception.getMessage.contains("FileNotFoundException")) + assert(exception.getMessage.contains("REFRESH TABLE")) + } + } + } + } } From c51e5fc14b9d1d120afcf0e53714ccba5063b71e Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Sat, 31 Oct 2020 10:01:31 -0700 Subject: [PATCH 0366/1009] [SPARK-33293][SQL] Refactor WriteToDataSourceV2Exec and reduce code duplication ### What changes were proposed in this pull request? 
Refactor `WriteToDataSourceV2Exec` via removing code duplication around write to table logic: - renamed `AtomicTableWriteExec` to `TableWriteExec` so that the table write logic in this trait can be modified and shared with `CreateTableAsSelectExec`, `ReplaceTableAsSelectExec`, `AtomicCreateTableAsSelectExec ` and `AtomicReplaceTableAsSelectExec`. - similar to the above, renamed `writeToStagedTable` to `writeToTable` in `TableWriteExec`. - extended `writeToTable` so that it can handle both staged table as well as non-staged table. ### Why are the changes needed? Simplify the logic and remove duplication, to make this piece of code easier to maintain. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass CIs with the existing test coverage. Closes #30193 from sunchao/SPARK-33293. Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../v2/WriteToDataSourceV2Exec.scala | 98 ++++++------------- 1 file changed, 30 insertions(+), 68 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala index 616e18ee85a6b..efa2c31e07602 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchTableException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.connector.catalog.{Identifier, StagedTable, StagingTableCatalog, SupportsWrite, TableCatalog} +import org.apache.spark.sql.connector.catalog.{Identifier, StagedTable, StagingTableCatalog, SupportsWrite, Table, TableCatalog} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.connector.write.{BatchWrite, DataWriterFactory, LogicalWriteInfoImpl, PhysicalWriteInfoImpl, SupportsDynamicOverwrite, SupportsOverwrite, SupportsTruncate, V1WriteBuilder, WriteBuilder, WriterCommitMessage} import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} @@ -66,9 +66,7 @@ case class CreateTableAsSelectExec( query: SparkPlan, properties: Map[String, String], writeOptions: CaseInsensitiveStringMap, - ifNotExists: Boolean) extends V2TableWriteExec with SupportsV1Write { - - import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper + ifNotExists: Boolean) extends TableWriteExec { override protected def run(): Seq[InternalRow] = { if (catalog.tableExists(ident)) { @@ -79,30 +77,9 @@ case class CreateTableAsSelectExec( throw new TableAlreadyExistsException(ident) } - Utils.tryWithSafeFinallyAndFailureCallbacks({ - val schema = query.schema.asNullable - catalog.createTable( - ident, schema, partitioning.toArray, properties.asJava) match { - case table: SupportsWrite => - val info = LogicalWriteInfoImpl( - queryId = UUID.randomUUID().toString, - schema, - writeOptions) - val writeBuilder = table.newWriteBuilder(info) - - writeBuilder match { - case v1: V1WriteBuilder => writeWithV1(v1.buildForV1Write()) - case v2 => writeWithV2(v2.buildForBatch()) - } - - case _ => - // table does not support writes - throw new SparkException( - s"Table implementation does not support 
writes: ${ident.quoted}") - } - })(catchBlock = { - catalog.dropTable(ident) - }) + val table = catalog.createTable(ident, query.schema.asNullable, + partitioning.toArray, properties.asJava) + writeToTable(catalog, table, writeOptions, ident) } } @@ -123,7 +100,7 @@ case class AtomicCreateTableAsSelectExec( query: SparkPlan, properties: Map[String, String], writeOptions: CaseInsensitiveStringMap, - ifNotExists: Boolean) extends AtomicTableWriteExec { + ifNotExists: Boolean) extends TableWriteExec { override protected def run(): Seq[InternalRow] = { if (catalog.tableExists(ident)) { @@ -135,7 +112,7 @@ case class AtomicCreateTableAsSelectExec( } val stagedTable = catalog.stageCreate( ident, query.schema.asNullable, partitioning.toArray, properties.asJava) - writeToStagedTable(stagedTable, writeOptions, ident) + writeToTable(catalog, stagedTable, writeOptions, ident) } } @@ -157,9 +134,7 @@ case class ReplaceTableAsSelectExec( query: SparkPlan, properties: Map[String, String], writeOptions: CaseInsensitiveStringMap, - orCreate: Boolean) extends V2TableWriteExec with SupportsV1Write { - - import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper + orCreate: Boolean) extends TableWriteExec { override protected def run(): Seq[InternalRow] = { // Note that this operation is potentially unsafe, but these are the strict semantics of @@ -175,31 +150,9 @@ case class ReplaceTableAsSelectExec( } else if (!orCreate) { throw new CannotReplaceMissingTableException(ident) } - val schema = query.schema.asNullable - val createdTable = catalog.createTable( - ident, schema, partitioning.toArray, properties.asJava) - Utils.tryWithSafeFinallyAndFailureCallbacks({ - createdTable match { - case table: SupportsWrite => - val info = LogicalWriteInfoImpl( - queryId = UUID.randomUUID().toString, - schema, - writeOptions) - val writeBuilder = table.newWriteBuilder(info) - - writeBuilder match { - case v1: V1WriteBuilder => writeWithV1(v1.buildForV1Write()) - case v2 => writeWithV2(v2.buildForBatch()) - } - - case _ => - // table does not support writes - throw new SparkException( - s"Table implementation does not support writes: ${ident.quoted}") - } - })(catchBlock = { - catalog.dropTable(ident) - }) + val table = catalog.createTable( + ident, query.schema.asNullable, partitioning.toArray, properties.asJava) + writeToTable(catalog, table, writeOptions, ident) } } @@ -223,7 +176,7 @@ case class AtomicReplaceTableAsSelectExec( query: SparkPlan, properties: Map[String, String], writeOptions: CaseInsensitiveStringMap, - orCreate: Boolean) extends AtomicTableWriteExec { + orCreate: Boolean) extends TableWriteExec { override protected def run(): Seq[InternalRow] = { val schema = query.schema.asNullable @@ -241,7 +194,7 @@ case class AtomicReplaceTableAsSelectExec( } else { throw new CannotReplaceMissingTableException(ident) } - writeToStagedTable(staged, writeOptions, ident) + writeToTable(catalog, staged, writeOptions, ident) } } @@ -479,15 +432,16 @@ object DataWritingSparkTask extends Logging { } } -private[v2] trait AtomicTableWriteExec extends V2TableWriteExec with SupportsV1Write { +private[v2] trait TableWriteExec extends V2TableWriteExec with SupportsV1Write { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper - protected def writeToStagedTable( - stagedTable: StagedTable, + protected def writeToTable( + catalog: TableCatalog, + table: Table, writeOptions: CaseInsensitiveStringMap, ident: Identifier): Seq[InternalRow] = { 
Utils.tryWithSafeFinallyAndFailureCallbacks({ - stagedTable match { + table match { case table: SupportsWrite => val info = LogicalWriteInfoImpl( queryId = UUID.randomUUID().toString, @@ -499,17 +453,25 @@ private[v2] trait AtomicTableWriteExec extends V2TableWriteExec with SupportsV1W case v1: V1WriteBuilder => writeWithV1(v1.buildForV1Write()) case v2 => writeWithV2(v2.buildForBatch()) } - stagedTable.commitStagedChanges() + + table match { + case st: StagedTable => st.commitStagedChanges() + case _ => + } writtenRows case _ => - // Table does not support writes - staged changes are also rolled back below. + // Table does not support writes - staged changes are also rolled back below if table + // is staging. throw new SparkException( s"Table implementation does not support writes: ${ident.quoted}") } })(catchBlock = { - // Failure rolls back the staged writes and metadata changes. - stagedTable.abortStagedChanges() + table match { + // Failure rolls back the staged writes and metadata changes. + case st: StagedTable => st.abortStagedChanges() + case _ => catalog.dropTable(ident) + } }) } } From 69c27f49acf2fe6fbc8335bde2aac4afd4188678 Mon Sep 17 00:00:00 2001 From: "wangguangxin.cn" Date: Sat, 31 Oct 2020 15:14:46 -0700 Subject: [PATCH 0367/1009] [SPARK-33306][SQL] Timezone is needed when cast date to string ### What changes were proposed in this pull request? When `spark.sql.legacy.typeCoercion.datetimeToString.enabled` is enabled, spark will cast date to string when compare date with string. In Spark3, timezone is needed when casting date to string as https://github.com/apache/spark/blob/72ad9dcd5d484a8dd64c08889de85ef9de2a6077/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala#L309. Howerver, the timezone may not be set because `CastBase.needsTimeZone` returns false for this kind of casting. A simple way to reproduce this is ``` spark-shell --conf spark.sql.legacy.typeCoercion.datetimeToString.enabled=true ``` when we execute the following sql, ``` select a.d1 from (select to_date(concat('2000-01-0', id)) as d1 from range(1, 2)) a join (select concat('2000-01-0', id) as d2 from range(1, 2)) b on a.d1 = b.d2 ``` it will throw ``` java.util.NoSuchElementException: None.get at scala.None$.get(Option.scala:529) at scala.None$.get(Option.scala:527) at org.apache.spark.sql.catalyst.expressions.TimeZoneAwareExpression.zoneId(datetimeExpressions.scala:56) at org.apache.spark.sql.catalyst.expressions.TimeZoneAwareExpression.zoneId$(datetimeExpressions.scala:56) at org.apache.spark.sql.catalyst.expressions.CastBase.zoneId$lzycompute(Cast.scala:253) at org.apache.spark.sql.catalyst.expressions.CastBase.zoneId(Cast.scala:253) at org.apache.spark.sql.catalyst.expressions.CastBase.dateFormatter$lzycompute(Cast.scala:287) at org.apache.spark.sql.catalyst.expressions.CastBase.dateFormatter(Cast.scala:287) ``` ### Why are the changes needed? As described above, it's a bug here. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add more UT Closes #30213 from WangGuangxin/SPARK-33306. 
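A minimal sanity check of the new behavior (an editor's sketch against the `Cast` object touched in this patch, not code that is part of the change):
```scala
// Sketch only: after this fix, a date-to-string cast is reported as
// time-zone dependent, so the analyzer's time-zone resolution attaches a
// zoneId to the Cast and the date formatter no longer hits None.get when the
// legacy datetimeToString coercion kicks in.
import org.apache.spark.sql.catalyst.expressions.Cast
import org.apache.spark.sql.types.{DateType, StringType}

object DateToStringNeedsTimeZone {
  def main(args: Array[String]): Unit = {
    assert(Cast.needsTimeZone(DateType, StringType)) // was false before this fix
    assert(Cast.needsTimeZone(StringType, DateType)) // unchanged: already true
  }
}
```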
Authored-by: wangguangxin.cn Signed-off-by: Dongjoon Hyun --- .../spark/sql/catalyst/expressions/Cast.scala | 1 + .../org/apache/spark/sql/SQLQuerySuite.scala | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index bf759db59f3e6..610297cfd50b6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -108,6 +108,7 @@ object Cast { */ def needsTimeZone(from: DataType, to: DataType): Boolean = (from, to) match { case (StringType, TimestampType | DateType) => true + case (DateType, StringType) => true case (DateType, TimestampType) => true case (TimestampType, StringType) => true case (TimestampType, DateType) => true diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index a002f720a3c4a..0dd2a286772a5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -3691,6 +3691,21 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark checkAnswer(sql("SELECT id FROM t WHERE (SELECT true)"), Row(0L)) } } + + test("SPARK-33306: Timezone is needed when cast Date to String") { + withTempView("t1", "t2") { + spark.sql("select to_date(concat('2000-01-0', id)) as d from range(1, 2)") + .createOrReplaceTempView("t1") + spark.sql("select concat('2000-01-0', id) as d from range(1, 2)") + .createOrReplaceTempView("t2") + val result = Date.valueOf("2000-01-01") + + checkAnswer(sql("select t1.d from t1 join t2 on t1.d = t2.d"), Row(result)) + withSQLConf(SQLConf.LEGACY_CAST_DATETIME_TO_STRING.key -> "true") { + checkAnswer(sql("select t1.d from t1 join t2 on t1.d = t2.d"), Row(result)) + } + } + } } case class Foo(bar: Option[String]) From 56587f076d282ec96c4779faa63d7d9764cf0c3c Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Sun, 1 Nov 2020 19:09:12 +0900 Subject: [PATCH 0368/1009] [SPARK-33310][PYTHON] Relax pyspark typing for sql str functions ### What changes were proposed in this pull request? Relax pyspark typing for sql str functions. These functions all pass the first argument through `_to_java_column`, such that a string or Column object is acceptable. ### Why are the changes needed? Convenience & ensuring the typing reflects the functionality ### Does this PR introduce _any_ user-facing change? Yes, a backwards-compatible increase in functionality. But I think typing support is unreleased, so possibly no change to released versions. ### How was this patch tested? Not tested. I am newish to Python typing with stubs, so someone should confirm this is the correct way to fix this. Closes #30209 from dhimmel/patch-1. Authored-by: Daniel Himmelstein Signed-off-by: HyukjinKwon --- python/pyspark/sql/functions.pyi | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi index 1d048efcc3ca5..7ba3f07e17c19 100644 --- a/python/pyspark/sql/functions.pyi +++ b/python/pyspark/sql/functions.pyi @@ -155,11 +155,11 @@ def overlay( def substring(str: ColumnOrName, pos: int, len: int) -> Column: ... def substring_index(str: ColumnOrName, delim: str, count: int) -> Column: ... 
def levenshtein(left: ColumnOrName, right: ColumnOrName) -> Column: ... -def locate(substr: str, str: Column, pos: int = ...) -> Column: ... -def lpad(col: Column, len: int, pad: str) -> Column: ... -def rpad(col: Column, len: int, pad: str) -> Column: ... -def repeat(col: Column, n: int) -> Column: ... -def split(str: Column, pattern: str, limit: int = ...) -> Column: ... +def locate(substr: str, str: ColumnOrName, pos: int = ...) -> Column: ... +def lpad(col: ColumnOrName, len: int, pad: str) -> Column: ... +def rpad(col: ColumnOrName, len: int, pad: str) -> Column: ... +def repeat(col: ColumnOrName, n: int) -> Column: ... +def split(str: ColumnOrName, pattern: str, limit: int = ...) -> Column: ... def regexp_extract(str: ColumnOrName, pattern: str, idx: int) -> Column: ... def regexp_replace(str: ColumnOrName, pattern: str, replacement: str) -> Column: ... def initcap(col: ColumnOrName) -> Column: ... From b8a440f09880c596325dd9e6caae6b470be76a8f Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Sun, 1 Nov 2020 20:28:12 +0900 Subject: [PATCH 0369/1009] [SPARK-33277][PYSPARK][SQL] Use ContextAwareIterator to stop consuming after the task ends ### What changes were proposed in this pull request? As the Python evaluation consumes the parent iterator in a separate thread, it could consume more data from the parent even after the task ends and the parent is closed. Thus, we should use `ContextAwareIterator` to stop consuming after the task ends. ### Why are the changes needed? Python/Pandas UDF right after off-heap vectorized reader could cause executor crash. E.g.,: ```py spark.range(0, 100000, 1, 1).write.parquet(path) spark.conf.set("spark.sql.columnVector.offheap.enabled", True) def f(x): return 0 fUdf = udf(f, LongType()) spark.read.parquet(path).select(fUdf('id')).head() ``` This is because, the Python evaluation consumes the parent iterator in a separate thread and it consumes more data from the parent even after the task ends and the parent is closed. If an off-heap column vector exists in the parent iterator, it could cause segmentation fault which crashes the executor. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added tests, and manually. Closes #30177 from ueshin/issues/SPARK-33277/python_pandas_udf. Authored-by: Takuya UESHIN Signed-off-by: HyukjinKwon --- python/pyspark/sql/tests/test_pandas_map.py | 22 +++++++++++++++++++ .../sql/tests/test_pandas_udf_scalar.py | 19 ++++++++++++++++ python/pyspark/sql/tests/test_udf.py | 20 +++++++++++++++++ .../sql/execution/python/EvalPythonExec.scala | 18 ++++++++++++++- .../execution/python/MapInPandasExec.scala | 7 +++--- 5 files changed, 82 insertions(+), 4 deletions(-) diff --git a/python/pyspark/sql/tests/test_pandas_map.py b/python/pyspark/sql/tests/test_pandas_map.py index 3ca437f75fc23..2cad30c7294d4 100644 --- a/python/pyspark/sql/tests/test_pandas_map.py +++ b/python/pyspark/sql/tests/test_pandas_map.py @@ -15,9 +15,12 @@ # limitations under the License. 
# import os +import shutil +import tempfile import time import unittest +from pyspark.sql import Row from pyspark.testing.sqlutils import ReusedSQLTestCase, have_pandas, have_pyarrow, \ pandas_requirement_message, pyarrow_requirement_message @@ -112,6 +115,25 @@ def func(iterator): expected = df.collect() self.assertEquals(actual, expected) + # SPARK-33277 + def test_map_in_pandas_with_column_vector(self): + path = tempfile.mkdtemp() + shutil.rmtree(path) + + try: + self.spark.range(0, 200000, 1, 1).write.parquet(path) + + def func(iterator): + for pdf in iterator: + yield pd.DataFrame({'id': [0] * len(pdf)}) + + for offheap in ["true", "false"]: + with self.sql_conf({"spark.sql.columnVector.offheap.enabled": offheap}): + self.assertEquals( + self.spark.read.parquet(path).mapInPandas(func, 'id long').head(), Row(0)) + finally: + shutil.rmtree(path) + if __name__ == "__main__": from pyspark.sql.tests.test_pandas_map import * # noqa: F401 diff --git a/python/pyspark/sql/tests/test_pandas_udf_scalar.py b/python/pyspark/sql/tests/test_pandas_udf_scalar.py index 6d325c9085ce1..c2c8f6f697c4b 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_scalar.py +++ b/python/pyspark/sql/tests/test_pandas_udf_scalar.py @@ -1137,6 +1137,25 @@ def test_datasource_with_udf(self): finally: shutil.rmtree(path) + # SPARK-33277 + def test_pandas_udf_with_column_vector(self): + path = tempfile.mkdtemp() + shutil.rmtree(path) + + try: + self.spark.range(0, 200000, 1, 1).write.parquet(path) + + @pandas_udf(LongType()) + def udf(x): + return pd.Series([0] * len(x)) + + for offheap in ["true", "false"]: + with self.sql_conf({"spark.sql.columnVector.offheap.enabled": offheap}): + self.assertEquals( + self.spark.read.parquet(path).select(udf('id')).head(), Row(0)) + finally: + shutil.rmtree(path) + if __name__ == "__main__": from pyspark.sql.tests.test_pandas_udf_scalar import * # noqa: F401 diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py index a7dcbfd32ac1c..c2e95fd41c5b4 100644 --- a/python/pyspark/sql/tests/test_udf.py +++ b/python/pyspark/sql/tests/test_udf.py @@ -674,6 +674,26 @@ def test_udf_cache(self): self.assertEqual(df.select(udf(func)("id"))._jdf.queryExecution() .withCachedData().getClass().getSimpleName(), 'InMemoryRelation') + # SPARK-33277 + def test_udf_with_column_vector(self): + path = tempfile.mkdtemp() + shutil.rmtree(path) + + try: + self.spark.range(0, 100000, 1, 1).write.parquet(path) + + def f(x): + return 0 + + fUdf = udf(f, LongType()) + + for offheap in ["true", "false"]: + with self.sql_conf({"spark.sql.columnVector.offheap.enabled": offheap}): + self.assertEquals( + self.spark.read.parquet(path).select(fUdf('id')).head(), Row(0)) + finally: + shutil.rmtree(path) + class UDFInitializationTests(unittest.TestCase): def tearDown(self): diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala index 298d63478b63e..89c7716f7c1b2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala @@ -89,6 +89,7 @@ trait EvalPythonExec extends UnaryExecNode { inputRDD.mapPartitions { iter => val context = TaskContext.get() + val contextAwareIterator = new ContextAwareIterator(iter, context) // The queue used to buffer input rows so we can drain it to // combine input with output from Python. 
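For readers following the hunks in this file, an editor's condensed sketch of the guard the new `contextAwareIterator` relies on (assembled from the changes in this patch, not an additional change):
```scala
// Sketch duplicating the ContextAwareIterator introduced by this patch: the
// wrapper stops the Python feeder thread from pulling more rows from the
// parent iterator once the task has completed or been interrupted, so freed
// off-heap column vectors are never touched again.
import org.apache.spark.TaskContext

class ContextAwareIterator[IN](iter: Iterator[IN], context: TaskContext) extends Iterator[IN] {
  override def hasNext: Boolean =
    !context.isCompleted() && !context.isInterrupted() && iter.hasNext

  override def next(): IN = iter.next()
}

// Wrapping mirrors the change above: new ContextAwareIterator(iter, TaskContext.get())
```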
@@ -120,7 +121,7 @@ trait EvalPythonExec extends UnaryExecNode { }.toSeq) // Add rows to queue to join later with the result. - val projectedRowIter = iter.map { inputRow => + val projectedRowIter = contextAwareIterator.map { inputRow => queue.add(inputRow.asInstanceOf[UnsafeRow]) projection(inputRow) } @@ -137,3 +138,18 @@ trait EvalPythonExec extends UnaryExecNode { } } } + +/** + * A TaskContext aware iterator. + * + * As the Python evaluation consumes the parent iterator in a separate thread, + * it could consume more data from the parent even after the task ends and the parent is closed. + * Thus, we should use ContextAwareIterator to stop consuming after the task ends. + */ +class ContextAwareIterator[IN](iter: Iterator[IN], context: TaskContext) extends Iterator[IN] { + + override def hasNext: Boolean = + !context.isCompleted() && !context.isInterrupted() && iter.hasNext + + override def next(): IN = iter.next() +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/MapInPandasExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/MapInPandasExec.scala index 2bb808119c0ae..7fc18f885a2d3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/MapInPandasExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/MapInPandasExec.scala @@ -61,16 +61,17 @@ case class MapInPandasExec( val pythonRunnerConf = ArrowUtils.getPythonRunnerConfMap(conf) val outputTypes = child.schema + val context = TaskContext.get() + val contextAwareIterator = new ContextAwareIterator(inputIter, context) + // Here we wrap it via another row so that Python sides understand it // as a DataFrame. - val wrappedIter = inputIter.map(InternalRow(_)) + val wrappedIter = contextAwareIterator.map(InternalRow(_)) // DO NOT use iter.grouped(). See BatchIterator. val batchIter = if (batchSize > 0) new BatchIterator(wrappedIter, batchSize) else Iterator(wrappedIter) - val context = TaskContext.get() - val columnarBatchIter = new ArrowPythonRunner( chainedFunc, PythonEvalType.SQL_MAP_PANDAS_ITER_UDF, From 2b6dfa5f7bdd2f2ae7b4d53bb811ccb8563377c5 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Sun, 1 Nov 2020 23:57:57 +0800 Subject: [PATCH 0370/1009] [SPARK-20044][UI] Support Spark UI behind front-end reverse proxy using a path prefix Revert proxy url MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Allow to run the Spark web UI behind a reverse proxy with URLs prefixed by a context root, like www.mydomain.com/spark. In particular, this allows to access multiple Spark clusters through the same virtual host, only distinguishing them by context root, like www.mydomain.com/cluster1, www.mydomain.com/cluster2, and it allows to run the Spark UI in a common cookie domain (for SSO) with other services. ### Why are the changes needed? This PR is to take over https://github.com/apache/spark/pull/17455. After changes, Spark allows showing customized prefix URL in all the `href` links of the HTML pages. ### Does this PR introduce _any_ user-facing change? Yes, all the links of UI pages will be contains the value of `spark.ui.reverseProxyUrl` if it is configurated. ### How was this patch tested? 
New HTML Unit tests in MasterSuite Manual UI testing for master, worker and app UI with an nginx proxy Spark config: ``` spark.ui.port 8080 spark.ui.reverseProxy=true spark.ui.reverseProxyUrl=/path/to/spark/ ``` nginx config: ``` server { listen 9000; set $SPARK_MASTER http://127.0.0.1:8080; # split spark UI path into prefix and local path within master UI location ~ ^(/path/to/spark/) { # strip prefix when forwarding request rewrite /path/to/spark(/.*) $1 break; #rewrite /path/to/spark/ "/" ; # forward to spark master UI proxy_pass $SPARK_MASTER; proxy_intercept_errors on; error_page 301 302 307 = handle_redirects; } location handle_redirects { set $saved_redirect_location '$upstream_http_location'; proxy_pass $saved_redirect_location; } } ``` Closes #29820 from gengliangwang/revertProxyURL. Lead-authored-by: Gengliang Wang Co-authored-by: Oliver Köth Signed-off-by: Gengliang Wang --- .../scala/org/apache/spark/SparkContext.scala | 4 +- .../apache/spark/deploy/master/Master.scala | 8 +- .../spark/deploy/worker/ExecutorRunner.scala | 3 +- .../apache/spark/deploy/worker/Worker.scala | 9 +- .../scala/org/apache/spark/ui/UIUtils.scala | 3 +- .../spark/deploy/master/MasterSuite.scala | 101 ++++++++++++++++-- docs/configuration.md | 25 ++++- 7 files changed, 140 insertions(+), 13 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 501e865c4105a..b35768222437c 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -570,7 +570,9 @@ class SparkContext(config: SparkConf) extends Logging { _applicationAttemptId = _taskScheduler.applicationAttemptId() _conf.set("spark.app.id", _applicationId) if (_conf.get(UI_REVERSE_PROXY)) { - System.setProperty("spark.ui.proxyBase", "/proxy/" + _applicationId) + val proxyUrl = _conf.get(UI_REVERSE_PROXY_URL.key, "").stripSuffix("/") + + "/proxy/" + _applicationId + System.setProperty("spark.ui.proxyBase", proxyUrl) } _ui.foreach(_.setAppId(_applicationId)) _env.blockManager.initialize(_applicationId) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index ceeb01149f5db..a582a5d045855 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -147,7 +147,13 @@ private[deploy] class Master( webUi.bind() masterWebUiUrl = s"${webUi.scheme}$masterPublicAddress:${webUi.boundPort}" if (reverseProxy) { - masterWebUiUrl = conf.get(UI_REVERSE_PROXY_URL).orElse(Some(masterWebUiUrl)).get + val uiReverseProxyUrl = conf.get(UI_REVERSE_PROXY_URL).map(_.stripSuffix("/")) + if (uiReverseProxyUrl.nonEmpty) { + System.setProperty("spark.ui.proxyBase", uiReverseProxyUrl.get) + // If the master URL has a path component, it must end with a slash. + // Otherwise the browser generates incorrect relative links + masterWebUiUrl = uiReverseProxyUrl.get + "/" + } webUi.addProxy() logInfo(s"Spark Master is acting as a reverse proxy. 
Master, Workers and " + s"Applications UIs are available at $masterWebUiUrl") diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala index e4fcae13a2f89..2e26ccf671d88 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala @@ -171,7 +171,8 @@ private[deploy] class ExecutorRunner( // Add webUI log urls val baseUrl = if (conf.get(UI_REVERSE_PROXY)) { - s"/proxy/$workerId/logPage/?appId=$appId&executorId=$execId&logType=" + conf.get(UI_REVERSE_PROXY_URL.key, "").stripSuffix("/") + + s"/proxy/$workerId/logPage/?appId=$appId&executorId=$execId&logType=" } else { s"$webUiScheme$publicAddress:$webUiPort/logPage/?appId=$appId&executorId=$execId&logType=" } diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala index 0660dbdafd605..a6092f637a9cb 100755 --- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -276,7 +276,14 @@ private[deploy] class Worker( master = Some(masterRef) connected = true if (reverseProxy) { - logInfo(s"WorkerWebUI is available at $activeMasterWebUiUrl/proxy/$workerId") + logInfo("WorkerWebUI is available at %s/proxy/%s".format( + activeMasterWebUiUrl.stripSuffix("/"), workerId)) + // if reverseProxyUrl is not set, then we continue to generate relative URLs + // starting with "/" throughout the UI and do not use activeMasterWebUiUrl + val proxyUrl = conf.get(UI_REVERSE_PROXY_URL.key, "").stripSuffix("/") + // In the method `UIUtils.makeHref`, the URL segment "/proxy/$worker_id" will be appended + // after `proxyUrl`, so no need to set the worker ID in the `spark.ui.proxyBase` here. + System.setProperty("spark.ui.proxyBase", proxyUrl) } // Cancel any outstanding re-registration attempts because we found a new master cancelLastRegistrationRetry() diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala index dba6f8e8440cb..5e3406037a72b 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala @@ -639,7 +639,8 @@ private[spark] object UIUtils extends Logging { */ def makeHref(proxy: Boolean, id: String, origHref: String): String = { if (proxy) { - s"/proxy/$id" + val proxyPrefix = sys.props.getOrElse("spark.ui.proxyBase", "") + proxyPrefix + "/proxy/" + id } else { origHref } diff --git a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala index 3329300b64d13..a46799df069d6 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala @@ -143,6 +143,10 @@ class MockExecutorLaunchFailWorker(master: Master, conf: SparkConf = new SparkCo class MasterSuite extends SparkFunSuite with Matchers with Eventually with PrivateMethodTester with BeforeAndAfter { + // regex to extract worker links from the master webui HTML + // groups represent URL and worker ID + val WORKER_LINK_RE = """
    \s*(worker-.+?)\s*""".r + private var _master: Master = _ after { @@ -320,10 +324,10 @@ class MasterSuite extends SparkFunSuite val conf = new SparkConf() val localCluster = new LocalSparkCluster(2, 2, 512, conf) localCluster.start() + val masterUrl = s"http://localhost:${localCluster.masterWebUIPort}" try { eventually(timeout(5.seconds), interval(100.milliseconds)) { - val json = Source.fromURL(s"http://localhost:${localCluster.masterWebUIPort}/json") - .getLines().mkString("\n") + val json = Source.fromURL(s"$masterUrl/json").getLines().mkString("\n") val JArray(workers) = (parse(json) \ "workers") workers.size should be (2) workers.foreach { workerSummaryJson => @@ -332,6 +336,16 @@ class MasterSuite extends SparkFunSuite .getLines().mkString("\n")) (workerResponse \ "cores").extract[Int] should be (2) } + + val html = Source.fromURL(s"$masterUrl/").getLines().mkString("\n") + html should include ("Spark Master at spark://") + val workerLinks = (WORKER_LINK_RE findAllMatchIn html).toList + workerLinks.size should be (2) + workerLinks foreach { case WORKER_LINK_RE(workerUrl, workerId) => + val workerHtml = Source.fromURL(workerUrl).getLines().mkString("\n") + workerHtml should include ("Spark Worker at") + workerHtml should include ("Running Executors (0)") + } } } finally { localCluster.stop() @@ -340,31 +354,106 @@ class MasterSuite extends SparkFunSuite test("master/worker web ui available with reverseProxy") { implicit val formats = org.json4s.DefaultFormats - val reverseProxyUrl = "http://localhost:8080" + val conf = new SparkConf() + conf.set(UI_REVERSE_PROXY, true) + val localCluster = new LocalSparkCluster(2, 2, 512, conf) + localCluster.start() + val masterUrl = s"http://localhost:${localCluster.masterWebUIPort}" + try { + eventually(timeout(5.seconds), interval(100.milliseconds)) { + val json = Source.fromURL(s"$masterUrl/json") + .getLines().mkString("\n") + val JArray(workers) = (parse(json) \ "workers") + workers.size should be (2) + workers.foreach { workerSummaryJson => + // the webuiaddress intentionally points to the local web ui. 
+ // explicitly construct reverse proxy url targeting the master + val JString(workerId) = workerSummaryJson \ "id" + val url = s"$masterUrl/proxy/${workerId}/json" + val workerResponse = parse(Source.fromURL(url).getLines().mkString("\n")) + (workerResponse \ "cores").extract[Int] should be (2) + } + + val html = Source.fromURL(s"$masterUrl/").getLines().mkString("\n") + html should include ("Spark Master at spark://") + html should include ("""href="/static""") + html should include ("""src="/static""") + verifyWorkerUI(html, masterUrl) + } + } finally { + localCluster.stop() + System.getProperties().remove("spark.ui.proxyBase") + } + } + + test("master/worker web ui available behind front-end reverseProxy") { + implicit val formats = org.json4s.DefaultFormats + val reverseProxyUrl = "http://proxyhost:8080/path/to/spark" val conf = new SparkConf() conf.set(UI_REVERSE_PROXY, true) conf.set(UI_REVERSE_PROXY_URL, reverseProxyUrl) val localCluster = new LocalSparkCluster(2, 2, 512, conf) localCluster.start() + val masterUrl = s"http://localhost:${localCluster.masterWebUIPort}" try { eventually(timeout(5.seconds), interval(100.milliseconds)) { - val json = Source.fromURL(s"http://localhost:${localCluster.masterWebUIPort}/json") + val json = Source.fromURL(s"$masterUrl/json") .getLines().mkString("\n") val JArray(workers) = (parse(json) \ "workers") workers.size should be (2) workers.foreach { workerSummaryJson => + // the webuiaddress intentionally points to the local web ui. + // explicitly construct reverse proxy url targeting the master val JString(workerId) = workerSummaryJson \ "id" - val url = s"http://localhost:${localCluster.masterWebUIPort}/proxy/${workerId}/json" + val url = s"$masterUrl/proxy/${workerId}/json" val workerResponse = parse(Source.fromURL(url).getLines().mkString("\n")) (workerResponse \ "cores").extract[Int] should be (2) - (workerResponse \ "masterwebuiurl").extract[String] should be (reverseProxyUrl) + (workerResponse \ "masterwebuiurl").extract[String] should be (reverseProxyUrl + "/") } + + // with LocalCluster, we have masters and workers in the same JVM, each overwriting + // system property spark.ui.proxyBase. 
+ // so we need to manage this property explicitly for test + System.getProperty("spark.ui.proxyBase") should startWith + (s"$reverseProxyUrl/proxy/worker-") + System.setProperty("spark.ui.proxyBase", reverseProxyUrl) + val html = Source.fromURL(s"$masterUrl/").getLines().mkString("\n") + html should include ("Spark Master at spark://") + verifyStaticResourcesServedByProxy(html, reverseProxyUrl) + verifyWorkerUI(html, masterUrl, reverseProxyUrl) } } finally { localCluster.stop() + System.getProperties().remove("spark.ui.proxyBase") + } + } + + private def verifyWorkerUI(masterHtml: String, masterUrl: String, + reverseProxyUrl: String = ""): Unit = { + val workerLinks = (WORKER_LINK_RE findAllMatchIn masterHtml).toList + workerLinks.size should be (2) + workerLinks foreach { + case WORKER_LINK_RE(workerUrl, workerId) => + workerUrl should be (s"$reverseProxyUrl/proxy/$workerId") + // there is no real front-end proxy as defined in $reverseProxyUrl + // construct url directly targeting the master + val url = s"$masterUrl/proxy/$workerId/" + System.setProperty("spark.ui.proxyBase", workerUrl) + val workerHtml = Source.fromURL(url).getLines().mkString("\n") + workerHtml should include ("Spark Worker at") + workerHtml should include ("Running Executors (0)") + verifyStaticResourcesServedByProxy(workerHtml, workerUrl) + case _ => fail // make sure we don't accidentially skip the tests } } + private def verifyStaticResourcesServedByProxy(html: String, proxyUrl: String): Unit = { + html should not include ("""href="/static""") + html should include (s"""href="$proxyUrl/static""") + html should not include ("""src="/static""") + html should include (s"""src="$proxyUrl/static""") + } + test("basic scheduling - spread out") { basicScheduling(spreadOut = true) } diff --git a/docs/configuration.md b/docs/configuration.md index 232ea4079d436..aab18f23a083f 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1193,8 +1193,29 @@ Apart from these, the following properties are also available, and may be useful spark.ui.reverseProxyUrl - This is the URL where your proxy is running. This URL is for proxy which is running in front of Spark Master. This is useful when running proxy for authentication e.g. OAuth proxy. Make sure this is a complete URL including scheme (http/https) and port to reach your proxy. - + If the Spark UI should be served through another front-end reverse proxy, this is the URL + for accessing the Spark master UI through that reverse proxy. + This is useful when running proxy for authentication e.g. an OAuth proxy. The URL may contain + a path prefix, like http://mydomain.com/path/to/spark/, allowing you to serve the + UI for multiple Spark clusters and other web applications through the same virtual host and + port. + Normally, this should be an absolute URL including scheme (http/https), host and port. + It is possible to specify a relative URL starting with "/" here. In this case, all URLs + generated by the Spark UI and Spark REST APIs will be server-relative links -- this will still + work, as the entire Spark UI is served through the same host and port. +
    The setting affects link generation in the Spark UI, but the front-end reverse proxy + is responsible for +
      • stripping a path prefix before forwarding the request,
+      • rewriting redirects which point directly to the Spark master,
+      • redirecting access from http://mydomain.com/path/to/spark to
+      http://mydomain.com/path/to/spark/ (trailing slash after path prefix); otherwise
+      relative links on the master page do not work correctly.
    + This setting affects all the workers and application UIs running in the cluster and must be set + identically on all the workers, drivers and masters. In is only effective when + spark.ui.reverseProxy is turned on. This setting is not needed when the Spark + master web UI is directly reachable. 2.1.0 From d71b2febaf536113ffe4ad0626d1d3b4098b98a5 Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 2 Nov 2020 08:54:08 +0900 Subject: [PATCH 0371/1009] [SPARK-30663][SPARK-33313][TESTS][R] Drop testthat 1.x support and add testthat 3.x support ### What changes were proposed in this pull request? This PR modifies `R/pkg/tests/run-all.R` by: - Removing `testthat` 1.x support, as Jenkins has been upgraded to 2.x with SPARK-30637 and this code is no longer relevant. - Add `testthat` 3.x support to avoid AppVeyor failures. ### Why are the changes needed? Currently used internal API has been removed in the latest `testthat` release. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Tests executed against `testthat == 2.3.2` and `testthat == 3.0.0` Closes #30219 from zero323/SPARK-33313. Authored-by: zero323 Signed-off-by: HyukjinKwon --- R/pkg/tests/run-all.R | 44 +++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/R/pkg/tests/run-all.R b/R/pkg/tests/run-all.R index 3043df0f12075..f9e266eb4e014 100644 --- a/R/pkg/tests/run-all.R +++ b/R/pkg/tests/run-all.R @@ -60,25 +60,37 @@ if (identical(Sys.getenv("NOT_CRAN"), "true")) { # set random seed for predictable results. mostly for base's sample() in tree and classification set.seed(42) - # TODO (SPARK-30663) To be removed once testthat 1.x is removed from all builds - if (packageVersion("testthat")$major <= 1) { - # testthat 1.x - test_runner <- testthat:::run_tests - reporter <- "summary" + if (packageVersion("testthat")$major <= 1) stop("testhat 1.x is not supported") + + test_runner <- if (packageVersion("testthat")$major == 2) { + # testthat >= 2.0.0, < 3.0.0 + function(path, package, reporter, filter) { + testthat:::test_package_dir( + test_path = path, + package = package, + filter = filter, + reporter = reporter + ) + } } else { - # testthat >= 2.0.0 - test_runner <- testthat:::test_package_dir - dir.create("target/test-reports", showWarnings = FALSE) - reporter <- MultiReporter$new(list( - SummaryReporter$new(), - JunitReporter$new(file = "target/test-reports/test-results.xml") - )) + # testthat >= 3.0.0 + testthat::test_dir } - test_runner("SparkR", - file.path(sparkRDir, "pkg", "tests", "fulltests"), - NULL, - reporter) + dir.create("target/test-reports", showWarnings = FALSE) + reporter <- MultiReporter$new(list( + SummaryReporter$new(), + JunitReporter$new( + file = file.path(getwd(), "target/test-reports/test-results.xml") + ) + )) + + test_runner( + path = file.path(sparkRDir, "pkg", "tests", "fulltests"), + package = "SparkR", + reporter = reporter, + filter = NULL + ) } SparkR:::uninstallDownloadedSpark() From 6226ccc092c0e24487ee80dc169eb15b32825bce Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Mon, 2 Nov 2020 05:03:41 +0000 Subject: [PATCH 0372/1009] [SPARK-33095] Follow up, support alter table column rename ### What changes were proposed in this pull request? Support rename column for mysql dialect. ### Why are the changes needed? At the moment, it does not work for mysql version 5.x. So, we should throw proper exception for that case. ### Does this PR introduce _any_ user-facing change? 
Yes, `column rename` with mysql dialect should work correctly. ### How was this patch tested? Added tests for rename column. Ran the tests to pass with both versions of mysql. * `export MYSQL_DOCKER_IMAGE_NAME=mysql:5.7.31` * `export MYSQL_DOCKER_IMAGE_NAME=mysql:8.0` Closes #30142 from ScrapCodes/mysql-dialect-rename. Authored-by: Prashant Sharma Signed-off-by: Wenchen Fan --- .../sql/jdbc/v2/MySQLIntegrationSuite.scala | 28 +++++++++++++++--- .../apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 29 ++++++++++++++++++- .../datasources/jdbc/JdbcUtils.scala | 9 +++--- .../apache/spark/sql/jdbc/JdbcDialects.scala | 13 +++++++-- .../apache/spark/sql/jdbc/MySQLDialect.scala | 19 ++++++++++++ 5 files changed, 86 insertions(+), 12 deletions(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala index ec958cd55c943..6cf0f56ee7eeb 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala @@ -50,7 +50,8 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { override val jdbcPort: Int = 3306 override def getJdbcUrl(ip: String, port: Int): String = - s"jdbc:mysql://$ip:$port/mysql?user=root&password=rootpass" + s"jdbc:mysql://$ip:$port/" + + s"mysql?user=root&password=rootpass&allowPublicKeyRetrieval=true&useSSL=false" } override def sparkConf: SparkConf = super.sparkConf @@ -59,7 +60,11 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { override val connectionTimeout = timeout(7.minutes) - override def dataPreparation(conn: Connection): Unit = {} + private var mySQLVersion = -1 + + override def dataPreparation(conn: Connection): Unit = { + mySQLVersion = conn.getMetaData.getDatabaseMajorVersion + } override def testUpdateColumnType(tbl: String): Unit = { sql(s"CREATE TABLE $tbl (ID INTEGER) USING _") @@ -77,11 +82,26 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { assert(msg1.contains("Cannot update alt_table field ID: string cannot be cast to int")) } + override def testRenameColumn(tbl: String): Unit = { + assert(mySQLVersion > 0) + if (mySQLVersion < 8) { + // Rename is unsupported for mysql versions < 8.0. + val exception = intercept[AnalysisException] { + sql(s"ALTER TABLE $tbl RENAME COLUMN ID TO RENAMED") + } + assert(exception.getCause != null, s"Wrong exception thrown: $exception") + val msg = exception.getCause.asInstanceOf[SQLFeatureNotSupportedException].getMessage + assert(msg.contains("Rename column is only supported for MySQL version 8.0 and above.")) + } else { + super.testRenameColumn(tbl) + } + } + override def testUpdateColumnNullability(tbl: String): Unit = { - sql("CREATE TABLE mysql.alt_table (ID STRING NOT NULL) USING _") + sql(s"CREATE TABLE $tbl (ID STRING NOT NULL) USING _") // Update nullability is unsupported for mysql db. 
val msg = intercept[AnalysisException] { - sql("ALTER TABLE mysql.alt_table ALTER COLUMN ID DROP NOT NULL") + sql(s"ALTER TABLE $tbl ALTER COLUMN ID DROP NOT NULL") }.getCause.asInstanceOf[SQLFeatureNotSupportedException].getMessage assert(msg.contains("UpdateColumnNullability is not supported")) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index 8419db7784e88..92af29d9c9467 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.types._ import org.apache.spark.tags.DockerTest @DockerTest -trait V2JDBCTest extends SharedSparkSession { +private[v2] trait V2JDBCTest extends SharedSparkSession { val catalogName: String // dialect specific update column type test def testUpdateColumnType(tbl: String): Unit @@ -46,6 +46,14 @@ trait V2JDBCTest extends SharedSparkSession { assert(msg.contains("Cannot update missing field bad_column")) } + def testRenameColumn(tbl: String): Unit = { + sql(s"ALTER TABLE $tbl RENAME COLUMN ID TO RENAMED") + val t = spark.table(s"$tbl") + val expectedSchema = new StructType().add("RENAMED", StringType, nullable = true) + .add("ID1", StringType, nullable = true).add("ID2", StringType, nullable = true) + assert(t.schema === expectedSchema) + } + test("SPARK-33034: ALTER TABLE ... add new columns") { withTable(s"$catalogName.alt_table") { sql(s"CREATE TABLE $catalogName.alt_table (ID STRING) USING _") @@ -110,6 +118,24 @@ trait V2JDBCTest extends SharedSparkSession { assert(msg.contains("Table not found")) } + test("SPARK-33034: ALTER TABLE ... rename column") { + withTable(s"$catalogName.alt_table") { + sql(s"CREATE TABLE $catalogName.alt_table (ID STRING NOT NULL," + + s" ID1 STRING NOT NULL, ID2 STRING NOT NULL) USING _") + testRenameColumn(s"$catalogName.alt_table") + // Rename to already existing column + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $catalogName.alt_table RENAME COLUMN ID1 TO ID2") + }.getMessage + assert(msg.contains("Cannot rename column, because ID2 already exists")) + } + // Rename a column in a not existing table + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $catalogName.not_existing_table RENAME COLUMN ID TO C") + }.getMessage + assert(msg.contains("Table not found")) + } + test("SPARK-33034: ALTER TABLE ... 
update column nullability") { withTable(s"$catalogName.alt_table") { testUpdateColumnNullability(s"$catalogName.alt_table") @@ -121,3 +147,4 @@ trait V2JDBCTest extends SharedSparkSession { assert(msg.contains("Table not found")) } } + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index 24e380e3be3e1..9aaa55980436e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -895,11 +895,12 @@ object JdbcUtils extends Logging { changes: Seq[TableChange], options: JDBCOptions): Unit = { val dialect = JdbcDialects.get(options.url) + val metaData = conn.getMetaData if (changes.length == 1) { - executeStatement(conn, options, dialect.alterTable(tableName, changes)(0)) + executeStatement(conn, options, dialect.alterTable(tableName, changes, + metaData.getDatabaseMajorVersion)(0)) } else { - val metadata = conn.getMetaData - if (!metadata.supportsTransactions) { + if (!metaData.supportsTransactions) { throw new SQLFeatureNotSupportedException("The target JDBC server does not support " + "transaction and can only support ALTER TABLE with a single action.") } else { @@ -907,7 +908,7 @@ object JdbcUtils extends Logging { val statement = conn.createStatement try { statement.setQueryTimeout(options.queryTimeout) - for (sql <- dialect.alterTable(tableName, changes)) { + for (sql <- dialect.alterTable(tableName, changes, metaData.getDatabaseMajorVersion)) { statement.executeUpdate(sql) } conn.commit() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index e0703195051dc..0a857b99966fc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -205,7 +205,10 @@ abstract class JdbcDialect extends Serializable { * @param changes Changes to apply to the table. * @return The SQL statements to use for altering the table. 
*/ - def alterTable(tableName: String, changes: Seq[TableChange]): Array[String] = { + def alterTable( + tableName: String, + changes: Seq[TableChange], + dbMajorVersion: Int): Array[String] = { val updateClause = ArrayBuilder.make[String] for (change <- changes) { change match { @@ -215,7 +218,7 @@ abstract class JdbcDialect extends Serializable { updateClause += getAddColumnQuery(tableName, name(0), dataType) case rename: RenameColumn if rename.fieldNames.length == 1 => val name = rename.fieldNames - updateClause += getRenameColumnQuery(tableName, name(0), rename.newName) + updateClause += getRenameColumnQuery(tableName, name(0), rename.newName, dbMajorVersion) case delete: DeleteColumn if delete.fieldNames.length == 1 => val name = delete.fieldNames updateClause += getDeleteColumnQuery(tableName, name(0)) @@ -237,7 +240,11 @@ abstract class JdbcDialect extends Serializable { def getAddColumnQuery(tableName: String, columnName: String, dataType: String): String = s"ALTER TABLE $tableName ADD COLUMN ${quoteIdentifier(columnName)} $dataType" - def getRenameColumnQuery(tableName: String, columnName: String, newName: String): String = + def getRenameColumnQuery( + tableName: String, + columnName: String, + newName: String, + dbMajorVersion: Int): String = s"ALTER TABLE $tableName RENAME COLUMN ${quoteIdentifier(columnName)} TO" + s" ${quoteIdentifier(newName)}" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala index a516e9e76ef31..942cdc9619b56 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala @@ -57,6 +57,25 @@ private case object MySQLDialect extends JdbcDialect { s"ALTER TABLE $tableName MODIFY COLUMN ${quoteIdentifier(columnName)} $newDataType" } + // See Old Syntax: https://dev.mysql.com/doc/refman/5.6/en/alter-table.html + // According to https://dev.mysql.com/worklog/task/?id=10761 old syntax works for + // both versions of MySQL i.e. 5.x and 8.0 + // The old syntax requires us to have type definition. Since we do not have type + // information, we throw the exception for old version. + override def getRenameColumnQuery( + tableName: String, + columnName: String, + newName: String, + dbMajorVersion: Int): String = { + if (dbMajorVersion >= 8) { + s"ALTER TABLE $tableName RENAME COLUMN ${quoteIdentifier(columnName)} TO" + + s" ${quoteIdentifier(newName)}" + } else { + throw new SQLFeatureNotSupportedException( + s"Rename column is only supported for MySQL version 8.0 and above.") + } + } + // See https://dev.mysql.com/doc/refman/8.0/en/alter-table.html // require to have column data type to change the column nullability // ALTER TABLE tbl_name MODIFY [COLUMN] col_name column_definition From e52b858ef71fd2f05e3653e15e91252c04fcefd4 Mon Sep 17 00:00:00 2001 From: Cheng Su Date: Mon, 2 Nov 2020 06:44:07 +0000 Subject: [PATCH 0373/1009] [SPARK-33027][SQL] Add DisableUnnecessaryBucketedScan rule to AQE ### What changes were proposed in this pull request? As a followup comment from https://github.com/apache/spark/pull/29804#issuecomment-700650620 , here we add add the physical plan rule DisableUnnecessaryBucketedScan into AQE AdaptiveSparkPlanExec.queryStagePreparationRules, to make auto bucketed scan work with AQE. 
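As a rough usage sketch of what this combination enables (assuming `spark` is an existing SparkSession; the auto bucketed scan flag name comes from the earlier SPARK-32859 work and is assumed here rather than taken from this diff):

```
// Enable adaptive query execution together with auto bucketed scan selection.
spark.conf.set("spark.sql.adaptive.enabled", "true")
spark.conf.set("spark.sql.sources.bucketing.autoBucketedScan.enabled", "true")

// With DisableUnnecessaryBucketedScan registered as an AQE stage preparation rule,
// a query like this keeps its logical plan link, and the bucketed read can be
// turned into a regular scan when the bucketing brings no benefit.
spark.sql("SELECT * FROM bucketed_table").explain()
```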
The change is mostly in: * `AdaptiveSparkPlanExec.scala`: add physical plan rule `DisableUnnecessaryBucketedScan` * `DisableUnnecessaryBucketedScan.scala`: propagate logical plan link for the file source scan exec operator, otherwise we lose the logical plan link information when AQE is enabled, and will get exception [here](https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala#L176). (for example, for query `SELECT * FROM bucketed_table` with AQE is enabled) * `DisableUnnecessaryBucketedScanSuite.scala`: add new test suite for AQE enabled - `DisableUnnecessaryBucketedScanWithoutHiveSupportSuiteAE`, and changed some of tests to use `AdaptiveSparkPlanHelper.find/collect`, to make the plan verification work when AQE enabled. ### Why are the changes needed? It's reasonable to add the support to allow disabling unnecessary bucketed scan with AQE is enabled, this helps optimize the query when AQE is enabled. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added unit test in `DisableUnnecessaryBucketedScanSuite`. Closes #30200 from c21/auto-bucket-aqe. Authored-by: Cheng Su Signed-off-by: Wenchen Fan --- .../adaptive/AdaptiveSparkPlanExec.scala | 12 ++++---- .../DisableUnnecessaryBucketedScan.scala | 4 ++- .../DisableUnnecessaryBucketedScanSuite.scala | 28 +++++++++++++++---- 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index a4a58dfe1de53..4ae33311d5a24 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -36,6 +36,7 @@ import org.apache.spark.sql.catalyst.rules.{PlanChangeLogger, Rule} import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec._ +import org.apache.spark.sql.execution.bucketing.DisableUnnecessaryBucketedScan import org.apache.spark.sql.execution.command.DataWritingCommandExec import org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec import org.apache.spark.sql.execution.exchange._ @@ -82,17 +83,14 @@ case class AdaptiveSparkPlanExec( // The logical plan optimizer for re-optimizing the current logical plan. @transient private val optimizer = new AQEOptimizer(conf) - @transient private val removeRedundantProjects = RemoveRedundantProjects - @transient private val removeRedundantSorts = RemoveRedundantSorts - @transient private val ensureRequirements = EnsureRequirements - // A list of physical plan rules to be applied before creation of query stages. The physical // plan should reach a final status of query stages (i.e., no more addition or removal of // Exchange nodes) after running these rules. private def queryStagePreparationRules: Seq[Rule[SparkPlan]] = Seq( - removeRedundantProjects, - removeRedundantSorts, - ensureRequirements + RemoveRedundantProjects, + RemoveRedundantSorts, + EnsureRequirements, + DisableUnnecessaryBucketedScan ) ++ context.session.sessionState.queryStagePrepRules // A list of physical optimizer rules to be applied to a new stage before its execution. 
These diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala index 2bbd5f5d969dc..bb59f44abc761 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala @@ -101,7 +101,9 @@ object DisableUnnecessaryBucketedScan extends Rule[SparkPlan] { case scan: FileSourceScanExec => if (isBucketedScanWithoutFilter(scan)) { if (!withInterestingPartition || (withExchange && withAllowedNode)) { - scan.copy(disableBucketedScan = true) + val nonBucketedScan = scan.copy(disableBucketedScan = true) + scan.logicalLink.foreach(nonBucketedScan.setLogicalLink) + nonBucketedScan } else { scan } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala index 70b74aed40eca..1fdd3be88f782 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala @@ -21,6 +21,8 @@ import org.apache.spark.sql.QueryTest import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning import org.apache.spark.sql.execution.FileSourceScanExec +import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanHelper, DisableAdaptiveExecutionSuite, EnableAdaptiveExecutionSuite} +import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION @@ -28,7 +30,8 @@ import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} class DisableUnnecessaryBucketedScanWithoutHiveSupportSuite extends DisableUnnecessaryBucketedScanSuite - with SharedSparkSession { + with SharedSparkSession + with DisableAdaptiveExecutionSuite { protected override def beforeAll(): Unit = { super.beforeAll() @@ -36,7 +39,22 @@ class DisableUnnecessaryBucketedScanWithoutHiveSupportSuite } } -abstract class DisableUnnecessaryBucketedScanSuite extends QueryTest with SQLTestUtils { +class DisableUnnecessaryBucketedScanWithoutHiveSupportSuiteAE + extends DisableUnnecessaryBucketedScanSuite + with SharedSparkSession + with EnableAdaptiveExecutionSuite { + + protected override def beforeAll(): Unit = { + super.beforeAll() + assert(spark.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "in-memory") + } +} + +abstract class DisableUnnecessaryBucketedScanSuite + extends QueryTest + with SQLTestUtils + with AdaptiveSparkPlanHelper { + import testImplicits._ private lazy val df1 = @@ -51,7 +69,7 @@ abstract class DisableUnnecessaryBucketedScanSuite extends QueryTest with SQLTes def checkNumBucketedScan(query: String, expectedNumBucketedScan: Int): Unit = { val plan = sql(query).queryExecution.executedPlan - val bucketedScan = plan.collect { case s: FileSourceScanExec if s.bucketedScan => s } + val bucketedScan = collect(plan) { case s: FileSourceScanExec if s.bucketedScan => s } assert(bucketedScan.length == expectedNumBucketedScan) } @@ -230,14 +248,14 @@ abstract class DisableUnnecessaryBucketedScanSuite 
extends QueryTest with SQLTes assertCached(spark.table("t1")) // Verify cached bucketed table scan not disabled - val partitioning = spark.table("t1").queryExecution.executedPlan + val partitioning = stripAQEPlan(spark.table("t1").queryExecution.executedPlan) .outputPartitioning assert(partitioning match { case HashPartitioning(Seq(column: AttributeReference), 8) if column.name == "i" => true case _ => false }) val aggregateQueryPlan = sql("SELECT SUM(i) FROM t1 GROUP BY i").queryExecution.executedPlan - assert(aggregateQueryPlan.find(_.isInstanceOf[ShuffleExchangeExec]).isEmpty) + assert(find(aggregateQueryPlan)(_.isInstanceOf[ShuffleExchangeExec]).isEmpty) } } } From 789d19cab5caa20d35dcdd700ed7fe53ca1893fe Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Mon, 2 Nov 2020 08:34:50 -0800 Subject: [PATCH 0374/1009] [SPARK-33319][SQL][TEST] Add all built-in SerDes to HiveSerDeReadWriteSuite ### What changes were proposed in this pull request? This pr add all built-in SerDes to `HiveSerDeReadWriteSuite`. https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-RowFormats&SerDe ### Why are the changes needed? We will upgrade Parquet, ORC and Avro, need to ensure compatibility. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? N/A Closes #30228 from wangyum/SPARK-33319. Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- .../hive/execution/HiveSerDeReadWriteSuite.scala | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala index ac9ae8c9229db..aae49f70ca93f 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala @@ -135,11 +135,12 @@ class HiveSerDeReadWriteSuite extends QueryTest with SQLTestUtils with TestHiveS } // MAP withTable("hive_serde") { - hiveClient.runSqlHive(s"CREATE TABLE hive_serde (c1 MAP ) STORED AS $fileFormat") - hiveClient.runSqlHive("INSERT INTO TABLE hive_serde SELECT MAP(1, 'a') FROM (SELECT 1) t") - checkAnswer(spark.table("hive_serde"), Row(Map(1 -> "a"))) - spark.sql("INSERT INTO TABLE hive_serde SELECT MAP(2, 'b')") - checkAnswer(spark.table("hive_serde"), Seq(Row(Map(1 -> "a")), Row(Map(2 -> "b")))) + hiveClient.runSqlHive( + s"CREATE TABLE hive_serde (c1 MAP ) STORED AS $fileFormat") + hiveClient.runSqlHive("INSERT INTO TABLE hive_serde SELECT MAP('1', 'a') FROM (SELECT 1) t") + checkAnswer(spark.table("hive_serde"), Row(Map("1" -> "a"))) + spark.sql("INSERT INTO TABLE hive_serde SELECT MAP('2', 'b')") + checkAnswer(spark.table("hive_serde"), Seq(Row(Map("1" -> "a")), Row(Map("2" -> "b")))) } // STRUCT @@ -154,7 +155,7 @@ class HiveSerDeReadWriteSuite extends QueryTest with SQLTestUtils with TestHiveS } } - Seq("PARQUET", "ORC", "TEXTFILE").foreach { fileFormat => + Seq("SEQUENCEFILE", "TEXTFILE", "RCFILE", "ORC", "PARQUET", "AVRO").foreach { fileFormat => test(s"Read/Write Hive $fileFormat serde table") { // Numeric Types checkNumericTypes(fileFormat, "TINYINT", 2) From eecebd03023bdde5084b7f518d709e304eff7228 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 2 Nov 2020 10:07:18 -0800 Subject: [PATCH 0375/1009] [SPARK-33306][SQL][FOLLOWUP] Group DateType and TimestampType together in `needsTimeZone()` ### What changes were proposed in this pull 
request? In the PR, I propose to group `DateType` and `TimestampType` together in checking time zone needs in the `Cast.needsTimeZone()` method. ### Why are the changes needed? To improve code maintainability. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By the existing test `"SPARK-33306: Timezone is needed when cast Date to String"`. Closes #30223 from MaxGekk/WangGuangxin-SPARK-33306-followup. Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/sql/catalyst/expressions/Cast.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 610297cfd50b6..48a9e19c9d953 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -108,9 +108,8 @@ object Cast { */ def needsTimeZone(from: DataType, to: DataType): Boolean = (from, to) match { case (StringType, TimestampType | DateType) => true - case (DateType, StringType) => true + case (TimestampType | DateType, StringType) => true case (DateType, TimestampType) => true - case (TimestampType, StringType) => true case (TimestampType, DateType) => true case (ArrayType(fromType, _), ArrayType(toType, _)) => needsTimeZone(fromType, toType) case (MapType(fromKey, fromValue, _), MapType(toKey, toValue, _)) => From bdabf60fb4a61b0eef95144f2c54477a10ea849f Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 2 Nov 2020 10:10:24 -0800 Subject: [PATCH 0376/1009] [SPARK-33299][SQL][DOCS] Don't mention schemas in JSON format in docs for `from_json` ### What changes were proposed in this pull request? Remove the JSON formatted schema from comments for `from_json()` in Scala/Python APIs. Closes #30201 ### Why are the changes needed? Schemas in JSON format is internal (not documented). It shouldn't be recommenced for usage. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By linters. Closes #30226 from MaxGekk/from_json-common-schema-parsing-2. Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- python/pyspark/sql/functions.py | 3 +-- .../src/main/scala/org/apache/spark/sql/functions.scala | 7 ++----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 69fdf220f19fe..c349ae5cf46c4 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -2847,8 +2847,7 @@ def from_json(col, schema, options={}): :param schema: a StructType or ArrayType of StructType to use when parsing the json column. :param options: options to control parsing. accepts the same options as the json datasource - .. note:: Since Spark 2.3, the DDL-formatted string or a JSON format string is also - supported for ``schema``. + .. note:: Since Spark 2.3, the DDL-formatted string is also supported for ``schema``. >>> from pyspark.sql.types import * >>> data = [(1, '''{"a": 1}''')] diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index ffa97c20c397c..6bb9f7871edf2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -4077,9 +4077,7 @@ object functions { * Returns `null`, in the case of an unparseable string. 
* * @param e a string column containing JSON data. - * @param schema the schema to use when parsing the json string as a json string. In Spark 2.1, - * the user-provided schema has to be in JSON format. Since Spark 2.2, the DDL - * format is also supported for the schema. + * @param schema the schema as a DDL-formatted string. * * @group collection_funcs * @since 2.1.0 @@ -4094,8 +4092,7 @@ object functions { * Returns `null`, in the case of an unparseable string. * * @param e a string column containing JSON data. - * @param schema the schema to use when parsing the json string as a json string, it could be a - * JSON format string or a DDL-formatted string. + * @param schema the schema as a DDL-formatted string. * * @group collection_funcs * @since 2.3.0 From 3959f0d9879fa7fa9e8f2e8ed8c8b12003d21788 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Tue, 3 Nov 2020 10:00:49 +0900 Subject: [PATCH 0377/1009] [SPARK-33250][PYTHON][DOCS] Migration to NumPy documentation style in SQL (pyspark.sql.*) ### What changes were proposed in this pull request? This PR proposes to migrate to [NumPy documentation style](https://numpydoc.readthedocs.io/en/latest/format.html), see also SPARK-33243. While I am migrating, I also fixed some Python type hints accordingly. ### Why are the changes needed? For better documentation as text itself, and generated HTMLs ### Does this PR introduce _any_ user-facing change? Yes, they will see a better format of HTMLs, and better text format. See SPARK-33243. ### How was this patch tested? Manually tested via running `./dev/lint-python`. Closes #30181 from HyukjinKwon/SPARK-33250. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- python/pyspark/sql/avro/functions.py | 45 +- python/pyspark/sql/catalog.py | 43 +- python/pyspark/sql/column.py | 233 +++- python/pyspark/sql/context.py | 272 ++-- python/pyspark/sql/dataframe.py | 842 ++++++++--- python/pyspark/sql/dataframe.pyi | 14 +- python/pyspark/sql/functions.py | 1631 ++++++++++++++++------ python/pyspark/sql/functions.pyi | 2 +- python/pyspark/sql/group.py | 80 +- python/pyspark/sql/pandas/conversion.py | 35 +- python/pyspark/sql/pandas/functions.py | 73 +- python/pyspark/sql/pandas/group_ops.py | 107 +- python/pyspark/sql/pandas/map_ops.py | 22 +- python/pyspark/sql/pandas/serializers.py | 22 +- python/pyspark/sql/pandas/types.py | 68 +- python/pyspark/sql/readwriter.py | 1056 ++++++++------ python/pyspark/sql/readwriter.pyi | 34 +- python/pyspark/sql/session.py | 265 +++- python/pyspark/sql/streaming.py | 845 ++++++----- python/pyspark/sql/streaming.pyi | 17 +- python/pyspark/sql/types.py | 179 ++- python/pyspark/sql/udf.py | 70 +- python/pyspark/sql/utils.py | 12 +- python/pyspark/sql/window.py | 116 +- python/pyspark/sql/window.pyi | 9 +- 25 files changed, 4240 insertions(+), 1852 deletions(-) diff --git a/python/pyspark/sql/avro/functions.py b/python/pyspark/sql/avro/functions.py index 75fe4eaa078a1..ce322814e34f8 100644 --- a/python/pyspark/sql/avro/functions.py +++ b/python/pyspark/sql/avro/functions.py @@ -20,12 +20,11 @@ """ -from pyspark import since, SparkContext +from pyspark import SparkContext from pyspark.sql.column import Column, _to_java_column from pyspark.util import _print_missing_jar -@since(3.0) def from_avro(data, jsonFormatSchema, options={}): """ Converts a binary column of Avro format into its corresponding catalyst value. 
@@ -34,13 +33,24 @@ def from_avro(data, jsonFormatSchema, options={}): To deserialize the data with a compatible and evolved schema, the expected Avro schema can be set via the option avroSchema. - Note: Avro is built-in but external data source module since Spark 2.4. Please deploy the - application as per the deployment section of "Apache Avro Data Source Guide". + .. versionadded:: 3.0.0 + + Parameters + ---------- + data : :class:`Column` or str + the binary column. + jsonFormatSchema : str + the avro schema in JSON string format. + options : dict, optional + options to control how the Avro record is parsed. - :param data: the binary column. - :param jsonFormatSchema: the avro schema in JSON string format. - :param options: options to control how the Avro record is parsed. + Notes + ----- + Avro is built-in but external data source module since Spark 2.4. Please deploy the + application as per the deployment section of "Apache Avro Data Source Guide". + Examples + -------- >>> from pyspark.sql import Row >>> from pyspark.sql.avro.functions import from_avro, to_avro >>> data = [(1, Row(age=2, name='Alice'))] @@ -48,6 +58,7 @@ def from_avro(data, jsonFormatSchema, options={}): >>> avroDf = df.select(to_avro(df.value).alias("avro")) >>> avroDf.collect() [Row(avro=bytearray(b'\\x00\\x00\\x04\\x00\\nAlice'))] + >>> jsonFormatSchema = '''{"type":"record","name":"topLevelRecord","fields": ... [{"name":"avro","type":[{"type":"record","name":"value","namespace":"topLevelRecord", ... "fields":[{"name":"age","type":["long","null"]}, @@ -67,23 +78,33 @@ def from_avro(data, jsonFormatSchema, options={}): return Column(jc) -@since(3.0) def to_avro(data, jsonFormatSchema=""): """ Converts a column into binary of avro format. - Note: Avro is built-in but external data source module since Spark 2.4. Please deploy the - application as per the deployment section of "Apache Avro Data Source Guide". + .. versionadded:: 3.0.0 + + Parameters + ---------- + data : :class:`Column` or str + the data column. + jsonFormatSchema : str, optional + user-specified output avro schema in JSON string format. - :param data: the data column. - :param jsonFormatSchema: user-specified output avro schema in JSON string format. + Notes + ----- + Avro is built-in but external data source module since Spark 2.4. Please deploy the + application as per the deployment section of "Apache Avro Data Source Guide". + Examples + -------- >>> from pyspark.sql import Row >>> from pyspark.sql.avro.functions import to_avro >>> data = ['SPADES'] >>> df = spark.createDataFrame(data, "string") >>> df.select(to_avro(df.value).alias("suite")).collect() [Row(suite=bytearray(b'\\x00\\x0cSPADES'))] + >>> jsonFormatSchema = '''["null", {"type": "enum", "name": "value", ... "symbols": ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"]}]''' >>> df.select(to_avro(df.value, jsonFormatSchema).alias("suite")).collect() diff --git a/python/pyspark/sql/catalog.py b/python/pyspark/sql/catalog.py index 44e321c557e3d..70d68a04a473c 100644 --- a/python/pyspark/sql/catalog.py +++ b/python/pyspark/sql/catalog.py @@ -106,13 +106,16 @@ def listFunctions(self, dbName=None): isTemporary=jfunction.isTemporary())) return functions - @since(2.0) def listColumns(self, tableName, dbName=None): """Returns a list of columns for the given table/view in the specified database. If no database is specified, the current database is used. - Note: the order of arguments here is different from that of its JVM counterpart + .. 
versionadded:: 2.0.0 + + Notes + ----- + the order of arguments here is different from that of its JVM counterpart because Python does not support method overloading. """ if dbName is None: @@ -130,7 +133,6 @@ def listColumns(self, tableName, dbName=None): isBucket=jcolumn.isBucket())) return columns - @since(2.0) def createExternalTable(self, tableName, path=None, source=None, schema=None, **options): """Creates a table based on the dataset in a data source. @@ -143,14 +145,17 @@ def createExternalTable(self, tableName, path=None, source=None, schema=None, ** Optionally, a schema can be provided as the schema of the returned :class:`DataFrame` and created external table. - :return: :class:`DataFrame` + .. versionadded:: 2.0.0 + + Returns + ------- + :class:`DataFrame` """ warnings.warn( "createExternalTable is deprecated since Spark 2.2, please use createTable instead.", DeprecationWarning) return self.createTable(tableName, path, source, schema, **options) - @since(2.2) def createTable( self, tableName, path=None, source=None, schema=None, description=None, **options): """Creates a table based on the dataset in a data source. @@ -165,10 +170,14 @@ def createTable( Optionally, a schema can be provided as the schema of the returned :class:`DataFrame` and created table. + .. versionadded:: 2.2.0 + + Returns + ------- + :class:`DataFrame` + .. versionchanged:: 3.1 Added the ``description`` parameter. - - :return: :class:`DataFrame` """ if path is not None: options["path"] = path @@ -186,15 +195,20 @@ def createTable( tableName, source, scala_datatype, description, options) return DataFrame(df, self._sparkSession._wrapped) - @since(2.0) def dropTempView(self, viewName): """Drops the local temporary view with the given view name in the catalog. If the view has been cached before, then it will also be uncached. Returns true if this view is dropped successfully, false otherwise. - Note that, the return type of this method was None in Spark 2.0, but changed to Boolean + .. versionadded:: 2.0.0 + + Notes + ----- + The return type of this method was None in Spark 2.0, but changed to Boolean in Spark 2.1. + Examples + -------- >>> spark.createDataFrame([(1, 1)]).createTempView("my_table") >>> spark.table("my_table").collect() [Row(_1=1, _2=1)] @@ -206,12 +220,15 @@ def dropTempView(self, viewName): """ self._jcatalog.dropTempView(viewName) - @since(2.1) def dropGlobalTempView(self, viewName): """Drops the global temporary view with the given view name in the catalog. If the view has been cached before, then it will also be uncached. Returns true if this view is dropped successfully, false otherwise. + .. versionadded:: 2.1.0 + + Examples + -------- >>> spark.createDataFrame([(1, 1)]).createGlobalTempView("my_table") >>> spark.table("global_temp.my_table").collect() [Row(_1=1, _2=1)] @@ -223,12 +240,14 @@ def dropGlobalTempView(self, viewName): """ self._jcatalog.dropGlobalTempView(viewName) - @since(2.0) def registerFunction(self, name, f, returnType=None): """An alias for :func:`spark.udf.register`. See :meth:`pyspark.sql.UDFRegistration.register`. - .. note:: Deprecated in 2.3.0. Use :func:`spark.udf.register` instead. + .. versionadded:: 2.0.0 + + .. deprecated:: 2.3.0 + Use :func:`spark.udf.register` instead. """ warnings.warn( "Deprecated in 2.3.0. 
Use spark.udf.register instead.", diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py index 3cf7a033641d8..3dd08d88e92c4 100644 --- a/python/pyspark/sql/column.py +++ b/python/pyspark/sql/column.py @@ -19,7 +19,7 @@ import json import warnings -from pyspark import copy_func, since +from pyspark import copy_func from pyspark.context import SparkContext from pyspark.sql.types import DataType, StructField, StructType, IntegerType, StringType @@ -141,7 +141,7 @@ class Column(object): df.colName + 1 1 / df.colName - .. versionadded:: 1.3 + .. versionadded:: 1.3.0 """ def __init__(self, jc): @@ -175,8 +175,15 @@ def __init__(self, jc): _eqNullSafe_doc = """ Equality test that is safe for null values. - :param other: a value or :class:`Column` + .. versionadded:: 2.3.0 + + Parameters + ---------- + other + a value or :class:`Column` + Examples + -------- >>> from pyspark.sql import Row >>> df1 = spark.createDataFrame([ ... Row(id=1, value='foo'), @@ -219,11 +226,11 @@ def __init__(self, jc): | true| false| false| +----------------+---------------+----------------+ - .. note:: Unlike Pandas, PySpark doesn't consider NaN values to be NULL. - See the `NaN Semantics`_ for details. - .. _NaN Semantics: - https://spark.apache.org/docs/latest/sql-programming-guide.html#nan-semantics - .. versionadded:: 2.3.0 + Notes + ----- + Unlike Pandas, PySpark doesn't consider NaN values to be NULL. See the + `NaN Semantics `_ + for details. """ eqNullSafe = _bin_op("eqNullSafe", _eqNullSafe_doc) @@ -244,9 +251,14 @@ def __contains__(self, item): _bitwiseOR_doc = """ Compute bitwise OR of this expression with another expression. - :param other: a value or :class:`Column` to calculate bitwise or(|) against - this :class:`Column`. + Parameters + ---------- + other + a value or :class:`Column` to calculate bitwise or(|) with + this :class:`Column`. + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([Row(a=170, b=75)]) >>> df.select(df.a.bitwiseOR(df.b)).collect() @@ -255,9 +267,14 @@ def __contains__(self, item): _bitwiseAND_doc = """ Compute bitwise AND of this expression with another expression. - :param other: a value or :class:`Column` to calculate bitwise and(&) against - this :class:`Column`. + Parameters + ---------- + other + a value or :class:`Column` to calculate bitwise and(&) with + this :class:`Column`. + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([Row(a=170, b=75)]) >>> df.select(df.a.bitwiseAND(df.b)).collect() @@ -266,9 +283,14 @@ def __contains__(self, item): _bitwiseXOR_doc = """ Compute bitwise XOR of this expression with another expression. - :param other: a value or :class:`Column` to calculate bitwise xor(^) against - this :class:`Column`. + Parameters + ---------- + other + a value or :class:`Column` to calculate bitwise xor(^) with + this :class:`Column`. + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([Row(a=170, b=75)]) >>> df.select(df.a.bitwiseXOR(df.b)).collect() @@ -279,12 +301,15 @@ def __contains__(self, item): bitwiseAND = _bin_op("bitwiseAND", _bitwiseAND_doc) bitwiseXOR = _bin_op("bitwiseXOR", _bitwiseXOR_doc) - @since(1.3) def getItem(self, key): """ An expression that gets an item at position ``ordinal`` out of a list, or gets an item by key out of a dict. + .. 
versionadded:: 1.3.0 + + Examples + -------- >>> df = spark.createDataFrame([([1, 2], {"key": "value"})], ["l", "d"]) >>> df.select(df.l.getItem(0), df.d.getItem("key")).show() +----+------+ @@ -301,11 +326,14 @@ def getItem(self, key): DeprecationWarning) return self[key] - @since(1.3) def getField(self, name): """ An expression that gets a field by name in a StructField. + .. versionadded:: 1.3.0 + + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([Row(r=Row(a=1, b="b"))]) >>> df.select(df.r.getField("b")).show() @@ -329,11 +357,14 @@ def getField(self, name): DeprecationWarning) return self[name] - @since(3.1) def withField(self, fieldName, col): """ An expression that adds/replaces a field in :class:`StructType` by name. + .. versionadded:: 3.1.0 + + Examples + -------- >>> from pyspark.sql import Row >>> from pyspark.sql.functions import lit >>> df = spark.createDataFrame([Row(a=Row(b=1, c=2))]) @@ -358,11 +389,14 @@ def withField(self, fieldName, col): return Column(self._jc.withField(fieldName, col._jc)) - @since(3.1) def dropFields(self, *fieldNames): """ An expression that drops fields in :class:`StructType` by name. + .. versionadded:: 3.1.0 + + Examples + -------- >>> from pyspark.sql import Row >>> from pyspark.sql.functions import col, lit >>> df = spark.createDataFrame([ @@ -429,8 +463,13 @@ def __iter__(self): _contains_doc = """ Contains the other element. Returns a boolean :class:`Column` based on a string match. - :param other: string in line + Parameters + ---------- + other + string in line. A value as a literal or a :class:`Column`. + Examples + -------- >>> df.filter(df.name.contains('o')).collect() [Row(age=5, name='Bob')] """ @@ -438,26 +477,43 @@ def __iter__(self): SQL RLIKE expression (LIKE with Regex). Returns a boolean :class:`Column` based on a regex match. - :param other: an extended regex expression + Parameters + ---------- + other : str + an extended regex expression + Examples + -------- >>> df.filter(df.name.rlike('ice$')).collect() [Row(age=2, name='Alice')] """ _like_doc = """ SQL like expression. Returns a boolean :class:`Column` based on a SQL LIKE match. - :param other: a SQL LIKE pattern + Parameters + ---------- + other : str + a SQL LIKE pattern - See :func:`rlike` for a regex version + See Also + -------- + pyspark.sql.Column.rlike + Examples + -------- >>> df.filter(df.name.like('Al%')).collect() [Row(age=2, name='Alice')] """ _startswith_doc = """ String starts with. Returns a boolean :class:`Column` based on a string match. - :param other: string at start of line (do not use a regex `^`) + Parameters + ---------- + other : :class:`Column` or str + string at start of line (do not use a regex `^`) + Examples + -------- >>> df.filter(df.name.startswith('Al')).collect() [Row(age=2, name='Alice')] >>> df.filter(df.name.startswith('^Al')).collect() @@ -466,8 +522,13 @@ def __iter__(self): _endswith_doc = """ String ends with. Returns a boolean :class:`Column` based on a string match. 
- :param other: string at end of line (do not use a regex `$`) + Parameters + ---------- + other : :class:`Column` or str + string at end of line (do not use a regex `$`) + Examples + -------- >>> df.filter(df.name.endswith('ice')).collect() [Row(age=2, name='Alice')] >>> df.filter(df.name.endswith('ice$')).collect() @@ -480,14 +541,21 @@ def __iter__(self): startswith = _bin_op("startsWith", _startswith_doc) endswith = _bin_op("endsWith", _endswith_doc) - @since(1.3) def substr(self, startPos, length): """ Return a :class:`Column` which is a substring of the column. - :param startPos: start position (int or Column) - :param length: length of the substring (int or Column) + .. versionadded:: 1.3.0 + + Parameters + ---------- + startPos : :class:`Column` or int + start position + length : :class:`Column` or int + length of the substring + Examples + -------- >>> df.select(df.name.substr(1, 3).alias("col")).collect() [Row(col='Ali'), Row(col='Bob')] """ @@ -507,12 +575,15 @@ def substr(self, startPos, length): raise TypeError("Unexpected type: %s" % type(startPos)) return Column(jc) - @since(1.5) def isin(self, *cols): """ A boolean expression that is evaluated to true if the value of this expression is contained by the evaluated values of the arguments. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df[df.name.isin("Bob", "Mike")].collect() [Row(age=5, name='Bob')] >>> df[df.age.isin([1, 2, 3])].collect() @@ -529,6 +600,8 @@ def isin(self, *cols): _asc_doc = """ Returns a sort expression based on ascending order of the column. + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([('Tom', 80), ('Alice', None)], ["name", "height"]) >>> df.select(df.name).orderBy(df.name.asc()).collect() @@ -538,27 +611,37 @@ def isin(self, *cols): Returns a sort expression based on ascending order of the column, and null values return before non-null values. + .. versionadded:: 2.4.0 + + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"]) >>> df.select(df.name).orderBy(df.name.asc_nulls_first()).collect() [Row(name=None), Row(name='Alice'), Row(name='Tom')] - .. versionadded:: 2.4 """ _asc_nulls_last_doc = """ Returns a sort expression based on ascending order of the column, and null values appear after non-null values. + .. versionadded:: 2.4.0 + + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"]) >>> df.select(df.name).orderBy(df.name.asc_nulls_last()).collect() [Row(name='Alice'), Row(name='Tom'), Row(name=None)] - .. versionadded:: 2.4 """ _desc_doc = """ Returns a sort expression based on the descending order of the column. + .. versionadded:: 2.4.0 + + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([('Tom', 80), ('Alice', None)], ["name", "height"]) >>> df.select(df.name).orderBy(df.name.desc()).collect() @@ -568,23 +651,28 @@ def isin(self, *cols): Returns a sort expression based on the descending order of the column, and null values appear before non-null values. + .. versionadded:: 2.4.0 + + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"]) >>> df.select(df.name).orderBy(df.name.desc_nulls_first()).collect() [Row(name=None), Row(name='Tom'), Row(name='Alice')] - .. 
versionadded:: 2.4 """ _desc_nulls_last_doc = """ Returns a sort expression based on the descending order of the column, and null values appear after non-null values. + .. versionadded:: 2.4.0 + + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"]) >>> df.select(df.name).orderBy(df.name.desc_nulls_last()).collect() [Row(name='Tom'), Row(name='Alice'), Row(name=None)] - - .. versionadded:: 2.4 """ asc = _unary_op("asc", _asc_doc) @@ -597,6 +685,8 @@ def isin(self, *cols): _isNull_doc = """ True if the current expression is null. + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([Row(name='Tom', height=80), Row(name='Alice', height=None)]) >>> df.filter(df.height.isNull()).collect() @@ -605,6 +695,8 @@ def isin(self, *cols): _isNotNull_doc = """ True if the current expression is NOT null. + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([Row(name='Tom', height=80), Row(name='Alice', height=None)]) >>> df.filter(df.height.isNotNull()).collect() @@ -614,20 +706,30 @@ def isin(self, *cols): isNull = _unary_op("isNull", _isNull_doc) isNotNull = _unary_op("isNotNull", _isNotNull_doc) - @since(1.3) def alias(self, *alias, **kwargs): """ Returns this column aliased with a new name or names (in the case of expressions that return more than one column, such as explode). - :param alias: strings of desired column names (collects all positional arguments passed) - :param metadata: a dict of information to be stored in ``metadata`` attribute of the + .. versionadded:: 1.3.0 + + Parameters + ---------- + alias : str + desired column names (collects all positional arguments passed) + + Other Parameters + ---------------- + metadata: dict + a dict of information to be stored in ``metadata`` attribute of the corresponding :class:`StructField ` (optional, keyword only argument) - .. versionchanged:: 2.2 - Added optional ``metadata`` argument. + .. versionchanged:: 2.2.0 + Added optional ``metadata`` argument. + Examples + -------- >>> df.select(df.age.alias("age2")).collect() [Row(age2=2), Row(age2=5)] >>> df.select(df.age.alias("age3", metadata={'max': 99})).schema['age3'].metadata['max'] @@ -652,10 +754,13 @@ def alias(self, *alias, **kwargs): name = copy_func(alias, sinceversion=2.0, doc=":func:`name` is an alias for :func:`alias`.") - @since(1.3) def cast(self, dataType): """ Convert the column into type ``dataType``. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.select(df.age.cast("string").alias('ages')).collect() [Row(ages='2'), Row(ages='5')] >>> df.select(df.age.cast(StringType()).alias('ages')).collect() @@ -674,12 +779,15 @@ def cast(self, dataType): astype = copy_func(cast, sinceversion=1.4, doc=":func:`astype` is an alias for :func:`cast`.") - @since(1.3) def between(self, lowerBound, upperBound): """ A boolean expression that is evaluated to true if the value of this expression is between the given columns. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.select(df.name, df.age.between(2, 4)).show() +-----+---------------------------+ | name|((age >= 2) AND (age <= 4))| @@ -690,17 +798,22 @@ def between(self, lowerBound, upperBound): """ return (self >= lowerBound) & (self <= upperBound) - @since(1.4) def when(self, condition, value): """ Evaluates a list of conditions and returns one of multiple possible result expressions. If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions. 
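A quick, hedged illustration of the null-ordering, `cast` and `between` helpers documented above (the toy rows mirror the doctest data and are not part of the patch):

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame([("Tom", 80), (None, 60), ("Alice", None)], ["name", "height"])

# Nulls sort last when ascending, first when descending.
df.orderBy(df.name.asc_nulls_last()).show()
df.orderBy(df.name.desc_nulls_first()).show()

# cast() and between() compose with select().
df.select(df.name, df.height.cast("double").alias("height_d"),
          df.height.between(60, 80).alias("in_range")).show()
```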
- See :func:`pyspark.sql.functions.when` for example usage. + .. versionadded:: 1.4.0 - :param condition: a boolean :class:`Column` expression. - :param value: a literal value, or a :class:`Column` expression. + Parameters + ---------- + condition : :class:`Column` + a boolean :class:`Column` expression. + value + a literal value, or a :class:`Column` expression. + Examples + -------- >>> from pyspark.sql import functions as F >>> df.select(df.name, F.when(df.age > 4, 1).when(df.age < 3, -1).otherwise(0)).show() +-----+------------------------------------------------------------+ @@ -709,6 +822,10 @@ def when(self, condition, value): |Alice| -1| | Bob| 1| +-----+------------------------------------------------------------+ + + See Also + -------- + pyspark.sql.functions.when """ if not isinstance(condition, Column): raise TypeError("condition should be a Column") @@ -716,16 +833,20 @@ def when(self, condition, value): jc = self._jc.when(condition._jc, v) return Column(jc) - @since(1.4) def otherwise(self, value): """ Evaluates a list of conditions and returns one of multiple possible result expressions. If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions. - See :func:`pyspark.sql.functions.when` for example usage. + .. versionadded:: 1.4.0 - :param value: a literal value, or a :class:`Column` expression. + Parameters + ---------- + value + a literal value, or a :class:`Column` expression. + Examples + -------- >>> from pyspark.sql import functions as F >>> df.select(df.name, F.when(df.age > 3, 1).otherwise(0)).show() +-----+-------------------------------------+ @@ -734,19 +855,31 @@ def otherwise(self, value): |Alice| 0| | Bob| 1| +-----+-------------------------------------+ + + See Also + -------- + pyspark.sql.functions.when """ v = value._jc if isinstance(value, Column) else value jc = self._jc.otherwise(v) return Column(jc) - @since(1.4) def over(self, window): """ Define a windowing column. - :param window: a :class:`WindowSpec` - :return: a Column + .. versionadded:: 1.4.0 + + Parameters + ---------- + window : :class:`WindowSpec` + + Returns + ------- + :class:`Column` + Examples + -------- >>> from pyspark.sql import Window >>> window = Window.partitionBy("name").orderBy("age") \ .rowsBetween(Window.unboundedPreceding, Window.currentRow) diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index 937d44ac5ecbc..5071240a511a6 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -38,34 +38,41 @@ class SQLContext(object): A SQLContext can be used create :class:`DataFrame`, register :class:`DataFrame` as tables, execute SQL over tables, cache tables, and read parquet files. - :param sparkContext: The :class:`SparkContext` backing this SQLContext. - :param sparkSession: The :class:`SparkSession` around which this SQLContext wraps. - :param jsqlContext: An optional JVM Scala SQLContext. If set, we do not instantiate a new + .. deprecated:: 3.0.0 + Use :func:`SparkSession.builder.getOrCreate()` instead. + + Parameters + ---------- + sparkContext : :class:`SparkContext` + The :class:`SparkContext` backing this SQLContext. + sparkSession : :class:`SparkSession` + The :class:`SparkSession` around which this SQLContext wraps. + jsqlContext : optional + An optional JVM Scala SQLContext. If set, we do not instantiate a new SQLContext in the JVM, instead we make all calls to this object. + This is only for internal. 
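To complement the `when` / `otherwise` / `over` docstrings above, a small self-contained sketch (the window spec and sample data are assumptions for illustration):

```
from pyspark.sql import SparkSession, Window
from pyspark.sql import functions as F

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame([("Alice", 2), ("Bob", 5)], ["name", "age"])

w = (Window.partitionBy("name").orderBy("age")
     .rowsBetween(Window.unboundedPreceding, Window.currentRow))

df.select(
    df.name,
    F.when(df.age > 3, 1).otherwise(0).alias("is_older"),
    F.min("age").over(w).alias("min_age_so_far"),
).show()
```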
+ + Examples + -------- + >>> from datetime import datetime + >>> from pyspark.sql import Row + >>> sqlContext = SQLContext(sc) + >>> allTypes = sc.parallelize([Row(i=1, s="string", d=1.0, l=1, + ... b=True, list=[1, 2, 3], dict={"s": 0}, row=Row(a=1), + ... time=datetime(2014, 8, 1, 14, 1, 5))]) + >>> df = allTypes.toDF() + >>> df.createOrReplaceTempView("allTypes") + >>> sqlContext.sql('select i+1, d+1, not b, list[1], dict["s"], time, row.a ' + ... 'from allTypes where b and i > 0').collect() + [Row((i + CAST(1 AS BIGINT))=2, (d + CAST(1 AS DOUBLE))=2.0, (NOT b)=False, list[1]=2, \ + dict[s]=0, time=datetime.datetime(2014, 8, 1, 14, 1, 5), a=1)] + >>> df.rdd.map(lambda x: (x.i, x.s, x.d, x.l, x.b, x.time, x.row.a, x.list)).collect() + [(1, 'string', 1.0, 1, True, datetime.datetime(2014, 8, 1, 14, 1, 5), 1, [1, 2, 3])] """ _instantiatedContext = None def __init__(self, sparkContext, sparkSession=None, jsqlContext=None): - """Creates a new SQLContext. - - .. note:: Deprecated in 3.0.0. Use :func:`SparkSession.builder.getOrCreate()` instead. - - >>> from datetime import datetime - >>> from pyspark.sql import Row - >>> sqlContext = SQLContext(sc) - >>> allTypes = sc.parallelize([Row(i=1, s="string", d=1.0, l=1, - ... b=True, list=[1, 2, 3], dict={"s": 0}, row=Row(a=1), - ... time=datetime(2014, 8, 1, 14, 1, 5))]) - >>> df = allTypes.toDF() - >>> df.createOrReplaceTempView("allTypes") - >>> sqlContext.sql('select i+1, d+1, not b, list[1], dict["s"], time, row.a ' - ... 'from allTypes where b and i > 0').collect() - [Row((i + CAST(1 AS BIGINT))=2, (d + CAST(1 AS DOUBLE))=2.0, (NOT b)=False, list[1]=2, \ - dict[s]=0, time=datetime.datetime(2014, 8, 1, 14, 1, 5), a=1)] - >>> df.rdd.map(lambda x: (x.i, x.s, x.d, x.l, x.b, x.time, x.row.a, x.list)).collect() - [(1, 'string', 1.0, 1, True, datetime.datetime(2014, 8, 1, 14, 1, 5), 1, [1, 2, 3])] - """ if sparkSession is None: warnings.warn( "Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.", @@ -101,14 +108,18 @@ def _conf(self): return self.sparkSession._jsparkSession.sessionState().conf() @classmethod - @since(1.6) def getOrCreate(cls, sc): """ Get the existing SQLContext or create a new one with given SparkContext. - :param sc: SparkContext + .. versionadded:: 1.6.0 - .. note:: Deprecated in 3.0.0. Use :func:`SparkSession.builder.getOrCreate()` instead. + .. deprecated:: 3.0.0 + Use :func:`SparkSession.builder.getOrCreate()` instead. + + Parameters + ---------- + sc : :class:`SparkContext` """ warnings.warn( "Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.", @@ -122,22 +133,23 @@ def getOrCreate(cls, sc): cls(sc, sparkSession, jsqlContext) return cls._instantiatedContext - @since(1.6) def newSession(self): """ Returns a new SQLContext as new session, that has separate SQLConf, registered temporary views and UDFs, but shared SparkContext and table cache. + + .. versionadded:: 1.6.0 """ return self.__class__(self._sc, self.sparkSession.newSession()) - @since(1.3) def setConf(self, key, value): """Sets the given Spark SQL configuration property. + + .. versionadded:: 1.3.0 """ self.sparkSession.conf.set(key, value) - @since(1.3) def getConf(self, key, defaultValue=_NoValue): """Returns the value of Spark SQL configuration property for the given key. @@ -145,6 +157,10 @@ def getConf(self, key, defaultValue=_NoValue): defaultValue. If the key is not set and defaultValue is not set, return the system default value. + .. 
versionadded:: 1.3.0 + + Examples + -------- >>> sqlContext.getConf("spark.sql.shuffle.partitions") '200' >>> sqlContext.getConf("spark.sql.shuffle.partitions", "10") @@ -156,27 +172,42 @@ def getConf(self, key, defaultValue=_NoValue): return self.sparkSession.conf.get(key, defaultValue) @property - @since("1.3.1") def udf(self): """Returns a :class:`UDFRegistration` for UDF registration. - :return: :class:`UDFRegistration` + .. versionadded:: 1.3.1 + + Returns + ------- + :class:`UDFRegistration` """ return self.sparkSession.udf - @since(1.4) def range(self, start, end=None, step=1, numPartitions=None): """ Create a :class:`DataFrame` with single :class:`pyspark.sql.types.LongType` column named ``id``, containing elements in a range from ``start`` to ``end`` (exclusive) with step value ``step``. - :param start: the start value - :param end: the end value (exclusive) - :param step: the incremental step (default: 1) - :param numPartitions: the number of partitions of the DataFrame - :return: :class:`DataFrame` - + .. versionadded:: 1.4.0 + + Parameters + ---------- + start : int + the start value + end : int, optional + the end value (exclusive) + step : int, optional + the incremental step (default: 1) + numPartitions : int, optional + the number of partitions of the DataFrame + + Returns + ------- + :class:`DataFrame` + + Examples + -------- >>> sqlContext.range(1, 7, 2).collect() [Row(id=1), Row(id=3), Row(id=5)] @@ -187,24 +218,28 @@ def range(self, start, end=None, step=1, numPartitions=None): """ return self.sparkSession.range(start, end, step, numPartitions) - @since(1.2) def registerFunction(self, name, f, returnType=None): """An alias for :func:`spark.udf.register`. See :meth:`pyspark.sql.UDFRegistration.register`. - .. note:: Deprecated in 2.3.0. Use :func:`spark.udf.register` instead. + .. versionadded:: 1.2.0 + + .. deprecated:: 2.3.0 + Use :func:`spark.udf.register` instead. """ warnings.warn( "Deprecated in 2.3.0. Use spark.udf.register instead.", DeprecationWarning) return self.sparkSession.udf.register(name, f, returnType) - @since(2.1) def registerJavaFunction(self, name, javaClassName, returnType=None): """An alias for :func:`spark.udf.registerJavaFunction`. See :meth:`pyspark.sql.UDFRegistration.registerJavaFunction`. - .. note:: Deprecated in 2.3.0. Use :func:`spark.udf.registerJavaFunction` instead. + .. versionadded:: 2.1.0 + + .. deprecated:: 2.3.0 + Use :func:`spark.udf.registerJavaFunction` instead. """ warnings.warn( "Deprecated in 2.3.0. Use spark.udf.registerJavaFunction instead.", @@ -216,13 +251,19 @@ def _inferSchema(self, rdd, samplingRatio=None): """ Infer schema from an RDD of Row or tuple. - :param rdd: an RDD of Row or tuple - :param samplingRatio: sampling ratio, or no sampling (default) - :return: :class:`pyspark.sql.types.StructType` + Parameters + ---------- + rdd : :class:`RDD` + an RDD of Row or tuple + samplingRatio : float, optional + sampling ratio, or no sampling (default) + + Returns + ------- + :class:`pyspark.sql.types.StructType` """ return self.sparkSession._inferSchema(rdd, samplingRatio) - @since(1.3) def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=True): """ Creates a :class:`DataFrame` from an :class:`RDD`, a list or a :class:`pandas.DataFrame`. @@ -243,28 +284,41 @@ def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=Tr If schema inference is needed, ``samplingRatio`` is used to determined the ratio of rows used for schema inference. 
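For the `range` and `createDataFrame` docstrings migrated in this file, a minimal sketch using `SparkSession` directly, since `SQLContext` is deprecated as of 3.0.0 (sample data and the datatype-string schema are illustrative):

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()

# Schema given as a datatype string instead of a StructType.
spark.createDataFrame([("Alice", 1)], "name: string, age: int").show()

# Single LongType column named `id`, analogous to sqlContext.range(1, 7, 2).
spark.range(1, 7, 2).show()
```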
The first row will be used if ``samplingRatio`` is ``None``. - :param data: an RDD of any kind of SQL data representation(e.g. :class:`Row`, + .. versionadded:: 1.3.0 + + .. versionchanged:: 2.0.0 + The ``schema`` parameter can be a :class:`pyspark.sql.types.DataType` or a + datatype string after 2.0. + If it's not a :class:`pyspark.sql.types.StructType`, it will be wrapped into a + :class:`pyspark.sql.types.StructType` and each record will also be wrapped into a tuple. + + .. versionchanged:: 2.1.0 + Added verifySchema. + + Parameters + ---------- + data : :class:`RDD` or iterable + an RDD of any kind of SQL data representation(e.g. :class:`Row`, :class:`tuple`, ``int``, ``boolean``, etc.), or :class:`list`, or :class:`pandas.DataFrame`. - :param schema: a :class:`pyspark.sql.types.DataType` or a datatype string or a list of + schema : :class:`pyspark.sql.types.DataType`, str or list, optional + a :class:`pyspark.sql.types.DataType` or a datatype string or a list of column names, default is None. The data type string format equals to :class:`pyspark.sql.types.DataType.simpleString`, except that top level struct type can omit the ``struct<>`` and atomic types use ``typeName()`` as their format, e.g. use ``byte`` instead of ``tinyint`` for :class:`pyspark.sql.types.ByteType`. We can also use ``int`` as a short name for :class:`pyspark.sql.types.IntegerType`. - :param samplingRatio: the sample ratio of rows used for inferring - :param verifySchema: verify data types of every row against schema. - :return: :class:`DataFrame` + samplingRatio : float, optional + the sample ratio of rows used for inferring + verifySchema : bool, optional + verify data types of every row against schema. Enabled by default. - .. versionchanged:: 2.0 - The ``schema`` parameter can be a :class:`pyspark.sql.types.DataType` or a - datatype string after 2.0. - If it's not a :class:`pyspark.sql.types.StructType`, it will be wrapped into a - :class:`pyspark.sql.types.StructType` and each record will also be wrapped into a tuple. - - .. versionchanged:: 2.1 - Added verifySchema. + Returns + ------- + :class:`DataFrame` + Examples + -------- >>> l = [('Alice', 1)] >>> sqlContext.createDataFrame(l).collect() [Row(_1='Alice', _2=1)] @@ -314,26 +368,31 @@ def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=Tr """ return self.sparkSession.createDataFrame(data, schema, samplingRatio, verifySchema) - @since(1.3) def registerDataFrameAsTable(self, df, tableName): """Registers the given :class:`DataFrame` as a temporary table in the catalog. Temporary tables exist only during the lifetime of this instance of :class:`SQLContext`. + .. versionadded:: 1.3.0 + + Examples + -------- >>> sqlContext.registerDataFrameAsTable(df, "table1") """ df.createOrReplaceTempView(tableName) - @since(1.6) def dropTempTable(self, tableName): """ Remove the temporary table from catalog. + .. versionadded:: 1.6.0 + + Examples + -------- >>> sqlContext.registerDataFrameAsTable(df, "table1") >>> sqlContext.dropTempTable("table1") """ self.sparkSession.catalog.dropTempView(tableName) - @since(1.3) def createExternalTable(self, tableName, path=None, source=None, schema=None, **options): """Creates an external table based on the dataset in a data source. @@ -346,17 +405,26 @@ def createExternalTable(self, tableName, path=None, source=None, schema=None, ** Optionally, a schema can be provided as the schema of the returned :class:`DataFrame` and created external table. - :return: :class:`DataFrame` + .. 
versionadded:: 1.3.0 + + Returns + ------- + :class:`DataFrame` """ return self.sparkSession.catalog.createExternalTable( tableName, path, source, schema, **options) - @since(1.0) def sql(self, sqlQuery): """Returns a :class:`DataFrame` representing the result of the given query. - :return: :class:`DataFrame` + .. versionadded:: 1.0.0 + + Returns + ------- + :class:`DataFrame` + Examples + -------- >>> sqlContext.registerDataFrameAsTable(df, "table1") >>> df2 = sqlContext.sql("SELECT field1 AS f1, field2 as f2 from table1") >>> df2.collect() @@ -364,12 +432,17 @@ def sql(self, sqlQuery): """ return self.sparkSession.sql(sqlQuery) - @since(1.0) def table(self, tableName): """Returns the specified table or view as a :class:`DataFrame`. - :return: :class:`DataFrame` + .. versionadded:: 1.0.0 + + Returns + ------- + :class:`DataFrame` + Examples + -------- >>> sqlContext.registerDataFrameAsTable(df, "table1") >>> df2 = sqlContext.table("table1") >>> sorted(df.collect()) == sorted(df2.collect()) @@ -377,7 +450,6 @@ def table(self, tableName): """ return self.sparkSession.table(tableName) - @since(1.3) def tables(self, dbName=None): """Returns a :class:`DataFrame` containing names of tables in the given database. @@ -386,9 +458,19 @@ def tables(self, dbName=None): The returned DataFrame has two columns: ``tableName`` and ``isTemporary`` (a column with :class:`BooleanType` indicating if a table is a temporary one or not). - :param dbName: string, name of the database to use. - :return: :class:`DataFrame` + .. versionadded:: 1.3.0 + Parameters + ---------- + dbName: str, optional + name of the database to use. + + Returns + ------- + :class:`DataFrame` + + Examples + -------- >>> sqlContext.registerDataFrameAsTable(df, "table1") >>> df2 = sqlContext.tables() >>> df2.filter("tableName = 'table1'").first() @@ -399,12 +481,20 @@ def tables(self, dbName=None): else: return DataFrame(self._ssql_ctx.tables(dbName), self) - @since(1.3) def tableNames(self, dbName=None): """Returns a list of names of tables in the database ``dbName``. - :param dbName: string, name of the database to use. Default to the current database. - :return: list of table names, in string + .. versionadded:: 1.3.0 + + Parameters + ---------- + dbName: str + name of the database to use. Default to the current database. + + Returns + ------- + list + list of table names, in string >>> sqlContext.registerDataFrameAsTable(df, "table1") >>> "table1" in sqlContext.tableNames() @@ -433,26 +523,34 @@ def clearCache(self): self._ssql_ctx.clearCache() @property - @since(1.4) def read(self): """ Returns a :class:`DataFrameReader` that can be used to read data in as a :class:`DataFrame`. - :return: :class:`DataFrameReader` + .. versionadded:: 1.4.0 + + Returns + ------- + :class:`DataFrameReader` """ return DataFrameReader(self) @property - @since(2.0) def readStream(self): """ Returns a :class:`DataStreamReader` that can be used to read data streams as a streaming :class:`DataFrame`. - .. note:: Evolving. + .. versionadded:: 2.0.0 - :return: :class:`DataStreamReader` + Notes + ----- + This API is evolving. + + Returns + ------- + :class:`DataStreamReader` >>> text_sdf = sqlContext.readStream.text(tempfile.mkdtemp()) >>> text_sdf.isStreaming @@ -461,12 +559,15 @@ def readStream(self): return DataStreamReader(self) @property - @since(2.0) def streams(self): """Returns a :class:`StreamingQueryManager` that allows managing all the :class:`StreamingQuery` StreamingQueries active on `this` context. - .. note:: Evolving. + .. 
versionadded:: 2.0.0 + + Notes + ----- + This API is evolving. """ from pyspark.sql.streaming import StreamingQueryManager return StreamingQueryManager(self._ssql_ctx.streams()) @@ -478,11 +579,18 @@ class HiveContext(SQLContext): Configuration for Hive is read from ``hive-site.xml`` on the classpath. It supports running both SQL and HiveQL commands. - :param sparkContext: The SparkContext to wrap. - :param jhiveContext: An optional JVM Scala HiveContext. If set, we do not instantiate a new + .. deprecated:: 2.0.0 + Use SparkSession.builder.enableHiveSupport().getOrCreate(). + + Parameters + ---------- + sparkContext : :class:`SparkContext` + The SparkContext to wrap. + jhiveContext : optional + An optional JVM Scala HiveContext. If set, we do not instantiate a new :class:`HiveContext` in the JVM, instead we make all calls to this object. + This is only for internal use. - .. note:: Deprecated in 2.0.0. Use SparkSession.builder.enableHiveSupport().getOrCreate(). """ def __init__(self, sparkContext, jhiveContext=None): diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 487135cd2329a..9fae27a2d9c6c 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -62,7 +62,7 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): people.filter(people.age > 30).join(department, people.deptId == department.id) \\ .groupBy(department.name, "gender").agg({"salary": "avg", "age": "max"}) - .. versionadded:: 1.3 + .. versionadded:: 1.3.0 """ def __init__(self, jdf, sql_ctx): @@ -100,38 +100,44 @@ def stat(self): """ return DataFrameStatFunctions(self) - @since(1.3) def toJSON(self, use_unicode=True): """Converts a :class:`DataFrame` into a :class:`RDD` of string. Each row is turned into a JSON document as one element in the returned RDD. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.toJSON().first() '{"age":2,"name":"Alice"}' """ rdd = self._jdf.toJSON() return RDD(rdd.toJavaRDD(), self._sc, UTF8Deserializer(use_unicode)) - @since(1.3) def registerTempTable(self, name): """Registers this DataFrame as a temporary table using the given name. The lifetime of this temporary table is tied to the :class:`SparkSession` that was used to create this :class:`DataFrame`. + .. versionadded:: 1.3.0 + + .. deprecated:: 2.0.0 + Use :meth:`DataFrame.createOrReplaceTempView` instead. + + Examples + -------- >>> df.registerTempTable("people") >>> df2 = spark.sql("select * from people") >>> sorted(df.collect()) == sorted(df2.collect()) True >>> spark.catalog.dropTempView("people") - - .. note:: Deprecated in 2.0, use createOrReplaceTempView instead. """ warnings.warn( "Deprecated in 2.0, use createOrReplaceTempView instead.", DeprecationWarning) self._jdf.createOrReplaceTempView(name) - @since(2.0) def createTempView(self, name): """Creates a local temporary view with this :class:`DataFrame`. @@ -140,6 +146,10 @@ def createTempView(self, name): throws :class:`TempTableAlreadyExistsException`, if the view name already exists in the catalog. + .. versionadded:: 2.0.0 + + Examples + -------- >>> df.createTempView("people") >>> df2 = spark.sql("select * from people") >>> sorted(df.collect()) == sorted(df2.collect()) @@ -153,13 +163,16 @@ def createTempView(self, name): """ self._jdf.createTempView(name) - @since(2.0) def createOrReplaceTempView(self, name): """Creates or replaces a local temporary view with this :class:`DataFrame`. 
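A short sketch of the temporary-view workflow covered by the docstrings above (view name and rows are illustrative; assumes an active `SparkSession` named `spark`):

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame([("Alice", 2), ("Bob", 5)], ["name", "age"])

df.createOrReplaceTempView("people")          # scoped to this SparkSession
spark.sql("SELECT name FROM people WHERE age >= 5").show()
spark.catalog.dropTempView("people")
```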
The lifetime of this temporary table is tied to the :class:`SparkSession` that was used to create this :class:`DataFrame`. + .. versionadded:: 2.0.0 + + Examples + -------- >>> df.createOrReplaceTempView("people") >>> df2 = df.filter(df.age > 3) >>> df2.createOrReplaceTempView("people") @@ -171,7 +184,6 @@ def createOrReplaceTempView(self, name): """ self._jdf.createOrReplaceTempView(name) - @since(2.1) def createGlobalTempView(self, name): """Creates a global temporary view with this :class:`DataFrame`. @@ -179,6 +191,10 @@ def createGlobalTempView(self, name): throws :class:`TempTableAlreadyExistsException`, if the view name already exists in the catalog. + .. versionadded:: 2.1.0 + + Examples + -------- >>> df.createGlobalTempView("people") >>> df2 = spark.sql("select * from global_temp.people") >>> sorted(df.collect()) == sorted(df2.collect()) @@ -192,12 +208,15 @@ def createGlobalTempView(self, name): """ self._jdf.createGlobalTempView(name) - @since(2.2) def createOrReplaceGlobalTempView(self, name): """Creates or replaces a global temporary view using the given name. The lifetime of this temporary view is tied to this Spark application. + .. versionadded:: 2.2.0 + + Examples + -------- >>> df.createOrReplaceGlobalTempView("people") >>> df2 = df.filter(df.age > 3) >>> df2.createOrReplaceGlobalTempView("people") @@ -210,34 +229,45 @@ def createOrReplaceGlobalTempView(self, name): self._jdf.createOrReplaceGlobalTempView(name) @property - @since(1.4) def write(self): """ Interface for saving the content of the non-streaming :class:`DataFrame` out into external storage. - :return: :class:`DataFrameWriter` + .. versionadded:: 1.4.0 + + Returns + ------- + :class:`DataFrameWriter` """ return DataFrameWriter(self) @property - @since(2.0) def writeStream(self): """ Interface for saving the content of the streaming :class:`DataFrame` out into external storage. - .. note:: Evolving. + .. versionadded:: 2.0.0 - :return: :class:`DataStreamWriter` + Notes + ----- + This API is evolving. + + Returns + ------- + :class:`DataStreamWriter` """ return DataStreamWriter(self) @property - @since(1.3) def schema(self): """Returns the schema of this :class:`DataFrame` as a :class:`pyspark.sql.types.StructType`. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.schema StructType(List(StructField(age,IntegerType,true),StructField(name,StringType,true))) """ @@ -249,10 +279,13 @@ def schema(self): "Unable to parse datatype from schema. %s" % e) return self._schema - @since(1.3) def printSchema(self): """Prints out the schema in the tree format. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.printSchema() root |-- age: integer (nullable = true) @@ -261,14 +294,19 @@ def printSchema(self): """ print(self._jdf.schema().treeString()) - @since(1.3) def explain(self, extended=None, mode=None): """Prints the (logical and physical) plans to the console for debugging purpose. - :param extended: boolean, default ``False``. If ``False``, prints only the physical plan. + .. versionadded:: 1.3.0 + + parameters + ---------- + extended : bool, optional + default ``False``. If ``False``, prints only the physical plan. When this is a string without specifying the ``mode``, it works as the mode is specified. - :param mode: specifies the expected output format of plans. + mode : str, optional + specifies the expected output format of plans. * ``simple``: Print only a physical plan. * ``extended``: Print both logical and physical plans. 
@@ -277,6 +315,11 @@ def explain(self, extended=None, mode=None): * ``formatted``: Split explain output into two sections: a physical plan outline \ and node details. + .. versionchanged:: 3.0.0 + Added optional argument `mode` to specify the expected output format of plans. + + Examples + -------- >>> df.explain() == Physical Plan == *(1) Scan ExistingRDD[age#0,name#1] @@ -302,9 +345,6 @@ def explain(self, extended=None, mode=None): == Optimized Logical Plan == ...Statistics... ... - - .. versionchanged:: 3.0.0 - Added optional argument `mode` to specify the expected output format of plans. """ if extended is not None and mode is not None: @@ -345,13 +385,17 @@ def explain(self, extended=None, mode=None): print(self._sc._jvm.PythonSQLUtils.explainString(self._jdf.queryExecution(), explain_mode)) - @since(2.4) def exceptAll(self, other): """Return a new :class:`DataFrame` containing rows in this :class:`DataFrame` but not in another :class:`DataFrame` while preserving duplicates. This is equivalent to `EXCEPT ALL` in SQL. + As standard in SQL, this function resolves columns by position (not by name). + + .. versionadded:: 2.4.0 + Examples + -------- >>> df1 = spark.createDataFrame( ... [("a", 1), ("a", 1), ("a", 1), ("a", 2), ("b", 3), ("c", 4)], ["C1", "C2"]) >>> df2 = spark.createDataFrame([("a", 1), ("b", 3)], ["C1", "C2"]) @@ -366,7 +410,6 @@ def exceptAll(self, other): | c| 4| +---+---+ - Also as standard in SQL, this function resolves columns by position (not by name). """ return DataFrame(self._jdf.exceptAll(other._jdf), self.sql_ctx) @@ -378,7 +421,6 @@ def isLocal(self): return self._jdf.isLocal() @property - @since(2.0) def isStreaming(self): """Returns ``True`` if this :class:`Dataset` contains one or more sources that continuously return data as it arrives. A :class:`Dataset` that reads data from a streaming source @@ -387,21 +429,33 @@ def isStreaming(self): :func:`collect`) will throw an :class:`AnalysisException` when there is a streaming source present. - .. note:: Evolving + .. versionadded:: 2.0.0 + + Notes + ----- + This API is evolving. """ return self._jdf.isStreaming() - @since(1.3) def show(self, n=20, truncate=True, vertical=False): """Prints the first ``n`` rows to the console. - :param n: Number of rows to show. - :param truncate: If set to ``True``, truncate strings longer than 20 chars by default. + .. versionadded:: 1.3.0 + + Parameters + ---------- + n : int, optional + Number of rows to show. + truncate : bool, optional + If set to ``True``, truncate strings longer than 20 chars by default. If set to a number greater than one, truncates long strings to length ``truncate`` and align cells right. - :param vertical: If set to ``True``, print output rows vertically (one line + vertical : bool, optional + If set to ``True``, print output rows vertically (one line per column value). + Examples + -------- >>> df DataFrame[age: int, name: string] >>> df.show() @@ -472,35 +526,46 @@ def _repr_html_(self): else: return None - @since(2.1) def checkpoint(self, eager=True): """Returns a checkpointed version of this Dataset. Checkpointing can be used to truncate the logical plan of this :class:`DataFrame`, which is especially useful in iterative algorithms where the plan may grow exponentially. It will be saved to files inside the checkpoint directory set with :meth:`SparkContext.setCheckpointDir`. - :param eager: Whether to checkpoint this :class:`DataFrame` immediately + .. versionadded:: 2.1.0 - .. 
note:: Experimental + Parameters + ---------- + eager : bool, optional + Whether to checkpoint this :class:`DataFrame` immediately + + Notes + ----- + This API is experimental. """ jdf = self._jdf.checkpoint(eager) return DataFrame(jdf, self.sql_ctx) - @since(2.3) def localCheckpoint(self, eager=True): """Returns a locally checkpointed version of this Dataset. Checkpointing can be used to truncate the logical plan of this :class:`DataFrame`, which is especially useful in iterative algorithms where the plan may grow exponentially. Local checkpoints are stored in the executors using the caching subsystem and therefore they are not reliable. - :param eager: Whether to checkpoint this :class:`DataFrame` immediately + .. versionadded:: 2.3.0 + + Parameters + ---------- + eager : bool, optional + Whether to checkpoint this :class:`DataFrame` immediately - .. note:: Experimental + Notes + ----- + This API is experimental. """ jdf = self._jdf.localCheckpoint(eager) return DataFrame(jdf, self.sql_ctx) - @since(2.1) def withWatermark(self, eventTime, delayThreshold): """Defines an event time watermark for this :class:`DataFrame`. A watermark tracks a point in time before which we assume no more late data is going to arrive. @@ -517,12 +582,20 @@ def withWatermark(self, eventTime, delayThreshold): to be at least `delayThreshold` behind the actual event time. In some cases we may still process records that arrive more than `delayThreshold` late. - :param eventTime: the name of the column that contains the event time of the row. - :param delayThreshold: the minimum delay to wait to data to arrive late, relative to the + .. versionadded:: 2.1.0 + + Parameters + ---------- + eventTime : str or :class:`Column` + the name of the column that contains the event time of the row. + delayThreshold : str + the minimum delay to wait to data to arrive late, relative to the latest record that has been processed in the form of an interval (e.g. "1 minute" or "5 hours"). - .. note:: Evolving + Notes + ----- + This API is evolving. >>> from pyspark.sql.functions import timestamp_seconds >>> sdf.select( @@ -537,14 +610,24 @@ def withWatermark(self, eventTime, delayThreshold): jdf = self._jdf.withWatermark(eventTime, delayThreshold) return DataFrame(jdf, self.sql_ctx) - @since(2.2) def hint(self, name, *parameters): """Specifies some hint on the current :class:`DataFrame`. - :param name: A name of the hint. - :param parameters: Optional parameters. - :return: :class:`DataFrame` + .. versionadded:: 2.2.0 + + Parameters + ---------- + name : str + A name of the hint. + parameters : str, list, float or int + Optional parameters. + + Returns + ------- + :class:`DataFrame` + Examples + -------- >>> df.join(df2.hint("broadcast"), "name").show() +----+---+------+ |name|age|height| @@ -568,19 +651,25 @@ def hint(self, name, *parameters): jdf = self._jdf.hint(name, self._jseq(parameters)) return DataFrame(jdf, self.sql_ctx) - @since(1.3) def count(self): """Returns the number of rows in this :class:`DataFrame`. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.count() 2 """ return int(self._jdf.count()) - @since(1.3) def collect(self): """Returns all the records as a list of :class:`Row`. + .. 
versionadded:: 1.3.0 + + Examples + -------- >>> df.collect() [Row(age=2, name='Alice'), Row(age=5, name='Bob')] """ @@ -588,7 +677,6 @@ def collect(self): sock_info = self._jdf.collectToPython() return list(_load_from_socket(sock_info, BatchedSerializer(PickleSerializer()))) - @since(2.0) def toLocalIterator(self, prefetchPartitions=False): """ Returns an iterator that contains all of the rows in this :class:`DataFrame`. @@ -596,9 +684,15 @@ def toLocalIterator(self, prefetchPartitions=False): :class:`DataFrame`. With prefetch it may consume up to the memory of the 2 largest partitions. - :param prefetchPartitions: If Spark should pre-fetch the next partition - before it is needed. + .. versionadded:: 2.0.0 + Parameters + ---------- + prefetchPartitions : bool, optional + If Spark should pre-fetch the next partition before it is needed. + + Examples + -------- >>> list(df.toLocalIterator()) [Row(age=2, name='Alice'), Row(age=5, name='Bob')] """ @@ -606,10 +700,13 @@ def toLocalIterator(self, prefetchPartitions=False): sock_info = self._jdf.toPythonIterator(prefetchPartitions) return _local_iterator_from_socket(sock_info, BatchedSerializer(PickleSerializer())) - @since(1.3) def limit(self, num): """Limits the result count to the number specified. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.limit(1).collect() [Row(age=2, name='Alice')] >>> df.limit(0).collect() @@ -618,16 +715,18 @@ def limit(self, num): jdf = self._jdf.limit(num) return DataFrame(jdf, self.sql_ctx) - @since(1.3) def take(self, num): """Returns the first ``num`` rows as a :class:`list` of :class:`Row`. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.take(2) [Row(age=2, name='Alice'), Row(age=5, name='Bob')] """ return self.limit(num).collect() - @since(3.0) def tail(self, num): """ Returns the last ``num`` rows as a :class:`list` of :class:`Row`. @@ -635,6 +734,10 @@ def tail(self, num): Running tail requires moving data into the application's driver process, and doing so with a very large ``num`` can crash the driver process with OutOfMemoryError. + .. versionadded:: 3.0.0 + + Examples + -------- >>> df.tail(1) [Row(age=5, name='Bob')] """ @@ -642,24 +745,30 @@ def tail(self, num): sock_info = self._jdf.tailToPython(num) return list(_load_from_socket(sock_info, BatchedSerializer(PickleSerializer()))) - @since(1.3) def foreach(self, f): """Applies the ``f`` function to all :class:`Row` of this :class:`DataFrame`. This is a shorthand for ``df.rdd.foreach()``. + .. versionadded:: 1.3.0 + + Examples + -------- >>> def f(person): ... print(person.name) >>> df.foreach(f) """ self.rdd.foreach(f) - @since(1.3) def foreachPartition(self, f): """Applies the ``f`` function to each partition of this :class:`DataFrame`. This a shorthand for ``df.rdd.foreachPartition()``. + .. versionadded:: 1.3.0 + + Examples + -------- >>> def f(people): ... for person in people: ... print(person.name) @@ -667,25 +776,30 @@ def foreachPartition(self, f): """ self.rdd.foreachPartition(f) - @since(1.3) def cache(self): """Persists the :class:`DataFrame` with the default storage level (`MEMORY_AND_DISK`). - .. note:: The default storage level has changed to `MEMORY_AND_DISK` to match Scala in 2.0. + .. versionadded:: 1.3.0 + + Notes + ----- + The default storage level has changed to `MEMORY_AND_DISK` to match Scala in 2.0. 
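As a hedged illustration of the row-fetching APIs documented above (`take`, `tail`, `limit`, `toLocalIterator`), all of which move data to the driver to varying degrees:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.range(10)

df.take(3)                             # first three rows as a list of Row
df.tail(2)                             # last two rows, collected to the driver
df.limit(5).count()                    # limit() is lazy; count() materialises it
sum(1 for _ in df.toLocalIterator())   # streams rows to the driver per partition
```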
""" self.is_cached = True self._jdf.cache() return self - @since(1.3) def persist(self, storageLevel=StorageLevel.MEMORY_AND_DISK_DESER): """Sets the storage level to persist the contents of the :class:`DataFrame` across operations after the first time it is computed. This can only be used to assign a new storage level if the :class:`DataFrame` does not have a storage level set yet. If no storage level is specified defaults to (`MEMORY_AND_DISK_DESER`) - .. note:: The default storage level has changed to `MEMORY_AND_DISK_DESER` to match Scala - in 3.0. + .. versionadded:: 1.3.0 + + Notes + ----- + The default storage level has changed to `MEMORY_AND_DISK_DESER` to match Scala in 3.0. """ self.is_cached = True javaStorageLevel = self._sc._getJavaStorageLevel(storageLevel) @@ -693,10 +807,13 @@ def persist(self, storageLevel=StorageLevel.MEMORY_AND_DISK_DESER): return self @property - @since(2.1) def storageLevel(self): """Get the :class:`DataFrame`'s current storage level. + .. versionadded:: 2.1.0 + + Examples + -------- >>> df.storageLevel StorageLevel(False, False, False, False, 1) >>> df.cache().storageLevel @@ -712,24 +829,24 @@ def storageLevel(self): java_storage_level.replication()) return storage_level - @since(1.3) def unpersist(self, blocking=False): """Marks the :class:`DataFrame` as non-persistent, and remove all blocks for it from memory and disk. - .. note:: `blocking` default has changed to ``False`` to match Scala in 2.0. + .. versionadded:: 1.3.0 + + Notes + ----- + `blocking` default has changed to ``False`` to match Scala in 2.0. """ self.is_cached = False self._jdf.unpersist(blocking) return self - @since(1.4) def coalesce(self, numPartitions): """ Returns a new :class:`DataFrame` that has exactly `numPartitions` partitions. - :param numPartitions: int, to specify the target number of partitions - Similar to coalesce defined on an :class:`RDD`, this operation results in a narrow dependency, e.g. if you go from 1000 partitions to 100 partitions, there will not be a shuffle, instead each of the 100 new partitions will @@ -743,26 +860,42 @@ def coalesce(self, numPartitions): current upstream partitions will be executed in parallel (per whatever the current partitioning is). + .. versionadded:: 1.4.0 + + Parameters + ---------- + numPartitions : int + specify the target number of partitions + + Examples + -------- >>> df.coalesce(1).rdd.getNumPartitions() 1 """ return DataFrame(self._jdf.coalesce(numPartitions), self.sql_ctx) - @since(1.3) def repartition(self, numPartitions, *cols): """ Returns a new :class:`DataFrame` partitioned by the given partitioning expressions. The resulting :class:`DataFrame` is hash partitioned. - :param numPartitions: + .. versionadded:: 1.3.0 + + Parameters + ---------- + numPartitions : int can be an int to specify the target number of partitions or a Column. If it is a Column, it will be used as the first partitioning column. If not specified, the default number of partitions is used. + cols : str or :class:`Column` + partitioning columns. - .. versionchanged:: 1.6 - Added optional arguments to specify the partitioning columns. Also made numPartitions - optional if partitioning columns are specified. + .. versionchanged:: 1.6 + Added optional arguments to specify the partitioning columns. Also made numPartitions + optional if partitioning columns are specified. 
+ Examples + -------- >>> df.repartition(10).rdd.getNumPartitions() 10 >>> data = df.union(df).repartition("age") @@ -810,25 +943,34 @@ def repartition(self, numPartitions, *cols): else: raise TypeError("numPartitions should be an int or Column") - @since("2.4.0") def repartitionByRange(self, numPartitions, *cols): """ Returns a new :class:`DataFrame` partitioned by the given partitioning expressions. The resulting :class:`DataFrame` is range partitioned. - :param numPartitions: + At least one partition-by expression must be specified. + When no explicit sort order is specified, "ascending nulls first" is assumed. + + .. versionadded:: 2.4.0 + + Parameters + ---------- + numPartitions : int can be an int to specify the target number of partitions or a Column. If it is a Column, it will be used as the first partitioning column. If not specified, the default number of partitions is used. + cols : str or :class:`Column` + partitioning columns. - At least one partition-by expression must be specified. - When no explicit sort order is specified, "ascending nulls first" is assumed. - - Note that due to performance reasons this method uses sampling to estimate the ranges. + Notes + ----- + Due to performance reasons this method uses sampling to estimate the ranges. Hence, the output may not be consistent, since sampling can return different values. The sample size can be controlled by the config `spark.sql.execution.rangeExchange.sampleSizePerPartition`. + Examples + -------- >>> df.repartitionByRange(2, "age").rdd.getNumPartitions() 2 >>> df.show() @@ -861,28 +1003,41 @@ def repartitionByRange(self, numPartitions, *cols): else: raise TypeError("numPartitions should be an int, string or Column") - @since(1.3) def distinct(self): """Returns a new :class:`DataFrame` containing the distinct rows in this :class:`DataFrame`. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.distinct().count() 2 """ return DataFrame(self._jdf.distinct(), self.sql_ctx) - @since(1.3) def sample(self, withReplacement=None, fraction=None, seed=None): """Returns a sampled subset of this :class:`DataFrame`. - :param withReplacement: Sample with replacement or not (default ``False``). - :param fraction: Fraction of rows to generate, range [0.0, 1.0]. - :param seed: Seed for sampling (default a random seed). + .. versionadded:: 1.3.0 - .. note:: This is not guaranteed to provide exactly the fraction specified of the total - count of the given :class:`DataFrame`. + Parameters + ---------- + withReplacement : bool, optional + Sample with replacement or not (default ``False``). + fraction : float, optional + Fraction of rows to generate, range [0.0, 1.0]. + seed : int, optional + Seed for sampling (default a random seed). - .. note:: `fraction` is required and, `withReplacement` and `seed` are optional. + Notes + ----- + This is not guaranteed to provide exactly the fraction specified of the total + count of the given :class:`DataFrame`. + `fraction` is required and, `withReplacement` and `seed` are optional. + + Examples + -------- >>> df = spark.range(10) >>> df.sample(0.5, 3).count() 7 @@ -935,19 +1090,32 @@ def sample(self, withReplacement=None, fraction=None, seed=None): jdf = self._jdf.sample(*args) return DataFrame(jdf, self.sql_ctx) - @since(1.5) def sampleBy(self, col, fractions, seed=None): """ Returns a stratified sample without replacement based on the fraction given on each stratum. - :param col: column that defines strata - :param fractions: + .. 
versionadded:: 1.5.0 + + Parameters + ---------- + col : :class:`Column` or str + column that defines strata + + .. versionchanged:: 3.0 + Added sampling by a column of :class:`Column` + fractions : dict sampling fraction for each stratum. If a stratum is not specified, we treat its fraction as zero. - :param seed: random seed - :return: a new :class:`DataFrame` that represents the stratified sample + seed : int, optional + random seed + + Returns + ------- + a new :class:`DataFrame` that represents the stratified sample + Examples + -------- >>> from pyspark.sql.functions import col >>> dataset = sqlContext.range(0, 100).select((col("id") % 3).alias("key")) >>> sampled = dataset.sampleBy("key", fractions={0: 0.1, 1: 0.2}, seed=0) @@ -960,9 +1128,6 @@ def sampleBy(self, col, fractions, seed=None): +---+-----+ >>> dataset.sampleBy(col("key"), fractions={2: 1.0}, seed=0).count() 33 - - .. versionchanged:: 3.0 - Added sampling by a column of :class:`Column` """ if isinstance(col, str): col = Column(col) @@ -978,14 +1143,21 @@ def sampleBy(self, col, fractions, seed=None): seed = seed if seed is not None else random.randint(0, sys.maxsize) return DataFrame(self._jdf.stat().sampleBy(col, self._jmap(fractions), seed), self.sql_ctx) - @since(1.4) def randomSplit(self, weights, seed=None): """Randomly splits this :class:`DataFrame` with the provided weights. - :param weights: list of doubles as weights with which to split the :class:`DataFrame`. + .. versionadded:: 1.4.0 + + Parameters + ---------- + weights : list + list of doubles as weights with which to split the :class:`DataFrame`. Weights will be normalized if they don't sum up to 1.0. - :param seed: The seed for sampling. + seed : int, optional + The seed for sampling. + Examples + -------- >>> splits = df4.randomSplit([1.0, 2.0], 24) >>> splits[0].count() 2 @@ -1001,33 +1173,45 @@ def randomSplit(self, weights, seed=None): return [DataFrame(rdd, self.sql_ctx) for rdd in rdd_array] @property - @since(1.3) def dtypes(self): """Returns all column names and their data types as a list. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.dtypes [('age', 'int'), ('name', 'string')] """ return [(str(f.name), f.dataType.simpleString()) for f in self.schema.fields] @property - @since(1.3) def columns(self): """Returns all column names as a list. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.columns ['age', 'name'] """ return [f.name for f in self.schema.fields] - @since(2.3) def colRegex(self, colName): """ Selects column based on the column name specified as a regex and returns it as :class:`Column`. - :param colName: string, column name specified as a regex. + .. versionadded:: 2.3.0 + + Parameters + ---------- + colName : str + string, column name specified as a regex. + Examples + -------- >>> df = spark.createDataFrame([("a", 1), ("b", 2), ("c", 3)], ["Col1", "Col2"]) >>> df.select(df.colRegex("`(Col1)?+.+`")).show() +----+ @@ -1043,12 +1227,18 @@ def colRegex(self, colName): jc = self._jdf.colRegex(colName) return Column(jc) - @since(1.3) def alias(self, alias): """Returns a new :class:`DataFrame` with an alias set. - :param alias: string, an alias name to be set for the :class:`DataFrame`. + .. versionadded:: 1.3.0 + + Parameters + ---------- + alias : str + an alias name to be set for the :class:`DataFrame`. 
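A small sketch of the stratified sampling and random splitting documented above (fractions, seeds and counts are illustrative, not guaranteed outputs):

```
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.master("local[1]").getOrCreate()

dataset = spark.range(0, 100).select((F.col("id") % 3).alias("key"))
sampled = dataset.sampleBy("key", fractions={0: 0.1, 1: 0.2}, seed=0)
sampled.groupBy("key").count().orderBy("key").show()

train, test = spark.range(0, 100).randomSplit([0.8, 0.2], seed=42)
print(train.count(), test.count())
```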
+ Examples + -------- >>> from pyspark.sql.functions import * >>> df_as1 = df.alias("df_as1") >>> df_as2 = df.alias("df_as2") @@ -1060,12 +1250,18 @@ def alias(self, alias): assert isinstance(alias, str), "alias should be a string" return DataFrame(getattr(self._jdf, "as")(alias), self.sql_ctx) - @since(2.1) def crossJoin(self, other): """Returns the cartesian product with another :class:`DataFrame`. - :param other: Right side of the cartesian product. + .. versionadded:: 2.1.0 + + Parameters + ---------- + other : :class:`DataFrame` + Right side of the cartesian product. + Examples + -------- >>> df.select("age", "name").collect() [Row(age=2, name='Alice'), Row(age=5, name='Bob')] >>> df2.select("name", "height").collect() @@ -1078,20 +1274,28 @@ def crossJoin(self, other): jdf = self._jdf.crossJoin(other._jdf) return DataFrame(jdf, self.sql_ctx) - @since(1.3) def join(self, other, on=None, how=None): """Joins with another :class:`DataFrame`, using the given join expression. - :param other: Right side of the join - :param on: a string for the join column name, a list of column names, + .. versionadded:: 1.3.0 + + Parameters + ---------- + other : :class:`DataFrame` + Right side of the join + on : str, list or :class:`Column`, optional + a string for the join column name, a list of column names, a join expression (Column), or a list of Columns. If `on` is a string or a list of strings indicating the name of the join column(s), the column(s) must exist on both sides, and this performs an equi-join. - :param how: str, default ``inner``. Must be one of: ``inner``, ``cross``, ``outer``, + how : str, optional + default ``inner``. Must be one of: ``inner``, ``cross``, ``outer``, ``full``, ``fullouter``, ``full_outer``, ``left``, ``leftouter``, ``left_outer``, ``right``, ``rightouter``, ``right_outer``, ``semi``, ``leftsemi``, ``left_semi``, ``anti``, ``leftanti`` and ``left_anti``. + Examples + -------- The following performs a full outer join between ``df1`` and ``df2``. >>> from pyspark.sql.functions import desc >>> df.join(df2, df.name == df2.name, 'outer').select(df.name, df2.height) \ @@ -1134,15 +1338,25 @@ def join(self, other, on=None, how=None): jdf = self._jdf.join(other._jdf, on, how) return DataFrame(jdf, self.sql_ctx) - @since(1.6) def sortWithinPartitions(self, *cols, **kwargs): """Returns a new :class:`DataFrame` with each partition sorted by the specified column(s). - :param cols: list of :class:`Column` or column names to sort by. - :param ascending: boolean or list of boolean (default ``True``). + .. versionadded:: 1.6.0 + + Parameters + ---------- + cols : str, list or :class:`Column`, optional + list of :class:`Column` or column names to sort by. + + Other Parameters + ---------------- + ascending : bool or list, optional + boolean or list of boolean (default ``True``). Sort ascending vs. descending. Specify list for multiple sort orders. If a list is specified, length of the list must equal length of the `cols`. + Examples + -------- >>> df.sortWithinPartitions("age", ascending=False).show() +---+-----+ |age| name| @@ -1154,15 +1368,25 @@ def sortWithinPartitions(self, *cols, **kwargs): jdf = self._jdf.sortWithinPartitions(self._sort_cols(cols, kwargs)) return DataFrame(jdf, self.sql_ctx) - @since(1.3) def sort(self, *cols, **kwargs): """Returns a new :class:`DataFrame` sorted by the specified column(s). - :param cols: list of :class:`Column` or column names to sort by. - :param ascending: boolean or list of boolean (default ``True``). + .. 
versionadded:: 1.3.0 + + Parameters + ---------- + cols : str, list, or :class:`Column`, optional + list of :class:`Column` or column names to sort by. + + Other Parameters + ---------------- + ascending : bool or list, optional + boolean or list of boolean (default ``True``). Sort ascending vs. descending. Specify list for multiple sort orders. If a list is specified, length of the list must equal length of the `cols`. + Examples + -------- >>> df.sort(df.age.desc()).collect() [Row(age=5, name='Bob'), Row(age=2, name='Alice')] >>> df.sort("age", ascending=False).collect() @@ -1218,17 +1442,24 @@ def _sort_cols(self, cols, kwargs): raise TypeError("ascending can only be boolean or list, but got %s" % type(ascending)) return self._jseq(jcols) - @since("1.3.1") def describe(self, *cols): """Computes basic statistics for numeric and string columns. + .. versionadded:: 1.3.1 + This include count, mean, stddev, min, and max. If no columns are given, this function computes statistics for all numerical or string columns. - .. note:: This function is meant for exploratory data analysis, as we make no - guarantee about the backward compatibility of the schema of the resulting - :class:`DataFrame`. + Notes + ----- + This function is meant for exploratory data analysis, as we make no + guarantee about the backward compatibility of the schema of the resulting + :class:`DataFrame`. + + Use summary for expanded statistics and control over which statistics to compute. + Examples + -------- >>> df.describe(['age']).show() +-------+------------------+ |summary| age| @@ -1250,14 +1481,15 @@ def describe(self, *cols): | max| 5| Bob| +-------+------------------+-----+ - Use summary for expanded statistics and control over which statistics to compute. + See Also + -------- + DataFrame.summary """ if len(cols) == 1 and isinstance(cols[0], list): cols = cols[0] jdf = self._jdf.describe(self._jseq(cols)) return DataFrame(jdf, self.sql_ctx) - @since("2.3.0") def summary(self, *statistics): """Computes specified statistics for numeric and string columns. Available statistics are: - count @@ -1270,10 +1502,16 @@ def summary(self, *statistics): If no statistics are given, this function computes count, mean, stddev, min, approximate quartiles (percentiles at 25%, 50%, and 75%), and max. - .. note:: This function is meant for exploratory data analysis, as we make no - guarantee about the backward compatibility of the schema of the resulting - :class:`DataFrame`. + .. versionadded:: 2.3.0 + + Notes + ----- + This function is meant for exploratory data analysis, as we make no + guarantee about the backward compatibility of the schema of the resulting + :class:`DataFrame`. + Examples + -------- >>> df.summary().show() +-------+------------------+-----+ |summary| age| name| @@ -1308,24 +1546,37 @@ def summary(self, *statistics): | count| 2| 2| +-------+---+----+ - See also describe for basic statistics. + See Also + -------- + DataFrame.display """ if len(statistics) == 1 and isinstance(statistics[0], list): statistics = statistics[0] jdf = self._jdf.summary(self._jseq(statistics)) return DataFrame(jdf, self.sql_ctx) - @since(1.3) def head(self, n=None): """Returns the first ``n`` rows. - .. note:: This method should only be used if the resulting array is expected - to be small, as all the data is loaded into the driver's memory. + .. versionadded:: 1.3.0 + + Notes + ----- + This method should only be used if the resulting array is expected + to be small, as all the data is loaded into the driver's memory. 
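To illustrate the relationship between `describe` and `summary` spelled out above, a minimal sketch (column choice and requested statistics are illustrative):

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame([("Alice", 2), ("Bob", 5)], ["name", "age"])

df.describe(["age"]).show()                              # count/mean/stddev/min/max
df.summary("count", "min", "25%", "75%", "max").show()   # chosen statistics only
```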
- :param n: int, default 1. Number of rows to return. - :return: If n is greater than 1, return a list of :class:`Row`. - If n is 1, return a single Row. + Parameters + ---------- + n : int, optional + default 1. Number of rows to return. + Returns + ------- + If n is greater than 1, return a list of :class:`Row`. + If n is 1, return a single Row. + + Examples + -------- >>> df.head() Row(age=2, name='Alice') >>> df.head(1) @@ -1336,19 +1587,25 @@ def head(self, n=None): return rs[0] if rs else None return self.take(n) - @since(1.3) def first(self): """Returns the first row as a :class:`Row`. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.first() Row(age=2, name='Alice') """ return self.head() - @since(1.3) def __getitem__(self, item): """Returns the column as a :class:`Column`. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.select(df['age']).collect() [Row(age=2), Row(age=5)] >>> df[ ["name", "age"]].collect() @@ -1371,10 +1628,13 @@ def __getitem__(self, item): else: raise TypeError("unexpected item type: %s" % type(item)) - @since(1.3) def __getattr__(self, name): """Returns the :class:`Column` denoted by ``name``. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.select(df.age).collect() [Row(age=2), Row(age=5)] """ @@ -1384,14 +1644,20 @@ def __getattr__(self, name): jc = self._jdf.apply(name) return Column(jc) - @since(1.3) def select(self, *cols): """Projects a set of expressions and returns a new :class:`DataFrame`. - :param cols: list of column names (string) or expressions (:class:`Column`). + .. versionadded:: 1.3.0 + + Parameters + ---------- + cols : str, :class:`Column`, or list + column names (string) or expressions (:class:`Column`). If one of the column names is '*', that column is expanded to include all columns in the current :class:`DataFrame`. + Examples + -------- >>> df.select('*').collect() [Row(age=2, name='Alice'), Row(age=5, name='Bob')] >>> df.select('name', 'age').collect() @@ -1402,12 +1668,15 @@ def select(self, *cols): jdf = self._jdf.select(self._jcols(*cols)) return DataFrame(jdf, self.sql_ctx) - @since(1.3) def selectExpr(self, *expr): """Projects a set of SQL expressions and returns a new :class:`DataFrame`. This is a variant of :func:`select` that accepts SQL expressions. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.selectExpr("age * 2", "abs(age)").collect() [Row((age * 2)=4, abs(age)=2), Row((age * 2)=10, abs(age)=5)] """ @@ -1416,15 +1685,21 @@ def selectExpr(self, *expr): jdf = self._jdf.selectExpr(self._jseq(expr)) return DataFrame(jdf, self.sql_ctx) - @since(1.3) def filter(self, condition): """Filters rows using the given condition. :func:`where` is an alias for :func:`filter`. - :param condition: a :class:`Column` of :class:`types.BooleanType` + .. versionadded:: 1.3.0 + + Parameters + ---------- + condition : :class:`Column` or str + a :class:`Column` of :class:`types.BooleanType` or a string of SQL expression. + Examples + -------- >>> df.filter(df.age > 3).collect() [Row(age=5, name='Bob')] >>> df.where(df.age == 2).collect() @@ -1443,7 +1718,6 @@ def filter(self, condition): raise TypeError("condition should be string or Column") return DataFrame(jdf, self.sql_ctx) - @since(1.3) def groupBy(self, *cols): """Groups the :class:`DataFrame` using the specified columns, so we can run aggregation on them. See :class:`GroupedData` @@ -1451,9 +1725,16 @@ def groupBy(self, *cols): :func:`groupby` is an alias for :func:`groupBy`. - :param cols: list of columns to group by. + .. 
versionadded:: 1.3.0 + + Parameters + ---------- + cols : list, str or :class:`Column` + columns to group by. Each element should be a column name (string) or an expression (:class:`Column`). + Examples + -------- >>> df.groupBy().avg().collect() [Row(avg(age)=3.5)] >>> sorted(df.groupBy('name').agg({'age': 'mean'}).collect()) @@ -1467,12 +1748,15 @@ def groupBy(self, *cols): from pyspark.sql.group import GroupedData return GroupedData(jgd, self) - @since(1.4) def rollup(self, *cols): """ Create a multi-dimensional rollup for the current :class:`DataFrame` using the specified columns, so we can run aggregation on them. + .. versionadded:: 1.4.0 + + Examples + -------- >>> df.rollup("name", df.age).count().orderBy("name", "age").show() +-----+----+-----+ | name| age|count| @@ -1488,12 +1772,15 @@ def rollup(self, *cols): from pyspark.sql.group import GroupedData return GroupedData(jgd, self) - @since(1.4) def cube(self, *cols): """ Create a multi-dimensional cube for the current :class:`DataFrame` using the specified columns, so we can run aggregations on them. + .. versionadded:: 1.4.0 + + Examples + -------- >>> df.cube("name", df.age).count().orderBy("name", "age").show() +-----+----+-----+ | name| age|count| @@ -1511,11 +1798,14 @@ def cube(self, *cols): from pyspark.sql.group import GroupedData return GroupedData(jgd, self) - @since(1.3) def agg(self, *exprs): """ Aggregate on the entire :class:`DataFrame` without groups (shorthand for ``df.groupBy().agg()``). + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.agg({"age": "max"}).collect() [Row(max(age)=5)] >>> from pyspark.sql import functions as F @@ -1548,7 +1838,6 @@ def unionAll(self, other): """ return self.union(other) - @since(2.3) def unionByName(self, other, allowMissingColumns=False): """ Returns a new :class:`DataFrame` containing union of rows in this and another :class:`DataFrame`. @@ -1556,6 +1845,10 @@ def unionByName(self, other, allowMissingColumns=False): This is different from both `UNION ALL` and `UNION DISTINCT` in SQL. To do a SQL-style set union (that does deduplication of elements), use this function followed by :func:`distinct`. + .. versionadded:: 2.3.0 + + Examples + -------- The difference between this function and :func:`union` is that this function resolves columns by name (not by position): @@ -1599,12 +1892,17 @@ def intersect(self, other): """ return DataFrame(self._jdf.intersect(other._jdf), self.sql_ctx) - @since(2.4) def intersectAll(self, other): """ Return a new :class:`DataFrame` containing rows in both this :class:`DataFrame` and another :class:`DataFrame` while preserving duplicates. - This is equivalent to `INTERSECT ALL` in SQL. + This is equivalent to `INTERSECT ALL` in SQL. As standard in SQL, this function + resolves columns by position (not by name). + + .. versionadded:: 2.4.0 + + Examples + -------- >>> df1 = spark.createDataFrame([("a", 1), ("a", 1), ("b", 3), ("c", 4)], ["C1", "C2"]) >>> df2 = spark.createDataFrame([("a", 1), ("a", 1), ("b", 3)], ["C1", "C2"]) @@ -1617,7 +1915,6 @@ def intersectAll(self, other): | b| 3| +---+---+ - Also as standard in SQL, this function resolves columns by position (not by name). """ return DataFrame(self._jdf.intersectAll(other._jdf), self.sql_ctx) @@ -1631,7 +1928,6 @@ def subtract(self, other): """ return DataFrame(getattr(self._jdf, "except")(other._jdf), self.sql_ctx) - @since(1.4) def dropDuplicates(self, subset=None): """Return a new :class:`DataFrame` with duplicate rows removed, optionally only considering certain columns. 
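The rollup and cube conversions above read almost identically, so a short sketch of the behavioural difference may help (assuming the same `spark` session and the `name`/`age` DataFrame used in the doctests):

    # rollup("name", "age") aggregates over (name, age), (name) and the grand total,
    # while cube("name", "age") additionally aggregates over (age) on its own.
    df.rollup("name", "age").count().orderBy("name", "age").show()
    df.cube("name", "age").count().orderBy("name", "age").show()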
@@ -1644,6 +1940,10 @@ def dropDuplicates(self, subset=None): :func:`drop_duplicates` is an alias for :func:`dropDuplicates`. + .. versionadded:: 1.4.0 + + Examples + -------- >>> from pyspark.sql import Row >>> df = sc.parallelize([ \\ ... Row(name='Alice', age=5, height=80), \\ @@ -1670,19 +1970,27 @@ def dropDuplicates(self, subset=None): jdf = self._jdf.dropDuplicates(self._jseq(subset)) return DataFrame(jdf, self.sql_ctx) - @since("1.3.1") def dropna(self, how='any', thresh=None, subset=None): """Returns a new :class:`DataFrame` omitting rows with null values. :func:`DataFrame.dropna` and :func:`DataFrameNaFunctions.drop` are aliases of each other. - :param how: 'any' or 'all'. + .. versionadded:: 1.3.1 + + Parameters + ---------- + how : str, optional + 'any' or 'all'. If 'any', drop a row if it contains any nulls. If 'all', drop a row only if all its values are null. - :param thresh: int, default None + thresh: int, optional + default None If specified, drop rows that have less than `thresh` non-null values. This overwrites the `how` parameter. - :param subset: optional list of column names to consider. + subset : str, tuple or list, optional + optional list of column names to consider. + Examples + -------- >>> df4.na.drop().show() +---+------+-----+ |age|height| name| @@ -1705,21 +2013,27 @@ def dropna(self, how='any', thresh=None, subset=None): return DataFrame(self._jdf.na().drop(thresh, self._jseq(subset)), self.sql_ctx) - @since("1.3.1") def fillna(self, value, subset=None): """Replace null values, alias for ``na.fill()``. :func:`DataFrame.fillna` and :func:`DataFrameNaFunctions.fill` are aliases of each other. - :param value: int, float, string, bool or dict. + .. versionadded:: 1.3.1 + + Parameters + ---------- + value : int, float, string, bool or dict Value to replace null values with. If the value is a dict, then `subset` is ignored and `value` must be a mapping from column name (string) to replacement value. The replacement value must be an int, float, boolean, or string. - :param subset: optional list of column names to consider. + subset : str, tuple or list, optional + optional list of column names to consider. Columns specified in subset that do not have matching data type are ignored. For example, if `value` is a string, and subset contains a non-string column, then the non-string column is simply ignored. + Examples + -------- >>> df4.na.fill(50).show() +---+------+-----+ |age|height| name| @@ -1770,7 +2084,6 @@ def fillna(self, value, subset=None): return DataFrame(self._jdf.na().fill(value, self._jseq(subset)), self.sql_ctx) - @since(1.4) def replace(self, to_replace, value=_NoValue, subset=None): """Returns a new :class:`DataFrame` replacing a value with another value. :func:`DataFrame.replace` and :func:`DataFrameNaFunctions.replace` are @@ -1782,20 +2095,27 @@ def replace(self, to_replace, value=_NoValue, subset=None): floating point representation. In case of conflicts (for example with `{42: -1, 42.0: 1}`) and arbitrary replacement will be used. - :param to_replace: bool, int, float, string, list or dict. + .. versionadded:: 1.4.0 + + Parameters + ---------- + to_replace : bool, int, float, string, list or dict Value to be replaced. If the value is a dict, then `value` is ignored or can be omitted, and `to_replace` must be a mapping between a value and a replacement. - :param value: bool, int, float, string, list or None. + value : bool, int, float, string or None, optional The replacement value must be a bool, int, float, string or None. 
If `value` is a list, `value` should be of the same length and type as `to_replace`. If `value` is a scalar and `to_replace` is a sequence, then `value` is used as a replacement for each item in `to_replace`. - :param subset: optional list of column names to consider. + subset : list, optional + optional list of column names to consider. Columns specified in subset that do not have matching data type are ignored. For example, if `value` is a string, and subset contains a non-string column, then the non-string column is simply ignored. + Examples + -------- >>> df4.na.replace(10, 20).show() +----+------+-----+ | age|height| name| @@ -1910,7 +2230,6 @@ def all_of_(xs): return DataFrame( self._jdf.na().replace(self._jseq(subset), self._jmap(rep_dict)), self.sql_ctx) - @since(2.0) def approxQuantile(self, col, probabilities, relativeError): """ Calculates the approximate quantiles of numerical columns of a @@ -1933,23 +2252,33 @@ def approxQuantile(self, col, probabilities, relativeError): Note that null values will be ignored in numerical columns before calculation. For columns only containing null values, an empty list is returned. - :param col: str, list. - Can be a single column name, or a list of names for multiple columns. - :param probabilities: a list of quantile probabilities - Each number must belong to [0, 1]. - For example 0 is the minimum, 0.5 is the median, 1 is the maximum. - :param relativeError: The relative target precision to achieve - (>= 0). If set to zero, the exact quantiles are computed, which - could be very expensive. Note that values greater than 1 are - accepted but give the same result as 1. - :return: the approximate quantiles at the given probabilities. If - the input `col` is a string, the output is a list of floats. If the - input `col` is a list or tuple of strings, the output is also a - list, but each element in it is a list of floats, i.e., the output - is a list of list of floats. - - .. versionchanged:: 2.2 - Added support for multiple columns. + .. versionadded:: 2.0.0 + + Parameters + ---------- + col: str, tuple or list + Can be a single column name, or a list of names for multiple columns. + + .. versionchanged:: 2.2 + Added support for multiple columns. + probabilities : list or tuple + a list of quantile probabilities + Each number must belong to [0, 1]. + For example 0 is the minimum, 0.5 is the median, 1 is the maximum. + relativeError : float + The relative target precision to achieve + (>= 0). If set to zero, the exact quantiles are computed, which + could be very expensive. Note that values greater than 1 are + accepted but give the same result as 1. + + Returns + ------- + list + the approximate quantiles at the given probabilities. If + the input `col` is a string, the output is a list of floats. If the + input `col` is a list or tuple of strings, the output is also a + list, but each element in it is a list of floats, i.e., the output + is a list of list of floats. """ if not isinstance(col, (str, list, tuple)): @@ -1984,16 +2313,22 @@ def approxQuantile(self, col, probabilities, relativeError): jaq_list = [list(j) for j in jaq] return jaq_list[0] if isStr else jaq_list - @since(1.4) def corr(self, col1, col2, method=None): """ Calculates the correlation of two columns of a :class:`DataFrame` as a double value. Currently only supports the Pearson Correlation Coefficient. :func:`DataFrame.corr` and :func:`DataFrameStatFunctions.corr` are aliases of each other. 
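Because the single-column and multi-column forms of approxQuantile documented above return differently shaped results, a small sketch may help (illustrative data only, assuming a SparkSession named `spark`):

    df = spark.createDataFrame([(1, 10.0), (2, 20.0), (3, 30.0), (4, 40.0)], ["a", "b"])

    # A single column name yields a flat list of floats; relativeError=0.0 requests exact quantiles.
    median_a = df.approxQuantile("a", [0.5], 0.0)

    # A list of column names yields one list of floats per column, in the same order as `col`.
    quartiles = df.approxQuantile(["a", "b"], [0.25, 0.5, 0.75], 0.1)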
- :param col1: The name of the first column - :param col2: The name of the second column - :param method: The correlation method. Currently only supports "pearson" + .. versionadded:: 1.4.0 + + Parameters + ---------- + col1 : str + The name of the first column + col2 : str + The name of the second column + method : str, optional + The correlation method. Currently only supports "pearson" """ if not isinstance(col1, str): raise ValueError("col1 should be a string.") @@ -2006,14 +2341,19 @@ def corr(self, col1, col2, method=None): "coefficient is supported.") return self._jdf.stat().corr(col1, col2, method) - @since(1.4) def cov(self, col1, col2): """ Calculate the sample covariance for the given columns, specified by their names, as a double value. :func:`DataFrame.cov` and :func:`DataFrameStatFunctions.cov` are aliases. - :param col1: The name of the first column - :param col2: The name of the second column + .. versionadded:: 1.4.0 + + Parameters + ---------- + col1 : str + The name of the first column + col2 : str + The name of the second column """ if not isinstance(col1, str): raise ValueError("col1 should be a string.") @@ -2021,7 +2361,6 @@ def cov(self, col1, col2): raise ValueError("col2 should be a string.") return self._jdf.stat().cov(col1, col2) - @since(1.4) def crosstab(self, col1, col2): """ Computes a pair-wise frequency table of the given columns. Also known as a contingency @@ -2032,9 +2371,15 @@ def crosstab(self, col1, col2): Pairs that have no occurrences will have zero as their counts. :func:`DataFrame.crosstab` and :func:`DataFrameStatFunctions.crosstab` are aliases. - :param col1: The name of the first column. Distinct items will make the first item of + .. versionadded:: 1.4.0 + + Parameters + ---------- + col1 : str + The name of the first column. Distinct items will make the first item of each row. - :param col2: The name of the second column. Distinct items will make the column names + col2 : str + The name of the second column. Distinct items will make the column names of the :class:`DataFrame`. """ if not isinstance(col1, str): @@ -2043,7 +2388,6 @@ def crosstab(self, col1, col2): raise ValueError("col2 should be a string.") return DataFrame(self._jdf.stat().crosstab(col1, col2), self.sql_ctx) - @since(1.4) def freqItems(self, cols, support=None): """ Finding frequent items for columns, possibly with false positives. Using the @@ -2051,14 +2395,22 @@ def freqItems(self, cols, support=None): "https://doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou". :func:`DataFrame.freqItems` and :func:`DataFrameStatFunctions.freqItems` are aliases. - .. note:: This function is meant for exploratory data analysis, as we make no - guarantee about the backward compatibility of the schema of the resulting - :class:`DataFrame`. + .. versionadded:: 1.4.0 - :param cols: Names of the columns to calculate frequent items for as a list or tuple of + Parameters + ---------- + cols : list or tuple + Names of the columns to calculate frequent items for as a list or tuple of strings. - :param support: The frequency with which to consider an item 'frequent'. Default is 1%. + support : float, optional + The frequency with which to consider an item 'frequent'. Default is 1%. The support must be greater than 1e-4. + + Notes + ----- + This function is meant for exploratory data analysis, as we make no + guarantee about the backward compatibility of the schema of the resulting + :class:`DataFrame`. 
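To make the freqItems contract above concrete, a minimal sketch (illustrative data only, assuming a SparkSession named `spark`):

    df = spark.createDataFrame([(1, 11), (1, 12), (2, 13), (1, 11)], ["a", "b"])

    # The result may contain false positives; each output column is suffixed with `_freqItems`.
    df.freqItems(["a", "b"], support=0.5).show()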
""" if isinstance(cols, tuple): cols = list(cols) @@ -2068,7 +2420,6 @@ def freqItems(self, cols, support=None): support = 0.01 return DataFrame(self._jdf.stat().freqItems(_to_seq(self._sc, cols), support), self.sql_ctx) - @since(1.3) def withColumn(self, colName, col): """ Returns a new :class:`DataFrame` by adding a column or replacing the @@ -2077,14 +2428,24 @@ def withColumn(self, colName, col): The column expression must be an expression over this :class:`DataFrame`; attempting to add a column from some other :class:`DataFrame` will raise an error. - :param colName: string, name of the new column. - :param col: a :class:`Column` expression for the new column. + .. versionadded:: 1.3.0 + + Parameters + ---------- + colName : str + string, name of the new column. + col : :class:`Column` + a :class:`Column` expression for the new column. - .. note:: This method introduces a projection internally. Therefore, calling it multiple - times, for instance, via loops in order to add multiple columns can generate big - plans which can cause performance issues and even `StackOverflowException`. - To avoid this, use :func:`select` with the multiple columns at once. + Notes + ----- + This method introduces a projection internally. Therefore, calling it multiple + times, for instance, via loops in order to add multiple columns can generate big + plans which can cause performance issues and even `StackOverflowException`. + To avoid this, use :func:`select` with the multiple columns at once. + Examples + -------- >>> df.withColumn('age2', df.age + 2).collect() [Row(age=2, name='Alice', age2=4), Row(age=5, name='Bob', age2=7)] @@ -2092,27 +2453,39 @@ def withColumn(self, colName, col): assert isinstance(col, Column), "col should be Column" return DataFrame(self._jdf.withColumn(colName, col._jc), self.sql_ctx) - @since(1.3) def withColumnRenamed(self, existing, new): """Returns a new :class:`DataFrame` by renaming an existing column. This is a no-op if schema doesn't contain the given column name. - :param existing: string, name of the existing column to rename. - :param new: string, new name of the column. + .. versionadded:: 1.3.0 + + Parameters + ---------- + existing : str + string, name of the existing column to rename. + new : str + string, new name of the column. + Examples + -------- >>> df.withColumnRenamed('age', 'age2').collect() [Row(age2=2, name='Alice'), Row(age2=5, name='Bob')] """ return DataFrame(self._jdf.withColumnRenamed(existing, new), self.sql_ctx) - @since(1.4) def drop(self, *cols): """Returns a new :class:`DataFrame` that drops the specified column. This is a no-op if schema doesn't contain the given column name(s). - :param cols: a string name of the column to drop, or a - :class:`Column` to drop, or a list of string name of the columns to drop. + .. versionadded:: 1.4.0 + Parameters + ---------- + cols: str or :class:`Column` + a name of the column, or the :class:`Column` to drop + + Examples + -------- >>> df.drop('age').collect() [Row(name='Alice'), Row(name='Bob')] @@ -2147,20 +2520,31 @@ def drop(self, *cols): def toDF(self, *cols): """Returns a new :class:`DataFrame` that with new specified column names - :param cols: list of new column names (string) + Parameters + ---------- + cols : str + new column names + Examples + -------- >>> df.toDF('f1', 'f2').collect() [Row(f1=2, f2='Alice'), Row(f1=5, f2='Bob')] """ jdf = self._jdf.toDF(self._jseq(cols)) return DataFrame(jdf, self.sql_ctx) - @since(3.0) def transform(self, func): """Returns a new :class:`DataFrame`. 
Concise syntax for chaining custom transformations. - :param func: a function that takes and returns a :class:`DataFrame`. + .. versionadded:: 3.0.0 + + Parameters + ---------- + func : function + a function that takes and returns a :class:`DataFrame`. + Examples + -------- >>> from pyspark.sql.functions import col >>> df = spark.createDataFrame([(1, 1.0), (2, 2.0)], ["int", "float"]) >>> def cast_all_to_int(input_df): @@ -2180,21 +2564,26 @@ def transform(self, func): "should have been DataFrame." % type(result) return result - @since(3.1) def sameSemantics(self, other): """ Returns `True` when the logical query plans inside both :class:`DataFrame`\\s are equal and therefore return same results. - .. note:: The equality comparison here is simplified by tolerating the cosmetic differences - such as attribute names. + .. versionadded:: 3.1.0 - .. note:: This API can compare both :class:`DataFrame`\\s very fast but can still return - `False` on the :class:`DataFrame` that return the same results, for instance, from - different plans. Such false negative semantic can be useful when caching as an example. + Notes + ----- + The equality comparison here is simplified by tolerating the cosmetic differences + such as attribute names. - .. note:: DeveloperApi + This API can compare both :class:`DataFrame`\\s very fast but can still return + `False` on the :class:`DataFrame` that return the same results, for instance, from + different plans. Such false negative semantic can be useful when caching as an example. + This API is a developer API. + + Examples + -------- >>> df1 = spark.range(10) >>> df2 = spark.range(10) >>> df1.withColumn("col1", df1.id * 2).sameSemantics(df2.withColumn("col1", df2.id * 2)) @@ -2209,16 +2598,21 @@ def sameSemantics(self, other): % type(other)) return self._jdf.sameSemantics(other._jdf) - @since(3.1) def semanticHash(self): """ Returns a hash code of the logical query plan against this :class:`DataFrame`. - .. note:: Unlike the standard hash code, the hash is calculated against the query plan - simplified by tolerating the cosmetic differences such as attribute names. + .. versionadded:: 3.1.0 + + Notes + ----- + Unlike the standard hash code, the hash is calculated against the query plan + simplified by tolerating the cosmetic differences such as attribute names. - .. note:: DeveloperApi + This API is a developer API. + Examples + -------- >>> spark.range(10).selectExpr("id as col0").semanticHash() # doctest: +SKIP 1855039936 >>> spark.range(10).selectExpr("id as col1").semanticHash() # doctest: +SKIP @@ -2226,7 +2620,6 @@ def semanticHash(self): """ return self._jdf.semanticHash() - @since(3.1) def inputFiles(self): """ Returns a best-effort snapshot of the files that compose this :class:`DataFrame`. @@ -2234,6 +2627,10 @@ def inputFiles(self): takes the union of all results. Depending on the source relations, this may not find all input files. Duplicates are removed. + .. versionadded:: 3.1.0 + + Examples + -------- >>> df = spark.read.load("examples/src/main/resources/people.json", format="json") >>> len(df.inputFiles()) 1 @@ -2260,7 +2657,6 @@ def inputFiles(self): sinceversion=1.4, doc=":func:`drop_duplicates` is an alias for :func:`dropDuplicates`.") - @since(3.1) def writeTo(self, table): """ Create a write configuration builder for v2 sources. @@ -2269,6 +2665,10 @@ def writeTo(self, table): For example, to append or create or replace existing tables. + .. 
versionadded:: 3.1.0 + + Examples + -------- >>> df.writeTo("catalog.db.table").append() # doctest: +SKIP >>> df.writeTo( # doctest: +SKIP ... "catalog.db.table" diff --git a/python/pyspark/sql/dataframe.pyi b/python/pyspark/sql/dataframe.pyi index c498d529d820f..1351c59470c9d 100644 --- a/python/pyspark/sql/dataframe.pyi +++ b/python/pyspark/sql/dataframe.pyi @@ -31,6 +31,7 @@ from typing import ( from py4j.java_gateway import JavaObject # type: ignore[import] from pyspark.sql._typing import ColumnOrName, LiteralType, OptionalPrimitiveType +from pyspark._typing import PrimitiveType from pyspark.sql.types import ( # noqa: F401 StructType, StructField, @@ -86,7 +87,7 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): def withWatermark( self, eventTime: ColumnOrName, delayThreshold: str ) -> DataFrame: ... - def hint(self, name: str, *parameters: Any) -> DataFrame: ... + def hint(self, name: str, *parameters: Union[PrimitiveType, List[PrimitiveType]]) -> DataFrame: ... def count(self) -> int: ... def collect(self) -> List[Row]: ... def toLocalIterator(self, prefetchPartitions: bool = ...) -> Iterator[Row]: ... @@ -122,7 +123,7 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): seed: Optional[int] = ..., ) -> DataFrame: ... def sampleBy( - self, col: str, fractions: Dict[Any, float], seed: Optional[int] = ... + self, col: ColumnOrName, fractions: Dict[Any, float], seed: Optional[int] = ... ) -> DataFrame: ... def randomSplit( self, weights: List[float], seed: Optional[int] = ... @@ -199,7 +200,7 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): self, how: str = ..., thresh: Optional[int] = ..., - subset: Optional[List[str]] = ..., + subset: Optional[Union[str, Tuple[str, ...], List[str]]] = ..., ) -> DataFrame: ... @overload def fillna( @@ -237,13 +238,16 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): subset: Optional[List[str]] = ..., ) -> DataFrame: ... def approxQuantile( - self, col: str, probabilities: List[float], relativeError: float + self, + col: Union[str, Tuple[str, ...], List[str]], + probabilities: Union[List[float], Tuple[float, ...]], + relativeError: float ) -> List[float]: ... def corr(self, col1: str, col2: str, method: Optional[str] = ...) -> float: ... def cov(self, col1: str, col2: str) -> float: ... def crosstab(self, col1: str, col2: str) -> DataFrame: ... def freqItems( - self, cols: List[str], support: Optional[float] = ... + self, cols: Union[List[str], Tuple[str]], support: Optional[float] = ... ) -> DataFrame: ... def withColumn(self, colName: str, col: Column) -> DataFrame: ... def withColumnRenamed(self, existing: str, new: str) -> DataFrame: ... diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index c349ae5cf46c4..87b999dca76ec 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -84,11 +84,14 @@ def _options_to_str(options): return {key: to_str(value) for (key, value) in options.items()} -@since(1.3) def lit(col): """ Creates a :class:`Column` of literal value. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.select(lit(5).alias('height')).withColumn('spark_user', lit(True)).take(1) [Row(height=5, spark_user=True)] """ @@ -199,26 +202,39 @@ def sumDistinct(col): return _invoke_function_over_column("sumDistinct", col) -@since(1.4) def acos(col): """ - :return: inverse cosine of `col`, as if computed by `java.lang.Math.acos()` + .. 
versionadded:: 1.4.0 + + Returns + ------- + :class:`Column` + inverse cosine of `col`, as if computed by `java.lang.Math.acos()` """ return _invoke_function_over_column("acos", col) -@since(1.4) def asin(col): """ - :return: inverse sine of `col`, as if computed by `java.lang.Math.asin()` + .. versionadded:: 1.3.0 + + + Returns + ------- + :class:`Column` + inverse sine of `col`, as if computed by `java.lang.Math.asin()` """ return _invoke_function_over_column("asin", col) -@since(1.4) def atan(col): """ - :return: inverse tangent of `col`, as if computed by `java.lang.Math.atan()` + .. versionadded:: 1.4.0 + + Returns + ------- + :class:`Column` + inverse tangent of `col`, as if computed by `java.lang.Math.atan()` """ return _invoke_function_over_column("atan", col) @@ -239,20 +255,36 @@ def ceil(col): return _invoke_function_over_column("ceil", col) -@since(1.4) def cos(col): """ - :param col: angle in radians - :return: cosine of the angle, as if computed by `java.lang.Math.cos()`. + .. versionadded:: 1.4.0 + + Parameters + ---------- + col : :class:`Column` or str + angle in radians + + Returns + ------- + :class:`Column` + cosine of the angle, as if computed by `java.lang.Math.cos()`. """ return _invoke_function_over_column("cos", col) -@since(1.4) def cosh(col): """ - :param col: hyperbolic angle - :return: hyperbolic cosine of the angle, as if computed by `java.lang.Math.cosh()` + .. versionadded:: 1.4.0 + + Parameters + ---------- + col : :class:`Column` or str + hyperbolic angle + + Returns + ------- + :class:`Column` + hyperbolic cosine of the angle, as if computed by `java.lang.Math.cosh()` """ return _invoke_function_over_column("cosh", col) @@ -322,40 +354,71 @@ def signum(col): return _invoke_function_over_column("signum", col) -@since(1.4) def sin(col): """ - :param col: angle in radians - :return: sine of the angle, as if computed by `java.lang.Math.sin()` + .. versionadded:: 1.4.0 + + Parameters + ---------- + col : :class:`Column` or str + + Returns + ------- + :class:`Column` + sine of the angle, as if computed by `java.lang.Math.sin()` """ return _invoke_function_over_column("sin", col) -@since(1.4) def sinh(col): """ - :param col: hyperbolic angle - :return: hyperbolic sine of the given value, - as if computed by `java.lang.Math.sinh()` + .. versionadded:: 1.4.0 + + Parameters + ---------- + col : :class:`Column` or str + hyperbolic angle + + Returns + ------- + :class:`Column` + hyperbolic sine of the given value, + as if computed by `java.lang.Math.sinh()` """ return _invoke_function_over_column("sinh", col) -@since(1.4) def tan(col): """ - :param col: angle in radians - :return: tangent of the given value, as if computed by `java.lang.Math.tan()` + .. versionadded:: 1.4.0 + + Parameters + ---------- + col : :class:`Column` or str + angle in radians + + Returns + ------- + :class:`Column` + tangent of the given value, as if computed by `java.lang.Math.tan()` """ return _invoke_function_over_column("tan", col) -@since(1.4) def tanh(col): """ - :param col: hyperbolic angle - :return: hyperbolic tangent of the given value - as if computed by `java.lang.Math.tanh()` + .. versionadded:: 1.4.0 + + Parameters + ---------- + col : :class:`Column` or str + hyperbolic angle + + Returns + ------- + :class:`Column` + hyperbolic tangent of the given value + as if computed by `java.lang.Math.tanh()` """ return _invoke_function_over_column("tanh", col) @@ -363,7 +426,8 @@ def tanh(col): @since(1.4) def toDegrees(col): """ - .. note:: Deprecated in 2.1, use :func:`degrees` instead. 
+ .. deprecated:: 2.1.0 + Use :func:`degrees` instead. """ warnings.warn("Deprecated in 2.1, use degrees instead.", DeprecationWarning) return degrees(col) @@ -372,7 +436,8 @@ def toDegrees(col): @since(1.4) def toRadians(col): """ - .. note:: Deprecated in 2.1, use :func:`radians` instead. + .. deprecated:: 2.1.0 + Use :func:`radians` instead. """ warnings.warn("Deprecated in 2.1, use radians instead.", DeprecationWarning) return radians(col) @@ -489,14 +554,19 @@ def kurtosis(col): return _invoke_function_over_column("kurtosis", col) -@since(1.6) def collect_list(col): """ Aggregate function: returns a list of objects with duplicates. - .. note:: The function is non-deterministic because the order of collected results depends - on the order of the rows which may be non-deterministic after a shuffle. + .. versionadded:: 1.6.0 + Notes + ----- + The function is non-deterministic because the order of collected results depends + on the order of the rows which may be non-deterministic after a shuffle. + + Examples + -------- >>> df2 = spark.createDataFrame([(2,), (5,), (5,)], ('age',)) >>> df2.agg(collect_list('age')).collect() [Row(collect_list(age)=[2, 5, 5])] @@ -504,14 +574,19 @@ def collect_list(col): return _invoke_function_over_column("collect_list", col) -@since(1.6) def collect_set(col): """ Aggregate function: returns a set of objects with duplicate elements eliminated. - .. note:: The function is non-deterministic because the order of collected results depends - on the order of the rows which may be non-deterministic after a shuffle. + .. versionadded:: 1.6.0 + + Notes + ----- + The function is non-deterministic because the order of collected results depends + on the order of the rows which may be non-deterministic after a shuffle. + Examples + -------- >>> df2 = spark.createDataFrame([(2,), (5,), (5,)], ('age',)) >>> df2.agg(collect_set('age')).collect() [Row(collect_set(age)=[5, 2])] @@ -519,40 +594,65 @@ def collect_set(col): return _invoke_function_over_column("collect_set", col) -@since(2.1) def degrees(col): """ Converts an angle measured in radians to an approximately equivalent angle measured in degrees. - :param col: angle in radians - :return: angle in degrees, as if computed by `java.lang.Math.toDegrees()` + .. versionadded:: 2.1.0 + + Parameters + ---------- + col : :class:`Column` or str + angle in radians + + Returns + ------- + :class:`Column` + angle in degrees, as if computed by `java.lang.Math.toDegrees()` """ return _invoke_function_over_column("degrees", col) -@since(2.1) def radians(col): """ Converts an angle measured in degrees to an approximately equivalent angle measured in radians. - :param col: angle in degrees - :return: angle in radians, as if computed by `java.lang.Math.toRadians()` + .. versionadded:: 2.1.0 + + Parameters + ---------- + col : :class:`Column` or str + angle in degrees + + Returns + ------- + :class:`Column` + angle in radians, as if computed by `java.lang.Math.toRadians()` """ return _invoke_function_over_column("radians", col) -@since(1.4) def atan2(col1, col2): """ - :param col1: coordinate on y-axis - :param col2: coordinate on x-axis - :return: the `theta` component of the point - (`r`, `theta`) - in polar coordinates that corresponds to the point - (`x`, `y`) in Cartesian coordinates, - as if computed by `java.lang.Math.atan2()` + .. 
versionadded:: 1.4.0 + + Parameters + ---------- + col1 : str, :class:`Column` or float + coordinate on y-axis + col2 : str, :class:`Column` or float + coordinate on x-axis + + Returns + ------- + :class:`Column` + the `theta` component of the point + (`r`, `theta`) + in polar coordinates that corresponds to the point + (`x`, `y`) in Cartesian coordinates, + as if computed by `java.lang.Math.atan2()` """ return _invoke_binary_math_function("atan2", col1, col2) @@ -633,20 +733,28 @@ def percent_rank(): @since(1.3) def approxCountDistinct(col, rsd=None): """ - .. note:: Deprecated in 2.1, use :func:`approx_count_distinct` instead. + .. deprecated:: 2.1.0 + Use :func:`approx_count_distinct` instead. """ warnings.warn("Deprecated in 2.1, use approx_count_distinct instead.", DeprecationWarning) return approx_count_distinct(col, rsd) -@since(2.1) def approx_count_distinct(col, rsd=None): """Aggregate function: returns a new :class:`Column` for approximate distinct count of column `col`. - :param rsd: maximum relative standard deviation allowed (default = 0.05). + .. versionadded:: 2.1.0 + + Parameters + ---------- + col : :class:`Column` or str + rsd : float, optional + maximum relative standard deviation allowed (default = 0.05). For rsd < 0.01, it is more efficient to use :func:`countDistinct` + Examples + -------- >>> df.agg(approx_count_distinct(df.age).alias('distinct_ages')).collect() [Row(distinct_ages=2)] """ @@ -666,10 +774,13 @@ def broadcast(df): return DataFrame(sc._jvm.functions.broadcast(df._jdf), df.sql_ctx) -@since(1.4) def coalesce(*cols): """Returns the first column that is not null. + .. versionadded:: 1.4.0 + + Examples + -------- >>> cDf = spark.createDataFrame([(None, None), (1, None), (None, 2)], ("a", "b")) >>> cDf.show() +----+----+ @@ -703,11 +814,14 @@ def coalesce(*cols): return Column(jc) -@since(1.6) def corr(col1, col2): """Returns a new :class:`Column` for the Pearson Correlation Coefficient for ``col1`` and ``col2``. + .. versionadded:: 1.6.0 + + Examples + -------- >>> a = range(20) >>> b = [2 * x for x in range(20)] >>> df = spark.createDataFrame(zip(a, b), ["a", "b"]) @@ -718,10 +832,13 @@ def corr(col1, col2): return Column(sc._jvm.functions.corr(_to_java_column(col1), _to_java_column(col2))) -@since(2.0) def covar_pop(col1, col2): """Returns a new :class:`Column` for the population covariance of ``col1`` and ``col2``. + .. versionadded:: 2.0.0 + + Examples + -------- >>> a = [1] * 10 >>> b = [1] * 10 >>> df = spark.createDataFrame(zip(a, b), ["a", "b"]) @@ -732,10 +849,13 @@ def covar_pop(col1, col2): return Column(sc._jvm.functions.covar_pop(_to_java_column(col1), _to_java_column(col2))) -@since(2.0) def covar_samp(col1, col2): """Returns a new :class:`Column` for the sample covariance of ``col1`` and ``col2``. + .. versionadded:: 2.0.0 + + Examples + -------- >>> a = [1] * 10 >>> b = [1] * 10 >>> df = spark.createDataFrame(zip(a, b), ["a", "b"]) @@ -746,10 +866,13 @@ def covar_samp(col1, col2): return Column(sc._jvm.functions.covar_samp(_to_java_column(col1), _to_java_column(col2))) -@since(1.3) def countDistinct(col, *cols): """Returns a new :class:`Column` for distinct count of ``col`` or ``cols``. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.agg(countDistinct(df.age, df.name).alias('c')).collect() [Row(c=2)] @@ -761,27 +884,33 @@ def countDistinct(col, *cols): return Column(jc) -@since(1.3) def first(col, ignorenulls=False): """Aggregate function: returns the first value in a group. The function by default returns the first values it sees. 
It will return the first non-null value it sees when ignoreNulls is set to true. If all values are null, then null is returned. - .. note:: The function is non-deterministic because its results depends on the order of the - rows which may be non-deterministic after a shuffle. + .. versionadded:: 1.3.0 + + Notes + ----- + The function is non-deterministic because its results depends on the order of the + rows which may be non-deterministic after a shuffle. """ sc = SparkContext._active_spark_context jc = sc._jvm.functions.first(_to_java_column(col), ignorenulls) return Column(jc) -@since(2.0) def grouping(col): """ Aggregate function: indicates whether a specified column in a GROUP BY list is aggregated or not, returns 1 for aggregated or 0 for not aggregated in the result set. + .. versionadded:: 2.0.0 + + Examples + -------- >>> df.cube("name").agg(grouping("name"), sum("age")).orderBy("name").show() +-----+--------------+--------+ | name|grouping(name)|sum(age)| @@ -796,16 +925,21 @@ def grouping(col): return Column(jc) -@since(2.0) def grouping_id(*cols): """ Aggregate function: returns the level of grouping, equals to (grouping(c1) << (n-1)) + (grouping(c2) << (n-2)) + ... + grouping(cn) - .. note:: The list of columns should match with grouping columns exactly, or empty (means all - the grouping columns). + .. versionadded:: 2.0.0 + Notes + ----- + The list of columns should match with grouping columns exactly, or empty (means all + the grouping columns). + + Examples + -------- >>> df.cube("name").agg(grouping_id(), sum("age")).orderBy("name").show() +-----+-------------+--------+ | name|grouping_id()|sum(age)| @@ -828,10 +962,13 @@ def input_file_name(): return Column(sc._jvm.functions.input_file_name()) -@since(1.6) def isnan(col): """An expression that returns true iff the column is NaN. + .. versionadded:: 1.6.0 + + Examples + -------- >>> df = spark.createDataFrame([(1.0, float('nan')), (float('nan'), 2.0)], ("a", "b")) >>> df.select(isnan("a").alias("r1"), isnan(df.a).alias("r2")).collect() [Row(r1=False, r2=False), Row(r1=True, r2=True)] @@ -840,10 +977,13 @@ def isnan(col): return Column(sc._jvm.functions.isnan(_to_java_column(col))) -@since(1.6) def isnull(col): """An expression that returns true iff the column is null. + .. versionadded:: 1.6.0 + + Examples + -------- >>> df = spark.createDataFrame([(1, None), (None, 2)], ("a", "b")) >>> df.select(isnull("a").alias("r1"), isnull(df.a).alias("r2")).collect() [Row(r1=False, r2=False), Row(r1=True, r2=True)] @@ -852,22 +992,24 @@ def isnull(col): return Column(sc._jvm.functions.isnull(_to_java_column(col))) -@since(1.3) def last(col, ignorenulls=False): """Aggregate function: returns the last value in a group. The function by default returns the last values it sees. It will return the last non-null value it sees when ignoreNulls is set to true. If all values are null, then null is returned. - .. note:: The function is non-deterministic because its results depends on the order of the - rows which may be non-deterministic after a shuffle. + .. versionadded:: 1.3.0 + + Notes + ----- + The function is non-deterministic because its results depends on the order of the + rows which may be non-deterministic after a shuffle. """ sc = SparkContext._active_spark_context jc = sc._jvm.functions.last(_to_java_column(col), ignorenulls) return Column(jc) -@since(1.6) def monotonically_increasing_id(): """A column that generates monotonically increasing 64-bit integers. 
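The ignorenulls flag on first() and last() above is easy to misread, so a short sketch (hypothetical data, assuming a SparkSession named `spark`):

    from pyspark.sql.functions import first, last

    df = spark.createDataFrame([("a", 1), ("a", None)], ["k", "v"])

    # With ignorenulls=True the aggregates skip nulls; with the default False they may return
    # null, and either way the picked row depends on ordering after a shuffle (non-deterministic).
    df.groupBy("k").agg(first("v", ignorenulls=True), last("v", ignorenulls=True)).show()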
@@ -876,7 +1018,11 @@ def monotonically_increasing_id(): within each partition in the lower 33 bits. The assumption is that the data frame has less than 1 billion partitions, and each partition has less than 8 billion records. - .. note:: The function is non-deterministic because its result depends on partition IDs. + .. versionadded:: 1.6.0 + + Notes + ----- + The function is non-deterministic because its result depends on partition IDs. As an example, consider a :class:`DataFrame` with two partitions, each with 3 records. This expression would return the following IDs: @@ -890,12 +1036,15 @@ def monotonically_increasing_id(): return Column(sc._jvm.functions.monotonically_increasing_id()) -@since(1.6) def nanvl(col1, col2): """Returns col1 if it is not NaN, or col2 if col1 is NaN. Both inputs should be floating point columns (:class:`DoubleType` or :class:`FloatType`). + .. versionadded:: 1.6.0 + + Examples + -------- >>> df = spark.createDataFrame([(1.0, float('nan')), (float('nan'), 2.0)], ("a", "b")) >>> df.select(nanvl("a", "b").alias("r1"), nanvl(df.a, df.b).alias("r2")).collect() [Row(r1=1.0, r2=1.0), Row(r1=2.0, r2=2.0)] @@ -904,7 +1053,6 @@ def nanvl(col1, col2): return Column(sc._jvm.functions.nanvl(_to_java_column(col1), _to_java_column(col2))) -@since(3.1) def percentile_approx(col, percentage, accuracy=10000): """Returns the approximate `percentile` of the numeric column `col` which is the smallest value in the ordered `col` values (sorted from least to greatest) such that no more than `percentage` @@ -920,6 +1068,10 @@ def percentile_approx(col, percentage, accuracy=10000): In this case, returns the approximate percentile array of column col at the given percentage array. + .. versionadded:: 3.1.0 + + Examples + -------- >>> key = (col("id") % 3).alias("key") >>> value = (randn(42) + key * 10).alias("value") >>> df = spark.range(0, 1000, 1, 1).select(key, value) @@ -959,13 +1111,18 @@ def percentile_approx(col, percentage, accuracy=10000): return Column(sc._jvm.functions.percentile_approx(_to_java_column(col), percentage, accuracy)) -@since(1.4) def rand(seed=None): """Generates a random column with independent and identically distributed (i.i.d.) samples uniformly distributed in [0.0, 1.0). - .. note:: The function is non-deterministic in general case. + .. versionadded:: 1.4.0 + Notes + ----- + The function is non-deterministic in general case. + + Examples + -------- >>> df.withColumn('rand', rand(seed=42) * 3).collect() [Row(age=2, name='Alice', rand=2.4052597283576684), Row(age=5, name='Bob', rand=2.3913904055683974)] @@ -978,13 +1135,18 @@ def rand(seed=None): return Column(jc) -@since(1.4) def randn(seed=None): """Generates a column with independent and identically distributed (i.i.d.) samples from the standard normal distribution. - .. note:: The function is non-deterministic in general case. + .. versionadded:: 1.4.0 + Notes + ----- + The function is non-deterministic in general case. + + Examples + -------- >>> df.withColumn('randn', randn(seed=42)).collect() [Row(age=2, name='Alice', randn=1.1027054481455365), Row(age=5, name='Bob', randn=0.7400395449950132)] @@ -997,12 +1159,15 @@ def randn(seed=None): return Column(jc) -@since(1.5) def round(col, scale=0): """ Round the given value to `scale` decimal places using HALF_UP rounding mode if `scale` >= 0 or at integral part when `scale` < 0. + .. 
versionadded:: 1.5.0 + + Examples + -------- >>> spark.createDataFrame([(2.5,)], ['a']).select(round('a', 0).alias('r')).collect() [Row(r=3.0)] """ @@ -1010,12 +1175,15 @@ def round(col, scale=0): return Column(sc._jvm.functions.round(_to_java_column(col), scale)) -@since(2.0) def bround(col, scale=0): """ Round the given value to `scale` decimal places using HALF_EVEN rounding mode if `scale` >= 0 or at integral part when `scale` < 0. + .. versionadded:: 2.0.0 + + Examples + -------- >>> spark.createDataFrame([(2.5,)], ['a']).select(bround('a', 0).alias('r')).collect() [Row(r=2.0)] """ @@ -1023,10 +1191,13 @@ def bround(col, scale=0): return Column(sc._jvm.functions.bround(_to_java_column(col), scale)) -@since(1.5) def shiftLeft(col, numBits): """Shift the given value numBits left. + .. versionadded:: 1.5.0 + + Examples + -------- >>> spark.createDataFrame([(21,)], ['a']).select(shiftLeft('a', 1).alias('r')).collect() [Row(r=42)] """ @@ -1034,10 +1205,13 @@ def shiftLeft(col, numBits): return Column(sc._jvm.functions.shiftLeft(_to_java_column(col), numBits)) -@since(1.5) def shiftRight(col, numBits): """(Signed) shift the given value numBits right. + .. versionadded:: 1.5.0 + + Examples + -------- >>> spark.createDataFrame([(42,)], ['a']).select(shiftRight('a', 1).alias('r')).collect() [Row(r=21)] """ @@ -1046,10 +1220,13 @@ def shiftRight(col, numBits): return Column(jc) -@since(1.5) def shiftRightUnsigned(col, numBits): """Unsigned shift the given value numBits right. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([(-42,)], ['a']) >>> df.select(shiftRightUnsigned('a', 1).alias('r')).collect() [Row(r=9223372036854775787)] @@ -1059,12 +1236,17 @@ def shiftRightUnsigned(col, numBits): return Column(jc) -@since(1.6) def spark_partition_id(): """A column for partition ID. - .. note:: This is indeterministic because it depends on data partitioning and task scheduling. + .. versionadded:: 1.6.0 + Notes + ----- + This is indeterministic because it depends on data partitioning and task scheduling. + + Examples + -------- >>> df.repartition(1).select(spark_partition_id().alias("pid")).collect() [Row(pid=0), Row(pid=0)] """ @@ -1072,10 +1254,13 @@ def spark_partition_id(): return Column(sc._jvm.functions.spark_partition_id()) -@since(1.5) def expr(str): """Parses the expression string into the column that it represents + .. versionadded:: 1.5.0 + + Examples + -------- >>> df.select(expr("length(name)")).collect() [Row(length(name)=5), Row(length(name)=3)] """ @@ -1083,12 +1268,18 @@ def expr(str): return Column(sc._jvm.functions.expr(str)) -@since(1.4) def struct(*cols): """Creates a new struct column. - :param cols: list of column names (string) or list of :class:`Column` expressions + .. versionadded:: 1.4.0 + + Parameters + ---------- + cols : list, set, str or :class:`Column` + column names or :class:`Column`\\s to contain in the output struct. + Examples + -------- >>> df.select(struct('age', 'name').alias("struct")).collect() [Row(struct=Row(age=2, name='Alice')), Row(struct=Row(age=5, name='Bob'))] >>> df.select(struct([df.age, df.name]).alias("struct")).collect() @@ -1101,12 +1292,15 @@ def struct(*cols): return Column(jc) -@since(1.5) def greatest(*cols): """ Returns the greatest value of the list of column names, skipping null values. This function takes at least 2 parameters. It will return null iff all parameters are null. + .. 
versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([(1, 4, 3)], ['a', 'b', 'c']) >>> df.select(greatest(df.a, df.b, df.c).alias("greatest")).collect() [Row(greatest=4)] @@ -1117,12 +1311,15 @@ def greatest(*cols): return Column(sc._jvm.functions.greatest(_to_seq(sc, cols, _to_java_column))) -@since(1.5) def least(*cols): """ Returns the least value of the list of column names, skipping null values. This function takes at least 2 parameters. It will return null iff all parameters are null. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([(1, 4, 3)], ['a', 'b', 'c']) >>> df.select(least(df.a, df.b, df.c).alias("least")).collect() [Row(least=1)] @@ -1133,13 +1330,18 @@ def least(*cols): return Column(sc._jvm.functions.least(_to_seq(sc, cols, _to_java_column))) -@since(1.4) def when(condition, value): """Evaluates a list of conditions and returns one of multiple possible result expressions. If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions. - :param condition: a boolean :class:`Column` expression. - :param value: a literal value, or a :class:`Column` expression. + .. versionadded:: 1.4.0 + + Parameters + ---------- + condition : :class:`Column` + a boolean :class:`Column` expression. + value : + a literal value, or a :class:`Column` expression. >>> df.select(when(df['age'] == 2, 3).otherwise(4).alias("age")).collect() [Row(age=3), Row(age=4)] @@ -1155,12 +1357,15 @@ def when(condition, value): return Column(jc) -@since(1.5) def log(arg1, arg2=None): """Returns the first argument-based logarithm of the second argument. If there is only one argument, then this takes the natural logarithm of the argument. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df.select(log(10.0, df.age).alias('ten')).rdd.map(lambda l: str(l.ten)[:7]).collect() ['0.30102', '0.69897'] @@ -1175,10 +1380,13 @@ def log(arg1, arg2=None): return Column(jc) -@since(1.5) def log2(col): """Returns the base-2 logarithm of the argument. + .. versionadded:: 1.5.0 + + Examples + -------- >>> spark.createDataFrame([(4,)], ['a']).select(log2('a').alias('log2')).collect() [Row(log2=2.0)] """ @@ -1186,11 +1394,14 @@ def log2(col): return Column(sc._jvm.functions.log2(_to_java_column(col))) -@since(1.5) def conv(col, fromBase, toBase): """ Convert a number in a string column from one base to another. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([("010101",)], ['n']) >>> df.select(conv(df.n, 2, 16).alias('hex')).collect() [Row(hex='15')] @@ -1199,11 +1410,14 @@ def conv(col, fromBase, toBase): return Column(sc._jvm.functions.conv(_to_java_column(col), fromBase, toBase)) -@since(1.5) def factorial(col): """ Computes the factorial of the given value. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([(5,)], ['n']) >>> df.select(factorial(df.n).alias('f')).collect() [Row(f=120)] @@ -1214,7 +1428,6 @@ def factorial(col): # --------------- Window functions ------------------------ -@since(1.4) def lag(col, offset=1, default=None): """ Window function: returns the value that is `offset` rows before the current row, and @@ -1223,15 +1436,21 @@ def lag(col, offset=1, default=None): This is equivalent to the LAG function in SQL. - :param col: name of column or expression - :param offset: number of row to extend - :param default: default value + .. 
versionadded:: 1.4.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + offset : int, optional + number of row to extend + default : optional + default value """ sc = SparkContext._active_spark_context return Column(sc._jvm.functions.lag(_to_java_column(col), offset, default)) -@since(1.4) def lead(col, offset=1, default=None): """ Window function: returns the value that is `offset` rows after the current row, and @@ -1240,15 +1459,21 @@ def lead(col, offset=1, default=None): This is equivalent to the LEAD function in SQL. - :param col: name of column or expression - :param offset: number of row to extend - :param default: default value + .. versionadded:: 1.4.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + offset : int, optional + number of row to extend + default : optional + default value """ sc = SparkContext._active_spark_context return Column(sc._jvm.functions.lead(_to_java_column(col), offset, default)) -@since(3.1) def nth_value(col, offset, ignoreNulls=False): """ Window function: returns the value that is the `offset`\\th row of the window frame @@ -1259,16 +1484,22 @@ def nth_value(col, offset, ignoreNulls=False): This is equivalent to the nth_value function in SQL. - :param col: name of column or expression - :param offset: number of row to use as the value - :param ignoreNulls: indicates the Nth value should skip null in the + .. versionadded:: 3.1.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + offset : int, optional + number of row to use as the value + ignoreNulls : bool, optional + indicates the Nth value should skip null in the determination of which row to use """ sc = SparkContext._active_spark_context return Column(sc._jvm.functions.nth_value(_to_java_column(col), offset, ignoreNulls)) -@since(1.4) def ntile(n): """ Window function: returns the ntile group id (from 1 to `n` inclusive) @@ -1278,7 +1509,12 @@ def ntile(n): This is equivalent to the NTILE function in SQL. - :param n: an integer + .. versionadded:: 1.4.0 + + Parameters + ---------- + n : int + an integer """ sc = SparkContext._active_spark_context return Column(sc._jvm.functions.ntile(int(n))) @@ -1305,7 +1541,6 @@ def current_timestamp(): return Column(sc._jvm.functions.current_timestamp()) -@since(1.5) def date_format(date, format): """ Converts a date/timestamp/string to a value of string in the format specified by the date @@ -1315,9 +1550,15 @@ def date_format(date, format): pattern letters of `datetime pattern`_. can be used. .. _datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html - .. note:: Use when ever possible specialized functions like `year`. These benefit from a - specialized implementation. + .. versionadded:: 1.5.0 + + Notes + ----- + Whenever possible, use specialized functions like `year`. + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) >>> df.select(date_format('dt', 'MM/dd/yyy').alias('date')).collect() [Row(date='04/08/2015')] @@ -1326,11 +1567,14 @@ def date_format(date, format): return Column(sc._jvm.functions.date_format(_to_java_column(date), format)) -@since(1.5) def year(col): """ Extract the year of a given date as integer. + .. 
versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) >>> df.select(year('dt').alias('year')).collect() [Row(year=2015)] @@ -1339,11 +1583,14 @@ def year(col): return Column(sc._jvm.functions.year(_to_java_column(col))) -@since(1.5) def quarter(col): """ Extract the quarter of a given date as integer. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) >>> df.select(quarter('dt').alias('quarter')).collect() [Row(quarter=2)] @@ -1352,11 +1599,14 @@ def quarter(col): return Column(sc._jvm.functions.quarter(_to_java_column(col))) -@since(1.5) def month(col): """ Extract the month of a given date as integer. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) >>> df.select(month('dt').alias('month')).collect() [Row(month=4)] @@ -1365,11 +1615,14 @@ def month(col): return Column(sc._jvm.functions.month(_to_java_column(col))) -@since(2.3) def dayofweek(col): """ Extract the day of the week of a given date as integer. + .. versionadded:: 2.3.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) >>> df.select(dayofweek('dt').alias('day')).collect() [Row(day=4)] @@ -1378,11 +1631,14 @@ def dayofweek(col): return Column(sc._jvm.functions.dayofweek(_to_java_column(col))) -@since(1.5) def dayofmonth(col): """ Extract the day of the month of a given date as integer. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) >>> df.select(dayofmonth('dt').alias('day')).collect() [Row(day=8)] @@ -1391,11 +1647,14 @@ def dayofmonth(col): return Column(sc._jvm.functions.dayofmonth(_to_java_column(col))) -@since(1.5) def dayofyear(col): """ Extract the day of the year of a given date as integer. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) >>> df.select(dayofyear('dt').alias('day')).collect() [Row(day=98)] @@ -1404,11 +1663,14 @@ def dayofyear(col): return Column(sc._jvm.functions.dayofyear(_to_java_column(col))) -@since(1.5) def hour(col): """ Extract the hours of a given date as integer. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08 13:08:15',)], ['ts']) >>> df.select(hour('ts').alias('hour')).collect() [Row(hour=13)] @@ -1417,11 +1679,14 @@ def hour(col): return Column(sc._jvm.functions.hour(_to_java_column(col))) -@since(1.5) def minute(col): """ Extract the minutes of a given date as integer. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08 13:08:15',)], ['ts']) >>> df.select(minute('ts').alias('minute')).collect() [Row(minute=8)] @@ -1430,11 +1695,14 @@ def minute(col): return Column(sc._jvm.functions.minute(_to_java_column(col))) -@since(1.5) def second(col): """ Extract the seconds of a given date as integer. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08 13:08:15',)], ['ts']) >>> df.select(second('ts').alias('second')).collect() [Row(second=15)] @@ -1443,11 +1711,14 @@ def second(col): return Column(sc._jvm.functions.second(_to_java_column(col))) -@since(1.5) def weekofyear(col): """ Extract the week number of a given date as integer. + .. 
versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) >>> df.select(weekofyear(df.dt).alias('week')).collect() [Row(week=15)] @@ -1456,11 +1727,14 @@ def weekofyear(col): return Column(sc._jvm.functions.weekofyear(_to_java_column(col))) -@since(1.5) def date_add(start, days): """ Returns the date that is `days` days after `start` + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) >>> df.select(date_add(df.dt, 1).alias('next_date')).collect() [Row(next_date=datetime.date(2015, 4, 9))] @@ -1469,11 +1743,14 @@ def date_add(start, days): return Column(sc._jvm.functions.date_add(_to_java_column(start), days)) -@since(1.5) def date_sub(start, days): """ Returns the date that is `days` days before `start` + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) >>> df.select(date_sub(df.dt, 1).alias('prev_date')).collect() [Row(prev_date=datetime.date(2015, 4, 7))] @@ -1482,11 +1759,14 @@ def date_sub(start, days): return Column(sc._jvm.functions.date_sub(_to_java_column(start), days)) -@since(1.5) def datediff(end, start): """ Returns the number of days from `start` to `end`. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08','2015-05-10')], ['d1', 'd2']) >>> df.select(datediff(df.d2, df.d1).alias('diff')).collect() [Row(diff=32)] @@ -1495,11 +1775,14 @@ def datediff(end, start): return Column(sc._jvm.functions.datediff(_to_java_column(end), _to_java_column(start))) -@since(1.5) def add_months(start, months): """ Returns the date that is `months` months after `start` + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) >>> df.select(add_months(df.dt, 1).alias('next_month')).collect() [Row(next_month=datetime.date(2015, 5, 8))] @@ -1508,7 +1791,6 @@ def add_months(start, months): return Column(sc._jvm.functions.add_months(_to_java_column(start), months)) -@since(1.5) def months_between(date1, date2, roundOff=True): """ Returns number of months between dates date1 and date2. @@ -1517,6 +1799,10 @@ def months_between(date1, date2, roundOff=True): returns an integer (time of day will be ignored). The result is rounded off to 8 digits unless `roundOff` is set to `False`. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('1997-02-28 10:30:00', '1996-10-30')], ['date1', 'date2']) >>> df.select(months_between(df.date1, df.date2).alias('months')).collect() [Row(months=3.94959677)] @@ -1528,7 +1814,6 @@ def months_between(date1, date2, roundOff=True): _to_java_column(date1), _to_java_column(date2), roundOff)) -@since(2.2) def to_date(col, format=None): """Converts a :class:`Column` into :class:`pyspark.sql.types.DateType` using the optionally specified format. Specify formats according to `datetime pattern`_. @@ -1537,6 +1822,10 @@ def to_date(col, format=None): .. _datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + .. versionadded:: 2.2.0 + + Examples + -------- >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t']) >>> df.select(to_date(df.t).alias('date')).collect() [Row(date=datetime.date(1997, 2, 28))] @@ -1553,7 +1842,6 @@ def to_date(col, format=None): return Column(jc) -@since(2.2) def to_timestamp(col, format=None): """Converts a :class:`Column` into :class:`pyspark.sql.types.TimestampType` using the optionally specified format. 
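The signature above also accepts an explicit format; as a sketch of that variant (the format string 'yyyy-MM-dd HH:mm:ss' is an illustrative choice, and the expected row mirrors the default-format doctest later in this hunk):

>>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
>>> df.select(to_timestamp(df.t, 'yyyy-MM-dd HH:mm:ss').alias('dt')).collect()
[Row(dt=datetime.datetime(1997, 2, 28, 10, 30))]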
Specify formats according to `datetime pattern`_. @@ -1562,6 +1850,10 @@ def to_timestamp(col, format=None): .. _datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + .. versionadded:: 2.2.0 + + Examples + -------- >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t']) >>> df.select(to_timestamp(df.t).alias('dt')).collect() [Row(dt=datetime.datetime(1997, 2, 28, 10, 30))] @@ -1578,13 +1870,20 @@ def to_timestamp(col, format=None): return Column(jc) -@since(1.5) def trunc(date, format): """ Returns date truncated to the unit specified by the format. - :param format: 'year', 'yyyy', 'yy' or 'month', 'mon', 'mm' + .. versionadded:: 1.5.0 + + Parameters + ---------- + date : :class:`Column` or str + format : str + 'year', 'yyyy', 'yy' or 'month', 'mon', 'mm' + Examples + -------- >>> df = spark.createDataFrame([('1997-02-28',)], ['d']) >>> df.select(trunc(df.d, 'year').alias('year')).collect() [Row(year=datetime.date(1997, 1, 1))] @@ -1595,14 +1894,21 @@ def trunc(date, format): return Column(sc._jvm.functions.trunc(_to_java_column(date), format)) -@since(2.3) def date_trunc(format, timestamp): """ Returns timestamp truncated to the unit specified by the format. - :param format: 'year', 'yyyy', 'yy', 'month', 'mon', 'mm', + .. versionadded:: 2.3.0 + + Parameters + ---------- + format : str + 'year', 'yyyy', 'yy', 'month', 'mon', 'mm', 'day', 'dd', 'hour', 'minute', 'second', 'week', 'quarter' + timestamp : :class:`Column` or str + Examples + -------- >>> df = spark.createDataFrame([('1997-02-28 05:02:11',)], ['t']) >>> df.select(date_trunc('year', df.t).alias('year')).collect() [Row(year=datetime.datetime(1997, 1, 1, 0, 0))] @@ -1613,7 +1919,6 @@ def date_trunc(format, timestamp): return Column(sc._jvm.functions.date_trunc(format, _to_java_column(timestamp))) -@since(1.5) def next_day(date, dayOfWeek): """ Returns the first date which is later than the value of the date column. @@ -1621,6 +1926,10 @@ def next_day(date, dayOfWeek): Day of the week parameter is case insensitive, and accepts: "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun". + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-07-27',)], ['d']) >>> df.select(next_day(df.d, 'Sun').alias('date')).collect() [Row(date=datetime.date(2015, 8, 2))] @@ -1629,11 +1938,14 @@ def next_day(date, dayOfWeek): return Column(sc._jvm.functions.next_day(_to_java_column(date), dayOfWeek)) -@since(1.5) def last_day(date): """ Returns the last day of the month which the given date belongs to. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('1997-02-10',)], ['d']) >>> df.select(last_day(df.d).alias('date')).collect() [Row(date=datetime.date(1997, 2, 28))] @@ -1642,13 +1954,16 @@ def last_day(date): return Column(sc._jvm.functions.last_day(_to_java_column(date))) -@since(1.5) def from_unixtime(timestamp, format="yyyy-MM-dd HH:mm:ss"): """ Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC) to a string representing the timestamp of that moment in the current system time zone in the given format. + .. 
versionadded:: 1.5.0 + + Examples + -------- >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") >>> time_df = spark.createDataFrame([(1428476400,)], ['unix_time']) >>> time_df.select(from_unixtime('unix_time').alias('ts')).collect() @@ -1659,7 +1974,6 @@ def from_unixtime(timestamp, format="yyyy-MM-dd HH:mm:ss"): return Column(sc._jvm.functions.from_unixtime(_to_java_column(timestamp), format)) -@since(1.5) def unix_timestamp(timestamp=None, format='yyyy-MM-dd HH:mm:ss'): """ Convert time string with given pattern ('yyyy-MM-dd HH:mm:ss', by default) @@ -1668,6 +1982,10 @@ def unix_timestamp(timestamp=None, format='yyyy-MM-dd HH:mm:ss'): if `timestamp` is None, then it returns current timestamp. + .. versionadded:: 1.5.0 + + Examples + -------- >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") >>> time_df = spark.createDataFrame([('2015-04-08',)], ['dt']) >>> time_df.select(unix_timestamp('dt', 'yyyy-MM-dd').alias('unix_time')).collect() @@ -1680,7 +1998,6 @@ def unix_timestamp(timestamp=None, format='yyyy-MM-dd HH:mm:ss'): return Column(sc._jvm.functions.unix_timestamp(_to_java_column(timestamp), format)) -@since(1.5) def from_utc_timestamp(timestamp, tz): """ This is a common function for databases supporting TIMESTAMP WITHOUT TIMEZONE. This function @@ -1696,17 +2013,25 @@ def from_utc_timestamp(timestamp, tz): according to the timezone in the string, and finally display the result by converting the timestamp to string according to the session local timezone. - :param timestamp: the column that contains timestamps - :param tz: A string detailing the time zone ID that the input should be adjusted to. It should - be in the format of either region-based zone IDs or zone offsets. Region IDs must - have the form 'area/city', such as 'America/Los_Angeles'. Zone offsets must be in - the format '(+|-)HH:mm', for example '-08:00' or '+01:00'. Also 'UTC' and 'Z' are - supported as aliases of '+00:00'. Other short names are not recommended to use - because they can be ambiguous. - - .. versionchanged:: 2.4 - `tz` can take a :class:`Column` containing timezone ID strings. - + .. versionadded:: 1.5.0 + + Parameters + ---------- + timestamp : :class:`Column` or str + the column that contains timestamps + tz : :class:`Column` or str + A string detailing the time zone ID that the input should be adjusted to. It should + be in the format of either region-based zone IDs or zone offsets. Region IDs must + have the form 'area/city', such as 'America/Los_Angeles'. Zone offsets must be in + the format '(+|-)HH:mm', for example '-08:00' or '+01:00'. Also 'UTC' and 'Z' are + supported as aliases of '+00:00'. Other short names are not recommended to use + because they can be ambiguous. + + .. versionchanged:: 2.4 + `tz` can take a :class:`Column` containing timezone ID strings. + + Examples + -------- >>> df = spark.createDataFrame([('1997-02-28 10:30:00', 'JST')], ['ts', 'tz']) >>> df.select(from_utc_timestamp(df.ts, "PST").alias('local_time')).collect() [Row(local_time=datetime.datetime(1997, 2, 28, 2, 30))] @@ -1719,7 +2044,6 @@ def from_utc_timestamp(timestamp, tz): return Column(sc._jvm.functions.from_utc_timestamp(_to_java_column(timestamp), tz)) -@since(1.5) def to_utc_timestamp(timestamp, tz): """ This is a common function for databases supporting TIMESTAMP WITHOUT TIMEZONE. 
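The versionchanged note later in this hunk says `tz` may also be a :class:`Column` of timezone IDs; a minimal sketch of that form, reusing the ('1997-02-28 10:30:00', 'JST') row from the doctest below and assuming JST is interpreted as UTC+9:

>>> df = spark.createDataFrame([('1997-02-28 10:30:00', 'JST')], ['ts', 'tz'])
>>> df.select(to_utc_timestamp(df.ts, df.tz).alias('utc_time')).collect()
[Row(utc_time=datetime.datetime(1997, 2, 28, 1, 30))]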
This function @@ -1735,17 +2059,25 @@ def to_utc_timestamp(timestamp, tz): according to the timezone in the string, and finally display the result by converting the timestamp to string according to the session local timezone. - :param timestamp: the column that contains timestamps - :param tz: A string detailing the time zone ID that the input should be adjusted to. It should - be in the format of either region-based zone IDs or zone offsets. Region IDs must - have the form 'area/city', such as 'America/Los_Angeles'. Zone offsets must be in - the format '(+|-)HH:mm', for example '-08:00' or '+01:00'. Also 'UTC' and 'Z' are - supported as aliases of '+00:00'. Other short names are not recommended to use - because they can be ambiguous. - - .. versionchanged:: 2.4 - `tz` can take a :class:`Column` containing timezone ID strings. - + .. versionadded:: 1.5.0 + + Parameters + ---------- + timestamp : :class:`Column` or str + the column that contains timestamps + tz : :class:`Column` or str + A string detailing the time zone ID that the input should be adjusted to. It should + be in the format of either region-based zone IDs or zone offsets. Region IDs must + have the form 'area/city', such as 'America/Los_Angeles'. Zone offsets must be in + the format '(+|-)HH:mm', for example '-08:00' or '+01:00'. Also 'UTC' and 'Z' are + upported as aliases of '+00:00'. Other short names are not recommended to use + because they can be ambiguous. + + .. versionchanged:: 2.4.0 + `tz` can take a :class:`Column` containing timezone ID strings. + + Examples + -------- >>> df = spark.createDataFrame([('1997-02-28 10:30:00', 'JST')], ['ts', 'tz']) >>> df.select(to_utc_timestamp(df.ts, "PST").alias('utc_time')).collect() [Row(utc_time=datetime.datetime(1997, 2, 28, 18, 30))] @@ -1758,9 +2090,12 @@ def to_utc_timestamp(timestamp, tz): return Column(sc._jvm.functions.to_utc_timestamp(_to_java_column(timestamp), tz)) -@since(3.1) def timestamp_seconds(col): """ + .. versionadded:: 3.1.0 + + Examples + -------- >>> from pyspark.sql.functions import timestamp_seconds >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") >>> time_df = spark.createDataFrame([(1230219000,)], ['unix_time']) @@ -1777,7 +2112,6 @@ def timestamp_seconds(col): return Column(sc._jvm.functions.timestamp_seconds(_to_java_column(col))) -@since(2.0) def window(timeColumn, windowDuration, slideDuration=None, startTime=None): """Bucketize rows into one or more time windows given a timestamp specifying column. Window starts are inclusive but the window ends are exclusive, e.g. 12:05 will be in the window @@ -1797,6 +2131,10 @@ def window(timeColumn, windowDuration, slideDuration=None, startTime=None): The output column will be a struct called 'window' by default with the nested columns 'start' and 'end', where 'start' and 'end' will be of :class:`pyspark.sql.types.TimestampType`. + .. versionadded:: 2.0.0 + + Examples + -------- >>> df = spark.createDataFrame([("2016-03-11 09:00:07", 1)]).toDF("date", "val") >>> w = df.groupBy(window("date", "5 seconds")).agg(sum("val").alias("sum")) >>> w.select(w.window.start.cast("string").alias("start"), @@ -1827,12 +2165,15 @@ def check_string_field(field, fieldName): # ---------------------------- misc functions ---------------------------------- -@since(1.5) def crc32(col): """ Calculates the cyclic redundancy check value (CRC32) of a binary column and returns the value as a bigint. + .. 
versionadded:: 1.5.0 + + Examples + -------- >>> spark.createDataFrame([('ABC',)], ['a']).select(crc32('a').alias('crc32')).collect() [Row(crc32=2743272264)] """ @@ -1840,10 +2181,13 @@ def crc32(col): return Column(sc._jvm.functions.crc32(_to_java_column(col))) -@since(1.5) def md5(col): """Calculates the MD5 digest and returns the value as a 32 character hex string. + .. versionadded:: 1.5.0 + + Examples + -------- >>> spark.createDataFrame([('ABC',)], ['a']).select(md5('a').alias('hash')).collect() [Row(hash='902fbdd2b1df0c4f70b4a5d23525e932')] """ @@ -1852,10 +2196,13 @@ def md5(col): return Column(jc) -@since(1.5) def sha1(col): """Returns the hex string result of SHA-1. + .. versionadded:: 1.5.0 + + Examples + -------- >>> spark.createDataFrame([('ABC',)], ['a']).select(sha1('a').alias('hash')).collect() [Row(hash='3c01bdbb26f358bab27f267924aa2c9a03fcfdb8')] """ @@ -1864,12 +2211,15 @@ def sha1(col): return Column(jc) -@since(1.5) def sha2(col, numBits): """Returns the hex string result of SHA-2 family of hash functions (SHA-224, SHA-256, SHA-384, and SHA-512). The numBits indicates the desired bit length of the result, which must have a value of 224, 256, 384, 512, or 0 (which is equivalent to 256). + .. versionadded:: 1.5.0 + + Examples + -------- >>> digests = df.select(sha2(df.name, 256).alias('s')).collect() >>> digests[0] Row(s='3bc51062973c458d5a6f2d8d64a023246354ad7e064b1e4e009ec8a0699a3043') @@ -1881,10 +2231,13 @@ def sha2(col, numBits): return Column(jc) -@since(2.0) def hash(*cols): """Calculates the hash code of given columns, and returns the result as an int column. + .. versionadded:: 2.0.0 + + Examples + -------- >>> spark.createDataFrame([('ABC',)], ['a']).select(hash('a').alias('hash')).collect() [Row(hash=-757602832)] """ @@ -1893,11 +2246,14 @@ def hash(*cols): return Column(jc) -@since(3.0) def xxhash64(*cols): """Calculates the hash code of given columns using the 64-bit variant of the xxHash algorithm, and returns the result as a long column. + .. versionadded:: 3.0.0 + + Examples + -------- >>> spark.createDataFrame([('ABC',)], ['a']).select(xxhash64('a').alias('hash')).collect() [Row(hash=4105715581806190027)] """ @@ -1906,12 +2262,15 @@ def xxhash64(*cols): return Column(jc) -@since(3.1) def assert_true(col, errMsg=None): """ Returns null if the input column is true; throws an exception with the provided error message otherwise. + .. versionadded:: 3.1.0 + + Examples + -------- >>> df = spark.createDataFrame([(0,1)], ['a', 'b']) >>> df.select(assert_true(df.a < df.b).alias('r')).collect() [Row(r=None)] @@ -2023,12 +2382,15 @@ def trim(col): return _invoke_function_over_column("trim", col) -@since(1.5) def concat_ws(sep, *cols): """ Concatenates multiple input string columns together into a single string column, using the given separator. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('abcd','123')], ['s', 'd']) >>> df.select(concat_ws('-', df.s, df.d).alias('s')).collect() [Row(s='abcd-123')] @@ -2057,14 +2419,19 @@ def encode(col, charset): return Column(sc._jvm.functions.encode(_to_java_column(col), charset)) -@since(1.5) def format_number(col, d): """ Formats the number X to a format like '#,--#,--#.--', rounded to d decimal places with HALF_EVEN round mode, and returns the result as a string. - :param col: the column name of the numeric value to be formatted - :param d: the N decimal places + .. 
versionadded:: 1.5.0 + + Parameters + ---------- + col : :class:`Column` or str + the column name of the numeric value to be formatted + d : int + the N decimal places >>> spark.createDataFrame([(5,)], ['a']).select(format_number('a', 4).alias('v')).collect() [Row(v='5.0000')] @@ -2073,15 +2440,21 @@ def format_number(col, d): return Column(sc._jvm.functions.format_number(_to_java_column(col), d)) -@since(1.5) def format_string(format, *cols): """ Formats the arguments in printf-style and returns the result as a string column. - :param format: string that can contain embedded format tags and used as result column's value - :param cols: list of column names (string) or list of :class:`Column` expressions to - be used in formatting + .. versionadded:: 1.5.0 + + Parameters + ---------- + format : str + string that can contain embedded format tags and used as result column's value + cols : :class:`Column` or str + column names or :class:`Column`\\s to be used in formatting + Examples + -------- >>> df = spark.createDataFrame([(5, "hello")], ['a', 'b']) >>> df.select(format_string('%d %s', df.a, df.b).alias('v')).collect() [Row(v='5 hello')] @@ -2090,14 +2463,17 @@ def format_string(format, *cols): return Column(sc._jvm.functions.format_string(format, _to_seq(sc, cols, _to_java_column))) -@since(1.5) def instr(str, substr): """ Locate the position of the first occurrence of substr column in the given string. Returns null if either of the arguments are null. - .. note:: The position is not zero based, but 1 based index. Returns 0 if substr - could not be found in str. + .. versionadded:: 1.5.0 + + Notes + ----- + The position is not zero based, but 1 based index. Returns 0 if substr + could not be found in str. >>> df = spark.createDataFrame([('abcd',)], ['s',]) >>> df.select(instr(df.s, 'b').alias('s')).collect() @@ -2107,12 +2483,15 @@ def instr(str, substr): return Column(sc._jvm.functions.instr(_to_java_column(str), substr)) -@since(3.0) def overlay(src, replace, pos, len=-1): """ Overlay the specified portion of `src` with `replace`, starting from byte position `pos` of `src` and proceeding for `len` bytes. + .. versionadded:: 3.0.0 + + Examples + -------- >>> df = spark.createDataFrame([("SPARK_SQL", "CORE")], ("x", "y")) >>> df.select(overlay("x", "y", 7).alias("overlayed")).show() +----------+ @@ -2141,15 +2520,20 @@ def overlay(src, replace, pos, len=-1): )) -@since(1.5) def substring(str, pos, len): """ Substring starts at `pos` and is of length `len` when str is String type or returns the slice of byte array that starts at `pos` in byte and is of length `len` when str is Binary type. - .. note:: The position is not zero based, but 1 based index. + .. versionadded:: 1.5.0 + Notes + ----- + The position is not zero based, but 1 based index. + + Examples + -------- >>> df = spark.createDataFrame([('abcd',)], ['s',]) >>> df.select(substring(df.s, 1, 2).alias('s')).collect() [Row(s='ab')] @@ -2158,7 +2542,6 @@ def substring(str, pos, len): return Column(sc._jvm.functions.substring(_to_java_column(str), pos, len)) -@since(1.5) def substring_index(str, delim, count): """ Returns the substring from string str before count occurrences of the delimiter delim. @@ -2166,6 +2549,10 @@ def substring_index(str, delim, count): returned. If count is negative, every to the right of the final delimiter (counting from the right) is returned. substring_index performs a case-sensitive match when searching for delim. + .. 
versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('a.b.c.d',)], ['s']) >>> df.select(substring_index(df.s, '.', 2).alias('s')).collect() [Row(s='a.b')] @@ -2176,10 +2563,13 @@ def substring_index(str, delim, count): return Column(sc._jvm.functions.substring_index(_to_java_column(str), delim, count)) -@since(1.5) def levenshtein(left, right): """Computes the Levenshtein distance of the two given strings. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df0 = spark.createDataFrame([('kitten', 'sitting',)], ['l', 'r']) >>> df0.select(levenshtein('l', 'r').alias('d')).collect() [Row(d=3)] @@ -2189,18 +2579,28 @@ def levenshtein(left, right): return Column(jc) -@since(1.5) def locate(substr, str, pos=1): """ Locate the position of the first occurrence of substr in a string column, after position pos. - .. note:: The position is not zero based, but 1 based index. Returns 0 if substr - could not be found in str. + .. versionadded:: 1.5.0 + + Parameters + ---------- + substr : str + a string + str : :class:`Column` or str + a Column of :class:`pyspark.sql.types.StringType` + pos : int, optional + start position (zero based) - :param substr: a string - :param str: a Column of :class:`pyspark.sql.types.StringType` - :param pos: start position (zero based) + Notes + ----- + The position is not zero based, but 1 based index. Returns 0 if substr + could not be found in str. + Examples + -------- >>> df = spark.createDataFrame([('abcd',)], ['s',]) >>> df.select(locate('b', df.s, 1).alias('s')).collect() [Row(s=2)] @@ -2209,11 +2609,14 @@ def locate(substr, str, pos=1): return Column(sc._jvm.functions.locate(substr, _to_java_column(str), pos)) -@since(1.5) def lpad(col, len, pad): """ Left-pad the string column to width `len` with `pad`. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('abcd',)], ['s',]) >>> df.select(lpad(df.s, 6, '#').alias('s')).collect() [Row(s='##abcd')] @@ -2222,11 +2625,14 @@ def lpad(col, len, pad): return Column(sc._jvm.functions.lpad(_to_java_column(col), len, pad)) -@since(1.5) def rpad(col, len, pad): """ Right-pad the string column to width `len` with `pad`. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('abcd',)], ['s',]) >>> df.select(rpad(df.s, 6, '#').alias('s')).collect() [Row(s='abcd##')] @@ -2235,11 +2641,14 @@ def rpad(col, len, pad): return Column(sc._jvm.functions.rpad(_to_java_column(col), len, pad)) -@since(1.5) def repeat(col, n): """ Repeats a string column n times, and returns it as a new string column. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('ab',)], ['s',]) >>> df.select(repeat(df.s, 3).alias('s')).collect() [Row(s='ababab')] @@ -2248,15 +2657,21 @@ def repeat(col, n): return Column(sc._jvm.functions.repeat(_to_java_column(col), n)) -@since(1.5) def split(str, pattern, limit=-1): """ Splits str around matches of the given pattern. - :param str: a string expression to split - :param pattern: a string representing a regular expression. The regex string should be + .. versionadded:: 1.5.0 + + Parameters + ---------- + str : :class:`Column` or str + a string expression to split + pattern : str + a string representing a regular expression. The regex string should be a Java regular expression. - :param limit: an integer which controls the number of times `pattern` is applied. + limit : int, optional + an integer which controls the number of times `pattern` is applied. 
* ``limit > 0``: The resulting array's length will not be more than `limit`, and the resulting array's last entry will contain all input beyond the last @@ -2264,9 +2679,11 @@ def split(str, pattern, limit=-1): * ``limit <= 0``: `pattern` will be applied as many times as possible, and the resulting array can be of any size. - .. versionchanged:: 3.0 - `split` now takes an optional `limit` field. If not provided, default limit value is -1. + .. versionchanged:: 3.0 + `split` now takes an optional `limit` field. If not provided, default limit value is -1. + Examples + -------- >>> df = spark.createDataFrame([('oneAtwoBthreeC',)], ['s',]) >>> df.select(split(df.s, '[ABC]', 2).alias('s')).collect() [Row(s=['one', 'twoBthreeC'])] @@ -2277,11 +2694,14 @@ def split(str, pattern, limit=-1): return Column(sc._jvm.functions.split(_to_java_column(str), pattern, limit)) -@since(1.5) def regexp_extract(str, pattern, idx): r"""Extract a specific group matched by a Java regex, from the specified string column. If the regex did not match, or the specified group did not match, an empty string is returned. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('100-200',)], ['str']) >>> df.select(regexp_extract('str', r'(\d+)-(\d+)', 1).alias('d')).collect() [Row(d='100')] @@ -2297,10 +2717,13 @@ def regexp_extract(str, pattern, idx): return Column(jc) -@since(1.5) def regexp_replace(str, pattern, replacement): r"""Replace all substrings of the specified string value that match regexp with rep. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('100-200',)], ['str']) >>> df.select(regexp_replace('str', r'(\d+)', '--').alias('d')).collect() [Row(d='-----')] @@ -2310,10 +2733,13 @@ def regexp_replace(str, pattern, replacement): return Column(jc) -@since(1.5) def initcap(col): """Translate the first letter of each word to upper case in the sentence. + .. versionadded:: 1.5.0 + + Examples + -------- >>> spark.createDataFrame([('ab cd',)], ['a']).select(initcap("a").alias('v')).collect() [Row(v='Ab Cd')] """ @@ -2321,11 +2747,14 @@ def initcap(col): return Column(sc._jvm.functions.initcap(_to_java_column(col))) -@since(1.5) def soundex(col): """ Returns the SoundEx encoding for a string + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([("Peters",),("Uhrbach",)], ['name']) >>> df.select(soundex(df.name).alias("soundex")).collect() [Row(soundex='P362'), Row(soundex='U612')] @@ -2334,10 +2763,13 @@ def soundex(col): return Column(sc._jvm.functions.soundex(_to_java_column(col))) -@since(1.5) def bin(col): """Returns the string representation of the binary value of the given column. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df.select(bin(df.age).alias('c')).collect() [Row(c='10'), Row(c='101')] """ @@ -2346,12 +2778,15 @@ def bin(col): return Column(jc) -@since(1.5) def hex(col): """Computes hex value of the given column, which could be :class:`pyspark.sql.types.StringType`, :class:`pyspark.sql.types.BinaryType`, :class:`pyspark.sql.types.IntegerType` or :class:`pyspark.sql.types.LongType`. + .. versionadded:: 1.5.0 + + Examples + -------- >>> spark.createDataFrame([('ABC', 3)], ['a', 'b']).select(hex('a'), hex('b')).collect() [Row(hex(a)='414243', hex(b)='3')] """ @@ -2360,11 +2795,14 @@ def hex(col): return Column(jc) -@since(1.5) def unhex(col): """Inverse of hex. Interprets each pair of characters as a hexadecimal number and converts to the byte representation of number. + .. 
versionadded:: 1.5.0 + + Examples + -------- >>> spark.createDataFrame([('414243',)], ['a']).select(unhex('a')).collect() [Row(unhex(a)=bytearray(b'ABC'))] """ @@ -2372,12 +2810,15 @@ def unhex(col): return Column(sc._jvm.functions.unhex(_to_java_column(col))) -@since(1.5) def length(col): """Computes the character length of string data or number of bytes of binary data. The length of character data includes the trailing spaces. The length of binary data includes binary zeros. + .. versionadded:: 1.5.0 + + Examples + -------- >>> spark.createDataFrame([('ABC ',)], ['a']).select(length('a').alias('length')).collect() [Row(length=4)] """ @@ -2385,13 +2826,16 @@ def length(col): return Column(sc._jvm.functions.length(_to_java_column(col))) -@since(1.5) def translate(srcCol, matching, replace): """A function translate any character in the `srcCol` by a character in `matching`. The characters in `replace` is corresponding to the characters in `matching`. The translate will happen when any character in the string matching with the character in the `matching`. + .. versionadded:: 1.5.0 + + Examples + -------- >>> spark.createDataFrame([('translate',)], ['a']).select(translate('a', "rnlt", "123") \\ ... .alias('r')).collect() [Row(r='1a2s3ae')] @@ -2402,13 +2846,19 @@ def translate(srcCol, matching, replace): # ---------------------- Collection functions ------------------------------ -@since(2.0) def create_map(*cols): """Creates a new map column. - :param cols: list of column names (string) or list of :class:`Column` expressions that are + .. versionadded:: 2.0.0 + + Parameters + ---------- + cols : :class:`Column` or str + column names or :class:`Column`\\s that are grouped as key-value pairs, e.g. (key1, value1, key2, value2, ...). + Examples + -------- >>> df.select(create_map('name', 'age').alias("map")).collect() [Row(map={'Alice': 2}), Row(map={'Bob': 5})] >>> df.select(create_map([df.name, df.age]).alias("map")).collect() @@ -2421,13 +2871,20 @@ def create_map(*cols): return Column(jc) -@since(2.4) def map_from_arrays(col1, col2): """Creates a new map from two arrays. - :param col1: name of column containing a set of keys. All elements should not be null - :param col2: name of column containing a set of values + .. versionadded:: 2.4.0 + + Parameters + ---------- + col1 : :class:`Column` or str + name of column containing a set of keys. All elements should not be null + col2 : :class:`Column` or str + name of column containing a set of values + Examples + -------- >>> df = spark.createDataFrame([([2, 5], ['a', 'b'])], ['k', 'v']) >>> df.select(map_from_arrays(df.k, df.v).alias("map")).show() +----------------+ @@ -2440,13 +2897,19 @@ def map_from_arrays(col1, col2): return Column(sc._jvm.functions.map_from_arrays(_to_java_column(col1), _to_java_column(col2))) -@since(1.4) def array(*cols): """Creates a new array column. - :param cols: list of column names (string) or list of :class:`Column` expressions that have + .. versionadded:: 1.4.0 + + Parameters + ---------- + cols : :class:`Column` or str + column names or :class:`Column`\\s that have the same data type. + Examples + -------- >>> df.select(array('age', 'age').alias("arr")).collect() [Row(arr=[2, 2]), Row(arr=[5, 5])] >>> df.select(array([df.age, df.age]).alias("arr")).collect() @@ -2459,15 +2922,22 @@ def array(*cols): return Column(jc) -@since(1.5) def array_contains(col, value): """ Collection function: returns null if the array is null, true if the array contains the given value, and false otherwise. 
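The null-array case described above is not exercised by the doctest in this hunk; a hedged sketch of it (the DDL schema string and the None result are assumptions based on the description, not taken from the diff):

>>> null_df = spark.createDataFrame([(None,)], 'data array<string>')
>>> null_df.select(array_contains(null_df.data, "a")).collect()  # doctest: +SKIP
[Row(array_contains(data, a)=None)]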
- :param col: name of column containing array - :param value: value or column to check for in array + .. versionadded:: 1.5.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column containing array + value : + value or column to check for in array + Examples + -------- >>> df = spark.createDataFrame([(["a", "b", "c"],), ([],)], ['data']) >>> df.select(array_contains(df.data, "a")).collect() [Row(array_contains(data, a)=True), Row(array_contains(data, a)=False)] @@ -2479,13 +2949,16 @@ def array_contains(col, value): return Column(sc._jvm.functions.array_contains(_to_java_column(col), value)) -@since(2.4) def arrays_overlap(a1, a2): """ Collection function: returns true if the arrays contain any common non-null element; if not, returns null if both the arrays are non-empty and any of them contains a null element; returns false otherwise. + .. versionadded:: 2.4.0 + + Examples + -------- >>> df = spark.createDataFrame([(["a", "b"], ["b", "c"]), (["a"], ["b", "c"])], ['x', 'y']) >>> df.select(arrays_overlap(df.x, df.y).alias("overlap")).collect() [Row(overlap=True), Row(overlap=False)] @@ -2494,16 +2967,24 @@ def arrays_overlap(a1, a2): return Column(sc._jvm.functions.arrays_overlap(_to_java_column(a1), _to_java_column(a2))) -@since(2.4) def slice(x, start, length): """ Collection function: returns an array containing all the elements in `x` from index `start` (array indices start at 1, or from the end if `start` is negative) with the specified `length`. - :param x: the array to be sliced - :param start: the starting index - :param length: the length of the slice + .. versionadded:: 2.4.0 + + Parameters + ---------- + x : :class:`Column` or str + the array to be sliced + start : :class:`Column` or int + the starting index + length : :class:`Column` or int + the length of the slice + Examples + -------- >>> df = spark.createDataFrame([([1, 2, 3],), ([4, 5],)], ['x']) >>> df.select(slice(df.x, 2, 2).alias("sliced")).collect() [Row(sliced=[2, 3]), Row(sliced=[5])] @@ -2516,12 +2997,15 @@ def slice(x, start, length): )) -@since(2.4) def array_join(col, delimiter, null_replacement=None): """ Concatenates the elements of `column` using the `delimiter`. Null values are replaced with `null_replacement` if set, otherwise they are ignored. + .. versionadded:: 2.4.0 + + Examples + -------- >>> df = spark.createDataFrame([(["a", "b", "c"],), (["a", None],)], ['data']) >>> df.select(array_join(df.data, ",").alias("joined")).collect() [Row(joined='a,b,c'), Row(joined='a')] @@ -2536,12 +3020,15 @@ def array_join(col, delimiter, null_replacement=None): _to_java_column(col), delimiter, null_replacement)) -@since(1.5) def concat(*cols): """ Concatenates multiple input columns together into a single column. The function works with strings, binary and compatible array columns. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('abcd','123')], ['s', 'd']) >>> df.select(concat(df.s, df.d).alias('s')).collect() [Row(s='abcd123')] @@ -2554,15 +3041,20 @@ def concat(*cols): return Column(sc._jvm.functions.concat(_to_seq(sc, cols, _to_java_column))) -@since(2.4) def array_position(col, value): """ Collection function: Locates the position of the first occurrence of the given value in the given array. Returns null if either of the arguments are null. - .. note:: The position is not zero based, but 1 based index. Returns 0 if the given - value could not be found in the array. + .. 
versionadded:: 2.4.0 + Notes + ----- + The position is not zero based, but 1 based index. Returns 0 if the given + value could not be found in the array. + + Examples + -------- >>> df = spark.createDataFrame([(["c", "b", "a"],), ([],)], ['data']) >>> df.select(array_position(df.data, "a")).collect() [Row(array_position(data, a)=3), Row(array_position(data, a)=0)] @@ -2571,17 +3063,26 @@ def array_position(col, value): return Column(sc._jvm.functions.array_position(_to_java_column(col), value)) -@since(2.4) def element_at(col, extraction): """ Collection function: Returns element of array at given index in extraction if col is array. Returns value for the given key in extraction if col is map. - :param col: name of column containing array or map - :param extraction: index to check for in array or key to check for in map + .. versionadded:: 2.4.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column containing array or map + extraction : + index to check for in array or key to check for in map - .. note:: The position is not zero based, but 1 based index. + Notes + ----- + The position is not zero based, but 1 based index. + Examples + -------- >>> df = spark.createDataFrame([(["a", "b", "c"],), ([],)], ['data']) >>> df.select(element_at(df.data, 1)).collect() [Row(element_at(data, 1)='a'), Row(element_at(data, 1)=None)] @@ -2595,14 +3096,21 @@ def element_at(col, extraction): _to_java_column(col), lit(extraction)._jc)) -@since(2.4) def array_remove(col, element): """ Collection function: Remove all elements that equal to element from the given array. - :param col: name of column containing array - :param element: element to be removed from the array + .. versionadded:: 2.4.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column containing array + element : + element to be removed from the array + Examples + -------- >>> df = spark.createDataFrame([([1, 2, 3, 1, 1],), ([],)], ['data']) >>> df.select(array_remove(df.data, 1)).collect() [Row(array_remove(data, 1)=[2, 3]), Row(array_remove(data, 1)=[])] @@ -2611,13 +3119,19 @@ def array_remove(col, element): return Column(sc._jvm.functions.array_remove(_to_java_column(col), element)) -@since(2.4) def array_distinct(col): """ Collection function: removes duplicate values from the array. - :param col: name of column or expression + .. versionadded:: 2.4.0 + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + + Examples + -------- >>> df = spark.createDataFrame([([1, 2, 3, 2],), ([4, 5, 5, 4],)], ['data']) >>> df.select(array_distinct(df.data)).collect() [Row(array_distinct(data)=[1, 2, 3]), Row(array_distinct(data)=[4, 5])] @@ -2626,15 +3140,22 @@ def array_distinct(col): return Column(sc._jvm.functions.array_distinct(_to_java_column(col))) -@since(2.4) def array_intersect(col1, col2): """ Collection function: returns an array of the elements in the intersection of col1 and col2, without duplicates. - :param col1: name of column containing array - :param col2: name of column containing array + .. 
versionadded:: 2.4.0 + + Parameters + ---------- + col1 : :class:`Column` or str + name of column containing array + col2 : :class:`Column` or str + name of column containing array + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([Row(c1=["b", "a", "c"], c2=["c", "d", "a", "f"])]) >>> df.select(array_intersect(df.c1, df.c2)).collect() @@ -2644,15 +3165,22 @@ def array_intersect(col1, col2): return Column(sc._jvm.functions.array_intersect(_to_java_column(col1), _to_java_column(col2))) -@since(2.4) def array_union(col1, col2): """ Collection function: returns an array of the elements in the union of col1 and col2, without duplicates. - :param col1: name of column containing array - :param col2: name of column containing array + .. versionadded:: 2.4.0 + + Parameters + ---------- + col1 : :class:`Column` or str + name of column containing array + col2 : :class:`Column` or str + name of column containing array + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([Row(c1=["b", "a", "c"], c2=["c", "d", "a", "f"])]) >>> df.select(array_union(df.c1, df.c2)).collect() @@ -2662,15 +3190,22 @@ def array_union(col1, col2): return Column(sc._jvm.functions.array_union(_to_java_column(col1), _to_java_column(col2))) -@since(2.4) def array_except(col1, col2): """ Collection function: returns an array of the elements in col1 but not in col2, without duplicates. - :param col1: name of column containing array - :param col2: name of column containing array + .. versionadded:: 2.4.0 + + Parameters + ---------- + col1 : :class:`Column` or str + name of column containing array + col2 : :class:`Column` or str + name of column containing array + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([Row(c1=["b", "a", "c"], c2=["c", "d", "a", "f"])]) >>> df.select(array_except(df.c1, df.c2)).collect() @@ -2680,13 +3215,16 @@ def array_except(col1, col2): return Column(sc._jvm.functions.array_except(_to_java_column(col1), _to_java_column(col2))) -@since(1.4) def explode(col): """ Returns a new row for each element in the given array or map. Uses the default column name `col` for elements in the array and `key` and `value` for elements in the map unless specified otherwise. + .. versionadded:: 1.4.0 + + Examples + -------- >>> from pyspark.sql import Row >>> eDF = spark.createDataFrame([Row(a=1, intlist=[1,2,3], mapfield={"a": "b"})]) >>> eDF.select(explode(eDF.intlist).alias("anInt")).collect() @@ -2704,13 +3242,16 @@ def explode(col): return Column(jc) -@since(2.1) def posexplode(col): """ Returns a new row for each element with position in the given array or map. Uses the default column name `pos` for position, and `col` for elements in the array and `key` and `value` for elements in the map unless specified otherwise. + .. versionadded:: 2.1.0 + + Examples + -------- >>> from pyspark.sql import Row >>> eDF = spark.createDataFrame([Row(a=1, intlist=[1,2,3], mapfield={"a": "b"})]) >>> eDF.select(posexplode(eDF.intlist)).collect() @@ -2728,7 +3269,6 @@ def posexplode(col): return Column(jc) -@since(2.3) def explode_outer(col): """ Returns a new row for each element in the given array or map. @@ -2736,6 +3276,10 @@ def explode_outer(col): Uses the default column name `col` for elements in the array and `key` and `value` for elements in the map unless specified otherwise. + .. versionadded:: 2.3.0 + + Examples + -------- >>> df = spark.createDataFrame( ... [(1, ["foo", "bar"], {"x": 1.0}), (2, [], {}), (3, None, None)], ... 
("id", "an_array", "a_map") @@ -2764,7 +3308,6 @@ def explode_outer(col): return Column(jc) -@since(2.3) def posexplode_outer(col): """ Returns a new row for each element with position in the given array or map. @@ -2772,6 +3315,10 @@ def posexplode_outer(col): Uses the default column name `pos` for position, and `col` for elements in the array and `key` and `value` for elements in the map unless specified otherwise. + .. versionadded:: 2.3.0 + + Examples + -------- >>> df = spark.createDataFrame( ... [(1, ["foo", "bar"], {"x": 1.0}), (2, [], {}), (3, None, None)], ... ("id", "an_array", "a_map") @@ -2799,15 +3346,22 @@ def posexplode_outer(col): return Column(jc) -@since(1.6) def get_json_object(col, path): """ Extracts json object from a json string based on json path specified, and returns json string of the extracted json object. It will return null if the input json string is invalid. - :param col: string column in json format - :param path: path to the json object to extract + .. versionadded:: 1.6.0 + + Parameters + ---------- + col : :class:`Column` or str + string column in json format + path : str + path to the json object to extract + Examples + -------- >>> data = [("1", '''{"f1": "value1", "f2": "value2"}'''), ("2", '''{"f1": "value12"}''')] >>> df = spark.createDataFrame(data, ("key", "jstring")) >>> df.select(df.key, get_json_object(df.jstring, '$.f1').alias("c0"), \\ @@ -2819,13 +3373,20 @@ def get_json_object(col, path): return Column(jc) -@since(1.6) def json_tuple(col, *fields): """Creates a new row for a json column according to the given field names. - :param col: string column in json format - :param fields: list of fields to extract + .. versionadded:: 1.6.0 + + Parameters + ---------- + col : :class:`Column` or str + string column in json format + fields : str + fields to extract + Examples + -------- >>> data = [("1", '''{"f1": "value1", "f2": "value2"}'''), ("2", '''{"f1": "value12"}''')] >>> df = spark.createDataFrame(data, ("key", "jstring")) >>> df.select(df.key, json_tuple(df.jstring, 'f1', 'f2')).collect() @@ -2836,19 +3397,28 @@ def json_tuple(col, *fields): return Column(jc) -@since(2.1) def from_json(col, schema, options={}): """ Parses a column containing a JSON string into a :class:`MapType` with :class:`StringType` as keys type, :class:`StructType` or :class:`ArrayType` with the specified schema. Returns `null`, in the case of an unparseable string. - :param col: string column in json format - :param schema: a StructType or ArrayType of StructType to use when parsing the json column. - :param options: options to control parsing. accepts the same options as the json datasource + .. versionadded:: 2.1.0 - .. note:: Since Spark 2.3, the DDL-formatted string is also supported for ``schema``. + Parameters + ---------- + col : :class:`Column` or str + string column in json format + schema : :class:`DataType` or str + a StructType or ArrayType of StructType to use when parsing the json column. + .. versionchanged:: 2.3 + the DDL-formatted string is also supported for ``schema``. + options : dict, optional + options to control parsing. 
accepts the same options as the json datasource + + Examples + -------- >>> from pyspark.sql.types import * >>> data = [(1, '''{"a": 1}''')] >>> schema = StructType([StructField("a", IntegerType())]) @@ -2883,17 +3453,24 @@ def from_json(col, schema, options={}): return Column(jc) -@since(2.1) def to_json(col, options={}): """ Converts a column containing a :class:`StructType`, :class:`ArrayType` or a :class:`MapType` into a JSON string. Throws an exception, in the case of an unsupported type. - :param col: name of column containing a struct, an array or a map. - :param options: options to control converting. accepts the same options as the JSON datasource. - Additionally the function supports the `pretty` option which enables - pretty JSON generation. + .. versionadded:: 2.1.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column containing a struct, an array or a map. + options : dict, optional + options to control converting. accepts the same options as the JSON datasource. + Additionally the function supports the `pretty` option which enables + pretty JSON generation. + Examples + -------- >>> from pyspark.sql import Row >>> from pyspark.sql.types import * >>> data = [(1, Row(age=2, name='Alice'))] @@ -2923,17 +3500,24 @@ def to_json(col, options={}): return Column(jc) -@since(2.4) def schema_of_json(json, options={}): """ Parses a JSON string and infers its schema in DDL format. - :param json: a JSON string or a string literal containing a JSON string. - :param options: options to control parsing. accepts the same options as the JSON datasource + .. versionadded:: 2.4.0 - .. versionchanged:: 3.0 - It accepts `options` parameter to control schema inferring. + Parameters + ---------- + json : :class:`Column` or str + a JSON string or a string literal containing a JSON string. + options : dict, optional + options to control parsing. accepts the same options as the JSON datasource + .. versionchanged:: 3.0 + It accepts `options` parameter to control schema inferring. + + Examples + -------- >>> df = spark.range(1) >>> df.select(schema_of_json(lit('{"a": 0}')).alias("json")).collect() [Row(json='STRUCT<`a`: BIGINT>')] @@ -2953,14 +3537,21 @@ def schema_of_json(json, options={}): return Column(jc) -@since(3.0) def schema_of_csv(csv, options={}): """ Parses a CSV string and infers its schema in DDL format. - :param col: a CSV string or a string literal containing a CSV string. - :param options: options to control parsing. accepts the same options as the CSV datasource + .. versionadded:: 3.0.0 + + Parameters + ---------- + csv : :class:`Column` or str + a CSV string or a string literal containing a CSV string. + options : dict, optional + options to control parsing. accepts the same options as the CSV datasource + Examples + -------- >>> df = spark.range(1) >>> df.select(schema_of_csv(lit('1|a'), {'sep':'|'}).alias("csv")).collect() [Row(csv='STRUCT<`_c0`: INT, `_c1`: STRING>')] @@ -2979,15 +3570,22 @@ def schema_of_csv(csv, options={}): return Column(jc) -@since(3.0) def to_csv(col, options={}): """ Converts a column containing a :class:`StructType` into a CSV string. Throws an exception, in the case of an unsupported type. - :param col: name of column containing a struct. - :param options: options to control converting. accepts the same options as the CSV datasource. + .. versionadded:: 3.0.0 + Parameters + ---------- + col : :class:`Column` or str + name of column containing a struct. + options: dict, optional + options to control converting. 
accepts the same options as the CSV datasource. + + Examples + -------- >>> from pyspark.sql import Row >>> data = [(1, Row(age=2, name='Alice'))] >>> df = spark.createDataFrame(data, ("key", "value")) @@ -3000,13 +3598,19 @@ def to_csv(col, options={}): return Column(jc) -@since(1.5) def size(col): """ Collection function: returns the length of the array or map stored in the column. - :param col: name of column or expression + .. versionadded:: 1.5.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + Examples + -------- >>> df = spark.createDataFrame([([1, 2, 3],),([1],),([],)], ['data']) >>> df.select(size(df.data)).collect() [Row(size(data)=3), Row(size(data)=1), Row(size(data)=0)] @@ -3015,13 +3619,19 @@ def size(col): return Column(sc._jvm.functions.size(_to_java_column(col))) -@since(2.4) def array_min(col): """ Collection function: returns the minimum value of the array. - :param col: name of column or expression + .. versionadded:: 2.4.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + Examples + -------- >>> df = spark.createDataFrame([([2, 1, 3],), ([None, 10, -1],)], ['data']) >>> df.select(array_min(df.data).alias('min')).collect() [Row(min=1), Row(min=-1)] @@ -3030,13 +3640,19 @@ def array_min(col): return Column(sc._jvm.functions.array_min(_to_java_column(col))) -@since(2.4) def array_max(col): """ Collection function: returns the maximum value of the array. - :param col: name of column or expression + .. versionadded:: 2.4.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + Examples + -------- >>> df = spark.createDataFrame([([2, 1, 3],), ([None, 10, -1],)], ['data']) >>> df.select(array_max(df.data).alias('max')).collect() [Row(max=3), Row(max=10)] @@ -3045,7 +3661,6 @@ def array_max(col): return Column(sc._jvm.functions.array_max(_to_java_column(col))) -@since(1.5) def sort_array(col, asc=True): """ Collection function: sorts the input array in ascending or descending order according @@ -3053,8 +3668,16 @@ def sort_array(col, asc=True): of the returned array in ascending order or at the end of the returned array in descending order. - :param col: name of column or expression + .. versionadded:: 1.5.0 + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + asc : bool, optional + + Examples + -------- >>> df = spark.createDataFrame([([2, 1, None, 3],),([1],),([],)], ['data']) >>> df.select(sort_array(df.data).alias('r')).collect() [Row(r=[None, 1, 2, 3]), Row(r=[1]), Row(r=[])] @@ -3065,14 +3688,20 @@ def sort_array(col, asc=True): return Column(sc._jvm.functions.sort_array(_to_java_column(col), asc)) -@since(2.4) def array_sort(col): """ Collection function: sorts the input array in ascending order. The elements of the input array must be orderable. Null elements will be placed at the end of the returned array. - :param col: name of column or expression + .. versionadded:: 2.4.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + Examples + -------- >>> df = spark.createDataFrame([([2, 1, None, 3],),([1],),([],)], ['data']) >>> df.select(array_sort(df.data).alias('r')).collect() [Row(r=[1, 2, 3, None]), Row(r=[1]), Row(r=[])] @@ -3081,15 +3710,23 @@ def array_sort(col): return Column(sc._jvm.functions.array_sort(_to_java_column(col))) -@since(2.4) def shuffle(col): """ Collection function: Generates a random permutation of the given array. - .. 
note:: The function is non-deterministic. + .. versionadded:: 2.4.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression - :param col: name of column or expression + Notes + ----- + The function is non-deterministic. + Examples + -------- >>> df = spark.createDataFrame([([1, 20, 3, 5],), ([1, 20, None, 3],)], ['data']) >>> df.select(shuffle(df.data).alias('s')).collect() # doctest: +SKIP [Row(s=[3, 1, 5, 20]), Row(s=[20, None, 3, 1])] @@ -3098,13 +3735,19 @@ def shuffle(col): return Column(sc._jvm.functions.shuffle(_to_java_column(col))) -@since(1.5) def reverse(col): """ Collection function: returns a reversed string or an array with reverse order of elements. - :param col: name of column or expression + .. versionadded:: 1.5.0 + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + + Examples + -------- >>> df = spark.createDataFrame([('Spark SQL',)], ['data']) >>> df.select(reverse(df.data).alias('s')).collect() [Row(s='LQS krapS')] @@ -3116,15 +3759,21 @@ def reverse(col): return Column(sc._jvm.functions.reverse(_to_java_column(col))) -@since(2.4) def flatten(col): """ Collection function: creates a single array from an array of arrays. If a structure of nested arrays is deeper than two levels, only one level of nesting is removed. - :param col: name of column or expression + .. versionadded:: 2.4.0 + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + + Examples + -------- >>> df = spark.createDataFrame([([[1, 2, 3], [4, 5], [6]],), ([None, [4, 5]],)], ['data']) >>> df.select(flatten(df.data).alias('r')).collect() [Row(r=[1, 2, 3, 4, 5, 6]), Row(r=None)] @@ -3133,13 +3782,19 @@ def flatten(col): return Column(sc._jvm.functions.flatten(_to_java_column(col))) -@since(2.3) def map_keys(col): """ Collection function: Returns an unordered array containing the keys of the map. - :param col: name of column or expression + .. versionadded:: 2.3.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + Examples + -------- >>> from pyspark.sql.functions import map_keys >>> df = spark.sql("SELECT map(1, 'a', 2, 'b') as data") >>> df.select(map_keys("data").alias("keys")).show() @@ -3153,13 +3808,19 @@ def map_keys(col): return Column(sc._jvm.functions.map_keys(_to_java_column(col))) -@since(2.3) def map_values(col): """ Collection function: Returns an unordered array containing the values of the map. - :param col: name of column or expression + .. versionadded:: 2.3.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + Examples + -------- >>> from pyspark.sql.functions import map_values >>> df = spark.sql("SELECT map(1, 'a', 2, 'b') as data") >>> df.select(map_values("data").alias("values")).show() @@ -3173,13 +3834,19 @@ def map_values(col): return Column(sc._jvm.functions.map_values(_to_java_column(col))) -@since(3.0) def map_entries(col): """ Collection function: Returns an unordered array of all entries in the given map. - :param col: name of column or expression + .. 
versionadded:: 3.0.0 + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + + Examples + -------- >>> from pyspark.sql.functions import map_entries >>> df = spark.sql("SELECT map(1, 'a', 2, 'b') as data") >>> df.select(map_entries("data").alias("entries")).show() @@ -3193,13 +3860,19 @@ def map_entries(col): return Column(sc._jvm.functions.map_entries(_to_java_column(col))) -@since(2.4) def map_from_entries(col): """ Collection function: Returns a map created from the given array of entries. - :param col: name of column or expression + .. versionadded:: 2.4.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + Examples + -------- >>> from pyspark.sql.functions import map_from_entries >>> df = spark.sql("SELECT array(struct(1, 'a'), struct(2, 'b')) as data") >>> df.select(map_from_entries("data").alias("map")).show() @@ -3213,11 +3886,14 @@ def map_from_entries(col): return Column(sc._jvm.functions.map_from_entries(_to_java_column(col))) -@since(2.4) def array_repeat(col, count): """ Collection function: creates an array containing a column repeated count times. + .. versionadded:: 2.4.0 + + Examples + -------- >>> df = spark.createDataFrame([('ab',)], ['data']) >>> df.select(array_repeat(df.data, 3).alias('r')).collect() [Row(r=['ab', 'ab', 'ab'])] @@ -3229,14 +3905,20 @@ def array_repeat(col, count): )) -@since(2.4) def arrays_zip(*cols): """ Collection function: Returns a merged array of structs in which the N-th struct contains all N-th values of input arrays. - :param cols: columns of arrays to be merged. + .. versionadded:: 2.4.0 + Parameters + ---------- + cols : :class:`Column` or str + columns of arrays to be merged. + + Examples + -------- >>> from pyspark.sql.functions import arrays_zip >>> df = spark.createDataFrame([(([1, 2, 3], [2, 3, 4]))], ['vals1', 'vals2']) >>> df.select(arrays_zip(df.vals1, df.vals2).alias('zipped')).collect() @@ -3246,12 +3928,18 @@ def arrays_zip(*cols): return Column(sc._jvm.functions.arrays_zip(_to_seq(sc, cols, _to_java_column))) -@since(2.4) def map_concat(*cols): """Returns the union of all the given maps. - :param cols: list of column names (string) or list of :class:`Column` expressions + .. versionadded:: 2.4.0 + Parameters + ---------- + cols : :class:`Column` or str + column names or :class:`Column`\\s + + Examples + -------- >>> from pyspark.sql.functions import map_concat >>> df = spark.sql("SELECT map(1, 'a', 2, 'b') as map1, map(3, 'c') as map2") >>> df.select(map_concat("map1", "map2").alias("map3")).show(truncate=False) @@ -3268,13 +3956,16 @@ def map_concat(*cols): return Column(jc) -@since(2.4) def sequence(start, stop, step=None): """ Generate a sequence of integers from `start` to `stop`, incrementing by `step`. If `step` is not set, incrementing by 1 if `start` is less than or equal to `stop`, otherwise -1. + .. versionadded:: 2.4.0 + + Examples + -------- >>> df1 = spark.createDataFrame([(-2, 2)], ('C1', 'C2')) >>> df1.select(sequence('C1', 'C2').alias('r')).collect() [Row(r=[-2, -1, 0, 1, 2])] @@ -3290,16 +3981,24 @@ def sequence(start, stop, step=None): _to_java_column(start), _to_java_column(stop), _to_java_column(step))) -@since(3.0) def from_csv(col, schema, options={}): """ Parses a column containing a CSV string to a row with the specified schema. Returns `null`, in the case of an unparseable string. - :param col: string column in CSV format - :param schema: a string with schema in DDL format to use when parsing the CSV column. 
- :param options: options to control parsing. accepts the same options as the CSV datasource + .. versionadded:: 3.0.0 + Parameters + ---------- + col : :class:`Column` or str + string column in CSV format + schema :class:`Column` or str + a string with schema in DDL format to use when parsing the CSV column. + options : dict, optional + options to control parsing. accepts the same options as the CSV datasource + + Examples + -------- >>> data = [("1,2,3",)] >>> df = spark.createDataFrame(data, ("value",)) >>> df.select(from_csv(df.value, "a INT, b INT, c INT").alias("csv")).collect() @@ -3331,7 +4030,9 @@ def _unresolved_named_lambda_variable(*name_parts): Create `o.a.s.sql.expressions.UnresolvedNamedLambdaVariable`, convert it to o.s.sql.Column and wrap in Python `Column` - :param name_parts: str + Parameters + ---------- + name_parts : str """ sc = SparkContext._active_spark_context name_parts_seq = _to_seq(sc, name_parts) @@ -3428,13 +4129,18 @@ def _invoke_higher_order_function(name, cols, funs): return Column(sc._jvm.Column(expr(*jcols + jfuns))) -@since(3.1) def transform(col, f): """ Returns an array of elements after applying a transformation to each element in the input array. - :param col: name of column or expression - :param f: a function that is applied to each element of the input array. + .. versionadded:: 3.1.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + f : function + a function that is applied to each element of the input array. Can take one of the following forms: - Unary ``(x: Column) -> Column: ...`` @@ -3446,8 +4152,12 @@ def transform(col, f): Python ``UserDefinedFunctions`` are not supported (`SPARK-27052 `__). - :return: a :class:`pyspark.sql.Column` + Returns + ------- + :class:`pyspark.sql.Column` + Examples + -------- >>> df = spark.createDataFrame([(1, [1, 2, 3, 4])], ("key", "values")) >>> df.select(transform("values", lambda x: x * 2).alias("doubled")).show() +------------+ @@ -3468,19 +4178,26 @@ def transform(col, f): return _invoke_higher_order_function("ArrayTransform", [col], [f]) -@since(3.1) def exists(col, f): """ Returns whether a predicate holds for one or more elements in the array. - :param col: name of column or expression - :param f: an function ``(x: Column) -> Column: ...`` returning the Boolean expression. + .. versionadded:: 3.1.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + f : function + ``(x: Column) -> Column: ...`` returning the Boolean expression. Can use methods of :class:`pyspark.sql.Column`, functions defined in :py:mod:`pyspark.sql.functions` and Scala ``UserDefinedFunctions``. Python ``UserDefinedFunctions`` are not supported (`SPARK-27052 `__). :return: a :class:`pyspark.sql.Column` + Examples + -------- >>> df = spark.createDataFrame([(1, [1, 2, 3, 4]), (2, [3, -1, 0])],("key", "values")) >>> df.select(exists("values", lambda x: x < 0).alias("any_negative")).show() +------------+ @@ -3493,19 +4210,29 @@ def exists(col, f): return _invoke_higher_order_function("ArrayExists", [col], [f]) -@since(3.1) def forall(col, f): """ Returns whether a predicate holds for every element in the array. - :param col: name of column or expression - :param f: an function ``(x: Column) -> Column: ...`` returning the Boolean expression. + .. versionadded:: 3.1.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + f : function + ``(x: Column) -> Column: ...`` returning the Boolean expression. 
Can use methods of :class:`pyspark.sql.Column`, functions defined in :py:mod:`pyspark.sql.functions` and Scala ``UserDefinedFunctions``. Python ``UserDefinedFunctions`` are not supported (`SPARK-27052 `__). - :return: a :class:`pyspark.sql.Column` + Returns + ------- + :class:`pyspark.sql.Column` + + Examples + -------- >>> df = spark.createDataFrame( ... [(1, ["bar"]), (2, ["foo", "bar"]), (3, ["foobar", "foo"])], ... ("key", "values") @@ -3522,13 +4249,18 @@ def forall(col, f): return _invoke_higher_order_function("ArrayForAll", [col], [f]) -@since(3.1) def filter(col, f): """ Returns an array of elements for which a predicate holds in a given array. - :param col: name of column or expression - :param f: A function that returns the Boolean expression. + .. versionadded:: 3.1.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + f : function + A function that returns the Boolean expression. Can take one of the following forms: - Unary ``(x: Column) -> Column: ...`` @@ -3540,8 +4272,12 @@ def filter(col, f): Python ``UserDefinedFunctions`` are not supported (`SPARK-27052 `__). - :return: a :class:`pyspark.sql.Column` + Returns + ------- + :class:`pyspark.sql.Column` + Examples + -------- >>> df = spark.createDataFrame( ... [(1, ["2018-09-20", "2019-02-03", "2019-07-01", "2020-06-01"])], ... ("key", "values") @@ -3560,7 +4296,6 @@ def filter(col, f): return _invoke_higher_order_function("ArrayFilter", [col], [f]) -@since(3.1) def aggregate(col, zero, merge, finish=None): """ Applies a binary operator to an initial state and all elements in the array, @@ -3572,14 +4307,27 @@ def aggregate(col, zero, merge, finish=None): Python ``UserDefinedFunctions`` are not supported (`SPARK-27052 `__). - :param col: name of column or expression - :param zero: initial value. Name of column or expression - :param merge: a binary function ``(acc: Column, x: Column) -> Column...`` returning expression + .. versionadded:: 3.1.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + zero : :class:`Column` or str + initial value. Name of column or expression + merge : function + a binary function ``(acc: Column, x: Column) -> Column...`` returning expression of the same type as ``zero`` - :param finish: an optional unary function ``(x: Column) -> Column: ...`` + finish : function + an optional unary function ``(x: Column) -> Column: ...`` used to convert accumulated value. - :return: a :class:`pyspark.sql.Column` + Returns + ------- + :class:`pyspark.sql.Column` + + Examples + -------- >>> df = spark.createDataFrame([(1, [20.0, 4.0, 2.0, 6.0, 10.0])], ("id", "values")) >>> df.select(aggregate("values", lit(0.0), lambda acc, x: acc + x).alias("sum")).show() +----+ @@ -3621,22 +4369,33 @@ def aggregate(col, zero, merge, finish=None): ) -@since(3.1) def zip_with(col1, col2, f): """ Merge two given arrays, element-wise, into a single array using a function. If one array is shorter, nulls are appended at the end to match the length of the longer array, before applying the function. - :param col1: name of the first column or expression - :param col2: name of the second column or expression - :param f: a binary function ``(x1: Column, x2: Column) -> Column...`` + .. 
versionadded:: 3.1.0 + + Parameters + ---------- + col1 : :class:`Column` or str + name of the first column or expression + col2 : :class:`Column` or str + name of the second column or expression + f : function + a binary function ``(x1: Column, x2: Column) -> Column...`` Can use methods of :class:`pyspark.sql.Column`, functions defined in :py:mod:`pyspark.sql.functions` and Scala ``UserDefinedFunctions``. Python ``UserDefinedFunctions`` are not supported (`SPARK-27052 `__). - :return: a :class:`pyspark.sql.Column` + Returns + ------- + :class:`pyspark.sql.Column` + + Examples + -------- >>> df = spark.createDataFrame([(1, [1, 3, 5, 8], [0, 2, 4, 6])], ("id", "xs", "ys")) >>> df.select(zip_with("xs", "ys", lambda x, y: x ** y).alias("powers")).show(truncate=False) +---------------------------+ @@ -3656,20 +4415,30 @@ def zip_with(col1, col2, f): return _invoke_higher_order_function("ZipWith", [col1, col2], [f]) -@since(3.1) def transform_keys(col, f): """ Applies a function to every key-value pair in a map and returns a map with the results of those applications as the new keys for the pairs. - :param col: name of column or expression - :param f: a binary function ``(k: Column, v: Column) -> Column...`` + .. versionadded:: 3.1.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + f : function + a binary function ``(k: Column, v: Column) -> Column...`` Can use methods of :class:`pyspark.sql.Column`, functions defined in :py:mod:`pyspark.sql.functions` and Scala ``UserDefinedFunctions``. Python ``UserDefinedFunctions`` are not supported (`SPARK-27052 `__). - :return: a :class:`pyspark.sql.Column` + Returns + ------- + :class:`pyspark.sql.Column` + + Examples + -------- >>> df = spark.createDataFrame([(1, {"foo": -2.0, "bar": 2.0})], ("id", "data")) >>> df.select(transform_keys( ... "data", lambda k, _: upper(k)).alias("data_upper") @@ -3683,20 +4452,30 @@ def transform_keys(col, f): return _invoke_higher_order_function("TransformKeys", [col], [f]) -@since(3.1) def transform_values(col, f): """ Applies a function to every key-value pair in a map and returns a map with the results of those applications as the new values for the pairs. - :param col: name of column or expression - :param f: a binary function ``(k: Column, v: Column) -> Column...`` + .. versionadded:: 3.1.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + f : function + a binary function ``(k: Column, v: Column) -> Column...`` Can use methods of :class:`pyspark.sql.Column`, functions defined in :py:mod:`pyspark.sql.functions` and Scala ``UserDefinedFunctions``. Python ``UserDefinedFunctions`` are not supported (`SPARK-27052 `__). - :return: a :class:`pyspark.sql.Column` + Returns + ------- + :class:`pyspark.sql.Column` + + Examples + -------- >>> df = spark.createDataFrame([(1, {"IT": 10.0, "SALES": 2.0, "OPS": 24.0})], ("id", "data")) >>> df.select(transform_values( ... "data", lambda k, v: when(k.isin("IT", "OPS"), v + 10.0).otherwise(v) @@ -3710,19 +4489,29 @@ def transform_values(col, f): return _invoke_higher_order_function("TransformValues", [col], [f]) -@since(3.1) def map_filter(col, f): """ Returns a map whose key-value pairs satisfy a predicate. - :param col: name of column or expression - :param f: a binary function ``(k: Column, v: Column) -> Column...`` + .. 
versionadded:: 3.1.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + f : function + a binary function ``(k: Column, v: Column) -> Column...`` Can use methods of :class:`pyspark.sql.Column`, functions defined in :py:mod:`pyspark.sql.functions` and Scala ``UserDefinedFunctions``. Python ``UserDefinedFunctions`` are not supported (`SPARK-27052 `__). - :return: a :class:`pyspark.sql.Column` + Returns + ------- + :class:`pyspark.sql.Column` + + Examples + -------- >>> df = spark.createDataFrame([(1, {"foo": 42.0, "bar": 1.0, "baz": 32.0})], ("id", "data")) >>> df.select(map_filter( ... "data", lambda _, v: v > 30.0).alias("data_filtered") @@ -3736,20 +4525,31 @@ def map_filter(col, f): return _invoke_higher_order_function("MapFilter", [col], [f]) -@since(3.1) def map_zip_with(col1, col2, f): """ Merge two given maps, key-wise into a single map using a function. - :param col1: name of the first column or expression - :param col2: name of the second column or expression - :param f: a ternary function ``(k: Column, v1: Column, v2: Column) -> Column...`` + .. versionadded:: 3.1.0 + + Parameters + ---------- + col1 : :class:`Column` or str + name of the first column or expression + col2 : :class:`Column` or str + name of the second column or expression + f : function + a ternary function ``(k: Column, v1: Column, v2: Column) -> Column...`` Can use methods of :class:`pyspark.sql.Column`, functions defined in :py:mod:`pyspark.sql.functions` and Scala ``UserDefinedFunctions``. Python ``UserDefinedFunctions`` are not supported (`SPARK-27052 `__). - :return: a :class:`pyspark.sql.Column` + Returns + ------- + :class:`pyspark.sql.Column` + + Examples + -------- >>> df = spark.createDataFrame([ ... (1, {"IT": 24.0, "SALES": 12.00}, {"IT": 2.0, "SALES": 1.4})], ... ("id", "base", "ratio") @@ -3768,100 +4568,120 @@ def map_zip_with(col1, col2, f): # ---------------------- Partition transform functions -------------------------------- -@since(3.1) def years(col): """ Partition transform function: A transform for timestamps and dates to partition data into years. + .. versionadded:: 3.1.0 + + Examples + -------- >>> df.writeTo("catalog.db.table").partitionedBy( # doctest: +SKIP ... years("ts") ... ).createOrReplace() - .. warning:: - This function can be used only in combinatiion with - :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` - method of the `DataFrameWriterV2`. + Notes + ----- + This function can be used only in combination with + :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` + method of the `DataFrameWriterV2`. """ sc = SparkContext._active_spark_context return Column(sc._jvm.functions.years(_to_java_column(col))) -@since(3.1) def months(col): """ Partition transform function: A transform for timestamps and dates to partition data into months. + .. versionadded:: 3.1.0 + + Examples + -------- >>> df.writeTo("catalog.db.table").partitionedBy( ... months("ts") ... ).createOrReplace() # doctest: +SKIP - .. warning:: - This function can be used only in combinatiion with - :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` - method of the `DataFrameWriterV2`. + Notes + ----- + This function can be used only in combination with + :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` + method of the `DataFrameWriterV2`. 
""" sc = SparkContext._active_spark_context return Column(sc._jvm.functions.months(_to_java_column(col))) -@since(3.1) def days(col): """ Partition transform function: A transform for timestamps and dates to partition data into days. + .. versionadded:: 3.1.0 + + Examples + -------- >>> df.writeTo("catalog.db.table").partitionedBy( # doctest: +SKIP ... days("ts") ... ).createOrReplace() - .. warning:: - This function can be used only in combinatiion with - :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` - method of the `DataFrameWriterV2`. + Notes + ----- + This function can be used only in combination with + :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` + method of the `DataFrameWriterV2`. """ sc = SparkContext._active_spark_context return Column(sc._jvm.functions.days(_to_java_column(col))) -@since(3.1) def hours(col): """ Partition transform function: A transform for timestamps to partition data into hours. + .. versionadded:: 3.1.0 + + Examples + -------- >>> df.writeTo("catalog.db.table").partitionedBy( # doctest: +SKIP ... hours("ts") ... ).createOrReplace() - .. warning:: - This function can be used only in combinatiion with - :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` - method of the `DataFrameWriterV2`. + Notes + ----- + This function can be used only in combination with + :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` + method of the `DataFrameWriterV2`. """ sc = SparkContext._active_spark_context return Column(sc._jvm.functions.hours(_to_java_column(col))) -@since(3.1) def bucket(numBuckets, col): """ Partition transform function: A transform for any type that partitions by a hash of the input column. + .. versionadded:: 3.1.0 + + Examples + -------- >>> df.writeTo("catalog.db.table").partitionedBy( # doctest: +SKIP ... bucket(42, "ts") ... ).createOrReplace() - .. warning:: - This function can be used only in combination with - :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` - method of the `DataFrameWriterV2`. + Notes + ----- + This function can be used only in combination with + :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` + method of the `DataFrameWriterV2`. """ if not isinstance(numBuckets, (int, Column)): @@ -3880,29 +4700,21 @@ def bucket(numBuckets, col): # ---------------------------- User Defined Function ---------------------------------- -@since(1.3) def udf(f=None, returnType=StringType()): """Creates a user defined function (UDF). - .. note:: The user-defined functions are considered deterministic by default. Due to - optimization, duplicate invocations may be eliminated or the function may even be invoked - more times than it is present in the query. If your function is not deterministic, call - `asNondeterministic` on the user defined function. E.g.: + .. versionadded:: 1.3.0 - >>> from pyspark.sql.types import IntegerType - >>> import random - >>> random_udf = udf(lambda: int(random.random() * 100), IntegerType()).asNondeterministic() - - .. note:: The user-defined functions do not support conditional expressions or short circuiting - in boolean expressions and it ends up with being executed all internally. If the functions - can fail on special rows, the workaround is to incorporate the condition into the functions. - - .. note:: The user-defined functions do not take keyword arguments on the calling side. - - :param f: python function if used as a standalone function - :param returnType: the return type of the user-defined function. 
The value can be either a + Parameters + ---------- + f : function + python function if used as a standalone function + returnType : :class:`pyspark.sql.types.DataType` or str + the return type of the user-defined function. The value can be either a :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string. + Examples + -------- >>> from pyspark.sql.types import IntegerType >>> slen = udf(lambda s: len(s), IntegerType()) >>> @udf @@ -3922,6 +4734,23 @@ def udf(f=None, returnType=StringType()): +----------+--------------+------------+ | 8| JOHN DOE| 22| +----------+--------------+------------+ + + Notes + ----- + The user-defined functions are considered deterministic by default. Due to + optimization, duplicate invocations may be eliminated or the function may even be invoked + more times than it is present in the query. If your function is not deterministic, call + `asNondeterministic` on the user defined function. E.g.: + + >>> from pyspark.sql.types import IntegerType + >>> import random + >>> random_udf = udf(lambda: int(random.random() * 100), IntegerType()).asNondeterministic() + + The user-defined functions do not support conditional expressions or short circuiting + in boolean expressions and it ends up with being executed all internally. If the functions + can fail on special rows, the workaround is to incorporate the condition into the functions. + + The user-defined functions do not take keyword arguments on the calling side. """ # The following table shows most of Python data and SQL type conversions in normal UDFs that diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi index 7ba3f07e17c19..e395f5797bebd 100644 --- a/python/pyspark/sql/functions.pyi +++ b/python/pyspark/sql/functions.pyi @@ -174,7 +174,7 @@ def create_map(*cols: ColumnOrName) -> Column: ... def array(*cols: ColumnOrName) -> Column: ... def array_contains(col: ColumnOrName, value: Any) -> Column: ... def arrays_overlap(a1: ColumnOrName, a2: ColumnOrName) -> Column: ... -def slice(x: ColumnOrName, start: int, length: int) -> Column: ... +def slice(x: ColumnOrName, start: Union[Column, int], length: Union[Column, int]) -> Column: ... def array_join( col: ColumnOrName, delimiter: str, null_replacement: Optional[str] = ... ) -> Column: ... diff --git a/python/pyspark/sql/group.py b/python/pyspark/sql/group.py index 688f8d4992b7d..d3cbf9268c9c7 100644 --- a/python/pyspark/sql/group.py +++ b/python/pyspark/sql/group.py @@ -17,7 +17,6 @@ import sys -from pyspark import since from pyspark.sql.column import Column, _to_seq from pyspark.sql.dataframe import DataFrame from pyspark.sql.pandas.group_ops import PandasGroupedOpsMixin @@ -59,7 +58,6 @@ def __init__(self, jgd, df): self._df = df self.sql_ctx = df.sql_ctx - @since(1.3) def agg(self, *exprs): """Compute aggregates and returns the result as a :class:`DataFrame`. @@ -81,12 +79,21 @@ def agg(self, *exprs): Alternatively, ``exprs`` can also be a list of aggregate :class:`Column` expressions. - .. note:: Built-in aggregation functions and group aggregate pandas UDFs cannot be mixed - in a single call to this function. + .. versionadded:: 1.3.0 - :param exprs: a dict mapping from column name (string) to aggregate functions (string), + Parameters + ---------- + exprs : dict + a dict mapping from column name (string) to aggregate functions (string), or a list of :class:`Column`. + Notes + ----- + Built-in aggregation functions and group aggregate pandas UDFs cannot be mixed + in a single call to this function. 
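As an illustration of the two `exprs` forms the reworked `GroupedData.agg` docstring describes (a dict mapping column names to aggregate function names, or `Column` expressions), here is a minimal sketch; the local session and toy DataFrame are assumptions for the example, not part of this patch:

```
# Minimal sketch of the two agg() call styles documented above.
# The local SparkSession and toy data are illustrative assumptions only.
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.master("local[1]").appName("agg-forms").getOrCreate()
df = spark.createDataFrame([("Alice", 2), ("Bob", 5)], ["name", "age"])
gdf = df.groupBy("name")

gdf.agg({"age": "max"}).show()                  # dict: column name -> aggregate function name
gdf.agg(F.max(df.age).alias("max_age")).show()  # Column expressions
```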
+ + Examples + -------- >>> gdf = df.groupBy(df.name) >>> sorted(gdf.agg({"*": "count"}).collect()) [Row(name='Alice', count(1)=1), Row(name='Bob', count(1)=1)] @@ -113,23 +120,32 @@ def agg(self, *exprs): return DataFrame(jdf, self.sql_ctx) @dfapi - @since(1.3) def count(self): """Counts the number of records for each group. + .. versionadded:: 1.3.0 + + Examples + -------- >>> sorted(df.groupBy(df.age).count().collect()) [Row(age=2, count=1), Row(age=5, count=1)] """ @df_varargs_api - @since(1.3) def mean(self, *cols): """Computes average values for each numeric columns for each group. :func:`mean` is an alias for :func:`avg`. - :param cols: list of column names (string). Non-numeric columns are ignored. + .. versionadded:: 1.3.0 + Parameters + ---------- + cols : str + column names. Non-numeric columns are ignored. + + Examples + -------- >>> df.groupBy().mean('age').collect() [Row(avg(age)=3.5)] >>> df3.groupBy().mean('age', 'height').collect() @@ -137,14 +153,20 @@ def mean(self, *cols): """ @df_varargs_api - @since(1.3) def avg(self, *cols): """Computes average values for each numeric columns for each group. :func:`mean` is an alias for :func:`avg`. - :param cols: list of column names (string). Non-numeric columns are ignored. + .. versionadded:: 1.3.0 + + Parameters + ---------- + cols : str + column names. Non-numeric columns are ignored. + Examples + -------- >>> df.groupBy().avg('age').collect() [Row(avg(age)=3.5)] >>> df3.groupBy().avg('age', 'height').collect() @@ -152,10 +174,13 @@ def avg(self, *cols): """ @df_varargs_api - @since(1.3) def max(self, *cols): """Computes the max value for each numeric columns for each group. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.groupBy().max('age').collect() [Row(max(age)=5)] >>> df3.groupBy().max('age', 'height').collect() @@ -163,12 +188,18 @@ def max(self, *cols): """ @df_varargs_api - @since(1.3) def min(self, *cols): """Computes the min value for each numeric column for each group. - :param cols: list of column names (string). Non-numeric columns are ignored. + .. versionadded:: 1.3.0 + + Parameters + ---------- + cols : str + column names. Non-numeric columns are ignored. + Examples + -------- >>> df.groupBy().min('age').collect() [Row(min(age)=2)] >>> df3.groupBy().min('age', 'height').collect() @@ -176,19 +207,24 @@ def min(self, *cols): """ @df_varargs_api - @since(1.3) def sum(self, *cols): """Compute the sum for each numeric columns for each group. - :param cols: list of column names (string). Non-numeric columns are ignored. + .. versionadded:: 1.3.0 + Parameters + ---------- + cols : str + column names. Non-numeric columns are ignored. + + Examples + -------- >>> df.groupBy().sum('age').collect() [Row(sum(age)=7)] >>> df3.groupBy().sum('age', 'height').collect() [Row(sum(age)=7, sum(height)=165)] """ - @since(1.6) def pivot(self, pivot_col, values=None): """ Pivots a column of the current :class:`DataFrame` and perform the specified aggregation. @@ -196,9 +232,17 @@ def pivot(self, pivot_col, values=None): of distinct values to pivot on, and one that does not. The latter is more concise but less efficient, because Spark needs to first compute the list of distinct values internally. - :param pivot_col: Name of the column to pivot. - :param values: List of values that will be translated to columns in the output DataFrame. + .. versionadded:: 1.6.0 + + Parameters + ---------- + pivot_col : str + Name of the column to pivot. + values : + List of values that will be translated to columns in the output DataFrame. 
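The `pivot` docstring above distinguishes the call form with an explicit value list from the one without; a short hedged sketch of the difference (the session and `courses` DataFrame are assumptions for illustration, mirroring the `df4` doctest data):

```
# Sketch of pivot() with and without an explicit value list, as described above.
# Supplying the values up front avoids the extra pass Spark needs to compute them.
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()
courses = spark.createDataFrame(
    [(2012, "dotNET", 10000), (2012, "Java", 20000),
     (2013, "dotNET", 48000), (2013, "Java", 30000)],
    ["year", "course", "earnings"])

explicit = courses.groupBy("year").pivot("course", ["dotNET", "Java"]).sum("earnings")
inferred = courses.groupBy("year").pivot("course").sum("earnings")  # distinct values computed first
explicit.show()
```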
+ Examples + -------- # Compute the sum of earnings for each year by course with each course as a separate column >>> df4.groupBy("year").pivot("course", ["dotNET", "Java"]).sum("earnings").collect() diff --git a/python/pyspark/sql/pandas/conversion.py b/python/pyspark/sql/pandas/conversion.py index d39a4413a0f2e..3456c12e59c09 100644 --- a/python/pyspark/sql/pandas/conversion.py +++ b/python/pyspark/sql/pandas/conversion.py @@ -18,7 +18,6 @@ import warnings from collections import Counter -from pyspark import since from pyspark.rdd import _load_from_socket from pyspark.sql.pandas.serializers import ArrowCollectSerializer from pyspark.sql.types import IntegralType @@ -33,18 +32,23 @@ class PandasConversionMixin(object): can use this class. """ - @since(1.3) def toPandas(self): """ Returns the contents of this :class:`DataFrame` as Pandas ``pandas.DataFrame``. This is only available if Pandas is installed and available. - .. note:: This method should only be used if the resulting Pandas's :class:`DataFrame` is - expected to be small, as all the data is loaded into the driver's memory. + .. versionadded:: 1.3.0 - .. note:: Usage with spark.sql.execution.arrow.pyspark.enabled=True is experimental. + Notes + ----- + This method should only be used if the resulting Pandas's :class:`DataFrame` is + expected to be small, as all the data is loaded into the driver's memory. + Usage with spark.sql.execution.arrow.pyspark.enabled=True is experimental. + + Examples + -------- >>> df.toPandas() # doctest: +SKIP age name 0 2 Alice @@ -221,8 +225,7 @@ def _collect_as_arrow(self): """ Returns all records as a list of ArrowRecordBatches, pyarrow must be installed and available on driver and worker Python environments. - - .. note:: Experimental. + This is an experimental feature. """ from pyspark.sql.dataframe import DataFrame @@ -295,7 +298,11 @@ def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=Tr def _convert_from_pandas(self, pdf, schema, timezone): """ Convert a pandas.DataFrame to list of records that can be used to make a DataFrame - :return list of records + + Returns + ------- + list + list of records """ from pyspark.sql import SparkSession @@ -343,8 +350,16 @@ def _get_numpy_record_dtype(self, rec): """ Used when converting a pandas.DataFrame to Spark using to_records(), this will correct the dtypes of fields in a record so they can be properly loaded into Spark. - :param rec: a numpy record to check field dtypes - :return corrected dtype for a numpy.record or None if no correction needed + + Parameters + ---------- + rec : numpy.record + a numpy record to check field dtypes + + Returns + ------- + numpy.dtype + corrected dtype for a numpy.record or None if no correction needed """ import numpy as np cur_dtypes = rec.dtype diff --git a/python/pyspark/sql/pandas/functions.py b/python/pyspark/sql/pandas/functions.py index 82203fc03a9a1..16462e8702a0b 100644 --- a/python/pyspark/sql/pandas/functions.py +++ b/python/pyspark/sql/pandas/functions.py @@ -19,7 +19,6 @@ import warnings from inspect import getfullargspec -from pyspark import since from pyspark.rdd import PythonEvalType from pyspark.sql.pandas.typehints import infer_eval_type from pyspark.sql.pandas.utils import require_minimum_pandas_version, require_minimum_pyarrow_version @@ -39,7 +38,6 @@ class PandasUDFType(object): GROUPED_AGG = PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF -@since(2.3) def pandas_udf(f=None, returnType=None, functionType=None): """ Creates a pandas user defined function (a.k.a. 
vectorized user defined function). @@ -50,14 +48,22 @@ def pandas_udf(f=None, returnType=None, functionType=None): additional configuration is required. A Pandas UDF behaves as a regular PySpark function API in general. - :param f: user-defined function. A python function if used as a standalone function - :param returnType: the return type of the user-defined function. The value can be either a - :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string. - :param functionType: an enum value in :class:`pyspark.sql.functions.PandasUDFType`. - Default: SCALAR. + .. versionadded:: 2.3.0 - .. note:: This parameter exists for compatibility. Using Python type hints is encouraged. + Parameters + ---------- + f : function, optional + user-defined function. A python function if used as a standalone function + returnType : :class:`pyspark.sql.types.DataType` or str, optional + the return type of the user-defined function. The value can be either a + :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string. + functionType : int, optional + an enum value in :class:`pyspark.sql.functions.PandasUDFType`. + Default: SCALAR. This parameter exists for compatibility. + Using Python type hints is encouraged. + Examples + -------- In order to use this API, customarily the below are imported: >>> import pandas as pd @@ -263,30 +269,33 @@ def calculate(iterator: Iterator[pd.Series]) -> Iterator[pd.Series]: Therefore, mutating the input series is not allowed and will cause incorrect results. For the same reason, users should also not rely on the index of the input series. - .. seealso:: :meth:`pyspark.sql.GroupedData.agg` and :class:`pyspark.sql.Window` - - .. note:: The user-defined functions do not support conditional expressions or short circuiting - in boolean expressions and it ends up with being executed all internally. If the functions - can fail on special rows, the workaround is to incorporate the condition into the functions. - - .. note:: The user-defined functions do not take keyword arguments on the calling side. - - .. note:: The data type of returned `pandas.Series` from the user-defined functions should be - matched with defined `returnType` (see :meth:`types.to_arrow_type` and - :meth:`types.from_arrow_type`). When there is mismatch between them, Spark might do - conversion on returned data. The conversion is not guaranteed to be correct and results - should be checked for accuracy by users. - - .. note:: Currently, - :class:`pyspark.sql.types.MapType`, - :class:`pyspark.sql.types.ArrayType` of :class:`pyspark.sql.types.TimestampType` and - nested :class:`pyspark.sql.types.StructType` - are currently not supported as output types. - - .. seealso:: :meth:`pyspark.sql.DataFrame.mapInPandas` - .. seealso:: :meth:`pyspark.sql.GroupedData.applyInPandas` - .. seealso:: :meth:`pyspark.sql.PandasCogroupedOps.applyInPandas` - .. seealso:: :meth:`pyspark.sql.UDFRegistration.register` + Notes + ----- + The user-defined functions do not support conditional expressions or short circuiting + in boolean expressions and it ends up with being executed all internally. If the functions + can fail on special rows, the workaround is to incorporate the condition into the functions. + + The user-defined functions do not take keyword arguments on the calling side. + + The data type of returned `pandas.Series` from the user-defined functions should be + matched with defined `returnType` (see :meth:`types.to_arrow_type` and + :meth:`types.from_arrow_type`). 
When there is mismatch between them, Spark might do + conversion on returned data. The conversion is not guaranteed to be correct and results + should be checked for accuracy by users. + + Currently, + :class:`pyspark.sql.types.MapType`, + :class:`pyspark.sql.types.ArrayType` of :class:`pyspark.sql.types.TimestampType` and + nested :class:`pyspark.sql.types.StructType` + are currently not supported as output types. + + See Also + -------- + pyspark.sql.GroupedData.agg + pyspark.sql.DataFrame.mapInPandas + pyspark.sql.GroupedData.applyInPandas + pyspark.sql.PandasCogroupedOps.applyInPandas + pyspark.sql.UDFRegistration.register """ # The following table shows most of Pandas data and SQL type conversions in Pandas UDFs that diff --git a/python/pyspark/sql/pandas/group_ops.py b/python/pyspark/sql/pandas/group_ops.py index ce021fac147fb..8d4f67e2c7502 100644 --- a/python/pyspark/sql/pandas/group_ops.py +++ b/python/pyspark/sql/pandas/group_ops.py @@ -17,7 +17,6 @@ import sys import warnings -from pyspark import since from pyspark.rdd import PythonEvalType from pyspark.sql.column import Column from pyspark.sql.dataframe import DataFrame @@ -29,19 +28,27 @@ class PandasGroupedOpsMixin(object): can use this class. """ - @since(2.3) def apply(self, udf): """ It is an alias of :meth:`pyspark.sql.GroupedData.applyInPandas`; however, it takes a :meth:`pyspark.sql.functions.pandas_udf` whereas :meth:`pyspark.sql.GroupedData.applyInPandas` takes a Python native function. - .. note:: It is preferred to use :meth:`pyspark.sql.GroupedData.applyInPandas` over this - API. This API will be deprecated in the future releases. + .. versionadded:: 2.3.0 - :param udf: a grouped map user-defined function returned by + Parameters + ---------- + udf : :func:`pyspark.sql.functions.pandas_udf` + a grouped map user-defined function returned by :func:`pyspark.sql.functions.pandas_udf`. + Notes + ----- + It is preferred to use :meth:`pyspark.sql.GroupedData.applyInPandas` over this + API. This API will be deprecated in the future releases. + + Examples + -------- >>> from pyspark.sql.functions import pandas_udf, PandasUDFType >>> df = spark.createDataFrame( ... [(1, 1.0), (1, 2.0), (2, 3.0), (2, 5.0), (2, 10.0)], @@ -61,8 +68,9 @@ def apply(self, udf): | 2| 1.1094003924504583| +---+-------------------+ - .. seealso:: :meth:`pyspark.sql.functions.pandas_udf` - + See Also + -------- + pyspark.sql.functions.pandas_udf """ # Columns are special because hasattr always return True if isinstance(udf, Column) or not hasattr(udf, 'func') \ @@ -77,7 +85,6 @@ def apply(self, udf): return self.applyInPandas(udf.func, schema=udf.returnType) - @since(3.0) def applyInPandas(self, func, schema): """ Maps each group of the current :class:`DataFrame` using a pandas udf and returns the result @@ -94,11 +101,19 @@ def applyInPandas(self, func, schema): field data types by position if not strings, e.g. integer indices. The length of the returned `pandas.DataFrame` can be arbitrary. - :param func: a Python native function that takes a `pandas.DataFrame`, and outputs a + .. versionadded:: 3.0.0 + + Parameters + ---------- + func : function + a Python native function that takes a `pandas.DataFrame`, and outputs a `pandas.DataFrame`. - :param schema: the return type of the `func` in PySpark. The value can be either a + schema : :class:`pyspark.sql.types.DataType` or str + the return type of the `func` in PySpark. The value can be either a :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string. 
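Both the `pandas_udf` notes above and the `applyInPandas` parameters here turn on the declared return type matching what the Python function actually produces; a minimal sketch of a Series-to-Series pandas UDF whose dtype matches its declared type (pandas/pyarrow availability, the session, and the toy data are assumptions):

```
# Hedged sketch: a Series-to-Series pandas UDF whose declared return type ("long")
# matches the int64 Series it returns, per the notes above. Requires pandas and pyarrow.
import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.functions import pandas_udf

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame([(1,), (2,), (3,)], ["v"])

@pandas_udf("long")
def plus_one(s: pd.Series) -> pd.Series:
    return s + 1  # int64, matching the declared "long" type

df.select(plus_one("v").alias("v_plus_one")).show()
```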
+ Examples + -------- >>> import pandas as pd # doctest: +SKIP >>> from pyspark.sql.functions import pandas_udf, ceil >>> df = spark.createDataFrame( @@ -141,6 +156,7 @@ def applyInPandas(self, func, schema): | 1|1.5| | 2|6.0| +---+---+ + >>> def sum_func(key, pdf): ... # key is a tuple of two numpy.int64s, which is the values ... # of 'id' and 'ceil(df.v / 2)' for the current group @@ -156,19 +172,23 @@ def applyInPandas(self, func, schema): | 2| 2| 3.0| +---+-----------+----+ - .. note:: This function requires a full shuffle. All the data of a group will be loaded - into memory, so the user should be aware of the potential OOM risk if data is skewed - and certain groups are too large to fit in memory. + Notes + ----- + This function requires a full shuffle. All the data of a group will be loaded + into memory, so the user should be aware of the potential OOM risk if data is skewed + and certain groups are too large to fit in memory. - .. note:: If returning a new `pandas.DataFrame` constructed with a dictionary, it is - recommended to explicitly index the columns by name to ensure the positions are correct, - or alternatively use an `OrderedDict`. - For example, `pd.DataFrame({'id': ids, 'a': data}, columns=['id', 'a'])` or - `pd.DataFrame(OrderedDict([('id', ids), ('a', data)]))`. + If returning a new `pandas.DataFrame` constructed with a dictionary, it is + recommended to explicitly index the columns by name to ensure the positions are correct, + or alternatively use an `OrderedDict`. + For example, `pd.DataFrame({'id': ids, 'a': data}, columns=['id', 'a'])` or + `pd.DataFrame(OrderedDict([('id', ids), ('a', data)]))`. - .. note:: Experimental + This API is experimental. - .. seealso:: :meth:`pyspark.sql.functions.pandas_udf` + See Also + -------- + pyspark.sql.functions.pandas_udf """ from pyspark.sql import GroupedData from pyspark.sql.functions import pandas_udf, PandasUDFType @@ -182,11 +202,12 @@ def applyInPandas(self, func, schema): jdf = self._jgd.flatMapGroupsInPandas(udf_column._jc.expr()) return DataFrame(jdf, self.sql_ctx) - @since(3.0) def cogroup(self, other): """ Cogroups this group with another group so that we can run cogrouped operations. + .. versionadded:: 3.0.0 + See :class:`PandasCogroupedOps` for the operations that can be run. """ from pyspark.sql import GroupedData @@ -201,9 +222,11 @@ class PandasCogroupedOps(object): A logical grouping of two :class:`GroupedData`, created by :func:`GroupedData.cogroup`. - .. note:: Experimental + .. versionadded:: 3.0.0 - .. versionadded:: 3.0 + Notes + ----- + This API is experimental. """ def __init__(self, gd1, gd2): @@ -211,7 +234,6 @@ def __init__(self, gd1, gd2): self._gd2 = gd2 self.sql_ctx = gd1.sql_ctx - @since(3.0) def applyInPandas(self, func, schema): """ Applies a function to each cogroup using pandas and returns the result @@ -228,12 +250,20 @@ def applyInPandas(self, func, schema): field data types by position if not strings, e.g. integer indices. The length of the returned `pandas.DataFrame` can be arbitrary. - :param func: a Python native function that takes two `pandas.DataFrame`\\s, and + .. versionadded:: 3.0.0 + + Parameters + ---------- + func : function + a Python native function that takes two `pandas.DataFrame`\\s, and outputs a `pandas.DataFrame`, or that takes one tuple (grouping keys) and two pandas ``DataFrame``\\s, and outputs a pandas ``DataFrame``. - :param schema: the return type of the `func` in PySpark. 
The value can be either a + schema : :class:`pyspark.sql.types.DataType` or str + the return type of the `func` in PySpark. The value can be either a :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string. + Examples + -------- >>> from pyspark.sql.functions import pandas_udf >>> df1 = spark.createDataFrame( ... [(20000101, 1, 1.0), (20000101, 2, 2.0), (20000102, 1, 3.0), (20000102, 2, 4.0)], @@ -275,20 +305,23 @@ def applyInPandas(self, func, schema): |20000102| 1|3.0| x| +--------+---+---+---+ - .. note:: This function requires a full shuffle. All the data of a cogroup will be loaded - into memory, so the user should be aware of the potential OOM risk if data is skewed - and certain groups are too large to fit in memory. - - .. note:: If returning a new `pandas.DataFrame` constructed with a dictionary, it is - recommended to explicitly index the columns by name to ensure the positions are correct, - or alternatively use an `OrderedDict`. - For example, `pd.DataFrame({'id': ids, 'a': data}, columns=['id', 'a'])` or - `pd.DataFrame(OrderedDict([('id', ids), ('a', data)]))`. + Notes + ----- + This function requires a full shuffle. All the data of a cogroup will be loaded + into memory, so the user should be aware of the potential OOM risk if data is skewed + and certain groups are too large to fit in memory. - .. note:: Experimental + If returning a new `pandas.DataFrame` constructed with a dictionary, it is + recommended to explicitly index the columns by name to ensure the positions are correct, + or alternatively use an `OrderedDict`. + For example, `pd.DataFrame({'id': ids, 'a': data}, columns=['id', 'a'])` or + `pd.DataFrame(OrderedDict([('id', ids), ('a', data)]))`. - .. seealso:: :meth:`pyspark.sql.functions.pandas_udf` + This API is experimental. + See Also + -------- + pyspark.sql.functions.pandas_udf """ from pyspark.sql.pandas.functions import pandas_udf diff --git a/python/pyspark/sql/pandas/map_ops.py b/python/pyspark/sql/pandas/map_ops.py index 9835e88c6ac21..63fe37113e7cc 100644 --- a/python/pyspark/sql/pandas/map_ops.py +++ b/python/pyspark/sql/pandas/map_ops.py @@ -16,7 +16,6 @@ # import sys -from pyspark import since from pyspark.rdd import PythonEvalType @@ -26,7 +25,6 @@ class PandasMapOpsMixin(object): can use this class. """ - @since(3.0) def mapInPandas(self, func, schema): """ Maps an iterator of batches in the current :class:`DataFrame` using a Python native @@ -40,11 +38,19 @@ def mapInPandas(self, func, schema): Each `pandas.DataFrame` size can be controlled by `spark.sql.execution.arrow.maxRecordsPerBatch`. - :param func: a Python native function that takes an iterator of `pandas.DataFrame`\\s, and + .. versionadded:: 3.0.0 + + Parameters + ---------- + func : function + a Python native function that takes an iterator of `pandas.DataFrame`\\s, and outputs an iterator of `pandas.DataFrame`\\s. - :param schema: the return type of the `func` in PySpark. The value can be either a + schema : :class:`pyspark.sql.types.DataType` or str + the return type of the `func` in PySpark. The value can be either a :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string. + Examples + -------- >>> from pyspark.sql.functions import pandas_udf >>> df = spark.createDataFrame([(1, 21), (2, 30)], ("id", "age")) >>> def filter_func(iterator): @@ -57,9 +63,13 @@ def mapInPandas(self, func, schema): | 1| 21| +---+---+ - .. seealso:: :meth:`pyspark.sql.functions.pandas_udf` + Notes + ----- + This API is experimental - .. 
note:: Experimental + See Also + -------- + pyspark.sql.functions.pandas_udf """ from pyspark.sql import DataFrame from pyspark.sql.pandas.functions import pandas_udf diff --git a/python/pyspark/sql/pandas/serializers.py b/python/pyspark/sql/pandas/serializers.py index 09c7cf1b312bc..73d36ee555fb5 100644 --- a/python/pyspark/sql/pandas/serializers.py +++ b/python/pyspark/sql/pandas/serializers.py @@ -100,9 +100,14 @@ class ArrowStreamPandasSerializer(ArrowStreamSerializer): """ Serializes Pandas.Series as Arrow data with Arrow streaming format. - :param timezone: A timezone to respect when handling timestamp values - :param safecheck: If True, conversion from Arrow to Pandas checks for overflow/truncation - :param assign_cols_by_name: If True, then Pandas DataFrames will get columns by name + Parameters + ---------- + timezone : str + A timezone to respect when handling timestamp values + safecheck : bool + If True, conversion from Arrow to Pandas checks for overflow/truncation + assign_cols_by_name : bool + If True, then Pandas DataFrames will get columns by name """ def __init__(self, timezone, safecheck, assign_cols_by_name): @@ -130,8 +135,15 @@ def _create_batch(self, series): Create an Arrow record batch from the given pandas.Series or list of Series, with optional type. - :param series: A single pandas.Series, list of Series, or list of (series, arrow_type) - :return: Arrow RecordBatch + Parameters + ---------- + series : pandas.Series or list + A single series, list of series, or list of (series, arrow_type) + + Returns + ------- + pyarrow.RecordBatch + Arrow RecordBatch """ import pandas as pd import pyarrow as pa diff --git a/python/pyspark/sql/pandas/types.py b/python/pyspark/sql/pandas/types.py index 78f9daa130d59..67557120715ac 100644 --- a/python/pyspark/sql/pandas/types.py +++ b/python/pyspark/sql/pandas/types.py @@ -153,9 +153,16 @@ def _check_series_localize_timestamps(s, timezone): If the input series is not a timestamp series, then the same series is returned. If the input series is a timestamp series, then a converted series is returned. - :param s: pandas.Series - :param timezone: the timezone to convert. if None then use local timezone - :return pandas.Series that have been converted to tz-naive + Parameters + ---------- + s : pandas.Series + timezone : str + the timezone to convert. if None then use local timezone + + Returns + ------- + pandas.Series + `pandas.Series` that have been converted to tz-naive """ from pyspark.sql.pandas.utils import require_minimum_pandas_version require_minimum_pandas_version() @@ -174,9 +181,16 @@ def _check_series_convert_timestamps_internal(s, timezone): Convert a tz-naive timestamp in the specified timezone or local timezone to UTC normalized for Spark internal storage - :param s: a pandas.Series - :param timezone: the timezone to convert. if None then use local timezone - :return pandas.Series where if it is a timestamp, has been UTC normalized without a time zone + Parameters + ---------- + s : pandas.Series + timezone : str + the timezone to convert. 
if None then use local timezone + + Returns + ------- + pandas.Series + `pandas.Series` where if it is a timestamp, has been UTC normalized without a time zone """ from pyspark.sql.pandas.utils import require_minimum_pandas_version require_minimum_pandas_version() @@ -226,10 +240,18 @@ def _check_series_convert_timestamps_localize(s, from_timezone, to_timezone): """ Convert timestamp to timezone-naive in the specified timezone or local timezone - :param s: a pandas.Series - :param from_timezone: the timezone to convert from. if None then use local timezone - :param to_timezone: the timezone to convert to. if None then use local timezone - :return pandas.Series where if it is a timestamp, has been converted to tz-naive + Parameters + ---------- + s : pandas.Series + from_timezone : str + the timezone to convert from. if None then use local timezone + to_timezone : str + the timezone to convert to. if None then use local timezone + + Returns + ------- + pandas.Series + `pandas.Series` where if it is a timestamp, has been converted to tz-naive """ from pyspark.sql.pandas.utils import require_minimum_pandas_version require_minimum_pandas_version() @@ -254,9 +276,16 @@ def _check_series_convert_timestamps_local_tz(s, timezone): """ Convert timestamp to timezone-naive in the specified timezone or local timezone - :param s: a pandas.Series - :param timezone: the timezone to convert to. if None then use local timezone - :return pandas.Series where if it is a timestamp, has been converted to tz-naive + Parameters + ---------- + s : pandas.Series + timezone : str + the timezone to convert to. if None then use local timezone + + Returns + ------- + pandas.Series + `pandas.Series` where if it is a timestamp, has been converted to tz-naive """ return _check_series_convert_timestamps_localize(s, None, timezone) @@ -265,8 +294,15 @@ def _check_series_convert_timestamps_tz_local(s, timezone): """ Convert timestamp to timezone-naive in the specified timezone or local timezone - :param s: a pandas.Series - :param timezone: the timezone to convert from. if None then use local timezone - :return pandas.Series where if it is a timestamp, has been converted to tz-naive + Parameters + ---------- + s : pandas.Series + timezone : str + the timezone to convert from. if None then use local timezone + + Returns + ------- + pandas.Series + `pandas.Series` where if it is a timestamp, has been converted to tz-naive """ return _check_series_convert_timestamps_localize(s, timezone, None) diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index ae715eea70b6d..2ed991c87f506 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -57,12 +57,18 @@ def _df(self, jdf): from pyspark.sql.dataframe import DataFrame return DataFrame(jdf, self._spark) - @since(1.4) def format(self, source): """Specifies the input data source format. - :param source: string, name of the data source, e.g. 'json', 'parquet'. + .. versionadded:: 1.4.0 + + Parameters + ---------- + source : str + string, name of the data source, e.g. 'json', 'parquet'. + Examples + -------- >>> df = spark.read.format('json').load('python/test_support/sql/people.json') >>> df.dtypes [('age', 'bigint'), ('name', 'string')] @@ -71,7 +77,6 @@ def format(self, source): self._jreader = self._jreader.format(source) return self - @since(1.4) def schema(self, schema): """Specifies the input schema. 
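As a hedged aside on the `DataFrameReader` methods being re-documented in this file, a minimal sketch chaining `format`, `schema` and `load`; supplying the schema up front lets the source skip inference, as the `schema` docstring below notes (the path reuses the test-support file from the doctests and is otherwise an assumption):

```
# Hedged sketch of the reader chain documented here: an explicit DDL schema
# lets the JSON source skip schema inference. Path and session are illustrative.
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()

df = (spark.read
      .format("json")
      .schema("age INT, name STRING")
      .load("python/test_support/sql/people.json"))
df.printSchema()
```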
@@ -79,8 +84,13 @@ def schema(self, schema): By specifying the schema here, the underlying data source can skip the schema inference step, and thus speed up data loading. - :param schema: a :class:`pyspark.sql.types.StructType` object or a DDL-formatted string - (For example ``col0 INT, col1 DOUBLE``). + .. versionadded:: 1.4.0 + + Parameters + ---------- + schema : :class:`pyspark.sql.types.StructType` or str + a :class:`pyspark.sql.types.StructType` object or a DDL-formatted string + (For example ``col0 INT, col1 DOUBLE``). >>> s = spark.read.schema("col0 INT, col1 DOUBLE") """ @@ -144,16 +154,25 @@ def options(self, **options): self._jreader = self._jreader.option(k, to_str(options[k])) return self - @since(1.4) def load(self, path=None, format=None, schema=None, **options): """Loads data from a data source and returns it as a :class:`DataFrame`. - :param path: optional string or a list of string for file-system backed data sources. - :param format: optional string for format of the data source. Default to 'parquet'. - :param schema: optional :class:`pyspark.sql.types.StructType` for the input schema - or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``). - :param options: all other string options - + .. versionadded:: 1.4.0 + + Parameters + ---------- + path : str or list, optional + optional string or a list of string for file-system backed data sources. + format : str, optional + optional string for format of the data source. Default to 'parquet'. + schema : :class:`pyspark.sql.types.StructType` or str, optional + optional :class:`pyspark.sql.types.StructType` for the input schema + or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``). + **options : dict + all other string options + + Examples + -------- >>> df = spark.read.format("parquet").load('python/test_support/sql/parquet_partitioned', ... opt1=True, opt2=1, opt3='str') >>> df.dtypes @@ -178,7 +197,6 @@ def load(self, path=None, format=None, schema=None, **options): else: return self._df(self._jreader.load()) - @since(1.4) def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, allowComments=None, allowUnquotedFieldNames=None, allowSingleQuotes=None, allowNumericLeadingZero=None, allowBackslashEscapingAnyCharacter=None, @@ -195,89 +213,118 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, If the ``schema`` parameter is not specified, this function goes through the input once to determine the input schema. - :param path: string represents path to the JSON dataset, or a list of paths, - or RDD of Strings storing JSON objects. - :param schema: an optional :class:`pyspark.sql.types.StructType` for the input schema or - a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``). - :param primitivesAsString: infers all primitive values as a string type. If None is set, - it uses the default value, ``false``. - :param prefersDecimal: infers all floating-point values as a decimal type. If the values - do not fit in decimal, then it infers them as doubles. If None is - set, it uses the default value, ``false``. - :param allowComments: ignores Java/C++ style comment in JSON records. If None is set, - it uses the default value, ``false``. - :param allowUnquotedFieldNames: allows unquoted JSON field names. If None is set, - it uses the default value, ``false``. - :param allowSingleQuotes: allows single quotes in addition to double quotes. If None is - set, it uses the default value, ``true``. 
- :param allowNumericLeadingZero: allows leading zeros in numbers (e.g. 00012). If None is - set, it uses the default value, ``false``. - :param allowBackslashEscapingAnyCharacter: allows accepting quoting of all character - using backslash quoting mechanism. If None is - set, it uses the default value, ``false``. - :param mode: allows a mode for dealing with corrupt records during parsing. If None is + .. versionadded:: 1.4.0 + + Parameters + ---------- + path : str, list or :class:`RDD` + string represents path to the JSON dataset, or a list of paths, + or RDD of Strings storing JSON objects. + schema : :class:`pyspark.sql.types.StructType` or str, optional + an optional :class:`pyspark.sql.types.StructType` for the input schema or + a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``). + primitivesAsString : str or bool, optional + infers all primitive values as a string type. If None is set, + it uses the default value, ``false``. + prefersDecimal : str or bool, optional + infers all floating-point values as a decimal type. If the values + do not fit in decimal, then it infers them as doubles. If None is + set, it uses the default value, ``false``. + allowComments : str or bool, optional + ignores Java/C++ style comment in JSON records. If None is set, + it uses the default value, ``false``. + allowUnquotedFieldNames : str or bool, optional + allows unquoted JSON field names. If None is set, + it uses the default value, ``false``. + allowSingleQuotes : str or bool, optional + allows single quotes in addition to double quotes. If None is + set, it uses the default value, ``true``. + allowNumericLeadingZero : str or bool, optional + allows leading zeros in numbers (e.g. 00012). If None is + set, it uses the default value, ``false``. + allowBackslashEscapingAnyCharacter : str or bool, optional + allows accepting quoting of all character + using backslash quoting mechanism. If None is + set, it uses the default value, ``false``. + mode : str, optional + allows a mode for dealing with corrupt records during parsing. If None is set, it uses the default value, ``PERMISSIVE``. - * ``PERMISSIVE``: when it meets a corrupted record, puts the malformed string \ - into a field configured by ``columnNameOfCorruptRecord``, and sets malformed \ - fields to ``null``. To keep corrupt records, an user can set a string type \ - field named ``columnNameOfCorruptRecord`` in an user-defined schema. If a \ - schema does not have the field, it drops corrupt records during parsing. \ - When inferring a schema, it implicitly adds a ``columnNameOfCorruptRecord`` \ - field in an output schema. - * ``DROPMALFORMED``: ignores the whole corrupted records. - * ``FAILFAST``: throws an exception when it meets corrupted records. - - :param columnNameOfCorruptRecord: allows renaming the new field having malformed string - created by ``PERMISSIVE`` mode. This overrides - ``spark.sql.columnNameOfCorruptRecord``. If None is set, - it uses the value specified in - ``spark.sql.columnNameOfCorruptRecord``. - :param dateFormat: sets the string that indicates a date format. Custom date formats - follow the formats at `datetime pattern`_. - This applies to date type. If None is set, it uses the - default value, ``yyyy-MM-dd``. - :param timestampFormat: sets the string that indicates a timestamp format. - Custom date formats follow the formats at `datetime pattern`_. - This applies to timestamp type. If None is set, it uses the - default value, ``yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]``. 
- :param multiLine: parse one record, which may span multiple lines, per file. If None is - set, it uses the default value, ``false``. - :param allowUnquotedControlChars: allows JSON Strings to contain unquoted control - characters (ASCII characters with value less than 32, - including tab and line feed characters) or not. - :param encoding: allows to forcibly set one of standard basic or extended encoding for - the JSON files. For example UTF-16BE, UTF-32LE. If None is set, - the encoding of input JSON will be detected automatically - when the multiLine option is set to ``true``. - :param lineSep: defines the line separator that should be used for parsing. If None is - set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``. - :param samplingRatio: defines fraction of input JSON objects used for schema inferring. - If None is set, it uses the default value, ``1.0``. - :param dropFieldIfAllNull: whether to ignore column of all null values or empty - array/struct during schema inference. If None is set, it - uses the default value, ``false``. - :param locale: sets a locale as language tag in IETF BCP 47 format. If None is set, - it uses the default value, ``en-US``. For instance, ``locale`` is used while - parsing dates and timestamps. - :param pathGlobFilter: an optional glob pattern to only include files with paths matching - the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. - It does not change the behavior of `partition discovery`_. - :param recursiveFileLookup: recursively scan a directory for files. Using this option - disables `partition discovery`_. - :param allowNonNumericNumbers: allows JSON parser to recognize set of "Not-a-Number" (NaN) - tokens as legal floating number values. If None is set, - it uses the default value, ``true``. + * ``PERMISSIVE``: when it meets a corrupted record, puts the malformed string \ + into a field configured by ``columnNameOfCorruptRecord``, and sets malformed \ + fields to ``null``. To keep corrupt records, an user can set a string type \ + field named ``columnNameOfCorruptRecord`` in an user-defined schema. If a \ + schema does not have the field, it drops corrupt records during parsing. \ + When inferring a schema, it implicitly adds a ``columnNameOfCorruptRecord`` \ + field in an output schema. + * ``DROPMALFORMED``: ignores the whole corrupted records. + * ``FAILFAST``: throws an exception when it meets corrupted records. + + columnNameOfCorruptRecord: str, optional + allows renaming the new field having malformed string + created by ``PERMISSIVE`` mode. This overrides + ``spark.sql.columnNameOfCorruptRecord``. If None is set, + it uses the value specified in + ``spark.sql.columnNameOfCorruptRecord``. + dateFormat : str, optional + sets the string that indicates a date format. Custom date formats + follow the formats at + `datetime pattern `_. # noqa + This applies to date type. If None is set, it uses the + default value, ``yyyy-MM-dd``. + timestampFormat : str, optional + sets the string that indicates a timestamp format. + Custom date formats follow the formats at + `datetime pattern `_. # noqa + This applies to timestamp type. If None is set, it uses the + default value, ``yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]``. + multiLine : str or bool, optional + parse one record, which may span multiple lines, per file. If None is + set, it uses the default value, ``false``. 
+ allowUnquotedControlChars : str or bool, optional + allows JSON Strings to contain unquoted control + characters (ASCII characters with value less than 32, + including tab and line feed characters) or not. + encoding : str or bool, optional + allows to forcibly set one of standard basic or extended encoding for + the JSON files. For example UTF-16BE, UTF-32LE. If None is set, + the encoding of input JSON will be detected automatically + when the multiLine option is set to ``true``. + lineSep : str, optional + defines the line separator that should be used for parsing. If None is + set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``. + samplingRatio : str or float, optional + defines fraction of input JSON objects used for schema inferring. + If None is set, it uses the default value, ``1.0``. + dropFieldIfAllNull : str or bool, optional + whether to ignore column of all null values or empty + array/struct during schema inference. If None is set, it + uses the default value, ``false``. + locale : str, optional + sets a locale as language tag in IETF BCP 47 format. If None is set, + it uses the default value, ``en-US``. For instance, ``locale`` is used while + parsing dates and timestamps. + pathGlobFilter : str or bool, optional + an optional glob pattern to only include files with paths matching + the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. + It does not change the behavior of + `partition discovery `_. # noqa + recursiveFileLookup : str or bool, optional + recursively scan a directory for files. Using this option + disables + `partition discovery `_. # noqa + allowNonNumericNumbers : str or bool + allows JSON parser to recognize set of "Not-a-Number" (NaN) + tokens as legal floating number values. If None is set, + it uses the default value, ``true``. * ``+INF``: for positive infinity, as well as alias of ``+Infinity`` and ``Infinity``. * ``-INF``: for negative infinity, alias ``-Infinity``. * ``NaN``: for other not-a-numbers, like result of division by zero. - .. _partition discovery: - https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery - .. _datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html - + Examples + -------- >>> df1 = spark.read.json('python/test_support/sql/people.json') >>> df1.dtypes [('age', 'bigint'), ('name', 'string')] @@ -317,12 +364,18 @@ def func(iterator): else: raise TypeError("path can be only string, list or RDD") - @since(1.4) def table(self, tableName): """Returns the specified table as a :class:`DataFrame`. - :param tableName: string, name of the table. + .. versionadded:: 1.4.0 + Parameters + ---------- + tableName : str + string, name of the table. + + Examples + -------- >>> df = spark.read.parquet('python/test_support/sql/parquet_partitioned') >>> df.createOrReplaceTempView('tmpTable') >>> spark.read.table('tmpTable').dtypes @@ -330,24 +383,35 @@ def table(self, tableName): """ return self._df(self._jreader.table(tableName)) - @since(1.4) def parquet(self, *paths, **options): """ Loads Parquet files, returning the result as a :class:`DataFrame`. - :param mergeSchema: sets whether we should merge schemas collected from all - Parquet part-files. This will override - ``spark.sql.parquet.mergeSchema``. The default value is specified in - ``spark.sql.parquet.mergeSchema``. - :param pathGlobFilter: an optional glob pattern to only include files with paths matching - the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. 
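As a quick illustration of the reader options documented above for `DataFrameReader.json`, here is a minimal hedged sketch; the input path, the corrupt-record column name, and the date format are hypothetical, and a running `SparkSession` is assumed:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Hypothetical input location. multiLine handles records that span several
# lines, and PERMISSIVE mode routes malformed records into the named column
# instead of failing the whole read.
df = spark.read.json(
    "/tmp/events.json",
    multiLine=True,
    mode="PERMISSIVE",
    columnNameOfCorruptRecord="_corrupt_record",
    dateFormat="yyyy-MM-dd",
)
df.printSchema()
```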
- It does not change the behavior of `partition discovery`_. - :param recursiveFileLookup: recursively scan a directory for files. Using this option - disables `partition discovery`_. - - .. _partition discovery: - https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery - + .. versionadded:: 1.4.0 + + Parameters + ---------- + paths : str + + Other Parameters + ---------------- + mergeSchema : str or bool, optional + sets whether we should merge schemas collected from all + Parquet part-files. This will override + ``spark.sql.parquet.mergeSchema``. The default value is specified in + ``spark.sql.parquet.mergeSchema``. + pathGlobFilter : str or bool, optional + an optional glob pattern to only include files with paths matching + the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. + It does not change the behavior of + `partition discovery `_. # noqa + recursiveFileLookup : str or bool, optional + recursively scan a directory for files. Using this option + disables + `partition discovery `_. # noqa + + Examples + -------- >>> df = spark.read.parquet('python/test_support/sql/parquet_partitioned') >>> df.dtypes [('name', 'string'), ('year', 'int'), ('month', 'int'), ('day', 'int')] @@ -359,7 +423,6 @@ def parquet(self, *paths, **options): recursiveFileLookup=recursiveFileLookup) return self._df(self._jreader.parquet(_to_seq(self._spark._sc, paths))) - @since(1.6) def text(self, paths, wholetext=False, lineSep=None, pathGlobFilter=None, recursiveFileLookup=None): """ @@ -370,19 +433,28 @@ def text(self, paths, wholetext=False, lineSep=None, pathGlobFilter=None, By default, each line in the text file is a new row in the resulting DataFrame. - :param paths: string, or list of strings, for input path(s). - :param wholetext: if true, read each file from input path(s) as a single row. - :param lineSep: defines the line separator that should be used for parsing. If None is - set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``. - :param pathGlobFilter: an optional glob pattern to only include files with paths matching - the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. - It does not change the behavior of `partition discovery`_. - :param recursiveFileLookup: recursively scan a directory for files. Using this option - disables `partition discovery`_. - - .. _partition discovery: - https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery - + .. versionadded:: 1.6.0 + + Parameters + ---------- + paths : str or list + string, or list of strings, for input path(s). + wholetext : str or bool, optional + if true, read each file from input path(s) as a single row. + lineSep : str, optional + defines the line separator that should be used for parsing. If None is + set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``. + pathGlobFilter : str or bool, optional + an optional glob pattern to only include files with paths matching + the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. + It does not change the behavior of + `partition discovery `_. # noqa + recursiveFileLookup : str or bool, optional + recursively scan a directory for files. Using this option disables + `partition discovery `_. 
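To illustrate the `DataFrameReader.parquet` options covered above, a small sketch under assumed paths; the warehouse directory and glob pattern are hypothetical:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Hypothetical directory. mergeSchema reconciles schemas across part-files,
# while pathGlobFilter restricts which files are read without changing
# partition discovery.
df = spark.read.parquet(
    "/tmp/warehouse/events",
    mergeSchema=True,
    pathGlobFilter="*.parquet",
    recursiveFileLookup=False,
)
print(df.schema.simpleString())
```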
# noqa + + Examples + -------- >>> df = spark.read.text('python/test_support/sql/text-test.txt') >>> df.collect() [Row(value='hello'), Row(value='this')] @@ -397,7 +469,6 @@ def text(self, paths, wholetext=False, lineSep=None, pathGlobFilter=None, paths = [paths] return self._df(self._jreader.text(self._spark._sc._jvm.PythonUtils.toSeq(paths))) - @since(2.0) def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=None, comment=None, header=None, inferSchema=None, ignoreLeadingWhiteSpace=None, ignoreTrailingWhiteSpace=None, nullValue=None, nanValue=None, positiveInf=None, @@ -412,119 +483,156 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non ``inferSchema`` is enabled. To avoid going through the entire data once, disable ``inferSchema`` option or specify the schema explicitly using ``schema``. - :param path: string, or list of strings, for input path(s), - or RDD of Strings storing CSV rows. - :param schema: an optional :class:`pyspark.sql.types.StructType` for the input schema - or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``). - :param sep: sets a separator (one or more characters) for each field and value. If None is - set, it uses the default value, ``,``. - :param encoding: decodes the CSV files by the given encoding type. If None is set, - it uses the default value, ``UTF-8``. - :param quote: sets a single character used for escaping quoted values where the - separator can be part of the value. If None is set, it uses the default - value, ``"``. If you would like to turn off quotations, you need to set an - empty string. - :param escape: sets a single character used for escaping quotes inside an already - quoted value. If None is set, it uses the default value, ``\``. - :param comment: sets a single character used for skipping lines beginning with this - character. By default (None), it is disabled. - :param header: uses the first line as names of columns. If None is set, it uses the - default value, ``false``. - .. note:: if the given path is a RDD of Strings, this header - option will remove all lines same with the header if exists. - - :param inferSchema: infers the input schema automatically from data. It requires one extra - pass over the data. If None is set, it uses the default value, ``false``. - :param enforceSchema: If it is set to ``true``, the specified or inferred schema will be - forcibly applied to datasource files, and headers in CSV files will be - ignored. If the option is set to ``false``, the schema will be - validated against all headers in CSV files or the first header in RDD - if the ``header`` option is set to ``true``. Field names in the schema - and column names in CSV headers are checked by their positions - taking into account ``spark.sql.caseSensitive``. If None is set, - ``true`` is used by default. Though the default value is ``true``, - it is recommended to disable the ``enforceSchema`` option - to avoid incorrect results. - :param ignoreLeadingWhiteSpace: A flag indicating whether or not leading whitespaces from - values being read should be skipped. If None is set, it - uses the default value, ``false``. - :param ignoreTrailingWhiteSpace: A flag indicating whether or not trailing whitespaces from - values being read should be skipped. If None is set, it - uses the default value, ``false``. - :param nullValue: sets the string representation of a null value. If None is set, it uses - the default value, empty string. 
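For the `DataFrameReader.text` options documented above, a brief hedged sketch; the log directory is hypothetical:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Hypothetical log directory. By default each line becomes a row; with
# wholetext=True each file becomes a single row instead.
per_line = spark.read.text("/tmp/logs", lineSep="\n")
per_file = spark.read.text("/tmp/logs", wholetext=True)
print(per_line.count(), per_file.count())
```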
Since 2.0.1, this ``nullValue`` param - applies to all supported types including the string type. - :param nanValue: sets the string representation of a non-number value. If None is set, it - uses the default value, ``NaN``. - :param positiveInf: sets the string representation of a positive infinity value. If None - is set, it uses the default value, ``Inf``. - :param negativeInf: sets the string representation of a negative infinity value. If None - is set, it uses the default value, ``Inf``. - :param dateFormat: sets the string that indicates a date format. Custom date formats - follow the formats at `datetime pattern`_. - This applies to date type. If None is set, it uses the - default value, ``yyyy-MM-dd``. - :param timestampFormat: sets the string that indicates a timestamp format. - Custom date formats follow the formats at `datetime pattern`_. - This applies to timestamp type. If None is set, it uses the - default value, ``yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]``. - :param maxColumns: defines a hard limit of how many columns a record can have. If None is - set, it uses the default value, ``20480``. - :param maxCharsPerColumn: defines the maximum number of characters allowed for any given - value being read. If None is set, it uses the default value, - ``-1`` meaning unlimited length. - :param maxMalformedLogPerPartition: this parameter is no longer used since Spark 2.2.0. - If specified, it is ignored. - :param mode: allows a mode for dealing with corrupt records during parsing. If None is - set, it uses the default value, ``PERMISSIVE``. Note that Spark tries to - parse only required columns in CSV under column pruning. Therefore, corrupt - records can be different based on required set of fields. This behavior can - be controlled by ``spark.sql.csv.parser.columnPruning.enabled`` - (enabled by default). - - * ``PERMISSIVE``: when it meets a corrupted record, puts the malformed string \ - into a field configured by ``columnNameOfCorruptRecord``, and sets malformed \ - fields to ``null``. To keep corrupt records, an user can set a string type \ - field named ``columnNameOfCorruptRecord`` in an user-defined schema. If a \ - schema does not have the field, it drops corrupt records during parsing. \ - A record with less/more tokens than schema is not a corrupted record to CSV. \ - When it meets a record having fewer tokens than the length of the schema, \ - sets ``null`` to extra fields. When the record has more tokens than the \ - length of the schema, it drops extra tokens. - * ``DROPMALFORMED``: ignores the whole corrupted records. - * ``FAILFAST``: throws an exception when it meets corrupted records. - - :param columnNameOfCorruptRecord: allows renaming the new field having malformed string - created by ``PERMISSIVE`` mode. This overrides - ``spark.sql.columnNameOfCorruptRecord``. If None is set, - it uses the value specified in - ``spark.sql.columnNameOfCorruptRecord``. - :param multiLine: parse records, which may span multiple lines. If None is - set, it uses the default value, ``false``. - :param charToEscapeQuoteEscaping: sets a single character used for escaping the escape for - the quote character. If None is set, the default value is - escape character when escape and quote characters are - different, ``\0`` otherwise. - :param samplingRatio: defines fraction of rows used for schema inferring. - If None is set, it uses the default value, ``1.0``. - :param emptyValue: sets the string representation of an empty value. If None is set, it uses - the default value, empty string. 
- :param locale: sets a locale as language tag in IETF BCP 47 format. If None is set, - it uses the default value, ``en-US``. For instance, ``locale`` is used while - parsing dates and timestamps. - :param lineSep: defines the line separator that should be used for parsing. If None is - set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``. - Maximum length is 1 character. - :param pathGlobFilter: an optional glob pattern to only include files with paths matching - the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. - It does not change the behavior of `partition discovery`_. - :param recursiveFileLookup: recursively scan a directory for files. Using this option - disables `partition discovery`_. - - .. _partition discovery: - https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery - .. _datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html - + .. versionadded:: 2.0.0 + + Parameters + ---------- + path : str or list + string, or list of strings, for input path(s), + or RDD of Strings storing CSV rows. + schema : :class:`pyspark.sql.types.StructType` or str, optional + an optional :class:`pyspark.sql.types.StructType` for the input schema + or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``). + sep : str, optional + sets a separator (one or more characters) for each field and value. If None is + set, it uses the default value, ``,``. + encoding : str, optional + decodes the CSV files by the given encoding type. If None is set, + it uses the default value, ``UTF-8``. + quote : str, optional + sets a single character used for escaping quoted values where the + separator can be part of the value. If None is set, it uses the default + value, ``"``. If you would like to turn off quotations, you need to set an + empty string. + escape : str, optional + sets a single character used for escaping quotes inside an already + quoted value. If None is set, it uses the default value, ``\``. + comment : str, optional + sets a single character used for skipping lines beginning with this + character. By default (None), it is disabled. + header : str or bool, optional + uses the first line as names of columns. If None is set, it uses the + default value, ``false``. + + .. note:: if the given path is a RDD of Strings, this header + option will remove all lines same with the header if exists. + + inferSchema : str or bool, optional + infers the input schema automatically from data. It requires one extra + pass over the data. If None is set, it uses the default value, ``false``. + enforceSchema : str or bool, optional + If it is set to ``true``, the specified or inferred schema will be + forcibly applied to datasource files, and headers in CSV files will be + ignored. If the option is set to ``false``, the schema will be + validated against all headers in CSV files or the first header in RDD + if the ``header`` option is set to ``true``. Field names in the schema + and column names in CSV headers are checked by their positions + taking into account ``spark.sql.caseSensitive``. If None is set, + ``true`` is used by default. Though the default value is ``true``, + it is recommended to disable the ``enforceSchema`` option + to avoid incorrect results. + ignoreLeadingWhiteSpace : str or bool, optional + A flag indicating whether or not leading whitespaces from + values being read should be skipped. If None is set, it + uses the default value, ``false``. 
+ ignoreTrailingWhiteSpace : str or bool, optional + A flag indicating whether or not trailing whitespaces from + values being read should be skipped. If None is set, it + uses the default value, ``false``. + nullValue : str, optional + sets the string representation of a null value. If None is set, it uses + the default value, empty string. Since 2.0.1, this ``nullValue`` param + applies to all supported types including the string type. + nanValue : str, optional + sets the string representation of a non-number value. If None is set, it + uses the default value, ``NaN``. + positiveInf : str, optional + sets the string representation of a positive infinity value. If None + is set, it uses the default value, ``Inf``. + negativeInf : str, optional + sets the string representation of a negative infinity value. If None + is set, it uses the default value, ``Inf``. + dateFormat : str, optional + sets the string that indicates a date format. Custom date formats + follow the formats at + `datetime pattern `_. # noqa + This applies to date type. If None is set, it uses the + default value, ``yyyy-MM-dd``. + timestampFormat : str, optional + sets the string that indicates a timestamp format. + Custom date formats follow the formats at + `datetime pattern `_. # noqa + This applies to timestamp type. If None is set, it uses the + default value, ``yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]``. + maxColumns : str or int, optional + defines a hard limit of how many columns a record can have. If None is + set, it uses the default value, ``20480``. + maxCharsPerColumn : str or int, optional + defines the maximum number of characters allowed for any given + value being read. If None is set, it uses the default value, + ``-1`` meaning unlimited length. + maxMalformedLogPerPartition : str or int, optional + this parameter is no longer used since Spark 2.2.0. + If specified, it is ignored. + mode : str, optional + allows a mode for dealing with corrupt records during parsing. If None is + set, it uses the default value, ``PERMISSIVE``. Note that Spark tries to + parse only required columns in CSV under column pruning. Therefore, corrupt + records can be different based on required set of fields. This behavior can + be controlled by ``spark.sql.csv.parser.columnPruning.enabled`` + (enabled by default). + + * ``PERMISSIVE``: when it meets a corrupted record, puts the malformed string \ + into a field configured by ``columnNameOfCorruptRecord``, and sets malformed \ + fields to ``null``. To keep corrupt records, an user can set a string type \ + field named ``columnNameOfCorruptRecord`` in an user-defined schema. If a \ + schema does not have the field, it drops corrupt records during parsing. \ + A record with less/more tokens than schema is not a corrupted record to CSV. \ + When it meets a record having fewer tokens than the length of the schema, \ + sets ``null`` to extra fields. When the record has more tokens than the \ + length of the schema, it drops extra tokens. + * ``DROPMALFORMED``: ignores the whole corrupted records. + * ``FAILFAST``: throws an exception when it meets corrupted records. + + columnNameOfCorruptRecord : str, optional + allows renaming the new field having malformed string + created by ``PERMISSIVE`` mode. This overrides + ``spark.sql.columnNameOfCorruptRecord``. If None is set, + it uses the value specified in + ``spark.sql.columnNameOfCorruptRecord``. + multiLine : str or bool, optional + parse records, which may span multiple lines. If None is + set, it uses the default value, ``false``. 
+ charToEscapeQuoteEscaping : str, optional + sets a single character used for escaping the escape for + the quote character. If None is set, the default value is + escape character when escape and quote characters are + different, ``\0`` otherwise. + samplingRatio : str or float, optional + defines fraction of rows used for schema inferring. + If None is set, it uses the default value, ``1.0``. + emptyValue : str, optional + sets the string representation of an empty value. If None is set, it uses + the default value, empty string. + locale : str, optional + sets a locale as language tag in IETF BCP 47 format. If None is set, + it uses the default value, ``en-US``. For instance, ``locale`` is used while + parsing dates and timestamps. + lineSep : str, optional + defines the line separator that should be used for parsing. If None is + set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``. + Maximum length is 1 character. + pathGlobFilter : str or bool, optional + an optional glob pattern to only include files with paths matching + the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. + It does not change the behavior of + `partition discovery `_. # noqa + recursiveFileLookup : str or bool, optional + recursively scan a directory for files. Using this option disables + `partition discovery `_. # noqa + + Examples + -------- >>> df = spark.read.csv('python/test_support/sql/ages.csv') >>> df.dtypes [('_c0', 'string'), ('_c1', 'string')] @@ -571,22 +679,30 @@ def func(iterator): else: raise TypeError("path can be only string, list or RDD") - @since(1.5) def orc(self, path, mergeSchema=None, pathGlobFilter=None, recursiveFileLookup=None): """Loads ORC files, returning the result as a :class:`DataFrame`. - :param mergeSchema: sets whether we should merge schemas collected from all - ORC part-files. This will override ``spark.sql.orc.mergeSchema``. - The default value is specified in ``spark.sql.orc.mergeSchema``. - :param pathGlobFilter: an optional glob pattern to only include files with paths matching - the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. - It does not change the behavior of `partition discovery`_. - :param recursiveFileLookup: recursively scan a directory for files. Using this option - disables `partition discovery`_. - - .. _partition discovery: - https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery - + .. versionadded:: 1.5.0 + + Parameters + ---------- + path : str or list + mergeSchema : str or bool, optional + sets whether we should merge schemas collected from all + ORC part-files. This will override ``spark.sql.orc.mergeSchema``. + The default value is specified in ``spark.sql.orc.mergeSchema``. + pathGlobFilter : str or bool + an optional glob pattern to only include files with paths matching + the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. + It does not change the behavior of + `partition discovery `_. # noqa + recursiveFileLookup : str or bool + recursively scan a directory for files. Using this option + disables + `partition discovery `_. 
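Tying together the `DataFrameReader.csv` options documented above, a minimal sketch; the file path, column names, and separator are hypothetical. Supplying an explicit DDL schema avoids the extra pass that `inferSchema` would need:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Hypothetical semicolon-separated file with a header row.
# DROPMALFORMED silently discards rows that do not match the schema.
df = spark.read.csv(
    "/tmp/people.csv",
    schema="name STRING, age INT, signup DATE",
    sep=";",
    header=True,
    dateFormat="yyyy-MM-dd",
    mode="DROPMALFORMED",
)
df.show()
```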
# noqa + + Examples + -------- >>> df = spark.read.orc('python/test_support/sql/orc_partitioned') >>> df.dtypes [('a', 'bigint'), ('b', 'int'), ('c', 'int')] @@ -597,7 +713,6 @@ def orc(self, path, mergeSchema=None, pathGlobFilter=None, recursiveFileLookup=N path = [path] return self._df(self._jreader.orc(_to_seq(self._spark._sc, path))) - @since(1.4) def jdbc(self, url, table, column=None, lowerBound=None, upperBound=None, numPartitions=None, predicates=None, properties=None): """ @@ -605,31 +720,48 @@ def jdbc(self, url, table, column=None, lowerBound=None, upperBound=None, numPar accessible via JDBC URL ``url`` and connection ``properties``. Partitions of the table will be retrieved in parallel if either ``column`` or - ``predicates`` is specified. ``lowerBound`, ``upperBound`` and ``numPartitions`` + ``predicates`` is specified. ``lowerBound``, ``upperBound`` and ``numPartitions`` is needed when ``column`` is specified. If both ``column`` and ``predicates`` are specified, ``column`` will be used. - .. note:: Don't create too many partitions in parallel on a large cluster; - otherwise Spark might crash your external database systems. - - :param url: a JDBC URL of the form ``jdbc:subprotocol:subname`` - :param table: the name of the table - :param column: the name of a column of numeric, date, or timestamp type - that will be used for partitioning; - if this parameter is specified, then ``numPartitions``, ``lowerBound`` - (inclusive), and ``upperBound`` (exclusive) will form partition strides - for generated WHERE clause expressions used to split the column - ``column`` evenly - :param lowerBound: the minimum value of ``column`` used to decide partition stride - :param upperBound: the maximum value of ``column`` used to decide partition stride - :param numPartitions: the number of partitions - :param predicates: a list of expressions suitable for inclusion in WHERE clauses; - each one defines one partition of the :class:`DataFrame` - :param properties: a dictionary of JDBC database connection arguments. Normally at - least properties "user" and "password" with their corresponding values. - For example { 'user' : 'SYSTEM', 'password' : 'mypassword' } - :return: a DataFrame + .. versionadded:: 1.4.0 + + Parameters + ---------- + url : str + a JDBC URL of the form ``jdbc:subprotocol:subname`` + table : str + the name of the table + column : str, optional + the name of a column of numeric, date, or timestamp type + that will be used for partitioning; + if this parameter is specified, then ``numPartitions``, ``lowerBound`` + (inclusive), and ``upperBound`` (exclusive) will form partition strides + for generated WHERE clause expressions used to split the column + ``column`` evenly + lowerBound : str or int, optional + the minimum value of ``column`` used to decide partition stride + upperBound : str or int, optional + the maximum value of ``column`` used to decide partition stride + numPartitions : int, optional + the number of partitions + predicates : list, optional + a list of expressions suitable for inclusion in WHERE clauses; + each one defines one partition of the :class:`DataFrame` + properties : dict, optional + a dictionary of JDBC database connection arguments. Normally at + least properties "user" and "password" with their corresponding values. + For example { 'user' : 'SYSTEM', 'password' : 'mypassword' } + + Notes + ----- + Don't create too many partitions in parallel on a large cluster; + otherwise Spark might crash your external database systems. 
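The partitioned-read behavior described in the `DataFrameReader.jdbc` docstring above can be sketched as follows; the PostgreSQL URL, table, partition column, and credentials are hypothetical, and the matching JDBC driver jar is assumed to be on the classpath:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# column/lowerBound/upperBound/numPartitions make Spark issue four
# range-partitioned queries instead of a single full scan. Keep
# numPartitions modest so the database is not overloaded.
df = spark.read.jdbc(
    url="jdbc:postgresql://db.example.com:5432/shop",
    table="orders",
    column="order_id",
    lowerBound=1,
    upperBound=1_000_000,
    numPartitions=4,
    properties={"user": "reporting", "password": "secret"},
)
print(df.rdd.getNumPartitions())
```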
+ + Returns + ------- + :class:`DataFrame` """ if properties is None: properties = dict() @@ -667,7 +799,6 @@ def _sq(self, jsq): from pyspark.sql.streaming import StreamingQuery return StreamingQuery(jsq) - @since(1.4) def mode(self, saveMode): """Specifies the behavior when data or table already exists. @@ -678,6 +809,10 @@ def mode(self, saveMode): * `error` or `errorifexists`: Throw an exception if data already exists. * `ignore`: Silently ignore this operation if data already exists. + .. versionadded:: 1.4.0 + + Examples + -------- >>> df.write.mode('append').parquet(os.path.join(tempfile.mkdtemp(), 'data')) """ # At the JVM side, the default value of mode is already set to "error". @@ -686,12 +821,18 @@ def mode(self, saveMode): self._jwrite = self._jwrite.mode(saveMode) return self - @since(1.4) def format(self, source): """Specifies the underlying output data source. - :param source: string, name of the data source, e.g. 'json', 'parquet'. + .. versionadded:: 1.4.0 + + Parameters + ---------- + source : str + string, name of the data source, e.g. 'json', 'parquet'. + Examples + -------- >>> df.write.format('json').save(os.path.join(tempfile.mkdtemp(), 'data')) """ self._jwrite = self._jwrite.format(source) @@ -740,15 +881,21 @@ def options(self, **options): self._jwrite = self._jwrite.option(k, to_str(options[k])) return self - @since(1.4) def partitionBy(self, *cols): """Partitions the output by the given columns on the file system. If specified, the output is laid out on the file system similar to Hive's partitioning scheme. - :param cols: name of columns + .. versionadded:: 1.4.0 + Parameters + ---------- + cols : str or list + name of columns + + Examples + -------- >>> df.write.partitionBy('year', 'month').parquet(os.path.join(tempfile.mkdtemp(), 'data')) """ if len(cols) == 1 and isinstance(cols[0], (list, tuple)): @@ -756,18 +903,28 @@ def partitionBy(self, *cols): self._jwrite = self._jwrite.partitionBy(_to_seq(self._spark._sc, cols)) return self - @since(2.3) def bucketBy(self, numBuckets, col, *cols): """Buckets the output by the given columns.If specified, the output is laid out on the file system similar to Hive's bucketing scheme. - :param numBuckets: the number of buckets to save - :param col: a name of a column, or a list of names. - :param cols: additional names (optional). If `col` is a list it should be empty. + .. versionadded:: 2.3.0 + + Parameters + ---------- + numBuckets : int + the number of buckets to save + col : str, list or tuple + a name of a column, or a list of names. + cols : str + additional names (optional). If `col` is a list it should be empty. - .. note:: Applicable for file-based data sources in combination with - :py:meth:`DataFrameWriter.saveAsTable`. + Notes + ----- + Applicable for file-based data sources in combination with + :py:meth:`DataFrameWriter.saveAsTable`. + Examples + -------- >>> (df.write.format('parquet') # doctest: +SKIP ... .bucketBy(100, 'year', 'month') ... .mode("overwrite") @@ -788,13 +945,20 @@ def bucketBy(self, numBuckets, col, *cols): self._jwrite = self._jwrite.bucketBy(numBuckets, col, _to_seq(self._spark._sc, cols)) return self - @since(2.3) def sortBy(self, col, *cols): """Sorts the output in each bucket by the given columns on the file system. - :param col: a name of a column, or a list of names. - :param cols: additional names (optional). If `col` is a list it should be empty. + .. versionadded:: 2.3.0 + + Parameters + ---------- + col : str, tuple or list + a name of a column, or a list of names. 
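Since `mode()`, `format()`, `option()` and `partitionBy()` documented above only configure the writer, a short hedged sketch of how they are usually chained before a terminal action; the output path and column names are illustrative:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Small throwaway frame; nothing is written until save() is called.
df = spark.createDataFrame(
    [(2020, 1, "a"), (2020, 2, "b")], ["year", "month", "value"]
)
(df.write
   .format("parquet")
   .mode("overwrite")
   .partitionBy("year", "month")
   .save("/tmp/partitioned_output"))
```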
+ cols : str + additional names (optional). If `col` is a list it should be empty. + Examples + -------- >>> (df.write.format('parquet') # doctest: +SKIP ... .bucketBy(100, 'year', 'month') ... .sortBy('day') @@ -813,7 +977,6 @@ def sortBy(self, col, *cols): self._jwrite = self._jwrite.sortBy(col, _to_seq(self._spark._sc, cols)) return self - @since(1.4) def save(self, path=None, format=None, mode=None, partitionBy=None, **options): """Saves the contents of the :class:`DataFrame` to a data source. @@ -821,18 +984,29 @@ def save(self, path=None, format=None, mode=None, partitionBy=None, **options): If ``format`` is not specified, the default data source configured by ``spark.sql.sources.default`` will be used. - :param path: the path in a Hadoop supported file system - :param format: the format used to save - :param mode: specifies the behavior of the save operation when data already exists. + .. versionadded:: 1.4.0 + + Parameters + ---------- + path : str, optional + the path in a Hadoop supported file system + format : str, optional + the format used to save + mode : str, optional + specifies the behavior of the save operation when data already exists. * ``append``: Append contents of this :class:`DataFrame` to existing data. * ``overwrite``: Overwrite existing data. * ``ignore``: Silently ignore this operation if data already exists. * ``error`` or ``errorifexists`` (default case): Throw an exception if data already \ exists. - :param partitionBy: names of partitioning columns - :param options: all other string options + partitionBy : list, optional + names of partitioning columns + **options : dict + all other string options + Examples + -------- >>> df.write.mode("append").save(os.path.join(tempfile.mkdtemp(), 'data')) """ self.mode(mode).options(**options) @@ -858,7 +1032,6 @@ def insertInto(self, tableName, overwrite=None): self.mode("overwrite" if overwrite else "append") self._jwrite.insertInto(tableName) - @since(1.4) def saveAsTable(self, name, format=None, mode=None, partitionBy=None, **options): """Saves the content of the :class:`DataFrame` as the specified table. @@ -872,12 +1045,21 @@ def saveAsTable(self, name, format=None, mode=None, partitionBy=None, **options) * `error` or `errorifexists`: Throw an exception if data already exists. * `ignore`: Silently ignore this operation if data already exists. - :param name: the table name - :param format: the format used to save - :param mode: one of `append`, `overwrite`, `error`, `errorifexists`, `ignore` \ - (default: error) - :param partitionBy: names of partitioning columns - :param options: all other string options + .. versionadded:: 1.4.0 + + Parameters + ---------- + name : str + the table name + format : str, optional + the format used to save + mode : str, optional + one of `append`, `overwrite`, `error`, `errorifexists`, `ignore` \ + (default: error) + partitionBy : str or list + names of partitioning columns + **options : dict + all other string options """ self.mode(mode).options(**options) if partitionBy is not None: @@ -886,41 +1068,53 @@ def saveAsTable(self, name, format=None, mode=None, partitionBy=None, **options) self.format(format) self._jwrite.saveAsTable(name) - @since(1.4) def json(self, path, mode=None, compression=None, dateFormat=None, timestampFormat=None, lineSep=None, encoding=None, ignoreNullFields=None): """Saves the content of the :class:`DataFrame` in JSON format (`JSON Lines text format or newline-delimited JSON `_) at the specified path. 
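Building on the `bucketBy()`/`sortBy()`/`saveAsTable()` docstrings above, a minimal sketch; the table name and data are hypothetical, and the default session catalog (writing under `spark-warehouse`) is assumed:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# bucketBy()/sortBy() only take effect with saveAsTable(); sortBy()
# orders rows within each bucket.
df = spark.createDataFrame(
    [(1, "2020-01-01"), (2, "2020-01-02")], ["id", "day"]
)
(df.write
   .format("parquet")
   .bucketBy(4, "id")
   .sortBy("day")
   .mode("overwrite")
   .saveAsTable("bucketed_events"))
```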
-        :param path: the path in any Hadoop supported file system
-        :param mode: specifies the behavior of the save operation when data already exists.
+        .. versionadded:: 1.4.0
+
+        Parameters
+        ----------
+        path : str
+            the path in any Hadoop supported file system
+        mode : str, optional
+            specifies the behavior of the save operation when data already exists.
 
             * ``append``: Append contents of this :class:`DataFrame` to existing data.
             * ``overwrite``: Overwrite existing data.
             * ``ignore``: Silently ignore this operation if data already exists.
             * ``error`` or ``errorifexists`` (default case): Throw an exception if data already \
                 exists.
-        :param compression: compression codec to use when saving to file. This can be one of the
-                            known case-insensitive shorten names (none, bzip2, gzip, lz4,
-                            snappy and deflate).
-        :param dateFormat: sets the string that indicates a date format. Custom date formats
-                           follow the formats at `datetime pattern`_.
-                           This applies to date type. If None is set, it uses the
-                           default value, ``yyyy-MM-dd``.
-        :param timestampFormat: sets the string that indicates a timestamp format.
-                                Custom date formats follow the formats at `datetime pattern`_.
-                                This applies to timestamp type. If None is set, it uses the
-                                default value, ``yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]``.
-        :param encoding: specifies encoding (charset) of saved json files. If None is set,
-                         the default UTF-8 charset will be used.
-        :param lineSep: defines the line separator that should be used for writing. If None is
-                        set, it uses the default value, ``\\n``.
-        :param ignoreNullFields: Whether to ignore null fields when generating JSON objects.
-                                 If None is set, it uses the default value, ``true``.
-
-        .. _datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
-
+        compression : str, optional
+            compression codec to use when saving to file. This can be one of the
+            known case-insensitive shorten names (none, bzip2, gzip, lz4,
+            snappy and deflate).
+        dateFormat : str, optional
+            sets the string that indicates a date format. Custom date formats
+            follow the formats at
+            `datetime pattern <https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html>`_.  # noqa
+            This applies to date type. If None is set, it uses the
+            default value, ``yyyy-MM-dd``.
+        timestampFormat : str, optional
+            sets the string that indicates a timestamp format.
+            Custom date formats follow the formats at
+            `datetime pattern <https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html>`_.  # noqa
+            This applies to timestamp type. If None is set, it uses the
+            default value, ``yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]``.
+        encoding : str, optional
+            specifies encoding (charset) of saved json files. If None is set,
+            the default UTF-8 charset will be used.
+        lineSep : str, optional
+            defines the line separator that should be used for writing. If None is
+            set, it uses the default value, ``\\n``.
+        ignoreNullFields : str or bool, optional
+            Whether to ignore null fields when generating JSON objects.
+            If None is set, it uses the default value, ``true``.
+
+        Examples
+        --------
         >>> df.write.json(os.path.join(tempfile.mkdtemp(), 'data'))
         """
         self.mode(mode)
@@ -929,25 +1123,34 @@ def json(self, path, mode=None, compression=None, dateFormat=None, timestampForm
                        lineSep=lineSep, encoding=encoding, ignoreNullFields=ignoreNullFields)
         self._jwrite.json(path)
-    @since(1.4)
     def parquet(self, path, mode=None, partitionBy=None, compression=None):
         """Saves the content of the :class:`DataFrame` in Parquet format at the specified path.
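A minimal sketch of the `DataFrameWriter.json` options documented above; the output directory and data are hypothetical, and `gzip` is one of the documented codecs:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# ignoreNullFields=False keeps null-valued fields in the emitted JSON
# instead of dropping them.
df = spark.createDataFrame([("Alice", None), ("Bob", 42)], "name string, age int")
df.write.json(
    "/tmp/people_json",
    mode="overwrite",
    compression="gzip",
    ignoreNullFields=False,
)
```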
- :param path: the path in any Hadoop supported file system - :param mode: specifies the behavior of the save operation when data already exists. + .. versionadded:: 1.4.0 + + Parameters + ---------- + path : str + the path in any Hadoop supported file system + mode : str, optional + specifies the behavior of the save operation when data already exists. * ``append``: Append contents of this :class:`DataFrame` to existing data. * ``overwrite``: Overwrite existing data. * ``ignore``: Silently ignore this operation if data already exists. * ``error`` or ``errorifexists`` (default case): Throw an exception if data already \ exists. - :param partitionBy: names of partitioning columns - :param compression: compression codec to use when saving to file. This can be one of the - known case-insensitive shorten names (none, uncompressed, snappy, gzip, - lzo, brotli, lz4, and zstd). This will override - ``spark.sql.parquet.compression.codec``. If None is set, it uses the - value specified in ``spark.sql.parquet.compression.codec``. - + partitionBy : str or list, optional + names of partitioning columns + compression : str, optional + compression codec to use when saving to file. This can be one of the + known case-insensitive shorten names (none, uncompressed, snappy, gzip, + lzo, brotli, lz4, and zstd). This will override + ``spark.sql.parquet.compression.codec``. If None is set, it uses the + value specified in ``spark.sql.parquet.compression.codec``. + + Examples + -------- >>> df.write.parquet(os.path.join(tempfile.mkdtemp(), 'data')) """ self.mode(mode) @@ -956,17 +1159,23 @@ def parquet(self, path, mode=None, partitionBy=None, compression=None): self._set_opts(compression=compression) self._jwrite.parquet(path) - @since(1.6) def text(self, path, compression=None, lineSep=None): """Saves the content of the DataFrame in a text file at the specified path. The text files will be encoded as UTF-8. - :param path: the path in any Hadoop supported file system - :param compression: compression codec to use when saving to file. This can be one of the - known case-insensitive shorten names (none, bzip2, gzip, lz4, - snappy and deflate). - :param lineSep: defines the line separator that should be used for writing. If None is - set, it uses the default value, ``\\n``. + .. versionadded:: 1.6.0 + + Parameters + ---------- + path : str + the path in any Hadoop supported file system + compression : str, optional + compression codec to use when saving to file. This can be one of the + known case-insensitive shorten names (none, bzip2, gzip, lz4, + snappy and deflate). + lineSep : str, optional + defines the line separator that should be used for writing. If None is + set, it uses the default value, ``\\n``. The DataFrame must have only one column that is of string type. Each row becomes a new line in the output file. @@ -974,15 +1183,20 @@ def text(self, path, compression=None, lineSep=None): self._set_opts(compression=compression, lineSep=lineSep) self._jwrite.text(path) - @since(2.0) def csv(self, path, mode=None, compression=None, sep=None, quote=None, escape=None, header=None, nullValue=None, escapeQuotes=None, quoteAll=None, dateFormat=None, timestampFormat=None, ignoreLeadingWhiteSpace=None, ignoreTrailingWhiteSpace=None, charToEscapeQuoteEscaping=None, encoding=None, emptyValue=None, lineSep=None): r"""Saves the content of the :class:`DataFrame` in CSV format at the specified path. 
- :param path: the path in any Hadoop supported file system - :param mode: specifies the behavior of the save operation when data already exists. + .. versionadded:: 2.0.0 + + Parameters + ---------- + path : str + the path in any Hadoop supported file system + mode : str, optional + specifies the behavior of the save operation when data already exists. * ``append``: Append contents of this :class:`DataFrame` to existing data. * ``overwrite``: Overwrite existing data. @@ -990,53 +1204,71 @@ def csv(self, path, mode=None, compression=None, sep=None, quote=None, escape=No * ``error`` or ``errorifexists`` (default case): Throw an exception if data already \ exists. - :param compression: compression codec to use when saving to file. This can be one of the - known case-insensitive shorten names (none, bzip2, gzip, lz4, - snappy and deflate). - :param sep: sets a separator (one or more characters) for each field and value. If None is - set, it uses the default value, ``,``. - :param quote: sets a single character used for escaping quoted values where the - separator can be part of the value. If None is set, it uses the default - value, ``"``. If an empty string is set, it uses ``u0000`` (null character). - :param escape: sets a single character used for escaping quotes inside an already - quoted value. If None is set, it uses the default value, ``\`` - :param escapeQuotes: a flag indicating whether values containing quotes should always - be enclosed in quotes. If None is set, it uses the default value - ``true``, escaping all values containing a quote character. - :param quoteAll: a flag indicating whether all values should always be enclosed in - quotes. If None is set, it uses the default value ``false``, - only escaping values containing a quote character. - :param header: writes the names of columns as the first line. If None is set, it uses - the default value, ``false``. - :param nullValue: sets the string representation of a null value. If None is set, it uses - the default value, empty string. - :param dateFormat: sets the string that indicates a date format. Custom date formats follow - the formats at `datetime pattern`_. - This applies to date type. If None is set, it uses the - default value, ``yyyy-MM-dd``. - :param timestampFormat: sets the string that indicates a timestamp format. - Custom date formats follow the formats at `datetime pattern`_. - This applies to timestamp type. If None is set, it uses the - default value, ``yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]``. - :param ignoreLeadingWhiteSpace: a flag indicating whether or not leading whitespaces from - values being written should be skipped. If None is set, it - uses the default value, ``true``. - :param ignoreTrailingWhiteSpace: a flag indicating whether or not trailing whitespaces from - values being written should be skipped. If None is set, it - uses the default value, ``true``. - :param charToEscapeQuoteEscaping: sets a single character used for escaping the escape for - the quote character. If None is set, the default value is - escape character when escape and quote characters are - different, ``\0`` otherwise.. - :param encoding: sets the encoding (charset) of saved csv files. If None is set, - the default UTF-8 charset will be used. - :param emptyValue: sets the string representation of an empty value. If None is set, it uses - the default value, ``""``. - :param lineSep: defines the line separator that should be used for writing. If None is - set, it uses the default value, ``\\n``. Maximum length is 1 character. - - .. 
_datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html - + compression : str, optional + compression codec to use when saving to file. This can be one of the + known case-insensitive shorten names (none, bzip2, gzip, lz4, + snappy and deflate). + sep : str, optional + sets a separator (one or more characters) for each field and value. If None is + set, it uses the default value, ``,``. + quote : str, optional + sets a single character used for escaping quoted values where the + separator can be part of the value. If None is set, it uses the default + value, ``"``. If an empty string is set, it uses ``u0000`` (null character). + escape : str, optional + sets a single character used for escaping quotes inside an already + quoted value. If None is set, it uses the default value, ``\`` + escapeQuotes : str or bool, optional + a flag indicating whether values containing quotes should always + be enclosed in quotes. If None is set, it uses the default value + ``true``, escaping all values containing a quote character. + quoteAll : str or bool, optional + a flag indicating whether all values should always be enclosed in + quotes. If None is set, it uses the default value ``false``, + only escaping values containing a quote character. + header : str or bool, optional + writes the names of columns as the first line. If None is set, it uses + the default value, ``false``. + nullValue : str, optional + sets the string representation of a null value. If None is set, it uses + the default value, empty string. + dateFormat : str, optional + sets the string that indicates a date format. Custom date formats follow + the formats at + `datetime pattern `_. # noqa + This applies to date type. If None is set, it uses the + default value, ``yyyy-MM-dd``. + timestampFormat : str, optional + sets the string that indicates a timestamp format. + Custom date formats follow the formats at + `datetime pattern `_. # noqa + This applies to timestamp type. If None is set, it uses the + default value, ``yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]``. + ignoreLeadingWhiteSpace : str or bool, optional + a flag indicating whether or not leading whitespaces from + values being written should be skipped. If None is set, it + uses the default value, ``true``. + ignoreTrailingWhiteSpace : str or bool, optional + a flag indicating whether or not trailing whitespaces from + values being written should be skipped. If None is set, it + uses the default value, ``true``. + charToEscapeQuoteEscaping : str, optional + sets a single character used for escaping the escape for + the quote character. If None is set, the default value is + escape character when escape and quote characters are + different, ``\0`` otherwise.. + encoding : str, optional + sets the encoding (charset) of saved csv files. If None is set, + the default UTF-8 charset will be used. + emptyValue : str, optional + sets the string representation of an empty value. If None is set, it uses + the default value, ``""``. + lineSep : str, optional + defines the line separator that should be used for writing. If None is + set, it uses the default value, ``\\n``. Maximum length is 1 character. 
+ + Examples + -------- >>> df.write.csv(os.path.join(tempfile.mkdtemp(), 'data')) """ self.mode(mode) @@ -1049,25 +1281,34 @@ def csv(self, path, mode=None, compression=None, sep=None, quote=None, escape=No encoding=encoding, emptyValue=emptyValue, lineSep=lineSep) self._jwrite.csv(path) - @since(1.5) def orc(self, path, mode=None, partitionBy=None, compression=None): """Saves the content of the :class:`DataFrame` in ORC format at the specified path. - :param path: the path in any Hadoop supported file system - :param mode: specifies the behavior of the save operation when data already exists. + .. versionadded:: 1.5.0 + + Parameters + ---------- + path : str + the path in any Hadoop supported file system + mode : str, optional + specifies the behavior of the save operation when data already exists. * ``append``: Append contents of this :class:`DataFrame` to existing data. * ``overwrite``: Overwrite existing data. * ``ignore``: Silently ignore this operation if data already exists. * ``error`` or ``errorifexists`` (default case): Throw an exception if data already \ exists. - :param partitionBy: names of partitioning columns - :param compression: compression codec to use when saving to file. This can be one of the - known case-insensitive shorten names (none, snappy, zlib, and lzo). - This will override ``orc.compress`` and - ``spark.sql.orc.compression.codec``. If None is set, it uses the value - specified in ``spark.sql.orc.compression.codec``. - + partitionBy : str or list, optional + names of partitioning columns + compression : str, optional + compression codec to use when saving to file. This can be one of the + known case-insensitive shorten names (none, snappy, zlib, and lzo). + This will override ``orc.compress`` and + ``spark.sql.orc.compression.codec``. If None is set, it uses the value + specified in ``spark.sql.orc.compression.codec``. + + Examples + -------- >>> orc_df = spark.read.orc('python/test_support/sql/orc_partitioned') >>> orc_df.write.orc(os.path.join(tempfile.mkdtemp(), 'data')) """ @@ -1077,25 +1318,34 @@ def orc(self, path, mode=None, partitionBy=None, compression=None): self._set_opts(compression=compression) self._jwrite.orc(path) - @since(1.4) def jdbc(self, url, table, mode=None, properties=None): """Saves the content of the :class:`DataFrame` to an external database table via JDBC. - .. note:: Don't create too many partitions in parallel on a large cluster; - otherwise Spark might crash your external database systems. + .. versionadded:: 1.4.0 - :param url: a JDBC URL of the form ``jdbc:subprotocol:subname`` - :param table: Name of the table in the external database. - :param mode: specifies the behavior of the save operation when data already exists. + Parameters + ---------- + url : str + a JDBC URL of the form ``jdbc:subprotocol:subname`` + table : str + Name of the table in the external database. + mode : str, optional + specifies the behavior of the save operation when data already exists. * ``append``: Append contents of this :class:`DataFrame` to existing data. * ``overwrite``: Overwrite existing data. * ``ignore``: Silently ignore this operation if data already exists. * ``error`` or ``errorifexists`` (default case): Throw an exception if data already \ exists. - :param properties: a dictionary of JDBC database connection arguments. Normally at - least properties "user" and "password" with their corresponding values. 
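To complement the `DataFrameWriter.csv` options documented above, a minimal sketch; the output path and sample data are hypothetical:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# header writes the column names as the first line; the quote/escape
# characters handle values that contain the separator.
df = spark.createDataFrame([("Alice", ""), ("Bob", "x,y")], ["name", "tags"])
df.write.csv(
    "/tmp/people_csv",
    mode="overwrite",
    sep=",",
    header=True,
    quote='"',
    escape="\\",
    emptyValue="",
    compression="gzip",
)
```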
- For example { 'user' : 'SYSTEM', 'password' : 'mypassword' } + properties : dict + a dictionary of JDBC database connection arguments. Normally at + least properties "user" and "password" with their corresponding values. + For example { 'user' : 'SYSTEM', 'password' : 'mypassword' } + + Notes + ----- + Don't create too many partitions in parallel on a large cluster; + otherwise Spark might crash your external database systems. """ if properties is None: properties = dict() diff --git a/python/pyspark/sql/readwriter.pyi b/python/pyspark/sql/readwriter.pyi index a111cbe416c2f..64c5697203a44 100644 --- a/python/pyspark/sql/readwriter.pyi +++ b/python/pyspark/sql/readwriter.pyi @@ -41,7 +41,7 @@ class DataFrameReader(OptionUtils): self, path: Optional[PathOrPaths] = ..., format: Optional[str] = ..., - schema: Optional[StructType] = ..., + schema: Optional[Union[StructType, str]] = ..., **options: OptionalPrimitiveType ) -> DataFrame: ... def json( @@ -66,7 +66,9 @@ class DataFrameReader(OptionUtils): dropFieldIfAllNull: Optional[Union[bool, str]] = ..., encoding: Optional[str] = ..., locale: Optional[str] = ..., - recursiveFileLookup: Optional[bool] = ..., + pathGlobFilter: Optional[Union[bool, str]] = ..., + recursiveFileLookup: Optional[Union[bool, str]] = ..., + allowNonNumericNumbers: Optional[Union[bool, str]] = ..., ) -> DataFrame: ... def table(self, tableName: str) -> DataFrame: ... def parquet(self, *paths: str, **options: OptionalPrimitiveType) -> DataFrame: ... @@ -75,7 +77,8 @@ class DataFrameReader(OptionUtils): paths: PathOrPaths, wholetext: bool = ..., lineSep: Optional[str] = ..., - recursiveFileLookup: Optional[bool] = ..., + pathGlobFilter: Optional[Union[bool, str]] = ..., + recursiveFileLookup: Optional[Union[bool, str]] = ..., ) -> DataFrame: ... def csv( self, @@ -96,9 +99,9 @@ class DataFrameReader(OptionUtils): negativeInf: Optional[str] = ..., dateFormat: Optional[str] = ..., timestampFormat: Optional[str] = ..., - maxColumns: Optional[int] = ..., - maxCharsPerColumn: Optional[int] = ..., - maxMalformedLogPerPartition: Optional[int] = ..., + maxColumns: Optional[Union[int, str]] = ..., + maxCharsPerColumn: Optional[Union[int, str]] = ..., + maxMalformedLogPerPartition: Optional[Union[int, str]] = ..., mode: Optional[str] = ..., columnNameOfCorruptRecord: Optional[str] = ..., multiLine: Optional[Union[bool, str]] = ..., @@ -108,12 +111,15 @@ class DataFrameReader(OptionUtils): emptyValue: Optional[str] = ..., locale: Optional[str] = ..., lineSep: Optional[str] = ..., + pathGlobFilter: Optional[Union[bool, str]] = ..., + recursiveFileLookup: Optional[Union[bool, str]] = ..., ) -> DataFrame: ... def orc( self, path: PathOrPaths, mergeSchema: Optional[bool] = ..., - recursiveFileLookup: Optional[bool] = ..., + pathGlobFilter: Optional[Union[bool, str]] = ..., + recursiveFileLookup: Optional[Union[bool, str]] = ..., ) -> DataFrame: ... @overload def jdbc( @@ -125,8 +131,8 @@ class DataFrameReader(OptionUtils): url: str, table: str, column: str, - lowerBound: int, - upperBound: int, + lowerBound: Union[int, str], + upperBound: Union[int, str], numPartitions: int, *, properties: Optional[Dict[str, str]] = ... @@ -166,7 +172,7 @@ class DataFrameWriter(OptionUtils): path: Optional[str] = ..., format: Optional[str] = ..., mode: Optional[str] = ..., - partitionBy: Optional[List[str]] = ..., + partitionBy: Optional[Union[str, List[str]]] = ..., **options: OptionalPrimitiveType ) -> None: ... def insertInto(self, tableName: str, overwrite: Optional[bool] = ...) -> None: ... 
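Rounding out the `DataFrameWriter.jdbc` docstring converted above, a hedged sketch; the MySQL URL, table, credentials, and the `driver` property are hypothetical, and the matching JDBC driver jar is assumed to be on the classpath:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Append rows to an existing external table via JDBC.
df = spark.createDataFrame([(1, "Alice"), (2, "Bob")], ["id", "name"])
df.write.jdbc(
    url="jdbc:mysql://db.example.com:3306/shop",
    table="customers",
    mode="append",
    properties={"user": "writer", "password": "secret",
                "driver": "com.mysql.cj.jdbc.Driver"},
)
```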
@@ -175,7 +181,7 @@ class DataFrameWriter(OptionUtils): name: str, format: Optional[str] = ..., mode: Optional[str] = ..., - partitionBy: Optional[List[str]] = ..., + partitionBy: Optional[Union[str, List[str]]] = ..., **options: OptionalPrimitiveType ) -> None: ... def json( @@ -187,13 +193,13 @@ class DataFrameWriter(OptionUtils): timestampFormat: Optional[str] = ..., lineSep: Optional[str] = ..., encoding: Optional[str] = ..., - ignoreNullFields: Optional[bool] = ..., + ignoreNullFields: Optional[Union[bool, str]] = ..., ) -> None: ... def parquet( self, path: str, mode: Optional[str] = ..., - partitionBy: Optional[List[str]] = ..., + partitionBy: Optional[Union[str, List[str]]] = ..., compression: Optional[str] = ..., ) -> None: ... def text( @@ -224,7 +230,7 @@ class DataFrameWriter(OptionUtils): self, path: str, mode: Optional[str] = ..., - partitionBy: Optional[List[str]] = ..., + partitionBy: Optional[Union[str, List[str]]] = ..., compression: Optional[str] = ..., ) -> None: ... def jdbc( diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index d724b76e3bfc3..2857e2e5865ae 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -42,10 +42,24 @@ def toDF(self, schema=None, sampleRatio=None): This is a shorthand for ``spark.createDataFrame(rdd, schema, sampleRatio)`` - :param schema: a :class:`pyspark.sql.types.StructType` or list of names of columns - :param sampleRatio: the sample ratio of rows used for inferring - :return: a DataFrame + Parameters + ---------- + schema : :class:`pyspark.sql.types.DataType`, str or list, optional + a :class:`pyspark.sql.types.DataType` or a datatype string or a list of + column names, default is None. The data type string format equals to + :class:`pyspark.sql.types.DataType.simpleString`, except that top level struct type can + omit the ``struct<>`` and atomic types use ``typeName()`` as their format, e.g. use + ``byte`` instead of ``tinyint`` for :class:`pyspark.sql.types.ByteType`. + We can also use ``int`` as a short name for :class:`pyspark.sql.types.IntegerType`. + sampleRatio : float, optional + the sample ratio of rows used for inferring + + Returns + ------- + :class:`DataFrame` + Examples + -------- >>> rdd.toDF().collect() [Row(name='Alice', age=1)] """ @@ -61,14 +75,31 @@ class SparkSession(SparkConversionMixin): tables, execute SQL over tables, cache tables, and read parquet files. To create a SparkSession, use the following builder pattern: + .. autoattribute:: builder + :annotation: + + Examples + -------- >>> spark = SparkSession.builder \\ ... .master("local") \\ ... .appName("Word Count") \\ ... .config("spark.some.config.option", "some-value") \\ ... .getOrCreate() - .. autoattribute:: builder - :annotation: + >>> from datetime import datetime + >>> from pyspark.sql import Row + >>> spark = SparkSession(sc) + >>> allTypes = sc.parallelize([Row(i=1, s="string", d=1.0, l=1, + ... b=True, list=[1, 2, 3], dict={"s": 0}, row=Row(a=1), + ... time=datetime(2014, 8, 1, 14, 1, 5))]) + >>> df = allTypes.toDF() + >>> df.createOrReplaceTempView("allTypes") + >>> spark.sql('select i+1, d+1, not b, list[1], dict["s"], time, row.a ' + ... 
'from allTypes where b and i > 0').collect() + [Row((i + CAST(1 AS BIGINT))=2, (d + CAST(1 AS DOUBLE))=2.0, (NOT b)=False, list[1]=2, \ + dict[s]=0, time=datetime.datetime(2014, 8, 1, 14, 1, 5), a=1)] + >>> df.rdd.map(lambda x: (x.i, x.s, x.d, x.l, x.b, x.time, x.row.a, x.list)).collect() + [(1, 'string', 1.0, 1, True, datetime.datetime(2014, 8, 1, 14, 1, 5), 1, [1, 2, 3])] """ class Builder(object): @@ -79,11 +110,23 @@ class Builder(object): _options = {} _sc = None - @since(2.0) def config(self, key=None, value=None, conf=None): """Sets a config option. Options set using this method are automatically propagated to both :class:`SparkConf` and :class:`SparkSession`'s own configuration. + .. versionadded:: 2.0.0 + + Parameters + ---------- + key : str, optional + a key name string for configuration property + value : str, optional + a value for configuration property + conf : :class:`SparkConf`, optional + an instance of :class:`SparkConf` + + Examples + -------- For an existing SparkConf, use `conf` parameter. >>> from pyspark.conf import SparkConf @@ -95,9 +138,6 @@ def config(self, key=None, value=None, conf=None): >>> SparkSession.builder.config("spark.some.config.option", "some-value") >> from datetime import datetime - >>> from pyspark.sql import Row - >>> spark = SparkSession(sc) - >>> allTypes = sc.parallelize([Row(i=1, s="string", d=1.0, l=1, - ... b=True, list=[1, 2, 3], dict={"s": 0}, row=Row(a=1), - ... time=datetime(2014, 8, 1, 14, 1, 5))]) - >>> df = allTypes.toDF() - >>> df.createOrReplaceTempView("allTypes") - >>> spark.sql('select i+1, d+1, not b, list[1], dict["s"], time, row.a ' - ... 'from allTypes where b and i > 0').collect() - [Row((i + CAST(1 AS BIGINT))=2, (d + CAST(1 AS DOUBLE))=2.0, (NOT b)=False, list[1]=2, \ - dict[s]=0, time=datetime.datetime(2014, 8, 1, 14, 1, 5), a=1)] - >>> df.rdd.map(lambda x: (x.i, x.s, x.d, x.l, x.b, x.time, x.row.a, x.list)).collect() - [(1, 'string', 1.0, 1, True, datetime.datetime(2014, 8, 1, 14, 1, 5), 1, [1, 2, 3])] - """ from pyspark.sql.context import SQLContext self._sc = sparkContext self._jsc = self._sc._jsc @@ -254,13 +288,19 @@ def newSession(self): return self.__class__(self._sc, self._jsparkSession.newSession()) @classmethod - @since(3.0) def getActiveSession(cls): """ Returns the active SparkSession for the current thread, returned by the builder - :return: :class:`SparkSession` if an active session exists for the current thread + .. versionadded:: 3.0.0 + + Returns + ------- + :class:`SparkSession` + Spark session if an active session exists for the current thread + Examples + -------- >>> s = SparkSession.getActiveSession() >>> l = [('Alice', 1)] >>> rdd = s.sparkContext.parallelize(l) @@ -305,12 +345,15 @@ def conf(self): return self._conf @property - @since(2.0) def catalog(self): """Interface through which the user may create, drop, alter or query underlying databases, tables, functions, etc. - :return: :class:`Catalog` + .. versionadded:: 2.0.0 + + Returns + ------- + :class:`Catalog` """ from pyspark.sql.catalog import Catalog if not hasattr(self, "_catalog"): @@ -318,28 +361,43 @@ def catalog(self): return self._catalog @property - @since(2.0) def udf(self): """Returns a :class:`UDFRegistration` for UDF registration. - :return: :class:`UDFRegistration` + .. 
versionadded:: 2.0.0 + + Returns + ------- + :class:`UDFRegistration` """ from pyspark.sql.udf import UDFRegistration return UDFRegistration(self) - @since(2.0) def range(self, start, end=None, step=1, numPartitions=None): """ Create a :class:`DataFrame` with single :class:`pyspark.sql.types.LongType` column named ``id``, containing elements in a range from ``start`` to ``end`` (exclusive) with step value ``step``. - :param start: the start value - :param end: the end value (exclusive) - :param step: the incremental step (default: 1) - :param numPartitions: the number of partitions of the DataFrame - :return: :class:`DataFrame` - + .. versionadded:: 2.0.0 + + Parameters + ---------- + start : int + the start value + end : int, optional + the end value (exclusive) + step : int, optional + the incremental step (default: 1) + numPartitions : int, optional + the number of partitions of the DataFrame + + Returns + ------- + :class:`DataFrame` + + Examples + -------- >>> spark.range(1, 7, 2).collect() [Row(id=1), Row(id=3), Row(id=5)] @@ -362,9 +420,16 @@ def _inferSchemaFromList(self, data, names=None): """ Infer schema from list of Row, dict, or tuple. - :param data: list of Row, dict, or tuple - :param names: list of column names - :return: :class:`pyspark.sql.types.StructType` + Parameters + ---------- + data : iterable + list of Row, dict, or tuple + names : list, optional + list of column names + + Returns + ------- + :class:`pyspark.sql.types.StructType` """ if not data: raise ValueError("can not infer schema from empty dataset") @@ -377,9 +442,17 @@ def _inferSchema(self, rdd, samplingRatio=None, names=None): """ Infer schema from an RDD of Row, dict, or tuple. - :param rdd: an RDD of Row, dict, or tuple - :param samplingRatio: sampling ratio, or no sampling (default) - :return: :class:`pyspark.sql.types.StructType` + Parameters + ---------- + rdd : :class:`RDD` + an RDD of Row, dict, or tuple + samplingRatio : float, optional + sampling ratio, or no sampling (default) + names : list, optional + + Returns + ------- + :class:`pyspark.sql.types.StructType` """ first = rdd.first() if not first: @@ -476,7 +549,6 @@ def _create_shell_session(): return SparkSession.builder.getOrCreate() - @since(2.0) def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=True): """ Creates a :class:`DataFrame` from an :class:`RDD`, a list or a :class:`pandas.DataFrame`. @@ -497,23 +569,39 @@ def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=Tr If schema inference is needed, ``samplingRatio`` is used to determined the ratio of rows used for schema inference. The first row will be used if ``samplingRatio`` is ``None``. - :param data: an RDD of any kind of SQL data representation (e.g. row, tuple, int, boolean, - etc.), :class:`list`, or :class:`pandas.DataFrame`. - :param schema: a :class:`pyspark.sql.types.DataType` or a datatype string or a list of - column names, default is ``None``. The data type string format equals to - :class:`pyspark.sql.types.DataType.simpleString`, except that top level struct type can - omit the ``struct<>`` and atomic types use ``typeName()`` as their format, e.g. use - ``byte`` instead of ``tinyint`` for :class:`pyspark.sql.types.ByteType`. We can also use - ``int`` as a short name for ``IntegerType``. - :param samplingRatio: the sample ratio of rows used for inferring - :param verifySchema: verify data types of every row against schema. - :return: :class:`DataFrame` + .. versionadded:: 2.0.0 - .. versionchanged:: 2.1 + .. 
versionchanged:: 2.1.0 Added verifySchema. - .. note:: Usage with spark.sql.execution.arrow.pyspark.enabled=True is experimental. - + Parameters + ---------- + data : :class:`RDD` or iterable + an RDD of any kind of SQL data representation(e.g. :class:`Row`, + :class:`tuple`, ``int``, ``boolean``, etc.), or :class:`list`, or + :class:`pandas.DataFrame`. + schema : :class:`pyspark.sql.types.DataType`, str or list, optional + a :class:`pyspark.sql.types.DataType` or a datatype string or a list of + column names, default is None. The data type string format equals to + :class:`pyspark.sql.types.DataType.simpleString`, except that top level struct type can + omit the ``struct<>`` and atomic types use ``typeName()`` as their format, e.g. use + ``byte`` instead of ``tinyint`` for :class:`pyspark.sql.types.ByteType`. + We can also use ``int`` as a short name for :class:`pyspark.sql.types.IntegerType`. + samplingRatio : float, optional + the sample ratio of rows used for inferring + verifySchema : bool, optional + verify data types of every row against schema. Enabled by default. + + Returns + ------- + :class:`DataFrame` + + Notes + ----- + Usage with spark.sql.execution.arrow.pyspark.enabled=True is experimental. + + Examples + -------- >>> l = [('Alice', 1)] >>> spark.createDataFrame(l).collect() [Row(_1='Alice', _2=1)] @@ -614,12 +702,17 @@ def prepare(obj): df._schema = schema return df - @since(2.0) def sql(self, sqlQuery): """Returns a :class:`DataFrame` representing the result of the given query. - :return: :class:`DataFrame` + .. versionadded:: 2.0.0 + + Returns + ------- + :class:`DataFrame` + Examples + -------- >>> df.createOrReplaceTempView("table1") >>> df2 = spark.sql("SELECT field1 AS f1, field2 as f2 from table1") >>> df2.collect() @@ -627,12 +720,17 @@ def sql(self, sqlQuery): """ return DataFrame(self._jsparkSession.sql(sqlQuery), self._wrapped) - @since(2.0) def table(self, tableName): """Returns the specified table as a :class:`DataFrame`. - :return: :class:`DataFrame` + .. versionadded:: 2.0.0 + + Returns + ------- + :class:`DataFrame` + Examples + -------- >>> df.createOrReplaceTempView("table1") >>> df2 = spark.table("table1") >>> sorted(df.collect()) == sorted(df2.collect()) @@ -641,38 +739,51 @@ def table(self, tableName): return DataFrame(self._jsparkSession.table(tableName), self._wrapped) @property - @since(2.0) def read(self): """ Returns a :class:`DataFrameReader` that can be used to read data in as a :class:`DataFrame`. - :return: :class:`DataFrameReader` + .. versionadded:: 2.0.0 + + Returns + ------- + :class:`DataFrameReader` """ return DataFrameReader(self._wrapped) @property - @since(2.0) def readStream(self): """ Returns a :class:`DataStreamReader` that can be used to read data streams as a streaming :class:`DataFrame`. - .. note:: Evolving. + .. versionadded:: 2.0.0 + + Notes + ----- + This API is evolving. - :return: :class:`DataStreamReader` + Returns + ------- + :class:`DataStreamReader` """ return DataStreamReader(self._wrapped) @property - @since(2.0) def streams(self): """Returns a :class:`StreamingQueryManager` that allows managing all the :class:`StreamingQuery` instances active on `this` context. - .. note:: Evolving. + .. versionadded:: 2.0.0 + + Notes + ----- + This API is evolving. 
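To make the schema forms described in the `createDataFrame` docstring above concrete, a minimal sketch showing a DDL-formatted string, a short atomic type name, and a plain list of column names; the sample rows are placeholders:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
rows = [("Alice", 1), ("Bob", 2)]

# DDL-formatted string; "int" is the short name for IntegerType
df1 = spark.createDataFrame(rows, schema="name string, age int")

# atomic type name only, for single-column data
df2 = spark.createDataFrame([1, 2, 3], schema="int")

# list of column names; types are inferred, rows verified against the schema
df3 = spark.createDataFrame(rows, schema=["name", "age"], verifySchema=True)
```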
- :return: :class:`StreamingQueryManager` + Returns + ------- + :class:`StreamingQueryManager` """ from pyspark.sql.streaming import StreamingQueryManager return StreamingQueryManager(self._jsparkSession.streams()) diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py index eb3155e5512eb..e7b2fa16d620a 100644 --- a/python/pyspark/sql/streaming.py +++ b/python/pyspark/sql/streaming.py @@ -34,9 +34,11 @@ class StreamingQuery(object): A handle to a query that is executing continuously in the background as new data arrives. All these methods are thread-safe. - .. note:: Evolving + .. versionadded:: 2.0.0 - .. versionadded:: 2.0 + Notes + ----- + This API is evolving. """ def __init__(self, jsq): @@ -116,13 +118,16 @@ def recentProgress(self): return [json.loads(p.json()) for p in self._jsq.recentProgress()] @property - @since(2.1) def lastProgress(self): """ Returns the most recent :class:`StreamingQueryProgress` update of this streaming query or None if there were no progress updates - :return: a map + .. versionadded:: 2.1.0 + + Returns + ------- + dict """ lastProgress = self._jsq.lastProgress() if lastProgress: @@ -130,15 +135,18 @@ def lastProgress(self): else: return None - @since(2.0) def processAllAvailable(self): """Blocks until all available data in the source has been processed and committed to the sink. This method is intended for testing. - .. note:: In the case of continually arriving data, this method may block forever. - Additionally, this method is only guaranteed to block until data that has been - synchronously appended data to a stream source prior to invocation. - (i.e. `getOffset` must immediately reflect the addition). + .. versionadded:: 2.0.0 + + Notes + ----- + In the case of continually arriving data, this method may block forever. + Additionally, this method is only guaranteed to block until data that has been + synchronously appended data to a stream source prior to invocation. + (i.e. `getOffset` must immediately reflect the addition). """ return self._jsq.processAllAvailable() @@ -148,12 +156,18 @@ def stop(self): """ self._jsq.stop() - @since(2.1) def explain(self, extended=False): """Prints the (logical and physical) plans to the console for debugging purpose. - :param extended: boolean, default ``False``. If ``False``, prints only the physical plan. + .. versionadded:: 2.1.0 + + Parameters + ---------- + extended : bool, optional + default ``False``. If ``False``, prints only the physical plan. + Examples + -------- >>> sq = sdf.writeStream.format('memory').queryName('query_explain').start() >>> sq.processAllAvailable() # Wait a bit to generate the runtime plans. >>> sq.explain() @@ -174,10 +188,14 @@ def explain(self, extended=False): # We should print it in the Python process. print(self._jsq.explainInternal(extended)) - @since(2.1) def exception(self): """ - :return: the StreamingQueryException if the query was terminated by an exception, or None. + .. versionadded:: 2.1.0 + + Returns + ------- + :class:`StreamingQueryException` + the StreamingQueryException if the query was terminated by an exception, or None. """ if self._jsq.exception().isDefined(): je = self._jsq.exception().get() @@ -191,19 +209,24 @@ def exception(self): class StreamingQueryManager(object): """A class to manage all the :class:`StreamingQuery` StreamingQueries active. - .. note:: Evolving + .. versionadded:: 2.0.0 - .. versionadded:: 2.0 + Notes + ----- + This API is evolving. 
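A small end-to-end sketch of the `DataStreamReader`, `StreamingQuery`, and `StreamingQueryManager` APIs whose docstrings are rewritten above; it assumes the built-in `rate` source and `memory` sink, which are intended for testing, and the query name is a placeholder:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

stream_df = spark.readStream.format("rate").option("rowsPerSecond", 1).load()
query = (stream_df.writeStream
         .format("memory")
         .queryName("rate_query")
         .start())

query.processAllAvailable()                    # block until available data is processed
print([q.name for q in spark.streams.active])  # active queries on this session
print(query.lastProgress)                      # latest progress as a dict, or None
query.explain()                                # physical plan only (extended=False)
query.stop()
```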
""" def __init__(self, jsqm): self._jsqm = jsqm @property - @since(2.0) def active(self): """Returns a list of active queries associated with this SQLContext + .. versionadded:: 2.0.0 + + Examples + -------- >>> sq = sdf.writeStream.format('memory').queryName('this_query').start() >>> sqm = spark.streams >>> # get the list of active streaming queries @@ -213,11 +236,14 @@ def active(self): """ return [StreamingQuery(jsq) for jsq in self._jsqm.active()] - @since(2.0) def get(self, id): """Returns an active query from this SQLContext or throws exception if an active query with this name doesn't exist. + .. versionadded:: 2.0.0 + + Examples + -------- >>> sq = sdf.writeStream.format('memory').queryName('this_query').start() >>> sq.name 'this_query' @@ -259,11 +285,14 @@ def awaitAnyTermination(self, timeout=None): else: return self._jsqm.awaitAnyTermination() - @since(2.0) def resetTerminated(self): """Forget about past terminated queries so that :func:`awaitAnyTermination()` can be used again to wait for new terminations. + .. versionadded:: 2.0.0 + + Examples + -------- >>> spark.streams.resetTerminated() """ self._jsqm.resetTerminated() @@ -275,9 +304,11 @@ class DataStreamReader(OptionUtils): storage systems (e.g. file systems, key-value stores, etc). Use :attr:`SparkSession.readStream ` to access this. - .. note:: Evolving. + .. versionadded:: 2.0.0 - .. versionadded:: 2.0 + Notes + ----- + This API is evolving. """ def __init__(self, spark): @@ -288,20 +319,27 @@ def _df(self, jdf): from pyspark.sql.dataframe import DataFrame return DataFrame(jdf, self._spark) - @since(2.0) def format(self, source): """Specifies the input data source format. - .. note:: Evolving. + .. versionadded:: 2.0.0 - :param source: string, name of the data source, e.g. 'json', 'parquet'. + Parameters + ---------- + source : str + name of the data source, e.g. 'json', 'parquet'. + Notes + ----- + This API is evolving. + + Examples + -------- >>> s = spark.readStream.format("text") """ self._jreader = self._jreader.format(source) return self - @since(2.0) def schema(self, schema): """Specifies the input schema. @@ -309,11 +347,20 @@ def schema(self, schema): By specifying the schema here, the underlying data source can skip the schema inference step, and thus speed up data loading. - .. note:: Evolving. + .. versionadded:: 2.0.0 - :param schema: a :class:`pyspark.sql.types.StructType` object or a DDL-formatted string - (For example ``col0 INT, col1 DOUBLE``). + Parameters + ---------- + schema : :class:`pyspark.sql.types.StructType` or str + a :class:`pyspark.sql.types.StructType` object or a DDL-formatted string + (For example ``col0 INT, col1 DOUBLE``). + Notes + ----- + This API is evolving. + + Examples + -------- >>> s = spark.readStream.schema(sdf_schema) >>> s = spark.readStream.schema("col0 INT, col1 DOUBLE") """ @@ -328,7 +375,6 @@ def schema(self, schema): raise TypeError("schema should be StructType or string") return self - @since(2.0) def option(self, key, value): """Adds an input option for the underlying data source. @@ -346,14 +392,19 @@ def option(self, key, value): ambiguous. If it isn't set, the current value of the SQL config ``spark.sql.session.timeZone`` is used by default. - .. note:: Evolving. + .. versionadded:: 2.0.0 + Notes + ----- + This API is evolving. + + Examples + -------- >>> s = spark.readStream.option("x", 1) """ self._jreader = self._jreader.option(key, to_str(value)) return self - @since(2.0) def options(self, **options): """Adds input options for the underlying data source. 
@@ -371,27 +422,44 @@ def options(self, **options): ambiguous. If it isn't set, the current value of the SQL config ``spark.sql.session.timeZone`` is used by default. - .. note:: Evolving. + .. versionadded:: 2.0.0 + + Notes + ----- + This API is evolving. + Examples + -------- >>> s = spark.readStream.options(x="1", y=2) """ for k in options: self._jreader = self._jreader.option(k, to_str(options[k])) return self - @since(2.0) def load(self, path=None, format=None, schema=None, **options): """Loads a data stream from a data source and returns it as a :class:`DataFrame `. - .. note:: Evolving. - - :param path: optional string for file-system backed data sources. - :param format: optional string for format of the data source. Default to 'parquet'. - :param schema: optional :class:`pyspark.sql.types.StructType` for the input schema - or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``). - :param options: all other string options - + .. versionadded:: 2.0.0 + + Parameters + ---------- + path : str, optional + optional string for file-system backed data sources. + format : str, optional + optional string for format of the data source. Default to 'parquet'. + schema : :class:`pyspark.sql.types.StructType` or str, optional + optional :class:`pyspark.sql.types.StructType` for the input schema + or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``). + **options : dict + all other string options + + Notes + ----- + This API is evolving. + + Examples + -------- >>> json_sdf = spark.readStream.format("json") \\ ... .schema(sdf_schema) \\ ... .load(tempfile.mkdtemp()) @@ -413,7 +481,6 @@ def load(self, path=None, format=None, schema=None, **options): else: return self._df(self._jreader.load()) - @since(2.0) def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, allowComments=None, allowUnquotedFieldNames=None, allowSingleQuotes=None, allowNumericLeadingZero=None, allowBackslashEscapingAnyCharacter=None, @@ -430,89 +497,119 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, If the ``schema`` parameter is not specified, this function goes through the input once to determine the input schema. - .. note:: Evolving. - - :param path: string represents path to the JSON dataset, - or RDD of Strings storing JSON objects. - :param schema: an optional :class:`pyspark.sql.types.StructType` for the input schema - or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``). - :param primitivesAsString: infers all primitive values as a string type. If None is set, - it uses the default value, ``false``. - :param prefersDecimal: infers all floating-point values as a decimal type. If the values - do not fit in decimal, then it infers them as doubles. If None is - set, it uses the default value, ``false``. - :param allowComments: ignores Java/C++ style comment in JSON records. If None is set, - it uses the default value, ``false``. - :param allowUnquotedFieldNames: allows unquoted JSON field names. If None is set, - it uses the default value, ``false``. - :param allowSingleQuotes: allows single quotes in addition to double quotes. If None is - set, it uses the default value, ``true``. - :param allowNumericLeadingZero: allows leading zeros in numbers (e.g. 00012). If None is - set, it uses the default value, ``false``. - :param allowBackslashEscapingAnyCharacter: allows accepting quoting of all character - using backslash quoting mechanism. If None is - set, it uses the default value, ``false``. 
- :param mode: allows a mode for dealing with corrupt records during parsing. If None is - set, it uses the default value, ``PERMISSIVE``. - - * ``PERMISSIVE``: when it meets a corrupted record, puts the malformed string \ - into a field configured by ``columnNameOfCorruptRecord``, and sets malformed \ - fields to ``null``. To keep corrupt records, an user can set a string type \ - field named ``columnNameOfCorruptRecord`` in an user-defined schema. If a \ - schema does not have the field, it drops corrupt records during parsing. \ - When inferring a schema, it implicitly adds a ``columnNameOfCorruptRecord`` \ - field in an output schema. - * ``DROPMALFORMED``: ignores the whole corrupted records. - * ``FAILFAST``: throws an exception when it meets corrupted records. - - :param columnNameOfCorruptRecord: allows renaming the new field having malformed string - created by ``PERMISSIVE`` mode. This overrides - ``spark.sql.columnNameOfCorruptRecord``. If None is set, - it uses the value specified in - ``spark.sql.columnNameOfCorruptRecord``. - :param dateFormat: sets the string that indicates a date format. Custom date formats - follow the formats at `datetime pattern`_. - This applies to date type. If None is set, it uses the - default value, ``yyyy-MM-dd``. - :param timestampFormat: sets the string that indicates a timestamp format. - Custom date formats follow the formats at `datetime pattern`_. - This applies to timestamp type. If None is set, it uses the - default value, ``yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]``. - :param multiLine: parse one record, which may span multiple lines, per file. If None is - set, it uses the default value, ``false``. - :param allowUnquotedControlChars: allows JSON Strings to contain unquoted control - characters (ASCII characters with value less than 32, - including tab and line feed characters) or not. - :param lineSep: defines the line separator that should be used for parsing. If None is - set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``. - :param locale: sets a locale as language tag in IETF BCP 47 format. If None is set, - it uses the default value, ``en-US``. For instance, ``locale`` is used while - parsing dates and timestamps. - :param dropFieldIfAllNull: whether to ignore column of all null values or empty - array/struct during schema inference. If None is set, it - uses the default value, ``false``. - :param encoding: allows to forcibly set one of standard basic or extended encoding for - the JSON files. For example UTF-16BE, UTF-32LE. If None is set, - the encoding of input JSON will be detected automatically - when the multiLine option is set to ``true``. - :param pathGlobFilter: an optional glob pattern to only include files with paths matching - the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. - It does not change the behavior of `partition discovery`_. - :param recursiveFileLookup: recursively scan a directory for files. Using this option - disables `partition discovery`_. - :param allowNonNumericNumbers: allows JSON parser to recognize set of "Not-a-Number" (NaN) - tokens as legal floating number values. If None is set, - it uses the default value, ``true``. + .. versionadded:: 2.0.0 + + Parameters + ---------- + path : str + string represents path to the JSON dataset, + or RDD of Strings storing JSON objects. + schema : :class:`pyspark.sql.types.StructType` or str, optional + an optional :class:`pyspark.sql.types.StructType` for the input schema + or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``). 
+ primitivesAsString : str or bool, optional + infers all primitive values as a string type. If None is set, + it uses the default value, ``false``. + prefersDecimal : str or bool, optional + infers all floating-point values as a decimal type. If the values + do not fit in decimal, then it infers them as doubles. If None is + set, it uses the default value, ``false``. + allowComments : str or bool, optional + ignores Java/C++ style comment in JSON records. If None is set, + it uses the default value, ``false``. + allowUnquotedFieldNames : str or bool, optional + allows unquoted JSON field names. If None is set, + it uses the default value, ``false``. + allowSingleQuotes : str or bool, optional + allows single quotes in addition to double quotes. If None is + set, it uses the default value, ``true``. + allowNumericLeadingZero : str or bool, optional + allows leading zeros in numbers (e.g. 00012). If None is + set, it uses the default value, ``false``. + allowBackslashEscapingAnyCharacter : str or bool, optional + allows accepting quoting of all character + using backslash quoting mechanism. If None is + set, it uses the default value, ``false``. + mode : str, optional + allows a mode for dealing with corrupt records during parsing. If None is + set, it uses the default value, ``PERMISSIVE``. + + * ``PERMISSIVE``: when it meets a corrupted record, puts the malformed string \ + into a field configured by ``columnNameOfCorruptRecord``, and sets malformed \ + fields to ``null``. To keep corrupt records, an user can set a string type \ + field named ``columnNameOfCorruptRecord`` in an user-defined schema. If a \ + schema does not have the field, it drops corrupt records during parsing. \ + When inferring a schema, it implicitly adds a ``columnNameOfCorruptRecord`` \ + field in an output schema. + * ``DROPMALFORMED``: ignores the whole corrupted records. + * ``FAILFAST``: throws an exception when it meets corrupted records. + + columnNameOfCorruptRecord : str, optional + allows renaming the new field having malformed string + created by ``PERMISSIVE`` mode. This overrides + ``spark.sql.columnNameOfCorruptRecord``. If None is set, + it uses the value specified in + ``spark.sql.columnNameOfCorruptRecord``. + dateFormat : str, optional + sets the string that indicates a date format. Custom date formats + follow the formats at + `datetime pattern `_. # noqa + This applies to date type. If None is set, it uses the + default value, ``yyyy-MM-dd``. + timestampFormat : str, optional + sets the string that indicates a timestamp format. + Custom date formats follow the formats at + `datetime pattern `_. # noqa + This applies to timestamp type. If None is set, it uses the + default value, ``yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]``. + multiLine : str or bool, optional + parse one record, which may span multiple lines, per file. If None is + set, it uses the default value, ``false``. + allowUnquotedControlChars : str or bool, optional + allows JSON Strings to contain unquoted control + characters (ASCII characters with value less than 32, + including tab and line feed characters) or not. + lineSep : str, optional + defines the line separator that should be used for parsing. If None is + set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``. + locale : str, optional + sets a locale as language tag in IETF BCP 47 format. If None is set, + it uses the default value, ``en-US``. For instance, ``locale`` is used while + parsing dates and timestamps. 
+ dropFieldIfAllNull : str or bool, optional + whether to ignore column of all null values or empty + array/struct during schema inference. If None is set, it + uses the default value, ``false``. + encoding : str or bool, optional + allows to forcibly set one of standard basic or extended encoding for + the JSON files. For example UTF-16BE, UTF-32LE. If None is set, + the encoding of input JSON will be detected automatically + when the multiLine option is set to ``true``. + pathGlobFilter : str or bool, optional + an optional glob pattern to only include files with paths matching + the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. + It does not change the behavior of + `partition discovery `_. # noqa + recursiveFileLookup : str or bool, optional + recursively scan a directory for files. Using this option + disables + `partition discovery `_. # noqa + allowNonNumericNumbers : str or bool, optional + allows JSON parser to recognize set of "Not-a-Number" (NaN) + tokens as legal floating number values. If None is set, + it uses the default value, ``true``. * ``+INF``: for positive infinity, as well as alias of ``+Infinity`` and ``Infinity``. * ``-INF``: for negative infinity, alias ``-Infinity``. * ``NaN``: for other not-a-numbers, like result of division by zero. - .. _partition discovery: - https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery - .. _datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + Notes + ----- + This API is evolving. + Examples + -------- >>> json_sdf = spark.readStream.json(tempfile.mkdtemp(), schema = sdf_schema) >>> json_sdf.isStreaming True @@ -535,24 +632,28 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, else: raise TypeError("path can be only a single string") - @since(2.3) def orc(self, path, mergeSchema=None, pathGlobFilter=None, recursiveFileLookup=None): """Loads a ORC file stream, returning the result as a :class:`DataFrame`. - .. note:: Evolving. - - :param mergeSchema: sets whether we should merge schemas collected from all - ORC part-files. This will override ``spark.sql.orc.mergeSchema``. - The default value is specified in ``spark.sql.orc.mergeSchema``. - :param pathGlobFilter: an optional glob pattern to only include files with paths matching - the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. - It does not change the behavior of `partition discovery`_. - :param recursiveFileLookup: recursively scan a directory for files. Using this option - disables `partition discovery`_. - - .. _partition discovery: - https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery - + .. versionadded:: 2.3.0 + + Parameters + ---------- + mergeSchema : str or bool, optional + sets whether we should merge schemas collected from all + ORC part-files. This will override ``spark.sql.orc.mergeSchema``. + The default value is specified in ``spark.sql.orc.mergeSchema``. + pathGlobFilter : str or bool, optional + an optional glob pattern to only include files with paths matching + the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. + It does not change the behavior of `partition discovery`_. + recursiveFileLookup : str or bool, optional + recursively scan a directory for files. Using this option + disables + `partition discovery `_. 
# noqa + + Examples + -------- >>> orc_sdf = spark.readStream.schema(sdf_schema).orc(tempfile.mkdtemp()) >>> orc_sdf.isStreaming True @@ -566,26 +667,30 @@ def orc(self, path, mergeSchema=None, pathGlobFilter=None, recursiveFileLookup=N else: raise TypeError("path can be only a single string") - @since(2.0) def parquet(self, path, mergeSchema=None, pathGlobFilter=None, recursiveFileLookup=None): """ Loads a Parquet file stream, returning the result as a :class:`DataFrame`. - .. note:: Evolving. - - :param mergeSchema: sets whether we should merge schemas collected from all - Parquet part-files. This will override - ``spark.sql.parquet.mergeSchema``. The default value is specified in - ``spark.sql.parquet.mergeSchema``. - :param pathGlobFilter: an optional glob pattern to only include files with paths matching - the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. - It does not change the behavior of `partition discovery`_. - :param recursiveFileLookup: recursively scan a directory for files. Using this option - disables `partition discovery`_. - - .. _partition discovery: - https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery - + .. versionadded:: 2.0.0 + + Parameters + ---------- + mergeSchema : str or bool, optional + sets whether we should merge schemas collected from all + Parquet part-files. This will override + ``spark.sql.parquet.mergeSchema``. The default value is specified in + ``spark.sql.parquet.mergeSchema``. + pathGlobFilter : str or bool, optional + an optional glob pattern to only include files with paths matching + the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. + It does not change the behavior of `partition discovery`_. + recursiveFileLookup : str or bool, optional + recursively scan a directory for files. Using this option + disables + `partition discovery `_. # noqa + + Examples + -------- >>> parquet_sdf = spark.readStream.schema(sdf_schema).parquet(tempfile.mkdtemp()) >>> parquet_sdf.isStreaming True @@ -599,7 +704,6 @@ def parquet(self, path, mergeSchema=None, pathGlobFilter=None, recursiveFileLook else: raise TypeError("path can be only a single string") - @since(2.0) def text(self, path, wholetext=False, lineSep=None, pathGlobFilter=None, recursiveFileLookup=None): """ @@ -610,21 +714,32 @@ def text(self, path, wholetext=False, lineSep=None, pathGlobFilter=None, By default, each line in the text file is a new row in the resulting DataFrame. - .. note:: Evolving. - - :param paths: string, or list of strings, for input path(s). - :param wholetext: if true, read each file from input path(s) as a single row. - :param lineSep: defines the line separator that should be used for parsing. If None is - set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``. - :param pathGlobFilter: an optional glob pattern to only include files with paths matching - the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. - It does not change the behavior of `partition discovery`_. - :param recursiveFileLookup: recursively scan a directory for files. Using this option - disables `partition discovery`_. - - .. _partition discovery: - https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery - + .. versionadded:: 2.0.0 + + Parameters + ---------- + paths : str or list + string, or list of strings, for input path(s). + wholetext : str or bool, optional + if true, read each file from input path(s) as a single row. 
+ lineSep : str, optional + defines the line separator that should be used for parsing. If None is + set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``. + pathGlobFilter : str or bool, optional + an optional glob pattern to only include files with paths matching + the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. + It does not change the behavior of `partition discovery`_. + recursiveFileLookup : str or bool, optional + recursively scan a directory for files. Using this option + disables + `partition discovery `_. # noqa + + Notes + ----- + This API is evolving. + + Examples + -------- >>> text_sdf = spark.readStream.text(tempfile.mkdtemp()) >>> text_sdf.isStreaming True @@ -639,7 +754,6 @@ def text(self, path, wholetext=False, lineSep=None, pathGlobFilter=None, else: raise TypeError("path can be only a single string") - @since(2.0) def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=None, comment=None, header=None, inferSchema=None, ignoreLeadingWhiteSpace=None, ignoreTrailingWhiteSpace=None, nullValue=None, nanValue=None, positiveInf=None, @@ -654,111 +768,147 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non ``inferSchema`` is enabled. To avoid going through the entire data once, disable ``inferSchema`` option or specify the schema explicitly using ``schema``. - .. note:: Evolving. - - :param path: string, or list of strings, for input path(s). - :param schema: an optional :class:`pyspark.sql.types.StructType` for the input schema - or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``). - :param sep: sets a separator (one or more characters) for each field and value. If None is - set, it uses the default value, ``,``. - :param encoding: decodes the CSV files by the given encoding type. If None is set, - it uses the default value, ``UTF-8``. - :param quote: sets a single character used for escaping quoted values where the - separator can be part of the value. If None is set, it uses the default - value, ``"``. If you would like to turn off quotations, you need to set an - empty string. - :param escape: sets a single character used for escaping quotes inside an already - quoted value. If None is set, it uses the default value, ``\``. - :param comment: sets a single character used for skipping lines beginning with this - character. By default (None), it is disabled. - :param header: uses the first line as names of columns. If None is set, it uses the - default value, ``false``. - :param inferSchema: infers the input schema automatically from data. It requires one extra - pass over the data. If None is set, it uses the default value, ``false``. - :param enforceSchema: If it is set to ``true``, the specified or inferred schema will be - forcibly applied to datasource files, and headers in CSV files will be - ignored. If the option is set to ``false``, the schema will be - validated against all headers in CSV files or the first header in RDD - if the ``header`` option is set to ``true``. Field names in the schema - and column names in CSV headers are checked by their positions - taking into account ``spark.sql.caseSensitive``. If None is set, - ``true`` is used by default. Though the default value is ``true``, - it is recommended to disable the ``enforceSchema`` option - to avoid incorrect results. - :param ignoreLeadingWhiteSpace: a flag indicating whether or not leading whitespaces from - values being read should be skipped. If None is set, it - uses the default value, ``false``. 
- :param ignoreTrailingWhiteSpace: a flag indicating whether or not trailing whitespaces from - values being read should be skipped. If None is set, it - uses the default value, ``false``. - :param nullValue: sets the string representation of a null value. If None is set, it uses - the default value, empty string. Since 2.0.1, this ``nullValue`` param - applies to all supported types including the string type. - :param nanValue: sets the string representation of a non-number value. If None is set, it - uses the default value, ``NaN``. - :param positiveInf: sets the string representation of a positive infinity value. If None - is set, it uses the default value, ``Inf``. - :param negativeInf: sets the string representation of a negative infinity value. If None - is set, it uses the default value, ``Inf``. - :param dateFormat: sets the string that indicates a date format. Custom date formats - follow the formats at `datetime pattern`_. - This applies to date type. If None is set, it uses the - default value, ``yyyy-MM-dd``. - :param timestampFormat: sets the string that indicates a timestamp format. - Custom date formats follow the formats at `datetime pattern`_. - This applies to timestamp type. If None is set, it uses the - default value, ``yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]``. - :param maxColumns: defines a hard limit of how many columns a record can have. If None is - set, it uses the default value, ``20480``. - :param maxCharsPerColumn: defines the maximum number of characters allowed for any given - value being read. If None is set, it uses the default value, - ``-1`` meaning unlimited length. - :param maxMalformedLogPerPartition: this parameter is no longer used since Spark 2.2.0. - If specified, it is ignored. - :param mode: allows a mode for dealing with corrupt records during parsing. If None is - set, it uses the default value, ``PERMISSIVE``. - - * ``PERMISSIVE``: when it meets a corrupted record, puts the malformed string \ - into a field configured by ``columnNameOfCorruptRecord``, and sets malformed \ - fields to ``null``. To keep corrupt records, an user can set a string type \ - field named ``columnNameOfCorruptRecord`` in an user-defined schema. If a \ - schema does not have the field, it drops corrupt records during parsing. \ - A record with less/more tokens than schema is not a corrupted record to CSV. \ - When it meets a record having fewer tokens than the length of the schema, \ - sets ``null`` to extra fields. When the record has more tokens than the \ - length of the schema, it drops extra tokens. - * ``DROPMALFORMED``: ignores the whole corrupted records. - * ``FAILFAST``: throws an exception when it meets corrupted records. - - :param columnNameOfCorruptRecord: allows renaming the new field having malformed string - created by ``PERMISSIVE`` mode. This overrides - ``spark.sql.columnNameOfCorruptRecord``. If None is set, - it uses the value specified in - ``spark.sql.columnNameOfCorruptRecord``. - :param multiLine: parse one record, which may span multiple lines. If None is - set, it uses the default value, ``false``. - :param charToEscapeQuoteEscaping: sets a single character used for escaping the escape for - the quote character. If None is set, the default value is - escape character when escape and quote characters are - different, ``\0`` otherwise.. - :param emptyValue: sets the string representation of an empty value. If None is set, it uses - the default value, empty string. - :param locale: sets a locale as language tag in IETF BCP 47 format. 
If None is set, - it uses the default value, ``en-US``. For instance, ``locale`` is used while - parsing dates and timestamps. - :param lineSep: defines the line separator that should be used for parsing. If None is - set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``. - Maximum length is 1 character. - :param pathGlobFilter: an optional glob pattern to only include files with paths matching - the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. - It does not change the behavior of `partition discovery`_. - :param recursiveFileLookup: recursively scan a directory for files. Using this option - disables `partition discovery`_. - - .. _partition discovery: - https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery - .. _datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html - + Parameters + ---------- + path : str or list + string, or list of strings, for input path(s). + schema : :class:`pyspark.sql.types.StructType` or str, optional + an optional :class:`pyspark.sql.types.StructType` for the input schema + or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``). + sep : str, optional + sets a separator (one or more characters) for each field and value. If None is + set, it uses the default value, ``,``. + encoding : str, optional + decodes the CSV files by the given encoding type. If None is set, + it uses the default value, ``UTF-8``. + quote : str, optional sets a single character used for escaping quoted values where the + separator can be part of the value. If None is set, it uses the default + value, ``"``. If you would like to turn off quotations, you need to set an + empty string. + escape : str, optional + sets a single character used for escaping quotes inside an already + quoted value. If None is set, it uses the default value, ``\``. + comment : str, optional + sets a single character used for skipping lines beginning with this + character. By default (None), it is disabled. + header : str or bool, optional + uses the first line as names of columns. If None is set, it uses the + default value, ``false``. + inferSchema : str or bool, optional + infers the input schema automatically from data. It requires one extra + pass over the data. If None is set, it uses the default value, ``false``. + enforceSchema : str or bool, optional + If it is set to ``true``, the specified or inferred schema will be + forcibly applied to datasource files, and headers in CSV files will be + ignored. If the option is set to ``false``, the schema will be + validated against all headers in CSV files or the first header in RDD + if the ``header`` option is set to ``true``. Field names in the schema + and column names in CSV headers are checked by their positions + taking into account ``spark.sql.caseSensitive``. If None is set, + ``true`` is used by default. Though the default value is ``true``, + it is recommended to disable the ``enforceSchema`` option + to avoid incorrect results. + ignoreLeadingWhiteSpace : str or bool, optional + a flag indicating whether or not leading whitespaces from + values being read should be skipped. If None is set, it + uses the default value, ``false``. + ignoreTrailingWhiteSpace : str or bool, optional + a flag indicating whether or not trailing whitespaces from + values being read should be skipped. If None is set, it + uses the default value, ``false``. + nullValue : str, optional + sets the string representation of a null value. If None is set, it uses + the default value, empty string. 
Since 2.0.1, this ``nullValue`` param + applies to all supported types including the string type. + nanValue : str, optional + sets the string representation of a non-number value. If None is set, it + uses the default value, ``NaN``. + positiveInf : str, optional + sets the string representation of a positive infinity value. If None + is set, it uses the default value, ``Inf``. + negativeInf : str, optional + sets the string representation of a negative infinity value. If None + is set, it uses the default value, ``Inf``. + dateFormat : str, optional + sets the string that indicates a date format. Custom date formats + follow the formats at + `datetime pattern `_. # noqa + This applies to date type. If None is set, it uses the + default value, ``yyyy-MM-dd``. + timestampFormat : str, optional + sets the string that indicates a timestamp format. + Custom date formats follow the formats at + `datetime pattern `_. # noqa + This applies to timestamp type. If None is set, it uses the + default value, ``yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]``. + maxColumns : str or int, optional + defines a hard limit of how many columns a record can have. If None is + set, it uses the default value, ``20480``. + maxCharsPerColumn : str or int, optional + defines the maximum number of characters allowed for any given + value being read. If None is set, it uses the default value, + ``-1`` meaning unlimited length. + maxMalformedLogPerPartition : str or int, optional + this parameter is no longer used since Spark 2.2.0. + If specified, it is ignored. + mode : str, optional + allows a mode for dealing with corrupt records during parsing. If None is + set, it uses the default value, ``PERMISSIVE``. + + * ``PERMISSIVE``: when it meets a corrupted record, puts the malformed string \ + into a field configured by ``columnNameOfCorruptRecord``, and sets malformed \ + fields to ``null``. To keep corrupt records, an user can set a string type \ + field named ``columnNameOfCorruptRecord`` in an user-defined schema. If a \ + schema does not have the field, it drops corrupt records during parsing. \ + A record with less/more tokens than schema is not a corrupted record to CSV. \ + When it meets a record having fewer tokens than the length of the schema, \ + sets ``null`` to extra fields. When the record has more tokens than the \ + length of the schema, it drops extra tokens. + * ``DROPMALFORMED``: ignores the whole corrupted records. + * ``FAILFAST``: throws an exception when it meets corrupted records. + + columnNameOfCorruptRecord : str, optional + allows renaming the new field having malformed string + created by ``PERMISSIVE`` mode. This overrides + ``spark.sql.columnNameOfCorruptRecord``. If None is set, + it uses the value specified in + ``spark.sql.columnNameOfCorruptRecord``. + multiLine : str or bool, optional + parse one record, which may span multiple lines. If None is + set, it uses the default value, ``false``. + charToEscapeQuoteEscaping : str, optional + sets a single character used for escaping the escape for + the quote character. If None is set, the default value is + escape character when escape and quote characters are + different, ``\0`` otherwise. + emptyValue : str, optional + sets the string representation of an empty value. If None is set, it uses + the default value, empty string. + locale : str, optional + sets a locale as language tag in IETF BCP 47 format. If None is set, + it uses the default value, ``en-US``. For instance, ``locale`` is used while + parsing dates and timestamps. 
+ lineSep : str, optional + defines the line separator that should be used for parsing. If None is + set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``. + Maximum length is 1 character. + pathGlobFilter : str or bool, optional + an optional glob pattern to only include files with paths matching + the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. + It does not change the behavior of + `partition discovery `_. # noqa + recursiveFileLookup : str or bool, optional + recursively scan a directory for files. Using this option disables + `partition discovery `_. # noqa + + .. versionadded:: 2.0.0 + + Notes + ----- + This API is evolving. + + Examples + -------- >>> csv_sdf = spark.readStream.csv(tempfile.mkdtemp(), schema = sdf_schema) >>> csv_sdf.isStreaming True @@ -790,9 +940,11 @@ class DataStreamWriter(object): Use :attr:`DataFrame.writeStream ` to access this. - .. note:: Evolving. + .. versionadded:: 2.0.0 - .. versionadded:: 2.0 + Notes + ----- + This API is evolving. """ def __init__(self, df): @@ -804,10 +956,11 @@ def _sq(self, jsq): from pyspark.sql.streaming import StreamingQuery return StreamingQuery(jsq) - @since(2.0) def outputMode(self, outputMode): """Specifies how data of a streaming DataFrame/Dataset is written to a streaming sink. + .. versionadded:: 2.0.0 + Options include: * `append`: Only the new rows in the streaming DataFrame/Dataset will be written to @@ -818,8 +971,12 @@ def outputMode(self, outputMode): written to the sink every time there are some updates. If the query doesn't contain aggregations, it will be equivalent to `append` mode. - .. note:: Evolving. + Notes + ----- + This API is evolving. + Examples + -------- >>> writer = sdf.writeStream.outputMode('append') """ if not outputMode or type(outputMode) != str or len(outputMode.strip()) == 0: @@ -827,20 +984,27 @@ def outputMode(self, outputMode): self._jwrite = self._jwrite.outputMode(outputMode) return self - @since(2.0) def format(self, source): """Specifies the underlying output data source. - .. note:: Evolving. + .. versionadded:: 2.0.0 + + Parameters + ---------- + source : str + string, name of the data source, which for now can be 'parquet'. - :param source: string, name of the data source, which for now can be 'parquet'. + Notes + ----- + This API is evolving. + Examples + -------- >>> writer = sdf.writeStream.format('json') """ self._jwrite = self._jwrite.format(source) return self - @since(2.0) def option(self, key, value): """Adds an output option for the underlying data source. @@ -858,12 +1022,15 @@ def option(self, key, value): ambiguous. If it isn't set, the current value of the SQL config ``spark.sql.session.timeZone`` is used by default. - .. note:: Evolving. + .. versionadded:: 2.0.0 + + Notes + ----- + This API is evolving. """ self._jwrite = self._jwrite.option(key, to_str(value)) return self - @since(2.0) def options(self, **options): """Adds output options for the underlying data source. @@ -881,39 +1048,56 @@ def options(self, **options): ambiguous. If it isn't set, the current value of the SQL config ``spark.sql.session.timeZone`` is used by default. - .. note:: Evolving. + .. versionadded:: 2.0.0 + + Notes + ----- + This API is evolving. """ for k in options: self._jwrite = self._jwrite.option(k, to_str(options[k])) return self - @since(2.0) def partitionBy(self, *cols): """Partitions the output by the given columns on the file system. If specified, the output is laid out on the file system similar to Hive's partitioning scheme. - .. note:: Evolving. + .. 
versionadded:: 2.0.0 - :param cols: name of columns + Parameters + ---------- + cols : str or list + name of columns + Notes + ----- + This API is evolving. """ if len(cols) == 1 and isinstance(cols[0], (list, tuple)): cols = cols[0] self._jwrite = self._jwrite.partitionBy(_to_seq(self._spark._sc, cols)) return self - @since(2.0) def queryName(self, queryName): """Specifies the name of the :class:`StreamingQuery` that can be started with :func:`start`. This name must be unique among all the currently active queries in the associated SparkSession. - .. note:: Evolving. + .. versionadded:: 2.0.0 + + Parameters + ---------- + queryName : str + unique name for the query - :param queryName: unique name for the query + Notes + ----- + This API is evolving. + Examples + -------- >>> writer = sdf.writeStream.queryName('streaming_query') """ if not queryName or type(queryName) != str or len(queryName.strip()) == 0: @@ -922,22 +1106,32 @@ def queryName(self, queryName): return self @keyword_only - @since(2.0) def trigger(self, *, processingTime=None, once=None, continuous=None): """Set the trigger for the stream query. If this is not set it will run the query as fast as possible, which is equivalent to setting the trigger to ``processingTime='0 seconds'``. - .. note:: Evolving. - - :param processingTime: a processing time interval as a string, e.g. '5 seconds', '1 minute'. - Set a trigger that runs a microbatch query periodically based on the - processing time. Only one trigger can be set. - :param once: if set to True, set a trigger that processes only one batch of data in a - streaming query then terminates the query. Only one trigger can be set. - :param continuous: a time interval as a string, e.g. '5 seconds', '1 minute'. - Set a trigger that runs a continuous query with a given checkpoint - interval. Only one trigger can be set. - + .. versionadded:: 2.0.0 + + Parameters + ---------- + processingTime : str, optional + a processing time interval as a string, e.g. '5 seconds', '1 minute'. + Set a trigger that runs a microbatch query periodically based on the + processing time. Only one trigger can be set. + once : bool, optional + if set to True, set a trigger that processes only one batch of data in a + streaming query then terminates the query. Only one trigger can be set. + continuous : str, optional + a time interval as a string, e.g. '5 seconds', '1 minute'. + Set a trigger that runs a continuous query with a given checkpoint + interval. Only one trigger can be set. + + Notes + ----- + This API is evolving. + + Examples + -------- >>> # trigger the query for execution every 5 seconds >>> writer = sdf.writeStream.trigger(processingTime='5 seconds') >>> # trigger the query for just once batch of data @@ -977,7 +1171,6 @@ def trigger(self, *, processingTime=None, once=None, continuous=None): self._jwrite = self._jwrite.trigger(jTrigger) return self - @since(2.4) def foreach(self, f): """ Sets the output of the streaming query to be processed using the provided writer ``f``. @@ -1045,8 +1238,14 @@ def foreach(self, f): returns successfully (irrespective of the return value), except if the Python crashes in the middle. - .. note:: Evolving. + .. versionadded:: 2.4.0 + + Notes + ----- + This API is evolving. + Examples + -------- >>> # Print every row using a function >>> def print_row(row): ... 
print(row) @@ -1139,7 +1338,6 @@ def func_with_open_process_close(partition_id, iterator): self._jwrite.foreach(jForeachWriter) return self - @since(2.4) def foreachBatch(self, func): """ Sets the output of the streaming query to be processed using the provided @@ -1151,8 +1349,14 @@ def foreachBatch(self, func): to exactly same for the same batchId (assuming all operations are deterministic in the query). - .. note:: Evolving. + .. versionadded:: 2.4.0 + Notes + ----- + This API is evolving. + + Examples + -------- >>> def func(batch_df, batch_id): ... batch_df.collect() ... @@ -1168,7 +1372,6 @@ def foreachBatch(self, func): ensure_callback_server_started(gw) return self - @since(2.0) def start(self, path=None, format=None, outputMode=None, partitionBy=None, queryName=None, **options): """Streams the contents of the :class:`DataFrame` to a data source. @@ -1177,12 +1380,17 @@ def start(self, path=None, format=None, outputMode=None, partitionBy=None, query If ``format`` is not specified, the default data source configured by ``spark.sql.sources.default`` will be used. - .. note:: Evolving. + .. versionadded:: 2.0.0 - :param path: the path in a Hadoop supported file system - :param format: the format used to save - :param outputMode: specifies how data of a streaming DataFrame/Dataset is written to a - streaming sink. + Parameters + ---------- + path : str, optional + the path in a Hadoop supported file system + format : str, optional + the format used to save + outputMode : str, optional + specifies how data of a streaming DataFrame/Dataset is written to a + streaming sink. * `append`: Only the new rows in the streaming DataFrame/Dataset will be written to the sink @@ -1191,11 +1399,20 @@ def start(self, path=None, format=None, outputMode=None, partitionBy=None, query * `update`: only the rows that were updated in the streaming DataFrame/Dataset will be written to the sink every time there are some updates. If the query doesn't contain aggregations, it will be equivalent to `append` mode. - :param partitionBy: names of partitioning columns - :param queryName: unique name for the query - :param options: All other string options. You may want to provide a `checkpointLocation` - for most streams, however it is not required for a `memory` stream. - + partitionBy : str or list, optional + names of partitioning columns + queryName : str, optional + unique name for the query + **options : dict + All other string options. You may want to provide a `checkpointLocation` + for most streams, however it is not required for a `memory` stream. + + Notes + ----- + This API is evolving. + + Examples + -------- >>> sq = sdf.writeStream.format('memory').queryName('this_query').start() >>> sq.isActive True diff --git a/python/pyspark/sql/streaming.pyi b/python/pyspark/sql/streaming.pyi index 22055b2efc06b..56ce140b826d5 100644 --- a/python/pyspark/sql/streaming.pyi +++ b/python/pyspark/sql/streaming.pyi @@ -68,7 +68,7 @@ class DataStreamReader(OptionUtils): self, path: Optional[str] = ..., format: Optional[str] = ..., - schema: Optional[StructType] = ..., + schema: Optional[Union[StructType, str]] = ..., **options: OptionalPrimitiveType ) -> DataFrame: ... 
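For the `trigger`, `foreachBatch`, and `start` docstrings above, a minimal sketch of the micro-batch pattern they describe; the `rate` source, the batch function, and the checkpoint path are illustrative placeholders:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
stream_df = spark.readStream.format("rate").load()

def write_batch(batch_df, batch_id):
    # called once per micro-batch; batch_df is an ordinary batch DataFrame
    print(batch_id, batch_df.count())

query = (stream_df.writeStream
         .trigger(processingTime="5 seconds")
         .foreachBatch(write_batch)
         .option("checkpointLocation", "/tmp/rate_checkpoint")
         .start())
query.processAllAvailable()
query.stop()
```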
def json( @@ -92,26 +92,31 @@ class DataStreamReader(OptionUtils): locale: Optional[str] = ..., dropFieldIfAllNull: Optional[Union[bool, str]] = ..., encoding: Optional[str] = ..., - recursiveFileLookup: Optional[bool] = ..., + pathGlobFilter: Optional[Union[bool, str]] = ..., + recursiveFileLookup: Optional[Union[bool, str]] = ..., + allowNonNumericNumbers: Optional[Union[bool, str]] = ..., ) -> DataFrame: ... def orc( self, path: str, mergeSchema: Optional[bool] = ..., - recursiveFileLookup: Optional[bool] = ..., + pathGlobFilter: Optional[Union[bool, str]] = ..., + recursiveFileLookup: Optional[Union[bool, str]] = ..., ) -> DataFrame: ... def parquet( self, path: str, mergeSchema: Optional[bool] = ..., - recursiveFileLookup: Optional[bool] = ..., + pathGlobFilter: Optional[Union[bool, str]] = ..., + recursiveFileLookup: Optional[Union[bool, str]] = ..., ) -> DataFrame: ... def text( self, path: str, wholetext: bool = ..., lineSep: Optional[str] = ..., - recursiveFileLookup: Optional[bool] = ..., + pathGlobFilter: Optional[Union[bool, str]] = ..., + recursiveFileLookup: Optional[Union[bool, str]] = ..., ) -> DataFrame: ... def csv( self, @@ -142,6 +147,8 @@ class DataStreamReader(OptionUtils): emptyValue: Optional[str] = ..., locale: Optional[str] = ..., lineSep: Optional[str] = ..., + pathGlobFilter: Optional[Union[bool, str]] = ..., + recursiveFileLookup: Optional[Union[bool, str]] = ..., ) -> DataFrame: ... class DataStreamWriter: diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 5a89d5ab9a7e5..c0948b6e6e379 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -198,8 +198,12 @@ class DecimalType(FractionalType): When creating a DecimalType, the default precision and scale is (10, 0). When inferring schema from decimal.Decimal objects, it will be DecimalType(38, 18). - :param precision: the maximum (i.e. total) number of digits (default: 10) - :param scale: the number of digits on right side of dot. (default: 0) + Parameters + ---------- + precision : int, optional + the maximum (i.e. total) number of digits (default: 10) + scale : int, optional + the number of digits on right side of dot. (default: 0) """ def __init__(self, precision=10, scale=0): @@ -263,17 +267,22 @@ def simpleString(self): class ArrayType(DataType): """Array data type. - :param elementType: :class:`DataType` of each element in the array. - :param containsNull: boolean, whether the array can contain null (None) values. + Parameters + ---------- + elementType : :class:`DataType` + :class:`DataType` of each element in the array. + containsNull : bool, optional + whether the array can contain null (None) values. + + Examples + -------- + >>> ArrayType(StringType()) == ArrayType(StringType(), True) + True + >>> ArrayType(StringType(), False) == ArrayType(StringType()) + False """ def __init__(self, elementType, containsNull=True): - """ - >>> ArrayType(StringType()) == ArrayType(StringType(), True) - True - >>> ArrayType(StringType(), False) == ArrayType(StringType()) - False - """ assert isinstance(elementType, DataType),\ "elementType %s should be an instance of %s" % (elementType, DataType) self.elementType = elementType @@ -313,22 +322,30 @@ def fromInternal(self, obj): class MapType(DataType): """Map data type. - :param keyType: :class:`DataType` of the keys in the map. - :param valueType: :class:`DataType` of the values in the map. - :param valueContainsNull: indicates whether values can contain null (None) values. 
- + Parameters + ---------- + keyType : :class:`DataType` + :class:`DataType` of the keys in the map. + valueType : :class:`DataType` + :class:`DataType` of the values in the map. + valueContainsNull : bool, optional + indicates whether values can contain null (None) values. + + Notes + ----- Keys in a map data type are not allowed to be null (None). + + Examples + -------- + >>> (MapType(StringType(), IntegerType()) + ... == MapType(StringType(), IntegerType(), True)) + True + >>> (MapType(StringType(), IntegerType(), False) + ... == MapType(StringType(), FloatType())) + False """ def __init__(self, keyType, valueType, valueContainsNull=True): - """ - >>> (MapType(StringType(), IntegerType()) - ... == MapType(StringType(), IntegerType(), True)) - True - >>> (MapType(StringType(), IntegerType(), False) - ... == MapType(StringType(), FloatType())) - False - """ assert isinstance(keyType, DataType),\ "keyType %s should be an instance of %s" % (keyType, DataType) assert isinstance(valueType, DataType),\ @@ -375,21 +392,28 @@ def fromInternal(self, obj): class StructField(DataType): """A field in :class:`StructType`. - :param name: string, name of the field. - :param dataType: :class:`DataType` of the field. - :param nullable: boolean, whether the field can be null (None) or not. - :param metadata: a dict from string to simple type that can be toInternald to JSON automatically + Parameters + ---------- + name : str + name of the field. + dataType : :class:`DataType` + :class:`DataType` of the field. + nullable : bool + whether the field can be null (None) or not. + metadata : dict + a dict from string to simple type that can be toInternald to JSON automatically + + Examples + -------- + >>> (StructField("f1", StringType(), True) + ... == StructField("f1", StringType(), True)) + True + >>> (StructField("f1", StringType(), True) + ... == StructField("f2", StringType(), True)) + False """ def __init__(self, name, dataType, nullable=True, metadata=None): - """ - >>> (StructField("f1", StringType(), True) - ... == StructField("f1", StringType(), True)) - True - >>> (StructField("f1", StringType(), True) - ... == StructField("f2", StringType(), True)) - False - """ assert isinstance(dataType, DataType),\ "dataType %s should be an instance of %s" % (dataType, DataType) assert isinstance(name, str), "field name %s should be a string" % (name) @@ -441,24 +465,25 @@ class StructType(DataType): Iterating a :class:`StructType` will iterate over its :class:`StructField`\\s. A contained :class:`StructField` can be accessed by its name or position. + Examples + -------- >>> struct1 = StructType([StructField("f1", StringType(), True)]) >>> struct1["f1"] StructField(f1,StringType,true) >>> struct1[0] StructField(f1,StringType,true) + + >>> struct1 = StructType([StructField("f1", StringType(), True)]) + >>> struct2 = StructType([StructField("f1", StringType(), True)]) + >>> struct1 == struct2 + True + >>> struct1 = StructType([StructField("f1", StringType(), True)]) + >>> struct2 = StructType([StructField("f1", StringType(), True), + ... StructField("f2", IntegerType(), False)]) + >>> struct1 == struct2 + False """ def __init__(self, fields=None): - """ - >>> struct1 = StructType([StructField("f1", StringType(), True)]) - >>> struct2 = StructType([StructField("f1", StringType(), True)]) - >>> struct1 == struct2 - True - >>> struct1 = StructType([StructField("f1", StringType(), True)]) - >>> struct2 = StructType([StructField("f1", StringType(), True), - ... 
StructField("f2", IntegerType(), False)]) - >>> struct1 == struct2 - False - """ if not fields: self.fields = [] self.names = [] @@ -481,6 +506,23 @@ def add(self, field, data_type=None, nullable=True, metadata=None): metadata(optional). The data_type parameter may be either a String or a DataType object. + Parameters + ---------- + field : str or :class:`StructField` + Either the name of the field or a StructField object + data_type : :class:`DataType`, optional + If present, the DataType of the StructField to create + nullable : bool, optional + Whether the field to add should be nullable (default True) + metadata : dict, optional + Any additional metadata (default None) + + Returns + ------- + :class:`StructType` + + Examples + -------- >>> struct1 = StructType().add("f1", StringType(), True).add("f2", StringType(), True, None) >>> struct2 = StructType([StructField("f1", StringType(), True), \\ ... StructField("f2", StringType(), True, None)]) @@ -494,12 +536,6 @@ def add(self, field, data_type=None, nullable=True, metadata=None): >>> struct2 = StructType([StructField("f1", StringType(), True)]) >>> struct1 == struct2 True - - :param field: Either the name of the field or a StructField object - :param data_type: If present, the DataType of the StructField to create - :param nullable: Whether the field to add should be nullable (default True) - :param metadata: Any additional metadata (default None) - :return: a new updated StructType """ if isinstance(field, StructField): self.fields.append(field) @@ -563,6 +599,8 @@ def fieldNames(self): """ Returns all field names in a list. + Examples + -------- >>> struct = StructType([StructField("f1", StringType(), True)]) >>> struct.fieldNames() ['f1'] @@ -745,6 +783,8 @@ def _parse_datatype_string(s): for :class:`IntegerType`. Since Spark 2.3, this also supports a schema in a DDL-formatted string and case-insensitive strings. + Examples + -------- >>> _parse_datatype_string("int ") IntegerType >>> _parse_datatype_string("INT ") @@ -803,6 +843,9 @@ def from_ddl_datatype(type_str): def _parse_datatype_json_string(json_string): """Parses the given data type JSON string. + + Examples + -------- >>> import pickle >>> def check_datatype(datatype): ... pickled = pickle.loads(pickle.dumps(datatype)) @@ -1173,6 +1216,8 @@ def _make_type_verifier(dataType, nullable=True, name=None): within the allowed range, e.g. using 128 as ByteType will overflow. Note that, Python float is not checked, so it will become infinity when cast to Java float, if it overflows. + Examples + -------- >>> _make_type_verifier(StructType([]))(None) >>> _make_type_verifier(StringType())("") >>> _make_type_verifier(LongType())(0) @@ -1392,10 +1437,13 @@ class Row(tuple): It is not allowed to omit a named argument to represent that the value is None or missing. This should be explicitly set to None in this case. - NOTE: As of Spark 3.0.0, Rows created from named arguments no longer have - field names sorted alphabetically and will be ordered in the position as - entered. + .. versionchanged:: 3.0.0 + Rows created from named arguments no longer have + field names sorted alphabetically and will be ordered in the position as + entered. + Examples + -------- >>> row = Row(name="Alice", age=11) >>> row Row(name='Alice', age=11) @@ -1447,14 +1495,21 @@ def asDict(self, recursive=False): """ Return as a dict - :param recursive: turns the nested Rows to dict (default: False). - - .. 
note:: If a row contains duplicate field names, e.g., the rows of a join - between two :class:`DataFrame` that both have the fields of same names, - one of the duplicate fields will be selected by ``asDict``. ``__getitem__`` - will also return one of the duplicate fields, however returned value might - be different to ``asDict``. - + Parameters + ---------- + recursive : bool, optional + turns the nested Rows to dict (default: False). + + Notes + ----- + If a row contains duplicate field names, e.g., the rows of a join + between two :class:`DataFrame` that both have the fields of same names, + one of the duplicate fields will be selected by ``asDict``. ``__getitem__`` + will also return one of the duplicate fields, however returned value might + be different to ``asDict``. + + Examples + -------- >>> Row(name="Alice", age=11).asDict() == {'name': 'Alice', 'age': 11} True >>> row = Row(key=1, value=Row(name='a', age=2)) diff --git a/python/pyspark/sql/udf.py b/python/pyspark/sql/udf.py index 100481cf12899..c2e02a1c8c3d8 100644 --- a/python/pyspark/sql/udf.py +++ b/python/pyspark/sql/udf.py @@ -20,7 +20,7 @@ import functools import sys -from pyspark import SparkContext, since +from pyspark import SparkContext from pyspark.rdd import _prepare_for_python_RDD, PythonEvalType from pyspark.sql.column import Column, _to_java_column, _to_seq from pyspark.sql.types import StringType, DataType, StructType, _parse_datatype_string @@ -49,9 +49,11 @@ class UserDefinedFunction(object): .. versionadded:: 1.3 - .. note:: The constructor of this class is not supposed to be directly called. - Use :meth:`pyspark.sql.functions.udf` or :meth:`pyspark.sql.functions.pandas_udf` - to create this instance. + Notes + ----- + The constructor of this class is not supposed to be directly called. + Use :meth:`pyspark.sql.functions.udf` or :meth:`pyspark.sql.functions.pandas_udf` + to create this instance. """ def __init__(self, func, returnType=StringType(), @@ -232,26 +234,39 @@ class UDFRegistration(object): def __init__(self, sparkSession): self.sparkSession = sparkSession - @since("1.3.1") def register(self, name, f, returnType=None): """Register a Python function (including lambda function) or a user-defined function as a SQL function. - :param name: name of the user-defined function in SQL statements. - :param f: a Python function, or a user-defined function. The user-defined function can + .. versionadded:: 1.3.1 + + Parameters + ---------- + name : str, + name of the user-defined function in SQL statements. + f : function, :meth:`pyspark.sql.functions.udf` or :meth:`pyspark.sql.functions.pandas_udf` + a Python function, or a user-defined function. The user-defined function can be either row-at-a-time or vectorized. See :meth:`pyspark.sql.functions.udf` and :meth:`pyspark.sql.functions.pandas_udf`. - :param returnType: the return type of the registered user-defined function. The value can + returnType : :class:`pyspark.sql.types.DataType` or str, optional + the return type of the registered user-defined function. The value can be either a :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string. - :return: a user-defined function. + `returnType` can be optionally specified when `f` is a Python function but not + when `f` is a user-defined function. Please see the examples below. 
+ + Returns + ------- + function + a user-defined function + Notes + ----- To register a nondeterministic Python function, users need to first build a nondeterministic user-defined function for the Python function and then register it as a SQL function. - `returnType` can be optionally specified when `f` is a Python function but not - when `f` is a user-defined function. Please see below. - + Examples + -------- 1. When `f` is a Python function: `returnType` defaults to string type and can be optionally specified. The produced @@ -275,7 +290,7 @@ def register(self, name, f, returnType=None): >>> spark.sql("SELECT stringLengthInt('test')").collect() [Row(stringLengthInt(test)=4)] - 2. When `f` is a user-defined function: + 2. When `f` is a user-defined function (from Spark 2.3.0): Spark uses the return type of the given user-defined function as the return type of the registered user-defined function. `returnType` should not be specified. @@ -315,8 +330,6 @@ def register(self, name, f, returnType=None): >>> spark.sql(q).collect() # doctest: +SKIP [Row(sum_udf(v1)=1), Row(sum_udf(v1)=5)] - .. note:: Registration for a user-defined function (case 2.) was added from - Spark 2.3.0. """ # This is to check whether the input function is from a user-defined function or @@ -348,18 +361,26 @@ def register(self, name, f, returnType=None): self.sparkSession._jsparkSession.udf().registerPython(name, register_udf._judf) return return_udf - @since(2.3) def registerJavaFunction(self, name, javaClassName, returnType=None): """Register a Java user-defined function as a SQL function. In addition to a name and the function itself, the return type can be optionally specified. When the return type is not specified we would infer it via reflection. - :param name: name of the user-defined function - :param javaClassName: fully qualified name of java class - :param returnType: the return type of the registered Java function. The value can be either + .. versionadded:: 2.3.0 + + Parameters + ---------- + name : str + name of the user-defined function + javaClassName : str + fully qualified name of java class + returnType : :class:`pyspark.sql.types.DataType` or str, optional + the return type of the registered Java function. The value can be either a :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string. + Examples + -------- >>> from pyspark.sql.types import IntegerType >>> spark.udf.registerJavaFunction( ... "javaStringLength", "test.org.apache.spark.sql.JavaStringLength", IntegerType()) @@ -387,13 +408,18 @@ def registerJavaFunction(self, name, javaClassName, returnType=None): jdt = self.sparkSession._jsparkSession.parseDataType(returnType.json()) self.sparkSession._jsparkSession.udf().registerJava(name, javaClassName, jdt) - @since(2.3) def registerJavaUDAF(self, name, javaClassName): """Register a Java user-defined aggregate function as a SQL function. - :param name: name of the user-defined aggregate function - :param javaClassName: fully qualified name of java class + .. versionadded:: 2.3.0 + + name : str + name of the user-defined aggregate function + javaClassName : str + fully qualified name of java class + Examples + -------- >>> spark.udf.registerJavaUDAF("javaUDAF", "test.org.apache.spark.sql.MyDoubleAvg") ... 
# doctest: +SKIP >>> df = spark.createDataFrame([(1, "a"),(2, "b"), (3, "a")],["id", "name"]) diff --git a/python/pyspark/sql/utils.py b/python/pyspark/sql/utils.py index bd76d880055cd..18f8ba29f95a2 100644 --- a/python/pyspark/sql/utils.py +++ b/python/pyspark/sql/utils.py @@ -141,9 +141,15 @@ def install_exception_handler(): def toJArray(gateway, jtype, arr): """ Convert python list to java type array - :param gateway: Py4j Gateway - :param jtype: java type of element in array - :param arr: python type list + + Parameters + ---------- + gateway : + Py4j Gateway + jtype : + java type of element in array + arr : + python type list """ jarr = gateway.new_array(jtype, len(arr)) for i in range(0, len(arr)): diff --git a/python/pyspark/sql/window.py b/python/pyspark/sql/window.py index 82f74346ba928..753ac6e10b3b3 100644 --- a/python/pyspark/sql/window.py +++ b/python/pyspark/sql/window.py @@ -34,19 +34,21 @@ class Window(object): """ Utility functions for defining window in DataFrames. - For example: + .. versionadded:: 1.4 + + Notes + ----- + When ordering is not defined, an unbounded window frame (rowFrame, + unboundedPreceding, unboundedFollowing) is used by default. When ordering is defined, + a growing window frame (rangeFrame, unboundedPreceding, currentRow) is used by default. + Examples + -------- >>> # ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW >>> window = Window.orderBy("date").rowsBetween(Window.unboundedPreceding, Window.currentRow) >>> # PARTITION BY country ORDER BY date RANGE BETWEEN 3 PRECEDING AND 3 FOLLOWING >>> window = Window.orderBy("date").partitionBy("country").rangeBetween(-3, 3) - - .. note:: When ordering is not defined, an unbounded window frame (rowFrame, - unboundedPreceding, unboundedFollowing) is used by default. When ordering is defined, - a growing window frame (rangeFrame, unboundedPreceding, currentRow) is used by default. - - .. versionadded:: 1.4 """ _JAVA_MIN_LONG = -(1 << 63) # -9223372036854775808 @@ -81,7 +83,6 @@ def orderBy(*cols): return WindowSpec(jspec) @staticmethod - @since(2.1) def rowsBetween(start, end): """ Creates a :class:`WindowSpec` with the frame boundaries defined, @@ -101,6 +102,21 @@ def rowsBetween(start, end): offset of -1 and a upper bound offset of +2. The frame for row with index 5 would range from index 4 to index 7. + .. versionadded:: 2.1.0 + + Parameters + ---------- + start : int + boundary start, inclusive. + The frame is unbounded if this is ``Window.unboundedPreceding``, or + any value less than or equal to -9223372036854775808. + end : int + boundary end, inclusive. + The frame is unbounded if this is ``Window.unboundedFollowing``, or + any value greater than or equal to 9223372036854775807. + + Examples + -------- >>> from pyspark.sql import Window >>> from pyspark.sql import functions as func >>> from pyspark.sql import SQLContext @@ -121,12 +137,6 @@ def rowsBetween(start, end): | 3| b| 3| +---+--------+---+ - :param start: boundary start, inclusive. - The frame is unbounded if this is ``Window.unboundedPreceding``, or - any value less than or equal to -9223372036854775808. - :param end: boundary end, inclusive. - The frame is unbounded if this is ``Window.unboundedFollowing``, or - any value greater than or equal to 9223372036854775807. 
""" if start <= Window._PRECEDING_THRESHOLD: start = Window.unboundedPreceding @@ -137,7 +147,6 @@ def rowsBetween(start, end): return WindowSpec(jspec) @staticmethod - @since(2.1) def rangeBetween(start, end): """ Creates a :class:`WindowSpec` with the frame boundaries defined, @@ -160,6 +169,21 @@ def rangeBetween(start, end): unbounded, because no value modification is needed, in this case multiple and non-numeric ORDER BY expression are allowed. + .. versionadded:: 2.1.0 + + Parameters + ---------- + start : int + boundary start, inclusive. + The frame is unbounded if this is ``Window.unboundedPreceding``, or + any value less than or equal to max(-sys.maxsize, -9223372036854775808). + end : int + boundary end, inclusive. + The frame is unbounded if this is ``Window.unboundedFollowing``, or + any value greater than or equal to min(sys.maxsize, 9223372036854775807). + + Examples + -------- >>> from pyspark.sql import Window >>> from pyspark.sql import functions as func >>> from pyspark.sql import SQLContext @@ -180,12 +204,6 @@ def rangeBetween(start, end): | 3| b| 3| +---+--------+---+ - :param start: boundary start, inclusive. - The frame is unbounded if this is ``Window.unboundedPreceding``, or - any value less than or equal to max(-sys.maxsize, -9223372036854775808). - :param end: boundary end, inclusive. - The frame is unbounded if this is ``Window.unboundedFollowing``, or - any value greater than or equal to min(sys.maxsize, 9223372036854775807). """ if start <= Window._PRECEDING_THRESHOLD: start = Window.unboundedPreceding @@ -203,31 +221,38 @@ class WindowSpec(object): Use the static methods in :class:`Window` to create a :class:`WindowSpec`. - .. versionadded:: 1.4 + .. versionadded:: 1.4.0 """ def __init__(self, jspec): self._jspec = jspec - @since(1.4) def partitionBy(self, *cols): """ Defines the partitioning columns in a :class:`WindowSpec`. - :param cols: names of columns or expressions + .. versionadded:: 1.4.0 + + Parameters + ---------- + cols : str, :class:`Column` or list + names of columns or expressions """ return WindowSpec(self._jspec.partitionBy(_to_java_cols(cols))) - @since(1.4) def orderBy(self, *cols): """ Defines the ordering columns in a :class:`WindowSpec`. - :param cols: names of columns or expressions + .. versionadded:: 1.4.0 + + Parameters + ---------- + cols : str, :class:`Column` or list + names of columns or expressions """ return WindowSpec(self._jspec.orderBy(_to_java_cols(cols))) - @since(1.4) def rowsBetween(self, start, end): """ Defines the frame boundaries, from `start` (inclusive) to `end` (inclusive). @@ -240,12 +265,18 @@ def rowsBetween(self, start, end): and ``Window.currentRow`` to specify special boundary values, rather than using integral values directly. - :param start: boundary start, inclusive. - The frame is unbounded if this is ``Window.unboundedPreceding``, or - any value less than or equal to max(-sys.maxsize, -9223372036854775808). - :param end: boundary end, inclusive. - The frame is unbounded if this is ``Window.unboundedFollowing``, or - any value greater than or equal to min(sys.maxsize, 9223372036854775807). + .. versionadded:: 1.4.0 + + Parameters + ---------- + start : int + boundary start, inclusive. + The frame is unbounded if this is ``Window.unboundedPreceding``, or + any value less than or equal to max(-sys.maxsize, -9223372036854775808). + end : int + boundary end, inclusive. 
+ The frame is unbounded if this is ``Window.unboundedFollowing``, or + any value greater than or equal to min(sys.maxsize, 9223372036854775807). """ if start <= Window._PRECEDING_THRESHOLD: start = Window.unboundedPreceding @@ -253,7 +284,6 @@ def rowsBetween(self, start, end): end = Window.unboundedFollowing return WindowSpec(self._jspec.rowsBetween(start, end)) - @since(1.4) def rangeBetween(self, start, end): """ Defines the frame boundaries, from `start` (inclusive) to `end` (inclusive). @@ -266,12 +296,18 @@ def rangeBetween(self, start, end): and ``Window.currentRow`` to specify special boundary values, rather than using integral values directly. - :param start: boundary start, inclusive. - The frame is unbounded if this is ``Window.unboundedPreceding``, or - any value less than or equal to max(-sys.maxsize, -9223372036854775808). - :param end: boundary end, inclusive. - The frame is unbounded if this is ``Window.unboundedFollowing``, or - any value greater than or equal to min(sys.maxsize, 9223372036854775807). + .. versionadded:: 1.4.0 + + Parameters + ---------- + start : int + boundary start, inclusive. + The frame is unbounded if this is ``Window.unboundedPreceding``, or + any value less than or equal to max(-sys.maxsize, -9223372036854775808). + end : int + boundary end, inclusive. + The frame is unbounded if this is ``Window.unboundedFollowing``, or + any value greater than or equal to min(sys.maxsize, 9223372036854775807). """ if start <= Window._PRECEDING_THRESHOLD: start = Window.unboundedPreceding diff --git a/python/pyspark/sql/window.pyi b/python/pyspark/sql/window.pyi index 4e31d57bec4d0..4fdc468df112a 100644 --- a/python/pyspark/sql/window.pyi +++ b/python/pyspark/sql/window.pyi @@ -15,6 +15,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +from typing import List, Union from pyspark.sql._typing import ColumnOrName from py4j.java_gateway import JavaObject # type: ignore[import] @@ -24,9 +25,9 @@ class Window: unboundedFollowing: int currentRow: int @staticmethod - def partitionBy(*cols: ColumnOrName) -> WindowSpec: ... + def partitionBy(*cols: Union[ColumnOrName, List[ColumnOrName]]) -> WindowSpec: ... @staticmethod - def orderBy(*cols: ColumnOrName) -> WindowSpec: ... + def orderBy(*cols: Union[ColumnOrName, List[ColumnOrName]]) -> WindowSpec: ... @staticmethod def rowsBetween(start: int, end: int) -> WindowSpec: ... @staticmethod @@ -34,7 +35,7 @@ class Window: class WindowSpec: def __init__(self, jspec: JavaObject) -> None: ... - def partitionBy(self, *cols: ColumnOrName) -> WindowSpec: ... - def orderBy(self, *cols: ColumnOrName) -> WindowSpec: ... + def partitionBy(self, *cols: Union[ColumnOrName, List[ColumnOrName]]) -> WindowSpec: ... + def orderBy(self, *cols: Union[ColumnOrName, List[ColumnOrName]]) -> WindowSpec: ... def rowsBetween(self, start: int, end: int) -> WindowSpec: ... def rangeBetween(self, start: int, end: int) -> WindowSpec: ... From 27d81369342c19bae558329ddd0e2542554433f9 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 2 Nov 2020 22:23:26 -0800 Subject: [PATCH 0378/1009] [SPARK-33324][K8S][BUILD] Upgrade kubernetes-client to 4.11.1 ### What changes were proposed in this pull request? This PR aims to upgrade `Kubernetes-client` from 4.10.3 to 4.11.1. ### Why are the changes needed? This upgrades the dependency for Apache Spark 3.1.0. Since 4.12.0 is still new and has a breaking API changes, this PR chooses the latest compatible one. 
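For reviewers double-checking the bump locally, a minimal sketch along these lines can confirm which fabric8 client jar actually gets resolved on the driver classpath (illustrative only, not part of this patch; it assumes the standard 4.x entry point `io.fabric8.kubernetes.client.DefaultKubernetesClient`, and the manifest version attribute may be absent in some builds):

```scala
// Illustrative check only: print the jar that provides the fabric8 client,
// so the resolved version is visible after the upgrade.
import io.fabric8.kubernetes.client.DefaultKubernetesClient

object KubernetesClientVersionCheck {
  def main(args: Array[String]): Unit = {
    val clazz = classOf[DefaultKubernetesClient]
    // Expected to point at kubernetes-client-4.11.1.jar once this change is in.
    println(s"kubernetes-client jar: ${clazz.getProtectionDomain.getCodeSource.getLocation}")
    // May print null if the jar manifest does not set Implementation-Version.
    println(s"Implementation-Version: ${clazz.getPackage.getImplementationVersion}")
  }
}
```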
### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the all CIs including K8s IT. Closes #30233 from dongjoon-hyun/SPARK-33324. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 44 +++++++++---------- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 44 +++++++++---------- resource-managers/kubernetes/core/pom.xml | 2 +- .../kubernetes/integration-tests/pom.xml | 2 +- 4 files changed, 46 insertions(+), 46 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index b0b215a316df2..1cd4ee94997f8 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -108,7 +108,7 @@ jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar jackson-core/2.10.0//jackson-core-2.10.0.jar jackson-databind/2.10.0//jackson-databind-2.10.0.jar jackson-dataformat-yaml/2.10.0//jackson-dataformat-yaml-2.10.0.jar -jackson-datatype-jsr310/2.10.3//jackson-datatype-jsr310-2.10.3.jar +jackson-datatype-jsr310/2.11.2//jackson-datatype-jsr310-2.11.2.jar jackson-jaxrs/1.9.13//jackson-jaxrs-1.9.13.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar jackson-module-jaxb-annotations/2.10.0//jackson-module-jaxb-annotations-2.10.0.jar @@ -155,26 +155,26 @@ jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.30//jul-to-slf4j-1.7.30.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client/4.10.3//kubernetes-client-4.10.3.jar -kubernetes-model-admissionregistration/4.10.3//kubernetes-model-admissionregistration-4.10.3.jar -kubernetes-model-apiextensions/4.10.3//kubernetes-model-apiextensions-4.10.3.jar -kubernetes-model-apps/4.10.3//kubernetes-model-apps-4.10.3.jar -kubernetes-model-autoscaling/4.10.3//kubernetes-model-autoscaling-4.10.3.jar -kubernetes-model-batch/4.10.3//kubernetes-model-batch-4.10.3.jar -kubernetes-model-certificates/4.10.3//kubernetes-model-certificates-4.10.3.jar -kubernetes-model-common/4.10.3//kubernetes-model-common-4.10.3.jar -kubernetes-model-coordination/4.10.3//kubernetes-model-coordination-4.10.3.jar -kubernetes-model-core/4.10.3//kubernetes-model-core-4.10.3.jar -kubernetes-model-discovery/4.10.3//kubernetes-model-discovery-4.10.3.jar -kubernetes-model-events/4.10.3//kubernetes-model-events-4.10.3.jar -kubernetes-model-extensions/4.10.3//kubernetes-model-extensions-4.10.3.jar -kubernetes-model-metrics/4.10.3//kubernetes-model-metrics-4.10.3.jar -kubernetes-model-networking/4.10.3//kubernetes-model-networking-4.10.3.jar -kubernetes-model-policy/4.10.3//kubernetes-model-policy-4.10.3.jar -kubernetes-model-rbac/4.10.3//kubernetes-model-rbac-4.10.3.jar -kubernetes-model-scheduling/4.10.3//kubernetes-model-scheduling-4.10.3.jar -kubernetes-model-settings/4.10.3//kubernetes-model-settings-4.10.3.jar -kubernetes-model-storageclass/4.10.3//kubernetes-model-storageclass-4.10.3.jar +kubernetes-client/4.11.1//kubernetes-client-4.11.1.jar +kubernetes-model-admissionregistration/4.11.1//kubernetes-model-admissionregistration-4.11.1.jar +kubernetes-model-apiextensions/4.11.1//kubernetes-model-apiextensions-4.11.1.jar +kubernetes-model-apps/4.11.1//kubernetes-model-apps-4.11.1.jar +kubernetes-model-autoscaling/4.11.1//kubernetes-model-autoscaling-4.11.1.jar +kubernetes-model-batch/4.11.1//kubernetes-model-batch-4.11.1.jar +kubernetes-model-certificates/4.11.1//kubernetes-model-certificates-4.11.1.jar +kubernetes-model-common/4.11.1//kubernetes-model-common-4.11.1.jar 
+kubernetes-model-coordination/4.11.1//kubernetes-model-coordination-4.11.1.jar +kubernetes-model-core/4.11.1//kubernetes-model-core-4.11.1.jar +kubernetes-model-discovery/4.11.1//kubernetes-model-discovery-4.11.1.jar +kubernetes-model-events/4.11.1//kubernetes-model-events-4.11.1.jar +kubernetes-model-extensions/4.11.1//kubernetes-model-extensions-4.11.1.jar +kubernetes-model-metrics/4.11.1//kubernetes-model-metrics-4.11.1.jar +kubernetes-model-networking/4.11.1//kubernetes-model-networking-4.11.1.jar +kubernetes-model-policy/4.11.1//kubernetes-model-policy-4.11.1.jar +kubernetes-model-rbac/4.11.1//kubernetes-model-rbac-4.11.1.jar +kubernetes-model-scheduling/4.11.1//kubernetes-model-scheduling-4.11.1.jar +kubernetes-model-settings/4.11.1//kubernetes-model-settings-4.11.1.jar +kubernetes-model-storageclass/4.11.1//kubernetes-model-storageclass-4.11.1.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar libthrift/0.12.0//libthrift-0.12.0.jar @@ -195,7 +195,7 @@ objenesis/2.6//objenesis-2.6.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar -openshift-model/4.10.3//openshift-model-4.10.3.jar +openshift-model/4.11.1//openshift-model-4.11.1.jar orc-core/1.5.12//orc-core-1.5.12.jar orc-mapreduce/1.5.12//orc-mapreduce-1.5.12.jar orc-shims/1.5.12//orc-shims-1.5.12.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index b64c7989a4e02..198e939820fcd 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -85,7 +85,7 @@ jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar jackson-core/2.10.0//jackson-core-2.10.0.jar jackson-databind/2.10.0//jackson-databind-2.10.0.jar jackson-dataformat-yaml/2.10.0//jackson-dataformat-yaml-2.10.0.jar -jackson-datatype-jsr310/2.10.3//jackson-datatype-jsr310-2.10.3.jar +jackson-datatype-jsr310/2.11.2//jackson-datatype-jsr310-2.11.2.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar jackson-module-jaxb-annotations/2.10.0//jackson-module-jaxb-annotations-2.10.0.jar jackson-module-paranamer/2.10.0//jackson-module-paranamer-2.10.0.jar @@ -125,26 +125,26 @@ jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.30//jul-to-slf4j-1.7.30.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client/4.10.3//kubernetes-client-4.10.3.jar -kubernetes-model-admissionregistration/4.10.3//kubernetes-model-admissionregistration-4.10.3.jar -kubernetes-model-apiextensions/4.10.3//kubernetes-model-apiextensions-4.10.3.jar -kubernetes-model-apps/4.10.3//kubernetes-model-apps-4.10.3.jar -kubernetes-model-autoscaling/4.10.3//kubernetes-model-autoscaling-4.10.3.jar -kubernetes-model-batch/4.10.3//kubernetes-model-batch-4.10.3.jar -kubernetes-model-certificates/4.10.3//kubernetes-model-certificates-4.10.3.jar -kubernetes-model-common/4.10.3//kubernetes-model-common-4.10.3.jar -kubernetes-model-coordination/4.10.3//kubernetes-model-coordination-4.10.3.jar -kubernetes-model-core/4.10.3//kubernetes-model-core-4.10.3.jar -kubernetes-model-discovery/4.10.3//kubernetes-model-discovery-4.10.3.jar -kubernetes-model-events/4.10.3//kubernetes-model-events-4.10.3.jar -kubernetes-model-extensions/4.10.3//kubernetes-model-extensions-4.10.3.jar -kubernetes-model-metrics/4.10.3//kubernetes-model-metrics-4.10.3.jar -kubernetes-model-networking/4.10.3//kubernetes-model-networking-4.10.3.jar -kubernetes-model-policy/4.10.3//kubernetes-model-policy-4.10.3.jar 
-kubernetes-model-rbac/4.10.3//kubernetes-model-rbac-4.10.3.jar -kubernetes-model-scheduling/4.10.3//kubernetes-model-scheduling-4.10.3.jar -kubernetes-model-settings/4.10.3//kubernetes-model-settings-4.10.3.jar -kubernetes-model-storageclass/4.10.3//kubernetes-model-storageclass-4.10.3.jar +kubernetes-client/4.11.1//kubernetes-client-4.11.1.jar +kubernetes-model-admissionregistration/4.11.1//kubernetes-model-admissionregistration-4.11.1.jar +kubernetes-model-apiextensions/4.11.1//kubernetes-model-apiextensions-4.11.1.jar +kubernetes-model-apps/4.11.1//kubernetes-model-apps-4.11.1.jar +kubernetes-model-autoscaling/4.11.1//kubernetes-model-autoscaling-4.11.1.jar +kubernetes-model-batch/4.11.1//kubernetes-model-batch-4.11.1.jar +kubernetes-model-certificates/4.11.1//kubernetes-model-certificates-4.11.1.jar +kubernetes-model-common/4.11.1//kubernetes-model-common-4.11.1.jar +kubernetes-model-coordination/4.11.1//kubernetes-model-coordination-4.11.1.jar +kubernetes-model-core/4.11.1//kubernetes-model-core-4.11.1.jar +kubernetes-model-discovery/4.11.1//kubernetes-model-discovery-4.11.1.jar +kubernetes-model-events/4.11.1//kubernetes-model-events-4.11.1.jar +kubernetes-model-extensions/4.11.1//kubernetes-model-extensions-4.11.1.jar +kubernetes-model-metrics/4.11.1//kubernetes-model-metrics-4.11.1.jar +kubernetes-model-networking/4.11.1//kubernetes-model-networking-4.11.1.jar +kubernetes-model-policy/4.11.1//kubernetes-model-policy-4.11.1.jar +kubernetes-model-rbac/4.11.1//kubernetes-model-rbac-4.11.1.jar +kubernetes-model-scheduling/4.11.1//kubernetes-model-scheduling-4.11.1.jar +kubernetes-model-settings/4.11.1//kubernetes-model-settings-4.11.1.jar +kubernetes-model-storageclass/4.11.1//kubernetes-model-storageclass-4.11.1.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar libthrift/0.12.0//libthrift-0.12.0.jar @@ -165,7 +165,7 @@ objenesis/2.6//objenesis-2.6.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar -openshift-model/4.10.3//openshift-model-4.10.3.jar +openshift-model/4.11.1//openshift-model-4.11.1.jar orc-core/1.5.12//orc-core-1.5.12.jar orc-mapreduce/1.5.12//orc-mapreduce-1.5.12.jar orc-shims/1.5.12//orc-shims-1.5.12.jar diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index a4c80f551cdfc..9ae48f4da8b05 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -30,7 +30,7 @@ kubernetes - 4.10.3 + 4.11.1 diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 952081030f5f3..5274c0579eb05 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -28,7 +28,7 @@ 1.3.0 - 4.10.3 + 4.11.1 kubernetes-integration-tests From 4c8ee8856cb9714d433456fb0ce44dfebb00d83f Mon Sep 17 00:00:00 2001 From: zero323 Date: Tue, 3 Nov 2020 22:50:59 +0900 Subject: [PATCH 0379/1009] [SPARK-33257][PYTHON][SQL] Support Column inputs in PySpark ordering functions (asc*, desc*) ### What changes were proposed in this pull request? This PR adds support for passing `Column`s as input to PySpark sorting functions. ### Why are the changes needed? According to SPARK-26979, PySpark functions should support both Column and str arguments, when possible. ### Does this PR introduce _any_ user-facing change? 
PySpark users can now provide both `Column` and `str` as an argument for `asc*` and `desc*` functions. ### How was this patch tested? New unit tests. Closes #30227 from zero323/SPARK-33257. Authored-by: zero323 Signed-off-by: HyukjinKwon --- python/pyspark/sql/functions.py | 30 +++++++++++++++---- python/pyspark/sql/functions.pyi | 12 ++++---- python/pyspark/sql/tests/test_functions.py | 35 ++++++++++++++++++++++ 3 files changed, 65 insertions(+), 12 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 87b999dca76ec..86a88a5bf341e 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -119,7 +119,10 @@ def asc(col): """ Returns a sort expression based on the ascending order of the given column name. """ - return _invoke_function("asc", col) + return ( + col.asc() if isinstance(col, Column) + else _invoke_function("asc", col) + ) @since(1.3) @@ -127,7 +130,10 @@ def desc(col): """ Returns a sort expression based on the descending order of the given column name. """ - return _invoke_function("desc", col) + return ( + col.desc() if isinstance(col, Column) + else _invoke_function("desc", col) + ) @since(1.3) @@ -457,7 +463,10 @@ def asc_nulls_first(col): Returns a sort expression based on the ascending order of the given column name, and null values return before non-null values. """ - return _invoke_function("asc_nulls_first", col) + return ( + col.asc_nulls_first() if isinstance(col, Column) + else _invoke_function("asc_nulls_first", col) + ) @since(2.4) @@ -466,7 +475,10 @@ def asc_nulls_last(col): Returns a sort expression based on the ascending order of the given column name, and null values appear after non-null values. """ - return _invoke_function("asc_nulls_last", col) + return ( + col.asc_nulls_last() if isinstance(col, Column) + else _invoke_function("asc_nulls_last", col) + ) @since(2.4) @@ -475,7 +487,10 @@ def desc_nulls_first(col): Returns a sort expression based on the descending order of the given column name, and null values appear before non-null values. """ - return _invoke_function("desc_nulls_first", col) + return ( + col.desc_nulls_first() if isinstance(col, Column) + else _invoke_function("desc_nulls_first", col) + ) @since(2.4) @@ -484,7 +499,10 @@ def desc_nulls_last(col): Returns a sort expression based on the descending order of the given column name, and null values appear after non-null values. """ - return _invoke_function("desc_nulls_last", col) + return ( + col.desc_nulls_last() if isinstance(col, Column) + else _invoke_function("desc_nulls_last", col) + ) @since(1.6) diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi index e395f5797bebd..281c1d75436c6 100644 --- a/python/pyspark/sql/functions.pyi +++ b/python/pyspark/sql/functions.pyi @@ -258,9 +258,9 @@ def map_zip_with( ) -> Column: ... def abs(col: ColumnOrName) -> Column: ... def acos(col: ColumnOrName) -> Column: ... -def asc(col: str) -> Column: ... -def asc_nulls_first(col: str) -> Column: ... -def asc_nulls_last(col: str) -> Column: ... +def asc(col: ColumnOrName) -> Column: ... +def asc_nulls_first(col: ColumnOrName) -> Column: ... +def asc_nulls_last(col: ColumnOrName) -> Column: ... def ascii(col: ColumnOrName) -> Column: ... def asin(col: ColumnOrName) -> Column: ... def atan(col: ColumnOrName) -> Column: ... @@ -285,9 +285,9 @@ def count(col: ColumnOrName) -> Column: ... def cume_dist() -> Column: ... def degrees(col: ColumnOrName) -> Column: ... def dense_rank() -> Column: ... 
-def desc(col: str) -> Column: ... -def desc_nulls_first(col: str) -> Column: ... -def desc_nulls_last(col: str) -> Column: ... +def desc(col: ColumnOrName) -> Column: ... +def desc_nulls_first(col: ColumnOrName) -> Column: ... +def desc_nulls_last(col: ColumnOrName) -> Column: ... def exp(col: ColumnOrName) -> Column: ... def expm1(col: ColumnOrName) -> Column: ... def floor(col: ColumnOrName) -> Column: ... diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index cc77b8d5dfe3e..32549343d938f 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -265,6 +265,41 @@ def test_approxQuantile(self): self.assertRaises(ValueError, lambda: df.stat.approxQuantile(("a", 123), [0.1, 0.9], 0.1)) self.assertRaises(ValueError, lambda: df.stat.approxQuantile(["a", 123], [0.1, 0.9], 0.1)) + def test_sorting_functions_with_column(self): + from pyspark.sql import functions + from pyspark.sql.column import Column + + funs = [ + functions.asc_nulls_first, functions.asc_nulls_last, + functions.desc_nulls_first, functions.desc_nulls_last + ] + exprs = [col("x"), "x"] + + for fun in funs: + for expr in exprs: + res = fun(expr) + self.assertIsInstance(res, Column) + self.assertIn( + f"""'x {fun.__name__.replace("_", " ").upper()}'""", + str(res) + ) + + for expr in exprs: + res = functions.asc(expr) + self.assertIsInstance(res, Column) + self.assertIn( + """'x ASC NULLS FIRST'""", + str(res) + ) + + for expr in exprs: + res = functions.desc(expr) + self.assertIsInstance(res, Column) + self.assertIn( + """'x DESC NULLS LAST'""", + str(res) + ) + def test_sort_with_nulls_order(self): from pyspark.sql import functions From 56c623e98c54fdb4d47c9264ae1b282ecb2b7291 Mon Sep 17 00:00:00 2001 From: neko Date: Tue, 3 Nov 2020 08:49:52 -0600 Subject: [PATCH 0380/1009] [SPARK-33284][WEB-UI] In the Storage UI page, clicking any field to sort the table will cause the header content to be lost ### What changes were proposed in this pull request? In the old version of spark in the storage UI page, the sorting function is normal, but sorting in the new version will cause the header content to be lost, So I try to fix the bug. ### Why are the changes needed? The header field of the table on the page is similar to the following, **note that each th contains the span attribute**: ```html .... Storage Level ..... ``` Since [PR#26136](https://github.com/apache/spark/pull/26136), if the `th` in the table itself contains the `span` attribute, the `span` will be deleted directly after clicking the sort, and the original header content will be lost. There are three problems in `sorttable.js`: 1. `sortrevind.class = "sorttable_sortrevind"` in [sorttab.js#107](https://github.com/apache/spark/blob/9d5e48ea95d1c3017a51ff69584f32a18901b2b5/core/src/main/resources/org/apache/spark/ui/static/sorttable.js#L107) and `sortfwdind.class = "sorttable_sortfwdind"` in [sorttab.js#125](https://github.com/apache/spark/blob/9d5e48ea95d1c3017a51ff69584f32a18901b2b5/core/src/main/resources/org/apache/spark/ui/static/sorttable.js#L125) sorttable_xx attribute should be assigned to`className` instead of `class`, as javascript uses `rowlists[j].className.search` rather than `rowlists[j].class.search` to determine whether the component has a sorting flag or not. 2. 
`rowlists[j].className.search(/\sorttable_sortrevind\b/)` in [sorttab.js#120](https://github.com/apache/spark/blob/9d5e48ea95d1c3017a51ff69584f32a18901b2b5/core/src/main/resources/org/apache/spark/ui/static/sorttable.js#L120) was wrong. The original intention is to search whether `className` contains the word `sorttable_sortrevind` , but the expression is wrong, it should be `\bsorttable_sortrevind\b` instead of `\sorttable_sortrevind\b` 3. The if check statement in the following code snippet ([sorttab.js#141](https://github.com/apache/spark/blob/9d5e48ea95d1c3017a51ff69584f32a18901b2b5/core/src/main/resources/org/apache/spark/ui/static/sorttable.js#L141)) was wrong. **If the `search` function does not find the target, it will return -1, but Boolean(-1) is actually equals true**. This statement will cause span to be deleted even if it does not contain `sorttable_sortfwdind` or `sorttable_sortrevind`. ```javascript rowlists = this.parentNode.getElementsByTagName("span"); for (var j=0; j < rowlists.length; j++) { if (rowlists[j].className.search(/\bsorttable_sortfwdind\b/) || rowlists[j].className.search(/\sorttable_sortrevind\b/) ) { rowlists[j].parentNode.removeChild(rowlists[j]); } } ``` ### Does this PR introduce _any_ user-facing change? NO. ### How was this patch tested? The manual test result of the ui page is as below: ![fix sorted](https://user-images.githubusercontent.com/52202080/97543194-daeaa680-1a02-11eb-8b11-8109c3e4e9a3.gif) Closes #30182 from akiyamaneko/ui_storage_sort_error. Authored-by: neko Signed-off-by: Sean Owen --- .../org/apache/spark/ui/static/sorttable.js | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/sorttable.js b/core/src/main/resources/org/apache/spark/ui/static/sorttable.js index ecd580e5c64aa..3f98a0379dc3c 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/sorttable.js +++ b/core/src/main/resources/org/apache/spark/ui/static/sorttable.js @@ -99,12 +99,12 @@ sorttable = { 'sorttable_sorted_reverse'); rowlists = this.parentNode.getElementsByTagName("span"); for (var j=0; j < rowlists.length; j++) { - if (rowlists[j].className.search(/\bsorttable_sortfwdind\b/)) { + if (rowlists[j].className.search(/\bsorttable_sortfwdind\b/) != -1) { rowlists[j].parentNode.removeChild(rowlists[j]); } } sortrevind = document.createElement('span'); - sortrevind.class = "sorttable_sortrevind"; + sortrevind.className = "sorttable_sortrevind"; sortrevind.innerHTML = stIsIE ? ' 5' : ' ▾'; this.appendChild(sortrevind); return; @@ -117,12 +117,12 @@ sorttable = { 'sorttable_sorted'); rowlists = this.parentNode.getElementsByTagName("span"); for (var j=0; j < rowlists.length; j++) { - if (rowlists[j].className.search(/\sorttable_sortrevind\b/)) { + if (rowlists[j].className.search(/\bsorttable_sortrevind\b/) != -1) { rowlists[j].parentNode.removeChild(rowlists[j]); } } sortfwdind = document.createElement('span'); - sortfwdind.class = "sorttable_sortfwdind"; + sortfwdind.className = "sorttable_sortfwdind"; sortfwdind.innerHTML = stIsIE ? 
' 6' : ' ▴'; this.appendChild(sortfwdind); return; @@ -138,15 +138,15 @@ sorttable = { }); rowlists = this.parentNode.getElementsByTagName("span"); for (var j=0; j < rowlists.length; j++) { - if (rowlists[j].className.search(/\bsorttable_sortfwdind\b/) - || rowlists[j].className.search(/\sorttable_sortrevind\b/) ) { + if (rowlists[j].className.search(/\bsorttable_sortfwdind\b/) != -1 + || rowlists[j].className.search(/\bsorttable_sortrevind\b/) != -1) { rowlists[j].parentNode.removeChild(rowlists[j]); } } this.className += ' sorttable_sorted'; sortfwdind = document.createElement('span'); - sortfwdind.class = "sorttable_sortfwdind"; + sortfwdind.className = "sorttable_sortfwdind"; sortfwdind.innerHTML = stIsIE ? ' 6' : ' ▴'; this.appendChild(sortfwdind); From d900c6ff49ed898163f562d1211743decb75c601 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Tue, 3 Nov 2020 14:53:01 -0800 Subject: [PATCH 0381/1009] [SPARK-33293][SQL][FOLLOW-UP] Rename TableWriteExec to TableWriteExecHelper ### What changes were proposed in this pull request? Rename `TableWriteExec` in `WriteToDataSourceV2Exec.scala` to `TableWriteExecHelper`. ### Why are the changes needed? See [discussion](https://github.com/apache/spark/pull/30193#discussion_r516412653). The former is too general. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A Closes #30235 from sunchao/SPARK-33293-2. Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../datasources/v2/WriteToDataSourceV2Exec.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala index efa2c31e07602..1421a9315c3a8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala @@ -66,7 +66,7 @@ case class CreateTableAsSelectExec( query: SparkPlan, properties: Map[String, String], writeOptions: CaseInsensitiveStringMap, - ifNotExists: Boolean) extends TableWriteExec { + ifNotExists: Boolean) extends TableWriteExecHelper { override protected def run(): Seq[InternalRow] = { if (catalog.tableExists(ident)) { @@ -100,7 +100,7 @@ case class AtomicCreateTableAsSelectExec( query: SparkPlan, properties: Map[String, String], writeOptions: CaseInsensitiveStringMap, - ifNotExists: Boolean) extends TableWriteExec { + ifNotExists: Boolean) extends TableWriteExecHelper { override protected def run(): Seq[InternalRow] = { if (catalog.tableExists(ident)) { @@ -134,7 +134,7 @@ case class ReplaceTableAsSelectExec( query: SparkPlan, properties: Map[String, String], writeOptions: CaseInsensitiveStringMap, - orCreate: Boolean) extends TableWriteExec { + orCreate: Boolean) extends TableWriteExecHelper { override protected def run(): Seq[InternalRow] = { // Note that this operation is potentially unsafe, but these are the strict semantics of @@ -176,7 +176,7 @@ case class AtomicReplaceTableAsSelectExec( query: SparkPlan, properties: Map[String, String], writeOptions: CaseInsensitiveStringMap, - orCreate: Boolean) extends TableWriteExec { + orCreate: Boolean) extends TableWriteExecHelper { override protected def run(): Seq[InternalRow] = { val schema = query.schema.asNullable @@ -432,7 +432,7 @@ object DataWritingSparkTask extends Logging { } } -private[v2] trait TableWriteExec 
extends V2TableWriteExec with SupportsV1Write { +private[v2] trait TableWriteExecHelper extends V2TableWriteExec with SupportsV1Write { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper protected def writeToTable( From 034070a23aa8bcecc351bb2fec413e1662dcbb75 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 4 Nov 2020 12:30:38 +0800 Subject: [PATCH 0382/1009] Revert "[SPARK-33248][SQL] Add a configuration to control the legacy behavior of whether need to pad null value when value size less then schema size" This reverts commit 0c943cd2fbc6f2d25588991613abf469ace0153e. --- docs/sql-migration-guide.md | 2 -- .../org/apache/spark/sql/internal/SQLConf.scala | 15 --------------- .../execution/BaseScriptTransformationExec.scala | 10 ++-------- 3 files changed, 2 insertions(+), 25 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 319e72172d597..fdc764a93424b 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -51,8 +51,6 @@ license: | - In Spark 3.1, loading and saving of timestamps from/to parquet files fails if the timestamps are before 1900-01-01 00:00:00Z, and loaded (saved) as the INT96 type. In Spark 3.0, the actions don't fail but might lead to shifting of the input timestamps due to rebasing from/to Julian to/from Proleptic Gregorian calendar. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.parquet.int96RebaseModeInRead` or/and `spark.sql.legacy.parquet.int96RebaseModeInWrite` to `LEGACY`. - In Spark 3.1, the `schema_of_json` and `schema_of_csv` functions return the schema in the SQL format in which field names are quoted. In Spark 3.0, the function returns a catalog string without field quoting and in lower case. - - - In Spark 3.1, when `spark.sql.legacy.transformationPadNullWhenValueLessThenSchema` is true, Spark will pad NULL value when script transformation's output value size less then schema size in default-serde mode(script transformation with row format of `ROW FORMAT DELIMITED`). If false, Spark will keep original behavior to throw `ArrayIndexOutOfBoundsException`. ## Upgrading from Spark SQL 3.0 to 3.0.1 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 8825f4f96378d..21357a492e39e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2765,18 +2765,6 @@ object SQLConf { .checkValue(_ > 0, "The timeout value must be positive") .createWithDefault(10L) - val LEGACY_SCRIPT_TRANSFORM_PAD_NULL = - buildConf("spark.sql.legacy.transformationPadNullWhenValueLessThenSchema") - .internal() - .doc("Whether pad null value when transformation output's value size less then " + - "schema size in default-serde mode(script transformation with row format of " + - "`ROW FORMAT DELIMITED`)." + - "When true, Spark will pad NULL value to keep same behavior with hive." 
+ - "When false, Spark keep original behavior to throw `ArrayIndexOutOfBoundsException`") - .version("3.1.0") - .booleanConf - .createWithDefault(true) - val LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP = buildConf("spark.sql.legacy.allowCastNumericToTimestamp") .internal() @@ -3505,9 +3493,6 @@ class SQLConf extends Serializable with Logging { def legacyAllowModifyActiveSession: Boolean = getConf(StaticSQLConf.LEGACY_ALLOW_MODIFY_ACTIVE_SESSION) - def legacyPadNullWhenValueLessThenSchema: Boolean = - getConf(SQLConf.LEGACY_SCRIPT_TRANSFORM_PAD_NULL) - def legacyAllowCastNumericToTimestamp: Boolean = getConf(SQLConf.LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala index f2cddc7ba7290..74e5aa716ad67 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala @@ -104,16 +104,10 @@ trait BaseScriptTransformationExec extends UnaryExecNode { val reader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8)) val outputRowFormat = ioschema.outputRowFormatMap("TOK_TABLEROWFORMATFIELD") - - val padNull = if (conf.legacyPadNullWhenValueLessThenSchema) { - (arr: Array[String], size: Int) => arr.padTo(size, null) - } else { - (arr: Array[String], size: Int) => arr - } val processRowWithoutSerde = if (!ioschema.schemaLess) { prevLine: String => new GenericInternalRow( - padNull(prevLine.split(outputRowFormat), outputFieldWriters.size) + prevLine.split(outputRowFormat).padTo(outputFieldWriters.size, null) .zip(outputFieldWriters) .map { case (data, writer) => writer(data) }) } else { @@ -124,7 +118,7 @@ trait BaseScriptTransformationExec extends UnaryExecNode { val kvWriter = CatalystTypeConverters.createToCatalystConverter(StringType) prevLine: String => new GenericInternalRow( - padNull(prevLine.split(outputRowFormat).slice(0, 2), 2) + prevLine.split(outputRowFormat).slice(0, 2).padTo(2, null) .map(kvWriter)) } From 1740b29b3f006abd08bc01b0ca807c3721d4bb0e Mon Sep 17 00:00:00 2001 From: ulysses Date: Wed, 4 Nov 2020 05:01:39 +0000 Subject: [PATCH 0383/1009] [SPARK-33323][SQL] Add query resolved check before convert hive relation ### What changes were proposed in this pull request? Add query.resolved before convert hive relation. ### Why are the changes needed? For better error msg. ``` CREATE TABLE t STORED AS PARQUET AS SELECT * FROM ( SELECT c3 FROM ( SELECT c1, c2 from values(1,2) t(c1, c2) ) ) ``` Before this PR, we get such error msg ``` org.apache.spark.sql.catalyst.analysis.UnresolvedException: Invalid call to toAttribute on unresolved object, tree: * at org.apache.spark.sql.catalyst.analysis.Star.toAttribute(unresolved.scala:244) at org.apache.spark.sql.catalyst.plans.logical.Project$$anonfun$output$1.apply(basicLogicalOperators.scala:52) at org.apache.spark.sql.catalyst.plans.logical.Project$$anonfun$output$1.apply(basicLogicalOperators.scala:52) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) at scala.collection.immutable.List.foreach(List.scala:392) ``` ### Does this PR introduce _any_ user-facing change? Yes, error msg changed. ### How was this patch tested? Add test. Closes #30230 from ulysses-you/SPARK-33323. 
Authored-by: ulysses Signed-off-by: Wenchen Fan --- .../spark/sql/hive/HiveStrategies.scala | 5 +++-- .../spark/sql/hive/HiveParquetSuite.scala | 19 ++++++++++++++++++- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index f91f78616abf5..e9f0461e6d1a8 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -221,8 +221,9 @@ case class RelationConversions( // CTAS case CreateTable(tableDesc, mode, Some(query)) - if DDLUtils.isHiveTable(tableDesc) && tableDesc.partitionColumnNames.isEmpty && - isConvertible(tableDesc) && SQLConf.get.getConf(HiveUtils.CONVERT_METASTORE_CTAS) => + if query.resolved && DDLUtils.isHiveTable(tableDesc) && + tableDesc.partitionColumnNames.isEmpty && isConvertible(tableDesc) && + SQLConf.get.getConf(HiveUtils.CONVERT_METASTORE_CTAS) => // validation is required to be done here before relation conversion. DDLUtils.checkDataColNames(tableDesc.copy(schema = query.schema)) // This is for CREATE TABLE .. STORED AS PARQUET/ORC AS SELECT null diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala index 470c6a342b4dd..df96b0675cc2d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.hive -import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.execution.datasources.parquet.ParquetTest import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf @@ -106,4 +106,21 @@ class HiveParquetSuite extends QueryTest with ParquetTest with TestHiveSingleton } } } + + test("SPARK-33323: Add query resolved check before convert hive relation") { + withTable("t") { + val msg = intercept[AnalysisException] { + sql( + s""" + |CREATE TABLE t STORED AS PARQUET AS + |SELECT * FROM ( + | SELECT c3 FROM ( + | SELECT c1, c2 from values(1,2) t(c1, c2) + | ) + |) + """.stripMargin) + }.getMessage + assert(msg.contains("cannot resolve '`c3`' given input columns")) + } + } } From 0ad35ba5f8bd6413669b568de659334bb9a3fb44 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Wed, 4 Nov 2020 06:50:37 +0000 Subject: [PATCH 0384/1009] [SPARK-33321][SQL] Migrate ANALYZE TABLE commands to use UnresolvedTableOrView to resolve the identifier ### What changes were proposed in this pull request? This PR proposes to migrate `ANALYZE TABLE` and `ANALYZE TABLE ... FOR COLUMNS` to use `UnresolvedTableOrView` to resolve the table/view identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). Note that `ANALYZE TABLE` is not supported for v2 tables. ### Why are the changes needed? The changes allow consistent resolution behavior when resolving the table/view identifier. 
For example, the following is the current behavior: ```scala sql("create temporary view t as select 1") sql("create database db") sql("create table db.t using csv as select 1") sql("use db") sql("ANALYZE TABLE t compute statistics") // Succeeds ``` With this change, ANALYZE TABLE above fails with the following: ``` org.apache.spark.sql.AnalysisException: t is a temp view not table or permanent view.; line 1 pos 0 at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42) at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveTempViews$$anonfun$apply$7.$anonfun$applyOrElse$40(Analyzer.scala:872) at scala.Option.map(Option.scala:230) at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveTempViews$$anonfun$apply$7.applyOrElse(Analyzer.scala:870) at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveTempViews$$anonfun$apply$7.applyOrElse(Analyzer.scala:856) ``` , which is expected since temporary view is resolved first and ANALYZE TABLE doesn't support a temporary view. ### Does this PR introduce _any_ user-facing change? After this PR, `ANALYZE TABLE t` is resolved to a temp view `t` instead of table `db.t`. ### How was this patch tested? Updated existing tests. Closes #30229 from imback82/parse_v1table. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 13 +++-- .../catalyst/analysis/v2ResolutionPlans.scala | 4 +- .../sql/catalyst/parser/AstBuilder.scala | 15 +++-- .../catalyst/plans/logical/statements.scala | 19 ------ .../catalyst/plans/logical/v2Commands.scala | 22 +++++++ .../sql/catalyst/parser/DDLParserSuite.scala | 58 +++++++++++++------ .../analysis/ResolveSessionCatalog.scala | 36 ++++++------ .../datasources/v2/DataSourceV2Strategy.scala | 5 +- .../sql-tests/results/describe.sql.out | 2 +- .../spark/sql/StatisticsCollectionSuite.scala | 4 +- .../sql/connector/DataSourceV2SQLSuite.scala | 11 +++- .../spark/sql/execution/SQLViewSuite.scala | 5 +- 12 files changed, 120 insertions(+), 74 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 10fe5314b0ef9..69cf30c34d494 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -865,9 +865,14 @@ class Analyzer( u.failAnalysis(s"${ident.quoted} is a temp view not table.") } u - case u @ UnresolvedTableOrView(ident) => + case u @ UnresolvedTableOrView(ident, allowTempView) => lookupTempView(ident) - .map(_ => ResolvedView(ident.asIdentifier, isTemp = true)) + .map { _ => + if (!allowTempView) { + u.failAnalysis(s"${ident.quoted} is a temp view not table or permanent view.") + } + ResolvedView(ident.asIdentifier, isTemp = true) + } .getOrElse(u) } @@ -926,7 +931,7 @@ class Analyzer( .map(ResolvedTable(catalog.asTableCatalog, ident, _)) .getOrElse(u) - case u @ UnresolvedTableOrView(NonSessionCatalogAndIdentifier(catalog, ident)) => + case u @ UnresolvedTableOrView(NonSessionCatalogAndIdentifier(catalog, ident), _) => CatalogV2Util.loadTable(catalog, ident) .map(ResolvedTable(catalog.asTableCatalog, ident, _)) .getOrElse(u) @@ -1026,7 +1031,7 @@ class Analyzer( case table => table }.getOrElse(u) - case u @ UnresolvedTableOrView(identifier) => + case u @ UnresolvedTableOrView(identifier, _) => lookupTableOrView(identifier).getOrElse(u) } diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala index 1344d78838e1c..fcf4a438eb19c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala @@ -46,7 +46,9 @@ case class UnresolvedTable(multipartIdentifier: Seq[String]) extends LeafNode { * Holds the name of a table or view that has yet to be looked up in a catalog. It will * be resolved to [[ResolvedTable]] or [[ResolvedView]] during analysis. */ -case class UnresolvedTableOrView(multipartIdentifier: Seq[String]) extends LeafNode { +case class UnresolvedTableOrView( + multipartIdentifier: Seq[String], + allowTempView: Boolean = true) extends LeafNode { override lazy val resolved: Boolean = false override def output: Seq[Attribute] = Nil } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index f28375c8d7a4a..c5e8429d49427 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3216,7 +3216,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } /** - * Create an [[AnalyzeTableStatement]], or an [[AnalyzeColumnStatement]]. + * Create an [[AnalyzeTable]], or an [[AnalyzeColumn]]. * Example SQL for analyzing a table or a set of partitions : * {{{ * ANALYZE TABLE multi_part_name [PARTITION (partcol1[=val1], partcol2[=val2], ...)] @@ -3249,18 +3249,23 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging val tableName = visitMultipartIdentifier(ctx.multipartIdentifier()) if (ctx.ALL() != null) { checkPartitionSpec() - AnalyzeColumnStatement(tableName, None, allColumns = true) + AnalyzeColumn(UnresolvedTableOrView(tableName), None, allColumns = true) } else if (ctx.identifierSeq() == null) { val partitionSpec = if (ctx.partitionSpec != null) { visitPartitionSpec(ctx.partitionSpec) } else { Map.empty[String, Option[String]] } - AnalyzeTableStatement(tableName, partitionSpec, noScan = ctx.identifier != null) + AnalyzeTable( + UnresolvedTableOrView(tableName, allowTempView = false), + partitionSpec, + noScan = ctx.identifier != null) } else { checkPartitionSpec() - AnalyzeColumnStatement( - tableName, Option(visitIdentifierSeq(ctx.identifierSeq())), allColumns = false) + AnalyzeColumn( + UnresolvedTableOrView(tableName), + Option(visitIdentifierSeq(ctx.identifierSeq())), + allColumns = false) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index 3a534b2eb8ceb..e711a6ad434d4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -342,25 +342,6 @@ case class CreateNamespaceStatement( */ case class UseStatement(isNamespaceSet: Boolean, nameParts: Seq[String]) extends ParsedStatement -/** - * An ANALYZE TABLE statement, as parsed from SQL. 
- */ -case class AnalyzeTableStatement( - tableName: Seq[String], - partitionSpec: Map[String, Option[String]], - noScan: Boolean) extends ParsedStatement - -/** - * An ANALYZE TABLE FOR COLUMNS statement, as parsed from SQL. - */ -case class AnalyzeColumnStatement( - tableName: Seq[String], - columnNames: Option[Seq[String]], - allColumns: Boolean) extends ParsedStatement { - require(columnNames.isDefined ^ allColumns, "Parameter `columnNames` or `allColumns` are " + - "mutually exclusive. Only one of them should be specified.") -} - /** * A REPAIR TABLE statement, as parsed from SQL */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 96cb096ff97c9..a1e26ae1ba2c8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -567,3 +567,25 @@ case class ShowFunctions( pattern: Option[String]) extends Command { override def children: Seq[LogicalPlan] = child.toSeq } + +/** + * The logical plan of the ANALYZE TABLE command that works for v2 catalogs. + */ +case class AnalyzeTable( + child: LogicalPlan, + partitionSpec: Map[String, Option[String]], + noScan: Boolean) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} + +/** + * The logical plan of the ANALYZE TABLE FOR COLUMNS command that works for v2 catalogs. + */ +case class AnalyzeColumn( + child: LogicalPlan, + columnNames: Option[Seq[String]], + allColumns: Boolean) extends Command { + require(columnNames.isDefined ^ allColumns, "Parameter `columnNames` or `allColumns` are " + + "mutually exclusive. 
Only one of them should be specified.") + override def children: Seq[LogicalPlan] = child :: Nil +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index a81f9e16083d6..aca7602bdbcb0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1502,42 +1502,59 @@ class DDLParserSuite extends AnalysisTest { test("analyze table statistics") { comparePlans(parsePlan("analyze table a.b.c compute statistics"), - AnalyzeTableStatement(Seq("a", "b", "c"), Map.empty, noScan = false)) + AnalyzeTable( + UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + Map.empty, noScan = false)) comparePlans(parsePlan("analyze table a.b.c compute statistics noscan"), - AnalyzeTableStatement(Seq("a", "b", "c"), Map.empty, noScan = true)) + AnalyzeTable( + UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + Map.empty, noScan = true)) comparePlans(parsePlan("analyze table a.b.c partition (a) compute statistics nOscAn"), - AnalyzeTableStatement(Seq("a", "b", "c"), Map("a" -> None), noScan = true)) + AnalyzeTable( + UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + Map("a" -> None), noScan = true)) // Partitions specified comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds='2008-04-09', hr=11) COMPUTE STATISTICS"), - AnalyzeTableStatement( - Seq("a", "b", "c"), Map("ds" -> Some("2008-04-09"), "hr" -> Some("11")), noScan = false)) + AnalyzeTable( + UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + Map("ds" -> Some("2008-04-09"), "hr" -> Some("11")), noScan = false)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds='2008-04-09', hr=11) COMPUTE STATISTICS noscan"), - AnalyzeTableStatement( - Seq("a", "b", "c"), Map("ds" -> Some("2008-04-09"), "hr" -> Some("11")), noScan = true)) + AnalyzeTable( + UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + Map("ds" -> Some("2008-04-09"), "hr" -> Some("11")), noScan = true)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds='2008-04-09') COMPUTE STATISTICS noscan"), - AnalyzeTableStatement(Seq("a", "b", "c"), Map("ds" -> Some("2008-04-09")), noScan = true)) + AnalyzeTable( + UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + Map("ds" -> Some("2008-04-09")), noScan = true)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds='2008-04-09', hr) COMPUTE STATISTICS"), - AnalyzeTableStatement( - Seq("a", "b", "c"), Map("ds" -> Some("2008-04-09"), "hr" -> None), noScan = false)) + AnalyzeTable( + UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + Map("ds" -> Some("2008-04-09"), "hr" -> None), noScan = false)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds='2008-04-09', hr) COMPUTE STATISTICS noscan"), - AnalyzeTableStatement( - Seq("a", "b", "c"), Map("ds" -> Some("2008-04-09"), "hr" -> None), noScan = true)) + AnalyzeTable( + UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + Map("ds" -> Some("2008-04-09"), "hr" -> None), noScan = true)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds, hr=11) COMPUTE STATISTICS noscan"), - AnalyzeTableStatement( - Seq("a", "b", "c"), Map("ds" -> None, "hr" -> Some("11")), noScan = true)) + AnalyzeTable( + UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + Map("ds" -> None, "hr" -> 
Some("11")), noScan = true)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds, hr) COMPUTE STATISTICS"), - AnalyzeTableStatement(Seq("a", "b", "c"), Map("ds" -> None, "hr" -> None), noScan = false)) + AnalyzeTable( + UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + Map("ds" -> None, "hr" -> None), noScan = false)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds, hr) COMPUTE STATISTICS noscan"), - AnalyzeTableStatement(Seq("a", "b", "c"), Map("ds" -> None, "hr" -> None), noScan = true)) + AnalyzeTable( + UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + Map("ds" -> None, "hr" -> None), noScan = true)) intercept("analyze table a.b.c compute statistics xxxx", "Expected `NOSCAN` instead of `xxxx`") @@ -1550,7 +1567,8 @@ class DDLParserSuite extends AnalysisTest { comparePlans( parsePlan("ANALYZE TABLE a.b.c COMPUTE STATISTICS FOR COLUMNS key, value"), - AnalyzeColumnStatement(Seq("a", "b", "c"), Option(Seq("key", "value")), allColumns = false)) + AnalyzeColumn( + UnresolvedTableOrView(Seq("a", "b", "c")), Option(Seq("key", "value")), allColumns = false)) // Partition specified - should be ignored comparePlans( @@ -1559,7 +1577,8 @@ class DDLParserSuite extends AnalysisTest { |ANALYZE TABLE a.b.c PARTITION(ds='2017-06-10') |COMPUTE STATISTICS FOR COLUMNS key, value """.stripMargin), - AnalyzeColumnStatement(Seq("a", "b", "c"), Option(Seq("key", "value")), allColumns = false)) + AnalyzeColumn( + UnresolvedTableOrView(Seq("a", "b", "c")), Option(Seq("key", "value")), allColumns = false)) // Partition specified should be ignored in case of COMPUTE STATISTICS FOR ALL COLUMNS comparePlans( @@ -1568,7 +1587,8 @@ class DDLParserSuite extends AnalysisTest { |ANALYZE TABLE a.b.c PARTITION(ds='2017-06-10') |COMPUTE STATISTICS FOR ALL COLUMNS """.stripMargin), - AnalyzeColumnStatement(Seq("a", "b", "c"), None, allColumns = true)) + AnalyzeColumn( + UnresolvedTableOrView(Seq("a", "b", "c")), None, allColumns = true)) intercept("ANALYZE TABLE a.b.c COMPUTE STATISTICS FOR ALL COLUMNS key, value", "mismatched input 'key' expecting {, ';'}") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index f35eb41fe2ce1..610632ac9256e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -22,12 +22,11 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType, CatalogUtils} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, CatalogV2Util, LookupCatalog, SupportsNamespaces, TableCatalog, TableChange, V1Table} +import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, CatalogV2Util, Identifier, LookupCatalog, SupportsNamespaces, TableCatalog, TableChange, V1Table} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource} import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 -import org.apache.spark.sql.internal.SQLConf import 
org.apache.spark.sql.types.{HIVE_TYPE_STRING, HiveStringType, MetadataBuilder, StructField, StructType} /** @@ -255,19 +254,11 @@ class ResolveSessionCatalog( case RenameTableStatement(TempViewOrV1Table(oldName), newName, isView) => AlterTableRenameCommand(oldName.asTableIdentifier, newName.asTableIdentifier, isView) - case DescribeRelation(r @ ResolvedTable(_, ident, _: V1Table), partitionSpec, isExtended) - if isSessionCatalog(r.catalog) => - DescribeTableCommand(ident.asTableIdentifier, partitionSpec, isExtended) - // Use v1 command to describe (temp) view, as v2 catalog doesn't support view yet. - case DescribeRelation(ResolvedView(ident, _), partitionSpec, isExtended) => + case DescribeRelation(ResolvedV1TableOrViewIdentifier(ident), partitionSpec, isExtended) => DescribeTableCommand(ident.asTableIdentifier, partitionSpec, isExtended) - case DescribeColumn(r @ ResolvedTable(_, _, _: V1Table), colNameParts, isExtended) - if isSessionCatalog(r.catalog) => - DescribeColumnCommand(r.identifier.asTableIdentifier, colNameParts, isExtended) - - case DescribeColumn(ResolvedView(ident, _), colNameParts, isExtended) => + case DescribeColumn(ResolvedV1TableOrViewIdentifier(ident), colNameParts, isExtended) => DescribeColumnCommand(ident.asTableIdentifier, colNameParts, isExtended) // For CREATE TABLE [AS SELECT], we should use the v1 command if the catalog is resolved to the @@ -419,17 +410,16 @@ class ResolveSessionCatalog( } ShowTablesCommand(db, Some(pattern), true, partitionsSpec) - case AnalyzeTableStatement(tbl, partitionSpec, noScan) => - val v1TableName = parseV1Table(tbl, "ANALYZE TABLE") + // ANALYZE TABLE works on permanent views if the views are cached. + case AnalyzeTable(ResolvedV1TableOrViewIdentifier(ident), partitionSpec, noScan) => if (partitionSpec.isEmpty) { - AnalyzeTableCommand(v1TableName.asTableIdentifier, noScan) + AnalyzeTableCommand(ident.asTableIdentifier, noScan) } else { - AnalyzePartitionCommand(v1TableName.asTableIdentifier, partitionSpec, noScan) + AnalyzePartitionCommand(ident.asTableIdentifier, partitionSpec, noScan) } - case AnalyzeColumnStatement(tbl, columnNames, allColumns) => - val v1TableName = parseTempViewOrV1Table(tbl, "ANALYZE TABLE") - AnalyzeColumnCommand(v1TableName.asTableIdentifier, columnNames, allColumns) + case AnalyzeColumn(ResolvedV1TableOrViewIdentifier(ident), columnNames, allColumns) => + AnalyzeColumnCommand(ident.asTableIdentifier, columnNames, allColumns) case RepairTableStatement(tbl) => val v1TableName = parseV1Table(tbl, "MSCK REPAIR TABLE") @@ -706,6 +696,14 @@ class ResolveSessionCatalog( } } + object ResolvedV1TableOrViewIdentifier { + def unapply(resolved: LogicalPlan): Option[Identifier] = resolved match { + case ResolvedTable(catalog, ident, _: V1Table) if isSessionCatalog(catalog) => Some(ident) + case ResolvedView(ident, _) => Some(ident) + case _ => None + } + } + private def assertTopLevelColumn(colName: Seq[String], command: String): Unit = { if (colName.length > 1) { throw new AnalysisException(s"$command does not support nested column: ${colName.quoted}") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 81a36dee58389..4bb58142b3d19 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -231,7 +231,7 
@@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case DropTable(r: ResolvedTable, ifExists, _) => DropTableExec(r.catalog, r.identifier, ifExists) :: Nil - case NoopDropTable(multipartIdentifier) => + case _: NoopDropTable => LocalTableScanExec(Nil, Nil) :: Nil case AlterTable(catalog, ident, _, changes) => @@ -280,6 +280,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case r @ ShowTableProperties(rt: ResolvedTable, propertyKey) => ShowTablePropertiesExec(r.output, rt.table, propertyKey) :: Nil + case AnalyzeTable(_: ResolvedTable, _, _) | AnalyzeColumn(_: ResolvedTable, _, _) => + throw new AnalysisException("ANALYZE TABLE is not supported for v2 tables.") + case _ => Nil } } diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out index a7de033e3a1ac..07aed98d120f9 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out @@ -540,7 +540,7 @@ struct -- !query output == Parsed Logical Plan == 'DescribeRelation false -+- 'UnresolvedTableOrView [t] ++- 'UnresolvedTableOrView [t], true == Analyzed Logical Plan == col_name: string, data_type: string, comment: string diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index 18356a4de9ef4..b016cc3f57e0d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -540,10 +540,10 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared test("analyzes column statistics in cached global temporary view") { withGlobalTempView("gTempView") { val globalTempDB = spark.sharedState.globalTempViewManager.database - val errMsg1 = intercept[NoSuchTableException] { + val errMsg1 = intercept[AnalysisException] { sql(s"ANALYZE TABLE $globalTempDB.gTempView COMPUTE STATISTICS FOR COLUMNS id") }.getMessage - assert(errMsg1.contains(s"Table or view 'gTempView' not found in database '$globalTempDB'")) + assert(errMsg1.contains(s"Table or view not found: $globalTempDB.gTempView")) // Analyzes in a global temporary view sql("CREATE GLOBAL TEMP VIEW gTempView AS SELECT * FROM range(1, 30)") val errMsg2 = intercept[AnalysisException] { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 298c07059ff44..893ee5f130cda 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2010,8 +2010,8 @@ class DataSourceV2SQLSuite val t = "testcat.ns1.ns2.tbl" withTable(t) { spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo") - testV1Command("ANALYZE TABLE", s"$t COMPUTE STATISTICS") - testV1CommandSupportingTempView("ANALYZE TABLE", s"$t COMPUTE STATISTICS FOR ALL COLUMNS") + testNotSupportedV2Command("ANALYZE TABLE", s"$t COMPUTE STATISTICS") + testNotSupportedV2Command("ANALYZE TABLE", s"$t COMPUTE STATISTICS FOR ALL COLUMNS") } } @@ -2606,6 +2606,13 @@ class DataSourceV2SQLSuite } } + private def testNotSupportedV2Command(sqlCommand: String, sqlParams: String): Unit = { + val e = intercept[AnalysisException] { + 
sql(s"$sqlCommand $sqlParams") + } + assert(e.message.contains(s"$sqlCommand is not supported for v2 tables")) + } + private def testV1Command(sqlCommand: String, sqlParams: String): Unit = { val e = intercept[AnalysisException] { sql(s"$sqlCommand $sqlParams") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index f3cae24527d60..7a6b0b8d6dd9f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -175,7 +175,10 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { }.getMessage assert(e2.contains("SHOW CREATE TABLE is not supported on a temporary view")) assertNoSuchTable(s"SHOW PARTITIONS $viewName") - assertNoSuchTable(s"ANALYZE TABLE $viewName COMPUTE STATISTICS") + val e3 = intercept[AnalysisException] { + sql(s"ANALYZE TABLE $viewName COMPUTE STATISTICS") + }.getMessage + assert(e3.contains(s"$viewName is a temp view not table or permanent view")) assertNoSuchTable(s"ANALYZE TABLE $viewName COMPUTE STATISTICS FOR COLUMNS id") } } From ff724d23b696b2c4232be5daf31eed569779d720 Mon Sep 17 00:00:00 2001 From: Erik Krogen Date: Wed, 4 Nov 2020 06:51:54 +0000 Subject: [PATCH 0385/1009] [SPARK-33214][TEST][HIVE] Stop HiveExternalCatalogVersionsSuite from using a hard-coded location to store localized Spark binaries ### What changes were proposed in this pull request? This PR changes `HiveExternalCatalogVersionsSuite` to, by default, use a standard temporary directory to store the Spark binaries that it localizes. It additionally adds a new System property, `spark.test.cache-dir`, which can be used to define a static location into which the Spark binary will be localized to allow for sharing between test executions. If the System property is used, the downloaded binaries won't be deleted after the test runs. ### Why are the changes needed? In SPARK-22356 (PR #19579), the `sparkTestingDir` used by `HiveExternalCatalogVersionsSuite` became hard-coded to enable re-use of the downloaded Spark tarball between test executions: ``` // For local test, you can set `sparkTestingDir` to a static value like `/tmp/test-spark`, to // avoid downloading Spark of different versions in each run. private val sparkTestingDir = new File("/tmp/test-spark") ``` However this doesn't work, since it gets deleted every time: ``` override def afterAll(): Unit = { try { Utils.deleteRecursively(wareHousePath) Utils.deleteRecursively(tmpDataDir) Utils.deleteRecursively(sparkTestingDir) } finally { super.afterAll() } } ``` It's bad that we're hard-coding to a `/tmp` directory, as in some cases this is not the proper place to store temporary files. We're not currently making any good use of it. ### Does this PR introduce _any_ user-facing change? Developer-facing changes only, as this is in a test. ### How was this patch tested? The test continues to execute as expected. Closes #30122 from xkrogen/xkrogen-SPARK-33214-hiveexternalversioncatalogsuite-fix. 
Authored-by: Erik Krogen Signed-off-by: Wenchen Fan --- .../HiveExternalCatalogVersionsSuite.scala | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index b81b7e8ec0c0f..38a8c492d77a7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -42,26 +42,33 @@ import org.apache.spark.util.Utils * Test HiveExternalCatalog backward compatibility. * * Note that, this test suite will automatically download spark binary packages of different - * versions to a local directory `/tmp/spark-test`. If there is already a spark folder with - * expected version under this local directory, e.g. `/tmp/spark-test/spark-2.0.3`, we will skip the - * downloading for this spark version. + * versions to a local directory. If the `spark.test.cache-dir` system property is defined, this + * directory will be used. If there is already a spark folder with expected version under this + * local directory, e.g. `/{cache-dir}/spark-2.0.3`, downloading for this spark version will be + * skipped. If the system property is not present, a temporary directory will be used and cleaned + * up after the test. */ @SlowHiveTest @ExtendedHiveTest class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { + import HiveExternalCatalogVersionsSuite._ private val isTestAtLeastJava9 = SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9) private val wareHousePath = Utils.createTempDir(namePrefix = "warehouse") private val tmpDataDir = Utils.createTempDir(namePrefix = "test-data") - // For local test, you can set `sparkTestingDir` to a static value like `/tmp/test-spark`, to + // For local test, you can set `spark.test.cache-dir` to a static value like `/tmp/test-spark`, to // avoid downloading Spark of different versions in each run. - private val sparkTestingDir = new File("/tmp/test-spark") + private val sparkTestingDir = Option(System.getProperty(SPARK_TEST_CACHE_DIR_SYSTEM_PROPERTY)) + .map(new File(_)).getOrElse(Utils.createTempDir(namePrefix = "test-spark")) private val unusedJar = TestUtils.createJarWithClasses(Seq.empty) override def afterAll(): Unit = { try { Utils.deleteRecursively(wareHousePath) Utils.deleteRecursively(tmpDataDir) - Utils.deleteRecursively(sparkTestingDir) + // Only delete sparkTestingDir if it wasn't defined to a static location by the system prop + if (Option(System.getProperty(SPARK_TEST_CACHE_DIR_SYSTEM_PROPERTY)).isEmpty) { + Utils.deleteRecursively(sparkTestingDir) + } } finally { super.afterAll() } @@ -307,3 +314,8 @@ object PROCESS_TABLES extends QueryTest with SQLTestUtils { } } } + +object HiveExternalCatalogVersionsSuite { + private val SPARK_TEST_CACHE_DIR_SYSTEM_PROPERTY = "spark.test.cache-dir" +} + From 0b557b329046c66ee67a8c94c5bb95ffbe50e135 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Wed, 4 Nov 2020 17:39:06 +0900 Subject: [PATCH 0386/1009] [SPARK-33265][TEST] Rename classOf[Seq] to classOf[scala.collection.Seq] in PostgresIntegrationSuite for Scala 2.13 ### What changes were proposed in this pull request? This PR renames some part of `Seq` in `PostgresIntegrationSuite` to `scala.collection.Seq`. When I run `docker-integration-test`, I noticed that `PostgresIntegrationSuite` failed due to `ClassCastException`. 
The reason is the same as what is resolved in SPARK-29292. ### Why are the changes needed? To pass `docker-integration-test` for Scala 2.13. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Ran `PostgresIntegrationSuite` fixed and confirmed it successfully finished. Closes #30166 from sarutak/fix-toseq-postgresql. Authored-by: Kousuke Saruta Signed-off-by: HyukjinKwon --- .../spark/sql/jdbc/PostgresIntegrationSuite.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala index de9c0660c51c1..fa13100b5fdc8 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala @@ -116,14 +116,14 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { assert(classOf[java.lang.Boolean].isAssignableFrom(types(7))) assert(classOf[String].isAssignableFrom(types(8))) assert(classOf[String].isAssignableFrom(types(9))) - assert(classOf[Seq[Int]].isAssignableFrom(types(10))) - assert(classOf[Seq[String]].isAssignableFrom(types(11))) - assert(classOf[Seq[Double]].isAssignableFrom(types(12))) - assert(classOf[Seq[BigDecimal]].isAssignableFrom(types(13))) + assert(classOf[scala.collection.Seq[Int]].isAssignableFrom(types(10))) + assert(classOf[scala.collection.Seq[String]].isAssignableFrom(types(11))) + assert(classOf[scala.collection.Seq[Double]].isAssignableFrom(types(12))) + assert(classOf[scala.collection.Seq[BigDecimal]].isAssignableFrom(types(13))) assert(classOf[String].isAssignableFrom(types(14))) assert(classOf[java.lang.Float].isAssignableFrom(types(15))) assert(classOf[java.lang.Short].isAssignableFrom(types(16))) - assert(classOf[Seq[BigDecimal]].isAssignableFrom(types(17))) + assert(classOf[scala.collection.Seq[BigDecimal]].isAssignableFrom(types(17))) assert(rows(0).getString(0).equals("hello")) assert(rows(0).getInt(1) == 42) assert(rows(0).getDouble(2) == 1.25) From 42c0b175ce6ee4bf1104b6a8cef6bb6477693781 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 4 Nov 2020 08:35:10 -0800 Subject: [PATCH 0387/1009] [SPARK-33338][SQL] GROUP BY using literal map should not fail ### What changes were proposed in this pull request? This PR aims to fix `semanticEquals` works correctly on `GetMapValue` expressions having literal maps with `ArrayBasedMapData` and `GenericArrayData`. ### Why are the changes needed? This is a regression from Apache Spark 1.6.x. ```scala scala> sc.version res1: String = 1.6.3 scala> sqlContext.sql("SELECT map('k1', 'v1')[k] FROM t GROUP BY map('k1', 'v1')[k]").show +---+ |_c0| +---+ | v1| +---+ ``` Apache Spark 2.x ~ 3.0.1 raise`RuntimeException` for the following queries. 
```sql CREATE TABLE t USING ORC AS SELECT map('k1', 'v1') m, 'k1' k SELECT map('k1', 'v1')[k] FROM t GROUP BY 1 SELECT map('k1', 'v1')[k] FROM t GROUP BY map('k1', 'v1')[k] SELECT map('k1', 'v1')[k] a FROM t GROUP BY a ``` **BEFORE** ```scala Caused by: java.lang.RuntimeException: Couldn't find k#3 in [keys: [k1], values: [v1][k#3]#6] at scala.sys.package$.error(package.scala:27) at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1$$anonfun$applyOrElse$1.apply(BoundAttribute.scala:85) at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1$$anonfun$applyOrElse$1.apply(BoundAttribute.scala:79) at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52) ``` **AFTER** ```sql spark-sql> SELECT map('k1', 'v1')[k] FROM t GROUP BY 1; v1 Time taken: 1.278 seconds, Fetched 1 row(s) spark-sql> SELECT map('k1', 'v1')[k] FROM t GROUP BY map('k1', 'v1')[k]; v1 Time taken: 0.313 seconds, Fetched 1 row(s) spark-sql> SELECT map('k1', 'v1')[k] a FROM t GROUP BY a; v1 Time taken: 0.265 seconds, Fetched 1 row(s) ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs with the newly added test case. Closes #30246 from dongjoon-hyun/SPARK-33338. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../spark/sql/catalyst/expressions/literals.scala | 2 ++ .../catalyst/expressions/ComplexTypeSuite.scala | 15 +++++++++++++++ .../org/apache/spark/sql/SQLQuerySuite.scala | 12 ++++++++++++ 3 files changed, 29 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala index 9e96ab8a9b6ca..413d0af61a05c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala @@ -316,6 +316,8 @@ case class Literal (value: Any, dataType: DataType) extends LeafExpression { (value, o.value) match { case (null, null) => true case (a: Array[Byte], b: Array[Byte]) => util.Arrays.equals(a, b) + case (a: ArrayBasedMapData, b: ArrayBasedMapData) => + a.keyArray == b.keyArray && a.valueArray == b.valueArray case (a, b) => a != null && a.equals(b) } case _ => false diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala index cdb83d3580f0a..38e32ff2518f7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala @@ -22,6 +22,7 @@ import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, UnresolvedExtractValue} import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext +import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -471,4 +472,18 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { CreateNamedStruct(Seq("a", "x", "b", 2.0)).genCode(ctx) assert(ctx.inlinedMutableStates.isEmpty) } + + test("SPARK-33338: semanticEquals should handle static GetMapValue correctly") { + val keys = new Array[UTF8String](1) + val 
values = new Array[UTF8String](1) + keys(0) = UTF8String.fromString("key") + values(0) = UTF8String.fromString("value") + + val d1 = new ArrayBasedMapData(new GenericArrayData(keys), new GenericArrayData(values)) + val d2 = new ArrayBasedMapData(new GenericArrayData(keys), new GenericArrayData(values)) + val m1 = GetMapValue(Literal.create(d1, MapType(StringType, StringType)), Literal("a")) + val m2 = GetMapValue(Literal.create(d2, MapType(StringType, StringType)), Literal("a")) + + assert(m1.semanticEquals(m2)) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 0dd2a286772a5..cebbf9282f710 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -3706,6 +3706,18 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } } + + test("SPARK-33338: GROUP BY using literal map should not fail") { + withTempDir { dir => + sql(s"CREATE TABLE t USING ORC LOCATION '${dir.toURI}' AS SELECT map('k1', 'v1') m, 'k1' k") + Seq( + "SELECT map('k1', 'v1')[k] FROM t GROUP BY 1", + "SELECT map('k1', 'v1')[k] FROM t GROUP BY map('k1', 'v1')[k]", + "SELECT map('k1', 'v1')[k] a FROM t GROUP BY a").foreach { statement => + checkAnswer(sql(statement), Row("v1")) + } + } + } } case class Foo(bar: Option[String]) From b7fff0397319efd2987d4cceff4f738f1c06409d Mon Sep 17 00:00:00 2001 From: Luca Canali Date: Wed, 4 Nov 2020 16:48:55 -0600 Subject: [PATCH 0388/1009] [SPARK-31711][CORE] Register the executor source with the metrics system when running in local mode ### What changes were proposed in this pull request? This PR proposes to register the executor source with the Spark metrics system when running in local mode. ### Why are the changes needed? The Apache Spark metrics system provides many useful insights on the Spark workload. In particular, the [executor source metrics](https://github.com/apache/spark/blob/master/docs/monitoring.md#component-instance--executor) provide detailed info, including the number of active tasks, I/O metrics, and several task metrics details. The executor source metrics, contrary to other sources (for example ExecutorMetrics source), is not available when running in local mode. Having executor metrics in local mode can be useful when testing and troubleshooting Spark workloads in a development environment. The metrics can be fed to a dashboard to see the evolution of resource usage and can be used to troubleshoot performance, as [in this example](https://github.com/cerndb/spark-dashboard). Currently users will have to deploy on a cluster to be able to collect executor source metrics, while the possibility of having them in local mode is handy for testing. ### Does this PR introduce _any_ user-facing change? - This PR exposes executor source metrics data when running in local mode. ### How was this patch tested? - Manually tested by running in local mode and inspecting the metrics listed in http://localhost:4040/metrics/json/ - Also added a test in `SourceConfigSuite` Closes #28528 from LucaCanali/metricsWithLocalMode. 
Authored-by: Luca Canali Signed-off-by: Thomas Graves --- .../main/scala/org/apache/spark/SparkContext.scala | 5 ++++- .../scala/org/apache/spark/executor/Executor.scala | 8 ++++++++ .../spark/metrics/source/SourceConfigSuite.scala | 12 ++++++++++++ docs/monitoring.md | 8 ++++++-- 4 files changed, 30 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index b35768222437c..d68015454de9d 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -42,7 +42,7 @@ import org.apache.hadoop.mapreduce.lib.input.{FileInputFormat => NewFileInputFor import org.apache.spark.annotation.DeveloperApi import org.apache.spark.broadcast.Broadcast import org.apache.spark.deploy.{LocalSparkCluster, SparkHadoopUtil} -import org.apache.spark.executor.{ExecutorMetrics, ExecutorMetricsSource} +import org.apache.spark.executor.{Executor, ExecutorMetrics, ExecutorMetricsSource} import org.apache.spark.input.{FixedLengthBinaryInputFormat, PortableDataStream, StreamInputFormat, WholeTextFileInputFormat} import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ @@ -625,6 +625,9 @@ class SparkContext(config: SparkConf) extends Logging { // Post init _taskScheduler.postStartHook() + if (isLocal) { + _env.metricsSystem.registerSource(Executor.executorSourceLocalModeOnly) + } _env.metricsSystem.registerSource(_dagScheduler.metricsSource) _env.metricsSystem.registerSource(new BlockManagerSource(_env.blockManager)) _env.metricsSystem.registerSource(new JVMCPUSource()) diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index 6653650615192..1a0ad566633da 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -135,6 +135,11 @@ private[spark] class Executor( env.metricsSystem.registerSource(new JVMCPUSource()) executorMetricsSource.foreach(_.register(env.metricsSystem)) env.metricsSystem.registerSource(env.blockManager.shuffleMetricsSource) + } else { + // This enable the registration of the executor source in local mode. + // The actual registration happens in SparkContext, + // it cannot be done here as the appId is not available yet + Executor.executorSourceLocalModeOnly = executorSource } // Whether to load classes in user jars before those in Spark jars @@ -987,4 +992,7 @@ private[spark] object Executor { // task is fully deserialized. When possible, the TaskContext.getLocalProperty call should be // used instead. 
val taskDeserializationProps: ThreadLocal[Properties] = new ThreadLocal[Properties] + + // Used to store executorSource, for local mode only + var executorSourceLocalModeOnly: ExecutorSource = null } diff --git a/core/src/test/scala/org/apache/spark/metrics/source/SourceConfigSuite.scala b/core/src/test/scala/org/apache/spark/metrics/source/SourceConfigSuite.scala index 8f5ab7419d4f7..7da1403ecd4b5 100644 --- a/core/src/test/scala/org/apache/spark/metrics/source/SourceConfigSuite.scala +++ b/core/src/test/scala/org/apache/spark/metrics/source/SourceConfigSuite.scala @@ -80,4 +80,16 @@ class SourceConfigSuite extends SparkFunSuite with LocalSparkContext { } } + test("SPARK-31711: Test executor source registration in local mode") { + val conf = new SparkConf() + val sc = new SparkContext("local", "test", conf) + try { + val metricsSystem = sc.env.metricsSystem + + // Executor source should be registered + assert (metricsSystem.getSourcesByName("executor").nonEmpty) + } finally { + sc.stop() + } + } } diff --git a/docs/monitoring.md b/docs/monitoring.md index 3513fed7b3d78..a07a113445981 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -1155,6 +1155,11 @@ This is the component with the largest amount of instrumented metrics - namespace=JVMCPU - jvmCpuTime +- namespace=executor + - **note:** These metrics are available in the driver in local mode only. + - A full list of available metrics in this + namespace can be found in the corresponding entry for the Executor component instance. + - namespace=ExecutorMetrics - **note:** these metrics are conditional to a configuration parameter: `spark.metrics.executorMetricsSource.enabled` (default is true) @@ -1167,8 +1172,7 @@ This is the component with the largest amount of instrumented metrics custom plugins into Spark. ### Component instance = Executor -These metrics are exposed by Spark executors. Note, currently they are not available -when running in local mode. +These metrics are exposed by Spark executors. - namespace=executor (metrics are of type counter or gauge) - bytesRead.count From d24dbe89557c6cdbe5c7a2b190ccd4e847757428 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Wed, 4 Nov 2020 15:05:35 -0800 Subject: [PATCH 0389/1009] [SPARK-33343][BUILD] Fix the build with sbt to copy hadoop-client-runtime.jar ### What changes were proposed in this pull request? This PR fix the issue that spark-shell doesn't work if it's built with `sbt package` (without any profiles specified). It's due to hadoop-client-runtime.jar isn't copied to assembly/target/scala-2.12/jars. 
``` $ bin/spark-shell Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/hadoop/shaded/com/ctc/wstx/io/InputBootstrapper at org.apache.spark.deploy.SparkHadoopUtil$.newConfiguration(SparkHadoopUtil.scala:426) at org.apache.spark.deploy.SparkSubmit.$anonfun$prepareSubmitEnvironment$2(SparkSubmit.scala:342) at scala.Option.getOrElse(Option.scala:189) at org.apache.spark.deploy.SparkSubmit.prepareSubmitEnvironment(SparkSubmit.scala:342) at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:877) at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180) at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203) at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90) at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022) at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.shaded.com.ctc.wstx.io.InputBootstrapper at java.net.URLClassLoader.findClass(URLClassLoader.java:382) at java.lang.ClassLoader.loadClass(ClassLoader.java:418) at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:352) at java.lang.ClassLoader.loadClass(ClassLoader.java:351) ``` ### Why are the changes needed? This is a bug. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Ran spark-shell and confirmed it works. Closes #30250 from sarutak/copy-runtime-sbt. Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 2c6f458ee25fd..dd0c5f04e5875 100644 --- a/pom.xml +++ b/pom.xml @@ -1084,7 +1084,7 @@ org.apache.hadoop hadoop-client-runtime ${hadoop.version} - runtime + ${hadoop.deps.scope} org.apache.hadoop From 7e8eb0447bfca2e38040c974dce711659e613e3c Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Thu, 5 Nov 2020 11:50:11 +0900 Subject: [PATCH 0390/1009] [SPARK-33314][SQL] Avoid dropping rows in Avro reader ### What changes were proposed in this pull request? This PR adds a check to RowReader#hasNextRow such that multiple calls to RowReader#hasNextRow with no intervening call to RowReader#nextRow will avoid consuming more than 1 record. This PR also modifies RowReader#nextRow such that consecutive calls will return new rows (previously consecutive calls would return the same row). ### Why are the changes needed? SPARK-32346 slightly refactored the AvroFileFormat and AvroPartitionReaderFactory to use a new iterator-like trait called AvroUtils#RowReader. RowReader#hasNextRow consumes a raw input record and stores the deserialized row for the next call to RowReader#nextRow. Unfortunately, sometimes hasNextRow is called twice before nextRow is called, resulting in a lost row. For example (which assumes V1 Avro reader): ```scala val df = spark.range(0, 25).toDF("index") df.write.mode("overwrite").format("avro").save("index_avro") val loaded = spark.read.format("avro").load("index_avro") // The following will give the expected size loaded.collect.size // The following will give the wrong size loaded.orderBy("index").collect.size ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added tests, which fail without the fix. Closes #30221 from bersprockets/avro_iterator_play. 
Authored-by: Bruce Robbins Signed-off-by: HyukjinKwon --- .../org/apache/spark/sql/avro/AvroUtils.scala | 14 +++- .../org/apache/spark/sql/avro/AvroSuite.scala | 84 ++++++++++++++++++- 2 files changed, 92 insertions(+), 6 deletions(-) diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala index 3583b38a01333..51997acc6dffe 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala @@ -174,7 +174,7 @@ private[sql] object AvroUtils extends Logging { private[this] var currentRow: Option[InternalRow] = None def hasNextRow: Boolean = { - do { + while (!completed && currentRow.isEmpty) { val r = fileReader.hasNext && !fileReader.pastSync(stopPosition) if (!r) { fileReader.close() @@ -182,15 +182,21 @@ private[sql] object AvroUtils extends Logging { currentRow = None } else { val record = fileReader.next() + // the row must be deserialized in hasNextRow, because AvroDeserializer#deserialize + // potentially filters rows currentRow = deserializer.deserialize(record).asInstanceOf[Option[InternalRow]] } - } while (!completed && currentRow.isEmpty) - + } currentRow.isDefined } def nextRow: InternalRow = { - currentRow.getOrElse { + if (currentRow.isEmpty) { + hasNextRow + } + val returnRow = currentRow + currentRow = None // free up hasNextRow to consume more Avro records, if not exhausted + returnRow.getOrElse { throw new NoSuchElementException("next on empty iterator") } } diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index 52cab880ab897..4f4af97f1299f 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.avro import java.io._ -import java.net.URL +import java.net.{URI, URL} import java.nio.file.{Files, Paths, StandardCopyOption} import java.sql.{Date, Timestamp} import java.util.{Locale, UUID} @@ -31,16 +31,20 @@ import org.apache.avro.Schema.Type._ import org.apache.avro.file.{DataFileReader, DataFileWriter} import org.apache.avro.generic.{GenericData, GenericDatumReader, GenericDatumWriter, GenericRecord} import org.apache.avro.generic.GenericData.{EnumSymbol, Fixed} +import org.apache.avro.mapred.FsInput import org.apache.commons.io.FileUtils +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path import org.apache.spark.{SPARK_VERSION_SHORT, SparkConf, SparkException, SparkUpgradeException} import org.apache.spark.sql._ import org.apache.spark.sql.TestingUDT.IntervalData +import org.apache.spark.sql.catalyst.{InternalRow, NoopFilters} import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.Filter import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{withDefaultTimeZone, LA, UTC} import org.apache.spark.sql.execution.{FormattedMode, SparkPlan} -import org.apache.spark.sql.execution.datasources.{CommonFileDataSourceSuite, DataSource, FilePartition} +import org.apache.spark.sql.execution.datasources.{CommonFileDataSourceSuite, DataSource, FilePartition, PartitionedFile} import org.apache.spark.sql.execution.datasources.v2.BatchScanExec import org.apache.spark.sql.functions.col import org.apache.spark.sql.internal.SQLConf @@ -1836,6 +1840,24 @@ 
abstract class AvroSuite } } } + + test("SPARK-33314: RowReader doesn't over-consume when hasNextRow called twice") { + withTempPath { dir => + Seq((1), (2), (3)) + .toDF("index") + .write + .format("avro") + .save(dir.getCanonicalPath) + val df = spark + .read + .format("avro") + .load(dir.getCanonicalPath) + .orderBy("index") + + checkAnswer(df, + Seq(Row(1), Row(2), Row(3))) + } + } } class AvroV1Suite extends AvroSuite { @@ -2005,3 +2027,61 @@ class AvroV2Suite extends AvroSuite with ExplainSuiteHelper { } } } + +class AvroRowReaderSuite + extends QueryTest + with SharedSparkSession { + + import testImplicits._ + + override protected def sparkConf: SparkConf = + super + .sparkConf + .set(SQLConf.USE_V1_SOURCE_LIST, "") // need this for BatchScanExec + + test("SPARK-33314: hasNextRow and nextRow properly handle consecutive calls") { + withTempPath { dir => + Seq((1), (2), (3)) + .toDF("value") + .coalesce(1) + .write + .format("avro") + .save(dir.getCanonicalPath) + + val df = spark.read.format("avro").load(dir.getCanonicalPath) + val fileScan = df.queryExecution.executedPlan collectFirst { + case BatchScanExec(_, f: AvroScan) => f + } + val filePath = fileScan.get.fileIndex.inputFiles(0) + val fileSize = new File(new URI(filePath)).length + val in = new FsInput(new Path(new URI(filePath)), new Configuration()) + val reader = DataFileReader.openReader(in, new GenericDatumReader[GenericRecord]()) + + val it = new Iterator[InternalRow] with AvroUtils.RowReader { + override val fileReader = reader + override val deserializer = new AvroDeserializer( + reader.getSchema, + StructType(new StructField("value", IntegerType, true) :: Nil), + CORRECTED, + new NoopFilters) + override val stopPosition = fileSize + + override def hasNext: Boolean = hasNextRow + + override def next: InternalRow = nextRow + } + assert(it.hasNext == true) + assert(it.next.getInt(0) == 1) + // test no intervening next + assert(it.hasNext == true) + assert(it.hasNext == true) + // test no intervening hasNext + assert(it.next.getInt(0) == 2) + assert(it.next.getInt(0) == 3) + assert(it.hasNext == false) + assertThrows[NoSuchElementException] { + it.next + } + } + } +} From 551b504cfe38d1ab583e617c37e49659edd65c2e Mon Sep 17 00:00:00 2001 From: Bo Zhang Date: Thu, 5 Nov 2020 12:27:20 +0800 Subject: [PATCH 0391/1009] [SPARK-33316][SQL] Support user provided nullable Avro schema for non-nullable catalyst schema in Avro writing ### What changes were proposed in this pull request? This change is to support user provided nullable Avro schema for data with non-nullable catalyst schema in Avro writing. Without this change, when users try to use a nullable Avro schema to write data with a non-nullable catalyst schema, it will throw an `IncompatibleSchemaException` with a message like `Cannot convert Catalyst type StringType to Avro type ["null","string"]`. With this change it will assume that the data is non-nullable, log a warning message for the nullability difference and serialize the data to Avro format with the nullable Avro schema provided. ### Why are the changes needed? This change is needed because sometimes our users do not have full control over the nullability of the Avro schemas they use, and this change provides them with the flexibility. ### Does this PR introduce _any_ user-facing change? Yes. Users are allowed to use nullable Avro schemas for data with non-nullable catalyst schemas in Avro writing after the change. ### How was this patch tested? Added unit tests. Closes #30224 from bozhang2820/avro-nullable. 
Authored-by: Bo Zhang Signed-off-by: Gengliang Wang --- .../spark/sql/avro/AvroSerializer.scala | 54 ++++++++++++++---- .../spark/sql/avro/SchemaConverters.scala | 2 + .../spark/sql/avro/AvroFunctionsSuite.scala | 37 ++++++++++++ .../org/apache/spark/sql/avro/AvroSuite.scala | 57 +++++++++++++++++++ 4 files changed, 140 insertions(+), 10 deletions(-) diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala index 08b1b4184fb0b..0ea95d1c0db5d 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala @@ -255,20 +255,54 @@ private[sql] class AvroSerializer( result } + /** + * Resolve a possibly nullable Avro Type. + * + * An Avro type is nullable when it is a [[UNION]] of two types: one null type and another + * non-null type. This method will check the nullability of the input Avro type and return the + * non-null type within when it is nullable. Otherwise it will return the input Avro type + * unchanged. It will throw an [[UnsupportedAvroTypeException]] when the input Avro type is an + * unsupported nullable type. + * + * It will also log a warning message if the nullability for Avro and catalyst types are + * different. + */ private def resolveNullableType(avroType: Schema, nullable: Boolean): Schema = { - if (avroType.getType == Type.UNION && nullable) { - // avro uses union to represent nullable type. + val (avroNullable, resolvedAvroType) = resolveAvroType(avroType) + warnNullabilityDifference(avroNullable, nullable) + resolvedAvroType + } + + /** + * Check the nullability of the input Avro type and resolve it when it is nullable. The first + * return value is a [[Boolean]] indicating if the input Avro type is nullable. The second + * return value is the possibly resolved type. + */ + private def resolveAvroType(avroType: Schema): (Boolean, Schema) = { + if (avroType.getType == Type.UNION) { val fields = avroType.getTypes.asScala - assert(fields.length == 2) val actualType = fields.filter(_.getType != Type.NULL) - assert(actualType.length == 1) - actualType.head - } else { - if (nullable) { - logWarning("Writing avro files with non-nullable avro schema with nullable catalyst " + - "schema will throw runtime exception if there is a record with null value.") + if (fields.length != 2 || actualType.length != 1) { + throw new UnsupportedAvroTypeException( + s"Unsupported Avro UNION type $avroType: Only UNION of a null type and a non-null " + + "type is supported") } - avroType + (true, actualType.head) + } else { + (false, avroType) + } + } + + /** + * log a warning message if the nullability for Avro and catalyst types are different. 
+ */ + private def warnNullabilityDifference(avroNullable: Boolean, catalystNullable: Boolean): Unit = { + if (avroNullable && !catalystNullable) { + logWarning("Writing Avro files with nullable Avro schema and non-nullable catalyst schema.") + } + if (!avroNullable && catalystNullable) { + logWarning("Writing Avro files with non-nullable Avro schema and nullable catalyst " + + "schema will throw runtime exception if there is a record with null value.") } } } diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala index 905f90fa79373..c685c89f0dfc8 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala @@ -208,3 +208,5 @@ object SchemaConverters { private[avro] class IncompatibleSchemaException( msg: String, ex: Throwable = null) extends Exception(msg, ex) + +private[avro] class UnsupportedAvroTypeException(msg: String) extends Exception(msg) diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala index 7f14efe15ad55..c9e0d4344691a 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala @@ -201,4 +201,41 @@ class AvroFunctionsSuite extends QueryTest with SharedSparkSession { Map("avroSchema" -> evolvedAvroSchema).asJava)), expected) } + + test("roundtrip in to_avro and from_avro - struct with nullable Avro schema") { + val df = spark.range(10).select(struct('id, 'id.cast("string").as("str")).as("struct")) + val avroTypeStruct = s""" + |{ + | "type": "record", + | "name": "struct", + | "fields": [ + | {"name": "id", "type": "long"}, + | {"name": "str", "type": ["null", "string"]} + | ] + |} + """.stripMargin + val avroStructDF = df.select(functions.to_avro('struct, avroTypeStruct).as("avro")) + checkAnswer(avroStructDF.select( + functions.from_avro('avro, avroTypeStruct)), df) + } + + test("to_avro with unsupported nullable Avro schema") { + val df = spark.range(10).select(struct('id, 'id.cast("string").as("str")).as("struct")) + for (unsupportedAvroType <- Seq("""["null", "int", "long"]""", """["int", "long"]""")) { + val avroTypeStruct = s""" + |{ + | "type": "record", + | "name": "struct", + | "fields": [ + | {"name": "id", "type": $unsupportedAvroType}, + | {"name": "str", "type": ["null", "string"]} + | ] + |} + """.stripMargin + val message = intercept[SparkException] { + df.select(functions.to_avro('struct, avroTypeStruct).as("avro")).show() + }.getCause.getMessage + assert(message.contains("Only UNION of a null type and a non-null type is supported")) + } + } } diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index 4f4af97f1299f..c9c6bcecac14e 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -1019,6 +1019,63 @@ abstract class AvroSuite } } + test("support user provided nullable avro schema " + + "for non-nullable catalyst schema without any null record") { + val catalystSchema = + StructType(Seq( + StructField("Age", IntegerType, nullable = false), + StructField("Name", StringType, nullable = 
false))) + + val avroSchema = """ + |{ + | "type" : "record", + | "name" : "test_schema", + | "fields" : [ + | {"name": "Age", "type": ["null", "int"]}, + | {"name": "Name", "type": ["null", "string"]} + | ] + |} + """.stripMargin + + val df = spark.createDataFrame( + spark.sparkContext.parallelize(Seq(Row(2, "Aurora"))), catalystSchema) + + withTempPath { tempDir => + df.write.format("avro").option("avroSchema", avroSchema).save(tempDir.getPath) + checkAvroSchemaEquals(avroSchema, getAvroSchemaStringFromFiles(tempDir.getPath)) + } + } + + test("unsupported nullable avro type") { + val catalystSchema = + StructType(Seq( + StructField("Age", IntegerType, nullable = false), + StructField("Name", StringType, nullable = false))) + + for (unsupportedAvroType <- Seq("""["null", "int", "long"]""", """["int", "long"]""")) { + val avroSchema = s""" + |{ + | "type" : "record", + | "name" : "test_schema", + | "fields" : [ + | {"name": "Age", "type": $unsupportedAvroType}, + | {"name": "Name", "type": ["null", "string"]} + | ] + |} + """.stripMargin + + val df = spark.createDataFrame( + spark.sparkContext.parallelize(Seq(Row(2, "Aurora"))), catalystSchema) + + withTempPath { tempDir => + val message = intercept[SparkException] { + df.write.format("avro").option("avroSchema", avroSchema).save(tempDir.getPath) + }.getCause.getMessage + assert(message.contains("Only UNION of a null type and a non-null type is supported")) + } + } + } + test("error handling for unsupported Interval data types") { withTempDir { dir => val tempDir = new File(dir, "files").getCanonicalPath From 0535b34ad47249df4806ed70471d5539b998a3b3 Mon Sep 17 00:00:00 2001 From: Kyle Bendickson Date: Thu, 5 Nov 2020 16:10:52 +0900 Subject: [PATCH 0392/1009] [SPARK-33282] Migrate from deprecated probot autolabeler to GitHub labeler action MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This PR removes the old Probot Autolabeler labeling configuration, as the probot autolabeler has been deprecated. I've updated the configs in Iceberg and in Avro, and we also need to update here. This PR adds in an additional workflow for labeling PRs and migrates the old probot config to the new format. Unfortunately, because certain features have not been released upstream, we will not get the _exact_ behavior as before. I have documented where that is and what changes are neeeded, and in the associated ticket I've also discussed other options and why I think this is the best way to go. Definitely a follow up ticket is needed to get the original behavior back in these few cases, but PRs have not been labeled for almost a month and so it's probably best to get it right 95% of the time and occasionally have some UI related PRs labeled as `CORE` while the issue is resolved upstream and/or further investigated. ### Why are the changes needed? The probot autolabeler is dead and will not be maintained going forward. This has been confirmed with github user [at]mithro in an issue in their repository. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? To test this PR, I first merged the config into my local fork. I then edited it several times and ran tests on that. Unfortunately, I've overwritten my fork with the apache repo in order to create a proper PR. However, I've also added the config for the same thing in the Iceberg repo as well as the Avro repo. 
I have now merged this PR into my local repo and will be running some tests on edge cases there and for validating in general: - [Check that the SQL label is applied for changes directly below repo root's sql directory](https://github.com/kbendick/spark/pull/16) ✅ - [Check that the structured streaming label is applied](https://github.com/kbendick/spark/pull/20) ✅ - [Check that a wildcard at the end of a pattern will match nested files](https://github.com/kbendick/spark/pull/19) ✅ - [Check that the rule **/*pom.xml will match the root pom.xml file](https://github.com/kbendick/spark/pull/25) ✅ I've also discovered that we're likely not killing github actions that run (like large tests etc) when users push to their PR. In most cases, I see that a user has to mark something as "OK to test", but it still seems like we might want to discuss whether or not we should add a cancellation step In order to save time / capacity on the runners. If so desired, we would add an action in each workflow that cancels old runs when a `push` action occurs on a PR. This will likely make waiting for test runners much faster iff tests are automatically rerun on push by anybody (such as PMCs, PRs that have been marked OK to test, etc). We could free a large number of resources potentially if a cancellation step was added to all of the workflows in the Apache account (as github action API limits are set at the account level). Admittedly, the fact that the "old" workflow runs weren't cancelled could admittedly be because of the fact that I was working in a fork, but given that there are explicit actions to be added to the start of workflows to cancel old PR workflows and given that we don't have them configured indicates to me that likely this is the case in this repo (and in most `apache` repos as well), at least under certain circumstances (e.g. repos that don't have "Ok to test"-like webhooks as one example). This is a separate issue though, which I can bring up on the mailing list once I'm done with this PR. Unfortunately I've been very busy the past two weeks, but if somebody else wanted to work on that I would be happy to support with any knowledge I have. The last Apache repo to still have the probot autolabeler in it is Beam, at which point we can have Gavin from ASF Infra remove the permissions for the probot autolabeler entirely. See the associated JIRA ticket for the links to other tickets, like the one for ASF Infra to remove the dead probot autolabeler's read and write permissions to our PRs in the Apache organization. Closes #30244 from kbendick/begin-migration-to-github-labeler-action. Authored-by: Kyle Bendickson Signed-off-by: HyukjinKwon --- .github/autolabeler.yml | 133 ----------------------------- .github/labeler.yml | 152 ++++++++++++++++++++++++++++++++++ .github/workflows/labeler.yml | 43 ++++++++++ 3 files changed, 195 insertions(+), 133 deletions(-) delete mode 100644 .github/autolabeler.yml create mode 100644 .github/labeler.yml create mode 100644 .github/workflows/labeler.yml diff --git a/.github/autolabeler.yml b/.github/autolabeler.yml deleted file mode 100644 index 3bca01f89950a..0000000000000 --- a/.github/autolabeler.yml +++ /dev/null @@ -1,133 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. 
-# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Bot page: https://github.com/apps/probot-autolabeler -# The matching patterns follow the .gitignore spec. -# See: https://git-scm.com/docs/gitignore#_pattern_format -# Also, note that the plugin uses 'ignore' package. See also -# https://github.com/kaelzhang/node-ignore -INFRA: - - ".github/" - - "appveyor.yml" - - "/tools/" - - "/dev/create-release/" - - ".asf.yaml" - - ".gitattributes" - - ".gitignore" - - "/dev/github_jira_sync.py" - - "/dev/merge_spark_pr.py" - - "/dev/run-tests-jenkins*" -BUILD: - - "/dev/" - - "!/dev/github_jira_sync.py" - - "!/dev/merge_spark_pr.py" - - "!/dev/run-tests-jenkins*" - - "!/dev/.rat-excludes" - - "/build/" - - "/project/" - - "/assembly/" - - "*pom.xml" - - "/bin/docker-image-tool.sh" - - "/bin/find-spark-home*" - - "scalastyle-config.xml" -DOCS: - - "docs/" - - "/README.md" - - "/CONTRIBUTING.md" -EXAMPLES: - - "examples/" - - "/bin/run-example*" -CORE: - - "/core/" - - "!UI.scala" - - "!ui/" - - "/common/kvstore/" - - "/common/network-common/" - - "/common/network-shuffle/" - - "/python/pyspark/*.py" - - "/python/pyspark/tests/*.py" -SPARK SUBMIT: - - "/bin/spark-submit*" -SPARK SHELL: - - "/repl/" - - "/bin/spark-shell*" -SQL: - - "sql/" - - "/common/unsafe/" - - "!/python/pyspark/sql/avro/" - - "!/python/pyspark/sql/streaming.py" - - "!/python/pyspark/sql/tests/test_streaming.py" - - "/bin/spark-sql*" - - "/bin/beeline*" - - "/sbin/*thriftserver*.sh" - - "*SQL*.R" - - "DataFrame.R" - - "WindowSpec.R" - - "catalog.R" - - "column.R" - - "functions.R" - - "group.R" - - "schema.R" - - "types.R" -AVRO: - - "/external/avro/" - - "/python/pyspark/sql/avro/" -DSTREAM: - - "/streaming/" - - "/data/streaming/" - - "/external/flume*" - - "/external/kinesis*" - - "/external/kafka*" - - "/python/pyspark/streaming/" -GRAPHX: - - "/graphx/" - - "/data/graphx/" -ML: - - "ml/" - - "*mllib_*.R" -MLLIB: - - "spark/mllib/" - - "/mllib-local/" - - "/python/pyspark/mllib/" -STRUCTURED STREAMING: - - "sql/**/streaming/" - - "/external/kafka-0-10-sql/" - - "/python/pyspark/sql/streaming.py" - - "/python/pyspark/sql/tests/test_streaming.py" - - "*streaming.R" -PYTHON: - - "/bin/pyspark*" - - "python/" -R: - - "r/" - - "R/" - - "/bin/sparkR*" -YARN: - - "/resource-managers/yarn/" -MESOS: - - "/resource-managers/mesos/" - - "/sbin/*mesos*.sh" -KUBERNETES: - - "/resource-managers/kubernetes/" -WINDOWS: - - "*.cmd" - - "/R/pkg/tests/fulltests/test_Windows.R" -WEB UI: - - "ui/" - - "UI.scala" -DEPLOY: - - "/sbin/" diff --git a/.github/labeler.yml b/.github/labeler.yml new file mode 100644 index 0000000000000..bd61902925e33 --- /dev/null +++ b/.github/labeler.yml @@ -0,0 +1,152 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# +# Pull Request Labeler Github Action Configuration: https://github.com/marketplace/actions/labeler +# +# Note that we currently cannot use the negatioon operator (i.e. `!`) for miniglob matches as they +# would match any file that doesn't touch them. What's needed is the concept of `any `, which takes a +# list of constraints / globs and then matches all of the constraints for either `any` of the files or +# `all` of the files in the change set. +# +# However, `any`/`all` are not supported in a released version and testing off of the `main` branch +# resulted in some other errors when testing. +# +# An issue has been opened upstream requesting that a release be cut that has support for all/any: +# - https://github.com/actions/labeler/issues/111 +# +# While we wait for this issue to be handled upstream, we can remove +# the negated / `!` matches for now and at least have labels again. +# +INFRA: + - ".github/**/*" + - "appveyor.yml" + - "tools/**/*" + - "dev/create-release/**/*" + - ".asf.yaml" + - ".gitattributes" + - ".gitignore" + - "dev/github_jira_sync.py" + - "dev/merge_spark_pr.py" + - "dev/run-tests-jenkins*" +BUILD: + # Can be supported when a stable release with correct all/any is released + #- any: ['dev/**/*', '!dev/github_jira_sync.py', '!dev/merge_spark_pr.py', '!dev/.rat-excludes'] + - "dev/**/*" + - "build/**/*" + - "project/**/*" + - "assembly/**/*" + - "**/*pom.xml" + - "bin/docker-image-tool.sh" + - "bin/find-spark-home*" + - "scalastyle-config.xml" + # These can be added in the above `any` clause (and the /dev/**/* glob removed) when + # `any`/`all` support is released + # - "!dev/github_jira_sync.py" + # - "!dev/merge_spark_pr.py" + # - "!dev/run-tests-jenkins*" + # - "!dev/.rat-excludes" +DOCS: + - "docs/**/*" + - "**/README.md" + - "**/CONTRIBUTING.md" +EXAMPLES: + - "examples/**/*" + - "bin/run-example*" +# CORE needs to be updated when all/any are released upstream. +CORE: + # - any: ["core/**/*", "!**/*UI.scala", "!**/ui/**/*"] # If any file matches all of the globs defined in the list started by `any`, label is applied. 
+ - "core/**/*" + - "common/kvstore/**/*" + - "common/network-common/**/*" + - "common/network-shuffle/**/*" + - "python/pyspark/**/*.py" + - "python/pyspark/tests/**/*.py" +SPARK SUBMIT: + - "bin/spark-submit*" +SPARK SHELL: + - "repl/**/*" + - "bin/spark-shell*" +SQL: +#- any: ["**/sql/**/*", "!python/pyspark/sql/avro/**/*", "!python/pyspark/sql/streaming.py", "!python/pyspark/sql/tests/test_streaming.py"] + - "**/sql/**/*" + - "common/unsafe/**/*" + #- "!python/pyspark/sql/avro/**/*" + #- "!python/pyspark/sql/streaming.py" + #- "!python/pyspark/sql/tests/test_streaming.py" + - "bin/spark-sql*" + - "bin/beeline*" + - "sbin/*thriftserver*.sh" + - "**/*SQL*.R" + - "**/DataFrame.R" + - "**/*WindowSpec.R" + - "**/*catalog.R" + - "**/*column.R" + - "**/*functions.R" + - "**/*group.R" + - "**/*schema.R" + - "**/*types.R" +AVRO: + - "external/avro/**/*" + - "python/pyspark/sql/avro/**/*" +DSTREAM: + - "streaming/**/*" + - "data/streaming/**/*" + - "external/kinesis*" + - "external/kafka*" + - "python/pyspark/streaming/**/*" +GRAPHX: + - "graphx/**/*" + - "data/graphx/**/*" +ML: + - "**/ml/**/*" + - "**/*mllib_*.R" +MLLIB: + - "**/spark/mllib/**/*" + - "mllib-local/**/*" + - "python/pyspark/mllib/**/*" +STRUCTURED STREAMING: + - "**/sql/**/streaming/**/*" + - "external/kafka-0-10-sql/**/*" + - "python/pyspark/sql/streaming.py" + - "python/pyspark/sql/tests/test_streaming.py" + - "**/*streaming.R" +PYTHON: + - "bin/pyspark*" + - "**/python/**/*" +R: + - "**/r/**/*" + - "**/R/**/*" + - "bin/sparkR*" +YARN: + - "resource-managers/yarn/**/*" +MESOS: + - "resource-managers/mesos/**/*" + - "sbin/*mesos*.sh" +KUBERNETES: + - "resource-managers/kubernetes/**/*" +WINDOWS: + - "**/*.cmd" + - "R/pkg/tests/fulltests/test_Windows.R" +WEB UI: + - "**/ui/**/*" + - "**/*UI.scala" +DEPLOY: + - "sbin/**/*" + diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml new file mode 100644 index 0000000000000..a1a5ab5b70f5b --- /dev/null +++ b/.github/workflows/labeler.yml @@ -0,0 +1,43 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +name: "Pull Request Labeler" +on: pull_request_target + +jobs: + label: + runs-on: ubuntu-latest + steps: + # In order to get back the negated matches like in the old config, + # we need the actinons/labeler concept of `all` and `any` which matches + # all of the given constraints / glob patterns for either `all` + # files or `any` file in the change set. 
+ # + # Github issue which requests a timeline for a release with any/all support: + # - https://github.com/actions/labeler/issues/111 + # This issue also references the issue that mentioned that any/all are only + # supported on main branch (previously called master): + # - https://github.com/actions/labeler/issues/73#issuecomment-639034278 + # + # However, these are not in a published release and the current `main` branch + # has some issues upon testing. + - uses: actions/labeler@2.2.0 + with: + repo-token: "${{ secrets.GITHUB_TOKEN }}" + sync-labels: true From d530ed0ea8bdba09fba6dcd51f8e4f7745781c2e Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Thu, 5 Nov 2020 16:15:17 +0900 Subject: [PATCH 0393/1009] Revert "[SPARK-33277][PYSPARK][SQL] Use ContextAwareIterator to stop consuming after the task ends" This reverts commit b8a440f09880c596325dd9e6caae6b470be76a8f. --- python/pyspark/sql/tests/test_pandas_map.py | 22 ------------------- .../sql/tests/test_pandas_udf_scalar.py | 19 ---------------- python/pyspark/sql/tests/test_udf.py | 20 ----------------- .../sql/execution/python/EvalPythonExec.scala | 18 +-------------- .../execution/python/MapInPandasExec.scala | 7 +++--- 5 files changed, 4 insertions(+), 82 deletions(-) diff --git a/python/pyspark/sql/tests/test_pandas_map.py b/python/pyspark/sql/tests/test_pandas_map.py index 2cad30c7294d4..3ca437f75fc23 100644 --- a/python/pyspark/sql/tests/test_pandas_map.py +++ b/python/pyspark/sql/tests/test_pandas_map.py @@ -15,12 +15,9 @@ # limitations under the License. # import os -import shutil -import tempfile import time import unittest -from pyspark.sql import Row from pyspark.testing.sqlutils import ReusedSQLTestCase, have_pandas, have_pyarrow, \ pandas_requirement_message, pyarrow_requirement_message @@ -115,25 +112,6 @@ def func(iterator): expected = df.collect() self.assertEquals(actual, expected) - # SPARK-33277 - def test_map_in_pandas_with_column_vector(self): - path = tempfile.mkdtemp() - shutil.rmtree(path) - - try: - self.spark.range(0, 200000, 1, 1).write.parquet(path) - - def func(iterator): - for pdf in iterator: - yield pd.DataFrame({'id': [0] * len(pdf)}) - - for offheap in ["true", "false"]: - with self.sql_conf({"spark.sql.columnVector.offheap.enabled": offheap}): - self.assertEquals( - self.spark.read.parquet(path).mapInPandas(func, 'id long').head(), Row(0)) - finally: - shutil.rmtree(path) - if __name__ == "__main__": from pyspark.sql.tests.test_pandas_map import * # noqa: F401 diff --git a/python/pyspark/sql/tests/test_pandas_udf_scalar.py b/python/pyspark/sql/tests/test_pandas_udf_scalar.py index c2c8f6f697c4b..6d325c9085ce1 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_scalar.py +++ b/python/pyspark/sql/tests/test_pandas_udf_scalar.py @@ -1137,25 +1137,6 @@ def test_datasource_with_udf(self): finally: shutil.rmtree(path) - # SPARK-33277 - def test_pandas_udf_with_column_vector(self): - path = tempfile.mkdtemp() - shutil.rmtree(path) - - try: - self.spark.range(0, 200000, 1, 1).write.parquet(path) - - @pandas_udf(LongType()) - def udf(x): - return pd.Series([0] * len(x)) - - for offheap in ["true", "false"]: - with self.sql_conf({"spark.sql.columnVector.offheap.enabled": offheap}): - self.assertEquals( - self.spark.read.parquet(path).select(udf('id')).head(), Row(0)) - finally: - shutil.rmtree(path) - if __name__ == "__main__": from pyspark.sql.tests.test_pandas_udf_scalar import * # noqa: F401 diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py index 
c2e95fd41c5b4..a7dcbfd32ac1c 100644 --- a/python/pyspark/sql/tests/test_udf.py +++ b/python/pyspark/sql/tests/test_udf.py @@ -674,26 +674,6 @@ def test_udf_cache(self): self.assertEqual(df.select(udf(func)("id"))._jdf.queryExecution() .withCachedData().getClass().getSimpleName(), 'InMemoryRelation') - # SPARK-33277 - def test_udf_with_column_vector(self): - path = tempfile.mkdtemp() - shutil.rmtree(path) - - try: - self.spark.range(0, 100000, 1, 1).write.parquet(path) - - def f(x): - return 0 - - fUdf = udf(f, LongType()) - - for offheap in ["true", "false"]: - with self.sql_conf({"spark.sql.columnVector.offheap.enabled": offheap}): - self.assertEquals( - self.spark.read.parquet(path).select(fUdf('id')).head(), Row(0)) - finally: - shutil.rmtree(path) - class UDFInitializationTests(unittest.TestCase): def tearDown(self): diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala index 89c7716f7c1b2..298d63478b63e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala @@ -89,7 +89,6 @@ trait EvalPythonExec extends UnaryExecNode { inputRDD.mapPartitions { iter => val context = TaskContext.get() - val contextAwareIterator = new ContextAwareIterator(iter, context) // The queue used to buffer input rows so we can drain it to // combine input with output from Python. @@ -121,7 +120,7 @@ trait EvalPythonExec extends UnaryExecNode { }.toSeq) // Add rows to queue to join later with the result. - val projectedRowIter = contextAwareIterator.map { inputRow => + val projectedRowIter = iter.map { inputRow => queue.add(inputRow.asInstanceOf[UnsafeRow]) projection(inputRow) } @@ -138,18 +137,3 @@ trait EvalPythonExec extends UnaryExecNode { } } } - -/** - * A TaskContext aware iterator. - * - * As the Python evaluation consumes the parent iterator in a separate thread, - * it could consume more data from the parent even after the task ends and the parent is closed. - * Thus, we should use ContextAwareIterator to stop consuming after the task ends. - */ -class ContextAwareIterator[IN](iter: Iterator[IN], context: TaskContext) extends Iterator[IN] { - - override def hasNext: Boolean = - !context.isCompleted() && !context.isInterrupted() && iter.hasNext - - override def next(): IN = iter.next() -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/MapInPandasExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/MapInPandasExec.scala index 7fc18f885a2d3..2bb808119c0ae 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/MapInPandasExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/MapInPandasExec.scala @@ -61,17 +61,16 @@ case class MapInPandasExec( val pythonRunnerConf = ArrowUtils.getPythonRunnerConfMap(conf) val outputTypes = child.schema - val context = TaskContext.get() - val contextAwareIterator = new ContextAwareIterator(inputIter, context) - // Here we wrap it via another row so that Python sides understand it // as a DataFrame. - val wrappedIter = contextAwareIterator.map(InternalRow(_)) + val wrappedIter = inputIter.map(InternalRow(_)) // DO NOT use iter.grouped(). See BatchIterator. 
val batchIter = if (batchSize > 0) new BatchIterator(wrappedIter, batchSize) else Iterator(wrappedIter) + val context = TaskContext.get() + val columnarBatchIter = new ArrowPythonRunner( chainedFunc, PythonEvalType.SQL_MAP_PANDAS_ITER_UDF, From e66201b30bc1f3da7284af14b32e5e6200768dbd Mon Sep 17 00:00:00 2001 From: Sarvesh Dave Date: Thu, 5 Nov 2020 16:22:31 +0900 Subject: [PATCH 0394/1009] [MINOR][SS][DOCS] Update join type in stream static joins code examples ### What changes were proposed in this pull request? Update join type in stream static joins code examples in structured streaming programming guide. 1) Scala, Java and Python examples have a common issue. The join keyword is "right_join", it should be "left_outer". _Reasons:_ a) This code snippet is an example of "left outer join" as the streaming df is on left and static df is on right. Also, right outer join between stream df(left) and static df(right) is not supported. b) The keyword "right_join/left_join" is unsupported and it should be "right_outer/left_outer". So, all of these code snippets have been updated to "left_outer". 2) R exmaple is correct, but the example is of "right_outer" with static df (left) and streaming df(right). It is changed to "left_outer" to make it consistent with other three examples of scala, java and python. ### Why are the changes needed? To fix the mistake in example code of documentation. ### Does this PR introduce _any_ user-facing change? Yes, it is a user-facing change (but documentation update only). **Screenshots 1: Scala/Java/python example (similar issue)** _Before:_ Screenshot 2020-11-05 at 12 16 09 AM _After:_ Screenshot 2020-11-05 at 12 17 12 AM **Screenshots 2: R example (Make it consistent with above change)** _Before:_ Screenshot 2020-11-05 at 12 19 57 AM _After:_ Screenshot 2020-11-05 at 12 20 51 AM ### How was this patch tested? The change was tested locally. 1) cd docs/ SKIP_API=1 jekyll build 2) Verify docs/_site/structured-streaming-programming-guide.html file in browser. Closes #30252 from sarveshdave1/doc-update-stream-static-joins. Authored-by: Sarvesh Dave Signed-off-by: Jungtaek Lim (HeartSaVioR) --- docs/structured-streaming-programming-guide.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md index ccd6f41f5c664..c671d6b590626 100644 --- a/docs/structured-streaming-programming-guide.md +++ b/docs/structured-streaming-programming-guide.md @@ -1117,7 +1117,7 @@ val staticDf = spark.read. ... val streamingDf = spark.readStream. ... streamingDf.join(staticDf, "type") // inner equi-join with a static DF -streamingDf.join(staticDf, "type", "right_join") // right outer join with a static DF +streamingDf.join(staticDf, "type", "left_outer") // left outer join with a static DF {% endhighlight %} @@ -1128,7 +1128,7 @@ streamingDf.join(staticDf, "type", "right_join") // right outer join with a sta Dataset staticDf = spark.read(). ...; Dataset streamingDf = spark.readStream(). ...; streamingDf.join(staticDf, "type"); // inner equi-join with a static DF -streamingDf.join(staticDf, "type", "right_join"); // right outer join with a static DF +streamingDf.join(staticDf, "type", "left_outer"); // left outer join with a static DF {% endhighlight %} @@ -1139,7 +1139,7 @@ streamingDf.join(staticDf, "type", "right_join"); // right outer join with a st staticDf = spark.read. ... streamingDf = spark.readStream. ... 
streamingDf.join(staticDf, "type") # inner equi-join with a static DF -streamingDf.join(staticDf, "type", "right_join") # right outer join with a static DF +streamingDf.join(staticDf, "type", "left_outer") # left outer join with a static DF {% endhighlight %} @@ -1151,10 +1151,10 @@ staticDf <- read.df(...) streamingDf <- read.stream(...) joined <- merge(streamingDf, staticDf, sort = FALSE) # inner equi-join with a static DF joined <- join( + streamingDf, staticDf, - streamingDf, streamingDf$value == staticDf$value, - "right_outer") # right outer join with a static DF + "left_outer") # left outer join with a static DF {% endhighlight %} From 21413b7dd4e19f725b21b92cddfbe73d1b381a05 Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Thu, 5 Nov 2020 18:21:17 +0900 Subject: [PATCH 0395/1009] [SPARK-30294][SS] Explicitly defines read-only StateStore and optimize for HDFSBackedStateStore ### What changes were proposed in this pull request? There's a concept of 'read-only' and 'read+write' state store in Spark which is defined "implicitly". Spark doesn't prevent write for 'read-only' state store; Spark just assumes read-only stateful operator will not modify the state store. Given it's not defined explicitly, the instance of state store has to be implemented as 'read+write' even it's being used as 'read-only', which sometimes brings confusion. For example, abort() in HDFSBackedStateStore - https://github.com/apache/spark/blob/d38f8167483d4d79e8360f24a8c0bffd51460659/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala#L143-L155 The comment sounds as if statement works differently between 'read-only' and 'read+write', but that's not true as both state store has state initialized as UPDATING (no difference). So 'read-only' state also creates the temporary file, initializes output streams to write to temporary file, closes output streams, and finally deletes the temporary file. This unnecessary operations are being done per batch/partition. This patch explicitly defines 'read-only' StateStore, and enables state store provider to create 'read-only' StateStore instance if requested. Relevant code paths are modified, as well as 'read-only' StateStore implementation for HDFSBackedStateStore is introduced. The new implementation gets rid of unnecessary operations explained above. In point of backward-compatibility view, the only thing being changed in public API side is `StateStoreProvider`. The trait `StateStoreProvider` has to be changed to allow requesting 'read-only' StateStore; this patch adds default implementation which leverages 'read+write' StateStore but wrapping with 'write-protected' StateStore instance, so that custom providers don't need to change their code to reflect the change. But if the providers can optimize for read-only workload, they'll be happy to make a change. Please note that this patch makes ReadOnlyStateStore extend StateStore and being referred as StateStore, as StateStore is being used in so many places and it's not easy to support both traits if we differentiate them. So unfortunately these write methods are still exposed for read-only state; it just throws UnsupportedOperationException. ### Why are the changes needed? The new API opens the chance to optimize read-only state store instance compared with read+write state store instance. HDFSBackedStateStoreProvider is modified to provide read-only version of state store which doesn't deal with temporary file as well as state machine. 
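As a sketch of what the explicit split buys at the call site (illustrative only; `countKeys` is a hypothetical helper and not part of this patch), read-only consumers can now be typed against `ReadStateStore`, which statically rules out `put`/`remove`/`commit`:

```
import org.apache.spark.sql.execution.streaming.state.{ReadStateStore, StateStore}

object ReadOnlyStateSketch {
  // Declared against ReadStateStore, so it cannot modify state by construction.
  def countKeys(store: ReadStateStore): Long = {
    try {
      store.iterator().size.toLong
    } finally {
      store.abort() // for a read-only store this is plain resource cleanup
    }
  }

  // A read+write store still works here, because StateStore extends ReadStateStore.
  def countKeysRW(store: StateStore): Long = countKeys(store)
}
```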
### Does this PR introduce any user-facing change? Clearly "no" for most end users, and also "no" for custom state store providers as it doesn't touch trait `StateStore` as well as provides default implementation for added method in trait `StateStoreProvider`. ### How was this patch tested? Modified UT. Existing UTs ensure the change doesn't break anything. Closes #26935 from HeartSaVioR/SPARK-30294. Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../state/HDFSBackedStateStoreProvider.scala | 46 ++++++-- .../streaming/state/StateStore.scala | 111 +++++++++++++++--- .../streaming/state/StateStoreRDD.scala | 104 ++++++++++++---- .../StreamingAggregationStateManager.scala | 22 ++-- .../execution/streaming/state/package.scala | 35 ++++++ .../streaming/statefulOperators.scala | 2 +- .../streaming/state/StateStoreSuite.scala | 4 +- 7 files changed, 261 insertions(+), 63 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala index 0a25d51666321..5c55034e88df5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala @@ -58,7 +58,6 @@ import org.apache.spark.util.{SizeEstimator, Utils} * - store.remove(...) * - store.commit() // commits all the updates to made; the new version will be returned * - store.iterator() // key-value data after last commit as an iterator - * - store.updates() // updates made in the last commit as an iterator * * Fault-tolerance model: * - Every set of updates is written to a delta file before committing. @@ -79,6 +78,27 @@ private[state] class HDFSBackedStateStoreProvider extends StateStoreProvider wit // java.util.ConcurrentModificationException type MapType = java.util.concurrent.ConcurrentHashMap[UnsafeRow, UnsafeRow] + class HDFSBackedReadStateStore(val version: Long, map: MapType) + extends ReadStateStore { + + override def id: StateStoreId = HDFSBackedStateStoreProvider.this.stateStoreId + + override def get(key: UnsafeRow): UnsafeRow = map.get(key) + + override def iterator(): Iterator[UnsafeRowPair] = { + val unsafeRowPair = new UnsafeRowPair() + map.entrySet.asScala.iterator.map { entry => + unsafeRowPair.withRows(entry.getKey, entry.getValue) + } + } + + override def abort(): Unit = {} + + override def toString(): String = { + s"HDFSReadStateStore[id=(op=${id.operatorId},part=${id.partitionId}),dir=$baseDir]" + } + } + /** Implementation of [[StateStore]] API which is backed by an HDFS-compatible file system */ class HDFSBackedStateStore(val version: Long, mapToUpdate: MapType) extends StateStore { @@ -142,9 +162,8 @@ private[state] class HDFSBackedStateStoreProvider extends StateStoreProvider wit /** Abort all the updates made on this store. This store will not be usable any more. */ override def abort(): Unit = { - // This if statement is to ensure that files are deleted only if there are changes to the - // StateStore. We have two StateStores for each task, one which is used only for reading, and - // the other used for read+write. We don't want the read-only to delete state files. + // This if statement is to ensure that files are deleted only once: if either commit or abort + // is called before, it will be no-op. 
if (state == UPDATING) { state = ABORTED cancelDeltaFile(compressedStream, deltaFileStream) @@ -197,15 +216,26 @@ private[state] class HDFSBackedStateStoreProvider extends StateStoreProvider wit } /** Get the state store for making updates to create a new `version` of the store. */ - override def getStore(version: Long): StateStore = synchronized { + override def getStore(version: Long): StateStore = { + val newMap = getLoadedMapForStore(version) + logInfo(s"Retrieved version $version of ${HDFSBackedStateStoreProvider.this} for update") + new HDFSBackedStateStore(version, newMap) + } + + /** Get the state store for reading to specific `version` of the store. */ + override def getReadStore(version: Long): ReadStateStore = { + val newMap = getLoadedMapForStore(version) + logInfo(s"Retrieved version $version of ${HDFSBackedStateStoreProvider.this} for readonly") + new HDFSBackedReadStateStore(version, newMap) + } + + private def getLoadedMapForStore(version: Long): MapType = synchronized { require(version >= 0, "Version cannot be less than 0") val newMap = new MapType() if (version > 0) { newMap.putAll(loadMap(version)) } - val store = new HDFSBackedStateStore(version, newMap) - logInfo(s"Retrieved version $version of ${HDFSBackedStateStoreProvider.this} for update") - store + newMap } override def init( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala index 092ca968f59c4..d52505fbdab35 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala @@ -36,10 +36,14 @@ import org.apache.spark.sql.types.StructType import org.apache.spark.util.{ThreadUtils, Utils} /** - * Base trait for a versioned key-value store. Each instance of a `StateStore` represents a specific - * version of state data, and such instances are created through a [[StateStoreProvider]]. + * Base trait for a versioned key-value store which provides read operations. Each instance of a + * `ReadStateStore` represents a specific version of state data, and such instances are created + * through a [[StateStoreProvider]]. + * + * `abort` method will be called when the task is completed - please clean up the resources in + * the method. */ -trait StateStore { +trait ReadStateStore { /** Unique identifier of the store */ def id: StateStoreId @@ -53,17 +57,6 @@ trait StateStore { */ def get(key: UnsafeRow): UnsafeRow - /** - * Put a new value for a non-null key. Implementations must be aware that the UnsafeRows in - * the params can be reused, and must make copies of the data as needed for persistence. - */ - def put(key: UnsafeRow, value: UnsafeRow): Unit - - /** - * Remove a single non-null key. - */ - def remove(key: UnsafeRow): Unit - /** * Get key value pairs with optional approximate `start` and `end` extents. * If the State Store implementation maintains indices for the data based on the optional @@ -81,6 +74,40 @@ trait StateStore { iterator() } + /** Return an iterator containing all the key-value pairs in the StateStore. */ + def iterator(): Iterator[UnsafeRowPair] + + /** + * Clean up the resource. + * + * The method name is to respect backward compatibility on [[StateStore]]. + */ + def abort(): Unit +} + +/** + * Base trait for a versioned key-value store which provides both read and write operations. 
Each + * instance of a `StateStore` represents a specific version of state data, and such instances are + * created through a [[StateStoreProvider]]. + * + * Unlike [[ReadStateStore]], `abort` method may not be called if the `commit` method succeeds + * to commit the change. (`hasCommitted` returns `true`.) Otherwise, `abort` method will be called. + * Implementation should deal with resource cleanup in both methods, and also need to guard with + * double resource cleanup. + */ +trait StateStore extends ReadStateStore { + + /** + * Put a new value for a non-null key. Implementations must be aware that the UnsafeRows in + * the params can be reused, and must make copies of the data as needed for persistence. + */ + def put(key: UnsafeRow, value: UnsafeRow): Unit + + /** + * Remove a single non-null key. + */ + def remove(key: UnsafeRow): Unit + /** * Commit all the updates that have been made to the store, and return the new version. * Implementations should ensure that no more updates (puts, removes) can be after a commit in @@ -92,13 +119,13 @@ trait StateStore { * Abort all the updates that have been made to the store. Implementations should ensure that * no more updates (puts, removes) can be after an abort in order to avoid incorrect usage. */ - def abort(): Unit + override def abort(): Unit /** * Return an iterator containing all the key-value pairs in the StateStore. Implementations must * ensure that updates (puts, removes) can be made while iterating over this iterator. */ - def iterator(): Iterator[UnsafeRowPair] + override def iterator(): Iterator[UnsafeRowPair] /** Current metrics of the state store */ def metrics: StateStoreMetrics @@ -109,6 +136,19 @@ trait StateStore { def hasCommitted: Boolean } +/** Wraps the instance of StateStore to make the instance read-only. */ +class WrappedReadStateStore(store: StateStore) extends ReadStateStore { + override def id: StateStoreId = store.id + + override def version: Long = store.version + + override def get(key: UnsafeRow): UnsafeRow = store.get(key) + + override def iterator(): Iterator[UnsafeRowPair] = store.iterator() + + override def abort(): Unit = store.abort() +} + /** * Metrics reported by a state store * @param numKeys Number of keys in the state store @@ -206,6 +246,15 @@ trait StateStoreProvider { /** Return an instance of [[StateStore]] representing state data of the given version */ def getStore(version: Long): StateStore + /** + * Return an instance of [[ReadStateStore]] representing state data of the given version. + * By default it will return the same instance as getStore(version) but wrapped to prevent + * modification. Providers can override and return optimized version of [[ReadStateStore]] + * based on the fact the instance will be only used for reading. + */ + def getReadStore(version: Long): ReadStateStore = + new WrappedReadStateStore(getStore(version)) + /** Optional method for providers to allow for background maintenance (e.g. compactions) */ def doMaintenance(): Unit = { } @@ -379,6 +428,21 @@ object StateStore extends Logging { @GuardedBy("loadedProviders") private var _coordRef: StateStoreCoordinatorRef = null + /** Get or create a read-only store associated with the id. 
*/ + def getReadOnly( + storeProviderId: StateStoreProviderId, + keySchema: StructType, + valueSchema: StructType, + indexOrdinal: Option[Int], + version: Long, + storeConf: StateStoreConf, + hadoopConf: Configuration): ReadStateStore = { + require(version >= 0) + val storeProvider = getStateStoreProvider(storeProviderId, keySchema, valueSchema, + indexOrdinal, storeConf, hadoopConf) + storeProvider.getReadStore(version) + } + /** Get or create a store associated with the id. */ def get( storeProviderId: StateStoreProviderId, @@ -389,7 +453,19 @@ object StateStore extends Logging { storeConf: StateStoreConf, hadoopConf: Configuration): StateStore = { require(version >= 0) - val storeProvider = loadedProviders.synchronized { + val storeProvider = getStateStoreProvider(storeProviderId, keySchema, valueSchema, + indexOrdinal, storeConf, hadoopConf) + storeProvider.getStore(version) + } + + private def getStateStoreProvider( + storeProviderId: StateStoreProviderId, + keySchema: StructType, + valueSchema: StructType, + indexOrdinal: Option[Int], + storeConf: StateStoreConf, + hadoopConf: Configuration): StateStoreProvider = { + loadedProviders.synchronized { startMaintenanceIfNeeded() val provider = loadedProviders.getOrElseUpdate( storeProviderId, @@ -399,7 +475,6 @@ object StateStore extends Logging { reportActiveStoreInstance(storeProviderId) provider } - storeProvider.getStore(version) } /** Unload a state store provider */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala index 90a53727aa317..eda191f28bf18 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala @@ -29,14 +29,51 @@ import org.apache.spark.sql.internal.SessionState import org.apache.spark.sql.types.StructType import org.apache.spark.util.SerializableConfiguration +// This doesn't directly override RDD methods as MiMa complains it. +abstract class BaseStateStoreRDD[T: ClassTag, U: ClassTag]( + dataRDD: RDD[T], + checkpointLocation: String, + queryRunId: UUID, + operatorId: Long, + sessionState: SessionState, + @transient private val storeCoordinator: Option[StateStoreCoordinatorRef], + extraOptions: Map[String, String] = Map.empty) extends RDD[U](dataRDD) { + + protected val storeConf = new StateStoreConf(sessionState.conf, extraOptions) + + // A Hadoop Configuration can be about 10 KB, which is pretty big, so broadcast it + protected val hadoopConfBroadcast = dataRDD.context.broadcast( + new SerializableConfiguration(sessionState.newHadoopConf())) + + /** Implementations can simply call this method in getPreferredLocations. */ + protected def _getPartitions: Array[Partition] = dataRDD.partitions + + /** + * Set the preferred location of each partition using the executor that has the related + * [[StateStoreProvider]] already loaded. + * + * Implementations can simply call this method in getPreferredLocations. 
+ */ + protected def _getPreferredLocations(partition: Partition): Seq[String] = { + val stateStoreProviderId = getStateProviderId(partition) + storeCoordinator.flatMap(_.getLocation(stateStoreProviderId)).toSeq + } + + protected def getStateProviderId(partition: Partition): StateStoreProviderId = { + StateStoreProviderId( + StateStoreId(checkpointLocation, operatorId, partition.index), + queryRunId) + } +} + /** - * An RDD that allows computations to be executed against [[StateStore]]s. It + * An RDD that allows computations to be executed against [[ReadStateStore]]s. It * uses the [[StateStoreCoordinator]] to get the locations of loaded state stores * and use that as the preferred locations. */ -class StateStoreRDD[T: ClassTag, U: ClassTag]( +class ReadStateStoreRDD[T: ClassTag, U: ClassTag]( dataRDD: RDD[T], - storeUpdateFunction: (StateStore, Iterator[T]) => Iterator[U], + storeReadFunction: (ReadStateStore, Iterator[T]) => Iterator[U], checkpointLocation: String, queryRunId: UUID, operatorId: Long, @@ -47,34 +84,55 @@ class StateStoreRDD[T: ClassTag, U: ClassTag]( sessionState: SessionState, @transient private val storeCoordinator: Option[StateStoreCoordinatorRef], extraOptions: Map[String, String] = Map.empty) - extends RDD[U](dataRDD) { + extends BaseStateStoreRDD[T, U](dataRDD, checkpointLocation, queryRunId, operatorId, + sessionState, storeCoordinator, extraOptions) { - private val storeConf = new StateStoreConf(sessionState.conf, extraOptions) + override protected def getPartitions: Array[Partition] = _getPartitions - // A Hadoop Configuration can be about 10 KB, which is pretty big, so broadcast it - private val hadoopConfBroadcast = dataRDD.context.broadcast( - new SerializableConfiguration(sessionState.newHadoopConf())) + override def getPreferredLocations(partition: Partition): Seq[String] = + _getPreferredLocations(partition) - override protected def getPartitions: Array[Partition] = dataRDD.partitions + override def compute(partition: Partition, ctxt: TaskContext): Iterator[U] = { + val storeProviderId = getStateProviderId(partition) - /** - * Set the preferred location of each partition using the executor that has the related - * [[StateStoreProvider]] already loaded. - */ - override def getPreferredLocations(partition: Partition): Seq[String] = { - val stateStoreProviderId = StateStoreProviderId( - StateStoreId(checkpointLocation, operatorId, partition.index), - queryRunId) - storeCoordinator.flatMap(_.getLocation(stateStoreProviderId)).toSeq + val store = StateStore.getReadOnly( + storeProviderId, keySchema, valueSchema, indexOrdinal, storeVersion, + storeConf, hadoopConfBroadcast.value.value) + val inputIter = dataRDD.iterator(partition, ctxt) + storeReadFunction(store, inputIter) } +} + +/** + * An RDD that allows computations to be executed against [[StateStore]]s. It + * uses the [[StateStoreCoordinator]] to get the locations of loaded state stores + * and use that as the preferred locations. 
+ */ +class StateStoreRDD[T: ClassTag, U: ClassTag]( + dataRDD: RDD[T], + storeUpdateFunction: (StateStore, Iterator[T]) => Iterator[U], + checkpointLocation: String, + queryRunId: UUID, + operatorId: Long, + storeVersion: Long, + keySchema: StructType, + valueSchema: StructType, + indexOrdinal: Option[Int], + sessionState: SessionState, + @transient private val storeCoordinator: Option[StateStoreCoordinatorRef], + extraOptions: Map[String, String] = Map.empty) + extends BaseStateStoreRDD[T, U](dataRDD, checkpointLocation, queryRunId, operatorId, + sessionState, storeCoordinator, extraOptions) { + + override protected def getPartitions: Array[Partition] = _getPartitions + + override def getPreferredLocations(partition: Partition): Seq[String] = + _getPreferredLocations(partition) override def compute(partition: Partition, ctxt: TaskContext): Iterator[U] = { - var store: StateStore = null - val storeProviderId = StateStoreProviderId( - StateStoreId(checkpointLocation, operatorId, partition.index), - queryRunId) + val storeProviderId = getStateProviderId(partition) - store = StateStore.get( + val store = StateStore.get( storeProviderId, keySchema, valueSchema, indexOrdinal, storeVersion, storeConf, hadoopConfBroadcast.value.value) val inputIter = dataRDD.iterator(partition, ctxt) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StreamingAggregationStateManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StreamingAggregationStateManager.scala index 9bfb9561b42a1..0496e4768b681 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StreamingAggregationStateManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StreamingAggregationStateManager.scala @@ -34,7 +34,7 @@ sealed trait StreamingAggregationStateManager extends Serializable { def getStateValueSchema: StructType /** Get the current value of a non-null key from the target state store. */ - def get(store: StateStore, key: UnsafeRow): UnsafeRow + def get(store: ReadStateStore, key: UnsafeRow): UnsafeRow /** * Put a new value for a non-null key to the target state store. Note that key will be @@ -52,13 +52,13 @@ sealed trait StreamingAggregationStateManager extends Serializable { def remove(store: StateStore, key: UnsafeRow): Unit /** Return an iterator containing all the key-value pairs in target state store. */ - def iterator(store: StateStore): Iterator[UnsafeRowPair] + def iterator(store: ReadStateStore): Iterator[UnsafeRowPair] /** Return an iterator containing all the keys in target state store. */ - def keys(store: StateStore): Iterator[UnsafeRow] + def keys(store: ReadStateStore): Iterator[UnsafeRow] /** Return an iterator containing all the values in target state store. 
*/ - def values(store: StateStore): Iterator[UnsafeRow] + def values(store: ReadStateStore): Iterator[UnsafeRow] } object StreamingAggregationStateManager extends Logging { @@ -90,7 +90,7 @@ abstract class StreamingAggregationStateManagerBaseImpl( override def remove(store: StateStore, key: UnsafeRow): Unit = store.remove(key) - override def keys(store: StateStore): Iterator[UnsafeRow] = { + override def keys(store: ReadStateStore): Iterator[UnsafeRow] = { // discard and don't convert values to avoid computation store.getRange(None, None).map(_.key) } @@ -113,7 +113,7 @@ class StreamingAggregationStateManagerImplV1( override def getStateValueSchema: StructType = inputRowAttributes.toStructType - override def get(store: StateStore, key: UnsafeRow): UnsafeRow = { + override def get(store: ReadStateStore, key: UnsafeRow): UnsafeRow = { store.get(key) } @@ -121,11 +121,11 @@ class StreamingAggregationStateManagerImplV1( store.put(getKey(row), row) } - override def iterator(store: StateStore): Iterator[UnsafeRowPair] = { + override def iterator(store: ReadStateStore): Iterator[UnsafeRowPair] = { store.iterator() } - override def values(store: StateStore): Iterator[UnsafeRow] = { + override def values(store: ReadStateStore): Iterator[UnsafeRow] = { store.iterator().map(_.value) } } @@ -167,7 +167,7 @@ class StreamingAggregationStateManagerImplV2( override def getStateValueSchema: StructType = valueExpressions.toStructType - override def get(store: StateStore, key: UnsafeRow): UnsafeRow = { + override def get(store: ReadStateStore, key: UnsafeRow): UnsafeRow = { val savedState = store.get(key) if (savedState == null) { return savedState @@ -182,11 +182,11 @@ class StreamingAggregationStateManagerImplV2( store.put(key, value) } - override def iterator(store: StateStore): Iterator[UnsafeRowPair] = { + override def iterator(store: ReadStateStore): Iterator[UnsafeRowPair] = { store.iterator().map(rowPair => new UnsafeRowPair(rowPair.key, restoreOriginalRow(rowPair))) } - override def values(store: StateStore): Iterator[UnsafeRow] = { + override def values(store: ReadStateStore): Iterator[UnsafeRow] = { store.iterator().map(rowPair => restoreOriginalRow(rowPair)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/package.scala index c7a332b6d778e..fa89c506587b1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/package.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/package.scala @@ -82,5 +82,40 @@ package object state { storeCoordinator, extraOptions) } + + /** Map each partition of an RDD along with data in a [[ReadStateStore]]. */ + private[streaming] def mapPartitionsWithReadStateStore[U: ClassTag]( + stateInfo: StatefulOperatorStateInfo, + keySchema: StructType, + valueSchema: StructType, + indexOrdinal: Option[Int], + sessionState: SessionState, + storeCoordinator: Option[StateStoreCoordinatorRef], + extraOptions: Map[String, String] = Map.empty)( + storeReadFn: (ReadStateStore, Iterator[T]) => Iterator[U]) + : ReadStateStoreRDD[T, U] = { + + val cleanedF = dataRDD.sparkContext.clean(storeReadFn) + val wrappedF = (store: ReadStateStore, iter: Iterator[T]) => { + // Clean up the state store. 
+ TaskContext.get().addTaskCompletionListener[Unit](_ => { + store.abort() + }) + cleanedF(store, iter) + } + new ReadStateStoreRDD( + dataRDD, + wrappedF, + stateInfo.checkpointLocation, + stateInfo.queryRunId, + stateInfo.operatorId, + stateInfo.storeVersion, + keySchema, + valueSchema, + indexOrdinal, + sessionState, + storeCoordinator, + extraOptions) + } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala index 497b13793a67b..f5fbe0fc32254 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala @@ -247,7 +247,7 @@ case class StateStoreRestoreExec( override protected def doExecute(): RDD[InternalRow] = { val numOutputRows = longMetric("numOutputRows") - child.execute().mapPartitionsWithStateStore( + child.execute().mapPartitionsWithReadStateStore( getStateInfo, keyExpressions.toStructType, stateManager.getStateValueSchema, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala index 9dc6c0a760d7e..c461bbb7e38eb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala @@ -958,7 +958,7 @@ abstract class StateStoreSuiteBase[ProviderClass <: StateStoreProvider] // two state stores val provider1 = newStoreProvider(storeId) - val restoreStore = provider1.getStore(1) + val restoreStore = provider1.getReadStore(1) val saveStore = provider1.getStore(1) put(saveStore, key, get(restoreStore, key).get + 1) @@ -1034,7 +1034,7 @@ object StateStoreTestsHelper { store.put(stringToRow(key), intToRow(value)) } - def get(store: StateStore, key: String): Option[Int] = { + def get(store: ReadStateStore, key: String): Option[Int] = { Option(store.get(stringToRow(key))).map(rowToInt) } From 26ea417b1448d679fdc777705ee2f99f4e741ef3 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 5 Nov 2020 09:23:41 -0800 Subject: [PATCH 0396/1009] [SPARK-33362][SQL] skipSchemaResolution should still require query to be resolved ### What changes were proposed in this pull request? Fix a small bug in `V2WriteCommand.resolved`. It should always require the `table` and `query` to be resolved. ### Why are the changes needed? To prevent potential bugs that we skip resolve the input query. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? a new test Closes #30265 from cloud-fan/ds-minor-2. 
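As a concrete illustration of the gap being closed, the new test added in this patch builds a write command whose target table accepts any schema but whose input query is still unresolved; the plan must now report `resolved = false`. The snippet below mirrors that test and relies on the suite's `byName`/`assertNotResolved` helpers:

```scala
// Target table that skips schema resolution, paired with an unresolved input query.
val table = TestRelationAcceptAnySchema(StructType(Seq(
  StructField("a", FloatType),
  StructField("b", DoubleType))).toAttributes)
val query = UnresolvedRelation(Seq("t"))

// Before this fix, skipSchemaResolution could short-circuit the check and the command
// could be treated as resolved; after the fix the plan stays unresolved.
val parsedPlan = byName(table, query)
assertNotResolved(parsedPlan)
```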
Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/analysis/Analyzer.scala | 3 ++- .../catalyst/plans/logical/v2Commands.scala | 26 ++++++++++--------- .../analysis/DataSourceV2AnalysisSuite.scala | 9 +++++++ 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 69cf30c34d494..f32190bc30df0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1507,7 +1507,8 @@ class Analyzer( g.copy(resolvedSelectedExprs, resolvedGroupingExprs, g.child, resolvedAggExprs) - case o: OverwriteByExpression if !o.outputResolved => + case o: OverwriteByExpression + if !(o.table.resolved && o.query.resolved && o.outputResolved) => // do not resolve expression attributes until the query attributes are resolved against the // table by ResolveOutputRelation. that rule will alias the attributes to the table's names. o diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index a1e26ae1ba2c8..f18aecd19b8d8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -35,20 +35,20 @@ trait V2WriteCommand extends Command { override def children: Seq[LogicalPlan] = Seq(query) - override lazy val resolved: Boolean = outputResolved + override lazy val resolved: Boolean = table.resolved && query.resolved && outputResolved def outputResolved: Boolean = { + assert(table.resolved && query.resolved, + "`outputResolved` can only be called when `table` and `query` are both resolved.") // If the table doesn't require schema match, we don't need to resolve the output columns. 
- table.skipSchemaResolution || { - table.resolved && query.resolved && query.output.size == table.output.size && - query.output.zip(table.output).forall { - case (inAttr, outAttr) => - // names and types must match, nullability must be compatible - inAttr.name == outAttr.name && - DataType.equalsIgnoreCompatibleNullability(inAttr.dataType, outAttr.dataType) && - (outAttr.nullable || !inAttr.nullable) - } - } + table.skipSchemaResolution || (query.output.size == table.output.size && + query.output.zip(table.output).forall { + case (inAttr, outAttr) => + // names and types must match, nullability must be compatible + inAttr.name == outAttr.name && + DataType.equalsIgnoreCompatibleNullability(inAttr.dataType, outAttr.dataType) && + (outAttr.nullable || !inAttr.nullable) + }) } } @@ -86,7 +86,9 @@ case class OverwriteByExpression( query: LogicalPlan, writeOptions: Map[String, String], isByName: Boolean) extends V2WriteCommand { - override lazy val resolved: Boolean = outputResolved && deleteExpr.resolved + override lazy val resolved: Boolean = { + table.resolved && query.resolved && outputResolved && deleteExpr.resolved + } } object OverwriteByExpression { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala index 7a2320f931da3..52dcf63426a7e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala @@ -248,6 +248,15 @@ abstract class DataSourceV2AnalysisBaseSuite extends AnalysisTest { def byPosition(table: NamedRelation, query: LogicalPlan): LogicalPlan + test("skipSchemaResolution should still require query to be resolved") { + val table = TestRelationAcceptAnySchema(StructType(Seq( + StructField("a", FloatType), + StructField("b", DoubleType))).toAttributes) + val query = UnresolvedRelation(Seq("t")) + val parsedPlan = byName(table, query) + assertNotResolved(parsedPlan) + } + test("byName: basic behavior") { val query = TestRelation(table.schema.toAttributes) From 208b94e4c1e5c500e76c54e8f7a2be6a07ef3f7a Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Thu, 5 Nov 2020 09:29:53 -0800 Subject: [PATCH 0397/1009] [SPARK-33353][BUILD] Cache dependencies for Coursier with new sbt in GitHub Actions ### What changes were proposed in this pull request? This PR change the behavior of GitHub Actions job that caches dependencies. SPARK-33226 upgraded sbt to 1.4.1. As of 1.3.0, sbt uses Coursier as the dependency resolver / fetcher. So let's change the dependency cache configuration for the GitHub Actions job. ### Why are the changes needed? To make build faster with Coursier for the GitHub Actions job. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Should be done by GitHub Actions itself. Closes #30259 from sarutak/coursier-cache. 
Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 32 ++++++++++++++-------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 55c578e15724a..e4762523f7018 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -111,13 +111,13 @@ jobs: key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ matrix.java }}-${{ matrix.hadoop }}-maven- - - name: Cache Ivy local repository + - name: Cache Coursier local repository uses: actions/cache@v2 with: - path: ~/.ivy2/cache - key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} + path: ~/.cache/coursier + key: ${{ matrix.java }}-${{ matrix.hadoop }}-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} restore-keys: | - ${{ matrix.java }}-${{ matrix.hadoop }}-ivy- + ${{ matrix.java }}-${{ matrix.hadoop }}-coursier- - name: Install JDK ${{ matrix.java }} uses: actions/setup-java@v1 with: @@ -206,13 +206,13 @@ jobs: key: pyspark-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | pyspark-maven- - - name: Cache Ivy local repository + - name: Cache Coursier local repository uses: actions/cache@v2 with: - path: ~/.ivy2/cache - key: pyspark-ivy-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} + path: ~/.cache/coursier + key: pyspark-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} restore-keys: | - pyspark-ivy- + pyspark-coursier- - name: Install Python 3.6 uses: actions/setup-python@v2 with: @@ -282,13 +282,13 @@ jobs: key: sparkr-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | sparkr-maven- - - name: Cache Ivy local repository + - name: Cache Coursier local repository uses: actions/cache@v2 with: - path: ~/.ivy2/cache - key: sparkr-ivy-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} + path: ~/.cache/coursier + key: sparkr-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} restore-keys: | - sparkr-ivy- + sparkr-coursier- - name: Run tests run: | mkdir -p ~/.m2 @@ -404,13 +404,13 @@ jobs: steps: - name: Checkout Spark repository uses: actions/checkout@v2 - - name: Cache Ivy local repository + - name: Cache Coursier local repository uses: actions/cache@v2 with: - path: ~/.ivy2/cache - key: scala-213-ivy-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} + path: ~/.cache/coursier + key: scala-213-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} restore-keys: | - scala-213-ivy- + scala-213-coursier- - name: Install Java 11 uses: actions/setup-java@v1 with: From 1a704793f4846610307d18a8bf5e23a3f97525d3 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Thu, 5 Nov 2020 10:09:28 -0800 Subject: [PATCH 0398/1009] [SPARK-33290][SQL][DOCS][FOLLOW-UP] Update SQL migration guide ### What changes were proposed in this pull request? Update SQL migration guide for SPARK-33290 ### Why are the changes needed? Make the change better documented. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A Closes #30256 from sunchao/SPARK-33290-2. 
Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- docs/sql-migration-guide.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index fdc764a93424b..55618308c300a 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -52,6 +52,8 @@ license: | - In Spark 3.1, the `schema_of_json` and `schema_of_csv` functions return the schema in the SQL format in which field names are quoted. In Spark 3.0, the function returns a catalog string without field quoting and in lower case. + - In Spark 3.1, refreshing a table will trigger an uncache operation for all other caches that reference the table, even if the table itself is not cached. In Spark 3.0 the operation will only be triggered if the table itself is cached. + ## Upgrading from Spark SQL 3.0 to 3.0.1 - In Spark 3.0, JSON datasource and JSON function `schema_of_json` infer TimestampType from string values if they match to the pattern defined by the JSON option `timestampFormat`. Since version 3.0.1, the timestamp type inference is disabled by default. Set the JSON option `inferTimestamp` to `true` to enable such type inference. From 324275ae8350ec15844ce384f40f1ecc4acdc072 Mon Sep 17 00:00:00 2001 From: Erik Krogen Date: Thu, 5 Nov 2020 12:38:42 -0600 Subject: [PATCH 0399/1009] [SPARK-33185][YARN] Set up yarn.Client to print direct links to driver stdout/stderr ### What changes were proposed in this pull request? Currently when run in `cluster` mode on YARN, the Spark `yarn.Client` will print out the application report into the logs, to be easily viewed by users. For example: ``` INFO yarn.Client: client token: Token { kind: YARN_CLIENT_TOKEN, service: } diagnostics: N/A ApplicationMaster host: X.X.X.X ApplicationMaster RPC port: 0 queue: default start time: 1602782566027 final status: UNDEFINED tracking URL: http://hostname:8888/proxy/application_/ user: xkrogen ``` I propose adding, alongside the application report, some additional lines like: ``` Driver Logs (stdout): http://hostname:8042/node/containerlogs/container_/xkrogen/stdout?start=-4096 Driver Logs (stderr): http://hostname:8042/node/containerlogs/container_/xkrogen/stderr?start=-4096 ``` This information isn't contained in the `ApplicationReport`, so it's necessary to query the ResourceManager REST API. For now I have added this as an always-on feature, but if there is any concern about adding this REST dependency, I think hiding this feature behind an off-by-default flag is reasonable. ### Why are the changes needed? Typically, the tracking URL can be used to find the logs of the ApplicationMaster/driver while the application is running. Later, the Spark History Server can be used to track this information down, using the stdout/stderr links on the Executors page. However, in the situation when the driver crashed _before_ writing out a history file, the SHS may not be aware of this application, and thus does not contain links to the driver logs. When this situation arises, it can be difficult for users to debug further, since they can't easily find their driver logs. It is possible to reach the logs by using the `yarn logs` commands, but the average Spark user isn't aware of this and shouldn't have to be. With this information readily available in the logs, users can quickly jump to their driver logs, even if it crashed before the SHS became aware of the application. 
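Concretely, the new links are derived from the `logsLink` of the application's most recent attempt, fetched from the RM REST API. A trimmed, illustrative round-trip is shown below; the host name and container ID are placeholders modeled on the new `ClientSuite` test, and `parseAppAttemptsJsonResponse` is the helper added by this patch:

```scala
// Trimmed example of the JSON returned by
//   GET <RM web URL>/ws/v1/cluster/apps/<appId>/appattempts
// and the driver log links derived from its logsLink field.
val json =
  """{"appAttempts": {"appAttempt": [{
    |  "containerId": "container_e1_1500000000000_1234567_01_000001",
    |  "nodeHttpAddress": "node.example.com:8042",
    |  "logsLink": "http://node.example.com:8042/node/containerlogs/container_e1_1500000000000_1234567_01_000001/username"
    |}]}}""".stripMargin

Client.parseAppAttemptsJsonResponse(json)
// Map(stdout -> <logsLink>/stdout?start=-4096, stderr -> <logsLink>/stderr?start=-4096)
```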
This has the additional benefit of providing a quick way to access driver logs, which often contain useful information, in a single click (instead of navigating through the Spark UI). ### Does this PR introduce _any_ user-facing change? Yes, some additional print statements will be created in the application report when using YARN in cluster mode. ### How was this patch tested? Added unit tests for the parsing logic in `yarn.ClientSuite`. Also tested against a live cluster. When the driver is running: ``` INFO Client: Application report for application_XXXXXXXXX_YYYYYY (state: RUNNING) INFO Client: client token: Token { kind: YARN_CLIENT_TOKEN, service: } diagnostics: N/A ApplicationMaster host: host.example.com ApplicationMaster RPC port: ###### queue: queue_name start time: 1604529046091 final status: UNDEFINED tracking URL: http://host.example.com:8080/proxy/application_XXXXXXXXX_YYYYYY/ user: xkrogen Driver Logs (stdout): http://host.example.com:8042/node/containerlogs/container_e07_XXXXXXXXX_YYYYYY_01_000001/xkrogen/stdout?start=-4096 Driver Logs (stderr): http://host.example.com:8042/node/containerlogs/container_e07_XXXXXXXXX_YYYYYY_01_000001/xkrogen/stderr?start=-4096 INFO Client: Application report for application_XXXXXXXXX_YYYYYY (state: RUNNING) ``` I confirmed that when the driver has not yet launched, the report does not include the two Driver Logs items. Will omit the output here for brevity since it looks the same. Closes #30096 from xkrogen/xkrogen-SPARK-33185-yarn-client-print. Authored-by: Erik Krogen Signed-off-by: Mridul Muralidharan gmail.com> --- .../org/apache/spark/deploy/yarn/Client.scala | 73 +++++++++++++++++-- .../org/apache/spark/deploy/yarn/config.scala | 9 +++ .../spark/util/YarnContainerInfoHelper.scala | 14 +++- .../spark/deploy/yarn/ClientSuite.scala | 47 ++++++++++++ 4 files changed, 134 insertions(+), 9 deletions(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 517a4af2e4b02..30ca4a6615fe8 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -25,11 +25,16 @@ import java.util.{Locale, Properties, UUID} import java.util.zip.{ZipEntry, ZipOutputStream} import scala.collection.JavaConverters._ +import scala.collection.immutable.{Map => IMap} import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, ListBuffer, Map} import scala.util.control.NonFatal +import com.fasterxml.jackson.databind.ObjectMapper import com.google.common.base.Objects import com.google.common.io.Files +import javax.ws.rs.client.ClientBuilder +import javax.ws.rs.core.MediaType +import javax.ws.rs.core.Response.Status.Family import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs._ import org.apache.hadoop.fs.permission.FsPermission @@ -46,6 +51,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException import org.apache.hadoop.yarn.security.AMRMTokenIdentifier import org.apache.hadoop.yarn.util.Records +import org.apache.hadoop.yarn.webapp.util.WebAppUtils import org.apache.spark.{SecurityManager, SparkConf, SparkException} import org.apache.spark.api.python.PythonUtils @@ -58,7 +64,7 @@ import org.apache.spark.internal.config._ import org.apache.spark.internal.config.Python._ import org.apache.spark.launcher.{LauncherBackend, SparkAppHandle, 
YarnCommandBuilderUtils} import org.apache.spark.rpc.RpcEnv -import org.apache.spark.util.{CallerContext, Utils} +import org.apache.spark.util.{CallerContext, Utils, YarnContainerInfoHelper} private[spark] class Client( val args: ClientArguments, @@ -1080,9 +1086,9 @@ private[spark] class Client( // If DEBUG is enabled, log report details every iteration // Otherwise, log them every time the application changes state if (log.isDebugEnabled) { - logDebug(formatReportDetails(report)) + logDebug(formatReportDetails(report, getDriverLogsLink(report.getApplicationId))) } else if (lastState != state) { - logInfo(formatReportDetails(report)) + logInfo(formatReportDetails(report, getDriverLogsLink(report.getApplicationId))) } } @@ -1152,7 +1158,17 @@ private[spark] class Client( appMaster } - private def formatReportDetails(report: ApplicationReport): String = { + /** + * Format an application report and optionally, links to driver logs, in a human-friendly manner. + * + * @param report The application report from YARN. + * @param driverLogsLinks A map of driver log files and their links. Keys are the file names + * (e.g. `stdout`), and values are the links. If empty, nothing will be + * printed. + * @return Human-readable version of the input data. + */ + private def formatReportDetails(report: ApplicationReport, + driverLogsLinks: IMap[String, String]): String = { val details = Seq[(String, String)]( ("client token", getClientToken(report)), ("diagnostics", report.getDiagnostics), @@ -1163,7 +1179,7 @@ private[spark] class Client( ("final status", report.getFinalApplicationStatus.toString), ("tracking URL", report.getTrackingUrl), ("user", report.getUser) - ) + ) ++ driverLogsLinks.map { case (fname, link) => (s"Driver Logs ($fname)", link) } // Use more loggable format if value is null or empty details.map { case (k, v) => @@ -1172,6 +1188,37 @@ private[spark] class Client( }.mkString("") } + /** + * Fetch links to the logs of the driver for the given application ID. This requires hitting the + * RM REST API. Returns an empty map if the links could not be fetched. If this feature is + * disabled via [[CLIENT_INCLUDE_DRIVER_LOGS_LINK]], an empty map is returned immediately. + */ + private def getDriverLogsLink(appId: ApplicationId): IMap[String, String] = { + if (!sparkConf.get(CLIENT_INCLUDE_DRIVER_LOGS_LINK)) { + return IMap() + } + try { + val baseRmUrl = WebAppUtils.getRMWebAppURLWithScheme(hadoopConf) + val response = ClientBuilder.newClient() + .target(baseRmUrl) + .path("ws").path("v1").path("cluster").path("apps") + .path(appId.toString).path("appattempts") + .request(MediaType.APPLICATION_JSON) + .get() + response.getStatusInfo.getFamily match { + case Family.SUCCESSFUL => parseAppAttemptsJsonResponse(response.readEntity(classOf[String])) + case _ => + logWarning(s"Unable to fetch app attempts info from $baseRmUrl, got " + + s"status code ${response.getStatus}: ${response.getStatusInfo.getReasonPhrase}") + IMap() + } + } catch { + case e: Exception => + logWarning(s"Unable to get driver log links for $appId", e) + IMap() + } + } + /** * Submit an application to the ResourceManager. 
* If set spark.yarn.submit.waitAppCompletion to true, it will stay alive @@ -1186,7 +1233,7 @@ private[spark] class Client( val report = getApplicationReport(appId) val state = report.getYarnApplicationState logInfo(s"Application report for $appId (state: $state)") - logInfo(formatReportDetails(report)) + logInfo(formatReportDetails(report, getDriverLogsLink(report.getApplicationId))) if (state == YarnApplicationState.FAILED || state == YarnApplicationState.KILLED) { throw new SparkException(s"Application $appId finished with status: $state") } @@ -1577,6 +1624,20 @@ private object Client extends Logging { writer.flush() out.closeEntry() } + + private[yarn] def parseAppAttemptsJsonResponse(jsonString: String): IMap[String, String] = { + val objectMapper = new ObjectMapper() + // If JSON response is malformed somewhere along the way, MissingNode will be returned, + // which allows for safe continuation of chaining. The `elements()` call will be empty, + // and None will get returned. + objectMapper.readTree(jsonString) + .path("appAttempts").path("appAttempt") + .elements().asScala.toList.takeRight(1).headOption + .map(_.path("logsLink").asText("")) + .filterNot(_ == "") + .map(baseUrl => YarnContainerInfoHelper.getLogUrlsFromBaseUrl(baseUrl)) + .getOrElse(IMap()) + } } private[spark] class YarnClusterApplication extends SparkApplication { diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala index f2e838f6270c9..89a4af2d2a741 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala @@ -189,6 +189,15 @@ package object config extends Logging { .timeConf(TimeUnit.MILLISECONDS) .createWithDefaultString("1s") + private[spark] val CLIENT_INCLUDE_DRIVER_LOGS_LINK = + ConfigBuilder("spark.yarn.includeDriverLogsLink") + .doc("In cluster mode, whether the client application report includes links to the driver " + + "container's logs. This requires polling the ResourceManager's REST API, so it " + + "places some additional load on the RM.") + .version("3.1.0") + .booleanConf + .createWithDefault(false) + /* Shared Client-mode AM / Driver configuration. 
*/ private[spark] val AM_MAX_WAIT_TIME = ConfigBuilder("spark.yarn.am.waitTime") diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/util/YarnContainerInfoHelper.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/util/YarnContainerInfoHelper.scala index 5e39422e868b7..854fe18c22430 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/util/YarnContainerInfoHelper.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/util/YarnContainerInfoHelper.scala @@ -28,6 +28,16 @@ import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil import org.apache.spark.internal.Logging private[spark] object YarnContainerInfoHelper extends Logging { + + private[this] val DRIVER_LOG_FILE_NAMES = Seq("stdout", "stderr") + private[this] val DRIVER_LOG_START_OFFSET = -4096 + + def getLogUrlsFromBaseUrl(baseUrl: String): Map[String, String] = { + DRIVER_LOG_FILE_NAMES.map { fname => + fname -> s"$baseUrl/$fname?start=$DRIVER_LOG_START_OFFSET" + }.toMap + } + def getLogUrls( conf: Configuration, container: Option[Container]): Option[Map[String, String]] = { @@ -42,9 +52,7 @@ private[spark] object YarnContainerInfoHelper extends Logging { val baseUrl = s"$httpScheme$httpAddress/node/containerlogs/$containerId/$user" logDebug(s"Base URL for logs: $baseUrl") - Some(Map( - "stdout" -> s"$baseUrl/stdout?start=-4096", - "stderr" -> s"$baseUrl/stderr?start=-4096")) + Some(getLogUrlsFromBaseUrl(baseUrl)) } catch { case e: Exception => logInfo("Error while building executor logs - executor logs will not be available", e) diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala index ea3acec3bb78b..fccb2406d66f8 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala @@ -583,6 +583,53 @@ class ClientSuite extends SparkFunSuite with Matchers { } } + test("SPARK-33185 Parse YARN AppAttempts valid JSON response") { + val appIdSuffix = "1500000000000_1234567" + val containerId = s"container_e1_${appIdSuffix}_01_000001" + val nodeHost = "node.example.com" + val jsonString = + s""" + |{"appAttempts": { + | "appAttempt": [ { + | "id":1, + | "startTime":1600000000000, + | "finishedTime":1600000100000, + | "containerId":"$containerId", + | "nodeHttpAddress":"$nodeHost:8042", + | "nodeId":"node.example.com:8041", + | "logsLink":"http://$nodeHost:8042/node/containerlogs/$containerId/username", + | "blacklistedNodes":"", + | "nodesBlacklistedBySystem":"", + | "appAttemptId":"appattempt_${appIdSuffix}_000001" + | }] + |}} + |""".stripMargin + val logLinkMap = Client.parseAppAttemptsJsonResponse(jsonString) + assert(logLinkMap.keySet === Set("stdout", "stderr")) + assert(logLinkMap("stdout") === + s"http://$nodeHost:8042/node/containerlogs/$containerId/username/stdout?start=-4096") + assert(logLinkMap("stderr") === + s"http://$nodeHost:8042/node/containerlogs/$containerId/username/stderr?start=-4096") + } + + test("SPARK-33185 Parse YARN AppAttempts invalid JSON response") { + // No "appAttempt" present + assert(Client.parseAppAttemptsJsonResponse("""{"appAttempts": { } }""") === Map()) + + // "appAttempt" is empty + assert(Client.parseAppAttemptsJsonResponse("""{"appAttempts": { "appAttempt": [ ] } }""") + === Map()) + + // logsLink is missing + 
assert(Client.parseAppAttemptsJsonResponse("""{"appAttempts":{"appAttempt":[{"id":1}]}}""") + === Map()) + + // logsLink is present but empty + assert( + Client.parseAppAttemptsJsonResponse("""{"appAttempts":{"appAttempt":[{"logsLink":""}]}}""") + === Map()) + } + private val matching = Seq( ("files URI match test1", "file:///file1", "file:///file2"), ("files URI match test2", "file:///c:file1", "file://c:file2"), From cd4e3d3b0c7b1ec645ec9c3b2a1847ce29a65765 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 5 Nov 2020 15:44:04 -0800 Subject: [PATCH 0400/1009] [SPARK-33360][SQL] Simplify DS v2 write resolution ### What changes were proposed in this pull request? Removing duplicated code in `ResolveOutputRelation`, by adding `V2WriteCommand.withNewQuery` ### Why are the changes needed? code cleanup ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? existing tests Closes #30264 from cloud-fan/ds-minor. Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/analysis/Analyzer.scala | 39 ++------- .../catalyst/plans/logical/v2Commands.scala | 16 +++- .../analysis/DataSourceV2AnalysisSuite.scala | 62 +++++++++++++ .../spark/sql/DataFrameWriterV2Suite.scala | 86 +------------------ 4 files changed, 86 insertions(+), 117 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index f32190bc30df0..c4e4ffb98fb25 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -3046,40 +3046,15 @@ class Analyzer( */ object ResolveOutputRelation extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { - case append @ AppendData(table, query, _, isByName) - if table.resolved && query.resolved && !append.outputResolved => + case v2Write: V2WriteCommand + if v2Write.table.resolved && v2Write.query.resolved && !v2Write.outputResolved => validateStoreAssignmentPolicy() - val projection = - TableOutputResolver.resolveOutputColumns(table.name, table.output, query, isByName, conf) - - if (projection != query) { - append.copy(query = projection) - } else { - append - } - - case overwrite @ OverwriteByExpression(table, _, query, _, isByName) - if table.resolved && query.resolved && !overwrite.outputResolved => - validateStoreAssignmentPolicy() - val projection = - TableOutputResolver.resolveOutputColumns(table.name, table.output, query, isByName, conf) - - if (projection != query) { - overwrite.copy(query = projection) - } else { - overwrite - } - - case overwrite @ OverwritePartitionsDynamic(table, query, _, isByName) - if table.resolved && query.resolved && !overwrite.outputResolved => - validateStoreAssignmentPolicy() - val projection = - TableOutputResolver.resolveOutputColumns(table.name, table.output, query, isByName, conf) - - if (projection != query) { - overwrite.copy(query = projection) + val projection = TableOutputResolver.resolveOutputColumns( + v2Write.table.name, v2Write.table.output, v2Write.query, v2Write.isByName, conf) + if (projection != v2Write.query) { + v2Write.withNewQuery(projection) } else { - overwrite + v2Write } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 
f18aecd19b8d8..fb8a9be80385b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -32,6 +32,7 @@ import org.apache.spark.sql.types.{DataType, MetadataBuilder, StringType, Struct trait V2WriteCommand extends Command { def table: NamedRelation def query: LogicalPlan + def isByName: Boolean override def children: Seq[LogicalPlan] = Seq(query) @@ -50,6 +51,8 @@ trait V2WriteCommand extends Command { (outAttr.nullable || !inAttr.nullable) }) } + + def withNewQuery(newQuery: LogicalPlan): V2WriteCommand } /** @@ -59,7 +62,9 @@ case class AppendData( table: NamedRelation, query: LogicalPlan, writeOptions: Map[String, String], - isByName: Boolean) extends V2WriteCommand + isByName: Boolean) extends V2WriteCommand { + override def withNewQuery(newQuery: LogicalPlan): AppendData = copy(query = newQuery) +} object AppendData { def byName( @@ -89,6 +94,9 @@ case class OverwriteByExpression( override lazy val resolved: Boolean = { table.resolved && query.resolved && outputResolved && deleteExpr.resolved } + override def withNewQuery(newQuery: LogicalPlan): OverwriteByExpression = { + copy(query = newQuery) + } } object OverwriteByExpression { @@ -116,7 +124,11 @@ case class OverwritePartitionsDynamic( table: NamedRelation, query: LogicalPlan, writeOptions: Map[String, String], - isByName: Boolean) extends V2WriteCommand + isByName: Boolean) extends V2WriteCommand { + override def withNewQuery(newQuery: LogicalPlan): OverwritePartitionsDynamic = { + copy(query = newQuery) + } +} object OverwritePartitionsDynamic { def byName( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala index 52dcf63426a7e..ba926f842551f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala @@ -248,6 +248,68 @@ abstract class DataSourceV2AnalysisBaseSuite extends AnalysisTest { def byPosition(table: NamedRelation, query: LogicalPlan): LogicalPlan + test("SPARK-33136: output resolved on complex types for V2 write commands") { + def assertTypeCompatibility(name: String, fromType: DataType, toType: DataType): Unit = { + val table = TestRelation(StructType(Seq(StructField("a", toType))).toAttributes) + val query = TestRelation(StructType(Seq(StructField("a", fromType))).toAttributes) + val parsedPlan = byName(table, query) + assertResolved(parsedPlan) + checkAnalysis(parsedPlan, parsedPlan) + } + + // The major difference between `from` and `to` is that `from` is a complex type + // with non-nullable, whereas `to` is same data type with flipping nullable. 
+ + // nested struct type + val fromStructType = StructType(Array( + StructField("s", StringType), + StructField("i_nonnull", IntegerType, nullable = false), + StructField("st", StructType(Array( + StructField("l", LongType), + StructField("s_nonnull", StringType, nullable = false)))))) + + val toStructType = StructType(Array( + StructField("s", StringType), + StructField("i_nonnull", IntegerType), + StructField("st", StructType(Array( + StructField("l", LongType), + StructField("s_nonnull", StringType)))))) + + assertTypeCompatibility("struct", fromStructType, toStructType) + + // array type + assertTypeCompatibility("array", ArrayType(LongType, containsNull = false), + ArrayType(LongType, containsNull = true)) + + // array type with struct type + val fromArrayWithStructType = ArrayType( + StructType(Array(StructField("s", StringType, nullable = false))), + containsNull = false) + + val toArrayWithStructType = ArrayType( + StructType(Array(StructField("s", StringType))), + containsNull = true) + + assertTypeCompatibility("array_struct", fromArrayWithStructType, toArrayWithStructType) + + // map type + assertTypeCompatibility("map", MapType(IntegerType, StringType, valueContainsNull = false), + MapType(IntegerType, StringType, valueContainsNull = true)) + + // map type with struct type + val fromMapWithStructType = MapType( + IntegerType, + StructType(Array(StructField("s", StringType, nullable = false))), + valueContainsNull = false) + + val toMapWithStructType = MapType( + IntegerType, + StructType(Array(StructField("s", StringType))), + valueContainsNull = true) + + assertTypeCompatibility("map_struct", fromMapWithStructType, toMapWithStructType) + } + test("skipSchemaResolution should still require query to be resolved") { val table = TestRelationAcceptAnySchema(StructType(Seq( StructField("a", FloatType), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala index ff5c6242987de..8720c1f620564 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala @@ -23,15 +23,15 @@ import scala.collection.JavaConverters._ import org.scalatest.BeforeAndAfter -import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NamedRelation, NoSuchTableException, TableAlreadyExistsException} -import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic, V2WriteCommand} +import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchTableException, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic} import org.apache.spark.sql.connector.{InMemoryTable, InMemoryTableCatalog} import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} import org.apache.spark.sql.connector.expressions.{BucketTransform, DaysTransform, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, YearsTransform} import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types.{ArrayType, DataType, IntegerType, LongType, MapType, StringType, StructField, StructType, TimestampType} +import 
org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructType, TimestampType} import org.apache.spark.sql.util.QueryExecutionListener import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils @@ -100,86 +100,6 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo assert(v2.catalog.exists(_ == catalogPlugin)) } - case class FakeV2WriteCommand(table: NamedRelation, query: LogicalPlan) extends V2WriteCommand - - test("SPARK-33136 output resolved on complex types for V2 write commands") { - val tableCatalog = catalog("testcat") - - def assertTypeCompatibility(name: String, fromType: DataType, toType: DataType): Unit = { - val fromTableName = s"from_table_$name" - tableCatalog.createTable( - Identifier.of(Array(), fromTableName), - StructType(Array(StructField("col", fromType))), - Array.empty, - new java.util.HashMap[String, String]()) - - val toTable = tableCatalog.createTable( - Identifier.of(Array(), s"to_table_$name"), - StructType(Array(StructField("col", toType))), - Array.empty, - new java.util.HashMap[String, String]()) - - val df = spark.table(s"testcat.$fromTableName") - - val relation = DataSourceV2Relation.create(toTable, Some(tableCatalog), None) - val writeCommand = FakeV2WriteCommand(relation, df.queryExecution.analyzed) - - assert(writeCommand.outputResolved, s"Unable to write from type $fromType to type $toType.") - } - - // The major difference between `from` and `to` is that `from` is a complex type - // with non-nullable, whereas `to` is same data type with flipping nullable. - - // nested struct type - val fromStructType = StructType(Array( - StructField("s", StringType), - StructField("i_nonnull", IntegerType, nullable = false), - StructField("st", StructType(Array( - StructField("l", LongType), - StructField("s_nonnull", StringType, nullable = false)))))) - - val toStructType = StructType(Array( - StructField("s", StringType), - StructField("i_nonnull", IntegerType), - StructField("st", StructType(Array( - StructField("l", LongType), - StructField("s_nonnull", StringType)))))) - - assertTypeCompatibility("struct", fromStructType, toStructType) - - // array type - assertTypeCompatibility("array", ArrayType(LongType, containsNull = false), - ArrayType(LongType, containsNull = true)) - - // array type with struct type - val fromArrayWithStructType = ArrayType( - StructType(Array(StructField("s", StringType, nullable = false))), - containsNull = false) - - val toArrayWithStructType = ArrayType( - StructType(Array(StructField("s", StringType))), - containsNull = true) - - assertTypeCompatibility("array_struct", fromArrayWithStructType, toArrayWithStructType) - - // map type - assertTypeCompatibility("map", MapType(IntegerType, StringType, valueContainsNull = false), - MapType(IntegerType, StringType, valueContainsNull = true)) - - // map type with struct type - val fromMapWithStructType = MapType( - IntegerType, - StructType(Array(StructField("s", StringType, nullable = false))), - valueContainsNull = false) - - val toMapWithStructType = MapType( - IntegerType, - StructType(Array(StructField("s", StringType))), - valueContainsNull = true) - - assertTypeCompatibility("map_struct", fromMapWithStructType, toMapWithStructType) - } - test("Append: basic append") { spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") From 4941b7ae18d4081233953cc11328645d0b4cf208 Mon Sep 17 00:00:00 2001 From: William Hyun Date: Thu, 5 Nov 2020 17:37:44 -0800 Subject: [PATCH 0401/1009] [SPARK-33365][BUILD] 
Update SBT to 1.4.2 ### What changes were proposed in this pull request? This PR aims to update SBT from 1.4.1 to 1.4.2. ### Why are the changes needed? This will bring the latest bug fixes. - https://github.com/sbt/sbt/releases/tag/v1.4.2 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #30268 from williamhyun/sbt. Authored-by: William Hyun Signed-off-by: Dongjoon Hyun --- project/build.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/build.properties b/project/build.properties index d70d98448e4ca..5ec1d700fd2a8 100644 --- a/project/build.properties +++ b/project/build.properties @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -sbt.version=1.4.1 +sbt.version=1.4.2 From 90f35c663e4118b7a716e614f37b8d888d0d6bd6 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 6 Nov 2020 12:46:26 +0900 Subject: [PATCH 0402/1009] [MINOR][SQL] Fix incorrect JIRA ID comments in Analyzer ### What changes were proposed in this pull request? This PR fixes incorrect JIRA ids in `Analyzer.scala` introduced by SPARK-31670 (https://github.com/apache/spark/pull/28490) ```scala - // SPARK-31607: Resolve Struct field in selectedGroupByExprs/groupByExprs and aggregations + // SPARK-31670: Resolve Struct field in selectedGroupByExprs/groupByExprs and aggregations ``` ### Why are the changes needed? Fix the wrong information. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? This is a comment change. Manually review. Closes #30269 from dongjoon-hyun/SPARK-31670-MINOR. Authored-by: Dongjoon Hyun Signed-off-by: HyukjinKwon --- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index c4e4ffb98fb25..f0143fdb23473 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1463,7 +1463,7 @@ class Analyzer( // rule: ResolveDeserializer. case plan if containsDeserializer(plan.expressions) => plan - // SPARK-31607: Resolve Struct field in groupByExpressions and aggregateExpressions + // SPARK-31670: Resolve Struct field in groupByExpressions and aggregateExpressions // with CUBE/ROLLUP will be wrapped with alias like Alias(GetStructField, name) with // different ExprId. This cause aggregateExpressions can't be replaced by expanded // groupByExpressions in `ResolveGroupingAnalytics.constructAggregateExprs()`, we trim @@ -1487,7 +1487,7 @@ class Analyzer( a.copy(resolvedGroupingExprs, resolvedAggExprs, a.child) - // SPARK-31607: Resolve Struct field in selectedGroupByExprs/groupByExprs and aggregations + // SPARK-31670: Resolve Struct field in selectedGroupByExprs/groupByExprs and aggregations // will be wrapped with alias like Alias(GetStructField, name) with different ExprId. 
// This cause aggregateExpressions can't be replaced by expanded groupByExpressions in // `ResolveGroupingAnalytics.constructAggregateExprs()`, we trim unnecessary alias From d16311051d4c67b65116ed182c87f96656b63333 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 6 Nov 2020 05:20:25 +0000 Subject: [PATCH 0403/1009] [SPARK-32934][SQL][FOLLOW-UP] Refine class naming and code comments ### What changes were proposed in this pull request? 1. Rename `OffsetWindowSpec` to `OffsetWindowFunction`, as it's the base class for all offset based window functions. 2. Refine and add more comments. 3. Remove `isRelative` as it's useless. ### Why are the changes needed? code refinement ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing tests Closes #30261 from cloud-fan/window. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/CheckAnalysis.scala | 4 ++-- .../expressions/windowExpressions.scala | 24 +++++++------------ .../sql/execution/window/WindowExec.scala | 10 ++++---- .../sql/execution/window/WindowExecBase.scala | 17 ++++++------- .../window/WindowFunctionFrame.scala | 10 ++++---- 5 files changed, 30 insertions(+), 35 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index d261f26072bcc..ac91fa0b5811e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -166,10 +166,10 @@ trait CheckAnalysis extends PredicateHelper { case w @ WindowExpression(AggregateExpression(_, _, true, _, _), _) => failAnalysis(s"Distinct window functions are not supported: $w") - case w @ WindowExpression(_: FrameLessOffsetWindowFunction, + case w @ WindowExpression(wf: FrameLessOffsetWindowFunction, WindowSpecDefinition(_, order, frame: SpecifiedWindowFrame)) if order.isEmpty || !frame.isOffset => - failAnalysis("An offset window function can only be evaluated in an ordered " + + failAnalysis(s"${wf.prettyName} function can only be evaluated in an ordered " + s"row-based window frame with a single offset: $w") case w @ WindowExpression(e, s) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index 1a57afa8d9aae..b6dd817794723 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -327,7 +327,7 @@ object WindowFunctionType { } } -trait OffsetWindowSpec extends Expression { +trait OffsetWindowFunction extends WindowFunction { /** * Input expression to evaluate against a row which a number of rows below or above (depending on * the value and sign of the offset) the starting row (current row if isRelative=true, or the @@ -356,23 +356,21 @@ trait OffsetWindowSpec extends Expression { val ignoreNulls: Boolean /** - * Whether the offset is starts with the current row. If `isRelative` is true, `offset` means - * the offset is start with the current row. otherwise, the offset is starts with the first - * row of the entire window frame. + * A fake window frame which is used to hold the offset information. 
It's used as a key to group + * by offset window functions in `WindowExecBase.windowFrameExpressionFactoryPairs`, as offset + * window functions with the same offset and same window frame can be evaluated together. */ - val isRelative: Boolean - lazy val fakeFrame = SpecifiedWindowFrame(RowFrame, offset, offset) } /** * A frameless offset window function is a window function that cannot specify window frame and - * returns the value of the input column offset by a number of rows within the partition. - * For instance: a FrameLessOffsetWindowFunction for value x with offset -2, will get the value of - * x 2 rows back in the partition. + * returns the value of the input column offset by a number of rows according to the current row + * within the partition. For instance: a FrameLessOffsetWindowFunction for value x with offset -2, + * will get the value of x 2 rows back from the current row in the partition. */ abstract class FrameLessOffsetWindowFunction - extends WindowFunction with OffsetWindowSpec with Unevaluable with ImplicitCastInputTypes { + extends OffsetWindowFunction with Unevaluable with ImplicitCastInputTypes { override def children: Seq[Expression] = Seq(input, offset, default) @@ -391,8 +389,6 @@ abstract class FrameLessOffsetWindowFunction override val ignoreNulls = false - override val isRelative = true - override lazy val frame: WindowFrame = fakeFrame override def checkInputDataTypes(): TypeCheckResult = { @@ -630,14 +626,12 @@ case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction { group = "window_funcs") // scalastyle:on line.size.limit line.contains.tab case class NthValue(input: Expression, offset: Expression, ignoreNulls: Boolean) - extends AggregateWindowFunction with OffsetWindowSpec with ImplicitCastInputTypes { + extends AggregateWindowFunction with OffsetWindowFunction with ImplicitCastInputTypes { def this(child: Expression, offset: Expression) = this(child, offset, false) override lazy val default = Literal.create(null, input.dataType) - override val isRelative = false - override def children: Seq[Expression] = input :: offset :: Nil override val frame: WindowFrame = UnspecifiedFrame diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala index 439c31a47fd3b..b693cae824bf9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala @@ -57,12 +57,10 @@ import org.apache.spark.sql.types.{CalendarIntervalType, DateType, IntegerType, * 3. CURRENT ROW AND 1 FOLLOWING * 4. 1 PRECEDING AND 1 FOLLOWING * 5. 1 FOLLOWING AND 2 FOLLOWING - * - Offset frame: The frame consist of one row, which is an offset number of rows. There are three - * implement of offset frame. - * 1. [[FrameLessOffsetWindowFunction]] returns the value of the input column offset by a number - * of rows according to the current row. - * 2. [[UnboundedOffsetWindowFunctionFrame]] and [[UnboundedPrecedingOffsetWindowFunctionFrame]] - * returns the value of the input column offset by a number of rows within the frame. + * - Offset frame: The frame consist of one row, which is an offset number of rows away from the + * current row. Only [[OffsetWindowFunction]]s can be processed in an offset frame. 
There are + * three implements of offset frame: [[FrameLessOffsetWindowFunctionFrame]], + * [[UnboundedOffsetWindowFunctionFrame]] and [[UnboundedPrecedingOffsetWindowFunctionFrame]]. * * Different frame boundaries can be used in Growing, Shrinking and Moving frames. A frame * boundary can be either Row or Range based: diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala index f0b99c1522aa1..a6a3f3d7384bf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala @@ -136,8 +136,9 @@ trait WindowExecBase extends UnaryExecNode { val frame = spec.frameSpecification.asInstanceOf[SpecifiedWindowFrame] function match { case AggregateExpression(f, _, _, _, _) => collect("AGGREGATE", frame, e, f) - case f: FrameLessOffsetWindowFunction => collect("FRAME_LESS_OFFSET", frame, e, f) - case f: OffsetWindowSpec if !f.ignoreNulls && + case f: FrameLessOffsetWindowFunction => + collect("FRAME_LESS_OFFSET", f.fakeFrame, e, f) + case f: OffsetWindowFunction if !f.ignoreNulls && frame.frameType == RowFrame && frame.lower == UnboundedPreceding => frame.upper match { case UnboundedFollowing => collect("UNBOUNDED_OFFSET", f.fakeFrame, e, f) @@ -184,8 +185,8 @@ trait WindowExecBase extends UnaryExecNode { new FrameLessOffsetWindowFunctionFrame( target, ordinal, - // OFFSET frame functions are guaranteed be OffsetWindowSpec. - functions.map(_.asInstanceOf[OffsetWindowSpec]), + // OFFSET frame functions are guaranteed be OffsetWindowFunction. + functions.map(_.asInstanceOf[OffsetWindowFunction]), child.output, (expressions, schema) => MutableProjection.create(expressions, schema), @@ -195,8 +196,8 @@ trait WindowExecBase extends UnaryExecNode { new UnboundedOffsetWindowFunctionFrame( target, ordinal, - // OFFSET frame functions are guaranteed be OffsetWindowSpec. - functions.map(_.asInstanceOf[OffsetWindowSpec]), + // OFFSET frame functions are guaranteed be OffsetWindowFunction. + functions.map(_.asInstanceOf[OffsetWindowFunction]), child.output, (expressions, schema) => MutableProjection.create(expressions, schema), @@ -207,8 +208,8 @@ trait WindowExecBase extends UnaryExecNode { new UnboundedPrecedingOffsetWindowFunctionFrame( target, ordinal, - // OFFSET frame functions are guaranteed be OffsetWindowSpec. - functions.map(_.asInstanceOf[OffsetWindowSpec]), + // OFFSET frame functions are guaranteed be OffsetWindowFunction. 
+ functions.map(_.asInstanceOf[OffsetWindowFunction]), child.output, (expressions, schema) => MutableProjection.create(expressions, schema), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala index e8a83f9772d35..2a4b957c35426 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala @@ -80,7 +80,7 @@ object WindowFunctionFrame { abstract class OffsetWindowFunctionFrameBase( target: InternalRow, ordinal: Int, - expressions: Array[OffsetWindowSpec], + expressions: Array[OffsetWindowFunction], inputSchema: Seq[Attribute], newMutableProjection: (Seq[Expression], Seq[Attribute]) => MutableProjection, offset: Int) @@ -144,7 +144,7 @@ abstract class OffsetWindowFunctionFrameBase( class FrameLessOffsetWindowFunctionFrame( target: InternalRow, ordinal: Int, - expressions: Array[OffsetWindowSpec], + expressions: Array[OffsetWindowFunction], inputSchema: Seq[Attribute], newMutableProjection: (Seq[Expression], Seq[Attribute]) => MutableProjection, offset: Int) @@ -187,12 +187,13 @@ class FrameLessOffsetWindowFunctionFrame( class UnboundedOffsetWindowFunctionFrame( target: InternalRow, ordinal: Int, - expressions: Array[OffsetWindowSpec], + expressions: Array[OffsetWindowFunction], inputSchema: Seq[Attribute], newMutableProjection: (Seq[Expression], Seq[Attribute]) => MutableProjection, offset: Int) extends OffsetWindowFunctionFrameBase( target, ordinal, expressions, inputSchema, newMutableProjection, offset) { + assert(offset > 0) override def prepare(rows: ExternalAppendOnlyUnsafeRowArray): Unit = { input = rows @@ -230,12 +231,13 @@ class UnboundedOffsetWindowFunctionFrame( class UnboundedPrecedingOffsetWindowFunctionFrame( target: InternalRow, ordinal: Int, - expressions: Array[OffsetWindowSpec], + expressions: Array[OffsetWindowFunction], inputSchema: Seq[Attribute], newMutableProjection: (Seq[Expression], Seq[Attribute]) => MutableProjection, offset: Int) extends OffsetWindowFunctionFrameBase( target, ordinal, expressions, inputSchema, newMutableProjection, offset) { + assert(offset > 0) var selectedRow: UnsafeRow = null From f6c00079709b6dcda72b08d3e9865ca6b49f8b74 Mon Sep 17 00:00:00 2001 From: neko Date: Fri, 6 Nov 2020 13:45:02 +0800 Subject: [PATCH 0404/1009] [SPARK-33342][WEBUI] fix the wrong url and display name of blocking thread in threadDump page ### What changes were proposed in this pull request? fix the wrong url and display name of blocking thread in threadDump page. The blockingThreadId variable passed to the page should be of string type instead of Option type. ### Why are the changes needed? blocking threadId in the ui page is not displayed well, and the corresponding url cannot be redirected normally ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? The pr only involves minor changes to the page and does not affect other functions, The manual test results are as follows. The thread name displayed on the page is correct, and you can click on the URL to jump to the corresponding url ![shows_ok](https://user-images.githubusercontent.com/52202080/98108177-89488d00-1ed6-11eb-9488-8446c3f38bad.gif) Closes #30249 from akiyamaneko/thread-dump-improve. 
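As a quick illustration of the symptom (not the exact page code): interpolating the raw `Option[Long]` into the rendered HTML leaks the `Some(...)` wrapper into both the link target and the label, which is why the ID has to be unwrapped to a plain string first:

```scala
scala> val blockedByThreadId: Option[Long] = Some(123L)
blockedByThreadId: Option[Long] = Some(123)

scala> s"Blocked by Thread $blockedByThreadId"  // what the page effectively rendered
res0: String = Blocked by Thread Some(123)
```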
Authored-by: neko Signed-off-by: Gengliang Wang --- .../org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala index 2c7aeeabb3601..c3246dc90976c 100644 --- a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala @@ -41,10 +41,10 @@ private[ui] class ExecutorThreadDumpPage( val dumpRows = threadDump.map { thread => val threadId = thread.threadId val blockedBy = thread.blockedByThreadId match { - case Some(_) => + case Some(blockingThreadId) => case None => Text("") } From 733a468726849ba17ab27bd20895f253590fedcb Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Fri, 6 Nov 2020 05:46:38 +0000 Subject: [PATCH 0405/1009] [SPARK-33130][SQL] Support ALTER TABLE in JDBC v2 Table Catalog: add, update type and nullability of columns (MsSqlServer dialect) ### What changes were proposed in this pull request? Override the default SQL strings for: ALTER TABLE RENAME COLUMN ALTER TABLE UPDATE COLUMN NULLABILITY in the following MsSQLServer JDBC dialect according to official documentation. Write MsSqlServer integration tests for JDBC. ### Why are the changes needed? To add the support for alter table when interacting with MSSql Server. ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? added tests Closes #30038 from ScrapCodes/mssql-dialect. Authored-by: Prashant Sharma Signed-off-by: Wenchen Fan --- .../jdbc/v2/MsSqlServerIntegrationSuite.scala | 90 +++++++++++++++++++ .../spark/sql/jdbc/MsSqlServerDialect.scala | 38 ++++++++ 2 files changed, 128 insertions(+) create mode 100644 external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala new file mode 100644 index 0000000000000..905e32aaa918e --- /dev/null +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.jdbc.v2 + +import java.sql.{Connection, SQLFeatureNotSupportedException} + +import org.scalatest.time.SpanSugar._ + +import org.apache.spark.SparkConf +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog +import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite} +import org.apache.spark.sql.types._ +import org.apache.spark.tags.DockerTest + +/** + * To run this test suite for a specific version (e.g., 2019-GA-ubuntu-16.04): + * {{{ + * MSSQLSERVER_DOCKER_IMAGE_NAME=2019-GA-ubuntu-16.04 + * ./build/sbt -Pdocker-integration-tests "testOnly *v2*MsSqlServerIntegrationSuite" + * }}} + */ +@DockerTest +class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { + + override val catalogName: String = "mssql" + + override val db = new DatabaseOnDocker { + override val imageName = sys.env.getOrElse("MSSQLSERVER_DOCKER_IMAGE_NAME", + "mcr.microsoft.com/mssql/server:2019-GA-ubuntu-16.04") + override val env = Map( + "SA_PASSWORD" -> "Sapass123", + "ACCEPT_EULA" -> "Y" + ) + override val usesIpc = false + override val jdbcPort: Int = 1433 + + override def getJdbcUrl(ip: String, port: Int): String = + s"jdbc:sqlserver://$ip:$port;user=sa;password=Sapass123;" + } + + override def sparkConf: SparkConf = super.sparkConf + .set("spark.sql.catalog.mssql", classOf[JDBCTableCatalog].getName) + .set("spark.sql.catalog.mssql.url", db.getJdbcUrl(dockerIp, externalPort)) + + override val connectionTimeout = timeout(7.minutes) + + override def dataPreparation(conn: Connection): Unit = {} + + override def testUpdateColumnType(tbl: String): Unit = { + sql(s"CREATE TABLE $tbl (ID INTEGER) USING _") + var t = spark.table(tbl) + var expectedSchema = new StructType().add("ID", IntegerType) + assert(t.schema === expectedSchema) + sql(s"ALTER TABLE $tbl ALTER COLUMN id TYPE STRING") + t = spark.table(tbl) + expectedSchema = new StructType().add("ID", StringType) + assert(t.schema === expectedSchema) + // Update column type from STRING to INTEGER + val msg1 = intercept[AnalysisException] { + sql(s"ALTER TABLE $tbl ALTER COLUMN id TYPE INTEGER") + }.getMessage + assert(msg1.contains("Cannot update alt_table field ID: string cannot be cast to int")) + } + + override def testUpdateColumnNullability(tbl: String): Unit = { + sql(s"CREATE TABLE $tbl (ID STRING NOT NULL) USING _") + // Update nullability is unsupported for mssql db. 
+ val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $tbl ALTER COLUMN ID DROP NOT NULL") + }.getCause.asInstanceOf[SQLFeatureNotSupportedException].getMessage + + assert(msg.contains("UpdateColumnNullability is not supported")) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala index 1c6e8c359aa15..dc39a10987c91 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.jdbc +import java.sql.SQLFeatureNotSupportedException import java.util.Locale import org.apache.spark.sql.internal.SQLConf @@ -64,4 +65,41 @@ private object MsSqlServerDialect extends JdbcDialect { override def renameTable(oldTable: String, newTable: String): String = { s"EXEC sp_rename $oldTable, $newTable" } + + // scalastyle:off line.size.limit + // see https://docs.microsoft.com/en-us/sql/relational-databases/tables/add-columns-to-a-table-database-engine?view=sql-server-ver15 + // scalastyle:on line.size.limit + override def getAddColumnQuery( + tableName: String, + columnName: String, + dataType: String): String = { + s"ALTER TABLE $tableName ADD ${quoteIdentifier(columnName)} $dataType" + } + + // scalastyle:off line.size.limit + // See https://docs.microsoft.com/en-us/sql/relational-databases/system-stored-procedures/sp-rename-transact-sql?view=sql-server-ver15 + // scalastyle:on line.size.limit + override def getRenameColumnQuery( + tableName: String, + columnName: String, + newName: String, + dbMajorVersion: Int): String = { + s"EXEC sp_rename '$tableName.${quoteIdentifier(columnName)}'," + + s" ${quoteIdentifier(newName)}, 'COLUMN'" + } + + // scalastyle:off line.size.limit + // see https://docs.microsoft.com/en-us/sql/t-sql/statements/alter-table-transact-sql?view=sql-server-ver15 + // scalastyle:on line.size.limit + // require to have column data type to change the column nullability + // ALTER TABLE tbl_name ALTER COLUMN col_name datatype [NULL | NOT NULL] + // column_definition: + // data_type [NOT NULL | NULL] + // We don't have column data type here, so we throw Exception for now + override def getUpdateColumnNullabilityQuery( + tableName: String, + columnName: String, + isNullable: Boolean): String = { + throw new SQLFeatureNotSupportedException(s"UpdateColumnNullability is not supported") + } } From 68c032c246bb091b25d80e436b9288cca9245265 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Thu, 5 Nov 2020 22:00:45 -0800 Subject: [PATCH 0406/1009] [SPARK-33364][SQL] Introduce the "purge" option in TableCatalog.dropTable for v2 catalog ### What changes were proposed in this pull request? This PR proposes to introduce the `purge` option in `TableCatalog.dropTable` so that v2 catalogs can use the option if needed. Related discussion: https://github.com/apache/spark/pull/30079#discussion_r510594110 ### Why are the changes needed? Spark DDL supports passing the purge option to `DROP TABLE` command. However, the option is not used (ignored) for v2 catalogs. ### Does this PR introduce _any_ user-facing change? This PR introduces a new API in `TableCatalog`. ### How was this patch tested? Added a test. Closes #30267 from imback82/purge_table. 
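For context, a hedged sketch of the user-facing path this wires up (catalog and table names follow the test below; the behaviour against the default implementation is exactly what the new DataSourceV2SQLSuite test asserts):

```scala
// DROP TABLE ... PURGE was already parsed; this change forwards the purge flag to the v2 catalog.
// A catalog that does not override dropTable(Identifier, Boolean) rejects the purge request.
spark.sql("CREATE TABLE testcat.ns.t (id BIGINT) USING foo")
spark.sql("DROP TABLE testcat.ns.t PURGE") // UnsupportedOperationException: "Purge option is not supported."
```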
Authored-by: Terry Kim Signed-off-by: Dongjoon Hyun --- .../sql/connector/catalog/TableCatalog.java | 23 +++++++++++++++++++ .../datasources/v2/DataSourceV2Strategy.scala | 4 ++-- .../datasources/v2/DropTableExec.scala | 9 +++++--- .../sql/connector/DataSourceV2SQLSuite.scala | 11 +++++++++ 4 files changed, 42 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java index b818515adf9c0..92079d127b1e3 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java @@ -162,6 +162,29 @@ Table alterTable( */ boolean dropTable(Identifier ident); + /** + * Drop a table in the catalog with an option to purge. + *

    + * If the catalog supports views and contains a view for the identifier and not a table, this + * must not drop the view and must return false. + *

+ * If the catalog supports the option to purge a table, this method must be overridden. + * The default implementation falls back to {@link #dropTable(Identifier) dropTable} if the + * purge option is set to false. Otherwise, it throws {@link UnsupportedOperationException}. + * + * @param ident a table identifier + * @param purge whether a table should be purged + * @return true if a table was deleted, false if no table exists for the identifier + * + * @since 3.1.0 + */ + default boolean dropTable(Identifier ident, boolean purge) { + if (purge) { + throw new UnsupportedOperationException("Purge option is not supported."); + } + return dropTable(ident); + } + /** * Renames a table in the catalog. *

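A hedged Scala sketch of how a catalog that does support purging might override the new two-argument method; only the `dropTable` signatures come from the interface above, while `purgeFromMetastore` and the class name are hypothetical:

```scala
import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog}

// Sketch under assumptions: purgeFromMetastore stands in for whatever the backing store
// offers for dropping a table and deleting its data immediately (skipping any trash/soft delete).
abstract class PurgeAwareCatalog extends TableCatalog {
  protected def purgeFromMetastore(ident: Identifier): Boolean

  override def dropTable(ident: Identifier, purge: Boolean): Boolean =
    if (purge) purgeFromMetastore(ident) // purge requested: remove the data right away
    else dropTable(ident)                // otherwise behave like the one-argument drop
}
```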
    diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 4bb58142b3d19..648929eaa33ce 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -228,8 +228,8 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case DescribeColumn(_: ResolvedTable, _, _) => throw new AnalysisException("Describing columns is not supported for v2 tables.") - case DropTable(r: ResolvedTable, ifExists, _) => - DropTableExec(r.catalog, r.identifier, ifExists) :: Nil + case DropTable(r: ResolvedTable, ifExists, purge) => + DropTableExec(r.catalog, r.identifier, ifExists, purge) :: Nil case _: NoopDropTable => LocalTableScanExec(Nil, Nil) :: Nil diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala index 967613f77577c..1fd0cd177478b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala @@ -25,12 +25,15 @@ import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} /** * Physical plan node for dropping a table. */ -case class DropTableExec(catalog: TableCatalog, ident: Identifier, ifExists: Boolean) - extends V2CommandExec { +case class DropTableExec( + catalog: TableCatalog, + ident: Identifier, + ifExists: Boolean, + purge: Boolean) extends V2CommandExec { override def run(): Seq[InternalRow] = { if (catalog.tableExists(ident)) { - catalog.dropTable(ident) + catalog.dropTable(ident, purge) } else if (!ifExists) { throw new NoSuchTableException(ident) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 893ee5f130cda..444daf8233c67 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -751,6 +751,17 @@ class DataSourceV2SQLSuite sql("DROP TABLE IF EXISTS testcat.db.notbl") } + test("DropTable: purge option") { + withTable("testcat.ns.t") { + sql("CREATE TABLE testcat.ns.t (id bigint) USING foo") + val ex = intercept[UnsupportedOperationException] { + sql ("DROP TABLE testcat.ns.t PURGE") + } + // The default TableCatalog.dropTable implementation doesn't support the purge option. + assert(ex.getMessage.contains("Purge option is not supported")) + } + } + test("SPARK-33174: DROP TABLE should resolve to a temporary view first") { withTable("testcat.ns.t") { withTempView("t") { From 93ad26be01a47fb075310a26188e238d55110098 Mon Sep 17 00:00:00 2001 From: Warren Zhu Date: Fri, 6 Nov 2020 16:53:10 +0900 Subject: [PATCH 0407/1009] [SPARK-23432][UI] Add executor peak jvm memory metrics in executors page ### What changes were proposed in this pull request? Add executor peak jvm memory metrics in executors page ![image](https://user-images.githubusercontent.com/1633312/97767765-9121bf00-1adb-11eb-93c7-7912d9fe7826.png) ### Why are the changes needed? 
Users can see executor peak JVM memory metrics on the Executors page. ### Does this PR introduce _any_ user-facing change? Users can see executor peak JVM memory metrics on the Executors page. ### How was this patch tested? Manually tested Closes #30186 from warrenzhu25/23432. Authored-by: Warren Zhu Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../ui/static/executorspage-template.html | 16 ++++++ .../apache/spark/ui/static/executorspage.js | 52 +++++++++++++++++-- 2 files changed, 64 insertions(+), 4 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html b/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html index 5e835c053eb6c..ec3cb5bb8ae5e 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html +++ b/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html @@ -86,6 +86,22 @@

    Executors

    Off Heap Storage Memory + + + Peak JVM Memory OnHeap / OffHeap + + + Peak Execution Memory OnHeap / OffHeap + + + Peak Storage Memory OnHeap / OffHeap + + + Peak Pool Memory Direct / Mapped Disk Used Cores Resources diff --git a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js index d4eaea9103771..4f179a93c9d5f 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js @@ -119,7 +119,7 @@ function totalDurationColor(totalGCTime, totalDuration) { } var sumOptionalColumns = [3, 4]; -var execOptionalColumns = [5, 6, 9, 10]; +var execOptionalColumns = [5, 6, 7, 8, 9, 10, 13, 14]; var execDataTable; var sumDataTable; @@ -412,6 +412,42 @@ $(document).ready(function () { formatBytes(row.memoryMetrics.totalOffHeapStorageMemory, type)); } }, + { + data: function (row, type) { + if (type !== 'display') + return row.peakMemoryMetrics.JVMHeapMemory; + else + return (formatBytes(row.peakMemoryMetrics.JVMHeapMemory, type) + ' / ' + + formatBytes(row.peakMemoryMetrics.JVMOffHeapMemory, type)); + } + }, + { + data: function (row, type) { + if (type !== 'display') + return row.peakMemoryMetrics.OnHeapExecutionMemory; + else + return (formatBytes(row.peakMemoryMetrics.OnHeapExecutionMemory, type) + ' / ' + + formatBytes(row.peakMemoryMetrics.OffHeapExecutionMemory, type)); + } + }, + { + data: function (row, type) { + if (type !== 'display') + return row.peakMemoryMetrics.OnHeapStorageMemory; + else + return (formatBytes(row.peakMemoryMetrics.OnHeapStorageMemory, type) + ' / ' + + formatBytes(row.peakMemoryMetrics.OffHeapStorageMemory, type)); + } + }, + { + data: function (row, type) { + if (type !== 'display') + return row.peakMemoryMetrics.DirectPoolMemory; + else + return (formatBytes(row.peakMemoryMetrics.DirectPoolMemory, type) + ' / ' + + formatBytes(row.peakMemoryMetrics.MappedPoolMemory, type)); + } + }, {data: 'diskUsed', render: formatBytes}, {data: 'totalCores'}, {name: 'resourcesCol', data: 'resources', render: formatResourceCells, orderable: false}, @@ -462,8 +498,12 @@ $(document).ready(function () { "columnDefs": [ {"visible": false, "targets": 5}, {"visible": false, "targets": 6}, + {"visible": false, "targets": 7}, + {"visible": false, "targets": 8}, {"visible": false, "targets": 9}, - {"visible": false, "targets": 10} + {"visible": false, "targets": 10}, + {"visible": false, "targets": 13}, + {"visible": false, "targets": 14} ], "deferRender": true }; @@ -571,8 +611,12 @@ $(document).ready(function () { "
Select All" +
      "On Heap Memory" +
      "Off Heap Memory" +
-     "Resources" +
-     "Resource Profile Id" +
+     "Peak JVM Memory OnHeap / OffHeap" +
+     "Peak Execution Memory OnHeap / OffHeap" +
+     "Peak Storage Memory OnHeap / OffHeap" +
+     "Peak Pool Memory Direct / Mapped" +
+     "Resources" +
+     "Resource Profile Id
    " + ""); reselectCheckboxesBasedOnTaskTableState(); From 09fa7ecae146c0865fc535b4b17175ca5714cfa4 Mon Sep 17 00:00:00 2001 From: Stuart White Date: Fri, 6 Nov 2020 13:12:35 -0800 Subject: [PATCH 0408/1009] [SPARK-33291][SQL] Improve DataFrame.show for nulls in arrays and structs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? The changes in [SPARK-32501 Inconsistent NULL conversions to strings](https://issues.apache.org/jira/browse/SPARK-32501) introduced some behavior that I'd like to clean up a bit. Here's sample code to illustrate the behavior I'd like to clean up: ```scala val rows = Seq[String](null) .toDF("value") .withColumn("struct1", struct('value as "value1")) .withColumn("struct2", struct('value as "value1", 'value as "value2")) .withColumn("array1", array('value)) .withColumn("array2", array('value, 'value)) // Show the DataFrame using the "first" codepath. rows.show(truncate=false) +-----+-------+-------------+------+--------+ |value|struct1|struct2 |array1|array2 | +-----+-------+-------------+------+--------+ |null |{ null}|{ null, null}|[] |[, null]| +-----+-------+-------------+------+--------+ // Write the DataFrame to disk, then read it back and show it to trigger the "codegen" code path: rows.write.parquet("rows") spark.read.parquet("rows").show(truncate=false) +-----+-------+-------------+-------+-------------+ |value|struct1|struct2 |array1 |array2 | +-----+-------+-------------+-------+-------------+ |null |{ null}|{ null, null}|[ null]|[ null, null]| +-----+-------+-------------+-------+-------------+ ``` Notice: 1. If the first element of a struct is null, it is printed with a leading space (e.g. "\{ null\}"). I think it's preferable to print it without the leading space (e.g. "\{null\}"). This is consistent with how non-null values are printed inside a struct. 2. If the first element of an array is null, it is not printed at all in the first code path, and the "codegen" code path prints it with a leading space. I think both code paths should be consistent and print it without a leading space (e.g. "[null]"). The desired result of this PR is to product the following output via both code paths: ``` +-----+-------+------------+------+------------+ |value|struct1|struct2 |array1|array2 | +-----+-------+------------+------+------------+ |null |{null} |{null, null}|[null]|[null, null]| +-----+-------+------------+------+------------+ ``` This contribution is my original work and I license the work to the project under the project’s open source license. ### Why are the changes needed? To correct errors and inconsistencies in how DataFrame.show() displays nulls inside arrays and structs. ### Does this PR introduce _any_ user-facing change? Yes. This PR changes what is printed out by DataFrame.show(). ### How was this patch tested? I added new test cases in CastSuite.scala to cover the cases addressed by this PR. Closes #30189 from stwhit/show_nulls. 
Authored-by: Stuart White Signed-off-by: Liang-Chi Hsieh --- .../spark/sql/catalyst/expressions/Cast.scala | 20 +++++++++------- .../sql/catalyst/expressions/CastSuite.scala | 24 +++++++++++++++++++ 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 48a9e19c9d953..4af12d61e86d9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -315,7 +315,9 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit builder.append("[") if (array.numElements > 0) { val toUTF8String = castToString(et) - if (!array.isNullAt(0)) { + if (array.isNullAt(0)) { + if (!legacyCastToStr) builder.append("null") + } else { builder.append(toUTF8String(array.get(0, et)).asInstanceOf[UTF8String]) } var i = 1 @@ -376,7 +378,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit val st = fields.map(_.dataType) val toUTF8StringFuncs = st.map(castToString) if (row.isNullAt(0)) { - if (!legacyCastToStr) builder.append(" null") + if (!legacyCastToStr) builder.append("null") } else { builder.append(toUTF8StringFuncs(0)(row.get(0, st(0))).asInstanceOf[UTF8String]) } @@ -898,8 +900,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit """ } - private def outNullElem(buffer: ExprValue): Block = { - if (legacyCastToStr) code"" else code"""$buffer.append(" null");""" + private def appendIfNotLegacyCastToStr(buffer: ExprValue, s: String): Block = { + if (!legacyCastToStr) code"""$buffer.append("$s");""" else EmptyBlock } private def writeArrayToStringBuilder( @@ -925,14 +927,14 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit |$buffer.append("["); |if ($array.numElements() > 0) { | if ($array.isNullAt(0)) { - | ${outNullElem(buffer)} + | ${appendIfNotLegacyCastToStr(buffer, "null")} | } else { | $buffer.append($elementToStringFunc(${CodeGenerator.getValue(array, et, "0")})); | } | for (int $loopIndex = 1; $loopIndex < $array.numElements(); $loopIndex++) { | $buffer.append(","); | if ($array.isNullAt($loopIndex)) { - | ${outNullElem(buffer)} + | ${appendIfNotLegacyCastToStr(buffer, " null")} | } else { | $buffer.append(" "); | $buffer.append($elementToStringFunc(${CodeGenerator.getValue(array, et, loopIndex)})); @@ -982,7 +984,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit | $buffer.append($keyToStringFunc($getMapFirstKey)); | $buffer.append(" ->"); | if ($map.valueArray().isNullAt(0)) { - | ${outNullElem(buffer)} + | ${appendIfNotLegacyCastToStr(buffer, " null")} | } else { | $buffer.append(" "); | $buffer.append($valueToStringFunc($getMapFirstValue)); @@ -992,7 +994,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit | $buffer.append($keyToStringFunc($getMapKeyArray)); | $buffer.append(" ->"); | if ($map.valueArray().isNullAt($loopIndex)) { - | ${outNullElem(buffer)} + | ${appendIfNotLegacyCastToStr(buffer, " null")} | } else { | $buffer.append(" "); | $buffer.append($valueToStringFunc($getMapValueArray)); @@ -1016,7 +1018,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit code""" |${if (i != 0) code"""$buffer.append(",");""" else EmptyBlock} |if ($row.isNullAt($i)) { - | ${outNullElem(buffer)} + | 
${appendIfNotLegacyCastToStr(buffer, if (i == 0) "null" else " null")} |} else { | ${if (i != 0) code"""$buffer.append(" ");""" else EmptyBlock} | diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 7caa4a55c06af..61133e2db5cbd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -717,6 +717,17 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(ret8, "[[[a], [b, c]], [[d]]]") } + test("SPARK-33291: Cast array with null elements to string") { + Seq(false, true).foreach { omitNull => + withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> omitNull.toString) { + val ret1 = cast(Literal.create(Array(null, null)), StringType) + checkEvaluation( + ret1, + s"[${if (omitNull) "" else "null"},${if (omitNull) "" else " null"}]") + } + } + } + test("SPARK-22973 Cast map to string") { Seq( false -> ("{", "}"), @@ -773,6 +784,19 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { } } + test("SPARK-33291: Cast struct with null elements to string") { + Seq( + false -> ("{", "}"), + true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) => + withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) { + val ret1 = cast(Literal.create(Tuple2[String, String](null, null)), StringType) + checkEvaluation( + ret1, + s"$lb${if (legacyCast) "" else "null"},${if (legacyCast) "" else " null"}$rb") + } + } + } + test("up-cast") { def isCastSafe(from: NumericType, to: NumericType): Boolean = (from, to) match { case (_, dt: DecimalType) => dt.isWiderThan(from) From fb9c873e7d5c81f312b26e46df32b1aadc6670b7 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Sat, 7 Nov 2020 06:43:27 +0900 Subject: [PATCH 0409/1009] [SPARK-33347][CORE] Cleanup useless variables of MutableApplicationInfo ### What changes were proposed in this pull request? There are 4 fields in `MutableApplicationInfo ` seems useless: - `coresGranted` - `maxCores` - `coresPerExecutor` - `memoryPerExecutorMB` They are always `None` and not reassigned. So the main change of this pr is cleanup these useless fields in `MutableApplicationInfo`. ### Why are the changes needed? Cleanup useless variables. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass the Jenkins or GitHub Action Closes #30251 from LuciferYang/SPARK-33347. 
Authored-by: yangjie01 Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../apache/spark/deploy/history/FsHistoryProvider.scala | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index 400c82c1f9e63..e1b0fc5e45d6e 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -1546,14 +1546,9 @@ private[history] class AppListingListener( private class MutableApplicationInfo { var id: String = null var name: String = null - var coresGranted: Option[Int] = None - var maxCores: Option[Int] = None - var coresPerExecutor: Option[Int] = None - var memoryPerExecutorMB: Option[Int] = None def toView(): ApplicationInfoWrapper = { - val apiInfo = ApplicationInfo(id, name, coresGranted, maxCores, coresPerExecutor, - memoryPerExecutorMB, Nil) + val apiInfo = ApplicationInfo(id, name, None, None, None, None, Nil) new ApplicationInfoWrapper(apiInfo, List(attempt.toView())) } From e11a24c1ba5b0f3116b46a213443902165919da5 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Fri, 6 Nov 2020 15:05:37 -0800 Subject: [PATCH 0410/1009] [SPARK-33371][PYTHON] Update setup.py and tests for Python 3.9 ### What changes were proposed in this pull request? This PR proposes to fix PySpark to officially support Python 3.9. The main codes already work. We should just note that we support Python 3.9. Also, this PR fixes some minor fixes into the test codes. - `Thread.isAlive` is removed in Python 3.9, and `Thread.is_alive` exists in Python 3.6+, see https://docs.python.org/3/whatsnew/3.9.html#removed - Fixed `TaskContextTestsWithWorkerReuse.test_barrier_with_python_worker_reuse` and `TaskContextTests.test_barrier` to be less flaky. This becomes more flaky in Python 3.9 for some reasons. NOTE that PyArrow does not support Python 3.9 yet. ### Why are the changes needed? To officially support Python 3.9. ### Does this PR introduce _any_ user-facing change? Yes, it officially supports Python 3.9. ### How was this patch tested? Manually ran the tests: ``` $ ./run-tests --python-executable=python Running PySpark tests. 
Output is in /.../spark/python/unit-tests.log Will test against the following Python executables: ['python'] Will test the following Python modules: ['pyspark-core', 'pyspark-ml', 'pyspark-mllib', 'pyspark-resource', 'pyspark-sql', 'pyspark-streaming'] python python_implementation is CPython python version is: Python 3.9.0 Starting test(python): pyspark.ml.tests.test_base Starting test(python): pyspark.ml.tests.test_evaluation Starting test(python): pyspark.ml.tests.test_algorithms Starting test(python): pyspark.ml.tests.test_feature Finished test(python): pyspark.ml.tests.test_base (12s) Starting test(python): pyspark.ml.tests.test_image Finished test(python): pyspark.ml.tests.test_evaluation (15s) Starting test(python): pyspark.ml.tests.test_linalg Finished test(python): pyspark.ml.tests.test_feature (25s) Starting test(python): pyspark.ml.tests.test_param Finished test(python): pyspark.ml.tests.test_image (17s) Starting test(python): pyspark.ml.tests.test_persistence Finished test(python): pyspark.ml.tests.test_param (17s) Starting test(python): pyspark.ml.tests.test_pipeline Finished test(python): pyspark.ml.tests.test_linalg (30s) Starting test(python): pyspark.ml.tests.test_stat Finished test(python): pyspark.ml.tests.test_pipeline (6s) Starting test(python): pyspark.ml.tests.test_training_summary Finished test(python): pyspark.ml.tests.test_stat (12s) Starting test(python): pyspark.ml.tests.test_tuning Finished test(python): pyspark.ml.tests.test_algorithms (68s) Starting test(python): pyspark.ml.tests.test_wrapper Finished test(python): pyspark.ml.tests.test_persistence (51s) Starting test(python): pyspark.mllib.tests.test_algorithms Finished test(python): pyspark.ml.tests.test_training_summary (33s) Starting test(python): pyspark.mllib.tests.test_feature Finished test(python): pyspark.ml.tests.test_wrapper (19s) Starting test(python): pyspark.mllib.tests.test_linalg Finished test(python): pyspark.mllib.tests.test_feature (26s) Starting test(python): pyspark.mllib.tests.test_stat Finished test(python): pyspark.mllib.tests.test_stat (22s) Starting test(python): pyspark.mllib.tests.test_streaming_algorithms Finished test(python): pyspark.mllib.tests.test_algorithms (53s) Starting test(python): pyspark.mllib.tests.test_util Finished test(python): pyspark.mllib.tests.test_linalg (54s) Starting test(python): pyspark.sql.tests.test_arrow Finished test(python): pyspark.sql.tests.test_arrow (0s) ... 61 tests were skipped Starting test(python): pyspark.sql.tests.test_catalog Finished test(python): pyspark.mllib.tests.test_util (11s) Starting test(python): pyspark.sql.tests.test_column Finished test(python): pyspark.sql.tests.test_catalog (16s) Starting test(python): pyspark.sql.tests.test_conf Finished test(python): pyspark.sql.tests.test_column (17s) Starting test(python): pyspark.sql.tests.test_context Finished test(python): pyspark.sql.tests.test_context (6s) ... 3 tests were skipped Starting test(python): pyspark.sql.tests.test_dataframe Finished test(python): pyspark.sql.tests.test_conf (11s) Starting test(python): pyspark.sql.tests.test_datasources Finished test(python): pyspark.sql.tests.test_datasources (19s) Starting test(python): pyspark.sql.tests.test_functions Finished test(python): pyspark.sql.tests.test_dataframe (35s) ... 
3 tests were skipped Starting test(python): pyspark.sql.tests.test_group Finished test(python): pyspark.sql.tests.test_functions (32s) Starting test(python): pyspark.sql.tests.test_pandas_cogrouped_map Finished test(python): pyspark.sql.tests.test_pandas_cogrouped_map (1s) ... 15 tests were skipped Starting test(python): pyspark.sql.tests.test_pandas_grouped_map Finished test(python): pyspark.sql.tests.test_group (19s) Starting test(python): pyspark.sql.tests.test_pandas_map Finished test(python): pyspark.sql.tests.test_pandas_grouped_map (0s) ... 21 tests were skipped Starting test(python): pyspark.sql.tests.test_pandas_udf Finished test(python): pyspark.sql.tests.test_pandas_map (0s) ... 6 tests were skipped Starting test(python): pyspark.sql.tests.test_pandas_udf_grouped_agg Finished test(python): pyspark.sql.tests.test_pandas_udf (0s) ... 6 tests were skipped Starting test(python): pyspark.sql.tests.test_pandas_udf_scalar Finished test(python): pyspark.sql.tests.test_pandas_udf_grouped_agg (0s) ... 13 tests were skipped Starting test(python): pyspark.sql.tests.test_pandas_udf_typehints Finished test(python): pyspark.sql.tests.test_pandas_udf_scalar (0s) ... 50 tests were skipped Starting test(python): pyspark.sql.tests.test_pandas_udf_window Finished test(python): pyspark.sql.tests.test_pandas_udf_typehints (0s) ... 10 tests were skipped Starting test(python): pyspark.sql.tests.test_readwriter Finished test(python): pyspark.sql.tests.test_pandas_udf_window (0s) ... 14 tests were skipped Starting test(python): pyspark.sql.tests.test_serde Finished test(python): pyspark.sql.tests.test_serde (19s) Starting test(python): pyspark.sql.tests.test_session Finished test(python): pyspark.mllib.tests.test_streaming_algorithms (120s) Starting test(python): pyspark.sql.tests.test_streaming Finished test(python): pyspark.sql.tests.test_readwriter (25s) Starting test(python): pyspark.sql.tests.test_types Finished test(python): pyspark.ml.tests.test_tuning (208s) Starting test(python): pyspark.sql.tests.test_udf Finished test(python): pyspark.sql.tests.test_session (31s) Starting test(python): pyspark.sql.tests.test_utils Finished test(python): pyspark.sql.tests.test_streaming (35s) Starting test(python): pyspark.streaming.tests.test_context Finished test(python): pyspark.sql.tests.test_types (34s) Starting test(python): pyspark.streaming.tests.test_dstream Finished test(python): pyspark.sql.tests.test_utils (14s) Starting test(python): pyspark.streaming.tests.test_kinesis Finished test(python): pyspark.streaming.tests.test_kinesis (0s) ... 
2 tests were skipped Starting test(python): pyspark.streaming.tests.test_listener Finished test(python): pyspark.streaming.tests.test_listener (11s) Starting test(python): pyspark.tests.test_appsubmit Finished test(python): pyspark.sql.tests.test_udf (39s) Starting test(python): pyspark.tests.test_broadcast Finished test(python): pyspark.streaming.tests.test_context (23s) Starting test(python): pyspark.tests.test_conf Finished test(python): pyspark.tests.test_conf (15s) Starting test(python): pyspark.tests.test_context Finished test(python): pyspark.tests.test_broadcast (33s) Starting test(python): pyspark.tests.test_daemon Finished test(python): pyspark.tests.test_daemon (5s) Starting test(python): pyspark.tests.test_install_spark Finished test(python): pyspark.tests.test_context (44s) Starting test(python): pyspark.tests.test_join Finished test(python): pyspark.tests.test_appsubmit (68s) Starting test(python): pyspark.tests.test_profiler Finished test(python): pyspark.tests.test_join (7s) Starting test(python): pyspark.tests.test_rdd Finished test(python): pyspark.tests.test_profiler (9s) Starting test(python): pyspark.tests.test_rddbarrier Finished test(python): pyspark.tests.test_rddbarrier (7s) Starting test(python): pyspark.tests.test_readwrite Finished test(python): pyspark.streaming.tests.test_dstream (107s) Starting test(python): pyspark.tests.test_serializers Finished test(python): pyspark.tests.test_serializers (8s) Starting test(python): pyspark.tests.test_shuffle Finished test(python): pyspark.tests.test_readwrite (14s) Starting test(python): pyspark.tests.test_taskcontext Finished test(python): pyspark.tests.test_install_spark (65s) Starting test(python): pyspark.tests.test_util Finished test(python): pyspark.tests.test_shuffle (8s) Starting test(python): pyspark.tests.test_worker Finished test(python): pyspark.tests.test_util (5s) Starting test(python): pyspark.accumulators Finished test(python): pyspark.accumulators (5s) Starting test(python): pyspark.broadcast Finished test(python): pyspark.broadcast (6s) Starting test(python): pyspark.conf Finished test(python): pyspark.tests.test_worker (14s) Starting test(python): pyspark.context Finished test(python): pyspark.conf (4s) Starting test(python): pyspark.ml.classification Finished test(python): pyspark.tests.test_rdd (60s) Starting test(python): pyspark.ml.clustering Finished test(python): pyspark.context (21s) Starting test(python): pyspark.ml.evaluation Finished test(python): pyspark.tests.test_taskcontext (69s) Starting test(python): pyspark.ml.feature Finished test(python): pyspark.ml.evaluation (26s) Starting test(python): pyspark.ml.fpm Finished test(python): pyspark.ml.clustering (45s) Starting test(python): pyspark.ml.functions Finished test(python): pyspark.ml.fpm (24s) Starting test(python): pyspark.ml.image Finished test(python): pyspark.ml.functions (17s) Starting test(python): pyspark.ml.linalg.__init__ Finished test(python): pyspark.ml.linalg.__init__ (0s) Starting test(python): pyspark.ml.recommendation Finished test(python): pyspark.ml.classification (74s) Starting test(python): pyspark.ml.regression Finished test(python): pyspark.ml.image (8s) Starting test(python): pyspark.ml.stat Finished test(python): pyspark.ml.stat (29s) Starting test(python): pyspark.ml.tuning Finished test(python): pyspark.ml.regression (53s) Starting test(python): pyspark.mllib.classification Finished test(python): pyspark.ml.tuning (35s) Starting test(python): pyspark.mllib.clustering Finished test(python): pyspark.ml.feature 
(103s) Starting test(python): pyspark.mllib.evaluation Finished test(python): pyspark.mllib.classification (33s) Starting test(python): pyspark.mllib.feature Finished test(python): pyspark.mllib.evaluation (21s) Starting test(python): pyspark.mllib.fpm Finished test(python): pyspark.ml.recommendation (103s) Starting test(python): pyspark.mllib.linalg.__init__ Finished test(python): pyspark.mllib.linalg.__init__ (1s) Starting test(python): pyspark.mllib.linalg.distributed Finished test(python): pyspark.mllib.feature (26s) Starting test(python): pyspark.mllib.random Finished test(python): pyspark.mllib.fpm (23s) Starting test(python): pyspark.mllib.recommendation Finished test(python): pyspark.mllib.clustering (50s) Starting test(python): pyspark.mllib.regression Finished test(python): pyspark.mllib.random (13s) Starting test(python): pyspark.mllib.stat.KernelDensity Finished test(python): pyspark.mllib.stat.KernelDensity (1s) Starting test(python): pyspark.mllib.stat._statistics Finished test(python): pyspark.mllib.linalg.distributed (42s) Starting test(python): pyspark.mllib.tree Finished test(python): pyspark.mllib.stat._statistics (19s) Starting test(python): pyspark.mllib.util Finished test(python): pyspark.mllib.regression (33s) Starting test(python): pyspark.profiler Finished test(python): pyspark.mllib.recommendation (36s) Starting test(python): pyspark.rdd Finished test(python): pyspark.profiler (9s) Starting test(python): pyspark.resource.tests.test_resources Finished test(python): pyspark.mllib.tree (19s) Starting test(python): pyspark.serializers Finished test(python): pyspark.mllib.util (21s) Starting test(python): pyspark.shuffle Finished test(python): pyspark.resource.tests.test_resources (9s) Starting test(python): pyspark.sql.avro.functions Finished test(python): pyspark.shuffle (1s) Starting test(python): pyspark.sql.catalog Finished test(python): pyspark.rdd (22s) Starting test(python): pyspark.sql.column Finished test(python): pyspark.serializers (12s) Starting test(python): pyspark.sql.conf Finished test(python): pyspark.sql.conf (6s) Starting test(python): pyspark.sql.context Finished test(python): pyspark.sql.catalog (14s) Starting test(python): pyspark.sql.dataframe Finished test(python): pyspark.sql.avro.functions (15s) Starting test(python): pyspark.sql.functions Finished test(python): pyspark.sql.column (24s) Starting test(python): pyspark.sql.group Finished test(python): pyspark.sql.context (20s) Starting test(python): pyspark.sql.pandas.conversion Finished test(python): pyspark.sql.pandas.conversion (13s) Starting test(python): pyspark.sql.pandas.group_ops Finished test(python): pyspark.sql.group (36s) Starting test(python): pyspark.sql.pandas.map_ops Finished test(python): pyspark.sql.pandas.group_ops (21s) Starting test(python): pyspark.sql.pandas.serializers Finished test(python): pyspark.sql.pandas.serializers (0s) Starting test(python): pyspark.sql.pandas.typehints Finished test(python): pyspark.sql.pandas.typehints (0s) Starting test(python): pyspark.sql.pandas.types Finished test(python): pyspark.sql.pandas.types (0s) Starting test(python): pyspark.sql.pandas.utils Finished test(python): pyspark.sql.pandas.utils (0s) Starting test(python): pyspark.sql.readwriter Finished test(python): pyspark.sql.dataframe (56s) Starting test(python): pyspark.sql.session Finished test(python): pyspark.sql.functions (57s) Starting test(python): pyspark.sql.streaming Finished test(python): pyspark.sql.pandas.map_ops (12s) Starting test(python): pyspark.sql.types Finished 
test(python): pyspark.sql.types (10s) Starting test(python): pyspark.sql.udf Finished test(python): pyspark.sql.streaming (16s) Starting test(python): pyspark.sql.window Finished test(python): pyspark.sql.session (19s) Starting test(python): pyspark.streaming.util Finished test(python): pyspark.streaming.util (0s) Starting test(python): pyspark.util Finished test(python): pyspark.util (0s) Finished test(python): pyspark.sql.readwriter (24s) Finished test(python): pyspark.sql.udf (13s) Finished test(python): pyspark.sql.window (14s) Tests passed in 780 seconds ``` Closes #30277 from HyukjinKwon/SPARK-33371. Authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- python/pyspark/tests/test_taskcontext.py | 8 ++++---- python/pyspark/tests/test_worker.py | 2 +- python/setup.py | 1 + 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/python/pyspark/tests/test_taskcontext.py b/python/pyspark/tests/test_taskcontext.py index f0e6672957c13..f6e275abfb1e7 100644 --- a/python/pyspark/tests/test_taskcontext.py +++ b/python/pyspark/tests/test_taskcontext.py @@ -124,12 +124,12 @@ def f(iterator): def context_barrier(x): tc = BarrierTaskContext.get() - time.sleep(random.randint(1, 10)) + time.sleep(random.randint(1, 5) * 2) tc.barrier() return time.time() times = rdd.barrier().mapPartitions(f).map(context_barrier).collect() - self.assertTrue(max(times) - min(times) < 1) + self.assertTrue(max(times) - min(times) < 2) def test_all_gather(self): """ @@ -232,7 +232,7 @@ def f(iterator): def context_barrier(x): tc = BarrierTaskContext.get() - time.sleep(random.randint(1, 10)) + time.sleep(random.randint(1, 5) * 2) tc.barrier() return (time.time(), os.getpid()) @@ -240,7 +240,7 @@ def context_barrier(x): times = list(map(lambda x: x[0], result)) pids = list(map(lambda x: x[1], result)) # check both barrier and worker reuse effect - self.assertTrue(max(times) - min(times) < 1) + self.assertTrue(max(times) - min(times) < 2) for pid in pids: self.assertTrue(pid in worker_pids) diff --git a/python/pyspark/tests/test_worker.py b/python/pyspark/tests/test_worker.py index bfaf3a3186cad..8039c0661dd0b 100644 --- a/python/pyspark/tests/test_worker.py +++ b/python/pyspark/tests/test_worker.py @@ -134,7 +134,7 @@ def count(): t.daemon = True t.start() t.join(5) - self.assertTrue(not t.isAlive()) + self.assertTrue(not t.is_alive()) self.assertEqual(100000, rdd.count()) def test_with_different_versions_of_python(self): diff --git a/python/setup.py b/python/setup.py index 8d9cf2ee5459a..f5836ecf5fbfc 100755 --- a/python/setup.py +++ b/python/setup.py @@ -266,6 +266,7 @@ def run(self): 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: Implementation :: CPython', 'Programming Language :: Python :: Implementation :: PyPy', 'Typing :: Typed'], From 1090b1b00a4aa6168fd5b69f227f28309c42b6fd Mon Sep 17 00:00:00 2001 From: Hannah Amundson Date: Sun, 8 Nov 2020 20:29:24 +0900 Subject: [PATCH 0411/1009] [SPARK-32860][DOCS][SQL] Updating documentation about map support in Encoders ### What changes were proposed in this pull request? Javadocs updated for the encoder to include maps as a collection type ### Why are the changes needed? The javadocs were not updated with fix SPARK-16706 ### Does this PR introduce _any_ user-facing change? Yes, the javadocs are updated ### How was this patch tested? 
sbt was run to ensure it meets scalastyle Closes #30274 from hannahkamundson/SPARK-32860. Lead-authored-by: Hannah Amundson Co-authored-by: Hannah <48397717+hannahkamundson@users.noreply.github.com> Signed-off-by: HyukjinKwon --- sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala index 5d31b5bbf12af..24045b5a43a64 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala @@ -146,7 +146,7 @@ object Encoders { * - String * - java.math.BigDecimal, java.math.BigInteger * - time related: java.sql.Date, java.sql.Timestamp, java.time.LocalDate, java.time.Instant - * - collection types: only array and java.util.List currently, map support is in progress + * - collection types: array, java.util.List, and map * - nested java bean. * * @since 1.6.0 From 02fd52cfbc8989a41f69bafd7d432ec3a365c138 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Sun, 8 Nov 2020 12:51:48 -0600 Subject: [PATCH 0412/1009] [SPARK-33352][CORE][SQL][SS][MLLIB][AVRO][K8S] Fix procedure-like declaration compilation warnings in Scala 2.13 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? There are two similar compilation warnings about procedure-like declaration in Scala 2.13: ``` [WARNING] [Warn] /spark/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala:70: procedure syntax is deprecated for constructors: add `=`, as in method definition ``` and ``` [WARNING] [Warn] /spark/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala:211: procedure syntax is deprecated: instead, add `: Unit =` to explicitly declare `run`'s return type ``` this pr is the first part to resolve SPARK-33352: - For constructors method definition add `=` to convert to function syntax - For without `return type` methods definition add `: Unit =` to convert to function syntax ### Why are the changes needed? Eliminate compilation warnings in Scala 2.13 and this change should be compatible with Scala 2.12 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass the Jenkins or GitHub Action Closes #30255 from LuciferYang/SPARK-29392-FOLLOWUP.1. 
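As a compact illustration of the two syntactic rewrites described above (the class is made up; the deprecated forms still compile on 2.12/2.13, only the warnings go away):

```scala
class Counter(start: Int) {
  private var n = start

  // before: def this() { this(0) }        <- procedure-like auxiliary constructor
  def this() = this(0)                     // after: add `=`

  // before: def reset() { n = 0 }         <- procedure syntax for a method
  def reset(): Unit = { n = 0 }            // after: add `: Unit =`
}
```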
Authored-by: yangjie01 Signed-off-by: Sean Owen --- .../scala/org/apache/spark/HeartbeatReceiver.scala | 2 +- .../main/scala/org/apache/spark/TaskEndReason.scala | 4 ++-- .../org/apache/spark/executor/ExecutorMetrics.scala | 6 +++--- .../org/apache/spark/rdd/InputFileBlockHolder.scala | 2 +- .../org/apache/spark/rdd/LocalCheckpointRDD.scala | 2 +- .../scala/org/apache/spark/scheduler/MapStatus.scala | 2 +- .../org/apache/spark/scheduler/ShuffleMapTask.scala | 2 +- .../cluster/StandaloneSchedulerBackend.scala | 3 ++- .../apache/spark/shuffle/FetchFailedException.scala | 2 +- .../spark/storage/BlockManagerDecommissioner.scala | 2 +- .../org/apache/spark/storage/StorageLevel.scala | 2 +- .../org/apache/spark/storage/StorageUtils.scala | 2 +- .../apache/spark/util/UninterruptibleThread.scala | 2 +- .../util/collection/ExternalAppendOnlyMap.scala | 2 +- .../executor/CoarseGrainedExecutorBackendSuite.scala | 2 +- .../spark/resource/ResourceProfileManagerSuite.scala | 4 ++-- .../apache/spark/resource/ResourceProfileSuite.scala | 4 ++-- .../util/SparkUncaughtExceptionHandlerSuite.scala | 2 +- .../org/apache/spark/sql/avro/AvroDeserializer.scala | 2 +- .../org/apache/spark/sql/avro/AvroSerializer.scala | 2 +- .../spark/sql/jdbc/DockerJDBCIntegrationSuite.scala | 4 ++-- .../org/apache/spark/metrics/sink/GangliaSink.scala | 6 +++--- .../scala/org/apache/spark/ml/stat/Summarizer.scala | 2 +- .../apache/spark/mllib/feature/ChiSqSelector.scala | 2 +- .../apache/spark/mllib/feature/StandardScaler.scala | 2 +- .../spark/mllib/tree/configuration/Strategy.scala | 4 ++-- .../spark/deploy/k8s/integrationtest/Utils.scala | 6 +++--- .../catalyst/analysis/AlreadyExistException.scala | 2 +- .../spark/sql/catalyst/catalog/SessionCatalog.scala | 4 ++-- .../catalyst/expressions/datetimeExpressions.scala | 2 +- .../scala/org/apache/spark/sql/SparkSession.scala | 2 +- .../execution/ExternalAppendOnlyUnsafeRowArray.scala | 2 +- .../apache/spark/sql/execution/command/views.scala | 2 +- .../datasources/parquet/ParquetReadSupport.scala | 2 +- .../org/apache/spark/sql/streaming/progress.scala | 2 +- .../org/apache/spark/sql/test/TestSQLContext.scala | 4 ++-- .../sql/hive/thriftserver/SparkSQLCLIDriver.scala | 2 +- .../sql/hive/thriftserver/HiveSessionImplSuite.scala | 2 +- .../org/apache/spark/sql/hive/test/TestHive.scala | 12 ++++++------ 39 files changed, 58 insertions(+), 57 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala b/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala index c99698f99d904..233ad884a721a 100644 --- a/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala +++ b/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala @@ -67,7 +67,7 @@ private[spark] case class HeartbeatResponse(reregisterBlockManager: Boolean) private[spark] class HeartbeatReceiver(sc: SparkContext, clock: Clock) extends SparkListener with ThreadSafeRpcEndpoint with Logging { - def this(sc: SparkContext) { + def this(sc: SparkContext) = { this(sc, new SystemClock) } diff --git a/core/src/main/scala/org/apache/spark/TaskEndReason.scala b/core/src/main/scala/org/apache/spark/TaskEndReason.scala index b304eb97fbdf6..5dc70e9834b0b 100644 --- a/core/src/main/scala/org/apache/spark/TaskEndReason.scala +++ b/core/src/main/scala/org/apache/spark/TaskEndReason.scala @@ -143,12 +143,12 @@ case class ExceptionFailure( private[spark] def this( e: Throwable, accumUpdates: Seq[AccumulableInfo], - preserveCause: Boolean) { + preserveCause: Boolean) = { this(e.getClass.getName, e.getMessage, 
e.getStackTrace, Utils.exceptionString(e), if (preserveCause) Some(new ThrowableSerializationWrapper(e)) else None, accumUpdates) } - private[spark] def this(e: Throwable, accumUpdates: Seq[AccumulableInfo]) { + private[spark] def this(e: Throwable, accumUpdates: Seq[AccumulableInfo]) = { this(e, accumUpdates, preserveCause = true) } diff --git a/core/src/main/scala/org/apache/spark/executor/ExecutorMetrics.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorMetrics.scala index d9aa3ef60fc9e..486e59652218b 100644 --- a/core/src/main/scala/org/apache/spark/executor/ExecutorMetrics.scala +++ b/core/src/main/scala/org/apache/spark/executor/ExecutorMetrics.scala @@ -44,12 +44,12 @@ class ExecutorMetrics private[spark] extends Serializable { /** Returns true if the values for the metrics have been set, false otherwise. */ def isSet(): Boolean = metrics(0) > -1 - private[spark] def this(metrics: Array[Long]) { + private[spark] def this(metrics: Array[Long]) = { this() Array.copy(metrics, 0, this.metrics, 0, Math.min(metrics.size, this.metrics.size)) } - private[spark] def this(metrics: AtomicLongArray) { + private[spark] def this(metrics: AtomicLongArray) = { this() ExecutorMetricType.metricToOffset.foreach { case (_, i) => this.metrics(i) = metrics.get(i) @@ -61,7 +61,7 @@ class ExecutorMetrics private[spark] extends Serializable { * * @param executorMetrics map of executor metric name to value */ - private[spark] def this(executorMetrics: Map[String, Long]) { + private[spark] def this(executorMetrics: Map[String, Long]) = { this() ExecutorMetricType.metricToOffset.foreach { case (name, idx) => metrics(idx) = executorMetrics.getOrElse(name, 0L) diff --git a/core/src/main/scala/org/apache/spark/rdd/InputFileBlockHolder.scala b/core/src/main/scala/org/apache/spark/rdd/InputFileBlockHolder.scala index 1beb085db27d9..8230144025feb 100644 --- a/core/src/main/scala/org/apache/spark/rdd/InputFileBlockHolder.scala +++ b/core/src/main/scala/org/apache/spark/rdd/InputFileBlockHolder.scala @@ -34,7 +34,7 @@ private[spark] object InputFileBlockHolder { * @param length size of the block, in bytes, or -1 if not available. 
*/ private class FileBlock(val filePath: UTF8String, val startOffset: Long, val length: Long) { - def this() { + def this() = { this(UTF8String.fromString(""), -1, -1) } } diff --git a/core/src/main/scala/org/apache/spark/rdd/LocalCheckpointRDD.scala b/core/src/main/scala/org/apache/spark/rdd/LocalCheckpointRDD.scala index 503aa0dffc9f3..113ed2db7f546 100644 --- a/core/src/main/scala/org/apache/spark/rdd/LocalCheckpointRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/LocalCheckpointRDD.scala @@ -40,7 +40,7 @@ private[spark] class LocalCheckpointRDD[T: ClassTag]( numPartitions: Int) extends CheckpointRDD[T](sc) { - def this(rdd: RDD[T]) { + def this(rdd: RDD[T]) = { this(rdd.context, rdd.id, rdd.partitions.length) } diff --git a/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala index cfc2e141290c4..1239c32cee3ab 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala @@ -123,7 +123,7 @@ private[spark] class CompressedMapStatus( // For deserialization only protected def this() = this(null, null.asInstanceOf[Array[Byte]], -1) - def this(loc: BlockManagerId, uncompressedSizes: Array[Long], mapTaskId: Long) { + def this(loc: BlockManagerId, uncompressedSizes: Array[Long], mapTaskId: Long) = { this(loc, uncompressedSizes.map(MapStatus.compressSize), mapTaskId) } diff --git a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala index a0ba9208ea647..89db3a86f4ce8 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala @@ -66,7 +66,7 @@ private[spark] class ShuffleMapTask( with Logging { /** A constructor used only in test suites. This does not require passing in an RDD. 
*/ - def this(partitionId: Int) { + def this(partitionId: Int) = { this(0, 0, null, new Partition { override def index: Int = 0 }, null, new Properties, null) } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala index b9ac8d2ba2784..c14b2d4e5df31 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala @@ -177,7 +177,8 @@ private[spark] class StandaloneSchedulerBackend( removeExecutor(fullId.split("/")(1), reason) } - override def executorDecommissioned(fullId: String, decommissionInfo: ExecutorDecommissionInfo) { + override def executorDecommissioned(fullId: String, + decommissionInfo: ExecutorDecommissionInfo): Unit = { logInfo(s"Asked to decommission executor $fullId") val execId = fullId.split("/")(1) decommissionExecutors( diff --git a/core/src/main/scala/org/apache/spark/shuffle/FetchFailedException.scala b/core/src/main/scala/org/apache/spark/shuffle/FetchFailedException.scala index 6509a04dc4893..208c676a1c352 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/FetchFailedException.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/FetchFailedException.scala @@ -48,7 +48,7 @@ private[spark] class FetchFailedException( mapTaskId: Long, mapIndex: Int, reduceId: Int, - cause: Throwable) { + cause: Throwable) = { this(bmAddress, shuffleId, mapTaskId, mapIndex, reduceId, cause.getMessage, cause) } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala index d1e89418a4897..9129e8012dc59 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala @@ -208,7 +208,7 @@ private[storage] class BlockManagerDecommissioner( private val shuffleBlockMigrationRefreshRunnable = new Runnable { val sleepInterval = conf.get(config.STORAGE_DECOMMISSION_REPLICATION_REATTEMPT_INTERVAL) - override def run() { + override def run(): Unit = { assert(conf.get(config.STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED)) while (!stopped && !stoppedShuffle && !Thread.interrupted()) { try { diff --git a/core/src/main/scala/org/apache/spark/storage/StorageLevel.scala b/core/src/main/scala/org/apache/spark/storage/StorageLevel.scala index f6db73ba805b1..ce89c2ae90b49 100644 --- a/core/src/main/scala/org/apache/spark/storage/StorageLevel.scala +++ b/core/src/main/scala/org/apache/spark/storage/StorageLevel.scala @@ -45,7 +45,7 @@ class StorageLevel private( extends Externalizable { // TODO: Also add fields for caching priority, dataset ID, and flushing. 
- private def this(flags: Int, replication: Int) { + private def this(flags: Int, replication: Int) = { this((flags & 8) != 0, (flags & 4) != 0, (flags & 2) != 0, (flags & 1) != 0, replication) } diff --git a/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala b/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala index fc426eee608c0..147731a0fb547 100644 --- a/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala +++ b/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala @@ -61,7 +61,7 @@ private[spark] class StorageStatus( maxMemory: Long, maxOnHeapMem: Option[Long], maxOffHeapMem: Option[Long], - initialBlocks: Map[BlockId, BlockStatus]) { + initialBlocks: Map[BlockId, BlockStatus]) = { this(bmid, maxMemory, maxOnHeapMem, maxOffHeapMem) initialBlocks.foreach { case (bid, bstatus) => addBlock(bid, bstatus) } } diff --git a/core/src/main/scala/org/apache/spark/util/UninterruptibleThread.scala b/core/src/main/scala/org/apache/spark/util/UninterruptibleThread.scala index 6a58ec142dd7f..24788d69121b2 100644 --- a/core/src/main/scala/org/apache/spark/util/UninterruptibleThread.scala +++ b/core/src/main/scala/org/apache/spark/util/UninterruptibleThread.scala @@ -31,7 +31,7 @@ private[spark] class UninterruptibleThread( target: Runnable, name: String) extends Thread(target, name) { - def this(name: String) { + def this(name: String) = { this(null, name) } diff --git a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala index 7f40b469a95e9..731131b688ca7 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala @@ -76,7 +76,7 @@ class ExternalAppendOnlyMap[K, V, C]( mergeValue: (C, V) => C, mergeCombiners: (C, C) => C, serializer: Serializer, - blockManager: BlockManager) { + blockManager: BlockManager) = { this(createCombiner, mergeValue, mergeCombiners, serializer, blockManager, TaskContext.get()) } diff --git a/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala b/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala index e0b586074b89e..319dcfeecee24 100644 --- a/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala +++ b/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala @@ -106,7 +106,7 @@ class CoarseGrainedExecutorBackendSuite extends SparkFunSuite testParsingMultipleResources(conf, ResourceProfile.getOrCreateDefaultProfile(conf)) } - def testParsingMultipleResources(conf: SparkConf, resourceProfile: ResourceProfile) { + def testParsingMultipleResources(conf: SparkConf, resourceProfile: ResourceProfile): Unit = { val serializer = new JavaSerializer(conf) val env = createMockEnv(conf, serializer) // we don't really use this, just need it to get at the parser function diff --git a/core/src/test/scala/org/apache/spark/resource/ResourceProfileManagerSuite.scala b/core/src/test/scala/org/apache/spark/resource/ResourceProfileManagerSuite.scala index f4521738c4870..ddfe80ee81e6f 100644 --- a/core/src/test/scala/org/apache/spark/resource/ResourceProfileManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/resource/ResourceProfileManagerSuite.scala @@ -24,7 +24,7 @@ import org.apache.spark.scheduler.LiveListenerBus class ResourceProfileManagerSuite extends SparkFunSuite { - override 
def beforeAll() { + override def beforeAll(): Unit = { try { ResourceProfile.clearDefaultProfile() } finally { @@ -32,7 +32,7 @@ class ResourceProfileManagerSuite extends SparkFunSuite { } } - override def afterEach() { + override def afterEach(): Unit = { try { ResourceProfile.clearDefaultProfile() } finally { diff --git a/core/src/test/scala/org/apache/spark/resource/ResourceProfileSuite.scala b/core/src/test/scala/org/apache/spark/resource/ResourceProfileSuite.scala index d0479ca7db40c..f8c4a3a68f367 100644 --- a/core/src/test/scala/org/apache/spark/resource/ResourceProfileSuite.scala +++ b/core/src/test/scala/org/apache/spark/resource/ResourceProfileSuite.scala @@ -24,7 +24,7 @@ import org.apache.spark.resource.TestResourceIDs._ class ResourceProfileSuite extends SparkFunSuite { - override def beforeAll() { + override def beforeAll(): Unit = { try { ResourceProfile.clearDefaultProfile() } finally { @@ -32,7 +32,7 @@ class ResourceProfileSuite extends SparkFunSuite { } } - override def afterEach() { + override def afterEach(): Unit = { try { ResourceProfile.clearDefaultProfile() } finally { diff --git a/core/src/test/scala/org/apache/spark/util/SparkUncaughtExceptionHandlerSuite.scala b/core/src/test/scala/org/apache/spark/util/SparkUncaughtExceptionHandlerSuite.scala index 90741a6bde7f0..9e23b25493dfe 100644 --- a/core/src/test/scala/org/apache/spark/util/SparkUncaughtExceptionHandlerSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/SparkUncaughtExceptionHandlerSuite.scala @@ -80,7 +80,7 @@ object ThrowableThrower { // a thread that uses SparkUncaughtExceptionHandler and throws a Throwable by name class ThrowerThread(name: String, exitOnUncaughtException: Boolean) extends Thread { - override def run() { + override def run(): Unit = { Thread.setDefaultUncaughtExceptionHandler( new SparkUncaughtExceptionHandler(exitOnUncaughtException)) throw ThrowableTypes.getThrowableByName(name) diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala index aabf9d92ce7d8..85416b80cfbb7 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala @@ -48,7 +48,7 @@ private[sql] class AvroDeserializer( datetimeRebaseMode: LegacyBehaviorPolicy.Value, filters: StructFilters) { - def this(rootAvroType: Schema, rootCatalystType: DataType) { + def this(rootAvroType: Schema, rootCatalystType: DataType) = { this( rootAvroType, rootCatalystType, diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala index 0ea95d1c0db5d..33c6022ff7b6d 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala @@ -49,7 +49,7 @@ private[sql] class AvroSerializer( nullable: Boolean, datetimeRebaseMode: LegacyBehaviorPolicy.Value) extends Logging { - def this(rootCatalystType: DataType, rootAvroType: Schema, nullable: Boolean) { + def this(rootCatalystType: DataType, rootAvroType: Schema, nullable: Boolean) = { this(rootCatalystType, rootAvroType, nullable, LegacyBehaviorPolicy.withName(SQLConf.get.getConf( SQLConf.LEGACY_AVRO_REBASE_MODE_IN_WRITE))) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala 
b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala index 24927da16d50c..ad6a829fffd0d 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala @@ -108,7 +108,7 @@ abstract class DockerJDBCIntegrationSuite extends SharedSparkSession with Eventu private var containerId: String = _ protected var jdbcUrl: String = _ - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() try { docker = DefaultDockerClient.fromEnv.build() @@ -174,7 +174,7 @@ abstract class DockerJDBCIntegrationSuite extends SharedSparkSession with Eventu } } - override def afterAll() { + override def afterAll(): Unit = { try { if (docker != null) { try { diff --git a/external/spark-ganglia-lgpl/src/main/scala/org/apache/spark/metrics/sink/GangliaSink.scala b/external/spark-ganglia-lgpl/src/main/scala/org/apache/spark/metrics/sink/GangliaSink.scala index 4fb9f2f849085..7266187597589 100644 --- a/external/spark-ganglia-lgpl/src/main/scala/org/apache/spark/metrics/sink/GangliaSink.scala +++ b/external/spark-ganglia-lgpl/src/main/scala/org/apache/spark/metrics/sink/GangliaSink.scala @@ -81,15 +81,15 @@ class GangliaSink(val property: Properties, val registry: MetricRegistry, .withDMax(dmax) .build(ganglia) - override def start() { + override def start(): Unit = { reporter.start(pollPeriod, pollUnit) } - override def stop() { + override def stop(): Unit = { reporter.stop() } - override def report() { + override def report(): Unit = { reporter.report() } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala b/mllib/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala index 4db518bd4f9ba..397dbb28f8e3e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala @@ -435,7 +435,7 @@ private[spark] class SummarizerBuffer( private var currMax: Array[Double] = null private var currMin: Array[Double] = null - def this() { + def this() = { this( Seq( SummaryBuilderImpl.Mean, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala index d970c3c3d6131..70125d2c4c6af 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala @@ -195,7 +195,7 @@ class ChiSqSelector @Since("2.1.0") () extends Serializable { * The is the same to call this() and setNumTopFeatures(numTopFeatures) */ @Since("1.3.0") - def this(numTopFeatures: Int) { + def this(numTopFeatures: Int) = { this() this.numTopFeatures = numTopFeatures } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala index 78c974e22f2cf..8f9d6d07a4c36 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala @@ -83,7 +83,7 @@ class StandardScalerModel @Since("1.3.0") ( /** */ @Since("1.3.0") - def this(std: Vector, mean: Vector) { + def this(std: Vector, mean: Vector) = { this(std, mean, withStd = std != null, withMean = mean != null) require(this.withStd || this.withMean, "at least one of std or mean vectors must be 
provided") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala index 09e3e22030546..0f6c7033687fa 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala @@ -116,7 +116,7 @@ class Strategy @Since("1.3.0") ( maxMemoryInMB: Int, subsamplingRate: Double, useNodeIdCache: Boolean, - checkpointInterval: Int) { + checkpointInterval: Int) = { this(algo, impurity, maxDepth, numClasses, maxBins, quantileCalculationStrategy, categoricalFeaturesInfo, minInstancesPerNode, minInfoGain, maxMemoryInMB, subsamplingRate, useNodeIdCache, checkpointInterval, 0.0) @@ -133,7 +133,7 @@ class Strategy @Since("1.3.0") ( maxDepth: Int, numClasses: Int, maxBins: Int, - categoricalFeaturesInfo: java.util.Map[java.lang.Integer, java.lang.Integer]) { + categoricalFeaturesInfo: java.util.Map[java.lang.Integer, java.lang.Integer]) = { this(algo, impurity, maxDepth, numClasses, maxBins, Sort, categoricalFeaturesInfo.asInstanceOf[java.util.Map[Int, Int]].asScala.toMap, minWeightFractionPerNode = 0.0) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala index 0000a94725763..9bcd6e9503532 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala @@ -60,15 +60,15 @@ object Utils extends Logging { val openLatch: CountDownLatch = new CountDownLatch(1) val closeLatch: CountDownLatch = new CountDownLatch(1) - override def onOpen(response: Response) { + override def onOpen(response: Response): Unit = { openLatch.countDown() } - override def onClose(a: Int, b: String) { + override def onClose(a: Int, b: String): Unit = { closeLatch.countDown() } - override def onFailure(e: Throwable, r: Response) { + override def onFailure(e: Throwable, r: Response): Unit = { } def waitForInputStreamToConnect(): Unit = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlreadyExistException.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlreadyExistException.scala index c50ba623c27b2..70f821d5f8af0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlreadyExistException.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlreadyExistException.scala @@ -64,7 +64,7 @@ class PartitionAlreadyExistsException(message: String) extends AnalysisException } class PartitionsAlreadyExistException(message: String) extends AnalysisException(message) { - def this(db: String, table: String, specs: Seq[TablePartitionSpec]) { + def this(db: String, table: String, specs: Seq[TablePartitionSpec]) = { this(s"The following partitions already exists in table '$table' database '$db':\n" + specs.mkString("\n===\n")) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 4865629329831..fa5634935ff29 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -72,7 +72,7 @@ class SessionCatalog( def this( externalCatalog: ExternalCatalog, functionRegistry: FunctionRegistry, - conf: SQLConf) { + conf: SQLConf) = { this( () => externalCatalog, () => new GlobalTempViewManager(conf.getConf(GLOBAL_TEMP_DATABASE)), @@ -84,7 +84,7 @@ class SessionCatalog( } // For testing only. - def this(externalCatalog: ExternalCatalog) { + def this(externalCatalog: ExternalCatalog) = { this( externalCatalog, new SimpleFunctionRegistry, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 223d0e661ed3e..97aacb3f7530c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -1421,7 +1421,7 @@ case class MonthsBetween( case class ParseToDate(left: Expression, format: Option[Expression], child: Expression) extends RuntimeReplaceable { - def this(left: Expression, format: Expression) { + def this(left: Expression, format: Expression) = { this(left, Option(format), Cast(GetTimestamp(left, format), DateType)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index c4aadfb1d66bd..592f209475baf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -94,7 +94,7 @@ class SparkSession private( * since that would cause every new session to reinvoke Spark Session Extensions on the currently * running extensions. */ - private[sql] def this(sc: SparkContext) { + private[sql] def this(sc: SparkContext) = { this(sc, None, None, SparkSession.applyExtensions( sc.getConf.get(StaticSQLConf.SPARK_SESSION_EXTENSIONS).getOrElse(Seq.empty), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArray.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArray.scala index ac282ea2e94f5..993627847c08c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArray.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArray.scala @@ -52,7 +52,7 @@ private[sql] class ExternalAppendOnlyUnsafeRowArray( numRowsInMemoryBufferThreshold: Int, numRowsSpillThreshold: Int) extends Logging { - def this(numRowsInMemoryBufferThreshold: Int, numRowsSpillThreshold: Int) { + def this(numRowsInMemoryBufferThreshold: Int, numRowsSpillThreshold: Int) = { this( TaskContext.get().taskMemoryManager(), SparkEnv.get.blockManager, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index bcc0e1fd82d7a..43bc50522f2a8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -173,7 +173,7 @@ case class CreateViewCommand( // added/generated from a temporary view. // 2) The temp functions are represented by multiple classes. Most are inaccessible from this // package (e.g., HiveGenericUDF). 
- def verify(child: LogicalPlan) { + def verify(child: LogicalPlan): Unit = { child.collect { // Disallow creating permanent views based on temporary views. case UnresolvedRelation(nameParts, _, _) if catalog.isTempView(nameParts) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala index e74872da0829d..4a1f9154488af 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala @@ -59,7 +59,7 @@ class ParquetReadSupport( extends ReadSupport[InternalRow] with Logging { private var catalystRequestedSchema: StructType = _ - def this() { + def this() = { // We need a zero-arg constructor for SpecificParquetRecordReaderBase. But that is only // used in the vectorized reader, where we get the convertTz/rebaseDateTime value directly, // and the values here are ignored. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala index 482f2b4bf4ed7..59dc5bc1f37df 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala @@ -231,7 +231,7 @@ class SinkProgress protected[sql]( val numOutputRows: Long) extends Serializable { /** SinkProgress without custom metrics. */ - protected[sql] def this(description: String) { + protected[sql] def this(description: String) = { this(description, DEFAULT_NUM_OUTPUT_ROWS) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala index a477eed4478e8..36488bec7bb53 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala @@ -25,12 +25,12 @@ import org.apache.spark.sql.internal.{SessionState, SessionStateBuilder, SQLConf * A special `SparkSession` prepared for testing. 
*/ private[spark] class TestSparkSession(sc: SparkContext) extends SparkSession(sc) { self => - def this(sparkConf: SparkConf) { + def this(sparkConf: SparkConf) = { this(new SparkContext("local[2]", "test-sql-context", sparkConf.set("spark.sql.testkey", "true"))) } - def this() { + def this() = { this(new SparkConf) } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 965f28ebe0840..8550597da936e 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -465,7 +465,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { oldSignal = Signal.handle(interruptSignal, new SignalHandler() { private var interruptRequested: Boolean = false - override def handle(signal: Signal) { + override def handle(signal: Signal): Unit = { val initialRequest = !interruptRequested interruptRequested = true diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveSessionImplSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveSessionImplSuite.scala index 13dc74b92d4b3..7c42348f74453 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveSessionImplSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveSessionImplSuite.scala @@ -33,7 +33,7 @@ class HiveSessionImplSuite extends SparkFunSuite { private var session: HiveSessionImpl = _ private var operationManager: OperationManagerMock = _ - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() val sessionManager = new SessionManager(null) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index 0c601ef798dcc..082aa8d765e9c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -130,11 +130,11 @@ class TestHiveContext( * If loadTestTables is false, no test tables are loaded. Note that this flag can only be true * when running in the JVM, i.e. it needs to be false when calling from Python. 
*/ - def this(sc: SparkContext, loadTestTables: Boolean = true) { + def this(sc: SparkContext, loadTestTables: Boolean = true) = { this(new TestHiveSparkSession(HiveUtils.withHiveExternalCatalog(sc), loadTestTables)) } - def this(sc: SparkContext, hiveClient: HiveClient) { + def this(sc: SparkContext, hiveClient: HiveClient) = { this(new TestHiveSparkSession(HiveUtils.withHiveExternalCatalog(sc), hiveClient, loadTestTables = false)) @@ -178,7 +178,7 @@ private[hive] class TestHiveSparkSession( private val loadTestTables: Boolean) extends SparkSession(sc) with Logging { self => - def this(sc: SparkContext, loadTestTables: Boolean) { + def this(sc: SparkContext, loadTestTables: Boolean) = { this( sc, existingSharedState = None, @@ -186,7 +186,7 @@ private[hive] class TestHiveSparkSession( loadTestTables) } - def this(sc: SparkContext, hiveClient: HiveClient, loadTestTables: Boolean) { + def this(sc: SparkContext, hiveClient: HiveClient, loadTestTables: Boolean) = { this( sc, existingSharedState = Some(new TestHiveSharedState(sc, Some(hiveClient))), @@ -584,11 +584,11 @@ private[hive] class TestHiveQueryExecution( logicalPlan: LogicalPlan) extends QueryExecution(sparkSession, logicalPlan) with Logging { - def this(sparkSession: TestHiveSparkSession, sql: String) { + def this(sparkSession: TestHiveSparkSession, sql: String) = { this(sparkSession, sparkSession.sessionState.sqlParser.parsePlan(sql)) } - def this(sql: String) { + def this(sql: String) = { this(TestHive.sparkSession, sql) } From c269b53f073b1ae448e24cf346917397f5e10285 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sun, 8 Nov 2020 18:44:26 -0800 Subject: [PATCH 0413/1009] [SPARK-33384][SS] Delete temporary file when cancelling a write to the final path even if the underlying stream throws an error ### What changes were proposed in this pull request? In `RenameBasedFSDataOutputStream.cancel`, we do two things in a single try/catch block: close the underlying stream and delete the temporary file. Closing the `OutputStream` can throw an `IOException`, so we may end up skipping the deletion of the temporary file. This patch proposes to delete the temporary file even if the underlying stream throws an error. ### Why are the changes needed? To avoid leaving temporary files behind when cancelling a write in `RenameBasedFSDataOutputStream`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Closes #30290 from viirya/SPARK-33384.
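To show the pattern in isolation, here is a minimal, self-contained sketch; `closeStream`, `deleteTempFile`, and `warn` are placeholder names invented for illustration rather than the actual Spark APIs, and the real change to `RenameBasedFSDataOutputStream.cancel` is the one in the diff below.
```
import scala.util.control.NonFatal

object CancelSketch {
  // Placeholders standing in for closing the underlying stream and deleting the temp file.
  def closeStream(): Unit = throw new java.io.IOException("close failed")
  def deleteTempFile(): Unit = println("temp file deleted")
  def warn(msg: String, e: Throwable): Unit = println(s"WARN: $msg ($e)")

  def cancel(): Unit = {
    try {
      // Close in an inner try/catch so a failure here cannot skip the cleanup below.
      try {
        closeStream()
      } catch {
        case NonFatal(e) => warn("error closing stream, continuing to delete temp file", e)
      }
      deleteTempFile()
    } catch {
      case NonFatal(e) => warn("error deleting temp file", e)
    }
  }
}
```
In this sketch, `CancelSketch.cancel()` still reaches the delete step even though `closeStream()` throws, which is the behavior the patch wants for the temporary path.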
Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../execution/streaming/CheckpointFileManager.scala | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CheckpointFileManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CheckpointFileManager.scala index 26f42b6e3f472..41b705514fb92 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CheckpointFileManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CheckpointFileManager.scala @@ -160,11 +160,17 @@ object CheckpointFileManager extends Logging { override def cancel(): Unit = synchronized { try { if (terminated) return - underlyingStream.close() + try { + underlyingStream.close() + } catch { + case NonFatal(e) => + logWarning(s"Error cancelling write to $finalPath, " + + s"continuing to delete temp path $tempPath", e) + } fm.delete(tempPath) } catch { case NonFatal(e) => - logWarning(s"Error cancelling write to $finalPath", e) + logWarning(s"Error deleting temp file $tempPath", e) } finally { terminated = true } From aa0849b46a43f0942e884816cbc771435571b564 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 8 Nov 2020 22:43:27 -0800 Subject: [PATCH 0414/1009] [SPARK-33387][CORE] Support ordered shuffle block migration ### What changes were proposed in this pull request? This PR aims to support sorted shuffle block migration. ### Why are the changes needed? Since the current shuffle block migration works in a random order, a failure during worker decommissioning affects all shuffles. We should finish the shuffles one by one to minimize the number of affected shuffles. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs with the newly added test case. Closes #30293 from dongjoon-hyun/SPARK-33387.
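As a tiny standalone sketch of the ordering (assuming a simplified `BlockInfo` case class as a stand-in for Spark's `ShuffleBlockInfo`), sorting by `(shuffleId, mapId)` makes the migration finish one shuffle completely before starting the next:
```
// Simplified stand-in for Spark's ShuffleBlockInfo, used only for this sketch.
case class BlockInfo(shuffleId: Int, mapId: Long)

val pending = Seq(BlockInfo(1, 0L), BlockInfo(0, 1L), BlockInfo(0, 0L), BlockInfo(1, 1L))

// Process shuffle 0 completely before shuffle 1, instead of relying on set-iteration order.
val ordered = pending.sortBy(b => (b.shuffleId, b.mapId))
// ordered == Seq(BlockInfo(0, 0L), BlockInfo(0, 1L), BlockInfo(1, 0L), BlockInfo(1, 1L))
```
With this ordering, an interruption partway through decommissioning leaves only the shuffles at the tail of the list unmigrated, rather than a random subset of all of them.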
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../storage/BlockManagerDecommissioner.scala | 1 + .../spark/storage/BlockManagerSuite.scala | 23 ++++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala index 9129e8012dc59..9699515c626bf 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala @@ -248,6 +248,7 @@ private[storage] class BlockManagerDecommissioner( logInfo("Offloading shuffle blocks") val localShuffles = bm.migratableResolver.getStoredShuffles().toSet val newShufflesToMigrate = (localShuffles.diff(migratingShuffles)).toSeq + .sortBy(b => (b.shuffleId, b.mapId)) shufflesToMigrate.addAll(newShufflesToMigrate.map(x => (x, 0)).asJava) migratingShuffles ++= newShufflesToMigrate logInfo(s"${newShufflesToMigrate.size} of ${localShuffles.size} local shuffles " + diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index 5450a4b67c00b..55280fc578310 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -57,7 +57,7 @@ import org.apache.spark.scheduler.{LiveListenerBus, MapStatus, SparkListenerBloc import org.apache.spark.scheduler.cluster.{CoarseGrainedClusterMessages, CoarseGrainedSchedulerBackend} import org.apache.spark.security.{CryptoStreamUtils, EncryptionFunSuite} import org.apache.spark.serializer.{JavaSerializer, KryoSerializer, SerializerManager} -import org.apache.spark.shuffle.{ShuffleBlockResolver, ShuffleManager} +import org.apache.spark.shuffle.{MigratableResolver, ShuffleBlockInfo, ShuffleBlockResolver, ShuffleManager} import org.apache.spark.shuffle.sort.SortShuffleManager import org.apache.spark.storage.BlockManagerMessages._ import org.apache.spark.util._ @@ -1974,6 +1974,27 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE } } + test("SPARK-33387 Support ordered shuffle block migration") { + val blocks: Seq[ShuffleBlockInfo] = Seq( + ShuffleBlockInfo(1, 0L), + ShuffleBlockInfo(0, 1L), + ShuffleBlockInfo(0, 0L), + ShuffleBlockInfo(1, 1L)) + val sortedBlocks = blocks.sortBy(b => (b.shuffleId, b.mapId)) + + val resolver = mock(classOf[MigratableResolver]) + when(resolver.getStoredShuffles).thenReturn(blocks) + + val bm = mock(classOf[BlockManager]) + when(bm.migratableResolver).thenReturn(resolver) + when(bm.getPeers(mc.any())).thenReturn(Seq.empty) + + val decomManager = new BlockManagerDecommissioner(conf, bm) + decomManager.refreshOffloadingShuffleBlocks() + + assert(sortedBlocks.sameElements(decomManager.shufflesToMigrate.asScala.map(_._1))) + } + class MockBlockTransferService(val maxFailures: Int) extends BlockTransferService { var numCalls = 0 var tempFileManager: DownloadFileManager = null From bfb257f078854ad587a9e2bfe548cdb7bf8786d4 Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Mon, 9 Nov 2020 07:02:14 +0000 Subject: [PATCH 0415/1009] [SPARK-32405][SQL] Apply table options while creating tables in JDBC Table Catalog ### What changes were proposed in this pull request? Currently in JDBCTableCatalog, we ignore the table options when creating table. 
``` // TODO (SPARK-32405): Apply table options while creating tables in JDBC Table Catalog if (!properties.isEmpty) { logWarning("Cannot create JDBC table with properties, these properties will be " + "ignored: " + properties.asScala.map { case (k, v) => s"$k=$v" }.mkString("[", ", ", "]")) } ``` ### Why are the changes needed? need to apply the table options when we create table ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? add new test Closes #30154 from huaxingao/table_options. Authored-by: Huaxin Gao Signed-off-by: Wenchen Fan --- .../sql/jdbc/v2/DB2IntegrationSuite.scala | 8 +++++ .../jdbc/v2/MsSqlServerIntegrationSuite.scala | 2 ++ .../sql/jdbc/v2/MySQLIntegrationSuite.scala | 8 +++++ .../jdbc/v2/PostgresIntegrationSuite.scala | 8 +++++ .../apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 30 ++++++++++++++++ .../datasources/jdbc/JDBCOptions.scala | 3 ++ .../datasources/jdbc/JdbcUtils.scala | 10 ++++++ .../v2/jdbc/JDBCTableCatalog.scala | 35 ++++++++++++++++--- .../apache/spark/sql/jdbc/DerbyDialect.scala | 8 ++++- .../apache/spark/sql/jdbc/JdbcDialects.scala | 7 +++- .../spark/sql/jdbc/MsSqlServerDialect.scala | 8 +++++ .../apache/spark/sql/jdbc/MySQLDialect.scala | 5 +++ .../v2/jdbc/JDBCTableCatalogSuite.scala | 26 ++++++++++++++ 13 files changed, 151 insertions(+), 7 deletions(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala index 5c1442283aaed..4b6461815d306 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala @@ -73,4 +73,12 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { }.getMessage assert(msg1.contains("Cannot update alt_table field ID: double cannot be cast to varchar")) } + + override def testCreateTableWithProperty(tbl: String): Unit = { + sql(s"CREATE TABLE $tbl (ID INT) USING _" + + s" TBLPROPERTIES('CCSID'='UNICODE')") + var t = spark.table(tbl) + var expectedSchema = new StructType().add("ID", IntegerType) + assert(t.schema === expectedSchema) + } } diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala index 905e32aaa918e..fd101607ad3ee 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala @@ -62,6 +62,8 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBC override def dataPreparation(conn: Connection): Unit = {} + override def notSupportsTableComment: Boolean = true + override def testUpdateColumnType(tbl: String): Unit = { sql(s"CREATE TABLE $tbl (ID INTEGER) USING _") var t = spark.table(tbl) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala index 6cf0f56ee7eeb..a81399fc2a4f7 100644 --- 
a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala @@ -106,4 +106,12 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { assert(msg.contains("UpdateColumnNullability is not supported")) } + + override def testCreateTableWithProperty(tbl: String): Unit = { + sql(s"CREATE TABLE $tbl (ID INT) USING _" + + s" TBLPROPERTIES('ENGINE'='InnoDB', 'DEFAULT CHARACTER SET'='utf8')") + var t = spark.table(tbl) + var expectedSchema = new StructType().add("ID", IntegerType) + assert(t.schema === expectedSchema) + } } diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala index 45994a5093748..df2c865e4d13b 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala @@ -66,4 +66,12 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTes }.getMessage assert(msg.contains("Cannot update alt_table field ID: string cannot be cast to int")) } + + override def testCreateTableWithProperty(tbl: String): Unit = { + sql(s"CREATE TABLE $tbl (ID INT) USING _" + + s" TBLPROPERTIES('TABLESPACE'='pg_default')") + var t = spark.table(tbl) + var expectedSchema = new StructType().add("ID", IntegerType) + assert(t.schema === expectedSchema) + } } diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index 92af29d9c9467..2e726b9e650b6 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.jdbc.v2 +import org.apache.log4j.Level + import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ @@ -28,6 +30,8 @@ private[v2] trait V2JDBCTest extends SharedSparkSession { // dialect specific update column type test def testUpdateColumnType(tbl: String): Unit + def notSupportsTableComment: Boolean = false + def testUpdateColumnNullability(tbl: String): Unit = { sql(s"CREATE TABLE $catalogName.alt_table (ID STRING NOT NULL) USING _") var t = spark.table(s"$catalogName.alt_table") @@ -54,6 +58,8 @@ private[v2] trait V2JDBCTest extends SharedSparkSession { assert(t.schema === expectedSchema) } + def testCreateTableWithProperty(tbl: String): Unit = {} + test("SPARK-33034: ALTER TABLE ... 
add new columns") { withTable(s"$catalogName.alt_table") { sql(s"CREATE TABLE $catalogName.alt_table (ID STRING) USING _") @@ -146,5 +152,29 @@ private[v2] trait V2JDBCTest extends SharedSparkSession { }.getMessage assert(msg.contains("Table not found")) } + + test("CREATE TABLE with table comment") { + withTable(s"$catalogName.new_table") { + val logAppender = new LogAppender("table comment") + withLogAppender(logAppender) { + sql(s"CREATE TABLE $catalogName.new_table(i INT) USING _ COMMENT 'this is a comment'") + } + val createCommentWarning = logAppender.loggingEvents + .filter(_.getLevel == Level.WARN) + .map(_.getRenderedMessage) + .exists(_.contains("Cannot create JDBC table comment")) + assert(createCommentWarning === notSupportsTableComment) + } + } + + test("CREATE TABLE with table property") { + withTable(s"$catalogName.new_table") { + val m = intercept[AnalysisException] { + sql(s"CREATE TABLE $catalogName.new_table (i INT) USING _ TBLPROPERTIES('a'='1')") + }.message + assert(m.contains("Failed table creation")) + testCreateTableWithProperty(s"$catalogName.new_table") + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala index e6fff8dbdbd7c..6e8b7ea678264 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala @@ -206,6 +206,8 @@ class JDBCOptions( } // The principal name of user's keytab file val principal = parameters.getOrElse(JDBC_PRINCIPAL, null) + + val tableComment = parameters.getOrElse(JDBC_TABLE_COMMENT, "").toString } class JdbcOptionsInWrite( @@ -260,4 +262,5 @@ object JDBCOptions { val JDBC_PUSHDOWN_PREDICATE = newOption("pushDownPredicate") val JDBC_KEYTAB = newOption("keytab") val JDBC_PRINCIPAL = newOption("principal") + val JDBC_TABLE_COMMENT = newOption("tableComment") } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index 9aaa55980436e..78f31fb80ecf6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -863,6 +863,7 @@ object JdbcUtils extends Logging { schema: StructType, caseSensitive: Boolean, options: JdbcOptionsInWrite): Unit = { + val dialect = JdbcDialects.get(options.url) val strSchema = schemaString( schema, caseSensitive, options.url, options.createTableColumnTypes) val createTableOptions = options.createTableOptions @@ -872,6 +873,15 @@ object JdbcUtils extends Logging { // E.g., "CREATE TABLE t (name string) ENGINE=InnoDB DEFAULT CHARSET=utf8" val sql = s"CREATE TABLE $tableName ($strSchema) $createTableOptions" executeStatement(conn, options, sql) + if (options.tableComment.nonEmpty) { + try { + executeStatement( + conn, options, dialect.getTableCommentQuery(tableName, options.tableComment)) + } catch { + case e: Exception => + logWarning("Cannot create JDBC table comment. 
The table comment will be ignored.") + } + } } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala index 8edc2fe5585e0..e96b37e05c762 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala @@ -21,6 +21,7 @@ import java.sql.{Connection, SQLException} import scala.collection.JavaConverters._ import org.apache.spark.internal.Logging +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{NoSuchNamespaceException, NoSuchTableException} import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog, TableChange} import org.apache.spark.sql.connector.expressions.Transform @@ -117,14 +118,38 @@ class JDBCTableCatalog extends TableCatalog with Logging { if (partitions.nonEmpty) { throw new UnsupportedOperationException("Cannot create JDBC table with partition") } - // TODO (SPARK-32405): Apply table options while creating tables in JDBC Table Catalog + + var tableOptions = options.parameters + (JDBCOptions.JDBC_TABLE_NAME -> getTableName(ident)) + var tableComment: String = "" + var tableProperties: String = "" if (!properties.isEmpty) { - logWarning("Cannot create JDBC table with properties, these properties will be " + - "ignored: " + properties.asScala.map { case (k, v) => s"$k=$v" }.mkString("[", ", ", "]")) + properties.asScala.map { + case (k, v) => k match { + case "comment" => tableComment = v + // ToDo: have a follow up to fail provider once unify create table syntax PR is merged + case "provider" => + case "owner" => // owner is ignored. It is default to current user name. + case "location" => + throw new AnalysisException("CREATE TABLE ... LOCATION ... is not supported in" + + " JDBC catalog.") + case _ => tableProperties = tableProperties + " " + s"$k $v" + } + } } - val writeOptions = new JdbcOptionsInWrite( - options.parameters + (JDBCOptions.JDBC_TABLE_NAME -> getTableName(ident))) + if (tableComment != "") { + tableOptions = tableOptions + (JDBCOptions.JDBC_TABLE_COMMENT -> tableComment) + } + if (tableProperties != "") { + // table property is set in JDBC_CREATE_TABLE_OPTIONS, which will be appended + // to CREATE TABLE statement. + // E.g., "CREATE TABLE t (name string) ENGINE InnoDB DEFAULT CHARACTER SET utf8" + // Spark doesn't check if these table properties are supported by databases. If + // table property is invalid, database will fail the table creation. 
+ tableOptions = tableOptions + (JDBCOptions.JDBC_CREATE_TABLE_OPTIONS -> tableProperties) + } + + val writeOptions = new JdbcOptionsInWrite(tableOptions) val caseSensitive = SQLConf.get.caseSensitiveAnalysis withConnection { conn => classifyException(s"Failed table creation: $ident") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala index 9ca8879be31e0..3a2c9a5428be0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.jdbc -import java.sql.Types +import java.sql.{SQLFeatureNotSupportedException, Types} import java.util.Locale import org.apache.spark.sql.types._ @@ -50,4 +50,10 @@ private object DerbyDialect extends JdbcDialect { override def renameTable(oldTable: String, newTable: String): String = { s"RENAME TABLE $oldTable TO $newTable" } + + // Derby currently doesn't support comment on table. Here is the ticket to add the support + // https://issues.apache.org/jira/browse/DERBY-7008 + override def getTableCommentQuery(table: String, comment: String): String = { + throw new SQLFeatureNotSupportedException(s"comment on table is not supported") + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index 0a857b99966fc..b12882b72fb66 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -24,6 +24,7 @@ import scala.collection.mutable.ArrayBuilder import org.apache.commons.lang3.StringUtils import org.apache.spark.annotation.{DeveloperApi, Since} +import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.connector.catalog.TableChange import org.apache.spark.sql.connector.catalog.TableChange._ @@ -61,7 +62,7 @@ case class JdbcType(databaseTypeDefinition : String, jdbcNullType : Int) * for the given Catalyst type. */ @DeveloperApi -abstract class JdbcDialect extends Serializable { +abstract class JdbcDialect extends Serializable with Logging{ /** * Check if this dialect instance can handle a certain jdbc url. * @param url the jdbc url. @@ -265,6 +266,10 @@ abstract class JdbcDialect extends Serializable { s"ALTER TABLE $tableName ALTER COLUMN ${quoteIdentifier(columnName)} SET $nullable" } + def getTableCommentQuery(table: String, comment: String): String = { + s"COMMENT ON TABLE $table IS '$comment'" + } + /** * Gets a dialect exception, classifies it and wraps it by `AnalysisException`. * @param message The error message to be placed to the returned exception. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala index dc39a10987c91..bc8589881adc2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala @@ -102,4 +102,12 @@ private object MsSqlServerDialect extends JdbcDialect { isNullable: Boolean): String = { throw new SQLFeatureNotSupportedException(s"UpdateColumnNullability is not supported") } + + // scalastyle:off line.size.limit + // https://docs.microsoft.com/en-us/sql/relational-databases/system-stored-procedures/sp-addextendedproperty-transact-sql?redirectedfrom=MSDN&view=sql-server-ver15 + // scalastyle:on line.size.limit + // need to use the stored procedure called sp_addextendedproperty to add comments to tables + override def getTableCommentQuery(table: String, comment: String): String = { + throw new SQLFeatureNotSupportedException(s"comment on table is not supported") + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala index 942cdc9619b56..71bba6f1105ba 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala @@ -89,4 +89,9 @@ private case object MySQLDialect extends JdbcDialect { isNullable: Boolean): String = { throw new SQLFeatureNotSupportedException(s"UpdateColumnNullability is not supported") } + + // See https://dev.mysql.com/doc/refman/8.0/en/alter-table.html + override def getTableCommentQuery(table: String, comment: String): String = { + s"ALTER TABLE $table COMMENT = '$comment'" + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala index 51316b464ab34..c7ad96c8f7619 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.datasources.v2.jdbc import java.sql.{Connection, DriverManager} import java.util.Properties +import org.apache.log4j.Level + import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.analysis.{NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException} @@ -391,4 +393,28 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { } } } + + test("CREATE TABLE with table comment") { + withTable("h2.test.new_table") { + val logAppender = new LogAppender("table comment") + withLogAppender(logAppender) { + sql("CREATE TABLE h2.test.new_table(i INT, j STRING) USING _ COMMENT 'this is a comment'") + } + val createCommentWarning = logAppender.loggingEvents + .filter(_.getLevel == Level.WARN) + .map(_.getRenderedMessage) + .exists(_.contains("Cannot create JDBC table comment")) + assert(createCommentWarning === false) + } + } + + test("CREATE TABLE with table property") { + withTable("h2.test.new_table") { + val m = intercept[AnalysisException] { + sql("CREATE TABLE h2.test.new_table(i INT, j STRING) USING _" + + " TBLPROPERTIES('ENGINE'='tableEngineName')") + }.cause.get.getMessage + 
assert(m.contains("\"TABLEENGINENAME\" not found")) + } + } } From 98730b7ee24bfc35b4dcf431246dbb3ae19f8322 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 9 Nov 2020 08:08:00 +0000 Subject: [PATCH 0416/1009] [SPARK-33087][SQL] DataFrameWriterV2 should delegate table resolution to the analyzer ### What changes were proposed in this pull request? This PR makes `DataFrameWriterV2` to create query plans with `UnresolvedRelation` and leave the table resolution work to the analyzer. ### Why are the changes needed? Table resolution work should be done by the analyzer. After this PR, the behavior is more consistent between different APIs (DataFrameWriter, DataFrameWriterV2 and SQL). See the next section for behavior changes. ### Does this PR introduce _any_ user-facing change? Yes. 1. writes to a temp view of v2 relation: previously it fails with table not found exception, now it works if the v2 relation is writable. This is consistent with `DataFrameWriter` and SQL INSERT. 2. writes to other temp views: previously it fails with table not found exception, now it fails with a more explicit error message, saying that writing to a temp view of non-v2-relation is not allowed. 3. writes to a view: previously it fails with table not writable error, now it fails with a more explicit error message, saying that writing to a view is not allowed. 4. writes to a v1 table: previously it fails with table not writable error, now it fails with a more explicit error message, saying that writing to a v1 table is not allowed. (We can allow it later, by falling back to v1 command) ### How was this patch tested? new tests Closes #29970 from cloud-fan/refactor. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 41 ++++++ .../sql/catalyst/analysis/CheckAnalysis.scala | 5 + .../catalyst/plans/logical/v2Commands.scala | 8 ++ .../apache/spark/sql/DataFrameWriterV2.scala | 44 +----- .../spark/sql/DataFrameWriterV2Suite.scala | 129 +++++++++++++++++- 5 files changed, 183 insertions(+), 44 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index f0143fdb23473..5834f9bad4a18 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -860,6 +860,17 @@ class Analyzer( lookupTempView(ident) .map(view => i.copy(table = view)) .getOrElse(i) + // TODO (SPARK-27484): handle streaming write commands when we have them. + case write: V2WriteCommand => + write.table match { + case UnresolvedRelation(ident, _, false) => + lookupTempView(ident).map(EliminateSubqueryAliases(_)).map { + case r: DataSourceV2Relation => write.withNewTable(r) + case _ => throw new AnalysisException("Cannot write into temp view " + + s"${ident.quoted} as it's not a data source v2 relation.") + }.getOrElse(write) + case _ => write + } case u @ UnresolvedTable(ident) => lookupTempView(ident).foreach { _ => u.failAnalysis(s"${ident.quoted} is a temp view not table.") @@ -942,6 +953,18 @@ class Analyzer( .map(v2Relation => i.copy(table = v2Relation)) .getOrElse(i) + // TODO (SPARK-27484): handle streaming write commands when we have them. 
+ case write: V2WriteCommand => + write.table match { + case u: UnresolvedRelation if !u.isStreaming => + lookupV2Relation(u.multipartIdentifier, u.options, false).map { + case r: DataSourceV2Relation => write.withNewTable(r) + case other => throw new IllegalStateException( + "[BUG] unexpected plan returned by `lookupV2Relation`: " + other) + }.getOrElse(write) + case _ => write + } + case alter @ AlterTable(_, _, u: UnresolvedV2Relation, _) => CatalogV2Util.loadRelation(u.catalog, u.tableName) .map(rel => alter.copy(table = rel)) @@ -1019,6 +1042,24 @@ class Analyzer( case other => i.copy(table = other) } + // TODO (SPARK-27484): handle streaming write commands when we have them. + case write: V2WriteCommand => + write.table match { + case u: UnresolvedRelation if !u.isStreaming => + lookupRelation(u.multipartIdentifier, u.options, false) + .map(EliminateSubqueryAliases(_)) + .map { + case v: View => write.failAnalysis( + s"Writing into a view is not allowed. View: ${v.desc.identifier}.") + case u: UnresolvedCatalogRelation => write.failAnalysis( + "Cannot write into v1 table: " + u.tableMeta.identifier) + case r: DataSourceV2Relation => write.withNewTable(r) + case other => throw new IllegalStateException( + "[BUG] unexpected plan returned by `lookupRelation`: " + other) + }.getOrElse(write) + case _ => write + } + case u: UnresolvedRelation => lookupRelation(u.multipartIdentifier, u.options, u.isStreaming) .map(resolveViews).getOrElse(u) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index ac91fa0b5811e..33a5224ed293e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -108,6 +108,11 @@ trait CheckAnalysis extends PredicateHelper { case InsertIntoStatement(u: UnresolvedRelation, _, _, _, _) => failAnalysis(s"Table not found: ${u.multipartIdentifier.quoted}") + // TODO (SPARK-27484): handle streaming write commands when we have them. 
+ case write: V2WriteCommand if write.table.isInstanceOf[UnresolvedRelation] => + val tblName = write.table.asInstanceOf[UnresolvedRelation].multipartIdentifier + write.table.failAnalysis(s"Table or view not found: ${tblName.quoted}") + case u: UnresolvedV2Relation if isView(u.originalNameParts) => u.failAnalysis( s"Invalid command: '${u.originalNameParts.quoted}' is a view not a table.") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index fb8a9be80385b..94d4e7ecfac21 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -53,6 +53,7 @@ trait V2WriteCommand extends Command { } def withNewQuery(newQuery: LogicalPlan): V2WriteCommand + def withNewTable(newTable: NamedRelation): V2WriteCommand } /** @@ -64,6 +65,7 @@ case class AppendData( writeOptions: Map[String, String], isByName: Boolean) extends V2WriteCommand { override def withNewQuery(newQuery: LogicalPlan): AppendData = copy(query = newQuery) + override def withNewTable(newTable: NamedRelation): AppendData = copy(table = newTable) } object AppendData { @@ -97,6 +99,9 @@ case class OverwriteByExpression( override def withNewQuery(newQuery: LogicalPlan): OverwriteByExpression = { copy(query = newQuery) } + override def withNewTable(newTable: NamedRelation): OverwriteByExpression = { + copy(table = newTable) + } } object OverwriteByExpression { @@ -128,6 +133,9 @@ case class OverwritePartitionsDynamic( override def withNewQuery(newQuery: LogicalPlan): OverwritePartitionsDynamic = { copy(query = newQuery) } + override def withNewTable(newTable: NamedRelation): OverwritePartitionsDynamic = { + copy(table = newTable) + } } object OverwritePartitionsDynamic { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala index 87f35410172d6..d55b5c3103537 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala @@ -21,12 +21,11 @@ import scala.collection.JavaConverters._ import scala.collection.mutable import org.apache.spark.annotation.Experimental -import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchTableException, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchTableException, TableAlreadyExistsException, UnresolvedRelation} import org.apache.spark.sql.catalyst.expressions.{Attribute, Bucket, Days, Hours, Literal, Months, Years} import org.apache.spark.sql.catalyst.plans.logical.{AppendData, CreateTableAsSelectStatement, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic, ReplaceTableAsSelectStatement} import org.apache.spark.sql.connector.expressions.{LogicalExpressions, NamedReference, Transform} import org.apache.spark.sql.execution.SQLExecution -import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.types.IntegerType /** @@ -38,21 +37,12 @@ import org.apache.spark.sql.types.IntegerType final class DataFrameWriterV2[T] private[sql](table: String, ds: Dataset[T]) extends CreateTableWriter[T] { - import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ - import 
org.apache.spark.sql.connector.catalog.CatalogV2Util._ - import df.sparkSession.sessionState.analyzer.CatalogAndIdentifier - private val df: DataFrame = ds.toDF() private val sparkSession = ds.sparkSession private val tableName = sparkSession.sessionState.sqlParser.parseMultipartIdentifier(table) - private val (catalog, identifier) = { - val CatalogAndIdentifier(catalog, identifier) = tableName - (catalog.asTableCatalog, identifier) - } - private val logicalPlan = df.queryExecution.logical private var provider: Option[String] = None @@ -153,15 +143,7 @@ final class DataFrameWriterV2[T] private[sql](table: String, ds: Dataset[T]) */ @throws(classOf[NoSuchTableException]) def append(): Unit = { - val append = loadTable(catalog, identifier) match { - case Some(t) => - AppendData.byName( - DataSourceV2Relation.create(t, Some(catalog), Some(identifier)), - logicalPlan, options.toMap) - case _ => - throw new NoSuchTableException(identifier) - } - + val append = AppendData.byName(UnresolvedRelation(tableName), logicalPlan, options.toMap) runCommand("append")(append) } @@ -177,15 +159,8 @@ final class DataFrameWriterV2[T] private[sql](table: String, ds: Dataset[T]) */ @throws(classOf[NoSuchTableException]) def overwrite(condition: Column): Unit = { - val overwrite = loadTable(catalog, identifier) match { - case Some(t) => - OverwriteByExpression.byName( - DataSourceV2Relation.create(t, Some(catalog), Some(identifier)), - logicalPlan, condition.expr, options.toMap) - case _ => - throw new NoSuchTableException(identifier) - } - + val overwrite = OverwriteByExpression.byName( + UnresolvedRelation(tableName), logicalPlan, condition.expr, options.toMap) runCommand("overwrite")(overwrite) } @@ -204,15 +179,8 @@ final class DataFrameWriterV2[T] private[sql](table: String, ds: Dataset[T]) */ @throws(classOf[NoSuchTableException]) def overwritePartitions(): Unit = { - val dynamicOverwrite = loadTable(catalog, identifier) match { - case Some(t) => - OverwritePartitionsDynamic.byName( - DataSourceV2Relation.create(t, Some(catalog), Some(identifier)), - logicalPlan, options.toMap) - case _ => - throw new NoSuchTableException(identifier) - } - + val dynamicOverwrite = OverwritePartitionsDynamic.byName( + UnresolvedRelation(tableName), logicalPlan, options.toMap) runCommand("overwritePartitions")(dynamicOverwrite) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala index 8720c1f620564..de791383326f1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala @@ -30,6 +30,7 @@ import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} import org.apache.spark.sql.connector.expressions.{BucketTransform, DaysTransform, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, YearsTransform} import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.sources.FakeSourceOne import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructType, TimestampType} import org.apache.spark.sql.util.QueryExecutionListener @@ -57,6 +58,7 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo } after { + spark.sessionState.catalog.reset() spark.sessionState.catalogManager.reset() 
spark.sessionState.conf.clear() } @@ -118,6 +120,18 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"), Row(4L, "d"), Row(5L, "e"), Row(6L, "f"))) } + test("Append: write to a temp view of v2 relation") { + spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") + spark.table("testcat.table_name").createOrReplaceTempView("temp_view") + spark.table("source").writeTo("temp_view").append() + checkAnswer( + spark.table("testcat.table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + checkAnswer( + spark.table("temp_view"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + } + test("Append: by name not position") { spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") @@ -136,11 +150,36 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo } test("Append: fail if table does not exist") { - val exc = intercept[NoSuchTableException] { + val exc = intercept[AnalysisException] { spark.table("source").writeTo("testcat.table_name").append() } - assert(exc.getMessage.contains("table_name")) + assert(exc.getMessage.contains("Table or view not found: testcat.table_name")) + } + + test("Append: fail if it writes to a temp view that is not v2 relation") { + spark.range(10).createOrReplaceTempView("temp_view") + val exc = intercept[AnalysisException] { + spark.table("source").writeTo("temp_view").append() + } + assert(exc.getMessage.contains("Cannot write into temp view temp_view as it's not a " + + "data source v2 relation")) + } + + test("Append: fail if it writes to a view") { + spark.sql("CREATE VIEW v AS SELECT 1") + val exc = intercept[AnalysisException] { + spark.table("source").writeTo("v").append() + } + assert(exc.getMessage.contains("Writing into a view is not allowed")) + } + + test("Append: fail if it writes to a v1 table") { + sql(s"CREATE TABLE table_name USING ${classOf[FakeSourceOne].getName}") + val exc = intercept[AnalysisException] { + spark.table("source").writeTo("table_name").append() + } + assert(exc.getMessage.contains("Cannot write into v1 table: `default`.`table_name`")) } test("Overwrite: overwrite by expression: true") { @@ -181,6 +220,20 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo Seq(Row(1L, "a"), Row(2L, "b"), Row(4L, "d"), Row(5L, "e"), Row(6L, "f"))) } + test("Overwrite: write to a temp view of v2 relation") { + spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") + spark.table("source").writeTo("testcat.table_name").append() + spark.table("testcat.table_name").createOrReplaceTempView("temp_view") + + spark.table("source2").writeTo("testcat.table_name").overwrite(lit(true)) + checkAnswer( + spark.table("testcat.table_name"), + Seq(Row(4L, "d"), Row(5L, "e"), Row(6L, "f"))) + checkAnswer( + spark.table("temp_view"), + Seq(Row(4L, "d"), Row(5L, "e"), Row(6L, "f"))) + } + test("Overwrite: by name not position") { spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") @@ -200,11 +253,36 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo } test("Overwrite: fail if table does not exist") { - val exc = intercept[NoSuchTableException] { + val exc = intercept[AnalysisException] { spark.table("source").writeTo("testcat.table_name").overwrite(lit(true)) } - assert(exc.getMessage.contains("table_name")) + assert(exc.getMessage.contains("Table or view not found: testcat.table_name")) + } + + 
test("Overwrite: fail if it writes to a temp view that is not v2 relation") { + spark.range(10).createOrReplaceTempView("temp_view") + val exc = intercept[AnalysisException] { + spark.table("source").writeTo("temp_view").overwrite(lit(true)) + } + assert(exc.getMessage.contains("Cannot write into temp view temp_view as it's not a " + + "data source v2 relation")) + } + + test("Overwrite: fail if it writes to a view") { + spark.sql("CREATE VIEW v AS SELECT 1") + val exc = intercept[AnalysisException] { + spark.table("source").writeTo("v").overwrite(lit(true)) + } + assert(exc.getMessage.contains("Writing into a view is not allowed")) + } + + test("Overwrite: fail if it writes to a v1 table") { + sql(s"CREATE TABLE table_name USING ${classOf[FakeSourceOne].getName}") + val exc = intercept[AnalysisException] { + spark.table("source").writeTo("table_name").overwrite(lit(true)) + } + assert(exc.getMessage.contains("Cannot write into v1 table: `default`.`table_name`")) } test("OverwritePartitions: overwrite conflicting partitions") { @@ -245,6 +323,20 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo Seq(Row(4L, "d"), Row(5L, "e"), Row(6L, "f"))) } + test("OverwritePartitions: write to a temp view of v2 relation") { + spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") + spark.table("source").writeTo("testcat.table_name").append() + spark.table("testcat.table_name").createOrReplaceTempView("temp_view") + + spark.table("source2").writeTo("testcat.table_name").overwritePartitions() + checkAnswer( + spark.table("testcat.table_name"), + Seq(Row(4L, "d"), Row(5L, "e"), Row(6L, "f"))) + checkAnswer( + spark.table("temp_view"), + Seq(Row(4L, "d"), Row(5L, "e"), Row(6L, "f"))) + } + test("OverwritePartitions: by name not position") { spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") @@ -264,11 +356,36 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo } test("OverwritePartitions: fail if table does not exist") { - val exc = intercept[NoSuchTableException] { + val exc = intercept[AnalysisException] { spark.table("source").writeTo("testcat.table_name").overwritePartitions() } - assert(exc.getMessage.contains("table_name")) + assert(exc.getMessage.contains("Table or view not found: testcat.table_name")) + } + + test("OverwritePartitions: fail if it writes to a temp view that is not v2 relation") { + spark.range(10).createOrReplaceTempView("temp_view") + val exc = intercept[AnalysisException] { + spark.table("source").writeTo("temp_view").overwritePartitions() + } + assert(exc.getMessage.contains("Cannot write into temp view temp_view as it's not a " + + "data source v2 relation")) + } + + test("OverwritePartitions: fail if it writes to a view") { + spark.sql("CREATE VIEW v AS SELECT 1") + val exc = intercept[AnalysisException] { + spark.table("source").writeTo("v").overwritePartitions() + } + assert(exc.getMessage.contains("Writing into a view is not allowed")) + } + + test("OverwritePartitions: fail if it writes to a v1 table") { + sql(s"CREATE TABLE table_name USING ${classOf[FakeSourceOne].getName}") + val exc = intercept[AnalysisException] { + spark.table("source").writeTo("table_name").overwritePartitions() + } + assert(exc.getMessage.contains("Cannot write into v1 table: `default`.`table_name`")) } test("Create: basic behavior") { From 69799c514ff9874c57bf94d4de21ea4cd0cbbf8d Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Mon, 9 Nov 2020 08:32:51 +0000 Subject: [PATCH 
0417/1009] [SPARK-33372][SQL] Fix InSet bucket pruning ### What changes were proposed in this pull request? This PR fixes `InSet` bucket pruning, because its values are not `Literal`s: https://github.com/apache/spark/blob/cbd3fdea62dab73fc4a96702de8fd1f07722da66/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala#L253-L255 ### Why are the changes needed? Fix a bug: the old `InSet` case expected `Literal` values, so bucket pruning was never applied for `InSet` filters. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test and manual test: ```scala spark.sql("select id as a, id as b from range(10000)").write.bucketBy(100, "a").saveAsTable("t") spark.sql("select * from t where a in (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)").show ``` Before this PR | After this PR -- | -- ![image](https://user-images.githubusercontent.com/5399861/98380788-fb120980-2083-11eb-8fae-4e21ad873e9b.png) | ![image](https://user-images.githubusercontent.com/5399861/98381095-5ba14680-2084-11eb-82ca-2d780c85305c.png) Closes #30279 from wangyum/SPARK-33372. Authored-by: Yuming Wang Signed-off-by: Wenchen Fan --- .../spark/sql/execution/datasources/FileSourceStrategy.scala | 5 ++--- .../org/apache/spark/sql/sources/BucketedReadSuite.scala | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala index 1191f99cc98a2..5e07f778ac135 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala @@ -89,9 +89,8 @@ object FileSourceStrategy extends Strategy with PredicateHelper with Logging { case expressions.In(a: Attribute, list) if list.forall(_.isInstanceOf[Literal]) && a.name == bucketColumnName => getBucketSetFromIterable(a, list.map(e => e.eval(EmptyRow))) - case expressions.InSet(a: Attribute, hset) - if hset.forall(_.isInstanceOf[Literal]) && a.name == bucketColumnName => - getBucketSetFromIterable(a, hset.map(e => expressions.Literal(e).eval(EmptyRow))) + case expressions.InSet(a: Attribute, hset) if a.name == bucketColumnName => + getBucketSetFromIterable(a, hset) case expressions.IsNull(a: Attribute) if a.name == bucketColumnName => getBucketSetFromValue(a, null) case expressions.And(left, right) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala index a188e4d9d6d90..6a31ce07dabb4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala @@ -190,7 +190,7 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils { // Case 4: InSet val inSetExpr = expressions.InSet($"j".expr, - Set(bucketValue, bucketValue + 1, bucketValue + 2, bucketValue + 3).map(lit(_).expr)) + Set(bucketValue, bucketValue + 1, bucketValue + 2, bucketValue + 3)) checkPrunedAnswers( bucketSpec, bucketValues = Seq(bucketValue, bucketValue + 1, bucketValue + 2, bucketValue + 3), From 7a5647a93aaea9d1d78d9262e24fc8c010db04d0 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Mon, 9 Nov 2020 09:20:31 +0000 Subject: [PATCH 0418/1009] [SPARK-33385][SQL] Support bucket pruning for IsNaN ### What changes were proposed in this pull request? This PR adds support for bucket pruning on the `IsNaN` predicate.
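As an illustration only (this snippet is not part of the patch, and the table and column names are invented), a filter on a double bucket column can now prune buckets:

```scala
// Hypothetical example: bucket a double column `j` and filter with isnan(j).
// With this change, only the bucket that NaN maps to is scanned instead of all 8 buckets.
spark.range(10000)
  .selectExpr("id AS i", "cast(if(id % 100 = 0, 'NaN', id) AS double) AS j")
  .write
  .bucketBy(8, "j")
  .saveAsTable("bucketed_double_table")

spark.sql("SELECT * FROM bucketed_double_table WHERE isnan(j)").show()
```

The added strategy cases map the NaN value to its bucket via `getBucketSetFromValue`, mirroring the existing `IsNull` handling.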
### Why are the changes needed? Improve query performance. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #30291 from wangyum/SPARK-33385. Authored-by: Yuming Wang Signed-off-by: Wenchen Fan --- .../datasources/FileSourceStrategy.scala | 7 +++++++ .../spark/sql/sources/BucketedReadSuite.scala | 21 ++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala index 5e07f778ac135..1bfde7515dc92 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.ScanOperation import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.{FileSourceScanExec, SparkPlan} +import org.apache.spark.sql.types.{DoubleType, FloatType} import org.apache.spark.util.collection.BitSet /** @@ -93,6 +94,12 @@ object FileSourceStrategy extends Strategy with PredicateHelper with Logging { getBucketSetFromIterable(a, hset) case expressions.IsNull(a: Attribute) if a.name == bucketColumnName => getBucketSetFromValue(a, null) + case expressions.IsNaN(a: Attribute) + if a.name == bucketColumnName && a.dataType == FloatType => + getBucketSetFromValue(a, Float.NaN) + case expressions.IsNaN(a: Attribute) + if a.name == bucketColumnName && a.dataType == DoubleType => + getBucketSetFromValue(a, Double.NaN) case expressions.And(left, right) => getExpressionBuckets(left, bucketColumnName, numBuckets) & getExpressionBuckets(right, bucketColumnName, numBuckets) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala index 6a31ce07dabb4..4832386e553db 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala @@ -113,7 +113,7 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils { // 2) Verify the final result is the same as the expected one private def checkPrunedAnswers( bucketSpec: BucketSpec, - bucketValues: Seq[Integer], + bucketValues: Seq[Any], filterCondition: Column, originalDataFrame: DataFrame): Unit = { // This test verifies parts of the plan. Disable whole stage codegen. 
@@ -245,6 +245,25 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils { } } + test("bucket pruning support IsNaN") { + withTable("bucketed_table") { + val numBuckets = NumBucketsForPruningNullDf + val bucketSpec = BucketSpec(numBuckets, Seq("j"), Nil) + val naNDF = nullDF.selectExpr("i", "cast(if(isnull(j), 'NaN', j) as double) as j", "k") + // json does not support predicate push-down, and thus json is used here + naNDF.write + .format("json") + .bucketBy(numBuckets, "j") + .saveAsTable("bucketed_table") + + checkPrunedAnswers( + bucketSpec, + bucketValues = Double.NaN :: Nil, + filterCondition = $"j".isNaN, + naNDF) + } + } + test("read partitioning bucketed tables having composite filters") { withTable("bucketed_table") { val numBuckets = NumBucketsForPruningDF From 4e1c89400dc57b5c53741f14f5179add7cb617eb Mon Sep 17 00:00:00 2001 From: Linhong Liu Date: Mon, 9 Nov 2020 09:44:58 +0000 Subject: [PATCH 0419/1009] [SPARK-33140][SQL][FOLLOW-UP] Use sparkSession in AQE context when applying rules ### What changes were proposed in this pull request? After #30097, all rules are using `SparkSession.active` to get `SQLConf` and `SparkSession`. But in AQE, when applying the rules for the initial plan, we should use the spark session in AQE context. ### Why are the changes needed? Fix potential problem caused by using the wrong spark session ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing ut Closes #30294 from linhongliu-db/SPARK-33140-followup. Authored-by: Linhong Liu Signed-off-by: Wenchen Fan --- .../sql/execution/adaptive/AdaptiveSparkPlanExec.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index 4ae33311d5a24..75cc073c4a62c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -123,8 +123,10 @@ case class AdaptiveSparkPlanExec( @transient private val costEvaluator = SimpleCostEvaluator - @transient private val initialPlan = applyPhysicalRules( - inputPlan, queryStagePreparationRules, Some((planChangeLogger, "AQE Preparations"))) + @transient private val initialPlan = context.session.withActive { + applyPhysicalRules( + inputPlan, queryStagePreparationRules, Some((planChangeLogger, "AQE Preparations"))) + } @volatile private var currentPhysicalPlan = initialPlan From 84dc37461187210ecdb25fa36ccb61c7cc1a6486 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Mon, 9 Nov 2020 19:27:36 +0900 Subject: [PATCH 0420/1009] [SPARK-33303][SQL] Deduplicate deterministic PythonUDF calls ### What changes were proposed in this pull request? This PR modifies the `ExtractPythonUDFs` rule to deduplicate deterministic PythonUDF calls. 
Before this PR the dataframe: `df.withColumn("c", batchedPythonUDF(col("a"))).withColumn("d", col("c"))` has the plan: ``` *(1) Project [value#1 AS a#4, pythonUDF1#15 AS c#7, pythonUDF1#15 AS d#10] +- BatchEvalPython [dummyUDF(value#1), dummyUDF(value#1)], [pythonUDF0#14, pythonUDF1#15] +- LocalTableScan [value#1] ``` After this PR the deterministic PythonUDF calls are deduplicated: ``` *(1) Project [value#1 AS a#4, pythonUDF0#14 AS c#7, pythonUDF0#14 AS d#10] +- BatchEvalPython [dummyUDF(value#1)], [pythonUDF0#14] +- LocalTableScan [value#1] ``` ### Why are the changes needed? To fix a performance issue. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New and existing UTs. Closes #30203 from peter-toth/SPARK-33303-deduplicate-deterministic-udf-calls. Authored-by: Peter Toth Signed-off-by: HyukjinKwon --- .../execution/python/ExtractPythonUDFs.scala | 20 +++++++++----- .../python/BatchEvalPythonExecSuite.scala | 7 +++++ .../python/ExtractPythonUDFsSuite.scala | 27 +++++++++++++++++++ 3 files changed, 48 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala index 1c88056cb50c9..dab2723d25726 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala @@ -218,13 +218,22 @@ object ExtractPythonUDFs extends Rule[LogicalPlan] with PredicateHelper { } } + private def canonicalizeDeterministic(u: PythonUDF) = { + if (u.deterministic) { + u.canonicalized.asInstanceOf[PythonUDF] + } else { + u + } + } + /** * Extract all the PythonUDFs from the current operator and evaluate them before the operator. */ private def extract(plan: LogicalPlan): LogicalPlan = { - val udfs = collectEvaluableUDFsFromExpressions(plan.expressions) + val udfs = ExpressionSet(collectEvaluableUDFsFromExpressions(plan.expressions)) // ignore the PythonUDF that come from second/third aggregate, which is not used .filter(udf => udf.references.subsetOf(plan.inputSet)) + .toSeq.asInstanceOf[Seq[PythonUDF]] if (udfs.isEmpty) { // If there aren't any, we are done. plan @@ -262,7 +271,7 @@ object ExtractPythonUDFs extends Rule[LogicalPlan] with PredicateHelper { throw new AnalysisException("Unexcepted UDF evalType") } - attributeMap ++= validUdfs.zip(resultAttrs) + attributeMap ++= validUdfs.map(canonicalizeDeterministic).zip(resultAttrs) evaluation } else { child @@ -270,13 +279,12 @@ object ExtractPythonUDFs extends Rule[LogicalPlan] with PredicateHelper { } // Other cases are disallowed as they are ambiguous or would require a cartesian // product. 
- udfs.filterNot(attributeMap.contains).foreach { udf => - sys.error(s"Invalid PythonUDF $udf, requires attributes from more than one child.") + udfs.map(canonicalizeDeterministic).filterNot(attributeMap.contains).foreach { + udf => sys.error(s"Invalid PythonUDF $udf, requires attributes from more than one child.") } val rewritten = plan.withNewChildren(newChildren).transformExpressions { - case p: PythonUDF if attributeMap.contains(p) => - attributeMap(p) + case p: PythonUDF => attributeMap.getOrElse(canonicalizeDeterministic(p), p) } // extract remaining python UDFs recursively diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExecSuite.scala index 5fe3d6a71167e..cb5e23e0534d0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExecSuite.scala @@ -137,6 +137,13 @@ class MyDummyPythonUDF extends UserDefinedPythonFunction( pythonEvalType = PythonEvalType.SQL_BATCHED_UDF, udfDeterministic = true) +class MyDummyNondeterministicPythonUDF extends UserDefinedPythonFunction( + name = "dummyNondeterministicUDF", + func = new DummyUDF, + dataType = BooleanType, + pythonEvalType = PythonEvalType.SQL_BATCHED_UDF, + udfDeterministic = false) + class MyDummyGroupedAggPandasUDF extends UserDefinedPythonFunction( name = "dummyGroupedAggPandasUDF", func = new DummyUDF, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala index 87d541d2d22b0..325f4923bd6c6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala @@ -28,6 +28,7 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with SharedSparkSession { import testImplicits._ val batchedPythonUDF = new MyDummyPythonUDF + val batchedNondeterministicPythonUDF = new MyDummyNondeterministicPythonUDF val scalarPandasUDF = new MyDummyScalarPandasUDF private def collectBatchExec(plan: SparkPlan): Seq[BatchEvalPythonExec] = plan.collect { @@ -166,5 +167,31 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with SharedSparkSession { } } + test("SPARK-33303: Deterministic UDF calls are deduplicated") { + val df = Seq("Hello").toDF("a") + + val df2 = df.withColumn("c", batchedPythonUDF(col("a"))).withColumn("d", col("c")) + val pythonEvalNodes2 = collectBatchExec(df2.queryExecution.executedPlan) + assert(pythonEvalNodes2.size == 1) + assert(pythonEvalNodes2.head.udfs.size == 1) + + val df3 = df.withColumns(Seq("c", "d"), + Seq(batchedPythonUDF(col("a")), batchedPythonUDF(col("a")))) + val pythonEvalNodes3 = collectBatchExec(df3.queryExecution.executedPlan) + assert(pythonEvalNodes3.size == 1) + assert(pythonEvalNodes3.head.udfs.size == 1) + + val df4 = df.withColumn("c", batchedNondeterministicPythonUDF(col("a"))) + .withColumn("d", col("c")) + val pythonEvalNodes4 = collectBatchExec(df4.queryExecution.executedPlan) + assert(pythonEvalNodes4.size == 1) + assert(pythonEvalNodes4.head.udfs.size == 1) + + val df5 = df.withColumns(Seq("c", "d"), + Seq(batchedNondeterministicPythonUDF(col("a")), batchedNondeterministicPythonUDF(col("a")))) + val pythonEvalNodes5 = collectBatchExec(df5.queryExecution.executedPlan) + 
assert(pythonEvalNodes5.size == 1) + assert(pythonEvalNodes5.head.udfs.size == 2) + } } From 8113c88542ee282b510c7e046d64df1761a85d14 Mon Sep 17 00:00:00 2001 From: Chandni Singh Date: Mon, 9 Nov 2020 11:00:52 -0600 Subject: [PATCH 0421/1009] [SPARK-32916][SHUFFLE] Implementation of shuffle service that leverages push-based shuffle in YARN deployment mode ### What changes were proposed in this pull request? This is one of the patches for SPIP [SPARK-30602](https://issues.apache.org/jira/browse/SPARK-30602) which is needed for push-based shuffle. Summary of changes: - Adds an implementation of `MergedShuffleFileManager` which was introduced with [Spark 32915](https://issues.apache.org/jira/browse/SPARK-32915). - Integrated the push-based shuffle service with `YarnShuffleService`. ### Why are the changes needed? Refer to the SPIP in [SPARK-30602](https://issues.apache.org/jira/browse/SPARK-30602). ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added unit tests. The reference PR with the consolidated changes covering the complete implementation is also provided in [SPARK-30602](https://issues.apache.org/jira/browse/SPARK-30602). We have already verified the functionality and the improved performance as documented in the SPIP doc. Lead-authored-by: Min Shen mshenlinkedin.com Co-authored-by: Chandni Singh chsinghlinkedin.com Co-authored-by: Ye Zhou yezhoulinkedin.com Closes #30062 from otterc/SPARK-32916. Lead-authored-by: Chandni Singh Co-authored-by: Chandni Singh Co-authored-by: Ye Zhou Co-authored-by: Min Shen Signed-off-by: Mridul Muralidharan gmail.com> --- .../spark/network/protocol/Encoders.java | 26 +- .../spark/network/util/TransportConf.java | 35 + .../spark/network/protocol/EncodersSuite.java | 68 ++ common/network-shuffle/pom.xml | 10 +- .../spark/network/shuffle/ErrorHandler.java | 8 +- .../network/shuffle/ExternalBlockHandler.java | 25 +- .../network/shuffle/MergedBlockMeta.java | 2 + .../shuffle/MergedShuffleFileManager.java | 28 +- .../network/shuffle/OneForOneBlockPusher.java | 11 +- .../shuffle/RemoteBlockPushResolver.java | 934 ++++++++++++++++++ .../protocol/FinalizeShuffleMerge.java | 2 + .../shuffle/protocol/MergeStatuses.java | 2 + .../shuffle/protocol/PushBlockStream.java | 37 +- .../shuffle/ExternalBlockHandlerSuite.java | 2 +- .../shuffle/OneForOneBlockPusherSuite.java | 66 +- .../shuffle/RemoteBlockPushResolverSuite.java | 496 ++++++++++ .../network/yarn/YarnShuffleService.java | 23 +- .../network/yarn/YarnShuffleServiceSuite.java | 61 ++ 18 files changed, 1748 insertions(+), 88 deletions(-) create mode 100644 common/network-common/src/test/java/org/apache/spark/network/protocol/EncodersSuite.java create mode 100644 common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java create mode 100644 common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java create mode 100644 common/network-yarn/src/test/java/org/apache/spark/network/yarn/YarnShuffleServiceSuite.java diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/Encoders.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/Encoders.java index 4fa191b3917e3..8bab808ad6864 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/Encoders.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/Encoders.java @@ -18,6 +18,7 @@ package org.apache.spark.network.protocol; import java.io.IOException; 
+import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import io.netty.buffer.ByteBuf; @@ -46,7 +47,11 @@ public static String decode(ByteBuf buf) { } } - /** Bitmaps are encoded with their serialization length followed by the serialization bytes. */ + /** + * Bitmaps are encoded with their serialization length followed by the serialization bytes. + * + * @since 3.1.0 + */ public static class Bitmaps { public static int encodedLength(RoaringBitmap b) { // Compress the bitmap before serializing it. Note that since BlockTransferMessage @@ -57,13 +62,20 @@ public static int encodedLength(RoaringBitmap b) { return b.serializedSizeInBytes(); } + /** + * The input ByteBuf for this encoder should have enough write capacity to fit the serialized + * bitmap. Other encoders which use {@link io.netty.buffer.AbstractByteBuf#writeBytes(byte[])} + * to write can expand the buf as writeBytes calls {@link ByteBuf#ensureWritable} internally. + * However, this encoder doesn't rely on netty's writeBytes and will fail if the input buf + * doesn't have enough write capacity. + */ public static void encode(ByteBuf buf, RoaringBitmap b) { - int encodedLength = b.serializedSizeInBytes(); // RoaringBitmap requires nio ByteBuffer for serde. We expose the netty ByteBuf as a nio // ByteBuffer. Here, we need to explicitly manage the index so we can write into the // ByteBuffer, and the write is reflected in the underneath ByteBuf. - b.serialize(buf.nioBuffer(buf.writerIndex(), encodedLength)); - buf.writerIndex(buf.writerIndex() + encodedLength); + ByteBuffer byteBuffer = buf.nioBuffer(buf.writerIndex(), buf.writableBytes()); + b.serialize(byteBuffer); + buf.writerIndex(buf.writerIndex() + byteBuffer.position()); } public static RoaringBitmap decode(ByteBuf buf) { @@ -172,7 +184,11 @@ public static long[] decode(ByteBuf buf) { } } - /** Bitmap arrays are encoded with the number of bitmaps followed by per-Bitmap encoding. */ + /** + * Bitmap arrays are encoded with the number of bitmaps followed by per-Bitmap encoding. + * + * @since 3.1.0 + */ public static class BitmapArrays { public static int encodedLength(RoaringBitmap[] bitmaps) { int totalLength = 4; diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java b/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java index 646e4278811f4..fd287b022618b 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java @@ -363,4 +363,39 @@ public boolean useOldFetchProtocol() { return conf.getBoolean("spark.shuffle.useOldFetchProtocol", false); } + /** + * Class name of the implementation of MergedShuffleFileManager that merges the blocks + * pushed to it when push-based shuffle is enabled. By default, push-based shuffle is disabled at + * a cluster level because this configuration is set to + * 'org.apache.spark.network.shuffle.ExternalBlockHandler$NoOpMergedShuffleFileManager'. + * To turn on push-based shuffle at a cluster level, set the configuration to + * 'org.apache.spark.network.shuffle.RemoteBlockPushResolver'. + */ + public String mergedShuffleFileManagerImpl() { + return conf.get("spark.shuffle.server.mergedShuffleFileManagerImpl", + "org.apache.spark.network.shuffle.ExternalBlockHandler$NoOpMergedShuffleFileManager"); + } + + /** + * The minimum size of a chunk when dividing a merged shuffle file into multiple chunks during + * push-based shuffle. 
+ * A merged shuffle file consists of multiple small shuffle blocks. Fetching the + * complete merged shuffle file in a single response increases the memory requirements for the + * clients. Instead of serving the entire merged file, the shuffle service serves the + * merged file in `chunks`. A `chunk` constitutes few shuffle blocks in entirety and this + * configuration controls how big a chunk can get. A corresponding index file for each merged + * shuffle file will be generated indicating chunk boundaries. + */ + public int minChunkSizeInMergedShuffleFile() { + return Ints.checkedCast(JavaUtils.byteStringAsBytes( + conf.get("spark.shuffle.server.minChunkSizeInMergedShuffleFile", "2m"))); + } + + /** + * The size of cache in memory which is used in push-based shuffle for storing merged index files. + */ + public long mergedIndexCacheSize() { + return JavaUtils.byteStringAsBytes( + conf.get("spark.shuffle.server.mergedIndexCacheSize", "100m")); + } } diff --git a/common/network-common/src/test/java/org/apache/spark/network/protocol/EncodersSuite.java b/common/network-common/src/test/java/org/apache/spark/network/protocol/EncodersSuite.java new file mode 100644 index 0000000000000..6e89702c04396 --- /dev/null +++ b/common/network-common/src/test/java/org/apache/spark/network/protocol/EncodersSuite.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.protocol; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import org.junit.Test; +import org.roaringbitmap.RoaringBitmap; + +import static org.junit.Assert.*; + +/** + * Tests for {@link Encoders}. 
+ */ +public class EncodersSuite { + + @Test + public void testRoaringBitmapEncodeDecode() { + RoaringBitmap bitmap = new RoaringBitmap(); + bitmap.add(1, 2, 3); + ByteBuf buf = Unpooled.buffer(Encoders.Bitmaps.encodedLength(bitmap)); + Encoders.Bitmaps.encode(buf, bitmap); + RoaringBitmap decodedBitmap = Encoders.Bitmaps.decode(buf); + assertEquals(bitmap, decodedBitmap); + } + + @Test (expected = java.nio.BufferOverflowException.class) + public void testRoaringBitmapEncodeShouldFailWhenBufferIsSmall() { + RoaringBitmap bitmap = new RoaringBitmap(); + bitmap.add(1, 2, 3); + ByteBuf buf = Unpooled.buffer(4); + Encoders.Bitmaps.encode(buf, bitmap); + } + + @Test + public void testBitmapArraysEncodeDecode() { + RoaringBitmap[] bitmaps = new RoaringBitmap[] { + new RoaringBitmap(), + new RoaringBitmap(), + new RoaringBitmap(), // empty + new RoaringBitmap(), + new RoaringBitmap() + }; + bitmaps[0].add(1, 2, 3); + bitmaps[1].add(1, 2, 4); + bitmaps[3].add(7L, 9L); + bitmaps[4].add(1L, 100L); + ByteBuf buf = Unpooled.buffer(Encoders.BitmapArrays.encodedLength(bitmaps)); + Encoders.BitmapArrays.encode(buf, bitmaps); + RoaringBitmap[] decodedBitmaps = Encoders.BitmapArrays.decode(buf); + assertArrayEquals(bitmaps, decodedBitmaps); + } +} diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index a4a1ff92ef9a0..562a1d495cc8a 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -47,6 +47,11 @@ metrics-core
    + + org.apache.spark + spark-tags_${scala.binary.version} + + org.slf4j @@ -70,11 +75,6 @@ test-jar test - - org.apache.spark - spark-tags_${scala.binary.version} - test - 2.8 1.8 - 1.0.0 + 1.1.0 2.6 @@ -549,6 +550,11 @@ commons-codec ${commons-codec.version} + + org.apache.commons + commons-compress + ${commons-compress.version} + org.apache.commons commons-math3 From 4360c6f12ae8f192fb65ae1c6ad6ee05e0217c7d Mon Sep 17 00:00:00 2001 From: neko Date: Tue, 10 Nov 2020 11:12:19 +0900 Subject: [PATCH 0426/1009] [SPARK-33363] Add prompt information related to the current task when pyspark/sparkR starts ### What changes were proposed in this pull request? add prompt information about current applicationId, current URL and master info when pyspark / sparkR starts. ### Why are the changes needed? The information printed when pyspark/sparkR starts does not prompt the basic information of current application, and it is not convenient when used pyspark/sparkR in dos. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? manual test result shows below: ![pyspark new print](https://user-images.githubusercontent.com/52202080/98274268-2a663f00-1fce-11eb-88ce-964ce90b439e.png) ![sparkR](https://user-images.githubusercontent.com/52202080/98541235-1a01dd00-22ca-11eb-9304-09bcde87b05e.png) Closes #30266 from akiyamaneko/pyspark-hint-info. Authored-by: neko Signed-off-by: HyukjinKwon --- R/pkg/inst/profile/shell.R | 4 +++- python/pyspark/shell.py | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/R/pkg/inst/profile/shell.R b/R/pkg/inst/profile/shell.R index f6c20e1a5ebc3..ffedb3038fd53 100644 --- a/R/pkg/inst/profile/shell.R +++ b/R/pkg/inst/profile/shell.R @@ -43,5 +43,7 @@ cat(" /_/", "\n") cat("\n") - cat("\nSparkSession available as 'spark'.\n") + cat("\nSparkSession Web UI available at", SparkR::sparkR.uiWebUrl()) + cat("\nSparkSession available as 'spark'(master = ", unlist(SparkR::sparkR.conf("spark.master")), + ", app id = ", unlist(SparkR::sparkR.conf("spark.app.id")), ").", "\n", sep = "") } diff --git a/python/pyspark/shell.py b/python/pyspark/shell.py index 0c6cc1302ff62..25aadb16840c8 100644 --- a/python/pyspark/shell.py +++ b/python/pyspark/shell.py @@ -62,6 +62,8 @@ platform.python_version(), platform.python_build()[0], platform.python_build()[1])) +print("Spark context Web UI available at %s" % (sc.uiWebUrl)) +print("Spark context available as 'sc' (master = %s, app id = %s)." % (sc.master, sc.applicationId)) print("SparkSession available as 'spark'.") # The ./bin/pyspark script stores the old PYTHONSTARTUP value in OLD_PYTHONSTARTUP, From 4ac8133866e7b97e04ab75cad0e0bf54565b0ba5 Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Tue, 10 Nov 2020 11:22:35 +0900 Subject: [PATCH 0427/1009] [SPARK-33223][SS][UI] Structured Streaming Web UI state information ### What changes were proposed in this pull request? Structured Streaming UI is not containing state information. In this PR I've added it. ### Why are the changes needed? Missing state information. ### Does this PR introduce _any_ user-facing change? Additional UI elements appear. ### How was this patch tested? Existing unit tests + manual test. Screenshot 2020-10-30 at 15 14 21 Closes #30151 from gaborgsomogyi/SPARK-33223. 
Authored-by: Gabor Somogyi Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../ui/StreamingQueryStatisticsPage.scala | 119 +++++++++++++++++- .../sql/streaming/ui/UISeleniumSuite.scala | 15 ++- 2 files changed, 131 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala index 227e5e5af3983..77078046dda7c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala @@ -22,7 +22,7 @@ import java.lang.{Long => JLong} import java.util.UUID import javax.servlet.http.HttpServletRequest -import scala.xml.{Node, Unparsed} +import scala.xml.{Node, NodeBuffer, Unparsed} import org.apache.spark.internal.Logging import org.apache.spark.sql.streaming.ui.UIUtils._ @@ -126,6 +126,122 @@ private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab)
    } + def generateAggregatedStateOperators( + query: StreamingQueryUIData, + minBatchTime: Long, + maxBatchTime: Long, + jsCollector: JsCollector): NodeBuffer = { + // This is made sure on caller side but put it here to be defensive + require(query.lastProgress != null) + if (query.lastProgress.stateOperators.nonEmpty) { + val numRowsTotalData = query.recentProgress.map(p => (parseProgressTimestamp(p.timestamp), + p.stateOperators.map(_.numRowsTotal).sum.toDouble)) + val maxNumRowsTotal = numRowsTotalData.maxBy(_._2)._2 + + val numRowsUpdatedData = query.recentProgress.map(p => (parseProgressTimestamp(p.timestamp), + p.stateOperators.map(_.numRowsUpdated).sum.toDouble)) + val maxNumRowsUpdated = numRowsUpdatedData.maxBy(_._2)._2 + + val memoryUsedBytesData = query.recentProgress.map(p => (parseProgressTimestamp(p.timestamp), + p.stateOperators.map(_.memoryUsedBytes).sum.toDouble)) + val maxMemoryUsedBytes = memoryUsedBytesData.maxBy(_._2)._2 + + val numRowsDroppedByWatermarkData = query.recentProgress + .map(p => (parseProgressTimestamp(p.timestamp), + p.stateOperators.map(_.numRowsDroppedByWatermark).sum.toDouble)) + val maxNumRowsDroppedByWatermark = numRowsDroppedByWatermarkData.maxBy(_._2)._2 + + val graphUIDataForNumberTotalRows = + new GraphUIData( + "aggregated-num-total-state-rows-timeline", + "aggregated-num-total-state-rows-histogram", + numRowsTotalData, + minBatchTime, + maxBatchTime, + 0, + maxNumRowsTotal, + "records") + graphUIDataForNumberTotalRows.generateDataJs(jsCollector) + + val graphUIDataForNumberUpdatedRows = + new GraphUIData( + "aggregated-num-updated-state-rows-timeline", + "aggregated-num-updated-state-rows-histogram", + numRowsUpdatedData, + minBatchTime, + maxBatchTime, + 0, + maxNumRowsUpdated, + "records") + graphUIDataForNumberUpdatedRows.generateDataJs(jsCollector) + + val graphUIDataForMemoryUsedBytes = + new GraphUIData( + "aggregated-state-memory-used-bytes-timeline", + "aggregated-state-memory-used-bytes-histogram", + memoryUsedBytesData, + minBatchTime, + maxBatchTime, + 0, + maxMemoryUsedBytes, + "bytes") + graphUIDataForMemoryUsedBytes.generateDataJs(jsCollector) + + val graphUIDataForNumRowsDroppedByWatermark = + new GraphUIData( + "aggregated-num-state-rows-dropped-by-watermark-timeline", + "aggregated-num-state-rows-dropped-by-watermark-histogram", + numRowsDroppedByWatermarkData, + minBatchTime, + maxBatchTime, + 0, + maxNumRowsDroppedByWatermark, + "records") + graphUIDataForNumRowsDroppedByWatermark.generateDataJs(jsCollector) + + // scalastyle:off + + +
    +
    Aggregated Number Of Total State Rows {SparkUIUtils.tooltip("Aggregated number of total state rows.", "right")}
    +
    + + {graphUIDataForNumberTotalRows.generateTimelineHtml(jsCollector)} + {graphUIDataForNumberTotalRows.generateHistogramHtml(jsCollector)} + + + +
    +
    Aggregated Number Of Updated State Rows {SparkUIUtils.tooltip("Aggregated number of updated state rows.", "right")}
    +
    + + {graphUIDataForNumberUpdatedRows.generateTimelineHtml(jsCollector)} + {graphUIDataForNumberUpdatedRows.generateHistogramHtml(jsCollector)} + + + +
    +
    Aggregated State Memory Used In Bytes {SparkUIUtils.tooltip("Aggregated state memory used in bytes.", "right")}
    +
    + + {graphUIDataForMemoryUsedBytes.generateTimelineHtml(jsCollector)} + {graphUIDataForMemoryUsedBytes.generateHistogramHtml(jsCollector)} + + + +
    +
    Aggregated Number Of State Rows Dropped By Watermark {SparkUIUtils.tooltip("Aggregated number of state rows dropped by watermark.", "right")}
    +
    + + {graphUIDataForNumRowsDroppedByWatermark.generateTimelineHtml(jsCollector)} + {graphUIDataForNumRowsDroppedByWatermark.generateHistogramHtml(jsCollector)} + + // scalastyle:on + } else { + new NodeBuffer() + } + } + def generateStatTable(query: StreamingQueryUIData): Seq[Node] = { val batchToTimestamps = withNoProgress(query, query.recentProgress.map(p => (p.batchId, parseProgressTimestamp(p.timestamp))), @@ -284,6 +400,7 @@ private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab) {graphUIDataForDuration.generateAreaStackHtmlWithData(jsCollector, operationDurationData)} + {generateAggregatedStateOperators(query, minBatchTime, maxBatchTime, jsCollector)} } else { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala index 82aa1453f9ba2..1a8b28001b8d1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala @@ -75,10 +75,12 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers with B val h3Text = findAll(cssSelector("h3")).map(_.text).toSeq h3Text should not contain ("Streaming Query") + val input1 = spark.readStream.format("rate").load() + val input2 = spark.readStream.format("rate").load() val activeQuery = - spark.readStream.format("rate").load().writeStream.format("noop").start() + input1.join(input2, "value").writeStream.format("noop").start() val completedQuery = - spark.readStream.format("rate").load().writeStream.format("noop").start() + input1.join(input2, "value").writeStream.format("noop").start() completedQuery.stop() val failedQuery = spark.readStream.format("rate").load().select("value").as[Long] .map(_ / 0).writeStream.format("noop").start() @@ -129,6 +131,15 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers with B findAll(cssSelector("""#stat-table th""")).map(_.text).toSeq should be { List("", "Timelines", "Histograms") } + summaryText should contain ("Input Rate (?)") + summaryText should contain ("Process Rate (?)") + summaryText should contain ("Input Rows (?)") + summaryText should contain ("Batch Duration (?)") + summaryText should contain ("Operation Duration (?)") + summaryText should contain ("Aggregated Number Of Total State Rows (?)") + summaryText should contain ("Aggregated Number Of Updated State Rows (?)") + summaryText should contain ("Aggregated State Memory Used In Bytes (?)") + summaryText should contain ("Aggregated Number Of State Rows Dropped By Watermark (?)") } } finally { spark.streams.active.foreach(_.stop()) From c2caf2522b2e65a93a797580f08ac36461000969 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Mon, 9 Nov 2020 19:07:16 -0800 Subject: [PATCH 0428/1009] [SPARK-33213][BUILD] Upgrade Apache Arrow to 2.0.0 ### What changes were proposed in this pull request? This upgrade Apache Arrow version from 1.0.1 to 2.0.0 ### Why are the changes needed? Apache Arrow 2.0.0 was released with some improvements from Java side, so it's better to upgrade Spark to the new version. Note that the format version in Arrow 2.0.0 is still 1.0.0 so API should still be compatible between 1.0.1 and 2.0.0. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing UTs. Closes #30306 from sunchao/SPARK-33213. 
Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 8 ++++---- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 8 ++++---- pom.xml | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index dc98de4d8015d..8c1ab9e3c1cfe 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -15,10 +15,10 @@ apacheds-kerberos-codec/2.0.0-M15//apacheds-kerberos-codec-2.0.0-M15.jar api-asn1-api/1.0.0-M20//api-asn1-api-1.0.0-M20.jar api-util/1.0.0-M20//api-util-1.0.0-M20.jar arpack_combined_all/0.1//arpack_combined_all-0.1.jar -arrow-format/1.0.1//arrow-format-1.0.1.jar -arrow-memory-core/1.0.1//arrow-memory-core-1.0.1.jar -arrow-memory-netty/1.0.1//arrow-memory-netty-1.0.1.jar -arrow-vector/1.0.1//arrow-vector-1.0.1.jar +arrow-format/2.0.0//arrow-format-2.0.0.jar +arrow-memory-core/2.0.0//arrow-memory-core-2.0.0.jar +arrow-memory-netty/2.0.0//arrow-memory-netty-2.0.0.jar +arrow-vector/2.0.0//arrow-vector-2.0.0.jar audience-annotations/0.5.0//audience-annotations-0.5.0.jar automaton/1.11-8//automaton-1.11-8.jar avro-ipc/1.8.2//avro-ipc-1.8.2.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 0d7aeb9a82059..fcb993033221e 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -10,10 +10,10 @@ antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar antlr4-runtime/4.7.1//antlr4-runtime-4.7.1.jar aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar arpack_combined_all/0.1//arpack_combined_all-0.1.jar -arrow-format/1.0.1//arrow-format-1.0.1.jar -arrow-memory-core/1.0.1//arrow-memory-core-1.0.1.jar -arrow-memory-netty/1.0.1//arrow-memory-netty-1.0.1.jar -arrow-vector/1.0.1//arrow-vector-1.0.1.jar +arrow-format/2.0.0//arrow-format-2.0.0.jar +arrow-memory-core/2.0.0//arrow-memory-core-2.0.0.jar +arrow-memory-netty/2.0.0//arrow-memory-netty-2.0.0.jar +arrow-vector/2.0.0//arrow-vector-2.0.0.jar audience-annotations/0.5.0//audience-annotations-0.5.0.jar automaton/1.11-8//automaton-1.11-8.jar avro-ipc/1.8.2//avro-ipc-1.8.2.jar diff --git a/pom.xml b/pom.xml index d0eb0a354627d..25c6da7100056 100644 --- a/pom.xml +++ b/pom.xml @@ -203,7 +203,7 @@ If you are changing Arrow version specification, please check ./python/pyspark/sql/pandas/utils.py, and ./python/setup.py too. --> - 1.0.1 + 2.0.0 org.fusesource.leveldbjni From a1f84d8714cd1bd6cc6e2da6eb97fb9f58f3ee8f Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Tue, 10 Nov 2020 04:43:32 +0000 Subject: [PATCH 0429/1009] [SPARK-33369][SQL] DSV2: Skip schema inference in write if table provider supports external metadata ### What changes were proposed in this pull request? When TableProvider.supportsExternalMetadata() is true, Spark will use the input Dataframe's schema in `DataframeWriter.save()`/`DataStreamWriter.start()` and skip schema/partitioning inference. ### Why are the changes needed? For all the v2 data sources which are not FileDataSourceV2, Spark always infers the table schema/partitioning on `DataframeWriter.save()`/`DataStreamWriter.start()`. The inference of table schema/partitioning can be expensive. However, there is no such trait or flag for indicating a V2 source can use the input DataFrame's schema on `DataframeWriter.save()`/`DataStreamWriter.start()`. We can resolve the problem by adding a new expected behavior for the method `TableProvider.supportsExternalMetadata()`. 
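As a rough illustration (a sketch only, not code from this patch; the class and table names are invented), a custom connector can opt in by overriding `supportsExternalMetadata()`, after which `getTable` receives the write query's schema instead of the result of `inferSchema`:

```scala
import java.util

import org.apache.spark.sql.connector.catalog.{Table, TableCapability, TableProvider}
import org.apache.spark.sql.connector.expressions.Transform
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.util.CaseInsensitiveStringMap

// Hypothetical provider: because supportsExternalMetadata() returns true,
// DataFrameWriter.save() passes the input query's schema to getTable and
// skips inferSchema() on the write path.
class MyExternalMetadataSource extends TableProvider {

  override def supportsExternalMetadata(): Boolean = true

  // Still used on the read path when no schema is supplied externally.
  override def inferSchema(options: CaseInsensitiveStringMap): StructType =
    throw new UnsupportedOperationException("schema must be supplied externally")

  override def getTable(
      tableSchema: StructType,
      partitioning: Array[Transform],
      properties: util.Map[String, String]): Table = {
    new Table {
      override def name(): String = "my_external_metadata_table"
      // Expose the externally supplied schema directly.
      override def schema(): StructType = tableSchema
      // A real source would declare BATCH_WRITE etc. and implement SupportsWrite;
      // omitted here for brevity.
      override def capabilities(): util.Set[TableCapability] =
        util.Collections.emptySet[TableCapability]()
    }
  }
}
```

The test sources added below (`FakeWriteSupportingExternalMetadata` and `SupportsExternalMetadataWritableDataSource`) follow the same shape.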
### Does this PR introduce _any_ user-facing change? Yes, a new behavior for the data source v2 API `TableProvider.supportsExternalMetadata()` when it returns true. ### How was this patch tested? Unit test Closes #30273 from gengliangwang/supportsExternalMetadata. Authored-by: Gengliang Wang Signed-off-by: Wenchen Fan --- .../sql/connector/catalog/TableProvider.java | 7 ++-- .../apache/spark/sql/DataFrameWriter.scala | 11 +++--- .../sql/streaming/DataStreamWriter.scala | 10 +++++- ...pache.spark.sql.sources.DataSourceRegister | 1 + .../sql/connector/DataSourceV2Suite.scala | 23 ++++++++++++ .../sources/StreamingDataSourceV2Suite.scala | 36 +++++++++++++++++++ 6 files changed, 80 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableProvider.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableProvider.java index 82731e2c8e1e8..4881fde06c659 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableProvider.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableProvider.java @@ -77,8 +77,11 @@ default Transform[] inferPartitioning(CaseInsensitiveStringMap options) { /** * Returns true if the source has the ability of accepting external table metadata when getting - * tables. The external table metadata includes user-specified schema from - * `DataFrameReader`/`DataStreamReader` and schema/partitioning stored in Spark catalog. + * tables. The external table metadata includes: + * 1. For table reader: user-specified schema from `DataFrameReader`/`DataStreamReader` and + * schema/partitioning stored in Spark catalog. + * 2. For table writer: the schema of the input `Dataframe` of + * `DataframeWriter`/`DataStreamWriter`. *

    * By default this method returns false, which means the schema and partitioning passed to * `getTable` are from the infer methods. Please override it if this source has expensive diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index da031b1827dd5..991f02d43bc47 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -325,11 +325,12 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { val dsOptions = new CaseInsensitiveStringMap(finalOptions.asJava) def getTable: Table = { - // For file source, it's expensive to infer schema/partition at each write. Here we pass - // the schema of input query and the user-specified partitioning to `getTable`. If the - // query schema is not compatible with the existing data, the write can still success but - // following reads would fail. - if (provider.isInstanceOf[FileDataSourceV2]) { + // If the source accepts external table metadata, here we pass the schema of input query + // and the user-specified partitioning to `getTable`. This is for avoiding + // schema/partitioning inference, which can be very expensive. + // If the query schema is not compatible with the existing data, the behavior is undefined. + // For example, writing file source will success but the following reads will fail. + if (provider.supportsExternalMetadata()) { provider.getTable( df.schema.asNullable, partitioningAsV2.toArray, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index 2867bf581df81..d67e175c24dd9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -386,8 +386,16 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { val finalOptions = sessionOptions.filterKeys(!optionsWithPath.contains(_)).toMap ++ optionsWithPath.originalMap val dsOptions = new CaseInsensitiveStringMap(finalOptions.asJava) + // If the source accepts external table metadata, here we pass the schema of input query + // to `getTable`. This is for avoiding schema inference, which can be very expensive. + // If the query schema is not compatible with the existing data, the behavior is undefined. 
+ val outputSchema = if (provider.supportsExternalMetadata()) { + Some(df.schema) + } else { + None + } val table = DataSourceV2Utils.getTableFromProvider( - provider, dsOptions, userSpecifiedSchema = None) + provider, dsOptions, userSpecifiedSchema = outputSchema) import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ table match { case table: SupportsWrite if table.supports(STREAMING_WRITE) => diff --git a/sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister index 914af589384df..dd22970203b3c 100644 --- a/sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister +++ b/sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister @@ -11,4 +11,5 @@ org.apache.spark.sql.streaming.sources.FakeReadBothModes org.apache.spark.sql.streaming.sources.FakeReadNeitherMode org.apache.spark.sql.streaming.sources.FakeWriteOnly org.apache.spark.sql.streaming.sources.FakeNoWrite +org.apache.spark.sql.streaming.sources.FakeWriteSupportingExternalMetadata org.apache.spark.sql.streaming.sources.FakeWriteSupportProviderV1Fallback diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala index ce28e615702db..28cb448c400c7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala @@ -157,6 +157,19 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS } } + test("SPARK-33369: Skip schema inference in DataframeWriter.save() if table provider " + + "supports external metadata") { + withTempDir { dir => + val cls = classOf[SupportsExternalMetadataWritableDataSource].getName + spark.range(10).select('id as 'i, -'id as 'j).write.format(cls) + .option("path", dir.getCanonicalPath).mode("append").save() + val schema = new StructType().add("i", "long").add("j", "long") + checkAnswer( + spark.read.format(cls).option("path", dir.getCanonicalPath).schema(schema).load(), + spark.range(10).select('id, -'id)) + } + } + test("partitioning reporting") { import org.apache.spark.sql.functions.{count, sum} Seq(classOf[PartitionAwareDataSource], classOf[JavaPartitionAwareDataSource]).foreach { cls => @@ -771,6 +784,16 @@ class SimpleWriteOnlyDataSource extends SimpleWritableDataSource { } } +class SupportsExternalMetadataWritableDataSource extends SimpleWritableDataSource { + override def supportsExternalMetadata(): Boolean = true + + override def inferSchema(options: CaseInsensitiveStringMap): StructType = { + throw new IllegalArgumentException( + "Dataframe writer should not require inferring table schema the data source supports" + + " external metadata.") + } +} + class ReportStatisticsDataSource extends SimpleWritableDataSource { class MyScanBuilder extends SimpleScanBuilder diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/sources/StreamingDataSourceV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/sources/StreamingDataSourceV2Suite.scala index 05cf324f8d490..66544a8dc4693 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/sources/StreamingDataSourceV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/sources/StreamingDataSourceV2Suite.scala @@ -25,6 +25,7 @@ import 
scala.collection.JavaConverters._ import org.apache.spark.sql.{DataFrame, SQLContext} import org.apache.spark.sql.connector.catalog.{SessionConfigSupport, SupportsRead, SupportsWrite, Table, TableCapability, TableProvider} import org.apache.spark.sql.connector.catalog.TableCapability._ +import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.connector.read.{InputPartition, PartitionReaderFactory, Scan, ScanBuilder} import org.apache.spark.sql.connector.read.streaming.{ContinuousPartitionReaderFactory, ContinuousStream, MicroBatchStream, Offset, PartitionOffset} import org.apache.spark.sql.connector.write.{LogicalWriteInfo, PhysicalWriteInfo, WriteBuilder, WriterCommitMessage} @@ -195,6 +196,30 @@ class FakeNoWrite extends DataSourceRegister with SimpleTableProvider { } } +class FakeWriteSupportingExternalMetadata + extends DataSourceRegister + with TableProvider { + override def shortName(): String = "fake-write-supporting-external-metadata" + + override def supportsExternalMetadata(): Boolean = true + + override def inferSchema(options: CaseInsensitiveStringMap): StructType = { + throw new IllegalArgumentException( + "Data stream writer should not require inferring table schema the data source supports" + + " external Metadata.") + } + + override def getTable( + tableSchema: StructType, + partitioning: Array[Transform], + properties: util.Map[String, String]): Table = { + new Table with FakeStreamingWriteTable { + override def name(): String = "fake" + override def schema(): StructType = tableSchema + } + } +} + case class FakeWriteV1FallbackException() extends Exception class FakeSink extends Sink { @@ -314,6 +339,17 @@ class StreamingDataSourceV2Suite extends StreamTest { } } + test("SPARK-33369: Skip schema inference in DataStreamWriter.start() if table provider " + + "supports external metadata") { + testPositiveCaseWithQuery( + "fake-read-microbatch-continuous", "fake-write-supporting-external-metadata", + Trigger.Once()) { v2Query => + val sink = v2Query.asInstanceOf[StreamingQueryWrapper].streamingQuery.sink + assert(sink.isInstanceOf[Table]) + assert(sink.asInstanceOf[Table].schema() == StructType(Nil)) + } + } + test("disabled v2 write") { // Ensure the V2 path works normally and generates a V2 sink.. testPositiveCaseWithQuery( From 90f6f39e429e0db00e234bdcf679a70dfce3272e Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Tue, 10 Nov 2020 05:28:06 +0000 Subject: [PATCH 0430/1009] [SPARK-33366][SQL] Migrate LOAD DATA command to use UnresolvedTable to resolve the identifier ### What changes were proposed in this pull request? This PR proposes to migrate `LOAD DATA` to use `UnresolvedTable` to resolve the table identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). Note that `LOAD DATA` is not supported for v2 tables. ### Why are the changes needed? The changes allow consistent resolution behavior when resolving the table identifier. 
For example, the following is the current behavior: ```scala sql("CREATE TEMPORARY VIEW t AS SELECT 1") sql("CREATE DATABASE db") sql("CREATE TABLE t (key INT, value STRING) USING hive") sql("USE db") sql("LOAD DATA LOCAL INPATH 'examples/src/main/resources/kv1.txt' INTO TABLE t") // Succeeds ``` With this change, `LOAD DATA` above fails with the following: ``` org.apache.spark.sql.AnalysisException: t is a temp view not table.; line 1 pos 0 at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42) at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveTempViews$$anonfun$apply$7.$anonfun$applyOrElse$39(Analyzer.scala:865) at scala.Option.foreach(Option.scala:407) ``` , which is expected since temporary view is resolved first and `LOAD DATA` doesn't support a temporary view. ### Does this PR introduce _any_ user-facing change? After this PR, `LOAD DATA ... t` is resolved to a temp view `t` instead of table `db.t` in the above scenario. ### How was this patch tested? Updated existing tests. Closes #30270 from imback82/load_data_cmd. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/AstBuilder.scala | 6 +- .../catalyst/plans/logical/statements.scala | 10 --- .../catalyst/plans/logical/v2Commands.scala | 65 +++++++++++-------- .../sql/catalyst/parser/DDLParserSuite.scala | 10 +-- .../analysis/ResolveSessionCatalog.scala | 28 ++++---- .../datasources/v2/DataSourceV2Strategy.scala | 3 + .../sql/connector/DataSourceV2SQLSuite.scala | 8 +-- .../spark/sql/execution/SQLViewSuite.scala | 15 +++-- .../apache/spark/sql/hive/test/TestHive.scala | 5 +- 9 files changed, 82 insertions(+), 68 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index c5e8429d49427..07086d1a45aa0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3282,7 +3282,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } /** - * Create a [[LoadDataStatement]]. + * Create a [[LoadData]]. 
* * For example: * {{{ @@ -3291,8 +3291,8 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging * }}} */ override def visitLoadData(ctx: LoadDataContext): LogicalPlan = withOrigin(ctx) { - LoadDataStatement( - tableName = visitMultipartIdentifier(ctx.multipartIdentifier), + LoadData( + child = UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier)), path = string(ctx.path), isLocal = ctx.LOCAL != null, isOverwrite = ctx.OVERWRITE != null, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index e711a6ad434d4..246e7f3bcb959 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -347,16 +347,6 @@ case class UseStatement(isNamespaceSet: Boolean, nameParts: Seq[String]) extends */ case class RepairTableStatement(tableName: Seq[String]) extends ParsedStatement -/** - * A LOAD DATA INTO TABLE statement, as parsed from SQL - */ -case class LoadDataStatement( - tableName: Seq[String], - path: String, - isLocal: Boolean, - isOverwrite: Boolean, - partition: Option[TablePartitionSpec]) extends ParsedStatement - /** * A SHOW CREATE TABLE statement, as parsed from SQL. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 94d4e7ecfac21..b5386f5044452 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -261,7 +261,7 @@ case class ReplaceTableAsSelect( } /** - * The logical plan of the CREATE NAMESPACE command that works for v2 catalogs. + * The logical plan of the CREATE NAMESPACE command. */ case class CreateNamespace( catalog: SupportsNamespaces, @@ -270,7 +270,7 @@ case class CreateNamespace( properties: Map[String, String]) extends Command /** - * The logical plan of the DROP NAMESPACE command that works for v2 catalogs. + * The logical plan of the DROP NAMESPACE command. */ case class DropNamespace( namespace: LogicalPlan, @@ -280,7 +280,7 @@ case class DropNamespace( } /** - * The logical plan of the DESCRIBE NAMESPACE command that works for v2 catalogs. + * The logical plan of the DESCRIBE NAMESPACE command. */ case class DescribeNamespace( namespace: LogicalPlan, @@ -296,7 +296,7 @@ case class DescribeNamespace( /** * The logical plan of the ALTER (DATABASE|SCHEMA|NAMESPACE) ... SET (DBPROPERTIES|PROPERTIES) - * command that works for v2 catalogs. + * command. */ case class AlterNamespaceSetProperties( namespace: LogicalPlan, @@ -305,8 +305,7 @@ case class AlterNamespaceSetProperties( } /** - * The logical plan of the ALTER (DATABASE|SCHEMA|NAMESPACE) ... SET LOCATION - * command that works for v2 catalogs. + * The logical plan of the ALTER (DATABASE|SCHEMA|NAMESPACE) ... SET LOCATION command. */ case class AlterNamespaceSetLocation( namespace: LogicalPlan, @@ -315,7 +314,7 @@ case class AlterNamespaceSetLocation( } /** - * The logical plan of the SHOW NAMESPACES command that works for v2 catalogs. + * The logical plan of the SHOW NAMESPACES command. 
*/ case class ShowNamespaces( namespace: LogicalPlan, @@ -327,7 +326,7 @@ case class ShowNamespaces( } /** - * The logical plan of the DESCRIBE relation_name command that works for v2 tables. + * The logical plan of the DESCRIBE relation_name command. */ case class DescribeRelation( relation: LogicalPlan, @@ -338,7 +337,7 @@ case class DescribeRelation( } /** - * The logical plan of the DESCRIBE relation_name col_name command that works for v2 tables. + * The logical plan of the DESCRIBE relation_name col_name command. */ case class DescribeColumn( relation: LogicalPlan, @@ -349,7 +348,7 @@ case class DescribeColumn( } /** - * The logical plan of the DELETE FROM command that works for v2 tables. + * The logical plan of the DELETE FROM command. */ case class DeleteFromTable( table: LogicalPlan, @@ -358,7 +357,7 @@ case class DeleteFromTable( } /** - * The logical plan of the UPDATE TABLE command that works for v2 tables. + * The logical plan of the UPDATE TABLE command. */ case class UpdateTable( table: LogicalPlan, @@ -368,7 +367,7 @@ case class UpdateTable( } /** - * The logical plan of the MERGE INTO command that works for v2 tables. + * The logical plan of the MERGE INTO command. */ case class MergeIntoTable( targetTable: LogicalPlan, @@ -407,7 +406,7 @@ case class Assignment(key: Expression, value: Expression) extends Expression wit } /** - * The logical plan of the DROP TABLE command that works for v2 tables. + * The logical plan of the DROP TABLE command. */ case class DropTable( child: LogicalPlan, @@ -422,7 +421,7 @@ case class DropTable( case class NoopDropTable(multipartIdentifier: Seq[String]) extends Command /** - * The logical plan of the ALTER TABLE command that works for v2 tables. + * The logical plan of the ALTER TABLE command. */ case class AlterTable( catalog: TableCatalog, @@ -454,7 +453,7 @@ case class AlterTable( } /** - * The logical plan of the ALTER TABLE RENAME command that works for v2 tables. + * The logical plan of the ALTER TABLE RENAME command. */ case class RenameTable( catalog: TableCatalog, @@ -462,7 +461,7 @@ case class RenameTable( newIdent: Identifier) extends Command /** - * The logical plan of the SHOW TABLE command that works for v2 catalogs. + * The logical plan of the SHOW TABLE command. */ case class ShowTables( namespace: LogicalPlan, @@ -475,7 +474,7 @@ case class ShowTables( } /** - * The logical plan of the SHOW VIEWS command that works for v1 and v2 catalogs. + * The logical plan of the SHOW VIEWS command. * * Notes: v2 catalogs do not support views API yet, the command will fallback to * v1 ShowViewsCommand during ResolveSessionCatalog. @@ -491,7 +490,7 @@ case class ShowViews( } /** - * The logical plan of the USE/USE NAMESPACE command that works for v2 catalogs. + * The logical plan of the USE/USE NAMESPACE command. */ case class SetCatalogAndNamespace( catalogManager: CatalogManager, @@ -499,14 +498,14 @@ case class SetCatalogAndNamespace( namespace: Option[Seq[String]]) extends Command /** - * The logical plan of the REFRESH TABLE command that works for v2 catalogs. + * The logical plan of the REFRESH TABLE command. */ case class RefreshTable(child: LogicalPlan) extends Command { override def children: Seq[LogicalPlan] = child :: Nil } /** - * The logical plan of the SHOW CURRENT NAMESPACE command that works for v2 catalogs. + * The logical plan of the SHOW CURRENT NAMESPACE command. 
*/ case class ShowCurrentNamespace(catalogManager: CatalogManager) extends Command { override val output: Seq[Attribute] = Seq( @@ -515,7 +514,7 @@ case class ShowCurrentNamespace(catalogManager: CatalogManager) extends Command } /** - * The logical plan of the SHOW TBLPROPERTIES command that works for v2 catalogs. + * The logical plan of the SHOW TBLPROPERTIES command. */ case class ShowTableProperties( table: LogicalPlan, @@ -556,21 +555,21 @@ case class CommentOnTable(child: LogicalPlan, comment: String) extends Command { } /** - * The logical plan of the REFRESH FUNCTION command that works for v2 catalogs. + * The logical plan of the REFRESH FUNCTION command. */ case class RefreshFunction(child: LogicalPlan) extends Command { override def children: Seq[LogicalPlan] = child :: Nil } /** - * The logical plan of the DESCRIBE FUNCTION command that works for v2 catalogs. + * The logical plan of the DESCRIBE FUNCTION command. */ case class DescribeFunction(child: LogicalPlan, isExtended: Boolean) extends Command { override def children: Seq[LogicalPlan] = child :: Nil } /** - * The logical plan of the DROP FUNCTION command that works for v2 catalogs. + * The logical plan of the DROP FUNCTION command. */ case class DropFunction( child: LogicalPlan, @@ -580,7 +579,7 @@ case class DropFunction( } /** - * The logical plan of the SHOW FUNCTIONS command that works for v2 catalogs. + * The logical plan of the SHOW FUNCTIONS command. */ case class ShowFunctions( child: Option[LogicalPlan], @@ -591,7 +590,7 @@ case class ShowFunctions( } /** - * The logical plan of the ANALYZE TABLE command that works for v2 catalogs. + * The logical plan of the ANALYZE TABLE command. */ case class AnalyzeTable( child: LogicalPlan, @@ -601,7 +600,7 @@ case class AnalyzeTable( } /** - * The logical plan of the ANALYZE TABLE FOR COLUMNS command that works for v2 catalogs. + * The logical plan of the ANALYZE TABLE FOR COLUMNS command. */ case class AnalyzeColumn( child: LogicalPlan, @@ -611,3 +610,15 @@ case class AnalyzeColumn( "mutually exclusive. Only one of them should be specified.") override def children: Seq[LogicalPlan] = child :: Nil } + +/** + * The logical plan of the LOAD DATA INTO TABLE command. 
+ */ +case class LoadData( + child: LogicalPlan, + path: String, + isLocal: Boolean, + isOverwrite: Boolean, + partition: Option[TablePartitionSpec]) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index aca7602bdbcb0..085aaf148c8cd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1605,15 +1605,15 @@ class DDLParserSuite extends AnalysisTest { test("LOAD DATA INTO table") { comparePlans( parsePlan("LOAD DATA INPATH 'filepath' INTO TABLE a.b.c"), - LoadDataStatement(Seq("a", "b", "c"), "filepath", false, false, None)) + LoadData(UnresolvedTable(Seq("a", "b", "c")), "filepath", false, false, None)) comparePlans( parsePlan("LOAD DATA LOCAL INPATH 'filepath' INTO TABLE a.b.c"), - LoadDataStatement(Seq("a", "b", "c"), "filepath", true, false, None)) + LoadData(UnresolvedTable(Seq("a", "b", "c")), "filepath", true, false, None)) comparePlans( parsePlan("LOAD DATA LOCAL INPATH 'filepath' OVERWRITE INTO TABLE a.b.c"), - LoadDataStatement(Seq("a", "b", "c"), "filepath", true, true, None)) + LoadData(UnresolvedTable(Seq("a", "b", "c")), "filepath", true, true, None)) comparePlans( parsePlan( @@ -1621,8 +1621,8 @@ class DDLParserSuite extends AnalysisTest { |LOAD DATA LOCAL INPATH 'filepath' OVERWRITE INTO TABLE a.b.c |PARTITION(ds='2017-06-10') """.stripMargin), - LoadDataStatement( - Seq("a", "b", "c"), + LoadData( + UnresolvedTable(Seq("a", "b", "c")), "filepath", true, true, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 610632ac9256e..59652229a2b2e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -312,8 +312,8 @@ class ResolveSessionCatalog( ignoreIfExists = c.ifNotExists) } - case RefreshTable(r @ ResolvedTable(_, _, _: V1Table)) if isSessionCatalog(r.catalog) => - RefreshTableCommand(r.identifier.asTableIdentifier) + case RefreshTable(ResolvedV1TableIdentifier(ident)) => + RefreshTableCommand(ident.asTableIdentifier) case RefreshTable(r: ResolvedView) => RefreshTableCommand(r.identifier.asTableIdentifier) @@ -358,9 +358,8 @@ class ResolveSessionCatalog( orCreate = c.orCreate) } - case DropTable( - r @ ResolvedTable(_, _, _: V1Table), ifExists, purge) if isSessionCatalog(r.catalog) => - DropTableCommand(r.identifier.asTableIdentifier, ifExists, isView = false, purge = purge) + case DropTable(ResolvedV1TableIdentifier(ident), ifExists, purge) => + DropTableCommand(ident.asTableIdentifier, ifExists, isView = false, purge = purge) // v1 DROP TABLE supports temp view. 
case DropTable(r: ResolvedView, ifExists, purge) => @@ -427,10 +426,9 @@ class ResolveSessionCatalog( v1TableName.asTableIdentifier, "MSCK REPAIR TABLE") - case LoadDataStatement(tbl, path, isLocal, isOverwrite, partition) => - val v1TableName = parseV1Table(tbl, "LOAD DATA") + case LoadData(ResolvedV1TableIdentifier(ident), path, isLocal, isOverwrite, partition) => LoadDataCommand( - v1TableName.asTableIdentifier, + ident.asTableIdentifier, path, isLocal, isOverwrite, @@ -573,9 +571,8 @@ class ResolveSessionCatalog( "SHOW VIEWS, only SessionCatalog supports this command.") } - case ShowTableProperties( - r @ ResolvedTable(_, _, _: V1Table), propertyKey) if isSessionCatalog(r.catalog) => - ShowTablePropertiesCommand(r.identifier.asTableIdentifier, propertyKey) + case ShowTableProperties(ResolvedV1TableIdentifier(ident), propertyKey) => + ShowTablePropertiesCommand(ident.asTableIdentifier, propertyKey) case ShowTableProperties(r: ResolvedView, propertyKey) => ShowTablePropertiesCommand(r.identifier.asTableIdentifier, propertyKey) @@ -696,9 +693,16 @@ class ResolveSessionCatalog( } } - object ResolvedV1TableOrViewIdentifier { + object ResolvedV1TableIdentifier { def unapply(resolved: LogicalPlan): Option[Identifier] = resolved match { case ResolvedTable(catalog, ident, _: V1Table) if isSessionCatalog(catalog) => Some(ident) + case _ => None + } + } + + object ResolvedV1TableOrViewIdentifier { + def unapply(resolved: LogicalPlan): Option[Identifier] = resolved match { + case ResolvedV1TableIdentifier(ident) => Some(ident) case ResolvedView(ident, _) => Some(ident) case _ => None } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 648929eaa33ce..817b3cecf03e2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -283,6 +283,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case AnalyzeTable(_: ResolvedTable, _, _) | AnalyzeColumn(_: ResolvedTable, _, _) => throw new AnalysisException("ANALYZE TABLE is not supported for v2 tables.") + case LoadData(_: ResolvedTable, _, _, _, _) => + throw new AnalysisException("LOAD DATA is not supported for v2 tables.") + case _ => Nil } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 444daf8233c67..ee3f7bed7ca9f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2074,10 +2074,10 @@ class DataSourceV2SQLSuite |PARTITIONED BY (id) """.stripMargin) - testV1Command("LOAD DATA", s"INPATH 'filepath' INTO TABLE $t") - testV1Command("LOAD DATA", s"LOCAL INPATH 'filepath' INTO TABLE $t") - testV1Command("LOAD DATA", s"LOCAL INPATH 'filepath' OVERWRITE INTO TABLE $t") - testV1Command("LOAD DATA", + testNotSupportedV2Command("LOAD DATA", s"INPATH 'filepath' INTO TABLE $t") + testNotSupportedV2Command("LOAD DATA", s"LOCAL INPATH 'filepath' INTO TABLE $t") + testNotSupportedV2Command("LOAD DATA", s"LOCAL INPATH 'filepath' OVERWRITE INTO TABLE $t") + testNotSupportedV2Command("LOAD DATA", s"LOCAL INPATH 'filepath' OVERWRITE INTO TABLE $t 
PARTITION(id=1)") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 7a6b0b8d6dd9f..8889ea177720e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -168,17 +168,20 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { val dataFilePath = Thread.currentThread().getContextClassLoader.getResource("data/files/employee.dat") - assertNoSuchTable(s"""LOAD DATA LOCAL INPATH "$dataFilePath" INTO TABLE $viewName""") - assertNoSuchTable(s"TRUNCATE TABLE $viewName") val e2 = intercept[AnalysisException] { + sql(s"""LOAD DATA LOCAL INPATH "$dataFilePath" INTO TABLE $viewName""") + }.getMessage + assert(e2.contains(s"$viewName is a temp view not table")) + assertNoSuchTable(s"TRUNCATE TABLE $viewName") + val e3 = intercept[AnalysisException] { sql(s"SHOW CREATE TABLE $viewName") }.getMessage - assert(e2.contains("SHOW CREATE TABLE is not supported on a temporary view")) + assert(e3.contains("SHOW CREATE TABLE is not supported on a temporary view")) assertNoSuchTable(s"SHOW PARTITIONS $viewName") - val e3 = intercept[AnalysisException] { + val e4 = intercept[AnalysisException] { sql(s"ANALYZE TABLE $viewName COMPUTE STATISTICS") }.getMessage - assert(e3.contains(s"$viewName is a temp view not table or permanent view")) + assert(e4.contains(s"$viewName is a temp view not table or permanent view")) assertNoSuchTable(s"ANALYZE TABLE $viewName COMPUTE STATISTICS FOR COLUMNS id") } } @@ -208,7 +211,7 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { e = intercept[AnalysisException] { sql(s"""LOAD DATA LOCAL INPATH "$dataFilePath" INTO TABLE $viewName""") }.getMessage - assert(e.contains(s"Target table in LOAD DATA cannot be a view: `default`.`testview`")) + assert(e.contains("default.testView is a view not table")) e = intercept[AnalysisException] { sql(s"TRUNCATE TABLE $viewName") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index 082aa8d765e9c..10cb200550499 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -496,7 +496,10 @@ private[hive] class TestHiveSparkSession( def getLoadedTables: collection.mutable.HashSet[String] = sharedState.loadedTables def loadTestTable(name: String): Unit = { - if (!sharedState.loadedTables.contains(name)) { + // LOAD DATA does not work on temporary views. Since temporary views are resolved first, + // skip loading if there exists a temporary view with the given name. + if (sessionState.catalog.getTempView(name).isEmpty && + !sharedState.loadedTables.contains(name)) { // Marks the table as loaded first to prevent infinite mutually recursive table loading. sharedState.loadedTables += name logDebug(s"Loading test table $name") From ad02ceda29c60f9c6e0430caff0d174558c0c661 Mon Sep 17 00:00:00 2001 From: Yuanjian Li Date: Tue, 10 Nov 2020 05:46:45 +0000 Subject: [PATCH 0431/1009] [SPARK-33244][SQL] Unify the code paths for spark.table and spark.read.table ### What changes were proposed in this pull request? - Call `spark.read.table` in `spark.table`. - Add comments for `spark.table` to emphasize it also support streaming temp view reading. ### Why are the changes needed? 
The code paths of `spark.table` and `spark.read.table` should be the same. This behavior was broken in SPARK-32592 since we need to respect options in the `spark.read.table` API. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing UT. Closes #30148 from xuanyuanking/SPARK-33244. Authored-by: Yuanjian Li Signed-off-by: Wenchen Fan --- .../scala/org/apache/spark/sql/DataFrameReader.scala | 12 ++++++++++-- .../scala/org/apache/spark/sql/SparkSession.scala | 11 +++++------ 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index bd986d0138256..276d5d29bfa2c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -825,8 +825,16 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { def orc(paths: String*): DataFrame = format("orc").load(paths: _*) /** - * Returns the specified table as a `DataFrame`. - * + * Returns the specified table/view as a `DataFrame`. If it's a table, it must support batch + * reading and the returned DataFrame is the batch scan query plan of this table. If it's a view, + * the returned DataFrame is simply the query plan of the view, which can either be a batch or + * streaming query plan. + * + * @param tableName is either a qualified or unqualified name that designates a table or view. + * If a database is specified, it identifies the table/view from the database. + * Otherwise, it first attempts to find a temporary view with the given name + * and then match the table/view from the current database. + * Note that, the global temporary view database is also valid here. * @since 1.4.0 */ def table(tableName: String): DataFrame = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index 592f209475baf..d738d617f2315 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -573,7 +573,10 @@ class SparkSession private( @transient lazy val catalog: Catalog = new CatalogImpl(self) /** - * Returns the specified table/view as a `DataFrame`. + * Returns the specified table/view as a `DataFrame`. If it's a table, it must support batch + * reading and the returned DataFrame is the batch scan query plan of this table. If it's a view, + * the returned DataFrame is simply the query plan of the view, which can either be a batch or + * streaming query plan. * * @param tableName is either a qualified or unqualified name that designates a table or view. * If a database is specified, it identifies the table/view from the database.
@@ -583,11 +586,7 @@ class SparkSession private( * @since 2.0.0 */ def table(tableName: String): DataFrame = { - table(sessionState.sqlParser.parseMultipartIdentifier(tableName)) - } - - private[sql] def table(multipartIdentifier: Seq[String]): DataFrame = { - Dataset.ofRows(self, UnresolvedRelation(multipartIdentifier)) + read.table(tableName) } private[sql] def table(tableIdent: TableIdentifier): DataFrame = { From e3a768dd79558b04f6ae71380876bcde2354008c Mon Sep 17 00:00:00 2001 From: "xuewei.linxuewei" Date: Tue, 10 Nov 2020 07:23:47 +0000 Subject: [PATCH 0432/1009] [SPARK-33391][SQL] element_at with CreateArray not respect one based index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? element_at with CreateArray does not respect the one-based index. Repro steps: ``` var df = spark.sql("select element_at(array(3, 2, 1), 0)") df.printSchema() df = spark.sql("select element_at(array(3, 2, 1), 1)") df.printSchema() df = spark.sql("select element_at(array(3, 2, 1), 2)") df.printSchema() df = spark.sql("select element_at(array(3, 2, 1), 3)") df.printSchema() root |-- element_at(array(3, 2, 1), 0): integer (nullable = false) root |-- element_at(array(3, 2, 1), 1): integer (nullable = false) root |-- element_at(array(3, 2, 1), 2): integer (nullable = false) root |-- element_at(array(3, 2, 1), 3): integer (nullable = true) The correct nullability should be: 0 -> true (out of bounds, so it falls back to the default, true), 1 -> false, 2 -> false, 3 -> false. ``` For expression evaluation, it respects the one-based index, but when checking nullability it calculates with a zero-based index via `computeNullabilityFromArray`. ### Why are the changes needed? Correctness issue. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Added UT and existing UT. Closes #30296 from leanken/leanken-SPARK-33391.
Authored-by: xuewei.linxuewei Signed-off-by: Wenchen Fan --- .../expressions/collectionOperations.scala | 30 +++++++++++++++ .../CollectionExpressionsSuite.scala | 38 +++++++++++++++---- 2 files changed, 60 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 8719b2e065663..cb081b80ba096 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -1965,6 +1965,36 @@ case class ElementAt(left: Expression, right: Expression) } } + private def nullability(elements: Seq[Expression], ordinal: Int): Boolean = { + if (ordinal == 0) { + false + } else if (elements.length < math.abs(ordinal)) { + true + } else { + if (ordinal < 0) { + elements(elements.length + ordinal).nullable + } else { + elements(ordinal - 1).nullable + } + } + } + + override def computeNullabilityFromArray(child: Expression, ordinal: Expression): Boolean = { + if (ordinal.foldable && !ordinal.nullable) { + val intOrdinal = ordinal.eval().asInstanceOf[Number].intValue() + child match { + case CreateArray(ar, _) => + nullability(ar, intOrdinal) + case GetArrayStructFields(CreateArray(elements, _), field, _, _, _) => + nullability(elements, intOrdinal) || field.nullable + case _ => + true + } + } else { + true + } + } + override def nullable: Boolean = left.dataType match { case _: ArrayType => computeNullabilityFromArray(left, right) case _: MapType => true diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index 856c1fad9b204..d59d13d49cef4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -1122,11 +1122,18 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper val a = AttributeReference("a", IntegerType, nullable = false)() val b = AttributeReference("b", IntegerType, nullable = true)() val array = CreateArray(a :: b :: Nil) - assert(!ElementAt(array, Literal(0)).nullable) - assert(ElementAt(array, Literal(1)).nullable) - assert(!ElementAt(array, Subtract(Literal(2), Literal(2))).nullable) + assert(!ElementAt(array, Literal(1)).nullable) + assert(!ElementAt(array, Literal(-2)).nullable) + assert(ElementAt(array, Literal(2)).nullable) + assert(ElementAt(array, Literal(-1)).nullable) + assert(!ElementAt(array, Subtract(Literal(2), Literal(1))).nullable) assert(ElementAt(array, AttributeReference("ordinal", IntegerType)()).nullable) + // CreateArray case invalid indices + assert(!ElementAt(array, Literal(0)).nullable) + assert(ElementAt(array, Literal(4)).nullable) + assert(ElementAt(array, Literal(-4)).nullable) + // GetArrayStructFields case val f1 = StructField("a", IntegerType, nullable = false) val f2 = StructField("b", IntegerType, nullable = true) @@ -1135,19 +1142,34 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper val inputArray1 = CreateArray(c :: Nil) val inputArray1ContainsNull = c.nullable val stArray1 = GetArrayStructFields(inputArray1, f1, 0, 2, inputArray1ContainsNull) 
- assert(!ElementAt(stArray1, Literal(0)).nullable) + assert(!ElementAt(stArray1, Literal(1)).nullable) + assert(!ElementAt(stArray1, Literal(-1)).nullable) val stArray2 = GetArrayStructFields(inputArray1, f2, 1, 2, inputArray1ContainsNull) - assert(ElementAt(stArray2, Literal(0)).nullable) + assert(ElementAt(stArray2, Literal(1)).nullable) + assert(ElementAt(stArray2, Literal(-1)).nullable) val d = AttributeReference("d", structType, nullable = true)() val inputArray2 = CreateArray(c :: d :: Nil) val inputArray2ContainsNull = c.nullable || d.nullable val stArray3 = GetArrayStructFields(inputArray2, f1, 0, 2, inputArray2ContainsNull) - assert(!ElementAt(stArray3, Literal(0)).nullable) - assert(ElementAt(stArray3, Literal(1)).nullable) + assert(!ElementAt(stArray3, Literal(1)).nullable) + assert(!ElementAt(stArray3, Literal(-2)).nullable) + assert(ElementAt(stArray3, Literal(2)).nullable) + assert(ElementAt(stArray3, Literal(-1)).nullable) val stArray4 = GetArrayStructFields(inputArray2, f2, 1, 2, inputArray2ContainsNull) - assert(ElementAt(stArray4, Literal(0)).nullable) assert(ElementAt(stArray4, Literal(1)).nullable) + assert(ElementAt(stArray4, Literal(-2)).nullable) + assert(ElementAt(stArray4, Literal(2)).nullable) + assert(ElementAt(stArray4, Literal(-1)).nullable) + + // GetArrayStructFields case invalid indices + assert(!ElementAt(stArray3, Literal(0)).nullable) + assert(ElementAt(stArray3, Literal(4)).nullable) + assert(ElementAt(stArray3, Literal(-4)).nullable) + + assert(ElementAt(stArray4, Literal(0)).nullable) + assert(ElementAt(stArray4, Literal(4)).nullable) + assert(ElementAt(stArray4, Literal(-4)).nullable) } test("Concat") { From 27bb40b6297361985e3590687f0332a72b71bc85 Mon Sep 17 00:00:00 2001 From: lrz Date: Tue, 10 Nov 2020 19:39:18 +0900 Subject: [PATCH 0433/1009] [SPARK-33339][PYTHON] Pyspark application will hang due to non Exception error ### What changes were proposed in this pull request? When a system.exit exception occurs during the process, the python worker exits abnormally, and then the executor task is still waiting for the worker for reading from socket, causing it to hang. The system.exit exception may be caused by the user's error code, but spark should at least throw an error to remind the user, not get stuck we can run a simple test to reproduce this case: ``` from pyspark.sql import SparkSession def err(line): raise SystemExit spark = SparkSession.builder.appName("test").getOrCreate() spark.sparkContext.parallelize(range(1,2), 2).map(err).collect() spark.stop() ``` ### Why are the changes needed? to make sure pyspark application won't hang if there's non-Exception error in python worker ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? added a new test and also manually tested the case above Closes #30248 from li36909/pyspark. 
Lead-authored-by: lrz Co-authored-by: Hyukjin Kwon Signed-off-by: HyukjinKwon --- python/pyspark/tests/test_worker.py | 9 +++++++++ python/pyspark/worker.py | 4 ++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/python/pyspark/tests/test_worker.py b/python/pyspark/tests/test_worker.py index 8039c0661dd0b..d7a4b84e8dc41 100644 --- a/python/pyspark/tests/test_worker.py +++ b/python/pyspark/tests/test_worker.py @@ -95,6 +95,15 @@ def raise_exception(_): self.assertRaises(Exception, lambda: rdd.foreach(raise_exception)) self.assertEqual(100, rdd.map(str).count()) + def test_after_non_exception_error(self): + # SPARK-33339: Pyspark application will hang due to non Exception + def raise_system_exit(_): + raise SystemExit() + rdd = self.sc.parallelize(range(100), 1) + with QuietTest(self.sc): + self.assertRaises(Exception, lambda: rdd.foreach(raise_system_exit)) + self.assertEqual(100, rdd.map(str).count()) + def test_after_jvm_exception(self): tempFile = tempfile.NamedTemporaryFile(delete=False) tempFile.write(b"Hello World!") diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index 84e5cca5d3c00..6362839d96242 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -604,7 +604,7 @@ def process(): # reuse. TaskContext._setTaskContext(None) BarrierTaskContext._setTaskContext(None) - except Exception: + except BaseException: try: exc_info = traceback.format_exc() if isinstance(exc_info, bytes): @@ -618,7 +618,7 @@ def process(): except IOError: # JVM close the socket pass - except Exception: + except BaseException: # Write the error to stderr if it happened while serializing print("PySpark worker failed with exception:", file=sys.stderr) print(traceback.format_exc(), file=sys.stderr) From 4934da56bcc13fc61afc8e8cc44fb5290b5e7b32 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Tue, 10 Nov 2020 14:37:42 +0000 Subject: [PATCH 0434/1009] [SPARK-33305][SQL] DSv2: DROP TABLE command should also invalidate cache ### What changes were proposed in this pull request? This changes `DropTableExec` to also invalidate caches referencing the table to be dropped, in a cascading manner. ### Why are the changes needed? In DSv1, `DROP TABLE` command also invalidate caches as described in [SPARK-19765](https://issues.apache.org/jira/browse/SPARK-19765). However in DSv2 the same command only drops the table but doesn't handle the caches. This could lead to correctness issue. ### Does this PR introduce _any_ user-facing change? Yes. Now DSv2 `DROP TABLE` command also invalidates cache. ### How was this patch tested? Added a new UT Closes #30211 from sunchao/SPARK-33305. 
Authored-by: Chao Sun Signed-off-by: Wenchen Fan --- .../datasources/v2/DataSourceV2Strategy.scala | 2 +- .../execution/datasources/v2/DropTableExec.scala | 7 ++++++- .../sql/connector/DataSourceV2SQLSuite.scala | 16 ++++++++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 817b3cecf03e2..5695d232fae54 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -229,7 +229,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat throw new AnalysisException("Describing columns is not supported for v2 tables.") case DropTable(r: ResolvedTable, ifExists, purge) => - DropTableExec(r.catalog, r.identifier, ifExists, purge) :: Nil + DropTableExec(session, r.catalog, r.table, r.identifier, ifExists, purge) :: Nil case _: NoopDropTable => LocalTableScanExec(Nil, Nil) :: Nil diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala index 1fd0cd177478b..068475fc56f47 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala @@ -17,22 +17,27 @@ package org.apache.spark.sql.execution.datasources.v2 +import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} +import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog} /** * Physical plan node for dropping a table. 
*/ case class DropTableExec( + session: SparkSession, catalog: TableCatalog, + table: Table, ident: Identifier, ifExists: Boolean, purge: Boolean) extends V2CommandExec { override def run(): Seq[InternalRow] = { if (catalog.tableExists(ident)) { + val v2Relation = DataSourceV2Relation.create(table, Some(catalog), Some(ident)) + session.sharedState.cacheManager.uncacheQuery(session, v2Relation, cascade = true) catalog.dropTable(ident, purge) } else if (!ifExists) { throw new NoSuchTableException(ident) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index ee3f7bed7ca9f..dfa32b9ac802e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -784,6 +784,22 @@ class DataSourceV2SQLSuite } } + test("SPARK-33305: DROP TABLE should also invalidate cache") { + val t = "testcat.ns.t" + val view = "view" + withTable(t) { + withTempView(view) { + sql(s"CREATE TABLE $t USING foo AS SELECT id, data FROM source") + sql(s"CACHE TABLE $view AS SELECT id FROM $t") + checkAnswer(sql(s"SELECT * FROM $t"), spark.table("source")) + checkAnswer(sql(s"SELECT * FROM $view"), spark.table("source").select("id")) + + sql(s"DROP TABLE $t") + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) + } + } + } + test("Relation: basic") { val t1 = "testcat.ns1.ns2.tbl" withTable(t1) { From 34f5e7ce77647d3b5eb11700566e0bbce73960e2 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Tue, 10 Nov 2020 14:40:24 +0000 Subject: [PATCH 0435/1009] [SPARK-33302][SQL] Push down filters through Expand ### What changes were proposed in this pull request? Push down filter through expand. For case below: ``` create table t1(pid int, uid int, sid int, dt date, suid int) using parquet; create table t2(pid int, vs int, uid int, csid int) using parquet; SELECT years, appversion, SUM(uusers) AS users FROM (SELECT Date_trunc('year', dt) AS years, CASE WHEN h.pid = 3 THEN 'iOS' WHEN h.pid = 4 THEN 'Android' ELSE 'Other' END AS viewport, h.vs AS appversion, Count(DISTINCT u.uid) AS uusers ,Count(DISTINCT u.suid) AS srcusers FROM t1 u join t2 h ON h.uid = u.uid GROUP BY 1, 2, 3) AS a WHERE viewport = 'iOS' GROUP BY 1, 2 ``` Plan. 
before this pr: ``` == Physical Plan == *(5) HashAggregate(keys=[years#30, appversion#32], functions=[sum(uusers#33L)]) +- Exchange hashpartitioning(years#30, appversion#32, 200), true, [id=#251] +- *(4) HashAggregate(keys=[years#30, appversion#32], functions=[partial_sum(uusers#33L)]) +- *(4) HashAggregate(keys=[date_trunc('year', CAST(u.`dt` AS TIMESTAMP))#45, CASE WHEN (h.`pid` = 3) THEN 'iOS' WHEN (h.`pid` = 4) THEN 'Android' ELSE 'Other' END#46, vs#12], functions=[count(if ((gid#44 = 1)) u.`uid`#47 else null)]) +- Exchange hashpartitioning(date_trunc('year', CAST(u.`dt` AS TIMESTAMP))#45, CASE WHEN (h.`pid` = 3) THEN 'iOS' WHEN (h.`pid` = 4) THEN 'Android' ELSE 'Other' END#46, vs#12, 200), true, [id=#246] +- *(3) HashAggregate(keys=[date_trunc('year', CAST(u.`dt` AS TIMESTAMP))#45, CASE WHEN (h.`pid` = 3) THEN 'iOS' WHEN (h.`pid` = 4) THEN 'Android' ELSE 'Other' END#46, vs#12], functions=[partial_count(if ((gid#44 = 1)) u.`uid`#47 else null)]) +- *(3) HashAggregate(keys=[date_trunc('year', CAST(u.`dt` AS TIMESTAMP))#45, CASE WHEN (h.`pid` = 3) THEN 'iOS' WHEN (h.`pid` = 4) THEN 'Android' ELSE 'Other' END#46, vs#12, u.`uid`#47, u.`suid`#48, gid#44], functions=[]) +- Exchange hashpartitioning(date_trunc('year', CAST(u.`dt` AS TIMESTAMP))#45, CASE WHEN (h.`pid` = 3) THEN 'iOS' WHEN (h.`pid` = 4) THEN 'Android' ELSE 'Other' END#46, vs#12, u.`uid`#47, u.`suid`#48, gid#44, 200), true, [id=#241] +- *(2) HashAggregate(keys=[date_trunc('year', CAST(u.`dt` AS TIMESTAMP))#45, CASE WHEN (h.`pid` = 3) THEN 'iOS' WHEN (h.`pid` = 4) THEN 'Android' ELSE 'Other' END#46, vs#12, u.`uid`#47, u.`suid`#48, gid#44], functions=[]) +- *(2) Filter (CASE WHEN (h.`pid` = 3) THEN 'iOS' WHEN (h.`pid` = 4) THEN 'Android' ELSE 'Other' END#46 = iOS) +- *(2) Expand [ArrayBuffer(date_trunc(year, cast(dt#9 as timestamp), Some(Etc/GMT+7)), CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END, vs#12, uid#7, null, 1), ArrayBuffer(date_trunc(year, cast(dt#9 as timestamp), Some(Etc/GMT+7)), CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END, vs#12, null, suid#10, 2)], [date_trunc('year', CAST(u.`dt` AS TIMESTAMP))#45, CASE WHEN (h.`pid` = 3) THEN 'iOS' WHEN (h.`pid` = 4) THEN 'Android' ELSE 'Other' END#46, vs#12, u.`uid`#47, u.`suid`#48, gid#44] +- *(2) Project [uid#7, dt#9, suid#10, pid#11, vs#12] +- *(2) BroadcastHashJoin [uid#7], [uid#13], Inner, BuildRight :- *(2) Project [uid#7, dt#9, suid#10] : +- *(2) Filter isnotnull(uid#7) : +- *(2) ColumnarToRow : +- FileScan parquet default.t1[uid#7,dt#9,suid#10] Batched: true, DataFilters: [isnotnull(uid#7)], Format: Parquet, Location: InMemoryFileIndex[file:/root/spark-3.0.0-bin-hadoop3.2/spark-warehouse/t1], PartitionFilters: [], PushedFilters: [IsNotNull(uid)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint))), [id=#233] +- *(1) Project [pid#11, vs#12, uid#13] +- *(1) Filter isnotnull(uid#13) +- *(1) ColumnarToRow +- FileScan parquet default.t2[pid#11,vs#12,uid#13] Batched: true, DataFilters: [isnotnull(uid#13)], Format: Parquet, Location: InMemoryFileIndex[file:/root/spark-3.0.0-bin-hadoop3.2/spark-warehouse/t2], PartitionFilters: [], PushedFilters: [IsNotNull(uid)], ReadSchema: struct ``` Plan. after. this pr. 
: ``` == Physical Plan == AdaptiveSparkPlan isFinalPlan=false +- HashAggregate(keys=[years#0, appversion#2], functions=[sum(uusers#3L)], output=[years#0, appversion#2, users#5L]) +- Exchange hashpartitioning(years#0, appversion#2, 5), true, [id=#71] +- HashAggregate(keys=[years#0, appversion#2], functions=[partial_sum(uusers#3L)], output=[years#0, appversion#2, sum#22L]) +- HashAggregate(keys=[date_trunc(year, cast(dt#9 as timestamp), Some(America/Los_Angeles))#23, CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END#24, vs#12], functions=[count(distinct uid#7)], output=[years#0, appversion#2, uusers#3L]) +- Exchange hashpartitioning(date_trunc(year, cast(dt#9 as timestamp), Some(America/Los_Angeles))#23, CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END#24, vs#12, 5), true, [id=#67] +- HashAggregate(keys=[date_trunc(year, cast(dt#9 as timestamp), Some(America/Los_Angeles))#23, CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END#24, vs#12], functions=[partial_count(distinct uid#7)], output=[date_trunc(year, cast(dt#9 as timestamp), Some(America/Los_Angeles))#23, CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END#24, vs#12, count#27L]) +- HashAggregate(keys=[date_trunc(year, cast(dt#9 as timestamp), Some(America/Los_Angeles))#23, CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END#24, vs#12, uid#7], functions=[], output=[date_trunc(year, cast(dt#9 as timestamp), Some(America/Los_Angeles))#23, CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END#24, vs#12, uid#7]) +- Exchange hashpartitioning(date_trunc(year, cast(dt#9 as timestamp), Some(America/Los_Angeles))#23, CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END#24, vs#12, uid#7, 5), true, [id=#63] +- HashAggregate(keys=[date_trunc(year, cast(dt#9 as timestamp), Some(America/Los_Angeles)) AS date_trunc(year, cast(dt#9 as timestamp), Some(America/Los_Angeles))#23, CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END AS CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END#24, vs#12, uid#7], functions=[], output=[date_trunc(year, cast(dt#9 as timestamp), Some(America/Los_Angeles))#23, CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END#24, vs#12, uid#7]) +- Project [uid#7, dt#9, pid#11, vs#12] +- BroadcastHashJoin [uid#7], [uid#13], Inner, BuildRight, false :- Filter isnotnull(uid#7) : +- FileScan parquet default.t1[uid#7,dt#9] Batched: true, DataFilters: [isnotnull(uid#7)], Format: Parquet, Location: InMemoryFileIndex[file:/private/var/folders/4l/7_c5c97s1_gb0d9_d6shygx00000gn/T/warehouse-c069d87..., PartitionFilters: [], PushedFilters: [IsNotNull(uid)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[2, int, false] as bigint)),false), [id=#58] +- Filter ((CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END = iOS) AND isnotnull(uid#13)) +- FileScan parquet default.t2[pid#11,vs#12,uid#13] Batched: true, DataFilters: [(CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END = iOS), isnotnull..., Format: Parquet, Location: InMemoryFileIndex[file:/private/var/folders/4l/7_c5c97s1_gb0d9_d6shygx00000gn/T/warehouse-c069d87..., PartitionFilters: [], PushedFilters: [IsNotNull(uid)], ReadSchema: struct ``` ### Why are the changes needed? Improve performance, filter more data. 
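A rough way to see the effect from a Spark shell (a sketch only, not part of the patch; it assumes the `t1`/`t2` tables created by the DDL above):

```scala
// Sketch: with the pushdown extended to Expand, the `viewport = 'iOS'` predicate
// should show up in the scan of t2 rather than only in a Filter above the Expand
// introduced by the multiple distinct aggregations.
val df = spark.sql("""
  SELECT years, appversion, SUM(uusers) AS users
  FROM (SELECT date_trunc('year', dt) AS years,
               CASE WHEN h.pid = 3 THEN 'iOS'
                    WHEN h.pid = 4 THEN 'Android'
                    ELSE 'Other' END AS viewport,
               h.vs AS appversion,
               COUNT(DISTINCT u.uid) AS uusers,
               COUNT(DISTINCT u.suid) AS srcusers
        FROM t1 u JOIN t2 h ON h.uid = u.uid
        GROUP BY 1, 2, 3) AS a
  WHERE viewport = 'iOS'
  GROUP BY 1, 2""")
// Inspect where the Filter ends up in the optimized and physical plans.
df.explain(true)
```

Comparing the `explain` output before and after this change should show the predicate moving below the Expand, as the two plans above illustrate.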
### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added UT Closes #30278 from AngersZhuuuu/SPARK-33302. Authored-by: angerszhu Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/Optimizer.scala | 1 + .../optimizer/FilterPushdownSuite.scala | 24 ++++++++++++++++++- .../LeftSemiAntiJoinPushDownSuite.scala | 15 ++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 9519a56c2817a..51f7799b1e427 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -1269,6 +1269,7 @@ object PushPredicateThroughNonJoin extends Rule[LogicalPlan] with PredicateHelpe case _: Sort => true case _: BatchEvalPython => true case _: ArrowEvalPython => true + case _: Expand => true case _ => false } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala index 156313300eef9..11ec037c94f73 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{BooleanType, IntegerType, TimestampType} +import org.apache.spark.sql.types.{BooleanType, IntegerType, StringType, TimestampType} import org.apache.spark.unsafe.types.CalendarInterval class FilterPushdownSuite extends PlanTest { @@ -1208,6 +1208,28 @@ class FilterPushdownSuite extends PlanTest { checkAnalysis = false) } + test("push down predicate through expand") { + val query = + Filter('a > 1, + Expand( + Seq( + Seq('a, 'b, 'c, Literal.create(null, StringType), 1), + Seq('a, 'b, 'c, 'a, 2)), + Seq('a, 'b, 'c), + testRelation)).analyze + val optimized = Optimize.execute(query) + + val expected = + Expand( + Seq( + Seq('a, 'b, 'c, Literal.create(null, StringType), 1), + Seq('a, 'b, 'c, 'a, 2)), + Seq('a, 'b, 'c), + Filter('a > 1, testRelation)).analyze + + comparePlans(optimized, expected) + } + test("SPARK-28345: PythonUDF predicate should be able to pushdown to join") { val pythonUDFJoinCond = { val pythonUDF = PythonUDF("pythonUDF", null, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LeftSemiAntiJoinPushDownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LeftSemiAntiJoinPushDownSuite.scala index a3da9f73ebd40..729a1e9f06ca5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LeftSemiAntiJoinPushDownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LeftSemiAntiJoinPushDownSuite.scala @@ -315,6 +315,21 @@ class LeftSemiPushdownSuite extends PlanTest { comparePlans(optimized, originalQuery.analyze) } + test("Unary: LeftSemi join push down through Expand") { + val expand = Expand(Seq(Seq('a, 'b, "null"), Seq('a, "null", 'c)), + Seq('a, 'b, 'c), testRelation) + val originalQuery = expand + .join(testRelation1, joinType = LeftSemi, condition = Some('b === 
'd && 'b === 1)) + + val optimized = Optimize.execute(originalQuery.analyze) + val correctAnswer = Expand(Seq(Seq('a, 'b, "null"), Seq('a, "null", 'c)), + Seq('a, 'b, 'c), testRelation + .join(testRelation1, joinType = LeftSemi, condition = Some('b === 'd && 'b === 1))) + .analyze + + comparePlans(optimized, correctAnswer) + } + Seq(Some('d === 'e), None).foreach { case innerJoinCond => Seq(LeftSemi, LeftAnti).foreach { case outerJT => Seq(Inner, LeftOuter, Cross, RightOuter).foreach { case innerJT => From 3165ca742a7508dca35a1e40b303c337939df86f Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Tue, 10 Nov 2020 15:41:04 +0000 Subject: [PATCH 0436/1009] [SPARK-33376][SQL] Remove the option of "sharesHadoopClasses" in Hive IsolatedClientLoader ### What changes were proposed in this pull request? This removes the `sharesHadoopClasses` flag from `IsolatedClientLoader` in Hive module. ### Why are the changes needed? Currently, when initializing `IsolatedClientLoader`, users can set the `sharesHadoopClasses` flag to decide whether the `HiveClient` created should share Hadoop classes with Spark itself or not. In the latter case, the client will only load Hadoop classes from the Hive dependencies. There are two reasons to remove this: 1. this feature is currently used in two cases: 1) unit tests, 2) when the Hadoop version defined in Maven can not be found when `spark.sql.hive.metastore.jars` is equal to "maven", which could be very rare. 2. when `sharesHadoopClasses` is false, Spark doesn't really only use Hadoop classes from Hive jars: we also download `hadoop-client` jar and put all the sub-module jars (e.g., `hadoop-common`, `hadoop-hdfs`) together with the Hive jars, and the Hadoop version used by `hadoop-client` is the same version used by Spark itself. As result, we're mixing two versions of Hadoop jars in the classpath, which could potentially cause issues, especially considering that the default Hadoop version is already 3.2.0 while most Hive versions supported by the `IsolatedClientLoader` is still using Hadoop 2.x or even lower. ### Does this PR introduce _any_ user-facing change? This affects Spark users in one scenario: when `spark.sql.hive.metastore.jars` is set to `maven` AND the Hadoop version specified in pom file cannot be downloaded, currently the behavior is to switch to _not_ share Hadoop classes, but with the PR it will share Hadoop classes with Spark. ### How was this patch tested? Existing UTs. Closes #30284 from sunchao/SPARK-33376. 
Authored-by: Chao Sun Signed-off-by: Wenchen Fan --- .../sql/hive/client/IsolatedClientLoader.scala | 16 ++++------------ .../sql/hive/client/HadoopVersionInfoSuite.scala | 3 +-- .../sql/hive/client/HiveClientBuilder.scala | 6 ++---- .../client/HivePartitionFilteringSuite.scala | 4 ---- .../spark/sql/hive/client/HiveVersionSuite.scala | 7 ++----- 5 files changed, 9 insertions(+), 27 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index f9946fe8e0616..9663e03ee6a74 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -52,12 +52,9 @@ private[hive] object IsolatedClientLoader extends Logging { config: Map[String, String] = Map.empty, ivyPath: Option[String] = None, sharedPrefixes: Seq[String] = Seq.empty, - barrierPrefixes: Seq[String] = Seq.empty, - sharesHadoopClasses: Boolean = true): IsolatedClientLoader = synchronized { + barrierPrefixes: Seq[String] = Seq.empty): IsolatedClientLoader = synchronized { val resolvedVersion = hiveVersion(hiveMetastoreVersion) - // We will first try to share Hadoop classes. If we cannot resolve the Hadoop artifact - // with the given version, we will use Hadoop 2.7 and then will not share Hadoop classes. - var _sharesHadoopClasses = sharesHadoopClasses + // We will use Hadoop 2.7 if we cannot resolve the Hadoop artifact. val files = if (resolvedVersions.contains((resolvedVersion, hadoopVersion))) { resolvedVersions((resolvedVersion, hadoopVersion)) } else { @@ -72,10 +69,8 @@ private[hive] object IsolatedClientLoader extends Logging { val fallbackVersion = "2.7.4" logWarning(s"Failed to resolve Hadoop artifacts for the version $hadoopVersion. We " + s"will change the hadoop version from $hadoopVersion to $fallbackVersion and try " + - "again. Hadoop classes will not be shared between Spark and Hive metastore client. " + - "It is recommended to set jars used by Hive metastore client through " + + "again. It is recommended to set jars used by Hive metastore client through " + "spark.sql.hive.metastore.jars in the production environment.") - _sharesHadoopClasses = false (downloadVersion( resolvedVersion, fallbackVersion, ivyPath, remoteRepos), fallbackVersion) } @@ -89,7 +84,6 @@ private[hive] object IsolatedClientLoader extends Logging { execJars = files, hadoopConf = hadoopConf, config = config, - sharesHadoopClasses = _sharesHadoopClasses, sharedPrefixes = sharedPrefixes, barrierPrefixes = barrierPrefixes) } @@ -177,7 +171,6 @@ private[hive] object IsolatedClientLoader extends Logging { * @param config A set of options that will be added to the HiveConf of the constructed client. * @param isolationOn When true, custom versions of barrier classes will be constructed. Must be * true unless loading the version of hive that is on Spark's classloader. - * @param sharesHadoopClasses When true, we will share Hadoop classes between Spark and * @param baseClassLoader The spark classloader that is used to load shared classes. 
*/ private[hive] class IsolatedClientLoader( @@ -187,7 +180,6 @@ private[hive] class IsolatedClientLoader( val execJars: Seq[URL] = Seq.empty, val config: Map[String, String] = Map.empty, val isolationOn: Boolean = true, - val sharesHadoopClasses: Boolean = true, val baseClassLoader: ClassLoader = Thread.currentThread().getContextClassLoader, val sharedPrefixes: Seq[String] = Seq.empty, val barrierPrefixes: Seq[String] = Seq.empty) @@ -204,7 +196,7 @@ private[hive] class IsolatedClientLoader( name.startsWith("org.apache.log4j") || // log4j1.x name.startsWith("org.apache.logging.log4j") || // log4j2 name.startsWith("org.apache.spark.") || - (sharesHadoopClasses && isHadoopClass) || + isHadoopClass || name.startsWith("scala.") || (name.startsWith("com.google") && !name.startsWith("com.google.cloud")) || name.startsWith("java.") || diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala index 65492abf38cc0..8d55356da28e6 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala @@ -49,8 +49,7 @@ class HadoopVersionInfoSuite extends SparkFunSuite { sparkConf = new SparkConf(), hadoopConf = hadoopConf, config = HiveClientBuilder.buildConf(Map.empty), - ivyPath = Some(ivyPath.getCanonicalPath), - sharesHadoopClasses = true) + ivyPath = Some(ivyPath.getCanonicalPath)) val jars = client.classLoader.getParent.asInstanceOf[URLClassLoader].getURLs .map(u => new File(u.toURI)) // Drop all Hadoop jars to use the existing Hadoop jars on the classpath diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientBuilder.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientBuilder.scala index 2ad3afcb214b3..f40b4f00d9fd0 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientBuilder.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientBuilder.scala @@ -46,15 +46,13 @@ private[client] object HiveClientBuilder { def buildClient( version: String, hadoopConf: Configuration, - extraConf: Map[String, String] = Map.empty, - sharesHadoopClasses: Boolean = true): HiveClient = { + extraConf: Map[String, String] = Map.empty): HiveClient = { IsolatedClientLoader.forVersion( hiveMetastoreVersion = version, hadoopVersion = VersionInfo.getVersion, sparkConf = new SparkConf(), hadoopConf = hadoopConf, config = buildConf(extraConf), - ivyPath = ivyPath, - sharesHadoopClasses = sharesHadoopClasses).createClient() + ivyPath = ivyPath).createClient() } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala index 2d615f6fdc261..7e10d498d0413 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala @@ -272,10 +272,6 @@ class HivePartitionFilteringSuite(version: String) day1 :: day2 :: Nil) } - test("create client with sharesHadoopClasses = false") { - buildClient(new Configuration(), sharesHadoopClasses = false) - } - private def testMetastorePartitionFiltering( filterExpr: Expression, expectedDs: Seq[Int], diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveVersionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveVersionSuite.scala index dd58c302e0197..02e9b7fb151fd 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveVersionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveVersionSuite.scala @@ -28,9 +28,7 @@ private[client] abstract class HiveVersionSuite(version: String) extends SparkFu override protected val enableAutoThreadAudit = false protected var client: HiveClient = null - protected def buildClient( - hadoopConf: Configuration, - sharesHadoopClasses: Boolean = true): HiveClient = { + protected def buildClient(hadoopConf: Configuration): HiveClient = { // Hive changed the default of datanucleus.schema.autoCreateAll from true to false and // hive.metastore.schema.verification from false to true since 2.0 // For details, see the JIRA HIVE-6113 and HIVE-12463 @@ -46,8 +44,7 @@ private[client] abstract class HiveVersionSuite(version: String) extends SparkFu HiveClientBuilder.buildClient( version, hadoopConf, - HiveUtils.formatTimeVarsForHiveClient(hadoopConf), - sharesHadoopClasses = sharesHadoopClasses) + HiveUtils.formatTimeVarsForHiveClient(hadoopConf)) } override def suiteName: String = s"${super.suiteName}($version)" From 122c8999cbf2a1f9484ae973864a843cfa32b6c6 Mon Sep 17 00:00:00 2001 From: zero323 Date: Tue, 10 Nov 2020 09:17:00 -0800 Subject: [PATCH 0437/1009] [SPARK-33251][FOLLOWUP][PYTHON][DOCS][MINOR] Adjusts returns PrefixSpan.findFrequentSequentialPatterns ### What changes were proposed in this pull request? Changes pyspark.sql.dataframe.DataFrame to :py:class:`pyspark.sql.DataFrame` ### Why are the changes needed? Consistency (see https://github.com/apache/spark/pull/30285#pullrequestreview-526764104). ### Does this PR introduce _any_ user-facing change? User will see shorter reference with a link. ### How was this patch tested? `dev/lint-python` and manual check of the rendered docs. Closes #30313 from zero323/SPARK-33251-FOLLOW-UP. Authored-by: zero323 Signed-off-by: Huaxin Gao --- python/pyspark/ml/fpm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/ml/fpm.py b/python/pyspark/ml/fpm.py index 77e610f49410d..d36b9efb8cce0 100644 --- a/python/pyspark/ml/fpm.py +++ b/python/pyspark/ml/fpm.py @@ -443,7 +443,7 @@ def findFrequentSequentialPatterns(self, dataset): Returns ------- - pyspark.sql.dataframe.DataFrame + :py:class:`pyspark.sql.DataFrame` A `DataFrame` that contains columns of sequence and corresponding frequency. The schema of it will be: From 6fa80ed1dd43c2ecd092c10933330b501641c51b Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 10 Nov 2020 16:17:00 -0800 Subject: [PATCH 0438/1009] [SPARK-33337][SQL] Support subexpression elimination in branches of conditional expressions ### What changes were proposed in this pull request? Currently we skip subexpression elimination in branches of conditional expressions including `If`, `CaseWhen`, and `Coalesce`. Actually we can do subexpression elimination for such branches if the subexpression is common across all branches. This patch proposes to support subexpression elimination in branches of conditional expressions. ### Why are the changes needed? We may miss subexpression elimination chances in branches of conditional expressions. This kind of subexpression is frequently seen. It may be written manually by users or come from query optimizer. 
For example, project collapsing could embed expressions between two `Project`s and produces conditional expression like: ``` CASE WHEN jsonToStruct(json).a = '1' THEN 1.0 WHEN jsonToStruct(json).a = '2' THEN 2.0 ... ELSE 1.2 END ``` If `jsonToStruct(json)` is time-expensive expression, we don't eliminate the duplication and waste time on running it repeatedly now. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Closes #30245 from viirya/SPARK-33337. Authored-by: Liang-Chi Hsieh Signed-off-by: Liang-Chi Hsieh --- .../expressions/EquivalentExpressions.scala | 96 +++++++++++---- .../expressions/codegen/CodeGenerator.scala | 2 +- .../SubexpressionEliminationSuite.scala | 111 ++++++++++++++++-- 3 files changed, 177 insertions(+), 32 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala index 458c48df6d0c8..1dfff412d9a8e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala @@ -65,11 +65,82 @@ class EquivalentExpressions { } } + private def addExprToSet(expr: Expression, set: mutable.Set[Expr]): Boolean = { + if (expr.deterministic) { + val e = Expr(expr) + if (set.contains(e)) { + true + } else { + set.add(e) + false + } + } else { + false + } + } + + /** + * Adds only expressions which are common in each of given expressions, in a recursive way. + * For example, given two expressions `(a + (b + (c + 1)))` and `(d + (e + (c + 1)))`, + * the common expression `(c + 1)` will be added into `equivalenceMap`. + */ + private def addCommonExprs( + exprs: Seq[Expression], + addFunc: Expression => Boolean = addExpr): Unit = { + val exprSetForAll = mutable.Set[Expr]() + addExprTree(exprs.head, addExprToSet(_, exprSetForAll)) + + val commonExprSet = exprs.tail.foldLeft(exprSetForAll) { (exprSet, expr) => + val otherExprSet = mutable.Set[Expr]() + addExprTree(expr, addExprToSet(_, otherExprSet)) + exprSet.intersect(otherExprSet) + } + + commonExprSet.foreach(expr => addFunc(expr.e)) + } + + // There are some special expressions that we should not recurse into all of its children. + // 1. CodegenFallback: it's children will not be used to generate code (call eval() instead) + // 2. If: common subexpressions will always be evaluated at the beginning, but the true and + // false expressions in `If` may not get accessed, according to the predicate + // expression. We should only recurse into the predicate expression. + // 3. CaseWhen: like `If`, the children of `CaseWhen` only get accessed in a certain + // condition. We should only recurse into the first condition expression as it + // will always get accessed. + // 4. Coalesce: it's also a conditional expression, we should only recurse into the first + // children, because others may not get accessed. + private def childrenToRecurse(expr: Expression): Seq[Expression] = expr match { + case _: CodegenFallback => Nil + case i: If => i.predicate :: Nil + case c: CaseWhen => c.children.head :: Nil + case c: Coalesce => c.children.head :: Nil + case other => other.children + } + + // For some special expressions we cannot just recurse into all of its children, but we can + // recursively add the common expressions shared between all of its children. 
+ private def commonChildrenToRecurse(expr: Expression): Seq[Seq[Expression]] = expr match { + case i: If => Seq(Seq(i.trueValue, i.falseValue)) + case c: CaseWhen => + // We look at subexpressions in conditions and values of `CaseWhen` separately. It is + // because a subexpression in conditions will be run no matter which condition is matched + // if it is shared among conditions, but it doesn't need to be shared in values. Similarly, + // a subexpression among values doesn't need to be in conditions because no matter which + // condition is true, it will be evaluated. + val conditions = c.branches.tail.map(_._1) + val values = c.branches.map(_._2) ++ c.elseValue + Seq(conditions, values) + case c: Coalesce => Seq(c.children.tail) + case _ => Nil + } + /** * Adds the expression to this data structure recursively. Stops if a matching expression * is found. That is, if `expr` has already been added, its children are not added. */ - def addExprTree(expr: Expression): Unit = { + def addExprTree( + expr: Expression, + addFunc: Expression => Boolean = addExpr): Unit = { val skip = expr.isInstanceOf[LeafExpression] || // `LambdaVariable` is usually used as a loop variable, which can't be evaluated ahead of the // loop. So we can't evaluate sub-expressions containing `LambdaVariable` at the beginning. @@ -78,26 +149,9 @@ class EquivalentExpressions { // can cause error like NPE. (expr.isInstanceOf[PlanExpression[_]] && TaskContext.get != null) - // There are some special expressions that we should not recurse into all of its children. - // 1. CodegenFallback: it's children will not be used to generate code (call eval() instead) - // 2. If: common subexpressions will always be evaluated at the beginning, but the true and - // false expressions in `If` may not get accessed, according to the predicate - // expression. We should only recurse into the predicate expression. - // 3. CaseWhen: like `If`, the children of `CaseWhen` only get accessed in a certain - // condition. We should only recurse into the first condition expression as it - // will always get accessed. - // 4. Coalesce: it's also a conditional expression, we should only recurse into the first - // children, because others may not get accessed. - def childrenToRecurse: Seq[Expression] = expr match { - case _: CodegenFallback => Nil - case i: If => i.predicate :: Nil - case c: CaseWhen => c.children.head :: Nil - case c: Coalesce => c.children.head :: Nil - case other => other.children - } - - if (!skip && !addExpr(expr)) { - childrenToRecurse.foreach(addExprTree) + if (!skip && !addFunc(expr)) { + childrenToRecurse(expr).foreach(addExprTree(_, addFunc)) + commonChildrenToRecurse(expr).filter(_.nonEmpty).foreach(addCommonExprs(_, addFunc)) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 9a26c388f59af..9aa827a58d87a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -1044,7 +1044,7 @@ class CodegenContext extends Logging { val localSubExprEliminationExprs = mutable.HashMap.empty[Expression, SubExprEliminationState] // Add each expression tree and compute the common subexpressions. 
- expressions.foreach(equivalentExpressions.addExprTree) + expressions.foreach(equivalentExpressions.addExprTree(_)) // Get all the expressions that appear at least twice and set up the state for subexpression // elimination. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala index 1fa185cc77ebb..4725a40781c6b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala @@ -146,20 +146,111 @@ class SubexpressionEliminationSuite extends SparkFunSuite { equivalence.addExprTree(add) // the `two` inside `fallback` should not be added assert(equivalence.getAllEquivalentExprs.count(_.size > 1) == 0) - assert(equivalence.getAllEquivalentExprs.count(_.size == 1) == 3) // add, two, explode + assert(equivalence.getAllEquivalentExprs.count(_.size == 1) == 3) // add, two, explode } - test("Children of conditional expressions") { - val condition = And(Literal(true), Literal(false)) + test("Children of conditional expressions: If") { val add = Add(Literal(1), Literal(2)) - val ifExpr = If(condition, add, add) + val condition = GreaterThan(add, Literal(3)) - val equivalence = new EquivalentExpressions - equivalence.addExprTree(ifExpr) - // the `add` inside `If` should not be added - assert(equivalence.getAllEquivalentExprs.count(_.size > 1) == 0) - // only ifExpr and its predicate expression - assert(equivalence.getAllEquivalentExprs.count(_.size == 1) == 2) + val ifExpr1 = If(condition, add, add) + val equivalence1 = new EquivalentExpressions + equivalence1.addExprTree(ifExpr1) + + // `add` is in both two branches of `If` and predicate. + assert(equivalence1.getAllEquivalentExprs.count(_.size == 2) == 1) + assert(equivalence1.getAllEquivalentExprs.filter(_.size == 2).head == Seq(add, add)) + // one-time expressions: only ifExpr and its predicate expression + assert(equivalence1.getAllEquivalentExprs.count(_.size == 1) == 2) + assert(equivalence1.getAllEquivalentExprs.filter(_.size == 1).head == Seq(ifExpr1)) + assert(equivalence1.getAllEquivalentExprs.filter(_.size == 1).last == Seq(condition)) + + // Repeated `add` is only in one branch, so we don't count it. 
+ val ifExpr2 = If(condition, Add(Literal(1), Literal(3)), Add(add, add)) + val equivalence2 = new EquivalentExpressions + equivalence2.addExprTree(ifExpr2) + + assert(equivalence2.getAllEquivalentExprs.count(_.size > 1) == 0) + assert(equivalence2.getAllEquivalentExprs.count(_.size == 1) == 3) + + val ifExpr3 = If(condition, ifExpr1, ifExpr1) + val equivalence3 = new EquivalentExpressions + equivalence3.addExprTree(ifExpr3) + + // `add`: 2, `condition`: 2 + assert(equivalence3.getAllEquivalentExprs.count(_.size == 2) == 2) + assert(equivalence3.getAllEquivalentExprs.filter(_.size == 2).head == Seq(add, add)) + assert(equivalence3.getAllEquivalentExprs.filter(_.size == 2).last == Seq(condition, condition)) + + // `ifExpr1`, `ifExpr3` + assert(equivalence3.getAllEquivalentExprs.count(_.size == 1) == 2) + assert(equivalence3.getAllEquivalentExprs.filter(_.size == 1).head == Seq(ifExpr1)) + assert(equivalence3.getAllEquivalentExprs.filter(_.size == 1).last == Seq(ifExpr3)) + } + + test("Children of conditional expressions: CaseWhen") { + val add1 = Add(Literal(1), Literal(2)) + val add2 = Add(Literal(2), Literal(3)) + val conditions1 = (GreaterThan(add2, Literal(3)), add1) :: + (GreaterThan(add2, Literal(4)), add1) :: + (GreaterThan(add2, Literal(5)), add1) :: Nil + + val caseWhenExpr1 = CaseWhen(conditions1, None) + val equivalence1 = new EquivalentExpressions + equivalence1.addExprTree(caseWhenExpr1) + + // `add2` is repeatedly in all conditions. + assert(equivalence1.getAllEquivalentExprs.count(_.size == 2) == 1) + assert(equivalence1.getAllEquivalentExprs.filter(_.size == 2).head == Seq(add2, add2)) + + val conditions2 = (GreaterThan(add1, Literal(3)), add1) :: + (GreaterThan(add2, Literal(4)), add1) :: + (GreaterThan(add2, Literal(5)), add1) :: Nil + + val caseWhenExpr2 = CaseWhen(conditions2, None) + val equivalence2 = new EquivalentExpressions + equivalence2.addExprTree(caseWhenExpr2) + + // `add1` is repeatedly in all branch values, and first predicate. + assert(equivalence2.getAllEquivalentExprs.count(_.size == 2) == 1) + assert(equivalence2.getAllEquivalentExprs.filter(_.size == 2).head == Seq(add1, add1)) + + // Negative case. `add1` or `add2` is not commonly used in all predicates/branch values. + val conditions3 = (GreaterThan(add1, Literal(3)), add2) :: + (GreaterThan(add2, Literal(4)), add1) :: + (GreaterThan(add2, Literal(5)), add1) :: Nil + + val caseWhenExpr3 = CaseWhen(conditions3, None) + val equivalence3 = new EquivalentExpressions + equivalence3.addExprTree(caseWhenExpr3) + assert(equivalence3.getAllEquivalentExprs.count(_.size == 2) == 0) + } + + test("Children of conditional expressions: Coalesce") { + val add1 = Add(Literal(1), Literal(2)) + val add2 = Add(Literal(2), Literal(3)) + val conditions1 = GreaterThan(add2, Literal(3)) :: + GreaterThan(add2, Literal(4)) :: + GreaterThan(add2, Literal(5)) :: Nil + + val coalesceExpr1 = Coalesce(conditions1) + val equivalence1 = new EquivalentExpressions + equivalence1.addExprTree(coalesceExpr1) + + // `add2` is repeatedly in all conditions. + assert(equivalence1.getAllEquivalentExprs.count(_.size == 2) == 1) + assert(equivalence1.getAllEquivalentExprs.filter(_.size == 2).head == Seq(add2, add2)) + + // Negative case. `add1` and `add2` both are not used in all branches. 
+ val conditions2 = GreaterThan(add1, Literal(3)) :: + GreaterThan(add2, Literal(4)) :: + GreaterThan(add2, Literal(5)) :: Nil + + val coalesceExpr2 = Coalesce(conditions2) + val equivalence2 = new EquivalentExpressions + equivalence2.addExprTree(coalesceExpr2) + + assert(equivalence2.getAllEquivalentExprs.count(_.size == 2) == 0) } } From 46346943bb6c312dc87ac3fcdfd1dbeac68c53b5 Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Wed, 11 Nov 2020 09:28:59 +0900 Subject: [PATCH 0439/1009] [SPARK-33404][SQL] Fix incorrect results in `date_trunc` expression ### What changes were proposed in this pull request? The following query produces incorrect results: ``` SELECT date_trunc('minute', '1769-10-17 17:10:02') ``` Spark currently incorrectly returns ``` 1769-10-17 17:10:02 ``` against the expected return value of ``` 1769-10-17 17:10:00 ``` **Steps to repro** Run the following commands in spark-shell: ``` spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") spark.sql("SELECT date_trunc('minute', '1769-10-17 17:10:02')").show() ``` This happens as `truncTimestamp` in package `org.apache.spark.sql.catalyst.util.DateTimeUtils` incorrectly assumes that time zone offsets can never have the granularity of a second and thus does not account for time zone adjustment when truncating the given timestamp to `minute`. This assumption is currently used when truncating the timestamps to `microsecond, millisecond, second, or minute`. This PR fixes this issue and always uses time zone knowledge when truncating timestamps regardless of the truncation unit. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added new tests to `DateTimeUtilsSuite` which previously failed and pass now. Closes #30303 from utkarsh39/trunc-timestamp-fix. Authored-by: Utkarsh Signed-off-by: Takeshi Yamamuro --- .../sql/catalyst/util/DateTimeUtils.scala | 6 ++-- .../catalyst/util/DateTimeUtilsSuite.scala | 34 +++++++++++++------ 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index ff6b106d93d1d..3b974759bd6c0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -736,14 +736,16 @@ object DateTimeUtils { * Trunc level should be generated using `parseTruncLevel()`, should be between 0 and 9. */ def truncTimestamp(micros: Long, level: Int, zoneId: ZoneId): Long = { + // Time zone offsets have a maximum precision of seconds (see `java.time.ZoneOffset`). Hence + // truncation to microsecond, millisecond, and second can be done + // without using time zone information. This results in a performance improvement. 
level match { case TRUNC_TO_MICROSECOND => micros case TRUNC_TO_MILLISECOND => micros - Math.floorMod(micros, MICROS_PER_MILLIS) case TRUNC_TO_SECOND => micros - Math.floorMod(micros, MICROS_PER_SECOND) - case TRUNC_TO_MINUTE => - micros - Math.floorMod(micros, MICROS_PER_MINUTE) + case TRUNC_TO_MINUTE => truncToUnit(micros, zoneId, ChronoUnit.MINUTES) case TRUNC_TO_HOUR => truncToUnit(micros, zoneId, ChronoUnit.HOURS) case TRUNC_TO_DAY => truncToUnit(micros, zoneId, ChronoUnit.DAYS) case _ => // Try to truncate date levels diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 7bbdf44d78c3c..3d841f32379ff 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -518,18 +518,32 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { assert(time == None) } - test("truncTimestamp") { - def testTrunc( - level: Int, - expected: String, - inputTS: Long, - zoneId: ZoneId = defaultZoneId): Unit = { - val truncated = - DateTimeUtils.truncTimestamp(inputTS, level, zoneId) - val expectedTS = toTimestamp(expected, defaultZoneId) - assert(truncated === expectedTS.get) + def testTrunc( + level: Int, + expected: String, + inputTS: Long, + zoneId: ZoneId = defaultZoneId): Unit = { + val truncated = DateTimeUtils.truncTimestamp(inputTS, level, zoneId) + val expectedTS = toTimestamp(expected, defaultZoneId) + assert(truncated === expectedTS.get) + } + + test("SPARK-33404: test truncTimestamp when time zone offset from UTC has a " + + "granularity of seconds") { + for (zid <- ALL_TIMEZONES) { + withDefaultTimeZone(zid) { + val inputTS = DateTimeUtils.stringToTimestamp( + UTF8String.fromString("1769-10-17T17:10:02.123456"), defaultZoneId) + testTrunc(DateTimeUtils.TRUNC_TO_MINUTE, "1769-10-17T17:10:00", inputTS.get, zid) + testTrunc(DateTimeUtils.TRUNC_TO_SECOND, "1769-10-17T17:10:02", inputTS.get, zid) + testTrunc(DateTimeUtils.TRUNC_TO_MILLISECOND, "1769-10-17T17:10:02.123", inputTS.get, zid) + testTrunc(DateTimeUtils.TRUNC_TO_MICROSECOND, "1769-10-17T17:10:02.123456", + inputTS.get, zid) + } } + } + test("truncTimestamp") { val defaultInputTS = DateTimeUtils.stringToTimestamp( UTF8String.fromString("2015-03-05T09:32:05.359123"), defaultZoneId) val defaultInputTS1 = DateTimeUtils.stringToTimestamp( From 5197c5d2e7648d75def3e159e0d2aa3e20117105 Mon Sep 17 00:00:00 2001 From: ulysses Date: Wed, 11 Nov 2020 11:39:11 +0900 Subject: [PATCH 0440/1009] [SPARK-33390][SQL] Make Literal support char array ### What changes were proposed in this pull request? Make Literal support char array. ### Why are the changes needed? We always use `Literal()` to create foldable value, and `char[]` is a usual data type. We can make it easy that support create String Literal with `char[]`. ### Does this PR introduce _any_ user-facing change? Yes, user can call `Literal()` with `char[]`. ### How was this patch tested? Add test. Closes #30295 from ulysses-you/SPARK-33390. 
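A minimal sketch of the usage this enables, mirroring the new tests in the diff below (assumes a Spark build that contains this change):

```scala
import org.apache.spark.sql.catalyst.expressions.Literal

// A char array is now converted to a string literal of StringType.
Literal("hello".toCharArray)   // equivalent to Literal("hello")
Literal(Array('a', 'c'))       // string literal "ac"

// The DataFrame API benefits as well: comparing a string column against a
// char array behaves like comparing against the equivalent string, e.g.
// df.where($"zoo" === Array('a', 'a'))   // matches rows where zoo = "aa"
```
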
Authored-by: ulysses Signed-off-by: HyukjinKwon --- .../spark/sql/catalyst/CatalystTypeConverters.scala | 1 + .../apache/spark/sql/catalyst/expressions/literals.scala | 4 ++++ .../spark/sql/catalyst/CatalystTypeConvertersSuite.scala | 7 +++++++ .../catalyst/expressions/LiteralExpressionSuite.scala | 9 +++++++++ .../test/scala/org/apache/spark/sql/DatasetSuite.scala | 8 ++++++++ 5 files changed, 29 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala index aab944c680149..971d61518c026 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala @@ -291,6 +291,7 @@ object CatalystTypeConverters { case str: String => UTF8String.fromString(str) case utf8: UTF8String => utf8 case chr: Char => UTF8String.fromString(chr.toString) + case ac: Array[Char] => UTF8String.fromString(String.valueOf(ac)) case other => throw new IllegalArgumentException( s"The value (${other.toString}) of the type (${other.getClass.getCanonicalName}) " + s"cannot be converted to the string type") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala index 413d0af61a05c..1e69814673082 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import java.lang.{Boolean => JavaBoolean} import java.lang.{Byte => JavaByte} +import java.lang.{Character => JavaChar} import java.lang.{Double => JavaDouble} import java.lang.{Float => JavaFloat} import java.lang.{Integer => JavaInteger} @@ -62,6 +63,7 @@ object Literal { case s: Short => Literal(s, ShortType) case s: String => Literal(UTF8String.fromString(s), StringType) case c: Char => Literal(UTF8String.fromString(c.toString), StringType) + case ac: Array[Char] => Literal(UTF8String.fromString(String.valueOf(ac)), StringType) case b: Boolean => Literal(b, BooleanType) case d: BigDecimal => val decimal = Decimal(d) @@ -102,6 +104,7 @@ object Literal { case JavaByte.TYPE => ByteType case JavaFloat.TYPE => FloatType case JavaBoolean.TYPE => BooleanType + case JavaChar.TYPE => StringType // java classes case _ if clz == classOf[LocalDate] => DateType @@ -110,6 +113,7 @@ object Literal { case _ if clz == classOf[Timestamp] => TimestampType case _ if clz == classOf[JavaBigDecimal] => DecimalType.SYSTEM_DEFAULT case _ if clz == classOf[Array[Byte]] => BinaryType + case _ if clz == classOf[Array[Char]] => StringType case _ if clz == classOf[JavaShort] => ShortType case _ if clz == classOf[JavaInteger] => IntegerType case _ if clz == classOf[JavaLong] => LongType diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala index b9e7cf3049896..f4b08330e4c79 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala @@ -152,6 +152,13 @@ class CatalystTypeConvertersSuite extends SparkFunSuite with SQLHelper { 
assert(converter(chr) === expected) } + test("SPARK-33390: Make Literal support char array") { + val ac = Array('a', 'c') + val converter = CatalystTypeConverters.createToCatalystConverter(StringType) + val expected = UTF8String.fromString(String.valueOf(ac)) + assert(converter(ac) === expected) + } + test("converting java.time.Instant to TimestampType") { Seq( "0101-02-16T10:11:32Z", diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala index bb86135021b91..7a482641def3d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala @@ -239,6 +239,15 @@ class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(Literal.create('\n'), "\n") } + test("SPARK-33390: Make Literal support char array") { + checkEvaluation(Literal(Array('h', 'e', 'l', 'l', 'o')), "hello") + checkEvaluation(Literal(Array("hello".toCharArray)), Array("hello")) + // scalastyle:off + checkEvaluation(Literal(Array('测','试')), "测试") + checkEvaluation(Literal(Array('a', '测', 'b', '试', 'c')), "a测b试c") + // scalastyle:on + } + test("construct literals from java.time.LocalDate") { Seq( LocalDate.of(1, 1, 1), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 3c914ae043677..6a1378837ea9b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -1939,6 +1939,14 @@ class DatasetSuite extends QueryTest Seq(FooClassWithEnum(1, null), FooClassWithEnum(2, FooEnum.E2)): _* ) } + + test("SPARK-33390: Make Literal support char array") { + val df = Seq("aa", "bb", "cc", "abc").toDF("zoo") + checkAnswer(df.where($"zoo" === Array('a', 'a')), Seq(Row("aa"))) + checkAnswer( + df.where($"zoo".contains(Array('a', 'b'))), + Seq(Row("abc"))) + } } object AssertExecutionId { From 1e2eeda20e062a77dfd8f944abeaeeb609817ae3 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 11 Nov 2020 05:26:46 +0000 Subject: [PATCH 0441/1009] [SPARK-33382][SQL][TESTS] Unify datasource v1 and v2 SHOW TABLES tests ### What changes were proposed in this pull request? In the PR, I propose to gather common `SHOW TABLES` tests into one trait `org.apache.spark.sql.execution.command.ShowTablesSuite`, and put datasource specific tests to the `v1.ShowTablesSuite` and `v2.ShowTablesSuite`. Also tests for parsing `SHOW TABLES` are extracted to `ShowTablesParserSuite`. ### Why are the changes needed? - The unification will allow to run common `SHOW TABLES` tests for both DSv1 and DSv2 - We can detect missing features and differences between DSv1 and DSv2 implementations. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running new test suites: - `org.apache.spark.sql.execution.command.v1.ShowTablesSuite` - `org.apache.spark.sql.execution.command.v2.ShowTablesSuite` - `ShowTablesParserSuite` Closes #30287 from MaxGekk/unify-dsv1_v2-tests. 
Lead-authored-by: Max Gekk Co-authored-by: Maxim Gekk Co-authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/DDLParserSuite.scala | 49 ------ .../sql/connector/DataSourceV2SQLSuite.scala | 150 +----------------- .../command/ShowTablesParserSuite.scala | 76 +++++++++ .../execution/command/ShowTablesSuite.scala | 122 ++++++++++++++ .../command/v1/ShowTablesSuite.scala | 95 +++++++++++ .../command/v2/ShowTablesSuite.scala | 115 ++++++++++++++ 6 files changed, 409 insertions(+), 198 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesParserSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 085aaf148c8cd..7dac8ffd8475d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1233,55 +1233,6 @@ class DDLParserSuite extends AnalysisTest { assert(exc.getMessage.contains("There must be at least one WHEN clause in a MERGE statement")) } - test("show tables") { - comparePlans( - parsePlan("SHOW TABLES"), - ShowTables(UnresolvedNamespace(Seq.empty[String]), None)) - comparePlans( - parsePlan("SHOW TABLES '*test*'"), - ShowTables(UnresolvedNamespace(Seq.empty[String]), Some("*test*"))) - comparePlans( - parsePlan("SHOW TABLES LIKE '*test*'"), - ShowTables(UnresolvedNamespace(Seq.empty[String]), Some("*test*"))) - comparePlans( - parsePlan("SHOW TABLES FROM testcat.ns1.ns2.tbl"), - ShowTables(UnresolvedNamespace(Seq("testcat", "ns1", "ns2", "tbl")), None)) - comparePlans( - parsePlan("SHOW TABLES IN testcat.ns1.ns2.tbl"), - ShowTables(UnresolvedNamespace(Seq("testcat", "ns1", "ns2", "tbl")), None)) - comparePlans( - parsePlan("SHOW TABLES IN ns1 '*test*'"), - ShowTables(UnresolvedNamespace(Seq("ns1")), Some("*test*"))) - comparePlans( - parsePlan("SHOW TABLES IN ns1 LIKE '*test*'"), - ShowTables(UnresolvedNamespace(Seq("ns1")), Some("*test*"))) - } - - test("show table extended") { - comparePlans( - parsePlan("SHOW TABLE EXTENDED LIKE '*test*'"), - ShowTableStatement(None, "*test*", None)) - comparePlans( - parsePlan("SHOW TABLE EXTENDED FROM testcat.ns1.ns2 LIKE '*test*'"), - ShowTableStatement(Some(Seq("testcat", "ns1", "ns2")), "*test*", None)) - comparePlans( - parsePlan("SHOW TABLE EXTENDED IN testcat.ns1.ns2 LIKE '*test*'"), - ShowTableStatement(Some(Seq("testcat", "ns1", "ns2")), "*test*", None)) - comparePlans( - parsePlan("SHOW TABLE EXTENDED LIKE '*test*' PARTITION(ds='2008-04-09', hr=11)"), - ShowTableStatement(None, "*test*", Some(Map("ds" -> "2008-04-09", "hr" -> "11")))) - comparePlans( - parsePlan("SHOW TABLE EXTENDED FROM testcat.ns1.ns2 LIKE '*test*' " + - "PARTITION(ds='2008-04-09')"), - ShowTableStatement(Some(Seq("testcat", "ns1", "ns2")), "*test*", - Some(Map("ds" -> "2008-04-09")))) - comparePlans( - parsePlan("SHOW TABLE EXTENDED IN testcat.ns1.ns2 LIKE '*test*' " + - "PARTITION(ds='2008-04-09')"), - ShowTableStatement(Some(Seq("testcat", "ns1", "ns2")), "*test*", - Some(Map("ds" -> "2008-04-09")))) - } 
- test("show views") { comparePlans( parsePlan("SHOW VIEWS"), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index dfa32b9ac802e..6f888e527eeab 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -925,71 +925,9 @@ class DataSourceV2SQLSuite } } - test("ShowTables: using v2 catalog") { - spark.sql("CREATE TABLE testcat.db.table_name (id bigint, data string) USING foo") - spark.sql("CREATE TABLE testcat.n1.n2.db.table_name (id bigint, data string) USING foo") - - runShowTablesSql("SHOW TABLES FROM testcat.db", Seq(Row("db", "table_name"))) - - runShowTablesSql( - "SHOW TABLES FROM testcat.n1.n2.db", - Seq(Row("n1.n2.db", "table_name"))) - } - - test("ShowTables: using v2 catalog with a pattern") { - spark.sql("CREATE TABLE testcat.db.table (id bigint, data string) USING foo") - spark.sql("CREATE TABLE testcat.db.table_name_1 (id bigint, data string) USING foo") - spark.sql("CREATE TABLE testcat.db.table_name_2 (id bigint, data string) USING foo") - spark.sql("CREATE TABLE testcat.db2.table_name_2 (id bigint, data string) USING foo") - - runShowTablesSql( - "SHOW TABLES FROM testcat.db", - Seq( - Row("db", "table"), - Row("db", "table_name_1"), - Row("db", "table_name_2"))) - - runShowTablesSql( - "SHOW TABLES FROM testcat.db LIKE '*name*'", - Seq(Row("db", "table_name_1"), Row("db", "table_name_2"))) - - runShowTablesSql( - "SHOW TABLES FROM testcat.db LIKE '*2'", - Seq(Row("db", "table_name_2"))) - } - - test("ShowTables: using v2 catalog, namespace doesn't exist") { - runShowTablesSql("SHOW TABLES FROM testcat.unknown", Seq()) - } - - test("ShowTables: using v1 catalog") { - runShowTablesSql( - "SHOW TABLES FROM default", - Seq(Row("", "source", true), Row("", "source2", true)), - expectV2Catalog = false) - } - - test("ShowTables: using v1 catalog, db doesn't exist ") { - // 'db' below resolves to a database name for v1 catalog because there is no catalog named - // 'db' and there is no default catalog set. 
- val exception = intercept[NoSuchDatabaseException] { - runShowTablesSql("SHOW TABLES FROM db", Seq(), expectV2Catalog = false) - } - - assert(exception.getMessage.contains("Database 'db' not found")) - } - - test("ShowTables: using v1 catalog, db name with multipartIdentifier ('a.b') is not allowed.") { - val exception = intercept[AnalysisException] { - runShowTablesSql("SHOW TABLES FROM a.b", Seq(), expectV2Catalog = false) - } - - assert(exception.getMessage.contains("The database name is not valid: a.b")) - } - test("ShowViews: using v1 catalog, db name with multipartIdentifier ('a.b') is not allowed.") { val exception = intercept[AnalysisException] { - sql("SHOW TABLES FROM a.b") + sql("SHOW VIEWS FROM a.b") } assert(exception.getMessage.contains("The database name is not valid: a.b")) @@ -1004,48 +942,6 @@ class DataSourceV2SQLSuite " only SessionCatalog supports this command.")) } - test("ShowTables: using v2 catalog with empty namespace") { - spark.sql("CREATE TABLE testcat.table (id bigint, data string) USING foo") - runShowTablesSql("SHOW TABLES FROM testcat", Seq(Row("", "table"))) - } - - test("ShowTables: namespace is not specified and default v2 catalog is set") { - spark.conf.set(SQLConf.DEFAULT_CATALOG.key, "testcat") - spark.sql("CREATE TABLE testcat.table (id bigint, data string) USING foo") - - // v2 catalog is used where default namespace is empty for TestInMemoryTableCatalog. - runShowTablesSql("SHOW TABLES", Seq(Row("", "table"))) - } - - test("ShowTables: namespace not specified and default v2 catalog not set - fallback to v1") { - runShowTablesSql( - "SHOW TABLES", - Seq(Row("", "source", true), Row("", "source2", true)), - expectV2Catalog = false) - - runShowTablesSql( - "SHOW TABLES LIKE '*2'", - Seq(Row("", "source2", true)), - expectV2Catalog = false) - } - - test("ShowTables: change current catalog and namespace with USE statements") { - sql("CREATE TABLE testcat.ns1.ns2.table (id bigint) USING foo") - - // Initially, the v2 session catalog (current catalog) is used. - runShowTablesSql( - "SHOW TABLES", Seq(Row("", "source", true), Row("", "source2", true)), - expectV2Catalog = false) - - // Update the current catalog, and no table is matched since the current namespace is Array(). - sql("USE testcat") - runShowTablesSql("SHOW TABLES", Seq()) - - // Update the current namespace to match ns1.ns2.table. 
- sql("USE testcat.ns1.ns2") - runShowTablesSql("SHOW TABLES", Seq(Row("ns1.ns2", "table"))) - } - private def runShowTablesSql( sqlText: String, expected: Seq[Row], @@ -1066,50 +962,6 @@ class DataSourceV2SQLSuite assert(expected === df.collect()) } - test("SHOW TABLE EXTENDED not valid v1 database") { - def testV1CommandNamespace(sqlCommand: String, namespace: String): Unit = { - val e = intercept[AnalysisException] { - sql(sqlCommand) - } - assert(e.message.contains(s"The database name is not valid: ${namespace}")) - } - - val namespace = "testcat.ns1.ns2" - val table = "tbl" - withTable(s"$namespace.$table") { - sql(s"CREATE TABLE $namespace.$table (id bigint, data string) " + - s"USING foo PARTITIONED BY (id)") - - testV1CommandNamespace(s"SHOW TABLE EXTENDED FROM $namespace LIKE 'tb*'", - namespace) - testV1CommandNamespace(s"SHOW TABLE EXTENDED IN $namespace LIKE 'tb*'", - namespace) - testV1CommandNamespace("SHOW TABLE EXTENDED " + - s"FROM $namespace LIKE 'tb*' PARTITION(id=1)", - namespace) - testV1CommandNamespace("SHOW TABLE EXTENDED " + - s"IN $namespace LIKE 'tb*' PARTITION(id=1)", - namespace) - } - } - - test("SHOW TABLE EXTENDED valid v1") { - val expected = Seq(Row("", "source", true), Row("", "source2", true)) - val schema = new StructType() - .add("database", StringType, nullable = false) - .add("tableName", StringType, nullable = false) - .add("isTemporary", BooleanType, nullable = false) - .add("information", StringType, nullable = false) - - val df = sql("SHOW TABLE EXTENDED FROM default LIKE '*source*'") - val result = df.collect() - val resultWithoutInfo = result.map{ case Row(db, table, temp, _) => Row(db, table, temp)} - - assert(df.schema === schema) - assert(resultWithoutInfo === expected) - result.foreach{ case Row(_, _, _, info: String) => assert(info.nonEmpty)} - } - test("CreateNameSpace: basic tests") { // Session catalog is used. withNamespace("ns") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesParserSuite.scala new file mode 100644 index 0000000000000..16f3dea8d75ef --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesParserSuite.scala @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedNamespace} +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan +import org.apache.spark.sql.catalyst.plans.logical.{ShowTables, ShowTableStatement} +import org.apache.spark.sql.test.SharedSparkSession + +class ShowTablesParserSuite extends AnalysisTest with SharedSparkSession { + private val catalog = "test_catalog" + + test("show tables") { + comparePlans( + parsePlan("SHOW TABLES"), + ShowTables(UnresolvedNamespace(Seq.empty[String]), None)) + comparePlans( + parsePlan("SHOW TABLES '*test*'"), + ShowTables(UnresolvedNamespace(Seq.empty[String]), Some("*test*"))) + comparePlans( + parsePlan("SHOW TABLES LIKE '*test*'"), + ShowTables(UnresolvedNamespace(Seq.empty[String]), Some("*test*"))) + comparePlans( + parsePlan(s"SHOW TABLES FROM $catalog.ns1.ns2.tbl"), + ShowTables(UnresolvedNamespace(Seq(catalog, "ns1", "ns2", "tbl")), None)) + comparePlans( + parsePlan(s"SHOW TABLES IN $catalog.ns1.ns2.tbl"), + ShowTables(UnresolvedNamespace(Seq(catalog, "ns1", "ns2", "tbl")), None)) + comparePlans( + parsePlan("SHOW TABLES IN ns1 '*test*'"), + ShowTables(UnresolvedNamespace(Seq("ns1")), Some("*test*"))) + comparePlans( + parsePlan("SHOW TABLES IN ns1 LIKE '*test*'"), + ShowTables(UnresolvedNamespace(Seq("ns1")), Some("*test*"))) + } + + test("show table extended") { + comparePlans( + parsePlan("SHOW TABLE EXTENDED LIKE '*test*'"), + ShowTableStatement(None, "*test*", None)) + comparePlans( + parsePlan(s"SHOW TABLE EXTENDED FROM $catalog.ns1.ns2 LIKE '*test*'"), + ShowTableStatement(Some(Seq(catalog, "ns1", "ns2")), "*test*", None)) + comparePlans( + parsePlan(s"SHOW TABLE EXTENDED IN $catalog.ns1.ns2 LIKE '*test*'"), + ShowTableStatement(Some(Seq(catalog, "ns1", "ns2")), "*test*", None)) + comparePlans( + parsePlan("SHOW TABLE EXTENDED LIKE '*test*' PARTITION(ds='2008-04-09', hr=11)"), + ShowTableStatement(None, "*test*", Some(Map("ds" -> "2008-04-09", "hr" -> "11")))) + comparePlans( + parsePlan(s"SHOW TABLE EXTENDED FROM $catalog.ns1.ns2 LIKE '*test*' " + + "PARTITION(ds='2008-04-09')"), + ShowTableStatement(Some(Seq(catalog, "ns1", "ns2")), "*test*", + Some(Map("ds" -> "2008-04-09")))) + comparePlans( + parsePlan(s"SHOW TABLE EXTENDED IN $catalog.ns1.ns2 LIKE '*test*' " + + "PARTITION(ds='2008-04-09')"), + ShowTableStatement(Some(Seq(catalog, "ns1", "ns2")), "*test*", + Some(Map("ds" -> "2008-04-09")))) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuite.scala new file mode 100644 index 0000000000000..01720b5723243 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuite.scala @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.scalactic.source.Position +import org.scalatest.Tag + +import org.apache.spark.sql.Row +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.StructType + +trait ShowTablesSuite extends SharedSparkSession { + protected def version: String + protected def catalog: String + protected def defaultNamespace: Seq[String] + protected def defaultUsing: String + case class ShowRow(namespace: String, table: String, isTemporary: Boolean) + protected def getRows(showRows: Seq[ShowRow]): Seq[Row] + // Gets the schema of `SHOW TABLES` + protected def showSchema: StructType + + protected def runShowTablesSql(sqlText: String, expected: Seq[ShowRow]): Unit = { + val df = spark.sql(sqlText) + assert(df.schema === showSchema) + assert(df.collect() === getRows(expected)) + } + + override def test(testName: String, testTags: Tag*)(testFun: => Any) + (implicit pos: Position): Unit = { + super.test(s"SHOW TABLES $version: " + testName, testTags: _*)(testFun) + } + + test("show an existing table") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + withTable(s"$catalog.ns.table") { + sql(s"CREATE TABLE $catalog.ns.table (name STRING, id INT) $defaultUsing") + runShowTablesSql(s"SHOW TABLES IN $catalog.ns", Seq(ShowRow("ns", "table", false))) + } + } + } + + test("show tables with a pattern") { + withNamespace(s"$catalog.ns1", s"$catalog.ns2") { + sql(s"CREATE NAMESPACE $catalog.ns1") + sql(s"CREATE NAMESPACE $catalog.ns2") + withTable( + s"$catalog.ns1.table", + s"$catalog.ns1.table_name_1", + s"$catalog.ns1.table_name_2", + s"$catalog.ns2.table_name_2") { + sql(s"CREATE TABLE $catalog.ns1.table (id bigint, data string) $defaultUsing") + sql(s"CREATE TABLE $catalog.ns1.table_name_1 (id bigint, data string) $defaultUsing") + sql(s"CREATE TABLE $catalog.ns1.table_name_2 (id bigint, data string) $defaultUsing") + sql(s"CREATE TABLE $catalog.ns2.table_name_2 (id bigint, data string) $defaultUsing") + + runShowTablesSql( + s"SHOW TABLES FROM $catalog.ns1", + Seq( + ShowRow("ns1", "table", false), + ShowRow("ns1", "table_name_1", false), + ShowRow("ns1", "table_name_2", false))) + + runShowTablesSql( + s"SHOW TABLES FROM $catalog.ns1 LIKE '*name*'", + Seq( + ShowRow("ns1", "table_name_1", false), + ShowRow("ns1", "table_name_2", false))) + + runShowTablesSql( + s"SHOW TABLES FROM $catalog.ns1 LIKE '*2'", + Seq(ShowRow("ns1", "table_name_2", false))) + } + } + } + + test("show tables with current catalog and namespace") { + withSQLConf(SQLConf.DEFAULT_CATALOG.key -> catalog) { + val tblName = (catalog +: defaultNamespace :+ "table").quoted + withTable(tblName) { + sql(s"CREATE TABLE $tblName (name STRING, id INT) $defaultUsing") + val ns = defaultNamespace.mkString(".") + runShowTablesSql("SHOW TABLES", Seq(ShowRow(ns, "table", false))) + } + } + } + + test("change current catalog and namespace with USE statements") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE 
NAMESPACE $catalog.ns") + withTable(s"$catalog.ns.table") { + sql(s"CREATE TABLE $catalog.ns.table (name STRING, id INT) $defaultUsing") + + sql(s"USE $catalog") + // No table is matched since the current namespace is not ["ns"] + assert(defaultNamespace != Seq("ns")) + runShowTablesSql("SHOW TABLES", Seq()) + + // Update the current namespace to match "ns.tbl". + sql(s"USE $catalog.ns") + runShowTablesSql("SHOW TABLES", Seq(ShowRow("ns", "table", false))) + } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala new file mode 100644 index 0000000000000..feb3bc623f3fa --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command.v1 + +import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException +import org.apache.spark.sql.connector.catalog.CatalogManager +import org.apache.spark.sql.execution.command.{ShowTablesSuite => CommonShowTablesSuite} +import org.apache.spark.sql.types.{BooleanType, StringType, StructType} + +class ShowTablesSuite extends CommonShowTablesSuite { + override def version: String = "V1" + override def catalog: String = CatalogManager.SESSION_CATALOG_NAME + override def defaultNamespace: Seq[String] = Seq("default") + override def defaultUsing: String = "USING parquet" + override def showSchema: StructType = { + new StructType() + .add("database", StringType, nullable = false) + .add("tableName", StringType, nullable = false) + .add("isTemporary", BooleanType, nullable = false) + } + override def getRows(showRows: Seq[ShowRow]): Seq[Row] = { + showRows.map { + case ShowRow(namespace, table, isTemporary) => Row(namespace, table, isTemporary) + } + } + + private def withSourceViews(f: => Unit): Unit = { + withTable("source", "source2") { + val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") + df.createOrReplaceTempView("source") + val df2 = spark.createDataFrame(Seq((4L, "d"), (5L, "e"), (6L, "f"))).toDF("id", "data") + df2.createOrReplaceTempView("source2") + f + } + } + + // `SHOW TABLES` returns empty result in V2 catalog instead of throwing the exception. + test("show table in a not existing namespace") { + val msg = intercept[NoSuchDatabaseException] { + runShowTablesSql(s"SHOW TABLES IN $catalog.unknown", Seq()) + }.getMessage + assert(msg.contains("Database 'unknown' not found")) + } + + // `SHOW TABLES` from v2 catalog returns empty result. 
+ test("v1 SHOW TABLES list the temp views") { + withSourceViews { + runShowTablesSql( + "SHOW TABLES FROM default", + Seq(ShowRow("", "source", true), ShowRow("", "source2", true))) + } + } + + test("v1 SHOW TABLES only support single-level namespace") { + val exception = intercept[AnalysisException] { + runShowTablesSql("SHOW TABLES FROM a.b", Seq()) + } + assert(exception.getMessage.contains("The database name is not valid: a.b")) + } + + test("SHOW TABLE EXTENDED from default") { + withSourceViews { + val expected = Seq(Row("", "source", true), Row("", "source2", true)) + val schema = new StructType() + .add("database", StringType, nullable = false) + .add("tableName", StringType, nullable = false) + .add("isTemporary", BooleanType, nullable = false) + .add("information", StringType, nullable = false) + + val df = sql("SHOW TABLE EXTENDED FROM default LIKE '*source*'") + val result = df.collect() + val resultWithoutInfo = result.map { case Row(db, table, temp, _) => Row(db, table, temp) } + + assert(df.schema === schema) + assert(resultWithoutInfo === expected) + result.foreach { case Row(_, _, _, info: String) => assert(info.nonEmpty) } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala new file mode 100644 index 0000000000000..668120ae1cada --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command.v2 + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException +import org.apache.spark.sql.connector.InMemoryTableCatalog +import org.apache.spark.sql.execution.command.{ShowTablesSuite => CommonShowTablesSuite} +import org.apache.spark.sql.types.{StringType, StructType} + +class ShowTablesSuite extends CommonShowTablesSuite { + override def version: String = "V2" + override def catalog: String = "test_catalog" + override def defaultNamespace: Seq[String] = Nil + override def defaultUsing: String = "USING _" + override def showSchema: StructType = { + new StructType() + .add("namespace", StringType, nullable = false) + .add("tableName", StringType, nullable = false) + } + override def getRows(showRows: Seq[ShowRow]): Seq[Row] = { + showRows.map { + case ShowRow(namespace, table, _) => Row(namespace, table) + } + } + + override def sparkConf: SparkConf = super.sparkConf + .set(s"spark.sql.catalog.$catalog", classOf[InMemoryTableCatalog].getName) + + // The test fails with the exception `NoSuchDatabaseException` in V1 catalog. 
+ // TODO(SPARK-33394): Throw `NoSuchDatabaseException` for not existing namespace + test("show table in a not existing namespace") { + runShowTablesSql(s"SHOW TABLES IN $catalog.unknown", Seq()) + } + + // The test fails for V1 catalog with the error: + // org.apache.spark.sql.AnalysisException: + // The namespace in session catalog must have exactly one name part: spark_catalog.n1.n2.db + test("show tables in nested namespaces") { + withTable(s"$catalog.n1.n2.db") { + spark.sql(s"CREATE TABLE $catalog.n1.n2.db.table_name (id bigint, data string) $defaultUsing") + runShowTablesSql( + s"SHOW TABLES FROM $catalog.n1.n2.db", + Seq(ShowRow("n1.n2.db", "table_name", false))) + } + } + + // The test fails for V1 catalog with the error: + // org.apache.spark.sql.AnalysisException: + // The namespace in session catalog must have exactly one name part: spark_catalog.table + test("using v2 catalog with empty namespace") { + withTable(s"$catalog.table") { + spark.sql(s"CREATE TABLE $catalog.table (id bigint, data string) $defaultUsing") + runShowTablesSql(s"SHOW TABLES FROM $catalog", Seq(ShowRow("", "table", false))) + } + } + + // The test fails for V1 catalog with the error: + // org.apache.spark.sql.AnalysisException: + // The namespace in session catalog must have exactly one name part: spark_catalog.ns1.ns2.tbl + test("SHOW TABLE EXTENDED not valid v1 database") { + def testV1CommandNamespace(sqlCommand: String, namespace: String): Unit = { + val e = intercept[AnalysisException] { + sql(sqlCommand) + } + assert(e.message.contains(s"The database name is not valid: ${namespace}")) + } + + val namespace = s"$catalog.ns1.ns2" + val table = "tbl" + withTable(s"$namespace.$table") { + sql(s"CREATE TABLE $namespace.$table (id bigint, data string) " + + s"$defaultUsing PARTITIONED BY (id)") + + testV1CommandNamespace(s"SHOW TABLE EXTENDED FROM $namespace LIKE 'tb*'", + namespace) + testV1CommandNamespace(s"SHOW TABLE EXTENDED IN $namespace LIKE 'tb*'", + namespace) + testV1CommandNamespace("SHOW TABLE EXTENDED " + + s"FROM $namespace LIKE 'tb*' PARTITION(id=1)", + namespace) + testV1CommandNamespace("SHOW TABLE EXTENDED " + + s"IN $namespace LIKE 'tb*' PARTITION(id=1)", + namespace) + } + } + + // TODO(SPARK-33393): Support SHOW TABLE EXTENDED in DSv2 + test("SHOW TABLE EXTENDED: an existing table") { + val table = "people" + withTable(s"$catalog.$table") { + sql(s"CREATE TABLE $catalog.$table (name STRING, id INT) $defaultUsing") + val errMsg = intercept[NoSuchDatabaseException] { + sql(s"SHOW TABLE EXTENDED FROM $catalog LIKE '*$table*'").collect() + }.getMessage + assert(errMsg.contains(s"Database '$catalog' not found")) + } + } +} From 6d5d03095798a2ca2014ada340424512d60810ce Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Wed, 11 Nov 2020 05:54:27 +0000 Subject: [PATCH 0442/1009] [SPARK-33414][SQL] Migrate SHOW CREATE TABLE command to use UnresolvedTableOrView to resolve the identifier ### What changes were proposed in this pull request? This PR proposes to migrate `SHOW CREATE TABLE` to use `UnresolvedTableOrView` to resolve the table identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). 
Note that `SHOW CREATE TABLE` works only with a v1 table and a permanent view, and not supported for v2 tables. ### Why are the changes needed? The changes allow consistent resolution behavior when resolving the table identifier. For example, the following is the current behavior: ```scala sql("CREATE TEMPORARY VIEW t AS SELECT 1") sql("CREATE DATABASE db") sql("CREATE TABLE t (key INT, value STRING) USING hive") sql("USE db") sql("SHOW CREATE TABLE t AS SERDE") // Succeeds ``` With this change, `SHOW CREATE TABLE ... AS SERDE` above fails with the following: ``` org.apache.spark.sql.AnalysisException: t is a temp view not table or permanent view.; line 1 pos 0 at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42) at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveTempViews$$anonfun$apply$7.$anonfun$applyOrElse$43(Analyzer.scala:883) at scala.Option.map(Option.scala:230) ``` , which is expected since temporary view is resolved first and `SHOW CREATE TABLE ... AS SERDE` doesn't support a temporary view. Note that there is no behavior change for `SHOW CREATE TABLE` without `AS SERDE` since it was already resolving to a temporary view first. See below for more detail. ### Does this PR introduce _any_ user-facing change? After this PR, `SHOW CREATE TABLE t AS SERDE` is resolved to a temp view `t` instead of table `db.t` in the above scenario. Note that there is no behavior change for `SHOW CREATE TABLE` without `AS SERDE`, but the exception message changes from `SHOW CREATE TABLE is not supported on a temporary view` to `t is a temp view not table or permanent view`. ### How was this patch tested? Updated existing tests. Closes #30321 from imback82/show_create_table. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/parser/AstBuilder.scala | 8 ++++++-- .../sql/catalyst/plans/logical/statements.scala | 7 ------- .../sql/catalyst/plans/logical/v2Commands.scala | 7 +++++++ .../spark/sql/catalyst/parser/DDLParserSuite.scala | 8 +++++++- .../catalyst/analysis/ResolveSessionCatalog.scala | 13 ++++++------- .../datasources/v2/DataSourceV2Strategy.scala | 3 +++ .../org/apache/spark/sql/ShowCreateTableSuite.scala | 7 ++++--- .../spark/sql/connector/DataSourceV2SQLSuite.scala | 3 ++- .../apache/spark/sql/execution/SQLViewSuite.scala | 2 +- 9 files changed, 36 insertions(+), 22 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 07086d1a45aa0..893afc8984e9c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3301,10 +3301,14 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } /** - * Creates a [[ShowCreateTableStatement]] + * Creates a [[ShowCreateTable]] */ override def visitShowCreateTable(ctx: ShowCreateTableContext): LogicalPlan = withOrigin(ctx) { - ShowCreateTableStatement(visitMultipartIdentifier(ctx.multipartIdentifier()), ctx.SERDE != null) + ShowCreateTable( + UnresolvedTableOrView( + visitMultipartIdentifier(ctx.multipartIdentifier()), + allowTempView = false), + ctx.SERDE != null) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index 
246e7f3bcb959..2fc56891cd15e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -347,13 +347,6 @@ case class UseStatement(isNamespaceSet: Boolean, nameParts: Seq[String]) extends */ case class RepairTableStatement(tableName: Seq[String]) extends ParsedStatement -/** - * A SHOW CREATE TABLE statement, as parsed from SQL. - */ -case class ShowCreateTableStatement( - tableName: Seq[String], - asSerde: Boolean = false) extends ParsedStatement - /** * A CACHE TABLE statement, as parsed from SQL */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index b5386f5044452..c1fc0b69354cd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -622,3 +622,10 @@ case class LoadData( partition: Option[TablePartitionSpec]) extends Command { override def children: Seq[LogicalPlan] = child :: Nil } + +/** + * The logical plan of the SHOW CREATE TABLE command. + */ +case class ShowCreateTable(child: LogicalPlan, asSerde: Boolean = false) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 7dac8ffd8475d..be1ac56c4a4a3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1583,7 +1583,13 @@ class DDLParserSuite extends AnalysisTest { test("SHOW CREATE table") { comparePlans( parsePlan("SHOW CREATE TABLE a.b.c"), - ShowCreateTableStatement(Seq("a", "b", "c"))) + ShowCreateTable(UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false))) + + comparePlans( + parsePlan("SHOW CREATE TABLE a.b.c AS SERDE"), + ShowCreateTable( + UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + asSerde = true)) } test("CACHE TABLE") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 59652229a2b2e..ff25272aebb5b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -434,13 +434,12 @@ class ResolveSessionCatalog( isOverwrite, partition) - case ShowCreateTableStatement(tbl, asSerde) if !asSerde => - val name = parseTempViewOrV1Table(tbl, "SHOW CREATE TABLE") - ShowCreateTableCommand(name.asTableIdentifier) - - case ShowCreateTableStatement(tbl, asSerde) if asSerde => - val v1TableName = parseV1Table(tbl, "SHOW CREATE TABLE AS SERDE") - ShowCreateTableAsSerdeCommand(v1TableName.asTableIdentifier) + case ShowCreateTable(ResolvedV1TableOrViewIdentifier(ident), asSerde) => + if (asSerde) { + ShowCreateTableAsSerdeCommand(ident.asTableIdentifier) + } else { + ShowCreateTableCommand(ident.asTableIdentifier) + } case CacheTableStatement(tbl, plan, isLazy, options) => val name = if (plan.isDefined) { diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 5695d232fae54..48fa88ed550b6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -286,6 +286,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case LoadData(_: ResolvedTable, _, _, _, _) => throw new AnalysisException("LOAD DATA is not supported for v2 tables.") + case ShowCreateTable(_: ResolvedTable, _) => + throw new AnalysisException("SHOW CREATE TABLE is not supported for v2 tables.") + case _ => Nil } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala index 1106a787cc9a7..7b4c8d1cc71d8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala @@ -155,16 +155,17 @@ abstract class ShowCreateTableSuite extends QueryTest with SQLTestUtils { val ex = intercept[AnalysisException] { sql(s"SHOW CREATE TABLE $viewName") } - assert(ex.getMessage.contains("SHOW CREATE TABLE is not supported on a temporary view")) + assert(ex.getMessage.contains(s"$viewName is a temp view not table or permanent view")) } withGlobalTempView(viewName) { sql(s"CREATE GLOBAL TEMPORARY VIEW $viewName AS SELECT 1 AS a") + val globalTempViewDb = spark.sessionState.catalog.globalTempViewManager.database val ex = intercept[AnalysisException] { - val globalTempViewDb = spark.sessionState.catalog.globalTempViewManager.database sql(s"SHOW CREATE TABLE $globalTempViewDb.$viewName") } - assert(ex.getMessage.contains("SHOW CREATE TABLE is not supported on a temporary view")) + assert(ex.getMessage.contains( + s"$globalTempViewDb.$viewName is a temp view not table or permanent view")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 6f888e527eeab..68de55f03ba83 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -1954,7 +1954,8 @@ class DataSourceV2SQLSuite val t = "testcat.ns1.ns2.tbl" withTable(t) { spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo") - testV1CommandSupportingTempView("SHOW CREATE TABLE", t) + testNotSupportedV2Command("SHOW CREATE TABLE", t) + testNotSupportedV2Command("SHOW CREATE TABLE", s"$t AS SERDE") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 8889ea177720e..f5d6ea929a9aa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -176,7 +176,7 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { val e3 = intercept[AnalysisException] { sql(s"SHOW CREATE TABLE $viewName") }.getMessage - assert(e3.contains("SHOW CREATE TABLE is not supported on a temporary view")) + assert(e3.contains(s"$viewName is a temp view not table or permanent view")) assertNoSuchTable(s"SHOW 
PARTITIONS $viewName") val e4 = intercept[AnalysisException] { sql(s"ANALYZE TABLE $viewName COMPUTE STATISTICS") From 4b367976a877adb981f65d546e1522fdf30d0731 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Wed, 11 Nov 2020 15:24:05 +0900 Subject: [PATCH 0443/1009] [SPARK-33417][SQL][TEST] Correct the behaviour of query filters in TPCDSQueryBenchmark ### What changes were proposed in this pull request? This PR intends to fix the behaviour of query filters in `TPCDSQueryBenchmark`. We can use an option `--query-filter` for selecting TPCDS queries to run, e.g., `--query-filter q6,q8,q13`. But, the current master has a weird behaviour about the option. For example, if we pass `--query-filter q6` so as to run the TPCDS q6 only, `TPCDSQueryBenchmark` runs `q6` and `q6-v2.7` because the `filterQueries` method does not respect the name suffix. So, there is no way now to run the TPCDS q6 only. ### Why are the changes needed? Bugfix. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually checked. Closes #30324 from maropu/FilterBugInTPCDSQueryBenchmark. Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- .../benchmark/TPCDSQueryBenchmark.scala | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala index 7bbf0795eb052..43bc7c12937ec 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala @@ -98,11 +98,16 @@ object TPCDSQueryBenchmark extends SqlBasedBenchmark { } } - def filterQueries( + private def filterQueries( origQueries: Seq[String], - args: TPCDSQueryBenchmarkArguments): Seq[String] = { - if (args.queryFilter.nonEmpty) { - origQueries.filter(args.queryFilter.contains) + queryFilter: Set[String], + nameSuffix: String = ""): Seq[String] = { + if (queryFilter.nonEmpty) { + if (nameSuffix.nonEmpty) { + origQueries.filter { name => queryFilter.contains(s"$name$nameSuffix") } + } else { + origQueries.filter(queryFilter.contains) + } } else { origQueries } @@ -125,6 +130,7 @@ object TPCDSQueryBenchmark extends SqlBasedBenchmark { "q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99") // This list only includes TPC-DS v2.7 queries that are different from v1.4 ones + val nameSuffixForQueriesV2_7 = "-v2.7" val tpcdsQueriesV2_7 = Seq( "q5a", "q6", "q10a", "q11", "q12", "q14", "q14a", "q18a", "q20", "q22", "q22a", "q24", "q27a", "q34", "q35", "q35a", "q36a", "q47", "q49", @@ -132,8 +138,9 @@ object TPCDSQueryBenchmark extends SqlBasedBenchmark { "q80a", "q86a", "q98") // If `--query-filter` defined, filters the queries that this option selects - val queriesV1_4ToRun = filterQueries(tpcdsQueries, benchmarkArgs) - val queriesV2_7ToRun = filterQueries(tpcdsQueriesV2_7, benchmarkArgs) + val queriesV1_4ToRun = filterQueries(tpcdsQueries, benchmarkArgs.queryFilter) + val queriesV2_7ToRun = filterQueries(tpcdsQueriesV2_7, benchmarkArgs.queryFilter, + nameSuffix = nameSuffixForQueriesV2_7) if ((queriesV1_4ToRun ++ queriesV2_7ToRun).isEmpty) { throw new RuntimeException( @@ -143,6 +150,6 @@ object TPCDSQueryBenchmark extends SqlBasedBenchmark { val tableSizes = setupTables(benchmarkArgs.dataLocation) runTpcdsQueries(queryLocation = "tpcds", queries = queriesV1_4ToRun, tableSizes) 
runTpcdsQueries(queryLocation = "tpcds-v2.7.0", queries = queriesV2_7ToRun, tableSizes, - nameSuffix = "-v2.7") + nameSuffix = nameSuffixForQueriesV2_7) } } From 8760032f4f7e1ef36fee6afc45923d3826ef14fc Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 11 Nov 2020 16:13:21 +0900 Subject: [PATCH 0444/1009] [SPARK-33412][SQL] OverwriteByExpression should resolve its delete condition based on the table relation not the input query ### What changes were proposed in this pull request? Add a special case to `ResolveReferences` that resolves `OverwriteByExpression`'s condition expression based on the table relation instead of the input query. ### Why are the changes needed? The condition expression is passed to the table implementation at the end, so we should resolve it using the table schema. Previously this worked because we had a hack in `ResolveReferences` to delay the resolution if `outputResolved == false`. However, this hack doesn't work for tables that accept any schema, like https://github.com/delta-io/delta/pull/521 . We may wrongly resolve the delete condition using the input query's output columns, which don't match the table column names. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests and an updated test in v2 write. Closes #30318 from cloud-fan/v2-write. Authored-by: Wenchen Fan Signed-off-by: HyukjinKwon --- .../spark/sql/catalyst/analysis/Analyzer.scala | 9 ++++----- .../sql/catalyst/plans/logical/v2Commands.scala | 3 ++- .../analysis/DataSourceV2AnalysisSuite.scala | 17 ++++++++++++----- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 5834f9bad4a18..b27b8d8a606da 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1548,11 +1548,10 @@ class Analyzer( g.copy(resolvedSelectedExprs, resolvedGroupingExprs, g.child, resolvedAggExprs) - case o: OverwriteByExpression - if !(o.table.resolved && o.query.resolved && o.outputResolved) => - // do not resolve expression attributes until the query attributes are resolved against the - // table by ResolveOutputRelation. that rule will alias the attributes to the table's names. - o + case o: OverwriteByExpression if o.table.resolved => + // The delete condition of `OverwriteByExpression` will be passed to the table + // implementation and should be resolved based on the table schema.
+ o.copy(deleteExpr = resolveExpressionBottomUp(o.deleteExpr, o.table)) case m @ MergeIntoTable(targetTable, sourceTable, _, _, _) if !m.resolved && targetTable.resolved && sourceTable.resolved => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index c1fc0b69354cd..e65555ea27672 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.analysis.{NamedRelation, UnresolvedException} import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, Unevaluable} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet, Expression, Unevaluable} import org.apache.spark.sql.catalyst.plans.DescribeCommandSchema import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, ColumnChange} @@ -96,6 +96,7 @@ case class OverwriteByExpression( override lazy val resolved: Boolean = { table.resolved && query.resolved && outputResolved && deleteExpr.resolved } + override def inputSet: AttributeSet = AttributeSet(table.output) override def withNewQuery(newQuery: LogicalPlan): OverwriteByExpression = { copy(query = newQuery) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala index ba926f842551f..349237c2aa893 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala @@ -668,9 +668,7 @@ abstract class DataSourceV2AnalysisBaseSuite extends AnalysisTest { Alias(Cast(a, DoubleType, Some(conf.sessionLocalTimeZone)), "x")(), Alias(Cast(b, DoubleType, Some(conf.sessionLocalTimeZone)), "y")()), query), - LessThanOrEqual( - AttributeReference("x", DoubleType, nullable = false)(x.exprId), - Literal(15.0d))) + LessThanOrEqual(x, Literal(15.0d))) assertNotResolved(parsedPlan) checkAnalysis(parsedPlan, expectedPlan) @@ -678,7 +676,7 @@ abstract class DataSourceV2AnalysisBaseSuite extends AnalysisTest { } protected def testNotResolvedOverwriteByExpression(): Unit = { - val xRequiredTable = TestRelation(StructType(Seq( + val table = TestRelation(StructType(Seq( StructField("x", DoubleType, nullable = false), StructField("y", DoubleType))).toAttributes) @@ -687,10 +685,19 @@ abstract class DataSourceV2AnalysisBaseSuite extends AnalysisTest { StructField("b", DoubleType))).toAttributes) // the write is resolved (checked above). this test plan is not because of the expression. 
- val parsedPlan = OverwriteByExpression.byPosition(xRequiredTable, query, + val parsedPlan = OverwriteByExpression.byPosition(table, query, LessThanOrEqual(UnresolvedAttribute(Seq("a")), Literal(15.0d))) assertNotResolved(parsedPlan) assertAnalysisError(parsedPlan, Seq("cannot resolve", "`a`", "given input columns", "x, y")) + + val tableAcceptAnySchema = TestRelationAcceptAnySchema(StructType(Seq( + StructField("x", DoubleType, nullable = false), + StructField("y", DoubleType))).toAttributes) + + val parsedPlan2 = OverwriteByExpression.byPosition(tableAcceptAnySchema, query, + LessThanOrEqual(UnresolvedAttribute(Seq("a")), Literal(15.0d))) + assertNotResolved(parsedPlan2) + assertAnalysisError(parsedPlan2, Seq("cannot resolve", "`a`", "given input columns", "x, y")) } } From 1eb236b9360a000afc30424341698fe26ee96d0f Mon Sep 17 00:00:00 2001 From: stczwd Date: Wed, 11 Nov 2020 09:30:42 +0000 Subject: [PATCH 0445/1009] [SPARK-32512][SQL] add alter table add/drop partition command for datasourcev2 ### What changes were proposed in this pull request? This patch is trying to add `AlterTableAddPartitionExec` and `AlterTableDropPartitionExec` with the new table partition API, defined in #28617. ### Does this PR introduce _any_ user-facing change? Yes. User can use `alter table add partition` or `alter table drop partition` to create/drop partition in V2Table. ### How was this patch tested? Run suites and fix old tests. Closes #29339 from stczwd/SPARK-32512-new. Lead-authored-by: stczwd Co-authored-by: Jacky Lee Co-authored-by: Jackey Lee Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 1 + .../sql/catalyst/analysis/CheckAnalysis.scala | 27 +++ .../analysis/ResolvePartitionSpec.scala | 89 ++++++++++ .../catalyst/analysis/v2ResolutionPlans.scala | 12 ++ .../sql/catalyst/parser/AstBuilder.scala | 18 +- .../catalyst/plans/logical/statements.scala | 18 -- .../catalyst/plans/logical/v2Commands.scala | 42 ++++- .../v2/DataSourceV2Implicits.scala | 31 +++- .../sql/catalyst/parser/DDLParserSuite.scala | 30 ++-- .../InMemoryPartitionTableCatalog.scala | 47 +++++ .../analysis/ResolveSessionCatalog.scala | 18 +- .../v2/AlterTableAddPartitionExec.scala | 65 +++++++ .../v2/AlterTableDropPartitionExec.scala | 57 ++++++ .../datasources/v2/DataSourceV2Strategy.scala | 12 +- .../AlterTablePartitionV2SQLSuite.scala | 162 ++++++++++++++++++ .../sql/connector/DataSourceV2SQLSuite.scala | 59 +++---- .../sql/connector/DatasourceV2SQLBase.scala | 54 ++++++ .../spark/sql/execution/SQLViewSuite.scala | 8 +- .../sql/hive/execution/HiveDDLSuite.scala | 17 +- 19 files changed, 670 insertions(+), 97 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTableCatalog.scala create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableAddPartitionExec.scala create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/connector/DatasourceV2SQLBase.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index b27b8d8a606da..690d66bec890d 100644 
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -225,6 +225,7 @@ class Analyzer( ResolveInsertInto :: ResolveRelations :: ResolveTables :: + ResolvePartitionSpec :: ResolveReferences :: ResolveCreateNamedStruct :: ResolveDeserializer :: diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 33a5224ed293e..452ba80b23441 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.optimizer.BooleanSimplification import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.util.TypeUtils +import org.apache.spark.sql.connector.catalog.{SupportsAtomicPartitionManagement, SupportsPartitionManagement, Table} import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, After, ColumnPosition, DeleteColumn, RenameColumn, UpdateColumnComment, UpdateColumnNullability, UpdateColumnPosition, UpdateColumnType} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -564,6 +565,12 @@ trait CheckAnalysis extends PredicateHelper { // no validation needed for set and remove property } + case AlterTableAddPartition(ResolvedTable(_, _, table), parts, _) => + checkAlterTablePartition(table, parts) + + case AlterTableDropPartition(ResolvedTable(_, _, table), parts, _, _, _) => + checkAlterTablePartition(table, parts) + case _ => // Fallbacks to the following checks } @@ -976,4 +983,24 @@ trait CheckAnalysis extends PredicateHelper { failOnOuterReferenceInSubTree(p) }} } + + // Make sure that table is able to alter partition. + private def checkAlterTablePartition( + table: Table, parts: Seq[PartitionSpec]): Unit = { + (table, parts) match { + case (_, parts) if parts.exists(_.isInstanceOf[UnresolvedPartitionSpec]) => + failAnalysis("PartitionSpecs are not resolved") + + case (table, _) if !table.isInstanceOf[SupportsPartitionManagement] => + failAnalysis(s"Table ${table.name()} can not alter partitions.") + + // Skip atomic partition tables + case (_: SupportsAtomicPartitionManagement, _) => + case (_: SupportsPartitionManagement, parts) if parts.size > 1 => + failAnalysis( + s"Nonatomic partition table ${table.name()} can not alter multiple partitions.") + + case _ => + } + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala new file mode 100644 index 0000000000000..5e19a32968992 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec +import org.apache.spark.sql.catalyst.plans.logical.{AlterTableAddPartition, AlterTableDropPartition, LogicalPlan} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.connector.catalog.SupportsPartitionManagement +import org.apache.spark.sql.types._ + +/** + * Resolve [[UnresolvedPartitionSpec]] to [[ResolvedPartitionSpec]] in partition related commands. + */ +object ResolvePartitionSpec extends Rule[LogicalPlan] { + + def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { + case r @ AlterTableAddPartition( + ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs, _) => + r.copy(parts = resolvePartitionSpecs(partSpecs, table.partitionSchema())) + + case r @ AlterTableDropPartition( + ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs, _, _, _) => + r.copy(parts = resolvePartitionSpecs(partSpecs, table.partitionSchema())) + } + + private def resolvePartitionSpecs( + partSpecs: Seq[PartitionSpec], partSchema: StructType): Seq[ResolvedPartitionSpec] = + partSpecs.map { + case unresolvedPartSpec: UnresolvedPartitionSpec => + ResolvedPartitionSpec( + convertToPartIdent(unresolvedPartSpec.spec, partSchema), unresolvedPartSpec.location) + case resolvedPartitionSpec: ResolvedPartitionSpec => + resolvedPartitionSpec + } + + private def convertToPartIdent( + partSpec: TablePartitionSpec, partSchema: StructType): InternalRow = { + val conflictKeys = partSpec.keys.toSeq.diff(partSchema.map(_.name)) + if (conflictKeys.nonEmpty) { + throw new AnalysisException(s"Partition key ${conflictKeys.mkString(",")} not exists") + } + + val partValues = partSchema.map { part => + val partValue = partSpec.get(part.name).orNull + if (partValue == null) { + null + } else { + // TODO: Support other datatypes, such as DateType + part.dataType match { + case _: ByteType => + partValue.toByte + case _: ShortType => + partValue.toShort + case _: IntegerType => + partValue.toInt + case _: LongType => + partValue.toLong + case _: FloatType => + partValue.toFloat + case _: DoubleType => + partValue.toDouble + case _: StringType => + partValue + case _ => + throw new AnalysisException( + s"Type ${part.dataType.typeName} is not supported for partition.") + } + } + } + InternalRow.fromSeq(partValues) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala index fcf4a438eb19c..83acfb8d4a71c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala @@ -17,7 +17,9 @@ package org.apache.spark.sql.catalyst.analysis +import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.CatalogFunction +import 
org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan} import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier, SupportsNamespaces, Table, TableCatalog} @@ -53,6 +55,12 @@ case class UnresolvedTableOrView( override def output: Seq[Attribute] = Nil } +sealed trait PartitionSpec + +case class UnresolvedPartitionSpec( + spec: TablePartitionSpec, + location: Option[String] = None) extends PartitionSpec + /** * Holds the name of a function that has yet to be looked up in a catalog. It will be resolved to * [[ResolvedFunc]] during analysis. @@ -78,6 +86,10 @@ case class ResolvedTable(catalog: TableCatalog, identifier: Identifier, table: T override def output: Seq[Attribute] = Nil } +case class ResolvedPartitionSpec( + spec: InternalRow, + location: Option[String] = None) extends PartitionSpec + /** * A plan containing resolved (temp) views. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 893afc8984e9c..be8bbb5ad3eba 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3415,7 +3415,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } /** - * Create an [[AlterTableAddPartitionStatement]]. + * Create an [[AlterTableAddPartition]]. * * For example: * {{{ @@ -3435,10 +3435,10 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging val specsAndLocs = ctx.partitionSpecLocation.asScala.map { splCtx => val spec = visitNonOptionalPartitionSpec(splCtx.partitionSpec) val location = Option(splCtx.locationSpec).map(visitLocationSpec) - spec -> location + UnresolvedPartitionSpec(spec, location) } - AlterTableAddPartitionStatement( - visitMultipartIdentifier(ctx.multipartIdentifier), + AlterTableAddPartition( + UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier)), specsAndLocs.toSeq, ctx.EXISTS != null) } @@ -3460,7 +3460,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } /** - * Create an [[AlterTableDropPartitionStatement]] + * Create an [[AlterTableDropPartition]] * * For example: * {{{ @@ -3477,9 +3477,11 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging if (ctx.VIEW != null) { operationNotAllowed("ALTER VIEW ... 
DROP PARTITION", ctx) } - AlterTableDropPartitionStatement( - visitMultipartIdentifier(ctx.multipartIdentifier), - ctx.partitionSpec.asScala.map(visitNonOptionalPartitionSpec).toSeq, + val partSpecs = ctx.partitionSpec.asScala.map(visitNonOptionalPartitionSpec) + .map(spec => UnresolvedPartitionSpec(spec)) + AlterTableDropPartition( + UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier)), + partSpecs.toSeq, ifExists = ctx.EXISTS != null, purge = ctx.PURGE != null, retainData = false) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index 2fc56891cd15e..39bc5a5604b20 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -217,14 +217,6 @@ case class AlterTableSetLocationStatement( case class AlterTableRecoverPartitionsStatement( tableName: Seq[String]) extends ParsedStatement -/** - * ALTER TABLE ... ADD PARTITION command, as parsed from SQL - */ -case class AlterTableAddPartitionStatement( - tableName: Seq[String], - partitionSpecsAndLocs: Seq[(TablePartitionSpec, Option[String])], - ifNotExists: Boolean) extends ParsedStatement - /** * ALTER TABLE ... RENAME PARTITION command, as parsed from SQL. */ @@ -233,16 +225,6 @@ case class AlterTableRenamePartitionStatement( from: TablePartitionSpec, to: TablePartitionSpec) extends ParsedStatement -/** - * ALTER TABLE ... DROP PARTITION command, as parsed from SQL - */ -case class AlterTableDropPartitionStatement( - tableName: Seq[String], - specs: Seq[TablePartitionSpec], - ifExists: Boolean, - purge: Boolean, - retainData: Boolean) extends ParsedStatement - /** * ALTER TABLE ... SERDEPROPERTIES command, as parsed from SQL */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index e65555ea27672..5bda2b5b8db01 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.plans.logical -import org.apache.spark.sql.catalyst.analysis.{NamedRelation, UnresolvedException} +import org.apache.spark.sql.catalyst.analysis.{NamedRelation, PartitionSpec, ResolvedPartitionSpec, UnresolvedException} import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet, Expression, Unevaluable} import org.apache.spark.sql.catalyst.plans.DescribeCommandSchema @@ -612,6 +612,46 @@ case class AnalyzeColumn( override def children: Seq[LogicalPlan] = child :: Nil } +/** + * The logical plan of the ALTER TABLE ADD PARTITION command. 
+ * + * The syntax of this command is: + * {{{ + * ALTER TABLE table ADD [IF NOT EXISTS] + * PARTITION spec1 [LOCATION 'loc1'][, PARTITION spec2 [LOCATION 'loc2'], ...]; + * }}} + */ +case class AlterTableAddPartition( + child: LogicalPlan, + parts: Seq[PartitionSpec], + ifNotExists: Boolean) extends Command { + override lazy val resolved: Boolean = + childrenResolved && parts.forall(_.isInstanceOf[ResolvedPartitionSpec]) + + override def children: Seq[LogicalPlan] = child :: Nil +} + +/** + * The logical plan of the ALTER TABLE DROP PARTITION command. + * This may remove the data and metadata for this partition. + * + * The syntax of this command is: + * {{{ + * ALTER TABLE table DROP [IF EXISTS] PARTITION spec1[, PARTITION spec2, ...]; + * }}} + */ +case class AlterTableDropPartition( + child: LogicalPlan, + parts: Seq[PartitionSpec], + ifExists: Boolean, + purge: Boolean, + retainData: Boolean) extends Command { + override lazy val resolved: Boolean = + childrenResolved && parts.forall(_.isInstanceOf[ResolvedPartitionSpec]) + + override def children: Seq[LogicalPlan] = child :: Nil +} + /** * The logical plan of the LOAD DATA INTO TABLE command. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Implicits.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Implicits.scala index 86ef867eca547..dfacf6e83ef57 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Implicits.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Implicits.scala @@ -20,7 +20,8 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.collection.JavaConverters._ import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.connector.catalog.{SupportsDelete, SupportsRead, SupportsWrite, Table, TableCapability} +import org.apache.spark.sql.catalyst.analysis.{PartitionSpec, ResolvedPartitionSpec, UnresolvedPartitionSpec} +import org.apache.spark.sql.connector.catalog.{SupportsAtomicPartitionManagement, SupportsDelete, SupportsPartitionManagement, SupportsRead, SupportsWrite, Table, TableCapability} import org.apache.spark.sql.util.CaseInsensitiveStringMap object DataSourceV2Implicits { @@ -52,6 +53,26 @@ object DataSourceV2Implicits { } } + def asPartitionable: SupportsPartitionManagement = { + table match { + case support: SupportsPartitionManagement => + support + case _ => + throw new AnalysisException( + s"Table does not support partition management: ${table.name}") + } + } + + def asAtomicPartitionable: SupportsAtomicPartitionManagement = { + table match { + case support: SupportsAtomicPartitionManagement => + support + case _ => + throw new AnalysisException( + s"Table does not support atomic partition management: ${table.name}") + } + } + def supports(capability: TableCapability): Boolean = table.capabilities.contains(capability) def supportsAny(capabilities: TableCapability*): Boolean = capabilities.exists(supports) @@ -62,4 +83,12 @@ object DataSourceV2Implicits { new CaseInsensitiveStringMap(options.asJava) } } + + implicit class PartitionSpecsHelper(partSpecs: Seq[PartitionSpec]) { + def asUnresolvedPartitionSpecs: Seq[UnresolvedPartitionSpec] = + partSpecs.map(_.asInstanceOf[UnresolvedPartitionSpec]) + + def asResolvedPartitionSpecs: Seq[ResolvedPartitionSpec] = + partSpecs.map(_.asInstanceOf[ResolvedPartitionSpec]) + } } diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index be1ac56c4a4a3..cddc392cfa2d7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.parser import java.util.Locale import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedNamespace, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView} +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedNamespace, UnresolvedPartitionSpec, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView} import org.apache.spark.sql.catalyst.catalog.{ArchiveResource, BucketSpec, FileResource, FunctionResource, FunctionResourceType, JarResource} import org.apache.spark.sql.catalyst.expressions.{EqualTo, Literal} import org.apache.spark.sql.catalyst.plans.logical._ @@ -1700,15 +1700,15 @@ class DDLParserSuite extends AnalysisTest { val parsed1 = parsePlan(sql1) val parsed2 = parsePlan(sql2) - val expected1 = AlterTableAddPartitionStatement( - Seq("a", "b", "c"), + val expected1 = AlterTableAddPartition( + UnresolvedTable(Seq("a", "b", "c")), Seq( - (Map("dt" -> "2008-08-08", "country" -> "us"), Some("location1")), - (Map("dt" -> "2009-09-09", "country" -> "uk"), None)), + UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us"), Some("location1")), + UnresolvedPartitionSpec(Map("dt" -> "2009-09-09", "country" -> "uk"), None)), ifNotExists = true) - val expected2 = AlterTableAddPartitionStatement( - Seq("a", "b", "c"), - Seq((Map("dt" -> "2008-08-08"), Some("loc"))), + val expected2 = AlterTableAddPartition( + UnresolvedTable(Seq("a", "b", "c")), + Seq(UnresolvedPartitionSpec(Map("dt" -> "2008-08-08"), Some("loc"))), ifNotExists = false) comparePlans(parsed1, expected1) @@ -1773,11 +1773,11 @@ class DDLParserSuite extends AnalysisTest { assertUnsupported(sql1_view) assertUnsupported(sql2_view) - val expected1_table = AlterTableDropPartitionStatement( - Seq("table_name"), + val expected1_table = AlterTableDropPartition( + UnresolvedTable(Seq("table_name")), Seq( - Map("dt" -> "2008-08-08", "country" -> "us"), - Map("dt" -> "2009-09-09", "country" -> "uk")), + UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us")), + UnresolvedPartitionSpec(Map("dt" -> "2009-09-09", "country" -> "uk"))), ifExists = true, purge = false, retainData = false) @@ -1789,9 +1789,9 @@ class DDLParserSuite extends AnalysisTest { comparePlans(parsed1_purge, expected1_purge) val sql3_table = "ALTER TABLE a.b.c DROP IF EXISTS PARTITION (ds='2017-06-10')" - val expected3_table = AlterTableDropPartitionStatement( - Seq("a", "b", "c"), - Seq(Map("ds" -> "2017-06-10")), + val expected3_table = AlterTableDropPartition( + UnresolvedTable(Seq("a", "b", "c")), + Seq(UnresolvedPartitionSpec(Map("ds" -> "2017-06-10"))), ifExists = true, purge = false, retainData = false) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTableCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTableCatalog.scala new file 
mode 100644 index 0000000000000..aebfe5af41825 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTableCatalog.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector + +import java.util + +import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException +import org.apache.spark.sql.connector.catalog.{CatalogV2Implicits, Identifier, Table} +import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.types.StructType + +class InMemoryPartitionTableCatalog extends InMemoryTableCatalog { + import CatalogV2Implicits._ + + override def createTable( + ident: Identifier, + schema: StructType, + partitions: Array[Transform], + properties: util.Map[String, String]): Table = { + if (tables.containsKey(ident)) { + throw new TableAlreadyExistsException(ident) + } + + InMemoryTableCatalog.maybeSimulateFailedTableCreation(properties) + + val table = new InMemoryAtomicPartitionTable( + s"$name.${ident.quoted}", schema, partitions, properties) + tables.put(ident, table) + namespaces.putIfAbsent(ident.namespace.toList, Map()) + table + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index ff25272aebb5b..bd9120a1fbe78 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType, CatalogUtils} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, CatalogV2Util, Identifier, LookupCatalog, SupportsNamespaces, TableCatalog, TableChange, V1Table} +import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, CatalogV2Util, Identifier, LookupCatalog, SupportsNamespaces, SupportsPartitionManagement, TableCatalog, TableChange, V1Table} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource} @@ -42,6 +42,7 @@ class ResolveSessionCatalog( extends Rule[LogicalPlan] with LookupCatalog { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.catalog.CatalogV2Util._ + import 
org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { case AlterTableAddColumnsStatement( @@ -497,11 +498,10 @@ class ResolveSessionCatalog( v1TableName.asTableIdentifier, "ALTER TABLE RECOVER PARTITIONS") - case AlterTableAddPartitionStatement(tbl, partitionSpecsAndLocs, ifNotExists) => - val v1TableName = parseV1Table(tbl, "ALTER TABLE ADD PARTITION") + case AlterTableAddPartition(ResolvedV1TableIdentifier(ident), partSpecsAndLocs, ifNotExists) => AlterTableAddPartitionCommand( - v1TableName.asTableIdentifier, - partitionSpecsAndLocs, + ident.asTableIdentifier, + partSpecsAndLocs.asUnresolvedPartitionSpecs.map(spec => (spec.spec, spec.location)), ifNotExists) case AlterTableRenamePartitionStatement(tbl, from, to) => @@ -511,11 +511,11 @@ class ResolveSessionCatalog( from, to) - case AlterTableDropPartitionStatement(tbl, specs, ifExists, purge, retainData) => - val v1TableName = parseV1Table(tbl, "ALTER TABLE DROP PARTITION") + case AlterTableDropPartition( + ResolvedV1TableIdentifier(ident), specs, ifExists, purge, retainData) => AlterTableDropPartitionCommand( - v1TableName.asTableIdentifier, - specs, + ident.asTableIdentifier, + specs.asUnresolvedPartitionSpecs.map(_.spec), ifExists, purge, retainData) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableAddPartitionExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableAddPartitionExec.scala new file mode 100644 index 0000000000000..0171cdd9ca41a --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableAddPartitionExec.scala @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.{PartitionsAlreadyExistException, ResolvedPartitionSpec} +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.connector.catalog.{SupportsAtomicPartitionManagement, SupportsPartitionManagement} + +/** + * Physical plan node for adding partitions of table. 
+ */ +case class AlterTableAddPartitionExec( + table: SupportsPartitionManagement, + partSpecs: Seq[ResolvedPartitionSpec], + ignoreIfExists: Boolean) extends V2CommandExec { + import DataSourceV2Implicits._ + + override def output: Seq[Attribute] = Seq.empty + + override protected def run(): Seq[InternalRow] = { + val (existsParts, notExistsParts) = + partSpecs.partition(p => table.partitionExists(p.spec)) + + if (existsParts.nonEmpty && !ignoreIfExists) { + throw new PartitionsAlreadyExistException( + table.name(), existsParts.map(_.spec), table.partitionSchema()) + } + + notExistsParts match { + case Seq() => // Nothing will be done + case Seq(partitionSpec) => + val partProp = partitionSpec.location.map(loc => "location" -> loc).toMap + table.createPartition(partitionSpec.spec, partProp.asJava) + case _ if table.isInstanceOf[SupportsAtomicPartitionManagement] => + val partIdents = notExistsParts.map(_.spec) + val partProps = notExistsParts.map(_.location.map(loc => "location" -> loc).toMap) + table.asAtomicPartitionable + .createPartitions( + partIdents.toArray, + partProps.map(_.asJava).toArray) + case _ => + throw new UnsupportedOperationException( + s"Nonatomic partition table ${table.name()} can not add multiple partitions.") + } + Seq.empty + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala new file mode 100644 index 0000000000000..09a65804a05eb --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionsException, ResolvedPartitionSpec} +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.connector.catalog.{SupportsAtomicPartitionManagement, SupportsPartitionManagement} + +/** + * Physical plan node for dropping partitions of table. 
+ */ +case class AlterTableDropPartitionExec( + table: SupportsPartitionManagement, + partSpecs: Seq[ResolvedPartitionSpec], + ignoreIfNotExists: Boolean) extends V2CommandExec { + import DataSourceV2Implicits._ + + override def output: Seq[Attribute] = Seq.empty + + override protected def run(): Seq[InternalRow] = { + val (existsPartIdents, notExistsPartIdents) = + partSpecs.map(_.spec).partition(table.partitionExists) + + if (notExistsPartIdents.nonEmpty && !ignoreIfNotExists) { + throw new NoSuchPartitionsException( + table.name(), notExistsPartIdents, table.partitionSchema()) + } + + existsPartIdents match { + case Seq() => // Nothing will be done + case Seq(partIdent) => + table.dropPartition(partIdent) + case _ if table.isInstanceOf[SupportsAtomicPartitionManagement] => + table.asAtomicPartitionable.dropPartitions(existsPartIdents.toArray) + case _ => + throw new UnsupportedOperationException( + s"Nonatomic partition table ${table.name()} can not drop multiple partitions.") + } + Seq.empty + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 48fa88ed550b6..a82f86ea952d9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.analysis.{ResolvedNamespace, ResolvedTable} import org.apache.spark.sql.catalyst.expressions.{And, Expression, NamedExpression, PredicateHelper, SubqueryExpression} import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.connector.catalog.{CatalogV2Util, StagingTableCatalog, SupportsNamespaces, TableCapability, TableCatalog, TableChange} +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, StagingTableCatalog, SupportsNamespaces, SupportsPartitionManagement, TableCapability, TableCatalog, TableChange} import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, MicroBatchStream} import org.apache.spark.sql.execution.{FilterExec, LeafExecNode, LocalTableScanExec, ProjectExec, RowDataSourceScanExec, SparkPlan} import org.apache.spark.sql.execution.datasources.DataSourceStrategy @@ -283,6 +283,16 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case AnalyzeTable(_: ResolvedTable, _, _) | AnalyzeColumn(_: ResolvedTable, _, _) => throw new AnalysisException("ANALYZE TABLE is not supported for v2 tables.") + case AlterTableAddPartition( + ResolvedTable(_, _, table: SupportsPartitionManagement), parts, ignoreIfExists) => + AlterTableAddPartitionExec( + table, parts.asResolvedPartitionSpecs, ignoreIfExists) :: Nil + + case AlterTableDropPartition( + ResolvedTable(_, _, table: SupportsPartitionManagement), parts, ignoreIfNotExists, _, _) => + AlterTableDropPartitionExec( + table, parts.asResolvedPartitionSpecs, ignoreIfNotExists) :: Nil + case LoadData(_: ResolvedTable, _, _, _, _) => throw new AnalysisException("LOAD DATA is not supported for v2 tables.") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala new file mode 100644 index 0000000000000..107d0ea47249d --- /dev/null +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionsException, PartitionsAlreadyExistException} +import org.apache.spark.sql.connector.catalog.{CatalogV2Implicits, Identifier} +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits + +class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { + + import CatalogV2Implicits._ + import DataSourceV2Implicits._ + + + test("ALTER TABLE RECOVER PARTITIONS") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo") + val e = intercept[AnalysisException] { + sql(s"ALTER TABLE $t RECOVER PARTITIONS") + } + assert(e.message.contains("ALTER TABLE RECOVER PARTITIONS is only supported with v1 tables")) + } + } + + test("ALTER TABLE ADD PARTITION") { + val t = "testpart.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") + spark.sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") + + val partTable = catalog("testpart").asTableCatalog + .loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")).asInstanceOf[InMemoryPartitionTable] + assert(partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) + + val partMetadata = partTable.loadPartitionMetadata(InternalRow.fromSeq(Seq(1))) + assert(partMetadata.containsKey("location")) + assert(partMetadata.get("location") == "loc") + } + } + + test("ALTER TABLE ADD PARTITIONS") { + val t = "testpart.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") + spark.sql( + s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc' PARTITION (id=2) LOCATION 'loc1'") + + val partTable = catalog("testpart").asTableCatalog + .loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")).asInstanceOf[InMemoryPartitionTable] + assert(partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) + assert(partTable.partitionExists(InternalRow.fromSeq(Seq(2)))) + + val partMetadata = partTable.loadPartitionMetadata(InternalRow.fromSeq(Seq(1))) + assert(partMetadata.containsKey("location")) + assert(partMetadata.get("location") == "loc") + + val partMetadata1 = partTable.loadPartitionMetadata(InternalRow.fromSeq(Seq(2))) + assert(partMetadata1.containsKey("location")) + assert(partMetadata1.get("location") == "loc1") + } + } + + test("ALTER TABLE ADD PARTITIONS: partition already exists") { + val t = "testpart.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data 
string) USING foo PARTITIONED BY (id)") + spark.sql( + s"ALTER TABLE $t ADD PARTITION (id=2) LOCATION 'loc1'") + + assertThrows[PartitionsAlreadyExistException]( + spark.sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'" + + " PARTITION (id=2) LOCATION 'loc1'")) + + val partTable = catalog("testpart").asTableCatalog + .loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")).asInstanceOf[InMemoryPartitionTable] + assert(!partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) + + spark.sql(s"ALTER TABLE $t ADD IF NOT EXISTS PARTITION (id=1) LOCATION 'loc'" + + " PARTITION (id=2) LOCATION 'loc1'") + assert(partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) + assert(partTable.partitionExists(InternalRow.fromSeq(Seq(2)))) + } + } + + test("ALTER TABLE RENAME PARTITION") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") + val e = intercept[AnalysisException] { + sql(s"ALTER TABLE $t PARTITION (id=1) RENAME TO PARTITION (id=2)") + } + assert(e.message.contains("ALTER TABLE RENAME PARTITION is only supported with v1 tables")) + } + } + + test("ALTER TABLE DROP PARTITION") { + val t = "testpart.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") + spark.sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") + spark.sql(s"ALTER TABLE $t DROP PARTITION (id=1)") + + val partTable = + catalog("testpart").asTableCatalog.loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")) + assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(1)))) + } + } + + test("ALTER TABLE DROP PARTITIONS") { + val t = "testpart.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") + spark.sql(s"ALTER TABLE $t ADD IF NOT EXISTS PARTITION (id=1) LOCATION 'loc'" + + " PARTITION (id=2) LOCATION 'loc1'") + spark.sql(s"ALTER TABLE $t DROP PARTITION (id=1), PARTITION (id=2)") + + val partTable = + catalog("testpart").asTableCatalog.loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")) + assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(1)))) + assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(2)))) + assert(partTable.asPartitionable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + } + } + + test("ALTER TABLE DROP PARTITIONS: partition not exists") { + val t = "testpart.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") + spark.sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") + + assertThrows[NoSuchPartitionsException]( + spark.sql(s"ALTER TABLE $t DROP PARTITION (id=1), PARTITION (id=2)")) + + val partTable = + catalog("testpart").asTableCatalog.loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")) + assert(partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(1)))) + + spark.sql(s"ALTER TABLE $t DROP IF EXISTS PARTITION (id=1), PARTITION (id=2)") + assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(1)))) + assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(2)))) + assert(partTable.asPartitionable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 68de55f03ba83..c480df323ddc2 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -24,7 +24,8 @@ import scala.collection.JavaConverters._ import org.apache.spark.SparkException import org.apache.spark.sql._ -import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NamespaceAlreadyExistsException, NoSuchDatabaseException, NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NamespaceAlreadyExistsException, NoSuchDatabaseException, NoSuchNamespaceException, NoSuchPartitionException, NoSuchPartitionsException, NoSuchTableException, PartitionsAlreadyExistException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME @@ -39,19 +40,16 @@ import org.apache.spark.util.Utils class DataSourceV2SQLSuite extends InsertIntoTests(supportsDynamicOverwrite = true, includeSQLOnlyTests = true) - with AlterTableTests { + with AlterTableTests with DatasourceV2SQLBase { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ private val v2Source = classOf[FakeV2Provider].getName override protected val v2Format = v2Source override protected val catalogAndNamespace = "testcat.ns1.ns2." private val defaultUser: String = Utils.getCurrentUserName() - private def catalog(name: String): CatalogPlugin = { - spark.sessionState.catalogManager.catalog(name) - } - protected def doInsert(tableName: String, insert: DataFrame, mode: SaveMode): Unit = { val tmpView = "tmp_view" withTempView(tmpView) { @@ -72,26 +70,6 @@ class DataSourceV2SQLSuite v2Catalog.loadTable(Identifier.of(namespace, nameParts.last)) } - before { - spark.conf.set("spark.sql.catalog.testcat", classOf[InMemoryTableCatalog].getName) - spark.conf.set( - "spark.sql.catalog.testcat_atomic", classOf[StagingInMemoryTableCatalog].getName) - spark.conf.set("spark.sql.catalog.testcat2", classOf[InMemoryTableCatalog].getName) - spark.conf.set( - V2_SESSION_CATALOG_IMPLEMENTATION.key, classOf[InMemoryTableSessionCatalog].getName) - - val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") - df.createOrReplaceTempView("source") - val df2 = spark.createDataFrame(Seq((4L, "d"), (5L, "e"), (6L, "f"))).toDF("id", "data") - df2.createOrReplaceTempView("source2") - } - - after { - spark.sessionState.catalog.reset() - spark.sessionState.catalogManager.reset() - spark.sessionState.conf.clear() - } - test("CreateTable: use v2 plan because catalog is set") { spark.sql("CREATE TABLE testcat.table_name (id bigint NOT NULL, data string) USING foo") @@ -2011,13 +1989,18 @@ class DataSourceV2SQLSuite } test("ALTER TABLE ADD PARTITION") { - val t = "testcat.ns1.ns2.tbl" + val t = "testpart.ns1.ns2.tbl" withTable(t) { spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") - val e = intercept[AnalysisException] { - sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") - } - assert(e.message.contains("ALTER TABLE ADD PARTITION is only supported with v1 tables")) + spark.sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") + + val partTable = catalog("testpart").asTableCatalog + 
.loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")).asInstanceOf[InMemoryPartitionTable] + assert(partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) + + val partMetadata = partTable.loadPartitionMetadata(InternalRow.fromSeq(Seq(1))) + assert(partMetadata.containsKey("location")) + assert(partMetadata.get("location") == "loc") } } @@ -2032,14 +2015,16 @@ class DataSourceV2SQLSuite } } - test("ALTER TABLE DROP PARTITIONS") { - val t = "testcat.ns1.ns2.tbl" + test("ALTER TABLE DROP PARTITION") { + val t = "testpart.ns1.ns2.tbl" withTable(t) { spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") - val e = intercept[AnalysisException] { - sql(s"ALTER TABLE $t DROP PARTITION (id=1)") - } - assert(e.message.contains("ALTER TABLE DROP PARTITION is only supported with v1 tables")) + spark.sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") + spark.sql(s"ALTER TABLE $t DROP PARTITION (id=1)") + + val partTable = + catalog("testpart").asTableCatalog.loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")) + assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(1)))) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DatasourceV2SQLBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DatasourceV2SQLBase.scala new file mode 100644 index 0000000000000..8922eea8e0ae6 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DatasourceV2SQLBase.scala @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.connector + +import org.scalatest.BeforeAndAfter + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.connector.catalog.CatalogPlugin +import org.apache.spark.sql.internal.SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION +import org.apache.spark.sql.test.SharedSparkSession + +trait DatasourceV2SQLBase + extends QueryTest with SharedSparkSession with BeforeAndAfter { + + protected def catalog(name: String): CatalogPlugin = { + spark.sessionState.catalogManager.catalog(name) + } + + before { + spark.conf.set("spark.sql.catalog.testcat", classOf[InMemoryTableCatalog].getName) + spark.conf.set("spark.sql.catalog.testpart", classOf[InMemoryPartitionTableCatalog].getName) + spark.conf.set( + "spark.sql.catalog.testcat_atomic", classOf[StagingInMemoryTableCatalog].getName) + spark.conf.set("spark.sql.catalog.testcat2", classOf[InMemoryTableCatalog].getName) + spark.conf.set( + V2_SESSION_CATALOG_IMPLEMENTATION.key, classOf[InMemoryTableSessionCatalog].getName) + + val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") + df.createOrReplaceTempView("source") + val df2 = spark.createDataFrame(Seq((4L, "d"), (5L, "e"), (6L, "f"))).toDF("id", "data") + df2.createOrReplaceTempView("source2") + } + + after { + spark.sessionState.catalog.reset() + spark.sessionState.catalogManager.reset() + spark.sessionState.conf.clear() + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index f5d6ea929a9aa..87a5cb9f73355 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -138,8 +138,6 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { assertNoSuchTable(s"ALTER TABLE $viewName SET SERDE 'whatever'") assertNoSuchTable(s"ALTER TABLE $viewName PARTITION (a=1, b=2) SET SERDE 'whatever'") assertNoSuchTable(s"ALTER TABLE $viewName SET SERDEPROPERTIES ('p' = 'an')") - assertNoSuchTable(s"ALTER TABLE $viewName ADD IF NOT EXISTS PARTITION (a='4', b='8')") - assertNoSuchTable(s"ALTER TABLE $viewName DROP PARTITION (a='4', b='8')") assertNoSuchTable(s"ALTER TABLE $viewName PARTITION (a='4') RENAME TO PARTITION (a='5')") assertNoSuchTable(s"ALTER TABLE $viewName RECOVER PARTITIONS") @@ -147,6 +145,12 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { assertAnalysisError( s"ALTER TABLE $viewName SET LOCATION '/path/to/your/lovely/heart'", s"'$viewName' is a view not a table") + assertAnalysisError( + s"ALTER TABLE $viewName ADD IF NOT EXISTS PARTITION (a='4', b='8')", + s"$viewName is a temp view not table") + assertAnalysisError( + s"ALTER TABLE $viewName DROP PARTITION (a='4', b='8')", + s"$viewName is a temp view not table") // For the following v2 ALERT TABLE statements, unsupported operations are checked first // before resolving the relations. 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 44c551cf4a4c1..1f15bd685b239 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -815,6 +815,11 @@ class HiveDDLSuite } } + private def assertAnalysisError(sqlText: String, message: String): Unit = { + val e = intercept[AnalysisException](sql(sqlText)) + assert(e.message.contains(message)) + } + private def assertErrorForAlterTableOnView(sqlText: String): Unit = { val message = intercept[AnalysisException](sql(sqlText)).getMessage assert(message.contains("Cannot alter a view with ALTER TABLE. Please use ALTER VIEW instead")) @@ -892,16 +897,18 @@ class HiveDDLSuite assertErrorForAlterTableOnView( s"ALTER TABLE $oldViewName PARTITION (a=1, b=2) SET SERDEPROPERTIES ('x' = 'y')") - assertErrorForAlterTableOnView( - s"ALTER TABLE $oldViewName ADD IF NOT EXISTS PARTITION (a='4', b='8')") - - assertErrorForAlterTableOnView(s"ALTER TABLE $oldViewName DROP IF EXISTS PARTITION (a='2')") - assertErrorForAlterTableOnView(s"ALTER TABLE $oldViewName RECOVER PARTITIONS") assertErrorForAlterTableOnView( s"ALTER TABLE $oldViewName PARTITION (a='1') RENAME TO PARTITION (a='100')") + assertAnalysisError( + s"ALTER TABLE $oldViewName ADD IF NOT EXISTS PARTITION (a='4', b='8')", + s"$oldViewName is a view not table") + assertAnalysisError( + s"ALTER TABLE $oldViewName DROP IF EXISTS PARTITION (a='2')", + s"$oldViewName is a view not table") + assert(catalog.tableExists(TableIdentifier(tabName))) assert(catalog.tableExists(TableIdentifier(oldViewName))) assert(!catalog.tableExists(TableIdentifier(newViewName))) From 4b76a74f1c0b5d9bd11794eefd739352764c88c4 Mon Sep 17 00:00:00 2001 From: zero323 Date: Thu, 12 Nov 2020 00:13:17 +0900 Subject: [PATCH 0446/1009] [SPARK-33415][PYTHON][SQL] Don't encode JVM response in Column.__repr__ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Removes encoding of the JVM response in `pyspark.sql.column.Column.__repr__`. ### Why are the changes needed? API consistency and improved readability of the expressions. ### Does this PR introduce _any_ user-facing change? Before this change, `col("abc")` and `col("wąż")` result in `Column<b'abc'>` and `Column<b'w\xc4\x85\xc5\xbc'>`, respectively. After this change we'll get `Column<'abc'>` and `Column<'wąż'>`. ### How was this patch tested? Existing tests and manual inspection. Closes #30322 from zero323/SPARK-33415.
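For illustration only (not part of the patch), a minimal PySpark snippet that shows the repr change described above; the local session setup is an assumption of this sketch, and the "old" output noted in the comments assumes Python 3, where `.encode('utf8')` embeds a `bytes` literal in the repr.
```python
from pyspark.sql import SparkSession
from pyspark.sql.functions import col

# A running session is needed because Column.__repr__ calls into the JVM.
spark = SparkSession.builder.master("local[1]").getOrCreate()

# Old repr (with .encode('utf8')): Column<b'abc'>, Column<b'w\xc4\x85\xc5\xbc'>
# New repr (no encoding):          Column<'abc'>,  Column<'wąż'>
print(repr(col("abc")))
print(repr(col("wąż")))
```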
Authored-by: zero323 Signed-off-by: HyukjinKwon --- python/pyspark/sql/column.py | 2 +- python/pyspark/sql/tests/test_column.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py index 3dd08d88e92c4..345e81bd2d73e 100644 --- a/python/pyspark/sql/column.py +++ b/python/pyspark/sql/column.py @@ -906,7 +906,7 @@ def __nonzero__(self): __bool__ = __nonzero__ def __repr__(self): - return 'Column<%s>' % self._jc.toString().encode('utf8') + return "Column<'%s'>" % self._jc.toString() def _test(): diff --git a/python/pyspark/sql/tests/test_column.py b/python/pyspark/sql/tests/test_column.py index 4b4ac3bf9cd6c..4a9c7106a12b0 100644 --- a/python/pyspark/sql/tests/test_column.py +++ b/python/pyspark/sql/tests/test_column.py @@ -116,6 +116,7 @@ def test_column_name_with_non_ascii(self): self.assertEqual([("数量", 'bigint')], df.dtypes) self.assertEqual(1, df.select("数量").first()[0]) self.assertEqual(1, df.select(df["数量"]).first()[0]) + self.assertTrue(columnName in repr(df[columnName])) def test_field_accessor(self): df = self.sc.parallelize([Row(l=[1], r=Row(a=1, b="b"), d={"k": "v"})]).toDF() From 7e867298fed60db670e40013524ed41b1ab46215 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 11 Nov 2020 08:50:43 -0800 Subject: [PATCH 0447/1009] [SPARK-33404][SQL][FOLLOWUP] Update benchmark results for `date_trunc` ### What changes were proposed in this pull request? Updated results of `DateTimeBenchmark` in the environment: | Item | Description | | ---- | ----| | Region | us-west-2 (Oregon) | | Instance | r3.xlarge (spot instance) | | AMI | ami-06f2f779464715dc5 (ubuntu/images/hvm-ssd/ubuntu-bionic-18.04-amd64-server-20190722.1) | | Java | OpenJDK8/11 installed by`sudo add-apt-repository ppa:openjdk-r/ppa` & `sudo apt install openjdk-11-jdk`| ### Why are the changes needed? The fix https://github.com/apache/spark/pull/30303 slowed down `date_trunc`. This PR updates benchmark results to have actual info about performance of `date_trunc`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By regenerating benchmark results: ``` $ SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain org.apache.spark.sql.execution.benchmark.DateTimeBenchmark" ``` Closes #30338 from MaxGekk/fix-trunc_date-benchmark. 
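As a rough, hedged illustration of what the updated numbers below measure, this sketch times a `date_trunc('MINUTE', ...)` projection over generated timestamps; it is not the `DateTimeBenchmark` harness itself, and the row count, local master, and `noop` sink are arbitrary choices made for this example.
```python
import time

from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[*]").getOrCreate()

# 10M rows of synthetic timestamps (bigint ids reinterpreted as epoch seconds).
df = spark.range(10 * 1000 * 1000).selectExpr("CAST(id AS timestamp) AS ts")

start = time.time()
# The 'noop' sink forces full evaluation of the projection without collecting rows.
df.selectExpr("date_trunc('MINUTE', ts)") \
    .write.format("noop").mode("overwrite").save()
print(f"date_trunc('MINUTE') over 10M rows: {time.time() - start:.2f} s")
```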
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../DateTimeBenchmark-jdk11-results.txt | 372 +++++++++--------- .../benchmarks/DateTimeBenchmark-results.txt | 372 +++++++++--------- 2 files changed, 372 insertions(+), 372 deletions(-) diff --git a/sql/core/benchmarks/DateTimeBenchmark-jdk11-results.txt b/sql/core/benchmarks/DateTimeBenchmark-jdk11-results.txt index d84dccbf6c266..b787eff7029e6 100644 --- a/sql/core/benchmarks/DateTimeBenchmark-jdk11-results.txt +++ b/sql/core/benchmarks/DateTimeBenchmark-jdk11-results.txt @@ -2,460 +2,460 @@ datetime +/- interval ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz datetime +/- interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date + interval(m) 1550 1609 83 6.5 155.0 1.0X -date + interval(m, d) 1572 1575 5 6.4 157.2 1.0X -date + interval(m, d, ms) 6512 6512 0 1.5 651.2 0.2X -date - interval(m) 1469 1489 28 6.8 146.9 1.1X -date - interval(m, d) 1558 1572 19 6.4 155.8 1.0X -date - interval(m, d, ms) 6602 6605 4 1.5 660.2 0.2X -timestamp + interval(m) 2945 2961 23 3.4 294.5 0.5X -timestamp + interval(m, d) 3075 3083 12 3.3 307.5 0.5X -timestamp + interval(m, d, ms) 3421 3430 13 2.9 342.1 0.5X -timestamp - interval(m) 3050 3061 17 3.3 305.0 0.5X -timestamp - interval(m, d) 3195 3201 8 3.1 319.5 0.5X -timestamp - interval(m, d, ms) 3442 3450 11 2.9 344.2 0.5X +date + interval(m) 1556 1667 157 6.4 155.6 1.0X +date + interval(m, d) 1582 1593 16 6.3 158.2 1.0X +date + interval(m, d, ms) 6619 6625 9 1.5 661.9 0.2X +date - interval(m) 1463 1475 16 6.8 146.3 1.1X +date - interval(m, d) 1569 1589 29 6.4 156.9 1.0X +date - interval(m, d, ms) 6638 6641 5 1.5 663.8 0.2X +timestamp + interval(m) 3153 3159 7 3.2 315.3 0.5X +timestamp + interval(m, d) 3230 3234 7 3.1 323.0 0.5X +timestamp + interval(m, d, ms) 3309 3313 5 3.0 330.9 0.5X +timestamp - interval(m) 2897 2900 4 3.5 289.7 0.5X +timestamp - interval(m, d) 3018 3019 1 3.3 301.8 0.5X +timestamp - interval(m, d, ms) 3313 3317 5 3.0 331.3 0.5X ================================================================================================ Extract components ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz cast to timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to timestamp wholestage off 320 326 8 31.2 32.0 1.0X -cast to timestamp wholestage on 289 297 5 34.6 28.9 1.1X +cast to timestamp wholestage off 314 319 7 31.8 31.4 1.0X +cast to timestamp wholestage on 289 305 12 34.6 28.9 1.1X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz year of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -year of timestamp wholestage off 1266 1266 1 7.9 126.6 1.0X -year of timestamp wholestage on 1233 1253 15 8.1 123.3 1.0X +year of timestamp wholestage off 1237 1247 14 8.1 123.7 1.0X +year of timestamp wholestage on 1242 1251 11 8.0 124.2 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz quarter of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -quarter of timestamp wholestage off 1594 1600 8 6.3 159.4 1.0X -quarter of timestamp wholestage on 1529 1532 3 6.5 152.9 1.0X +quarter of timestamp wholestage off 1589 1590 2 6.3 158.9 1.0X +quarter of timestamp wholestage on 1541 1556 11 6.5 154.1 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz month of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -month of timestamp wholestage off 1239 1257 25 8.1 123.9 1.0X -month of timestamp wholestage on 1235 1243 5 8.1 123.5 1.0X +month of timestamp wholestage off 1236 1252 23 8.1 123.6 1.0X +month of timestamp wholestage on 1226 1232 5 8.2 122.6 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz weekofyear of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -weekofyear of timestamp wholestage off 2209 2216 9 4.5 220.9 1.0X -weekofyear of timestamp wholestage on 1831 1838 9 5.5 183.1 1.2X +weekofyear of timestamp wholestage off 1877 1879 3 5.3 187.7 1.0X +weekofyear of timestamp wholestage on 1852 1872 28 5.4 185.2 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz day of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -day of timestamp wholestage off 1238 1238 0 8.1 123.8 1.0X -day of timestamp wholestage on 1223 1235 12 8.2 122.3 1.0X +day of timestamp wholestage off 1260 1262 3 7.9 126.0 1.0X +day of timestamp wholestage on 1230 1238 9 8.1 123.0 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz dayofyear of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -dayofyear of timestamp wholestage off 1302 1304 3 7.7 130.2 
1.0X -dayofyear of timestamp wholestage on 1269 1276 6 7.9 126.9 1.0X +dayofyear of timestamp wholestage off 1281 1285 7 7.8 128.1 1.0X +dayofyear of timestamp wholestage on 1268 1272 6 7.9 126.8 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz dayofmonth of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -dayofmonth of timestamp wholestage off 1251 1253 3 8.0 125.1 1.0X -dayofmonth of timestamp wholestage on 1225 1232 9 8.2 122.5 1.0X +dayofmonth of timestamp wholestage off 1280 1287 9 7.8 128.0 1.0X +dayofmonth of timestamp wholestage on 1232 1237 5 8.1 123.2 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz dayofweek of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -dayofweek of timestamp wholestage off 1424 1424 1 7.0 142.4 1.0X -dayofweek of timestamp wholestage on 1385 1389 4 7.2 138.5 1.0X +dayofweek of timestamp wholestage off 1417 1419 4 7.1 141.7 1.0X +dayofweek of timestamp wholestage on 1419 1435 19 7.0 141.9 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz weekday of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -weekday of timestamp wholestage off 1366 1366 0 7.3 136.6 1.0X -weekday of timestamp wholestage on 1320 1325 5 7.6 132.0 1.0X +weekday of timestamp wholestage off 1353 1359 8 7.4 135.3 1.0X +weekday of timestamp wholestage on 1338 1345 7 7.5 133.8 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz hour of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -hour of timestamp wholestage off 985 986 1 10.2 98.5 1.0X -hour of timestamp wholestage on 974 981 10 10.3 97.4 1.0X +hour of timestamp wholestage off 985 998 17 10.1 98.5 1.0X +hour of timestamp wholestage on 935 938 3 10.7 93.5 1.1X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz minute of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -minute of timestamp wholestage off 1044 1047 5 9.6 104.4 1.0X -minute of timestamp wholestage on 984 994 17 10.2 98.4 1.1X +minute of timestamp wholestage off 1053 1053 0 9.5 105.3 1.0X +minute of timestamp 
wholestage on 934 940 9 10.7 93.4 1.1X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz second of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -second of timestamp wholestage off 999 1003 6 10.0 99.9 1.0X -second of timestamp wholestage on 961 974 8 10.4 96.1 1.0X +second of timestamp wholestage off 978 983 7 10.2 97.8 1.0X +second of timestamp wholestage on 935 944 9 10.7 93.5 1.0X ================================================================================================ Current date and time ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz current_date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -current_date wholestage off 297 302 7 33.6 29.7 1.0X -current_date wholestage on 270 283 22 37.1 27.0 1.1X +current_date wholestage off 297 299 2 33.6 29.7 1.0X +current_date wholestage on 273 283 11 36.6 27.3 1.1X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz current_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -current_timestamp wholestage off 302 310 11 33.1 30.2 1.0X -current_timestamp wholestage on 264 351 98 37.9 26.4 1.1X +current_timestamp wholestage off 300 365 92 33.4 30.0 1.0X +current_timestamp wholestage on 276 381 91 36.3 27.6 1.1X ================================================================================================ Date arithmetic ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz cast to date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to date wholestage off 1083 1083 1 9.2 108.3 1.0X -cast to date wholestage on 1040 1044 5 9.6 104.0 1.0X +cast to date wholestage off 1073 1087 20 9.3 107.3 1.0X +cast to date wholestage on 1009 1016 7 9.9 100.9 1.1X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz last_day: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -last_day wholestage off 1258 1258 0 7.9 125.8 1.0X -last_day wholestage on 1244 
1254 8 8.0 124.4 1.0X +last_day wholestage off 1253 1254 2 8.0 125.3 1.0X +last_day wholestage on 1247 1257 10 8.0 124.7 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz next_day: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -next_day wholestage off 1133 1135 3 8.8 113.3 1.0X -next_day wholestage on 1093 1100 7 9.1 109.3 1.0X +next_day wholestage off 1150 1150 1 8.7 115.0 1.0X +next_day wholestage on 1061 1066 5 9.4 106.1 1.1X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_add: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_add wholestage off 1065 1074 14 9.4 106.5 1.0X -date_add wholestage on 1044 1053 6 9.6 104.4 1.0X +date_add wholestage off 1062 1068 9 9.4 106.2 1.0X +date_add wholestage on 1049 1056 8 9.5 104.9 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_sub: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_sub wholestage off 1069 1076 9 9.4 106.9 1.0X -date_sub wholestage on 1047 1052 8 9.6 104.7 1.0X +date_sub wholestage off 1063 1067 6 9.4 106.3 1.0X +date_sub wholestage on 1043 1061 26 9.6 104.3 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz add_months: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -add_months wholestage off 1417 1430 18 7.1 141.7 1.0X -add_months wholestage on 1439 1445 5 6.9 143.9 1.0X +add_months wholestage off 1427 1434 10 7.0 142.7 1.0X +add_months wholestage on 1436 1449 11 7.0 143.6 1.0X ================================================================================================ Formatting dates ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz format date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -format date wholestage off 5228 5232 6 1.9 522.8 1.0X -format date wholestage on 5172 5193 17 1.9 517.2 1.0X +format date wholestage off 5200 5214 19 1.9 520.0 1.0X +format date wholestage on 5404 5424 14 1.9 540.4 1.0X 
================================================================================================ Formatting timestamps ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz from_unixtime: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -from_unixtime wholestage off 6941 6952 16 1.4 694.1 1.0X -from_unixtime wholestage on 6898 6926 32 1.4 689.8 1.0X +from_unixtime wholestage off 7493 7494 2 1.3 749.3 1.0X +from_unixtime wholestage on 7506 7514 7 1.3 750.6 1.0X ================================================================================================ Convert timestamps ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz from_utc_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -from_utc_timestamp wholestage off 1339 1342 5 7.5 133.9 1.0X -from_utc_timestamp wholestage on 1285 1292 5 7.8 128.5 1.0X +from_utc_timestamp wholestage off 1314 1317 4 7.6 131.4 1.0X +from_utc_timestamp wholestage on 1273 1279 6 7.9 127.3 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz to_utc_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_utc_timestamp wholestage off 1697 1717 29 5.9 169.7 1.0X -to_utc_timestamp wholestage on 1656 1665 13 6.0 165.6 1.0X +to_utc_timestamp wholestage off 1751 1752 1 5.7 175.1 1.0X +to_utc_timestamp wholestage on 1711 1716 6 5.8 171.1 1.0X ================================================================================================ Intervals ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz cast interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast interval wholestage off 333 344 16 30.1 33.3 1.0X -cast interval wholestage on 288 290 2 34.7 28.8 1.2X +cast interval wholestage off 332 337 7 30.1 33.2 1.0X +cast interval wholestage on 288 289 1 34.7 28.8 1.2X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz datediff: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -datediff wholestage off 1857 1860 4 5.4 185.7 1.0X -datediff wholestage on 1795 1808 10 5.6 179.5 1.0X +datediff wholestage off 1850 1852 3 5.4 185.0 1.0X +datediff wholestage on 1783 1791 5 5.6 178.3 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz months_between: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -months_between wholestage off 5826 5834 11 1.7 582.6 1.0X -months_between wholestage on 5737 5763 18 1.7 573.7 1.0X +months_between wholestage off 5540 5545 8 1.8 554.0 1.0X +months_between wholestage on 5474 5482 8 1.8 547.4 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz window: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -window wholestage off 2220 2246 36 0.5 2220.4 1.0X -window wholestage on 46696 46794 89 0.0 46696.1 0.0X +window wholestage off 2200 2309 154 0.5 2200.0 1.0X +window wholestage on 47429 47483 35 0.0 47428.8 0.0X ================================================================================================ Truncation ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc YEAR: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc YEAR wholestage off 2658 2659 1 3.8 265.8 1.0X -date_trunc YEAR wholestage on 2691 2700 8 3.7 269.1 1.0X +date_trunc YEAR wholestage off 2587 2591 5 3.9 258.7 1.0X +date_trunc YEAR wholestage on 2531 2548 11 4.0 253.1 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc YYYY: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc YYYY wholestage off 2671 2679 11 3.7 267.1 1.0X -date_trunc YYYY wholestage on 2700 2706 6 3.7 270.0 1.0X +date_trunc YYYY wholestage off 2595 2596 1 3.9 259.5 1.0X +date_trunc YYYY wholestage on 2532 2537 9 3.9 253.2 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc YY: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc YY 
wholestage off 2674 2689 20 3.7 267.4 1.0X -date_trunc YY wholestage on 2697 2716 17 3.7 269.7 1.0X +date_trunc YY wholestage off 2604 2604 1 3.8 260.4 1.0X +date_trunc YY wholestage on 2529 2539 7 4.0 252.9 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc MON: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MON wholestage off 2695 2700 7 3.7 269.5 1.0X -date_trunc MON wholestage on 2711 2722 11 3.7 271.1 1.0X +date_trunc MON wholestage off 2601 2606 7 3.8 260.1 1.0X +date_trunc MON wholestage on 2544 2551 5 3.9 254.4 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc MONTH: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MONTH wholestage off 2682 2685 4 3.7 268.2 1.0X -date_trunc MONTH wholestage on 2709 2727 15 3.7 270.9 1.0X +date_trunc MONTH wholestage off 2596 2597 1 3.9 259.6 1.0X +date_trunc MONTH wholestage on 2547 2552 8 3.9 254.7 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc MM: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MM wholestage off 2683 2693 14 3.7 268.3 1.0X -date_trunc MM wholestage on 2706 2722 16 3.7 270.6 1.0X +date_trunc MM wholestage off 2598 2598 1 3.8 259.8 1.0X +date_trunc MM wholestage on 2545 2550 5 3.9 254.5 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc DAY: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc DAY wholestage off 2292 2299 10 4.4 229.2 1.0X -date_trunc DAY wholestage on 2290 2311 14 4.4 229.0 1.0X +date_trunc DAY wholestage off 2248 2249 2 4.4 224.8 1.0X +date_trunc DAY wholestage on 2215 2222 6 4.5 221.5 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc DD: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc DD wholestage off 2302 2309 9 4.3 230.2 1.0X -date_trunc DD wholestage on 2282 2292 6 4.4 228.2 1.0X +date_trunc DD wholestage off 2244 2251 9 4.5 224.4 1.0X +date_trunc DD wholestage on 2214 2220 6 4.5 221.4 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 
64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc HOUR: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc HOUR wholestage off 2288 2288 0 4.4 228.8 1.0X -date_trunc HOUR wholestage on 2277 2290 14 4.4 227.7 1.0X +date_trunc HOUR wholestage off 2208 2211 3 4.5 220.8 1.0X +date_trunc HOUR wholestage on 2228 2233 3 4.5 222.8 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc MINUTE: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MINUTE wholestage off 400 419 26 25.0 40.0 1.0X -date_trunc MINUTE wholestage on 401 405 4 24.9 40.1 1.0X +date_trunc MINUTE wholestage off 2230 2238 11 4.5 223.0 1.0X +date_trunc MINUTE wholestage on 2217 2225 11 4.5 221.7 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc SECOND: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc SECOND wholestage off 408 414 9 24.5 40.8 1.0X -date_trunc SECOND wholestage on 408 413 8 24.5 40.8 1.0X +date_trunc SECOND wholestage off 353 362 12 28.3 35.3 1.0X +date_trunc SECOND wholestage on 333 336 3 30.0 33.3 1.1X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc WEEK: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc WEEK wholestage off 2623 2631 12 3.8 262.3 1.0X -date_trunc WEEK wholestage on 2613 2621 8 3.8 261.3 1.0X +date_trunc WEEK wholestage off 2473 2478 7 4.0 247.3 1.0X +date_trunc WEEK wholestage on 2439 2462 33 4.1 243.9 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc QUARTER: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc QUARTER wholestage off 3518 3520 3 2.8 351.8 1.0X -date_trunc QUARTER wholestage on 3501 3510 11 2.9 350.1 1.0X +date_trunc QUARTER wholestage off 3163 3165 3 3.2 316.3 1.0X +date_trunc QUARTER wholestage on 3129 3142 13 3.2 312.9 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz trunc year: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -trunc year wholestage off 315 333 26 31.8 31.5 1.0X -trunc year wholestage on 352 360 7 28.4 35.2 0.9X +trunc year wholestage off 309 311 3 32.4 30.9 1.0X +trunc year wholestage on 325 332 4 30.8 32.5 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz trunc yyyy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc yyyy wholestage off 321 321 1 31.2 32.1 1.0X -trunc yyyy wholestage on 354 358 5 28.3 35.4 0.9X +trunc yyyy wholestage off 319 320 2 31.4 31.9 1.0X +trunc yyyy wholestage on 324 328 4 30.9 32.4 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz trunc yy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc yy wholestage off 312 313 1 32.0 31.2 1.0X -trunc yy wholestage on 355 360 5 28.2 35.5 0.9X +trunc yy wholestage off 311 313 3 32.2 31.1 1.0X +trunc yy wholestage on 324 330 4 30.8 32.4 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz trunc mon: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc mon wholestage off 324 327 4 30.9 32.4 1.0X -trunc mon wholestage on 355 357 2 28.2 35.5 0.9X +trunc mon wholestage off 310 313 4 32.2 31.0 1.0X +trunc mon wholestage on 326 329 4 30.7 32.6 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz trunc month: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc month wholestage off 313 318 8 32.0 31.3 1.0X -trunc month wholestage on 354 358 5 28.3 35.4 0.9X +trunc month wholestage off 308 318 13 32.4 30.8 1.0X +trunc month wholestage on 324 326 3 30.9 32.4 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz trunc mm: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc mm wholestage off 314 325 15 31.8 31.4 1.0X -trunc mm wholestage on 353 366 17 28.4 35.3 0.9X +trunc mm wholestage off 309 314 7 32.4 30.9 1.0X +trunc mm wholestage on 323 329 5 31.0 32.3 1.0X ================================================================================================ Parsing 
================================================================================================ -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz to timestamp str: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to timestamp str wholestage off 168 169 0 5.9 168.4 1.0X -to timestamp str wholestage on 168 173 7 6.0 167.6 1.0X +to timestamp str wholestage off 172 174 2 5.8 172.4 1.0X +to timestamp str wholestage on 171 174 4 5.9 170.6 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz to_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_timestamp wholestage off 1390 1390 0 0.7 1389.8 1.0X -to_timestamp wholestage on 1204 1215 11 0.8 1204.2 1.2X +to_timestamp wholestage off 1410 1411 2 0.7 1410.4 1.0X +to_timestamp wholestage on 1364 1375 10 0.7 1364.4 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz to_unix_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_unix_timestamp wholestage off 1277 1281 4 0.8 1277.5 1.0X -to_unix_timestamp wholestage on 1203 1213 11 0.8 1202.6 1.1X +to_unix_timestamp wholestage off 1449 1453 6 0.7 1449.2 1.0X +to_unix_timestamp wholestage on 1379 1389 9 0.7 1379.5 1.1X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz to date str: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to date str wholestage off 218 219 1 4.6 218.2 1.0X -to date str wholestage on 211 214 5 4.7 210.8 1.0X +to date str wholestage off 228 231 4 4.4 228.1 1.0X +to date str wholestage on 211 213 1 4.7 210.6 1.1X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz to_date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_date wholestage off 3016 3041 35 0.3 3016.1 1.0X -to_date wholestage on 3015 3023 9 0.3 3014.6 1.0X +to_date wholestage off 3147 3173 37 0.3 3147.0 1.0X +to_date wholestage on 3123 3137 13 0.3 3123.0 1.0X ================================================================================================ Conversion from/to external types ================================================================================================ -OpenJDK 64-Bit Server 
VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz To/from Java's date-time: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -From java.sql.Date 430 442 18 11.6 86.0 1.0X -From java.time.LocalDate 351 354 3 14.3 70.2 1.2X -Collect java.sql.Date 2095 2853 733 2.4 418.9 0.2X -Collect java.time.LocalDate 1691 1910 209 3.0 338.3 0.3X -From java.sql.Timestamp 276 280 4 18.1 55.2 1.6X -From java.time.Instant 324 328 4 15.4 64.8 1.3X -Collect longs 1348 1450 126 3.7 269.5 0.3X -Collect java.sql.Timestamp 1441 1478 62 3.5 288.3 0.3X -Collect java.time.Instant 1471 1579 100 3.4 294.3 0.3X -java.sql.Date to Hive string 12049 12909 862 0.4 2409.8 0.0X -java.time.LocalDate to Hive string 12045 12130 74 0.4 2408.9 0.0X -java.sql.Timestamp to Hive string 12854 13376 510 0.4 2570.9 0.0X -java.time.Instant to Hive string 15057 15184 115 0.3 3011.4 0.0X +From java.sql.Date 403 414 13 12.4 80.6 1.0X +From java.time.LocalDate 342 346 4 14.6 68.4 1.2X +Collect java.sql.Date 2122 2549 639 2.4 424.4 0.2X +Collect java.time.LocalDate 1833 2034 175 2.7 366.5 0.2X +From java.sql.Timestamp 244 250 6 20.5 48.8 1.7X +From java.time.Instant 315 316 1 15.9 63.0 1.3X +Collect longs 1436 1452 19 3.5 287.2 0.3X +Collect java.sql.Timestamp 1685 1698 14 3.0 337.0 0.2X +Collect java.time.Instant 1722 2022 278 2.9 344.4 0.2X +java.sql.Date to Hive string 14996 16316 1670 0.3 2999.2 0.0X +java.time.LocalDate to Hive string 13774 13942 160 0.4 2754.8 0.0X +java.sql.Timestamp to Hive string 15346 15775 435 0.3 3069.3 0.0X +java.time.Instant to Hive string 17731 18153 444 0.3 3546.1 0.0X diff --git a/sql/core/benchmarks/DateTimeBenchmark-results.txt b/sql/core/benchmarks/DateTimeBenchmark-results.txt index ebfcb45f30ce0..8e22dbbd8b8b3 100644 --- a/sql/core/benchmarks/DateTimeBenchmark-results.txt +++ b/sql/core/benchmarks/DateTimeBenchmark-results.txt @@ -2,460 +2,460 @@ datetime +/- interval ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz datetime +/- interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date + interval(m) 1636 1653 24 6.1 163.6 1.0X -date + interval(m, d) 1802 1818 23 5.5 180.2 0.9X -date + interval(m, d, ms) 6330 6348 26 1.6 633.0 0.3X -date - interval(m) 1462 1484 32 6.8 146.2 1.1X -date - interval(m, d) 1732 1732 1 5.8 173.2 0.9X -date - interval(m, d, ms) 6494 6505 16 1.5 649.4 0.3X -timestamp + interval(m) 2446 2446 0 4.1 244.6 0.7X -timestamp + interval(m, d) 2670 2703 46 3.7 267.0 0.6X -timestamp + interval(m, d, ms) 2992 3012 29 3.3 299.2 0.5X -timestamp - interval(m) 2447 2449 3 4.1 244.7 0.7X -timestamp - interval(m, d) 2739 2739 0 3.7 273.9 0.6X -timestamp - interval(m, d, ms) 2977 2983 8 3.4 297.7 0.5X +date + interval(m) 1651 1690 56 6.1 165.1 1.0X +date + interval(m, d) 1826 1833 10 5.5 182.6 0.9X +date + interval(m, d, ms) 6522 6534 17 1.5 652.2 0.3X +date - interval(m) 1465 1473 12 6.8 146.5 1.1X +date - interval(m, d) 
1728 1734 9 5.8 172.8 1.0X +date - interval(m, d, ms) 6757 6765 12 1.5 675.7 0.2X +timestamp + interval(m) 2686 2696 14 3.7 268.6 0.6X +timestamp + interval(m, d) 2979 2982 4 3.4 297.9 0.6X +timestamp + interval(m, d, ms) 3483 3507 33 2.9 348.3 0.5X +timestamp - interval(m) 2856 2858 3 3.5 285.6 0.6X +timestamp - interval(m, d) 3167 3169 3 3.2 316.7 0.5X +timestamp - interval(m, d, ms) 3475 3477 2 2.9 347.5 0.5X ================================================================================================ Extract components ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz cast to timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to timestamp wholestage off 312 321 13 32.1 31.2 1.0X -cast to timestamp wholestage on 290 311 14 34.5 29.0 1.1X +cast to timestamp wholestage off 309 312 5 32.4 30.9 1.0X +cast to timestamp wholestage on 292 302 8 34.2 29.2 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz year of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -year of timestamp wholestage off 1226 1228 3 8.2 122.6 1.0X -year of timestamp wholestage on 1214 1222 10 8.2 121.4 1.0X +year of timestamp wholestage off 1228 1228 0 8.1 122.8 1.0X +year of timestamp wholestage on 1213 1227 18 8.2 121.3 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz quarter of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -quarter of timestamp wholestage off 1437 1447 14 7.0 143.7 1.0X -quarter of timestamp wholestage on 1354 1359 4 7.4 135.4 1.1X +quarter of timestamp wholestage off 1433 1440 9 7.0 143.3 1.0X +quarter of timestamp wholestage on 1344 1349 4 7.4 134.4 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz month of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -month of timestamp wholestage off 1219 1219 1 8.2 121.9 1.0X -month of timestamp wholestage on 1205 1211 7 8.3 120.5 1.0X +month of timestamp wholestage off 1229 1232 5 8.1 122.9 1.0X +month of timestamp wholestage on 1201 1207 6 8.3 120.1 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz weekofyear of timestamp: Best Time(ms) 
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -weekofyear of timestamp wholestage off 1849 1854 7 5.4 184.9 1.0X -weekofyear of timestamp wholestage on 1829 1835 5 5.5 182.9 1.0X +weekofyear of timestamp wholestage off 1921 1931 14 5.2 192.1 1.0X +weekofyear of timestamp wholestage on 1864 1881 16 5.4 186.4 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz day of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -day of timestamp wholestage off 1224 1230 8 8.2 122.4 1.0X -day of timestamp wholestage on 1204 1215 10 8.3 120.4 1.0X +day of timestamp wholestage off 1223 1225 2 8.2 122.3 1.0X +day of timestamp wholestage on 1204 1215 7 8.3 120.4 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz dayofyear of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -dayofyear of timestamp wholestage off 1272 1275 5 7.9 127.2 1.0X -dayofyear of timestamp wholestage on 1246 1256 7 8.0 124.6 1.0X +dayofyear of timestamp wholestage off 1261 1266 8 7.9 126.1 1.0X +dayofyear of timestamp wholestage on 1236 1260 15 8.1 123.6 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz dayofmonth of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -dayofmonth of timestamp wholestage off 1226 1233 11 8.2 122.6 1.0X -dayofmonth of timestamp wholestage on 1205 1211 5 8.3 120.5 1.0X +dayofmonth of timestamp wholestage off 1243 1250 10 8.0 124.3 1.0X +dayofmonth of timestamp wholestage on 1203 1214 11 8.3 120.3 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz dayofweek of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -dayofweek of timestamp wholestage off 1420 1427 9 7.0 142.0 1.0X -dayofweek of timestamp wholestage on 1375 1385 11 7.3 137.5 1.0X +dayofweek of timestamp wholestage off 1400 1409 13 7.1 140.0 1.0X +dayofweek of timestamp wholestage on 1374 1385 10 7.3 137.4 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz weekday of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -weekday of timestamp wholestage off 1345 1347 3 7.4 134.5 1.0X -weekday of timestamp wholestage on 1316 1322 5 7.6 131.6 1.0X +weekday of timestamp wholestage off 1355 1358 4 7.4 135.5 1.0X +weekday of timestamp wholestage on 1319 1328 8 7.6 131.9 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz hour of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -hour of timestamp wholestage off 983 984 1 10.2 98.3 1.0X -hour of timestamp wholestage on 942 953 8 10.6 94.2 1.0X +hour of timestamp wholestage off 970 973 4 10.3 97.0 1.0X +hour of timestamp wholestage on 950 957 9 10.5 95.0 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz minute of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -minute of timestamp wholestage off 1008 1010 3 9.9 100.8 1.0X -minute of timestamp wholestage on 942 945 3 10.6 94.2 1.1X +minute of timestamp wholestage off 1017 1019 3 9.8 101.7 1.0X +minute of timestamp wholestage on 948 951 2 10.5 94.8 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz second of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -second of timestamp wholestage off 975 976 1 10.3 97.5 1.0X -second of timestamp wholestage on 938 944 4 10.7 93.8 1.0X +second of timestamp wholestage off 965 966 2 10.4 96.5 1.0X +second of timestamp wholestage on 943 946 2 10.6 94.3 1.0X ================================================================================================ Current date and time ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz current_date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -current_date wholestage off 295 296 2 33.9 29.5 1.0X -current_date wholestage on 267 274 6 37.5 26.7 1.1X +current_date wholestage off 296 296 0 33.8 29.6 1.0X +current_date wholestage on 271 277 7 36.9 27.1 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz current_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -current_timestamp wholestage off 298 303 7 33.5 29.8 1.0X -current_timestamp wholestage on 261 275 12 38.2 26.1 1.1X +current_timestamp wholestage off 307 329 32 32.6 30.7 1.0X +current_timestamp wholestage on 259 314 96 38.7 25.9 1.2X ================================================================================================ Date arithmetic ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz cast to date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to date wholestage off 1071 1073 3 9.3 107.1 1.0X -cast to date wholestage on 998 1014 31 10.0 99.8 1.1X +cast to date wholestage off 1075 1077 3 9.3 107.5 1.0X +cast to date wholestage on 997 1002 5 10.0 99.7 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz last_day: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -last_day wholestage off 1260 1261 1 7.9 126.0 1.0X -last_day wholestage on 1245 1261 17 8.0 124.5 1.0X +last_day wholestage off 1259 1261 3 7.9 125.9 1.0X +last_day wholestage on 1231 1242 11 8.1 123.1 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz next_day: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -next_day wholestage off 1118 1120 2 8.9 111.8 1.0X -next_day wholestage on 1043 1047 3 9.6 104.3 1.1X +next_day wholestage off 1121 1123 3 8.9 112.1 1.0X +next_day wholestage on 1043 1049 6 9.6 104.3 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_add: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_add wholestage off 1046 1048 3 9.6 104.6 1.0X -date_add wholestage on 1040 1048 11 9.6 104.0 1.0X +date_add wholestage off 1043 1044 2 9.6 104.3 1.0X +date_add wholestage on 1026 1030 5 9.7 102.6 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_sub: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_sub wholestage off 1081 1081 0 9.3 108.1 1.0X -date_sub wholestage on 
1030 1035 6 9.7 103.0 1.0X +date_sub wholestage off 1058 1062 6 9.5 105.8 1.0X +date_sub wholestage on 1024 1027 3 9.8 102.4 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz add_months: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -add_months wholestage off 1393 1400 10 7.2 139.3 1.0X -add_months wholestage on 1391 1396 5 7.2 139.1 1.0X +add_months wholestage off 1403 1404 2 7.1 140.3 1.0X +add_months wholestage on 1394 1399 5 7.2 139.4 1.0X ================================================================================================ Formatting dates ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz format date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -format date wholestage off 5424 5426 2 1.8 542.4 1.0X -format date wholestage on 5408 5448 37 1.8 540.8 1.0X +format date wholestage off 5730 5736 8 1.7 573.0 1.0X +format date wholestage on 6159 6184 26 1.6 615.9 0.9X ================================================================================================ Formatting timestamps ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz from_unixtime: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -from_unixtime wholestage off 8839 8841 3 1.1 883.9 1.0X -from_unixtime wholestage on 8788 8826 24 1.1 878.8 1.0X +from_unixtime wholestage off 8718 8725 10 1.1 871.8 1.0X +from_unixtime wholestage on 8648 8668 17 1.2 864.8 1.0X ================================================================================================ Convert timestamps ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz from_utc_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -from_utc_timestamp wholestage off 1105 1111 8 9.0 110.5 1.0X -from_utc_timestamp wholestage on 1073 1081 8 9.3 107.3 1.0X +from_utc_timestamp wholestage off 1174 1180 8 8.5 117.4 1.0X +from_utc_timestamp wholestage on 1084 1093 6 9.2 108.4 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 
v2 @ 2.50GHz to_utc_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_utc_timestamp wholestage off 1462 1465 4 6.8 146.2 1.0X -to_utc_timestamp wholestage on 1394 1408 13 7.2 139.4 1.0X +to_utc_timestamp wholestage off 1567 1567 0 6.4 156.7 1.0X +to_utc_timestamp wholestage on 1509 1528 13 6.6 150.9 1.0X ================================================================================================ Intervals ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz cast interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast interval wholestage off 325 328 4 30.8 32.5 1.0X -cast interval wholestage on 286 290 3 35.0 28.6 1.1X +cast interval wholestage off 328 332 5 30.4 32.8 1.0X +cast interval wholestage on 286 290 5 35.0 28.6 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz datediff: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -datediff wholestage off 1822 1824 3 5.5 182.2 1.0X -datediff wholestage on 1757 1761 5 5.7 175.7 1.0X +datediff wholestage off 1832 1833 2 5.5 183.2 1.0X +datediff wholestage on 1757 1761 3 5.7 175.7 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz months_between: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -months_between wholestage off 4886 4893 10 2.0 488.6 1.0X -months_between wholestage on 4785 4799 12 2.1 478.5 1.0X +months_between wholestage off 5040 5049 13 2.0 504.0 1.0X +months_between wholestage on 4943 4950 5 2.0 494.3 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz window: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -window wholestage off 2024 2052 40 0.5 2023.7 1.0X -window wholestage on 46599 46660 45 0.0 46599.0 0.0X +window wholestage off 1779 1855 107 0.6 1778.6 1.0X +window wholestage on 46705 46754 43 0.0 46705.1 0.0X ================================================================================================ Truncation ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on 
Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc YEAR: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc YEAR wholestage off 2361 2366 7 4.2 236.1 1.0X -date_trunc YEAR wholestage on 2325 2328 3 4.3 232.5 1.0X +date_trunc YEAR wholestage off 2485 2497 17 4.0 248.5 1.0X +date_trunc YEAR wholestage on 2403 2420 20 4.2 240.3 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc YYYY: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc YYYY wholestage off 2366 2374 12 4.2 236.6 1.0X -date_trunc YYYY wholestage on 2316 2328 13 4.3 231.6 1.0X +date_trunc YYYY wholestage off 2498 2502 5 4.0 249.8 1.0X +date_trunc YYYY wholestage on 2399 2401 2 4.2 239.9 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc YY: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc YY wholestage off 2359 2359 0 4.2 235.9 1.0X -date_trunc YY wholestage on 2315 2325 7 4.3 231.5 1.0X +date_trunc YY wholestage off 2492 2493 3 4.0 249.2 1.0X +date_trunc YY wholestage on 2399 2404 6 4.2 239.9 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc MON: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MON wholestage off 2360 2369 12 4.2 236.0 1.0X -date_trunc MON wholestage on 2306 2314 9 4.3 230.6 1.0X +date_trunc MON wholestage off 2454 2455 1 4.1 245.4 1.0X +date_trunc MON wholestage on 2412 2417 5 4.1 241.2 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc MONTH: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MONTH wholestage off 2359 2360 2 4.2 235.9 1.0X -date_trunc MONTH wholestage on 2304 2308 4 4.3 230.4 1.0X +date_trunc MONTH wholestage off 2449 2450 1 4.1 244.9 1.0X +date_trunc MONTH wholestage on 2409 2414 7 4.2 240.9 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc MM: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MM 
wholestage off 2356 2358 2 4.2 235.6 1.0X -date_trunc MM wholestage on 2302 2309 6 4.3 230.2 1.0X +date_trunc MM wholestage off 2445 2450 7 4.1 244.5 1.0X +date_trunc MM wholestage on 2409 2412 4 4.2 240.9 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc DAY: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc DAY wholestage off 1962 1964 3 5.1 196.2 1.0X -date_trunc DAY wholestage on 1916 1921 6 5.2 191.6 1.0X +date_trunc DAY wholestage off 2158 2165 10 4.6 215.8 1.0X +date_trunc DAY wholestage on 2039 2045 6 4.9 203.9 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc DD: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc DD wholestage off 1956 1957 2 5.1 195.6 1.0X -date_trunc DD wholestage on 1916 1922 6 5.2 191.6 1.0X +date_trunc DD wholestage off 2156 2162 8 4.6 215.6 1.0X +date_trunc DD wholestage on 2038 2043 3 4.9 203.8 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc HOUR: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc HOUR wholestage off 1968 1970 3 5.1 196.8 1.0X -date_trunc HOUR wholestage on 1949 1961 9 5.1 194.9 1.0X +date_trunc HOUR wholestage off 2080 2081 2 4.8 208.0 1.0X +date_trunc HOUR wholestage on 2042 2048 6 4.9 204.2 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc MINUTE: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MINUTE wholestage off 368 373 7 27.2 36.8 1.0X -date_trunc MINUTE wholestage on 338 343 6 29.6 33.8 1.1X +date_trunc MINUTE wholestage off 2116 2122 9 4.7 211.6 1.0X +date_trunc MINUTE wholestage on 2041 2048 11 4.9 204.1 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc SECOND: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc SECOND wholestage off 379 379 1 26.4 37.9 1.0X -date_trunc SECOND wholestage on 327 340 13 30.6 32.7 1.2X +date_trunc SECOND wholestage off 349 352 4 28.6 34.9 1.0X +date_trunc SECOND wholestage on 309 314 6 32.3 30.9 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 
4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc WEEK: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc WEEK wholestage off 2227 2242 21 4.5 222.7 1.0X -date_trunc WEEK wholestage on 2231 2241 9 4.5 223.1 1.0X +date_trunc WEEK wholestage off 2324 2330 8 4.3 232.4 1.0X +date_trunc WEEK wholestage on 2297 2305 13 4.4 229.7 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc QUARTER: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc QUARTER wholestage off 3158 3160 3 3.2 315.8 1.0X -date_trunc QUARTER wholestage on 3150 3163 12 3.2 315.0 1.0X +date_trunc QUARTER wholestage off 3652 3654 3 2.7 365.2 1.0X +date_trunc QUARTER wholestage on 3211 3218 9 3.1 321.1 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz trunc year: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc year wholestage off 321 323 3 31.2 32.1 1.0X -trunc year wholestage on 302 330 18 33.1 30.2 1.1X +trunc year wholestage off 308 311 4 32.5 30.8 1.0X +trunc year wholestage on 286 291 4 35.0 28.6 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz trunc yyyy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc yyyy wholestage off 320 324 6 31.2 32.0 1.0X -trunc yyyy wholestage on 294 329 20 34.0 29.4 1.1X +trunc yyyy wholestage off 304 305 1 32.9 30.4 1.0X +trunc yyyy wholestage on 286 290 5 35.0 28.6 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz trunc yy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc yy wholestage off 322 322 0 31.1 32.2 1.0X -trunc yy wholestage on 293 320 37 34.1 29.3 1.1X +trunc yy wholestage off 319 322 5 31.4 31.9 1.0X +trunc yy wholestage on 285 288 3 35.0 28.5 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz trunc mon: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc mon wholestage off 
320 322 2 31.2 32.0 1.0X -trunc mon wholestage on 291 312 26 34.4 29.1 1.1X +trunc mon wholestage off 304 309 7 32.9 30.4 1.0X +trunc mon wholestage on 284 289 4 35.2 28.4 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz trunc month: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc month wholestage off 318 331 18 31.4 31.8 1.0X -trunc month wholestage on 297 329 28 33.7 29.7 1.1X +trunc month wholestage off 302 305 4 33.1 30.2 1.0X +trunc month wholestage on 285 294 10 35.1 28.5 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz trunc mm: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc mm wholestage off 318 319 1 31.4 31.8 1.0X -trunc mm wholestage on 312 335 15 32.1 31.2 1.0X +trunc mm wholestage off 301 317 23 33.2 30.1 1.0X +trunc mm wholestage on 284 290 4 35.2 28.4 1.1X ================================================================================================ Parsing ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz to timestamp str: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to timestamp str wholestage off 217 221 5 4.6 217.5 1.0X -to timestamp str wholestage on 210 214 5 4.8 210.0 1.0X +to timestamp str wholestage off 217 219 2 4.6 217.5 1.0X +to timestamp str wholestage on 216 219 4 4.6 215.7 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz to_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_timestamp wholestage off 1714 1718 5 0.6 1714.4 1.0X -to_timestamp wholestage on 1418 1433 14 0.7 1418.5 1.2X +to_timestamp wholestage off 1853 1855 3 0.5 1852.9 1.0X +to_timestamp wholestage on 2138 2159 26 0.5 2137.6 0.9X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz to_unix_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_unix_timestamp wholestage off 1436 1441 6 0.7 1436.2 1.0X -to_unix_timestamp wholestage on 1421 1426 7 0.7 1420.6 1.0X +to_unix_timestamp wholestage off 2115 2116 1 0.5 2115.2 1.0X +to_unix_timestamp wholestage 
on 2131 2144 16 0.5 2130.8 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz to date str: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to date str wholestage off 267 267 0 3.8 266.6 1.0X -to date str wholestage on 260 262 2 3.8 260.1 1.0X +to date str wholestage off 280 281 1 3.6 279.7 1.0X +to date str wholestage on 265 271 9 3.8 265.2 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz to_date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_date wholestage off 3419 3436 25 0.3 3419.0 1.0X -to_date wholestage on 3344 3352 7 0.3 3343.5 1.0X +to_date wholestage off 3434 3458 34 0.3 3433.7 1.0X +to_date wholestage on 3517 3539 18 0.3 3517.4 1.0X ================================================================================================ Conversion from/to external types ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz To/from Java's date-time: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -From java.sql.Date 436 445 8 11.5 87.2 1.0X -From java.time.LocalDate 348 357 11 14.4 69.7 1.3X -Collect java.sql.Date 1723 1917 168 2.9 344.5 0.3X -Collect java.time.LocalDate 1591 1602 18 3.1 318.3 0.3X -From java.sql.Timestamp 248 252 4 20.2 49.6 1.8X -From java.time.Instant 232 238 5 21.5 46.5 1.9X -Collect longs 1398 1455 99 3.6 279.5 0.3X -Collect java.sql.Timestamp 1469 1483 13 3.4 293.9 0.3X -Collect java.time.Instant 1561 1597 40 3.2 312.2 0.3X -java.sql.Date to Hive string 13820 14798 857 0.4 2763.9 0.0X -java.time.LocalDate to Hive string 14374 14779 357 0.3 2874.8 0.0X -java.sql.Timestamp to Hive string 14872 15461 653 0.3 2974.5 0.0X -java.time.Instant to Hive string 17062 17789 759 0.3 3412.4 0.0X +From java.sql.Date 399 405 6 12.5 79.7 1.0X +From java.time.LocalDate 341 347 6 14.6 68.3 1.2X +Collect java.sql.Date 1732 1943 183 2.9 346.3 0.2X +Collect java.time.LocalDate 1686 1719 29 3.0 337.2 0.2X +From java.sql.Timestamp 249 261 19 20.1 49.8 1.6X +From java.time.Instant 240 242 3 20.9 47.9 1.7X +Collect longs 1546 1582 60 3.2 309.3 0.3X +Collect java.sql.Timestamp 1714 1720 6 2.9 342.9 0.2X +Collect java.time.Instant 2063 2119 65 2.4 412.6 0.2X +java.sql.Date to Hive string 13888 14401 490 0.4 2777.6 0.0X +java.time.LocalDate to Hive string 13804 14231 661 0.4 2760.8 0.0X +java.sql.Timestamp to Hive string 14231 14550 393 0.4 2846.1 0.0X +java.time.Instant to Hive string 16732 17801 953 0.3 3346.3 0.0X From 318a173fcee11902820593fe4ac992a90e6bb00e Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Wed, 11 Nov 2020 14:27:48 -0800 Subject: [PATCH 0448/1009] 
[SPARK-33402][CORE] Jobs launched in same second have duplicate MapReduce JobIDs ### What changes were proposed in this pull request? 1. Applies the SQL changes in SPARK-33230 to SparkHadoopWriter, so that `rdd.saveAsNewAPIHadoopDataset` passes in a unique job UUID in `spark.sql.sources.writeJobUUID` 1. `SparkHadoopWriterUtils.createJobTrackerID` generates a JobID by appending a random long number to the supplied timestamp to ensure the probability of a collision is near-zero. 1. Adds tests of uniqueness, round trips, and negative jobID rejection. ### Why are the changes needed? Without this, if more than one job is started in the same second *and the committer expects application attempt IDs to be unique*, those jobs are at risk of clashing with each other. With the fix, * those committers which use the ID set in `spark.sql.sources.writeJobUUID` as a priority ID will pick that up instead and so be unique. * committers which use the Hadoop JobID for unique paths and filenames will get the randomly generated jobID. Assuming all clocks in a cluster are in sync, the probability of a collision between two jobs launched in the same second has dropped from 1 to 1/(2^63). ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit tests. There's a new test suite, SparkHadoopWriterUtilsSuite, which creates job IDs, verifies they are unique even for the same timestamp, and checks that they can be marshalled to string and parsed back in the Hadoop code, which contains some (brittle) assumptions about the format of job IDs. Functional Integration Tests 1. Hadoop-trunk built with [HADOOP-17318], publishing to local maven repository 1. Spark built with hadoop.version=3.4.0-SNAPSHOT to pick up these JARs. 1. Spark + Object store integration tests at [https://github.com/hortonworks-spark/cloud-integration](https://github.com/hortonworks-spark/cloud-integration) were built against that local spark version 1. And executed against AWS London. The tests were run with `fs.s3a.committer.require.uuid=true`, so the s3a committers fail fast if they don't get a job ID down. This showed that `rdd.saveAsNewAPIHadoopDataset` wasn't setting the UUID option. It again uses the current Date value for an app attempt, which is not guaranteed to be unique. With the change applied to Spark, the relevant tests work, so the committers are getting unique job IDs. Closes #30319 from steveloughran/BUG/SPARK-33402-jobuuid. Authored-by: Steve Loughran Signed-off-by: Dongjoon Hyun --- .../spark/internal/io/SparkHadoopWriter.scala | 7 +- .../internal/io/SparkHadoopWriterUtils.scala | 25 ++++- .../io/SparkHadoopWriterUtilsSuite.scala | 102 ++++++++++++++++++ 3 files changed, 131 insertions(+), 3 deletions(-) create mode 100644 core/src/test/scala/org/apache/spark/internal/io/SparkHadoopWriterUtilsSuite.scala diff --git a/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriter.scala b/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriter.scala index 6d174b5e0f81b..37b470802067a 100644 --- a/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriter.scala +++ b/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriter.scala @@ -18,7 +18,7 @@ package org.apache.spark.internal.io import java.text.NumberFormat -import java.util.{Date, Locale} +import java.util.{Date, Locale, UUID} import scala.reflect.ClassTag @@ -70,6 +70,11 @@ object SparkHadoopWriter extends Logging { // Assert the output format/key/value class is set in JobConf.
config.assertConf(jobContext, rdd.conf) + // propagate the description UUID into the jobs, so that committers + // get an ID guaranteed to be unique. + jobContext.getConfiguration.set("spark.sql.sources.writeJobUUID", + UUID.randomUUID.toString) + val committer = config.createCommitter(commitJobId) committer.setupJob(jobContext) diff --git a/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriterUtils.scala b/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriterUtils.scala index de828a6d6156e..657842c620f30 100644 --- a/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriterUtils.scala +++ b/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriterUtils.scala @@ -20,7 +20,7 @@ package org.apache.spark.internal.io import java.text.SimpleDateFormat import java.util.{Date, Locale} -import scala.util.DynamicVariable +import scala.util.{DynamicVariable, Random} import org.apache.hadoop.fs.Path import org.apache.hadoop.mapred.{JobConf, JobID} @@ -37,14 +37,35 @@ private[spark] object SparkHadoopWriterUtils { private val RECORDS_BETWEEN_BYTES_WRITTEN_METRIC_UPDATES = 256 + private val RAND = new Random() + /** + * Create a job ID. + * + * @param time (current) time + * @param id job number + * @return a job ID + */ def createJobID(time: Date, id: Int): JobID = { + if (id < 0) { + throw new IllegalArgumentException("Job number is negative") + } val jobtrackerID = createJobTrackerID(time) new JobID(jobtrackerID, id) } + /** + * Generate an ID for a job tracker. + * @param time (current) time + * @return a string for a job ID + */ def createJobTrackerID(time: Date): String = { - new SimpleDateFormat("yyyyMMddHHmmss", Locale.US).format(time) + val base = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US).format(time) + var l1 = RAND.nextLong() + if (l1 < 0) { + l1 = -l1 + } + base + l1 } def createPathFromString(path: String, conf: JobConf): Path = { diff --git a/core/src/test/scala/org/apache/spark/internal/io/SparkHadoopWriterUtilsSuite.scala b/core/src/test/scala/org/apache/spark/internal/io/SparkHadoopWriterUtilsSuite.scala new file mode 100644 index 0000000000000..33b58ec9e6665 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/internal/io/SparkHadoopWriterUtilsSuite.scala @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.internal.io + +import java.util.Date + +import org.apache.hadoop.mapreduce.JobID + +import org.apache.spark.SparkFunSuite +import org.apache.spark.internal.io.SparkHadoopWriterUtils.createJobID + +/** + * Unit tests for functions in SparkHadoopWriterUtils. + */ +class SparkHadoopWriterUtilsSuite extends SparkFunSuite { + + /** + * Core test of JobID generation: + * They are created. + * The job number is converted to the job ID. 
+ * They round trip to string and back + * (which implies that the full string matches the regexp + * in the JobID class). + */ + test("JobID Generation") { + val jobNumber = 1010 + val j1 = createJobID(new Date(), jobNumber) + assert(jobNumber == j1.getId, + s"Job number mismatch in $j1") + + val jobStr = j1.toString + // the string value begins with job_ + assert(jobStr.startsWith("job_"), + s"wrong prefix of $jobStr") + // and the hadoop code can parse it + val j2 = roundTrip(j1) + assert(j1.getId == j2.getId, "Job ID mismatch") + assert(j1.getJtIdentifier == j2.getJtIdentifier, "Job identifier mismatch") + } + + /** + * This is the problem surfacing in situations where committers expect + * Job IDs to be unique: if the timestamp is (exclusively) used + * then there will conflict in directories created. + */ + test("JobIDs generated at same time are different") { + val now = new Date() + val j1 = createJobID(now, 1) + val j2 = createJobID(now, 1) + assert(j1.toString != j2.toString) + } + + /** + * There's nothing explicitly in the Hadoop classes to stop + * job numbers being negative. + * There's some big assumptions in the FileOutputCommitter about attempt IDs + * being positive during any recovery operations; for safety the ID + * job number is validated. + */ + test("JobIDs with negative job number") { + intercept[IllegalArgumentException] { + createJobID(new Date(), -1) + } + } + + /** + * If someone ever does reinstate use of timestamps, + * make sure that the case of timestamp == 0 is handled. + */ + test("JobIDs on Epoch are different") { + val j1 = createJobID(new Date(0), 0) + val j2 = createJobID(new Date(0), 0) + assert (j1.toString != j2.toString) + } + + /** + * Do a round trip as a string and back again. + * This uses the JobID parser. + * @param jobID job ID + * @return the returned jobID + */ + private def roundTrip(jobID: JobID): JobID = { + val parsedJobId = JobID.forName(jobID.toString) + assert(jobID == parsedJobId, "Round trip was inconsistent") + parsedJobId + } +} From 9d58a2f0f0f308a03830bf183959a4743a77b78a Mon Sep 17 00:00:00 2001 From: Josh Soref Date: Thu, 12 Nov 2020 08:29:22 +0900 Subject: [PATCH 0449/1009] [MINOR][GRAPHX] Correct typos in the sub-modules: graphx, external, and examples ### What changes were proposed in this pull request? This PR intends to fix typos in the sub-modules: graphx, external, and examples. Split per holdenk https://github.com/apache/spark/pull/30323#issuecomment-725159710 NOTE: The misspellings have been reported at https://github.com/jsoref/spark/commit/706a726f87a0bbf5e31467fae9015218773db85b#commitcomment-44064356 ### Why are the changes needed? Misspelled words make it harder to read / understand content. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? No testing was performed Closes #30326 from jsoref/spelling-graphx. 
Authored-by: Josh Soref Signed-off-by: Takeshi Yamamuro --- .../spark/examples/streaming/JavaCustomReceiver.java | 2 +- .../examples/streaming/JavaNetworkWordCount.java | 2 +- .../streaming/JavaRecoverableNetworkWordCount.java | 2 +- .../examples/streaming/JavaSqlNetworkWordCount.java | 2 +- .../src/main/python/ml/train_validation_split.py | 2 +- examples/src/main/python/sql/arrow.py | 4 ++-- .../streaming/recoverable_network_wordcount.py | 2 +- .../main/python/streaming/sql_network_wordcount.py | 2 +- .../spark/examples/streaming/CustomReceiver.scala | 2 +- .../spark/examples/streaming/NetworkWordCount.scala | 2 +- .../streaming/RecoverableNetworkWordCount.scala | 2 +- .../examples/streaming/SqlNetworkWordCount.scala | 2 +- .../streaming/StatefulNetworkWordCount.scala | 2 +- .../spark/sql/jdbc/DockerJDBCIntegrationSuite.scala | 2 +- .../org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 2 +- .../sql/kafka010/KafkaContinuousSourceSuite.scala | 4 ++-- .../sql/kafka010/KafkaMicroBatchSourceSuite.scala | 12 ++++++------ .../spark/sql/kafka010/KafkaRelationSuite.scala | 4 ++-- .../apache/spark/sql/kafka010/KafkaTestUtils.scala | 4 ++-- .../spark/streaming/kafka010/KafkaRDDSuite.scala | 2 +- .../examples/streaming/JavaKinesisWordCountASL.java | 2 +- .../examples/streaming/kinesis_wordcount_asl.py | 2 +- .../examples/streaming/KinesisWordCountASL.scala | 6 +++--- .../streaming/kinesis/KinesisUtilsPythonHelper.scala | 2 +- .../org/apache/spark/graphx/lib/PageRankSuite.scala | 6 +++--- 25 files changed, 38 insertions(+), 38 deletions(-) diff --git a/examples/src/main/java/org/apache/spark/examples/streaming/JavaCustomReceiver.java b/examples/src/main/java/org/apache/spark/examples/streaming/JavaCustomReceiver.java index 47692ec982890..f84a1978de1ad 100644 --- a/examples/src/main/java/org/apache/spark/examples/streaming/JavaCustomReceiver.java +++ b/examples/src/main/java/org/apache/spark/examples/streaming/JavaCustomReceiver.java @@ -67,7 +67,7 @@ public static void main(String[] args) throws Exception { JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(1000)); // Create an input stream with the custom receiver on target ip:port and count the - // words in input stream of \n delimited text (eg. generated by 'nc') + // words in input stream of \n delimited text (e.g. generated by 'nc') JavaReceiverInputDStream lines = ssc.receiverStream( new JavaCustomReceiver(args[0], Integer.parseInt(args[1]))); JavaDStream words = lines.flatMap(x -> Arrays.asList(SPACE.split(x)).iterator()); diff --git a/examples/src/main/java/org/apache/spark/examples/streaming/JavaNetworkWordCount.java b/examples/src/main/java/org/apache/spark/examples/streaming/JavaNetworkWordCount.java index b217672def88e..d56134bd99e36 100644 --- a/examples/src/main/java/org/apache/spark/examples/streaming/JavaNetworkWordCount.java +++ b/examples/src/main/java/org/apache/spark/examples/streaming/JavaNetworkWordCount.java @@ -57,7 +57,7 @@ public static void main(String[] args) throws Exception { JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1)); // Create a JavaReceiverInputDStream on target ip:port and count the - // words in input stream of \n delimited text (eg. generated by 'nc') + // words in input stream of \n delimited text (e.g. generated by 'nc') // Note that no duplication in storage level only for running locally. // Replication necessary in distributed scenario for fault tolerance. 
JavaReceiverInputDStream lines = ssc.socketTextStream( diff --git a/examples/src/main/java/org/apache/spark/examples/streaming/JavaRecoverableNetworkWordCount.java b/examples/src/main/java/org/apache/spark/examples/streaming/JavaRecoverableNetworkWordCount.java index c01a62b078f7a..0c11c40cfe7ed 100644 --- a/examples/src/main/java/org/apache/spark/examples/streaming/JavaRecoverableNetworkWordCount.java +++ b/examples/src/main/java/org/apache/spark/examples/streaming/JavaRecoverableNetworkWordCount.java @@ -126,7 +126,7 @@ private static JavaStreamingContext createContext(String ip, ssc.checkpoint(checkpointDirectory); // Create a socket stream on target ip:port and count the - // words in input stream of \n delimited text (eg. generated by 'nc') + // words in input stream of \n delimited text (e.g. generated by 'nc') JavaReceiverInputDStream lines = ssc.socketTextStream(ip, port); JavaDStream words = lines.flatMap(x -> Arrays.asList(SPACE.split(x)).iterator()); JavaPairDStream wordCounts = words.mapToPair(s -> new Tuple2<>(s, 1)) diff --git a/examples/src/main/java/org/apache/spark/examples/streaming/JavaSqlNetworkWordCount.java b/examples/src/main/java/org/apache/spark/examples/streaming/JavaSqlNetworkWordCount.java index 948d1a2111780..5d30698c93372 100644 --- a/examples/src/main/java/org/apache/spark/examples/streaming/JavaSqlNetworkWordCount.java +++ b/examples/src/main/java/org/apache/spark/examples/streaming/JavaSqlNetworkWordCount.java @@ -59,7 +59,7 @@ public static void main(String[] args) throws Exception { JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1)); // Create a JavaReceiverInputDStream on target ip:port and count the - // words in input stream of \n delimited text (eg. generated by 'nc') + // words in input stream of \n delimited text (e.g. generated by 'nc') // Note that no duplication in storage level only for running locally. // Replication necessary in distributed scenario for fault tolerance. JavaReceiverInputDStream lines = ssc.socketTextStream( diff --git a/examples/src/main/python/ml/train_validation_split.py b/examples/src/main/python/ml/train_validation_split.py index d4f9184bf576e..5e3dc7b3ec2fa 100644 --- a/examples/src/main/python/ml/train_validation_split.py +++ b/examples/src/main/python/ml/train_validation_split.py @@ -17,7 +17,7 @@ """ This example demonstrates applying TrainValidationSplit to split data -and preform model selection. +and perform model selection. Run with: bin/spark-submit examples/src/main/python/ml/train_validation_split.py diff --git a/examples/src/main/python/sql/arrow.py b/examples/src/main/python/sql/arrow.py index 9978e8601449a..a0eba0fbede73 100644 --- a/examples/src/main/python/sql/arrow.py +++ b/examples/src/main/python/sql/arrow.py @@ -60,7 +60,7 @@ def func(s1: pd.Series, s2: pd.Series, s3: pd.DataFrame) -> pd.DataFrame: s3['col2'] = s1 + s2.str.len() return s3 - # Create a Spark DataFrame that has three columns including a sturct column. + # Create a Spark DataFrame that has three columns including a struct column. 
df = spark.createDataFrame( [[1, "a string", ("a nested string",)]], "long_col long, string_col string, struct_col struct") @@ -285,7 +285,7 @@ def asof_join(l, r): ser_to_frame_pandas_udf_example(spark) print("Running pandas_udf example: Series to Series") ser_to_ser_pandas_udf_example(spark) - print("Running pandas_udf example: Iterator of Series to Iterator of Seires") + print("Running pandas_udf example: Iterator of Series to Iterator of Series") iter_ser_to_iter_ser_pandas_udf_example(spark) print("Running pandas_udf example: Iterator of Multiple Series to Iterator of Series") iter_sers_to_iter_ser_pandas_udf_example(spark) diff --git a/examples/src/main/python/streaming/recoverable_network_wordcount.py b/examples/src/main/python/streaming/recoverable_network_wordcount.py index 6ebe91a2f47fe..567f9c819e3ad 100644 --- a/examples/src/main/python/streaming/recoverable_network_wordcount.py +++ b/examples/src/main/python/streaming/recoverable_network_wordcount.py @@ -66,7 +66,7 @@ def createContext(host, port, outputPath): ssc = StreamingContext(sc, 1) # Create a socket stream on target ip:port and count the - # words in input stream of \n delimited text (eg. generated by 'nc') + # words in input stream of \n delimited text (e.g. generated by 'nc') lines = ssc.socketTextStream(host, port) words = lines.flatMap(lambda line: line.split(" ")) wordCounts = words.map(lambda x: (x, 1)).reduceByKey(lambda x, y: x + y) diff --git a/examples/src/main/python/streaming/sql_network_wordcount.py b/examples/src/main/python/streaming/sql_network_wordcount.py index 59a8a11a45b19..2965ea8fb1872 100644 --- a/examples/src/main/python/streaming/sql_network_wordcount.py +++ b/examples/src/main/python/streaming/sql_network_wordcount.py @@ -52,7 +52,7 @@ def getSparkSessionInstance(sparkConf): ssc = StreamingContext(sc, 1) # Create a socket stream on target ip:port and count the - # words in input stream of \n delimited text (eg. generated by 'nc') + # words in input stream of \n delimited text (e.g. generated by 'nc') lines = ssc.socketTextStream(host, int(port)) words = lines.flatMap(lambda line: line.split(" ")) diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala index 0f47deaf1021b..626f4b4d3ccdf 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala @@ -50,7 +50,7 @@ object CustomReceiver { val ssc = new StreamingContext(sparkConf, Seconds(1)) // Create an input stream with the custom receiver on target ip:port and count the - // words in input stream of \n delimited text (eg. generated by 'nc') + // words in input stream of \n delimited text (e.g. 
generated by 'nc') val lines = ssc.receiverStream(new CustomReceiver(args(0), args(1).toInt)) val words = lines.flatMap(_.split(" ")) val wordCounts = words.map(x => (x, 1)).reduceByKey(_ + _) diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/NetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/NetworkWordCount.scala index 26bb51dde3a1d..7d981dfb949ea 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/NetworkWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/NetworkWordCount.scala @@ -47,7 +47,7 @@ object NetworkWordCount { val ssc = new StreamingContext(sparkConf, Seconds(1)) // Create a socket stream on target ip:port and count the - // words in input stream of \n delimited text (eg. generated by 'nc') + // words in input stream of \n delimited text (e.g. generated by 'nc') // Note that no duplication in storage level only for running locally. // Replication necessary in distributed scenario for fault tolerance. val lines = ssc.socketTextStream(args(0), args(1).toInt, StorageLevel.MEMORY_AND_DISK_SER) diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/RecoverableNetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/RecoverableNetworkWordCount.scala index ee3bbe40fbeed..98539d6494231 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/RecoverableNetworkWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/RecoverableNetworkWordCount.scala @@ -112,7 +112,7 @@ object RecoverableNetworkWordCount { ssc.checkpoint(checkpointDirectory) // Create a socket stream on target ip:port and count the - // words in input stream of \n delimited text (eg. generated by 'nc') + // words in input stream of \n delimited text (e.g. generated by 'nc') val lines = ssc.socketTextStream(ip, port) val words = lines.flatMap(_.split(" ")) val wordCounts = words.map((_, 1)).reduceByKey(_ + _) diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala index 778be7baaeeac..7daa0014e0f1c 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala @@ -51,7 +51,7 @@ object SqlNetworkWordCount { val ssc = new StreamingContext(sparkConf, Seconds(2)) // Create a socket stream on target ip:port and count the - // words in input stream of \n delimited text (eg. generated by 'nc') + // words in input stream of \n delimited text (e.g. generated by 'nc') // Note that no duplication in storage level only for running locally. // Replication necessary in distributed scenario for fault tolerance. 
val lines = ssc.socketTextStream(args(0), args(1).toInt, StorageLevel.MEMORY_AND_DISK_SER) diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/StatefulNetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/StatefulNetworkWordCount.scala index 46f01edf7deec..8a5fcda9cd990 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/StatefulNetworkWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/StatefulNetworkWordCount.scala @@ -52,7 +52,7 @@ object StatefulNetworkWordCount { val initialRDD = ssc.sparkContext.parallelize(List(("hello", 1), ("world", 1))) // Create a ReceiverInputDStream on target ip:port and count the - // words in input stream of \n delimited test (eg. generated by 'nc') + // words in input stream of \n delimited test (e.g. generated by 'nc') val lines = ssc.socketTextStream(args(0), args(1).toInt) val words = lines.flatMap(_.split(" ")) val wordDstream = words.map(x => (x, 1)) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala index ad6a829fffd0d..00b7b413a964d 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala @@ -45,7 +45,7 @@ abstract class DatabaseOnDocker { val env: Map[String, String] /** - * Wheather or not to use ipc mode for shared memory when starting docker image + * Whether or not to use ipc mode for shared memory when starting docker image */ val usesIpc: Boolean diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index 2e726b9e650b6..e36555e514c9f 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -35,7 +35,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession { def testUpdateColumnNullability(tbl: String): Unit = { sql(s"CREATE TABLE $catalogName.alt_table (ID STRING NOT NULL) USING _") var t = spark.table(s"$catalogName.alt_table") - // nullable is true in the expecteSchema because Spark always sets nullable to true + // nullable is true in the expectedSchema because Spark always sets nullable to true // regardless of the JDBC metadata https://github.com/apache/spark/pull/18445 var expectedSchema = new StructType().add("ID", StringType, nullable = true) assert(t.schema === expectedSchema) diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaContinuousSourceSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaContinuousSourceSuite.scala index 14dcbeef0d9a3..6801d14d036dd 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaContinuousSourceSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaContinuousSourceSuite.scala @@ -33,7 +33,7 @@ class KafkaContinuousSourceSuite extends KafkaSourceSuiteBase with KafkaContinuo withTable(table) { val topic = newTopic() testUtils.createTopic(topic) - testUtils.withTranscationalProducer { producer => + 
testUtils.withTransactionalProducer { producer => val df = spark .readStream .format("kafka") @@ -99,7 +99,7 @@ class KafkaContinuousSourceSuite extends KafkaSourceSuiteBase with KafkaContinuo withTable(table) { val topic = newTopic() testUtils.createTopic(topic) - testUtils.withTranscationalProducer { producer => + testUtils.withTransactionalProducer { producer => val df = spark .readStream .format("kafka") diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala index 853d201ba7ea5..510c0c5bd28a5 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala @@ -569,7 +569,7 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { val rows = spark.table("kafkaWatermark").collect() assert(rows.length === 1, s"Unexpected results: ${rows.toList}") val row = rows(0) - // We cannot check the exact window start time as it depands on the time that messages were + // We cannot check the exact window start time as it depends on the time that messages were // inserted by the producer. So here we just use a low bound to make sure the internal // conversion works. assert( @@ -836,7 +836,7 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { val topicPartition = new TopicPartition(topic, 0) // The message values are the same as their offsets to make the test easy to follow - testUtils.withTranscationalProducer { producer => + testUtils.withTransactionalProducer { producer => testStream(mapped)( StartStream(Trigger.ProcessingTime(100), clock), waitUntilBatchProcessed, @@ -959,7 +959,7 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { val topicPartition = new TopicPartition(topic, 0) // The message values are the same as their offsets to make the test easy to follow - testUtils.withTranscationalProducer { producer => + testUtils.withTransactionalProducer { producer => testStream(mapped)( StartStream(Trigger.ProcessingTime(100), clock), waitUntilBatchProcessed, @@ -1050,7 +1050,7 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { .load() .select($"value".as[String]) - testUtils.withTranscationalProducer { producer => + testUtils.withTransactionalProducer { producer => producer.beginTransaction() (0 to 3).foreach { i => producer.send(new ProducerRecord[String, String](topic, i.toString)).get() @@ -1066,7 +1066,7 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { // this case, if we forget to reset `FetchedData._nextOffsetInFetchedData` or // `FetchedData._offsetAfterPoll` (See SPARK-25495), the next batch will see incorrect // values and return wrong results hence fail the test. 
- testUtils.withTranscationalProducer { producer => + testUtils.withTransactionalProducer { producer => producer.beginTransaction() (4 to 7).foreach { i => producer.send(new ProducerRecord[String, String](topic, i.toString)).get() @@ -1779,7 +1779,7 @@ abstract class KafkaSourceSuiteBase extends KafkaSourceTest { withTable(table) { val topic = newTopic() testUtils.createTopic(topic) - testUtils.withTranscationalProducer { producer => + testUtils.withTransactionalProducer { producer => val df = spark .readStream .format("kafka") diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala index e5f3a229622e1..6f5dc0bb081ba 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala @@ -503,7 +503,7 @@ abstract class KafkaRelationSuiteBase extends QueryTest with SharedSparkSession test("read Kafka transactional messages: read_committed") { val topic = newTopic() testUtils.createTopic(topic) - testUtils.withTranscationalProducer { producer => + testUtils.withTransactionalProducer { producer => val df = spark .read .format("kafka") @@ -552,7 +552,7 @@ abstract class KafkaRelationSuiteBase extends QueryTest with SharedSparkSession test("read Kafka transactional messages: read_uncommitted") { val topic = newTopic() testUtils.createTopic(topic) - testUtils.withTranscationalProducer { producer => + testUtils.withTransactionalProducer { producer => val df = spark .read .format("kafka") diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala index 3a86352e42d2b..c5f3086b38c99 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala @@ -539,7 +539,7 @@ class KafkaTestUtils( } /** Call `f` with a `KafkaProducer` that has initialized transactions. 
*/ - def withTranscationalProducer(f: KafkaProducer[String, String] => Unit): Unit = { + def withTransactionalProducer(f: KafkaProducer[String, String] => Unit): Unit = { val props = producerConfiguration props.put("transactional.id", UUID.randomUUID().toString) val producer = new KafkaProducer[String, String](props) @@ -577,7 +577,7 @@ class KafkaTestUtils( // ensure that logs from all replicas are deleted if delete topic is marked successful assert(servers.forall(server => topicAndPartitions.forall(tp => server.getLogManager().getLog(tp).isEmpty)), - s"topic $topic still exists in log mananger") + s"topic $topic still exists in log manager") // ensure that topic is removed from all cleaner offsets assert(servers.forall(server => topicAndPartitions.forall { tp => val checkpoints = server.getLogManager().liveLogDirs.map { logDir => diff --git a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaRDDSuite.scala b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaRDDSuite.scala index d6123e16dd238..2053d3655d860 100644 --- a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaRDDSuite.scala +++ b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaRDDSuite.scala @@ -42,7 +42,7 @@ class KafkaRDDSuite extends SparkFunSuite with BeforeAndAfterAll { private val sparkConf = new SparkConf().setMaster("local[4]") .setAppName(this.getClass.getSimpleName) // Set a timeout of 10 seconds that's going to be used to fetch topics/partitions from kafka. - // Othewise the poll timeout defaults to 2 minutes and causes test cases to run longer. + // Otherwise the poll timeout defaults to 2 minutes and causes test cases to run longer. .set("spark.streaming.kafka.consumer.poll.ms", "10000") private var sc: SparkContext = _ diff --git a/external/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java b/external/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java index d704aeb507518..244873af70de9 100644 --- a/external/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java +++ b/external/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java @@ -49,7 +49,7 @@ * * Usage: JavaKinesisWordCountASL [app-name] [stream-name] [endpoint-url] [region-name] * [app-name] is the name of the consumer app, used to track the read data in DynamoDB - * [stream-name] name of the Kinesis stream (ie. mySparkStream) + * [stream-name] name of the Kinesis stream (i.e. mySparkStream) * [endpoint-url] endpoint of the Kinesis service * (e.g. https://kinesis.us-east-1.amazonaws.com) * diff --git a/external/kinesis-asl/src/main/python/examples/streaming/kinesis_wordcount_asl.py b/external/kinesis-asl/src/main/python/examples/streaming/kinesis_wordcount_asl.py index df8c64e531cfa..06ada13b52399 100644 --- a/external/kinesis-asl/src/main/python/examples/streaming/kinesis_wordcount_asl.py +++ b/external/kinesis-asl/src/main/python/examples/streaming/kinesis_wordcount_asl.py @@ -23,7 +23,7 @@ Usage: kinesis_wordcount_asl.py is the name of the consumer app, used to track the read data in DynamoDB - name of the Kinesis stream (ie. mySparkStream) + name of the Kinesis stream (i.e. mySparkStream) endpoint of the Kinesis service (e.g. https://kinesis.us-east-1.amazonaws.com) region name of the Kinesis endpoint (e.g. 
us-east-1) diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala index bbb6008c2dddf..d6a9160eed98e 100644 --- a/external/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala +++ b/external/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala @@ -43,7 +43,7 @@ import org.apache.spark.streaming.kinesis.KinesisInputDStream * * Usage: KinesisWordCountASL * is the name of the consumer app, used to track the read data in DynamoDB - * name of the Kinesis stream (ie. mySparkStream) + * name of the Kinesis stream (i.e. mySparkStream) * endpoint of the Kinesis service * (e.g. https://kinesis.us-east-1.amazonaws.com) * @@ -167,9 +167,9 @@ object KinesisWordCountASL extends Logging { * Usage: KinesisWordProducerASL \ * * - * is the name of the Kinesis stream (ie. mySparkStream) + * is the name of the Kinesis stream (i.e. mySparkStream) * is the endpoint of the Kinesis service - * (ie. https://kinesis.us-east-1.amazonaws.com) + * (i.e. https://kinesis.us-east-1.amazonaws.com) * is the rate of records per second to put onto the stream * is the number of words per record * diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtilsPythonHelper.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtilsPythonHelper.scala index c89dedd3366d1..0056438c4eefb 100644 --- a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtilsPythonHelper.scala +++ b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtilsPythonHelper.scala @@ -46,7 +46,7 @@ private class KinesisUtilsPythonHelper { // scalastyle:on if (!(stsAssumeRoleArn != null && stsSessionName != null && stsExternalId != null) && !(stsAssumeRoleArn == null && stsSessionName == null && stsExternalId == null)) { - throw new IllegalArgumentException("stsAssumeRoleArn, stsSessionName, and stsExtenalId " + + throw new IllegalArgumentException("stsAssumeRoleArn, stsSessionName, and stsExternalId " + "must all be defined or all be null") } if (awsAccessKeyId == null && awsSecretKey != null) { diff --git a/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala index a5e2fc5c9a74f..8008a89c6cd5f 100644 --- a/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala +++ b/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala @@ -274,8 +274,8 @@ class PageRankSuite extends SparkFunSuite with LocalSparkContext { withSpark { sc => // Check that implementation can handle large vertexIds, SPARK-25149 val vertexIdOffset = Int.MaxValue.toLong + 1 - val sourceOffest = 4 - val source = vertexIdOffset + sourceOffest + val sourceOffset = 4 + val source = vertexIdOffset + sourceOffset val numIter = 10 val vertices = vertexIdOffset until vertexIdOffset + numIter val chain1 = vertices.zip(vertices.tail) @@ -285,7 +285,7 @@ class PageRankSuite extends SparkFunSuite with LocalSparkContext { val tol = 0.0001 val errorTol = 1.0e-1 - val a = resetProb / (1 - Math.pow(1 - resetProb, numIter - sourceOffest)) + val a = resetProb / (1 - Math.pow(1 - resetProb, numIter - sourceOffset)) // We expect the rank to decay as (1 - resetProb) ^ distance val expectedRanks = sc.parallelize(vertices).map { vid => val rank = if (vid < 
source) { From 61ee5d8a4e3080e01abfdbd8277fa75868c257cd Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Thu, 12 Nov 2020 10:20:33 +0800 Subject: [PATCH 0450/1009] [WIP] Test (#30327) * resend * address comments * directly gen new Iter * directly gen new Iter * update blockify strategy * address comments * try to fix 2.13 * try to fix scala 2.13 * use 1.0 as the default value for gemv * update Co-authored-by: zhengruifeng --- .../spark/ml/classification/LinearSVC.scala | 93 ++++++------------- .../apache/spark/ml/feature/Instance.scala | 72 ++++++++++++++ .../ml/param/shared/SharedParamsCodeGen.scala | 7 +- .../spark/ml/param/shared/sharedParams.scala | 18 ++++ .../ml/classification/LinearSVCSuite.scala | 4 +- .../spark/ml/feature/InstanceSuite.scala | 54 +++++++++++ python/pyspark/ml/classification.py | 26 +++--- python/pyspark/ml/classification.pyi | 9 +- .../ml/param/_shared_params_code_gen.py | 6 +- python/pyspark/ml/param/shared.py | 18 ++++ python/pyspark/ml/param/shared.pyi | 5 + 11 files changed, 225 insertions(+), 87 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala index 77272c65eb231..a2e7b0fadd4cb 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala @@ -42,7 +42,7 @@ import org.apache.spark.storage.StorageLevel /** Params for linear SVM Classifier. */ private[classification] trait LinearSVCParams extends ClassifierParams with HasRegParam with HasMaxIter with HasFitIntercept with HasTol with HasStandardization with HasWeightCol - with HasAggregationDepth with HasThreshold with HasBlockSize { + with HasAggregationDepth with HasThreshold with HasBlockSizeInMB { /** * Param for threshold in binary classification prediction. @@ -57,7 +57,7 @@ private[classification] trait LinearSVCParams extends ClassifierParams with HasR "threshold in binary classification prediction applied to rawPrediction") setDefault(regParam -> 0.0, maxIter -> 100, fitIntercept -> true, tol -> 1E-6, - standardization -> true, threshold -> 0.0, aggregationDepth -> 2, blockSize -> 1) + standardization -> true, threshold -> 0.0, aggregationDepth -> 2, blockSizeInMB -> 0.0) } /** @@ -153,22 +153,13 @@ class LinearSVC @Since("2.2.0") ( def setAggregationDepth(value: Int): this.type = set(aggregationDepth, value) /** - * Set block size for stacking input data in matrices. - * If blockSize == 1, then stacking will be skipped, and each vector is treated individually; - * If blockSize > 1, then vectors will be stacked to blocks, and high-level BLAS routines - * will be used if possible (for example, GEMV instead of DOT, GEMM instead of GEMV). - * Recommended size is between 10 and 1000. An appropriate choice of the block size depends - * on the sparsity and dim of input datasets, the underlying BLAS implementation (for example, - * f2jBLAS, OpenBLAS, intel MKL) and its configuration (for example, number of threads). - * Note that existing BLAS implementations are mainly optimized for dense matrices, if the - * input dataset is sparse, stacking may bring no performance gain, the worse is possible - * performance regression. - * Default is 1. + * Sets the value of param [[blockSizeInMB]]. + * Default is 0.0. 
* * @group expertSetParam */ @Since("3.1.0") - def setBlockSize(value: Int): this.type = set(blockSize, value) + def setBlockSizeInMB(value: Double): this.type = set(blockSizeInMB, value) @Since("2.2.0") override def copy(extra: ParamMap): LinearSVC = defaultCopy(extra) @@ -177,19 +168,19 @@ class LinearSVC @Since("2.2.0") ( instr.logPipelineStage(this) instr.logDataset(dataset) instr.logParams(this, labelCol, weightCol, featuresCol, predictionCol, rawPredictionCol, - regParam, maxIter, fitIntercept, tol, standardization, threshold, aggregationDepth, blockSize) + regParam, maxIter, fitIntercept, tol, standardization, threshold, aggregationDepth, + blockSizeInMB) + + if (dataset.storageLevel != StorageLevel.NONE) { + instr.logWarning(s"Input instances will be standardized, blockified to blocks, and " + + s"then cached during training. Be careful of double caching!") + } val instances = extractInstances(dataset) .setName("training instances") - if (dataset.storageLevel == StorageLevel.NONE && $(blockSize) == 1) { - instances.persist(StorageLevel.MEMORY_AND_DISK) - } - - var requestedMetrics = Seq("mean", "std", "count") - if ($(blockSize) != 1) requestedMetrics +:= "numNonZeros" val (summarizer, labelSummarizer) = Summarizer - .getClassificationSummarizers(instances, $(aggregationDepth), requestedMetrics) + .getClassificationSummarizers(instances, $(aggregationDepth), Seq("mean", "std", "count")) val histogram = labelSummarizer.histogram val numInvalid = labelSummarizer.countInvalid @@ -199,14 +190,12 @@ class LinearSVC @Since("2.2.0") ( instr.logNamedValue("lowestLabelWeight", labelSummarizer.histogram.min.toString) instr.logNamedValue("highestLabelWeight", labelSummarizer.histogram.max.toString) instr.logSumOfWeights(summarizer.weightSum) - if ($(blockSize) > 1) { - val scale = 1.0 / summarizer.count / numFeatures - val sparsity = 1 - summarizer.numNonzeros.toArray.map(_ * scale).sum - instr.logNamedValue("sparsity", sparsity.toString) - if (sparsity > 0.5) { - instr.logWarning(s"sparsity of input dataset is $sparsity, " + - s"which may hurt performance in high-level BLAS.") - } + + var actualBlockSizeInMB = $(blockSizeInMB) + if (actualBlockSizeInMB == 0) { + actualBlockSizeInMB = InstanceBlock.DefaultBlockSizeInMB + require(actualBlockSizeInMB > 0, "inferred actual BlockSizeInMB must > 0") + instr.logNamedValue("actualBlockSizeInMB", actualBlockSizeInMB.toString) } val numClasses = MetadataUtils.getNumClasses(dataset.schema($(labelCol))) match { @@ -245,12 +234,8 @@ class LinearSVC @Since("2.2.0") ( Note that the intercept in scaled space and original space is the same; as a result, no scaling is needed. */ - val (rawCoefficients, objectiveHistory) = if ($(blockSize) == 1) { - trainOnRows(instances, featuresStd, regularization, optimizer) - } else { - trainOnBlocks(instances, featuresStd, regularization, optimizer) - } - if (instances.getStorageLevel != StorageLevel.NONE) instances.unpersist() + val (rawCoefficients, objectiveHistory) = + trainImpl(instances, actualBlockSizeInMB, featuresStd, regularization, optimizer) if (rawCoefficients == null) { val msg = s"${optimizer.getClass.getName} failed." 
@@ -284,35 +269,9 @@ class LinearSVC @Since("2.2.0") ( model.setSummary(Some(summary)) } - private def trainOnRows( - instances: RDD[Instance], - featuresStd: Array[Double], - regularization: Option[L2Regularization], - optimizer: BreezeOWLQN[Int, BDV[Double]]): (Array[Double], Array[Double]) = { - val numFeatures = featuresStd.length - val numFeaturesPlusIntercept = if ($(fitIntercept)) numFeatures + 1 else numFeatures - - val bcFeaturesStd = instances.context.broadcast(featuresStd) - val getAggregatorFunc = new HingeAggregator(bcFeaturesStd, $(fitIntercept))(_) - val costFun = new RDDLossFunction(instances, getAggregatorFunc, - regularization, $(aggregationDepth)) - - val states = optimizer.iterations(new CachedDiffFunction(costFun), - Vectors.zeros(numFeaturesPlusIntercept).asBreeze.toDenseVector) - - val arrayBuilder = mutable.ArrayBuilder.make[Double] - var state: optimizer.State = null - while (states.hasNext) { - state = states.next() - arrayBuilder += state.adjustedValue - } - bcFeaturesStd.destroy() - - (if (state != null) state.x.toArray else null, arrayBuilder.result) - } - - private def trainOnBlocks( + private def trainImpl( instances: RDD[Instance], + actualBlockSizeInMB: Double, featuresStd: Array[Double], regularization: Option[L2Regularization], optimizer: BreezeOWLQN[Int, BDV[Double]]): (Array[Double], Array[Double]) = { @@ -326,9 +285,11 @@ class LinearSVC @Since("2.2.0") ( val func = StandardScalerModel.getTransformFunc(Array.empty, inverseStd, false, true) iter.map { case Instance(label, weight, vec) => Instance(label, weight, func(vec)) } } - val blocks = InstanceBlock.blokify(standardized, $(blockSize)) + + val maxMemUsage = (actualBlockSizeInMB * 1024L * 1024L).ceil.toLong + val blocks = InstanceBlock.blokifyWithMaxMemUsage(standardized, maxMemUsage) .persist(StorageLevel.MEMORY_AND_DISK) - .setName(s"training blocks (blockSize=${$(blockSize)})") + .setName(s"training blocks (blockSizeInMB=$actualBlockSizeInMB)") val getAggregatorFunc = new BlockHingeAggregator($(fitIntercept))(_) val costFun = new RDDLossFunction(blocks, getAggregatorFunc, diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala index db5f88d5dddc8..0b47c48e9a922 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala @@ -17,6 +17,8 @@ package org.apache.spark.ml.feature +import scala.collection.mutable + import org.apache.spark.ml.linalg._ import org.apache.spark.rdd.RDD @@ -100,6 +102,32 @@ private[spark] case class InstanceBlock( private[spark] object InstanceBlock { + /** + * Suggested value for BlockSizeInMB in Level-2 routine cases. + * According to performance tests of BLAS routine (see SPARK-31714) and + * LinearSVC (see SPARK-32907), 1.0 MB should be an acceptable value for + * linear models using Level-2 routine (GEMV) to perform prediction and + * gradient computation. 
+ */ + val DefaultBlockSizeInMB = 1.0 + + private def getBlockMemUsage( + numCols: Long, + numRows: Long, + nnz: Long, + allUnitWeight: Boolean): Long = { + val doubleBytes = java.lang.Double.BYTES + val arrayHeader = 12L + val denseSize = Matrices.getDenseSize(numCols, numRows) + val sparseSize = Matrices.getSparseSize(nnz, numRows + 1) + val matrixSize = math.min(denseSize, sparseSize) + if (allUnitWeight) { + matrixSize + doubleBytes * numRows + arrayHeader * 2 + } else { + matrixSize + doubleBytes * numRows * 2 + arrayHeader * 2 + } + } + def fromInstances(instances: Seq[Instance]): InstanceBlock = { val labels = instances.map(_.label).toArray val weights = if (instances.exists(_.weight != 1)) { @@ -114,6 +142,50 @@ private[spark] object InstanceBlock { def blokify(instances: RDD[Instance], blockSize: Int): RDD[InstanceBlock] = { instances.mapPartitions(_.grouped(blockSize).map(InstanceBlock.fromInstances)) } + + def blokifyWithMaxMemUsage( + instanceIterator: Iterator[Instance], + maxMemUsage: Long): Iterator[InstanceBlock] = { + require(maxMemUsage > 0) + + new Iterator[InstanceBlock]() { + private var numCols = -1L + + override def hasNext: Boolean = instanceIterator.hasNext + + override def next(): InstanceBlock = { + val buff = mutable.ArrayBuilder.make[Instance] + var buffCnt = 0L + var buffNnz = 0L + var buffUnitWeight = true + var blockMemUsage = 0L + + while (instanceIterator.hasNext && blockMemUsage < maxMemUsage) { + val instance: Instance = instanceIterator.next() + if (numCols < 0L) numCols = instance.features.size + require(numCols == instance.features.size) + val nnz = instance.features.numNonzeros + + buff += instance + buffCnt += 1L + buffNnz += nnz + buffUnitWeight &&= (instance.weight == 1) + blockMemUsage = getBlockMemUsage(numCols, buffCnt, buffNnz, buffUnitWeight) + } + + // the block mem usage may slightly exceed threshold, not a big issue. + // and this ensure even if one row exceed block limit, each block has one row + InstanceBlock.fromInstances(buff.result()) + } + } + } + + def blokifyWithMaxMemUsage( + instances: RDD[Instance], + maxMemUsage: Long): RDD[InstanceBlock] = { + require(maxMemUsage > 0) + instances.mapPartitions(iter => blokifyWithMaxMemUsage(iter, maxMemUsage)) + } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala index 7fd5f5938b565..64261bdfac7d5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala @@ -108,7 +108,12 @@ private[shared] object SharedParamsCodeGen { ParamDesc[Int]("blockSize", "block size for stacking input data in matrices. Data is " + "stacked within partitions. If block size is more than remaining data in a partition " + "then it is adjusted to the size of this data.", - isValid = "ParamValidators.gt(0)", isExpertParam = true) + isValid = "ParamValidators.gt(0)", isExpertParam = true), + ParamDesc[Double]("blockSizeInMB", "Maximum memory in MB for stacking input data " + + "in blocks. Data is stacked within partitions. If more than remaining data size in a " + + "partition then it is adjusted to the data size. If 0, try to infer an appropriate value " + + "based on the statistics of dataset. 
Must be >= 0.", + Some("0.0"), isValid = "ParamValidators.gtEq(0.0)", isExpertParam = true) ) val code = genSharedParams(params) diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala index 60203eba61ea5..1c741545dade0 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala @@ -562,4 +562,22 @@ trait HasBlockSize extends Params { /** @group expertGetParam */ final def getBlockSize: Int = $(blockSize) } + +/** + * Trait for shared param blockSizeInMB (default: 0.0). This trait may be changed or + * removed between minor versions. + */ +trait HasBlockSizeInMB extends Params { + + /** + * Param for Maximum memory in MB for stacking input data in blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value based on the statistics of dataset. Must be >= 0.. + * @group expertParam + */ + final val blockSizeInMB: DoubleParam = new DoubleParam(this, "blockSizeInMB", "Maximum memory in MB for stacking input data in blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value based on the statistics of dataset. Must be >= 0.", ParamValidators.gtEq(0.0)) + + setDefault(blockSizeInMB, 0.0) + + /** @group expertGetParam */ + final def getBlockSizeInMB: Double = $(blockSizeInMB) +} // scalastyle:on diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala index a66397324c1a6..55558f06ee362 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala @@ -214,8 +214,8 @@ class LinearSVCSuite extends MLTest with DefaultReadWriteTest { .setFitIntercept(fitIntercept) .setMaxIter(5) val model = lsvc.fit(dataset) - Seq(4, 16, 64).foreach { blockSize => - val model2 = lsvc.setBlockSize(blockSize).fit(dataset) + Seq(0, 0.01, 0.1, 1, 2, 4).foreach { s => + val model2 = lsvc.setBlockSizeInMB(s).fit(dataset) assert(model.intercept ~== model2.intercept relTol 1e-9) assert(model.coefficients ~== model2.coefficients relTol 1e-9) } diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuite.scala index d780bdf5f5dc8..f1e071357bab7 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuite.scala @@ -74,4 +74,58 @@ class InstanceSuite extends SparkFunSuite{ } } + test("InstanceBlock: blokify with max memory usage") { + val instance1 = Instance(19.0, 2.0, Vectors.dense(1.0, 7.0)) + val instance2 = Instance(17.0, 1.0, Vectors.dense(0.0, 5.0).toSparse) + val instances = Seq(instance1, instance2) + + val blocks = InstanceBlock + .blokifyWithMaxMemUsage(Iterator.apply(instance1, instance2), 128).toArray + require(blocks.length == 1) + val block = blocks.head + assert(block.size === 2) + assert(block.numFeatures === 2) + block.instanceIterator.zipWithIndex.foreach { + case (instance, i) => + assert(instance.label === instances(i).label) + assert(instance.weight === instances(i).weight) + 
assert(instance.features.toArray === instances(i).features.toArray) + } + Seq(0, 1).foreach { i => + val nzIter = block.getNonZeroIter(i) + val vec = Vectors.sparse(2, nzIter.toSeq) + assert(vec.toArray === instances(i).features.toArray) + } + + // instances larger than maxMemUsage + val denseInstance = Instance(-1.0, 2.0, Vectors.dense(Array.fill(1000)(1.0))) + InstanceBlock.blokifyWithMaxMemUsage(Iterator.single(denseInstance), 64).size + InstanceBlock.blokifyWithMaxMemUsage(Iterator.fill(10)(denseInstance), 64).size + + // different numFeatures + intercept[IllegalArgumentException] { + InstanceBlock.blokifyWithMaxMemUsage(Iterator.apply(instance1, denseInstance), 64).size + } + + // nnz = 10 + val sparseInstance = Instance(-2.0, 3.0, + Vectors.sparse(1000, Array.range(0, 1000, 100), Array.fill(10)(0.1))) + + // normally, memory usage of a block does not exceed maxMemUsage too much + val maxMemUsage = 1 << 18 + val mixedIter = Iterator.fill(100)(denseInstance) ++ + Iterator.fill(1000)(sparseInstance) ++ + Iterator.fill(10)(denseInstance) ++ + Iterator.fill(10)(sparseInstance) ++ + Iterator.fill(100)(denseInstance) ++ + Iterator.fill(100)(sparseInstance) + InstanceBlock.blokifyWithMaxMemUsage(mixedIter, maxMemUsage) + .foreach { block => + val doubleBytes = java.lang.Double.BYTES + val arrayHeader = 12L + val blockMemUsage = block.matrix.getSizeInBytes + + (block.labels.length + block.weights.length) * doubleBytes + arrayHeader * 2 + require(blockMemUsage < maxMemUsage * 1.05) + } + } } diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index d6c861361a248..f96bbd4d33577 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -26,8 +26,8 @@ from pyspark.ml import Estimator, Predictor, PredictionModel, Model from pyspark.ml.param.shared import HasRawPredictionCol, HasProbabilityCol, HasThresholds, \ HasRegParam, HasMaxIter, HasFitIntercept, HasTol, HasStandardization, HasWeightCol, \ - HasAggregationDepth, HasThreshold, HasBlockSize, Param, Params, TypeConverters, \ - HasElasticNetParam, HasSeed, HasStepSize, HasSolver, HasParallelism + HasAggregationDepth, HasThreshold, HasBlockSize, HasBlockSizeInMB, Param, Params, \ + TypeConverters, HasElasticNetParam, HasSeed, HasStepSize, HasSolver, HasParallelism from pyspark.ml.tree import _DecisionTreeModel, _DecisionTreeParams, \ _TreeEnsembleModel, _RandomForestParams, _GBTParams, \ _HasVarianceImpurity, _TreeClassifierParams @@ -504,7 +504,7 @@ def recallByThreshold(self): class _LinearSVCParams(_ClassifierParams, HasRegParam, HasMaxIter, HasFitIntercept, HasTol, HasStandardization, HasWeightCol, HasAggregationDepth, HasThreshold, - HasBlockSize): + HasBlockSizeInMB): """ Params for :py:class:`LinearSVC` and :py:class:`LinearSVCModel`. @@ -521,7 +521,7 @@ def __init__(self, *args): super(_LinearSVCParams, self).__init__(*args) self._setDefault(maxIter=100, regParam=0.0, tol=1e-6, fitIntercept=True, standardization=True, threshold=0.0, aggregationDepth=2, - blockSize=1) + blockSizeInMB=0.0) @inherit_doc @@ -565,8 +565,8 @@ class LinearSVC(_JavaClassifier, _LinearSVCParams, JavaMLWritable, JavaMLReadabl LinearSVCModel... 
>>> model.getThreshold() 0.5 - >>> model.getBlockSize() - 1 + >>> model.getBlockSizeInMB() + 0.0 >>> model.coefficients DenseVector([0.0, -0.2792, -0.1833]) >>> model.intercept @@ -605,12 +605,12 @@ class LinearSVC(_JavaClassifier, _LinearSVCParams, JavaMLWritable, JavaMLReadabl def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="prediction", maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, - aggregationDepth=2, blockSize=1): + aggregationDepth=2, blockSizeInMB=0.0): """ __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", \ fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, \ - aggregationDepth=2, blockSize=1): + aggregationDepth=2, blockSizeInMB=0.0): """ super(LinearSVC, self).__init__() self._java_obj = self._new_java_obj( @@ -623,12 +623,12 @@ def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="p def setParams(self, *, featuresCol="features", labelCol="label", predictionCol="prediction", maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, - aggregationDepth=2, blockSize=1): + aggregationDepth=2, blockSizeInMB=0.0): """ setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", \ fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, \ - aggregationDepth=2, blockSize=1): + aggregationDepth=2, blockSizeInMB=0.0): Sets params for Linear SVM Classifier. """ kwargs = self._input_kwargs @@ -694,11 +694,11 @@ def setAggregationDepth(self, value): return self._set(aggregationDepth=value) @since("3.1.0") - def setBlockSize(self, value): + def setBlockSizeInMB(self, value): """ - Sets the value of :py:attr:`blockSize`. + Sets the value of :py:attr:`blockSizeInMB`. """ - return self._set(blockSize=value) + return self._set(blockSizeInMB=value) class LinearSVCModel(_JavaClassificationModel, _LinearSVCParams, JavaMLWritable, JavaMLReadable, diff --git a/python/pyspark/ml/classification.pyi b/python/pyspark/ml/classification.pyi index 55afc20a54cb9..241f5baf8dfd4 100644 --- a/python/pyspark/ml/classification.pyi +++ b/python/pyspark/ml/classification.pyi @@ -26,6 +26,7 @@ from pyspark.ml.base import _PredictorParams from pyspark.ml.param.shared import ( HasAggregationDepth, HasBlockSize, + HasBlockSizeInMB, HasElasticNetParam, HasFitIntercept, HasMaxIter, @@ -172,7 +173,7 @@ class _LinearSVCParams( HasWeightCol, HasAggregationDepth, HasThreshold, - HasBlockSize, + HasBlockSizeInMB, ): threshold: Param[float] def __init__(self, *args: Any) -> None: ... @@ -198,7 +199,7 @@ class LinearSVC( threshold: float = ..., weightCol: Optional[str] = ..., aggregationDepth: int = ..., - blockSize: int = ... + blockSizeInMB: float = ... ) -> None: ... def setParams( self, @@ -215,7 +216,7 @@ class LinearSVC( threshold: float = ..., weightCol: Optional[str] = ..., aggregationDepth: int = ..., - blockSize: int = ... + blockSizeInMB: float = ... ) -> LinearSVC: ... def setMaxIter(self, value: int) -> LinearSVC: ... def setRegParam(self, value: float) -> LinearSVC: ... @@ -225,7 +226,7 @@ class LinearSVC( def setThreshold(self, value: float) -> LinearSVC: ... def setWeightCol(self, value: str) -> LinearSVC: ... 
def setAggregationDepth(self, value: int) -> LinearSVC: ... - def setBlockSize(self, value: int) -> LinearSVC: ... + def setBlockSizeInMB(self, value: float) -> LinearSVC: ... class LinearSVCModel( _JavaClassificationModel[Vector], diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index bc1ea87ad629c..b6fc170abe788 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -165,7 +165,11 @@ def get$Name(self): None, "TypeConverters.toString"), ("blockSize", "block size for stacking input data in matrices. Data is stacked within " "partitions. If block size is more than remaining data in a partition then it is " - "adjusted to the size of this data.", None, "TypeConverters.toInt")] + "adjusted to the size of this data.", None, "TypeConverters.toInt"), + ("blockSizeInMB", "maximum memory in MB for stacking input data in blocks. Data is " + + "stacked within partitions. If more than remaining data size in a partition then it " + + "is adjusted to the data size. If 0, try to infer an appropriate value based on the " + + "statistics of dataset. Must be >= 0.", "0.0", "TypeConverters.toFloat")] code = [] for name, doc, defaultValueStr, typeConverter in shared: diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index 24fb0d3e2554d..a829a2e76b380 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -597,3 +597,21 @@ def getBlockSize(self): Gets the value of blockSize or its default value. """ return self.getOrDefault(self.blockSize) + + +class HasBlockSizeInMB(Params): + """ + Mixin for param blockSizeInMB: maximum memory in MB for stacking input data in blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value based on the statistics of dataset. Must be >= 0. + """ + + blockSizeInMB = Param(Params._dummy(), "blockSizeInMB", "maximum memory in MB for stacking input data in blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value based on the statistics of dataset. Must be >= 0.", typeConverter=TypeConverters.toFloat) + + def __init__(self): + super(HasBlockSizeInMB, self).__init__() + self._setDefault(blockSizeInMB=0.0) + + def getBlockSizeInMB(self): + """ + Gets the value of blockSizeInMB or its default value. + """ + return self.getOrDefault(self.blockSizeInMB) diff --git a/python/pyspark/ml/param/shared.pyi b/python/pyspark/ml/param/shared.pyi index 5999c0eaa4661..bbb4890455de7 100644 --- a/python/pyspark/ml/param/shared.pyi +++ b/python/pyspark/ml/param/shared.pyi @@ -185,3 +185,8 @@ class HasBlockSize(Params): blockSize: Param[int] def __init__(self) -> None: ... def getBlockSize(self) -> int: ... + +class HasBlockSizeInMB(Params): + blockSizeInMB: Param[float] + def __init__(self) -> None: ... + def getBlockSizeInMB(self) -> float: ... From 6244407ce60c33ec9a549011723195fe8e15f287 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Thu, 12 Nov 2020 11:32:12 +0900 Subject: [PATCH 0451/1009] Revert "[WIP] Test (#30327)" This reverts commit 61ee5d8a4e3080e01abfdbd8277fa75868c257cd. ### What changes were proposed in this pull request? I need to merge https://github.com/apache/spark/pull/30327 to https://github.com/apache/spark/pull/30009, but I merged it to master by mistake. 
### Why are the changes needed? ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? Closes #30345 from zhengruifeng/revert-30327-adaptively_blockify_linear_svc_II. Authored-by: Ruifeng Zheng Signed-off-by: HyukjinKwon --- .../spark/ml/classification/LinearSVC.scala | 93 +++++++++++++------ .../apache/spark/ml/feature/Instance.scala | 72 -------------- .../ml/param/shared/SharedParamsCodeGen.scala | 7 +- .../spark/ml/param/shared/sharedParams.scala | 18 ---- .../ml/classification/LinearSVCSuite.scala | 4 +- .../spark/ml/feature/InstanceSuite.scala | 54 ----------- python/pyspark/ml/classification.py | 26 +++--- python/pyspark/ml/classification.pyi | 9 +- .../ml/param/_shared_params_code_gen.py | 6 +- python/pyspark/ml/param/shared.py | 18 ---- python/pyspark/ml/param/shared.pyi | 5 - 11 files changed, 87 insertions(+), 225 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala index a2e7b0fadd4cb..77272c65eb231 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala @@ -42,7 +42,7 @@ import org.apache.spark.storage.StorageLevel /** Params for linear SVM Classifier. */ private[classification] trait LinearSVCParams extends ClassifierParams with HasRegParam with HasMaxIter with HasFitIntercept with HasTol with HasStandardization with HasWeightCol - with HasAggregationDepth with HasThreshold with HasBlockSizeInMB { + with HasAggregationDepth with HasThreshold with HasBlockSize { /** * Param for threshold in binary classification prediction. @@ -57,7 +57,7 @@ private[classification] trait LinearSVCParams extends ClassifierParams with HasR "threshold in binary classification prediction applied to rawPrediction") setDefault(regParam -> 0.0, maxIter -> 100, fitIntercept -> true, tol -> 1E-6, - standardization -> true, threshold -> 0.0, aggregationDepth -> 2, blockSizeInMB -> 0.0) + standardization -> true, threshold -> 0.0, aggregationDepth -> 2, blockSize -> 1) } /** @@ -153,13 +153,22 @@ class LinearSVC @Since("2.2.0") ( def setAggregationDepth(value: Int): this.type = set(aggregationDepth, value) /** - * Sets the value of param [[blockSizeInMB]]. - * Default is 0.0. + * Set block size for stacking input data in matrices. + * If blockSize == 1, then stacking will be skipped, and each vector is treated individually; + * If blockSize > 1, then vectors will be stacked to blocks, and high-level BLAS routines + * will be used if possible (for example, GEMV instead of DOT, GEMM instead of GEMV). + * Recommended size is between 10 and 1000. An appropriate choice of the block size depends + * on the sparsity and dim of input datasets, the underlying BLAS implementation (for example, + * f2jBLAS, OpenBLAS, intel MKL) and its configuration (for example, number of threads). + * Note that existing BLAS implementations are mainly optimized for dense matrices, if the + * input dataset is sparse, stacking may bring no performance gain, the worse is possible + * performance regression. + * Default is 1. 
* * @group expertSetParam */ @Since("3.1.0") - def setBlockSizeInMB(value: Double): this.type = set(blockSizeInMB, value) + def setBlockSize(value: Int): this.type = set(blockSize, value) @Since("2.2.0") override def copy(extra: ParamMap): LinearSVC = defaultCopy(extra) @@ -168,19 +177,19 @@ class LinearSVC @Since("2.2.0") ( instr.logPipelineStage(this) instr.logDataset(dataset) instr.logParams(this, labelCol, weightCol, featuresCol, predictionCol, rawPredictionCol, - regParam, maxIter, fitIntercept, tol, standardization, threshold, aggregationDepth, - blockSizeInMB) - - if (dataset.storageLevel != StorageLevel.NONE) { - instr.logWarning(s"Input instances will be standardized, blockified to blocks, and " + - s"then cached during training. Be careful of double caching!") - } + regParam, maxIter, fitIntercept, tol, standardization, threshold, aggregationDepth, blockSize) val instances = extractInstances(dataset) .setName("training instances") + if (dataset.storageLevel == StorageLevel.NONE && $(blockSize) == 1) { + instances.persist(StorageLevel.MEMORY_AND_DISK) + } + + var requestedMetrics = Seq("mean", "std", "count") + if ($(blockSize) != 1) requestedMetrics +:= "numNonZeros" val (summarizer, labelSummarizer) = Summarizer - .getClassificationSummarizers(instances, $(aggregationDepth), Seq("mean", "std", "count")) + .getClassificationSummarizers(instances, $(aggregationDepth), requestedMetrics) val histogram = labelSummarizer.histogram val numInvalid = labelSummarizer.countInvalid @@ -190,12 +199,14 @@ class LinearSVC @Since("2.2.0") ( instr.logNamedValue("lowestLabelWeight", labelSummarizer.histogram.min.toString) instr.logNamedValue("highestLabelWeight", labelSummarizer.histogram.max.toString) instr.logSumOfWeights(summarizer.weightSum) - - var actualBlockSizeInMB = $(blockSizeInMB) - if (actualBlockSizeInMB == 0) { - actualBlockSizeInMB = InstanceBlock.DefaultBlockSizeInMB - require(actualBlockSizeInMB > 0, "inferred actual BlockSizeInMB must > 0") - instr.logNamedValue("actualBlockSizeInMB", actualBlockSizeInMB.toString) + if ($(blockSize) > 1) { + val scale = 1.0 / summarizer.count / numFeatures + val sparsity = 1 - summarizer.numNonzeros.toArray.map(_ * scale).sum + instr.logNamedValue("sparsity", sparsity.toString) + if (sparsity > 0.5) { + instr.logWarning(s"sparsity of input dataset is $sparsity, " + + s"which may hurt performance in high-level BLAS.") + } } val numClasses = MetadataUtils.getNumClasses(dataset.schema($(labelCol))) match { @@ -234,8 +245,12 @@ class LinearSVC @Since("2.2.0") ( Note that the intercept in scaled space and original space is the same; as a result, no scaling is needed. */ - val (rawCoefficients, objectiveHistory) = - trainImpl(instances, actualBlockSizeInMB, featuresStd, regularization, optimizer) + val (rawCoefficients, objectiveHistory) = if ($(blockSize) == 1) { + trainOnRows(instances, featuresStd, regularization, optimizer) + } else { + trainOnBlocks(instances, featuresStd, regularization, optimizer) + } + if (instances.getStorageLevel != StorageLevel.NONE) instances.unpersist() if (rawCoefficients == null) { val msg = s"${optimizer.getClass.getName} failed." 
@@ -269,9 +284,35 @@ class LinearSVC @Since("2.2.0") ( model.setSummary(Some(summary)) } - private def trainImpl( + private def trainOnRows( + instances: RDD[Instance], + featuresStd: Array[Double], + regularization: Option[L2Regularization], + optimizer: BreezeOWLQN[Int, BDV[Double]]): (Array[Double], Array[Double]) = { + val numFeatures = featuresStd.length + val numFeaturesPlusIntercept = if ($(fitIntercept)) numFeatures + 1 else numFeatures + + val bcFeaturesStd = instances.context.broadcast(featuresStd) + val getAggregatorFunc = new HingeAggregator(bcFeaturesStd, $(fitIntercept))(_) + val costFun = new RDDLossFunction(instances, getAggregatorFunc, + regularization, $(aggregationDepth)) + + val states = optimizer.iterations(new CachedDiffFunction(costFun), + Vectors.zeros(numFeaturesPlusIntercept).asBreeze.toDenseVector) + + val arrayBuilder = mutable.ArrayBuilder.make[Double] + var state: optimizer.State = null + while (states.hasNext) { + state = states.next() + arrayBuilder += state.adjustedValue + } + bcFeaturesStd.destroy() + + (if (state != null) state.x.toArray else null, arrayBuilder.result) + } + + private def trainOnBlocks( instances: RDD[Instance], - actualBlockSizeInMB: Double, featuresStd: Array[Double], regularization: Option[L2Regularization], optimizer: BreezeOWLQN[Int, BDV[Double]]): (Array[Double], Array[Double]) = { @@ -285,11 +326,9 @@ class LinearSVC @Since("2.2.0") ( val func = StandardScalerModel.getTransformFunc(Array.empty, inverseStd, false, true) iter.map { case Instance(label, weight, vec) => Instance(label, weight, func(vec)) } } - - val maxMemUsage = (actualBlockSizeInMB * 1024L * 1024L).ceil.toLong - val blocks = InstanceBlock.blokifyWithMaxMemUsage(standardized, maxMemUsage) + val blocks = InstanceBlock.blokify(standardized, $(blockSize)) .persist(StorageLevel.MEMORY_AND_DISK) - .setName(s"training blocks (blockSizeInMB=$actualBlockSizeInMB)") + .setName(s"training blocks (blockSize=${$(blockSize)})") val getAggregatorFunc = new BlockHingeAggregator($(fitIntercept))(_) val costFun = new RDDLossFunction(blocks, getAggregatorFunc, diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala index 0b47c48e9a922..db5f88d5dddc8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala @@ -17,8 +17,6 @@ package org.apache.spark.ml.feature -import scala.collection.mutable - import org.apache.spark.ml.linalg._ import org.apache.spark.rdd.RDD @@ -102,32 +100,6 @@ private[spark] case class InstanceBlock( private[spark] object InstanceBlock { - /** - * Suggested value for BlockSizeInMB in Level-2 routine cases. - * According to performance tests of BLAS routine (see SPARK-31714) and - * LinearSVC (see SPARK-32907), 1.0 MB should be an acceptable value for - * linear models using Level-2 routine (GEMV) to perform prediction and - * gradient computation. 
- */ - val DefaultBlockSizeInMB = 1.0 - - private def getBlockMemUsage( - numCols: Long, - numRows: Long, - nnz: Long, - allUnitWeight: Boolean): Long = { - val doubleBytes = java.lang.Double.BYTES - val arrayHeader = 12L - val denseSize = Matrices.getDenseSize(numCols, numRows) - val sparseSize = Matrices.getSparseSize(nnz, numRows + 1) - val matrixSize = math.min(denseSize, sparseSize) - if (allUnitWeight) { - matrixSize + doubleBytes * numRows + arrayHeader * 2 - } else { - matrixSize + doubleBytes * numRows * 2 + arrayHeader * 2 - } - } - def fromInstances(instances: Seq[Instance]): InstanceBlock = { val labels = instances.map(_.label).toArray val weights = if (instances.exists(_.weight != 1)) { @@ -142,50 +114,6 @@ private[spark] object InstanceBlock { def blokify(instances: RDD[Instance], blockSize: Int): RDD[InstanceBlock] = { instances.mapPartitions(_.grouped(blockSize).map(InstanceBlock.fromInstances)) } - - def blokifyWithMaxMemUsage( - instanceIterator: Iterator[Instance], - maxMemUsage: Long): Iterator[InstanceBlock] = { - require(maxMemUsage > 0) - - new Iterator[InstanceBlock]() { - private var numCols = -1L - - override def hasNext: Boolean = instanceIterator.hasNext - - override def next(): InstanceBlock = { - val buff = mutable.ArrayBuilder.make[Instance] - var buffCnt = 0L - var buffNnz = 0L - var buffUnitWeight = true - var blockMemUsage = 0L - - while (instanceIterator.hasNext && blockMemUsage < maxMemUsage) { - val instance: Instance = instanceIterator.next() - if (numCols < 0L) numCols = instance.features.size - require(numCols == instance.features.size) - val nnz = instance.features.numNonzeros - - buff += instance - buffCnt += 1L - buffNnz += nnz - buffUnitWeight &&= (instance.weight == 1) - blockMemUsage = getBlockMemUsage(numCols, buffCnt, buffNnz, buffUnitWeight) - } - - // the block mem usage may slightly exceed threshold, not a big issue. - // and this ensure even if one row exceed block limit, each block has one row - InstanceBlock.fromInstances(buff.result()) - } - } - } - - def blokifyWithMaxMemUsage( - instances: RDD[Instance], - maxMemUsage: Long): RDD[InstanceBlock] = { - require(maxMemUsage > 0) - instances.mapPartitions(iter => blokifyWithMaxMemUsage(iter, maxMemUsage)) - } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala index 64261bdfac7d5..7fd5f5938b565 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala @@ -108,12 +108,7 @@ private[shared] object SharedParamsCodeGen { ParamDesc[Int]("blockSize", "block size for stacking input data in matrices. Data is " + "stacked within partitions. If block size is more than remaining data in a partition " + "then it is adjusted to the size of this data.", - isValid = "ParamValidators.gt(0)", isExpertParam = true), - ParamDesc[Double]("blockSizeInMB", "Maximum memory in MB for stacking input data " + - "in blocks. Data is stacked within partitions. If more than remaining data size in a " + - "partition then it is adjusted to the data size. If 0, try to infer an appropriate value " + - "based on the statistics of dataset. 
Must be >= 0.", - Some("0.0"), isValid = "ParamValidators.gtEq(0.0)", isExpertParam = true) + isValid = "ParamValidators.gt(0)", isExpertParam = true) ) val code = genSharedParams(params) diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala index 1c741545dade0..60203eba61ea5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala @@ -562,22 +562,4 @@ trait HasBlockSize extends Params { /** @group expertGetParam */ final def getBlockSize: Int = $(blockSize) } - -/** - * Trait for shared param blockSizeInMB (default: 0.0). This trait may be changed or - * removed between minor versions. - */ -trait HasBlockSizeInMB extends Params { - - /** - * Param for Maximum memory in MB for stacking input data in blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value based on the statistics of dataset. Must be >= 0.. - * @group expertParam - */ - final val blockSizeInMB: DoubleParam = new DoubleParam(this, "blockSizeInMB", "Maximum memory in MB for stacking input data in blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value based on the statistics of dataset. Must be >= 0.", ParamValidators.gtEq(0.0)) - - setDefault(blockSizeInMB, 0.0) - - /** @group expertGetParam */ - final def getBlockSizeInMB: Double = $(blockSizeInMB) -} // scalastyle:on diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala index 55558f06ee362..a66397324c1a6 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala @@ -214,8 +214,8 @@ class LinearSVCSuite extends MLTest with DefaultReadWriteTest { .setFitIntercept(fitIntercept) .setMaxIter(5) val model = lsvc.fit(dataset) - Seq(0, 0.01, 0.1, 1, 2, 4).foreach { s => - val model2 = lsvc.setBlockSizeInMB(s).fit(dataset) + Seq(4, 16, 64).foreach { blockSize => + val model2 = lsvc.setBlockSize(blockSize).fit(dataset) assert(model.intercept ~== model2.intercept relTol 1e-9) assert(model.coefficients ~== model2.coefficients relTol 1e-9) } diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuite.scala index f1e071357bab7..d780bdf5f5dc8 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuite.scala @@ -74,58 +74,4 @@ class InstanceSuite extends SparkFunSuite{ } } - test("InstanceBlock: blokify with max memory usage") { - val instance1 = Instance(19.0, 2.0, Vectors.dense(1.0, 7.0)) - val instance2 = Instance(17.0, 1.0, Vectors.dense(0.0, 5.0).toSparse) - val instances = Seq(instance1, instance2) - - val blocks = InstanceBlock - .blokifyWithMaxMemUsage(Iterator.apply(instance1, instance2), 128).toArray - require(blocks.length == 1) - val block = blocks.head - assert(block.size === 2) - assert(block.numFeatures === 2) - block.instanceIterator.zipWithIndex.foreach { - case (instance, i) => - assert(instance.label === instances(i).label) - 
assert(instance.weight === instances(i).weight) - assert(instance.features.toArray === instances(i).features.toArray) - } - Seq(0, 1).foreach { i => - val nzIter = block.getNonZeroIter(i) - val vec = Vectors.sparse(2, nzIter.toSeq) - assert(vec.toArray === instances(i).features.toArray) - } - - // instances larger than maxMemUsage - val denseInstance = Instance(-1.0, 2.0, Vectors.dense(Array.fill(1000)(1.0))) - InstanceBlock.blokifyWithMaxMemUsage(Iterator.single(denseInstance), 64).size - InstanceBlock.blokifyWithMaxMemUsage(Iterator.fill(10)(denseInstance), 64).size - - // different numFeatures - intercept[IllegalArgumentException] { - InstanceBlock.blokifyWithMaxMemUsage(Iterator.apply(instance1, denseInstance), 64).size - } - - // nnz = 10 - val sparseInstance = Instance(-2.0, 3.0, - Vectors.sparse(1000, Array.range(0, 1000, 100), Array.fill(10)(0.1))) - - // normally, memory usage of a block does not exceed maxMemUsage too much - val maxMemUsage = 1 << 18 - val mixedIter = Iterator.fill(100)(denseInstance) ++ - Iterator.fill(1000)(sparseInstance) ++ - Iterator.fill(10)(denseInstance) ++ - Iterator.fill(10)(sparseInstance) ++ - Iterator.fill(100)(denseInstance) ++ - Iterator.fill(100)(sparseInstance) - InstanceBlock.blokifyWithMaxMemUsage(mixedIter, maxMemUsage) - .foreach { block => - val doubleBytes = java.lang.Double.BYTES - val arrayHeader = 12L - val blockMemUsage = block.matrix.getSizeInBytes + - (block.labels.length + block.weights.length) * doubleBytes + arrayHeader * 2 - require(blockMemUsage < maxMemUsage * 1.05) - } - } } diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index f96bbd4d33577..d6c861361a248 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -26,8 +26,8 @@ from pyspark.ml import Estimator, Predictor, PredictionModel, Model from pyspark.ml.param.shared import HasRawPredictionCol, HasProbabilityCol, HasThresholds, \ HasRegParam, HasMaxIter, HasFitIntercept, HasTol, HasStandardization, HasWeightCol, \ - HasAggregationDepth, HasThreshold, HasBlockSize, HasBlockSizeInMB, Param, Params, \ - TypeConverters, HasElasticNetParam, HasSeed, HasStepSize, HasSolver, HasParallelism + HasAggregationDepth, HasThreshold, HasBlockSize, Param, Params, TypeConverters, \ + HasElasticNetParam, HasSeed, HasStepSize, HasSolver, HasParallelism from pyspark.ml.tree import _DecisionTreeModel, _DecisionTreeParams, \ _TreeEnsembleModel, _RandomForestParams, _GBTParams, \ _HasVarianceImpurity, _TreeClassifierParams @@ -504,7 +504,7 @@ def recallByThreshold(self): class _LinearSVCParams(_ClassifierParams, HasRegParam, HasMaxIter, HasFitIntercept, HasTol, HasStandardization, HasWeightCol, HasAggregationDepth, HasThreshold, - HasBlockSizeInMB): + HasBlockSize): """ Params for :py:class:`LinearSVC` and :py:class:`LinearSVCModel`. @@ -521,7 +521,7 @@ def __init__(self, *args): super(_LinearSVCParams, self).__init__(*args) self._setDefault(maxIter=100, regParam=0.0, tol=1e-6, fitIntercept=True, standardization=True, threshold=0.0, aggregationDepth=2, - blockSizeInMB=0.0) + blockSize=1) @inherit_doc @@ -565,8 +565,8 @@ class LinearSVC(_JavaClassifier, _LinearSVCParams, JavaMLWritable, JavaMLReadabl LinearSVCModel... 
>>> model.getThreshold() 0.5 - >>> model.getBlockSizeInMB() - 0.0 + >>> model.getBlockSize() + 1 >>> model.coefficients DenseVector([0.0, -0.2792, -0.1833]) >>> model.intercept @@ -605,12 +605,12 @@ class LinearSVC(_JavaClassifier, _LinearSVCParams, JavaMLWritable, JavaMLReadabl def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="prediction", maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, - aggregationDepth=2, blockSizeInMB=0.0): + aggregationDepth=2, blockSize=1): """ __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", \ fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, \ - aggregationDepth=2, blockSizeInMB=0.0): + aggregationDepth=2, blockSize=1): """ super(LinearSVC, self).__init__() self._java_obj = self._new_java_obj( @@ -623,12 +623,12 @@ def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="p def setParams(self, *, featuresCol="features", labelCol="label", predictionCol="prediction", maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, - aggregationDepth=2, blockSizeInMB=0.0): + aggregationDepth=2, blockSize=1): """ setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", \ fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, \ - aggregationDepth=2, blockSizeInMB=0.0): + aggregationDepth=2, blockSize=1): Sets params for Linear SVM Classifier. """ kwargs = self._input_kwargs @@ -694,11 +694,11 @@ def setAggregationDepth(self, value): return self._set(aggregationDepth=value) @since("3.1.0") - def setBlockSizeInMB(self, value): + def setBlockSize(self, value): """ - Sets the value of :py:attr:`blockSizeInMB`. + Sets the value of :py:attr:`blockSize`. """ - return self._set(blockSizeInMB=value) + return self._set(blockSize=value) class LinearSVCModel(_JavaClassificationModel, _LinearSVCParams, JavaMLWritable, JavaMLReadable, diff --git a/python/pyspark/ml/classification.pyi b/python/pyspark/ml/classification.pyi index 241f5baf8dfd4..55afc20a54cb9 100644 --- a/python/pyspark/ml/classification.pyi +++ b/python/pyspark/ml/classification.pyi @@ -26,7 +26,6 @@ from pyspark.ml.base import _PredictorParams from pyspark.ml.param.shared import ( HasAggregationDepth, HasBlockSize, - HasBlockSizeInMB, HasElasticNetParam, HasFitIntercept, HasMaxIter, @@ -173,7 +172,7 @@ class _LinearSVCParams( HasWeightCol, HasAggregationDepth, HasThreshold, - HasBlockSizeInMB, + HasBlockSize, ): threshold: Param[float] def __init__(self, *args: Any) -> None: ... @@ -199,7 +198,7 @@ class LinearSVC( threshold: float = ..., weightCol: Optional[str] = ..., aggregationDepth: int = ..., - blockSizeInMB: float = ... + blockSize: int = ... ) -> None: ... def setParams( self, @@ -216,7 +215,7 @@ class LinearSVC( threshold: float = ..., weightCol: Optional[str] = ..., aggregationDepth: int = ..., - blockSizeInMB: float = ... + blockSize: int = ... ) -> LinearSVC: ... def setMaxIter(self, value: int) -> LinearSVC: ... def setRegParam(self, value: float) -> LinearSVC: ... @@ -226,7 +225,7 @@ class LinearSVC( def setThreshold(self, value: float) -> LinearSVC: ... def setWeightCol(self, value: str) -> LinearSVC: ... 
def setAggregationDepth(self, value: int) -> LinearSVC: ... - def setBlockSizeInMB(self, value: float) -> LinearSVC: ... + def setBlockSize(self, value: int) -> LinearSVC: ... class LinearSVCModel( _JavaClassificationModel[Vector], diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index b6fc170abe788..bc1ea87ad629c 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -165,11 +165,7 @@ def get$Name(self): None, "TypeConverters.toString"), ("blockSize", "block size for stacking input data in matrices. Data is stacked within " "partitions. If block size is more than remaining data in a partition then it is " - "adjusted to the size of this data.", None, "TypeConverters.toInt"), - ("blockSizeInMB", "maximum memory in MB for stacking input data in blocks. Data is " + - "stacked within partitions. If more than remaining data size in a partition then it " + - "is adjusted to the data size. If 0, try to infer an appropriate value based on the " + - "statistics of dataset. Must be >= 0.", "0.0", "TypeConverters.toFloat")] + "adjusted to the size of this data.", None, "TypeConverters.toInt")] code = [] for name, doc, defaultValueStr, typeConverter in shared: diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index a829a2e76b380..24fb0d3e2554d 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -597,21 +597,3 @@ def getBlockSize(self): Gets the value of blockSize or its default value. """ return self.getOrDefault(self.blockSize) - - -class HasBlockSizeInMB(Params): - """ - Mixin for param blockSizeInMB: maximum memory in MB for stacking input data in blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value based on the statistics of dataset. Must be >= 0. - """ - - blockSizeInMB = Param(Params._dummy(), "blockSizeInMB", "maximum memory in MB for stacking input data in blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value based on the statistics of dataset. Must be >= 0.", typeConverter=TypeConverters.toFloat) - - def __init__(self): - super(HasBlockSizeInMB, self).__init__() - self._setDefault(blockSizeInMB=0.0) - - def getBlockSizeInMB(self): - """ - Gets the value of blockSizeInMB or its default value. - """ - return self.getOrDefault(self.blockSizeInMB) diff --git a/python/pyspark/ml/param/shared.pyi b/python/pyspark/ml/param/shared.pyi index bbb4890455de7..5999c0eaa4661 100644 --- a/python/pyspark/ml/param/shared.pyi +++ b/python/pyspark/ml/param/shared.pyi @@ -185,8 +185,3 @@ class HasBlockSize(Params): blockSize: Param[int] def __init__(self) -> None: ... def getBlockSize(self) -> int: ... - -class HasBlockSizeInMB(Params): - blockSizeInMB: Param[float] - def __init__(self) -> None: ... - def getBlockSizeInMB(self) -> float: ... From 9f983a68f1fdefcd033ea65999ab916b61cba8b3 Mon Sep 17 00:00:00 2001 From: Yuanjian Li Date: Thu, 12 Nov 2020 12:22:25 +0900 Subject: [PATCH 0452/1009] [SPARK-30294][SS][FOLLOW-UP] Directly override RDD methods ### Why are the changes needed? Follow the comment: https://github.com/apache/spark/pull/26935#discussion_r514697997 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing test and Mima test. 
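For reference, a minimal sketch of the pattern this follow-up applies (hypothetical names and simplified signatures, not the actual `StateStoreRDD` code): the abstract parent overrides the `RDD` hooks directly instead of exposing protected `_getPartitions`/`_getPreferredLocations` helpers that every subclass has to forward to.

```
import scala.reflect.ClassTag

import org.apache.spark.Partition
import org.apache.spark.rdd.RDD

// Simplified sketch: shared behavior lives in direct overrides on the abstract base class.
abstract class BaseStoreRDD[T: ClassTag, U: ClassTag](dataRDD: RDD[T])
  extends RDD[U](dataRDD) {

  // Inherited as-is by concrete subclasses; the real implementation resolves the executor
  // that already has the relevant StateStoreProvider loaded.
  override def getPreferredLocations(partition: Partition): Seq[String] = Seq.empty
}

// Concrete subclasses now only override getPartitions and compute.
```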
Closes #30344 from xuanyuanking/SPARK-30294-follow. Authored-by: Yuanjian Li Signed-off-by: HyukjinKwon --- .../streaming/state/StateStoreRDD.scala | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala index eda191f28bf18..b894e771a6fe2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala @@ -29,7 +29,6 @@ import org.apache.spark.sql.internal.SessionState import org.apache.spark.sql.types.StructType import org.apache.spark.util.SerializableConfiguration -// This doesn't directly override RDD methods as MiMa complains it. abstract class BaseStateStoreRDD[T: ClassTag, U: ClassTag]( dataRDD: RDD[T], checkpointLocation: String, @@ -45,16 +44,13 @@ abstract class BaseStateStoreRDD[T: ClassTag, U: ClassTag]( protected val hadoopConfBroadcast = dataRDD.context.broadcast( new SerializableConfiguration(sessionState.newHadoopConf())) - /** Implementations can simply call this method in getPreferredLocations. */ - protected def _getPartitions: Array[Partition] = dataRDD.partitions - /** * Set the preferred location of each partition using the executor that has the related * [[StateStoreProvider]] already loaded. * * Implementations can simply call this method in getPreferredLocations. */ - protected def _getPreferredLocations(partition: Partition): Seq[String] = { + override def getPreferredLocations(partition: Partition): Seq[String] = { val stateStoreProviderId = getStateProviderId(partition) storeCoordinator.flatMap(_.getLocation(stateStoreProviderId)).toSeq } @@ -87,10 +83,7 @@ class ReadStateStoreRDD[T: ClassTag, U: ClassTag]( extends BaseStateStoreRDD[T, U](dataRDD, checkpointLocation, queryRunId, operatorId, sessionState, storeCoordinator, extraOptions) { - override protected def getPartitions: Array[Partition] = _getPartitions - - override def getPreferredLocations(partition: Partition): Seq[String] = - _getPreferredLocations(partition) + override protected def getPartitions: Array[Partition] = dataRDD.partitions override def compute(partition: Partition, ctxt: TaskContext): Iterator[U] = { val storeProviderId = getStateProviderId(partition) @@ -124,10 +117,7 @@ class StateStoreRDD[T: ClassTag, U: ClassTag]( extends BaseStateStoreRDD[T, U](dataRDD, checkpointLocation, queryRunId, operatorId, sessionState, storeCoordinator, extraOptions) { - override protected def getPartitions: Array[Partition] = _getPartitions - - override def getPreferredLocations(partition: Partition): Seq[String] = - _getPreferredLocations(partition) + override protected def getPartitions: Array[Partition] = dataRDD.partitions override def compute(partition: Partition, ctxt: TaskContext): Iterator[U] = { val storeProviderId = getStateProviderId(partition) From 22baf05a9ec6fffe53bd34d35c122de776464dd0 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 12 Nov 2020 15:36:31 +0900 Subject: [PATCH 0453/1009] [SPARK-33408][SPARK-32354][K8S][R] Use R 3.6.3 in K8s R image and re-enable RTestsSuite MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This PR aims to use R 3.6.3 in K8s R image and re-enable `RTestsSuite`. ### Why are the changes needed? 
Jenkins Server is using `R 3.6.3`. ``` + SPARK_HOME=/home/jenkins/workspace/SparkPullRequestBuilder-K8s + /usr/bin/R CMD check --as-cran --no-tests SparkR_3.1.0.tar.gz * using log directory ‘/home/jenkins/workspace/SparkPullRequestBuilder-K8s/R/SparkR.Rcheck’ * using R version 3.6.3 (2020-02-29) ``` OpenJDK docker image is using `R 3.5.2 (2018-12-20)` which is old and currently `spark-3.0.1` fails to run SparkR. ``` $ cd spark-3.0.1-bin-hadoop3.2 $ bin/docker-image-tool.sh -R kubernetes/dockerfiles/spark/bindings/R/Dockerfile -n build ... exit code: 1 termination reason: Error ... $ bin/spark-submit --master k8s://https://192.168.64.49:8443 --deploy-mode cluster --conf spark.kubernetes.container.image=spark-r:latest local:///opt/spark/examples/src/main/r/dataframe.R $ k logs dataframe-r-b1c14b75b0c09eeb-driver ... + exec /usr/bin/tini -s -- /opt/spark/bin/spark-submit --conf spark.driver.bindAddress=172.17.0.4 --deploy-mode client --properties-file /opt/spark/conf/spark.properties --class org.apache.spark.deploy.RRunner local:///opt/spark/examples/src/main/r/dataframe.R 20/11/10 06:03:58 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable log4j:WARN No appenders could be found for logger (io.netty.util.internal.logging.InternalLoggerFactory). log4j:WARN Please initialize the log4j system properly. log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info. Error: package or namespace load failed for ‘SparkR’ in rbind(info, getNamespaceInfo(env, "S3methods")): number of columns of matrices must match (see arg 2) In addition: Warning message: package ‘SparkR’ was built under R version 4.0.2 Execution halted ``` In addition, this PR aims to recover the test coverage. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass K8S IT Jenkins job. Closes #30130 from dongjoon-hyun/SPARK-32354. 
Authored-by: Dongjoon Hyun Signed-off-by: HyukjinKwon --- .../src/main/dockerfiles/spark/bindings/R/Dockerfile | 9 ++++++++- .../deploy/k8s/integrationtest/KubernetesSuite.scala | 3 +-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile index 59f375b707ca7..bd645e40677d0 100644 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile @@ -25,7 +25,14 @@ USER 0 RUN mkdir ${SPARK_HOME}/R -RUN apt-get update && apt install -y r-base r-base-dev && rm -rf /var/cache/apt/* +# Install R 3.6.3 (http://cloud.r-project.org/bin/linux/debian/) +RUN \ + echo "deb http://cloud.r-project.org/bin/linux/debian buster-cran35/" >> /etc/apt/sources.list && \ + apt install -y gnupg && \ + apt-key adv --keyserver keys.gnupg.net --recv-key 'E19F5F87128899B192B1A2C2AD5F960A256A04AF' && \ + apt-get update && \ + apt install -y -t buster-cran35 r-base r-base-dev && \ + rm -rf /var/cache/apt/* COPY R ${SPARK_HOME}/R ENV R_HOME /usr/lib/R diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index 28ab37152cf4c..f1d8217e31b71 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -43,8 +43,7 @@ import org.apache.spark.internal.config._ class KubernetesSuite extends SparkFunSuite with BeforeAndAfterAll with BeforeAndAfter with BasicTestsSuite with SecretsTestsSuite with PythonTestsSuite with ClientModeTestsSuite with PodTemplateSuite with PVTestsSuite - // TODO(SPARK-32354): Fix and re-enable the R tests. - with DepsTestsSuite with DecommissionSuite /* with RTestsSuite */ with Logging with Eventually + with DepsTestsSuite with DecommissionSuite with RTestsSuite with Logging with Eventually with Matchers { From 6d31daeb6a2c5607ffe3b23ffb381626ad57f576 Mon Sep 17 00:00:00 2001 From: "xuewei.linxuewei" Date: Thu, 12 Nov 2020 08:50:32 +0000 Subject: [PATCH 0454/1009] [SPARK-33386][SQL] Accessing array elements in ElementAt/Elt/GetArrayItem should failed if index is out of bound MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Instead of returning NULL, throws runtime ArrayIndexOutOfBoundsException when ansiMode is enable for `element_at`,`elt`, `GetArrayItem` functions. ### Why are the changes needed? For ansiMode. ### Does this PR introduce any user-facing change? When `spark.sql.ansi.enabled` = true, Spark will throw `ArrayIndexOutOfBoundsException` if out-of-range index when accessing array elements ### How was this patch tested? Added UT and existing UT. Closes #30297 from leanken/leanken-SPARK-33386. 
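For illustration, the behavior change looks like this from a spark-shell session (a sketch only; the expected outputs mirror the new golden files added below):

```
// Default (non-ANSI) mode: invalid indices still return NULL.
spark.sql("select element_at(array(1, 2, 3), 5)").show()   // NULL
spark.sql("select elt(4, '123', '456')").show()            // NULL
spark.sql("select array(1, 2, 3)[5]").show()               // NULL

// ANSI mode: the same queries fail at runtime.
spark.conf.set("spark.sql.ansi.enabled", true)
spark.sql("select element_at(array(1, 2, 3), 5)").show()
// java.lang.ArrayIndexOutOfBoundsException: Invalid index: 5, numElements: 3

// Index 0 throws in both modes:
spark.sql("select element_at(array(1, 2, 3), 0)").show()
// java.lang.ArrayIndexOutOfBoundsException: SQL array indices start at 1
```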
Authored-by: xuewei.linxuewei Signed-off-by: Wenchen Fan --- docs/sql-ref-ansi-compliance.md | 9 +- .../sql/catalyst/analysis/TypeCoercion.scala | 4 +- .../expressions/ProjectionOverSchema.scala | 6 +- .../catalyst/expressions/SelectedField.scala | 2 +- .../expressions/collectionOperations.scala | 53 ++-- .../expressions/complexTypeExtractors.scala | 67 +++-- .../expressions/stringExpressions.scala | 33 ++- .../sql/catalyst/optimizer/ComplexTypes.scala | 2 +- .../apache/spark/sql/internal/SQLConf.scala | 7 +- .../CollectionExpressionsSuite.scala | 136 ++++++---- .../expressions/ComplexTypeSuite.scala | 23 ++ .../expressions/StringExpressionsSuite.scala | 32 ++- .../resources/sql-tests/inputs/ansi/array.sql | 1 + .../test/resources/sql-tests/inputs/array.sql | 12 + .../sql-tests/results/ansi/array.sql.out | 234 ++++++++++++++++++ .../resources/sql-tests/results/array.sql.out | 67 ++++- 16 files changed, 584 insertions(+), 104 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/ansi/array.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index d6e99312bb66e..c2b36033e318e 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -110,7 +110,14 @@ SELECT * FROM t; ### SQL Functions The behavior of some SQL functions can be different under ANSI mode (`spark.sql.ansi.enabled=true`). - - `size`: This function returns null for null input under ANSI mode. + - `size`: This function returns null for null input. + - `element_at`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices. + - `elt`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices. + +### SQL Operators + +The behavior of some SQL operators can be different under ANSI mode (`spark.sql.ansi.enabled=true`). + - `array_col[index]`: This operator throws `ArrayIndexOutOfBoundsException` if using invalid indices. 
### SQL Keywords diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index becdef8b9c603..e8dab28b5e907 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -840,8 +840,8 @@ object TypeCoercion { plan resolveOperators { case p => p transformExpressionsUp { // Skip nodes if unresolved or not enough children - case c @ Elt(children) if !c.childrenResolved || children.size < 2 => c - case c @ Elt(children) => + case c @ Elt(children, _) if !c.childrenResolved || children.size < 2 => c + case c @ Elt(children, _) => val index = children.head val newIndex = ImplicitTypeCasts.implicitCast(index, IntegerType).getOrElse(index) val newInputs = if (conf.eltOutputAsString || diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala index 13c6f8db7c129..6f1d9d065ab1a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala @@ -34,8 +34,10 @@ case class ProjectionOverSchema(schema: StructType) { expr match { case a: AttributeReference if fieldNames.contains(a.name) => Some(a.copy(dataType = schema(a.name).dataType)(a.exprId, a.qualifier)) - case GetArrayItem(child, arrayItemOrdinal) => - getProjection(child).map { projection => GetArrayItem(projection, arrayItemOrdinal) } + case GetArrayItem(child, arrayItemOrdinal, failOnError) => + getProjection(child).map { + projection => GetArrayItem(projection, arrayItemOrdinal, failOnError) + } case a: GetArrayStructFields => getProjection(a.child).map(p => (p, p.dataType)).map { case (projection, ArrayType(projSchema @ StructType(_), _)) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SelectedField.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SelectedField.scala index 7ba3d302d553b..adcc4be10687e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SelectedField.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SelectedField.scala @@ -119,7 +119,7 @@ object SelectedField { throw new AnalysisException(s"DataType '$x' is not supported by MapKeys.") } selectField(child, opt) - case GetArrayItem(child, _) => + case GetArrayItem(child, _, _) => // GetArrayItem does not select a field from a struct (i.e. prune the struct) so it can't be // the top-level extractor. However it can be part of an extractor chain. 
val ArrayType(_, containsNull) = child.dataType diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index cb081b80ba096..ee98ebf5a8a50 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -1906,8 +1906,10 @@ case class ArrayPosition(left: Expression, right: Expression) @ExpressionDescription( usage = """ _FUNC_(array, index) - Returns element of array at given (1-based) index. If index < 0, - accesses elements from the last to the first. Returns NULL if the index exceeds the length - of the array. + accesses elements from the last to the first. The function returns NULL + if the index exceeds the length of the array and `spark.sql.ansi.enabled` is set to false. + If `spark.sql.ansi.enabled` is set to true, it throws ArrayIndexOutOfBoundsException + for invalid indices. _FUNC_(map, key) - Returns value for given key, or NULL if the key is not contained in the map """, @@ -1919,9 +1921,14 @@ case class ArrayPosition(left: Expression, right: Expression) b """, since = "2.4.0") -case class ElementAt(left: Expression, right: Expression) +case class ElementAt( + left: Expression, + right: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends GetMapValueUtil with GetArrayItemUtil with NullIntolerant { + def this(left: Expression, right: Expression) = this(left, right, SQLConf.get.ansiEnabled) + @transient private lazy val mapKeyType = left.dataType.asInstanceOf[MapType].keyType @transient private lazy val arrayContainsNull = left.dataType.asInstanceOf[ArrayType].containsNull @@ -1969,7 +1976,7 @@ case class ElementAt(left: Expression, right: Expression) if (ordinal == 0) { false } else if (elements.length < math.abs(ordinal)) { - true + !failOnError } else { if (ordinal < 0) { elements(elements.length + ordinal).nullable @@ -1979,24 +1986,9 @@ case class ElementAt(left: Expression, right: Expression) } } - override def computeNullabilityFromArray(child: Expression, ordinal: Expression): Boolean = { - if (ordinal.foldable && !ordinal.nullable) { - val intOrdinal = ordinal.eval().asInstanceOf[Number].intValue() - child match { - case CreateArray(ar, _) => - nullability(ar, intOrdinal) - case GetArrayStructFields(CreateArray(elements, _), field, _, _, _) => - nullability(elements, intOrdinal) || field.nullable - case _ => - true - } - } else { - true - } - } - override def nullable: Boolean = left.dataType match { - case _: ArrayType => computeNullabilityFromArray(left, right) + case _: ArrayType => + computeNullabilityFromArray(left, right, failOnError, nullability) case _: MapType => true } @@ -2008,7 +2000,12 @@ case class ElementAt(left: Expression, right: Expression) val array = value.asInstanceOf[ArrayData] val index = ordinal.asInstanceOf[Int] if (array.numElements() < math.abs(index)) { - null + if (failOnError) { + throw new ArrayIndexOutOfBoundsException( + s"Invalid index: $index, numElements: ${array.numElements()}") + } else { + null + } } else { val idx = if (index == 0) { throw new ArrayIndexOutOfBoundsException("SQL array indices start at 1") @@ -2042,10 +2039,20 @@ case class ElementAt(left: Expression, right: Expression) } else { "" } + + val indexOutOfBoundBranch = if (failOnError) { + s"""throw new ArrayIndexOutOfBoundsException( + | "Invalid index: 
" + $index + ", numElements: " + $eval1.numElements() + |); + """.stripMargin + } else { + s"${ev.isNull} = true;" + } + s""" |int $index = (int) $eval2; |if ($eval1.numElements() < Math.abs($index)) { - | ${ev.isNull} = true; + | $indexOutOfBoundBranch |} else { | if ($index == 0) { | throw new ArrayIndexOutOfBoundsException("SQL array indices start at 1"); diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala index 60afe140960cc..363d388692c9f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala @@ -22,6 +22,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode} import org.apache.spark.sql.catalyst.util.{quoteIdentifier, ArrayData, GenericArrayData, MapData, TypeUtils} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -222,10 +223,15 @@ case class GetArrayStructFields( * * We need to do type checking here as `ordinal` expression maybe unresolved. */ -case class GetArrayItem(child: Expression, ordinal: Expression) +case class GetArrayItem( + child: Expression, + ordinal: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends BinaryExpression with GetArrayItemUtil with ExpectsInputTypes with ExtractValue with NullIntolerant { + def this(child: Expression, ordinal: Expression) = this(child, ordinal, SQLConf.get.ansiEnabled) + // We have done type checking for child in `ExtractValue`, so only need to check the `ordinal`. 
override def inputTypes: Seq[AbstractDataType] = Seq(AnyDataType, IntegralType) @@ -234,13 +240,29 @@ case class GetArrayItem(child: Expression, ordinal: Expression) override def left: Expression = child override def right: Expression = ordinal - override def nullable: Boolean = computeNullabilityFromArray(left, right) + override def nullable: Boolean = + computeNullabilityFromArray(left, right, failOnError, nullability) override def dataType: DataType = child.dataType.asInstanceOf[ArrayType].elementType + private def nullability(elements: Seq[Expression], ordinal: Int): Boolean = { + if (ordinal >= 0 && ordinal < elements.length) { + elements(ordinal).nullable + } else { + !failOnError + } + } + protected override def nullSafeEval(value: Any, ordinal: Any): Any = { val baseValue = value.asInstanceOf[ArrayData] val index = ordinal.asInstanceOf[Number].intValue() - if (index >= baseValue.numElements() || index < 0 || baseValue.isNullAt(index)) { + if (index >= baseValue.numElements() || index < 0) { + if (failOnError) { + throw new ArrayIndexOutOfBoundsException( + s"Invalid index: $index, numElements: ${baseValue.numElements()}") + } else { + null + } + } else if (baseValue.isNullAt(index)) { null } else { baseValue.get(index, dataType) @@ -251,15 +273,28 @@ case class GetArrayItem(child: Expression, ordinal: Expression) nullSafeCodeGen(ctx, ev, (eval1, eval2) => { val index = ctx.freshName("index") val nullCheck = if (child.dataType.asInstanceOf[ArrayType].containsNull) { - s" || $eval1.isNullAt($index)" + s"""else if ($eval1.isNullAt($index)) { + ${ev.isNull} = true; + } + """ } else { "" } + + val indexOutOfBoundBranch = if (failOnError) { + s"""throw new ArrayIndexOutOfBoundsException( + | "Invalid index: " + $index + ", numElements: " + $eval1.numElements() + |); + """.stripMargin + } else { + s"${ev.isNull} = true;" + } + s""" final int $index = (int) $eval2; - if ($index >= $eval1.numElements() || $index < 0$nullCheck) { - ${ev.isNull} = true; - } else { + if ($index >= $eval1.numElements() || $index < 0) { + $indexOutOfBoundBranch + } $nullCheck else { ${ev.value} = ${CodeGenerator.getValue(eval1, dataType, index)}; } """ @@ -273,20 +308,24 @@ case class GetArrayItem(child: Expression, ordinal: Expression) trait GetArrayItemUtil { /** `Null` is returned for invalid ordinals. 
*/ - protected def computeNullabilityFromArray(child: Expression, ordinal: Expression): Boolean = { + protected def computeNullabilityFromArray( + child: Expression, + ordinal: Expression, + failOnError: Boolean, + nullability: (Seq[Expression], Int) => Boolean): Boolean = { + val arrayContainsNull = child.dataType.asInstanceOf[ArrayType].containsNull if (ordinal.foldable && !ordinal.nullable) { val intOrdinal = ordinal.eval().asInstanceOf[Number].intValue() child match { - case CreateArray(ar, _) if intOrdinal < ar.length => - ar(intOrdinal).nullable - case GetArrayStructFields(CreateArray(elements, _), field, _, _, _) - if intOrdinal < elements.length => - elements(intOrdinal).nullable || field.nullable + case CreateArray(ar, _) => + nullability(ar, intOrdinal) + case GetArrayStructFields(CreateArray(elements, _), field, _, _, _) => + nullability(elements, intOrdinal) || field.nullable case _ => true } } else { - true + if (failOnError) arrayContainsNull else true } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 1fe990207160c..16e22940495f1 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, TypeCheckResult import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData, TypeUtils} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.UTF8StringBuilder import org.apache.spark.unsafe.types.{ByteArray, UTF8String} @@ -231,7 +232,12 @@ case class ConcatWs(children: Seq[Expression]) */ // scalastyle:off line.size.limit @ExpressionDescription( - usage = "_FUNC_(n, input1, input2, ...) - Returns the `n`-th input, e.g., returns `input2` when `n` is 2.", + usage = """ + _FUNC_(n, input1, input2, ...) - Returns the `n`-th input, e.g., returns `input2` when `n` is 2. + The function returns NULL if the index exceeds the length of the array + and `spark.sql.ansi.enabled` is set to false. If `spark.sql.ansi.enabled` is set to true, + it throws ArrayIndexOutOfBoundsException for invalid indices. 
+ """, examples = """ Examples: > SELECT _FUNC_(1, 'scala', 'java'); @@ -239,7 +245,11 @@ case class ConcatWs(children: Seq[Expression]) """, since = "2.0.0") // scalastyle:on line.size.limit -case class Elt(children: Seq[Expression]) extends Expression { +case class Elt( + children: Seq[Expression], + failOnError: Boolean = SQLConf.get.ansiEnabled) extends Expression { + + def this(children: Seq[Expression]) = this(children, SQLConf.get.ansiEnabled) private lazy val indexExpr = children.head private lazy val inputExprs = children.tail.toArray @@ -275,7 +285,12 @@ case class Elt(children: Seq[Expression]) extends Expression { } else { val index = indexObj.asInstanceOf[Int] if (index <= 0 || index > inputExprs.length) { - null + if (failOnError) { + throw new ArrayIndexOutOfBoundsException( + s"Invalid index: $index, numElements: ${inputExprs.length}") + } else { + null + } } else { inputExprs(index - 1).eval(input) } @@ -323,6 +338,17 @@ case class Elt(children: Seq[Expression]) extends Expression { """.stripMargin }.mkString) + val indexOutOfBoundBranch = if (failOnError) { + s""" + |if (!$indexMatched) { + | throw new ArrayIndexOutOfBoundsException( + | "Invalid index: " + ${index.value} + ", numElements: " + ${inputExprs.length}); + |} + """.stripMargin + } else { + "" + } + ev.copy( code""" |${index.code} @@ -332,6 +358,7 @@ case class Elt(children: Seq[Expression]) extends Expression { |do { | $codes |} while (false); + |$indexOutOfBoundBranch |final ${CodeGenerator.javaType(dataType)} ${ev.value} = $inputVal; |final boolean ${ev.isNull} = ${ev.value} == null; """.stripMargin) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala index 2ac8f62b67b3d..7a21ce254a235 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala @@ -61,7 +61,7 @@ object SimplifyExtractValueOps extends Rule[LogicalPlan] { CreateArray(elems.map(GetStructField(_, ordinal, Some(field.name))), useStringTypeWhenEmpty) // Remove redundant map lookup. - case ga @ GetArrayItem(CreateArray(elems, _), IntegerLiteral(idx)) => + case ga @ GetArrayItem(CreateArray(elems, _), IntegerLiteral(idx), _) => // Instead of creating the array and then selecting one row, remove array creation // altogether. if (idx >= 0 && idx < elems.size) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 21357a492e39e..ef988052affcd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2144,9 +2144,10 @@ object SQLConf { val ANSI_ENABLED = buildConf("spark.sql.ansi.enabled") .doc("When true, Spark tries to conform to the ANSI SQL specification: 1. Spark will " + - "throw a runtime exception if an overflow occurs in any operation on integral/decimal " + - "field. 2. Spark will forbid using the reserved keywords of ANSI SQL as identifiers in " + - "the SQL parser.") + "throw an exception at runtime if the inputs to a SQL operator/function are invalid, " + + "e.g. overflow in arithmetic operations, out-of-range index when accessing array elements. " + + "2. Spark will forbid using the reserved keywords of ANSI SQL as identifiers in " + + "the SQL parser. 3. 
Spark will return NULL for null input for function `size`.") .version("3.0.0") .booleanConf .createWithDefault(false) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index d59d13d49cef4..6ee88c9eaef86 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -1118,58 +1118,62 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper } test("correctly handles ElementAt nullability for arrays") { - // CreateArray case - val a = AttributeReference("a", IntegerType, nullable = false)() - val b = AttributeReference("b", IntegerType, nullable = true)() - val array = CreateArray(a :: b :: Nil) - assert(!ElementAt(array, Literal(1)).nullable) - assert(!ElementAt(array, Literal(-2)).nullable) - assert(ElementAt(array, Literal(2)).nullable) - assert(ElementAt(array, Literal(-1)).nullable) - assert(!ElementAt(array, Subtract(Literal(2), Literal(1))).nullable) - assert(ElementAt(array, AttributeReference("ordinal", IntegerType)()).nullable) - - // CreateArray case invalid indices - assert(!ElementAt(array, Literal(0)).nullable) - assert(ElementAt(array, Literal(4)).nullable) - assert(ElementAt(array, Literal(-4)).nullable) - - // GetArrayStructFields case - val f1 = StructField("a", IntegerType, nullable = false) - val f2 = StructField("b", IntegerType, nullable = true) - val structType = StructType(f1 :: f2 :: Nil) - val c = AttributeReference("c", structType, nullable = false)() - val inputArray1 = CreateArray(c :: Nil) - val inputArray1ContainsNull = c.nullable - val stArray1 = GetArrayStructFields(inputArray1, f1, 0, 2, inputArray1ContainsNull) - assert(!ElementAt(stArray1, Literal(1)).nullable) - assert(!ElementAt(stArray1, Literal(-1)).nullable) - val stArray2 = GetArrayStructFields(inputArray1, f2, 1, 2, inputArray1ContainsNull) - assert(ElementAt(stArray2, Literal(1)).nullable) - assert(ElementAt(stArray2, Literal(-1)).nullable) - - val d = AttributeReference("d", structType, nullable = true)() - val inputArray2 = CreateArray(c :: d :: Nil) - val inputArray2ContainsNull = c.nullable || d.nullable - val stArray3 = GetArrayStructFields(inputArray2, f1, 0, 2, inputArray2ContainsNull) - assert(!ElementAt(stArray3, Literal(1)).nullable) - assert(!ElementAt(stArray3, Literal(-2)).nullable) - assert(ElementAt(stArray3, Literal(2)).nullable) - assert(ElementAt(stArray3, Literal(-1)).nullable) - val stArray4 = GetArrayStructFields(inputArray2, f2, 1, 2, inputArray2ContainsNull) - assert(ElementAt(stArray4, Literal(1)).nullable) - assert(ElementAt(stArray4, Literal(-2)).nullable) - assert(ElementAt(stArray4, Literal(2)).nullable) - assert(ElementAt(stArray4, Literal(-1)).nullable) - - // GetArrayStructFields case invalid indices - assert(!ElementAt(stArray3, Literal(0)).nullable) - assert(ElementAt(stArray3, Literal(4)).nullable) - assert(ElementAt(stArray3, Literal(-4)).nullable) - - assert(ElementAt(stArray4, Literal(0)).nullable) - assert(ElementAt(stArray4, Literal(4)).nullable) - assert(ElementAt(stArray4, Literal(-4)).nullable) + Seq(true, false).foreach { ansiEnabled => + withSQLConf(SQLConf.ANSI_ENABLED.key -> ansiEnabled.toString) { + // CreateArray case + val a = AttributeReference("a", IntegerType, nullable = false)() + val b = 
AttributeReference("b", IntegerType, nullable = true)() + val array = CreateArray(a :: b :: Nil) + assert(!ElementAt(array, Literal(1)).nullable) + assert(!ElementAt(array, Literal(-2)).nullable) + assert(ElementAt(array, Literal(2)).nullable) + assert(ElementAt(array, Literal(-1)).nullable) + assert(!ElementAt(array, Subtract(Literal(2), Literal(1))).nullable) + assert(ElementAt(array, AttributeReference("ordinal", IntegerType)()).nullable) + + // CreateArray case invalid indices + assert(!ElementAt(array, Literal(0)).nullable) + assert(ElementAt(array, Literal(4)).nullable == !ansiEnabled) + assert(ElementAt(array, Literal(-4)).nullable == !ansiEnabled) + + // GetArrayStructFields case + val f1 = StructField("a", IntegerType, nullable = false) + val f2 = StructField("b", IntegerType, nullable = true) + val structType = StructType(f1 :: f2 :: Nil) + val c = AttributeReference("c", structType, nullable = false)() + val inputArray1 = CreateArray(c :: Nil) + val inputArray1ContainsNull = c.nullable + val stArray1 = GetArrayStructFields(inputArray1, f1, 0, 2, inputArray1ContainsNull) + assert(!ElementAt(stArray1, Literal(1)).nullable) + assert(!ElementAt(stArray1, Literal(-1)).nullable) + val stArray2 = GetArrayStructFields(inputArray1, f2, 1, 2, inputArray1ContainsNull) + assert(ElementAt(stArray2, Literal(1)).nullable) + assert(ElementAt(stArray2, Literal(-1)).nullable) + + val d = AttributeReference("d", structType, nullable = true)() + val inputArray2 = CreateArray(c :: d :: Nil) + val inputArray2ContainsNull = c.nullable || d.nullable + val stArray3 = GetArrayStructFields(inputArray2, f1, 0, 2, inputArray2ContainsNull) + assert(!ElementAt(stArray3, Literal(1)).nullable) + assert(!ElementAt(stArray3, Literal(-2)).nullable) + assert(ElementAt(stArray3, Literal(2)).nullable) + assert(ElementAt(stArray3, Literal(-1)).nullable) + val stArray4 = GetArrayStructFields(inputArray2, f2, 1, 2, inputArray2ContainsNull) + assert(ElementAt(stArray4, Literal(1)).nullable) + assert(ElementAt(stArray4, Literal(-2)).nullable) + assert(ElementAt(stArray4, Literal(2)).nullable) + assert(ElementAt(stArray4, Literal(-1)).nullable) + + // GetArrayStructFields case invalid indices + assert(!ElementAt(stArray3, Literal(0)).nullable) + assert(ElementAt(stArray3, Literal(4)).nullable == !ansiEnabled) + assert(ElementAt(stArray3, Literal(-4)).nullable == !ansiEnabled) + + assert(ElementAt(stArray4, Literal(0)).nullable) + assert(ElementAt(stArray4, Literal(4)).nullable) + assert(ElementAt(stArray4, Literal(-4)).nullable) + } + } } test("Concat") { @@ -1883,4 +1887,32 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper Literal(stringToInterval("interval 1 year"))), Seq(Date.valueOf("2018-01-01"))) } + + test("SPARK-33386: element_at ArrayIndexOutOfBoundsException") { + Seq(true, false).foreach { ansiEnabled => + withSQLConf(SQLConf.ANSI_ENABLED.key -> ansiEnabled.toString) { + val array = Literal.create(Seq(1, 2, 3), ArrayType(IntegerType)) + var expr: Expression = ElementAt(array, Literal(5)) + if (ansiEnabled) { + val errMsg = "Invalid index: 5, numElements: 3" + checkExceptionInExpression[Exception](expr, errMsg) + } else { + checkEvaluation(expr, null) + } + + expr = ElementAt(array, Literal(-5)) + if (ansiEnabled) { + val errMsg = "Invalid index: -5, numElements: 3" + checkExceptionInExpression[Exception](expr, errMsg) + } else { + checkEvaluation(expr, null) + } + + // SQL array indices start at 1 exception throws for both mode. 
+ expr = ElementAt(array, Literal(0)) + val errMsg = "SQL array indices start at 1" + checkExceptionInExpression[Exception](expr, errMsg) + } + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala index 38e32ff2518f7..67ab2071de037 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala @@ -62,6 +62,29 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(GetArrayItem(nestedArray, Literal(0)), Seq(1)) } + test("SPARK-33386: GetArrayItem ArrayIndexOutOfBoundsException") { + Seq(true, false).foreach { ansiEnabled => + withSQLConf(SQLConf.ANSI_ENABLED.key -> ansiEnabled.toString) { + val array = Literal.create(Seq("a", "b"), ArrayType(StringType)) + + if (ansiEnabled) { + checkExceptionInExpression[Exception]( + GetArrayItem(array, Literal(5)), + "Invalid index: 5, numElements: 2" + ) + + checkExceptionInExpression[Exception]( + GetArrayItem(array, Literal(-1)), + "Invalid index: -1, numElements: 2" + ) + } else { + checkEvaluation(GetArrayItem(array, Literal(5)), null) + checkEvaluation(GetArrayItem(array, Literal(-1)), null) + } + } + } + } + test("SPARK-26637 handles GetArrayItem nullability correctly when input array size is constant") { // CreateArray case val a = AttributeReference("a", IntegerType, nullable = false)() diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index 967ccc42c632d..a1b6cec24f23f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -18,9 +18,9 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { @@ -968,4 +968,34 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { GenerateUnsafeProjection.generate( Sentences(Literal("\"quote"), Literal("\"quote"), Literal("\"quote")) :: Nil) } + + test("SPARK-33386: elt ArrayIndexOutOfBoundsException") { + Seq(true, false).foreach { ansiEnabled => + withSQLConf(SQLConf.ANSI_ENABLED.key -> ansiEnabled.toString) { + var expr: Expression = Elt(Seq(Literal(4), Literal("123"), Literal("456"))) + if (ansiEnabled) { + val errMsg = "Invalid index: 4, numElements: 2" + checkExceptionInExpression[Exception](expr, errMsg) + } else { + checkEvaluation(expr, null) + } + + expr = Elt(Seq(Literal(0), Literal("123"), Literal("456"))) + if (ansiEnabled) { + val errMsg = "Invalid index: 0, numElements: 2" + checkExceptionInExpression[Exception](expr, errMsg) + } else { + checkEvaluation(expr, null) + } + + expr = Elt(Seq(Literal(-1), Literal("123"), Literal("456"))) + if (ansiEnabled) { + val errMsg = "Invalid index: -1, numElements: 2" + checkExceptionInExpression[Exception](expr, 
errMsg) + } else { + checkEvaluation(expr, null) + } + } + } + } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/ansi/array.sql b/sql/core/src/test/resources/sql-tests/inputs/ansi/array.sql new file mode 100644 index 0000000000000..662756cbfb0b0 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/ansi/array.sql @@ -0,0 +1 @@ +--IMPORT array.sql diff --git a/sql/core/src/test/resources/sql-tests/inputs/array.sql b/sql/core/src/test/resources/sql-tests/inputs/array.sql index 984321ab795fc..f73b653659eb4 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/array.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/array.sql @@ -90,3 +90,15 @@ select size(date_array), size(timestamp_array) from primitive_arrays; + +-- index out of range for array elements +select element_at(array(1, 2, 3), 5); +select element_at(array(1, 2, 3), -5); +select element_at(array(1, 2, 3), 0); + +select elt(4, '123', '456'); +select elt(0, '123', '456'); +select elt(-1, '123', '456'); + +select array(1, 2, 3)[5]; +select array(1, 2, 3)[-1]; diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out new file mode 100644 index 0000000000000..12a77e36273fa --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out @@ -0,0 +1,234 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 20 + + +-- !query +create temporary view data as select * from values + ("one", array(11, 12, 13), array(array(111, 112, 113), array(121, 122, 123))), + ("two", array(21, 22, 23), array(array(211, 212, 213), array(221, 222, 223))) + as data(a, b, c) +-- !query schema +struct<> +-- !query output + + + +-- !query +select * from data +-- !query schema +struct,c:array>> +-- !query output +one [11,12,13] [[111,112,113],[121,122,123]] +two [21,22,23] [[211,212,213],[221,222,223]] + + +-- !query +select a, b[0], b[0] + b[1] from data +-- !query schema +struct +-- !query output +one 11 23 +two 21 43 + + +-- !query +select a, c[0][0] + c[0][0 + 1] from data +-- !query schema +struct +-- !query output +one 223 +two 423 + + +-- !query +create temporary view primitive_arrays as select * from values ( + array(true), + array(2Y, 1Y), + array(2S, 1S), + array(2, 1), + array(2L, 1L), + array(9223372036854775809, 9223372036854775808), + array(2.0D, 1.0D), + array(float(2.0), float(1.0)), + array(date '2016-03-14', date '2016-03-13'), + array(timestamp '2016-11-15 20:54:00.000', timestamp '2016-11-12 20:54:00.000') +) as primitive_arrays( + boolean_array, + tinyint_array, + smallint_array, + int_array, + bigint_array, + decimal_array, + double_array, + float_array, + date_array, + timestamp_array +) +-- !query schema +struct<> +-- !query output + + + +-- !query +select * from primitive_arrays +-- !query schema +struct,tinyint_array:array,smallint_array:array,int_array:array,bigint_array:array,decimal_array:array,double_array:array,float_array:array,date_array:array,timestamp_array:array> +-- !query output +[true] [2,1] [2,1] [2,1] [2,1] [9223372036854775809,9223372036854775808] [2.0,1.0] [2.0,1.0] [2016-03-14,2016-03-13] [2016-11-15 20:54:00,2016-11-12 20:54:00] + + +-- !query +select + array_contains(boolean_array, true), array_contains(boolean_array, false), + array_contains(tinyint_array, 2Y), array_contains(tinyint_array, 0Y), + array_contains(smallint_array, 2S), array_contains(smallint_array, 0S), + array_contains(int_array, 2), array_contains(int_array, 0), + array_contains(bigint_array, 
2L), array_contains(bigint_array, 0L), + array_contains(decimal_array, 9223372036854775809), array_contains(decimal_array, 1), + array_contains(double_array, 2.0D), array_contains(double_array, 0.0D), + array_contains(float_array, float(2.0)), array_contains(float_array, float(0.0)), + array_contains(date_array, date '2016-03-14'), array_contains(date_array, date '2016-01-01'), + array_contains(timestamp_array, timestamp '2016-11-15 20:54:00.000'), array_contains(timestamp_array, timestamp '2016-01-01 20:54:00.000') +from primitive_arrays +-- !query schema +struct +-- !query output +true false true false true false true false true false true false true false true false true false true false + + +-- !query +select array_contains(b, 11), array_contains(c, array(111, 112, 113)) from data +-- !query schema +struct +-- !query output +false false +true true + + +-- !query +select + sort_array(boolean_array), + sort_array(tinyint_array), + sort_array(smallint_array), + sort_array(int_array), + sort_array(bigint_array), + sort_array(decimal_array), + sort_array(double_array), + sort_array(float_array), + sort_array(date_array), + sort_array(timestamp_array) +from primitive_arrays +-- !query schema +struct,sort_array(tinyint_array, true):array,sort_array(smallint_array, true):array,sort_array(int_array, true):array,sort_array(bigint_array, true):array,sort_array(decimal_array, true):array,sort_array(double_array, true):array,sort_array(float_array, true):array,sort_array(date_array, true):array,sort_array(timestamp_array, true):array> +-- !query output +[true] [1,2] [1,2] [1,2] [1,2] [9223372036854775808,9223372036854775809] [1.0,2.0] [1.0,2.0] [2016-03-13,2016-03-14] [2016-11-12 20:54:00,2016-11-15 20:54:00] + + +-- !query +select sort_array(array('b', 'd'), '1') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'sort_array(array('b', 'd'), '1')' due to data type mismatch: Sort order in second argument requires a boolean literal.; line 1 pos 7 + + +-- !query +select sort_array(array('b', 'd'), cast(NULL as boolean)) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'sort_array(array('b', 'd'), CAST(NULL AS BOOLEAN))' due to data type mismatch: Sort order in second argument requires a boolean literal.; line 1 pos 7 + + +-- !query +select + size(boolean_array), + size(tinyint_array), + size(smallint_array), + size(int_array), + size(bigint_array), + size(decimal_array), + size(double_array), + size(float_array), + size(date_array), + size(timestamp_array) +from primitive_arrays +-- !query schema +struct +-- !query output +1 2 2 2 2 2 2 2 2 2 + + +-- !query +select element_at(array(1, 2, 3), 5) +-- !query schema +struct<> +-- !query output +java.lang.ArrayIndexOutOfBoundsException +Invalid index: 5, numElements: 3 + + +-- !query +select element_at(array(1, 2, 3), -5) +-- !query schema +struct<> +-- !query output +java.lang.ArrayIndexOutOfBoundsException +Invalid index: -5, numElements: 3 + + +-- !query +select element_at(array(1, 2, 3), 0) +-- !query schema +struct<> +-- !query output +java.lang.ArrayIndexOutOfBoundsException +SQL array indices start at 1 + + +-- !query +select elt(4, '123', '456') +-- !query schema +struct<> +-- !query output +java.lang.ArrayIndexOutOfBoundsException +Invalid index: 4, numElements: 2 + + +-- !query +select elt(0, '123', '456') +-- !query schema +struct<> +-- !query output +java.lang.ArrayIndexOutOfBoundsException +Invalid index: 0, numElements: 2 + + +-- !query 
+select elt(-1, '123', '456') +-- !query schema +struct<> +-- !query output +java.lang.ArrayIndexOutOfBoundsException +Invalid index: -1, numElements: 2 + + +-- !query +select array(1, 2, 3)[5] +-- !query schema +struct<> +-- !query output +java.lang.ArrayIndexOutOfBoundsException +Invalid index: 5, numElements: 3 + + +-- !query +select array(1, 2, 3)[-1] +-- !query schema +struct<> +-- !query output +java.lang.ArrayIndexOutOfBoundsException +Invalid index: -1, numElements: 3 diff --git a/sql/core/src/test/resources/sql-tests/results/array.sql.out b/sql/core/src/test/resources/sql-tests/results/array.sql.out index 2c2b1a7856304..9bf0d89ed71fe 100644 --- a/sql/core/src/test/resources/sql-tests/results/array.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/array.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 12 +-- Number of queries: 20 -- !query @@ -160,3 +160,68 @@ from primitive_arrays struct -- !query output 1 2 2 2 2 2 2 2 2 2 + + +-- !query +select element_at(array(1, 2, 3), 5) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select element_at(array(1, 2, 3), -5) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select element_at(array(1, 2, 3), 0) +-- !query schema +struct<> +-- !query output +java.lang.ArrayIndexOutOfBoundsException +SQL array indices start at 1 + + +-- !query +select elt(4, '123', '456') +-- !query schema +struct +-- !query output +NULL + + +-- !query +select elt(0, '123', '456') +-- !query schema +struct +-- !query output +NULL + + +-- !query +select elt(-1, '123', '456') +-- !query schema +struct +-- !query output +NULL + + +-- !query +select array(1, 2, 3)[5] +-- !query schema +struct +-- !query output +NULL + + +-- !query +select array(1, 2, 3)[-1] +-- !query schema +struct +-- !query output +NULL From 4335af075a8ad27c4906f03ae5f8cd8f9a754e5a Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 12 Nov 2020 18:53:06 +0900 Subject: [PATCH 0455/1009] [MINOR][DOC] spark.executor.memoryOverhead is not cluster-mode only ### What changes were proposed in this pull request? Remove "in cluster mode" from the description of `spark.executor.memoryOverhead` ### Why are the changes needed? fix correctness issue in documentaion ### Does this PR introduce _any_ user-facing change? yes, users may not get confused about the description `spark.executor.memoryOverhead` ### How was this patch tested? pass GA doc generation Closes #30311 from yaooqinn/minordoc. 
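For context, a minimal Scala sketch of where this setting is supplied (illustrative only, not part of the patch; the app name and sizes below are arbitrary):

```
import org.apache.spark.sql.SparkSession

// The setting is not specific to cluster deploy mode; per the updated docs it is
// currently supported on YARN and Kubernetes.
val spark = SparkSession.builder()
  .appName("memory-overhead-example")                // arbitrary example name
  .config("spark.executor.memory", "4g")             // on-heap executor memory
  .config("spark.executor.memoryOverhead", "512m")   // additional non-heap memory per executor
  .getOrCreate()
```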
Authored-by: Kent Yao Signed-off-by: Takeshi Yamamuro --- .../scala/org/apache/spark/internal/config/package.scala | 4 ++-- docs/configuration.md | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 6239ef0491a6f..2bb1290963f87 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -302,8 +302,8 @@ package object config { .createWithDefaultString("1g") private[spark] val EXECUTOR_MEMORY_OVERHEAD = ConfigBuilder("spark.executor.memoryOverhead") - .doc("The amount of non-heap memory to be allocated per executor in cluster mode, " + - "in MiB unless otherwise specified.") + .doc("The amount of non-heap memory to be allocated per executor, in MiB unless otherwise" + + " specified.") .version("2.3.0") .bytesConf(ByteUnit.MiB) .createOptional diff --git a/docs/configuration.md b/docs/configuration.md index aab18f23a083f..d4738f1c363f0 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -274,10 +274,9 @@ of the most common options to set are: spark.executor.memoryOverhead executorMemory * 0.10, with minimum of 384 - Amount of additional memory to be allocated per executor process in cluster mode, in MiB unless - otherwise specified. This is memory that accounts for things like VM overheads, interned strings, - other native overheads, etc. This tends to grow with the executor size (typically 6-10%). - This option is currently supported on YARN and Kubernetes. + Amount of additional memory to be allocated per executor process, in MiB unless otherwise specified. + This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc. + This tends to grow with the executor size (typically 6-10%). This option is currently supported on YARN and Kubernetes.
    Note: Additional memory includes PySpark executor memory (when spark.executor.pyspark.memory is not configured) and memory used by other From a2887164bcca152e2402169bf6991c7dfb3ac11c Mon Sep 17 00:00:00 2001 From: zhengruifeng Date: Thu, 12 Nov 2020 19:14:07 +0800 Subject: [PATCH 0456/1009] [SPARK-32907][ML][PYTHON] adaptively blockify instances - LinearSVC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? 1, use `maxBlockSizeInMB` instead of `blockSize`(#rows) to control the stacking of vectors; 2, infer an appropriate `maxBlockSizeInMB` if set 0; ### Why are the changes needed? the performance gain is mainly related to the nnz of block. f2jBLAS |   |   |   |   |   |   |   |   |   |   |   |   |   -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- Duration(millisecond) | branch 3.0 Impl | blockSizeInMB=0.0625 | blockSizeInMB=0.125 | blockSizeInMB=0.25 | blockSizeInMB=0.5 | blockSizeInMB=1 | blockSizeInMB=2 | blockSizeInMB=4 | blockSizeInMB=8 | blockSizeInMB=16 | blockSizeInMB=32 | blockSizeInMB=64 | blockSizeInMB=128 epsilon(100%) | 326481 | 26143 | 25710 | 24726 | 25395 | 25840 | 26846 | 25927 | 27431 | 26190 | 26056 | 26347 | 27204 epsilon3000(67%) | 455247 | 35893 | 34366 | 34985 | 38387 | 38901 | 40426 | 40044 | 39161 | 38767 | 39965 | 39523 | 39108 epsilon4000(50%) | 306390 | 42256 | 41164 | 43748 | 48638 | 50892 | 50986 | 51091 | 51072 | 51289 | 51652 | 53312 | 52146 epsilon5000(40%) | 307619 | 43639 | 42992 | 44743 | 50800 | 51939 | 51871 | 52190 | 53850 | 52607 | 51062 | 52509 | 51570 epsilon10000(20%) | 310070 | 58371 | 55921 | 56317 | 56618 | 53694 | 52131 | 51768 | 51728 | 52233 | 51881 | 51653 | 52440 epsilon20000(10%) | 316565 | 109193 | 95121 | 82764 | 69653 | 60764 | 56066 | 53371 | 52822 | 52872 | 52769 | 52527 | 53508 epsilon200000(1%) | 336181 | 1569721 | 1069355 | 673718 | 375043 | 218230 | 145393 | 110926 | 94327 | 87039 | 83926 | 81890 | 81787   |   |   |   |   |   |   |   |   |   |   |   |   |     |   |   |   |   |   |   |   |   |   |   |   |   |     | Speedup |   |   |   |   |   |   |   |   |   |   |   |   epsilon(100%) | 1 | 12.48827602 | 12.69859977 | **13.20395535** | 12.85611341 | 12.63471362 | 12.16125307 | 12.59231689 | 11.90189931 | 12.46586483 | 12.5299739 | 12.39158158 | 12.00121306 epsilon3000(67%) | 1 | 12.68344803 | **13.2470174** | 13.01263399 | 11.85940553 | 11.70270687 | 11.26124276 | 11.36866946 | 11.62500958 | 11.74315784 | 11.39114225 | 11.51853351 | 11.64076404 epsilon4000(50%) | 1 | 7.250804619 | **7.443154212** | 7.003520161 | 6.299395534 | 6.020396133 | 6.00929667 | 5.996946625 | 5.999177632 | 5.973795551 | 5.931812902 | 5.747111345 | 5.875618456 epsilon5000(40%) | 1 | 7.049176196 | **7.155261444** | 6.875243055 | 6.055492126 | 5.92269778 | 5.930462108 | 5.894213451 | 5.712516249 | 5.847491779 | 6.024421292 | 5.858405226 | 5.965076595 epsilon10000(20%) | 1 | 5.312055644 | 5.544786395 | 5.505797539 | 5.4765269 | 5.774760681 | 5.947900481 | 5.98960748 | 5.994239097 | 5.93628549 | 5.976561747 | **6.002942714** | 5.912852784 epsilon20000(10%) | 1 | 2.899132728 | 3.328024306 | 3.824911797 | 4.544886796 | 5.209745902 | 5.64629187 | 5.931404695 | 5.993052137 | 5.987384627 | 5.999071425 | **6.026710073** | 5.916218136 epsilon200000(1%) | 1 | 0.214166084 | 0.314377358 | 0.498993644 | 0.896379882 | 1.540489392 | 2.312222734 | 3.03067811 | 3.563995463 | 3.862417997 | 4.005683578 | 4.105275369 | **4.110445425** OpenBLAS |   |   |   |   |   |   | 
  |   |   |   |   |   |   -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- Duration(millisecond) | branch 3.0 Impl | blockSizeInMB=0.0625 | blockSizeInMB=0.125 | blockSizeInMB=0.25 | blockSizeInMB=0.5 | blockSizeInMB=1 | blockSizeInMB=2 | blockSizeInMB=4 | blockSizeInMB=8 | blockSizeInMB=16 | blockSizeInMB=32 | blockSizeInMB=64 | blockSizeInMB=128 epsilon(100%) | 299119 | 26047 | 25049 | 25239 | 28001 | 35138 | 36438 | 36279 | 36114 | 35111 | 35428 | 36295 | 35197 epsilon3000(67%) | 439798 | 33321 | 34423 | 34336 | 38906 | 51756 | 54138 | 54085 | 53412 | 54766 | 54425 | 54221 | 54842 epsilon4000(50%) | 302963 | 42960 | 40678 | 43483 | 48254 | 50888 | 54990 | 52647 | 51947 | 51843 | 52891 | 53410 | 52020 epsilon5000(40%) | 303569 | 44225 | 44961 | 45065 | 51768 | 52776 | 51930 | 53587 | 53104 | 51833 | 52138 | 52574 | 53756 epsilon10000(20%) | 307403 | 58447 | 55993 | 56757 | 56694 | 54038 | 52734 | 52073 | 52051 | 52150 | 51986 | 52407 | 52390 epsilon20000(10%) | 313344 | 107580 | 94679 | 83329 | 70226 | 60996 | 57130 | 55461 | 54641 | 52712 | 52541 | 53101 | 53312 epsilon200000(1%) | 334679 | 1642726 | 1073148 | 654481 | 364974 | 213881 | 140248 | 107579 | 91757 | 85090 | 81940 | 80492 | 80250   |   |   |   |   |   |   |   |   |   |   |   |   |     |   |   |   |   |   |   |   |   |   |   |   |   |     | Speedup |   |   |   |   |   |   |   |   |   |   |   |   epsilon(100%) | 1 | 11.48381771 | **11.94135494** | 11.85146004 | 10.68243991 | 8.512692811 | 8.208985125 | 8.244962651 | 8.282632774 | 8.519238985 | 8.443011178 | 8.241328007 | 8.498423161 epsilon3000(67%) | 1 | 13.19882356 | 12.7762833 | **12.80865564** | 11.30411762 | 8.497526857 | 8.123646976 | 8.131607655 | 8.234067251 | 8.030493372 | 8.080808452 | 8.111211523 | 8.01936472 epsilon4000(50%) | 1 | 7.052211359 | **7.44783421** | 6.967389555 | 6.278505409 | 5.953525389 | 5.509419895 | 5.754610899 | 5.832155851 | 5.843855487 | 5.728063376 | 5.672402172 | 5.823971549 epsilon5000(40%) | 1 | **6.86419446** | 6.751829363 | 6.736247642 | 5.864027971 | 5.752027437 | 5.845734643 | 5.664974714 | 5.716499699 | 5.856674319 | 5.822413595 | 5.774127896 | 5.647164968 epsilon10000(20%) | 1 | 5.259517169 | 5.490025539 | 5.416124883 | 5.422143437 | 5.688645028 | 5.829313157 | 5.903308816 | 5.905803923 | 5.894592522 | **5.913188166** | 5.865685882 | 5.867589235 epsilon20000(10%) | 1 | 2.912660346 | 3.309540658 | 3.760323537 | 4.461937174 | 5.137123746 | 5.48475407 | 5.649807973 | 5.734594901 | 5.944452876 | **5.963799699** | 5.900905821 | 5.87755102 epsilon200000(1%) | 1 | 0.203733915 | 0.311866583 | 0.511365494 | 0.916994087 | 1.564790701 | 2.38633706 | 3.111006795 | 3.647449241 | 3.933235398 | 4.084439834 | 4.157916315 | **4.170454829** ### Does this PR introduce _any_ user-facing change? yes, param `blockSize` -> `blockSizeInMB` in master ### How was this patch tested? added testsuites and performance test (result attached in [ticket](https://issues.apache.org/jira/browse/SPARK-32907)) Closes #30009 from zhengruifeng/adaptively_blockify_linear_svc_II. 
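For reviewers, a small usage sketch of the renamed parameter (illustrative only; `training` is a placeholder DataFrame with "label" and "features" columns):

```
import org.apache.spark.ml.classification.LinearSVC

val lsvc = new LinearSVC()
  .setMaxIter(10)
  .setRegParam(0.1)
  // 0.0 (the default) lets the implementation infer a block size;
  // per this patch the inferred value is InstanceBlock.DefaultBlockSizeInMB = 1.0 MB.
  .setMaxBlockSizeInMB(0.0)

val model = lsvc.fit(training)
```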
Lead-authored-by: zhengruifeng Co-authored-by: Weichen Xu Signed-off-by: Weichen Xu --- .../spark/ml/classification/LinearSVC.scala | 93 ++++++------------- .../apache/spark/ml/feature/Instance.scala | 71 ++++++++++++++ .../ml/param/shared/SharedParamsCodeGen.scala | 7 +- .../spark/ml/param/shared/sharedParams.scala | 18 ++++ .../ml/classification/LinearSVCSuite.scala | 4 +- .../spark/ml/feature/InstanceSuite.scala | 54 +++++++++++ python/pyspark/ml/classification.py | 26 +++--- python/pyspark/ml/classification.pyi | 9 +- .../ml/param/_shared_params_code_gen.py | 6 +- python/pyspark/ml/param/shared.py | 18 ++++ python/pyspark/ml/param/shared.pyi | 5 + 11 files changed, 224 insertions(+), 87 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala index 77272c65eb231..95f37671e1399 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala @@ -42,7 +42,7 @@ import org.apache.spark.storage.StorageLevel /** Params for linear SVM Classifier. */ private[classification] trait LinearSVCParams extends ClassifierParams with HasRegParam with HasMaxIter with HasFitIntercept with HasTol with HasStandardization with HasWeightCol - with HasAggregationDepth with HasThreshold with HasBlockSize { + with HasAggregationDepth with HasThreshold with HasMaxBlockSizeInMB { /** * Param for threshold in binary classification prediction. @@ -57,7 +57,7 @@ private[classification] trait LinearSVCParams extends ClassifierParams with HasR "threshold in binary classification prediction applied to rawPrediction") setDefault(regParam -> 0.0, maxIter -> 100, fitIntercept -> true, tol -> 1E-6, - standardization -> true, threshold -> 0.0, aggregationDepth -> 2, blockSize -> 1) + standardization -> true, threshold -> 0.0, aggregationDepth -> 2, maxBlockSizeInMB -> 0.0) } /** @@ -153,22 +153,13 @@ class LinearSVC @Since("2.2.0") ( def setAggregationDepth(value: Int): this.type = set(aggregationDepth, value) /** - * Set block size for stacking input data in matrices. - * If blockSize == 1, then stacking will be skipped, and each vector is treated individually; - * If blockSize > 1, then vectors will be stacked to blocks, and high-level BLAS routines - * will be used if possible (for example, GEMV instead of DOT, GEMM instead of GEMV). - * Recommended size is between 10 and 1000. An appropriate choice of the block size depends - * on the sparsity and dim of input datasets, the underlying BLAS implementation (for example, - * f2jBLAS, OpenBLAS, intel MKL) and its configuration (for example, number of threads). - * Note that existing BLAS implementations are mainly optimized for dense matrices, if the - * input dataset is sparse, stacking may bring no performance gain, the worse is possible - * performance regression. - * Default is 1. + * Sets the value of param [[maxBlockSizeInMB]]. + * Default is 0.0. 
* * @group expertSetParam */ @Since("3.1.0") - def setBlockSize(value: Int): this.type = set(blockSize, value) + def setMaxBlockSizeInMB(value: Double): this.type = set(maxBlockSizeInMB, value) @Since("2.2.0") override def copy(extra: ParamMap): LinearSVC = defaultCopy(extra) @@ -177,19 +168,19 @@ class LinearSVC @Since("2.2.0") ( instr.logPipelineStage(this) instr.logDataset(dataset) instr.logParams(this, labelCol, weightCol, featuresCol, predictionCol, rawPredictionCol, - regParam, maxIter, fitIntercept, tol, standardization, threshold, aggregationDepth, blockSize) + regParam, maxIter, fitIntercept, tol, standardization, threshold, aggregationDepth, + maxBlockSizeInMB) + + if (dataset.storageLevel != StorageLevel.NONE) { + instr.logWarning(s"Input instances will be standardized, blockified to blocks, and " + + s"then cached during training. Be careful of double caching!") + } val instances = extractInstances(dataset) .setName("training instances") - if (dataset.storageLevel == StorageLevel.NONE && $(blockSize) == 1) { - instances.persist(StorageLevel.MEMORY_AND_DISK) - } - - var requestedMetrics = Seq("mean", "std", "count") - if ($(blockSize) != 1) requestedMetrics +:= "numNonZeros" val (summarizer, labelSummarizer) = Summarizer - .getClassificationSummarizers(instances, $(aggregationDepth), requestedMetrics) + .getClassificationSummarizers(instances, $(aggregationDepth), Seq("mean", "std", "count")) val histogram = labelSummarizer.histogram val numInvalid = labelSummarizer.countInvalid @@ -199,14 +190,12 @@ class LinearSVC @Since("2.2.0") ( instr.logNamedValue("lowestLabelWeight", labelSummarizer.histogram.min.toString) instr.logNamedValue("highestLabelWeight", labelSummarizer.histogram.max.toString) instr.logSumOfWeights(summarizer.weightSum) - if ($(blockSize) > 1) { - val scale = 1.0 / summarizer.count / numFeatures - val sparsity = 1 - summarizer.numNonzeros.toArray.map(_ * scale).sum - instr.logNamedValue("sparsity", sparsity.toString) - if (sparsity > 0.5) { - instr.logWarning(s"sparsity of input dataset is $sparsity, " + - s"which may hurt performance in high-level BLAS.") - } + + var actualBlockSizeInMB = $(maxBlockSizeInMB) + if (actualBlockSizeInMB == 0) { + actualBlockSizeInMB = InstanceBlock.DefaultBlockSizeInMB + require(actualBlockSizeInMB > 0, "inferred actual BlockSizeInMB must > 0") + instr.logNamedValue("actualBlockSizeInMB", actualBlockSizeInMB.toString) } val numClasses = MetadataUtils.getNumClasses(dataset.schema($(labelCol))) match { @@ -245,12 +234,8 @@ class LinearSVC @Since("2.2.0") ( Note that the intercept in scaled space and original space is the same; as a result, no scaling is needed. */ - val (rawCoefficients, objectiveHistory) = if ($(blockSize) == 1) { - trainOnRows(instances, featuresStd, regularization, optimizer) - } else { - trainOnBlocks(instances, featuresStd, regularization, optimizer) - } - if (instances.getStorageLevel != StorageLevel.NONE) instances.unpersist() + val (rawCoefficients, objectiveHistory) = + trainImpl(instances, actualBlockSizeInMB, featuresStd, regularization, optimizer) if (rawCoefficients == null) { val msg = s"${optimizer.getClass.getName} failed." 
@@ -284,35 +269,9 @@ class LinearSVC @Since("2.2.0") ( model.setSummary(Some(summary)) } - private def trainOnRows( - instances: RDD[Instance], - featuresStd: Array[Double], - regularization: Option[L2Regularization], - optimizer: BreezeOWLQN[Int, BDV[Double]]): (Array[Double], Array[Double]) = { - val numFeatures = featuresStd.length - val numFeaturesPlusIntercept = if ($(fitIntercept)) numFeatures + 1 else numFeatures - - val bcFeaturesStd = instances.context.broadcast(featuresStd) - val getAggregatorFunc = new HingeAggregator(bcFeaturesStd, $(fitIntercept))(_) - val costFun = new RDDLossFunction(instances, getAggregatorFunc, - regularization, $(aggregationDepth)) - - val states = optimizer.iterations(new CachedDiffFunction(costFun), - Vectors.zeros(numFeaturesPlusIntercept).asBreeze.toDenseVector) - - val arrayBuilder = mutable.ArrayBuilder.make[Double] - var state: optimizer.State = null - while (states.hasNext) { - state = states.next() - arrayBuilder += state.adjustedValue - } - bcFeaturesStd.destroy() - - (if (state != null) state.x.toArray else null, arrayBuilder.result) - } - - private def trainOnBlocks( + private def trainImpl( instances: RDD[Instance], + actualBlockSizeInMB: Double, featuresStd: Array[Double], regularization: Option[L2Regularization], optimizer: BreezeOWLQN[Int, BDV[Double]]): (Array[Double], Array[Double]) = { @@ -326,9 +285,11 @@ class LinearSVC @Since("2.2.0") ( val func = StandardScalerModel.getTransformFunc(Array.empty, inverseStd, false, true) iter.map { case Instance(label, weight, vec) => Instance(label, weight, func(vec)) } } - val blocks = InstanceBlock.blokify(standardized, $(blockSize)) + + val maxMemUsage = (actualBlockSizeInMB * 1024L * 1024L).ceil.toLong + val blocks = InstanceBlock.blokifyWithMaxMemUsage(standardized, maxMemUsage) .persist(StorageLevel.MEMORY_AND_DISK) - .setName(s"training blocks (blockSize=${$(blockSize)})") + .setName(s"training blocks (blockSizeInMB=$actualBlockSizeInMB)") val getAggregatorFunc = new BlockHingeAggregator($(fitIntercept))(_) val costFun = new RDDLossFunction(blocks, getAggregatorFunc, diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala index db5f88d5dddc8..c237366ec5c3d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala @@ -17,6 +17,8 @@ package org.apache.spark.ml.feature +import scala.collection.mutable + import org.apache.spark.ml.linalg._ import org.apache.spark.rdd.RDD @@ -100,6 +102,32 @@ private[spark] case class InstanceBlock( private[spark] object InstanceBlock { + /** + * Suggested value for BlockSizeInMB in Level-2 routine cases. + * According to performance tests of BLAS routine (see SPARK-31714) and + * LinearSVC (see SPARK-32907), 1.0 MB should be an acceptable value for + * linear models using Level-2 routine (GEMV) to perform prediction and + * gradient computation. 
+ */ + val DefaultBlockSizeInMB = 1.0 + + private def getBlockMemUsage( + numCols: Long, + numRows: Long, + nnz: Long, + allUnitWeight: Boolean): Long = { + val doubleBytes = java.lang.Double.BYTES + val arrayHeader = 12L + val denseSize = Matrices.getDenseSize(numCols, numRows) + val sparseSize = Matrices.getSparseSize(nnz, numRows + 1) + val matrixSize = math.min(denseSize, sparseSize) + if (allUnitWeight) { + matrixSize + doubleBytes * numRows + arrayHeader * 2 + } else { + matrixSize + doubleBytes * numRows * 2 + arrayHeader * 2 + } + } + def fromInstances(instances: Seq[Instance]): InstanceBlock = { val labels = instances.map(_.label).toArray val weights = if (instances.exists(_.weight != 1)) { @@ -114,6 +142,49 @@ private[spark] object InstanceBlock { def blokify(instances: RDD[Instance], blockSize: Int): RDD[InstanceBlock] = { instances.mapPartitions(_.grouped(blockSize).map(InstanceBlock.fromInstances)) } + + def blokifyWithMaxMemUsage( + instanceIterator: Iterator[Instance], + maxMemUsage: Long): Iterator[InstanceBlock] = { + require(maxMemUsage > 0) + + new Iterator[InstanceBlock]() { + private var numCols = -1L + + override def hasNext: Boolean = instanceIterator.hasNext + + override def next(): InstanceBlock = { + val buff = mutable.ArrayBuilder.make[Instance] + var buffCnt = 0L + var buffNnz = 0L + var buffUnitWeight = true + var blockMemUsage = 0L + + while (instanceIterator.hasNext && blockMemUsage < maxMemUsage) { + val instance = instanceIterator.next() + if (numCols < 0L) numCols = instance.features.size + require(numCols == instance.features.size) + + buff += instance + buffCnt += 1L + buffNnz += instance.features.numNonzeros + buffUnitWeight &&= (instance.weight == 1) + blockMemUsage = getBlockMemUsage(numCols, buffCnt, buffNnz, buffUnitWeight) + } + + // the block memory usage may slightly exceed threshold, not a big issue. + // and this ensure even if one row exceed block limit, each block has one row. + InstanceBlock.fromInstances(buff.result()) + } + } + } + + def blokifyWithMaxMemUsage( + instances: RDD[Instance], + maxMemUsage: Long): RDD[InstanceBlock] = { + require(maxMemUsage > 0) + instances.mapPartitions(iter => blokifyWithMaxMemUsage(iter, maxMemUsage)) + } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala index 7fd5f5938b565..0640fe355fdd6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala @@ -108,7 +108,12 @@ private[shared] object SharedParamsCodeGen { ParamDesc[Int]("blockSize", "block size for stacking input data in matrices. Data is " + "stacked within partitions. If block size is more than remaining data in a partition " + "then it is adjusted to the size of this data.", - isValid = "ParamValidators.gt(0)", isExpertParam = true) + isValid = "ParamValidators.gt(0)", isExpertParam = true), + ParamDesc[Double]("maxBlockSizeInMB", "Maximum memory in MB for stacking input data " + + "into blocks. Data is stacked within partitions. If more than remaining data size in a " + + "partition then it is adjusted to the data size. If 0, try to infer an appropriate " + + "value. 
Must be >= 0.", + Some("0.0"), isValid = "ParamValidators.gtEq(0.0)", isExpertParam = true) ) val code = genSharedParams(params) diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala index 60203eba61ea5..2fbda45a9e97a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala @@ -562,4 +562,22 @@ trait HasBlockSize extends Params { /** @group expertGetParam */ final def getBlockSize: Int = $(blockSize) } + +/** + * Trait for shared param maxBlockSizeInMB (default: 0.0). This trait may be changed or + * removed between minor versions. + */ +trait HasMaxBlockSizeInMB extends Params { + + /** + * Param for Maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value. Must be >= 0.. + * @group expertParam + */ + final val maxBlockSizeInMB: DoubleParam = new DoubleParam(this, "maxBlockSizeInMB", "Maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value. Must be >= 0.", ParamValidators.gtEq(0.0)) + + setDefault(maxBlockSizeInMB, 0.0) + + /** @group expertGetParam */ + final def getMaxBlockSizeInMB: Double = $(maxBlockSizeInMB) +} // scalastyle:on diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala index a66397324c1a6..d8b9c6a606ec2 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala @@ -214,8 +214,8 @@ class LinearSVCSuite extends MLTest with DefaultReadWriteTest { .setFitIntercept(fitIntercept) .setMaxIter(5) val model = lsvc.fit(dataset) - Seq(4, 16, 64).foreach { blockSize => - val model2 = lsvc.setBlockSize(blockSize).fit(dataset) + Seq(0, 0.01, 0.1, 1, 2, 4).foreach { s => + val model2 = lsvc.setMaxBlockSizeInMB(s).fit(dataset) assert(model.intercept ~== model2.intercept relTol 1e-9) assert(model.coefficients ~== model2.coefficients relTol 1e-9) } diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuite.scala index d780bdf5f5dc8..f1e071357bab7 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuite.scala @@ -74,4 +74,58 @@ class InstanceSuite extends SparkFunSuite{ } } + test("InstanceBlock: blokify with max memory usage") { + val instance1 = Instance(19.0, 2.0, Vectors.dense(1.0, 7.0)) + val instance2 = Instance(17.0, 1.0, Vectors.dense(0.0, 5.0).toSparse) + val instances = Seq(instance1, instance2) + + val blocks = InstanceBlock + .blokifyWithMaxMemUsage(Iterator.apply(instance1, instance2), 128).toArray + require(blocks.length == 1) + val block = blocks.head + assert(block.size === 2) + assert(block.numFeatures === 2) + block.instanceIterator.zipWithIndex.foreach { + case (instance, i) => + assert(instance.label === instances(i).label) + assert(instance.weight === instances(i).weight) + assert(instance.features.toArray === 
instances(i).features.toArray) + } + Seq(0, 1).foreach { i => + val nzIter = block.getNonZeroIter(i) + val vec = Vectors.sparse(2, nzIter.toSeq) + assert(vec.toArray === instances(i).features.toArray) + } + + // instances larger than maxMemUsage + val denseInstance = Instance(-1.0, 2.0, Vectors.dense(Array.fill(1000)(1.0))) + InstanceBlock.blokifyWithMaxMemUsage(Iterator.single(denseInstance), 64).size + InstanceBlock.blokifyWithMaxMemUsage(Iterator.fill(10)(denseInstance), 64).size + + // different numFeatures + intercept[IllegalArgumentException] { + InstanceBlock.blokifyWithMaxMemUsage(Iterator.apply(instance1, denseInstance), 64).size + } + + // nnz = 10 + val sparseInstance = Instance(-2.0, 3.0, + Vectors.sparse(1000, Array.range(0, 1000, 100), Array.fill(10)(0.1))) + + // normally, memory usage of a block does not exceed maxMemUsage too much + val maxMemUsage = 1 << 18 + val mixedIter = Iterator.fill(100)(denseInstance) ++ + Iterator.fill(1000)(sparseInstance) ++ + Iterator.fill(10)(denseInstance) ++ + Iterator.fill(10)(sparseInstance) ++ + Iterator.fill(100)(denseInstance) ++ + Iterator.fill(100)(sparseInstance) + InstanceBlock.blokifyWithMaxMemUsage(mixedIter, maxMemUsage) + .foreach { block => + val doubleBytes = java.lang.Double.BYTES + val arrayHeader = 12L + val blockMemUsage = block.matrix.getSizeInBytes + + (block.labels.length + block.weights.length) * doubleBytes + arrayHeader * 2 + require(blockMemUsage < maxMemUsage * 1.05) + } + } } diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index d6c861361a248..8f13f3275cb5b 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -26,8 +26,8 @@ from pyspark.ml import Estimator, Predictor, PredictionModel, Model from pyspark.ml.param.shared import HasRawPredictionCol, HasProbabilityCol, HasThresholds, \ HasRegParam, HasMaxIter, HasFitIntercept, HasTol, HasStandardization, HasWeightCol, \ - HasAggregationDepth, HasThreshold, HasBlockSize, Param, Params, TypeConverters, \ - HasElasticNetParam, HasSeed, HasStepSize, HasSolver, HasParallelism + HasAggregationDepth, HasThreshold, HasBlockSize, HasMaxBlockSizeInMB, Param, Params, \ + TypeConverters, HasElasticNetParam, HasSeed, HasStepSize, HasSolver, HasParallelism from pyspark.ml.tree import _DecisionTreeModel, _DecisionTreeParams, \ _TreeEnsembleModel, _RandomForestParams, _GBTParams, \ _HasVarianceImpurity, _TreeClassifierParams @@ -504,7 +504,7 @@ def recallByThreshold(self): class _LinearSVCParams(_ClassifierParams, HasRegParam, HasMaxIter, HasFitIntercept, HasTol, HasStandardization, HasWeightCol, HasAggregationDepth, HasThreshold, - HasBlockSize): + HasMaxBlockSizeInMB): """ Params for :py:class:`LinearSVC` and :py:class:`LinearSVCModel`. @@ -521,7 +521,7 @@ def __init__(self, *args): super(_LinearSVCParams, self).__init__(*args) self._setDefault(maxIter=100, regParam=0.0, tol=1e-6, fitIntercept=True, standardization=True, threshold=0.0, aggregationDepth=2, - blockSize=1) + maxBlockSizeInMB=0.0) @inherit_doc @@ -565,8 +565,8 @@ class LinearSVC(_JavaClassifier, _LinearSVCParams, JavaMLWritable, JavaMLReadabl LinearSVCModel... 
>>> model.getThreshold() 0.5 - >>> model.getBlockSize() - 1 + >>> model.getMaxBlockSizeInMB() + 0.0 >>> model.coefficients DenseVector([0.0, -0.2792, -0.1833]) >>> model.intercept @@ -605,12 +605,12 @@ class LinearSVC(_JavaClassifier, _LinearSVCParams, JavaMLWritable, JavaMLReadabl def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="prediction", maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, - aggregationDepth=2, blockSize=1): + aggregationDepth=2, maxBlockSizeInMB=0.0): """ __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", \ fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, \ - aggregationDepth=2, blockSize=1): + aggregationDepth=2, maxBlockSizeInMB=0.0): """ super(LinearSVC, self).__init__() self._java_obj = self._new_java_obj( @@ -623,12 +623,12 @@ def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="p def setParams(self, *, featuresCol="features", labelCol="label", predictionCol="prediction", maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, - aggregationDepth=2, blockSize=1): + aggregationDepth=2, maxBlockSizeInMB=0.0): """ setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", \ fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, \ - aggregationDepth=2, blockSize=1): + aggregationDepth=2, maxBlockSizeInMB=0.0): Sets params for Linear SVM Classifier. """ kwargs = self._input_kwargs @@ -694,11 +694,11 @@ def setAggregationDepth(self, value): return self._set(aggregationDepth=value) @since("3.1.0") - def setBlockSize(self, value): + def setMaxBlockSizeInMB(self, value): """ - Sets the value of :py:attr:`blockSize`. + Sets the value of :py:attr:`maxBlockSizeInMB`. """ - return self._set(blockSize=value) + return self._set(maxBlockSizeInMB=value) class LinearSVCModel(_JavaClassificationModel, _LinearSVCParams, JavaMLWritable, JavaMLReadable, diff --git a/python/pyspark/ml/classification.pyi b/python/pyspark/ml/classification.pyi index 55afc20a54cb9..9f72d24f63117 100644 --- a/python/pyspark/ml/classification.pyi +++ b/python/pyspark/ml/classification.pyi @@ -26,6 +26,7 @@ from pyspark.ml.base import _PredictorParams from pyspark.ml.param.shared import ( HasAggregationDepth, HasBlockSize, + HasMaxBlockSizeInMB, HasElasticNetParam, HasFitIntercept, HasMaxIter, @@ -172,7 +173,7 @@ class _LinearSVCParams( HasWeightCol, HasAggregationDepth, HasThreshold, - HasBlockSize, + HasMaxBlockSizeInMB, ): threshold: Param[float] def __init__(self, *args: Any) -> None: ... @@ -198,7 +199,7 @@ class LinearSVC( threshold: float = ..., weightCol: Optional[str] = ..., aggregationDepth: int = ..., - blockSize: int = ... + maxBlockSizeInMB: float = ... ) -> None: ... def setParams( self, @@ -215,7 +216,7 @@ class LinearSVC( threshold: float = ..., weightCol: Optional[str] = ..., aggregationDepth: int = ..., - blockSize: int = ... + maxBlockSizeInMB: float = ... ) -> LinearSVC: ... def setMaxIter(self, value: int) -> LinearSVC: ... def setRegParam(self, value: float) -> LinearSVC: ... @@ -225,7 +226,7 @@ class LinearSVC( def setThreshold(self, value: float) -> LinearSVC: ... 
def setWeightCol(self, value: str) -> LinearSVC: ... def setAggregationDepth(self, value: int) -> LinearSVC: ... - def setBlockSize(self, value: int) -> LinearSVC: ... + def setMaxBlockSizeInMB(self, value: float) -> LinearSVC: ... class LinearSVCModel( _JavaClassificationModel[Vector], diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index bc1ea87ad629c..53d26972c4b4a 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -165,7 +165,11 @@ def get$Name(self): None, "TypeConverters.toString"), ("blockSize", "block size for stacking input data in matrices. Data is stacked within " "partitions. If block size is more than remaining data in a partition then it is " - "adjusted to the size of this data.", None, "TypeConverters.toInt")] + "adjusted to the size of this data.", None, "TypeConverters.toInt"), + ("maxBlockSizeInMB", "maximum memory in MB for stacking input data into blocks. Data is " + + "stacked within partitions. If more than remaining data size in a partition then it " + + "is adjusted to the data size. If 0, try to infer an appropriate value. Must be >= 0.", + "0.0", "TypeConverters.toFloat")] code = [] for name, doc, defaultValueStr, typeConverter in shared: diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index 24fb0d3e2554d..cbef7386e2214 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -597,3 +597,21 @@ def getBlockSize(self): Gets the value of blockSize or its default value. """ return self.getOrDefault(self.blockSize) + + +class HasMaxBlockSizeInMB(Params): + """ + Mixin for param maxBlockSizeInMB: maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value. Must be >= 0. + """ + + maxBlockSizeInMB = Param(Params._dummy(), "maxBlockSizeInMB", "maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value. Must be >= 0.", typeConverter=TypeConverters.toFloat) + + def __init__(self): + super(HasMaxBlockSizeInMB, self).__init__() + self._setDefault(maxBlockSizeInMB=0.0) + + def getMaxBlockSizeInMB(self): + """ + Gets the value of maxBlockSizeInMB or its default value. + """ + return self.getOrDefault(self.maxBlockSizeInMB) diff --git a/python/pyspark/ml/param/shared.pyi b/python/pyspark/ml/param/shared.pyi index 5999c0eaa4661..0ff4d544205bc 100644 --- a/python/pyspark/ml/param/shared.pyi +++ b/python/pyspark/ml/param/shared.pyi @@ -185,3 +185,8 @@ class HasBlockSize(Params): blockSize: Param[int] def __init__(self) -> None: ... def getBlockSize(self) -> int: ... + +class HasMaxBlockSizeInMB(Params): + maxBlockSizeInMB: Param[float] + def __init__(self) -> None: ... + def getMaxBlockSizeInMB(self) -> float: ... From a3d2954662831ca9fa6a2b886ca5bd8d81785974 Mon Sep 17 00:00:00 2001 From: ulysses Date: Thu, 12 Nov 2020 20:26:33 +0900 Subject: [PATCH 0457/1009] [SPARK-33421][SQL] Support Greatest and Least in Expression Canonicalize ### What changes were proposed in this pull request? Add `Greatest` and `Least` check in `Canonicalize`. ### Why are the changes needed? The children of both `Greatest` and `Least` are order Irrelevant. 
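For example, the intended effect can be checked directly against the Catalyst expression API (a sketch mirroring the new test, assuming this rule is applied during canonicalization):

```
import org.apache.spark.sql.catalyst.expressions.{Greatest, Least, Literal}

// Commutative children are reordered consistently, so argument order no longer matters.
val g1 = Greatest(Seq(Literal(1), Literal(2)))
val g2 = Greatest(Seq(Literal(2), Literal(1)))
assert(g1.canonicalized == g2.canonicalized)

val l1 = Least(Seq(Literal(1), Literal(2)))
val l2 = Least(Seq(Literal(2), Literal(1)))
assert(l1.canonicalized == l2.canonicalized)
```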
Let's say we have `greatest(1, 2)` and `greatest(2, 1)`. We can get the same canonicalized expression in this case. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Add test. Closes #30330 from ulysses-you/SPARK-33421. Authored-by: ulysses Signed-off-by: HyukjinKwon --- .../catalyst/expressions/Canonicalize.scala | 7 +++++ .../expressions/CanonicalizeSuite.scala | 28 +++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala index ae201359a762c..2765ec7d8a0eb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala @@ -109,6 +109,13 @@ object Canonicalize { // order the list in the In operator case In(value, list) if list.length > 1 => In(value, list.sortBy(_.hashCode())) + case g: Greatest => + val newChildren = orderCommutative(g, { case Greatest(children) => children }) + Greatest(newChildren) + case l: Least => + val newChildren = orderCommutative(l, { case Least(children) => children }) + Least(newChildren) + case _ => e } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala index bcbccd93e509f..ac31a68b2b618 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala @@ -142,4 +142,32 @@ class CanonicalizeSuite extends SparkFunSuite { } } } + + test("SPARK-33421: Support Greatest and Least in Expression Canonicalize") { + Seq(Least(_), Greatest(_)).foreach { f => + // test deterministic expr + val expr1 = f(Seq(Literal(1), Literal(2), Literal(3))) + val expr2 = f(Seq(Literal(3), Literal(1), Literal(2))) + val expr3 = f(Seq(Literal(1), Literal(1), Literal(1))) + assert(expr1.canonicalized == expr2.canonicalized) + assert(expr1.canonicalized != expr3.canonicalized) + assert(expr2.canonicalized != expr3.canonicalized) + + // test non-deterministic expr + val randExpr1 = f(Seq(Literal(1), rand(1))) + val randExpr2 = f(Seq(rand(1), Literal(1))) + val randExpr3 = f(Seq(Literal(1), rand(2))) + assert(randExpr1.canonicalized == randExpr2.canonicalized) + assert(randExpr1.canonicalized != randExpr3.canonicalized) + assert(randExpr2.canonicalized != randExpr3.canonicalized) + + // test nested expr + val nestedExpr1 = f(Seq(Literal(1), f(Seq(Literal(2), Literal(3))))) + val nestedExpr2 = f(Seq(f(Seq(Literal(2), Literal(3))), Literal(1))) + val nestedExpr3 = f(Seq(f(Seq(Literal(1), Literal(1))), Literal(1))) + assert(nestedExpr1.canonicalized == nestedExpr2.canonicalized) + assert(nestedExpr1.canonicalized != nestedExpr3.canonicalized) + assert(nestedExpr2.canonicalized != nestedExpr3.canonicalized) + } + } } From 2f07c568107b2e466a6d6e199eaff7068100bb3c Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Thu, 12 Nov 2020 14:59:22 +0000 Subject: [PATCH 0458/1009] [SPARK-33278][SQL] Improve the performance for FIRST_VALUE ### What changes were proposed in this pull request? https://github.com/apache/spark/pull/29800 provides a performance improvement for `NTH_VALUE`. 
`FIRST_VALUE` also could use the `UnboundedOffsetWindowFunctionFrame` and `UnboundedPrecedingOffsetWindowFunctionFrame`. ### Why are the changes needed? Improve the performance for `FIRST_VALUE`. ### Does this PR introduce _any_ user-facing change? 'No'. ### How was this patch tested? Jenkins test. Closes #30178 from beliefer/SPARK-33278. Lead-authored-by: gengjiaan Co-authored-by: beliefer Co-authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/Optimizer.scala | 13 + .../OptimizeWindowFunctionsSuite.scala | 76 ++++ .../resources/sql-tests/inputs/window.sql | 66 +-- .../sql-tests/results/window.sql.out | 426 +++++++++--------- 4 files changed, 339 insertions(+), 242 deletions(-) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeWindowFunctionsSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 51f7799b1e427..e492d01650097 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -82,6 +82,7 @@ abstract class Optimizer(catalogManager: CatalogManager) // Operator combine CollapseRepartition, CollapseProject, + OptimizeWindowFunctions, CollapseWindow, CombineFilters, CombineLimits, @@ -806,6 +807,18 @@ object CollapseRepartition extends Rule[LogicalPlan] { } } +/** + * Replaces first(col) to nth_value(col, 1) for better performance. + */ +object OptimizeWindowFunctions extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions { + case we @ WindowExpression(AggregateExpression(first: First, _, _, _, _), spec) + if spec.orderSpec.nonEmpty && + spec.frameSpecification.asInstanceOf[SpecifiedWindowFrame].frameType == RowFrame => + we.copy(windowFunction = NthValue(first.child, Literal(1), first.ignoreNulls)) + } +} + /** * Collapse Adjacent Window Expression. * - If the partition specs and order specs are the same and the window expression are diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeWindowFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeWindowFunctionsSuite.scala new file mode 100644 index 0000000000000..389aaeafe655f --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeWindowFunctionsSuite.scala @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.aggregate.First +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} +import org.apache.spark.sql.catalyst.rules.RuleExecutor + +class OptimizeWindowFunctionsSuite extends PlanTest { + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = Batch("OptimizeWindowFunctions", FixedPoint(10), + OptimizeWindowFunctions) :: Nil + } + + val testRelation = LocalRelation('a.double, 'b.double, 'c.string) + val a = testRelation.output(0) + val b = testRelation.output(1) + val c = testRelation.output(2) + + test("replace first(col) by nth_value(col, 1)") { + val inputPlan = testRelation.select( + WindowExpression( + First(a, false).toAggregateExpression(), + WindowSpecDefinition(b :: Nil, c.asc :: Nil, + SpecifiedWindowFrame(RowFrame, UnboundedPreceding, CurrentRow)))) + val correctAnswer = testRelation.select( + WindowExpression( + NthValue(a, Literal(1), false), + WindowSpecDefinition(b :: Nil, c.asc :: Nil, + SpecifiedWindowFrame(RowFrame, UnboundedPreceding, CurrentRow)))) + + val optimized = Optimize.execute(inputPlan) + assert(optimized == correctAnswer) + } + + test("can't replace first(col) by nth_value(col, 1) if the window frame type is range") { + val inputPlan = testRelation.select( + WindowExpression( + First(a, false).toAggregateExpression(), + WindowSpecDefinition(b :: Nil, c.asc :: Nil, + SpecifiedWindowFrame(RangeFrame, UnboundedPreceding, CurrentRow)))) + + val optimized = Optimize.execute(inputPlan) + assert(optimized == inputPlan) + } + + test("can't replace first(col) by nth_value(col, 1) if the window frame isn't ordered") { + val inputPlan = testRelation.select( + WindowExpression( + First(a, false).toAggregateExpression(), + WindowSpecDefinition(b :: Nil, Nil, + SpecifiedWindowFrame(RowFrame, UnboundedPreceding, CurrentRow)))) + + val optimized = Optimize.execute(inputPlan) + assert(optimized == inputPlan) + } +} diff --git a/sql/core/src/test/resources/sql-tests/inputs/window.sql b/sql/core/src/test/resources/sql-tests/inputs/window.sql index c1be5fb27e6fa..f5223af9125f6 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/window.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/window.sql @@ -146,104 +146,108 @@ SELECT val, cate, count(val) FILTER (WHERE val > 1) OVER(PARTITION BY cate) FROM testData ORDER BY cate, val; --- nth_value() over () +-- nth_value()/first_value() over () SELECT employee_name, salary, - nth_value(employee_name, 2) OVER (ORDER BY salary DESC) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC) ORDER BY salary DESC; SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ORDER BY salary DESC; SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) 
second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ORDER BY salary DESC; SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary - RANGE BETWEEN 2000 PRECEDING AND 1000 FOLLOWING) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary RANGE BETWEEN 2000 PRECEDING AND 1000 FOLLOWING) ORDER BY salary; SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) ORDER BY salary DESC; SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) ORDER BY salary DESC; SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) ORDER BY salary DESC; SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) ORDER BY salary DESC; SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) ORDER BY salary DESC; SELECT employee_name, department, salary, - NTH_VALUE(employee_name, 2) OVER ( - PARTITION BY department - ORDER BY salary DESC - RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING - ) second_highest_salary + FIRST_VALUE(employee_name) OVER w highest_salary, + NTH_VALUE(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS ( + PARTITION BY department + ORDER BY salary DESC + RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING +) ORDER BY department; \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out b/sql/core/src/test/resources/sql-tests/results/window.sql.out index f6506a77e239c..1304dcf21d0b3 100644 --- a/sql/core/src/test/resources/sql-tests/results/window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out @@ -421,286 +421,288 @@ window aggregate function with filter predicate is 
not supported yet.; SELECT employee_name, salary, - nth_value(employee_name, 2) OVER (ORDER BY salary DESC) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC) ORDER BY salary DESC -- !query schema -struct +struct -- !query output -Larry Bott 11798 NULL -Gerard Bondur 11472 Gerard Bondur -Pamela Castillo 11303 Gerard Bondur -Barry Jones 10586 Gerard Bondur -George Vanauf 10563 Gerard Bondur -Loui Bondur 10449 Gerard Bondur -Mary Patterson 9998 Gerard Bondur -Steve Patterson 9441 Gerard Bondur -Julie Firrelli 9181 Gerard Bondur -Jeff Firrelli 8992 Gerard Bondur -William Patterson 8870 Gerard Bondur -Diane Murphy 8435 Gerard Bondur -Leslie Jennings 8113 Gerard Bondur -Gerard Hernandez 6949 Gerard Bondur -Foon Yue Tseng 6660 Gerard Bondur -Anthony Bow 6627 Gerard Bondur -Leslie Thompson 5186 Gerard Bondur +Larry Bott 11798 Larry Bott NULL +Gerard Bondur 11472 Larry Bott Gerard Bondur +Pamela Castillo 11303 Larry Bott Gerard Bondur +Barry Jones 10586 Larry Bott Gerard Bondur +George Vanauf 10563 Larry Bott Gerard Bondur +Loui Bondur 10449 Larry Bott Gerard Bondur +Mary Patterson 9998 Larry Bott Gerard Bondur +Steve Patterson 9441 Larry Bott Gerard Bondur +Julie Firrelli 9181 Larry Bott Gerard Bondur +Jeff Firrelli 8992 Larry Bott Gerard Bondur +William Patterson 8870 Larry Bott Gerard Bondur +Diane Murphy 8435 Larry Bott Gerard Bondur +Leslie Jennings 8113 Larry Bott Gerard Bondur +Gerard Hernandez 6949 Larry Bott Gerard Bondur +Foon Yue Tseng 6660 Larry Bott Gerard Bondur +Anthony Bow 6627 Larry Bott Gerard Bondur +Leslie Thompson 5186 Larry Bott Gerard Bondur -- !query SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ORDER BY salary DESC -- !query schema -struct +struct -- !query output -Larry Bott 11798 NULL -Gerard Bondur 11472 Gerard Bondur -Pamela Castillo 11303 Gerard Bondur -Barry Jones 10586 Gerard Bondur -George Vanauf 10563 Gerard Bondur -Loui Bondur 10449 Gerard Bondur -Mary Patterson 9998 Gerard Bondur -Steve Patterson 9441 Gerard Bondur -Julie Firrelli 9181 Gerard Bondur -Jeff Firrelli 8992 Gerard Bondur -William Patterson 8870 Gerard Bondur -Diane Murphy 8435 Gerard Bondur -Leslie Jennings 8113 Gerard Bondur -Gerard Hernandez 6949 Gerard Bondur -Foon Yue Tseng 6660 Gerard Bondur -Anthony Bow 6627 Gerard Bondur -Leslie Thompson 5186 Gerard Bondur +Larry Bott 11798 Larry Bott NULL +Gerard Bondur 11472 Larry Bott Gerard Bondur +Pamela Castillo 11303 Larry Bott Gerard Bondur +Barry Jones 10586 Larry Bott Gerard Bondur +George Vanauf 10563 Larry Bott Gerard Bondur +Loui Bondur 10449 Larry Bott Gerard Bondur +Mary Patterson 9998 Larry Bott Gerard Bondur +Steve Patterson 9441 Larry Bott Gerard Bondur +Julie Firrelli 9181 Larry Bott Gerard Bondur +Jeff Firrelli 8992 Larry Bott Gerard Bondur +William Patterson 8870 Larry Bott Gerard Bondur +Diane Murphy 8435 Larry Bott Gerard Bondur +Leslie Jennings 8113 Larry Bott Gerard Bondur +Gerard Hernandez 6949 Larry Bott Gerard Bondur +Foon Yue Tseng 6660 Larry Bott Gerard Bondur +Anthony Bow 6627 Larry Bott Gerard Bondur +Leslie Thompson 5186 Larry Bott Gerard Bondur -- !query SELECT 
employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ORDER BY salary DESC -- !query schema -struct +struct -- !query output -Larry Bott 11798 NULL -Gerard Bondur 11472 Gerard Bondur -Pamela Castillo 11303 Gerard Bondur -Barry Jones 10586 Gerard Bondur -George Vanauf 10563 Gerard Bondur -Loui Bondur 10449 Gerard Bondur -Mary Patterson 9998 Gerard Bondur -Steve Patterson 9441 Gerard Bondur -Julie Firrelli 9181 Gerard Bondur -Jeff Firrelli 8992 Gerard Bondur -William Patterson 8870 Gerard Bondur -Diane Murphy 8435 Gerard Bondur -Leslie Jennings 8113 Gerard Bondur -Gerard Hernandez 6949 Gerard Bondur -Foon Yue Tseng 6660 Gerard Bondur -Anthony Bow 6627 Gerard Bondur -Leslie Thompson 5186 Gerard Bondur +Larry Bott 11798 Larry Bott NULL +Gerard Bondur 11472 Larry Bott Gerard Bondur +Pamela Castillo 11303 Larry Bott Gerard Bondur +Barry Jones 10586 Larry Bott Gerard Bondur +George Vanauf 10563 Larry Bott Gerard Bondur +Loui Bondur 10449 Larry Bott Gerard Bondur +Mary Patterson 9998 Larry Bott Gerard Bondur +Steve Patterson 9441 Larry Bott Gerard Bondur +Julie Firrelli 9181 Larry Bott Gerard Bondur +Jeff Firrelli 8992 Larry Bott Gerard Bondur +William Patterson 8870 Larry Bott Gerard Bondur +Diane Murphy 8435 Larry Bott Gerard Bondur +Leslie Jennings 8113 Larry Bott Gerard Bondur +Gerard Hernandez 6949 Larry Bott Gerard Bondur +Foon Yue Tseng 6660 Larry Bott Gerard Bondur +Anthony Bow 6627 Larry Bott Gerard Bondur +Leslie Thompson 5186 Larry Bott Gerard Bondur -- !query SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary - RANGE BETWEEN 2000 PRECEDING AND 1000 FOLLOWING) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary RANGE BETWEEN 2000 PRECEDING AND 1000 FOLLOWING) ORDER BY salary -- !query schema -struct +struct -- !query output -Leslie Thompson 5186 NULL -Anthony Bow 6627 Anthony Bow -Foon Yue Tseng 6660 Anthony Bow -Gerard Hernandez 6949 Anthony Bow -Leslie Jennings 8113 Foon Yue Tseng -Diane Murphy 8435 Foon Yue Tseng -William Patterson 8870 Leslie Jennings -Jeff Firrelli 8992 Diane Murphy -Julie Firrelli 9181 Diane Murphy -Steve Patterson 9441 Diane Murphy -Mary Patterson 9998 Diane Murphy -Loui Bondur 10449 Jeff Firrelli -George Vanauf 10563 Jeff Firrelli -Barry Jones 10586 Jeff Firrelli -Pamela Castillo 11303 Mary Patterson -Gerard Bondur 11472 Loui Bondur -Larry Bott 11798 Loui Bondur +Leslie Thompson 5186 Leslie Thompson NULL +Anthony Bow 6627 Leslie Thompson Anthony Bow +Foon Yue Tseng 6660 Leslie Thompson Anthony Bow +Gerard Hernandez 6949 Leslie Thompson Anthony Bow +Leslie Jennings 8113 Anthony Bow Foon Yue Tseng +Diane Murphy 8435 Anthony Bow Foon Yue Tseng +William Patterson 8870 Gerard Hernandez Leslie Jennings +Jeff Firrelli 8992 Leslie Jennings Diane Murphy +Julie Firrelli 9181 Leslie Jennings Diane Murphy +Steve Patterson 9441 Leslie Jennings Diane Murphy +Mary Patterson 9998 Leslie Jennings Diane Murphy +Loui Bondur 10449 William Patterson Jeff Firrelli +George Vanauf 10563 William Patterson Jeff Firrelli +Barry Jones 10586 William Patterson Jeff Firrelli +Pamela Castillo 11303 Steve Patterson Mary 
Patterson +Gerard Bondur 11472 Mary Patterson Loui Bondur +Larry Bott 11798 Mary Patterson Loui Bondur -- !query SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) ORDER BY salary DESC -- !query schema -struct +struct -- !query output -Larry Bott 11798 Gerard Bondur -Gerard Bondur 11472 Gerard Bondur -Pamela Castillo 11303 Gerard Bondur -Barry Jones 10586 Pamela Castillo -George Vanauf 10563 Barry Jones -Loui Bondur 10449 George Vanauf -Mary Patterson 9998 Loui Bondur -Steve Patterson 9441 Mary Patterson -Julie Firrelli 9181 Steve Patterson -Jeff Firrelli 8992 Julie Firrelli -William Patterson 8870 Jeff Firrelli -Diane Murphy 8435 William Patterson -Leslie Jennings 8113 Diane Murphy -Gerard Hernandez 6949 Leslie Jennings -Foon Yue Tseng 6660 Gerard Hernandez -Anthony Bow 6627 Foon Yue Tseng -Leslie Thompson 5186 Anthony Bow +Larry Bott 11798 Larry Bott Gerard Bondur +Gerard Bondur 11472 Larry Bott Gerard Bondur +Pamela Castillo 11303 Larry Bott Gerard Bondur +Barry Jones 10586 Gerard Bondur Pamela Castillo +George Vanauf 10563 Pamela Castillo Barry Jones +Loui Bondur 10449 Barry Jones George Vanauf +Mary Patterson 9998 George Vanauf Loui Bondur +Steve Patterson 9441 Loui Bondur Mary Patterson +Julie Firrelli 9181 Mary Patterson Steve Patterson +Jeff Firrelli 8992 Steve Patterson Julie Firrelli +William Patterson 8870 Julie Firrelli Jeff Firrelli +Diane Murphy 8435 Jeff Firrelli William Patterson +Leslie Jennings 8113 William Patterson Diane Murphy +Gerard Hernandez 6949 Diane Murphy Leslie Jennings +Foon Yue Tseng 6660 Leslie Jennings Gerard Hernandez +Anthony Bow 6627 Gerard Hernandez Foon Yue Tseng +Leslie Thompson 5186 Foon Yue Tseng Anthony Bow -- !query SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) ORDER BY salary DESC -- !query schema -struct +struct -- !query output -Larry Bott 11798 Gerard Bondur -Gerard Bondur 11472 Pamela Castillo -Pamela Castillo 11303 Barry Jones -Barry Jones 10586 George Vanauf -George Vanauf 10563 Loui Bondur -Loui Bondur 10449 Mary Patterson -Mary Patterson 9998 Steve Patterson -Steve Patterson 9441 Julie Firrelli -Julie Firrelli 9181 Jeff Firrelli -Jeff Firrelli 8992 William Patterson -William Patterson 8870 Diane Murphy -Diane Murphy 8435 Leslie Jennings -Leslie Jennings 8113 Gerard Hernandez -Gerard Hernandez 6949 Foon Yue Tseng -Foon Yue Tseng 6660 Anthony Bow -Anthony Bow 6627 Leslie Thompson -Leslie Thompson 5186 NULL +Larry Bott 11798 Larry Bott Gerard Bondur +Gerard Bondur 11472 Gerard Bondur Pamela Castillo +Pamela Castillo 11303 Pamela Castillo Barry Jones +Barry Jones 10586 Barry Jones George Vanauf +George Vanauf 10563 George Vanauf Loui Bondur +Loui Bondur 10449 Loui Bondur Mary Patterson +Mary Patterson 9998 Mary Patterson Steve Patterson +Steve Patterson 9441 Steve Patterson Julie Firrelli +Julie Firrelli 9181 Julie Firrelli Jeff Firrelli +Jeff Firrelli 8992 Jeff Firrelli William Patterson +William Patterson 8870 
William Patterson Diane Murphy +Diane Murphy 8435 Diane Murphy Leslie Jennings +Leslie Jennings 8113 Leslie Jennings Gerard Hernandez +Gerard Hernandez 6949 Gerard Hernandez Foon Yue Tseng +Foon Yue Tseng 6660 Foon Yue Tseng Anthony Bow +Anthony Bow 6627 Anthony Bow Leslie Thompson +Leslie Thompson 5186 Leslie Thompson NULL -- !query SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) ORDER BY salary DESC -- !query schema -struct +struct -- !query output -Larry Bott 11798 Gerard Bondur -Gerard Bondur 11472 Gerard Bondur -Pamela Castillo 11303 Gerard Bondur -Barry Jones 10586 Gerard Bondur -George Vanauf 10563 Gerard Bondur -Loui Bondur 10449 Gerard Bondur -Mary Patterson 9998 Gerard Bondur -Steve Patterson 9441 Gerard Bondur -Julie Firrelli 9181 Gerard Bondur -Jeff Firrelli 8992 Gerard Bondur -William Patterson 8870 Gerard Bondur -Diane Murphy 8435 Gerard Bondur -Leslie Jennings 8113 Gerard Bondur -Gerard Hernandez 6949 Gerard Bondur -Foon Yue Tseng 6660 Gerard Bondur -Anthony Bow 6627 Gerard Bondur -Leslie Thompson 5186 Gerard Bondur +Larry Bott 11798 Larry Bott Gerard Bondur +Gerard Bondur 11472 Larry Bott Gerard Bondur +Pamela Castillo 11303 Larry Bott Gerard Bondur +Barry Jones 10586 Larry Bott Gerard Bondur +George Vanauf 10563 Larry Bott Gerard Bondur +Loui Bondur 10449 Larry Bott Gerard Bondur +Mary Patterson 9998 Larry Bott Gerard Bondur +Steve Patterson 9441 Larry Bott Gerard Bondur +Julie Firrelli 9181 Larry Bott Gerard Bondur +Jeff Firrelli 8992 Larry Bott Gerard Bondur +William Patterson 8870 Larry Bott Gerard Bondur +Diane Murphy 8435 Larry Bott Gerard Bondur +Leslie Jennings 8113 Larry Bott Gerard Bondur +Gerard Hernandez 6949 Larry Bott Gerard Bondur +Foon Yue Tseng 6660 Larry Bott Gerard Bondur +Anthony Bow 6627 Larry Bott Gerard Bondur +Leslie Thompson 5186 Larry Bott Gerard Bondur -- !query SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) ORDER BY salary DESC -- !query schema -struct +struct -- !query output -Larry Bott 11798 Gerard Bondur -Gerard Bondur 11472 Gerard Bondur -Pamela Castillo 11303 Gerard Bondur -Barry Jones 10586 Gerard Bondur -George Vanauf 10563 Gerard Bondur -Loui Bondur 10449 Gerard Bondur -Mary Patterson 9998 Gerard Bondur -Steve Patterson 9441 Gerard Bondur -Julie Firrelli 9181 Gerard Bondur -Jeff Firrelli 8992 Gerard Bondur -William Patterson 8870 Gerard Bondur -Diane Murphy 8435 Gerard Bondur -Leslie Jennings 8113 Gerard Bondur -Gerard Hernandez 6949 Gerard Bondur -Foon Yue Tseng 6660 Gerard Bondur -Anthony Bow 6627 Gerard Bondur -Leslie Thompson 5186 Gerard Bondur +Larry Bott 11798 Larry Bott Gerard Bondur +Gerard Bondur 11472 Larry Bott Gerard Bondur +Pamela Castillo 11303 Larry Bott Gerard Bondur +Barry Jones 10586 Larry Bott Gerard Bondur +George Vanauf 10563 Larry Bott Gerard Bondur +Loui Bondur 10449 Larry Bott Gerard Bondur +Mary Patterson 9998 Larry Bott Gerard Bondur 
+Steve Patterson 9441 Larry Bott Gerard Bondur +Julie Firrelli 9181 Larry Bott Gerard Bondur +Jeff Firrelli 8992 Larry Bott Gerard Bondur +William Patterson 8870 Larry Bott Gerard Bondur +Diane Murphy 8435 Larry Bott Gerard Bondur +Leslie Jennings 8113 Larry Bott Gerard Bondur +Gerard Hernandez 6949 Larry Bott Gerard Bondur +Foon Yue Tseng 6660 Larry Bott Gerard Bondur +Anthony Bow 6627 Larry Bott Gerard Bondur +Leslie Thompson 5186 Larry Bott Gerard Bondur -- !query SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) ORDER BY salary DESC -- !query schema -struct +struct -- !query output -Larry Bott 11798 Gerard Bondur -Gerard Bondur 11472 Gerard Bondur -Pamela Castillo 11303 Gerard Bondur -Barry Jones 10586 Gerard Bondur -George Vanauf 10563 Gerard Bondur -Loui Bondur 10449 Gerard Bondur -Mary Patterson 9998 Gerard Bondur -Steve Patterson 9441 Gerard Bondur -Julie Firrelli 9181 Gerard Bondur -Jeff Firrelli 8992 Gerard Bondur -William Patterson 8870 Gerard Bondur -Diane Murphy 8435 Gerard Bondur -Leslie Jennings 8113 Gerard Bondur -Gerard Hernandez 6949 Gerard Bondur -Foon Yue Tseng 6660 Gerard Bondur -Anthony Bow 6627 Gerard Bondur -Leslie Thompson 5186 Gerard Bondur +Larry Bott 11798 Larry Bott Gerard Bondur +Gerard Bondur 11472 Larry Bott Gerard Bondur +Pamela Castillo 11303 Larry Bott Gerard Bondur +Barry Jones 10586 Larry Bott Gerard Bondur +George Vanauf 10563 Larry Bott Gerard Bondur +Loui Bondur 10449 Larry Bott Gerard Bondur +Mary Patterson 9998 Larry Bott Gerard Bondur +Steve Patterson 9441 Larry Bott Gerard Bondur +Julie Firrelli 9181 Larry Bott Gerard Bondur +Jeff Firrelli 8992 Larry Bott Gerard Bondur +William Patterson 8870 Larry Bott Gerard Bondur +Diane Murphy 8435 Larry Bott Gerard Bondur +Leslie Jennings 8113 Larry Bott Gerard Bondur +Gerard Hernandez 6949 Larry Bott Gerard Bondur +Foon Yue Tseng 6660 Larry Bott Gerard Bondur +Anthony Bow 6627 Larry Bott Gerard Bondur +Leslie Thompson 5186 Larry Bott Gerard Bondur -- !query @@ -708,31 +710,33 @@ SELECT employee_name, department, salary, - NTH_VALUE(employee_name, 2) OVER ( - PARTITION BY department - ORDER BY salary DESC - RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING - ) second_highest_salary + FIRST_VALUE(employee_name) OVER w highest_salary, + NTH_VALUE(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS ( + PARTITION BY department + ORDER BY salary DESC + RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING +) ORDER BY department -- !query schema -struct --- !query output -Gerard Bondur Accounting 11472 Mary Patterson -Mary Patterson Accounting 9998 Mary Patterson -Jeff Firrelli Accounting 8992 Mary Patterson -William Patterson Accounting 8870 Mary Patterson -Diane Murphy Accounting 8435 Mary Patterson -Anthony Bow Accounting 6627 Mary Patterson -Leslie Jennings IT 8113 Leslie Thompson -Leslie Thompson IT 5186 Leslie Thompson -Larry Bott SCM 11798 Pamela Castillo -Pamela Castillo SCM 11303 Pamela Castillo -Barry Jones SCM 10586 Pamela Castillo -Loui Bondur SCM 10449 Pamela Castillo -Gerard Hernandez SCM 6949 Pamela Castillo -George Vanauf Sales 10563 Steve Patterson -Steve Patterson Sales 9441 Steve Patterson -Julie Firrelli Sales 9181 Steve Patterson -Foon 
Yue Tseng Sales 6660 Steve Patterson \ No newline at end of file +struct +-- !query output +Gerard Bondur Accounting 11472 Gerard Bondur Mary Patterson +Mary Patterson Accounting 9998 Gerard Bondur Mary Patterson +Jeff Firrelli Accounting 8992 Gerard Bondur Mary Patterson +William Patterson Accounting 8870 Gerard Bondur Mary Patterson +Diane Murphy Accounting 8435 Gerard Bondur Mary Patterson +Anthony Bow Accounting 6627 Gerard Bondur Mary Patterson +Leslie Jennings IT 8113 Leslie Jennings Leslie Thompson +Leslie Thompson IT 5186 Leslie Jennings Leslie Thompson +Larry Bott SCM 11798 Larry Bott Pamela Castillo +Pamela Castillo SCM 11303 Larry Bott Pamela Castillo +Barry Jones SCM 10586 Larry Bott Pamela Castillo +Loui Bondur SCM 10449 Larry Bott Pamela Castillo +Gerard Hernandez SCM 6949 Larry Bott Pamela Castillo +George Vanauf Sales 10563 George Vanauf Steve Patterson +Steve Patterson Sales 9441 George Vanauf Steve Patterson +Julie Firrelli Sales 9181 George Vanauf Steve Patterson +Foon Yue Tseng Sales 6660 George Vanauf Steve Patterson \ No newline at end of file From 1baf0d5c9b481622d5a811fd600f680b0cc3229f Mon Sep 17 00:00:00 2001 From: Linhong Liu <67896261+linhongliu-db@users.noreply.github.com> Date: Fri, 13 Nov 2020 01:10:28 +0900 Subject: [PATCH 0459/1009] [SPARK-33140][SQL][FOLLOW-UP] change val to def in object rule ### What changes were proposed in this pull request? In #30097, many rules changed from case class to object, but if the rule is stateful, there will be a problem. For example, if an object rule uses a `val` to refer to a config, it will be unchanged after initialization even if other spark session uses a different config value. ### Why are the changes needed? Avoid potential bug ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UT Closes #30354 from linhongliu-db/SPARK-33140-followup-2. 
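For illustration, here is a minimal, self-contained Scala sketch of the `val` vs `def` difference this follow-up addresses. `Conf` and `activeConf` are stand-ins for `SQLConf.get`, not Spark classes; only the capture-once versus read-on-every-call behavior is the point.

```scala
object ConfDemo {
  case class Conf(caseSensitive: Boolean)
  // Plays the role of SQLConf.get: the active value can differ between sessions.
  var activeConf: Conf = Conf(caseSensitive = false)

  object ValRule {
    // Captured once, when this object is first initialized.
    val caseSensitive: Boolean = activeConf.caseSensitive
  }

  object DefRule {
    // Looked up on every access, so it always reflects the current conf.
    def caseSensitive: Boolean = activeConf.caseSensitive
  }

  def main(args: Array[String]): Unit = {
    println(ValRule.caseSensitive) // false: first access initializes the object
    println(DefRule.caseSensitive) // false
    activeConf = Conf(caseSensitive = true)
    println(ValRule.caseSensitive) // still false: the val is stale
    println(DefRule.caseSensitive) // true: the def re-reads activeConf
  }
}
```

This is why the rules in the diff below switch `private val hintErrorHandler` and `private val canonicalizer` to `def`.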
Lead-authored-by: Linhong Liu <67896261+linhongliu-db@users.noreply.github.com> Co-authored-by: Linhong Liu Signed-off-by: HyukjinKwon --- .../org/apache/spark/sql/catalyst/analysis/ResolveHints.scala | 4 ++-- .../spark/sql/catalyst/analysis/higherOrderFunctions.scala | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala index f1706c11e92ec..b44ca20e74bb0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala @@ -53,7 +53,7 @@ object ResolveHints { object ResolveJoinStrategyHints extends Rule[LogicalPlan] { private val STRATEGY_HINT_NAMES = JoinStrategyHint.strategies.flatMap(_.hintAliases) - private val hintErrorHandler = conf.hintErrorHandler + private def hintErrorHandler = conf.hintErrorHandler def resolver: Resolver = conf.resolver @@ -268,7 +268,7 @@ object ResolveHints { */ class RemoveAllHints extends Rule[LogicalPlan] { - private val hintErrorHandler = conf.hintErrorHandler + private def hintErrorHandler = conf.hintErrorHandler def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperatorsUp { case h: UnresolvedHint => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala index e10af3d5cc68d..51eb3d033ddc4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala @@ -74,7 +74,7 @@ object ResolveLambdaVariables extends Rule[LogicalPlan] { type LambdaVariableMap = Map[String, NamedExpression] - private val canonicalizer = { + private def canonicalizer = { if (!conf.caseSensitiveAnalysis) { // scalastyle:off caselocale s: String => s.toLowerCase From cf3b6551ce010a5503d6c624e313690cd2058855 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Thu, 12 Nov 2020 15:22:56 -0800 Subject: [PATCH 0460/1009] [SPARK-33435][SQL] DSv2: REFRESH TABLE should invalidate caches referencing the table ### What changes were proposed in this pull request? This changes `RefreshTableExec` in DSv2 to also invalidate caches with references to the target table to be refreshed. The change itself is similar to what's done in #30211. Note that though, since we currently don't support caching a DSv2 table directly, this doesn't add recache logic as in the DSv1 impl. I marked it as a TODO for now. ### Why are the changes needed? Currently the behavior in DSv1 and DSv2 is inconsistent w.r.t refreshing table: in DSv1 we invalidate both metadata cache as well as all table caches that are related to the table, but in DSv2 we only do the former. This addresses the issue and make the behavior consistent. ### Does this PR introduce _any_ user-facing change? Yes, now refreshing a v2 table also invalidate all the related caches. ### How was this patch tested? Added a new UT. Closes #30359 from sunchao/SPARK-33435. 
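As a rough usage-level illustration of the new behavior (mirroring the added unit test; it assumes a DSv2 catalog registered as `testcat` and a `foo` table provider, so the names only make sense inside that test setup):

```scala
spark.sql("CREATE TABLE testcat.ns.t (id BIGINT) USING foo")
spark.sql("CACHE TABLE t AS SELECT id FROM testcat.ns.t")

// Before this change, REFRESH TABLE only invalidated the metadata cache, so
// the cached view `t` could keep serving stale data.
spark.sql("REFRESH TABLE testcat.ns.t")

// Now the refresh also uncaches every cached plan that references
// testcat.ns.t, so `t` is recomputed on its next use.
spark.table("t").show()
```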
Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../datasources/v2/DataSourceV2Strategy.scala | 2 +- .../datasources/v2/RefreshTableExec.scala | 11 ++++++++++- .../spark/sql/connector/DataSourceV2SQLSuite.scala | 14 ++++++++++++++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index a82f86ea952d9..21abfc2816ee4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -128,7 +128,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat } case RefreshTable(r: ResolvedTable) => - RefreshTableExec(r.catalog, r.identifier) :: Nil + RefreshTableExec(session, r.catalog, r.table, r.identifier) :: Nil case ReplaceTable(catalog, ident, schema, parts, props, orCreate) => val propsWithOwner = CatalogV2Util.withDefaultOwnership(props) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RefreshTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RefreshTableExec.scala index 2a19ff304a9e0..52836de5a926b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RefreshTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RefreshTableExec.scala @@ -17,15 +17,24 @@ package org.apache.spark.sql.execution.datasources.v2 +import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} +import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog} case class RefreshTableExec( + session: SparkSession, catalog: TableCatalog, + table: Table, ident: Identifier) extends V2CommandExec { override protected def run(): Seq[InternalRow] = { catalog.invalidateTable(ident) + + // invalidate all caches referencing the given table + // TODO(SPARK-33437): re-cache the table itself once we support caching a DSv2 table + val v2Relation = DataSourceV2Relation.create(table, Some(catalog), Some(ident)) + session.sharedState.cacheManager.uncacheQuery(session, v2Relation, cascade = true) + Seq.empty } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index c480df323ddc2..db3f11dbda51a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -1650,6 +1650,20 @@ class DataSourceV2SQLSuite } } + test("SPARK-33435: REFRESH TABLE should invalidate all caches referencing the table") { + val tblName = "testcat.ns.t" + withTable(tblName) { + withTempView("t") { + sql(s"CREATE TABLE $tblName (id bigint) USING foo") + sql(s"CACHE TABLE t AS SELECT id FROM $tblName") + + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table("t")).isDefined) + sql(s"REFRESH TABLE $tblName") + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table("t")).isEmpty) + } + } + } + test("REPLACE TABLE: v1 table") { val e = intercept[AnalysisException] { sql(s"CREATE OR REPLACE TABLE tbl (a 
int) USING ${classOf[SimpleScanSource].getName}") From 2c64b731ae6a976b0d75a95901db849b4a0e2393 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 12 Nov 2020 15:31:57 -0800 Subject: [PATCH 0461/1009] [SPARK-33259][SS] Disable streaming query with possible correctness issue by default ### What changes were proposed in this pull request? This patch proposes to disable the streaming query with possible correctness issue in chained stateful operators. The behavior can be controlled by a SQL config, so if users understand the risk and still want to run the query, they can disable the check. ### Why are the changes needed? The possible correctness in chained stateful operators in streaming query is not straightforward for users. From users perspective, it will be considered as a Spark bug. It is also possible the worse case, users are not aware of the correctness issue and use wrong results. A better approach should be to disable such queries and let users choose to run the query if they understand there is such risk, instead of implicitly running the query and let users to find out correctness issue by themselves and report this known to Spark community. ### Does this PR introduce _any_ user-facing change? Yes. Streaming query with possible correctness issue will be blocked to run, except for users explicitly disable the SQL config. ### How was this patch tested? Unit test. Closes #30210 from viirya/SPARK-33259. Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- docs/ss-migration-guide.md | 6 +++- .../UnsupportedOperationChecker.scala | 19 ++++++++--- .../apache/spark/sql/internal/SQLConf.scala | 18 ++++++++++ .../analysis/UnsupportedOperationsSuite.scala | 34 +++++++++++++------ .../FlatMapGroupsWithStateSuite.scala | 4 ++- 5 files changed, 64 insertions(+), 17 deletions(-) diff --git a/docs/ss-migration-guide.md b/docs/ss-migration-guide.md index 002058b69bf30..d52b2e095fc76 100644 --- a/docs/ss-migration-guide.md +++ b/docs/ss-migration-guide.md @@ -26,10 +26,14 @@ Note that this migration guide describes the items specific to Structured Stream Many items of SQL migration can be applied when migrating Structured Streaming to higher versions. Please refer [Migration Guide: SQL, Datasets and DataFrame](sql-migration-guide.html). +## Upgrading from Structured Streaming 3.0 to 3.1 + +- In Spark 3.0 and before, for the queries that have stateful operation which can emit rows older than the current watermark plus allowed late record delay, which are "late rows" in downstream stateful operations and these rows can be discarded, Spark only prints a warning message. Since Spark 3.1, Spark will check for such queries with possible correctness issue and throw AnalysisException for it by default. For the users who understand the possible risk of correctness issue and still decide to run the query, please disable this check by setting the config `spark.sql.streaming.statefulOperator.checkCorrectness.enabled` to false. + ## Upgrading from Structured Streaming 2.4 to 3.0 - In Spark 3.0, Structured Streaming forces the source schema into nullable when file-based datasources such as text, json, csv, parquet and orc are used via `spark.readStream(...)`. Previously, it respected the nullability in source schema; however, it caused issues tricky to debug with NPE. To restore the previous behavior, set `spark.sql.streaming.fileSource.schema.forceNullable` to `false`. - Spark 3.0 fixes the correctness issue on Stream-stream outer join, which changes the schema of state. 
(See [SPARK-26154](https://issues.apache.org/jira/browse/SPARK-26154) for more details). If you start your query from checkpoint constructed from Spark 2.x which uses stream-stream outer join, Spark 3.0 fails the query. To recalculate outputs, discard the checkpoint and replay previous inputs. -- In Spark 3.0, the deprecated class `org.apache.spark.sql.streaming.ProcessingTime` has been removed. Use `org.apache.spark.sql.streaming.Trigger.ProcessingTime` instead. Likewise, `org.apache.spark.sql.execution.streaming.continuous.ContinuousTrigger` has been removed in favor of `Trigger.Continuous`, and `org.apache.spark.sql.execution.streaming.OneTimeTrigger` has been hidden in favor of `Trigger.Once`. \ No newline at end of file +- In Spark 3.0, the deprecated class `org.apache.spark.sql.streaming.ProcessingTime` has been removed. Use `org.apache.spark.sql.streaming.Trigger.ProcessingTime` instead. Likewise, `org.apache.spark.sql.execution.streaming.continuous.ContinuousTrigger` has been removed in favor of `Trigger.Continuous`, and `org.apache.spark.sql.execution.streaming.OneTimeTrigger` has been hidden in favor of `Trigger.Once`. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala index 809323455652e..814ea8c9768ae 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala @@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.streaming.InternalOutputModes +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.OutputMode /** @@ -40,10 +41,15 @@ object UnsupportedOperationChecker extends Logging { } } + /** + * Checks for possible correctness issue in chained stateful operators. The behavior is + * controlled by SQL config `spark.sql.streaming.statefulOperator.checkCorrectness.enabled`. + * Once it is enabled, an analysis exception will be thrown. Otherwise, Spark will just + * print a warning message. + */ def checkStreamingQueryGlobalWatermarkLimit( plan: LogicalPlan, - outputMode: OutputMode, - failWhenDetected: Boolean): Unit = { + outputMode: OutputMode): Unit = { def isStatefulOperationPossiblyEmitLateRows(p: LogicalPlan): Boolean = p match { case s: Aggregate if s.isStreaming && outputMode == InternalOutputModes.Append => true @@ -62,6 +68,8 @@ object UnsupportedOperationChecker extends Logging { case _ => false } + val failWhenDetected = SQLConf.get.statefulOperatorCorrectnessCheckEnabled + try { plan.foreach { subPlan => if (isStatefulOperation(subPlan)) { @@ -73,7 +81,10 @@ object UnsupportedOperationChecker extends Logging { "The query contains stateful operation which can emit rows older than " + "the current watermark plus allowed late record delay, which are \"late rows\"" + " in downstream stateful operations and these rows can be discarded. " + - "Please refer the programming guide doc for more details." + "Please refer the programming guide doc for more details. 
If you understand " + + "the possible risk of correctness issue and still need to run the query, " + + "you can disable this check by setting the config " + + "`spark.sql.streaming.statefulOperator.checkCorrectness.enabled` to false." throwError(errorMsg)(plan) } } @@ -388,7 +399,7 @@ object UnsupportedOperationChecker extends Logging { checkUnsupportedExpressions(subPlan) } - checkStreamingQueryGlobalWatermarkLimit(plan, outputMode, failWhenDetected = false) + checkStreamingQueryGlobalWatermarkLimit(plan, outputMode) } def checkForContinuous(plan: LogicalPlan, outputMode: OutputMode): Unit = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index ef988052affcd..546b199950a21 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1382,6 +1382,21 @@ object SQLConf { .booleanConf .createWithDefault(true) + val STATEFUL_OPERATOR_CHECK_CORRECTNESS_ENABLED = + buildConf("spark.sql.streaming.statefulOperator.checkCorrectness.enabled") + .internal() + .doc("When true, the stateful operators for streaming query will be checked for possible " + + "correctness issue due to global watermark. The correctness issue comes from queries " + + "containing stateful operation which can emit rows older than the current watermark " + + "plus allowed late record delay, which are \"late rows\" in downstream stateful " + + "operations and these rows can be discarded. Please refer the programming guide doc for " + + "more details. Once the issue is detected, Spark will throw analysis exception. " + + "When this config is disabled, Spark will just print warning message for users. 
" + + "Prior to Spark 3.1.0, the behavior is disabling this config.") + .version("3.1.0") + .booleanConf + .createWithDefault(true) + val VARIABLE_SUBSTITUTE_ENABLED = buildConf("spark.sql.variable.substitute") .doc("This enables substitution using syntax like `${var}`, `${system:var}`, " + @@ -3017,6 +3032,9 @@ class SQLConf extends Serializable with Logging { def isUnsupportedOperationCheckEnabled: Boolean = getConf(UNSUPPORTED_OPERATION_CHECK_ENABLED) + def statefulOperatorCorrectnessCheckEnabled: Boolean = + getConf(STATEFUL_OPERATOR_CHECK_CORRECTNESS_ENABLED) + def streamingFileCommitProtocolClass: String = getConf(STREAMING_FILE_COMMIT_PROTOCOL_CLASS) def fileSinkLogDeletion: Boolean = getConf(FILE_SINK_LOG_DELETION) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala index b9943a9744985..21dde3ca8ca51 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala @@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.Count import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical.{FlatMapGroupsWithState, _} import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.sql.types.{IntegerType, LongType, MetadataBuilder} import org.apache.spark.unsafe.types.CalendarInterval @@ -36,7 +37,7 @@ import org.apache.spark.unsafe.types.CalendarInterval /** A dummy command for testing unsupported operations. */ case class DummyCommand() extends Command -class UnsupportedOperationsSuite extends SparkFunSuite { +class UnsupportedOperationsSuite extends SparkFunSuite with SQLHelper { val attribute = AttributeReference("a", IntegerType, nullable = true)() val watermarkMetadata = new MetadataBuilder() @@ -218,6 +219,7 @@ class UnsupportedOperationsSuite extends SparkFunSuite { expectedMsgs = Seq("flatMapGroupsWithState in append mode", "update")) // FlatMapGroupsWithState(Append) in streaming with aggregation + // Only supported when `spark.sql.streaming.statefulOperator.correctnessCheck` is disabled. for (outputMode <- Seq(Append, Update, Complete)) { assertSupportedInStreamingPlan( "flatMapGroupsWithState - flatMapGroupsWithState(Append) " + @@ -228,7 +230,8 @@ class UnsupportedOperationsSuite extends SparkFunSuite { FlatMapGroupsWithState( null, att, att, Seq(att), Seq(att), att, null, Append, isMapGroupsWithState = false, null, streamRelation)), - outputMode = outputMode) + outputMode = outputMode, + SQLConf.STATEFUL_OPERATOR_CHECK_CORRECTNESS_ENABLED.key -> "false") } for (outputMode <- Seq(Append, Update)) { @@ -268,6 +271,7 @@ class UnsupportedOperationsSuite extends SparkFunSuite { } // multiple FlatMapGroupsWithStates + // Only supported when `spark.sql.streaming.statefulOperator.correctnessCheck` is disabled. 
assertSupportedInStreamingPlan( "flatMapGroupsWithState - multiple flatMapGroupsWithStates on streaming relation and all are " + "in append mode", @@ -275,7 +279,8 @@ class UnsupportedOperationsSuite extends SparkFunSuite { isMapGroupsWithState = false, null, FlatMapGroupsWithState(null, att, att, Seq(att), Seq(att), att, null, Append, isMapGroupsWithState = false, null, streamRelation)), - outputMode = Append) + outputMode = Append, + SQLConf.STATEFUL_OPERATOR_CHECK_CORRECTNESS_ENABLED.key -> "false") assertNotSupportedInStreamingPlan( "flatMapGroupsWithState - multiple flatMapGroupsWithStates on s streaming relation but some" + @@ -995,9 +1000,12 @@ class UnsupportedOperationsSuite extends SparkFunSuite { def assertSupportedInStreamingPlan( name: String, plan: LogicalPlan, - outputMode: OutputMode): Unit = { + outputMode: OutputMode, + configs: (String, String)*): Unit = { test(s"streaming plan - $name: supported") { - UnsupportedOperationChecker.checkForStreaming(wrapInStreaming(plan), outputMode) + withSQLConf(configs: _*) { + UnsupportedOperationChecker.checkForStreaming(wrapInStreaming(plan), outputMode) + } } } @@ -1070,14 +1078,18 @@ class UnsupportedOperationsSuite extends SparkFunSuite { expectFailure: Boolean): Unit = { test(s"Global watermark limit - $testNamePostfix") { if (expectFailure) { - val e = intercept[AnalysisException] { - UnsupportedOperationChecker.checkStreamingQueryGlobalWatermarkLimit( - wrapInStreaming(plan), outputMode, failWhenDetected = true) + withSQLConf(SQLConf.STATEFUL_OPERATOR_CHECK_CORRECTNESS_ENABLED.key -> "true") { + val e = intercept[AnalysisException] { + UnsupportedOperationChecker.checkStreamingQueryGlobalWatermarkLimit( + wrapInStreaming(plan), outputMode) + } + assert(e.message.contains("Detected pattern of possible 'correctness' issue")) } - assert(e.message.contains("Detected pattern of possible 'correctness' issue")) } else { - UnsupportedOperationChecker.checkStreamingQueryGlobalWatermarkLimit( - wrapInStreaming(plan), outputMode, failWhenDetected = true) + withSQLConf(SQLConf.STATEFUL_OPERATOR_CHECK_CORRECTNESS_ENABLED.key -> "false") { + UnsupportedOperationChecker.checkStreamingQueryGlobalWatermarkLimit( + wrapInStreaming(plan), outputMode) + } } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala index 2efd715b7731c..f97c9386f9488 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala @@ -1324,7 +1324,9 @@ class FlatMapGroupsWithStateSuite extends StateStoreMetricsTest { def testWithAllStateVersions(name: String)(func: => Unit): Unit = { for (version <- FlatMapGroupsWithStateExecHelper.supportedVersions) { test(s"$name - state format version $version") { - withSQLConf(SQLConf.FLATMAPGROUPSWITHSTATE_STATE_FORMAT_VERSION.key -> version.toString) { + withSQLConf( + SQLConf.FLATMAPGROUPSWITHSTATE_STATE_FORMAT_VERSION.key -> version.toString, + SQLConf.STATEFUL_OPERATOR_CHECK_CORRECTNESS_ENABLED.key -> "false") { func } } From 539c2deb896d0adb9bbd63fc1ef48a31050a6538 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 13 Nov 2020 05:15:13 +0000 Subject: [PATCH 0462/1009] [SPARK-33426][SQL][TESTS] Unify Hive SHOW TABLES tests ### What changes were proposed in this pull request? 1. 
Create the separate test suite `org.apache.spark.sql.hive.execution.command.ShowTablesSuite`. 2. Re-use V1 SHOW TABLES tests added by https://github.com/apache/spark/pull/30287 in the Hive test suites. 3. Add new test case for the pattern `'table_name_1*|table_name_2*'` in the common test suite. ### Why are the changes needed? To test V1 + common SHOW TABLES tests in Hive. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running v1/v2 and Hive v1 `ShowTablesSuite`: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *ShowTablesSuite" ``` Closes #30340 from MaxGekk/show-tables-hive-tests. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- ...sSuite.scala => ShowTablesSuiteBase.scala} | 38 +++++++++++-------- .../command/v1/ShowTablesSuite.scala | 7 +++- .../command/v2/ShowTablesSuite.scala | 5 ++- .../sql/hive/execution/HiveCommandSuite.scala | 22 ----------- .../execution/command/ShowTablesSuite.scala | 26 +++++++++++++ 5 files changed, 56 insertions(+), 42 deletions(-) rename sql/core/src/test/scala/org/apache/spark/sql/execution/command/{ShowTablesSuite.scala => ShowTablesSuiteBase.scala} (76%) create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala similarity index 76% rename from sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuite.scala rename to sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala index 01720b5723243..49428fab79027 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala @@ -20,13 +20,13 @@ package org.apache.spark.sql.execution.command import org.scalactic.source.Position import org.scalatest.Tag -import org.apache.spark.sql.Row +import org.apache.spark.sql.{QueryTest, Row} import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types.StructType -trait ShowTablesSuite extends SharedSparkSession { +trait ShowTablesSuiteBase extends QueryTest with SQLTestUtils { protected def version: String protected def catalog: String protected def defaultNamespace: Seq[String] @@ -39,7 +39,7 @@ trait ShowTablesSuite extends SharedSparkSession { protected def runShowTablesSql(sqlText: String, expected: Seq[ShowRow]): Unit = { val df = spark.sql(sqlText) assert(df.schema === showSchema) - assert(df.collect() === getRows(expected)) + checkAnswer(df, getRows(expected)) } override def test(testName: String, testTags: Tag*)(testFun: => Any) @@ -63,30 +63,36 @@ trait ShowTablesSuite extends SharedSparkSession { sql(s"CREATE NAMESPACE $catalog.ns2") withTable( s"$catalog.ns1.table", - s"$catalog.ns1.table_name_1", - s"$catalog.ns1.table_name_2", - s"$catalog.ns2.table_name_2") { + s"$catalog.ns1.table_name_1a", + s"$catalog.ns1.table_name_2b", + s"$catalog.ns2.table_name_2b") { sql(s"CREATE TABLE $catalog.ns1.table (id bigint, data string) $defaultUsing") - sql(s"CREATE TABLE $catalog.ns1.table_name_1 (id bigint, data string) $defaultUsing") - sql(s"CREATE TABLE $catalog.ns1.table_name_2 (id bigint, data string) 
$defaultUsing") - sql(s"CREATE TABLE $catalog.ns2.table_name_2 (id bigint, data string) $defaultUsing") + sql(s"CREATE TABLE $catalog.ns1.table_name_1a (id bigint, data string) $defaultUsing") + sql(s"CREATE TABLE $catalog.ns1.table_name_2b (id bigint, data string) $defaultUsing") + sql(s"CREATE TABLE $catalog.ns2.table_name_2b (id bigint, data string) $defaultUsing") runShowTablesSql( s"SHOW TABLES FROM $catalog.ns1", Seq( ShowRow("ns1", "table", false), - ShowRow("ns1", "table_name_1", false), - ShowRow("ns1", "table_name_2", false))) + ShowRow("ns1", "table_name_1a", false), + ShowRow("ns1", "table_name_2b", false))) runShowTablesSql( s"SHOW TABLES FROM $catalog.ns1 LIKE '*name*'", Seq( - ShowRow("ns1", "table_name_1", false), - ShowRow("ns1", "table_name_2", false))) + ShowRow("ns1", "table_name_1a", false), + ShowRow("ns1", "table_name_2b", false))) runShowTablesSql( - s"SHOW TABLES FROM $catalog.ns1 LIKE '*2'", - Seq(ShowRow("ns1", "table_name_2", false))) + s"SHOW TABLES FROM $catalog.ns1 LIKE 'table_name_1*|table_name_2*'", + Seq( + ShowRow("ns1", "table_name_1a", false), + ShowRow("ns1", "table_name_2b", false))) + + runShowTablesSql( + s"SHOW TABLES FROM $catalog.ns1 LIKE '*2b'", + Seq(ShowRow("ns1", "table_name_2b", false))) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala index feb3bc623f3fa..d2332818d9546 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala @@ -20,10 +20,11 @@ package org.apache.spark.sql.execution.command.v1 import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException import org.apache.spark.sql.connector.catalog.CatalogManager -import org.apache.spark.sql.execution.command.{ShowTablesSuite => CommonShowTablesSuite} +import org.apache.spark.sql.execution.command +import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{BooleanType, StringType, StructType} -class ShowTablesSuite extends CommonShowTablesSuite { +trait ShowTablesSuiteBase extends command.ShowTablesSuiteBase { override def version: String = "V1" override def catalog: String = CatalogManager.SESSION_CATALOG_NAME override def defaultNamespace: Seq[String] = Seq("default") @@ -93,3 +94,5 @@ class ShowTablesSuite extends CommonShowTablesSuite { } } } + +class ShowTablesSuite extends ShowTablesSuiteBase with SharedSparkSession diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala index 668120ae1cada..c7f68863a1791 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala @@ -21,10 +21,11 @@ import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException import org.apache.spark.sql.connector.InMemoryTableCatalog -import org.apache.spark.sql.execution.command.{ShowTablesSuite => CommonShowTablesSuite} +import org.apache.spark.sql.execution.command +import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{StringType, StructType} -class 
ShowTablesSuite extends CommonShowTablesSuite { +class ShowTablesSuite extends command.ShowTablesSuiteBase with SharedSparkSession { override def version: String = "V2" override def catalog: String = "test_catalog" override def defaultNamespace: Seq[String] = Nil diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala index dcec8bf5c0cc6..a78fd506b752e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala @@ -95,28 +95,6 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto } } - test("show tables") { - withTable("show1a", "show2b") { - sql("CREATE TABLE show1a(c1 int)") - sql("CREATE TABLE show2b(c2 int)") - checkAnswer( - sql("SHOW TABLES IN default 'show1*'"), - Row("default", "show1a", false) :: Nil) - checkAnswer( - sql("SHOW TABLES IN default 'show1*|show2*'"), - Row("default", "show1a", false) :: - Row("default", "show2b", false) :: Nil) - checkAnswer( - sql("SHOW TABLES 'show1*|show2*'"), - Row("default", "show1a", false) :: - Row("default", "show2b", false) :: Nil) - assert( - sql("SHOW TABLES").count() >= 2) - assert( - sql("SHOW TABLES IN default").count() >= 2) - } - } - test("show views") { withView("show1a", "show2b", "global_temp.temp1", "temp2") { sql("CREATE VIEW show1a AS SELECT 1 AS id") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala new file mode 100644 index 0000000000000..836f080d28e75 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.execution.command + +import org.apache.spark.sql.execution.command.v1 +import org.apache.spark.sql.hive.test.TestHiveSingleton + +class ShowTablesSuite extends v1.ShowTablesSuiteBase with TestHiveSingleton { + override def version: String = "Hive V1" + override def defaultUsing: String = "USING HIVE" +} From a70a2b02ce7d18947778d37c8fffb3f1b1b5b154 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 12 Nov 2020 21:19:51 -0800 Subject: [PATCH 0463/1009] [SPARK-33439][INFRA] Use SERIAL_SBT_TESTS=1 for SQL modules ### What changes were proposed in this pull request? This PR aims to decrease the parallelism of `SQL` module like `Hive` module. ### Why are the changes needed? GitHub Action `sql - slow tests` become flaky. 
- https://github.com/apache/spark/runs/1393670291 - https://github.com/apache/spark/runs/1393088031 ### Does this PR introduce _any_ user-facing change? No. This is dev-only feature. Although this will increase the running time, but it's better than flakiness. ### How was this patch tested? Pass the GitHub Action stably. Closes #30365 from dongjoon-hyun/SPARK-33439. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index e4762523f7018..0918ee111b536 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -139,8 +139,8 @@ jobs: # Run the tests. - name: Run tests run: | - # Hive tests become flaky when running in parallel as it's too intensive. - if [[ "$MODULES_TO_TEST" == "hive" ]]; then export SERIAL_SBT_TESTS=1; fi + # Hive and SQL tests become flaky when running in parallel as it's too intensive. + if [[ "$MODULES_TO_TEST" == "hive" ]] || [[ "$MODULES_TO_TEST" == "sql" ]]; then export SERIAL_SBT_TESTS=1; fi mkdir -p ~/.m2 ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS" rm -rf ~/.m2/repository/org/apache/spark From 82a21d2a3e3d4eafa43802b3034907a1f2725396 Mon Sep 17 00:00:00 2001 From: ulysses Date: Fri, 13 Nov 2020 15:57:07 +0900 Subject: [PATCH 0464/1009] [SPARK-33433][SQL] Change Aggregate max rows to 1 if grouping is empty ### What changes were proposed in this pull request? Change `Aggregate` max rows to 1 if grouping is empty. ### Why are the changes needed? If `Aggregate` grouping is empty, the result is always one row. Then we don't need push down limit in `LimitPushDown` with such case ``` select count(*) from t1 union select count(*) from t2 limit 1 ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Add test. Closes #30356 from ulysses-you/SPARK-33433. 
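A quick way to see the effect in a spark-shell session (the temp view names here are only for illustration): a global aggregate always produces exactly one row, so reporting `maxRows = 1` lets `LimitPushDown` skip inserting a `LocalLimit` that could never reduce the row count.

```scala
spark.range(1000).createOrReplaceTempView("t1")
spark.range(1000).createOrReplaceTempView("t2")

val q = spark.sql("SELECT count(*) FROM t1 UNION SELECT count(*) FROM t2 LIMIT 1")
// Each side of the UNION is an ungrouped aggregate, i.e. a single-row plan.
// With this change the optimized plan keeps those aggregates as-is instead of
// wrapping each of them in LocalLimit(1).
q.explain(true)
```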
Authored-by: ulysses Signed-off-by: Takeshi Yamamuro --- .../plans/logical/basicLogicalOperators.scala | 8 +++++++- .../optimizer/LimitPushdownSuite.scala | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 223ef652d2f80..17bf704c6d67a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -586,7 +586,13 @@ case class Aggregate( } override def output: Seq[Attribute] = aggregateExpressions.map(_.toAttribute) - override def maxRows: Option[Long] = child.maxRows + override def maxRows: Option[Long] = { + if (groupingExpressions.isEmpty) { + Some(1L) + } else { + child.maxRows + } + } override lazy val validConstraints: ExpressionSet = { val nonAgg = aggregateExpressions.filter(_.find(_.isInstanceOf[AggregateExpression]).isEmpty) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala index 17fb9fc5d11e3..d993aee3d7518 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala @@ -171,4 +171,22 @@ class LimitPushdownSuite extends PlanTest { // No pushdown for FULL OUTER JOINS. comparePlans(optimized, originalQuery) } + + test("SPARK-33433: Change Aggregate max rows to 1 if grouping is empty") { + val analyzed1 = Limit(1, Union( + x.groupBy()(count(1)), + y.groupBy()(count(1)))).analyze + val optimized1 = Optimize.execute(analyzed1) + comparePlans(analyzed1, optimized1) + + // test push down + val analyzed2 = Limit(1, Union( + x.groupBy(Symbol("a"))(count(1)), + y.groupBy(Symbol("b"))(count(1)))).analyze + val optimized2 = Optimize.execute(analyzed2) + val expected2 = Limit(1, Union( + LocalLimit(1, x.groupBy(Symbol("a"))(count(1))), + LocalLimit(1, y.groupBy(Symbol("b"))(count(1))))).analyze + comparePlans(expected2, optimized2) + } } From cdd8e51742a59ab11ffd45b8f4e893128c43f8d7 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 13 Nov 2020 06:58:16 +0000 Subject: [PATCH 0465/1009] [SPARK-33419][SQL] Unexpected behavior when using SET commands before a query in SparkSession.sql ### What changes were proposed in this pull request? SparkSession.sql converts a string value to a DataFrame, and the string value should be one single SQL statement ending up w/ or w/o one or more semicolons. e.g. ```sql scala> spark.sql(" select 2").show +---+ | 2| +---+ | 2| +---+ scala> spark.sql(" select 2;").show +---+ | 2| +---+ | 2| +---+ scala> spark.sql(" select 2;;;;").show +---+ | 2| +---+ | 2| +---+ ``` If we put 2 or more statements in, it fails in the parser as expected, e.g. 
```sql scala> spark.sql(" select 2; select 1;").show org.apache.spark.sql.catalyst.parser.ParseException: extraneous input 'select' expecting {, ';'}(line 1, pos 11) == SQL == select 2; select 1; -----------^^^ at org.apache.spark.sql.catalyst.parser.ParseException.withCommand(ParseDriver.scala:263) at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parse(ParseDriver.scala:130) at org.apache.spark.sql.execution.SparkSqlParser.parse(SparkSqlParser.scala:51) at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parsePlan(ParseDriver.scala:81) at org.apache.spark.sql.SparkSession.$anonfun$sql$2(SparkSession.scala:610) at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111) at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:610) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:769) at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:607) ... 47 elided ``` As a very generic user scenario, users may want to change some settings before they execute the queries. They may pass a string value like `set spark.sql.abc=2; select 1;` into this API, which creates a confusing gap between the actual effect and the user's expectations. The user may want the query to be executed with spark.sql.abc=2, but Spark actually treats the whole part of `2; select 1;` as the value of the property 'spark.sql.abc', e.g. ``` scala> spark.sql("set spark.sql.abc=2; select 1;").show +-------------+------------+ | key| value| +-------------+------------+ |spark.sql.abc|2; select 1;| +-------------+------------+ ``` What's more, the SET symbol could digest everything behind it, which makes it unstable from version to version, e.g. #### 3.1 ```sql scala> spark.sql("set;").show org.apache.spark.sql.catalyst.parser.ParseException: Expected format is 'SET', 'SET key', or 'SET key=value'. 
If you want to include special characters in key, please use quotes, e.g., SET `ke y`=value.(line 1, pos 0) == SQL == set; ^^^ at org.apache.spark.sql.execution.SparkSqlAstBuilder.$anonfun$visitSetConfiguration$1(SparkSqlParser.scala:83) at org.apache.spark.sql.catalyst.parser.ParserUtils$.withOrigin(ParserUtils.scala:113) at org.apache.spark.sql.execution.SparkSqlAstBuilder.visitSetConfiguration(SparkSqlParser.scala:72) at org.apache.spark.sql.execution.SparkSqlAstBuilder.visitSetConfiguration(SparkSqlParser.scala:58) at org.apache.spark.sql.catalyst.parser.SqlBaseParser$SetConfigurationContext.accept(SqlBaseParser.java:2161) at org.antlr.v4.runtime.tree.AbstractParseTreeVisitor.visit(AbstractParseTreeVisitor.java:18) at org.apache.spark.sql.catalyst.parser.AstBuilder.$anonfun$visitSingleStatement$1(AstBuilder.scala:77) at org.apache.spark.sql.catalyst.parser.ParserUtils$.withOrigin(ParserUtils.scala:113) at org.apache.spark.sql.catalyst.parser.AstBuilder.visitSingleStatement(AstBuilder.scala:77) at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.$anonfun$parsePlan$1(ParseDriver.scala:82) at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parse(ParseDriver.scala:113) at org.apache.spark.sql.execution.SparkSqlParser.parse(SparkSqlParser.scala:51) at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parsePlan(ParseDriver.scala:81) at org.apache.spark.sql.SparkSession.$anonfun$sql$2(SparkSession.scala:610) at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111) at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:610) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:769) at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:607) ... 47 elided scala> spark.sql("set a;").show org.apache.spark.sql.catalyst.parser.ParseException: Expected format is 'SET', 'SET key', or 'SET key=value'. 
If you want to include special characters in key, please use quotes, e.g., SET `ke y`=value.(line 1, pos 0) == SQL == set a; ^^^ at org.apache.spark.sql.execution.SparkSqlAstBuilder.$anonfun$visitSetConfiguration$1(SparkSqlParser.scala:83) at org.apache.spark.sql.catalyst.parser.ParserUtils$.withOrigin(ParserUtils.scala:113) at org.apache.spark.sql.execution.SparkSqlAstBuilder.visitSetConfiguration(SparkSqlParser.scala:72) at org.apache.spark.sql.execution.SparkSqlAstBuilder.visitSetConfiguration(SparkSqlParser.scala:58) at org.apache.spark.sql.catalyst.parser.SqlBaseParser$SetConfigurationContext.accept(SqlBaseParser.java:2161) at org.antlr.v4.runtime.tree.AbstractParseTreeVisitor.visit(AbstractParseTreeVisitor.java:18) at org.apache.spark.sql.catalyst.parser.AstBuilder.$anonfun$visitSingleStatement$1(AstBuilder.scala:77) at org.apache.spark.sql.catalyst.parser.ParserUtils$.withOrigin(ParserUtils.scala:113) at org.apache.spark.sql.catalyst.parser.AstBuilder.visitSingleStatement(AstBuilder.scala:77) at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.$anonfun$parsePlan$1(ParseDriver.scala:82) at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parse(ParseDriver.scala:113) at org.apache.spark.sql.execution.SparkSqlParser.parse(SparkSqlParser.scala:51) at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parsePlan(ParseDriver.scala:81) at org.apache.spark.sql.SparkSession.$anonfun$sql$2(SparkSession.scala:610) at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111) at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:610) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:769) at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:607) ... 47 elided ``` #### 2.4 ```sql scala> spark.sql("set;").show +---+-----------+ |key| value| +---+-----------+ | ;|| +---+-----------+ scala> spark.sql("set a;").show +---+-----------+ |key| value| +---+-----------+ | a;|| +---+-----------+ ``` In this PR, 1. make `set spark.sql.abc=2; select 1;` in `SparkSession.sql` fail directly, user should call `.sql` for each statement separately. 2. make the semicolon as the separator of statements, and if users want to use it as part of the property value, shall use quotes too. ### Why are the changes needed? 1. disambiguation for `SparkSession.sql` 2. make semicolon work same both w/ `SET` and other statements ### Does this PR introduce _any_ user-facing change? yes, the semicolon works as a separator of statements now, it will be trimmed if it is at the end of the statement and fail the statement if it is in the middle. you need to use quotes if you want it to be part of the property value ### How was this patch tested? new tests Closes #30332 from yaooqinn/SPARK-33419. 
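A short spark-shell sketch of the intended behavior after this change (the property key is arbitrary and the exact error text may differ):

```scala
// Trailing semicolons are now treated as statement terminators and trimmed.
spark.sql("SET spark.sql.abc=2;;;")

// A semicolon that should be part of the value must be back-quoted.
spark.sql("SET `spark.sql.abc`=`2;3`")

// Two statements in one string still fail: each call to SparkSession.sql must
// carry a single statement, so this now raises a ParseException instead of
// silently storing "2; select 1;" as the property value.
spark.sql("SET spark.sql.abc=2; select 1;")
```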
Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/parser/SqlBase.g4 | 6 +++ .../sql/catalyst/parser/ParserUtils.scala | 11 ++++ .../spark/sql/execution/SparkSqlParser.scala | 35 +++++++++---- .../sql/execution/SparkSqlParserSuite.scala | 52 +++++++++++++++++-- 4 files changed, 90 insertions(+), 14 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index ad0de528708a4..6b6b751cc3c15 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -246,7 +246,9 @@ statement | SET TIME ZONE interval #setTimeZone | SET TIME ZONE timezone=(STRING | LOCAL) #setTimeZone | SET TIME ZONE .*? #setTimeZone + | SET configKey EQ configValue #setQuotedConfiguration | SET configKey (EQ .*?)? #setQuotedConfiguration + | SET .*? EQ configValue #setQuotedConfiguration | SET .*? #setConfiguration | RESET configKey #resetQuotedConfiguration | RESET .*? #resetConfiguration @@ -257,6 +259,10 @@ configKey : quotedIdentifier ; +configValue + : quotedIdentifier + ; + unsupportedHiveNativeCommands : kw1=CREATE kw2=ROLE | kw1=DROP kw2=ROLE diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala index f2dab941cb8b2..1f32620e54902 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala @@ -71,6 +71,17 @@ object ParserUtils { stream.getText(interval) } + /** + * Get all the text which between the given start and end tokens. + * When we need to extract everything between two tokens including all spaces we should use + * this method instead of defined a named Antlr4 rule for .*?, + * which somehow parse "a b" -> "ab" in some cases + */ + def interval(start: Token, end: Token): String = { + val interval = Interval.of(start.getStopIndex + 1, end.getStartIndex - 1) + start.getInputStream.getText(interval) + } + /** Convert a string token into a string. */ def string(token: Token): String = unescapeSQLString(token.getText) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index f46526d419158..b28effbcb5514 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -58,8 +58,9 @@ class SparkSqlParser(conf: SQLConf) extends AbstractSqlParser(conf) { class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { import org.apache.spark.sql.catalyst.parser.ParserUtils._ - private val configKeyValueDef = """([a-zA-Z_\d\\.:]+)\s*=(.*)""".r + private val configKeyValueDef = """([a-zA-Z_\d\\.:]+)\s*=([^;]*);*""".r private val configKeyDef = """([a-zA-Z_\d\\.:]+)$""".r + private val configValueDef = """([^;]*);*""".r /** * Create a [[SetCommand]] logical plan. @@ -79,18 +80,34 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { case s if s.isEmpty => SetCommand(None) case _ => throw new ParseException("Expected format is 'SET', 'SET key', or " + - "'SET key=value'. 
If you want to include special characters in key, " + - "please use quotes, e.g., SET `ke y`=value.", ctx) + "'SET key=value'. If you want to include special characters in key, or include semicolon " + + "in value, please use quotes, e.g., SET `ke y`=`v;alue`.", ctx) } } - override def visitSetQuotedConfiguration(ctx: SetQuotedConfigurationContext) - : LogicalPlan = withOrigin(ctx) { - val keyStr = ctx.configKey().getText - if (ctx.EQ() != null) { - SetCommand(Some(keyStr -> Option(remainder(ctx.EQ().getSymbol).trim))) + override def visitSetQuotedConfiguration( + ctx: SetQuotedConfigurationContext): LogicalPlan = withOrigin(ctx) { + if (ctx.configValue() != null && ctx.configKey() != null) { + SetCommand(Some(ctx.configKey().getText -> Option(ctx.configValue().getText))) + } else if (ctx.configValue() != null) { + val valueStr = ctx.configValue().getText + val keyCandidate = interval(ctx.SET().getSymbol, ctx.EQ().getSymbol).trim + keyCandidate match { + case configKeyDef(key) => SetCommand(Some(key -> Option(valueStr))) + case _ => throw new ParseException(s"'$keyCandidate' is an invalid property key, please " + + s"use quotes, e.g. SET `$keyCandidate`=`$valueStr`", ctx) + } } else { - SetCommand(Some(keyStr -> None)) + val keyStr = ctx.configKey().getText + if (ctx.EQ() != null) { + remainder(ctx.EQ().getSymbol).trim match { + case configValueDef(valueStr) => SetCommand(Some(keyStr -> Option(valueStr))) + case other => throw new ParseException(s"'$other' is an invalid property value, please " + + s"use quotes, e.g. SET `$keyStr`=`$other`", ctx) + } + } else { + SetCommand(Some(keyStr -> None)) + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index 5e6808eeba0f6..5b4cd47742c00 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -70,9 +70,21 @@ class SparkSqlParserSuite extends AnalysisTest { StaticSQLConf ConfigEntry.knownConfigs.values.asScala.foreach { config => assertEqual(s"SET ${config.key}", SetCommand(Some(config.key -> None))) - if (config.defaultValue.isDefined && config.defaultValueString != null) { - assertEqual(s"SET ${config.key}=${config.defaultValueString}", - SetCommand(Some(config.key -> Some(config.defaultValueString)))) + assertEqual(s"SET `${config.key}`", SetCommand(Some(config.key -> None))) + + val defaultValueStr = config.defaultValueString + if (config.defaultValue.isDefined && defaultValueStr != null) { + assertEqual(s"SET ${config.key}=`$defaultValueStr`", + SetCommand(Some(config.key -> Some(defaultValueStr)))) + assertEqual(s"SET `${config.key}`=`$defaultValueStr`", + SetCommand(Some(config.key -> Some(defaultValueStr)))) + + if (!defaultValueStr.contains(";")) { + assertEqual(s"SET ${config.key}=$defaultValueStr", + SetCommand(Some(config.key -> Some(defaultValueStr)))) + assertEqual(s"SET `${config.key}`=$defaultValueStr", + SetCommand(Some(config.key -> Some(defaultValueStr)))) + } } assertEqual(s"RESET ${config.key}", ResetCommand(Some(config.key))) } @@ -101,10 +113,11 @@ class SparkSqlParserSuite extends AnalysisTest { SetCommand(Some("spark.sql. key" -> Some("v a lu e")))) assertEqual("SET `spark.sql. key`= -1", SetCommand(Some("spark.sql. 
key" -> Some("-1")))) + assertEqual("SET key=", SetCommand(Some("key" -> Some("")))) val expectedErrMsg = "Expected format is 'SET', 'SET key', or " + - "'SET key=value'. If you want to include special characters in key, " + - "please use quotes, e.g., SET `ke y`=value." + "'SET key=value'. If you want to include special characters in key, or include semicolon " + + "in value, please use quotes, e.g., SET `ke y`=`v;alue`." intercept("SET spark.sql.key value", expectedErrMsg) intercept("SET spark.sql.key 'value'", expectedErrMsg) intercept("SET spark.sql.key \"value\" ", expectedErrMsg) @@ -115,6 +128,8 @@ class SparkSqlParserSuite extends AnalysisTest { intercept("SET spark.sql. key=value", expectedErrMsg) intercept("SET spark.sql :key=value", expectedErrMsg) intercept("SET spark.sql . key=value", expectedErrMsg) + intercept("SET =", expectedErrMsg) + intercept("SET =value", expectedErrMsg) } test("Report Error for invalid usage of RESET command") { @@ -141,6 +156,33 @@ class SparkSqlParserSuite extends AnalysisTest { intercept("RESET spark.sql : key", expectedErrMsg) } + test("SPARK-33419: Semicolon handling in SET command") { + assertEqual("SET a=1;", SetCommand(Some("a" -> Some("1")))) + assertEqual("SET a=1;;", SetCommand(Some("a" -> Some("1")))) + + assertEqual("SET a=`1`;", SetCommand(Some("a" -> Some("1")))) + assertEqual("SET a=`1;`", SetCommand(Some("a" -> Some("1;")))) + assertEqual("SET a=`1;`;", SetCommand(Some("a" -> Some("1;")))) + + assertEqual("SET `a`=1;;", SetCommand(Some("a" -> Some("1")))) + assertEqual("SET `a`=`1;`", SetCommand(Some("a" -> Some("1;")))) + assertEqual("SET `a`=`1;`;", SetCommand(Some("a" -> Some("1;")))) + + val expectedErrMsg = "Expected format is 'SET', 'SET key', or " + + "'SET key=value'. If you want to include special characters in key, or include semicolon " + + "in value, please use quotes, e.g., SET `ke y`=`v;alue`." + + intercept("SET a=1; SELECT 1", expectedErrMsg) + intercept("SET a=1;2;;", expectedErrMsg) + + intercept("SET a b=`1;;`", + "'a b' is an invalid property key, please use quotes, e.g. SET `a b`=`1;;`") + + intercept("SET `a`=1;2;;", + "'1;2;;' is an invalid property value, please use quotes, e.g." + + " SET `a`=`1;2;;`") + } + test("refresh resource") { assertEqual("REFRESH prefix_path", RefreshResource("prefix_path")) assertEqual("REFRESH /", RefreshResource("/")) From f80fe213bd4c5e065d5723816c42302a532be75c Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Fri, 13 Nov 2020 16:51:06 +0800 Subject: [PATCH 0466/1009] [SPARK-33166][DOC] Provide Search Function in Spark docs site ### What changes were proposed in this pull request? In the last few releases, our Spark documentation https://spark.apache.org/docs/latest/ becomes richer. It would nice to provide a search function to make our users find contents faster. [DocSearch](https://docsearch.algolia.com/) is entirely free and automated. This PR will use it to provides search function. The screenshots show below: ![overview](https://user-images.githubusercontent.com/8486025/98756802-30d82a80-23c3-11eb-9ca2-73bb20fb54c4.png) ### Why are the changes needed? Let the users of Spark documentation could find the needed information effectively. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? build on my machine and look on brower. Closes #30292 from beliefer/SPARK-33166. 
Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: Gengliang Wang --- docs/_layouts/global.html | 23 +++++++++++++++++++++++ docs/css/docsearch.css | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 docs/css/docsearch.css diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html index 5f6cd7c6b7f20..65af17ed2e4a1 100755 --- a/docs/_layouts/global.html +++ b/docs/_layouts/global.html @@ -30,6 +30,8 @@ + + {% production %} @@ -125,6 +127,10 @@ Third Party Projects + +


    + + From 97d2cee4af4ad8882334e2b680ab75dc73e29336 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 18 Nov 2020 18:35:11 -0800 Subject: [PATCH 0512/1009] [SPARK-33427][SQL][FOLLOWUP] Prevent test flakyness in SubExprEvaluationRuntimeSuite ### What changes were proposed in this pull request? This followup is to prevent possible test flakyness of `SubExprEvaluationRuntimeSuite`. ### Why are the changes needed? Because HashMap doesn't guarantee the order, in `proxyExpressions` the proxy expression id is not deterministic. So in `SubExprEvaluationRuntimeSuite` we should not test against it. ### Does this PR introduce _any_ user-facing change? No, dev only. ### How was this patch tested? Unit test. Closes #30414 from viirya/SPARK-33427-followup. Authored-by: Liang-Chi Hsieh Signed-off-by: Liang-Chi Hsieh --- .../catalyst/expressions/SubExprEvaluationRuntimeSuite.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntimeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntimeSuite.scala index badcd4fc3fdad..f56ec49724adb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntimeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntimeSuite.scala @@ -84,8 +84,7 @@ class SubExprEvaluationRuntimeSuite extends SparkFunSuite { }) // ( (one * two) * (one * two) ) assert(proxys.size == 2) - val expected = ExpressionProxy(mul2, 0, runtime) - assert(proxys.forall(_ == expected)) + assert(proxys.forall(_.child == mul2)) } test("ExpressionProxy won't be on non deterministic") { From e518008ca9dc8a4950e2655ed9b35ce95ffe5acb Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 18 Nov 2020 18:58:06 -0800 Subject: [PATCH 0513/1009] [SPARK-33473][SQL] Extend interpreted subexpression elimination to other interpreted projections ### What changes were proposed in this pull request? Similar to `InterpretedUnsafeProjection`, this patch proposes to extend interpreted subexpression elimination to `InterpretedMutableProjection` and `InterpretedSafeProjection`. ### Why are the changes needed? Enabling subexpression elimination can improve the performance of interpreted projections, as shown in `InterpretedUnsafeProjection`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Closes #30406 from viirya/SPARK-33473. 
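As a quick illustration of the interpreted path this change targets, here is a hedged sketch adapted from the `MutableProjectionSuite` test added below; it assumes the test's settings are in effect (`spark.sql.codegen.factoryMode=NO_CODEGEN` and subexpression elimination enabled), so that `MutableProjection.create` resolves to `InterpretedMutableProjection`.

```scala
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types.DoubleType

// (one * two) * (one * two) appears twice inside `sum`; with this patch the interpreted
// projection routes it through SubExprEvaluationRuntime so it is evaluated once per row.
val one = BoundReference(0, DoubleType, true)
val two = BoundReference(1, DoubleType, true)
val mul2 = Multiply(Multiply(one, two), Multiply(one, two))
val sum = Add(mul2, Sqrt(mul2))

val proj = MutableProjection.create(Seq(sum))          // interpreted under NO_CODEGEN
proj(InternalRow.fromSeq(Seq(1.0, 2.0))).getDouble(0)  // ((1*2)*(1*2)) + sqrt(4.0) = 6.0
```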
Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../InterpretedMutableProjection.scala | 18 ++++++- .../InterpretedSafeProjection.scala | 16 +++++- .../expressions/MutableProjectionSuite.scala | 46 +++++++++++++++++ .../codegen/GeneratedProjectionSuite.scala | 49 ++++++++++++++++++- 4 files changed, 125 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedMutableProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedMutableProjection.scala index a2daec0b1ade1..91c9457af7de3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedMutableProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedMutableProjection.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.BindReferences.bindReferences import org.apache.spark.sql.catalyst.expressions.aggregate.NoOp +import org.apache.spark.sql.internal.SQLConf /** @@ -33,6 +34,15 @@ class InterpretedMutableProjection(expressions: Seq[Expression]) extends Mutable def this(expressions: Seq[Expression], inputSchema: Seq[Attribute]) = this(bindReferences(expressions, inputSchema)) + private[this] val subExprEliminationEnabled = SQLConf.get.subexpressionEliminationEnabled + private[this] lazy val runtime = + new SubExprEvaluationRuntime(SQLConf.get.subexpressionEliminationCacheMaxEntries) + private[this] val exprs = if (subExprEliminationEnabled) { + runtime.proxyExpressions(expressions) + } else { + expressions + } + private[this] val buffer = new Array[Any](expressions.size) override def initialize(partitionIndex: Int): Unit = { @@ -76,11 +86,15 @@ class InterpretedMutableProjection(expressions: Seq[Expression]) extends Mutable }.toArray override def apply(input: InternalRow): InternalRow = { + if (subExprEliminationEnabled) { + runtime.setInput(input) + } + var i = 0 while (i < validExprs.length) { - val (expr, ordinal) = validExprs(i) + val (_, ordinal) = validExprs(i) // Store the result into buffer first, to make the projection atomic (needed by aggregation) - buffer(ordinal) = expr.eval(input) + buffer(ordinal) = exprs(ordinal).eval(input) i += 1 } i = 0 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedSafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedSafeProjection.scala index 70789dac1d87a..0e71892db666b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedSafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedSafeProjection.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.aggregate.NoOp import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, ArrayData, GenericArrayData, MapData} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -30,6 +31,15 @@ import org.apache.spark.sql.types._ */ class InterpretedSafeProjection(expressions: Seq[Expression]) extends Projection { + private[this] val subExprEliminationEnabled = SQLConf.get.subexpressionEliminationEnabled + private[this] lazy val runtime = + new 
SubExprEvaluationRuntime(SQLConf.get.subexpressionEliminationCacheMaxEntries) + private[this] val exprs = if (subExprEliminationEnabled) { + runtime.proxyExpressions(expressions) + } else { + expressions + } + private[this] val mutableRow = new SpecificInternalRow(expressions.map(_.dataType)) private[this] val exprsWithWriters = expressions.zipWithIndex.filter { @@ -49,7 +59,7 @@ class InterpretedSafeProjection(expressions: Seq[Expression]) extends Projection } } } - (e, f) + (exprs(i), f) } private def generateSafeValueConverter(dt: DataType): Any => Any = dt match { @@ -97,6 +107,10 @@ class InterpretedSafeProjection(expressions: Seq[Expression]) extends Projection } override def apply(row: InternalRow): InternalRow = { + if (subExprEliminationEnabled) { + runtime.setInput(row) + } + var i = 0 while (i < exprsWithWriters.length) { val (expr, writer) = exprsWithWriters(i) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MutableProjectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MutableProjectionSuite.scala index c31310bc54023..8f030b45e5d3e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MutableProjectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MutableProjectionSuite.scala @@ -80,4 +80,50 @@ class MutableProjectionSuite extends SparkFunSuite with ExpressionEvalHelper { assert(errMsg.contains("MutableProjection cannot use UnsafeRow for output data types:")) } } + + test("SPARK-33473: subexpression elimination for interpreted MutableProjection") { + Seq("true", "false").foreach { enabled => + withSQLConf( + SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> enabled, + SQLConf.CODEGEN_FACTORY_MODE.key -> CodegenObjectFactoryMode.NO_CODEGEN.toString) { + val one = BoundReference(0, DoubleType, true) + val two = BoundReference(1, DoubleType, true) + + val mul = Multiply(one, two) + val mul2 = Multiply(mul, mul) + val sqrt = Sqrt(mul2) + val sum = Add(mul2, sqrt) + + val proj = MutableProjection.create(Seq(sum)) + val result = (d1: Double, d2: Double) => + ((d1 * d2) * (d1 * d2)) + Math.sqrt((d1 * d2) * (d1 * d2)) + + val inputRows = Seq( + InternalRow.fromSeq(Seq(1.0, 2.0)), + InternalRow.fromSeq(Seq(2.0, 3.0)), + InternalRow.fromSeq(Seq(1.0, null)), + InternalRow.fromSeq(Seq(null, 2.0)), + InternalRow.fromSeq(Seq(3.0, 4.0)), + InternalRow.fromSeq(Seq(null, null)) + ) + val expectedResults = Seq( + result(1.0, 2.0), + result(2.0, 3.0), + null, + null, + result(3.0, 4.0), + null + ) + + inputRows.zip(expectedResults).foreach { case (inputRow, expected) => + val projRow = proj.apply(inputRow) + if (expected != null) { + assert(projRow.getDouble(0) == expected) + } else { + assert(projRow.isNullAt(0)) + } + } + } + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala index 4c9bcfe8f93a6..180665e653727 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala @@ -23,13 +23,14 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.GenericArrayData +import 
org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String /** * A test suite for generated projections */ -class GeneratedProjectionSuite extends SparkFunSuite { +class GeneratedProjectionSuite extends SparkFunSuite with ExpressionEvalHelper { test("generated projections on wider table") { val N = 1000 @@ -246,4 +247,50 @@ class GeneratedProjectionSuite extends SparkFunSuite { val row2 = mutableProj(result) assert(result === row2) } + + test("SPARK-33473: subexpression elimination for interpreted SafeProjection") { + Seq("true", "false").foreach { enabled => + withSQLConf( + SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> enabled, + SQLConf.CODEGEN_FACTORY_MODE.key -> CodegenObjectFactoryMode.NO_CODEGEN.toString) { + val one = BoundReference(0, DoubleType, true) + val two = BoundReference(1, DoubleType, true) + + val mul = Multiply(one, two) + val mul2 = Multiply(mul, mul) + val sqrt = Sqrt(mul2) + val sum = Add(mul2, sqrt) + + val proj = SafeProjection.create(Seq(sum)) + val result = (d1: Double, d2: Double) => + ((d1 * d2) * (d1 * d2)) + Math.sqrt((d1 * d2) * (d1 * d2)) + + val inputRows = Seq( + InternalRow.fromSeq(Seq(1.0, 2.0)), + InternalRow.fromSeq(Seq(2.0, 3.0)), + InternalRow.fromSeq(Seq(1.0, null)), + InternalRow.fromSeq(Seq(null, 2.0)), + InternalRow.fromSeq(Seq(3.0, 4.0)), + InternalRow.fromSeq(Seq(null, null)) + ) + val expectedResults = Seq( + result(1.0, 2.0), + result(2.0, 3.0), + null, + null, + result(3.0, 4.0), + null + ) + + inputRows.zip(expectedResults).foreach { case (inputRow, expected) => + val projRow = proj.apply(inputRow) + if (expected != null) { + assert(projRow.getDouble(0) == expected) + } else { + assert(projRow.isNullAt(0)) + } + } + } + } + } } From 66a76378cf9aa049c9281fc099721904942fa5ee Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Wed, 18 Nov 2020 19:18:28 -0800 Subject: [PATCH 0514/1009] [SPARK-31255][SQL][FOLLOWUP] Add missing license headers ### What changes were proposed in this pull request? Add missing license headers for new files added in #28027. ### Why are the changes needed? To fix licenses. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? This is a purely non-functional change. Closes #30415 from rdblue/license-headers. Authored-by: Ryan Blue Signed-off-by: Dongjoon Hyun --- .../sql/connector/catalog/MetadataColumn.java | 19 +++++++++++++++++++ .../catalog/SupportsMetadataColumns.java | 19 +++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataColumn.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataColumn.java index 8aefa28323b33..cdfa082ced317 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataColumn.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataColumn.java @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.spark.sql.connector.catalog; import org.apache.spark.annotation.Evolving; diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java index fc313491f2970..208abfc302582 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.spark.sql.connector.catalog; import org.apache.spark.annotation.Evolving; From e3058ba17cb4512537953eb4ded884e24ee93ba2 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 19 Nov 2020 14:20:39 +0900 Subject: [PATCH 0515/1009] [SPARK-33441][BUILD] Add unused-imports compilation check and remove all unused-imports ### What changes were proposed in this pull request? This PR adds a new Scala compile arg to `pom.xml` to defend against new unused imports: - `-Ywarn-unused-import` for Scala 2.12 - `-Wconf:cat=unused-imports:e` for Scala 2.13 The other file changes remove all unused imports in the Spark code. ### Why are the changes needed? Clean up the code and add a guarantee to defend against new unused imports. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass the Jenkins or GitHub Action Closes #30351 from LuciferYang/remove-imports-core-module.
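The actual flag wiring lands in `pom.xml` and is not reproduced in this hunk list; as a rough illustration only, an equivalent sbt-style setup for the two flags quoted above might look like the sketch below. The extra `-Wunused:imports` entry is an added assumption: on Scala 2.13 the unused-imports lint has to be switched on before `-Wconf` can escalate that category to an error.

```scala
// build.sbt sketch (illustrative, not the Maven change made by this commit)
scalacOptions ++= {
  if (scalaVersion.value.startsWith("2.13")) {
    // enable the lint, then promote the unused-imports category to a compile error
    Seq("-Wunused:imports", "-Wconf:cat=unused-imports:e")
  } else {
    // Scala 2.12 flag named in the commit message; warns on unused imports
    Seq("-Ywarn-unused-import")
  }
}
```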
Authored-by: yangjie01 Signed-off-by: HyukjinKwon --- .../org/apache/spark/BarrierTaskContext.scala | 1 - .../org/apache/spark/MapOutputTracker.scala | 2 +- .../apache/spark/api/python/PythonRunner.scala | 5 ----- .../apache/spark/api/python/SerDeUtil.scala | 2 -- .../scala/org/apache/spark/api/r/RRunner.scala | 1 - .../org/apache/spark/deploy/JsonProtocol.scala | 4 ++-- .../history/BasicEventFilterBuilder.scala | 1 - .../deploy/history/FsHistoryProvider.scala | 2 +- .../spark/deploy/history/HybridStore.scala | 1 - .../apache/spark/deploy/master/Master.scala | 2 -- .../spark/deploy/master/ui/MasterWebUI.scala | 1 - .../CoarseGrainedExecutorBackend.scala | 1 - .../spark/network/BlockDataManager.scala | 2 +- .../spark/network/BlockTransferService.scala | 1 - .../netty/NettyBlockTransferService.scala | 6 ++---- .../spark/rdd/ParallelCollectionRDD.scala | 1 - .../spark/rdd/ReliableCheckpointRDD.scala | 1 - .../spark/resource/ResourceAllocator.scala | 3 +-- .../apache/spark/resource/ResourceUtils.scala | 4 ++-- .../spark/scheduler/TaskSchedulerImpl.scala | 3 --- .../cluster/CoarseGrainedClusterMessage.scala | 1 - .../shuffle/sort/SortShuffleManager.scala | 6 ++---- .../apache/spark/status/AppStatusStore.scala | 3 +-- .../org/apache/spark/status/KVUtils.scala | 1 - .../apache/spark/storage/BlockManager.scala | 1 - .../storage/BlockManagerDecommissioner.scala | 3 +-- .../storage/BlockManagerMasterEndpoint.scala | 2 +- .../org/apache/spark/util/ThreadUtils.scala | 1 - .../scala/org/apache/spark/util/Utils.scala | 2 +- .../org/apache/spark/StatusTrackerSuite.scala | 1 - .../StandaloneDynamicAllocationSuite.scala | 2 +- .../history/EventLogFileCompactorSuite.scala | 3 +-- .../deploy/master/ui/MasterWebUISuite.scala | 2 -- .../apache/spark/executor/ExecutorSuite.scala | 5 ++--- .../input/WholeTextFileRecordReaderSuite.scala | 1 - .../netty/NettyBlockTransferServiceSuite.scala | 2 +- .../spark/resource/ResourceUtilsSuite.scala | 2 -- .../scheduler/BarrierTaskContextSuite.scala | 1 - .../CoarseGrainedSchedulerBackendSuite.scala | 1 - .../spark/scheduler/DAGSchedulerSuite.scala | 2 +- .../scheduler/TaskSchedulerImplSuite.scala | 2 +- .../spark/scheduler/TaskSetManagerSuite.scala | 1 - .../scheduler/WorkerDecommissionSuite.scala | 6 ++---- .../spark/storage/BlockInfoManagerSuite.scala | 2 +- .../org/apache/spark/ui/StagePageSuite.scala | 1 - .../org/apache/spark/util/UtilsSuite.scala | 3 +-- .../examples/ml/DeveloperApiExample.scala | 1 - .../examples/mllib/RankingMetricsExample.scala | 1 - .../spark/examples/sql/SparkSQLExample.scala | 2 -- .../spark/sql/avro/SchemaConverters.scala | 2 +- .../org/apache/spark/sql/avro/AvroSuite.scala | 2 +- .../apache/spark/sql/kafka010/KafkaBatch.scala | 2 -- .../sql/kafka010/KafkaMicroBatchStream.scala | 3 --- .../spark/sql/kafka010/KafkaOffsetReader.scala | 11 +++++++---- .../spark/sql/kafka010/KafkaRelation.scala | 3 --- .../spark/sql/kafka010/KafkaSource.scala | 5 ----- .../sql/kafka010/KafkaSourceProvider.scala | 2 +- .../kafka010/KafkaMicroBatchSourceSuite.scala | 1 - .../spark/sql/kafka010/KafkaTestUtils.scala | 4 +--- .../apache/spark/kafka010/KafkaTokenUtil.scala | 2 +- .../kafka010/mocks/MockScheduler.scala | 2 -- .../kinesis/KinesisCheckpointer.scala | 1 - .../kinesis/KinesisInputDStream.scala | 2 -- .../apache/spark/ml/attribute/package.scala | 2 -- .../ml/feature/VarianceThresholdSelector.scala | 5 +---- .../org/apache/spark/ml/feature/package.scala | 2 -- .../apache/spark/ml/recommendation/ALS.scala | 2 +- .../ml/recommendation/TopByKeyAggregator.scala | 
1 - .../classification/LogisticRegression.scala | 1 - .../apache/spark/ml/fpm/PrefixSpanSuite.scala | 1 - .../GeneralizedLinearRegressionSuite.scala | 4 ---- .../apache/spark/ml/stat/SummarizerSuite.scala | 1 - .../spark/ml/tree/impl/RandomForestSuite.scala | 1 - .../spark/ml/util/DefaultReadWriteTest.scala | 1 - .../spark/ml/util/PMMLReadWriteTest.scala | 3 --- .../spark/mllib/clustering/LDASuite.scala | 1 - .../linalg/distributed/BlockMatrixSuite.scala | 2 +- pom.xml | 3 +++ .../org/apache/spark/repl/Repl2Suite.scala | 5 ----- .../spark/repl/ExecutorClassLoaderSuite.scala | 1 - .../org/apache/spark/repl/ReplSuite.scala | 2 +- .../k8s/features/EnvSecretsFeatureStep.scala | 2 +- .../k8s/features/MountSecretsFeatureStep.scala | 2 +- .../cluster/k8s/ExecutorPodsAllocator.scala | 1 - .../cluster/k8s/KubernetesClusterManager.scala | 1 - .../spark/deploy/k8s/KubernetesTestConf.scala | 1 - .../deploy/k8s/KubernetesUtilsSuite.scala | 2 +- .../spark/deploy/k8s/PodBuilderSuite.scala | 1 - .../DriverCommandFeatureStepSuite.scala | 3 --- .../DriverServiceFeatureStepSuite.scala | 1 - .../HadoopConfDriverFeatureStepSuite.scala | 1 - .../KerberosConfDriverFeatureStepSuite.scala | 3 +-- .../MountVolumesFeatureStepSuite.scala | 2 +- .../k8s/ExecutorPodsAllocatorSuite.scala | 2 +- .../KubernetesTestComponents.scala | 1 - .../deploy/k8s/integrationtest/Utils.scala | 2 -- .../backend/cloud/KubeConfigBackend.scala | 2 -- .../spark/deploy/mesos/ui/MesosClusterUI.scala | 1 - .../MesosCoarseGrainedSchedulerBackend.scala | 2 +- .../mesos/MesosSchedulerBackendUtil.scala | 2 +- .../cluster/mesos/MesosSchedulerUtils.scala | 4 ++-- .../spark/deploy/yarn/ExecutorRunnable.scala | 4 +--- ...tyPreferredContainerPlacementStrategy.scala | 2 +- .../deploy/yarn/YarnSparkHadoopUtil.scala | 1 - .../launcher/YarnCommandBuilderUtils.scala | 2 -- .../deploy/yarn/YarnSparkHadoopUtilSuite.scala | 1 - .../spark/sql/catalyst/ScalaReflection.scala | 5 ----- .../catalyst/analysis/DecimalPrecision.scala | 1 - .../catalyst/analysis/ResolveCatalogs.scala | 2 +- .../analysis/higherOrderFunctions.scala | 1 - .../catalyst/analysis/v2ResolutionPlans.scala | 5 ++--- .../sql/catalyst/catalog/ExternalCatalog.scala | 2 +- .../catalyst/encoders/ExpressionEncoder.scala | 4 +--- .../spark/sql/catalyst/expressions/Cast.scala | 3 +-- .../sql/catalyst/expressions/Expression.scala | 5 ++--- .../sql/catalyst/expressions/ScalaUDF.scala | 2 +- .../expressions/codegen/CodeGenerator.scala | 7 +++---- .../codegen/GeneratePredicate.scala | 1 - .../codegen/GenerateUnsafeRowJoiner.scala | 4 ---- .../expressions/higherOrderFunctions.scala | 2 +- .../sql/catalyst/expressions/predicates.scala | 3 --- .../spark/sql/catalyst/json/JsonFilters.scala | 1 - .../sql/catalyst/optimizer/ComplexTypes.scala | 1 - .../optimizer/NormalizeFloatingNumbers.scala | 4 ++-- .../ReplaceNullWithFalseInPredicate.scala | 1 - .../catalyst/optimizer/finishAnalysis.scala | 2 -- .../sql/catalyst/optimizer/subquery.scala | 1 - .../plans/logical/AnalysisHelper.scala | 12 ++++++------ .../catalyst/plans/logical/Statistics.scala | 8 -------- .../plans/logical/basicLogicalOperators.scala | 7 ++----- .../sql/catalyst/plans/logical/hints.scala | 1 - .../statsEstimation/ProjectEstimation.scala | 2 +- .../spark/sql/catalyst/trees/TreeNode.scala | 1 - .../spark/sql/catalyst/util/ArrayData.scala | 1 - .../sql/catalyst/util/RebaseDateTime.scala | 2 +- .../datasources/v2/DataSourceV2Relation.scala | 8 ++++---- .../org/apache/spark/sql/types/DataType.scala | 2 +- 
.../org/apache/spark/sql/types/Decimal.scala | 1 - .../org/apache/spark/sql/RowJsonSuite.scala | 3 +-- .../sql/catalyst/ScalaReflectionSuite.scala | 1 - ...reateTablePartitioningValidationSuite.scala | 2 +- .../analysis/ResolveNaturalJoinSuite.scala | 1 - .../analysis/StreamingJoinHelperSuite.scala | 2 +- .../analysis/UnsupportedOperationsSuite.scala | 1 - .../expressions/ObjectExpressionsSuite.scala | 5 ++--- .../SubExprEvaluationRuntimeSuite.scala | 1 - .../aggregate/ApproximatePercentileSuite.scala | 2 +- .../expressions/codegen/CodeBlockSuite.scala | 2 +- .../optimizer/EliminateDistinctSuite.scala | 2 +- .../optimizer/FilterPushdownSuite.scala | 3 +-- .../PullupCorrelatedPredicatesSuite.scala | 2 +- .../optimizer/SimplifyCastsSuite.scala | 2 -- .../catalyst/optimizer/complexTypesSuite.scala | 2 +- .../sql/catalyst/parser/DDLParserSuite.scala | 3 +-- .../FilterEstimationSuite.scala | 2 +- .../scala/org/apache/spark/sql/Column.scala | 1 - .../org/apache/spark/sql/DataFrameWriter.scala | 1 - .../scala/org/apache/spark/sql/Dataset.scala | 3 +-- .../spark/sql/RelationalGroupedDataset.scala | 1 - .../org/apache/spark/sql/RuntimeConfig.scala | 2 -- .../org/apache/spark/sql/UDFRegistration.scala | 4 ++-- .../org/apache/spark/sql/catalog/Catalog.scala | 2 +- .../analysis/ResolveSessionCatalog.scala | 2 +- .../spark/sql/execution/CacheManager.scala | 2 +- .../sql/execution/CollectMetricsExec.scala | 2 -- .../sql/execution/DataSourceScanExec.scala | 1 - .../spark/sql/execution/HiveResult.scala | 2 +- .../execution/RemoveRedundantProjects.scala | 1 - .../spark/sql/execution/SparkSqlParser.scala | 2 +- .../spark/sql/execution/SparkStrategies.scala | 7 ++++--- .../sql/execution/WholeStageCodegenExec.scala | 1 - .../adaptive/DemoteBroadcastHashJoin.scala | 1 - .../execution/adaptive/LogicalQueryStage.scala | 1 - .../adaptive/ReuseAdaptiveSubquery.scala | 1 - .../sql/execution/adaptive/simpleCosting.scala | 2 +- .../aggregate/ObjectAggregationIterator.scala | 1 - .../aggregate/ObjectAggregationMap.scala | 1 - .../SortBasedAggregationIterator.scala | 7 ++++--- .../spark/sql/execution/aggregate/udaf.scala | 6 +----- .../sql/execution/basicPhysicalOperators.scala | 4 ++-- .../bucketing/CoalesceBucketsInJoin.scala | 1 - .../DisableUnnecessaryBucketedScan.scala | 1 - .../sql/execution/columnar/ColumnStats.scala | 2 +- .../sql/execution/command/CommandUtils.scala | 2 +- .../execution/command/DataWritingCommand.scala | 3 +-- .../sql/execution/command/SetCommand.scala | 2 +- .../spark/sql/execution/command/cache.scala | 1 - .../command/createDataSourceTables.scala | 1 - .../sql/execution/command/functions.scala | 2 +- .../sql/execution/datasources/DataSource.scala | 1 - .../datasources/FallBackFileSourceV2.scala | 5 +++-- .../datasources/HadoopFsRelation.scala | 4 ---- .../execution/datasources/OutputWriter.scala | 3 +-- .../PartitioningAwareFileIndex.scala | 2 +- .../datasources/PartitioningUtils.scala | 2 +- .../datasources/RecordReaderIterator.scala | 2 -- .../execution/datasources/SchemaPruning.scala | 2 +- .../binaryfile/BinaryFileFormat.scala | 4 ++-- .../datasources/csv/CSVDataSource.scala | 2 -- .../execution/datasources/jdbc/JDBCRDD.scala | 6 +++--- .../datasources/json/JsonFileFormat.scala | 3 --- .../datasources/orc/OrcDeserializer.scala | 1 - .../parquet/ParquetFileFormat.scala | 4 ++-- .../parquet/ParquetOutputWriter.scala | 1 - .../parquet/ParquetRowConverter.scala | 18 +++++++++++------- .../sql/execution/datasources/rules.scala | 2 -- .../datasources/v2/DescribeTableExec.scala | 2 +- 
.../datasources/v2/DropNamespaceExec.scala | 2 +- .../datasources/v2/FileDataSourceV2.scala | 1 - .../v2/ShowTablePropertiesExec.scala | 2 +- .../datasources/v2/TableCapabilityCheck.scala | 2 +- .../datasources/v2/TextBasedFileScan.scala | 2 -- .../datasources/v2/orc/OrcScanBuilder.scala | 3 --- .../PlanDynamicPruningFilters.scala | 2 +- .../exchange/EnsureRequirements.scala | 1 - .../sql/execution/exchange/Exchange.scala | 1 - .../sql/execution/python/EvalPythonExec.scala | 2 +- .../spark/sql/execution/r/ArrowRRunner.scala | 2 +- .../streaming/FlatMapGroupsWithStateExec.scala | 2 +- .../execution/streaming/HDFSMetadataLog.scala | 4 +--- .../execution/streaming/StreamExecution.scala | 1 - .../execution/streaming/StreamMetadata.scala | 10 +++++----- .../StreamingSymmetricHashJoinHelper.scala | 8 ++++---- .../streaming/sources/ForeachBatchSink.scala | 1 - .../sources/PackedRowWriterFactory.scala | 7 ++++--- .../execution/streaming/sources/memory.scala | 5 ++--- .../execution/streaming/state/StateStore.scala | 2 +- .../streaming/state/StateStoreRDD.scala | 2 -- .../state/SymmetricHashJoinStateManager.scala | 6 +++--- .../streaming/statefulOperators.scala | 1 - .../execution/streaming/streamingLimits.scala | 1 - .../sql/execution/window/WindowExec.scala | 8 +------- .../sql/execution/window/WindowExecBase.scala | 2 +- .../sql/expressions/UserDefinedFunction.scala | 7 ++----- .../sql/expressions/scalalang/typed.scala | 2 -- .../scala/org/apache/spark/sql/functions.scala | 3 +-- .../spark/sql/internal/SessionState.scala | 5 +++-- .../spark/sql/internal/SharedState.scala | 2 -- .../sql/streaming/StreamingQueryManager.scala | 1 - .../spark/sql/streaming/ui/UIUtils.scala | 1 - .../org/apache/spark/sql/DataFrameSuite.scala | 1 - .../sql/DataFrameTimeWindowingSuite.scala | 2 -- .../spark/sql/DataFrameWindowFramesSuite.scala | 2 -- .../spark/sql/DataFrameWriterV2Suite.scala | 2 +- .../spark/sql/DatasetPrimitiveSuite.scala | 1 - .../spark/sql/IntegratedUDFTestUtils.scala | 1 - .../apache/spark/sql/PlanStabilitySuite.scala | 1 - .../spark/sql/StatisticsCollectionSuite.scala | 1 - ...aSourceV2DataFrameSessionCatalogSuite.scala | 1 - .../sql/connector/DataSourceV2SQLSuite.scala | 2 +- .../SupportsCatalogOptionsSuite.scala | 2 +- .../connector/TableCapabilityCheckSuite.scala | 2 +- .../sql/connector/V1ReadFallbackSuite.scala | 2 +- .../BaseScriptTransformationSuite.scala | 1 - .../spark/sql/execution/PlannerSuite.scala | 2 +- .../spark/sql/execution/SameResultSuite.scala | 2 +- .../spark/sql/execution/SparkPlanTest.scala | 1 - .../execution/adaptive/AdaptiveTestUtils.scala | 2 -- .../benchmark/FilterPushdownBenchmark.scala | 2 +- ...rquetNestedPredicatePushDownBenchmark.scala | 3 +-- .../benchmark/TPCDSQueryBenchmark.scala | 1 - .../execution/columnar/ColumnStatsSuite.scala | 1 - .../spark/sql/execution/command/DDLSuite.scala | 1 - .../command/PlanResolutionSuite.scala | 2 +- .../execution/datasources/ReadSchemaTest.scala | 2 +- .../RowDataSourceStrategySuite.scala | 5 ----- .../SaveIntoDataSourceCommandSuite.scala | 1 - .../binaryfile/BinaryFileFormatSuite.scala | 2 +- .../json/JsonParsingOptionsSuite.scala | 5 ++--- .../orc/OrcV2SchemaPruningSuite.scala | 2 +- .../parquet/ParquetCommitterSuite.scala | 4 +--- .../datasources/parquet/ParquetIOSuite.scala | 1 - .../parquet/ParquetInteroperabilitySuite.scala | 2 +- .../ParquetPartitionDiscoverySuite.scala | 2 -- .../parquet/ParquetSchemaSuite.scala | 2 +- .../streaming/FileStreamSinkLogSuite.scala | 1 - .../execution/streaming/MemorySinkSuite.scala | 2 +- 
...FlatMapGroupsWithStateExecHelperSuite.scala | 1 - .../spark/sql/internal/CatalogSuite.scala | 2 +- .../spark/sql/internal/SQLConfSuite.scala | 2 -- .../spark/sql/sources/BucketedReadSuite.scala | 2 +- .../DisableUnnecessaryBucketedScanSuite.scala | 1 - .../spark/sql/sources/PathOptionSuite.scala | 2 -- .../FlatMapGroupsWithStateSuite.scala | 3 +-- .../spark/sql/streaming/StreamTest.scala | 1 - .../streaming/StreamingAggregationSuite.scala | 2 -- .../StreamingDeduplicationSuite.scala | 8 ++------ .../sql/streaming/StreamingJoinSuite.scala | 10 +--------- .../streaming/continuous/ContinuousSuite.scala | 1 - .../test/DataStreamReaderWriterSuite.scala | 2 -- .../spark/sql/test/GenericFunSpecSuite.scala | 2 -- .../SparkGetSchemasOperation.scala | 3 --- .../thriftserver/SparkGetTablesOperation.scala | 1 - .../hive/thriftserver/SparkSQLCLIService.scala | 2 -- .../thriftserver/SparkSQLSessionManager.scala | 4 ---- .../thriftserver/ui/ThriftServerPage.scala | 1 - .../spark/sql/hive/thriftserver/CliSuite.scala | 2 +- .../ThriftServerQueryTestSuite.scala | 1 - .../execution/HiveCompatibilitySuite.scala | 1 - .../spark/sql/hive/HiveExternalCatalog.scala | 1 - .../org/apache/spark/sql/hive/HiveUtils.scala | 1 - .../spark/sql/hive/client/HiveClientImpl.scala | 1 - .../spark/sql/hive/client/HiveShim.scala | 3 +-- .../sql/hive/execution/SaveAsHiveFile.scala | 2 +- .../InsertIntoHiveTableBenchmark.scala | 1 - .../sql/hive/HiveExternalCatalogSuite.scala | 2 -- .../sql/hive/HiveParquetSourceSuite.scala | 1 - .../apache/spark/sql/hive/HiveShimSuite.scala | 3 --- .../sql/hive/HiveShowCreateTableSuite.scala | 2 +- .../spark/sql/hive/HiveSparkSubmitSuite.scala | 2 -- .../sql/hive/HiveUserDefinedTypeSuite.scala | 1 - .../apache/spark/sql/hive/HiveUtilsSuite.scala | 3 +-- .../spark/sql/hive/QueryPartitionSuite.scala | 4 ---- .../spark/sql/hive/StatisticsSuite.scala | 2 +- .../hive/client/HiveClientUserNameSuite.scala | 1 - .../sql/hive/execution/HiveQuerySuite.scala | 1 - .../sql/hive/execution/HiveSQLViewSuite.scala | 4 ++-- .../HiveScriptTransformationSuite.scala | 3 --- .../hive/execution/HiveTableScanSuite.scala | 1 - .../sql/hive/execution/HiveUDFSuite.scala | 1 - .../execution/PrunePartitionSuiteBase.scala | 2 +- .../sql/hive/execution/SQLQuerySuite.scala | 1 - .../sql/hive/execution/UDAQuerySuite.scala | 12 ++---------- .../apache/spark/sql/hive/test/TestHive.scala | 1 - .../streaming/ApiStreamingRootResource.scala | 2 -- .../org/apache/spark/streaming/State.scala | 2 -- .../scheduler/ReceivedBlockTracker.scala | 1 - .../streaming/ReceiverInputDStreamSuite.scala | 1 - .../apache/spark/streaming/TestSuiteBase.scala | 3 +-- .../receiver/BlockGeneratorSuite.scala | 1 - .../ExecutorAllocationManagerSuite.scala | 2 +- 331 files changed, 225 insertions(+), 573 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala b/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala index 4d765481eb836..09fa91655fba5 100644 --- a/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala +++ b/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala @@ -21,7 +21,6 @@ import java.util.{Properties, Timer, TimerTask} import scala.collection.JavaConverters._ import scala.concurrent.duration._ -import scala.language.postfixOps import scala.util.{Failure, Success => ScalaSuccess, Try} import org.apache.spark.annotation.{Experimental, Since} diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala index 
c3152d9225107..cdec1982b4487 100644 --- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala +++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala @@ -35,7 +35,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ import org.apache.spark.io.CompressionCodec import org.apache.spark.rpc.{RpcCallContext, RpcEndpoint, RpcEndpointRef, RpcEnv} -import org.apache.spark.scheduler.{ExecutorCacheTaskLocation, MapStatus} +import org.apache.spark.scheduler.MapStatus import org.apache.spark.shuffle.MetadataFetchFailedException import org.apache.spark.storage.{BlockId, BlockManagerId, ShuffleBlockId} import org.apache.spark.util._ diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala index cb4eabefec32f..136da80d48dee 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala @@ -24,13 +24,8 @@ import java.nio.charset.StandardCharsets.UTF_8 import java.util.concurrent.atomic.AtomicBoolean import scala.collection.JavaConverters._ -import scala.collection.mutable.ArrayBuffer import scala.util.control.NonFatal -import org.json4s.JsonAST._ -import org.json4s.JsonDSL._ -import org.json4s.jackson.JsonMethods.{compact, render} - import org.apache.spark._ import org.apache.spark.internal.Logging import org.apache.spark.internal.config.{BUFFER_SIZE, EXECUTOR_CORES} diff --git a/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala b/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala index 5a6fa507963f0..dc2587a62ae40 100644 --- a/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala +++ b/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala @@ -17,8 +17,6 @@ package org.apache.spark.api.python -import java.nio.ByteOrder -import java.nio.charset.StandardCharsets import java.util.{ArrayList => JArrayList} import scala.collection.JavaConverters._ diff --git a/core/src/main/scala/org/apache/spark/api/r/RRunner.scala b/core/src/main/scala/org/apache/spark/api/r/RRunner.scala index 20ab6fc2f348d..41c66024272b9 100644 --- a/core/src/main/scala/org/apache/spark/api/r/RRunner.scala +++ b/core/src/main/scala/org/apache/spark/api/r/RRunner.scala @@ -19,7 +19,6 @@ package org.apache.spark.api.r import java.io._ -import org.apache.spark._ import org.apache.spark.broadcast.Broadcast /** diff --git a/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala index 17733d99cd5bc..d76fb7f9a20b3 100644 --- a/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala +++ b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala @@ -22,7 +22,6 @@ import org.json4s.JsonDSL._ import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, WorkerStateResponse} import org.apache.spark.deploy.master._ -import org.apache.spark.deploy.master.RecoveryState.MasterState import org.apache.spark.deploy.worker.ExecutorRunner import org.apache.spark.resource.{ResourceInformation, ResourceRequirement} @@ -208,7 +207,8 @@ private[deploy] object JsonProtocol { * master * `completeddrivers` a list of Json objects of [[DriverInfo]] of the completed drivers * of the master - * `status` status of the master, see [[MasterState]] + * `status` status of the master, + * see [[org.apache.spark.deploy.master.RecoveryState.MasterState]] */ def writeMasterState(obj: MasterStateResponse): JObject 
= { val aliveWorkers = obj.workers.filter(_.isAlive()) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/BasicEventFilterBuilder.scala b/core/src/main/scala/org/apache/spark/deploy/history/BasicEventFilterBuilder.scala index c659d32d16314..57b05ff245258 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/BasicEventFilterBuilder.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/BasicEventFilterBuilder.scala @@ -19,7 +19,6 @@ package org.apache.spark.deploy.history import scala.collection.mutable -import org.apache.spark.SparkContext import org.apache.spark.deploy.history.EventFilter.FilterStatistics import org.apache.spark.internal.Logging import org.apache.spark.scheduler._ diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index e1b0fc5e45d6e..e5341aff8ce66 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -21,7 +21,7 @@ import java.io.{File, FileNotFoundException, IOException} import java.lang.{Long => JLong} import java.nio.file.Files import java.util.{Date, NoSuchElementException, ServiceLoader} -import java.util.concurrent.{ConcurrentHashMap, ExecutorService, Future, TimeUnit} +import java.util.concurrent.{ConcurrentHashMap, ExecutorService, TimeUnit} import java.util.zip.ZipOutputStream import scala.collection.JavaConverters._ diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HybridStore.scala b/core/src/main/scala/org/apache/spark/deploy/history/HybridStore.scala index 58714f16e8417..1b8c7ff26e9f5 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HybridStore.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HybridStore.scala @@ -17,7 +17,6 @@ package org.apache.spark.deploy.history -import java.io.IOException import java.util.Collection import java.util.concurrent.ConcurrentHashMap import java.util.concurrent.atomic.AtomicBoolean diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index a582a5d045855..cccd3da323774 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -22,9 +22,7 @@ import java.util.{Date, Locale} import java.util.concurrent.{ScheduledFuture, TimeUnit} import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} -import scala.collection.mutable import scala.util.Random -import scala.util.control.NonFatal import org.apache.spark.{SecurityManager, SparkConf, SparkException} import org.apache.spark.deploy.{ApplicationDescription, DriverDescription, ExecutorState, SparkHadoopUtil} diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala index 035f9d379471c..af94bd6d9e0f2 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala @@ -18,7 +18,6 @@ package org.apache.spark.deploy.master.ui import java.net.{InetAddress, NetworkInterface, SocketException} -import java.util.Locale import javax.servlet.http.{HttpServlet, HttpServletRequest, HttpServletResponse} import org.apache.spark.deploy.DeployMessages.{DecommissionWorkersOnHosts, MasterStateResponse, 
RequestMasterState} diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index b2bc6b3b68007..6a1fd57873c3a 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -17,7 +17,6 @@ package org.apache.spark.executor -import java.io.File import java.net.URL import java.nio.ByteBuffer import java.util.Locale diff --git a/core/src/main/scala/org/apache/spark/network/BlockDataManager.scala b/core/src/main/scala/org/apache/spark/network/BlockDataManager.scala index 62fbc166167d3..cafb39ea82ad9 100644 --- a/core/src/main/scala/org/apache/spark/network/BlockDataManager.scala +++ b/core/src/main/scala/org/apache/spark/network/BlockDataManager.scala @@ -22,7 +22,7 @@ import scala.reflect.ClassTag import org.apache.spark.TaskContext import org.apache.spark.network.buffer.ManagedBuffer import org.apache.spark.network.client.StreamCallbackWithID -import org.apache.spark.storage.{BlockId, ShuffleBlockId, StorageLevel} +import org.apache.spark.storage.{BlockId, StorageLevel} private[spark] trait BlockDataManager { diff --git a/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala index c7f5a97e35612..635efc3e22628 100644 --- a/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala +++ b/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala @@ -23,7 +23,6 @@ import scala.concurrent.{Future, Promise} import scala.concurrent.duration.Duration import scala.reflect.ClassTag -import org.apache.spark.internal.Logging import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer, NioManagedBuffer} import org.apache.spark.network.shuffle.{BlockFetchingListener, BlockStoreClient, DownloadFileManager} import org.apache.spark.storage.{BlockId, EncryptedManagedBuffer, StorageLevel} diff --git a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala index 806fbf52795bc..828849812bbd1 100644 --- a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala @@ -19,9 +19,7 @@ package org.apache.spark.network.netty import java.io.IOException import java.nio.ByteBuffer -import java.util import java.util.{HashMap => JHashMap, Map => JMap} -import java.util.concurrent.CompletableFuture import scala.collection.JavaConverters._ import scala.concurrent.{Future, Promise} @@ -35,11 +33,11 @@ import org.apache.spark.ExecutorDeadException import org.apache.spark.internal.config import org.apache.spark.network._ import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} -import org.apache.spark.network.client.{RpcResponseCallback, TransportClient, TransportClientBootstrap, TransportClientFactory} +import org.apache.spark.network.client.{RpcResponseCallback, TransportClientBootstrap} import org.apache.spark.network.crypto.{AuthClientBootstrap, AuthServerBootstrap} import org.apache.spark.network.server._ import org.apache.spark.network.shuffle.{BlockFetchingListener, DownloadFileManager, OneForOneBlockFetcher, RetryingBlockFetcher} -import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, 
GetLocalDirsForExecutors, LocalDirsForExecutors, UploadBlock, UploadBlockStream} +import org.apache.spark.network.shuffle.protocol.{UploadBlock, UploadBlockStream} import org.apache.spark.network.util.JavaUtils import org.apache.spark.rpc.RpcEndpointRef import org.apache.spark.serializer.JavaSerializer diff --git a/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala index 324cba5b4de42..f0239cdd9136d 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala @@ -19,7 +19,6 @@ package org.apache.spark.rdd import java.io._ -import scala.Serializable import scala.collection.Map import scala.collection.immutable.NumericRange import scala.collection.mutable.ArrayBuffer diff --git a/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala index 576a83f6ab4d9..5093a12777ad3 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala @@ -20,7 +20,6 @@ package org.apache.spark.rdd import java.io.{FileNotFoundException, IOException} import java.util.concurrent.TimeUnit -import scala.collection.mutable import scala.reflect.ClassTag import scala.util.control.NonFatal diff --git a/core/src/main/scala/org/apache/spark/resource/ResourceAllocator.scala b/core/src/main/scala/org/apache/spark/resource/ResourceAllocator.scala index 482d9e94c6dd9..22d10a975ad0f 100644 --- a/core/src/main/scala/org/apache/spark/resource/ResourceAllocator.scala +++ b/core/src/main/scala/org/apache/spark/resource/ResourceAllocator.scala @@ -20,7 +20,6 @@ package org.apache.spark.resource import scala.collection.mutable import org.apache.spark.SparkException -import org.apache.spark.util.collection.OpenHashMap /** * Trait used to help executor/worker allocate resources. @@ -40,7 +39,7 @@ trait ResourceAllocator { * can be a multiple, such that each address can be allocated up to [[slotsPerAddress]] * times. * - * TODO Use [[OpenHashMap]] instead to gain better performance. + * TODO Use [[org.apache.spark.util.collection.OpenHashMap]] instead to gain better performance. 
*/ private lazy val addressAvailabilityMap = { mutable.HashMap(resourceAddresses.map(_ -> slotsPerAddress): _*) diff --git a/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala b/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala index 5a9435653920f..837b2d80aace6 100644 --- a/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala +++ b/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala @@ -29,8 +29,8 @@ import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.api.resource.ResourceDiscoveryPlugin import org.apache.spark.internal.Logging -import org.apache.spark.internal.config.{CPUS_PER_TASK, EXECUTOR_CORES, RESOURCES_DISCOVERY_PLUGIN, SPARK_TASK_PREFIX} -import org.apache.spark.internal.config.Tests.{RESOURCES_WARNING_TESTING, SKIP_VALIDATE_CORES_TESTING} +import org.apache.spark.internal.config.{EXECUTOR_CORES, RESOURCES_DISCOVERY_PLUGIN, SPARK_TASK_PREFIX} +import org.apache.spark.internal.config.Tests.{RESOURCES_WARNING_TESTING} import org.apache.spark.util.Utils /** diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala index 57e219999b0d0..b939e40f3b60c 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala @@ -26,9 +26,6 @@ import scala.collection.mutable import scala.collection.mutable.{ArrayBuffer, Buffer, HashMap, HashSet} import scala.util.Random -import com.google.common.base.Ticker -import com.google.common.cache.CacheBuilder - import org.apache.spark._ import org.apache.spark.TaskState.TaskState import org.apache.spark.executor.ExecutorMetrics diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala index eda1cb52d4abc..e084453be0789 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala @@ -22,7 +22,6 @@ import java.nio.ByteBuffer import org.apache.spark.TaskState.TaskState import org.apache.spark.resource.{ResourceInformation, ResourceProfile} import org.apache.spark.rpc.RpcEndpointRef -import org.apache.spark.scheduler.ExecutorDecommissionInfo import org.apache.spark.scheduler.ExecutorLossReason import org.apache.spark.util.SerializableBuffer diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala index 72460180f5908..d9b8eddcf8cd0 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala @@ -22,11 +22,9 @@ import java.util.concurrent.ConcurrentHashMap import scala.collection.JavaConverters._ import org.apache.spark._ -import org.apache.spark.internal.{config, Logging} -import org.apache.spark.scheduler.MapStatus +import org.apache.spark.internal.Logging import org.apache.spark.shuffle._ -import org.apache.spark.shuffle.api.{ShuffleDataIO, ShuffleExecutorComponents} -import org.apache.spark.util.Utils +import org.apache.spark.shuffle.api.ShuffleExecutorComponents import org.apache.spark.util.collection.OpenHashSet /** diff --git 
a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala index 5c6543fe28a18..affa85b76cf19 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala @@ -22,8 +22,7 @@ import java.util.{List => JList} import scala.collection.JavaConverters._ import scala.collection.mutable.HashMap -import org.apache.spark.{JobExecutionStatus, SparkConf, SparkException} -import org.apache.spark.resource.ResourceProfileManager +import org.apache.spark.{JobExecutionStatus, SparkConf} import org.apache.spark.status.api.v1 import org.apache.spark.ui.scope._ import org.apache.spark.util.Utils diff --git a/core/src/main/scala/org/apache/spark/status/KVUtils.scala b/core/src/main/scala/org/apache/spark/status/KVUtils.scala index 45348be5c98b9..c79f2dcd86533 100644 --- a/core/src/main/scala/org/apache/spark/status/KVUtils.scala +++ b/core/src/main/scala/org/apache/spark/status/KVUtils.scala @@ -21,7 +21,6 @@ import java.io.File import scala.annotation.meta.getter import scala.collection.JavaConverters._ -import scala.language.implicitConversions import scala.reflect.{classTag, ClassTag} import com.fasterxml.jackson.annotation.JsonInclude diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index 3909c02c5bb1f..924601f92c5b8 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -55,7 +55,6 @@ import org.apache.spark.rpc.RpcEnv import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.serializer.{SerializerInstance, SerializerManager} import org.apache.spark.shuffle.{MigratableResolver, ShuffleManager, ShuffleWriteMetricsReporter} -import org.apache.spark.shuffle.{ShuffleManager, ShuffleWriteMetricsReporter} import org.apache.spark.storage.BlockManagerMessages.{DecommissionBlockManager, ReplicateBlock} import org.apache.spark.storage.memory._ import org.apache.spark.unsafe.Platform diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala index 9699515c626bf..7a55039db1b60 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala @@ -18,7 +18,6 @@ package org.apache.spark.storage import java.io.IOException -import java.util.concurrent.ExecutorService import java.util.concurrent.atomic.AtomicInteger import scala.collection.JavaConverters._ @@ -28,7 +27,7 @@ import scala.util.control.NonFatal import org.apache.spark._ import org.apache.spark.internal.Logging import org.apache.spark.internal.config -import org.apache.spark.shuffle.{MigratableResolver, ShuffleBlockInfo} +import org.apache.spark.shuffle.ShuffleBlockInfo import org.apache.spark.storage.BlockManagerMessages.ReplicateBlock import org.apache.spark.util.ThreadUtils diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala index b8c5cbd121861..a7532a9870fae 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala @@ -33,7 +33,7 @@ 
import org.apache.spark.{MapOutputTrackerMaster, SparkConf} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.internal.{config, Logging} import org.apache.spark.network.shuffle.ExternalBlockStoreClient -import org.apache.spark.rpc.{IsolatedRpcEndpoint, RpcCallContext, RpcEndpointAddress, RpcEndpointRef, RpcEnv} +import org.apache.spark.rpc.{IsolatedRpcEndpoint, RpcCallContext, RpcEndpointRef, RpcEnv} import org.apache.spark.scheduler._ import org.apache.spark.scheduler.cluster.{CoarseGrainedClusterMessages, CoarseGrainedSchedulerBackend} import org.apache.spark.storage.BlockManagerMessages._ diff --git a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala index 78206c51c1028..d45dc937910d9 100644 --- a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala @@ -23,7 +23,6 @@ import java.util.concurrent.locks.ReentrantLock import scala.concurrent.{Awaitable, ExecutionContext, ExecutionContextExecutor, Future} import scala.concurrent.duration.{Duration, FiniteDuration} -import scala.language.higherKinds import scala.util.control.NonFatal import com.google.common.util.concurrent.ThreadFactoryBuilder diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 7f1f3a71acab8..b743ab6507117 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -28,7 +28,7 @@ import java.nio.channels.{Channels, FileChannel, WritableByteChannel} import java.nio.charset.StandardCharsets import java.nio.file.Files import java.security.SecureRandom -import java.util.{Arrays, Locale, Properties, Random, UUID} +import java.util.{Locale, Properties, Random, UUID} import java.util.concurrent._ import java.util.concurrent.TimeUnit.NANOSECONDS import java.util.zip.GZIPInputStream diff --git a/core/src/test/scala/org/apache/spark/StatusTrackerSuite.scala b/core/src/test/scala/org/apache/spark/StatusTrackerSuite.scala index fae6c4af1240c..e6d3377120e56 100644 --- a/core/src/test/scala/org/apache/spark/StatusTrackerSuite.scala +++ b/core/src/test/scala/org/apache/spark/StatusTrackerSuite.scala @@ -18,7 +18,6 @@ package org.apache.spark import scala.concurrent.duration._ -import scala.language.implicitConversions import org.scalatest.concurrent.Eventually._ import org.scalatest.matchers.must.Matchers diff --git a/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala b/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala index e1d4eff0a62cb..e47181719a9db 100644 --- a/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala @@ -21,7 +21,7 @@ import scala.collection.mutable import scala.concurrent.duration._ import org.mockito.ArgumentMatchers.any -import org.mockito.Mockito.{mock, verify, when} +import org.mockito.Mockito.{mock, when} import org.scalatest.{BeforeAndAfterAll, PrivateMethodTester} import org.scalatest.concurrent.Eventually._ diff --git a/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileCompactorSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileCompactorSuite.scala index ac39f022d5ca6..7d07af4d7246b 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileCompactorSuite.scala +++ 
b/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileCompactorSuite.scala @@ -23,10 +23,9 @@ import scala.io.{Codec, Source} import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} import org.json4s.jackson.JsonMethods.parse -import org.apache.spark.{SparkConf, SparkFunSuite, Success} +import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.history.EventLogTestHelper.writeEventsToRollingWriter -import org.apache.spark.executor.ExecutorMetrics import org.apache.spark.scheduler._ import org.apache.spark.scheduler.cluster.ExecutorInfo import org.apache.spark.status.ListenerEventsTestHelper._ diff --git a/core/src/test/scala/org/apache/spark/deploy/master/ui/MasterWebUISuite.scala b/core/src/test/scala/org/apache/spark/deploy/master/ui/MasterWebUISuite.scala index 35de457ec48ce..be83ec12f92f5 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/ui/MasterWebUISuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/ui/MasterWebUISuite.scala @@ -21,7 +21,6 @@ import java.io.DataOutputStream import java.net.{HttpURLConnection, URL} import java.nio.charset.StandardCharsets import java.util.Date -import javax.servlet.http.HttpServletResponse import scala.collection.mutable.HashMap @@ -32,7 +31,6 @@ import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} import org.apache.spark.deploy.DeployMessages.{DecommissionWorkersOnHosts, KillDriverResponse, RequestKillDriver} import org.apache.spark.deploy.DeployTestUtils._ import org.apache.spark.deploy.master._ -import org.apache.spark.internal.config.UI import org.apache.spark.rpc.{RpcEndpointRef, RpcEnv} diff --git a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala index 8e58beff74290..31049d104e63d 100644 --- a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala +++ b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.executor -import java.io.{Externalizable, File, ObjectInput, ObjectOutput} +import java.io.{Externalizable, ObjectInput, ObjectOutput} import java.lang.Thread.UncaughtExceptionHandler import java.nio.ByteBuffer import java.util.Properties @@ -41,7 +41,6 @@ import org.scalatestplus.mockito.MockitoSugar import org.apache.spark._ import org.apache.spark.TaskState.TaskState import org.apache.spark.broadcast.Broadcast -import org.apache.spark.deploy.{SimpleApplicationTest, SparkSubmitSuite} import org.apache.spark.internal.config._ import org.apache.spark.internal.config.UI._ import org.apache.spark.memory.TestMemoryManager @@ -53,7 +52,7 @@ import org.apache.spark.scheduler.{DirectTaskResult, FakeTask, ResultTask, Task, import org.apache.spark.serializer.{JavaSerializer, SerializerInstance, SerializerManager} import org.apache.spark.shuffle.FetchFailedException import org.apache.spark.storage.{BlockManager, BlockManagerId} -import org.apache.spark.util.{LongAccumulator, UninterruptibleThread, Utils} +import org.apache.spark.util.{LongAccumulator, UninterruptibleThread} class ExecutorSuite extends SparkFunSuite with LocalSparkContext with MockitoSugar with Eventually with PrivateMethodTester { diff --git a/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala b/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala index fab7aea6c47aa..f1d7053c34594 100644 --- 
a/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala +++ b/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala @@ -29,7 +29,6 @@ import org.scalatest.BeforeAndAfterAll import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} import org.apache.spark.internal.Logging -import org.apache.spark.util.Utils /** * Tests the correctness of diff --git a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala index fa1a75d076051..182c3c09e0524 100644 --- a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala +++ b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala @@ -24,7 +24,7 @@ import scala.reflect.ClassTag import scala.util.Random import org.mockito.ArgumentMatchers.any -import org.mockito.Mockito.{mock, times, verify, when} +import org.mockito.Mockito.{mock, when} import org.scalatest.BeforeAndAfterEach import org.scalatest.matchers.must.Matchers import org.scalatest.matchers.should.Matchers._ diff --git a/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala b/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala index 278a72a7192d8..e8e8682e20ed4 100644 --- a/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala @@ -26,10 +26,8 @@ import org.json4s.{DefaultFormats, Extraction} import org.apache.spark.{LocalSparkContext, SparkConf, SparkException, SparkFunSuite} import org.apache.spark.TestUtils._ import org.apache.spark.internal.config._ -import org.apache.spark.internal.config.Tests._ import org.apache.spark.resource.ResourceUtils._ import org.apache.spark.resource.TestResourceIDs._ -import org.apache.spark.scheduler.LiveListenerBus import org.apache.spark.util.Utils class ResourceUtilsSuite extends SparkFunSuite diff --git a/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala index e4ec62f8efc5b..b7ac9ecac2387 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala @@ -25,7 +25,6 @@ import org.scalatest.concurrent.Eventually import org.scalatest.time.SpanSugar._ import org.apache.spark._ -import org.apache.spark.internal.config import org.apache.spark.internal.config.Tests.TEST_NO_STAGE_RETRY class BarrierTaskContextSuite extends SparkFunSuite with LocalSparkContext with Eventually { diff --git a/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala index 47e37fc55cefe..65d51e57ee308 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala @@ -188,7 +188,6 @@ class CoarseGrainedSchedulerBackendSuite extends SparkFunSuite with LocalSparkCo } test("extra resources from executor") { - import TestUtils._ val execCores = 3 val conf = new SparkConf() diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index 99be1faab8b85..58aa246b7358f 
100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -42,7 +42,7 @@ import org.apache.spark.resource.ResourceUtils.{FPGA, GPU} import org.apache.spark.scheduler.SchedulingMode.SchedulingMode import org.apache.spark.shuffle.{FetchFailedException, MetadataFetchFailedException} import org.apache.spark.storage.{BlockId, BlockManagerId, BlockManagerMaster} -import org.apache.spark.util.{AccumulatorContext, AccumulatorV2, CallSite, LongAccumulator, ThreadUtils, Utils} +import org.apache.spark.util.{AccumulatorContext, AccumulatorV2, CallSite, LongAccumulator, Utils} class DAGSchedulerEventProcessLoopTester(dagScheduler: DAGScheduler) extends DAGSchedulerEventProcessLoop(dagScheduler) { diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala index 0c60c42c054cf..b6a59c8bbd944 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala @@ -34,7 +34,7 @@ import org.apache.spark.internal.config import org.apache.spark.resource.{ExecutorResourceRequests, ResourceProfile, TaskResourceRequests} import org.apache.spark.resource.ResourceUtils._ import org.apache.spark.resource.TestResourceIDs._ -import org.apache.spark.util.{Clock, ManualClock, SystemClock} +import org.apache.spark.util.{Clock, ManualClock} class FakeSchedulerBackend extends SchedulerBackend { def start(): Unit = {} diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala index e01e278f60205..a760dda3897df 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala @@ -1768,7 +1768,6 @@ class TaskSetManagerSuite } test("TaskSetManager passes task resource along") { - import TestUtils._ sc = new SparkContext("local", "test") sc.conf.set(TASK_GPU_ID.amountConf, "2") diff --git a/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala index 4a92cbcb85847..1c2326db6dc99 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala @@ -19,14 +19,12 @@ package org.apache.spark.scheduler import java.util.concurrent.Semaphore -import scala.concurrent.TimeoutException import scala.concurrent.duration._ -import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkException, SparkFunSuite, - TestUtils} +import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkFunSuite, TestUtils} import org.apache.spark.internal.config import org.apache.spark.scheduler.cluster.StandaloneSchedulerBackend -import org.apache.spark.util.{RpcUtils, SerializableBuffer, ThreadUtils} +import org.apache.spark.util.ThreadUtils class WorkerDecommissionSuite extends SparkFunSuite with LocalSparkContext { diff --git a/core/src/test/scala/org/apache/spark/storage/BlockInfoManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockInfoManagerSuite.scala index 9c0699bc981f8..d2bf385e10796 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockInfoManagerSuite.scala +++ 
b/core/src/test/scala/org/apache/spark/storage/BlockInfoManagerSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.storage import java.util.Properties -import scala.concurrent.{Await, ExecutionContext, Future} +import scala.concurrent.{ExecutionContext, Future} import scala.language.implicitConversions import scala.reflect.ClassTag diff --git a/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala b/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala index 48e0d218c0e5c..d02d7f862df80 100644 --- a/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.ui -import java.util.Locale import javax.servlet.http.HttpServletRequest import scala.xml.Node diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index 857749e84764d..20624c743bc22 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -18,8 +18,7 @@ package org.apache.spark.util import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataOutput, DataOutputStream, File, - FileOutputStream, InputStream, PrintStream, SequenceInputStream} -import java.lang.{Double => JDouble, Float => JFloat} + FileOutputStream, PrintStream, SequenceInputStream} import java.lang.reflect.Field import java.net.{BindException, ServerSocket, URI} import java.nio.{ByteBuffer, ByteOrder} diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala index 86d00cac9485f..487cb27b93fe8 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala @@ -41,7 +41,6 @@ object DeveloperApiExample { .builder .appName("DeveloperApiExample") .getOrCreate() - import spark.implicits._ // Prepare training data. 
val training = spark.createDataFrame(Seq( diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala index 2845028dd0814..7a7501ee84526 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala @@ -30,7 +30,6 @@ object RankingMetricsExample { .builder .appName("RankingMetricsExample") .getOrCreate() - import spark.implicits._ // $example on$ // Read in the ratings data val ratings = spark.read.textFile("data/mllib/sample_movielens_data.txt").rdd.map { line => diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/SparkSQLExample.scala b/examples/src/main/scala/org/apache/spark/examples/sql/SparkSQLExample.scala index fde281087c267..b17b86c08314b 100644 --- a/examples/src/main/scala/org/apache/spark/examples/sql/SparkSQLExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/sql/SparkSQLExample.scala @@ -42,8 +42,6 @@ object SparkSQLExample { .config("spark.some.config.option", "some-value") .getOrCreate() - // For implicit conversions like converting RDDs to DataFrames - import spark.implicits._ // $example off:init_session$ runBasicDataFrameExample(spark) diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala index c685c89f0dfc8..09c849960c1b5 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala @@ -27,7 +27,7 @@ import org.apache.avro.Schema.Type._ import org.apache.spark.annotation.DeveloperApi import org.apache.spark.sql.catalyst.util.RandomUUIDGenerator import org.apache.spark.sql.types._ -import org.apache.spark.sql.types.Decimal.{maxPrecisionForBytes, minBytesForPrecision} +import org.apache.spark.sql.types.Decimal.minBytesForPrecision /** * This object contains method that are used to convert sparkSQL schemas to avro schemas and vice diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index c9c6bcecac14e..d3bfb716f515c 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -44,7 +44,7 @@ import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.Filter import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{withDefaultTimeZone, LA, UTC} import org.apache.spark.sql.execution.{FormattedMode, SparkPlan} -import org.apache.spark.sql.execution.datasources.{CommonFileDataSourceSuite, DataSource, FilePartition, PartitionedFile} +import org.apache.spark.sql.execution.datasources.{CommonFileDataSourceSuite, DataSource, FilePartition} import org.apache.spark.sql.execution.datasources.v2.BatchScanExec import org.apache.spark.sql.functions.col import org.apache.spark.sql.internal.SQLConf diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatch.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatch.scala index 9ad083f1cfde5..a1b0f7d22216b 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatch.scala +++ 
b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatch.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.kafka010 -import org.apache.kafka.common.TopicPartition - import org.apache.spark.SparkEnv import org.apache.spark.internal.Logging import org.apache.spark.internal.config.Network.NETWORK_TIMEOUT diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala index 6599e7e0fe707..c25b8b4e510a0 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala @@ -19,12 +19,9 @@ package org.apache.spark.sql.kafka010 import java.{util => ju} -import org.apache.kafka.clients.consumer.ConsumerConfig - import org.apache.spark.SparkEnv import org.apache.spark.internal.Logging import org.apache.spark.internal.config.Network.NETWORK_TIMEOUT -import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.sql.SparkSession import org.apache.spark.sql.connector.read.{InputPartition, PartitionReaderFactory} import org.apache.spark.sql.connector.read.streaming.{MicroBatchStream, Offset, ReadAllAvailable, ReadLimit, ReadMaxRows, SupportsAdmissionControl} diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala index 6d30bd2a6d2cd..adcc20c25cb5f 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala @@ -23,7 +23,7 @@ import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import scala.util.control.NonFatal -import org.apache.kafka.clients.consumer.{Consumer, ConsumerConfig, KafkaConsumer, OffsetAndTimestamp} +import org.apache.kafka.clients.consumer.{Consumer, ConsumerConfig, OffsetAndTimestamp} import org.apache.kafka.common.TopicPartition import org.apache.spark.SparkEnv @@ -33,10 +33,12 @@ import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.util.{UninterruptibleThread, UninterruptibleThreadRunner} /** - * This class uses Kafka's own [[KafkaConsumer]] API to read data offsets from Kafka. + * This class uses Kafka's own [[org.apache.kafka.clients.consumer.KafkaConsumer]] API to + * read data offsets from Kafka. * The [[ConsumerStrategy]] class defines which Kafka topics and partitions should be read * by this source. These strategies directly correspond to the different consumption options - * in. This class is designed to return a configured [[KafkaConsumer]] that is used by the + * in. This class is designed to return a configured + * [[org.apache.kafka.clients.consumer.KafkaConsumer]] that is used by the * [[KafkaSource]] to query for the offsets. See the docs on * [[org.apache.spark.sql.kafka010.ConsumerStrategy]] * for more details. @@ -50,7 +52,8 @@ private[kafka010] class KafkaOffsetReader( driverGroupIdPrefix: String) extends Logging { /** - * [[UninterruptibleThreadRunner]] ensures that all [[KafkaConsumer]] communication called in an + * [[UninterruptibleThreadRunner]] ensures that all + * [[org.apache.kafka.clients.consumer.KafkaConsumer]] communication called in an * [[UninterruptibleThread]]. 
In the case of streaming queries, we are already running in an * [[UninterruptibleThread]], however for batch mode this is not the case. */ diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala index 413a0c4de8bea..69a66e2209773 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala @@ -17,13 +17,10 @@ package org.apache.spark.sql.kafka010 -import org.apache.kafka.common.TopicPartition - import org.apache.spark.internal.Logging import org.apache.spark.internal.config.Network.NETWORK_TIMEOUT import org.apache.spark.rdd.RDD import org.apache.spark.sql.{Row, SQLContext} -import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.sources.{BaseRelation, TableScan} import org.apache.spark.sql.types.StructType diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala index 57879c7ca31cf..71ccb5f952f0a 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala @@ -18,11 +18,7 @@ package org.apache.spark.sql.kafka010 import java.{util => ju} -import java.io._ -import java.nio.charset.StandardCharsets -import org.apache.commons.io.IOUtils -import org.apache.kafka.clients.consumer.ConsumerConfig import org.apache.kafka.common.TopicPartition import org.apache.spark.SparkContext @@ -35,7 +31,6 @@ import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.connector.read.streaming import org.apache.spark.sql.connector.read.streaming.{ReadAllAvailable, ReadLimit, ReadMaxRows, SupportsAdmissionControl} import org.apache.spark.sql.execution.streaming._ -import org.apache.spark.sql.kafka010.KafkaSource._ import org.apache.spark.sql.kafka010.KafkaSourceProvider._ import org.apache.spark.sql.types._ diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala index 748d623a0a32a..3ace0874674b6 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala @@ -30,7 +30,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.kafka010.KafkaConfigUpdater import org.apache.spark.sql.{AnalysisException, DataFrame, SaveMode, SQLContext} import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap -import org.apache.spark.sql.connector.catalog.{SupportsRead, SupportsWrite, Table, TableCapability, TableProvider} +import org.apache.spark.sql.connector.catalog.{SupportsRead, SupportsWrite, Table, TableCapability} import org.apache.spark.sql.connector.read.{Batch, Scan, ScanBuilder} import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, MicroBatchStream} import org.apache.spark.sql.connector.write.{BatchWrite, LogicalWriteInfo, SupportsTruncate, WriteBuilder} diff --git 
a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala index fe783ffe53a3b..08f673455d729 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala @@ -1178,7 +1178,6 @@ class KafkaMicroBatchV2SourceSuite extends KafkaMicroBatchSourceSuiteBase { } testWithUninterruptibleThread("minPartitions is supported") { - import testImplicits._ val topic = newTopic() val tp = new TopicPartition(topic, 0) diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala index c5f3086b38c99..43ed4a8378a8c 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala @@ -26,7 +26,6 @@ import javax.security.auth.login.Configuration import scala.collection.JavaConverters._ import scala.io.Source -import scala.util.Random import scala.util.control.NonFatal import com.google.common.io.Files @@ -38,13 +37,12 @@ import org.apache.hadoop.minikdc.MiniKdc import org.apache.hadoop.security.UserGroupInformation import org.apache.kafka.clients.CommonClientConfigs import org.apache.kafka.clients.admin._ -import org.apache.kafka.clients.consumer.KafkaConsumer import org.apache.kafka.clients.producer._ import org.apache.kafka.common.TopicPartition import org.apache.kafka.common.config.SaslConfigs import org.apache.kafka.common.network.ListenerName import org.apache.kafka.common.security.auth.SecurityProtocol.{PLAINTEXT, SASL_PLAINTEXT} -import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer} +import org.apache.kafka.common.serialization.StringSerializer import org.apache.kafka.common.utils.SystemTime import org.apache.zookeeper.server.{NIOServerCnxnFactory, ZooKeeperServer} import org.apache.zookeeper.server.auth.SASLAuthenticationProvider diff --git a/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala b/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala index 307a69f9b84c5..bc790418decd3 100644 --- a/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala +++ b/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala @@ -36,7 +36,7 @@ import org.apache.kafka.common.security.auth.SecurityProtocol.{SASL_PLAINTEXT, S import org.apache.kafka.common.security.scram.ScramLoginModule import org.apache.kafka.common.security.token.delegation.DelegationToken -import org.apache.spark.{SparkConf, SparkEnv} +import org.apache.spark.SparkConf import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.security.HadoopDelegationTokenManager import org.apache.spark.internal.Logging diff --git a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/mocks/MockScheduler.scala b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/mocks/MockScheduler.scala index ac81f92f86109..c0724909bc350 100644 --- a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/mocks/MockScheduler.scala +++ 
b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/mocks/MockScheduler.scala @@ -19,8 +19,6 @@ package org.apache.spark.streaming.kafka010.mocks import java.util.concurrent.{ScheduledFuture, TimeUnit} -import scala.collection.mutable.PriorityQueue - import kafka.utils.Scheduler import org.apache.kafka.common.utils.Time import org.jmock.lib.concurrent.DeterministicScheduler diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala index 11e949536f2b6..770eb2d89d522 100644 --- a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala +++ b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala @@ -21,7 +21,6 @@ import java.util.concurrent._ import scala.util.control.NonFatal import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorCheckpointer -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.ShutdownReason import org.apache.spark.internal.Logging import org.apache.spark.streaming.Duration diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala index 8c3931a1c87fd..e778d083b3f70 100644 --- a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala +++ b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala @@ -50,8 +50,6 @@ private[kinesis] class KinesisInputDStream[T: ClassTag]( val metricsEnabledDimensions: Set[String] ) extends ReceiverInputDStream[T](_ssc) { - import KinesisReadConfigurations._ - private[streaming] override def createBlockRDD(time: Time, blockInfos: Seq[ReceivedBlockInfo]): RDD[T] = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/attribute/package.scala b/mllib/src/main/scala/org/apache/spark/ml/attribute/package.scala index d26acf924c0a3..7bc86c4871cfb 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/attribute/package.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/attribute/package.scala @@ -17,8 +17,6 @@ package org.apache.spark.ml -import org.apache.spark.ml.attribute.{Attribute, AttributeGroup} - /** * ==ML attributes== * diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VarianceThresholdSelector.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VarianceThresholdSelector.scala index cd245dd723348..2c7186015d400 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VarianceThresholdSelector.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VarianceThresholdSelector.scala @@ -17,13 +17,10 @@ package org.apache.spark.ml.feature -import scala.collection.mutable.ArrayBuilder - import org.apache.hadoop.fs.Path import org.apache.spark.annotation.Since import org.apache.spark.ml._ -import org.apache.spark.ml.attribute.{Attribute, AttributeGroup, NominalAttribute} import org.apache.spark.ml.linalg._ import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ @@ -31,7 +28,7 @@ import org.apache.spark.ml.stat.Summarizer import org.apache.spark.ml.util._ import org.apache.spark.sql._ import org.apache.spark.sql.functions._ -import org.apache.spark.sql.types.{StructField, StructType} +import org.apache.spark.sql.types.StructType /** diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/feature/package.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/package.scala index 6ff970cc72dfd..ac63024768d77 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/package.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/package.scala @@ -17,8 +17,6 @@ package org.apache.spark.ml -import org.apache.spark.ml.feature.{HashingTF, IDF, IDFModel, VectorAssembler} - /** * == Feature transformers == * diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala index a0e5924a7ee3a..088f6a682be82 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala @@ -31,7 +31,7 @@ import org.apache.hadoop.fs.Path import org.json4s.DefaultFormats import org.json4s.JsonDSL._ -import org.apache.spark.{Dependency, Partitioner, ShuffleDependency, SparkContext, SparkException} +import org.apache.spark.{Partitioner, SparkException} import org.apache.spark.annotation.Since import org.apache.spark.internal.Logging import org.apache.spark.ml.{Estimator, Model} diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/TopByKeyAggregator.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/TopByKeyAggregator.scala index 517179c0eb9ae..ed41169070c59 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/TopByKeyAggregator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/TopByKeyAggregator.scala @@ -17,7 +17,6 @@ package org.apache.spark.ml.recommendation -import scala.language.implicitConversions import scala.reflect.runtime.universe.TypeTag import org.apache.spark.sql.{Encoder, Encoders} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala index f88f3fce61b33..75262ac4fe06b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala @@ -30,7 +30,6 @@ import org.apache.spark.mllib.regression._ import org.apache.spark.mllib.util.{DataValidators, Loader, Saveable} import org.apache.spark.rdd.RDD import org.apache.spark.sql.SparkSession -import org.apache.spark.storage.StorageLevel /** * Classification model trained using Multinomial/Binary Logistic Regression. 
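Many of the hunks above follow a single pattern: an import that is referenced only from a Scaladoc `[[...]]` link is dropped and the link is rewritten with the fully qualified name (see the `OpenHashMap` TODO in ResourceAllocator and the `KafkaConsumer` links in KafkaOffsetReader). That keeps the docs resolvable while satisfying the unused-import options wired into pom.xml later in this patch (`-Ywarn-unused-import`, and `-Wconf:cat=unused-imports:e` under the Scala 2.13 profile, which turns the warning into an error). The snippet below is not part of the patch; it is only a minimal before/after sketch of that pattern:

```scala
// Not part of this patch: a minimal sketch of the Scaladoc-link pattern
// applied in the ResourceAllocator and KafkaOffsetReader hunks above.

// Before: the import exists only so that the short [[OpenHashMap]] link
// resolves, so the new unused-import flags report it.
//
//   import org.apache.spark.util.collection.OpenHashMap
//
//   /** TODO Use [[OpenHashMap]] instead to gain better performance. */

// After: no import; the Scaladoc link is written out in full instead.
object ScaladocLinkPattern {
  /** TODO Use [[org.apache.spark.util.collection.OpenHashMap]] instead to gain better performance. */
  def example(): Unit = ()
}
```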
diff --git a/mllib/src/test/scala/org/apache/spark/ml/fpm/PrefixSpanSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/fpm/PrefixSpanSuite.scala index 2252151af306b..cc8982f338702 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/fpm/PrefixSpanSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/fpm/PrefixSpanSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.ml.fpm import org.apache.spark.ml.util.MLTest -import org.apache.spark.sql.DataFrame class PrefixSpanSuite extends MLTest { diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala index a30c47293c543..a0e17a4b40fd2 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala @@ -507,8 +507,6 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest val residualDeviancesR = Array(3.809296, 3.70055) - import GeneralizedLinearRegression._ - var idx = 0 val link = "log" val dataset = datasetPoissonLogWithZero @@ -790,8 +788,6 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest val expected = Seq(0.5108256, 0.1201443, 1.600000, 1.886792, 0.625, 0.530, -0.4700036, -0.6348783, 1.325782, 1.463641) - import GeneralizedLinearRegression._ - var idx = 0 for (family <- GeneralizedLinearRegression.supportedFamilyNames.sortWith(_ < _)) { for (useWeight <- Seq(false, true)) { diff --git a/mllib/src/test/scala/org/apache/spark/ml/stat/SummarizerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/stat/SummarizerSuite.scala index 68ba57c0d5fc8..e438a4135908e 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/stat/SummarizerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/stat/SummarizerSuite.scala @@ -29,7 +29,6 @@ class SummarizerSuite extends SparkFunSuite with MLlibTestSparkContext { import testImplicits._ import Summarizer._ - import SummaryBuilderImpl._ private case class ExpectedMetrics( mean: Vector, diff --git a/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala index 2a83d0aaf9699..3ca6816ce7c0d 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala @@ -19,7 +19,6 @@ package org.apache.spark.ml.tree.impl import scala.annotation.tailrec import scala.collection.mutable -import scala.language.implicitConversions import org.apache.spark.SparkFunSuite import org.apache.spark.ml.classification.DecisionTreeClassificationModel diff --git a/mllib/src/test/scala/org/apache/spark/ml/util/DefaultReadWriteTest.scala b/mllib/src/test/scala/org/apache/spark/ml/util/DefaultReadWriteTest.scala index dd0139b94f098..c5bf202a2d337 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/util/DefaultReadWriteTest.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/util/DefaultReadWriteTest.scala @@ -19,7 +19,6 @@ package org.apache.spark.ml.util import java.io.{File, IOException} -import org.json4s.JNothing import org.scalatest.Suite import org.apache.spark.{SparkException, SparkFunSuite} diff --git a/mllib/src/test/scala/org/apache/spark/ml/util/PMMLReadWriteTest.scala b/mllib/src/test/scala/org/apache/spark/ml/util/PMMLReadWriteTest.scala index d2c4832b12bac..19e9fe4bdb30e 100644 --- 
a/mllib/src/test/scala/org/apache/spark/ml/util/PMMLReadWriteTest.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/util/PMMLReadWriteTest.scala @@ -23,10 +23,7 @@ import org.dmg.pmml.PMML import org.scalatest.Suite import org.apache.spark.SparkContext -import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.param._ -import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.sql.Dataset trait PMMLReadWriteTest extends TempDirectory { self: Suite => /** diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala index 56d41403f74cc..8f311bbf9f840 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala @@ -20,7 +20,6 @@ package org.apache.spark.mllib.clustering import java.util.{ArrayList => JArrayList} import breeze.linalg.{argmax, argtopk, max, DenseMatrix => BDM} -import org.scalatest.Assertions import org.apache.spark.SparkFunSuite import org.apache.spark.graphx.Edge diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala index 9d7177e0a149e..0e789821aa5f3 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala @@ -22,7 +22,7 @@ import java.{util => ju} import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, SparseVector => BSV} import org.apache.spark.{SparkException, SparkFunSuite} -import org.apache.spark.mllib.linalg.{DenseMatrix, DenseVector, Matrices, Matrix, SparseMatrix, SparseVector, Vectors} +import org.apache.spark.mllib.linalg.{DenseMatrix, Matrices, Matrix, SparseMatrix} import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ diff --git a/pom.xml b/pom.xml index ee88e11046803..3ae2e7420e154 100644 --- a/pom.xml +++ b/pom.xml @@ -164,6 +164,7 @@ 3.2.2 2.12.10 2.12 + -Ywarn-unused-import 2.0.0 --test @@ -2537,6 +2538,7 @@ -deprecation -feature -explaintypes + ${scalac.arg.unused-imports} -target:jvm-1.8 @@ -3266,6 +3268,7 @@ 2.13.3 2.13 + -Wconf:cat=unused-imports:e diff --git a/repl/src/test/scala-2.12/org/apache/spark/repl/Repl2Suite.scala b/repl/src/test/scala-2.12/org/apache/spark/repl/Repl2Suite.scala index 4ffa8beaf4740..90af9ec299efc 100644 --- a/repl/src/test/scala-2.12/org/apache/spark/repl/Repl2Suite.scala +++ b/repl/src/test/scala-2.12/org/apache/spark/repl/Repl2Suite.scala @@ -18,17 +18,12 @@ package org.apache.spark.repl import java.io._ -import java.nio.file.Files import scala.tools.nsc.interpreter.SimpleReader -import org.apache.log4j.{Level, LogManager, PropertyConfigurator} import org.scalatest.BeforeAndAfterAll import org.apache.spark.{SparkContext, SparkFunSuite} -import org.apache.spark.internal.Logging -import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION class Repl2Suite extends SparkFunSuite with BeforeAndAfterAll { test("propagation of local properties") { diff --git a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala index 5428fa4ee9df7..f696e93e9cef2 100644 --- a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala +++ 
b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala @@ -28,7 +28,6 @@ import java.util.Collections import javax.tools.{JavaFileObject, SimpleJavaFileObject, ToolProvider} import scala.io.Source -import scala.language.implicitConversions import com.google.common.io.Files import org.mockito.ArgumentMatchers.{any, anyString} diff --git a/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala index 95d908cec5de0..6566d29d16e91 100644 --- a/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala +++ b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala @@ -23,7 +23,7 @@ import java.nio.file.Files import org.apache.log4j.{Level, LogManager, PropertyConfigurator} import org.scalatest.BeforeAndAfterAll -import org.apache.spark.{SparkContext, SparkFunSuite} +import org.apache.spark.SparkFunSuite import org.apache.spark.internal.Logging import org.apache.spark.sql.SparkSession import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/EnvSecretsFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/EnvSecretsFeatureStep.scala index d78f04dcc40e6..222e19c5e20f1 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/EnvSecretsFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/EnvSecretsFeatureStep.scala @@ -18,7 +18,7 @@ package org.apache.spark.deploy.k8s.features import scala.collection.JavaConverters._ -import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder, HasMetadata} +import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder} import org.apache.spark.deploy.k8s.{KubernetesConf, SparkPod} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountSecretsFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountSecretsFeatureStep.scala index f4e1a3a326729..9de7686c8a9c0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountSecretsFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountSecretsFeatureStep.scala @@ -16,7 +16,7 @@ */ package org.apache.spark.deploy.k8s.features -import io.fabric8.kubernetes.api.model.{ContainerBuilder, HasMetadata, PodBuilder, VolumeBuilder, VolumeMountBuilder} +import io.fabric8.kubernetes.api.model.{ContainerBuilder, PodBuilder, VolumeBuilder, VolumeMountBuilder} import org.apache.spark.deploy.k8s.{KubernetesConf, SparkPod} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala index c029b248f7ea4..863cb28bc827c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala @@ -35,7 +35,6 @@ import org.apache.spark.deploy.k8s.KubernetesUtils.addOwnerReference import org.apache.spark.internal.Logging import org.apache.spark.internal.config.DYN_ALLOCATION_EXECUTOR_IDLE_TIMEOUT import 
org.apache.spark.resource.ResourceProfile -import org.apache.spark.scheduler.cluster.SchedulerBackendUtils import org.apache.spark.util.{Clock, Utils} private[spark] class ExecutorPodsAllocator( diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala index cc5c2f4b6325d..151e98ba17e3b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala @@ -25,7 +25,6 @@ import io.fabric8.kubernetes.client.Config import org.apache.spark.SparkContext import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesUtils, SparkKubernetesClientFactory} import org.apache.spark.deploy.k8s.Config._ -import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.internal.Logging import org.apache.spark.scheduler.{ExternalClusterManager, SchedulerBackend, TaskScheduler, TaskSchedulerImpl} import org.apache.spark.util.{SystemClock, ThreadUtils} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesTestConf.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesTestConf.scala index 83d9481e6f2b0..0567f32c23134 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesTestConf.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesTestConf.scala @@ -21,7 +21,6 @@ import io.fabric8.kubernetes.api.model.Pod import org.apache.spark.SparkConf import org.apache.spark.deploy.k8s.Config._ -import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.submit.{JavaMainAppResource, MainAppResource} /** diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala index 7c231586af935..ef57a4b861508 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.k8s import scala.collection.JavaConverters._ -import io.fabric8.kubernetes.api.model.{Container, ContainerBuilder, PodBuilder} +import io.fabric8.kubernetes.api.model.{ContainerBuilder, PodBuilder} import org.apache.spark.SparkFunSuite diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala index 26bd317de8ec6..4d4c4baeb12c0 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala @@ -26,7 +26,6 @@ import org.mockito.Mockito.{mock, never, verify, when} import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkException, SparkFunSuite} -import org.apache.spark.deploy.k8s._ import org.apache.spark.internal.config.ConfigEntry abstract class PodBuilderSuite extends SparkFunSuite { diff 
--git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala index 6a7366e9c6b7a..a44d465e35087 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala @@ -20,11 +20,8 @@ import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.k8s._ -import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.submit._ -import org.apache.spark.internal.config._ -import org.apache.spark.util.Utils class DriverCommandFeatureStepSuite extends SparkFunSuite { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala index 18afd10395566..413371d056b26 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala @@ -25,7 +25,6 @@ import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.k8s.{KubernetesTestConf, SparkPod} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.submit.JavaMainAppResource import org.apache.spark.internal.config._ import org.apache.spark.internal.config.UI._ import org.apache.spark.util.ManualClock diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/HadoopConfDriverFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/HadoopConfDriverFeatureStepSuite.scala index e1c01dbdc7358..c078e69b8a14b 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/HadoopConfDriverFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/HadoopConfDriverFeatureStepSuite.scala @@ -27,7 +27,6 @@ import io.fabric8.kubernetes.api.model.ConfigMap import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.submit.JavaMainAppResource import org.apache.spark.util.{SparkConfWithEnv, Utils} class HadoopConfDriverFeatureStepSuite extends SparkFunSuite { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStepSuite.scala index 41ca3a94ce7a7..094fcb39782f4 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStepSuite.scala @@ -26,14 +26,13 @@ import com.google.common.io.Files import 
io.fabric8.kubernetes.api.model.{ConfigMap, Secret} import org.apache.commons.codec.binary.Base64 import org.apache.hadoop.io.Text -import org.apache.hadoop.security.{Credentials, UserGroupInformation} +import org.apache.hadoop.security.UserGroupInformation import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.submit.JavaMainAppResource import org.apache.spark.internal.config._ import org.apache.spark.util.Utils diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala index bbb89fd0a1c24..95ee37e3daa41 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.deploy.k8s.features import scala.collection.JavaConverters._ -import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.SparkFunSuite import org.apache.spark.deploy.k8s._ class MountVolumesFeatureStepSuite extends SparkFunSuite { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala index 528b755c41605..8401f7102ad8e 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala @@ -29,7 +29,7 @@ import org.mockito.stubbing.Answer import org.scalatest.BeforeAndAfter import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} -import org.apache.spark.deploy.k8s.{KubernetesExecutorConf, KubernetesExecutorSpec, SparkPod} +import org.apache.spark.deploy.k8s.{KubernetesExecutorConf, KubernetesExecutorSpec} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.Fabric8Aliases._ diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala index af980f0494369..0bf01e6b66427 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala @@ -21,7 +21,6 @@ import java.util.UUID import scala.collection.JavaConverters._ import scala.collection.mutable -import scala.collection.mutable.ArrayBuffer import io.fabric8.kubernetes.client.DefaultKubernetesClient import org.scalatest.concurrent.Eventually diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala 
b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala index e50115d6f493f..ee44cb5f85835 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala @@ -22,7 +22,6 @@ import java.util.concurrent.CountDownLatch import java.util.zip.{ZipEntry, ZipOutputStream} import scala.collection.JavaConverters._ -import scala.util.Try import io.fabric8.kubernetes.client.dsl.ExecListener import okhttp3.Response @@ -32,7 +31,6 @@ import org.apache.hadoop.util.VersionInfo import org.apache.spark.{SPARK_VERSION, SparkException} import org.apache.spark.internal.Logging -import org.apache.spark.util.{Utils => SparkUtils} object Utils extends Logging { diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/cloud/KubeConfigBackend.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/cloud/KubeConfigBackend.scala index be1834c0b5dea..0fbed4a220e68 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/cloud/KubeConfigBackend.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/cloud/KubeConfigBackend.scala @@ -16,8 +16,6 @@ */ package org.apache.spark.deploy.k8s.integrationtest.backend.cloud -import java.nio.file.Paths - import io.fabric8.kubernetes.client.{Config, DefaultKubernetesClient} import io.fabric8.kubernetes.client.utils.Utils import org.apache.commons.lang3.StringUtils diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/ui/MesosClusterUI.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/ui/MesosClusterUI.scala index c0cdcda14291f..e260fb8e25f4c 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/ui/MesosClusterUI.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/ui/MesosClusterUI.scala @@ -20,7 +20,6 @@ package org.apache.spark.deploy.mesos.ui import org.apache.spark.{SecurityManager, SparkConf} import org.apache.spark.scheduler.cluster.mesos.MesosClusterScheduler import org.apache.spark.ui.{SparkUI, WebUI} -import org.apache.spark.ui.JettyUtils._ /** * UI that displays driver results from the [[org.apache.spark.deploy.mesos.MesosClusterDispatcher]] diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala index bbe1ff495d8a6..efcef09132f5b 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala @@ -39,7 +39,7 @@ import org.apache.spark.launcher.{LauncherBackend, SparkAppHandle} import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.network.shuffle.mesos.MesosExternalBlockStoreClient import org.apache.spark.resource.ResourceProfile -import org.apache.spark.rpc.{RpcEndpointAddress, RpcEndpointRef} +import 
org.apache.spark.rpc.RpcEndpointAddress import org.apache.spark.scheduler.{ExecutorProcessLost, TaskSchedulerImpl} import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend import org.apache.spark.util.Utils diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendUtil.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendUtil.scala index 981b8e9df1747..a5a2611be3765 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendUtil.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendUtil.scala @@ -17,7 +17,7 @@ package org.apache.spark.scheduler.cluster.mesos -import org.apache.mesos.Protos.{ContainerInfo, Environment, Image, NetworkInfo, Parameter, Secret, +import org.apache.mesos.Protos.{ContainerInfo, Image, NetworkInfo, Parameter, Secret, TaskState => MesosTaskState, Volume} import org.apache.mesos.Protos.ContainerInfo.{DockerInfo, MesosInfo} import org.apache.mesos.Protos.Environment.Variable diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala index 2be8835f77e36..b5a360167679e 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala @@ -29,10 +29,10 @@ import scala.util.control.NonFatal import com.google.common.base.Splitter import com.google.common.io.Files import org.apache.mesos.{MesosSchedulerDriver, Protos, Scheduler, SchedulerDriver} -import org.apache.mesos.Protos.{SlaveID => AgentID, TaskState => MesosTaskState, _} +import org.apache.mesos.Protos.{TaskState => MesosTaskState, _} import org.apache.mesos.Protos.FrameworkInfo.Capability import org.apache.mesos.Protos.Resource.ReservationInfo -import org.apache.mesos.protobuf.{ByteString, GeneratedMessageV3} +import org.apache.mesos.protobuf.GeneratedMessageV3 import org.apache.spark.{SparkConf, SparkContext, SparkException} import org.apache.spark.TaskState diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala index d9262bbac6586..ede39063cf1bd 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala @@ -19,12 +19,11 @@ package org.apache.spark.deploy.yarn import java.io.File import java.nio.ByteBuffer -import java.util.{Collections, Locale} +import java.util.Collections import scala.collection.JavaConverters._ import scala.collection.mutable.{HashMap, ListBuffer} -import org.apache.hadoop.HadoopIllegalArgumentException import org.apache.hadoop.fs.Path import org.apache.hadoop.io.DataOutputBuffer import org.apache.hadoop.security.UserGroupInformation @@ -40,7 +39,6 @@ import org.apache.spark.{SecurityManager, SparkConf, SparkException} import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ import org.apache.spark.network.util.JavaUtils -import org.apache.spark.resource.ResourceProfile import org.apache.spark.util.Utils private[yarn] class 
ExecutorRunnable( diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/LocalityPreferredContainerPlacementStrategy.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/LocalityPreferredContainerPlacementStrategy.scala index 5640f7ede33df..7ac5beac76e20 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/LocalityPreferredContainerPlacementStrategy.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/LocalityPreferredContainerPlacementStrategy.scala @@ -21,7 +21,7 @@ import scala.collection.JavaConverters._ import scala.collection.mutable.{ArrayBuffer, HashMap, Set} import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.yarn.api.records.{ContainerId, Resource} +import org.apache.hadoop.yarn.api.records.ContainerId import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest import org.apache.spark.SparkConf diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala index 0273de10993eb..09766bf97d8f3 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala @@ -26,7 +26,6 @@ import org.apache.hadoop.yarn.api.records.{ApplicationAccessType, ContainerId, P import org.apache.hadoop.yarn.util.ConverterUtils import org.apache.spark.{SecurityManager, SparkConf} -import org.apache.spark.internal.config._ import org.apache.spark.launcher.YarnCommandBuilderUtils import org.apache.spark.resource.ExecutorResourceRequest import org.apache.spark.util.Utils diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/launcher/YarnCommandBuilderUtils.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/launcher/YarnCommandBuilderUtils.scala index 0c3d080cca254..d000287cb7a96 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/launcher/YarnCommandBuilderUtils.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/launcher/YarnCommandBuilderUtils.scala @@ -17,8 +17,6 @@ package org.apache.spark.launcher -import scala.collection.JavaConverters._ -import scala.collection.mutable.ListBuffer import scala.util.Properties /** diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala index 7f8dd590545c6..5b762f606112c 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala @@ -29,7 +29,6 @@ import org.scalatest.matchers.should.Matchers._ import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.Logging -import org.apache.spark.internal.config._ import org.apache.spark.internal.config.UI._ import org.apache.spark.util.{ResetSystemProperties, Utils} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index c65e181181e83..53c7f17ee6b2e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -30,7 +30,6 @@ import org.apache.spark.sql.catalyst.expressions.objects._ import org.apache.spark.sql.catalyst.util.{ArrayData, MapData} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} -import org.apache.spark.util.Utils /** @@ -894,10 +893,6 @@ trait ScalaReflection extends Logging { import universe._ - // The Predef.Map is scala.collection.immutable.Map. - // Since the map values can be mutable, we explicitly import scala.collection.Map at here. - import scala.collection.Map - /** * Any codes calling `scala.reflect.api.Types.TypeApi.<:<` should be wrapped by this method to * clean up the Scala reflection garbage automatically. Otherwise, it will leak some objects to diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala index 6eed152e6dd77..47a45b0e529c6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.Literal._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala index d3bb72badeb13..deeb8215d22c6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, LookupCatalog, SupportsNamespaces, TableCatalog, TableChange} +import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, LookupCatalog, TableCatalog, TableChange} /** * Resolves catalogs from the multi-part identifiers in SQL statements, and convert the statements diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala index 51eb3d033ddc4..2fa6bf0acea67 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala @@ -21,7 +21,6 @@ import org.apache.spark.sql.catalyst.catalog.SessionCatalog import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.DataType /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala index 83acfb8d4a71c..98bd84fb94bd6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala @@ -18,11 +18,10 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.catalog.CatalogFunction import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan} -import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier, SupportsNamespaces, Table, TableCatalog} +import org.apache.spark.sql.catalyst.plans.logical.LeafNode +import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier, Table, TableCatalog} /** * Holds the name of a namespace that has yet to be looked up in a catalog. It will be resolved to diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala index db930cf7890e6..5643bf8b3a9b7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.catalog -import org.apache.spark.sql.catalyst.analysis.{FunctionAlreadyExistsException, NoSuchDatabaseException, NoSuchFunctionException, NoSuchPartitionException, NoSuchTableException} +import org.apache.spark.sql.catalyst.analysis.{FunctionAlreadyExistsException, NoSuchDatabaseException, NoSuchFunctionException, NoSuchTableException} import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.types.StructType diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala index 3d5c1855f6975..9ab38044e6a88 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.catalyst.encoders -import java.io.ObjectInputStream - import scala.reflect.ClassTag import scala.reflect.runtime.universe.{typeTag, TypeTag} @@ -33,7 +31,7 @@ import org.apache.spark.sql.catalyst.expressions.objects.{AssertNotNull, Initial import org.apache.spark.sql.catalyst.optimizer.{ReassignLambdaVariableID, SimplifyCasts} import org.apache.spark.sql.catalyst.plans.logical.{CatalystSerde, DeserializeToObject, LeafNode, LocalRelation} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{DataType, ObjectType, StringType, StructField, StructType} +import org.apache.spark.sql.types.{ObjectType, StringType, StructField, StructType} import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 1257cf6e787ce..5afc308e52ead 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.expressions -import java.math.{BigDecimal => JavaBigDecimal} import java.time.ZoneId import java.util.Locale import java.util.concurrent.TimeUnit._ @@ -25,7 +24,7 @@ import java.util.concurrent.TimeUnit._ import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion} -import org.apache.spark.sql.catalyst.expressions.Cast.{canCast, forceNullable, resolvableNullability} +import org.apache.spark.sql.catalyst.expressions.Cast.{forceNullable, resolvableNullability} import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.util._ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 35b192cc5544a..1d23953484046 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -24,9 +24,7 @@ import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, TypeCheckResult import org.apache.spark.sql.catalyst.expressions.aggregate.DeclarativeAggregate import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.trees.TreeNode -import org.apache.spark.sql.catalyst.util.toPrettySQL import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -63,7 +61,8 @@ import org.apache.spark.sql.types._ * functions. * - [[NamedExpression]]: An [[Expression]] that is named. * - [[TimeZoneAwareExpression]]: A common base trait for time zone aware expressions. - * - [[SubqueryExpression]]: A base interface for expressions that contain a [[LogicalPlan]]. + * - [[SubqueryExpression]]: A base interface for expressions that contain a + * [[org.apache.spark.sql.catalyst.plans.logical.LogicalPlan]]. * * - [[LeafExpression]]: an expression that has no child. * - [[UnaryExpression]]: an expression that has one child. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala index 6e2bd96784b94..0a69d5aa6b9ad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.types.{AbstractDataType, AnyDataType, DataType, UserDefinedType} +import org.apache.spark.sql.types.{AbstractDataType, AnyDataType, DataType} import org.apache.spark.util.Utils /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 9aa827a58d87a..1ff4a93cf0acd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -38,9 +38,8 @@ import org.apache.spark.metrics.source.CodegenMetrics import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData, MapData, SQLOrderingUtil} +import org.apache.spark.sql.catalyst.util.{ArrayData, MapData, SQLOrderingUtil} import org.apache.spark.sql.catalyst.util.DateTimeConstants.NANOS_PER_MILLIS -import org.apache.spark.sql.catalyst.util.DateTimeUtils._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.Platform @@ -1555,8 +1554,8 @@ object CodeGenerator extends Logging { } /** - * Generates code creating a [[UnsafeArrayData]] or [[GenericArrayData]] based on - * given parameters. + * Generates code creating a [[UnsafeArrayData]] or + * [[org.apache.spark.sql.catalyst.util.GenericArrayData]] based on given parameters. 
* * @param arrayName name of the array to create * @param elementType data type of the elements in source array diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala index 7404030b661c8..c246d07f189b4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.expressions.codegen -import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala index 070570d8f20b2..27b1f89f70870 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala @@ -17,12 +17,8 @@ package org.apache.spark.sql.catalyst.expressions.codegen -import scala.collection.mutable -import scala.collection.mutable.ArrayBuffer - import org.apache.spark.sql.catalyst.expressions.{Attribute, UnsafeRow} import org.apache.spark.sql.types.StructType -import org.apache.spark.unsafe.Platform abstract class UnsafeRowJoiner { def join(row1: UnsafeRow, row2: UnsafeRow): UnsafeRow diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala index 9fef8e9415e72..4454afb6c099b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala @@ -23,7 +23,7 @@ import java.util.concurrent.atomic.AtomicReference import scala.collection.mutable import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion, UnresolvedAttribute, UnresolvedException} +import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion, UnresolvedException} import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.internal.SQLConf diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index f440534745ba1..53d6394d0d1f1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -18,14 +18,11 @@ package org.apache.spark.sql.catalyst.expressions import scala.collection.immutable.TreeSet -import scala.collection.mutable import org.apache.spark.internal.Logging -import org.apache.spark.sql.catalyst.CatalystTypeConverters.convertToScala import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.expressions.BindReferences.bindReference -import 
org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LeafNode, LogicalPlan, Project} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonFilters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonFilters.scala index d6adbe83584e3..0d5974af19ac3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonFilters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonFilters.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.catalyst.json import org.apache.spark.sql.catalyst.{InternalRow, StructFilters} import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources import org.apache.spark.sql.types.StructType diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala index 3dd79d153c236..0ff11ca49f3d1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.types.StructType /** * Simplify redundant [[CreateNamedStruct]], [[CreateArray]] and [[CreateMap]] expressions. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala index bfc36ec477a73..4434c29cbb3c4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala @@ -17,10 +17,10 @@ package org.apache.spark.sql.catalyst.optimizer -import org.apache.spark.sql.catalyst.expressions.{Alias, And, ArrayTransform, CaseWhen, Coalesce, CreateArray, CreateMap, CreateNamedStruct, CreateStruct, EqualTo, ExpectsInputTypes, Expression, GetStructField, If, IsNull, KnownFloatingPointNormalized, LambdaFunction, Literal, NamedLambdaVariable, UnaryExpression} +import org.apache.spark.sql.catalyst.expressions.{Alias, And, ArrayTransform, CaseWhen, Coalesce, CreateArray, CreateMap, CreateNamedStruct, EqualTo, ExpectsInputTypes, Expression, GetStructField, If, IsNull, KnownFloatingPointNormalized, LambdaFunction, Literal, NamedLambdaVariable, UnaryExpression} import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys -import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Subquery, Window} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Window} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.types._ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala index 33b398e11cde9..ef3de4738c75c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala @@ -22,7 +22,6 @@ import org.apache.spark.sql.catalyst.expressions.{LambdaFunction, Literal, MapFi import org.apache.spark.sql.catalyst.expressions.Literal.FalseLiteral import org.apache.spark.sql.catalyst.plans.logical.{Filter, Join, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.BooleanType import org.apache.spark.util.Utils diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala index 76b9bd03f216c..9aa7e3201ab1b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.catalyst.optimizer -import java.time.LocalDate - import scala.collection.mutable import org.apache.spark.sql.catalyst.expressions._ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala index cb076f6e35184..11532d22204a4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.catalyst.optimizer import 
scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.analysis.CleanupAliases import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.SubExprUtils._ import org.apache.spark.sql.catalyst.expressions.aggregate._ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala index d8d18b46bcc74..2c6a716a2ed48 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala @@ -17,10 +17,9 @@ package org.apache.spark.sql.catalyst.plans.logical -import org.apache.spark.sql.catalyst.analysis.CheckAnalysis import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} import org.apache.spark.sql.catalyst.plans.QueryPlan -import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, TreeNode} +import org.apache.spark.sql.catalyst.trees.CurrentOrigin import org.apache.spark.util.Utils @@ -33,7 +32,7 @@ import org.apache.spark.util.Utils * analyzed flag set to true. * * The analyzer rules should use the various resolve methods, in lieu of the various transform - * methods defined in [[TreeNode]] and [[QueryPlan]]. + * methods defined in [[org.apache.spark.sql.catalyst.trees.TreeNode]] and [[QueryPlan]]. * * To prevent accidental use of the transform methods, this trait also overrides the transform * methods to throw exceptions in test mode, if they are used in the analyzer. @@ -44,7 +43,8 @@ trait AnalysisHelper extends QueryPlan[LogicalPlan] { self: LogicalPlan => /** * Recursively marks all nodes in this plan tree as analyzed. - * This should only be called by [[CheckAnalysis]]. + * This should only be called by + * [[org.apache.spark.sql.catalyst.analysis.CheckAnalysis]]. */ private[catalyst] def setAnalyzed(): Unit = { if (!_analyzed) { @@ -155,7 +155,7 @@ trait AnalysisHelper extends QueryPlan[LogicalPlan] { self: LogicalPlan => * In analyzer, use [[resolveOperatorsDown()]] instead. If this is used in the analyzer, * an exception will be thrown in test mode. It is however OK to call this function within * the scope of a [[resolveOperatorsDown()]] call. - * @see [[TreeNode.transformDown()]]. + * @see [[org.apache.spark.sql.catalyst.trees.TreeNode.transformDown()]]. */ override def transformDown(rule: PartialFunction[LogicalPlan, LogicalPlan]): LogicalPlan = { assertNotAnalysisRule() @@ -164,7 +164,7 @@ trait AnalysisHelper extends QueryPlan[LogicalPlan] { self: LogicalPlan => /** * Use [[resolveOperators()]] in the analyzer. 
- * @see [[TreeNode.transformUp()]] + * @see [[org.apache.spark.sql.catalyst.trees.TreeNode.transformUp()]] */ override def transformUp(rule: PartialFunction[LogicalPlan, LogicalPlan]): LogicalPlan = { assertNotAnalysisRule() diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala index 49f89bed154bb..1346f80247a1f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala @@ -20,18 +20,10 @@ package org.apache.spark.sql.catalyst.plans.logical import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream} import java.math.{MathContext, RoundingMode} -import scala.util.control.NonFatal - import net.jpountz.lz4.{LZ4BlockInputStream, LZ4BlockOutputStream} -import org.apache.spark.internal.Logging -import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.CatalogColumnStat import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.aggregate._ -import org.apache.spark.sql.catalyst.util.{ArrayData, DateTimeUtils} -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.util.Utils diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 4e7923b45822b..f96e07863fa69 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -17,17 +17,14 @@ package org.apache.spark.sql.catalyst.plans.logical -import scala.collection.mutable - import org.apache.spark.sql.catalyst.AliasIdentifier -import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation} +import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable} import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction} +import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, RangePartitioning, RoundRobinPartitioning} import org.apache.spark.sql.catalyst.util.truncatedString -import org.apache.spark.sql.connector.catalog.Identifier import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.util.random.RandomSampler diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala index a325b61fcc5a9..4b5e278fccdfb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.util.Utils /** * A 
general hint for the child that is not yet resolved. This node is generated by the parser and diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/ProjectEstimation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/ProjectEstimation.scala index 6925423f003ba..8e58c4f314df0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/ProjectEstimation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/ProjectEstimation.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.plans.logical.statsEstimation -import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeMap} +import org.apache.spark.sql.catalyst.expressions.AttributeMap import org.apache.spark.sql.catalyst.plans.logical.{Project, Statistics} object ProjectEstimation { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index 1ab7bbdcff697..ff2b366a9bc75 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -33,7 +33,6 @@ import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, import org.apache.spark.sql.catalyst.errors._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.JoinType -import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.physical.{BroadcastMode, Partitioning} import org.apache.spark.sql.catalyst.util.StringUtils.PlanStringConcat import org.apache.spark.sql.catalyst.util.truncatedString diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayData.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayData.scala index ebbf241088f80..44203316edd94 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayData.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayData.scala @@ -22,7 +22,6 @@ import scala.reflect.ClassTag import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{SpecializedGetters, UnsafeArrayData} import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.Platform import org.apache.spark.unsafe.array.ByteArrayMethods object ArrayData { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala index 1a78422e57a4c..46860ae1771de 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.util -import java.time.{LocalDate, LocalDateTime, LocalTime, ZoneId} +import java.time.{LocalDate, LocalDateTime, LocalTime} import java.time.temporal.ChronoField import java.util.{Calendar, TimeZone} import java.util.Calendar.{DAY_OF_MONTH, DST_OFFSET, ERA, HOUR_OF_DAY, MINUTE, MONTH, SECOND, YEAR, ZONE_OFFSET} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala 
index b09ccff39f842..f541411daeff4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala @@ -22,9 +22,8 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics} import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier, MetadataColumn, SupportsMetadataColumns, Table, TableCapability} -import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, Statistics => V2Statistics, SupportsReportStatistics} +import org.apache.spark.sql.connector.read.{Scan, Statistics => V2Statistics, SupportsReportStatistics} import org.apache.spark.sql.connector.read.streaming.{Offset, SparkDataStream} -import org.apache.spark.sql.connector.write.WriteBuilder import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.Utils @@ -36,8 +35,9 @@ import org.apache.spark.util.Utils * @param output the output attributes of this relation. * @param catalog catalogPlugin for the table. None if no catalog is specified. * @param identifier the identifier for the table. None if no identifier is defined. - * @param options The options for this table operation. It's used to create fresh [[ScanBuilder]] - * and [[WriteBuilder]]. + * @param options The options for this table operation. It's used to create fresh + * [[org.apache.spark.sql.connector.read.ScanBuilder]] and + * [[org.apache.spark.sql.connector.write.WriteBuilder]]. */ case class DataSourceV2Relation( table: Table, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala index 043c88f88843c..7556a19f0d316 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala @@ -31,7 +31,7 @@ import org.apache.spark.annotation.Stable import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.Resolver import org.apache.spark.sql.catalyst.expressions.{Cast, Expression} -import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException} +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.catalyst.util.DataTypeJsonUtils.{DataTypeJsonDeserializer, DataTypeJsonSerializer} import org.apache.spark.sql.catalyst.util.StringUtils.StringConcat import org.apache.spark.sql.internal.SQLConf diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala index 6be6d81ec3bb7..960e174f9c368 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.types -import java.lang.{Long => JLong} import java.math.{BigDecimal => JavaBigDecimal, BigInteger, MathContext, RoundingMode} import scala.util.Try diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowJsonSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowJsonSuite.scala index ac18b0f79b5f3..1962fca66c059 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowJsonSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowJsonSuite.scala @@ -17,14 +17,13 @@ package org.apache.spark.sql import java.sql.{Date, Timestamp} -import java.time.{Instant, LocalDate} +import java.time.LocalDate import org.json4s.JsonAST.{JArray, JBool, JDecimal, JDouble, JLong, JNull, JObject, JString, JValue} import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.encoders.{ExamplePoint, ExamplePointUDT} import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala index e8c7aed6d72ce..164bbd7f34d04 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala @@ -134,7 +134,6 @@ object ScroogeLikeExample { } trait ScroogeLikeExample extends Product1[Int] with Serializable { - import ScroogeLikeExample._ def x: Int diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala index f433229595e9e..1c849fa21e4ea 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.{CreateTableAsSelect, LeafNode} import org.apache.spark.sql.connector.InMemoryTableCatalog import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} -import org.apache.spark.sql.connector.expressions.{Expressions, LogicalExpressions} +import org.apache.spark.sql.connector.expressions.Expressions import org.apache.spark.sql.types.{DoubleType, LongType, StringType, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveNaturalJoinSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveNaturalJoinSuite.scala index e449b9669cc72..ea2284e5420bd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveNaturalJoinSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveNaturalJoinSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.analysis -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelperSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelperSuite.scala index 8cf41a02320d2..7566545f98355 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelperSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelperSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.analysis import 
org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet} import org.apache.spark.sql.catalyst.optimizer.SimpleTestOptimizer import org.apache.spark.sql.catalyst.parser.CatalystSqlParser -import org.apache.spark.sql.catalyst.plans.logical.{EventTimeWatermark, Filter, LeafNode, LocalRelation} +import org.apache.spark.sql.catalyst.plans.logical.{EventTimeWatermark, Filter, LeafNode} import org.apache.spark.sql.types.{IntegerType, MetadataBuilder, TimestampType} class StreamingJoinHelperSuite extends AnalysisTest { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala index 918db903a783f..3be417de472c6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala @@ -32,7 +32,6 @@ import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.sql.types.{IntegerType, LongType, MetadataBuilder} -import org.apache.spark.unsafe.types.CalendarInterval /** A dummy command for testing unsupported operations. */ case class DummyCommand() extends Command diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala index ff33324c3bb18..bc2b93e5390da 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala @@ -28,7 +28,7 @@ import scala.util.Random import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.serializer.{JavaSerializer, KryoSerializer} import org.apache.spark.sql.{RandomDataGenerator, Row} -import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow, JavaTypeInference, ScalaReflection} +import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.ScroogeLikeExample import org.apache.spark.sql.catalyst.analysis.{ResolveTimeZone, SimpleAnalyzer, UnresolvedDeserializer} import org.apache.spark.sql.catalyst.dsl.expressions._ @@ -37,9 +37,8 @@ import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjectio import org.apache.spark.sql.catalyst.expressions.objects._ import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, Project} import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, ArrayData, DateTimeUtils, GenericArrayData, IntervalUtils} -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} +import org.apache.spark.unsafe.types.UTF8String class InvokeTargetClass extends Serializable { def filterInt(e: Any): Any = e.asInstanceOf[Int] > 0 diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntimeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntimeSuite.scala index f56ec49724adb..64b619ca7766b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntimeSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntimeSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.types.IntegerType class SubExprEvaluationRuntimeSuite extends SparkFunSuite { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentileSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentileSuite.scala index 303fa137d8925..53e8ee9fbe715 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentileSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentileSuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile import org.apache.spark.sql.catalyst.plans.logical.LocalRelation import org.apache.spark.sql.catalyst.util.{ArrayData, QuantileSummaries} import org.apache.spark.sql.catalyst.util.QuantileSummaries.Stats -import org.apache.spark.sql.types.{ArrayType, Decimal, DecimalType, DoubleType, FloatType, IntegerType, IntegralType, LongType} +import org.apache.spark.sql.types.{ArrayType, Decimal, DecimalType, DoubleType, FloatType, IntegerType, IntegralType} import org.apache.spark.util.SizeEstimator class ApproximatePercentileSuite extends SparkFunSuite { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala index 67e3bc69543e8..d660afb7f8a05 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions.codegen import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.types.{BooleanType, IntegerType} +import org.apache.spark.sql.types.IntegerType class CodeBlockSuite extends SparkFunSuite { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala index f40691bd1a038..51c751923e414 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.plans.PlanTest -import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Expand, LocalRelation, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor class EliminateDistinctSuite extends PlanTest { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala index 11ec037c94f73..c518fdded2112 100644 --- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala @@ -25,8 +25,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{BooleanType, IntegerType, StringType, TimestampType} +import org.apache.spark.sql.types.{IntegerType, StringType} import org.apache.spark.unsafe.types.CalendarInterval class FilterPushdownSuite extends PlanTest { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala index 8785bc7cd36cb..17dfc7f3f18f7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.PlanTest -import org.apache.spark.sql.catalyst.plans.logical.{Filter, LocalRelation, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor class PullupCorrelatedPredicatesSuite extends PlanTest { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyCastsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyCastsSuite.scala index 0ccf8aea660b2..c981cee55d0fa 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyCastsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyCastsSuite.scala @@ -17,10 +17,8 @@ package org.apache.spark.sql.catalyst.optimizer -import org.apache.spark.sql.catalyst.dsl._ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.RuleExecutor diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala index 9878969959bfd..dcd2fbbf00529 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext import org.apache.spark.sql.catalyst.plans.PlanTest -import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, OneRowRelation, Project, Range} +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor import 
org.apache.spark.sql.catalyst.util.GenericArrayData import org.apache.spark.sql.types._ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 4ac5c8d0561d9..f93c0dcf59f4c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -21,12 +21,11 @@ import java.util.Locale import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedNamespace, UnresolvedPartitionSpec, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView} -import org.apache.spark.sql.catalyst.catalog.{ArchiveResource, BucketSpec, FileResource, FunctionResource, FunctionResourceType, JarResource} +import org.apache.spark.sql.catalyst.catalog.{ArchiveResource, BucketSpec, FileResource, FunctionResource, JarResource} import org.apache.spark.sql.catalyst.expressions.{EqualTo, Literal} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition.{after, first} import org.apache.spark.sql.connector.expressions.{ApplyTransform, BucketTransform, DaysTransform, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, Transform, YearsTransform} -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructType, TimestampType} import org.apache.spark.unsafe.types.UTF8String diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala index 1cf888519077a..878fae4c547b3 100755 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} import org.apache.spark.sql.catalyst.plans.LeftOuter import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.{ColumnStatsMap, FilterEstimation} +import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.ColumnStatsMap import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.EstimationUtils._ import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.types._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 30792c9bacd53..c164835c753e8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql import scala.collection.JavaConverters._ -import scala.language.implicitConversions import org.apache.spark.annotation.Stable import org.apache.spark.internal.Logging diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index 991f02d43bc47..31b4c158aa67b 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -470,7 +470,6 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { def insertInto(tableName: String): Unit = { import df.sparkSession.sessionState.analyzer.{AsTableIdentifier, NonSessionCatalogAndIdentifier, SessionCatalogAndIdentifier} import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ - import org.apache.spark.sql.connector.catalog.CatalogV2Util._ assertNotBucketed("insertInto") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 3d431d6ff13a9..2c38a65ac2106 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -21,7 +21,6 @@ import java.io.{ByteArrayOutputStream, CharArrayWriter, DataOutputStream} import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer -import scala.language.implicitConversions import scala.reflect.runtime.universe.TypeTag import scala.util.control.NonFatal @@ -63,7 +62,7 @@ import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils import org.apache.spark.storage.StorageLevel import org.apache.spark.unsafe.array.ByteArrayMethods -import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} +import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils private[sql] object Dataset { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala index 7e430b682faf4..c40ce0f4777c6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql import java.util.Locale import scala.collection.JavaConverters._ -import scala.language.implicitConversions import org.apache.spark.annotation.Stable import org.apache.spark.api.python.PythonEvalType diff --git a/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala b/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala index e9bc25d489718..2f46fa8073bbc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala @@ -18,10 +18,8 @@ package org.apache.spark.sql import org.apache.spark.annotation.Stable -import org.apache.spark.internal.Logging import org.apache.spark.internal.config.{ConfigEntry, OptionalConfigEntry} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.internal.SQLConf.{DeprecatedConfig, RemovedConfig} /** * Runtime configuration interface for Spark. To access this, use `SparkSession.conf`. 
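The `RuntimeConfig` touched in the hunk above is the object returned by `SparkSession.conf`, as its Scaladoc says. A minimal usage sketch, illustrative only and not part of the patch; the key names are just examples:

```
import org.apache.spark.sql.SparkSession

// Illustrative sketch: RuntimeConfig is reached through SparkSession.conf.
val spark = SparkSession.builder().master("local[*]").appName("runtime-conf-demo").getOrCreate()

// Set and read back a runtime SQL configuration value.
spark.conf.set("spark.sql.shuffle.partitions", "8")
val numShufflePartitions = spark.conf.get("spark.sql.shuffle.partitions")

// getOption returns None instead of throwing for an unset key.
val maybeUnset = spark.conf.getOption("spark.sql.some.unset.key")

spark.stop()
```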
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala index 0f6ae9c5d44e1..cceb38558946e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala @@ -30,9 +30,9 @@ import org.apache.spark.sql.catalyst.{JavaTypeInference, ScalaReflection} import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.{Expression, ScalaUDF} -import org.apache.spark.sql.execution.aggregate.{ScalaAggregator, ScalaUDAF} +import org.apache.spark.sql.execution.aggregate.ScalaUDAF import org.apache.spark.sql.execution.python.UserDefinedPythonFunction -import org.apache.spark.sql.expressions.{Aggregator, SparkUserDefinedFunction, UserDefinedAggregateFunction, UserDefinedAggregator, UserDefinedFunction} +import org.apache.spark.sql.expressions.{SparkUserDefinedFunction, UserDefinedAggregateFunction, UserDefinedAggregator, UserDefinedFunction} import org.apache.spark.sql.types.DataType import org.apache.spark.util.Utils diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala index c6a644f9f2e29..1436574c0d90a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalog import scala.collection.JavaConverters._ -import org.apache.spark.annotation.{Evolving, Experimental, Stable} +import org.apache.spark.annotation.Stable import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset} import org.apache.spark.sql.types.StructType import org.apache.spark.storage.StorageLevel diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index bd9120a1fbe78..303ae47f06b84 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType, CatalogUtils} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, CatalogV2Util, Identifier, LookupCatalog, SupportsNamespaces, SupportsPartitionManagement, TableCatalog, TableChange, V1Table} +import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, CatalogV2Util, Identifier, LookupCatalog, SupportsNamespaces, TableCatalog, TableChange, V1Table} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index 5f72d6005a8dd..f163d85914bc9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, SubqueryExpression} import org.apache.spark.sql.catalyst.optimizer.EliminateResolvedHint import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, LogicalPlan, ResolvedHint} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper -import org.apache.spark.sql.execution.columnar.{DefaultCachedBatchSerializer, InMemoryRelation} +import org.apache.spark.sql.execution.columnar.InMemoryRelation import org.apache.spark.sql.execution.command.CommandUtils import org.apache.spark.sql.execution.datasources.{FileIndex, HadoopFsRelation, LogicalRelation} import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, FileTable} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CollectMetricsExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CollectMetricsExec.scala index e1b9c8f430c56..b0bbb52bc4990 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CollectMetricsExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CollectMetricsExec.scala @@ -16,8 +16,6 @@ */ package org.apache.spark.sql.execution -import scala.collection.mutable - import org.apache.spark.TaskContext import org.apache.spark.rdd.RDD import org.apache.spark.sql.Row diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index 45d28ddb42fc3..44636beeec7fc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -25,7 +25,6 @@ import org.apache.commons.lang3.StringUtils import org.apache.hadoop.fs.Path import org.apache.spark.rdd.RDD -import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.BucketSpec import org.apache.spark.sql.catalyst.expressions._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala index dcec0b019da28..08950c827f5aa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala @@ -22,7 +22,7 @@ import java.sql.{Date, Timestamp} import java.time.{Instant, LocalDate, ZoneOffset} import org.apache.spark.sql.Row -import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, LegacyDateFormats, TimestampFormatter} +import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, TimestampFormatter} import org.apache.spark.sql.execution.command.{DescribeCommandBase, ExecutedCommandExec, ShowTablesCommand, ShowViewsCommand} import org.apache.spark.sql.execution.datasources.v2.{DescribeTableExec, ShowTablesExec} import org.apache.spark.sql.internal.SQLConf diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala index 8746cc6f650d7..bbe3f50492d9f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala @@ -22,7 +22,6 @@ import 
org.apache.spark.sql.catalyst.expressions.aggregate.{Final, PartialMerge} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.aggregate.BaseAggregateExec import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanExecBase -import org.apache.spark.sql.execution.window.WindowExec import org.apache.spark.sql.internal.SQLConf /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 6c42c051fbba6..85476bcd21e19 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -27,7 +27,7 @@ import org.antlr.v4.runtime.{ParserRuleContext, Token} import org.antlr.v4.runtime.tree.TerminalNode import org.apache.spark.sql.SaveMode -import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} +import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.parser._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index ba3d83714c302..e9b1aa81895f5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -37,7 +37,7 @@ import org.apache.spark.sql.execution.python._ import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.sources.MemoryPlan import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.streaming.{OutputMode, StreamingQuery} +import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.sql.types.StructType /** @@ -312,8 +312,9 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { /** * Used to plan streaming aggregation queries that are computed incrementally as part of a - * [[StreamingQuery]]. Currently this rule is injected into the planner - * on-demand, only when planning in a [[org.apache.spark.sql.execution.streaming.StreamExecution]] + * [[org.apache.spark.sql.streaming.StreamingQuery]]. 
Currently this rule is injected into the + * planner on-demand, only when planning in a + * [[org.apache.spark.sql.execution.streaming.StreamExecution]] */ object StatefulAggregationStrategy extends Strategy { override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index a8905ca530005..b2963457e22db 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -29,7 +29,6 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.aggregate.HashAggregateExec diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/DemoteBroadcastHashJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/DemoteBroadcastHashJoin.scala index 011acbf1b22a4..3760782515e97 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/DemoteBroadcastHashJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/DemoteBroadcastHashJoin.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.adaptive import org.apache.spark.sql.catalyst.plans.logical.{HintInfo, Join, LogicalPlan, NO_BROADCAST_HASH} import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.internal.SQLConf /** * This optimization rule detects a join child that has a high ratio of empty partitions and diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStage.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStage.scala index 9914eddd53a3d..bff142315f8ff 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStage.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.adaptive import org.apache.spark.sql.catalyst.expressions.{Attribute, SortOrder} import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics} -import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.execution.SparkPlan /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ReuseAdaptiveSubquery.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ReuseAdaptiveSubquery.scala index c3c7358641fcb..71540dbd39f95 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ReuseAdaptiveSubquery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ReuseAdaptiveSubquery.scala @@ -21,7 +21,6 @@ import scala.collection.concurrent.TrieMap import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.{BaseSubqueryExec, ExecSubqueryExpression, ReusedSubqueryExec, SparkPlan} -import org.apache.spark.sql.internal.SQLConf case class ReuseAdaptiveSubquery( reuseMap: TrieMap[SparkPlan, BaseSubqueryExec]) extends Rule[SparkPlan] { diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/simpleCosting.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/simpleCosting.scala index cdc57dbc7dcc2..aae3d922b28a5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/simpleCosting.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/simpleCosting.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.execution.adaptive import org.apache.spark.sql.execution.SparkPlan -import org.apache.spark.sql.execution.exchange.{ShuffleExchangeExec, ShuffleExchangeLike} +import org.apache.spark.sql.execution.exchange.ShuffleExchangeLike /** * A simple implementation of [[Cost]], which takes a number of [[Long]] as the cost value. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationIterator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationIterator.scala index 75651500954cf..1c140d7b6955f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationIterator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationIterator.scala @@ -28,7 +28,6 @@ import org.apache.spark.sql.execution.metric.SQLMetric import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType import org.apache.spark.unsafe.KVIterator -import org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter class ObjectAggregationIterator( partIndex: Int, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationMap.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationMap.scala index b5372bcca89dd..9f2cf84a6d7e6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationMap.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationMap.scala @@ -26,7 +26,6 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, UnsafeProjection, U import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateFunction, TypedImperativeAggregate} import org.apache.spark.sql.execution.UnsafeKVExternalSorter import org.apache.spark.sql.types.StructType -import org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter /** * An aggregation map that supports using safe `SpecificInternalRow`s aggregation buffers, so that diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala index 492b0f2da77cb..deb9e76c51760 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala @@ -19,12 +19,13 @@ package org.apache.spark.sql.execution.aggregate import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction} +import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.execution.metric.SQLMetric /** - * An iterator used to evaluate [[AggregateFunction]]. It assumes the input rows have been - * sorted by values of [[groupingExpressions]]. 
+ * An iterator used to evaluate + * [[org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction]]. + * It assumes the input rows have been sorted by values of [[groupingExpressions]]. */ class SortBasedAggregationIterator( partIndex: Int, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala index 44bc9c2e3a9d0..41e247a02759b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala @@ -17,16 +17,12 @@ package org.apache.spark.sql.execution.aggregate -import scala.reflect.runtime.universe.TypeTag - import org.apache.spark.internal.Logging -import org.apache.spark.sql.{Column, Row} +import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, _} -import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete} import org.apache.spark.sql.catalyst.expressions.aggregate.{ImperativeAggregate, TypedImperativeAggregate} -import org.apache.spark.sql.catalyst.expressions.codegen.{GenerateMutableProjection, GenerateSafeProjection} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.expressions.{Aggregator, MutableAggregationBuffer, UserDefinedAggregateFunction} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala index 7334ea1e27284..006fa0fba4138 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala @@ -21,7 +21,7 @@ import java.util.concurrent.{Future => JFuture} import java.util.concurrent.TimeUnit._ import scala.collection.mutable -import scala.concurrent.{ExecutionContext} +import scala.concurrent.ExecutionContext import scala.concurrent.duration.Duration import org.apache.spark.{InterruptibleIterator, Partition, SparkContext, TaskContext} @@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.execution.metric.SQLMetrics import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} import org.apache.spark.sql.types.{LongType, StructType} -import org.apache.spark.util.{ThreadUtils, Utils} +import org.apache.spark.util.ThreadUtils import org.apache.spark.util.random.{BernoulliCellSampler, PoissonSampler} /** Physical plan for Project. 
*/ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoin.scala index 40a2a7a2359e0..a4e5be01b45a2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoin.scala @@ -26,7 +26,6 @@ import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partition import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.{FileSourceScanExec, FilterExec, ProjectExec, SparkPlan} import org.apache.spark.sql.execution.joins.{BaseJoinExec, ShuffledHashJoinExec, SortMergeJoinExec} -import org.apache.spark.sql.internal.SQLConf /** * This rule coalesces one side of the `SortMergeJoin` and `ShuffledHashJoin` diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala index bb59f44abc761..6b195b3b49f09 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala @@ -22,7 +22,6 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.{FileSourceScanExec, FilterExec, ProjectExec, SortExec, SparkPlan} import org.apache.spark.sql.execution.aggregate.BaseAggregateExec import org.apache.spark.sql.execution.exchange.Exchange -import org.apache.spark.sql.internal.SQLConf /** * Disable unnecessary bucketed table scan based on actual physical query plan. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnStats.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnStats.scala index 45557bfbada6c..d2f65b745f35a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnStats.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnStats.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.columnar import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference} import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} +import org.apache.spark.unsafe.types.UTF8String class ColumnStatisticsSchema(a: Attribute) extends Serializable { val upperBound = AttributeReference(a.name + ".upperBound", a.dataType, nullable = true)() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala index 6495463be02c0..f86f62bbf853b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala @@ -27,7 +27,7 @@ import org.apache.hadoop.fs.{FileSystem, Path, PathFilter} import org.apache.spark.internal.Logging import org.apache.spark.sql.{AnalysisException, SparkSession} import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier} -import org.apache.spark.sql.catalyst.catalog.{CatalogColumnStat, CatalogStatistics, CatalogTable} +import org.apache.spark.sql.catalyst.catalog.{CatalogStatistics, CatalogTable} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.plans.logical._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DataWritingCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DataWritingCommand.scala index a1bb5af1ab723..a56007f5d5d95 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DataWritingCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DataWritingCommand.scala @@ -24,7 +24,6 @@ import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan} import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.datasources.BasicWriteJobStatsTracker -import org.apache.spark.sql.execution.datasources.FileFormatWriter import org.apache.spark.sql.execution.metric.SQLMetric import org.apache.spark.util.SerializableConfiguration @@ -35,7 +34,7 @@ trait DataWritingCommand extends Command { /** * The input query plan that produces the data to be written. * IMPORTANT: the input query plan MUST be analyzed, so that we can carry its output columns - * to [[FileFormatWriter]]. + * to [[org.apache.spark.sql.execution.datasources.FileFormatWriter]]. 
*/ def query: LogicalPlan diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala index 61ee6d7f4a299..00accedf21556 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.command import org.apache.spark.internal.Logging import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.IgnoreCachedData import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION import org.apache.spark.sql.types.{StringType, StructField, StructType} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala index ef6b0bba1628e..f99dc8d9f1a8e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala @@ -21,7 +21,6 @@ import java.util.Locale import org.apache.spark.sql.{Dataset, Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, LogicalPlan} import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala index 68c47d6a6dfaa..6ed40aacd1125 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala @@ -21,7 +21,6 @@ import java.net.URI import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.catalog._ -import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.datasources._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala index d76b4b8894783..330a503e5f8e2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.{AnalysisException, Row, SparkSession} import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchFunctionException} import org.apache.spark.sql.catalyst.catalog.{CatalogFunction, FunctionResource} -import org.apache.spark.sql.catalyst.expressions.{Attribute, ExpressionInfo} +import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.util.StringUtils import org.apache.spark.sql.types.{StringType, StructField, StructType} diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index b55bed9cd7fc0..34ded5d456d09 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.execution.datasources import java.util.{Locale, ServiceConfigurationError, ServiceLoader} import scala.collection.JavaConverters._ -import scala.language.implicitConversions import scala.util.{Failure, Success, Try} import org.apache.hadoop.conf.Configuration diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala index 28a63c26604ec..1149767bdade2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala @@ -22,11 +22,12 @@ import scala.collection.JavaConverters._ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoStatement, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, FileDataSourceV2, FileTable} +import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, FileTable} /** * Replace the File source V2 table in [[InsertIntoStatement]] to V1 [[FileFormat]]. - * E.g, with temporary view `t` using [[FileDataSourceV2]], inserting into view `t` fails + * E.g, with temporary view `t` using + * [[org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2]], inserting into view `t` fails * since there is no corresponding physical plan. * This is a temporary hack for making current data source V2 work. It should be * removed when Catalog support of file data source v2 is finished. 
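The comment in the `FallBackFileSourceV2` hunk above describes inserting into a temporary view that is backed by a file source. A rough sketch of that scenario with the public API; the path and view name are invented, and whether the v1 or v2 file source path is taken depends on the session's source settings:

```
import org.apache.spark.sql.SparkSession

// Illustrative sketch: a temporary view over a file-based source, then an INSERT
// into it. The fallback rule above is what lets such an insert be planned against
// the v1 FileFormat when the view initially resolves to a file-based v2 table.
val spark = SparkSession.builder().master("local[*]").appName("fallback-demo").getOrCreate()
import spark.implicits._

// Seed the (made-up) path so the view can be created over an existing location.
Seq(0).toDF("id").write.mode("overwrite").parquet("/tmp/fallback_demo")

spark.sql("CREATE TEMPORARY VIEW t USING parquet OPTIONS (path '/tmp/fallback_demo')")
spark.sql("INSERT INTO t VALUES (1), (2)")
spark.sql("SELECT * FROM t").show()

spark.stop()
```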
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelation.scala index d278802e6c9f2..a0b191e60f376 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelation.scala @@ -17,10 +17,6 @@ package org.apache.spark.sql.execution.datasources -import java.util.Locale - -import scala.collection.mutable - import org.apache.spark.sql.{SparkSession, SQLContext} import org.apache.spark.sql.catalyst.catalog.BucketSpec import org.apache.spark.sql.execution.FileRelation diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/OutputWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/OutputWriter.scala index 868e5371426c0..1d7abe5b938c2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/OutputWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/OutputWriter.scala @@ -19,8 +19,7 @@ package org.apache.spark.sql.execution.datasources import org.apache.hadoop.mapreduce.TaskAttemptContext -import org.apache.spark.sql.Row -import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} +import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.types.StructType diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala index 5341e22f5e670..fed9614347f6a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.{expressions, InternalRow} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils} -import org.apache.spark.sql.types.{StringType, StructType} +import org.apache.spark.sql.types.StructType /** * An abstract class that represents [[FileIndex]]s that are aware of partitioned tables. 
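For context on the `PartitioningAwareFileIndex` hunk above: the partition awareness it refers to is the usual directory-based layout that file sources discover. A hedged sketch of producing and reading such a layout with the public API; the path and column names are made up:

```
import org.apache.spark.sql.SparkSession

// Illustrative sketch: writing a partitioned layout that a partition-aware
// file index can later discover from the directory names.
val spark = SparkSession.builder().master("local[*]").appName("partition-demo").getOrCreate()
import spark.implicits._

val events = Seq((1, "2020-09-01"), (2, "2020-09-02")).toDF("id", "event_date")

// Produces directories like /tmp/events/event_date=2020-09-01/part-*.parquet
events.write.partitionBy("event_date").mode("overwrite").parquet("/tmp/events")

// Reading the root path back lets partition discovery reconstruct event_date
// as a column from the directory names.
val reloaded = spark.read.parquet("/tmp/events")
reloaded.filter($"event_date" === "2020-09-02").show()

spark.stop()
```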
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala index 4087efc486a4f..796c23c7337d8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{Resolver, TypeCoercion} import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Cast, Literal} +import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Literal} import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateFormatter, DateTimeUtils, TimestampFormatter} import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/RecordReaderIterator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/RecordReaderIterator.scala index c3dd6939ec5bd..0959d8799f5a1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/RecordReaderIterator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/RecordReaderIterator.scala @@ -21,8 +21,6 @@ import java.io.Closeable import org.apache.hadoop.mapreduce.RecordReader -import org.apache.spark.sql.catalyst.InternalRow - /** * An adaptor from a Hadoop [[RecordReader]] to an [[Iterator]] over the values returned. * diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala index 61e0154a0ffe8..76a6a48ca0b0c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.datasources.orc.OrcFileFormat import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructField, StructType} +import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructType} /** * Prunes unnecessary physical columns given a [[PhysicalOperation]] over a data source relation. 
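To make the `SchemaPruning` comment above concrete at the user level: when only a nested field is selected, the scan's read schema can be narrowed to just that field for Parquet and ORC. A hedged sketch; the path and column names are invented, and the config key shown is the one that, to my understanding, gates this behavior and is on by default in recent releases:

```
import org.apache.spark.sql.SparkSession

// Illustrative sketch: selecting only contact.first below should let the scan
// read just that nested field from the Parquet files, not the whole struct.
val spark = SparkSession.builder().master("local[*]").appName("pruning-demo").getOrCreate()
import spark.implicits._

// Assumed to be enabled by default in recent versions; set here only for clarity.
spark.conf.set("spark.sql.optimizer.nestedSchemaPruning.enabled", "true")

val df = Seq(("a@example.com", "Alice", "Smith"))
  .toDF("email", "first", "last")
  .selectExpr("email", "named_struct('first', first, 'last', last) AS contact")

df.write.mode("overwrite").parquet("/tmp/people")

// After pruning, the scan's ReadSchema should mention only contact.first.
val pruned = spark.read.parquet("/tmp/people").select($"contact.first")
pruned.explain()

spark.stop()
```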
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormat.scala index 637ce68ec05a2..b241243363746 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormat.scala @@ -22,14 +22,14 @@ import java.sql.Timestamp import com.google.common.io.{ByteStreams, Closeables} import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.{FileStatus, GlobFilter, Path} +import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hadoop.mapreduce.Job import org.apache.spark.SparkException import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter -import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils} +import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.execution.datasources.{FileFormat, OutputWriterFactory, PartitionedFile} import org.apache.spark.sql.internal.SQLConf.SOURCES_BINARY_FILE_MAX_LENGTH import org.apache.spark.sql.sources.{And, DataSourceRegister, EqualTo, Filter, GreaterThan, GreaterThanOrEqual, LessThan, LessThanOrEqual, Not, Or} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala index 10146be44e8bf..d8fa768a604f4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala @@ -23,8 +23,6 @@ import java.nio.charset.{Charset, StandardCharsets} import com.univocity.parsers.csv.CsvParser import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, Path} -import org.apache.hadoop.io.{LongWritable, Text} -import org.apache.hadoop.mapred.TextInputFormat import org.apache.hadoop.mapreduce.Job import org.apache.hadoop.mapreduce.lib.input.FileInputFormat diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala index e25ce53941ff6..87ca78db59b29 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.datasources.jdbc -import java.sql.{Connection, PreparedStatement, ResultSet, SQLException} +import java.sql.{Connection, PreparedStatement, ResultSet} import scala.util.control.NonFatal @@ -46,8 +46,8 @@ object JDBCRDD extends Logging { * @param options - JDBC options that contains url, table and other information. * * @return A StructType giving the table's Catalyst schema. - * @throws SQLException if the table specification is garbage. - * @throws SQLException if the table contains an unsupported type. + * @throws java.sql.SQLException if the table specification is garbage. + * @throws java.sql.SQLException if the table contains an unsupported type. 
*/ def resolveTable(options: JDBCOptions): StructType = { val url = options.url diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala index e0fa4584185e9..f2f6f60cb1dde 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala @@ -17,13 +17,10 @@ package org.apache.spark.sql.execution.datasources.json -import java.nio.charset.{Charset, StandardCharsets} - import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext} -import org.apache.spark.internal.Logging import org.apache.spark.sql.{AnalysisException, SparkSession} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.ExprUtils diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala index 4ab009c6bd014..32ce7185f7381 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala @@ -23,7 +23,6 @@ import org.apache.orc.mapred.{OrcList, OrcMap, OrcStruct, OrcTimestamp} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{SpecificInternalRow, UnsafeArrayData} import org.apache.spark.sql.catalyst.util._ -import org.apache.spark.sql.catalyst.util.RebaseDateTime.rebaseJulianToGregorianDays import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala index 95f19f9dcee64..1901f5575470e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala @@ -35,7 +35,6 @@ import org.apache.parquet.hadoop._ import org.apache.parquet.hadoop.ParquetOutputFormat.JobSummaryLevel import org.apache.parquet.hadoop.codec.CodecConfig import org.apache.parquet.hadoop.util.ContextUtil -import org.apache.parquet.schema.MessageType import org.apache.spark.{SparkException, TaskContext} import org.apache.spark.internal.Logging @@ -504,7 +503,8 @@ object ParquetFileFormat extends Logging { /** * Reads Spark SQL schema from a Parquet footer. If a valid serialized Spark SQL schema string * can be found in the file metadata, returns the deserialized [[StructType]], otherwise, returns - * a [[StructType]] converted from the [[MessageType]] stored in this footer. + * a [[StructType]] converted from the [[org.apache.parquet.schema.MessageType]] stored in this + * footer. 
*/ def readSchemaFromFooter( footer: Footer, converter: ParquetToSparkSchemaConverter): StructType = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOutputWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOutputWriter.scala index e7753cec681cf..70f6726c581a2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOutputWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOutputWriter.scala @@ -21,7 +21,6 @@ import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce._ import org.apache.parquet.hadoop.ParquetOutputFormat -import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.execution.datasources.OutputWriter diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala index 6ef56af927129..f65aef95b6c38 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala @@ -26,9 +26,9 @@ import scala.collection.mutable.ArrayBuffer import org.apache.parquet.column.Dictionary import org.apache.parquet.io.api.{Binary, Converter, GroupConverter, PrimitiveConverter} -import org.apache.parquet.schema.{GroupType, MessageType, OriginalType, Type} -import org.apache.parquet.schema.OriginalType.{INT_32, LIST, UTF8} -import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.{BINARY, DOUBLE, FIXED_LEN_BYTE_ARRAY, INT32, INT64, INT96} +import org.apache.parquet.schema.{GroupType, OriginalType, Type} +import org.apache.parquet.schema.OriginalType.LIST +import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.{BINARY, FIXED_LEN_BYTE_ARRAY, INT32, INT64, INT96} import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow @@ -107,11 +107,15 @@ private[parquet] class ParquetPrimitiveConverter(val updater: ParentContainerUpd * }}} * 5 converters will be created: * - * - a root [[ParquetRowConverter]] for [[MessageType]] `root`, which contains: - * - a [[ParquetPrimitiveConverter]] for required [[INT_32]] field `f1`, and + * - a root [[ParquetRowConverter]] for [[org.apache.parquet.schema.MessageType]] `root`, + * which contains: + * - a [[ParquetPrimitiveConverter]] for required + * [[org.apache.parquet.schema.OriginalType.INT_32]] field `f1`, and * - a nested [[ParquetRowConverter]] for optional [[GroupType]] `f2`, which contains: - * - a [[ParquetPrimitiveConverter]] for required [[DOUBLE]] field `f21`, and - * - a [[ParquetStringConverter]] for optional [[UTF8]] string field `f22` + * - a [[ParquetPrimitiveConverter]] for required + * [[org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE]] field `f21`, and + * - a [[ParquetStringConverter]] for optional [[org.apache.parquet.schema.OriginalType.UTF8]] + * string field `f22` * * When used as a root converter, [[NoopUpdater]] should be used since root converters don't have * any "parent" container. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index e45514385e292..3a2a642b870f8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -29,8 +29,6 @@ import org.apache.spark.sql.connector.catalog.CatalogV2Util.assertNoNullTypeInSc import org.apache.spark.sql.connector.expressions.{FieldReference, RewritableTransform} import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy import org.apache.spark.sql.sources.InsertableRelation import org.apache.spark.sql.types.{AtomicType, StructType} import org.apache.spark.sql.util.SchemaUtils diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala index 0cbcad1f48026..0ca442baeea2f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala @@ -23,7 +23,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} -import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsMetadataColumns, Table, TableCatalog} +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsMetadataColumns, Table} import org.apache.spark.sql.types.StructType case class DescribeTableExec( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropNamespaceExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropNamespaceExec.scala index f7b4317ad65e2..777ee9d385f12 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropNamespaceExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropNamespaceExec.scala @@ -21,7 +21,7 @@ import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.connector.catalog.{CatalogPlugin, SupportsNamespaces} +import org.apache.spark.sql.connector.catalog.CatalogPlugin /** * Physical plan node for dropping a namespace. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileDataSourceV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileDataSourceV2.scala index e4de70d4ee88f..8cf59f3a59323 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileDataSourceV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileDataSourceV2.scala @@ -25,7 +25,6 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.connector.catalog.{Table, TableProvider} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.datasources._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala index 95715fd1af56e..7ceee1edee180 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet, GenericRowWithSchema} -import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Table, TableCatalog} +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Table} /** * Physical plan node for showing table properties. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala index 5dfd2e52706d0..cb4a2994de1f4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic} import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 -import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table} +import org.apache.spark.sql.connector.catalog.Table import org.apache.spark.sql.connector.catalog.TableCapability._ import org.apache.spark.sql.execution.streaming.StreamingRelation import org.apache.spark.sql.types.BooleanType diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TextBasedFileScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TextBasedFileScan.scala index 1ca3fd42c0597..f24fb95acb922 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TextBasedFileScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TextBasedFileScan.scala @@ -22,8 +22,6 @@ import org.apache.hadoop.fs.Path import org.apache.hadoop.io.compress.CompressionCodecFactory import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex -import 
org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.Utils diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScanBuilder.scala index 2f9387532c25c..0dbc74395afb1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScanBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScanBuilder.scala @@ -19,10 +19,7 @@ package org.apache.spark.sql.execution.datasources.v2.orc import scala.collection.JavaConverters._ -import org.apache.orc.mapreduce.OrcInputFormat - import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quoteIfNeeded import org.apache.spark.sql.connector.read.{Scan, SupportsPushDownFilters} import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex import org.apache.spark.sql.execution.datasources.orc.OrcFilters diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala index 6973f55e8dca0..93d7db44f2285 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeSeq, BindReferences, DynamicPruningExpression, DynamicPruningSubquery, Expression, ListQuery, Literal, PredicateHelper} import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight} -import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.Aggregate import org.apache.spark.sql.catalyst.plans.physical.BroadcastMode import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.{InSubqueryExec, QueryExecution, SparkPlan, SubqueryBroadcastExec} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala index cf38fee055ca5..ebbc8a4df5643 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala @@ -25,7 +25,6 @@ import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.joins.{ShuffledHashJoinExec, SortMergeJoinExec} -import org.apache.spark.sql.internal.SQLConf /** * Ensures that the [[org.apache.spark.sql.catalyst.plans.physical.Partitioning Partitioning]] diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala index aeaf59b7f0f4a..e58733b35990a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala @@ -27,7 +27,6 @@ import 
org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, Expre import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution._ -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType import org.apache.spark.sql.vectorized.ColumnarBatch diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala index 298d63478b63e..7c476ab03c002 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala @@ -26,7 +26,7 @@ import org.apache.spark.api.python.ChainedPythonFunctions import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} +import org.apache.spark.sql.execution.UnaryExecNode import org.apache.spark.sql.types.{DataType, StructField, StructType} import org.apache.spark.util.Utils diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala index 59f5a7078a151..ae7b7ef23512c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala @@ -26,7 +26,7 @@ import org.apache.arrow.vector.VectorSchemaRoot import org.apache.arrow.vector.ipc.{ArrowStreamReader, ArrowStreamWriter} import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel -import org.apache.spark.{SparkException, TaskContext} +import org.apache.spark.TaskContext import org.apache.spark.api.r._ import org.apache.spark.api.r.SpecialLengths import org.apache.spark.broadcast.Broadcast diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala index eb8b8af7950b2..747094b7791c1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.streaming import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder -import org.apache.spark.sql.catalyst.expressions.{Ascending, Attribute, AttributeReference, Expression, Literal, SortOrder, UnsafeRow} +import org.apache.spark.sql.catalyst.expressions.{Ascending, Attribute, Expression, SortOrder, UnsafeRow} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, Distribution} import org.apache.spark.sql.execution._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala index c2278e8659147..893639a86c88c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala @@ -19,14 +19,12 @@ package 
org.apache.spark.sql.execution.streaming import java.io._ import java.nio.charset.StandardCharsets -import java.util.{ConcurrentModificationException, EnumSet, UUID} +import java.util.ConcurrentModificationException import scala.reflect.ClassTag import org.apache.commons.io.IOUtils -import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs._ -import org.apache.hadoop.fs.permission.FsPermission import org.json4s.NoTypeHints import org.json4s.jackson.Serialization diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index aba0463f56cd7..d6be33c76e937 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -40,7 +40,6 @@ import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table} import org.apache.spark.sql.connector.read.streaming.{Offset => OffsetV2, ReadLimit, SparkDataStream} import org.apache.spark.sql.connector.write.{LogicalWriteInfoImpl, SupportsTruncate} import org.apache.spark.sql.connector.write.streaming.StreamingWrite -import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.execution.command.StreamingExplainCommand import org.apache.spark.sql.execution.datasources.v2.StreamWriterCommitProgress import org.apache.spark.sql.internal.SQLConf diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamMetadata.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamMetadata.scala index 516afbea5d9de..fc0cfc30ff2fd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamMetadata.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamMetadata.scala @@ -31,14 +31,14 @@ import org.json4s.jackson.Serialization import org.apache.spark.internal.Logging import org.apache.spark.sql.execution.streaming.CheckpointFileManager.CancellableFSDataOutputStream -import org.apache.spark.sql.streaming.StreamingQuery /** - * Contains metadata associated with a [[StreamingQuery]]. This information is written - * in the checkpoint location the first time a query is started and recovered every time the query - * is restarted. + * Contains metadata associated with a [[org.apache.spark.sql.streaming.StreamingQuery]]. + * This information is written in the checkpoint location the first time a query is started + * and recovered every time the query is restarted. 
* - * @param id unique id of the [[StreamingQuery]] that needs to be persisted across restarts + * @param id unique id of the [[org.apache.spark.sql.streaming.StreamingQuery]] + * that needs to be persisted across restarts */ case class StreamMetadata(id: String) { def json: String = Serialization.write(this)(StreamMetadata.format) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinHelper.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinHelper.scala index 71792facf698a..2f62dbd7ec578 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinHelper.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinHelper.scala @@ -21,13 +21,13 @@ import scala.reflect.ClassTag import org.apache.spark.{Partition, SparkContext, TaskContext} import org.apache.spark.internal.Logging -import org.apache.spark.rdd.{RDD, ZippedPartitionsBaseRDD, ZippedPartitionsPartition, ZippedPartitionsRDD2} +import org.apache.spark.rdd.{RDD, ZippedPartitionsBaseRDD, ZippedPartitionsPartition} import org.apache.spark.sql.catalyst.analysis.StreamingJoinHelper import org.apache.spark.sql.catalyst.expressions.{And, Attribute, AttributeSet, BoundReference, Expression, NamedExpression, PredicateHelper} import org.apache.spark.sql.catalyst.plans.logical.EventTimeWatermark._ import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.streaming.WatermarkSupport.watermarkExpression -import org.apache.spark.sql.execution.streaming.state.{StateStoreCoordinatorRef, StateStoreProvider, StateStoreProviderId} +import org.apache.spark.sql.execution.streaming.state.{StateStoreCoordinatorRef, StateStoreProviderId} /** @@ -200,8 +200,8 @@ object StreamingSymmetricHashJoinHelper extends Logging { /** * A custom RDD that allows partitions to be "zipped" together, while ensuring the tasks' * preferred location is based on which executors have the required join state stores already - * loaded. This class is a variant of [[ZippedPartitionsRDD2]] which only changes signature - * of `f`. + * loaded. This class is a variant of [[org.apache.spark.rdd.ZippedPartitionsRDD2]] which only + * changes signature of `f`. 
*/ class StateStoreAwareZipPartitionsRDD[A: ClassTag, B: ClassTag, V: ClassTag]( sc: SparkContext, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ForeachBatchSink.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ForeachBatchSink.scala index 6d5e7fd5c5cf3..60c66d863a3c5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ForeachBatchSink.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ForeachBatchSink.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.execution.streaming.sources -import org.apache.spark.api.python.PythonException import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.execution.streaming.Sink diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/PackedRowWriterFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/PackedRowWriterFactory.scala index 507f860e0452a..fa51dd61a939b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/PackedRowWriterFactory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/PackedRowWriterFactory.scala @@ -21,12 +21,13 @@ import scala.collection.mutable import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.connector.write.{BatchWrite, DataWriter, DataWriterFactory, WriterCommitMessage} +import org.apache.spark.sql.connector.write.{DataWriter, WriterCommitMessage} import org.apache.spark.sql.connector.write.streaming.StreamingDataWriterFactory /** - * A simple [[DataWriterFactory]] whose tasks just pack rows into the commit message for delivery - * to a [[BatchWrite]] on the driver. + * A simple [[org.apache.spark.sql.connector.write.DataWriterFactory]] whose tasks just pack rows + * into the commit message for delivery to a + * [[org.apache.spark.sql.connector.write.BatchWrite]] on the driver. * * Note that, because it sends all rows to the driver, this factory will generally be unsuitable * for production-quality sinks. It's intended for use in tests. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/memory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/memory.scala index a6ac6f2da8e41..778cfeda68af0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/memory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/memory.scala @@ -35,13 +35,12 @@ import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.EstimationUti import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table, TableCapability} import org.apache.spark.sql.connector.write.{DataWriter, DataWriterFactory, LogicalWriteInfo, PhysicalWriteInfo, SupportsTruncate, WriteBuilder, WriterCommitMessage} import org.apache.spark.sql.connector.write.streaming.{StreamingDataWriterFactory, StreamingWrite} -import org.apache.spark.sql.execution.streaming.Sink import org.apache.spark.sql.internal.connector.SupportsStreamingUpdateAsAppend import org.apache.spark.sql.types.StructType /** - * A sink that stores the results in memory. This [[Sink]] is primarily intended for use in unit - * tests and does not provide durability. + * A sink that stores the results in memory. 
This [[org.apache.spark.sql.execution.streaming.Sink]] + * is primarily intended for use in unit tests and does not provide durability. */ class MemorySink extends Table with SupportsWrite with Logging { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala index d52505fbdab35..05bcee7b05c6f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala @@ -27,7 +27,7 @@ import scala.util.control.NonFatal import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path -import org.apache.spark.{SparkContext, SparkEnv, SparkException} +import org.apache.spark.{SparkContext, SparkEnv} import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.sql.catalyst.util.UnsafeRowUtils diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala index b894e771a6fe2..f21e2ffb80a7b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala @@ -23,8 +23,6 @@ import scala.reflect.ClassTag import org.apache.spark.{Partition, TaskContext} import org.apache.spark.rdd.RDD -import org.apache.spark.sql.execution.streaming.StreamExecution -import org.apache.spark.sql.execution.streaming.continuous.EpochTracker import org.apache.spark.sql.internal.SessionState import org.apache.spark.sql.types.StructType import org.apache.spark.util.SerializableConfiguration diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala index 3fae3979757fe..dae771c613131 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala @@ -25,14 +25,14 @@ import org.apache.spark.TaskContext import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, JoinedRow, Literal, SpecificInternalRow, UnsafeProjection, UnsafeRow} -import org.apache.spark.sql.execution.streaming.{StatefulOperatorStateInfo, StreamingSymmetricHashJoinExec} +import org.apache.spark.sql.execution.streaming.StatefulOperatorStateInfo import org.apache.spark.sql.execution.streaming.StreamingSymmetricHashJoinHelper._ -import org.apache.spark.sql.execution.streaming.state.SymmetricHashJoinStateManager.KeyToValuePair import org.apache.spark.sql.types.{BooleanType, LongType, StructField, StructType} import org.apache.spark.util.NextIterator /** - * Helper class to manage state required by a single side of [[StreamingSymmetricHashJoinExec]]. + * Helper class to manage state required by a single side of + * [[org.apache.spark.sql.execution.streaming.StreamingSymmetricHashJoinExec]]. 
* The interface of this class is basically that of a multi-map: * - Get: Returns an iterator of multiple values for given key * - Append: Append a new value to the given key diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala index f5fbe0fc32254..1449d937982e8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala @@ -33,7 +33,6 @@ import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} import org.apache.spark.sql.execution.streaming.state._ -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.{OutputMode, StateOperatorProgress} import org.apache.spark.sql.types._ import org.apache.spark.util.{CompletionIterator, NextIterator, Utils} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/streamingLimits.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/streamingLimits.scala index b19540253d7eb..e53e0644eb268 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/streamingLimits.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/streamingLimits.scala @@ -22,7 +22,6 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericInternalRow, SortOrder, UnsafeProjection, UnsafeRow} import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, Distribution, Partitioning} -import org.apache.spark.sql.catalyst.streaming.InternalOutputModes import org.apache.spark.sql.execution.{LimitExec, SparkPlan, UnaryExecNode} import org.apache.spark.sql.execution.streaming.state.StateStoreOps import org.apache.spark.sql.streaming.OutputMode diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala index b693cae824bf9..6e0e36cbe5901 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala @@ -17,17 +17,11 @@ package org.apache.spark.sql.execution.window -import scala.collection.mutable -import scala.collection.mutable.ArrayBuffer - import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.plans.physical._ -import org.apache.spark.sql.catalyst.util.DateTimeUtils -import org.apache.spark.sql.execution.{ExternalAppendOnlyUnsafeRowArray, SparkPlan, UnaryExecNode} -import org.apache.spark.sql.types.{CalendarIntervalType, DateType, IntegerType, TimestampType} +import org.apache.spark.sql.execution.{ExternalAppendOnlyUnsafeRowArray, SparkPlan} /** * This class calculates and outputs (windowed) aggregates over the rows in a single (sorted) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala index a6a3f3d7384bf..c6b98d48d7dde 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala @@ -23,7 +23,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression -import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} +import org.apache.spark.sql.execution.UnaryExecNode import org.apache.spark.sql.types.{CalendarIntervalType, DateType, IntegerType, TimestampType} trait WindowExecBase extends UnaryExecNode { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala index 0cef33509a175..80dd3cf8bc840 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala @@ -17,16 +17,13 @@ package org.apache.spark.sql.expressions -import scala.reflect.runtime.universe.TypeTag - -import org.apache.spark.annotation.{Experimental, Stable} +import org.apache.spark.annotation.Stable import org.apache.spark.sql.{Column, Encoder} -import org.apache.spark.sql.catalyst.ScalaReflection import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.{Expression, ScalaUDF} import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete} import org.apache.spark.sql.execution.aggregate.ScalaAggregator -import org.apache.spark.sql.types.{AnyDataType, DataType} +import org.apache.spark.sql.types.DataType /** * A user-defined function. To create one, use the `udf` functions in `functions`. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala index f7591e4d265e0..4e3c5586209e4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala @@ -44,8 +44,6 @@ object typed { override protected def _sqlContext: SQLContext = null } - import implicits._ - /** * Average aggregate function. 
  *
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 5dc1c6b5b49fc..9861d21d3a430 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -18,8 +18,7 @@ package org.apache.spark.sql
 import scala.collection.JavaConverters._
-import scala.language.implicitConversions
-import scala.reflect.runtime.universe.{typeTag, TypeTag}
+import scala.reflect.runtime.universe.TypeTag
 import scala.util.Try
 import org.apache.spark.annotation.Stable
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index 0f9a89741c192..48d8c3d325347 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -33,7 +33,7 @@ import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.connector.catalog.CatalogManager
 import org.apache.spark.sql.execution._
 import org.apache.spark.sql.streaming.StreamingQueryManager
-import org.apache.spark.sql.util.{ExecutionListenerManager, QueryExecutionListener}
+import org.apache.spark.sql.util.ExecutionListenerManager
 /**
  * A class that holds all session-specific state in a given [[SparkSession]].
@@ -52,7 +52,8 @@ import org.apache.spark.sql.util.{ExecutionListenerManager, QueryExecutionListen
  * @param planner Planner that converts optimized logical plans to physical plans.
  * @param streamingQueryManagerBuilder A function to create a streaming query manager to
  *        start and stop streaming queries.
- * @param listenerManager Interface to register custom [[QueryExecutionListener]]s.
+ * @param listenerManager Interface to register custom
+ *        [[org.apache.spark.sql.util.QueryExecutionListener]]s.
  * @param resourceLoaderBuilder a function to create a session shared resource loader to load JARs,
  *        files, etc.
  * @param createQueryExecution Function used to create QueryExecution objects.
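For context (illustration only, not part of the patch): the `listenerManager` parameter documented above is the session's ExecutionListenerManager, reachable from user code as `SparkSession.listenerManager`. A minimal sketch of registering a custom listener follows; the listener body and app name are invented for illustration, while the APIs used are the public Spark ones.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.QueryExecution
import org.apache.spark.sql.util.QueryExecutionListener

val spark = SparkSession.builder().master("local[*]").appName("listener-sketch").getOrCreate()

// SessionState's ExecutionListenerManager is exposed as spark.listenerManager.
spark.listenerManager.register(new QueryExecutionListener {
  override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit =
    println(s"$funcName succeeded in ${durationNs / 1e6} ms")
  override def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit =
    println(s"$funcName failed: ${exception.getMessage}")
})

// An action executed through the Dataset API is reported to registered listeners.
spark.range(10).count()
```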
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala index 1acdc4bd5f0e3..89aceacac6007 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala @@ -31,13 +31,11 @@ import org.apache.hadoop.fs.FsUrlStreamHandlerFactory import org.apache.spark.{SparkConf, SparkContext, SparkException} import org.apache.spark.internal.Logging -import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.execution.CacheManager import org.apache.spark.sql.execution.streaming.StreamExecution import org.apache.spark.sql.execution.ui.{SQLAppStatusListener, SQLAppStatusStore, SQLTab} import org.apache.spark.sql.internal.StaticSQLConf._ -import org.apache.spark.sql.streaming.StreamingQueryListener import org.apache.spark.sql.streaming.ui.{StreamingQueryStatusListener, StreamingQueryTab} import org.apache.spark.status.ElementTrackingStore import org.apache.spark.util.Utils diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala index 0fe2d0be966d0..ffdbe9d4e4915 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala @@ -29,7 +29,6 @@ import org.apache.hadoop.fs.Path import org.apache.spark.SparkException import org.apache.spark.annotation.Evolving import org.apache.spark.internal.Logging -import org.apache.spark.internal.config.UI.UI_ENABLED import org.apache.spark.sql.{AnalysisException, DataFrame, SparkSession} import org.apache.spark.sql.catalyst.analysis.UnsupportedOperationChecker import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/UIUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/UIUtils.scala index cdad5ed9942b5..1f7e65dede170 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/UIUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/UIUtils.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.streaming.ui import java.text.SimpleDateFormat -import java.util.Locale import org.apache.spark.sql.catalyst.util.DateTimeUtils.getTimeZone diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 321f4966178d7..d34dcb4fe0c01 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -26,7 +26,6 @@ import java.util.concurrent.atomic.AtomicLong import scala.reflect.runtime.universe.TypeTag import scala.util.Random -import org.scalatest.matchers.must.Matchers import org.scalatest.matchers.should.Matchers._ import org.apache.spark.SparkException diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala index 8b0f46b9d1ddb..4fdaeb57ad50e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql -import 
org.scalatest.BeforeAndAfterEach - import org.apache.spark.sql.catalyst.plans.logical.Expand import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.SharedSparkSession diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala index 8c998290b5044..fd408c37ef6cd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql -import java.sql.Date - import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.SharedSparkSession diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala index de791383326f1..35e732e0840e4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala @@ -23,7 +23,7 @@ import scala.collection.JavaConverters._ import org.scalatest.BeforeAndAfter -import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchTableException, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic} import org.apache.spark.sql.connector.{InMemoryTable, InMemoryTableCatalog} import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala index 2be86b9ad6208..ac51634febc99 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql import scala.collection.immutable.{HashSet => HSet} import scala.collection.immutable.Queue import scala.collection.mutable.{LinkedHashMap => LHMap} -import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.test.SharedSparkSession diff --git a/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala index 80346b350c142..861a001b190aa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala @@ -27,7 +27,6 @@ import org.scalatest.Assertions._ import org.apache.spark.TestUtils import org.apache.spark.api.python.{PythonBroadcast, PythonEvalType, PythonFunction, PythonUtils} import org.apache.spark.broadcast.Broadcast -import org.apache.spark.internal.config.Tests import org.apache.spark.sql.catalyst.expressions.{Cast, Expression} import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.execution.python.UserDefinedPythonFunction diff --git a/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala index c2aee0ad4c9a1..76204c504c0ed 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala @@ -30,7 +30,6 @@ import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.adaptive.DisableAdaptiveExecutionSuite import org.apache.spark.sql.execution.exchange.{Exchange, ReusedExchangeExec} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.tags.ExtendedSQLTest // scalastyle:off line.size.limit /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index b016cc3f57e0d..65377594f083c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -25,7 +25,6 @@ import java.util.concurrent.TimeUnit import scala.collection.mutable import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.catalog.CatalogColumnStat import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.util.DateTimeTestUtils diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala index 6b25d7c61663c..46112d40f08ba 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala @@ -30,7 +30,6 @@ import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.internal.SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.StructType -import org.apache.spark.sql.util.CaseInsensitiveStringMap class DataSourceV2DataFrameSessionCatalogSuite extends InsertIntoTests(supportsDynamicOverwrite = true, includeSQLOnlyTests = false) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 4eaf5822e1628..ddafa1bb5070a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -25,7 +25,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.SparkException import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NamespaceAlreadyExistsException, NoSuchDatabaseException, NoSuchNamespaceException, NoSuchPartitionException, NoSuchPartitionsException, NoSuchTableException, PartitionsAlreadyExistException, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NamespaceAlreadyExistsException, NoSuchDatabaseException, NoSuchNamespaceException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala index eacdb9e2fcd7b..3aad644655aa6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression} import org.apache.spark.sql.connector.catalog.{Identifier, SupportsCatalogOptions, TableCatalog} import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME -import org.apache.spark.sql.connector.expressions.{FieldReference, IdentityTransform, Transform} +import org.apache.spark.sql.connector.expressions.{FieldReference, IdentityTransform} import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.internal.SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala index 2d75a35215866..bad21aac41712 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.analysis.{AnalysisSuite, NamedRelation} import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Literal} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 -import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier, Table, TableCapability, TableProvider} +import org.apache.spark.sql.connector.catalog.{Table, TableCapability} import org.apache.spark.sql.connector.catalog.TableCapability._ import org.apache.spark.sql.execution.datasources.DataSource import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, TableCapabilityCheck} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/V1ReadFallbackSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1ReadFallbackSuite.scala index 74f2ca14234d2..9beef690cba32 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/V1ReadFallbackSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1ReadFallbackSuite.scala @@ -23,7 +23,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, QueryTest, Row, SparkSession, SQLContext} -import org.apache.spark.sql.connector.catalog.{Identifier, SupportsRead, Table, TableCapability, TableProvider} +import org.apache.spark.sql.connector.catalog.{Identifier, SupportsRead, Table, TableCapability} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownFilters, SupportsPushDownRequiredColumns, V1Scan} import org.apache.spark.sql.execution.RowDataSourceScanExec diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala index e6029400997a2..81f292809df4a 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala @@ -28,7 +28,6 @@ import org.scalatest.exceptions.TestFailedException import org.apache.spark.{SparkException, TaskContext, TestUtils} import org.apache.spark.rdd.RDD -import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, GenericInternalRow} import org.apache.spark.sql.catalyst.plans.physical.Partitioning diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index 7db94a702488a..b631f08405a39 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.{execution, DataFrame, Row} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans._ -import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Range, Repartition, Sort, Union} +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Range, Repartition, Union} import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanHelper, DisableAdaptiveExecution} import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAggregateExec, SortAggregateExec} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala index ddaa2687eaf1a..18d36670306b8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution import org.apache.spark.sql.{DataFrame, QueryTest} import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, Project} -import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, FileScan} +import org.apache.spark.sql.execution.datasources.v2.BatchScanExec import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanTest.scala index 7ddf9d87a6aca..f1fcf3bc5125e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanTest.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.execution -import scala.language.implicitConversions import scala.util.control.NonFatal import org.apache.spark.SparkFunSuite diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveTestUtils.scala index 48f85ae76cd8c..ad3ec85e984c8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveTestUtils.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveTestUtils.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.execution.adaptive -import java.io.{PrintWriter, StringWriter} - import org.scalactic.source.Position import org.scalatest.Tag diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala index 9ade8b14f59b0..a98ca7f5d8f88 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.{DataFrame, SparkSession} import org.apache.spark.sql.functions.{monotonically_increasing_id, timestamp_seconds} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.ParquetOutputTimestampType -import org.apache.spark.sql.types.{ByteType, Decimal, DecimalType, TimestampType} +import org.apache.spark.sql.types.{ByteType, Decimal, DecimalType} /** * Benchmark to measure read performance with Filter pushdown. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ParquetNestedPredicatePushDownBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ParquetNestedPredicatePushDownBenchmark.scala index d2bd962b50654..f89fe2e64c778 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ParquetNestedPredicatePushDownBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ParquetNestedPredicatePushDownBenchmark.scala @@ -17,9 +17,8 @@ package org.apache.spark.sql.execution.benchmark -import org.apache.spark.SparkConf import org.apache.spark.benchmark.Benchmark -import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession} +import org.apache.spark.sql.{DataFrame, SaveMode} import org.apache.spark.sql.internal.SQLConf /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala index 43bc7c12937ec..f931914b19c6c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.benchmark import org.apache.spark.SparkConf import org.apache.spark.benchmark.Benchmark -import org.apache.spark.internal.Logging import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.catalog.HiveTableRelation import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnStatsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnStatsSuite.scala index 847e0ec4f3195..0abb3cb6a2ed0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnStatsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnStatsSuite.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.columnar import org.apache.spark.SparkFunSuite import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.types.CalendarInterval class ColumnStatsSuite extends SparkFunSuite { testColumnStats(classOf[BooleanColumnStats], BOOLEAN, Array(true, false, 0)) 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 348cf94dfc629..9d0147048dbb8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -2026,7 +2026,6 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { } test("SPARK-30312: truncate table - keep acl/permission") { - import testImplicits._ val ignorePermissionAcl = Seq(true, false) ignorePermissionAcl.foreach { ignore => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index f5809ebbb836e..fd1978c5137a5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -26,7 +26,7 @@ import org.mockito.invocation.InvocationOnMock import org.apache.spark.sql.{AnalysisException, SaveMode} import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, Analyzer, CTESubstitution, EmptyFunctionRegistry, NoSuchTableException, ResolveCatalogs, ResolvedTable, ResolveInlineTables, ResolveSessionCatalog, UnresolvedAttribute, UnresolvedRelation, UnresolvedStar, UnresolvedSubqueryColumnAliases, UnresolvedV2Relation} +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, Analyzer, CTESubstitution, EmptyFunctionRegistry, NoSuchTableException, ResolveCatalogs, ResolvedTable, ResolveInlineTables, ResolveSessionCatalog, UnresolvedAttribute, UnresolvedRelation, UnresolvedSubqueryColumnAliases, UnresolvedV2Relation} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType, InMemoryCatalog, SessionCatalog} import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Expression, InSubquery, IntegerLiteral, ListQuery, StringLiteral} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/ReadSchemaTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/ReadSchemaTest.scala index fd70b6529ff51..22db55afc27c9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/ReadSchemaTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/ReadSchemaTest.scala @@ -21,7 +21,7 @@ import java.io.File import org.apache.spark.sql.{QueryTest, Row} import org.apache.spark.sql.functions._ -import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.test.SharedSparkSession /** * The reader schema is said to be evolved (or projected) when it changed after the data is diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/RowDataSourceStrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/RowDataSourceStrategySuite.scala index 6420081a9757b..3e8a4fe290502 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/RowDataSourceStrategySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/RowDataSourceStrategySuite.scala @@ -22,15 +22,10 @@ import java.util.Properties import org.scalatest.BeforeAndAfter 
-import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.{DataFrame, Row} -import org.apache.spark.sql.sources._ import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types._ import org.apache.spark.util.Utils class RowDataSourceStrategySuite extends SharedSparkSession with BeforeAndAfter { - import testImplicits._ val url = "jdbc:h2:mem:testdb0" val urlWithUserAndPass = "jdbc:h2:mem:testdb0;user=testUser;password=testPass" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala index 233978289f068..e843d1d328425 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.execution.datasources -import org.apache.spark.SparkConf import org.apache.spark.sql.SaveMode import org.apache.spark.sql.test.SharedSparkSession diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala index 8462916daaab8..86ff026d7b1e9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.execution.datasources.PartitionedFile import org.apache.spark.sql.functions.col import org.apache.spark.sql.internal.SQLConf.SOURCES_BINARY_FILE_MAX_LENGTH import org.apache.spark.sql.sources._ -import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ import org.apache.spark.util.Utils diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala index d27b5c4737a11..7cc3a1cf9f3b8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala @@ -18,12 +18,11 @@ package org.apache.spark.sql.execution.datasources.json import org.apache.spark.sql.{QueryTest, Row} -import org.apache.spark.sql.catalyst.json.JSONOptions import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types.{DoubleType, StringType, StructType} +import org.apache.spark.sql.types.{StringType, StructType} /** - * Test cases for various [[JSONOptions]]. + * Test cases for various [[org.apache.spark.sql.catalyst.json.JSONOptions]]. 
*/ class JsonParsingOptionsSuite extends QueryTest with SharedSparkSession { import testImplicits._ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala index 6c9bd32913178..378b52f9c6c8c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.datasources.orc import org.apache.spark.SparkConf -import org.apache.spark.sql.{DataFrame, Row} +import org.apache.spark.sql.DataFrame import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.datasources.SchemaPruningSuite diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCommitterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCommitterSuite.scala index 4b2437803d645..7f408dbba5099 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCommitterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCommitterSuite.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.execution.datasources.parquet -import java.io.FileNotFoundException - import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hadoop.mapreduce.{JobContext, TaskAttemptContext} @@ -149,7 +147,7 @@ private object MarkingFileOutput { * @param outputPath destination directory * @param conf configuration to create the FS with * @return the status of the marker - * @throws FileNotFoundException if the marker is absent + * @throws java.io.FileNotFoundException if the marker is absent */ def checkMarker(outputPath: Path, conf: Configuration): FileStatus = { outputPath.getFileSystem(conf).getFileStatus(new Path(outputPath, "marker")) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala index 34bdef7bdb402..d13b3e58a30ff 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.datasources.parquet import java.nio.file.{Files, Paths, StandardCopyOption} import java.sql.{Date, Timestamp} -import java.time._ import java.util.Locale import scala.collection.JavaConverters._ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala index 8c4eedfde76cd..8c5f7bed7c50d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala @@ -21,7 +21,7 @@ import java.io.File import java.time.ZoneOffset import org.apache.commons.io.FileUtils -import org.apache.hadoop.fs.{FileSystem, Path, 
PathFilter} +import org.apache.hadoop.fs.{Path, PathFilter} import org.apache.parquet.format.converter.ParquetMetadataConverter.NO_FILTER import org.apache.parquet.hadoop.ParquetFileReader import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala index accd04592bec5..5c41614c45b6f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala @@ -23,8 +23,6 @@ import java.sql.{Date, Timestamp} import java.time.{ZoneId, ZoneOffset} import java.util.{Calendar, Locale} -import scala.collection.mutable.ArrayBuffer - import com.google.common.io.Files import org.apache.hadoop.fs.Path import org.apache.parquet.hadoop.ParquetOutputFormat diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala index 7990b1c27437a..e97c6cd29709c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala @@ -23,7 +23,7 @@ import scala.reflect.runtime.universe.TypeTag import org.apache.parquet.io.ParquetDecodingException import org.apache.parquet.schema.{MessageType, MessageTypeParser} -import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.ScalaReflection import org.apache.spark.sql.execution.QueryExecutionException import org.apache.spark.sql.execution.datasources.SchemaColumnConvertNotSupportedException diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala index c53617b40e09d..622d69e188821 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala @@ -22,7 +22,6 @@ import java.lang.{Long => JLong} import java.net.URI import java.nio.charset.StandardCharsets.UTF_8 import java.util.concurrent.ConcurrentHashMap -import java.util.concurrent.atomic.AtomicLong import scala.util.Random diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MemorySinkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MemorySinkSuite.scala index 3ead91fcf712a..014840d758c0c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MemorySinkSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MemorySinkSuite.scala @@ -24,7 +24,7 @@ import org.scalatest.BeforeAndAfter import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.execution.streaming.sources._ -import org.apache.spark.sql.streaming.{OutputMode, StreamTest} +import org.apache.spark.sql.streaming.StreamTest import org.apache.spark.sql.types.{IntegerType, StructField, StructType} import org.apache.spark.util.Utils diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/FlatMapGroupsWithStateExecHelperSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/FlatMapGroupsWithStateExecHelperSuite.scala index dec30fd01f7e2..ea6fd8ab312c9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/FlatMapGroupsWithStateExecHelperSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/FlatMapGroupsWithStateExecHelperSuite.scala @@ -23,7 +23,6 @@ import org.apache.spark.sql.Encoder import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, UnsafeProjection, UnsafeRow} import org.apache.spark.sql.execution.streaming.GroupStateImpl._ -import org.apache.spark.sql.streaming.FlatMapGroupsWithStateSuite._ import org.apache.spark.sql.streaming.StreamTest import org.apache.spark.sql.types._ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala index 298820349b683..6eb070138c3b8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalog.{Column, Database, Function, Table} import org.apache.spark.sql.catalyst.{FunctionIdentifier, ScalaReflection, TableIdentifier} import org.apache.spark.sql.catalyst.catalog._ -import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo} +import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans.logical.Range import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.StructType diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala index 77a5d12cd8c95..580e7df6ef63e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala @@ -19,8 +19,6 @@ package org.apache.spark.sql.internal import java.util.TimeZone -import scala.language.reflectiveCalls - import org.apache.hadoop.fs.Path import org.apache.log4j.Level diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala index 4832386e553db..167e87dd3d5cb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.catalog.BucketSpec import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning -import org.apache.spark.sql.execution.{DataSourceScanExec, FileSourceScanExec, SortExec, SparkPlan} +import org.apache.spark.sql.execution.{FileSourceScanExec, SortExec, SparkPlan} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec import org.apache.spark.sql.execution.datasources.BucketingUtils import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala index 1fdd3be88f782..179cdeb976391 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala @@ -22,7 +22,6 @@ import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning import org.apache.spark.sql.execution.FileSourceScanExec import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanHelper, DisableAdaptiveExecutionSuite, EnableAdaptiveExecutionSuite} -import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/PathOptionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/PathOptionSuite.scala index 9b26a5659df49..48d717daf00d4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/PathOptionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/PathOptionSuite.scala @@ -19,8 +19,6 @@ package org.apache.spark.sql.sources import java.net.URI -import org.apache.hadoop.fs.Path - import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession, SQLContext} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.CatalogUtils diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala index f97c9386f9488..788be539fe073 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala @@ -21,7 +21,6 @@ import java.io.File import java.sql.Date import org.apache.commons.io.FileUtils -import org.scalatest.BeforeAndAfterAll import org.scalatest.exceptions.TestFailedException import org.apache.spark.SparkException @@ -34,7 +33,7 @@ import org.apache.spark.sql.catalyst.plans.physical.UnknownPartitioning import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ import org.apache.spark.sql.execution.RDDScanExec import org.apache.spark.sql.execution.streaming._ -import org.apache.spark.sql.execution.streaming.state.{FlatMapGroupsWithStateExecHelper, MemoryStateStore, StateStore, StateStoreId, StateStoreMetrics, UnsafeRowPair} +import org.apache.spark.sql.execution.streaming.state.{FlatMapGroupsWithStateExecHelper, MemoryStateStore, StateStore} import org.apache.spark.sql.functions.timestamp_seconds import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.util.StreamManualClock diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala index 7a2e29f1258ae..624b630401f47 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.streaming import scala.collection.mutable import scala.collection.mutable.ArrayBuffer -import scala.language.experimental.macros import scala.reflect.ClassTag import 
scala.util.Random import scala.util.control.NonFatal diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala index 4a57cc27b1d59..0524e29662014 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala @@ -20,8 +20,6 @@ package org.apache.spark.sql.streaming import java.io.File import java.util.{Locale, TimeZone} -import scala.collection.mutable - import org.apache.commons.io.FileUtils import org.scalatest.Assertions diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala index e1505acf3ecda..ac9cd1a12d06f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala @@ -17,13 +17,9 @@ package org.apache.spark.sql.streaming -import org.scalatest.BeforeAndAfterAll - -import org.apache.spark.sql.{DataFrame, Row} -import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, HashPartitioning, SinglePartition} +import org.apache.spark.sql.DataFrame import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ -import org.apache.spark.sql.execution.streaming.{MemoryStream, StreamingDeduplicateExec} -import org.apache.spark.sql.execution.streaming.state.StateStore +import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala index b2bb00b704a69..a25616af360b1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala @@ -26,19 +26,11 @@ import scala.util.Random import org.apache.commons.io.FileUtils import org.scalatest.BeforeAndAfter -import org.apache.spark.SparkContext import org.apache.spark.scheduler.ExecutorCacheTaskLocation -import org.apache.spark.sql.{AnalysisException, DataFrame, Row, SparkSession} -import org.apache.spark.sql.catalyst.analysis.StreamingJoinHelper -import org.apache.spark.sql.catalyst.expressions.{AttributeReference, AttributeSet, Literal} -import org.apache.spark.sql.catalyst.plans.logical.{EventTimeWatermark, Filter} -import org.apache.spark.sql.catalyst.trees.TreeNode -import org.apache.spark.sql.execution.{FileSourceScanExec, LogicalRDD} -import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.{DataFrame, Row, SparkSession} import org.apache.spark.sql.execution.streaming.{MemoryStream, StatefulOperatorStateInfo, StreamingSymmetricHashJoinExec, StreamingSymmetricHashJoinHelper} import org.apache.spark.sql.execution.streaming.state.{StateStore, StateStoreProviderId} import org.apache.spark.sql.functions._ -import org.apache.spark.sql.types._ import org.apache.spark.util.Utils abstract class StreamingJoinSuite diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala index 
0d17f2e0bc7fb..02f91399fce1c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala @@ -22,7 +22,6 @@ import java.sql.Timestamp import org.apache.spark.{SparkContext, SparkException} import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskStart} import org.apache.spark.sql._ -import org.apache.spark.sql.execution.datasources.v2.ContinuousScanExec import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.continuous._ import org.apache.spark.sql.execution.streaming.sources.ContinuousMemoryStream diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala index 8d39704c61d4e..bdc714d49fcc9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala @@ -89,8 +89,6 @@ class DefaultSource extends StreamSourceProvider with StreamSinkProvider { override def getOffset: Option[Offset] = Some(new LongOffset(0)) override def getBatch(start: Option[Offset], end: Offset): DataFrame = { - import spark.implicits._ - spark.internalCreateDataFrame(spark.sparkContext.emptyRDD, schema, isStreaming = true) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/GenericFunSpecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/GenericFunSpecSuite.scala index 1b6724054a3ad..d15e5c42732d1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/GenericFunSpecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/GenericFunSpecSuite.scala @@ -19,8 +19,6 @@ package org.apache.spark.sql.test import org.scalatest.funspec.AnyFunSpec -import org.apache.spark.sql.Dataset - /** * The purpose of this suite is to make sure that generic FunSpec-based scala * tests work with a shared spark session diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala index e58357a415545..45cfa86ba9343 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala @@ -17,10 +17,8 @@ package org.apache.spark.sql.hive.thriftserver -import java.util.UUID import java.util.regex.Pattern -import org.apache.commons.lang3.exception.ExceptionUtils import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType import org.apache.hive.service.cli._ import org.apache.hive.service.cli.operation.GetSchemasOperation @@ -29,7 +27,6 @@ import org.apache.hive.service.cli.session.HiveSession import org.apache.spark.internal.Logging import org.apache.spark.sql.SQLContext -import org.apache.spark.util.{Utils => SparkUtils} /** * Spark's own GetSchemasOperation diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala index bccad865be27a..bddf5eb82012f 100644 --- 
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala @@ -30,7 +30,6 @@ import org.apache.hive.service.cli.session.HiveSession import org.apache.spark.internal.Logging import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.catalog.CatalogTableType._ -import org.apache.spark.sql.hive.HiveUtils /** * Spark's own GetTablesOperation diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala index c39d2ecdd7923..df0fa514ccff3 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala @@ -24,7 +24,6 @@ import javax.security.auth.login.LoginException import scala.collection.JavaConverters._ import scala.util.control.NonFatal -import org.apache.commons.logging.Log import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.hadoop.hive.shims.Utils @@ -37,7 +36,6 @@ import org.apache.hive.service.server.HiveServer2 import org.slf4j.Logger import org.apache.spark.sql.SQLContext -import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._ private[hive] class SparkSQLCLIService(hiveServer: HiveServer2, sqlContext: SQLContext) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala index e4559e69e7585..856edede0b85f 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala @@ -17,11 +17,7 @@ package org.apache.spark.sql.hive.thriftserver -import java.util.concurrent.Executors - -import org.apache.commons.logging.Log import org.apache.hadoop.hive.conf.HiveConf -import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.hive.service.cli.SessionHandle import org.apache.hive.service.cli.session.SessionManager import org.apache.hive.service.rpc.thrift.TProtocolVersion diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala index 8efbdb30c605c..54a40e3990f09 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.hive.thriftserver.ui import java.net.URLEncoder import java.nio.charset.StandardCharsets.UTF_8 -import java.util.Calendar import javax.servlet.http.HttpServletRequest import scala.xml.Node diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index f5ce21f2af335..d39b94503fe40 100644 --- 
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -27,7 +27,7 @@ import scala.concurrent.Promise import scala.concurrent.duration._ import org.apache.hadoop.hive.conf.HiveConf.ConfVars -import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach} +import org.scalatest.BeforeAndAfterAll import org.apache.spark.SparkFunSuite import org.apache.spark.internal.Logging diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala index be42497113469..4a87be5f61195 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala @@ -23,7 +23,6 @@ import java.util.{Locale, MissingFormatArgumentException} import scala.util.control.NonFatal -import org.apache.commons.io.FileUtils import org.apache.commons.lang3.exception.ExceptionUtils import org.apache.spark.SparkException diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index a685549290f0e..d9b6bb43c2b47 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -22,7 +22,6 @@ import java.io.File import org.scalatest.BeforeAndAfter import org.apache.spark.sql.catalyst.rules.RuleExecutor -import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.test.TestHive import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index f01a03996821a..907bb86ad0c1c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -41,7 +41,6 @@ import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap -import org.apache.spark.sql.connector.catalog.TableCatalog import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.{PartitioningUtils, SourceOptions} import org.apache.spark.sql.hive.client.HiveClient diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index 0082fa87e00f1..46a8e9660a207 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -24,7 +24,6 @@ import java.util.concurrent.TimeUnit import scala.collection.JavaConverters._ import scala.collection.mutable.HashMap -import scala.language.implicitConversions import org.apache.commons.lang3.{JavaVersion, SystemUtils} import 
org.apache.hadoop.conf.Configuration diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index a78e1cebc588c..9bc99b08c2cc8 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -57,7 +57,6 @@ import org.apache.spark.sql.connector.catalog.SupportsNamespaces._ import org.apache.spark.sql.execution.QueryExecutionException import org.apache.spark.sql.hive.HiveExternalCatalog import org.apache.spark.sql.hive.HiveExternalCatalog.{DATASOURCE_SCHEMA, DATASOURCE_SCHEMA_NUMPARTS, DATASOURCE_SCHEMA_PART_PREFIX} -import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.util.{CircularBuffer, Utils} diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala index a5417b2c5ff6d..44a3a4c590934 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala @@ -41,13 +41,12 @@ import org.apache.hadoop.hive.serde.serdeConstants import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.catalyst.analysis.NoSuchPermanentFunctionException import org.apache.spark.sql.catalyst.catalog.{CatalogFunction, CatalogTablePartition, CatalogUtils, FunctionResource, FunctionResourceType} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.TypeUtils -import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{AtomicType, IntegralType, StringType} import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/SaveAsHiveFile.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/SaveAsHiveFile.scala index 4be3cd45454c6..c712a4a2b7c23 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/SaveAsHiveFile.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/SaveAsHiveFile.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.hive.execution -import java.io.{File, IOException} +import java.io.IOException import java.net.URI import java.text.SimpleDateFormat import java.util.{Date, Locale, Random} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertIntoHiveTableBenchmark.scala b/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertIntoHiveTableBenchmark.scala index da34c54cb36a2..e71b11e7a3f41 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertIntoHiveTableBenchmark.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertIntoHiveTableBenchmark.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.benchmark import org.apache.spark.benchmark.Benchmark import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.test.TestHive /** diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala index 270595b0011e9..e413e0ee73cb9 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.hive -import java.net.URI - import org.apache.hadoop.conf.Configuration import org.apache.spark.SparkConf diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala index 86fc32cd8ca63..b3ea54a7bc931 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala @@ -25,7 +25,6 @@ import org.apache.spark.sql.catalyst.catalog.HiveTableRelation import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -import org.apache.spark.util.Utils /** * A suite of tests for the Parquet support through the data sources API. diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala index 54c64a4eeb190..89131a79e59de 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala @@ -16,9 +16,6 @@ */ package org.apache.spark.sql.hive -import scala.collection.JavaConverters._ -import scala.language.implicitConversions - import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hive.serde2.ColumnProjectionUtils diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala index 446923ad23201..3e7c3e6799724 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.{AnalysisException, ShowCreateTableSuite} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.hive.test.TestHiveSingleton -import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} +import org.apache.spark.sql.internal.HiveSerDe class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSingleton { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala index 501a877e8b7fb..77d54ed45a5de 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala @@ -770,8 +770,6 @@ object SPARK_14244 extends QueryTest { val hiveContext = new TestHiveContext(sparkContext) spark = hiveContext.sparkSession - import hiveContext.implicits._ - try { val window = Window.orderBy("id") val df = spark.range(2).select(cume_dist().over(window).as("cdist")).orderBy("cdist") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUserDefinedTypeSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUserDefinedTypeSuite.scala index ca1af73b038a7..d0af8dc7ae49f 100644 --- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUserDefinedTypeSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUserDefinedTypeSuite.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.hive import scala.collection.JavaConverters._ -import scala.util.Random import org.apache.hadoop.hive.ql.udf.generic.GenericUDF import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspector, StandardListObjectInspector} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala index 4ad97eaa2b1c8..d8e1e01292820 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala @@ -23,9 +23,8 @@ import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.spark.SparkConf import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.sql.QueryTest -import org.apache.spark.sql.execution.HiveResult import org.apache.spark.sql.hive.test.TestHiveSingleton -import org.apache.spark.sql.test.{ExamplePoint, ExamplePointUDT, SQLTestUtils} +import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.util.ChildFirstURLClassLoader class HiveUtilsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala index 1e396553c9c52..483622b16762a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala @@ -17,12 +17,8 @@ package org.apache.spark.sql.hive -import java.io.File import java.sql.Timestamp -import com.google.common.io.Files -import org.apache.hadoop.fs.FileSystem - import org.apache.spark.internal.config._ import org.apache.spark.sql._ import org.apache.spark.sql.hive.test.TestHiveSingleton diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 7d5a200606356..43d1ba04c561d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionException import org.apache.spark.sql.catalyst.catalog.{CatalogColumnStat, CatalogStatistics, HiveTableRelation} -import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, HistogramBin, HistogramSerializer} +import org.apache.spark.sql.catalyst.plans.logical.HistogramBin import org.apache.spark.sql.catalyst.util.{DateTimeUtils, StringUtils} import org.apache.spark.sql.execution.command.{AnalyzeColumnCommand, CommandUtils, DDLUtils} import org.apache.spark.sql.execution.datasources.LogicalRelation diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientUserNameSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientUserNameSuite.scala index 77956f4fe69da..b94d517e89e30 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientUserNameSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientUserNameSuite.scala @@ -21,7 +21,6 @@ import java.security.PrivilegedExceptionAction import 
org.apache.hadoop.conf.Configuration import org.apache.hadoop.security.UserGroupInformation -import org.scalatest.{BeforeAndAfterAll, PrivateMethodTester} import org.apache.spark.util.Utils diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index cea7c5686054a..1cabf6033e8d8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -34,7 +34,6 @@ import org.apache.spark.sql.catalyst.expressions.Cast import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.logical.Project import org.apache.spark.sql.execution.joins.BroadcastNestedLoopJoinExec -import org.apache.spark.sql.hive._ import org.apache.spark.sql.hive.test.{HiveTestJars, TestHive} import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.internal.SQLConf diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala index da7dfd05f33d6..8aae7a1545b1a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala @@ -17,11 +17,11 @@ package org.apache.spark.sql.hive.execution -import org.apache.spark.sql.{AnalysisException, Row, SaveMode, SparkSession} +import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} import org.apache.spark.sql.execution.SQLViewSuite -import org.apache.spark.sql.hive.test.{TestHive, TestHiveSingleton} +import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.types.{NullType, StructType} /** diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala index a8b10fc94d880..1018ae5b68895 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala @@ -17,10 +17,8 @@ package org.apache.spark.sql.hive.execution -import java.io.File import java.sql.Timestamp -import org.apache.commons.io.FileUtils import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe import org.scalatest.exceptions.TestFailedException @@ -28,7 +26,6 @@ import org.apache.spark.{SparkException, TestUtils} import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression} import org.apache.spark.sql.execution._ import org.apache.spark.sql.functions._ -import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.CalendarInterval diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala index ba6dbb01d5901..4a50621d89d4e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala +++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala @@ -21,7 +21,6 @@ import java.io.{File, IOException} import org.apache.spark.sql.Row import org.apache.spark.sql.functions.col -import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.test.{TestHive, TestHiveSingleton} import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive.implicits._ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala index dd797b39e0939..9e8046b9ef544 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala @@ -34,7 +34,6 @@ import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.plans.logical.Project import org.apache.spark.sql.execution.command.FunctionsCommand import org.apache.spark.sql.functions.max -import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala index 993a730524f6f..8e35cd034311d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.hive.execution import org.apache.spark.sql.QueryTest -import org.apache.spark.sql.catalyst.expressions.{AttributeReference, BinaryOperator, EqualTo, Expression, IsNotNull, Literal} +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, BinaryOperator, Expression, IsNotNull, Literal} import org.apache.spark.sql.execution.{FileSourceScanExec, SparkPlan} import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.test.SQLTestUtils diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index a69a949e3a3a2..712f81d98753e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -45,7 +45,6 @@ import org.apache.spark.sql.internal.StaticSQLConf.GLOBAL_TEMP_DATABASE import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types._ import org.apache.spark.tags.SlowHiveTest -import org.apache.spark.util.Utils case class Nested1(f1: Nested2) case class Nested2(f2: Nested3) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/UDAQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/UDAQuerySuite.scala index 1f1a5568b0201..50f13efccc915 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/UDAQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/UDAQuerySuite.scala @@ -17,23 +17,15 @@ package org.apache.spark.sql.hive.execution -import java.lang.{Double => jlDouble, Integer => jlInt, Long => jlLong} - -import scala.collection.JavaConverters._ -import scala.util.Random - -import test.org.apache.spark.sql.MyDoubleAvg 
-import test.org.apache.spark.sql.MyDoubleSum +import java.lang.{Double => jlDouble, Long => jlLong} import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.GenericInternalRow -import org.apache.spark.sql.catalyst.expressions.UnsafeRow -import org.apache.spark.sql.expressions.{Aggregator} +import org.apache.spark.sql.expressions.Aggregator import org.apache.spark.sql.functions._ import org.apache.spark.sql.hive.test.TestHiveSingleton -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types._ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index 5669cb757a678..f7c13ea047da7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -23,7 +23,6 @@ import java.util.{Set => JavaSet} import scala.collection.JavaConverters._ import scala.collection.mutable -import scala.language.implicitConversions import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path diff --git a/streaming/src/main/scala/org/apache/spark/status/api/v1/streaming/ApiStreamingRootResource.scala b/streaming/src/main/scala/org/apache/spark/status/api/v1/streaming/ApiStreamingRootResource.scala index a2571b910f615..99d59e4a1447a 100644 --- a/streaming/src/main/scala/org/apache/spark/status/api/v1/streaming/ApiStreamingRootResource.scala +++ b/streaming/src/main/scala/org/apache/spark/status/api/v1/streaming/ApiStreamingRootResource.scala @@ -23,9 +23,7 @@ import javax.ws.rs.core.MediaType import org.apache.spark.status.api.v1.NotFoundException import org.apache.spark.streaming.Time -import org.apache.spark.streaming.ui.StreamingJobProgressListener import org.apache.spark.streaming.ui.StreamingJobProgressListener._ -import org.apache.spark.ui.SparkUI @Produces(Array(MediaType.APPLICATION_JSON)) private[v1] class ApiStreamingRootResource extends BaseStreamingAppResource { diff --git a/streaming/src/main/scala/org/apache/spark/streaming/State.scala b/streaming/src/main/scala/org/apache/spark/streaming/State.scala index 734c6ef42696e..c4cd1a9dc336b 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/State.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/State.scala @@ -17,8 +17,6 @@ package org.apache.spark.streaming -import scala.language.implicitConversions - import org.apache.spark.annotation.Experimental /** diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala index d038021e93e73..4ac1c62822e7a 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala @@ -21,7 +21,6 @@ import java.nio.ByteBuffer import scala.collection.JavaConverters._ import scala.collection.mutable -import scala.language.implicitConversions import scala.util.control.NonFatal import org.apache.hadoop.conf.Configuration diff --git a/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala index 6b332206e8f6d..9d4b67bccecaf 
100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala @@ -22,7 +22,6 @@ import scala.util.Random import org.apache.spark.{SparkConf, SparkEnv} import org.apache.spark.rdd.BlockRDD import org.apache.spark.storage.{StorageLevel, StreamBlockId} -import org.apache.spark.streaming.StreamingConf.RECEIVER_WAL_ENABLE_CONF_KEY import org.apache.spark.streaming.dstream.ReceiverInputDStream import org.apache.spark.streaming.rdd.WriteAheadLogBackedBlockRDD import org.apache.spark.streaming.receiver.{BlockManagerBasedStoreResult, Receiver, WriteAheadLogBasedStoreResult} diff --git a/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala b/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala index 55c2950261a07..7ce4343acbdac 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala @@ -17,11 +17,10 @@ package org.apache.spark.streaming -import java.io.{File, IOException, ObjectInputStream} +import java.io.{IOException, ObjectInputStream} import java.util.concurrent.{ConcurrentLinkedQueue, TimeUnit} import scala.collection.JavaConverters._ -import scala.language.implicitConversions import scala.reflect.ClassTag import org.scalatest.BeforeAndAfterEach diff --git a/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala index cd867aa8132bc..31456b0b95b18 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala @@ -25,7 +25,6 @@ import scala.collection.mutable import org.scalatest.BeforeAndAfter import org.scalatest.concurrent.{Signaler, ThreadSignaler, TimeLimits} import org.scalatest.concurrent.Eventually._ -import org.scalatest.matchers.must.Matchers import org.scalatest.matchers.should.Matchers._ import org.scalatest.time.SpanSugar._ diff --git a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala index 293498ae5c37b..c2b039244d01f 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.streaming.scheduler import org.mockito.ArgumentMatchers.{any, eq => meq} import org.mockito.Mockito.{never, reset, times, verify, when} -import org.scalatest.{BeforeAndAfterEach, PrivateMethodTester} +import org.scalatest.PrivateMethodTester import org.scalatest.concurrent.Eventually.{eventually, timeout} import org.scalatest.time.SpanSugar._ import org.scalatestplus.mockito.MockitoSugar From 014e1fbb3aba81a803c963fc0b7f4a8d1d70e253 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 19 Nov 2020 14:01:42 +0800 Subject: [PATCH 0516/1009] [SPARK-27421][SQL] Fix filter for int column and value class java.lang.String when pruning partition column ### What changes were proposed in this pull request? This pr fix filter for int column and value class java.lang.String when pruning partition column. 
How to reproduce this issue: ```scala spark.sql("CREATE table test (name STRING) partitioned by (id int) STORED AS PARQUET") spark.sql("CREATE VIEW test_view as select cast(id as string) as id, name from test") spark.sql("SELECT * FROM test_view WHERE id = '0'").explain ``` ``` 20/11/15 06:19:01 INFO audit: ugi=root ip=unknown-ip-addr cmd=get_partitions_by_filter : db=default tbl=test 20/11/15 06:19:01 INFO MetaStoreDirectSql: Unable to push down SQL filter: Cannot push down filter for int column and value class java.lang.String 20/11/15 06:19:01 ERROR SparkSQLDriver: Failed in [SELECT * FROM test_view WHERE id = '0'] java.lang.RuntimeException: Caught Hive MetaException attempting to get partition metadata by filter from Hive. You can set the Spark configuration setting spark.sql.hive.manageFilesourcePartitions to false to work around this problem, however this will result in degraded performance. Please report a bug: https://issues.apache.org/jira/browse/SPARK at org.apache.spark.sql.hive.client.Shim_v0_13.getPartitionsByFilter(HiveShim.scala:828) at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$getPartitionsByFilter$1(HiveClientImpl.scala:745) at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$withHiveState$1(HiveClientImpl.scala:294) at org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:227) at org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:226) at org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:276) at org.apache.spark.sql.hive.client.HiveClientImpl.getPartitionsByFilter(HiveClientImpl.scala:743) ``` ### Why are the changes needed? Fix bug. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #30380 from wangyum/SPARK-27421. 
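For illustration only (not part of the patch): a minimal, self-contained Scala sketch of the behaviour the HiveShim change below enforces. The `DataType`, `Expr`, and `Attr` stand-ins are hypothetical simplifications of Catalyst classes; the point is just that a cast is only unwrapped when it is integral-to-integral, so `cast(id as string) = '0'` no longer pairs an int partition column with a `java.lang.String` value during metastore filter pushdown.

```scala
// Simplified sketch with made-up stand-in types (not Spark's Catalyst classes).
object ExtractorSketch {
  sealed trait DataType
  case object IntType extends DataType
  case object LongType extends DataType
  case object StringType extends DataType

  sealed trait Expr { def dataType: DataType }
  case class Attr(name: String, dataType: DataType) extends Expr
  case class Cast(child: Expr, dataType: DataType) extends Expr

  private def isIntegral(dt: DataType): Boolean = dt == IntType || dt == LongType

  // Mirrors the patched rule in spirit: only strip integral-to-integral casts.
  // The real shim additionally checks Cast.canUpCast to rule out lossy casts.
  object ExtractAttribute {
    def unapply(expr: Expr): Option[Attr] = expr match {
      case a: Attr => Some(a)
      case Cast(child, dt) if isIntegral(child.dataType) && isIntegral(dt) =>
        unapply(child)
      case _ => None
    }
  }

  def main(args: Array[String]): Unit = {
    val id = Attr("id", IntType)
    // cast(id as string): the cast is kept, so no unsafe string-vs-int pushdown
    println(ExtractAttribute.unapply(Cast(id, StringType))) // None
    // cast(id as bigint): integral up-cast, safe to strip
    println(ExtractAttribute.unapply(Cast(id, LongType)))   // Some(Attr(id,IntType))
  }
}
```

In the actual change below, `Cast.canUpCast` still guards the match; the sketch collapses that check into the integral-to-integral condition for brevity.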
Authored-by: Yuming Wang Signed-off-by: Yuming Wang --- .../org/apache/spark/sql/hive/client/HiveShim.scala | 2 +- .../sql/hive/client/HivePartitionFilteringSuite.scala | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala index 44a3a4c590934..d989f0154ea95 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala @@ -730,7 +730,7 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { def unapply(expr: Expression): Option[Attribute] = { expr match { case attr: Attribute => Some(attr) - case Cast(child @ AtomicType(), dt: AtomicType, _) + case Cast(child @ IntegralType(), dt: IntegralType, _) if Cast.canUpCast(child.dataType.asInstanceOf[AtomicType], dt) => unapply(child) case _ => None } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala index daa785bf110c5..81186909bb167 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.types.{BooleanType, IntegerType, LongType, StructType} +import org.apache.spark.sql.types.{BooleanType, IntegerType, LongType, StringType, StructType} import org.apache.spark.util.Utils class HivePartitionFilteringSuite(version: String) @@ -290,6 +290,13 @@ class HivePartitionFilteringSuite(version: String) (20170101 to 20170103, 0 to 4, Seq("ab", "bb")) :: Nil) } + test("getPartitionsByFilter: chunk in ('ab', 'ba') and ((cast(ds as string)>'20170102')") { + val day = (20170101 to 20170103, 0 to 4, Seq("ab", "ba")) + testMetastorePartitionFiltering( + attr("chunk").in("ab", "ba") && (attr("ds").cast(StringType) > "20170102"), + day :: Nil) + } + private def testMetastorePartitionFiltering( filterExpr: Expression, expectedDs: Seq[Int], From 0b0fb70b09c7424805478a261e264d9df044fb96 Mon Sep 17 00:00:00 2001 From: Prakhar Jain Date: Thu, 19 Nov 2020 06:25:37 +0000 Subject: [PATCH 0517/1009] [SPARK-33400][SQL] Normalize sameOrderExpressions in SortOrder to avoid unnecessary sort operations ### What changes were proposed in this pull request? This pull request tries to normalize the SortOrder properly to prevent unnecessary sort operators. Currently the sameOrderExpressions are not normalized as part of AliasAwareOutputOrdering. Example: consider this join of three tables: """ |SELECT t2id, t3.id as t3id |FROM ( | SELECT t1.id as t1id, t2.id as t2id | FROM t1, t2 | WHERE t1.id = t2.id |) t12, t3 |WHERE t1id = t3.id """. 
The plan for this looks like: *(8) Project [t2id#1059L, id#1004L AS t3id#1060L] +- *(8) SortMergeJoin [t2id#1059L], [id#1004L], Inner :- *(5) Sort [t2id#1059L ASC NULLS FIRST ], false, 0 <----------------------------- : +- *(5) Project [id#1000L AS t2id#1059L] : +- *(5) SortMergeJoin [id#996L], [id#1000L], Inner : :- *(2) Sort [id#996L ASC NULLS FIRST ], false, 0 : : +- Exchange hashpartitioning(id#996L, 5), true, [id=#1426] : : +- *(1) Range (0, 10, step=1, splits=2) : +- *(4) Sort [id#1000L ASC NULLS FIRST ], false, 0 : +- Exchange hashpartitioning(id#1000L, 5), true, [id=#1432] : +- *(3) Range (0, 20, step=1, splits=2) +- *(7) Sort [id#1004L ASC NULLS FIRST ], false, 0 +- Exchange hashpartitioning(id#1004L, 5), true, [id=#1443] +- *(6) Range (0, 30, step=1, splits=2) In this plan, the marked sort node could have been avoided as the data is already sorted on "t2.id" by the lower SortMergeJoin. ### Why are the changes needed? To remove unneeded Sort operators. ### Does this PR introduce any user-facing change? No ### How was this patch tested? New UT added. Closes #30302 from prakharjain09/SPARK-33400-sortorder. Authored-by: Prakhar Jain Signed-off-by: Wenchen Fan --- .../AliasAwareOutputExpression.scala | 6 +++- .../spark/sql/execution/PlannerSuite.scala | 31 +++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala index 3cbe1654ea2cd..3ba8745be995f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala @@ -65,7 +65,11 @@ trait AliasAwareOutputOrdering extends AliasAwareOutputExpression { final override def outputOrdering: Seq[SortOrder] = { if (hasAlias) { - orderingExpressions.map(normalizeExpression(_).asInstanceOf[SortOrder]) + orderingExpressions.map { sortOrder => + val newSortOrder = normalizeExpression(sortOrder).asInstanceOf[SortOrder] + val newSameOrderExpressions = newSortOrder.sameOrderExpressions.map(normalizeExpression) + newSortOrder.copy(sameOrderExpressions = newSameOrderExpressions) + } } else { orderingExpressions } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index b631f08405a39..6de81cc414d7d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -1059,6 +1059,37 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } } + test("SPARK-33400: Normalization of sortOrder should take care of sameOrderExprs") { + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withTempView("t1", "t2", "t3") { + spark.range(10).repartition($"id").createTempView("t1") + spark.range(20).repartition($"id").createTempView("t2") + spark.range(30).repartition($"id").createTempView("t3") + val planned = sql( + """ + |SELECT t2id, t3.id as t3id + |FROM ( + | SELECT t1.id as t1id, t2.id as t2id + | FROM t1, t2 + | WHERE t1.id = t2.id + |) t12, t3 + |WHERE t2id = t3.id + """.stripMargin).queryExecution.executedPlan + + val sortNodes = planned.collect { case s: SortExec => s } + assert(sortNodes.size == 3) + + val projects = planned.collect { case p: ProjectExec => p } + 
assert(projects.exists(_.outputOrdering match { + case Seq(SortOrder(_, Ascending, NullsFirst, sameOrderExprs)) => + sameOrderExprs.size == 1 && sameOrderExprs.head.isInstanceOf[AttributeReference] && + sameOrderExprs.head.asInstanceOf[AttributeReference].name == "t2id" + case _ => false + })) + } + } + } + test("aliases to expressions should not be replaced") { withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { withTempView("df1", "df2") { From d5e7bd0cc497a5ea2baab0046501a804e7d42aa4 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 18 Nov 2020 23:59:11 -0800 Subject: [PATCH 0518/1009] [SPARK-33483][INFRA][TESTS] Fix rat exclusion patterns and add a LICENSE ### What changes were proposed in this pull request? This PR fixes the RAT exclusion rule which was originated from SPARK-1144 (Apache Spark 1.0) ### Why are the changes needed? This prevents the situation like https://github.com/apache/spark/pull/30415. Currently, it missed `catalog` directory due to `.log` rule. ``` $ dev/check-license Could not find Apache license headers in the following files: !????? /Users/dongjoon/APACHE/spark-merge/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataColumn.java !????? /Users/dongjoon/APACHE/spark-merge/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CI with the new rule. Closes #30418 from dongjoon-hyun/SPARK-RAT. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/.rat-excludes | 28 ++++++++++++------- .../resources/data/scripts/test_transform.py | 18 ++++++++++++ 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/dev/.rat-excludes b/dev/.rat-excludes index 0e892a927906a..7da330dfe1fbf 100644 --- a/dev/.rat-excludes +++ b/dev/.rat-excludes @@ -42,11 +42,11 @@ jquery.dataTables.1.10.20.min.js jquery.mustache.js jsonFormatter.min.css jsonFormatter.min.js -.*avsc -.*txt -.*json -.*data -.*log +.*\.avsc +.*\.txt +.*\.json +.*\.data +.*\.log pyspark-coverage-site/* cloudpickle/* join.py @@ -98,17 +98,17 @@ local-1430917381535_2 DESCRIPTION NAMESPACE test_support/* -.*Rd +.*\.Rd help/* html/* INDEX .lintr gen-java.* -.*avpr -.*parquet +.*\.avpr +.*\.parquet spark-deps-.* -.*csv -.*tsv +.*\.csv +.*\.tsv .*\.sql .Rbuildignore META-INF/* @@ -125,3 +125,11 @@ application_1578436911597_0052 config.properties app-20200706201101-0003 py.typed +_metadata +_SUCCESS +part-00000 +.*\.res +flights_tiny.txt.1 +over1k +over10k +exported_table/* diff --git a/sql/hive/src/test/resources/data/scripts/test_transform.py b/sql/hive/src/test/resources/data/scripts/test_transform.py index ac6d11d8b919c..dedb370f6c90e 100755 --- a/sql/hive/src/test/resources/data/scripts/test_transform.py +++ b/sql/hive/src/test/resources/data/scripts/test_transform.py @@ -1,3 +1,21 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# import sys delim = sys.argv[1] From ef2638c3e3aa1d2ce137f1c50c9697a7877d1719 Mon Sep 17 00:00:00 2001 From: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Date: Thu, 19 Nov 2020 00:12:22 -0800 Subject: [PATCH 0519/1009] [SPARK-33183][SQL][FOLLOW-UP] Update rule RemoveRedundantSorts config version ### What changes were proposed in this pull request? This PR is a follow up for #30093 to updates the config `spark.sql.execution.removeRedundantSorts` version to 2.4.8. ### Why are the changes needed? To update the rule version it has been backported to 2.4. #30194 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A Closes #30420 from allisonwang-db/spark-33183-follow-up. Authored-by: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Signed-off-by: Dongjoon Hyun --- .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 5c17f0434bc79..43014feecfd8e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1277,7 +1277,7 @@ object SQLConf { val REMOVE_REDUNDANT_SORTS_ENABLED = buildConf("spark.sql.execution.removeRedundantSorts") .internal() .doc("Whether to remove redundant physical sort node") - .version("3.1.0") + .version("2.4.8") .booleanConf .createWithDefault(true) From a03c540cf7fe92160caf41ef6d2e2993f667dc59 Mon Sep 17 00:00:00 2001 From: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Date: Thu, 19 Nov 2020 13:29:01 +0000 Subject: [PATCH 0520/1009] [SPARK-33472][SQL] Adjust RemoveRedundantSorts rule order ### What changes were proposed in this pull request? This PR switched the order for the rule `RemoveRedundantSorts` and `EnsureRequirements` so that `EnsureRequirements` will be invoked before `RemoveRedundantSorts` to avoid IllegalArgumentException when instantiating PartitioningCollection. ### Why are the changes needed? `RemoveRedundantSorts` rule uses SparkPlan's `outputPartitioning` to check whether a sort node is redundant. Currently, it is added before `EnsureRequirements`. Since `PartitioningCollection` requires left and right partitioning to have the same number of partitions, which is not necessarily true before applying `EnsureRequirements`, the rule can fail with the following exception: ``` IllegalArgumentException: requirement failed: PartitioningCollection requires all of its partitionings have the same numPartitions. ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test Closes #30373 from allisonwang-db/sort-follow-up. 
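As a rough illustration of the failure mode described above (a sketch against Spark's catalyst internals, not code from this patch), building a `PartitioningCollection` from two children with mismatched partition counts trips the same `require` that `EnsureRequirements` would otherwise have satisfied by inserting shuffles first:
```scala
import org.apache.spark.sql.catalyst.expressions.AttributeReference
import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, PartitioningCollection}
import org.apache.spark.sql.types.LongType

// Two join sides hashed on the same key but with different partition counts,
// as can happen before EnsureRequirements has run.
val key = AttributeReference("key", LongType)()
val left = HashPartitioning(Seq(key), numPartitions = 2)
val right = HashPartitioning(Seq(key), numPartitions = 200)

// Throws: requirement failed: PartitioningCollection requires all of its
// partitionings have the same numPartitions.
PartitioningCollection(Seq(left, right))
```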
Authored-by: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Signed-off-by: Wenchen Fan --- .../spark/sql/execution/QueryExecution.scala | 4 ++- .../spark/sql/execution/SparkPlan.scala | 7 ++++- .../adaptive/AdaptiveSparkPlanExec.scala | 2 +- .../execution/RemoveRedundantSortsSuite.scala | 28 +++++++++++++++++++ 4 files changed, 38 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index 77f7a4e553f06..040d1f36ed8a5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -343,8 +343,10 @@ object QueryExecution { PlanDynamicPruningFilters(sparkSession), PlanSubqueries(sparkSession), RemoveRedundantProjects, - RemoveRedundantSorts, EnsureRequirements, + // `RemoveRedundantSorts` needs to be added before `EnsureRequirements` to guarantee the same + // number of partitions when instantiating PartitioningCollection. + RemoveRedundantSorts, DisableUnnecessaryBucketedScan, ApplyColumnarRulesAndInsertTransitions(sparkSession.sessionState.columnarRules), CollapseCodegenStages(), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala index ead8c00031112..062aa69b3adb3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala @@ -135,7 +135,12 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ def longMetric(name: String): SQLMetric = metrics(name) // TODO: Move to `DistributedPlan` - /** Specifies how data is partitioned across different nodes in the cluster. */ + /** + * Specifies how data is partitioned across different nodes in the cluster. + * Note this method may fail if it is invoked before `EnsureRequirements` is applied + * since `PartitioningCollection` requires all its partitionings to have + * the same number of partitions. + */ def outputPartitioning: Partitioning = UnknownPartitioning(0) // TODO: WRONG WIDTH! /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index 0865e42b440db..570edbf5f78a3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -88,8 +88,8 @@ case class AdaptiveSparkPlanExec( // Exchange nodes) after running these rules. 
private def queryStagePreparationRules: Seq[Rule[SparkPlan]] = Seq( RemoveRedundantProjects, - RemoveRedundantSorts, EnsureRequirements, + RemoveRedundantSorts, DisableUnnecessaryBucketedScan ) ++ context.session.sessionState.queryStagePrepRules diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala index 54c5a33441900..751078d08fda9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala @@ -18,7 +18,9 @@ package org.apache.spark.sql.execution import org.apache.spark.sql.{DataFrame, QueryTest} +import org.apache.spark.sql.catalyst.plans.physical.{RangePartitioning, UnknownPartitioning} import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanHelper, DisableAdaptiveExecutionSuite, EnableAdaptiveExecutionSuite} +import org.apache.spark.sql.execution.joins.ShuffledJoin import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession @@ -135,6 +137,32 @@ abstract class RemoveRedundantSortsSuiteBase } } } + + test("SPARK-33472: shuffled join with different left and right side partition numbers") { + withTempView("t1", "t2") { + spark.range(0, 100, 1, 2).select('id as "key").createOrReplaceTempView("t1") + (0 to 100).toDF("key").createOrReplaceTempView("t2") + + val queryTemplate = """ + |SELECT /*+ %s(t1) */ t1.key + |FROM t1 JOIN t2 ON t1.key = t2.key + |WHERE t1.key > 10 AND t2.key < 50 + |ORDER BY t1.key ASC + """.stripMargin + + Seq(("MERGE", 3), ("SHUFFLE_HASH", 1)).foreach { case (hint, count) => + val query = queryTemplate.format(hint) + val df = sql(query) + val sparkPlan = df.queryExecution.sparkPlan + val join = sparkPlan.collect { case j: ShuffledJoin => j }.head + val leftPartitioning = join.left.outputPartitioning + assert(leftPartitioning.isInstanceOf[RangePartitioning]) + assert(leftPartitioning.numPartitions == 2) + assert(join.right.outputPartitioning == UnknownPartitioning(0)) + checkSorts(query, count, count) + } + } + } } class RemoveRedundantSortsSuite extends RemoveRedundantSortsSuiteBase From 21b13506cd822ed7db343bff4ca25d9555178f10 Mon Sep 17 00:00:00 2001 From: ulysses Date: Thu, 19 Nov 2020 13:31:10 +0000 Subject: [PATCH 0521/1009] [SPARK-33442][SQL] Change Combine Limit to Eliminate limit using max row ### What changes were proposed in this pull request? Change `CombineLimits` name to `EliminateLimits` and add check if `Limit` child max row <= limit. ### Why are the changes needed? In Add-hoc scene, we always add limit for the query if user have no special limit value, but not all limit is nesessary. A general negative example is ``` select count(*) from t limit 100000; ``` It will be great if we can eliminate limit at Spark side. 
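As a quick illustration of the new rule (a sketch, not one of the tests added in this patch; it assumes a running `SparkSession` named `spark`), a LIMIT that is provably no smaller than its child's maximum row count simply disappears from the optimized plan:
```scala
// A global aggregate produces at most one row, so LIMIT 100000 is redundant
// and EliminateLimits removes the GlobalLimit/LocalLimit pair entirely.
val optimized = spark.sql("SELECT count(*) FROM range(1000) LIMIT 100000")
  .queryExecution.optimizedPlan
assert(!optimized.toString.contains("GlobalLimit"))
```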
Also, we make a benchmark for this case ``` runBenchmark("Sort and Limit") { val N = 100000 val benchmark = new Benchmark("benchmark sort and limit", N) benchmark.addCase("TakeOrderedAndProject", 3) { _ => spark.range(N).toDF("c").repartition(200).sort("c").take(200000) } benchmark.addCase("Sort And Limit", 3) { _ => withSQLConf("spark.sql.execution.topKSortFallbackThreshold" -> "-1") { spark.range(N).toDF("c").repartition(200).sort("c").take(200000) } } benchmark.addCase("Sort", 3) { _ => spark.range(N).toDF("c").repartition(200).sort("c").collect() } benchmark.run() } ``` and the result is ``` Java HotSpot(TM) 64-Bit Server VM 1.8.0_191-b12 on Mac OS X 10.15.6 Intel(R) Core(TM) i5-5257U CPU 2.70GHz benchmark sort and limit: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ TakeOrderedAndProject 1833 2259 382 0.1 18327.1 1.0X Sort And Limit 1417 1658 285 0.1 14167.5 1.3X Sort 1324 1484 225 0.1 13238.3 1.4X ``` It shows that it makes sense to replace `TakeOrderedAndProjectExec` with `Sort + Project`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Add test. Closes #30368 from ulysses-you/SPARK-33442. Authored-by: ulysses Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/Optimizer.scala | 19 +- .../optimizer/CombiningLimitsSuite.scala | 31 +- .../optimizer/LimitPushdownSuite.scala | 4 +- .../approved-plans-v1_4/q16.sf100/explain.txt | 6 +- .../q16.sf100/simplified.txt | 4 +- .../approved-plans-v1_4/q16/explain.txt | 6 +- .../approved-plans-v1_4/q16/simplified.txt | 4 +- .../q23a.sf100/explain.txt | 303 +++++++------- .../q23a.sf100/simplified.txt | 381 +++++++++--------- .../approved-plans-v1_4/q23a/explain.txt | 239 ++++++----- .../approved-plans-v1_4/q23a/simplified.txt | 273 +++++++------ .../approved-plans-v1_4/q38.sf100/explain.txt | 139 +++---- .../q38.sf100/simplified.txt | 189 +++++---- .../approved-plans-v1_4/q38/explain.txt | 113 +++--- .../approved-plans-v1_4/q38/simplified.txt | 125 +++--- .../approved-plans-v1_4/q92.sf100/explain.txt | 6 +- .../q92.sf100/simplified.txt | 4 +- .../approved-plans-v1_4/q92/explain.txt | 6 +- .../approved-plans-v1_4/q92/simplified.txt | 4 +- .../approved-plans-v1_4/q94.sf100/explain.txt | 6 +- .../q94.sf100/simplified.txt | 4 +- .../approved-plans-v1_4/q94/explain.txt | 6 +- .../approved-plans-v1_4/q94/simplified.txt | 4 +- .../approved-plans-v1_4/q95.sf100/explain.txt | 6 +- .../q95.sf100/simplified.txt | 4 +- .../approved-plans-v1_4/q95/explain.txt | 6 +- .../approved-plans-v1_4/q95/simplified.txt | 4 +- .../approved-plans-v1_4/q96.sf100/explain.txt | 6 +- .../q96.sf100/simplified.txt | 4 +- .../approved-plans-v1_4/q96/explain.txt | 6 +- .../approved-plans-v1_4/q96/simplified.txt | 4 +- .../approved-plans-v1_4/q97.sf100/explain.txt | 63 ++- .../q97.sf100/simplified.txt | 91 +++-- .../approved-plans-v1_4/q97/explain.txt | 63 ++- .../approved-plans-v1_4/q97/simplified.txt | 91 +++-- .../spark/sql/streaming/StreamSuite.scala | 2 +- 36 files changed, 1113 insertions(+), 1113 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 86c46e072c887..c4b9936fa4c4f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -85,7 +85,7 @@ abstract class Optimizer(catalogManager: CatalogManager) OptimizeWindowFunctions, CollapseWindow, CombineFilters, - CombineLimits, + EliminateLimits, CombineUnions, // Constant folding and strength reduction TransposeWindow, @@ -1451,11 +1451,20 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { } /** - * Combines two adjacent [[Limit]] operators into one, merging the - * expressions into one single expression. + * This rule optimizes Limit operators by: + * 1. Eliminate [[Limit]] operators if it's child max row <= limit. + * 2. Combines two adjacent [[Limit]] operators into one, merging the + * expressions into one single expression. */ -object CombineLimits extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan transform { +object EliminateLimits extends Rule[LogicalPlan] { + private def canEliminate(limitExpr: Expression, child: LogicalPlan): Boolean = { + limitExpr.foldable && child.maxRows.exists { _ <= limitExpr.eval().asInstanceOf[Int] } + } + + def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { + case Limit(l, child) if canEliminate(l, child) => + child + case GlobalLimit(le, GlobalLimit(ne, grandChild)) => GlobalLimit(Least(Seq(ne, le)), grandChild) case LocalLimit(le, LocalLimit(ne, grandChild)) => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala index b190dd5a7c220..70f130f834c68 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala @@ -30,8 +30,8 @@ class CombiningLimitsSuite extends PlanTest { Batch("Column Pruning", FixedPoint(100), ColumnPruning, RemoveNoopOperators) :: - Batch("Combine Limit", FixedPoint(10), - CombineLimits) :: + Batch("Eliminate Limit", FixedPoint(10), + EliminateLimits) :: Batch("Constant Folding", FixedPoint(10), NullPropagation, ConstantFolding, @@ -90,4 +90,31 @@ class CombiningLimitsSuite extends PlanTest { comparePlans(optimized, correctAnswer) } + + test("SPARK-33442: Change Combine Limit to Eliminate limit using max row") { + // test child max row <= limit. + val query1 = testRelation.select().groupBy()(count(1)).limit(1).analyze + val optimized1 = Optimize.execute(query1) + val expected1 = testRelation.select().groupBy()(count(1)).analyze + comparePlans(optimized1, expected1) + + // test child max row > limit. 
+ val query2 = testRelation.select().groupBy()(count(1)).limit(0).analyze + val optimized2 = Optimize.execute(query2) + comparePlans(optimized2, query2) + + // test child max row is none + val query3 = testRelation.select(Symbol("a")).limit(1).analyze + val optimized3 = Optimize.execute(query3) + comparePlans(optimized3, query3) + + // test sort after limit + val query4 = testRelation.select().groupBy()(count(1)) + .orderBy(count(1).asc).limit(1).analyze + val optimized4 = Optimize.execute(query4) + // the top project has been removed, so we need optimize expected too + val expected4 = Optimize.execute( + testRelation.select().groupBy()(count(1)).orderBy(count(1).asc).analyze) + comparePlans(optimized4, expected4) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala index d993aee3d7518..e365e3300096e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala @@ -33,7 +33,7 @@ class LimitPushdownSuite extends PlanTest { EliminateSubqueryAliases) :: Batch("Limit pushdown", FixedPoint(100), LimitPushDown, - CombineLimits, + EliminateLimits, ConstantFolding, BooleanSimplification) :: Nil } @@ -74,7 +74,7 @@ class LimitPushdownSuite extends PlanTest { Union(testRelation.limit(1), testRelation2.select('d, 'e, 'f).limit(1)).limit(2) val unionOptimized = Optimize.execute(unionQuery.analyze) val unionCorrectAnswer = - Limit(2, Union(testRelation.limit(1), testRelation2.select('d, 'e, 'f).limit(1))).analyze + Union(testRelation.limit(1), testRelation2.select('d, 'e, 'f).limit(1)).analyze comparePlans(unionOptimized, unionCorrectAnswer) } diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt index 509fb0133095b..a446163e3d29d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (44) +* Sort (44) +- * HashAggregate (43) +- Exchange (42) +- * HashAggregate (41) @@ -244,7 +244,7 @@ Functions [3]: [sum(UnscaledValue(cs_ext_ship_cost#6)), sum(UnscaledValue(cs_net Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#23, sum(UnscaledValue(cs_net_profit#7))#24, count(cs_order_number#5)#27] Results [3]: [count(cs_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#23,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#24,17,2) AS total net profit #32] -(44) TakeOrderedAndProject +(44) Sort [codegen id : 12] Input [3]: [order count #30, total shipping cost #31, total net profit #32] -Arguments: 100, [order count #30 ASC NULLS FIRST], [order count #30, total shipping cost #31, total net profit #32] +Arguments: [order count #30 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt index ea9a0b27ff700..73a9b58010f58 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (12) +WholeStageCodegen (12) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt index 2ae939cfe41f3..ea7e298393e4c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (41) +* Sort (41) +- * HashAggregate (40) +- Exchange (39) +- * HashAggregate (38) @@ -229,7 +229,7 @@ Functions [3]: [sum(UnscaledValue(cs_ext_ship_cost#6)), sum(UnscaledValue(cs_net Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#22, sum(UnscaledValue(cs_net_profit#7))#23, count(cs_order_number#5)#27] Results [3]: [count(cs_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#22,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#23,17,2) AS total net profit #32] -(41) TakeOrderedAndProject +(41) Sort [codegen id : 8] Input [3]: [order count #30, total shipping cost #31, total net profit #32] -Arguments: 100, [order count #30 ASC NULLS FIRST], [order count #30, total shipping cost #31, total net profit #32] +Arguments: [order count #30 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt index a044b05365f8e..169f07c2d85e5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (8) +WholeStageCodegen (8) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt index bda9824b71b5a..85f71b6cd9388 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt @@ -1,104 +1,103 @@ == Physical Plan == -CollectLimit (100) -+- * HashAggregate (99) - +- Exchange (98) - +- * HashAggregate (97) - +- Union (96) - :- * Project (59) - : +- * BroadcastHashJoin Inner BuildRight (58) - : :- * Project (52) - : : +- SortMergeJoin LeftSemi (51) - : : :- * Sort (33) - : : : +- Exchange (32) - : : : +- * Project (31) - : : : +- SortMergeJoin LeftSemi (30) - : : : :- * Sort (5) - : : : : +- Exchange (4) - : : : : +- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan 
parquet default.catalog_sales (1) - : : : +- * Sort (29) - : : : +- * Project (28) - : : : +- * Filter (27) - : : : +- * HashAggregate (26) - : : : +- * HashAggregate (25) - : : : +- * Project (24) - : : : +- * SortMergeJoin Inner (23) - : : : :- * Sort (17) - : : : : +- Exchange (16) - : : : : +- * Project (15) - : : : : +- * BroadcastHashJoin Inner BuildRight (14) - : : : : :- * Filter (8) - : : : : : +- * ColumnarToRow (7) - : : : : : +- Scan parquet default.store_sales (6) - : : : : +- BroadcastExchange (13) - : : : : +- * Project (12) - : : : : +- * Filter (11) - : : : : +- * ColumnarToRow (10) - : : : : +- Scan parquet default.date_dim (9) - : : : +- * Sort (22) - : : : +- Exchange (21) - : : : +- * Filter (20) - : : : +- * ColumnarToRow (19) - : : : +- Scan parquet default.item (18) - : : +- * Sort (50) - : : +- * Project (49) - : : +- * Filter (48) - : : +- * HashAggregate (47) - : : +- * HashAggregate (46) - : : +- * Project (45) - : : +- * SortMergeJoin Inner (44) - : : :- * Sort (38) - : : : +- Exchange (37) - : : : +- * Filter (36) - : : : +- * ColumnarToRow (35) - : : : +- Scan parquet default.store_sales (34) - : : +- * Sort (43) - : : +- Exchange (42) - : : +- * Filter (41) - : : +- * ColumnarToRow (40) - : : +- Scan parquet default.customer (39) - : +- BroadcastExchange (57) - : +- * Project (56) - : +- * Filter (55) - : +- * ColumnarToRow (54) - : +- Scan parquet default.date_dim (53) - +- * Project (95) - +- * BroadcastHashJoin Inner BuildRight (94) - :- * Project (92) - : +- SortMergeJoin LeftSemi (91) - : :- * Sort (79) - : : +- Exchange (78) - : : +- * Project (77) - : : +- SortMergeJoin LeftSemi (76) - : : :- * Sort (64) - : : : +- Exchange (63) - : : : +- * Filter (62) - : : : +- * ColumnarToRow (61) - : : : +- Scan parquet default.web_sales (60) - : : +- * Sort (75) - : : +- * Project (74) - : : +- * Filter (73) - : : +- * HashAggregate (72) - : : +- * HashAggregate (71) - : : +- * Project (70) - : : +- * SortMergeJoin Inner (69) - : : :- * Sort (66) - : : : +- ReusedExchange (65) - : : +- * Sort (68) - : : +- ReusedExchange (67) - : +- * Sort (90) - : +- * Project (89) - : +- * Filter (88) - : +- * HashAggregate (87) - : +- * HashAggregate (86) - : +- * Project (85) - : +- * SortMergeJoin Inner (84) - : :- * Sort (81) - : : +- ReusedExchange (80) - : +- * Sort (83) - : +- ReusedExchange (82) - +- ReusedExchange (93) +* HashAggregate (99) ++- Exchange (98) + +- * HashAggregate (97) + +- Union (96) + :- * Project (59) + : +- * BroadcastHashJoin Inner BuildRight (58) + : :- * Project (52) + : : +- SortMergeJoin LeftSemi (51) + : : :- * Sort (33) + : : : +- Exchange (32) + : : : +- * Project (31) + : : : +- SortMergeJoin LeftSemi (30) + : : : :- * Sort (5) + : : : : +- Exchange (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.catalog_sales (1) + : : : +- * Sort (29) + : : : +- * Project (28) + : : : +- * Filter (27) + : : : +- * HashAggregate (26) + : : : +- * HashAggregate (25) + : : : +- * Project (24) + : : : +- * SortMergeJoin Inner (23) + : : : :- * Sort (17) + : : : : +- Exchange (16) + : : : : +- * Project (15) + : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : :- * Filter (8) + : : : : : +- * ColumnarToRow (7) + : : : : : +- Scan parquet default.store_sales (6) + : : : : +- BroadcastExchange (13) + : : : : +- * Project (12) + : : : : +- * Filter (11) + : : : : +- * ColumnarToRow (10) + : : : : +- Scan parquet default.date_dim (9) + : : : +- * Sort (22) + : : : +- Exchange (21) + : : : +- * Filter 
(20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet default.item (18) + : : +- * Sort (50) + : : +- * Project (49) + : : +- * Filter (48) + : : +- * HashAggregate (47) + : : +- * HashAggregate (46) + : : +- * Project (45) + : : +- * SortMergeJoin Inner (44) + : : :- * Sort (38) + : : : +- Exchange (37) + : : : +- * Filter (36) + : : : +- * ColumnarToRow (35) + : : : +- Scan parquet default.store_sales (34) + : : +- * Sort (43) + : : +- Exchange (42) + : : +- * Filter (41) + : : +- * ColumnarToRow (40) + : : +- Scan parquet default.customer (39) + : +- BroadcastExchange (57) + : +- * Project (56) + : +- * Filter (55) + : +- * ColumnarToRow (54) + : +- Scan parquet default.date_dim (53) + +- * Project (95) + +- * BroadcastHashJoin Inner BuildRight (94) + :- * Project (92) + : +- SortMergeJoin LeftSemi (91) + : :- * Sort (79) + : : +- Exchange (78) + : : +- * Project (77) + : : +- SortMergeJoin LeftSemi (76) + : : :- * Sort (64) + : : : +- Exchange (63) + : : : +- * Filter (62) + : : : +- * ColumnarToRow (61) + : : : +- Scan parquet default.web_sales (60) + : : +- * Sort (75) + : : +- * Project (74) + : : +- * Filter (73) + : : +- * HashAggregate (72) + : : +- * HashAggregate (71) + : : +- * Project (70) + : : +- * SortMergeJoin Inner (69) + : : :- * Sort (66) + : : : +- ReusedExchange (65) + : : +- * Sort (68) + : : +- ReusedExchange (67) + : +- * Sort (90) + : +- * Project (89) + : +- * Filter (88) + : +- * HashAggregate (87) + : +- * HashAggregate (86) + : +- * Project (85) + : +- * SortMergeJoin Inner (84) + : :- * Sort (81) + : : +- ReusedExchange (80) + : +- * Sort (83) + : +- ReusedExchange (82) + +- ReusedExchange (93) (1) Scan parquet default.catalog_sales @@ -547,149 +546,145 @@ Functions [1]: [sum(sales#40)] Aggregate Attributes [1]: [sum(sales#40)#65] Results [1]: [sum(sales#40)#65 AS sum(sales)#66] -(100) CollectLimit -Input [1]: [sum(sales)#66] -Arguments: 100 - ===== Subqueries ===== Subquery:1 Hosting operator id = 48 Hosting Expression = Subquery scalar-subquery#36, [id=#37] -* HashAggregate (124) -+- Exchange (123) - +- * HashAggregate (122) - +- * HashAggregate (121) - +- * HashAggregate (120) - +- * Project (119) - +- * SortMergeJoin Inner (118) - :- * Sort (112) - : +- Exchange (111) - : +- * Project (110) - : +- * BroadcastHashJoin Inner BuildRight (109) - : :- * Filter (103) - : : +- * ColumnarToRow (102) - : : +- Scan parquet default.store_sales (101) - : +- BroadcastExchange (108) - : +- * Project (107) - : +- * Filter (106) - : +- * ColumnarToRow (105) - : +- Scan parquet default.date_dim (104) - +- * Sort (117) - +- Exchange (116) - +- * Filter (115) - +- * ColumnarToRow (114) - +- Scan parquet default.customer (113) - - -(101) Scan parquet default.store_sales +* HashAggregate (123) ++- Exchange (122) + +- * HashAggregate (121) + +- * HashAggregate (120) + +- * HashAggregate (119) + +- * Project (118) + +- * SortMergeJoin Inner (117) + :- * Sort (111) + : +- Exchange (110) + : +- * Project (109) + : +- * BroadcastHashJoin Inner BuildRight (108) + : :- * Filter (102) + : : +- * ColumnarToRow (101) + : : +- Scan parquet default.store_sales (100) + : +- BroadcastExchange (107) + : +- * Project (106) + : +- * Filter (105) + : +- * ColumnarToRow (104) + : +- Scan parquet default.date_dim (103) + +- * Sort (116) + +- Exchange (115) + +- * Filter (114) + +- * ColumnarToRow (113) + +- Scan parquet default.customer (112) + + +(100) Scan parquet default.store_sales Output [4]: [ss_sold_date_sk#7, ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] Batched: true 
Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] ReadSchema: struct -(102) ColumnarToRow [codegen id : 2] +(101) ColumnarToRow [codegen id : 2] Input [4]: [ss_sold_date_sk#7, ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] -(103) Filter [codegen id : 2] +(102) Filter [codegen id : 2] Input [4]: [ss_sold_date_sk#7, ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] Condition : (isnotnull(ss_customer_sk#24) AND isnotnull(ss_sold_date_sk#7)) -(104) Scan parquet default.date_dim +(103) Scan parquet default.date_dim Output [2]: [d_date_sk#9, d_year#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct -(105) ColumnarToRow [codegen id : 1] +(104) ColumnarToRow [codegen id : 1] Input [2]: [d_date_sk#9, d_year#11] -(106) Filter [codegen id : 1] +(105) Filter [codegen id : 1] Input [2]: [d_date_sk#9, d_year#11] Condition : (d_year#11 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#9)) -(107) Project [codegen id : 1] +(106) Project [codegen id : 1] Output [1]: [d_date_sk#9] Input [2]: [d_date_sk#9, d_year#11] -(108) BroadcastExchange +(107) BroadcastExchange Input [1]: [d_date_sk#9] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#67] -(109) BroadcastHashJoin [codegen id : 2] +(108) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ss_sold_date_sk#7] Right keys [1]: [d_date_sk#9] Join condition: None -(110) Project [codegen id : 2] +(109) Project [codegen id : 2] Output [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] Input [5]: [ss_sold_date_sk#7, ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, d_date_sk#9] -(111) Exchange +(110) Exchange Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] Arguments: hashpartitioning(ss_customer_sk#24, 5), true, [id=#68] -(112) Sort [codegen id : 3] +(111) Sort [codegen id : 3] Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] Arguments: [ss_customer_sk#24 ASC NULLS FIRST], false, 0 -(113) Scan parquet default.customer +(112) Scan parquet default.customer Output [1]: [c_customer_sk#28] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct -(114) ColumnarToRow [codegen id : 4] +(113) ColumnarToRow [codegen id : 4] Input [1]: [c_customer_sk#28] -(115) Filter [codegen id : 4] +(114) Filter [codegen id : 4] Input [1]: [c_customer_sk#28] Condition : isnotnull(c_customer_sk#28) -(116) Exchange +(115) Exchange Input [1]: [c_customer_sk#28] Arguments: hashpartitioning(c_customer_sk#28, 5), true, [id=#69] -(117) Sort [codegen id : 5] +(116) Sort [codegen id : 5] Input [1]: [c_customer_sk#28] Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 -(118) SortMergeJoin [codegen id : 6] +(117) SortMergeJoin [codegen id : 6] Left keys [1]: [ss_customer_sk#24] Right keys [1]: [c_customer_sk#28] Join condition: None -(119) Project [codegen id : 6] +(118) Project [codegen id : 6] Output [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk#28] -(120) HashAggregate [codegen id : 6] +(119) HashAggregate [codegen id : 6] Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] Keys [1]: [c_customer_sk#28] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [2]: [sum#70, isEmpty#71] Results [3]: [c_customer_sk#28, sum#72, isEmpty#73] -(121) HashAggregate [codegen id : 6] +(120) HashAggregate [codegen id : 6] Input [3]: [c_customer_sk#28, sum#72, isEmpty#73] Keys [1]: [c_customer_sk#28] Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#74] Results [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#74 AS csales#75] -(122) HashAggregate [codegen id : 6] +(121) HashAggregate [codegen id : 6] Input [1]: [csales#75] Keys: [] Functions [1]: [partial_max(csales#75)] Aggregate Attributes [1]: [max#76] Results [1]: [max#77] -(123) Exchange +(122) Exchange Input [1]: [max#77] Arguments: SinglePartition, true, [id=#78] -(124) HashAggregate [codegen id : 7] +(123) HashAggregate [codegen id : 7] Input [1]: [max#77] Keys: [] Functions [1]: [max(csales#75)] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt index 695e6ccd71821..5bb8bc5b99d0c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt @@ -1,209 +1,208 @@ -CollectLimit - WholeStageCodegen (36) - HashAggregate [sum,isEmpty] [sum(sales),sum(sales),sum,isEmpty] - InputAdapter - Exchange #1 - WholeStageCodegen (35) - HashAggregate [sales] [sum,isEmpty,sum,isEmpty] - InputAdapter - Union - WholeStageCodegen (17) - Project [cs_quantity,cs_list_price] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_quantity,cs_list_price] - InputAdapter - SortMergeJoin [cs_bill_customer_sk,c_customer_sk] - WholeStageCodegen (10) - Sort [cs_bill_customer_sk] - InputAdapter - Exchange [cs_bill_customer_sk] #2 - WholeStageCodegen (9) - Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] - InputAdapter - SortMergeJoin [cs_item_sk,item_sk] - WholeStageCodegen (2) - Sort [cs_item_sk] - InputAdapter - Exchange [cs_item_sk] #3 - WholeStageCodegen (1) - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price] - WholeStageCodegen (8) - Sort [item_sk] - Project [item_sk] - Filter [count(1)] - HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] - HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] - Project [d_date,i_item_sk,i_item_desc] - SortMergeJoin [ss_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (5) - Sort [ss_item_sk] - InputAdapter - Exchange [ss_item_sk] #4 - WholeStageCodegen (4) - Project [ss_item_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_item_sk] - ColumnarToRow - InputAdapter - 
Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (3) - Project [d_date_sk,d_date] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_year] - InputAdapter - WholeStageCodegen (7) - Sort [i_item_sk] - InputAdapter - Exchange [i_item_sk] #6 - WholeStageCodegen (6) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_desc] - WholeStageCodegen (15) - Sort [c_customer_sk] - Project [c_customer_sk] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] - Subquery #1 - WholeStageCodegen (7) - HashAggregate [max] [max(csales),tpcds_cmax,max] - InputAdapter - Exchange #9 - WholeStageCodegen (6) - HashAggregate [csales] [max,max] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - SortMergeJoin [ss_customer_sk,c_customer_sk] +WholeStageCodegen (36) + HashAggregate [sum,isEmpty] [sum(sales),sum(sales),sum,isEmpty] + InputAdapter + Exchange #1 + WholeStageCodegen (35) + HashAggregate [sales] [sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (17) + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_quantity,cs_list_price] + InputAdapter + SortMergeJoin [cs_bill_customer_sk,c_customer_sk] + WholeStageCodegen (10) + Sort [cs_bill_customer_sk] + InputAdapter + Exchange [cs_bill_customer_sk] #2 + WholeStageCodegen (9) + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] + InputAdapter + SortMergeJoin [cs_item_sk,item_sk] + WholeStageCodegen (2) + Sort [cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #3 + WholeStageCodegen (1) + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price] + WholeStageCodegen (8) + Sort [item_sk] + Project [item_sk] + Filter [count(1)] + HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] + HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] + Project [d_date,i_item_sk,i_item_desc] + SortMergeJoin [ss_item_sk,i_item_sk] InputAdapter - WholeStageCodegen (3) - Sort [ss_customer_sk] + WholeStageCodegen (5) + Sort [ss_item_sk] InputAdapter - Exchange [ss_customer_sk] #10 - WholeStageCodegen (2) - Project [ss_customer_sk,ss_quantity,ss_sales_price] + Exchange [ss_item_sk] #4 + WholeStageCodegen (4) + Project [ss_item_sk,d_date] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_customer_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_item_sk] ColumnarToRow InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] InputAdapter - BroadcastExchange #11 - WholeStageCodegen (1) - Project [d_date_sk] + BroadcastExchange #5 + WholeStageCodegen (3) + Project [d_date_sk,d_date] Filter [d_year,d_date_sk] ColumnarToRow InputAdapter - Scan 
parquet default.date_dim [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_date,d_year] InputAdapter - WholeStageCodegen (5) - Sort [c_customer_sk] + WholeStageCodegen (7) + Sort [i_item_sk] InputAdapter - Exchange [c_customer_sk] #12 - WholeStageCodegen (4) - Filter [c_customer_sk] + Exchange [i_item_sk] #6 + WholeStageCodegen (6) + Filter [i_item_sk] ColumnarToRow InputAdapter - Scan parquet default.customer [c_customer_sk] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - SortMergeJoin [ss_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (12) - Sort [ss_customer_sk] - InputAdapter - Exchange [ss_customer_sk] #7 - WholeStageCodegen (11) - Filter [ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] - InputAdapter - WholeStageCodegen (14) - Sort [c_customer_sk] - InputAdapter - Exchange [c_customer_sk] #8 - WholeStageCodegen (13) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk] - InputAdapter - BroadcastExchange #13 - WholeStageCodegen (16) - Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - WholeStageCodegen (34) - Project [ws_quantity,ws_list_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_quantity,ws_list_price] - InputAdapter - SortMergeJoin [ws_bill_customer_sk,c_customer_sk] - WholeStageCodegen (27) - Sort [ws_bill_customer_sk] - InputAdapter - Exchange [ws_bill_customer_sk] #14 - WholeStageCodegen (26) - Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + Scan parquet default.item [i_item_sk,i_item_desc] + WholeStageCodegen (15) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #1 + WholeStageCodegen (7) + HashAggregate [max] [max(csales),tpcds_cmax,max] InputAdapter - SortMergeJoin [ws_item_sk,item_sk] - WholeStageCodegen (19) - Sort [ws_item_sk] + Exchange #9 + WholeStageCodegen (6) + HashAggregate [csales] [max,max] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + SortMergeJoin [ss_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #10 + WholeStageCodegen (2) + Project [ss_customer_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet 
default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (5) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #12 + WholeStageCodegen (4) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + SortMergeJoin [ss_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (12) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #7 + WholeStageCodegen (11) + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] + InputAdapter + WholeStageCodegen (14) + Sort [c_customer_sk] InputAdapter - Exchange [ws_item_sk] #15 - WholeStageCodegen (18) - Filter [ws_sold_date_sk] + Exchange [c_customer_sk] #8 + WholeStageCodegen (13) + Filter [c_customer_sk] ColumnarToRow InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] - WholeStageCodegen (25) - Sort [item_sk] - Project [item_sk] - Filter [count(1)] - HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] - HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] - Project [d_date,i_item_sk,i_item_desc] - SortMergeJoin [ss_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (22) - Sort [ss_item_sk] - InputAdapter - ReusedExchange [ss_item_sk,d_date] #4 - InputAdapter - WholeStageCodegen (24) - Sort [i_item_sk] - InputAdapter - ReusedExchange [i_item_sk,i_item_desc] #6 - WholeStageCodegen (32) - Sort [c_customer_sk] - Project [c_customer_sk] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [tpcds_cmax] #1 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - SortMergeJoin [ss_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (29) - Sort [ss_customer_sk] - InputAdapter - ReusedExchange [ss_customer_sk,ss_quantity,ss_sales_price] #7 - InputAdapter - WholeStageCodegen (31) - Sort [c_customer_sk] - InputAdapter - ReusedExchange [c_customer_sk] #8 + Scan parquet default.customer 
[c_customer_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (16) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (34) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter - ReusedExchange [d_date_sk] #13 + SortMergeJoin [ws_bill_customer_sk,c_customer_sk] + WholeStageCodegen (27) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #14 + WholeStageCodegen (26) + Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + InputAdapter + SortMergeJoin [ws_item_sk,item_sk] + WholeStageCodegen (19) + Sort [ws_item_sk] + InputAdapter + Exchange [ws_item_sk] #15 + WholeStageCodegen (18) + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + WholeStageCodegen (25) + Sort [item_sk] + Project [item_sk] + Filter [count(1)] + HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] + HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] + Project [d_date,i_item_sk,i_item_desc] + SortMergeJoin [ss_item_sk,i_item_sk] + InputAdapter + WholeStageCodegen (22) + Sort [ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk,d_date] #4 + InputAdapter + WholeStageCodegen (24) + Sort [i_item_sk] + InputAdapter + ReusedExchange [i_item_sk,i_item_desc] #6 + WholeStageCodegen (32) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + SortMergeJoin [ss_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (29) + Sort [ss_customer_sk] + InputAdapter + ReusedExchange [ss_customer_sk,ss_quantity,ss_sales_price] #7 + InputAdapter + WholeStageCodegen (31) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_sk] #8 + InputAdapter + ReusedExchange [d_date_sk] #13 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt index 6d2b5b0013d8f..15ae5bfe24303 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt @@ -1,76 +1,75 @@ == Physical Plan == -CollectLimit (72) -+- * HashAggregate (71) - +- Exchange (70) - +- * HashAggregate (69) - +- Union (68) - :- * Project (51) - : +- * BroadcastHashJoin Inner BuildRight (50) - : :- * Project (44) - : : +- * BroadcastHashJoin LeftSemi BuildRight (43) - : : :- * Project (27) - : : 
: +- * BroadcastHashJoin LeftSemi BuildRight (26) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.catalog_sales (1) - : : : +- BroadcastExchange (25) - : : : +- * Project (24) - : : : +- * Filter (23) - : : : +- * HashAggregate (22) - : : : +- Exchange (21) - : : : +- * HashAggregate (20) - : : : +- * Project (19) - : : : +- * BroadcastHashJoin Inner BuildRight (18) - : : : :- * Project (13) - : : : : +- * BroadcastHashJoin Inner BuildRight (12) - : : : : :- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.store_sales (4) - : : : : +- BroadcastExchange (11) - : : : : +- * Project (10) - : : : : +- * Filter (9) - : : : : +- * ColumnarToRow (8) - : : : : +- Scan parquet default.date_dim (7) - : : : +- BroadcastExchange (17) - : : : +- * Filter (16) - : : : +- * ColumnarToRow (15) - : : : +- Scan parquet default.item (14) - : : +- BroadcastExchange (42) - : : +- * Project (41) - : : +- * Filter (40) - : : +- * HashAggregate (39) - : : +- Exchange (38) - : : +- * HashAggregate (37) - : : +- * Project (36) - : : +- * BroadcastHashJoin Inner BuildRight (35) - : : :- * Filter (30) - : : : +- * ColumnarToRow (29) - : : : +- Scan parquet default.store_sales (28) - : : +- BroadcastExchange (34) - : : +- * Filter (33) - : : +- * ColumnarToRow (32) - : : +- Scan parquet default.customer (31) - : +- BroadcastExchange (49) - : +- * Project (48) - : +- * Filter (47) - : +- * ColumnarToRow (46) - : +- Scan parquet default.date_dim (45) - +- * Project (67) - +- * BroadcastHashJoin Inner BuildRight (66) - :- * Project (64) - : +- * BroadcastHashJoin LeftSemi BuildRight (63) - : :- * Project (57) - : : +- * BroadcastHashJoin LeftSemi BuildRight (56) - : : :- * Filter (54) - : : : +- * ColumnarToRow (53) - : : : +- Scan parquet default.web_sales (52) - : : +- ReusedExchange (55) - : +- BroadcastExchange (62) - : +- * Project (61) - : +- * Filter (60) - : +- * HashAggregate (59) - : +- ReusedExchange (58) - +- ReusedExchange (65) +* HashAggregate (71) ++- Exchange (70) + +- * HashAggregate (69) + +- Union (68) + :- * Project (51) + : +- * BroadcastHashJoin Inner BuildRight (50) + : :- * Project (44) + : : +- * BroadcastHashJoin LeftSemi BuildRight (43) + : : :- * Project (27) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.catalog_sales (1) + : : : +- BroadcastExchange (25) + : : : +- * Project (24) + : : : +- * Filter (23) + : : : +- * HashAggregate (22) + : : : +- Exchange (21) + : : : +- * HashAggregate (20) + : : : +- * Project (19) + : : : +- * BroadcastHashJoin Inner BuildRight (18) + : : : :- * Project (13) + : : : : +- * BroadcastHashJoin Inner BuildRight (12) + : : : : :- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.store_sales (4) + : : : : +- BroadcastExchange (11) + : : : : +- * Project (10) + : : : : +- * Filter (9) + : : : : +- * ColumnarToRow (8) + : : : : +- Scan parquet default.date_dim (7) + : : : +- BroadcastExchange (17) + : : : +- * Filter (16) + : : : +- * ColumnarToRow (15) + : : : +- Scan parquet default.item (14) + : : +- BroadcastExchange (42) + : : +- * Project (41) + : : +- * Filter (40) + : : +- * HashAggregate (39) + : : +- Exchange (38) + : : +- * HashAggregate (37) + : : +- * Project (36) + : : +- * BroadcastHashJoin Inner BuildRight (35) + : : :- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.store_sales (28) + : : +- BroadcastExchange 
(34) + : : +- * Filter (33) + : : +- * ColumnarToRow (32) + : : +- Scan parquet default.customer (31) + : +- BroadcastExchange (49) + : +- * Project (48) + : +- * Filter (47) + : +- * ColumnarToRow (46) + : +- Scan parquet default.date_dim (45) + +- * Project (67) + +- * BroadcastHashJoin Inner BuildRight (66) + :- * Project (64) + : +- * BroadcastHashJoin LeftSemi BuildRight (63) + : :- * Project (57) + : : +- * BroadcastHashJoin LeftSemi BuildRight (56) + : : :- * Filter (54) + : : : +- * ColumnarToRow (53) + : : : +- Scan parquet default.web_sales (52) + : : +- ReusedExchange (55) + : +- BroadcastExchange (62) + : +- * Project (61) + : +- * Filter (60) + : +- * HashAggregate (59) + : +- ReusedExchange (58) + +- ReusedExchange (65) (1) Scan parquet default.catalog_sales @@ -398,139 +397,135 @@ Functions [1]: [sum(sales#40)] Aggregate Attributes [1]: [sum(sales#40)#57] Results [1]: [sum(sales#40)#57 AS sum(sales)#58] -(72) CollectLimit -Input [1]: [sum(sales)#58] -Arguments: 100 - ===== Subqueries ===== Subquery:1 Hosting operator id = 40 Hosting Expression = Subquery scalar-subquery#35, [id=#36] -* HashAggregate (94) -+- Exchange (93) - +- * HashAggregate (92) - +- * HashAggregate (91) - +- Exchange (90) - +- * HashAggregate (89) - +- * Project (88) - +- * BroadcastHashJoin Inner BuildRight (87) - :- * Project (81) - : +- * BroadcastHashJoin Inner BuildRight (80) - : :- * Filter (75) - : : +- * ColumnarToRow (74) - : : +- Scan parquet default.store_sales (73) - : +- BroadcastExchange (79) - : +- * Filter (78) - : +- * ColumnarToRow (77) - : +- Scan parquet default.customer (76) - +- BroadcastExchange (86) - +- * Project (85) - +- * Filter (84) - +- * ColumnarToRow (83) - +- Scan parquet default.date_dim (82) - - -(73) Scan parquet default.store_sales +* HashAggregate (93) ++- Exchange (92) + +- * HashAggregate (91) + +- * HashAggregate (90) + +- Exchange (89) + +- * HashAggregate (88) + +- * Project (87) + +- * BroadcastHashJoin Inner BuildRight (86) + :- * Project (80) + : +- * BroadcastHashJoin Inner BuildRight (79) + : :- * Filter (74) + : : +- * ColumnarToRow (73) + : : +- Scan parquet default.store_sales (72) + : +- BroadcastExchange (78) + : +- * Filter (77) + : +- * ColumnarToRow (76) + : +- Scan parquet default.customer (75) + +- BroadcastExchange (85) + +- * Project (84) + +- * Filter (83) + +- * ColumnarToRow (82) + +- Scan parquet default.date_dim (81) + + +(72) Scan parquet default.store_sales Output [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] ReadSchema: struct -(74) ColumnarToRow [codegen id : 3] +(73) ColumnarToRow [codegen id : 3] Input [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] -(75) Filter [codegen id : 3] +(74) Filter [codegen id : 3] Input [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] Condition : (isnotnull(ss_customer_sk#23) AND isnotnull(ss_sold_date_sk#6)) -(76) Scan parquet default.customer +(75) Scan parquet default.customer Output [1]: [c_customer_sk#26] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct -(77) ColumnarToRow [codegen id : 1] +(76) ColumnarToRow [codegen id : 1] Input [1]: [c_customer_sk#26] -(78) Filter [codegen id : 1] +(77) Filter [codegen id : 1] Input [1]: [c_customer_sk#26] Condition : 
isnotnull(c_customer_sk#26) -(79) BroadcastExchange +(78) BroadcastExchange Input [1]: [c_customer_sk#26] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#59] -(80) BroadcastHashJoin [codegen id : 3] +(79) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_customer_sk#23] Right keys [1]: [c_customer_sk#26] Join condition: None -(81) Project [codegen id : 3] +(80) Project [codegen id : 3] Output [4]: [ss_sold_date_sk#6, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] Input [5]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] -(82) Scan parquet default.date_dim +(81) Scan parquet default.date_dim Output [2]: [d_date_sk#8, d_year#10] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct -(83) ColumnarToRow [codegen id : 2] +(82) ColumnarToRow [codegen id : 2] Input [2]: [d_date_sk#8, d_year#10] -(84) Filter [codegen id : 2] +(83) Filter [codegen id : 2] Input [2]: [d_date_sk#8, d_year#10] Condition : (d_year#10 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#8)) -(85) Project [codegen id : 2] +(84) Project [codegen id : 2] Output [1]: [d_date_sk#8] Input [2]: [d_date_sk#8, d_year#10] -(86) BroadcastExchange +(85) BroadcastExchange Input [1]: [d_date_sk#8] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#60] -(87) BroadcastHashJoin [codegen id : 3] +(86) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_sold_date_sk#6] Right keys [1]: [d_date_sk#8] Join condition: None -(88) Project [codegen id : 3] +(87) Project [codegen id : 3] Output [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] Input [5]: [ss_sold_date_sk#6, ss_quantity#24, ss_sales_price#25, c_customer_sk#26, d_date_sk#8] -(89) HashAggregate [codegen id : 3] +(88) HashAggregate [codegen id : 3] Input [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] Keys [1]: [c_customer_sk#26] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [2]: [sum#61, isEmpty#62] Results [3]: [c_customer_sk#26, sum#63, isEmpty#64] -(90) Exchange +(89) Exchange Input [3]: [c_customer_sk#26, sum#63, isEmpty#64] Arguments: hashpartitioning(c_customer_sk#26, 5), true, [id=#65] -(91) HashAggregate [codegen id : 4] +(90) HashAggregate [codegen id : 4] Input [3]: [c_customer_sk#26, sum#63, isEmpty#64] Keys [1]: [c_customer_sk#26] Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#66] Results [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#66 AS csales#67] -(92) HashAggregate [codegen id : 4] +(91) HashAggregate [codegen id : 4] Input [1]: [csales#67] Keys: [] Functions [1]: [partial_max(csales#67)] Aggregate Attributes [1]: [max#68] Results [1]: [max#69] -(93) Exchange +(92) Exchange Input [1]: [max#69] 
Arguments: SinglePartition, true, [id=#70] -(94) HashAggregate [codegen id : 5] +(93) HashAggregate [codegen id : 5] Input [1]: [max#69] Keys: [] Functions [1]: [max(csales#67)] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt index d860e18574f2a..aebe2bd3e1a6c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt @@ -1,143 +1,142 @@ -CollectLimit - WholeStageCodegen (20) - HashAggregate [sum,isEmpty] [sum(sales),sum(sales),sum,isEmpty] - InputAdapter - Exchange #1 - WholeStageCodegen (19) - HashAggregate [sales] [sum,isEmpty,sum,isEmpty] - InputAdapter - Union - WholeStageCodegen (9) - Project [cs_quantity,cs_list_price] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_quantity,cs_list_price] - BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] - Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] - BroadcastHashJoin [cs_item_sk,item_sk] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen (4) - Project [item_sk] - Filter [count(1)] - HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] - InputAdapter - Exchange [substr(i_item_desc, 1, 30),i_item_sk,d_date] #3 - WholeStageCodegen (3) - HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] - Project [d_date,i_item_sk,i_item_desc] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_item_sk] +WholeStageCodegen (20) + HashAggregate [sum,isEmpty] [sum(sales),sum(sales),sum,isEmpty] + InputAdapter + Exchange #1 + WholeStageCodegen (19) + HashAggregate [sales] [sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (9) + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_quantity,cs_list_price] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] + BroadcastHashJoin [cs_item_sk,item_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (4) + Project [item_sk] + Filter [count(1)] + HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] + InputAdapter + Exchange [substr(i_item_desc, 1, 30),i_item_sk,d_date] #3 + WholeStageCodegen (3) + HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] + Project [d_date,i_item_sk,i_item_desc] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim 
[d_date_sk,d_date,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [i_item_sk] ColumnarToRow InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (1) - Project [d_date_sk,d_date] - Filter [d_year,d_date_sk] + Scan parquet default.item [i_item_sk,i_item_desc] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #1 + WholeStageCodegen (5) + HashAggregate [max] [max(csales),tpcds_cmax,max] + InputAdapter + Exchange #9 + WholeStageCodegen (4) + HashAggregate [csales] [max,max] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] + InputAdapter + Exchange [c_customer_sk] #10 + WholeStageCodegen (3) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Filter [ss_customer_sk,ss_sold_date_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_year] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (2) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_desc] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (7) - Project [c_customer_sk] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] - Subquery #1 - WholeStageCodegen (5) - HashAggregate [max] [max(csales),tpcds_cmax,max] - InputAdapter - Exchange #9 - WholeStageCodegen (4) - HashAggregate [csales] [max,max] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] - InputAdapter - Exchange [c_customer_sk] #10 - WholeStageCodegen (3) - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_quantity,ss_sales_price,c_customer_sk] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Filter [ss_customer_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] - InputAdapter - BroadcastExchange #11 - WholeStageCodegen (1) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk] - InputAdapter - BroadcastExchange #12 - WholeStageCodegen (2) - Project [d_date_sk] - Filter [d_year,d_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (1) + Filter [c_customer_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - HashAggregate 
[c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] - InputAdapter - Exchange [c_customer_sk] #7 - WholeStageCodegen (6) - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Filter [ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen (5) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk] - InputAdapter - BroadcastExchange #13 - WholeStageCodegen (8) - Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - WholeStageCodegen (18) - Project [ws_quantity,ws_list_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_quantity,ws_list_price] - BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] - Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] - BroadcastHashJoin [ws_item_sk,item_sk] - Filter [ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] - InputAdapter - ReusedExchange [item_sk] #2 - InputAdapter - BroadcastExchange #14 - WholeStageCodegen (16) - Project [c_customer_sk] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [tpcds_cmax] #1 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] - InputAdapter - ReusedExchange [c_customer_sk,sum,isEmpty] #7 - InputAdapter - ReusedExchange [d_date_sk] #13 + Scan parquet default.customer [c_customer_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + InputAdapter + Exchange [c_customer_sk] #7 + WholeStageCodegen (6) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Filter [ss_customer_sk] + ColumnarToRow + 
InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (5) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (8) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (18) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_quantity,ws_list_price] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + BroadcastHashJoin [ws_item_sk,item_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + InputAdapter + ReusedExchange [item_sk] #2 + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (16) + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + InputAdapter + ReusedExchange [c_customer_sk,sum,isEmpty] #7 + InputAdapter + ReusedExchange [d_date_sk] #13 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt index 92b9c26825e51..7465ddae84e8a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt @@ -1,72 +1,71 @@ == Physical Plan == -CollectLimit (68) -+- * HashAggregate (67) - +- Exchange (66) - +- * HashAggregate (65) - +- * HashAggregate (64) - +- * HashAggregate (63) - +- * HashAggregate (62) - +- * HashAggregate (61) - +- * HashAggregate (60) - +- Exchange (59) - +- * HashAggregate (58) - +- SortMergeJoin LeftSemi (57) - :- SortMergeJoin LeftSemi (39) - : :- * Sort (21) - : : +- Exchange (20) - : : +- * Project (19) - : : +- * SortMergeJoin Inner (18) - : : :- * Sort (12) - : : : +- Exchange (11) - : : : +- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- * Sort (17) - : : +- Exchange (16) - : : +- * Filter (15) - : : +- * ColumnarToRow (14) - : : +- Scan parquet default.customer (13) - : +- * Sort (38) - : +- Exchange (37) - : +- * HashAggregate (36) - : +- Exchange (35) - : +- * HashAggregate (34) - : +- * Project (33) - : +- * SortMergeJoin Inner (32) - : :- * Sort (29) - : : +- Exchange (28) - : : +- * Project (27) - : : +- * 
BroadcastHashJoin Inner BuildRight (26) - : : :- * Filter (24) - : : : +- * ColumnarToRow (23) - : : : +- Scan parquet default.catalog_sales (22) - : : +- ReusedExchange (25) - : +- * Sort (31) - : +- ReusedExchange (30) - +- * Sort (56) - +- Exchange (55) - +- * HashAggregate (54) - +- Exchange (53) - +- * HashAggregate (52) - +- * Project (51) - +- * SortMergeJoin Inner (50) - :- * Sort (47) - : +- Exchange (46) - : +- * Project (45) - : +- * BroadcastHashJoin Inner BuildRight (44) - : :- * Filter (42) - : : +- * ColumnarToRow (41) - : : +- Scan parquet default.web_sales (40) - : +- ReusedExchange (43) - +- * Sort (49) - +- ReusedExchange (48) +* HashAggregate (67) ++- Exchange (66) + +- * HashAggregate (65) + +- * HashAggregate (64) + +- * HashAggregate (63) + +- * HashAggregate (62) + +- * HashAggregate (61) + +- * HashAggregate (60) + +- Exchange (59) + +- * HashAggregate (58) + +- SortMergeJoin LeftSemi (57) + :- SortMergeJoin LeftSemi (39) + : :- * Sort (21) + : : +- Exchange (20) + : : +- * Project (19) + : : +- * SortMergeJoin Inner (18) + : : :- * Sort (12) + : : : +- Exchange (11) + : : : +- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- * Sort (17) + : : +- Exchange (16) + : : +- * Filter (15) + : : +- * ColumnarToRow (14) + : : +- Scan parquet default.customer (13) + : +- * Sort (38) + : +- Exchange (37) + : +- * HashAggregate (36) + : +- Exchange (35) + : +- * HashAggregate (34) + : +- * Project (33) + : +- * SortMergeJoin Inner (32) + : :- * Sort (29) + : : +- Exchange (28) + : : +- * Project (27) + : : +- * BroadcastHashJoin Inner BuildRight (26) + : : :- * Filter (24) + : : : +- * ColumnarToRow (23) + : : : +- Scan parquet default.catalog_sales (22) + : : +- ReusedExchange (25) + : +- * Sort (31) + : +- ReusedExchange (30) + +- * Sort (56) + +- Exchange (55) + +- * HashAggregate (54) + +- Exchange (53) + +- * HashAggregate (52) + +- * Project (51) + +- * SortMergeJoin Inner (50) + :- * Sort (47) + : +- Exchange (46) + : +- * Project (45) + : +- * BroadcastHashJoin Inner BuildRight (44) + : :- * Filter (42) + : : +- * ColumnarToRow (41) + : : +- Scan parquet default.web_sales (40) + : +- ReusedExchange (43) + +- * Sort (49) + +- ReusedExchange (48) (1) Scan parquet default.store_sales @@ -387,7 +386,3 @@ Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#37] Results [1]: [count(1)#37 AS count(1)#38] -(68) CollectLimit -Input [1]: [count(1)#38] -Arguments: 100 - diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/simplified.txt index 5bcd7dbb93022..8dd59340cf069 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/simplified.txt @@ -1,118 +1,117 @@ -CollectLimit - WholeStageCodegen (26) - HashAggregate [count] [count(1),count(1),count] - InputAdapter - Exchange #1 - WholeStageCodegen (25) - HashAggregate [count,count] +WholeStageCodegen (26) + HashAggregate [count] [count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen (25) + HashAggregate [count,count] + HashAggregate 
[c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #2 - WholeStageCodegen (24) - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #2 + WholeStageCodegen (24) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + SortMergeJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] SortMergeJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - SortMergeJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - WholeStageCodegen (7) - Sort [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #3 - WholeStageCodegen (6) - Project [d_date,c_first_name,c_last_name] - SortMergeJoin [ss_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (3) - Sort [ss_customer_sk] - InputAdapter - Exchange [ss_customer_sk] #4 - WholeStageCodegen (2) - Project [ss_customer_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [d_date_sk,d_date] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] - InputAdapter - WholeStageCodegen (5) - Sort [c_customer_sk] - InputAdapter - Exchange [c_customer_sk] #6 - WholeStageCodegen (4) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] - WholeStageCodegen (15) - Sort [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #7 - WholeStageCodegen (14) - HashAggregate [c_last_name,c_first_name,d_date] + WholeStageCodegen (7) + Sort [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #3 + WholeStageCodegen (6) + Project [d_date,c_first_name,c_last_name] + SortMergeJoin [ss_customer_sk,c_customer_sk] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #8 - WholeStageCodegen (13) - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - SortMergeJoin [cs_bill_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (10) - Sort [cs_bill_customer_sk] - InputAdapter - Exchange [cs_bill_customer_sk] #9 - WholeStageCodegen (9) - Project [cs_bill_customer_sk,d_date] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk,cs_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] + WholeStageCodegen (3) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #4 + WholeStageCodegen (2) + Project [ss_customer_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + ColumnarToRow InputAdapter - ReusedExchange [d_date_sk,d_date] #5 - InputAdapter - WholeStageCodegen (12) - Sort 
[c_customer_sk] - InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] #6 - WholeStageCodegen (23) + Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] + InputAdapter + WholeStageCodegen (5) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #6 + WholeStageCodegen (4) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] + WholeStageCodegen (15) Sort [c_last_name,c_first_name,d_date] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #10 - WholeStageCodegen (22) + Exchange [c_last_name,c_first_name,d_date] #7 + WholeStageCodegen (14) HashAggregate [c_last_name,c_first_name,d_date] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #11 - WholeStageCodegen (21) + Exchange [c_last_name,c_first_name,d_date] #8 + WholeStageCodegen (13) HashAggregate [c_last_name,c_first_name,d_date] Project [c_last_name,c_first_name,d_date] - SortMergeJoin [ws_bill_customer_sk,c_customer_sk] + SortMergeJoin [cs_bill_customer_sk,c_customer_sk] InputAdapter - WholeStageCodegen (18) - Sort [ws_bill_customer_sk] + WholeStageCodegen (10) + Sort [cs_bill_customer_sk] InputAdapter - Exchange [ws_bill_customer_sk] #12 - WholeStageCodegen (17) - Project [ws_bill_customer_sk,d_date] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_bill_customer_sk] + Exchange [cs_bill_customer_sk] #9 + WholeStageCodegen (9) + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk,cs_bill_customer_sk] ColumnarToRow InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] InputAdapter ReusedExchange [d_date_sk,d_date] #5 InputAdapter - WholeStageCodegen (20) + WholeStageCodegen (12) Sort [c_customer_sk] InputAdapter ReusedExchange [c_customer_sk,c_first_name,c_last_name] #6 + WholeStageCodegen (23) + Sort [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #10 + WholeStageCodegen (22) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #11 + WholeStageCodegen (21) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + SortMergeJoin [ws_bill_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (18) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #12 + WholeStageCodegen (17) + Project [ws_bill_customer_sk,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] #5 + InputAdapter + WholeStageCodegen (20) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt index 09ab60c7cf651..74454cf32afd0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt @@ -1,59 +1,58 @@ == Physical Plan == -CollectLimit (55) -+- * HashAggregate (54) - +- Exchange (53) - +- * HashAggregate (52) - +- * HashAggregate (51) - +- * 
HashAggregate (50) - +- * HashAggregate (49) - +- * HashAggregate (48) - +- * HashAggregate (47) - +- Exchange (46) - +- * HashAggregate (45) - +- * BroadcastHashJoin LeftSemi BuildRight (44) - :- * BroadcastHashJoin LeftSemi BuildRight (30) - : :- * Project (16) - : : +- * BroadcastHashJoin Inner BuildRight (15) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- BroadcastExchange (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.customer (11) - : +- BroadcastExchange (29) - : +- * HashAggregate (28) - : +- Exchange (27) - : +- * HashAggregate (26) - : +- * Project (25) - : +- * BroadcastHashJoin Inner BuildRight (24) - : :- * Project (22) - : : +- * BroadcastHashJoin Inner BuildRight (21) - : : :- * Filter (19) - : : : +- * ColumnarToRow (18) - : : : +- Scan parquet default.catalog_sales (17) - : : +- ReusedExchange (20) - : +- ReusedExchange (23) - +- BroadcastExchange (43) - +- * HashAggregate (42) - +- Exchange (41) - +- * HashAggregate (40) - +- * Project (39) - +- * BroadcastHashJoin Inner BuildRight (38) - :- * Project (36) - : +- * BroadcastHashJoin Inner BuildRight (35) - : :- * Filter (33) - : : +- * ColumnarToRow (32) - : : +- Scan parquet default.web_sales (31) - : +- ReusedExchange (34) - +- ReusedExchange (37) +* HashAggregate (54) ++- Exchange (53) + +- * HashAggregate (52) + +- * HashAggregate (51) + +- * HashAggregate (50) + +- * HashAggregate (49) + +- * HashAggregate (48) + +- * HashAggregate (47) + +- Exchange (46) + +- * HashAggregate (45) + +- * BroadcastHashJoin LeftSemi BuildRight (44) + :- * BroadcastHashJoin LeftSemi BuildRight (30) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.customer (11) + : +- BroadcastExchange (29) + : +- * HashAggregate (28) + : +- Exchange (27) + : +- * HashAggregate (26) + : +- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Filter (19) + : : : +- * ColumnarToRow (18) + : : : +- Scan parquet default.catalog_sales (17) + : : +- ReusedExchange (20) + : +- ReusedExchange (23) + +- BroadcastExchange (43) + +- * HashAggregate (42) + +- Exchange (41) + +- * HashAggregate (40) + +- * Project (39) + +- * BroadcastHashJoin Inner BuildRight (38) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Filter (33) + : : +- * ColumnarToRow (32) + : : +- Scan parquet default.web_sales (31) + : +- ReusedExchange (34) + +- ReusedExchange (37) (1) Scan parquet default.store_sales @@ -322,7 +321,3 @@ Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#33] Results [1]: [count(1)#33 AS count(1)#34] -(55) CollectLimit -Input [1]: [count(1)#34] -Arguments: 100 - diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt index 10a2166ce761d..a5b57a4ac9450 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt @@ -1,81 +1,80 @@ -CollectLimit - WholeStageCodegen (13) - HashAggregate [count] [count(1),count(1),count] - InputAdapter - Exchange #1 - WholeStageCodegen (12) - HashAggregate [count,count] +WholeStageCodegen (13) + HashAggregate [count] [count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen (12) + HashAggregate [count,count] + HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #2 - WholeStageCodegen (11) - HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #2 + WholeStageCodegen (11) + HashAggregate [c_last_name,c_first_name,d_date] + BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - Project [d_date,c_first_name,c_last_name] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_customer_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_customer_sk] + Project [d_date,c_first_name,c_last_name] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [c_customer_sk] ColumnarToRow InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk,d_date] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (2) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (6) - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #6 - WholeStageCodegen (5) - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] - Project [cs_bill_customer_sk,d_date] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk,cs_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] - InputAdapter - ReusedExchange 
[d_date_sk,d_date] #3 - InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] InputAdapter - BroadcastExchange #7 - WholeStageCodegen (10) + BroadcastExchange #5 + WholeStageCodegen (6) HashAggregate [c_last_name,c_first_name,d_date] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #8 - WholeStageCodegen (9) + Exchange [c_last_name,c_first_name,d_date] #6 + WholeStageCodegen (5) HashAggregate [c_last_name,c_first_name,d_date] Project [c_last_name,c_first_name,d_date] - BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] - Project [ws_bill_customer_sk,d_date] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_bill_customer_sk] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk,cs_bill_customer_sk] ColumnarToRow InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] InputAdapter ReusedExchange [d_date_sk,d_date] #3 InputAdapter ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (10) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #8 + WholeStageCodegen (9) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Project [ws_bill_customer_sk,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt index dc4665185b014..99459bfe9a049 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (34) +* Sort (34) +- * HashAggregate (33) +- Exchange (32) +- * HashAggregate (31) @@ -190,7 +190,7 @@ Functions [1]: [sum(UnscaledValue(ws_ext_discount_amt#6))] Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_discount_amt#6))#22] Results [1]: [MakeDecimal(sum(UnscaledValue(ws_ext_discount_amt#6))#22,17,2) AS Excess Discount Amount #23] -(34) TakeOrderedAndProject +(34) Sort [codegen id : 7] Input [1]: [Excess Discount Amount #23] -Arguments: 100, [Excess Discount Amount #23 ASC NULLS FIRST], [Excess Discount Amount #23] +Arguments: [Excess Discount Amount #23 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt index 7fd1cd3637a09..0721155286d17 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [Excess Discount 
Amount ] - WholeStageCodegen (7) +WholeStageCodegen (7) + Sort [Excess Discount Amount ] HashAggregate [sum] [sum(UnscaledValue(ws_ext_discount_amt)),Excess Discount Amount ,sum] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt index b17a48db8baac..8a441392f4165 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (34) +* Sort (34) +- * HashAggregate (33) +- Exchange (32) +- * HashAggregate (31) @@ -190,7 +190,7 @@ Functions [1]: [sum(UnscaledValue(ws_ext_discount_amt#3))] Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_discount_amt#3))#22] Results [1]: [MakeDecimal(sum(UnscaledValue(ws_ext_discount_amt#3))#22,17,2) AS Excess Discount Amount #23] -(34) TakeOrderedAndProject +(34) Sort [codegen id : 7] Input [1]: [Excess Discount Amount #23] -Arguments: 100, [Excess Discount Amount #23 ASC NULLS FIRST], [Excess Discount Amount #23] +Arguments: [Excess Discount Amount #23 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt index 652b2e36cf781..1f24a7c964f20 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [Excess Discount Amount ] - WholeStageCodegen (7) +WholeStageCodegen (7) + Sort [Excess Discount Amount ] HashAggregate [sum] [sum(UnscaledValue(ws_ext_discount_amt)),Excess Discount Amount ,sum] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt index 7720d9dee4170..43390c5048a6d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (47) +* Sort (47) +- * HashAggregate (46) +- Exchange (45) +- * HashAggregate (44) @@ -259,7 +259,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#6)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#24, sum(UnscaledValue(ws_net_profit#7))#25, count(ws_order_number#5)#29] Results [3]: [count(ws_order_number#5)#29 AS order count #32, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#24,17,2) AS total shipping cost #33, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#25,17,2) AS total net profit #34] -(47) TakeOrderedAndProject +(47) Sort [codegen id : 14] Input [3]: [order count #32, total shipping cost #33, total net profit #34] -Arguments: 100, [order count #32 ASC NULLS FIRST], [order count #32, total shipping cost #33, total net profit #34] +Arguments: [order count #32 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt index 128a8179ac10b..7b3d461b9e80f 100644 --- 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (14) +WholeStageCodegen (14) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt index a94e74f66b201..2abbe4f9b8390 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (41) +* Sort (41) +- * HashAggregate (40) +- Exchange (39) +- * HashAggregate (38) @@ -229,7 +229,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#6)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#22, sum(UnscaledValue(ws_net_profit#7))#23, count(ws_order_number#5)#27] Results [3]: [count(ws_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#22,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#23,17,2) AS total net profit #32] -(41) TakeOrderedAndProject +(41) Sort [codegen id : 8] Input [3]: [order count #30, total shipping cost #31, total net profit #32] -Arguments: 100, [order count #30 ASC NULLS FIRST], [order count #30, total shipping cost #31, total net profit #32] +Arguments: [order count #30 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt index 9d30b998fe174..5e7d7db5c0a9e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (8) +WholeStageCodegen (8) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt index 7fec07e259559..547792f3d7ae4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (61) +* Sort (61) +- * HashAggregate (60) +- Exchange (59) +- * HashAggregate (58) @@ -331,7 +331,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#28, sum(UnscaledValue(ws_net_profit#6))#29, count(ws_order_number#4)#33] Results [3]: 
[count(ws_order_number#4)#33 AS order count #36, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#28,17,2) AS total shipping cost #37, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#29,17,2) AS total net profit #38] -(61) TakeOrderedAndProject +(61) Sort [codegen id : 23] Input [3]: [order count #36, total shipping cost #37, total net profit #38] -Arguments: 100, [order count #36 ASC NULLS FIRST], [order count #36, total shipping cost #37, total net profit #38] +Arguments: [order count #36 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt index da48d34c72a04..7213a9f58d3f8 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (23) +WholeStageCodegen (23) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt index 3a24e83aff256..1cc99e296383f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (56) +* Sort (56) +- * HashAggregate (55) +- Exchange (54) +- * HashAggregate (53) @@ -312,7 +312,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28, count(ws_order_number#4)#32] Results [3]: [count(ws_order_number#4)#32 AS order count #35, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#27,17,2) AS total shipping cost #36, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#28,17,2) AS total net profit #37] -(56) TakeOrderedAndProject +(56) Sort [codegen id : 11] Input [3]: [order count #35, total shipping cost #36, total net profit #37] -Arguments: 100, [order count #35 ASC NULLS FIRST], [order count #35, total shipping cost #36, total net profit #37] +Arguments: [order count #35 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt index 6d35311c810f5..191ff22c1961f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (11) +WholeStageCodegen (11) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt index d00029f985471..5ae0e1632f15b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (28) +* Sort (28) +- * HashAggregate (27) +- Exchange (26) +- * HashAggregate (25) @@ -154,7 +154,7 @@ Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#17] Results [1]: [count(1)#17 AS count(1)#18] -(28) TakeOrderedAndProject +(28) Sort [codegen id : 5] Input [1]: [count(1)#18] -Arguments: 100, [count(1)#18 ASC NULLS FIRST], [count(1)#18] +Arguments: [count(1)#18 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt index 1355caffbbfe8..d9ee3e09481ed 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [count(1)] - WholeStageCodegen (5) +WholeStageCodegen (5) + Sort [count(1)] HashAggregate [count] [count(1),count(1),count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt index 3561eff8f57ef..6729910d9cb4a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (28) +* Sort (28) +- * HashAggregate (27) +- Exchange (26) +- * HashAggregate (25) @@ -154,7 +154,7 @@ Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#17] Results [1]: [count(1)#17 AS count(1)#18] -(28) TakeOrderedAndProject +(28) Sort [codegen id : 5] Input [1]: [count(1)#18] -Arguments: 100, [count(1)#18 ASC NULLS FIRST], [count(1)#18] +Arguments: [count(1)#18 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt index b13f28bf69cfd..45400b6c512f4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [count(1)] - WholeStageCodegen (5) +WholeStageCodegen (5) + Sort [count(1)] HashAggregate [count] [count(1),count(1),count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt index 0a2e88b5bc160..e904ad94dd8fa 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt @@ -1,34 +1,33 @@ == Physical Plan == -CollectLimit (30) -+- * HashAggregate (29) - +- Exchange (28) - +- * 
HashAggregate (27) - +- * Project (26) - +- SortMergeJoin FullOuter (25) - :- * Sort (14) - : +- * HashAggregate (13) - : +- Exchange (12) - : +- * HashAggregate (11) - : +- * Project (10) - : +- * BroadcastHashJoin Inner BuildRight (9) - : :- * Filter (3) - : : +- * ColumnarToRow (2) - : : +- Scan parquet default.store_sales (1) - : +- BroadcastExchange (8) - : +- * Project (7) - : +- * Filter (6) - : +- * ColumnarToRow (5) - : +- Scan parquet default.date_dim (4) - +- * Sort (24) - +- * HashAggregate (23) - +- Exchange (22) - +- * HashAggregate (21) - +- * Project (20) - +- * BroadcastHashJoin Inner BuildRight (19) - :- * Filter (17) - : +- * ColumnarToRow (16) - : +- Scan parquet default.catalog_sales (15) - +- ReusedExchange (18) +* HashAggregate (29) ++- Exchange (28) + +- * HashAggregate (27) + +- * Project (26) + +- SortMergeJoin FullOuter (25) + :- * Sort (14) + : +- * HashAggregate (13) + : +- Exchange (12) + : +- * HashAggregate (11) + : +- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- * Sort (24) + +- * HashAggregate (23) + +- Exchange (22) + +- * HashAggregate (21) + +- * Project (20) + +- * BroadcastHashJoin Inner BuildRight (19) + :- * Filter (17) + : +- * ColumnarToRow (16) + : +- Scan parquet default.catalog_sales (15) + +- ReusedExchange (18) (1) Scan parquet default.store_sales @@ -173,7 +172,3 @@ Functions [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer Aggregate Attributes [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25] Results [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23 AS store_only#26, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24 AS catalog_only#27, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25 AS store_and_catalog#28] -(30) CollectLimit -Input [3]: [store_only#26, catalog_only#27, store_and_catalog#28] -Arguments: 100 - diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt index bae48ec244faa..c5921a11cd889 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt @@ -1,46 +1,45 @@ -CollectLimit - WholeStageCodegen (8) - HashAggregate [sum,sum,sum] [sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_only,catalog_only,store_and_catalog,sum,sum,sum] - InputAdapter - Exchange #1 - WholeStageCodegen (7) - HashAggregate [customer_sk,customer_sk] [sum,sum,sum,sum,sum,sum] - Project 
[customer_sk,customer_sk] - InputAdapter - SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] - WholeStageCodegen (3) - Sort [customer_sk,item_sk] - HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] - InputAdapter - Exchange [ss_customer_sk,ss_item_sk] #2 - WholeStageCodegen (2) - HashAggregate [ss_customer_sk,ss_item_sk] - Project [ss_item_sk,ss_customer_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] - WholeStageCodegen (6) - Sort [customer_sk,item_sk] - HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] - InputAdapter - Exchange [cs_bill_customer_sk,cs_item_sk] #4 - WholeStageCodegen (5) - HashAggregate [cs_bill_customer_sk,cs_item_sk] - Project [cs_bill_customer_sk,cs_item_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] - InputAdapter - ReusedExchange [d_date_sk] #3 +WholeStageCodegen (8) + HashAggregate [sum,sum,sum] [sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_only,catalog_only,store_and_catalog,sum,sum,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (7) + HashAggregate [customer_sk,customer_sk] [sum,sum,sum,sum,sum,sum] + Project [customer_sk,customer_sk] + InputAdapter + SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] + WholeStageCodegen (3) + Sort [customer_sk,item_sk] + HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [ss_customer_sk,ss_item_sk] #2 + WholeStageCodegen (2) + HashAggregate [ss_customer_sk,ss_item_sk] + Project [ss_item_sk,ss_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + WholeStageCodegen (6) + Sort [customer_sk,item_sk] + HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk] #4 + WholeStageCodegen (5) + HashAggregate [cs_bill_customer_sk,cs_item_sk] + Project [cs_bill_customer_sk,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt index 0a2e88b5bc160..e904ad94dd8fa 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt @@ -1,34 +1,33 @@ == Physical Plan == -CollectLimit (30) -+- * HashAggregate (29) - +- Exchange (28) - +- * HashAggregate (27) - +- * Project (26) - +- SortMergeJoin FullOuter (25) - :- * Sort (14) - : +- * HashAggregate (13) - : +- Exchange (12) - : +- * HashAggregate (11) - : +- * Project (10) - : +- * BroadcastHashJoin Inner BuildRight (9) - : :- * Filter (3) - : : +- * ColumnarToRow (2) - : : +- Scan parquet default.store_sales (1) - : +- BroadcastExchange (8) - : +- * Project (7) - : +- * Filter (6) - : +- * ColumnarToRow (5) - : +- Scan parquet default.date_dim (4) - +- * Sort (24) - +- * HashAggregate (23) - +- Exchange (22) - +- * HashAggregate (21) - +- * Project (20) - +- * BroadcastHashJoin Inner BuildRight (19) - :- * Filter (17) - : +- * ColumnarToRow (16) - : +- Scan parquet default.catalog_sales (15) - +- ReusedExchange (18) +* HashAggregate (29) ++- Exchange (28) + +- * HashAggregate (27) + +- * Project (26) + +- SortMergeJoin FullOuter (25) + :- * Sort (14) + : +- * HashAggregate (13) + : +- Exchange (12) + : +- * HashAggregate (11) + : +- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- * Sort (24) + +- * HashAggregate (23) + +- Exchange (22) + +- * HashAggregate (21) + +- * Project (20) + +- * BroadcastHashJoin Inner BuildRight (19) + :- * Filter (17) + : +- * ColumnarToRow (16) + : +- Scan parquet default.catalog_sales (15) + +- ReusedExchange (18) (1) Scan parquet default.store_sales @@ -173,7 +172,3 @@ Functions [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer Aggregate Attributes [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25] Results [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23 AS store_only#26, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24 AS catalog_only#27, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25 AS store_and_catalog#28] -(30) CollectLimit -Input [3]: [store_only#26, catalog_only#27, store_and_catalog#28] -Arguments: 100 - diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt index bae48ec244faa..c5921a11cd889 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt @@ -1,46 +1,45 @@ -CollectLimit - WholeStageCodegen (8) - HashAggregate [sum,sum,sum] [sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as 
bigint)),store_only,catalog_only,store_and_catalog,sum,sum,sum] - InputAdapter - Exchange #1 - WholeStageCodegen (7) - HashAggregate [customer_sk,customer_sk] [sum,sum,sum,sum,sum,sum] - Project [customer_sk,customer_sk] - InputAdapter - SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] - WholeStageCodegen (3) - Sort [customer_sk,item_sk] - HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] - InputAdapter - Exchange [ss_customer_sk,ss_item_sk] #2 - WholeStageCodegen (2) - HashAggregate [ss_customer_sk,ss_item_sk] - Project [ss_item_sk,ss_customer_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] - WholeStageCodegen (6) - Sort [customer_sk,item_sk] - HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] - InputAdapter - Exchange [cs_bill_customer_sk,cs_item_sk] #4 - WholeStageCodegen (5) - HashAggregate [cs_bill_customer_sk,cs_item_sk] - Project [cs_bill_customer_sk,cs_item_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] - InputAdapter - ReusedExchange [d_date_sk] #3 +WholeStageCodegen (8) + HashAggregate [sum,sum,sum] [sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_only,catalog_only,store_and_catalog,sum,sum,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (7) + HashAggregate [customer_sk,customer_sk] [sum,sum,sum,sum,sum,sum] + Project [customer_sk,customer_sk] + InputAdapter + SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] + WholeStageCodegen (3) + Sort [customer_sk,item_sk] + HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [ss_customer_sk,ss_item_sk] #2 + WholeStageCodegen (2) + HashAggregate [ss_customer_sk,ss_item_sk] + Project [ss_item_sk,ss_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + WholeStageCodegen (6) + Sort [customer_sk,item_sk] + HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk] #4 + WholeStageCodegen (5) + HashAggregate [cs_bill_customer_sk,cs_item_sk] + Project [cs_bill_customer_sk,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala index 8797e5ad64149..e64d5f6f3587e 
100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala @@ -1134,7 +1134,7 @@ class StreamSuite extends StreamTest { verifyLocalLimit(inputDF.toDF("value").join(staticDF, "value"), expectStreamingLimit = false) verifyLocalLimit( - inputDF.groupBy().count().limit(1), + inputDF.groupBy("value").count().limit(1), expectStreamingLimit = false, outputMode = OutputMode.Complete()) } From 3695e997d5d436be086235505bbb030c87ae8eef Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Thu, 19 Nov 2020 16:56:21 +0000 Subject: [PATCH 0522/1009] [SPARK-33045][SQL] Support built-in function like_all and fix StackOverflowError issue ### What changes were proposed in this pull request? Spark already supports the `LIKE ALL` syntax, but it will throw `StackOverflowError` if there are many elements (more than 14378 elements). We should implement a built-in function for LIKE ALL to fix this issue. Why can the stack overflow happen in the current approach? The current approach uses reduceLeft to connect each `Like(e, p)`, which makes the call depth too large and causes `StackOverflowError` problems. Why does the fix in this PR avoid the error? This PR adds a built-in function for `LIKE ALL` and avoids this issue. ### Why are the changes needed? 1. Fix the `StackOverflowError` issue. 2. Support the built-in function `like_all`. ### Does this PR introduce _any_ user-facing change? 'No'. ### How was this patch tested? Jenkins test. Closes #29999 from beliefer/SPARK-33045-like_all. Lead-authored-by: gengjiaan Co-authored-by: beliefer Co-authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/dsl/package.scala | 5 ++ .../expressions/regexpExpressions.scala | 84 +++++++++++++++++++ .../sql/catalyst/parser/AstBuilder.scala | 15 +++- .../apache/spark/sql/internal/SQLConf.scala | 14 ++++ .../expressions/RegexpExpressionsSuite.scala | 24 ++++++ .../resources/sql-tests/inputs/like-all.sql | 4 + 6 files changed, 145 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index b61c4b8d065f2..4cd649b07a5c0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.expressions.objects.Invoke import org.apache.spark.sql.catalyst.plans.{Inner, JoinType} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String /** * A collection of implicit conversions that create a DSL for constructing catalyst data structures. 
@@ -102,6 +103,10 @@ package object dsl { def like(other: Expression, escapeChar: Char = '\\'): Expression = Like(expr, other, escapeChar) def rlike(other: Expression): Expression = RLike(expr, other) + def likeAll(others: Expression*): Expression = + LikeAll(expr, others.map(_.eval(EmptyRow).asInstanceOf[UTF8String])) + def notLikeAll(others: Expression*): Expression = + NotLikeAll(expr, others.map(_.eval(EmptyRow).asInstanceOf[UTF8String])) def contains(other: Expression): Expression = Contains(expr, other) def startsWith(other: Expression): Expression = StartsWith(expr, other) def endsWith(other: Expression): Expression = EndsWith(expr, other) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index c9dd7c7acddde..b4d9921488d5f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -20,10 +20,12 @@ package org.apache.spark.sql.catalyst.expressions import java.util.Locale import java.util.regex.{Matcher, MatchResult, Pattern} +import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import org.apache.commons.text.StringEscapeUtils +import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess} import org.apache.spark.sql.catalyst.expressions.codegen._ @@ -178,6 +180,88 @@ case class Like(left: Expression, right: Expression, escapeChar: Char) } } +/** + * Optimized version of LIKE ALL, when all pattern values are literal. 
+ */ +abstract class LikeAllBase extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { + + protected def patterns: Seq[UTF8String] + + protected def isNotLikeAll: Boolean + + override def inputTypes: Seq[DataType] = StringType :: Nil + + override def dataType: DataType = BooleanType + + override def nullable: Boolean = true + + private lazy val hasNull: Boolean = patterns.contains(null) + + private lazy val cache = patterns.filterNot(_ == null) + .map(s => Pattern.compile(StringUtils.escapeLikeRegex(s.toString, '\\'))) + + private lazy val matchFunc = if (isNotLikeAll) { + (p: Pattern, inputValue: String) => !p.matcher(inputValue).matches() + } else { + (p: Pattern, inputValue: String) => p.matcher(inputValue).matches() + } + + override def eval(input: InternalRow): Any = { + val exprValue = child.eval(input) + if (exprValue == null) { + null + } else { + if (cache.forall(matchFunc(_, exprValue.toString))) { + if (hasNull) null else true + } else { + false + } + } + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val eval = child.genCode(ctx) + val patternClass = classOf[Pattern].getName + val javaDataType = CodeGenerator.javaType(child.dataType) + val pattern = ctx.freshName("pattern") + val valueArg = ctx.freshName("valueArg") + val patternCache = ctx.addReferenceObj("patternCache", cache.asJava) + + val checkNotMatchCode = if (isNotLikeAll) { + s"$pattern.matcher($valueArg.toString()).matches()" + } else { + s"!$pattern.matcher($valueArg.toString()).matches()" + } + + ev.copy(code = + code""" + |${eval.code} + |boolean ${ev.isNull} = false; + |boolean ${ev.value} = true; + |if (${eval.isNull}) { + | ${ev.isNull} = true; + |} else { + | $javaDataType $valueArg = ${eval.value}; + | for ($patternClass $pattern: $patternCache) { + | if ($checkNotMatchCode) { + | ${ev.value} = false; + | break; + | } + | } + | if (${ev.value} && $hasNull) ${ev.isNull} = true; + |} + """.stripMargin) + } +} + +case class LikeAll(child: Expression, patterns: Seq[UTF8String]) extends LikeAllBase { + override def isNotLikeAll: Boolean = false +} + +case class NotLikeAll(child: Expression, patterns: Seq[UTF8String]) extends LikeAllBase { + override def isNotLikeAll: Boolean = true +} + // scalastyle:off line.contains.tab @ExpressionDescription( usage = "str _FUNC_ regexp - Returns true if `str` matches `regexp`, or false otherwise.", diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index c3855fe088db6..79857a63a69b5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1406,7 +1406,20 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg case Some(SqlBaseParser.ANY) | Some(SqlBaseParser.SOME) => getLikeQuantifierExprs(ctx.expression).reduceLeft(Or) case Some(SqlBaseParser.ALL) => - getLikeQuantifierExprs(ctx.expression).reduceLeft(And) + validate(!ctx.expression.isEmpty, "Expected something between '(' and ')'.", ctx) + val expressions = ctx.expression.asScala.map(expression) + if (expressions.size > SQLConf.get.optimizerLikeAllConversionThreshold && + expressions.forall(_.foldable) && expressions.forall(_.dataType == StringType)) { + // If there are many pattern expressions, will throw StackOverflowError. + // So we use LikeAll or NotLikeAll instead. 
+ val patterns = expressions.map(_.eval(EmptyRow).asInstanceOf[UTF8String]) + ctx.NOT match { + case null => LikeAll(e, patterns) + case _ => NotLikeAll(e, patterns) + } + } else { + getLikeQuantifierExprs(ctx.expression).reduceLeft(And) + } case _ => val escapeChar = Option(ctx.escapeChar).map(string).map { str => if (str.length != 1) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 43014feecfd8e..fcf222c8fdab0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -216,6 +216,18 @@ object SQLConf { "for using switch statements in InSet must be non-negative and less than or equal to 600") .createWithDefault(400) + val OPTIMIZER_LIKE_ALL_CONVERSION_THRESHOLD = + buildConf("spark.sql.optimizer.likeAllConversionThreshold") + .internal() + .doc("Configure the maximum size of the pattern sequence in like all. Spark will convert " + + "the logical combination of like to avoid StackOverflowError. 200 is an empirical value " + + "that will not cause StackOverflowError.") + .version("3.1.0") + .intConf + .checkValue(threshold => threshold >= 0, "The maximum size of pattern sequence " + + "in like all must be non-negative") + .createWithDefault(200) + val PLAN_CHANGE_LOG_LEVEL = buildConf("spark.sql.planChangeLog.level") .internal() .doc("Configures the log level for logging the change from the original plan to the new " + @@ -3037,6 +3049,8 @@ class SQLConf extends Serializable with Logging { def optimizerInSetSwitchThreshold: Int = getConf(OPTIMIZER_INSET_SWITCH_THRESHOLD) + def optimizerLikeAllConversionThreshold: Int = getConf(OPTIMIZER_LIKE_ALL_CONVERSION_THRESHOLD) + def planChangeLogLevel: String = getConf(PLAN_CHANGE_LOG_LEVEL) def planChangeRules: Option[String] = getConf(PLAN_CHANGE_LOG_RULES) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala index 77a32a735f76d..cc5ab5dc7b4e0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala @@ -48,6 +48,30 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(mkExpr(regex), expected, create_row(input)) // check row input } + test("LIKE ALL") { + checkEvaluation(Literal.create(null, StringType).likeAll("%foo%", "%oo"), null) + checkEvaluation(Literal.create("foo", StringType).likeAll("%foo%", "%oo"), true) + checkEvaluation(Literal.create("foo", StringType).likeAll("%foo%", "%bar%"), false) + checkEvaluation(Literal.create("foo", StringType) + .likeAll("%foo%", Literal.create(null, StringType)), null) + checkEvaluation(Literal.create("foo", StringType) + .likeAll(Literal.create(null, StringType), "%foo%"), null) + checkEvaluation(Literal.create("foo", StringType) + .likeAll("%feo%", Literal.create(null, StringType)), false) + checkEvaluation(Literal.create("foo", StringType) + .likeAll(Literal.create(null, StringType), "%feo%"), false) + checkEvaluation(Literal.create("foo", StringType).notLikeAll("tee", "%yoo%"), true) + checkEvaluation(Literal.create("foo", StringType).notLikeAll("%oo%", "%yoo%"), false) + checkEvaluation(Literal.create("foo", StringType) + 
.notLikeAll("%foo%", Literal.create(null, StringType)), false) + checkEvaluation(Literal.create("foo", StringType) + .notLikeAll(Literal.create(null, StringType), "%foo%"), false) + checkEvaluation(Literal.create("foo", StringType) + .notLikeAll("%yoo%", Literal.create(null, StringType)), null) + checkEvaluation(Literal.create("foo", StringType) + .notLikeAll(Literal.create(null, StringType), "%yoo%"), null) + } + test("LIKE Pattern") { // null handling diff --git a/sql/core/src/test/resources/sql-tests/inputs/like-all.sql b/sql/core/src/test/resources/sql-tests/inputs/like-all.sql index a084dbef61a0c..f83277376e680 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/like-all.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/like-all.sql @@ -1,3 +1,7 @@ +-- test cases for like all +--CONFIG_DIM1 spark.sql.optimizer.likeAllConversionThreshold=0 +--CONFIG_DIM1 spark.sql.optimizer.likeAllConversionThreshold=200 + CREATE OR REPLACE TEMPORARY VIEW like_all_table AS SELECT * FROM (VALUES ('google', '%oo%'), ('facebook', '%oo%'), From 6da8ade5f46cac69820ef0f6987806ffa78873f1 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Thu, 19 Nov 2020 12:42:33 -0800 Subject: [PATCH 0523/1009] [SPARK-33045][SQL][FOLLOWUP] Fix build failure with Scala 2.13 ### What changes were proposed in this pull request? Explicitly convert `scala.collection.mutable.Buffer` to `Seq`. In Scala 2.13 `Seq` is an alias of `scala.collection.immutable.Seq` instead of `scala.collection.Seq`. ### Why are the changes needed? Without the change build with Scala 2.13 fails with the following: ``` [error] /home/runner/work/spark/spark/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala:1417:41: type mismatch; [error] found : scala.collection.mutable.Buffer[org.apache.spark.unsafe.types.UTF8String] [error] required: Seq[org.apache.spark.unsafe.types.UTF8String] [error] case null => LikeAll(e, patterns) [error] ^ [error] /home/runner/work/spark/spark/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala:1418:41: type mismatch; [error] found : scala.collection.mutable.Buffer[org.apache.spark.unsafe.types.UTF8String] [error] required: Seq[org.apache.spark.unsafe.types.UTF8String] [error] case _ => NotLikeAll(e, patterns) [error] ^ ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A Closes #30431 from sunchao/SPARK-33045-followup. Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 79857a63a69b5..23de8ab09dd0a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1414,8 +1414,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg // So we use LikeAll or NotLikeAll instead. 
val patterns = expressions.map(_.eval(EmptyRow).asInstanceOf[UTF8String]) ctx.NOT match { - case null => LikeAll(e, patterns) - case _ => NotLikeAll(e, patterns) + case null => LikeAll(e, patterns.toSeq) + case _ => NotLikeAll(e, patterns.toSeq) } } else { getLikeQuantifierExprs(ctx.expression).reduceLeft(And) From 883a213a8f721d19855f7a5696084533da2002f7 Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Thu, 19 Nov 2020 13:36:45 -0800 Subject: [PATCH 0524/1009] [MINOR] Structured Streaming statistics page indent fix ### What changes were proposed in this pull request? Structured Streaming statistics page code contains an indentation issue. This PR fixes it. ### Why are the changes needed? Indent fix. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing unit tests. Closes #30434 from gaborgsomogyi/STAT-INDENT-FIX. Authored-by: Gabor Somogyi Signed-off-by: Dongjoon Hyun --- .../ui/StreamingQueryStatisticsPage.scala | 54 +++++++++---------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala index 77078046dda7c..7d38acfceee81 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala @@ -209,33 +209,33 @@ private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab) {graphUIDataForNumberTotalRows.generateTimelineHtml(jsCollector)} {graphUIDataForNumberTotalRows.generateHistogramHtml(jsCollector)} - - -
    -
    Aggregated Number Of Updated State Rows {SparkUIUtils.tooltip("Aggregated number of updated state rows.", "right")}
    -
    - - {graphUIDataForNumberUpdatedRows.generateTimelineHtml(jsCollector)} - {graphUIDataForNumberUpdatedRows.generateHistogramHtml(jsCollector)} - - - -
    -
    Aggregated State Memory Used In Bytes {SparkUIUtils.tooltip("Aggregated state memory used in bytes.", "right")}
    -
    - - {graphUIDataForMemoryUsedBytes.generateTimelineHtml(jsCollector)} - {graphUIDataForMemoryUsedBytes.generateHistogramHtml(jsCollector)} - - - -
    -
    Aggregated Number Of State Rows Dropped By Watermark {SparkUIUtils.tooltip("Aggregated number of state rows dropped by watermark.", "right")}
    -
    - - {graphUIDataForNumRowsDroppedByWatermark.generateTimelineHtml(jsCollector)} - {graphUIDataForNumRowsDroppedByWatermark.generateHistogramHtml(jsCollector)} - + + +
    +
    Aggregated Number Of Updated State Rows {SparkUIUtils.tooltip("Aggregated number of updated state rows.", "right")}
    +
    + + {graphUIDataForNumberUpdatedRows.generateTimelineHtml(jsCollector)} + {graphUIDataForNumberUpdatedRows.generateHistogramHtml(jsCollector)} + + + +
    +
    Aggregated State Memory Used In Bytes {SparkUIUtils.tooltip("Aggregated state memory used in bytes.", "right")}
    +
    + + {graphUIDataForMemoryUsedBytes.generateTimelineHtml(jsCollector)} + {graphUIDataForMemoryUsedBytes.generateHistogramHtml(jsCollector)} + + + +
    +
    Aggregated Number Of State Rows Dropped By Watermark {SparkUIUtils.tooltip("Aggregated number of state rows dropped by watermark.", "right")}
    +
    + + {graphUIDataForNumRowsDroppedByWatermark.generateTimelineHtml(jsCollector)} + {graphUIDataForNumRowsDroppedByWatermark.generateHistogramHtml(jsCollector)} + // scalastyle:on } else { new NodeBuffer() From 02d410a18c966944c7a46e5bc3006dadf3d579b6 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Fri, 20 Nov 2020 13:14:20 +0900 Subject: [PATCH 0525/1009] [MINOR][DOCS] Document 'without' value for HADOOP_VERSION in pip installation ### What changes were proposed in this pull request? I believe it's self-descriptive. ### Why are the changes needed? To document supported features. ### Does this PR introduce _any_ user-facing change? Yes, the docs are updated. It's master only. ### How was this patch tested? Manually built the docs via `cd python/docs` and `make clean html`: ![Screen Shot 2020-11-20 at 10 59 07 AM](https://user-images.githubusercontent.com/6477701/99748225-7ad9b280-2b1f-11eb-86fd-165012b1bb7c.png) Closes #30436 from HyukjinKwon/minor-doc-fix. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- python/docs/source/getting_started/install.rst | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst index 4039698d39958..9c9ff7fa7844b 100644 --- a/python/docs/source/getting_started/install.rst +++ b/python/docs/source/getting_started/install.rst @@ -48,7 +48,7 @@ If you want to install extra dependencies for a specific componenet, you can ins pip install pyspark[sql] -For PySpark with a different Hadoop version, you can install it by using ``HADOOP_VERSION`` environment variables as below: +For PySpark with/without a specific Hadoop version, you can install it by using ``HADOOP_VERSION`` environment variables as below: .. code-block:: bash @@ -68,8 +68,13 @@ It is recommended to use ``-v`` option in ``pip`` to track the installation and HADOOP_VERSION=2.7 pip install pyspark -v -Supported versions of Hadoop are ``HADOOP_VERSION=2.7`` and ``HADOOP_VERSION=3.2`` (default). -Note that this installation of PySpark with a different version of Hadoop is experimental. It can change or be removed between minor releases. +Supported values in ``HADOOP_VERSION`` are: + +- ``without``: Spark pre-built with user-provided Apache Hadoop +- ``2.7``: Spark pre-built for Apache Hadoop 2.7 +- ``3.2``: Spark pre-built for Apache Hadoop 3.2 and later (default) + +Note that this installation way of PySpark with/without a specific Hadoop version is experimental. It can change or be removed between minor releases. Using Conda From 8218b488035049434271dc9e3bd5af45ffadf0fd Mon Sep 17 00:00:00 2001 From: Venkata krishnan Sowrirajan Date: Fri, 20 Nov 2020 06:00:30 -0600 Subject: [PATCH 0526/1009] [SPARK-32919][SHUFFLE][TEST-MAVEN][TEST-HADOOP2.7] Driver side changes for coordinating push based shuffle by selecting external shuffle services for merging partitions ### What changes were proposed in this pull request? Driver side changes for coordinating push based shuffle by selecting external shuffle services for merging partitions. This PR includes changes related to `ShuffleMapStage` preparation which is selection of merger locations and initializing them as part of `ShuffleDependency`. 
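To give a rough sense of the sizing rule introduced by the new configs in this patch (spark.shuffle.push.mergersMinStaticThreshold and spark.shuffle.push.mergersMinThresholdRatio), below is a minimal, illustrative Scala sketch of how the required number of merger locations can be derived; the object and method names are made up for the example and are not part of this PR:

```scala
// Minimal sketch only, not the code added by this PR: per the config docs,
// a stage needs at least max(staticThreshold, ratio * numChildStagePartitions)
// merger locations before push based shuffle is enabled for it.
object MergerThresholdSketch {
  def minMergersNeeded(
      numChildStagePartitions: Int,
      mergersMinStaticThreshold: Double,
      mergersMinThresholdRatio: Double): Int = {
    math.max(mergersMinStaticThreshold,
      mergersMinThresholdRatio * numChildStagePartitions).ceil.toInt
  }

  def main(args: Array[String]): Unit = {
    // 1000 reduce partitions, static threshold 5, ratio 0.05 => at least 50 mergers.
    println(minMergersNeeded(1000, 5.0, 0.05))
  }
}
```

When no suitable merger locations are returned, the DAGScheduler change in the diff below logs that push-based shuffle stays disabled for that stage.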
Currently the code added by this PR is not used, as some of the changes will come subsequently as part of https://issues.apache.org/jira/browse/SPARK-32917 (shuffle blocks push as part of `ShuffleMapTask`), https://issues.apache.org/jira/browse/SPARK-32918 (support for finalize API) and https://issues.apache.org/jira/browse/SPARK-32920 (finalization of push/merge phase). This is why the tests here are also partial; once the above-mentioned changes are raised as PRs, we will have enough tests for the DAGScheduler piece of code as well. ### Why are the changes needed? Added a new API in `SchedulerBackend` to get merger locations for push based shuffle. This is currently implemented for Yarn; other cluster managers can have separate implementations, which is why a new API is introduced. ### Does this PR introduce _any_ user-facing change? Yes, a user-facing config to enable push based shuffle is introduced. ### How was this patch tested? Added partial unit tests; since some of the changes in DAGScheduler depend on future changes, DAGScheduler tests will be added along with those changes. Lead-authored-by: Venkata krishnan Sowrirajan vsowrirajanlinkedin.com Co-authored-by: Min Shen mshenlinkedin.com Closes #30164 from venkata91/upstream-SPARK-32919. Lead-authored-by: Venkata krishnan Sowrirajan Co-authored-by: Min Shen Signed-off-by: Mridul Muralidharan gmail.com> --- .../scala/org/apache/spark/Dependency.scala | 15 +++++ .../spark/internal/config/package.scala | 47 ++++++++++++++ .../apache/spark/scheduler/DAGScheduler.scala | 40 ++++++++++++ .../spark/scheduler/SchedulerBackend.scala | 13 ++++ .../apache/spark/storage/BlockManagerId.scala | 2 + .../spark/storage/BlockManagerMaster.scala | 20 ++++++ .../storage/BlockManagerMasterEndpoint.scala | 65 +++++++++++++++++++ .../spark/storage/BlockManagerMessages.scala | 6 ++ .../scala/org/apache/spark/util/Utils.scala | 8 +++ .../spark/storage/BlockManagerSuite.scala | 49 +++++++++++++- .../org/apache/spark/util/UtilsSuite.scala | 12 ++++ .../cluster/YarnSchedulerBackend.scala | 50 ++++++++++++-- 12 files changed, 320 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/Dependency.scala b/core/src/main/scala/org/apache/spark/Dependency.scala index ba8e4d69ba755..d21b9d9833e9e 100644 --- a/core/src/main/scala/org/apache/spark/Dependency.scala +++ b/core/src/main/scala/org/apache/spark/Dependency.scala @@ -23,6 +23,7 @@ import org.apache.spark.annotation.DeveloperApi import org.apache.spark.rdd.RDD import org.apache.spark.serializer.Serializer import org.apache.spark.shuffle.{ShuffleHandle, ShuffleWriteProcessor} +import org.apache.spark.storage.BlockManagerId /** * :: DeveloperApi :: @@ -95,6 +96,20 @@ class ShuffleDependency[K: ClassTag, V: ClassTag, C: ClassTag]( val shuffleHandle: ShuffleHandle = _rdd.context.env.shuffleManager.registerShuffle( shuffleId, this) + /** + * Stores the location of the list of chosen external shuffle services for handling the + * shuffle merge requests from mappers in this shuffle map stage. 
+ */ + private[spark] var mergerLocs: Seq[BlockManagerId] = Nil + + def setMergerLocs(mergerLocs: Seq[BlockManagerId]): Unit = { + if (mergerLocs != null) { + this.mergerLocs = mergerLocs + } + } + + def getMergerLocs: Seq[BlockManagerId] = mergerLocs + _rdd.sparkContext.cleaner.foreach(_.registerShuffleForCleanup(this)) _rdd.sparkContext.shuffleDriverComponents.registerShuffle(shuffleId) } diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 4bc49514fc5ad..b38d0e5c617b9 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -1945,4 +1945,51 @@ package object config { .version("3.0.1") .booleanConf .createWithDefault(false) + + private[spark] val PUSH_BASED_SHUFFLE_ENABLED = + ConfigBuilder("spark.shuffle.push.enabled") + .doc("Set to 'true' to enable push-based shuffle on the client side and this works in " + + "conjunction with the server side flag spark.shuffle.server.mergedShuffleFileManagerImpl " + + "which needs to be set with the appropriate " + + "org.apache.spark.network.shuffle.MergedShuffleFileManager implementation for push-based " + + "shuffle to be enabled") + .version("3.1.0") + .booleanConf + .createWithDefault(false) + + private[spark] val SHUFFLE_MERGER_MAX_RETAINED_LOCATIONS = + ConfigBuilder("spark.shuffle.push.maxRetainedMergerLocations") + .doc("Maximum number of shuffle push merger locations cached for push based shuffle. " + + "Currently, shuffle push merger locations are nothing but external shuffle services " + + "which are responsible for handling pushed blocks and merging them and serving " + + "merged blocks for later shuffle fetch.") + .version("3.1.0") + .intConf + .createWithDefault(500) + + private[spark] val SHUFFLE_MERGER_LOCATIONS_MIN_THRESHOLD_RATIO = + ConfigBuilder("spark.shuffle.push.mergersMinThresholdRatio") + .doc("The minimum number of shuffle merger locations required to enable push based " + + "shuffle for a stage. This is specified as a ratio of the number of partitions in " + + "the child stage. For example, a reduce stage which has 100 partitions and uses the " + + "default value 0.05 requires at least 5 unique merger locations to enable push based " + + "shuffle. Merger locations are currently defined as external shuffle services.") + .version("3.1.0") + .doubleConf + .createWithDefault(0.05) + + private[spark] val SHUFFLE_MERGER_LOCATIONS_MIN_STATIC_THRESHOLD = + ConfigBuilder("spark.shuffle.push.mergersMinStaticThreshold") + .doc(s"The static threshold for number of shuffle push merger locations should be " + + "available in order to enable push based shuffle for a stage. Note this config " + + s"works in conjunction with ${SHUFFLE_MERGER_LOCATIONS_MIN_THRESHOLD_RATIO.key}. " + + "Maximum of spark.shuffle.push.mergersMinStaticThreshold and " + + s"${SHUFFLE_MERGER_LOCATIONS_MIN_THRESHOLD_RATIO.key} ratio number of mergers needed to " + + "enable push based shuffle for a stage. 
For eg: with 1000 partitions for the child " + + "stage with spark.shuffle.push.mergersMinStaticThreshold as 5 and " + + s"${SHUFFLE_MERGER_LOCATIONS_MIN_THRESHOLD_RATIO.key} set to 0.05, we would need " + + "at least 50 mergers to enable push based shuffle for that stage.") + .version("3.1.0") + .doubleConf + .createWithDefault(5) } diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index 13b766e654832..6fb0fb93f253b 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -249,6 +249,8 @@ private[spark] class DAGScheduler( private[spark] val eventProcessLoop = new DAGSchedulerEventProcessLoop(this) taskScheduler.setDAGScheduler(this) + private val pushBasedShuffleEnabled = Utils.isPushBasedShuffleEnabled(sc.getConf) + /** * Called by the TaskSetManager to report task's starting. */ @@ -1252,6 +1254,33 @@ private[spark] class DAGScheduler( execCores.map(cores => properties.setProperty(EXECUTOR_CORES_LOCAL_PROPERTY, cores)) } + /** + * If push based shuffle is enabled, set the shuffle services to be used for the given + * shuffle map stage for block push/merge. + * + * Even with dynamic resource allocation kicking in and significantly reducing the number + * of available active executors, we would still be able to get sufficient shuffle service + * locations for block push/merge by getting the historical locations of past executors. + */ + private def prepareShuffleServicesForShuffleMapStage(stage: ShuffleMapStage): Unit = { + // TODO(SPARK-32920) Handle stage reuse/retry cases separately as without finalize + // TODO changes we cannot disable shuffle merge for the retry/reuse cases + val mergerLocs = sc.schedulerBackend.getShufflePushMergerLocations( + stage.shuffleDep.partitioner.numPartitions, stage.resourceProfileId) + + if (mergerLocs.nonEmpty) { + stage.shuffleDep.setMergerLocs(mergerLocs) + logInfo(s"Push-based shuffle enabled for $stage (${stage.name}) with" + + s" ${stage.shuffleDep.getMergerLocs.size} merger locations") + + logDebug("List of shuffle push merger locations " + + s"${stage.shuffleDep.getMergerLocs.map(_.host).mkString(", ")}") + } else { + logInfo("No available merger locations." + + s" Push-based shuffle disabled for $stage (${stage.name})") + } + } + /** Called when stage's parents are available and we can now do its task. */ private def submitMissingTasks(stage: Stage, jobId: Int): Unit = { logDebug("submitMissingTasks(" + stage + ")") @@ -1281,6 +1310,12 @@ private[spark] class DAGScheduler( stage match { case s: ShuffleMapStage => outputCommitCoordinator.stageStart(stage = s.id, maxPartitionId = s.numPartitions - 1) + // Only generate merger location for a given shuffle dependency once. This way, even if + // this stage gets retried, it would still be merging blocks using the same set of + // shuffle services. 
+ if (pushBasedShuffleEnabled) { + prepareShuffleServicesForShuffleMapStage(s) + } case s: ResultStage => outputCommitCoordinator.stageStart( stage = s.id, maxPartitionId = s.rdd.partitions.length - 1) @@ -2027,6 +2062,11 @@ private[spark] class DAGScheduler( if (!executorFailureEpoch.contains(execId) || executorFailureEpoch(execId) < currentEpoch) { executorFailureEpoch(execId) = currentEpoch logInfo(s"Executor lost: $execId (epoch $currentEpoch)") + if (pushBasedShuffleEnabled) { + // Remove fetchFailed host in the shuffle push merger list for push based shuffle + hostToUnregisterOutputs.foreach( + host => blockManagerMaster.removeShufflePushMergerLocation(host)) + } blockManagerMaster.removeExecutor(execId) clearCacheLocs() } diff --git a/core/src/main/scala/org/apache/spark/scheduler/SchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/SchedulerBackend.scala index a566d0a04387c..b2acdb3e12a6d 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/SchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SchedulerBackend.scala @@ -18,6 +18,7 @@ package org.apache.spark.scheduler import org.apache.spark.resource.ResourceProfile +import org.apache.spark.storage.BlockManagerId /** * A backend interface for scheduling systems that allows plugging in different ones under @@ -92,4 +93,16 @@ private[spark] trait SchedulerBackend { */ def maxNumConcurrentTasks(rp: ResourceProfile): Int + /** + * Get the list of host locations for push based shuffle + * + * Currently push based shuffle is disabled for both stage retry and stage reuse cases + * (for eg: in the case where few partitions are lost due to failure). Hence this method + * should be invoked only once for a ShuffleDependency. + * @return List of external shuffle services locations + */ + def getShufflePushMergerLocations( + numPartitions: Int, + resourceProfileId: Int): Seq[BlockManagerId] = Nil + } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala index 49e32d04d450a..c6a4457d8f910 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala @@ -145,4 +145,6 @@ private[spark] object BlockManagerId { def getCachedBlockManagerId(id: BlockManagerId): BlockManagerId = { blockManagerIdCache.get(id) } + + private[spark] val SHUFFLE_MERGER_IDENTIFIER = "shuffle-push-merger" } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala index f544d47b8e13c..fe1a5aef9499c 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala @@ -125,6 +125,26 @@ class BlockManagerMaster( driverEndpoint.askSync[Seq[BlockManagerId]](GetPeers(blockManagerId)) } + /** + * Get a list of unique shuffle service locations where an executor is successfully + * registered in the past for block push/merge with push based shuffle. + */ + def getShufflePushMergerLocations( + numMergersNeeded: Int, + hostsToFilter: Set[String]): Seq[BlockManagerId] = { + driverEndpoint.askSync[Seq[BlockManagerId]]( + GetShufflePushMergerLocations(numMergersNeeded, hostsToFilter)) + } + + /** + * Remove the host from the candidate list of shuffle push mergers. 
This can be + * triggered if there is a FetchFailedException on the host + * @param host + */ + def removeShufflePushMergerLocation(host: String): Unit = { + driverEndpoint.askSync[Seq[BlockManagerId]](RemoveShufflePushMergerLocation(host)) + } + def getExecutorEndpointRef(executorId: String): Option[RpcEndpointRef] = { driverEndpoint.askSync[Option[RpcEndpointRef]](GetExecutorEndpointRef(executorId)) } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala index a7532a9870fae..4d565511704d4 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala @@ -74,6 +74,14 @@ class BlockManagerMasterEndpoint( // Mapping from block id to the set of block managers that have the block. private val blockLocations = new JHashMap[BlockId, mutable.HashSet[BlockManagerId]] + // Mapping from host name to shuffle (mergers) services where the current app + // registered an executor in the past. Older hosts are removed when the + // maxRetainedMergerLocations size is reached in favor of newer locations. + private val shuffleMergerLocations = new mutable.LinkedHashMap[String, BlockManagerId]() + + // Maximum number of merger locations to cache + private val maxRetainedMergerLocations = conf.get(config.SHUFFLE_MERGER_MAX_RETAINED_LOCATIONS) + private val askThreadPool = ThreadUtils.newDaemonCachedThreadPool("block-manager-ask-thread-pool", 100) private implicit val askExecutionContext = ExecutionContext.fromExecutorService(askThreadPool) @@ -92,6 +100,8 @@ class BlockManagerMasterEndpoint( val defaultRpcTimeout = RpcUtils.askRpcTimeout(conf) + private val pushBasedShuffleEnabled = Utils.isPushBasedShuffleEnabled(conf) + logInfo("BlockManagerMasterEndpoint up") // same as `conf.get(config.SHUFFLE_SERVICE_ENABLED) // && conf.get(config.SHUFFLE_SERVICE_FETCH_RDD_ENABLED)` @@ -139,6 +149,12 @@ class BlockManagerMasterEndpoint( case GetBlockStatus(blockId, askStorageEndpoints) => context.reply(blockStatus(blockId, askStorageEndpoints)) + case GetShufflePushMergerLocations(numMergersNeeded, hostsToFilter) => + context.reply(getShufflePushMergerLocations(numMergersNeeded, hostsToFilter)) + + case RemoveShufflePushMergerLocation(host) => + context.reply(removeShufflePushMergerLocation(host)) + case IsExecutorAlive(executorId) => context.reply(blockManagerIdByExecutor.contains(executorId)) @@ -360,6 +376,17 @@ class BlockManagerMasterEndpoint( } + private def addMergerLocation(blockManagerId: BlockManagerId): Unit = { + if (!blockManagerId.isDriver && !shuffleMergerLocations.contains(blockManagerId.host)) { + val shuffleServerId = BlockManagerId(BlockManagerId.SHUFFLE_MERGER_IDENTIFIER, + blockManagerId.host, externalShuffleServicePort) + if (shuffleMergerLocations.size >= maxRetainedMergerLocations) { + shuffleMergerLocations -= shuffleMergerLocations.head._1 + } + shuffleMergerLocations(shuffleServerId.host) = shuffleServerId + } + } + private def removeExecutor(execId: String): Unit = { logInfo("Trying to remove executor " + execId + " from BlockManagerMaster.") blockManagerIdByExecutor.get(execId).foreach(removeBlockManager) @@ -526,6 +553,10 @@ class BlockManagerMasterEndpoint( blockManagerInfo(id) = new BlockManagerInfo(id, System.currentTimeMillis(), maxOnHeapMemSize, maxOffHeapMemSize, storageEndpoint, externalShuffleServiceBlockStatus) + + if (pushBasedShuffleEnabled) { + 
addMergerLocation(id) + } } listenerBus.post(SparkListenerBlockManagerAdded(time, id, maxOnHeapMemSize + maxOffHeapMemSize, Some(maxOnHeapMemSize), Some(maxOffHeapMemSize))) @@ -657,6 +688,40 @@ class BlockManagerMasterEndpoint( } } + private def getShufflePushMergerLocations( + numMergersNeeded: Int, + hostsToFilter: Set[String]): Seq[BlockManagerId] = { + val blockManagerHosts = blockManagerIdByExecutor.values.map(_.host).toSet + val filteredBlockManagerHosts = blockManagerHosts.filterNot(hostsToFilter.contains(_)) + val filteredMergersWithExecutors = filteredBlockManagerHosts.map( + BlockManagerId(BlockManagerId.SHUFFLE_MERGER_IDENTIFIER, _, externalShuffleServicePort)) + // Enough mergers are available as part of active executors list + if (filteredMergersWithExecutors.size >= numMergersNeeded) { + filteredMergersWithExecutors.toSeq + } else { + // Delta mergers added from inactive mergers list to the active mergers list + val filteredMergersWithExecutorsHosts = filteredMergersWithExecutors.map(_.host) + val filteredMergersWithoutExecutors = shuffleMergerLocations.values + .filterNot(x => hostsToFilter.contains(x.host)) + .filterNot(x => filteredMergersWithExecutorsHosts.contains(x.host)) + val randomFilteredMergersLocations = + if (filteredMergersWithoutExecutors.size > + numMergersNeeded - filteredMergersWithExecutors.size) { + Utils.randomize(filteredMergersWithoutExecutors) + .take(numMergersNeeded - filteredMergersWithExecutors.size) + } else { + filteredMergersWithoutExecutors + } + filteredMergersWithExecutors.toSeq ++ randomFilteredMergersLocations + } + } + + private def removeShufflePushMergerLocation(host: String): Unit = { + if (shuffleMergerLocations.contains(host)) { + shuffleMergerLocations.remove(host) + } + } + /** * Returns an [[RpcEndpointRef]] of the [[BlockManagerReplicaEndpoint]] for sending RPC messages. */ diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMessages.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMessages.scala index bbc076cea9ba8..afe416a55ed0d 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMessages.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMessages.scala @@ -141,4 +141,10 @@ private[spark] object BlockManagerMessages { case class BlockManagerHeartbeat(blockManagerId: BlockManagerId) extends ToBlockManagerMaster case class IsExecutorAlive(executorId: String) extends ToBlockManagerMaster + + case class GetShufflePushMergerLocations(numMergersNeeded: Int, hostsToFilter: Set[String]) + extends ToBlockManagerMaster + + case class RemoveShufflePushMergerLocation(host: String) extends ToBlockManagerMaster + } diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index b743ab6507117..6ccf65b737c1a 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -2541,6 +2541,14 @@ private[spark] object Utils extends Logging { master == "local" || master.startsWith("local[") } + /** + * Push based shuffle can only be enabled when external shuffle service is enabled. + */ + def isPushBasedShuffleEnabled(conf: SparkConf): Boolean = { + conf.get(PUSH_BASED_SHUFFLE_ENABLED) && + (conf.get(IS_TESTING).getOrElse(false) || conf.get(SHUFFLE_SERVICE_ENABLED)) + } + /** * Return whether dynamic allocation is enabled in the given conf. 
*/ diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index 55280fc578310..144489c5f7922 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -100,6 +100,7 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE .set(Kryo.KRYO_SERIALIZER_BUFFER_SIZE.key, "1m") .set(STORAGE_UNROLL_MEMORY_THRESHOLD, 512L) .set(Network.RPC_ASK_TIMEOUT, "5s") + .set(PUSH_BASED_SHUFFLE_ENABLED, true) } private def makeSortShuffleManager(): SortShuffleManager = { @@ -1974,6 +1975,48 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE } } + test("SPARK-32919: Shuffle push merger locations should be bounded with in" + + " spark.shuffle.push.retainedMergerLocations") { + assert(master.getShufflePushMergerLocations(10, Set.empty).isEmpty) + makeBlockManager(100, "execA", + transferService = Some(new MockBlockTransferService(10, "hostA"))) + makeBlockManager(100, "execB", + transferService = Some(new MockBlockTransferService(10, "hostB"))) + makeBlockManager(100, "execC", + transferService = Some(new MockBlockTransferService(10, "hostC"))) + makeBlockManager(100, "execD", + transferService = Some(new MockBlockTransferService(10, "hostD"))) + makeBlockManager(100, "execE", + transferService = Some(new MockBlockTransferService(10, "hostA"))) + assert(master.getShufflePushMergerLocations(10, Set.empty).size == 4) + assert(master.getShufflePushMergerLocations(10, Set.empty).map(_.host).sorted === + Seq("hostC", "hostD", "hostA", "hostB").sorted) + assert(master.getShufflePushMergerLocations(10, Set("hostB")).size == 3) + } + + test("SPARK-32919: Prefer active executor locations for shuffle push mergers") { + makeBlockManager(100, "execA", + transferService = Some(new MockBlockTransferService(10, "hostA"))) + makeBlockManager(100, "execB", + transferService = Some(new MockBlockTransferService(10, "hostB"))) + makeBlockManager(100, "execC", + transferService = Some(new MockBlockTransferService(10, "hostC"))) + makeBlockManager(100, "execD", + transferService = Some(new MockBlockTransferService(10, "hostD"))) + makeBlockManager(100, "execE", + transferService = Some(new MockBlockTransferService(10, "hostA"))) + assert(master.getShufflePushMergerLocations(5, Set.empty).size == 4) + + master.removeExecutor("execA") + master.removeExecutor("execE") + + assert(master.getShufflePushMergerLocations(3, Set.empty).size == 3) + assert(master.getShufflePushMergerLocations(3, Set.empty).map(_.host).sorted === + Seq("hostC", "hostB", "hostD").sorted) + assert(master.getShufflePushMergerLocations(4, Set.empty).map(_.host).sorted === + Seq("hostB", "hostA", "hostC", "hostD").sorted) + } + test("SPARK-33387 Support ordered shuffle block migration") { val blocks: Seq[ShuffleBlockInfo] = Seq( ShuffleBlockInfo(1, 0L), @@ -1995,7 +2038,9 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE assert(sortedBlocks.sameElements(decomManager.shufflesToMigrate.asScala.map(_._1))) } - class MockBlockTransferService(val maxFailures: Int) extends BlockTransferService { + class MockBlockTransferService( + val maxFailures: Int, + override val hostName: String = "MockBlockTransferServiceHost") extends BlockTransferService { var numCalls = 0 var tempFileManager: DownloadFileManager = null @@ -2013,8 +2058,6 @@ class BlockManagerSuite extends SparkFunSuite with 
Matchers with BeforeAndAfterE override def close(): Unit = {} - override def hostName: String = { "MockBlockTransferServiceHost" } - override def port: Int = { 63332 } override def uploadBlock( diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index 20624c743bc22..8fb408041ca9d 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -41,6 +41,7 @@ import org.apache.hadoop.fs.Path import org.apache.spark.{SparkConf, SparkException, SparkFunSuite, TaskContext} import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ +import org.apache.spark.internal.config.Tests.IS_TESTING import org.apache.spark.network.util.ByteUnit import org.apache.spark.scheduler.SparkListener import org.apache.spark.util.io.ChunkedByteBufferInputStream @@ -1432,6 +1433,17 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { }.getMessage assert(message.contains(expected)) } + + test("isPushBasedShuffleEnabled when both PUSH_BASED_SHUFFLE_ENABLED" + + " and SHUFFLE_SERVICE_ENABLED are true") { + val conf = new SparkConf() + assert(Utils.isPushBasedShuffleEnabled(conf) === false) + conf.set(PUSH_BASED_SHUFFLE_ENABLED, true) + conf.set(IS_TESTING, false) + assert(Utils.isPushBasedShuffleEnabled(conf) === false) + conf.set(SHUFFLE_SERVICE_ENABLED, true) + assert(Utils.isPushBasedShuffleEnabled(conf) === true) + } } private class SimpleExtension diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala index b42bdb9816600..22002bb32004d 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala @@ -18,7 +18,7 @@ package org.apache.spark.scheduler.cluster import java.util.EnumSet -import java.util.concurrent.atomic.{AtomicBoolean} +import java.util.concurrent.atomic.AtomicBoolean import javax.servlet.DispatcherType import scala.concurrent.{ExecutionContext, Future} @@ -29,14 +29,14 @@ import org.apache.hadoop.yarn.api.records.{ApplicationAttemptId, ApplicationId} import org.apache.spark.SparkContext import org.apache.spark.deploy.security.HadoopDelegationTokenManager -import org.apache.spark.internal.Logging -import org.apache.spark.internal.config +import org.apache.spark.internal.{config, Logging} import org.apache.spark.internal.config.UI._ import org.apache.spark.resource.ResourceProfile import org.apache.spark.rpc._ import org.apache.spark.scheduler._ import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._ -import org.apache.spark.util.{RpcUtils, ThreadUtils} +import org.apache.spark.storage.{BlockManagerId, BlockManagerMaster} +import org.apache.spark.util.{RpcUtils, ThreadUtils, Utils} /** * Abstract Yarn scheduler backend that contains common logic @@ -80,6 +80,18 @@ private[spark] abstract class YarnSchedulerBackend( /** Attempt ID. 
This is unset for client-mode schedulers */ private var attemptId: Option[ApplicationAttemptId] = None + private val blockManagerMaster: BlockManagerMaster = sc.env.blockManager.master + + private val minMergersThresholdRatio = + conf.get(config.SHUFFLE_MERGER_LOCATIONS_MIN_THRESHOLD_RATIO) + + private val minMergersStaticThreshold = + conf.get(config.SHUFFLE_MERGER_LOCATIONS_MIN_STATIC_THRESHOLD) + + private val maxNumExecutors = conf.get(config.DYN_ALLOCATION_MAX_EXECUTORS) + + private val numExecutors = conf.get(config.EXECUTOR_INSTANCES).getOrElse(0) + /** * Bind to YARN. This *must* be done before calling [[start()]]. * @@ -161,6 +173,36 @@ private[spark] abstract class YarnSchedulerBackend( totalRegisteredExecutors.get() >= totalExpectedExecutors * minRegisteredRatio } + override def getShufflePushMergerLocations( + numPartitions: Int, + resourceProfileId: Int): Seq[BlockManagerId] = { + // TODO (SPARK-33481) This is a naive way of calculating numMergersDesired for a stage, + // TODO we can use better heuristics to calculate numMergersDesired for a stage. + val maxExecutors = if (Utils.isDynamicAllocationEnabled(sc.getConf)) { + maxNumExecutors + } else { + numExecutors + } + val tasksPerExecutor = sc.resourceProfileManager + .resourceProfileFromId(resourceProfileId).maxTasksPerExecutor(sc.conf) + val numMergersDesired = math.min( + math.max(1, math.ceil(numPartitions / tasksPerExecutor).toInt), maxExecutors) + val minMergersNeeded = math.max(minMergersStaticThreshold, + math.floor(numMergersDesired * minMergersThresholdRatio).toInt) + + // Request for numMergersDesired shuffle mergers to BlockManagerMasterEndpoint + // and if it's less than minMergersNeeded, we disable push based shuffle. + val mergerLocations = blockManagerMaster + .getShufflePushMergerLocations(numMergersDesired, scheduler.excludedNodes()) + if (mergerLocations.size < numMergersDesired && mergerLocations.size < minMergersNeeded) { + Seq.empty[BlockManagerId] + } else { + logDebug(s"The number of shuffle mergers desired ${numMergersDesired}" + + s" and available locations are ${mergerLocations.length}") + mergerLocations + } + } + /** * Add filters to the SparkUI. */ From 2289389821a23e5b5badabfb4e62c427de2554a5 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Fri, 20 Nov 2020 21:27:41 +0900 Subject: [PATCH 0527/1009] [SPARK-33441][BUILD][FOLLOWUP] Make unused-imports check for SBT specific ### What changes were proposed in this pull request? Move "unused-imports" check config to `SparkBuild.scala` and make it SBT specific. ### Why are the changes needed? Make unused-imports check for SBT specific. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass the Jenkins or GitHub Action Closes #30441 from LuciferYang/SPARK-33441-FOLLOWUP. 
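As a rough illustration of this follow-up (a minimal sketch, not the actual Spark build definition; the hypothetical sbt fragment below only shows the general pattern of scoping a scalac flag to one Scala binary version so that the Maven build stays unaffected):
```
// Hypothetical build.sbt fragment: emit the unused-import warning only for
// Scala 2.12, where the legacy -Ywarn-unused-import flag still exists.
Compile / scalacOptions ++= {
  CrossVersion.partialVersion(scalaVersion.value) match {
    case Some((2, 12)) => Seq("-Ywarn-unused-import")
    case _ => Seq.empty // Scala 2.13 would use "-Wunused:imports" instead
  }
}
```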
Authored-by: yangjie01 Signed-off-by: HyukjinKwon --- pom.xml | 5 +---- project/SparkBuild.scala | 3 +++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pom.xml b/pom.xml index 3ae2e7420e154..85cf5a00b0b24 100644 --- a/pom.xml +++ b/pom.xml @@ -164,7 +164,6 @@ 3.2.2 2.12.10 2.12 - -Ywarn-unused-import 2.0.0 --test @@ -2538,7 +2537,6 @@ -deprecation -feature -explaintypes - ${scalac.arg.unused-imports} -target:jvm-1.8 @@ -3262,13 +3260,12 @@ - + scala-2.13 2.13.3 2.13 - -Wconf:cat=unused-imports:e diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 55c87fcb3aaa2..05413b7091ad9 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -221,6 +221,7 @@ object SparkBuild extends PomBuild { Seq( "-Xfatal-warnings", "-deprecation", + "-Ywarn-unused-import", "-P:silencer:globalFilters=.*deprecated.*" //regex to catch deprecation warnings and supress them ) } else { @@ -230,6 +231,8 @@ object SparkBuild extends PomBuild { // see `scalac -Wconf:help` for details "-Wconf:cat=deprecation:wv,any:e", // 2.13-specific warning hits to be muted (as narrowly as possible) and addressed separately + // TODO(SPARK-33499): Enable this option when Scala 2.12 is no longer supported. + // "-Wunused:imports", "-Wconf:cat=lint-multiarg-infix:wv", "-Wconf:cat=other-nullary-override:wv", "-Wconf:cat=other-match-analysis&site=org.apache.spark.sql.catalyst.catalog.SessionCatalog.lookupFunction.catalogFunction:wv", From 870d4095336f29f5bef77b9232d6cb9d025987dd Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 20 Nov 2020 12:53:45 +0000 Subject: [PATCH 0528/1009] [SPARK-32512][SQL][TESTS][FOLLOWUP] Remove duplicate tests for ALTER TABLE .. PARTITIONS from DataSourceV2SQLSuite ### What changes were proposed in this pull request? Remove tests from `DataSourceV2SQLSuite` that were copied to `AlterTablePartitionV2SQLSuite` by https://github.com/apache/spark/pull/29339. ### Why are the changes needed? - To reduce tests execution time - To improve test maintenance ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running the modified tests: ``` $ build/sbt "test:testOnly *DataSourceV2SQLSuite" $ build/sbt "test:testOnly *AlterTablePartitionV2SQLSuite" ``` Closes #30444 from MaxGekk/dedup-tests-AlterTablePartitionV2SQLSuite. 
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../sql/connector/DataSourceV2SQLSuite.scala | 53 ------------------- 1 file changed, 53 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index ddafa1bb5070a..0057415ff6e1d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -24,7 +24,6 @@ import scala.collection.JavaConverters._ import org.apache.spark.SparkException import org.apache.spark.sql._ -import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NamespaceAlreadyExistsException, NoSuchDatabaseException, NoSuchNamespaceException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.connector.catalog._ @@ -43,7 +42,6 @@ class DataSourceV2SQLSuite with AlterTableTests with DatasourceV2SQLBase { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ - import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ private val v2Source = classOf[FakeV2Provider].getName override protected val v2Format = v2Source @@ -1980,57 +1978,6 @@ class DataSourceV2SQLSuite } } - test("ALTER TABLE RECOVER PARTITIONS") { - val t = "testcat.ns1.ns2.tbl" - withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo") - val e = intercept[AnalysisException] { - sql(s"ALTER TABLE $t RECOVER PARTITIONS") - } - assert(e.message.contains("ALTER TABLE RECOVER PARTITIONS is only supported with v1 tables")) - } - } - - test("ALTER TABLE ADD PARTITION") { - val t = "testpart.ns1.ns2.tbl" - withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") - spark.sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") - - val partTable = catalog("testpart").asTableCatalog - .loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")).asInstanceOf[InMemoryPartitionTable] - assert(partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) - - val partMetadata = partTable.loadPartitionMetadata(InternalRow.fromSeq(Seq(1))) - assert(partMetadata.containsKey("location")) - assert(partMetadata.get("location") == "loc") - } - } - - test("ALTER TABLE RENAME PARTITION") { - val t = "testcat.ns1.ns2.tbl" - withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") - val e = intercept[AnalysisException] { - sql(s"ALTER TABLE $t PARTITION (id=1) RENAME TO PARTITION (id=2)") - } - assert(e.message.contains("ALTER TABLE RENAME PARTITION is only supported with v1 tables")) - } - } - - test("ALTER TABLE DROP PARTITION") { - val t = "testpart.ns1.ns2.tbl" - withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") - spark.sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") - spark.sql(s"ALTER TABLE $t DROP PARTITION (id=1)") - - val partTable = - catalog("testpart").asTableCatalog.loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")) - assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(1)))) - } - } - test("ALTER TABLE SerDe properties") { val t = "testcat.ns1.ns2.tbl" withTable(t) { From cbc8be24c896ed25be63ef9a111ff015af4fabec Mon Sep 17 00:00:00 2001 From: liucht Date: Fri, 20 Nov 2020 22:19:35 +0900 Subject: [PATCH 0529/1009] 
[SPARK-33422][DOC] Fix the display of the left menu items ### What changes were proposed in this pull request? Limit the height of the left menu area so that a vertical scroll bar is displayed ### Why are the changes needed? The bottom menu items cannot be displayed when the left menu tree is long ### Does this PR introduce any user-facing change? Yes. When there are more menu items than fit on the screen, they can now be reached with the vertical scroll bar. before: ![image](https://user-images.githubusercontent.com/28332082/98805115-16995d80-2452-11eb-933a-3b72c14bea78.png) after: ![image](https://user-images.githubusercontent.com/28332082/98805418-7e4fa880-2452-11eb-9a9b-8d265078297c.png) ### How was this patch tested? NA Closes #30335 from liucht-inspur/master. Authored-by: liucht Signed-off-by: HyukjinKwon --- docs/css/main.css | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/css/main.css b/docs/css/main.css index 8168a46f9a437..8b279a157c2b6 100755 --- a/docs/css/main.css +++ b/docs/css/main.css @@ -162,6 +162,7 @@ body .container-wrapper { margin-right: auto; border-radius: 15px; position: relative; + min-height: 100vh; } .title { @@ -264,6 +265,7 @@ a:hover code { max-width: 914px; line-height: 1.6; /* Inspired by Github's wiki style */ padding-left: 30px; + min-height: 100vh; } .dropdown-menu { @@ -325,6 +327,7 @@ a.anchorjs-link:hover { text-decoration: none; } border-bottom-width: 0px; margin-top: 0px; width: 210px; + height: 80%; float: left; position: fixed; overflow-y: scroll; From 3384bda453d0e728be311ce458e00d70d2484973 Mon Sep 17 00:00:00 2001 From: ulysses Date: Fri, 20 Nov 2020 13:23:08 +0000 Subject: [PATCH 0530/1009] [SPARK-33468][SQL] ParseUrl in ANSI mode should fail if input string is not a valid url ### What changes were proposed in this pull request? With `ParseUrl`, instead of returning null, we now throw an exception if the input string is not a valid url. ### Why are the changes needed? For ANSI mode. ### Does this PR introduce _any_ user-facing change? Yes, users will get an exception if they `set spark.sql.ansi.enabled=true`. ### How was this patch tested? Add test. Closes #30399 from ulysses-you/SPARK-33468. Lead-authored-by: ulysses Co-authored-by: ulysses-you Signed-off-by: Wenchen Fan --- docs/sql-ref-ansi-compliance.md | 1 + .../catalyst/expressions/stringExpressions.scala | 7 +++++-- .../expressions/StringExpressionsSuite.scala | 14 ++++++++++++++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index fd7208615a09f..870ed0aa0daaa 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -135,6 +135,7 @@ The behavior of some SQL functions can be different under ANSI mode (`spark.sql. - `element_at`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices. - `element_at`: This function throws `NoSuchElementException` if key does not exist in map. - `elt`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices. + - `parse_url`: This function throws `IllegalArgumentException` if an input string is not a valid url.
### SQL Operators diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 16e22940495f1..9f92181b34df1 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -1357,8 +1357,9 @@ object ParseUrl { 1 """, since = "2.0.0") -case class ParseUrl(children: Seq[Expression]) +case class ParseUrl(children: Seq[Expression], failOnError: Boolean = SQLConf.get.ansiEnabled) extends Expression with ExpectsInputTypes with CodegenFallback { + def this(children: Seq[Expression]) = this(children, SQLConf.get.ansiEnabled) override def nullable: Boolean = true override def inputTypes: Seq[DataType] = Seq.fill(children.size)(StringType) @@ -1404,7 +1405,9 @@ case class ParseUrl(children: Seq[Expression]) try { new URI(url.toString) } catch { - case e: URISyntaxException => null + case e: URISyntaxException if failOnError => + throw new IllegalArgumentException(s"Find an invalid url string ${url.toString}", e) + case _: URISyntaxException => null } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index a1b6cec24f23f..730574a4b9846 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -943,6 +943,20 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { GenerateUnsafeProjection.generate(ParseUrl(Seq(Literal("\"quote"), Literal("\"quote"))) :: Nil) } + test("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url") { + withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + val msg = intercept[IllegalArgumentException] { + evaluateWithoutCodegen( + ParseUrl(Seq("https://a.b.c/index.php?params1=a|b&params2=x", "HOST"))) + }.getMessage + assert(msg.contains("Find an invalid url string")) + } + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + checkEvaluation( + ParseUrl(Seq("https://a.b.c/index.php?params1=a|b&params2=x", "HOST")), null) + } + } + test("Sentences") { val nullString = Literal.create(null, StringType) checkEvaluation(Sentences(nullString, nullString, nullString), null) From 47326ac1c6a296a84af76d832061741740ae9f12 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Fri, 20 Nov 2020 08:40:14 -0800 Subject: [PATCH 0531/1009] [SPARK-28704][SQL][TEST] Add back skipped HiveExternalCatalogVersionsSuite in HiveSparkSubmitSuite at JDK9+ ### What changes were proposed in this pull request? We skip the test HiveExternalCatalogVersionsSuite when testing with JAVA_9 or later because our previous versions do not support JAVA_9 or later. We now add it back since we have a version that supports JAVA_9 or later. ### Why are the changes needed? To recover test coverage. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Check CI logs. Closes #30428 from AngersZhuuuu/SPARK-28704.
Authored-by: angerszhu Signed-off-by: Dongjoon Hyun --- .../HiveExternalCatalogVersionsSuite.scala | 22 +++++++------------ 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 38a8c492d77a7..4cafd3e8ca626 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -52,7 +52,6 @@ import org.apache.spark.util.Utils @ExtendedHiveTest class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { import HiveExternalCatalogVersionsSuite._ - private val isTestAtLeastJava9 = SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9) private val wareHousePath = Utils.createTempDir(namePrefix = "warehouse") private val tmpDataDir = Utils.createTempDir(namePrefix = "test-data") // For local test, you can set `spark.test.cache-dir` to a static value like `/tmp/test-spark`, to @@ -149,7 +148,9 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { new String(Files.readAllBytes(contentPath), StandardCharsets.UTF_8) } - private def prepare(): Unit = { + override def beforeAll(): Unit = { + super.beforeAll() + val tempPyFile = File.createTempFile("test", ".py") // scalastyle:off line.size.limit Files.write(tempPyFile.toPath, @@ -199,7 +200,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { "--master", "local[2]", "--conf", s"${UI_ENABLED.key}=false", "--conf", s"${MASTER_REST_SERVER_ENABLED.key}=false", - "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=1.2.1", + "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=2.3.7", "--conf", s"${HiveUtils.HIVE_METASTORE_JARS.key}=maven", "--conf", s"${WAREHOUSE_PATH.key}=${wareHousePath.getCanonicalPath}", "--conf", s"spark.sql.test.version.index=$index", @@ -211,23 +212,14 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { tempPyFile.delete() } - override def beforeAll(): Unit = { - super.beforeAll() - if (!isTestAtLeastJava9) { - prepare() - } - } - test("backward compatibility") { - // TODO SPARK-28704 Test backward compatibility on JDK9+ once we have a version supports JDK9+ - assume(!isTestAtLeastJava9) val args = Seq( "--class", PROCESS_TABLES.getClass.getName.stripSuffix("$"), "--name", "HiveExternalCatalog backward compatibility test", "--master", "local[2]", "--conf", s"${UI_ENABLED.key}=false", "--conf", s"${MASTER_REST_SERVER_ENABLED.key}=false", - "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=1.2.1", + "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=2.3.7", "--conf", s"${HiveUtils.HIVE_METASTORE_JARS.key}=maven", "--conf", s"${WAREHOUSE_PATH.key}=${wareHousePath.getCanonicalPath}", "--driver-java-options", s"-Dderby.system.home=${wareHousePath.getCanonicalPath}", @@ -252,7 +244,9 @@ object PROCESS_TABLES extends QueryTest with SQLTestUtils { // do not throw exception during object initialization. 
case NonFatal(_) => Seq("3.0.1", "2.4.7") // A temporary fallback to use a specific version } - versions.filter(v => v.startsWith("3") || !TestUtils.isPythonVersionAtLeast38()) + versions + .filter(v => v.startsWith("3") || !TestUtils.isPythonVersionAtLeast38()) + .filter(v => v.startsWith("3") || !SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) } protected var spark: SparkSession = _ From 116b7b72a1980a0768413329f28591f772822827 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Fri, 20 Nov 2020 11:35:34 -0600 Subject: [PATCH 0532/1009] [SPARK-33466][ML][PYTHON] Imputer support mode(most_frequent) strategy ### What changes were proposed in this pull request? impl a new strategy `mode`: replace missing using the most frequent value along each column. ### Why are the changes needed? it is highly scalable, and had been a function in [sklearn.impute.SimpleImputer](https://scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html#sklearn.impute.SimpleImputer) for a long time. ### Does this PR introduce _any_ user-facing change? Yes, a new strategy is added ### How was this patch tested? updated testsuites Closes #30397 from zhengruifeng/imputer_max_freq. Lead-authored-by: Ruifeng Zheng Co-authored-by: zhengruifeng Signed-off-by: Sean Owen --- .../org/apache/spark/ml/feature/Imputer.scala | 49 ++-- .../spark/ml/feature/ImputerSuite.scala | 211 ++++++++++-------- python/pyspark/ml/feature.py | 5 +- 3 files changed, 144 insertions(+), 121 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala index ad1010da5c104..03ebe0299f63f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala @@ -39,14 +39,16 @@ private[feature] trait ImputerParams extends Params with HasInputCol with HasInp * The imputation strategy. Currently only "mean" and "median" are supported. * If "mean", then replace missing values using the mean value of the feature. * If "median", then replace missing values using the approximate median value of the feature. + * If "mode", then replace missing using the most frequent value of the feature. * Default: mean * * @group param */ final val strategy: Param[String] = new Param(this, "strategy", s"strategy for imputation. " + s"If ${Imputer.mean}, then replace missing values using the mean value of the feature. " + - s"If ${Imputer.median}, then replace missing values using the median value of the feature.", - ParamValidators.inArray[String](Array(Imputer.mean, Imputer.median))) + s"If ${Imputer.median}, then replace missing values using the median value of the feature. " + + s"If ${Imputer.mode}, then replace missing values using the most frequent value of " + + s"the feature.", ParamValidators.inArray[String](Imputer.supportedStrategies)) /** @group getParam */ def getStrategy: String = $(strategy) @@ -104,7 +106,7 @@ private[feature] trait ImputerParams extends Params with HasInputCol with HasInp * For example, if the input column is IntegerType (1, 2, 4, null), * the output will be IntegerType (1, 2, 4, 2) after mean imputation. * - * Note that the mean/median value is computed after filtering out missing values. + * Note that the mean/median/mode value is computed after filtering out missing values. * All Null values in the input columns are treated as missing, and so are also imputed. 
For * computing median, DataFrameStatFunctions.approxQuantile is used with a relative error of 0.001. */ @@ -132,7 +134,7 @@ class Imputer @Since("2.2.0") (@Since("2.2.0") override val uid: String) def setOutputCols(value: Array[String]): this.type = set(outputCols, value) /** - * Imputation strategy. Available options are ["mean", "median"]. + * Imputation strategy. Available options are ["mean", "median", "mode"]. * @group setParam */ @Since("2.2.0") @@ -151,39 +153,42 @@ class Imputer @Since("2.2.0") (@Since("2.2.0") override val uid: String) val spark = dataset.sparkSession val (inputColumns, _) = getInOutCols() - val cols = inputColumns.map { inputCol => when(col(inputCol).equalTo($(missingValue)), null) .when(col(inputCol).isNaN, null) .otherwise(col(inputCol)) - .cast("double") + .cast(DoubleType) .as(inputCol) } + val numCols = cols.length val results = $(strategy) match { case Imputer.mean => // Function avg will ignore null automatically. // For a column only containing null, avg will return null. val row = dataset.select(cols.map(avg): _*).head() - Array.range(0, inputColumns.length).map { i => - if (row.isNullAt(i)) { - Double.NaN - } else { - row.getDouble(i) - } - } + Array.tabulate(numCols)(i => if (row.isNullAt(i)) Double.NaN else row.getDouble(i)) case Imputer.median => // Function approxQuantile will ignore null automatically. // For a column only containing null, approxQuantile will return an empty array. dataset.select(cols: _*).stat.approxQuantile(inputColumns, Array(0.5), $(relativeError)) - .map { array => - if (array.isEmpty) { - Double.NaN - } else { - array.head - } - } + .map(_.headOption.getOrElse(Double.NaN)) + + case Imputer.mode => + import spark.implicits._ + // If there is more than one mode, choose the smallest one to keep in line + // with sklearn.impute.SimpleImputer (using scipy.stats.mode). + val modes = dataset.select(cols: _*).flatMap { row => + // Ignore null. + Iterator.range(0, numCols) + .flatMap(i => if (row.isNullAt(i)) None else Some((i, row.getDouble(i)))) + }.toDF("index", "value") + .groupBy("index", "value").agg(negate(count(lit(0))).as("negative_count")) + .groupBy("index").agg(min(struct("negative_count", "value")).as("mode")) + .select("index", "mode.value") + .as[(Int, Double)].collect().toMap + Array.tabulate(numCols)(i => modes.getOrElse(i, Double.NaN)) } val emptyCols = inputColumns.zip(results).filter(_._2.isNaN).map(_._1) @@ -212,6 +217,10 @@ object Imputer extends DefaultParamsReadable[Imputer] { /** strategy names that Imputer currently supports. 
*/ private[feature] val mean = "mean" private[feature] val median = "median" + private[feature] val mode = "mode" + + /* Set of strategies that Imputer supports */ + private[feature] val supportedStrategies = Array(mean, median, mode) @Since("2.2.0") override def load(path: String): Imputer = super.load(path) diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/ImputerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/ImputerSuite.scala index dfee2b4029c8b..30887f55638f9 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/ImputerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/ImputerSuite.scala @@ -28,13 +28,14 @@ import org.apache.spark.sql.types._ class ImputerSuite extends MLTest with DefaultReadWriteTest { test("Imputer for Double with default missing Value NaN") { - val df = spark.createDataFrame( Seq( - (0, 1.0, 4.0, 1.0, 1.0, 4.0, 4.0), - (1, 11.0, 12.0, 11.0, 11.0, 12.0, 12.0), - (2, 3.0, Double.NaN, 3.0, 3.0, 10.0, 12.0), - (3, Double.NaN, 14.0, 5.0, 3.0, 14.0, 14.0) - )).toDF("id", "value1", "value2", "expected_mean_value1", "expected_median_value1", - "expected_mean_value2", "expected_median_value2") + val df = spark.createDataFrame(Seq( + (0, 1.0, 4.0, 1.0, 1.0, 1.0, 4.0, 4.0, 4.0), + (1, 11.0, 12.0, 11.0, 11.0, 11.0, 12.0, 12.0, 12.0), + (2, 3.0, Double.NaN, 3.0, 3.0, 3.0, 10.0, 12.0, 4.0), + (3, Double.NaN, 14.0, 5.0, 3.0, 1.0, 14.0, 14.0, 14.0) + )).toDF("id", "value1", "value2", + "expected_mean_value1", "expected_median_value1", "expected_mode_value1", + "expected_mean_value2", "expected_median_value2", "expected_mode_value2") val imputer = new Imputer() .setInputCols(Array("value1", "value2")) .setOutputCols(Array("out1", "out2")) @@ -42,23 +43,25 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest { } test("Single Column: Imputer for Double with default missing Value NaN") { - val df1 = spark.createDataFrame( Seq( - (0, 1.0, 1.0, 1.0), - (1, 11.0, 11.0, 11.0), - (2, 3.0, 3.0, 3.0), - (3, Double.NaN, 5.0, 3.0) - )).toDF("id", "value", "expected_mean_value", "expected_median_value") + val df1 = spark.createDataFrame(Seq( + (0, 1.0, 1.0, 1.0, 1.0), + (1, 11.0, 11.0, 11.0, 11.0), + (2, 3.0, 3.0, 3.0, 3.0), + (3, Double.NaN, 5.0, 3.0, 1.0) + )).toDF("id", "value", + "expected_mean_value", "expected_median_value", "expected_mode_value") val imputer1 = new Imputer() .setInputCol("value") .setOutputCol("out") ImputerSuite.iterateStrategyTest(false, imputer1, df1) - val df2 = spark.createDataFrame( Seq( - (0, 4.0, 4.0, 4.0), - (1, 12.0, 12.0, 12.0), - (2, Double.NaN, 10.0, 12.0), - (3, 14.0, 14.0, 14.0) - )).toDF("id", "value", "expected_mean_value", "expected_median_value") + val df2 = spark.createDataFrame(Seq( + (0, 4.0, 4.0, 4.0, 4.0), + (1, 12.0, 12.0, 12.0, 12.0), + (2, Double.NaN, 10.0, 12.0, 4.0), + (3, 14.0, 14.0, 14.0, 14.0) + )).toDF("id", "value", + "expected_mean_value", "expected_median_value", "expected_mode_value") val imputer2 = new Imputer() .setInputCol("value") .setOutputCol("out") @@ -66,12 +69,13 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest { } test("Imputer should handle NaNs when computing surrogate value, if missingValue is not NaN") { - val df = spark.createDataFrame( Seq( - (0, 1.0, 1.0, 1.0), - (1, 3.0, 3.0, 3.0), - (2, Double.NaN, Double.NaN, Double.NaN), - (3, -1.0, 2.0, 1.0) - )).toDF("id", "value", "expected_mean_value", "expected_median_value") + val df = spark.createDataFrame(Seq( + (0, 1.0, 1.0, 1.0, 1.0), + (1, 3.0, 3.0, 3.0, 3.0), + (2, Double.NaN, Double.NaN, 
Double.NaN, Double.NaN), + (3, -1.0, 2.0, 1.0, 1.0) + )).toDF("id", "value", + "expected_mean_value", "expected_median_value", "expected_mode_value") val imputer = new Imputer().setInputCols(Array("value")).setOutputCols(Array("out")) .setMissingValue(-1.0) ImputerSuite.iterateStrategyTest(true, imputer, df) @@ -79,64 +83,69 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest { test("Single Column: Imputer should handle NaNs when computing surrogate value," + " if missingValue is not NaN") { - val df = spark.createDataFrame( Seq( - (0, 1.0, 1.0, 1.0), - (1, 3.0, 3.0, 3.0), - (2, Double.NaN, Double.NaN, Double.NaN), - (3, -1.0, 2.0, 1.0) - )).toDF("id", "value", "expected_mean_value", "expected_median_value") + val df = spark.createDataFrame(Seq( + (0, 1.0, 1.0, 1.0, 1.0), + (1, 3.0, 3.0, 3.0, 3.0), + (2, Double.NaN, Double.NaN, Double.NaN, Double.NaN), + (3, -1.0, 2.0, 1.0, 1.0) + )).toDF("id", "value", + "expected_mean_value", "expected_median_value", "expected_mode_value") val imputer = new Imputer().setInputCol("value").setOutputCol("out") .setMissingValue(-1.0) ImputerSuite.iterateStrategyTest(false, imputer, df) } test("Imputer for Float with missing Value -1.0") { - val df = spark.createDataFrame( Seq( - (0, 1.0F, 1.0F, 1.0F), - (1, 3.0F, 3.0F, 3.0F), - (2, 10.0F, 10.0F, 10.0F), - (3, 10.0F, 10.0F, 10.0F), - (4, -1.0F, 6.0F, 3.0F) - )).toDF("id", "value", "expected_mean_value", "expected_median_value") + val df = spark.createDataFrame(Seq( + (0, 1.0F, 1.0F, 1.0F, 1.0F), + (1, 3.0F, 3.0F, 3.0F, 3.0F), + (2, 10.0F, 10.0F, 10.0F, 10.0F), + (3, 10.0F, 10.0F, 10.0F, 10.0F), + (4, -1.0F, 6.0F, 3.0F, 10.0F) + )).toDF("id", "value", + "expected_mean_value", "expected_median_value", "expected_mode_value") val imputer = new Imputer().setInputCols(Array("value")).setOutputCols(Array("out")) .setMissingValue(-1) ImputerSuite.iterateStrategyTest(true, imputer, df) } test("Single Column: Imputer for Float with missing Value -1.0") { - val df = spark.createDataFrame( Seq( - (0, 1.0F, 1.0F, 1.0F), - (1, 3.0F, 3.0F, 3.0F), - (2, 10.0F, 10.0F, 10.0F), - (3, 10.0F, 10.0F, 10.0F), - (4, -1.0F, 6.0F, 3.0F) - )).toDF("id", "value", "expected_mean_value", "expected_median_value") + val df = spark.createDataFrame(Seq( + (0, 1.0F, 1.0F, 1.0F, 1.0F), + (1, 3.0F, 3.0F, 3.0F, 3.0F), + (2, 10.0F, 10.0F, 10.0F, 10.0F), + (3, 10.0F, 10.0F, 10.0F, 10.0F), + (4, -1.0F, 6.0F, 3.0F, 10.0F) + )).toDF("id", "value", + "expected_mean_value", "expected_median_value", "expected_mode_value") val imputer = new Imputer().setInputCol("value").setOutputCol("out") .setMissingValue(-1) ImputerSuite.iterateStrategyTest(false, imputer, df) } test("Imputer should impute null as well as 'missingValue'") { - val rawDf = spark.createDataFrame( Seq( - (0, 4.0, 4.0, 4.0), - (1, 10.0, 10.0, 10.0), - (2, 10.0, 10.0, 10.0), - (3, Double.NaN, 8.0, 10.0), - (4, -1.0, 8.0, 10.0) - )).toDF("id", "rawValue", "expected_mean_value", "expected_median_value") + val rawDf = spark.createDataFrame(Seq( + (0, 4.0, 4.0, 4.0, 4.0), + (1, 10.0, 10.0, 10.0, 10.0), + (2, 10.0, 10.0, 10.0, 10.0), + (3, Double.NaN, 8.0, 10.0, 10.0), + (4, -1.0, 8.0, 10.0, 10.0) + )).toDF("id", "rawValue", + "expected_mean_value", "expected_median_value", "expected_mode_value") val df = rawDf.selectExpr("*", "IF(rawValue=-1.0, null, rawValue) as value") val imputer = new Imputer().setInputCols(Array("value")).setOutputCols(Array("out")) ImputerSuite.iterateStrategyTest(true, imputer, df) } test("Single Column: Imputer should impute null as well as 'missingValue'") { 
- val rawDf = spark.createDataFrame( Seq( - (0, 4.0, 4.0, 4.0), - (1, 10.0, 10.0, 10.0), - (2, 10.0, 10.0, 10.0), - (3, Double.NaN, 8.0, 10.0), - (4, -1.0, 8.0, 10.0) - )).toDF("id", "rawValue", "expected_mean_value", "expected_median_value") + val rawDf = spark.createDataFrame(Seq( + (0, 4.0, 4.0, 4.0, 4.0), + (1, 10.0, 10.0, 10.0, 10.0), + (2, 10.0, 10.0, 10.0, 10.0), + (3, Double.NaN, 8.0, 10.0, 10.0), + (4, -1.0, 8.0, 10.0, 10.0) + )).toDF("id", "rawValue", + "expected_mean_value", "expected_median_value", "expected_mode_value") val df = rawDf.selectExpr("*", "IF(rawValue=-1.0, null, rawValue) as value") val imputer = new Imputer().setInputCol("value").setOutputCol("out") ImputerSuite.iterateStrategyTest(false, imputer, df) @@ -187,7 +196,7 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest { } test("Imputer throws exception when surrogate cannot be computed") { - val df = spark.createDataFrame( Seq( + val df = spark.createDataFrame(Seq( (0, Double.NaN, 1.0, 1.0), (1, Double.NaN, 3.0, 3.0), (2, Double.NaN, Double.NaN, Double.NaN) @@ -205,12 +214,13 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest { } test("Single Column: Imputer throws exception when surrogate cannot be computed") { - val df = spark.createDataFrame( Seq( - (0, Double.NaN, 1.0, 1.0), - (1, Double.NaN, 3.0, 3.0), - (2, Double.NaN, Double.NaN, Double.NaN) - )).toDF("id", "value", "expected_mean_value", "expected_median_value") - Seq("mean", "median").foreach { strategy => + val df = spark.createDataFrame(Seq( + (0, Double.NaN, 1.0, 1.0, 1.0), + (1, Double.NaN, 3.0, 3.0, 3.0), + (2, Double.NaN, Double.NaN, Double.NaN, Double.NaN) + )).toDF("id", "value", + "expected_mean_value", "expected_median_value", "expected_mode_value") + Seq("mean", "median", "mode").foreach { strategy => val imputer = new Imputer().setInputCol("value").setOutputCol("out") .setStrategy(strategy) withClue("Imputer should fail all the values are invalid") { @@ -223,12 +233,12 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest { } test("Imputer input & output column validation") { - val df = spark.createDataFrame( Seq( + val df = spark.createDataFrame(Seq( (0, 1.0, 1.0, 1.0), (1, Double.NaN, 3.0, 3.0), (2, Double.NaN, Double.NaN, Double.NaN) )).toDF("id", "value1", "value2", "value3") - Seq("mean", "median").foreach { strategy => + Seq("mean", "median", "mode").foreach { strategy => withClue("Imputer should fail if inputCols and outputCols are different length") { val e: IllegalArgumentException = intercept[IllegalArgumentException] { val imputer = new Imputer().setStrategy(strategy) @@ -306,13 +316,13 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest { } test("Imputer for IntegerType with default missing value null") { - - val df = spark.createDataFrame(Seq[(Integer, Integer, Integer)]( - (1, 1, 1), - (11, 11, 11), - (3, 3, 3), - (null, 5, 3) - )).toDF("value1", "expected_mean_value1", "expected_median_value1") + val df = spark.createDataFrame(Seq[(Integer, Integer, Integer, Integer)]( + (1, 1, 1, 1), + (11, 11, 11, 11), + (3, 3, 3, 3), + (null, 5, 3, 1) + )).toDF("value1", + "expected_mean_value1", "expected_median_value1", "expected_mode_value1") val imputer = new Imputer() .setInputCols(Array("value1")) @@ -327,12 +337,13 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest { } test("Single Column Imputer for IntegerType with default missing value null") { - val df = spark.createDataFrame(Seq[(Integer, Integer, Integer)]( - (1, 1, 1), - (11, 11, 11), - (3, 3, 3), - (null, 5, 3) - 
)).toDF("value", "expected_mean_value", "expected_median_value") + val df = spark.createDataFrame(Seq[(Integer, Integer, Integer, Integer)]( + (1, 1, 1, 1), + (11, 11, 11, 11), + (3, 3, 3, 3), + (null, 5, 3, 1) + )).toDF("value", + "expected_mean_value", "expected_median_value", "expected_mode_value") val imputer = new Imputer() .setInputCol("value") @@ -347,13 +358,13 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest { } test("Imputer for IntegerType with missing value -1") { - - val df = spark.createDataFrame(Seq[(Integer, Integer, Integer)]( - (1, 1, 1), - (11, 11, 11), - (3, 3, 3), - (-1, 5, 3) - )).toDF("value1", "expected_mean_value1", "expected_median_value1") + val df = spark.createDataFrame(Seq[(Integer, Integer, Integer, Integer)]( + (1, 1, 1, 1), + (11, 11, 11, 11), + (3, 3, 3, 3), + (-1, 5, 3, 1) + )).toDF("value1", + "expected_mean_value1", "expected_median_value1", "expected_mode_value1") val imputer = new Imputer() .setInputCols(Array("value1")) @@ -369,12 +380,13 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest { } test("Single Column: Imputer for IntegerType with missing value -1") { - val df = spark.createDataFrame(Seq[(Integer, Integer, Integer)]( - (1, 1, 1), - (11, 11, 11), - (3, 3, 3), - (-1, 5, 3) - )).toDF("value", "expected_mean_value", "expected_median_value") + val df = spark.createDataFrame(Seq[(Integer, Integer, Integer, Integer)]( + (1, 1, 1, 1), + (11, 11, 11, 11), + (3, 3, 3, 3), + (-1, 5, 3, 1) + )).toDF("value", + "expected_mean_value", "expected_median_value", "expected_mode_value") val imputer = new Imputer() .setInputCol("value") @@ -402,13 +414,13 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest { } test("Compare single/multiple column(s) Imputer in pipeline") { - val df = spark.createDataFrame( Seq( + val df = spark.createDataFrame(Seq( (0, 1.0, 4.0), (1, 11.0, 12.0), (2, 3.0, Double.NaN), (3, Double.NaN, 14.0) )).toDF("id", "value1", "value2") - Seq("mean", "median").foreach { strategy => + Seq("mean", "median", "mode").foreach { strategy => val multiColsImputer = new Imputer() .setInputCols(Array("value1", "value2")) .setOutputCols(Array("result1", "result2")) @@ -450,11 +462,12 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest { object ImputerSuite { /** - * Imputation strategy. Available options are ["mean", "median"]. - * @param df DataFrame with columns "id", "value", "expected_mean", "expected_median" + * Imputation strategy. Available options are ["mean", "median", "mode"]. + * @param df DataFrame with columns "id", "value", "expected_mean", "expected_median", + * "expected_mode". */ def iterateStrategyTest(isMultiCol: Boolean, imputer: Imputer, df: DataFrame): Unit = { - Seq("mean", "median").foreach { strategy => + Seq("mean", "median", "mode").foreach { strategy => imputer.setStrategy(strategy) val model = imputer.fit(df) val resultDF = model.transform(df) diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 4d898bd5fffa8..82b9a6db1eb92 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -1507,7 +1507,8 @@ class _ImputerParams(HasInputCol, HasInputCols, HasOutputCol, HasOutputCols, Has strategy = Param(Params._dummy(), "strategy", "strategy for imputation. If mean, then replace missing values using the mean " "value of the feature. If median, then replace missing values using the " - "median value of the feature.", + "median value of the feature. 
If mode, then replace missing using the most " + "frequent value of the feature.", typeConverter=TypeConverters.toString) missingValue = Param(Params._dummy(), "missingValue", @@ -1541,7 +1542,7 @@ class Imputer(JavaEstimator, _ImputerParams, JavaMLReadable, JavaMLWritable): numeric type. Currently Imputer does not support categorical features and possibly creates incorrect values for a categorical feature. - Note that the mean/median value is computed after filtering out missing values. + Note that the mean/median/mode value is computed after filtering out missing values. All Null values in the input columns are treated as missing, and so are also imputed. For computing median, :py:meth:`pyspark.sql.DataFrame.approxQuantile` is used with a relative error of `0.001`. From a1a3d5cb02e380156eab320bf6cf512c01b11284 Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Fri, 20 Nov 2020 10:14:37 -0800 Subject: [PATCH 0533/1009] [MINOR][TESTS][DOCS] Use fully-qualified class name in docker integration test ### What changes were proposed in this pull request? change ``` ./build/sbt -Pdocker-integration-tests "testOnly *xxxIntegrationSuite" ``` to ``` ./build/sbt -Pdocker-integration-tests "testOnly org.apache.spark.sql.jdbc.xxxIntegrationSuite" ``` ### Why are the changes needed? We only want to start v1 ```xxxIntegrationSuite```, not the newly added```v2.xxxIntegrationSuite```. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually checked Closes #30448 from huaxingao/dockertest. Authored-by: Huaxin Gao Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala | 3 ++- .../apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala | 3 ++- .../org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala | 3 ++- .../org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala index 4b9acd0d39f3f..d086c8cdcc589 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala @@ -29,7 +29,8 @@ import org.apache.spark.tags.DockerTest * To run this test suite for a specific version (e.g., ibmcom/db2:11.5.4.0): * {{{ * DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0 - * ./build/sbt -Pdocker-integration-tests "testOnly *DB2IntegrationSuite" + * ./build/sbt -Pdocker-integration-tests + * "testOnly org.apache.spark.sql.jdbc.DB2IntegrationSuite" * }}} */ @DockerTest diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala index f1ffc8f0f3dc7..939a07238934b 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala @@ -28,7 +28,8 @@ import org.apache.spark.tags.DockerTest * To run this test suite for a specific version (e.g., 2019-GA-ubuntu-16.04): * {{{ * MSSQLSERVER_DOCKER_IMAGE_NAME=2019-GA-ubuntu-16.04 - * ./build/sbt -Pdocker-integration-tests "testOnly *MsSqlServerIntegrationSuite" 
+ * ./build/sbt -Pdocker-integration-tests + * "testOnly org.apache.spark.sql.jdbc.MsSqlServerIntegrationSuite" * }}} */ @DockerTest diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala index 6f96ab33d0fee..68f0dbc057c1f 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala @@ -28,7 +28,8 @@ import org.apache.spark.tags.DockerTest * To run this test suite for a specific version (e.g., mysql:5.7.31): * {{{ * MYSQL_DOCKER_IMAGE_NAME=mysql:5.7.31 - * ./build/sbt -Pdocker-integration-tests "testOnly *MySQLIntegrationSuite" + * ./build/sbt -Pdocker-integration-tests + * "testOnly org.apache.spark.sql.jdbc.MySQLIntegrationSuite" * }}} */ @DockerTest diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala index fa13100b5fdc8..0347c98bba2c4 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala @@ -30,7 +30,8 @@ import org.apache.spark.tags.DockerTest * To run this test suite for a specific version (e.g., postgres:13.0): * {{{ * POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0 - * ./build/sbt -Pdocker-integration-tests "testOnly *PostgresIntegrationSuite" + * ./build/sbt -Pdocker-integration-tests + * "testOnly org.apache.spark.sql.jdbc.PostgresIntegrationSuite" * }}} */ @DockerTest From 247977893473f810ffbcda31ee2710e445120e42 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Fri, 20 Nov 2020 14:59:56 -0800 Subject: [PATCH 0534/1009] [SPARK-33492][SQL] DSv2: Append/Overwrite/ReplaceTable should invalidate cache ### What changes were proposed in this pull request? This adds changes in the following places: - logic to also refresh caches referencing the target table in v2 `AppendDataExec`, `OverwriteByExpressionExec`, `OverwritePartitionsDynamicExec`, as well as their v1 fallbacks `AppendDataExecV1` and `OverwriteByExpressionExecV1`. - logic to invalidate caches referencing the target table in v2 `ReplaceTableAsSelectExec` and its atomic version `AtomicReplaceTableAsSelectExec`. These are only supported in v2 at the moment though. In addition to the above, in order to test the v1 write fallback behavior, I extended `InMemoryTableWithV1Fallback` to also support batch reads. ### Why are the changes needed? Currently in DataSource v2 we don't refresh or invalidate caches referencing the target table when the table content is changed by operations such as append, overwrite, or replace table. This is different from DataSource v1, and could potentially cause data correctness issue if the staled caches are queried later. ### Does this PR introduce _any_ user-facing change? Yes. Now When a data source v2 is cached (either directly or indirectly), all the relevant caches will be refreshed or invalidated if the table is replaced. ### How was this patch tested? Added unit tests for the new code path. Closes #30429 from sunchao/SPARK-33492. 
Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../datasources/v2/DataSourceV2Strategy.scala | 13 +-- .../datasources/v2/V1FallbackWriters.scala | 21 +++-- .../v2/WriteToDataSourceV2Exec.scala | 38 ++++++++- .../sql/connector/DataSourceV2SQLSuite.scala | 78 ++++++++++++++++++ .../sql/connector/V1WriteFallbackSuite.scala | 79 ++++++++++++++++++- 5 files changed, 212 insertions(+), 17 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 21abfc2816ee4..e5c29312b80e7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -147,6 +147,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat catalog match { case staging: StagingTableCatalog => AtomicReplaceTableAsSelectExec( + session, staging, ident, parts, @@ -157,6 +158,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat orCreate = orCreate) :: Nil case _ => ReplaceTableAsSelectExec( + session, catalog, ident, parts, @@ -170,9 +172,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case AppendData(r: DataSourceV2Relation, query, writeOptions, _) => r.table.asWritable match { case v1 if v1.supports(TableCapability.V1_BATCH_WRITE) => - AppendDataExecV1(v1, writeOptions.asOptions, query) :: Nil + AppendDataExecV1(v1, writeOptions.asOptions, query, r) :: Nil case v2 => - AppendDataExec(v2, writeOptions.asOptions, planLater(query)) :: Nil + AppendDataExec(session, v2, r, writeOptions.asOptions, planLater(query)) :: Nil } case OverwriteByExpression(r: DataSourceV2Relation, deleteExpr, query, writeOptions, _) => @@ -184,14 +186,15 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat }.toArray r.table.asWritable match { case v1 if v1.supports(TableCapability.V1_BATCH_WRITE) => - OverwriteByExpressionExecV1(v1, filters, writeOptions.asOptions, query) :: Nil + OverwriteByExpressionExecV1(v1, filters, writeOptions.asOptions, query, r) :: Nil case v2 => - OverwriteByExpressionExec(v2, filters, writeOptions.asOptions, planLater(query)) :: Nil + OverwriteByExpressionExec(session, v2, r, filters, + writeOptions.asOptions, planLater(query)) :: Nil } case OverwritePartitionsDynamic(r: DataSourceV2Relation, query, writeOptions, _) => OverwritePartitionsDynamicExec( - r.table.asWritable, writeOptions.asOptions, planLater(query)) :: Nil + session, r.table.asWritable, r, writeOptions.asOptions, planLater(query)) :: Nil case DeleteFromTable(relation, condition) => relation match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala index 560da39314b36..af7721588edeb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala @@ -37,10 +37,11 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap case class AppendDataExecV1( table: SupportsWrite, writeOptions: CaseInsensitiveStringMap, - plan: LogicalPlan) extends V1FallbackWriters { + plan: LogicalPlan, + v2Relation: DataSourceV2Relation) extends 
V1FallbackWriters { override protected def run(): Seq[InternalRow] = { - writeWithV1(newWriteBuilder().buildForV1Write()) + writeWithV1(newWriteBuilder().buildForV1Write(), Some(v2Relation)) } } @@ -59,7 +60,8 @@ case class OverwriteByExpressionExecV1( table: SupportsWrite, deleteWhere: Array[Filter], writeOptions: CaseInsensitiveStringMap, - plan: LogicalPlan) extends V1FallbackWriters { + plan: LogicalPlan, + v2Relation: DataSourceV2Relation) extends V1FallbackWriters { private def isTruncate(filters: Array[Filter]): Boolean = { filters.length == 1 && filters(0).isInstanceOf[AlwaysTrue] @@ -68,10 +70,10 @@ case class OverwriteByExpressionExecV1( override protected def run(): Seq[InternalRow] = { newWriteBuilder() match { case builder: SupportsTruncate if isTruncate(deleteWhere) => - writeWithV1(builder.truncate().asV1Builder.buildForV1Write()) + writeWithV1(builder.truncate().asV1Builder.buildForV1Write(), Some(v2Relation)) case builder: SupportsOverwrite => - writeWithV1(builder.overwrite(deleteWhere).asV1Builder.buildForV1Write()) + writeWithV1(builder.overwrite(deleteWhere).asV1Builder.buildForV1Write(), Some(v2Relation)) case _ => throw new SparkException(s"Table does not support overwrite by expression: $table") @@ -112,9 +114,14 @@ sealed trait V1FallbackWriters extends V2CommandExec with SupportsV1Write { trait SupportsV1Write extends SparkPlan { def plan: LogicalPlan - protected def writeWithV1(relation: InsertableRelation): Seq[InternalRow] = { + protected def writeWithV1( + relation: InsertableRelation, + v2Relation: Option[DataSourceV2Relation] = None): Seq[InternalRow] = { + val session = sqlContext.sparkSession // The `plan` is already optimized, we should not analyze and optimize it again. - relation.insert(AlreadyOptimized.dataFrame(sqlContext.sparkSession, plan), overwrite = false) + relation.insert(AlreadyOptimized.dataFrame(session, plan), overwrite = false) + v2Relation.foreach(r => session.sharedState.cacheManager.recacheByPlan(session, r)) + Nil } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala index 1421a9315c3a8..1648134d0a1b2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala @@ -26,6 +26,7 @@ import org.apache.spark.{SparkEnv, SparkException, TaskContext} import org.apache.spark.executor.CommitDeniedException import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD +import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchTableException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.expressions.Attribute @@ -127,6 +128,7 @@ case class AtomicCreateTableAsSelectExec( * ReplaceTableAsSelectStagingExec. */ case class ReplaceTableAsSelectExec( + session: SparkSession, catalog: TableCatalog, ident: Identifier, partitioning: Seq[Transform], @@ -146,6 +148,8 @@ case class ReplaceTableAsSelectExec( // 2. Writing to the new table fails, // 3. The table returned by catalog.createTable doesn't support writing. 
if (catalog.tableExists(ident)) { + val table = catalog.loadTable(ident) + uncacheTable(session, catalog, table, ident) catalog.dropTable(ident) } else if (!orCreate) { throw new CannotReplaceMissingTableException(ident) @@ -169,6 +173,7 @@ case class ReplaceTableAsSelectExec( * is left untouched. */ case class AtomicReplaceTableAsSelectExec( + session: SparkSession, catalog: StagingTableCatalog, ident: Identifier, partitioning: Seq[Transform], @@ -180,6 +185,10 @@ case class AtomicReplaceTableAsSelectExec( override protected def run(): Seq[InternalRow] = { val schema = query.schema.asNullable + if (catalog.tableExists(ident)) { + val table = catalog.loadTable(ident) + uncacheTable(session, catalog, table, ident) + } val staged = if (orCreate) { catalog.stageCreateOrReplace( ident, schema, partitioning.toArray, properties.asJava) @@ -204,12 +213,16 @@ case class AtomicReplaceTableAsSelectExec( * Rows in the output data set are appended. */ case class AppendDataExec( + session: SparkSession, table: SupportsWrite, + relation: DataSourceV2Relation, writeOptions: CaseInsensitiveStringMap, query: SparkPlan) extends V2TableWriteExec with BatchWriteHelper { override protected def run(): Seq[InternalRow] = { - writeWithV2(newWriteBuilder().buildForBatch()) + val writtenRows = writeWithV2(newWriteBuilder().buildForBatch()) + session.sharedState.cacheManager.recacheByPlan(session, relation) + writtenRows } } @@ -224,7 +237,9 @@ case class AppendDataExec( * AlwaysTrue to delete all rows. */ case class OverwriteByExpressionExec( + session: SparkSession, table: SupportsWrite, + relation: DataSourceV2Relation, deleteWhere: Array[Filter], writeOptions: CaseInsensitiveStringMap, query: SparkPlan) extends V2TableWriteExec with BatchWriteHelper { @@ -234,7 +249,7 @@ case class OverwriteByExpressionExec( } override protected def run(): Seq[InternalRow] = { - newWriteBuilder() match { + val writtenRows = newWriteBuilder() match { case builder: SupportsTruncate if isTruncate(deleteWhere) => writeWithV2(builder.truncate().buildForBatch()) @@ -244,9 +259,12 @@ case class OverwriteByExpressionExec( case _ => throw new SparkException(s"Table does not support overwrite by expression: $table") } + session.sharedState.cacheManager.recacheByPlan(session, relation) + writtenRows } } + /** * Physical plan node for dynamic partition overwrite into a v2 table. * @@ -257,18 +275,22 @@ case class OverwriteByExpressionExec( * are not modified. 
*/ case class OverwritePartitionsDynamicExec( + session: SparkSession, table: SupportsWrite, + relation: DataSourceV2Relation, writeOptions: CaseInsensitiveStringMap, query: SparkPlan) extends V2TableWriteExec with BatchWriteHelper { override protected def run(): Seq[InternalRow] = { - newWriteBuilder() match { + val writtenRows = newWriteBuilder() match { case builder: SupportsDynamicOverwrite => writeWithV2(builder.overwriteDynamicPartitions().buildForBatch()) case _ => throw new SparkException(s"Table does not support dynamic partition overwrite: $table") } + session.sharedState.cacheManager.recacheByPlan(session, relation) + writtenRows } } @@ -370,6 +392,15 @@ trait V2TableWriteExec extends V2CommandExec with UnaryExecNode { Nil } + + protected def uncacheTable( + session: SparkSession, + catalog: TableCatalog, + table: Table, + ident: Identifier): Unit = { + val plan = DataSourceV2Relation.create(table, Some(catalog), Some(ident)) + session.sharedState.cacheManager.uncacheQuery(session, plan, cascade = true) + } } object DataWritingSparkTask extends Logging { @@ -484,3 +515,4 @@ private[v2] case class DataWritingSparkTaskResult( * Sink progress information collected after commit. */ private[sql] case class StreamWriterCommitProgress(numOutputRows: Long) + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 0057415ff6e1d..0e7aec8d80e01 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -780,6 +780,84 @@ class DataSourceV2SQLSuite } } + test("SPARK-33492: ReplaceTableAsSelect (atomic or non-atomic) should invalidate cache") { + Seq("testcat.ns.t", "testcat_atomic.ns.t").foreach { t => + val view = "view" + withTable(t) { + withTempView(view) { + sql(s"CREATE TABLE $t USING foo AS SELECT id, data FROM source") + sql(s"CACHE TABLE $view AS SELECT id FROM $t") + checkAnswer(sql(s"SELECT * FROM $t"), spark.table("source")) + checkAnswer(sql(s"SELECT * FROM $view"), spark.table("source").select("id")) + + sql(s"REPLACE TABLE $t USING foo AS SELECT id FROM source") + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) + } + } + } + } + + test("SPARK-33492: AppendData should refresh cache") { + import testImplicits._ + + val t = "testcat.ns.t" + val view = "view" + withTable(t) { + withTempView(view) { + Seq((1, "a")).toDF("i", "j").write.saveAsTable(t) + sql(s"CACHE TABLE $view AS SELECT i FROM $t") + checkAnswer(sql(s"SELECT * FROM $t"), Row(1, "a") :: Nil) + checkAnswer(sql(s"SELECT * FROM $view"), Row(1) :: Nil) + + Seq((2, "b")).toDF("i", "j").write.mode(SaveMode.Append).saveAsTable(t) + + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isDefined) + checkAnswer(sql(s"SELECT * FROM $t"), Row(1, "a") :: Row(2, "b") :: Nil) + checkAnswer(sql(s"SELECT * FROM $view"), Row(1) :: Row(2) :: Nil) + } + } + } + + test("SPARK-33492: OverwriteByExpression should refresh cache") { + val t = "testcat.ns.t" + val view = "view" + withTable(t) { + withTempView(view) { + sql(s"CREATE TABLE $t USING foo AS SELECT id, data FROM source") + sql(s"CACHE TABLE $view AS SELECT id FROM $t") + checkAnswer(sql(s"SELECT * FROM $t"), spark.table("source")) + checkAnswer(sql(s"SELECT * FROM $view"), spark.table("source").select("id")) + + sql(s"INSERT OVERWRITE TABLE $t VALUES (1, 'a')") + + 
assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isDefined) + checkAnswer(sql(s"SELECT * FROM $t"), Row(1, "a") :: Nil) + checkAnswer(sql(s"SELECT * FROM $view"), Row(1) :: Nil) + } + } + } + + test("SPARK-33492: OverwritePartitionsDynamic should refresh cache") { + import testImplicits._ + + val t = "testcat.ns.t" + val view = "view" + withTable(t) { + withTempView(view) { + Seq((1, "a", 1)).toDF("i", "j", "k").write.partitionBy("k") saveAsTable(t) + sql(s"CACHE TABLE $view AS SELECT i FROM $t") + checkAnswer(sql(s"SELECT * FROM $t"), Row(1, "a", 1) :: Nil) + checkAnswer(sql(s"SELECT * FROM $view"), Row(1) :: Nil) + + Seq((2, "b", 1)).toDF("i", "j", "k").writeTo(t).overwritePartitions() + + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isDefined) + checkAnswer(sql(s"SELECT * FROM $t"), Row(2, "b", 1) :: Nil) + checkAnswer(sql(s"SELECT * FROM $view"), Row(2) :: Nil) + } + } + } + test("Relation: basic") { val t1 = "testcat.ns1.ns2.tbl" withTable(t1) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala index 4b52a4cbf4116..cba7dd35fb3bc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala @@ -24,14 +24,17 @@ import scala.collection.mutable import org.scalatest.BeforeAndAfter +import org.apache.spark.rdd.RDD import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row, SaveMode, SparkSession, SQLContext} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.TreeNodeTag -import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table, TableCapability} +import org.apache.spark.sql.connector.catalog.{Identifier, SupportsRead, SupportsWrite, Table, TableCapability} import org.apache.spark.sql.connector.expressions.{FieldReference, IdentityTransform, Transform} +import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, V1Scan} import org.apache.spark.sql.connector.write.{LogicalWriteInfo, LogicalWriteInfoImpl, SupportsOverwrite, SupportsTruncate, V1WriteBuilder, WriteBuilder} import org.apache.spark.sql.execution.datasources.DataSourceUtils +import org.apache.spark.sql.functions.lit import org.apache.spark.sql.internal.SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION import org.apache.spark.sql.internal.connector.SimpleTableProvider import org.apache.spark.sql.sources._ @@ -145,6 +148,52 @@ class V1WriteFallbackSuite extends QueryTest with SharedSparkSession with Before SparkSession.setDefaultSession(spark) } } + + test("SPARK-33492: append fallback should refresh cache") { + SparkSession.clearActiveSession() + SparkSession.clearDefaultSession() + try { + val session = SparkSession.builder() + .master("local[1]") + .config(V2_SESSION_CATALOG_IMPLEMENTATION.key, classOf[V1FallbackTableCatalog].getName) + .getOrCreate() + val df = session.createDataFrame(Seq((1, "x"))) + df.write.mode("append").option("name", "t1").format(v2Format).saveAsTable("test") + session.catalog.cacheTable("test") + checkAnswer(session.read.table("test"), Row(1, "x") :: Nil) + + val df2 = session.createDataFrame(Seq((2, "y"))) + df2.writeTo("test").append() + checkAnswer(session.read.table("test"), Row(1, "x") :: Row(2, "y") :: Nil) + + } finally { + SparkSession.setActiveSession(spark) + 
SparkSession.setDefaultSession(spark) + } + } + + test("SPARK-33492: overwrite fallback should refresh cache") { + SparkSession.clearActiveSession() + SparkSession.clearDefaultSession() + try { + val session = SparkSession.builder() + .master("local[1]") + .config(V2_SESSION_CATALOG_IMPLEMENTATION.key, classOf[V1FallbackTableCatalog].getName) + .getOrCreate() + val df = session.createDataFrame(Seq((1, "x"))) + df.write.mode("append").option("name", "t1").format(v2Format).saveAsTable("test") + session.catalog.cacheTable("test") + checkAnswer(session.read.table("test"), Row(1, "x") :: Nil) + + val df2 = session.createDataFrame(Seq((2, "y"))) + df2.writeTo("test").overwrite(lit(true)) + checkAnswer(session.read.table("test"), Row(2, "y") :: Nil) + + } finally { + SparkSession.setActiveSession(spark) + SparkSession.setDefaultSession(spark) + } + } } class V1WriteFallbackSessionCatalogSuite @@ -177,6 +226,7 @@ class V1FallbackTableCatalog extends TestV2SessionCatalogBase[InMemoryTableWithV properties: util.Map[String, String]): InMemoryTableWithV1Fallback = { val t = new InMemoryTableWithV1Fallback(name, schema, partitions, properties) InMemoryV1Provider.tables.put(name, t) + tables.put(Identifier.of(Array("default"), name), t) t } } @@ -272,7 +322,7 @@ class InMemoryTableWithV1Fallback( override val partitioning: Array[Transform], override val properties: util.Map[String, String]) extends Table - with SupportsWrite { + with SupportsWrite with SupportsRead { partitioning.foreach { t => if (!t.isInstanceOf[IdentityTransform]) { @@ -281,6 +331,7 @@ class InMemoryTableWithV1Fallback( } override def capabilities: util.Set[TableCapability] = Set( + TableCapability.BATCH_READ, TableCapability.V1_BATCH_WRITE, TableCapability.OVERWRITE_BY_FILTER, TableCapability.TRUNCATE).asJava @@ -338,6 +389,30 @@ class InMemoryTableWithV1Fallback( } } } + + override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = + new V1ReadFallbackScanBuilder(schema) + + private class V1ReadFallbackScanBuilder(schema: StructType) extends ScanBuilder { + override def build(): Scan = new V1ReadFallbackScan(schema) + } + + private class V1ReadFallbackScan(schema: StructType) extends V1Scan { + override def readSchema(): StructType = schema + override def toV1TableScan[T <: BaseRelation with TableScan](context: SQLContext): T = + new V1TableScan(context, schema).asInstanceOf[T] + } + + private class V1TableScan( + context: SQLContext, + requiredSchema: StructType) extends BaseRelation with TableScan { + override def sqlContext: SQLContext = context + override def schema: StructType = requiredSchema + override def buildScan(): RDD[Row] = { + val data = InMemoryV1Provider.getTableData(context.sparkSession, name).collect() + context.sparkContext.makeRDD(data) + } + } } /** A rule that fails if a query plan is analyzed twice. */ From de0f50abf407ec972c6a80ae80853a66b24468f4 Mon Sep 17 00:00:00 2001 From: anchovYu Date: Sat, 21 Nov 2020 08:33:39 +0900 Subject: [PATCH 0535/1009] [SPARK-32670][SQL] Group exception messages in Catalyst Analyzer in one file ### What changes were proposed in this pull request? Group all messages of `AnalysisExcpetions` created and thrown directly in org.apache.spark.sql.catalyst.analysis.Analyzer in one file. * Create a new object: `org.apache.spark.sql.CatalystErrors` with many exception-creating functions. * When the `Analyzer` wants to create and throw a new `AnalysisException`, call functions of `CatalystErrors` ### Why are the changes needed? 
This is the sample PR that groups exception messages together in several files. It will largely help with standardization of error messages and its maintenance. ### Does this PR introduce _any_ user-facing change? No. Error messages remain unchanged. ### How was this patch tested? No new tests - pass all original tests to make sure it doesn't break any existing behavior. ### Naming of exception functions All function names ended with `Error`. * For specific errors like `groupingIDMismatch` and `groupingColInvalid`, directly use them as name, just like `groupingIDMismatchError` and `groupingColInvalidError`. * For generic errors like `dataTypeMismatch`, * if confident with the context, prefix and condition can be added, like `pivotValDataTypeMismatchError` * if not sure about the context, add a `For` suffix of the specific component that this exception is related to, like `dataTypeMismatchForDeserializerError` Closes #29497 from anchovYu/32670. Lead-authored-by: anchovYu Co-authored-by: anchovYu Signed-off-by: HyukjinKwon --- .../spark/sql/QueryCompilationErrors.scala | 164 ++++++++++++++++++ .../sql/catalyst/analysis/Analyzer.scala | 84 +++------ 2 files changed, 192 insertions(+), 56 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala new file mode 100644 index 0000000000000..c680502cb328f --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.errors + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.expressions.{Expression, GroupingID} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.util.toPrettySQL +import org.apache.spark.sql.connector.catalog.TableChange +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{AbstractDataType, DataType, StructType} + +/** + * Object for grouping all error messages of the query compilation. + * Currently it includes all AnalysisExcpetions created and thrown directly in + * org.apache.spark.sql.catalyst.analysis.Analyzer. 
+ */ +object QueryCompilationErrors { + def groupingIDMismatchError(groupingID: GroupingID, groupByExprs: Seq[Expression]): Throwable = { + new AnalysisException( + s"Columns of grouping_id (${groupingID.groupByExprs.mkString(",")}) " + + s"does not match grouping columns (${groupByExprs.mkString(",")})") + } + + def groupingColInvalidError(groupingCol: Expression, groupByExprs: Seq[Expression]): Throwable = { + new AnalysisException( + s"Column of grouping ($groupingCol) can't be found " + + s"in grouping columns ${groupByExprs.mkString(",")}") + } + + def groupingSizeTooLargeError(sizeLimit: Int): Throwable = { + new AnalysisException( + s"Grouping sets size cannot be greater than $sizeLimit") + } + + def unorderablePivotColError(pivotCol: Expression): Throwable = { + new AnalysisException( + s"Invalid pivot column '$pivotCol'. Pivot columns must be comparable." + ) + } + + def nonLiteralPivotValError(pivotVal: Expression): Throwable = { + new AnalysisException( + s"Literal expressions required for pivot values, found '$pivotVal'") + } + + def pivotValDataTypeMismatchError(pivotVal: Expression, pivotCol: Expression): Throwable = { + new AnalysisException( + s"Invalid pivot value '$pivotVal': " + + s"value data type ${pivotVal.dataType.simpleString} does not match " + + s"pivot column data type ${pivotCol.dataType.catalogString}") + } + + def unsupportedIfNotExistsError(tableName: String): Throwable = { + new AnalysisException( + s"Cannot write, IF NOT EXISTS is not supported for table: $tableName") + } + + def nonPartitionColError(partitionName: String): Throwable = { + new AnalysisException( + s"PARTITION clause cannot contain a non-partition column name: $partitionName") + } + + def addStaticValToUnknownColError(staticName: String): Throwable = { + new AnalysisException( + s"Cannot add static value for unknown column: $staticName") + } + + def unknownStaticPartitionColError(name: String): Throwable = { + new AnalysisException(s"Unknown static partition column: $name") + } + + def nestedGeneratorError(trimmedNestedGenerator: Expression): Throwable = { + new AnalysisException( + "Generators are not supported when it's nested in " + + "expressions, but got: " + toPrettySQL(trimmedNestedGenerator)) + } + + def moreThanOneGeneratorError(generators: Seq[Expression], clause: String): Throwable = { + new AnalysisException( + s"Only one generator allowed per $clause clause but found " + + generators.size + ": " + generators.map(toPrettySQL).mkString(", ")) + } + + def generatorOutsideSelectError(plan: LogicalPlan): Throwable = { + new AnalysisException( + "Generators are not supported outside the SELECT clause, but " + + "got: " + plan.simpleString(SQLConf.get.maxToStringFields)) + } + + def legacyStoreAssignmentPolicyError(): Throwable = { + val configKey = SQLConf.STORE_ASSIGNMENT_POLICY.key + new AnalysisException( + "LEGACY store assignment policy is disallowed in Spark data source V2. " + + s"Please set the configuration $configKey to other values.") + } + + def unresolvedUsingColForJoinError( + colName: String, plan: LogicalPlan, side: String): Throwable = { + new AnalysisException( + s"USING column `$colName` cannot be resolved on the $side " + + s"side of the join. 
The $side-side columns: [${plan.output.map(_.name).mkString(", ")}]") + } + + def dataTypeMismatchForDeserializerError( + dataType: DataType, desiredType: String): Throwable = { + val quantifier = if (desiredType.equals("array")) "an" else "a" + new AnalysisException( + s"need $quantifier $desiredType field but got " + dataType.catalogString) + } + + def fieldNumberMismatchForDeserializerError( + schema: StructType, maxOrdinal: Int): Throwable = { + new AnalysisException( + s"Try to map ${schema.catalogString} to Tuple${maxOrdinal + 1}, " + + "but failed as the number of fields does not line up.") + } + + def upCastFailureError( + fromStr: String, from: Expression, to: DataType, walkedTypePath: Seq[String]): Throwable = { + new AnalysisException( + s"Cannot up cast $fromStr from " + + s"${from.dataType.catalogString} to ${to.catalogString}.\n" + + s"The type path of the target object is:\n" + walkedTypePath.mkString("", "\n", "\n") + + "You can either add an explicit cast to the input data or choose a higher precision " + + "type of the field in the target object") + } + + def unsupportedAbstractDataTypeForUpCastError(gotType: AbstractDataType): Throwable = { + new AnalysisException( + s"UpCast only support DecimalType as AbstractDataType yet, but got: $gotType") + } + + def outerScopeFailureForNewInstanceError(className: String): Throwable = { + new AnalysisException( + s"Unable to generate an encoder for inner class `$className` without " + + "access to the scope that this class was defined in.\n" + + "Try moving this class out of its parent class.") + } + + def referenceColNotFoundForAlterTableChangesError( + after: TableChange.After, parentName: String): Throwable = { + new AnalysisException( + s"Couldn't find the reference column for $after at $parentName") + } + +} + + diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 8d95d8cf49d45..53c0ff687c6d2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -44,6 +44,7 @@ import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, After, ColumnChange, ColumnPosition, DeleteColumn, RenameColumn, UpdateColumnComment, UpdateColumnNullability, UpdateColumnPosition, UpdateColumnType} import org.apache.spark.sql.connector.expressions.{FieldReference, IdentityTransform, Transform} +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.{PartitionOverwriteMode, StoreAssignmentPolicy} @@ -448,9 +449,7 @@ class Analyzer(override val catalogManager: CatalogManager) e.groupByExprs.map(_.canonicalized) == groupByExprs.map(_.canonicalized)) { Alias(gid, toPrettySQL(e))() } else { - throw new AnalysisException( - s"Columns of grouping_id (${e.groupByExprs.mkString(",")}) does not match " + - s"grouping columns (${groupByExprs.mkString(",")})") + throw QueryCompilationErrors.groupingIDMismatchError(e, groupByExprs) } case e @ Grouping(col: Expression) => val idx = groupByExprs.indexWhere(_.semanticEquals(col)) @@ -458,8 +457,7 @@ class Analyzer(override val catalogManager: CatalogManager) 
Alias(Cast(BitwiseAnd(ShiftRight(gid, Literal(groupByExprs.length - 1 - idx)), Literal(1L)), ByteType), toPrettySQL(e))() } else { - throw new AnalysisException(s"Column of grouping ($col) can't be found " + - s"in grouping columns ${groupByExprs.mkString(",")}") + throw QueryCompilationErrors.groupingColInvalidError(col, groupByExprs) } } } @@ -575,8 +573,7 @@ class Analyzer(override val catalogManager: CatalogManager) val finalGroupByExpressions = getFinalGroupByExpressions(selectedGroupByExprs, groupByExprs) if (finalGroupByExpressions.size > GroupingID.dataType.defaultSize * 8) { - throw new AnalysisException( - s"Grouping sets size cannot be greater than ${GroupingID.dataType.defaultSize * 8}") + throw QueryCompilationErrors.groupingSizeTooLargeError(GroupingID.dataType.defaultSize * 8) } // Expand works by setting grouping expressions to null as determined by the @@ -712,8 +709,7 @@ class Analyzer(override val catalogManager: CatalogManager) || !p.pivotColumn.resolved || !p.pivotValues.forall(_.resolved) => p case Pivot(groupByExprsOpt, pivotColumn, pivotValues, aggregates, child) => if (!RowOrdering.isOrderable(pivotColumn.dataType)) { - throw new AnalysisException( - s"Invalid pivot column '${pivotColumn}'. Pivot columns must be comparable.") + throw QueryCompilationErrors.unorderablePivotColError(pivotColumn) } // Check all aggregate expressions. aggregates.foreach(checkValidAggregateExpression) @@ -724,13 +720,10 @@ class Analyzer(override val catalogManager: CatalogManager) case _ => value.foldable } if (!foldable) { - throw new AnalysisException( - s"Literal expressions required for pivot values, found '$value'") + throw QueryCompilationErrors.nonLiteralPivotValError(value) } if (!Cast.canCast(value.dataType, pivotColumn.dataType)) { - throw new AnalysisException(s"Invalid pivot value '$value': " + - s"value data type ${value.dataType.simpleString} does not match " + - s"pivot column data type ${pivotColumn.dataType.catalogString}") + throw QueryCompilationErrors.pivotValDataTypeMismatchError(value, pivotColumn) } Cast(value, pivotColumn.dataType, Some(conf.sessionLocalTimeZone)).eval(EmptyRow) } @@ -1167,8 +1160,7 @@ class Analyzer(override val catalogManager: CatalogManager) case i @ InsertIntoStatement(r: DataSourceV2Relation, _, _, _, _) if i.query.resolved => // ifPartitionNotExists is append with validation, but validation is not supported if (i.ifPartitionNotExists) { - throw new AnalysisException( - s"Cannot write, IF NOT EXISTS is not supported for table: ${r.table.name}") + throw QueryCompilationErrors.unsupportedIfNotExistsError(r.table.name) } val partCols = partitionColumnNames(r.table) @@ -1205,8 +1197,7 @@ class Analyzer(override val catalogManager: CatalogManager) partitionColumnNames.find(name => conf.resolver(name, partitionName)) match { case Some(_) => case None => - throw new AnalysisException( - s"PARTITION clause cannot contain a non-partition column name: $partitionName") + throw QueryCompilationErrors.nonPartitionColError(partitionName) } } } @@ -1228,8 +1219,7 @@ class Analyzer(override val catalogManager: CatalogManager) case Some(attr) => attr.name -> staticName case _ => - throw new AnalysisException( - s"Cannot add static value for unknown column: $staticName") + throw QueryCompilationErrors.addStaticValToUnknownColError(staticName) }).toMap val queryColumns = query.output.iterator @@ -1271,7 +1261,7 @@ class Analyzer(override val catalogManager: CatalogManager) // an UnresolvedAttribute. 
EqualTo(UnresolvedAttribute(attr.name), Cast(Literal(value), attr.dataType)) case None => - throw new AnalysisException(s"Unknown static partition column: $name") + throw QueryCompilationErrors.unknownStaticPartitionColError(name) } }.reduce(And) } @@ -2483,23 +2473,19 @@ class Analyzer(override val catalogManager: CatalogManager) def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { case Project(projectList, _) if projectList.exists(hasNestedGenerator) => val nestedGenerator = projectList.find(hasNestedGenerator).get - throw new AnalysisException("Generators are not supported when it's nested in " + - "expressions, but got: " + toPrettySQL(trimAlias(nestedGenerator))) + throw QueryCompilationErrors.nestedGeneratorError(trimAlias(nestedGenerator)) case Project(projectList, _) if projectList.count(hasGenerator) > 1 => val generators = projectList.filter(hasGenerator).map(trimAlias) - throw new AnalysisException("Only one generator allowed per select clause but found " + - generators.size + ": " + generators.map(toPrettySQL).mkString(", ")) + throw QueryCompilationErrors.moreThanOneGeneratorError(generators, "select") case Aggregate(_, aggList, _) if aggList.exists(hasNestedGenerator) => val nestedGenerator = aggList.find(hasNestedGenerator).get - throw new AnalysisException("Generators are not supported when it's nested in " + - "expressions, but got: " + toPrettySQL(trimAlias(nestedGenerator))) + throw QueryCompilationErrors.nestedGeneratorError(trimAlias(nestedGenerator)) case Aggregate(_, aggList, _) if aggList.count(hasGenerator) > 1 => val generators = aggList.filter(hasGenerator).map(trimAlias) - throw new AnalysisException("Only one generator allowed per aggregate clause but found " + - generators.size + ": " + generators.map(toPrettySQL).mkString(", ")) + throw QueryCompilationErrors.moreThanOneGeneratorError(generators, "aggregate") case agg @ Aggregate(groupList, aggList, child) if aggList.forall { case AliasedGenerator(_, _, _) => true @@ -2582,8 +2568,7 @@ class Analyzer(override val catalogManager: CatalogManager) case g: Generate => g case p if p.expressions.exists(hasGenerator) => - throw new AnalysisException("Generators are not supported outside the SELECT clause, but " + - "got: " + p.simpleString(SQLConf.get.maxToStringFields)) + throw QueryCompilationErrors.generatorOutsideSelectError(p) } } @@ -3122,10 +3107,7 @@ class Analyzer(override val catalogManager: CatalogManager) private def validateStoreAssignmentPolicy(): Unit = { // SPARK-28730: LEGACY store assignment policy is disallowed in data source v2. if (conf.storeAssignmentPolicy == StoreAssignmentPolicy.LEGACY) { - val configKey = SQLConf.STORE_ASSIGNMENT_POLICY.key - throw new AnalysisException(s""" - |"LEGACY" store assignment policy is disallowed in Spark data source V2. - |Please set the configuration $configKey to other values.""".stripMargin) + throw QueryCompilationErrors.legacyStoreAssignmentPolicyError() } } @@ -3138,14 +3120,12 @@ class Analyzer(override val catalogManager: CatalogManager) hint: JoinHint) = { val leftKeys = joinNames.map { keyName => left.output.find(attr => resolver(attr.name, keyName)).getOrElse { - throw new AnalysisException(s"USING column `$keyName` cannot be resolved on the left " + - s"side of the join. 
The left-side columns: [${left.output.map(_.name).mkString(", ")}]") + throw QueryCompilationErrors.unresolvedUsingColForJoinError(keyName, left, "left") } } val rightKeys = joinNames.map { keyName => right.output.find(attr => resolver(attr.name, keyName)).getOrElse { - throw new AnalysisException(s"USING column `$keyName` cannot be resolved on the right " + - s"side of the join. The right-side columns: [${right.output.map(_.name).mkString(", ")}]") + throw QueryCompilationErrors.unresolvedUsingColForJoinError(keyName, right, "right") } } val joinPairs = leftKeys.zip(rightKeys) @@ -3208,7 +3188,8 @@ class Analyzer(override val catalogManager: CatalogManager) ExtractValue(child, fieldName, resolver) } case other => - throw new AnalysisException("need an array field but got " + other.catalogString) + throw QueryCompilationErrors.dataTypeMismatchForDeserializerError(other, + "array") } case u: UnresolvedCatalystToExternalMap if u.child.resolved => u.child.dataType match { @@ -3218,7 +3199,7 @@ class Analyzer(override val catalogManager: CatalogManager) ExtractValue(child, fieldName, resolver) } case other => - throw new AnalysisException("need a map field but got " + other.catalogString) + throw QueryCompilationErrors.dataTypeMismatchForDeserializerError(other, "map") } } validateNestedTupleFields(result) @@ -3227,8 +3208,7 @@ class Analyzer(override val catalogManager: CatalogManager) } private def fail(schema: StructType, maxOrdinal: Int): Unit = { - throw new AnalysisException(s"Try to map ${schema.catalogString} to Tuple${maxOrdinal + 1}" + - ", but failed as the number of fields does not line up.") + throw QueryCompilationErrors.fieldNumberMismatchForDeserializerError(schema, maxOrdinal) } /** @@ -3287,10 +3267,7 @@ class Analyzer(override val catalogManager: CatalogManager) case n: NewInstance if n.childrenResolved && !n.resolved => val outer = OuterScopes.getOuterScope(n.cls) if (outer == null) { - throw new AnalysisException( - s"Unable to generate an encoder for inner class `${n.cls.getName}` without " + - "access to the scope that this class was defined in.\n" + - "Try moving this class out of its parent class.") + throw QueryCompilationErrors.outerScopeFailureForNewInstanceError(n.cls.getName) } n.copy(outerPointer = Some(outer)) } @@ -3306,11 +3283,7 @@ class Analyzer(override val catalogManager: CatalogManager) case l: LambdaVariable => "array element" case e => e.sql } - throw new AnalysisException(s"Cannot up cast $fromStr from " + - s"${from.dataType.catalogString} to ${to.catalogString}.\n" + - "The type path of the target object is:\n" + walkedTypePath.mkString("", "\n", "\n") + - "You can either add an explicit cast to the input data or choose a higher precision " + - "type of the field in the target object") + throw QueryCompilationErrors.upCastFailureError(fromStr, from, to, walkedTypePath) } def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { @@ -3321,8 +3294,7 @@ class Analyzer(override val catalogManager: CatalogManager) case u @ UpCast(child, _, _) if !child.resolved => u case UpCast(_, target, _) if target != DecimalType && !target.isInstanceOf[DataType] => - throw new AnalysisException( - s"UpCast only support DecimalType as AbstractDataType yet, but got: $target") + throw QueryCompilationErrors.unsupportedAbstractDataTypeForUpCastError(target) case UpCast(child, target, walkedTypePath) if target == DecimalType && child.dataType.isInstanceOf[DecimalType] => @@ -3501,8 +3473,8 @@ class Analyzer(override val catalogManager: CatalogManager) case 
Some(colName) => ColumnPosition.after(colName) case None => - throw new AnalysisException("Couldn't find the reference column for " + - s"$after at $parentName") + throw QueryCompilationErrors.referenceColNotFoundForAlterTableChangesError(after, + parentName) } case other => other } From 67c6ed90682455dbc866e43709fd9081dfc15ad9 Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Sat, 21 Nov 2020 10:27:00 +0900 Subject: [PATCH 0536/1009] [SPARK-33223][SS][FOLLOWUP] Clarify the meaning of "number of rows dropped by watermark" in SS UI page ### What changes were proposed in this pull request? This PR fixes the representation to clarify the meaning of "number of rows dropped by watermark" in SS UI page. ### Why are the changes needed? `Aggregated Number Of State Rows Dropped By Watermark` says that the dropped rows are from the state, whereas they're not. We say "evicted from the state" for the case, which is "normal" to emit outputs and reduce memory usage of the state. The metric actually represents the number of "input" rows dropped by watermark, and the meaning of "input" is relative to the "stateful operator". That's a bit confusing as we normally think "input" as "input from source" whereas it's not. ### Does this PR introduce _any_ user-facing change? Yes, UI element & tooltip change. ### How was this patch tested? Only text change in UI, so we know how thing will be changed intuitively. Closes #30439 from HeartSaVioR/SPARK-33223-FOLLOWUP. Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../streaming/ui/StreamingQueryStatisticsPage.scala | 10 +++++----- .../spark/sql/streaming/ui/UISeleniumSuite.scala | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala index 7d38acfceee81..f48672afb41f3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala @@ -189,8 +189,8 @@ private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab) val graphUIDataForNumRowsDroppedByWatermark = new GraphUIData( - "aggregated-num-state-rows-dropped-by-watermark-timeline", - "aggregated-num-state-rows-dropped-by-watermark-histogram", + "aggregated-num-rows-dropped-by-watermark-timeline", + "aggregated-num-rows-dropped-by-watermark-histogram", numRowsDroppedByWatermarkData, minBatchTime, maxBatchTime, @@ -230,11 +230,11 @@ private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab)
-            Aggregated Number Of State Rows Dropped By Watermark {SparkUIUtils.tooltip("Aggregated number of state rows dropped by watermark.", "right")}
+            Aggregated Number Of Rows Dropped By Watermark {SparkUIUtils.tooltip("Accumulates all input rows being dropped in stateful operators by watermark. 'Inputs' are relative to operators.", "right")}
    - {graphUIDataForNumRowsDroppedByWatermark.generateTimelineHtml(jsCollector)} - {graphUIDataForNumRowsDroppedByWatermark.generateHistogramHtml(jsCollector)} + {graphUIDataForNumRowsDroppedByWatermark.generateTimelineHtml(jsCollector)} + {graphUIDataForNumRowsDroppedByWatermark.generateHistogramHtml(jsCollector)} // scalastyle:on } else { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala index 1a8b28001b8d1..307479db33949 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala @@ -139,7 +139,7 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers with B summaryText should contain ("Aggregated Number Of Total State Rows (?)") summaryText should contain ("Aggregated Number Of Updated State Rows (?)") summaryText should contain ("Aggregated State Memory Used In Bytes (?)") - summaryText should contain ("Aggregated Number Of State Rows Dropped By Watermark (?)") + summaryText should contain ("Aggregated Number Of Rows Dropped By Watermark (?)") } } finally { spark.streams.active.foreach(_.stop()) From 530c0a8e28973c57a5d0deec6b15fc29500b6c00 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 20 Nov 2020 18:41:25 -0800 Subject: [PATCH 0537/1009] [SPARK-33505][SQL][TESTS] Fix adding new partitions by INSERT INTO `InMemoryPartitionTable` ### What changes were proposed in this pull request? 1. Add a hook method to `addPartitionKey()` of `InMemoryTable` which is called per every row. 2. Override `addPartitionKey()` in `InMemoryPartitionTable`, and add partition key every time when new row is inserted to the table. ### Why are the changes needed? To be able to write unified tests for datasources V1 and V2. Currently, INSERT INTO a V1 table creates partitions but the same doesn't work for the custom catalog `InMemoryPartitionTableCatalog` used in DSv2 tests. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running the affected test suite `DataSourceV2SQLSuite`. Closes #30449 from MaxGekk/insert-into-InMemoryPartitionTable. 
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../connector/InMemoryPartitionTable.scala | 4 ++++ .../spark/sql/connector/InMemoryTable.scala | 3 +++ .../sql/connector/DataSourceV2SQLSuite.scala | 21 +++++++++++++++++++ 3 files changed, 28 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala index 1c96bdf3afa20..23987e909aa70 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala @@ -92,4 +92,8 @@ class InMemoryPartitionTable( override def partitionExists(ident: InternalRow): Boolean = memoryTablePartitions.containsKey(ident) + + override protected def addPartitionKey(key: Seq[Any]): Unit = { + memoryTablePartitions.put(InternalRow.fromSeq(key), Map.empty[String, String].asJava) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala index 3b47271a114e2..c93053abc550a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala @@ -160,12 +160,15 @@ class InMemoryTable( } } + protected def addPartitionKey(key: Seq[Any]): Unit = {} + def withData(data: Array[BufferedRows]): InMemoryTable = dataMap.synchronized { data.foreach(_.rows.foreach { row => val key = getKey(row) dataMap += dataMap.get(key) .map(key -> _.withRow(row)) .getOrElse(key -> new BufferedRows(key.toArray.mkString("/")).withRow(row)) + addPartitionKey(key) }) this } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 0e7aec8d80e01..90df4ee08bfc0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -24,6 +24,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.SparkException import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NamespaceAlreadyExistsException, NoSuchDatabaseException, NoSuchNamespaceException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.connector.catalog._ @@ -35,6 +36,7 @@ import org.apache.spark.sql.internal.connector.SimpleTableProvider import org.apache.spark.sql.sources.SimpleScanSource import org.apache.spark.sql.types.{BooleanType, LongType, StringType, StructField, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils class DataSourceV2SQLSuite @@ -2538,6 +2540,25 @@ class DataSourceV2SQLSuite } } + test("SPARK-33505: insert into partitioned table") { + val t = "testpart.ns1.ns2.tbl" + withTable(t) { + sql(s""" + |CREATE TABLE $t (id bigint, city string, data string) + |USING foo + |PARTITIONED BY (id, city)""".stripMargin) + val partTable = catalog("testpart").asTableCatalog + .loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")).asInstanceOf[InMemoryPartitionTable] + val expectedPartitionIdent = 
InternalRow.fromSeq(Seq(1, UTF8String.fromString("NY"))) + assert(!partTable.partitionExists(expectedPartitionIdent)) + sql(s"INSERT INTO $t PARTITION(id = 1, city = 'NY') SELECT 'abc'") + assert(partTable.partitionExists(expectedPartitionIdent)) + // Insert into the existing partition must not fail + sql(s"INSERT INTO $t PARTITION(id = 1, city = 'NY') SELECT 'def'") + assert(partTable.partitionExists(expectedPartitionIdent)) + } + } + private def testNotSupportedV2Command(sqlCommand: String, sqlParams: String): Unit = { val e = intercept[AnalysisException] { sql(s"$sqlCommand $sqlParams") From b623c03456be12169de7d3823f191ae6774e33ce Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Fri, 20 Nov 2020 18:45:17 -0800 Subject: [PATCH 0538/1009] [SPARK-32381][CORE][FOLLOWUP][TEST-HADOOP2.7] Don't remove SerializableFileStatus and SerializableBlockLocation for Hadoop 2.7 ### What changes were proposed in this pull request? Revert the change in #29959 and don't remove `SerializableFileStatus` and `SerializableBlockLocation`. ### Why are the changes needed? In Hadoop 2.7 `FileStatus` and `BlockLocation` are not serializable, so we still need the two wrapper classes. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A Closes #30447 from sunchao/SPARK-32381-followup. Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/util/HadoopFSUtils.scala | 61 ++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala b/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala index a3a528cddee37..4af48d5b9125c 100644 --- a/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala @@ -136,12 +136,53 @@ private[spark] object HadoopFSUtils extends Logging { parallelismMax = 0) (path, leafFiles) }.iterator + }.map { case (path, statuses) => + val serializableStatuses = statuses.map { status => + // Turn FileStatus into SerializableFileStatus so we can send it back to the driver + val blockLocations = status match { + case f: LocatedFileStatus => + f.getBlockLocations.map { loc => + SerializableBlockLocation( + loc.getNames, + loc.getHosts, + loc.getOffset, + loc.getLength) + } + + case _ => + Array.empty[SerializableBlockLocation] + } + + SerializableFileStatus( + status.getPath.toString, + status.getLen, + status.isDirectory, + status.getReplication, + status.getBlockSize, + status.getModificationTime, + status.getAccessTime, + blockLocations) + } + (path.toString, serializableStatuses) }.collect() } finally { sc.setJobDescription(previousJobDescription) } - statusMap.toSeq + // turn SerializableFileStatus back to Status + statusMap.map { case (path, serializableStatuses) => + val statuses = serializableStatuses.map { f => + val blockLocations = f.blockLocations.map { loc => + new BlockLocation(loc.names, loc.hosts, loc.offset, loc.length) + } + new LocatedFileStatus( + new FileStatus( + f.length, f.isDir, f.blockReplication, f.blockSize, f.modificationTime, + new Path(f.path)), + blockLocations) + } + (new Path(path), statuses) + } } // scalastyle:off argcount @@ -291,4 +332,22 @@ private[spark] object HadoopFSUtils extends Logging { resolvedLeafStatuses } // scalastyle:on argcount + + /** A serializable variant of HDFS's BlockLocation. This is required by Hadoop 2.7. 
*/ + private case class SerializableBlockLocation( + names: Array[String], + hosts: Array[String], + offset: Long, + length: Long) + + /** A serializable variant of HDFS's FileStatus. This is required by Hadoop 2.7. */ + private case class SerializableFileStatus( + path: String, + length: Long, + isDir: Boolean, + blockReplication: Short, + blockSize: Long, + modificationTime: Long, + accessTime: Long, + blockLocations: Array[SerializableBlockLocation]) } From cf7490112ab81cce4a483c2a94368ce3d9d986df Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 20 Nov 2020 19:01:58 -0800 Subject: [PATCH 0539/1009] Revert "[SPARK-28704][SQL][TEST] Add back Skiped HiveExternalCatalogVersionsSuite in HiveSparkSubmitSuite at JDK9+" This reverts commit 47326ac1c6a296a84af76d832061741740ae9f12. --- .../HiveExternalCatalogVersionsSuite.scala | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 4cafd3e8ca626..38a8c492d77a7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -52,6 +52,7 @@ import org.apache.spark.util.Utils @ExtendedHiveTest class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { import HiveExternalCatalogVersionsSuite._ + private val isTestAtLeastJava9 = SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9) private val wareHousePath = Utils.createTempDir(namePrefix = "warehouse") private val tmpDataDir = Utils.createTempDir(namePrefix = "test-data") // For local test, you can set `spark.test.cache-dir` to a static value like `/tmp/test-spark`, to @@ -148,9 +149,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { new String(Files.readAllBytes(contentPath), StandardCharsets.UTF_8) } - override def beforeAll(): Unit = { - super.beforeAll() - + private def prepare(): Unit = { val tempPyFile = File.createTempFile("test", ".py") // scalastyle:off line.size.limit Files.write(tempPyFile.toPath, @@ -200,7 +199,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { "--master", "local[2]", "--conf", s"${UI_ENABLED.key}=false", "--conf", s"${MASTER_REST_SERVER_ENABLED.key}=false", - "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=2.3.7", + "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=1.2.1", "--conf", s"${HiveUtils.HIVE_METASTORE_JARS.key}=maven", "--conf", s"${WAREHOUSE_PATH.key}=${wareHousePath.getCanonicalPath}", "--conf", s"spark.sql.test.version.index=$index", @@ -212,14 +211,23 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { tempPyFile.delete() } + override def beforeAll(): Unit = { + super.beforeAll() + if (!isTestAtLeastJava9) { + prepare() + } + } + test("backward compatibility") { + // TODO SPARK-28704 Test backward compatibility on JDK9+ once we have a version supports JDK9+ + assume(!isTestAtLeastJava9) val args = Seq( "--class", PROCESS_TABLES.getClass.getName.stripSuffix("$"), "--name", "HiveExternalCatalog backward compatibility test", "--master", "local[2]", "--conf", s"${UI_ENABLED.key}=false", "--conf", s"${MASTER_REST_SERVER_ENABLED.key}=false", - "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=2.3.7", + "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=1.2.1", "--conf", s"${HiveUtils.HIVE_METASTORE_JARS.key}=maven", 
"--conf", s"${WAREHOUSE_PATH.key}=${wareHousePath.getCanonicalPath}", "--driver-java-options", s"-Dderby.system.home=${wareHousePath.getCanonicalPath}", @@ -244,9 +252,7 @@ object PROCESS_TABLES extends QueryTest with SQLTestUtils { // do not throw exception during object initialization. case NonFatal(_) => Seq("3.0.1", "2.4.7") // A temporary fallback to use a specific version } - versions - .filter(v => v.startsWith("3") || !TestUtils.isPythonVersionAtLeast38()) - .filter(v => v.startsWith("3") || !SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) + versions.filter(v => v.startsWith("3") || !TestUtils.isPythonVersionAtLeast38()) } protected var spark: SparkSession = _ From 517b810dfa5076c3d0155d1e134dc93317ec3ec0 Mon Sep 17 00:00:00 2001 From: Gustavo Martin Morcuende Date: Sat, 21 Nov 2020 08:39:16 -0800 Subject: [PATCH 0540/1009] [SPARK-33463][SQL] Keep Job Id during incremental collect in Spark Thrift Server ### What changes were proposed in this pull request? When enabling **spark.sql.thriftServer.incrementalCollect** Job Ids get lost and tracing queries in Spark Thrift Server ends up being too complicated. ### Why are the changes needed? Because it will make easier tracing Spark Thrift Server queries. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? The current tests are enough. No need of more tests. Closes #30390 from gumartinm/master. Authored-by: Gustavo Martin Morcuende Signed-off-by: Dongjoon Hyun --- .../SparkExecuteStatementOperation.scala | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala index 2e9975bcabc3f..f7a4be9591818 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala @@ -63,6 +63,10 @@ private[hive] class SparkExecuteStatementOperation( } } + private val substitutorStatement = SQLConf.withExistingConf(sqlContext.conf) { + new VariableSubstitution().substitute(statement) + } + private var result: DataFrame = _ // We cache the returned rows to get iterators again in case the user wants to use FETCH_FIRST. 
@@ -126,6 +130,17 @@ private[hive] class SparkExecuteStatementOperation( } def getNextRowSet(order: FetchOrientation, maxRowsL: Long): RowSet = withLocalProperties { + try { + sqlContext.sparkContext.setJobGroup(statementId, substitutorStatement) + getNextRowSetInternal(order, maxRowsL) + } finally { + sqlContext.sparkContext.clearJobGroup() + } + } + + private def getNextRowSetInternal( + order: FetchOrientation, + maxRowsL: Long): RowSet = withLocalProperties { log.info(s"Received getNextRowSet request order=${order} and maxRowsL=${maxRowsL} " + s"with ${statementId}") validateDefaultFetchOrientation(order) @@ -306,9 +321,6 @@ private[hive] class SparkExecuteStatementOperation( parentSession.getSessionState.getConf.setClassLoader(executionHiveClassLoader) } - val substitutorStatement = SQLConf.withExistingConf(sqlContext.conf) { - new VariableSubstitution().substitute(statement) - } sqlContext.sparkContext.setJobGroup(statementId, substitutorStatement) result = sqlContext.sql(statement) logDebug(result.queryExecution.toString()) From d7f4b2ad50aa7acdb0392bb400fc0c87491c6e45 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Sun, 22 Nov 2020 10:29:15 -0800 Subject: [PATCH 0541/1009] [SPARK-28704][SQL][TEST] Add back Skiped HiveExternalCatalogVersionsSuite in HiveSparkSubmitSuite at JDK9+ ### What changes were proposed in this pull request? We skip test HiveExternalCatalogVersionsSuite when testing with JAVA_9 or later because our previous version does not support JAVA_9 or later. We now add it back since we have a version supports JAVA_9 or later. ### Why are the changes needed? To recover test coverage. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Check CI logs. Closes #30451 from AngersZhuuuu/SPARK-28704. Authored-by: angerszhu Signed-off-by: Dongjoon Hyun --- .../HiveExternalCatalogVersionsSuite.scala | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 38a8c492d77a7..cf070f4611f3b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -52,7 +52,6 @@ import org.apache.spark.util.Utils @ExtendedHiveTest class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { import HiveExternalCatalogVersionsSuite._ - private val isTestAtLeastJava9 = SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9) private val wareHousePath = Utils.createTempDir(namePrefix = "warehouse") private val tmpDataDir = Utils.createTempDir(namePrefix = "test-data") // For local test, you can set `spark.test.cache-dir` to a static value like `/tmp/test-spark`, to @@ -60,6 +59,11 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { private val sparkTestingDir = Option(System.getProperty(SPARK_TEST_CACHE_DIR_SYSTEM_PROPERTY)) .map(new File(_)).getOrElse(Utils.createTempDir(namePrefix = "test-spark")) private val unusedJar = TestUtils.createJarWithClasses(Seq.empty) + val hiveVersion = if (SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) { + "2.3.7" + } else { + "1.2.1" + } override def afterAll(): Unit = { try { @@ -149,7 +153,9 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { new String(Files.readAllBytes(contentPath), StandardCharsets.UTF_8) } - private def 
prepare(): Unit = { + override def beforeAll(): Unit = { + super.beforeAll() + val tempPyFile = File.createTempFile("test", ".py") // scalastyle:off line.size.limit Files.write(tempPyFile.toPath, @@ -199,7 +205,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { "--master", "local[2]", "--conf", s"${UI_ENABLED.key}=false", "--conf", s"${MASTER_REST_SERVER_ENABLED.key}=false", - "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=1.2.1", + "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=$hiveVersion", "--conf", s"${HiveUtils.HIVE_METASTORE_JARS.key}=maven", "--conf", s"${WAREHOUSE_PATH.key}=${wareHousePath.getCanonicalPath}", "--conf", s"spark.sql.test.version.index=$index", @@ -211,23 +217,14 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { tempPyFile.delete() } - override def beforeAll(): Unit = { - super.beforeAll() - if (!isTestAtLeastJava9) { - prepare() - } - } - test("backward compatibility") { - // TODO SPARK-28704 Test backward compatibility on JDK9+ once we have a version supports JDK9+ - assume(!isTestAtLeastJava9) val args = Seq( "--class", PROCESS_TABLES.getClass.getName.stripSuffix("$"), "--name", "HiveExternalCatalog backward compatibility test", "--master", "local[2]", "--conf", s"${UI_ENABLED.key}=false", "--conf", s"${MASTER_REST_SERVER_ENABLED.key}=false", - "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=1.2.1", + "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=$hiveVersion", "--conf", s"${HiveUtils.HIVE_METASTORE_JARS.key}=maven", "--conf", s"${WAREHOUSE_PATH.key}=${wareHousePath.getCanonicalPath}", "--driver-java-options", s"-Dderby.system.home=${wareHousePath.getCanonicalPath}", @@ -252,7 +249,9 @@ object PROCESS_TABLES extends QueryTest with SQLTestUtils { // do not throw exception during object initialization. case NonFatal(_) => Seq("3.0.1", "2.4.7") // A temporary fallback to use a specific version } - versions.filter(v => v.startsWith("3") || !TestUtils.isPythonVersionAtLeast38()) + versions + .filter(v => v.startsWith("3") || !TestUtils.isPythonVersionAtLeast38()) + .filter(v => v.startsWith("3") || !SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) } protected var spark: SparkSession = _ From d338af3101a4c986b5e979e8fdc63b8551e12d29 Mon Sep 17 00:00:00 2001 From: CC Highman Date: Mon, 23 Nov 2020 08:30:41 +0900 Subject: [PATCH 0542/1009] [SPARK-31962][SQL] Provide modifiedAfter and modifiedBefore options when filtering from a batch-based file data source ### What changes were proposed in this pull request? Two new options, _modifiiedBefore_ and _modifiedAfter_, is provided expecting a value in 'YYYY-MM-DDTHH:mm:ss' format. _PartioningAwareFileIndex_ considers these options during the process of checking for files, just before considering applied _PathFilters_ such as `pathGlobFilter.` In order to filter file results, a new PathFilter class was derived for this purpose. General house-keeping around classes extending PathFilter was performed for neatness. It became apparent support was needed to handle multiple potential path filters. Logic was introduced for this purpose and the associated tests written. ### Why are the changes needed? When loading files from a data source, there can often times be thousands of file within a respective file path. In many cases I've seen, we want to start loading from a folder path and ideally be able to begin loading files having modification dates past a certain point. 
This would mean out of thousands of potential files, only the ones with modification dates greater than the specified timestamp would be considered. This saves a ton of time automatically and reduces significant complexity managing this in code. ### Does this PR introduce _any_ user-facing change? This PR introduces an option that can be used with batch-based Spark file data sources. A documentation update was made to reflect an example and usage of the new data source option. **Example Usages** _Load all CSV files modified after date:_ `spark.read.format("csv").option("modifiedAfter","2020-06-15T05:00:00").load()` _Load all CSV files modified before date:_ `spark.read.format("csv").option("modifiedBefore","2020-06-15T05:00:00").load()` _Load all CSV files modified between two dates:_ `spark.read.format("csv").option("modifiedAfter","2019-01-15T05:00:00").option("modifiedBefore","2020-06-15T05:00:00").load() ` ### How was this patch tested? A handful of unit tests were added to support the positive, negative, and edge case code paths. It's also live in a handful of our Databricks dev environments. (quoted from cchighman) Closes #30411 from HeartSaVioR/SPARK-31962. Lead-authored-by: CC Highman Co-authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Jungtaek Lim (HeartSaVioR) --- docs/sql-data-sources-generic-options.md | 37 +++ .../sql/JavaSQLDataSourceExample.java | 16 + examples/src/main/python/sql/datasource.py | 20 ++ examples/src/main/r/RSparkSQLExample.R | 8 + .../examples/sql/SQLDataSourceExample.scala | 21 ++ python/pyspark/sql/readwriter.py | 81 ++++- .../apache/spark/sql/DataFrameReader.scala | 30 ++ .../PartitioningAwareFileIndex.scala | 13 +- .../execution/datasources/pathFilters.scala | 161 +++++++++ .../streaming/FileStreamOptions.scala | 11 + .../spark/sql/FileBasedDataSourceSuite.scala | 32 -- .../datasources/PathFilterStrategySuite.scala | 54 +++ .../datasources/PathFilterSuite.scala | 307 ++++++++++++++++++ .../sql/streaming/FileStreamSourceSuite.scala | 44 ++- 14 files changed, 787 insertions(+), 48 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/pathFilters.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/PathFilterStrategySuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/PathFilterSuite.scala diff --git a/docs/sql-data-sources-generic-options.md b/docs/sql-data-sources-generic-options.md index 6bcf48235bced..2e4fc879a435f 100644 --- a/docs/sql-data-sources-generic-options.md +++ b/docs/sql-data-sources-generic-options.md @@ -119,3 +119,40 @@ To load all files recursively, you can use: {% include_example recursive_file_lookup r/RSparkSQLExample.R %} + +### Modification Time Path Filters + +`modifiedBefore` and `modifiedAfter` are options that can be +applied together or separately in order to achieve greater +granularity over which files may load during a Spark batch query. +(Note that Structured Streaming file sources don't support these options.) + +* `modifiedBefore`: an optional timestamp to only include files with +modification times occurring before the specified time. The provided timestamp +must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) +* `modifiedAfter`: an optional timestamp to only include files with +modification times occurring after the specified time. The provided timestamp +must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 
2020-06-01T13:00:00) + +When a timezone option is not provided, the timestamps will be interpreted according +to the Spark session timezone (`spark.sql.session.timeZone`). + +To load files with paths matching a given modified time range, you can use: + +
+<div class="codetabs">
+
+<div data-lang="scala" markdown="1">
+{% include_example load_with_modified_time_filter scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala %}
+</div>
+
+<div data-lang="java" markdown="1">
+{% include_example load_with_modified_time_filter java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java %}
+</div>
+
+<div data-lang="python" markdown="1">
+{% include_example load_with_modified_time_filter python/sql/datasource.py %}
+</div>
+
+<div data-lang="r" markdown="1">
+{% include_example load_with_modified_time_filter r/RSparkSQLExample.R %}
+</div>
+
+</div>
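For illustration, a short sketch (not part of this patch) of how these options compose with `pathGlobFilter` and `timeZone` in a single batch read; the path below reuses the example resources directory:

```
// Illustrative sketch only; assumes a running SparkSession named `spark`.
val df = spark.read.format("parquet")
  // keep files modified after this instant ...
  .option("modifiedAfter", "2020-06-01T05:30:00")
  // ... and before this one
  .option("modifiedBefore", "2020-07-01T05:30:00")
  // both timestamps are interpreted in this timezone (session timezone if omitted)
  .option("timeZone", "UTC")
  // glob filtering is applied together with the modification-time filters
  .option("pathGlobFilter", "*.parquet")
  .load("examples/src/main/resources/dir1")
```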
    \ No newline at end of file diff --git a/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java b/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java index 2295225387a33..46e740d78bffb 100644 --- a/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java +++ b/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java @@ -147,6 +147,22 @@ private static void runGenericFileSourceOptionsExample(SparkSession spark) { // |file1.parquet| // +-------------+ // $example off:load_with_path_glob_filter$ + // $example on:load_with_modified_time_filter$ + Dataset beforeFilterDF = spark.read().format("parquet") + // Only load files modified before 7/1/2020 at 05:30 + .option("modifiedBefore", "2020-07-01T05:30:00") + // Only load files modified after 6/1/2020 at 05:30 + .option("modifiedAfter", "2020-06-01T05:30:00") + // Interpret both times above relative to CST timezone + .option("timeZone", "CST") + .load("examples/src/main/resources/dir1"); + beforeFilterDF.show(); + // +-------------+ + // | file| + // +-------------+ + // |file1.parquet| + // +-------------+ + // $example off:load_with_modified_time_filter$ } private static void runBasicDataSourceExample(SparkSession spark) { diff --git a/examples/src/main/python/sql/datasource.py b/examples/src/main/python/sql/datasource.py index eecd8c2d84788..8c146ba0c9455 100644 --- a/examples/src/main/python/sql/datasource.py +++ b/examples/src/main/python/sql/datasource.py @@ -67,6 +67,26 @@ def generic_file_source_options_example(spark): # +-------------+ # $example off:load_with_path_glob_filter$ + # $example on:load_with_modified_time_filter$ + # Only load files modified before 07/1/2050 @ 08:30:00 + df = spark.read.load("examples/src/main/resources/dir1", + format="parquet", modifiedBefore="2050-07-01T08:30:00") + df.show() + # +-------------+ + # | file| + # +-------------+ + # |file1.parquet| + # +-------------+ + # Only load files modified after 06/01/2050 @ 08:30:00 + df = spark.read.load("examples/src/main/resources/dir1", + format="parquet", modifiedAfter="2050-06-01T08:30:00") + df.show() + # +-------------+ + # | file| + # +-------------+ + # +-------------+ + # $example off:load_with_modified_time_filter$ + def basic_datasource_example(spark): # $example on:generic_load_save_functions$ diff --git a/examples/src/main/r/RSparkSQLExample.R b/examples/src/main/r/RSparkSQLExample.R index 8685cfb5c05f2..86ad5334248bc 100644 --- a/examples/src/main/r/RSparkSQLExample.R +++ b/examples/src/main/r/RSparkSQLExample.R @@ -144,6 +144,14 @@ df <- read.df("examples/src/main/resources/dir1", "parquet", pathGlobFilter = "* # 1 file1.parquet # $example off:load_with_path_glob_filter$ +# $example on:load_with_modified_time_filter$ +beforeDF <- read.df("examples/src/main/resources/dir1", "parquet", modifiedBefore= "2020-07-01T05:30:00") +# file +# 1 file1.parquet +afterDF <- read.df("examples/src/main/resources/dir1", "parquet", modifiedAfter = "2020-06-01T05:30:00") +# file +# $example off:load_with_modified_time_filter$ + # $example on:manual_save_options_orc$ df <- read.df("examples/src/main/resources/users.orc", "orc") write.orc(df, "users_with_options.orc", orc.bloom.filter.columns = "favorite_color", orc.dictionary.key.threshold = 1.0, orc.column.encoding.direct = "name") diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala b/examples/src/main/scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala 
index 2c7abfcd335d1..90c0eeb5ba888 100644 --- a/examples/src/main/scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala @@ -81,6 +81,27 @@ object SQLDataSourceExample { // |file1.parquet| // +-------------+ // $example off:load_with_path_glob_filter$ + // $example on:load_with_modified_time_filter$ + val beforeFilterDF = spark.read.format("parquet") + // Files modified before 07/01/2020 at 05:30 are allowed + .option("modifiedBefore", "2020-07-01T05:30:00") + .load("examples/src/main/resources/dir1"); + beforeFilterDF.show(); + // +-------------+ + // | file| + // +-------------+ + // |file1.parquet| + // +-------------+ + val afterFilterDF = spark.read.format("parquet") + // Files modified after 06/01/2020 at 05:30 are allowed + .option("modifiedAfter", "2020-06-01T05:30:00") + .load("examples/src/main/resources/dir1"); + afterFilterDF.show(); + // +-------------+ + // | file| + // +-------------+ + // +-------------+ + // $example off:load_with_modified_time_filter$ } private def runBasicDataSourceExample(spark: SparkSession): Unit = { diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 2ed991c87f506..bb31e6a3e09f8 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -125,6 +125,12 @@ def option(self, key, value): * ``pathGlobFilter``: an optional glob pattern to only include files with paths matching the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter. It does not change the behavior of partition discovery. + * ``modifiedBefore``: an optional timestamp to only include files with + modification times occurring before the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + * ``modifiedAfter``: an optional timestamp to only include files with + modification times occurring after the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) """ self._jreader = self._jreader.option(key, to_str(value)) return self @@ -149,6 +155,12 @@ def options(self, **options): * ``pathGlobFilter``: an optional glob pattern to only include files with paths matching the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter. It does not change the behavior of partition discovery. + * ``modifiedBefore``: an optional timestamp to only include files with + modification times occurring before the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + * ``modifiedAfter``: an optional timestamp to only include files with + modification times occurring after the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) """ for k in options: self._jreader = self._jreader.option(k, to_str(options[k])) @@ -203,7 +215,8 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, mode=None, columnNameOfCorruptRecord=None, dateFormat=None, timestampFormat=None, multiLine=None, allowUnquotedControlChars=None, lineSep=None, samplingRatio=None, dropFieldIfAllNull=None, encoding=None, locale=None, pathGlobFilter=None, - recursiveFileLookup=None, allowNonNumericNumbers=None): + recursiveFileLookup=None, allowNonNumericNumbers=None, + modifiedBefore=None, modifiedAfter=None): """ Loads JSON files and returns the results as a :class:`DataFrame`. 
@@ -322,6 +335,13 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, ``+Infinity`` and ``Infinity``. * ``-INF``: for negative infinity, alias ``-Infinity``. * ``NaN``: for other not-a-numbers, like result of division by zero. + modifiedBefore : an optional timestamp to only include files with + modification times occurring before the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + modifiedAfter : an optional timestamp to only include files with + modification times occurring after the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + Examples -------- @@ -344,6 +364,7 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, allowUnquotedControlChars=allowUnquotedControlChars, lineSep=lineSep, samplingRatio=samplingRatio, dropFieldIfAllNull=dropFieldIfAllNull, encoding=encoding, locale=locale, pathGlobFilter=pathGlobFilter, recursiveFileLookup=recursiveFileLookup, + modifiedBefore=modifiedBefore, modifiedAfter=modifiedAfter, allowNonNumericNumbers=allowNonNumericNumbers) if isinstance(path, str): path = [path] @@ -410,6 +431,15 @@ def parquet(self, *paths, **options): disables `partition discovery `_. # noqa + modification times occurring before the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + modifiedBefore (batch only) : an optional timestamp to only include files with + modification times occurring before the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + modifiedAfter (batch only) : an optional timestamp to only include files with + modification times occurring after the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + Examples -------- >>> df = spark.read.parquet('python/test_support/sql/parquet_partitioned') @@ -418,13 +448,18 @@ def parquet(self, *paths, **options): """ mergeSchema = options.get('mergeSchema', None) pathGlobFilter = options.get('pathGlobFilter', None) + modifiedBefore = options.get('modifiedBefore', None) + modifiedAfter = options.get('modifiedAfter', None) recursiveFileLookup = options.get('recursiveFileLookup', None) self._set_opts(mergeSchema=mergeSchema, pathGlobFilter=pathGlobFilter, - recursiveFileLookup=recursiveFileLookup) + recursiveFileLookup=recursiveFileLookup, modifiedBefore=modifiedBefore, + modifiedAfter=modifiedAfter) + return self._df(self._jreader.parquet(_to_seq(self._spark._sc, paths))) def text(self, paths, wholetext=False, lineSep=None, pathGlobFilter=None, - recursiveFileLookup=None): + recursiveFileLookup=None, modifiedBefore=None, + modifiedAfter=None): """ Loads text files and returns a :class:`DataFrame` whose schema starts with a string column named "value", and followed by partitioned columns if there @@ -453,6 +488,15 @@ def text(self, paths, wholetext=False, lineSep=None, pathGlobFilter=None, recursively scan a directory for files. Using this option disables `partition discovery `_. # noqa + modification times occurring before the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + modifiedBefore (batch only) : an optional timestamp to only include files with + modification times occurring before the specified time. 
The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + modifiedAfter (batch only) : an optional timestamp to only include files with + modification times occurring after the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + Examples -------- >>> df = spark.read.text('python/test_support/sql/text-test.txt') @@ -464,7 +508,9 @@ def text(self, paths, wholetext=False, lineSep=None, pathGlobFilter=None, """ self._set_opts( wholetext=wholetext, lineSep=lineSep, pathGlobFilter=pathGlobFilter, - recursiveFileLookup=recursiveFileLookup) + recursiveFileLookup=recursiveFileLookup, modifiedBefore=modifiedBefore, + modifiedAfter=modifiedAfter) + if isinstance(paths, str): paths = [paths] return self._df(self._jreader.text(self._spark._sc._jvm.PythonUtils.toSeq(paths))) @@ -476,7 +522,7 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non maxCharsPerColumn=None, maxMalformedLogPerPartition=None, mode=None, columnNameOfCorruptRecord=None, multiLine=None, charToEscapeQuoteEscaping=None, samplingRatio=None, enforceSchema=None, emptyValue=None, locale=None, lineSep=None, - pathGlobFilter=None, recursiveFileLookup=None): + pathGlobFilter=None, recursiveFileLookup=None, modifiedBefore=None, modifiedAfter=None): r"""Loads a CSV file and returns the result as a :class:`DataFrame`. This function will go through the input once to determine the input schema if @@ -631,6 +677,15 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non recursively scan a directory for files. Using this option disables `partition discovery `_. # noqa + modification times occurring before the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + modifiedBefore (batch only) : an optional timestamp to only include files with + modification times occurring before the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + modifiedAfter (batch only) : an optional timestamp to only include files with + modification times occurring after the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + Examples -------- >>> df = spark.read.csv('python/test_support/sql/ages.csv') @@ -652,7 +707,8 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non columnNameOfCorruptRecord=columnNameOfCorruptRecord, multiLine=multiLine, charToEscapeQuoteEscaping=charToEscapeQuoteEscaping, samplingRatio=samplingRatio, enforceSchema=enforceSchema, emptyValue=emptyValue, locale=locale, lineSep=lineSep, - pathGlobFilter=pathGlobFilter, recursiveFileLookup=recursiveFileLookup) + pathGlobFilter=pathGlobFilter, recursiveFileLookup=recursiveFileLookup, + modifiedBefore=modifiedBefore, modifiedAfter=modifiedAfter) if isinstance(path, str): path = [path] if type(path) == list: @@ -679,7 +735,8 @@ def func(iterator): else: raise TypeError("path can be only string, list or RDD") - def orc(self, path, mergeSchema=None, pathGlobFilter=None, recursiveFileLookup=None): + def orc(self, path, mergeSchema=None, pathGlobFilter=None, recursiveFileLookup=None, + modifiedBefore=None, modifiedAfter=None): """Loads ORC files, returning the result as a :class:`DataFrame`. .. 
versionadded:: 1.5.0 @@ -701,6 +758,15 @@ def orc(self, path, mergeSchema=None, pathGlobFilter=None, recursiveFileLookup=N disables `partition discovery `_. # noqa + modification times occurring before the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + modifiedBefore : an optional timestamp to only include files with + modification times occurring before the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + modifiedAfter : an optional timestamp to only include files with + modification times occurring after the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + Examples -------- >>> df = spark.read.orc('python/test_support/sql/orc_partitioned') @@ -708,6 +774,7 @@ def orc(self, path, mergeSchema=None, pathGlobFilter=None, recursiveFileLookup=N [('a', 'bigint'), ('b', 'int'), ('c', 'int')] """ self._set_opts(mergeSchema=mergeSchema, pathGlobFilter=pathGlobFilter, + modifiedBefore=modifiedBefore, modifiedAfter=modifiedAfter, recursiveFileLookup=recursiveFileLookup) if isinstance(path, str): path = [path] diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index 276d5d29bfa2c..b26bc6441b6cf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -493,6 +493,12 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { *
  6. `pathGlobFilter`: an optional glob pattern to only include files with paths matching * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter. * It does not change the behavior of partition discovery.
  7. + *
  8. `modifiedBefore` (batch only): an optional timestamp to only include files with + * modification times occurring before the specified Time. The provided timestamp + * must be in the following form: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00)
  9. + *
  10. `modifiedAfter` (batch only): an optional timestamp to only include files with + * modification times occurring after the specified Time. The provided timestamp + * must be in the following form: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00)
  11. *
  12. `recursiveFileLookup`: recursively scan a directory for files. Using this option * disables partition discovery
  13. *
  14. `allowNonNumericNumbers` (default `true`): allows JSON parser to recognize set of @@ -750,6 +756,12 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { *
  15. `pathGlobFilter`: an optional glob pattern to only include files with paths matching * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter. * It does not change the behavior of partition discovery.
  16. + *
  17. `modifiedBefore` (batch only): an optional timestamp to only include files with + * modification times occurring before the specified Time. The provided timestamp + * must be in the following form: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00)
  18. + *
  19. `modifiedAfter` (batch only): an optional timestamp to only include files with + * modification times occurring after the specified Time. The provided timestamp + * must be in the following form: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00)
  20. *
  21. `recursiveFileLookup`: recursively scan a directory for files. Using this option * disables partition discovery
  22. * @@ -781,6 +793,12 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { *
  23. `pathGlobFilter`: an optional glob pattern to only include files with paths matching * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter. * It does not change the behavior of partition discovery.
  24. + *
  25. `modifiedBefore` (batch only): an optional timestamp to only include files with + * modification times occurring before the specified Time. The provided timestamp + * must be in the following form: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00)
  26. + *
  27. `modifiedAfter` (batch only): an optional timestamp to only include files with + * modification times occurring after the specified Time. The provided timestamp + * must be in the following form: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00)
  28. *
  29. `recursiveFileLookup`: recursively scan a directory for files. Using this option * disables partition discovery
  30. * @@ -814,6 +832,12 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { *
  31. `pathGlobFilter`: an optional glob pattern to only include files with paths matching * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter. * It does not change the behavior of partition discovery.
  32. + *
  33. `modifiedBefore` (batch only): an optional timestamp to only include files with + * modification times occurring before the specified Time. The provided timestamp + * must be in the following form: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00)
  34. + *
  35. `modifiedAfter` (batch only): an optional timestamp to only include files with + * modification times occurring after the specified Time. The provided timestamp + * must be in the following form: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00)
  36. *
  37. `recursiveFileLookup`: recursively scan a directory for files. Using this option * disables partition discovery
  38. * @@ -880,6 +904,12 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { *
  39. `pathGlobFilter`: an optional glob pattern to only include files with paths matching * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter. * It does not change the behavior of partition discovery.
  40. + *
  41. `modifiedBefore` (batch only): an optional timestamp to only include files with + * modification times occurring before the specified Time. The provided timestamp + * must be in the following form: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00)
  42. + *
  43. `modifiedAfter` (batch only): an optional timestamp to only include files with + * modification times occurring after the specified Time. The provided timestamp + * must be in the following form: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00)
  44. *
  45. `recursiveFileLookup`: recursively scan a directory for files. Using this option * disables partition discovery
  46. * diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala index fed9614347f6a..5b0d0606da093 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala @@ -57,13 +57,10 @@ abstract class PartitioningAwareFileIndex( protected def leafDirToChildrenFiles: Map[Path, Array[FileStatus]] private val caseInsensitiveMap = CaseInsensitiveMap(parameters) + private val pathFilters = PathFilterFactory.create(caseInsensitiveMap) - protected lazy val pathGlobFilter: Option[GlobFilter] = - caseInsensitiveMap.get("pathGlobFilter").map(new GlobFilter(_)) - - protected def matchGlobPattern(file: FileStatus): Boolean = { - pathGlobFilter.forall(_.accept(file.getPath)) - } + protected def matchPathPattern(file: FileStatus): Boolean = + pathFilters.forall(_.accept(file)) protected lazy val recursiveFileLookup: Boolean = { caseInsensitiveMap.getOrElse("recursiveFileLookup", "false").toBoolean @@ -86,7 +83,7 @@ abstract class PartitioningAwareFileIndex( val files: Seq[FileStatus] = leafDirToChildrenFiles.get(path) match { case Some(existingDir) => // Directory has children files in it, return them - existingDir.filter(f => matchGlobPattern(f) && isNonEmptyFile(f)) + existingDir.filter(f => matchPathPattern(f) && isNonEmptyFile(f)) case None => // Directory does not exist, or has no children files @@ -135,7 +132,7 @@ abstract class PartitioningAwareFileIndex( } else { leafFiles.values.toSeq } - files.filter(matchGlobPattern) + files.filter(matchPathPattern) } protected def inferPartitioning(): PartitionSpec = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/pathFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/pathFilters.scala new file mode 100644 index 0000000000000..c8f23988f93c6 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/pathFilters.scala @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources + +import java.util.{Locale, TimeZone} + +import org.apache.hadoop.fs.{FileStatus, GlobFilter} + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.unsafe.types.UTF8String + +trait PathFilterStrategy extends Serializable { + def accept(fileStatus: FileStatus): Boolean +} + +trait StrategyBuilder { + def create(parameters: CaseInsensitiveMap[String]): Option[PathFilterStrategy] +} + +class PathGlobFilter(filePatten: String) extends PathFilterStrategy { + + private val globFilter = new GlobFilter(filePatten) + + override def accept(fileStatus: FileStatus): Boolean = + globFilter.accept(fileStatus.getPath) +} + +object PathGlobFilter extends StrategyBuilder { + val PARAM_NAME = "pathglobfilter" + + override def create(parameters: CaseInsensitiveMap[String]): Option[PathFilterStrategy] = { + parameters.get(PARAM_NAME).map(new PathGlobFilter(_)) + } +} + +/** + * Provide modifiedAfter and modifiedBefore options when + * filtering from a batch-based file data source. + * + * Example Usages + * Load all CSV files modified after date: + * {{{ + * spark.read.format("csv").option("modifiedAfter","2020-06-15T05:00:00").load() + * }}} + * + * Load all CSV files modified before date: + * {{{ + * spark.read.format("csv").option("modifiedBefore","2020-06-15T05:00:00").load() + * }}} + * + * Load all CSV files modified between two dates: + * {{{ + * spark.read.format("csv").option("modifiedAfter","2019-01-15T05:00:00") + * .option("modifiedBefore","2020-06-15T05:00:00").load() + * }}} + */ +abstract class ModifiedDateFilter extends PathFilterStrategy { + + def timeZoneId: String + + protected def localTime(micros: Long): Long = + DateTimeUtils.fromUTCTime(micros, timeZoneId) +} + +object ModifiedDateFilter { + + def getTimeZoneId(options: CaseInsensitiveMap[String]): String = { + options.getOrElse( + DateTimeUtils.TIMEZONE_OPTION.toLowerCase(Locale.ROOT), + SQLConf.get.sessionLocalTimeZone) + } + + def toThreshold(timeString: String, timeZoneId: String, strategy: String): Long = { + val timeZone: TimeZone = DateTimeUtils.getTimeZone(timeZoneId) + val ts = UTF8String.fromString(timeString) + DateTimeUtils.stringToTimestamp(ts, timeZone.toZoneId).getOrElse { + throw new AnalysisException( + s"The timestamp provided for the '$strategy' option is invalid. The expected format " + + s"is 'YYYY-MM-DDTHH:mm:ss', but the provided timestamp: $timeString") + } + } +} + +/** + * Filter used to determine whether file was modified before the provided timestamp. 
+ */ +class ModifiedBeforeFilter(thresholdTime: Long, val timeZoneId: String) + extends ModifiedDateFilter { + + override def accept(fileStatus: FileStatus): Boolean = + // We standardize on microseconds wherever possible + // getModificationTime returns in milliseconds + thresholdTime - localTime(DateTimeUtils.millisToMicros(fileStatus.getModificationTime)) > 0 +} + +object ModifiedBeforeFilter extends StrategyBuilder { + import ModifiedDateFilter._ + + val PARAM_NAME = "modifiedbefore" + + override def create(parameters: CaseInsensitiveMap[String]): Option[PathFilterStrategy] = { + parameters.get(PARAM_NAME).map { value => + val timeZoneId = getTimeZoneId(parameters) + val thresholdTime = toThreshold(value, timeZoneId, PARAM_NAME) + new ModifiedBeforeFilter(thresholdTime, timeZoneId) + } + } +} + +/** + * Filter used to determine whether file was modified after the provided timestamp. + */ +class ModifiedAfterFilter(thresholdTime: Long, val timeZoneId: String) + extends ModifiedDateFilter { + + override def accept(fileStatus: FileStatus): Boolean = + // getModificationTime returns in milliseconds + // We standardize on microseconds wherever possible + localTime(DateTimeUtils.millisToMicros(fileStatus.getModificationTime)) - thresholdTime > 0 +} + +object ModifiedAfterFilter extends StrategyBuilder { + import ModifiedDateFilter._ + + val PARAM_NAME = "modifiedafter" + + override def create(parameters: CaseInsensitiveMap[String]): Option[PathFilterStrategy] = { + parameters.get(PARAM_NAME).map { value => + val timeZoneId = getTimeZoneId(parameters) + val thresholdTime = toThreshold(value, timeZoneId, PARAM_NAME) + new ModifiedAfterFilter(thresholdTime, timeZoneId) + } + } +} + +object PathFilterFactory { + + private val strategies = + Seq(PathGlobFilter, ModifiedBeforeFilter, ModifiedAfterFilter) + + def create(parameters: CaseInsensitiveMap[String]): Seq[PathFilterStrategy] = { + strategies.flatMap { _.create(parameters) } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamOptions.scala index 712ed1585bc8a..6f43542fd6595 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamOptions.scala @@ -23,6 +23,7 @@ import scala.util.Try import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.execution.datasources.{ModifiedAfterFilter, ModifiedBeforeFilter} import org.apache.spark.util.Utils /** @@ -32,6 +33,16 @@ class FileStreamOptions(parameters: CaseInsensitiveMap[String]) extends Logging def this(parameters: Map[String, String]) = this(CaseInsensitiveMap(parameters)) + checkDisallowedOptions(parameters) + + private def checkDisallowedOptions(options: Map[String, String]): Unit = { + Seq(ModifiedBeforeFilter.PARAM_NAME, ModifiedAfterFilter.PARAM_NAME).foreach { param => + if (parameters.contains(param)) { + throw new IllegalArgumentException(s"option '$param' is not allowed in file stream sources") + } + } + } + val maxFilesPerTrigger: Option[Int] = parameters.get("maxFilesPerTrigger").map { str => Try(str.toInt).toOption.filter(_ > 0).getOrElse { throw new IllegalArgumentException( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala index 
b27c1145181bd..876f62803dc7c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -577,38 +577,6 @@ class FileBasedDataSourceSuite extends QueryTest } } - test("Option pathGlobFilter: filter files correctly") { - withTempPath { path => - val dataDir = path.getCanonicalPath - Seq("foo").toDS().write.text(dataDir) - Seq("bar").toDS().write.mode("append").orc(dataDir) - val df = spark.read.option("pathGlobFilter", "*.txt").text(dataDir) - checkAnswer(df, Row("foo")) - - // Both glob pattern in option and path should be effective to filter files. - val df2 = spark.read.option("pathGlobFilter", "*.txt").text(dataDir + "/*.orc") - checkAnswer(df2, Seq.empty) - - val df3 = spark.read.option("pathGlobFilter", "*.txt").text(dataDir + "/*xt") - checkAnswer(df3, Row("foo")) - } - } - - test("Option pathGlobFilter: simple extension filtering should contains partition info") { - withTempPath { path => - val input = Seq(("foo", 1), ("oof", 2)).toDF("a", "b") - input.write.partitionBy("b").text(path.getCanonicalPath) - Seq("bar").toDS().write.mode("append").orc(path.getCanonicalPath + "/b=1") - - // If we use glob pattern in the path, the partition column won't be shown in the result. - val df = spark.read.text(path.getCanonicalPath + "/*/*.txt") - checkAnswer(df, input.select("a")) - - val df2 = spark.read.option("pathGlobFilter", "*.txt").text(path.getCanonicalPath) - checkAnswer(df2, input) - } - } - test("Option recursiveFileLookup: recursive loading correctly") { val expectedFileList = mutable.ListBuffer[String]() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/PathFilterStrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/PathFilterStrategySuite.scala new file mode 100644 index 0000000000000..b965a78c9eec0 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/PathFilterStrategySuite.scala @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.test.SharedSparkSession + +class PathFilterStrategySuite extends QueryTest with SharedSparkSession { + + test("SPARK-31962: PathFilterStrategies - modifiedAfter option") { + val options = + CaseInsensitiveMap[String](Map("modifiedAfter" -> "2010-10-01T01:01:00")) + val strategy = PathFilterFactory.create(options) + assert(strategy.head.isInstanceOf[ModifiedAfterFilter]) + assert(strategy.size == 1) + } + + test("SPARK-31962: PathFilterStrategies - modifiedBefore option") { + val options = + CaseInsensitiveMap[String](Map("modifiedBefore" -> "2020-10-01T01:01:00")) + val strategy = PathFilterFactory.create(options) + assert(strategy.head.isInstanceOf[ModifiedBeforeFilter]) + assert(strategy.size == 1) + } + + test("SPARK-31962: PathFilterStrategies - pathGlobFilter option") { + val options = CaseInsensitiveMap[String](Map("pathGlobFilter" -> "*.txt")) + val strategy = PathFilterFactory.create(options) + assert(strategy.head.isInstanceOf[PathGlobFilter]) + assert(strategy.size == 1) + } + + test("SPARK-31962: PathFilterStrategies - no options") { + val options = CaseInsensitiveMap[String](Map.empty) + val strategy = PathFilterFactory.create(options) + assert(strategy.isEmpty) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/PathFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/PathFilterSuite.scala new file mode 100644 index 0000000000000..1af2adfd8640c --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/PathFilterSuite.scala @@ -0,0 +1,307 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources + +import java.io.File +import java.time.{LocalDateTime, ZoneId, ZoneOffset} +import java.time.format.DateTimeFormatter + +import scala.util.Random + +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.catalyst.util.{stringToFile, DateTimeUtils} +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{StringType, StructField, StructType} + +class PathFilterSuite extends QueryTest with SharedSparkSession { + import testImplicits._ + + test("SPARK-31962: modifiedBefore specified" + + " and sharing same timestamp with file last modified time.") { + withTempDir { dir => + val curTime = LocalDateTime.now(ZoneOffset.UTC) + executeTest(dir, Seq(curTime), 0, modifiedBefore = Some(formatTime(curTime))) + } + } + + test("SPARK-31962: modifiedAfter specified" + + " and sharing same timestamp with file last modified time.") { + withTempDir { dir => + val curTime = LocalDateTime.now(ZoneOffset.UTC) + executeTest(dir, Seq(curTime), 0, modifiedAfter = Some(formatTime(curTime))) + } + } + + test("SPARK-31962: modifiedBefore and modifiedAfter option" + + " share same timestamp with file last modified time.") { + withTempDir { dir => + val curTime = LocalDateTime.now(ZoneOffset.UTC) + val formattedTime = formatTime(curTime) + executeTest(dir, Seq(curTime), 0, modifiedBefore = Some(formattedTime), + modifiedAfter = Some(formattedTime)) + } + } + + test("SPARK-31962: modifiedBefore and modifiedAfter option" + + " share same timestamp with earlier file last modified time.") { + withTempDir { dir => + val curTime = LocalDateTime.now(ZoneOffset.UTC) + val fileTime = curTime.minusDays(3) + val formattedTime = formatTime(curTime) + executeTest(dir, Seq(fileTime), 0, modifiedBefore = Some(formattedTime), + modifiedAfter = Some(formattedTime)) + } + } + + test("SPARK-31962: modifiedBefore and modifiedAfter option" + + " share same timestamp with later file last modified time.") { + withTempDir { dir => + val curTime = LocalDateTime.now(ZoneOffset.UTC) + val formattedTime = formatTime(curTime) + executeTest(dir, Seq(curTime), 0, modifiedBefore = Some(formattedTime), + modifiedAfter = Some(formattedTime)) + } + } + + test("SPARK-31962: when modifiedAfter specified with a past date") { + withTempDir { dir => + val curTime = LocalDateTime.now(ZoneOffset.UTC) + val pastTime = curTime.minusYears(1) + val formattedTime = formatTime(pastTime) + executeTest(dir, Seq(curTime), 1, modifiedAfter = Some(formattedTime)) + } + } + + test("SPARK-31962: when modifiedBefore specified with a future date") { + withTempDir { dir => + val curTime = LocalDateTime.now(ZoneOffset.UTC) + val futureTime = curTime.plusYears(1) + val formattedTime = formatTime(futureTime) + executeTest(dir, Seq(curTime), 1, modifiedBefore = Some(formattedTime)) + } + } + + test("SPARK-31962: with modifiedBefore option provided using a past date") { + withTempDir { dir => + val curTime = LocalDateTime.now(ZoneOffset.UTC) + val pastTime = curTime.minusYears(1) + val formattedTime = formatTime(pastTime) + executeTest(dir, Seq(curTime), 0, modifiedBefore = Some(formattedTime)) + } + } + + test("SPARK-31962: modifiedAfter specified with a past date, multiple files, one valid") { + withTempDir { dir => + val fileTime1 = LocalDateTime.now(ZoneOffset.UTC) + val fileTime2 = LocalDateTime.ofEpochSecond(0, 0, ZoneOffset.UTC) + val pastTime = fileTime1.minusYears(1) + val formattedTime = formatTime(pastTime) + executeTest(dir, Seq(fileTime1, 
fileTime2), 1, modifiedAfter = Some(formattedTime)) + } + } + + test("SPARK-31962: modifiedAfter specified with a past date, multiple files, both valid") { + withTempDir { dir => + val curTime = LocalDateTime.now(ZoneOffset.UTC) + val pastTime = curTime.minusYears(1) + val formattedTime = formatTime(pastTime) + executeTest(dir, Seq(curTime, curTime), 2, modifiedAfter = Some(formattedTime)) + } + } + + test("SPARK-31962: modifiedAfter specified with a past date, multiple files, none valid") { + withTempDir { dir => + val fileTime = LocalDateTime.ofEpochSecond(0, 0, ZoneOffset.UTC) + val pastTime = LocalDateTime.now(ZoneOffset.UTC).minusYears(1) + val formattedTime = formatTime(pastTime) + executeTest(dir, Seq(fileTime, fileTime), 0, modifiedAfter = Some(formattedTime)) + } + } + + test("SPARK-31962: modifiedBefore specified with a future date, multiple files, both valid") { + withTempDir { dir => + val fileTime = LocalDateTime.ofEpochSecond(0, 0, ZoneOffset.UTC) + val futureTime = LocalDateTime.now(ZoneOffset.UTC).plusYears(1) + val formattedTime = formatTime(futureTime) + executeTest(dir, Seq(fileTime, fileTime), 2, modifiedBefore = Some(formattedTime)) + } + } + + test("SPARK-31962: modifiedBefore specified with a future date, multiple files, one valid") { + withTempDir { dir => + val curTime = LocalDateTime.now(ZoneOffset.UTC) + val fileTime1 = LocalDateTime.ofEpochSecond(0, 0, ZoneOffset.UTC) + val fileTime2 = curTime.plusDays(3) + val formattedTime = formatTime(curTime) + executeTest(dir, Seq(fileTime1, fileTime2), 1, modifiedBefore = Some(formattedTime)) + } + } + + test("SPARK-31962: modifiedBefore specified with a future date, multiple files, none valid") { + withTempDir { dir => + val fileTime = LocalDateTime.now(ZoneOffset.UTC).minusDays(1) + val formattedTime = formatTime(fileTime) + executeTest(dir, Seq(fileTime, fileTime), 0, modifiedBefore = Some(formattedTime)) + } + } + + test("SPARK-31962: modifiedBefore/modifiedAfter is specified with an invalid date") { + executeTestWithBadOption( + Map("modifiedBefore" -> "2024-05+1 01:00:00"), + Seq("The timestamp provided", "modifiedbefore", "2024-05+1 01:00:00")) + + executeTestWithBadOption( + Map("modifiedAfter" -> "2024-05+1 01:00:00"), + Seq("The timestamp provided", "modifiedafter", "2024-05+1 01:00:00")) + } + + test("SPARK-31962: modifiedBefore/modifiedAfter - empty option") { + executeTestWithBadOption( + Map("modifiedBefore" -> ""), + Seq("The timestamp provided", "modifiedbefore")) + + executeTestWithBadOption( + Map("modifiedAfter" -> ""), + Seq("The timestamp provided", "modifiedafter")) + } + + test("SPARK-31962: modifiedBefore/modifiedAfter filter takes into account local timezone " + + "when specified as an option.") { + Seq("modifiedbefore", "modifiedafter").foreach { filterName => + // CET = UTC + 1 hour, HST = UTC - 10 hours + Seq("CET", "HST").foreach { tzId => + testModifiedDateFilterWithTimezone(tzId, filterName) + } + } + } + + test("Option pathGlobFilter: filter files correctly") { + withTempPath { path => + val dataDir = path.getCanonicalPath + Seq("foo").toDS().write.text(dataDir) + Seq("bar").toDS().write.mode("append").orc(dataDir) + val df = spark.read.option("pathGlobFilter", "*.txt").text(dataDir) + checkAnswer(df, Row("foo")) + + // Both glob pattern in option and path should be effective to filter files. 
+ val df2 = spark.read.option("pathGlobFilter", "*.txt").text(dataDir + "/*.orc") + checkAnswer(df2, Seq.empty) + + val df3 = spark.read.option("pathGlobFilter", "*.txt").text(dataDir + "/*xt") + checkAnswer(df3, Row("foo")) + } + } + + test("Option pathGlobFilter: simple extension filtering should contains partition info") { + withTempPath { path => + val input = Seq(("foo", 1), ("oof", 2)).toDF("a", "b") + input.write.partitionBy("b").text(path.getCanonicalPath) + Seq("bar").toDS().write.mode("append").orc(path.getCanonicalPath + "/b=1") + + // If we use glob pattern in the path, the partition column won't be shown in the result. + val df = spark.read.text(path.getCanonicalPath + "/*/*.txt") + checkAnswer(df, input.select("a")) + + val df2 = spark.read.option("pathGlobFilter", "*.txt").text(path.getCanonicalPath) + checkAnswer(df2, input) + } + } + + private def executeTest( + dir: File, + fileDates: Seq[LocalDateTime], + expectedCount: Long, + modifiedBefore: Option[String] = None, + modifiedAfter: Option[String] = None): Unit = { + fileDates.foreach { fileDate => + val file = createSingleFile(dir) + setFileTime(fileDate, file) + } + + val schema = StructType(Seq(StructField("a", StringType))) + + var dfReader = spark.read.format("csv").option("timeZone", "UTC").schema(schema) + modifiedBefore.foreach { opt => dfReader = dfReader.option("modifiedBefore", opt) } + modifiedAfter.foreach { opt => dfReader = dfReader.option("modifiedAfter", opt) } + + if (expectedCount > 0) { + // without pathGlobFilter + val df1 = dfReader.load(dir.getCanonicalPath) + assert(df1.count() === expectedCount) + + // pathGlobFilter matched + val df2 = dfReader.option("pathGlobFilter", "*.csv").load(dir.getCanonicalPath) + assert(df2.count() === expectedCount) + + // pathGlobFilter mismatched + val df3 = dfReader.option("pathGlobFilter", "*.txt").load(dir.getCanonicalPath) + assert(df3.count() === 0) + } else { + val df = dfReader.load(dir.getCanonicalPath) + assert(df.count() === 0) + } + } + + private def executeTestWithBadOption( + options: Map[String, String], + expectedMsgParts: Seq[String]): Unit = { + withTempDir { dir => + createSingleFile(dir) + val exc = intercept[AnalysisException] { + var dfReader = spark.read.format("csv") + options.foreach { case (key, value) => + dfReader = dfReader.option(key, value) + } + dfReader.load(dir.getCanonicalPath) + } + expectedMsgParts.foreach { msg => assert(exc.getMessage.contains(msg)) } + } + } + + private def testModifiedDateFilterWithTimezone( + timezoneId: String, + filterParamName: String): Unit = { + val curTime = LocalDateTime.now(ZoneOffset.UTC) + val zoneId: ZoneId = DateTimeUtils.getTimeZone(timezoneId).toZoneId + val strategyTimeInMicros = + ModifiedDateFilter.toThreshold( + curTime.toString, + timezoneId, + filterParamName) + val strategyTimeInSeconds = strategyTimeInMicros / 1000 / 1000 + + val curTimeAsSeconds = curTime.atZone(zoneId).toEpochSecond + withClue(s"timezone: $timezoneId / param: $filterParamName,") { + assert(strategyTimeInSeconds === curTimeAsSeconds) + } + } + + private def createSingleFile(dir: File): File = { + val file = new File(dir, "temp" + Random.nextInt(1000000) + ".csv") + stringToFile(file, "text") + } + + private def setFileTime(time: LocalDateTime, file: File): Boolean = { + val sameTime = time.toEpochSecond(ZoneOffset.UTC) + file.setLastModified(sameTime * 1000) + } + + private def formatTime(time: LocalDateTime): String = { + time.format(DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss")) + } +} diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala index cf9664a9764be..718095003b096 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.streaming import java.io.File import java.net.URI +import java.time.{LocalDateTime, ZoneOffset} +import java.time.format.DateTimeFormatter import java.util.concurrent.atomic.AtomicLong import scala.collection.mutable @@ -40,7 +42,6 @@ import org.apache.spark.sql.execution.streaming.sources.MemorySink import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.util.StreamManualClock import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types._ import org.apache.spark.sql.types.{StructType, _} import org.apache.spark.util.Utils @@ -2054,6 +2055,47 @@ class FileStreamSourceSuite extends FileStreamSourceTest { } } + test("SPARK-31962: file stream source shouldn't allow modifiedBefore/modifiedAfter") { + def formatTime(time: LocalDateTime): String = { + time.format(DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss")) + } + + def assertOptionIsNotSupported(options: Map[String, String], path: String): Unit = { + val schema = StructType(Seq(StructField("a", StringType))) + var dsReader = spark.readStream + .format("csv") + .option("timeZone", "UTC") + .schema(schema) + + options.foreach { case (k, v) => dsReader = dsReader.option(k, v) } + + val df = dsReader.load(path) + + testStream(df)( + ExpectFailure[IllegalArgumentException]( + t => assert(t.getMessage.contains("is not allowed in file stream source")), + isFatalError = false) + ) + } + + withTempDir { dir => + // "modifiedBefore" + val futureTime = LocalDateTime.now(ZoneOffset.UTC).plusYears(1) + val formattedFutureTime = formatTime(futureTime) + assertOptionIsNotSupported(Map("modifiedBefore" -> formattedFutureTime), dir.getCanonicalPath) + + // "modifiedAfter" + val prevTime = LocalDateTime.now(ZoneOffset.UTC).minusYears(1) + val formattedPrevTime = formatTime(prevTime) + assertOptionIsNotSupported(Map("modifiedAfter" -> formattedPrevTime), dir.getCanonicalPath) + + // both + assertOptionIsNotSupported( + Map("modifiedBefore" -> formattedFutureTime, "modifiedAfter" -> formattedPrevTime), + dir.getCanonicalPath) + } + } + private def createFile(content: String, src: File, tmp: File): File = { val tempFile = Utils.tempFileWith(new File(tmp, "text")) val finalFile = new File(src, tempFile.getName) From 6d625ccd5b5a76a149e2070df31984610629a295 Mon Sep 17 00:00:00 2001 From: ulysses Date: Sun, 22 Nov 2020 15:36:44 -0800 Subject: [PATCH 0543/1009] [SPARK-33469][SQL] Add current_timezone function ### What changes were proposed in this pull request? Add a `CurrentTimeZone` function and replace the value at `Optimizer` side. ### Why are the changes needed? Let user get current timezone easily. Then user can call ``` SELECT current_timezone() ``` Presto: https://prestodb.io/docs/current/functions/datetime.html SQL Server: https://docs.microsoft.com/en-us/sql/t-sql/functions/current-timezone-transact-sql?view=sql-server-ver15 ### Does this PR introduce _any_ user-facing change? Yes, a new function. ### How was this patch tested? Add test. Closes #30400 from ulysses-you/SPARK-33469. 
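For illustration, a minimal sketch (not part of this patch, mirroring the new `DatasetSuite` test) showing that the result tracks the session-local timezone; the exact output column header may differ:

```
// Assumes a running SparkSession named `spark`.
spark.conf.set("spark.sql.session.timeZone", "Asia/Shanghai")
spark.sql("SELECT current_timezone()").show()
// +------------------+
// |current_timezone()|
// +------------------+
// |     Asia/Shanghai|
// +------------------+
```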
Lead-authored-by: ulysses Co-authored-by: ulysses-you Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/analysis/FunctionRegistry.scala | 1 + .../expressions/datetimeExpressions.scala | 15 +++++++++++++++ .../sql/catalyst/optimizer/finishAnalysis.scala | 3 +++ .../optimizer/ComputeCurrentTimeSuite.scala | 16 +++++++++++++++- .../sql-functions/sql-expression-schema.md | 3 ++- .../org/apache/spark/sql/DatasetSuite.scala | 8 ++++++++ .../sql/expressions/ExpressionInfoSuite.scala | 1 + 7 files changed, 45 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 508239077a70e..6fb9bed9625d5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -391,6 +391,7 @@ object FunctionRegistry { expression[AddMonths]("add_months"), expression[CurrentDate]("current_date"), expression[CurrentTimestamp]("current_timestamp"), + expression[CurrentTimeZone]("current_timezone"), expression[DateDiff]("datediff"), expression[DateAdd]("date_add"), expression[DateFormatClass]("date_format"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 97aacb3f7530c..9953b780ceace 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -73,6 +73,21 @@ trait TimestampFormatterHelper extends TimeZoneAwareExpression { } } +@ExpressionDescription( + usage = "_FUNC_() - Returns the current session local timezone.", + examples = """ + Examples: + > SELECT _FUNC_(); + Asia/Shanghai + """, + group = "datetime_funcs", + since = "3.1.0") +case class CurrentTimeZone() extends LeafExpression with Unevaluable { + override def nullable: Boolean = false + override def dataType: DataType = StringType + override def prettyName: String = "current_timezone" +} + /** * Returns the current date at the start of query evaluation. * There is no code generation since this expression should get constant folded by the optimizer. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala index 9aa7e3201ab1b..1f2389176d1e0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.connector.catalog.CatalogManager +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -75,6 +76,7 @@ object ComputeCurrentTime extends Rule[LogicalPlan] { val timeExpr = CurrentTimestamp() val timestamp = timeExpr.eval(EmptyRow).asInstanceOf[Long] val currentTime = Literal.create(timestamp, timeExpr.dataType) + val timezone = Literal.create(SQLConf.get.sessionLocalTimeZone, StringType) plan transformAllExpressions { case currentDate @ CurrentDate(Some(timeZoneId)) => @@ -84,6 +86,7 @@ object ComputeCurrentTime extends Rule[LogicalPlan] { DateType) }) case CurrentTimestamp() | Now() => currentTime + case CurrentTimeZone() => timezone } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ComputeCurrentTimeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ComputeCurrentTimeSuite.scala index db0399d2a73ee..82d6757407b51 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ComputeCurrentTimeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ComputeCurrentTimeSuite.scala @@ -20,11 +20,13 @@ package org.apache.spark.sql.catalyst.optimizer import java.time.ZoneId import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions.{Alias, CurrentDate, CurrentTimestamp, Literal} +import org.apache.spark.sql.catalyst.expressions.{Alias, CurrentDate, CurrentTimestamp, CurrentTimeZone, Literal} import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.unsafe.types.UTF8String class ComputeCurrentTimeSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { @@ -67,4 +69,16 @@ class ComputeCurrentTimeSuite extends PlanTest { assert(lits(1) >= min && lits(1) <= max) assert(lits(0) == lits(1)) } + + test("SPARK-33469: Add current_timezone function") { + val in = Project(Seq(Alias(CurrentTimeZone(), "c")()), LocalRelation()) + val plan = Optimize.execute(in.analyze).asInstanceOf[Project] + val lits = new scala.collection.mutable.ArrayBuffer[String] + plan.transformAllExpressions { case e: Literal => + lits += e.value.asInstanceOf[UTF8String].toString + e + } + assert(lits.size == 1) + assert(lits.head == SQLConf.get.sessionLocalTimeZone) + } } diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index da83df4994d8d..0a54dff3a1cea 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,6 +1,6 @@ ## Summary - - Number of queries: 341 + 
- Number of queries: 342 - Number of expressions that missing example: 13 - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint,window ## Schema of Built-in Functions @@ -86,6 +86,7 @@ | org.apache.spark.sql.catalyst.expressions.CurrentCatalog | current_catalog | SELECT current_catalog() | struct | | org.apache.spark.sql.catalyst.expressions.CurrentDatabase | current_database | SELECT current_database() | struct | | org.apache.spark.sql.catalyst.expressions.CurrentDate | current_date | SELECT current_date() | struct | +| org.apache.spark.sql.catalyst.expressions.CurrentTimeZone | current_timezone | SELECT current_timezone() | struct | | org.apache.spark.sql.catalyst.expressions.CurrentTimestamp | current_timestamp | SELECT current_timestamp() | struct | | org.apache.spark.sql.catalyst.expressions.DateAdd | date_add | SELECT date_add('2016-07-30', 1) | struct | | org.apache.spark.sql.catalyst.expressions.DateDiff | datediff | SELECT datediff('2009-07-31', '2009-07-30') | struct | diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 6a1378837ea9b..953a58760cd5c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -1947,6 +1947,14 @@ class DatasetSuite extends QueryTest df.where($"zoo".contains(Array('a', 'b'))), Seq(Row("abc"))) } + + test("SPARK-33469: Add current_timezone function") { + val df = Seq(1).toDF("c") + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "Asia/Shanghai") { + val timezone = df.selectExpr("current_timezone()").collect().head.getString(0) + assert(timezone == "Asia/Shanghai") + } + } } object AssertExecutionId { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala index 9f62ff8301ebc..6085c1f2cccb0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala @@ -149,6 +149,7 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession { "org.apache.spark.sql.catalyst.expressions.UnixTimestamp", "org.apache.spark.sql.catalyst.expressions.CurrentDate", "org.apache.spark.sql.catalyst.expressions.CurrentTimestamp", + "org.apache.spark.sql.catalyst.expressions.CurrentTimeZone", "org.apache.spark.sql.catalyst.expressions.Now", // Random output without a seed "org.apache.spark.sql.catalyst.expressions.Rand", From df4a1c2256b71c9a1bd2006819135f56c99a2f21 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 22 Nov 2020 16:40:54 -0800 Subject: [PATCH 0544/1009] [SPARK-33512][BUILD] Upgrade test libraries ### What changes were proposed in this pull request? This PR aims to update the test libraries. - ScalaTest: 3.2.0 -> 3.2.3 - JUnit: 4.12 -> 4.13.1 - Mockito: 3.1.0 -> 3.4.6 - JMock: 2.8.4 -> 2.12.0 - maven-surefire-plugin: 3.0.0-M3 -> 3.0.0-M5 - scala-maven-plugin: 4.3.0 -> 4.4.0 ### Why are the changes needed? This will make the test frameworks up-to-date for Apache Spark 3.1.0. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #30456 from dongjoon-hyun/SPARK-33512. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- pom.xml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pom.xml b/pom.xml index 85cf5a00b0b24..0ab5a8c5b3efa 100644 --- a/pom.xml +++ b/pom.xml @@ -931,7 +931,7 @@ org.scalatest scalatest_${scala.binary.version} - 3.2.0 + 3.2.3 test @@ -955,14 +955,14 @@ org.mockito mockito-core - 3.1.0 + 3.4.6 test org.jmock jmock-junit4 test - 2.8.4 + 2.12.0 org.scalacheck @@ -973,7 +973,7 @@ junit junit - 4.12 + 4.13.1 test @@ -2498,7 +2498,7 @@ net.alchim31.maven scala-maven-plugin - 4.3.0 + 4.4.0 eclipse-add-source @@ -2573,7 +2573,7 @@ org.apache.maven.plugins maven-surefire-plugin - 3.0.0-M3 + 3.0.0-M5 From a45923852342ce3f9454743a71740b09e6efe859 Mon Sep 17 00:00:00 2001 From: William Hyun Date: Mon, 23 Nov 2020 10:38:40 +0900 Subject: [PATCH 0545/1009] [MINOR][INFRA] Suppress warning in check-license ### What changes were proposed in this pull request? This PR aims to suppress the warning `File exists` in check-license ### Why are the changes needed? **BEFORE** ``` % dev/check-license Attempting to fetch rat RAT checks passed. % dev/check-license mkdir: target: File exists RAT checks passed. ``` **AFTER** ``` % dev/check-license Attempting to fetch rat RAT checks passed. % dev/check-license RAT checks passed. ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually do dev/check-license twice. Closes #30460 from williamhyun/checklicense. Authored-by: William Hyun Signed-off-by: HyukjinKwon --- dev/check-license | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/check-license b/dev/check-license index 0cc17ffe55c67..bd255954d6db4 100755 --- a/dev/check-license +++ b/dev/check-license @@ -67,7 +67,7 @@ mkdir -p "$FWDIR"/lib exit 1 } -mkdir target +mkdir -p target $java_cmd -jar "$rat_jar" -E "$FWDIR"/dev/.rat-excludes -d "$FWDIR" > target/rat-results.txt if [ $? -ne 0 ]; then From aa78c05edc9cb910cca9fb14f7670559fe00c62d Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 23 Nov 2020 10:42:28 +0900 Subject: [PATCH 0546/1009] [SPARK-33427][SQL][FOLLOWUP] Put key and value into IdentityHashMap sequantially ### What changes were proposed in this pull request? This follow-up fixes an issue when inserting key/value pairs into `IdentityHashMap` in `SubExprEvaluationRuntime`. ### Why are the changes needed? The last commits to #30341 follows review comment to use `IdentityHashMap`. Because we leverage `IdentityHashMap` to compare keys in reference, we should not convert expression pairs to Scala map before inserting. Scala map compares keys by equality so we will loss keys with different references. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Run benchmark to verify. Closes #30459 from viirya/SPARK-33427-map. 
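For illustration only (not part of this patch): a minimal, self-contained Scala sketch of the behavior the fix relies on. The `Expr` case class and the object name are hypothetical stand-ins for two semantically equal common subexpressions held as distinct object references; routing the pairs through a Scala `Map` first collapses them (value equality), while inserting each pair directly into the `IdentityHashMap` keeps both (reference equality).

```
import java.util.IdentityHashMap
import scala.collection.JavaConverters._

object IdentityInsertSketch {
  // Hypothetical stand-in for two common subexpressions that are semantically
  // equal but held as two distinct object references.
  case class Expr(sql: String)

  def main(args: Array[String]): Unit = {
    val e1 = Expr("1 * 2")
    val e2 = Expr("1 * 2") // equal by value, different reference

    // Going through a Scala Map first compares keys by equality, so one key is lost.
    val viaScalaMap = new IdentityHashMap[Expr, String]()
    viaScalaMap.putAll(Seq(e1, e2).map(_ -> "proxy").toMap.asJava)
    println(viaScalaMap.size()) // 1

    // Inserting each pair directly keeps both keys, since IdentityHashMap
    // compares keys only by reference.
    val direct = new IdentityHashMap[Expr, String]()
    Seq(e1, e2).foreach(direct.put(_, "proxy"))
    println(direct.size()) // 2
  }
}
```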
Authored-by: Liang-Chi Hsieh Signed-off-by: HyukjinKwon --- .../SubExprEvaluationRuntime.scala | 9 +++++--- .../SubExprEvaluationRuntimeSuite.scala | 22 +++++++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntime.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntime.scala index 3189d81289903..ff9c4cf3147d5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntime.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntime.scala @@ -18,8 +18,6 @@ package org.apache.spark.sql.catalyst.expressions import java.util.IdentityHashMap -import scala.collection.JavaConverters._ - import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache} import com.google.common.util.concurrent.{ExecutionError, UncheckedExecutionException} @@ -98,7 +96,12 @@ class SubExprEvaluationRuntime(cacheMaxEntries: Int) { val proxy = ExpressionProxy(expr, proxyExpressionCurrentId, this) proxyExpressionCurrentId += 1 - proxyMap.putAll(e.map(_ -> proxy).toMap.asJava) + // We leverage `IdentityHashMap` so we compare expression keys by reference here. + // So for example if there are one group of common exprs like Seq(common expr 1, + // common expr2, ..., common expr n), we will insert into `proxyMap` some key/value + // pairs like Map(common expr 1 -> proxy(common expr 1), ..., + // common expr n -> proxy(common expr 1)). + e.map(proxyMap.put(_, proxy)) } // Only adding proxy if we find subexpressions. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntimeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntimeSuite.scala index 64b619ca7766b..f8dca266a62d4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntimeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntimeSuite.scala @@ -95,4 +95,26 @@ class SubExprEvaluationRuntimeSuite extends SparkFunSuite { }) assert(proxys.isEmpty) } + + test("SubExprEvaluationRuntime should wrap semantically equal exprs") { + val runtime = new SubExprEvaluationRuntime(1) + + val one = Literal(1) + val two = Literal(2) + def mul: (Literal, Literal) => Expression = + (left: Literal, right: Literal) => Multiply(left, right) + + val mul2_1 = Multiply(mul(one, two), mul(one, two)) + val mul2_2 = Multiply(mul(one, two), mul(one, two)) + + val sqrt = Sqrt(mul2_1) + val sum = Add(mul2_2, sqrt) + val proxyExpressions = runtime.proxyExpressions(Seq(sum)) + val proxys = proxyExpressions.flatMap(_.collect { + case p: ExpressionProxy => p + }) + // ( (one * two) * (one * two) ) + assert(proxys.size == 2) + assert(proxys.forall(_.child.semanticEquals(mul2_1))) + } } From 0bb911d979955ac59adc39818667b616eb539103 Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Mon, 23 Nov 2020 15:19:34 +0900 Subject: [PATCH 0547/1009] [SPARK-33143][PYTHON] Add configurable timeout to python server and client ### What changes were proposed in this pull request? Spark creates local server to serialize several type of data for python. 
The python code tries to connect to the server, immediately after it's created but there are several system calls in between (this may change in each Spark version): * getaddrinfo * socket * settimeout * connect Under some circumstances in heavy user environments these calls can be super slow (more than 15 seconds). These issues must be analyzed one-by-one but since these are system calls the underlying OS and/or DNS servers must be debugged and fixed. This is not trivial task and at the same time data processing must work somehow. In this PR I'm only intended to add a configuration possibility to increase the mentioned timeouts in order to be able to provide temporary workaround. The rootcause analysis is ongoing but I think this can vary in each case. Because the server part doesn't contain huge amount of log entries to with one can measure time, I've added some. ### Why are the changes needed? Provide workaround when localhost python server connection timeout appears. ### Does this PR introduce _any_ user-facing change? Yes, new configuration added. ### How was this patch tested? Existing unit tests + manual test. ``` #Compile Spark echo "spark.io.encryption.enabled true" >> conf/spark-defaults.conf echo "spark.python.authenticate.socketTimeout 10" >> conf/spark-defaults.conf $ ./bin/pyspark Python 3.8.5 (default, Jul 21 2020, 10:48:26) [Clang 11.0.3 (clang-1103.0.32.62)] on darwin Type "help", "copyright", "credits" or "license" for more information. 20/11/20 10:17:03 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable Setting default log level to "WARN". To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel). 20/11/20 10:17:03 WARN SparkEnv: I/O encryption enabled without RPC encryption: keys will be visible on the wire. Welcome to ____ __ / __/__ ___ _____/ /__ _\ \/ _ \/ _ `/ __/ '_/ /__ / .__/\_,_/_/ /_/\_\ version 3.1.0-SNAPSHOT /_/ Using Python version 3.8.5 (default, Jul 21 2020 10:48:26) Spark context Web UI available at http://192.168.0.189:4040 Spark context available as 'sc' (master = local[*], app id = local-1605863824276). SparkSession available as 'spark'. >>> sc.setLogLevel("TRACE") >>> sc.parallelize([0, 2, 3, 4, 6], 5).glom().collect() 20/11/20 10:17:09 TRACE PythonParallelizeServer: Creating listening socket 20/11/20 10:17:09 TRACE PythonParallelizeServer: Setting timeout to 10 sec 20/11/20 10:17:09 TRACE PythonParallelizeServer: Waiting for connection on port 59726 20/11/20 10:17:09 TRACE PythonParallelizeServer: Connection accepted from address /127.0.0.1:59727 20/11/20 10:17:09 TRACE PythonParallelizeServer: Client authenticated 20/11/20 10:17:09 TRACE PythonParallelizeServer: Closing server ... 20/11/20 10:17:10 TRACE SocketFuncServer: Creating listening socket 20/11/20 10:17:10 TRACE SocketFuncServer: Setting timeout to 10 sec 20/11/20 10:17:10 TRACE SocketFuncServer: Waiting for connection on port 59735 20/11/20 10:17:10 TRACE SocketFuncServer: Connection accepted from address /127.0.0.1:59736 20/11/20 10:17:10 TRACE SocketFuncServer: Client authenticated 20/11/20 10:17:10 TRACE SocketFuncServer: Closing server [[0], [2], [3], [4], [6]] >>> ``` Closes #30389 from gaborgsomogyi/SPARK-33143. 
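As an illustrative usage note (not part of this patch): because the new key is an ordinary Spark conf read on the JVM side and exported to the Python side through `SPARK_AUTH_SOCKET_TIMEOUT`, it can also be raised programmatically instead of via spark-defaults.conf. The object name, master URL and the 30s value below are arbitrary assumptions; the key and its 15s default come from this patch.

```
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

object AuthSocketTimeoutSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("local[*]")
      .setAppName("auth-socket-timeout-sketch")
      // Default is 15s; raise it when the local auth handshake is slow.
      .set("spark.python.authenticate.socketTimeout", "30s")
    val spark = SparkSession.builder().config(conf).getOrCreate()
    // Python-serving sockets created by this session now wait up to 30 seconds
    // for a connection instead of the previously hard-coded 15.
    spark.stop()
  }
}
```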
Lead-authored-by: Gabor Somogyi Co-authored-by: Hyukjin Kwon Co-authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- .../apache/spark/api/python/PythonRunner.scala | 2 ++ .../apache/spark/api/python/PythonUtils.scala | 4 ++++ .../apache/spark/internal/config/Python.scala | 6 ++++++ .../spark/security/SocketAuthHelper.scala | 2 +- .../spark/security/SocketAuthServer.scala | 17 +++++++++++++---- python/pyspark/context.py | 2 ++ python/pyspark/java_gateway.py | 2 +- 7 files changed, 29 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala index 136da80d48dee..f49cb3c2b8836 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala @@ -80,6 +80,7 @@ private[spark] abstract class BasePythonRunner[IN, OUT]( private val conf = SparkEnv.get.conf protected val bufferSize: Int = conf.get(BUFFER_SIZE) + protected val authSocketTimeout = conf.get(PYTHON_AUTH_SOCKET_TIMEOUT) private val reuseWorker = conf.get(PYTHON_WORKER_REUSE) protected val simplifiedTraceback: Boolean = false @@ -139,6 +140,7 @@ private[spark] abstract class BasePythonRunner[IN, OUT]( if (workerMemoryMb.isDefined) { envVars.put("PYSPARK_EXECUTOR_MEMORY_MB", workerMemoryMb.get.toString) } + envVars.put("SPARK_AUTH_SOCKET_TIMEOUT", authSocketTimeout.toString) envVars.put("SPARK_BUFFER_SIZE", bufferSize.toString) val worker: Socket = env.createPythonWorker(pythonExec, envVars.asScala.toMap) // Whether is the worker released into idle pool or closed. When any codes try to release or diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala index 527d0d6d3a48d..33849f6fcb65f 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala @@ -85,4 +85,8 @@ private[spark] object PythonUtils { def getBroadcastThreshold(sc: JavaSparkContext): Long = { sc.conf.get(org.apache.spark.internal.config.BROADCAST_FOR_UDF_COMPRESSION_THRESHOLD) } + + def getPythonAuthSocketTimeout(sc: JavaSparkContext): Long = { + sc.conf.get(org.apache.spark.internal.config.Python.PYTHON_AUTH_SOCKET_TIMEOUT) + } } diff --git a/core/src/main/scala/org/apache/spark/internal/config/Python.scala b/core/src/main/scala/org/apache/spark/internal/config/Python.scala index 188d884319644..348a33e129d65 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/Python.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/Python.scala @@ -50,4 +50,10 @@ private[spark] object Python { .version("2.4.0") .bytesConf(ByteUnit.MiB) .createOptional + + val PYTHON_AUTH_SOCKET_TIMEOUT = ConfigBuilder("spark.python.authenticate.socketTimeout") + .internal() + .version("3.1.0") + .timeConf(TimeUnit.SECONDS) + .createWithDefaultString("15s") } diff --git a/core/src/main/scala/org/apache/spark/security/SocketAuthHelper.scala b/core/src/main/scala/org/apache/spark/security/SocketAuthHelper.scala index dbcb376905338..f800553c5388b 100644 --- a/core/src/main/scala/org/apache/spark/security/SocketAuthHelper.scala +++ b/core/src/main/scala/org/apache/spark/security/SocketAuthHelper.scala @@ -34,7 +34,7 @@ import org.apache.spark.util.Utils * * There's no secrecy, so this relies on the sockets being either local or somehow encrypted. 
*/ -private[spark] class SocketAuthHelper(conf: SparkConf) { +private[spark] class SocketAuthHelper(val conf: SparkConf) { val secret = Utils.createSecret(conf) diff --git a/core/src/main/scala/org/apache/spark/security/SocketAuthServer.scala b/core/src/main/scala/org/apache/spark/security/SocketAuthServer.scala index 548fd1b07ddc5..35990b5a59281 100644 --- a/core/src/main/scala/org/apache/spark/security/SocketAuthServer.scala +++ b/core/src/main/scala/org/apache/spark/security/SocketAuthServer.scala @@ -25,6 +25,8 @@ import scala.concurrent.duration.Duration import scala.util.Try import org.apache.spark.SparkEnv +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.Python.PYTHON_AUTH_SOCKET_TIMEOUT import org.apache.spark.network.util.JavaUtils import org.apache.spark.util.{ThreadUtils, Utils} @@ -34,11 +36,11 @@ import org.apache.spark.util.{ThreadUtils, Utils} * handling one batch of data, with authentication and error handling. * * The socket server can only accept one connection, or close if no connection - * in 15 seconds. + * in configurable amount of seconds (default 15). */ private[spark] abstract class SocketAuthServer[T]( authHelper: SocketAuthHelper, - threadName: String) { + threadName: String) extends Logging { def this(env: SparkEnv, threadName: String) = this(new SocketAuthHelper(env.conf), threadName) def this(threadName: String) = this(SparkEnv.get, threadName) @@ -46,19 +48,26 @@ private[spark] abstract class SocketAuthServer[T]( private val promise = Promise[T]() private def startServer(): (Int, String) = { + logTrace("Creating listening socket") val serverSocket = new ServerSocket(0, 1, InetAddress.getByAddress(Array(127, 0, 0, 1))) - // Close the socket if no connection in 15 seconds - serverSocket.setSoTimeout(15000) + // Close the socket if no connection in the configured seconds + val timeout = authHelper.conf.get(PYTHON_AUTH_SOCKET_TIMEOUT).toInt + logTrace(s"Setting timeout to $timeout sec") + serverSocket.setSoTimeout(timeout * 1000) new Thread(threadName) { setDaemon(true) override def run(): Unit = { var sock: Socket = null try { + logTrace(s"Waiting for connection on port ${serverSocket.getLocalPort}") sock = serverSocket.accept() + logTrace(s"Connection accepted from address ${sock.getRemoteSocketAddress}") authHelper.authClient(sock) + logTrace("Client authenticated") promise.complete(Try(handleConnection(sock))) } finally { + logTrace("Closing server") JavaUtils.closeQuietly(serverSocket) JavaUtils.closeQuietly(sock) } diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 9c9e3f4b3c881..1bd5961e0525a 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -222,6 +222,8 @@ def _do_init(self, master, appName, sparkHome, pyFiles, environment, batchSize, # data via a socket. # scala's mangled names w/ $ in them require special treatment. 
self._encryption_enabled = self._jvm.PythonUtils.isEncryptionEnabled(self._jsc) + os.environ["SPARK_AUTH_SOCKET_TIMEOUT"] = \ + str(self._jvm.PythonUtils.getPythonAuthSocketTimeout(self._jsc)) self.pythonExec = os.environ.get("PYSPARK_PYTHON", 'python') self.pythonVer = "%d.%d" % sys.version_info[:2] diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py index eafa5d90f9ff8..fe2e326dff8be 100644 --- a/python/pyspark/java_gateway.py +++ b/python/pyspark/java_gateway.py @@ -201,7 +201,7 @@ def local_connect_and_auth(port, auth_secret): af, socktype, proto, _, sa = res try: sock = socket.socket(af, socktype, proto) - sock.settimeout(15) + sock.settimeout(int(os.environ.get("SPARK_AUTH_SOCKET_TIMEOUT", 15))) sock.connect(sa) sockfile = sock.makefile("rwb", int(os.environ.get("SPARK_BUFFER_SIZE", 65536))) _do_server_auth(sockfile, auth_secret) From 84e70362dbf2bbebc7f1a1b734b99952d7e95e4d Mon Sep 17 00:00:00 2001 From: William Hyun Date: Sun, 22 Nov 2020 22:56:59 -0800 Subject: [PATCH 0548/1009] [SPARK-33510][BUILD] Update SBT to 1.4.4 ### What changes were proposed in this pull request? This PR aims to update SBT from 1.4.2 to 1.4.4. ### Why are the changes needed? This will bring the latest bug fixes. - https://github.com/sbt/sbt/releases/tag/v1.4.3 - https://github.com/sbt/sbt/releases/tag/v1.4.4 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #30453 from williamhyun/sbt143. Authored-by: William Hyun Signed-off-by: Dongjoon Hyun --- dev/mima | 4 ++-- project/build.properties | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/mima b/dev/mima index f324c5c00a45c..d214bb96e09a3 100755 --- a/dev/mima +++ b/dev/mima @@ -25,8 +25,8 @@ FWDIR="$(cd "`dirname "$0"`"/..; pwd)" cd "$FWDIR" SPARK_PROFILES=${1:-"-Pmesos -Pkubernetes -Pyarn -Pspark-ganglia-lgpl -Pkinesis-asl -Phive-thriftserver -Phive"} -TOOLS_CLASSPATH="$(build/sbt -DcopyDependencies=false "export tools/fullClasspath" | tail -n1)" -OLD_DEPS_CLASSPATH="$(build/sbt -DcopyDependencies=false $SPARK_PROFILES "export oldDeps/fullClasspath" | tail -n1)" +TOOLS_CLASSPATH="$(build/sbt -DcopyDependencies=false "export tools/fullClasspath" | grep jar | tail -n1)" +OLD_DEPS_CLASSPATH="$(build/sbt -DcopyDependencies=false $SPARK_PROFILES "export oldDeps/fullClasspath" | grep jar | tail -n1)" rm -f .generated-mima* diff --git a/project/build.properties b/project/build.properties index 5ec1d700fd2a8..c92de941c10be 100644 --- a/project/build.properties +++ b/project/build.properties @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -sbt.version=1.4.2 +sbt.version=1.4.4 From c891e025b8ed34392fbc81e988b75bdbdb268c11 Mon Sep 17 00:00:00 2001 From: Xiao Li Date: Mon, 23 Nov 2020 17:43:58 +0900 Subject: [PATCH 0549/1009] Revert "[SPARK-32481][CORE][SQL] Support truncate table to move data to trash" ### What changes were proposed in this pull request? This reverts commit 065f17386d1851d732b4c1badf1ce2e14d0de338, which is not part of any released version. That is, this is an unreleased feature ### Why are the changes needed? I like the concept of Trash, but I think this PR might just resolve a very specific issue by introducing a mechanism without a proper design doc. This could make the usage more complex. I think we need to consider the big picture. Trash directory is an important concept. 
If we decide to introduce it, we should consider all the code paths of Spark SQL that could delete the data, instead of Truncate only. We also need to consider what is the current behavior if the underlying file system does not provide the API `Trash.moveToAppropriateTrash`. Is the exception good? How about the performance when users are using the object store instead of HDFS? Will it impact the GDPR compliance? In sum, I think we should not merge the PR https://github.com/apache/spark/pull/29552 without the design doc and implementation plan. That is why I reverted it before the code freeze of Spark 3.1 ### Does this PR introduce _any_ user-facing change? Reverted the original commit ### How was this patch tested? The existing tests. Closes #30463 from gatorsmile/revertSpark-32481. Authored-by: Xiao Li Signed-off-by: HyukjinKwon --- .../scala/org/apache/spark/util/Utils.scala | 25 +----- .../apache/spark/sql/internal/SQLConf.scala | 14 ---- .../spark/sql/execution/command/tables.scala | 4 +- .../sql/execution/command/DDLSuite.scala | 78 ------------------- 4 files changed, 2 insertions(+), 119 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 6ccf65b737c1a..71a310a4279ad 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -50,7 +50,7 @@ import com.google.common.net.InetAddresses import org.apache.commons.codec.binary.Hex import org.apache.commons.lang3.SystemUtils import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.{FileSystem, FileUtil, Path, Trash} +import org.apache.hadoop.fs.{FileSystem, FileUtil, Path} import org.apache.hadoop.io.compress.{CompressionCodecFactory, SplittableCompressionCodec} import org.apache.hadoop.security.UserGroupInformation import org.apache.hadoop.yarn.conf.YarnConfiguration @@ -269,29 +269,6 @@ private[spark] object Utils extends Logging { file.setExecutable(true, true) } - /** - * Move data to trash if 'spark.sql.truncate.trash.enabled' is true, else - * delete the data permanently. If move data to trash failed fallback to hard deletion. - */ - def moveToTrashOrDelete( - fs: FileSystem, - partitionPath: Path, - isTrashEnabled: Boolean, - hadoopConf: Configuration): Boolean = { - if (isTrashEnabled) { - logDebug(s"Try to move data ${partitionPath.toString} to trash") - val isSuccess = Trash.moveToAppropriateTrash(fs, partitionPath, hadoopConf) - if (!isSuccess) { - logWarning(s"Failed to move data ${partitionPath.toString} to trash. " + - "Fallback to hard deletion") - return fs.delete(partitionPath, true) - } - isSuccess - } else { - fs.delete(partitionPath, true) - } - } - /** * Create a directory given the abstract pathname * @return true, if the directory is successfully created; otherwise, return false. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index fcf222c8fdab0..ef974dc176e51 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2913,18 +2913,6 @@ object SQLConf { .booleanConf .createWithDefault(false) - val TRUNCATE_TRASH_ENABLED = - buildConf("spark.sql.truncate.trash.enabled") - .doc("This configuration decides when truncating table, whether data files will be moved " + - "to trash directory or deleted permanently. 
The trash retention time is controlled by " + - "'fs.trash.interval', and in default, the server side configuration value takes " + - "precedence over the client-side one. Note that if 'fs.trash.interval' is non-positive, " + - "this will be a no-op and log a warning message. If the data fails to be moved to " + - "trash, Spark will turn to delete it permanently.") - .version("3.1.0") - .booleanConf - .createWithDefault(false) - val DISABLED_JDBC_CONN_PROVIDER_LIST = buildConf("spark.sql.sources.disabledJdbcConnProviderList") .internal() @@ -3577,8 +3565,6 @@ class SQLConf extends Serializable with Logging { def legacyPathOptionBehavior: Boolean = getConf(SQLConf.LEGACY_PATH_OPTION_BEHAVIOR) - def truncateTrashEnabled: Boolean = getConf(SQLConf.TRUNCATE_TRASH_ENABLED) - def disabledJdbcConnectionProviders: String = getConf(SQLConf.DISABLED_JDBC_CONN_PROVIDER_LIST) /** ********************** SQLConf functionality methods ************ */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 206f952fed0ca..847052cd4fcde 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -48,7 +48,6 @@ import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetDataSourceV2 import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils -import org.apache.spark.util.Utils /** * A command to create a table with the same definition of the given existing table. @@ -490,7 +489,6 @@ case class TruncateTableCommand( } val hadoopConf = spark.sessionState.newHadoopConf() val ignorePermissionAcl = SQLConf.get.truncateTableIgnorePermissionAcl - val isTrashEnabled = SQLConf.get.truncateTrashEnabled locations.foreach { location => if (location.isDefined) { val path = new Path(location.get) @@ -515,7 +513,7 @@ case class TruncateTableCommand( } } - Utils.moveToTrashOrDelete(fs, path, isTrashEnabled, hadoopConf) + fs.delete(path, true) // We should keep original permission/acl of the path. // For owner/group, only super-user can set it, for example on HDFS. 
Because diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 9d0147048dbb8..43a33860d262e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -3104,84 +3104,6 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { assert(spark.sessionState.catalog.isRegisteredFunction(rand)) } } - - test("SPARK-32481 Move data to trash on truncate table if enabled") { - val trashIntervalKey = "fs.trash.interval" - withTable("tab1") { - withSQLConf(SQLConf.TRUNCATE_TRASH_ENABLED.key -> "true") { - sql("CREATE TABLE tab1 (col INT) USING parquet") - sql("INSERT INTO tab1 SELECT 1") - // scalastyle:off hadoopconfiguration - val hadoopConf = spark.sparkContext.hadoopConfiguration - // scalastyle:on hadoopconfiguration - val originalValue = hadoopConf.get(trashIntervalKey, "0") - val tablePath = new Path(spark.sessionState.catalog - .getTableMetadata(TableIdentifier("tab1")).storage.locationUri.get) - - val fs = tablePath.getFileSystem(hadoopConf) - val trashCurrent = new Path(fs.getHomeDirectory, ".Trash/Current") - val trashPath = Path.mergePaths(trashCurrent, tablePath) - assume( - fs.mkdirs(trashPath) && fs.delete(trashPath, false), - "Trash directory could not be created, skipping.") - assert(!fs.exists(trashPath)) - try { - hadoopConf.set(trashIntervalKey, "5") - sql("TRUNCATE TABLE tab1") - } finally { - hadoopConf.set(trashIntervalKey, originalValue) - } - assert(fs.exists(trashPath)) - fs.delete(trashPath, true) - } - } - } - - test("SPARK-32481 delete data permanently on truncate table if trash interval is non-positive") { - val trashIntervalKey = "fs.trash.interval" - withTable("tab1") { - withSQLConf(SQLConf.TRUNCATE_TRASH_ENABLED.key -> "true") { - sql("CREATE TABLE tab1 (col INT) USING parquet") - sql("INSERT INTO tab1 SELECT 1") - // scalastyle:off hadoopconfiguration - val hadoopConf = spark.sparkContext.hadoopConfiguration - // scalastyle:on hadoopconfiguration - val originalValue = hadoopConf.get(trashIntervalKey, "0") - val tablePath = new Path(spark.sessionState.catalog - .getTableMetadata(TableIdentifier("tab1")).storage.locationUri.get) - - val fs = tablePath.getFileSystem(hadoopConf) - val trashCurrent = new Path(fs.getHomeDirectory, ".Trash/Current") - val trashPath = Path.mergePaths(trashCurrent, tablePath) - assert(!fs.exists(trashPath)) - try { - hadoopConf.set(trashIntervalKey, "0") - sql("TRUNCATE TABLE tab1") - } finally { - hadoopConf.set(trashIntervalKey, originalValue) - } - assert(!fs.exists(trashPath)) - } - } - } - - test("SPARK-32481 Do not move data to trash on truncate table if disabled") { - withTable("tab1") { - withSQLConf(SQLConf.TRUNCATE_TRASH_ENABLED.key -> "false") { - sql("CREATE TABLE tab1 (col INT) USING parquet") - sql("INSERT INTO tab1 SELECT 1") - val hadoopConf = spark.sessionState.newHadoopConf() - val tablePath = new Path(spark.sessionState.catalog - .getTableMetadata(TableIdentifier("tab1")).storage.locationUri.get) - - val fs = tablePath.getFileSystem(hadoopConf) - val trashCurrent = new Path(fs.getHomeDirectory, ".Trash/Current") - val trashPath = Path.mergePaths(trashCurrent, tablePath) - sql("TRUNCATE TABLE tab1") - assert(!fs.exists(trashPath)) - } - } - } } object FakeLocalFsFileSystem { From 60f3a730e4e67c3b67d6e45fb18f589ad66b07e6 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Mon, 23 Nov 2020 
08:54:00 +0000 Subject: [PATCH 0550/1009] [SPARK-33515][SQL] Improve exception messages while handling UnresolvedTable ### What changes were proposed in this pull request? This PR proposes to improve the exception messages while `UnresolvedTable` is handled based on this suggestion: https://github.com/apache/spark/pull/30321#discussion_r521127001. Currently, when an identifier is resolved to a view when a table is expected, the following exception message is displayed (e.g., for `COMMENT ON TABLE`): ``` v is a temp view not table. ``` After this PR, the message will be: ``` v is a temp view. 'COMMENT ON TABLE' expects a table. ``` Also, if an identifier is not resolved, the following exception message is currently used: ``` Table not found: t ``` After this PR, the message will be: ``` Table not found for 'COMMENT ON TABLE': t ``` ### Why are the changes needed? To improve the exception message. ### Does this PR introduce _any_ user-facing change? Yes, the exception message will be changed as described above. ### How was this patch tested? Updated existing tests. Closes #30461 from imback82/unresolved_table_message. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/analysis/Analyzer.scala | 10 +++++----- .../sql/catalyst/analysis/CheckAnalysis.scala | 2 +- .../catalyst/analysis/v2ResolutionPlans.scala | 4 +++- .../spark/sql/catalyst/parser/AstBuilder.scala | 12 ++++++++---- .../sql/catalyst/parser/DDLParserSuite.scala | 18 +++++++++--------- .../sql/connector/DataSourceV2SQLSuite.scala | 3 ++- .../spark/sql/execution/SQLViewSuite.scala | 8 ++++---- .../sql/hive/execution/HiveDDLSuite.scala | 4 ++-- 8 files changed, 34 insertions(+), 27 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 53c0ff687c6d2..837686420375a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -861,9 +861,9 @@ class Analyzer(override val catalogManager: CatalogManager) }.getOrElse(write) case _ => write } - case u @ UnresolvedTable(ident) => + case u @ UnresolvedTable(ident, cmd) => lookupTempView(ident).foreach { _ => - u.failAnalysis(s"${ident.quoted} is a temp view not table.") + u.failAnalysis(s"${ident.quoted} is a temp view. '$cmd' expects a table") } u case u @ UnresolvedTableOrView(ident, allowTempView) => @@ -950,7 +950,7 @@ class Analyzer(override val catalogManager: CatalogManager) SubqueryAlias(catalog.get.name +: ident.namespace :+ ident.name, relation) }.getOrElse(u) - case u @ UnresolvedTable(NonSessionCatalogAndIdentifier(catalog, ident)) => + case u @ UnresolvedTable(NonSessionCatalogAndIdentifier(catalog, ident), _) => CatalogV2Util.loadTable(catalog, ident) .map(ResolvedTable(catalog.asTableCatalog, ident, _)) .getOrElse(u) @@ -1077,11 +1077,11 @@ class Analyzer(override val catalogManager: CatalogManager) lookupRelation(u.multipartIdentifier, u.options, u.isStreaming) .map(resolveViews).getOrElse(u) - case u @ UnresolvedTable(identifier) => + case u @ UnresolvedTable(identifier, cmd) => lookupTableOrView(identifier).map { case v: ResolvedView => val viewStr = if (v.isTemp) "temp view" else "view" - u.failAnalysis(s"${v.identifier.quoted} is a $viewStr not table.") + u.failAnalysis(s"${v.identifier.quoted} is a $viewStr. 
'$cmd' expects a table.'") case table => table }.getOrElse(u) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 452ba80b23441..9998035d65c3f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -98,7 +98,7 @@ trait CheckAnalysis extends PredicateHelper { u.failAnalysis(s"Namespace not found: ${u.multipartIdentifier.quoted}") case u: UnresolvedTable => - u.failAnalysis(s"Table not found: ${u.multipartIdentifier.quoted}") + u.failAnalysis(s"Table not found for '${u.commandName}': ${u.multipartIdentifier.quoted}") case u: UnresolvedTableOrView => u.failAnalysis(s"Table or view not found: ${u.multipartIdentifier.quoted}") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala index 98bd84fb94bd6..0e883a88f2691 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala @@ -37,7 +37,9 @@ case class UnresolvedNamespace(multipartIdentifier: Seq[String]) extends LeafNod * Holds the name of a table that has yet to be looked up in a catalog. It will be resolved to * [[ResolvedTable]] during analysis. */ -case class UnresolvedTable(multipartIdentifier: Seq[String]) extends LeafNode { +case class UnresolvedTable( + multipartIdentifier: Seq[String], + commandName: String) extends LeafNode { override lazy val resolved: Boolean = false override def output: Seq[Attribute] = Nil diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 23de8ab09dd0a..ea4baafbacede 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3303,7 +3303,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg */ override def visitLoadData(ctx: LoadDataContext): LogicalPlan = withOrigin(ctx) { LoadData( - child = UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier)), + child = UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier), "LOAD DATA"), path = string(ctx.path), isLocal = ctx.LOCAL != null, isOverwrite = ctx.OVERWRITE != null, @@ -3449,7 +3449,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg UnresolvedPartitionSpec(spec, location) } AlterTableAddPartition( - UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier)), + UnresolvedTable( + visitMultipartIdentifier(ctx.multipartIdentifier), + "ALTER TABLE ... ADD PARTITION ..."), specsAndLocs.toSeq, ctx.EXISTS != null) } @@ -3491,7 +3493,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg val partSpecs = ctx.partitionSpec.asScala.map(visitNonOptionalPartitionSpec) .map(spec => UnresolvedPartitionSpec(spec)) AlterTableDropPartition( - UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier)), + UnresolvedTable( + visitMultipartIdentifier(ctx.multipartIdentifier), + "ALTER TABLE ... 
DROP PARTITION ..."), partSpecs.toSeq, ifExists = ctx.EXISTS != null, purge = ctx.PURGE != null, @@ -3720,6 +3724,6 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg case _ => string(ctx.STRING) } val nameParts = visitMultipartIdentifier(ctx.multipartIdentifier) - CommentOnTable(UnresolvedTable(nameParts), comment) + CommentOnTable(UnresolvedTable(nameParts, "COMMENT ON TABLE"), comment) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index f93c0dcf59f4c..bd28484b23f46 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1555,15 +1555,15 @@ class DDLParserSuite extends AnalysisTest { test("LOAD DATA INTO table") { comparePlans( parsePlan("LOAD DATA INPATH 'filepath' INTO TABLE a.b.c"), - LoadData(UnresolvedTable(Seq("a", "b", "c")), "filepath", false, false, None)) + LoadData(UnresolvedTable(Seq("a", "b", "c"), "LOAD DATA"), "filepath", false, false, None)) comparePlans( parsePlan("LOAD DATA LOCAL INPATH 'filepath' INTO TABLE a.b.c"), - LoadData(UnresolvedTable(Seq("a", "b", "c")), "filepath", true, false, None)) + LoadData(UnresolvedTable(Seq("a", "b", "c"), "LOAD DATA"), "filepath", true, false, None)) comparePlans( parsePlan("LOAD DATA LOCAL INPATH 'filepath' OVERWRITE INTO TABLE a.b.c"), - LoadData(UnresolvedTable(Seq("a", "b", "c")), "filepath", true, true, None)) + LoadData(UnresolvedTable(Seq("a", "b", "c"), "LOAD DATA"), "filepath", true, true, None)) comparePlans( parsePlan( @@ -1572,7 +1572,7 @@ class DDLParserSuite extends AnalysisTest { |PARTITION(ds='2017-06-10') """.stripMargin), LoadData( - UnresolvedTable(Seq("a", "b", "c")), + UnresolvedTable(Seq("a", "b", "c"), "LOAD DATA"), "filepath", true, true, @@ -1674,13 +1674,13 @@ class DDLParserSuite extends AnalysisTest { val parsed2 = parsePlan(sql2) val expected1 = AlterTableAddPartition( - UnresolvedTable(Seq("a", "b", "c")), + UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... ADD PARTITION ..."), Seq( UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us"), Some("location1")), UnresolvedPartitionSpec(Map("dt" -> "2009-09-09", "country" -> "uk"), None)), ifNotExists = true) val expected2 = AlterTableAddPartition( - UnresolvedTable(Seq("a", "b", "c")), + UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... ADD PARTITION ..."), Seq(UnresolvedPartitionSpec(Map("dt" -> "2008-08-08"), Some("loc"))), ifNotExists = false) @@ -1747,7 +1747,7 @@ class DDLParserSuite extends AnalysisTest { assertUnsupported(sql2_view) val expected1_table = AlterTableDropPartition( - UnresolvedTable(Seq("table_name")), + UnresolvedTable(Seq("table_name"), "ALTER TABLE ... DROP PARTITION ..."), Seq( UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us")), UnresolvedPartitionSpec(Map("dt" -> "2009-09-09", "country" -> "uk"))), @@ -1763,7 +1763,7 @@ class DDLParserSuite extends AnalysisTest { val sql3_table = "ALTER TABLE a.b.c DROP IF EXISTS PARTITION (ds='2017-06-10')" val expected3_table = AlterTableDropPartition( - UnresolvedTable(Seq("a", "b", "c")), + UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... 
DROP PARTITION ..."), Seq(UnresolvedPartitionSpec(Map("ds" -> "2017-06-10"))), ifExists = true, purge = false, @@ -2174,7 +2174,7 @@ class DDLParserSuite extends AnalysisTest { comparePlans( parsePlan("COMMENT ON TABLE a.b.c IS 'xYz'"), - CommentOnTable(UnresolvedTable(Seq("a", "b", "c")), "xYz")) + CommentOnTable(UnresolvedTable(Seq("a", "b", "c"), "COMMENT ON TABLE"), "xYz")) } // TODO: ignored by SPARK-31707, restore the test after create table syntax unification diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 90df4ee08bfc0..da53936239de8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2414,7 +2414,8 @@ class DataSourceV2SQLSuite withTempView("v") { sql("create global temp view v as select 1") val e = intercept[AnalysisException](sql("COMMENT ON TABLE global_temp.v IS NULL")) - assert(e.getMessage.contains("global_temp.v is a temp view not table.")) + assert(e.getMessage.contains( + "global_temp.v is a temp view. 'COMMENT ON TABLE' expects a table")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 792f920ee0217..504cc57dc12d3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -147,10 +147,10 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { s"'$viewName' is a view not a table") assertAnalysisError( s"ALTER TABLE $viewName ADD IF NOT EXISTS PARTITION (a='4', b='8')", - s"$viewName is a temp view not table") + s"$viewName is a temp view. 'ALTER TABLE ... ADD PARTITION ...' expects a table") assertAnalysisError( s"ALTER TABLE $viewName DROP PARTITION (a='4', b='8')", - s"$viewName is a temp view not table") + s"$viewName is a temp view. 'ALTER TABLE ... DROP PARTITION ...' expects a table") // For the following v2 ALERT TABLE statements, unsupported operations are checked first // before resolving the relations. @@ -175,7 +175,7 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { val e2 = intercept[AnalysisException] { sql(s"""LOAD DATA LOCAL INPATH "$dataFilePath" INTO TABLE $viewName""") }.getMessage - assert(e2.contains(s"$viewName is a temp view not table")) + assert(e2.contains(s"$viewName is a temp view. 'LOAD DATA' expects a table")) assertNoSuchTable(s"TRUNCATE TABLE $viewName") val e3 = intercept[AnalysisException] { sql(s"SHOW CREATE TABLE $viewName") @@ -214,7 +214,7 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { e = intercept[AnalysisException] { sql(s"""LOAD DATA LOCAL INPATH "$dataFilePath" INTO TABLE $viewName""") }.getMessage - assert(e.contains("default.testView is a view not table")) + assert(e.contains("default.testView is a view. 
'LOAD DATA' expects a table")) e = intercept[AnalysisException] { sql(s"TRUNCATE TABLE $viewName") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 1f15bd685b239..56b871644453b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -904,10 +904,10 @@ class HiveDDLSuite assertAnalysisError( s"ALTER TABLE $oldViewName ADD IF NOT EXISTS PARTITION (a='4', b='8')", - s"$oldViewName is a view not table") + s"$oldViewName is a view. 'ALTER TABLE ... ADD PARTITION ...' expects a table.") assertAnalysisError( s"ALTER TABLE $oldViewName DROP IF EXISTS PARTITION (a='2')", - s"$oldViewName is a view not table") + s"$oldViewName is a view. 'ALTER TABLE ... DROP PARTITION ...' expects a table.") assert(catalog.tableExists(TableIdentifier(tabName))) assert(catalog.tableExists(TableIdentifier(oldViewName))) From 23e9920b3910e4f05269853429c7f18888cdc7b5 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 23 Nov 2020 09:00:41 +0000 Subject: [PATCH 0551/1009] [SPARK-33511][SQL] Respect case sensitivity while resolving V2 partition specs ### What changes were proposed in this pull request? 1. Pre-process partition specs in `ResolvePartitionSpec`, and convert partition names according to the partition schema and the SQL config `spark.sql.caseSensitive`. In the PR, I propose to invoke `normalizePartitionSpec` for that. The function is used in DSv1 commands, so, the behavior will be similar to DSv1. 2. Move `normalizePartitionSpec()` from `sql/core/.../datasources/PartitioningUtils` to `sql/catalyst/.../util/PartitioningUtils` to use it in Catalyst's rule `ResolvePartitionSpec` ### Why are the changes needed? DSv1 commands like `ALTER TABLE .. ADD PARTITION` and `ALTER TABLE .. DROP PARTITION` respect the SQL config `spark.sql.caseSensitive` while resolving partition specs. For example: ```sql spark-sql> CREATE TABLE tbl1 (id bigint, data string) USING parquet PARTITIONED BY (id); spark-sql> ALTER TABLE tbl1 ADD PARTITION (ID=1); spark-sql> SHOW PARTITIONS tbl1; id=1 ``` The same command fails on V2 Table catalog with error: ``` AnalysisException: Partition key ID not exists ``` ### Does this PR introduce _any_ user-facing change? Yes. After the changes, partition spec resolution works as for DSv1 (without the exception showed above). ### How was this patch tested? By running `AlterTablePartitionV2SQLSuite`. Closes #30454 from MaxGekk/partition-spec-case-sensitivity. 
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../analysis/ResolvePartitionSpec.scala | 27 +++++++---- .../spark/sql/util/PartitioningUtils.scala | 47 +++++++++++++++++++ .../command/AnalyzePartitionCommand.scala | 2 +- .../spark/sql/execution/command/ddl.scala | 3 +- .../spark/sql/execution/command/tables.scala | 3 +- .../datasources/PartitioningUtils.scala | 26 +--------- .../sql/execution/datasources/rules.scala | 3 +- .../AlterTablePartitionV2SQLSuite.scala | 26 ++++++++++ 8 files changed, 98 insertions(+), 39 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/util/PartitioningUtils.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala index 5e19a32968992..531d40f431dee 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala @@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{AlterTableAddPartition, Alte import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.catalog.SupportsPartitionManagement import org.apache.spark.sql.types._ +import org.apache.spark.sql.util.PartitioningUtils.normalizePartitionSpec /** * Resolve [[UnresolvedPartitionSpec]] to [[ResolvedPartitionSpec]] in partition related commands. @@ -33,32 +34,38 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { case r @ AlterTableAddPartition( ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs, _) => - r.copy(parts = resolvePartitionSpecs(partSpecs, table.partitionSchema())) + r.copy(parts = resolvePartitionSpecs(table.name, partSpecs, table.partitionSchema())) case r @ AlterTableDropPartition( ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs, _, _, _) => - r.copy(parts = resolvePartitionSpecs(partSpecs, table.partitionSchema())) + r.copy(parts = resolvePartitionSpecs(table.name, partSpecs, table.partitionSchema())) } private def resolvePartitionSpecs( - partSpecs: Seq[PartitionSpec], partSchema: StructType): Seq[ResolvedPartitionSpec] = + tableName: String, + partSpecs: Seq[PartitionSpec], + partSchema: StructType): Seq[ResolvedPartitionSpec] = partSpecs.map { case unresolvedPartSpec: UnresolvedPartitionSpec => ResolvedPartitionSpec( - convertToPartIdent(unresolvedPartSpec.spec, partSchema), unresolvedPartSpec.location) + convertToPartIdent(tableName, unresolvedPartSpec.spec, partSchema), + unresolvedPartSpec.location) case resolvedPartitionSpec: ResolvedPartitionSpec => resolvedPartitionSpec } private def convertToPartIdent( - partSpec: TablePartitionSpec, partSchema: StructType): InternalRow = { - val conflictKeys = partSpec.keys.toSeq.diff(partSchema.map(_.name)) - if (conflictKeys.nonEmpty) { - throw new AnalysisException(s"Partition key ${conflictKeys.mkString(",")} not exists") - } + tableName: String, + partitionSpec: TablePartitionSpec, + partSchema: StructType): InternalRow = { + val normalizedSpec = normalizePartitionSpec( + partitionSpec, + partSchema.map(_.name), + tableName, + conf.resolver) val partValues = partSchema.map { part => - val partValue = partSpec.get(part.name).orNull + val partValue = normalizedSpec.get(part.name).orNull if (partValue == null) { null } else { diff --git 
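To make the new code path concrete, a small illustrative snippet (not part of the patch) that exercises the helper after its move to the catalyst module; it assumes the `caseInsensitiveResolution`/`caseSensitiveResolution` resolvers from catalyst's analysis package object, and the object name is hypothetical.

```
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.analysis.{caseInsensitiveResolution, caseSensitiveResolution}
import org.apache.spark.sql.util.PartitioningUtils.normalizePartitionSpec

object NormalizeSpecSketch {
  def main(args: Array[String]): Unit = {
    val spec = Map("ID" -> "1")

    // Case-insensitive resolver: the user-supplied key is normalized to the
    // partition column name, mirroring spark.sql.caseSensitive=false.
    println(normalizePartitionSpec(spec, Seq("id"), "tbl", caseInsensitiveResolution))
    // Map(id -> 1)

    // Case-sensitive resolver: the same spec is rejected, mirroring the error
    // shown above for spark.sql.caseSensitive=true.
    try {
      normalizePartitionSpec(spec, Seq("id"), "tbl", caseSensitiveResolution)
    } catch {
      case e: AnalysisException => println(e.getMessage)
      // ID is not a valid partition column in table tbl.
    }
  }
}
```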
a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/PartitioningUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/PartitioningUtils.scala new file mode 100644 index 0000000000000..586aa6c59164f --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/PartitioningUtils.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.util + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.analysis.Resolver + +object PartitioningUtils { + /** + * Normalize the column names in partition specification, w.r.t. the real partition column names + * and case sensitivity. e.g., if the partition spec has a column named `monTh`, and there is a + * partition column named `month`, and it's case insensitive, we will normalize `monTh` to + * `month`. + */ + def normalizePartitionSpec[T]( + partitionSpec: Map[String, T], + partColNames: Seq[String], + tblName: String, + resolver: Resolver): Map[String, T] = { + val normalizedPartSpec = partitionSpec.toSeq.map { case (key, value) => + val normalizedKey = partColNames.find(resolver(_, key)).getOrElse { + throw new AnalysisException(s"$key is not a valid partition column in table $tblName.") + } + normalizedKey -> value + } + + SchemaUtils.checkColumnNameDuplication( + normalizedPartSpec.map(_._1), "in the partition schema", resolver) + + normalizedPartSpec.toMap + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzePartitionCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzePartitionCommand.scala index fc62dce5002b1..0b265bfb63e3e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzePartitionCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzePartitionCommand.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, Unresol import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType, ExternalCatalogUtils} import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{And, EqualTo, Literal} -import org.apache.spark.sql.execution.datasources.PartitioningUtils +import org.apache.spark.sql.util.PartitioningUtils /** * Analyzes a given set of partitions to generate per-partition statistics, which will be used in diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index d550fe270c753..27ad62026c9b5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -39,11 +39,12 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.connector.catalog.{CatalogV2Util, TableCatalog} import org.apache.spark.sql.connector.catalog.SupportsNamespaces._ -import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation, PartitioningUtils} +import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation} import org.apache.spark.sql.execution.datasources.orc.OrcFileFormat import org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaConverter import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} import org.apache.spark.sql.types._ +import org.apache.spark.sql.util.PartitioningUtils import org.apache.spark.util.{SerializableConfiguration, ThreadUtils} // Note: The definition of these commands are based on the ones described in diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 847052cd4fcde..bd238948aab02 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -37,7 +37,7 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.DescribeCommandSchema import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier, CaseInsensitiveMap} -import org.apache.spark.sql.execution.datasources.{DataSource, PartitioningUtils} +import org.apache.spark.sql.execution.datasources.DataSource import org.apache.spark.sql.execution.datasources.csv.CSVFileFormat import org.apache.spark.sql.execution.datasources.json.JsonFileFormat import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat @@ -47,6 +47,7 @@ import org.apache.spark.sql.execution.datasources.v2.orc.OrcDataSourceV2 import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetDataSourceV2 import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} import org.apache.spark.sql.types._ +import org.apache.spark.sql.util.PartitioningUtils import org.apache.spark.sql.util.SchemaUtils /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala index 796c23c7337d8..ea437d200eaab 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala @@ -30,7 +30,7 @@ import org.apache.hadoop.fs.Path import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.{Resolver, TypeCoercion} +import org.apache.spark.sql.catalyst.analysis.TypeCoercion import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Literal} import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateFormatter, DateTimeUtils, TimestampFormatter} @@ -357,30 +357,6 @@ object PartitioningUtils { getPathFragment(spec, StructType.fromAttributes(partitionColumns)) } - /** - * Normalize 
the column names in partition specification, w.r.t. the real partition column names - * and case sensitivity. e.g., if the partition spec has a column named `monTh`, and there is a - * partition column named `month`, and it's case insensitive, we will normalize `monTh` to - * `month`. - */ - def normalizePartitionSpec[T]( - partitionSpec: Map[String, T], - partColNames: Seq[String], - tblName: String, - resolver: Resolver): Map[String, T] = { - val normalizedPartSpec = partitionSpec.toSeq.map { case (key, value) => - val normalizedKey = partColNames.find(resolver(_, key)).getOrElse { - throw new AnalysisException(s"$key is not a valid partition column in table $tblName.") - } - normalizedKey -> value - } - - SchemaUtils.checkColumnNameDuplication( - normalizedPartSpec.map(_._1), "in the partition schema", resolver) - - normalizedPartSpec.toMap - } - /** * Resolves possible type conflicts between partitions by up-casting "lower" types using * [[findWiderTypeForPartitionColumn]]. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index 3a2a642b870f8..9e65b0ce13693 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 import org.apache.spark.sql.sources.InsertableRelation import org.apache.spark.sql.types.{AtomicType, StructType} +import org.apache.spark.sql.util.PartitioningUtils.normalizePartitionSpec import org.apache.spark.sql.util.SchemaUtils /** @@ -386,7 +387,7 @@ object PreprocessTableInsertion extends Rule[LogicalPlan] { partColNames: Seq[String], catalogTable: Option[CatalogTable]): InsertIntoStatement = { - val normalizedPartSpec = PartitioningUtils.normalizePartitionSpec( + val normalizedPartSpec = normalizePartitionSpec( insert.partitionSpec, partColNames, tblName, conf.resolver) val staticPartCols = normalizedPartSpec.filter(_._2.isDefined).keySet diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala index 107d0ea47249d..e05c2c09ace2a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala @@ -22,6 +22,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionsException, PartitionsAlreadyExistException} import org.apache.spark.sql.connector.catalog.{CatalogV2Implicits, Identifier} import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits +import org.apache.spark.sql.internal.SQLConf class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { @@ -159,4 +160,29 @@ class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { assert(partTable.asPartitionable.listPartitionIdentifiers(InternalRow.empty).isEmpty) } } + + test("case sensitivity in resolving partition specs") { + val t = "testpart.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val errMsg = intercept[AnalysisException] { + spark.sql(s"ALTER TABLE $t 
ADD PARTITION (ID=1) LOCATION 'loc1'") + }.getMessage + assert(errMsg.contains(s"ID is not a valid partition column in table $t")) + } + + val partTable = catalog("testpart").asTableCatalog + .loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")) + .asPartitionable + assert(!partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + spark.sql(s"ALTER TABLE $t ADD PARTITION (ID=1) LOCATION 'loc1'") + assert(partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) + spark.sql(s"ALTER TABLE $t DROP PARTITION (Id=1)") + assert(!partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) + } + } + } } From f83fcb12543049672a54ef5b582d58817e2ee5d3 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Mon, 23 Nov 2020 14:54:44 +0000 Subject: [PATCH 0552/1009] [SPARK-33278][SQL][FOLLOWUP] Improve OptimizeWindowFunctions to avoid transfer first to nth_value ### What changes were proposed in this pull request? https://github.com/apache/spark/pull/30178 provided `OptimizeWindowFunctions` used to transfer `first` to `nth_value`. If the window frame is `UNBOUNDED PRECEDING AND CURRENT ROW` or `UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING`, `nth_value` has better performance than `first`. But the `OptimizeWindowFunctions` need to exclude other window frame. ### Why are the changes needed? Improve `OptimizeWindowFunctions` to avoid transfer `first` to `nth_value` if the specified window frame isn't `UNBOUNDED PRECEDING AND CURRENT ROW` or `UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING`. ### Does this PR introduce _any_ user-facing change? 'No'. ### How was this patch tested? Jenkins test. Closes #30419 from beliefer/SPARK-33278_followup. Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/Optimizer.scala | 9 +++-- .../OptimizeWindowFunctionsSuite.scala | 33 +++++++++++++++++-- 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index c4b9936fa4c4f..9eee7c2b914a4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -811,9 +811,12 @@ object CollapseRepartition extends Rule[LogicalPlan] { */ object OptimizeWindowFunctions extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions { - case we @ WindowExpression(AggregateExpression(first: First, _, _, _, _), spec) - if spec.orderSpec.nonEmpty && - spec.frameSpecification.asInstanceOf[SpecifiedWindowFrame].frameType == RowFrame => + case we @ WindowExpression(AggregateExpression(first: First, _, _, _, _), + WindowSpecDefinition(_, orderSpec, frameSpecification: SpecifiedWindowFrame)) + if orderSpec.nonEmpty && frameSpecification.frameType == RowFrame && + frameSpecification.lower == UnboundedPreceding && + (frameSpecification.upper == UnboundedFollowing || + frameSpecification.upper == CurrentRow) => we.copy(windowFunction = NthValue(first.child, Literal(1), first.ignoreNulls)) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeWindowFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeWindowFunctionsSuite.scala index 389aaeafe655f..cf850bbe21ce6 100644 --- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeWindowFunctionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeWindowFunctionsSuite.scala @@ -36,7 +36,7 @@ class OptimizeWindowFunctionsSuite extends PlanTest { val b = testRelation.output(1) val c = testRelation.output(2) - test("replace first(col) by nth_value(col, 1)") { + test("replace first by nth_value if frame is UNBOUNDED PRECEDING AND CURRENT ROW") { val inputPlan = testRelation.select( WindowExpression( First(a, false).toAggregateExpression(), @@ -52,7 +52,34 @@ class OptimizeWindowFunctionsSuite extends PlanTest { assert(optimized == correctAnswer) } - test("can't replace first(col) by nth_value(col, 1) if the window frame type is range") { + test("replace first by nth_value if frame is UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING") { + val inputPlan = testRelation.select( + WindowExpression( + First(a, false).toAggregateExpression(), + WindowSpecDefinition(b :: Nil, c.asc :: Nil, + SpecifiedWindowFrame(RowFrame, UnboundedPreceding, UnboundedFollowing)))) + val correctAnswer = testRelation.select( + WindowExpression( + NthValue(a, Literal(1), false), + WindowSpecDefinition(b :: Nil, c.asc :: Nil, + SpecifiedWindowFrame(RowFrame, UnboundedPreceding, UnboundedFollowing)))) + + val optimized = Optimize.execute(inputPlan) + assert(optimized == correctAnswer) + } + + test("can't replace first by nth_value if frame is not suitable") { + val inputPlan = testRelation.select( + WindowExpression( + First(a, false).toAggregateExpression(), + WindowSpecDefinition(b :: Nil, c.asc :: Nil, + SpecifiedWindowFrame(RowFrame, Literal(1), CurrentRow)))) + + val optimized = Optimize.execute(inputPlan) + assert(optimized == inputPlan) + } + + test("can't replace first by nth_value if the window frame type is range") { val inputPlan = testRelation.select( WindowExpression( First(a, false).toAggregateExpression(), @@ -63,7 +90,7 @@ class OptimizeWindowFunctionsSuite extends PlanTest { assert(optimized == inputPlan) } - test("can't replace first(col) by nth_value(col, 1) if the window frame isn't ordered") { + test("can't replace first by nth_value if the window frame isn't ordered") { val inputPlan = testRelation.select( WindowExpression( First(a, false).toAggregateExpression(), From 1bd897cbc4fe30eb8b7740c7232aae87081e8e33 Mon Sep 17 00:00:00 2001 From: Ye Zhou Date: Mon, 23 Nov 2020 15:16:20 -0600 Subject: [PATCH 0553/1009] [SPARK-32918][SHUFFLE] RPC implementation to support control plane coordination for push-based shuffle ### What changes were proposed in this pull request? This is one of the patches for SPIP SPARK-30602 which is needed for push-based shuffle. Summary of changes: This PR introduces a new RPC to be called within Driver. When the expected shuffle push wait time reaches, Driver will call this RPC to facilitate coordination of shuffle map/reduce stages and notify external shuffle services to finalize shuffle block merge for a given shuffle. Shuffle services also respond back the metadata about a merged shuffle partition back to the caller. ### Why are the changes needed? Refer to the SPIP in SPARK-30602. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? This code snippets won't be called by any existing code and will be tested after the coordinated driver changes gets merged in SPARK-32920. Lead-authored-by: Min Shen mshenlinkedin.com Closes #30163 from zhouyejoe/SPARK-32918. 
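For illustration only, a minimal Scala sketch of how a driver-side caller might use the new RPC; the actual coordination logic is not part of this patch and arrives with SPARK-32920, so the helper name and the println bodies are placeholders. It shows the `finalizeShuffleMerge` entry point added to `BlockStoreClient` together with a `MergeFinalizerListener` that receives either the decoded `MergeStatuses` or the failure.

```scala
import org.apache.spark.network.shuffle.{BlockStoreClient, MergeFinalizerListener}
import org.apache.spark.network.shuffle.protocol.MergeStatuses

// Hypothetical helper; the real driver-side caller is added in SPARK-32920.
def requestMergeFinalization(
    client: BlockStoreClient,
    host: String,
    port: Int,
    shuffleId: Int): Unit = {
  client.finalizeShuffleMerge(host, port, shuffleId, new MergeFinalizerListener {
    override def onShuffleMergeSuccess(statuses: MergeStatuses): Unit = {
      // The driver would record the merged-partition metadata here before
      // scheduling the reducer stage.
      println(s"Shuffle $shuffleId merge finalized on $host:$port")
    }
    override def onShuffleMergeFailure(e: Throwable): Unit = {
      // Push-based shuffle is best effort, so reducers can still fall back
      // to fetching the original, unmerged blocks.
      println(s"Shuffle $shuffleId merge finalization failed: ${e.getMessage}")
    }
  })
}
```

As the `ExternalBlockStoreClient` change below shows, the request is sent as a `FinalizeShuffleMerge` message over `sendRpc`, and the response is decoded into `MergeStatuses` before reaching this listener.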
Lead-authored-by: Ye Zhou Co-authored-by: Min Shen Signed-off-by: Mridul Muralidharan gmail.com> --- .../network/shuffle/BlockStoreClient.java | 22 ++++++++++ .../shuffle/ExternalBlockStoreClient.java | 29 +++++++++++++ .../shuffle/MergeFinalizerListener.java | 43 +++++++++++++++++++ 3 files changed, 94 insertions(+) create mode 100644 common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergeFinalizerListener.java diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java index 37befcd4b67fa..a6bdc13e93234 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java @@ -147,6 +147,8 @@ public void onFailure(Throwable t) { * @param blockIds block ids to be pushed * @param buffers buffers to be pushed * @param listener the listener to receive block push status. + * + * @since 3.1.0 */ public void pushBlocks( String host, @@ -156,4 +158,24 @@ public void pushBlocks( BlockFetchingListener listener) { throw new UnsupportedOperationException(); } + + /** + * Invoked by Spark driver to notify external shuffle services to finalize the shuffle merge + * for a given shuffle. This allows the driver to start the shuffle reducer stage after properly + * finishing the shuffle merge process associated with the shuffle mapper stage. + * + * @param host host of shuffle server + * @param port port of shuffle server. + * @param shuffleId shuffle ID of the shuffle to be finalized + * @param listener the listener to receive MergeStatuses + * + * @since 3.1.0 + */ + public void finalizeShuffleMerge( + String host, + int port, + int shuffleId, + MergeFinalizerListener listener) { + throw new UnsupportedOperationException(); + } } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java index eca35ed290467..56c06e640acda 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java @@ -158,6 +158,35 @@ public void pushBlocks( } } + @Override + public void finalizeShuffleMerge( + String host, + int port, + int shuffleId, + MergeFinalizerListener listener) { + checkInit(); + try { + TransportClient client = clientFactory.createClient(host, port); + ByteBuffer finalizeShuffleMerge = new FinalizeShuffleMerge(appId, shuffleId).toByteBuffer(); + client.sendRpc(finalizeShuffleMerge, new RpcResponseCallback() { + @Override + public void onSuccess(ByteBuffer response) { + listener.onShuffleMergeSuccess( + (MergeStatuses) BlockTransferMessage.Decoder.fromByteBuffer(response)); + } + + @Override + public void onFailure(Throwable e) { + listener.onShuffleMergeFailure(e); + } + }); + } catch (Exception e) { + logger.error("Exception while sending finalizeShuffleMerge request to {}:{}", + host, port, e); + listener.onShuffleMergeFailure(e); + } + } + @Override public MetricSet shuffleMetrics() { checkInit(); diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergeFinalizerListener.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergeFinalizerListener.java new file 
mode 100644 index 0000000000000..08e13eea9f40d --- /dev/null +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergeFinalizerListener.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.shuffle; + +import java.util.EventListener; + +import org.apache.spark.network.shuffle.protocol.MergeStatuses; + +/** + * :: DeveloperApi :: + * + * Listener providing a callback function to invoke when driver receives the response for the + * finalize shuffle merge request sent to remote shuffle service. + * + * @since 3.1.0 + */ +public interface MergeFinalizerListener extends EventListener { + /** + * Called once upon successful response on finalize shuffle merge on a remote shuffle service. + * The returned {@link MergeStatuses} is passed to the listener for further processing + */ + void onShuffleMergeSuccess(MergeStatuses statuses); + + /** + * Called once upon failure response on finalize shuffle merge on a remote shuffle service. + */ + void onShuffleMergeFailure(Throwable e); +} From 05921814e2349e1acecb14a365e6d47ffb0d68e8 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Tue, 24 Nov 2020 09:27:44 +0900 Subject: [PATCH 0554/1009] [SPARK-33479][DOC][FOLLOWUP] DocSearch: Support filtering search results by version ### What changes were proposed in this pull request? In the discussion https://github.com/apache/spark/pull/30292#issuecomment-725613417, we planned to apply a new API key for each Spark release. However, it turns that DocSearch supports crawling multiple URLs from one website and filtering by fact key: https://docsearch.algolia.com/docs/config-file/#using-regular-expressions Thanks to the help from shortcuts, our Spark doc supports multiple version now: https://github.com/algolia/docsearch-configs/pull/2868 This PR is to add the fact key in the search script and update the instruction in the comment. ### Why are the changes needed? To support filtering Spark documentation search results by the current document version. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manual test Closes #30469 from gengliangwang/apiKeyFollowUp. Authored-by: Gengliang Wang Signed-off-by: Takeshi Yamamuro --- docs/_config.yml | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/docs/_config.yml b/docs/_config.yml index cd341063a1f92..026b3dd804690 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -26,15 +26,20 @@ SCALA_VERSION: "2.12.10" MESOS_VERSION: 1.0.0 SPARK_ISSUE_TRACKER_URL: https://issues.apache.org/jira/browse/SPARK SPARK_GITHUB_URL: https://github.com/apache/spark -# Before a new release, we should apply a new `apiKey` for the new Spark documentation -# on https://docsearch.algolia.com/. 
Otherwise, after release, the search results are always based -# on the latest documentation(https://spark.apache.org/docs/latest/) even when visiting the -# documentation of previous releases. +# Before a new release, we should: +# 1. update the `version` array for the new Spark documentation +# on https://github.com/algolia/docsearch-configs/blob/master/configs/apache_spark.json. +# 2. update the value of `facetFilters.version` in `algoliaOptions` on the new release branch. +# Otherwise, after release, the search results are always based on the latest documentation +# (https://spark.apache.org/docs/latest/) even when visiting the documentation of previous releases. DOCSEARCH_SCRIPT: | docsearch({ apiKey: 'b18ca3732c502995563043aa17bc6ecb', indexName: 'apache_spark', inputSelector: '#docsearch-input', enhancedSearchInput: true, + algoliaOptions: { + 'facetFilters': ["version:latest"] + }, debug: false // Set debug to true if you want to inspect the dropdown }); From 3ce4ab545bfc28db7df2c559726b887b0c8c33b7 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 23 Nov 2020 16:28:43 -0800 Subject: [PATCH 0555/1009] [SPARK-33513][BUILD] Upgrade to Scala 2.13.4 to improve exhaustivity ### What changes were proposed in this pull request? This PR aims the followings. 1. Upgrade from Scala 2.13.3 to 2.13.4 for Apache Spark 3.1 2. Fix exhaustivity issues in both Scala 2.12/2.13 (Scala 2.13.4 requires this for compilation.) 3. Enforce the improved exhaustive check by using the existing Scala 2.13 GitHub Action compilation job. ### Why are the changes needed? Scala 2.13.4 is a maintenance release for 2.13 line and improves JDK 15 support. - https://github.com/scala/scala/releases/tag/v2.13.4 Also, it improves exhaustivity check. - https://github.com/scala/scala/pull/9140 (Check exhaustivity of pattern matches with "if" guards and custom extractors) - https://github.com/scala/scala/pull/9147 (Check all bindings exhaustively, e.g. tuples components) ### Does this PR introduce _any_ user-facing change? Yep. Although it's a maintenance version change, it's a Scala version change. ### How was this patch tested? Pass the CIs and do the manual testing. - Scala 2.12 CI jobs(GitHub Action/Jenkins UT/Jenkins K8s IT) to check the validity of code change. - Scala 2.13 Compilation job to check the compilation Closes #30455 from dongjoon-hyun/SCALA_3.13. 
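As a stand-alone illustration of the stricter check (not code from the patch): Scala 2.13.3 did not verify exhaustivity for matches containing `if` guards, while 2.13.4 does, which is exactly why this commit adds explicit wildcard branches throughout the code base.

```scala
def sign(opt: Option[Int]): String = opt match {
  case Some(n) if n > 0 => "positive"
  case None             => "empty"
  // Without the fallback below, 2.13.3 compiles this silently, but 2.13.4
  // warns that the match can fail on Some(_): e.g. Some(-1) would throw a
  // MatchError at runtime.
  case _                => "non-positive"
}
```

The same reasoning applies to the custom-extractor patterns in the diff (for example the `IntegerLiteral` and `DenseVector`/`SparseVector` matches), which gain `case _` or `case v =>` branches that raise a descriptive error instead of relying on an implicit `MatchError`.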
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/storage/StorageUtils.scala | 2 +- .../main/scala/org/apache/spark/util/JsonProtocol.scala | 8 ++++---- .../src/main/scala/org/apache/spark/ml/linalg/BLAS.scala | 2 ++ .../org/apache/spark/ml/feature/RFormulaParser.scala | 6 +++++- .../org/apache/spark/ml/feature/StandardScaler.scala | 2 ++ .../org/apache/spark/ml/linalg/JsonMatrixConverter.scala | 2 ++ .../org/apache/spark/ml/linalg/JsonVectorConverter.scala | 2 ++ .../main/scala/org/apache/spark/ml/linalg/VectorUDT.scala | 2 ++ .../spark/ml/optim/aggregator/HingeAggregator.scala | 3 +++ .../spark/ml/optim/aggregator/LogisticAggregator.scala | 3 +++ .../scala/org/apache/spark/ml/util/Instrumentation.scala | 2 ++ .../org/apache/spark/mllib/feature/StandardScaler.scala | 2 ++ .../main/scala/org/apache/spark/mllib/linalg/BLAS.scala | 2 ++ .../scala/org/apache/spark/mllib/linalg/Vectors.scala | 2 ++ .../spark/mllib/linalg/distributed/IndexedRowMatrix.scala | 4 ++++ .../apache/spark/mllib/linalg/distributed/RowMatrix.scala | 2 ++ pom.xml | 2 +- .../scheduler/cluster/mesos/MesosSchedulerUtils.scala | 2 +- .../mesos/MesosFineGrainedSchedulerBackendSuite.scala | 2 +- .../spark/sql/catalyst/expressions/jsonExpressions.scala | 2 +- .../apache/spark/sql/catalyst/expressions/literals.scala | 4 +++- .../spark/sql/catalyst/expressions/objects/objects.scala | 2 +- .../apache/spark/sql/catalyst/json/JsonInferSchema.scala | 3 +++ .../sql/catalyst/optimizer/StarSchemaDetection.scala | 6 +++--- .../apache/spark/sql/catalyst/optimizer/expressions.scala | 1 + .../org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 2 ++ .../catalyst/plans/logical/basicLogicalOperators.scala | 2 +- .../apache/spark/sql/catalyst/util/GenericArrayData.scala | 2 +- .../spark/sql/catalyst/planning/ScanOperationSuite.scala | 5 +++++ .../sql/catalyst/util/ArrayDataIndexedSeqSuite.scala | 2 +- .../org/apache/spark/sql/execution/SparkSqlParser.scala | 6 +++--- .../spark/sql/execution/aggregate/BaseAggregateExec.scala | 2 +- .../spark/sql/execution/window/WindowExecBase.scala | 6 ++++++ .../scala/org/apache/spark/sql/hive/HiveInspectors.scala | 1 + .../spark/streaming/util/FileBasedWriteAheadLog.scala | 2 +- 35 files changed, 77 insertions(+), 23 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala b/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala index 147731a0fb547..c607fb28b2f56 100644 --- a/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala +++ b/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala @@ -169,7 +169,7 @@ private[spark] class StorageStatus( .getOrElse((0L, 0L)) case _ if !level.useOffHeap => (_nonRddStorageInfo.onHeapUsage, _nonRddStorageInfo.diskUsage) - case _ if level.useOffHeap => + case _ => (_nonRddStorageInfo.offHeapUsage, _nonRddStorageInfo.diskUsage) } val newMem = math.max(oldMem + changeInMem, 0L) diff --git a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala index 13f7cb453346f..103965e4860a3 100644 --- a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala +++ b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala @@ -757,7 +757,7 @@ private[spark] object JsonProtocol { def taskResourceRequestMapFromJson(json: JValue): Map[String, TaskResourceRequest] = { val jsonFields = json.asInstanceOf[JObject].obj - jsonFields.map { case JField(k, v) => + jsonFields.collect { case JField(k, v) => val req = 
taskResourceRequestFromJson(v) (k, req) }.toMap @@ -765,7 +765,7 @@ private[spark] object JsonProtocol { def executorResourceRequestMapFromJson(json: JValue): Map[String, ExecutorResourceRequest] = { val jsonFields = json.asInstanceOf[JObject].obj - jsonFields.map { case JField(k, v) => + jsonFields.collect { case JField(k, v) => val req = executorResourceRequestFromJson(v) (k, req) }.toMap @@ -1229,7 +1229,7 @@ private[spark] object JsonProtocol { def resourcesMapFromJson(json: JValue): Map[String, ResourceInformation] = { val jsonFields = json.asInstanceOf[JObject].obj - jsonFields.map { case JField(k, v) => + jsonFields.collect { case JField(k, v) => val resourceInfo = ResourceInformation.parseJson(v) (k, resourceInfo) }.toMap @@ -1241,7 +1241,7 @@ private[spark] object JsonProtocol { def mapFromJson(json: JValue): Map[String, String] = { val jsonFields = json.asInstanceOf[JObject].obj - jsonFields.map { case JField(k, JString(v)) => (k, v) }.toMap + jsonFields.collect { case JField(k, JString(v)) => (k, v) }.toMap } def propertiesFromJson(json: JValue): Properties = { diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala index 368f177cda828..b6c1b011f004c 100644 --- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala +++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala @@ -302,6 +302,8 @@ private[spark] object BLAS extends Serializable { j += 1 prevCol = col } + case _ => + throw new IllegalArgumentException(s"spr doesn't support vector type ${v.getClass}.") } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormulaParser.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormulaParser.scala index dbbfd8f329431..c5b28c95eb7c9 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormulaParser.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormulaParser.scala @@ -286,6 +286,7 @@ private[ml] object RFormulaParser extends RegexParsers { private val pow: Parser[Term] = term ~ "^" ~ "^[1-9]\\d*".r ^^ { case base ~ "^" ~ degree => power(base, degree.toInt) + case t => throw new IllegalArgumentException(s"Invalid term: $t") } | term private val interaction: Parser[Term] = pow * (":" ^^^ { interact _ }) @@ -298,7 +299,10 @@ private[ml] object RFormulaParser extends RegexParsers { private val expr = (sum | term) private val formula: Parser[ParsedRFormula] = - (label ~ "~" ~ expr) ^^ { case r ~ "~" ~ t => ParsedRFormula(r, t.asTerms.terms) } + (label ~ "~" ~ expr) ^^ { + case r ~ "~" ~ t => ParsedRFormula(r, t.asTerms.terms) + case t => throw new IllegalArgumentException(s"Invalid term: $t") + } def parse(value: String): ParsedRFormula = parseAll(formula, value) match { case Success(result, _) => result diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala index 7434b1adb2ff2..92dee46ad0055 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala @@ -314,6 +314,8 @@ object StandardScalerModel extends MLReadable[StandardScalerModel] { case SparseVector(size, indices, values) => val newValues = transformSparseWithScale(scale, indices, values.clone()) Vectors.sparse(size, indices, newValues) + case v => + throw new IllegalArgumentException(s"Unknown vector type ${v.getClass}.") } case (false, false) => diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/linalg/JsonMatrixConverter.scala b/mllib/src/main/scala/org/apache/spark/ml/linalg/JsonMatrixConverter.scala index 0bee643412b3f..8f03a29eb991a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/linalg/JsonMatrixConverter.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/linalg/JsonMatrixConverter.scala @@ -74,6 +74,8 @@ private[ml] object JsonMatrixConverter { ("values" -> values.toSeq) ~ ("isTransposed" -> isTransposed) compact(render(jValue)) + case _ => + throw new IllegalArgumentException(s"Unknown matrix type ${m.getClass}.") } } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/linalg/JsonVectorConverter.scala b/mllib/src/main/scala/org/apache/spark/ml/linalg/JsonVectorConverter.scala index 781e69f8d63db..1b949d75eeaa0 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/linalg/JsonVectorConverter.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/linalg/JsonVectorConverter.scala @@ -57,6 +57,8 @@ private[ml] object JsonVectorConverter { case DenseVector(values) => val jValue = ("type" -> 1) ~ ("values" -> values.toSeq) compact(render(jValue)) + case _ => + throw new IllegalArgumentException(s"Unknown vector type ${v.getClass}.") } } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/linalg/VectorUDT.scala b/mllib/src/main/scala/org/apache/spark/ml/linalg/VectorUDT.scala index 37f173bc20469..35bbaf5aa1ded 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/linalg/VectorUDT.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/linalg/VectorUDT.scala @@ -45,6 +45,8 @@ private[spark] class VectorUDT extends UserDefinedType[Vector] { row.setNullAt(2) row.update(3, UnsafeArrayData.fromPrimitiveArray(values)) row + case v => + throw new IllegalArgumentException(s"Unknown vector type ${v.getClass}.") } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/HingeAggregator.scala b/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/HingeAggregator.scala index 3d72512563154..0fe1ed231aa83 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/HingeAggregator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/HingeAggregator.scala @@ -200,6 +200,9 @@ private[ml] class BlockHingeAggregator( case sm: SparseMatrix if !fitIntercept => val gradSumVec = new DenseVector(gradientSumArray) BLAS.gemv(1.0, sm.transpose, vec, 1.0, gradSumVec) + + case m => + throw new IllegalArgumentException(s"Unknown matrix type ${m.getClass}.") } if (fitIntercept) gradientSumArray(numFeatures) += vec.values.sum diff --git a/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/LogisticAggregator.scala b/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/LogisticAggregator.scala index 2496c789f8da6..5a516940b9788 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/LogisticAggregator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/LogisticAggregator.scala @@ -504,6 +504,9 @@ private[ml] class BlockLogisticAggregator( case sm: SparseMatrix if !fitIntercept => val gradSumVec = new DenseVector(gradientSumArray) BLAS.gemv(1.0, sm.transpose, vec, 1.0, gradSumVec) + + case m => + throw new IllegalArgumentException(s"Unknown matrix type ${m.getClass}.") } if (fitIntercept) gradientSumArray(numFeatures) += vec.values.sum diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala b/mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala index d4b39e11fd1d7..2215c2b071584 100644 --- 
a/mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala @@ -192,6 +192,8 @@ private[spark] object Instrumentation { case Failure(NonFatal(e)) => instr.logFailure(e) throw e + case Failure(e) => + throw e case Success(result) => instr.logSuccess() result diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala index 8f9d6d07a4c36..12a5a0f2b2189 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala @@ -167,6 +167,8 @@ class StandardScalerModel @Since("1.3.0") ( val newValues = NewStandardScalerModel .transformSparseWithScale(localScale, indices, values.clone()) Vectors.sparse(size, indices, newValues) + case v => + throw new IllegalArgumentException(s"Unknown vector type ${v.getClass}.") } case _ => vector diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala index da486010cfa9e..bd60364326e28 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala @@ -285,6 +285,8 @@ private[spark] object BLAS extends Serializable with Logging { j += 1 prevCol = col } + case _ => + throw new IllegalArgumentException(s"Unknown vector type ${v.getClass}.") } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index 2fe415f14032f..9ed9dd0c88c9b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -289,6 +289,8 @@ class VectorUDT extends UserDefinedType[Vector] { row.setNullAt(2) row.update(3, UnsafeArrayData.fromPrimitiveArray(values)) row + case v => + throw new IllegalArgumentException(s"Unknown vector type ${v.getClass}.") } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala index ad79230c7513c..da5d1650694d6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala @@ -145,6 +145,8 @@ class IndexedRowMatrix @Since("1.0.0") ( .map { case (values, blockColumn) => ((blockRow.toInt, blockColumn), (rowInBlock.toInt, values.zipWithIndex)) } + case v => + throw new IllegalArgumentException(s"Unknown vector type ${v.getClass}.") } }.groupByKey(GridPartitioner(numRowBlocks, numColBlocks, rows.getNumPartitions)).map { case ((blockRow, blockColumn), itr) => @@ -187,6 +189,8 @@ class IndexedRowMatrix @Since("1.0.0") ( Iterator.tabulate(indices.length)(i => MatrixEntry(rowIndex, indices(i), values(i))) case DenseVector(values) => Iterator.tabulate(values.length)(i => MatrixEntry(rowIndex, i, values(i))) + case v => + throw new IllegalArgumentException(s"Unknown vector type ${v.getClass}.") } } new CoordinateMatrix(entries, numRows(), numCols()) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index 07b9d91c1f59b..c618b71ddc5a8 100644 --- 
a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -748,6 +748,8 @@ class RowMatrix @Since("1.0.0") ( } buf }.flatten + case v => + throw new IllegalArgumentException(s"Unknown vector type ${v.getClass}.") } } }.reduceByKey(_ + _).map { case ((i, j), sim) => diff --git a/pom.xml b/pom.xml index 0ab5a8c5b3efa..e5b1f30edd3be 100644 --- a/pom.xml +++ b/pom.xml @@ -3264,7 +3264,7 @@ scala-2.13 - 2.13.3 + 2.13.4 2.13 diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala index b5a360167679e..4620bdb005094 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala @@ -313,7 +313,6 @@ trait MesosSchedulerUtils extends Logging { // offer has the required attribute and subsumes the required values for that attribute case (name, requiredValues) => offerAttributes.get(name) match { - case None => false case Some(_) if requiredValues.isEmpty => true // empty value matches presence case Some(scalarValue: Value.Scalar) => // check if provided values is less than equal to the offered values @@ -332,6 +331,7 @@ trait MesosSchedulerUtils extends Logging { // check if the specified value is equal, if multiple values are specified // we succeed if any of them match. requiredValues.contains(textValue.getValue) + case _ => false } } } diff --git a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackendSuite.scala b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackendSuite.scala index 67ecf3242f52d..6a6514569cf90 100644 --- a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackendSuite.scala +++ b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackendSuite.scala @@ -178,7 +178,7 @@ class MesosFineGrainedSchedulerBackendSuite val (execInfo, _) = backend.createExecutorInfo( Arrays.asList(backend.createResource("cpus", 4)), "mockExecutor") assert(execInfo.getContainer.getDocker.getImage.equals("spark/mock")) - assert(execInfo.getContainer.getDocker.getForcePullImage.equals(true)) + assert(execInfo.getContainer.getDocker.getForcePullImage) val portmaps = execInfo.getContainer.getDocker.getPortMappingsList assert(portmaps.get(0).getHostPort.equals(80)) assert(portmaps.get(0).getContainerPort.equals(8080)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index 39d9eb5a36964..a363615d3afe0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -94,7 +94,7 @@ private[this] object JsonPathParser extends RegexParsers { case Success(result, _) => Some(result) - case NoSuccess(msg, next) => + case _ => None } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala index 1e69814673082..810cecff379d0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala @@ -322,7 +322,9 @@ case class Literal (value: Any, dataType: DataType) extends LeafExpression { case (a: Array[Byte], b: Array[Byte]) => util.Arrays.equals(a, b) case (a: ArrayBasedMapData, b: ArrayBasedMapData) => a.keyArray == b.keyArray && a.valueArray == b.valueArray - case (a, b) => a != null && a.equals(b) + case (a: Double, b: Double) if a.isNaN && b.isNaN => true + case (a: Float, b: Float) if a.isNaN && b.isNaN => true + case (a, b) => a != null && a == b } case _ => false } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 9701420e65870..9303df75af503 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -981,7 +981,7 @@ case class MapObjects private( (genValue: String) => s"$builder.add($genValue);", s"$builder;" ) - case None => + case _ => // array ( s""" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala index de396a4c63458..a39f06628b9ec 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala @@ -190,6 +190,9 @@ private[sql] class JsonInferSchema(options: JSONOptions) extends Serializable { } case VALUE_TRUE | VALUE_FALSE => BooleanType + + case _ => + throw new SparkException("Malformed JSON") } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/StarSchemaDetection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/StarSchemaDetection.scala index b65fc7f7e2bde..bf3fced0ae0fd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/StarSchemaDetection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/StarSchemaDetection.scala @@ -197,9 +197,9 @@ object StarSchemaDetection extends PredicateHelper with SQLConfHelper { } else { false } - case None => false + case _ => false } - case None => false + case _ => false } case _ => false } @@ -239,7 +239,7 @@ object StarSchemaDetection extends PredicateHelper with SQLConfHelper { case Some(col) if t.outputSet.contains(col) => val stats = t.stats stats.attributeStats.nonEmpty && stats.attributeStats.contains(col) - case None => false + case _ => false } case _ => false } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 55a45f4410b34..d1eb3b07d3d5f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -685,6 +685,7 @@ object FoldablePropagation extends Rule[LogicalPlan] { case LeftOuter => newJoin.right.output case RightOuter => newJoin.left.output case FullOuter => 
newJoin.left.output ++ newJoin.right.output + case _ => Nil }) val newFoldableMap = AttributeMap(foldableMap.baseMap.values.filterNot { case (attr, _) => missDerivedAttrsSet.contains(attr) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index ea4baafbacede..50580b8e335ff 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -967,6 +967,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg (UsingJoin(baseJoinType, visitIdentifierList(c.identifierList)), None) case Some(c) if c.booleanExpression != null => (baseJoinType, Option(expression(c.booleanExpression))) + case Some(c) => + throw new ParseException(s"Unimplemented joinCriteria: $c", ctx) case None if join.NATURAL != null => if (baseJoinType == Cross) { throw new ParseException("NATURAL CROSS JOIN is not supported", ctx) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index f96e07863fa69..c7108ea8ac74b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -362,7 +362,7 @@ case class Join( left.constraints case RightOuter => right.constraints - case FullOuter => + case _ => ExpressionSet() } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GenericArrayData.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GenericArrayData.scala index 81f412c14304d..e46d730afb4a3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GenericArrayData.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GenericArrayData.scala @@ -120,7 +120,7 @@ class GenericArrayData(val array: Array[Any]) extends ArrayData { if (!o2.isInstanceOf[Double] || ! 
java.lang.Double.isNaN(o2.asInstanceOf[Double])) { return false } - case _ => if (!o1.equals(o2)) { + case _ => if (o1.getClass != o2.getClass || o1 != o2) { return false } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/planning/ScanOperationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/planning/ScanOperationSuite.scala index 7790f467a890b..1290f770349e7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/planning/ScanOperationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/planning/ScanOperationSuite.scala @@ -39,6 +39,7 @@ class ScanOperationSuite extends SparkFunSuite { assert(projects(0) === colB) assert(projects(1) === aliasR) assert(filters.size === 1) + case _ => assert(false) } } @@ -50,6 +51,7 @@ class ScanOperationSuite extends SparkFunSuite { assert(projects(0) === colA) assert(projects(1) === colB) assert(filters.size === 1) + case _ => assert(false) } } @@ -65,6 +67,7 @@ class ScanOperationSuite extends SparkFunSuite { assert(projects.size === 2) assert(projects(0) === colA) assert(projects(1) === aliasId) + case _ => assert(false) } } @@ -81,6 +84,7 @@ class ScanOperationSuite extends SparkFunSuite { assert(projects(0) === colA) assert(projects(1) === aliasR) assert(filters.size === 1) + case _ => assert(false) } } @@ -93,6 +97,7 @@ class ScanOperationSuite extends SparkFunSuite { assert(projects(0) === colA) assert(projects(1) === aliasR) assert(filters.size === 1) + case _ => assert(false) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayDataIndexedSeqSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayDataIndexedSeqSuite.scala index 1e430351b5137..9c3aaea0f7772 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayDataIndexedSeqSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayDataIndexedSeqSuite.scala @@ -45,7 +45,7 @@ class ArrayDataIndexedSeqSuite extends SparkFunSuite { if (e != null) { elementDt match { // For Nan, etc. 
- case FloatType | DoubleType => assert(seq(i).equals(e)) + case FloatType | DoubleType => assert(seq(i) == e) case _ => assert(seq(i) === e) } } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 85476bcd21e19..01522257c072d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -868,12 +868,12 @@ class SparkSqlAstBuilder extends AstBuilder { // assert if directory is local when LOCAL keyword is mentioned val scheme = Option(storage.locationUri.get.getScheme) scheme match { - case None => + case Some(pathScheme) if (!pathScheme.equals("file")) => + throw new ParseException("LOCAL is supported only with file: scheme", ctx) + case _ => // force scheme to be file rather than fs.default.name val loc = Some(UriBuilder.fromUri(CatalogUtils.stringToURI(path)).scheme("file").build()) storage = storage.copy(locationUri = loc) - case Some(pathScheme) if (!pathScheme.equals("file")) => - throw new ParseException("LOCAL is supported only with file: scheme", ctx) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/BaseAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/BaseAggregateExec.scala index efba51706cf98..c676609bc37e4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/BaseAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/BaseAggregateExec.scala @@ -91,7 +91,7 @@ trait BaseAggregateExec extends UnaryExecNode with AliasAwareOutputPartitioning override def requiredChildDistribution: List[Distribution] = { requiredChildDistributionExpressions match { case Some(exprs) if exprs.isEmpty => AllTuples :: Nil - case Some(exprs) if exprs.nonEmpty => ClusteredDistribution(exprs) :: Nil + case Some(exprs) => ClusteredDistribution(exprs) :: Nil case None => UnspecifiedDistribution :: Nil } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala index c6b98d48d7dde..9832e5cd74ae7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala @@ -71,6 +71,9 @@ trait WindowExecBase extends UnaryExecNode { case (RowFrame, IntegerLiteral(offset)) => RowBoundOrdering(offset) + case (RowFrame, _) => + sys.error(s"Unhandled bound in windows expressions: $bound") + case (RangeFrame, CurrentRow) => val ordering = RowOrdering.create(orderSpec, child.output) RangeBoundOrdering(ordering, IdentityProjection, IdentityProjection) @@ -249,6 +252,9 @@ trait WindowExecBase extends UnaryExecNode { createBoundOrdering(frameType, lower, timeZone), createBoundOrdering(frameType, upper, timeZone)) } + + case _ => + sys.error(s"Unsupported factory: $key") } // Keep track of the number of expressions. This is a side-effect in a map... 
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala index 8ab6e28366753..9213173bbc9ba 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala @@ -1039,6 +1039,7 @@ private[hive] trait HiveInspectors { private def decimalTypeInfo(decimalType: DecimalType): TypeInfo = decimalType match { case DecimalType.Fixed(precision, scale) => new DecimalTypeInfo(precision, scale) + case dt => throw new AnalysisException(s"${dt.catalogString} is not supported.") } def toTypeInfo: TypeInfo = dt match { diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala index 2e5000159bcb7..d1f9dfb791355 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala @@ -293,7 +293,7 @@ private[streaming] object FileBasedWriteAheadLog { val startTime = startTimeStr.toLong val stopTime = stopTimeStr.toLong Some(LogInfo(startTime, stopTime, file.toString)) - case None => + case None | Some(_) => None } }.sortBy { _.startTime } From 8380e00419281cd1b1fc5706d23d5231356a3379 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 23 Nov 2020 19:35:58 -0800 Subject: [PATCH 0556/1009] [SPARK-33524][SQL][TESTS] Change `InMemoryTable` not to use Tuple.hashCode for `BucketTransform` ### What changes were proposed in this pull request? This PR aims to change `InMemoryTable` not to use `Tuple.hashCode` for `BucketTransform`. ### Why are the changes needed? SPARK-32168 made `InMemoryTable` to handle `BucketTransform` as a hash of `Tuple` which is dependents on Scala versions. - https://github.com/apache/spark/blob/master/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala#L159 **Scala 2.12.10** ```scala $ bin/scala Welcome to Scala 2.12.10 (OpenJDK 64-Bit Server VM, Java 1.8.0_272). Type in expressions for evaluation. Or try :help. scala> (1, 1).hashCode res0: Int = -2074071657 ``` **Scala 2.13.3** ```scala Welcome to Scala 2.13.3 (OpenJDK 64-Bit Server VM, Java 1.8.0_272). Type in expressions for evaluation. Or try :help. scala> (1, 1).hashCode val res0: Int = -1669302457 ``` ### Does this PR introduce _any_ user-facing change? Yes. This is a correctness issue. ### How was this patch tested? Pass the UT with both Scala 2.12/2.13. Closes #30477 from dongjoon-hyun/SPARK-33524. 
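A side-by-side sketch of the two bucket computations, with simplified signatures (`value` and `dataType` stand for what `InMemoryTable`'s extractor returns for the bucketed column, `numBuckets` for the `BucketTransform` argument): the old version hashes the `(value, dataType)` tuple, whose `hashCode` differs between Scala 2.12 and 2.13 as the REPL output above shows, while the new version combines the element hashes explicitly and therefore assigns the same bucket on both versions.

```scala
def bucketOld(value: Any, dataType: Any, numBuckets: Int): Int =
  ((value, dataType).hashCode() & Integer.MAX_VALUE) % numBuckets // Scala-version dependent

def bucketNew(value: Any, dataType: Any, numBuckets: Int): Int = {
  val valueHashCode = if (value == null) 0 else value.hashCode
  ((valueHashCode + 31 * dataType.hashCode()) & Integer.MAX_VALUE) % numBuckets
}
```

This is also why the expected `_partition` values in `DataSourceV2SQLSuite` change in the diff below.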
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/sql/connector/InMemoryTable.scala | 4 +++- .../org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala index c93053abc550a..ffff00b54f1b8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala @@ -156,7 +156,9 @@ class InMemoryTable( throw new IllegalArgumentException(s"Match: unsupported argument(s) type - ($v, $t)") } case BucketTransform(numBuckets, ref) => - (extractor(ref.fieldNames, schema, row).hashCode() & Integer.MAX_VALUE) % numBuckets + val (value, dataType) = extractor(ref.fieldNames, schema, row) + val valueHashCode = if (value == null) 0 else value.hashCode + ((valueHashCode + 31 * dataType.hashCode()) & Integer.MAX_VALUE) % numBuckets } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index da53936239de8..dc4abf3eb19cf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2511,7 +2511,7 @@ class DataSourceV2SQLSuite checkAnswer( spark.sql(s"SELECT id, data, _partition FROM $t1"), - Seq(Row(1, "a", "3/1"), Row(2, "b", "2/2"), Row(3, "c", "2/3"))) + Seq(Row(1, "a", "3/1"), Row(2, "b", "0/2"), Row(3, "c", "1/3"))) } } @@ -2524,7 +2524,7 @@ class DataSourceV2SQLSuite checkAnswer( spark.sql(s"SELECT index, data, _partition FROM $t1"), - Seq(Row(3, "c", "2/3"), Row(2, "b", "2/2"), Row(1, "a", "3/1"))) + Seq(Row(3, "c", "1/3"), Row(2, "b", "0/2"), Row(1, "a", "3/1"))) } } From f35e28fea5605de4b28630eb643a821ecd7c8523 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 24 Nov 2020 13:30:06 +0900 Subject: [PATCH 0557/1009] [SPARK-33523][SQL][TEST] Add predicate related benchmark to SubExprEliminationBenchmark ### What changes were proposed in this pull request? This patch adds predicate related benchmark to `SubExprEliminationBenchmark`. ### Why are the changes needed? We should have a benchmark for subexpression elimination of predicate. ### Does this PR introduce _any_ user-facing change? No, dev only. ### How was this patch tested? Run benchmark locally. Closes #30476 from viirya/SPARK-33523. Authored-by: Liang-Chi Hsieh Signed-off-by: HyukjinKwon --- ...ExprEliminationBenchmark-jdk11-results.txt | 22 +++- .../SubExprEliminationBenchmark-results.txt | 22 +++- .../SubExprEliminationBenchmark.scala | 106 ++++++++++-------- 3 files changed, 90 insertions(+), 60 deletions(-) diff --git a/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt b/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt index 3d2b2e5c8edba..1eb7b534d2194 100644 --- a/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt +++ b/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt @@ -5,11 +5,21 @@ Benchmark for performance of subexpression elimination Preparing data for benchmarking ... 
OpenJDK 64-Bit Server VM 11.0.9+11 on Mac OS X 10.15.6 Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz -from_json as subExpr: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -subexpressionElimination off, codegen on 25932 26908 916 0.0 259320042.3 1.0X -subexpressionElimination off, codegen off 26085 26159 65 0.0 260848905.0 1.0X -subexpressionElimination on, codegen on 2860 2939 72 0.0 28603312.9 9.1X -subexpressionElimination on, codegen off 2517 2617 93 0.0 25165157.7 10.3X +from_json as subExpr in Project: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +subExprElimination false, codegen: true 26447 27127 605 0.0 264467933.4 1.0X +subExprElimination false, codegen: false 25673 26035 546 0.0 256732419.1 1.0X +subExprElimination true, codegen: true 1384 1448 102 0.0 13842910.3 19.1X +subExprElimination true, codegen: false 1244 1347 123 0.0 12442389.3 21.3X + +Preparing data for benchmarking ... +OpenJDK 64-Bit Server VM 11.0.9+11 on Mac OS X 10.15.6 +Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz +from_json as subExpr in Filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +subexpressionElimination off, codegen on 34631 35449 833 0.0 346309884.0 1.0X +subexpressionElimination off, codegen on 34480 34851 353 0.0 344798490.4 1.0X +subexpressionElimination off, codegen on 16618 16811 291 0.0 166176642.6 2.1X +subexpressionElimination off, codegen on 34316 34667 310 0.0 343157094.7 1.0X diff --git a/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt b/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt index ca2a9c6497500..801f519ca76a1 100644 --- a/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt +++ b/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt @@ -5,11 +5,21 @@ Benchmark for performance of subexpression elimination Preparing data for benchmarking ... OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.6 Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz -from_json as subExpr: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -subexpressionElimination off, codegen on 26503 27622 1937 0.0 265033362.4 1.0X -subexpressionElimination off, codegen off 24920 25376 430 0.0 249196978.2 1.1X -subexpressionElimination on, codegen on 2421 2466 39 0.0 24213606.1 10.9X -subexpressionElimination on, codegen off 2360 2435 87 0.0 23604320.7 11.2X +from_json as subExpr in Project: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +subExprElimination false, codegen: true 22767 23240 424 0.0 227665316.7 1.0X +subExprElimination false, codegen: false 22869 23351 465 0.0 228693464.1 1.0X +subExprElimination true, codegen: true 1328 1340 10 0.0 13280056.2 17.1X +subExprElimination true, codegen: false 1248 1276 31 0.0 12476135.1 18.2X + +Preparing data for benchmarking ... 
+OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.6 +Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz +from_json as subExpr in Filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +subexpressionElimination off, codegen on 37691 38846 1004 0.0 376913767.9 1.0X +subexpressionElimination off, codegen on 37852 39124 1103 0.0 378517745.5 1.0X +subexpressionElimination off, codegen on 22900 23085 202 0.0 229000242.5 1.6X +subexpressionElimination off, codegen on 38298 38598 374 0.0 382978731.3 1.0X diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SubExprEliminationBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SubExprEliminationBenchmark.scala index 34b4a70d05a25..e26acbcb3cd21 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SubExprEliminationBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SubExprEliminationBenchmark.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.execution import org.apache.spark.benchmark.Benchmark +import org.apache.spark.sql.Column +import org.apache.spark.sql.catalyst.expressions.{Expression, Literal, Or} import org.apache.spark.sql.execution.benchmark.SqlBasedBenchmark import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf @@ -39,7 +41,7 @@ object SubExprEliminationBenchmark extends SqlBasedBenchmark { import spark.implicits._ def withFromJson(rowsNum: Int, numIters: Int): Unit = { - val benchmark = new Benchmark("from_json as subExpr", rowsNum, output = output) + val benchmark = new Benchmark("from_json as subExpr in Project", rowsNum, output = output) withTempPath { path => prepareDataInfo(benchmark) @@ -50,57 +52,65 @@ object SubExprEliminationBenchmark extends SqlBasedBenchmark { from_json('value, schema).getField(s"col$idx") } - // We only benchmark subexpression performance under codegen/non-codegen, so disabling - // json optimization. - benchmark.addCase("subexpressionElimination off, codegen on", numIters) { _ => - withSQLConf( - SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> "false", - SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true", - SQLConf.CODEGEN_FACTORY_MODE.key -> "CODEGEN_ONLY", - SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> "false") { - val df = spark.read - .text(path.getAbsolutePath) - .select(cols: _*) - df.collect() + Seq( + ("false", "true", "CODEGEN_ONLY"), + ("false", "false", "NO_CODEGEN"), + ("true", "true", "CODEGEN_ONLY"), + ("true", "false", "NO_CODEGEN") + ).foreach { case (subExprEliminationEnabled, codegenEnabled, codegenFactory) => + // We only benchmark subexpression performance under codegen/non-codegen, so disabling + // json optimization. 
+ val caseName = s"subExprElimination $subExprEliminationEnabled, codegen: $codegenEnabled" + benchmark.addCase(caseName, numIters) { _ => + withSQLConf( + SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> subExprEliminationEnabled, + SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> codegenEnabled, + SQLConf.CODEGEN_FACTORY_MODE.key -> codegenFactory, + SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> "false") { + val df = spark.read + .text(path.getAbsolutePath) + .select(cols: _*) + df.write.mode("overwrite").format("noop").save() + } } } - benchmark.addCase("subexpressionElimination off, codegen off", numIters) { _ => - withSQLConf( - SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> "false", - SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false", - SQLConf.CODEGEN_FACTORY_MODE.key -> "NO_CODEGEN", - SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> "false") { - val df = spark.read - .text(path.getAbsolutePath) - .select(cols: _*) - df.collect() - } - } + benchmark.run() + } + } - benchmark.addCase("subexpressionElimination on, codegen on", numIters) { _ => - withSQLConf( - SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> "true", - SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true", - SQLConf.CODEGEN_FACTORY_MODE.key -> "CODEGEN_ONLY", - SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> "false") { - val df = spark.read - .text(path.getAbsolutePath) - .select(cols: _*) - df.collect() - } - } + def withFilter(rowsNum: Int, numIters: Int): Unit = { + val benchmark = new Benchmark("from_json as subExpr in Filter", rowsNum, output = output) - benchmark.addCase("subexpressionElimination on, codegen off", numIters) { _ => - withSQLConf( - SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> "true", - SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false", - SQLConf.CODEGEN_FACTORY_MODE.key -> "NO_CODEGEN", - SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> "false") { - val df = spark.read - .text(path.getAbsolutePath) - .select(cols: _*) - df.collect() + withTempPath { path => + prepareDataInfo(benchmark) + val numCols = 1000 + val schema = writeWideRow(path.getAbsolutePath, rowsNum, numCols) + + val predicate = (0 until numCols).map { idx => + (from_json('value, schema).getField(s"col$idx") >= Literal(100000)).expr + }.asInstanceOf[Seq[Expression]].reduce(Or) + + Seq( + ("false", "true", "CODEGEN_ONLY"), + ("false", "false", "NO_CODEGEN"), + ("true", "true", "CODEGEN_ONLY"), + ("true", "false", "NO_CODEGEN") + ).foreach { case (subExprEliminationEnabled, codegenEnabled, codegenFactory) => + // We only benchmark subexpression performance under codegen/non-codegen, so disabling + // json optimization. 
+        val caseName = s"subExprElimination $subExprEliminationEnabled, codegen: $codegenEnabled"
+        benchmark.addCase(caseName, numIters) { _ =>
+          withSQLConf(
+            SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> subExprEliminationEnabled,
+            SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> codegenEnabled,
+            SQLConf.CODEGEN_FACTORY_MODE.key -> codegenFactory,
+            SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> "false") {
+            val df = spark.read
+              .text(path.getAbsolutePath)
+              .where(Column(predicate))
+            df.write.mode("overwrite").format("noop").save()
+          }
        }
      }
@@ -108,11 +118,11 @@ object SubExprEliminationBenchmark extends SqlBasedBenchmark {
    }
  }
-
  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
    val numIters = 3
    runBenchmark("Benchmark for performance of subexpression elimination") {
      withFromJson(100, numIters)
+      withFilter(100, numIters)
    }
  }
}

From a6555ee59626bbc4ef860c4ff9fcefae0d45b45e Mon Sep 17 00:00:00 2001
From: Max Gekk
Date: Tue, 24 Nov 2020 08:04:21 +0000
Subject: [PATCH 0558/1009] [SPARK-33521][SQL] Universal type conversion in resolving V2 partition specs

### What changes were proposed in this pull request?
In the PR, I propose to change the resolver of partition specs used in V2 `ALTER TABLE .. ADD/DROP PARTITION` (at the moment), and re-use `CAST` to convert partition values to the desired types according to the partition schema.

### Why are the changes needed?
Currently, the resolver of V2 partition specs supports just a few types: https://github.com/apache/spark/blob/23e9920b3910e4f05269853429c7f18888cdc7b5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala#L72, and fails on other types like date/timestamp.

### Does this PR introduce _any_ user-facing change?
Yes

### How was this patch tested?
By running `AlterTablePartitionV2SQLSuite`

Closes #30474 from MaxGekk/dsv2-partition-value-types.
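The change described above boils down to evaluating a Catalyst `Cast` from the raw partition-spec string to the partition column's data type. A rough sketch of that idea, not the patch itself: the column names mirror the date/timestamp columns in the new test below, while the spec map and the hard-coded time zone are purely illustrative.

```scala
import org.apache.spark.sql.catalyst.expressions.{Cast, Literal}
import org.apache.spark.sql.types._

// Partition values always arrive as strings from ALTER TABLE ... ADD/DROP PARTITION (...).
val partSchema = StructType(Seq(
  StructField("part8", DateType),
  StructField("part9", TimestampType)))
val normalizedSpec = Map(
  "part8" -> "2020-11-23",
  "part9" -> "2020-11-23T22:13:10.123456")

val partValues = partSchema.map { field =>
  val raw = normalizedSpec.get(field.name).orNull
  // A string cast exists for every partitionable type (dates, timestamps, decimals, ...),
  // which is what lets the hand-written match over a few primitive types go away.
  Cast(Literal.create(raw, StringType), field.dataType, Some("America/Los_Angeles")).eval()
}
```

Reusing `Cast` also keeps the conversion semantics identical to what a query would get from an explicit cast of the same string.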
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../analysis/ResolvePartitionSpec.scala | 29 +--------- .../AlterTablePartitionV2SQLSuite.scala | 58 +++++++++++++++++++ 2 files changed, 61 insertions(+), 26 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala index 531d40f431dee..6d061fce06919 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala @@ -17,9 +17,9 @@ package org.apache.spark.sql.catalyst.analysis -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec +import org.apache.spark.sql.catalyst.expressions.{Cast, Literal} import org.apache.spark.sql.catalyst.plans.logical.{AlterTableAddPartition, AlterTableDropPartition, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.catalog.SupportsPartitionManagement @@ -65,31 +65,8 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { conf.resolver) val partValues = partSchema.map { part => - val partValue = normalizedSpec.get(part.name).orNull - if (partValue == null) { - null - } else { - // TODO: Support other datatypes, such as DateType - part.dataType match { - case _: ByteType => - partValue.toByte - case _: ShortType => - partValue.toShort - case _: IntegerType => - partValue.toInt - case _: LongType => - partValue.toLong - case _: FloatType => - partValue.toFloat - case _: DoubleType => - partValue.toDouble - case _: StringType => - partValue - case _ => - throw new AnalysisException( - s"Type ${part.dataType.typeName} is not supported for partition.") - } - } + val raw = normalizedSpec.get(part.name).orNull + Cast(Literal.create(raw, StringType), part.dataType, Some(conf.sessionLocalTimeZone)).eval() } InternalRow.fromSeq(partValues) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala index e05c2c09ace2a..4cacd5ec2b49e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala @@ -17,12 +17,16 @@ package org.apache.spark.sql.connector +import java.time.{LocalDate, LocalDateTime} + import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionsException, PartitionsAlreadyExistException} +import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, DateTimeUtils} import org.apache.spark.sql.connector.catalog.{CatalogV2Implicits, Identifier} import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.unsafe.types.UTF8String class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { @@ -185,4 +189,58 @@ class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { } } } + + test("SPARK-33521: universal type conversions of partition values") { + val t = "testpart.ns1.ns2.tbl" + withTable(t) { + sql(s""" + |CREATE TABLE $t ( + | part0 tinyint, + | part1 smallint, + | part2 int, + | 
part3 bigint, + | part4 float, + | part5 double, + | part6 string, + | part7 boolean, + | part8 date, + | part9 timestamp + |) USING foo + |PARTITIONED BY (part0, part1, part2, part3, part4, part5, part6, part7, part8, part9) + |""".stripMargin) + val partTable = catalog("testpart").asTableCatalog + .loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")) + .asPartitionable + val expectedPartition = InternalRow.fromSeq(Seq[Any]( + -1, // tinyint + 0, // smallint + 1, // int + 2, // bigint + 3.14F, // float + 3.14D, // double + UTF8String.fromString("abc"), // string + true, // boolean + LocalDate.parse("2020-11-23").toEpochDay, + DateTimeUtils.instantToMicros( + LocalDateTime.parse("2020-11-23T22:13:10.123456").atZone(DateTimeTestUtils.LA).toInstant) + )) + assert(!partTable.partitionExists(expectedPartition)) + val partSpec = """ + | part0 = -1, + | part1 = 0, + | part2 = 1, + | part3 = 2, + | part4 = 3.14, + | part5 = 3.14, + | part6 = 'abc', + | part7 = true, + | part8 = '2020-11-23', + | part9 = '2020-11-23T22:13:10.123456' + |""".stripMargin + sql(s"ALTER TABLE $t ADD PARTITION ($partSpec) LOCATION 'loc1'") + assert(partTable.partitionExists(expectedPartition)) + sql(s" ALTER TABLE $t DROP PARTITION ($partSpec)") + assert(!partTable.partitionExists(expectedPartition)) + } + } } From fdd6c73b3cfac5af30c789c7f70b92367a79f7e7 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Tue, 24 Nov 2020 11:06:39 +0000 Subject: [PATCH 0559/1009] [SPARK-33514][SQL] Migrate TRUNCATE TABLE command to use UnresolvedTable to resolve the identifier ### What changes were proposed in this pull request? This PR proposes to migrate `TRUNCATE TABLE` to use `UnresolvedTable` to resolve the table identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). Note that `TRUNCATE TABLE` works only with v1 tables, and not supported for v2 tables. ### Why are the changes needed? The changes allow consistent resolution behavior when resolving the table identifier. For example, the following is the current behavior: ```scala sql("CREATE TEMPORARY VIEW t AS SELECT 1") sql("CREATE DATABASE db") sql("CREATE TABLE t using csv AS SELECT 1") sql("USE db") sql("TRUNCATE TABLE t") // Succeeds ``` With this PR, `TRUNCATE TABLE` above fails with the following: ``` org.apache.spark.sql.AnalysisException: t is a temp view not table.; line 1 pos 0 at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42) at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveTempViews$$anonfun$apply$7.$anonfun$applyOrElse$42(Analyzer.scala:866) ``` , which is expected since temporary view is resolved first and `TRUNCATE TABLE` doesn't support a temporary view. ### Does this PR introduce _any_ user-facing change? After this PR, `TRUNCATE TABLE` is resolved to a temp view `t` instead of table `db.t` in the above scenario. ### How was this patch tested? Updated existing tests. Closes #30457 from imback82/truncate_table. 
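For code that relied on the old resolution order, the straightforward adjustment is to qualify the table name so that local temporary views are never considered. A small sketch reusing the names from the example above (illustrative only, not part of the patch):

```scala
sql("CREATE TEMPORARY VIEW t AS SELECT 1")
sql("CREATE DATABASE db")
sql("CREATE TABLE db.t USING csv AS SELECT 1")

// Fails after this change: the unqualified name resolves to the temp view first,
// and TRUNCATE TABLE expects a table.
// sql("TRUNCATE TABLE t")

// Still succeeds: the qualified name bypasses local temporary views
// and resolves to the session catalog table.
sql("TRUNCATE TABLE db.t")
```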
Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/parser/AstBuilder.scala | 6 +++--- .../sql/catalyst/plans/logical/v2Commands.scala | 9 +++++++++ .../spark/sql/catalyst/parser/DDLParserSuite.scala | 6 ++++-- .../catalyst/analysis/ResolveSessionCatalog.scala | 5 ++--- .../datasources/v2/DataSourceV2Strategy.scala | 3 +++ .../spark/sql/connector/DataSourceV2SQLSuite.scala | 4 ++-- .../apache/spark/sql/execution/SQLViewSuite.scala | 13 ++++++++----- .../spark/sql/execution/command/DDLSuite.scala | 10 +++++++--- 8 files changed, 38 insertions(+), 18 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 50580b8e335ff..a4298abd211b3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3356,7 +3356,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } /** - * Create a [[TruncateTableStatement]] command. + * Create a [[TruncateTable]] command. * * For example: * {{{ @@ -3364,8 +3364,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * }}} */ override def visitTruncateTable(ctx: TruncateTableContext): LogicalPlan = withOrigin(ctx) { - TruncateTableStatement( - visitMultipartIdentifier(ctx.multipartIdentifier), + TruncateTable( + UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier), "TRUNCATE TABLE"), Option(ctx.partitionSpec).map(visitNonOptionalPartitionSpec)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 5bda2b5b8db01..a65b9fc59bd55 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -670,3 +670,12 @@ case class LoadData( case class ShowCreateTable(child: LogicalPlan, asSerde: Boolean = false) extends Command { override def children: Seq[LogicalPlan] = child :: Nil } + +/** + * The logical plan of the TRUNCATE TABLE command. 
+ */ +case class TruncateTable( + child: LogicalPlan, + partitionSpec: Option[TablePartitionSpec]) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index bd28484b23f46..997c642276bfb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1621,11 +1621,13 @@ class DDLParserSuite extends AnalysisTest { test("TRUNCATE table") { comparePlans( parsePlan("TRUNCATE TABLE a.b.c"), - TruncateTableStatement(Seq("a", "b", "c"), None)) + TruncateTable(UnresolvedTable(Seq("a", "b", "c"), "TRUNCATE TABLE"), None)) comparePlans( parsePlan("TRUNCATE TABLE a.b.c PARTITION(ds='2017-06-10')"), - TruncateTableStatement(Seq("a", "b", "c"), Some(Map("ds" -> "2017-06-10")))) + TruncateTable( + UnresolvedTable(Seq("a", "b", "c"), "TRUNCATE TABLE"), + Some(Map("ds" -> "2017-06-10")))) } test("REFRESH TABLE") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 303ae47f06b84..726099991a897 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -456,10 +456,9 @@ class ResolveSessionCatalog( val name = parseTempViewOrV1Table(tbl, "UNCACHE TABLE") UncacheTableCommand(name.asTableIdentifier, ifExists) - case TruncateTableStatement(tbl, partitionSpec) => - val v1TableName = parseV1Table(tbl, "TRUNCATE TABLE") + case TruncateTable(ResolvedV1TableIdentifier(ident), partitionSpec) => TruncateTableCommand( - v1TableName.asTableIdentifier, + ident.asTableIdentifier, partitionSpec) case ShowPartitionsStatement(tbl, partitionSpec) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index e5c29312b80e7..30d976524bfa8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -302,6 +302,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case ShowCreateTable(_: ResolvedTable, _) => throw new AnalysisException("SHOW CREATE TABLE is not supported for v2 tables.") + case TruncateTable(_: ResolvedTable, _) => + throw new AnalysisException("TRUNCATE TABLE is not supported for v2 tables.") + case _ => Nil } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index dc4abf3eb19cf..9a3fa0c5bd3f4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -1986,8 +1986,8 @@ class DataSourceV2SQLSuite |PARTITIONED BY (id) """.stripMargin) - testV1Command("TRUNCATE TABLE", t) - testV1Command("TRUNCATE TABLE", s"$t PARTITION(id='1')") + testNotSupportedV2Command("TRUNCATE TABLE", t) + 
testNotSupportedV2Command("TRUNCATE TABLE", s"$t PARTITION(id='1')") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 504cc57dc12d3..edeebde7db726 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -176,15 +176,18 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { sql(s"""LOAD DATA LOCAL INPATH "$dataFilePath" INTO TABLE $viewName""") }.getMessage assert(e2.contains(s"$viewName is a temp view. 'LOAD DATA' expects a table")) - assertNoSuchTable(s"TRUNCATE TABLE $viewName") val e3 = intercept[AnalysisException] { - sql(s"SHOW CREATE TABLE $viewName") + sql(s"TRUNCATE TABLE $viewName") }.getMessage - assert(e3.contains(s"$viewName is a temp view not table or permanent view")) + assert(e3.contains(s"$viewName is a temp view. 'TRUNCATE TABLE' expects a table")) val e4 = intercept[AnalysisException] { - sql(s"ANALYZE TABLE $viewName COMPUTE STATISTICS") + sql(s"SHOW CREATE TABLE $viewName") }.getMessage assert(e4.contains(s"$viewName is a temp view not table or permanent view")) + val e5 = intercept[AnalysisException] { + sql(s"ANALYZE TABLE $viewName COMPUTE STATISTICS") + }.getMessage + assert(e5.contains(s"$viewName is a temp view not table or permanent view")) assertNoSuchTable(s"ANALYZE TABLE $viewName COMPUTE STATISTICS FOR COLUMNS id") } } @@ -219,7 +222,7 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { e = intercept[AnalysisException] { sql(s"TRUNCATE TABLE $viewName") }.getMessage - assert(e.contains(s"Operation not allowed: TRUNCATE TABLE on views: `default`.`testview`")) + assert(e.contains("default.testView is a view. 'TRUNCATE TABLE' expects a table")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 43a33860d262e..07201f9f85b5d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -2169,11 +2169,15 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { (1 to 10).map { i => (i, i) }.toDF("a", "b").createTempView("my_temp_tab") sql(s"CREATE TABLE my_ext_tab using parquet LOCATION '${tempDir.toURI}'") sql(s"CREATE VIEW my_view AS SELECT 1") - intercept[NoSuchTableException] { + val e1 = intercept[AnalysisException] { sql("TRUNCATE TABLE my_temp_tab") - } + }.getMessage + assert(e1.contains("my_temp_tab is a temp view. 'TRUNCATE TABLE' expects a table")) assertUnsupported("TRUNCATE TABLE my_ext_tab") - assertUnsupported("TRUNCATE TABLE my_view") + val e2 = intercept[AnalysisException] { + sql("TRUNCATE TABLE my_view") + }.getMessage + assert(e2.contains("default.my_view is a view. 'TRUNCATE TABLE' expects a table")) } } } From 048a9821c788b6796d52d1e2a0cd174377ebd0f0 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Tue, 24 Nov 2020 09:50:10 -0800 Subject: [PATCH 0560/1009] [SPARK-33535][INFRA][TESTS] Export LANG to en_US.UTF-8 in run-tests-jenkins script MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? It seems that Jenkins tests tasks in many pr have test failed. 
The failed cases include: - `org.apache.spark.sql.hive.thriftserver.SparkThriftServerProtocolVersionsSuite.HIVE_CLI_SERVICE_PROTOCOL_V1 get binary type` - `org.apache.spark.sql.hive.thriftserver.SparkThriftServerProtocolVersionsSuite.HIVE_CLI_SERVICE_PROTOCOL_V2 get binary type` - `org.apache.spark.sql.hive.thriftserver.SparkThriftServerProtocolVersionsSuite.HIVE_CLI_SERVICE_PROTOCOL_V3 get binary type` - `org.apache.spark.sql.hive.thriftserver.SparkThriftServerProtocolVersionsSuite.HIVE_CLI_SERVICE_PROTOCOL_V4 get binary type` - `org.apache.spark.sql.hive.thriftserver.SparkThriftServerProtocolVersionsSuite.HIVE_CLI_SERVICE_PROTOCOL_V5 get binary type` The error message as follows: ``` Error Messageorg.scalatest.exceptions.TestFailedException: "[?](" did not equal "[�]("Stacktracesbt.ForkMain$ForkError: org.scalatest.exceptions.TestFailedException: "[?](" did not equal "[�](" at org.scalatest.Assertions.newAssertionFailedException(Assertions.scala:472) at org.scalatest.Assertions.newAssertionFailedException$(Assertions.scala:471) at org.scalatest.Assertions$.newAssertionFailedException(Assertions.scala:1231) at org.scalatest.Assertions$AssertionsHelper.macroAssert(Assertions.scala:1295) at org.apache.spark.sql.hive.thriftserver.SparkThriftServerProtocolVersionsSuite.$anonfun$new$26(SparkThriftServerProtocolVersionsSuite.scala:302) ``` But they can pass the GitHub Action, maybe it's related to the `LANG` of the Jenkins build machine, this pr add `export LANG="en_US.UTF-8"` in `run-test-jenkins` script. ### Why are the changes needed? Ensure LANG in Jenkins test process is `en_US.UTF-8` to pass `HIVE_CLI_SERVICE_PROTOCOL_VX` related tests ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Jenkins tests pass Closes #30487 from LuciferYang/SPARK-33535. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- dev/run-tests-jenkins | 1 + 1 file changed, 1 insertion(+) diff --git a/dev/run-tests-jenkins b/dev/run-tests-jenkins index c3adc696a5122..c155d4ea3f076 100755 --- a/dev/run-tests-jenkins +++ b/dev/run-tests-jenkins @@ -26,6 +26,7 @@ FWDIR="$( cd "$( dirname "$0" )/.." && pwd )" cd "$FWDIR" export PATH=/home/anaconda/envs/py36/bin:$PATH +export LANG="en_US.UTF-8" PYTHON_VERSION_CHECK=$(python3 -c 'import sys; print(sys.version_info < (3, 6, 0))') if [[ "$PYTHON_VERSION_CHECK" == "True" ]]; then From 95b6dabc33515f1975eb889480ccca12bf5ac3c8 Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Wed, 25 Nov 2020 07:38:45 +0900 Subject: [PATCH 0561/1009] [SPARK-33287][SS][UI] Expose state custom metrics information on SS UI ### What changes were proposed in this pull request? Structured Streaming UI is not containing state custom metrics information. In this PR I've added it. ### Why are the changes needed? Missing state custom metrics information. ### Does this PR introduce _any_ user-facing change? Additional UI elements appear. ### How was this patch tested? Existing unit tests + manual test. ``` #Compile Spark echo "spark.sql.streaming.ui.enabledCustomMetricList stateOnCurrentVersionSizeBytes" >> conf/spark-defaults.conf sbin/start-master.sh sbin/start-worker.sh spark://gsomogyi-MBP16:7077 ./bin/spark-submit --master spark://gsomogyi-MBP16:7077 --deploy-mode client --class com.spark.Main ../spark-test/target/spark-test-1.0-SNAPSHOT-jar-with-dependencies.jar ``` Screenshot 2020-11-18 at 12 45 36 Closes #30336 from gaborgsomogyi/SPARK-33287. 
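Because the new `spark.sql.streaming.ui.enabledCustomMetricList` entry is registered in `StaticSQLConf` (see the diff below), it has to be set before the session starts: either in `spark-defaults.conf` as in the manual test above, or on the session builder. A minimal sketch, assuming the same metric name used elsewhere in this patch; the app name is illustrative:

```scala
import org.apache.spark.sql.SparkSession

// Static SQL conf: it must be supplied at session creation time and
// cannot be changed later with spark.conf.set().
val spark = SparkSession.builder()
  .appName("ss-ui-custom-metrics")
  .config("spark.sql.streaming.ui.enabledCustomMetricList", "stateOnCurrentVersionSizeBytes")
  .getOrCreate()
```

Several metric names can be given as a comma-separated list, and the page matches them case-insensitively against the custom metrics supported by the configured state store provider.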
Authored-by: Gabor Somogyi Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../spark/sql/internal/StaticSQLConf.scala | 12 ++ .../ui/StreamingQueryStatisticsPage.scala | 143 +++++++++++++----- .../ui/StreamingQueryPageSuite.scala | 5 + .../sql/streaming/ui/UISeleniumSuite.scala | 6 + 4 files changed, 127 insertions(+), 39 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala index ca1074fcf6fc0..02cb6f29622f5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala @@ -249,4 +249,16 @@ object StaticSQLConf { .version("3.1.0") .timeConf(TimeUnit.SECONDS) .createWithDefault(-1) + + val ENABLED_STREAMING_UI_CUSTOM_METRIC_LIST = + buildStaticConf("spark.sql.streaming.ui.enabledCustomMetricList") + .internal() + .doc("Configures a list of custom metrics on Structured Streaming UI, which are enabled. " + + "The list contains the name of the custom metrics separated by comma. In aggregation" + + "only sum used. The list of supported custom metrics is state store provider specific " + + "and it can be found out for example from query progress log entry.") + .version("3.1.0") + .stringConf + .toSequence + .createWithDefault(Nil) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala index f48672afb41f3..77b1e61d587a7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala @@ -19,18 +19,32 @@ package org.apache.spark.sql.streaming.ui import java.{util => ju} import java.lang.{Long => JLong} -import java.util.UUID +import java.util.{Locale, UUID} import javax.servlet.http.HttpServletRequest +import scala.collection.JavaConverters._ import scala.xml.{Node, NodeBuffer, Unparsed} import org.apache.spark.internal.Logging +import org.apache.spark.sql.execution.streaming.state.StateStoreProvider +import org.apache.spark.sql.internal.SQLConf.STATE_STORE_PROVIDER_CLASS +import org.apache.spark.sql.internal.StaticSQLConf.ENABLED_STREAMING_UI_CUSTOM_METRIC_LIST import org.apache.spark.sql.streaming.ui.UIUtils._ import org.apache.spark.ui.{GraphUIData, JsCollector, UIUtils => SparkUIUtils, WebUIPage} private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab) extends WebUIPage("statistics") with Logging { + // State store provider implementation mustn't do any heavyweight initialiation in constructor + // but in its init method. + private val supportedCustomMetrics = StateStoreProvider.create( + parent.parent.conf.get(STATE_STORE_PROVIDER_CLASS)).supportedCustomMetrics + logDebug(s"Supported custom metrics: $supportedCustomMetrics") + + private val enabledCustomMetrics = + parent.parent.conf.get(ENABLED_STREAMING_UI_CUSTOM_METRIC_LIST).map(_.toLowerCase(Locale.ROOT)) + logDebug(s"Enabled custom metrics: $enabledCustomMetrics") + def generateLoadResources(request: HttpServletRequest): Seq[Node] = { // scalastyle:off @@ -199,49 +213,100 @@ private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab) "records") graphUIDataForNumRowsDroppedByWatermark.generateDataJs(jsCollector) - // scalastyle:off - - -
    -
    Aggregated Number Of Total State Rows {SparkUIUtils.tooltip("Aggregated number of total state rows.", "right")}
    -
    - - {graphUIDataForNumberTotalRows.generateTimelineHtml(jsCollector)} - {graphUIDataForNumberTotalRows.generateHistogramHtml(jsCollector)} - - - -
    -
    Aggregated Number Of Updated State Rows {SparkUIUtils.tooltip("Aggregated number of updated state rows.", "right")}
    -
    - - {graphUIDataForNumberUpdatedRows.generateTimelineHtml(jsCollector)} - {graphUIDataForNumberUpdatedRows.generateHistogramHtml(jsCollector)} - - - -
    -
    Aggregated State Memory Used In Bytes {SparkUIUtils.tooltip("Aggregated state memory used in bytes.", "right")}
    -
    - - {graphUIDataForMemoryUsedBytes.generateTimelineHtml(jsCollector)} - {graphUIDataForMemoryUsedBytes.generateHistogramHtml(jsCollector)} - - - -
    -
    Aggregated Number Of Rows Dropped By Watermark {SparkUIUtils.tooltip("Accumulates all input rows being dropped in stateful operators by watermark. 'Inputs' are relative to operators.", "right")}
    -
    - - {graphUIDataForNumRowsDroppedByWatermark.generateTimelineHtml(jsCollector)} - {graphUIDataForNumRowsDroppedByWatermark.generateHistogramHtml(jsCollector)} - - // scalastyle:on + val result = + // scalastyle:off + + +
    +
    Aggregated Number Of Total State Rows {SparkUIUtils.tooltip("Aggregated number of total state rows.", "right")}
    +
    + + {graphUIDataForNumberTotalRows.generateTimelineHtml(jsCollector)} + {graphUIDataForNumberTotalRows.generateHistogramHtml(jsCollector)} + + + +
    +
    Aggregated Number Of Updated State Rows {SparkUIUtils.tooltip("Aggregated number of updated state rows.", "right")}
    +
    + + {graphUIDataForNumberUpdatedRows.generateTimelineHtml(jsCollector)} + {graphUIDataForNumberUpdatedRows.generateHistogramHtml(jsCollector)} + + + +
    +
    Aggregated State Memory Used In Bytes {SparkUIUtils.tooltip("Aggregated state memory used in bytes.", "right")}
    +
    + + {graphUIDataForMemoryUsedBytes.generateTimelineHtml(jsCollector)} + {graphUIDataForMemoryUsedBytes.generateHistogramHtml(jsCollector)} + + + +
    +
    Aggregated Number Of Rows Dropped By Watermark {SparkUIUtils.tooltip("Accumulates all input rows being dropped in stateful operators by watermark. 'Inputs' are relative to operators.", "right")}
    +
    + + {graphUIDataForNumRowsDroppedByWatermark.generateTimelineHtml(jsCollector)} + {graphUIDataForNumRowsDroppedByWatermark.generateHistogramHtml(jsCollector)} + + // scalastyle:on + + if (enabledCustomMetrics.nonEmpty) { + result ++= generateAggregatedCustomMetrics(query, minBatchTime, maxBatchTime, jsCollector) + } + result } else { new NodeBuffer() } } + def generateAggregatedCustomMetrics( + query: StreamingQueryUIData, + minBatchTime: Long, + maxBatchTime: Long, + jsCollector: JsCollector): NodeBuffer = { + val result: NodeBuffer = new NodeBuffer + + // This is made sure on caller side but put it here to be defensive + require(query.lastProgress.stateOperators.nonEmpty) + query.lastProgress.stateOperators.head.customMetrics.keySet().asScala + .filter(m => enabledCustomMetrics.contains(m.toLowerCase(Locale.ROOT))).map { metricName => + val data = query.recentProgress.map(p => (parseProgressTimestamp(p.timestamp), + p.stateOperators.map(_.customMetrics.get(metricName).toDouble).sum)) + val max = data.maxBy(_._2)._2 + val metric = supportedCustomMetrics.find(_.name.equalsIgnoreCase(metricName)).get + + val graphUIData = + new GraphUIData( + s"aggregated-$metricName-timeline", + s"aggregated-$metricName-histogram", + data, + minBatchTime, + maxBatchTime, + 0, + max, + "") + graphUIData.generateDataJs(jsCollector) + + result ++= + // scalastyle:off + + +
    +
    Aggregated Custom Metric {s"$metricName"} {SparkUIUtils.tooltip(metric.desc, "right")}
    +
    + + {graphUIData.generateTimelineHtml(jsCollector)} + {graphUIData.generateHistogramHtml(jsCollector)} + + // scalastyle:on + } + + result + } + def generateStatTable(query: StreamingQueryUIData): Seq[Node] = { val batchToTimestamps = withNoProgress(query, query.recentProgress.map(p => (p.batchId, parseProgressTimestamp(p.timestamp))), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala index 640c21c52a146..c2b6688faf0e7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala @@ -24,8 +24,10 @@ import org.mockito.Mockito.{mock, when, RETURNS_SMART_NULLS} import org.scalatest.BeforeAndAfter import scala.xml.Node +import org.apache.spark.SparkConf import org.apache.spark.sql.streaming.StreamingQueryProgress import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.ui.SparkUI class StreamingQueryPageSuite extends SharedSparkSession with BeforeAndAfter { @@ -65,10 +67,13 @@ class StreamingQueryPageSuite extends SharedSparkSession with BeforeAndAfter { val request = mock(classOf[HttpServletRequest]) val tab = mock(classOf[StreamingQueryTab], RETURNS_SMART_NULLS) val statusListener = mock(classOf[StreamingQueryStatusListener], RETURNS_SMART_NULLS) + val ui = mock(classOf[SparkUI]) when(request.getParameter("id")).thenReturn(id.toString) when(tab.appName).thenReturn("testing") when(tab.headerTabs).thenReturn(Seq.empty) when(tab.statusListener).thenReturn(statusListener) + when(ui.conf).thenReturn(new SparkConf()) + when(tab.parent).thenReturn(ui) val streamQuery = createStreamQueryUIData(id) when(statusListener.allQueryStatus).thenReturn(Seq(streamQuery)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala index 307479db33949..94844c4e87a84 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala @@ -31,6 +31,7 @@ import org.apache.spark.internal.config.UI.{UI_ENABLED, UI_PORT} import org.apache.spark.sql.LocalSparkSession.withSparkSession import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.util.quietly +import org.apache.spark.sql.internal.StaticSQLConf.ENABLED_STREAMING_UI_CUSTOM_METRIC_LIST import org.apache.spark.sql.streaming.StreamingQueryException import org.apache.spark.ui.SparkUICssErrorHandler @@ -53,6 +54,7 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers with B .setAppName("ui-test") .set(UI_ENABLED, true) .set(UI_PORT, 0) + .set(ENABLED_STREAMING_UI_CUSTOM_METRIC_LIST, Seq("stateOnCurrentVersionSizeBytes")) additionalConfs.foreach { case (k, v) => conf.set(k, v) } val spark = SparkSession.builder().master(master).config(conf).getOrCreate() assert(spark.sparkContext.ui.isDefined) @@ -140,6 +142,10 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers with B summaryText should contain ("Aggregated Number Of Updated State Rows (?)") summaryText should contain ("Aggregated State Memory Used In Bytes (?)") summaryText should contain ("Aggregated Number Of Rows Dropped By Watermark (?)") + summaryText should contain ("Aggregated Custom Metric stateOnCurrentVersionSizeBytes" + + 
" (?)") + summaryText should not contain ("Aggregated Custom Metric loadedMapCacheHitCount (?)") + summaryText should not contain ("Aggregated Custom Metric loadedMapCacheMissCount (?)") } } finally { spark.streams.active.foreach(_.stop()) From 665817bd4fc07b18cee0f8c6ff759288472514c2 Mon Sep 17 00:00:00 2001 From: zero323 Date: Wed, 25 Nov 2020 09:27:04 +0900 Subject: [PATCH 0562/1009] [SPARK-33457][PYTHON] Adjust mypy configuration ### What changes were proposed in this pull request? This pull request: - Adds following flags to the main mypy configuration: - [`strict_optional`](https://mypy.readthedocs.io/en/stable/config_file.html#confval-strict_optional) - [`no_implicit_optional`](https://mypy.readthedocs.io/en/stable/config_file.html#confval-no_implicit_optional) - [`disallow_untyped_defs`](https://mypy.readthedocs.io/en/stable/config_file.html#confval-disallow_untyped_calls) These flags are enabled only for public API and disabled for tests and internal modules. Additionally, these PR fixes missing annotations. ### Why are the changes needed? Primary reason to propose this changes is to use standard configuration as used by typeshed project. This will allow us to be more strict, especially when interacting with JVM code. See for example https://github.com/apache/spark/pull/29122#pullrequestreview-513112882 Additionally, it will allow us to detect cases where annotations have unintentionally omitted. ### Does this PR introduce _any_ user-facing change? Annotations only. ### How was this patch tested? `dev/lint-python`. Closes #30382 from zero323/SPARK-33457. Authored-by: zero323 Signed-off-by: HyukjinKwon --- python/mypy.ini | 87 +++++++++++++++++++++++ python/pyspark/broadcast.pyi | 10 +-- python/pyspark/context.pyi | 25 +++++-- python/pyspark/ml/classification.pyi | 6 +- python/pyspark/ml/common.pyi | 10 ++- python/pyspark/ml/evaluation.pyi | 24 ++++--- python/pyspark/ml/feature.pyi | 20 ++++-- python/pyspark/ml/linalg/__init__.pyi | 36 +++++----- python/pyspark/ml/pipeline.pyi | 4 +- python/pyspark/ml/regression.pyi | 10 +-- python/pyspark/mllib/classification.pyi | 2 +- python/pyspark/mllib/clustering.pyi | 6 +- python/pyspark/mllib/common.pyi | 20 ++++-- python/pyspark/mllib/linalg/__init__.pyi | 45 +++++++----- python/pyspark/mllib/random.pyi | 2 +- python/pyspark/mllib/recommendation.pyi | 4 +- python/pyspark/mllib/stat/_statistics.pyi | 2 +- python/pyspark/rdd.pyi | 8 ++- python/pyspark/resource/profile.pyi | 2 +- python/pyspark/sql/column.pyi | 8 ++- python/pyspark/sql/context.pyi | 6 +- python/pyspark/sql/functions.pyi | 8 ++- python/pyspark/sql/session.pyi | 10 ++- python/pyspark/sql/types.pyi | 15 ++-- python/pyspark/sql/udf.pyi | 7 +- python/pyspark/streaming/context.pyi | 2 +- python/pyspark/streaming/dstream.pyi | 10 ++- python/pyspark/streaming/kinesis.pyi | 2 +- 28 files changed, 277 insertions(+), 114 deletions(-) diff --git a/python/mypy.ini b/python/mypy.ini index 4a5368a519097..5103452a053be 100644 --- a/python/mypy.ini +++ b/python/mypy.ini @@ -16,10 +16,97 @@ ; [mypy] +strict_optional = True +no_implicit_optional = True +disallow_untyped_defs = True + +; Allow untyped def in internal modules and tests + +[mypy-pyspark.daemon] +disallow_untyped_defs = False + +[mypy-pyspark.find_spark_home] +disallow_untyped_defs = False + +[mypy-pyspark._globals] +disallow_untyped_defs = False + +[mypy-pyspark.install] +disallow_untyped_defs = False + +[mypy-pyspark.java_gateway] +disallow_untyped_defs = False + +[mypy-pyspark.join] +disallow_untyped_defs = False + 
+[mypy-pyspark.ml.tests.*] +disallow_untyped_defs = False + +[mypy-pyspark.mllib.tests.*] +disallow_untyped_defs = False + +[mypy-pyspark.rddsampler] +disallow_untyped_defs = False + +[mypy-pyspark.resource.tests.*] +disallow_untyped_defs = False + +[mypy-pyspark.serializers] +disallow_untyped_defs = False + +[mypy-pyspark.shuffle] +disallow_untyped_defs = False + +[mypy-pyspark.streaming.tests.*] +disallow_untyped_defs = False + +[mypy-pyspark.streaming.util] +disallow_untyped_defs = False + +[mypy-pyspark.sql.tests.*] +disallow_untyped_defs = False + +[mypy-pyspark.sql.pandas.serializers] +disallow_untyped_defs = False + +[mypy-pyspark.sql.pandas.types] +disallow_untyped_defs = False + +[mypy-pyspark.sql.pandas.typehints] +disallow_untyped_defs = False + +[mypy-pyspark.sql.pandas.utils] +disallow_untyped_defs = False + +[mypy-pyspark.sql.pandas._typing.protocols.*] +disallow_untyped_defs = False + +[mypy-pyspark.sql.utils] +disallow_untyped_defs = False + +[mypy-pyspark.tests.*] +disallow_untyped_defs = False + +[mypy-pyspark.testing.*] +disallow_untyped_defs = False + +[mypy-pyspark.traceback_utils] +disallow_untyped_defs = False + +[mypy-pyspark.util] +disallow_untyped_defs = False + +[mypy-pyspark.worker] +disallow_untyped_defs = False + +; Ignore errors in embedded third party code [mypy-pyspark.cloudpickle.*] ignore_errors = True +; Ignore missing imports for external untyped packages + [mypy-py4j.*] ignore_missing_imports = True diff --git a/python/pyspark/broadcast.pyi b/python/pyspark/broadcast.pyi index 4b019a509a003..944cb06d4178c 100644 --- a/python/pyspark/broadcast.pyi +++ b/python/pyspark/broadcast.pyi @@ -17,7 +17,7 @@ # under the License. import threading -from typing import Any, Dict, Generic, Optional, TypeVar +from typing import Any, Callable, Dict, Generic, Optional, Tuple, TypeVar T = TypeVar("T") @@ -32,14 +32,14 @@ class Broadcast(Generic[T]): path: Optional[Any] = ..., sock_file: Optional[Any] = ..., ) -> None: ... - def dump(self, value: Any, f: Any) -> None: ... - def load_from_path(self, path: Any): ... - def load(self, file: Any): ... + def dump(self, value: T, f: Any) -> None: ... + def load_from_path(self, path: Any) -> T: ... + def load(self, file: Any) -> T: ... @property def value(self) -> T: ... def unpersist(self, blocking: bool = ...) -> None: ... def destroy(self, blocking: bool = ...) -> None: ... - def __reduce__(self): ... + def __reduce__(self) -> Tuple[Callable[[int], T], Tuple[int]]: ... class BroadcastPickleRegistry(threading.local): def __init__(self) -> None: ... diff --git a/python/pyspark/context.pyi b/python/pyspark/context.pyi index 2789a38b3be9f..640a69cad08ab 100644 --- a/python/pyspark/context.pyi +++ b/python/pyspark/context.pyi @@ -16,7 +16,19 @@ # specific language governing permissions and limitations # under the License. -from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, TypeVar +from typing import ( + Any, + Callable, + Dict, + Iterable, + List, + NoReturn, + Optional, + Tuple, + Type, + TypeVar, +) +from types import TracebackType from py4j.java_gateway import JavaGateway, JavaObject # type: ignore[import] @@ -51,9 +63,14 @@ class SparkContext: jsc: Optional[JavaObject] = ..., profiler_cls: type = ..., ) -> None: ... - def __getnewargs__(self): ... - def __enter__(self): ... - def __exit__(self, type, value, trace): ... + def __getnewargs__(self) -> NoReturn: ... + def __enter__(self) -> SparkContext: ... 
+ def __exit__( + self, + type: Optional[Type[BaseException]], + value: Optional[BaseException], + trace: Optional[TracebackType], + ) -> None: ... @classmethod def getOrCreate(cls, conf: Optional[SparkConf] = ...) -> SparkContext: ... def setLogLevel(self, logLevel: str) -> None: ... diff --git a/python/pyspark/ml/classification.pyi b/python/pyspark/ml/classification.pyi index 4bde851bb1e0d..c44176a13a69b 100644 --- a/python/pyspark/ml/classification.pyi +++ b/python/pyspark/ml/classification.pyi @@ -107,7 +107,7 @@ class _JavaProbabilisticClassifier( class _JavaProbabilisticClassificationModel( ProbabilisticClassificationModel, _JavaClassificationModel[T] ): - def predictProbability(self, value: Any): ... + def predictProbability(self, value: Vector) -> Vector: ... class _ClassificationSummary(JavaWrapper): @property @@ -543,7 +543,7 @@ class RandomForestClassificationModel( @property def trees(self) -> List[DecisionTreeClassificationModel]: ... def summary(self) -> RandomForestClassificationTrainingSummary: ... - def evaluate(self, dataset) -> RandomForestClassificationSummary: ... + def evaluate(self, dataset: DataFrame) -> RandomForestClassificationSummary: ... class RandomForestClassificationSummary(_ClassificationSummary): ... class RandomForestClassificationTrainingSummary( @@ -891,7 +891,7 @@ class FMClassifier( solver: str = ..., thresholds: Optional[Any] = ..., seed: Optional[Any] = ..., - ): ... + ) -> FMClassifier: ... def setFactorSize(self, value: int) -> FMClassifier: ... def setFitLinear(self, value: bool) -> FMClassifier: ... def setMiniBatchFraction(self, value: float) -> FMClassifier: ... diff --git a/python/pyspark/ml/common.pyi b/python/pyspark/ml/common.pyi index 7bf0ed6183d8a..a38fc5734f466 100644 --- a/python/pyspark/ml/common.pyi +++ b/python/pyspark/ml/common.pyi @@ -16,5 +16,11 @@ # specific language governing permissions and limitations # under the License. -def callJavaFunc(sc, func, *args): ... -def inherit_doc(cls): ... +from typing import Any, TypeVar + +import pyspark.context + +C = TypeVar("C", bound=type) + +def callJavaFunc(sc: pyspark.context.SparkContext, func: Any, *args: Any) -> Any: ... +def inherit_doc(cls: C) -> C: ... diff --git a/python/pyspark/ml/evaluation.pyi b/python/pyspark/ml/evaluation.pyi index ea0a9f045cd6a..55a3ae2774115 100644 --- a/python/pyspark/ml/evaluation.pyi +++ b/python/pyspark/ml/evaluation.pyi @@ -39,9 +39,12 @@ from pyspark.ml.param.shared import ( HasWeightCol, ) from pyspark.ml.util import JavaMLReadable, JavaMLWritable +from pyspark.sql.dataframe import DataFrame class Evaluator(Params, metaclass=abc.ABCMeta): - def evaluate(self, dataset, params: Optional[ParamMap] = ...) -> float: ... + def evaluate( + self, dataset: DataFrame, params: Optional[ParamMap] = ... + ) -> float: ... def isLargerBetter(self) -> bool: ... class JavaEvaluator(JavaParams, Evaluator, metaclass=abc.ABCMeta): @@ -75,16 +78,15 @@ class BinaryClassificationEvaluator( def setLabelCol(self, value: str) -> BinaryClassificationEvaluator: ... def setRawPredictionCol(self, value: str) -> BinaryClassificationEvaluator: ... def setWeightCol(self, value: str) -> BinaryClassificationEvaluator: ... - -def setParams( - self, - *, - rawPredictionCol: str = ..., - labelCol: str = ..., - metricName: BinaryClassificationEvaluatorMetricType = ..., - weightCol: Optional[str] = ..., - numBins: int = ... -) -> BinaryClassificationEvaluator: ... 
+ def setParams( + self, + *, + rawPredictionCol: str = ..., + labelCol: str = ..., + metricName: BinaryClassificationEvaluatorMetricType = ..., + weightCol: Optional[str] = ..., + numBins: int = ... + ) -> BinaryClassificationEvaluator: ... class RegressionEvaluator( JavaEvaluator, diff --git a/python/pyspark/ml/feature.pyi b/python/pyspark/ml/feature.pyi index f5b12a5b2ffc6..4999defdf8a70 100644 --- a/python/pyspark/ml/feature.pyi +++ b/python/pyspark/ml/feature.pyi @@ -100,9 +100,9 @@ class _LSHParams(HasInputCol, HasOutputCol): def getNumHashTables(self) -> int: ... class _LSH(Generic[JM], JavaEstimator[JM], _LSHParams, JavaMLReadable, JavaMLWritable): - def setNumHashTables(self: P, value) -> P: ... - def setInputCol(self: P, value) -> P: ... - def setOutputCol(self: P, value) -> P: ... + def setNumHashTables(self: P, value: int) -> P: ... + def setInputCol(self: P, value: str) -> P: ... + def setOutputCol(self: P, value: str) -> P: ... class _LSHModel(JavaModel, _LSHParams): def setInputCol(self: P, value: str) -> P: ... @@ -1518,7 +1518,7 @@ class ChiSqSelector( fpr: float = ..., fdr: float = ..., fwe: float = ... - ): ... + ) -> ChiSqSelector: ... def setSelectorType(self, value: str) -> ChiSqSelector: ... def setNumTopFeatures(self, value: int) -> ChiSqSelector: ... def setPercentile(self, value: float) -> ChiSqSelector: ... @@ -1602,7 +1602,10 @@ class _VarianceThresholdSelectorParams(HasFeaturesCol, HasOutputCol): def getVarianceThreshold(self) -> float: ... class VarianceThresholdSelector( - JavaEstimator, _VarianceThresholdSelectorParams, JavaMLReadable, JavaMLWritable + JavaEstimator[VarianceThresholdSelectorModel], + _VarianceThresholdSelectorParams, + JavaMLReadable[VarianceThresholdSelector], + JavaMLWritable, ): def __init__( self, @@ -1615,13 +1618,16 @@ class VarianceThresholdSelector( featuresCol: str = ..., outputCol: Optional[str] = ..., varianceThreshold: float = ..., - ): ... + ) -> VarianceThresholdSelector: ... def setVarianceThreshold(self, value: float) -> VarianceThresholdSelector: ... def setFeaturesCol(self, value: str) -> VarianceThresholdSelector: ... def setOutputCol(self, value: str) -> VarianceThresholdSelector: ... class VarianceThresholdSelectorModel( - JavaModel, _VarianceThresholdSelectorParams, JavaMLReadable, JavaMLWritable + JavaModel, + _VarianceThresholdSelectorParams, + JavaMLReadable[VarianceThresholdSelectorModel], + JavaMLWritable, ): def setFeaturesCol(self, value: str) -> VarianceThresholdSelectorModel: ... def setOutputCol(self, value: str) -> VarianceThresholdSelectorModel: ... diff --git a/python/pyspark/ml/linalg/__init__.pyi b/python/pyspark/ml/linalg/__init__.pyi index a576b30aec308..b4fba8823b678 100644 --- a/python/pyspark/ml/linalg/__init__.pyi +++ b/python/pyspark/ml/linalg/__init__.pyi @@ -17,7 +17,7 @@ # under the License. from typing import overload -from typing import Any, Dict, Iterable, List, Optional, Tuple, Union +from typing import Any, Dict, Iterable, List, NoReturn, Optional, Tuple, Type, Union from pyspark.ml import linalg as newlinalg # noqa: F401 from pyspark.sql.types import StructType, UserDefinedType @@ -45,7 +45,7 @@ class MatrixUDT(UserDefinedType): @classmethod def scalaUDT(cls) -> str: ... def serialize( - self, obj + self, obj: Matrix ) -> Tuple[ int, int, int, Optional[List[int]], Optional[List[int]], List[float], bool ]: ... @@ -64,9 +64,7 @@ class DenseVector(Vector): def __init__(self, __arr: bytes) -> None: ... @overload def __init__(self, __arr: Iterable[float]) -> None: ... 
- @staticmethod - def parse(s) -> DenseVector: ... - def __reduce__(self) -> Tuple[type, bytes]: ... + def __reduce__(self) -> Tuple[Type[DenseVector], bytes]: ... def numNonzeros(self) -> int: ... def norm(self, p: Union[float, str]) -> float64: ... def dot(self, other: Iterable[float]) -> float64: ... @@ -112,16 +110,14 @@ class SparseVector(Vector): def __init__(self, size: int, __map: Dict[int, float]) -> None: ... def numNonzeros(self) -> int: ... def norm(self, p: Union[float, str]) -> float64: ... - def __reduce__(self): ... - @staticmethod - def parse(s: str) -> SparseVector: ... + def __reduce__(self) -> Tuple[Type[SparseVector], Tuple[int, bytes, bytes]]: ... def dot(self, other: Iterable[float]) -> float64: ... def squared_distance(self, other: Iterable[float]) -> float64: ... def toArray(self) -> ndarray: ... def __len__(self) -> int: ... - def __eq__(self, other) -> bool: ... + def __eq__(self, other: Any) -> bool: ... def __getitem__(self, index: int) -> float64: ... - def __ne__(self, other) -> bool: ... + def __ne__(self, other: Any) -> bool: ... def __hash__(self) -> int: ... class Vectors: @@ -144,13 +140,13 @@ class Vectors: def sparse(size: int, __map: Dict[int, float]) -> SparseVector: ... @overload @staticmethod - def dense(self, *elements: float) -> DenseVector: ... + def dense(*elements: float) -> DenseVector: ... @overload @staticmethod - def dense(self, __arr: bytes) -> DenseVector: ... + def dense(__arr: bytes) -> DenseVector: ... @overload @staticmethod - def dense(self, __arr: Iterable[float]) -> DenseVector: ... + def dense(__arr: Iterable[float]) -> DenseVector: ... @staticmethod def stringify(vector: Vector) -> str: ... @staticmethod @@ -158,8 +154,6 @@ class Vectors: @staticmethod def norm(vector: Vector, p: Union[float, str]) -> float64: ... @staticmethod - def parse(s: str) -> Vector: ... - @staticmethod def zeros(size: int) -> DenseVector: ... class Matrix: @@ -170,7 +164,7 @@ class Matrix: def __init__( self, numRows: int, numCols: int, isTransposed: bool = ... ) -> None: ... - def toArray(self): ... + def toArray(self) -> NoReturn: ... class DenseMatrix(Matrix): values: Any @@ -186,11 +180,11 @@ class DenseMatrix(Matrix): values: Iterable[float], isTransposed: bool = ..., ) -> None: ... - def __reduce__(self) -> Tuple[type, Tuple[int, int, bytes, int]]: ... + def __reduce__(self) -> Tuple[Type[DenseMatrix], Tuple[int, int, bytes, int]]: ... def toArray(self) -> ndarray: ... def toSparse(self) -> SparseMatrix: ... def __getitem__(self, indices: Tuple[int, int]) -> float64: ... - def __eq__(self, other) -> bool: ... + def __eq__(self, other: Any) -> bool: ... class SparseMatrix(Matrix): colPtrs: ndarray @@ -216,11 +210,13 @@ class SparseMatrix(Matrix): values: Iterable[float], isTransposed: bool = ..., ) -> None: ... - def __reduce__(self) -> Tuple[type, Tuple[int, int, bytes, bytes, bytes, int]]: ... + def __reduce__( + self, + ) -> Tuple[Type[SparseMatrix], Tuple[int, int, bytes, bytes, bytes, int]]: ... def __getitem__(self, indices: Tuple[int, int]) -> float64: ... def toArray(self) -> ndarray: ... def toDense(self) -> DenseMatrix: ... - def __eq__(self, other) -> bool: ... + def __eq__(self, other: Any) -> bool: ... class Matrices: @overload diff --git a/python/pyspark/ml/pipeline.pyi b/python/pyspark/ml/pipeline.pyi index 44680586d70d1..f47e9e012ae14 100644 --- a/python/pyspark/ml/pipeline.pyi +++ b/python/pyspark/ml/pipeline.pyi @@ -51,7 +51,7 @@ class PipelineWriter(MLWriter): def __init__(self, instance: Pipeline) -> None: ... 
def saveImpl(self, path: str) -> None: ... -class PipelineReader(MLReader): +class PipelineReader(MLReader[Pipeline]): cls: Type[Pipeline] def __init__(self, cls: Type[Pipeline]) -> None: ... def load(self, path: str) -> Pipeline: ... @@ -61,7 +61,7 @@ class PipelineModelWriter(MLWriter): def __init__(self, instance: PipelineModel) -> None: ... def saveImpl(self, path: str) -> None: ... -class PipelineModelReader(MLReader): +class PipelineModelReader(MLReader[PipelineModel]): cls: Type[PipelineModel] def __init__(self, cls: Type[PipelineModel]) -> None: ... def load(self, path: str) -> PipelineModel: ... diff --git a/python/pyspark/ml/regression.pyi b/python/pyspark/ml/regression.pyi index 5cb0e7a5092f7..b8f1e61859c72 100644 --- a/python/pyspark/ml/regression.pyi +++ b/python/pyspark/ml/regression.pyi @@ -414,7 +414,7 @@ class RandomForestRegressionModel( _TreeEnsembleModel, _RandomForestRegressorParams, JavaMLWritable, - JavaMLReadable, + JavaMLReadable[RandomForestRegressionModel], ): @property def trees(self) -> List[DecisionTreeRegressionModel]: ... @@ -749,10 +749,10 @@ class _FactorizationMachinesParams( initStd: Param[float] solver: Param[str] def __init__(self, *args: Any): ... - def getFactorSize(self): ... - def getFitLinear(self): ... - def getMiniBatchFraction(self): ... - def getInitStd(self): ... + def getFactorSize(self) -> int: ... + def getFitLinear(self) -> bool: ... + def getMiniBatchFraction(self) -> float: ... + def getInitStd(self) -> float: ... class FMRegressor( _JavaRegressor[FMRegressionModel], diff --git a/python/pyspark/mllib/classification.pyi b/python/pyspark/mllib/classification.pyi index c51882c87bfc2..967b0a9f289dd 100644 --- a/python/pyspark/mllib/classification.pyi +++ b/python/pyspark/mllib/classification.pyi @@ -118,7 +118,7 @@ class NaiveBayesModel(Saveable, Loader[NaiveBayesModel]): labels: ndarray pi: ndarray theta: ndarray - def __init__(self, labels, pi, theta) -> None: ... + def __init__(self, labels: ndarray, pi: ndarray, theta: ndarray) -> None: ... @overload def predict(self, x: VectorLike) -> float64: ... @overload diff --git a/python/pyspark/mllib/clustering.pyi b/python/pyspark/mllib/clustering.pyi index 1c3eba17e201c..b4f349612f0fe 100644 --- a/python/pyspark/mllib/clustering.pyi +++ b/python/pyspark/mllib/clustering.pyi @@ -63,7 +63,7 @@ class BisectingKMeans: class KMeansModel(Saveable, Loader[KMeansModel]): centers: List[ndarray] - def __init__(self, centers: List[ndarray]) -> None: ... + def __init__(self, centers: List[VectorLike]) -> None: ... @property def clusterCenters(self) -> List[ndarray]: ... @property @@ -144,7 +144,9 @@ class PowerIterationClustering: class Assignment(NamedTuple("Assignment", [("id", int), ("cluster", int)])): ... class StreamingKMeansModel(KMeansModel): - def __init__(self, clusterCenters, clusterWeights) -> None: ... + def __init__( + self, clusterCenters: List[VectorLike], clusterWeights: VectorLike + ) -> None: ... @property def clusterWeights(self) -> List[float64]: ... centers: ndarray diff --git a/python/pyspark/mllib/common.pyi b/python/pyspark/mllib/common.pyi index 1df308b91b5a1..daba212d93633 100644 --- a/python/pyspark/mllib/common.pyi +++ b/python/pyspark/mllib/common.pyi @@ -16,12 +16,20 @@ # specific language governing permissions and limitations # under the License. -def callJavaFunc(sc, func, *args): ... -def callMLlibFunc(name, *args): ... 
+from typing import Any, TypeVar + +import pyspark.context + +from py4j.java_gateway import JavaObject + +C = TypeVar("C", bound=type) + +def callJavaFunc(sc: pyspark.context.SparkContext, func: Any, *args: Any) -> Any: ... +def callMLlibFunc(name: str, *args: Any) -> Any: ... class JavaModelWrapper: - def __init__(self, java_model) -> None: ... - def __del__(self): ... - def call(self, name, *a): ... + def __init__(self, java_model: JavaObject) -> None: ... + def __del__(self) -> None: ... + def call(self, name: str, *a: Any) -> Any: ... -def inherit_doc(cls): ... +def inherit_doc(cls: C) -> C: ... diff --git a/python/pyspark/mllib/linalg/__init__.pyi b/python/pyspark/mllib/linalg/__init__.pyi index c0719c535c8f4..60d16b26f3590 100644 --- a/python/pyspark/mllib/linalg/__init__.pyi +++ b/python/pyspark/mllib/linalg/__init__.pyi @@ -17,7 +17,18 @@ # under the License. from typing import overload -from typing import Any, Dict, Generic, Iterable, List, Optional, Tuple, TypeVar, Union +from typing import ( + Any, + Dict, + Generic, + Iterable, + List, + Optional, + Tuple, + Type, + TypeVar, + Union, +) from pyspark.ml import linalg as newlinalg from pyspark.sql.types import StructType, UserDefinedType from numpy import float64, ndarray # type: ignore[import] @@ -46,7 +57,7 @@ class MatrixUDT(UserDefinedType): @classmethod def scalaUDT(cls) -> str: ... def serialize( - self, obj + self, obj: Matrix ) -> Tuple[ int, int, int, Optional[List[int]], Optional[List[int]], List[float], bool ]: ... @@ -67,8 +78,8 @@ class DenseVector(Vector): @overload def __init__(self, __arr: Iterable[float]) -> None: ... @staticmethod - def parse(s) -> DenseVector: ... - def __reduce__(self) -> Tuple[type, bytes]: ... + def parse(s: str) -> DenseVector: ... + def __reduce__(self) -> Tuple[Type[DenseVector], bytes]: ... def numNonzeros(self) -> int: ... def norm(self, p: Union[float, str]) -> float64: ... def dot(self, other: Iterable[float]) -> float64: ... @@ -115,7 +126,7 @@ class SparseVector(Vector): def __init__(self, size: int, __map: Dict[int, float]) -> None: ... def numNonzeros(self) -> int: ... def norm(self, p: Union[float, str]) -> float64: ... - def __reduce__(self): ... + def __reduce__(self) -> Tuple[Type[SparseVector], Tuple[int, bytes, bytes]]: ... @staticmethod def parse(s: str) -> SparseVector: ... def dot(self, other: Iterable[float]) -> float64: ... @@ -123,9 +134,9 @@ class SparseVector(Vector): def toArray(self) -> ndarray: ... def asML(self) -> newlinalg.SparseVector: ... def __len__(self) -> int: ... - def __eq__(self, other) -> bool: ... + def __eq__(self, other: Any) -> bool: ... def __getitem__(self, index: int) -> float64: ... - def __ne__(self, other) -> bool: ... + def __ne__(self, other: Any) -> bool: ... def __hash__(self) -> int: ... class Vectors: @@ -148,13 +159,13 @@ class Vectors: def sparse(size: int, __map: Dict[int, float]) -> SparseVector: ... @overload @staticmethod - def dense(self, *elements: float) -> DenseVector: ... + def dense(*elements: float) -> DenseVector: ... @overload @staticmethod - def dense(self, __arr: bytes) -> DenseVector: ... + def dense(__arr: bytes) -> DenseVector: ... @overload @staticmethod - def dense(self, __arr: Iterable[float]) -> DenseVector: ... + def dense(__arr: Iterable[float]) -> DenseVector: ... @staticmethod def fromML(vec: newlinalg.DenseVector) -> DenseVector: ... @staticmethod @@ -176,8 +187,8 @@ class Matrix: def __init__( self, numRows: int, numCols: int, isTransposed: bool = ... ) -> None: ... - def toArray(self): ... 
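
The `inherit_doc(cls: C) -> C` annotation above, with `C` bound to `type`, says the decorator hands back the same class it was given, so the decorated class keeps its own type for checkers. A rough sketch of such a decorator; the body is an approximation for illustration, not the exact PySpark implementation:

```
from typing import TypeVar

C = TypeVar("C", bound=type)

def inherit_doc(cls: C) -> C:
    # Copy missing docstrings from the first base class that defines one.
    for name, attr in vars(cls).items():
        if callable(attr) and not getattr(attr, "__doc__", None):
            for parent in cls.__bases__:
                parent_attr = getattr(parent, name, None)
                if parent_attr is not None and getattr(parent_attr, "__doc__", None):
                    attr.__doc__ = parent_attr.__doc__
                    break
    return cls

class Base:
    def transform(self):
        """Apply the transformation."""

@inherit_doc
class Child(Base):
    def transform(self):
        ...

assert Child.transform.__doc__ == "Apply the transformation."
```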
- def asML(self): ... + def toArray(self) -> ndarray: ... + def asML(self) -> newlinalg.Matrix: ... class DenseMatrix(Matrix): values: Any @@ -193,12 +204,12 @@ class DenseMatrix(Matrix): values: Iterable[float], isTransposed: bool = ..., ) -> None: ... - def __reduce__(self) -> Tuple[type, Tuple[int, int, bytes, int]]: ... + def __reduce__(self) -> Tuple[Type[DenseMatrix], Tuple[int, int, bytes, int]]: ... def toArray(self) -> ndarray: ... def toSparse(self) -> SparseMatrix: ... def asML(self) -> newlinalg.DenseMatrix: ... def __getitem__(self, indices: Tuple[int, int]) -> float64: ... - def __eq__(self, other) -> bool: ... + def __eq__(self, other: Any) -> bool: ... class SparseMatrix(Matrix): colPtrs: ndarray @@ -224,12 +235,14 @@ class SparseMatrix(Matrix): values: Iterable[float], isTransposed: bool = ..., ) -> None: ... - def __reduce__(self) -> Tuple[type, Tuple[int, int, bytes, bytes, bytes, int]]: ... + def __reduce__( + self, + ) -> Tuple[Type[SparseMatrix], Tuple[int, int, bytes, bytes, bytes, int]]: ... def __getitem__(self, indices: Tuple[int, int]) -> float64: ... def toArray(self) -> ndarray: ... def toDense(self) -> DenseMatrix: ... def asML(self) -> newlinalg.SparseMatrix: ... - def __eq__(self, other) -> bool: ... + def __eq__(self, other: Any) -> bool: ... class Matrices: @overload diff --git a/python/pyspark/mllib/random.pyi b/python/pyspark/mllib/random.pyi index dc5f4701614da..ec83170625c74 100644 --- a/python/pyspark/mllib/random.pyi +++ b/python/pyspark/mllib/random.pyi @@ -90,7 +90,7 @@ class RandomRDDs: def logNormalVectorRDD( sc: SparkContext, mean: float, - std, + std: float, numRows: int, numCols: int, numPartitions: Optional[int] = ..., diff --git a/python/pyspark/mllib/recommendation.pyi b/python/pyspark/mllib/recommendation.pyi index e2f15494209e9..4fea0acf3c1f9 100644 --- a/python/pyspark/mllib/recommendation.pyi +++ b/python/pyspark/mllib/recommendation.pyi @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. -from typing import List, Optional, Tuple, Union +from typing import List, Optional, Tuple, Type, Union import array from collections import namedtuple @@ -27,7 +27,7 @@ from pyspark.mllib.common import JavaModelWrapper from pyspark.mllib.util import JavaLoader, JavaSaveable class Rating(namedtuple("Rating", ["user", "product", "rating"])): - def __reduce__(self): ... + def __reduce__(self) -> Tuple[Type[Rating], Tuple[int, int, float]]: ... class MatrixFactorizationModel( JavaModelWrapper, JavaSaveable, JavaLoader[MatrixFactorizationModel] diff --git a/python/pyspark/mllib/stat/_statistics.pyi b/python/pyspark/mllib/stat/_statistics.pyi index 4d2701d486881..3834d51639eb2 100644 --- a/python/pyspark/mllib/stat/_statistics.pyi +++ b/python/pyspark/mllib/stat/_statistics.pyi @@ -65,5 +65,5 @@ class Statistics: def chiSqTest(observed: RDD[LabeledPoint]) -> List[ChiSqTestResult]: ... @staticmethod def kolmogorovSmirnovTest( - data, distName: Literal["norm"] = ..., *params: float + data: RDD[float], distName: Literal["norm"] = ..., *params: float ) -> KolmogorovSmirnovTestResult: ... diff --git a/python/pyspark/rdd.pyi b/python/pyspark/rdd.pyi index 35c49e952b0cd..a277cd9f7edae 100644 --- a/python/pyspark/rdd.pyi +++ b/python/pyspark/rdd.pyi @@ -85,12 +85,16 @@ class PythonEvalType: SQL_COGROUPED_MAP_PANDAS_UDF: PandasCogroupedMapUDFType class BoundedFloat(float): - def __new__(cls, mean: float, confidence: float, low: float, high: float): ... 
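
A quick check of the `Rating.__reduce__` annotation above: the namedtuple reduces to the class plus a `(user, product, rating)` tuple, which is exactly what `Tuple[Type[Rating], Tuple[int, int, float]]` encodes. No SparkContext is needed for this:

```
from pyspark.mllib.recommendation import Rating

r = Rating(1, 2, 5.0)
cls, args = r.__reduce__()
assert cls is Rating
assert args == (1, 2, 5.0)
```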
+ def __new__( + cls, mean: float, confidence: float, low: float, high: float + ) -> BoundedFloat: ... class Partitioner: numPartitions: int partitionFunc: Callable[[Any], int] - def __init__(self, numPartitions, partitionFunc) -> None: ... + def __init__( + self, numPartitions: int, partitionFunc: Callable[[Any], int] + ) -> None: ... def __eq__(self, other: Any) -> bool: ... def __call__(self, k: Any) -> int: ... diff --git a/python/pyspark/resource/profile.pyi b/python/pyspark/resource/profile.pyi index 6763baf6590a3..04838692436df 100644 --- a/python/pyspark/resource/profile.pyi +++ b/python/pyspark/resource/profile.pyi @@ -49,7 +49,7 @@ class ResourceProfileBuilder: def __init__(self) -> None: ... def require( self, resourceRequest: Union[ExecutorResourceRequest, TaskResourceRequests] - ): ... + ) -> ResourceProfileBuilder: ... def clearExecutorResourceRequests(self) -> None: ... def clearTaskResourceRequests(self) -> None: ... @property diff --git a/python/pyspark/sql/column.pyi b/python/pyspark/sql/column.pyi index 0fbb10053fdbf..1f63e65b3de81 100644 --- a/python/pyspark/sql/column.pyi +++ b/python/pyspark/sql/column.pyi @@ -32,7 +32,7 @@ from pyspark.sql.window import WindowSpec from py4j.java_gateway import JavaObject # type: ignore[import] class Column: - def __init__(self, JavaObject) -> None: ... + def __init__(self, jc: JavaObject) -> None: ... def __neg__(self) -> Column: ... def __add__(self, other: Union[Column, LiteralType, DecimalLiteral]) -> Column: ... def __sub__(self, other: Union[Column, LiteralType, DecimalLiteral]) -> Column: ... @@ -105,7 +105,11 @@ class Column: def name(self, *alias: str) -> Column: ... def cast(self, dataType: Union[DataType, str]) -> Column: ... def astype(self, dataType: Union[DataType, str]) -> Column: ... - def between(self, lowerBound, upperBound) -> Column: ... + def between( + self, + lowerBound: Union[Column, LiteralType, DateTimeLiteral, DecimalLiteral], + upperBound: Union[Column, LiteralType, DateTimeLiteral, DecimalLiteral], + ) -> Column: ... def when(self, condition: Column, value: Any) -> Column: ... def otherwise(self, value: Any) -> Column: ... def over(self, window: WindowSpec) -> Column: ... diff --git a/python/pyspark/sql/context.pyi b/python/pyspark/sql/context.pyi index 64927b37ac2a9..915a0fe1f6709 100644 --- a/python/pyspark/sql/context.pyi +++ b/python/pyspark/sql/context.pyi @@ -43,14 +43,14 @@ class SQLContext: sparkSession: SparkSession def __init__( self, - sparkContext, + sparkContext: SparkContext, sparkSession: Optional[SparkSession] = ..., jsqlContext: Optional[JavaObject] = ..., ) -> None: ... @classmethod def getOrCreate(cls: type, sc: SparkContext) -> SQLContext: ... def newSession(self) -> SQLContext: ... - def setConf(self, key: str, value) -> None: ... + def setConf(self, key: str, value: Union[bool, int, str]) -> None: ... def getConf(self, key: str, defaultValue: Optional[str] = ...) -> str: ... @property def udf(self) -> UDFRegistration: ... @@ -116,7 +116,7 @@ class SQLContext: path: Optional[str] = ..., source: Optional[str] = ..., schema: Optional[StructType] = ..., - **options + **options: str ) -> DataFrame: ... def sql(self, sqlQuery: str) -> DataFrame: ... def table(self, tableName: str) -> DataFrame: ... diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi index 281c1d75436c6..252f883b5fb09 100644 --- a/python/pyspark/sql/functions.pyi +++ b/python/pyspark/sql/functions.pyi @@ -65,13 +65,13 @@ def round(col: ColumnOrName, scale: int = ...) -> Column: ... 
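
The `require(...) -> ResourceProfileBuilder` return annotation above documents the builder's fluent style: each call hands the builder back so requests can be chained. A short sketch of that usage, assuming Spark 3.1+ where the resource-profile API is available:

```
from pyspark.resource import (
    ExecutorResourceRequests,
    ResourceProfileBuilder,
    TaskResourceRequests,
)

# require() returns the builder itself, so executor and task requests chain.
executor_reqs = ExecutorResourceRequests().cores(2).memory("2g")
task_reqs = TaskResourceRequests().cpus(1)

profile = ResourceProfileBuilder().require(executor_reqs).require(task_reqs).build
```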
def bround(col: ColumnOrName, scale: int = ...) -> Column: ... def shiftLeft(col: ColumnOrName, numBits: int) -> Column: ... def shiftRight(col: ColumnOrName, numBits: int) -> Column: ... -def shiftRightUnsigned(col, numBits) -> Column: ... +def shiftRightUnsigned(col: ColumnOrName, numBits: int) -> Column: ... def spark_partition_id() -> Column: ... def expr(str: str) -> Column: ... def struct(*cols: ColumnOrName) -> Column: ... def greatest(*cols: ColumnOrName) -> Column: ... def least(*cols: Column) -> Column: ... -def when(condition: Column, value) -> Column: ... +def when(condition: Column, value: Any) -> Column: ... @overload def log(arg1: ColumnOrName) -> Column: ... @overload @@ -174,7 +174,9 @@ def create_map(*cols: ColumnOrName) -> Column: ... def array(*cols: ColumnOrName) -> Column: ... def array_contains(col: ColumnOrName, value: Any) -> Column: ... def arrays_overlap(a1: ColumnOrName, a2: ColumnOrName) -> Column: ... -def slice(x: ColumnOrName, start: Union[Column, int], length: Union[Column, int]) -> Column: ... +def slice( + x: ColumnOrName, start: Union[Column, int], length: Union[Column, int] +) -> Column: ... def array_join( col: ColumnOrName, delimiter: str, null_replacement: Optional[str] = ... ) -> Column: ... diff --git a/python/pyspark/sql/session.pyi b/python/pyspark/sql/session.pyi index 17ba8894c1731..6cd2d3bed2b2f 100644 --- a/python/pyspark/sql/session.pyi +++ b/python/pyspark/sql/session.pyi @@ -17,7 +17,8 @@ # under the License. from typing import overload -from typing import Any, Iterable, List, Optional, Tuple, TypeVar, Union +from typing import Any, Iterable, List, Optional, Tuple, Type, TypeVar, Union +from types import TracebackType from py4j.java_gateway import JavaObject # type: ignore[import] @@ -122,4 +123,9 @@ class SparkSession(SparkConversionMixin): def streams(self) -> StreamingQueryManager: ... def stop(self) -> None: ... def __enter__(self) -> SparkSession: ... - def __exit__(self, exc_type, exc_val, exc_tb) -> None: ... + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType], + ) -> None: ... diff --git a/python/pyspark/sql/types.pyi b/python/pyspark/sql/types.pyi index 31765e94884d7..3adf823d99a82 100644 --- a/python/pyspark/sql/types.pyi +++ b/python/pyspark/sql/types.pyi @@ -17,7 +17,8 @@ # under the License. from typing import overload -from typing import Any, Callable, Dict, Iterator, List, Optional, Union, Tuple, TypeVar +from typing import Any, Callable, Dict, Iterator, List, Optional, Union, Tuple, Type, TypeVar +from py4j.java_gateway import JavaGateway, JavaObject import datetime T = TypeVar("T") @@ -37,7 +38,7 @@ class DataType: def fromInternal(self, obj: Any) -> Any: ... class DataTypeSingleton(type): - def __call__(cls): ... + def __call__(cls: Type[T]) -> T: ... # type: ignore class NullType(DataType, metaclass=DataTypeSingleton): ... class AtomicType(DataType): ... @@ -85,8 +86,8 @@ class ShortType(IntegralType): class ArrayType(DataType): elementType: DataType containsNull: bool - def __init__(self, elementType=DataType, containsNull: bool = ...) -> None: ... - def simpleString(self): ... + def __init__(self, elementType: DataType, containsNull: bool = ...) -> None: ... + def simpleString(self) -> str: ... def jsonValue(self) -> Dict[str, Any]: ... @classmethod def fromJson(cls, json: Dict[str, Any]) -> ArrayType: ... @@ -197,8 +198,8 @@ class Row(tuple): class DateConverter: def can_convert(self, obj: Any) -> bool: ... 
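
The corrected `ArrayType` stub above makes `elementType` a required annotated parameter (the old stub accidentally used the `DataType` class as a default value) and gives `simpleString` its `str` return. Typical construction, runnable without a SparkSession:

```
from pyspark.sql.types import ArrayType, IntegerType, StructField, StructType

arr = ArrayType(IntegerType(), containsNull=False)
print(arr.simpleString())      # array<int>

schema = StructType([StructField("scores", arr)])
print(schema.simpleString())   # struct<scores:array<int>>
```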
- def convert(self, obj, gateway_client) -> Any: ... + def convert(self, obj: datetime.date, gateway_client: JavaGateway) -> JavaObject: ... class DatetimeConverter: - def can_convert(self, obj) -> bool: ... - def convert(self, obj, gateway_client) -> Any: ... + def can_convert(self, obj: Any) -> bool: ... + def convert(self, obj: datetime.datetime, gateway_client: JavaGateway) -> JavaObject: ... diff --git a/python/pyspark/sql/udf.pyi b/python/pyspark/sql/udf.pyi index 87c3672780037..ea61397a67ba1 100644 --- a/python/pyspark/sql/udf.pyi +++ b/python/pyspark/sql/udf.pyi @@ -18,8 +18,9 @@ from typing import Any, Callable, Optional -from pyspark.sql._typing import ColumnOrName, DataTypeOrString +from pyspark.sql._typing import ColumnOrName, DataTypeOrString, UserDefinedFunctionLike from pyspark.sql.column import Column +from pyspark.sql.types import DataType import pyspark.sql.session class UserDefinedFunction: @@ -35,7 +36,7 @@ class UserDefinedFunction: deterministic: bool = ..., ) -> None: ... @property - def returnType(self): ... + def returnType(self) -> DataType: ... def __call__(self, *cols: ColumnOrName) -> Column: ... def asNondeterministic(self) -> UserDefinedFunction: ... @@ -47,7 +48,7 @@ class UDFRegistration: name: str, f: Callable[..., Any], returnType: Optional[DataTypeOrString] = ..., - ): ... + ) -> UserDefinedFunctionLike: ... def registerJavaFunction( self, name: str, diff --git a/python/pyspark/streaming/context.pyi b/python/pyspark/streaming/context.pyi index 026163fc9a1db..117a6742e6b6b 100644 --- a/python/pyspark/streaming/context.pyi +++ b/python/pyspark/streaming/context.pyi @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. -from typing import Any, Callable, List, Optional, TypeVar, Union +from typing import Any, Callable, List, Optional, TypeVar from py4j.java_gateway import JavaObject # type: ignore[import] diff --git a/python/pyspark/streaming/dstream.pyi b/python/pyspark/streaming/dstream.pyi index 7b76ce4c65233..1521d838fc2b5 100644 --- a/python/pyspark/streaming/dstream.pyi +++ b/python/pyspark/streaming/dstream.pyi @@ -30,9 +30,12 @@ from typing import ( ) import datetime from pyspark.rdd import RDD +import pyspark.serializers from pyspark.storagelevel import StorageLevel import pyspark.streaming.context +from py4j.java_gateway import JavaObject + S = TypeVar("S") T = TypeVar("T") U = TypeVar("U") @@ -42,7 +45,12 @@ V = TypeVar("V") class DStream(Generic[T]): is_cached: bool is_checkpointed: bool - def __init__(self, jdstream, ssc, jrdd_deserializer) -> None: ... + def __init__( + self, + jdstream: JavaObject, + ssc: pyspark.streaming.context.StreamingContext, + jrdd_deserializer: pyspark.serializers.Serializer, + ) -> None: ... def context(self) -> pyspark.streaming.context.StreamingContext: ... def count(self) -> DStream[int]: ... def filter(self, f: Callable[[T], bool]) -> DStream[T]: ... diff --git a/python/pyspark/streaming/kinesis.pyi b/python/pyspark/streaming/kinesis.pyi index af7cd6f6ec13c..399c37f869620 100644 --- a/python/pyspark/streaming/kinesis.pyi +++ b/python/pyspark/streaming/kinesis.pyi @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. 
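
The `UDFRegistration.register` return annotation above reflects that the registered UDF comes back as a callable usable in both the DataFrame API and SQL. A sketch, assuming an active `SparkSession` named `spark`:

```
from pyspark.sql.types import LongType

# register() returns the UDF itself, usable directly on columns...
plus_one = spark.udf.register("plus_one", lambda x: x + 1, LongType())
spark.range(3).select(plus_one("id")).show()

# ...and by name from SQL.
spark.range(3).createOrReplaceTempView("ids")
spark.sql("SELECT plus_one(id) FROM ids").show()
```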
-from typing import Any, Callable, Optional, TypeVar +from typing import Callable, Optional, TypeVar from pyspark.storagelevel import StorageLevel from pyspark.streaming.context import StreamingContext from pyspark.streaming.dstream import DStream From 01321bc0fec54a1610d0873c17fa7354137d3a6b Mon Sep 17 00:00:00 2001 From: zero323 Date: Wed, 25 Nov 2020 10:24:41 +0900 Subject: [PATCH 0563/1009] [SPARK-33252][PYTHON][DOCS] Migration to NumPy documentation style in MLlib (pyspark.mllib.*) ### What changes were proposed in this pull request? This PR proposes migration of `pyspark.mllib` to NumPy documentation style. ### Why are the changes needed? To improve documentation style. Before: ![old](https://user-images.githubusercontent.com/1554276/100097941-90234980-2e5d-11eb-8b4d-c25d98d85191.png) After: ![new](https://user-images.githubusercontent.com/1554276/100097966-987b8480-2e5d-11eb-9e02-07b18c327624.png) ### Does this PR introduce _any_ user-facing change? Yes, this changes both rendered HTML docs and console representation (SPARK-33243). ### How was this patch tested? `dev/lint-python` and manual inspection. Closes #30413 from zero323/SPARK-33252. Authored-by: zero323 Signed-off-by: HyukjinKwon --- .../docs/source/reference/pyspark.mllib.rst | 3 +- python/pyspark/mllib/classification.py | 353 ++++++----- python/pyspark/mllib/clustering.py | 576 +++++++++++------- python/pyspark/mllib/evaluation.py | 60 +- python/pyspark/mllib/feature.py | 288 ++++++--- python/pyspark/mllib/feature.pyi | 4 +- python/pyspark/mllib/fpm.py | 86 +-- python/pyspark/mllib/fpm.pyi | 4 +- python/pyspark/mllib/linalg/__init__.py | 132 +++- python/pyspark/mllib/linalg/distributed.py | 495 ++++++++++----- python/pyspark/mllib/linalg/distributed.pyi | 6 +- python/pyspark/mllib/random.py | 378 ++++++++---- python/pyspark/mllib/recommendation.py | 116 ++-- python/pyspark/mllib/regression.py | 392 +++++++----- python/pyspark/mllib/stat/KernelDensity.py | 2 + python/pyspark/mllib/stat/__init__.py | 5 +- python/pyspark/mllib/stat/_statistics.py | 115 ++-- python/pyspark/mllib/stat/distribution.py | 2 + python/pyspark/mllib/tree.py | 469 +++++++------- python/pyspark/mllib/util.py | 256 +++++--- 20 files changed, 2375 insertions(+), 1367 deletions(-) diff --git a/python/docs/source/reference/pyspark.mllib.rst b/python/docs/source/reference/pyspark.mllib.rst index acc834c065ac3..df5ea017d0fbf 100644 --- a/python/docs/source/reference/pyspark.mllib.rst +++ b/python/docs/source/reference/pyspark.mllib.rst @@ -216,6 +216,8 @@ Statistics ChiSqTestResult MultivariateGaussian KernelDensity + ChiSqTestResult + KolmogorovSmirnovTestResult Tree @@ -250,4 +252,3 @@ Utilities Loader MLUtils Saveable - diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py index bbca216cce493..bd43e91afd280 100644 --- a/python/pyspark/mllib/classification.py +++ b/python/pyspark/mllib/classification.py @@ -88,20 +88,26 @@ class LogisticRegressionModel(LinearClassificationModel): Classification model trained using Multinomial/Binary Logistic Regression. - :param weights: - Weights computed for every feature. - :param intercept: - Intercept computed for this model. (Only used in Binary Logistic - Regression. In Multinomial Logistic Regression, the intercepts will - not bea single value, so the intercepts will be part of the - weights.) - :param numFeatures: - The dimension of the features. - :param numClasses: - The number of possible outcomes for k classes classification problem - in Multinomial Logistic Regression. 
By default, it is binary - logistic regression so numClasses will be set to 2. + .. versionadded:: 0.9.0 + Parameters + ---------- + weights : :py:class:`pyspark.mllib.linalg.Vector` + Weights computed for every feature. + intercept : float + Intercept computed for this model. (Only used in Binary Logistic + Regression. In Multinomial Logistic Regression, the intercepts will + not be a single value, so the intercepts will be part of the + weights.) + numFeatures : int + The dimension of the features. + numClasses : int + The number of possible outcomes for k classes classification problem + in Multinomial Logistic Regression. By default, it is binary + logistic regression so numClasses will be set to 2. + + Examples + -------- >>> from pyspark.mllib.linalg import SparseVector >>> data = [ ... LabeledPoint(0.0, [0.0, 1.0]), @@ -159,8 +165,6 @@ class LogisticRegressionModel(LinearClassificationModel): 1 >>> mcm.predict([0.0, 0.0, 0.3]) 2 - - .. versionadded:: 0.9.0 """ def __init__(self, weights, intercept, numFeatures, numClasses): super(LogisticRegressionModel, self).__init__(weights, intercept) @@ -263,54 +267,60 @@ def __repr__(self): class LogisticRegressionWithSGD(object): """ + Train a classification model for Binary Logistic Regression using Stochastic Gradient Descent. + .. versionadded:: 0.9.0 - .. note:: Deprecated in 2.0.0. Use ml.classification.LogisticRegression or - LogisticRegressionWithLBFGS. + .. deprecated:: 2.0.0 + Use ml.classification.LogisticRegression or LogisticRegressionWithLBFGS. """ @classmethod - @since('0.9.0') def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0, initialWeights=None, regParam=0.01, regType="l2", intercept=False, validateData=True, convergenceTol=0.001): """ Train a logistic regression model on the given data. - :param data: - The training data, an RDD of LabeledPoint. - :param iterations: - The number of iterations. - (default: 100) - :param step: - The step parameter used in SGD. - (default: 1.0) - :param miniBatchFraction: - Fraction of data to be used for each SGD iteration. - (default: 1.0) - :param initialWeights: - The initial weights. - (default: None) - :param regParam: - The regularizer parameter. - (default: 0.01) - :param regType: - The type of regularizer used for training our model. - Supported values: + .. versionadded:: 0.9.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + The training data, an RDD of :py:class:`pyspark.mllib.regression.LabeledPoint`. + iterations : int, optional + The number of iterations. + (default: 100) + step : float, optional + The step parameter used in SGD. + (default: 1.0) + miniBatchFraction : float, optional + Fraction of data to be used for each SGD iteration. + (default: 1.0) + initialWeights : :py:class:`pyspark.mllib.linalg.Vector` or convertible, optional + The initial weights. + (default: None) + regParam : float, optional + The regularizer parameter. + (default: 0.01) + regType : str, optional + The type of regularizer used for training our model. + Supported values: - "l1" for using L1 regularization - "l2" for using L2 regularization (default) - None for no regularization - :param intercept: - Boolean parameter which indicates the use or not of the - augmented representation for training data (i.e., whether bias - features are activated or not). - (default: False) - :param validateData: - Boolean parameter which indicates if the algorithm should - validate data before training. 
- (default: True) - :param convergenceTol: - A condition which decides iteration termination. - (default: 0.001) + + intercept : bool, optional + Boolean parameter which indicates the use or not of the + augmented representation for training data (i.e., whether bias + features are activated or not). + (default: False) + validateData : bool, optional + Boolean parameter which indicates if the algorithm should + validate data before training. + (default: True) + convergenceTol : float, optional + A condition which decides iteration termination. + (default: 0.001) """ warnings.warn( "Deprecated in 2.0.0. Use ml.classification.LogisticRegression or " @@ -326,55 +336,65 @@ def train(rdd, i): class LogisticRegressionWithLBFGS(object): """ + Train a classification model for Multinomial/Binary Logistic Regression + using Limited-memory BFGS. + + Standard feature scaling and L2 regularization are used by default. .. versionadded:: 1.2.0 """ @classmethod - @since('1.2.0') def train(cls, data, iterations=100, initialWeights=None, regParam=0.0, regType="l2", intercept=False, corrections=10, tolerance=1e-6, validateData=True, numClasses=2): """ Train a logistic regression model on the given data. - :param data: - The training data, an RDD of LabeledPoint. - :param iterations: - The number of iterations. - (default: 100) - :param initialWeights: - The initial weights. - (default: None) - :param regParam: - The regularizer parameter. - (default: 0.0) - :param regType: - The type of regularizer used for training our model. - Supported values: + .. versionadded:: 1.2.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + The training data, an RDD of :py:class:`pyspark.mllib.regression.LabeledPoint`. + iterations : int, optional + The number of iterations. + (default: 100) + initialWeights : :py:class:`pyspark.mllib.linalg.Vector` or convertible, optional + The initial weights. + (default: None) + regParam : float, optional + The regularizer parameter. + (default: 0.01) + regType : str, optional + The type of regularizer used for training our model. + Supported values: - "l1" for using L1 regularization - "l2" for using L2 regularization (default) - None for no regularization - :param intercept: - Boolean parameter which indicates the use or not of the - augmented representation for training data (i.e., whether bias - features are activated or not). - (default: False) - :param corrections: - The number of corrections used in the LBFGS update. - If a known updater is used for binary classification, - it calls the ml implementation and this parameter will - have no effect. (default: 10) - :param tolerance: - The convergence tolerance of iterations for L-BFGS. - (default: 1e-6) - :param validateData: - Boolean parameter which indicates if the algorithm should - validate data before training. - (default: True) - :param numClasses: - The number of classes (i.e., outcomes) a label can take in - Multinomial Logistic Regression. - (default: 2) + intercept : bool, optional + Boolean parameter which indicates the use or not of the + augmented representation for training data (i.e., whether bias + features are activated or not). + (default: False) + corrections : int, optional + The number of corrections used in the LBFGS update. + If a known updater is used for binary classification, + it calls the ml implementation and this parameter will + have no effect. (default: 10) + tolerance : float, optional + The convergence tolerance of iterations for L-BFGS. 
+ (default: 1e-6) + validateData : bool, optional + Boolean parameter which indicates if the algorithm should + validate data before training. + (default: True) + numClasses : int, optional + The number of classes (i.e., outcomes) a label can take in + Multinomial Logistic Regression. + (default: 2) + + Examples + -------- >>> data = [ ... LabeledPoint(0.0, [0.0, 1.0]), ... LabeledPoint(1.0, [1.0, 0.0]), @@ -406,11 +426,17 @@ class SVMModel(LinearClassificationModel): """ Model for Support Vector Machines (SVMs). - :param weights: - Weights computed for every feature. - :param intercept: - Intercept computed for this model. + .. versionadded:: 0.9.0 + + Parameters + ---------- + weights : :py:class:`pyspark.mllib.linalg.Vector` + Weights computed for every feature. + intercept : float + Intercept computed for this model. + Examples + -------- >>> from pyspark.mllib.linalg import SparseVector >>> data = [ ... LabeledPoint(0.0, [0.0]), @@ -451,8 +477,6 @@ class SVMModel(LinearClassificationModel): ... rmtree(path) ... except: ... pass - - .. versionadded:: 0.9.0 """ def __init__(self, weights, intercept): super(SVMModel, self).__init__(weights, intercept) @@ -501,53 +525,59 @@ def load(cls, sc, path): class SVMWithSGD(object): """ + Train a Support Vector Machine (SVM) using Stochastic Gradient Descent. + .. versionadded:: 0.9.0 """ @classmethod - @since('0.9.0') def train(cls, data, iterations=100, step=1.0, regParam=0.01, miniBatchFraction=1.0, initialWeights=None, regType="l2", intercept=False, validateData=True, convergenceTol=0.001): """ Train a support vector machine on the given data. - :param data: - The training data, an RDD of LabeledPoint. - :param iterations: - The number of iterations. - (default: 100) - :param step: - The step parameter used in SGD. - (default: 1.0) - :param regParam: - The regularizer parameter. - (default: 0.01) - :param miniBatchFraction: - Fraction of data to be used for each SGD iteration. - (default: 1.0) - :param initialWeights: - The initial weights. - (default: None) - :param regType: - The type of regularizer used for training our model. - Allowed values: + .. versionadded:: 0.9.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + The training data, an RDD of :py:class:`pyspark.mllib.regression.LabeledPoint`. + iterations : int, optional + The number of iterations. + (default: 100) + step : float, optional + The step parameter used in SGD. + (default: 1.0) + regParam : float, optional + The regularizer parameter. + (default: 0.01) + miniBatchFraction : float, optional + Fraction of data to be used for each SGD iteration. + (default: 1.0) + initialWeights : :py:class:`pyspark.mllib.linalg.Vector` or convertible, optional + The initial weights. + (default: None) + regType : str, optional + The type of regularizer used for training our model. + Allowed values: - "l1" for using L1 regularization - "l2" for using L2 regularization (default) - None for no regularization - :param intercept: - Boolean parameter which indicates the use or not of the - augmented representation for training data (i.e. whether bias - features are activated or not). - (default: False) - :param validateData: - Boolean parameter which indicates if the algorithm should - validate data before training. - (default: True) - :param convergenceTol: - A condition which decides iteration termination. - (default: 0.001) + + intercept : bool, optional + Boolean parameter which indicates the use or not of the + augmented representation for training data (i.e. 
whether bias + features are activated or not). + (default: False) + validateData : bool, optional + Boolean parameter which indicates if the algorithm should + validate data before training. + (default: True) + convergenceTol : float, optional + A condition which decides iteration termination. + (default: 0.001) """ def train(rdd, i): return callMLlibFunc("trainSVMModelWithSGD", rdd, int(iterations), float(step), @@ -563,14 +593,20 @@ class NaiveBayesModel(Saveable, Loader): """ Model for Naive Bayes classifiers. - :param labels: - List of labels. - :param pi: - Log of class priors, whose dimension is C, number of labels. - :param theta: - Log of class conditional probabilities, whose dimension is C-by-D, - where D is number of features. + .. versionadded:: 0.9.0 + Parameters + ---------- + labels : :py:class:`numpy.ndarray` + List of labels. + pi : :py:class:`numpy.ndarray` + Log of class priors, whose dimension is C, number of labels. + theta : :py:class:`numpy.ndarray` + Log of class conditional probabilities, whose dimension is C-by-D, + where D is number of features. + + Examples + -------- >>> from pyspark.mllib.linalg import SparseVector >>> data = [ ... LabeledPoint(0.0, [0.0, 0.0]), @@ -605,8 +641,6 @@ class NaiveBayesModel(Saveable, Loader): ... rmtree(path) ... except OSError: ... pass - - .. versionadded:: 0.9.0 """ def __init__(self, labels, pi, theta): self.labels = labels @@ -652,11 +686,12 @@ def load(cls, sc, path): class NaiveBayes(object): """ + Train a Multinomial Naive Bayes model. + .. versionadded:: 0.9.0 """ @classmethod - @since('0.9.0') def train(cls, data, lambda_=1.0): """ Train a Naive Bayes model given an RDD of (label, features) @@ -669,11 +704,15 @@ def train(cls, data, lambda_=1.0): it can also be used as `Bernoulli NB `_. The input feature values must be nonnegative. - :param data: - RDD of LabeledPoint. - :param lambda_: - The smoothing parameter. - (default: 1.0) + .. versionadded:: 0.9.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + The training data, an RDD of :py:class:`pyspark.mllib.regression.LabeledPoint`. + lambda\\_ : float, optional + The smoothing parameter. + (default: 1.0) """ first = data.first() if not isinstance(first, LabeledPoint): @@ -694,23 +733,25 @@ class StreamingLogisticRegressionWithSGD(StreamingLinearAlgorithm): of features must be constant. An initial weight vector must be provided. - :param stepSize: - Step size for each iteration of gradient descent. - (default: 0.1) - :param numIterations: - Number of iterations run for each batch of data. - (default: 50) - :param miniBatchFraction: - Fraction of each batch of data to use for updates. - (default: 1.0) - :param regParam: - L2 Regularization parameter. - (default: 0.0) - :param convergenceTol: - Value used to determine when to terminate iterations. - (default: 0.001) - .. versionadded:: 1.5.0 + + Parameters + ---------- + stepSize : float, optional + Step size for each iteration of gradient descent. + (default: 0.1) + numIterations : int, optional + Number of iterations run for each batch of data. + (default: 50) + miniBatchFraction : float, optional + Fraction of each batch of data to use for updates. + (default: 1.0) + regParam : float, optional + L2 Regularization parameter. + (default: 0.0) + convergenceTol : float, optional + Value used to determine when to terminate iterations. 
+ (default: 0.001) """ def __init__(self, stepSize=0.1, numIterations=50, miniBatchFraction=1.0, regParam=0.0, convergenceTol=0.001): diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py index b99a4150c396d..e1a009643c5f2 100644 --- a/python/pyspark/mllib/clustering.py +++ b/python/pyspark/mllib/clustering.py @@ -41,6 +41,10 @@ class BisectingKMeansModel(JavaModelWrapper): """ A clustering model derived from the bisecting k-means method. + .. versionadded:: 2.0.0 + + Examples + -------- >>> data = array([0.0,0.0, 1.0,1.0, 9.0,8.0, 8.0,9.0]).reshape(4, 2) >>> bskm = BisectingKMeans() >>> model = bskm.train(sc.parallelize(data, 2), k=4) @@ -51,8 +55,6 @@ class BisectingKMeansModel(JavaModelWrapper): 4 >>> model.computeCost(p) 0.0 - - .. versionadded:: 2.0.0 """ def __init__(self, java_model): @@ -72,17 +74,25 @@ def k(self): """Get the number of clusters""" return self.call("k") - @since('2.0.0') def predict(self, x): """ Find the cluster that each of the points belongs to in this model. - :param x: - A data point (or RDD of points) to determine cluster index. - :return: - Predicted cluster index or an RDD of predicted cluster indices - if the input is an RDD. + .. versionadded:: 2.0.0 + + Parameters + ---------- + x : :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + A data point (or RDD of points) to determine cluster index. + :py:class:`pyspark.mllib.linalg.Vector` can be replaced with equivalent + objects (list, tuple, numpy.ndarray). + + Returns + ------- + int or :py:class:`pyspark.RDD` of int + Predicted cluster index or an RDD of predicted cluster indices + if the input is an RDD. """ if isinstance(x, RDD): vecs = x.map(_convert_to_vector) @@ -91,15 +101,20 @@ def predict(self, x): x = _convert_to_vector(x) return self.call("predict", x) - @since('2.0.0') def computeCost(self, x): """ Return the Bisecting K-means cost (sum of squared distances of points to their nearest center) for this model on the given data. If provided with an RDD of points returns the sum. - :param point: - A data point (or RDD of points) to compute the cost(s). + .. versionadded:: 2.0.0 + + Parameters + ---------- + point : :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + A data point (or RDD of points) to compute the cost(s). + :py:class:`pyspark.mllib.linalg.Vector` can be replaced with equivalent + objects (list, tuple, numpy.ndarray). """ if isinstance(x, RDD): vecs = x.map(_convert_to_vector) @@ -122,37 +137,43 @@ class BisectingKMeans(object): clusters on the bottom level would result more than `k` leaf clusters, larger clusters get higher priority. - Based on - `Steinbach, Karypis, and Kumar, A comparison of document clustering - techniques, KDD Workshop on Text Mining, 2000 - `_. - .. versionadded:: 2.0.0 + + Notes + ----- + See the original paper [1]_ + + .. [1] Steinbach, M. et al. “A Comparison of Document Clustering Techniques.” (2000). + KDD Workshop on Text Mining, 2000 + http://glaros.dtc.umn.edu/gkhome/fetch/papers/docclusterKDDTMW00.pdf """ @classmethod - @since('2.0.0') def train(self, rdd, k=4, maxIterations=20, minDivisibleClusterSize=1.0, seed=-1888008604): """ Runs the bisecting k-means algorithm return the model. - :param rdd: - Training points as an `RDD` of `Vector` or convertible - sequence types. - :param k: - The desired number of leaf clusters. The actual number could - be smaller if there are no divisible leaf clusters. 
- (default: 4) - :param maxIterations: - Maximum number of iterations allowed to split clusters. - (default: 20) - :param minDivisibleClusterSize: - Minimum number of points (if >= 1.0) or the minimum proportion - of points (if < 1.0) of a divisible cluster. - (default: 1) - :param seed: - Random seed value for cluster initialization. - (default: -1888008604 from classOf[BisectingKMeans].getName.##) + .. versionadded:: 2.0.0 + + Parameters + ---------- + rdd : :py:class:`pyspark.RDD` + Training points as an `RDD` of `Vector` or convertible + sequence types. + k : int, optional + The desired number of leaf clusters. The actual number could + be smaller if there are no divisible leaf clusters. + (default: 4) + maxIterations : int, optional + Maximum number of iterations allowed to split clusters. + (default: 20) + minDivisibleClusterSize : float, optional + Minimum number of points (if >= 1.0) or the minimum proportion + of points (if < 1.0) of a divisible cluster. + (default: 1) + seed : int, optional + Random seed value for cluster initialization. + (default: -1888008604 from classOf[BisectingKMeans].getName.##) """ java_model = callMLlibFunc( "trainBisectingKMeans", rdd.map(_convert_to_vector), @@ -165,6 +186,10 @@ class KMeansModel(Saveable, Loader): """A clustering model derived from the k-means method. + .. versionadded:: 0.9.0 + + Examples + -------- >>> data = array([0.0,0.0, 1.0,1.0, 9.0,8.0, 8.0,9.0]).reshape(4, 2) >>> model = KMeans.train( ... sc.parallelize(data), 2, maxIterations=10, initializationMode="random", @@ -213,8 +238,6 @@ class KMeansModel(Saveable, Loader): ... initialModel = KMeansModel([(-1000.0,-1000.0),(5.0,5.0),(1000.0,1000.0)])) >>> model.clusterCenters [array([-1000., -1000.]), array([ 5., 5.]), array([ 1000., 1000.])] - - .. versionadded:: 0.9.0 """ def __init__(self, centers): @@ -232,17 +255,25 @@ def k(self): """Total number of clusters.""" return len(self.centers) - @since('0.9.0') def predict(self, x): """ Find the cluster that each of the points belongs to in this model. - :param x: - A data point (or RDD of points) to determine cluster index. - :return: - Predicted cluster index or an RDD of predicted cluster indices - if the input is an RDD. + .. versionadded:: 0.9.0 + + Parameters + ---------- + x : :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + A data point (or RDD of points) to determine cluster index. + :py:class:`pyspark.mllib.linalg.Vector` can be replaced with equivalent + objects (list, tuple, numpy.ndarray). + + Returns + ------- + int or :py:class:`pyspark.RDD` of int + Predicted cluster index or an RDD of predicted cluster indices + if the input is an RDD. """ best = 0 best_distance = float("inf") @@ -257,15 +288,18 @@ def predict(self, x): best_distance = distance return best - @since('1.4.0') def computeCost(self, rdd): """ Return the K-means cost (sum of squared distances of points to their nearest center) for this model on the given data. - :param rdd: - The RDD of points to compute the cost on. + .. versionadded:: 1.4.0 + + Parameters + ---------- + rdd : ::py:class:`pyspark.RDD` + The RDD of points to compute the cost on. """ cost = callMLlibFunc("computeCostKmeansModel", rdd.map(_convert_to_vector), [_convert_to_vector(c) for c in self.centers]) @@ -292,46 +326,51 @@ def load(cls, sc, path): class KMeans(object): """ + K-means clustering. + .. 
versionadded:: 0.9.0 """ @classmethod - @since('0.9.0') def train(cls, rdd, k, maxIterations=100, initializationMode="k-means||", seed=None, initializationSteps=2, epsilon=1e-4, initialModel=None): """ Train a k-means clustering model. - :param rdd: - Training points as an `RDD` of `Vector` or convertible - sequence types. - :param k: - Number of clusters to create. - :param maxIterations: - Maximum number of iterations allowed. - (default: 100) - :param initializationMode: - The initialization algorithm. This can be either "random" or - "k-means||". - (default: "k-means||") - :param seed: - Random seed value for cluster initialization. Set as None to - generate seed based on system time. - (default: None) - :param initializationSteps: - Number of steps for the k-means|| initialization mode. - This is an advanced setting -- the default of 2 is almost - always enough. - (default: 2) - :param epsilon: - Distance threshold within which a center will be considered to - have converged. If all centers move less than this Euclidean - distance, iterations are stopped. - (default: 1e-4) - :param initialModel: - Initial cluster centers can be provided as a KMeansModel object - rather than using the random or k-means|| initializationModel. - (default: None) + .. versionadded:: 0.9.0 + + Parameters + ---------- + rdd : ::py:class:`pyspark.RDD` + Training points as an `RDD` of :py:class:`pyspark.mllib.linalg.Vector` + or convertible sequence types. + k : int + Number of clusters to create. + maxIterations : int, optional + Maximum number of iterations allowed. + (default: 100) + initializationMode : str, optional + The initialization algorithm. This can be either "random" or + "k-means||". + (default: "k-means||") + seed : int, optional + Random seed value for cluster initialization. Set as None to + generate seed based on system time. + (default: None) + initializationSteps : + Number of steps for the k-means|| initialization mode. + This is an advanced setting -- the default of 2 is almost + always enough. + (default: 2) + epsilon : float, optional + Distance threshold within which a center will be considered to + have converged. If all centers move less than this Euclidean + distance, iterations are stopped. + (default: 1e-4) + initialModel : :py:class:`KMeansModel`, optional + Initial cluster centers can be provided as a KMeansModel object + rather than using the random or k-means|| initializationModel. + (default: None) """ clusterInitialModel = [] if initialModel is not None: @@ -352,6 +391,10 @@ class GaussianMixtureModel(JavaModelWrapper, JavaSaveable, JavaLoader): """ A clustering model derived from the Gaussian Mixture Model method. + .. versionadded:: 1.3.0 + + Examples + -------- >>> from pyspark.mllib.linalg import Vectors, DenseMatrix >>> from numpy.testing import assert_equal >>> from shutil import rmtree @@ -410,8 +453,6 @@ class GaussianMixtureModel(JavaModelWrapper, JavaSaveable, JavaLoader): True >>> labels[2]==labels[3]==labels[4] True - - .. versionadded:: 1.3.0 """ @property @@ -440,17 +481,23 @@ def k(self): """Number of gaussians in mixture.""" return len(self.weights) - @since('1.3.0') def predict(self, x): """ Find the cluster to which the point 'x' or each point in RDD 'x' has maximum membership in this model. - :param x: - A feature vector or an RDD of vectors representing data points. - :return: - Predicted cluster label or an RDD of predicted cluster labels - if the input is an RDD. + .. 
versionadded:: 1.3.0 + + Parameters + ---------- + x : :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + A feature vector or an RDD of vectors representing data points. + + Returns + ------- + numpy.float64 or :py:class:`pyspark.RDD` of int + Predicted cluster label or an RDD of predicted cluster labels + if the input is an RDD. """ if isinstance(x, RDD): cluster_labels = self.predictSoft(x).map(lambda z: z.index(max(z))) @@ -459,16 +506,22 @@ def predict(self, x): z = self.predictSoft(x) return z.argmax() - @since('1.3.0') def predictSoft(self, x): """ Find the membership of point 'x' or each point in RDD 'x' to all mixture components. - :param x: - A feature vector or an RDD of vectors representing data points. - :return: - The membership value to all mixture components for vector 'x' - or each vector in RDD 'x'. + .. versionadded:: 1.3.0 + + Parameters + ---------- + x : :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + A feature vector or an RDD of vectors representing data points. + + Returns + ------- + numpy.ndarray or :py:class:`pyspark.RDD` + The membership value to all mixture components for vector 'x' + or each vector in RDD 'x'. """ if isinstance(x, RDD): means, sigmas = zip(*[(g.mu, g.sigma) for g in self.gaussians]) @@ -479,14 +532,16 @@ def predictSoft(self, x): return self.call("predictSoft", _convert_to_vector(x)).toArray() @classmethod - @since('1.5.0') def load(cls, sc, path): """Load the GaussianMixtureModel from disk. - :param sc: - SparkContext. - :param path: - Path to where the model is stored. + .. versionadded:: 1.5.0 + + Parameters + ---------- + sc : :py:class:`SparkContext` + path : str + Path to where the model is stored. """ model = cls._load_java(sc, path) wrapper = sc._jvm.org.apache.spark.mllib.api.python.GaussianMixtureModelWrapper(model) @@ -499,32 +554,36 @@ class GaussianMixture(object): .. versionadded:: 1.3.0 """ + @classmethod - @since('1.3.0') def train(cls, rdd, k, convergenceTol=1e-3, maxIterations=100, seed=None, initialModel=None): """ Train a Gaussian Mixture clustering model. - :param rdd: - Training points as an `RDD` of `Vector` or convertible - sequence types. - :param k: - Number of independent Gaussians in the mixture model. - :param convergenceTol: - Maximum change in log-likelihood at which convergence is - considered to have occurred. - (default: 1e-3) - :param maxIterations: - Maximum number of iterations allowed. - (default: 100) - :param seed: - Random seed for initial Gaussian distribution. Set as None to - generate seed based on system time. - (default: None) - :param initialModel: - Initial GMM starting point, bypassing the random - initialization. - (default: None) + .. versionadded:: 1.3.0 + + Parameters + ---------- + rdd : ::py:class:`pyspark.RDD` + Training points as an `RDD` of :py:class:`pyspark.mllib.linalg.Vector` + or convertible sequence types. + k : int + Number of independent Gaussians in the mixture model. + convergenceTol : float, optional + Maximum change in log-likelihood at which convergence is + considered to have occurred. + (default: 1e-3) + maxIterations : int, optional + Maximum number of iterations allowed. + (default: 100) + seed : int, optional + Random seed for initial Gaussian distribution. Set as None to + generate seed based on system time. + (default: None) + initialModel : GaussianMixtureModel, optional + Initial GMM starting point, bypassing the random + initialization. 
+ (default: None) """ initialModelWeights = None initialModelMu = None @@ -545,8 +604,12 @@ def train(cls, rdd, k, convergenceTol=1e-3, maxIterations=100, seed=None, initia class PowerIterationClusteringModel(JavaModelWrapper, JavaSaveable, JavaLoader): """ - Model produced by [[PowerIterationClustering]]. + Model produced by :py:class:`PowerIterationClustering`. + .. versionadded:: 1.5.0 + + Examples + -------- >>> import math >>> def genCircle(r, n): ... points = [] @@ -589,8 +652,6 @@ class PowerIterationClusteringModel(JavaModelWrapper, JavaSaveable, JavaLoader): ... rmtree(path) ... except OSError: ... pass - - .. versionadded:: 1.5.0 """ @property @@ -623,37 +684,48 @@ def load(cls, sc, path): class PowerIterationClustering(object): """ - Power Iteration Clustering (PIC), a scalable graph clustering algorithm - developed by [[http://www.cs.cmu.edu/~frank/papers/icml2010-pic-final.pdf Lin and Cohen]]. - From the abstract: PIC finds a very low-dimensional embedding of a - dataset using truncated power iteration on a normalized pair-wise - similarity matrix of the data. + Power Iteration Clustering (PIC), a scalable graph clustering algorithm. + + + Developed by Lin and Cohen [1]_. From the abstract: + + "PIC finds a very low-dimensional embedding of a + dataset using truncated power iteration on a normalized pair-wise + similarity matrix of the data." .. versionadded:: 1.5.0 + + .. [1] Lin, Frank & Cohen, William. (2010). Power Iteration Clustering. + http://www.cs.cmu.edu/~frank/papers/icml2010-pic-final.pdf """ @classmethod - @since('1.5.0') def train(cls, rdd, k, maxIterations=100, initMode="random"): r""" - :param rdd: - An RDD of (i, j, s\ :sub:`ij`\) tuples representing the - affinity matrix, which is the matrix A in the PIC paper. The - similarity s\ :sub:`ij`\ must be nonnegative. This is a symmetric - matrix and hence s\ :sub:`ij`\ = s\ :sub:`ji`\ For any (i, j) with - nonzero similarity, there should be either (i, j, s\ :sub:`ij`\) or - (j, i, s\ :sub:`ji`\) in the input. Tuples with i = j are ignored, - because it is assumed s\ :sub:`ij`\ = 0.0. - :param k: - Number of clusters. - :param maxIterations: - Maximum number of iterations of the PIC algorithm. - (default: 100) - :param initMode: - Initialization mode. This can be either "random" to use - a random vector as vertex properties, or "degree" to use - normalized sum similarities. - (default: "random") + Train PowerIterationClusteringModel + + .. versionadded:: 1.5.0 + + Parameters + ---------- + rdd : :py:class:`pyspark.RDD` + An RDD of (i, j, s\ :sub:`ij`\) tuples representing the + affinity matrix, which is the matrix A in the PIC paper. The + similarity s\ :sub:`ij`\ must be nonnegative. This is a symmetric + matrix and hence s\ :sub:`ij`\ = s\ :sub:`ji`\ For any (i, j) with + nonzero similarity, there should be either (i, j, s\ :sub:`ij`\) or + (j, i, s\ :sub:`ji`\) in the input. Tuples with i = j are ignored, + because it is assumed s\ :sub:`ij`\ = 0.0. + k : int + Number of clusters. + maxIterations : int, optional + Maximum number of iterations of the PIC algorithm. + (default: 100) + initMode : str, optional + Initialization mode. This can be either "random" to use + a random vector as vertex properties, or "degree" to use + normalized sum similarities. 
+ (default: "random") """ model = callMLlibFunc("trainPowerIterationClusteringModel", rdd.map(_convert_to_vector), int(k), int(maxIterations), initMode) @@ -673,29 +745,37 @@ class StreamingKMeansModel(KMeansModel): The update formula for each centroid is given by - * c_t+1 = ((c_t * n_t * a) + (x_t * m_t)) / (n_t + m_t) - * n_t+1 = n_t * a + m_t + - c_t+1 = ((c_t * n_t * a) + (x_t * m_t)) / (n_t + m_t) + - n_t+1 = n_t * a + m_t where - * c_t: Centroid at the n_th iteration. - * n_t: Number of samples (or) weights associated with the centroid - at the n_th iteration. - * x_t: Centroid of the new data closest to c_t. - * m_t: Number of samples (or) weights of the new data closest to c_t - * c_t+1: New centroid. - * n_t+1: New number of weights. - * a: Decay Factor, which gives the forgetfulness. + - c_t: Centroid at the n_th iteration. + - n_t: Number of samples (or) weights associated with the centroid + at the n_th iteration. + - x_t: Centroid of the new data closest to c_t. + - m_t: Number of samples (or) weights of the new data closest to c_t + - c_t+1: New centroid. + - n_t+1: New number of weights. + - a: Decay Factor, which gives the forgetfulness. - .. note:: If a is set to 1, it is the weighted mean of the previous - and new data. If it set to zero, the old centroids are completely - forgotten. - - :param clusterCenters: - Initial cluster centers. - :param clusterWeights: - List of weights assigned to each cluster. + .. versionadded:: 1.5.0 + Parameters + ---------- + clusterCenters : list of :py:class:`pyspark.mllib.linalg.Vector` or covertible + Initial cluster centers. + clusterWeights : :py:class:`pyspark.mllib.linalg.Vector` or covertible + List of weights assigned to each cluster. + + Notes + ----- + If a is set to 1, it is the weighted mean of the previous + and new data. If it set to zero, the old centroids are completely + forgotten. + + Examples + -------- >>> initCenters = [[0.0, 0.0], [1.0, 1.0]] >>> initWeights = [1.0, 1.0] >>> stkm = StreamingKMeansModel(initCenters, initWeights) @@ -723,8 +803,6 @@ class StreamingKMeansModel(KMeansModel): 0 >>> stkm.predict([1.5, 1.5]) 1 - - .. versionadded:: 1.5.0 """ def __init__(self, clusterCenters, clusterWeights): super(StreamingKMeansModel, self).__init__(centers=clusterCenters) @@ -740,14 +818,18 @@ def clusterWeights(self): def update(self, data, decayFactor, timeUnit): """Update the centroids, according to data - :param data: - RDD with new data for the model update. - :param decayFactor: - Forgetfulness of the previous centroids. - :param timeUnit: - Can be "batches" or "points". If points, then the decay factor - is raised to the power of number of new points and if batches, - then decay factor will be used as is. + .. versionadded:: 1.5.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + RDD with new data for the model update. + decayFactor : float + Forgetfulness of the previous centroids. + timeUnit : str + Can be "batches" or "points". If points, then the decay factor + is raised to the power of number of new points and if batches, + then decay factor will be used as is. """ if not isinstance(data, RDD): raise TypeError("Data should be of an RDD, got %s." % type(data)) @@ -772,19 +854,21 @@ class StreamingKMeans(object): More details on how the centroids are updated are provided under the docs of StreamingKMeansModel. - :param k: - Number of clusters. - (default: 2) - :param decayFactor: - Forgetfulness of the previous centroids. - (default: 1.0) - :param timeUnit: - Can be "batches" or "points". 
If points, then the decay factor is - raised to the power of number of new points and if batches, then - decay factor will be used as is. - (default: "batches") - .. versionadded:: 1.5.0 + + Parameters + ---------- + k : int, optional + Number of clusters. + (default: 2) + decayFactor : float, optional + Forgetfulness of the previous centroids. + (default: 1.0) + timeUnit : str, optional + Can be "batches" or "points". If points, then the decay factor is + raised to the power of number of new points and if batches, then + decay factor will be used as is. + (default: "batches") """ def __init__(self, k=2, decayFactor=1.0, timeUnit="batches"): self._k = k @@ -887,13 +971,23 @@ class LDAModel(JavaModelWrapper, JavaSaveable, Loader): Latent Dirichlet Allocation (LDA), a topic model designed for text documents. Terminology + - "word" = "term": an element of the vocabulary - "token": instance of a term appearing in a document - "topic": multinomial distribution over words representing some concept - References: - - Original LDA paper (journal version): - Blei, Ng, and Jordan. "Latent Dirichlet Allocation." JMLR, 2003. + .. versionadded:: 1.5.0 + + Notes + ----- + See the original LDA paper (journal version) [1]_ + + .. [1] Blei, D. et al. "Latent Dirichlet Allocation." + J. Mach. Learn. Res. 3 (2003): 993-1022. + https://www.jmlr.org/papers/v3/blei03a + + Examples + -------- >>> from pyspark.mllib.linalg import Vectors >>> from numpy.testing import assert_almost_equal, assert_equal >>> data = [ @@ -925,8 +1019,6 @@ class LDAModel(JavaModelWrapper, JavaSaveable, Loader): ... rmtree(path) ... except OSError: ... pass - - .. versionadded:: 1.5.0 """ @since('1.5.0') @@ -939,19 +1031,24 @@ def vocabSize(self): """Vocabulary size (number of terms or terms in the vocabulary)""" return self.call("vocabSize") - @since('1.6.0') def describeTopics(self, maxTermsPerTopic=None): """Return the topics described by weighted terms. - WARNING: If vocabSize and k are large, this can return a large object! - - :param maxTermsPerTopic: - Maximum number of terms to collect for each topic. - (default: vocabulary size) - :return: - Array over topics. Each topic is represented as a pair of - matching arrays: (term indices, term weights in topic). - Each topic's terms are sorted in order of decreasing weight. + .. versionadded:: 1.6.0 + .. warning:: If vocabSize and k are large, this can return a large object! + + Parameters + ---------- + maxTermsPerTopic : int, optional + Maximum number of terms to collect for each topic. + (default: vocabulary size) + + Returns + ------- + list + Array over topics. Each topic is represented as a pair of + matching arrays: (term indices, term weights in topic). + Each topic's terms are sorted in order of decreasing weight. """ if maxTermsPerTopic is None: topics = self.call("describeTopics") @@ -960,14 +1057,16 @@ def describeTopics(self, maxTermsPerTopic=None): return topics @classmethod - @since('1.5.0') def load(cls, sc, path): """Load the LDAModel from disk. - :param sc: - SparkContext. - :param path: - Path to where the model is stored. + .. versionadded:: 1.5.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + path : str + Path to where the model is stored. """ if not isinstance(sc, SparkContext): raise TypeError("sc should be a SparkContext, got type %s" % type(sc)) @@ -979,47 +1078,52 @@ def load(cls, sc, path): class LDA(object): """ + Train Latent Dirichlet Allocation (LDA) model. + .. 
versionadded:: 1.5.0 """ @classmethod - @since('1.5.0') def train(cls, rdd, k=10, maxIterations=20, docConcentration=-1.0, topicConcentration=-1.0, seed=None, checkpointInterval=10, optimizer="em"): """Train a LDA model. - :param rdd: - RDD of documents, which are tuples of document IDs and term - (word) count vectors. The term count vectors are "bags of - words" with a fixed-size vocabulary (where the vocabulary size - is the length of the vector). Document IDs must be unique - and >= 0. - :param k: - Number of topics to infer, i.e., the number of soft cluster - centers. - (default: 10) - :param maxIterations: - Maximum number of iterations allowed. - (default: 20) - :param docConcentration: - Concentration parameter (commonly named "alpha") for the prior - placed on documents' distributions over topics ("theta"). - (default: -1.0) - :param topicConcentration: - Concentration parameter (commonly named "beta" or "eta") for - the prior placed on topics' distributions over terms. - (default: -1.0) - :param seed: - Random seed for cluster initialization. Set as None to generate - seed based on system time. - (default: None) - :param checkpointInterval: - Period (in iterations) between checkpoints. - (default: 10) - :param optimizer: - LDAOptimizer used to perform the actual calculation. Currently - "em", "online" are supported. - (default: "em") + .. versionadded:: 1.5.0 + + Parameters + ---------- + rdd : :py:class:`pyspark.RDD` + RDD of documents, which are tuples of document IDs and term + (word) count vectors. The term count vectors are "bags of + words" with a fixed-size vocabulary (where the vocabulary size + is the length of the vector). Document IDs must be unique + and >= 0. + k : int, optional + Number of topics to infer, i.e., the number of soft cluster + centers. + (default: 10) + maxIterations : int, optional + Maximum number of iterations allowed. + (default: 20) + docConcentration : float, optional + Concentration parameter (commonly named "alpha") for the prior + placed on documents' distributions over topics ("theta"). + (default: -1.0) + topicConcentration : float, optional + Concentration parameter (commonly named "beta" or "eta") for + the prior placed on topics' distributions over terms. + (default: -1.0) + seed : int, optional + Random seed for cluster initialization. Set as None to generate + seed based on system time. + (default: None) + checkpointInterval : int, optional + Period (in iterations) between checkpoints. + (default: 10) + optimizer : str, optional + LDAOptimizer used to perform the actual calculation. Currently + "em", "online" are supported. + (default: "em") """ model = callMLlibFunc("trainLDAModel", rdd, k, maxIterations, docConcentration, topicConcentration, seed, diff --git a/python/pyspark/mllib/evaluation.py b/python/pyspark/mllib/evaluation.py index f3be827fb6e4f..198a9791774a9 100644 --- a/python/pyspark/mllib/evaluation.py +++ b/python/pyspark/mllib/evaluation.py @@ -30,8 +30,15 @@ class BinaryClassificationMetrics(JavaModelWrapper): """ Evaluator for binary classification. - :param scoreAndLabels: an RDD of score, label and optional weight. + .. versionadded:: 1.4.0 + + Parameters + ---------- + scoreAndLabels : :py:class:`pyspark.RDD` + an RDD of score, label and optional weight. + Examples + -------- >>> scoreAndLabels = sc.parallelize([ ... 
(0.1, 0.0), (0.1, 1.0), (0.4, 0.0), (0.6, 0.0), (0.6, 1.0), (0.6, 1.0), (0.8, 1.0)], 2) >>> metrics = BinaryClassificationMetrics(scoreAndLabels) @@ -48,8 +55,6 @@ class BinaryClassificationMetrics(JavaModelWrapper): 0.79... >>> metrics.areaUnderPR 0.88... - - .. versionadded:: 1.4.0 """ def __init__(self, scoreAndLabels): @@ -95,8 +100,15 @@ class RegressionMetrics(JavaModelWrapper): """ Evaluator for regression. - :param predictionAndObservations: an RDD of prediction, observation and optional weight. + .. versionadded:: 1.4.0 + + Parameters + ---------- + predictionAndObservations : :py:class:`pyspark.RDD` + an RDD of prediction, observation and optional weight. + Examples + -------- >>> predictionAndObservations = sc.parallelize([ ... (2.5, 3.0), (0.0, -0.5), (2.0, 2.0), (8.0, 7.0)]) >>> metrics = RegressionMetrics(predictionAndObservations) @@ -115,8 +127,6 @@ class RegressionMetrics(JavaModelWrapper): >>> metrics = RegressionMetrics(predictionAndObservationsWithOptWeight) >>> metrics.rootMeanSquaredError 0.68... - - .. versionadded:: 1.4.0 """ def __init__(self, predictionAndObservations): @@ -182,9 +192,15 @@ class MulticlassMetrics(JavaModelWrapper): """ Evaluator for multiclass classification. - :param predictionAndLabels: an RDD of prediction, label, optional weight - and optional probability. + .. versionadded:: 1.4.0 + + Parameters + ---------- + predictionAndLabels : :py:class:`pyspark.RDD` + an RDD of prediction, label, optional weight and optional probability. + Examples + -------- >>> predictionAndLabels = sc.parallelize([(0.0, 0.0), (0.0, 1.0), (0.0, 0.0), ... (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (2.0, 2.0), (2.0, 0.0)]) >>> metrics = MulticlassMetrics(predictionAndLabels) @@ -246,8 +262,6 @@ class MulticlassMetrics(JavaModelWrapper): >>> metrics = MulticlassMetrics(predictionAndLabelsWithProbabilities) >>> metrics.logLoss() 0.9682... - - .. versionadded:: 1.4.0 """ def __init__(self, predictionAndLabels): @@ -377,9 +391,15 @@ class RankingMetrics(JavaModelWrapper): """ Evaluator for ranking algorithms. - :param predictionAndLabels: an RDD of (predicted ranking, - ground truth set) pairs. + .. versionadded:: 1.4.0 + Parameters + ---------- + predictionAndLabels : :py:class:`pyspark.RDD` + an RDD of (predicted ranking, ground truth set) pairs. + + Examples + -------- >>> predictionAndLabels = sc.parallelize([ ... ([1, 6, 2, 7, 8, 3, 9, 10, 4, 5], [1, 2, 3, 4, 5]), ... ([4, 1, 5, 6, 2, 7, 3, 8, 9, 10], [1, 2, 3]), @@ -407,8 +427,6 @@ class RankingMetrics(JavaModelWrapper): 0.35... >>> metrics.recallAt(15) 0.66... - - .. versionadded:: 1.4.0 """ def __init__(self, predictionAndLabels): @@ -484,10 +502,16 @@ class MultilabelMetrics(JavaModelWrapper): """ Evaluator for multilabel classification. - :param predictionAndLabels: an RDD of (predictions, labels) pairs, - both are non-null Arrays, each with - unique elements. + .. versionadded:: 1.4.0 + + Parameters + ---------- + predictionAndLabels : :py:class:`pyspark.RDD` + an RDD of (predictions, labels) pairs, + both are non-null Arrays, each with unique elements. + Examples + -------- >>> predictionAndLabels = sc.parallelize([([0.0, 1.0], [0.0, 2.0]), ([0.0, 2.0], [0.0, 1.0]), ... ([], [0.0]), ([2.0], [2.0]), ([2.0, 0.0], [2.0, 0.0]), ... ([0.0, 1.0, 2.0], [0.0, 1.0]), ([1.0], [1.0, 2.0])]) @@ -516,8 +540,6 @@ class MultilabelMetrics(JavaModelWrapper): 0.28... >>> metrics.accuracy 0.54... - - .. 
versionadded:: 1.4.0 """ def __init__(self, predictionAndLabels): diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py index d95f9197eaedf..1d37ab815655b 100644 --- a/python/pyspark/mllib/feature.py +++ b/python/pyspark/mllib/feature.py @@ -41,7 +41,10 @@ def transform(self, vector): """ Applies transformation on a vector. - :param vector: vector to be transformed. + Parameters + ---------- + vector : :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + vector or convertible or RDD to be transformed. """ raise NotImplementedError @@ -56,8 +59,15 @@ class Normalizer(VectorTransformer): For `p` = float('inf'), max(abs(vector)) will be used as norm for normalization. - :param p: Normalization in L^p^ space, p = 2 by default. + .. versionadded:: 1.2.0 + + Parameters + ---------- + p : float, optional + Normalization in L^p^ space, p = 2 by default. + Examples + -------- >>> from pyspark.mllib.linalg import Vectors >>> v = Vectors.dense(range(3)) >>> nor = Normalizer(1) @@ -71,21 +81,27 @@ class Normalizer(VectorTransformer): >>> nor2 = Normalizer(float("inf")) >>> nor2.transform(v) DenseVector([0.0, 0.5, 1.0]) - - .. versionadded:: 1.2.0 """ def __init__(self, p=2.0): assert p >= 1.0, "p should be greater than 1.0" self.p = float(p) - @since('1.2.0') def transform(self, vector): """ Applies unit length normalization on a vector. - :param vector: vector or RDD of vector to be normalized. - :return: normalized vector. If the norm of the input is zero, it - will return the input vector. + .. versionadded:: 1.2.0 + + Parameters + ---------- + vector : :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + vector or RDD of vector to be normalized. + + Returns + ------- + :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + normalized vector(s). If the norm of the input is zero, it + will return the input vector. """ if isinstance(vector, RDD): vector = vector.map(_convert_to_vector) @@ -103,11 +119,16 @@ def transform(self, vector): """ Applies transformation on a vector or an RDD[Vector]. - .. note:: In Python, transform cannot currently be used within - an RDD transformation or action. - Call transform directly on the RDD instead. + Parameters + ---------- + vector : :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + Input vector(s) to be transformed. - :param vector: Vector or RDD of Vector to be transformed. + Notes + ----- + In Python, transform cannot currently be used within + an RDD transformation or action. + Call transform directly on the RDD instead. """ if isinstance(vector, RDD): vector = vector.map(_convert_to_vector) @@ -123,19 +144,29 @@ class StandardScalerModel(JavaVectorTransformer): .. versionadded:: 1.2.0 """ - @since('1.2.0') def transform(self, vector): """ Applies standardization transformation on a vector. - .. note:: In Python, transform cannot currently be used within - an RDD transformation or action. - Call transform directly on the RDD instead. + .. versionadded:: 1.2.0 + + Parameters + ---------- + vector : :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + Input vector(s) to be standardized. - :param vector: Vector or RDD of Vector to be standardized. - :return: Standardized vector. If the variance of a column is - zero, it will return default `0.0` for the column with - zero variance. + Returns + ------- + :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + Standardized vector(s). 
If the variance of a column is + zero, it will return default `0.0` for the column with + zero variance. + + Notes + ----- + In Python, transform cannot currently be used within + an RDD transformation or action. + Call transform directly on the RDD instead. """ return JavaVectorTransformer.transform(self, vector) @@ -196,12 +227,20 @@ class StandardScaler(object): variance using column summary statistics on the samples in the training set. - :param withMean: False by default. Centers the data with mean - before scaling. It will build a dense output, so take - care when applying to sparse input. - :param withStd: True by default. Scales the data to unit - standard deviation. + .. versionadded:: 1.2.0 + Parameters + ---------- + withMean : bool, optional + False by default. Centers the data with mean + before scaling. It will build a dense output, so take + care when applying to sparse input. + withStd : bool, optional + True by default. Scales the data to unit + standard deviation. + + Examples + -------- >>> vs = [Vectors.dense([-2.0, 2.3, 0]), Vectors.dense([3.8, 0.0, 1.9])] >>> dataset = sc.parallelize(vs) >>> standardizer = StandardScaler(True, True) @@ -218,8 +257,6 @@ def __init__(self, withMean=False, withStd=True): True >>> model.withMean True - - .. versionadded:: 1.2.0 """ def __init__(self, withMean=False, withStd=True): if not (withMean or withStd): @@ -227,15 +264,22 @@ def __init__(self, withMean=False, withStd=True): self.withMean = withMean self.withStd = withStd - @since('1.2.0') def fit(self, dataset): """ Computes the mean and variance and stores as a model to be used for later scaling. - :param dataset: The data used to compute the mean and variance - to build the transformation model. - :return: a StandardScalarModel + .. versionadded:: 1.2.0 + + Parameters + ---------- + dataset : :py:class:`pyspark.RDD` + The data used to compute the mean and variance + to build the transformation model. + + Returns + ------- + :py:class:`StandardScalerModel` """ dataset = dataset.map(_convert_to_vector) jmodel = callMLlibFunc("fitStandardScaler", self.withMean, self.withStd, dataset) @@ -249,13 +293,21 @@ class ChiSqSelectorModel(JavaVectorTransformer): .. versionadded:: 1.4.0 """ - @since('1.4.0') def transform(self, vector): """ Applies transformation on a vector. - :param vector: Vector or RDD of Vector to be transformed. - :return: transformed vector. + .. versionadded:: 1.4.0 + + Parameters + ---------- + vector : :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + Input vector(s) to be transformed. + + Returns + ------- + :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + transformed vector(s). """ return JavaVectorTransformer.transform(self, vector) @@ -284,6 +336,10 @@ class ChiSqSelector(object): By default, the selection method is `numTopFeatures`, with the default number of top features set to 50. + .. versionadded:: 1.4.0 + + Examples + -------- >>> from pyspark.mllib.linalg import SparseVector, DenseVector >>> from pyspark.mllib.regression import LabeledPoint >>> data = sc.parallelize([ @@ -306,8 +362,6 @@ class ChiSqSelector(object): >>> model = ChiSqSelector(selectorType="percentile", percentile=0.34).fit(data) >>> model.transform(DenseVector([7.0, 9.0, 5.0])) DenseVector([7.0]) - - .. 
versionadded:: 1.4.0 """ def __init__(self, numTopFeatures=50, selectorType="numTopFeatures", percentile=0.1, fpr=0.05, fdr=0.05, fwe=0.05): @@ -372,15 +426,18 @@ def setSelectorType(self, selectorType): self.selectorType = str(selectorType) return self - @since('1.4.0') def fit(self, data): """ Returns a ChiSquared feature selector. - :param data: an `RDD[LabeledPoint]` containing the labeled dataset - with categorical features. Real-valued features will be - treated as categorical for each distinct value. - Apply feature discretizer before using this function. + .. versionadded:: 1.4.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` of :py:class:`pyspark.mllib.regression.LabeledPoint` + containing the labeled dataset with categorical features. + Real-valued features will be treated as categorical for each + distinct value. Apply feature discretizer before using this function. """ jmodel = callMLlibFunc("fitChiSqSelector", self.selectorType, self.numTopFeatures, self.percentile, self.fpr, self.fdr, self.fwe, data) @@ -399,6 +456,10 @@ class PCA(object): """ A feature transformer that projects vectors to a low-dimensional space using PCA. + .. versionadded:: 1.5.0 + + Examples + -------- >>> data = [Vectors.sparse(5, [(1, 1.0), (3, 7.0)]), ... Vectors.dense([2.0, 0.0, 3.0, 4.0, 5.0]), ... Vectors.dense([4.0, 0.0, 0.0, 6.0, 7.0])] @@ -408,20 +469,26 @@ class PCA(object): 1.648... >>> pcArray[1] -4.013... - - .. versionadded:: 1.5.0 """ def __init__(self, k): """ - :param k: number of principal components. + Parameters + ---------- + k : int + number of principal components. """ self.k = int(k) - @since('1.5.0') def fit(self, data): """ Computes a [[PCAModel]] that contains the principal components of the input vectors. - :param data: source vectors + + .. versionadded:: 1.5.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + source vectors """ jmodel = callMLlibFunc("fitPCA", self.k, data) return PCAModel(jmodel) @@ -432,16 +499,23 @@ class HashingTF(object): Maps a sequence of terms to their term frequencies using the hashing trick. - .. note:: The terms must be hashable (can not be dict/set/list...). + .. versionadded:: 1.2.0 + + Parameters + ---------- + numFeatures : int, optional + number of features (default: 2^20) - :param numFeatures: number of features (default: 2^20) + Notes + ----- + The terms must be hashable (can not be dict/set/list...). + Examples + -------- >>> htf = HashingTF(100) >>> doc = "a a b b c d".split(" ") >>> htf.transform(doc) SparseVector(100, {...}) - - .. versionadded:: 1.2.0 """ def __init__(self, numFeatures=1 << 20): self.numFeatures = numFeatures @@ -485,7 +559,7 @@ class IDFModel(JavaVectorTransformer): .. versionadded:: 1.2.0 """ - @since('1.2.0') + def transform(self, x): """ Transforms term frequency (TF) vectors to TF-IDF vectors. @@ -494,13 +568,24 @@ def transform(self, x): the terms which occur in fewer than `minDocFreq` documents will have an entry of 0. - .. note:: In Python, transform cannot currently be used within - an RDD transformation or action. - Call transform directly on the RDD instead. + .. 
versionadded:: 1.2.0 + + Parameters + ---------- + x : :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + an RDD of term frequency vectors or a term frequency + vector - :param x: an RDD of term frequency vectors or a term frequency - vector - :return: an RDD of TF-IDF vectors or a TF-IDF vector + Returns + ------- + :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + an RDD of TF-IDF vectors or a TF-IDF vector + + Notes + ----- + In Python, transform cannot currently be used within + an RDD transformation or action. + Call transform directly on the RDD instead. """ return JavaVectorTransformer.transform(self, x) @@ -539,9 +624,15 @@ class IDF(object): `minDocFreq`). For terms that are not in at least `minDocFreq` documents, the IDF is found as 0, resulting in TF-IDFs of 0. - :param minDocFreq: minimum of documents in which a term - should appear for filtering + .. versionadded:: 1.2.0 + + Parameters + ---------- + minDocFreq : int + minimum of documents in which a term should appear for filtering + Examples + -------- >>> n = 4 >>> freqs = [Vectors.sparse(n, (1, 3), (1.0, 2.0)), ... Vectors.dense([0.0, 1.0, 2.0, 3.0]), @@ -560,18 +651,20 @@ class IDF(object): DenseVector([0.0, 0.0, 1.3863, 0.863]) >>> model.transform(Vectors.sparse(n, (1, 3), (1.0, 2.0))) SparseVector(4, {1: 0.0, 3: 0.5754}) - - .. versionadded:: 1.2.0 """ def __init__(self, minDocFreq=0): self.minDocFreq = minDocFreq - @since('1.2.0') def fit(self, dataset): """ Computes the inverse document frequency. - :param dataset: an RDD of term frequency vectors + .. versionadded:: 1.2.0 + + Parameters + ---------- + dataset : :py:class:`pyspark.RDD` + an RDD of term frequency vectors """ if not isinstance(dataset, RDD): raise TypeError("dataset should be an RDD of term frequency vectors") @@ -582,34 +675,55 @@ def fit(self, dataset): class Word2VecModel(JavaVectorTransformer, JavaSaveable, JavaLoader): """ class for Word2Vec model - - .. versionadded:: 1.2.0 """ - @since('1.2.0') + def transform(self, word): """ Transforms a word to its vector representation - .. note:: Local use only + .. versionadded:: 1.2.0 + + Parameters + ---------- + word : str + a word - :param word: a word - :return: vector representation of word(s) + Returns + ------- + :py:class:`pyspark.mllib.linalg.Vector` + vector representation of word(s) + + Notes + ----- + Local use only """ try: return self.call("transform", word) except Py4JJavaError: raise ValueError("%s not found" % word) - @since('1.2.0') def findSynonyms(self, word, num): """ Find synonyms of a word - :param word: a word or a vector representation of word - :param num: number of synonyms to find - :return: array of (word, cosineSimilarity) + .. versionadded:: 1.2.0 + + Parameters + ---------- + + word : str or :py:class:`pyspark.mllib.linalg.Vector` + a word or a vector representation of word + num : int + number of synonyms to find + + Returns + ------- + :py:class:`collections.abc.Iterable` + array of (word, cosineSimilarity) - .. note:: Local use only + Notes + ----- + Local use only """ if not isinstance(word, str): word = _convert_to_vector(word) @@ -653,6 +767,10 @@ class Word2Vec(object): and Distributed Representations of Words and Phrases and their Compositionality. + .. versionadded:: 1.2.0 + + Examples + -------- >>> sentence = "a b " * 100 + "a c " * 10 >>> localDoc = [sentence, sentence] >>> doc = sc.parallelize(localDoc).map(lambda line: line.split(" ")) @@ -686,9 +804,6 @@ class Word2Vec(object): ... rmtree(path) ... except OSError: ... 
pass - - .. versionadded:: 1.2.0 - """ def __init__(self): """ @@ -761,13 +876,20 @@ def setWindowSize(self, windowSize): self.windowSize = windowSize return self - @since('1.2.0') def fit(self, data): """ Computes the vector representation of each word in vocabulary. - :param data: training data. RDD of list of string - :return: Word2VecModel instance + .. versionadded:: 1.2.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + training data. RDD of list of string + + Returns + ------- + :py:class:`Word2VecModel` """ if not isinstance(data, RDD): raise TypeError("data should be an RDD of list of string") @@ -783,6 +905,10 @@ class ElementwiseProduct(VectorTransformer): Scales each column of the vector, with the supplied weight vector. i.e the elementwise product. + .. versionadded:: 1.5.0 + + Examples + -------- >>> weight = Vectors.dense([1.0, 2.0, 3.0]) >>> eprod = ElementwiseProduct(weight) >>> a = Vectors.dense([2.0, 1.0, 3.0]) @@ -792,8 +918,6 @@ class ElementwiseProduct(VectorTransformer): >>> rdd = sc.parallelize([a, b]) >>> eprod.transform(rdd).collect() [DenseVector([2.0, 2.0, 9.0]), DenseVector([9.0, 6.0, 12.0])] - - .. versionadded:: 1.5.0 """ def __init__(self, scalingVector): self.scalingVector = _convert_to_vector(scalingVector) diff --git a/python/pyspark/mllib/feature.pyi b/python/pyspark/mllib/feature.pyi index 9ccec36abd6ff..24a46f6bee798 100644 --- a/python/pyspark/mllib/feature.pyi +++ b/python/pyspark/mllib/feature.pyi @@ -17,7 +17,7 @@ # under the License. from typing import overload -from typing import Iterable, Hashable, List, Tuple +from typing import Iterable, Hashable, List, Tuple, Union from pyspark.mllib._typing import VectorLike from pyspark.context import SparkContext @@ -135,7 +135,7 @@ class IDF: class Word2VecModel(JavaVectorTransformer, JavaSaveable, JavaLoader[Word2VecModel]): def transform(self, word: str) -> Vector: ... # type: ignore - def findSynonyms(self, word: str, num: int) -> Iterable[Tuple[str, float]]: ... + def findSynonyms(self, word: Union[str, VectorLike], num: int) -> Iterable[Tuple[str, float]]: ... def getVectors(self) -> JavaMap: ... @classmethod def load(cls, sc: SparkContext, path: str) -> Word2VecModel: ... diff --git a/python/pyspark/mllib/fpm.py b/python/pyspark/mllib/fpm.py index cbbd7b351b20d..1f87a15cb11c9 100644 --- a/python/pyspark/mllib/fpm.py +++ b/python/pyspark/mllib/fpm.py @@ -32,6 +32,10 @@ class FPGrowthModel(JavaModelWrapper, JavaSaveable, JavaLoader): A FP-Growth model for mining frequent itemsets using the Parallel FP-Growth algorithm. + .. versionadded:: 1.4.0 + + Examples + -------- >>> data = [["a", "b", "c"], ["a", "b", "d", "e"], ["a", "c", "e"], ["a", "c", "f"]] >>> rdd = sc.parallelize(data, 2) >>> model = FPGrowth.train(rdd, 0.6, 2) @@ -42,8 +46,6 @@ class FPGrowthModel(JavaModelWrapper, JavaSaveable, JavaLoader): >>> sameModel = FPGrowthModel.load(sc, model_path) >>> sorted(model.freqItemsets().collect()) == sorted(sameModel.freqItemsets().collect()) True - - .. versionadded:: 1.4.0 """ @since("1.4.0") @@ -72,20 +74,23 @@ class FPGrowth(object): """ @classmethod - @since("1.4.0") def train(cls, data, minSupport=0.3, numPartitions=-1): """ Computes an FP-Growth model that contains frequent itemsets. - :param data: - The input data set, each element contains a transaction. - :param minSupport: - The minimal support level. - (default: 0.3) - :param numPartitions: - The number of partitions used by parallel FP-growth. A value - of -1 will use the same number as input data. - (default: -1) + .. 
versionadded:: 1.4.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + The input data set, each element contains a transaction. + minSupport : float, optional + The minimal support level. + (default: 0.3) + numPartitions : int, optional + The number of partitions used by parallel FP-growth. A value + of -1 will use the same number as input data. + (default: -1) """ model = callMLlibFunc("trainFPGrowthModel", data, float(minSupport), int(numPartitions)) return FPGrowthModel(model) @@ -103,6 +108,10 @@ class PrefixSpanModel(JavaModelWrapper): """ Model fitted by PrefixSpan + .. versionadded:: 1.6.0 + + Examples + -------- >>> data = [ ... [["a", "b"], ["c"]], ... [["a"], ["c", "b"], ["a", "b"]], @@ -112,8 +121,6 @@ class PrefixSpanModel(JavaModelWrapper): >>> model = PrefixSpan.train(rdd) >>> sorted(model.freqSequences().collect()) [FreqSequence(sequence=[['a']], freq=3), FreqSequence(sequence=[['a'], ['a']], freq=1), ... - - .. versionadded:: 1.6.0 """ @since("1.6.0") @@ -125,38 +132,45 @@ def freqSequences(self): class PrefixSpan(object): """ A parallel PrefixSpan algorithm to mine frequent sequential patterns. - The PrefixSpan algorithm is described in J. Pei, et al., PrefixSpan: - Mining Sequential Patterns Efficiently by Prefix-Projected Pattern Growth - ([[https://doi.org/10.1109/ICDE.2001.914830]]). + The PrefixSpan algorithm is described in Jian Pei et al (2001) [1]_ .. versionadded:: 1.6.0 + + .. [1] Jian Pei et al., + "PrefixSpan,: mining sequential patterns efficiently by prefix-projected pattern growth," + Proceedings 17th International Conference on Data Engineering, Heidelberg, + Germany, 2001, pp. 215-224, + doi: https://doi.org/10.1109/ICDE.2001.914830 """ @classmethod - @since("1.6.0") def train(cls, data, minSupport=0.1, maxPatternLength=10, maxLocalProjDBSize=32000000): """ Finds the complete set of frequent sequential patterns in the input sequences of itemsets. - :param data: - The input data set, each element contains a sequence of - itemsets. - :param minSupport: - The minimal support level of the sequential pattern, any - pattern that appears more than (minSupport * - size-of-the-dataset) times will be output. - (default: 0.1) - :param maxPatternLength: - The maximal length of the sequential pattern, any pattern - that appears less than maxPatternLength will be output. - (default: 10) - :param maxLocalProjDBSize: - The maximum number of items (including delimiters used in the - internal storage format) allowed in a projected database before - local processing. If a projected database exceeds this size, - another iteration of distributed prefix growth is run. - (default: 32000000) + .. versionadded:: 1.6.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + The input data set, each element contains a sequence of + itemsets. + minSupport : float, optional + The minimal support level of the sequential pattern, any + pattern that appears more than (minSupport * + size-of-the-dataset) times will be output. + (default: 0.1) + maxPatternLength : int, optional + The maximal length of the sequential pattern, any pattern + that appears less than maxPatternLength will be output. + (default: 10) + maxLocalProjDBSize : int, optional + The maximum number of items (including delimiters used in the + internal storage format) allowed in a projected database before + local processing. If a projected database exceeds this size, + another iteration of distributed prefix growth is run. 
+ (default: 32000000) """ model = callMLlibFunc("trainPrefixSpanModel", data, minSupport, maxPatternLength, maxLocalProjDBSize) diff --git a/python/pyspark/mllib/fpm.pyi b/python/pyspark/mllib/fpm.pyi index 880baae1a91a5..c5a6b5f6806c0 100644 --- a/python/pyspark/mllib/fpm.pyi +++ b/python/pyspark/mllib/fpm.pyi @@ -37,8 +37,8 @@ class FPGrowth: cls, data: RDD[List[T]], minSupport: float = ..., numPartitions: int = ... ) -> FPGrowthModel[T]: ... class FreqItemset(Generic[T]): - items = ... # List[T] - freq = ... # int + items: List[T] + freq: int class PrefixSpanModel(JavaModelWrapper, Generic[T]): def freqSequences(self) -> RDD[PrefixSpan.FreqSequence[T]]: ... diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py index c1402fb98a50d..f20004ab70ab3 100644 --- a/python/pyspark/mllib/linalg/__init__.py +++ b/python/pyspark/mllib/linalg/__init__.py @@ -71,6 +71,8 @@ def _vector_size(v): """ Returns the size of the vector. + Examples + -------- >>> _vector_size([1., 2., 3.]) 3 >>> _vector_size((1., 2., 3.)) @@ -231,7 +233,9 @@ def toArray(self): """ Convert the vector into an numpy.ndarray - :return: numpy.ndarray + Returns + ------- + :py:class:`numpy.ndarray` """ raise NotImplementedError @@ -240,7 +244,9 @@ def asML(self): Convert this vector to the new mllib-local representation. This does NOT copy the data; it copies references. - :return: :py:class:`pyspark.ml.linalg.Vector` + Returns + ------- + :py:class:`pyspark.ml.linalg.Vector` """ raise NotImplementedError @@ -251,6 +257,8 @@ class DenseVector(Vector): storage and arithmetics will be delegated to the underlying numpy array. + Examples + -------- >>> v = Vectors.dense([1.0, 2.0]) >>> u = Vectors.dense([3.0, 4.0]) >>> v + u @@ -282,6 +290,8 @@ def parse(s): """ Parse string representation back into the DenseVector. + Examples + -------- >>> DenseVector.parse(' [ 0.0,1.0,2.0, 3.0]') DenseVector([0.0, 1.0, 2.0, 3.0]) """ @@ -312,6 +322,8 @@ def norm(self, p): """ Calculates the norm of a DenseVector. + Examples + -------- >>> a = DenseVector([0, -1, 2, -3]) >>> a.norm(2) 3.7... @@ -327,6 +339,8 @@ def dot(self, other): and a target NumPy array that is either 1- or 2-dimensional. Equivalent to calling numpy.dot of the two vectors. + Examples + -------- >>> dense = DenseVector(array.array('d', [1., 2.])) >>> dense.dot(dense) 5.0 @@ -367,6 +381,8 @@ def squared_distance(self, other): """ Squared distance of two Vectors. + Examples + -------- >>> dense1 = DenseVector(array.array('d', [1., 2.])) >>> dense1.squared_distance(dense1) 0.0 @@ -412,9 +428,11 @@ def asML(self): Convert this vector to the new mllib-local representation. This does NOT copy the data; it copies references. - :return: :py:class:`pyspark.ml.linalg.DenseVector` - .. versionadded:: 2.0.0 + + Returns + ------- + :py:class:`pyspark.ml.linalg.DenseVector` """ return newlinalg.DenseVector(self.array) @@ -501,12 +519,18 @@ def __init__(self, size, *args): (index, value) pairs, or two separate arrays of indices and values (sorted by index). - :param size: Size of the vector. - :param args: Active entries, as a dictionary {index: value, ...}, - a list of tuples [(index, value), ...], or a list of strictly - increasing indices and a list of corresponding values [index, ...], - [value, ...]. Inactive entries are treated as zeros. - + Parameters + ---------- + size : int + Size of the vector. 
+ args + Active entries, as a dictionary {index: value, ...}, + a list of tuples [(index, value), ...], or a list of strictly + increasing indices and a list of corresponding values [index, ...], + [value, ...]. Inactive entries are treated as zeros. + + Examples + -------- >>> SparseVector(4, {1: 1.0, 3: 5.5}) SparseVector(4, {1: 1.0, 3: 5.5}) >>> SparseVector(4, [(1, 1.0), (3, 5.5)]) @@ -556,6 +580,8 @@ def norm(self, p): """ Calculates the norm of a SparseVector. + Examples + -------- >>> a = SparseVector(4, [0, 1], [3., -4.]) >>> a.norm(1) 7.0 @@ -574,6 +600,8 @@ def parse(s): """ Parse string representation back into the SparseVector. + Examples + -------- >>> SparseVector.parse(' (4, [0,1 ],[ 4.0,5.0] )') SparseVector(4, {0: 4.0, 1: 5.0}) """ @@ -622,6 +650,8 @@ def dot(self, other): """ Dot product with a SparseVector or 1- or 2-dimensional Numpy array. + Examples + -------- >>> a = SparseVector(4, [1, 3], [3.0, 4.0]) >>> a.dot(a) 25.0 @@ -678,6 +708,8 @@ def squared_distance(self, other): """ Squared distance from a SparseVector or 1-dimensional NumPy array. + Examples + -------- >>> a = SparseVector(4, [1, 3], [3.0, 4.0]) >>> a.squared_distance(a) 0.0 @@ -754,9 +786,11 @@ def asML(self): Convert this vector to the new mllib-local representation. This does NOT copy the data; it copies references. - :return: :py:class:`pyspark.ml.linalg.SparseVector` - .. versionadded:: 2.0.0 + + Returns + ------- + :py:class:`pyspark.ml.linalg.SparseVector` """ return newlinalg.SparseVector(self.size, self.indices, self.values) @@ -828,10 +862,12 @@ class Vectors(object): """ Factory methods for working with vectors. - .. note:: Dense vectors are simply represented as NumPy array objects, - so there is no need to covert them for use in MLlib. For sparse vectors, - the factory methods in this class create an MLlib-compatible type, or users - can pass in SciPy's `scipy.sparse` column vectors. + Notes + ----- + Dense vectors are simply represented as NumPy array objects, + so there is no need to convert them for use in MLlib. For sparse vectors, + the factory methods in this class create an MLlib-compatible type, or users + can pass in SciPy's `scipy.sparse` column vectors. """ @staticmethod @@ -841,10 +877,16 @@ def sparse(size, *args): (index, value) pairs, or two separate arrays of indices and values (sorted by index). - :param size: Size of the vector. - :param args: Non-zero entries, as a dictionary, list of tuples, - or two sorted lists containing indices and values. + Parameters + ---------- + size : int + Size of the vector. + args + Non-zero entries, as a dictionary, list of tuples, + or two sorted lists containing indices and values. + Examples + -------- >>> Vectors.sparse(4, {1: 1.0, 3: 5.5}) SparseVector(4, {1: 1.0, 3: 5.5}) >>> Vectors.sparse(4, [(1, 1.0), (3, 5.5)]) @@ -859,6 +901,8 @@ def dense(*elements): """ Create a dense vector of 64-bit floats from a Python list or numbers. + Examples + -------- >>> Vectors.dense([1, 2, 3]) DenseVector([1.0, 2.0, 3.0]) >>> Vectors.dense(1.0, 2.0) @@ -875,10 +919,15 @@ def fromML(vec): """ Convert a vector from the new mllib-local representation. This does NOT copy the data; it copies references. - :param vec: a :py:class:`pyspark.ml.linalg.Vector` - :return: a :py:class:`pyspark.mllib.linalg.Vector` - .. 
versionadded:: 2.0.0 + + Parameters + ---------- + vec : :py:class:`pyspark.ml.linalg.Vector` + + Returns + ------- + :py:class:`pyspark.mllib.linalg.Vector` """ if isinstance(vec, newlinalg.DenseVector): return DenseVector(vec.array) @@ -893,6 +942,8 @@ def stringify(vector): Converts a vector into a string, which can be recognized by Vectors.parse(). + Examples + -------- >>> Vectors.stringify(Vectors.sparse(2, [1], [1.0])) '(2,[1],[1.0])' >>> Vectors.stringify(Vectors.dense([0.0, 1.0])) @@ -907,6 +958,8 @@ def squared_distance(v1, v2): a and b can be of type SparseVector, DenseVector, np.ndarray or array.array. + Examples + -------- >>> a = Vectors.sparse(4, [(0, 1), (3, 4)]) >>> b = Vectors.dense([2, 5, 4, 1]) >>> a.squared_distance(b) @@ -926,6 +979,8 @@ def norm(vector, p): def parse(s): """Parse a string representation back into the Vector. + Examples + -------- >>> Vectors.parse('[2,1,2 ]') DenseVector([2.0, 1.0, 2.0]) >>> Vectors.parse(' ( 100, [0], [2])') @@ -1023,6 +1078,8 @@ def __str__(self): """ Pretty printing of a DenseMatrix + Examples + -------- >>> dm = DenseMatrix(2, 2, range(4)) >>> print(dm) DenseMatrix([[ 0., 2.], @@ -1044,6 +1101,8 @@ def __repr__(self): """ Representation of a DenseMatrix + Examples + -------- >>> dm = DenseMatrix(2, 2, range(4)) >>> dm DenseMatrix(2, 2, [0.0, 1.0, 2.0, 3.0], False) @@ -1067,6 +1126,8 @@ def toArray(self): """ Return an numpy.ndarray + Examples + -------- >>> m = DenseMatrix(2, 2, range(4)) >>> m.toArray() array([[ 0., 2.], @@ -1098,9 +1159,11 @@ def asML(self): Convert this matrix to the new mllib-local representation. This does NOT copy the data; it copies references. - :return: :py:class:`pyspark.ml.linalg.DenseMatrix` - .. versionadded:: 2.0.0 + + Returns + ------- + :py:class:`pyspark.ml.linalg.DenseMatrix` """ return newlinalg.DenseMatrix(self.numRows, self.numCols, self.values, self.isTransposed) @@ -1154,6 +1217,8 @@ def __str__(self): """ Pretty printing of a SparseMatrix + Examples + -------- >>> sm1 = SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4]) >>> print(sm1) 2 X 2 CSCMatrix @@ -1200,6 +1265,8 @@ def __repr__(self): """ Representation of a SparseMatrix + Examples + -------- >>> sm1 = SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4]) >>> sm1 SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2.0, 3.0, 4.0], False) @@ -1281,9 +1348,11 @@ def asML(self): Convert this matrix to the new mllib-local representation. This does NOT copy the data; it copies references. - :return: :py:class:`pyspark.ml.linalg.SparseMatrix` - .. versionadded:: 2.0.0 + + Returns + ------- + :py:class:`pyspark.ml.linalg.SparseMatrix` """ return newlinalg.SparseMatrix(self.numRows, self.numCols, self.colPtrs, self.rowIndices, self.values, self.isTransposed) @@ -1314,10 +1383,15 @@ def fromML(mat): Convert a matrix from the new mllib-local representation. This does NOT copy the data; it copies references. - :param mat: a :py:class:`pyspark.ml.linalg.Matrix` - :return: a :py:class:`pyspark.mllib.linalg.Matrix` - .. 
versionadded:: 2.0.0 + + Parameters + ---------- + mat : :py:class:`pyspark.ml.linalg.Matrix` + + Returns + ------- + :py:class:`pyspark.mllib.linalg.Matrix` """ if isinstance(mat, newlinalg.DenseMatrix): return DenseMatrix(mat.numRows, mat.numCols, mat.values, mat.isTransposed) diff --git a/python/pyspark/mllib/linalg/distributed.py b/python/pyspark/mllib/linalg/distributed.py index 603d31d3d7b26..f0e889b15bf51 100644 --- a/python/pyspark/mllib/linalg/distributed.py +++ b/python/pyspark/mllib/linalg/distributed.py @@ -55,16 +55,22 @@ class RowMatrix(DistributedMatrix): Represents a row-oriented distributed Matrix with no meaningful row indices. - :param rows: An RDD or DataFrame of vectors. If a DataFrame is provided, it must have a single - vector typed column. - :param numRows: Number of rows in the matrix. A non-positive - value means unknown, at which point the number - of rows will be determined by the number of - records in the `rows` RDD. - :param numCols: Number of columns in the matrix. A non-positive - value means unknown, at which point the number - of columns will be determined by the size of - the first row. + + Parameters + ---------- + rows : :py:class:`pyspark.RDD` or :py:class:`pyspark.sql.DataFrame` + An RDD or DataFrame of vectors. If a DataFrame is provided, it must have a single + vector typed column. + numRows : int, optional + Number of rows in the matrix. A non-positive + value means unknown, at which point the number + of rows will be determined by the number of + records in the `rows` RDD. + numCols : int, optional + Number of columns in the matrix. A non-positive + value means unknown, at which point the number + of columns will be determined by the size of + the first row. """ def __init__(self, rows, numRows=0, numCols=0): """ @@ -77,6 +83,8 @@ def __init__(self, rows, numRows=0, numCols=0): object, in which case we can wrap it directly. This assists in clean matrix conversions. + Examples + -------- >>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6]]) >>> mat = RowMatrix(rows) @@ -108,6 +116,8 @@ def rows(self): """ Rows of the RowMatrix stored as an RDD of vectors. + Examples + -------- >>> mat = RowMatrix(sc.parallelize([[1, 2, 3], [4, 5, 6]])) >>> rows = mat.rows >>> rows.first() @@ -119,6 +129,8 @@ def numRows(self): """ Get or compute the number of rows. + Examples + -------- >>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6], ... [7, 8, 9], [10, 11, 12]]) @@ -136,6 +148,8 @@ def numCols(self): """ Get or compute the number of cols. + Examples + -------- >>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6], ... [7, 8, 9], [10, 11, 12]]) @@ -149,14 +163,19 @@ def numCols(self): """ return self._java_matrix_wrapper.call("numCols") - @since('2.0.0') def computeColumnSummaryStatistics(self): """ Computes column-wise summary statistics. - :return: :class:`MultivariateStatisticalSummary` object - containing column-wise summary statistics. + .. versionadded:: 2.0.0 + + Returns + ------- + :py:class:`MultivariateStatisticalSummary` + object containing column-wise summary statistics. + Examples + -------- >>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6]]) >>> mat = RowMatrix(rows) @@ -167,14 +186,19 @@ def computeColumnSummaryStatistics(self): java_col_stats = self._java_matrix_wrapper.call("computeColumnSummaryStatistics") return MultivariateStatisticalSummary(java_col_stats) - @since('2.0.0') def computeCovariance(self): """ Computes the covariance matrix, treating each row as an observation. - .. note:: This cannot be computed on matrices with more than 65535 columns. 
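As a quick cross-check of what `computeCovariance()` returns, a minimal sketch comparing it against NumPy's sample covariance on a tiny matrix; it assumes an active SparkContext `sc` (as in the doctests) and that `numpy.cov`'s default `ddof=1` matches the (n - 1) normalization seen in the doctest output.

```
import numpy as np
from pyspark.mllib.linalg.distributed import RowMatrix

data = [[1.0, 2.0], [2.0, 1.0], [3.0, 4.0]]
mat = RowMatrix(sc.parallelize(data))

# computeCovariance() returns a local DenseMatrix; toArray() gives an ndarray.
spark_cov = mat.computeCovariance().toArray()
numpy_cov = np.cov(np.array(data), rowvar=False)  # sample covariance, ddof=1
assert np.allclose(spark_cov, numpy_cov)
```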
+ .. versionadded:: 2.0.0 + + Notes + ----- + This cannot be computed on matrices with more than 65535 columns. + Examples + -------- >>> rows = sc.parallelize([[1, 2], [2, 1]]) >>> mat = RowMatrix(rows) @@ -183,13 +207,18 @@ def computeCovariance(self): """ return self._java_matrix_wrapper.call("computeCovariance") - @since('2.0.0') def computeGramianMatrix(self): """ Computes the Gramian matrix `A^T A`. - .. note:: This cannot be computed on matrices with more than 65535 columns. + .. versionadded:: 2.0.0 + Notes + ----- + This cannot be computed on matrices with more than 65535 columns. + + Examples + -------- >>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6]]) >>> mat = RowMatrix(rows) @@ -220,11 +249,12 @@ def columnSimilarities(self, threshold=0.0): similarity threshold. To describe the guarantee, we set some notation: - * Let A be the smallest in magnitude non-zero element of - this matrix. - * Let B be the largest in magnitude non-zero element of - this matrix. - * Let L be the maximum number of non-zeros per row. + + - Let A be the smallest in magnitude non-zero element of + this matrix. + - Let B be the largest in magnitude non-zero element of + this matrix. + - Let L be the maximum number of non-zeros per row. For example, for {0,1} matrices: A=B=1. Another example, for the Netflix matrix: A=1, B=5 @@ -236,20 +266,31 @@ def columnSimilarities(self, threshold=0.0): The shuffle size is bounded by the *smaller* of the following two expressions: - * O(n log(n) L / (threshold * A)) - * O(m L^2^) + - O(n log(n) L / (threshold * A)) + - O(m L^2^) The latter is the cost of the brute-force approach, so for non-zero thresholds, the cost is always cheaper than the brute-force approach. - :param: threshold: Set to 0 for deterministic guaranteed - correctness. Similarities above this - threshold are estimated with the cost vs - estimate quality trade-off described above. - :return: An n x n sparse upper-triangular CoordinateMatrix of - cosine similarities between columns of this matrix. + .. versionadded:: 2.0.0 + + Parameters + ---------- + threshold : float, optional + Set to 0 for deterministic guaranteed + correctness. Similarities above this + threshold are estimated with the cost vs + estimate quality trade-off described above. + Returns + ------- + :py:class:`CoordinateMatrix` + An n x n sparse upper-triangular CoordinateMatrix of + cosine similarities between columns of this matrix. + + Examples + -------- >>> rows = sc.parallelize([[1, 2], [1, 5]]) >>> mat = RowMatrix(rows) @@ -260,23 +301,32 @@ def columnSimilarities(self, threshold=0.0): java_sims_mat = self._java_matrix_wrapper.call("columnSimilarities", float(threshold)) return CoordinateMatrix(java_sims_mat) - @since('2.0.0') def tallSkinnyQR(self, computeQ=False): """ Compute the QR decomposition of this RowMatrix. The implementation is designed to optimize the QR decomposition - (factorization) for the RowMatrix of a tall and skinny shape. + (factorization) for the RowMatrix of a tall and skinny shape [1]_. - Reference: - Paul G. Constantine, David F. Gleich. "Tall and skinny QR - factorizations in MapReduce architectures" - ([[https://doi.org/10.1145/1996092.1996103]]) + .. [1] Paul G. Constantine, David F. Gleich. "Tall and skinny QR + factorizations in MapReduce architectures" + https://doi.org/10.1145/1996092.1996103 - :param: computeQ: whether to computeQ - :return: QRDecomposition(Q: RowMatrix, R: Matrix), where - Q = None if computeQ = false. + .. 
versionadded:: 2.0.0 + Parameters + ---------- + computeQ : bool, optional + whether to computeQ + + Returns + ------- + :py:class:`pyspark.mllib.linalg.QRDecomposition` + QRDecomposition(Q: RowMatrix, R: Matrix), where + Q = None if computeQ = false. + + Examples + -------- >>> rows = sc.parallelize([[3, -6], [4, -8], [0, 1]]) >>> mat = RowMatrix(rows) >>> decomp = mat.tallSkinnyQR(True) @@ -301,7 +351,6 @@ def tallSkinnyQR(self, computeQ=False): R = decomp.call("R") return QRDecomposition(Q, R) - @since('2.2.0') def computeSVD(self, k, computeU=False, rCond=1e-9): """ Computes the singular value decomposition of the RowMatrix. @@ -309,27 +358,39 @@ def computeSVD(self, k, computeU=False, rCond=1e-9): The given row matrix A of dimension (m X n) is decomposed into U * s * V'T where - * U: (m X k) (left singular vectors) is a RowMatrix whose - columns are the eigenvectors of (A X A') - * s: DenseVector consisting of square root of the eigenvalues - (singular values) in descending order. - * v: (n X k) (right singular vectors) is a Matrix whose columns - are the eigenvectors of (A' X A) + - U: (m X k) (left singular vectors) is a RowMatrix whose + columns are the eigenvectors of (A X A') + - s: DenseVector consisting of square root of the eigenvalues + (singular values) in descending order. + - v: (n X k) (right singular vectors) is a Matrix whose columns + are the eigenvectors of (A' X A) For more specific details on implementation, please refer the Scala documentation. - :param k: Number of leading singular values to keep (`0 < k <= n`). - It might return less than k if there are numerically zero singular values - or there are not enough Ritz values converged before the maximum number of - Arnoldi update iterations is reached (in case that matrix A is ill-conditioned). - :param computeU: Whether or not to compute U. If set to be - True, then U is computed by A * V * s^-1 - :param rCond: Reciprocal condition number. All singular values - smaller than rCond * s[0] are treated as zero - where s[0] is the largest singular value. - :returns: :py:class:`SingularValueDecomposition` - + .. versionadded:: 2.2.0 + + Parameters + ---------- + k : int + Number of leading singular values to keep (`0 < k <= n`). + It might return less than k if there are numerically zero singular values + or there are not enough Ritz values converged before the maximum number of + Arnoldi update iterations is reached (in case that matrix A is ill-conditioned). + computeU : bool, optional + Whether or not to compute U. If set to be + True, then U is computed by A * V * s^-1 + rCond : float, optional + Reciprocal condition number. All singular values + smaller than rCond * s[0] are treated as zero + where s[0] is the largest singular value. + + Returns + ------- + :py:class:`SingularValueDecomposition` + + Examples + -------- >>> rows = sc.parallelize([[3, 1, 1], [-1, 3, 1]]) >>> rm = RowMatrix(rows) @@ -345,16 +406,27 @@ def computeSVD(self, k, computeU=False, rCond=1e-9): "computeSVD", int(k), bool(computeU), float(rCond)) return SingularValueDecomposition(j_model) - @since('2.2.0') def computePrincipalComponents(self, k): """ Computes the k principal components of the given row matrix - .. note:: This cannot be computed on matrices with more than 65535 columns. + .. versionadded:: 2.2.0 + + Notes + ----- + This cannot be computed on matrices with more than 65535 columns. - :param k: Number of principal components to keep. 
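Because the principal components come back as a local `DenseMatrix`, they can be fed straight into `multiply` to project the rows; a minimal sketch, assuming an active SparkContext `sc` and leaving mean-centering aside.

```
from pyspark.mllib.linalg.distributed import RowMatrix

rm = RowMatrix(sc.parallelize([[1.0, 2.0, 3.0], [2.0, 4.0, 5.0], [3.0, 6.0, 1.0]]))
pc = rm.computePrincipalComponents(2)   # local n x k DenseMatrix
projected = rm.multiply(pc)             # RowMatrix with rows expressed in PC space
print(projected.rows.collect())
```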
- :returns: :py:class:`pyspark.mllib.linalg.DenseMatrix` + Parameters + ---------- + k : int + Number of principal components to keep. + Returns + ------- + :py:class:`pyspark.mllib.linalg.DenseMatrix` + + Examples + -------- >>> rows = sc.parallelize([[1, 2, 3], [2, 4, 5], [3, 6, 1]]) >>> rm = RowMatrix(rows) @@ -370,15 +442,24 @@ def computePrincipalComponents(self, k): """ return self._java_matrix_wrapper.call("computePrincipalComponents", k) - @since('2.2.0') def multiply(self, matrix): """ Multiply this matrix by a local dense matrix on the right. - :param matrix: a local dense matrix whose number of rows must match the number of columns - of this matrix - :returns: :py:class:`RowMatrix` + .. versionadded:: 2.2.0 + + Parameters + ---------- + matrix : :py:class:`pyspark.mllib.linalg.Matrix` + a local dense matrix whose number of rows must match the number of columns + of this matrix + Returns + ------- + :py:class:`RowMatrix` + + Examples + -------- >>> rm = RowMatrix(sc.parallelize([[0, 1], [2, 3]])) >>> rm.multiply(DenseMatrix(2, 2, [0, 2, 1, 3])).rows.collect() [DenseVector([2.0, 3.0]), DenseVector([6.0, 11.0])] @@ -438,8 +519,12 @@ class IndexedRow(object): Just a wrapper over a (int, vector) tuple. - :param index: The index for the given row. - :param vector: The row in the matrix at the given index. + Parameters + ---------- + index : int + The index for the given row. + vector : :py:class:`pyspark.mllib.linalg.Vector` or convertible + The row in the matrix at the given index. """ def __init__(self, index, vector): self.index = int(index) @@ -462,16 +547,21 @@ class IndexedRowMatrix(DistributedMatrix): """ Represents a row-oriented distributed Matrix with indexed rows. - :param rows: An RDD of IndexedRows or (int, vector) tuples or a DataFrame consisting of a - int typed column of indices and a vector typed column. - :param numRows: Number of rows in the matrix. A non-positive - value means unknown, at which point the number - of rows will be determined by the max row - index plus one. - :param numCols: Number of columns in the matrix. A non-positive - value means unknown, at which point the number - of columns will be determined by the size of - the first row. + Parameters + ---------- + rows : :py:class:`pyspark.RDD` + An RDD of IndexedRows or (int, vector) tuples or a DataFrame consisting of a + int typed column of indices and a vector typed column. + numRows : int, optional + Number of rows in the matrix. A non-positive + value means unknown, at which point the number + of rows will be determined by the max row + index plus one. + numCols : int, optional + Number of columns in the matrix. A non-positive + value means unknown, at which point the number + of columns will be determined by the size of + the first row. """ def __init__(self, rows, numRows=0, numCols=0): """ @@ -484,6 +574,8 @@ def __init__(self, rows, numRows=0, numCols=0): object, in which case we can wrap it directly. This assists in clean matrix conversions. + Examples + -------- >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]), ... IndexedRow(1, [4, 5, 6])]) >>> mat = IndexedRowMatrix(rows) @@ -524,6 +616,8 @@ def rows(self): """ Rows of the IndexedRowMatrix stored as an RDD of IndexedRows. + Examples + -------- >>> mat = IndexedRowMatrix(sc.parallelize([IndexedRow(0, [1, 2, 3]), ... IndexedRow(1, [4, 5, 6])])) >>> rows = mat.rows @@ -542,6 +636,8 @@ def numRows(self): """ Get or compute the number of rows. + Examples + -------- >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]), ... 
IndexedRow(1, [4, 5, 6]), ... IndexedRow(2, [7, 8, 9]), @@ -561,6 +657,8 @@ def numCols(self): """ Get or compute the number of cols. + Examples + -------- >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]), ... IndexedRow(1, [4, 5, 6]), ... IndexedRow(2, [7, 8, 9]), @@ -580,6 +678,8 @@ def columnSimilarities(self): """ Compute all cosine similarities between columns. + Examples + -------- >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]), ... IndexedRow(6, [4, 5, 6])]) >>> mat = IndexedRowMatrix(rows) @@ -590,13 +690,18 @@ def columnSimilarities(self): java_coordinate_matrix = self._java_matrix_wrapper.call("columnSimilarities") return CoordinateMatrix(java_coordinate_matrix) - @since('2.0.0') def computeGramianMatrix(self): """ Computes the Gramian matrix `A^T A`. - .. note:: This cannot be computed on matrices with more than 65535 columns. + .. versionadded:: 2.0.0 + + Notes + ----- + This cannot be computed on matrices with more than 65535 columns. + Examples + -------- >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]), ... IndexedRow(1, [4, 5, 6])]) >>> mat = IndexedRowMatrix(rows) @@ -610,6 +715,8 @@ def toRowMatrix(self): """ Convert this matrix to a RowMatrix. + Examples + -------- >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]), ... IndexedRow(6, [4, 5, 6])]) >>> mat = IndexedRowMatrix(rows).toRowMatrix() @@ -623,6 +730,8 @@ def toCoordinateMatrix(self): """ Convert this matrix to a CoordinateMatrix. + Examples + -------- >>> rows = sc.parallelize([IndexedRow(0, [1, 0]), ... IndexedRow(6, [0, 5])]) >>> mat = IndexedRowMatrix(rows).toCoordinateMatrix() @@ -636,13 +745,19 @@ def toBlockMatrix(self, rowsPerBlock=1024, colsPerBlock=1024): """ Convert this matrix to a BlockMatrix. - :param rowsPerBlock: Number of rows that make up each block. - The blocks forming the final rows are not - required to have the given number of rows. - :param colsPerBlock: Number of columns that make up each block. - The blocks forming the final columns are not - required to have the given number of columns. - + Parameters + ---------- + rowsPerBlock : int, optional + Number of rows that make up each block. + The blocks forming the final rows are not + required to have the given number of rows. + colsPerBlock : int, optional + Number of columns that make up each block. + The blocks forming the final columns are not + required to have the given number of columns. + + Examples + -------- >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]), ... IndexedRow(6, [4, 5, 6])]) >>> mat = IndexedRowMatrix(rows).toBlockMatrix() @@ -661,7 +776,6 @@ def toBlockMatrix(self, rowsPerBlock=1024, colsPerBlock=1024): colsPerBlock) return BlockMatrix(java_block_matrix, rowsPerBlock, colsPerBlock) - @since('2.2.0') def computeSVD(self, k, computeU=False, rCond=1e-9): """ Computes the singular value decomposition of the IndexedRowMatrix. @@ -679,17 +793,29 @@ def computeSVD(self, k, computeU=False, rCond=1e-9): For more specific details on implementation, please refer the scala documentation. - :param k: Number of leading singular values to keep (`0 < k <= n`). - It might return less than k if there are numerically zero singular values - or there are not enough Ritz values converged before the maximum number of - Arnoldi update iterations is reached (in case that matrix A is ill-conditioned). - :param computeU: Whether or not to compute U. If set to be - True, then U is computed by A * V * s^-1 - :param rCond: Reciprocal condition number. 
All singular values - smaller than rCond * s[0] are treated as zero - where s[0] is the largest singular value. - :returns: SingularValueDecomposition object - + .. versionadded:: 2.2.0 + + Parameters + ---------- + k : int + Number of leading singular values to keep (`0 < k <= n`). + It might return less than k if there are numerically zero singular values + or there are not enough Ritz values converged before the maximum number of + Arnoldi update iterations is reached (in case that matrix A is ill-conditioned). + computeU : bool, optional + Whether or not to compute U. If set to be + True, then U is computed by A * V * s^-1 + rCond : float, optional + Reciprocal condition number. All singular values + smaller than rCond * s[0] are treated as zero + where s[0] is the largest singular value. + + Returns + ------- + :py:class:`SingularValueDecomposition` + + Examples + -------- >>> rows = [(0, (3, 1, 1)), (1, (-1, 3, 1))] >>> irm = IndexedRowMatrix(sc.parallelize(rows)) >>> svd_model = irm.computeSVD(2, True) @@ -705,15 +831,24 @@ def computeSVD(self, k, computeU=False, rCond=1e-9): "computeSVD", int(k), bool(computeU), float(rCond)) return SingularValueDecomposition(j_model) - @since('2.2.0') def multiply(self, matrix): """ Multiply this matrix by a local dense matrix on the right. - :param matrix: a local dense matrix whose number of rows must match the number of columns - of this matrix - :returns: :py:class:`IndexedRowMatrix` + .. versionadded:: 2.2.0 + + Parameters + ---------- + matrix : :py:class:`pyspark.mllib.linalg.Matrix` + a local dense matrix whose number of rows must match the number of columns + of this matrix + Returns + ------- + :py:class:`IndexedRowMatrix` + + Examples + -------- >>> mat = IndexedRowMatrix(sc.parallelize([(0, (0, 1)), (1, (2, 3))])) >>> mat.multiply(DenseMatrix(2, 2, [0, 2, 1, 3])).rows.collect() [IndexedRow(0, [2.0,3.0]), IndexedRow(1, [6.0,11.0])] @@ -730,9 +865,14 @@ class MatrixEntry(object): Just a wrapper over a (int, int, float) tuple. - :param i: The row index of the matrix. - :param j: The column index of the matrix. - :param value: The (i, j)th entry of the matrix, as a float. + Parameters + ---------- + i : int + The row index of the matrix. + j : int + The column index of the matrix. + value : float + The (i, j)th entry of the matrix, as a float. """ def __init__(self, i, j, value): self.i = int(i) @@ -756,16 +896,21 @@ class CoordinateMatrix(DistributedMatrix): """ Represents a matrix in coordinate format. - :param entries: An RDD of MatrixEntry inputs or - (int, int, float) tuples. - :param numRows: Number of rows in the matrix. A non-positive - value means unknown, at which point the number - of rows will be determined by the max row - index plus one. - :param numCols: Number of columns in the matrix. A non-positive - value means unknown, at which point the number - of columns will be determined by the max row - index plus one. + Parameters + ---------- + entries : :py:class:`pyspark.RDD` + An RDD of MatrixEntry inputs or + (int, int, float) tuples. + numRows : int, optional + Number of rows in the matrix. A non-positive + value means unknown, at which point the number + of rows will be determined by the max row + index plus one. + numCols : int, optional + Number of columns in the matrix. A non-positive + value means unknown, at which point the number + of columns will be determined by the max row + index plus one. 
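Putting the conversions documented here together, a minimal sketch of forming the Gramian A^T A for a small CoordinateMatrix by going through BlockMatrix; it assumes an active SparkContext `sc` and that `BlockMatrix.multiply` and `toLocalMatrix` behave as in current releases.

```
from pyspark.mllib.linalg.distributed import CoordinateMatrix, MatrixEntry

entries = sc.parallelize([MatrixEntry(0, 0, 1.0), MatrixEntry(1, 1, 2.0),
                          MatrixEntry(2, 0, 3.0)])
A = CoordinateMatrix(entries)                                    # 3 x 2
gram = A.transpose().toBlockMatrix().multiply(A.toBlockMatrix())
print(gram.toLocalMatrix())                                      # 2 x 2 Gramian A^T A
```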
""" def __init__(self, entries, numRows=0, numCols=0): """ @@ -778,6 +923,8 @@ def __init__(self, entries, numRows=0, numCols=0): object, in which case we can wrap it directly. This assists in clean matrix conversions. + Examples + -------- >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2), ... MatrixEntry(6, 4, 2.1)]) >>> mat = CoordinateMatrix(entries) @@ -817,6 +964,8 @@ def entries(self): Entries of the CoordinateMatrix stored as an RDD of MatrixEntries. + Examples + -------- >>> mat = CoordinateMatrix(sc.parallelize([MatrixEntry(0, 0, 1.2), ... MatrixEntry(6, 4, 2.1)])) >>> entries = mat.entries @@ -835,6 +984,8 @@ def numRows(self): """ Get or compute the number of rows. + Examples + -------- >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2), ... MatrixEntry(1, 0, 2), ... MatrixEntry(2, 1, 3.7)]) @@ -853,6 +1004,8 @@ def numCols(self): """ Get or compute the number of cols. + Examples + -------- >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2), ... MatrixEntry(1, 0, 2), ... MatrixEntry(2, 1, 3.7)]) @@ -867,11 +1020,14 @@ def numCols(self): """ return self._java_matrix_wrapper.call("numCols") - @since('2.0.0') def transpose(self): """ Transpose this CoordinateMatrix. + .. versionadded:: 2.0.0 + + Examples + -------- >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2), ... MatrixEntry(1, 0, 2), ... MatrixEntry(2, 1, 3.7)]) @@ -891,6 +1047,8 @@ def toRowMatrix(self): """ Convert this matrix to a RowMatrix. + Examples + -------- >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2), ... MatrixEntry(6, 4, 2.1)]) >>> mat = CoordinateMatrix(entries).toRowMatrix() @@ -915,6 +1073,8 @@ def toIndexedRowMatrix(self): """ Convert this matrix to an IndexedRowMatrix. + Examples + -------- >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2), ... MatrixEntry(6, 4, 2.1)]) >>> mat = CoordinateMatrix(entries).toIndexedRowMatrix() @@ -938,13 +1098,19 @@ def toBlockMatrix(self, rowsPerBlock=1024, colsPerBlock=1024): """ Convert this matrix to a BlockMatrix. - :param rowsPerBlock: Number of rows that make up each block. - The blocks forming the final rows are not - required to have the given number of rows. - :param colsPerBlock: Number of columns that make up each block. - The blocks forming the final columns are not - required to have the given number of columns. - + Parameters + ---------- + rowsPerBlock : int, optional + Number of rows that make up each block. + The blocks forming the final rows are not + required to have the given number of rows. + colsPerBlock : int, optional + Number of columns that make up each block. + The blocks forming the final columns are not + required to have the given number of columns. + + Examples + -------- >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2), ... MatrixEntry(6, 4, 2.1)]) >>> mat = CoordinateMatrix(entries).toBlockMatrix() @@ -983,26 +1149,33 @@ class BlockMatrix(DistributedMatrix): """ Represents a distributed matrix in blocks of local matrices. - :param blocks: An RDD of sub-matrix blocks - ((blockRowIndex, blockColIndex), sub-matrix) that - form this distributed matrix. If multiple blocks - with the same index exist, the results for - operations like add and multiply will be - unpredictable. - :param rowsPerBlock: Number of rows that make up each block. - The blocks forming the final rows are not - required to have the given number of rows. - :param colsPerBlock: Number of columns that make up each block. - The blocks forming the final columns are not - required to have the given number of columns. 
- :param numRows: Number of rows of this matrix. If the supplied - value is less than or equal to zero, the number - of rows will be calculated when `numRows` is - invoked. - :param numCols: Number of columns of this matrix. If the supplied - value is less than or equal to zero, the number - of columns will be calculated when `numCols` is - invoked. + Parameters + ---------- + blocks : :py:class:`pyspark.RDD` + An RDD of sub-matrix blocks + ((blockRowIndex, blockColIndex), sub-matrix) that + form this distributed matrix. If multiple blocks + with the same index exist, the results for + operations like add and multiply will be + unpredictable. + rowsPerBlock : int + Number of rows that make up each block. + The blocks forming the final rows are not + required to have the given number of rows. + colsPerBlock : int + Number of columns that make up each block. + The blocks forming the final columns are not + required to have the given number of columns. + numRows : int, optional + Number of rows of this matrix. If the supplied + value is less than or equal to zero, the number + of rows will be calculated when `numRows` is + invoked. + numCols : int, optional + Number of columns of this matrix. If the supplied + value is less than or equal to zero, the number + of columns will be calculated when `numCols` is + invoked. """ def __init__(self, blocks, rowsPerBlock, colsPerBlock, numRows=0, numCols=0): """ @@ -1015,6 +1188,8 @@ def __init__(self, blocks, rowsPerBlock, colsPerBlock, numRows=0, numCols=0): object, in which case we can wrap it directly. This assists in clean matrix conversions. + Examples + -------- >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])), ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]) >>> mat = BlockMatrix(blocks, 3, 2) @@ -1058,6 +1233,8 @@ def blocks(self): ((blockRowIndex, blockColIndex), sub-matrix) that form this distributed matrix. + Examples + -------- >>> mat = BlockMatrix( ... sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])), ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]), 3, 2) @@ -1079,6 +1256,8 @@ def rowsPerBlock(self): """ Number of rows that make up each block. + Examples + -------- >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])), ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]) >>> mat = BlockMatrix(blocks, 3, 2) @@ -1092,6 +1271,8 @@ def colsPerBlock(self): """ Number of columns that make up each block. + Examples + -------- >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])), ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]) >>> mat = BlockMatrix(blocks, 3, 2) @@ -1105,6 +1286,8 @@ def numRowBlocks(self): """ Number of rows of blocks in the BlockMatrix. + Examples + -------- >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])), ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]) >>> mat = BlockMatrix(blocks, 3, 2) @@ -1118,6 +1301,8 @@ def numColBlocks(self): """ Number of columns of blocks in the BlockMatrix. + Examples + -------- >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])), ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]) >>> mat = BlockMatrix(blocks, 3, 2) @@ -1130,6 +1315,8 @@ def numRows(self): """ Get or compute the number of rows. + Examples + -------- >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])), ... 
((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]) @@ -1147,6 +1334,8 @@ def numCols(self): """ Get or compute the number of cols. + Examples + -------- >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])), ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]) @@ -1197,6 +1386,8 @@ def add(self, other): two dense sub matrix blocks are added, the output block will also be a DenseMatrix. + Examples + -------- >>> dm1 = Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6]) >>> dm2 = Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]) >>> sm = Matrices.sparse(3, 2, [0, 1, 3], [0, 1, 2], [7, 11, 12]) @@ -1220,7 +1411,6 @@ def add(self, other): java_block_matrix = self._java_matrix_wrapper.call("add", other_java_block_matrix) return BlockMatrix(java_block_matrix, self.rowsPerBlock, self.colsPerBlock) - @since('2.0.0') def subtract(self, other): """ Subtracts the given block matrix `other` from this block matrix: @@ -1232,6 +1422,10 @@ def subtract(self, other): If two dense sub matrix blocks are subtracted, the output block will also be a DenseMatrix. + .. versionadded:: 2.0.0 + + Examples + -------- >>> dm1 = Matrices.dense(3, 2, [3, 1, 5, 4, 6, 2]) >>> dm2 = Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]) >>> sm = Matrices.sparse(3, 2, [0, 1, 3], [0, 1, 2], [1, 2, 3]) @@ -1265,6 +1459,8 @@ def multiply(self, other): This may cause some performance issues until support for multiplying two sparse matrices is added. + Examples + -------- >>> dm1 = Matrices.dense(2, 3, [1, 2, 3, 4, 5, 6]) >>> dm2 = Matrices.dense(2, 3, [7, 8, 9, 10, 11, 12]) >>> dm3 = Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6]) @@ -1290,12 +1486,15 @@ def multiply(self, other): java_block_matrix = self._java_matrix_wrapper.call("multiply", other_java_block_matrix) return BlockMatrix(java_block_matrix, self.rowsPerBlock, self.colsPerBlock) - @since('2.0.0') def transpose(self): """ Transpose this BlockMatrix. Returns a new BlockMatrix instance sharing the same underlying data. Is a lazy operation. + .. versionadded:: 2.0.0 + + Examples + -------- >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])), ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]) >>> mat = BlockMatrix(blocks, 3, 2) @@ -1311,6 +1510,8 @@ def toLocalMatrix(self): """ Collect the distributed matrix on the driver as a DenseMatrix. + Examples + -------- >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])), ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]) >>> mat = BlockMatrix(blocks, 3, 2).toLocalMatrix() @@ -1333,6 +1534,8 @@ def toIndexedRowMatrix(self): """ Convert this matrix to an IndexedRowMatrix. + Examples + -------- >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])), ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]) >>> mat = BlockMatrix(blocks, 3, 2).toIndexedRowMatrix() @@ -1356,6 +1559,8 @@ def toCoordinateMatrix(self): """ Convert this matrix to a CoordinateMatrix. + Examples + -------- >>> blocks = sc.parallelize([((0, 0), Matrices.dense(1, 2, [1, 2])), ... 
((1, 0), Matrices.dense(1, 2, [7, 8]))]) >>> mat = BlockMatrix(blocks, 1, 2).toCoordinateMatrix() diff --git a/python/pyspark/mllib/linalg/distributed.pyi b/python/pyspark/mllib/linalg/distributed.pyi index 238c4ea32e4e8..7ec2d60c5a947 100644 --- a/python/pyspark/mllib/linalg/distributed.pyi +++ b/python/pyspark/mllib/linalg/distributed.pyi @@ -22,6 +22,7 @@ from pyspark.storagelevel import StorageLevel from pyspark.mllib.common import JavaModelWrapper from pyspark.mllib.linalg import Vector, Matrix, QRDecomposition from pyspark.mllib.stat import MultivariateStatisticalSummary +import pyspark.sql.dataframe from numpy import ndarray # noqa: F401 VectorLike = Union[Vector, Sequence[Union[float, int]]] @@ -35,7 +36,10 @@ class DistributedMatrix: class RowMatrix(DistributedMatrix): def __init__( - self, rows: RDD[Vector], numRows: int = ..., numCols: int = ... + self, + rows: Union[RDD[Vector], pyspark.sql.dataframe.DataFrame], + numRows: int = ..., + numCols: int = ..., ) -> None: ... @property def rows(self) -> RDD[Vector]: ... diff --git a/python/pyspark/mllib/random.py b/python/pyspark/mllib/random.py index 6106c58584882..a33dfe26fbad9 100644 --- a/python/pyspark/mllib/random.py +++ b/python/pyspark/mllib/random.py @@ -22,7 +22,6 @@ import sys from functools import wraps -from pyspark import since from pyspark.mllib.common import callMLlibFunc @@ -46,7 +45,6 @@ class RandomRDDs(object): """ @staticmethod - @since("1.1.0") def uniformRDD(sc, size, numPartitions=None, seed=None): """ Generates an RDD comprised of i.i.d. samples from the @@ -56,12 +54,26 @@ def uniformRDD(sc, size, numPartitions=None, seed=None): to U(a, b), use ``RandomRDDs.uniformRDD(sc, n, p, seed).map(lambda v: a + (b - a) * v)`` - :param sc: SparkContext used to create the RDD. - :param size: Size of the RDD. - :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`). - :param seed: Random seed (default: a random long integer). - :return: RDD of float comprised of i.i.d. samples ~ `U(0.0, 1.0)`. - + .. versionadded:: 1.1.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + used to create the RDD. + size : int + Size of the RDD. + numPartitions : int, optional + Number of partitions in the RDD (default: `sc.defaultParallelism`). + seed : int, optional + Random seed (default: a random long integer). + + Returns + ------- + :py:class:`pyspark.RDD` + RDD of float comprised of i.i.d. samples ~ `U(0.0, 1.0)`. + + Examples + -------- >>> x = RandomRDDs.uniformRDD(sc, 100).collect() >>> len(x) 100 @@ -76,7 +88,6 @@ def uniformRDD(sc, size, numPartitions=None, seed=None): return callMLlibFunc("uniformRDD", sc._jsc, size, numPartitions, seed) @staticmethod - @since("1.1.0") def normalRDD(sc, size, numPartitions=None, seed=None): """ Generates an RDD comprised of i.i.d. samples from the standard normal @@ -86,12 +97,26 @@ def normalRDD(sc, size, numPartitions=None, seed=None): to some other normal N(mean, sigma^2), use ``RandomRDDs.normal(sc, n, p, seed).map(lambda v: mean + sigma * v)`` - :param sc: SparkContext used to create the RDD. - :param size: Size of the RDD. - :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`). - :param seed: Random seed (default: a random long integer). - :return: RDD of float comprised of i.i.d. samples ~ N(0.0, 1.0). - + .. versionadded:: 1.1.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + used to create the RDD. + size : int + Size of the RDD. 
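The `U(a, b)` recipe quoted in the `uniformRDD` docstring above can be exercised directly; a minimal sketch, assuming a live SparkContext `sc` (the bounds and seed are arbitrary).

```
from pyspark.mllib.random import RandomRDDs

# Rescale U(0.0, 1.0) samples to U(-5.0, 5.0), following the docstring recipe.
a, b = -5.0, 5.0
u = RandomRDDs.uniformRDD(sc, 1000, 4, seed=42).map(lambda v: a + (b - a) * v)

print(u.count())                             # 1000
print(a <= u.min() <= u.max() <= b)          # True
print(abs(u.mean() - (a + b) / 2.0) < 0.5)   # roughly centred between a and b
```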
+ numPartitions : int, optional + Number of partitions in the RDD (default: `sc.defaultParallelism`). + seed : int, optional + Random seed (default: a random long integer). + + Returns + ------- + :py:class:`pyspark.RDD` + RDD of float comprised of i.i.d. samples ~ N(0.0, 1.0). + + Examples + -------- >>> x = RandomRDDs.normalRDD(sc, 1000, seed=1) >>> stats = x.stats() >>> stats.count() @@ -104,20 +129,34 @@ def normalRDD(sc, size, numPartitions=None, seed=None): return callMLlibFunc("normalRDD", sc._jsc, size, numPartitions, seed) @staticmethod - @since("1.3.0") def logNormalRDD(sc, mean, std, size, numPartitions=None, seed=None): """ Generates an RDD comprised of i.i.d. samples from the log normal distribution with the input mean and standard distribution. - :param sc: SparkContext used to create the RDD. - :param mean: mean for the log Normal distribution - :param std: std for the log Normal distribution - :param size: Size of the RDD. - :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`). - :param seed: Random seed (default: a random long integer). - :return: RDD of float comprised of i.i.d. samples ~ log N(mean, std). - + .. versionadded:: 1.3.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + used to create the RDD. + mean : float + mean for the log Normal distribution + std : float + std for the log Normal distribution + size : int + Size of the RDD. + numPartitions : int, optional + Number of partitions in the RDD (default: `sc.defaultParallelism`). + seed : int, optional + Random seed (default: a random long integer). + + Returns + ------- + RDD of float comprised of i.i.d. samples ~ log N(mean, std). + + Examples + -------- >>> from math import sqrt, exp >>> mean = 0.0 >>> std = 1.0 @@ -137,19 +176,33 @@ def logNormalRDD(sc, mean, std, size, numPartitions=None, seed=None): size, numPartitions, seed) @staticmethod - @since("1.1.0") def poissonRDD(sc, mean, size, numPartitions=None, seed=None): """ Generates an RDD comprised of i.i.d. samples from the Poisson distribution with the input mean. - :param sc: SparkContext used to create the RDD. - :param mean: Mean, or lambda, for the Poisson distribution. - :param size: Size of the RDD. - :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`). - :param seed: Random seed (default: a random long integer). - :return: RDD of float comprised of i.i.d. samples ~ Pois(mean). - + .. versionadded:: 1.1.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + SparkContext used to create the RDD. + mean : float + Mean, or lambda, for the Poisson distribution. + size : int + Size of the RDD. + numPartitions : int, optional + Number of partitions in the RDD (default: `sc.defaultParallelism`). + seed : int, optional + Random seed (default: a random long integer). + + Returns + ------- + :py:class:`pyspark.RDD` + RDD of float comprised of i.i.d. samples ~ Pois(mean). + + Examples + -------- >>> mean = 100.0 >>> x = RandomRDDs.poissonRDD(sc, mean, 1000, seed=2) >>> stats = x.stats() @@ -164,19 +217,33 @@ def poissonRDD(sc, mean, size, numPartitions=None, seed=None): return callMLlibFunc("poissonRDD", sc._jsc, float(mean), size, numPartitions, seed) @staticmethod - @since("1.3.0") def exponentialRDD(sc, mean, size, numPartitions=None, seed=None): """ Generates an RDD comprised of i.i.d. samples from the Exponential distribution with the input mean. - :param sc: SparkContext used to create the RDD. 
- :param mean: Mean, or 1 / lambda, for the Exponential distribution. - :param size: Size of the RDD. - :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`). - :param seed: Random seed (default: a random long integer). - :return: RDD of float comprised of i.i.d. samples ~ Exp(mean). - + .. versionadded:: 1.3.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + SparkContext used to create the RDD. + mean : float + Mean, or 1 / lambda, for the Exponential distribution. + size : int + Size of the RDD. + numPartitions : int, optional + Number of partitions in the RDD (default: `sc.defaultParallelism`). + seed : int, optional + Random seed (default: a random long integer). + + Returns + ------- + :py:class:`pyspark.RDD` + RDD of float comprised of i.i.d. samples ~ Exp(mean). + + Examples + -------- >>> mean = 2.0 >>> x = RandomRDDs.exponentialRDD(sc, mean, 1000, seed=2) >>> stats = x.stats() @@ -191,20 +258,35 @@ def exponentialRDD(sc, mean, size, numPartitions=None, seed=None): return callMLlibFunc("exponentialRDD", sc._jsc, float(mean), size, numPartitions, seed) @staticmethod - @since("1.3.0") def gammaRDD(sc, shape, scale, size, numPartitions=None, seed=None): """ Generates an RDD comprised of i.i.d. samples from the Gamma distribution with the input shape and scale. - :param sc: SparkContext used to create the RDD. - :param shape: shape (> 0) parameter for the Gamma distribution - :param scale: scale (> 0) parameter for the Gamma distribution - :param size: Size of the RDD. - :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`). - :param seed: Random seed (default: a random long integer). - :return: RDD of float comprised of i.i.d. samples ~ Gamma(shape, scale). - + .. versionadded:: 1.3.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + SparkContext used to create the RDD. + shape : float + shape (> 0) parameter for the Gamma distribution + scale : float + scale (> 0) parameter for the Gamma distribution + size : int + Size of the RDD. + numPartitions : int, optional + Number of partitions in the RDD (default: `sc.defaultParallelism`). + seed : int, optional + Random seed (default: a random long integer). + + Returns + ------- + :py:class:`pyspark.RDD` + RDD of float comprised of i.i.d. samples ~ Gamma(shape, scale). + + Examples + -------- >>> from math import sqrt >>> shape = 1.0 >>> scale = 2.0 @@ -224,19 +306,33 @@ def gammaRDD(sc, shape, scale, size, numPartitions=None, seed=None): @staticmethod @toArray - @since("1.1.0") def uniformVectorRDD(sc, numRows, numCols, numPartitions=None, seed=None): """ Generates an RDD comprised of vectors containing i.i.d. samples drawn from the uniform distribution U(0.0, 1.0). - :param sc: SparkContext used to create the RDD. - :param numRows: Number of Vectors in the RDD. - :param numCols: Number of elements in each Vector. - :param numPartitions: Number of partitions in the RDD. - :param seed: Seed for the RNG that generates the seed for the generator in each partition. - :return: RDD of Vector with vectors containing i.i.d samples ~ `U(0.0, 1.0)`. - + .. versionadded:: 1.1.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + SparkContext used to create the RDD. + numRows : int + Number of Vectors in the RDD. + numCols : int + Number of elements in each Vector. + numPartitions : int, optional + Number of partitions in the RDD. 
+ seed : int, optional + Seed for the RNG that generates the seed for the generator in each partition. + + Returns + ------- + :py:class:`pyspark.RDD` + RDD of Vector with vectors containing i.i.d samples ~ `U(0.0, 1.0)`. + + Examples + -------- >>> import numpy as np >>> mat = np.matrix(RandomRDDs.uniformVectorRDD(sc, 10, 10).collect()) >>> mat.shape @@ -250,19 +346,33 @@ def uniformVectorRDD(sc, numRows, numCols, numPartitions=None, seed=None): @staticmethod @toArray - @since("1.1.0") def normalVectorRDD(sc, numRows, numCols, numPartitions=None, seed=None): """ Generates an RDD comprised of vectors containing i.i.d. samples drawn from the standard normal distribution. - :param sc: SparkContext used to create the RDD. - :param numRows: Number of Vectors in the RDD. - :param numCols: Number of elements in each Vector. - :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`). - :param seed: Random seed (default: a random long integer). - :return: RDD of Vector with vectors containing i.i.d. samples ~ `N(0.0, 1.0)`. - + .. versionadded:: 1.1.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + SparkContext used to create the RDD. + numRows : int + Number of Vectors in the RDD. + numCols : int + Number of elements in each Vector. + numPartitions : int, optional + Number of partitions in the RDD (default: `sc.defaultParallelism`). + seed : int, optional + Random seed (default: a random long integer). + + Returns + ------- + :py:class:`pyspark.RDD` + RDD of Vector with vectors containing i.i.d. samples ~ `N(0.0, 1.0)`. + + Examples + -------- >>> import numpy as np >>> mat = np.matrix(RandomRDDs.normalVectorRDD(sc, 100, 100, seed=1).collect()) >>> mat.shape @@ -276,21 +386,37 @@ def normalVectorRDD(sc, numRows, numCols, numPartitions=None, seed=None): @staticmethod @toArray - @since("1.3.0") def logNormalVectorRDD(sc, mean, std, numRows, numCols, numPartitions=None, seed=None): """ Generates an RDD comprised of vectors containing i.i.d. samples drawn from the log normal distribution. - :param sc: SparkContext used to create the RDD. - :param mean: Mean of the log normal distribution - :param std: Standard Deviation of the log normal distribution - :param numRows: Number of Vectors in the RDD. - :param numCols: Number of elements in each Vector. - :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`). - :param seed: Random seed (default: a random long integer). - :return: RDD of Vector with vectors containing i.i.d. samples ~ log `N(mean, std)`. - + .. versionadded:: 1.3.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + SparkContext used to create the RDD. + mean : float + Mean of the log normal distribution + std : float + Standard Deviation of the log normal distribution + numRows : int + Number of Vectors in the RDD. + numCols : int + Number of elements in each Vector. + numPartitions : int, optional + Number of partitions in the RDD (default: `sc.defaultParallelism`). + seed : int, optional + Random seed (default: a random long integer). + + Returns + ------- + :py:class:`pyspark.RDD` + RDD of Vector with vectors containing i.i.d. samples ~ log `N(mean, std)`. 
+ + Examples + -------- >>> import numpy as np >>> from math import sqrt, exp >>> mean = 0.0 @@ -311,20 +437,35 @@ def logNormalVectorRDD(sc, mean, std, numRows, numCols, numPartitions=None, seed @staticmethod @toArray - @since("1.1.0") def poissonVectorRDD(sc, mean, numRows, numCols, numPartitions=None, seed=None): """ Generates an RDD comprised of vectors containing i.i.d. samples drawn from the Poisson distribution with the input mean. - :param sc: SparkContext used to create the RDD. - :param mean: Mean, or lambda, for the Poisson distribution. - :param numRows: Number of Vectors in the RDD. - :param numCols: Number of elements in each Vector. - :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`) - :param seed: Random seed (default: a random long integer). - :return: RDD of Vector with vectors containing i.i.d. samples ~ Pois(mean). - + .. versionadded:: 1.1.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + SparkContext used to create the RDD. + mean : float + Mean, or lambda, for the Poisson distribution. + numRows : float + Number of Vectors in the RDD. + numCols : int + Number of elements in each Vector. + numPartitions : int, optional + Number of partitions in the RDD (default: `sc.defaultParallelism`) + seed : int, optional + Random seed (default: a random long integer). + + Returns + ------- + :py:class:`pyspark.RDD` + RDD of Vector with vectors containing i.i.d. samples ~ Pois(mean). + + Examples + -------- >>> import numpy as np >>> mean = 100.0 >>> rdd = RandomRDDs.poissonVectorRDD(sc, mean, 100, 100, seed=1) @@ -342,20 +483,35 @@ def poissonVectorRDD(sc, mean, numRows, numCols, numPartitions=None, seed=None): @staticmethod @toArray - @since("1.3.0") def exponentialVectorRDD(sc, mean, numRows, numCols, numPartitions=None, seed=None): """ Generates an RDD comprised of vectors containing i.i.d. samples drawn from the Exponential distribution with the input mean. - :param sc: SparkContext used to create the RDD. - :param mean: Mean, or 1 / lambda, for the Exponential distribution. - :param numRows: Number of Vectors in the RDD. - :param numCols: Number of elements in each Vector. - :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`) - :param seed: Random seed (default: a random long integer). - :return: RDD of Vector with vectors containing i.i.d. samples ~ Exp(mean). - + .. versionadded:: 1.3.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + SparkContext used to create the RDD. + mean : float + Mean, or 1 / lambda, for the Exponential distribution. + numRows : int + Number of Vectors in the RDD. + numCols : int + Number of elements in each Vector. + numPartitions : int, optional + Number of partitions in the RDD (default: `sc.defaultParallelism`) + seed : int, optional + Random seed (default: a random long integer). + + Returns + ------- + :py:class:`pyspark.RDD` + RDD of Vector with vectors containing i.i.d. samples ~ Exp(mean). + + Examples + -------- >>> import numpy as np >>> mean = 0.5 >>> rdd = RandomRDDs.exponentialVectorRDD(sc, mean, 100, 100, seed=1) @@ -373,21 +529,37 @@ def exponentialVectorRDD(sc, mean, numRows, numCols, numPartitions=None, seed=No @staticmethod @toArray - @since("1.3.0") def gammaVectorRDD(sc, shape, scale, numRows, numCols, numPartitions=None, seed=None): """ Generates an RDD comprised of vectors containing i.i.d. samples drawn from the Gamma distribution. - :param sc: SparkContext used to create the RDD. 
- :param shape: Shape (> 0) of the Gamma distribution - :param scale: Scale (> 0) of the Gamma distribution - :param numRows: Number of Vectors in the RDD. - :param numCols: Number of elements in each Vector. - :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`). - :param seed: Random seed (default: a random long integer). - :return: RDD of Vector with vectors containing i.i.d. samples ~ Gamma(shape, scale). - + .. versionadded:: 1.3.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + SparkContext used to create the RDD. + shape : float + Shape (> 0) of the Gamma distribution + scale : float + Scale (> 0) of the Gamma distribution + numRows : int + Number of Vectors in the RDD. + numCols : int + Number of elements in each Vector. + numPartitions : int, optional + Number of partitions in the RDD (default: `sc.defaultParallelism`). + seed : int, optional, + Random seed (default: a random long integer). + + Returns + ------- + :py:class:`pyspark.RDD` + RDD of Vector with vectors containing i.i.d. samples ~ Gamma(shape, scale). + + Examples + -------- >>> import numpy as np >>> from math import sqrt >>> shape = 1.0 diff --git a/python/pyspark/mllib/recommendation.py b/python/pyspark/mllib/recommendation.py index 3dd7cb200c280..7a5fb6e6eea9e 100644 --- a/python/pyspark/mllib/recommendation.py +++ b/python/pyspark/mllib/recommendation.py @@ -32,13 +32,15 @@ class Rating(namedtuple("Rating", ["user", "product", "rating"])): """ Represents a (user, product, rating) tuple. + .. versionadded:: 1.2.0 + + Examples + -------- >>> r = Rating(1, 2, 5.0) >>> (r.user, r.product, r.rating) (1, 2, 5.0) >>> (r[0], r[1], r[2]) (1, 2, 5.0) - - .. versionadded:: 1.2.0 """ def __reduce__(self): @@ -51,6 +53,10 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader): """A matrix factorisation model trained by regularized alternating least-squares. + .. versionadded:: 0.9.0 + + Examples + -------- >>> r1 = (1, 1, 1.0) >>> r2 = (1, 2, 2.0) >>> r3 = (2, 1, 2.0) @@ -126,8 +132,6 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader): ... rmtree(path) ... except OSError: ... pass - - .. versionadded:: 0.9.0 """ @since("0.9.0") def predict(self, user, product): @@ -237,7 +241,6 @@ def _prepare(cls, ratings): return ratings @classmethod - @since("0.9.0") def train(cls, ratings, rank, iterations=5, lambda_=0.01, blocks=-1, nonnegative=False, seed=None): """ @@ -247,35 +250,38 @@ def train(cls, ratings, rank, iterations=5, lambda_=0.01, blocks=-1, nonnegative features). To solve for these features, ALS is run iteratively with a configurable level of parallelism. - :param ratings: - RDD of `Rating` or (userID, productID, rating) tuple. - :param rank: - Number of features to use (also referred to as the number of latent factors). - :param iterations: - Number of iterations of ALS. - (default: 5) - :param lambda_: - Regularization parameter. - (default: 0.01) - :param blocks: - Number of blocks used to parallelize the computation. A value - of -1 will use an auto-configured number of blocks. - (default: -1) - :param nonnegative: - A value of True will solve least-squares with nonnegativity - constraints. - (default: False) - :param seed: - Random seed for initial matrix factorization model. A value - of None will use system time as the seed. - (default: None) + .. versionadded:: 0.9.0 + + Parameters + ---------- + ratings : :py:class:`pyspark.RDD` + RDD of `Rating` or (userID, productID, rating) tuple. 
+ rank : int + Number of features to use (also referred to as the number of latent factors). + iterations : int, optional + Number of iterations of ALS. + (default: 5) + lambda\\_ : float, optional + Regularization parameter. + (default: 0.01) + blocks : int, optional + Number of blocks used to parallelize the computation. A value + of -1 will use an auto-configured number of blocks. + (default: -1) + nonnegative : bool, optional + A value of True will solve least-squares with nonnegativity + constraints. + (default: False) + seed : bool, optional + Random seed for initial matrix factorization model. A value + of None will use system time as the seed. + (default: None) """ model = callMLlibFunc("trainALSModel", cls._prepare(ratings), rank, iterations, lambda_, blocks, nonnegative, seed) return MatrixFactorizationModel(model) @classmethod - @since("0.9.0") def trainImplicit(cls, ratings, rank, iterations=5, lambda_=0.01, blocks=-1, alpha=0.01, nonnegative=False, seed=None): """ @@ -285,31 +291,35 @@ def trainImplicit(cls, ratings, rank, iterations=5, lambda_=0.01, blocks=-1, alp given rank (number of features). To solve for these features, ALS is run iteratively with a configurable level of parallelism. - :param ratings: - RDD of `Rating` or (userID, productID, rating) tuple. - :param rank: - Number of features to use (also referred to as the number of latent factors). - :param iterations: - Number of iterations of ALS. - (default: 5) - :param lambda_: - Regularization parameter. - (default: 0.01) - :param blocks: - Number of blocks used to parallelize the computation. A value - of -1 will use an auto-configured number of blocks. - (default: -1) - :param alpha: - A constant used in computing confidence. - (default: 0.01) - :param nonnegative: - A value of True will solve least-squares with nonnegativity - constraints. - (default: False) - :param seed: - Random seed for initial matrix factorization model. A value - of None will use system time as the seed. - (default: None) + .. versionadded:: 0.9.0 + + Parameters + ---------- + ratings : :py:class:`pyspark.RDD` + RDD of `Rating` or (userID, productID, rating) tuple. + rank : int + Number of features to use (also referred to as the number of latent factors). + iterations : int, optional + Number of iterations of ALS. + (default: 5) + lambda\\_ : float, optional + Regularization parameter. + (default: 0.01) + blocks : int, optional + Number of blocks used to parallelize the computation. A value + of -1 will use an auto-configured number of blocks. + (default: -1) + alpha : float, optional + A constant used in computing confidence. + (default: 0.01) + nonnegative : bool, optional + A value of True will solve least-squares with nonnegativity + constraints. + (default: False) + seed : int, optional + Random seed for initial matrix factorization model. A value + of None will use system time as the seed. + (default: None) """ model = callMLlibFunc("trainImplicitALSModel", cls._prepare(ratings), rank, iterations, lambda_, blocks, alpha, nonnegative, seed) diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py index 77bca86ac1b27..e549b0ac43721 100644 --- a/python/pyspark/mllib/regression.py +++ b/python/pyspark/mllib/regression.py @@ -39,15 +39,19 @@ class LabeledPoint(object): """ Class that represents the features and labels of a data point. - :param label: - Label for this data point. 
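Because `train` and `trainImplicit` above now document only their parameters, a minimal end-to-end sketch may help; it assumes a live SparkContext `sc`, and the rank, iteration count and seed are illustrative, not recommended settings.

```
from pyspark.mllib.recommendation import ALS, Rating

ratings = sc.parallelize([Rating(1, 1, 1.0), Rating(1, 2, 2.0),
                          Rating(2, 1, 2.0), Rating(2, 2, 5.0)])

# Explicit feedback: rank-4 factorization with a fixed seed for repeatability.
model = ALS.train(ratings, rank=4, iterations=10, lambda_=0.01, seed=10)
print(model.predict(2, 2))                 # close to 5.0
print(model.recommendProducts(1, 2))       # top-2 products for user 1

# Implicit feedback treats the values as confidence-weighted preferences.
implicit = ALS.trainImplicit(ratings, rank=4, iterations=10, alpha=0.01, seed=10)
print(implicit.predict(2, 2))              # a preference score, not a rating
```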
- :param features: - Vector of features for this point (NumPy array, list, - pyspark.mllib.linalg.SparseVector, or scipy.sparse column matrix). - - .. note:: 'label' and 'features' are accessible as class attributes. - .. versionadded:: 1.0.0 + + Parameters + ---------- + label : int + Label for this data point. + features : :py:class:`pyspark.mllib.linalg.Vector` or convertible + Vector of features for this point (NumPy array, list, + pyspark.mllib.linalg.SparseVector, or scipy.sparse column matrix). + + Notes + ----- + 'label' and 'features' are accessible as class attributes. """ def __init__(self, label, features): @@ -69,12 +73,14 @@ class LinearModel(object): """ A linear model that has a vector of coefficients and an intercept. - :param weights: - Weights computed for every feature. - :param intercept: - Intercept computed for this model. - .. versionadded:: 0.9.0 + + Parameters + ---------- + weights : :py:class:`pyspark.mllib.linalg.Vector` + Weights computed for every feature. + intercept : float + Intercept computed for this model. """ def __init__(self, weights, intercept): @@ -102,14 +108,16 @@ class LinearRegressionModelBase(LinearModel): """A linear regression model. + .. versionadded:: 0.9.0 + + Examples + -------- >>> from pyspark.mllib.linalg import SparseVector >>> lrmb = LinearRegressionModelBase(np.array([1.0, 2.0]), 0.1) >>> abs(lrmb.predict(np.array([-1.03, 7.777])) - 14.624) < 1e-6 True >>> abs(lrmb.predict(SparseVector(2, {0: -1.03, 1: 7.777})) - 14.624) < 1e-6 True - - .. versionadded:: 0.9.0 """ @since("0.9.0") @@ -129,6 +137,10 @@ class LinearRegressionModel(LinearRegressionModelBase): """A linear regression model derived from a least-squares fit. + .. versionadded:: 0.9.0 + + Examples + -------- >>> from pyspark.mllib.linalg import SparseVector >>> from pyspark.mllib.regression import LabeledPoint >>> data = [ @@ -181,8 +193,6 @@ class LinearRegressionModel(LinearRegressionModelBase): True >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5 True - - .. versionadded:: 0.9.0 """ @since("1.4.0") def save(self, sc, path): @@ -224,11 +234,13 @@ def _regression_train_wrapper(train_func, modelClass, data, initial_weights): class LinearRegressionWithSGD(object): """ + Train a linear regression model with no regularization using Stochastic Gradient Descent. + .. versionadded:: 0.9.0 - .. note:: Deprecated in 2.0.0. Use ml.regression.LinearRegression. + .. deprecated:: 2.0.0 + Use :py:class:`pyspark.ml.regression.LinearRegression`. """ @classmethod - @since("0.9.0") def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0, initialWeights=None, regParam=0.0, regType=None, intercept=False, validateData=True, convergenceTol=0.001): @@ -244,42 +256,47 @@ def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0, corresponding right hand side label y. See also the documentation for the precise formulation. - :param data: - The training data, an RDD of LabeledPoint. - :param iterations: - The number of iterations. - (default: 100) - :param step: - The step parameter used in SGD. - (default: 1.0) - :param miniBatchFraction: - Fraction of data to be used for each SGD iteration. - (default: 1.0) - :param initialWeights: - The initial weights. - (default: None) - :param regParam: - The regularizer parameter. - (default: 0.0) - :param regType: - The type of regularizer used for training our model. - Supported values: + .. versionadded:: 0.9.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + The training data, an RDD of LabeledPoint. 
+ iterations : int, optional + The number of iterations. + (default: 100) + step : float, optional + The step parameter used in SGD. + (default: 1.0) + miniBatchFraction : float, optional + Fraction of data to be used for each SGD iteration. + (default: 1.0) + initialWeights : :py:class:`pyspark.mllib.linalg.Vector` or convertible, optional + The initial weights. + (default: None) + regParam : float, optional + The regularizer parameter. + (default: 0.0) + regType : str, optional + The type of regularizer used for training our model. + Supported values: - "l1" for using L1 regularization - "l2" for using L2 regularization - None for no regularization (default) - :param intercept: - Boolean parameter which indicates the use or not of the - augmented representation for training data (i.e., whether bias - features are activated or not). - (default: False) - :param validateData: - Boolean parameter which indicates if the algorithm should - validate data before training. - (default: True) - :param convergenceTol: - A condition which decides iteration termination. - (default: 0.001) + + intercept : bool, optional + Boolean parameter which indicates the use or not of the + augmented representation for training data (i.e., whether bias + features are activated or not). + (default: False) + validateData : bool, optional + Boolean parameter which indicates if the algorithm should + validate data before training. + (default: True) + convergenceTol : float, optional + A condition which decides iteration termination. + (default: 0.001) """ warnings.warn( "Deprecated in 2.0.0. Use ml.regression.LinearRegression.", DeprecationWarning) @@ -299,6 +316,10 @@ class LassoModel(LinearRegressionModelBase): """A linear regression model derived from a least-squares fit with an l_1 penalty term. + .. versionadded:: 0.9.0 + + Examples + -------- >>> from pyspark.mllib.linalg import SparseVector >>> from pyspark.mllib.regression import LabeledPoint >>> data = [ @@ -351,8 +372,6 @@ class LassoModel(LinearRegressionModelBase): True >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5 True - - .. versionadded:: 0.9.0 """ @since("1.4.0") def save(self, sc, path): @@ -375,12 +394,14 @@ def load(cls, sc, path): class LassoWithSGD(object): """ + Train a regression model with L1-regularization using Stochastic Gradient Descent. + .. versionadded:: 0.9.0 - .. note:: Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 1.0. - Note the default regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression. + .. deprecated:: 2.0.0 + Use :py:class:`pyspark.ml.regression.LinearRegression` with elasticNetParam = 1.0. + Note the default regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression. """ @classmethod - @since("0.9.0") def train(cls, data, iterations=100, step=1.0, regParam=0.01, miniBatchFraction=1.0, initialWeights=None, intercept=False, validateData=True, convergenceTol=0.001): @@ -395,35 +416,39 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01, of rows of A, each with its corresponding right hand side label y. See also the documentation for the precise formulation. - :param data: - The training data, an RDD of LabeledPoint. - :param iterations: - The number of iterations. - (default: 100) - :param step: - The step parameter used in SGD. - (default: 1.0) - :param regParam: - The regularizer parameter. - (default: 0.01) - :param miniBatchFraction: - Fraction of data to be used for each SGD iteration. 
- (default: 1.0) - :param initialWeights: - The initial weights. - (default: None) - :param intercept: - Boolean parameter which indicates the use or not of the - augmented representation for training data (i.e. whether bias - features are activated or not). - (default: False) - :param validateData: - Boolean parameter which indicates if the algorithm should - validate data before training. - (default: True) - :param convergenceTol: - A condition which decides iteration termination. - (default: 0.001) + .. versionadded:: 0.9.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + The training data, an RDD of LabeledPoint. + iterations : int, optional + The number of iterations. + (default: 100) + step : float, optional + The step parameter used in SGD. + (default: 1.0) + regParam : float, optional + The regularizer parameter. + (default: 0.01) + miniBatchFraction : float, optional + Fraction of data to be used for each SGD iteration. + (default: 1.0) + initialWeights : :py:class:`pyspark.mllib.linalg.Vector` or convertible, optional + The initial weights. + (default: None) + intercept : bool, optional + Boolean parameter which indicates the use or not of the + augmented representation for training data (i.e. whether bias + features are activated or not). + (default: False) + validateData : bool, optional + Boolean parameter which indicates if the algorithm should + validate data before training. + (default: True) + convergenceTol : float, optional + A condition which decides iteration termination. + (default: 0.001) """ warnings.warn( "Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 1.0. " @@ -444,6 +469,10 @@ class RidgeRegressionModel(LinearRegressionModelBase): """A linear regression model derived from a least-squares fit with an l_2 penalty term. + .. versionadded:: 0.9.0 + + Examples + -------- >>> from pyspark.mllib.linalg import SparseVector >>> from pyspark.mllib.regression import LabeledPoint >>> data = [ @@ -496,8 +525,6 @@ class RidgeRegressionModel(LinearRegressionModelBase): True >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5 True - - .. versionadded:: 0.9.0 """ @since("1.4.0") def save(self, sc, path): @@ -520,13 +547,15 @@ def load(cls, sc, path): class RidgeRegressionWithSGD(object): """ + Train a regression model with L2-regularization using Stochastic Gradient Descent. + .. versionadded:: 0.9.0 - .. note:: Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 0.0. - Note the default regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for - LinearRegression. + .. deprecated:: 2.0.0 + Use :py:class:`pyspark.ml.regression.LinearRegression` with elasticNetParam = 0.0. + Note the default regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for + LinearRegression. """ @classmethod - @since("0.9.0") def train(cls, data, iterations=100, step=1.0, regParam=0.01, miniBatchFraction=1.0, initialWeights=None, intercept=False, validateData=True, convergenceTol=0.001): @@ -541,35 +570,39 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01, of rows of A, each with its corresponding right hand side label y. See also the documentation for the precise formulation. - :param data: - The training data, an RDD of LabeledPoint. - :param iterations: - The number of iterations. - (default: 100) - :param step: - The step parameter used in SGD. - (default: 1.0) - :param regParam: - The regularizer parameter. - (default: 0.01) - :param miniBatchFraction: - Fraction of data to be used for each SGD iteration. 
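The `*WithSGD` trainers being converted here all share the same call shape; a short sketch of the deprecated-but-still-working API follows, assuming a live SparkContext `sc` (each `train` call emits a DeprecationWarning pointing at `pyspark.ml`).

```
import numpy as np
from pyspark.mllib.regression import (LabeledPoint, LinearRegressionWithSGD,
                                      LassoWithSGD)

data = sc.parallelize([LabeledPoint(0.0, [0.0]),
                       LabeledPoint(1.0, [1.0]),
                       LabeledPoint(3.0, [2.0]),
                       LabeledPoint(2.0, [3.0])])

# Plain least squares; emits a DeprecationWarning pointing at pyspark.ml.
lrm = LinearRegressionWithSGD.train(data, iterations=100, step=0.1,
                                    intercept=True,
                                    initialWeights=np.array([1.0]))
print(lrm.weights, lrm.intercept)
print(lrm.predict(np.array([2.0])))     # roughly 2 for this toy data

# LassoWithSGD / RidgeRegressionWithSGD take the same arguments plus regParam
# (which defaults to 0.01 for them, unlike LinearRegressionWithSGD's 0.0).
lasso = LassoWithSGD.train(data, iterations=100, step=0.1, regParam=0.01,
                           intercept=True, initialWeights=np.array([1.0]))
print(lasso.weights, lasso.intercept)
```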
- (default: 1.0) - :param initialWeights: - The initial weights. - (default: None) - :param intercept: - Boolean parameter which indicates the use or not of the - augmented representation for training data (i.e. whether bias - features are activated or not). - (default: False) - :param validateData: - Boolean parameter which indicates if the algorithm should - validate data before training. - (default: True) - :param convergenceTol: - A condition which decides iteration termination. - (default: 0.001) + .. versionadded:: 0.9.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + The training data, an RDD of LabeledPoint. + iterations : int, optional + The number of iterations. + (default: 100) + step : float, optional + The step parameter used in SGD. + (default: 1.0) + regParam : float, optional + The regularizer parameter. + (default: 0.01) + miniBatchFraction : float, optional + Fraction of data to be used for each SGD iteration. + (default: 1.0) + initialWeights : :py:class:`pyspark.mllib.linalg.Vector` or convertible, optional + The initial weights. + (default: None) + intercept : bool, optional + Boolean parameter which indicates the use or not of the + augmented representation for training data (i.e. whether bias + features are activated or not). + (default: False) + validateData : bool, optional + Boolean parameter which indicates if the algorithm should + validate data before training. + (default: True) + convergenceTol : float, optional + A condition which decides iteration termination. + (default: 0.001) """ warnings.warn( "Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 0.0. " @@ -589,15 +622,21 @@ class IsotonicRegressionModel(Saveable, Loader): """ Regression model for isotonic regression. - :param boundaries: - Array of boundaries for which predictions are known. Boundaries - must be sorted in increasing order. - :param predictions: - Array of predictions associated to the boundaries at the same - index. Results of isotonic regression and therefore monotone. - :param isotonic: - Indicates whether this is isotonic or antitonic. + .. versionadded:: 1.4.0 + Parameters + ---------- + boundaries : ndarray + Array of boundaries for which predictions are known. Boundaries + must be sorted in increasing order. + predictions : ndarray + Array of predictions associated to the boundaries at the same + index. Results of isotonic regression and therefore monotone. + isotonic : true + Indicates whether this is isotonic or antitonic. + + Examples + -------- >>> data = [(1, 0, 1), (2, 1, 1), (3, 2, 1), (1, 3, 1), (6, 4, 1), (17, 5, 1), (16, 6, 1)] >>> irm = IsotonicRegression.train(sc.parallelize(data)) >>> irm.predict(3) @@ -619,8 +658,6 @@ class IsotonicRegressionModel(Saveable, Loader): ... rmtree(path) ... except OSError: ... pass - - .. versionadded:: 1.4.0 """ def __init__(self, boundaries, predictions, isotonic): @@ -628,7 +665,6 @@ def __init__(self, boundaries, predictions, isotonic): self.predictions = predictions self.isotonic = isotonic - @since("1.4.0") def predict(self, x): """ Predict labels for provided features. @@ -647,8 +683,13 @@ def predict(self, x): values with the same boundary then the same rules as in 2) are used. - :param x: - Feature or RDD of Features to be labeled. + + .. versionadded:: 1.4.0 + + Parameters + ---------- + x : :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + Feature or RDD of Features to be labeled. 
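For completeness, a rough sketch of the DataFrame-based replacement named in the deprecation notes above; it assumes a `SparkSession` called `spark` (the doctests in this module only set up `sc`), and uses `elasticNetParam` to select the penalty: 1.0 gives the L1 (Lasso) penalty, 0.0 the L2 (ridge) penalty.

```
from pyspark.ml.linalg import Vectors
from pyspark.ml.regression import LinearRegression

df = spark.createDataFrame(
    [(0.0, Vectors.dense(0.0)),
     (1.0, Vectors.dense(1.0)),
     (3.0, Vectors.dense(2.0)),
     (2.0, Vectors.dense(3.0))],
    ["label", "features"])

# Note the different defaults called out above: regParam is 0.0 here,
# versus 0.01 for LassoWithSGD / RidgeRegressionWithSGD.
lr = LinearRegression(maxIter=100, regParam=0.01, elasticNetParam=1.0)
model = lr.fit(df)
print(model.coefficients, model.intercept)
```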
""" if isinstance(x, RDD): return x.map(lambda v: self.predict(v)) @@ -680,35 +721,42 @@ class IsotonicRegression(object): Currently implemented using parallelized pool adjacent violators algorithm. Only univariate (single feature) algorithm supported. - Sequential PAV implementation based on: + .. versionadded:: 1.4.0 + + Notes + ----- + Sequential PAV implementation based on + Tibshirani, Ryan J., Holger Hoefling, and Robert Tibshirani (2011) [1]_ - Tibshirani, Ryan J., Holger Hoefling, and Robert Tibshirani. - "Nearly-isotonic regression." Technometrics 53.1 (2011): 54-61. - Available from http://www.stat.cmu.edu/~ryantibs/papers/neariso.pdf + Sequential PAV parallelization based on + Kearsley, Anthony J., Richard A. Tapia, and Michael W. Trosset (1996) [2]_ - Sequential PAV parallelization based on: + See also + `Isotonic regression (Wikipedia) `_. - Kearsley, Anthony J., Richard A. Tapia, and Michael W. Trosset. + .. [1] Tibshirani, Ryan J., Holger Hoefling, and Robert Tibshirani. + "Nearly-isotonic regression." Technometrics 53.1 (2011): 54-61. + Available from http://www.stat.cmu.edu/~ryantibs/papers/neariso.pdf + .. [2] Kearsley, Anthony J., Richard A. Tapia, and Michael W. Trosset "An approach to parallelizing isotonic regression." Applied Mathematics and Parallel Computing. Physica-Verlag HD, 1996. 141-147. Available from http://softlib.rice.edu/pub/CRPC-TRs/reports/CRPC-TR96640.pdf - - See `Isotonic regression (Wikipedia) `_. - - .. versionadded:: 1.4.0 """ @classmethod - @since("1.4.0") def train(cls, data, isotonic=True): """ Train an isotonic regression model on the given data. - :param data: - RDD of (label, feature, weight) tuples. - :param isotonic: - Whether this is isotonic (which is default) or antitonic. - (default: True) + .. versionadded:: 1.4.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + RDD of (label, feature, weight) tuples. + isotonic : bool, optional + Whether this is isotonic (which is default) or antitonic. + (default: True) """ boundaries, predictions = callMLlibFunc("trainIsotonicRegressionModel", data.map(_convert_to_vector), bool(isotonic)) @@ -741,26 +789,32 @@ def _validate(self, dstream): raise ValueError( "Model must be intialized using setInitialWeights") - @since("1.5.0") def predictOn(self, dstream): """ Use the model to make predictions on batches of data from a DStream. - :return: - DStream containing predictions. + .. versionadded:: 1.5.0 + + Returns + ------- + :py:class:`pyspark.streaming.DStream` + DStream containing predictions. """ self._validate(dstream) return dstream.map(lambda x: self._model.predict(x)) - @since("1.5.0") def predictOnValues(self, dstream): """ Use the model to make predictions on the values of a DStream and carry over its keys. - :return: - DStream containing the input keys and the predictions as values. + .. versionadded:: 1.5.0 + + Returns + ------- + :py:class:`pyspark.streaming.DStream` + DStream containing predictions. """ self._validate(dstream) return dstream.mapValues(lambda x: self._model.predict(x)) @@ -779,20 +833,22 @@ class StreamingLinearRegressionWithSGD(StreamingLinearAlgorithm): of features must be constant. An initial weight vector must be provided. - :param stepSize: - Step size for each iteration of gradient descent. - (default: 0.1) - :param numIterations: - Number of iterations run for each batch of data. - (default: 50) - :param miniBatchFraction: - Fraction of each batch of data to use for updates. 
- (default: 1.0) - :param convergenceTol: - Value used to determine when to terminate iterations. - (default: 0.001) - .. versionadded:: 1.5.0 + + Parameters + ---------- + stepSize : float, optional + Step size for each iteration of gradient descent. + (default: 0.1) + numIterations : int, optional + Number of iterations run for each batch of data. + (default: 50) + miniBatchFraction : float, optional + Fraction of each batch of data to use for updates. + (default: 1.0) + convergenceTol : float, optional + Value used to determine when to terminate iterations. + (default: 0.001) """ def __init__(self, stepSize=0.1, numIterations=50, miniBatchFraction=1.0, convergenceTol=0.001): self.stepSize = stepSize diff --git a/python/pyspark/mllib/stat/KernelDensity.py b/python/pyspark/mllib/stat/KernelDensity.py index 56444c152f0ba..1d4d43e53519c 100644 --- a/python/pyspark/mllib/stat/KernelDensity.py +++ b/python/pyspark/mllib/stat/KernelDensity.py @@ -26,6 +26,8 @@ class KernelDensity(object): Estimate probability density at required points given an RDD of samples from the population. + Examples + -------- >>> kd = KernelDensity() >>> sample = sc.parallelize([0.0, 1.0]) >>> kd.setSample(sample) diff --git a/python/pyspark/mllib/stat/__init__.py b/python/pyspark/mllib/stat/__init__.py index 0fb33061838af..d3b4ddf7e4c68 100644 --- a/python/pyspark/mllib/stat/__init__.py +++ b/python/pyspark/mllib/stat/__init__.py @@ -21,8 +21,9 @@ from pyspark.mllib.stat._statistics import Statistics, MultivariateStatisticalSummary from pyspark.mllib.stat.distribution import MultivariateGaussian -from pyspark.mllib.stat.test import ChiSqTestResult +from pyspark.mllib.stat.test import ChiSqTestResult, KolmogorovSmirnovTestResult from pyspark.mllib.stat.KernelDensity import KernelDensity -__all__ = ["Statistics", "MultivariateStatisticalSummary", "ChiSqTestResult", +__all__ = ["Statistics", "MultivariateStatisticalSummary", + "ChiSqTestResult", "KolmogorovSmirnovTestResult", "MultivariateGaussian", "KernelDensity"] diff --git a/python/pyspark/mllib/stat/_statistics.py b/python/pyspark/mllib/stat/_statistics.py index 43454ba5187dd..a4b45cf55febe 100644 --- a/python/pyspark/mllib/stat/_statistics.py +++ b/python/pyspark/mllib/stat/_statistics.py @@ -65,11 +65,19 @@ def colStats(rdd): """ Computes column-wise summary statistics for the input RDD[Vector]. - :param rdd: an RDD[Vector] for which column-wise summary statistics - are to be computed. - :return: :class:`MultivariateStatisticalSummary` object containing - column-wise summary statistics. - + Parameters + ---------- + rdd : :py:class:`pyspark.RDD` + an RDD[Vector] for which column-wise summary statistics + are to be computed. + + Returns + ------- + :class:`MultivariateStatisticalSummary` + object containing column-wise summary statistics. + + Examples + -------- >>> from pyspark.mllib.linalg import Vectors >>> rdd = sc.parallelize([Vectors.dense([2, 0, 0, -2]), ... Vectors.dense([4, 5, 0, 3]), @@ -103,13 +111,24 @@ def corr(x, y=None, method=None): to specify the method to be used for single RDD inout. If two RDDs of floats are passed in, a single float is returned. - :param x: an RDD of vector for which the correlation matrix is to be computed, - or an RDD of float of the same cardinality as y when y is specified. - :param y: an RDD of float of the same cardinality as x. - :param method: String specifying the method to use for computing correlation. - Supported: `pearson` (default), `spearman` - :return: Correlation matrix comparing columns in x. 
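The streaming estimator documented above has no doctest nearby, so here is a rough, timing-dependent sketch; it assumes a live SparkContext `sc`, uses `queueStream` purely for illustration (a real job would read from a socket or Kafka), and the sleep/stop choreography is only a convenience for a local run.

```
import time
from pyspark.streaming import StreamingContext
from pyspark.mllib.regression import LabeledPoint, StreamingLinearRegressionWithSGD

ssc = StreamingContext(sc, 1)    # 1-second batches

# Two toy training batches fed through a queue stream.
batches = [sc.parallelize([LabeledPoint(0.0, [0.0]), LabeledPoint(1.0, [1.0])]),
           sc.parallelize([LabeledPoint(2.0, [2.0]), LabeledPoint(3.0, [3.0])])]
train_stream = ssc.queueStream(batches)

model = StreamingLinearRegressionWithSGD(stepSize=0.1, numIterations=50)
model.setInitialWeights([0.0])   # an initial weight vector must be provided
model.trainOn(train_stream)

ssc.start()
time.sleep(5)                    # let the batches flow through
ssc.stop(stopSparkContext=False, stopGraceFully=True)
print(model.latestModel().weights)
```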
- + Parameters + ---------- + x : :py:class:`pyspark.RDD` + an RDD of vector for which the correlation matrix is to be computed, + or an RDD of float of the same cardinality as y when y is specified. + y : :py:class:`pyspark.RDD`, optional + an RDD of float of the same cardinality as x. + method : str, optional + String specifying the method to use for computing correlation. + Supported: `pearson` (default), `spearman` + + Returns + ------- + :py:class:`pyspark.mllib.linalg.Matrix` + Correlation matrix comparing columns in x. + + Examples + -------- >>> x = sc.parallelize([1.0, 0.0, -2.0], 2) >>> y = sc.parallelize([4.0, 5.0, 3.0], 2) >>> zeros = sc.parallelize([0.0, 0.0, 0.0], 2) @@ -172,20 +191,33 @@ def chiSqTest(observed, expected=None): contingency matrix for which the chi-squared statistic is computed. All label and feature values must be categorical. - .. note:: `observed` cannot contain negative values - - :param observed: it could be a vector containing the observed categorical - counts/relative frequencies, or the contingency matrix - (containing either counts or relative frequencies), - or an RDD of LabeledPoint containing the labeled dataset - with categorical features. Real-valued features will be - treated as categorical for each distinct value. - :param expected: Vector containing the expected categorical counts/relative - frequencies. `expected` is rescaled if the `expected` sum - differs from the `observed` sum. - :return: ChiSquaredTest object containing the test statistic, degrees - of freedom, p-value, the method used, and the null hypothesis. - + Parameters + ---------- + observed : :py:class:`pyspark.mllib.linalg.Vector` or \ + :py:class:`pyspark.mllib.linalg.Matrix` + it could be a vector containing the observed categorical + counts/relative frequencies, or the contingency matrix + (containing either counts or relative frequencies), + or an RDD of LabeledPoint containing the labeled dataset + with categorical features. Real-valued features will be + treated as categorical for each distinct value. + expected : :py:class:`pyspark.mllib.linalg.Vector` + Vector containing the expected categorical counts/relative + frequencies. `expected` is rescaled if the `expected` sum + differs from the `observed` sum. + + Returns + ------- + :py:class:`pyspark.mllib.stat.ChiSqTestResult` + object containing the test statistic, degrees + of freedom, p-value, the method used, and the null hypothesis. + + Notes + ----- + `observed` cannot contain negative values + + Examples + -------- >>> from pyspark.mllib.linalg import Vectors, Matrices >>> observed = Vectors.dense([4, 6, 5]) >>> pearson = Statistics.chiSqTest(observed) @@ -259,17 +291,28 @@ def kolmogorovSmirnovTest(data, distName="norm", *params): For specific details of the implementation, please have a look at the Scala documentation. - :param data: RDD, samples from the data - :param distName: string, currently only "norm" is supported. - (Normal distribution) to calculate the - theoretical distribution of the data. - :param params: additional values which need to be provided for - a certain distribution. - If not provided, the default values are used. - :return: KolmogorovSmirnovTestResult object containing the test - statistic, degrees of freedom, p-value, - the method used, and the null hypothesis. + Parameters + ---------- + data : :py:class:`pyspark.RDD` + RDD, samples from the data + distName : str, optional + string, currently only "norm" is supported. 
+ (Normal distribution) to calculate the + theoretical distribution of the data. + params + additional values which need to be provided for + a certain distribution. + If not provided, the default values are used. + + Returns + ------- + :py:class:`pyspark.mllib.stat.KolmogorovSmirnovTestResult` + object containing the test statistic, degrees of freedom, p-value, + the method used, and the null hypothesis. + + Examples + -------- >>> kstest = Statistics.kolmogorovSmirnovTest >>> data = sc.parallelize([-1.0, 0.0, 1.0]) >>> ksmodel = kstest(data, "norm") diff --git a/python/pyspark/mllib/stat/distribution.py b/python/pyspark/mllib/stat/distribution.py index 46f7a1d2f277a..aa35ac6dfdae1 100644 --- a/python/pyspark/mllib/stat/distribution.py +++ b/python/pyspark/mllib/stat/distribution.py @@ -24,6 +24,8 @@ class MultivariateGaussian(namedtuple('MultivariateGaussian', ['mu', 'sigma'])): """Represents a (mu, sigma) tuple + Examples + -------- >>> m = MultivariateGaussian(Vectors.dense([11,12]),DenseMatrix(2, 2, (1.0, 3.0, 5.0, 2.0))) >>> (m.mu, m.sigma.toArray()) (DenseVector([11.0, 12.0]), array([[ 1., 5.],[ 3., 2.]])) diff --git a/python/pyspark/mllib/tree.py b/python/pyspark/mllib/tree.py index e05dfdb953ceb..493dcf8db6fd2 100644 --- a/python/pyspark/mllib/tree.py +++ b/python/pyspark/mllib/tree.py @@ -33,15 +33,18 @@ class TreeEnsembleModel(JavaModelWrapper, JavaSaveable): .. versionadded:: 1.3.0 """ - @since("1.3.0") def predict(self, x): """ Predict values for a single data point or an RDD of points using the model trained. - .. note:: In Python, predict cannot currently be used within an RDD - transformation or action. - Call predict directly on the RDD instead. + .. versionadded:: 1.3.0 + + Notes + ----- + In Python, predict cannot currently be used within an RDD + transformation or action. + Call predict directly on the RDD instead. """ if isinstance(x, RDD): return self.call("predict", x.map(_convert_to_vector)) @@ -79,18 +82,23 @@ class DecisionTreeModel(JavaModelWrapper, JavaSaveable, JavaLoader): .. versionadded:: 1.1.0 """ - @since("1.1.0") def predict(self, x): """ Predict the label of one or more examples. - .. note:: In Python, predict cannot currently be used within an RDD - transformation or action. - Call predict directly on the RDD instead. + .. versionadded:: 1.1.0 + + Parameters + ---------- + x : :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + Data point (feature vector), or an RDD of data points (feature + vectors). - :param x: - Data point (feature vector), or an RDD of data points (feature - vectors). + Notes + ----- + In Python, predict cannot currently be used within an RDD + transformation or action. + Call predict directly on the RDD instead. """ if isinstance(x, RDD): return self.call("predict", x.map(_convert_to_vector)) @@ -143,45 +151,50 @@ def _train(cls, data, type, numClasses, features, impurity="gini", maxDepth=5, m return DecisionTreeModel(model) @classmethod - @since("1.1.0") def trainClassifier(cls, data, numClasses, categoricalFeaturesInfo, impurity="gini", maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0): """ Train a decision tree model for classification. - :param data: - Training data: RDD of LabeledPoint. Labels should take values - {0, 1, ..., numClasses-1}. - :param numClasses: - Number of classes for classification. - :param categoricalFeaturesInfo: - Map storing arity of categorical features. An entry (n -> k) - indicates that feature n is categorical with k categories - indexed from 0: {0, 1, ..., k-1}. 
- :param impurity: - Criterion used for information gain calculation. - Supported values: "gini" or "entropy". - (default: "gini") - :param maxDepth: - Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1 - means 1 internal node + 2 leaf nodes). - (default: 5) - :param maxBins: - Number of bins used for finding splits at each node. - (default: 32) - :param minInstancesPerNode: - Minimum number of instances required at child nodes to create - the parent split. - (default: 1) - :param minInfoGain: - Minimum info gain required to create a split. - (default: 0.0) - :return: - DecisionTreeModel. - - Example usage: - + .. versionadded:: 1.1.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + Training data: RDD of LabeledPoint. Labels should take values + {0, 1, ..., numClasses-1}. + numClasses : int + Number of classes for classification. + categoricalFeaturesInfo : dict + Map storing arity of categorical features. An entry (n -> k) + indicates that feature n is categorical with k categories + indexed from 0: {0, 1, ..., k-1}. + impurity : str, optional + Criterion used for information gain calculation. + Supported values: "gini" or "entropy". + (default: "gini") + maxDepth : int, optional + Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1 + means 1 internal node + 2 leaf nodes). + (default: 5) + maxBins : int, optional + Number of bins used for finding splits at each node. + (default: 32) + minInstancesPerNode : int, optional + Minimum number of instances required at child nodes to create + the parent split. + (default: 1) + minInfoGain : float, optional + Minimum info gain required to create a split. + (default: 0.0) + + Returns + ------- + :py:class:`DecisionTreeModel` + + Examples + -------- >>> from numpy import array >>> from pyspark.mllib.regression import LabeledPoint >>> from pyspark.mllib.tree import DecisionTree @@ -222,35 +235,39 @@ def trainRegressor(cls, data, categoricalFeaturesInfo, """ Train a decision tree model for regression. - :param data: - Training data: RDD of LabeledPoint. Labels are real numbers. - :param categoricalFeaturesInfo: - Map storing arity of categorical features. An entry (n -> k) - indicates that feature n is categorical with k categories - indexed from 0: {0, 1, ..., k-1}. - :param impurity: - Criterion used for information gain calculation. - The only supported value for regression is "variance". - (default: "variance") - :param maxDepth: - Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1 - means 1 internal node + 2 leaf nodes). - (default: 5) - :param maxBins: - Number of bins used for finding splits at each node. - (default: 32) - :param minInstancesPerNode: - Minimum number of instances required at child nodes to create - the parent split. - (default: 1) - :param minInfoGain: - Minimum info gain required to create a split. - (default: 0.0) - :return: - DecisionTreeModel. - - Example usage: - + Parameters + ---------- + data : :py:class:`pyspark.RDD` + Training data: RDD of LabeledPoint. Labels are real numbers. + categoricalFeaturesInfo : dict + Map storing arity of categorical features. An entry (n -> k) + indicates that feature n is categorical with k categories + indexed from 0: {0, 1, ..., k-1}. + impurity : str, optional + Criterion used for information gain calculation. + The only supported value for regression is "variance". + (default: "variance") + maxDepth : int, optional + Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1 + means 1 internal node + 2 leaf nodes). 
+ (default: 5) + maxBins : int, optional + Number of bins used for finding splits at each node. + (default: 32) + minInstancesPerNode : int, optional + Minimum number of instances required at child nodes to create + the parent split. + (default: 1) + minInfoGain : float, optional + Minimum info gain required to create a split. + (default: 0.0) + + Returns + ------- + :py:class:`DecisionTreeModel` + + Examples + -------- >>> from pyspark.mllib.regression import LabeledPoint >>> from pyspark.mllib.tree import DecisionTree >>> from pyspark.mllib.linalg import SparseVector @@ -313,7 +330,6 @@ def _train(cls, data, algo, numClasses, categoricalFeaturesInfo, numTrees, return RandomForestModel(model) @classmethod - @since("1.2.0") def trainClassifier(cls, data, numClasses, categoricalFeaturesInfo, numTrees, featureSubsetStrategy="auto", impurity="gini", maxDepth=4, maxBins=32, seed=None): @@ -321,44 +337,51 @@ def trainClassifier(cls, data, numClasses, categoricalFeaturesInfo, numTrees, Train a random forest model for binary or multiclass classification. - :param data: - Training dataset: RDD of LabeledPoint. Labels should take values - {0, 1, ..., numClasses-1}. - :param numClasses: - Number of classes for classification. - :param categoricalFeaturesInfo: - Map storing arity of categorical features. An entry (n -> k) - indicates that feature n is categorical with k categories - indexed from 0: {0, 1, ..., k-1}. - :param numTrees: - Number of trees in the random forest. - :param featureSubsetStrategy: - Number of features to consider for splits at each node. - Supported values: "auto", "all", "sqrt", "log2", "onethird". - If "auto" is set, this parameter is set based on numTrees: - if numTrees == 1, set to "all"; - if numTrees > 1 (forest) set to "sqrt". - (default: "auto") - :param impurity: - Criterion used for information gain calculation. - Supported values: "gini" or "entropy". - (default: "gini") - :param maxDepth: - Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1 - means 1 internal node + 2 leaf nodes). - (default: 4) - :param maxBins: - Maximum number of bins used for splitting features. - (default: 32) - :param seed: - Random seed for bootstrapping and choosing feature subsets. - Set as None to generate seed based on system time. - (default: None) - :return: - RandomForestModel that can be used for prediction. - - Example usage: - + .. versionadded:: 1.2.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + Training dataset: RDD of LabeledPoint. Labels should take values + {0, 1, ..., numClasses-1}. + numClasses : int + Number of classes for classification. + categoricalFeaturesInfo : dict + Map storing arity of categorical features. An entry (n -> k) + indicates that feature n is categorical with k categories + indexed from 0: {0, 1, ..., k-1}. + numTrees : int + Number of trees in the random forest. + featureSubsetStrategy : str, optional + Number of features to consider for splits at each node. + Supported values: "auto", "all", "sqrt", "log2", "onethird". + If "auto" is set, this parameter is set based on numTrees: + if numTrees == 1, set to "all"; + if numTrees > 1 (forest) set to "sqrt". + (default: "auto") + impurity : str, optional + Criterion used for information gain calculation. + Supported values: "gini" or "entropy". + (default: "gini") + maxDepth : int, optional + Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1 + means 1 internal node + 2 leaf nodes). 
+ (default: 4) + maxBins : int, optional + Maximum number of bins used for splitting features. + (default: 32) + seed : int, Optional + Random seed for bootstrapping and choosing feature subsets. + Set as None to generate seed based on system time. + (default: None) + + Returns + ------- + :py:class:`RandomForestModel` + that can be used for prediction. + + Examples + -------- >>> from pyspark.mllib.regression import LabeledPoint >>> from pyspark.mllib.tree import RandomForest >>> @@ -405,47 +428,55 @@ def trainClassifier(cls, data, numClasses, categoricalFeaturesInfo, numTrees, maxDepth, maxBins, seed) @classmethod - @since("1.2.0") def trainRegressor(cls, data, categoricalFeaturesInfo, numTrees, featureSubsetStrategy="auto", impurity="variance", maxDepth=4, maxBins=32, seed=None): """ Train a random forest model for regression. - :param data: - Training dataset: RDD of LabeledPoint. Labels are real numbers. - :param categoricalFeaturesInfo: - Map storing arity of categorical features. An entry (n -> k) - indicates that feature n is categorical with k categories - indexed from 0: {0, 1, ..., k-1}. - :param numTrees: - Number of trees in the random forest. - :param featureSubsetStrategy: - Number of features to consider for splits at each node. - Supported values: "auto", "all", "sqrt", "log2", "onethird". - If "auto" is set, this parameter is set based on numTrees: - if numTrees == 1, set to "all"; - if numTrees > 1 (forest) set to "onethird" for regression. - (default: "auto") - :param impurity: - Criterion used for information gain calculation. - The only supported value for regression is "variance". - (default: "variance") - :param maxDepth: - Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1 - means 1 internal node + 2 leaf nodes). - (default: 4) - :param maxBins: - Maximum number of bins used for splitting features. - (default: 32) - :param seed: - Random seed for bootstrapping and choosing feature subsets. - Set as None to generate seed based on system time. - (default: None) - :return: - RandomForestModel that can be used for prediction. - - Example usage: - + .. versionadded:: 1.2.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + Training dataset: RDD of LabeledPoint. Labels are real numbers. + categoricalFeaturesInfo : dict + Map storing arity of categorical features. An entry (n -> k) + indicates that feature n is categorical with k categories + indexed from 0: {0, 1, ..., k-1}. + numTrees : int + Number of trees in the random forest. + featureSubsetStrategy : str, optional + Number of features to consider for splits at each node. + Supported values: "auto", "all", "sqrt", "log2", "onethird". + If "auto" is set, this parameter is set based on numTrees: + + - if numTrees == 1, set to "all"; + - if numTrees > 1 (forest) set to "onethird" for regression. + + (default: "auto") + impurity : str, optional + Criterion used for information gain calculation. + The only supported value for regression is "variance". + (default: "variance") + maxDepth : int, optional + Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1 + means 1 internal node + 2 leaf nodes). + (default: 4) + maxBins : int, optional + Maximum number of bins used for splitting features. + (default: 32) + seed : int, optional + Random seed for bootstrapping and choosing feature subsets. + Set as None to generate seed based on system time. + (default: None) + + Returns + ------- + :py:class:`RandomForestModel` + that can be used for prediction. 
+ + Examples + -------- >>> from pyspark.mllib.regression import LabeledPoint >>> from pyspark.mllib.tree import RandomForest >>> from pyspark.mllib.linalg import SparseVector @@ -505,45 +536,51 @@ def _train(cls, data, algo, categoricalFeaturesInfo, return GradientBoostedTreesModel(model) @classmethod - @since("1.3.0") def trainClassifier(cls, data, categoricalFeaturesInfo, loss="logLoss", numIterations=100, learningRate=0.1, maxDepth=3, maxBins=32): """ Train a gradient-boosted trees model for classification. - :param data: - Training dataset: RDD of LabeledPoint. Labels should take values - {0, 1}. - :param categoricalFeaturesInfo: - Map storing arity of categorical features. An entry (n -> k) - indicates that feature n is categorical with k categories - indexed from 0: {0, 1, ..., k-1}. - :param loss: - Loss function used for minimization during gradient boosting. - Supported values: "logLoss", "leastSquaresError", - "leastAbsoluteError". - (default: "logLoss") - :param numIterations: - Number of iterations of boosting. - (default: 100) - :param learningRate: - Learning rate for shrinking the contribution of each estimator. - The learning rate should be between in the interval (0, 1]. - (default: 0.1) - :param maxDepth: - Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1 - means 1 internal node + 2 leaf nodes). - (default: 3) - :param maxBins: - Maximum number of bins used for splitting features. DecisionTree - requires maxBins >= max categories. - (default: 32) - :return: - GradientBoostedTreesModel that can be used for prediction. - - Example usage: - + .. versionadded:: 1.3.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + Training dataset: RDD of LabeledPoint. Labels should take values + {0, 1}. + categoricalFeaturesInfo : dict + Map storing arity of categorical features. An entry (n -> k) + indicates that feature n is categorical with k categories + indexed from 0: {0, 1, ..., k-1}. + loss : str, optional + Loss function used for minimization during gradient boosting. + Supported values: "logLoss", "leastSquaresError", + "leastAbsoluteError". + (default: "logLoss") + numIterations : int, optional + Number of iterations of boosting. + (default: 100) + learningRate : float, optional + Learning rate for shrinking the contribution of each estimator. + The learning rate should be between in the interval (0, 1]. + (default: 0.1) + maxDepth : int, optional + Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1 + means 1 internal node + 2 leaf nodes). + (default: 3) + maxBins : int, optional + Maximum number of bins used for splitting features. DecisionTree + requires maxBins >= max categories. + (default: 32) + + Returns + ------- + :py:class:`GradientBoostedTreesModel` + that can be used for prediction. + + Examples + -------- >>> from pyspark.mllib.regression import LabeledPoint >>> from pyspark.mllib.tree import GradientBoostedTrees >>> @@ -574,44 +611,50 @@ def trainClassifier(cls, data, categoricalFeaturesInfo, loss, numIterations, learningRate, maxDepth, maxBins) @classmethod - @since("1.3.0") def trainRegressor(cls, data, categoricalFeaturesInfo, loss="leastSquaresError", numIterations=100, learningRate=0.1, maxDepth=3, maxBins=32): """ Train a gradient-boosted trees model for regression. - :param data: - Training dataset: RDD of LabeledPoint. Labels are real numbers. - :param categoricalFeaturesInfo: - Map storing arity of categorical features. 
An entry (n -> k) - indicates that feature n is categorical with k categories - indexed from 0: {0, 1, ..., k-1}. - :param loss: - Loss function used for minimization during gradient boosting. - Supported values: "logLoss", "leastSquaresError", - "leastAbsoluteError". - (default: "leastSquaresError") - :param numIterations: - Number of iterations of boosting. - (default: 100) - :param learningRate: - Learning rate for shrinking the contribution of each estimator. - The learning rate should be between in the interval (0, 1]. - (default: 0.1) - :param maxDepth: - Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1 - means 1 internal node + 2 leaf nodes). - (default: 3) - :param maxBins: - Maximum number of bins used for splitting features. DecisionTree - requires maxBins >= max categories. - (default: 32) - :return: - GradientBoostedTreesModel that can be used for prediction. - - Example usage: - + .. versionadded:: 1.3.0 + + Parameters + ---------- + data : + Training dataset: RDD of LabeledPoint. Labels are real numbers. + categoricalFeaturesInfo : dict + Map storing arity of categorical features. An entry (n -> k) + indicates that feature n is categorical with k categories + indexed from 0: {0, 1, ..., k-1}. + loss : str, optional + Loss function used for minimization during gradient boosting. + Supported values: "logLoss", "leastSquaresError", + "leastAbsoluteError". + (default: "leastSquaresError") + numIterations : int, optional + Number of iterations of boosting. + (default: 100) + learningRate : float, optional + Learning rate for shrinking the contribution of each estimator. + The learning rate should be between in the interval (0, 1]. + (default: 0.1) + maxDepth : int, optional + Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1 + means 1 internal node + 2 leaf nodes). + (default: 3) + maxBins : int, optional + Maximum number of bins used for splitting features. DecisionTree + requires maxBins >= max categories. + (default: 32) + + Returns + ------- + :py:class:`GradientBoostedTreesModel` + that can be used for prediction. + + Examples + -------- >>> from pyspark.mllib.regression import LabeledPoint >>> from pyspark.mllib.tree import GradientBoostedTrees >>> from pyspark.mllib.linalg import SparseVector diff --git a/python/pyspark/mllib/util.py b/python/pyspark/mllib/util.py index a0be29a82e3dc..68feb9563852c 100644 --- a/python/pyspark/mllib/util.py +++ b/python/pyspark/mllib/util.py @@ -65,7 +65,6 @@ def _convert_labeled_point_to_libsvm(p): return " ".join(items) @staticmethod - @since("1.0.0") def loadLibSVMFile(sc, path, numFeatures=-1, minPartitions=None): """ Loads labeled data in the LIBSVM format into an RDD of @@ -79,20 +78,33 @@ def loadLibSVMFile(sc, path, numFeatures=-1, minPartitions=None): method parses each line into a LabeledPoint, where the feature indices are converted to zero-based. - :param sc: Spark context - :param path: file or directory path in any Hadoop-supported file - system URI - :param numFeatures: number of features, which will be determined - from the input data if a nonpositive value - is given. This is useful when the dataset is - already split into multiple files and you - want to load them separately, because some - features may not present in certain files, - which leads to inconsistent feature - dimensions. - :param minPartitions: min number of partitions - :return: labeled data stored as an RDD of LabeledPoint - + .. 
versionadded:: 1.0.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + Spark context + path : str + file or directory path in any Hadoop-supported file system URI + numFeatures : int, optional + number of features, which will be determined + from the input data if a nonpositive value + is given. This is useful when the dataset is + already split into multiple files and you + want to load them separately, because some + features may not present in certain files, + which leads to inconsistent feature + dimensions. + minPartitions : int, optional + min number of partitions + + Returns + ------- + :py:class:`pyspark.RDD` + labeled data stored as an RDD of LabeledPoint + + Examples + -------- >>> from tempfile import NamedTemporaryFile >>> from pyspark.mllib.util import MLUtils >>> from pyspark.mllib.regression import LabeledPoint @@ -118,14 +130,21 @@ def loadLibSVMFile(sc, path, numFeatures=-1, minPartitions=None): return parsed.map(lambda x: LabeledPoint(x[0], Vectors.sparse(numFeatures, x[1], x[2]))) @staticmethod - @since("1.0.0") def saveAsLibSVMFile(data, dir): """ Save labeled data in LIBSVM format. - :param data: an RDD of LabeledPoint to be saved - :param dir: directory to save the data + .. versionadded:: 1.0.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + an RDD of LabeledPoint to be saved + dir : str + directory to save the data + Examples + -------- >>> from tempfile import NamedTemporaryFile >>> from fileinput import input >>> from pyspark.mllib.regression import LabeledPoint @@ -143,17 +162,28 @@ def saveAsLibSVMFile(data, dir): lines.saveAsTextFile(dir) @staticmethod - @since("1.1.0") def loadLabeledPoints(sc, path, minPartitions=None): """ Load labeled points saved using RDD.saveAsTextFile. - :param sc: Spark context - :param path: file or directory path in any Hadoop-supported file - system URI - :param minPartitions: min number of partitions - :return: labeled data stored as an RDD of LabeledPoint + .. versionadded:: 1.0.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + Spark context + path : str + file or directory path in any Hadoop-supported file system URI + minPartitions : int, optional + min number of partitions + Returns + ------- + :py:class:`pyspark.RDD` + labeled data stored as an RDD of LabeledPoint + + Examples + -------- >>> from tempfile import NamedTemporaryFile >>> from pyspark.mllib.util import MLUtils >>> from pyspark.mllib.regression import LabeledPoint @@ -193,7 +223,6 @@ def loadVectors(sc, path): return callMLlibFunc("loadVectors", sc, path) @staticmethod - @since("2.0.0") def convertVectorColumnsToML(dataset, *cols): """ Converts vector columns in an input DataFrame from the @@ -201,16 +230,26 @@ def convertVectorColumnsToML(dataset, *cols): :py:class:`pyspark.ml.linalg.Vector` type under the `spark.ml` package. - :param dataset: - input dataset - :param cols: - a list of vector columns to be converted. - New vector columns will be ignored. If unspecified, all old - vector columns will be converted excepted nested ones. - :return: - the input dataset with old vector columns converted to the - new vector type + .. versionadded:: 2.0.0 + + Parameters + ---------- + dataset : :py:class:`pyspark.sql.DataFrame` + input dataset + \\*cols : str + Vector columns to be converted. + New vector columns will be ignored. If unspecified, all old + vector columns will be converted excepted nested ones. 
+ + Returns + ------- + :py:class:`pyspark.sql.DataFrame` + the input dataset with old vector columns converted to the + new vector type + + Examples + -------- >>> import pyspark >>> from pyspark.mllib.linalg import Vectors >>> from pyspark.mllib.util import MLUtils @@ -233,7 +272,6 @@ def convertVectorColumnsToML(dataset, *cols): return callMLlibFunc("convertVectorColumnsToML", dataset, list(cols)) @staticmethod - @since("2.0.0") def convertVectorColumnsFromML(dataset, *cols): """ Converts vector columns in an input DataFrame to the @@ -241,16 +279,26 @@ def convertVectorColumnsFromML(dataset, *cols): :py:class:`pyspark.ml.linalg.Vector` type under the `spark.ml` package. - :param dataset: - input dataset - :param cols: - a list of vector columns to be converted. - Old vector columns will be ignored. If unspecified, all new - vector columns will be converted except nested ones. - :return: - the input dataset with new vector columns converted to the - old vector type + .. versionadded:: 2.0.0 + + Parameters + ---------- + dataset : :py:class:`pyspark.sql.DataFrame` + input dataset + \\*cols : str + Vector columns to be converted. + + Old vector columns will be ignored. If unspecified, all new + vector columns will be converted except nested ones. + + Returns + ------- + :py:class:`pyspark.sql.DataFrame` + the input dataset with new vector columns converted to the + old vector type + Examples + -------- >>> import pyspark >>> from pyspark.ml.linalg import Vectors >>> from pyspark.mllib.util import MLUtils @@ -273,7 +321,6 @@ def convertVectorColumnsFromML(dataset, *cols): return callMLlibFunc("convertVectorColumnsFromML", dataset, list(cols)) @staticmethod - @since("2.0.0") def convertMatrixColumnsToML(dataset, *cols): """ Converts matrix columns in an input DataFrame from the @@ -281,16 +328,26 @@ def convertMatrixColumnsToML(dataset, *cols): :py:class:`pyspark.ml.linalg.Matrix` type under the `spark.ml` package. - :param dataset: - input dataset - :param cols: - a list of matrix columns to be converted. - New matrix columns will be ignored. If unspecified, all old - matrix columns will be converted excepted nested ones. - :return: - the input dataset with old matrix columns converted to the - new matrix type + .. versionadded:: 2.0.0 + Parameters + ---------- + dataset : :py:class:`pyspark.sql.DataFrame` + input dataset + \\*cols : str + Matrix columns to be converted. + + New matrix columns will be ignored. If unspecified, all old + matrix columns will be converted excepted nested ones. + + Returns + ------- + :py:class:`pyspark.sql.DataFrame` + the input dataset with old matrix columns converted to the + new matrix type + + Examples + -------- >>> import pyspark >>> from pyspark.mllib.linalg import Matrices >>> from pyspark.mllib.util import MLUtils @@ -313,7 +370,6 @@ def convertMatrixColumnsToML(dataset, *cols): return callMLlibFunc("convertMatrixColumnsToML", dataset, list(cols)) @staticmethod - @since("2.0.0") def convertMatrixColumnsFromML(dataset, *cols): """ Converts matrix columns in an input DataFrame to the @@ -321,16 +377,26 @@ def convertMatrixColumnsFromML(dataset, *cols): :py:class:`pyspark.ml.linalg.Matrix` type under the `spark.ml` package. - :param dataset: - input dataset - :param cols: - a list of matrix columns to be converted. - Old matrix columns will be ignored. If unspecified, all new - matrix columns will be converted except nested ones. - :return: - the input dataset with new matrix columns converted to the - old matrix type + .. 
versionadded:: 2.0.0 + + Parameters + ---------- + dataset : :py:class:`pyspark.sql.DataFrame` + input dataset + \\*cols : str + Matrix columns to be converted. + + Old matrix columns will be ignored. If unspecified, all new + matrix columns will be converted except nested ones. + Returns + ------- + :py:class:`pyspark.sql.DataFrame` + the input dataset with new matrix columns converted to the + old matrix type + + Examples + -------- >>> import pyspark >>> from pyspark.ml.linalg import Matrices >>> from pyspark.mllib.util import MLUtils @@ -370,10 +436,14 @@ def save(self, sc, path): The model may be loaded using :py:meth:`Loader.load`. - :param sc: Spark context used to save model data. - :param path: Path specifying the directory in which to save - this model. If the directory already exists, - this method throws an exception. + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + Spark context used to save model data. + path : str + Path specifying the directory in which to save + this model. If the directory already exists, + this method throws an exception. """ raise NotImplementedError @@ -410,10 +480,17 @@ def load(cls, sc, path): Load a model from the given path. The model should have been saved using :py:meth:`Saveable.save`. - :param sc: Spark context used for loading model files. - :param path: Path specifying the directory to which the model - was saved. - :return: model instance + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + Spark context used for loading model files. + path : str + Path specifying the directory to which the model was saved. + + Returns + ------- + object + model instance """ raise NotImplementedError @@ -463,20 +540,33 @@ class LinearDataGenerator(object): """ @staticmethod - @since("1.5.0") def generateLinearInput(intercept, weights, xMean, xVariance, nPoints, seed, eps): """ - :param: intercept bias factor, the term c in X'w + c - :param: weights feature vector, the term w in X'w + c - :param: xMean Point around which the data X is centered. - :param: xVariance Variance of the given data - :param: nPoints Number of points to be generated - :param: seed Random Seed - :param: eps Used to scale the noise. If eps is set high, - the amount of gaussian noise added is more. - - Returns a list of LabeledPoints of length nPoints + .. versionadded:: 1.5.0 + + Parameters + ---------- + intercept : float + bias factor, the term c in X'w + c + weights : :py:class:`pyspark.mllib.linalg.Vector` or convertible + feature vector, the term w in X'w + c + xMean : :py:class:`pyspark.mllib.linalg.Vector` or convertible + Point around which the data X is centered. + xVariance : :py:class:`pyspark.mllib.linalg.Vector` or convertible + Variance of the given data + nPoints : int + Number of points to be generated + seed : int + Random Seed + eps : float + Used to scale the noise. If eps is set high, + the amount of gaussian noise added is more. + + Returns + ------- + list + of :py:class:`pyspark.mllib.regression.LabeledPoints` of length nPoints """ weights = [float(weight) for weight in weights] xMean = [float(mean) for mean in xMean] From d1b4f06179f3f7838ae1ce7a6244b2ba75134e41 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 25 Nov 2020 02:02:32 +0000 Subject: [PATCH 0564/1009] [SPARK-33494][SQL][AQE] Do not use local shuffle reader for repartition ### What changes were proposed in this pull request? This PR updates `ShuffleExchangeExec` to carry more information about how much we can change the partitioning. 
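A rough illustration of the case this PR targets (a hedged spark-shell sketch, assuming adaptive execution is enabled and a `testData` table exists; this snippet is not code from the PR itself):
```
spark.conf.set("spark.sql.adaptive.enabled", "true")
val df = spark.table("testData").repartition('key)
df.collect()
// The shuffle added by repartition('key) hash-partitions the data by `key`.
// Rewriting its reader into an AQE local shuffle reader would drop that
// partitioning, while merely coalescing its partitions would not.
```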
For `repartition(col)`, we should preserve the user-specified partitioning and don't apply the AQE local shuffle reader. ### Why are the changes needed? Similar to `repartition(number, col)`, we should respect the user-specified partitioning. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? a new test Closes #30432 from cloud-fan/aqe. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../spark/sql/execution/SparkStrategies.scala | 14 ++++---- .../adaptive/CoalesceShufflePartitions.scala | 9 +++++- .../adaptive/OptimizeLocalShuffleReader.scala | 11 +++++-- .../exchange/ShuffleExchangeExec.scala | 28 +++++++++++----- .../sql-tests/results/explain-aqe.sql.out | 24 +++++++------- .../sql-tests/results/explain.sql.out | 32 +++++++++---------- .../sql/SparkSessionExtensionSuite.scala | 6 ++-- .../adaptive/AdaptiveQueryExecSuite.scala | 10 ++++++ 8 files changed, 86 insertions(+), 48 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index e9b1aa81895f5..f5f77b03c2b1b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.streaming.{InternalOutputModes, StreamingRe import org.apache.spark.sql.execution.aggregate.AggUtils import org.apache.spark.sql.execution.columnar.{InMemoryRelation, InMemoryTableScanExec} import org.apache.spark.sql.execution.command._ -import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec +import org.apache.spark.sql.execution.exchange.{REPARTITION, REPARTITION_WITH_NUM, ShuffleExchangeExec} import org.apache.spark.sql.execution.python._ import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.sources.MemoryPlan @@ -670,7 +670,7 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { case logical.Repartition(numPartitions, shuffle, child) => if (shuffle) { ShuffleExchangeExec(RoundRobinPartitioning(numPartitions), - planLater(child), noUserSpecifiedNumPartition = false) :: Nil + planLater(child), REPARTITION_WITH_NUM) :: Nil } else { execution.CoalesceExec(numPartitions, planLater(child)) :: Nil } @@ -703,10 +703,12 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { case r: logical.Range => execution.RangeExec(r) :: Nil case r: logical.RepartitionByExpression => - exchange.ShuffleExchangeExec( - r.partitioning, - planLater(r.child), - noUserSpecifiedNumPartition = r.optNumPartitions.isEmpty) :: Nil + val shuffleOrigin = if (r.optNumPartitions.isEmpty) { + REPARTITION + } else { + REPARTITION_WITH_NUM + } + exchange.ShuffleExchangeExec(r.partitioning, planLater(r.child), shuffleOrigin) :: Nil case ExternalRDD(outputObjAttr, rdd) => ExternalRDDScanExec(outputObjAttr, rdd) :: Nil case r: LogicalRDD => RDDScanExec(r.output, r.rdd, "ExistingRDD", r.outputPartitioning, r.outputOrdering) :: Nil diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala index 89ff528d7a188..0cf3ab0cca49a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala @@ -18,8 +18,10 @@ 
package org.apache.spark.sql.execution.adaptive import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.plans.physical.SinglePartition import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.execution.exchange.{ENSURE_REQUIREMENTS, REPARTITION, ShuffleExchangeLike} import org.apache.spark.sql.internal.SQLConf /** @@ -47,7 +49,7 @@ case class CoalesceShufflePartitions(session: SparkSession) extends Rule[SparkPl val shuffleStages = collectShuffleStages(plan) // ShuffleExchanges introduced by repartition do not support changing the number of partitions. // We change the number of partitions in the stage only if all the ShuffleExchanges support it. - if (!shuffleStages.forall(_.shuffle.canChangeNumPartitions)) { + if (!shuffleStages.forall(s => supportCoalesce(s.shuffle))) { plan } else { // `ShuffleQueryStageExec#mapStats` returns None when the input RDD has 0 partitions, @@ -82,4 +84,9 @@ case class CoalesceShufflePartitions(session: SparkSession) extends Rule[SparkPl } } } + + private def supportCoalesce(s: ShuffleExchangeLike): Boolean = { + s.outputPartitioning != SinglePartition && + (s.shuffleOrigin == ENSURE_REQUIREMENTS || s.shuffleOrigin == REPARTITION) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeLocalShuffleReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeLocalShuffleReader.scala index 8db2827beaf43..8f57947cb6396 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeLocalShuffleReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeLocalShuffleReader.scala @@ -18,9 +18,10 @@ package org.apache.spark.sql.execution.adaptive import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide} +import org.apache.spark.sql.catalyst.plans.physical.SinglePartition import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution._ -import org.apache.spark.sql.execution.exchange.{EnsureRequirements, ShuffleExchangeExec} +import org.apache.spark.sql.execution.exchange.{ENSURE_REQUIREMENTS, EnsureRequirements, ShuffleExchangeExec, ShuffleExchangeLike} import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec import org.apache.spark.sql.internal.SQLConf @@ -136,9 +137,13 @@ object OptimizeLocalShuffleReader extends Rule[SparkPlan] { def canUseLocalShuffleReader(plan: SparkPlan): Boolean = plan match { case s: ShuffleQueryStageExec => - s.shuffle.canChangeNumPartitions && s.mapStats.isDefined + s.mapStats.isDefined && supportLocalReader(s.shuffle) case CustomShuffleReaderExec(s: ShuffleQueryStageExec, partitionSpecs) => - s.shuffle.canChangeNumPartitions && s.mapStats.isDefined && partitionSpecs.nonEmpty + s.mapStats.isDefined && partitionSpecs.nonEmpty && supportLocalReader(s.shuffle) case _ => false } + + private def supportLocalReader(s: ShuffleExchangeLike): Boolean = { + s.outputPartitioning != SinglePartition && s.shuffleOrigin == ENSURE_REQUIREMENTS + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala index 6af4b098bee2f..affa92de693af 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala @@ -57,9 +57,9 @@ trait 
ShuffleExchangeLike extends Exchange { def numPartitions: Int /** - * Returns whether the shuffle partition number can be changed. + * The origin of this shuffle operator. */ - def canChangeNumPartitions: Boolean + def shuffleOrigin: ShuffleOrigin /** * The asynchronous job that materializes the shuffle. @@ -77,18 +77,30 @@ trait ShuffleExchangeLike extends Exchange { def runtimeStatistics: Statistics } +// Describes where the shuffle operator comes from. +sealed trait ShuffleOrigin + +// Indicates that the shuffle operator was added by the internal `EnsureRequirements` rule. It +// means that the shuffle operator is used to ensure internal data partitioning requirements and +// Spark is free to optimize it as long as the requirements are still ensured. +case object ENSURE_REQUIREMENTS extends ShuffleOrigin + +// Indicates that the shuffle operator was added by the user-specified repartition operator. Spark +// can still optimize it via changing shuffle partition number, as data partitioning won't change. +case object REPARTITION extends ShuffleOrigin + +// Indicates that the shuffle operator was added by the user-specified repartition operator with +// a certain partition number. Spark can't optimize it. +case object REPARTITION_WITH_NUM extends ShuffleOrigin + /** * Performs a shuffle that will result in the desired partitioning. */ case class ShuffleExchangeExec( override val outputPartitioning: Partitioning, child: SparkPlan, - noUserSpecifiedNumPartition: Boolean = true) extends ShuffleExchangeLike { - - // If users specify the num partitions via APIs like `repartition`, we shouldn't change it. - // For `SinglePartition`, it requires exactly one partition and we can't change it either. - override def canChangeNumPartitions: Boolean = - noUserSpecifiedNumPartition && outputPartitioning != SinglePartition + shuffleOrigin: ShuffleOrigin = ENSURE_REQUIREMENTS) + extends ShuffleExchangeLike { private lazy val writeMetrics = SQLShuffleWriteMetricsReporter.createShuffleWriteMetrics(sparkContext) diff --git a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out index 567e0eabe1805..578b0a807fc52 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 23 +-- Number of queries: 24 -- !query @@ -67,10 +67,10 @@ Aggregate [sum(distinct cast(val#x as bigint)) AS sum(DISTINCT val)#xL] == Physical Plan == AdaptiveSparkPlan isFinalPlan=false +- HashAggregate(keys=[], functions=[sum(distinct cast(val#x as bigint)#xL)], output=[sum(DISTINCT val)#xL]) - +- Exchange SinglePartition, true, [id=#x] + +- Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#x] +- HashAggregate(keys=[], functions=[partial_sum(distinct cast(val#x as bigint)#xL)], output=[sum#xL]) +- HashAggregate(keys=[cast(val#x as bigint)#xL], functions=[], output=[cast(val#x as bigint)#xL]) - +- Exchange hashpartitioning(cast(val#x as bigint)#xL, 4), true, [id=#x] + +- Exchange hashpartitioning(cast(val#x as bigint)#xL, 4), ENSURE_REQUIREMENTS, [id=#x] +- HashAggregate(keys=[cast(val#x as bigint) AS cast(val#x as bigint)#xL], functions=[], output=[cast(val#x as bigint)#xL]) +- FileScan parquet default.explain_temp1[val#x] Batched: true, DataFilters: [], Format: Parquet, Location [not included in comparison]/{warehouse_dir}/explain_temp1], PartitionFilters: [], PushedFilters: [], 
ReadSchema: struct @@ -116,7 +116,7 @@ Results [2]: [key#x, max#x] (4) Exchange Input [2]: [key#x, max#x] -Arguments: hashpartitioning(key#x, 4), true, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] (5) HashAggregate Input [2]: [key#x, max#x] @@ -127,7 +127,7 @@ Results [2]: [key#x, max(val#x)#x AS max(val)#x] (6) Exchange Input [2]: [key#x, max(val)#x] -Arguments: rangepartitioning(key#x ASC NULLS FIRST, 4), true, [id=#x] +Arguments: rangepartitioning(key#x ASC NULLS FIRST, 4), ENSURE_REQUIREMENTS, [id=#x] (7) Sort Input [2]: [key#x, max(val)#x] @@ -179,7 +179,7 @@ Results [2]: [key#x, max#x] (4) Exchange Input [2]: [key#x, max#x] -Arguments: hashpartitioning(key#x, 4), true, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] (5) HashAggregate Input [2]: [key#x, max#x] @@ -254,7 +254,7 @@ Results [2]: [key#x, val#x] (7) Exchange Input [2]: [key#x, val#x] -Arguments: hashpartitioning(key#x, val#x, 4), true, [id=#x] +Arguments: hashpartitioning(key#x, val#x, 4), ENSURE_REQUIREMENTS, [id=#x] (8) HashAggregate Input [2]: [key#x, val#x] @@ -576,7 +576,7 @@ Results [2]: [key#x, max#x] (4) Exchange Input [2]: [key#x, max#x] -Arguments: hashpartitioning(key#x, 4), true, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] (5) HashAggregate Input [2]: [key#x, max#x] @@ -605,7 +605,7 @@ Results [2]: [key#x, max#x] (9) Exchange Input [2]: [key#x, max#x] -Arguments: hashpartitioning(key#x, 4), true, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] (10) HashAggregate Input [2]: [key#x, max#x] @@ -687,7 +687,7 @@ Results [3]: [count#xL, sum#xL, count#xL] (3) Exchange Input [3]: [count#xL, sum#xL, count#xL] -Arguments: SinglePartition, true, [id=#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] (4) HashAggregate Input [3]: [count#xL, sum#xL, count#xL] @@ -732,7 +732,7 @@ Results [2]: [key#x, buf#x] (3) Exchange Input [2]: [key#x, buf#x] -Arguments: hashpartitioning(key#x, 4), true, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] (4) ObjectHashAggregate Input [2]: [key#x, buf#x] @@ -783,7 +783,7 @@ Results [2]: [key#x, min#x] (4) Exchange Input [2]: [key#x, min#x] -Arguments: hashpartitioning(key#x, 4), true, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] (5) Sort Input [2]: [key#x, min#x] diff --git a/sql/core/src/test/resources/sql-tests/results/explain.sql.out b/sql/core/src/test/resources/sql-tests/results/explain.sql.out index fcd69549f2c6e..886b98e538d28 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 23 +-- Number of queries: 24 -- !query @@ -66,10 +66,10 @@ Aggregate [sum(distinct cast(val#x as bigint)) AS sum(DISTINCT val)#xL] == Physical Plan == *HashAggregate(keys=[], functions=[sum(distinct cast(val#x as bigint)#xL)], output=[sum(DISTINCT val)#xL]) -+- Exchange SinglePartition, true, [id=#x] ++- Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#x] +- *HashAggregate(keys=[], functions=[partial_sum(distinct cast(val#x as bigint)#xL)], output=[sum#xL]) +- *HashAggregate(keys=[cast(val#x as bigint)#xL], functions=[], output=[cast(val#x as bigint)#xL]) - +- Exchange hashpartitioning(cast(val#x as bigint)#xL, 4), true, [id=#x] + +- Exchange hashpartitioning(cast(val#x as bigint)#xL, 4), ENSURE_REQUIREMENTS, [id=#x] +- 
*HashAggregate(keys=[cast(val#x as bigint) AS cast(val#x as bigint)#xL], functions=[], output=[cast(val#x as bigint)#xL]) +- *ColumnarToRow +- FileScan parquet default.explain_temp1[val#x] Batched: true, DataFilters: [], Format: Parquet, Location [not included in comparison]/{warehouse_dir}/explain_temp1], PartitionFilters: [], PushedFilters: [], ReadSchema: struct @@ -119,7 +119,7 @@ Results [2]: [key#x, max#x] (5) Exchange Input [2]: [key#x, max#x] -Arguments: hashpartitioning(key#x, 4), true, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] (6) HashAggregate [codegen id : 2] Input [2]: [key#x, max#x] @@ -130,7 +130,7 @@ Results [2]: [key#x, max(val#x)#x AS max(val)#x] (7) Exchange Input [2]: [key#x, max(val)#x] -Arguments: rangepartitioning(key#x ASC NULLS FIRST, 4), true, [id=#x] +Arguments: rangepartitioning(key#x ASC NULLS FIRST, 4), ENSURE_REQUIREMENTS, [id=#x] (8) Sort [codegen id : 3] Input [2]: [key#x, max(val)#x] @@ -181,7 +181,7 @@ Results [2]: [key#x, max#x] (5) Exchange Input [2]: [key#x, max#x] -Arguments: hashpartitioning(key#x, 4), true, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] (6) HashAggregate [codegen id : 2] Input [2]: [key#x, max#x] @@ -259,7 +259,7 @@ Results [2]: [key#x, val#x] (9) Exchange Input [2]: [key#x, val#x] -Arguments: hashpartitioning(key#x, val#x, 4), true, [id=#x] +Arguments: hashpartitioning(key#x, val#x, 4), ENSURE_REQUIREMENTS, [id=#x] (10) HashAggregate [codegen id : 4] Input [2]: [key#x, val#x] @@ -452,7 +452,7 @@ Results [1]: [max#x] (9) Exchange Input [1]: [max#x] -Arguments: SinglePartition, true, [id=#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] (10) HashAggregate [codegen id : 2] Input [1]: [max#x] @@ -498,7 +498,7 @@ Results [1]: [max#x] (16) Exchange Input [1]: [max#x] -Arguments: SinglePartition, true, [id=#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] (17) HashAggregate [codegen id : 2] Input [1]: [max#x] @@ -580,7 +580,7 @@ Results [1]: [max#x] (9) Exchange Input [1]: [max#x] -Arguments: SinglePartition, true, [id=#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] (10) HashAggregate [codegen id : 2] Input [1]: [max#x] @@ -626,7 +626,7 @@ Results [2]: [sum#x, count#xL] (16) Exchange Input [2]: [sum#x, count#xL] -Arguments: SinglePartition, true, [id=#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] (17) HashAggregate [codegen id : 2] Input [2]: [sum#x, count#xL] @@ -690,7 +690,7 @@ Results [2]: [sum#x, count#xL] (7) Exchange Input [2]: [sum#x, count#xL] -Arguments: SinglePartition, true, [id=#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] (8) HashAggregate [codegen id : 2] Input [2]: [sum#x, count#xL] @@ -810,7 +810,7 @@ Results [2]: [key#x, max#x] (5) Exchange Input [2]: [key#x, max#x] -Arguments: hashpartitioning(key#x, 4), true, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] (6) HashAggregate [codegen id : 4] Input [2]: [key#x, max#x] @@ -901,7 +901,7 @@ Results [3]: [count#xL, sum#xL, count#xL] (4) Exchange Input [3]: [count#xL, sum#xL, count#xL] -Arguments: SinglePartition, true, [id=#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] (5) HashAggregate [codegen id : 2] Input [3]: [count#xL, sum#xL, count#xL] @@ -945,7 +945,7 @@ Results [2]: [key#x, buf#x] (4) Exchange Input [2]: [key#x, buf#x] -Arguments: hashpartitioning(key#x, 4), true, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] (5) ObjectHashAggregate Input [2]: [key#x, buf#x] @@ 
-995,7 +995,7 @@ Results [2]: [key#x, min#x] (5) Exchange Input [2]: [key#x, min#x] -Arguments: hashpartitioning(key#x, 4), true, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] (6) Sort [codegen id : 2] Input [2]: [key#x, min#x] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala index 951b72a863483..12abd31b99e93 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, QueryStageExec} -import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, BroadcastExchangeLike, ShuffleExchangeExec, ShuffleExchangeLike} +import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, BroadcastExchangeLike, ShuffleExchangeExec, ShuffleExchangeLike, ShuffleOrigin} import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.COLUMN_BATCH_SIZE @@ -766,7 +766,9 @@ case class PreRuleReplaceAddWithBrokenVersion() extends Rule[SparkPlan] { case class MyShuffleExchangeExec(delegate: ShuffleExchangeExec) extends ShuffleExchangeLike { override def numMappers: Int = delegate.numMappers override def numPartitions: Int = delegate.numPartitions - override def canChangeNumPartitions: Boolean = delegate.canChangeNumPartitions + override def shuffleOrigin: ShuffleOrigin = { + delegate.shuffleOrigin + } override def mapOutputStatisticsFuture: Future[MapOutputStatistics] = delegate.mapOutputStatisticsFuture override def getShuffleRDD(partitionSpecs: Array[ShufflePartitionSpec]): RDD[_] = diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index 38a323b1c057e..758965954b374 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -1307,4 +1307,14 @@ class AdaptiveQueryExecSuite spark.listenerManager.unregister(listener) } } + + test("SPARK-33494: Do not use local shuffle reader for repartition") { + withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { + val df = spark.table("testData").repartition('key) + df.collect() + // local shuffle reader breaks partitioning and shouldn't be used for repartition operation + // which is specified by users. + checkNumLocalShuffleReaders(df.queryExecution.executedPlan, numShufflesWithoutLocalReader = 1) + } + } } From b7f034d8dc17b9ae5eced387d20f37b9e3e58901 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Wed, 25 Nov 2020 03:04:04 +0000 Subject: [PATCH 0565/1009] [SPARK-33543][SQL] Migrate SHOW COLUMNS command to use UnresolvedTableOrView to resolve the identifier ### What changes were proposed in this pull request? This PR proposes to migrate `SHOW COLUMNS` to use `UnresolvedTableOrView` to resolve the table/view identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. 
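For example (a hedged spark-shell sketch; the table and database names below are placeholders, not taken from this PR):
```
spark.sql("SHOW COLUMNS IN t1")              // t1 is resolved as a temp view first, then as a table
spark.sql("SHOW COLUMNS IN t1 FROM db1")     // db1 is used only because t1 has no database part
spark.sql("SHOW COLUMNS FROM db1.t1 IN db1") // db1 must match the database in db1.t1, otherwise
                                             // analysis fails with "SHOW COLUMNS with conflicting databases"
```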
More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). Note that `SHOW COLUMNS` is not yet supported for v2 tables. ### Why are the changes needed? To use `UnresolvedTableOrView` for table/view resolution. Note that `ShowColumnsCommand` internally resolves to a temp view first, so there is no resolution behavior change with this PR. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Updated existing tests. Closes #30490 from imback82/show_columns. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/parser/AstBuilder.scala | 13 ++++++++++--- .../catalyst/plans/logical/statements.scala | 7 ------- .../catalyst/plans/logical/v2Commands.scala | 10 ++++++++++ .../sql/catalyst/parser/DDLParserSuite.scala | 8 ++++---- .../analysis/ResolveSessionCatalog.scala | 18 +++--------------- .../datasources/v2/DataSourceV2Strategy.scala | 3 +++ .../sql-tests/results/show_columns.sql.out | 16 ++++++++-------- .../sql/connector/DataSourceV2SQLSuite.scala | 11 +++-------- .../spark/sql/execution/command/DDLSuite.scala | 11 +++++++++++ 9 files changed, 52 insertions(+), 45 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index a4298abd211b3..5f8394c525949 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3400,7 +3400,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg /** * A command for users to list the column names for a table. - * This function creates a [[ShowColumnsStatement]] logical plan. + * This function creates a [[ShowColumns]] logical plan. * * The syntax of using this command in SQL is: * {{{ @@ -3409,9 +3409,16 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * }}} */ override def visitShowColumns(ctx: ShowColumnsContext): LogicalPlan = withOrigin(ctx) { - val table = visitMultipartIdentifier(ctx.table) + val nameParts = visitMultipartIdentifier(ctx.table) val namespace = Option(ctx.ns).map(visitMultipartIdentifier) - ShowColumnsStatement(table, namespace) + // Use namespace only if table name doesn't specify it. If namespace is already specified + // in the table name, it's checked against the given namespace after table/view is resolved. 
+ val tableName = if (namespace.isDefined && nameParts.length == 1) { + namespace.get ++ nameParts + } else { + nameParts + } + ShowColumns(UnresolvedTableOrView(tableName), namespace) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index 39bc5a5604b20..3660e8a95a7f6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -359,13 +359,6 @@ case class ShowPartitionsStatement( tableName: Seq[String], partitionSpec: Option[TablePartitionSpec]) extends ParsedStatement -/** - * A SHOW COLUMNS statement, as parsed from SQL - */ -case class ShowColumnsStatement( - table: Seq[String], - namespace: Option[Seq[String]]) extends ParsedStatement - /** * A SHOW CURRENT NAMESPACE statement, as parsed from SQL */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index a65b9fc59bd55..ebf41f6a6e304 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -671,6 +671,15 @@ case class ShowCreateTable(child: LogicalPlan, asSerde: Boolean = false) extends override def children: Seq[LogicalPlan] = child :: Nil } +/** + * The logical plan of the SHOW COLUMN command. + */ +case class ShowColumns( + child: LogicalPlan, + namespace: Option[Seq[String]]) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} + /** * The logical plan of the TRUNCATE TABLE command. 
*/ @@ -679,3 +688,4 @@ case class TruncateTable( partitionSpec: Option[TablePartitionSpec]) extends Command { override def children: Seq[LogicalPlan] = child :: Nil } + diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 997c642276bfb..cc3c824befb3e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1643,13 +1643,13 @@ class DDLParserSuite extends AnalysisTest { val sql4 = "SHOW COLUMNS FROM db1.t1 IN db1" val parsed1 = parsePlan(sql1) - val expected1 = ShowColumnsStatement(Seq("t1"), None) + val expected1 = ShowColumns(UnresolvedTableOrView(Seq("t1")), None) val parsed2 = parsePlan(sql2) - val expected2 = ShowColumnsStatement(Seq("db1", "t1"), None) + val expected2 = ShowColumns(UnresolvedTableOrView(Seq("db1", "t1")), None) val parsed3 = parsePlan(sql3) - val expected3 = ShowColumnsStatement(Seq("t1"), Some(Seq("db1"))) + val expected3 = ShowColumns(UnresolvedTableOrView(Seq("db1", "t1")), Some(Seq("db1"))) val parsed4 = parsePlan(sql4) - val expected4 = ShowColumnsStatement(Seq("db1", "t1"), Some(Seq("db1"))) + val expected4 = ShowColumns(UnresolvedTableOrView(Seq("db1", "t1")), Some(Seq("db1"))) comparePlans(parsed1, expected1) comparePlans(parsed2, expected2) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 726099991a897..395f5efd5a52d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -467,25 +467,13 @@ class ResolveSessionCatalog( v1TableName.asTableIdentifier, partitionSpec) - case ShowColumnsStatement(tbl, ns) => - if (ns.isDefined && ns.get.length > 1) { - throw new AnalysisException( - s"Namespace name should have only one part if specified: ${ns.get.quoted}") - } - // Use namespace only if table name doesn't specify it. If namespace is already specified - // in the table name, it's checked against the given namespace below. 
- val nameParts = if (ns.isDefined && tbl.length == 1) { - ns.get ++ tbl - } else { - tbl - } - val sql = "SHOW COLUMNS" - val v1TableName = parseTempViewOrV1Table(nameParts, sql).asTableIdentifier + case ShowColumns(ResolvedV1TableOrViewIdentifier(ident), ns) => + val v1TableName = ident.asTableIdentifier val resolver = conf.resolver val db = ns match { case Some(db) if v1TableName.database.exists(!resolver(_, db.head)) => throw new AnalysisException( - s"SHOW COLUMNS with conflicting databases: " + + "SHOW COLUMNS with conflicting databases: " + s"'${db.head}' != '${v1TableName.database.get}'") case _ => ns.map(_.head) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 30d976524bfa8..eb0d7010041b9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -305,6 +305,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case TruncateTable(_: ResolvedTable, _) => throw new AnalysisException("TRUNCATE TABLE is not supported for v2 tables.") + case ShowColumns(_: ResolvedTable, _) => + throw new AnalysisException("SHOW COLUMNS is not supported for v2 tables.") + case _ => Nil } } diff --git a/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out b/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out index 4f5db7f6c6b2f..6ddffb89987d8 100644 --- a/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out @@ -93,8 +93,8 @@ SHOW COLUMNS IN badtable FROM showdb -- !query schema struct<> -- !query output -org.apache.spark.sql.catalyst.analysis.NoSuchTableException -Table or view 'badtable' not found in database 'showdb'; +org.apache.spark.sql.AnalysisException +Table or view not found: showdb.badtable; line 1 pos 0 -- !query @@ -129,8 +129,8 @@ SHOW COLUMNS IN showdb.showcolumn3 -- !query schema struct<> -- !query output -org.apache.spark.sql.catalyst.analysis.NoSuchTableException -Table or view 'showcolumn3' not found in database 'showdb'; +org.apache.spark.sql.AnalysisException +Table or view not found: showdb.showcolumn3; line 1 pos 0 -- !query @@ -138,8 +138,8 @@ SHOW COLUMNS IN showcolumn3 FROM showdb -- !query schema struct<> -- !query output -org.apache.spark.sql.catalyst.analysis.NoSuchTableException -Table or view 'showcolumn3' not found in database 'showdb'; +org.apache.spark.sql.AnalysisException +Table or view not found: showdb.showcolumn3; line 1 pos 0 -- !query @@ -147,8 +147,8 @@ SHOW COLUMNS IN showcolumn4 -- !query schema struct<> -- !query output -org.apache.spark.sql.catalyst.analysis.NoSuchTableException -Table or view 'showcolumn4' not found in database 'showdb'; +org.apache.spark.sql.AnalysisException +Table or view not found: showcolumn4; line 1 pos 0 -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 9a3fa0c5bd3f4..222fa8ace4dca 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2047,14 +2047,9 @@ class DataSourceV2SQLSuite withTable(t) { 
spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo") - testV1CommandSupportingTempView("SHOW COLUMNS", s"FROM $t") - testV1CommandSupportingTempView("SHOW COLUMNS", s"IN $t") - - val e3 = intercept[AnalysisException] { - sql(s"SHOW COLUMNS FROM tbl IN testcat.ns1.ns2") - } - assert(e3.message.contains("Namespace name should have " + - "only one part if specified: testcat.ns1.ns2")) + testNotSupportedV2Command("SHOW COLUMNS", s"FROM $t") + testNotSupportedV2Command("SHOW COLUMNS", s"IN $t") + testNotSupportedV2Command("SHOW COLUMNS", "FROM tbl IN testcat.ns1.ns2") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 07201f9f85b5d..4f79e71419a10 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -2266,6 +2266,17 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { } } + test("show columns - invalid db name") { + withTable("tbl") { + sql("CREATE TABLE tbl(col1 int, col2 string) USING parquet ") + val message = intercept[AnalysisException] { + sql("SHOW COLUMNS IN tbl FROM a.b.c") + }.getMessage + assert(message.contains( + "The namespace in session catalog must have exactly one name part: a.b.c.tbl")) + } + } + test("SPARK-18009 calling toLocalIterator on commands") { import scala.collection.JavaConverters._ val df = sql("show databases") From edab094dda3d5acbc100d01bd98e0ab15d7b4178 Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Wed, 25 Nov 2020 13:12:20 +0900 Subject: [PATCH 0566/1009] [SPARK-33224][SS][WEBUI] Add watermark gap information into SS UI page ### What changes were proposed in this pull request? This PR proposes to add the watermark gap information in SS UI page. Please refer below screenshots to see what we'd like to show in UI. ![Screen Shot 2020-11-19 at 6 56 38 PM](https://user-images.githubusercontent.com/1317309/99669306-3532d080-2ab2-11eb-9a93-03d2c6a54948.png) Please note that this PR doesn't plot the watermark value - knowing the gap between actual wall clock and watermark looks more useful than the absolute value. ### Why are the changes needed? Watermark is the one of major metrics the end users need to track for stateful queries. Watermark defines "when" the output will be emitted for append mode, hence knowing how much gap between wall clock and watermark (input data) is very helpful to make expectation of the output. ### Does this PR introduce _any_ user-facing change? Yes, SS UI query page will contain the watermark gap information. ### How was this patch tested? Basic UT added. Manually tested with two queries: > simple case You'll see consistent watermark gap with (15 seconds + a) = 10 seconds are from delay in watermark definition, 5 seconds are trigger interval. 
``` import org.apache.spark.sql.streaming.Trigger spark.conf.set("spark.sql.shuffle.partitions", "10") val query = spark .readStream .format("rate") .option("rowsPerSecond", 1000) .option("rampUpTime", "10s") .load() .selectExpr("timestamp", "mod(value, 100) as mod", "value") .withWatermark("timestamp", "10 seconds") .groupBy(window($"timestamp", "1 minute", "10 seconds"), $"mod") .agg(max("value").as("max_value"), min("value").as("min_value"), avg("value").as("avg_value")) .writeStream .format("console") .trigger(Trigger.ProcessingTime("5 seconds")) .outputMode("append") .start() query.awaitTermination() ``` ![Screen Shot 2020-11-19 at 7 00 21 PM](https://user-images.githubusercontent.com/1317309/99669049-dbcaa180-2ab1-11eb-8789-10b35857dda0.png) > complicated case This randomizes the timestamp, hence producing random watermark gap. This won't be smaller than 15 seconds as I described earlier. ``` import org.apache.spark.sql.streaming.Trigger spark.conf.set("spark.sql.shuffle.partitions", "10") val query = spark .readStream .format("rate") .option("rowsPerSecond", 1000) .option("rampUpTime", "10s") .load() .selectExpr("*", "CAST(CAST(timestamp AS BIGINT) - CAST((RAND() * 100000) AS BIGINT) AS TIMESTAMP) AS tsMod") .selectExpr("tsMod", "mod(value, 100) as mod", "value") .withWatermark("tsMod", "10 seconds") .groupBy(window($"tsMod", "1 minute", "10 seconds"), $"mod") .agg(max("value").as("max_value"), min("value").as("min_value"), avg("value").as("avg_value")) .writeStream .format("console") .trigger(Trigger.ProcessingTime("5 seconds")) .outputMode("append") .start() query.awaitTermination() ``` ![Screen Shot 2020-11-19 at 6 56 47 PM](https://user-images.githubusercontent.com/1317309/99669029-d5d4c080-2ab1-11eb-9c63-d05b3e1ab391.png) Closes #30427 from HeartSaVioR/SPARK-33224. Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../ui/StreamingQueryStatisticsPage.scala | 53 +++++++++++++++++++ .../sql/streaming/ui/UISeleniumSuite.scala | 15 ++++-- 2 files changed, 65 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala index 77b1e61d587a7..24709ba470cde 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala @@ -140,6 +140,58 @@ private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab)
    } + def generateWatermark( + query: StreamingQueryUIData, + minBatchTime: Long, + maxBatchTime: Long, + jsCollector: JsCollector): Seq[Node] = { + // This is made sure on caller side but put it here to be defensive + require(query.lastProgress != null) + if (query.lastProgress.eventTime.containsKey("watermark")) { + val watermarkData = query.recentProgress.flatMap { p => + val batchTimestamp = parseProgressTimestamp(p.timestamp) + val watermarkValue = parseProgressTimestamp(p.eventTime.get("watermark")) + if (watermarkValue > 0L) { + // seconds + Some((batchTimestamp, ((batchTimestamp - watermarkValue) / 1000.0))) + } else { + None + } + } + + if (watermarkData.nonEmpty) { + val maxWatermark = watermarkData.maxBy(_._2)._2 + val graphUIDataForWatermark = + new GraphUIData( + "watermark-gap-timeline", + "watermark-gap-histogram", + watermarkData, + minBatchTime, + maxBatchTime, + 0, + maxWatermark, + "seconds") + graphUIDataForWatermark.generateDataJs(jsCollector) + + // scalastyle:off + + +
    +
    Global Watermark Gap {SparkUIUtils.tooltip("The gap between batch timestamp and global watermark for the batch.", "right")}
    +
    + + {graphUIDataForWatermark.generateTimelineHtml(jsCollector)} + {graphUIDataForWatermark.generateHistogramHtml(jsCollector)} + + // scalastyle:on + } else { + Seq.empty[Node] + } + } else { + Seq.empty[Node] + } + } + def generateAggregatedStateOperators( query: StreamingQueryUIData, minBatchTime: Long, @@ -465,6 +517,7 @@ private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab) {graphUIDataForDuration.generateAreaStackHtmlWithData(jsCollector, operationDurationData)} + {generateWatermark(query, minBatchTime, maxBatchTime, jsCollector)} {generateAggregatedStateOperators(query, minBatchTime, maxBatchTime, jsCollector)} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala index 94844c4e87a84..db3d6529c9906 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala @@ -31,8 +31,10 @@ import org.apache.spark.internal.config.UI.{UI_ENABLED, UI_PORT} import org.apache.spark.sql.LocalSparkSession.withSparkSession import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.util.quietly +import org.apache.spark.sql.functions.{window => windowFn, _} +import org.apache.spark.sql.internal.SQLConf.SHUFFLE_PARTITIONS import org.apache.spark.sql.internal.StaticSQLConf.ENABLED_STREAMING_UI_CUSTOM_METRIC_LIST -import org.apache.spark.sql.streaming.StreamingQueryException +import org.apache.spark.sql.streaming.{StreamingQueryException, Trigger} import org.apache.spark.ui.SparkUICssErrorHandler class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers with BeforeAndAfterAll { @@ -52,6 +54,7 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers with B val conf = new SparkConf() .setMaster(master) .setAppName("ui-test") + .set(SHUFFLE_PARTITIONS, 5) .set(UI_ENABLED, true) .set(UI_PORT, 0) .set(ENABLED_STREAMING_UI_CUSTOM_METRIC_LIST, Seq("stateOnCurrentVersionSizeBytes")) @@ -79,10 +82,15 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers with B val input1 = spark.readStream.format("rate").load() val input2 = spark.readStream.format("rate").load() + val input3 = spark.readStream.format("rate").load() val activeQuery = - input1.join(input2, "value").writeStream.format("noop").start() + input1.selectExpr("timestamp", "mod(value, 100) as mod", "value") + .withWatermark("timestamp", "0 second") + .groupBy(windowFn($"timestamp", "10 seconds", "2 seconds"), $"mod") + .agg(avg("value").as("avg_value")) + .writeStream.format("noop").trigger(Trigger.ProcessingTime("5 seconds")).start() val completedQuery = - input1.join(input2, "value").writeStream.format("noop").start() + input2.join(input3, "value").writeStream.format("noop").start() completedQuery.stop() val failedQuery = spark.readStream.format("rate").load().select("value").as[Long] .map(_ / 0).writeStream.format("noop").start() @@ -138,6 +146,7 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers with B summaryText should contain ("Input Rows (?)") summaryText should contain ("Batch Duration (?)") summaryText should contain ("Operation Duration (?)") + summaryText should contain ("Global Watermark Gap (?)") summaryText should contain ("Aggregated Number Of Total State Rows (?)") summaryText should contain ("Aggregated Number Of Updated State Rows (?)") summaryText should contain ("Aggregated State 
Memory Used In Bytes (?)") From c3ce9701b458511255072c72b9b245036fa98653 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Tue, 24 Nov 2020 20:18:45 -0800 Subject: [PATCH 0567/1009] [SPARK-33533][SQL] Fix the regression bug that ConnectionProviders don't consider case-sensitivity for properties ### What changes were proposed in this pull request? This PR fixes an issue that `BasicConnectionProvider` doesn't consider case-sensitivity for properties. For example, the property `oracle.jdbc.mapDateToTimestamp` should be considered case-sensitivity but it is not considered. ### Why are the changes needed? This is a bug introduced by #29024 . Caused by this issue, `OracleIntegrationSuite` doesn't pass. ``` [info] - SPARK-16625: General data types to be mapped to Oracle *** FAILED *** (32 seconds, 129 milliseconds) [info] types.apply(9).equals(org.apache.spark.sql.types.DateType) was false (OracleIntegrationSuite.scala:238) [info] org.scalatest.exceptions.TestFailedException: [info] at org.scalatest.Assertions.newAssertionFailedException(Assertions.scala:472) [info] at org.scalatest.Assertions.newAssertionFailedException$(Assertions.scala:471) [info] at org.scalatest.Assertions$.newAssertionFailedException(Assertions.scala:1231) [info] at org.scalatest.Assertions$AssertionsHelper.macroAssert(Assertions.scala:1295) [info] at org.apache.spark.sql.jdbc.OracleIntegrationSuite.$anonfun$new$4(OracleIntegrationSuite.scala:238) [info] at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85) [info] at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83) [info] at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) [info] at org.scalatest.Transformer.apply(Transformer.scala:22) [info] at org.scalatest.Transformer.apply(Transformer.scala:20) [info] at org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:190) [info] at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:176) [info] at org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:188) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest$1(AnyFunSuiteLike.scala:200) [info] at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTest(AnyFunSuiteLike.scala:200) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTest$(AnyFunSuiteLike.scala:182) [info] at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterEach$$super$runTest(SparkFunSuite.scala:61) [info] at org.scalatest.BeforeAndAfterEach.runTest(BeforeAndAfterEach.scala:234) [info] at org.scalatest.BeforeAndAfterEach.runTest$(BeforeAndAfterEach.scala:227) [info] at org.apache.spark.SparkFunSuite.runTest(SparkFunSuite.scala:61) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTests$1(AnyFunSuiteLike.scala:233) [info] at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:413) [info] at scala.collection.immutable.List.foreach(List.scala:392) [info] at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401) [info] at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:396) [info] at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:475) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTests(AnyFunSuiteLike.scala:233) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTests$(AnyFunSuiteLike.scala:232) [info] at org.scalatest.funsuite.AnyFunSuite.runTests(AnyFunSuite.scala:1563) [info] at org.scalatest.Suite.run(Suite.scala:1112) [info] at org.scalatest.Suite.run$(Suite.scala:1094) [info] at 
org.scalatest.funsuite.AnyFunSuite.org$scalatest$funsuite$AnyFunSuiteLike$$super$run(AnyFunSuite.scala:1563) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$run$1(AnyFunSuiteLike.scala:237) [info] at org.scalatest.SuperEngine.runImpl(Engine.scala:535) [info] at org.scalatest.funsuite.AnyFunSuiteLike.run(AnyFunSuiteLike.scala:237) [info] at org.scalatest.funsuite.AnyFunSuiteLike.run$(AnyFunSuiteLike.scala:236) [info] at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:61) [info] at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:213) [info] at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210) [info] at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208) [info] at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:61) [info] at org.scalatest.tools.Framework.org$scalatest$tools$Framework$$runSuite(Framework.scala:318) [info] at org.scalatest.tools.Framework$ScalaTestTask.execute(Framework.scala:513) [info] at sbt.ForkMain$Run.lambda$runTest$1(ForkMain.java:413) [info] at java.util.concurrent.FutureTask.run(FutureTask.java:266) [info] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [info] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [info] at java.lang.Thread.run(Thread.java:748) ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? With this change, I confirmed that `OracleIntegrationSuite` passes with the following command. ``` $ git clone https://github.com/oracle/docker-images.git $ cd docker-images/OracleDatabase/SingleInstance/dockerfiles $ ./buildDockerImage.sh -v 18.4.0 -x $ ORACLE_DOCKER_IMAGE_NAME=oracle/database:18.4.0-xe build/sbt -Pdocker-integration-tests -Phive -Phive-thriftserver "testOnly org.apache.spark.sql.jdbc.OracleIntegrationSuite" ``` Closes #30485 from sarutak/fix-oracle-integration-suite. 
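For illustration, a minimal usage sketch (the connection URL, user and table name below are placeholders, not part of this patch): the mixed-case driver property mentioned in the description, `oracle.jdbc.mapDateToTimestamp`, is set through the DataFrame reader and, with this fix, reaches the Oracle driver with its original casing preserved.

```
// Hypothetical connection details; only the mixed-case property key matters here.
val df = spark.read
  .format("jdbc")
  .option("url", "jdbc:oracle:thin:@//dbhost:1521/XEPDB1")
  .option("user", "scott")
  .option("dbtable", "SOME_SCHEMA.SOME_TABLE")
  // must be handed to the driver as-is, not lower-cased
  .option("oracle.jdbc.mapDateToTimestamp", "false")
  .load()
```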
Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .../datasources/jdbc/connection/BasicConnectionProvider.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala index 1c0513f982a1e..890205f2f6826 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala @@ -20,6 +20,8 @@ package org.apache.spark.sql.execution.datasources.jdbc.connection import java.sql.{Connection, Driver} import java.util.Properties +import scala.collection.JavaConverters._ + import org.apache.spark.internal.Logging import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions import org.apache.spark.sql.jdbc.JdbcConnectionProvider @@ -40,7 +42,7 @@ private[jdbc] class BasicConnectionProvider extends JdbcConnectionProvider with override def getConnection(driver: Driver, options: Map[String, String]): Connection = { val jdbcOptions = new JDBCOptions(options) val properties = getAdditionalProperties(jdbcOptions) - options.foreach { case(k, v) => + jdbcOptions.asProperties.asScala.foreach { case(k, v) => properties.put(k, v) } logDebug(s"JDBC connection initiated with URL: ${jdbcOptions.url} and properties: $properties") From 781e19c4d1f376b52e5305078356bf0a58522bcd Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Wed, 25 Nov 2020 16:38:55 +0900 Subject: [PATCH 0568/1009] [SPARK-33477][SQL] Hive Metastore support filter by date type ### What changes were proposed in this pull request? Hive Metastore supports strings and integral types in filters. It could also support dates. Please see [HIVE-5679](https://github.com/apache/hive/commit/5106bf1c8671740099fca8e1a7d4b37afe97137f) for more details. This pr add support it. ### Why are the changes needed? Improve query performance. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #30408 from wangyum/SPARK-33477. 
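For illustration, a hedged sketch (table and column names are made up for this example, and a Hive-enabled SparkSession is assumed): with date support in `convertFilters`, the partition predicate below can be converted to a metastore filter string such as `dt >= 2019-01-01 and dt <= 2019-01-07` instead of fetching all partitions and pruning them on the Spark side.

```
// Illustrative date-partitioned Hive table; the date predicate is what can now be
// pushed to the Hive metastore as a partition filter.
spark.sql("CREATE TABLE sales (amount INT) PARTITIONED BY (dt DATE) STORED AS PARQUET")
spark.sql(
  "SELECT sum(amount) FROM sales " +
    "WHERE dt BETWEEN DATE '2019-01-01' AND DATE '2019-01-07'").show()
```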
Authored-by: Yuming Wang Signed-off-by: HyukjinKwon --- .../spark/sql/hive/HiveExternalCatalog.scala | 6 +- .../spark/sql/hive/client/HiveClient.scala | 3 +- .../sql/hive/client/HiveClientImpl.scala | 6 +- .../spark/sql/hive/client/HiveShim.scala | 46 ++++++++-- .../spark/sql/hive/client/FiltersSuite.scala | 35 ++++++- .../client/HivePartitionFilteringSuite.scala | 92 +++++++++++++++---- .../spark/sql/hive/client/VersionsSuite.scala | 3 +- 7 files changed, 155 insertions(+), 36 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 907bb86ad0c1c..54c237f78cb9c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -40,7 +40,7 @@ import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils._ import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils} import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.{PartitioningUtils, SourceOptions} import org.apache.spark.sql.hive.client.HiveClient @@ -1264,11 +1264,13 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat defaultTimeZoneId: String): Seq[CatalogTablePartition] = withClient { val rawTable = getRawTable(db, table) val catalogTable = restoreTableMetadata(rawTable) + val timeZoneId = CaseInsensitiveMap(catalogTable.storage.properties).getOrElse( + DateTimeUtils.TIMEZONE_OPTION, defaultTimeZoneId) val partColNameMap = buildLowerCasePartColNameMap(catalogTable) val clientPrunedPartitions = - client.getPartitionsByFilter(rawTable, predicates).map { part => + client.getPartitionsByFilter(rawTable, predicates, timeZoneId).map { part => part.copy(spec = restorePartitionSpec(part.spec, partColNameMap)) } prunePartitionsByFilter(catalogTable, clientPrunedPartitions, predicates, defaultTimeZoneId) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala index 3ea80eaf6f714..48f3837740933 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala @@ -233,7 +233,8 @@ private[hive] trait HiveClient { /** Returns partitions filtered by predicates for the given table. */ def getPartitionsByFilter( catalogTable: CatalogTable, - predicates: Seq[Expression]): Seq[CatalogTablePartition] + predicates: Seq[Expression], + timeZoneId: String): Seq[CatalogTablePartition] /** Loads a static partition into an existing table. 
*/ def loadPartition( diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 9bc99b08c2cc8..b2f0867114bae 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -733,9 +733,11 @@ private[hive] class HiveClientImpl( override def getPartitionsByFilter( table: CatalogTable, - predicates: Seq[Expression]): Seq[CatalogTablePartition] = withHiveState { + predicates: Seq[Expression], + timeZoneId: String): Seq[CatalogTablePartition] = withHiveState { val hiveTable = toHiveTable(table, Some(userName)) - val parts = shim.getPartitionsByFilter(client, hiveTable, predicates).map(fromHivePartition) + val parts = shim.getPartitionsByFilter(client, hiveTable, predicates, timeZoneId) + .map(fromHivePartition) HiveCatalogMetrics.incrementFetchedPartitions(parts.length) parts } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala index d989f0154ea95..17a64a67df283 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala @@ -45,9 +45,9 @@ import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.catalyst.analysis.NoSuchPermanentFunctionException import org.apache.spark.sql.catalyst.catalog.{CatalogFunction, CatalogTablePartition, CatalogUtils, FunctionResource, FunctionResourceType} import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.util.TypeUtils +import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, TypeUtils} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{AtomicType, IntegralType, StringType} +import org.apache.spark.sql.types.{AtomicType, DateType, IntegralType, StringType} import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils @@ -79,7 +79,11 @@ private[client] sealed abstract class Shim { def getAllPartitions(hive: Hive, table: Table): Seq[Partition] - def getPartitionsByFilter(hive: Hive, table: Table, predicates: Seq[Expression]): Seq[Partition] + def getPartitionsByFilter( + hive: Hive, + table: Table, + predicates: Seq[Expression], + timeZoneId: String): Seq[Partition] def getCommandProcessor(token: String, conf: HiveConf): CommandProcessor @@ -349,7 +353,8 @@ private[client] class Shim_v0_12 extends Shim with Logging { override def getPartitionsByFilter( hive: Hive, table: Table, - predicates: Seq[Expression]): Seq[Partition] = { + predicates: Seq[Expression], + timeZoneId: String): Seq[Partition] = { // getPartitionsByFilter() doesn't support binary comparison ops in Hive 0.12. // See HIVE-4888. logDebug("Hive 0.12 doesn't support predicate pushdown to metastore. " + @@ -632,7 +637,9 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { * * Unsupported predicates are skipped. */ - def convertFilters(table: Table, filters: Seq[Expression]): String = { + def convertFilters(table: Table, filters: Seq[Expression], timeZoneId: String): String = { + lazy val dateFormatter = DateFormatter(DateTimeUtils.getZoneId(timeZoneId)) + /** * An extractor that matches all binary comparison operators except null-safe equality. 
* @@ -650,6 +657,8 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { case Literal(null, _) => None // `null`s can be cast as other types; we want to avoid NPEs. case Literal(value, _: IntegralType) => Some(value.toString) case Literal(value, _: StringType) => Some(quoteStringLiteral(value.toString)) + case Literal(value, _: DateType) => + Some(dateFormatter.format(value.asInstanceOf[Int])) case _ => None } } @@ -700,6 +709,21 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { } } + object ExtractableDateValues { + private lazy val valueToLiteralString: PartialFunction[Any, String] = { + case value: Int => dateFormatter.format(value) + } + + def unapply(values: Set[Any]): Option[Seq[String]] = { + val extractables = values.toSeq.map(valueToLiteralString.lift) + if (extractables.nonEmpty && extractables.forall(_.isDefined)) { + Some(extractables.map(_.get)) + } else { + None + } + } + } + object SupportedAttribute { // hive varchar is treated as catalyst string, but hive varchar can't be pushed down. private val varcharKeys = table.getPartitionKeys.asScala @@ -711,7 +735,8 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { val resolver = SQLConf.get.resolver if (varcharKeys.exists(c => resolver(c, attr.name))) { None - } else if (attr.dataType.isInstanceOf[IntegralType] || attr.dataType == StringType) { + } else if (attr.dataType.isInstanceOf[IntegralType] || attr.dataType == StringType || + attr.dataType == DateType) { Some(attr.name) } else { None @@ -748,6 +773,10 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { convert(And(GreaterThanOrEqual(child, Literal(sortedValues.head, dataType)), LessThanOrEqual(child, Literal(sortedValues.last, dataType)))) + case InSet(child @ ExtractAttribute(SupportedAttribute(name)), ExtractableDateValues(values)) + if useAdvanced && child.dataType == DateType => + Some(convertInToOr(name, values)) + case InSet(ExtractAttribute(SupportedAttribute(name)), ExtractableValues(values)) if useAdvanced => Some(convertInToOr(name, values)) @@ -803,11 +832,12 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { override def getPartitionsByFilter( hive: Hive, table: Table, - predicates: Seq[Expression]): Seq[Partition] = { + predicates: Seq[Expression], + timeZoneId: String): Seq[Partition] = { // Hive getPartitionsByFilter() takes a string that represents partition // predicates like "str_key=\"value\" and int_key=1 ..." 
- val filter = convertFilters(table, predicates) + val filter = convertFilters(table, predicates, timeZoneId) val partitions = if (filter.isEmpty) { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala index 12b409e487061..6c0531182e6d6 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.hive.client +import java.sql.Date import java.util.Collections import org.apache.hadoop.hive.metastore.api.FieldSchema @@ -29,6 +30,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String /** * A set of tests for the filter conversion logic used when pushing partition pruning into the @@ -63,6 +65,28 @@ class FiltersSuite extends SparkFunSuite with Logging with PlanTest { (Literal(1) === a("intcol", IntegerType)) :: (Literal("a") === a("strcol", IntegerType)) :: Nil, "1 = intcol and \"a\" = strcol") + filterTest("date filter", + (a("datecol", DateType) === Literal(Date.valueOf("2019-01-01"))) :: Nil, + "datecol = 2019-01-01") + + filterTest("date filter with IN predicate", + (a("datecol", DateType) in + (Literal(Date.valueOf("2019-01-01")), Literal(Date.valueOf("2019-01-07")))) :: Nil, + "(datecol = 2019-01-01 or datecol = 2019-01-07)") + + filterTest("date and string filter", + (Literal(Date.valueOf("2019-01-01")) === a("datecol", DateType)) :: + (Literal("a") === a("strcol", IntegerType)) :: Nil, + "2019-01-01 = datecol and \"a\" = strcol") + + filterTest("date filter with null", + (a("datecol", DateType) === Literal(null)) :: Nil, + "") + + filterTest("string filter with InSet predicate", + InSet(a("strcol", StringType), Set("1", "2").map(s => UTF8String.fromString(s))) :: Nil, + "(strcol = \"1\" or strcol = \"2\")") + filterTest("skip varchar", (Literal("") === a("varchar", StringType)) :: Nil, "") @@ -89,7 +113,7 @@ class FiltersSuite extends SparkFunSuite with Logging with PlanTest { private def filterTest(name: String, filters: Seq[Expression], result: String) = { test(name) { withSQLConf(SQLConf.ADVANCED_PARTITION_PREDICATE_PUSHDOWN.key -> "true") { - val converted = shim.convertFilters(testTable, filters) + val converted = shim.convertFilters(testTable, filters, conf.sessionLocalTimeZone) if (converted != result) { fail(s"Expected ${filters.mkString(",")} to convert to '$result' but got '$converted'") } @@ -104,7 +128,7 @@ class FiltersSuite extends SparkFunSuite with Logging with PlanTest { val filters = (Literal(1) === a("intcol", IntegerType) || Literal(2) === a("intcol", IntegerType)) :: Nil - val converted = shim.convertFilters(testTable, filters) + val converted = shim.convertFilters(testTable, filters, conf.sessionLocalTimeZone) if (enabled) { assert(converted == "(1 = intcol or 2 = intcol)") } else { @@ -116,7 +140,7 @@ class FiltersSuite extends SparkFunSuite with Logging with PlanTest { test("SPARK-33416: Avoid Hive metastore stack overflow when InSet predicate have many values") { def checkConverted(inSet: InSet, result: String): Unit = { - assert(shim.convertFilters(testTable, inSet :: Nil) == result) + assert(shim.convertFilters(testTable, inSet :: Nil, conf.sessionLocalTimeZone) == result) } 
withSQLConf(SQLConf.HIVE_METASTORE_PARTITION_PRUNING_INSET_THRESHOLD.key -> "15") { @@ -139,6 +163,11 @@ class FiltersSuite extends SparkFunSuite with Logging with PlanTest { InSet(a("doublecol", DoubleType), Range(1, 20).map(s => Literal(s.toDouble).eval(EmptyRow)).toSet), "") + + checkConverted( + InSet(a("datecol", DateType), + Range(1, 20).map(d => Literal(d, DateType).eval(EmptyRow)).toSet), + "(datecol >= 1970-01-02 and datecol <= 1970-01-20)") } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala index 81186909bb167..ab83f751f1425 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.hive.client +import java.sql.Date + import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -28,7 +30,8 @@ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.types.{BooleanType, IntegerType, LongType, StringType, StructType} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{BooleanType, DateType, IntegerType, LongType, StringType, StructType} import org.apache.spark.util.Utils class HivePartitionFilteringSuite(version: String) @@ -38,15 +41,16 @@ class HivePartitionFilteringSuite(version: String) private val testPartitionCount = 3 * 5 * 4 - private def init(tryDirectSql: Boolean): HiveClient = { - val storageFormat = CatalogStorageFormat( - locationUri = None, - inputFormat = None, - outputFormat = None, - serde = None, - compressed = false, - properties = Map.empty) + private val storageFormat = CatalogStorageFormat( + locationUri = None, + inputFormat = Some(classOf[TextInputFormat].getName), + outputFormat = Some(classOf[HiveIgnoreKeyTextOutputFormat[_, _]].getName), + serde = Some(classOf[LazySimpleSerDe].getName()), + compressed = false, + properties = Map.empty + ) + private def init(tryDirectSql: Boolean): HiveClient = { val hadoopConf = new Configuration() hadoopConf.setBoolean(tryDirectSqlKey, tryDirectSql) hadoopConf.set("hive.metastore.warehouse.dir", Utils.createTempDir().toURI().toString()) @@ -58,14 +62,7 @@ class HivePartitionFilteringSuite(version: String) tableType = CatalogTableType.MANAGED, schema = tableSchema, partitionColumnNames = Seq("ds", "h", "chunk"), - storage = CatalogStorageFormat( - locationUri = None, - inputFormat = Some(classOf[TextInputFormat].getName), - outputFormat = Some(classOf[HiveIgnoreKeyTextOutputFormat[_, _]].getName), - serde = Some(classOf[LazySimpleSerDe].getName()), - compressed = false, - properties = Map.empty - )) + storage = storageFormat) client.createTable(table, ignoreIfExists = false) val partitions = @@ -102,7 +99,7 @@ class HivePartitionFilteringSuite(version: String) test(s"getPartitionsByFilter returns all partitions when $tryDirectSqlKey=false") { val client = init(false) val filteredPartitions = client.getPartitionsByFilter(client.getTable("default", "test"), - Seq(attr("ds") === 20170101)) + Seq(attr("ds") === 20170101), SQLConf.get.sessionLocalTimeZone) assert(filteredPartitions.size == 
testPartitionCount) } @@ -297,6 +294,63 @@ class HivePartitionFilteringSuite(version: String) day :: Nil) } + test("getPartitionsByFilter: date type pruning by metastore") { + val table = CatalogTable( + identifier = TableIdentifier("test_date", Some("default")), + tableType = CatalogTableType.MANAGED, + schema = new StructType().add("value", "int").add("part", "date"), + partitionColumnNames = Seq("part"), + storage = storageFormat) + client.createTable(table, ignoreIfExists = false) + + val partitions = + for { + date <- Seq("2019-01-01", "2019-01-02", "2019-01-03", "2019-01-04") + } yield CatalogTablePartition(Map( + "part" -> date + ), storageFormat) + assert(partitions.size == 4) + + client.createPartitions("default", "test_date", partitions, ignoreIfExists = false) + + def testDataTypeFiltering( + filterExprs: Seq[Expression], + expectedPartitionCubes: Seq[Seq[Date]]): Unit = { + val filteredPartitions = client.getPartitionsByFilter( + client.getTable("default", "test_date"), + filterExprs, + SQLConf.get.sessionLocalTimeZone) + + val expectedPartitions = expectedPartitionCubes.map { + expectedDt => + for { + dt <- expectedDt + } yield Set( + "part" -> dt.toString + ) + }.reduce(_ ++ _) + + assert(filteredPartitions.map(_.spec.toSet).toSet == expectedPartitions.toSet) + } + + val dateAttr: Attribute = AttributeReference("part", DateType)() + + testDataTypeFiltering( + Seq(dateAttr === Date.valueOf("2019-01-01")), + Seq("2019-01-01").map(Date.valueOf) :: Nil) + testDataTypeFiltering( + Seq(dateAttr > Date.valueOf("2019-01-02")), + Seq("2019-01-03", "2019-01-04").map(Date.valueOf) :: Nil) + testDataTypeFiltering( + Seq(In(dateAttr, + Seq("2019-01-01", "2019-01-02").map(d => Literal(Date.valueOf(d))))), + Seq("2019-01-01", "2019-01-02").map(Date.valueOf) :: Nil) + testDataTypeFiltering( + Seq(InSet(dateAttr, + Set("2019-01-01", "2019-01-02").map(d => Literal(Date.valueOf(d)).eval(EmptyRow)))), + Seq("2019-01-01", "2019-01-02").map(Date.valueOf) :: Nil) + } + private def testMetastorePartitionFiltering( filterExpr: Expression, expectedDs: Seq[Int], @@ -333,7 +387,7 @@ class HivePartitionFilteringSuite(version: String) val filteredPartitions = client.getPartitionsByFilter(client.getTable("default", "test"), Seq( transform(filterExpr) - )) + ), SQLConf.get.sessionLocalTimeZone) val expectedPartitionCount = expectedPartitionCubes.map { case (expectedDs, expectedH, expectedChunks) => diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala index c5c92ddad9014..d9ba6dd80e4ef 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala @@ -488,7 +488,8 @@ class VersionsSuite extends SparkFunSuite with Logging { test(s"$version: getPartitionsByFilter") { // Only one partition [1, 1] for key2 == 1 val result = client.getPartitionsByFilter(client.getTable("default", "src_part"), - Seq(EqualTo(AttributeReference("key2", IntegerType)(), Literal(1)))) + Seq(EqualTo(AttributeReference("key2", IntegerType)(), Literal(1))), + versionSpark.conf.sessionLocalTimeZone) // Hive 0.12 doesn't support getPartitionsByFilter, it ignores the filter condition. 
if (version != "0.12") { From 19f3b89d62932fef96e72095164920deb64ea647 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Wed, 25 Nov 2020 08:59:31 +0000 Subject: [PATCH 0569/1009] [SPARK-33549][SQL] Remove configuration spark.sql.legacy.allowCastNumericToTimestamp ### What changes were proposed in this pull request? Remove SQL configuration spark.sql.legacy.allowCastNumericToTimestamp ### Why are the changes needed? In the current master branch, there is a new configuration `spark.sql.legacy.allowCastNumericToTimestamp` which controls whether to cast Numeric types to Timestamp or not. The default value is true. After https://github.com/apache/spark/pull/30260, the type conversion between Timestamp type and Numeric type is disallowed in ANSI mode. So, we don't need to a separate configuration `spark.sql.legacy.allowCastNumericToTimestamp` for disallowing the conversion. Users just need to set `spark.sql.ansi.enabled` for the behavior. As the configuration is not in any released yet, we should remove the configuration to make things simpler. ### Does this PR introduce _any_ user-facing change? No, since the configuration is not released yet. ### How was this patch tested? Existing test cases Closes #30493 from gengliangwang/LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP. Authored-by: Gengliang Wang Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/expressions/Cast.scala | 13 ++----------- .../org/apache/spark/sql/internal/SQLConf.scala | 12 ------------ .../spark/sql/catalyst/expressions/CastSuite.scala | 14 -------------- .../hive/execution/HiveCompatibilitySuite.scala | 6 ------ 4 files changed, 2 insertions(+), 43 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 5afc308e52ead..e5f11b5e74916 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -59,8 +59,7 @@ object Cast { case (StringType, TimestampType) => true case (BooleanType, TimestampType) => true case (DateType, TimestampType) => true - case (_: NumericType, TimestampType) => - SQLConf.get.getConf(SQLConf.LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP) + case (_: NumericType, TimestampType) => true case (StringType, DateType) => true case (TimestampType, DateType) => true @@ -273,15 +272,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit TypeCheckResult.TypeCheckSuccess } else { TypeCheckResult.TypeCheckFailure( - if (child.dataType.isInstanceOf[NumericType] && dataType.isInstanceOf[TimestampType]) { - s"cannot cast ${child.dataType.catalogString} to ${dataType.catalogString}," + - "you can enable the casting by setting " + - s"${SQLConf.LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP.key} to true," + - "but we strongly recommend using function " + - "TIMESTAMP_SECONDS/TIMESTAMP_MILLIS/TIMESTAMP_MICROS instead." 
- } else { - s"cannot cast ${child.dataType.catalogString} to ${dataType.catalogString}" - }) + s"cannot cast ${child.dataType.catalogString} to ${dataType.catalogString}") } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index ef974dc176e51..0738478888aeb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2824,15 +2824,6 @@ object SQLConf { .checkValue(_ > 0, "The timeout value must be positive") .createWithDefault(10L) - val LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP = - buildConf("spark.sql.legacy.allowCastNumericToTimestamp") - .internal() - .doc("When true, allow casting numeric to timestamp," + - "when false, forbid the cast, more details in SPARK-31710") - .version("3.1.0") - .booleanConf - .createWithDefault(true) - val COALESCE_BUCKETS_IN_JOIN_ENABLED = buildConf("spark.sql.bucketing.coalesceBucketsInJoin.enabled") .doc("When true, if two bucketed tables with the different number of buckets are joined, " + @@ -3550,9 +3541,6 @@ class SQLConf extends Serializable with Logging { def integerGroupingIdEnabled: Boolean = getConf(SQLConf.LEGACY_INTEGER_GROUPING_ID) - def legacyAllowCastNumericToTimestamp: Boolean = - getConf(SQLConf.LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP) - def metadataCacheTTL: Long = getConf(StaticSQLConf.METADATA_CACHE_TTL_SECONDS) def coalesceBucketsInJoinEnabled: Boolean = getConf(SQLConf.COALESCE_BUCKETS_IN_JOIN_ENABLED) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index afb76d8a5a68c..2bc27ad35efff 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -1311,20 +1311,6 @@ class CastSuite extends CastSuiteBase { } } - test("SPARK-31710: fail casting from numeric to timestamp if it is forbidden") { - Seq(true, false).foreach { enable => - withSQLConf(SQLConf.LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP.key -> enable.toString) { - assert(cast(2.toByte, TimestampType).resolved == enable) - assert(cast(10.toShort, TimestampType).resolved == enable) - assert(cast(3, TimestampType).resolved == enable) - assert(cast(10L, TimestampType).resolved == enable) - assert(cast(Decimal(1.2), TimestampType).resolved == enable) - assert(cast(1.7f, TimestampType).resolved == enable) - assert(cast(2.3d, TimestampType).resolved == enable) - } - } - } - test("SPARK-32828: cast from a derived user-defined type to a base type") { val v = Literal.create(Row(1), new ExampleSubTypeUDT()) checkEvaluation(cast(v, new ExampleBaseTypeUDT), Row(1)) diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index d9b6bb43c2b47..462206d8c546f 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -40,8 +40,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { private val originalInMemoryPartitionPruning = TestHive.conf.inMemoryPartitionPruning 
private val originalCrossJoinEnabled = TestHive.conf.crossJoinEnabled private val originalSessionLocalTimeZone = TestHive.conf.sessionLocalTimeZone - private val originalLegacyAllowCastNumericToTimestamp = - TestHive.conf.legacyAllowCastNumericToTimestamp def testCases: Seq[(String, File)] = { hiveQueryDir.listFiles.map(f => f.getName.stripSuffix(".q") -> f) @@ -61,8 +59,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { // Fix session local timezone to America/Los_Angeles for those timezone sensitive tests // (timestamp_*) TestHive.setConf(SQLConf.SESSION_LOCAL_TIMEZONE, "America/Los_Angeles") - // Ensures that cast numeric to timestamp enabled so that we can test them - TestHive.setConf(SQLConf.LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP, true) RuleExecutor.resetMetrics() } @@ -73,8 +69,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, originalInMemoryPartitionPruning) TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, originalCrossJoinEnabled) TestHive.setConf(SQLConf.SESSION_LOCAL_TIMEZONE, originalSessionLocalTimeZone) - TestHive.setConf(SQLConf.LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP, - originalLegacyAllowCastNumericToTimestamp) // For debugging dump some statistics about how much time was spent in various optimizer rules logWarning(RuleExecutor.dumpTimeSpent()) From 2c5cc36e3f59011009c3c6083e0d0c1c81857cbd Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 25 Nov 2020 12:41:53 +0000 Subject: [PATCH 0570/1009] [SPARK-33509][SQL] List partition by names from a V2 table which supports partition management ### What changes were proposed in this pull request? 1. Add new method `listPartitionByNames` to the `SupportsPartitionManagement` interface. It allows to list partitions by partition names and their values. 2. Implement new method in `InMemoryPartitionTable` which is used in DSv2 tests. ### Why are the changes needed? Currently, the `SupportsPartitionManagement` interface exposes only `listPartitionIdentifiers` which allows to list partitions by partition values. And it requires to specify all values for partition schema fields in the prefix. This restriction does not allow to list partitions by some of partition names (not all of them). For example, the table `tableA` is partitioned by two column `year` and `month` ``` CREATE TABLE tableA (price int, year int, month int) USING _ partitioned by (year, month) ``` and has the following partitions: ``` PARTITION(year = 2015, month = 1) PARTITION(year = 2015, month = 2) PARTITION(year = 2016, month = 2) PARTITION(year = 2016, month = 3) ``` If we want to list all partitions with `month = 2`, we have to specify `year` for **listPartitionIdentifiers()** which not always possible as we don't know all `year` values in advance. New method **listPartitionByNames()** allows to specify partition values only for `month`, and get two partitions: ``` PARTITION(year = 2015, month = 2) PARTITION(year = 2016, month = 2) ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running the affected test suite `SupportsPartitionManagementSuite`. Closes #30452 from MaxGekk/column-names-listPartitionIdentifiers. 
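For illustration, a hedged sketch of the call shape using the year/month example above (`partTable` is assumed to be a table implementing `SupportsPartitionManagement` that is partitioned by `(year, month)`, e.g. an `InMemoryPartitionTable` set up as in the updated test suite):

```
import org.apache.spark.sql.catalyst.InternalRow

// List every partition whose month value is 2, regardless of year.
val monthTwo: Array[InternalRow] =
  partTable.listPartitionByNames(Array("month"), InternalRow(2))
// Expected identifiers: InternalRow(2015, 2) and InternalRow(2016, 2)
```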
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../catalog/SupportsPartitionManagement.java | 11 ++++- .../connector/InMemoryPartitionTable.scala | 22 ++++++++++ .../SupportsPartitionManagementSuite.scala | 43 ++++++++++++++++++- 3 files changed, 74 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java index 446ea1463309f..380717d2e0e9b 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java @@ -106,10 +106,19 @@ Map loadPartitionMetadata(InternalRow ident) throws UnsupportedOperationException; /** - * List the identifiers of all partitions that contains the ident in a table. + * List the identifiers of all partitions that have the ident prefix in a table. * * @param ident a prefix of partition identifier * @return an array of Identifiers for the partitions */ InternalRow[] listPartitionIdentifiers(InternalRow ident); + + /** + * List the identifiers of all partitions that match to the ident by names. + * + * @param names the names of partition values in the identifier. + * @param ident a partition identifier values. + * @return an array of Identifiers for the partitions + */ + InternalRow[] listPartitionByNames(String[] names, InternalRow ident); } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala index 23987e909aa70..ba762a58b1e52 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala @@ -24,6 +24,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, PartitionAlreadyExistsException} +import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.connector.catalog.SupportsPartitionManagement import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.types.StructType @@ -96,4 +97,25 @@ class InMemoryPartitionTable( override protected def addPartitionKey(key: Seq[Any]): Unit = { memoryTablePartitions.put(InternalRow.fromSeq(key), Map.empty[String, String].asJava) } + + override def listPartitionByNames( + names: Array[String], + ident: InternalRow): Array[InternalRow] = { + assert(names.length == ident.numFields, + s"Number of partition names (${names.length}) must be equal to " + + s"the number of partition values (${ident.numFields}).") + val schema = partitionSchema + assert(names.forall(fieldName => schema.fieldNames.contains(fieldName)), + s"Some partition names ${names.mkString("[", ", ", "]")} don't belong to " + + s"the partition schema '${schema.sql}'.") + val indexes = names.map(schema.fieldIndex) + val dataTypes = names.map(schema(_).dataType) + val currentRow = new GenericInternalRow(new Array[Any](names.length)) + memoryTablePartitions.keySet().asScala.filter { key => + for (i <- 0 until names.length) { + currentRow.values(i) = key.get(indexes(i), dataTypes(i)) + } + currentRow == ident + }.toArray + } } diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala index e8e28e3422f27..caf7e91612563 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala @@ -23,7 +23,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.connector.{InMemoryPartitionTable, InMemoryTableCatalog} +import org.apache.spark.sql.connector.{InMemoryPartitionTable, InMemoryPartitionTableCatalog, InMemoryTableCatalog} import org.apache.spark.sql.connector.expressions.{LogicalExpressions, NamedReference} import org.apache.spark.sql.types.{IntegerType, StringType, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -140,4 +140,45 @@ class SupportsPartitionManagementSuite extends SparkFunSuite { partTable.dropPartition(partIdent1) assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) } + + test("listPartitionByNames") { + val partCatalog = new InMemoryPartitionTableCatalog + partCatalog.initialize("test", CaseInsensitiveStringMap.empty()) + val table = partCatalog.createTable( + ident, + new StructType() + .add("col0", IntegerType) + .add("part0", IntegerType) + .add("part1", StringType), + Array(LogicalExpressions.identity(ref("part0")), LogicalExpressions.identity(ref("part1"))), + util.Collections.emptyMap[String, String]) + val partTable = table.asInstanceOf[InMemoryPartitionTable] + + Seq( + InternalRow(0, "abc"), + InternalRow(0, "def"), + InternalRow(1, "abc")).foreach { partIdent => + partTable.createPartition(partIdent, new util.HashMap[String, String]()) + } + + Seq( + (Array("part0", "part1"), InternalRow(0, "abc")) -> Set(InternalRow(0, "abc")), + (Array("part0"), InternalRow(0)) -> Set(InternalRow(0, "abc"), InternalRow(0, "def")), + (Array("part1"), InternalRow("abc")) -> Set(InternalRow(0, "abc"), InternalRow(1, "abc")), + (Array.empty[String], InternalRow.empty) -> + Set(InternalRow(0, "abc"), InternalRow(0, "def"), InternalRow(1, "abc")), + (Array("part0", "part1"), InternalRow(3, "xyz")) -> Set(), + (Array("part1"), InternalRow(3.14f)) -> Set() + ).foreach { case ((names, idents), expected) => + assert(partTable.listPartitionByNames(names, idents).toSet === expected) + } + // Check invalid parameters + Seq( + (Array("part0", "part1"), InternalRow(0)), + (Array("col0", "part1"), InternalRow(0, 1)), + (Array("wrong"), InternalRow("invalid")) + ).foreach { case (names, idents) => + intercept[AssertionError](partTable.listPartitionByNames(names, idents)) + } + } } From 7c59aeeef4c571838bd291079f9b804d6f546487 Mon Sep 17 00:00:00 2001 From: duripeng Date: Wed, 25 Nov 2020 12:50:21 +0000 Subject: [PATCH 0571/1009] [SPARK-27194][SPARK-29302][SQL] Fix commit collision in dynamic partition overwrite mode ### What changes were proposed in this pull request? When using dynamic partition overwrite, each task has its working dir under staging dir like `stagingDir/.spark-staging-{jobId}`, each task commits to `outputPath/.spark-staging-{jobId}/{partitionId}/part-{taskId}-{jobId}{ext}`. When speculation enable, multiple task attempts would be setup for one task, **they have same task id and they would commit to same file concurrently**. 
Due to a host going down or node preemption, the partly-committed files aren't cleaned up, so a `FileAlreadyExistsException` is raised in this situation, resulting in job failure. I don't try to change the task commit process for dynamic partition overwrite (e.g. adding the attempt id to each attempt's task working dir and committing to the final output dir via a new outputCommitCoordinator), for these reasons: 1. `FileOutputCommitter` already has a commit coordinator for task attempts, so we can leverage it rather than build a new one. 2. Even if we implemented a coordinator that resolves task attempt commit conflicts, consider a severe case such as an application master failover: tasks with the same attempt id and the same task id would still commit to the same files, so the `FileAlreadyExistsException` risk remains. In this PR, I leverage `FileOutputCommitter` to solve the problem: 1. when initializing a write job description, set `outputPath/.spark-staging-{jobId}` as the output dir 2. each task attempt writes output to `outputPath/.spark-staging-{jobId}/_temporary/${appAttemptId}/_temporary/${taskAttemptId}/{partitionId}/part-{taskId}-{jobId}{ext}` 3. leveraging the `FileOutputCommitter` coordinator, the write job first commits output to `outputPath/.spark-staging-{jobId}/{partitionId}` 4. for dynamic partition overwrite, the write job finally moves `outputPath/.spark-staging-{jobId}/{partitionId}` to `outputPath/{partitionId}` ### Why are the changes needed? Without this PR, dynamic partition overwrite can fail with a `FileAlreadyExistsException` when multiple attempts of the same task commit concurrently. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added a unit test. Closes #29000 from WinkerDu/master-fix-dynamic-partition-multi-commit. Authored-by: duripeng Signed-off-by: Wenchen Fan --- .../internal/io/FileCommitProtocol.scala | 4 ++ .../io/HadoopMapReduceCommitProtocol.scala | 41 +++++++++++----- .../InsertIntoHadoopFsRelationCommand.scala | 14 +++++- .../SQLHadoopMapReduceCommitProtocol.scala | 3 +- .../sql/sources/PartitionedWriteSuite.scala | 47 ++++++++++++++++++- 5 files changed, 92 insertions(+), 17 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/internal/io/FileCommitProtocol.scala b/core/src/main/scala/org/apache/spark/internal/io/FileCommitProtocol.scala index 0746e43babf9a..d9d7b06cdb8ce 100644 --- a/core/src/main/scala/org/apache/spark/internal/io/FileCommitProtocol.scala +++ b/core/src/main/scala/org/apache/spark/internal/io/FileCommitProtocol.scala @@ -169,4 +169,8 @@ object FileCommitProtocol extends Logging { ctor.newInstance(jobId, outputPath) } } + + def getStagingDir(path: String, jobId: String): Path = { + new Path(path, ".spark-staging-" + jobId) + } } diff --git a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala index 11ce608f52ee2..30f9a650a69c9 100644 --- a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala +++ b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala @@ -41,13 +41,28 @@ import org.apache.spark.mapred.SparkHadoopMapRedUtil * @param jobId the job's or stage's id * @param path the job's output path, or null if committer acts as a noop * @param dynamicPartitionOverwrite If true, Spark will overwrite partition directories at runtime - * dynamically, i.e., we first write files under a staging - * directory with partition path, e.g. - * /path/to/staging/a=1/b=1/xxx.parquet.
When committing the job, - * we first clean up the corresponding partition directories at - * destination path, e.g. /path/to/destination/a=1/b=1, and move - * files from staging directory to the corresponding partition - * directories under destination path. + * dynamically. Suppose final path is /path/to/outputPath, output + * path of [[FileOutputCommitter]] is an intermediate path, e.g. + * /path/to/outputPath/.spark-staging-{jobId}, which is a staging + * directory. Task attempts firstly write files under the + * intermediate path, e.g. + * /path/to/outputPath/.spark-staging-{jobId}/_temporary/ + * {appAttemptId}/_temporary/{taskAttemptId}/a=1/b=1/xxx.parquet. + * + * 1. When [[FileOutputCommitter]] algorithm version set to 1, + * we firstly move task attempt output files to + * /path/to/outputPath/.spark-staging-{jobId}/_temporary/ + * {appAttemptId}/{taskId}/a=1/b=1, + * then move them to + * /path/to/outputPath/.spark-staging-{jobId}/a=1/b=1. + * 2. When [[FileOutputCommitter]] algorithm version set to 2, + * committing tasks directly move task attempt output files to + * /path/to/outputPath/.spark-staging-{jobId}/a=1/b=1. + * + * At the end of committing job, we move output files from + * intermediate path to final path, e.g., move files from + * /path/to/outputPath/.spark-staging-{jobId}/a=1/b=1 + * to /path/to/outputPath/a=1/b=1 */ class HadoopMapReduceCommitProtocol( jobId: String, @@ -89,7 +104,7 @@ class HadoopMapReduceCommitProtocol( * The staging directory of this write job. Spark uses it to deal with files with absolute output * path, or writing data into partitioned directory with dynamicPartitionOverwrite=true. */ - private def stagingDir = new Path(path, ".spark-staging-" + jobId) + protected def stagingDir = getStagingDir(path, jobId) protected def setupCommitter(context: TaskAttemptContext): OutputCommitter = { val format = context.getOutputFormatClass.getConstructor().newInstance() @@ -106,13 +121,13 @@ class HadoopMapReduceCommitProtocol( val filename = getFilename(taskContext, ext) val stagingDir: Path = committer match { - case _ if dynamicPartitionOverwrite => - assert(dir.isDefined, - "The dataset to be written must be partitioned when dynamicPartitionOverwrite is true.") - partitionPaths += dir.get - this.stagingDir // For FileOutputCommitter it has its own staging path called "work path". 
case f: FileOutputCommitter => + if (dynamicPartitionOverwrite) { + assert(dir.isDefined, + "The dataset to be written must be partitioned when dynamicPartitionOverwrite is true.") + partitionPaths += dir.get + } new Path(Option(f.getWorkPath).map(_.toString).getOrElse(path)) case _ => new Path(path) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala index fe733f4238e1a..db7264d0c6ec8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala @@ -106,9 +106,10 @@ case class InsertIntoHadoopFsRelationCommand( fs, catalogTable.get, qualifiedOutputPath, matchingPartitions) } + val jobId = java.util.UUID.randomUUID().toString val committer = FileCommitProtocol.instantiate( sparkSession.sessionState.conf.fileCommitProtocolClass, - jobId = java.util.UUID.randomUUID().toString, + jobId = jobId, outputPath = outputPath.toString, dynamicPartitionOverwrite = dynamicPartitionOverwrite) @@ -163,6 +164,15 @@ case class InsertIntoHadoopFsRelationCommand( } } + // For dynamic partition overwrite, FileOutputCommitter's output path is staging path, files + // will be renamed from staging path to final output path during commit job + val committerOutputPath = if (dynamicPartitionOverwrite) { + FileCommitProtocol.getStagingDir(outputPath.toString, jobId) + .makeQualified(fs.getUri, fs.getWorkingDirectory) + } else { + qualifiedOutputPath + } + val updatedPartitionPaths = FileFormatWriter.write( sparkSession = sparkSession, @@ -170,7 +180,7 @@ case class InsertIntoHadoopFsRelationCommand( fileFormat = fileFormat, committer = committer, outputSpec = FileFormatWriter.OutputSpec( - qualifiedOutputPath.toString, customPartitionLocations, outputColumns), + committerOutputPath.toString, customPartitionLocations, outputColumns), hadoopConf = hadoopConf, partitionColumns = partitionColumns, bucketSpec = bucketSpec, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SQLHadoopMapReduceCommitProtocol.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SQLHadoopMapReduceCommitProtocol.scala index 39c594a9bc618..144be2316f091 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SQLHadoopMapReduceCommitProtocol.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SQLHadoopMapReduceCommitProtocol.scala @@ -55,7 +55,8 @@ class SQLHadoopMapReduceCommitProtocol( // The specified output committer is a FileOutputCommitter. // So, we will use the FileOutputCommitter-specified constructor. val ctor = clazz.getDeclaredConstructor(classOf[Path], classOf[TaskAttemptContext]) - committer = ctor.newInstance(new Path(path), context) + val committerOutputPath = if (dynamicPartitionOverwrite) stagingDir else new Path(path) + committer = ctor.newInstance(committerOutputPath, context) } else { // The specified output committer is just an OutputCommitter. // So, we will use the no-argument constructor. 
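Taken together, `FileCommitProtocol.getStagingDir`, the work-path handling in `HadoopMapReduceCommitProtocol.newTaskTempFile`, and the `committerOutputPath` switch in `SQLHadoopMapReduceCommitProtocol` produce the layout described in steps 1-4 of the description. The sketch below is only an illustration of that layout with made-up values (plain strings, no Hadoop types); it is not the Spark implementation:
```
// Illustration only: how the staging layout from this patch fits together when
// dynamicPartitionOverwrite = true. Output path, job id and partition are made up.
object StagingLayoutSketch {
  // Mirrors the new FileCommitProtocol.getStagingDir: <outputPath>/.spark-staging-<jobId>
  def stagingDir(outputPath: String, jobId: String): String =
    s"$outputPath/.spark-staging-$jobId"

  def main(args: Array[String]): Unit = {
    val outputPath = "/warehouse/t"   // illustrative table location
    val jobId = "8f14e45f-ceea"       // illustrative job id
    val partition = "a=1"             // illustrative dynamic partition

    val staging = stagingDir(outputPath, jobId)
    // The FileOutputCommitter's output path is now the staging dir, so task attempts write
    // into its usual _temporary work path under the staging dir:
    println(s"$staging/_temporary/<appAttemptId>/_temporary/<taskAttemptId>/$partition/part-...")
    // Committing the task/job promotes the files to the staging partition dir:
    println(s"$staging/$partition/part-...")
    // Finally, commitJob moves each staging partition dir to its final location:
    println(s"$outputPath/$partition/part-...")
  }
}
```
Each task attempt gets its own `_temporary/<taskAttemptId>` directory here, which is why the existing `FileOutputCommitter` commit coordination is enough to avoid the collision.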
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala index 6df1c5db14c26..52825a155e46a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala @@ -20,7 +20,8 @@ package org.apache.spark.sql.sources import java.io.File import java.sql.Timestamp -import org.apache.hadoop.mapreduce.TaskAttemptContext +import org.apache.hadoop.fs.Path +import org.apache.hadoop.mapreduce.{JobContext, TaskAttemptContext} import org.apache.spark.TestUtils import org.apache.spark.internal.Logging @@ -164,4 +165,48 @@ class PartitionedWriteSuite extends QueryTest with SharedSparkSession { assert(e.getMessage.contains("Found duplicate column(s) b, b: `b`;")) } } + + test("SPARK-27194 SPARK-29302: Fix commit collision in dynamic partition overwrite mode") { + withSQLConf(SQLConf.PARTITION_OVERWRITE_MODE.key -> + SQLConf.PartitionOverwriteMode.DYNAMIC.toString, + SQLConf.FILE_COMMIT_PROTOCOL_CLASS.key -> + classOf[PartitionFileExistCommitProtocol].getName) { + withTempDir { d => + withTable("t") { + sql( + s""" + | create table t(c1 int, p1 int) using parquet partitioned by (p1) + | location '${d.getAbsolutePath}' + """.stripMargin) + + val df = Seq((1, 2)).toDF("c1", "p1") + df.write + .partitionBy("p1") + .mode("overwrite") + .saveAsTable("t") + checkAnswer(sql("select * from t"), df) + } + } + } + } +} + +/** + * A file commit protocol with pre-created partition file. when try to overwrite partition dir + * in dynamic partition mode, FileAlreadyExist exception would raise without SPARK-27194 + */ +private class PartitionFileExistCommitProtocol( + jobId: String, + path: String, + dynamicPartitionOverwrite: Boolean) + extends SQLHadoopMapReduceCommitProtocol(jobId, path, dynamicPartitionOverwrite) { + override def setupJob(jobContext: JobContext): Unit = { + super.setupJob(jobContext) + val stagingDir = new File(new Path(path).toUri.getPath, s".spark-staging-$jobId") + stagingDir.mkdirs() + val stagingPartDir = new File(stagingDir, "p1=2") + stagingPartDir.mkdirs() + val conflictTaskFile = new File(stagingPartDir, s"part-00000-$jobId.c000.snappy.parquet") + conflictTaskFile.createNewFile() + } } From 6f68ccf532ec3fdd7224ba05c52bce58372572e9 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Wed, 25 Nov 2020 15:09:02 +0000 Subject: [PATCH 0572/1009] [SPARK-31257][SPARK-33561][SQL] Unify create table syntax ### What changes were proposed in this pull request? * Unify the create table syntax in the parser by merging Hive and DataSource clauses * Add `SerdeInfo` and `external` boolean to statement plans and update AstBuilder to produce them * Add conversion from create statement plan to v1 create plans in ResolveSessionCatalog * Support new statement clauses in ResolveCatalogs conversion to v2 create plans * Remove SparkSqlParser rules for Hive syntax * Add "option." namespace to distinguish SERDEPROPERTIES and OPTIONS in table properties ### Why are the changes needed? * Current behavior is confusing. * A way to pass the Hive create options to DSv2 is needed for a Hive source. ### Does this PR introduce any user-facing change? Not by default, but v2 sources will be able to handle STORED AS and other Hive clauses. ### How was this patch tested? Existing tests validate there are no behavior changes. 
Update unit tests for using a statement plan for Hive create syntax: * Move create tests from spark-sql DDLParserSuite into PlanResolutionSuite * Add parser tests to spark-catalyst DDLParserSuite Closes #28026 from rdblue/unify-create-table. Lead-authored-by: Ryan Blue Co-authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/parser/SqlBase.g4 | 29 +- .../sql/connector/catalog/TableCatalog.java | 10 + .../catalyst/analysis/ResolveCatalogs.scala | 16 +- .../sql/catalyst/parser/AstBuilder.scala | 331 +++++++-- .../catalyst/plans/logical/statements.scala | 81 +++ .../sql/connector/catalog/CatalogV2Util.scala | 55 +- .../sql/catalyst/parser/DDLParserSuite.scala | 348 ++++++++- .../apache/spark/sql/DataFrameWriter.scala | 5 +- .../apache/spark/sql/DataFrameWriterV2.scala | 5 +- .../analysis/ResolveSessionCatalog.scala | 111 ++- .../spark/sql/execution/SparkSqlParser.scala | 394 ++--------- .../datasources/v2/V2SessionCatalog.scala | 8 +- .../sql/connector/DataSourceV2SQLSuite.scala | 4 +- .../sql/execution/SparkSqlParserSuite.scala | 129 +--- .../execution/command/DDLParserSuite.scala | 524 +------------- .../command/PlanResolutionSuite.scala | 660 +++++++++++++++++- .../sources/CreateTableAsSelectSuite.scala | 4 +- .../sql/hive/execution/HiveDDLSuite.scala | 24 +- .../sql/hive/execution/HiveSerDeSuite.scala | 7 +- .../sql/hive/execution/SQLQuerySuite.scala | 3 +- 20 files changed, 1626 insertions(+), 1122 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 6b6b751cc3c15..5d17028c32ae2 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -119,20 +119,9 @@ statement (RESTRICT | CASCADE)? #dropNamespace | SHOW (DATABASES | NAMESPACES) ((FROM | IN) multipartIdentifier)? (LIKE? pattern=STRING)? #showNamespaces - | createTableHeader ('(' colTypeList ')')? tableProvider + | createTableHeader ('(' colTypeList ')')? tableProvider? createTableClauses (AS? query)? #createTable - | createTableHeader ('(' columns=colTypeList ')')? - (commentSpec | - (PARTITIONED BY '(' partitionColumns=colTypeList ')' | - PARTITIONED BY partitionColumnNames=identifierList) | - bucketSpec | - skewSpec | - rowFormat | - createFileFormat | - locationSpec | - (TBLPROPERTIES tableProps=tablePropertyList))* - (AS? query)? #createHiveTable | CREATE TABLE (IF NOT EXISTS)? target=tableIdentifier LIKE source=tableIdentifier (tableProvider | @@ -140,7 +129,7 @@ statement createFileFormat | locationSpec | (TBLPROPERTIES tableProps=tablePropertyList))* #createTableLike - | replaceTableHeader ('(' colTypeList ')')? tableProvider + | replaceTableHeader ('(' colTypeList ')')? tableProvider? createTableClauses (AS? query)? #replaceTable | ANALYZE TABLE multipartIdentifier partitionSpec? 
COMPUTE STATISTICS @@ -393,8 +382,11 @@ tableProvider createTableClauses :((OPTIONS options=tablePropertyList) | - (PARTITIONED BY partitioning=transformList) | + (PARTITIONED BY partitioning=partitionFieldList) | + skewSpec | bucketSpec | + rowFormat | + createFileFormat | locationSpec | commentSpec | (TBLPROPERTIES tableProps=tablePropertyList))* @@ -741,8 +733,13 @@ namedExpressionSeq : namedExpression (',' namedExpression)* ; -transformList - : '(' transforms+=transform (',' transforms+=transform)* ')' +partitionFieldList + : '(' fields+=partitionField (',' fields+=partitionField)* ')' + ; + +partitionField + : transform #partitionTransform + | colType #partitionColumn ; transform diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java index 92079d127b1e3..52a74ab9dd9f5 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java @@ -46,6 +46,11 @@ public interface TableCatalog extends CatalogPlugin { */ String PROP_LOCATION = "location"; + /** + * A reserved property to specify a table was created with EXTERNAL. + */ + String PROP_EXTERNAL = "external"; + /** * A reserved property to specify the description of the table. */ @@ -61,6 +66,11 @@ public interface TableCatalog extends CatalogPlugin { */ String PROP_OWNER = "owner"; + /** + * A prefix used to pass OPTIONS in table properties + */ + String OPTION_PREFIX = "option."; + /** * List the tables in a namespace from the catalog. *

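The two reserved keys added to `TableCatalog` above (`PROP_EXTERNAL` and `OPTION_PREFIX`) are how the unified syntax hands Hive-style clauses and OPTIONS to a v2 catalog as plain table properties (see `CatalogV2Util.convertTableProperties` further down in this patch). A rough sketch of the resulting property map, with made-up option and format values and assuming an external table stored as parquet:
```
// Illustration only, not Spark code: roughly what a v2 catalog sees in its table
// properties after this patch. The option key/value and file format are invented.
object UnifiedCreateTablePropsSketch {
  val OptionPrefix = "option."   // TableCatalog.OPTION_PREFIX
  val PropExternal = "external"  // TableCatalog.PROP_EXTERNAL

  def main(args: Array[String]): Unit = {
    val options  = Map("compression" -> "zstd")  // illustrative OPTIONS (...)
    val storedAs = "parquet"                     // STORED AS parquet
    val external = true                          // CREATE EXTERNAL TABLE

    val props =
      options ++                                               // raw OPTIONS keys, as before
      options.map { case (k, v) => OptionPrefix + k -> v } ++  // the same keys under "option."
      Map("hive.stored-as" -> storedAs) ++                     // STORED AS surfaced as a property
      (if (external) Map(PropExternal -> "true") else Map.empty[String, String])

    props.foreach { case (k, v) => println(s"$k = $v") }
  }
}
```
Keeping both the raw keys and the `option.`-prefixed copies matches the comment in `convertTableProperties` about easing the transition to the `option.` prefix.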
    diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala index deeb8215d22c6..7354d2478b7c8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala @@ -143,7 +143,7 @@ class ResolveCatalogs(val catalogManager: CatalogManager) RenameTable(catalog.asTableCatalog, oldName.asIdentifier, newNameParts.asIdentifier) case c @ CreateTableStatement( - NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _) => + NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _, _) => assertNoNullTypeInSchema(c.tableSchema) assertNoCharTypeInSchema(c.tableSchema) CreateV2Table( @@ -152,11 +152,11 @@ class ResolveCatalogs(val catalogManager: CatalogManager) c.tableSchema, // convert the bucket spec and add it as a transform c.partitioning ++ c.bucketSpec.map(_.asTransform), - convertTableProperties(c.properties, c.options, c.location, c.comment, c.provider), + convertTableProperties(c), ignoreIfExists = c.ifNotExists) case c @ CreateTableAsSelectStatement( - NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _) => + NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _, _, _) => if (c.asSelect.resolved) { assertNoNullTypeInSchema(c.asSelect.schema) } @@ -166,12 +166,12 @@ class ResolveCatalogs(val catalogManager: CatalogManager) // convert the bucket spec and add it as a transform c.partitioning ++ c.bucketSpec.map(_.asTransform), c.asSelect, - convertTableProperties(c.properties, c.options, c.location, c.comment, c.provider), + convertTableProperties(c), writeOptions = c.writeOptions, ignoreIfExists = c.ifNotExists) case c @ ReplaceTableStatement( - NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _) => + NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _) => assertNoNullTypeInSchema(c.tableSchema) assertNoCharTypeInSchema(c.tableSchema) ReplaceTable( @@ -180,11 +180,11 @@ class ResolveCatalogs(val catalogManager: CatalogManager) c.tableSchema, // convert the bucket spec and add it as a transform c.partitioning ++ c.bucketSpec.map(_.asTransform), - convertTableProperties(c.properties, c.options, c.location, c.comment, c.provider), + convertTableProperties(c), orCreate = c.orCreate) case c @ ReplaceTableAsSelectStatement( - NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _) => + NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _, _) => if (c.asSelect.resolved) { assertNoNullTypeInSchema(c.asSelect.schema) } @@ -194,7 +194,7 @@ class ResolveCatalogs(val catalogManager: CatalogManager) // convert the bucket spec and add it as a transform c.partitioning ++ c.bucketSpec.map(_.asTransform), c.asSelect, - convertTableProperties(c.properties, c.options, c.location, c.comment, c.provider), + convertTableProperties(c), writeOptions = c.writeOptions, orCreate = c.orCreate) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 5f8394c525949..25423e510157a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2459,10 +2459,22 @@ class AstBuilder extends 
SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg /** * Type to keep track of table clauses: - * (partitioning, bucketSpec, properties, options, location, comment). + * - partition transforms + * - partition columns + * - bucketSpec + * - properties + * - options + * - location + * - comment + * - serde + * + * Note: Partition transforms are based on existing table schema definition. It can be simple + * column names, or functions like `year(date_col)`. Partition columns are column names with data + * types like `i INT`, which should be appended to the existing table schema. */ - type TableClauses = (Seq[Transform], Option[BucketSpec], Map[String, String], - Map[String, String], Option[String], Option[String]) + type TableClauses = ( + Seq[Transform], Seq[StructField], Option[BucketSpec], Map[String, String], + Map[String, String], Option[String], Option[String], Option[SerdeInfo]) /** * Validate a create table statement and return the [[TableIdentifier]]. @@ -2495,9 +2507,22 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } /** - * Parse a list of transforms. + * Parse a list of transforms or columns. */ - override def visitTransformList(ctx: TransformListContext): Seq[Transform] = withOrigin(ctx) { + override def visitPartitionFieldList( + ctx: PartitionFieldListContext): (Seq[Transform], Seq[StructField]) = withOrigin(ctx) { + val (transforms, columns) = ctx.fields.asScala.map { + case transform: PartitionTransformContext => + (Some(visitPartitionTransform(transform)), None) + case field: PartitionColumnContext => + (None, Some(visitColType(field.colType))) + }.unzip + + (transforms.flatten.toSeq, columns.flatten.toSeq) + } + + override def visitPartitionTransform( + ctx: PartitionTransformContext): Transform = withOrigin(ctx) { def getFieldReference( ctx: ApplyTransformContext, arg: V2Expression): FieldReference = { @@ -2524,7 +2549,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } } - ctx.transforms.asScala.map { + ctx.transform match { case identityCtx: IdentityTransformContext => IdentityTransform(FieldReference(typedVisit[Seq[String]](identityCtx.qualifiedName))) @@ -2563,7 +2588,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg case name => ApplyTransform(name, arguments) } - }.toSeq + } } /** @@ -2763,16 +2788,157 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg (filtered, path) } + /** + * Create a [[SerdeInfo]] for creating tables. + * + * Format: STORED AS (name | INPUTFORMAT input_format OUTPUTFORMAT output_format) + */ + override def visitCreateFileFormat(ctx: CreateFileFormatContext): SerdeInfo = withOrigin(ctx) { + (ctx.fileFormat, ctx.storageHandler) match { + // Expected format: INPUTFORMAT input_format OUTPUTFORMAT output_format + case (c: TableFileFormatContext, null) => + SerdeInfo(formatClasses = Some(FormatClasses(string(c.inFmt), string(c.outFmt)))) + // Expected format: SEQUENCEFILE | TEXTFILE | RCFILE | ORC | PARQUET | AVRO + case (c: GenericFileFormatContext, null) => + SerdeInfo(storedAs = Some(c.identifier.getText)) + case (null, storageHandler) => + operationNotAllowed("STORED BY", ctx) + case _ => + throw new ParseException("Expected either STORED AS or STORED BY, not both", ctx) + } + } + + /** + * Create a [[SerdeInfo]] used for creating tables. 
+ * + * Example format: + * {{{ + * SERDE serde_name [WITH SERDEPROPERTIES (k1=v1, k2=v2, ...)] + * }}} + * + * OR + * + * {{{ + * DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] + * [COLLECTION ITEMS TERMINATED BY char] + * [MAP KEYS TERMINATED BY char] + * [LINES TERMINATED BY char] + * [NULL DEFINED AS char] + * }}} + */ + def visitRowFormat(ctx: RowFormatContext): SerdeInfo = withOrigin(ctx) { + ctx match { + case serde: RowFormatSerdeContext => visitRowFormatSerde(serde) + case delimited: RowFormatDelimitedContext => visitRowFormatDelimited(delimited) + } + } + + /** + * Create SERDE row format name and properties pair. + */ + override def visitRowFormatSerde(ctx: RowFormatSerdeContext): SerdeInfo = withOrigin(ctx) { + import ctx._ + SerdeInfo( + serde = Some(string(name)), + serdeProperties = Option(tablePropertyList).map(visitPropertyKeyValues).getOrElse(Map.empty)) + } + + /** + * Create a delimited row format properties object. + */ + override def visitRowFormatDelimited( + ctx: RowFormatDelimitedContext): SerdeInfo = withOrigin(ctx) { + // Collect the entries if any. + def entry(key: String, value: Token): Seq[(String, String)] = { + Option(value).toSeq.map(x => key -> string(x)) + } + // TODO we need proper support for the NULL format. + val entries = + entry("field.delim", ctx.fieldsTerminatedBy) ++ + entry("serialization.format", ctx.fieldsTerminatedBy) ++ + entry("escape.delim", ctx.escapedBy) ++ + // The following typo is inherited from Hive... + entry("colelction.delim", ctx.collectionItemsTerminatedBy) ++ + entry("mapkey.delim", ctx.keysTerminatedBy) ++ + Option(ctx.linesSeparatedBy).toSeq.map { token => + val value = string(token) + validate( + value == "\n", + s"LINES TERMINATED BY only supports newline '\\n' right now: $value", + ctx) + "line.delim" -> value + } + SerdeInfo(serdeProperties = entries.toMap) + } + + /** + * Throw a [[ParseException]] if the user specified incompatible SerDes through ROW FORMAT + * and STORED AS. + * + * The following are allowed. Anything else is not: + * ROW FORMAT SERDE ... STORED AS [SEQUENCEFILE | RCFILE | TEXTFILE] + * ROW FORMAT DELIMITED ... STORED AS TEXTFILE + * ROW FORMAT ... STORED AS INPUTFORMAT ... OUTPUTFORMAT ... 
+ */ + protected def validateRowFormatFileFormat( + rowFormatCtx: RowFormatContext, + createFileFormatCtx: CreateFileFormatContext, + parentCtx: ParserRuleContext): Unit = { + if (rowFormatCtx == null || createFileFormatCtx == null) { + return + } + (rowFormatCtx, createFileFormatCtx.fileFormat) match { + case (_, ffTable: TableFileFormatContext) => // OK + case (rfSerde: RowFormatSerdeContext, ffGeneric: GenericFileFormatContext) => + ffGeneric.identifier.getText.toLowerCase(Locale.ROOT) match { + case ("sequencefile" | "textfile" | "rcfile") => // OK + case fmt => + operationNotAllowed( + s"ROW FORMAT SERDE is incompatible with format '$fmt', which also specifies a serde", + parentCtx) + } + case (rfDelimited: RowFormatDelimitedContext, ffGeneric: GenericFileFormatContext) => + ffGeneric.identifier.getText.toLowerCase(Locale.ROOT) match { + case "textfile" => // OK + case fmt => operationNotAllowed( + s"ROW FORMAT DELIMITED is only compatible with 'textfile', not '$fmt'", parentCtx) + } + case _ => + // should never happen + def str(ctx: ParserRuleContext): String = { + (0 until ctx.getChildCount).map { i => ctx.getChild(i).getText }.mkString(" ") + } + operationNotAllowed( + s"Unexpected combination of ${str(rowFormatCtx)} and ${str(createFileFormatCtx)}", + parentCtx) + } + } + + protected def validateRowFormatFileFormat( + rowFormatCtx: Seq[RowFormatContext], + createFileFormatCtx: Seq[CreateFileFormatContext], + parentCtx: ParserRuleContext): Unit = { + if (rowFormatCtx.size == 1 && createFileFormatCtx.size == 1) { + validateRowFormatFileFormat(rowFormatCtx.head, createFileFormatCtx.head, parentCtx) + } + } + override def visitCreateTableClauses(ctx: CreateTableClausesContext): TableClauses = { checkDuplicateClauses(ctx.TBLPROPERTIES, "TBLPROPERTIES", ctx) checkDuplicateClauses(ctx.OPTIONS, "OPTIONS", ctx) checkDuplicateClauses(ctx.PARTITIONED, "PARTITIONED BY", ctx) + checkDuplicateClauses(ctx.createFileFormat, "STORED AS/BY", ctx) + checkDuplicateClauses(ctx.rowFormat, "ROW FORMAT", ctx) checkDuplicateClauses(ctx.commentSpec(), "COMMENT", ctx) checkDuplicateClauses(ctx.bucketSpec(), "CLUSTERED BY", ctx) checkDuplicateClauses(ctx.locationSpec, "LOCATION", ctx) - val partitioning: Seq[Transform] = - Option(ctx.partitioning).map(visitTransformList).getOrElse(Nil) + if (ctx.skewSpec.size > 0) { + operationNotAllowed("CREATE TABLE ... 
SKEWED BY", ctx) + } + + val (partTransforms, partCols) = + Option(ctx.partitioning).map(visitPartitionFieldList).getOrElse((Nil, Nil)) val bucketSpec = ctx.bucketSpec().asScala.headOption.map(visitBucketSpec) val properties = Option(ctx.tableProps).map(visitPropertyKeyValues).getOrElse(Map.empty) val cleanedProperties = cleanTableProperties(ctx, properties) @@ -2780,7 +2946,45 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg val location = visitLocationSpecList(ctx.locationSpec()) val (cleanedOptions, newLocation) = cleanTableOptions(ctx, options, location) val comment = visitCommentSpecList(ctx.commentSpec()) - (partitioning, bucketSpec, cleanedProperties, cleanedOptions, newLocation, comment) + val serdeInfo = getSerdeInfo(ctx.rowFormat.asScala, ctx.createFileFormat.asScala, ctx) + (partTransforms, partCols, bucketSpec, cleanedProperties, cleanedOptions, newLocation, comment, + serdeInfo) + } + + protected def getSerdeInfo( + rowFormatCtx: Seq[RowFormatContext], + createFileFormatCtx: Seq[CreateFileFormatContext], + ctx: ParserRuleContext, + skipCheck: Boolean = false): Option[SerdeInfo] = { + if (!skipCheck) validateRowFormatFileFormat(rowFormatCtx, createFileFormatCtx, ctx) + val rowFormatSerdeInfo = rowFormatCtx.map(visitRowFormat) + val fileFormatSerdeInfo = createFileFormatCtx.map(visitCreateFileFormat) + (fileFormatSerdeInfo ++ rowFormatSerdeInfo).reduceLeftOption((l, r) => l.merge(r)) + } + + private def partitionExpressions( + partTransforms: Seq[Transform], + partCols: Seq[StructField], + ctx: ParserRuleContext): Seq[Transform] = { + if (partTransforms.nonEmpty) { + if (partCols.nonEmpty) { + val references = partTransforms.map(_.describe()).mkString(", ") + val columns = partCols + .map(field => s"${field.name} ${field.dataType.simpleString}") + .mkString(", ") + operationNotAllowed( + s"""PARTITION BY: Cannot mix partition expressions and partition columns: + |Expressions: $references + |Columns: $columns""".stripMargin, ctx) + + } + partTransforms + } else { + // columns were added to create the schema. convert to column references + partCols.map { column => + IdentityTransform(FieldReference(Seq(column.name))) + } + } } /** @@ -2789,13 +2993,15 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * Expected format: * {{{ * CREATE [TEMPORARY] TABLE [IF NOT EXISTS] [db_name.]table_name - * USING table_provider + * [USING table_provider] * create_table_clauses * [[AS] select_statement]; * * create_table_clauses (order insensitive): + * [PARTITIONED BY (partition_fields)] * [OPTIONS table_property_list] - * [PARTITIONED BY (col_name, transform(col_name), transform(constant, col_name), ...)] + * [ROW FORMAT row_format] + * [STORED AS file_format] * [CLUSTERED BY (col_name, col_name, ...) * [SORTED BY (col_name [ASC|DESC], ...)] * INTO num_buckets BUCKETS @@ -2803,40 +3009,55 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * [LOCATION path] * [COMMENT table_comment] * [TBLPROPERTIES (property_name=property_value, ...)] + * + * partition_fields: + * col_name, transform(col_name), transform(constant, col_name), ... | + * col_name data_type [NOT NULL] [COMMENT col_comment], ... 
* }}} */ override def visitCreateTable(ctx: CreateTableContext): LogicalPlan = withOrigin(ctx) { val (table, temp, ifNotExists, external) = visitCreateTableHeader(ctx.createTableHeader) - if (external) { - operationNotAllowed("CREATE EXTERNAL TABLE ...", ctx) - } - val schema = Option(ctx.colTypeList()).map(createSchema) + + val columns = Option(ctx.colTypeList()).map(visitColTypeList).getOrElse(Nil) val provider = Option(ctx.tableProvider).map(_.multipartIdentifier.getText) - val (partitioning, bucketSpec, properties, options, location, comment) = + val (partTransforms, partCols, bucketSpec, properties, options, location, comment, serdeInfo) = visitCreateTableClauses(ctx.createTableClauses()) - Option(ctx.query).map(plan) match { - case Some(_) if temp => - operationNotAllowed("CREATE TEMPORARY TABLE ... USING ... AS query", ctx) + if (provider.isDefined && serdeInfo.isDefined) { + operationNotAllowed(s"CREATE TABLE ... USING ... ${serdeInfo.get.describe}", ctx) + } + + if (temp) { + val asSelect = if (ctx.query == null) "" else " AS ..." + operationNotAllowed( + s"CREATE TEMPORARY TABLE ...$asSelect, use CREATE TEMPORARY VIEW instead", ctx) + } - case Some(_) if schema.isDefined => + val partitioning = partitionExpressions(partTransforms, partCols, ctx) + + Option(ctx.query).map(plan) match { + case Some(_) if columns.nonEmpty => operationNotAllowed( "Schema may not be specified in a Create Table As Select (CTAS) statement", ctx) + case Some(_) if partCols.nonEmpty => + // non-reference partition columns are not allowed because schema can't be specified + operationNotAllowed( + "Partition column types may not be specified in Create Table As Select (CTAS)", + ctx) + case Some(query) => CreateTableAsSelectStatement( table, query, partitioning, bucketSpec, properties, provider, options, location, comment, - writeOptions = Map.empty, ifNotExists = ifNotExists) - - case None if temp => - // CREATE TEMPORARY TABLE ... USING ... is not supported by the catalyst parser. - // Use CREATE TEMPORARY VIEW ... USING ... instead. - operationNotAllowed("CREATE TEMPORARY TABLE IF NOT EXISTS", ctx) + writeOptions = Map.empty, serdeInfo, external = external, ifNotExists = ifNotExists) case _ => - CreateTableStatement(table, schema.getOrElse(new StructType), partitioning, bucketSpec, - properties, provider, options, location, comment, ifNotExists = ifNotExists) + // Note: table schema includes both the table columns list and the partition columns + // with data type. + val schema = StructType(columns ++ partCols) + CreateTableStatement(table, schema, partitioning, bucketSpec, properties, provider, + options, location, comment, serdeInfo, external = external, ifNotExists = ifNotExists) } } @@ -2846,13 +3067,13 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * Expected format: * {{{ * [CREATE OR] REPLACE TABLE [db_name.]table_name - * USING table_provider + * [USING table_provider] * replace_table_clauses * [[AS] select_statement]; * * replace_table_clauses (order insensitive): * [OPTIONS table_property_list] - * [PARTITIONED BY (col_name, transform(col_name), transform(constant, col_name), ...)] + * [PARTITIONED BY (partition_fields)] * [CLUSTERED BY (col_name, col_name, ...) 
* [SORTED BY (col_name [ASC|DESC], ...)] * INTO num_buckets BUCKETS @@ -2860,33 +3081,63 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * [LOCATION path] * [COMMENT table_comment] * [TBLPROPERTIES (property_name=property_value, ...)] + * + * partition_fields: + * col_name, transform(col_name), transform(constant, col_name), ... | + * col_name data_type [NOT NULL] [COMMENT col_comment], ... * }}} */ override def visitReplaceTable(ctx: ReplaceTableContext): LogicalPlan = withOrigin(ctx) { - val (table, _, ifNotExists, external) = visitReplaceTableHeader(ctx.replaceTableHeader) + val (table, temp, ifNotExists, external) = visitReplaceTableHeader(ctx.replaceTableHeader) + val orCreate = ctx.replaceTableHeader().CREATE() != null + + if (temp) { + val action = if (orCreate) "CREATE OR REPLACE" else "REPLACE" + operationNotAllowed(s"$action TEMPORARY TABLE ..., use $action TEMPORARY VIEW instead.", ctx) + } + if (external) { - operationNotAllowed("REPLACE EXTERNAL TABLE ... USING", ctx) + operationNotAllowed("REPLACE EXTERNAL TABLE ...", ctx) + } + + if (ifNotExists) { + operationNotAllowed("REPLACE ... IF NOT EXISTS, use CREATE IF NOT EXISTS instead", ctx) } - val (partitioning, bucketSpec, properties, options, location, comment) = + val (partTransforms, partCols, bucketSpec, properties, options, location, comment, serdeInfo) = visitCreateTableClauses(ctx.createTableClauses()) - val schema = Option(ctx.colTypeList()).map(createSchema) + val columns = Option(ctx.colTypeList()).map(visitColTypeList).getOrElse(Nil) val provider = Option(ctx.tableProvider).map(_.multipartIdentifier.getText) - val orCreate = ctx.replaceTableHeader().CREATE() != null + + if (provider.isDefined && serdeInfo.isDefined) { + operationNotAllowed(s"CREATE TABLE ... USING ... ${serdeInfo.get.describe}", ctx) + } + + val partitioning = partitionExpressions(partTransforms, partCols, ctx) Option(ctx.query).map(plan) match { - case Some(_) if schema.isDefined => + case Some(_) if columns.nonEmpty => operationNotAllowed( "Schema may not be specified in a Replace Table As Select (RTAS) statement", ctx) + case Some(_) if partCols.nonEmpty => + // non-reference partition columns are not allowed because schema can't be specified + operationNotAllowed( + "Partition column types may not be specified in Replace Table As Select (RTAS)", + ctx) + case Some(query) => ReplaceTableAsSelectStatement(table, query, partitioning, bucketSpec, properties, - provider, options, location, comment, writeOptions = Map.empty, orCreate = orCreate) + provider, options, location, comment, writeOptions = Map.empty, serdeInfo, + orCreate = orCreate) case _ => - ReplaceTableStatement(table, schema.getOrElse(new StructType), partitioning, - bucketSpec, properties, provider, options, location, comment, orCreate = orCreate) + // Note: table schema includes both the table columns list and the partition columns + // with data type. 
+ val schema = StructType(columns ++ partCols) + ReplaceTableStatement(table, schema, partitioning, bucketSpec, properties, provider, + options, location, comment, serdeInfo, orCreate = orCreate) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index 3660e8a95a7f6..281d57b3648f4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -53,6 +53,81 @@ abstract class ParsedStatement extends LogicalPlan { final override lazy val resolved = false } +/** + * Type to keep track of Hive serde info + */ +case class SerdeInfo( + storedAs: Option[String] = None, + formatClasses: Option[FormatClasses] = None, + serde: Option[String] = None, + serdeProperties: Map[String, String] = Map.empty) { + // this uses assertions because validation is done in validateRowFormatFileFormat etc. + assert(storedAs.isEmpty || formatClasses.isEmpty, + "Cannot specify both STORED AS and INPUTFORMAT/OUTPUTFORMAT") + + def describe: String = { + val serdeString = if (serde.isDefined || serdeProperties.nonEmpty) { + "ROW FORMAT " + serde.map(sd => s"SERDE $sd").getOrElse("DELIMITED") + } else { + "" + } + + this match { + case SerdeInfo(Some(storedAs), _, _, _) => + s"STORED AS $storedAs $serdeString" + case SerdeInfo(_, Some(formatClasses), _, _) => + s"STORED AS $formatClasses $serdeString" + case _ => + serdeString + } + } + + def merge(other: SerdeInfo): SerdeInfo = { + def getOnly[T](desc: String, left: Option[T], right: Option[T]): Option[T] = { + (left, right) match { + case (Some(l), Some(r)) => + assert(l == r, s"Conflicting $desc values: $l != $r") + left + case (Some(_), _) => + left + case (_, Some(_)) => + right + case _ => + None + } + } + + SerdeInfo.checkSerdePropMerging(serdeProperties, other.serdeProperties) + SerdeInfo( + getOnly("STORED AS", storedAs, other.storedAs), + getOnly("INPUTFORMAT/OUTPUTFORMAT", formatClasses, other.formatClasses), + getOnly("SERDE", serde, other.serde), + serdeProperties ++ other.serdeProperties) + } +} + +case class FormatClasses(input: String, output: String) { + override def toString: String = s"INPUTFORMAT $input OUTPUTFORMAT $output" +} + +object SerdeInfo { + val empty: SerdeInfo = SerdeInfo(None, None, None, Map.empty) + + def checkSerdePropMerging( + props1: Map[String, String], props2: Map[String, String]): Unit = { + val conflictKeys = props1.keySet.intersect(props2.keySet) + if (conflictKeys.nonEmpty) { + throw new UnsupportedOperationException( + s""" + |Cannot safely merge SERDEPROPERTIES: + |${props1.map { case (k, v) => s"$k=$v" }.mkString("{", ",", "}")} + |${props2.map { case (k, v) => s"$k=$v" }.mkString("{", ",", "}")} + |The conflict keys: ${conflictKeys.mkString(", ")} + |""".stripMargin) + } + } +} + /** * A CREATE TABLE command, as parsed from SQL. 
* @@ -68,6 +143,8 @@ case class CreateTableStatement( options: Map[String, String], location: Option[String], comment: Option[String], + serde: Option[SerdeInfo], + external: Boolean, ifNotExists: Boolean) extends ParsedStatement /** @@ -84,6 +161,8 @@ case class CreateTableAsSelectStatement( location: Option[String], comment: Option[String], writeOptions: Map[String, String], + serde: Option[SerdeInfo], + external: Boolean, ifNotExists: Boolean) extends ParsedStatement { override def children: Seq[LogicalPlan] = Seq(asSelect) @@ -119,6 +198,7 @@ case class ReplaceTableStatement( options: Map[String, String], location: Option[String], comment: Option[String], + serde: Option[SerdeInfo], orCreate: Boolean) extends ParsedStatement /** @@ -135,6 +215,7 @@ case class ReplaceTableAsSelectStatement( location: Option[String], comment: Option[String], writeOptions: Map[String, String], + serde: Option[SerdeInfo], orCreate: Boolean) extends ParsedStatement { override def children: Seq[LogicalPlan] = Seq(asSelect) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala index 1a3a7207c6ca9..b6dc4f61c8588 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala @@ -25,7 +25,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{NamedRelation, NoSuchDatabaseException, NoSuchNamespaceException, NoSuchTableException, UnresolvedV2Relation} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser -import org.apache.spark.sql.catalyst.plans.logical.AlterTable +import org.apache.spark.sql.catalyst.plans.logical.{AlterTable, CreateTableAsSelectStatement, CreateTableStatement, ReplaceTableAsSelectStatement, ReplaceTableStatement, SerdeInfo} import org.apache.spark.sql.connector.catalog.TableChange._ import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.types.{ArrayType, DataType, HIVE_TYPE_STRING, HiveStringType, MapType, NullType, StructField, StructType} @@ -295,18 +295,65 @@ private[sql] object CatalogV2Util { catalog.name().equalsIgnoreCase(CatalogManager.SESSION_CATALOG_NAME) } - def convertTableProperties( + def convertTableProperties(c: CreateTableStatement): Map[String, String] = { + convertTableProperties( + c.properties, c.options, c.serde, c.location, c.comment, c.provider, c.external) + } + + def convertTableProperties(c: CreateTableAsSelectStatement): Map[String, String] = { + convertTableProperties( + c.properties, c.options, c.serde, c.location, c.comment, c.provider, c.external) + } + + def convertTableProperties(r: ReplaceTableStatement): Map[String, String] = { + convertTableProperties(r.properties, r.options, r.serde, r.location, r.comment, r.provider) + } + + def convertTableProperties(r: ReplaceTableAsSelectStatement): Map[String, String] = { + convertTableProperties(r.properties, r.options, r.serde, r.location, r.comment, r.provider) + } + + private def convertTableProperties( properties: Map[String, String], options: Map[String, String], + serdeInfo: Option[SerdeInfo], location: Option[String], comment: Option[String], - provider: Option[String]): Map[String, String] = { - properties ++ options ++ + provider: Option[String], + external: Boolean = false): Map[String, String] = { + 
properties ++ + options ++ // to make the transition to the "option." prefix easier, add both + options.map { case (key, value) => TableCatalog.OPTION_PREFIX + key -> value } ++ + convertToProperties(serdeInfo) ++ + (if (external) Some(TableCatalog.PROP_EXTERNAL -> "true") else None) ++ provider.map(TableCatalog.PROP_PROVIDER -> _) ++ comment.map(TableCatalog.PROP_COMMENT -> _) ++ location.map(TableCatalog.PROP_LOCATION -> _) } + /** + * Converts Hive Serde info to table properties. The mapped property keys are: + * - INPUTFORMAT/OUTPUTFORMAT: hive.input/output-format + * - STORED AS: hive.stored-as + * - ROW FORMAT SERDE: hive.serde + * - SERDEPROPERTIES: add "option." prefix + */ + private def convertToProperties(serdeInfo: Option[SerdeInfo]): Map[String, String] = { + serdeInfo match { + case Some(s) => + s.formatClasses.map { f => + Map("hive.input-format" -> f.input, "hive.output-format" -> f.output) + }.getOrElse(Map.empty) ++ + s.storedAs.map("hive.stored-as" -> _) ++ + s.serde.map("hive.serde" -> _) ++ + s.serdeProperties.map { + case (key, value) => TableCatalog.OPTION_PREFIX + key -> value + } + case None => + Map.empty + } + } + def withDefaultOwnership(properties: Map[String, String]): Map[String, String] = { properties ++ Map(TableCatalog.PROP_OWNER -> Utils.getCurrentUserName()) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index cc3c824befb3e..f650922e75f6e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -63,6 +63,7 @@ class DDLParserSuite extends AnalysisTest { Some("parquet"), Map.empty[String, String], None, + None, None) Seq(createSql, replaceSql).foreach { sql => @@ -70,7 +71,7 @@ class DDLParserSuite extends AnalysisTest { } intercept("CREATE TABLE my_tab(a: INT COMMENT 'test', b: STRING) USING parquet", - "no viable alternative at input") + "extraneous input ':'") } test("create/replace table - with IF NOT EXISTS") { @@ -86,6 +87,7 @@ class DDLParserSuite extends AnalysisTest { Some("parquet"), Map.empty[String, String], None, + None, None), expectedIfNotExists = true) } @@ -106,6 +108,7 @@ class DDLParserSuite extends AnalysisTest { Some("parquet"), Map.empty[String, String], None, + None, None) Seq(createSql, replaceSql).foreach { sql => testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) @@ -160,6 +163,7 @@ class DDLParserSuite extends AnalysisTest { Some("parquet"), Map.empty[String, String], None, + None, None) Seq(createSql, replaceSql).foreach { sql => testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) @@ -182,6 +186,7 @@ class DDLParserSuite extends AnalysisTest { Some("parquet"), Map.empty[String, String], None, + None, None) Seq(createSql, replaceSql).foreach { sql => testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) @@ -200,7 +205,8 @@ class DDLParserSuite extends AnalysisTest { Some("parquet"), Map.empty[String, String], None, - Some("abc")) + Some("abc"), + None) Seq(createSql, replaceSql).foreach{ sql => testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) } @@ -220,6 +226,7 @@ class DDLParserSuite extends AnalysisTest { Some("parquet"), Map.empty[String, String], None, + None, None) Seq(createSql, replaceSql).foreach { sql => testCreateOrReplaceDdl(sql, expectedTableSpec, 
expectedIfNotExists = false) @@ -238,6 +245,7 @@ class DDLParserSuite extends AnalysisTest { Some("parquet"), Map.empty[String, String], Some("/tmp/file"), + None, None) Seq(createSql, replaceSql).foreach { sql => testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) @@ -256,19 +264,309 @@ class DDLParserSuite extends AnalysisTest { Some("parquet"), Map.empty[String, String], None, + None, None) Seq(createSql, replaceSql).foreach { sql => testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) } } + test("create/replace table - partition column definitions") { + val createSql = "CREATE TABLE my_tab (id bigint) PARTITIONED BY (part string)" + val replaceSql = "REPLACE TABLE my_tab (id bigint) PARTITIONED BY (part string)" + val expectedTableSpec = TableSpec( + Seq("my_tab"), + Some(new StructType().add("id", LongType).add("part", StringType)), + Seq(IdentityTransform(FieldReference("part"))), + None, + Map.empty[String, String], + None, + Map.empty[String, String], + None, + None, + None) + Seq(createSql, replaceSql).foreach { sql => + testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) + } + } + + test("create/replace table - empty columns list") { + val createSql = "CREATE TABLE my_tab PARTITIONED BY (part string)" + val replaceSql = "REPLACE TABLE my_tab PARTITIONED BY (part string)" + val expectedTableSpec = TableSpec( + Seq("my_tab"), + Some(new StructType().add("part", StringType)), + Seq(IdentityTransform(FieldReference("part"))), + None, + Map.empty[String, String], + None, + Map.empty[String, String], + None, + None, + None) + Seq(createSql, replaceSql).foreach { sql => + testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) + } + } + + test("create/replace table - using with partition column definitions") { + val createSql = "CREATE TABLE my_tab (id bigint) USING parquet PARTITIONED BY (part string)" + val replaceSql = "REPLACE TABLE my_tab (id bigint) USING parquet PARTITIONED BY (part string)" + val expectedTableSpec = TableSpec( + Seq("my_tab"), + Some(new StructType().add("id", LongType).add("part", StringType)), + Seq(IdentityTransform(FieldReference("part"))), + None, + Map.empty[String, String], + Some("parquet"), + Map.empty[String, String], + None, + None, + None) + Seq(createSql, replaceSql).foreach { sql => + testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) + } + } + + test("create/replace table - mixed partition references and column definitions") { + val createSql = "CREATE TABLE my_tab (id bigint, p1 string) PARTITIONED BY (p1, p2 string)" + val replaceSql = createSql.replaceFirst("CREATE", "REPLACE") + Seq(createSql, replaceSql).foreach { sql => + assertUnsupported(sql, Seq( + "PARTITION BY: Cannot mix partition expressions and partition columns", + "Expressions: p1", + "Columns: p2 string")) + } + + val createSqlWithExpr = + "CREATE TABLE my_tab (id bigint, p1 string) PARTITIONED BY (p2 string, truncate(p1, 16))" + val replaceSqlWithExpr = createSqlWithExpr.replaceFirst("CREATE", "REPLACE") + Seq(createSqlWithExpr, replaceSqlWithExpr).foreach { sql => + assertUnsupported(sql, Seq( + "PARTITION BY: Cannot mix partition expressions and partition columns", + "Expressions: truncate(p1, 16)", + "Columns: p2 string")) + } + } + + test("create/replace table - stored as") { + val createSql = + """CREATE TABLE my_tab (id bigint) + |PARTITIONED BY (part string) + |STORED AS parquet + """.stripMargin + val replaceSql = createSql.replaceFirst("CREATE", "REPLACE") 
+ val expectedTableSpec = TableSpec( + Seq("my_tab"), + Some(new StructType().add("id", LongType).add("part", StringType)), + Seq(IdentityTransform(FieldReference("part"))), + None, + Map.empty[String, String], + None, + Map.empty[String, String], + None, + None, + Some(SerdeInfo(storedAs = Some("parquet")))) + Seq(createSql, replaceSql).foreach { sql => + testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) + } + } + + test("create/replace table - stored as format with serde") { + Seq("sequencefile", "textfile", "rcfile").foreach { format => + val createSql = + s"""CREATE TABLE my_tab (id bigint) + |PARTITIONED BY (part string) + |STORED AS $format + |ROW FORMAT SERDE 'customSerde' + |WITH SERDEPROPERTIES ('prop'='value') + """.stripMargin + val replaceSql = createSql.replaceFirst("CREATE", "REPLACE") + val expectedTableSpec = TableSpec( + Seq("my_tab"), + Some(new StructType().add("id", LongType).add("part", StringType)), + Seq(IdentityTransform(FieldReference("part"))), + None, + Map.empty[String, String], + None, + Map.empty[String, String], + None, + None, + Some(SerdeInfo(storedAs = Some(format), serde = Some("customSerde"), serdeProperties = Map( + "prop" -> "value" + )))) + Seq(createSql, replaceSql).foreach { sql => + testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) + } + } + + val createSql = + s"""CREATE TABLE my_tab (id bigint) + |PARTITIONED BY (part string) + |STORED AS otherFormat + |ROW FORMAT SERDE 'customSerde' + |WITH SERDEPROPERTIES ('prop'='value') + """.stripMargin + val replaceSql = createSql.replaceFirst("CREATE", "REPLACE") + Seq(createSql, replaceSql).foreach { sql => + assertUnsupported(sql, Seq("ROW FORMAT SERDE is incompatible with format 'otherFormat'")) + } + } + + test("create/replace table - stored as format with delimited clauses") { + val createSql = + s"""CREATE TABLE my_tab (id bigint) + |PARTITIONED BY (part string) + |STORED AS textfile + |ROW FORMAT DELIMITED + |FIELDS TERMINATED BY ',' ESCAPED BY '\\\\' -- double escape for Scala and for SQL + |COLLECTION ITEMS TERMINATED BY '#' + |MAP KEYS TERMINATED BY '=' + |LINES TERMINATED BY '\\n' + """.stripMargin + val replaceSql = createSql.replaceFirst("CREATE", "REPLACE") + val expectedTableSpec = TableSpec( + Seq("my_tab"), + Some(new StructType().add("id", LongType).add("part", StringType)), + Seq(IdentityTransform(FieldReference("part"))), + None, + Map.empty[String, String], + None, + Map.empty[String, String], + None, + None, + Some(SerdeInfo(storedAs = Some("textfile"), serdeProperties = Map( + "field.delim" -> ",", "serialization.format" -> ",", "escape.delim" -> "\\", + "colelction.delim" -> "#", "mapkey.delim" -> "=", "line.delim" -> "\n" + )))) + Seq(createSql, replaceSql).foreach { sql => + testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) + } + + val createFailSql = + s"""CREATE TABLE my_tab (id bigint) + |PARTITIONED BY (part string) + |STORED AS otherFormat + |ROW FORMAT DELIMITED + |FIELDS TERMINATED BY ',' + """.stripMargin + val replaceFailSql = createFailSql.replaceFirst("CREATE", "REPLACE") + Seq(createFailSql, replaceFailSql).foreach { sql => + assertUnsupported(sql, Seq( + "ROW FORMAT DELIMITED is only compatible with 'textfile', not 'otherFormat'")) + } + } + + test("create/replace table - stored as inputformat/outputformat") { + val createSql = + """CREATE TABLE my_tab (id bigint) + |PARTITIONED BY (part string) + |STORED AS INPUTFORMAT 'inFormat' OUTPUTFORMAT 'outFormat' + """.stripMargin + val replaceSql = 
createSql.replaceFirst("CREATE", "REPLACE") + val expectedTableSpec = TableSpec( + Seq("my_tab"), + Some(new StructType().add("id", LongType).add("part", StringType)), + Seq(IdentityTransform(FieldReference("part"))), + None, + Map.empty[String, String], + None, + Map.empty[String, String], + None, + None, + Some(SerdeInfo(formatClasses = Some(FormatClasses("inFormat", "outFormat"))))) + Seq(createSql, replaceSql).foreach { sql => + testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) + } + } + + test("create/replace table - stored as inputformat/outputformat with serde") { + val createSql = + """CREATE TABLE my_tab (id bigint) + |PARTITIONED BY (part string) + |STORED AS INPUTFORMAT 'inFormat' OUTPUTFORMAT 'outFormat' + |ROW FORMAT SERDE 'customSerde' + """.stripMargin + val replaceSql = createSql.replaceFirst("CREATE", "REPLACE") + val expectedTableSpec = TableSpec( + Seq("my_tab"), + Some(new StructType().add("id", LongType).add("part", StringType)), + Seq(IdentityTransform(FieldReference("part"))), + None, + Map.empty[String, String], + None, + Map.empty[String, String], + None, + None, + Some(SerdeInfo( + formatClasses = Some(FormatClasses("inFormat", "outFormat")), + serde = Some("customSerde")))) + Seq(createSql, replaceSql).foreach { sql => + testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) + } + } + + test("create/replace table - using with stored as") { + val createSql = + """CREATE TABLE my_tab (id bigint, part string) + |USING parquet + |STORED AS parquet + """.stripMargin + val replaceSql = createSql.replaceFirst("CREATE", "REPLACE") + Seq(createSql, replaceSql).foreach { sql => + assertUnsupported(sql, Seq("CREATE TABLE ... USING ... STORED AS")) + } + } + + test("create/replace table - using with row format serde") { + val createSql = + """CREATE TABLE my_tab (id bigint, part string) + |USING parquet + |ROW FORMAT SERDE 'customSerde' + """.stripMargin + val replaceSql = createSql.replaceFirst("CREATE", "REPLACE") + Seq(createSql, replaceSql).foreach { sql => + assertUnsupported(sql, Seq("CREATE TABLE ... USING ... ROW FORMAT SERDE")) + } + } + + test("create/replace table - using with row format delimited") { + val createSql = + """CREATE TABLE my_tab (id bigint, part string) + |USING parquet + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + """.stripMargin + val replaceSql = createSql.replaceFirst("CREATE", "REPLACE") + Seq(createSql, replaceSql).foreach { sql => + assertUnsupported(sql, Seq("CREATE TABLE ... USING ... ROW FORMAT DELIMITED")) + } + } + + test("create/replace table - stored by") { + val createSql = + """CREATE TABLE my_tab (id bigint, p1 string) + |STORED BY 'handler' + """.stripMargin + val replaceSql = createSql.replaceFirst("CREATE", "REPLACE") + Seq(createSql, replaceSql).foreach { sql => + assertUnsupported(sql, Seq("stored by")) + } + } + + test("Unsupported skew clause - create/replace table") { + intercept("CREATE TABLE my_tab (id bigint) SKEWED BY (id) ON (1,2,3)", + "CREATE TABLE ... SKEWED BY") + intercept("REPLACE TABLE my_tab (id bigint) SKEWED BY (id) ON (1,2,3)", + "CREATE TABLE ... 
SKEWED BY") + } + test("Duplicate clauses - create/replace table") { def createTableHeader(duplicateClause: String): String = { - s"CREATE TABLE my_tab(a INT, b STRING) USING parquet $duplicateClause $duplicateClause" + s"CREATE TABLE my_tab(a INT, b STRING) $duplicateClause $duplicateClause" } def replaceTableHeader(duplicateClause: String): String = { - s"CREATE TABLE my_tab(a INT, b STRING) USING parquet $duplicateClause $duplicateClause" + s"CREATE TABLE my_tab(a INT, b STRING) $duplicateClause $duplicateClause" } intercept(createTableHeader("TBLPROPERTIES('test' = 'test2')"), @@ -281,6 +579,14 @@ class DDLParserSuite extends AnalysisTest { "Found duplicate clauses: CLUSTERED BY") intercept(createTableHeader("PARTITIONED BY (b)"), "Found duplicate clauses: PARTITIONED BY") + intercept(createTableHeader("PARTITIONED BY (c int)"), + "Found duplicate clauses: PARTITIONED BY") + intercept(createTableHeader("STORED AS parquet"), + "Found duplicate clauses: STORED AS") + intercept(createTableHeader("STORED AS INPUTFORMAT 'in' OUTPUTFORMAT 'out'"), + "Found duplicate clauses: STORED AS") + intercept(createTableHeader("ROW FORMAT SERDE 'serde'"), + "Found duplicate clauses: ROW FORMAT") intercept(replaceTableHeader("TBLPROPERTIES('test' = 'test2')"), "Found duplicate clauses: TBLPROPERTIES") @@ -292,6 +598,14 @@ class DDLParserSuite extends AnalysisTest { "Found duplicate clauses: CLUSTERED BY") intercept(replaceTableHeader("PARTITIONED BY (b)"), "Found duplicate clauses: PARTITIONED BY") + intercept(replaceTableHeader("PARTITIONED BY (c int)"), + "Found duplicate clauses: PARTITIONED BY") + intercept(replaceTableHeader("STORED AS parquet"), + "Found duplicate clauses: STORED AS") + intercept(replaceTableHeader("STORED AS INPUTFORMAT 'in' OUTPUTFORMAT 'out'"), + "Found duplicate clauses: STORED AS") + intercept(replaceTableHeader("ROW FORMAT SERDE 'serde'"), + "Found duplicate clauses: ROW FORMAT") } test("support for other types in OPTIONS") { @@ -317,6 +631,7 @@ class DDLParserSuite extends AnalysisTest { Some("json"), Map("a" -> "1", "b" -> "0.1", "c" -> "true"), None, + None, None), expectedIfNotExists = false) } @@ -372,7 +687,8 @@ class DDLParserSuite extends AnalysisTest { Some("parquet"), Map.empty[String, String], Some("/user/external/page_view"), - Some("This is the staging page view table")) + Some("This is the staging page view table"), + None) Seq(s1, s2, s3, s4).foreach { sql => testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = true) } @@ -2105,7 +2421,9 @@ class DDLParserSuite extends AnalysisTest { provider: Option[String], options: Map[String, String], location: Option[String], - comment: Option[String]) + comment: Option[String], + serdeInfo: Option[SerdeInfo], + external: Boolean = false) private object TableSpec { def apply(plan: LogicalPlan): TableSpec = { @@ -2120,7 +2438,9 @@ class DDLParserSuite extends AnalysisTest { create.provider, create.options, create.location, - create.comment) + create.comment, + create.serde, + create.external) case replace: ReplaceTableStatement => TableSpec( replace.tableName, @@ -2131,7 +2451,8 @@ class DDLParserSuite extends AnalysisTest { replace.provider, replace.options, replace.location, - replace.comment) + replace.comment, + replace.serde) case ctas: CreateTableAsSelectStatement => TableSpec( ctas.tableName, @@ -2142,7 +2463,9 @@ class DDLParserSuite extends AnalysisTest { ctas.provider, ctas.options, ctas.location, - ctas.comment) + ctas.comment, + ctas.serde, + ctas.external) case rtas: 
ReplaceTableAsSelectStatement => TableSpec( rtas.tableName, @@ -2153,7 +2476,8 @@ class DDLParserSuite extends AnalysisTest { rtas.provider, rtas.options, rtas.location, - rtas.comment) + rtas.comment, + rtas.serde) case other => fail(s"Expected to parse Create, CTAS, Replace, or RTAS plan" + s" from query, got ${other.getClass.getName}.") @@ -2179,8 +2503,7 @@ class DDLParserSuite extends AnalysisTest { CommentOnTable(UnresolvedTable(Seq("a", "b", "c"), "COMMENT ON TABLE"), "xYz")) } - // TODO: ignored by SPARK-31707, restore the test after create table syntax unification - ignore("create table - without using") { + test("create table - without using") { val sql = "CREATE TABLE 1m.2g(a INT)" val expectedTableSpec = TableSpec( Seq("1m", "2g"), @@ -2191,6 +2514,7 @@ class DDLParserSuite extends AnalysisTest { None, Map.empty[String, String], None, + None, None) testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index 31b4c158aa67b..a8688bdf15495 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -658,6 +658,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { extraOptions.get("path"), extraOptions.get(TableCatalog.PROP_COMMENT), extraOptions.toMap, + None, orCreate = true) // Create the table if it doesn't exist case (other, _) => @@ -675,7 +676,9 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { extraOptions.get("path"), extraOptions.get(TableCatalog.PROP_COMMENT), extraOptions.toMap, - ifNotExists = other == SaveMode.Ignore) + None, + ifNotExists = other == SaveMode.Ignore, + external = false) } runCommand(df.sparkSession, "saveAsTable") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala index d55b5c3103537..9a49fc3d74780 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala @@ -119,7 +119,9 @@ final class DataFrameWriterV2[T] private[sql](table: String, ds: Dataset[T]) None, None, options.toMap, - ifNotExists = false) + None, + ifNotExists = false, + external = false) } } @@ -207,6 +209,7 @@ final class DataFrameWriterV2[T] private[sql](table: String, ds: Dataset[T]) None, None, options.toMap, + None, orCreate = orCreate) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 395f5efd5a52d..f49caf7f04a20 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -27,6 +27,7 @@ import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource} import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 +import org.apache.spark.sql.internal.HiveSerDe import org.apache.spark.sql.types.{HIVE_TYPE_STRING, HiveStringType, MetadataBuilder, StructField, StructType} /** @@ -265,16 +266,17 @@ class ResolveSessionCatalog( // For CREATE TABLE [AS SELECT], we should use the v1 
command if the catalog is resolved to the // session catalog and the table provider is not v2. case c @ CreateTableStatement( - SessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _) => + SessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _, _) => assertNoNullTypeInSchema(c.tableSchema) - val provider = c.provider.getOrElse(conf.defaultDataSourceName) + val (storageFormat, provider) = getStorageFormatAndProvider( + c.provider, c.options, c.location, c.serde, ctas = false) if (!isV2Provider(provider)) { if (!DDLUtils.isHiveTable(Some(provider))) { assertNoCharTypeInSchema(c.tableSchema) } val tableDesc = buildCatalogTable(tbl.asTableIdentifier, c.tableSchema, - c.partitioning, c.bucketSpec, c.properties, provider, c.options, c.location, - c.comment, c.ifNotExists) + c.partitioning, c.bucketSpec, c.properties, provider, c.location, + c.comment, storageFormat, c.external) val mode = if (c.ifNotExists) SaveMode.Ignore else SaveMode.ErrorIfExists CreateTable(tableDesc, mode, None) } else { @@ -285,30 +287,32 @@ class ResolveSessionCatalog( c.tableSchema, // convert the bucket spec and add it as a transform c.partitioning ++ c.bucketSpec.map(_.asTransform), - convertTableProperties(c.properties, c.options, c.location, c.comment, Some(provider)), + convertTableProperties(c), ignoreIfExists = c.ifNotExists) } case c @ CreateTableAsSelectStatement( - SessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _) => + SessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _, _, _) => if (c.asSelect.resolved) { assertNoNullTypeInSchema(c.asSelect.schema) } - val provider = c.provider.getOrElse(conf.defaultDataSourceName) + val (storageFormat, provider) = getStorageFormatAndProvider( + c.provider, c.options, c.location, c.serde, ctas = true) if (!isV2Provider(provider)) { val tableDesc = buildCatalogTable(tbl.asTableIdentifier, new StructType, - c.partitioning, c.bucketSpec, c.properties, provider, c.options, c.location, - c.comment, c.ifNotExists) + c.partitioning, c.bucketSpec, c.properties, provider, c.location, + c.comment, storageFormat, c.external) val mode = if (c.ifNotExists) SaveMode.Ignore else SaveMode.ErrorIfExists CreateTable(tableDesc, mode, Some(c.asSelect)) } else { + assertNoCharTypeInSchema(c.schema) CreateTableAsSelect( catalog.asTableCatalog, tbl.asIdentifier, // convert the bucket spec and add it as a transform c.partitioning ++ c.bucketSpec.map(_.asTransform), c.asSelect, - convertTableProperties(c.properties, c.options, c.location, c.comment, Some(provider)), + convertTableProperties(c), writeOptions = c.writeOptions, ignoreIfExists = c.ifNotExists) } @@ -322,7 +326,7 @@ class ResolveSessionCatalog( // For REPLACE TABLE [AS SELECT], we should fail if the catalog is resolved to the // session catalog and the table provider is not v2. 
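Condensing the new resolution logic: the storage format and provider are now derived together from the USING clause and any Hive SerDe clauses. A rough decision sketch (the full `getStorageFormatAndProvider` helper appears later in this hunk, and it also builds the CatalogStorageFormat values and checks property merging):

```
// Rough decision sketch only; see getStorageFormatAndProvider below for the real logic.
def chooseProvider(
    provider: Option[String],
    hasSerdeClauses: Boolean,
    ctas: Boolean,
    convertCTAS: Boolean,
    defaultDataSource: String): String = (provider, hasSerdeClauses) match {
  case (Some(_), true)  => sys.error("Cannot create table with both USING and Hive serde clauses")
  case (Some(p), false) => p                               // plain USING table
  case (None, true)     => "hive"                          // STORED AS / ROW FORMAT implies Hive
  case (None, false)    =>
    if (ctas && convertCTAS) defaultDataSource else "hive" // legacy CREATE TABLE default
}
```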
case c @ ReplaceTableStatement( - SessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _) => + SessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _) => assertNoNullTypeInSchema(c.tableSchema) val provider = c.provider.getOrElse(conf.defaultDataSourceName) if (!isV2Provider(provider)) { @@ -335,12 +339,12 @@ class ResolveSessionCatalog( c.tableSchema, // convert the bucket spec and add it as a transform c.partitioning ++ c.bucketSpec.map(_.asTransform), - convertTableProperties(c.properties, c.options, c.location, c.comment, Some(provider)), + convertTableProperties(c), orCreate = c.orCreate) } case c @ ReplaceTableAsSelectStatement( - SessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _) => + SessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _, _) => if (c.asSelect.resolved) { assertNoNullTypeInSchema(c.asSelect.schema) } @@ -354,7 +358,7 @@ class ResolveSessionCatalog( // convert the bucket spec and add it as a transform c.partitioning ++ c.bucketSpec.map(_.asTransform), c.asSelect, - convertTableProperties(c.properties, c.options, c.location, c.comment, Some(provider)), + convertTableProperties(c), writeOptions = c.writeOptions, orCreate = c.orCreate) } @@ -621,6 +625,64 @@ class ResolveSessionCatalog( case _ => throw new AnalysisException(s"$sql is only supported with temp views or v1 tables.") } + private def getStorageFormatAndProvider( + provider: Option[String], + options: Map[String, String], + location: Option[String], + maybeSerdeInfo: Option[SerdeInfo], + ctas: Boolean): (CatalogStorageFormat, String) = { + val nonHiveStorageFormat = CatalogStorageFormat.empty.copy( + locationUri = location.map(CatalogUtils.stringToURI), + properties = options) + val defaultHiveStorage = HiveSerDe.getDefaultStorage(conf).copy( + locationUri = location.map(CatalogUtils.stringToURI), + properties = options) + + if (provider.isDefined) { + // The parser guarantees that USING and STORED AS/ROW FORMAT won't co-exist. + if (maybeSerdeInfo.isDefined) { + throw new AnalysisException( + s"Cannot create table with both USING $provider and ${maybeSerdeInfo.get.describe}") + } + (nonHiveStorageFormat, provider.get) + } else if (maybeSerdeInfo.isDefined) { + val serdeInfo = maybeSerdeInfo.get + SerdeInfo.checkSerdePropMerging(serdeInfo.serdeProperties, defaultHiveStorage.properties) + val storageFormat = if (serdeInfo.storedAs.isDefined) { + // If `STORED AS fileFormat` is used, infer inputFormat, outputFormat and serde from it. + HiveSerDe.sourceToSerDe(serdeInfo.storedAs.get) match { + case Some(hiveSerde) => + defaultHiveStorage.copy( + inputFormat = hiveSerde.inputFormat.orElse(defaultHiveStorage.inputFormat), + outputFormat = hiveSerde.outputFormat.orElse(defaultHiveStorage.outputFormat), + // User specified serde takes precedence over the one inferred from file format. 
+ serde = serdeInfo.serde.orElse(hiveSerde.serde).orElse(defaultHiveStorage.serde), + properties = serdeInfo.serdeProperties ++ defaultHiveStorage.properties) + case _ => throw new AnalysisException( + s"STORED AS with file format '${serdeInfo.storedAs.get}' is invalid.") + } + } else { + defaultHiveStorage.copy( + inputFormat = + serdeInfo.formatClasses.map(_.input).orElse(defaultHiveStorage.inputFormat), + outputFormat = + serdeInfo.formatClasses.map(_.output).orElse(defaultHiveStorage.outputFormat), + serde = serdeInfo.serde.orElse(defaultHiveStorage.serde), + properties = serdeInfo.serdeProperties ++ defaultHiveStorage.properties) + } + (storageFormat, DDLUtils.HIVE_PROVIDER) + } else { + // If neither USING nor STORED AS/ROW FORMAT is specified, we create native data source + // tables if it's a CTAS and `conf.convertCTAS` is true. + // TODO: create native data source table by default for non-CTAS. + if (ctas && conf.convertCTAS) { + (nonHiveStorageFormat, conf.defaultDataSourceName) + } else { + (defaultHiveStorage, DDLUtils.HIVE_PROVIDER) + } + } + } + private def buildCatalogTable( table: TableIdentifier, schema: StructType, @@ -628,13 +690,19 @@ class ResolveSessionCatalog( bucketSpec: Option[BucketSpec], properties: Map[String, String], provider: String, - options: Map[String, String], location: Option[String], comment: Option[String], - ifNotExists: Boolean): CatalogTable = { - val storage = CatalogStorageFormat.empty.copy( - locationUri = location.map(CatalogUtils.stringToURI), - properties = options) + storageFormat: CatalogStorageFormat, + external: Boolean): CatalogTable = { + if (external) { + if (DDLUtils.isHiveTable(Some(provider))) { + if (location.isEmpty) { + throw new AnalysisException(s"CREATE EXTERNAL TABLE must be accompanied by LOCATION") + } + } else { + throw new AnalysisException(s"Operation not allowed: CREATE EXTERNAL TABLE ... USING") + } + } val tableType = if (location.isDefined) { CatalogTableType.EXTERNAL @@ -645,7 +713,7 @@ class ResolveSessionCatalog( CatalogTable( identifier = table, tableType = tableType, - storage = storage, + storage = storageFormat, schema = schema, provider = Some(provider), partitionColumnNames = partitioning.asPartitionColumns, @@ -717,6 +785,9 @@ class ResolveSessionCatalog( } private def isV2Provider(provider: String): Boolean = { + // Return earlier since `lookupDataSourceV2` may fail to resolve provider "hive" to + // `HiveFileFormat`, when running tests in sql/core. + if (DDLUtils.isHiveTable(Some(provider))) return false DataSource.lookupDataSourceV2(provider, conf) match { // TODO(SPARK-28396): Currently file source v2 can't work with tables. 
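The EXTERNAL validation that moved into `buildCatalogTable` above can be summarized without the catalog types; a minimal sketch, with plain strings standing in for CatalogTableType and error messages mirroring the ones thrown by the resolver:

```
// Minimal sketch of the EXTERNAL handling above.
def resolveTableType(external: Boolean, isHiveProvider: Boolean, location: Option[String]): String = {
  if (external && !isHiveProvider) {
    sys.error("Operation not allowed: CREATE EXTERNAL TABLE ... USING")
  }
  if (external && location.isEmpty) {
    sys.error("CREATE EXTERNAL TABLE must be accompanied by LOCATION")
  }
  if (location.isDefined) "EXTERNAL" else "MANAGED"
}

// resolveTableType(external = false, isHiveProvider = true, Some("/user/external/page_view")) == "EXTERNAL"
// resolveTableType(external = true, isHiveProvider = true, None) // throws: LOCATION required
```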
case Some(_: FileDataSourceV2) => false diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 01522257c072d..a92f0775f1c05 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -26,7 +26,6 @@ import scala.collection.JavaConverters._ import org.antlr.v4.runtime.{ParserRuleContext, Token} import org.antlr.v4.runtime.tree.TerminalNode -import org.apache.spark.sql.SaveMode import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.expressions.Expression @@ -37,7 +36,6 @@ import org.apache.spark.sql.catalyst.util.DateTimeConstants import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.internal.{HiveSerDe, SQLConf, VariableSubstitution} -import org.apache.spark.sql.types.StructType /** * Concrete parser for Spark SQL statements. @@ -279,7 +277,7 @@ class SparkSqlAstBuilder extends AstBuilder { operationNotAllowed("CREATE TEMPORARY TABLE IF NOT EXISTS", ctx) } - val (_, _, _, options, location, _) = visitCreateTableClauses(ctx.createTableClauses()) + val (_, _, _, _, options, location, _, _) = visitCreateTableClauses(ctx.createTableClauses()) val provider = Option(ctx.tableProvider).map(_.multipartIdentifier.getText).getOrElse( throw new ParseException("CREATE TEMPORARY TABLE without a provider is not allowed.", ctx)) val schema = Option(ctx.colTypeList()).map(createSchema) @@ -382,153 +380,34 @@ class SparkSqlAstBuilder extends AstBuilder { } } - /** - * Create a Hive serde table, returning a [[CreateTable]] logical plan. - * - * This is a legacy syntax for Hive compatibility, we recommend users to use the Spark SQL - * CREATE TABLE syntax to create Hive serde table, e.g. "CREATE TABLE ... USING hive ..." - * - * Note: several features are currently not supported - temporary tables, bucketing, - * skewed columns and storage handlers (STORED BY). - * - * Expected format: - * {{{ - * CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name - * [(col1[:] data_type [COMMENT col_comment], ...)] - * create_table_clauses - * [AS select_statement]; - * - * create_table_clauses (order insensitive): - * [COMMENT table_comment] - * [PARTITIONED BY (col2[:] data_type [COMMENT col_comment], ...)] - * [ROW FORMAT row_format] - * [STORED AS file_format] - * [LOCATION path] - * [TBLPROPERTIES (property_name=property_value, ...)] - * }}} - */ - override def visitCreateHiveTable(ctx: CreateHiveTableContext): LogicalPlan = withOrigin(ctx) { - val (ident, temp, ifNotExists, external) = visitCreateTableHeader(ctx.createTableHeader) - // TODO: implement temporary tables - if (temp) { - throw new ParseException( - "CREATE TEMPORARY TABLE is not supported yet. " + - "Please use CREATE TEMPORARY VIEW as an alternative.", ctx) - } - if (ctx.skewSpec.size > 0) { - operationNotAllowed("CREATE TABLE ... 
SKEWED BY", ctx) - } - - checkDuplicateClauses(ctx.TBLPROPERTIES, "TBLPROPERTIES", ctx) - checkDuplicateClauses(ctx.PARTITIONED, "PARTITIONED BY", ctx) - checkDuplicateClauses(ctx.commentSpec(), "COMMENT", ctx) - checkDuplicateClauses(ctx.bucketSpec(), "CLUSTERED BY", ctx) - checkDuplicateClauses(ctx.createFileFormat, "STORED AS/BY", ctx) - checkDuplicateClauses(ctx.rowFormat, "ROW FORMAT", ctx) - checkDuplicateClauses(ctx.locationSpec, "LOCATION", ctx) - - val dataCols = Option(ctx.columns).map(visitColTypeList).getOrElse(Nil) - val partitionCols = Option(ctx.partitionColumns).map(visitColTypeList).getOrElse(Nil) - val properties = Option(ctx.tableProps).map(visitPropertyKeyValues).getOrElse(Map.empty) - val selectQuery = Option(ctx.query).map(plan) - val bucketSpec = ctx.bucketSpec().asScala.headOption.map(visitBucketSpec) - - // Note: Hive requires partition columns to be distinct from the schema, so we need - // to include the partition columns here explicitly - val schema = StructType(dataCols ++ partitionCols) - - // Storage format - val defaultStorage = HiveSerDe.getDefaultStorage(conf) - validateRowFormatFileFormat( - ctx.rowFormat.asScala.toSeq, ctx.createFileFormat.asScala.toSeq, ctx) - val fileStorage = ctx.createFileFormat.asScala.headOption.map(visitCreateFileFormat) - .getOrElse(CatalogStorageFormat.empty) - val rowStorage = ctx.rowFormat.asScala.headOption.map(visitRowFormat) - .getOrElse(CatalogStorageFormat.empty) - val location = visitLocationSpecList(ctx.locationSpec()) - // If we are creating an EXTERNAL table, then the LOCATION field is required - if (external && location.isEmpty) { - operationNotAllowed("CREATE EXTERNAL TABLE must be accompanied by LOCATION", ctx) - } - - val locUri = location.map(CatalogUtils.stringToURI(_)) - val storage = CatalogStorageFormat( - locationUri = locUri, - inputFormat = fileStorage.inputFormat.orElse(defaultStorage.inputFormat), - outputFormat = fileStorage.outputFormat.orElse(defaultStorage.outputFormat), - serde = rowStorage.serde.orElse(fileStorage.serde).orElse(defaultStorage.serde), - compressed = false, - properties = rowStorage.properties ++ fileStorage.properties) - // If location is defined, we'll assume this is an external table. - // Otherwise, we may accidentally delete existing data. - val tableType = if (external || location.isDefined) { - CatalogTableType.EXTERNAL + private def toStorageFormat( + location: Option[String], + maybeSerdeInfo: Option[SerdeInfo], + ctx: ParserRuleContext): CatalogStorageFormat = { + if (maybeSerdeInfo.isEmpty) { + CatalogStorageFormat.empty.copy(locationUri = location.map(CatalogUtils.stringToURI)) } else { - CatalogTableType.MANAGED - } - - val name = tableIdentifier(ident, "CREATE TABLE ... STORED AS ...", ctx) - - // TODO support the sql text - have a proper location for this! - val tableDesc = CatalogTable( - identifier = name, - tableType = tableType, - storage = storage, - schema = schema, - bucketSpec = bucketSpec, - provider = Some(DDLUtils.HIVE_PROVIDER), - partitionColumnNames = partitionCols.map(_.name), - properties = properties, - comment = visitCommentSpecList(ctx.commentSpec())) - - val mode = if (ifNotExists) SaveMode.Ignore else SaveMode.ErrorIfExists - - selectQuery match { - case Some(q) => - // Don't allow explicit specification of schema for CTAS. 
- if (dataCols.nonEmpty) { - operationNotAllowed( - "Schema may not be specified in a Create Table As Select (CTAS) statement", - ctx) - } - - // When creating partitioned table with CTAS statement, we can't specify data type for the - // partition columns. - if (partitionCols.nonEmpty) { - val errorMessage = "Create Partitioned Table As Select cannot specify data type for " + - "the partition columns of the target table." - operationNotAllowed(errorMessage, ctx) - } - - // Hive CTAS supports dynamic partition by specifying partition column names. - val partitionColumnNames = - Option(ctx.partitionColumnNames) - .map(visitIdentifierList(_).toArray) - .getOrElse(Array.empty[String]) - - val tableDescWithPartitionColNames = - tableDesc.copy(partitionColumnNames = partitionColumnNames) - - val hasStorageProperties = (ctx.createFileFormat.size != 0) || (ctx.rowFormat.size != 0) - if (conf.convertCTAS && !hasStorageProperties) { - // At here, both rowStorage.serdeProperties and fileStorage.serdeProperties - // are empty Maps. - val newTableDesc = tableDescWithPartitionColNames.copy( - storage = CatalogStorageFormat.empty.copy(locationUri = locUri), - provider = Some(conf.defaultDataSourceName)) - CreateTable(newTableDesc, mode, Some(q)) - } else { - CreateTable(tableDescWithPartitionColNames, mode, Some(q)) - } - case None => - // When creating partitioned table, we must specify data type for the partition columns. - if (Option(ctx.partitionColumnNames).isDefined) { - val errorMessage = "Must specify a data type for each partition column while creating " + - "Hive partitioned table." - operationNotAllowed(errorMessage, ctx) + val serdeInfo = maybeSerdeInfo.get + if (serdeInfo.storedAs.isEmpty) { + CatalogStorageFormat.empty.copy( + locationUri = location.map(CatalogUtils.stringToURI), + inputFormat = serdeInfo.formatClasses.map(_.input), + outputFormat = serdeInfo.formatClasses.map(_.output), + serde = serdeInfo.serde, + properties = serdeInfo.serdeProperties) + } else { + HiveSerDe.sourceToSerDe(serdeInfo.storedAs.get) match { + case Some(hiveSerde) => + CatalogStorageFormat.empty.copy( + locationUri = location.map(CatalogUtils.stringToURI), + inputFormat = hiveSerde.inputFormat, + outputFormat = hiveSerde.outputFormat, + serde = serdeInfo.serde.orElse(hiveSerde.serde), + properties = serdeInfo.serdeProperties) + case _ => + operationNotAllowed(s"STORED AS with file format '${serdeInfo.storedAs.get}'", ctx) } - - CreateTable(tableDesc, mode, None) + } } } @@ -559,189 +438,27 @@ class SparkSqlAstBuilder extends AstBuilder { checkDuplicateClauses(ctx.TBLPROPERTIES, "TBLPROPERTIES", ctx) val provider = ctx.tableProvider.asScala.headOption.map(_.multipartIdentifier.getText) val location = visitLocationSpecList(ctx.locationSpec()) - // rowStorage used to determine CatalogStorageFormat.serde and - // CatalogStorageFormat.properties in STORED AS clause. 
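The `HiveSerDe.sourceToSerDe` lookup used by `toStorageFormat` above resolves a STORED AS keyword to concrete input/output format and serde classes. A small stand-in covering only formats exercised elsewhere in this patch (class names taken from the existing tests; the real table in `org.apache.spark.sql.internal.HiveSerDe` covers more formats):

```
// Stand-in for HiveSerDe.sourceToSerDe, restricted to formats appearing in the tests.
case class SimpleHiveSerDe(inputFormat: String, outputFormat: String, serde: Option[String])

val knownFormats: Map[String, SimpleHiveSerDe] = Map(
  "textfile" -> SimpleHiveSerDe(
    "org.apache.hadoop.mapred.TextInputFormat",
    "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
    Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")),
  "rcfile" -> SimpleHiveSerDe(
    "org.apache.hadoop.hive.ql.io.RCFileInputFormat",
    "org.apache.hadoop.hive.ql.io.RCFileOutputFormat",
    Some("org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe")),
  "orc" -> SimpleHiveSerDe(
    "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat",
    "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat",
    Some("org.apache.hadoop.hive.ql.io.orc.OrcSerde")))

def lookupStoredAs(format: String): Option[SimpleHiveSerDe] =
  knownFormats.get(format.toLowerCase)

// lookupStoredAs("otherFormat") == None, which is what surfaces as the
// "STORED AS with file format 'otherFormat'" errors in the code above.
```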
- val rowStorage = ctx.rowFormat.asScala.headOption.map(visitRowFormat) - .getOrElse(CatalogStorageFormat.empty) - val fileFormat = ctx.createFileFormat.asScala.headOption.map(visitCreateFileFormat) match { - case Some(f) => - if (provider.isDefined) { - throw new ParseException("'STORED AS hiveFormats' and 'USING provider' " + - "should not be specified both", ctx) - } - f.copy( - locationUri = location.map(CatalogUtils.stringToURI), - serde = rowStorage.serde.orElse(f.serde), - properties = rowStorage.properties ++ f.properties) - case None => - if (rowStorage.serde.isDefined) { - throw new ParseException("'ROW FORMAT' must be used with 'STORED AS'", ctx) - } - CatalogStorageFormat.empty.copy(locationUri = location.map(CatalogUtils.stringToURI)) + // TODO: Do not skip serde check for CREATE TABLE LIKE. + val serdeInfo = getSerdeInfo( + ctx.rowFormat.asScala, ctx.createFileFormat.asScala, ctx, skipCheck = true) + if (provider.isDefined && serdeInfo.isDefined) { + operationNotAllowed(s"CREATE TABLE LIKE ... USING ... ${serdeInfo.get.describe}", ctx) } - val properties = Option(ctx.tableProps).map(visitPropertyKeyValues).getOrElse(Map.empty) - CreateTableLikeCommand( - targetTable, sourceTable, fileFormat, provider, properties, ctx.EXISTS != null) - } - /** - * Create a [[CatalogStorageFormat]] for creating tables. - * - * Format: STORED AS ... - */ - override def visitCreateFileFormat( - ctx: CreateFileFormatContext): CatalogStorageFormat = withOrigin(ctx) { - (ctx.fileFormat, ctx.storageHandler) match { - // Expected format: INPUTFORMAT input_format OUTPUTFORMAT output_format - case (c: TableFileFormatContext, null) => - visitTableFileFormat(c) - // Expected format: SEQUENCEFILE | TEXTFILE | RCFILE | ORC | PARQUET | AVRO - case (c: GenericFileFormatContext, null) => - visitGenericFileFormat(c) - case (null, storageHandler) => - operationNotAllowed("STORED BY", ctx) - case _ => - throw new ParseException("Expected either STORED AS or STORED BY, not both", ctx) - } - } - - /** - * Create a [[CatalogStorageFormat]]. - */ - override def visitTableFileFormat( - ctx: TableFileFormatContext): CatalogStorageFormat = withOrigin(ctx) { - CatalogStorageFormat.empty.copy( - inputFormat = Option(string(ctx.inFmt)), - outputFormat = Option(string(ctx.outFmt))) - } - - /** - * Resolve a [[HiveSerDe]] based on the name given and return it as a [[CatalogStorageFormat]]. - */ - override def visitGenericFileFormat( - ctx: GenericFileFormatContext): CatalogStorageFormat = withOrigin(ctx) { - val source = ctx.identifier.getText - HiveSerDe.sourceToSerDe(source) match { - case Some(s) => - CatalogStorageFormat.empty.copy( - inputFormat = s.inputFormat, - outputFormat = s.outputFormat, - serde = s.serde) - case None => - operationNotAllowed(s"STORED AS with file format '$source'", ctx) - } - } - - /** - * Create a [[CatalogStorageFormat]] used for creating tables. - * - * Example format: - * {{{ - * SERDE serde_name [WITH SERDEPROPERTIES (k1=v1, k2=v2, ...)] - * }}} - * - * OR - * - * {{{ - * DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] - * [COLLECTION ITEMS TERMINATED BY char] - * [MAP KEYS TERMINATED BY char] - * [LINES TERMINATED BY char] - * [NULL DEFINED AS char] - * }}} - */ - private def visitRowFormat(ctx: RowFormatContext): CatalogStorageFormat = withOrigin(ctx) { - ctx match { - case serde: RowFormatSerdeContext => visitRowFormatSerde(serde) - case delimited: RowFormatDelimitedContext => visitRowFormatDelimited(delimited) - } - } - - /** - * Create SERDE row format name and properties pair. 
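In usage terms, the CREATE TABLE LIKE handling above now accepts either a provider or Hive format clauses, but not both, and a bare ROW FORMAT still has to come with STORED AS. A hedged sketch of statements a test might feed the parser (hypothetical table names; error wording paraphrased from the checks above):

```
// Hypothetical statements only; the exact error strings come from the checks above.
val likeWithProvider = "CREATE TABLE target LIKE source USING parquet"               // accepted
val likeWithStoredAs = "CREATE TABLE target LIKE source STORED AS ORC"               // accepted
val likeWithBoth     = "CREATE TABLE target LIKE source USING parquet STORED AS ORC" // rejected
val likeBareSerde    = "CREATE TABLE target LIKE source ROW FORMAT SERDE 'mySerde'"  // rejected:
// a bare ROW FORMAT still requires STORED AS, per the restriction kept above.
```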
- */ - override def visitRowFormatSerde( - ctx: RowFormatSerdeContext): CatalogStorageFormat = withOrigin(ctx) { - import ctx._ - CatalogStorageFormat.empty.copy( - serde = Option(string(name)), - properties = Option(tablePropertyList).map(visitPropertyKeyValues).getOrElse(Map.empty)) - } - - /** - * Create a delimited row format properties object. - */ - override def visitRowFormatDelimited( - ctx: RowFormatDelimitedContext): CatalogStorageFormat = withOrigin(ctx) { - // TODO we need proper support for the NULL format. - val entries = - entry("field.delim", ctx.fieldsTerminatedBy) ++ - entry("serialization.format", ctx.fieldsTerminatedBy) ++ - entry("escape.delim", ctx.escapedBy) ++ - // The following typo is inherited from Hive... - entry("colelction.delim", ctx.collectionItemsTerminatedBy) ++ - entry("mapkey.delim", ctx.keysTerminatedBy) ++ - Option(ctx.linesSeparatedBy).toSeq.map { token => - val value = string(token) - validate( - value == "\n", - s"LINES TERMINATED BY only supports newline '\\n' right now: $value", - ctx) - "line.delim" -> value - } - CatalogStorageFormat.empty.copy(properties = entries.toMap) - } - - /** - * Throw a [[ParseException]] if the user specified incompatible SerDes through ROW FORMAT - * and STORED AS. - * - * The following are allowed. Anything else is not: - * ROW FORMAT SERDE ... STORED AS [SEQUENCEFILE | RCFILE | TEXTFILE] - * ROW FORMAT DELIMITED ... STORED AS TEXTFILE - * ROW FORMAT ... STORED AS INPUTFORMAT ... OUTPUTFORMAT ... - */ - private def validateRowFormatFileFormat( - rowFormatCtx: RowFormatContext, - createFileFormatCtx: CreateFileFormatContext, - parentCtx: ParserRuleContext): Unit = { - if (rowFormatCtx == null || createFileFormatCtx == null) { - return - } - (rowFormatCtx, createFileFormatCtx.fileFormat) match { - case (_, ffTable: TableFileFormatContext) => // OK - case (rfSerde: RowFormatSerdeContext, ffGeneric: GenericFileFormatContext) => - ffGeneric.identifier.getText.toLowerCase(Locale.ROOT) match { - case ("sequencefile" | "textfile" | "rcfile") => // OK - case fmt => - operationNotAllowed( - s"ROW FORMAT SERDE is incompatible with format '$fmt', which also specifies a serde", - parentCtx) - } - case (rfDelimited: RowFormatDelimitedContext, ffGeneric: GenericFileFormatContext) => - ffGeneric.identifier.getText.toLowerCase(Locale.ROOT) match { - case "textfile" => // OK - case fmt => operationNotAllowed( - s"ROW FORMAT DELIMITED is only compatible with 'textfile', not '$fmt'", parentCtx) + // TODO: remove this restriction as it seems unnecessary. + serdeInfo match { + case Some(SerdeInfo(storedAs, formatClasses, serde, _)) => + if (storedAs.isEmpty && formatClasses.isEmpty && serde.isDefined) { + throw new ParseException("'ROW FORMAT' must be used with 'STORED AS'", ctx) } case _ => - // should never happen - def str(ctx: ParserRuleContext): String = { - (0 until ctx.getChildCount).map { i => ctx.getChild(i).getText }.mkString(" ") - } - operationNotAllowed( - s"Unexpected combination of ${str(rowFormatCtx)} and ${str(createFileFormatCtx)}", - parentCtx) } - } - private def validateRowFormatFileFormat( - rowFormatCtx: Seq[RowFormatContext], - createFileFormatCtx: Seq[CreateFileFormatContext], - parentCtx: ParserRuleContext): Unit = { - if (rowFormatCtx.size == 1 && createFileFormatCtx.size == 1) { - validateRowFormatFileFormat(rowFormatCtx.head, createFileFormatCtx.head, parentCtx) - } + // TODO: also look at `HiveSerDe.getDefaultStorage`. 
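For reference, the delimited-clause mapping that the removed visitor performed (and that the SerdeInfo path preserves, per the delimited-clauses test earlier in this file) can be written out as a plain function; note the intentionally misspelled key inherited from Hive:

```
// Plain-function sketch of ROW FORMAT DELIMITED -> serde properties; the field
// terminator is stored under both "field.delim" and "serialization.format", and
// "colelction.delim" keeps Hive's historical typo on purpose.
def delimitedToProperties(
    fieldsTerminatedBy: Option[String],
    escapedBy: Option[String],
    collectionItemsTerminatedBy: Option[String],
    mapKeysTerminatedBy: Option[String],
    linesTerminatedBy: Option[String]): Map[String, String] = {
  (fieldsTerminatedBy.map("field.delim" -> _) ++
    fieldsTerminatedBy.map("serialization.format" -> _) ++
    escapedBy.map("escape.delim" -> _) ++
    collectionItemsTerminatedBy.map("colelction.delim" -> _) ++
    mapKeysTerminatedBy.map("mapkey.delim" -> _) ++
    linesTerminatedBy.map("line.delim" -> _)).toMap
}

// delimitedToProperties(Some(","), Some("\\"), Some("#"), Some("="), Some("\n")) ==
//   Map("field.delim" -> ",", "serialization.format" -> ",", "escape.delim" -> "\\",
//       "colelction.delim" -> "#", "mapkey.delim" -> "=", "line.delim" -> "\n")
```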
+ val storage = toStorageFormat(location, serdeInfo, ctx) + val properties = Option(ctx.tableProps).map(visitPropertyKeyValues).getOrElse(Map.empty) + CreateTableLikeCommand( + targetTable, sourceTable, storage, provider, properties, ctx.EXISTS != null) } /** @@ -788,7 +505,7 @@ class SparkSqlAstBuilder extends AstBuilder { case c: RowFormatSerdeContext => // Use a serde format. - val CatalogStorageFormat(None, None, None, Some(name), _, props) = visitRowFormatSerde(c) + val SerdeInfo(None, None, Some(name), props) = visitRowFormatSerde(c) // SPARK-10310: Special cases LazySimpleSerDe val recordHandler = if (name == "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") { @@ -896,28 +613,21 @@ class SparkSqlAstBuilder extends AstBuilder { */ override def visitInsertOverwriteHiveDir( ctx: InsertOverwriteHiveDirContext): InsertDirParams = withOrigin(ctx) { - validateRowFormatFileFormat(ctx.rowFormat, ctx.createFileFormat, ctx) - val rowStorage = Option(ctx.rowFormat).map(visitRowFormat) - .getOrElse(CatalogStorageFormat.empty) - val fileStorage = Option(ctx.createFileFormat).map(visitCreateFileFormat) - .getOrElse(CatalogStorageFormat.empty) - + val serdeInfo = getSerdeInfo( + Option(ctx.rowFormat).toSeq, Option(ctx.createFileFormat).toSeq, ctx) val path = string(ctx.path) // The path field is required if (path.isEmpty) { operationNotAllowed("INSERT OVERWRITE DIRECTORY must be accompanied by path", ctx) } - val defaultStorage = HiveSerDe.getDefaultStorage(conf) - - val storage = CatalogStorageFormat( - locationUri = Some(CatalogUtils.stringToURI(path)), - inputFormat = fileStorage.inputFormat.orElse(defaultStorage.inputFormat), - outputFormat = fileStorage.outputFormat.orElse(defaultStorage.outputFormat), - serde = rowStorage.serde.orElse(fileStorage.serde).orElse(defaultStorage.serde), - compressed = false, - properties = rowStorage.properties ++ fileStorage.properties) + val default = HiveSerDe.getDefaultStorage(conf) + val storage = toStorageFormat(Some(path), serdeInfo, ctx) + val finalStorage = storage.copy( + inputFormat = storage.inputFormat.orElse(default.inputFormat), + outputFormat = storage.outputFormat.orElse(default.outputFormat), + serde = storage.serde.orElse(default.serde)) - (ctx.LOCAL != null, storage, Some(DDLUtils.HIVE_PROVIDER)) + (ctx.LOCAL != null, finalStorage, Some(DDLUtils.HIVE_PROVIDER)) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala index 9ee145580ce6d..f330d6a8c99e2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala @@ -85,7 +85,7 @@ class V2SessionCatalog(catalog: SessionCatalog) val provider = properties.getOrDefault(TableCatalog.PROP_PROVIDER, conf.defaultDataSourceName) val tableProperties = properties.asScala val location = Option(properties.get(TableCatalog.PROP_LOCATION)) - val storage = DataSource.buildStorageFormatFromOptions(tableProperties.toMap) + val storage = DataSource.buildStorageFormatFromOptions(toOptions(tableProperties.toMap)) .copy(locationUri = location.map(CatalogUtils.stringToURI)) val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED @@ -111,6 +111,12 @@ class V2SessionCatalog(catalog: SessionCatalog) loadTable(ident) } + private def toOptions(properties: Map[String, String]): Map[String, 
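The INSERT OVERWRITE DIRECTORY rewrite above derives storage from the ROW FORMAT / STORED AS clauses and then back-fills it with the session's default Hive storage. A compact sketch of that merge, with a plain case class standing in for CatalogStorageFormat:

```
// Compact sketch of the defaults merge above; Storage stands in for CatalogStorageFormat.
case class Storage(
    inputFormat: Option[String] = None,
    outputFormat: Option[String] = None,
    serde: Option[String] = None)

def withHiveDefaults(parsed: Storage, default: Storage): Storage = Storage(
  inputFormat = parsed.inputFormat.orElse(default.inputFormat),
  outputFormat = parsed.outputFormat.orElse(default.outputFormat),
  serde = parsed.serde.orElse(default.serde))

// Only the serde was given via ROW FORMAT SERDE; the formats come from the defaults,
// where `defaults` would be derived from HiveSerDe.getDefaultStorage(conf).
// withHiveDefaults(Storage(serde = Some("customSerde")), defaults)
```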
String] = { + properties.filterKeys(_.startsWith(TableCatalog.OPTION_PREFIX)).map { + case (key, value) => key.drop(TableCatalog.OPTION_PREFIX.length) -> value + } + } + override def alterTable( ident: Identifier, changes: TableChange*): Table = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 222fa8ace4dca..f2b57f9442d09 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -432,7 +432,7 @@ class DataSourceV2SQLSuite intercept[Exception] { spark.sql("REPLACE TABLE testcat.table_name" + - s" USING foo OPTIONS (`${InMemoryTable.SIMULATE_FAILED_WRITE_OPTION}`=true)" + + s" USING foo TBLPROPERTIES (`${InMemoryTable.SIMULATE_FAILED_WRITE_OPTION}`=true)" + s" AS SELECT id FROM source") } @@ -465,7 +465,7 @@ class DataSourceV2SQLSuite intercept[Exception] { spark.sql("REPLACE TABLE testcat_atomic.table_name" + - s" USING foo OPTIONS (`${InMemoryTable.SIMULATE_FAILED_WRITE_OPTION}=true)" + + s" USING foo TBLPROPERTIES (`${InMemoryTable.SIMULATE_FAILED_WRITE_OPTION}=true)" + s" AS SELECT id FROM source") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index f55fbc9809f71..61c16baedb7cc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -20,16 +20,14 @@ package org.apache.spark.sql.execution import scala.collection.JavaConverters._ import org.apache.spark.internal.config.ConfigEntry -import org.apache.spark.sql.SaveMode import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedAlias, UnresolvedAttribute, UnresolvedRelation, UnresolvedStar} -import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType} import org.apache.spark.sql.catalyst.expressions.{Ascending, AttributeReference, Concat, SortOrder} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.execution.command._ -import org.apache.spark.sql.execution.datasources.{CreateTable, CreateTempViewUsing, RefreshResource} -import org.apache.spark.sql.internal.{HiveSerDe, StaticSQLConf} -import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructType} +import org.apache.spark.sql.execution.datasources.{CreateTempViewUsing, RefreshResource} +import org.apache.spark.sql.internal.StaticSQLConf +import org.apache.spark.sql.types.StringType /** * Parser test cases for rules defined in [[SparkSqlParser]]. @@ -42,23 +40,8 @@ class SparkSqlParserSuite extends AnalysisTest { private lazy val parser = new SparkSqlParser() - /** - * Normalizes plans: - * - CreateTable the createTime in tableDesc will replaced by -1L. 
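The new `toOptions` helper above strips the option prefix off table properties before handing them to `buildStorageFormatFromOptions`. An illustrative run, assuming the conventional "option." value for `TableCatalog.OPTION_PREFIX` (check the constant before reusing this sketch):

```
// Illustrative only: "option." is assumed here for TableCatalog.OPTION_PREFIX.
val prefix = "option."
val tableProperties = Map(
  "option.compression" -> "snappy",
  "provider" -> "parquet",
  "location" -> "/tmp/t")

val options = tableProperties.collect {
  case (key, value) if key.startsWith(prefix) => key.drop(prefix.length) -> value
}
// options == Map("compression" -> "snappy"): only prefixed keys survive, with the
// prefix removed, mirroring the filterKeys/map pair above.
```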
- */ - override def normalizePlan(plan: LogicalPlan): LogicalPlan = { - plan match { - case CreateTable(tableDesc, mode, query) => - val newTableDesc = tableDesc.copy(createTime = -1L) - CreateTable(newTableDesc, mode, query) - case _ => plan // Don't transform - } - } - private def assertEqual(sqlCommand: String, plan: LogicalPlan): Unit = { - val normalized1 = normalizePlan(parser.parsePlan(sqlCommand)) - val normalized2 = normalizePlan(plan) - comparePlans(normalized1, normalized2) + comparePlans(parser.parsePlan(sqlCommand), plan) } private def intercept(sqlCommand: String, messages: String*): Unit = @@ -210,110 +193,6 @@ class SparkSqlParserSuite extends AnalysisTest { Map("path" -> "/data/tmp/testspark1"))) } - private def createTableUsing( - table: String, - database: Option[String] = None, - tableType: CatalogTableType = CatalogTableType.MANAGED, - storage: CatalogStorageFormat = CatalogStorageFormat.empty, - schema: StructType = new StructType, - provider: Option[String] = Some("parquet"), - partitionColumnNames: Seq[String] = Seq.empty, - bucketSpec: Option[BucketSpec] = None, - mode: SaveMode = SaveMode.ErrorIfExists, - query: Option[LogicalPlan] = None): CreateTable = { - CreateTable( - CatalogTable( - identifier = TableIdentifier(table, database), - tableType = tableType, - storage = storage, - schema = schema, - provider = provider, - partitionColumnNames = partitionColumnNames, - bucketSpec = bucketSpec - ), mode, query - ) - } - - private def createTable( - table: String, - database: Option[String] = None, - tableType: CatalogTableType = CatalogTableType.MANAGED, - storage: CatalogStorageFormat = CatalogStorageFormat.empty.copy( - inputFormat = HiveSerDe.sourceToSerDe("textfile").get.inputFormat, - outputFormat = HiveSerDe.sourceToSerDe("textfile").get.outputFormat, - serde = Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")), - schema: StructType = new StructType, - provider: Option[String] = Some("hive"), - partitionColumnNames: Seq[String] = Seq.empty, - comment: Option[String] = None, - mode: SaveMode = SaveMode.ErrorIfExists, - query: Option[LogicalPlan] = None): CreateTable = { - CreateTable( - CatalogTable( - identifier = TableIdentifier(table, database), - tableType = tableType, - storage = storage, - schema = schema, - provider = provider, - partitionColumnNames = partitionColumnNames, - comment = comment - ), mode, query - ) - } - - test("create table - schema") { - assertEqual("CREATE TABLE my_tab(a INT COMMENT 'test', b STRING) STORED AS textfile", - createTable( - table = "my_tab", - schema = (new StructType) - .add("a", IntegerType, nullable = true, "test") - .add("b", StringType) - ) - ) - assertEqual("CREATE TABLE my_tab(a INT COMMENT 'test', b STRING) " + - "PARTITIONED BY (c INT, d STRING COMMENT 'test2')", - createTable( - table = "my_tab", - schema = (new StructType) - .add("a", IntegerType, nullable = true, "test") - .add("b", StringType) - .add("c", IntegerType) - .add("d", StringType, nullable = true, "test2"), - partitionColumnNames = Seq("c", "d") - ) - ) - assertEqual("CREATE TABLE my_tab(id BIGINT, nested STRUCT) " + - "STORED AS textfile", - createTable( - table = "my_tab", - schema = (new StructType) - .add("id", LongType) - .add("nested", (new StructType) - .add("col1", StringType) - .add("col2", IntegerType) - ) - ) - ) - // Partitioned by a StructType should be accepted by `SparkSqlParser` but will fail an analyze - // rule in `AnalyzeCreateTable`. 
- assertEqual("CREATE TABLE my_tab(a INT COMMENT 'test', b STRING) " + - "PARTITIONED BY (nested STRUCT)", - createTable( - table = "my_tab", - schema = (new StructType) - .add("a", IntegerType, nullable = true, "test") - .add("b", StringType) - .add("nested", (new StructType) - .add("col1", StringType) - .add("col2", IntegerType) - ), - partitionColumnNames = Seq("nested") - ) - ) - intercept("CREATE TABLE my_tab(a: INT COMMENT 'test', b: STRING)", - "no viable alternative at input") - } - test("describe query") { val query = "SELECT * FROM t" assertEqual("DESCRIBE QUERY " + query, DescribeQueryCommand(query, parser.parsePlan(query))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala index 8ce4bcbadc223..96f9421e1d988 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala @@ -17,14 +17,10 @@ package org.apache.spark.sql.execution.command -import java.net.URI import java.util.Locale -import scala.reflect.{classTag, ClassTag} - -import org.apache.spark.sql.{AnalysisException, SaveMode} +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedAttribute} -import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans import org.apache.spark.sql.catalyst.dsl.plans.DslLogicalPlan @@ -32,10 +28,7 @@ import org.apache.spark.sql.catalyst.expressions.JsonTuple import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.execution.SparkSqlParser -import org.apache.spark.sql.execution.datasources.CreateTable -import org.apache.spark.sql.internal.HiveSerDe import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types.StructType class DDLParserSuite extends AnalysisTest with SharedSparkSession { private lazy val parser = new SparkSqlParser() @@ -50,159 +43,17 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { } } - private def intercept(sqlCommand: String, messages: String*): Unit = - interceptParseException(parser.parsePlan)(sqlCommand, messages: _*) - - private def parseAs[T: ClassTag](query: String): T = { - parser.parsePlan(query) match { - case t: T => t - case other => - fail(s"Expected to parse ${classTag[T].runtimeClass} from query," + - s"got ${other.getClass.getName}: $query") - } - } - private def compareTransformQuery(sql: String, expected: LogicalPlan): Unit = { val plan = parser.parsePlan(sql).asInstanceOf[ScriptTransformation].copy(ioschema = null) comparePlans(plan, expected, checkAnalysis = false) } - private def extractTableDesc(sql: String): (CatalogTable, Boolean) = { - parser.parsePlan(sql).collect { - case CreateTable(tableDesc, mode, _) => (tableDesc, mode == SaveMode.Ignore) - }.head - } - test("alter database - property values must be set") { assertUnsupported( sql = "ALTER DATABASE my_db SET DBPROPERTIES('key_without_value', 'key_with_value'='x')", containsThesePhrases = Seq("key_without_value")) } - test("create hive table - table file format") { - val allSources = Seq("parquet", "parquetfile", "orc", "orcfile", "avro", "avrofile", - "sequencefile", "rcfile", "textfile") - - allSources.foreach { s => - val query = s"CREATE TABLE my_tab STORED AS $s" 
- val ct = parseAs[CreateTable](query) - val hiveSerde = HiveSerDe.sourceToSerDe(s) - assert(hiveSerde.isDefined) - assert(ct.tableDesc.storage.serde == - hiveSerde.get.serde.orElse(Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"))) - assert(ct.tableDesc.storage.inputFormat == hiveSerde.get.inputFormat) - assert(ct.tableDesc.storage.outputFormat == hiveSerde.get.outputFormat) - } - } - - test("create hive table - row format and table file format") { - val createTableStart = "CREATE TABLE my_tab ROW FORMAT" - val fileFormat = s"STORED AS INPUTFORMAT 'inputfmt' OUTPUTFORMAT 'outputfmt'" - val query1 = s"$createTableStart SERDE 'anything' $fileFormat" - val query2 = s"$createTableStart DELIMITED FIELDS TERMINATED BY ' ' $fileFormat" - - // No conflicting serdes here, OK - val parsed1 = parseAs[CreateTable](query1) - assert(parsed1.tableDesc.storage.serde == Some("anything")) - assert(parsed1.tableDesc.storage.inputFormat == Some("inputfmt")) - assert(parsed1.tableDesc.storage.outputFormat == Some("outputfmt")) - - val parsed2 = parseAs[CreateTable](query2) - assert(parsed2.tableDesc.storage.serde == - Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) - assert(parsed2.tableDesc.storage.inputFormat == Some("inputfmt")) - assert(parsed2.tableDesc.storage.outputFormat == Some("outputfmt")) - } - - test("create hive table - row format serde and generic file format") { - val allSources = Seq("parquet", "orc", "avro", "sequencefile", "rcfile", "textfile") - val supportedSources = Set("sequencefile", "rcfile", "textfile") - - allSources.foreach { s => - val query = s"CREATE TABLE my_tab ROW FORMAT SERDE 'anything' STORED AS $s" - if (supportedSources.contains(s)) { - val ct = parseAs[CreateTable](query) - val hiveSerde = HiveSerDe.sourceToSerDe(s) - assert(hiveSerde.isDefined) - assert(ct.tableDesc.storage.serde == Some("anything")) - assert(ct.tableDesc.storage.inputFormat == hiveSerde.get.inputFormat) - assert(ct.tableDesc.storage.outputFormat == hiveSerde.get.outputFormat) - } else { - assertUnsupported(query, Seq("row format serde", "incompatible", s)) - } - } - } - - test("create hive table - row format delimited and generic file format") { - val allSources = Seq("parquet", "orc", "avro", "sequencefile", "rcfile", "textfile") - val supportedSources = Set("textfile") - - allSources.foreach { s => - val query = s"CREATE TABLE my_tab ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS $s" - if (supportedSources.contains(s)) { - val ct = parseAs[CreateTable](query) - val hiveSerde = HiveSerDe.sourceToSerDe(s) - assert(hiveSerde.isDefined) - assert(ct.tableDesc.storage.serde == - hiveSerde.get.serde.orElse(Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"))) - assert(ct.tableDesc.storage.inputFormat == hiveSerde.get.inputFormat) - assert(ct.tableDesc.storage.outputFormat == hiveSerde.get.outputFormat) - } else { - assertUnsupported(query, Seq("row format delimited", "only compatible with 'textfile'", s)) - } - } - } - - test("create hive external table - location must be specified") { - assertUnsupported( - sql = "CREATE EXTERNAL TABLE my_tab STORED AS parquet", - containsThesePhrases = Seq("create external table", "location")) - val query = "CREATE EXTERNAL TABLE my_tab STORED AS parquet LOCATION '/something/anything'" - val ct = parseAs[CreateTable](query) - assert(ct.tableDesc.tableType == CatalogTableType.EXTERNAL) - assert(ct.tableDesc.storage.locationUri == Some(new URI("/something/anything"))) - } - - test("create hive table - property values must be set") { - 
assertUnsupported( - sql = "CREATE TABLE my_tab STORED AS parquet " + - "TBLPROPERTIES('key_without_value', 'key_with_value'='x')", - containsThesePhrases = Seq("key_without_value")) - assertUnsupported( - sql = "CREATE TABLE my_tab ROW FORMAT SERDE 'serde' " + - "WITH SERDEPROPERTIES('key_without_value', 'key_with_value'='x')", - containsThesePhrases = Seq("key_without_value")) - } - - test("create hive table - location implies external") { - val query = "CREATE TABLE my_tab STORED AS parquet LOCATION '/something/anything'" - val ct = parseAs[CreateTable](query) - assert(ct.tableDesc.tableType == CatalogTableType.EXTERNAL) - assert(ct.tableDesc.storage.locationUri == Some(new URI("/something/anything"))) - } - - test("Duplicate clauses - create hive table") { - def createTableHeader(duplicateClause: String): String = { - s"CREATE TABLE my_tab(a INT, b STRING) STORED AS parquet $duplicateClause $duplicateClause" - } - - intercept(createTableHeader("TBLPROPERTIES('test' = 'test2')"), - "Found duplicate clauses: TBLPROPERTIES") - intercept(createTableHeader("LOCATION '/tmp/file'"), - "Found duplicate clauses: LOCATION") - intercept(createTableHeader("COMMENT 'a table'"), - "Found duplicate clauses: COMMENT") - intercept(createTableHeader("CLUSTERED BY(b) INTO 256 BUCKETS"), - "Found duplicate clauses: CLUSTERED BY") - intercept(createTableHeader("PARTITIONED BY (k int)"), - "Found duplicate clauses: PARTITIONED BY") - intercept(createTableHeader("STORED AS parquet"), - "Found duplicate clauses: STORED AS/BY") - intercept( - createTableHeader("ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe'"), - "Found duplicate clauses: ROW FORMAT") - } - test("insert overwrite directory") { val v1 = "INSERT OVERWRITE DIRECTORY '/tmp/file' USING parquet SELECT 1 as a" parser.parsePlan(v1) match { @@ -359,180 +210,6 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { assert(e.contains("Found duplicate keys 'a'")) } - test("Test CTAS #1") { - val s1 = - """ - |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view - |COMMENT 'This is the staging page view table' - |STORED AS RCFILE - |LOCATION '/user/external/page_view' - |TBLPROPERTIES ('p1'='v1', 'p2'='v2') - |AS SELECT * FROM src - """.stripMargin - - val s2 = - """ - |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view - |STORED AS RCFILE - |COMMENT 'This is the staging page view table' - |TBLPROPERTIES ('p1'='v1', 'p2'='v2') - |LOCATION '/user/external/page_view' - |AS SELECT * FROM src - """.stripMargin - - val s3 = - """ - |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view - |TBLPROPERTIES ('p1'='v1', 'p2'='v2') - |LOCATION '/user/external/page_view' - |STORED AS RCFILE - |COMMENT 'This is the staging page view table' - |AS SELECT * FROM src - """.stripMargin - - checkParsing(s1) - checkParsing(s2) - checkParsing(s3) - - def checkParsing(sql: String): Unit = { - val (desc, exists) = extractTableDesc(sql) - assert(exists) - assert(desc.identifier.database == Some("mydb")) - assert(desc.identifier.table == "page_view") - assert(desc.tableType == CatalogTableType.EXTERNAL) - assert(desc.storage.locationUri == Some(new URI("/user/external/page_view"))) - assert(desc.schema.isEmpty) // will be populated later when the table is actually created - assert(desc.comment == Some("This is the staging page view table")) - // TODO will be SQLText - assert(desc.viewText.isEmpty) - assert(desc.viewCatalogAndNamespace.isEmpty) - assert(desc.viewQueryColumnNames.isEmpty) - assert(desc.partitionColumnNames.isEmpty) - assert(desc.storage.inputFormat 
== Some("org.apache.hadoop.hive.ql.io.RCFileInputFormat")) - assert(desc.storage.outputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileOutputFormat")) - assert(desc.storage.serde == - Some("org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe")) - assert(desc.properties == Map("p1" -> "v1", "p2" -> "v2")) - } - } - - test("Test CTAS #2") { - val s1 = - """ - |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view - |COMMENT 'This is the staging page view table' - |ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe' - | STORED AS - | INPUTFORMAT 'parquet.hive.DeprecatedParquetInputFormat' - | OUTPUTFORMAT 'parquet.hive.DeprecatedParquetOutputFormat' - |LOCATION '/user/external/page_view' - |TBLPROPERTIES ('p1'='v1', 'p2'='v2') - |AS SELECT * FROM src - """.stripMargin - - val s2 = - """ - |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view - |LOCATION '/user/external/page_view' - |TBLPROPERTIES ('p1'='v1', 'p2'='v2') - |ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe' - | STORED AS - | INPUTFORMAT 'parquet.hive.DeprecatedParquetInputFormat' - | OUTPUTFORMAT 'parquet.hive.DeprecatedParquetOutputFormat' - |COMMENT 'This is the staging page view table' - |AS SELECT * FROM src - """.stripMargin - - checkParsing(s1) - checkParsing(s2) - - def checkParsing(sql: String): Unit = { - val (desc, exists) = extractTableDesc(sql) - assert(exists) - assert(desc.identifier.database == Some("mydb")) - assert(desc.identifier.table == "page_view") - assert(desc.tableType == CatalogTableType.EXTERNAL) - assert(desc.storage.locationUri == Some(new URI("/user/external/page_view"))) - assert(desc.schema.isEmpty) // will be populated later when the table is actually created - // TODO will be SQLText - assert(desc.comment == Some("This is the staging page view table")) - assert(desc.viewText.isEmpty) - assert(desc.viewCatalogAndNamespace.isEmpty) - assert(desc.viewQueryColumnNames.isEmpty) - assert(desc.partitionColumnNames.isEmpty) - assert(desc.storage.properties == Map()) - assert(desc.storage.inputFormat == Some("parquet.hive.DeprecatedParquetInputFormat")) - assert(desc.storage.outputFormat == Some("parquet.hive.DeprecatedParquetOutputFormat")) - assert(desc.storage.serde == Some("parquet.hive.serde.ParquetHiveSerDe")) - assert(desc.properties == Map("p1" -> "v1", "p2" -> "v2")) - } - } - - test("Test CTAS #3") { - val s3 = """CREATE TABLE page_view AS SELECT * FROM src""" - val (desc, exists) = extractTableDesc(s3) - assert(exists == false) - assert(desc.identifier.database == None) - assert(desc.identifier.table == "page_view") - assert(desc.tableType == CatalogTableType.MANAGED) - assert(desc.storage.locationUri == None) - assert(desc.schema.isEmpty) - assert(desc.viewText == None) // TODO will be SQLText - assert(desc.viewQueryColumnNames.isEmpty) - assert(desc.storage.properties == Map()) - assert(desc.storage.inputFormat == Some("org.apache.hadoop.mapred.TextInputFormat")) - assert(desc.storage.outputFormat == - Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")) - assert(desc.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) - assert(desc.properties == Map()) - } - - test("Test CTAS #4") { - val s4 = - """CREATE TABLE page_view - |STORED BY 'storage.handler.class.name' AS SELECT * FROM src""".stripMargin - intercept[AnalysisException] { - extractTableDesc(s4) - } - } - - test("Test CTAS #5") { - val s5 = """CREATE TABLE ctas2 - | ROW FORMAT SERDE "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" - | WITH 
SERDEPROPERTIES("serde_p1"="p1","serde_p2"="p2") - | STORED AS RCFile - | TBLPROPERTIES("tbl_p1"="p11", "tbl_p2"="p22") - | AS - | SELECT key, value - | FROM src - | ORDER BY key, value""".stripMargin - val (desc, exists) = extractTableDesc(s5) - assert(exists == false) - assert(desc.identifier.database == None) - assert(desc.identifier.table == "ctas2") - assert(desc.tableType == CatalogTableType.MANAGED) - assert(desc.storage.locationUri == None) - assert(desc.schema.isEmpty) - assert(desc.viewText == None) // TODO will be SQLText - assert(desc.viewCatalogAndNamespace.isEmpty) - assert(desc.viewQueryColumnNames.isEmpty) - assert(desc.storage.properties == Map(("serde_p1" -> "p1"), ("serde_p2" -> "p2"))) - assert(desc.storage.inputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileInputFormat")) - assert(desc.storage.outputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileOutputFormat")) - assert(desc.storage.serde == Some("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe")) - assert(desc.properties == Map(("tbl_p1" -> "p11"), ("tbl_p2" -> "p22"))) - } - - test("CTAS statement with a PARTITIONED BY clause is not allowed") { - assertUnsupported(s"CREATE TABLE ctas1 PARTITIONED BY (k int)" + - " AS SELECT key, value FROM (SELECT 1 as key, 2 as value) tmp") - } - - test("CTAS statement with schema") { - assertUnsupported(s"CREATE TABLE ctas1 (age INT, name STRING) AS SELECT * FROM src") - assertUnsupported(s"CREATE TABLE ctas1 (age INT, name STRING) AS SELECT 1, 'hello'") - } - test("unsupported operations") { intercept[ParseException] { parser.parsePlan( @@ -642,205 +319,6 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { """.stripMargin) } - test("create table - basic") { - val query = "CREATE TABLE my_table (id int, name string)" - val (desc, allowExisting) = extractTableDesc(query) - assert(!allowExisting) - assert(desc.identifier.database.isEmpty) - assert(desc.identifier.table == "my_table") - assert(desc.tableType == CatalogTableType.MANAGED) - assert(desc.schema == new StructType().add("id", "int").add("name", "string")) - assert(desc.partitionColumnNames.isEmpty) - assert(desc.bucketSpec.isEmpty) - assert(desc.viewText.isEmpty) - assert(desc.viewQueryColumnNames.isEmpty) - assert(desc.storage.locationUri.isEmpty) - assert(desc.storage.inputFormat == - Some("org.apache.hadoop.mapred.TextInputFormat")) - assert(desc.storage.outputFormat == - Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")) - assert(desc.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) - assert(desc.storage.properties.isEmpty) - assert(desc.properties.isEmpty) - assert(desc.comment.isEmpty) - } - - test("create table - with database name") { - val query = "CREATE TABLE dbx.my_table (id int, name string)" - val (desc, _) = extractTableDesc(query) - assert(desc.identifier.database == Some("dbx")) - assert(desc.identifier.table == "my_table") - } - - test("create table - temporary") { - val query = "CREATE TEMPORARY TABLE tab1 (id int, name string)" - val e = intercept[ParseException] { parser.parsePlan(query) } - assert(e.message.contains("CREATE TEMPORARY TABLE is not supported yet")) - } - - test("create table - external") { - val query = "CREATE EXTERNAL TABLE tab1 (id int, name string) LOCATION '/path/to/nowhere'" - val (desc, _) = extractTableDesc(query) - assert(desc.tableType == CatalogTableType.EXTERNAL) - assert(desc.storage.locationUri == Some(new URI("/path/to/nowhere"))) - } - - test("create table - if not exists") { - val query = 
"CREATE TABLE IF NOT EXISTS tab1 (id int, name string)" - val (_, allowExisting) = extractTableDesc(query) - assert(allowExisting) - } - - test("create table - comment") { - val query = "CREATE TABLE my_table (id int, name string) COMMENT 'its hot as hell below'" - val (desc, _) = extractTableDesc(query) - assert(desc.comment == Some("its hot as hell below")) - } - - test("create table - partitioned columns") { - val query = "CREATE TABLE my_table (id int, name string) PARTITIONED BY (month int)" - val (desc, _) = extractTableDesc(query) - assert(desc.schema == new StructType() - .add("id", "int") - .add("name", "string") - .add("month", "int")) - assert(desc.partitionColumnNames == Seq("month")) - } - - test("create table - clustered by") { - val numBuckets = 10 - val bucketedColumn = "id" - val sortColumn = "id" - val baseQuery = - s""" - CREATE TABLE my_table ( - $bucketedColumn int, - name string) - CLUSTERED BY($bucketedColumn) - """ - - val query1 = s"$baseQuery INTO $numBuckets BUCKETS" - val (desc1, _) = extractTableDesc(query1) - assert(desc1.bucketSpec.isDefined) - val bucketSpec1 = desc1.bucketSpec.get - assert(bucketSpec1.numBuckets == numBuckets) - assert(bucketSpec1.bucketColumnNames.head.equals(bucketedColumn)) - assert(bucketSpec1.sortColumnNames.isEmpty) - - val query2 = s"$baseQuery SORTED BY($sortColumn) INTO $numBuckets BUCKETS" - val (desc2, _) = extractTableDesc(query2) - assert(desc2.bucketSpec.isDefined) - val bucketSpec2 = desc2.bucketSpec.get - assert(bucketSpec2.numBuckets == numBuckets) - assert(bucketSpec2.bucketColumnNames.head.equals(bucketedColumn)) - assert(bucketSpec2.sortColumnNames.head.equals(sortColumn)) - } - - test("create table(hive) - skewed by") { - val baseQuery = "CREATE TABLE my_table (id int, name string) SKEWED BY" - val query1 = s"$baseQuery(id) ON (1, 10, 100)" - val query2 = s"$baseQuery(id, name) ON ((1, 'x'), (2, 'y'), (3, 'z'))" - val query3 = s"$baseQuery(id, name) ON ((1, 'x'), (2, 'y'), (3, 'z')) STORED AS DIRECTORIES" - val e1 = intercept[ParseException] { parser.parsePlan(query1) } - val e2 = intercept[ParseException] { parser.parsePlan(query2) } - val e3 = intercept[ParseException] { parser.parsePlan(query3) } - assert(e1.getMessage.contains("Operation not allowed")) - assert(e2.getMessage.contains("Operation not allowed")) - assert(e3.getMessage.contains("Operation not allowed")) - } - - test("create table(hive) - row format") { - val baseQuery = "CREATE TABLE my_table (id int, name string) ROW FORMAT" - val query1 = s"$baseQuery SERDE 'org.apache.poof.serde.Baff'" - val query2 = s"$baseQuery SERDE 'org.apache.poof.serde.Baff' WITH SERDEPROPERTIES ('k1'='v1')" - val query3 = - s""" - |$baseQuery DELIMITED FIELDS TERMINATED BY 'x' ESCAPED BY 'y' - |COLLECTION ITEMS TERMINATED BY 'a' - |MAP KEYS TERMINATED BY 'b' - |LINES TERMINATED BY '\n' - |NULL DEFINED AS 'c' - """.stripMargin - val (desc1, _) = extractTableDesc(query1) - val (desc2, _) = extractTableDesc(query2) - val (desc3, _) = extractTableDesc(query3) - assert(desc1.storage.serde == Some("org.apache.poof.serde.Baff")) - assert(desc1.storage.properties.isEmpty) - assert(desc2.storage.serde == Some("org.apache.poof.serde.Baff")) - assert(desc2.storage.properties == Map("k1" -> "v1")) - assert(desc3.storage.properties == Map( - "field.delim" -> "x", - "escape.delim" -> "y", - "serialization.format" -> "x", - "line.delim" -> "\n", - "colelction.delim" -> "a", // yes, it's a typo from Hive :) - "mapkey.delim" -> "b")) - } - - test("create table(hive) - file format") { - val 
baseQuery = "CREATE TABLE my_table (id int, name string) STORED AS" - val query1 = s"$baseQuery INPUTFORMAT 'winput' OUTPUTFORMAT 'wowput'" - val query2 = s"$baseQuery ORC" - val (desc1, _) = extractTableDesc(query1) - val (desc2, _) = extractTableDesc(query2) - assert(desc1.storage.inputFormat == Some("winput")) - assert(desc1.storage.outputFormat == Some("wowput")) - assert(desc1.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) - assert(desc2.storage.inputFormat == Some("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat")) - assert(desc2.storage.outputFormat == Some("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat")) - assert(desc2.storage.serde == Some("org.apache.hadoop.hive.ql.io.orc.OrcSerde")) - } - - test("create table(hive) - storage handler") { - val baseQuery = "CREATE TABLE my_table (id int, name string) STORED BY" - val query1 = s"$baseQuery 'org.papachi.StorageHandler'" - val query2 = s"$baseQuery 'org.mamachi.StorageHandler' WITH SERDEPROPERTIES ('k1'='v1')" - val e1 = intercept[ParseException] { parser.parsePlan(query1) } - val e2 = intercept[ParseException] { parser.parsePlan(query2) } - assert(e1.getMessage.contains("Operation not allowed")) - assert(e2.getMessage.contains("Operation not allowed")) - } - - test("create table - properties") { - val query = "CREATE TABLE my_table (id int, name string) TBLPROPERTIES ('k1'='v1', 'k2'='v2')" - val (desc, _) = extractTableDesc(query) - assert(desc.properties == Map("k1" -> "v1", "k2" -> "v2")) - } - - test("create table(hive) - everything!") { - val query = - """ - |CREATE EXTERNAL TABLE IF NOT EXISTS dbx.my_table (id int, name string) - |COMMENT 'no comment' - |PARTITIONED BY (month int) - |ROW FORMAT SERDE 'org.apache.poof.serde.Baff' WITH SERDEPROPERTIES ('k1'='v1') - |STORED AS INPUTFORMAT 'winput' OUTPUTFORMAT 'wowput' - |LOCATION '/path/to/mercury' - |TBLPROPERTIES ('k1'='v1', 'k2'='v2') - """.stripMargin - val (desc, allowExisting) = extractTableDesc(query) - assert(allowExisting) - assert(desc.identifier.database == Some("dbx")) - assert(desc.identifier.table == "my_table") - assert(desc.tableType == CatalogTableType.EXTERNAL) - assert(desc.schema == new StructType() - .add("id", "int") - .add("name", "string") - .add("month", "int")) - assert(desc.partitionColumnNames == Seq("month")) - assert(desc.bucketSpec.isEmpty) - assert(desc.viewText.isEmpty) - assert(desc.viewCatalogAndNamespace.isEmpty) - assert(desc.viewQueryColumnNames.isEmpty) - assert(desc.storage.locationUri == Some(new URI("/path/to/mercury"))) - assert(desc.storage.inputFormat == Some("winput")) - assert(desc.storage.outputFormat == Some("wowput")) - assert(desc.storage.serde == Some("org.apache.poof.serde.Baff")) - assert(desc.storage.properties == Map("k1" -> "v1")) - assert(desc.properties == Map("k1" -> "v1", "k2" -> "v2")) - assert(desc.comment == Some("no comment")) - } - test("create table like") { val v1 = "CREATE TABLE table1 LIKE table2" val (target, source, fileFormat, provider, properties, exists) = diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index fd1978c5137a5..92c114e116d0c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -29,14 +29,14 @@ import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier} import 
org.apache.spark.sql.catalyst.analysis.{AnalysisTest, Analyzer, CTESubstitution, EmptyFunctionRegistry, NoSuchTableException, ResolveCatalogs, ResolvedTable, ResolveInlineTables, ResolveSessionCatalog, UnresolvedAttribute, UnresolvedRelation, UnresolvedSubqueryColumnAliases, UnresolvedV2Relation} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType, InMemoryCatalog, SessionCatalog} import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Expression, InSubquery, IntegerLiteral, ListQuery, StringLiteral} -import org.apache.spark.sql.catalyst.parser.CatalystSqlParser -import org.apache.spark.sql.catalyst.plans.logical.{AlterTable, Assignment, CreateTableAsSelect, CreateV2Table, DeleteAction, DeleteFromTable, DescribeRelation, DropTable, InsertAction, InsertIntoStatement, LocalRelation, LogicalPlan, MergeIntoTable, OneRowRelation, Project, ShowTableProperties, SubqueryAlias, UpdateAction, UpdateTable} +import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException} +import org.apache.spark.sql.catalyst.plans.logical.{AlterTable, Assignment, CreateTableAsSelect, CreateTableStatement, CreateV2Table, DeleteAction, DeleteFromTable, DescribeRelation, DropTable, InsertAction, InsertIntoStatement, LocalRelation, LogicalPlan, MergeIntoTable, OneRowRelation, Project, ShowTableProperties, SubqueryAlias, UpdateAction, UpdateTable} import org.apache.spark.sql.connector.FakeV2Provider import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogNotFoundException, Identifier, Table, TableCapability, TableCatalog, TableChange, V1Table} import org.apache.spark.sql.connector.catalog.TableChange.{UpdateColumnComment, UpdateColumnType} import org.apache.spark.sql.execution.datasources.CreateTable import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation -import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} import org.apache.spark.sql.sources.SimpleScanSource import org.apache.spark.sql.types.{CharType, DoubleType, HIVE_TYPE_STRING, IntegerType, LongType, MetadataBuilder, StringType, StructField, StructType} @@ -178,6 +178,16 @@ class PlanResolutionSuite extends AnalysisTest { }.head } + private def assertUnsupported(sql: String, containsThesePhrases: Seq[String] = Seq()): Unit = { + val e = intercept[ParseException] { + parsePlan(sql) + } + assert(e.getMessage.toLowerCase(Locale.ROOT).contains("operation not allowed")) + containsThesePhrases.foreach { p => + assert(e.getMessage.toLowerCase(Locale.ROOT).contains(p.toLowerCase(Locale.ROOT))) + } + } + test("create table - with partitioned by") { val query = "CREATE TABLE my_tab(a INT comment 'test', b STRING) " + "USING parquet PARTITIONED BY (a)" @@ -428,10 +438,11 @@ class PlanResolutionSuite extends AnalysisTest { val expectedProperties = Map( "p1" -> "v1", "p2" -> "v2", - "other" -> "20", + "option.other" -> "20", "provider" -> "parquet", "location" -> "s3://bucket/path/to/data", - "comment" -> "table comment") + "comment" -> "table comment", + "other" -> "20") parseAndResolve(sql) match { case create: CreateV2Table => @@ -467,10 +478,11 @@ class PlanResolutionSuite extends AnalysisTest { val expectedProperties = Map( "p1" -> "v1", "p2" -> "v2", - "other" -> "20", + "option.other" -> "20", "provider" -> "parquet", "location" -> "s3://bucket/path/to/data", - "comment" -> "table comment") + "comment" -> "table comment", + "other" -> "20") parseAndResolve(sql, withDefault = true) match { case 
create: CreateV2Table => @@ -542,10 +554,11 @@ class PlanResolutionSuite extends AnalysisTest { val expectedProperties = Map( "p1" -> "v1", "p2" -> "v2", - "other" -> "20", + "option.other" -> "20", "provider" -> "parquet", "location" -> "s3://bucket/path/to/data", - "comment" -> "table comment") + "comment" -> "table comment", + "other" -> "20") parseAndResolve(sql) match { case ctas: CreateTableAsSelect => @@ -576,10 +589,11 @@ class PlanResolutionSuite extends AnalysisTest { val expectedProperties = Map( "p1" -> "v1", "p2" -> "v2", - "other" -> "20", + "option.other" -> "20", "provider" -> "parquet", "location" -> "s3://bucket/path/to/data", - "comment" -> "table comment") + "comment" -> "table comment", + "other" -> "20") parseAndResolve(sql, withDefault = true) match { case ctas: CreateTableAsSelect => @@ -1557,6 +1571,630 @@ class PlanResolutionSuite extends AnalysisTest { checkFailure("testcat.tab", "foo") } + private def compareNormalized(plan1: LogicalPlan, plan2: LogicalPlan): Unit = { + /** + * Normalizes plans: + * - CreateTable the createTime in tableDesc will replaced by -1L. + */ + def normalizePlan(plan: LogicalPlan): LogicalPlan = { + plan match { + case CreateTable(tableDesc, mode, query) => + val newTableDesc = tableDesc.copy(createTime = -1L) + CreateTable(newTableDesc, mode, query) + case _ => plan // Don't transform + } + } + comparePlans(normalizePlan(plan1), normalizePlan(plan2)) + } + + test("create table - schema") { + def createTable( + table: String, + database: Option[String] = None, + tableType: CatalogTableType = CatalogTableType.MANAGED, + storage: CatalogStorageFormat = CatalogStorageFormat.empty.copy( + inputFormat = HiveSerDe.sourceToSerDe("textfile").get.inputFormat, + outputFormat = HiveSerDe.sourceToSerDe("textfile").get.outputFormat, + serde = Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")), + schema: StructType = new StructType, + provider: Option[String] = Some("hive"), + partitionColumnNames: Seq[String] = Seq.empty, + comment: Option[String] = None, + mode: SaveMode = SaveMode.ErrorIfExists, + query: Option[LogicalPlan] = None): CreateTable = { + CreateTable( + CatalogTable( + identifier = TableIdentifier(table, database), + tableType = tableType, + storage = storage, + schema = schema, + provider = provider, + partitionColumnNames = partitionColumnNames, + comment = comment + ), mode, query + ) + } + + def compare(sql: String, plan: LogicalPlan): Unit = { + compareNormalized(parseAndResolve(sql), plan) + } + + compare("CREATE TABLE my_tab(a INT COMMENT 'test', b STRING) STORED AS textfile", + createTable( + table = "my_tab", + database = Some("default"), + schema = (new StructType) + .add("a", IntegerType, nullable = true, "test") + .add("b", StringType) + ) + ) + compare("CREATE TABLE my_tab(a INT COMMENT 'test', b STRING) " + + "PARTITIONED BY (c INT, d STRING COMMENT 'test2')", + createTable( + table = "my_tab", + database = Some("default"), + schema = (new StructType) + .add("a", IntegerType, nullable = true, "test") + .add("b", StringType) + .add("c", IntegerType) + .add("d", StringType, nullable = true, "test2"), + partitionColumnNames = Seq("c", "d") + ) + ) + compare("CREATE TABLE my_tab(id BIGINT, nested STRUCT) " + + "STORED AS textfile", + createTable( + table = "my_tab", + database = Some("default"), + schema = (new StructType) + .add("id", LongType) + .add("nested", (new StructType) + .add("col1", StringType) + .add("col2", IntegerType) + ) + ) + ) + // Partitioned by a StructType should be accepted by `SparkSqlParser` 
but will fail an analyze + // rule in `AnalyzeCreateTable`. + compare("CREATE TABLE my_tab(a INT COMMENT 'test', b STRING) " + + "PARTITIONED BY (nested STRUCT)", + createTable( + table = "my_tab", + database = Some("default"), + schema = (new StructType) + .add("a", IntegerType, nullable = true, "test") + .add("b", StringType) + .add("nested", (new StructType) + .add("col1", StringType) + .add("col2", IntegerType) + ), + partitionColumnNames = Seq("nested") + ) + ) + + interceptParseException(parsePlan)( + "CREATE TABLE my_tab(a: INT COMMENT 'test', b: STRING)", + "extraneous input ':'") + } + + test("create hive table - table file format") { + val allSources = Seq("parquet", "parquetfile", "orc", "orcfile", "avro", "avrofile", + "sequencefile", "rcfile", "textfile") + + allSources.foreach { s => + val query = s"CREATE TABLE my_tab STORED AS $s" + parseAndResolve(query) match { + case ct: CreateTable => + val hiveSerde = HiveSerDe.sourceToSerDe(s) + assert(hiveSerde.isDefined) + assert(ct.tableDesc.storage.serde == + hiveSerde.get.serde.orElse(Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"))) + assert(ct.tableDesc.storage.inputFormat == hiveSerde.get.inputFormat) + assert(ct.tableDesc.storage.outputFormat == hiveSerde.get.outputFormat) + } + } + } + + test("create hive table - row format and table file format") { + val createTableStart = "CREATE TABLE my_tab ROW FORMAT" + val fileFormat = s"STORED AS INPUTFORMAT 'inputfmt' OUTPUTFORMAT 'outputfmt'" + val query1 = s"$createTableStart SERDE 'anything' $fileFormat" + val query2 = s"$createTableStart DELIMITED FIELDS TERMINATED BY ' ' $fileFormat" + + // No conflicting serdes here, OK + parseAndResolve(query1) match { + case parsed1: CreateTable => + assert(parsed1.tableDesc.storage.serde == Some("anything")) + assert(parsed1.tableDesc.storage.inputFormat == Some("inputfmt")) + assert(parsed1.tableDesc.storage.outputFormat == Some("outputfmt")) + } + + parseAndResolve(query2) match { + case parsed2: CreateTable => + assert(parsed2.tableDesc.storage.serde == + Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) + assert(parsed2.tableDesc.storage.inputFormat == Some("inputfmt")) + assert(parsed2.tableDesc.storage.outputFormat == Some("outputfmt")) + } + } + + test("create hive table - row format serde and generic file format") { + val allSources = Seq("parquet", "orc", "avro", "sequencefile", "rcfile", "textfile") + val supportedSources = Set("sequencefile", "rcfile", "textfile") + + allSources.foreach { s => + val query = s"CREATE TABLE my_tab ROW FORMAT SERDE 'anything' STORED AS $s" + if (supportedSources.contains(s)) { + parseAndResolve(query) match { + case ct: CreateTable => + val hiveSerde = HiveSerDe.sourceToSerDe(s) + assert(hiveSerde.isDefined) + assert(ct.tableDesc.storage.serde == Some("anything")) + assert(ct.tableDesc.storage.inputFormat == hiveSerde.get.inputFormat) + assert(ct.tableDesc.storage.outputFormat == hiveSerde.get.outputFormat) + } + } else { + assertUnsupported(query, Seq("row format serde", "incompatible", s)) + } + } + } + + test("create hive table - row format delimited and generic file format") { + val allSources = Seq("parquet", "orc", "avro", "sequencefile", "rcfile", "textfile") + val supportedSources = Set("textfile") + + allSources.foreach { s => + val query = s"CREATE TABLE my_tab ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS $s" + if (supportedSources.contains(s)) { + parseAndResolve(query) match { + case ct: CreateTable => + val hiveSerde = HiveSerDe.sourceToSerDe(s) + 
assert(hiveSerde.isDefined) + assert(ct.tableDesc.storage.serde == hiveSerde.get.serde + .orElse(Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"))) + assert(ct.tableDesc.storage.inputFormat == hiveSerde.get.inputFormat) + assert(ct.tableDesc.storage.outputFormat == hiveSerde.get.outputFormat) + } + } else { + assertUnsupported(query, Seq("row format delimited", "only compatible with 'textfile'", s)) + } + } + } + + test("create hive external table - location must be specified") { + val exc = intercept[AnalysisException] { + parseAndResolve("CREATE EXTERNAL TABLE my_tab STORED AS parquet") + } + assert(exc.getMessage.contains("CREATE EXTERNAL TABLE must be accompanied by LOCATION")) + + val query = "CREATE EXTERNAL TABLE my_tab STORED AS parquet LOCATION '/something/anything'" + parseAndResolve(query) match { + case ct: CreateTable => + assert(ct.tableDesc.tableType == CatalogTableType.EXTERNAL) + assert(ct.tableDesc.storage.locationUri == Some(new URI("/something/anything"))) + } + } + + test("create hive table - property values must be set") { + assertUnsupported( + sql = "CREATE TABLE my_tab STORED AS parquet " + + "TBLPROPERTIES('key_without_value', 'key_with_value'='x')", + containsThesePhrases = Seq("key_without_value")) + assertUnsupported( + sql = "CREATE TABLE my_tab ROW FORMAT SERDE 'serde' " + + "WITH SERDEPROPERTIES('key_without_value', 'key_with_value'='x')", + containsThesePhrases = Seq("key_without_value")) + } + + test("create hive table - location implies external") { + val query = "CREATE TABLE my_tab STORED AS parquet LOCATION '/something/anything'" + parseAndResolve(query) match { + case ct: CreateTable => + assert(ct.tableDesc.tableType == CatalogTableType.EXTERNAL) + assert(ct.tableDesc.storage.locationUri == Some(new URI("/something/anything"))) + } + } + + test("Duplicate clauses - create hive table") { + def intercept(sqlCommand: String, messages: String*): Unit = + interceptParseException(parsePlan)(sqlCommand, messages: _*) + + def createTableHeader(duplicateClause: String): String = { + s"CREATE TABLE my_tab(a INT, b STRING) STORED AS parquet $duplicateClause $duplicateClause" + } + + intercept(createTableHeader("TBLPROPERTIES('test' = 'test2')"), + "Found duplicate clauses: TBLPROPERTIES") + intercept(createTableHeader("LOCATION '/tmp/file'"), + "Found duplicate clauses: LOCATION") + intercept(createTableHeader("COMMENT 'a table'"), + "Found duplicate clauses: COMMENT") + intercept(createTableHeader("CLUSTERED BY(b) INTO 256 BUCKETS"), + "Found duplicate clauses: CLUSTERED BY") + intercept(createTableHeader("PARTITIONED BY (k int)"), + "Found duplicate clauses: PARTITIONED BY") + intercept(createTableHeader("STORED AS parquet"), + "Found duplicate clauses: STORED AS/BY") + intercept( + createTableHeader("ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe'"), + "Found duplicate clauses: ROW FORMAT") + } + + test("Test CTAS #1") { + val s1 = + """ + |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view + |COMMENT 'This is the staging page view table' + |STORED AS RCFILE + |LOCATION '/user/external/page_view' + |TBLPROPERTIES ('p1'='v1', 'p2'='v2') + |AS SELECT * FROM src + """.stripMargin + + val s2 = + """ + |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view + |STORED AS RCFILE + |COMMENT 'This is the staging page view table' + |TBLPROPERTIES ('p1'='v1', 'p2'='v2') + |LOCATION '/user/external/page_view' + |AS SELECT * FROM src + """.stripMargin + + val s3 = + """ + |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view + |TBLPROPERTIES ('p1'='v1', 
'p2'='v2') + |LOCATION '/user/external/page_view' + |STORED AS RCFILE + |COMMENT 'This is the staging page view table' + |AS SELECT * FROM src + """.stripMargin + + checkParsing(s1) + checkParsing(s2) + checkParsing(s3) + + def checkParsing(sql: String): Unit = { + val (desc, exists) = extractTableDesc(sql) + assert(exists) + assert(desc.identifier.database == Some("mydb")) + assert(desc.identifier.table == "page_view") + assert(desc.tableType == CatalogTableType.EXTERNAL) + assert(desc.storage.locationUri == Some(new URI("/user/external/page_view"))) + assert(desc.schema.isEmpty) // will be populated later when the table is actually created + assert(desc.comment == Some("This is the staging page view table")) + // TODO will be SQLText + assert(desc.viewText.isEmpty) + assert(desc.viewCatalogAndNamespace.isEmpty) + assert(desc.viewQueryColumnNames.isEmpty) + assert(desc.partitionColumnNames.isEmpty) + assert(desc.storage.inputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileInputFormat")) + assert(desc.storage.outputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileOutputFormat")) + assert(desc.storage.serde == + Some("org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe")) + assert(desc.properties == Map("p1" -> "v1", "p2" -> "v2")) + } + } + + test("Test CTAS #2") { + val s1 = + """ + |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view + |COMMENT 'This is the staging page view table' + |ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe' + | STORED AS + | INPUTFORMAT 'parquet.hive.DeprecatedParquetInputFormat' + | OUTPUTFORMAT 'parquet.hive.DeprecatedParquetOutputFormat' + |LOCATION '/user/external/page_view' + |TBLPROPERTIES ('p1'='v1', 'p2'='v2') + |AS SELECT * FROM src + """.stripMargin + + val s2 = + """ + |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view + |LOCATION '/user/external/page_view' + |TBLPROPERTIES ('p1'='v1', 'p2'='v2') + |ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe' + | STORED AS + | INPUTFORMAT 'parquet.hive.DeprecatedParquetInputFormat' + | OUTPUTFORMAT 'parquet.hive.DeprecatedParquetOutputFormat' + |COMMENT 'This is the staging page view table' + |AS SELECT * FROM src + """.stripMargin + + checkParsing(s1) + checkParsing(s2) + + def checkParsing(sql: String): Unit = { + val (desc, exists) = extractTableDesc(sql) + assert(exists) + assert(desc.identifier.database == Some("mydb")) + assert(desc.identifier.table == "page_view") + assert(desc.tableType == CatalogTableType.EXTERNAL) + assert(desc.storage.locationUri == Some(new URI("/user/external/page_view"))) + assert(desc.schema.isEmpty) // will be populated later when the table is actually created + // TODO will be SQLText + assert(desc.comment == Some("This is the staging page view table")) + assert(desc.viewText.isEmpty) + assert(desc.viewCatalogAndNamespace.isEmpty) + assert(desc.viewQueryColumnNames.isEmpty) + assert(desc.partitionColumnNames.isEmpty) + assert(desc.storage.properties == Map()) + assert(desc.storage.inputFormat == Some("parquet.hive.DeprecatedParquetInputFormat")) + assert(desc.storage.outputFormat == Some("parquet.hive.DeprecatedParquetOutputFormat")) + assert(desc.storage.serde == Some("parquet.hive.serde.ParquetHiveSerDe")) + assert(desc.properties == Map("p1" -> "v1", "p2" -> "v2")) + } + } + + test("Test CTAS #3") { + val s3 = """CREATE TABLE page_view AS SELECT * FROM src""" + val (desc, exists) = extractTableDesc(s3) + assert(exists == false) + assert(desc.identifier.database == Some("default")) + assert(desc.identifier.table == "page_view") + 
assert(desc.tableType == CatalogTableType.MANAGED) + assert(desc.storage.locationUri == None) + assert(desc.schema.isEmpty) + assert(desc.viewText == None) // TODO will be SQLText + assert(desc.viewQueryColumnNames.isEmpty) + assert(desc.storage.properties == Map()) + assert(desc.storage.inputFormat == Some("org.apache.hadoop.mapred.TextInputFormat")) + assert(desc.storage.outputFormat == + Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")) + assert(desc.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) + assert(desc.properties == Map()) + } + + test("Test CTAS #4") { + val s4 = + """CREATE TABLE page_view + |STORED BY 'storage.handler.class.name' AS SELECT * FROM src""".stripMargin + intercept[AnalysisException] { + extractTableDesc(s4) + } + } + + test("Test CTAS #5") { + val s5 = """CREATE TABLE ctas2 + | ROW FORMAT SERDE "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" + | WITH SERDEPROPERTIES("serde_p1"="p1","serde_p2"="p2") + | STORED AS RCFile + | TBLPROPERTIES("tbl_p1"="p11", "tbl_p2"="p22") + | AS + | SELECT key, value + | FROM src + | ORDER BY key, value""".stripMargin + val (desc, exists) = extractTableDesc(s5) + assert(exists == false) + assert(desc.identifier.database == Some("default")) + assert(desc.identifier.table == "ctas2") + assert(desc.tableType == CatalogTableType.MANAGED) + assert(desc.storage.locationUri == None) + assert(desc.schema.isEmpty) + assert(desc.viewText == None) // TODO will be SQLText + assert(desc.viewCatalogAndNamespace.isEmpty) + assert(desc.viewQueryColumnNames.isEmpty) + assert(desc.storage.properties == Map(("serde_p1" -> "p1"), ("serde_p2" -> "p2"))) + assert(desc.storage.inputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileInputFormat")) + assert(desc.storage.outputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileOutputFormat")) + assert(desc.storage.serde == Some("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe")) + assert(desc.properties == Map(("tbl_p1" -> "p11"), ("tbl_p2" -> "p22"))) + } + + test("CTAS statement with a PARTITIONED BY clause is not allowed") { + assertUnsupported(s"CREATE TABLE ctas1 PARTITIONED BY (k int)" + + " AS SELECT key, value FROM (SELECT 1 as key, 2 as value) tmp") + } + + test("CTAS statement with schema") { + assertUnsupported(s"CREATE TABLE ctas1 (age INT, name STRING) AS SELECT * FROM src") + assertUnsupported(s"CREATE TABLE ctas1 (age INT, name STRING) AS SELECT 1, 'hello'") + } + + test("create table - basic") { + val query = "CREATE TABLE my_table (id int, name string)" + val (desc, allowExisting) = extractTableDesc(query) + assert(!allowExisting) + assert(desc.identifier.database == Some("default")) + assert(desc.identifier.table == "my_table") + assert(desc.tableType == CatalogTableType.MANAGED) + assert(desc.schema == new StructType().add("id", "int").add("name", "string")) + assert(desc.partitionColumnNames.isEmpty) + assert(desc.bucketSpec.isEmpty) + assert(desc.viewText.isEmpty) + assert(desc.viewQueryColumnNames.isEmpty) + assert(desc.storage.locationUri.isEmpty) + assert(desc.storage.inputFormat == + Some("org.apache.hadoop.mapred.TextInputFormat")) + assert(desc.storage.outputFormat == + Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")) + assert(desc.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) + assert(desc.storage.properties.isEmpty) + assert(desc.properties.isEmpty) + assert(desc.comment.isEmpty) + } + + test("create table - with database name") { + val query = "CREATE TABLE dbx.my_table 
(id int, name string)" + val (desc, _) = extractTableDesc(query) + assert(desc.identifier.database == Some("dbx")) + assert(desc.identifier.table == "my_table") + } + + test("create table - temporary") { + val query = "CREATE TEMPORARY TABLE tab1 (id int, name string)" + val e = intercept[ParseException] { parsePlan(query) } + assert(e.message.contains("Operation not allowed: CREATE TEMPORARY TABLE")) + } + + test("create table - external") { + val query = "CREATE EXTERNAL TABLE tab1 (id int, name string) LOCATION '/path/to/nowhere'" + val (desc, _) = extractTableDesc(query) + assert(desc.tableType == CatalogTableType.EXTERNAL) + assert(desc.storage.locationUri == Some(new URI("/path/to/nowhere"))) + } + + test("create table - if not exists") { + val query = "CREATE TABLE IF NOT EXISTS tab1 (id int, name string)" + val (_, allowExisting) = extractTableDesc(query) + assert(allowExisting) + } + + test("create table - comment") { + val query = "CREATE TABLE my_table (id int, name string) COMMENT 'its hot as hell below'" + val (desc, _) = extractTableDesc(query) + assert(desc.comment == Some("its hot as hell below")) + } + + test("create table - partitioned columns") { + val query = "CREATE TABLE my_table (id int, name string) PARTITIONED BY (month int)" + val (desc, _) = extractTableDesc(query) + assert(desc.schema == new StructType() + .add("id", "int") + .add("name", "string") + .add("month", "int")) + assert(desc.partitionColumnNames == Seq("month")) + } + + test("create table - clustered by") { + val numBuckets = 10 + val bucketedColumn = "id" + val sortColumn = "id" + val baseQuery = + s""" + CREATE TABLE my_table ( + $bucketedColumn int, + name string) + CLUSTERED BY($bucketedColumn) + """ + + val query1 = s"$baseQuery INTO $numBuckets BUCKETS" + val (desc1, _) = extractTableDesc(query1) + assert(desc1.bucketSpec.isDefined) + val bucketSpec1 = desc1.bucketSpec.get + assert(bucketSpec1.numBuckets == numBuckets) + assert(bucketSpec1.bucketColumnNames.head.equals(bucketedColumn)) + assert(bucketSpec1.sortColumnNames.isEmpty) + + val query2 = s"$baseQuery SORTED BY($sortColumn) INTO $numBuckets BUCKETS" + val (desc2, _) = extractTableDesc(query2) + assert(desc2.bucketSpec.isDefined) + val bucketSpec2 = desc2.bucketSpec.get + assert(bucketSpec2.numBuckets == numBuckets) + assert(bucketSpec2.bucketColumnNames.head.equals(bucketedColumn)) + assert(bucketSpec2.sortColumnNames.head.equals(sortColumn)) + } + + test("create table(hive) - skewed by") { + val baseQuery = "CREATE TABLE my_table (id int, name string) SKEWED BY" + val query1 = s"$baseQuery(id) ON (1, 10, 100)" + val query2 = s"$baseQuery(id, name) ON ((1, 'x'), (2, 'y'), (3, 'z'))" + val query3 = s"$baseQuery(id, name) ON ((1, 'x'), (2, 'y'), (3, 'z')) STORED AS DIRECTORIES" + val e1 = intercept[ParseException] { parsePlan(query1) } + val e2 = intercept[ParseException] { parsePlan(query2) } + val e3 = intercept[ParseException] { parsePlan(query3) } + assert(e1.getMessage.contains("Operation not allowed")) + assert(e2.getMessage.contains("Operation not allowed")) + assert(e3.getMessage.contains("Operation not allowed")) + } + + test("create table(hive) - row format") { + val baseQuery = "CREATE TABLE my_table (id int, name string) ROW FORMAT" + val query1 = s"$baseQuery SERDE 'org.apache.poof.serde.Baff'" + val query2 = s"$baseQuery SERDE 'org.apache.poof.serde.Baff' WITH SERDEPROPERTIES ('k1'='v1')" + val query3 = + s""" + |$baseQuery DELIMITED FIELDS TERMINATED BY 'x' ESCAPED BY 'y' + |COLLECTION ITEMS TERMINATED BY 'a' + |MAP KEYS 
TERMINATED BY 'b' + |LINES TERMINATED BY '\n' + |NULL DEFINED AS 'c' + """.stripMargin + val (desc1, _) = extractTableDesc(query1) + val (desc2, _) = extractTableDesc(query2) + val (desc3, _) = extractTableDesc(query3) + assert(desc1.storage.serde == Some("org.apache.poof.serde.Baff")) + assert(desc1.storage.properties.isEmpty) + assert(desc2.storage.serde == Some("org.apache.poof.serde.Baff")) + assert(desc2.storage.properties == Map("k1" -> "v1")) + assert(desc3.storage.properties == Map( + "field.delim" -> "x", + "escape.delim" -> "y", + "serialization.format" -> "x", + "line.delim" -> "\n", + "colelction.delim" -> "a", // yes, it's a typo from Hive :) + "mapkey.delim" -> "b")) + } + + test("create table(hive) - file format") { + val baseQuery = "CREATE TABLE my_table (id int, name string) STORED AS" + val query1 = s"$baseQuery INPUTFORMAT 'winput' OUTPUTFORMAT 'wowput'" + val query2 = s"$baseQuery ORC" + val (desc1, _) = extractTableDesc(query1) + val (desc2, _) = extractTableDesc(query2) + assert(desc1.storage.inputFormat == Some("winput")) + assert(desc1.storage.outputFormat == Some("wowput")) + assert(desc1.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) + assert(desc2.storage.inputFormat == Some("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat")) + assert(desc2.storage.outputFormat == Some("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat")) + assert(desc2.storage.serde == Some("org.apache.hadoop.hive.ql.io.orc.OrcSerde")) + } + + test("create table(hive) - storage handler") { + val baseQuery = "CREATE TABLE my_table (id int, name string) STORED BY" + val query1 = s"$baseQuery 'org.papachi.StorageHandler'" + val query2 = s"$baseQuery 'org.mamachi.StorageHandler' WITH SERDEPROPERTIES ('k1'='v1')" + val e1 = intercept[ParseException] { parsePlan(query1) } + val e2 = intercept[ParseException] { parsePlan(query2) } + assert(e1.getMessage.contains("Operation not allowed")) + assert(e2.getMessage.contains("Operation not allowed")) + } + + test("create table - properties") { + val query = "CREATE TABLE my_table (id int, name string) TBLPROPERTIES ('k1'='v1', 'k2'='v2')" + parsePlan(query) match { + case state: CreateTableStatement => + assert(state.properties == Map("k1" -> "v1", "k2" -> "v2")) + } + } + + test("create table(hive) - everything!") { + val query = + """ + |CREATE EXTERNAL TABLE IF NOT EXISTS dbx.my_table (id int, name string) + |COMMENT 'no comment' + |PARTITIONED BY (month int) + |ROW FORMAT SERDE 'org.apache.poof.serde.Baff' WITH SERDEPROPERTIES ('k1'='v1') + |STORED AS INPUTFORMAT 'winput' OUTPUTFORMAT 'wowput' + |LOCATION '/path/to/mercury' + |TBLPROPERTIES ('k1'='v1', 'k2'='v2') + """.stripMargin + val (desc, allowExisting) = extractTableDesc(query) + assert(allowExisting) + assert(desc.identifier.database == Some("dbx")) + assert(desc.identifier.table == "my_table") + assert(desc.tableType == CatalogTableType.EXTERNAL) + assert(desc.schema == new StructType() + .add("id", "int") + .add("name", "string") + .add("month", "int")) + assert(desc.partitionColumnNames == Seq("month")) + assert(desc.bucketSpec.isEmpty) + assert(desc.viewText.isEmpty) + assert(desc.viewCatalogAndNamespace.isEmpty) + assert(desc.viewQueryColumnNames.isEmpty) + assert(desc.storage.locationUri == Some(new URI("/path/to/mercury"))) + assert(desc.storage.inputFormat == Some("winput")) + assert(desc.storage.outputFormat == Some("wowput")) + assert(desc.storage.serde == Some("org.apache.poof.serde.Baff")) + assert(desc.storage.properties == Map("k1" -> "v1")) + 
assert(desc.properties == Map("k1" -> "v1", "k2" -> "v2")) + assert(desc.comment == Some("no comment")) + } + // TODO: add tests for more commands. } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala index 983209051c8ae..00c599065ce31 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala @@ -166,13 +166,13 @@ class CreateTableAsSelectSuite extends DataSourceTest with SharedSparkSession { ) }.getMessage assert(error.contains("Operation not allowed") && - error.contains("CREATE TEMPORARY TABLE ... USING ... AS query")) + error.contains("CREATE TEMPORARY TABLE")) } } test("disallows CREATE EXTERNAL TABLE ... USING ... AS query") { withTable("t") { - val error = intercept[ParseException] { + val error = intercept[AnalysisException] { sql( s""" |CREATE EXTERNAL TABLE t USING PARQUET diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 56b871644453b..b8b1da4cb9db7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -598,8 +598,7 @@ class HiveDDLSuite val e = intercept[AnalysisException] { sql("CREATE TABLE tbl(a int) PARTITIONED BY (b) STORED AS parquet") } - assert(e.message.contains("Must specify a data type for each partition column while creating " + - "Hive partitioned table.")) + assert(e.message.contains("partition column b is not defined in table")) } test("add/drop partition with location - managed table") { @@ -2701,8 +2700,7 @@ class HiveDDLSuite |AS SELECT 1 as a, "a" as b """.stripMargin) }.getMessage - assert(err1.contains("Schema may not be specified in a Create Table As Select " + - "(CTAS) statement")) + assert(err1.contains("Schema may not be specified in a Create Table As Select")) val err2 = intercept[ParseException] { spark.sql( @@ -2713,8 +2711,7 @@ class HiveDDLSuite |AS SELECT 1 as a, "a" as b """.stripMargin) }.getMessage - assert(err2.contains("Create Partitioned Table As Select cannot specify data type for " + - "the partition columns of the target table")) + assert(err2.contains("Partition column types may not be specified in Create Table As Select")) } test("Hive CTAS with dynamic partition") { @@ -2783,7 +2780,7 @@ class HiveDDLSuite |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' """.stripMargin) }.getMessage - assert(e.contains("'ROW FORMAT' must be used with 'STORED AS'")) + assert(e.contains("Operation not allowed: CREATE TABLE LIKE ... USING ... ROW FORMAT SERDE")) // row format doesn't work with provider hive e = intercept[AnalysisException] { @@ -2794,7 +2791,7 @@ class HiveDDLSuite |WITH SERDEPROPERTIES ('test' = 'test') """.stripMargin) }.getMessage - assert(e.contains("'ROW FORMAT' must be used with 'STORED AS'")) + assert(e.contains("Operation not allowed: CREATE TABLE LIKE ... USING ... 
ROW FORMAT SERDE")) // row format doesn't work without 'STORED AS' e = intercept[AnalysisException] { @@ -2807,6 +2804,17 @@ class HiveDDLSuite }.getMessage assert(e.contains("'ROW FORMAT' must be used with 'STORED AS'")) + // 'INPUTFORMAT' and 'OUTPUTFORMAT' conflict with 'USING' + e = intercept[AnalysisException] { + spark.sql( + """ + |CREATE TABLE targetDsTable LIKE sourceDsTable USING format + |STORED AS INPUTFORMAT 'inFormat' OUTPUTFORMAT 'outFormat' + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + """.stripMargin) + }.getMessage + assert(e.contains("Operation not allowed: CREATE TABLE LIKE ... USING ... STORED AS")) + // row format works with STORED AS hive format (from hive table) spark.sql( """ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala index 24b1e3405379c..f723c9f80c2ab 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala @@ -21,11 +21,10 @@ import java.net.URI import org.scalatest.BeforeAndAfterAll -import org.apache.spark.sql.{AnalysisException, SaveMode, SparkSession} +import org.apache.spark.sql.{AnalysisException, SparkSession} import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.execution.command.{CreateTableCommand, DDLUtils} -import org.apache.spark.sql.execution.datasources.CreateTable import org.apache.spark.sql.execution.metric.InputOutputMetricsHelper import org.apache.spark.sql.hive.test.TestHive import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} @@ -71,8 +70,8 @@ class HiveSerDeSuite extends HiveComparisonTest with PlanTest with BeforeAndAfte } private def extractTableDesc(sql: String): (CatalogTable, Boolean) = { - TestHive.sessionState.sqlParser.parsePlan(sql).collect { - case CreateTable(tableDesc, mode, _) => (tableDesc, mode == SaveMode.Ignore) + TestHive.sessionState.analyzer.execute(TestHive.sessionState.sqlParser.parsePlan(sql)).collect { + case CreateTableCommand(tableDesc, ifNotExists) => (tableDesc, ifNotExists) }.head } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 712f81d98753e..79b3c3efe531c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -712,8 +712,7 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi |AS SELECT key, value FROM mytable1 """.stripMargin) }.getMessage - assert(e.contains("Create Partitioned Table As Select cannot specify data type for " + - "the partition columns of the target table")) + assert(e.contains("Partition column types may not be specified in Create Table As Select")) } } } From d691d85701adc3db3b7545b87065f2a5113c2b99 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Wed, 25 Nov 2020 23:15:52 +0800 Subject: [PATCH 0573/1009] [SPARK-33496][SQL] Improve error message of ANSI explicit cast ### What changes were proposed in this pull request? After https://github.com/apache/spark/pull/30260, there are some type conversions disallowed under ANSI mode. We should tell users what they can do if they have to use the disallowed casting. 
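To make the improvement concrete, here is a minimal spark-shell sketch (illustrative only, not part of this patch) of a cast the new message targets; the exact wording is produced by `AnsiCast.typeCheckFailureMessage` in the diff below, and `spark.sql.ansi.enabled` is the fallback config referenced in the hint.

```
// Illustration only: with ANSI mode on, casting a complex type to STRING is rejected at
// analysis time, and the message now suggests a workaround (ARRAY_JOIN, or turning
// spark.sql.ansi.enabled off) instead of a bare "cannot cast array<int> to string".
spark.conf.set("spark.sql.ansi.enabled", "true")
spark.sql("SELECT CAST(array(1, 2) AS STRING)")  // throws AnalysisException carrying the hint
```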
### Why are the changes needed? Make it more user-friendly. ### Does this PR introduce _any_ user-facing change? Yes, the error message is improved on casting failure when ANSI mode is enabled ### How was this patch tested? Unit tests. Closes #30440 from gengliangwang/improveAnsiCastErrorMSG. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../spark/sql/catalyst/expressions/Cast.scala | 51 ++++++++++++++++++- .../sql/catalyst/expressions/CastSuite.scala | 38 ++++++++++++-- 2 files changed, 82 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index e5f11b5e74916..e6f585cacc6c7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -262,6 +262,11 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit */ def canCast(from: DataType, to: DataType): Boolean + /** + * Returns the error message if casting from one type to another one is invalid. + */ + def typeCheckFailureMessage: String + override def toString: String = { val ansi = if (ansiEnabled) "ansi_" else "" s"${ansi}cast($child as ${dataType.simpleString})" @@ -271,8 +276,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit if (canCast(child.dataType, dataType)) { TypeCheckResult.TypeCheckSuccess } else { - TypeCheckResult.TypeCheckFailure( - s"cannot cast ${child.dataType.catalogString} to ${dataType.catalogString}") + TypeCheckResult.TypeCheckFailure(typeCheckFailureMessage) } } @@ -1755,6 +1759,12 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String } else { Cast.canCast(from, to) } + + override def typeCheckFailureMessage: String = if (ansiEnabled) { + AnsiCast.typeCheckFailureMessage(child.dataType, dataType, SQLConf.ANSI_ENABLED.key, "false") + } else { + s"cannot cast ${child.dataType.catalogString} to ${dataType.catalogString}" + } } /** @@ -1774,6 +1784,14 @@ case class AnsiCast(child: Expression, dataType: DataType, timeZoneId: Option[St override protected val ansiEnabled: Boolean = true override def canCast(from: DataType, to: DataType): Boolean = AnsiCast.canCast(from, to) + + // For now, this expression is only used in table insertion. + // If there are more scenarios for this expression, we should update the error message on type + // check failure. + override def typeCheckFailureMessage: String = + AnsiCast.typeCheckFailureMessage(child.dataType, dataType, + SQLConf.STORE_ASSIGNMENT_POLICY.key, SQLConf.StoreAssignmentPolicy.LEGACY.toString) + } object AnsiCast { @@ -1876,6 +1894,35 @@ object AnsiCast { case _ => false } + + def typeCheckFailureMessage( + from: DataType, + to: DataType, + fallbackConfKey: String, + fallbackConfValue: String): String = + (from, to) match { + case (_: NumericType, TimestampType) => + // scalastyle:off line.size.limit + s""" + | cannot cast ${from.catalogString} to ${to.catalogString}. + | To convert values from ${from.catalogString} to ${to.catalogString}, you can use functions TIMESTAMP_SECONDS/TIMESTAMP_MILLIS/TIMESTAMP_MICROS instead. + |""".stripMargin + + case (_: ArrayType, StringType) => + s""" + | cannot cast ${from.catalogString} to ${to.catalogString} with ANSI mode on. 
+ | If you have to cast ${from.catalogString} to ${to.catalogString}, you can use the function ARRAY_JOIN or set $fallbackConfKey as $fallbackConfValue. + |""".stripMargin + + case _ if Cast.canCast(from, to) => + s""" + | cannot cast ${from.catalogString} to ${to.catalogString} with ANSI mode on. + | If you have to cast ${from.catalogString} to ${to.catalogString}, you can set $fallbackConfKey as $fallbackConfValue. + |""".stripMargin + + case _ => s"cannot cast ${from.catalogString} to ${to.catalogString}" + // scalastyle:on line.size.limit + } } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 2bc27ad35efff..f1fc921e401ba 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -25,6 +25,7 @@ import scala.collection.parallel.immutable.ParVector import org.apache.spark.SparkFunSuite import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.TypeCheckFailure import org.apache.spark.sql.catalyst.analysis.TypeCoercion.numericPrecedence import org.apache.spark.sql.catalyst.analysis.TypeCoercionSuite import org.apache.spark.sql.catalyst.expressions.aggregate.{CollectList, CollectSet} @@ -841,12 +842,28 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { cast(Literal(134.12), DecimalType(3, 2)), "cannot be represented") } + protected def setConfigurationHint: String + + private def verifyCastFailure(c: CastBase, optionalExpectedMsg: Option[String] = None): Unit = { + val typeCheckResult = c.checkInputDataTypes() + assert(typeCheckResult.isFailure) + assert(typeCheckResult.isInstanceOf[TypeCheckFailure]) + val message = typeCheckResult.asInstanceOf[TypeCheckFailure].message + + if (optionalExpectedMsg.isDefined) { + assert(message.contains(optionalExpectedMsg.get)) + } else { + assert(message.contains("with ANSI mode on")) + assert(message.contains(setConfigurationHint)) + } + } + test("ANSI mode: disallow type conversions between Numeric types and Timestamp type") { import DataTypeTestUtils.numericTypes checkInvalidCastFromNumericType(TimestampType) val timestampLiteral = Literal(1L, TimestampType) numericTypes.foreach { numericType => - assert(cast(timestampLiteral, numericType).checkInputDataTypes().isFailure) + verifyCastFailure(cast(timestampLiteral, numericType)) } } @@ -855,7 +872,7 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { checkInvalidCastFromNumericType(DateType) val dateLiteral = Literal(1, DateType) numericTypes.foreach { numericType => - assert(cast(dateLiteral, numericType).checkInputDataTypes().isFailure) + verifyCastFailure(cast(dateLiteral, numericType)) } } @@ -880,9 +897,9 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { } test("ANSI mode: disallow casting complex types as String type") { - assert(cast(Literal.create(Array(1, 2, 3, 4, 5)), StringType).checkInputDataTypes().isFailure) - assert(cast(Literal.create(Map(1 -> "a")), StringType).checkInputDataTypes().isFailure) - assert(cast(Literal.create((1, "a", 0.1)), StringType).checkInputDataTypes().isFailure) + verifyCastFailure(cast(Literal.create(Array(1, 2, 3, 4, 5)), StringType)) + verifyCastFailure(cast(Literal.create(Map(1 -> "a")), StringType)) + verifyCastFailure(cast(Literal.create((1, "a", 0.1)), StringType)) } 
test("cast from invalid string to numeric should throw NumberFormatException") { @@ -1489,6 +1506,9 @@ class CastSuiteWithAnsiModeOn extends AnsiCastSuiteBase { case _ => Cast(Literal(v), targetType, timeZoneId) } } + + override def setConfigurationHint: String = + s"set ${SQLConf.ANSI_ENABLED.key} as false" } /** @@ -1511,6 +1531,10 @@ class AnsiCastSuiteWithAnsiModeOn extends AnsiCastSuiteBase { case _ => AnsiCast(Literal(v), targetType, timeZoneId) } } + + override def setConfigurationHint: String = + s"set ${SQLConf.STORE_ASSIGNMENT_POLICY.key} as" + + s" ${SQLConf.StoreAssignmentPolicy.LEGACY.toString}" } /** @@ -1533,4 +1557,8 @@ class AnsiCastSuiteWithAnsiModeOff extends AnsiCastSuiteBase { case _ => AnsiCast(Literal(v), targetType, timeZoneId) } } + + override def setConfigurationHint: String = + s"set ${SQLConf.STORE_ASSIGNMENT_POLICY.key} as" + + s" ${SQLConf.StoreAssignmentPolicy.LEGACY.toString}" } From 9643eab53e4bbaee08f7f8c766b0d1e0d9348d55 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 25 Nov 2020 08:55:39 -0800 Subject: [PATCH 0574/1009] [SPARK-33540][SQL] Subexpression elimination for interpreted predicate ### What changes were proposed in this pull request? This patch proposes to support subexpression elimination for interpreted predicate. ### Why are the changes needed? Similar to interpreted projection, there are use cases when codegen predicate is not able to work, e.g. too complex schema, non-codegen expression, etc. When there are frequently occurring expressions (subexpressions) among predicate expression, the performance is quite bad as we need to re-compute same expressions. We should be able to support subexpression elimination for interpreted predicate like interpreted projection. ### Does this PR introduce _any_ user-facing change? No, this doesn't change user behavior. ### How was this patch tested? Unit test and benchmark. Closes #30497 from viirya/SPARK-33540. 
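To make the targeted use case concrete, here is a rough sketch (illustrative only, not code from this patch) of a predicate in which the same non-leaf expression tree occurs twice; with `spark.sql.subexpressionElimination.enabled` on, the `SubExprEvaluationRuntime` that this patch wires into `InterpretedPredicate` lets that shared subtree be evaluated once per input row and reused.

```
// Illustrative driver code; the expression classes are Spark's, the wiring below is hypothetical.
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types.IntegerType

val a = BoundReference(0, IntegerType, nullable = false)
val b = BoundReference(1, IntegerType, nullable = false)
val shared = Add(a, b)  // this subtree appears twice in the predicate below
val pred = InterpretedPredicate(
  And(GreaterThan(shared, Literal(0)), LessThan(shared, Literal(10))))
pred.initialize(0)
pred.eval(InternalRow(3, 4))  // true; the shared Add(a, b) can be computed once and reused
```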
Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/expressions/predicates.scala | 19 +++++++++++++++++-- ...ExprEliminationBenchmark-jdk11-results.txt | 16 ++++++++-------- .../SubExprEliminationBenchmark-results.txt | 16 ++++++++-------- 3 files changed, 33 insertions(+), 18 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 53d6394d0d1f1..53ac3560bc3b3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -46,11 +46,26 @@ abstract class BasePredicate { } case class InterpretedPredicate(expression: Expression) extends BasePredicate { - override def eval(r: InternalRow): Boolean = expression.eval(r).asInstanceOf[Boolean] + private[this] val subExprEliminationEnabled = SQLConf.get.subexpressionEliminationEnabled + private[this] lazy val runtime = + new SubExprEvaluationRuntime(SQLConf.get.subexpressionEliminationCacheMaxEntries) + private[this] val expr = if (subExprEliminationEnabled) { + runtime.proxyExpressions(Seq(expression)).head + } else { + expression + } + + override def eval(r: InternalRow): Boolean = { + if (subExprEliminationEnabled) { + runtime.setInput(r) + } + + expr.eval(r).asInstanceOf[Boolean] + } override def initialize(partitionIndex: Int): Unit = { super.initialize(partitionIndex) - expression.foreach { + expr.foreach { case n: Nondeterministic => n.initialize(partitionIndex) case _ => } diff --git a/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt b/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt index 1eb7b534d2194..a7f0acc3cdc86 100644 --- a/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt +++ b/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt @@ -7,19 +7,19 @@ OpenJDK 64-Bit Server VM 11.0.9+11 on Mac OS X 10.15.6 Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz from_json as subExpr in Project: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -subExprElimination false, codegen: true 26447 27127 605 0.0 264467933.4 1.0X -subExprElimination false, codegen: false 25673 26035 546 0.0 256732419.1 1.0X -subExprElimination true, codegen: true 1384 1448 102 0.0 13842910.3 19.1X -subExprElimination true, codegen: false 1244 1347 123 0.0 12442389.3 21.3X +subExprElimination false, codegen: true 24827 25398 562 0.0 248271027.2 1.0X +subExprElimination false, codegen: false 25052 25704 625 0.0 250518603.6 1.0X +subExprElimination true, codegen: true 1540 1606 92 0.0 15403083.7 16.1X +subExprElimination true, codegen: false 1487 1535 53 0.0 14865051.6 16.7X Preparing data for benchmarking ... 
OpenJDK 64-Bit Server VM 11.0.9+11 on Mac OS X 10.15.6 Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz from_json as subExpr in Filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -subexpressionElimination off, codegen on 34631 35449 833 0.0 346309884.0 1.0X -subexpressionElimination off, codegen on 34480 34851 353 0.0 344798490.4 1.0X -subexpressionElimination off, codegen on 16618 16811 291 0.0 166176642.6 2.1X -subexpressionElimination off, codegen on 34316 34667 310 0.0 343157094.7 1.0X +subexpressionElimination off, codegen on 37327 38261 809 0.0 373266387.0 1.0X +subexpressionElimination off, codegen on 36126 37445 1575 0.0 361263987.0 1.0X +subexpressionElimination off, codegen on 20152 21596 1263 0.0 201522903.8 1.9X +subexpressionElimination off, codegen on 20799 20940 233 0.0 207993923.0 1.8X diff --git a/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt b/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt index 801f519ca76a1..e5f1bc14243e0 100644 --- a/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt +++ b/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt @@ -7,19 +7,19 @@ OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.6 Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz from_json as subExpr in Project: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -subExprElimination false, codegen: true 22767 23240 424 0.0 227665316.7 1.0X -subExprElimination false, codegen: false 22869 23351 465 0.0 228693464.1 1.0X -subExprElimination true, codegen: true 1328 1340 10 0.0 13280056.2 17.1X -subExprElimination true, codegen: false 1248 1276 31 0.0 12476135.1 18.2X +subExprElimination false, codegen: true 23094 23763 585 0.0 230939301.2 1.0X +subExprElimination false, codegen: false 23161 24087 844 0.0 231611379.8 1.0X +subExprElimination true, codegen: true 1492 1517 30 0.0 14921022.9 15.5X +subExprElimination true, codegen: false 1300 1361 93 0.0 12996167.7 17.8X Preparing data for benchmarking ... OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.6 Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz from_json as subExpr in Filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -subexpressionElimination off, codegen on 37691 38846 1004 0.0 376913767.9 1.0X -subexpressionElimination off, codegen on 37852 39124 1103 0.0 378517745.5 1.0X -subexpressionElimination off, codegen on 22900 23085 202 0.0 229000242.5 1.6X -subexpressionElimination off, codegen on 38298 38598 374 0.0 382978731.3 1.0X +subexpressionElimination off, codegen on 37069 37767 985 0.0 370694301.5 1.0X +subexpressionElimination off, codegen on 37095 37970 1008 0.0 370945081.6 1.0X +subexpressionElimination off, codegen on 20618 21443 715 0.0 206175173.8 1.8X +subexpressionElimination off, codegen on 21563 21887 307 0.0 215626274.7 1.7X From 7cf6a6f996e25754de13aa66badbe6d1d53efb36 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 25 Nov 2020 09:57:46 -0800 Subject: [PATCH 0575/1009] [SPARK-31257][SPARK-33561][SQL][FOLLOWUP] Fix Scala 2.13 compilation ### What changes were proposed in this pull request? This PR is a follow-up to fix Scala 2.13 compilation. 
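For context, a minimal sketch (my illustration with hypothetical helper names, not code from the patch) of the two Scala 2.13 patterns this follow-up addresses: under 2.13 `scala.Seq` aliases `immutable.Seq`, so the mutable `Buffer` returned by `asScala` needs an explicit `.toSeq`, and `Map#filterKeys` returns a lazy `MapView` that needs `.toMap`.

```
// Hypothetical helpers for illustration only.
import scala.collection.JavaConverters._  // the converters Spark used at the time

// asScala on a java.util.List yields a mutable.Buffer; in 2.13 that no longer conforms
// to Seq (now immutable.Seq), hence the explicit .toSeq added in AstBuilder/SparkSqlParser.
def toScalaSeq(xs: java.util.List[String]): Seq[String] = xs.asScala.toSeq

// filterKeys returns a MapView in 2.13; .toMap materializes it again, as in V2SessionCatalog.
def stripOptionPrefix(props: Map[String, String], prefix: String): Map[String, String] =
  props.filterKeys(_.startsWith(prefix))
    .map { case (k, v) => k.stripPrefix(prefix) -> v }
    .toMap
```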
### Why are the changes needed? To support Scala 2.13 in Apache Spark 3.1. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the GitHub Action Scala 2.13 compilation job. Closes #30502 from dongjoon-hyun/SPARK-31257. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 3 ++- .../scala/org/apache/spark/sql/execution/SparkSqlParser.scala | 2 +- .../spark/sql/execution/datasources/v2/V2SessionCatalog.scala | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 25423e510157a..606d923061441 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2946,7 +2946,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg val location = visitLocationSpecList(ctx.locationSpec()) val (cleanedOptions, newLocation) = cleanTableOptions(ctx, options, location) val comment = visitCommentSpecList(ctx.commentSpec()) - val serdeInfo = getSerdeInfo(ctx.rowFormat.asScala, ctx.createFileFormat.asScala, ctx) + val serdeInfo = + getSerdeInfo(ctx.rowFormat.asScala.toSeq, ctx.createFileFormat.asScala.toSeq, ctx) (partTransforms, partCols, bucketSpec, cleanedProperties, cleanedOptions, newLocation, comment, serdeInfo) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index a92f0775f1c05..568c7112954f5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -440,7 +440,7 @@ class SparkSqlAstBuilder extends AstBuilder { val location = visitLocationSpecList(ctx.locationSpec()) // TODO: Do not skip serde check for CREATE TABLE LIKE. val serdeInfo = getSerdeInfo( - ctx.rowFormat.asScala, ctx.createFileFormat.asScala, ctx, skipCheck = true) + ctx.rowFormat.asScala.toSeq, ctx.createFileFormat.asScala.toSeq, ctx, skipCheck = true) if (provider.isDefined && serdeInfo.isDefined) { operationNotAllowed(s"CREATE TABLE LIKE ... USING ... ${serdeInfo.get.describe}", ctx) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala index f330d6a8c99e2..a0bc65d3f9057 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala @@ -114,7 +114,7 @@ class V2SessionCatalog(catalog: SessionCatalog) private def toOptions(properties: Map[String, String]): Map[String, String] = { properties.filterKeys(_.startsWith(TableCatalog.OPTION_PREFIX)).map { case (key, value) => key.drop(TableCatalog.OPTION_PREFIX.length) -> value - } + }.toMap } override def alterTable( From 1de3fc42829187c54334df1fb2149dc4aeb78ed9 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Wed, 25 Nov 2020 12:37:59 -0800 Subject: [PATCH 0576/1009] [SPARK-33525][SQL] Update hive-service-rpc to 3.1.2 ### What changes were proposed in this pull request? 
We support Hive metastore versions 0.12.0 through 3.1.2, but we only support hive-jdbc 0.12.0 through 2.3.7. It will throw a `TProtocolException` if we use hive-jdbc 3.x:

```
[root@spark-3267648 apache-hive-3.1.2-bin]# bin/beeline -u jdbc:hive2://localhost:10000/default
Connecting to jdbc:hive2://localhost:10000/default
Connected to: Spark SQL (version 3.1.0-SNAPSHOT)
Driver: Hive JDBC (version 3.1.2)
Transaction isolation: TRANSACTION_REPEATABLE_READ
Beeline version 3.1.2 by Apache Hive
0: jdbc:hive2://localhost:10000/default> create table t1(id int) using parquet;
Unexpected end of file when reading from HS2 server. The root cause might be too many concurrent connections. Please ask the administrator to check the number of active connections, and adjust hive.server2.thrift.max.worker.threads if applicable.
Error: org.apache.thrift.transport.TTransportException (state=08S01,code=0)
```

```
org.apache.thrift.protocol.TProtocolException: Missing version in readMessageBegin, old client?
	at org.apache.thrift.protocol.TBinaryProtocol.readMessageBegin(TBinaryProtocol.java:234)
	at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:27)
	at org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:53)
	at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:310)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1130)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:630)
	at java.base/java.lang.Thread.run(Thread.java:832)
```

This PR upgrades hive-service-rpc to 3.1.2 to fix this issue.

### Why are the changes needed?

To support hive-jdbc 3.x.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Manual test:
```
[root@spark-3267648 apache-hive-3.1.2-bin]# bin/beeline -u jdbc:hive2://localhost:10000/default
Connecting to jdbc:hive2://localhost:10000/default
Connected to: Spark SQL (version 3.1.0-SNAPSHOT)
Driver: Hive JDBC (version 3.1.2)
Transaction isolation: TRANSACTION_REPEATABLE_READ
Beeline version 3.1.2 by Apache Hive
0: jdbc:hive2://localhost:10000/default> create table t1(id int) using parquet;
+---------+
| Result  |
+---------+
+---------+
No rows selected (1.051 seconds)
0: jdbc:hive2://localhost:10000/default> insert into t1 values(1);
+---------+
| Result  |
+---------+
+---------+
No rows selected (2.08 seconds)
0: jdbc:hive2://localhost:10000/default> select * from t1;
+-----+
| id  |
+-----+
| 1   |
+-----+
1 row selected (0.605 seconds)
```

Closes #30478 from wangyum/SPARK-33525.
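The same connectivity can also be checked programmatically (an illustrative sketch, not part of the patch; the host, port, and table name are assumptions matching the manual test above):

```scala
import java.sql.DriverManager

// Assumes the Spark Thrift server is listening on localhost:10000 and the
// hive-jdbc 3.x driver jar is on the classpath.
Class.forName("org.apache.hive.jdbc.HiveDriver")
val conn = DriverManager.getConnection("jdbc:hive2://localhost:10000/default")
try {
  val stmt = conn.createStatement()
  val rs = stmt.executeQuery("SELECT * FROM t1")
  while (rs.next()) {
    println(rs.getInt(1))
  }
} finally {
  conn.close()
}
```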
Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 2 +- pom.xml | 2 +- .../apache/hive/service/cli/CLIService.java | 10 ++++++ .../apache/hive/service/cli/GetInfoType.java | 3 +- .../apache/hive/service/cli/ICLIService.java | 3 ++ .../cli/thrift/ThriftBinaryCLIService.java | 13 ++++++++ .../service/cli/thrift/ThriftCLIService.java | 31 +++++++++++++++++++ .../cli/thrift/ThriftCLIServiceClient.java | 9 ++++++ .../thriftserver/SparkSQLCLIService.scala | 1 + 10 files changed, 72 insertions(+), 4 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index bcf05506855c5..8802220726f78 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -88,7 +88,7 @@ hive-jdbc/2.3.7//hive-jdbc-2.3.7.jar hive-llap-common/2.3.7//hive-llap-common-2.3.7.jar hive-metastore/2.3.7//hive-metastore-2.3.7.jar hive-serde/2.3.7//hive-serde-2.3.7.jar -hive-service-rpc/2.3.7//hive-service-rpc-2.3.7.jar +hive-service-rpc/3.1.2//hive-service-rpc-3.1.2.jar hive-shims-0.23/2.3.7//hive-shims-0.23-2.3.7.jar hive-shims-common/2.3.7//hive-shims-common-2.3.7.jar hive-shims-scheduler/2.3.7//hive-shims-scheduler-2.3.7.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index cd274bef7045b..d45eeea0ee92b 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -65,7 +65,7 @@ hive-jdbc/2.3.7//hive-jdbc-2.3.7.jar hive-llap-common/2.3.7//hive-llap-common-2.3.7.jar hive-metastore/2.3.7//hive-metastore-2.3.7.jar hive-serde/2.3.7//hive-serde-2.3.7.jar -hive-service-rpc/2.3.7//hive-service-rpc-2.3.7.jar +hive-service-rpc/3.1.2//hive-service-rpc-3.1.2.jar hive-shims-0.23/2.3.7//hive-shims-0.23-2.3.7.jar hive-shims-common/2.3.7//hive-shims-common-2.3.7.jar hive-shims-scheduler/2.3.7//hive-shims-scheduler-2.3.7.jar diff --git a/pom.xml b/pom.xml index e5b1f30edd3be..cd7e1767d6b18 100644 --- a/pom.xml +++ b/pom.xml @@ -2088,7 +2088,7 @@ ${hive.group} hive-service-rpc - ${hive.version} + 3.1.2 * diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIService.java index bdc1e6251e560..68f044c6a0f28 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIService.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIService.java @@ -45,6 +45,7 @@ import org.apache.hive.service.cli.operation.Operation; import org.apache.hive.service.cli.session.HiveSession; import org.apache.hive.service.cli.session.SessionManager; +import org.apache.hive.service.rpc.thrift.TOperationHandle; import org.apache.hive.service.rpc.thrift.TProtocolVersion; import org.apache.hive.service.server.HiveServer2; import org.slf4j.Logger; @@ -567,6 +568,15 @@ public void renewDelegationToken(SessionHandle sessionHandle, HiveAuthFactory au LOG.info(sessionHandle + ": renewDelegationToken()"); } + @Override + public String getQueryId(TOperationHandle opHandle) throws HiveSQLException { + Operation operation = sessionManager.getOperationManager().getOperation( + new OperationHandle(opHandle)); + final String queryId = operation.getParentSession().getHiveConf().getVar(ConfVars.HIVEQUERYID); + LOG.debug(opHandle + ": getQueryId() " + queryId); + return queryId; + } + public SessionManager getSessionManager() { return sessionManager; } diff --git 
a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/GetInfoType.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/GetInfoType.java index a64d262a8f301..575dff8f8f47b 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/GetInfoType.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/GetInfoType.java @@ -72,7 +72,8 @@ public enum GetInfoType { CLI_DESCRIBE_PARAMETER(TGetInfoType.CLI_DESCRIBE_PARAMETER), CLI_CATALOG_NAME(TGetInfoType.CLI_CATALOG_NAME), CLI_COLLATION_SEQ(TGetInfoType.CLI_COLLATION_SEQ), - CLI_MAX_IDENTIFIER_LEN(TGetInfoType.CLI_MAX_IDENTIFIER_LEN); + CLI_MAX_IDENTIFIER_LEN(TGetInfoType.CLI_MAX_IDENTIFIER_LEN), + CLI_ODBC_KEYWORDS(TGetInfoType.CLI_ODBC_KEYWORDS); private final TGetInfoType tInfoType; diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ICLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ICLIService.java index 3200909477821..a87c6691ebac7 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ICLIService.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ICLIService.java @@ -24,6 +24,7 @@ import org.apache.hive.service.auth.HiveAuthFactory; +import org.apache.hive.service.rpc.thrift.TOperationHandle; public interface ICLIService { @@ -98,6 +99,8 @@ RowSet fetchResults(OperationHandle opHandle, FetchOrientation orientation, String getDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, String owner, String renewer) throws HiveSQLException; + String getQueryId(TOperationHandle operationHandle) throws HiveSQLException; + void cancelDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, String tokenStr) throws HiveSQLException; diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java index ce79e3c8228a6..ffca1070d0047 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java @@ -32,7 +32,11 @@ import org.apache.hive.service.ServiceException; import org.apache.hive.service.auth.HiveAuthFactory; import org.apache.hive.service.cli.CLIService; +import org.apache.hive.service.cli.HiveSQLException; +import org.apache.hive.service.rpc.thrift.TGetQueryIdReq; +import org.apache.hive.service.rpc.thrift.TGetQueryIdResp; import org.apache.hive.service.server.ThreadFactoryWithGarbageCleanup; +import org.apache.thrift.TException; import org.apache.thrift.TProcessorFactory; import org.apache.thrift.protocol.TBinaryProtocol; import org.apache.thrift.server.TThreadPoolServer; @@ -107,6 +111,15 @@ protected void initializeServer() { } } + @Override + public TGetQueryIdResp GetQueryId(TGetQueryIdReq req) throws TException { + try { + return new TGetQueryIdResp(cliService.getQueryId(req.getOperationHandle())); + } catch (HiveSQLException e) { + throw new TException(e); + } + } + @Override public void run() { try { diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java index ea9ed57410045..150f1d60fc466 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java +++ 
b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java @@ -262,6 +262,28 @@ public TOpenSessionResp OpenSession(TOpenSessionReq req) throws TException { return resp; } + @Override + public TSetClientInfoResp SetClientInfo(TSetClientInfoReq req) throws TException { + // TODO: We don't do anything for now, just log this for debugging. + // We may be able to make use of this later, e.g. for workload management. + if (req.isSetConfiguration()) { + StringBuilder sb = null; + for (Map.Entry e : req.getConfiguration().entrySet()) { + if (sb == null) { + SessionHandle sh = new SessionHandle(req.getSessionHandle()); + sb = new StringBuilder("Client information for ").append(sh).append(": "); + } else { + sb.append(", "); + } + sb.append(e.getKey()).append(" = ").append(e.getValue()); + } + if (sb != null) { + LOG.info("{}", sb); + } + } + return new TSetClientInfoResp(OK_STATUS); + } + private String getIpAddress() { String clientIpAddress; // Http transport mode. @@ -674,6 +696,15 @@ public TGetCrossReferenceResp GetCrossReference(TGetCrossReferenceReq req) protected abstract void initializeServer(); + @Override + public TGetQueryIdResp GetQueryId(TGetQueryIdReq req) throws TException { + try { + return new TGetQueryIdResp(cliService.getQueryId(req.getOperationHandle())); + } catch (HiveSQLException e) { + throw new TException(e); + } + } + @Override public abstract void run(); diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java index b13ddf72f77e7..0e81e4446caac 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java @@ -490,4 +490,13 @@ public OperationHandle getCrossReference(SessionHandle sessionHandle, throw new HiveSQLException(e); } } + + @Override + public String getQueryId(TOperationHandle operationHandle) throws HiveSQLException { + try { + return cliService.GetQueryId(new TGetQueryIdReq(operationHandle)).getQueryId(); + } catch (TException e) { + throw new HiveSQLException(e); + } + } } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala index df0fa514ccff3..e9420ad21bebd 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala @@ -104,6 +104,7 @@ private[hive] class SparkSQLCLIService(hiveServer: HiveServer2, sqlContext: SQLC case GetInfoType.CLI_SERVER_NAME => new GetInfoValue("Spark SQL") case GetInfoType.CLI_DBMS_NAME => new GetInfoValue("Spark SQL") case GetInfoType.CLI_DBMS_VER => new GetInfoValue(sqlContext.sparkContext.version) + case GetInfoType.CLI_ODBC_KEYWORDS => new GetInfoValue("Unimplemented") case _ => super.getInfo(sessionHandle, getInfoType) } } From c529426d872c6f09b05679ba76478e3b932e3696 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Wed, 25 Nov 2020 15:15:50 -0800 Subject: [PATCH 0577/1009] [SPARK-33565][BUILD][PYTHON] remove python3.8 and fix breakage ### What changes were proposed in this pull request? 
remove python 3.8 from python/run-tests.py and stop build breaks ### Why are the changes needed? the python tests are running against the bare-bones system install of python3, rather than an anaconda environment. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? via jenkins Closes #30506 from shaneknapp/remove-py38. Authored-by: shane knapp Signed-off-by: shane knapp --- python/run-tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/run-tests.py b/python/run-tests.py index 712f38fb81b83..34800b0e9fa54 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -160,7 +160,7 @@ def run_individual_python_test(target_dir, test_name, pyspark_python): def get_default_python_executables(): - python_execs = [x for x in ["python3.6", "python3.8", "pypy3"] if which(x)] + python_execs = [x for x in ["python3.6", "pypy3"] if which(x)] if "python3.6" not in python_execs: p = which("python3") From fb7b87021437c52d72ad276f92c8d6f5443ebd78 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 25 Nov 2020 15:22:47 -0800 Subject: [PATCH 0578/1009] [SPARK-33523][SQL][TEST][FOLLOWUP] Fix benchmark case name in SubExprEliminationBenchmark ### What changes were proposed in this pull request? Fix the wrong benchmark case name. ### Why are the changes needed? The last commit to refactor the benchmark code missed a change of case name. ### Does this PR introduce _any_ user-facing change? No, dev only. ### How was this patch tested? Unit test. Closes #30505 from viirya/SPARK-33523-followup. Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- ...SubExprEliminationBenchmark-jdk11-results.txt | 16 ++++++++-------- .../SubExprEliminationBenchmark-results.txt | 16 ++++++++-------- .../execution/SubExprEliminationBenchmark.scala | 2 +- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt b/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt index a7f0acc3cdc86..5eeb485a921b8 100644 --- a/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt +++ b/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt @@ -7,19 +7,19 @@ OpenJDK 64-Bit Server VM 11.0.9+11 on Mac OS X 10.15.6 Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz from_json as subExpr in Project: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -subExprElimination false, codegen: true 24827 25398 562 0.0 248271027.2 1.0X -subExprElimination false, codegen: false 25052 25704 625 0.0 250518603.6 1.0X -subExprElimination true, codegen: true 1540 1606 92 0.0 15403083.7 16.1X -subExprElimination true, codegen: false 1487 1535 53 0.0 14865051.6 16.7X +subExprElimination false, codegen: true 22482 23194 652 0.0 224817884.1 1.0X +subExprElimination false, codegen: false 22544 22658 155 0.0 225436869.9 1.0X +subExprElimination true, codegen: true 1371 1403 34 0.0 13710714.3 16.4X +subExprElimination true, codegen: false 1295 1317 20 0.0 12949824.3 17.4X Preparing data for benchmarking ... 
OpenJDK 64-Bit Server VM 11.0.9+11 on Mac OS X 10.15.6 Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz from_json as subExpr in Filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -subexpressionElimination off, codegen on 37327 38261 809 0.0 373266387.0 1.0X -subexpressionElimination off, codegen on 36126 37445 1575 0.0 361263987.0 1.0X -subexpressionElimination off, codegen on 20152 21596 1263 0.0 201522903.8 1.9X -subexpressionElimination off, codegen on 20799 20940 233 0.0 207993923.0 1.8X +subExprElimination false, codegen: true 34976 35331 326 0.0 349759975.5 1.0X +subExprElimination false, codegen: false 34101 34802 607 0.0 341014685.7 1.0X +subExprElimination true, codegen: true 19440 19622 272 0.0 194402251.0 1.8X +subExprElimination true, codegen: false 19247 20064 719 0.0 192466667.6 1.8X diff --git a/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt b/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt index e5f1bc14243e0..49a107f542857 100644 --- a/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt +++ b/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt @@ -7,19 +7,19 @@ OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.6 Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz from_json as subExpr in Project: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -subExprElimination false, codegen: true 23094 23763 585 0.0 230939301.2 1.0X -subExprElimination false, codegen: false 23161 24087 844 0.0 231611379.8 1.0X -subExprElimination true, codegen: true 1492 1517 30 0.0 14921022.9 15.5X -subExprElimination true, codegen: false 1300 1361 93 0.0 12996167.7 17.8X +subExprElimination false, codegen: true 25399 25869 466 0.0 253992369.6 1.0X +subExprElimination false, codegen: false 24086 25094 888 0.0 240858699.5 1.1X +subExprElimination true, codegen: true 1527 1600 64 0.0 15274388.8 16.6X +subExprElimination true, codegen: false 1560 1600 52 0.0 15597825.4 16.3X Preparing data for benchmarking ... 
OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.6 Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz from_json as subExpr in Filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -subexpressionElimination off, codegen on 37069 37767 985 0.0 370694301.5 1.0X -subexpressionElimination off, codegen on 37095 37970 1008 0.0 370945081.6 1.0X -subexpressionElimination off, codegen on 20618 21443 715 0.0 206175173.8 1.8X -subexpressionElimination off, codegen on 21563 21887 307 0.0 215626274.7 1.7X +subExprElimination false, codegen: true 39661 40585 844 0.0 396612867.5 1.0X +subExprElimination false, codegen: false 40633 48813 1858 0.0 406328241.3 1.0X +subExprElimination true, codegen: true 25819 27096 1174 0.0 258194064.4 1.5X +subExprElimination true, codegen: false 23467 25137 1447 0.0 234668398.2 1.7X diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SubExprEliminationBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SubExprEliminationBenchmark.scala index e26acbcb3cd21..0ed0126add7a2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SubExprEliminationBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SubExprEliminationBenchmark.scala @@ -100,7 +100,7 @@ object SubExprEliminationBenchmark extends SqlBasedBenchmark { // We only benchmark subexpression performance under codegen/non-codegen, so disabling // json optimization. val caseName = s"subExprElimination $subExprEliminationEnabled, codegen: $codegenEnabled" - benchmark.addCase("subexpressionElimination off, codegen on", numIters) { _ => + benchmark.addCase(caseName, numIters) { _ => withSQLConf( SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> subExprEliminationEnabled, SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> codegenEnabled, From 919ea45e89b17d2f9b336dc4bfe6e15e8a083ed3 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 26 Nov 2020 10:19:38 +0900 Subject: [PATCH 0579/1009] [SPARK-33562][UI] Improve the style of the checkbox in executor page ### What changes were proposed in this pull request? 1. Remove the fixed width style of class `container-fluid-div`. So that the UI looks clean when the text is long. 2. Add one space between a checkbox and the text on the right side, which is consistent with the stage page. ### Why are the changes needed? The width of class `container-fluid-div` is set as 200px after https://github.com/apache/spark/pull/21688 . This makes the checkbox in the executor page messy. ![image](https://user-images.githubusercontent.com/1097932/100242069-3bc5ab80-2ee9-11eb-8c7d-96c221398fee.png) We should remove the width limit. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manual test. After the changes: ![image](https://user-images.githubusercontent.com/1097932/100257802-2f4a4e80-2efb-11eb-9eb0-92d6988ad14b.png) Closes #30500 from gengliangwang/reviseStyle. 
Authored-by: Gengliang Wang Signed-off-by: HyukjinKwon --- .../apache/spark/ui/static/executorspage.js | 18 +++++++++--------- .../org/apache/spark/ui/static/webui.css | 4 ---- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js index 4f179a93c9d5f..1d3f628f5fab6 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js @@ -608,15 +608,15 @@ $(document).ready(function () { "Show Additional Metrics" + "" + "

    " + - "
    Select All
    " + - "
    On Heap Memory
    " + - "
    Off Heap Memory
    " + - "
    Peak JVM Memory OnHeap / OffHeap
    " + - "
    Peak Execution Memory OnHeap / OffHeap
    " + - "
    Peak Storage Memory OnHeap / OffHeap
    " + - "
    Peak Pool Memory Direct / Mapped
    " + - "
    Resources
    " + - "
    Resource Profile Id
    " + + "
    Select All
    " + + "
    On Heap Memory
    " + + "
    Off Heap Memory
    " + + "
    Peak JVM Memory OnHeap / OffHeap
    " + + "
    Peak Execution Memory OnHeap / OffHeap
    " + + "
    Peak Storage Memory OnHeap / OffHeap
    " + + "
    Peak Pool Memory Direct / Mapped
    " + + "
    Resources
    " + + "
    Resource Profile Id
    " + "
    "); reselectCheckboxesBasedOnTaskTableState(); diff --git a/core/src/main/resources/org/apache/spark/ui/static/webui.css b/core/src/main/resources/org/apache/spark/ui/static/webui.css index d4394ebcfd258..262cee7b58aff 100755 --- a/core/src/main/resources/org/apache/spark/ui/static/webui.css +++ b/core/src/main/resources/org/apache/spark/ui/static/webui.css @@ -321,10 +321,6 @@ a.expandbutton { width: 100%; } -.container-fluid-div { - width: 200px; -} - .select-all-div-checkbox-div { width: 90px; } From ed9e6fc18236ef6994c7f24a4017cf43f77b7ca1 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Thu, 26 Nov 2020 11:42:12 +0900 Subject: [PATCH 0580/1009] [SPARK-33565][INFRA][FOLLOW-UP] Keep the test coverage with Python 3.8 in GitHub Actions ### What changes were proposed in this pull request? This PR proposes to keep the test coverage with Python 3.8 in GitHub Actions. It is not tested for now in Jenkins due to an env issue. **Before this change in GitHub Actions:** ``` ======================================================================== Running PySpark tests ======================================================================== Running PySpark tests. Output is in /__w/spark/spark/python/unit-tests.log Will test against the following Python executables: ['python3.6', 'pypy3'] ... ``` **After this change in GitHub Actions:** ``` ======================================================================== Running PySpark tests ======================================================================== Running PySpark tests. Output is in /__w/spark/spark/python/unit-tests.log Will test against the following Python executables: ['python3.6', 'python3.8', 'pypy3'] ``` ### Why are the changes needed? To keep the test coverage with Python 3.8 in GitHub Actions. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? GitHub Actions in this build will test. Closes #30510 from HyukjinKwon/SPARK-33565. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- dev/run-tests.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dev/run-tests.py b/dev/run-tests.py index 5bdbc0ffb850c..6bc73ca3669f3 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -483,6 +483,12 @@ def run_python_tests(test_modules, parallelism, with_coverage=False): if test_modules != [modules.root]: command.append("--modules=%s" % ','.join(m.name for m in test_modules)) command.append("--parallelism=%i" % parallelism) + if "GITHUB_ACTIONS" in os.environ: + # See SPARK-33565. Python 3.8 was temporarily removed as its default Python executables + # to test because of Jenkins environment issue. Once Jenkins has Python 3.8 to test, + # we should remove this change back and add python3.8 into python/run-tests.py script. + command.append("--python-executable=%s" % ','.join( + x for x in ["python3.6", "python3.8", "pypy3"] if which(x))) run_cmd(command) if with_coverage: From dfa3978d9191e02eabf65d1829c970644d25d57e Mon Sep 17 00:00:00 2001 From: Maryann Xue Date: Wed, 25 Nov 2020 19:32:22 -0800 Subject: [PATCH 0581/1009] [SPARK-33551][SQL] Do not use custom shuffle reader for repartition ### What changes were proposed in this pull request? This PR fixes an AQE issue where local shuffle reader, partition coalescing, or skew join optimization can be mistakenly applied to a shuffle introduced by repartition or a regular shuffle that logically replaces a repartition shuffle. 
The proposed solution checks for the presence of any repartition shuffle and filters out not applicable optimization rules for the final stage in an AQE plan. ### Why are the changes needed? Without the change, the output of a repartition query may not be correct. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added UT. Closes #30494 from maryannxue/csr-repartition. Authored-by: Maryann Xue Signed-off-by: Xiao Li --- .../apache/spark/sql/internal/SQLConf.scala | 2 +- .../adaptive/AdaptiveSparkPlanExec.scala | 31 +++-- .../adaptive/CoalesceShufflePartitions.scala | 11 +- .../adaptive/CustomShuffleReaderRule.scala | 33 +++++ .../adaptive/OptimizeLocalShuffleReader.scala | 9 +- .../adaptive/OptimizeSkewedJoin.scala | 14 ++- .../adaptive/AdaptiveQueryExecSuite.scala | 116 +++++++++++++++++- 7 files changed, 187 insertions(+), 29 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CustomShuffleReaderRule.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 0738478888aeb..add9a1d0f3aa6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -509,7 +509,7 @@ object SQLConf { "'spark.sql.adaptive.skewJoin.skewedPartitionThresholdInBytes'") .version("3.0.0") .intConf - .checkValue(_ > 0, "The skew factor must be positive.") + .checkValue(_ >= 0, "The skew factor cannot be negative.") .createWithDefault(5) val SKEW_JOIN_SKEWED_PARTITION_THRESHOLD = diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index 570edbf5f78a3..89d3b53510469 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -37,8 +37,6 @@ import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec._ import org.apache.spark.sql.execution.bucketing.DisableUnnecessaryBucketedScan -import org.apache.spark.sql.execution.command.DataWritingCommandExec -import org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec import org.apache.spark.sql.execution.exchange._ import org.apache.spark.sql.execution.ui.{SparkListenerSQLAdaptiveExecutionUpdate, SparkListenerSQLAdaptiveSQLMetricUpdates, SQLPlanMetric} import org.apache.spark.sql.internal.SQLConf @@ -104,16 +102,6 @@ case class AdaptiveSparkPlanExec( OptimizeLocalShuffleReader ) - private def finalStageOptimizerRules: Seq[Rule[SparkPlan]] = - context.qe.sparkPlan match { - case _: DataWritingCommandExec | _: V2TableWriteExec => - // SPARK-32932: Local shuffle reader could break partitioning that works best - // for the following writing command - queryStageOptimizerRules.filterNot(_ == OptimizeLocalShuffleReader) - case _ => - queryStageOptimizerRules - } - // A list of physical optimizer rules to be applied right after a new stage is created. The input // plan to these rules has exchange as its root node. 
@transient private val postStageCreationRules = Seq( @@ -121,6 +109,23 @@ case class AdaptiveSparkPlanExec( CollapseCodegenStages() ) + // The partitioning of the query output depends on the shuffle(s) in the final stage. If the + // original plan contains a repartition operator, we need to preserve the specified partitioning, + // whether or not the repartition-introduced shuffle is optimized out because of an underlying + // shuffle of the same partitioning. Thus, we need to exclude some `CustomShuffleReaderRule`s + // from the final stage, depending on the presence and properties of repartition operators. + private def finalStageOptimizerRules: Seq[Rule[SparkPlan]] = { + val origins = inputPlan.collect { + case s: ShuffleExchangeLike => s.shuffleOrigin + } + val allRules = queryStageOptimizerRules ++ postStageCreationRules + allRules.filter { + case c: CustomShuffleReaderRule => + origins.forall(c.supportedShuffleOrigins.contains) + case _ => true + } + } + @transient private val costEvaluator = SimpleCostEvaluator @transient private val initialPlan = context.session.withActive { @@ -249,7 +254,7 @@ case class AdaptiveSparkPlanExec( // Run the final plan when there's no more unfinished stages. currentPhysicalPlan = applyPhysicalRules( result.newPlan, - finalStageOptimizerRules ++ postStageCreationRules, + finalStageOptimizerRules, Some((planChangeLogger, "AQE Final Query Stage Optimization"))) isFinalPlan = true executionId.foreach(onUpdatePlan(_, Seq(currentPhysicalPlan))) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala index 0cf3ab0cca49a..0f482142227d2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala @@ -19,16 +19,18 @@ package org.apache.spark.sql.execution.adaptive import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.plans.physical.SinglePartition -import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.SparkPlan -import org.apache.spark.sql.execution.exchange.{ENSURE_REQUIREMENTS, REPARTITION, ShuffleExchangeLike} +import org.apache.spark.sql.execution.exchange.{ENSURE_REQUIREMENTS, REPARTITION, ShuffleExchangeLike, ShuffleOrigin} import org.apache.spark.sql.internal.SQLConf /** * A rule to coalesce the shuffle partitions based on the map output statistics, which can * avoid many small reduce tasks that hurt performance. 
*/ -case class CoalesceShufflePartitions(session: SparkSession) extends Rule[SparkPlan] { +case class CoalesceShufflePartitions(session: SparkSession) extends CustomShuffleReaderRule { + + override val supportedShuffleOrigins: Seq[ShuffleOrigin] = Seq(ENSURE_REQUIREMENTS, REPARTITION) + override def apply(plan: SparkPlan): SparkPlan = { if (!conf.coalesceShufflePartitionsEnabled) { return plan @@ -86,7 +88,6 @@ case class CoalesceShufflePartitions(session: SparkSession) extends Rule[SparkPl } private def supportCoalesce(s: ShuffleExchangeLike): Boolean = { - s.outputPartitioning != SinglePartition && - (s.shuffleOrigin == ENSURE_REQUIREMENTS || s.shuffleOrigin == REPARTITION) + s.outputPartitioning != SinglePartition && supportedShuffleOrigins.contains(s.shuffleOrigin) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CustomShuffleReaderRule.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CustomShuffleReaderRule.scala new file mode 100644 index 0000000000000..c5b8f73ea59d3 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CustomShuffleReaderRule.scala @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.adaptive + +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.execution.exchange.ShuffleOrigin + +/** + * Adaptive Query Execution rule that may create [[CustomShuffleReaderExec]] on top of query stages. + */ +trait CustomShuffleReaderRule extends Rule[SparkPlan] { + + /** + * Returns the list of [[ShuffleOrigin]]s supported by this rule. 
+ */ + def supportedShuffleOrigins: Seq[ShuffleOrigin] +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeLocalShuffleReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeLocalShuffleReader.scala index 8f57947cb6396..4dc982d666d18 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeLocalShuffleReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeLocalShuffleReader.scala @@ -19,9 +19,8 @@ package org.apache.spark.sql.execution.adaptive import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide} import org.apache.spark.sql.catalyst.plans.physical.SinglePartition -import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution._ -import org.apache.spark.sql.execution.exchange.{ENSURE_REQUIREMENTS, EnsureRequirements, ShuffleExchangeExec, ShuffleExchangeLike} +import org.apache.spark.sql.execution.exchange.{ENSURE_REQUIREMENTS, EnsureRequirements, ShuffleExchangeExec, ShuffleExchangeLike, ShuffleOrigin} import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec import org.apache.spark.sql.internal.SQLConf @@ -34,7 +33,9 @@ import org.apache.spark.sql.internal.SQLConf * then run `EnsureRequirements` to check whether additional shuffle introduced. * If introduced, we will revert all the local readers. */ -object OptimizeLocalShuffleReader extends Rule[SparkPlan] { +object OptimizeLocalShuffleReader extends CustomShuffleReaderRule { + + override val supportedShuffleOrigins: Seq[ShuffleOrigin] = Seq(ENSURE_REQUIREMENTS) private val ensureRequirements = EnsureRequirements @@ -144,6 +145,6 @@ object OptimizeLocalShuffleReader extends Rule[SparkPlan] { } private def supportLocalReader(s: ShuffleExchangeLike): Boolean = { - s.outputPartitioning != SinglePartition && s.shuffleOrigin == ENSURE_REQUIREMENTS + s.outputPartitioning != SinglePartition && supportedShuffleOrigins.contains(s.shuffleOrigin) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala index 582d586c59358..085934d906b3c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala @@ -23,9 +23,8 @@ import org.apache.commons.io.FileUtils import org.apache.spark.{MapOutputStatistics, MapOutputTrackerMaster, SparkEnv} import org.apache.spark.sql.catalyst.plans._ -import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution._ -import org.apache.spark.sql.execution.exchange.{EnsureRequirements, ShuffleExchangeExec} +import org.apache.spark.sql.execution.exchange.{ENSURE_REQUIREMENTS, EnsureRequirements, ShuffleExchangeExec, ShuffleOrigin} import org.apache.spark.sql.execution.joins.SortMergeJoinExec import org.apache.spark.sql.internal.SQLConf @@ -53,7 +52,9 @@ import org.apache.spark.sql.internal.SQLConf * Note that, when this rule is enabled, it also coalesces non-skewed partitions like * `CoalesceShufflePartitions` does. 
*/ -object OptimizeSkewedJoin extends Rule[SparkPlan] { +object OptimizeSkewedJoin extends CustomShuffleReaderRule { + + override val supportedShuffleOrigins: Seq[ShuffleOrigin] = Seq(ENSURE_REQUIREMENTS) private val ensureRequirements = EnsureRequirements @@ -290,7 +291,9 @@ object OptimizeSkewedJoin extends Rule[SparkPlan] { private object ShuffleStage { def unapply(plan: SparkPlan): Option[ShuffleStageInfo] = plan match { - case s: ShuffleQueryStageExec if s.mapStats.isDefined => + case s: ShuffleQueryStageExec + if s.mapStats.isDefined && + OptimizeSkewedJoin.supportedShuffleOrigins.contains(s.shuffle.shuffleOrigin) => val mapStats = s.mapStats.get val sizes = mapStats.bytesByPartitionId val partitions = sizes.zipWithIndex.map { @@ -299,7 +302,8 @@ private object ShuffleStage { Some(ShuffleStageInfo(s, mapStats, partitions)) case CustomShuffleReaderExec(s: ShuffleQueryStageExec, partitionSpecs) - if s.mapStats.isDefined && partitionSpecs.nonEmpty => + if s.mapStats.isDefined && partitionSpecs.nonEmpty && + OptimizeSkewedJoin.supportedShuffleOrigins.contains(s.shuffle.shuffleOrigin) => val mapStats = s.mapStats.get val sizes = mapStats.bytesByPartitionId val partitions = partitionSpecs.map { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index 758965954b374..45ba2202d83d3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.execution.{PartialReducerPartitionSpec, QueryExecuti import org.apache.spark.sql.execution.command.DataWritingCommandExec import org.apache.spark.sql.execution.datasources.noop.NoopDataSource import org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec -import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, Exchange, ReusedExchangeExec, ShuffleExchangeExec} +import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, Exchange, REPARTITION, REPARTITION_WITH_NUM, ReusedExchangeExec, ShuffleExchangeExec, ShuffleExchangeLike} import org.apache.spark.sql.execution.joins.{BaseJoinExec, BroadcastHashJoinExec, SortMergeJoinExec} import org.apache.spark.sql.execution.ui.SparkListenerSQLAdaptiveExecutionUpdate import org.apache.spark.sql.functions._ @@ -1317,4 +1317,118 @@ class AdaptiveQueryExecSuite checkNumLocalShuffleReaders(df.queryExecution.executedPlan, numShufflesWithoutLocalReader = 1) } } + + test("SPARK-33551: Do not use custom shuffle reader for repartition") { + def hasRepartitionShuffle(plan: SparkPlan): Boolean = { + find(plan) { + case s: ShuffleExchangeLike => + s.shuffleOrigin == REPARTITION || s.shuffleOrigin == REPARTITION_WITH_NUM + case _ => false + }.isDefined + } + + withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true", + SQLConf.SHUFFLE_PARTITIONS.key -> "5") { + val df = sql( + """ + |SELECT * FROM ( + | SELECT * FROM testData WHERE key = 1 + |) + |RIGHT OUTER JOIN testData2 + |ON value = b + """.stripMargin) + + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "80") { + // Repartition with no partition num specified. + val dfRepartition = df.repartition('b) + dfRepartition.collect() + val plan = dfRepartition.queryExecution.executedPlan + // The top shuffle from repartition is optimized out. 
+ assert(!hasRepartitionShuffle(plan)) + val bhj = findTopLevelBroadcastHashJoin(plan) + assert(bhj.length == 1) + checkNumLocalShuffleReaders(plan, 1) + // Probe side is coalesced. + val customReader = bhj.head.right.find(_.isInstanceOf[CustomShuffleReaderExec]) + assert(customReader.isDefined) + assert(customReader.get.asInstanceOf[CustomShuffleReaderExec].hasCoalescedPartition) + + // Repartition with partition default num specified. + val dfRepartitionWithNum = df.repartition(5, 'b) + dfRepartitionWithNum.collect() + val planWithNum = dfRepartitionWithNum.queryExecution.executedPlan + // The top shuffle from repartition is optimized out. + assert(!hasRepartitionShuffle(planWithNum)) + val bhjWithNum = findTopLevelBroadcastHashJoin(planWithNum) + assert(bhjWithNum.length == 1) + checkNumLocalShuffleReaders(planWithNum, 1) + // Probe side is not coalesced. + assert(bhjWithNum.head.right.find(_.isInstanceOf[CustomShuffleReaderExec]).isEmpty) + + // Repartition with partition non-default num specified. + val dfRepartitionWithNum2 = df.repartition(3, 'b) + dfRepartitionWithNum2.collect() + val planWithNum2 = dfRepartitionWithNum2.queryExecution.executedPlan + // The top shuffle from repartition is not optimized out, and this is the only shuffle that + // does not have local shuffle reader. + assert(hasRepartitionShuffle(planWithNum2)) + val bhjWithNum2 = findTopLevelBroadcastHashJoin(planWithNum2) + assert(bhjWithNum2.length == 1) + checkNumLocalShuffleReaders(planWithNum2, 1) + val customReader2 = bhjWithNum2.head.right.find(_.isInstanceOf[CustomShuffleReaderExec]) + assert(customReader2.isDefined) + assert(customReader2.get.asInstanceOf[CustomShuffleReaderExec].isLocalReader) + } + + // Force skew join + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.SKEW_JOIN_ENABLED.key -> "true", + SQLConf.SKEW_JOIN_SKEWED_PARTITION_THRESHOLD.key -> "1", + SQLConf.SKEW_JOIN_SKEWED_PARTITION_FACTOR.key -> "0", + SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES.key -> "10") { + // Repartition with no partition num specified. + val dfRepartition = df.repartition('b) + dfRepartition.collect() + val plan = dfRepartition.queryExecution.executedPlan + // The top shuffle from repartition is optimized out. + assert(!hasRepartitionShuffle(plan)) + val smj = findTopLevelSortMergeJoin(plan) + assert(smj.length == 1) + // No skew join due to the repartition. + assert(!smj.head.isSkewJoin) + // Both sides are coalesced. + val customReaders = collect(smj.head) { + case c: CustomShuffleReaderExec if c.hasCoalescedPartition => c + } + assert(customReaders.length == 2) + + // Repartition with default partition num specified. + val dfRepartitionWithNum = df.repartition(5, 'b) + dfRepartitionWithNum.collect() + val planWithNum = dfRepartitionWithNum.queryExecution.executedPlan + // The top shuffle from repartition is optimized out. + assert(!hasRepartitionShuffle(planWithNum)) + val smjWithNum = findTopLevelSortMergeJoin(planWithNum) + assert(smjWithNum.length == 1) + // No skew join due to the repartition. + assert(!smjWithNum.head.isSkewJoin) + // No coalesce due to the num in repartition. + val customReadersWithNum = collect(smjWithNum.head) { + case c: CustomShuffleReaderExec if c.hasCoalescedPartition => c + } + assert(customReadersWithNum.isEmpty) + + // Repartition with default non-partition num specified. 
+ val dfRepartitionWithNum2 = df.repartition(3, 'b) + dfRepartitionWithNum2.collect() + val planWithNum2 = dfRepartitionWithNum2.queryExecution.executedPlan + // The top shuffle from repartition is not optimized out. + assert(hasRepartitionShuffle(planWithNum2)) + val smjWithNum2 = findTopLevelSortMergeJoin(planWithNum2) + assert(smjWithNum2.length == 1) + // Skew join can apply as the repartition is not optimized out. + assert(smjWithNum2.head.isSkewJoin) + } + } + } } From d082ad0abfe0bc26760626ae0ecb415a8d508a1f Mon Sep 17 00:00:00 2001 From: zero323 Date: Fri, 27 Nov 2020 11:00:09 +0900 Subject: [PATCH 0582/1009] [SPARK-33563][PYTHON][R][SQL] Expose inverse hyperbolic trig functions in PySpark and SparkR ### What changes were proposed in this pull request? This PR adds the following functions (introduced in Scala API with SPARK-33061): - `acosh` - `asinh` - `atanh` to Python and R. ### Why are the changes needed? Feature parity. ### Does this PR introduce _any_ user-facing change? New functions. ### How was this patch tested? New unit tests. Closes #30501 from zero323/SPARK-33563. Authored-by: zero323 Signed-off-by: HyukjinKwon --- R/pkg/NAMESPACE | 3 ++ R/pkg/R/functions.R | 39 ++++++++++++++++++++ R/pkg/tests/fulltests/test_sparkSQL.R | 1 + python/docs/source/reference/pyspark.sql.rst | 4 +- python/pyspark/sql/functions.py | 39 ++++++++++++++++++++ python/pyspark/sql/functions.pyi | 3 ++ python/pyspark/sql/tests/test_functions.py | 16 ++++++++ 7 files changed, 104 insertions(+), 1 deletion(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index b927a6b96b810..91f6e6dc8a0e6 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -202,6 +202,7 @@ exportMethods("%<=>%", "%in%", "abs", "acos", + "acosh", "add_months", "alias", "approx_count_distinct", @@ -232,8 +233,10 @@ exportMethods("%<=>%", "asc_nulls_last", "ascii", "asin", + "asinh", "assert_true", "atan", + "atanh", "atan2", "avg", "base64", diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 039d28a3a37b6..b12f7b472ec83 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -455,6 +455,19 @@ setMethod("acos", column(jc) }) +#' @details +#' \code{acosh}: Computes inverse hyperbolic cosine of the input column. +#' +#' @rdname column_math_functions +#' @aliases acosh acosh,Column-method +#' @note acosh since 3.1.0 +setMethod("acosh", + signature(x = "Column"), + function(x) { + jc <- callJStatic("org.apache.spark.sql.functions", "acosh", x@jc) + column(jc) + }) + #' @details #' \code{approx_count_distinct}: Returns the approximate number of distinct items in a group. #' @@ -522,6 +535,19 @@ setMethod("asin", column(jc) }) +#' @details +#' \code{asinh}: Computes inverse hyperbolic sine of the input column. +#' +#' @rdname column_math_functions +#' @aliases asinh asinh,Column-method +#' @note asinh since 3.1.0 +setMethod("asinh", + signature(x = "Column"), + function(x) { + jc <- callJStatic("org.apache.spark.sql.functions", "asinh", x@jc) + column(jc) + }) + #' @details #' \code{atan}: Returns the inverse tangent of the given value, #' as if computed by \code{java.lang.Math.atan()} @@ -536,6 +562,19 @@ setMethod("atan", column(jc) }) +#' @details +#' \code{atanh}: Computes inverse hyperbolic tangent of the input column. 
+#' +#' @rdname column_math_functions +#' @aliases atanh atanh,Column-method +#' @note atanh since 3.1.0 +setMethod("atanh", + signature(x = "Column"), + function(x) { + jc <- callJStatic("org.apache.spark.sql.functions", "atanh", x@jc) + column(jc) + }) + #' avg #' #' Aggregate function: returns the average of the values in a group. diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 45de1ef1bd3d1..81d4e14df791d 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1430,6 +1430,7 @@ test_that("column functions", { nth_value(column("v"), 3) + nth_value(column("z"), 4L, FALSE) c28 <- asc_nulls_first(c1) + asc_nulls_last(c1) + desc_nulls_first(c1) + desc_nulls_last(c1) + c29 <- acosh(c1) + asinh(c1) + atanh(c1) # Test if base::is.nan() is exposed expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE)) diff --git a/python/docs/source/reference/pyspark.sql.rst b/python/docs/source/reference/pyspark.sql.rst index 3f903fe8c7acd..0dc2f6e55bb96 100644 --- a/python/docs/source/reference/pyspark.sql.rst +++ b/python/docs/source/reference/pyspark.sql.rst @@ -307,6 +307,7 @@ Functions abs acos + acosh add_months aggregate approxCountDistinct @@ -331,8 +332,10 @@ Functions asc_nulls_last ascii asin + asinh assert_true atan + atanh atan2 avg base64 @@ -583,4 +586,3 @@ Grouping GroupedData.pivot GroupedData.sum PandasCogroupedOps.applyInPandas - diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 4af5d1f484ee4..ea91e8593e21f 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -220,6 +220,19 @@ def acos(col): return _invoke_function_over_column("acos", col) +def acosh(col): + """ + Computes inverse hyperbolic cosine of the input column. + + .. versionadded:: 3.1.0 + + Returns + ------- + :class:`Column` + """ + return _invoke_function_over_column("acosh", col) + + def asin(col): """ .. versionadded:: 1.3.0 @@ -233,6 +246,19 @@ def asin(col): return _invoke_function_over_column("asin", col) +def asinh(col): + """ + Computes inverse hyperbolic sine of the input column. + + .. versionadded:: 3.1.0 + + Returns + ------- + :class:`Column` + """ + return _invoke_function_over_column("asinh", col) + + def atan(col): """ .. versionadded:: 1.4.0 @@ -245,6 +271,19 @@ def atan(col): return _invoke_function_over_column("atan", col) +def atanh(col): + """ + Computes inverse hyperbolic tangent of the input column. + + .. versionadded:: 3.1.0 + + Returns + ------- + :class:`Column` + """ + return _invoke_function_over_column("atanh", col) + + @since(1.4) def cbrt(col): """ diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi index 252f883b5fb09..50e178df9996f 100644 --- a/python/pyspark/sql/functions.pyi +++ b/python/pyspark/sql/functions.pyi @@ -260,12 +260,15 @@ def map_zip_with( ) -> Column: ... def abs(col: ColumnOrName) -> Column: ... def acos(col: ColumnOrName) -> Column: ... +def acosh(col: ColumnOrName) -> Column: ... def asc(col: ColumnOrName) -> Column: ... def asc_nulls_first(col: ColumnOrName) -> Column: ... def asc_nulls_last(col: ColumnOrName) -> Column: ... def ascii(col: ColumnOrName) -> Column: ... def asin(col: ColumnOrName) -> Column: ... +def asinh(col: ColumnOrName) -> Column: ... def atan(col: ColumnOrName) -> Column: ... +def atanh(col: ColumnOrName) -> Column: ... @overload def atan2(col1: ColumnOrName, col2: ColumnOrName) -> Column: ... 
@overload diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 32549343d938f..2858bdeca0d5a 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -116,6 +116,7 @@ def assert_close(a, b): c = get_values(b) diff = [abs(v - c[k]) < 1e-6 for k, v in enumerate(a)] return sum(diff) == len(a) + assert_close([math.cos(i) for i in range(10)], df.select(functions.cos(df.a)).collect()) assert_close([math.cos(i) for i in range(10)], @@ -139,6 +140,21 @@ def assert_close(a, b): assert_close([math.hypot(i, 2) for i in range(10)], df.select(functions.hypot(df.a, 2)).collect()) + def test_inverse_trig_functions(self): + from pyspark.sql import functions + + funs = [ + (functions.acosh, "ACOSH"), + (functions.asinh, "ASINH"), + (functions.atanh, "ATANH"), + ] + + cols = ["a", functions.col("a")] + + for f, alias in funs: + for c in cols: + self.assertIn(f"{alias}(a)", repr(f(c))) + def test_rand_functions(self): df = self.df from pyspark.sql import functions From 433ae9064f55b8adb27b561e1ff17c32f0bf3465 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Fri, 27 Nov 2020 15:47:39 +0900 Subject: [PATCH 0583/1009] [SPARK-33566][CORE][SQL][SS][PYTHON] Make unescapedQuoteHandling option configurable when read CSV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? There are some differences between Spark CSV, opencsv and commons-csv, the typical case are described in SPARK-33566, When there are both unescaped quotes and unescaped qualifier in value, the results of parsing are different. The reason for the difference is Spark use `STOP_AT_DELIMITER` as default `UnescapedQuoteHandling` to build `CsvParser` and it not configurable. On the other hand, opencsv and commons-csv use the parsing mechanism similar to `STOP_AT_CLOSING_QUOTE ` by default. So this pr make `unescapedQuoteHandling` option configurable to get the same parsing result as opencsv and commons-csv. ### Why are the changes needed? Make unescapedQuoteHandling option configurable when read CSV to make parsing more flexible。 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Pass the Jenkins or GitHub Action - Add a new case similar to that described in SPARK-33566 Closes #30518 from LuciferYang/SPARK-33566. 
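For example, the new option can be set through the DataFrame reader (shown here in Scala as a sketch; the file path is a placeholder) to make Spark stop at the closing quote, as opencsv and commons-csv do:

```scala
// Read a CSV whose values contain unescaped quotes, treating them like
// opencsv/commons-csv by stopping at the closing quote.
val df = spark.read
  .option("header", "true")
  .option("unescapedQuoteHandling", "STOP_AT_CLOSING_QUOTE")
  .csv("/tmp/unescaped_quotes.csv")

df.show(truncate = false)
```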
Authored-by: yangjie01 Signed-off-by: HyukjinKwon --- python/pyspark/sql/readwriter.py | 26 +++++++++++++++++-- python/pyspark/sql/readwriter.pyi | 1 + python/pyspark/sql/streaming.py | 25 ++++++++++++++++-- python/pyspark/sql/streaming.pyi | 1 + .../spark/sql/catalyst/csv/CSVOptions.scala | 8 +++++- .../apache/spark/sql/DataFrameReader.scala | 21 +++++++++++++++ .../sql/streaming/DataStreamReader.scala | 21 +++++++++++++++ .../execution/datasources/csv/CSVSuite.scala | 24 +++++++++++++++++ 8 files changed, 122 insertions(+), 5 deletions(-) diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index bb31e6a3e09f8..d120daa5a9434 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -522,7 +522,8 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non maxCharsPerColumn=None, maxMalformedLogPerPartition=None, mode=None, columnNameOfCorruptRecord=None, multiLine=None, charToEscapeQuoteEscaping=None, samplingRatio=None, enforceSchema=None, emptyValue=None, locale=None, lineSep=None, - pathGlobFilter=None, recursiveFileLookup=None, modifiedBefore=None, modifiedAfter=None): + pathGlobFilter=None, recursiveFileLookup=None, modifiedBefore=None, modifiedAfter=None, + unescapedQuoteHandling=None): r"""Loads a CSV file and returns the result as a :class:`DataFrame`. This function will go through the input once to determine the input schema if @@ -685,6 +686,26 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non modifiedAfter (batch only) : an optional timestamp to only include files with modification times occurring after the specified time. The provided timestamp must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + unescapedQuoteHandling : str, optional + defines how the CsvParser will handle values with unescaped quotes. If None is + set, it uses the default value, ``STOP_AT_DELIMITER``. + + * ``STOP_AT_CLOSING_QUOTE``: If unescaped quotes are found in the input, accumulate + the quote character and proceed parsing the value as a quoted value, until a closing + quote is found. + * ``BACK_TO_DELIMITER``: If unescaped quotes are found in the input, consider the value + as an unquoted value. This will make the parser accumulate all characters of the current + parsed value until the delimiter is found. If no delimiter is found in the value, the + parser will continue accumulating characters from the input until a delimiter or line + ending is found. + * ``STOP_AT_DELIMITER``: If unescaped quotes are found in the input, consider the value + as an unquoted value. This will make the parser accumulate all characters until the + delimiter or a line ending is found in the input. + * ``STOP_AT_DELIMITER``: If unescaped quotes are found in the input, the content parsed + for the given value will be skipped and the value set in nullValue will be produced + instead. + * ``RAISE_ERROR``: If unescaped quotes are found in the input, a TextParsingException + will be thrown. 
Examples -------- @@ -708,7 +729,8 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non charToEscapeQuoteEscaping=charToEscapeQuoteEscaping, samplingRatio=samplingRatio, enforceSchema=enforceSchema, emptyValue=emptyValue, locale=locale, lineSep=lineSep, pathGlobFilter=pathGlobFilter, recursiveFileLookup=recursiveFileLookup, - modifiedBefore=modifiedBefore, modifiedAfter=modifiedAfter) + modifiedBefore=modifiedBefore, modifiedAfter=modifiedAfter, + unescapedQuoteHandling=unescapedQuoteHandling) if isinstance(path, str): path = [path] if type(path) == list: diff --git a/python/pyspark/sql/readwriter.pyi b/python/pyspark/sql/readwriter.pyi index 64c5697203a44..c3b9a428f22b3 100644 --- a/python/pyspark/sql/readwriter.pyi +++ b/python/pyspark/sql/readwriter.pyi @@ -113,6 +113,7 @@ class DataFrameReader(OptionUtils): lineSep: Optional[str] = ..., pathGlobFilter: Optional[Union[bool, str]] = ..., recursiveFileLookup: Optional[Union[bool, str]] = ..., + unescapedQuoteHandling: Optional[str] = ..., ) -> DataFrame: ... def orc( self, diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py index e7b2fa16d620a..365b5f38694a7 100644 --- a/python/pyspark/sql/streaming.py +++ b/python/pyspark/sql/streaming.py @@ -761,7 +761,7 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non maxCharsPerColumn=None, maxMalformedLogPerPartition=None, mode=None, columnNameOfCorruptRecord=None, multiLine=None, charToEscapeQuoteEscaping=None, enforceSchema=None, emptyValue=None, locale=None, lineSep=None, - pathGlobFilter=None, recursiveFileLookup=None): + pathGlobFilter=None, recursiveFileLookup=None, unescapedQuoteHandling=None): r"""Loads a CSV file stream and returns the result as a :class:`DataFrame`. This function will go through the input once to determine the input schema if @@ -900,6 +900,26 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non recursiveFileLookup : str or bool, optional recursively scan a directory for files. Using this option disables `partition discovery `_. # noqa + unescapedQuoteHandling : str, optional + defines how the CsvParser will handle values with unescaped quotes. If None is + set, it uses the default value, ``STOP_AT_DELIMITER``. + + * ``STOP_AT_CLOSING_QUOTE``: If unescaped quotes are found in the input, accumulate + the quote character and proceed parsing the value as a quoted value, until a closing + quote is found. + * ``BACK_TO_DELIMITER``: If unescaped quotes are found in the input, consider the value + as an unquoted value. This will make the parser accumulate all characters of the current + parsed value until the delimiter is found. If no delimiter is found in the value, the + parser will continue accumulating characters from the input until a delimiter or line + ending is found. + * ``STOP_AT_DELIMITER``: If unescaped quotes are found in the input, consider the value + as an unquoted value. This will make the parser accumulate all characters until the + delimiter or a line ending is found in the input. + * ``STOP_AT_DELIMITER``: If unescaped quotes are found in the input, the content parsed + for the given value will be skipped and the value set in nullValue will be produced + instead. + * ``RAISE_ERROR``: If unescaped quotes are found in the input, a TextParsingException + will be thrown. .. 
versionadded:: 2.0.0 @@ -926,7 +946,8 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non columnNameOfCorruptRecord=columnNameOfCorruptRecord, multiLine=multiLine, charToEscapeQuoteEscaping=charToEscapeQuoteEscaping, enforceSchema=enforceSchema, emptyValue=emptyValue, locale=locale, lineSep=lineSep, - pathGlobFilter=pathGlobFilter, recursiveFileLookup=recursiveFileLookup) + pathGlobFilter=pathGlobFilter, recursiveFileLookup=recursiveFileLookup, + unescapedQuoteHandling=unescapedQuoteHandling) if isinstance(path, str): return self._df(self._jreader.csv(path)) else: diff --git a/python/pyspark/sql/streaming.pyi b/python/pyspark/sql/streaming.pyi index 56ce140b826d5..829610ad3b94b 100644 --- a/python/pyspark/sql/streaming.pyi +++ b/python/pyspark/sql/streaming.pyi @@ -149,6 +149,7 @@ class DataStreamReader(OptionUtils): lineSep: Optional[str] = ..., pathGlobFilter: Optional[Union[bool, str]] = ..., recursiveFileLookup: Optional[Union[bool, str]] = ..., + unescapedQuoteHandling: Optional[str] = ..., ) -> DataFrame: ... class DataStreamWriter: diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala index f2191fcf35f1a..ec405994eadef 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala @@ -213,6 +213,12 @@ class CSVOptions( } val lineSeparatorInWrite: Option[String] = lineSeparator + /** + * The handling method to be used when unescaped quotes are found in the input. + */ + val unescapedQuoteHandling: UnescapedQuoteHandling = UnescapedQuoteHandling.valueOf(parameters + .getOrElse("unescapedQuoteHandling", "STOP_AT_DELIMITER").toUpperCase(Locale.ROOT)) + def asWriterSettings: CsvWriterSettings = { val writerSettings = new CsvWriterSettings() val format = writerSettings.getFormat @@ -258,7 +264,7 @@ class CSVOptions( settings.setNullValue(nullValue) settings.setEmptyValue(emptyValueInRead) settings.setMaxCharsPerColumn(maxCharsPerColumn) - settings.setUnescapedQuoteHandling(UnescapedQuoteHandling.STOP_AT_DELIMITER) + settings.setUnescapedQuoteHandling(unescapedQuoteHandling) settings.setLineSeparatorDetectionEnabled(lineSeparatorInRead.isEmpty && multiLine) lineSeparatorInRead.foreach { _ => settings.setNormalizeLineEndingsWithinQuotes(!multiLine) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index b26bc6441b6cf..8f96f0b882424 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -727,6 +727,27 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * a record can have. *
<li>`maxCharsPerColumn` (default `-1`): defines the maximum number of characters allowed
 * for any given value being read. By default, it is -1 meaning unlimited length</li>
+ * <li>`unescapedQuoteHandling` (default `STOP_AT_DELIMITER`): defines how the CsvParser
+ * will handle values with unescaped quotes.
+ *   <ul>
+ *     <li>`STOP_AT_CLOSING_QUOTE`: If unescaped quotes are found in the input, accumulate
+ * the quote character and proceed parsing the value as a quoted value, until a closing
+ * quote is found.</li>
+ *     <li>`BACK_TO_DELIMITER`: If unescaped quotes are found in the input, consider the value
+ * as an unquoted value. This will make the parser accumulate all characters of the current
+ * parsed value until the delimiter is found. If no
+ * delimiter is found in the value, the parser will continue accumulating characters from
+ * the input until a delimiter or line ending is found.</li>
+ *     <li>`STOP_AT_DELIMITER`: If unescaped quotes are found in the input, consider the value
+ * as an unquoted value. This will make the parser accumulate all characters until the
+ * delimiter or a line ending is found in the input.</li>
+ *     <li>`SKIP_VALUE`: If unescaped quotes are found in the input, the content parsed
+ * for the given value will be skipped and the value set in nullValue will be produced
+ * instead.</li>
+ *     <li>`RAISE_ERROR`: If unescaped quotes are found in the input, a TextParsingException
+ * will be thrown.</li>
+ *   </ul>
+ * </li>
 * <li>`mode` (default `PERMISSIVE`): allows a mode for dealing with corrupt records
 * during parsing. It supports the following case-insensitive modes. Note that Spark tries
 * to parse only required columns in CSV under column pruning. Therefore, corrupt records
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
index 9bc4acd49a980..7f4ef8be562fb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
@@ -396,6 +396,27 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
 * a record can have.</li>
 * <li>`maxCharsPerColumn` (default `-1`): defines the maximum number of characters allowed
 * for any given value being read. By default, it is -1 meaning unlimited length</li>
+ * <li>`unescapedQuoteHandling` (default `STOP_AT_DELIMITER`): defines how the CsvParser
+ * will handle values with unescaped quotes.
+ *   <ul>
+ *     <li>`STOP_AT_CLOSING_QUOTE`: If unescaped quotes are found in the input, accumulate
+ * the quote character and proceed parsing the value as a quoted value, until a closing
+ * quote is found.</li>
+ *     <li>`BACK_TO_DELIMITER`: If unescaped quotes are found in the input, consider the value
+ * as an unquoted value. This will make the parser accumulate all characters of the current
+ * parsed value until the delimiter is found. If no delimiter is found in the value, the
+ * parser will continue accumulating characters from the input until a delimiter or line
+ * ending is found.</li>
+ *     <li>`STOP_AT_DELIMITER`: If unescaped quotes are found in the input, consider the value
+ * as an unquoted value. This will make the parser accumulate all characters until the
+ * delimiter or a line ending is found in the input.</li>
+ *     <li>`SKIP_VALUE`: If unescaped quotes are found in the input, the content parsed
+ * for the given value will be skipped and the value set in nullValue will be produced
+ * instead.</li>
+ *     <li>`RAISE_ERROR`: If unescaped quotes are found in the input, a TextParsingException
+ * will be thrown.</li>
+ *   </ul>
+ * </li>
 * <li>`mode` (default `PERMISSIVE`): allows a mode for dealing with corrupt records
 * during parsing. It supports the following case-insensitive modes.
 *
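Since the same option is documented above for `DataStreamReader`, a streaming read can opt in as well; a hedged PySpark sketch (the schema and input directory are made up for illustration):

```python
# Minimal streaming sketch, assuming Spark with this patch applied.
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType

spark = SparkSession.builder.master("local[*]").appName("streaming-unescaped-quotes").getOrCreate()

schema = StructType([StructField("c1", StringType()), StructField("c2", StringType())])

stream = (spark.readStream
          .schema(schema)  # streaming file sources require an explicit schema
          .option("unescapedQuoteHandling", "STOP_AT_CLOSING_QUOTE")
          .csv("/tmp/csv_input_dir"))  # hypothetical input directory

query = stream.writeStream.format("console").start()
query.awaitTermination(10)
query.stop()
spark.stop()
```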
      diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index a236814fdcdcd..30f0e45d04eab 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -2428,6 +2428,30 @@ abstract class CSVSuite assert(readback.collect sameElements Array(Row("0"), Row("1"), Row("2"))) } } + + test("SPARK-33566: configure UnescapedQuoteHandling to parse " + + "unescaped quotes and unescaped delimiter data correctly") { + withTempPath { path => + val dataPath = path.getCanonicalPath + val row1 = Row("""a,""b,c""", "xyz") + val row2 = Row("""a,b,c""", """x""yz""") + // Generate the test data, use `,` as delimiter and `"` as quotes, but they didn't escape. + Seq( + """c1,c2""", + s""""${row1.getString(0)}","${row1.getString(1)}"""", + s""""${row2.getString(0)}","${row2.getString(1)}"""") + .toDF().repartition(1).write.text(dataPath) + // Without configure UnescapedQuoteHandling to STOP_AT_CLOSING_QUOTE, + // the result will be Row(""""a,""b""", """c""""), Row("""a,b,c""", """"x""yz"""") + val result = spark.read + .option("inferSchema", "true") + .option("header", "true") + .option("unescapedQuoteHandling", "STOP_AT_CLOSING_QUOTE") + .csv(dataPath).collect() + val exceptResults = Array(row1, row2) + assert(result.sameElements(exceptResults)) + } + } } class CSVv1Suite extends CSVSuite { From 8792280a735598589dc6cbced03262be2b6f8f76 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Fri, 27 Nov 2020 07:08:24 +0000 Subject: [PATCH 0584/1009] [SPARK-33575][SQL] Fix misleading exception for "ANALYZE TABLE ... FOR COLUMNS" on temporary views ### What changes were proposed in this pull request? This PR proposes to fix the exception message for `ANALYZE TABLE ... FOR COLUMNS` on temporary views. The current behavior throws `NoSuchTableException` even if the temporary view exists: ``` sql("CREATE TEMP VIEW t AS SELECT 1 AS id") sql("ANALYZE TABLE t COMPUTE STATISTICS FOR COLUMNS id") org.apache.spark.sql.catalyst.analysis.NoSuchTableException: Table or view 't' not found in database 'db'; at org.apache.spark.sql.execution.command.AnalyzeColumnCommand.analyzeColumnInTempView(AnalyzeColumnCommand.scala:76) at org.apache.spark.sql.execution.command.AnalyzeColumnCommand.run(AnalyzeColumnCommand.scala:54) ``` After this PR, more reasonable exception is thrown: ``` org.apache.spark.sql.AnalysisException: Temporary view `testView` is not cached for analyzing columns.; [info] at org.apache.spark.sql.execution.command.AnalyzeColumnCommand.analyzeColumnInTempView(AnalyzeColumnCommand.scala:74) [info] at org.apache.spark.sql.execution.command.AnalyzeColumnCommand.run(AnalyzeColumnCommand.scala:54) ``` ### Why are the changes needed? To fix a misleading exception. ### Does this PR introduce _any_ user-facing change? Yes, the exception thrown is changed as shown above. ### How was this patch tested? Updated existing test. Closes #30519 from imback82/analyze_table_message. 
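To make the new message concrete, here is a small PySpark sketch (assuming a plain `SparkSession`; the view and column names are illustrative) of the workflow the improved error steers users toward, namely caching the temporary view before collecting column statistics:

```python
# Sketch of analyzing columns of a cached temporary view.
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[*]").appName("analyze-temp-view").getOrCreate()

spark.sql("CREATE OR REPLACE TEMP VIEW t AS SELECT id FROM range(10)")

# Without the CACHE TABLE step, the command below now fails with
# "Temporary view `t` is not cached for analyzing columns." instead of NoSuchTableException.
spark.sql("CACHE TABLE t")
spark.sql("ANALYZE TABLE t COMPUTE STATISTICS FOR COLUMNS id")

spark.stop()
```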
Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../spark/sql/execution/command/AnalyzeColumnCommand.scala | 5 ++--- .../org/apache/spark/sql/StatisticsCollectionSuite.scala | 5 +++-- .../scala/org/apache/spark/sql/execution/SQLViewSuite.scala | 5 ++++- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala index 5017893077922..3b90f807b3138 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala @@ -71,9 +71,8 @@ case class AnalyzeColumnCommand( private def analyzeColumnInTempView(plan: LogicalPlan, sparkSession: SparkSession): Unit = { if (!analyzeColumnInCachedData(plan, sparkSession)) { - val catalog = sparkSession.sessionState.catalog - val db = tableIdent.database.getOrElse(catalog.getCurrentDatabase) - throw new NoSuchTableException(db = db, table = tableIdent.identifier) + throw new AnalysisException( + s"Temporary view $tableIdent is not cached for analyzing columns.") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index 65377594f083c..cd03fadf34b98 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -526,7 +526,7 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared val errMsg = intercept[AnalysisException] { sql("ANALYZE TABLE tempView COMPUTE STATISTICS FOR COLUMNS id") }.getMessage - assert(errMsg.contains(s"Table or view 'tempView' not found in database 'default'")) + assert(errMsg.contains("Temporary view `tempView` is not cached for analyzing columns")) // Cache the view then analyze it sql("CACHE TABLE tempView") @@ -548,7 +548,8 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared val errMsg2 = intercept[AnalysisException] { sql(s"ANALYZE TABLE $globalTempDB.gTempView COMPUTE STATISTICS FOR COLUMNS id") }.getMessage - assert(errMsg2.contains(s"Table or view 'gTempView' not found in database '$globalTempDB'")) + assert(errMsg2.contains( + s"Temporary view `$globalTempDB`.`gTempView` is not cached for analyzing columns")) // Cache the view then analyze it sql(s"CACHE TABLE $globalTempDB.gTempView") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index edeebde7db726..5d29503848772 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -188,7 +188,10 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { sql(s"ANALYZE TABLE $viewName COMPUTE STATISTICS") }.getMessage assert(e5.contains(s"$viewName is a temp view not table or permanent view")) - assertNoSuchTable(s"ANALYZE TABLE $viewName COMPUTE STATISTICS FOR COLUMNS id") + val e6 = intercept[AnalysisException] { + sql(s"ANALYZE TABLE $viewName COMPUTE STATISTICS FOR COLUMNS id") + }.getMessage + assert(e6.contains(s"Temporary view `$viewName` is not cached for analyzing columns.")) } } From 2c41d9d8fa363b62519128819841f39e68429205 Mon Sep 17 
00:00:00 2001 From: Terry Kim Date: Fri, 27 Nov 2020 10:16:56 +0000 Subject: [PATCH 0585/1009] [SPARK-33522][SQL] Improve exception messages while handling UnresolvedTableOrView ### What changes were proposed in this pull request? This PR proposes to improve the exception messages while `UnresolvedTableOrView` is handled based on this suggestion: https://github.com/apache/spark/pull/30321#discussion_r521127001. Currently, when an identifier is resolved to a temp view when a table/permanent view is expected, the following exception message is displayed (e.g., for `SHOW CREATE TABLE`): ``` t is a temp view not table or permanent view. ``` After this PR, the message will be: ``` t is a temp view. 'SHOW CREATE TABLE' expects a table or permanent view. ``` Also, if an identifier is not resolved, the following exception message is currently used: ``` Table or view not found: t ``` After this PR, the message will be: ``` Table or permanent view not found for 'SHOW CREATE TABLE': t ``` or ``` Table or view not found for 'ANALYZE TABLE ... FOR COLUMNS ...': t ``` ### Why are the changes needed? To improve the exception message. ### Does this PR introduce _any_ user-facing change? Yes, the exception message will be changed as described above. ### How was this patch tested? Updated existing tests. Closes #30475 from imback82/unresolved_table_or_view. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 9 +- .../sql/catalyst/analysis/CheckAnalysis.scala | 4 +- .../catalyst/analysis/v2ResolutionPlans.scala | 1 + .../sql/catalyst/parser/AstBuilder.scala | 31 +++--- .../sql/catalyst/parser/DDLParserSuite.scala | 96 +++++++++++-------- .../sql-tests/results/describe.sql.out | 2 +- .../sql-tests/results/show_columns.sql.out | 8 +- .../spark/sql/ShowCreateTableSuite.scala | 6 +- .../spark/sql/StatisticsCollectionSuite.scala | 3 +- .../sql/connector/DataSourceV2SQLSuite.scala | 2 +- .../spark/sql/execution/SQLViewSuite.scala | 6 +- .../v2/jdbc/JDBCTableCatalogSuite.scala | 6 +- .../sql/hive/execution/HiveCommandSuite.scala | 2 +- 13 files changed, 104 insertions(+), 72 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 837686420375a..77c1dd9ebb7fa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -866,11 +866,12 @@ class Analyzer(override val catalogManager: CatalogManager) u.failAnalysis(s"${ident.quoted} is a temp view. '$cmd' expects a table") } u - case u @ UnresolvedTableOrView(ident, allowTempView) => + case u @ UnresolvedTableOrView(ident, cmd, allowTempView) => lookupTempView(ident) .map { _ => if (!allowTempView) { - u.failAnalysis(s"${ident.quoted} is a temp view not table or permanent view.") + u.failAnalysis( + s"${ident.quoted} is a temp view. 
'$cmd' expects a table or permanent view.") } ResolvedView(ident.asIdentifier, isTemp = true) } @@ -955,7 +956,7 @@ class Analyzer(override val catalogManager: CatalogManager) .map(ResolvedTable(catalog.asTableCatalog, ident, _)) .getOrElse(u) - case u @ UnresolvedTableOrView(NonSessionCatalogAndIdentifier(catalog, ident), _) => + case u @ UnresolvedTableOrView(NonSessionCatalogAndIdentifier(catalog, ident), _, _) => CatalogV2Util.loadTable(catalog, ident) .map(ResolvedTable(catalog.asTableCatalog, ident, _)) .getOrElse(u) @@ -1085,7 +1086,7 @@ class Analyzer(override val catalogManager: CatalogManager) case table => table }.getOrElse(u) - case u @ UnresolvedTableOrView(identifier, _) => + case u @ UnresolvedTableOrView(identifier, _, _) => lookupTableOrView(identifier).getOrElse(u) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 9998035d65c3f..9a3ab4a5f8d11 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -101,7 +101,9 @@ trait CheckAnalysis extends PredicateHelper { u.failAnalysis(s"Table not found for '${u.commandName}': ${u.multipartIdentifier.quoted}") case u: UnresolvedTableOrView => - u.failAnalysis(s"Table or view not found: ${u.multipartIdentifier.quoted}") + val viewStr = if (u.allowTempView) "view" else "permanent view" + u.failAnalysis( + s"Table or $viewStr not found for '${u.commandName}': ${u.multipartIdentifier.quoted}") case u: UnresolvedRelation => u.failAnalysis(s"Table or view not found: ${u.multipartIdentifier.quoted}") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala index 0e883a88f2691..95fc4f47dec7f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala @@ -51,6 +51,7 @@ case class UnresolvedTable( */ case class UnresolvedTableOrView( multipartIdentifier: Seq[String], + commandName: String, allowTempView: Boolean = true) extends LeafNode { override lazy val resolved: Boolean = false override def output: Seq[Attribute] = Nil diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 606d923061441..4cd9b2bea32a4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3148,7 +3148,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg override def visitDropTable(ctx: DropTableContext): LogicalPlan = withOrigin(ctx) { // DROP TABLE works with either a table or a temporary view. 
DropTable( - UnresolvedTableOrView(visitMultipartIdentifier(ctx.multipartIdentifier())), + UnresolvedTableOrView(visitMultipartIdentifier(ctx.multipartIdentifier()), "DROP TABLE"), ctx.EXISTS != null, ctx.PURGE != null) } @@ -3453,12 +3453,15 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg */ override def visitDescribeRelation(ctx: DescribeRelationContext): LogicalPlan = withOrigin(ctx) { val isExtended = ctx.EXTENDED != null || ctx.FORMATTED != null + val relation = UnresolvedTableOrView( + visitMultipartIdentifier(ctx.multipartIdentifier()), + "DESCRIBE TABLE") if (ctx.describeColName != null) { if (ctx.partitionSpec != null) { throw new ParseException("DESC TABLE COLUMN for a specific partition is not supported", ctx) } else { DescribeColumn( - UnresolvedTableOrView(visitMultipartIdentifier(ctx.multipartIdentifier())), + relation, ctx.describeColName.nameParts.asScala.map(_.getText).toSeq, isExtended) } @@ -3473,10 +3476,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } else { Map.empty[String, String] } - DescribeRelation( - UnresolvedTableOrView(visitMultipartIdentifier(ctx.multipartIdentifier())), - partitionSpec, - isExtended) + DescribeRelation(relation, partitionSpec, isExtended) } } @@ -3514,7 +3514,10 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg val tableName = visitMultipartIdentifier(ctx.multipartIdentifier()) if (ctx.ALL() != null) { checkPartitionSpec() - AnalyzeColumn(UnresolvedTableOrView(tableName), None, allColumns = true) + AnalyzeColumn( + UnresolvedTableOrView(tableName, "ANALYZE TABLE ... FOR ALL COLUMNS"), + None, + allColumns = true) } else if (ctx.identifierSeq() == null) { val partitionSpec = if (ctx.partitionSpec != null) { visitPartitionSpec(ctx.partitionSpec) @@ -3522,13 +3525,13 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg Map.empty[String, Option[String]] } AnalyzeTable( - UnresolvedTableOrView(tableName, allowTempView = false), + UnresolvedTableOrView(tableName, "ANALYZE TABLE", allowTempView = false), partitionSpec, noScan = ctx.identifier != null) } else { checkPartitionSpec() AnalyzeColumn( - UnresolvedTableOrView(tableName), + UnresolvedTableOrView(tableName, "ANALYZE TABLE ... 
FOR COLUMNS ..."), Option(visitIdentifierSeq(ctx.identifierSeq())), allColumns = false) } @@ -3572,6 +3575,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg ShowCreateTable( UnresolvedTableOrView( visitMultipartIdentifier(ctx.multipartIdentifier()), + "SHOW CREATE TABLE", allowTempView = false), ctx.SERDE != null) } @@ -3647,7 +3651,10 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * }}} */ override def visitRefreshTable(ctx: RefreshTableContext): LogicalPlan = withOrigin(ctx) { - RefreshTable(UnresolvedTableOrView(visitMultipartIdentifier(ctx.multipartIdentifier()))) + RefreshTable( + UnresolvedTableOrView( + visitMultipartIdentifier(ctx.multipartIdentifier()), + "REFRESH TABLE")) } /** @@ -3670,7 +3677,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } else { nameParts } - ShowColumns(UnresolvedTableOrView(tableName), namespace) + ShowColumns(UnresolvedTableOrView(tableName, "SHOW COLUMNS"), namespace) } /** @@ -3881,7 +3888,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg override def visitShowTblProperties( ctx: ShowTblPropertiesContext): LogicalPlan = withOrigin(ctx) { ShowTableProperties( - UnresolvedTableOrView(visitMultipartIdentifier(ctx.table)), + UnresolvedTableOrView(visitMultipartIdentifier(ctx.table), "SHOW TBLPROPERTIES"), Option(ctx.key).map(visitTablePropertyKey)) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index f650922e75f6e..c58ff81f17131 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -697,27 +697,27 @@ class DDLParserSuite extends AnalysisTest { test("drop table") { parseCompare("DROP TABLE testcat.ns1.ns2.tbl", DropTable( - UnresolvedTableOrView(Seq("testcat", "ns1", "ns2", "tbl")), + UnresolvedTableOrView(Seq("testcat", "ns1", "ns2", "tbl"), "DROP TABLE"), ifExists = false, purge = false)) parseCompare(s"DROP TABLE db.tab", DropTable( - UnresolvedTableOrView(Seq("db", "tab")), ifExists = false, purge = false)) + UnresolvedTableOrView(Seq("db", "tab"), "DROP TABLE"), ifExists = false, purge = false)) parseCompare(s"DROP TABLE IF EXISTS db.tab", DropTable( - UnresolvedTableOrView(Seq("db", "tab")), ifExists = true, purge = false)) + UnresolvedTableOrView(Seq("db", "tab"), "DROP TABLE"), ifExists = true, purge = false)) parseCompare(s"DROP TABLE tab", DropTable( - UnresolvedTableOrView(Seq("tab")), ifExists = false, purge = false)) + UnresolvedTableOrView(Seq("tab"), "DROP TABLE"), ifExists = false, purge = false)) parseCompare(s"DROP TABLE IF EXISTS tab", DropTable( - UnresolvedTableOrView(Seq("tab")), ifExists = true, purge = false)) + UnresolvedTableOrView(Seq("tab"), "DROP TABLE"), ifExists = true, purge = false)) parseCompare(s"DROP TABLE tab PURGE", DropTable( - UnresolvedTableOrView(Seq("tab")), ifExists = false, purge = true)) + UnresolvedTableOrView(Seq("tab"), "DROP TABLE"), ifExists = false, purge = true)) parseCompare(s"DROP TABLE IF EXISTS tab PURGE", DropTable( - UnresolvedTableOrView(Seq("tab")), ifExists = true, purge = true)) + UnresolvedTableOrView(Seq("tab"), "DROP TABLE"), ifExists = true, purge = true)) } test("drop view") { @@ -1112,26 +1112,26 @@ class DDLParserSuite extends AnalysisTest { test("describe table 
column") { comparePlans(parsePlan("DESCRIBE t col"), DescribeColumn( - UnresolvedTableOrView(Seq("t")), Seq("col"), isExtended = false)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("col"), isExtended = false)) comparePlans(parsePlan("DESCRIBE t `abc.xyz`"), DescribeColumn( - UnresolvedTableOrView(Seq("t")), Seq("abc.xyz"), isExtended = false)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("abc.xyz"), isExtended = false)) comparePlans(parsePlan("DESCRIBE t abc.xyz"), DescribeColumn( - UnresolvedTableOrView(Seq("t")), Seq("abc", "xyz"), isExtended = false)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("abc", "xyz"), isExtended = false)) comparePlans(parsePlan("DESCRIBE t `a.b`.`x.y`"), DescribeColumn( - UnresolvedTableOrView(Seq("t")), Seq("a.b", "x.y"), isExtended = false)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("a.b", "x.y"), isExtended = false)) comparePlans(parsePlan("DESCRIBE TABLE t col"), DescribeColumn( - UnresolvedTableOrView(Seq("t")), Seq("col"), isExtended = false)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("col"), isExtended = false)) comparePlans(parsePlan("DESCRIBE TABLE EXTENDED t col"), DescribeColumn( - UnresolvedTableOrView(Seq("t")), Seq("col"), isExtended = true)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("col"), isExtended = true)) comparePlans(parsePlan("DESCRIBE TABLE FORMATTED t col"), DescribeColumn( - UnresolvedTableOrView(Seq("t")), Seq("col"), isExtended = true)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("col"), isExtended = true)) val caught = intercept[AnalysisException]( parsePlan("DESCRIBE TABLE t PARTITION (ds='1970-01-01') col")) @@ -1150,13 +1150,17 @@ class DDLParserSuite extends AnalysisTest { test("SPARK-17328 Fix NPE with EXPLAIN DESCRIBE TABLE") { comparePlans(parsePlan("describe t"), - DescribeRelation(UnresolvedTableOrView(Seq("t")), Map.empty, isExtended = false)) + DescribeRelation( + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Map.empty, isExtended = false)) comparePlans(parsePlan("describe table t"), - DescribeRelation(UnresolvedTableOrView(Seq("t")), Map.empty, isExtended = false)) + DescribeRelation( + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Map.empty, isExtended = false)) comparePlans(parsePlan("describe table extended t"), - DescribeRelation(UnresolvedTableOrView(Seq("t")), Map.empty, isExtended = true)) + DescribeRelation( + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Map.empty, isExtended = true)) comparePlans(parsePlan("describe table formatted t"), - DescribeRelation(UnresolvedTableOrView(Seq("t")), Map.empty, isExtended = true)) + DescribeRelation( + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Map.empty, isExtended = true)) } test("insert table: basic append") { @@ -1769,57 +1773,57 @@ class DDLParserSuite extends AnalysisTest { test("analyze table statistics") { comparePlans(parsePlan("analyze table a.b.c compute statistics"), AnalyzeTable( - UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE", allowTempView = false), Map.empty, noScan = false)) comparePlans(parsePlan("analyze table a.b.c compute statistics noscan"), AnalyzeTable( - UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE", allowTempView = false), Map.empty, noScan = true)) comparePlans(parsePlan("analyze table a.b.c partition (a) compute statistics nOscAn"), AnalyzeTable( - 
UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE", allowTempView = false), Map("a" -> None), noScan = true)) // Partitions specified comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds='2008-04-09', hr=11) COMPUTE STATISTICS"), AnalyzeTable( - UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE", allowTempView = false), Map("ds" -> Some("2008-04-09"), "hr" -> Some("11")), noScan = false)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds='2008-04-09', hr=11) COMPUTE STATISTICS noscan"), AnalyzeTable( - UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE", allowTempView = false), Map("ds" -> Some("2008-04-09"), "hr" -> Some("11")), noScan = true)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds='2008-04-09') COMPUTE STATISTICS noscan"), AnalyzeTable( - UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE", allowTempView = false), Map("ds" -> Some("2008-04-09")), noScan = true)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds='2008-04-09', hr) COMPUTE STATISTICS"), AnalyzeTable( - UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE", allowTempView = false), Map("ds" -> Some("2008-04-09"), "hr" -> None), noScan = false)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds='2008-04-09', hr) COMPUTE STATISTICS noscan"), AnalyzeTable( - UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE", allowTempView = false), Map("ds" -> Some("2008-04-09"), "hr" -> None), noScan = true)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds, hr=11) COMPUTE STATISTICS noscan"), AnalyzeTable( - UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE", allowTempView = false), Map("ds" -> None, "hr" -> Some("11")), noScan = true)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds, hr) COMPUTE STATISTICS"), AnalyzeTable( - UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE", allowTempView = false), Map("ds" -> None, "hr" -> None), noScan = false)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds, hr) COMPUTE STATISTICS noscan"), AnalyzeTable( - UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE", allowTempView = false), Map("ds" -> None, "hr" -> None), noScan = true)) intercept("analyze table a.b.c compute statistics xxxx", @@ -1834,7 +1838,9 @@ class DDLParserSuite extends AnalysisTest { comparePlans( parsePlan("ANALYZE TABLE a.b.c COMPUTE STATISTICS FOR COLUMNS key, value"), AnalyzeColumn( - UnresolvedTableOrView(Seq("a", "b", "c")), Option(Seq("key", "value")), allColumns = false)) + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE ... 
FOR COLUMNS ..."), + Option(Seq("key", "value")), + allColumns = false)) // Partition specified - should be ignored comparePlans( @@ -1844,7 +1850,9 @@ class DDLParserSuite extends AnalysisTest { |COMPUTE STATISTICS FOR COLUMNS key, value """.stripMargin), AnalyzeColumn( - UnresolvedTableOrView(Seq("a", "b", "c")), Option(Seq("key", "value")), allColumns = false)) + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE ... FOR COLUMNS ..."), + Option(Seq("key", "value")), + allColumns = false)) // Partition specified should be ignored in case of COMPUTE STATISTICS FOR ALL COLUMNS comparePlans( @@ -1854,7 +1862,9 @@ class DDLParserSuite extends AnalysisTest { |COMPUTE STATISTICS FOR ALL COLUMNS """.stripMargin), AnalyzeColumn( - UnresolvedTableOrView(Seq("a", "b", "c")), None, allColumns = true)) + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE ... FOR ALL COLUMNS"), + None, + allColumns = true)) intercept("ANALYZE TABLE a.b.c COMPUTE STATISTICS FOR ALL COLUMNS key, value", "mismatched input 'key' expecting {, ';'}") @@ -1898,12 +1908,13 @@ class DDLParserSuite extends AnalysisTest { test("SHOW CREATE table") { comparePlans( parsePlan("SHOW CREATE TABLE a.b.c"), - ShowCreateTable(UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false))) + ShowCreateTable( + UnresolvedTableOrView(Seq("a", "b", "c"), "SHOW CREATE TABLE", allowTempView = false))) comparePlans( parsePlan("SHOW CREATE TABLE a.b.c AS SERDE"), ShowCreateTable( - UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + UnresolvedTableOrView(Seq("a", "b", "c"), "SHOW CREATE TABLE", allowTempView = false), asSerde = true)) } @@ -1949,7 +1960,7 @@ class DDLParserSuite extends AnalysisTest { test("REFRESH TABLE") { comparePlans( parsePlan("REFRESH TABLE a.b.c"), - RefreshTable(UnresolvedTableOrView(Seq("a", "b", "c")))) + RefreshTable(UnresolvedTableOrView(Seq("a", "b", "c"), "REFRESH TABLE"))) } test("show columns") { @@ -1959,13 +1970,15 @@ class DDLParserSuite extends AnalysisTest { val sql4 = "SHOW COLUMNS FROM db1.t1 IN db1" val parsed1 = parsePlan(sql1) - val expected1 = ShowColumns(UnresolvedTableOrView(Seq("t1")), None) + val expected1 = ShowColumns(UnresolvedTableOrView(Seq("t1"), "SHOW COLUMNS"), None) val parsed2 = parsePlan(sql2) - val expected2 = ShowColumns(UnresolvedTableOrView(Seq("db1", "t1")), None) + val expected2 = ShowColumns(UnresolvedTableOrView(Seq("db1", "t1"), "SHOW COLUMNS"), None) val parsed3 = parsePlan(sql3) - val expected3 = ShowColumns(UnresolvedTableOrView(Seq("db1", "t1")), Some(Seq("db1"))) + val expected3 = + ShowColumns(UnresolvedTableOrView(Seq("db1", "t1"), "SHOW COLUMNS"), Some(Seq("db1"))) val parsed4 = parsePlan(sql4) - val expected4 = ShowColumns(UnresolvedTableOrView(Seq("db1", "t1")), Some(Seq("db1"))) + val expected4 = + ShowColumns(UnresolvedTableOrView(Seq("db1", "t1"), "SHOW COLUMNS"), Some(Seq("db1"))) comparePlans(parsed1, expected1) comparePlans(parsed2, expected2) @@ -2300,11 +2313,12 @@ class DDLParserSuite extends AnalysisTest { test("SHOW TBLPROPERTIES table") { comparePlans( parsePlan("SHOW TBLPROPERTIES a.b.c"), - ShowTableProperties(UnresolvedTableOrView(Seq("a", "b", "c")), None)) + ShowTableProperties(UnresolvedTableOrView(Seq("a", "b", "c"), "SHOW TBLPROPERTIES"), None)) comparePlans( parsePlan("SHOW TBLPROPERTIES a.b.c('propKey1')"), - ShowTableProperties(UnresolvedTableOrView(Seq("a", "b", "c")), Some("propKey1"))) + ShowTableProperties( + UnresolvedTableOrView(Seq("a", "b", "c"), "SHOW TBLPROPERTIES"), Some("propKey1"))) } test("DESCRIBE 
FUNCTION") { diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out index 07aed98d120f9..145c987ee5f61 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out @@ -540,7 +540,7 @@ struct -- !query output == Parsed Logical Plan == 'DescribeRelation false -+- 'UnresolvedTableOrView [t], true ++- 'UnresolvedTableOrView [t], DESCRIBE TABLE, true == Analyzed Logical Plan == col_name: string, data_type: string, comment: string diff --git a/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out b/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out index 6ddffb89987d8..03df876133aa4 100644 --- a/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out @@ -94,7 +94,7 @@ SHOW COLUMNS IN badtable FROM showdb struct<> -- !query output org.apache.spark.sql.AnalysisException -Table or view not found: showdb.badtable; line 1 pos 0 +Table or view not found for 'SHOW COLUMNS': showdb.badtable; line 1 pos 0 -- !query @@ -130,7 +130,7 @@ SHOW COLUMNS IN showdb.showcolumn3 struct<> -- !query output org.apache.spark.sql.AnalysisException -Table or view not found: showdb.showcolumn3; line 1 pos 0 +Table or view not found for 'SHOW COLUMNS': showdb.showcolumn3; line 1 pos 0 -- !query @@ -139,7 +139,7 @@ SHOW COLUMNS IN showcolumn3 FROM showdb struct<> -- !query output org.apache.spark.sql.AnalysisException -Table or view not found: showdb.showcolumn3; line 1 pos 0 +Table or view not found for 'SHOW COLUMNS': showdb.showcolumn3; line 1 pos 0 -- !query @@ -148,7 +148,7 @@ SHOW COLUMNS IN showcolumn4 struct<> -- !query output org.apache.spark.sql.AnalysisException -Table or view not found: showcolumn4; line 1 pos 0 +Table or view not found for 'SHOW COLUMNS': showcolumn4; line 1 pos 0 -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala index 7b4c8d1cc71d8..92d306c0e3c11 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala @@ -155,7 +155,8 @@ abstract class ShowCreateTableSuite extends QueryTest with SQLTestUtils { val ex = intercept[AnalysisException] { sql(s"SHOW CREATE TABLE $viewName") } - assert(ex.getMessage.contains(s"$viewName is a temp view not table or permanent view")) + assert(ex.getMessage.contains( + s"$viewName is a temp view. 'SHOW CREATE TABLE' expects a table or permanent view.")) } withGlobalTempView(viewName) { @@ -165,7 +166,8 @@ abstract class ShowCreateTableSuite extends QueryTest with SQLTestUtils { sql(s"SHOW CREATE TABLE $globalTempViewDb.$viewName") } assert(ex.getMessage.contains( - s"$globalTempViewDb.$viewName is a temp view not table or permanent view")) + s"$globalTempViewDb.$viewName is a temp view. 
" + + "'SHOW CREATE TABLE' expects a table or permanent view.")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index cd03fadf34b98..3fc679f6b9fc7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -542,7 +542,8 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared val errMsg1 = intercept[AnalysisException] { sql(s"ANALYZE TABLE $globalTempDB.gTempView COMPUTE STATISTICS FOR COLUMNS id") }.getMessage - assert(errMsg1.contains(s"Table or view not found: $globalTempDB.gTempView")) + assert(errMsg1.contains("Table or view not found for 'ANALYZE TABLE ... FOR COLUMNS ...': " + + s"$globalTempDB.gTempView")) // Analyzes in a global temporary view sql("CREATE GLOBAL TEMP VIEW gTempView AS SELECT * FROM range(1, 30)") val errMsg2 = intercept[AnalysisException] { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index f2b57f9442d09..98580568a8df6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -729,7 +729,7 @@ class DataSourceV2SQLSuite val ex = intercept[AnalysisException] { sql("DROP TABLE testcat.db.notbl") } - assert(ex.getMessage.contains("Table or view not found: testcat.db.notbl")) + assert(ex.getMessage.contains("Table or view not found for 'DROP TABLE': testcat.db.notbl")) sql("DROP TABLE IF EXISTS testcat.db.notbl") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 5d29503848772..d776198bc7470 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -183,11 +183,13 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { val e4 = intercept[AnalysisException] { sql(s"SHOW CREATE TABLE $viewName") }.getMessage - assert(e4.contains(s"$viewName is a temp view not table or permanent view")) + assert(e4.contains( + s"$viewName is a temp view. 'SHOW CREATE TABLE' expects a table or permanent view.")) val e5 = intercept[AnalysisException] { sql(s"ANALYZE TABLE $viewName COMPUTE STATISTICS") }.getMessage - assert(e5.contains(s"$viewName is a temp view not table or permanent view")) + assert(e5.contains( + s"$viewName is a temp view. 
'ANALYZE TABLE' expects a table or permanent view.")) val e6 = intercept[AnalysisException] { sql(s"ANALYZE TABLE $viewName COMPUTE STATISTICS FOR COLUMNS id") }.getMessage diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala index c7ad96c8f7619..97dd92acc7805 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala @@ -80,8 +80,10 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { sql("DROP TABLE h2.test.to_drop") checkAnswer(sql("SHOW TABLES IN h2.test"), Seq(Row("test", "people"))) Seq( - "h2.test.not_existing_table" -> "Table or view not found: h2.test.not_existing_table", - "h2.bad_test.not_existing_table" -> "Table or view not found: h2.bad_test.not_existing_table" + "h2.test.not_existing_table" -> + "Table or view not found for 'DROP TABLE': h2.test.not_existing_table", + "h2.bad_test.not_existing_table" -> + "Table or view not found for 'DROP TABLE': h2.bad_test.not_existing_table" ).foreach { case (table, expectedMsg) => val msg = intercept[AnalysisException] { sql(s"DROP TABLE $table") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala index d3398842afb21..4feb970ea6f1a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala @@ -137,7 +137,7 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto val message = intercept[AnalysisException] { sql("SHOW TBLPROPERTIES badtable") }.getMessage - assert(message.contains("Table or view not found: badtable")) + assert(message.contains("Table or view not found for 'SHOW TBLPROPERTIES': badtable")) // When key is not found, a row containing the error is returned. checkAnswer( From e43255051c0a82713d653fe590fe7728e43556ce Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Fri, 27 Nov 2020 10:27:08 +0000 Subject: [PATCH 0586/1009] [SPARK-28645][SQL] ParseException is thrown when the window is redefined ### What changes were proposed in this pull request? Currently in Spark one could redefine a window. For instance: `select count(*) OVER w FROM tenk1 WINDOW w AS (ORDER BY unique1), w AS (ORDER BY unique1);` The window `w` is defined two times. In PgSQL, on the other hand, a thrown will happen: `ERROR: window "w" is already defined` ### Why are the changes needed? The current implement gives the following window definitions a higher priority. But it wasn't Spark's intention and users can't know from any document of Spark. This PR fixes the bug. ### Does this PR introduce _any_ user-facing change? Yes. There is an example query output with/without this fix. 
``` SELECT employee_name, salary, first_value(employee_name) OVER w highest_salary, nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING), w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING) ORDER BY salary DESC ``` The output before this fix: ``` Larry Bott 11798 Larry Bott Gerard Bondur Gerard Bondur 11472 Larry Bott Gerard Bondur Pamela Castillo 11303 Larry Bott Gerard Bondur Barry Jones 10586 Larry Bott Gerard Bondur George Vanauf 10563 Larry Bott Gerard Bondur Loui Bondur 10449 Larry Bott Gerard Bondur Mary Patterson 9998 Larry Bott Gerard Bondur Steve Patterson 9441 Larry Bott Gerard Bondur Julie Firrelli 9181 Larry Bott Gerard Bondur Jeff Firrelli 8992 Larry Bott Gerard Bondur William Patterson 8870 Larry Bott Gerard Bondur Diane Murphy 8435 Larry Bott Gerard Bondur Leslie Jennings 8113 Larry Bott Gerard Bondur Gerard Hernandez 6949 Larry Bott Gerard Bondur Foon Yue Tseng 6660 Larry Bott Gerard Bondur Anthony Bow 6627 Larry Bott Gerard Bondur Leslie Thompson 5186 Larry Bott Gerard Bondur ``` The output after this fix: ``` struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException The definition of window 'w' is repetitive(line 8, pos 0) ``` ### How was this patch tested? Jenkins test. Closes #30512 from beliefer/SPARK-28645. Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/AstBuilder.scala | 10 ++++- .../resources/sql-tests/inputs/window.sql | 14 ++++++- .../sql-tests/results/window.sql.out | 38 ++++++++++++++++++- 3 files changed, 57 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 4cd9b2bea32a4..afef88f7e97e8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -815,10 +815,16 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg ctx: WindowClauseContext, query: LogicalPlan): LogicalPlan = withOrigin(ctx) { // Collect all window specifications defined in the WINDOW clause. 
- val baseWindowMap = ctx.namedWindow.asScala.map { + val baseWindowTuples = ctx.namedWindow.asScala.map { wCtx => (wCtx.name.getText, typedVisit[WindowSpec](wCtx.windowSpec)) - }.toMap + } + baseWindowTuples.groupBy(_._1).foreach { kv => + if (kv._2.size > 1) { + throw new ParseException(s"The definition of window '${kv._1}' is repetitive", ctx) + } + } + val baseWindowMap = baseWindowTuples.toMap // Handle cases like // window w1 as (partition by p_mfgr order by p_name diff --git a/sql/core/src/test/resources/sql-tests/inputs/window.sql b/sql/core/src/test/resources/sql-tests/inputs/window.sql index f5223af9125f6..f0336d764bdea 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/window.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/window.sql @@ -250,4 +250,16 @@ WINDOW w AS ( ORDER BY salary DESC RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ) -ORDER BY department; \ No newline at end of file +ORDER BY department; + +SELECT + employee_name, + salary, + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary +FROM + basic_pays +WINDOW + w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING), + w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING) +ORDER BY salary DESC; \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out b/sql/core/src/test/resources/sql-tests/results/window.sql.out index 1304dcf21d0b3..df2ad96649186 100644 --- a/sql/core/src/test/resources/sql-tests/results/window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 35 +-- Number of queries: 36 -- !query @@ -739,4 +739,38 @@ Gerard Hernandez SCM 6949 Larry Bott Pamela Castillo George Vanauf Sales 10563 George Vanauf Steve Patterson Steve Patterson Sales 9441 George Vanauf Steve Patterson Julie Firrelli Sales 9181 George Vanauf Steve Patterson -Foon Yue Tseng Sales 6660 George Vanauf Steve Patterson \ No newline at end of file +Foon Yue Tseng Sales 6660 George Vanauf Steve Patterson + + +-- !query +SELECT + employee_name, + salary, + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary +FROM + basic_pays +WINDOW + w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING), + w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING) +ORDER BY salary DESC +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +The definition of window 'w' is repetitive(line 8, pos 0) + +== SQL == +SELECT + employee_name, + salary, + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary +FROM + basic_pays +WINDOW +^^^ + w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING), + w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING) +ORDER BY salary DESC \ No newline at end of file From b9f2f78de59758d1932c1573338539e485a01112 Mon Sep 17 00:00:00 2001 From: "xuewei.linxuewei" Date: Fri, 27 Nov 2020 13:24:11 +0000 Subject: [PATCH 0587/1009] [SPARK-33498][SQL] Datetime parsing should fail if the input string can't be parsed, or the pattern string is invalid ### What changes were proposed in this pull request? Datetime parsing should fail if the input string can't be parsed, or the pattern string is invalid, when ANSI mode is enable. 
This patch should update GetTimeStamp, UnixTimeStamp, ToUnixTimeStamp and Cast. ### Why are the changes needed? For ANSI mode. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Added UT and Existing UT. Closes #30442 from leanken/leanken-SPARK-33498. Authored-by: xuewei.linxuewei Signed-off-by: Wenchen Fan --- docs/sql-ref-ansi-compliance.md | 5 + .../spark/sql/catalyst/expressions/Cast.scala | 33 +++-- .../expressions/datetimeExpressions.scala | 51 +++++--- .../sql/catalyst/util/DateTimeUtils.scala | 9 ++ .../sql/catalyst/expressions/CastSuite.scala | 41 ++++-- .../expressions/DateExpressionsSuite.scala | 59 ++++++++- .../resources/sql-tests/inputs/datetime.sql | 11 ++ .../sql-tests/results/ansi/datetime.sql.out | 123 +++++++++++++++--- .../sql-tests/results/datetime-legacy.sql.out | 74 ++++++++++- .../sql-tests/results/datetime.sql.out | 74 ++++++++++- .../results/postgreSQL/window_part3.sql.out | 3 +- 11 files changed, 424 insertions(+), 59 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 870ed0aa0daaa..4e19799ca75b9 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -136,12 +136,17 @@ The behavior of some SQL functions can be different under ANSI mode (`spark.sql. - `element_at`: This function throws `NoSuchElementException` if key does not exist in map. - `elt`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices. - `parse_url`: This function throws `IllegalArgumentException` if an input string is not a valid url. + - `to_date` This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid. + - `to_timestamp` This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid. + - `unix_timestamp` This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid. + - `to_unix_timestamp` This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid. ### SQL Operators The behavior of some SQL operators can be different under ANSI mode (`spark.sql.ansi.enabled=true`). - `array_col[index]`: This operator throws `ArrayIndexOutOfBoundsException` if using invalid indices. - `map_col[key]`: This operator throws `NoSuchElementException` if key does not exist in map. + - `CAST(string_col AS TIMESTAMP)`: This operator should fail with an exception if the input string can't be parsed. 
### SQL Keywords diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index e6f585cacc6c7..95f09d64c484b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -448,7 +448,13 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit // TimestampConverter private[this] def castToTimestamp(from: DataType): Any => Any = from match { case StringType => - buildCast[UTF8String](_, utfs => DateTimeUtils.stringToTimestamp(utfs, zoneId).orNull) + buildCast[UTF8String](_, utfs => { + if (ansiEnabled) { + DateTimeUtils.stringToTimestampAnsi(utfs, zoneId) + } else { + DateTimeUtils.stringToTimestamp(utfs, zoneId).orNull + } + }) case BooleanType => buildCast[Boolean](_, b => if (b) 1L else 0) case LongType => @@ -1250,15 +1256,22 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit zoneIdClass) val longOpt = ctx.freshVariable("longOpt", classOf[Option[Long]]) (c, evPrim, evNull) => - code""" - scala.Option $longOpt = - org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToTimestamp($c, $zid); - if ($longOpt.isDefined()) { - $evPrim = ((Long) $longOpt.get()).longValue(); - } else { - $evNull = true; - } - """ + if (ansiEnabled) { + code""" + $evPrim = + org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToTimestampAnsi($c, $zid); + """ + } else { + code""" + scala.Option $longOpt = + org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToTimestamp($c, $zid); + if ($longOpt.isDefined()) { + $evPrim = ((Long) $longOpt.get()).longValue(); + } else { + $evNull = true; + } + """ + } case BooleanType => (c, evPrim, evNull) => code"$evPrim = $c ? 
1L : 0L;" case _: IntegralType => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 9953b780ceace..1ff5833fb4dd6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -720,10 +720,12 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti case class ToUnixTimestamp( timeExp: Expression, format: Expression, - timeZoneId: Option[String] = None) + timeZoneId: Option[String] = None, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends UnixTime { - def this(timeExp: Expression, format: Expression) = this(timeExp, format, None) + def this(timeExp: Expression, format: Expression) = + this(timeExp, format, None, SQLConf.get.ansiEnabled) override def left: Expression = timeExp override def right: Expression = format @@ -767,10 +769,15 @@ case class ToUnixTimestamp( group = "datetime_funcs", since = "1.5.0") // scalastyle:on line.size.limit -case class UnixTimestamp(timeExp: Expression, format: Expression, timeZoneId: Option[String] = None) +case class UnixTimestamp( + timeExp: Expression, + format: Expression, + timeZoneId: Option[String] = None, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends UnixTime { - def this(timeExp: Expression, format: Expression) = this(timeExp, format, None) + def this(timeExp: Expression, format: Expression) = + this(timeExp, format, None, SQLConf.get.ansiEnabled) override def left: Expression = timeExp override def right: Expression = format @@ -792,6 +799,8 @@ case class UnixTimestamp(timeExp: Expression, format: Expression, timeZoneId: Op abstract class ToTimestamp extends BinaryExpression with TimestampFormatterHelper with ExpectsInputTypes { + def failOnError: Boolean + // The result of the conversion to timestamp is microseconds divided by this factor. // For example if the factor is 1000000, the result of the expression is in seconds. 
protected def downScaleFactor: Long @@ -803,7 +812,14 @@ abstract class ToTimestamp Seq(TypeCollection(StringType, DateType, TimestampType), StringType) override def dataType: DataType = LongType - override def nullable: Boolean = true + override def nullable: Boolean = if (failOnError) children.exists(_.nullable) else true + + private def isParseError(e: Throwable): Boolean = e match { + case _: DateTimeParseException | + _: DateTimeException | + _: ParseException => true + case _ => false + } override def eval(input: InternalRow): Any = { val t = left.eval(input) @@ -824,9 +840,12 @@ abstract class ToTimestamp try { formatter.parse(t.asInstanceOf[UTF8String].toString) / downScaleFactor } catch { - case _: DateTimeParseException | - _: DateTimeException | - _: ParseException => null + case e if isParseError(e) => + if (failOnError) { + throw e + } else { + null + } } } } @@ -835,6 +854,7 @@ abstract class ToTimestamp override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val javaType = CodeGenerator.javaType(dataType) + val parseErrorBranch = if (failOnError) "throw e;" else s"${ev.isNull} = true;" left.dataType match { case StringType => formatterOption.map { fmt => val df = classOf[TimestampFormatter].getName @@ -844,11 +864,11 @@ abstract class ToTimestamp |try { | ${ev.value} = $formatterName.parse($datetimeStr.toString()) / $downScaleFactor; |} catch (java.time.DateTimeException e) { - | ${ev.isNull} = true; + | $parseErrorBranch |} catch (java.time.format.DateTimeParseException e) { - | ${ev.isNull} = true; + | $parseErrorBranch |} catch (java.text.ParseException e) { - | ${ev.isNull} = true; + | $parseErrorBranch |} |""".stripMargin) }.getOrElse { @@ -866,11 +886,11 @@ abstract class ToTimestamp |try { | ${ev.value} = $timestampFormatter.parse($string.toString()) / $downScaleFactor; |} catch (java.time.format.DateTimeParseException e) { - | ${ev.isNull} = true; + | $parseErrorBranch |} catch (java.time.DateTimeException e) { - | ${ev.isNull} = true; + | $parseErrorBranch |} catch (java.text.ParseException e) { - | ${ev.isNull} = true; + | $parseErrorBranch |} |""".stripMargin) } @@ -1737,7 +1757,8 @@ case class DateDiff(endDate: Expression, startDate: Expression) private case class GetTimestamp( left: Expression, right: Expression, - timeZoneId: Option[String] = None) + timeZoneId: Option[String] = None, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends ToTimestamp { override val downScaleFactor = 1 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 3b974759bd6c0..87cf3c93ba26e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -364,6 +364,15 @@ object DateTimeUtils { } } + def stringToTimestampAnsi(s: UTF8String, timeZoneId: ZoneId): Long = { + val timestamp = stringToTimestamp(s, timeZoneId) + if (timestamp.isEmpty) { + throw new DateTimeException(s"Cannot cast $s to TimestampType.") + } else { + timestamp.get + } + } + /** * Gets the number of microseconds since the epoch of 1970-01-01 00:00:00Z from the given * instance of `java.time.Instant`. 
The epoch microsecond count is a simple incrementing count of diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index f1fc921e401ba..0900a303b4cbe 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} +import java.time.DateTimeException import java.util.{Calendar, TimeZone} import scala.collection.parallel.immutable.ParVector @@ -106,8 +107,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(cast(Literal(str), TimestampType, Option(zid.getId)), expected) } - checkCastStringToTimestamp("123", null) - val tz = TimeZone.getTimeZone(zid) var c = Calendar.getInstance(tz) c.set(2015, 0, 1, 0, 0, 0) @@ -184,15 +183,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { c.set(2015, 2, 18, 12, 3, 17) c.set(Calendar.MILLISECOND, 123) checkCastStringToTimestamp("2015-03-18T12:03:17.123+7:3", new Timestamp(c.getTimeInMillis)) - - checkCastStringToTimestamp("2015-03-18 123142", null) - checkCastStringToTimestamp("2015-03-18T123123", null) - checkCastStringToTimestamp("2015-03-18X", null) - checkCastStringToTimestamp("2015/03/18", null) - checkCastStringToTimestamp("2015.03.18", null) - checkCastStringToTimestamp("20150318", null) - checkCastStringToTimestamp("2015-031-8", null) - checkCastStringToTimestamp("2015-03-18T12:03:17-0:70", null) } } @@ -302,7 +292,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { } checkEvaluation(cast("abdef", StringType), "abdef") - checkEvaluation(cast("abdef", TimestampType, UTC_OPT), null) checkEvaluation(cast("12.65", DecimalType.SYSTEM_DEFAULT), Decimal(12.65)) checkEvaluation(cast(cast(sd, DateType), StringType), sd) @@ -962,6 +951,34 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { cast("abcd", DecimalType(38, 1)), "invalid input syntax for type numeric") } + + test("ANSI mode: cast string to timestamp with parse error") { + val activeConf = conf + new ParVector(ALL_TIMEZONES.toVector).foreach { zid => + def checkCastWithParseError(str: String): Unit = { + checkExceptionInExpression[DateTimeException]( + cast(Literal(str), TimestampType, Option(zid.getId)), + s"Cannot cast $str to TimestampType.") + } + + SQLConf.withExistingConf(activeConf) { + checkCastWithParseError("123") + checkCastWithParseError("2015-03-18 123142") + checkCastWithParseError("2015-03-18T123123") + checkCastWithParseError("2015-03-18X") + checkCastWithParseError("2015/03/18") + checkCastWithParseError("2015.03.18") + checkCastWithParseError("20150318") + checkCastWithParseError("2015-031-8") + checkCastWithParseError("2015-03-18T12:03:17-0:70") + + val input = "abdef" + checkExceptionInExpression[DateTimeException]( + cast(input, TimestampType, Option(zid.getId)), + s"Cannot cast $input to TimestampType.") + } + } + } } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 85492084d51ac..a3ffc1129fd5e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -18,8 +18,9 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} -import java.text.SimpleDateFormat +import java.text.{ParseException, SimpleDateFormat} import java.time.{Instant, LocalDate, ZoneId} +import java.time.format.DateTimeParseException import java.util.{Calendar, Locale, TimeZone} import java.util.concurrent.TimeUnit._ @@ -1286,4 +1287,58 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { testIntegralFunc(Long.MaxValue) testIntegralFunc(Long.MinValue) } -} + + test("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with parseError") { + Seq(true, false).foreach { ansiEnabled => + Seq("LEGACY", "CORRECTED", "EXCEPTION").foreach { policy => + withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> policy, + SQLConf.ANSI_ENABLED.key -> ansiEnabled.toString) { + + val exprSeq = Seq[Expression]( + GetTimestamp(Literal("2020-01-27T20:06:11.847"), Literal("yyyy-MM-dd HH:mm:ss.SSS")), + GetTimestamp(Literal("Unparseable"), Literal("yyyy-MM-dd HH:mm:ss.SSS")), + UnixTimestamp(Literal("2020-01-27T20:06:11.847"), Literal("yyyy-MM-dd HH:mm:ss.SSS")), + UnixTimestamp(Literal("Unparseable"), Literal("yyyy-MM-dd HH:mm:ss.SSS")), + ToUnixTimestamp(Literal("2020-01-27T20:06:11.847"), Literal("yyyy-MM-dd HH:mm:ss.SSS")), + ToUnixTimestamp(Literal("Unparseable"), Literal("yyyy-MM-dd HH:mm:ss.SSS")) + ) + + if (!ansiEnabled) { + exprSeq.foreach(checkEvaluation(_, null)) + } else if (policy == "LEGACY") { + exprSeq.foreach(checkExceptionInExpression[ParseException](_, "Unparseable")) + } else { + exprSeq.foreach( + checkExceptionInExpression[DateTimeParseException](_, "could not be parsed")) + } + + // LEGACY works, CORRECTED failed, EXCEPTION with SparkUpgradeException + val exprSeq2 = Seq[(Expression, Long)]( + (GetTimestamp(Literal("2020-01-27T20:06:11.847!!!"), + Literal("yyyy-MM-dd'T'HH:mm:ss.SSS")), 1580184371847000L), + (UnixTimestamp(Literal("2020-01-27T20:06:11.847!!!"), + Literal("yyyy-MM-dd'T'HH:mm:ss.SSS")), 1580184371L), + (ToUnixTimestamp(Literal("2020-01-27T20:06:11.847!!!"), + Literal("yyyy-MM-dd'T'HH:mm:ss.SSS")), 1580184371L) + ) + + if (policy == "LEGACY") { + exprSeq2.foreach(pair => checkEvaluation(pair._1, pair._2)) + } else if (policy == "EXCEPTION") { + exprSeq2.foreach(pair => + checkExceptionInExpression[SparkUpgradeException]( + pair._1, + "You may get a different result due to the upgrading of Spark 3.0")) + } else { + if (ansiEnabled) { + exprSeq2.foreach(pair => + checkExceptionInExpression[DateTimeParseException](pair._1, "could not be parsed")) + } else { + exprSeq2.foreach(pair => checkEvaluation(pair._1, null)) + } + } + } + } + } + } + } diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql index 19b4c53702662..534e222b7c13e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql @@ -153,3 +153,14 @@ select from_json('{"t":"26/October/2015"}', 't Timestamp', map('timestampFormat' select from_json('{"d":"26/October/2015"}', 'd Date', map('dateFormat', 'dd/MMMMM/yyyy')); select from_csv('26/October/2015', 't Timestamp', map('timestampFormat', 'dd/MMMMM/yyyy')); select from_csv('26/October/2015', 'd Date', map('dateFormat', 'dd/MMMMM/yyyy')); + +-- Timestamp type parse error +select to_date("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS"); 
+select to_date("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS"); +select to_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS"); +select to_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS"); +select unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS"); +select unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS"); +select to_unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS"); +select to_unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS"); +select cast("Unparseable" as timestamp) diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out index 5b357fd064e41..10669f14aa87b 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 108 +-- Number of queries: 117 -- !query @@ -301,9 +301,10 @@ struct -- !query select '1' - interval '2' second -- !query schema -struct +struct<> -- !query output -NULL +java.time.DateTimeException +Cannot cast 1 to TimestampType. -- !query @@ -600,9 +601,10 @@ struct -- !query select to_timestamp('2019-10-06 10:11:12.', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]') -- !query schema -struct +struct<> -- !query output -NULL +java.time.format.DateTimeParseException +Text '2019-10-06 10:11:12.' could not be parsed at index 20 -- !query @@ -664,9 +666,10 @@ struct +struct<> -- !query output -NULL +java.time.format.DateTimeParseException +Text '2019-10-06 10:11:12.1234567PST' could not be parsed, unparsed text found at index 26 -- !query @@ -680,9 +683,10 @@ struct +struct<> -- !query output -NULL +java.time.format.DateTimeParseException +Text '223456 2019-10-06 10:11:12.123456PST' could not be parsed at index 27 -- !query @@ -744,17 +748,19 @@ struct +struct<> -- !query output -NULL +java.time.format.DateTimeParseException +Text '12.1232019-10-06S10:11' could not be parsed at index 7 -- !query select to_timestamp("12.1232019-10-06S10:11", "ss.SSSSyy-MM-dd'S'HH:mm") -- !query schema -struct +struct<> -- !query output -NULL +java.time.format.DateTimeParseException +Text '12.1232019-10-06S10:11' could not be parsed at index 9 -- !query @@ -824,9 +830,10 @@ struct -- !query select to_timestamp("02-29", "MM-dd") -- !query schema -struct +struct<> -- !query output -NULL +java.time.DateTimeException +Invalid date 'February 29' as '1970' is not a leap year -- !query @@ -840,9 +847,10 @@ struct -- !query select to_date("02-29", "MM-dd") -- !query schema -struct +struct<> -- !query output -NULL +java.time.DateTimeException +Invalid date 'February 29' as '1970' is not a leap year -- !query @@ -931,3 +939,84 @@ struct<> -- !query output org.apache.spark.SparkUpgradeException You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 
2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select to_date("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text '2020-01-27T20:06:11.847' could not be parsed at index 10 + + +-- !query +select to_date("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text 'Unparseable' could not be parsed at index 0 + + +-- !query +select to_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text '2020-01-27T20:06:11.847' could not be parsed at index 10 + + +-- !query +select to_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text 'Unparseable' could not be parsed at index 0 + + +-- !query +select unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text '2020-01-27T20:06:11.847' could not be parsed at index 10 + + +-- !query +select unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text 'Unparseable' could not be parsed at index 0 + + +-- !query +select to_unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text '2020-01-27T20:06:11.847' could not be parsed at index 10 + + +-- !query +select to_unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text 'Unparseable' could not be parsed at index 0 + + +-- !query +select cast("Unparseable" as timestamp) +-- !query schema +struct<> +-- !query output +java.time.DateTimeException +Cannot cast Unparseable to TimestampType. 
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out index 8727b74d771ee..7c2c62a2db496 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 108 +-- Number of queries: 117 -- !query @@ -901,3 +901,75 @@ select from_csv('26/October/2015', 'd Date', map('dateFormat', 'dd/MMMMM/yyyy')) struct> -- !query output {"d":2015-10-26} + + +-- !query +select to_date("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_date("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select cast("Unparseable" as timestamp) +-- !query schema +struct +-- !query output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index 850cc86d943d3..810ab6ef0cbfc 100755 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 108 +-- Number of queries: 117 -- !query @@ -909,3 +909,75 @@ struct<> -- !query output org.apache.spark.SparkUpgradeException You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 
2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select to_date("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_date("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select cast("Unparseable" as timestamp) +-- !query schema +struct +-- !query output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out index 553432e503d5c..0e177f7ea82bd 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out @@ -71,7 +71,8 @@ insert into datetimes values -- !query schema struct<> -- !query output - +org.apache.spark.sql.AnalysisException +failed to evaluate expression CAST('11:00 BST' AS TIMESTAMP): Cannot cast 11:00 BST to TimestampType.; line 1 pos 22 -- !query From 35ded12fc67a3d8e51f8be3186246745a72a05bc Mon Sep 17 00:00:00 2001 From: luluorta Date: Fri, 27 Nov 2020 13:32:25 +0000 Subject: [PATCH 0588/1009] [SPARK-33141][SQL] Capture SQL configs when creating permanent views ### What changes were proposed in this pull request? This PR makes CreateViewCommand/AlterViewAsCommand capturing runtime SQL configs and store them as view properties. These configs will be applied during the parsing and analysis phases of the view resolution. Users can set `spark.sql.legacy.useCurrentConfigsForView` to `true` to restore the behavior before. ### Why are the changes needed? This PR is a sub-task of [SPARK-33138](https://issues.apache.org/jira/browse/SPARK-33138) that proposes to unify temp view and permanent view behaviors. This PR makes permanent views mimicking the temp view behavior that "fixes" view semantic by directly storing resolved LogicalPlan. For example, if a user uses spark 2.4 to create a view that contains null values from division-by-zero expressions, she may not want that other users' queries which reference her view throw exceptions when running on spark 3.x with ansi mode on. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? added UT + existing UTs (improved) Closes #30289 from luluorta/SPARK-33141. 
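A sketch of the captured-config behavior described above (spark-shell; the view and column names here are illustrative, not taken from the patch):
```
spark.conf.set("spark.sql.ansi.enabled", true)
// ansi.enabled=true is captured and stored as a view property at creation time.
spark.sql("CREATE OR REPLACE VIEW v AS SELECT 1/0 AS c")
spark.conf.set("spark.sql.ansi.enabled", false)
// Still fails with a divide-by-zero error, because the captured config is applied
// when the view is resolved, not the current session config.
spark.sql("SELECT * FROM v").collect()
```
Setting `spark.sql.legacy.useCurrentConfigsForView` to `true` would make the query above follow the current session configs instead, restoring the previous behavior.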
Authored-by: luluorta Signed-off-by: Wenchen Fan --- docs/sql-migration-guide.md | 2 + .../sql/catalyst/analysis/Analyzer.scala | 4 +- .../sql/catalyst/catalog/SessionCatalog.scala | 9 ++- .../sql/catalyst/catalog/interface.scala | 18 +++++ .../plans/logical/basicLogicalOperators.scala | 16 ++++ .../apache/spark/sql/internal/SQLConf.scala | 11 +++ .../spark/sql/execution/command/views.scala | 49 +++++++++++- .../results/postgreSQL/create_view.sql.out | 28 +++---- .../spark/sql/execution/SQLViewSuite.scala | 75 ++++++++++++++++++- 9 files changed, 190 insertions(+), 22 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 6942ef7201703..7997090e710a9 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -51,6 +51,8 @@ license: | - In Spark 3.1, the `schema_of_json` and `schema_of_csv` functions return the schema in the SQL format in which field names are quoted. In Spark 3.0, the function returns a catalog string without field quoting and in lower case. - In Spark 3.1, refreshing a table will trigger an uncache operation for all other caches that reference the table, even if the table itself is not cached. In Spark 3.0 the operation will only be triggered if the table itself is cached. + + - In Spark 3.1, creating or altering a view will capture runtime SQL configs and store them as view properties. These configs will be applied during the parsing and analysis phases of the view resolution. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.useCurrentConfigsForView` to `true`. ## Upgrading from Spark SQL 3.0 to 3.0.1 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 77c1dd9ebb7fa..dae496244c858 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1034,7 +1034,9 @@ class Analyzer(override val catalogManager: CatalogManager) s"avoid errors. Increase the value of ${SQLConf.MAX_NESTED_VIEW_DEPTH.key} to work " + "around this.") } - executeSameContext(child) + SQLConf.withExistingConf(View.effectiveSQLConf(desc.viewSQLConfigs)) { + executeSameContext(child) + } } view.copy(child = newChild) case p @ SubqueryAlias(_, view: View) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 17ab6664df75c..5122ca7521d9a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -795,14 +795,19 @@ class SessionCatalog( if (metadata.tableType == CatalogTableType.VIEW) { val viewText = metadata.viewText.getOrElse(sys.error("Invalid view without text.")) - logDebug(s"'$viewText' will be used for the view($table).") + val viewConfigs = metadata.viewSQLConfigs + val viewPlan = SQLConf.withExistingConf(View.effectiveSQLConf(viewConfigs)) { + parser.parsePlan(viewText) + } + + logDebug(s"'$viewText' will be used for the view($table) with configs: $viewConfigs.") // The relation is a view, so we wrap the relation by: // 1. Add a [[View]] operator over the relation to keep track of the view desc; // 2. Wrap the logical plan in a [[SubqueryAlias]] which tracks the name of the view. 
val child = View( desc = metadata, output = metadata.schema.toAttributes, - child = parser.parsePlan(viewText)) + child = viewPlan) SubqueryAlias(multiParts, child) } else { SubqueryAlias(multiParts, UnresolvedCatalogRelation(metadata, options)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index ee7216e93ebb5..621ad84f1f5ec 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -305,6 +305,22 @@ case class CatalogTable( } } + /** + * Return the SQL configs of when the view was created, the configs are applied when parsing and + * analyzing the view, should be empty if the CatalogTable is not a View or created by older + * versions of Spark(before 3.1.0). + */ + def viewSQLConfigs: Map[String, String] = { + try { + for ((key, value) <- properties if key.startsWith(CatalogTable.VIEW_SQL_CONFIG_PREFIX)) + yield (key.substring(CatalogTable.VIEW_SQL_CONFIG_PREFIX.length), value) + } catch { + case e: Exception => + throw new AnalysisException( + "Corrupted view SQL configs in catalog", cause = Some(e)) + } + } + /** * Return the output column names of the query that creates a view, the column names are used to * resolve a view, should be empty if the CatalogTable is not a View or created by older versions @@ -411,6 +427,8 @@ object CatalogTable { props.toMap } + val VIEW_SQL_CONFIG_PREFIX = VIEW_PREFIX + "sqlConfig." + val VIEW_QUERY_OUTPUT_PREFIX = VIEW_PREFIX + "query.out." val VIEW_QUERY_OUTPUT_NUM_COLUMNS = VIEW_QUERY_OUTPUT_PREFIX + "numCols" val VIEW_QUERY_OUTPUT_COLUMN_NAME_PREFIX = VIEW_QUERY_OUTPUT_PREFIX + "col." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index c7108ea8ac74b..a524ed4ff73e9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -453,6 +453,22 @@ case class View( } } +object View { + def effectiveSQLConf(configs: Map[String, String]): SQLConf = { + val activeConf = SQLConf.get + if (activeConf.useCurrentSQLConfigsForView) return activeConf + + val sqlConf = new SQLConf() + for ((k, v) <- configs) { + sqlConf.settings.put(k, v) + } + // We should respect the current maxNestedViewDepth cause the view resolving are executed + // from top to down. + sqlConf.setConf(SQLConf.MAX_NESTED_VIEW_DEPTH, activeConf.maxNestedViewDepth) + sqlConf + } +} + /** * A container for holding named common table expressions (CTEs) and a query plan. * This operator will be removed during analysis and the relations will be substituted into child. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index add9a1d0f3aa6..b2c28ffa984a9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1481,6 +1481,15 @@ object SQLConf { "must be positive.") .createWithDefault(100) + val USE_CURRENT_SQL_CONFIGS_FOR_VIEW = + buildConf("spark.sql.legacy.useCurrentConfigsForView") + .internal() + .doc("When true, SQL Configs of the current active SparkSession instead of the captured " + + "ones will be applied during the parsing and analysis phases of the view resolution.") + .version("3.1.0") + .booleanConf + .createWithDefault(false) + val STREAMING_FILE_COMMIT_PROTOCOL_CLASS = buildConf("spark.sql.streaming.commitProtocolClass") .version("2.1.0") @@ -3415,6 +3424,8 @@ class SQLConf extends Serializable with Logging { def maxNestedViewDepth: Int = getConf(SQLConf.MAX_NESTED_VIEW_DEPTH) + def useCurrentSQLConfigsForView: Boolean = getConf(SQLConf.USE_CURRENT_SQL_CONFIGS_FOR_VIEW) + def starSchemaDetection: Boolean = getConf(STARSCHEMA_DETECTION) def starSchemaFTRatio: Double = getConf(STARSCHEMA_FACT_TABLE_RATIO) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index 43bc50522f2a8..a02f863a360f8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeRef import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, View} import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper -import org.apache.spark.sql.internal.StaticSQLConf +import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} import org.apache.spark.sql.types.{BooleanType, MetadataBuilder, StringType} import org.apache.spark.sql.util.SchemaUtils @@ -334,6 +334,18 @@ case class ShowViewsCommand( object ViewHelper { + private val configPrefixDenyList = Seq( + SQLConf.MAX_NESTED_VIEW_DEPTH.key, + "spark.sql.optimizer.", + "spark.sql.codegen.", + "spark.sql.execution.", + "spark.sql.shuffle.", + "spark.sql.adaptive.") + + private def shouldCaptureConfig(key: String): Boolean = { + !configPrefixDenyList.exists(prefix => key.startsWith(prefix)) + } + import CatalogTable._ /** @@ -361,11 +373,37 @@ object ViewHelper { } } + /** + * Convert the view SQL configs to `properties`. + */ + private def sqlConfigsToProps(conf: SQLConf): Map[String, String] = { + val modifiedConfs = conf.getAllConfs.filter { case (k, _) => + conf.isModifiable(k) && shouldCaptureConfig(k) + } + val props = new mutable.HashMap[String, String] + for ((key, value) <- modifiedConfs) { + props.put(s"$VIEW_SQL_CONFIG_PREFIX$key", value) + } + props.toMap + } + + /** + * Remove the view SQL configs in `properties`. + */ + private def removeSQLConfigs(properties: Map[String, String]): Map[String, String] = { + // We can't use `filterKeys` here, as the map returned by `filterKeys` is not serializable, + // while `CatalogTable` should be serializable. + properties.filterNot { case (key, _) => + key.startsWith(VIEW_SQL_CONFIG_PREFIX) + } + } + /** * Generate the view properties in CatalogTable, including: * 1. 
view default database that is used to provide the default database name on view resolution. * 2. the output column names of the query that creates a view, this is used to map the output of * the view child to the view output during view resolution. + * 3. the SQL configs when creating the view. * * @param properties the `properties` in CatalogTable. * @param session the spark session. @@ -380,15 +418,18 @@ object ViewHelper { // for createViewCommand queryOutput may be different from fieldNames val queryOutput = analyzedPlan.schema.fieldNames + val conf = session.sessionState.conf + // Generate the query column names, throw an AnalysisException if there exists duplicate column // names. SchemaUtils.checkColumnNameDuplication( - fieldNames, "in the view definition", session.sessionState.conf.resolver) + fieldNames, "in the view definition", conf.resolver) - // Generate the view default catalog and namespace. + // Generate the view default catalog and namespace, as well as captured SQL configs. val manager = session.sessionState.catalogManager - removeQueryColumnNames(properties) ++ + removeSQLConfigs(removeQueryColumnNames(properties)) ++ catalogAndNamespaceToProps(manager.currentCatalog.name, manager.currentNamespace) ++ + sqlConfigsToProps(conf) ++ generateQueryColumnNames(queryOutput) } diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out index ae1cb2f171704..2fab32fa4b4eb 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out @@ -257,7 +257,7 @@ View Text SELECT * FROM base_table View Original Text SELECT * FROM base_table View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -313,7 +313,7 @@ View Text SELECT * FROM base_table View Original Text SELECT * FROM base_table View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -359,7 +359,7 @@ View Original Text SELECT t1.a AS t1_a, t2.a AS t2_a WHERE t1.id = t2.id View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [t1_a, t2_a] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=t1_a, view.query.out.numCols=2, view.query.out.col.1=t2_a, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, 
view.query.out.col.0=t1_a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=t2_a, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -413,7 +413,7 @@ View Text SELECT * FROM base_table WHERE id IN (SELECT id FROM base_t View Original Text SELECT * FROM base_table WHERE id IN (SELECT id FROM base_table2) View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -443,7 +443,7 @@ View Text SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_ View Original Text SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_table2) t2 View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [id, a] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=id, view.query.out.numCols=2, view.query.out.col.1=a, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=id, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=a, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -473,7 +473,7 @@ View Text SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_t View Original Text SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_table2) View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -503,7 +503,7 @@ View Text SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM ba View Original Text SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM base_table2) View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -533,7 +533,7 @@ View Text SELECT * FROM base_table WHERE EXISTS (SELECT 1) View Original Text SELECT * FROM base_table WHERE EXISTS (SELECT 1) View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, 
id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -669,7 +669,7 @@ View Text SELECT * FROM t1 CROSS JOIN t2 View Original Text SELECT * FROM t1 CROSS JOIN t2 View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] -Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] -- !query @@ -710,7 +710,7 @@ View Text SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2 View Original Text SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2 View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] -Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] -- !query @@ -751,7 +751,7 @@ View Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 View Original Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] -Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] -- !query @@ -792,7 +792,7 @@ View Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.va View Original Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.value = 'xxx' View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] -Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.query.out.col.1=name, 
view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] -- !query @@ -894,7 +894,7 @@ BETWEEN (SELECT d FROM tbl2 WHERE c = 1) AND (SELECT e FROM tbl3 WHERE f = 2) AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f) View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [a, b] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2] -- !query @@ -933,7 +933,7 @@ AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f) AND NOT EXISTS (SELECT g FROM tbl4 LEFT JOIN tmptbl ON tbl4.h = tmptbl.j) View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [a, b] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2] -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index d776198bc7470..0b19f706836be 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.parser.ParseException -import org.apache.spark.sql.internal.SQLConf.MAX_NESTED_VIEW_DEPTH +import org.apache.spark.sql.internal.SQLConf._ import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} class SimpleSQLViewSuite extends SQLViewSuite with SharedSparkSession @@ -762,4 +762,77 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { } } } + + test("SPARK-33141: view should be parsed and analyzed with configs set when creating") { + withTable("t") { + withView("v1", "v2", "v3", "v4", "v5") { + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + sql("CREATE VIEW v1 (c1) AS SELECT C1 FROM t") + sql("CREATE VIEW v2 (c1) AS SELECT c1 FROM t ORDER BY 1 ASC, c1 DESC") + sql("CREATE VIEW v3 (c1, count) AS SELECT c1, count(c1) FROM t GROUP BY 1") + sql("CREATE VIEW v4 (a, count) AS SELECT c1 as a, count(c1) FROM t GROUP BY a") + sql("CREATE VIEW v5 (c1) AS SELECT 1/0") + + withSQLConf(CASE_SENSITIVE.key -> "true") { + checkAnswer(sql("SELECT * FROM v1"), Seq(Row(2), 
Row(3), Row(1))) + } + withSQLConf(ORDER_BY_ORDINAL.key -> "false") { + checkAnswer(sql("SELECT * FROM v2"), Seq(Row(1), Row(2), Row(3))) + } + withSQLConf(GROUP_BY_ORDINAL.key -> "false") { + checkAnswer(sql("SELECT * FROM v3"), + Seq(Row(1, 1), Row(2, 1), Row(3, 1))) + } + withSQLConf(GROUP_BY_ALIASES.key -> "false") { + checkAnswer(sql("SELECT * FROM v4"), + Seq(Row(1, 1), Row(2, 1), Row(3, 1))) + } + withSQLConf(ANSI_ENABLED.key -> "true") { + checkAnswer(sql("SELECT * FROM v5"), Seq(Row(null))) + } + + withSQLConf(USE_CURRENT_SQL_CONFIGS_FOR_VIEW.key -> "true") { + withSQLConf(CASE_SENSITIVE.key -> "true") { + val e = intercept[AnalysisException] { + sql("SELECT * FROM v1") + }.getMessage + assert(e.contains("cannot resolve '`C1`' given input columns: " + + "[spark_catalog.default.t.c1]")) + } + withSQLConf(ORDER_BY_ORDINAL.key -> "false") { + checkAnswer(sql("SELECT * FROM v2"), Seq(Row(3), Row(2), Row(1))) + } + withSQLConf(GROUP_BY_ORDINAL.key -> "false") { + val e = intercept[AnalysisException] { + sql("SELECT * FROM v3") + }.getMessage + assert(e.contains("expression 'spark_catalog.default.t.`c1`' is neither present " + + "in the group by, nor is it an aggregate function. Add to group by or wrap in " + + "first() (or first_value) if you don't care which value you get.")) + } + withSQLConf(GROUP_BY_ALIASES.key -> "false") { + val e = intercept[AnalysisException] { + sql("SELECT * FROM v4") + }.getMessage + assert(e.contains("cannot resolve '`a`' given input columns: " + + "[spark_catalog.default.t.c1]")) + } + withSQLConf(ANSI_ENABLED.key -> "true") { + val e = intercept[ArithmeticException] { + sql("SELECT * FROM v5").collect() + }.getMessage + assert(e.contains("divide by zero")) + } + } + + withSQLConf(ANSI_ENABLED.key -> "true") { + sql("ALTER VIEW v1 AS SELECT 1/0") + } + val e = intercept[ArithmeticException] { + sql("SELECT * FROM v1").collect() + }.getMessage + assert(e.contains("divide by zero")) + } + } + } } From 13fd272cd353c8aa40a6030c4c847c2e2f632f68 Mon Sep 17 00:00:00 2001 From: Josh Soref Date: Fri, 27 Nov 2020 10:22:45 -0600 Subject: [PATCH 0589/1009] Spelling r common dev mlib external project streaming resource managers python ### What changes were proposed in this pull request? This PR intends to fix typos in the sub-modules: * `R` * `common` * `dev` * `mlib` * `external` * `project` * `streaming` * `resource-managers` * `python` Split per srowen https://github.com/apache/spark/pull/30323#issuecomment-728981618 NOTE: The misspellings have been reported at https://github.com/jsoref/spark/commit/706a726f87a0bbf5e31467fae9015218773db85b#commitcomment-44064356 ### Why are the changes needed? Misspelled words make it harder to read / understand content. ### Does this PR introduce _any_ user-facing change? There are various fixes to documentation, etc... ### How was this patch tested? No testing was performed Closes #30402 from jsoref/spelling-R_common_dev_mlib_external_project_streaming_resource-managers_python. 
Authored-by: Josh Soref Signed-off-by: Sean Owen --- R/CRAN_RELEASE.md | 2 +- R/install-dev.bat | 2 +- R/pkg/R/DataFrame.R | 6 ++--- R/pkg/R/RDD.R | 4 ++-- R/pkg/R/SQLContext.R | 2 +- R/pkg/R/WindowSpec.R | 4 ++-- R/pkg/R/column.R | 16 +++++++------- R/pkg/R/context.R | 4 ++-- R/pkg/R/deserialize.R | 2 +- R/pkg/R/functions.R | 4 ++-- R/pkg/R/install.R | 2 +- R/pkg/R/mllib_fpm.R | 2 +- R/pkg/R/mllib_tree.R | 4 ++-- R/pkg/R/mllib_utils.R | 2 +- R/pkg/R/pairRDD.R | 4 ++-- R/pkg/R/streaming.R | 2 +- R/pkg/R/types.R | 2 +- R/pkg/R/utils.R | 2 +- R/pkg/inst/worker/daemon.R | 4 ++-- R/pkg/inst/worker/worker.R | 8 +++---- R/pkg/tests/fulltests/test_Serde.R | 2 +- R/pkg/tests/fulltests/test_jvm_api.R | 6 ++--- R/pkg/tests/fulltests/test_sparkSQL.R | 6 ++--- R/pkg/tests/fulltests/test_utils.R | 2 +- R/pkg/vignettes/sparkr-vignettes.Rmd | 2 +- .../spark/util/kvstore/LevelDBTypeInfo.java | 2 +- .../spark/network/client/TransportClient.java | 2 +- .../spark/network/crypto/AuthEngine.java | 2 +- .../spark/network/crypto/AuthEngineSuite.java | 10 ++++----- .../protocol/MessageWithHeaderSuite.java | 4 ++-- .../spark/network/sasl/SparkSaslSuite.java | 16 +++++++------- .../server/OneForOneStreamManagerSuite.java | 2 +- .../util/TransportFrameDecoderSuite.java | 2 +- .../network/shuffle/SimpleDownloadFile.java | 2 +- .../apache/spark/unsafe/types/UTF8String.java | 10 ++++----- .../types/UTF8StringPropertyCheckSuite.scala | 6 ++--- dev/appveyor-guide.md | 12 +++++----- dev/create-release/known_translations | 2 +- dev/create-release/release-build.sh | 2 +- dev/create-release/releaseutils.py | 6 ++--- dev/create-release/translate-contributors.py | 22 +++++++++---------- dev/github_jira_sync.py | 10 ++++----- dev/run-tests-jenkins.py | 18 +++++++-------- dev/run-tests.py | 6 ++--- dev/tests/pr_merge_ability.sh | 2 +- dev/tests/pr_public_classes.sh | 2 +- project/MimaExcludes.scala | 2 +- project/SparkBuild.scala | 6 ++--- python/docs/source/_static/css/pyspark.css | 2 +- .../source/_templates/autosummary/class.rst | 2 +- python/docs/source/development/debugging.rst | 2 +- python/docs/source/development/testing.rst | 2 +- .../docs/source/getting_started/install.rst | 6 ++--- .../source/getting_started/quickstart.ipynb | 4 ++-- python/docs/source/index.rst | 2 +- python/pyspark/__init__.pyi | 2 +- python/pyspark/cloudpickle/cloudpickle.py | 10 ++++----- .../pyspark/cloudpickle/cloudpickle_fast.py | 10 ++++----- python/pyspark/context.py | 4 ++-- python/pyspark/java_gateway.py | 2 +- python/pyspark/ml/feature.py | 2 +- python/pyspark/ml/regression.py | 2 +- python/pyspark/ml/regression.pyi | 2 +- python/pyspark/ml/tests/test_algorithms.py | 2 +- python/pyspark/ml/tests/test_image.py | 2 +- python/pyspark/mllib/clustering.py | 2 +- python/pyspark/mllib/evaluation.py | 4 ++-- python/pyspark/mllib/regression.py | 2 +- python/pyspark/mllib/stat/_statistics.py | 2 +- .../mllib/tests/test_streaming_algorithms.py | 2 +- python/pyspark/rdd.py | 4 ++-- python/pyspark/resource/requests.py | 4 ++-- python/pyspark/shuffle.py | 2 +- python/pyspark/sql/column.py | 2 +- python/pyspark/sql/dataframe.py | 2 +- python/pyspark/sql/functions.py | 14 ++++++------ .../sql/pandas/_typing/protocols/frame.pyi | 2 +- .../sql/pandas/_typing/protocols/series.pyi | 2 +- python/pyspark/sql/pandas/functions.py | 4 ++-- .../sql/tests/test_pandas_grouped_map.py | 2 +- python/pyspark/sql/tests/test_udf.py | 4 ++-- python/pyspark/sql/utils.py | 6 ++--- python/pyspark/streaming/context.py | 2 +- python/pyspark/tests/test_context.py | 4 ++-- 
python/pyspark/worker.py | 2 +- python/test_support/userlibrary.py | 2 +- .../org/apache/spark/deploy/k8s/Config.scala | 2 +- .../k8s/ExecutorPodsSnapshotsStoreImpl.scala | 4 ++-- .../k8s/KubernetesVolumeUtilsSuite.scala | 4 ++-- .../MountVolumesFeatureStepSuite.scala | 2 +- .../apache/spark/deploy/mesos/config.scala | 2 +- .../cluster/mesos/MesosSchedulerUtils.scala | 2 +- .../spark/deploy/yarn/YarnAllocator.scala | 2 +- .../apache/hadoop/net/ServerSocketUtil.java | 2 +- .../yarn/YarnShuffleServiceSuite.scala | 2 +- .../streaming/api/python/PythonDStream.scala | 2 +- .../spark/streaming/dstream/DStream.scala | 4 ++-- .../spark/streaming/util/HdfsUtils.scala | 2 +- .../apache/spark/streaming/JavaAPISuite.java | 2 +- .../spark/streaming/MapWithStateSuite.scala | 6 ++--- .../streaming/rdd/MapWithStateRDDSuite.scala | 6 ++--- 101 files changed, 208 insertions(+), 208 deletions(-) diff --git a/R/CRAN_RELEASE.md b/R/CRAN_RELEASE.md index 4d9b6416c01cb..2f410cf8bfd94 100644 --- a/R/CRAN_RELEASE.md +++ b/R/CRAN_RELEASE.md @@ -25,7 +25,7 @@ To release SparkR as a package to CRAN, we would use the `devtools` package. Ple First, check that the `Version:` field in the `pkg/DESCRIPTION` file is updated. Also, check for stale files not under source control. -Note that while `run-tests.sh` runs `check-cran.sh` (which runs `R CMD check`), it is doing so with `--no-manual --no-vignettes`, which skips a few vignettes or PDF checks - therefore it will be preferred to run `R CMD check` on the source package built manually before uploading a release. Also note that for CRAN checks for pdf vignettes to success, `qpdf` tool must be there (to install it, eg. `yum -q -y install qpdf`). +Note that while `run-tests.sh` runs `check-cran.sh` (which runs `R CMD check`), it is doing so with `--no-manual --no-vignettes`, which skips a few vignettes or PDF checks - therefore it will be preferred to run `R CMD check` on the source package built manually before uploading a release. Also note that for CRAN checks for pdf vignettes to success, `qpdf` tool must be there (to install it, e.g. `yum -q -y install qpdf`). To upload a release, we would need to update the `cran-comments.md`. This should generally contain the results from running the `check-cran.sh` script along with comments on status of all `WARNING` (should not be any) or `NOTE`. As a part of `check-cran.sh` and the release process, the vignettes is build - make sure `SPARK_HOME` is set and Spark jars are accessible. diff --git a/R/install-dev.bat b/R/install-dev.bat index c570d93049a14..ae5aa589a19d1 100644 --- a/R/install-dev.bat +++ b/R/install-dev.bat @@ -26,7 +26,7 @@ MKDIR %SPARK_HOME%\R\lib rem When you pass the package path directly as an argument to R CMD INSTALL, rem it takes the path as 'C:\projects\spark\R\..\R\pkg"' as an example at -rem R 4.0. To work around this, directly go to the directoy and install it. +rem R 4.0. To work around this, directly go to the directory and install it. rem See also SPARK-32074 pushd %SPARK_HOME%\R\pkg\ R.exe CMD INSTALL --library="%SPARK_HOME%\R\lib" . diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 2ce53782d9af0..31a651ea1279b 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2772,7 +2772,7 @@ setMethod("merge", #' Creates a list of columns by replacing the intersected ones with aliases #' #' Creates a list of columns by replacing the intersected ones with aliases. -#' The name of the alias column is formed by concatanating the original column name and a suffix. 
+#' The name of the alias column is formed by concatenating the original column name and a suffix. #' #' @param x a SparkDataFrame #' @param intersectedColNames a list of intersected column names of the SparkDataFrame @@ -3231,7 +3231,7 @@ setMethod("describe", #' \item stddev #' \item min #' \item max -#' \item arbitrary approximate percentiles specified as a percentage (eg, "75\%") +#' \item arbitrary approximate percentiles specified as a percentage (e.g., "75\%") #' } #' If no statistics are given, this function computes count, mean, stddev, min, #' approximate quartiles (percentiles at 25\%, 50\%, and 75\%), and max. @@ -3743,7 +3743,7 @@ setMethod("histogram", #' #' @param x a SparkDataFrame. #' @param url JDBC database url of the form \code{jdbc:subprotocol:subname}. -#' @param tableName yhe name of the table in the external database. +#' @param tableName the name of the table in the external database. #' @param mode one of 'append', 'overwrite', 'error', 'errorifexists', 'ignore' #' save mode (it is 'error' by default) #' @param ... additional JDBC database connection properties. diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R index 7a1d157bb8a36..408a3ff25b2b2 100644 --- a/R/pkg/R/RDD.R +++ b/R/pkg/R/RDD.R @@ -970,7 +970,7 @@ setMethod("takeSample", signature(x = "RDD", withReplacement = "logical", MAXINT))))) # If the first sample didn't turn out large enough, keep trying to # take samples; this shouldn't happen often because we use a big - # multiplier for thei initial size + # multiplier for the initial size while (length(samples) < total) samples <- collectRDD(sampleRDD(x, withReplacement, fraction, as.integer(ceiling(stats::runif(1, @@ -1512,7 +1512,7 @@ setMethod("glom", #' #' @param x An RDD. #' @param y An RDD. -#' @return a new RDD created by performing the simple union (witout removing +#' @return a new RDD created by performing the simple union (without removing #' duplicates) of two input RDDs. #' @examples #'\dontrun{ diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index c0ac68332ec41..5ed0481f33d8f 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -203,7 +203,7 @@ getSchema <- function(schema, firstRow = NULL, rdd = NULL) { }) } - # SPAKR-SQL does not support '.' in column name, so replace it with '_' + # SPARK-SQL does not support '.' in column name, so replace it with '_' # TODO(davies): remove this once SPARK-2775 is fixed names <- lapply(names, function(n) { nn <- gsub(".", "_", n, fixed = TRUE) diff --git a/R/pkg/R/WindowSpec.R b/R/pkg/R/WindowSpec.R index 037809cd0923e..be47d0117ed7f 100644 --- a/R/pkg/R/WindowSpec.R +++ b/R/pkg/R/WindowSpec.R @@ -54,7 +54,7 @@ setMethod("show", "WindowSpec", #' Defines the partitioning columns in a WindowSpec. #' #' @param x a WindowSpec. -#' @param col a column to partition on (desribed by the name or Column). +#' @param col a column to partition on (described by the name or Column). #' @param ... additional column(s) to partition on. #' @return A WindowSpec. 
#' @rdname partitionBy @@ -231,7 +231,7 @@ setMethod("rangeBetween", #' @rdname over #' @name over #' @aliases over,Column,WindowSpec-method -#' @family colum_func +#' @family column_func #' @examples #' \dontrun{ #' df <- createDataFrame(mtcars) diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 835178990b485..9fa117ccb6281 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -135,7 +135,7 @@ createMethods() #' @rdname alias #' @name alias #' @aliases alias,Column-method -#' @family colum_func +#' @family column_func #' @examples #' \dontrun{ #' df <- createDataFrame(iris) @@ -161,7 +161,7 @@ setMethod("alias", #' #' @rdname substr #' @name substr -#' @family colum_func +#' @family column_func #' @aliases substr,Column-method #' #' @param x a Column. @@ -187,7 +187,7 @@ setMethod("substr", signature(x = "Column"), #' #' @rdname startsWith #' @name startsWith -#' @family colum_func +#' @family column_func #' @aliases startsWith,Column-method #' #' @param x vector of character string whose "starts" are considered @@ -206,7 +206,7 @@ setMethod("startsWith", signature(x = "Column"), #' #' @rdname endsWith #' @name endsWith -#' @family colum_func +#' @family column_func #' @aliases endsWith,Column-method #' #' @param x vector of character string whose "ends" are considered @@ -224,7 +224,7 @@ setMethod("endsWith", signature(x = "Column"), #' #' @rdname between #' @name between -#' @family colum_func +#' @family column_func #' @aliases between,Column-method #' #' @param x a Column @@ -251,7 +251,7 @@ setMethod("between", signature(x = "Column"), # nolint end #' @rdname cast #' @name cast -#' @family colum_func +#' @family column_func #' @aliases cast,Column-method #' #' @examples @@ -300,7 +300,7 @@ setMethod("%in%", #' Can be a single value or a Column. #' @rdname otherwise #' @name otherwise -#' @family colum_func +#' @family column_func #' @aliases otherwise,Column-method #' @note otherwise since 1.5.0 setMethod("otherwise", @@ -440,7 +440,7 @@ setMethod("withField", #' ) #' #' # However, if you are going to add/replace multiple nested fields, -#' # it is preffered to extract out the nested struct before +#' # it is preferred to extract out the nested struct before #' # adding/replacing multiple fields e.g. #' head( #' withColumn( diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R index e3c9d9f8793d6..cca6c2c817de9 100644 --- a/R/pkg/R/context.R +++ b/R/pkg/R/context.R @@ -86,7 +86,7 @@ makeSplits <- function(numSerializedSlices, length) { # For instance, for numSerializedSlices of 22, length of 50 # [1] 0 0 2 2 4 4 6 6 6 9 9 11 11 13 13 15 15 15 18 18 20 20 22 22 22 # [26] 25 25 27 27 29 29 31 31 31 34 34 36 36 38 38 40 40 40 43 43 45 45 47 47 47 - # Notice the slice group with 3 slices (ie. 6, 15, 22) are roughly evenly spaced. + # Notice the slice group with 3 slices (i.e. 6, 15, 22) are roughly evenly spaced. # We are trying to reimplement the calculation in the positions method in ParallelCollectionRDD if (numSerializedSlices > 0) { unlist(lapply(0: (numSerializedSlices - 1), function(x) { @@ -116,7 +116,7 @@ makeSplits <- function(numSerializedSlices, length) { #' This change affects both createDataFrame and spark.lapply. #' In the specific one case that it is used to convert R native object into SparkDataFrame, it has #' always been kept at the default of 1. In the case the object is large, we are explicitly setting -#' the parallism to numSlices (which is still 1). +#' the parallelism to numSlices (which is still 1). 
#' #' Specifically, we are changing to split positions to match the calculation in positions() of #' ParallelCollectionRDD in Spark. diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R index 5d22340fb62a0..89a8fbecd36b0 100644 --- a/R/pkg/R/deserialize.R +++ b/R/pkg/R/deserialize.R @@ -250,7 +250,7 @@ readDeserializeWithKeysInArrow <- function(inputCon) { keys <- readMultipleObjects(inputCon) - # Read keys to map with each groupped batch later. + # Read keys to map with each grouped batch later. list(keys = keys, data = data) } diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index b12f7b472ec83..99406443165d5 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -144,7 +144,7 @@ NULL #' @param y Column to compute on. #' @param pos In \itemize{ #' \item \code{locate}: a start position of search. -#' \item \code{overlay}: a start postiton for replacement. +#' \item \code{overlay}: a start position for replacement. #' } #' @param len In \itemize{ #' \item \code{lpad} the maximum length of each output result. @@ -2918,7 +2918,7 @@ setMethod("shiftRight", signature(y = "Column", x = "numeric"), }) #' @details -#' \code{shiftRightUnsigned}: (Unigned) shifts the given value numBits right. If the given value is +#' \code{shiftRightUnsigned}: (Unsigned) shifts the given value numBits right. If the given value is #' a long value, it will return a long value else it will return an integer value. #' #' @rdname column_math_functions diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index ea2c0b4c0f42f..5bc5ae07c5f03 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -289,7 +289,7 @@ sparkCachePath <- function() { } # Length of the Spark cache specific relative path segments for each platform -# eg. "Apache\Spark\Cache" is 3 in Windows, or "spark" is 1 in unix +# e.g. "Apache\Spark\Cache" is 3 in Windows, or "spark" is 1 in unix # Must match sparkCachePath() exactly. sparkCacheRelPathLength <- function() { if (is_windows()) { diff --git a/R/pkg/R/mllib_fpm.R b/R/pkg/R/mllib_fpm.R index 30bc51b932041..65a43514930f0 100644 --- a/R/pkg/R/mllib_fpm.R +++ b/R/pkg/R/mllib_fpm.R @@ -125,7 +125,7 @@ setMethod("spark.freqItemsets", signature(object = "FPGrowthModel"), #' The \code{SparkDataFrame} contains five columns: #' \code{antecedent} (an array of the same type as the input column), #' \code{consequent} (an array of the same type as the input column), -#' \code{condfidence} (confidence for the rule) +#' \code{confidence} (confidence for the rule) #' \code{lift} (lift for the rule) #' and \code{support} (support for the rule) #' @rdname spark.fpGrowth diff --git a/R/pkg/R/mllib_tree.R b/R/pkg/R/mllib_tree.R index f6aa48f5fa04a..b5a014b0a3cfd 100644 --- a/R/pkg/R/mllib_tree.R +++ b/R/pkg/R/mllib_tree.R @@ -53,7 +53,7 @@ setClass("DecisionTreeRegressionModel", representation(jobj = "jobj")) #' @note DecisionTreeClassificationModel since 2.3.0 setClass("DecisionTreeClassificationModel", representation(jobj = "jobj")) -# Create the summary of a tree ensemble model (eg. Random Forest, GBT) +# Create the summary of a tree ensemble model (e.g. Random Forest, GBT) summary.treeEnsemble <- function(model) { jobj <- model@jobj formula <- callJMethod(jobj, "formula") @@ -73,7 +73,7 @@ summary.treeEnsemble <- function(model) { jobj = jobj) } -# Prints the summary of tree ensemble models (eg. Random Forest, GBT) +# Prints the summary of tree ensemble models (e.g. 
Random Forest, GBT) print.summary.treeEnsemble <- function(x) { jobj <- x$jobj cat("Formula: ", x$formula) diff --git a/R/pkg/R/mllib_utils.R b/R/pkg/R/mllib_utils.R index f38f1ac3a6b4c..d943d8d0ab4c0 100644 --- a/R/pkg/R/mllib_utils.R +++ b/R/pkg/R/mllib_utils.R @@ -18,7 +18,7 @@ # mllib_utils.R: Utilities for MLlib integration # Integration with R's standard functions. -# Most of MLlib's argorithms are provided in two flavours: +# Most of MLlib's algorithms are provided in two flavours: # - a specialization of the default R methods (glm). These methods try to respect # the inputs and the outputs of R's method to the largest extent, but some small differences # may exist. diff --git a/R/pkg/R/pairRDD.R b/R/pkg/R/pairRDD.R index b29381bb900fb..41676be03e951 100644 --- a/R/pkg/R/pairRDD.R +++ b/R/pkg/R/pairRDD.R @@ -239,7 +239,7 @@ setMethod("partitionByRDD", javaPairRDD <- callJMethod(javaPairRDD, "partitionBy", rPartitioner) # Call .values() on the result to get back the final result, the - # shuffled acutal content key-val pairs. + # shuffled actual content key-val pairs. r <- callJMethod(javaPairRDD, "values") RDD(r, serializedMode = "byte") @@ -411,7 +411,7 @@ setMethod("reduceByKeyLocally", #' \itemize{ #' \item createCombiner, which turns a V into a C (e.g., creates a one-element list) #' \item mergeValue, to merge a V into a C (e.g., adds it to the end of a list) - -#' \item mergeCombiners, to combine two C's into a single one (e.g., concatentates +#' \item mergeCombiners, to combine two C's into a single one (e.g., concatenates #' two lists). #' } #' diff --git a/R/pkg/R/streaming.R b/R/pkg/R/streaming.R index 5eccbdc9d3818..2bcfb363f9d24 100644 --- a/R/pkg/R/streaming.R +++ b/R/pkg/R/streaming.R @@ -93,7 +93,7 @@ setMethod("explain", #' lastProgress #' -#' Prints the most recent progess update of this streaming query in JSON format. +#' Prints the most recent progress update of this streaming query in JSON format. #' #' @param x a StreamingQuery. #' @rdname lastProgress diff --git a/R/pkg/R/types.R b/R/pkg/R/types.R index 5d48a9eee2799..dfa83c35665ce 100644 --- a/R/pkg/R/types.R +++ b/R/pkg/R/types.R @@ -68,7 +68,7 @@ rToSQLTypes <- as.environment(list( "character" = "string", "logical" = "boolean")) -# Helper function of coverting decimal type. When backend returns column type in the +# Helper function of converting decimal type. When backend returns column type in the # format of decimal(,) (e.g., decimal(10, 0)), this function coverts the column type # as double type. This function converts backend returned types that are not the key # of PRIMITIVE_TYPES, but should be treated as PRIMITIVE_TYPES. diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index d6f9f927d5cdc..264cbfc9ba929 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -930,7 +930,7 @@ getOne <- function(x, envir, inherits = TRUE, ifnotfound = NULL) { } # Returns a vector of parent directories, traversing up count times, starting with a full path -# eg. traverseParentDirs("/Users/user/Library/Caches/spark/spark2.2", 1) should return +# e.g. 
traverseParentDirs("/Users/user/Library/Caches/spark/spark2.2", 1) should return # this "/Users/user/Library/Caches/spark/spark2.2" # and "/Users/user/Library/Caches/spark" traverseParentDirs <- function(x, count) { diff --git a/R/pkg/inst/worker/daemon.R b/R/pkg/inst/worker/daemon.R index fb9db63b07cd0..4589bb9c6ad1b 100644 --- a/R/pkg/inst/worker/daemon.R +++ b/R/pkg/inst/worker/daemon.R @@ -32,7 +32,7 @@ inputCon <- socketConnection( SparkR:::doServerAuth(inputCon, Sys.getenv("SPARKR_WORKER_SECRET")) -# Waits indefinitely for a socket connecion by default. +# Waits indefinitely for a socket connection by default. selectTimeout <- NULL while (TRUE) { @@ -72,7 +72,7 @@ while (TRUE) { } }) } else if (is.null(children)) { - # If it is NULL, there are no children. Waits indefinitely for a socket connecion. + # If it is NULL, there are no children. Waits indefinitely for a socket connection. selectTimeout <- NULL } diff --git a/R/pkg/inst/worker/worker.R b/R/pkg/inst/worker/worker.R index 1ef05ea621e83..dd271f91d0084 100644 --- a/R/pkg/inst/worker/worker.R +++ b/R/pkg/inst/worker/worker.R @@ -85,7 +85,7 @@ outputResult <- function(serializer, output, outputCon) { } # Constants -specialLengths <- list(END_OF_STERAM = 0L, TIMING_DATA = -1L) +specialLengths <- list(END_OF_STREAM = 0L, TIMING_DATA = -1L) # Timing R process boot bootTime <- currentTimeSecs() @@ -180,7 +180,7 @@ if (isEmpty != 0) { } else if (deserializer == "arrow" && mode == 1) { data <- SparkR:::readDeserializeInArrow(inputCon) # See https://stat.ethz.ch/pipermail/r-help/2010-September/252046.html - # rbind.fill might be an anternative to make it faster if plyr is installed. + # rbind.fill might be an alternative to make it faster if plyr is installed. # Also, note that, 'dapply' applies a function to each partition. data <- do.call("rbind", data) } @@ -212,7 +212,7 @@ if (isEmpty != 0) { if (serializer == "arrow") { # See https://stat.ethz.ch/pipermail/r-help/2010-September/252046.html - # rbind.fill might be an anternative to make it faster if plyr is installed. + # rbind.fill might be an alternative to make it faster if plyr is installed. combined <- do.call("rbind", outputs) SparkR:::writeSerializeInArrow(outputCon, combined) } @@ -285,7 +285,7 @@ SparkR:::writeDouble(outputCon, computeInputElapsDiff) # compute SparkR:::writeDouble(outputCon, outputComputeElapsDiff) # output # End of output -SparkR:::writeInt(outputCon, specialLengths$END_OF_STERAM) +SparkR:::writeInt(outputCon, specialLengths$END_OF_STREAM) close(outputCon) close(inputCon) diff --git a/R/pkg/tests/fulltests/test_Serde.R b/R/pkg/tests/fulltests/test_Serde.R index e01f6ee005218..a52289e43ca5e 100644 --- a/R/pkg/tests/fulltests/test_Serde.R +++ b/R/pkg/tests/fulltests/test_Serde.R @@ -125,7 +125,7 @@ test_that("SerDe of list of lists", { sparkR.session.stop() -# Note that this test should be at the end of tests since the configruations used here are not +# Note that this test should be at the end of tests since the configurations used here are not # specific to sessions, and the Spark context is restarted. 
test_that("createDataFrame large objects", { for (encryptionEnabled in list("true", "false")) { diff --git a/R/pkg/tests/fulltests/test_jvm_api.R b/R/pkg/tests/fulltests/test_jvm_api.R index 8b3b4f73de170..3bf6ae556c079 100644 --- a/R/pkg/tests/fulltests/test_jvm_api.R +++ b/R/pkg/tests/fulltests/test_jvm_api.R @@ -20,11 +20,11 @@ context("JVM API") sparkSession <- sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE) test_that("Create and call methods on object", { - jarr <- sparkR.newJObject("java.util.ArrayList") + jarray <- sparkR.newJObject("java.util.ArrayList") # Add an element to the array - sparkR.callJMethod(jarr, "add", 1L) + sparkR.callJMethod(jarray, "add", 1L) # Check if get returns the same element - expect_equal(sparkR.callJMethod(jarr, "get", 0L), 1L) + expect_equal(sparkR.callJMethod(jarray, "get", 0L), 1L) }) test_that("Call static methods", { diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 81d4e14df791d..833f77786c80b 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -2093,7 +2093,7 @@ test_that("higher order functions", { createDataFrame(data.frame(id = 1)), expr("CAST(array(1.0, 2.0, -3.0, -4.0) AS array) xs"), expr("CAST(array(0.0, 3.0, 48.0) AS array) ys"), - expr("array('FAILED', 'SUCCEDED') as vs"), + expr("array('FAILED', 'SUCCEEDED') as vs"), expr("map('foo', 1, 'bar', 2) as mx"), expr("map('foo', 42, 'bar', -1, 'baz', 0) as my") ) @@ -3667,7 +3667,7 @@ test_that("gapply() and gapplyCollect() on a DataFrame", { } # Computes the arithmetic mean of the second column by grouping - # on the first and third columns. Output the groupping value and the average. + # on the first and third columns. Output the grouping value and the average. schema <- structType(structField("a", "integer"), structField("c", "string"), structField("avg", "double")) df3 <- gapply( @@ -3965,7 +3965,7 @@ test_that("catalog APIs, listTables, listColumns, listFunctions", { paste("Error in listFunctions : analysis error - Database", "'zxwtyswklpf_db' does not exist")) - # recoverPartitions does not work with tempory view + # recoverPartitions does not work with temporary view expect_error(recoverPartitions("cars"), "no such table - Table or view 'cars' not found in database 'default'") expect_error(refreshTable("cars"), NA) diff --git a/R/pkg/tests/fulltests/test_utils.R b/R/pkg/tests/fulltests/test_utils.R index c3fb9046fcda4..6c83a137cfb7b 100644 --- a/R/pkg/tests/fulltests/test_utils.R +++ b/R/pkg/tests/fulltests/test_utils.R @@ -116,7 +116,7 @@ test_that("cleanClosure on R functions", { actual <- get("y", envir = env, inherits = FALSE) expect_equal(actual, y) - # Test for combination for nested and sequenctial functions in a closure + # Test for combination for nested and sequential functions in a closure f1 <- function(x) x + 1 f2 <- function(x) f1(x) + 2 userFunc <- function(x) { f1(x); f2(x) } diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index 3713e6c784855..a0608748696a3 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -146,7 +146,7 @@ sparkR.session.stop() Different from many other R packages, to use SparkR, you need an additional installation of Apache Spark. The Spark installation will be used to run a backend process that will compile and execute SparkR programs. 
-After installing the SparkR package, you can call `sparkR.session` as explained in the previous section to start and it will check for the Spark installation. If you are working with SparkR from an interactive shell (eg. R, RStudio) then Spark is downloaded and cached automatically if it is not found. Alternatively, we provide an easy-to-use function `install.spark` for running this manually. If you don't have Spark installed on the computer, you may download it from [Apache Spark Website](https://spark.apache.org/downloads.html). +After installing the SparkR package, you can call `sparkR.session` as explained in the previous section to start and it will check for the Spark installation. If you are working with SparkR from an interactive shell (e.g. R, RStudio) then Spark is downloaded and cached automatically if it is not found. Alternatively, we provide an easy-to-use function `install.spark` for running this manually. If you don't have Spark installed on the computer, you may download it from [Apache Spark Website](https://spark.apache.org/downloads.html). ```{r, eval=FALSE} install.spark() diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBTypeInfo.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBTypeInfo.java index d7423537ddfcf..4d7f76f673865 100644 --- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBTypeInfo.java +++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBTypeInfo.java @@ -133,7 +133,7 @@ class LevelDBTypeInfo { // First create the parent indices, then the child indices. ti.indices().forEach(idx -> { - // In LevelDB, there is no parent index for the NUTURAL INDEX. + // In LevelDB, there is no parent index for the NATURAL INDEX. if (idx.parent().isEmpty() || idx.value().equals(KVIndex.NATURAL_INDEX_NAME)) { indices.put(idx.value(), new Index(idx, ti.getAccessor(idx.value()), null)); } diff --git a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java index 6dcc703e92669..eb2882074d7c7 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java +++ b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java @@ -303,7 +303,7 @@ public void close() { @Override public String toString() { return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("remoteAdress", channel.remoteAddress()) + .append("remoteAddress", channel.remoteAddress()) .append("clientId", clientId) .append("isActive", isActive()) .toString(); diff --git a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthEngine.java b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthEngine.java index 64fdb32a67ada..c2b2edc7f07d5 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthEngine.java +++ b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthEngine.java @@ -287,7 +287,7 @@ private byte[] doCipherOp(int mode, byte[] in, boolean isFinal) } } } catch (InternalError ie) { - // SPARK-25535. The commons-cryto library will throw InternalError if something goes wrong, + // SPARK-25535. The commons-crypto library will throw InternalError if something goes wrong, // and leave bad state behind in the Java wrappers, so it's not safe to use them afterwards. 
if (mode == Cipher.ENCRYPT_MODE) { this.encryptor = null; diff --git a/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthEngineSuite.java b/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthEngineSuite.java index 0790f0079c2bd..1c2061699a128 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthEngineSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthEngineSuite.java @@ -150,8 +150,8 @@ public void testEncryptedMessage() throws Exception { ByteArrayWritableChannel channel = new ByteArrayWritableChannel(data.length); TransportCipher.EncryptedMessage emsg = handler.createEncryptedMessage(buf); - while (emsg.transfered() < emsg.count()) { - emsg.transferTo(channel, emsg.transfered()); + while (emsg.transferred() < emsg.count()) { + emsg.transferTo(channel, emsg.transferred()); } assertEquals(data.length, channel.length()); } finally { @@ -196,9 +196,9 @@ public Long answer(InvocationOnMock invocationOnMock) throws Throwable { TransportCipher.EncryptedMessage emsg = handler.createEncryptedMessage(region); ByteArrayWritableChannel channel = new ByteArrayWritableChannel(testDataLength); // "transferTo" should act correctly when the underlying FileRegion transfers 0 bytes. - assertEquals(0L, emsg.transferTo(channel, emsg.transfered())); - assertEquals(testDataLength, emsg.transferTo(channel, emsg.transfered())); - assertEquals(emsg.transfered(), emsg.count()); + assertEquals(0L, emsg.transferTo(channel, emsg.transferred())); + assertEquals(testDataLength, emsg.transferTo(channel, emsg.transferred())); + assertEquals(emsg.transferred(), emsg.count()); assertEquals(4, channel.length()); } finally { client.close(); diff --git a/common/network-common/src/test/java/org/apache/spark/network/protocol/MessageWithHeaderSuite.java b/common/network-common/src/test/java/org/apache/spark/network/protocol/MessageWithHeaderSuite.java index 3bff34e210e3c..af1c2878672c0 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/protocol/MessageWithHeaderSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/protocol/MessageWithHeaderSuite.java @@ -129,8 +129,8 @@ private void testFileRegionBody(int totalWrites, int writesPerCall) throws Excep private ByteBuf doWrite(MessageWithHeader msg, int minExpectedWrites) throws Exception { int writes = 0; ByteArrayWritableChannel channel = new ByteArrayWritableChannel((int) msg.count()); - while (msg.transfered() < msg.count()) { - msg.transferTo(channel, msg.transfered()); + while (msg.transferred() < msg.count()) { + msg.transferTo(channel, msg.transferred()); writes++; } assertTrue("Not enough writes!", minExpectedWrites <= writes); diff --git a/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java b/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java index ecaeec98da182..32c9acd327213 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java @@ -191,28 +191,28 @@ public void testEncryptedMessage() throws Exception { SaslEncryption.EncryptedMessage emsg = new SaslEncryption.EncryptedMessage(backend, msg, 1024); - long count = emsg.transferTo(channel, emsg.transfered()); + long count = emsg.transferTo(channel, emsg.transferred()); assertTrue(count < data.length); assertTrue(count > 0); // Here, the output buffer is full so 
nothing should be transferred. - assertEquals(0, emsg.transferTo(channel, emsg.transfered())); + assertEquals(0, emsg.transferTo(channel, emsg.transferred())); // Now there's room in the buffer, but not enough to transfer all the remaining data, // so the dummy count should be returned. channel.reset(); - assertEquals(1, emsg.transferTo(channel, emsg.transfered())); + assertEquals(1, emsg.transferTo(channel, emsg.transferred())); // Eventually, the whole message should be transferred. for (int i = 0; i < data.length / 32 - 2; i++) { channel.reset(); - assertEquals(1, emsg.transferTo(channel, emsg.transfered())); + assertEquals(1, emsg.transferTo(channel, emsg.transferred())); } channel.reset(); - count = emsg.transferTo(channel, emsg.transfered()); + count = emsg.transferTo(channel, emsg.transferred()); assertTrue("Unexpected count: " + count, count > 1 && count < data.length); - assertEquals(data.length, emsg.transfered()); + assertEquals(data.length, emsg.transferred()); } finally { msg.release(); } @@ -237,9 +237,9 @@ public void testEncryptedMessageChunking() throws Exception { new SaslEncryption.EncryptedMessage(backend, msg.convertToNetty(), data.length / 8); ByteArrayWritableChannel channel = new ByteArrayWritableChannel(data.length); - while (emsg.transfered() < emsg.count()) { + while (emsg.transferred() < emsg.count()) { channel.reset(); - emsg.transferTo(channel, emsg.transfered()); + emsg.transferTo(channel, emsg.transferred()); } verify(backend, times(8)).wrap(any(byte[].class), anyInt(), anyInt()); diff --git a/common/network-common/src/test/java/org/apache/spark/network/server/OneForOneStreamManagerSuite.java b/common/network-common/src/test/java/org/apache/spark/network/server/OneForOneStreamManagerSuite.java index 45e1836da641f..634b40ed450ee 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/server/OneForOneStreamManagerSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/server/OneForOneStreamManagerSuite.java @@ -72,7 +72,7 @@ public void testMissingChunk() { Assert.assertNotNull(getChunk(manager, streamId, 2)); manager.connectionTerminated(dummyChannel); - // loaded buffers are not released yet as in production a MangedBuffer returned by getChunk() + // loaded buffers are not released yet as in production a ManagedBuffer returned by getChunk() // would only be released by Netty after it is written to the network Mockito.verify(buffer1, Mockito.never()).release(); Mockito.verify(buffer2, Mockito.never()).release(); diff --git a/common/network-common/src/test/java/org/apache/spark/network/util/TransportFrameDecoderSuite.java b/common/network-common/src/test/java/org/apache/spark/network/util/TransportFrameDecoderSuite.java index 4b67aa80351d2..163c52b023822 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/util/TransportFrameDecoderSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/util/TransportFrameDecoderSuite.java @@ -98,7 +98,7 @@ public void testConsolidationPerf() throws Exception { writtenBytes += pieceBytes; } logger.info("Writing 300MiB frame buf with consolidation of threshold " + threshold - + " took " + totalTime + " milis"); + + " took " + totalTime + " millis"); } finally { for (ByteBuf buf : retained) { release(buf); diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/SimpleDownloadFile.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/SimpleDownloadFile.java index 670612fd6f66a..97ecaa627b66c 100644 
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/SimpleDownloadFile.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/SimpleDownloadFile.java @@ -32,7 +32,7 @@ * A DownloadFile that does not take any encryption settings into account for reading and * writing data. * - * This does *not* mean the data in the file is un-encrypted -- it could be that the data is + * This does *not* mean the data in the file is unencrypted -- it could be that the data is * already encrypted when its written, and subsequent layer is responsible for decrypting. */ public class SimpleDownloadFile implements DownloadFile { diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index b8dda22240042..c6aa5f0b58285 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -635,13 +635,13 @@ public UTF8String trimLeft() { public UTF8String trimLeft(UTF8String trimString) { if (trimString == null) return null; // the searching byte position in the source string - int srchIdx = 0; + int searchIdx = 0; // the first beginning byte position of a non-matching character int trimIdx = 0; - while (srchIdx < numBytes) { + while (searchIdx < numBytes) { UTF8String searchChar = copyUTF8String( - srchIdx, srchIdx + numBytesForFirstByte(this.getByte(srchIdx)) - 1); + searchIdx, searchIdx + numBytesForFirstByte(this.getByte(searchIdx)) - 1); int searchCharBytes = searchChar.numBytes; // try to find the matching for the searchChar in the trimString set if (trimString.find(searchChar, 0) >= 0) { @@ -650,9 +650,9 @@ public UTF8String trimLeft(UTF8String trimString) { // no matching, exit the search break; } - srchIdx += searchCharBytes; + searchIdx += searchCharBytes; } - if (srchIdx == 0) { + if (searchIdx == 0) { // Nothing trimmed return this; } diff --git a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala index 69a082053aa65..ab488e18ba3f4 100644 --- a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala +++ b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala @@ -192,7 +192,7 @@ class UTF8StringPropertyCheckSuite extends AnyFunSuite with ScalaCheckDrivenProp } } - val nullalbeSeq = Gen.listOf(Gen.oneOf[String](null: String, randomString)) + val nullableSeq = Gen.listOf(Gen.oneOf[String](null: String, randomString)) test("concat") { def concat(origin: Seq[String]): String = @@ -201,7 +201,7 @@ class UTF8StringPropertyCheckSuite extends AnyFunSuite with ScalaCheckDrivenProp forAll { (inputs: Seq[String]) => assert(UTF8String.concat(inputs.map(toUTF8): _*) === toUTF8(inputs.mkString)) } - forAll (nullalbeSeq) { (inputs: Seq[String]) => + forAll (nullableSeq) { (inputs: Seq[String]) => assert(UTF8String.concat(inputs.map(toUTF8): _*) === toUTF8(concat(inputs))) } } @@ -216,7 +216,7 @@ class UTF8StringPropertyCheckSuite extends AnyFunSuite with ScalaCheckDrivenProp assert(UTF8String.concatWs(toUTF8(sep), inputs.map(toUTF8): _*) === toUTF8(inputs.mkString(sep))) } - forAll(randomString, nullalbeSeq) {(sep: String, inputs: Seq[String]) => + forAll(randomString, nullableSeq) {(sep: String, inputs: Seq[String]) => 
assert(UTF8String.concatWs(toUTF8(sep), inputs.map(toUTF8): _*) === toUTF8(concatWs(sep, inputs))) } diff --git a/dev/appveyor-guide.md b/dev/appveyor-guide.md index a8c0c1ef23ac3..c68b5de9e61d0 100644 --- a/dev/appveyor-guide.md +++ b/dev/appveyor-guide.md @@ -33,22 +33,22 @@ Currently, SparkR on Windows is being tested with [AppVeyor](https://ci.appveyor 2016-09-04 11 07 58 -- Click "Github". +- Click "GitHub". 2016-09-04 11 08 10 -#### After signing up, go to profile to link Github and AppVeyor. +#### After signing up, go to profile to link GitHub and AppVeyor. - Click your account and then click "Profile". 2016-09-04 11 09 43 -- Enable the link with GitHub via clicking "Link Github account". +- Enable the link with GitHub via clicking "Link GitHub account". 2016-09-04 11 09 52 -- Click "Authorize application" in Github site. +- Click "Authorize application" in GitHub site. 2016-09-04 11 10 05 @@ -63,11 +63,11 @@ Currently, SparkR on Windows is being tested with [AppVeyor](https://ci.appveyor 2016-08-30 12 16 35 -- Since we will use Github here, click the "GITHUB" button and then click "Authorize Github" so that AppVeyor can access the Github logs (e.g. commits). +- Since we will use GitHub here, click the "GITHUB" button and then click "Authorize GitHub" so that AppVeyor can access the GitHub logs (e.g. commits). 2016-09-04 11 10 22 -- Click "Authorize application" from Github (the above step will pop up this page). +- Click "Authorize application" from GitHub (the above step will pop up this page). 2016-09-04 11 10 27 diff --git a/dev/create-release/known_translations b/dev/create-release/known_translations index ff41cccde0140..64bd9ada1bf61 100644 --- a/dev/create-release/known_translations +++ b/dev/create-release/known_translations @@ -1,5 +1,5 @@ # This is a mapping of names to be translated through translate-contributors.py -# The format expected on each line should be: - +# The format expected on each line should be: - 012huang - Weiyi Huang 07ARB - Ankit Raj Boudh 10110346 - Xian Liu diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index 240f4c8dfd371..d2953a86afafd 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -452,7 +452,7 @@ if [[ "$1" == "publish-release" ]]; then if ! is_dry_run; then nexus_upload=$NEXUS_ROOT/deployByRepositoryId/$staged_repo_id - echo "Uplading files to $nexus_upload" + echo "Uploading files to $nexus_upload" for file in $(find . -type f) do # strip leading ./ diff --git a/dev/create-release/releaseutils.py b/dev/create-release/releaseutils.py index cc7ad931198a2..a0e9695d58361 100755 --- a/dev/create-release/releaseutils.py +++ b/dev/create-release/releaseutils.py @@ -110,7 +110,7 @@ def __str__(self): # Under the hood, this runs a `git log` on that tag and parses the fields # from the command output to construct a list of Commit objects. Note that # because certain fields reside in the commit description and cannot be parsed -# through the Github API itself, we need to do some intelligent regex parsing +# through the GitHub API itself, we need to do some intelligent regex parsing # to extract those fields. # # This is written using Git 1.8.5. 
@@ -140,7 +140,7 @@ def get_commits(tag): sys.exit("Unexpected format in commit: %s" % commit_digest) [_hash, author, title] = commit_digest.split(field_end_marker) # The PR number and github username is in the commit message - # itself and cannot be accessed through any Github API + # itself and cannot be accessed through any GitHub API pr_number = None match = re.search("Closes #([0-9]+) from ([^/\\s]+)/", commit_body) if match: @@ -252,7 +252,7 @@ def nice_join(str_list): return ", ".join(str_list[:-1]) + ", and " + str_list[-1] -# Return the full name of the specified user on Github +# Return the full name of the specified user on GitHub # If the user doesn't exist, return None def get_github_name(author, github_client): if github_client: diff --git a/dev/create-release/translate-contributors.py b/dev/create-release/translate-contributors.py index 8340266527fc6..be5611ce65a7d 100755 --- a/dev/create-release/translate-contributors.py +++ b/dev/create-release/translate-contributors.py @@ -17,7 +17,7 @@ # This script translates invalid authors in the contributors list generated # by generate-contributors.py. When the script encounters an author name that -# is considered invalid, it searches Github and JIRA in an attempt to search +# is considered invalid, it searches GitHub and JIRA in an attempt to search # for replacements. This tool runs in two modes: # # (1) Interactive mode: For each invalid author name, this script presents @@ -68,7 +68,7 @@ if INTERACTIVE_MODE: print("Running in interactive mode. To disable this, provide the --non-interactive flag.") -# Setup Github and JIRA clients +# Setup GitHub and JIRA clients jira_options = {"server": JIRA_API_BASE} jira_client = JIRA(options=jira_options, basic_auth=(JIRA_USERNAME, JIRA_PASSWORD)) github_client = Github(GITHUB_API_TOKEN) @@ -89,11 +89,11 @@ # Generate candidates for the given author. This should only be called if the given author # name does not represent a full name as this operation is somewhat expensive. Under the -# hood, it makes several calls to the Github and JIRA API servers to find the candidates. +# hood, it makes several calls to the GitHub and JIRA API servers to find the candidates. # # This returns a list of (candidate name, source) 2-tuples. E.g. 
# [ -# (NOT_FOUND, "No full name found for Github user andrewor14"), +# (NOT_FOUND, "No full name found for GitHub user andrewor14"), # ("Andrew Or", "Full name of JIRA user andrewor14"), # ("Andrew Orso", "Full name of SPARK-1444 assignee andrewor14"), # ("Andrew Ordall", "Full name of SPARK-1663 assignee andrewor14"), @@ -104,12 +104,12 @@ def generate_candidates(author, issues): candidates = [] - # First check for full name of Github user + # First check for full name of GitHub user github_name = get_github_name(author, github_client) if github_name: - candidates.append((github_name, "Full name of Github user %s" % author)) + candidates.append((github_name, "Full name of GitHub user %s" % author)) else: - candidates.append((NOT_FOUND, "No full name found for Github user %s" % author)) + candidates.append((NOT_FOUND, "No full name found for GitHub user %s" % author)) # Then do the same for JIRA user jira_name = get_jira_name(author, jira_client) if jira_name: @@ -151,7 +151,7 @@ def generate_candidates(author, issues): candidates[i] = (candidate, source) return candidates -# Translate each invalid author by searching for possible candidates from Github and JIRA +# Translate each invalid author by searching for possible candidates from GitHub and JIRA # In interactive mode, this script presents the user with a list of choices and have the user # select from this list. Additionally, the user may also choose to enter a custom name. # In non-interactive mode, this script picks the first valid author name from the candidates @@ -180,12 +180,12 @@ def generate_candidates(author, issues): issues = temp_author.split("/")[1:] candidates = generate_candidates(author, issues) # Print out potential replacement candidates along with the sources, e.g. - # [X] No full name found for Github user andrewor14 + # [X] No full name found for GitHub user andrewor14 # [X] No assignee found for SPARK-1763 # [0] Andrew Or - Full name of JIRA user andrewor14 # [1] Andrew Orso - Full name of SPARK-1444 assignee andrewor14 # [2] Andrew Ordall - Full name of SPARK-1663 assignee andrewor14 - # [3] andrewor14 - Raw Github username + # [3] andrewor14 - Raw GitHub username # [4] Custom candidate_names = [] bad_prompts = [] # Prompts that can't actually be selected; print these first. @@ -207,7 +207,7 @@ def generate_candidates(author, issues): print(p) # In interactive mode, additionally provide "custom" option and await user response if INTERACTIVE_MODE: - print(" [%d] %s - Raw Github username" % (raw_index, author)) + print(" [%d] %s - Raw GitHub username" % (raw_index, author)) print(" [%d] Custom" % custom_index) response = raw_input(" Your choice: ") last_index = custom_index diff --git a/dev/github_jira_sync.py b/dev/github_jira_sync.py index 9bcebaa22ab86..27451bba905dd 100755 --- a/dev/github_jira_sync.py +++ b/dev/github_jira_sync.py @@ -16,7 +16,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -# Utility for updating JIRA's with information about Github pull requests +# Utility for updating JIRA's with information about GitHub pull requests import json import os @@ -142,9 +142,9 @@ def reset_pr_labels(pr_num, jira_components): jira_prs = get_jira_prs() previous_max = get_max_pr() -print("Retrieved %s JIRA PR's from Github" % len(jira_prs)) +print("Retrieved %s JIRA PR's from GitHub" % len(jira_prs)) jira_prs = [(k, v) for k, v in jira_prs if int(v['number']) > previous_max] -print("%s PR's remain after excluding visted ones" % len(jira_prs)) +print("%s PR's remain after excluding visited ones" % len(jira_prs)) num_updates = 0 considered = [] @@ -157,7 +157,7 @@ def reset_pr_labels(pr_num, jira_components): considered = considered + [pr_num] url = pr['html_url'] - title = "[Github] Pull Request #%s (%s)" % (pr['number'], pr['user']['login']) + title = "[GitHub] Pull Request #%s (%s)" % (pr['number'], pr['user']['login']) try: page = get_json(get_url(JIRA_API_BASE + "/rest/api/2/issue/" + issue + "/remotelink")) existing_links = map(lambda l: l['object']['url'], page) @@ -174,7 +174,7 @@ def reset_pr_labels(pr_num, jira_components): destination = {"title": title, "url": url, "icon": icon} # For all possible fields see: # https://developer.atlassian.com/display/JIRADEV/Fields+in+Remote+Issue+Links - # application = {"name": "Github pull requests", "type": "org.apache.spark.jira.github"} + # application = {"name": "GitHub pull requests", "type": "org.apache.spark.jira.github"} jira_client.add_remote_link(issue, destination) comment = "User '%s' has created a pull request for this issue:" % pr['user']['login'] diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py index 610fb1fd27027..4309a74773e89 100755 --- a/dev/run-tests-jenkins.py +++ b/dev/run-tests-jenkins.py @@ -38,7 +38,7 @@ def print_err(msg): def post_message_to_github(msg, ghprb_pull_id): - print("Attempting to post to Github...") + print("Attempting to post to GitHub...") api_url = os.getenv("GITHUB_API_BASE", "https://api.github.com/repos/apache/spark") url = api_url + "/issues/" + ghprb_pull_id + "/comments" @@ -57,12 +57,12 @@ def post_message_to_github(msg, ghprb_pull_id): if response.getcode() == 201: print(" > Post successful.") except HTTPError as http_e: - print_err("Failed to post message to Github.") + print_err("Failed to post message to GitHub.") print_err(" > http_code: %s" % http_e.code) print_err(" > api_response: %s" % http_e.read()) print_err(" > data: %s" % posted_message) except URLError as url_e: - print_err("Failed to post message to Github.") + print_err("Failed to post message to GitHub.") print_err(" > urllib_status: %s" % url_e.reason[1]) print_err(" > data: %s" % posted_message) @@ -89,7 +89,7 @@ def run_pr_checks(pr_tests, ghprb_actual_commit, sha1): """ Executes a set of pull request checks to ease development and report issues with various components such as style, linting, dependencies, compatibilities, etc. - @return a list of messages to post back to Github + @return a list of messages to post back to GitHub """ # Ensure we save off the current HEAD to revert to current_pr_head = run_cmd(['git', 'rev-parse', 'HEAD'], return_output=True).strip() @@ -109,7 +109,7 @@ def run_tests(tests_timeout): """ Runs the `dev/run-tests` script and responds with the correct error message under the various failure scenarios. 
- @return a tuple containing the test result code and the result note to post to Github + @return a tuple containing the test result code and the result note to post to GitHub """ test_result_code = subprocess.Popen(['timeout', @@ -198,16 +198,16 @@ def main(): # To write a PR test: # * the file must reside within the dev/tests directory # * be an executable bash script - # * accept three arguments on the command line, the first being the Github PR long commit - # hash, the second the Github SHA1 hash, and the final the current PR hash + # * accept three arguments on the command line, the first being the GitHub PR long commit + # hash, the second the GitHub SHA1 hash, and the final the current PR hash # * and, lastly, return string output to be included in the pr message output that will - # be posted to Github + # be posted to GitHub pr_tests = [ "pr_merge_ability", "pr_public_classes" ] - # `bind_message_base` returns a function to generate messages for Github posting + # `bind_message_base` returns a function to generate messages for GitHub posting github_message = functools.partial(pr_message, build_display_name, build_url, diff --git a/dev/run-tests.py b/dev/run-tests.py index 6bc73ca3669f3..37a15a758d898 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -642,7 +642,7 @@ def main(): # /home/jenkins/anaconda2/envs/py36/bin os.environ["PATH"] = "/home/anaconda/envs/py36/bin:" + os.environ.get("PATH") else: - # else we're running locally or Github Actions. + # else we're running locally or GitHub Actions. build_tool = "sbt" hadoop_version = os.environ.get("HADOOP_PROFILE", "hadoop3.2") hive_version = os.environ.get("HIVE_PROFILE", "hive2.3") @@ -660,12 +660,12 @@ def main(): included_tags = [] excluded_tags = [] if should_only_test_modules: - # If we're running the tests in Github Actions, attempt to detect and test + # If we're running the tests in GitHub Actions, attempt to detect and test # only the affected modules. if test_env == "github_actions": if os.environ["GITHUB_INPUT_BRANCH"] != "": # Dispatched request - # Note that it assumes Github Actions has already merged + # Note that it assumes GitHub Actions has already merged # the given `GITHUB_INPUT_BRANCH` branch. changed_files = identify_changed_files_from_git_commits( "HEAD", target_branch=os.environ["GITHUB_SHA"]) diff --git a/dev/tests/pr_merge_ability.sh b/dev/tests/pr_merge_ability.sh index 25fdbccac4dd8..a32667730f76c 100755 --- a/dev/tests/pr_merge_ability.sh +++ b/dev/tests/pr_merge_ability.sh @@ -22,7 +22,7 @@ # another branch and returning results to be published. More details can be # found at dev/run-tests-jenkins. # -# Arg1: The Github Pull Request Actual Commit +# Arg1: The GitHub Pull Request Actual Commit # known as `ghprbActualCommit` in `run-tests-jenkins` # Arg2: The SHA1 hash # known as `sha1` in `run-tests-jenkins` diff --git a/dev/tests/pr_public_classes.sh b/dev/tests/pr_public_classes.sh index 479d1851fe0b8..ad1ad5e736594 100755 --- a/dev/tests/pr_public_classes.sh +++ b/dev/tests/pr_public_classes.sh @@ -22,7 +22,7 @@ # another branch and returning results to be published. More details can be # found at dev/run-tests-jenkins. 
# -# Arg1: The Github Pull Request Actual Commit +# Arg1: The GitHub Pull Request Actual Commit # known as `ghprbActualCommit` in `run-tests-jenkins` ghprbActualCommit="$1" diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 98769d951b6ac..5a66bfca27a27 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -1729,7 +1729,7 @@ object MimaExcludes { ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.regression.RandomForestRegressionModel.numTrees"), ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.regression.RandomForestRegressionModel.setFeatureSubsetStrategy") ) ++ Seq( - // [SPARK-21680][ML][MLLIB]optimzie Vector coompress + // [SPARK-21680][ML][MLLIB]optimize Vector compress ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.mllib.linalg.Vector.toSparseWithSize"), ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.linalg.Vector.toSparseWithSize") ) ++ Seq( diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 05413b7091ad9..a5951e0452943 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -198,7 +198,7 @@ object SparkBuild extends PomBuild { ) // Silencer: Scala compiler plugin for warning suppression - // Aim: enable fatal warnings, but supress ones related to using of deprecated APIs + // Aim: enable fatal warnings, but suppress ones related to using of deprecated APIs // depends on scala version: // <2.13 - silencer 1.6.0 and compiler settings to enable fatal warnings // 2.13.0,2.13.1 - silencer 1.7.1 and compiler settings to enable fatal warnings @@ -222,7 +222,7 @@ object SparkBuild extends PomBuild { "-Xfatal-warnings", "-deprecation", "-Ywarn-unused-import", - "-P:silencer:globalFilters=.*deprecated.*" //regex to catch deprecation warnings and supress them + "-P:silencer:globalFilters=.*deprecated.*" //regex to catch deprecation warnings and suppress them ) } else { Seq( @@ -327,7 +327,7 @@ object SparkBuild extends PomBuild { // to be enabled in specific ones that have previous artifacts MimaKeys.mimaFailOnNoPrevious := false, - // To prevent intermittent compliation failures, see also SPARK-33297 + // To prevent intermittent compilation failures, see also SPARK-33297 // Apparently we can remove this when we use JDK 11. Test / classLoaderLayeringStrategy := ClassLoaderLayeringStrategy.Flat ) diff --git a/python/docs/source/_static/css/pyspark.css b/python/docs/source/_static/css/pyspark.css index 2fd8720e2fa0d..1e493c4c868e6 100644 --- a/python/docs/source/_static/css/pyspark.css +++ b/python/docs/source/_static/css/pyspark.css @@ -51,7 +51,7 @@ h3 { max-width: 80%; } -/* Left pannel size */ +/* Left panel size */ @media (min-width: 768px) { .col-md-3 { flex: 0 0 20%; diff --git a/python/docs/source/_templates/autosummary/class.rst b/python/docs/source/_templates/autosummary/class.rst index d794f797ee2ad..b5f62677ee0ed 100644 --- a/python/docs/source/_templates/autosummary/class.rst +++ b/python/docs/source/_templates/autosummary/class.rst @@ -15,7 +15,7 @@ specific language governing permissions and limitations under the License. -.. Workaround to avoud documenting __init__. +.. Workaround to avoid documenting __init__. 
{% extends "!autosummary/class.rst" %} diff --git a/python/docs/source/development/debugging.rst b/python/docs/source/development/debugging.rst index bc141a6f44a6f..829919858f67a 100644 --- a/python/docs/source/development/debugging.rst +++ b/python/docs/source/development/debugging.rst @@ -54,7 +54,7 @@ Enter the name of this new configuration, for example, ``MyRemoteDebugger`` and .. image:: ../../../../docs/img/pyspark-remote-debug1.png :alt: PyCharm remote debugger setting -| After that, you should install the corresponding version of the ``pydevd-pycahrm`` package in all the machines which will connect to your PyCharm debugger. In the previous dialog, it shows the command to install. +| After that, you should install the corresponding version of the ``pydevd-pycharm`` package in all the machines which will connect to your PyCharm debugger. In the previous dialog, it shows the command to install. .. code-block:: text diff --git a/python/docs/source/development/testing.rst b/python/docs/source/development/testing.rst index 08fd730a19f4b..3eab8d04511d6 100644 --- a/python/docs/source/development/testing.rst +++ b/python/docs/source/development/testing.rst @@ -53,5 +53,5 @@ Running tests using GitHub Actions ---------------------------------- You can run the full PySpark tests by using GitHub Actions in your own forked GitHub -repositry with a few clicks. Please refer to +repository with a few clicks. Please refer to `Running tests in your forked repository using GitHub Actions `_ for more details. diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst index 9c9ff7fa7844b..a90f5fe159553 100644 --- a/python/docs/source/getting_started/install.rst +++ b/python/docs/source/getting_started/install.rst @@ -42,7 +42,7 @@ PySpark installation using `PyPI `_ is as fol pip install pyspark -If you want to install extra dependencies for a specific componenet, you can install it as below: +If you want to install extra dependencies for a specific component, you can install it as below: .. code-block:: bash @@ -105,7 +105,7 @@ Now activate the newly created environment with the following command: conda activate pyspark_env You can install pyspark by `Using PyPI <#using-pypi>`_ to install PySpark in the newly created -environment, for example as below. It will install PySpark under the new virtual environemnt +environment, for example as below. It will install PySpark under the new virtual environment ``pyspark_env`` created above. .. code-block:: bash @@ -126,7 +126,7 @@ Manually Downloading -------------------- PySpark is included in the distributions available at the `Apache Spark website `_. -You can download a distribution you want from the site. After that, uncompress the tar file into the directoy where you want +You can download a distribution you want from the site. After that, uncompress the tar file into the directory where you want to install Spark, for example, as below: .. 
code-block:: bash diff --git a/python/docs/source/getting_started/quickstart.ipynb b/python/docs/source/getting_started/quickstart.ipynb index ab3645591955f..550b532fefc14 100644 --- a/python/docs/source/getting_started/quickstart.ipynb +++ b/python/docs/source/getting_started/quickstart.ipynb @@ -11,7 +11,7 @@ "\n", "There is also other useful information in Apache Spark documentation site, see the latest version of [Spark SQL and DataFrames](https://spark.apache.org/docs/latest/sql-programming-guide.html), [RDD Programming Guide](https://spark.apache.org/docs/latest/rdd-programming-guide.html), [Structured Streaming Programming Guide](https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html), [Spark Streaming Programming Guide](https://spark.apache.org/docs/latest/streaming-programming-guide.html) and [Machine Learning Library (MLlib) Guide](https://spark.apache.org/docs/latest/ml-guide.html).\n", "\n", - "PySaprk applications start with initializing `SparkSession` which is the entry point of PySpark as below. In case of running it in PySpark shell via pyspark executable, the shell automatically creates the session in the variable spark for users." + "PySpark applications start with initializing `SparkSession` which is the entry point of PySpark as below. In case of running it in PySpark shell via pyspark executable, the shell automatically creates the session in the variable spark for users." ] }, { @@ -392,7 +392,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "`DataFrame.collect()` collects the distributed data to the driver side as the local data in Python. Note that this can throw an out-of-memory error when the dataset is too larget to fit in the driver side because it collects all the data from executors to the driver side." + "`DataFrame.collect()` collects the distributed data to the driver side as the local data in Python. Note that this can throw an out-of-memory error when the dataset is too large to fit in the driver side because it collects all the data from executors to the driver side." ] }, { diff --git a/python/docs/source/index.rst b/python/docs/source/index.rst index 4286f616374c5..6a631052a642d 100644 --- a/python/docs/source/index.rst +++ b/python/docs/source/index.rst @@ -30,7 +30,7 @@ of Spark's features such as Spark SQL, DataFrame, Streaming, MLlib (Machine Learning) and Spark Core. .. 
image:: ../../../docs/img/pyspark-components.png - :alt: PySpark Compoenents + :alt: PySpark Components **Spark SQL and DataFrame** diff --git a/python/pyspark/__init__.pyi b/python/pyspark/__init__.pyi index 98bd40684c01b..ef07c32b1db7b 100644 --- a/python/pyspark/__init__.pyi +++ b/python/pyspark/__init__.pyi @@ -53,7 +53,7 @@ from pyspark.taskcontext import ( # noqa: F401 ) from pyspark.util import InheritableThread as InheritableThread # noqa: F401 -# Compatiblity imports +# Compatibility imports from pyspark.sql import ( # noqa: F401 SQLContext as SQLContext, HiveContext as HiveContext, diff --git a/python/pyspark/cloudpickle/cloudpickle.py b/python/pyspark/cloudpickle/cloudpickle.py index 8e683e7a6988b..58c274bd79720 100644 --- a/python/pyspark/cloudpickle/cloudpickle.py +++ b/python/pyspark/cloudpickle/cloudpickle.py @@ -88,7 +88,7 @@ def g(): DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL # Track the provenance of reconstructed dynamic classes to make it possible to -# recontruct instances from the matching singleton class definition when +# reconstruct instances from the matching singleton class definition when # appropriate and preserve the usual "isinstance" semantics of Python objects. _DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary() _DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary() @@ -236,7 +236,7 @@ def _extract_code_globals(co): out_names = {names[oparg] for _, oparg in _walk_global_ops(co)} # Declaring a function inside another one using the "def ..." - # syntax generates a constant code object corresonding to the one + # syntax generates a constant code object corresponding to the one # of the nested function's As the nested function may itself need # global variables, we need to introspect its code, extract its # globals, (look for code object in it's co_consts attribute..) and @@ -457,7 +457,7 @@ def _is_parametrized_type_hint(obj): is_typing = getattr(obj, '__origin__', None) is not None # typing_extensions.Literal - is_litteral = getattr(obj, '__values__', None) is not None + is_literal = getattr(obj, '__values__', None) is not None # typing_extensions.Final is_final = getattr(obj, '__type__', None) is not None @@ -469,7 +469,7 @@ def _is_parametrized_type_hint(obj): getattr(obj, '__result__', None) is not None and getattr(obj, '__args__', None) is not None ) - return any((is_typing, is_litteral, is_final, is_union, is_tuple, + return any((is_typing, is_literal, is_final, is_union, is_tuple, is_callable)) def _create_parametrized_type_hint(origin, args): @@ -699,7 +699,7 @@ def _make_skel_func(code, cell_count, base_globals=None): """ # This function is deprecated and should be removed in cloudpickle 1.7 warnings.warn( - "A pickle file created using an old (<=1.4.1) version of cloudpicke " + "A pickle file created using an old (<=1.4.1) version of cloudpickle " "is currently being loaded. This is not supported by cloudpickle and " "will break in cloudpickle 1.7", category=UserWarning ) diff --git a/python/pyspark/cloudpickle/cloudpickle_fast.py b/python/pyspark/cloudpickle/cloudpickle_fast.py index e8e46b88fdc91..3c48ff7b0a885 100644 --- a/python/pyspark/cloudpickle/cloudpickle_fast.py +++ b/python/pyspark/cloudpickle/cloudpickle_fast.py @@ -6,7 +6,7 @@ is only available for Python versions 3.8+, a lot of backward-compatibility code is also removed. -Note that the C Pickler sublassing API is CPython-specific. Therefore, some +Note that the C Pickler subclassing API is CPython-specific. 
Therefore, some guards present in cloudpickle.py that were written to handle PyPy specificities are not present in cloudpickle_fast.py """ @@ -179,7 +179,7 @@ def _class_getstate(obj): clsdict.pop('__weakref__', None) if issubclass(type(obj), abc.ABCMeta): - # If obj is an instance of an ABCMeta subclass, dont pickle the + # If obj is an instance of an ABCMeta subclass, don't pickle the # cache/negative caches populated during isinstance/issubclass # checks, but pickle the list of registered subclasses of obj. clsdict.pop('_abc_cache', None) @@ -407,7 +407,7 @@ def _class_reduce(obj): def _function_setstate(obj, state): - """Update the state of a dynaamic function. + """Update the state of a dynamic function. As __closure__ and __globals__ are readonly attributes of a function, we cannot rely on the native setstate routine of pickle.load_build, that calls @@ -556,7 +556,7 @@ def dump(self, obj): # `dispatch` attribute. Earlier versions of the protocol 5 CloudPickler # used `CloudPickler.dispatch` as a class-level attribute storing all # reducers implemented by cloudpickle, but the attribute name was not a - # great choice given the meaning of `Cloudpickler.dispatch` when + # great choice given the meaning of `CloudPickler.dispatch` when # `CloudPickler` extends the pure-python pickler. dispatch = dispatch_table @@ -630,7 +630,7 @@ def reducer_override(self, obj): return self._function_reduce(obj) else: # fallback to save_global, including the Pickler's - # distpatch_table + # dispatch_table return NotImplemented else: diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 1bd5961e0525a..1c542fa897ece 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -260,7 +260,7 @@ def _do_init(self, master, appName, sparkHome, pyFiles, environment, batchSize, sys.path.insert(1, filepath) except Exception: warnings.warn( - "Failed to add file [%s] speficied in 'spark.submit.pyFiles' to " + "Failed to add file [%s] specified in 'spark.submit.pyFiles' to " "Python path:\n %s" % (path, "\n ".join(sys.path)), RuntimeWarning) @@ -603,7 +603,7 @@ def _serialize_to_jvm(self, data, serializer, reader_func, createRDDServer): tempFile.close() return reader_func(tempFile.name) finally: - # we eagerily reads the file so we can delete right after. + # we eagerly reads the file so we can delete right after. os.unlink(tempFile.name) def pickleFile(self, name, minPartitions=None): diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py index fe2e326dff8be..cc0c3a8888a66 100644 --- a/python/pyspark/java_gateway.py +++ b/python/pyspark/java_gateway.py @@ -208,7 +208,7 @@ def local_connect_and_auth(port, auth_secret): return (sockfile, sock) except socket.error as e: emsg = str(e) - errors.append("tried to connect to %s, but an error occured: %s" % (sa, emsg)) + errors.append("tried to connect to %s, but an error occurred: %s" % (sa, emsg)) sock.close() sock = None raise Exception("could not open socket: %s" % errors) diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 82b9a6db1eb92..8138f34d7a19e 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -5798,7 +5798,7 @@ def setHandleInvalid(self, value): class _VarianceThresholdSelectorParams(HasFeaturesCol, HasOutputCol): """ Params for :py:class:`VarianceThresholdSelector` and - :py:class:`VarianceThresholdSelectorrModel`. + :py:class:`VarianceThresholdSelectorModel`. .. 
versionadded:: 3.1.0 """ diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 5ce484d964a5a..d37654a7388f5 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -1491,7 +1491,7 @@ def setParams(self, *, featuresCol="features", labelCol="label", predictionCol=" maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0, checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None, - impuriy="variance", featureSubsetStrategy="all", validationTol=0.01, + impurity="variance", featureSubsetStrategy="all", validationTol=0.01, validationIndicatorCol=None, leafCol="", minWeightFractionPerNode=0.0, weightCol=None): """ diff --git a/python/pyspark/ml/regression.pyi b/python/pyspark/ml/regression.pyi index b8f1e61859c72..61172305a3726 100644 --- a/python/pyspark/ml/regression.pyi +++ b/python/pyspark/ml/regression.pyi @@ -477,7 +477,7 @@ class GBTRegressor( maxIter: int = ..., stepSize: float = ..., seed: Optional[int] = ..., - impuriy: str = ..., + impurity: str = ..., featureSubsetStrategy: str = ..., validationTol: float = ..., validationIndicatorCol: Optional[str] = ..., diff --git a/python/pyspark/ml/tests/test_algorithms.py b/python/pyspark/ml/tests/test_algorithms.py index f8b61b7c57919..50475210607c8 100644 --- a/python/pyspark/ml/tests/test_algorithms.py +++ b/python/pyspark/ml/tests/test_algorithms.py @@ -116,7 +116,7 @@ def test_output_columns(self): output = model.transform(df) self.assertEqual(output.columns, ["label", "features", "rawPrediction", "prediction"]) - def test_parallelism_doesnt_change_output(self): + def test_parallelism_does_not_change_output(self): df = self.spark.createDataFrame([(0.0, Vectors.dense(1.0, 0.8)), (1.0, Vectors.sparse(2, [], [])), (2.0, Vectors.dense(0.5, 0.5))], diff --git a/python/pyspark/ml/tests/test_image.py b/python/pyspark/ml/tests/test_image.py index ceecdae971c99..1001598779d48 100644 --- a/python/pyspark/ml/tests/test_image.py +++ b/python/pyspark/ml/tests/test_image.py @@ -33,7 +33,7 @@ def test_read_images(self): self.assertEqual(df.count(), 4) first_row = df.take(1)[0][0] # compare `schema.simpleString()` instead of directly compare schema, - # because the df loaded from datasouce may change schema column nullability. + # because the df loaded from datasource may change schema column nullability. self.assertEqual(df.schema.simpleString(), ImageSchema.imageSchema.simpleString()) self.assertEqual(df.schema["image"].dataType.simpleString(), ImageSchema.columnSchema.simpleString()) diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py index e1a009643c5f2..cfc18c057f0a8 100644 --- a/python/pyspark/mllib/clustering.py +++ b/python/pyspark/mllib/clustering.py @@ -927,7 +927,7 @@ def setInitialCenters(self, centers, weights): @since('1.5.0') def setRandomCenters(self, dim, weight, seed): """ - Set the initial centres to be random samples from + Set the initial centers to be random samples from a gaussian population with constant weights. """ rng = random.RandomState(seed) diff --git a/python/pyspark/mllib/evaluation.py b/python/pyspark/mllib/evaluation.py index 198a9791774a9..2f25c7672a93a 100644 --- a/python/pyspark/mllib/evaluation.py +++ b/python/pyspark/mllib/evaluation.py @@ -457,7 +457,7 @@ def meanAveragePrecision(self): """ Returns the mean average precision (MAP) of all the queries. 
If a query has an empty ground truth set, the average precision will be zero and - a log warining is generated. + a log warning is generated. """ return self.call("meanAveragePrecision") @@ -466,7 +466,7 @@ def meanAveragePrecisionAt(self, k): """ Returns the mean average precision (MAP) at first k ranking of all the queries. If a query has an empty ground truth set, the average precision will be zero and - a log warining is generated. + a log warning is generated. """ return self.call("meanAveragePrecisionAt", int(k)) diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py index e549b0ac43721..c224e38473cf6 100644 --- a/python/pyspark/mllib/regression.py +++ b/python/pyspark/mllib/regression.py @@ -787,7 +787,7 @@ def _validate(self, dstream): "dstream should be a DStream object, got %s" % type(dstream)) if not self._model: raise ValueError( - "Model must be intialized using setInitialWeights") + "Model must be initialized using setInitialWeights") def predictOn(self, dstream): """ diff --git a/python/pyspark/mllib/stat/_statistics.py b/python/pyspark/mllib/stat/_statistics.py index a4b45cf55febe..d8f3cb840e45c 100644 --- a/python/pyspark/mllib/stat/_statistics.py +++ b/python/pyspark/mllib/stat/_statistics.py @@ -178,7 +178,7 @@ def chiSqTest(observed, expected=None): """ If `observed` is Vector, conduct Pearson's chi-squared goodness of fit test of the observed data against the expected distribution, - or againt the uniform distribution (by default), with each category + or against the uniform distribution (by default), with each category having an expected frequency of `1 / len(observed)`. If `observed` is matrix, conduct Pearson's independence test on the diff --git a/python/pyspark/mllib/tests/test_streaming_algorithms.py b/python/pyspark/mllib/tests/test_streaming_algorithms.py index b94fb2778d88d..f6c6779e83f13 100644 --- a/python/pyspark/mllib/tests/test_streaming_algorithms.py +++ b/python/pyspark/mllib/tests/test_streaming_algorithms.py @@ -189,7 +189,7 @@ def generateLogisticInput(offset, scale, nPoints, seed): Generate 1 / (1 + exp(-x * scale + offset)) where, - x is randomnly distributed and the threshold + x is randomly distributed and the threshold and labels for each sample in x is obtained from a random uniform distribution. """ diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 1964070040cdf..34faaacff5eb3 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -1253,7 +1253,7 @@ def histogram(self, buckets): and 50 we would have a histogram of 1,0,1. If your histogram is evenly spaced (e.g. [0, 10, 20, 30]), - this can be switched from an O(log n) inseration to O(1) per + this can be switched from an O(log n) insertion to O(1) per element (where n is the number of buckets). Buckets must be sorted, not contain any duplicates, and have @@ -2292,7 +2292,7 @@ def groupWith(self, other, *others): """ return python_cogroup((self, other) + others, numPartitions=None) - # TODO: add variant with custom parittioner + # TODO: add variant with custom partitioner def cogroup(self, other, numPartitions=None): """ For each key k in `self` or `other`, return a resulting RDD that diff --git a/python/pyspark/resource/requests.py b/python/pyspark/resource/requests.py index 74d26d04312c4..4deb22b5948f0 100644 --- a/python/pyspark/resource/requests.py +++ b/python/pyspark/resource/requests.py @@ -189,7 +189,7 @@ def requests(self): class TaskResourceRequest(object): """ - A task resource request. 
This is used in conjuntion with the + A task resource request. This is used in conjunction with the :class:`pyspark.resource.ResourceProfile` to programmatically specify the resources needed for an RDD that will be applied at the stage level. The amount is specified as a Double to allow for saying you want more than 1 task per resource. Valid values @@ -226,7 +226,7 @@ def amount(self): class TaskResourceRequests(object): """ - A set of task resource requests. This is used in conjuntion with the + A set of task resource requests. This is used in conjunction with the :class:`pyspark.resource.ResourceProfileBuilder` to programmatically specify the resources needed for an RDD that will be applied at the stage level. diff --git a/python/pyspark/shuffle.py b/python/pyspark/shuffle.py index 89be6295f9888..4ba846227188c 100644 --- a/python/pyspark/shuffle.py +++ b/python/pyspark/shuffle.py @@ -418,7 +418,7 @@ def _cleanup(self): class ExternalSorter(object): """ - ExtenalSorter will divide the elements into chunks, sort them in + ExternalSorter will divide the elements into chunks, sort them in memory and dump them into disks, finally merge them back. The spilling will only happen when the used memory goes above diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py index 345e81bd2d73e..760805400aca9 100644 --- a/python/pyspark/sql/column.py +++ b/python/pyspark/sql/column.py @@ -425,7 +425,7 @@ def dropFields(self, *fieldNames): +--------------+ However, if you are going to add/replace multiple nested fields, - it is preffered to extract out the nested struct before + it is preferred to extract out the nested struct before adding/replacing multiple fields e.g. >>> df.select(col("a").withField( diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 9fae27a2d9c6c..fe7d26d1bcfd2 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -1497,7 +1497,7 @@ def summary(self, *statistics): - stddev - min - max - - arbitrary approximate percentiles specified as a percentage (eg, 75%) + - arbitrary approximate percentiles specified as a percentage (e.g., 75%) If no statistics are given, this function computes count, mean, stddev, min, approximate quartiles (percentiles at 25%, 50%, and 75%), and max. diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index ea91e8593e21f..4dc3129fd6bc2 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -1300,7 +1300,7 @@ def spark_partition_id(): Notes ----- - This is indeterministic because it depends on data partitioning and task scheduling. + This is non deterministic because it depends on data partitioning and task scheduling. 
Examples -------- @@ -4110,7 +4110,7 @@ def _get_lambda_parameters(f): # We should exclude functions that use # variable args and keyword argnames # as well as keyword only args - supported_parmeter_types = { + supported_parameter_types = { inspect.Parameter.POSITIONAL_OR_KEYWORD, inspect.Parameter.POSITIONAL_ONLY, } @@ -4125,7 +4125,7 @@ def _get_lambda_parameters(f): ) # and all arguments can be used as positional - if not all(p.kind in supported_parmeter_types for p in parameters): + if not all(p.kind in supported_parameter_types for p in parameters): raise ValueError( "f should use only POSITIONAL or POSITIONAL OR KEYWORD arguments" ) @@ -4640,7 +4640,7 @@ def years(col): Notes ----- - This function can be used only in combinatiion with + This function can be used only in combination with :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` method of the `DataFrameWriterV2`. @@ -4664,7 +4664,7 @@ def months(col): Notes ----- - This function can be used only in combinatiion with + This function can be used only in combination with :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` method of the `DataFrameWriterV2`. @@ -4688,7 +4688,7 @@ def days(col): Notes ----- - This function can be used only in combinatiion with + This function can be used only in combination with :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` method of the `DataFrameWriterV2`. @@ -4712,7 +4712,7 @@ def hours(col): Notes ----- - This function can be used only in combinatiion with + This function can be used only in combination with :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` method of the `DataFrameWriterV2`. diff --git a/python/pyspark/sql/pandas/_typing/protocols/frame.pyi b/python/pyspark/sql/pandas/_typing/protocols/frame.pyi index de679ee2cd017..9148e7a2dca8e 100644 --- a/python/pyspark/sql/pandas/_typing/protocols/frame.pyi +++ b/python/pyspark/sql/pandas/_typing/protocols/frame.pyi @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. -# This Protocol resuses core Pandas annotation. +# This Protocol reuses core Pandas annotation. # Overall pipeline looks as follows # - Stubgen pandas.core.frame # - Add Protocol as a base class diff --git a/python/pyspark/sql/pandas/_typing/protocols/series.pyi b/python/pyspark/sql/pandas/_typing/protocols/series.pyi index 14babb067da0d..f2de2e8b129fd 100644 --- a/python/pyspark/sql/pandas/_typing/protocols/series.pyi +++ b/python/pyspark/sql/pandas/_typing/protocols/series.pyi @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. -# This Protocol resuses core Pandas annotation. +# This Protocol reuses core Pandas annotation. # Overall pipeline looks as follows # - Stubgen pandas.core.series # - Add Protocol as a base class diff --git a/python/pyspark/sql/pandas/functions.py b/python/pyspark/sql/pandas/functions.py index 750aa4b0e6c56..4cd0b196d3366 100644 --- a/python/pyspark/sql/pandas/functions.py +++ b/python/pyspark/sql/pandas/functions.py @@ -99,7 +99,7 @@ def pandas_udf(f=None, returnType=None, functionType=None): ... s3['col2'] = s1 + s2.str.len() ... return s3 ... - >>> # Create a Spark DataFrame that has three columns including a sturct column. + >>> # Create a Spark DataFrame that has three columns including a struct column. ... df = spark.createDataFrame( ... [[1, "a string", ("a nested string",)]], ... 
"long_col long, string_col string, struct_col struct") @@ -114,7 +114,7 @@ def pandas_udf(f=None, returnType=None, functionType=None): | |-- col1: string (nullable = true) | |-- col2: long (nullable = true) - In the following sections, it describes the cominations of the supported type hints. For + In the following sections, it describes the combinations of the supported type hints. For simplicity, `pandas.DataFrame` variant is omitted. * Series to Series diff --git a/python/pyspark/sql/tests/test_pandas_grouped_map.py b/python/pyspark/sql/tests/test_pandas_grouped_map.py index ee68b95fc478d..a639a8d51f55c 100644 --- a/python/pyspark/sql/tests/test_pandas_grouped_map.py +++ b/python/pyspark/sql/tests/test_pandas_grouped_map.py @@ -484,7 +484,7 @@ def dummy_pandas_udf(df): col('temp0.key') == col('temp1.key')) self.assertEquals(res.count(), 5) - def test_mixed_scalar_udfs_followed_by_grouby_apply(self): + def test_mixed_scalar_udfs_followed_by_groupby_apply(self): df = self.spark.range(0, 10).toDF('v1') df = df.withColumn('v2', udf(lambda x: x + 1, 'int')(df['v1'])) \ .withColumn('v3', pandas_udf(lambda x: x + 2, 'int')(df['v1'])) diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py index a7dcbfd32ac1c..9a1c0edcce4ed 100644 --- a/python/pyspark/sql/tests/test_udf.py +++ b/python/pyspark/sql/tests/test_udf.py @@ -459,7 +459,7 @@ def test_udf_with_string_return_type(self): self.assertTupleEqual(expected, actual) - def test_udf_shouldnt_accept_noncallable_object(self): + def test_udf_should_not_accept_noncallable_object(self): non_callable = None self.assertRaises(TypeError, UserDefinedFunction, non_callable, StringType()) @@ -683,7 +683,7 @@ def tearDown(self): if SparkContext._active_spark_context is not None: SparkContext._active_spark_context.stop() - def test_udf_init_shouldnt_initialize_context(self): + def test_udf_init_should_not_initialize_context(self): UserDefinedFunction(lambda x: x, StringType()) self.assertIsNone( diff --git a/python/pyspark/sql/utils.py b/python/pyspark/sql/utils.py index 18f8ba29f95a2..f5db783d2b5bc 100644 --- a/python/pyspark/sql/utils.py +++ b/python/pyspark/sql/utils.py @@ -151,10 +151,10 @@ def toJArray(gateway, jtype, arr): arr : python type list """ - jarr = gateway.new_array(jtype, len(arr)) + jarray = gateway.new_array(jtype, len(arr)) for i in range(0, len(arr)): - jarr[i] = arr[i] - return jarr + jarray[i] = arr[i] + return jarray def require_test_compiled(): diff --git a/python/pyspark/streaming/context.py b/python/pyspark/streaming/context.py index c4dc0d3af3332..2e6d7ede88551 100644 --- a/python/pyspark/streaming/context.py +++ b/python/pyspark/streaming/context.py @@ -281,7 +281,7 @@ def socketTextStream(self, hostname, port, storageLevel=StorageLevel.MEMORY_AND_ def textFileStream(self, directory): """ Create an input stream that monitors a Hadoop-compatible file system - for new files and reads them as text files. Files must be wrriten to the + for new files and reads them as text files. Files must be written to the monitored directory by "moving" them from another location within the same file system. File names starting with . are ignored. The text files must be encoded as UTF-8. 
diff --git a/python/pyspark/tests/test_context.py b/python/pyspark/tests/test_context.py index d86f6c3c1571c..8397ef1c4b62d 100644 --- a/python/pyspark/tests/test_context.py +++ b/python/pyspark/tests/test_context.py @@ -175,8 +175,8 @@ def test_parallelize_eager_cleanup(self): with SparkContext() as sc: temp_files = os.listdir(sc._temp_dir) rdd = sc.parallelize([0, 1, 2]) - post_parallalize_temp_files = os.listdir(sc._temp_dir) - self.assertEqual(temp_files, post_parallalize_temp_files) + post_parallelize_temp_files = os.listdir(sc._temp_dir) + self.assertEqual(temp_files, post_parallelize_temp_files) def test_set_conf(self): # This is for an internal use case. When there is an existing SparkContext, diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index 1b09d327a5dfe..8ca4bb37e5fa4 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -59,7 +59,7 @@ def report_times(outfile, boot, init, finish): def add_path(path): - # worker can be used, so donot add path multiple times + # worker can be used, so do not add path multiple times if path not in sys.path: # overwrite system packages sys.path.insert(1, path) diff --git a/python/test_support/userlibrary.py b/python/test_support/userlibrary.py index 73fd26e71f10d..90cd30723ddfe 100755 --- a/python/test_support/userlibrary.py +++ b/python/test_support/userlibrary.py @@ -16,7 +16,7 @@ # """ -Used to test shipping of code depenencies with SparkContext.addPyFile(). +Used to test shipping of code dependencies with SparkContext.addPyFile(). """ diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index e3af1ccc24f1c..41194f3a2676f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -420,7 +420,7 @@ private[spark] object Config extends Logging { val KUBERNETES_FILE_UPLOAD_PATH = ConfigBuilder("spark.kubernetes.file.upload.path") .doc("Hadoop compatible file system path where files from the local file system " + - "will be uploded to in cluster mode.") + "will be uploaded to in cluster mode.") .version("3.0.0") .stringConf .createOptional diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotsStoreImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotsStoreImpl.scala index 3f2cb485bbb31..22764d9d2eb0e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotsStoreImpl.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotsStoreImpl.scala @@ -52,7 +52,7 @@ import org.apache.spark.util.ThreadUtils * time-windowed chunks. Each subscriber can choose to receive their snapshot chunks at different * time intervals. *
      - * The subcriber notification callback is guaranteed to be called from a single thread at a time. + * The subscriber notification callback is guaranteed to be called from a single thread at a time. */ private[spark] class ExecutorPodsSnapshotsStoreImpl(subscribersExecutor: ScheduledExecutorService) extends ExecutorPodsSnapshotsStore with Logging { @@ -142,7 +142,7 @@ private[spark] class ExecutorPodsSnapshotsStoreImpl(subscribersExecutor: Schedul } if (notificationCount.decrementAndGet() > 0) { - // There was another concurrent request for this subcriber. Schedule a task to + // There was another concurrent request for this subscriber. Schedule a task to // immediately process snapshots again, so that the subscriber can pick up any // changes that may have happened between the time it started looking at snapshots // above, and the time the concurrent request arrived. diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtilsSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtilsSuite.scala index 349cbd04f6027..156740d7c8aee 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtilsSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtilsSuite.scala @@ -49,14 +49,14 @@ class KubernetesVolumeUtilsSuite extends SparkFunSuite { val sparkConf = new SparkConf(false) sparkConf.set("test.persistentVolumeClaim.volumeName.mount.path", "/path") sparkConf.set("test.persistentVolumeClaim.volumeName.mount.readOnly", "true") - sparkConf.set("test.persistentVolumeClaim.volumeName.options.claimName", "claimeName") + sparkConf.set("test.persistentVolumeClaim.volumeName.options.claimName", "claimName") val volumeSpec = KubernetesVolumeUtils.parseVolumesWithPrefix(sparkConf, "test.").head assert(volumeSpec.volumeName === "volumeName") assert(volumeSpec.mountPath === "/path") assert(volumeSpec.mountReadOnly) assert(volumeSpec.volumeConf.asInstanceOf[KubernetesPVCVolumeConf] === - KubernetesPVCVolumeConf("claimeName")) + KubernetesPVCVolumeConf("claimName")) } test("Parses emptyDir volumes correctly") { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala index 95ee37e3daa41..38f8fac1858f1 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala @@ -42,7 +42,7 @@ class MountVolumesFeatureStepSuite extends SparkFunSuite { assert(configuredPod.container.getVolumeMounts.get(0).getReadOnly === false) } - test("Mounts pesistentVolumeClaims") { + test("Mounts persistentVolumeClaims") { val volumeConf = KubernetesVolumeSpec( "testVolume", "/tmp", diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala index bd42f6f05655f..5927af176062d 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala @@ -230,7 +230,7 @@ package object config { 
ConfigBuilder("spark.mesos.appJar.local.resolution.mode") .doc("Provides support for the `local:///` scheme to reference the app jar resource in " + "cluster mode. If user uses a local resource (`local:///path/to/jar`) and the config " + - "option is not used it defaults to `host` eg. the mesos fetcher tries to get the " + + "option is not used it defaults to `host` e.g. the mesos fetcher tries to get the " + "resource from the host's file system. If the value is unknown it prints a warning msg " + "in the dispatcher logs and defaults to `host`. If the value is `container` then spark " + "submit in the container will use the jar in the container's path: `/path/to/jar`.") diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala index 4620bdb005094..8dbb70b616df1 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala @@ -356,7 +356,7 @@ trait MesosSchedulerUtils extends Logging { * https://github.com/apache/mesos/blob/master/src/common/values.cpp * https://github.com/apache/mesos/blob/master/src/common/attributes.cpp * - * @param constraintsVal constains string consisting of ';' separated key-value pairs (separated + * @param constraintsVal contains string consisting of ';' separated key-value pairs (separated * by ':') * @return Map of constraints to match resources offers. */ diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index 57af76b46fe64..ac50c1c77a24e 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -781,7 +781,7 @@ private[yarn] class YarnAllocator( val (exitCausedByApp, containerExitReason) = exitStatus match { case ContainerExitStatus.SUCCESS => (false, s"Executor for container $containerId exited because of a YARN event (e.g., " + - "pre-emption) and not because of an error in the running job.") + "preemption) and not because of an error in the running job.") case ContainerExitStatus.PREEMPTED => // Preemption is not the fault of the running tasks, since YARN preempts containers // merely to do resource sharing, and tasks that fail due to preempted executors could diff --git a/resource-managers/yarn/src/test/java/org/apache/hadoop/net/ServerSocketUtil.java b/resource-managers/yarn/src/test/java/org/apache/hadoop/net/ServerSocketUtil.java index df0ebcc9871ac..89e012ecd42e1 100644 --- a/resource-managers/yarn/src/test/java/org/apache/hadoop/net/ServerSocketUtil.java +++ b/resource-managers/yarn/src/test/java/org/apache/hadoop/net/ServerSocketUtil.java @@ -112,7 +112,7 @@ public static int waitForPort(int port, int retries) * The ports are all closed afterwards, * so other network services started may grab those same ports. 
* - * @param numPorts number of required port nubmers + * @param numPorts number of required port numbers * @return array of available port numbers * @throws IOException */ diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala index c2bdd971a0fe9..188a48509212d 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala @@ -250,7 +250,7 @@ class YarnShuffleServiceSuite extends SparkFunSuite with Matchers with BeforeAnd ShuffleTestAccessor.getExecutorInfo(app2Id, "exec-2", resolver2) should be (Some(shuffleInfo2)) s2.stop() - // another stop & restart should be fine though (eg., we recover from previous corruption) + // another stop & restart should be fine though (e.g., we recover from previous corruption) s3 = new YarnShuffleService s3.setRecoveryPath(new Path(recoveryLocalDir.toURI)) s3.init(yarnConfig) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/python/PythonDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/python/PythonDStream.scala index 570663c6f6ad3..7a8e3f1d2ccf4 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/python/PythonDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/python/PythonDStream.scala @@ -163,7 +163,7 @@ private[python] object PythonTransformFunctionSerializer { private[streaming] object PythonDStream { /** - * can not access PythonTransformFunctionSerializer.register() via Py4j + * cannot access PythonTransformFunctionSerializer.register() via Py4j * Py4JError: PythonTransformFunctionSerializerregister does not exist in the JVM */ def registerSerializer(ser: PythonTransformFunctionSerializer): Unit = { diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala index e037f26088347..ca4f3670d5ad7 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala @@ -960,7 +960,7 @@ object DStream { /** Get the creation site of a DStream from the stack trace of when the DStream is created. */ private[streaming] def getCreationSite(): CallSite = { /** Filtering function that excludes non-user classes for a streaming application */ - def streamingExclustionFunction(className: String): Boolean = { + def streamingExclusionFunction(className: String): Boolean = { def doesMatch(r: Regex): Boolean = r.findFirstIn(className).isDefined val isSparkClass = doesMatch(SPARK_CLASS_REGEX) val isSparkExampleClass = doesMatch(SPARK_EXAMPLES_CLASS_REGEX) @@ -972,6 +972,6 @@ object DStream { // non-Spark and non-Scala class, as the rest would streaming application classes. 
(isSparkClass || isScalaClass) && !isSparkExampleClass && !isSparkStreamingTestClass } - org.apache.spark.util.Utils.getCallSite(streamingExclustionFunction) + org.apache.spark.util.Utils.getCallSite(streamingExclusionFunction) } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala index 006bcad5d68c2..ef040681adf37 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala @@ -39,7 +39,7 @@ private[streaming] object HdfsUtils { throw new IllegalStateException("File exists and there is no append support!") } } else { - // we dont' want to use hdfs erasure coding, as that lacks support for append and hflush + // we don't want to use hdfs erasure coding, as that lacks support for append and hflush SparkHadoopUtil.createFile(dfs, dfsPath, false) } } diff --git a/streaming/src/test/java/test/org/apache/spark/streaming/JavaAPISuite.java b/streaming/src/test/java/test/org/apache/spark/streaming/JavaAPISuite.java index c7cde5674f547..8a57b0c58b228 100644 --- a/streaming/src/test/java/test/org/apache/spark/streaming/JavaAPISuite.java +++ b/streaming/src/test/java/test/org/apache/spark/streaming/JavaAPISuite.java @@ -1595,7 +1595,7 @@ public void testContextGetOrCreate() throws InterruptedException { /* TEST DISABLED: Pending a discussion about checkpoint() semantics with TD @SuppressWarnings("unchecked") @Test - public void testCheckpointofIndividualStream() throws InterruptedException { + public void testCheckpointOfIndividualStream() throws InterruptedException { List> inputData = Arrays.asList( Arrays.asList("this", "is"), Arrays.asList("a", "test"), diff --git a/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala index b2b8d2f41fc80..3ffaa62bd75ac 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala @@ -541,12 +541,12 @@ class MapWithStateSuite extends SparkFunSuite with LocalStreamingContext // Setup the stream computation val ssc = new StreamingContext(sc, Seconds(1)) val inputStream = new TestInputStream(ssc, input, numPartitions = 2) - val trackeStateStream = inputStream.map(x => (x, 1)).mapWithState(mapWithStateSpec) + val trackedStateStream = inputStream.map(x => (x, 1)).mapWithState(mapWithStateSpec) val collectedOutputs = new ConcurrentLinkedQueue[Seq[T]] - val outputStream = new TestOutputStream(trackeStateStream, collectedOutputs) + val outputStream = new TestOutputStream(trackedStateStream, collectedOutputs) val collectedStateSnapshots = new ConcurrentLinkedQueue[Seq[(K, S)]] val stateSnapshotStream = new TestOutputStream( - trackeStateStream.stateSnapshots(), collectedStateSnapshots) + trackedStateStream.stateSnapshots(), collectedStateSnapshots) outputStream.register() stateSnapshotStream.register() diff --git a/streaming/src/test/scala/org/apache/spark/streaming/rdd/MapWithStateRDDSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/rdd/MapWithStateRDDSuite.scala index 58ce3a93251a9..f06b1feb8c0cd 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/rdd/MapWithStateRDDSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/rdd/MapWithStateRDDSuite.scala @@ -320,7 +320,7 @@ class MapWithStateRDDSuite extends 
SparkFunSuite with RDDCheckpointTester with B makeStateRDDWithLongLineageDataRDD, reliableCheckpoint = true, rddCollectFunc _) /** Generate MapWithStateRDD with parent state RDD having a long lineage */ - def makeStateRDDWithLongLineageParenttateRDD( + def makeStateRDDWithLongLineageParentStateRDD( longLineageRDD: RDD[Int]): MapWithStateRDD[Int, Int, Int, Int] = { // Create a MapWithStateRDD that has a long lineage using the data RDD with a long lineage @@ -337,9 +337,9 @@ class MapWithStateRDDSuite extends SparkFunSuite with RDDCheckpointTester with B } testRDD( - makeStateRDDWithLongLineageParenttateRDD, reliableCheckpoint = true, rddCollectFunc _) + makeStateRDDWithLongLineageParentStateRDD, reliableCheckpoint = true, rddCollectFunc _) testRDDPartitions( - makeStateRDDWithLongLineageParenttateRDD, reliableCheckpoint = true, rddCollectFunc _) + makeStateRDDWithLongLineageParentStateRDD, reliableCheckpoint = true, rddCollectFunc _) } test("checkpointing empty state RDD") { From cf98a761de677c733f3c33230e1c63ddb785d5c5 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Sat, 28 Nov 2020 23:38:11 +0900 Subject: [PATCH 0590/1009] [SPARK-33570][SQL][TESTS] Set the proper version of gssapi plugin automatically for MariaDBKrbIntegrationSuite ### What changes were proposed in this pull request? This PR changes mariadb_docker_entrypoint.sh to set the proper version automatically for mariadb-plugin-gssapi-server. The proper version is based on the one of mariadb-server. Also, this PR enables to use arbitrary docker image by setting the environment variable `MARIADB_CONTAINER_IMAGE_NAME`. ### Why are the changes needed? For `MariaDBKrbIntegrationSuite`, the version of `mariadb-plugin-gssapi-server` is currently set to `10.5.5` in `mariadb_docker_entrypoint.sh` but it's no longer available in the official apt repository and `MariaDBKrbIntegrationSuite` doesn't pass for now. It seems that only the most recent three versions are available for each major version and they are `10.5.6`, `10.5.7` and `10.5.8` for now. Further, the release cycle of MariaDB seems to be very rapid (1 ~ 2 months) so I don't think it's a good idea to set to an specific version for `mariadb-plugin-gssapi-server`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Confirmed that `MariaDBKrbIntegrationSuite` passes with the following commands. ``` $ build/sbt -Pdocker-integration-tests -Phive -Phive-thriftserver package "testOnly org.apache.spark.sql.jdbc.MariaDBKrbIntegrationSuite" ``` In this case, we can see what version of `mariadb-plugin-gssapi-server` is going to be installed in the following container log message. ``` Installing mariadb-plugin-gssapi-server=1:10.5.8+maria~focal ``` Or, we can set MARIADB_CONTAINER_IMAGE_NAME for a specific version of MariaDB. ``` $ MARIADB_DOCKER_IMAGE_NAME=mariadb:10.5.6 build/sbt -Pdocker-integration-tests -Phive -Phive-thriftserver package "testOnly org.apache.spark.sql.jdbc.MariaDBKrbIntegrationSuite" ``` ``` Installing mariadb-plugin-gssapi-server=1:10.5.6+maria~focal ``` Closes #30515 from sarutak/fix-MariaDBKrbIntegrationSuite. 
Authored-by: Kousuke Saruta Signed-off-by: Takeshi Yamamuro --- .../src/test/resources/mariadb_docker_entrypoint.sh | 4 +++- .../spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala | 12 +++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/external/docker-integration-tests/src/test/resources/mariadb_docker_entrypoint.sh b/external/docker-integration-tests/src/test/resources/mariadb_docker_entrypoint.sh index 97c00a9d81b76..ab7d967a927d0 100755 --- a/external/docker-integration-tests/src/test/resources/mariadb_docker_entrypoint.sh +++ b/external/docker-integration-tests/src/test/resources/mariadb_docker_entrypoint.sh @@ -18,7 +18,9 @@ dpkg-divert --add /bin/systemctl && ln -sT /bin/true /bin/systemctl apt update -apt install -y mariadb-plugin-gssapi-server=1:10.5.5+maria~focal +GSSAPI_PLUGIN=mariadb-plugin-gssapi-server=$(dpkg -s mariadb-server | sed -n "s/^Version: \(.*\)/\1/p") +echo "Installing $GSSAPI_PLUGIN" +apt install -y "$GSSAPI_PLUGIN" echo "gssapi_keytab_path=/docker-entrypoint-initdb.d/mariadb.keytab" >> /etc/mysql/mariadb.conf.d/auth_gssapi.cnf echo "gssapi_principal_name=mariadb/__IP_ADDRESS_REPLACE_ME__@EXAMPLE.COM" >> /etc/mysql/mariadb.conf.d/auth_gssapi.cnf docker-entrypoint.sh mysqld diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala index adee2bebe41ce..59a6f530afd7e 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala @@ -24,15 +24,21 @@ import com.spotify.docker.client.messages.{ContainerConfig, HostConfig} import org.apache.spark.sql.execution.datasources.jdbc.connection.SecureConnectionProvider import org.apache.spark.tags.DockerTest +/** + * To run this test suite for a specific version (e.g., mariadb:10.5.8): + * {{{ + * MARIADB_DOCKER_IMAGE_NAME=mariadb:10.5.8 + * ./build/sbt -Pdocker-integration-tests + * "testOnly org.apache.spark.sql.jdbc.MariaDBKrbIntegrationSuite" + * }}} + */ @DockerTest class MariaDBKrbIntegrationSuite extends DockerKrbJDBCIntegrationSuite { override protected val userName = s"mariadb/$dockerIp" override protected val keytabFileName = "mariadb.keytab" override val db = new DatabaseOnDocker { - // If you change `imageName`, you need to update the version of `mariadb-plugin-gssapi-server` - // in `resources/mariadb_docker_entrypoint.sh` accordingly. - override val imageName = "mariadb:10.5" + override val imageName = sys.env.getOrElse("MARIADB_DOCKER_IMAGE_NAME", "mariadb:10.5") override val env = Map( "MYSQL_ROOT_PASSWORD" -> "rootpass" ) From 3650a6bd97b9cecf382f96a55a97ff56b75471cd Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 28 Nov 2020 12:47:47 -0800 Subject: [PATCH 0591/1009] [SPARK-33580][CORE] resolveDependencyPaths should use classifier attribute of artifact ### What changes were proposed in this pull request? This patch proposes to use classifier attribute to construct artifact path instead of type. ### Why are the changes needed? `resolveDependencyPaths` now takes artifact type to decide to add "-tests" postfix. However, the path pattern of ivy in `resolveMavenCoordinates` is `[organization]_[artifact][revision](-[classifier]).[ext]`. We should use classifier instead of type to construct file path. 
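For illustration only, a minimal standalone sketch (hypothetical coordinates; the real change is in `SparkSubmit.scala` below) of the intended naming: the optional Ivy classifier, not the artifact type, decides the suffix of the cached jar file name.
```
object ResolvedJarNameSketch {
  // Mirrors the intent of the change: a classifier such as "tests" contributes a
  // "-tests" suffix to the cached jar name; the artifact type is not consulted.
  def cachedJarName(org: String, name: String, rev: String, classifier: Option[String]): String = {
    val suffix = classifier.map("-" + _).getOrElse("")
    s"${org}_${name}-${rev}${suffix}.jar"
  }

  def main(args: Array[String]): Unit = {
    println(cachedJarName("org.example", "mylib", "1.0.0", None))           // org.example_mylib-1.0.0.jar
    println(cachedJarName("org.example", "mylib", "1.0.0", Some("tests")))  // org.example_mylib-1.0.0-tests.jar
  }
}
```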
### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Manual test. Closes #30524 from viirya/SPARK-33580. Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/deploy/SparkSubmit.scala | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 4b17661496808..7332c6d54c981 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -1186,12 +1186,16 @@ private[spark] object SparkSubmitUtils { def resolveDependencyPaths( artifacts: Array[AnyRef], cacheDirectory: File): String = { - artifacts.map { ai => - val artifactInfo = ai.asInstanceOf[Artifact] - val artifact = artifactInfo.getModuleRevisionId - val testSuffix = if (artifactInfo.getType == "test-jar") "-tests" else "" + artifacts.map { artifactInfo => + val artifact = artifactInfo.asInstanceOf[Artifact].getModuleRevisionId + val extraAttrs = artifactInfo.asInstanceOf[Artifact].getExtraAttributes + val classifier = if (extraAttrs.containsKey("classifier")) { + "-" + extraAttrs.get("classifier") + } else { + "" + } cacheDirectory.getAbsolutePath + File.separator + - s"${artifact.getOrganisation}_${artifact.getName}-${artifact.getRevision}${testSuffix}.jar" + s"${artifact.getOrganisation}_${artifact.getName}-${artifact.getRevision}$classifier.jar" }.mkString(",") } From bfe9380ba2bc9762ccfaa36d3ed938867c143876 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Sat, 28 Nov 2020 16:58:40 -0800 Subject: [PATCH 0592/1009] [MINOR][SQL] Remove `getTables()` from `r.SQLUtils` ### What changes were proposed in this pull request? Remove the unused method `getTables()` from `r.SQLUtils`. The method was used before the changes https://github.com/apache/spark/pull/17483 but R's `tables.default` was rewritten using `listTables()`: https://github.com/apache/spark/pull/17483/files#diff-2c01472a7bcb1d318244afcd621d726e00d36cd15dffe7e44fa96c54fce4cd9aR220-R223 ### Why are the changes needed? To improve code maintenance, and remove the dead code. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By R tests. Closes #30527 from MaxGekk/remove-getTables-in-r-SQLUtils. 
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/sql/api/r/SQLUtils.scala | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala index 693be99d47495..1d1358487abcb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala @@ -33,7 +33,6 @@ import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.expressions.{ExprUtils, GenericRowWithSchema} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.execution.arrow.ArrowConverters -import org.apache.spark.sql.execution.command.ShowTablesCommand import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION import org.apache.spark.sql.types._ @@ -216,15 +215,6 @@ private[sql] object SQLUtils extends Logging { } } - def getTables(sparkSession: SparkSession, databaseName: String): DataFrame = { - databaseName match { - case n: String if n != null && n.trim.nonEmpty => - Dataset.ofRows(sparkSession, ShowTablesCommand(Some(n), None)) - case _ => - Dataset.ofRows(sparkSession, ShowTablesCommand(None, None)) - } - } - def getTableNames(sparkSession: SparkSession, databaseName: String): Array[String] = { val db = databaseName match { case _ if databaseName != null && databaseName.trim.nonEmpty => From ba178f852f8e4b11a243d907ac204b30a60369b5 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sun, 29 Nov 2020 09:36:55 +0800 Subject: [PATCH 0593/1009] [SPARK-33581][SQL][TEST] Refactor HivePartitionFilteringSuite ### What changes were proposed in this pull request? This pr refactor HivePartitionFilteringSuite. ### Why are the changes needed? To make it easy to maintain. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? N/A Closes #30525 from wangyum/SPARK-33581. 
Authored-by: Yuming Wang Signed-off-by: Yuming Wang --- .../client/HivePartitionFilteringSuite.scala | 291 +++++++++++------- 1 file changed, 177 insertions(+), 114 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala index ab83f751f1425..e07fbc29ee8aa 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala @@ -39,7 +39,13 @@ class HivePartitionFilteringSuite(version: String) private val tryDirectSqlKey = HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL.varname - private val testPartitionCount = 3 * 5 * 4 + private val dsValue = 20170101 to 20170103 + private val hValue = 0 to 4 + private val chunkValue = Seq("aa", "ab", "ba", "bb") + private val dateValue = Seq("2019-01-01", "2019-01-02", "2019-01-03") + private val dateStrValue = Seq("2020-01-01", "2020-01-02", "2020-01-03") + private val testPartitionCount = + dsValue.size * hValue.size * chunkValue.size * dateValue.size * dateStrValue.size private val storageFormat = CatalogStorageFormat( locationUri = None, @@ -57,23 +63,28 @@ class HivePartitionFilteringSuite(version: String) val client = buildClient(hadoopConf) val tableSchema = new StructType().add("value", "int").add("ds", "int").add("h", "int").add("chunk", "string") + .add("d", "date").add("datestr", "string") val table = CatalogTable( identifier = TableIdentifier("test", Some("default")), tableType = CatalogTableType.MANAGED, schema = tableSchema, - partitionColumnNames = Seq("ds", "h", "chunk"), + partitionColumnNames = Seq("ds", "h", "chunk", "d", "datestr"), storage = storageFormat) client.createTable(table, ignoreIfExists = false) val partitions = for { - ds <- 20170101 to 20170103 - h <- 0 to 4 - chunk <- Seq("aa", "ab", "ba", "bb") + ds <- dsValue + h <- hValue + chunk <- chunkValue + date <- dateValue + dateStr <- dateStrValue } yield CatalogTablePartition(Map( "ds" -> ds.toString, "h" -> h.toString, - "chunk" -> chunk + "chunk" -> chunk, + "d" -> date, + "datestr" -> dateStr ), storageFormat) assert(partitions.size == testPartitionCount) @@ -108,17 +119,21 @@ class HivePartitionFilteringSuite(version: String) // Should return all partitions where <=> is not supported testMetastorePartitionFiltering( attr("ds") <=> 20170101, - 20170101 to 20170103, - 0 to 4, - "aa" :: "ab" :: "ba" :: "bb" :: Nil) + dsValue, + hValue, + chunkValue, + dateValue, + dateStrValue) } test("getPartitionsByFilter: ds=20170101") { testMetastorePartitionFiltering( attr("ds") === 20170101, 20170101 to 20170101, - 0 to 4, - "aa" :: "ab" :: "ba" :: "bb" :: Nil) + hValue, + chunkValue, + dateValue, + dateStrValue) } test("getPartitionsByFilter: ds=(20170101 + 1) and h=0") { @@ -126,41 +141,51 @@ class HivePartitionFilteringSuite(version: String) // comparisons to non-literal values testMetastorePartitionFiltering( attr("ds") === (Literal(20170101) + 1) && attr("h") === 0, - 20170101 to 20170103, + dsValue, 0 to 0, - "aa" :: "ab" :: "ba" :: "bb" :: Nil) + chunkValue, + dateValue, + dateStrValue) } test("getPartitionsByFilter: chunk='aa'") { testMetastorePartitionFiltering( attr("chunk") === "aa", - 20170101 to 20170103, - 0 to 4, - "aa" :: Nil) + dsValue, + hValue, + "aa" :: Nil, + dateValue, + dateStrValue) } test("getPartitionsByFilter: cast(chunk as int)=1 (not a valid partition predicate)") { 
testMetastorePartitionFiltering( attr("chunk").cast(IntegerType) === 1, - 20170101 to 20170103, - 0 to 4, - "aa" :: "ab" :: "ba" :: "bb" :: Nil) + dsValue, + hValue, + chunkValue, + dateValue, + dateStrValue) } test("getPartitionsByFilter: cast(chunk as boolean)=true (not a valid partition predicate)") { testMetastorePartitionFiltering( attr("chunk").cast(BooleanType) === true, - 20170101 to 20170103, - 0 to 4, - "aa" :: "ab" :: "ba" :: "bb" :: Nil) + dsValue, + hValue, + chunkValue, + dateValue, + dateStrValue) } test("getPartitionsByFilter: 20170101=ds") { testMetastorePartitionFiltering( Literal(20170101) === attr("ds"), 20170101 to 20170101, - 0 to 4, - "aa" :: "ab" :: "ba" :: "bb" :: Nil) + hValue, + chunkValue, + dateValue, + dateStrValue) } test("getPartitionsByFilter: ds=20170101 and h=2") { @@ -168,7 +193,9 @@ class HivePartitionFilteringSuite(version: String) attr("ds") === 20170101 && attr("h") === 2, 20170101 to 20170101, 2 to 2, - "aa" :: "ab" :: "ba" :: "bb" :: Nil) + chunkValue, + dateValue, + dateStrValue) } test("getPartitionsByFilter: cast(ds as long)=20170101L and h=2") { @@ -176,39 +203,49 @@ class HivePartitionFilteringSuite(version: String) attr("ds").cast(LongType) === 20170101L && attr("h") === 2, 20170101 to 20170101, 2 to 2, - "aa" :: "ab" :: "ba" :: "bb" :: Nil) + chunkValue, + dateValue, + dateStrValue) } test("getPartitionsByFilter: ds=20170101 or ds=20170102") { testMetastorePartitionFiltering( attr("ds") === 20170101 || attr("ds") === 20170102, 20170101 to 20170102, - 0 to 4, - "aa" :: "ab" :: "ba" :: "bb" :: Nil) + hValue, + chunkValue, + dateValue, + dateStrValue) } test("getPartitionsByFilter: ds in (20170102, 20170103) (using IN expression)") { testMetastorePartitionFiltering( attr("ds").in(20170102, 20170103), 20170102 to 20170103, - 0 to 4, - "aa" :: "ab" :: "ba" :: "bb" :: Nil) + hValue, + chunkValue, + dateValue, + dateStrValue) } test("getPartitionsByFilter: cast(ds as long) in (20170102L, 20170103L) (using IN expression)") { testMetastorePartitionFiltering( attr("ds").cast(LongType).in(20170102L, 20170103L), 20170102 to 20170103, - 0 to 4, - "aa" :: "ab" :: "ba" :: "bb" :: Nil) + hValue, + chunkValue, + dateValue, + dateStrValue) } test("getPartitionsByFilter: ds in (20170102, 20170103) (using INSET expression)") { testMetastorePartitionFiltering( attr("ds").in(20170102, 20170103), 20170102 to 20170103, - 0 to 4, - "aa" :: "ab" :: "ba" :: "bb" :: Nil, { + hValue, + chunkValue, + dateValue, + dateStrValue, { case expr @ In(v, list) if expr.inSetConvertible => InSet(v, list.map(_.eval(EmptyRow)).toSet) }) @@ -219,8 +256,10 @@ class HivePartitionFilteringSuite(version: String) testMetastorePartitionFiltering( attr("ds").cast(LongType).in(20170102L, 20170103L), 20170102 to 20170103, - 0 to 4, - "aa" :: "ab" :: "ba" :: "bb" :: Nil, { + hValue, + chunkValue, + dateValue, + dateStrValue, { case expr @ In(v, list) if expr.inSetConvertible => InSet(v, list.map(_.eval(EmptyRow)).toSet) }) @@ -229,41 +268,45 @@ class HivePartitionFilteringSuite(version: String) test("getPartitionsByFilter: chunk in ('ab', 'ba') (using IN expression)") { testMetastorePartitionFiltering( attr("chunk").in("ab", "ba"), - 20170101 to 20170103, - 0 to 4, - "ab" :: "ba" :: Nil) + dsValue, + hValue, + "ab" :: "ba" :: Nil, + dateValue, + dateStrValue) } test("getPartitionsByFilter: chunk in ('ab', 'ba') (using INSET expression)") { testMetastorePartitionFiltering( attr("chunk").in("ab", "ba"), - 20170101 to 20170103, - 0 to 4, - "ab" :: "ba" :: Nil, { + dsValue, + hValue, + "ab" :: 
"ba" :: Nil, + dateValue, + dateStrValue, { case expr @ In(v, list) if expr.inSetConvertible => InSet(v, list.map(_.eval(EmptyRow)).toSet) }) } test("getPartitionsByFilter: (ds=20170101 and h>=2) or (ds=20170102 and h<2)") { - val day1 = (20170101 to 20170101, 2 to 4, Seq("aa", "ab", "ba", "bb")) - val day2 = (20170102 to 20170102, 0 to 1, Seq("aa", "ab", "ba", "bb")) + val day1 = (20170101 to 20170101, 2 to 4, chunkValue, dateValue, dateStrValue) + val day2 = (20170102 to 20170102, 0 to 1, chunkValue, dateValue, dateStrValue) testMetastorePartitionFiltering((attr("ds") === 20170101 && attr("h") >= 2) || (attr("ds") === 20170102 && attr("h") < 2), day1 :: day2 :: Nil) } test("getPartitionsByFilter: (ds=20170101 and h>=2) or (ds=20170102 and h<(1+1))") { - val day1 = (20170101 to 20170101, 2 to 4, Seq("aa", "ab", "ba", "bb")) + val day1 = (20170101 to 20170101, 2 to 4, chunkValue, dateValue, dateStrValue) // Day 2 should include all hours because we can't build a filter for h<(7+1) - val day2 = (20170102 to 20170102, 0 to 4, Seq("aa", "ab", "ba", "bb")) + val day2 = (20170102 to 20170102, 0 to 4, chunkValue, dateValue, dateStrValue) testMetastorePartitionFiltering((attr("ds") === 20170101 && attr("h") >= 2) || (attr("ds") === 20170102 && attr("h") < (Literal(1) + 1)), day1 :: day2 :: Nil) } test("getPartitionsByFilter: " + "chunk in ('ab', 'ba') and ((ds=20170101 and h>=2) or (ds=20170102 and h<2))") { - val day1 = (20170101 to 20170101, 2 to 4, Seq("ab", "ba")) - val day2 = (20170102 to 20170102, 0 to 1, Seq("ab", "ba")) + val day1 = (20170101 to 20170101, 2 to 4, Seq("ab", "ba"), dateValue, dateStrValue) + val day2 = (20170102 to 20170102, 0 to 1, Seq("ab", "ba"), dateValue, dateStrValue) testMetastorePartitionFiltering(attr("chunk").in("ab", "ba") && ((attr("ds") === 20170101 && attr("h") >= 2) || (attr("ds") === 20170102 && attr("h") < 2)), day1 :: day2 :: Nil) @@ -272,93 +315,105 @@ class HivePartitionFilteringSuite(version: String) test("getPartitionsByFilter: chunk contains bb") { testMetastorePartitionFiltering( attr("chunk").contains("bb"), - (20170101 to 20170103, 0 to 4, Seq("bb")) :: Nil) + dsValue, + hValue, + Seq("bb"), + dateValue, + dateStrValue) } test("getPartitionsByFilter: chunk startsWith b") { testMetastorePartitionFiltering( attr("chunk").startsWith("b"), - (20170101 to 20170103, 0 to 4, Seq("ba", "bb")) :: Nil) + dsValue, + hValue, + Seq("ba", "bb"), + dateValue, + dateStrValue) } test("getPartitionsByFilter: chunk endsWith b") { testMetastorePartitionFiltering( attr("chunk").endsWith("b"), - (20170101 to 20170103, 0 to 4, Seq("ab", "bb")) :: Nil) + dsValue, + hValue, + Seq("ab", "bb"), + dateValue, + dateStrValue) } test("getPartitionsByFilter: chunk in ('ab', 'ba') and ((cast(ds as string)>'20170102')") { - val day = (20170101 to 20170103, 0 to 4, Seq("ab", "ba")) testMetastorePartitionFiltering( attr("chunk").in("ab", "ba") && (attr("ds").cast(StringType) > "20170102"), - day :: Nil) + dsValue, + hValue, + Seq("ab", "ba"), + dateValue, + dateStrValue) } - test("getPartitionsByFilter: date type pruning by metastore") { - val table = CatalogTable( - identifier = TableIdentifier("test_date", Some("default")), - tableType = CatalogTableType.MANAGED, - schema = new StructType().add("value", "int").add("part", "date"), - partitionColumnNames = Seq("part"), - storage = storageFormat) - client.createTable(table, ignoreIfExists = false) + test("getPartitionsByFilter: d=2019-01-01") { + testMetastorePartitionFiltering( + attr("d") === Date.valueOf("2019-01-01"), + dsValue, 
+ hValue, + chunkValue, + Seq("2019-01-01"), + dateStrValue) + } - val partitions = - for { - date <- Seq("2019-01-01", "2019-01-02", "2019-01-03", "2019-01-04") - } yield CatalogTablePartition(Map( - "part" -> date - ), storageFormat) - assert(partitions.size == 4) - - client.createPartitions("default", "test_date", partitions, ignoreIfExists = false) - - def testDataTypeFiltering( - filterExprs: Seq[Expression], - expectedPartitionCubes: Seq[Seq[Date]]): Unit = { - val filteredPartitions = client.getPartitionsByFilter( - client.getTable("default", "test_date"), - filterExprs, - SQLConf.get.sessionLocalTimeZone) - - val expectedPartitions = expectedPartitionCubes.map { - expectedDt => - for { - dt <- expectedDt - } yield Set( - "part" -> dt.toString - ) - }.reduce(_ ++ _) - - assert(filteredPartitions.map(_.spec.toSet).toSet == expectedPartitions.toSet) - } + test("getPartitionsByFilter: d>2019-01-02") { + testMetastorePartitionFiltering( + attr("d") > Date.valueOf("2019-01-02"), + dsValue, + hValue, + chunkValue, + Seq("2019-01-03"), + dateStrValue) + } + + test("getPartitionsByFilter: In(d, 2019-01-01, 2019-01-02)") { + testMetastorePartitionFiltering( + In(attr("d"), + Seq("2019-01-01", "2019-01-02").map(d => Literal(Date.valueOf(d)))), + dsValue, + hValue, + chunkValue, + Seq("2019-01-01", "2019-01-02"), + dateStrValue) + } - val dateAttr: Attribute = AttributeReference("part", DateType)() + test("getPartitionsByFilter: InSet(d, 2019-01-01, 2019-01-02)") { + testMetastorePartitionFiltering( + InSet(attr("d"), + Set("2019-01-01", "2019-01-02").map(d => Literal(Date.valueOf(d)).eval(EmptyRow))), + dsValue, + hValue, + chunkValue, + Seq("2019-01-01", "2019-01-02"), + dateStrValue) + } - testDataTypeFiltering( - Seq(dateAttr === Date.valueOf("2019-01-01")), - Seq("2019-01-01").map(Date.valueOf) :: Nil) - testDataTypeFiltering( - Seq(dateAttr > Date.valueOf("2019-01-02")), - Seq("2019-01-03", "2019-01-04").map(Date.valueOf) :: Nil) - testDataTypeFiltering( - Seq(In(dateAttr, - Seq("2019-01-01", "2019-01-02").map(d => Literal(Date.valueOf(d))))), - Seq("2019-01-01", "2019-01-02").map(Date.valueOf) :: Nil) - testDataTypeFiltering( - Seq(InSet(dateAttr, - Set("2019-01-01", "2019-01-02").map(d => Literal(Date.valueOf(d)).eval(EmptyRow)))), - Seq("2019-01-01", "2019-01-02").map(Date.valueOf) :: Nil) + test("getPartitionsByFilter: cast(datestr as date)= 2020-01-01") { + testMetastorePartitionFiltering( + attr("datestr").cast(DateType) === Date.valueOf("2020-01-01"), + dsValue, + hValue, + chunkValue, + dateValue, + dateStrValue) } private def testMetastorePartitionFiltering( filterExpr: Expression, expectedDs: Seq[Int], expectedH: Seq[Int], - expectedChunks: Seq[String]): Unit = { + expectedChunks: Seq[String], + expectedD: Seq[String], + expectedDatestr: Seq[String]): Unit = { testMetastorePartitionFiltering( filterExpr, - (expectedDs, expectedH, expectedChunks) :: Nil, + (expectedDs, expectedH, expectedChunks, expectedD, expectedDatestr) :: Nil, identity) } @@ -367,22 +422,25 @@ class HivePartitionFilteringSuite(version: String) expectedDs: Seq[Int], expectedH: Seq[Int], expectedChunks: Seq[String], + expectedD: Seq[String], + expectedDatestr: Seq[String], transform: Expression => Expression): Unit = { testMetastorePartitionFiltering( filterExpr, - (expectedDs, expectedH, expectedChunks) :: Nil, + (expectedDs, expectedH, expectedChunks, expectedD, expectedDatestr) :: Nil, transform) } private def testMetastorePartitionFiltering( filterExpr: Expression, - expectedPartitionCubes: Seq[(Seq[Int], 
Seq[Int], Seq[String])]): Unit = { + expectedPartitionCubes: + Seq[(Seq[Int], Seq[Int], Seq[String], Seq[String], Seq[String])]): Unit = { testMetastorePartitionFiltering(filterExpr, expectedPartitionCubes, identity) } private def testMetastorePartitionFiltering( filterExpr: Expression, - expectedPartitionCubes: Seq[(Seq[Int], Seq[Int], Seq[String])], + expectedPartitionCubes: Seq[(Seq[Int], Seq[Int], Seq[String], Seq[String], Seq[String])], transform: Expression => Expression): Unit = { val filteredPartitions = client.getPartitionsByFilter(client.getTable("default", "test"), Seq( @@ -390,20 +448,25 @@ class HivePartitionFilteringSuite(version: String) ), SQLConf.get.sessionLocalTimeZone) val expectedPartitionCount = expectedPartitionCubes.map { - case (expectedDs, expectedH, expectedChunks) => - expectedDs.size * expectedH.size * expectedChunks.size + case (expectedDs, expectedH, expectedChunks, expectedD, expectedDatestr) => + expectedDs.size * expectedH.size * expectedChunks.size * + expectedD.size * expectedDatestr.size }.sum val expectedPartitions = expectedPartitionCubes.map { - case (expectedDs, expectedH, expectedChunks) => + case (expectedDs, expectedH, expectedChunks, expectedD, expectedDatestr) => for { ds <- expectedDs h <- expectedH chunk <- expectedChunks + d <- expectedD + datestr <- expectedDatestr } yield Set( "ds" -> ds.toString, "h" -> h.toString, - "chunk" -> chunk + "chunk" -> chunk, + "d" -> d, + "datestr" -> datestr ) }.reduce(_ ++ _) From b94ff1e870152ac692c6f1ebf3d110caa274ebb2 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Sun, 29 Nov 2020 11:24:58 -0800 Subject: [PATCH 0594/1009] [SPARK-33590][DOCS][SQL] Add missing sub-bullets in Spark SQL Guide ### What changes were proposed in this pull request? Add the missing sub-bullets in the left side of `Spark SQL Guide` ### Why are the changes needed? The three sub-bullets in the left side is not consistent with the contents (five bullets) in the right side. ![image](https://user-images.githubusercontent.com/1315079/100546388-7a21e880-32a4-11eb-922d-62a52f4f9f9b.png) ### Does this PR introduce _any_ user-facing change? Yes, you can see more lines in the left menu. ### How was this patch tested? Manually build the doc as follows. This can be verified as attached: ``` cd docs SKIP_API=1 jekyll build firefox _site/sql-pyspark-pandas-with-arrow.html ``` ![image](https://user-images.githubusercontent.com/1315079/100546399-8ad25e80-32a4-11eb-80ac-44af0aebc717.png) Closes #30537 from kiszk/SPARK-33590. 
Authored-by: Kazuaki Ishizaki Signed-off-by: Dongjoon Hyun --- docs/_data/menu-sql.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/_data/menu-sql.yaml b/docs/_data/menu-sql.yaml index 2207bd6a17656..ec0b404fe672f 100644 --- a/docs/_data/menu-sql.yaml +++ b/docs/_data/menu-sql.yaml @@ -51,6 +51,10 @@ url: sql-performance-tuning.html#other-configuration-options - text: Join Strategy Hints for SQL Queries url: sql-performance-tuning.html#join-strategy-hints-for-sql-queries + - text: Coalesce Hints for SQL Queries + url: sql-performance-tuning.html#coalesce-hints-for-sql-queries + - text: Adaptive Query Execution + url: sql-performance-tuning.html#adaptive-query-execution - text: Distributed SQL Engine url: sql-distributed-sql-engine.html subitems: From c8286ec41616909f1f6e452ce63f0e7605d5bc63 Mon Sep 17 00:00:00 2001 From: Shixiong Zhu Date: Sun, 29 Nov 2020 11:56:48 -0800 Subject: [PATCH 0595/1009] [SPARK-33587][CORE] Kill the executor on nested fatal errors ### What changes were proposed in this pull request? Currently we kill the executor when hitting a fatal error. However, if the fatal error is wrapped by another exception, such as - java.util.concurrent.ExecutionException, com.google.common.util.concurrent.UncheckedExecutionException, com.google.common.util.concurrent.ExecutionError when using a Guava cache or a Java thread pool. - SparkException thrown from https://github.com/apache/spark/blob/cf98a761de677c733f3c33230e1c63ddb785d5c5/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala#L231 or https://github.com/apache/spark/blob/cf98a761de677c733f3c33230e1c63ddb785d5c5/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala#L296 we still keep the executor running. Fatal errors are usually unrecoverable (such as OutOfMemoryError), some components may be in a broken state when hitting a fatal error, and it's hard to predict the behavior of a broken component. Hence, it's better to detect a nested fatal error as well and kill the executor. Then we can rely on Spark's fault tolerance to recover. ### Why are the changes needed? Fatal errors are usually unrecoverable (such as OutOfMemoryError), some components may be in a broken state when hitting a fatal error, and it's hard to predict the behavior of a broken component. Hence, it's better to detect a nested fatal error as well and kill the executor. Then we can rely on Spark's fault tolerance to recover. ### Does this PR introduce _any_ user-facing change? Yes. There is a slight internal behavior change on when to kill an executor. We will kill the executor when detecting a nested fatal error in the exception chain. `spark.executor.killOnFatalError.depth` is added to allow users to turn off this change if the slight behavior change impacts them. ### How was this patch tested? The new method `Executor.isFatalError` is tested by the new `ExecutorSuite` test `SPARK-33587: isFatalError`. Closes #30528 from zsxwing/SPARK-33587.
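To make the wrapping scenario concrete, the following is a minimal, self-contained sketch (an illustration only, not the Spark implementation; the `containsFatalError` helper and its fatal-error classification are simplified assumptions) of how a fatal error thrown inside a thread pool surfaces as a nested cause, and how a depth-limited walk of the cause chain can still detect it:

```scala
import java.util.concurrent.{Callable, ExecutionException, Executors}

object NestedFatalErrorSketch {

  // Walk at most `depthToCheck` levels of the cause chain looking for a fatal
  // error. Depth 0 disables the check, which also bounds the walk if the
  // cause chain ever contains a cycle.
  def containsFatalError(t: Throwable, depthToCheck: Int): Boolean = {
    if (depthToCheck <= 0) {
      false
    } else {
      t match {
        case _: VirtualMachineError | _: LinkageError => true
        case e if e.getCause != null => containsFatalError(e.getCause, depthToCheck - 1)
        case _ => false
      }
    }
  }

  def main(args: Array[String]): Unit = {
    val pool = Executors.newSingleThreadExecutor()
    try {
      val future = pool.submit(new Callable[String] {
        // The task itself dies with a fatal error...
        override def call(): String = throw new OutOfMemoryError("simulated")
      })
      future.get() // ...but the caller only ever sees an ExecutionException.
    } catch {
      case e: ExecutionException =>
        println(containsFatalError(e, depthToCheck = 1)) // false: only the wrapper is inspected
        println(containsFatalError(e, depthToCheck = 2)) // true: the walk reaches the OutOfMemoryError
    } finally {
      pool.shutdown()
    }
  }
}
```

Note that the real `Executor.isFatalError` added below additionally treats `SparkOutOfMemoryError` as non-fatal, matching the existing behavior of not killing the executor for it.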
Authored-by: Shixiong Zhu Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/executor/Executor.scala | 28 ++++++- .../spark/internal/config/package.scala | 11 +++ .../apache/spark/executor/ExecutorSuite.scala | 73 ++++++++++++++++++- 3 files changed, 108 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index f7246448959e9..efb0b2c26d9a9 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -150,6 +150,8 @@ private[spark] class Executor( // Whether to monitor killed / interrupted tasks private val taskReaperEnabled = conf.get(TASK_REAPER_ENABLED) + private val killOnFatalErrorDepth = conf.get(EXECUTOR_KILL_ON_FATAL_ERROR_DEPTH) + // Create our ClassLoader // do this after SparkEnv creation so can access the SecurityManager private val urlClassLoader = createClassLoader() @@ -648,7 +650,7 @@ private[spark] class Executor( plugins.foreach(_.onTaskFailed(reason)) execBackend.statusUpdate(taskId, TaskState.KILLED, ser.serialize(reason)) - case t: Throwable if hasFetchFailure && !Utils.isFatalError(t) => + case t: Throwable if hasFetchFailure && !Executor.isFatalError(t, killOnFatalErrorDepth) => val reason = task.context.fetchFailed.get.toTaskFailedReason if (!t.isInstanceOf[FetchFailedException]) { // there was a fetch failure in the task, but some user code wrapped that exception @@ -711,7 +713,7 @@ private[spark] class Executor( // Don't forcibly exit unless the exception was inherently fatal, to avoid // stopping other tasks unnecessarily. - if (!t.isInstanceOf[SparkOutOfMemoryError] && Utils.isFatalError(t)) { + if (Executor.isFatalError(t, killOnFatalErrorDepth)) { uncaughtExceptionHandler.uncaughtException(Thread.currentThread(), t) } } finally { @@ -997,4 +999,26 @@ private[spark] object Executor { // Used to store executorSource, for local mode only var executorSourceLocalModeOnly: ExecutorSource = null + + /** + * Whether a `Throwable` thrown from a task is a fatal error. We will use this to decide whether + * to kill the executor. + * + * @param depthToCheck The max depth of the exception chain we should search for a fatal error. 0 + * means not checking any fatal error (in other words, return false), 1 means + * checking only the exception but not the cause, and so on. This is to avoid + * `StackOverflowError` when hitting a cycle in the exception chain. + */ + def isFatalError(t: Throwable, depthToCheck: Int): Boolean = { + if (depthToCheck <= 0) { + false + } else { + t match { + case _: SparkOutOfMemoryError => false + case e if Utils.isFatalError(e) => true + case e if e.getCause != null => isFatalError(e.getCause, depthToCheck - 1) + case _ => false + } + } + } } diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index b38d0e5c617b9..b8bcb374ef961 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -1946,6 +1946,17 @@ package object config { .booleanConf .createWithDefault(false) + private[spark] val EXECUTOR_KILL_ON_FATAL_ERROR_DEPTH = + ConfigBuilder("spark.executor.killOnFatalError.depth") + .doc("The max depth of the exception chain in a failed task Spark will search for a fatal " + + "error to check whether it should kill an executor. 
0 means not checking any fatal " + + "error, 1 means checking only the exception but not the cause, and so on.") + .internal() + .version("3.1.0") + .intConf + .checkValue(_ >= 0, "needs to be a non-negative value") + .createWithDefault(5) + private[spark] val PUSH_BASED_SHUFFLE_ENABLED = ConfigBuilder("spark.shuffle.push.enabled") .doc("Set to 'true' to enable push-based shuffle on the client side and this works in " + diff --git a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala index 31049d104e63d..1326ae3c11a06 100644 --- a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala +++ b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala @@ -28,6 +28,7 @@ import scala.collection.immutable import scala.collection.mutable.{ArrayBuffer, Map} import scala.concurrent.duration._ +import com.google.common.cache.{CacheBuilder, CacheLoader} import org.mockito.ArgumentCaptor import org.mockito.ArgumentMatchers.{any, eq => meq} import org.mockito.Mockito.{inOrder, verify, when} @@ -43,7 +44,7 @@ import org.apache.spark.TaskState.TaskState import org.apache.spark.broadcast.Broadcast import org.apache.spark.internal.config._ import org.apache.spark.internal.config.UI._ -import org.apache.spark.memory.TestMemoryManager +import org.apache.spark.memory.{SparkOutOfMemoryError, TestMemoryManager} import org.apache.spark.metrics.MetricsSystem import org.apache.spark.rdd.RDD import org.apache.spark.resource.ResourceInformation @@ -52,7 +53,7 @@ import org.apache.spark.scheduler.{DirectTaskResult, FakeTask, ResultTask, Task, import org.apache.spark.serializer.{JavaSerializer, SerializerInstance, SerializerManager} import org.apache.spark.shuffle.FetchFailedException import org.apache.spark.storage.{BlockManager, BlockManagerId} -import org.apache.spark.util.{LongAccumulator, UninterruptibleThread} +import org.apache.spark.util.{LongAccumulator, ThreadUtils, UninterruptibleThread} class ExecutorSuite extends SparkFunSuite with LocalSparkContext with MockitoSugar with Eventually with PrivateMethodTester { @@ -402,6 +403,74 @@ class ExecutorSuite extends SparkFunSuite assert(taskMetrics.getMetricValue("JVMHeapMemory") > 0) } + test("SPARK-33587: isFatalError") { + def errorInThreadPool(e: => Throwable): Throwable = { + intercept[Throwable] { + val taskPool = ThreadUtils.newDaemonFixedThreadPool(1, "test") + try { + val f = taskPool.submit(new java.util.concurrent.Callable[String] { + override def call(): String = throw e + }) + f.get() + } finally { + taskPool.shutdown() + } + } + } + + def errorInGuavaCache(e: => Throwable): Throwable = { + val cache = CacheBuilder.newBuilder() + .build(new CacheLoader[String, String] { + override def load(key: String): String = throw e + }) + intercept[Throwable] { + cache.get("test") + } + } + + def testThrowable( + e: => Throwable, + depthToCheck: Int, + isFatal: Boolean): Unit = { + import Executor.isFatalError + // `e`'s depth is 1 so `depthToCheck` needs to be at least 3 to detect fatal errors. + assert(isFatalError(e, depthToCheck) == (depthToCheck >= 1 && isFatal)) + // `e`'s depth is 2 so `depthToCheck` needs to be at least 3 to detect fatal errors. 
+ assert(isFatalError(errorInThreadPool(e), depthToCheck) == (depthToCheck >= 2 && isFatal)) + assert(isFatalError(errorInGuavaCache(e), depthToCheck) == (depthToCheck >= 2 && isFatal)) + assert(isFatalError( + new SparkException("foo", e), + depthToCheck) == (depthToCheck >= 2 && isFatal)) + // `e`'s depth is 3 so `depthToCheck` needs to be at least 3 to detect fatal errors. + assert(isFatalError( + errorInThreadPool(errorInGuavaCache(e)), + depthToCheck) == (depthToCheck >= 3 && isFatal)) + assert(isFatalError( + errorInGuavaCache(errorInThreadPool(e)), + depthToCheck) == (depthToCheck >= 3 && isFatal)) + assert(isFatalError( + new SparkException("foo", new SparkException("foo", e)), + depthToCheck) == (depthToCheck >= 3 && isFatal)) + } + + for (depthToCheck <- 0 to 5) { + testThrowable(new OutOfMemoryError(), depthToCheck, isFatal = true) + testThrowable(new InterruptedException(), depthToCheck, isFatal = false) + testThrowable(new RuntimeException("test"), depthToCheck, isFatal = false) + testThrowable(new SparkOutOfMemoryError("test"), depthToCheck, isFatal = false) + } + + // Verify we can handle the cycle in the exception chain + val e1 = new Exception("test1") + val e2 = new Exception("test2") + e1.initCause(e2) + e2.initCause(e1) + for (depthToCheck <- 0 to 5) { + testThrowable(e1, depthToCheck, isFatal = false) + testThrowable(e2, depthToCheck, isFatal = false) + } + } + private def createMockEnv(conf: SparkConf, serializer: JavaSerializer): SparkEnv = { val mockEnv = mock[SparkEnv] val mockRpcEnv = mock[RpcEnv] From 0054fc937f804660c6501d9d3f6319f3047a68f8 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Sun, 29 Nov 2020 12:10:16 -0800 Subject: [PATCH 0596/1009] [SPARK-33588][SQL] Respect the `spark.sql.caseSensitive` config while resolving partition spec in v1 `SHOW TABLE EXTENDED` ### What changes were proposed in this pull request? Perform partition spec normalization in `ShowTablesCommand` according to the table schema before getting partitions from the catalog. The normalization via `PartitioningUtils.normalizePartitionSpec()` adjusts the column names in partition specification, w.r.t. the real partition column names and case sensitivity. ### Why are the changes needed? Even when `spark.sql.caseSensitive` is `false` which is the default value, v1 `SHOW TABLE EXTENDED` is case sensitive: ```sql spark-sql> CREATE TABLE tbl1 (price int, qty int, year int, month int) > USING parquet > partitioned by (year, month); spark-sql> INSERT INTO tbl1 PARTITION(year = 2015, month = 1) SELECT 1, 1; spark-sql> SHOW TABLE EXTENDED LIKE 'tbl1' PARTITION(YEAR = 2015, Month = 1); Error in query: Partition spec is invalid. The spec (YEAR, Month) must match the partition spec (year, month) defined in table '`default`.`tbl1`'; ``` ### Does this PR introduce _any_ user-facing change? Yes. After the changes, the `SHOW TABLE EXTENDED` command respects the SQL config. 
And for example above, it returns correct result: ```sql spark-sql> SHOW TABLE EXTENDED LIKE 'tbl1' PARTITION(YEAR = 2015, Month = 1); default tbl1 false Partition Values: [year=2015, month=1] Location: file:/Users/maximgekk/spark-warehouse/tbl1/year=2015/month=1 Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat Storage Properties: [serialization.format=1, path=file:/Users/maximgekk/spark-warehouse/tbl1] Partition Parameters: {transient_lastDdlTime=1606595118, totalSize=623, numFiles=1} Created Time: Sat Nov 28 23:25:18 MSK 2020 Last Access: UNKNOWN Partition Statistics: 623 bytes ``` ### How was this patch tested? By running the modified test suite `v1/ShowTablesSuite` Closes #30529 from MaxGekk/show-table-case-sensitive-spec. Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../spark/sql/execution/command/tables.scala | 17 ++++++++----- .../sql-tests/results/show-tables.sql.out | 2 +- .../command/v1/ShowTablesSuite.scala | 25 +++++++++++++++++++ 3 files changed, 37 insertions(+), 7 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index bd238948aab02..9e3ca3c321a54 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -878,12 +878,17 @@ case class ShowTablesCommand( // // Note: tableIdentifierPattern should be non-empty, otherwise a [[ParseException]] // should have been thrown by the sql parser. - val tableIdent = TableIdentifier(tableIdentifierPattern.get, Some(db)) - val table = catalog.getTableMetadata(tableIdent).identifier - val partition = catalog.getPartition(tableIdent, partitionSpec.get) - val database = table.database.getOrElse("") - val tableName = table.table - val isTemp = catalog.isTemporaryTable(table) + val table = catalog.getTableMetadata(TableIdentifier(tableIdentifierPattern.get, Some(db))) + val tableIdent = table.identifier + val normalizedSpec = PartitioningUtils.normalizePartitionSpec( + partitionSpec.get, + table.partitionColumnNames, + tableIdent.quotedString, + sparkSession.sessionState.conf.resolver) + val partition = catalog.getPartition(tableIdent, normalizedSpec) + val database = tableIdent.database.getOrElse("") + val tableName = tableIdent.table + val isTemp = catalog.isTemporaryTable(tableIdent) val information = partition.simpleString Seq(Row(database, tableName, isTemp, s"$information\n")) } diff --git a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out index a95b02c7f7743..60c5e6d5642b7 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out @@ -224,7 +224,7 @@ SHOW TABLE EXTENDED LIKE 'show_t1' PARTITION(a='Us', d=1) struct<> -- !query output org.apache.spark.sql.AnalysisException -Partition spec is invalid. 
The spec (a, d) must match the partition spec (c, d) defined in table '`showdb`.`show_t1`'; +a is not a valid partition column in table `showdb`.`show_t1`.; -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala index 5bbc6c6285193..8f29f9f276138 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.command.v1 import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution.command +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{BooleanType, StringType, StructType} @@ -84,6 +85,30 @@ trait ShowTablesSuiteBase extends command.ShowTablesSuiteBase { result.foreach { case Row(_, _, _, info: String) => assert(info.nonEmpty) } } } + + test("case sensitivity of partition spec") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + val t = s"$catalog.ns.part_table" + withTable(t) { + sql(s""" + |CREATE TABLE $t (price int, qty int, year int, month int) + |$defaultUsing + |partitioned by (year, month)""".stripMargin) + sql(s"INSERT INTO $t PARTITION(year = 2015, month = 1) SELECT 1, 1") + Seq( + true -> "PARTITION(year = 2015, month = 1)", + false -> "PARTITION(YEAR = 2015, Month = 1)" + ).foreach { case (caseSensitive, partitionSpec) => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + val df = sql(s"SHOW TABLE EXTENDED LIKE 'part_table' $partitionSpec") + val information = df.select("information").first().getString(0) + assert(information.contains("Partition Values: [year=2015, month=1]")) + } + } + } + } + } } class ShowTablesSuite extends ShowTablesSuiteBase with SharedSparkSession From a088a801ed8c17171545c196a3f26ce415de0cd1 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Sun, 29 Nov 2020 12:18:07 -0800 Subject: [PATCH 0597/1009] [SPARK-33585][SQL][DOCS] Fix the comment for `SQLContext.tables()` and mention the `database` column ### What changes were proposed in this pull request? Change the comments for `SQLContext.tables()` to "The returned DataFrame has three columns, database, tableName and isTemporary". ### Why are the changes needed? Currently, the comment mentions only 2 columns but `tables()` returns 3 columns actually: ```scala scala> spark.range(10).createOrReplaceTempView("view1") scala> val tables = spark.sqlContext.tables() tables: org.apache.spark.sql.DataFrame = [database: string, tableName: string ... 1 more field] scala> tables.printSchema root |-- database: string (nullable = false) |-- tableName: string (nullable = false) |-- isTemporary: boolean (nullable = false) scala> tables.show +--------+---------+-----------+ |database|tableName|isTemporary| +--------+---------+-----------+ | default| t1| false| | default| t2| false| | default| ymd| false| | | view1| true| +--------+---------+-----------+ ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running `./dev/scalastyle` Closes #30526 from MaxGekk/sqlcontext-tables-doc. 
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 7cf0b6bb70364..dd237962110ef 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -661,7 +661,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) /** * Returns a `DataFrame` containing names of existing tables in the current database. - * The returned DataFrame has two columns, tableName and isTemporary (a Boolean + * The returned DataFrame has three columns, database, tableName and isTemporary (a Boolean * indicating if a table is a temporary one or not). * * @group ddl_ops @@ -673,7 +673,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) /** * Returns a `DataFrame` containing names of existing tables in the given database. - * The returned DataFrame has two columns, tableName and isTemporary (a Boolean + * The returned DataFrame has three columns, database, tableName and isTemporary (a Boolean * indicating if a table is a temporary one or not). * * @group ddl_ops From 3d54774fb9cbf674580851aa2323991c7e462a1e Mon Sep 17 00:00:00 2001 From: liucht Date: Mon, 30 Nov 2020 10:03:18 +0900 Subject: [PATCH 0598/1009] [SPARK-33517][SQL][DOCS] Fix the correct menu items and page links in PySpark Usage Guide for Pandas with Apache Arrow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Change "Apache Arrow in Spark" to "Apache Arrow in PySpark" and the link to “/sql-pyspark-pandas-with-arrow.html#apache-arrow-in-pyspark” ### Why are the changes needed? When I click on the menu item it doesn't point to the correct page, and from the parent menu I can infer that the correct menu item name and link should be "Apache Arrow in PySpark". like this: image ![image](https://user-images.githubusercontent.com/28332082/99954725-2b64e200-2dbe-11eb-9576-cf6a3d758980.png) ### Does this PR introduce any user-facing change? Yes, clicking on the menu item will take you to the correct guide page ### How was this patch tested? Manually build the doc. This can be verified as below: cd docs SKIP_API=1 jekyll build open _site/sql-pyspark-pandas-with-arrow.html Closes #30466 from liucht-inspur/master. Authored-by: liucht Signed-off-by: HyukjinKwon --- docs/_data/menu-sql.yaml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/docs/_data/menu-sql.yaml b/docs/_data/menu-sql.yaml index ec0b404fe672f..cda2a1a5139a1 100644 --- a/docs/_data/menu-sql.yaml +++ b/docs/_data/menu-sql.yaml @@ -64,17 +64,6 @@ url: sql-distributed-sql-engine.html#running-the-spark-sql-cli - text: PySpark Usage Guide for Pandas with Apache Arrow url: sql-pyspark-pandas-with-arrow.html - subitems: - - text: Apache Arrow in Spark - url: sql-pyspark-pandas-with-arrow.html#apache-arrow-in-spark - - text: "Enabling for Conversion to/from Pandas" - url: sql-pyspark-pandas-with-arrow.html#enabling-for-conversion-tofrom-pandas - - text: "Pandas UDFs (a.k.a. 
Vectorized UDFs)" - url: sql-pyspark-pandas-with-arrow.html#pandas-udfs-aka-vectorized-udfs - - text: "Pandas Function APIs" - url: sql-pyspark-pandas-with-arrow.html#pandas-function-apis - - text: Usage Notes - url: sql-pyspark-pandas-with-arrow.html#usage-notes - text: Migration Guide url: sql-migration-old.html - text: SQL Reference From f93d4395b25ea546cebb1ff16879dea696a217b5 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Mon, 30 Nov 2020 11:21:02 +0900 Subject: [PATCH 0599/1009] [SPARK-33589][SQL] Close opened session if the initialization fails ### What changes were proposed in this pull request? This pr add try catch when opening session. ### Why are the changes needed? Close opened session if the initialization fails. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual test. Before this pr: ``` [rootspark-3267648 spark]# bin/beeline -u jdbc:hive2://localhost:10000/db_not_exist NOTE: SPARK_PREPEND_CLASSES is set, placing locally compiled Spark classes ahead of assembly. Connecting to jdbc:hive2://localhost:10000/db_not_exist log4j:WARN No appenders could be found for logger (org.apache.hive.jdbc.Utils). log4j:WARN Please initialize the log4j system properly. log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info. Error: Could not open client transport with JDBC Uri: jdbc:hive2://localhost:10000/db_not_exist: Database 'db_not_exist' not found; (state=08S01,code=0) Beeline version 2.3.7 by Apache Hive beeline> ``` ![image](https://user-images.githubusercontent.com/5399861/100560975-73ba5d80-32f2-11eb-8f92-b2509e7a121f.png) After this pr: ``` [rootspark-3267648 spark]# bin/beeline -u jdbc:hive2://localhost:10000/db_not_exist NOTE: SPARK_PREPEND_CLASSES is set, placing locally compiled Spark classes ahead of assembly. log4j:WARN No appenders could be found for logger (org.apache.hadoop.util.Shell). log4j:WARN Please initialize the log4j system properly. log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info. Connecting to jdbc:hive2://localhost:10000/db_not_exist Error: Could not open client transport with JDBC Uri: jdbc:hive2://localhost:10000/db_not_exist: Failed to open new session: org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException: Database 'db_not_exist' not found; (state=08S01,code=0) Beeline version 2.3.7 by Apache Hive beeline> ``` ![image](https://user-images.githubusercontent.com/5399861/100560917-479edc80-32f2-11eb-986f-7a997f1163fc.png) Closes #30536 from wangyum/SPARK-33589. 
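The fix follows the standard open-then-initialize pattern: if any initialization step after the session handle is created throws, close the handle before propagating a wrapped error, so no half-initialized session stays registered. A hedged sketch of that shape (the `SessionStore` and `Handle` types below are placeholders for illustration, not the HiveServer2 or Spark Thrift server API):

```scala
import scala.collection.mutable

// Placeholder types for illustration only; not the real HiveServer2 classes.
final case class Handle(id: Long)

final class SessionStore {
  private val open = mutable.Set.empty[Long]
  private var nextId = 0L

  def openRaw(): Handle = { nextId += 1; open += nextId; Handle(nextId) }
  def close(handle: Handle): Unit = open -= handle.id
  def openCount: Int = open.size

  // Open a session and run per-session initialization (e.g. switching the
  // current database). On failure, close the just-opened session and rethrow
  // a wrapped error, mirroring the shape of the SPARK-33589 fix.
  def openSession(init: Handle => Unit): Handle = {
    val handle = openRaw()
    try {
      init(handle)
      handle
    } catch {
      case e: Exception =>
        try {
          close(handle)
        } catch {
          case t: Throwable => println(s"Error closing session: $t")
        }
        throw new RuntimeException("Failed to open new session: " + e, e)
    }
  }
}

object CloseOnFailureSketch {
  def main(args: Array[String]): Unit = {
    val store = new SessionStore
    try {
      store.openSession(_ => throw new IllegalArgumentException("Database 'db_not_exist' not found"))
    } catch {
      case e: RuntimeException => println(e.getMessage)
    }
    println(s"open sessions after failed open: ${store.openCount}") // 0
  }
}
```

The diff below applies exactly this shape inside `SparkSQLSessionManager.openSession`, wrapping the failure in a `HiveSQLException`.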
Authored-by: Yuming Wang Signed-off-by: HyukjinKwon --- .../thriftserver/SparkSQLSessionManager.scala | 50 ++++++++++++------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala index 856edede0b85f..0c092abb37f3e 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala @@ -18,11 +18,12 @@ package org.apache.spark.sql.hive.thriftserver import org.apache.hadoop.hive.conf.HiveConf -import org.apache.hive.service.cli.SessionHandle +import org.apache.hive.service.cli.{HiveSQLException, SessionHandle} import org.apache.hive.service.cli.session.SessionManager import org.apache.hive.service.rpc.thrift.TProtocolVersion import org.apache.hive.service.server.HiveServer2 +import org.apache.spark.internal.Logging import org.apache.spark.sql.SQLContext import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._ @@ -32,7 +33,7 @@ import org.apache.spark.sql.internal.SQLConf private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, sqlContext: SQLContext) extends SessionManager(hiveServer) - with ReflectedCompositeService { + with ReflectedCompositeService with Logging { private lazy val sparkSqlOperationManager = new SparkSQLOperationManager() @@ -52,24 +53,35 @@ private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, sqlContext: val sessionHandle = super.openSession(protocol, username, passwd, ipAddress, sessionConf, withImpersonation, delegationToken) - val session = super.getSession(sessionHandle) - HiveThriftServer2.eventManager.onSessionCreated( - session.getIpAddress, sessionHandle.getSessionId.toString, session.getUsername) - val ctx = if (sqlContext.conf.hiveThriftServerSingleSession) { - sqlContext - } else { - sqlContext.newSession() + try { + val session = super.getSession(sessionHandle) + HiveThriftServer2.eventManager.onSessionCreated( + session.getIpAddress, sessionHandle.getSessionId.toString, session.getUsername) + val ctx = if (sqlContext.conf.hiveThriftServerSingleSession) { + sqlContext + } else { + sqlContext.newSession() + } + ctx.setConf(HiveUtils.FAKE_HIVE_VERSION.key, HiveUtils.builtinHiveVersion) + ctx.setConf(SQLConf.DATETIME_JAVA8API_ENABLED, true) + val hiveSessionState = session.getSessionState + setConfMap(ctx, hiveSessionState.getOverriddenConfigurations) + setConfMap(ctx, hiveSessionState.getHiveVariables) + if (sessionConf != null && sessionConf.containsKey("use:database")) { + ctx.sql(s"use ${sessionConf.get("use:database")}") + } + sparkSqlOperationManager.sessionToContexts.put(sessionHandle, ctx) + sessionHandle + } catch { + case e: Exception => + try { + closeSession(sessionHandle) + } catch { + case t: Throwable => + logWarning("Error closing session", t) + } + throw new HiveSQLException("Failed to open new session: " + e, e) } - ctx.setConf(HiveUtils.FAKE_HIVE_VERSION.key, HiveUtils.builtinHiveVersion) - ctx.setConf(SQLConf.DATETIME_JAVA8API_ENABLED, true) - val hiveSessionState = session.getSessionState - setConfMap(ctx, hiveSessionState.getOverriddenConfigurations) - setConfMap(ctx, hiveSessionState.getHiveVariables) - if (sessionConf != null && sessionConf.containsKey("use:database")) { - ctx.sql(s"use 
${sessionConf.get("use:database")}") - } - sparkSqlOperationManager.sessionToContexts.put(sessionHandle, ctx) - sessionHandle } override def closeSession(sessionHandle: SessionHandle): Unit = { From a5e13acd19871831a93a5bdcbc99a9eb9f1aba07 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Mon, 30 Nov 2020 11:24:15 +0900 Subject: [PATCH 0600/1009] [SPARK-33582][SQL] Hive Metastore support filter by not-equals ### What changes were proposed in this pull request? This pr make partition predicate pushdown into Hive metastore support not-equals operator. Hive related changes: https://github.com/apache/hive/blob/b8bd4594bef718b1eeac9fceb437d7df7b480ed1/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java#L2194-L2207 https://issues.apache.org/jira/browse/HIVE-2702 ### Why are the changes needed? Improve query performance. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #30534 from wangyum/SPARK-33582. Authored-by: Yuming Wang Signed-off-by: HyukjinKwon --- .../spark/sql/hive/client/HiveShim.scala | 8 ++++++++ .../spark/sql/hive/client/FiltersSuite.scala | 8 ++++++++ .../client/HivePartitionFilteringSuite.scala | 20 +++++++++++++++++++ 3 files changed, 36 insertions(+) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala index 17a64a67df283..ed088648bc20a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala @@ -812,6 +812,14 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { right <- convert(expr2) } yield s"($left or $right)" + case Not(EqualTo( + ExtractAttribute(SupportedAttribute(name)), ExtractableLiteral(value))) if useAdvanced => + Some(s"$name != $value") + + case Not(EqualTo( + ExtractableLiteral(value), ExtractAttribute(SupportedAttribute(name)))) if useAdvanced => + Some(s"$value != $name") + case _ => None } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala index 6c0531182e6d6..12ed0e5305299 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala @@ -100,6 +100,14 @@ class FiltersSuite extends SparkFunSuite with Logging with PlanTest { (a("intcol", IntegerType) in (Literal(1), Literal(null))) :: Nil, "(intcol = 1)") + filterTest("NOT: int and string filters", + (a("intcol", IntegerType) =!= Literal(1)) :: (Literal("a") =!= a("strcol", IntegerType)) :: Nil, + """intcol != 1 and "a" != strcol""") + + filterTest("NOT: date filter", + (a("datecol", DateType) =!= Literal(Date.valueOf("2019-01-01"))) :: Nil, + "datecol != 2019-01-01") + // Applying the predicate `x IN (NULL)` should return an empty set, but since this optimization // will be applied by Catalyst, this filter converter does not need to account for this. 
filterTest("SPARK-24879 IN predicates with only NULLs will not cause a NPE", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala index e07fbc29ee8aa..dc56e6bc4da81 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala @@ -352,6 +352,26 @@ class HivePartitionFilteringSuite(version: String) dateStrValue) } + test("getPartitionsByFilter: ds<>20170101") { + testMetastorePartitionFiltering( + attr("ds") =!= 20170101, + 20170102 to 20170103, + hValue, + chunkValue, + dateValue, + dateStrValue) + } + + test("getPartitionsByFilter: h<>0 and chunk<>ab and d<>2019-01-01") { + testMetastorePartitionFiltering( + attr("h") =!= 0 && attr("chunk") =!= "ab" && attr("d") =!= Date.valueOf("2019-01-01"), + dsValue, + 1 to 4, + Seq("aa", "ba", "bb"), + Seq("2019-01-02", "2019-01-03"), + dateStrValue) + } + test("getPartitionsByFilter: d=2019-01-01") { testMetastorePartitionFiltering( attr("d") === Date.valueOf("2019-01-01"), From feda7299e3d8ebe665b8fae0328f22a4927c66da Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Mon, 30 Nov 2020 04:50:50 +0000 Subject: [PATCH 0601/1009] [SPARK-33567][SQL] DSv2: Use callback instead of passing Spark session and v2 relation for refreshing cache ### What changes were proposed in this pull request? This replaces Spark session and `DataSourceV2Relation` in V2 write plans by replacing them with a callback `afterWrite`. ### Why are the changes needed? Per discussion in #30429, it's better to not pass Spark session and `DataSourceV2Relation` through Spark plans. Instead we can use a callback which makes the interface cleaner. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A Closes #30491 from sunchao/SPARK-33492-followup. 
Authored-by: Chao Sun Signed-off-by: Wenchen Fan --- .../datasources/v2/DataSourceV2Strategy.scala | 26 +++++++++++++------ .../datasources/v2/DropTableExec.scala | 11 +++----- .../datasources/v2/RefreshTableExec.scala | 11 +++----- .../datasources/v2/V1FallbackWriters.scala | 15 ++++++----- .../v2/WriteToDataSourceV2Exec.scala | 21 +++++++-------- 5 files changed, 43 insertions(+), 41 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index eb0d7010041b9..1fae8d937e90c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -52,6 +52,15 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat } } + private def refreshCache(r: DataSourceV2Relation)(): Unit = { + session.sharedState.cacheManager.recacheByPlan(session, r) + } + + private def invalidateCache(r: ResolvedTable)(): Unit = { + val v2Relation = DataSourceV2Relation.create(r.table, Some(r.catalog), Some(r.identifier)) + session.sharedState.cacheManager.uncacheQuery(session, v2Relation, cascade = true) + } + override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { case PhysicalOperation(project, filters, relation @ DataSourceV2ScanRelation(_, V1ScanWrapper(scan, translated, pushed), output)) => @@ -128,7 +137,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat } case RefreshTable(r: ResolvedTable) => - RefreshTableExec(session, r.catalog, r.table, r.identifier) :: Nil + RefreshTableExec(r.catalog, r.identifier, invalidateCache(r)) :: Nil case ReplaceTable(catalog, ident, schema, parts, props, orCreate) => val propsWithOwner = CatalogV2Util.withDefaultOwnership(props) @@ -172,9 +181,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case AppendData(r: DataSourceV2Relation, query, writeOptions, _) => r.table.asWritable match { case v1 if v1.supports(TableCapability.V1_BATCH_WRITE) => - AppendDataExecV1(v1, writeOptions.asOptions, query, r) :: Nil + AppendDataExecV1(v1, writeOptions.asOptions, query, refreshCache(r)) :: Nil case v2 => - AppendDataExec(session, v2, r, writeOptions.asOptions, planLater(query)) :: Nil + AppendDataExec(v2, writeOptions.asOptions, planLater(query), refreshCache(r)) :: Nil } case OverwriteByExpression(r: DataSourceV2Relation, deleteExpr, query, writeOptions, _) => @@ -186,15 +195,16 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat }.toArray r.table.asWritable match { case v1 if v1.supports(TableCapability.V1_BATCH_WRITE) => - OverwriteByExpressionExecV1(v1, filters, writeOptions.asOptions, query, r) :: Nil + OverwriteByExpressionExecV1(v1, filters, writeOptions.asOptions, + query, refreshCache(r)) :: Nil case v2 => - OverwriteByExpressionExec(session, v2, r, filters, - writeOptions.asOptions, planLater(query)) :: Nil + OverwriteByExpressionExec(v2, filters, + writeOptions.asOptions, planLater(query), refreshCache(r)) :: Nil } case OverwritePartitionsDynamic(r: DataSourceV2Relation, query, writeOptions, _) => OverwritePartitionsDynamicExec( - session, r.table.asWritable, r, writeOptions.asOptions, planLater(query)) :: Nil + r.table.asWritable, writeOptions.asOptions, planLater(query), refreshCache(r)) :: Nil case DeleteFromTable(relation, 
condition) => relation match { @@ -232,7 +242,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat throw new AnalysisException("Describing columns is not supported for v2 tables.") case DropTable(r: ResolvedTable, ifExists, purge) => - DropTableExec(session, r.catalog, r.table, r.identifier, ifExists, purge) :: Nil + DropTableExec(r.catalog, r.identifier, ifExists, purge, invalidateCache(r)) :: Nil case _: NoopDropTable => LocalTableScanExec(Nil, Nil) :: Nil diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala index 068475fc56f47..f89b89096772a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala @@ -17,27 +17,24 @@ package org.apache.spark.sql.execution.datasources.v2 -import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog} +import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} /** * Physical plan node for dropping a table. */ case class DropTableExec( - session: SparkSession, catalog: TableCatalog, - table: Table, ident: Identifier, ifExists: Boolean, - purge: Boolean) extends V2CommandExec { + purge: Boolean, + invalidateCache: () => Unit) extends V2CommandExec { override def run(): Seq[InternalRow] = { if (catalog.tableExists(ident)) { - val v2Relation = DataSourceV2Relation.create(table, Some(catalog), Some(ident)) - session.sharedState.cacheManager.uncacheQuery(session, v2Relation, cascade = true) + invalidateCache() catalog.dropTable(ident, purge) } else if (!ifExists) { throw new NoSuchTableException(ident) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RefreshTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RefreshTableExec.scala index 52836de5a926b..994583c1e338f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RefreshTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RefreshTableExec.scala @@ -17,23 +17,20 @@ package org.apache.spark.sql.execution.datasources.v2 -import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog} +import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} case class RefreshTableExec( - session: SparkSession, catalog: TableCatalog, - table: Table, - ident: Identifier) extends V2CommandExec { + ident: Identifier, + invalidateCache: () => Unit) extends V2CommandExec { override protected def run(): Seq[InternalRow] = { catalog.invalidateTable(ident) // invalidate all caches referencing the given table // TODO(SPARK-33437): re-cache the table itself once we support caching a DSv2 table - val v2Relation = DataSourceV2Relation.create(table, Some(catalog), Some(ident)) - session.sharedState.cacheManager.uncacheQuery(session, v2Relation, cascade = true) + invalidateCache() Seq.empty } diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala index af7721588edeb..9d2cea9fbaff3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala @@ -38,10 +38,10 @@ case class AppendDataExecV1( table: SupportsWrite, writeOptions: CaseInsensitiveStringMap, plan: LogicalPlan, - v2Relation: DataSourceV2Relation) extends V1FallbackWriters { + refreshCache: () => Unit) extends V1FallbackWriters { override protected def run(): Seq[InternalRow] = { - writeWithV1(newWriteBuilder().buildForV1Write(), Some(v2Relation)) + writeWithV1(newWriteBuilder().buildForV1Write(), refreshCache = refreshCache) } } @@ -61,7 +61,7 @@ case class OverwriteByExpressionExecV1( deleteWhere: Array[Filter], writeOptions: CaseInsensitiveStringMap, plan: LogicalPlan, - v2Relation: DataSourceV2Relation) extends V1FallbackWriters { + refreshCache: () => Unit) extends V1FallbackWriters { private def isTruncate(filters: Array[Filter]): Boolean = { filters.length == 1 && filters(0).isInstanceOf[AlwaysTrue] @@ -70,10 +70,11 @@ case class OverwriteByExpressionExecV1( override protected def run(): Seq[InternalRow] = { newWriteBuilder() match { case builder: SupportsTruncate if isTruncate(deleteWhere) => - writeWithV1(builder.truncate().asV1Builder.buildForV1Write(), Some(v2Relation)) + writeWithV1(builder.truncate().asV1Builder.buildForV1Write(), refreshCache = refreshCache) case builder: SupportsOverwrite => - writeWithV1(builder.overwrite(deleteWhere).asV1Builder.buildForV1Write(), Some(v2Relation)) + writeWithV1(builder.overwrite(deleteWhere).asV1Builder.buildForV1Write(), + refreshCache = refreshCache) case _ => throw new SparkException(s"Table does not support overwrite by expression: $table") @@ -116,11 +117,11 @@ trait SupportsV1Write extends SparkPlan { protected def writeWithV1( relation: InsertableRelation, - v2Relation: Option[DataSourceV2Relation] = None): Seq[InternalRow] = { + refreshCache: () => Unit = () => ()): Seq[InternalRow] = { val session = sqlContext.sparkSession // The `plan` is already optimized, we should not analyze and optimize it again. relation.insert(AlreadyOptimized.dataFrame(session, plan), overwrite = false) - v2Relation.foreach(r => session.sharedState.cacheManager.recacheByPlan(session, r)) + refreshCache() Nil } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala index 1648134d0a1b2..47aad2bcb2c56 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala @@ -213,15 +213,14 @@ case class AtomicReplaceTableAsSelectExec( * Rows in the output data set are appended. 
*/ case class AppendDataExec( - session: SparkSession, table: SupportsWrite, - relation: DataSourceV2Relation, writeOptions: CaseInsensitiveStringMap, - query: SparkPlan) extends V2TableWriteExec with BatchWriteHelper { + query: SparkPlan, + refreshCache: () => Unit) extends V2TableWriteExec with BatchWriteHelper { override protected def run(): Seq[InternalRow] = { val writtenRows = writeWithV2(newWriteBuilder().buildForBatch()) - session.sharedState.cacheManager.recacheByPlan(session, relation) + refreshCache() writtenRows } } @@ -237,12 +236,11 @@ case class AppendDataExec( * AlwaysTrue to delete all rows. */ case class OverwriteByExpressionExec( - session: SparkSession, table: SupportsWrite, - relation: DataSourceV2Relation, deleteWhere: Array[Filter], writeOptions: CaseInsensitiveStringMap, - query: SparkPlan) extends V2TableWriteExec with BatchWriteHelper { + query: SparkPlan, + refreshCache: () => Unit) extends V2TableWriteExec with BatchWriteHelper { private def isTruncate(filters: Array[Filter]): Boolean = { filters.length == 1 && filters(0).isInstanceOf[AlwaysTrue] @@ -259,7 +257,7 @@ case class OverwriteByExpressionExec( case _ => throw new SparkException(s"Table does not support overwrite by expression: $table") } - session.sharedState.cacheManager.recacheByPlan(session, relation) + refreshCache() writtenRows } } @@ -275,11 +273,10 @@ case class OverwriteByExpressionExec( * are not modified. */ case class OverwritePartitionsDynamicExec( - session: SparkSession, table: SupportsWrite, - relation: DataSourceV2Relation, writeOptions: CaseInsensitiveStringMap, - query: SparkPlan) extends V2TableWriteExec with BatchWriteHelper { + query: SparkPlan, + refreshCache: () => Unit) extends V2TableWriteExec with BatchWriteHelper { override protected def run(): Seq[InternalRow] = { val writtenRows = newWriteBuilder() match { @@ -289,7 +286,7 @@ case class OverwritePartitionsDynamicExec( case _ => throw new SparkException(s"Table does not support dynamic partition overwrite: $table") } - session.sharedState.cacheManager.recacheByPlan(session, relation) + refreshCache() writtenRows } } From 485145326a9c97ede260b0e267ee116f182cfd56 Mon Sep 17 00:00:00 2001 From: Josh Soref Date: Mon, 30 Nov 2020 13:59:51 +0900 Subject: [PATCH 0602/1009] [MINOR] Spelling bin core docs external mllib repl ### What changes were proposed in this pull request? This PR intends to fix typos in the sub-modules: * `bin` * `core` * `docs` * `external` * `mllib` * `repl` * `pom.xml` Split per srowen https://github.com/apache/spark/pull/30323#issuecomment-728981618 NOTE: The misspellings have been reported at https://github.com/jsoref/spark/commit/706a726f87a0bbf5e31467fae9015218773db85b#commitcomment-44064356 ### Why are the changes needed? Misspelled words make it harder to read / understand content. ### Does this PR introduce _any_ user-facing change? There are various fixes to documentation, etc... ### How was this patch tested? No testing was performed Closes #30530 from jsoref/spelling-bin-core-docs-external-mllib-repl. 
Authored-by: Josh Soref Signed-off-by: Takeshi Yamamuro --- bin/docker-image-tool.sh | 2 +- .../apache/spark/ui/static/spark-dag-viz.js | 2 +- .../org/apache/spark/ui/static/utils.js | 2 +- .../spark/ExecutorAllocationManager.scala | 4 +- .../apache/spark/api/java/JavaPairRDD.scala | 4 +- .../apache/spark/api/java/JavaRDDLike.scala | 2 +- .../apache/spark/api/python/PythonRDD.scala | 6 +- .../apache/spark/deploy/JsonProtocol.scala | 2 +- .../org/apache/spark/deploy/SparkSubmit.scala | 2 +- .../deploy/history/FsHistoryProvider.scala | 2 +- .../spark/deploy/history/HybridStore.scala | 2 +- .../org/apache/spark/executor/Executor.scala | 4 +- .../apache/spark/metrics/MetricsConfig.scala | 2 +- .../metrics/sink/PrometheusServlet.scala | 6 +- .../apache/spark/rdd/DoubleRDDFunctions.scala | 2 +- .../spark/rdd/OrderedRDDFunctions.scala | 4 +- .../main/scala/org/apache/spark/rdd/RDD.scala | 2 +- .../spark/resource/TaskResourceRequest.scala | 2 +- .../apache/spark/rpc/netty/NettyRpcEnv.scala | 4 +- .../BarrierJobAllocationFailed.scala | 4 +- .../apache/spark/scheduler/DAGScheduler.scala | 8 +- .../spark/scheduler/HealthTracker.scala | 4 +- .../spark/scheduler/TaskSetManager.scala | 2 +- .../spark/security/CryptoStreamUtils.scala | 2 +- .../apache/spark/storage/BlockManager.scala | 4 +- .../storage/BlockManagerMasterEndpoint.scala | 2 +- .../apache/spark/ui/jobs/AllJobsPage.scala | 2 +- .../org/apache/spark/ui/jobs/JobPage.scala | 2 +- .../apache/spark/util/ClosureCleaner.scala | 2 +- .../scala/org/apache/spark/util/Utils.scala | 22 ++-- .../spark/util/io/ChunkedByteBuffer.scala | 2 +- .../sort/UnsafeShuffleWriterSuite.java | 10 +- .../test/org/apache/spark/JavaAPISuite.java | 2 +- .../org/apache/spark/CheckpointSuite.scala | 12 +- .../apache/spark/ContextCleanerSuite.scala | 10 +- .../ExecutorAllocationManagerSuite.scala | 2 +- .../scala/org/apache/spark/FileSuite.scala | 2 +- .../spark/benchmark/BenchmarkBase.scala | 2 +- .../history/FsHistoryProviderSuite.scala | 4 +- .../spark/deploy/master/MasterSuite.scala | 2 +- .../spark/deploy/worker/WorkerSuite.scala | 2 +- .../apache/spark/executor/ExecutorSuite.scala | 2 +- ...FileCommitProtocolInstantiationSuite.scala | 4 +- .../metrics/InputOutputMetricsSuite.scala | 2 +- .../NettyBlockTransferServiceSuite.scala | 2 +- .../spark/rdd/PairRDDFunctionsSuite.scala | 34 +++--- .../scala/org/apache/spark/rdd/RDDSuite.scala | 2 +- .../spark/resource/ResourceUtilsSuite.scala | 2 +- .../spark/rpc/netty/NettyRpcEnvSuite.scala | 2 +- .../spark/scheduler/DAGSchedulerSuite.scala | 6 +- .../spark/scheduler/ReplayListenerSuite.scala | 2 +- .../scheduler/SchedulerIntegrationSuite.scala | 8 +- .../spark/scheduler/SparkListenerSuite.scala | 6 +- .../spark/scheduler/TaskSetManagerSuite.scala | 6 +- .../spark/status/AppStatusListenerSuite.scala | 2 +- .../spark/storage/BlockManagerSuite.scala | 4 +- .../apache/spark/util/JsonProtocolSuite.scala | 8 +- .../spark/util/SizeEstimatorSuite.scala | 2 +- docs/_plugins/include_example.rb | 4 +- docs/building-spark.md | 2 +- docs/configuration.md | 2 +- docs/css/main.css | 4 +- docs/graphx-programming-guide.md | 4 +- docs/ml-migration-guide.md | 2 +- docs/mllib-clustering.md | 2 +- docs/mllib-data-types.md | 2 +- docs/monitoring.md | 6 +- docs/running-on-kubernetes.md | 4 +- docs/running-on-mesos.md | 2 +- docs/running-on-yarn.md | 2 +- docs/sparkr.md | 2 +- docs/sql-data-sources-jdbc.md | 2 +- docs/sql-migration-guide.md | 6 +- ...l-ref-syntax-aux-conf-mgmt-set-timezone.md | 2 +- ...-ref-syntax-ddl-create-table-hiveformat.md | 8 
+- docs/sql-ref-syntax-dml-insert-into.md | 114 +++++++++--------- ...l-ref-syntax-dml-insert-overwrite-table.md | 52 ++++---- docs/sql-ref-syntax-qry-select-groupby.md | 4 +- .../sql-ref-syntax-qry-select-lateral-view.md | 6 +- docs/sql-ref-syntax-qry-select-orderby.md | 2 +- .../ml/evaluation/ClusteringMetrics.scala | 4 +- .../apache/spark/ml/feature/Binarizer.scala | 6 +- .../apache/spark/ml/feature/Selector.scala | 2 +- .../spark/ml/feature/StopWordsRemover.scala | 6 +- .../apache/spark/ml/image/ImageSchema.scala | 2 +- .../ml/r/AFTSurvivalRegressionWrapper.scala | 4 +- .../spark/ml/regression/FMRegressor.scala | 2 +- .../spark/mllib/classification/SVM.scala | 2 +- .../mllib/clustering/DistanceMeasure.scala | 6 +- .../spark/mllib/clustering/LDAOptimizer.scala | 2 +- .../mllib/clustering/StreamingKMeans.scala | 2 +- .../org/apache/spark/mllib/feature/PCA.scala | 4 +- .../apache/spark/mllib/feature/Word2Vec.scala | 2 +- .../spark/mllib/fpm/AssociationRules.scala | 4 +- .../mllib/linalg/distributed/RowMatrix.scala | 4 +- .../stat/test/KolmogorovSmirnovTest.scala | 2 +- .../ml/feature/JavaStopWordsRemoverSuite.java | 2 +- .../ml/clustering/GaussianMixtureSuite.scala | 2 +- .../evaluation/RegressionEvaluatorSuite.scala | 2 +- .../spark/ml/feature/ANOVASelectorSuite.scala | 10 +- .../apache/spark/ml/feature/DCTSuite.scala | 2 +- .../org/apache/spark/ml/feature/LSHTest.scala | 2 +- .../VarianceThresholdSelectorSuite.scala | 2 +- .../GeneralizedLinearRegressionSuite.scala | 4 +- pom.xml | 4 +- .../spark/repl/ExecutorClassLoaderSuite.scala | 5 +- 106 files changed, 288 insertions(+), 289 deletions(-) diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh index 6d74f8328aea2..2ec1ab8861798 100755 --- a/bin/docker-image-tool.sh +++ b/bin/docker-image-tool.sh @@ -274,7 +274,7 @@ Examples: - Build and push JDK11-based image for multiple archs to docker.io/myrepo $0 -r docker.io/myrepo -t v3.0.0 -X -b java_image_tag=11-jre-slim build # Note: buildx, which does cross building, needs to do the push during build - # So there is no seperate push step with -X + # So there is no separate push step with -X EOF } diff --git a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js index 474c453643365..1fc1fb4b4513b 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js +++ b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js @@ -334,7 +334,7 @@ function preprocessGraphLayout(g, forJob) { } /* - * Helper function to size the SVG appropriately such that all elements are displyed. + * Helper function to size the SVG appropriately such that all elements are displayed. * This assumes that all outermost elements are clusters (rectangles). */ function resizeSvg(svg) { diff --git a/core/src/main/resources/org/apache/spark/ui/static/utils.js b/core/src/main/resources/org/apache/spark/ui/static/utils.js index 4cd83332cde5f..7e6dd678e2641 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/utils.js +++ b/core/src/main/resources/org/apache/spark/ui/static/utils.js @@ -74,7 +74,7 @@ function getTimeZone() { return Intl.DateTimeFormat().resolvedOptions().timeZone; } catch(ex) { // Get time zone from a string representing the date, - // eg. "Thu Nov 16 2017 01:13:32 GMT+0800 (CST)" -> "CST" + // e.g. 
"Thu Nov 16 2017 01:13:32 GMT+0800 (CST)" -> "CST" return new Date().toString().match(/\((.*)\)/)[1]; } } diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index e445f188e1eed..61ab63584269b 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -248,7 +248,7 @@ private[spark] class ExecutorAllocationManager( executor.scheduleWithFixedDelay(scheduleTask, 0, intervalMillis, TimeUnit.MILLISECONDS) } - // copy the maps inside synchonize to ensure not being modified + // copy the maps inside synchronize to ensure not being modified val (numExecutorsTarget, numLocalityAware) = synchronized { val numTarget = numExecutorsTargetPerResourceProfileId.toMap val numLocality = numLocalityAwareTasksPerResourceProfileId.toMap @@ -379,7 +379,7 @@ private[spark] class ExecutorAllocationManager( // We lower the target number of executors but don't actively kill any yet. Killing is // controlled separately by an idle timeout. It's still helpful to reduce - // the target number in case an executor just happens to get lost (eg., bad hardware, + // the target number in case an executor just happens to get lost (e.g., bad hardware, // or the cluster manager preempts it) -- in that case, there is no point in trying // to immediately get a new executor, since we wouldn't even use it yet. decrementExecutorsFromTarget(maxNeeded, rpId, updatesNeeded) diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala index 1bcd203f2e435..6dd36309378cc 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala @@ -941,7 +941,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) /** * Return a RDD containing only the elements in the inclusive range `lower` to `upper`. * If the RDD has been partitioned using a `RangePartitioner`, then this operation can be - * performed efficiently by only scanning the partitions that might containt matching elements. + * performed efficiently by only scanning the partitions that might contain matching elements. * Otherwise, a standard `filter` is applied to all partitions. * * @since 3.1.0 @@ -955,7 +955,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) /** * Return a RDD containing only the elements in the inclusive range `lower` to `upper`. * If the RDD has been partitioned using a `RangePartitioner`, then this operation can be - * performed efficiently by only scanning the partitions that might containt matching elements. + * performed efficiently by only scanning the partitions that might contain matching elements. * Otherwise, a standard `filter` is applied to all partitions. * * @since 3.1.0 diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala index 89b33945dfb08..306af24ada584 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala @@ -78,7 +78,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { /** * Internal method to this RDD; will read from cache if applicable, or otherwise compute it. 
- * This should ''not'' be called by users directly, but is available for implementors of custom + * This should ''not'' be called by users directly, but is available for implementers of custom * subclasses of RDD. */ def iterator(split: Partition, taskContext: TaskContext): JIterator[T] = diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala index 86a1ac31c0845..6d4dc3d3dfe92 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala @@ -48,14 +48,14 @@ import org.apache.spark.util._ private[spark] class PythonRDD( parent: RDD[_], func: PythonFunction, - preservePartitoning: Boolean, + preservePartitioning: Boolean, isFromBarrier: Boolean = false) extends RDD[Array[Byte]](parent) { override def getPartitions: Array[Partition] = firstParent.partitions override val partitioner: Option[Partitioner] = { - if (preservePartitoning) firstParent.partitioner else None + if (preservePartitioning) firstParent.partitioner else None } val asJavaRDD: JavaRDD[Array[Byte]] = JavaRDD.fromRDD(this) @@ -837,7 +837,7 @@ private[spark] class PythonBroadcast(@transient var path: String) extends Serial * We might be serializing a really large object from python -- we don't want * python to buffer the whole thing in memory, nor can it write to a file, * so we don't know the length in advance. So python writes it in chunks, each chunk - * preceeded by a length, till we get a "length" of -1 which serves as EOF. + * preceded by a length, till we get a "length" of -1 which serves as EOF. * * Tested from python tests. */ diff --git a/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala index d76fb7f9a20b3..f697892aacc83 100644 --- a/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala +++ b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala @@ -80,7 +80,7 @@ private[deploy] object JsonProtocol { } /** - * Export the [[ApplicationInfo]] to a Json objec. An [[ApplicationInfo]] consists of the + * Export the [[ApplicationInfo]] to a Json object. An [[ApplicationInfo]] consists of the * information of an application. * * @return a Json object containing the following fields: diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 7332c6d54c981..4aa393c514af6 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -311,7 +311,7 @@ private[spark] class SparkSubmit extends Logging { // In K8s client mode, when in the driver, add resolved jars early as we might need // them at the submit time for artifact downloading. // For example we might use the dependencies for downloading - // files from a Hadoop Compatible fs eg. S3. In this case the user might pass: + // files from a Hadoop Compatible fs e.g. S3. 
In this case the user might pass: // --packages com.amazonaws:aws-java-sdk:1.7.4:org.apache.hadoop:hadoop-aws:2.7.6 if (isKubernetesClusterModeDriver) { val loader = getSubmitClassLoader(sparkConf) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index e5341aff8ce66..e6df260bdeaa3 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -722,7 +722,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) /** * Replay the given log file, saving the application in the listing db. - * Visable for testing + * Visible for testing */ private[history] def doMergeApplicationListing( reader: EventLogFileReader, diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HybridStore.scala b/core/src/main/scala/org/apache/spark/deploy/history/HybridStore.scala index 1b8c7ff26e9f5..4eb5c15d4ed18 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HybridStore.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HybridStore.scala @@ -52,7 +52,7 @@ private[history] class HybridStore extends KVStore { // A background thread that dumps data from inMemoryStore to levelDB private var backgroundThread: Thread = null - // A hash map that stores all classes that had been writen to inMemoryStore + // A hash map that stores all classes that had been written to inMemoryStore // Visible for testing private[history] val klassMap = new ConcurrentHashMap[Class[_], Boolean] diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index efb0b2c26d9a9..c81ac778a32d1 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -685,7 +685,7 @@ private[spark] class Executor( // SPARK-20904: Do not report failure to driver if if happened during shut down. Because // libraries may set up shutdown hooks that race with running tasks during shutdown, // spurious failures may occur and can result in improper accounting in the driver (e.g. - // the task failure would not be ignored if the shutdown happened because of premption, + // the task failure would not be ignored if the shutdown happened because of preemption, // instead of an app issue). if (!ShutdownHookManager.inShutdown()) { val (accums, accUpdates) = collectAccumulatorsAndResetStatusOnFailure(taskStartTimeNs) @@ -744,7 +744,7 @@ private[spark] class Executor( * sending a Thread.interrupt(), and monitoring the task until it finishes. * * Spark's current task cancellation / task killing mechanism is "best effort" because some tasks - * may not be interruptable or may not respond to their "killed" flags being set. If a significant + * may not be interruptible or may not respond to their "killed" flags being set. If a significant * fraction of a cluster's task slots are occupied by tasks that have been marked as killed but * remain running then this can lead to a situation where new jobs and tasks are starved of * resources that are being used by these zombie tasks. 
diff --git a/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala b/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala index d98d5e3b81aa0..bddd18adc683e 100644 --- a/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala +++ b/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala @@ -102,7 +102,7 @@ private[spark] class MetricsConfig(conf: SparkConf) extends Logging { * * @param prop the flat list of properties to "unflatten" based on prefixes * @param regex the regex that the prefix has to comply with - * @return an unflatted map, mapping prefix with sub-properties under that prefix + * @return an unflattened map, mapping prefix with sub-properties under that prefix */ def subProperties(prop: Properties, regex: Regex): mutable.HashMap[String, Properties] = { val subProperties = new mutable.HashMap[String, Properties] diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/PrometheusServlet.scala b/core/src/main/scala/org/apache/spark/metrics/sink/PrometheusServlet.scala index 59b863b89f75a..e9c2974622300 100644 --- a/core/src/main/scala/org/apache/spark/metrics/sink/PrometheusServlet.scala +++ b/core/src/main/scala/org/apache/spark/metrics/sink/PrometheusServlet.scala @@ -56,7 +56,7 @@ private[spark] class PrometheusServlet( def getMetricsSnapshot(request: HttpServletRequest): String = { import scala.collection.JavaConverters._ - val guagesLabel = """{type="gauges"}""" + val gaugesLabel = """{type="gauges"}""" val countersLabel = """{type="counters"}""" val metersLabel = countersLabel val histogramslabels = """{type="histograms"}""" @@ -65,8 +65,8 @@ private[spark] class PrometheusServlet( val sb = new StringBuilder() registry.getGauges.asScala.foreach { case (k, v) => if (!v.getValue.isInstanceOf[String]) { - sb.append(s"${normalizeKey(k)}Number$guagesLabel ${v.getValue}\n") - sb.append(s"${normalizeKey(k)}Value$guagesLabel ${v.getValue}\n") + sb.append(s"${normalizeKey(k)}Number$gaugesLabel ${v.getValue}\n") + sb.append(s"${normalizeKey(k)}Value$gaugesLabel ${v.getValue}\n") } } registry.getCounters.asScala.foreach { case (k, v) => diff --git a/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala index 943abae17a911..39f69567981ea 100644 --- a/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala @@ -173,7 +173,7 @@ class DoubleRDDFunctions(self: RDD[Double]) extends Logging with Serializable { if (buckets.length < 2) { throw new IllegalArgumentException("buckets array must have at least two elements") } - // The histogramPartition function computes the partail histogram for a given + // The histogramPartition function computes the partial histogram for a given // partition. The provided bucketFunction determines which bucket in the array // to increment or returns None if there is no bucket. 
This is done so we can // specialize for uniformly distributed buckets and save the O(log n) binary diff --git a/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala index 5b1c024257529..3cefcb16d6eb1 100644 --- a/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala @@ -88,10 +88,10 @@ class OrderedRDDFunctions[K : Ordering : ClassTag, val rddToFilter: RDD[P] = self.partitioner match { case Some(rp: RangePartitioner[K, V]) => - val partitionIndicies = (rp.getPartition(lower), rp.getPartition(upper)) match { + val partitionIndices = (rp.getPartition(lower), rp.getPartition(upper)) match { case (l, u) => Math.min(l, u) to Math.max(l, u) } - PartitionPruningRDD.create(self, partitionIndicies.contains) + PartitionPruningRDD.create(self, partitionIndices.contains) case _ => self } diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index 15b00a4496da6..65b39c4b65603 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -327,7 +327,7 @@ abstract class RDD[T: ClassTag]( /** * Internal method to this RDD; will read from cache if applicable, or otherwise compute it. - * This should ''not'' be called by users directly, but is available for implementors of custom + * This should ''not'' be called by users directly, but is available for implementers of custom * subclasses of RDD. */ final def iterator(split: Partition, context: TaskContext): Iterator[T] = { diff --git a/core/src/main/scala/org/apache/spark/resource/TaskResourceRequest.scala b/core/src/main/scala/org/apache/spark/resource/TaskResourceRequest.scala index d3f979fa8672f..12ef34241f9cb 100644 --- a/core/src/main/scala/org/apache/spark/resource/TaskResourceRequest.scala +++ b/core/src/main/scala/org/apache/spark/resource/TaskResourceRequest.scala @@ -20,7 +20,7 @@ package org.apache.spark.resource import org.apache.spark.annotation.{Evolving, Since} /** - * A task resource request. This is used in conjuntion with the ResourceProfile to + * A task resource request. This is used in conjunction with the ResourceProfile to * programmatically specify the resources needed for an RDD that will be applied at the * stage level. 
* diff --git a/core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala b/core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala index fcb9fe422c0d4..5864e9e2ceac0 100644 --- a/core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala +++ b/core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala @@ -254,14 +254,14 @@ private[netty] class NettyRpcEnv( val timeoutCancelable = timeoutScheduler.schedule(new Runnable { override def run(): Unit = { - val remoteReceAddr = if (remoteAddr == null) { + val remoteRecAddr = if (remoteAddr == null) { Try { message.receiver.client.getChannel.remoteAddress() }.toOption.orNull } else { remoteAddr } - onFailure(new TimeoutException(s"Cannot receive any reply from ${remoteReceAddr} " + + onFailure(new TimeoutException(s"Cannot receive any reply from ${remoteRecAddr} " + s"in ${timeout.duration}")) } }, timeout.duration.toNanos, TimeUnit.NANOSECONDS) diff --git a/core/src/main/scala/org/apache/spark/scheduler/BarrierJobAllocationFailed.scala b/core/src/main/scala/org/apache/spark/scheduler/BarrierJobAllocationFailed.scala index 043c6b90384b4..8f0764ed1a61e 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/BarrierJobAllocationFailed.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/BarrierJobAllocationFailed.scala @@ -45,10 +45,10 @@ private[spark] object BarrierJobAllocationFailed { val ERROR_MESSAGE_RUN_BARRIER_WITH_UNSUPPORTED_RDD_CHAIN_PATTERN = "[SPARK-24820][SPARK-24821]: Barrier execution mode does not allow the following pattern of " + "RDD chain within a barrier stage:\n1. Ancestor RDDs that have different number of " + - "partitions from the resulting RDD (eg. union()/coalesce()/first()/take()/" + + "partitions from the resulting RDD (e.g. union()/coalesce()/first()/take()/" + "PartitionPruningRDD). A workaround for first()/take() can be barrierRdd.collect().head " + "(scala) or barrierRdd.collect()[0] (python).\n" + - "2. An RDD that depends on multiple barrier RDDs (eg. barrierRdd1.zip(barrierRdd2))." + "2. An RDD that depends on multiple barrier RDDs (e.g. barrierRdd1.zip(barrierRdd2))." // Error message when running a barrier stage with dynamic resource allocation enabled. val ERROR_MESSAGE_RUN_BARRIER_WITH_DYN_ALLOCATION = diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index 6fb0fb93f253b..02f5bb8cccd52 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -409,9 +409,9 @@ private[spark] class DAGScheduler( /** * Check to make sure we don't launch a barrier stage with unsupported RDD chain pattern. The * following patterns are not supported: - * 1. Ancestor RDDs that have different number of partitions from the resulting RDD (eg. + * 1. Ancestor RDDs that have different number of partitions from the resulting RDD (e.g. * union()/coalesce()/first()/take()/PartitionPruningRDD); - * 2. An RDD that depends on multiple barrier RDDs (eg. barrierRdd1.zip(barrierRdd2)). + * 2. An RDD that depends on multiple barrier RDDs (e.g. barrierRdd1.zip(barrierRdd2)). */ private def checkBarrierStageWithRDDChainPattern(rdd: RDD[_], numTasksInStage: Int): Unit = { if (rdd.isBarrier() && @@ -459,7 +459,7 @@ private[spark] class DAGScheduler( /** * We don't support run a barrier stage with dynamic resource allocation enabled, it shall lead - * to some confusing behaviors (eg. 
with dynamic resource allocation enabled, it may happen that + * to some confusing behaviors (e.g. with dynamic resource allocation enabled, it may happen that * we acquire some executors (but not enough to launch all the tasks in a barrier stage) and * later release them due to executor idle time expire, and then acquire again). * @@ -1555,7 +1555,7 @@ private[spark] class DAGScheduler( event.reason) if (!stageIdToStage.contains(task.stageId)) { - // The stage may have already finished when we get this event -- eg. maybe it was a + // The stage may have already finished when we get this event -- e.g. maybe it was a // speculative task. It is important that we send the TaskEnd event in any case, so listeners // are properly notified and can chose to handle it. For instance, some listeners are // doing their own accounting and if they don't get the task end event they think diff --git a/core/src/main/scala/org/apache/spark/scheduler/HealthTracker.scala b/core/src/main/scala/org/apache/spark/scheduler/HealthTracker.scala index 9bbacea94bf68..c6b8dca3597ba 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/HealthTracker.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/HealthTracker.scala @@ -32,7 +32,7 @@ import org.apache.spark.util.{Clock, SystemClock, Utils} * additional logic for exclusion of executors and nodes for individual tasks and stages which * works in concert with the logic here. * - * The tracker needs to deal with a variety of workloads, eg.: + * The tracker needs to deal with a variety of workloads, e.g.: * * * bad user code -- this may lead to many task failures, but that should not count against * individual executors @@ -362,7 +362,7 @@ private[scheduler] class HealthTracker ( * Apply the timeout to individual tasks. This is to prevent one-off failures that are very * spread out in time (and likely have nothing to do with problems on the executor) from * triggering exlusion. However, note that we do *not* remove executors and nodes from - * being excluded as we expire individual task failures -- each have their own timeout. Eg., + * being excluded as we expire individual task failures -- each have their own timeout. E.g., * suppose: * * timeout = 10, maxFailuresPerExec = 2 * * Task 1 fails on exec 1 at time 0 diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index 0cfa76583bfbb..914fccc1a67cd 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -216,7 +216,7 @@ private[spark] class TaskSetManager( /** * Track the set of locality levels which are valid given the tasks locality preferences and * the set of currently available executors. This is updated as executors are added and removed. - * This allows a performance optimization, of skipping levels that aren't relevant (eg., skip + * This allows a performance optimization, of skipping levels that aren't relevant (e.g., skip * PROCESS_LOCAL if no tasks could be run PROCESS_LOCAL for the current set of executors). 
*/ private[scheduler] var myLocalityLevels = computeValidLocalityLevels() diff --git a/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala b/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala index a4df0d543ecbe..4ebb7b0defd7f 100644 --- a/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala +++ b/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala @@ -167,7 +167,7 @@ private[spark] object CryptoStreamUtils extends Logging { } /** - * SPARK-25535. The commons-cryto library will throw InternalError if something goes + * SPARK-25535. The commons-crypto library will throw InternalError if something goes * wrong, and leave bad state behind in the Java wrappers, so it's not safe to use them * afterwards. This wrapper detects that situation and avoids further calls into the * commons-crypto code, while still allowing the underlying streams to be closed. diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index 924601f92c5b8..072702b343328 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -1103,7 +1103,7 @@ private[spark] class BlockManager( blockSize: Long): Option[ManagedBuffer] = { val file = ExecutorDiskUtils.getFile(localDirs, subDirsPerLocalDir, blockId.name) if (file.exists()) { - val mangedBuffer = securityManager.getIOEncryptionKey() match { + val managedBuffer = securityManager.getIOEncryptionKey() match { case Some(key) => // Encrypted blocks cannot be memory mapped; return a special object that does decryption // and provides InputStream / FileRegion implementations for reading the data. @@ -1114,7 +1114,7 @@ private[spark] class BlockManager( val transportConf = SparkTransportConf.fromSparkConf(conf, "shuffle") new FileSegmentManagedBuffer(transportConf, file, 0, file.length) } - Some(mangedBuffer) + Some(managedBuffer) } else { None } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala index 4d565511704d4..eada4b3ee2e38 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala @@ -357,7 +357,7 @@ class BlockManagerMasterEndpoint( blockLocations.remove(blockId) logWarning(s"No more replicas available for $blockId !") } else if (proactivelyReplicate && (blockId.isRDD || blockId.isInstanceOf[TestBlockId])) { - // As a heursitic, assume single executor failure to find out the number of replicas that + // As a heuristic, assume single executor failure to find out the number of replicas that // existed before failure val maxReplicas = locations.size + 1 val i = (new Random(blockId.hashCode)).nextInt(locations.size) diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala index 5f5a08fe0e574..cfe15eb832273 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala @@ -85,7 +85,7 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We } // The timeline library treats contents as HTML, so we have to escape them. 
We need to add - // extra layers of escaping in order to embed this in a Javascript string literal. + // extra layers of escaping in order to embed this in a JavaScript string literal. val escapedDesc = Utility.escape(jobDescription) val jsEscapedDescForTooltip = StringEscapeUtils.escapeEcmaScript(Utility.escape(escapedDesc)) val jsEscapedDescForLabel = StringEscapeUtils.escapeEcmaScript(escapedDesc) diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala index 19eccc5209b8e..c40e1bc248a49 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala @@ -68,7 +68,7 @@ private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIP .getOrElse(System.currentTimeMillis()) // The timeline library treats contents as HTML, so we have to escape them. We need to add - // extra layers of escaping in order to embed this in a Javascript string literal. + // extra layers of escaping in order to embed this in a JavaScript string literal. val escapedName = Utility.escape(name) val jsEscapedNameForTooltip = StringEscapeUtils.escapeEcmaScript(Utility.escape(escapedName)) val jsEscapedNameForLabel = StringEscapeUtils.escapeEcmaScript(escapedName) diff --git a/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala b/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala index 6ffd6605f75b8..7e2b9c72ad91b 100644 --- a/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala +++ b/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala @@ -285,7 +285,7 @@ private[spark] object ClosureCleaner extends Logging { logDebug(s" + outermost object is a closure, so we clone it: ${outermostClass}") } else if (outermostClass.getName.startsWith("$line")) { // SPARK-14558: if the outermost object is a REPL line object, we should clone - // and clean it as it may carray a lot of unnecessary information, + // and clean it as it may carry a lot of unnecessary information, // e.g. hadoop conf, spark conf, etc. logDebug(s" + outermost object is a REPL line object, so we clone it:" + s" ${outermostClass}") diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 71a310a4279ad..accf3d7c0d333 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -376,7 +376,7 @@ private[spark] object Utils extends Logging { * This returns a new InputStream which contains the same data as the original input stream. * It may be entirely on in-memory buffer, or it may be a combination of in-memory data, and then * continue to read from the original stream. The only real use of this is if the original input - * stream will potentially detect corruption while the data is being read (eg. from compression). + * stream will potentially detect corruption while the data is being read (e.g. from compression). * This allows for an eager check of corruption in the first maxSize bytes of data. 
* * @return An InputStream which includes all data from the original stream (combining buffered @@ -1067,20 +1067,20 @@ private[spark] object Utils extends Logging { } // checks if the hostport contains IPV6 ip and parses the host, port if (hostPort != null && hostPort.split(":").length > 2) { - val indx: Int = hostPort.lastIndexOf("]:") - if (-1 == indx) { + val index: Int = hostPort.lastIndexOf("]:") + if (-1 == index) { return setDefaultPortValue } - val port = hostPort.substring(indx + 2).trim() - val retval = (hostPort.substring(0, indx + 1).trim(), if (port.isEmpty) 0 else port.toInt) + val port = hostPort.substring(index + 2).trim() + val retval = (hostPort.substring(0, index + 1).trim(), if (port.isEmpty) 0 else port.toInt) hostPortParseResults.putIfAbsent(hostPort, retval) } else { - val indx: Int = hostPort.lastIndexOf(':') - if (-1 == indx) { + val index: Int = hostPort.lastIndexOf(':') + if (-1 == index) { return setDefaultPortValue } - val port = hostPort.substring(indx + 1).trim() - val retval = (hostPort.substring(0, indx).trim(), if (port.isEmpty) 0 else port.toInt) + val port = hostPort.substring(index + 1).trim() + val retval = (hostPort.substring(0, index).trim(), if (port.isEmpty) 0 else port.toInt) hostPortParseResults.putIfAbsent(hostPort, retval) } @@ -2854,11 +2854,11 @@ private[spark] object Utils extends Logging { if (lastDollarIndex < s.length - 1) { // The last char is not a dollar sign if (lastDollarIndex == -1 || !s.contains("$iw")) { - // The name does not have dollar sign or is not an intepreter + // The name does not have dollar sign or is not an interpreter // generated class, so we should return the full string s } else { - // The class name is intepreter generated, + // The class name is interpreter generated, // return the part after the last dollar sign // This is the same behavior as getClass.getSimpleName s.substring(lastDollarIndex + 1) diff --git a/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala b/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala index 2c3730de08b5b..8635f1a3d702e 100644 --- a/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala +++ b/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala @@ -193,7 +193,7 @@ private[spark] object ChunkedByteBuffer { length: Long): ChunkedByteBuffer = { // We do *not* memory map the file, because we may end up putting this into the memory store, // and spark currently is not expecting memory-mapped buffers in the memory store, it conflicts - // with other parts that manage the lifecyle of buffers and dispose them. See SPARK-25422. + // with other parts that manage the lifecycle of buffers and dispose them. See SPARK-25422. 
val is = new FileInputStream(file) ByteStreams.skipFully(is, offset) val in = new LimitedInputStream(is, length) diff --git a/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java b/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java index ee8e38c24b47f..df1d306e628a9 100644 --- a/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java +++ b/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java @@ -68,10 +68,10 @@ public class UnsafeShuffleWriterSuite { static final int DEFAULT_INITIAL_SORT_BUFFER_SIZE = 4096; - static final int NUM_PARTITITONS = 4; + static final int NUM_PARTITIONS = 4; TestMemoryManager memoryManager; TaskMemoryManager taskMemoryManager; - final HashPartitioner hashPartitioner = new HashPartitioner(NUM_PARTITITONS); + final HashPartitioner hashPartitioner = new HashPartitioner(NUM_PARTITIONS); File mergedOutputFile; File tempDir; long[] partitionSizesInMergedFile; @@ -194,7 +194,7 @@ private void assertSpillFilesWereCleanedUp() { private List> readRecordsFromFile() throws IOException { final ArrayList> recordsList = new ArrayList<>(); long startOffset = 0; - for (int i = 0; i < NUM_PARTITITONS; i++) { + for (int i = 0; i < NUM_PARTITIONS; i++) { final long partitionSize = partitionSizesInMergedFile[i]; if (partitionSize > 0) { FileInputStream fin = new FileInputStream(mergedOutputFile); @@ -253,7 +253,7 @@ public void writeEmptyIterator() throws Exception { assertTrue(mapStatus.isDefined()); assertTrue(mergedOutputFile.exists()); assertEquals(0, spillFilesCreated.size()); - assertArrayEquals(new long[NUM_PARTITITONS], partitionSizesInMergedFile); + assertArrayEquals(new long[NUM_PARTITIONS], partitionSizesInMergedFile); assertEquals(0, taskMetrics.shuffleWriteMetrics().recordsWritten()); assertEquals(0, taskMetrics.shuffleWriteMetrics().bytesWritten()); assertEquals(0, taskMetrics.diskBytesSpilled()); @@ -264,7 +264,7 @@ public void writeEmptyIterator() throws Exception { public void writeWithoutSpilling() throws Exception { // In this example, each partition should have exactly one record: final ArrayList> dataToWrite = new ArrayList<>(); - for (int i = 0; i < NUM_PARTITITONS; i++) { + for (int i = 0; i < NUM_PARTITIONS; i++) { dataToWrite.add(new Tuple2<>(i, i)); } final UnsafeShuffleWriter writer = createWriter(true); diff --git a/core/src/test/java/test/org/apache/spark/JavaAPISuite.java b/core/src/test/java/test/org/apache/spark/JavaAPISuite.java index dbaca71c5fdc3..e73ac0e9fb7a6 100644 --- a/core/src/test/java/test/org/apache/spark/JavaAPISuite.java +++ b/core/src/test/java/test/org/apache/spark/JavaAPISuite.java @@ -1518,7 +1518,7 @@ public void testAsyncActionErrorWrapping() throws Exception { JavaFutureAction future = rdd.map(new BuggyMapFunction<>()).countAsync(); try { future.get(2, TimeUnit.SECONDS); - fail("Expected future.get() for failed job to throw ExcecutionException"); + fail("Expected future.get() for failed job to throw ExecutionException"); } catch (ExecutionException ee) { assertTrue(Throwables.getStackTraceAsString(ee).contains("Custom exception!")); } diff --git a/core/src/test/scala/org/apache/spark/CheckpointSuite.scala b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala index 21090e98ea285..e42df0821589b 100644 --- a/core/src/test/scala/org/apache/spark/CheckpointSuite.scala +++ b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala @@ -635,12 +635,12 @@ class CheckpointStorageSuite extends SparkFunSuite with LocalSparkContext { 
// Verify that RDD is checkpointed assert(rdd.firstParent.isInstanceOf[ReliableCheckpointRDD[_]]) val checkpointedRDD = rdd.firstParent.asInstanceOf[ReliableCheckpointRDD[_]] - val partiton = checkpointedRDD.partitions(0) - assert(!checkpointedRDD.cachedPreferredLocations.asMap.containsKey(partiton)) + val partition = checkpointedRDD.partitions(0) + assert(!checkpointedRDD.cachedPreferredLocations.asMap.containsKey(partition)) - val preferredLoc = checkpointedRDD.preferredLocations(partiton) - assert(checkpointedRDD.cachedPreferredLocations.asMap.containsKey(partiton)) - assert(preferredLoc == checkpointedRDD.cachedPreferredLocations.get(partiton)) + val preferredLoc = checkpointedRDD.preferredLocations(partition) + assert(checkpointedRDD.cachedPreferredLocations.asMap.containsKey(partition)) + assert(preferredLoc == checkpointedRDD.cachedPreferredLocations.get(partition)) } } @@ -653,7 +653,7 @@ class CheckpointStorageSuite extends SparkFunSuite with LocalSparkContext { val rdd = sc.makeRDD(1 to 200, numSlices = 4).repartition(1).mapPartitions { iter => iter.map { i => if (i > 100 && TaskContext.get().stageAttemptNumber() == 0) { - // throw new SparkException("Make first attemp failed.") + // throw new SparkException("Make first attempt failed.") // Throw FetchFailedException to explicitly trigger stage resubmission. // A normal exception will only trigger task resubmission in the same stage. throw new FetchFailedException(null, 0, 0L, 0, 0, "Fake") diff --git a/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala b/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala index 81530a8fda84d..5434e82c95b1b 100644 --- a/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala @@ -368,7 +368,7 @@ class CleanerTester( val toBeCleanedRDDIds = new HashSet[Int] ++= rddIds val toBeCleanedShuffleIds = new HashSet[Int] ++= shuffleIds - val toBeCleanedBroadcstIds = new HashSet[Long] ++= broadcastIds + val toBeCleanedBroadcastIds = new HashSet[Long] ++= broadcastIds val toBeCheckpointIds = new HashSet[Long] ++= checkpointIds val isDistributed = !sc.isLocal @@ -384,7 +384,7 @@ class CleanerTester( } def broadcastCleaned(broadcastId: Long): Unit = { - toBeCleanedBroadcstIds.synchronized { toBeCleanedBroadcstIds -= broadcastId } + toBeCleanedBroadcastIds.synchronized { toBeCleanedBroadcastIds -= broadcastId } logInfo("Broadcast " + broadcastId + " cleaned") } @@ -508,8 +508,8 @@ class CleanerTester( val s2 = toBeCleanedShuffleIds.synchronized { toBeCleanedShuffleIds.toSeq.sorted.mkString("[", ", ", "]") } - val s3 = toBeCleanedBroadcstIds.synchronized { - toBeCleanedBroadcstIds.toSeq.sorted.mkString("[", ", ", "]") + val s3 = toBeCleanedBroadcastIds.synchronized { + toBeCleanedBroadcastIds.toSeq.sorted.mkString("[", ", ", "]") } s""" |\tRDDs = $s1 @@ -521,7 +521,7 @@ class CleanerTester( private def isAllCleanedUp = toBeCleanedRDDIds.synchronized { toBeCleanedRDDIds.isEmpty } && toBeCleanedShuffleIds.synchronized { toBeCleanedShuffleIds.isEmpty } && - toBeCleanedBroadcstIds.synchronized { toBeCleanedBroadcstIds.isEmpty } && + toBeCleanedBroadcastIds.synchronized { toBeCleanedBroadcastIds.isEmpty } && toBeCheckpointIds.synchronized { toBeCheckpointIds.isEmpty } private def getRDDBlocks(rddId: Int): Seq[BlockId] = { diff --git a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala index d1edb80e40b21..c1269a9c91049 
100644 --- a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala @@ -268,7 +268,7 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { test("add executors multiple profiles initial num same as needed") { // test when the initial number of executors equals the number needed for the first - // stage using a non default profile to make sure we request the intitial number + // stage using a non default profile to make sure we request the initial number // properly. Here initial is 2, each executor in ResourceProfile 1 can have 2 tasks // per executor, and start a stage with 4 tasks, which would need 2 executors. val clock = new ManualClock(8888L) diff --git a/core/src/test/scala/org/apache/spark/FileSuite.scala b/core/src/test/scala/org/apache/spark/FileSuite.scala index e9ee6b5dfb665..f953bf4043f33 100644 --- a/core/src/test/scala/org/apache/spark/FileSuite.scala +++ b/core/src/test/scala/org/apache/spark/FileSuite.scala @@ -170,7 +170,7 @@ class FileSuite extends SparkFunSuite with LocalSparkContext { val nums = sc.makeRDD(1 to 3).map(x => (x, "a" * x)) // (1,a), (2,aa), (3,aaa) nums.saveAsSequenceFile(outputDir) // Similar to the tests above, we read a SequenceFile, but this time we pass type params - // that are convertable to Writable instead of calling sequenceFile[IntWritable, Text] + // that are convertible to Writable instead of calling sequenceFile[IntWritable, Text] val output1 = sc.sequenceFile[Int, String](outputDir) assert(output1.collect().toList === List((1, "a"), (2, "aa"), (3, "aaa"))) // Also try having one type be a subclass of Writable and one not diff --git a/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala b/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala index e97b9d5d6bea6..eff4fd20d7fca 100644 --- a/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala +++ b/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala @@ -21,7 +21,7 @@ import java.io.{File, FileOutputStream, OutputStream} /** * A base class for generate benchmark results to a file. - * For JDK9+, JDK major version number is added to the file names to distingush the results. + * For JDK9+, JDK major version number is added to the file names to distinguish the results. 
*/ abstract class BenchmarkBase { var output: Option[OutputStream] = None diff --git a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala index 0b0754be2f56f..3b8677742ca16 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala @@ -926,8 +926,8 @@ class FsHistoryProviderSuite extends SparkFunSuite with Matchers with Logging { oldProvider.listing.setMetadata(meta) oldProvider.stop() - val mistatchedVersionProvider = new FsHistoryProvider(conf) - assert(mistatchedVersionProvider.listing.count(classOf[ApplicationInfoWrapper]) === 0) + val mismatchedVersionProvider = new FsHistoryProvider(conf) + assert(mismatchedVersionProvider.listing.count(classOf[ApplicationInfoWrapper]) === 0) } test("invalidate cached UI") { diff --git a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala index a46799df069d6..b1b97a61ed1f0 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala @@ -784,7 +784,7 @@ class MasterSuite extends SparkFunSuite var worker: MockExecutorLaunchFailWorker = null try { val conf = new SparkConf() - // SPARK-32250: When running test on Github Action machine, the available processors in JVM + // SPARK-32250: When running test on GitHub Action machine, the available processors in JVM // is only 2, while on Jenkins it's 32. For this specific test, 2 available processors, which // also decides number of threads in Dispatcher, is not enough to consume the messages. In // the worst situation, MockExecutorLaunchFailWorker would occupy these 2 threads for diff --git a/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala index 5bbd60f99f77e..8ed861ad34ea7 100644 --- a/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala @@ -342,7 +342,7 @@ class WorkerSuite extends SparkFunSuite with Matchers with BeforeAndAfter { testWorkDirCleanupAndRemoveMetadataWithConfig(true) } - test("WorkdDirCleanup cleans only app dirs when" + + test("WorkDirCleanup cleans only app dirs when" + "spark.shuffle.service.db.enabled=false") { testWorkDirCleanupAndRemoveMetadataWithConfig(false) } diff --git a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala index 1326ae3c11a06..5b868604ecf94 100644 --- a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala +++ b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala @@ -552,7 +552,7 @@ class ExecutorSuite extends SparkFunSuite if (poll) { executor.metricsPoller.poll() } - executor.killAllTasks(true, "Killed task, eg. because of speculative execution") + executor.killAllTasks(true, "Killed task, e.g. 
because of speculative execution") } else { timedOut.set(true) } diff --git a/core/src/test/scala/org/apache/spark/internal/io/FileCommitProtocolInstantiationSuite.scala b/core/src/test/scala/org/apache/spark/internal/io/FileCommitProtocolInstantiationSuite.scala index 2bd32fc927e21..778f748f83950 100644 --- a/core/src/test/scala/org/apache/spark/internal/io/FileCommitProtocolInstantiationSuite.scala +++ b/core/src/test/scala/org/apache/spark/internal/io/FileCommitProtocolInstantiationSuite.scala @@ -75,7 +75,7 @@ class FileCommitProtocolInstantiationSuite extends SparkFunSuite { /** * Create a classic two-arg protocol instance. - * @param dynamic dyanmic partitioning mode + * @param dynamic dynamic partitioning mode * @return the instance */ private def instantiateClassic(dynamic: Boolean): ClassicConstructorCommitProtocol = { @@ -88,7 +88,7 @@ class FileCommitProtocolInstantiationSuite extends SparkFunSuite { /** * Create a three-arg protocol instance. - * @param dynamic dyanmic partitioning mode + * @param dynamic dynamic partitioning mode * @return the instance */ private def instantiateNew( diff --git a/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala b/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala index 330347299ab56..905bb8110736d 100644 --- a/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala +++ b/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala @@ -213,7 +213,7 @@ class InputOutputMetricsSuite extends SparkFunSuite with SharedSparkContext } // Computing the amount of bytes read for a cartesian operation is a little involved. - // Cartesian interleaves reads between two partitions eg. p1 and p2. + // Cartesian interleaves reads between two partitions e.g. p1 and p2. 
// Here are the steps: // 1) First it creates an iterator for p1 // 2) Creates an iterator for p2 diff --git a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala index 182c3c09e0524..c8a8f37212a82 100644 --- a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala +++ b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala @@ -88,7 +88,7 @@ class NettyBlockTransferServiceSuite } test("SPARK-27637: test fetch block with executor dead") { - implicit val exectionContext = ExecutionContext.global + implicit val executionContext = ExecutionContext.global val port = 17634 + Random.nextInt(10000) logInfo("random port for test: " + port) diff --git a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala index 2de4b109e40e9..a669993352fe7 100644 --- a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala @@ -28,7 +28,7 @@ import org.apache.hadoop.fs.FileSystem import org.apache.hadoop.mapred._ import org.apache.hadoop.mapreduce.{Job => NewJob, JobContext => NewJobContext, OutputCommitter => NewOutputCommitter, OutputFormat => NewOutputFormat, - RecordWriter => NewRecordWriter, TaskAttemptContext => NewTaskAttempContext} + RecordWriter => NewRecordWriter, TaskAttemptContext => NewTaskAttemptContext} import org.apache.hadoop.util.Progressable import org.scalatest.Assertions @@ -892,7 +892,7 @@ class FakeOutputFormat() extends OutputFormat[Integer, Integer]() { */ class NewFakeWriter extends NewRecordWriter[Integer, Integer] { - def close(p1: NewTaskAttempContext): Unit = () + def close(p1: NewTaskAttemptContext): Unit = () def write(p1: Integer, p2: Integer): Unit = () @@ -901,24 +901,24 @@ class NewFakeWriter extends NewRecordWriter[Integer, Integer] { class NewFakeCommitter extends NewOutputCommitter { def setupJob(p1: NewJobContext): Unit = () - def needsTaskCommit(p1: NewTaskAttempContext): Boolean = false + def needsTaskCommit(p1: NewTaskAttemptContext): Boolean = false - def setupTask(p1: NewTaskAttempContext): Unit = () + def setupTask(p1: NewTaskAttemptContext): Unit = () - def commitTask(p1: NewTaskAttempContext): Unit = () + def commitTask(p1: NewTaskAttemptContext): Unit = () - def abortTask(p1: NewTaskAttempContext): Unit = () + def abortTask(p1: NewTaskAttemptContext): Unit = () } class NewFakeFormat() extends NewOutputFormat[Integer, Integer]() { def checkOutputSpecs(p1: NewJobContext): Unit = () - def getRecordWriter(p1: NewTaskAttempContext): NewRecordWriter[Integer, Integer] = { + def getRecordWriter(p1: NewTaskAttemptContext): NewRecordWriter[Integer, Integer] = { new NewFakeWriter() } - def getOutputCommitter(p1: NewTaskAttempContext): NewOutputCommitter = { + def getOutputCommitter(p1: NewTaskAttemptContext): NewOutputCommitter = { new NewFakeCommitter() } } @@ -958,7 +958,7 @@ class FakeFormatWithCallback() extends FakeOutputFormat { } class NewFakeWriterWithCallback extends NewFakeWriter { - override def close(p1: NewTaskAttempContext): Unit = { + override def close(p1: NewTaskAttemptContext): Unit = { FakeWriterWithCallback.calledBy += "close" } @@ -972,7 +972,7 @@ class NewFakeWriterWithCallback extends NewFakeWriter { } class NewFakeFormatWithCallback() extends NewFakeFormat { - override def getRecordWriter(p1: 
NewTaskAttempContext): NewRecordWriter[Integer, Integer] = { + override def getRecordWriter(p1: NewTaskAttemptContext): NewRecordWriter[Integer, Integer] = { new NewFakeWriterWithCallback() } } @@ -982,27 +982,27 @@ class YetAnotherFakeCommitter extends NewOutputCommitter with Assertions { JobID.jobid = j.getJobID().getId } - def needsTaskCommit(t: NewTaskAttempContext): Boolean = false + def needsTaskCommit(t: NewTaskAttemptContext): Boolean = false - def setupTask(t: NewTaskAttempContext): Unit = { + def setupTask(t: NewTaskAttemptContext): Unit = { val jobId = t.getTaskAttemptID().getJobID().getId assert(jobId === JobID.jobid) } - def commitTask(t: NewTaskAttempContext): Unit = {} + def commitTask(t: NewTaskAttemptContext): Unit = {} - def abortTask(t: NewTaskAttempContext): Unit = {} + def abortTask(t: NewTaskAttemptContext): Unit = {} } class YetAnotherFakeFormat() extends NewOutputFormat[Integer, Integer]() { def checkOutputSpecs(j: NewJobContext): Unit = {} - def getRecordWriter(t: NewTaskAttempContext): NewRecordWriter[Integer, Integer] = { + def getRecordWriter(t: NewTaskAttemptContext): NewRecordWriter[Integer, Integer] = { new NewFakeWriter() } - def getOutputCommitter(t: NewTaskAttempContext): NewOutputCommitter = { + def getOutputCommitter(t: NewTaskAttemptContext): NewOutputCommitter = { new YetAnotherFakeCommitter() } } @@ -1021,7 +1021,7 @@ class ConfigTestFormat() extends NewFakeFormat() with Configurable { def getConf: Configuration = null - override def getRecordWriter(p1: NewTaskAttempContext): NewRecordWriter[Integer, Integer] = { + override def getRecordWriter(p1: NewTaskAttemptContext): NewRecordWriter[Integer, Integer] = { assert(setConfCalled, "setConf was never called") super.getRecordWriter(p1) } diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala index 8962fd6740bf6..df8ac2ef744cd 100644 --- a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala @@ -1102,7 +1102,7 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { } } - test("RDD.partitions() fails fast when partitions indicies are incorrect (SPARK-13021)") { + test("RDD.partitions() fails fast when partitions indices are incorrect (SPARK-13021)") { class BadRDD[T: ClassTag](prev: RDD[T]) extends RDD[T](prev) { override def compute(part: Partition, context: TaskContext): Iterator[T] = { diff --git a/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala b/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala index e8e8682e20ed4..eac45e6ac5801 100644 --- a/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala @@ -221,7 +221,7 @@ class ResourceUtilsSuite extends SparkFunSuite val conf = new SparkConf assume(!(Utils.isWindows)) withTempDir { dir => - val gpuDiscovery = createTempScriptWithExpectedOutput(dir, "gpuDisocveryScript", + val gpuDiscovery = createTempScriptWithExpectedOutput(dir, "gpuDiscoveryScript", """{"name": "gpu", "addresses": ["0", "1"]}""") conf.set(DRIVER_GPU_ID.amountConf, "2") conf.set(DRIVER_GPU_ID.discoveryScriptConf, gpuDiscovery) diff --git a/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcEnvSuite.scala b/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcEnvSuite.scala index c2730f90ed982..fe6d0db837bda 100644 --- a/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcEnvSuite.scala 
+++ b/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcEnvSuite.scala @@ -73,7 +73,7 @@ class NettyRpcEnvSuite extends RpcEnvSuite with MockitoSugar with TimeLimits { val nettyEnv = env.asInstanceOf[NettyRpcEnv] val client = mock[TransportClient] - val senderAddress = RpcAddress("locahost", 12345) + val senderAddress = RpcAddress("localhost", 12345) val receiverAddress = RpcEndpointAddress("localhost", 54321, "test") val receiver = new NettyRpcEndpointRef(nettyEnv.conf, receiverAddress, nettyEnv) diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index 58aa246b7358f..194e0dfe312d5 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -2569,7 +2569,7 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti val newTaskSet = taskSets(1) // 2 tasks should have been re-submitted, for tasks 0 and 1 (which ran on hostA). assert(newTaskSet.tasks.size === 2) - // Complete task 0 from the original task set (i.e., not hte one that's currently active). + // Complete task 0 from the original task set (i.e., not the one that's currently active). // This should still be counted towards the job being complete (but there's still one // outstanding task). runEvent(makeCompletionEvent(newTaskSet.tasks(0), Success, makeMapStatus("hostB", 2))) @@ -3057,7 +3057,7 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti assertResultStageFailToRollback(shuffleMapRdd) } - private def assertResultStageNotRollbacked(mapRdd: MyRDD): Unit = { + private def assertResultStageNotRolledBack(mapRdd: MyRDD): Unit = { val shuffleDep = new ShuffleDependency(mapRdd, new HashPartitioner(2)) val shuffleId = shuffleDep.shuffleId val finalRdd = new MyRDD(sc, 2, List(shuffleDep), tracker = mapOutputTracker) @@ -3097,7 +3097,7 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti val shuffleMapRdd = new MyCheckpointRDD(sc, 2, Nil, indeterminate = true) shuffleMapRdd.checkpoint() shuffleMapRdd.doCheckpoint() - assertResultStageNotRollbacked(shuffleMapRdd) + assertResultStageNotRolledBack(shuffleMapRdd) } } diff --git a/core/src/test/scala/org/apache/spark/scheduler/ReplayListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/ReplayListenerSuite.scala index e6fbf9b09d43d..cb50c7c959754 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/ReplayListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/ReplayListenerSuite.scala @@ -255,7 +255,7 @@ class ReplayListenerSuite extends SparkFunSuite with BeforeAndAfter with LocalSp /* * This is a dummy input stream that wraps another input stream but ends prematurely when - * reading at the specified position, throwing an EOFExeption. + * reading at the specified position, throwing an EOFException. 
*/ private class EarlyEOFInputStream(in: InputStream, failAtPos: Int) extends InputStream { private val countDown = new AtomicInteger(failAtPos) diff --git a/core/src/test/scala/org/apache/spark/scheduler/SchedulerIntegrationSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/SchedulerIntegrationSuite.scala index 0874163b0e946..88d2868b957f9 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/SchedulerIntegrationSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/SchedulerIntegrationSuite.scala @@ -44,7 +44,7 @@ import org.apache.spark.util.{CallSite, ThreadUtils, Utils} * TaskSetManagers. * * Test cases are configured by providing a set of jobs to submit, and then simulating interaction - * with spark's executors via a mocked backend (eg., task completion, task failure, executors + * with spark's executors via a mocked backend (e.g., task completion, task failure, executors * disconnecting, etc.). */ abstract class SchedulerIntegrationSuite[T <: MockBackend: ClassTag] extends SparkFunSuite @@ -372,7 +372,7 @@ private[spark] abstract class MockBackend( /** * Accessed by both scheduling and backend thread, so should be protected by this. - * Most likely the only thing that needs to be protected are the inidividual ExecutorTaskStatus, + * Most likely the only thing that needs to be protected are the individual ExecutorTaskStatus, * but for simplicity in this mock just lock the whole backend. */ def executorIdToExecutor: Map[String, ExecutorTaskStatus] @@ -535,8 +535,8 @@ class BasicSchedulerIntegrationSuite extends SchedulerIntegrationSuite[SingleCor */ testScheduler("super simple job") { def runBackend(): Unit = { - val (taskDescripition, _) = backend.beginTask() - backend.taskSuccess(taskDescripition, 42) + val (taskDescription, _) = backend.beginTask() + backend.taskSuccess(taskDescription, 42) } withBackend(runBackend _) { val jobFuture = submit(new MockRDD(sc, 10, Nil), (0 until 10).toArray) diff --git a/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala index a4a84b0e89809..d72744c5cc348 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala @@ -571,9 +571,9 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match } } - test("event queue size can be configued through spark conf") { + test("event queue size can be configured through spark conf") { // configure the shared queue size to be 1, event log queue size to be 2, - // and listner bus event queue size to be 5 + // and listener bus event queue size to be 5 val conf = new SparkConf(false) .set(LISTENER_BUS_EVENT_QUEUE_CAPACITY, 5) .set(s"spark.scheduler.listenerbus.eventqueue.${SHARED_QUEUE}.capacity", "1") @@ -593,7 +593,7 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match // check the size of shared queue is 1 as configured assert(bus.getQueueCapacity(SHARED_QUEUE) == Some(1)) // no specific size of status queue is configured, - // it shoud use the LISTENER_BUS_EVENT_QUEUE_CAPACITY + // it should use the LISTENER_BUS_EVENT_QUEUE_CAPACITY assert(bus.getQueueCapacity(APP_STATUS_QUEUE) == Some(5)) // check the size of event log queue is 5 as configured assert(bus.getQueueCapacity(EVENT_LOG_QUEUE) == Some(2)) diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala 
b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala index a760dda3897df..3bf6cc226c0aa 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala @@ -377,8 +377,8 @@ class TaskSetManagerSuite // offers not accepted due to task set zombies are not delay schedule rejects manager.isZombie = true - val (taskDesciption, delayReject) = manager.resourceOffer("exec2", "host2", ANY) - assert(taskDesciption.isEmpty) + val (taskDescription, delayReject) = manager.resourceOffer("exec2", "host2", ANY) + assert(taskDescription.isEmpty) assert(delayReject === false) manager.isZombie = false @@ -1322,7 +1322,7 @@ class TaskSetManagerSuite test("SPARK-19868: DagScheduler only notified of taskEnd when state is ready") { // dagScheduler.taskEnded() is async, so it may *seem* ok to call it before we've set all - // appropriate state, eg. isZombie. However, this sets up a race that could go the wrong way. + // appropriate state, e.g. isZombie. However, this sets up a race that could go the wrong way. // This is a super-focused regression test which checks the zombie state as soon as // dagScheduler.taskEnded() is called, to ensure we haven't introduced a race. sc = new SparkContext("local", "test") diff --git a/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala b/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala index 6ca1109791c35..a251c164a79ca 100644 --- a/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala @@ -234,7 +234,7 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { // Send two executor metrics update. Only update one metric to avoid a lot of boilerplate code. // The tasks are distributed among the two executors, so the executor-level metrics should - // hold half of the cummulative value of the metric being updated. + // hold half of the cumulative value of the metric being updated. 
Seq(1L, 2L).foreach { value => s1Tasks.foreach { task => val accum = new AccumulableInfo(1L, Some(InternalAccumulator.MEMORY_BYTES_SPILLED), diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index 144489c5f7922..44b6f1b82e75a 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -1712,12 +1712,12 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE val externalShuffleServicePort = StorageUtils.externalShuffleServicePort(conf) val port = store.blockTransferService.port val rack = Some("rack") - val blockManagerWithTopolgyInfo = BlockManagerId( + val blockManagerWithTopologyInfo = BlockManagerId( store.blockManagerId.executorId, store.blockManagerId.host, store.blockManagerId.port, rack) - store.blockManagerId = blockManagerWithTopolgyInfo + store.blockManagerId = blockManagerWithTopologyInfo val locations = Seq( BlockManagerId("executor4", otherHost, externalShuffleServicePort, rack), BlockManagerId("executor3", otherHost, port, rack), diff --git a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala index 4cd1fc19f1484..7640c17166222 100644 --- a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala @@ -506,9 +506,9 @@ class JsonProtocolSuite extends SparkFunSuite { val oldExecutorMetricsJson = JsonProtocol.executorMetricsToJson(executorMetrics) .removeField( _._1 == "MappedPoolMemory") - val exepectedExecutorMetrics = new ExecutorMetrics(Array(12L, 23L, 45L, 67L, + val expectedExecutorMetrics = new ExecutorMetrics(Array(12L, 23L, 45L, 67L, 78L, 89L, 90L, 123L, 456L, 0L, 40L, 20L, 20L, 10L, 20L, 10L)) - assertEquals(exepectedExecutorMetrics, + assertEquals(expectedExecutorMetrics, JsonProtocol.executorMetricsFromJson(oldExecutorMetricsJson)) } @@ -978,8 +978,8 @@ private[spark] object JsonProtocolSuite extends Assertions { private val stackTrace = { Array[StackTraceElement]( new StackTraceElement("Apollo", "Venus", "Mercury", 42), - new StackTraceElement("Afollo", "Vemus", "Mercurry", 420), - new StackTraceElement("Ayollo", "Vesus", "Blackberry", 4200) + new StackTraceElement("Afollo", "Vemus", "Mercurry", 420), /* odd spellings intentional */ + new StackTraceElement("Ayollo", "Vesus", "Blackberry", 4200) /* odd spellings intentional */ ) } diff --git a/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala b/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala index 6183ba9faa6b4..d669f2c655abb 100644 --- a/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala @@ -94,7 +94,7 @@ class SizeEstimatorSuite override def beforeEach(): Unit = { super.beforeEach() // Set the arch to 64-bit and compressedOops to true so that SizeEstimator - // provides identical results accross all systems in these tests. + // provides identical results across all systems in these tests. 
reinitializeSizeEstimator("amd64", "true") } diff --git a/docs/_plugins/include_example.rb b/docs/_plugins/include_example.rb index 6b4b1c652a81b..7d0e78738095e 100644 --- a/docs/_plugins/include_example.rb +++ b/docs/_plugins/include_example.rb @@ -66,10 +66,10 @@ def render(context) rendered_code + hint end - # Trim the code block so as to have the same indention, regardless of their positions in the + # Trim the code block so as to have the same indentation, regardless of their positions in the # code file. def trim_codeblock(lines) - # Select the minimum indention of the current code block. + # Select the minimum indentation of the current code block. min_start_spaces = lines .select { |l| l.strip.size !=0 } .map { |l| l[/\A */].size } diff --git a/docs/building-spark.md b/docs/building-spark.md index 73c527b7a5ed6..5106f2abd4187 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -273,7 +273,7 @@ Enable the profile (e.g. 2.13): # For sbt ./build/sbt -Pscala-2.13 compile -## Running Jenkins tests with Github Enterprise +## Running Jenkins tests with GitHub Enterprise To run tests with Jenkins: diff --git a/docs/configuration.md b/docs/configuration.md index 14ff38dac9b13..76494b04c9279 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -2170,7 +2170,7 @@ Apart from these, the following properties are also available, and may be useful 120s The timeout in seconds to wait to acquire a new executor and schedule a task before aborting a - TaskSet which is unschedulable because all executors are exluded due to task failures. + TaskSet which is unschedulable because all executors are excluded due to task failures. 2.4.1 diff --git a/docs/css/main.css b/docs/css/main.css index 8b279a157c2b6..271113c904d26 100755 --- a/docs/css/main.css +++ b/docs/css/main.css @@ -254,7 +254,7 @@ a:hover code { position: relative; background-color: #FFF; max-width: 914px; - line-height: 1.6; /* Inspired by Github's wiki style */ + line-height: 1.6; /* Inspired by GitHub's wiki style */ padding-left: 15px; } @@ -263,7 +263,7 @@ a:hover code { position: relative; background-color: #FFF; max-width: 914px; - line-height: 1.6; /* Inspired by Github's wiki style */ + line-height: 1.6; /* Inspired by GitHub's wiki style */ padding-left: 30px; min-height: 100vh; } diff --git a/docs/graphx-programming-guide.md b/docs/graphx-programming-guide.md index 50c9366a0999f..a1026669dc4fd 100644 --- a/docs/graphx-programming-guide.md +++ b/docs/graphx-programming-guide.md @@ -571,7 +571,7 @@ messages to the source and destination attributes. Think of `sendMsg` as the reduce function in map-reduce. -The [`aggregateMessages`][Graph.aggregateMessages] operator returns an `VertexRDD[Msg]` +The [`aggregateMessages`][Graph.aggregateMessages] operator returns a `VertexRDD[Msg]` containing the aggregate message (of type `Msg`) destined to each vertex. Vertices that did not receive a message are not included in the returned `VertexRDD`[VertexRDD]. @@ -874,7 +874,7 @@ change the `VertexId` thereby enabling the same `HashMap` data structures to be `HashMap` and implement the join by linear scan rather than costly point lookups. The `aggregateUsingIndex` operator is useful for efficient construction of a new `VertexRDD`[VertexRDD] from an -`RDD[(VertexId, A)]`. 
Conceptually, if I have constructed an `VertexRDD[B]` over a set of vertices, *which is a super-set* of the vertices in some `RDD[(VertexId, A)]` then I can reuse the index to both aggregate and then subsequently index the `RDD[(VertexId, A)]`. For example: diff --git a/docs/ml-migration-guide.md b/docs/ml-migration-guide.md index 4e6d68f5a8cf4..43b8de83a9d8c 100644 --- a/docs/ml-migration-guide.md +++ b/docs/ml-migration-guide.md @@ -281,7 +281,7 @@ Several deprecated methods were removed in the `spark.mllib` and `spark.ml` pack * `weights` in `LinearRegression` and `LogisticRegression` in `spark.ml` * `setMaxNumIterations` in `mllib.optimization.LBFGS` (marked as `DeveloperApi`) * `treeReduce` and `treeAggregate` in `mllib.rdd.RDDFunctions` (these functions are available on `RDD`s directly, and were marked as `DeveloperApi`) -* `defaultStategy` in `mllib.tree.configuration.Strategy` +* `defaultStrategy` in `mllib.tree.configuration.Strategy` * `build` in `mllib.tree.Node` * libsvm loaders for multiclass and load/save labeledData methods in `mllib.util.MLUtils` diff --git a/docs/mllib-clustering.md b/docs/mllib-clustering.md index 4cb2e259ccfbc..cc0c0e39e66f8 100644 --- a/docs/mllib-clustering.md +++ b/docs/mllib-clustering.md @@ -189,7 +189,7 @@ Refer to the [`PowerIterationClustering` Scala docs](api/scala/org/apache/spark/ [`PowerIterationClustering`](api/java/org/apache/spark/mllib/clustering/PowerIterationClustering.html) implements the PIC algorithm. -It takes an `JavaRDD` of `(srcId: Long, dstId: Long, similarity: Double)` tuples representing the +It takes a `JavaRDD` of `(srcId: Long, dstId: Long, similarity: Double)` tuples representing the affinity matrix. Calling `PowerIterationClustering.run` returns a [`PowerIterationClusteringModel`](api/java/org/apache/spark/mllib/clustering/PowerIterationClusteringModel.html) diff --git a/docs/mllib-data-types.md b/docs/mllib-data-types.md index 6d3b1a599d48b..ce4e6b8e05814 100644 --- a/docs/mllib-data-types.md +++ b/docs/mllib-data-types.md @@ -643,7 +643,7 @@ entries = sc.parallelize([MatrixEntry(0, 0, 1.2), MatrixEntry(1, 0, 2.1), Matrix # - or using (long, long, float) tuples: entries = sc.parallelize([(0, 0, 1.2), (1, 0, 2.1), (2, 1, 3.7)]) -# Create an CoordinateMatrix from an RDD of MatrixEntries. +# Create a CoordinateMatrix from an RDD of MatrixEntries. mat = CoordinateMatrix(entries) # Get its size. diff --git a/docs/monitoring.md b/docs/monitoring.md index 15a6cbd910210..c6105188f07ec 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -421,7 +421,7 @@ to handle the Spark Context setup and tear down. In addition to viewing the metrics in the UI, they are also available as JSON. This gives developers an easy way to create new visualizations and monitoring tools for Spark. The JSON is available for -both running applications, and in the history server. The endpoints are mounted at `/api/v1`. Eg., +both running applications, and in the history server. The endpoints are mounted at `/api/v1`. For example, for the history server, they would typically be accessible at `http://:18080/api/v1`, and for a running application, at `http://localhost:4040/api/v1`. @@ -951,11 +951,11 @@ These endpoints have been strongly versioned to make it easier to develop applic * Individual fields will never be removed for any given endpoint * New endpoints may be added * New fields may be added to existing endpoints -* New versions of the api may be added in the future as a separate endpoint (eg., `api/v2`). 
New versions are *not* required to be backwards compatible. +* New versions of the api may be added in the future as a separate endpoint (e.g., `api/v2`). New versions are *not* required to be backwards compatible. * Api versions may be dropped, but only after at least one minor release of co-existing with a new api version. Note that even when examining the UI of running applications, the `applications/[app-id]` portion is -still required, though there is only one application available. Eg. to see the list of jobs for the +still required, though there is only one application available. E.g. to see the list of jobs for the running app, you would go to `http://localhost:4040/api/v1/applications/[app-id]/jobs`. This is to keep the paths consistent in both modes. diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 5ec7a2c6f0bf4..71b7df8176d1b 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1079,7 +1079,7 @@ See the [configuration page](configuration.html) for information on Spark config 0.1 This sets the Memory Overhead Factor that will allocate memory to non-JVM memory, which includes off-heap memory allocations, non-JVM tasks, and various systems processes. For JVM-based jobs this value will default to 0.10 and 0.40 for non-JVM jobs. - This is done as non-JVM tasks need more non-JVM heap space and such tasks commonly fail with "Memory Overhead Exceeded" errors. This prempts this error with a higher default. + This is done as non-JVM tasks need more non-JVM heap space and such tasks commonly fail with "Memory Overhead Exceeded" errors. This preempts this error with a higher default. 2.4.0 @@ -1402,4 +1402,4 @@ Kubernetes does not tell Spark the addresses of the resources allocated to each ### Stage Level Scheduling Overview Stage level scheduling is supported on Kubernetes when dynamic allocation is enabled. This also requires spark.dynamicAllocation.shuffleTracking.enabled to be enabled since Kubernetes doesn't support an external shuffle service at this time. The order in which containers for different profiles is requested from Kubernetes is not guaranteed. Note that since dynamic allocation on Kubernetes requires the shuffle tracking feature, this means that executors from previous stages that used a different ResourceProfile may not idle timeout due to having shuffle data on them. This could result in using more cluster resources and in the worst case if there are no remaining resources on the Kubernetes cluster then Spark could potentially hang. You may consider looking at config spark.dynamicAllocation.shuffleTracking.timeout to set a timeout, but that could result in data having to be recomputed if the shuffle data is really needed. -Note, there is a difference in the way pod template resources are handled between the base default profile and custom ResourceProfiles. Any resources specified in the pod template file will only be used with the base default profile. If you create custom ResourceProfiles be sure to include all necessary resources there since the resources from the template file will not be propogated to custom ResourceProfiles. +Note, there is a difference in the way pod template resources are handled between the base default profile and custom ResourceProfiles. Any resources specified in the pod template file will only be used with the base default profile. 
If you create custom ResourceProfiles be sure to include all necessary resources there since the resources from the template file will not be propagated to custom ResourceProfiles. diff --git a/docs/running-on-mesos.md b/docs/running-on-mesos.md index 80591bd08650a..8c0bac1815bbd 100644 --- a/docs/running-on-mesos.md +++ b/docs/running-on-mesos.md @@ -857,7 +857,7 @@ See the [configuration page](configuration.html) for information on Spark config host Provides support for the `local:///` scheme to reference the app jar resource in cluster mode. - If user uses a local resource (`local:///path/to/jar`) and the config option is not used it defaults to `host` eg. + If user uses a local resource (`local:///path/to/jar`) and the config option is not used it defaults to `host` e.g. the mesos fetcher tries to get the resource from the host's file system. If the value is unknown it prints a warning msg in the dispatcher logs and defaults to `host`. If the value is `container` then spark submit in the container will use the jar in the container's path: diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 73c4930dadbd5..797d18a0d4139 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -644,7 +644,7 @@ YARN does not tell Spark the addresses of the resources allocated to each contai # Stage Level Scheduling Overview Stage level scheduling is supported on YARN when dynamic allocation is enabled. One thing to note that is YARN specific is that each ResourceProfile requires a different container priority on YARN. The mapping is simply the ResourceProfile id becomes the priority, on YARN lower numbers are higher priority. This means that profiles created earlier will have a higher priority in YARN. Normally this won't matter as Spark finishes one stage before starting another one, the only case this might have an affect is in a job server type scenario, so its something to keep in mind. -Note there is a difference in the way custom resources are handled between the base default profile and custom ResourceProfiles. To allow for the user to request YARN containers with extra resources without Spark scheduling on them, the user can specify resources via the spark.yarn.executor.resource. config. Those configs are only used in the base default profile though and do not get propogated into any other custom ResourceProfiles. This is because there would be no way to remove them if you wanted a stage to not have them. This results in your default profile getting custom resources defined in spark.yarn.executor.resource. plus spark defined resources of GPU or FPGA. Spark converts GPU and FPGA resources into the YARN built in types yarn.io/gpu) and yarn.io/fpga, but does not know the mapping of any other resources. Any other Spark custom resources are not propogated to YARN for the default profile. So if you want Spark to schedule based off a custom resource and have it requested from YARN, you must specify it in both YARN (spark.yarn.{driver/executor}.resource.) and Spark (spark.{driver/executor}.resource.) configs. Leave the Spark config off if you only want YARN containers with the extra resources but Spark not to schedule using them. Now for custom ResourceProfiles, it doesn't currently have a way to only specify YARN resources without Spark scheduling off of them. This means for custom ResourceProfiles we propogate all the resources defined in the ResourceProfile to YARN. We still convert GPU and FPGA to the YARN build in types as well. 
This requires that the name of any custom resources you specify match what they are defined as in YARN. +Note there is a difference in the way custom resources are handled between the base default profile and custom ResourceProfiles. To allow for the user to request YARN containers with extra resources without Spark scheduling on them, the user can specify resources via the spark.yarn.executor.resource. config. Those configs are only used in the base default profile though and do not get propagated into any other custom ResourceProfiles. This is because there would be no way to remove them if you wanted a stage to not have them. This results in your default profile getting custom resources defined in spark.yarn.executor.resource. plus spark defined resources of GPU or FPGA. Spark converts GPU and FPGA resources into the YARN built in types yarn.io/gpu) and yarn.io/fpga, but does not know the mapping of any other resources. Any other Spark custom resources are not propagated to YARN for the default profile. So if you want Spark to schedule based off a custom resource and have it requested from YARN, you must specify it in both YARN (spark.yarn.{driver/executor}.resource.) and Spark (spark.{driver/executor}.resource.) configs. Leave the Spark config off if you only want YARN containers with the extra resources but Spark not to schedule using them. Now for custom ResourceProfiles, it doesn't currently have a way to only specify YARN resources without Spark scheduling off of them. This means for custom ResourceProfiles we propagate all the resources defined in the ResourceProfile to YARN. We still convert GPU and FPGA to the YARN build in types as well. This requires that the name of any custom resources you specify match what they are defined as in YARN. # Important notes diff --git a/docs/sparkr.md b/docs/sparkr.md index 05310f89f278d..002da5a56fa9e 100644 --- a/docs/sparkr.md +++ b/docs/sparkr.md @@ -671,7 +671,7 @@ Arrow R library is available on CRAN and it can be installed as below. ```bash Rscript -e 'install.packages("arrow", repos="https://cloud.r-project.org/")' ``` -Please refer [the official documentation of Apache Arrow](https://arrow.apache.org/docs/r/) for more detials. +Please refer [the official documentation of Apache Arrow](https://arrow.apache.org/docs/r/) for more details. Note that you must ensure that Arrow R package is installed and available on all cluster nodes. The current supported minimum version is 1.0.0; however, this might change between the minor releases since Arrow optimization in SparkR is experimental. diff --git a/docs/sql-data-sources-jdbc.md b/docs/sql-data-sources-jdbc.md index b95be0974585e..7d60915e2a65e 100644 --- a/docs/sql-data-sources-jdbc.md +++ b/docs/sql-data-sources-jdbc.md @@ -131,7 +131,7 @@ the following case-insensitive options: fetchsize - The JDBC fetch size, which determines how many rows to fetch per round trip. This can help performance on JDBC drivers which default to low fetch size (eg. Oracle with 10 rows). This option applies only to reading. + The JDBC fetch size, which determines how many rows to fetch per round trip. This can help performance on JDBC drivers which default to low fetch size (e.g. Oracle with 10 rows). This option applies only to reading. 
diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 7997090e710a9..2c86e7a932637 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -333,7 +333,7 @@ license: | - - Since Spark 2.4, when there is a struct field in front of the IN operator before a subquery, the inner query must contain a struct field as well. In previous versions, instead, the fields of the struct were compared to the output of the inner query. Eg. if `a` is a `struct(a string, b int)`, in Spark 2.4 `a in (select (1 as a, 'a' as b) from range(1))` is a valid query, while `a in (select 1, 'a' from range(1))` is not. In previous version it was the opposite. + - Since Spark 2.4, when there is a struct field in front of the IN operator before a subquery, the inner query must contain a struct field as well. In previous versions, instead, the fields of the struct were compared to the output of the inner query. For example, if `a` is a `struct(a string, b int)`, in Spark 2.4 `a in (select (1 as a, 'a' as b) from range(1))` is a valid query, while `a in (select 1, 'a' from range(1))` is not. In previous version it was the opposite. - In versions 2.2.1+ and 2.3, if `spark.sql.caseSensitive` is set to true, then the `CURRENT_DATE` and `CURRENT_TIMESTAMP` functions incorrectly became case-sensitive and would resolve to columns (unless typed in lower case). In Spark 2.4 this has been fixed and the functions are no longer case-sensitive. @@ -532,11 +532,11 @@ license: | - Since Spark 2.3, by default arithmetic operations between decimals return a rounded value if an exact representation is not possible (instead of returning NULL). This is compliant with SQL ANSI 2011 specification and Hive's new behavior introduced in Hive 2.2 (HIVE-15331). This involves the following changes - - The rules to determine the result type of an arithmetic operation have been updated. In particular, if the precision / scale needed are out of the range of available values, the scale is reduced up to 6, in order to prevent the truncation of the integer part of the decimals. All the arithmetic operations are affected by the change, ie. addition (`+`), subtraction (`-`), multiplication (`*`), division (`/`), remainder (`%`) and positive module (`pmod`). + - The rules to determine the result type of an arithmetic operation have been updated. In particular, if the precision / scale needed are out of the range of available values, the scale is reduced up to 6, in order to prevent the truncation of the integer part of the decimals. All the arithmetic operations are affected by the change, i.e. addition (`+`), subtraction (`-`), multiplication (`*`), division (`/`), remainder (`%`) and positive modulus (`pmod`). - Literal values used in SQL operations are converted to DECIMAL with the exact precision and scale needed by them. - - The configuration `spark.sql.decimalOperations.allowPrecisionLoss` has been introduced. It defaults to `true`, which means the new behavior described here; if set to `false`, Spark uses previous rules, ie. it doesn't adjust the needed scale to represent the values and it returns NULL if an exact representation of the value is not possible. + - The configuration `spark.sql.decimalOperations.allowPrecisionLoss` has been introduced. It defaults to `true`, which means the new behavior described here; if set to `false`, Spark uses previous rules, i.e. it doesn't adjust the needed scale to represent the values and it returns NULL if an exact representation of the value is not possible. 
- Un-aliased subquery's semantic has not been well defined with confusing behaviors. Since Spark 2.3, we invalidate such confusing cases, for example: `SELECT v.i from (SELECT i FROM v)`, Spark will throw an analysis exception in this case because users should not be able to use the qualifier inside a subquery. See [SPARK-20690](https://issues.apache.org/jira/browse/SPARK-20690) and [SPARK-21335](https://issues.apache.org/jira/browse/SPARK-21335) for more details. diff --git a/docs/sql-ref-syntax-aux-conf-mgmt-set-timezone.md b/docs/sql-ref-syntax-aux-conf-mgmt-set-timezone.md index 47dd2be77ae90..ada86d8dd3913 100644 --- a/docs/sql-ref-syntax-aux-conf-mgmt-set-timezone.md +++ b/docs/sql-ref-syntax-aux-conf-mgmt-set-timezone.md @@ -43,7 +43,7 @@ SET TIME ZONE INTERVAL interval_literal * **interval_literal** - The [interval literal](sql-ref-literals.html#interval-literal) represents the difference between the session time zone to the 'UTC'. It must be in the range of [-18, 18] hours and max to second precision, e.g. `INTERVAL 2 HOURS 30 MINITUES` or `INTERVAL '15:40:32' HOUR TO SECOND`. + The [interval literal](sql-ref-literals.html#interval-literal) represents the difference between the session time zone to the 'UTC'. It must be in the range of [-18, 18] hours and max to second precision, e.g. `INTERVAL 2 HOURS 30 MINUTES` or `INTERVAL '15:40:32' HOUR TO SECOND`. ### Examples diff --git a/docs/sql-ref-syntax-ddl-create-table-hiveformat.md b/docs/sql-ref-syntax-ddl-create-table-hiveformat.md index 3a8c8d5b1160a..11ec2f1d9ea85 100644 --- a/docs/sql-ref-syntax-ddl-create-table-hiveformat.md +++ b/docs/sql-ref-syntax-ddl-create-table-hiveformat.md @@ -42,10 +42,10 @@ CREATE [ EXTERNAL ] TABLE [ IF NOT EXISTS ] table_identifier row_format: : SERDE serde_class [ WITH SERDEPROPERTIES (k1=v1, k2=v2, ... ) ] - | DELIMITED [ FIELDS TERMINATED BY fields_termiated_char [ ESCAPED BY escaped_char ] ] - [ COLLECTION ITEMS TERMINATED BY collection_items_termiated_char ] - [ MAP KEYS TERMINATED BY map_key_termiated_char ] - [ LINES TERMINATED BY row_termiated_char ] + | DELIMITED [ FIELDS TERMINATED BY fields_terminated_char [ ESCAPED BY escaped_char ] ] + [ COLLECTION ITEMS TERMINATED BY collection_items_terminated_char ] + [ MAP KEYS TERMINATED BY map_key_terminated_char ] + [ LINES TERMINATED BY row_terminated_char ] [ NULL DEFINED AS null_char ] ``` diff --git a/docs/sql-ref-syntax-dml-insert-into.md b/docs/sql-ref-syntax-dml-insert-into.md index ed5da2b2d28df..39d15808d033e 100644 --- a/docs/sql-ref-syntax-dml-insert-into.md +++ b/docs/sql-ref-syntax-dml-insert-into.md @@ -69,11 +69,11 @@ INSERT INTO students VALUES ('Amy Smith', '123 Park Ave, San Jose', 111111); SELECT * FROM students; -+---------+---------------------+----------+ -| name| address|student_id| -+---------+---------------------+----------+ -|Amy Smith|123 Park Ave,San Jose| 111111| -+---------+---------------------+----------+ ++---------+----------------------+----------+ +| name| address|student_id| ++---------+----------------------+----------+ +|Amy Smith|123 Park Ave, San Jose| 111111| ++---------+----------------------+----------+ ``` #### Multi-Row Insert Using a VALUES Clause @@ -100,29 +100,29 @@ SELECT * FROM students; ```sql -- Assuming the persons table has already been created and populated. 
SELECT * FROM persons; -+-------------+-------------------------+---------+ -| name| address| ssn| -+-------------+-------------------------+---------+ -|Dora Williams|134 Forest Ave, Melo Park|123456789| -+-------------+-------------------------+---------+ -| Eddie Davis| 245 Market St, Milpitas|345678901| -+-------------+-------------------------+---------+ ++-------------+--------------------------+---------+ +| name| address| ssn| ++-------------+--------------------------+---------+ +|Dora Williams|134 Forest Ave, Menlo Park|123456789| ++-------------+--------------------------+---------+ +| Eddie Davis| 245 Market St, Milpitas|345678901| ++-------------+--------------------------+---------+ INSERT INTO students PARTITION (student_id = 444444) SELECT name, address FROM persons WHERE name = "Dora Williams"; SELECT * FROM students; -+-------------+-------------------------+----------+ -| name| address|student_id| -+-------------+-------------------------+----------+ -| Amy Smith| 123 Park Ave, San Jose| 111111| -+-------------+-------------------------+----------+ -| Bob Brown| 456 Taylor St, Cupertino| 222222| -+-------------+-------------------------+----------+ -|Cathy Johnson| 789 Race Ave, Palo Alto| 333333| -+-------------+-------------------------+----------+ -|Dora Williams|134 Forest Ave, Melo Park| 444444| -+-------------+-------------------------+----------+ ++-------------+--------------------------+----------+ +| name| address|student_id| ++-------------+--------------------------+----------+ +| Amy Smith| 123 Park Ave, San Jose| 111111| ++-------------+--------------------------+----------+ +| Bob Brown| 456 Taylor St, Cupertino| 222222| ++-------------+--------------------------+----------+ +|Cathy Johnson| 789 Race Ave, Palo Alto| 333333| ++-------------+--------------------------+----------+ +|Dora Williams|134 Forest Ave, Menlo Park| 444444| ++-------------+--------------------------+----------+ ``` #### Insert Using a TABLE Statement @@ -141,21 +141,21 @@ SELECT * FROM visiting_students; INSERT INTO students TABLE visiting_students; SELECT * FROM students; -+-------------+-------------------------+----------+ -| name| address|student_id| -+-------------+-------------------------+----------+ -| Amy Smith| 123 Park Ave,San Jose| 111111| -+-------------+-------------------------+----------+ -| Bob Brown| 456 Taylor St, Cupertino| 222222| -+-------------+-------------------------+----------+ -|Cathy Johnson| 789 Race Ave, Palo Alto| 333333| -+-------------+-------------------------+----------+ -|Dora Williams|134 Forest Ave, Melo Park| 444444| -+-------------+-------------------------+----------+ -|Fleur Laurent| 345 Copper St, London| 777777| -+-------------+-------------------------+----------+ -|Gordon Martin| 779 Lake Ave, Oxford| 888888| -+-------------+-------------------------+----------+ ++-------------+--------------------------+----------+ +| name| address|student_id| ++-------------+--------------------------+----------+ +| Amy Smith| 123 Park Ave, San Jose| 111111| ++-------------+--------------------------+----------+ +| Bob Brown| 456 Taylor St, Cupertino| 222222| ++-------------+--------------------------+----------+ +|Cathy Johnson| 789 Race Ave, Palo Alto| 333333| ++-------------+--------------------------+----------+ +|Dora Williams|134 Forest Ave, Menlo Park| 444444| ++-------------+--------------------------+----------+ +|Fleur Laurent| 345 Copper St, London| 777777| ++-------------+--------------------------+----------+ +|Gordon Martin| 779 Lake Ave, 
Oxford| 888888| ++-------------+--------------------------+----------+ ``` #### Insert Using a FROM Statement @@ -177,25 +177,25 @@ INSERT INTO students FROM applicants SELECT name, address, id applicants WHERE qualified = true; SELECT * FROM students; -+-------------+-------------------------+----------+ -| name| address|student_id| -+-------------+-------------------------+----------+ -| Amy Smith| 123 Park Ave, San Jose| 111111| -+-------------+-------------------------+----------+ -| Bob Brown| 456 Taylor St, Cupertino| 222222| -+-------------+-------------------------+----------+ -|Cathy Johnson| 789 Race Ave, Palo Alto| 333333| -+-------------+-------------------------+----------+ -|Dora Williams|134 Forest Ave, Melo Park| 444444| -+-------------+-------------------------+----------+ -|Fleur Laurent| 345 Copper St, London| 777777| -+-------------+-------------------------+----------+ -|Gordon Martin| 779 Lake Ave, Oxford| 888888| -+-------------+-------------------------+----------+ -| Helen Davis|469 Mission St, San Diego| 999999| -+-------------+-------------------------+----------+ -| Jason Wang| 908 Bird St, Saratoga| 121212| -+-------------+-------------------------+----------+ ++-------------+--------------------------+----------+ +| name| address|student_id| ++-------------+--------------------------+----------+ +| Amy Smith| 123 Park Ave, San Jose| 111111| ++-------------+--------------------------+----------+ +| Bob Brown| 456 Taylor St, Cupertino| 222222| ++-------------+--------------------------+----------+ +|Cathy Johnson| 789 Race Ave, Palo Alto| 333333| ++-------------+--------------------------+----------+ +|Dora Williams|134 Forest Ave, Menlo Park| 444444| ++-------------+--------------------------+----------+ +|Fleur Laurent| 345 Copper St, London| 777777| ++-------------+--------------------------+----------+ +|Gordon Martin| 779 Lake Ave, Oxford| 888888| ++-------------+--------------------------+----------+ +| Helen Davis| 469 Mission St, San Diego| 999999| ++-------------+--------------------------+----------+ +| Jason Wang| 908 Bird St, Saratoga| 121212| ++-------------+--------------------------+----------+ ``` ### Related Statements diff --git a/docs/sql-ref-syntax-dml-insert-overwrite-table.md b/docs/sql-ref-syntax-dml-insert-overwrite-table.md index ecfd060dfd5ee..638dcb34bb1d2 100644 --- a/docs/sql-ref-syntax-dml-insert-overwrite-table.md +++ b/docs/sql-ref-syntax-dml-insert-overwrite-table.md @@ -64,18 +64,18 @@ INSERT OVERWRITE [ TABLE ] table_identifier [ partition_spec [ IF NOT EXISTS ] ] ```sql -- Assuming the students table has already been created and populated. 
SELECT * FROM students; -+-------------+-------------------------+----------+ -| name| address|student_id| -+-------------+-------------------------+----------+ -| Amy Smith| 123 Park Ave, San Jose| 111111| -| Bob Brown| 456 Taylor St, Cupertino| 222222| -|Cathy Johnson| 789 Race Ave, Palo Alto| 333333| -|Dora Williams|134 Forest Ave, Melo Park| 444444| -|Fleur Laurent| 345 Copper St, London| 777777| -|Gordon Martin| 779 Lake Ave, Oxford| 888888| -| Helen Davis|469 Mission St, San Diego| 999999| -| Jason Wang| 908 Bird St, Saratoga| 121212| -+-------------+-------------------------+----------+ ++-------------+--------------------------+----------+ +| name| address|student_id| ++-------------+--------------------------+----------+ +| Amy Smith| 123 Park Ave, San Jose| 111111| +| Bob Brown| 456 Taylor St, Cupertino| 222222| +|Cathy Johnson| 789 Race Ave, Palo Alto| 333333| +|Dora Williams|134 Forest Ave, Menlo Park| 444444| +|Fleur Laurent| 345 Copper St, London| 777777| +|Gordon Martin| 779 Lake Ave, Oxford| 888888| +| Helen Davis| 469 Mission St, San Diego| 999999| +| Jason Wang| 908 Bird St, Saratoga| 121212| ++-------------+--------------------------+----------+ INSERT OVERWRITE students VALUES ('Ashua Hill', '456 Erica Ct, Cupertino', 111111), @@ -95,25 +95,25 @@ SELECT * FROM students; ```sql -- Assuming the persons table has already been created and populated. SELECT * FROM persons; -+-------------+-------------------------+---------+ -| name| address| ssn| -+-------------+-------------------------+---------+ -|Dora Williams|134 Forest Ave, Melo Park|123456789| -+-------------+-------------------------+---------+ -| Eddie Davis| 245 Market St,Milpitas|345678901| -+-------------+-------------------------+---------+ ++-------------+--------------------------+---------+ +| name| address| ssn| ++-------------+--------------------------+---------+ +|Dora Williams|134 Forest Ave, Menlo Park|123456789| ++-------------+--------------------------+---------+ +| Eddie Davis| 245 Market St, Milpitas|345678901| ++-------------+--------------------------+---------+ INSERT OVERWRITE students PARTITION (student_id = 222222) SELECT name, address FROM persons WHERE name = "Dora Williams"; SELECT * FROM students; -+-------------+-------------------------+----------+ -| name| address|student_id| -+-------------+-------------------------+----------+ -| Ashua Hill| 456 Erica Ct, Cupertino| 111111| -+-------------+-------------------------+----------+ -|Dora Williams|134 Forest Ave, Melo Park| 222222| -+-------------+-------------------------+----------+ ++-------------+--------------------------+----------+ +| name| address|student_id| ++-------------+--------------------------+----------+ +| Ashua Hill| 456 Erica Ct, Cupertino| 111111| ++-------------+--------------------------+----------+ +|Dora Williams|134 Forest Ave, Menlo Park| 222222| ++-------------+--------------------------+----------+ ``` #### Insert Using a TABLE Statement diff --git a/docs/sql-ref-syntax-qry-select-groupby.md b/docs/sql-ref-syntax-qry-select-groupby.md index 934e5f70d4b08..ef9de1f594a31 100644 --- a/docs/sql-ref-syntax-qry-select-groupby.md +++ b/docs/sql-ref-syntax-qry-select-groupby.md @@ -269,7 +269,7 @@ INSERT INTO person VALUES (300, 'Mike', 80), (400, 'Dan', 50); ---Select the first row in cloumn age +--Select the first row in column age SELECT FIRST(age) FROM person; +--------------------+ | first(age, false) | @@ -277,7 +277,7 @@ SELECT FIRST(age) FROM person; | NULL | +--------------------+ ---Get the first row in 
cloumn `age` ignore nulls,last row in column `id` and sum of cloumn `id`. +--Get the first row in column `age` ignore nulls,last row in column `id` and sum of column `id`. SELECT FIRST(age IGNORE NULLS), LAST(id), SUM(id) FROM person; +-------------------+------------------+----------+ | first(age, true) | last(id, false) | sum(id) | diff --git a/docs/sql-ref-syntax-qry-select-lateral-view.md b/docs/sql-ref-syntax-qry-select-lateral-view.md index f742c8fa57043..c854625a1a959 100644 --- a/docs/sql-ref-syntax-qry-select-lateral-view.md +++ b/docs/sql-ref-syntax-qry-select-lateral-view.md @@ -58,7 +58,7 @@ INSERT INTO person VALUES (400, 'Dan', 50, 4, 'Street 4'); SELECT * FROM person - LATERAL VIEW EXPLODE(ARRAY(30, 60)) tabelName AS c_age + LATERAL VIEW EXPLODE(ARRAY(30, 60)) tableName AS c_age LATERAL VIEW EXPLODE(ARRAY(40, 80)) AS d_age; +------+-------+-------+--------+-----------+--------+--------+ | id | name | age | class | address | c_age | d_age | @@ -93,14 +93,14 @@ GROUP BY c_age; +--------+-----------+ SELECT * FROM person - LATERAL VIEW EXPLODE(ARRAY()) tabelName AS c_age; + LATERAL VIEW EXPLODE(ARRAY()) tableName AS c_age; +-----+-------+------+--------+----------+--------+ | id | name | age | class | address | c_age | +-----+-------+------+--------+----------+--------+ +-----+-------+------+--------+----------+--------+ SELECT * FROM person - LATERAL VIEW OUTER EXPLODE(ARRAY()) tabelName AS c_age; + LATERAL VIEW OUTER EXPLODE(ARRAY()) tableName AS c_age; +------+-------+-------+--------+-----------+--------+ | id | name | age | class | address | c_age | +------+-------+-------+--------+-----------+--------+ diff --git a/docs/sql-ref-syntax-qry-select-orderby.md b/docs/sql-ref-syntax-qry-select-orderby.md index 13f0ae40cb828..552ee9be66d1e 100644 --- a/docs/sql-ref-syntax-qry-select-orderby.md +++ b/docs/sql-ref-syntax-qry-select-orderby.md @@ -28,7 +28,7 @@ clause, this clause guarantees a total order in the output. ### Syntax ```sql -ORDER BY { expression [ sort_direction | nulls_sort_oder ] [ , ... ] } +ORDER BY { expression [ sort_direction | nulls_sort_order ] [ , ... ] } ``` ### Parameters diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/ClusteringMetrics.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/ClusteringMetrics.scala index a785d063f1476..3dea244c77226 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/ClusteringMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/ClusteringMetrics.scala @@ -127,7 +127,7 @@ private[evaluation] abstract class Silhouette { * `$a_{i}$` can be interpreted as how well `i` is assigned to its cluster * (the smaller the value, the better the assignment), while `$b_{i}$` is * a measure of how well `i` has not been assigned to its "neighboring cluster", - * ie. the nearest cluster to `i`. + * i.e. the nearest cluster to `i`. * * Unfortunately, the naive implementation of the algorithm requires to compute * the distance of each couple of points in the dataset. Since the computation of @@ -486,7 +486,7 @@ private[evaluation] object CosineSilhouette extends Silhouette { * for the point. * @param weightCol The name of the column which contains the instance weight. * @return A [[scala.collection.immutable.Map]] which associates each cluster id to a - * its statistics (ie. the precomputed values `N` and `$\Omega_{\Gamma}$`). + * its statistics (i.e. the precomputed values `N` and `$\Omega_{\Gamma}$`). 
*/ def computeClusterStats( df: DataFrame, diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala index 5ed7619fce5dc..2ec7a8632e39d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala @@ -112,7 +112,7 @@ final class Binarizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) (Seq($(inputCol)), Seq($(outputCol)), Seq($(threshold))) } - val ouputCols = inputColNames.zip(tds).map { case (inputColName, td) => + val mappedOutputCols = inputColNames.zip(tds).map { case (inputColName, td) => val binarizerUDF = dataset.schema(inputColName).dataType match { case DoubleType => udf { in: Double => if (in > td) 1.0 else 0.0 } @@ -147,8 +147,8 @@ final class Binarizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) binarizerUDF(col(inputColName)) } - val ouputMetadata = outputColNames.map(outputSchema(_).metadata) - dataset.withColumns(outputColNames, ouputCols, ouputMetadata) + val outputMetadata = outputColNames.map(outputSchema(_).metadata) + dataset.withColumns(outputColNames, mappedOutputCols, outputMetadata) } @Since("1.4.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Selector.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Selector.scala index 46052a89fdf1a..41de26dff03ab 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Selector.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Selector.scala @@ -77,7 +77,7 @@ private[feature] trait SelectorParams extends Params * @group param */ @Since("3.1.0") - final val fpr = new DoubleParam(this, "fpr", "The higest p-value for features to be kept.", + final val fpr = new DoubleParam(this, "fpr", "The highest p-value for features to be kept.", ParamValidators.inRange(0, 1)) /** @group getParam */ diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala index b6ed4f2b000cc..8bcd7909b6078 100755 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala @@ -166,11 +166,11 @@ class StopWordsRemover @Since("1.5.0") (@Since("1.5.0") override val uid: String } val (inputColNames, outputColNames) = getInOutCols() - val ouputCols = inputColNames.map { inputColName => + val outputCols = inputColNames.map { inputColName => t(col(inputColName)) } - val ouputMetadata = outputColNames.map(outputSchema(_).metadata) - dataset.withColumns(outputColNames, ouputCols, ouputMetadata) + val outputMetadata = outputColNames.map(outputSchema(_).metadata) + dataset.withColumns(outputColNames, outputCols, outputMetadata) } @Since("1.5.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/image/ImageSchema.scala b/mllib/src/main/scala/org/apache/spark/ml/image/ImageSchema.scala index 5efcf0dce68a2..37b715930a501 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/image/ImageSchema.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/image/ImageSchema.scala @@ -133,7 +133,7 @@ object ImageSchema { val img = try { ImageIO.read(new ByteArrayInputStream(bytes)) } catch { - // Catch runtime exception because `ImageIO` may throw unexcepted `RuntimeException`. + // Catch runtime exception because `ImageIO` may throw unexpected `RuntimeException`. 
// But do not catch the declared `IOException` (regarded as FileSystem failure) case _: RuntimeException => null } diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/AFTSurvivalRegressionWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/AFTSurvivalRegressionWrapper.scala index 1b5f77a9ae897..594d9f315f508 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/r/AFTSurvivalRegressionWrapper.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/r/AFTSurvivalRegressionWrapper.scala @@ -88,9 +88,9 @@ private[r] object AFTSurvivalRegressionWrapper extends MLReadable[AFTSurvivalReg aggregationDepth: Int, stringIndexerOrderType: String): AFTSurvivalRegressionWrapper = { - val (rewritedFormula, censorCol) = formulaRewrite(formula) + val (rewrittenFormula, censorCol) = formulaRewrite(formula) - val rFormula = new RFormula().setFormula(rewritedFormula) + val rFormula = new RFormula().setFormula(rewrittenFormula) .setStringIndexerOrderType(stringIndexerOrderType) RWrapperUtils.checkDataColumns(rFormula, data) val rFormulaModel = rFormula.fit(data) diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala index 84c0985245a2e..f70baa4ddd393 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala @@ -555,7 +555,7 @@ object FMRegressionModel extends MLReadable[FMRegressionModel] { * \hat{y} = p\left( y_{fm} \right) * }}} * p is the prediction function, for binary classification task is sigmoid. - * The loss funcation gradient formula: + * The loss function gradient formula: * {{{ * \frac{\partial}{\partial\theta} l\left( \hat{y},y \right) = * \frac{\partial}{\partial\theta} l\left( p\left( y_{fm} \right),y \right) = diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala index 087c2c2639831..90cc4fb13b995 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala @@ -146,7 +146,7 @@ class SVMWithSGD private ( /** * Construct a SVM object with default parameters: {stepSize: 1.0, numIterations: 100, - * regParm: 0.01, miniBatchFraction: 1.0}. + * regParam: 0.01, miniBatchFraction: 1.0}. */ @Since("0.8.0") def this() = this(1.0, 100, 0.01, 1.0) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/DistanceMeasure.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/DistanceMeasure.scala index bffed61c291ea..9ac473aabecea 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/DistanceMeasure.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/DistanceMeasure.scala @@ -41,7 +41,7 @@ private[spark] abstract class DistanceMeasure extends Serializable { * 1, if i != j: a bound r = matrix(i,j) to help avoiding unnecessary distance * computation. Given point x, let i be current closest center, and d be current best * distance, if d < f(r), then we no longer need to compute the distance to center j; - * 2, if i == j: a bound r = matrix(i,i) = min_k{maxtrix(i,k)|k!=i}. If distance + * 2, if i == j: a bound r = matrix(i,i) = min_k{matrix(i,k)|k!=i}. If distance * between point x and center i is less than f(r), then center i is the closest center * to point x. 
*/ @@ -268,7 +268,7 @@ private[spark] class EuclideanDistanceMeasure extends DistanceMeasure { * squared distance, if d < r, then we no longer need to compute the distance to center * j. matrix(i,j) equals to squared of half of Euclidean distance between centers i * and j; - * 2, if i == j: a bound r = matrix(i,i) = min_k{maxtrix(i,k)|k!=i}. If squared + * 2, if i == j: a bound r = matrix(i,i) = min_k{matrix(i,k)|k!=i}. If squared * distance between point x and center i is less than r, then center i is the closest * center to point x. */ @@ -405,7 +405,7 @@ private[spark] class CosineDistanceMeasure extends DistanceMeasure { * is used instead of Cosine distance to compute matrix(i,j): for centers i and j, * compute the radian/angle between them, halving it, and converting it back to Cosine * distance at the end; - * 2, if i == j: a bound r = matrix(i,i) = min_k{maxtrix(i,k)|k!=i}. If Cosine + * 2, if i == j: a bound r = matrix(i,i) = min_k{matrix(i,k)|k!=i}. If Cosine * distance between point x and center i is less than r, then center i is the closest * center to point x. */ diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala index b2742ee6ecb5b..c9f6d789d6740 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala @@ -466,7 +466,7 @@ final class OnlineLDAOptimizer extends LDAOptimizer with Logging { val seed = randomGenerator.nextLong() // If and only if optimizeDocConcentration is set true, // we calculate logphat in the same pass as other statistics. - // No calculation of loghat happens otherwise. + // No calculation of logphat happens otherwise. val logphatPartOptionBase = () => if (optimizeDocConcentration) { Some(BDV.zeros[Double](k)) } else { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala index 3c9b806d616fc..111030dada491 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala @@ -36,7 +36,7 @@ import org.apache.spark.util.random.XORShiftRandom * doing a single iteration of the standard k-means algorithm. * * The update algorithm uses the "mini-batch" KMeans rule, - * generalized to incorporate forgetfullness (i.e. decay). + * generalized to incorporate forgetfulness (i.e. decay). * The update rule (for each cluster) is: * *
      diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala index c165d4810c934..f7c6d09f5e437 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala @@ -48,11 +48,11 @@ class PCA @Since("1.4.0") (@Since("1.4.0") val k: Int) { val mat = if (numFeatures > 65535) { val summary = Statistics.colStats(sources.map((_, 1.0)), Seq("mean")) val mean = Vectors.fromML(summary.mean) - val meanCentredRdd = sources.map { row => + val meanCenteredRdd = sources.map { row => BLAS.axpy(-1, mean, row) row } - new RowMatrix(meanCentredRdd) + new RowMatrix(meanCenteredRdd) } else { require(PCAUtil.memoryCost(k, numFeatures) < Int.MaxValue, "The param k and numFeatures is too large for SVD computation. " + diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala index 13899fa8296f6..eeb583f84ca8b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala @@ -560,7 +560,7 @@ class Word2VecModel private[spark] ( /** * Find synonyms of the vector representation of a word, possibly - * including any words in the model vocabulary whose vector respresentation + * including any words in the model vocabulary whose vector representation * is the supplied vector. * @param vector vector representation of a word * @param num number of synonyms to find diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala index 601c7da30ffed..606e2f2f212ca 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala @@ -88,8 +88,8 @@ class AssociationRules private[fpm] ( // Join to get (X, ((Y, freq(X union Y)), freq(X))), generate rules, and filter by confidence candidates.join(freqItemsets.map(x => (x.items.toSeq, x.freq))) - .map { case (antecendent, ((consequent, freqUnion), freqAntecedent)) => - new Rule(antecendent.toArray, + .map { case (antecedent, ((consequent, freqUnion), freqAntecedent)) => + new Rule(antecedent.toArray, consequent.toArray, freqUnion, freqAntecedent, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index c618b71ddc5a8..d546f0c1a8e19 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -693,11 +693,11 @@ class RowMatrix @Since("1.0.0") ( val pBV = sc.broadcast(colMagsCorrected.map(c => sg / c)) val qBV = sc.broadcast(colMagsCorrected.map(c => math.min(sg, c))) - val sims = rows.mapPartitionsWithIndex { (indx, iter) => + val sims = rows.mapPartitionsWithIndex { (index, iter) => val p = pBV.value val q = qBV.value - val rand = new XORShiftRandom(indx) + val rand = new XORShiftRandom(index) val scaled = new Array[Double](p.size) iter.flatMap { row => row match { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/KolmogorovSmirnovTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/KolmogorovSmirnovTest.scala index d17f7047c5b2b..778de30e756c0 100644 --- 
a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/KolmogorovSmirnovTest.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/KolmogorovSmirnovTest.scala @@ -91,7 +91,7 @@ private[stat] object KolmogorovSmirnovTest extends Logging { * @param partData `Iterator[Double]` 1 partition of a sorted RDD * @param n `Double` the total size of the RDD * @param cdf `Double => Double` a function the calculates the theoretical CDF of a value - * @return `Iterator[(Double, Double)] `Unadjusted (ie. off by a constant) potential extrema + * @return `Iterator[(Double, Double)] `Unadjusted (i.e. off by a constant) potential extrema * in a partition. The first element corresponds to the (empirical CDF - 1/N) - CDF, * the second element corresponds to empirical CDF - CDF. We can then search the resulting * iterator for the minimum of the first and the maximum of the second element, and provide diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaStopWordsRemoverSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaStopWordsRemoverSuite.java index 6480b57e1f796..af32e03854b53 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaStopWordsRemoverSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaStopWordsRemoverSuite.java @@ -41,7 +41,7 @@ public void javaCompatibilityTest() { .setOutputCol("filtered"); List data = Arrays.asList( - RowFactory.create(Arrays.asList("I", "saw", "the", "red", "baloon")), + RowFactory.create(Arrays.asList("I", "saw", "the", "red", "balloon")), RowFactory.create(Arrays.asList("Mary", "had", "a", "little", "lamb")) ); StructType schema = new StructType(new StructField[]{ diff --git a/mllib/src/test/scala/org/apache/spark/ml/clustering/GaussianMixtureSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/clustering/GaussianMixtureSuite.scala index b35f964c959bf..0eae23df8358d 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/clustering/GaussianMixtureSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/clustering/GaussianMixtureSuite.scala @@ -181,7 +181,7 @@ class GaussianMixtureSuite extends MLTest with DefaultReadWriteTest { } } - test("multivariate data and check againt R mvnormalmixEM") { + test("multivariate data and check against R mvnormalmixEM") { /* Using the following R code to generate data and train the model using mixtools package. library(mvtnorm) diff --git a/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala index 5ee161ce8dd33..deaad2bd54d0e 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala @@ -36,7 +36,7 @@ class RegressionEvaluatorSuite test("Regression Evaluator: default params") { /** * Here is the instruction describing how to export the test data into CSV format - * so we can validate the metrics compared with R's mmetric package. + * so we can validate the metrics compared with R's mmetric function. 
* * import org.apache.spark.mllib.util.LinearDataGenerator * val data = sc.parallelize(LinearDataGenerator.generateLinearInput(6.3, diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/ANOVASelectorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/ANOVASelectorSuite.scala index 1e1ab206cc1c2..0d664e421da4c 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/ANOVASelectorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/ANOVASelectorSuite.scala @@ -133,35 +133,35 @@ class ANOVASelectorSuite extends MLTest with DefaultReadWriteTest { ParamsSuite.checkParams(new ANOVASelector()) } - test("Test ANOVAFValue calssification selector: numTopFeatures") { + test("Test ANOVAFValue classification selector: numTopFeatures") { val selector = new ANOVASelector() .setOutputCol("filtered").setSelectorType("numTopFeatures").setNumTopFeatures(1) val model = testSelector(selector, dataset) MLTestingUtils.checkCopyAndUids(selector, model) } - test("Test ANOVAFValue calssification selector: percentile") { + test("Test ANOVAFValue classification selector: percentile") { val selector = new ANOVASelector() .setOutputCol("filtered").setSelectorType("percentile").setPercentile(0.17) val model = testSelector(selector, dataset) MLTestingUtils.checkCopyAndUids(selector, model) } - test("Test ANOVAFValue calssification selector: fpr") { + test("Test ANOVAFValue classification selector: fpr") { val selector = new ANOVASelector() .setOutputCol("filtered").setSelectorType("fpr").setFpr(1.0E-12) val model = testSelector(selector, dataset) MLTestingUtils.checkCopyAndUids(selector, model) } - test("Test ANOVAFValue calssification selector: fdr") { + test("Test ANOVAFValue classification selector: fdr") { val selector = new ANOVASelector() .setOutputCol("filtered").setSelectorType("fdr").setFdr(6.0E-12) val model = testSelector(selector, dataset) MLTestingUtils.checkCopyAndUids(selector, model) } - test("Test ANOVAFValue calssification selector: fwe") { + test("Test ANOVAFValue classification selector: fwe") { val selector = new ANOVASelector() .setOutputCol("filtered").setSelectorType("fwe").setFwe(6.0E-12) val model = testSelector(selector, dataset) diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala index 19645b517d79c..8f8365a59082b 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala @@ -81,7 +81,7 @@ class DCTSuite extends MLTest with DefaultReadWriteTest { .map { case Row(vec: Vector) => vec.size } .head() - // Can not infer size of ouput vector, since no metadata is provided + // Can not infer size of output vector, since no metadata is provided intercept[TestFailedException] { val transformed = transformer.transform(dataset) checkVectorSizeOnDF(transformed, "resultVec", vectorSize) diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/LSHTest.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/LSHTest.scala index 93564681994d7..55dade28920ed 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/LSHTest.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/LSHTest.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.types.DataTypes private[ml] object LSHTest { /** - * For any locality sensitive function h in a metric space, we meed to verify whether + * For any locality sensitive function h in a metric space, we need to verify whether * the following property is 
satisfied. * * There exist dist1, dist2, p1, p2, so that for any two elements e1 and e2, diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/VarianceThresholdSelectorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/VarianceThresholdSelectorSuite.scala index cc451c0b60379..142abf2ccdfb9 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/VarianceThresholdSelectorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/VarianceThresholdSelectorSuite.scala @@ -53,7 +53,7 @@ class VarianceThresholdSelectorSuite extends MLTest with DefaultReadWriteTest { ParamsSuite.checkParams(new VarianceThresholdSelector) } - test("Test VarianceThresholdSelector: varainceThreshold not set") { + test("Test VarianceThresholdSelector: varianceThreshold not set") { val selector = new VarianceThresholdSelector().setOutputCol("filtered") val model = testSelector(selector, dataset) MLTestingUtils.checkCopyAndUids(selector, model) diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala index a0e17a4b40fd2..bfa9f4b59511c 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala @@ -494,7 +494,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest [1] -0.0457441 -0.6833928 [1] 1.8121235 -0.1747493 -0.5815417 - R code for deivance calculation: + R code for deviance calculation: data = cbind(y=c(0,1,0,0,0,1), x1=c(18, 12, 15, 13, 15, 16), x2=c(1,0,0,2,1,1)) summary(glm(y~x1+x2, family=poisson, data=data.frame(data)))$deviance [1] 3.70055 @@ -1661,7 +1661,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest } test("evaluate with labels that are not doubles") { - // Evaulate with a dataset that contains Labels not as doubles to verify correct casting + // Evaluate with a dataset that contains Labels not as doubles to verify correct casting val dataset = Seq( Instance(17.0, 1.0, Vectors.dense(0.0, 5.0).toSparse), Instance(19.0, 1.0, Vectors.dense(1.0, 7.0)), diff --git a/pom.xml b/pom.xml index cd7e1767d6b18..f0ad9b0167c32 100644 --- a/pom.xml +++ b/pom.xml @@ -229,7 +229,7 @@ declared in the projects that build assemblies. For other projects the scope should remain as "compile", otherwise they are not available - during compilation if the dependency is transivite (e.g. "graphx/" depending on "core/" and + during compilation if the dependency is transitive (e.g. "graphx/" depending on "core/" and needing Hadoop classes in the classpath to compile). 
--> compile @@ -1758,7 +1758,7 @@ ${hive.deps.scope} - + ${hive.group} hive-metastore diff --git a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala index f696e93e9cef2..386de19e919e6 100644 --- a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala +++ b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala @@ -113,10 +113,9 @@ class ExecutorClassLoaderSuite val classLoader = new ExecutorClassLoader( new SparkConf(), null, url1, parentLoader, true) - // load 'scala.Option', using ClassforName to do the exact same behavior as - // what JavaDeserializationStream does - // scalastyle:off classforname + // load 'scala.Option', using Class.forName to do the exact same behavior as + // what JavaDeserializationStream does val optionClass = Class.forName("scala.Option", false, classLoader) // scalastyle:on classforname From 2da72593c1cf63fc6f815416b8d553f0a53f3e65 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 30 Nov 2020 05:23:23 +0000 Subject: [PATCH 0603/1009] [SPARK-32976][SQL] Support column list in INSERT statement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? #### JIRA expectations ``` INSERT currently does not support named column lists. INSERT INTO (col1, col2,…) VALUES( 'val1', 'val2', … ) Note, we assume the column list contains all the column names. Issue an exception if the list is not complete. The column order could be different from the column order defined in the table definition. ``` #### Implementation In this PR, we add a column list as an optional part to the `INSERT OVERWRITE/INTO` statements: ``` /** * {{{ * INSERT OVERWRITE TABLE tableIdentifier [partitionSpec [IF NOT EXISTS]]? [identifierList] ... * INSERT INTO [TABLE] tableIdentifier [partitionSpec] [identifierList] ... * }}} */ ``` The column list represents all expected columns, in an explicit order, that you want to insert into the target table. **Particularly**, the current implementation assumes the column list contains all the column names; it will fail when the list is incomplete. In **Analyzer**, we add a code path to resolve the column list in the `ResolveOutputRelation` rule before it is transformed to a v1 or v2 command. It will fail here if the list has any field that does not belong to the target table. Then, for v2 commands, e.g. `AppendData`, we use the resolved column list and the output of the target table to resolve the output of the source query in the `ResolveOutputRelation` rule. If the list has duplicated columns, we fail. If the list is not empty but the list size does not match the target table, we fail. If no other exceptions occur, we use the column list to map the output of the source query to the output of the target table. The column list will be set to Nil and will not hit the rule again after it is resolved. For v1 commands, all of this happens in the `PreprocessTableInsertion` rule. ### Why are the changes needed? New feature support. ### Does this PR introduce _any_ user-facing change? Yes, INSERT INTO/OVERWRITE TABLE now supports specifying a column list. ### How was this patch tested? New tests. Closes #29893 from yaooqinn/SPARK-32976.
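To illustrate the new syntax end to end, a minimal sketch (the table and column names are made up for the example, and `spark` is assumed to be an active `SparkSession`):

```
// The column list may be in any order; the analyzer maps each listed column to the
// corresponding expression in the query and reorders the result to the table schema.
spark.sql("CREATE TABLE t (c1 INT, c2 INT) USING parquet")
spark.sql("INSERT INTO t (c2, c1) VALUES (2, 1)")
spark.sql("SELECT * FROM t").show()  // expected row: c1 = 1, c2 = 2
// Incomplete, duplicated, or unknown column lists are rejected during analysis.
```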
Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/parser/SqlBase.g4 | 4 +- .../sql/catalyst/analysis/Analyzer.scala | 52 ++++- .../sql/catalyst/analysis/CheckAnalysis.scala | 2 +- .../spark/sql/catalyst/dsl/package.scala | 2 +- .../sql/catalyst/parser/AstBuilder.scala | 20 +- .../catalyst/plans/logical/statements.scala | 2 + .../sql/catalyst/parser/DDLParserSuite.scala | 66 ++++++ .../sql/catalyst/parser/PlanParserSuite.scala | 4 +- .../apache/spark/sql/DataFrameWriter.scala | 1 + .../datasources/DataSourceStrategy.scala | 10 +- .../datasources/FallBackFileSourceV2.scala | 4 +- .../sql/execution/datasources/rules.scala | 6 +- .../apache/spark/sql/SQLInsertTestSuite.scala | 221 ++++++++++++++++++ .../command/PlanResolutionSuite.scala | 2 +- .../spark/sql/hive/HiveStrategies.scala | 9 +- .../sql/hive/HiveSQLInsertTestSuite.scala | 25 ++ 16 files changed, 396 insertions(+), 34 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSQLInsertTestSuite.scala diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 5d17028c32ae2..a23994f456f75 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -332,8 +332,8 @@ query ; insertInto - : INSERT OVERWRITE TABLE? multipartIdentifier (partitionSpec (IF NOT EXISTS)?)? #insertOverwriteTable - | INSERT INTO TABLE? multipartIdentifier partitionSpec? (IF NOT EXISTS)? #insertIntoTable + : INSERT OVERWRITE TABLE? multipartIdentifier (partitionSpec (IF NOT EXISTS)?)? identifierList? #insertOverwriteTable + | INSERT INTO TABLE? multipartIdentifier partitionSpec? (IF NOT EXISTS)? identifierList? #insertIntoTable | INSERT OVERWRITE LOCAL? DIRECTORY path=STRING rowFormat? createFileFormat? #insertOverwriteHiveDir | INSERT OVERWRITE LOCAL? DIRECTORY (path=STRING)? tableProvider (OPTIONS options=tablePropertyList)? 
#insertOverwriteDir ; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index dae496244c858..9b599b4c8f8d4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -49,7 +49,7 @@ import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.{PartitionOverwriteMode, StoreAssignmentPolicy} import org.apache.spark.sql.types._ -import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.sql.util.{CaseInsensitiveStringMap, SchemaUtils} import org.apache.spark.util.Utils /** @@ -218,6 +218,7 @@ class Analyzer(override val catalogManager: CatalogManager) ResolveTableValuedFunctions :: ResolveNamespace(catalogManager) :: new ResolveCatalogs(catalogManager) :: + ResolveUserSpecifiedColumns :: ResolveInsertInto :: ResolveRelations :: ResolveTables :: @@ -846,7 +847,7 @@ class Analyzer(override val catalogManager: CatalogManager) def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { case u @ UnresolvedRelation(ident, _, isStreaming) => lookupTempView(ident, isStreaming).getOrElse(u) - case i @ InsertIntoStatement(UnresolvedRelation(ident, _, false), _, _, _, _) => + case i @ InsertIntoStatement(UnresolvedRelation(ident, _, false), _, _, _, _, _) => lookupTempView(ident) .map(view => i.copy(table = view)) .getOrElse(i) @@ -961,7 +962,7 @@ class Analyzer(override val catalogManager: CatalogManager) .map(ResolvedTable(catalog.asTableCatalog, ident, _)) .getOrElse(u) - case i @ InsertIntoStatement(u @ UnresolvedRelation(_, _, false), _, _, _, _) + case i @ InsertIntoStatement(u @ UnresolvedRelation(_, _, false), _, _, _, _, _) if i.query.resolved => lookupV2Relation(u.multipartIdentifier, u.options, false) .map(v2Relation => i.copy(table = v2Relation)) @@ -1045,7 +1046,7 @@ class Analyzer(override val catalogManager: CatalogManager) } def apply(plan: LogicalPlan): LogicalPlan = ResolveTempViews(plan).resolveOperatorsUp { - case i @ InsertIntoStatement(table, _, _, _, _) if i.query.resolved => + case i @ InsertIntoStatement(table, _, _, _, _, _) if i.query.resolved => val relation = table match { case u @ UnresolvedRelation(_, _, false) => lookupRelation(u.multipartIdentifier, u.options, false).getOrElse(u) @@ -1160,7 +1161,8 @@ class Analyzer(override val catalogManager: CatalogManager) object ResolveInsertInto extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { - case i @ InsertIntoStatement(r: DataSourceV2Relation, _, _, _, _) if i.query.resolved => + case i @ InsertIntoStatement(r: DataSourceV2Relation, _, _, _, _, _) + if i.query.resolved && i.userSpecifiedCols.isEmpty => // ifPartitionNotExists is append with validation, but validation is not supported if (i.ifPartitionNotExists) { throw QueryCompilationErrors.unsupportedIfNotExistsError(r.table.name) @@ -3107,6 +3109,46 @@ class Analyzer(override val catalogManager: CatalogManager) } } + object ResolveUserSpecifiedColumns extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + case i: InsertIntoStatement if i.table.resolved && i.query.resolved && + i.userSpecifiedCols.nonEmpty => + val resolved = resolveUserSpecifiedColumns(i) + val projection = 
addColumnListOnQuery(i.table.output, resolved, i.query) + i.copy(userSpecifiedCols = Nil, query = projection) + } + + private def resolveUserSpecifiedColumns(i: InsertIntoStatement): Seq[NamedExpression] = { + SchemaUtils.checkColumnNameDuplication( + i.userSpecifiedCols, "in the column list", resolver) + + i.userSpecifiedCols.map { col => + i.table.resolve(Seq(col), resolver) + .getOrElse(i.table.failAnalysis(s"Cannot resolve column name $col")) + } + } + + private def addColumnListOnQuery( + tableOutput: Seq[Attribute], + cols: Seq[NamedExpression], + query: LogicalPlan): LogicalPlan = { + if (cols.size != query.output.size) { + query.failAnalysis( + s"Cannot write to table due to mismatched user specified column size(${cols.size}) and" + + s" data column size(${query.output.size})") + } + val nameToQueryExpr = cols.zip(query.output).toMap + // Static partition columns in the table output should not appear in the column list + // they will be handled in another rule ResolveInsertInto + val reordered = tableOutput.flatMap { nameToQueryExpr.get(_).orElse(None) } + if (reordered == query.output) { + query + } else { + Project(reordered, query) + } + } + } + private def validateStoreAssignmentPolicy(): Unit = { // SPARK-28730: LEGACY store assignment policy is disallowed in data source v2. if (conf.storeAssignmentPolicy == StoreAssignmentPolicy.LEGACY) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 9a3ab4a5f8d11..7f89c130749f4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -108,7 +108,7 @@ trait CheckAnalysis extends PredicateHelper { case u: UnresolvedRelation => u.failAnalysis(s"Table or view not found: ${u.multipartIdentifier.quoted}") - case InsertIntoStatement(u: UnresolvedRelation, _, _, _, _) => + case InsertIntoStatement(u: UnresolvedRelation, _, _, _, _, _) => failAnalysis(s"Table not found: ${u.multipartIdentifier.quoted}") // TODO (SPARK-27484): handle streaming write commands when we have them. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index 4cd649b07a5c0..89cf97e76d798 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -431,7 +431,7 @@ package object dsl { partition: Map[String, Option[String]] = Map.empty, overwrite: Boolean = false, ifPartitionNotExists: Boolean = false): LogicalPlan = - InsertIntoStatement(table, partition, logicalPlan, overwrite, ifPartitionNotExists) + InsertIntoStatement(table, partition, Nil, logicalPlan, overwrite, ifPartitionNotExists) def as(alias: String): LogicalPlan = SubqueryAlias(alias, logicalPlan) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index afef88f7e97e8..e85a3eba85377 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -243,9 +243,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg /** * Parameters used for writing query to a table: - * (multipartIdentifier, partitionKeys, ifPartitionNotExists). + * (multipartIdentifier, tableColumnList, partitionKeys, ifPartitionNotExists). */ - type InsertTableParams = (Seq[String], Map[String, Option[String]], Boolean) + type InsertTableParams = (Seq[String], Seq[String], Map[String, Option[String]], Boolean) /** * Parameters used for writing query to a directory: (isLocal, CatalogStorageFormat, provider). @@ -255,8 +255,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg /** * Add an * {{{ - * INSERT OVERWRITE TABLE tableIdentifier [partitionSpec [IF NOT EXISTS]]? - * INSERT INTO [TABLE] tableIdentifier [partitionSpec] + * INSERT OVERWRITE TABLE tableIdentifier [partitionSpec [IF NOT EXISTS]]? 
[identifierList] + * INSERT INTO [TABLE] tableIdentifier [partitionSpec] [identifierList] * INSERT OVERWRITE [LOCAL] DIRECTORY STRING [rowFormat] [createFileFormat] * INSERT OVERWRITE [LOCAL] DIRECTORY [STRING] tableProvider [OPTIONS tablePropertyList] * }}} @@ -267,18 +267,20 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg query: LogicalPlan): LogicalPlan = withOrigin(ctx) { ctx match { case table: InsertIntoTableContext => - val (tableIdent, partition, ifPartitionNotExists) = visitInsertIntoTable(table) + val (tableIdent, cols, partition, ifPartitionNotExists) = visitInsertIntoTable(table) InsertIntoStatement( UnresolvedRelation(tableIdent), partition, + cols, query, overwrite = false, ifPartitionNotExists) case table: InsertOverwriteTableContext => - val (tableIdent, partition, ifPartitionNotExists) = visitInsertOverwriteTable(table) + val (tableIdent, cols, partition, ifPartitionNotExists) = visitInsertOverwriteTable(table) InsertIntoStatement( UnresolvedRelation(tableIdent), partition, + cols, query, overwrite = true, ifPartitionNotExists) @@ -299,13 +301,14 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg override def visitInsertIntoTable( ctx: InsertIntoTableContext): InsertTableParams = withOrigin(ctx) { val tableIdent = visitMultipartIdentifier(ctx.multipartIdentifier) + val cols = Option(ctx.identifierList()).map(visitIdentifierList).getOrElse(Nil) val partitionKeys = Option(ctx.partitionSpec).map(visitPartitionSpec).getOrElse(Map.empty) if (ctx.EXISTS != null) { operationNotAllowed("INSERT INTO ... IF NOT EXISTS", ctx) } - (tableIdent, partitionKeys, false) + (tableIdent, cols, partitionKeys, false) } /** @@ -315,6 +318,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg ctx: InsertOverwriteTableContext): InsertTableParams = withOrigin(ctx) { assert(ctx.OVERWRITE() != null) val tableIdent = visitMultipartIdentifier(ctx.multipartIdentifier) + val cols = Option(ctx.identifierList()).map(visitIdentifierList).getOrElse(Nil) val partitionKeys = Option(ctx.partitionSpec).map(visitPartitionSpec).getOrElse(Map.empty) val dynamicPartitionKeys: Map[String, Option[String]] = partitionKeys.filter(_._2.isEmpty) @@ -323,7 +327,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg dynamicPartitionKeys.keys.mkString(", "), ctx) } - (tableIdent, partitionKeys, ctx.EXISTS() != null) + (tableIdent, cols, partitionKeys, ctx.EXISTS() != null) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index 281d57b3648f4..d5f739466a802 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -357,6 +357,7 @@ case class DropViewStatement( * An INSERT INTO statement, as parsed from SQL. * * @param table the logical plan representing the table. + * @param userSpecifiedCols the user specified list of columns that belong to the table. * @param query the logical plan representing data to write to. * @param overwrite overwrite existing table or partitions. * @param partitionSpec a map from the partition key to the partition value (optional). 
@@ -371,6 +372,7 @@ case class DropViewStatement( case class InsertIntoStatement( table: LogicalPlan, partitionSpec: Map[String, Option[String]], + userSpecifiedCols: Seq[String], query: LogicalPlan, overwrite: Boolean, ifPartitionNotExists: Boolean) extends ParsedStatement { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index c58ff81f17131..91b35bcac98ae 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1172,6 +1172,22 @@ class DDLParserSuite extends AnalysisTest { InsertIntoStatement( UnresolvedRelation(Seq("testcat", "ns1", "ns2", "tbl")), Map.empty, + Nil, + Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("source"))), + overwrite = false, ifPartitionNotExists = false)) + } + } + + test("insert table: basic append with a column list") { + Seq( + "INSERT INTO TABLE testcat.ns1.ns2.tbl (a, b) SELECT * FROM source", + "INSERT INTO testcat.ns1.ns2.tbl (a, b) SELECT * FROM source" + ).foreach { sql => + parseCompare(sql, + InsertIntoStatement( + UnresolvedRelation(Seq("testcat", "ns1", "ns2", "tbl")), + Map.empty, + Seq("a", "b"), Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("source"))), overwrite = false, ifPartitionNotExists = false)) } @@ -1182,6 +1198,7 @@ class DDLParserSuite extends AnalysisTest { InsertIntoStatement( UnresolvedRelation(Seq("testcat", "ns1", "ns2", "tbl")), Map.empty, + Nil, Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("testcat2", "db", "tbl"))), overwrite = false, ifPartitionNotExists = false)) } @@ -1196,6 +1213,22 @@ class DDLParserSuite extends AnalysisTest { InsertIntoStatement( UnresolvedRelation(Seq("testcat", "ns1", "ns2", "tbl")), Map("p1" -> Some("3"), "p2" -> None), + Nil, + Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("source"))), + overwrite = false, ifPartitionNotExists = false)) + } + + test("insert table: append with partition and a column list") { + parseCompare( + """ + |INSERT INTO testcat.ns1.ns2.tbl + |PARTITION (p1 = 3, p2) (a, b) + |SELECT * FROM source + """.stripMargin, + InsertIntoStatement( + UnresolvedRelation(Seq("testcat", "ns1", "ns2", "tbl")), + Map("p1" -> Some("3"), "p2" -> None), + Seq("a", "b"), Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("source"))), overwrite = false, ifPartitionNotExists = false)) } @@ -1209,6 +1242,22 @@ class DDLParserSuite extends AnalysisTest { InsertIntoStatement( UnresolvedRelation(Seq("testcat", "ns1", "ns2", "tbl")), Map.empty, + Nil, + Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("source"))), + overwrite = true, ifPartitionNotExists = false)) + } + } + + test("insert table: overwrite with column list") { + Seq( + "INSERT OVERWRITE TABLE testcat.ns1.ns2.tbl (a, b) SELECT * FROM source", + "INSERT OVERWRITE testcat.ns1.ns2.tbl (a, b) SELECT * FROM source" + ).foreach { sql => + parseCompare(sql, + InsertIntoStatement( + UnresolvedRelation(Seq("testcat", "ns1", "ns2", "tbl")), + Map.empty, + Seq("a", "b"), Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("source"))), overwrite = true, ifPartitionNotExists = false)) } @@ -1224,6 +1273,22 @@ class DDLParserSuite extends AnalysisTest { InsertIntoStatement( UnresolvedRelation(Seq("testcat", "ns1", "ns2", "tbl")), Map("p1" -> Some("3"), "p2" -> None), + Nil, + Project(Seq(UnresolvedStar(None)), 
UnresolvedRelation(Seq("source"))), + overwrite = true, ifPartitionNotExists = false)) + } + + test("insert table: overwrite with partition and column list") { + parseCompare( + """ + |INSERT OVERWRITE TABLE testcat.ns1.ns2.tbl + |PARTITION (p1 = 3, p2) (a, b) + |SELECT * FROM source + """.stripMargin, + InsertIntoStatement( + UnresolvedRelation(Seq("testcat", "ns1", "ns2", "tbl")), + Map("p1" -> Some("3"), "p2" -> None), + Seq("a", "b"), Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("source"))), overwrite = true, ifPartitionNotExists = false)) } @@ -1238,6 +1303,7 @@ class DDLParserSuite extends AnalysisTest { InsertIntoStatement( UnresolvedRelation(Seq("testcat", "ns1", "ns2", "tbl")), Map("p1" -> Some("3")), + Nil, Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("source"))), overwrite = true, ifPartitionNotExists = true)) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 88afcb10d9c20..6fef18babedb6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -295,7 +295,7 @@ class PlanParserSuite extends AnalysisTest { partition: Map[String, Option[String]], overwrite: Boolean = false, ifPartitionNotExists: Boolean = false): LogicalPlan = - InsertIntoStatement(table("s"), partition, plan, overwrite, ifPartitionNotExists) + InsertIntoStatement(table("s"), partition, Nil, plan, overwrite, ifPartitionNotExists) // Single inserts assertEqual(s"insert overwrite table s $sql", @@ -713,7 +713,7 @@ class PlanParserSuite extends AnalysisTest { comparePlans( parsePlan( "INSERT INTO s SELECT /*+ REPARTITION(100), COALESCE(500), COALESCE(10) */ * FROM t"), - InsertIntoStatement(table("s"), Map.empty, + InsertIntoStatement(table("s"), Map.empty, Nil, UnresolvedHint("REPARTITION", Seq(Literal(100)), UnresolvedHint("COALESCE", Seq(Literal(500)), UnresolvedHint("COALESCE", Seq(Literal(10)), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index a8688bdf15495..c5f2a3d568e97 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -536,6 +536,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { InsertIntoStatement( table = UnresolvedRelation(tableIdent), partitionSpec = Map.empty[String, Option[String]], + Nil, query = df.logicalPlan, overwrite = mode == SaveMode.Overwrite, ifPartitionNotExists = false) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index 361d1fab03421..e4f001d61a767 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -156,7 +156,7 @@ object DataSourceAnalysis extends Rule[LogicalPlan] with CastSupport { CreateDataSourceTableAsSelectCommand(tableDesc, mode, query, query.output.map(_.name)) case InsertIntoStatement(l @ LogicalRelation(_: InsertableRelation, _, _, _), - parts, query, overwrite, false) if parts.isEmpty => + parts, _, query, overwrite, false) if parts.isEmpty => 
InsertIntoDataSourceCommand(l, query, overwrite) case InsertIntoDir(_, storage, provider, query, overwrite) @@ -168,7 +168,7 @@ object DataSourceAnalysis extends Rule[LogicalPlan] with CastSupport { InsertIntoDataSourceDirCommand(storage, provider.get, query, overwrite) case i @ InsertIntoStatement( - l @ LogicalRelation(t: HadoopFsRelation, _, table, _), parts, query, overwrite, _) => + l @ LogicalRelation(t: HadoopFsRelation, _, table, _), parts, _, query, overwrite, _) => // If the InsertIntoTable command is for a partitioned HadoopFsRelation and // the user has specified static partitions, we add a Project operator on top of the query // to include those constant column values in the query result. @@ -276,11 +276,11 @@ class FindDataSourceTable(sparkSession: SparkSession) extends Rule[LogicalPlan] override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { - case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta, options, false), _, _, _, _) - if DDLUtils.isDatasourceTable(tableMeta) => + case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta, options, false), + _, _, _, _, _) if DDLUtils.isDatasourceTable(tableMeta) => i.copy(table = readDataSourceTable(tableMeta, options)) - case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta, _, false), _, _, _, _) => + case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta, _, false), _, _, _, _, _) => i.copy(table = DDLUtils.readHiveTable(tableMeta)) case UnresolvedCatalogRelation(tableMeta, options, false) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala index 1149767bdade2..b5d06db024112 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala @@ -34,8 +34,8 @@ import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, File */ class FallBackFileSourceV2(sparkSession: SparkSession) extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { - case i @ - InsertIntoStatement(d @ DataSourceV2Relation(table: FileTable, _, _, _, _), _, _, _, _) => + case i @ InsertIntoStatement( + d @ DataSourceV2Relation(table: FileTable, _, _, _, _), _, _, _, _, _) => val v1FileFormat = table.fallbackFileFormat.newInstance() val relation = HadoopFsRelation( table.fileIndex, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index 9e65b0ce13693..2cc78258378ab 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -434,7 +434,7 @@ object PreprocessTableInsertion extends Rule[LogicalPlan] { } def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { - case i @ InsertIntoStatement(table, _, query, _, _) if table.resolved && query.resolved => + case i @ InsertIntoStatement(table, _, _, query, _, _) if table.resolved && query.resolved => table match { case relation: HiveTableRelation => val metadata = relation.tableMeta @@ -512,7 +512,7 @@ object PreWriteCheck extends (LogicalPlan => Unit) { def apply(plan: LogicalPlan): Unit = { plan.foreach { - case InsertIntoStatement(l @ LogicalRelation(relation, 
_, _, _), partition, query, _, _) => + case InsertIntoStatement(l @ LogicalRelation(relation, _, _, _), partition, _, query, _, _) => // Get all input data source relations of the query. val srcRelations = query.collect { case LogicalRelation(src, _, _, _) => src @@ -534,7 +534,7 @@ object PreWriteCheck extends (LogicalPlan => Unit) { case _ => failAnalysis(s"$relation does not allow insertion.") } - case InsertIntoStatement(t, _, _, _, _) + case InsertIntoStatement(t, _, _, _, _, _) if !t.isInstanceOf[LeafNode] || t.isInstanceOf[Range] || t.isInstanceOf[OneRowRelation] || diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala new file mode 100644 index 0000000000000..e454f0e6d540f --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala @@ -0,0 +1,221 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.SparkConf +import org.apache.spark.sql.connector.InMemoryPartitionTableCatalog +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} + +/** + * The base trait for DML - insert syntax + */ +trait SQLInsertTestSuite extends QueryTest with SQLTestUtils { + + import testImplicits._ + + def format: String + + protected def createTable( + table: String, + cols: Seq[String], + colTypes: Seq[String], + partCols: Seq[String] = Nil): Unit = { + val values = cols.zip(colTypes).map(tuple => tuple._1 + " " + tuple._2).mkString("(", ", ", ")") + val partitionSpec = if (partCols.nonEmpty) { + partCols.mkString("PARTITIONED BY (", ",", ")") + } else "" + sql(s"CREATE TABLE $table$values USING $format $partitionSpec") + } + + protected def processInsert( + tableName: String, + input: DataFrame, + cols: Seq[String] = Nil, + partitionExprs: Seq[String] = Nil, + overwrite: Boolean): Unit = { + val tmpView = "tmp_view" + val columnList = if (cols.nonEmpty) cols.mkString("(", ",", ")") else "" + val partitionList = if (partitionExprs.nonEmpty) { + partitionExprs.mkString("PARTITION (", ",", ")") + } else "" + withTempView(tmpView) { + input.createOrReplaceTempView(tmpView) + val overwriteStr = if (overwrite) "OVERWRITE" else "INTO" + sql( + s"INSERT $overwriteStr TABLE $tableName $partitionList $columnList SELECT * FROM $tmpView") + } + } + + protected def verifyTable(tableName: String, expected: DataFrame): Unit = { + checkAnswer(spark.table(tableName), expected) + } + + test("insert with column list - follow table output order") { + withTable("t1") { + val df = Seq((1, 2L, "3")).toDF() + val cols = Seq("c1", "c2", "c3") + createTable("t1", cols, Seq("int", "long", "string")) + Seq(false, true).foreach { m => + 
processInsert("t1", df, cols, overwrite = m) + verifyTable("t1", df) + } + } + } + + test("insert with column list - follow table output order + partitioned table") { + val cols = Seq("c1", "c2", "c3", "c4") + val df = Seq((1, 2, 3, 4)).toDF(cols: _*) + withTable("t1") { + createTable("t1", cols, Seq("int", "int", "int", "int"), cols.takeRight(2)) + Seq(false, true).foreach { m => + processInsert("t1", df, cols, overwrite = m) + verifyTable("t1", df) + } + } + + withTable("t1") { + createTable("t1", cols, Seq("int", "int", "int", "int"), cols.takeRight(2)) + Seq(false, true).foreach { m => + processInsert( + "t1", df.selectExpr("c1", "c2"), cols.take(2), Seq("c3=3", "c4=4"), overwrite = m) + verifyTable("t1", df) + } + } + + withTable("t1") { + createTable("t1", cols, Seq("int", "int", "int", "int"), cols.takeRight(2)) + Seq(false, true).foreach { m => + processInsert("t1", df.selectExpr("c1", "c2", "c4"), + cols.filterNot(_ == "c3"), Seq("c3=3", "c4"), overwrite = m) + verifyTable("t1", df) + } + } + } + + test("insert with column list - table output reorder") { + withTable("t1") { + val cols = Seq("c1", "c2", "c3") + val df = Seq((1, 2, 3)).toDF(cols: _*) + createTable("t1", cols, Seq("int", "int", "int")) + Seq(false, true).foreach { m => + processInsert("t1", df, cols.reverse, overwrite = m) + verifyTable("t1", df.selectExpr(cols.reverse: _*)) + } + } + } + + test("insert with column list - table output reorder + partitioned table") { + val cols = Seq("c1", "c2", "c3", "c4") + val df = Seq((1, 2, 3, 4)).toDF(cols: _*) + withTable("t1") { + createTable("t1", cols, Seq("int", "int", "int", "int"), cols.takeRight(2)) + Seq(false, true).foreach { m => + processInsert("t1", df, cols.reverse, overwrite = m) + verifyTable("t1", df.selectExpr(cols.reverse: _*)) + } + } + + withTable("t1") { + createTable("t1", cols, Seq("int", "int", "int", "int"), cols.takeRight(2)) + Seq(false, true).foreach { m => + processInsert( + "t1", df.selectExpr("c1", "c2"), cols.take(2).reverse, Seq("c3=3", "c4=4"), overwrite = m) + verifyTable("t1", df.selectExpr("c2", "c1", "c3", "c4")) + } + } + + withTable("t1") { + createTable("t1", cols, Seq("int", "int", "int", "int"), cols.takeRight(2)) + Seq(false, true).foreach { m => + processInsert("t1", + df.selectExpr("c1", "c2", "c4"), Seq("c4", "c2", "c1"), Seq("c3=3", "c4"), overwrite = m) + verifyTable("t1", df.selectExpr("c4", "c2", "c3", "c1")) + } + } + } + + test("insert with column list - duplicated columns") { + withTable("t1") { + val cols = Seq("c1", "c2", "c3") + createTable("t1", cols, Seq("int", "long", "string")) + val e1 = intercept[AnalysisException](sql(s"INSERT INTO t1 (c1, c2, c2) values(1, 2, 3)")) + assert(e1.getMessage === "Found duplicate column(s) in the column list: `c2`;") + } + } + + test("insert with column list - invalid columns") { + withTable("t1") { + val cols = Seq("c1", "c2", "c3") + createTable("t1", cols, Seq("int", "long", "string")) + val e1 = intercept[AnalysisException](sql(s"INSERT INTO t1 (c1, c2, c4) values(1, 2, 3)")) + assert(e1.getMessage === "Cannot resolve column name c4;") + } + } + + test("insert with column list - mismatched column list size") { + val msg = "Cannot write to table due to mismatched user specified column size" + withTable("t1") { + val cols = Seq("c1", "c2", "c3") + createTable("t1", cols, Seq("int", "long", "string")) + val e1 = intercept[AnalysisException](sql(s"INSERT INTO t1 (c1, c2) values(1, 2, 3)")) + assert(e1.getMessage.contains(msg)) + val e2 = intercept[AnalysisException](sql(s"INSERT INTO 
t1 (c1, c2, c3) values(1, 2)")) + assert(e2.getMessage.contains(msg)) + } + } + + test("insert with column list - mismatched target table out size after rewritten query") { + val v2Msg = "Cannot write to 'testcat.t1', not enough data columns:" + val cols = Seq("c1", "c2", "c3", "c4") + + withTable("t1") { + createTable("t1", cols, Seq.fill(4)("int")) + val e1 = intercept[AnalysisException](sql(s"INSERT INTO t1 (c1) values(1)")) + assert(e1.getMessage.contains("target table has 4 column(s) but the inserted data has 1") || + e1.getMessage.contains(v2Msg)) + } + + withTable("t1") { + createTable("t1", cols, Seq.fill(4)("int"), cols.takeRight(2)) + val e1 = intercept[AnalysisException] { + sql(s"INSERT INTO t1 partition(c3=3, c4=4) (c1) values(1)") + } + assert(e1.getMessage.contains("target table has 4 column(s) but the inserted data has 3") || + e1.getMessage.contains(v2Msg)) + } + } +} + +class FileSourceSQLInsertTestSuite extends SQLInsertTestSuite with SharedSparkSession { + override def format: String = "parquet" + override protected def sparkConf: SparkConf = { + super.sparkConf.set(SQLConf.USE_V1_SOURCE_LIST, format) + } +} + +class DSV2SQLInsertTestSuite extends SQLInsertTestSuite with SharedSparkSession { + + override def format: String = "foo" + + protected override def sparkConf: SparkConf = { + super.sparkConf + .set("spark.sql.catalog.testcat", classOf[InMemoryPartitionTableCatalog].getName) + .set(SQLConf.DEFAULT_CATALOG.key, "testcat") + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 92c114e116d0c..9710fca6bc82c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -1185,7 +1185,7 @@ class PlanResolutionSuite extends AnalysisTest { case Project(_, AsDataSourceV2Relation(r)) => assert(r.catalog.exists(_ == catlogIdent)) assert(r.identifier.exists(_.name() == tableIdent)) - case InsertIntoStatement(r: DataSourceV2Relation, _, _, _, _) => + case InsertIntoStatement(r: DataSourceV2Relation, _, _, _, _, _) => assert(r.catalog.exists(_ == catlogIdent)) assert(r.identifier.exists(_.name() == tableIdent)) case DescribeRelation(r: ResolvedTable, _, _) => diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 3d8bba8b1b425..ff7dc58829fa1 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -145,7 +145,7 @@ class DetermineTableStats(session: SparkSession) extends Rule[LogicalPlan] { // handles InsertIntoStatement specially as the table in InsertIntoStatement is not added in its // children, hence not matched directly by previous HiveTableRelation case. 
- case i @ InsertIntoStatement(relation: HiveTableRelation, _, _, _, _) + case i @ InsertIntoStatement(relation: HiveTableRelation, _, _, _, _, _) if DDLUtils.isHiveTable(relation.tableMeta) && relation.tableMeta.stats.isEmpty => i.copy(table = hiveTableWithStats(relation)) } @@ -159,7 +159,8 @@ class DetermineTableStats(session: SparkSession) extends Rule[LogicalPlan] { */ object HiveAnalysis extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { - case InsertIntoStatement(r: HiveTableRelation, partSpec, query, overwrite, ifPartitionNotExists) + case InsertIntoStatement( + r: HiveTableRelation, partSpec, _, query, overwrite, ifPartitionNotExists) if DDLUtils.isHiveTable(r.tableMeta) => InsertIntoHiveTable(r.tableMeta, partSpec, query, overwrite, ifPartitionNotExists, query.output.map(_.name)) @@ -207,11 +208,11 @@ case class RelationConversions( plan resolveOperators { // Write path case InsertIntoStatement( - r: HiveTableRelation, partition, query, overwrite, ifPartitionNotExists) + r: HiveTableRelation, partition, cols, query, overwrite, ifPartitionNotExists) if query.resolved && DDLUtils.isHiveTable(r.tableMeta) && (!r.isPartitioned || SQLConf.get.getConf(HiveUtils.CONVERT_INSERTING_PARTITIONED_TABLE)) && isConvertible(r) => - InsertIntoStatement(metastoreCatalog.convert(r), partition, + InsertIntoStatement(metastoreCatalog.convert(r), partition, cols, query, overwrite, ifPartitionNotExists) // Read path diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSQLInsertTestSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSQLInsertTestSuite.scala new file mode 100644 index 0000000000000..49b005bca938e --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSQLInsertTestSuite.scala @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive + +import org.apache.spark.sql.SQLInsertTestSuite +import org.apache.spark.sql.hive.test.TestHiveSingleton + +class HiveSQLInsertTestSuite extends SQLInsertTestSuite with TestHiveSingleton { + override def format: String = "hive OPTIONS(fileFormat='parquet')" +} From 0fd9f57dd4cee32b4d0a16345f98e628a9d5f0fe Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Mon, 30 Nov 2020 05:37:10 +0000 Subject: [PATCH 0604/1009] [SPARK-33448][SQL] Support CACHE/UNCACHE TABLE commands for v2 tables ### What changes were proposed in this pull request? This PR proposes to support `CACHE/UNCACHE TABLE` commands for v2 tables. In addition, this PR proposes to migrate `CACHE/UNCACHE TABLE` to use `UnresolvedTableOrView` to resolve the table identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands.
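As a usage sketch of what this enables (the `testcat` catalog, the `foo` provider, and the table name below are placeholders for any configured v2 catalog and data source):

```
// Assuming spark.sql.catalog.testcat points at a v2 catalog implementation,
// these commands now resolve through the v2 code path instead of failing.
spark.sql("CREATE TABLE testcat.ns.tbl (id BIGINT, data STRING) USING foo")
spark.sql("CACHE TABLE testcat.ns.tbl OPTIONS ('storageLevel' 'DISK_ONLY')")
spark.sql("SELECT * FROM testcat.ns.tbl").show()   // reads the cached relation
spark.sql("UNCACHE TABLE IF EXISTS testcat.ns.tbl")
```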
More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). ### Why are the changes needed? To support `CACHE/UNCACHE TABLE` commands for v2 tables. Note that `CACHE/UNCACHE TABLE` for v1 tables/views go through `SparkSession.table` to resolve identifier, which resolves temp views first, so there is no change in the behavior by moving to the new framework. ### Does this PR introduce _any_ user-facing change? Yes. Now the user can run `CACHE/UNCACHE TABLE` commands on v2 tables. ### How was this patch tested? Added/updated existing tests. Closes #30403 from imback82/cache_table. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/AstBuilder.scala | 31 ------------- .../catalyst/plans/logical/statements.scala | 16 ------- .../sql/catalyst/parser/DDLParserSuite.scala | 27 ----------- .../analysis/ResolveSessionCatalog.scala | 19 +------- .../spark/sql/execution/SparkSqlParser.scala | 34 ++++++++++++++ .../spark/sql/execution/command/cache.scala | 43 +++++++++++------- .../apache/spark/sql/CachedTableSuite.scala | 11 +++++ .../sql/connector/DataSourceV2SQLSuite.scala | 40 ++++++++++------- .../sql/execution/SparkSqlParserSuite.scala | 45 ++++++++++++++++++- .../execution/metric/SQLMetricsSuite.scala | 2 +- .../HiveThriftServer2Suites.scala | 4 +- .../spark/sql/hive/CachedTableSuite.scala | 14 +++--- .../apache/spark/sql/hive/test/TestHive.scala | 2 +- 13 files changed, 152 insertions(+), 136 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index e85a3eba85377..a31d7ca7268a6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3590,37 +3590,6 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg ctx.SERDE != null) } - /** - * Create a [[CacheTableStatement]]. - * - * For example: - * {{{ - * CACHE [LAZY] TABLE multi_part_name - * [OPTIONS tablePropertyList] [[AS] query] - * }}} - */ - override def visitCacheTable(ctx: CacheTableContext): LogicalPlan = withOrigin(ctx) { - import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ - - val query = Option(ctx.query).map(plan) - val tableName = visitMultipartIdentifier(ctx.multipartIdentifier) - if (query.isDefined && tableName.length > 1) { - val catalogAndNamespace = tableName.init - throw new ParseException("It is not allowed to add catalog/namespace " + - s"prefix ${catalogAndNamespace.quoted} to " + - "the table name in CACHE TABLE AS SELECT", ctx) - } - val options = Option(ctx.options).map(visitPropertyKeyValues).getOrElse(Map.empty) - CacheTableStatement(tableName, query, ctx.LAZY != null, options) - } - - /** - * Create an [[UncacheTableStatement]] logical plan. - */ - override def visitUncacheTable(ctx: UncacheTableContext): LogicalPlan = withOrigin(ctx) { - UncacheTableStatement(visitMultipartIdentifier(ctx.multipartIdentifier), ctx.EXISTS != null) - } - /** * Create a [[TruncateTable]] command. 
* diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index d5f739466a802..effb4cff75930 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -412,22 +412,6 @@ case class UseStatement(isNamespaceSet: Boolean, nameParts: Seq[String]) extends */ case class RepairTableStatement(tableName: Seq[String]) extends ParsedStatement -/** - * A CACHE TABLE statement, as parsed from SQL - */ -case class CacheTableStatement( - tableName: Seq[String], - plan: Option[LogicalPlan], - isLazy: Boolean, - options: Map[String, String]) extends ParsedStatement - -/** - * An UNCACHE TABLE statement, as parsed from SQL - */ -case class UncacheTableStatement( - tableName: Seq[String], - ifExists: Boolean) extends ParsedStatement - /** * A TRUNCATE TABLE statement, as parsed from SQL */ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 91b35bcac98ae..0f1b4a3ea918c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1984,33 +1984,6 @@ class DDLParserSuite extends AnalysisTest { asSerde = true)) } - test("CACHE TABLE") { - comparePlans( - parsePlan("CACHE TABLE a.b.c"), - CacheTableStatement(Seq("a", "b", "c"), None, false, Map.empty)) - - comparePlans( - parsePlan("CACHE LAZY TABLE a.b.c"), - CacheTableStatement(Seq("a", "b", "c"), None, true, Map.empty)) - - comparePlans( - parsePlan("CACHE LAZY TABLE a.b.c OPTIONS('storageLevel' 'DISK_ONLY')"), - CacheTableStatement(Seq("a", "b", "c"), None, true, Map("storageLevel" -> "DISK_ONLY"))) - - intercept("CACHE TABLE a.b.c AS SELECT * FROM testData", - "It is not allowed to add catalog/namespace prefix a.b") - } - - test("UNCACHE TABLE") { - comparePlans( - parsePlan("UNCACHE TABLE a.b.c"), - UncacheTableStatement(Seq("a", "b", "c"), ifExists = false)) - - comparePlans( - parsePlan("UNCACHE TABLE IF EXISTS a.b.c"), - UncacheTableStatement(Seq("a", "b", "c"), ifExists = true)) - } - test("TRUNCATE table") { comparePlans( parsePlan("TRUNCATE TABLE a.b.c"), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index f49caf7f04a20..582f11a2be8fa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -446,20 +446,6 @@ class ResolveSessionCatalog( ShowCreateTableCommand(ident.asTableIdentifier) } - case CacheTableStatement(tbl, plan, isLazy, options) => - val name = if (plan.isDefined) { - // CACHE TABLE ... AS SELECT creates a temp view with the input query. - // Temp view doesn't belong to any catalog and we shouldn't resolve catalog in the name. 
- tbl - } else { - parseTempViewOrV1Table(tbl, "CACHE TABLE") - } - CacheTableCommand(name.asTableIdentifier, plan, isLazy, options) - - case UncacheTableStatement(tbl, ifExists) => - val name = parseTempViewOrV1Table(tbl, "UNCACHE TABLE") - UncacheTableCommand(name.asTableIdentifier, ifExists) - case TruncateTable(ResolvedV1TableIdentifier(ident), partitionSpec) => TruncateTableCommand( ident.asTableIdentifier, @@ -561,12 +547,9 @@ class ResolveSessionCatalog( "SHOW VIEWS, only SessionCatalog supports this command.") } - case ShowTableProperties(ResolvedV1TableIdentifier(ident), propertyKey) => + case ShowTableProperties(ResolvedV1TableOrViewIdentifier(ident), propertyKey) => ShowTablePropertiesCommand(ident.asTableIdentifier, propertyKey) - case ShowTableProperties(r: ResolvedView, propertyKey) => - ShowTablePropertiesCommand(r.identifier.asTableIdentifier, propertyKey) - case DescribeFunction(ResolvedFunc(identifier), extended) => DescribeFunctionCommand(identifier.asFunctionIdentifier, extended) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 568c7112954f5..c82e3818b48cc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -192,6 +192,40 @@ class SparkSqlAstBuilder extends AstBuilder { unquotedPath } + /** + * Create a [[CacheTableCommand]]. + * + * For example: + * {{{ + * CACHE [LAZY] TABLE multi_part_name + * [OPTIONS tablePropertyList] [[AS] query] + * }}} + */ + override def visitCacheTable(ctx: CacheTableContext): LogicalPlan = withOrigin(ctx) { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + + val query = Option(ctx.query).map(plan) + val tableName = visitMultipartIdentifier(ctx.multipartIdentifier) + if (query.isDefined && tableName.length > 1) { + val catalogAndNamespace = tableName.init + throw new ParseException("It is not allowed to add catalog/namespace " + + s"prefix ${catalogAndNamespace.quoted} to " + + "the table name in CACHE TABLE AS SELECT", ctx) + } + val options = Option(ctx.options).map(visitPropertyKeyValues).getOrElse(Map.empty) + CacheTableCommand(tableName, query, ctx.LAZY != null, options) + } + + + /** + * Create an [[UncacheTableCommand]] logical plan. + */ + override def visitUncacheTable(ctx: UncacheTableContext): LogicalPlan = withOrigin(ctx) { + UncacheTableCommand( + visitMultipartIdentifier(ctx.multipartIdentifier), + ctx.EXISTS != null) + } + /** * Create a [[ClearCacheCommand]] logical plan. 
*/ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala index f99dc8d9f1a8e..3f0945d1e817b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala @@ -19,26 +19,27 @@ package org.apache.spark.sql.execution.command import java.util.Locale -import org.apache.spark.sql.{Dataset, Row, SparkSession} -import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, Row, SparkSession} import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, LogicalPlan} import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.MultipartIdentifierHelper import org.apache.spark.storage.StorageLevel case class CacheTableCommand( - tableIdent: TableIdentifier, + multipartIdentifier: Seq[String], plan: Option[LogicalPlan], isLazy: Boolean, options: Map[String, String]) extends RunnableCommand { - require(plan.isEmpty || tableIdent.database.isEmpty, - "Database name is not allowed in CACHE TABLE AS SELECT") + require(plan.isEmpty || multipartIdentifier.length == 1, + "Namespace name is not allowed in CACHE TABLE AS SELECT") override def innerChildren: Seq[QueryPlan[_]] = plan.toSeq override def run(sparkSession: SparkSession): Seq[Row] = { + val tableName = multipartIdentifier.quoted plan.foreach { logicalPlan => - Dataset.ofRows(sparkSession, logicalPlan).createTempView(tableIdent.quotedString) + Dataset.ofRows(sparkSession, logicalPlan).createTempView(tableName) } val storageLevelKey = "storagelevel" @@ -49,34 +50,46 @@ case class CacheTableCommand( logWarning(s"Invalid options: ${withoutStorageLevel.mkString(", ")}") } + val table = sparkSession.table(tableName) if (storageLevelValue.nonEmpty) { - sparkSession.catalog.cacheTable( - tableIdent.quotedString, StorageLevel.fromString(storageLevelValue.get)) + sparkSession.sharedState.cacheManager.cacheQuery( + table, + Some(tableName), + StorageLevel.fromString(storageLevelValue.get)) } else { - sparkSession.catalog.cacheTable(tableIdent.quotedString) + sparkSession.sharedState.cacheManager.cacheQuery(table, Some(tableName)) } if (!isLazy) { // Performs eager caching - sparkSession.table(tableIdent).count() + table.count() } Seq.empty[Row] } } - case class UncacheTableCommand( - tableIdent: TableIdentifier, + multipartIdentifier: Seq[String], ifExists: Boolean) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { - val tableId = tableIdent.quotedString - if (!ifExists || sparkSession.catalog.tableExists(tableId)) { - sparkSession.catalog.uncacheTable(tableId) + val tableName = multipartIdentifier.quoted + table(sparkSession, tableName).foreach { table => + val cascade = !sparkSession.sessionState.catalog.isTempView(multipartIdentifier) + sparkSession.sharedState.cacheManager.uncacheQuery(table, cascade) } Seq.empty[Row] } + + private def table(sparkSession: SparkSession, name: String): Option[DataFrame] = { + try { + Some(sparkSession.table(name)) + } catch { + case ex: AnalysisException if ifExists && ex.getMessage.contains("Table or view not found") => + None + } + } } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala index 
6313370476c93..ef3f4daa6dc6b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala @@ -25,6 +25,7 @@ import org.apache.spark.executor.DataReadMethod._ import org.apache.spark.executor.DataReadMethod.DataReadMethod import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart} import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.TempTableAlreadyExistsException import org.apache.spark.sql.catalyst.expressions.SubqueryExpression import org.apache.spark.sql.catalyst.plans.logical.{BROADCAST, Join, JoinStrategyHint, SHUFFLE_HASH} import org.apache.spark.sql.catalyst.util.DateTimeConstants @@ -140,6 +141,16 @@ class CachedTableSuite extends QueryTest with SQLTestUtils } } + test("cache table as select - existing temp view") { + withTempView("tempView") { + sql("CREATE TEMPORARY VIEW tempView as SELECT 1") + val e = intercept[TempTableAlreadyExistsException] { + sql("CACHE TABLE tempView AS SELECT 1") + } + assert(e.getMessage.contains("Temporary view 'tempView' already exists")) + } + } + test("uncaching temp table") { withTempView("tempTable1", "tempTable2") { testData.select("key").createOrReplaceTempView("tempTable1") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 98580568a8df6..ffbc2287d81ad 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -30,6 +30,7 @@ import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME import org.apache.spark.sql.connector.catalog.CatalogV2Util.withDefaultOwnership +import org.apache.spark.sql.execution.columnar.InMemoryRelation import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} import org.apache.spark.sql.internal.SQLConf.{PARTITION_OVERWRITE_MODE, PartitionOverwriteMode, V2_SESSION_CATALOG_IMPLEMENTATION} import org.apache.spark.sql.internal.connector.SimpleTableProvider @@ -2018,28 +2019,29 @@ class DataSourceV2SQLSuite } } - test("CACHE TABLE") { + test("CACHE/UNCACHE TABLE") { val t = "testcat.ns1.ns2.tbl" withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo") + def isCached(table: String): Boolean = { + spark.table(table).queryExecution.withCachedData.isInstanceOf[InMemoryRelation] + } - testV1CommandSupportingTempView("CACHE TABLE", t) + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo") + sql(s"CACHE TABLE $t") + assert(isCached(t)) - val e = intercept[AnalysisException] { - sql(s"CACHE LAZY TABLE $t") - } - assert(e.message.contains("CACHE TABLE is only supported with temp views or v1 tables")) + sql(s"UNCACHE TABLE $t") + assert(!isCached(t)) } - } - test("UNCACHE TABLE") { - val t = "testcat.ns1.ns2.tbl" - withTable(t) { - sql(s"CREATE TABLE $t (id bigint, data string) USING foo") - - testV1CommandSupportingTempView("UNCACHE TABLE", t) - testV1CommandSupportingTempView("UNCACHE TABLE", s"IF EXISTS $t") + // Test a scenario where a table does not exist. 
+ val e = intercept[AnalysisException] { + sql(s"UNCACHE TABLE $t") } + assert(e.message.contains("Table or view not found: testcat.ns1.ns2.tbl")) + + // If "IF EXISTS" is set, UNCACHE TABLE will not throw an exception. + sql(s"UNCACHE TABLE IF EXISTS $t") } test("SHOW COLUMNS") { @@ -2555,11 +2557,15 @@ class DataSourceV2SQLSuite } } - private def testNotSupportedV2Command(sqlCommand: String, sqlParams: String): Unit = { + private def testNotSupportedV2Command( + sqlCommand: String, + sqlParams: String, + sqlCommandInMessage: Option[String] = None): Unit = { val e = intercept[AnalysisException] { sql(s"$sqlCommand $sqlParams") } - assert(e.message.contains(s"$sqlCommand is not supported for v2 tables")) + val cmdStr = sqlCommandInMessage.getOrElse(sqlCommand) + assert(e.message.contains(s"$cmdStr is not supported for v2 tables")) } private def testV1Command(sqlCommand: String, sqlParams: String): Unit = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index 61c16baedb7cc..1a826c00c81f2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -337,5 +337,48 @@ class SparkSqlParserSuite extends AnalysisTest { |FROM v """.stripMargin, "LINES TERMINATED BY only supports newline '\\n' right now") - } + } + + test("CACHE TABLE") { + assertEqual( + "CACHE TABLE a.b.c", + CacheTableCommand(Seq("a", "b", "c"), None, false, Map.empty)) + + assertEqual( + "CACHE TABLE t AS SELECT * FROM testData", + CacheTableCommand( + Seq("t"), + Some(Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("testData")))), + false, + Map.empty)) + + assertEqual( + "CACHE LAZY TABLE a.b.c", + CacheTableCommand(Seq("a", "b", "c"), None, true, Map.empty)) + + assertEqual( + "CACHE LAZY TABLE a.b.c OPTIONS('storageLevel' 'DISK_ONLY')", + CacheTableCommand( + Seq("a", "b", "c"), + None, + true, + Map("storageLevel" -> "DISK_ONLY"))) + + intercept("CACHE TABLE a.b.c AS SELECT * FROM testData", + "It is not allowed to add catalog/namespace prefix a.b") + } + + test("UNCACHE TABLE") { + assertEqual( + "UNCACHE TABLE a.b.c", + UncacheTableCommand(Seq("a", "b", "c"), ifExists = false)) + + assertEqual( + "UNCACHE TABLE IF EXISTS a.b.c", + UncacheTableCommand(Seq("a", "b", "c"), ifExists = true)) + } + + test("CLEAR CACHE") { + assertEqual("CLEAR CACHE", ClearCacheCommand) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala index 4872906dbfec3..b4f921efcac81 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala @@ -705,7 +705,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils sql("CREATE TEMPORARY VIEW inMemoryTable AS SELECT 1 AS c1") sql("CACHE TABLE inMemoryTable") testSparkPlanMetrics(spark.table("inMemoryTable"), 1, - Map(1L -> (("Scan In-memory table `inMemoryTable`", Map.empty))) + Map(1L -> (("Scan In-memory table inMemoryTable", Map.empty))) ) sql("CREATE TEMPORARY VIEW ```a``b``` AS SELECT 2 AS c1") diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala 
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala index 7cc60bb505089..5bf7892478082 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala @@ -305,7 +305,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { val plan = statement.executeQuery("explain select * from test_table") plan.next() plan.next() - assert(plan.getString(1).contains("Scan In-memory table `test_table`")) + assert(plan.getString(1).contains("Scan In-memory table test_table")) val rs1 = statement.executeQuery("SELECT key FROM test_table ORDER BY KEY DESC") val buf1 = new collection.mutable.ArrayBuffer[Int]() @@ -391,7 +391,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { val plan = statement.executeQuery("explain select key from test_map ORDER BY key DESC") plan.next() plan.next() - assert(plan.getString(1).contains("Scan In-memory table `test_table`")) + assert(plan.getString(1).contains("Scan In-memory table test_table")) val rs = statement.executeQuery("SELECT key FROM test_map ORDER BY KEY DESC") val buf = new collection.mutable.ArrayBuffer[Int]() diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala index fc793534641df..81c3f271b18d4 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala @@ -113,7 +113,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleto e = intercept[AnalysisException] { sql("UNCACHE TABLE nonexistentTable") }.getMessage - assert(e.contains(s"$expectedErrorMsg default.nonexistentTable")) + assert(e.contains(s"$expectedErrorMsg nonexistentTable")) sql("UNCACHE TABLE IF EXISTS nonexistentTable") } @@ -364,14 +364,14 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleto // Cache the table 'cachedTable' in temp db with qualified table name, // and then check whether the table is cached with expected name sql(s"CACHE TABLE $db.cachedTable OPTIONS('storageLevel' 'MEMORY_ONLY')") - assertCached(sql(s"SELECT * FROM $db.cachedTable"), s"`$db`.`cachedTable`", MEMORY_ONLY) + assertCached(sql(s"SELECT * FROM $db.cachedTable"), s"$db.cachedTable", MEMORY_ONLY) assert(spark.catalog.isCached(s"$db.cachedTable"), s"Table '$db.cachedTable' should be cached.") // Refresh the table 'cachedTable' in temp db with qualified table name, and then check // whether the table is still cached with the same name and storage level. 
sql(s"REFRESH TABLE $db.cachedTable") - assertCached(sql(s"select * from $db.cachedTable"), s"`$db`.`cachedTable`", MEMORY_ONLY) + assertCached(sql(s"select * from $db.cachedTable"), s"$db.cachedTable", MEMORY_ONLY) assert(spark.catalog.isCached(s"$db.cachedTable"), s"Table '$db.cachedTable' should be cached after refreshing with its qualified name.") @@ -382,7 +382,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleto // 'cachedTable', instead of '$db.cachedTable' activateDatabase(db) { sql("REFRESH TABLE cachedTable") - assertCached(sql("SELECT * FROM cachedTable"), s"`$db`.`cachedTable`", MEMORY_ONLY) + assertCached(sql("SELECT * FROM cachedTable"), s"$db.cachedTable", MEMORY_ONLY) assert(spark.catalog.isCached("cachedTable"), s"Table '$db.cachedTable' should be cached after refreshing with its " + "unqualified name.") @@ -403,13 +403,13 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleto // Cache the table 'cachedTable' in default db without qualified table name , and then // check whether the table is cached with expected name. sql("CACHE TABLE cachedTable OPTIONS('storageLevel' 'DISK_ONLY')") - assertCached(sql("SELECT * FROM cachedTable"), "`default`.`cachedTable`", DISK_ONLY) + assertCached(sql("SELECT * FROM cachedTable"), "cachedTable", DISK_ONLY) assert(spark.catalog.isCached("cachedTable"), "Table 'cachedTable' should be cached.") // Refresh the table 'cachedTable' in default db with unqualified table name, and then // check whether the table is still cached with the same name. sql("REFRESH TABLE cachedTable") - assertCached(sql("SELECT * FROM cachedTable"), "`default`.`cachedTable`", DISK_ONLY) + assertCached(sql("SELECT * FROM cachedTable"), "cachedTable", DISK_ONLY) assert(spark.catalog.isCached("cachedTable"), "Table 'cachedTable' should be cached after refreshing with its unqualified name.") @@ -421,7 +421,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleto activateDatabase(db) { sql("REFRESH TABLE default.cachedTable") assertCached( - sql("SELECT * FROM default.cachedTable"), "`default`.`cachedTable`", DISK_ONLY) + sql("SELECT * FROM default.cachedTable"), "cachedTable", DISK_ONLY) assert(spark.catalog.isCached("default.cachedTable"), "Table 'cachedTable' should be cached after refreshing with its qualified name.") } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index f7c13ea047da7..a25c61c96f3d8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -596,7 +596,7 @@ private[hive] class TestHiveQueryExecution( override lazy val analyzed: LogicalPlan = sparkSession.withActive { val describedTables = logical match { - case CacheTableCommand(tbl, _, _, _) => tbl :: Nil + case CacheTableCommand(tbl, _, _, _) => tbl.asTableIdentifier :: Nil case _ => Nil } From 225c2e2815988ebf3e0926a4ca2af9a933b48467 Mon Sep 17 00:00:00 2001 From: "xuewei.linxuewei" Date: Mon, 30 Nov 2020 15:36:26 +0900 Subject: [PATCH 0605/1009] [SPARK-33498][SQL][FOLLOW-UP] Deduplicate the unittest by using checkCastWithParseError ### What changes were proposed in this pull request? Dup code removed in SPARK-33498 as follow-up. ### Why are the changes needed? Nit. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Existing UT. 
Closes #30540 from leanken/leanken-SPARK-33498. Authored-by: xuewei.linxuewei Signed-off-by: HyukjinKwon --- .../apache/spark/sql/catalyst/expressions/CastSuite.scala | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 0900a303b4cbe..d284c417042c1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -971,11 +971,7 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { checkCastWithParseError("20150318") checkCastWithParseError("2015-031-8") checkCastWithParseError("2015-03-18T12:03:17-0:70") - - val input = "abdef" - checkExceptionInExpression[DateTimeException]( - cast(input, TimestampType, Option(zid.getId)), - s"Cannot cast $input to TimestampType.") + checkCastWithParseError("abdef") } } } From b665d5881915f042930f502bcc3c6ee3cb00c50d Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Mon, 30 Nov 2020 17:04:38 +0900 Subject: [PATCH 0606/1009] [SPARK-28646][SQL] Fix bug of Count so as consistent with mainstream databases ### What changes were proposed in this pull request? Currently, Spark allows calls to `count` even for non parameterless aggregate function. For example, the following query actually works: `SELECT count() FROM tenk1;` On the other hand, mainstream databases will throw an error. **Oracle** `> ORA-00909: invalid number of arguments` **PgSQL** `ERROR: count(*) must be used to call a parameterless aggregate function` **MySQL** `> 1064 - You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near ')` ### Why are the changes needed? Fix a bug so that consistent with mainstream databases. There is an example query output with/without this fix. `SELECT count() FROM testData;` The output before this fix: `0` The output after this fix: ``` org.apache.spark.sql.AnalysisException cannot resolve 'count()' due to data type mismatch: count requires at least one argument.; line 1 pos 7 ``` ### Does this PR introduce _any_ user-facing change? Yes. If not specify parameter for `count`, will throw an error. ### How was this patch tested? Jenkins test. Closes #30541 from beliefer/SPARK-28646. 
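To make the behavior change concrete, here is a hedged spark-shell sketch (illustrative only; `testData` stands for any registered table or view, and the error text is the one from the new golden file):

```scala
// Illustrative only: `testData` is a placeholder table name.
spark.sql("SELECT count() FROM testData").show()
// org.apache.spark.sql.AnalysisException: cannot resolve 'count()' due to data type
// mismatch: count requires at least one argument.; line 1 pos 7

// These forms keep working; count(*) is parsed as count(1), so it is not affected.
spark.sql("SELECT count(*), count(1), count(a) FROM testData").show()
```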
Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: HyukjinKwon --- .../sql/catalyst/expressions/aggregate/Count.scala | 10 ++++++++++ .../src/test/resources/sql-tests/inputs/count.sql | 3 +++ .../test/resources/sql-tests/results/count.sql.out | 13 +++++++++++-- 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala index e043c81975066..e4488b26f197e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.catalyst.expressions.aggregate +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.types._ @@ -43,11 +44,20 @@ import org.apache.spark.sql.types._ since = "1.0.0") // scalastyle:on line.size.limit case class Count(children: Seq[Expression]) extends DeclarativeAggregate { + override def nullable: Boolean = false // Return data type. override def dataType: DataType = LongType + override def checkInputDataTypes(): TypeCheckResult = { + if (children.isEmpty) { + TypeCheckResult.TypeCheckFailure(s"$prettyName requires at least one argument.") + } else { + TypeCheckResult.TypeCheckSuccess + } + } + protected lazy val count = AttributeReference("count", LongType, nullable = false)() override lazy val aggBufferAttributes = count :: Nil diff --git a/sql/core/src/test/resources/sql-tests/inputs/count.sql b/sql/core/src/test/resources/sql-tests/inputs/count.sql index 203f04c589373..fc0d66258ea29 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/count.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/count.sql @@ -35,3 +35,6 @@ SELECT count(DISTINCT a), count(DISTINCT 3,2) FROM testData; SELECT count(DISTINCT a), count(DISTINCT 2), count(DISTINCT 2,3) FROM testData; SELECT count(DISTINCT a), count(DISTINCT 2), count(DISTINCT 3,2) FROM testData; SELECT count(distinct 0.8), percentile_approx(distinct a, 0.8) FROM testData; + +-- count without expressions +SELECT count() FROM testData; diff --git a/sql/core/src/test/resources/sql-tests/results/count.sql.out b/sql/core/src/test/resources/sql-tests/results/count.sql.out index c0cdd0d697538..64614b5b67784 100644 --- a/sql/core/src/test/resources/sql-tests/results/count.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/count.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 13 +-- Number of queries: 14 -- !query @@ -116,4 +116,13 @@ SELECT count(distinct 0.8), percentile_approx(distinct a, 0.8) FROM testData -- !query schema struct -- !query output -1 2 \ No newline at end of file +1 2 + + +-- !query +SELECT count() FROM testData +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'count()' due to data type mismatch: count requires at least one argument.; line 1 pos 7 \ No newline at end of file From 5cfbdddefe0753c3aff03f326b31c0ba8882b3a9 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 30 Nov 2020 09:23:05 +0000 Subject: [PATCH 0607/1009] [SPARK-33480][SQL] Support char/varchar type ### What changes were proposed in this pull request? 
This PR adds the char/varchar type which is kind of a variant of string type: 1. Char type is fixed-length string. When comparing char type values, we need to pad the shorter one to the longer length. 2. Varchar type is string with a length limitation. To implement the char/varchar semantic, this PR: 1. Do string length check when writing to char/varchar type columns. 2. Do string padding when reading char type columns. We don't do it at the writing side to save storage space. 3. Do string padding when comparing char type column with string literal or another char type column. (string literal is fixed length so should be treated as char type as well) To simplify the implementation, this PR doesn't propagate char/varchar type info through functions/operators(e.g. `substring`). That said, a column can only be char/varchar type if it's a table column, not a derived column like `SELECT substring(col)`. To be safe, this PR doesn't add char/varchar type to the query engine(expression input check, internal row framework, codegen framework, etc.). We will replace char/varchar type by string type with metadata (`Attribute.metadata` or `StructField.metadata`) that includes the original type string before it goes into the query engine. That said, the existing code will not see char/varchar type but only string type. char/varchar type may come from several places: 1. v1 table from hive catalog. 2. v2 table from v2 catalog. 3. user-specified schema in `spark.read.schema` and `spark.readStream.schema` 4. `Column.cast` 5. schema string in places like `from_json`, pandas UDF, etc. These places use SQL parser which replaces char/varchar with string already, even before this PR. This PR covers all the above cases, implements the length check and padding feature by looking at string type with special metadata. ### Why are the changes needed? char and varchar are standard SQL types. varchar is widely used in other databases instead of string type. ### Does this PR introduce _any_ user-facing change? For hive tables: now the table insertion fails if the value exceeds char/varchar length. Previously we truncate the value silently. For other tables: 1. now char type is allowed. 2. now we have length check when inserting to varchar columns. Previously we write the value as it is. ### How was this patch tested? new tests Closes #30412 from cloud-fan/char. 
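To make the semantics above concrete, a hedged spark-shell sketch (the table, column names, and file source are made up for illustration; the error text follows the write-side length check added in this PR):

```scala
// Hypothetical table, only to illustrate the semantics described in this PR.
spark.sql("CREATE TABLE t (c CHAR(5), v VARCHAR(5)) USING parquet")

// Write-side length check: oversized values now fail instead of being written silently.
spark.sql("INSERT INTO t VALUES ('123456', 'abc')")
// error: input string '123456' exceeds char type length limitation: 5

// Read-side padding: CHAR(5) values come back right-padded to length 5,
// while VARCHAR(5) values come back as stored.
spark.sql("INSERT INTO t VALUES ('abc', 'abc')")
spark.sql("SELECT concat('[', c, ']'), concat('[', v, ']') FROM t").show()
// [abc  ]   [abc]

// Comparison padding: a string literal is fixed length, so the shorter side is
// padded and the predicate still matches the padded char value.
spark.sql("SELECT * FROM t WHERE c = 'abc'").show()
```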
Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- docs/sql-ref-datatypes.md | 2 + .../sql/catalyst/analysis/Analyzer.scala | 9 +- .../sql/catalyst/analysis/CheckAnalysis.scala | 6 +- .../catalyst/analysis/ResolveCatalogs.scala | 5 - .../analysis/ResolvePartitionSpec.scala | 4 +- .../analysis/TableOutputResolver.scala | 19 +- .../sql/catalyst/catalog/SessionCatalog.scala | 7 +- .../sql/catalyst/parser/AstBuilder.scala | 17 +- .../catalyst/plans/logical/v2Commands.scala | 4 +- .../sql/catalyst/util/CharVarcharUtils.scala | 276 ++++++++++ .../sql/connector/catalog/CatalogV2Util.scala | 18 +- .../datasources/v2/DataSourceV2Relation.scala | 8 +- .../org/apache/spark/sql/types/CharType.scala | 38 ++ .../org/apache/spark/sql/types/DataType.scala | 10 +- .../spark/sql/types/HiveStringType.scala | 81 --- .../apache/spark/sql/types/VarcharType.scala | 37 ++ .../org/apache/spark/sql/types/package.scala | 10 +- .../sql/catalyst/analysis/AnalysisSuite.scala | 18 +- .../parser/TableSchemaParserSuite.scala | 15 +- .../spark/sql/connector/InMemoryTable.scala | 15 +- .../catalog/CatalogV2UtilSuite.scala | 2 +- .../scala/org/apache/spark/sql/Column.scala | 6 +- .../apache/spark/sql/DataFrameReader.scala | 4 +- .../analysis/ResolveSessionCatalog.scala | 37 +- .../datasources/ApplyCharTypePadding.scala | 135 +++++ .../datasources/LogicalRelation.scala | 18 +- .../datasources/jdbc/JdbcUtils.scala | 19 +- .../datasources/v2/PushDownUtils.scala | 4 +- .../internal/BaseSessionStateBuilder.scala | 1 + .../sql/streaming/DataStreamReader.scala | 4 +- .../spark/sql/CharVarcharTestSuite.scala | 505 ++++++++++++++++++ .../command/PlanResolutionSuite.scala | 44 +- .../spark/sql/sources/TableScanSuite.scala | 14 +- .../sql/hive/HiveSessionStateBuilder.scala | 1 + .../sql/hive/client/HiveClientImpl.scala | 19 +- .../spark/sql/HiveCharVarcharTestSuite.scala | 43 ++ .../sql/hive/HiveMetastoreCatalogSuite.scala | 15 +- .../sql/hive/execution/HiveDDLSuite.scala | 4 +- 38 files changed, 1172 insertions(+), 302 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/types/CharType.scala delete mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/types/HiveStringType.scala create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/types/VarcharType.scala create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ApplyCharTypePadding.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala diff --git a/docs/sql-ref-datatypes.md b/docs/sql-ref-datatypes.md index f27f1a0ca967f..0087867a8c7f7 100644 --- a/docs/sql-ref-datatypes.md +++ b/docs/sql-ref-datatypes.md @@ -37,6 +37,8 @@ Spark SQL and DataFrames support the following data types: - `DecimalType`: Represents arbitrary-precision signed decimal numbers. Backed internally by `java.math.BigDecimal`. A `BigDecimal` consists of an arbitrary precision integer unscaled value and a 32-bit integer scale. * String type - `StringType`: Represents character string values. + - `VarcharType(length)`: A variant of `StringType` which has a length limitation. Data writing will fail if the input string exceeds the length limitation. Note: this type can only be used in table schema, not functions/operators. 
+ - `CharType(length)`: A variant of `VarcharType(length)` which is fixed length. Reading column of type `CharType(n)` always returns string values of length `n`. Char type column comparison will pad the short one to the longer length. * Binary type - `BinaryType`: Represents byte sequence values. * Boolean type diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 9b599b4c8f8d4..23a1b7bdde93c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -39,7 +39,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 import org.apache.spark.sql.catalyst.trees.TreeNodeRef -import org.apache.spark.sql.catalyst.util.toPrettySQL +import org.apache.spark.sql.catalyst.util.{toPrettySQL, CharVarcharUtils} import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, After, ColumnChange, ColumnPosition, DeleteColumn, RenameColumn, UpdateColumnComment, UpdateColumnNullability, UpdateColumnPosition, UpdateColumnType} @@ -3102,7 +3102,12 @@ class Analyzer(override val catalogManager: CatalogManager) val projection = TableOutputResolver.resolveOutputColumns( v2Write.table.name, v2Write.table.output, v2Write.query, v2Write.isByName, conf) if (projection != v2Write.query) { - v2Write.withNewQuery(projection) + val cleanedTable = v2Write.table match { + case r: DataSourceV2Relation => + r.copy(output = r.output.map(CharVarcharUtils.cleanAttrMetadata)) + case other => other + } + v2Write.withNewQuery(projection).withNewTable(cleanedTable) } else { v2Write } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 7f89c130749f4..2818ba58075cd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.optimizer.BooleanSimplification import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.util.TypeUtils +import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, TypeUtils} import org.apache.spark.sql.connector.catalog.{SupportsAtomicPartitionManagement, SupportsPartitionManagement, Table} import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, After, ColumnPosition, DeleteColumn, RenameColumn, UpdateColumnComment, UpdateColumnNullability, UpdateColumnPosition, UpdateColumnType} import org.apache.spark.sql.internal.SQLConf @@ -94,6 +94,10 @@ trait CheckAnalysis extends PredicateHelper { case p if p.analyzed => // Skip already analyzed sub-plans + case leaf: LeafNode if leaf.output.map(_.dataType).exists(CharVarcharUtils.hasCharVarchar) => + throw new IllegalStateException( + "[BUG] logical plan should not have output of char/varchar type: " + leaf) + case u: UnresolvedNamespace => u.failAnalysis(s"Namespace not found: 
${u.multipartIdentifier.quoted}") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala index 7354d2478b7c8..a90de697bc084 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala @@ -35,7 +35,6 @@ class ResolveCatalogs(val catalogManager: CatalogManager) case AlterTableAddColumnsStatement( nameParts @ NonSessionCatalogAndTable(catalog, tbl), cols) => cols.foreach(c => failNullType(c.dataType)) - cols.foreach(c => failCharType(c.dataType)) val changes = cols.map { col => TableChange.addColumn( col.name.toArray, @@ -49,7 +48,6 @@ class ResolveCatalogs(val catalogManager: CatalogManager) case AlterTableReplaceColumnsStatement( nameParts @ NonSessionCatalogAndTable(catalog, tbl), cols) => cols.foreach(c => failNullType(c.dataType)) - cols.foreach(c => failCharType(c.dataType)) val changes: Seq[TableChange] = loadTable(catalog, tbl.asIdentifier) match { case Some(table) => // REPLACE COLUMNS deletes all the existing columns and adds new columns specified. @@ -72,7 +70,6 @@ class ResolveCatalogs(val catalogManager: CatalogManager) case a @ AlterTableAlterColumnStatement( nameParts @ NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _) => a.dataType.foreach(failNullType) - a.dataType.foreach(failCharType) val colName = a.column.toArray val typeChange = a.dataType.map { newDataType => TableChange.updateColumnType(colName, newDataType) @@ -145,7 +142,6 @@ class ResolveCatalogs(val catalogManager: CatalogManager) case c @ CreateTableStatement( NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _, _) => assertNoNullTypeInSchema(c.tableSchema) - assertNoCharTypeInSchema(c.tableSchema) CreateV2Table( catalog.asTableCatalog, tbl.asIdentifier, @@ -173,7 +169,6 @@ class ResolveCatalogs(val catalogManager: CatalogManager) case c @ ReplaceTableStatement( NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _) => assertNoNullTypeInSchema(c.tableSchema) - assertNoCharTypeInSchema(c.tableSchema) ReplaceTable( catalog.asTableCatalog, tbl.asIdentifier, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala index 6d061fce06919..98c6872a47cc6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala @@ -22,6 +22,7 @@ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Cast, Literal} import org.apache.spark.sql.catalyst.plans.logical.{AlterTableAddPartition, AlterTableDropPartition, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.connector.catalog.SupportsPartitionManagement import org.apache.spark.sql.types._ import org.apache.spark.sql.util.PartitioningUtils.normalizePartitionSpec @@ -66,7 +67,8 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { val partValues = partSchema.map { part => val raw = normalizedSpec.get(part.name).orNull - Cast(Literal.create(raw, StringType), part.dataType, Some(conf.sessionLocalTimeZone)).eval() + val 
dt = CharVarcharUtils.replaceCharVarcharWithString(part.dataType) + Cast(Literal.create(raw, StringType), dt, Some(conf.sessionLocalTimeZone)).eval() } InternalRow.fromSeq(partValues) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala index 4f33ca99c02db..d5c407b47c5be 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala @@ -22,6 +22,7 @@ import scala.collection.mutable import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions.{Alias, AnsiCast, Attribute, Cast, NamedExpression} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} +import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy import org.apache.spark.sql.types.DataType @@ -93,19 +94,17 @@ object TableOutputResolver { tableAttr.metadata == queryExpr.metadata) { Some(queryExpr) } else { - // Renaming is needed for handling the following cases like - // 1) Column names/types do not match, e.g., INSERT INTO TABLE tab1 SELECT 1, 2 - // 2) Target tables have column metadata - storeAssignmentPolicy match { + val casted = storeAssignmentPolicy match { case StoreAssignmentPolicy.ANSI => - Some(Alias( - AnsiCast(queryExpr, tableAttr.dataType, Option(conf.sessionLocalTimeZone)), - tableAttr.name)(explicitMetadata = Option(tableAttr.metadata))) + AnsiCast(queryExpr, tableAttr.dataType, Option(conf.sessionLocalTimeZone)) case _ => - Some(Alias( - Cast(queryExpr, tableAttr.dataType, Option(conf.sessionLocalTimeZone)), - tableAttr.name)(explicitMetadata = Option(tableAttr.metadata))) + Cast(queryExpr, tableAttr.dataType, Option(conf.sessionLocalTimeZone)) } + val exprWithStrLenCheck = CharVarcharUtils.stringLengthCheck(casted, tableAttr) + // Renaming is needed for handling the following cases like + // 1) Column names/types do not match, e.g., INSERT INTO TABLE tab1 SELECT 1, 2 + // 2) Target tables have column metadata + Some(Alias(exprWithStrLenCheck, tableAttr.name)(explicitMetadata = Some(tableAttr.metadata))) } storeAssignmentPolicy match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 5122ca7521d9a..01bce079610ae 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -38,7 +38,7 @@ import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo, ImplicitCastInputTypes} import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParserInterface} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias, View} -import org.apache.spark.sql.catalyst.util.StringUtils +import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, StringUtils} import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.GLOBAL_TEMP_DATABASE @@ -473,7 +473,10 @@ class SessionCatalog( val table = 
formatTableName(name.table) requireDbExists(db) requireTableExists(TableIdentifier(table, Some(db))) - externalCatalog.getTable(db, table) + val t = externalCatalog.getTable(db, table) + // We replace char/varchar with "annotated" string type in the table schema, as the query + // engine doesn't support char/varchar yet. + t.copy(schema = CharVarcharUtils.replaceCharVarcharWithStringInSchema(t.schema)) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index a31d7ca7268a6..ce95ea4b41def 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -36,8 +36,8 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.{First, Last} import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, IntervalUtils} import org.apache.spark.sql.catalyst.util.DateTimeUtils.{getZoneId, stringToDate, stringToTimestamp} -import org.apache.spark.sql.catalyst.util.IntervalUtils import org.apache.spark.sql.catalyst.util.IntervalUtils.IntervalUnit import org.apache.spark.sql.connector.catalog.{SupportsNamespaces, TableCatalog} import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition @@ -99,7 +99,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } override def visitSingleTableSchema(ctx: SingleTableSchemaContext): StructType = { - withOrigin(ctx)(StructType(visitColTypeList(ctx.colTypeList))) + val schema = CharVarcharUtils.replaceCharVarcharWithStringInSchema( + StructType(visitColTypeList(ctx.colTypeList))) + withOrigin(ctx)(schema) } def parseRawDataType(ctx: SingleDataTypeContext): DataType = withOrigin(ctx) { @@ -2226,7 +2228,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * Create a Spark DataType. */ private def visitSparkDataType(ctx: DataTypeContext): DataType = { - HiveStringType.replaceCharType(typedVisit(ctx)) + CharVarcharUtils.replaceCharVarcharWithString(typedVisit(ctx)) } /** @@ -2301,16 +2303,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg builder.putString("comment", _) } - // Add Hive type string to metadata. 
- val rawDataType = typedVisit[DataType](ctx.dataType) - val cleanedDataType = HiveStringType.replaceCharType(rawDataType) - if (rawDataType != cleanedDataType) { - builder.putString(HIVE_TYPE_STRING, rawDataType.catalogString) - } - StructField( name = colName.getText, - dataType = cleanedDataType, + dataType = typedVisit[DataType](ctx.dataType), nullable = NULL == null, metadata = builder.build()) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index ebf41f6a6e304..4931f0eb2c007 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -21,6 +21,7 @@ import org.apache.spark.sql.catalyst.analysis.{NamedRelation, PartitionSpec, Res import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet, Expression, Unevaluable} import org.apache.spark.sql.catalyst.plans.DescribeCommandSchema +import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, ColumnChange} import org.apache.spark.sql.connector.expressions.Transform @@ -45,9 +46,10 @@ trait V2WriteCommand extends Command { table.skipSchemaResolution || (query.output.size == table.output.size && query.output.zip(table.output).forall { case (inAttr, outAttr) => + val outType = CharVarcharUtils.getRawType(outAttr.metadata).getOrElse(outAttr.dataType) // names and types must match, nullability must be compatible inAttr.name == outAttr.name && - DataType.equalsIgnoreCompatibleNullability(inAttr.dataType, outAttr.dataType) && + DataType.equalsIgnoreCompatibleNullability(inAttr.dataType, outType) && (outAttr.nullable || !inAttr.nullable) }) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala new file mode 100644 index 0000000000000..0cbe5abdbbd7a --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala @@ -0,0 +1,276 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.util + +import scala.collection.mutable + +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser +import org.apache.spark.sql.types._ + +object CharVarcharUtils { + + private val CHAR_VARCHAR_TYPE_STRING_METADATA_KEY = "__CHAR_VARCHAR_TYPE_STRING" + + /** + * Replaces CharType/VarcharType with StringType recursively in the given struct type. If a + * top-level StructField's data type is CharType/VarcharType or has nested CharType/VarcharType, + * this method will add the original type string to the StructField's metadata, so that we can + * re-construct the original data type with CharType/VarcharType later when needed. + */ + def replaceCharVarcharWithStringInSchema(st: StructType): StructType = { + StructType(st.map { field => + if (hasCharVarchar(field.dataType)) { + val metadata = new MetadataBuilder().withMetadata(field.metadata) + .putString(CHAR_VARCHAR_TYPE_STRING_METADATA_KEY, field.dataType.sql).build() + field.copy(dataType = replaceCharVarcharWithString(field.dataType), metadata = metadata) + } else { + field + } + }) + } + + /** + * Returns true if the given data type is CharType/VarcharType or has nested CharType/VarcharType. + */ + def hasCharVarchar(dt: DataType): Boolean = { + dt.existsRecursively(f => f.isInstanceOf[CharType] || f.isInstanceOf[VarcharType]) + } + + /** + * Replaces CharType/VarcharType with StringType recursively in the given data type. + */ + def replaceCharVarcharWithString(dt: DataType): DataType = dt match { + case ArrayType(et, nullable) => + ArrayType(replaceCharVarcharWithString(et), nullable) + case MapType(kt, vt, nullable) => + MapType(replaceCharVarcharWithString(kt), replaceCharVarcharWithString(vt), nullable) + case StructType(fields) => + StructType(fields.map { field => + field.copy(dataType = replaceCharVarcharWithString(field.dataType)) + }) + case _: CharType => StringType + case _: VarcharType => StringType + case _ => dt + } + + /** + * Removes the metadata entry that contains the original type string of CharType/VarcharType from + * the given attribute's metadata. + */ + def cleanAttrMetadata(attr: AttributeReference): AttributeReference = { + val cleaned = new MetadataBuilder().withMetadata(attr.metadata) + .remove(CHAR_VARCHAR_TYPE_STRING_METADATA_KEY).build() + attr.withMetadata(cleaned) + } + + /** + * Re-construct the original data type from the type string in the given metadata. + * This is needed when dealing with char/varchar columns/fields. + */ + def getRawType(metadata: Metadata): Option[DataType] = { + if (metadata.contains(CHAR_VARCHAR_TYPE_STRING_METADATA_KEY)) { + Some(CatalystSqlParser.parseRawDataType( + metadata.getString(CHAR_VARCHAR_TYPE_STRING_METADATA_KEY))) + } else { + None + } + } + + /** + * Returns expressions to apply read-side char type padding for the given attributes. String + * values should be right-padded to N characters if it's from a CHAR(N) column/field. 
+ */ + def charTypePadding(output: Seq[AttributeReference]): Seq[NamedExpression] = { + output.map { attr => + getRawType(attr.metadata).filter { rawType => + rawType.existsRecursively(_.isInstanceOf[CharType]) + }.map { rawType => + Alias(charTypePadding(attr, rawType), attr.name)(explicitMetadata = Some(attr.metadata)) + }.getOrElse(attr) + } + } + + private def charTypePadding(expr: Expression, dt: DataType): Expression = dt match { + case CharType(length) => StringRPad(expr, Literal(length)) + + case StructType(fields) => + val struct = CreateNamedStruct(fields.zipWithIndex.flatMap { case (f, i) => + Seq(Literal(f.name), charTypePadding(GetStructField(expr, i, Some(f.name)), f.dataType)) + }) + if (expr.nullable) { + If(IsNull(expr), Literal(null, struct.dataType), struct) + } else { + struct + } + + case ArrayType(et, containsNull) => charTypePaddingInArray(expr, et, containsNull) + + case MapType(kt, vt, valueContainsNull) => + val newKeys = charTypePaddingInArray(MapKeys(expr), kt, containsNull = false) + val newValues = charTypePaddingInArray(MapValues(expr), vt, valueContainsNull) + MapFromArrays(newKeys, newValues) + + case _ => expr + } + + private def charTypePaddingInArray( + arr: Expression, et: DataType, containsNull: Boolean): Expression = { + val param = NamedLambdaVariable("x", replaceCharVarcharWithString(et), containsNull) + val func = LambdaFunction(charTypePadding(param, et), Seq(param)) + ArrayTransform(arr, func) + } + + /** + * Returns an expression to apply write-side string length check for the given expression. A + * string value can not exceed N characters if it's written into a CHAR(N)/VARCHAR(N) + * column/field. + */ + def stringLengthCheck(expr: Expression, targetAttr: Attribute): Expression = { + getRawType(targetAttr.metadata).map { rawType => + stringLengthCheck(expr, rawType) + }.getOrElse(expr) + } + + private def raiseError(expr: Expression, typeName: String, length: Int): Expression = { + val errorMsg = Concat(Seq( + Literal("input string '"), + expr, + Literal(s"' exceeds $typeName type length limitation: $length"))) + Cast(RaiseError(errorMsg), StringType) + } + + private def stringLengthCheck(expr: Expression, dt: DataType): Expression = dt match { + case CharType(length) => + val trimmed = StringTrimRight(expr) + // Trailing spaces do not count in the length check. We don't need to retain the trailing + // spaces, as we will pad char type columns/fields at read time. + If( + GreaterThan(Length(trimmed), Literal(length)), + raiseError(expr, "char", length), + trimmed) + + case VarcharType(length) => + val trimmed = StringTrimRight(expr) + // Trailing spaces do not count in the length check. We need to retain the trailing spaces + // (truncate to length N), as there is no read-time padding for varchar type. + // TODO: create a special TrimRight function that can trim to a certain length. 
+ If( + LessThanOrEqual(Length(expr), Literal(length)), + expr, + If( + GreaterThan(Length(trimmed), Literal(length)), + raiseError(expr, "varchar", length), + StringRPad(trimmed, Literal(length)))) + + case StructType(fields) => + val struct = CreateNamedStruct(fields.zipWithIndex.flatMap { case (f, i) => + Seq(Literal(f.name), stringLengthCheck(GetStructField(expr, i, Some(f.name)), f.dataType)) + }) + if (expr.nullable) { + If(IsNull(expr), Literal(null, struct.dataType), struct) + } else { + struct + } + + case ArrayType(et, containsNull) => stringLengthCheckInArray(expr, et, containsNull) + + case MapType(kt, vt, valueContainsNull) => + val newKeys = stringLengthCheckInArray(MapKeys(expr), kt, containsNull = false) + val newValues = stringLengthCheckInArray(MapValues(expr), vt, valueContainsNull) + MapFromArrays(newKeys, newValues) + + case _ => expr + } + + private def stringLengthCheckInArray( + arr: Expression, et: DataType, containsNull: Boolean): Expression = { + val param = NamedLambdaVariable("x", replaceCharVarcharWithString(et), containsNull) + val func = LambdaFunction(stringLengthCheck(param, et), Seq(param)) + ArrayTransform(arr, func) + } + + /** + * Return expressions to apply char type padding for the string comparison between the given + * attributes. When comparing two char type columns/fields, we need to pad the shorter one to + * the longer length. + */ + def addPaddingInStringComparison(attrs: Seq[Attribute]): Seq[Expression] = { + val rawTypes = attrs.map(attr => getRawType(attr.metadata)) + if (rawTypes.exists(_.isEmpty)) { + attrs + } else { + val typeWithTargetCharLength = rawTypes.map(_.get).reduce(typeWithWiderCharLength) + attrs.zip(rawTypes.map(_.get)).map { case (attr, rawType) => + padCharToTargetLength(attr, rawType, typeWithTargetCharLength).getOrElse(attr) + } + } + } + + private def typeWithWiderCharLength(type1: DataType, type2: DataType): DataType = { + (type1, type2) match { + case (CharType(len1), CharType(len2)) => + CharType(math.max(len1, len2)) + case (StructType(fields1), StructType(fields2)) => + assert(fields1.length == fields2.length) + StructType(fields1.zip(fields2).map { case (left, right) => + StructField("", typeWithWiderCharLength(left.dataType, right.dataType)) + }) + case (ArrayType(et1, _), ArrayType(et2, _)) => + ArrayType(typeWithWiderCharLength(et1, et2)) + case _ => NullType + } + } + + private def padCharToTargetLength( + expr: Expression, + rawType: DataType, + typeWithTargetCharLength: DataType): Option[Expression] = { + (rawType, typeWithTargetCharLength) match { + case (CharType(len), CharType(target)) if target > len => + Some(StringRPad(expr, Literal(target))) + + case (StructType(fields), StructType(targets)) => + assert(fields.length == targets.length) + var i = 0 + var needPadding = false + val createStructExprs = mutable.ArrayBuffer.empty[Expression] + while (i < fields.length) { + val field = fields(i) + val fieldExpr = GetStructField(expr, i, Some(field.name)) + val padded = padCharToTargetLength(fieldExpr, field.dataType, targets(i).dataType) + needPadding = padded.isDefined + createStructExprs += Literal(field.name) + createStructExprs += padded.getOrElse(fieldExpr) + i += 1 + } + if (needPadding) Some(CreateNamedStruct(createStructExprs.toSeq)) else None + + case (ArrayType(et, containsNull), ArrayType(target, _)) => + val param = NamedLambdaVariable("x", replaceCharVarcharWithString(et), containsNull) + padCharToTargetLength(param, et, target).map { padded => + val func = LambdaFunction(padded, Seq(param)) + 
ArrayTransform(expr, func) + } + + // We don't handle MapType here as it's not comparable. + + case _ => None + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala index b6dc4f61c8588..02db2293ec64a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala @@ -24,11 +24,10 @@ import scala.collection.JavaConverters._ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{NamedRelation, NoSuchDatabaseException, NoSuchNamespaceException, NoSuchTableException, UnresolvedV2Relation} -import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.catalyst.plans.logical.{AlterTable, CreateTableAsSelectStatement, CreateTableStatement, ReplaceTableAsSelectStatement, ReplaceTableStatement, SerdeInfo} import org.apache.spark.sql.connector.catalog.TableChange._ import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation -import org.apache.spark.sql.types.{ArrayType, DataType, HIVE_TYPE_STRING, HiveStringType, MapType, NullType, StructField, StructType} +import org.apache.spark.sql.types.{ArrayType, DataType, MapType, NullType, StructField, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.Utils @@ -379,21 +378,6 @@ private[sql] object CatalogV2Util { .asTableCatalog } - def failCharType(dt: DataType): Unit = { - if (HiveStringType.containsCharType(dt)) { - throw new AnalysisException( - "Cannot use CHAR type in non-Hive-Serde tables, please use STRING type instead.") - } - } - - def assertNoCharTypeInSchema(schema: StructType): Unit = { - schema.foreach { f => - if (f.metadata.contains(HIVE_TYPE_STRING)) { - failCharType(CatalystSqlParser.parseRawDataType(f.metadata.getString(HIVE_TYPE_STRING))) - } - } - } - def failNullType(dt: DataType): Unit = { def containsNullType(dt: DataType): Boolean = dt match { case ArrayType(et, _) => containsNullType(et) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala index f541411daeff4..4debdd380e6b4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, NamedRelation} import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics} -import org.apache.spark.sql.catalyst.util.truncatedString +import org.apache.spark.sql.catalyst.util.{truncatedString, CharVarcharUtils} import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier, MetadataColumn, SupportsMetadataColumns, Table, TableCapability} import org.apache.spark.sql.connector.read.{Scan, Statistics => V2Statistics, SupportsReportStatistics} import org.apache.spark.sql.connector.read.streaming.{Offset, SparkDataStream} @@ -171,8 +171,10 @@ object DataSourceV2Relation { catalog: Option[CatalogPlugin], identifier: Option[Identifier], 
options: CaseInsensitiveStringMap): DataSourceV2Relation = { - val output = table.schema().toAttributes - DataSourceV2Relation(table, output, catalog, identifier, options) + // The v2 source may return schema containing char/varchar type. We replace char/varchar + // with "annotated" string type here as the query engine doesn't support char/varchar yet. + val schema = CharVarcharUtils.replaceCharVarcharWithStringInSchema(table.schema) + DataSourceV2Relation(table, schema.toAttributes, catalog, identifier, options) } def create( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/CharType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/CharType.scala new file mode 100644 index 0000000000000..67ab1cc2f3321 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/CharType.scala @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.types + +import scala.math.Ordering +import scala.reflect.runtime.universe.typeTag + +import org.apache.spark.annotation.Experimental +import org.apache.spark.unsafe.types.UTF8String + +@Experimental +case class CharType(length: Int) extends AtomicType { + require(length >= 0, "The length of char type cannot be negative.") + + private[sql] type InternalType = UTF8String + @transient private[sql] lazy val tag = typeTag[InternalType] + private[sql] val ordering = implicitly[Ordering[InternalType]] + + override def defaultSize: Int = length + override def typeName: String = s"char($length)" + override def toString: String = s"CharType($length)" + private[spark] override def asNullable: CharType = this +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala index 7556a19f0d316..e4ee6eb377a4d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala @@ -124,13 +124,15 @@ abstract class DataType extends AbstractDataType { object DataType { private val FIXED_DECIMAL = """decimal\(\s*(\d+)\s*,\s*(\-?\d+)\s*\)""".r + private val CHAR_TYPE = """char\(\s*(\d+)\s*\)""".r + private val VARCHAR_TYPE = """varchar\(\s*(\d+)\s*\)""".r def fromDDL(ddl: String): DataType = { parseTypeWithFallback( ddl, CatalystSqlParser.parseDataType, "Cannot parse the data type: ", - fallbackParser = CatalystSqlParser.parseTableSchema) + fallbackParser = str => CatalystSqlParser.parseTableSchema(str)) } /** @@ -166,7 +168,7 @@ object DataType { def fromJson(json: String): DataType = parseDataType(parse(json)) - private val nonDecimalNameToType = { + private val otherTypes = { Seq(NullType, DateType, TimestampType, BinaryType, IntegerType, BooleanType, LongType, 
DoubleType, FloatType, ShortType, ByteType, StringType, CalendarIntervalType) .map(t => t.typeName -> t).toMap @@ -177,7 +179,9 @@ object DataType { name match { case "decimal" => DecimalType.USER_DEFAULT case FIXED_DECIMAL(precision, scale) => DecimalType(precision.toInt, scale.toInt) - case other => nonDecimalNameToType.getOrElse( + case CHAR_TYPE(length) => CharType(length.toInt) + case VARCHAR_TYPE(length) => VarcharType(length.toInt) + case other => otherTypes.getOrElse( other, throw new IllegalArgumentException( s"Failed to convert the JSON string '$name' to a data type.")) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/HiveStringType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/HiveStringType.scala deleted file mode 100644 index a29f49ad14a77..0000000000000 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/HiveStringType.scala +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.sql.types - -import scala.math.Ordering -import scala.reflect.runtime.universe.typeTag - -import org.apache.spark.unsafe.types.UTF8String - -/** - * A hive string type for compatibility. These datatypes should only used for parsing, - * and should NOT be used anywhere else. Any instance of these data types should be - * replaced by a [[StringType]] before analysis. - */ -sealed abstract class HiveStringType extends AtomicType { - private[sql] type InternalType = UTF8String - - private[sql] val ordering = implicitly[Ordering[InternalType]] - - @transient private[sql] lazy val tag = typeTag[InternalType] - - override def defaultSize: Int = length - - private[spark] override def asNullable: HiveStringType = this - - def length: Int -} - -object HiveStringType { - def replaceCharType(dt: DataType): DataType = dt match { - case ArrayType(et, nullable) => - ArrayType(replaceCharType(et), nullable) - case MapType(kt, vt, nullable) => - MapType(replaceCharType(kt), replaceCharType(vt), nullable) - case StructType(fields) => - StructType(fields.map { field => - field.copy(dataType = replaceCharType(field.dataType)) - }) - case _: HiveStringType => StringType - case _ => dt - } - - def containsCharType(dt: DataType): Boolean = dt match { - case ArrayType(et, _) => containsCharType(et) - case MapType(kt, vt, _) => containsCharType(kt) || containsCharType(vt) - case StructType(fields) => fields.exists(f => containsCharType(f.dataType)) - case _ => dt.isInstanceOf[CharType] - } -} - -/** - * Hive char type. Similar to other HiveStringType's, these datatypes should only used for - * parsing, and should NOT be used anywhere else. Any instance of these data types should be - * replaced by a [[StringType]] before analysis. 
- */
-case class CharType(length: Int) extends HiveStringType {
-  override def simpleString: String = s"char($length)"
-}
-
-/**
- * Hive varchar type. Similar to other HiveStringType's, these datatypes should only used for
- * parsing, and should NOT be used anywhere else. Any instance of these data types should be
- * replaced by a [[StringType]] before analysis.
- */
-case class VarcharType(length: Int) extends HiveStringType {
-  override def simpleString: String = s"varchar($length)"
-}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/VarcharType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/VarcharType.scala
new file mode 100644
index 0000000000000..8d78640c1e125
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/VarcharType.scala
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.types
+
+import scala.math.Ordering
+import scala.reflect.runtime.universe.typeTag
+
+import org.apache.spark.annotation.Experimental
+import org.apache.spark.unsafe.types.UTF8String
+
+@Experimental
+case class VarcharType(length: Int) extends AtomicType {
+  require(length >= 0, "The length of varchar type cannot be negative.")
+
+  private[sql] type InternalType = UTF8String
+  @transient private[sql] lazy val tag = typeTag[InternalType]
+  private[sql] val ordering = implicitly[Ordering[InternalType]]
+
+  override def defaultSize: Int = length
+  override def typeName: String = s"varchar($length)"
+  override def toString: String = s"VarcharType($length)"
+  private[spark] override def asNullable: VarcharType = this
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/package.scala
index f29cbc2069e39..346a51ea10c82 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/package.scala
@@ -21,12 +21,4 @@ package org.apache.spark.sql
  * Contains a type system for attributes produced by relations, including complex types like
  * structs, arrays and maps.
  */
-package object types {
-  /**
-   * Metadata key used to store the raw hive type string in the metadata of StructField. This
-   * is relevant for datatypes that do not have a direct Spark SQL counterpart, such as CHAR and
-   * VARCHAR. We need to preserve the original type in order to invoke the correct object
-   * inspector in Hive.
- */ - val HIVE_TYPE_STRING = "HIVE_TYPE_STRING" -} +package object types diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index f0a24d4a56048..0afa811e5d590 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.analysis import java.util.TimeZone +import scala.collection.JavaConverters._ import scala.reflect.ClassTag import scala.reflect.runtime.universe.TypeTag @@ -41,9 +42,11 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, RangePartitioning, RoundRobinPartitioning} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.util._ +import org.apache.spark.sql.connector.InMemoryTable +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ - +import org.apache.spark.sql.util.CaseInsensitiveStringMap class AnalysisSuite extends AnalysisTest with Matchers { import org.apache.spark.sql.catalyst.analysis.TestRelations._ @@ -55,6 +58,19 @@ class AnalysisSuite extends AnalysisTest with Matchers { } } + test("fail if a leaf node has char/varchar type output") { + val schema1 = new StructType().add("c", CharType(5)) + val schema2 = new StructType().add("c", VarcharType(5)) + val schema3 = new StructType().add("c", ArrayType(CharType(5))) + Seq(schema1, schema2, schema3).foreach { schema => + val table = new InMemoryTable("t", schema, Array.empty, Map.empty[String, String].asJava) + intercept[IllegalStateException] { + DataSourceV2Relation( + table, schema.toAttributes, None, None, CaseInsensitiveStringMap.empty()).analyze + } + } + } + test("union project *") { val plan = (1 to 120) .map(_ => testRelation) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala index 6803fc307f919..95851d44b4747 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.catalyst.parser import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.types._ class TableSchemaParserSuite extends SparkFunSuite { @@ -57,11 +58,6 @@ class TableSchemaParserSuite extends SparkFunSuite { |anotherArray:Array> """.stripMargin.replace("\n", "") - val builder = new MetadataBuilder - builder.putString(HIVE_TYPE_STRING, - "struct," + - "MAP:map,arrAy:array,anotherArray:array>") - val expectedDataType = StructType( StructField("complexStructCol", StructType( @@ -69,13 +65,12 @@ class TableSchemaParserSuite extends SparkFunSuite { StructType( StructField("deciMal", DecimalType.USER_DEFAULT) :: StructField("anotherDecimal", DecimalType(5, 2)) :: Nil)) :: - StructField("MAP", MapType(TimestampType, StringType)) :: + StructField("MAP", MapType(TimestampType, VarcharType(10))) :: StructField("arrAy", ArrayType(DoubleType)) :: - StructField("anotherArray", ArrayType(StringType)) :: Nil), - 
nullable = true, - builder.build()) :: Nil) + StructField("anotherArray", ArrayType(CharType(9))) :: Nil)) :: Nil) - assert(parse(tableSchemaString) === expectedDataType) + assert(parse(tableSchemaString) === + CharVarcharUtils.replaceCharVarcharWithStringInSchema(expectedDataType)) } // Negative cases diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala index ffff00b54f1b8..cfb044b428e41 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala @@ -28,7 +28,7 @@ import org.scalatest.Assertions._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, JoinedRow} -import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, DateTimeUtils} import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.expressions.{BucketTransform, DaysTransform, HoursTransform, IdentityTransform, MonthsTransform, Transform, YearsTransform} import org.apache.spark.sql.connector.read._ @@ -116,11 +116,12 @@ class InMemoryTable( } } + val cleanedSchema = CharVarcharUtils.replaceCharVarcharWithStringInSchema(schema) partitioning.map { case IdentityTransform(ref) => - extractor(ref.fieldNames, schema, row)._1 + extractor(ref.fieldNames, cleanedSchema, row)._1 case YearsTransform(ref) => - extractor(ref.fieldNames, schema, row) match { + extractor(ref.fieldNames, cleanedSchema, row) match { case (days: Int, DateType) => ChronoUnit.YEARS.between(EPOCH_LOCAL_DATE, DateTimeUtils.daysToLocalDate(days)) case (micros: Long, TimestampType) => @@ -130,7 +131,7 @@ class InMemoryTable( throw new IllegalArgumentException(s"Match: unsupported argument(s) type - ($v, $t)") } case MonthsTransform(ref) => - extractor(ref.fieldNames, schema, row) match { + extractor(ref.fieldNames, cleanedSchema, row) match { case (days: Int, DateType) => ChronoUnit.MONTHS.between(EPOCH_LOCAL_DATE, DateTimeUtils.daysToLocalDate(days)) case (micros: Long, TimestampType) => @@ -140,7 +141,7 @@ class InMemoryTable( throw new IllegalArgumentException(s"Match: unsupported argument(s) type - ($v, $t)") } case DaysTransform(ref) => - extractor(ref.fieldNames, schema, row) match { + extractor(ref.fieldNames, cleanedSchema, row) match { case (days, DateType) => days case (micros: Long, TimestampType) => @@ -149,14 +150,14 @@ class InMemoryTable( throw new IllegalArgumentException(s"Match: unsupported argument(s) type - ($v, $t)") } case HoursTransform(ref) => - extractor(ref.fieldNames, schema, row) match { + extractor(ref.fieldNames, cleanedSchema, row) match { case (micros: Long, TimestampType) => ChronoUnit.HOURS.between(Instant.EPOCH, DateTimeUtils.microsToInstant(micros)) case (v, t) => throw new IllegalArgumentException(s"Match: unsupported argument(s) type - ($v, $t)") } case BucketTransform(numBuckets, ref) => - val (value, dataType) = extractor(ref.fieldNames, schema, row) + val (value, dataType) = extractor(ref.fieldNames, cleanedSchema, row) val valueHashCode = if (value == null) 0 else value.hashCode ((valueHashCode + 31 * dataType.hashCode()) & Integer.MAX_VALUE) % numBuckets } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogV2UtilSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogV2UtilSuite.scala index 7a9a7f52ff8fd..da5cfab8be3c7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogV2UtilSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogV2UtilSuite.scala @@ -28,7 +28,7 @@ class CatalogV2UtilSuite extends SparkFunSuite { val testCatalog = mock(classOf[TableCatalog]) val ident = mock(classOf[Identifier]) val table = mock(classOf[Table]) - when(table.schema()).thenReturn(mock(classOf[StructType])) + when(table.schema()).thenReturn(new StructType().add("i", "int")) when(testCatalog.loadTable(ident)).thenReturn(table) val r = CatalogV2Util.loadRelation(testCatalog, ident) assert(r.isDefined) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index c164835c753e8..b3e403ffa7382 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.encoders.{encoderFor, ExpressionEncoder} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.parser.CatalystSqlParser -import org.apache.spark.sql.catalyst.util.toPrettySQL +import org.apache.spark.sql.catalyst.util.{toPrettySQL, CharVarcharUtils} import org.apache.spark.sql.execution.aggregate.TypedAggregateExpression import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions.lit @@ -1181,7 +1181,9 @@ class Column(val expr: Expression) extends Logging { * @group expr_ops * @since 1.3.0 */ - def cast(to: DataType): Column = withExpr { Cast(expr, to) } + def cast(to: DataType): Column = withExpr { + Cast(expr, CharVarcharUtils.replaceCharVarcharWithString(to)) + } /** * Casts the column to a different data type, using the canonical string representation diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index 8f96f0b882424..007df183ee353 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.csv.{CSVHeaderChecker, CSVOptions, UnivocityParser} import org.apache.spark.sql.catalyst.expressions.ExprUtils import org.apache.spark.sql.catalyst.json.{CreateJacksonParser, JacksonParser, JSONOptions} -import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, FailureSafeParser} +import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CharVarcharUtils, FailureSafeParser} import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsCatalogOptions, SupportsRead} import org.apache.spark.sql.connector.catalog.TableCapability._ import org.apache.spark.sql.execution.command.DDLUtils @@ -73,7 +73,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * @since 1.4.0 */ def schema(schema: StructType): DataFrameReader = { - this.userSpecifiedSchema = Option(schema) + this.userSpecifiedSchema = Option(CharVarcharUtils.replaceCharVarcharWithStringInSchema(schema)) this } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 582f11a2be8fa..53edd4fca7794 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource} import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 import org.apache.spark.sql.internal.HiveSerDe -import org.apache.spark.sql.types.{HIVE_TYPE_STRING, HiveStringType, MetadataBuilder, StructField, StructType} +import org.apache.spark.sql.types.{MetadataBuilder, StructField, StructType} /** * Resolves catalogs from the multi-part identifiers in SQL statements, and convert the statements @@ -51,9 +51,6 @@ class ResolveSessionCatalog( cols.foreach(c => failNullType(c.dataType)) loadTable(catalog, tbl.asIdentifier).collect { case v1Table: V1Table => - if (!DDLUtils.isHiveTable(v1Table.v1Table)) { - cols.foreach(c => failCharType(c.dataType)) - } cols.foreach { c => assertTopLevelColumn(c.name, "AlterTableAddColumnsCommand") if (!c.nullable) { @@ -63,7 +60,6 @@ class ResolveSessionCatalog( } AlterTableAddColumnsCommand(tbl.asTableIdentifier, cols.map(convertToStructField)) }.getOrElse { - cols.foreach(c => failCharType(c.dataType)) val changes = cols.map { col => TableChange.addColumn( col.name.toArray, @@ -82,7 +78,6 @@ class ResolveSessionCatalog( case Some(_: V1Table) => throw new AnalysisException("REPLACE COLUMNS is only supported with v2 tables.") case Some(table) => - cols.foreach(c => failCharType(c.dataType)) // REPLACE COLUMNS deletes all the existing columns and adds new columns specified. val deleteChanges = table.schema.fieldNames.map { name => TableChange.deleteColumn(Array(name)) @@ -105,10 +100,6 @@ class ResolveSessionCatalog( a.dataType.foreach(failNullType) loadTable(catalog, tbl.asIdentifier).collect { case v1Table: V1Table => - if (!DDLUtils.isHiveTable(v1Table.v1Table)) { - a.dataType.foreach(failCharType) - } - if (a.column.length > 1) { throw new AnalysisException( "ALTER COLUMN with qualified column is only supported with v2 tables.") @@ -134,19 +125,13 @@ class ResolveSessionCatalog( s"Available: ${v1Table.schema.fieldNames.mkString(", ")}") } } - // Add Hive type string to metadata. 
- val cleanedDataType = HiveStringType.replaceCharType(dataType) - if (dataType != cleanedDataType) { - builder.putString(HIVE_TYPE_STRING, dataType.catalogString) - } val newColumn = StructField( colName, - cleanedDataType, + dataType, nullable = true, builder.build()) AlterTableChangeColumnCommand(tbl.asTableIdentifier, colName, newColumn) }.getOrElse { - a.dataType.foreach(failCharType) val colName = a.column.toArray val typeChange = a.dataType.map { newDataType => TableChange.updateColumnType(colName, newDataType) @@ -271,16 +256,12 @@ class ResolveSessionCatalog( val (storageFormat, provider) = getStorageFormatAndProvider( c.provider, c.options, c.location, c.serde, ctas = false) if (!isV2Provider(provider)) { - if (!DDLUtils.isHiveTable(Some(provider))) { - assertNoCharTypeInSchema(c.tableSchema) - } val tableDesc = buildCatalogTable(tbl.asTableIdentifier, c.tableSchema, c.partitioning, c.bucketSpec, c.properties, provider, c.location, c.comment, storageFormat, c.external) val mode = if (c.ifNotExists) SaveMode.Ignore else SaveMode.ErrorIfExists CreateTable(tableDesc, mode, None) } else { - assertNoCharTypeInSchema(c.tableSchema) CreateV2Table( catalog.asTableCatalog, tbl.asIdentifier, @@ -305,7 +286,6 @@ class ResolveSessionCatalog( val mode = if (c.ifNotExists) SaveMode.Ignore else SaveMode.ErrorIfExists CreateTable(tableDesc, mode, Some(c.asSelect)) } else { - assertNoCharTypeInSchema(c.schema) CreateTableAsSelect( catalog.asTableCatalog, tbl.asIdentifier, @@ -332,7 +312,6 @@ class ResolveSessionCatalog( if (!isV2Provider(provider)) { throw new AnalysisException("REPLACE TABLE is only supported with v2 tables.") } else { - assertNoCharTypeInSchema(c.tableSchema) ReplaceTable( catalog.asTableCatalog, tbl.asIdentifier, @@ -754,17 +733,7 @@ class ResolveSessionCatalog( private def convertToStructField(col: QualifiedColType): StructField = { val builder = new MetadataBuilder col.comment.foreach(builder.putString("comment", _)) - - val cleanedDataType = HiveStringType.replaceCharType(col.dataType) - if (col.dataType != cleanedDataType) { - builder.putString(HIVE_TYPE_STRING, col.dataType.catalogString) - } - - StructField( - col.name.head, - cleanedDataType, - nullable = true, - builder.build()) + StructField(col.name.head, col.dataType, nullable = true, builder.build()) } private def isV2Provider(provider: String): Boolean = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ApplyCharTypePadding.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ApplyCharTypePadding.scala new file mode 100644 index 0000000000000..35bb86f178eb1 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ApplyCharTypePadding.scala @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources + +import org.apache.spark.sql.catalyst.catalog.HiveTableRelation +import org.apache.spark.sql.catalyst.expressions.{Attribute, BinaryComparison, Expression, In, Literal, StringRPad} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.util.CharVarcharUtils +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.types.{CharType, StringType} +import org.apache.spark.unsafe.types.UTF8String + +/** + * This rule applies char type padding in two places: + * 1. When reading values from column/field of type CHAR(N), right-pad the values to length N. + * 2. When comparing char type column/field with string literal or char type column/field, + * right-pad the shorter one to the longer length. + */ +object ApplyCharTypePadding extends Rule[LogicalPlan] { + + override def apply(plan: LogicalPlan): LogicalPlan = { + val padded = plan.resolveOperatorsUpWithNewOutput { + case r: LogicalRelation => + val projectList = CharVarcharUtils.charTypePadding(r.output) + if (projectList == r.output) { + r -> Nil + } else { + val cleanedOutput = r.output.map(CharVarcharUtils.cleanAttrMetadata) + val padded = Project(projectList, r.copy(output = cleanedOutput)) + padded -> r.output.zip(padded.output) + } + + case r: DataSourceV2Relation => + val projectList = CharVarcharUtils.charTypePadding(r.output) + if (projectList == r.output) { + r -> Nil + } else { + val cleanedOutput = r.output.map(CharVarcharUtils.cleanAttrMetadata) + val padded = Project(projectList, r.copy(output = cleanedOutput)) + padded -> r.output.zip(padded.output) + } + + case r: HiveTableRelation => + val projectList = CharVarcharUtils.charTypePadding(r.output) + if (projectList == r.output) { + r -> Nil + } else { + val cleanedDataCols = r.dataCols.map(CharVarcharUtils.cleanAttrMetadata) + val cleanedPartCols = r.partitionCols.map(CharVarcharUtils.cleanAttrMetadata) + val padded = Project(projectList, + r.copy(dataCols = cleanedDataCols, partitionCols = cleanedPartCols)) + padded -> r.output.zip(padded.output) + } + } + + padded.resolveOperatorsUp { + case operator if operator.resolved => operator.transformExpressionsUp { + // String literal is treated as char type when it's compared to a char type column. + // We should pad the shorter one to the longer length. + case b @ BinaryComparison(attr: Attribute, lit) if lit.foldable => + padAttrLitCmp(attr, lit).map { newChildren => + b.withNewChildren(newChildren) + }.getOrElse(b) + + case b @ BinaryComparison(lit, attr: Attribute) if lit.foldable => + padAttrLitCmp(attr, lit).map { newChildren => + b.withNewChildren(newChildren.reverse) + }.getOrElse(b) + + case i @ In(attr: Attribute, list) + if attr.dataType == StringType && list.forall(_.foldable) => + CharVarcharUtils.getRawType(attr.metadata).flatMap { + case CharType(length) => + val literalCharLengths = list.map(_.eval().asInstanceOf[UTF8String].numChars()) + val targetLen = (length +: literalCharLengths).max + Some(i.copy( + value = addPadding(attr, length, targetLen), + list = list.zip(literalCharLengths).map { + case (lit, charLength) => addPadding(lit, charLength, targetLen) + })) + case _ => None + }.getOrElse(i) + + // For char type column or inner field comparison, pad the shorter one to the longer length. 
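+        // For example, with columns c1 CHAR(2) and c2 CHAR(5) both storing 'ab', the values
+        // read back are 'ab' and 'ab   ', so a plain equality check would be false. Padding
+        // the CHAR(2) side to length 5 makes c1 = c2 hold, matching SQL CHAR comparison
+        // semantics.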
+ case b @ BinaryComparison(left: Attribute, right: Attribute) => + b.withNewChildren(CharVarcharUtils.addPaddingInStringComparison(Seq(left, right))) + + case i @ In(attr: Attribute, list) if list.forall(_.isInstanceOf[Attribute]) => + val newChildren = CharVarcharUtils.addPaddingInStringComparison( + attr +: list.map(_.asInstanceOf[Attribute])) + i.copy(value = newChildren.head, list = newChildren.tail) + } + } + } + + private def padAttrLitCmp(attr: Attribute, lit: Expression): Option[Seq[Expression]] = { + if (attr.dataType == StringType) { + CharVarcharUtils.getRawType(attr.metadata).flatMap { + case CharType(length) => + val str = lit.eval().asInstanceOf[UTF8String] + val stringLitLen = str.numChars() + if (length < stringLitLen) { + Some(Seq(StringRPad(attr, Literal(stringLitLen)), lit)) + } else if (length > stringLitLen) { + Some(Seq(attr, StringRPad(lit, Literal(length)))) + } else { + None + } + case _ => None + } + } else { + None + } + } + + private def addPadding(expr: Expression, charLength: Int, targetLength: Int): Expression = { + if (targetLength > charLength) StringRPad(expr, Literal(targetLength)) else expr + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala index 33a3486bf6f67..8c61c8cd4f52e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.expressions.{AttributeMap, AttributeReference} import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics} -import org.apache.spark.sql.catalyst.util.truncatedString +import org.apache.spark.sql.catalyst.util.{truncatedString, CharVarcharUtils} import org.apache.spark.sql.sources.BaseRelation /** @@ -69,9 +69,17 @@ case class LogicalRelation( } object LogicalRelation { - def apply(relation: BaseRelation, isStreaming: Boolean = false): LogicalRelation = - LogicalRelation(relation, relation.schema.toAttributes, None, isStreaming) + def apply(relation: BaseRelation, isStreaming: Boolean = false): LogicalRelation = { + // The v1 source may return schema containing char/varchar type. We replace char/varchar + // with "annotated" string type here as the query engine doesn't support char/varchar yet. + val schema = CharVarcharUtils.replaceCharVarcharWithStringInSchema(relation.schema) + LogicalRelation(relation, schema.toAttributes, None, isStreaming) + } - def apply(relation: BaseRelation, table: CatalogTable): LogicalRelation = - LogicalRelation(relation, relation.schema.toAttributes, Some(table), false) + def apply(relation: BaseRelation, table: CatalogTable): LogicalRelation = { + // The v1 source may return schema containing char/varchar type. We replace char/varchar + // with "annotated" string type here as the query engine doesn't support char/varchar yet. 
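+    // For example, a field declared as CHAR(5) becomes a StringType attribute whose metadata
+    // records the original char(5) type, so rules like ApplyCharTypePadding can recover it
+    // later via CharVarcharUtils.getRawType.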
+ val schema = CharVarcharUtils.replaceCharVarcharWithStringInSchema(relation.schema) + LogicalRelation(relation, schema.toAttributes, Some(table), false) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index 78f31fb80ecf6..5dd0d2bd74838 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.analysis.Resolver import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.SpecificInternalRow import org.apache.spark.sql.catalyst.parser.CatalystSqlParser -import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils, GenericArrayData} +import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CharVarcharUtils, DateTimeUtils, GenericArrayData} import org.apache.spark.sql.connector.catalog.TableChange import org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcDialects, JdbcType} @@ -761,17 +761,10 @@ object JdbcUtils extends Logging { schema: StructType, caseSensitive: Boolean, createTableColumnTypes: String): Map[String, String] = { - def typeName(f: StructField): String = { - // char/varchar gets translated to string type. Real data type specified by the user - // is available in the field metadata as HIVE_TYPE_STRING - if (f.metadata.contains(HIVE_TYPE_STRING)) { - f.metadata.getString(HIVE_TYPE_STRING) - } else { - f.dataType.catalogString - } - } - - val userSchema = CatalystSqlParser.parseTableSchema(createTableColumnTypes) + val parsedSchema = CatalystSqlParser.parseTableSchema(createTableColumnTypes) + val userSchema = StructType(parsedSchema.map { field => + field.copy(dataType = CharVarcharUtils.getRawType(field.metadata).getOrElse(field.dataType)) + }) val nameEquality = if (caseSensitive) { org.apache.spark.sql.catalyst.analysis.caseSensitiveResolution } else { @@ -791,7 +784,7 @@ object JdbcUtils extends Logging { } } - val userSchemaMap = userSchema.fields.map(f => f.name -> typeName(f)).toMap + val userSchemaMap = userSchema.fields.map(f => f.name -> f.dataType.catalogString).toMap if (caseSensitive) userSchemaMap else CaseInsensitiveMap(userSchemaMap) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala index ce8edce6f08d6..2208e930f6b08 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.collection.mutable import org.apache.spark.sql.catalyst.expressions.{AttributeReference, AttributeSet, Expression, NamedExpression, PredicateHelper, SchemaPruning} +import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownFilters, SupportsPushDownRequiredColumns} import org.apache.spark.sql.execution.datasources.DataSourceStrategy import org.apache.spark.sql.internal.SQLConf @@ -110,7 +111,8 @@ object PushDownUtils extends PredicateHelper { schema: StructType, relation: 
DataSourceV2Relation): Seq[AttributeReference] = { val nameToAttr = relation.output.map(_.name).zip(relation.output).toMap - schema.toAttributes.map { + val cleaned = CharVarcharUtils.replaceCharVarcharWithString(schema).asInstanceOf[StructType] + cleaned.toAttributes.map { // we have to keep the attribute id during transformation a => a.withExprId(nameToAttr(a.name).exprId) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index 538a5408723bb..a89a5de3b7e72 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -189,6 +189,7 @@ abstract class BaseSessionStateBuilder( PreprocessTableCreation(session) +: PreprocessTableInsertion +: DataSourceAnalysis +: + ApplyCharTypePadding +: customPostHocResolutionRules override val extendedCheckRules: Seq[LogicalPlan => Unit] = diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala index 7f4ef8be562fb..eb7bb5c87a990 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala @@ -26,7 +26,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, SparkSession} import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 -import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CharVarcharUtils} import org.apache.spark.sql.connector.catalog.{SupportsRead, TableProvider} import org.apache.spark.sql.connector.catalog.TableCapability._ import org.apache.spark.sql.execution.command.DDLUtils @@ -64,7 +64,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo * @since 2.0.0 */ def schema(schema: StructType): DataStreamReader = { - this.userSpecifiedSchema = Option(schema) + this.userSpecifiedSchema = Option(CharVarcharUtils.replaceCharVarcharWithStringInSchema(schema)) this } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala new file mode 100644 index 0000000000000..abb13270d20e7 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -0,0 +1,505 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql + +import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.sql.catalyst.util.CharVarcharUtils +import org.apache.spark.sql.connector.{InMemoryPartitionTableCatalog, SchemaRequiredDataSource} +import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.sources.SimpleInsertSource +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.types.{ArrayType, CharType, DataType, MapType, StringType, StructField, StructType} + +// The base trait for char/varchar tests that need to be run with different table implementations. +trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { + + def format: String + + def checkColType(f: StructField, dt: DataType): Unit = { + assert(f.dataType == CharVarcharUtils.replaceCharVarcharWithString(dt)) + assert(CharVarcharUtils.getRawType(f.metadata) == Some(dt)) + } + + test("char type values should be padded: top-level columns") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c CHAR(5)) USING $format") + sql("INSERT INTO t VALUES ('1', 'a')") + checkAnswer(spark.table("t"), Row("1", "a" + " " * 4)) + checkColType(spark.table("t").schema(1), CharType(5)) + + sql("INSERT OVERWRITE t VALUES ('1', null)") + checkAnswer(spark.table("t"), Row("1", null)) + } + } + + test("char type values should be padded: partitioned columns") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c CHAR(5)) USING $format PARTITIONED BY (c)") + sql("INSERT INTO t VALUES ('1', 'a')") + checkAnswer(spark.table("t"), Row("1", "a" + " " * 4)) + checkColType(spark.table("t").schema(1), CharType(5)) + + sql("ALTER TABLE t DROP PARTITION(c='a')") + sql("INSERT OVERWRITE t VALUES ('1', null)") + checkAnswer(spark.table("t"), Row("1", null)) + } + } + + test("char type values should be padded: nested in struct") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c STRUCT) USING $format") + sql("INSERT INTO t VALUES ('1', struct('a'))") + checkAnswer(spark.table("t"), Row("1", Row("a" + " " * 4))) + checkColType(spark.table("t").schema(1), new StructType().add("c", CharType(5))) + + sql("INSERT OVERWRITE t VALUES ('1', null)") + checkAnswer(spark.table("t"), Row("1", null)) + sql("INSERT OVERWRITE t VALUES ('1', struct(null))") + checkAnswer(spark.table("t"), Row("1", Row(null))) + } + } + + test("char type values should be padded: nested in array") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c ARRAY) USING $format") + sql("INSERT INTO t VALUES ('1', array('a', 'ab'))") + checkAnswer(spark.table("t"), Row("1", Seq("a" + " " * 4, "ab" + " " * 3))) + checkColType(spark.table("t").schema(1), ArrayType(CharType(5))) + + sql("INSERT OVERWRITE t VALUES ('1', null)") + checkAnswer(spark.table("t"), Row("1", null)) + sql("INSERT OVERWRITE t VALUES ('1', array(null))") + checkAnswer(spark.table("t"), Row("1", Seq(null))) + } + } + + test("char type values should be padded: nested in map key") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c MAP) USING $format") + sql("INSERT INTO t VALUES ('1', map('a', 'ab'))") + checkAnswer(spark.table("t"), Row("1", Map(("a" + " " * 4, "ab")))) + checkColType(spark.table("t").schema(1), MapType(CharType(5), StringType)) + + sql("INSERT OVERWRITE t VALUES ('1', null)") + checkAnswer(spark.table("t"), Row("1", null)) + } + } + + test("char type values should be padded: nested in map 
value") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c MAP) USING $format") + sql("INSERT INTO t VALUES ('1', map('a', 'ab'))") + checkAnswer(spark.table("t"), Row("1", Map(("a", "ab" + " " * 3)))) + checkColType(spark.table("t").schema(1), MapType(StringType, CharType(5))) + + sql("INSERT OVERWRITE t VALUES ('1', null)") + checkAnswer(spark.table("t"), Row("1", null)) + sql("INSERT OVERWRITE t VALUES ('1', map('a', null))") + checkAnswer(spark.table("t"), Row("1", Map("a" -> null))) + } + } + + test("char type values should be padded: nested in both map key and value") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c MAP) USING $format") + sql("INSERT INTO t VALUES ('1', map('a', 'ab'))") + checkAnswer(spark.table("t"), Row("1", Map(("a" + " " * 4, "ab" + " " * 8)))) + checkColType(spark.table("t").schema(1), MapType(CharType(5), CharType(10))) + + sql("INSERT OVERWRITE t VALUES ('1', null)") + checkAnswer(spark.table("t"), Row("1", null)) + } + } + + test("char type values should be padded: nested in struct of array") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c STRUCT>) USING $format") + sql("INSERT INTO t VALUES ('1', struct(array('a', 'ab')))") + checkAnswer(spark.table("t"), Row("1", Row(Seq("a" + " " * 4, "ab" + " " * 3)))) + checkColType(spark.table("t").schema(1), + new StructType().add("c", ArrayType(CharType(5)))) + + sql("INSERT OVERWRITE t VALUES ('1', null)") + checkAnswer(spark.table("t"), Row("1", null)) + sql("INSERT OVERWRITE t VALUES ('1', struct(null))") + checkAnswer(spark.table("t"), Row("1", Row(null))) + sql("INSERT OVERWRITE t VALUES ('1', struct(array(null)))") + checkAnswer(spark.table("t"), Row("1", Row(Seq(null)))) + } + } + + test("char type values should be padded: nested in array of struct") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c ARRAY>) USING $format") + sql("INSERT INTO t VALUES ('1', array(struct('a'), struct('ab')))") + checkAnswer(spark.table("t"), Row("1", Seq(Row("a" + " " * 4), Row("ab" + " " * 3)))) + checkColType(spark.table("t").schema(1), + ArrayType(new StructType().add("c", CharType(5)))) + + sql("INSERT OVERWRITE t VALUES ('1', null)") + checkAnswer(spark.table("t"), Row("1", null)) + sql("INSERT OVERWRITE t VALUES ('1', array(null))") + checkAnswer(spark.table("t"), Row("1", Seq(null))) + sql("INSERT OVERWRITE t VALUES ('1', array(struct(null)))") + checkAnswer(spark.table("t"), Row("1", Seq(Row(null)))) + } + } + + test("char type values should be padded: nested in array of array") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c ARRAY>) USING $format") + sql("INSERT INTO t VALUES ('1', array(array('a', 'ab')))") + checkAnswer(spark.table("t"), Row("1", Seq(Seq("a" + " " * 4, "ab" + " " * 3)))) + checkColType(spark.table("t").schema(1), ArrayType(ArrayType(CharType(5)))) + + sql("INSERT OVERWRITE t VALUES ('1', null)") + checkAnswer(spark.table("t"), Row("1", null)) + sql("INSERT OVERWRITE t VALUES ('1', array(null))") + checkAnswer(spark.table("t"), Row("1", Seq(null))) + sql("INSERT OVERWRITE t VALUES ('1', array(array(null)))") + checkAnswer(spark.table("t"), Row("1", Seq(Seq(null)))) + } + } + + private def testTableWrite(f: String => Unit): Unit = { + withTable("t") { f("char") } + withTable("t") { f("varchar") } + } + + test("length check for input string values: top-level columns") { + testTableWrite { typeName => + sql(s"CREATE TABLE t(c $typeName(5)) USING $format") + sql("INSERT INTO t VALUES (null)") + checkAnswer(spark.table("t"), Row(null)) + val e = 
intercept[SparkException](sql("INSERT INTO t VALUES ('123456')")) + assert(e.getCause.getMessage.contains( + s"input string '123456' exceeds $typeName type length limitation: 5")) + } + } + + test("length check for input string values: partitioned columns") { + // DS V2 doesn't support partitioned table. + if (!conf.contains(SQLConf.DEFAULT_CATALOG.key)) { + testTableWrite { typeName => + sql(s"CREATE TABLE t(i INT, c $typeName(5)) USING $format PARTITIONED BY (c)") + sql("INSERT INTO t VALUES (1, null)") + checkAnswer(spark.table("t"), Row(1, null)) + val e = intercept[SparkException](sql("INSERT INTO t VALUES (1, '123456')")) + assert(e.getCause.getMessage.contains( + s"input string '123456' exceeds $typeName type length limitation: 5")) + } + } + } + + test("length check for input string values: nested in struct") { + testTableWrite { typeName => + sql(s"CREATE TABLE t(c STRUCT) USING $format") + sql("INSERT INTO t SELECT struct(null)") + checkAnswer(spark.table("t"), Row(Row(null))) + val e = intercept[SparkException](sql("INSERT INTO t SELECT struct('123456')")) + assert(e.getCause.getMessage.contains( + s"input string '123456' exceeds $typeName type length limitation: 5")) + } + } + + test("length check for input string values: nested in array") { + testTableWrite { typeName => + sql(s"CREATE TABLE t(c ARRAY<$typeName(5)>) USING $format") + sql("INSERT INTO t VALUES (array(null))") + checkAnswer(spark.table("t"), Row(Seq(null))) + val e = intercept[SparkException](sql("INSERT INTO t VALUES (array('a', '123456'))")) + assert(e.getCause.getMessage.contains( + s"input string '123456' exceeds $typeName type length limitation: 5")) + } + } + + test("length check for input string values: nested in map key") { + testTableWrite { typeName => + sql(s"CREATE TABLE t(c MAP<$typeName(5), STRING>) USING $format") + val e = intercept[SparkException](sql("INSERT INTO t VALUES (map('123456', 'a'))")) + assert(e.getCause.getMessage.contains( + s"input string '123456' exceeds $typeName type length limitation: 5")) + } + } + + test("length check for input string values: nested in map value") { + testTableWrite { typeName => + sql(s"CREATE TABLE t(c MAP) USING $format") + sql("INSERT INTO t VALUES (map('a', null))") + checkAnswer(spark.table("t"), Row(Map("a" -> null))) + val e = intercept[SparkException](sql("INSERT INTO t VALUES (map('a', '123456'))")) + assert(e.getCause.getMessage.contains( + s"input string '123456' exceeds $typeName type length limitation: 5")) + } + } + + test("length check for input string values: nested in both map key and value") { + testTableWrite { typeName => + sql(s"CREATE TABLE t(c MAP<$typeName(5), $typeName(5)>) USING $format") + val e1 = intercept[SparkException](sql("INSERT INTO t VALUES (map('123456', 'a'))")) + assert(e1.getCause.getMessage.contains( + s"input string '123456' exceeds $typeName type length limitation: 5")) + val e2 = intercept[SparkException](sql("INSERT INTO t VALUES (map('a', '123456'))")) + assert(e2.getCause.getMessage.contains( + s"input string '123456' exceeds $typeName type length limitation: 5")) + } + } + + test("length check for input string values: nested in struct of array") { + testTableWrite { typeName => + sql(s"CREATE TABLE t(c STRUCT>) USING $format") + sql("INSERT INTO t SELECT struct(array(null))") + checkAnswer(spark.table("t"), Row(Row(Seq(null)))) + val e = intercept[SparkException](sql("INSERT INTO t SELECT struct(array('123456'))")) + assert(e.getCause.getMessage.contains( + s"input string '123456' exceeds $typeName type length 
limitation: 5")) + } + } + + test("length check for input string values: nested in array of struct") { + testTableWrite { typeName => + sql(s"CREATE TABLE t(c ARRAY>) USING $format") + sql("INSERT INTO t VALUES (array(struct(null)))") + checkAnswer(spark.table("t"), Row(Seq(Row(null)))) + val e = intercept[SparkException](sql("INSERT INTO t VALUES (array(struct('123456')))")) + assert(e.getCause.getMessage.contains( + s"input string '123456' exceeds $typeName type length limitation: 5")) + } + } + + test("length check for input string values: nested in array of array") { + testTableWrite { typeName => + sql(s"CREATE TABLE t(c ARRAY>) USING $format") + sql("INSERT INTO t VALUES (array(array(null)))") + checkAnswer(spark.table("t"), Row(Seq(Seq(null)))) + val e = intercept[SparkException](sql("INSERT INTO t VALUES (array(array('123456')))")) + assert(e.getCause.getMessage.contains( + s"input string '123456' exceeds $typeName type length limitation: 5")) + } + } + + test("length check for input string values: with trailing spaces") { + withTable("t") { + sql(s"CREATE TABLE t(c1 CHAR(5), c2 VARCHAR(5)) USING $format") + sql("INSERT INTO t VALUES ('12 ', '12 ')") + sql("INSERT INTO t VALUES ('1234 ', '1234 ')") + checkAnswer(spark.table("t"), Seq( + Row("12" + " " * 3, "12 "), + Row("1234 ", "1234 "))) + } + } + + test("length check for input string values: with implicit cast") { + withTable("t") { + sql(s"CREATE TABLE t(c1 CHAR(5), c2 VARCHAR(5)) USING $format") + sql("INSERT INTO t VALUES (1234, 1234)") + checkAnswer(spark.table("t"), Row("1234 ", "1234")) + val e1 = intercept[SparkException](sql("INSERT INTO t VALUES (123456, 1)")) + assert(e1.getCause.getMessage.contains( + "input string '123456' exceeds char type length limitation: 5")) + val e2 = intercept[SparkException](sql("INSERT INTO t VALUES (1, 123456)")) + assert(e2.getCause.getMessage.contains( + "input string '123456' exceeds varchar type length limitation: 5")) + } + } + + private def testConditions(df: DataFrame, conditions: Seq[(String, Boolean)]): Unit = { + checkAnswer(df.selectExpr(conditions.map(_._1): _*), Row.fromSeq(conditions.map(_._2))) + } + + test("char type comparison: top-level columns") { + withTable("t") { + sql(s"CREATE TABLE t(c1 CHAR(2), c2 CHAR(5)) USING $format") + sql("INSERT INTO t VALUES ('a', 'a')") + testConditions(spark.table("t"), Seq( + ("c1 = 'a'", true), + ("'a' = c1", true), + ("c1 = 'a '", true), + ("c1 > 'a'", false), + ("c1 IN ('a', 'b')", true), + ("c1 = c2", true), + ("c1 < c2", false), + ("c1 IN (c2)", true))) + } + } + + test("char type comparison: partitioned columns") { + withTable("t") { + sql(s"CREATE TABLE t(i INT, c1 CHAR(2), c2 CHAR(5)) USING $format PARTITIONED BY (c1, c2)") + sql("INSERT INTO t VALUES (1, 'a', 'a')") + testConditions(spark.table("t"), Seq( + ("c1 = 'a'", true), + ("'a' = c1", true), + ("c1 = 'a '", true), + ("c1 > 'a'", false), + ("c1 IN ('a', 'b')", true), + ("c1 = c2", true), + ("c1 < c2", false), + ("c1 IN (c2)", true))) + } + } + + test("char type comparison: join") { + withTable("t1", "t2") { + sql(s"CREATE TABLE t1(c CHAR(2)) USING $format") + sql(s"CREATE TABLE t2(c CHAR(5)) USING $format") + sql("INSERT INTO t1 VALUES ('a')") + sql("INSERT INTO t2 VALUES ('a')") + checkAnswer(sql("SELECT t1.c FROM t1 JOIN t2 ON t1.c = t2.c"), Row("a ")) + } + } + + test("char type comparison: nested in struct") { + withTable("t") { + sql(s"CREATE TABLE t(c1 STRUCT, c2 STRUCT) USING $format") + sql("INSERT INTO t VALUES (struct('a'), struct('a'))") + 
testConditions(spark.table("t"), Seq( + ("c1 = c2", true), + ("c1 < c2", false), + ("c1 IN (c2)", true))) + } + } + + test("char type comparison: nested in array") { + withTable("t") { + sql(s"CREATE TABLE t(c1 ARRAY, c2 ARRAY) USING $format") + sql("INSERT INTO t VALUES (array('a', 'b'), array('a', 'b'))") + testConditions(spark.table("t"), Seq( + ("c1 = c2", true), + ("c1 < c2", false), + ("c1 IN (c2)", true))) + } + } + + test("char type comparison: nested in struct of array") { + withTable("t") { + sql("CREATE TABLE t(c1 STRUCT>, c2 STRUCT>) " + + s"USING $format") + sql("INSERT INTO t VALUES (struct(array('a', 'b')), struct(array('a', 'b')))") + testConditions(spark.table("t"), Seq( + ("c1 = c2", true), + ("c1 < c2", false), + ("c1 IN (c2)", true))) + } + } + + test("char type comparison: nested in array of struct") { + withTable("t") { + sql("CREATE TABLE t(c1 ARRAY>, c2 ARRAY>) " + + s"USING $format") + sql("INSERT INTO t VALUES (array(struct('a')), array(struct('a')))") + testConditions(spark.table("t"), Seq( + ("c1 = c2", true), + ("c1 < c2", false), + ("c1 IN (c2)", true))) + } + } + + test("char type comparison: nested in array of array") { + withTable("t") { + sql("CREATE TABLE t(c1 ARRAY>, c2 ARRAY>) " + + s"USING $format") + sql("INSERT INTO t VALUES (array(array('a')), array(array('a')))") + testConditions(spark.table("t"), Seq( + ("c1 = c2", true), + ("c1 < c2", false), + ("c1 IN (c2)", true))) + } + } +} + +// Some basic char/varchar tests which doesn't rely on table implementation. +class BasicCharVarcharTestSuite extends QueryTest with SharedSparkSession { + import testImplicits._ + + test("user-specified schema in cast") { + def assertNoCharType(df: DataFrame): Unit = { + checkAnswer(df, Row("0")) + assert(df.schema.map(_.dataType) == Seq(StringType)) + } + + assertNoCharType(spark.range(1).select($"id".cast("char(5)"))) + assertNoCharType(spark.range(1).select($"id".cast(CharType(5)))) + assertNoCharType(spark.range(1).selectExpr("CAST(id AS CHAR(5))")) + assertNoCharType(sql("SELECT CAST(id AS CHAR(5)) FROM range(1)")) + } + + test("user-specified schema in functions") { + val df = sql("""SELECT from_json('{"a": "str"}', 'a CHAR(5)')""") + checkAnswer(df, Row(Row("str"))) + val schema = df.schema.head.dataType.asInstanceOf[StructType] + assert(schema.map(_.dataType) == Seq(StringType)) + } + + test("user-specified schema in DataFrameReader: file source from Dataset") { + val ds = spark.range(10).map(_.toString) + val df1 = spark.read.schema(new StructType().add("id", CharType(5))).csv(ds) + assert(df1.schema.map(_.dataType) == Seq(StringType)) + val df2 = spark.read.schema("id char(5)").csv(ds) + assert(df2.schema.map(_.dataType) == Seq(StringType)) + } + + test("user-specified schema in DataFrameReader: DSV1") { + def checkSchema(df: DataFrame): Unit = { + val relations = df.queryExecution.analyzed.collect { + case l: LogicalRelation => l.relation + } + assert(relations.length == 1) + assert(relations.head.schema.map(_.dataType) == Seq(StringType)) + } + + checkSchema(spark.read.schema(new StructType().add("id", CharType(5))) + .format(classOf[SimpleInsertSource].getName).load()) + checkSchema(spark.read.schema("id char(5)") + .format(classOf[SimpleInsertSource].getName).load()) + } + + test("user-specified schema in DataFrameReader: DSV2") { + def checkSchema(df: DataFrame): Unit = { + val tables = df.queryExecution.analyzed.collect { + case d: DataSourceV2Relation => d.table + } + assert(tables.length == 1) + assert(tables.head.schema.map(_.dataType) == 
Seq(StringType)) + } + + checkSchema(spark.read.schema(new StructType().add("id", CharType(5))) + .format(classOf[SchemaRequiredDataSource].getName).load()) + checkSchema(spark.read.schema("id char(5)") + .format(classOf[SchemaRequiredDataSource].getName).load()) + } +} + +class FileSourceCharVarcharTestSuite extends CharVarcharTestSuite with SharedSparkSession { + override def format: String = "parquet" + override protected def sparkConf: SparkConf = { + super.sparkConf.set(SQLConf.USE_V1_SOURCE_LIST, "parquet") + } +} + +class DSV2CharVarcharTestSuite extends CharVarcharTestSuite + with SharedSparkSession { + override def format: String = "foo" + protected override def sparkConf = { + super.sparkConf + .set("spark.sql.catalog.testcat", classOf[InMemoryPartitionTableCatalog].getName) + .set(SQLConf.DEFAULT_CATALOG.key, "testcat") + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 9710fca6bc82c..20cad721d3d0e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -38,7 +38,7 @@ import org.apache.spark.sql.execution.datasources.CreateTable import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} import org.apache.spark.sql.sources.SimpleScanSource -import org.apache.spark.sql.types.{CharType, DoubleType, HIVE_TYPE_STRING, IntegerType, LongType, MetadataBuilder, StringType, StructField, StructType} +import org.apache.spark.sql.types.{CharType, DoubleType, IntegerType, LongType, StringType, StructField, StructType} class PlanResolutionSuite extends AnalysisTest { import CatalystSqlParser._ @@ -1090,9 +1090,7 @@ class PlanResolutionSuite extends AnalysisTest { } val sql = s"ALTER TABLE v1HiveTable ALTER COLUMN i TYPE char(1)" - val builder = new MetadataBuilder - builder.putString(HIVE_TYPE_STRING, CharType(1).catalogString) - val newColumnWithCleanedType = StructField("i", StringType, true, builder.build()) + val newColumnWithCleanedType = StructField("i", CharType(1), true) val expected = AlterTableChangeColumnCommand( TableIdentifier("v1HiveTable", Some("default")), "i", newColumnWithCleanedType) val parsed = parseAndResolve(sql) @@ -1533,44 +1531,6 @@ class PlanResolutionSuite extends AnalysisTest { } } - test("SPARK-31147: forbid CHAR type in non-Hive tables") { - def checkFailure(t: String, provider: String): Unit = { - val types = Seq( - "CHAR(2)", - "ARRAY", - "MAP", - "MAP", - "STRUCT") - types.foreach { tpe => - intercept[AnalysisException] { - parseAndResolve(s"CREATE TABLE $t(col $tpe) USING $provider") - } - intercept[AnalysisException] { - parseAndResolve(s"REPLACE TABLE $t(col $tpe) USING $provider") - } - intercept[AnalysisException] { - parseAndResolve(s"CREATE OR REPLACE TABLE $t(col $tpe) USING $provider") - } - intercept[AnalysisException] { - parseAndResolve(s"ALTER TABLE $t ADD COLUMN col $tpe") - } - intercept[AnalysisException] { - parseAndResolve(s"ALTER TABLE $t ADD COLUMN col $tpe") - } - intercept[AnalysisException] { - parseAndResolve(s"ALTER TABLE $t ALTER COLUMN col TYPE $tpe") - } - intercept[AnalysisException] { - parseAndResolve(s"ALTER TABLE $t REPLACE COLUMNS (col $tpe)") - } - } - } - - checkFailure("v1Table", v1Format) - checkFailure("v2Table", v2Format) - checkFailure("testcat.tab", 
"foo") - } - private def compareNormalized(plan1: LogicalPlan, plan2: LogicalPlan): Unit = { /** * Normalizes plans: diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala index 9a95bf770772e..ca3e714665818 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala @@ -22,6 +22,7 @@ import java.sql.{Date, Timestamp} import org.apache.spark.rdd.RDD import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ @@ -127,7 +128,7 @@ class TableScanSuite extends DataSourceTest with SharedSparkSession { Date.valueOf("1970-01-01"), new Timestamp(20000 + i), s"varchar_$i", - s"char_$i", + s"char_$i".padTo(18, ' '), Seq(i, i + 1), Seq(Map(s"str_$i" -> Row(i.toLong))), Map(i -> i.toString), @@ -206,10 +207,6 @@ class TableScanSuite extends DataSourceTest with SharedSparkSession { (2 to 10).map(i => Row(i, i - 1)).toSeq) test("Schema and all fields") { - def hiveMetadata(dt: String): Metadata = { - new MetadataBuilder().putString(HIVE_TYPE_STRING, dt).build() - } - val expectedSchema = StructType( StructField("string$%Field", StringType, true) :: StructField("binaryField", BinaryType, true) :: @@ -224,8 +221,8 @@ class TableScanSuite extends DataSourceTest with SharedSparkSession { StructField("decimalField2", DecimalType(9, 2), true) :: StructField("dateField", DateType, true) :: StructField("timestampField", TimestampType, true) :: - StructField("varcharField", StringType, true, hiveMetadata("varchar(12)")) :: - StructField("charField", StringType, true, hiveMetadata("char(18)")) :: + StructField("varcharField", VarcharType(12), true) :: + StructField("charField", CharType(18), true) :: StructField("arrayFieldSimple", ArrayType(IntegerType), true) :: StructField("arrayFieldComplex", ArrayType( @@ -248,7 +245,8 @@ class TableScanSuite extends DataSourceTest with SharedSparkSession { Nil ) - assert(expectedSchema == spark.table("tableWithSchema").schema) + assert(CharVarcharUtils.replaceCharVarcharWithStringInSchema(expectedSchema) == + spark.table("tableWithSchema").schema) withSQLConf(SQLConf.SUPPORT_QUOTED_REGEX_COLUMN_NAME.key -> "false") { checkAnswer( diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala index b30492802495f..da37b61688951 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala @@ -90,6 +90,7 @@ class HiveSessionStateBuilder( PreprocessTableCreation(session) +: PreprocessTableInsertion +: DataSourceAnalysis +: + ApplyCharTypePadding +: HiveAnalysis +: customPostHocResolutionRules diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index b2f0867114bae..bada131c8ba6d 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -978,19 +978,14 @@ private[hive] class HiveClientImpl( private[hive] object HiveClientImpl extends Logging { /** 
Converts the native StructField to Hive's FieldSchema. */ def toHiveColumn(c: StructField): FieldSchema = { - val typeString = if (c.metadata.contains(HIVE_TYPE_STRING)) { - c.metadata.getString(HIVE_TYPE_STRING) - } else { - // replace NullType to HiveVoidType since Hive parse void not null. - HiveVoidType.replaceVoidType(c.dataType).catalogString - } + val typeString = HiveVoidType.replaceVoidType(c.dataType).catalogString new FieldSchema(c.name, typeString, c.getComment().orNull) } /** Get the Spark SQL native DataType from Hive's FieldSchema. */ private def getSparkSQLDataType(hc: FieldSchema): DataType = { try { - CatalystSqlParser.parseDataType(hc.getType) + CatalystSqlParser.parseRawDataType(hc.getType) } catch { case e: ParseException => throw new SparkException( @@ -1001,18 +996,10 @@ private[hive] object HiveClientImpl extends Logging { /** Builds the native StructField from Hive's FieldSchema. */ def fromHiveColumn(hc: FieldSchema): StructField = { val columnType = getSparkSQLDataType(hc) - val replacedVoidType = HiveVoidType.replaceVoidType(columnType) - val metadata = if (hc.getType != replacedVoidType.catalogString) { - new MetadataBuilder().putString(HIVE_TYPE_STRING, hc.getType).build() - } else { - Metadata.empty - } - val field = StructField( name = hc.getName, dataType = columnType, - nullable = true, - metadata = metadata) + nullable = true) Option(hc.getComment).map(field.withComment).getOrElse(field) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala new file mode 100644 index 0000000000000..55d305fda4f96 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.hive.test.TestHiveSingleton + +class HiveCharVarcharTestSuite extends CharVarcharTestSuite with TestHiveSingleton { + + // The default Hive serde doesn't support nested null values. 
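+  // The shared char/varchar tests above insert nested nulls such as struct(null) and
+  // array(null), which is why these suites use Hive tables backed by the Parquet file
+  // format rather than the default serde.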
+ override def format: String = "hive OPTIONS(fileFormat='parquet')" + + private var originalPartitionMode = "" + + override protected def beforeAll(): Unit = { + super.beforeAll() + originalPartitionMode = spark.conf.get("hive.exec.dynamic.partition.mode", "") + spark.conf.set("hive.exec.dynamic.partition.mode", "nonstrict") + } + + override protected def afterAll(): Unit = { + if (originalPartitionMode == "") { + spark.conf.unset("hive.exec.dynamic.partition.mode") + } else { + spark.conf.set("hive.exec.dynamic.partition.mode", originalPartitionMode) + } + super.afterAll() + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala index 8f71ba3337aa2..1a6f6843d3911 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala @@ -113,24 +113,19 @@ class HiveMetastoreCatalogSuite extends TestHiveSingleton with SQLTestUtils { .add("c9", "date") .add("c10", "timestamp") .add("c11", "string") - .add("c12", "string", true, - new MetadataBuilder().putString(HIVE_TYPE_STRING, "char(10)").build()) - .add("c13", "string", true, - new MetadataBuilder().putString(HIVE_TYPE_STRING, "varchar(10)").build()) + .add("c12", CharType(10), true) + .add("c13", VarcharType(10), true) .add("c14", "binary") .add("c15", "decimal") .add("c16", "decimal(10)") .add("c17", "decimal(10,2)") .add("c18", "array") .add("c19", "array") - .add("c20", "array", true, - new MetadataBuilder().putString(HIVE_TYPE_STRING, "array").build()) + .add("c20", ArrayType(CharType(10)), true) .add("c21", "map") - .add("c22", "map", true, - new MetadataBuilder().putString(HIVE_TYPE_STRING, "map").build()) + .add("c22", MapType(IntegerType, CharType(10)), true) .add("c23", "struct") - .add("c24", "struct", true, - new MetadataBuilder().putString(HIVE_TYPE_STRING, "struct").build()) + .add("c24", new StructType().add("c", VarcharType(10)).add("d", "int"), true) assert(schema == expectedSchema) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index b8b1da4cb9db7..2dfb8bb552594 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -2251,8 +2251,8 @@ class HiveDDLSuite ) sql("ALTER TABLE tab ADD COLUMNS (c5 char(10))") - assert(spark.table("tab").schema.find(_.name == "c5") - .get.metadata.getString("HIVE_TYPE_STRING") == "char(10)") + assert(spark.sharedState.externalCatalog.getTable("default", "tab") + .schema.find(_.name == "c5").get.dataType == CharType(10)) } } } From 6e5446e61f278e9afac342e8f33905f5630aa7d5 Mon Sep 17 00:00:00 2001 From: Pascal Gillet Date: Mon, 30 Nov 2020 19:31:42 +0900 Subject: [PATCH 0608/1009] [SPARK-33579][UI] Fix executor blank page behind proxy ### What changes were proposed in this pull request? Fix some "hardcoded" API urls in Web UI. More specifically, we avoid the use of `location.origin` when constructing URLs for internal API calls within the JavaScript. Instead, we use `apiRoot` global variable. ### Why are the changes needed? On one hand, it allows us to build relative URLs. On the other hand, `apiRoot` reflects the Spark property `spark.ui.proxyBase` which can be set to change the root path of the Web UI. 
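As a rough sketch of the pattern applied throughout the UI JavaScript (illustrative only, not the exact patched functions; `uiRoot` and the endpoint shown here are taken from the diff further down):
```
// Before: the origin is hardcoded, so the path prefix configured via
// spark.ui.proxyBase never makes it into the request URL.
var url = location.origin + "/api/v1/applications/" + appId + "/allexecutors";

// After: uiRoot already reflects the proxy base, so the REST call goes
// through the same prefix the proxied Web UI is served from.
var url = uiRoot + "/api/v1/applications/" + appId + "/allexecutors";
```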
If `spark.ui.proxyBase` is actually set, original URLs become incorrect, and we end up with an executors blank page. I encounter this bug when accessing the Web UI behind a proxy (in my case a Kubernetes Ingress). See the following link for more context: https://github.com/jupyterhub/jupyter-server-proxy/issues/57#issuecomment-699163115 ### Does this PR introduce _any_ user-facing change? Yes, as all the changes introduced are in the JavaScript for the Web UI. ### How the changes have been tested ? I modified/debugged the JavaScript as in the commit with the help of the developer tools in Google Chrome, while accessing the Web UI of my Spark app behind my k8s ingress. Closes #30523 from pgillet/fix-executors-blank-page-behind-proxy. Authored-by: Pascal Gillet Signed-off-by: Kousuke Saruta --- .../main/resources/org/apache/spark/ui/static/stagepage.js | 2 +- core/src/main/resources/org/apache/spark/ui/static/utils.js | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/stagepage.js b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js index ee1115868f69b..2877aa819ab9e 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/stagepage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js @@ -70,7 +70,7 @@ function stageEndPoint(appId) { return newBaseURI + "/api/v1/applications/" + appId + "/" + appAttemptId + "/stages/" + stageId; } } - return location.origin + "/api/v1/applications/" + appId + "/stages/" + stageId; + return uiRoot + "/api/v1/applications/" + appId + "/stages/" + stageId; } function getColumnNameForTaskMetricSummary(columnKey) { diff --git a/core/src/main/resources/org/apache/spark/ui/static/utils.js b/core/src/main/resources/org/apache/spark/ui/static/utils.js index 7e6dd678e2641..f4914f000e705 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/utils.js +++ b/core/src/main/resources/org/apache/spark/ui/static/utils.js @@ -105,7 +105,7 @@ function getStandAloneAppId(cb) { } // Looks like Web UI is running in standalone mode // Let's get application-id using REST End Point - $.getJSON(location.origin + "/api/v1/applications", function(response, status, jqXHR) { + $.getJSON(uiRoot + "/api/v1/applications", function(response, status, jqXHR) { if (response && response.length > 0) { var appId = response[0].id; cb(appId); @@ -152,7 +152,7 @@ function createTemplateURI(appId, templateName) { var baseURI = words.slice(0, ind).join('/') + '/static/' + templateName + '-template.html'; return baseURI; } - return location.origin + "/static/" + templateName + "-template.html"; + return uiRoot + "/static/" + templateName + "-template.html"; } function setDataTableDefaults() { @@ -193,5 +193,5 @@ function createRESTEndPointForExecutorsPage(appId) { return newBaseURI + "/api/v1/applications/" + appId + "/" + attemptId + "/allexecutors"; } } - return location.origin + "/api/v1/applications/" + appId + "/allexecutors"; + return uiRoot + "/api/v1/applications/" + appId + "/allexecutors"; } From 0a612b6a40696ed8ce00997ebb4e76d05adbbd82 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 30 Nov 2020 13:45:53 +0000 Subject: [PATCH 0609/1009] [SPARK-33452][SQL] Support v2 SHOW PARTITIONS ### What changes were proposed in this pull request? 1. Remove V2 logical node `ShowPartitionsStatement `, and replace it by V2 `ShowPartitions`. 2. Implement V2 execution node `ShowPartitionsExec` similar to V1 `ShowPartitionsCommand`. ### Why are the changes needed? 
To have feature parity with Datasource V1. ### Does this PR introduce _any_ user-facing change? Yes. Before the change, `SHOW PARTITIONS` fails in V2 table catalogs with the exception: ``` org.apache.spark.sql.AnalysisException: SHOW PARTITIONS is only supported with v1 tables. at org.apache.spark.sql.catalyst.analysis.ResolveSessionCatalog.org$apache$spark$sql$catalyst$analysis$ResolveSessionCatalog$$parseV1Table(ResolveSessionCatalog.scala:628) at org.apache.spark.sql.catalyst.analysis.ResolveSessionCatalog$$anonfun$apply$1.applyOrElse(ResolveSessionCatalog.scala:466) ``` ### How was this patch tested? By running the following test suites: 1. Modified `ShowPartitionsParserSuite` where `ShowPartitionsStatement` is replaced by V2 `ShowPartitions`. 2. `v2.ShowPartitionsSuite` Closes #30398 from MaxGekk/show-partitions-exec-v2. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 2 +- .../sql/catalyst/analysis/CheckAnalysis.scala | 14 ++ .../analysis/ResolvePartitionSpec.scala | 31 +++-- .../catalyst/analysis/v2ResolutionPlans.scala | 3 +- .../sql/catalyst/parser/AstBuilder.scala | 9 +- .../catalyst/plans/logical/statements.scala | 7 - .../catalyst/plans/logical/v2Commands.scala | 15 +++ .../analysis/ResolveSessionCatalog.scala | 9 +- .../v2/AlterTableAddPartitionExec.scala | 8 +- .../v2/AlterTableDropPartitionExec.scala | 2 +- .../datasources/v2/DataSourceV2Strategy.scala | 11 +- .../datasources/v2/ShowPartitionsExec.scala | 65 ++++++++++ .../sql/connector/DataSourceV2SQLSuite.scala | 1 - .../command/ShowPartitionsParserSuite.scala | 23 ++-- .../command/ShowPartitionsSuiteBase.scala | 120 +++++++++++++++++- .../command/v1/ShowPartitionsSuite.scala | 110 +++------------- .../command/v2/ShowPartitionsSuite.scala | 38 +++--- .../hive/PartitionedTablePerfStatsSuite.scala | 4 +- 18 files changed, 309 insertions(+), 163 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 23a1b7bdde93c..abd38f2f9d940 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1085,7 +1085,7 @@ class Analyzer(override val catalogManager: CatalogManager) lookupTableOrView(identifier).map { case v: ResolvedView => val viewStr = if (v.isTemp) "temp view" else "view" - u.failAnalysis(s"${v.identifier.quoted} is a $viewStr. '$cmd' expects a table.'") + u.failAnalysis(s"${v.identifier.quoted} is a $viewStr. 
'$cmd' expects a table.") case table => table }.getOrElse(u) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 2818ba58075cd..61ac6346ff944 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -577,6 +577,8 @@ trait CheckAnalysis extends PredicateHelper { case AlterTableDropPartition(ResolvedTable(_, _, table), parts, _, _, _) => checkAlterTablePartition(table, parts) + case showPartitions: ShowPartitions => checkShowPartitions(showPartitions) + case _ => // Fallbacks to the following checks } @@ -1009,4 +1011,16 @@ trait CheckAnalysis extends PredicateHelper { case _ => } } + + // Make sure that the `SHOW PARTITIONS` command is allowed for the table + private def checkShowPartitions(showPartitions: ShowPartitions): Unit = showPartitions match { + case ShowPartitions(rt: ResolvedTable, _) + if !rt.table.isInstanceOf[SupportsPartitionManagement] => + failAnalysis(s"SHOW PARTITIONS cannot run for a table which does not support partitioning") + case ShowPartitions(ResolvedTable(_, _, partTable: SupportsPartitionManagement), _) + if partTable.partitionSchema().isEmpty => + failAnalysis( + s"SHOW PARTITIONS is not allowed on a table that is not partitioned: ${partTable.name()}") + case _ => + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala index 98c6872a47cc6..38991a9e24fa8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Cast, Literal} -import org.apache.spark.sql.catalyst.plans.logical.{AlterTableAddPartition, AlterTableDropPartition, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{AlterTableAddPartition, AlterTableDropPartition, LogicalPlan, ShowPartitions} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.connector.catalog.SupportsPartitionManagement @@ -40,6 +40,12 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { case r @ AlterTableDropPartition( ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs, _, _, _) => r.copy(parts = resolvePartitionSpecs(table.name, partSpecs, table.partitionSchema())) + + case r @ ShowPartitions(ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs) => + r.copy(pattern = resolvePartitionSpecs( + table.name, + partSpecs.toSeq, + table.partitionSchema()).headOption) } private def resolvePartitionSpecs( @@ -48,25 +54,26 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { partSchema: StructType): Seq[ResolvedPartitionSpec] = partSpecs.map { case unresolvedPartSpec: UnresolvedPartitionSpec => + val normalizedSpec = normalizePartitionSpec( + unresolvedPartSpec.spec, + partSchema.map(_.name), + tableName, + conf.resolver) + val partitionNames = normalizedSpec.keySet + val requestedFields = 
partSchema.filter(field => partitionNames.contains(field.name)) ResolvedPartitionSpec( - convertToPartIdent(tableName, unresolvedPartSpec.spec, partSchema), + requestedFields.map(_.name), + convertToPartIdent(normalizedSpec, requestedFields), unresolvedPartSpec.location) case resolvedPartitionSpec: ResolvedPartitionSpec => resolvedPartitionSpec } private def convertToPartIdent( - tableName: String, partitionSpec: TablePartitionSpec, - partSchema: StructType): InternalRow = { - val normalizedSpec = normalizePartitionSpec( - partitionSpec, - partSchema.map(_.name), - tableName, - conf.resolver) - - val partValues = partSchema.map { part => - val raw = normalizedSpec.get(part.name).orNull + schema: Seq[StructField]): InternalRow = { + val partValues = schema.map { part => + val raw = partitionSpec.get(part.name).orNull val dt = CharVarcharUtils.replaceCharVarcharWithString(part.dataType) Cast(Literal.create(raw, StringType), dt, Some(conf.sessionLocalTimeZone)).eval() } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala index 95fc4f47dec7f..1518f064d78db 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala @@ -89,7 +89,8 @@ case class ResolvedTable(catalog: TableCatalog, identifier: Identifier, table: T } case class ResolvedPartitionSpec( - spec: InternalRow, + names: Seq[String], + ident: InternalRow, location: Option[String] = None) extends PartitionSpec /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index ce95ea4b41def..ff8b56f0b724b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3611,9 +3611,12 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * }}} */ override def visitShowPartitions(ctx: ShowPartitionsContext): LogicalPlan = withOrigin(ctx) { - val table = visitMultipartIdentifier(ctx.multipartIdentifier) - val partitionKeys = Option(ctx.partitionSpec).map(visitNonOptionalPartitionSpec) - ShowPartitionsStatement(table, partitionKeys) + val partitionKeys = Option(ctx.partitionSpec).map { specCtx => + UnresolvedPartitionSpec(visitNonOptionalPartitionSpec(specCtx), None) + } + ShowPartitions( + UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier()), "SHOW PARTITIONS"), + partitionKeys) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index effb4cff75930..1763547792e35 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -419,13 +419,6 @@ case class TruncateTableStatement( tableName: Seq[String], partitionSpec: Option[TablePartitionSpec]) extends ParsedStatement -/** - * A SHOW PARTITIONS statement, as parsed from SQL - */ -case class ShowPartitionsStatement( - tableName: Seq[String], - partitionSpec: Option[TablePartitionSpec]) extends ParsedStatement - /** * A SHOW CURRENT NAMESPACE 
statement, as parsed from SQL */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 4931f0eb2c007..67056470418fe 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -691,3 +691,18 @@ case class TruncateTable( override def children: Seq[LogicalPlan] = child :: Nil } + +/** + * The logical plan of the SHOW PARTITIONS command. + */ +case class ShowPartitions( + child: LogicalPlan, + pattern: Option[PartitionSpec]) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil + + override lazy val resolved: Boolean = + childrenResolved && pattern.forall(_.isInstanceOf[ResolvedPartitionSpec]) + + override val output: Seq[Attribute] = Seq( + AttributeReference("partition", StringType, nullable = false)()) +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 53edd4fca7794..f6005f4b413a2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -430,11 +430,12 @@ class ResolveSessionCatalog( ident.asTableIdentifier, partitionSpec) - case ShowPartitionsStatement(tbl, partitionSpec) => - val v1TableName = parseV1Table(tbl, "SHOW PARTITIONS") + case ShowPartitions( + ResolvedV1TableOrViewIdentifier(ident), + pattern @ (None | Some(UnresolvedPartitionSpec(_, _)))) => ShowPartitionsCommand( - v1TableName.asTableIdentifier, - partitionSpec) + ident.asTableIdentifier, + pattern.map(_.asInstanceOf[UnresolvedPartitionSpec].spec)) case ShowColumns(ResolvedV1TableOrViewIdentifier(ident), ns) => val v1TableName = ident.asTableIdentifier diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableAddPartitionExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableAddPartitionExec.scala index 0171cdd9ca41a..d7fe25cff2064 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableAddPartitionExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableAddPartitionExec.scala @@ -37,20 +37,20 @@ case class AlterTableAddPartitionExec( override protected def run(): Seq[InternalRow] = { val (existsParts, notExistsParts) = - partSpecs.partition(p => table.partitionExists(p.spec)) + partSpecs.partition(p => table.partitionExists(p.ident)) if (existsParts.nonEmpty && !ignoreIfExists) { throw new PartitionsAlreadyExistException( - table.name(), existsParts.map(_.spec), table.partitionSchema()) + table.name(), existsParts.map(_.ident), table.partitionSchema()) } notExistsParts match { case Seq() => // Nothing will be done case Seq(partitionSpec) => val partProp = partitionSpec.location.map(loc => "location" -> loc).toMap - table.createPartition(partitionSpec.spec, partProp.asJava) + table.createPartition(partitionSpec.ident, partProp.asJava) case _ if table.isInstanceOf[SupportsAtomicPartitionManagement] => - val partIdents = notExistsParts.map(_.spec) + val partIdents = notExistsParts.map(_.ident) val partProps = notExistsParts.map(_.location.map(loc => "location" -> loc).toMap) 
table.asAtomicPartitionable .createPartitions( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala index 09a65804a05eb..c7a68ecb2bbee 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala @@ -35,7 +35,7 @@ case class AlterTableDropPartitionExec( override protected def run(): Seq[InternalRow] = { val (existsPartIdents, notExistsPartIdents) = - partSpecs.map(_.spec).partition(table.partitionExists) + partSpecs.map(_.ident).partition(table.partitionExists) if (notExistsPartIdents.nonEmpty && !ignoreIfNotExists) { throw new NoSuchPartitionsException( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 1fae8d937e90c..0c7bc19ad054e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.collection.JavaConverters._ import org.apache.spark.sql.{AnalysisException, SparkSession, Strategy} -import org.apache.spark.sql.catalyst.analysis.{ResolvedNamespace, ResolvedTable} +import org.apache.spark.sql.catalyst.analysis.{ResolvedNamespace, ResolvedPartitionSpec, ResolvedTable} import org.apache.spark.sql.catalyst.expressions.{And, Expression, NamedExpression, PredicateHelper, SubqueryExpression} import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical._ @@ -318,6 +318,15 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case ShowColumns(_: ResolvedTable, _) => throw new AnalysisException("SHOW COLUMNS is not supported for v2 tables.") + case r @ ShowPartitions( + ResolvedTable(catalog, _, table: SupportsPartitionManagement), + pattern @ (None | Some(_: ResolvedPartitionSpec))) => + ShowPartitionsExec( + r.output, + catalog, + table, + pattern.map(_.asInstanceOf[ResolvedPartitionSpec])) :: Nil + case _ => Nil } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala new file mode 100644 index 0000000000000..44d6f4495f552 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.ResolvedPartitionSpec +import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils.escapePathName +import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Literal} +import org.apache.spark.sql.connector.catalog.{SupportsPartitionManagement, TableCatalog} +import org.apache.spark.sql.execution.LeafExecNode +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.StringType +import org.apache.spark.unsafe.types.UTF8String + +/** + * Physical plan node for showing partitions. + */ +case class ShowPartitionsExec( + output: Seq[Attribute], + catalog: TableCatalog, + table: SupportsPartitionManagement, + partitionSpec: Option[ResolvedPartitionSpec]) extends V2CommandExec with LeafExecNode { + override protected def run(): Seq[InternalRow] = { + val (names, ident) = partitionSpec + .map(spec => (spec.names, spec.ident)) + // listPartitionByNames() should return all partitions if the partition spec + // does not specify any partition names. + .getOrElse((Seq.empty[String], InternalRow.empty)) + val partitionIdentifiers = table.listPartitionByNames(names.toArray, ident) + // Converting partition identifiers as `InternalRow` of partition values, + // for instance InternalRow(value0, value1, ..., valueN), to `InternalRow`s + // with a string in the format: "col0=value0/col1=value1/.../colN=valueN". 
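+    // For example, for a table partitioned by (year INT, month INT), the partition
+    // identifier InternalRow(2015, 1) is rendered as the string "year=2015/month=1".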
+ val schema = table.partitionSchema() + val len = schema.length + val partitions = new Array[String](len) + val timeZoneId = SQLConf.get.sessionLocalTimeZone + partitionIdentifiers.map { row => + var i = 0 + while (i < len) { + val dataType = schema(i).dataType + val partValue = row.get(i, dataType) + val partValueStr = Cast(Literal(partValue, dataType), StringType, Some(timeZoneId)) + .eval().toString + partitions(i) = escapePathName(schema(i).name) + "=" + escapePathName(partValueStr) + i += 1 + } + InternalRow(UTF8String.fromString(partitions.mkString("/"))) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index ffbc2287d81ad..583bc694dc3be 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2284,7 +2284,6 @@ class DataSourceV2SQLSuite verify(s"CACHE TABLE $t") verify(s"UNCACHE TABLE $t") verify(s"TRUNCATE TABLE $t") - verify(s"SHOW PARTITIONS $t") verify(s"SHOW COLUMNS FROM $t") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsParserSuite.scala index bc75528b9644c..7b5cf8af4eead 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsParserSuite.scala @@ -17,25 +17,30 @@ package org.apache.spark.sql.execution.command -import org.apache.spark.sql.catalyst.analysis.AnalysisTest +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedPartitionSpec, UnresolvedTable} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan import org.apache.spark.sql.catalyst.parser.ParseException -import org.apache.spark.sql.catalyst.plans.logical.ShowPartitionsStatement +import org.apache.spark.sql.catalyst.plans.logical.ShowPartitions import org.apache.spark.sql.execution.SparkSqlParser import org.apache.spark.sql.test.SharedSparkSession class ShowPartitionsParserSuite extends AnalysisTest with SharedSparkSession { test("SHOW PARTITIONS") { + val commandName = "SHOW PARTITIONS" Seq( - "SHOW PARTITIONS t1" -> ShowPartitionsStatement(Seq("t1"), None), - "SHOW PARTITIONS db1.t1" -> ShowPartitionsStatement(Seq("db1", "t1"), None), + "SHOW PARTITIONS t1" -> ShowPartitions(UnresolvedTable(Seq("t1"), commandName), None), + "SHOW PARTITIONS db1.t1" -> ShowPartitions( + UnresolvedTable(Seq("db1", "t1"), commandName), None), "SHOW PARTITIONS t1 PARTITION(partcol1='partvalue', partcol2='partvalue')" -> - ShowPartitionsStatement( - Seq("t1"), - Some(Map("partcol1" -> "partvalue", "partcol2" -> "partvalue"))), - "SHOW PARTITIONS a.b.c" -> ShowPartitionsStatement(Seq("a", "b", "c"), None), + ShowPartitions( + UnresolvedTable(Seq("t1"), commandName), + Some(UnresolvedPartitionSpec(Map("partcol1" -> "partvalue", "partcol2" -> "partvalue")))), + "SHOW PARTITIONS a.b.c" -> ShowPartitions( + UnresolvedTable(Seq("a", "b", "c"), commandName), None), "SHOW PARTITIONS a.b.c PARTITION(ds='2017-06-10')" -> - ShowPartitionsStatement(Seq("a", "b", "c"), Some(Map("ds" -> "2017-06-10"))) + ShowPartitions( + UnresolvedTable(Seq("a", "b", "c"), commandName), + Some(UnresolvedPartitionSpec(Map("ds" -> "2017-06-10")))) ).foreach { case (sql, expected) => val 
parsed = parsePlan(sql) comparePlans(parsed, expected) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala index 413e170326eea..82457f96a3003 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala @@ -20,17 +20,133 @@ package org.apache.spark.sql.execution.command import org.scalactic.source.Position import org.scalatest.Tag -import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.sql.types.{StringType, StructType} trait ShowPartitionsSuiteBase extends QueryTest with SQLTestUtils { protected def version: String protected def catalog: String - protected def defaultNamespace: Seq[String] protected def defaultUsing: String + protected def wrongPartitionColumnsError(columns: String*): String + // Gets the schema of `SHOW PARTITIONS` + private val showSchema: StructType = new StructType().add("partition", StringType, false) + protected def runShowPartitionsSql(sqlText: String, expected: Seq[Row]): Unit = { + val df = spark.sql(sqlText) + assert(df.schema === showSchema) + checkAnswer(df, expected) + } override def test(testName: String, testTags: Tag*)(testFun: => Any) (implicit pos: Position): Unit = { super.test(s"SHOW PARTITIONS $version: " + testName, testTags: _*)(testFun) } + + protected def createDateTable(table: String): Unit = { + sql(s""" + |CREATE TABLE $table (price int, qty int, year int, month int) + |$defaultUsing + |partitioned by (year, month)""".stripMargin) + sql(s"INSERT INTO $table PARTITION(year = 2015, month = 1) SELECT 1, 1") + sql(s"INSERT INTO $table PARTITION(year = 2015, month = 2) SELECT 2, 2") + sql(s"ALTER TABLE $table ADD PARTITION(year = 2016, month = 2)") + sql(s"ALTER TABLE $table ADD PARTITION(year = 2016, month = 3)") + } + + protected def createWideTable(table: String): Unit = { + sql(s""" + |CREATE TABLE $table ( + | price int, qty int, + | year int, month int, hour int, minute int, sec int, extra int) + |$defaultUsing + |PARTITIONED BY (year, month, hour, minute, sec, extra) + |""".stripMargin) + sql(s""" + |INSERT INTO $table + |PARTITION(year = 2016, month = 3, hour = 10, minute = 10, sec = 10, extra = 1) SELECT 3, 3 + |""".stripMargin) + sql(s""" + |ALTER TABLE $table + |ADD PARTITION(year = 2016, month = 4, hour = 10, minute = 10, sec = 10, extra = 1) + |""".stripMargin) + } + + test("show partitions of non-partitioned table") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + val table = s"$catalog.ns.not_partitioned_table" + withTable(table) { + sql(s"CREATE TABLE $table (col1 int) $defaultUsing") + val errMsg = intercept[AnalysisException] { + sql(s"SHOW PARTITIONS $table") + }.getMessage + assert(errMsg.contains("not allowed on a table that is not partitioned")) + } + } + } + + test("non-partitioning columns") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + val table = s"$catalog.ns.dateTable" + withTable(table) { + createDateTable(table) + val errMsg = intercept[AnalysisException] { + sql(s"SHOW PARTITIONS $table PARTITION(abcd=2015, xyz=1)") + }.getMessage + assert(errMsg.contains(wrongPartitionColumnsError("abcd", "xyz"))) + } + } + } + + test("show everything") { + 
withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + val table = s"$catalog.ns.dateTable" + withTable(table) { + createDateTable(table) + runShowPartitionsSql( + s"show partitions $table", + Row("year=2015/month=1") :: + Row("year=2015/month=2") :: + Row("year=2016/month=2") :: + Row("year=2016/month=3") :: Nil) + } + } + } + + test("filter by partitions") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + val table = s"$catalog.ns.dateTable" + withTable(table) { + createDateTable(table) + runShowPartitionsSql( + s"show partitions $table PARTITION(year=2015)", + Row("year=2015/month=1") :: + Row("year=2015/month=2") :: Nil) + runShowPartitionsSql( + s"show partitions $table PARTITION(year=2015, month=1)", + Row("year=2015/month=1") :: Nil) + runShowPartitionsSql( + s"show partitions $table PARTITION(month=2)", + Row("year=2015/month=2") :: + Row("year=2016/month=2") :: Nil) + } + } + } + + test("show everything more than 5 part keys") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + val table = s"$catalog.ns.wideTable" + withTable(table) { + createWideTable(table) + runShowPartitionsSql( + s"show partitions $table", + Row("year=2016/month=3/hour=10/minute=10/sec=10/extra=1") :: + Row("year=2016/month=4/hour=10/minute=10/sec=10/extra=1") :: Nil) + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala index bcc71e9b7241c..2b2bc9e63dc82 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.execution.command.v1 import org.apache.spark.sql.{AnalysisException, Row, SaveMode} -import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution.command import org.apache.spark.sql.test.SharedSparkSession @@ -26,104 +25,27 @@ import org.apache.spark.sql.test.SharedSparkSession trait ShowPartitionsSuiteBase extends command.ShowPartitionsSuiteBase { override def version: String = "V1" override def catalog: String = CatalogManager.SESSION_CATALOG_NAME - override def defaultNamespace: Seq[String] = Seq("default") override def defaultUsing: String = "USING parquet" - private def createDateTable(table: String): Unit = { - sql(s""" - |CREATE TABLE $table (price int, qty int, year int, month int) - |$defaultUsing - |partitioned by (year, month)""".stripMargin) - sql(s"INSERT INTO $table PARTITION(year = 2015, month = 1) SELECT 1, 1") - sql(s"INSERT INTO $table PARTITION(year = 2015, month = 2) SELECT 2, 2") - sql(s"INSERT INTO $table PARTITION(year = 2016, month = 2) SELECT 3, 3") - sql(s"INSERT INTO $table PARTITION(year = 2016, month = 3) SELECT 3, 3") + override protected def wrongPartitionColumnsError(columns: String*): String = { + s"Non-partitioning column(s) ${columns.mkString("[", ", ", "]")} are specified" } - test("show everything") { + test("show everything in the default database") { val table = "dateTable" withTable(table) { createDateTable(table) - checkAnswer( - sql(s"show partitions $table"), + runShowPartitionsSql( + s"show partitions default.$table", Row("year=2015/month=1") :: - Row("year=2015/month=2") :: - Row("year=2016/month=2") :: - Row("year=2016/month=3") :: Nil) - - 
checkAnswer( - sql(s"show partitions default.$table"), - Row("year=2015/month=1") :: - Row("year=2015/month=2") :: - Row("year=2016/month=2") :: - Row("year=2016/month=3") :: Nil) - } - } - - test("filter by partitions") { - val table = "dateTable" - withTable(table) { - createDateTable(table) - checkAnswer( - sql(s"show partitions default.$table PARTITION(year=2015)"), - Row("year=2015/month=1") :: - Row("year=2015/month=2") :: Nil) - checkAnswer( - sql(s"show partitions default.$table PARTITION(year=2015, month=1)"), - Row("year=2015/month=1") :: Nil) - checkAnswer( - sql(s"show partitions default.$table PARTITION(month=2)"), Row("year=2015/month=2") :: - Row("year=2016/month=2") :: Nil) - } - } - - test("show everything more than 5 part keys") { - val table = "wideTable" - withTable(table) { - sql(s""" - |CREATE TABLE $table ( - | price int, qty int, - | year int, month int, hour int, minute int, sec int, extra int) - |$defaultUsing - |PARTITIONED BY (year, month, hour, minute, sec, extra)""".stripMargin) - sql(s""" - |INSERT INTO $table - |PARTITION(year = 2016, month = 3, hour = 10, minute = 10, sec = 10, extra = 1) SELECT 3, 3 - """.stripMargin) - sql(s""" - |INSERT INTO $table - |PARTITION(year = 2016, month = 4, hour = 10, minute = 10, sec = 10, extra = 1) SELECT 3, 3 - """.stripMargin) - checkAnswer( - sql(s"show partitions $table"), - Row("year=2016/month=3/hour=10/minute=10/sec=10/extra=1") :: - Row("year=2016/month=4/hour=10/minute=10/sec=10/extra=1") :: Nil) - } - } - - test("non-partitioning columns") { - val table = "dateTable" - withTable(table) { - createDateTable(table) - val errMsg = intercept[AnalysisException] { - sql(s"SHOW PARTITIONS $table PARTITION(abcd=2015, xyz=1)") - }.getMessage - assert(errMsg.contains("Non-partitioning column(s) [abcd, xyz] are specified")) - } - } - - test("show partitions of non-partitioned table") { - val table = "not_partitioned_table" - withTable(table) { - sql(s"CREATE TABLE $table (col1 int) $defaultUsing") - val errMsg = intercept[AnalysisException] { - sql(s"SHOW PARTITIONS $table") - }.getMessage - assert(errMsg.contains("not allowed on a table that is not partitioned")) + Row("year=2016/month=2") :: + Row("year=2016/month=3") :: Nil) } } + // The test fails for V2 Table Catalogs with the exception: + // org.apache.spark.sql.AnalysisException: CREATE VIEW is only supported with v1 tables. 
test("show partitions of a view") { val table = "dateTable" withTable(table) { @@ -134,7 +56,7 @@ trait ShowPartitionsSuiteBase extends command.ShowPartitionsSuiteBase { val errMsg = intercept[AnalysisException] { sql(s"SHOW PARTITIONS $view") }.getMessage - assert(errMsg.contains("is not allowed on a view")) + assert(errMsg.contains("'SHOW PARTITIONS' expects a table")) } } } @@ -143,10 +65,10 @@ trait ShowPartitionsSuiteBase extends command.ShowPartitionsSuiteBase { val viewName = "test_view" withTempView(viewName) { spark.range(10).createTempView(viewName) - val errMsg = intercept[NoSuchTableException] { + val errMsg = intercept[AnalysisException] { sql(s"SHOW PARTITIONS $viewName") }.getMessage - assert(errMsg.contains(s"Table or view '$viewName' not found")) + assert(errMsg.contains("'SHOW PARTITIONS' expects a table")) } } } @@ -159,12 +81,12 @@ class ShowPartitionsSuite extends ShowPartitionsSuiteBase with SharedSparkSessio val viewName = "test_view" withTempView(viewName) { sql(s""" - |CREATE TEMPORARY VIEW $viewName (c1 INT, c2 STRING) - |$defaultUsing""".stripMargin) - val errMsg = intercept[NoSuchTableException] { + |CREATE TEMPORARY VIEW $viewName (c1 INT, c2 STRING) + |$defaultUsing""".stripMargin) + val errMsg = intercept[AnalysisException] { sql(s"SHOW PARTITIONS $viewName") }.getMessage - assert(errMsg.contains(s"Table or view '$viewName' not found")) + assert(errMsg.contains("'SHOW PARTITIONS' expects a table")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala index 8a63cd49e89e9..ca47a713ad604 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala @@ -19,38 +19,34 @@ package org.apache.spark.sql.execution.command.v2 import org.apache.spark.SparkConf import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.connector.InMemoryTableCatalog +import org.apache.spark.sql.connector.{InMemoryPartitionTableCatalog, InMemoryTableCatalog} import org.apache.spark.sql.execution.command import org.apache.spark.sql.test.SharedSparkSession class ShowPartitionsSuite extends command.ShowPartitionsSuiteBase with SharedSparkSession { override def version: String = "V2" override def catalog: String = "test_catalog" - override def defaultNamespace: Seq[String] = Nil override def defaultUsing: String = "USING _" override def sparkConf: SparkConf = super.sparkConf - .set(s"spark.sql.catalog.$catalog", classOf[InMemoryTableCatalog].getName) + .set(s"spark.sql.catalog.$catalog", classOf[InMemoryPartitionTableCatalog].getName) + .set(s"spark.sql.catalog.non_part_$catalog", classOf[InMemoryTableCatalog].getName) - // TODO(SPARK-33452): Create a V2 SHOW PARTITIONS execution node - test("not supported SHOW PARTITIONS") { - def testV1Command(sqlCommand: String, sqlParams: String): Unit = { - val e = intercept[AnalysisException] { - sql(s"$sqlCommand $sqlParams") - } - assert(e.message.contains(s"$sqlCommand is only supported with v1 tables")) - } - val t = s"$catalog.ns1.ns2.tbl" - withTable(t) { - sql( - s""" - |CREATE TABLE $t (id bigint, data string) - |$defaultUsing - |PARTITIONED BY (id) - """.stripMargin) + override protected def wrongPartitionColumnsError(columns: String*): String = { + s"${columns.head} is not a valid partition column" + } - testV1Command("SHOW PARTITIONS", t) - 
testV1Command("SHOW PARTITIONS", s"$t PARTITION(id='1')") + test("a table does not support partitioning") { + val table = s"non_part_$catalog.tab1" + withTable(table) { + sql(s""" + |CREATE TABLE $table (price int, qty int, year int, month int) + |$defaultUsing""".stripMargin) + val errMsg = intercept[AnalysisException] { + sql(s"SHOW PARTITIONS $table") + }.getMessage + assert(errMsg.contains( + "SHOW PARTITIONS cannot run for a table which does not support partitioning")) } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala index 3af163af0968c..49e26614e13c4 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala @@ -300,7 +300,7 @@ class PartitionedTablePerfStatsSuite HiveCatalogMetrics.reset() assert(spark.sql("show partitions test").count() == 100) - assert(HiveCatalogMetrics.METRIC_HIVE_CLIENT_CALLS.getCount() < 10) + assert(HiveCatalogMetrics.METRIC_HIVE_CLIENT_CALLS.getCount() <= 10) } } } @@ -323,7 +323,7 @@ class PartitionedTablePerfStatsSuite HiveCatalogMetrics.reset() assert(spark.sql("show partitions test").count() == 100) - assert(HiveCatalogMetrics.METRIC_HIVE_CLIENT_CALLS.getCount() < 10) + assert(HiveCatalogMetrics.METRIC_HIVE_CLIENT_CALLS.getCount() <= 10) } } } From 6fd148fea890391941f876e0a14446d875fe72e1 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 30 Nov 2020 14:05:49 +0000 Subject: [PATCH 0610/1009] [SPARK-33569][SQL] Remove getting partitions by an identifier prefix ### What changes were proposed in this pull request? 1. Remove the method `listPartitionIdentifiers()` from the `SupportsPartitionManagement` interface. The method lists partitions by ident prefix. 2. Rename `listPartitionByNames()` to `listPartitionIdentifiers()`. 3. Re-implement the default method `partitionExists()` using new method. ### Why are the changes needed? Getting partitions by ident prefix only is not used, and it can be removed to improve code maintenance. Also this makes the `SupportsPartitionManagement` interface cleaner. ### Does this PR introduce _any_ user-facing change? Should not. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "test:testOnly org.apache.spark.sql.connector.catalog.*" ``` Closes #30514 from MaxGekk/remove-listPartitionIdentifiers. 
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../catalog/SupportsPartitionManagement.java | 15 ++---- .../connector/InMemoryPartitionTable.scala | 10 +--- ...pportsAtomicPartitionManagementSuite.scala | 28 ++++++----- .../SupportsPartitionManagementSuite.scala | 48 ++++++++++--------- .../AlterTablePartitionV2SQLSuite.scala | 6 ++- 5 files changed, 52 insertions(+), 55 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java index 380717d2e0e9b..9d898f2f477e1 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java @@ -17,6 +17,7 @@ package org.apache.spark.sql.connector.catalog; +import java.util.Arrays; import java.util.Map; import org.apache.spark.annotation.Experimental; @@ -79,7 +80,9 @@ void createPartition( * @return true if the partition exists, false otherwise */ default boolean partitionExists(InternalRow ident) { - return listPartitionIdentifiers(ident).length > 0; + String[] partitionNames = partitionSchema().names(); + String[] requiredNames = Arrays.copyOfRange(partitionNames, 0, ident.numFields()); + return listPartitionIdentifiers(requiredNames, ident).length > 0; } /** @@ -105,14 +108,6 @@ void replacePartitionMetadata( Map loadPartitionMetadata(InternalRow ident) throws UnsupportedOperationException; - /** - * List the identifiers of all partitions that have the ident prefix in a table. - * - * @param ident a prefix of partition identifier - * @return an array of Identifiers for the partitions - */ - InternalRow[] listPartitionIdentifiers(InternalRow ident); - /** * List the identifiers of all partitions that match to the ident by names. * @@ -120,5 +115,5 @@ Map loadPartitionMetadata(InternalRow ident) * @param ident a partition identifier values. 
* @return an array of Identifiers for the partitions */ - InternalRow[] listPartitionByNames(String[] names, InternalRow ident); + InternalRow[] listPartitionIdentifiers(String[] names, InternalRow ident); } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala index ba762a58b1e52..6a8432e635310 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala @@ -83,14 +83,6 @@ class InMemoryPartitionTable( } } - def listPartitionIdentifiers(ident: InternalRow): Array[InternalRow] = { - val prefixPartCols = - new StructType(partitionSchema.dropRight(partitionSchema.length - ident.numFields).toArray) - val prefixPart = ident.toSeq(prefixPartCols) - memoryTablePartitions.keySet().asScala - .filter(_.toSeq(partitionSchema).startsWith(prefixPart)).toArray - } - override def partitionExists(ident: InternalRow): Boolean = memoryTablePartitions.containsKey(ident) @@ -98,7 +90,7 @@ class InMemoryPartitionTable( memoryTablePartitions.put(InternalRow.fromSeq(key), Map.empty[String, String].asJava) } - override def listPartitionByNames( + override def listPartitionIdentifiers( names: Array[String], ident: InternalRow): Array[InternalRow] = { assert(names.length == ident.numFields, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsAtomicPartitionManagementSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsAtomicPartitionManagementSuite.scala index 6f7c30653110b..ad2631650b7ef 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsAtomicPartitionManagementSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsAtomicPartitionManagementSuite.scala @@ -47,34 +47,38 @@ class SupportsAtomicPartitionManagementSuite extends SparkFunSuite { newCatalog } + private def hasPartitions(table: SupportsPartitionManagement): Boolean = { + !table.listPartitionIdentifiers(Array.empty, InternalRow.empty).isEmpty + } + test("createPartitions") { val table = catalog.loadTable(ident) val partTable = new InMemoryAtomicPartitionTable( table.name(), table.schema(), table.partitioning(), table.properties()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) val partIdents = Array(InternalRow.apply("3"), InternalRow.apply("4")) partTable.createPartitions( partIdents, Array(new util.HashMap[String, String](), new util.HashMap[String, String]())) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).nonEmpty) + assert(hasPartitions(partTable)) assert(partTable.partitionExists(InternalRow.apply("3"))) assert(partTable.partitionExists(InternalRow.apply("4"))) partTable.dropPartition(InternalRow.apply("3")) partTable.dropPartition(InternalRow.apply("4")) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) } test("createPartitions failed if partition already exists") { val table = catalog.loadTable(ident) val partTable = new InMemoryAtomicPartitionTable( table.name(), table.schema(), table.partitioning(), table.properties()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) val partIdent = InternalRow.apply("4") partTable.createPartition(partIdent, new 
util.HashMap[String, String]()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).nonEmpty) + assert(hasPartitions(partTable)) assert(partTable.partitionExists(partIdent)) val partIdents = Array(InternalRow.apply("3"), InternalRow.apply("4")) @@ -85,42 +89,42 @@ class SupportsAtomicPartitionManagementSuite extends SparkFunSuite { assert(!partTable.partitionExists(InternalRow.apply("3"))) partTable.dropPartition(partIdent) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) } test("dropPartitions") { val table = catalog.loadTable(ident) val partTable = new InMemoryAtomicPartitionTable( table.name(), table.schema(), table.partitioning(), table.properties()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) val partIdents = Array(InternalRow.apply("3"), InternalRow.apply("4")) partTable.createPartitions( partIdents, Array(new util.HashMap[String, String](), new util.HashMap[String, String]())) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).nonEmpty) + assert(hasPartitions(partTable)) assert(partTable.partitionExists(InternalRow.apply("3"))) assert(partTable.partitionExists(InternalRow.apply("4"))) partTable.dropPartitions(partIdents) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) } test("dropPartitions failed if partition not exists") { val table = catalog.loadTable(ident) val partTable = new InMemoryAtomicPartitionTable( table.name(), table.schema(), table.partitioning(), table.properties()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) val partIdent = InternalRow.apply("4") partTable.createPartition(partIdent, new util.HashMap[String, String]()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).length == 1) + assert(partTable.listPartitionIdentifiers(Array.empty, InternalRow.empty).length == 1) val partIdents = Array(InternalRow.apply("3"), InternalRow.apply("4")) assert(!partTable.dropPartitions(partIdents)) assert(partTable.partitionExists(partIdent)) partTable.dropPartition(partIdent) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala index caf7e91612563..9de0fe6108c99 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala @@ -48,97 +48,101 @@ class SupportsPartitionManagementSuite extends SparkFunSuite { newCatalog } + private def hasPartitions(table: SupportsPartitionManagement): Boolean = { + !table.listPartitionIdentifiers(Array.empty, InternalRow.empty).isEmpty + } + test("createPartition") { val table = catalog.loadTable(ident) val partTable = new InMemoryPartitionTable( table.name(), table.schema(), table.partitioning(), table.properties()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) val partIdent = InternalRow.apply("3") partTable.createPartition(partIdent, new util.HashMap[String, String]()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).nonEmpty) + 
assert(hasPartitions(partTable)) assert(partTable.partitionExists(partIdent)) partTable.dropPartition(partIdent) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) } test("dropPartition") { val table = catalog.loadTable(ident) val partTable = new InMemoryPartitionTable( table.name(), table.schema(), table.partitioning(), table.properties()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) val partIdent = InternalRow.apply("3") val partIdent1 = InternalRow.apply("4") partTable.createPartition(partIdent, new util.HashMap[String, String]()) partTable.createPartition(partIdent1, new util.HashMap[String, String]()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).length == 2) + assert(partTable.listPartitionIdentifiers(Array.empty, InternalRow.empty).length == 2) partTable.dropPartition(partIdent) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).length == 1) + assert(partTable.listPartitionIdentifiers(Array.empty, InternalRow.empty).length == 1) partTable.dropPartition(partIdent1) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) } test("replacePartitionMetadata") { val table = catalog.loadTable(ident) val partTable = new InMemoryPartitionTable( table.name(), table.schema(), table.partitioning(), table.properties()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) val partIdent = InternalRow.apply("3") partTable.createPartition(partIdent, new util.HashMap[String, String]()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).nonEmpty) + assert(hasPartitions(partTable)) assert(partTable.partitionExists(partIdent)) assert(partTable.loadPartitionMetadata(partIdent).isEmpty) partTable.replacePartitionMetadata(partIdent, Map("paramKey" -> "paramValue").asJava) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).nonEmpty) + assert(hasPartitions(partTable)) assert(partTable.partitionExists(partIdent)) assert(!partTable.loadPartitionMetadata(partIdent).isEmpty) assert(partTable.loadPartitionMetadata(partIdent).get("paramKey") == "paramValue") partTable.dropPartition(partIdent) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) } test("loadPartitionMetadata") { val table = catalog.loadTable(ident) val partTable = new InMemoryPartitionTable( table.name(), table.schema(), table.partitioning(), table.properties()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) val partIdent = InternalRow.apply("3") partTable.createPartition(partIdent, Map("paramKey" -> "paramValue").asJava) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).nonEmpty) + assert(hasPartitions(partTable)) assert(partTable.partitionExists(partIdent)) assert(!partTable.loadPartitionMetadata(partIdent).isEmpty) assert(partTable.loadPartitionMetadata(partIdent).get("paramKey") == "paramValue") partTable.dropPartition(partIdent) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) } test("listPartitionIdentifiers") { val table = catalog.loadTable(ident) val partTable = new InMemoryPartitionTable( table.name(), table.schema(), table.partitioning(), table.properties()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) val partIdent = 
InternalRow.apply("3") partTable.createPartition(partIdent, new util.HashMap[String, String]()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).length == 1) + assert(partTable.listPartitionIdentifiers(Array.empty, InternalRow.empty).length == 1) val partIdent1 = InternalRow.apply("4") partTable.createPartition(partIdent1, new util.HashMap[String, String]()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).length == 2) - assert(partTable.listPartitionIdentifiers(partIdent1).length == 1) + assert(partTable.listPartitionIdentifiers(Array.empty, InternalRow.empty).length == 2) + assert(partTable.listPartitionIdentifiers(Array("dt"), partIdent1).length == 1) partTable.dropPartition(partIdent) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).length == 1) + assert(partTable.listPartitionIdentifiers(Array.empty, InternalRow.empty).length == 1) partTable.dropPartition(partIdent1) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) } test("listPartitionByNames") { @@ -170,7 +174,7 @@ class SupportsPartitionManagementSuite extends SparkFunSuite { (Array("part0", "part1"), InternalRow(3, "xyz")) -> Set(), (Array("part1"), InternalRow(3.14f)) -> Set() ).foreach { case ((names, idents), expected) => - assert(partTable.listPartitionByNames(names, idents).toSet === expected) + assert(partTable.listPartitionIdentifiers(names, idents).toSet === expected) } // Check invalid parameters Seq( @@ -178,7 +182,7 @@ class SupportsPartitionManagementSuite extends SparkFunSuite { (Array("col0", "part1"), InternalRow(0, 1)), (Array("wrong"), InternalRow("invalid")) ).foreach { case (names, idents) => - intercept[AssertionError](partTable.listPartitionByNames(names, idents)) + intercept[AssertionError](partTable.listPartitionIdentifiers(names, idents)) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala index 4cacd5ec2b49e..3583eceec7559 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala @@ -141,7 +141,8 @@ class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { catalog("testpart").asTableCatalog.loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")) assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(1)))) assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(2)))) - assert(partTable.asPartitionable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert( + partTable.asPartitionable.listPartitionIdentifiers(Array.empty, InternalRow.empty).isEmpty) } } @@ -161,7 +162,8 @@ class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { spark.sql(s"ALTER TABLE $t DROP IF EXISTS PARTITION (id=1), PARTITION (id=2)") assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(1)))) assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(2)))) - assert(partTable.asPartitionable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert( + partTable.asPartitionable.listPartitionIdentifiers(Array.empty, InternalRow.empty).isEmpty) } } From 030b3139dadc342e82d71f3fb241c320a7577131 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 30 Nov 2020 16:40:36 +0000 Subject: [PATCH 0611/1009] [SPARK-33569][SPARK-33452][SQL][FOLLOWUP] Fix a build error 
in `ShowPartitionsExec` ### What changes were proposed in this pull request? Use `listPartitionIdentifiers ` instead of `listPartitionByNames` in `ShowPartitionsExec`. The `listPartitionByNames` was renamed by https://github.com/apache/spark/pull/30514. ### Why are the changes needed? To fix build error. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running tests for the `SHOW PARTITIONS` command: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *ShowPartitionsSuite" ``` Closes #30553 from MaxGekk/fix-build-show-partitions-exec. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../spark/sql/execution/datasources/v2/ShowPartitionsExec.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala index 44d6f4495f552..c4b6aa805d58f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala @@ -41,7 +41,7 @@ case class ShowPartitionsExec( // listPartitionByNames() should return all partitions if the partition spec // does not specify any partition names. .getOrElse((Seq.empty[String], InternalRow.empty)) - val partitionIdentifiers = table.listPartitionByNames(names.toArray, ident) + val partitionIdentifiers = table.listPartitionIdentifiers(names.toArray, ident) // Converting partition identifiers as `InternalRow` of partition values, // for instance InternalRow(value0, value1, ..., valueN), to `InternalRow`s // with a string in the format: "col0=value0/col1=value1/.../colN=valueN". From f3c2583cc3ad6a2a24bfb09e2ee7af4e63e5bf66 Mon Sep 17 00:00:00 2001 From: Erik Krogen Date: Mon, 30 Nov 2020 14:40:51 -0600 Subject: [PATCH 0612/1009] [SPARK-33185][YARN][FOLLOW-ON] Leverage RM's RPC API instead of REST to fetch driver log links in yarn.Client ### What changes were proposed in this pull request? This is a follow-on to PR #30096 which initially added support for printing direct links to the driver stdout/stderr logs from the application report output in `yarn.Client` using the `spark.yarn.includeDriverLogsLink` configuration. That PR made use of the ResourceManager's REST APIs to fetch the necessary information to construct the links. This PR proposes removing the dependency on the REST API, since the new logic is the only place in `yarn.Client` which makes use of this API, and instead leverages the RPC API via `YarnClient`, which brings the code in line with the rest of `yarn.Client`. ### Why are the changes needed? While the old logic worked okay when running a Spark application in a "standard" environment with full access to Kerberos credentials, it can fail when run in an environment with restricted Kerberos credentials. In our case, this environment is represented by [Azkaban](https://azkaban.github.io/), but it likely affects other job scheduling systems as well. 
In such an environment, the application has delegation tokens which enabled it to communicate with services such as YARN, but the RM REST API is not typically covered by such delegation tokens (note that although YARN does actually support accessing the RM REST API via a delegation token as documented [here](https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html#Cluster_Delegation_Tokens_API), it is a new feature in alpha phase, and most deployments are likely not retrieving this token today). Besides this enhancement, leveraging the `YarnClient` APIs greatly simplifies the processing logic, such as removing all JSON parsing. ### Does this PR introduce _any_ user-facing change? Very minimal user-facing changes on top of PR #30096. Basically expands the scope of environments in which that feature will operate correctly. ### How was this patch tested? In addition to redoing the `spark-submit` testing as mentioned in PR #30096, I also tested this logic in a restricted-credentials environment (Azkaban). It succeeds where the previous logic would fail with a 401 error. Closes #30450 from xkrogen/xkrogen-SPARK-33185-driverlogs-followon. Authored-by: Erik Krogen Signed-off-by: Mridul Muralidharan gmail.com> --- .../org/apache/spark/deploy/yarn/Client.scala | 67 +++++++------------ .../spark/deploy/yarn/ClientSuite.scala | 47 ------------- .../spark/deploy/yarn/YarnClusterSuite.scala | 31 +++++++++ 3 files changed, 54 insertions(+), 91 deletions(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 552167c935b30..d252e8368a0c4 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -29,12 +29,8 @@ import scala.collection.immutable.{Map => IMap} import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, ListBuffer, Map} import scala.util.control.NonFatal -import com.fasterxml.jackson.databind.ObjectMapper import com.google.common.base.Objects import com.google.common.io.Files -import javax.ws.rs.client.ClientBuilder -import javax.ws.rs.core.MediaType -import javax.ws.rs.core.Response.Status.Family import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs._ import org.apache.hadoop.fs.permission.FsPermission @@ -51,7 +47,6 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException import org.apache.hadoop.yarn.security.AMRMTokenIdentifier import org.apache.hadoop.yarn.util.Records -import org.apache.hadoop.yarn.webapp.util.WebAppUtils import org.apache.spark.{SecurityManager, SparkConf, SparkException} import org.apache.spark.api.python.PythonUtils @@ -1089,9 +1084,9 @@ private[spark] class Client( // If DEBUG is enabled, log report details every iteration // Otherwise, log them every time the application changes state if (log.isDebugEnabled) { - logDebug(formatReportDetails(report, getDriverLogsLink(report.getApplicationId))) + logDebug(formatReportDetails(report, getDriverLogsLink(report))) } else if (lastState != state) { - logInfo(formatReportDetails(report, getDriverLogsLink(report.getApplicationId))) + logInfo(formatReportDetails(report, getDriverLogsLink(report))) } } @@ -1192,33 +1187,31 @@ private[spark] class Client( } /** - * Fetch links to the logs of the driver for the given application ID. 
This requires hitting the - * RM REST API. Returns an empty map if the links could not be fetched. If this feature is - * disabled via [[CLIENT_INCLUDE_DRIVER_LOGS_LINK]], an empty map is returned immediately. + * Fetch links to the logs of the driver for the given application report. This requires + * query the ResourceManager via RPC. Returns an empty map if the links could not be fetched. + * If this feature is disabled via [[CLIENT_INCLUDE_DRIVER_LOGS_LINK]], or if the application + * report indicates that the driver container isn't currently running, an empty map is + * returned immediately. */ - private def getDriverLogsLink(appId: ApplicationId): IMap[String, String] = { - if (!sparkConf.get(CLIENT_INCLUDE_DRIVER_LOGS_LINK)) { - return IMap() + private def getDriverLogsLink(appReport: ApplicationReport): IMap[String, String] = { + if (!sparkConf.get(CLIENT_INCLUDE_DRIVER_LOGS_LINK) + || appReport.getYarnApplicationState != YarnApplicationState.RUNNING) { + return IMap.empty } try { - val baseRmUrl = WebAppUtils.getRMWebAppURLWithScheme(hadoopConf) - val response = ClientBuilder.newClient() - .target(baseRmUrl) - .path("ws").path("v1").path("cluster").path("apps") - .path(appId.toString).path("appattempts") - .request(MediaType.APPLICATION_JSON) - .get() - response.getStatusInfo.getFamily match { - case Family.SUCCESSFUL => parseAppAttemptsJsonResponse(response.readEntity(classOf[String])) - case _ => - logWarning(s"Unable to fetch app attempts info from $baseRmUrl, got " - + s"status code ${response.getStatus}: ${response.getStatusInfo.getReasonPhrase}") - IMap() - } + Option(appReport.getCurrentApplicationAttemptId) + .flatMap(attemptId => Option(yarnClient.getApplicationAttemptReport(attemptId))) + .flatMap(attemptReport => Option(attemptReport.getAMContainerId)) + .flatMap(amContainerId => Option(yarnClient.getContainerReport(amContainerId))) + .flatMap(containerReport => Option(containerReport.getLogUrl)) + .map(YarnContainerInfoHelper.getLogUrlsFromBaseUrl) + .getOrElse(IMap.empty) } catch { case e: Exception => - logWarning(s"Unable to get driver log links for $appId", e) - IMap() + logWarning(s"Unable to get driver log links for $appId: $e") + // Include the full stack trace only at DEBUG level to reduce verbosity + logDebug(s"Unable to get driver log links for $appId", e) + IMap.empty } } @@ -1236,7 +1229,7 @@ private[spark] class Client( val report = getApplicationReport(appId) val state = report.getYarnApplicationState logInfo(s"Application report for $appId (state: $state)") - logInfo(formatReportDetails(report, getDriverLogsLink(report.getApplicationId))) + logInfo(formatReportDetails(report, getDriverLogsLink(report))) if (state == YarnApplicationState.FAILED || state == YarnApplicationState.KILLED) { throw new SparkException(s"Application $appId finished with status: $state") } @@ -1627,20 +1620,6 @@ private object Client extends Logging { writer.flush() out.closeEntry() } - - private[yarn] def parseAppAttemptsJsonResponse(jsonString: String): IMap[String, String] = { - val objectMapper = new ObjectMapper() - // If JSON response is malformed somewhere along the way, MissingNode will be returned, - // which allows for safe continuation of chaining. The `elements()` call will be empty, - // and None will get returned. 
- objectMapper.readTree(jsonString) - .path("appAttempts").path("appAttempt") - .elements().asScala.toList.takeRight(1).headOption - .map(_.path("logsLink").asText("")) - .filterNot(_ == "") - .map(baseUrl => YarnContainerInfoHelper.getLogUrlsFromBaseUrl(baseUrl)) - .getOrElse(IMap()) - } } private[spark] class YarnClusterApplication extends SparkApplication { diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala index fccb2406d66f8..ea3acec3bb78b 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala @@ -583,53 +583,6 @@ class ClientSuite extends SparkFunSuite with Matchers { } } - test("SPARK-33185 Parse YARN AppAttempts valid JSON response") { - val appIdSuffix = "1500000000000_1234567" - val containerId = s"container_e1_${appIdSuffix}_01_000001" - val nodeHost = "node.example.com" - val jsonString = - s""" - |{"appAttempts": { - | "appAttempt": [ { - | "id":1, - | "startTime":1600000000000, - | "finishedTime":1600000100000, - | "containerId":"$containerId", - | "nodeHttpAddress":"$nodeHost:8042", - | "nodeId":"node.example.com:8041", - | "logsLink":"http://$nodeHost:8042/node/containerlogs/$containerId/username", - | "blacklistedNodes":"", - | "nodesBlacklistedBySystem":"", - | "appAttemptId":"appattempt_${appIdSuffix}_000001" - | }] - |}} - |""".stripMargin - val logLinkMap = Client.parseAppAttemptsJsonResponse(jsonString) - assert(logLinkMap.keySet === Set("stdout", "stderr")) - assert(logLinkMap("stdout") === - s"http://$nodeHost:8042/node/containerlogs/$containerId/username/stdout?start=-4096") - assert(logLinkMap("stderr") === - s"http://$nodeHost:8042/node/containerlogs/$containerId/username/stderr?start=-4096") - } - - test("SPARK-33185 Parse YARN AppAttempts invalid JSON response") { - // No "appAttempt" present - assert(Client.parseAppAttemptsJsonResponse("""{"appAttempts": { } }""") === Map()) - - // "appAttempt" is empty - assert(Client.parseAppAttemptsJsonResponse("""{"appAttempts": { "appAttempt": [ ] } }""") - === Map()) - - // logsLink is missing - assert(Client.parseAppAttemptsJsonResponse("""{"appAttempts":{"appAttempt":[{"id":1}]}}""") - === Map()) - - // logsLink is present but empty - assert( - Client.parseAppAttemptsJsonResponse("""{"appAttempts":{"appAttempt":[{"logsLink":""}]}}""") - === Map()) - } - private val matching = Seq( ("files URI match test1", "file:///file1", "file:///file2"), ("files URI match test2", "file:///c:file1", "file://c:file2"), diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala index cf754cca315f0..222b24ca12dce 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala @@ -230,6 +230,37 @@ class YarnClusterSuite extends BaseYarnClusterSuite { } } + test("running Spark in yarn-cluster mode displays driver log links") { + val log4jConf = new File(tempDir, "log4j.properties") + val logOutFile = new File(tempDir, "logs") + Files.write( + s"""log4j.rootCategory=DEBUG,file + |log4j.appender.file=org.apache.log4j.FileAppender + |log4j.appender.file.file=$logOutFile + 
|log4j.appender.file.layout=org.apache.log4j.PatternLayout + |""".stripMargin, + log4jConf, StandardCharsets.UTF_8) + // Since this test is trying to extract log output from the SparkSubmit process itself, + // standard options to the Spark process don't take effect. Leverage the java-opts file which + // will get picked up for the SparkSubmit process. + val confDir = new File(tempDir, "conf") + confDir.mkdir() + val javaOptsFile = new File(confDir, "java-opts") + Files.write(s"-Dlog4j.configuration=file://$log4jConf\n", javaOptsFile, StandardCharsets.UTF_8) + + val result = File.createTempFile("result", null, tempDir) + val finalState = runSpark(clientMode = false, + mainClassName(YarnClusterDriver.getClass), + appArgs = Seq(result.getAbsolutePath), + extraEnv = Map("SPARK_CONF_DIR" -> confDir.getAbsolutePath), + extraConf = Map(CLIENT_INCLUDE_DRIVER_LOGS_LINK.key -> true.toString)) + checkResult(finalState, result) + val logOutput = Files.toString(logOutFile, StandardCharsets.UTF_8) + val logFilePattern = raw"""(?s).+\sDriver Logs \(\): https?://.+/(\?\S+)?\s.+""" + logOutput should fullyMatch regex logFilePattern.replace("", "stdout") + logOutput should fullyMatch regex logFilePattern.replace("", "stderr") + } + test("timeout to get SparkContext in cluster mode triggers failure") { val timeout = 2000 val finalState = runSpark(false, mainClassName(SparkContextTimeoutApp.getClass), From c6994354f70061b2a15445dbd298a2db926b548c Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 30 Nov 2020 13:29:50 -0800 Subject: [PATCH 0613/1009] [SPARK-33545][CORE] Support Fallback Storage during Worker decommission ### What changes were proposed in this pull request? This PR aims to support storage migration to the fallback storage like cloud storage (`S3`) during worker decommission for the corner cases where the exceptions occur or there is no live peer left. Although this PR focuses on cloud storage like `S3` which has a TTL feature in order to simplify Spark's logic, we can use alternative fallback storages like HDFS/NFS(EFS) if the user provides a clean-up mechanism. ### Why are the changes needed? Currently, storage migration is not possible when there is no available executor. For example, when there is one executor, the executor cannot perform storage migration because it has no peer. ### Does this PR introduce _any_ user-facing change? Yes. This is a new feature. ### How was this patch tested? Pass the CIs with newly added test cases. Closes #30492 from dongjoon-hyun/SPARK-33545. 
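As a rough illustration of how the feature is wired up, the sketch below builds a `SparkConf` similar to the one used in the new `FallbackStorageSuite`. Only `spark.storage.decommission.fallbackStorage.path` is introduced by this patch; the other string keys and the bucket name are assumptions for illustration, and the path must end with a separator per the config's check:

```scala
import org.apache.spark.SparkConf

// A minimal sketch, not an authoritative setup: enable decommissioning with
// shuffle block migration and point the fallback storage at a TTL-managed bucket.
// The bucket name is hypothetical; keys other than fallbackStorage.path are assumed.
val conf = new SparkConf()
  .set("spark.decommission.enabled", "true")                       // assumed key
  .set("spark.storage.decommission.enabled", "true")               // assumed key
  .set("spark.storage.decommission.shuffleBlocks.enabled", "true") // assumed key
  .set("spark.storage.decommission.fallbackStorage.path", "s3a://spark-storage/")
```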
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- core/pom.xml | 41 +++ .../scala/org/apache/spark/SparkContext.scala | 1 + .../spark/internal/config/package.scala | 10 + .../shuffle/IndexShuffleBlockResolver.scala | 2 +- .../apache/spark/storage/BlockManager.scala | 18 +- .../storage/BlockManagerDecommissioner.scala | 3 + .../spark/storage/FallbackStorage.scala | 174 +++++++++++ .../storage/ShuffleBlockFetcherIterator.scala | 3 +- .../spark/storage/FallbackStorageSuite.scala | 269 ++++++++++++++++++ 9 files changed, 517 insertions(+), 4 deletions(-) create mode 100644 core/src/main/scala/org/apache/spark/storage/FallbackStorage.scala create mode 100644 core/src/test/scala/org/apache/spark/storage/FallbackStorageSuite.scala diff --git a/core/pom.xml b/core/pom.xml index 7a56c4ca3c638..9d2bf7dbe57a9 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -461,6 +461,47 @@ test + + org.apache.hadoop + hadoop-aws + ${hadoop.version} + test + + + org.apache.hadoop + hadoop-common + + + commons-logging + commons-logging + + + org.codehaus.jackson + jackson-mapper-asl + + + org.codehaus.jackson + jackson-core-asl + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + + com.fasterxml.jackson.core + jackson-annotations + + + + com.amazonaws + aws-java-sdk + + + org.apache.commons commons-crypto diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 0440a9de6ab31..b953592fa04dc 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -576,6 +576,7 @@ class SparkContext(config: SparkConf) extends Logging { } _ui.foreach(_.setAppId(_applicationId)) _env.blockManager.initialize(_applicationId) + FallbackStorage.registerBlockManagerIfNeeded(_env.blockManager.master, _conf) // The metrics system for Driver need to be set spark.app.id to app ID. // So it should start after we get app ID from the task scheduler and set spark.app.id. diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index b8bcb374ef961..093a0ecf58d32 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -471,6 +471,16 @@ package object config { "cache block replication should be positive.") .createWithDefaultString("30s") + private[spark] val STORAGE_DECOMMISSION_FALLBACK_STORAGE_PATH = + ConfigBuilder("spark.storage.decommission.fallbackStorage.path") + .doc("The location for fallback storage during block manager decommissioning. " + + "For example, `s3a://spark-storage/`. In case of empty, fallback storage is disabled. 
" + + "The storage should be managed by TTL because Spark will not clean it up.") + .version("3.1.0") + .stringConf + .checkValue(_.endsWith(java.io.File.separator), "Path should end with separator.") + .createOptional + private[spark] val STORAGE_REPLICATION_TOPOLOGY_FILE = ConfigBuilder("spark.storage.replication.topologyFile") .version("2.1.0") diff --git a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala index e5df27c0d3c7a..5f0bb42108c56 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala @@ -91,7 +91,7 @@ private[spark] class IndexShuffleBlockResolver( * When the dirs parameter is None then use the disk manager's local directories. Otherwise, * read from the specified directories. */ - private def getIndexFile( + def getIndexFile( shuffleId: Int, mapId: Long, dirs: Option[Array[String]] = None): File = { diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index 072702b343328..a5b8d5d0c8cda 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -627,7 +627,16 @@ private[spark] class BlockManager( override def getLocalBlockData(blockId: BlockId): ManagedBuffer = { if (blockId.isShuffle) { logDebug(s"Getting local shuffle block ${blockId}") - shuffleManager.shuffleBlockResolver.getBlockData(blockId) + try { + shuffleManager.shuffleBlockResolver.getBlockData(blockId) + } catch { + case e: IOException => + if (conf.get(config.STORAGE_DECOMMISSION_FALLBACK_STORAGE_PATH).isDefined) { + FallbackStorage.read(conf, blockId) + } else { + throw e + } + } } else { getLocalBytes(blockId) match { case Some(blockData) => @@ -1580,7 +1589,12 @@ private[spark] class BlockManager( lastPeerFetchTimeNs = System.nanoTime() logDebug("Fetched peers from master: " + cachedPeers.mkString("[", ",", "]")) } - cachedPeers + if (cachedPeers.isEmpty && + conf.get(config.STORAGE_DECOMMISSION_FALLBACK_STORAGE_PATH).isDefined) { + Seq(FallbackStorage.FALLBACK_BLOCK_MANAGER_ID) + } else { + cachedPeers + } } } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala index 7a55039db1b60..e73e359a70f1e 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala @@ -39,6 +39,7 @@ private[storage] class BlockManagerDecommissioner( conf: SparkConf, bm: BlockManager) extends Logging { + private val fallbackStorage = FallbackStorage.getFallbackStorage(conf) private val maxReplicationFailuresForDecommission = conf.get(config.STORAGE_DECOMMISSION_MAX_REPLICATION_FAILURE_PER_BLOCK) @@ -114,6 +115,8 @@ private[storage] class BlockManagerDecommissioner( // driver a no longer referenced RDD with shuffle files. 
if (bm.migratableResolver.getMigrationBlocks(shuffleBlockInfo).isEmpty) { logWarning(s"Skipping block ${shuffleBlockInfo}, block deleted.") + } else if (fallbackStorage.isDefined) { + fallbackStorage.foreach(_.copy(shuffleBlockInfo, bm)) } else { throw e } diff --git a/core/src/main/scala/org/apache/spark/storage/FallbackStorage.scala b/core/src/main/scala/org/apache/spark/storage/FallbackStorage.scala new file mode 100644 index 0000000000000..9221731f77a59 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/storage/FallbackStorage.scala @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.storage + +import java.io.DataInputStream +import java.nio.ByteBuffer + +import scala.concurrent.Future +import scala.reflect.ClassTag + +import org.apache.hadoop.fs.{FileSystem, Path} + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.STORAGE_DECOMMISSION_FALLBACK_STORAGE_PATH +import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} +import org.apache.spark.rpc.{RpcAddress, RpcEndpointRef, RpcTimeout} +import org.apache.spark.shuffle.{IndexShuffleBlockResolver, ShuffleBlockInfo} +import org.apache.spark.shuffle.IndexShuffleBlockResolver.NOOP_REDUCE_ID +import org.apache.spark.util.Utils + +/** + * A fallback storage used by storage decommissioners. 
+ */ +private[storage] class FallbackStorage(conf: SparkConf) extends Logging { + require(conf.contains("spark.app.id")) + require(conf.get(STORAGE_DECOMMISSION_FALLBACK_STORAGE_PATH).isDefined) + + private val fallbackPath = new Path(conf.get(STORAGE_DECOMMISSION_FALLBACK_STORAGE_PATH).get) + private val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf) + private val fallbackFileSystem = FileSystem.get(fallbackPath.toUri, hadoopConf) + private val appId = conf.getAppId + + // Visible for testing + def copy( + shuffleBlockInfo: ShuffleBlockInfo, + bm: BlockManager): Unit = { + val shuffleId = shuffleBlockInfo.shuffleId + val mapId = shuffleBlockInfo.mapId + + bm.migratableResolver match { + case r: IndexShuffleBlockResolver => + val indexFile = r.getIndexFile(shuffleId, mapId) + + if (indexFile.exists()) { + fallbackFileSystem.copyFromLocalFile( + new Path(indexFile.getAbsolutePath), + new Path(fallbackPath, s"$appId/$shuffleId/${indexFile.getName}")) + + val dataFile = r.getDataFile(shuffleId, mapId) + if (dataFile.exists()) { + fallbackFileSystem.copyFromLocalFile( + new Path(dataFile.getAbsolutePath), + new Path(fallbackPath, s"$appId/$shuffleId/${dataFile.getName}")) + } + + // Report block statuses + val reduceId = NOOP_REDUCE_ID + val indexBlockId = ShuffleIndexBlockId(shuffleId, mapId, reduceId) + FallbackStorage.reportBlockStatus(bm, indexBlockId, indexFile.length) + if (dataFile.exists) { + val dataBlockId = ShuffleDataBlockId(shuffleId, mapId, reduceId) + FallbackStorage.reportBlockStatus(bm, dataBlockId, dataFile.length) + } + } + case r => + logWarning(s"Unsupported Resolver: ${r.getClass.getName}") + } + } + + def exists(shuffleId: Int, filename: String): Boolean = { + fallbackFileSystem.exists(new Path(fallbackPath, s"$appId/$shuffleId/$filename")) + } +} + +class NoopRpcEndpointRef(conf: SparkConf) extends RpcEndpointRef(conf) { + import scala.concurrent.ExecutionContext.Implicits.global + override def address: RpcAddress = null + override def name: String = "fallback" + override def send(message: Any): Unit = {} + override def ask[T: ClassTag](message: Any, timeout: RpcTimeout): Future[T] = { + Future{true.asInstanceOf[T]} + } +} + +object FallbackStorage extends Logging { + /** We use one block manager id as a place holder. */ + val FALLBACK_BLOCK_MANAGER_ID: BlockManagerId = BlockManagerId("fallback", "remote", 7337) + + def getFallbackStorage(conf: SparkConf): Option[FallbackStorage] = { + if (conf.get(STORAGE_DECOMMISSION_FALLBACK_STORAGE_PATH).isDefined) { + Some(new FallbackStorage(conf)) + } else { + None + } + } + + /** Register the fallback block manager and its RPC endpoint. */ + def registerBlockManagerIfNeeded(master: BlockManagerMaster, conf: SparkConf): Unit = { + if (conf.get(STORAGE_DECOMMISSION_FALLBACK_STORAGE_PATH).isDefined) { + master.registerBlockManager( + FALLBACK_BLOCK_MANAGER_ID, Array.empty[String], 0, 0, new NoopRpcEndpointRef(conf)) + } + } + + /** Report block status to block manager master and map output tracker master. */ + private def reportBlockStatus(blockManager: BlockManager, blockId: BlockId, dataLength: Long) = { + assert(blockManager.master != null) + blockManager.master.updateBlockInfo( + FALLBACK_BLOCK_MANAGER_ID, blockId, StorageLevel.DISK_ONLY, memSize = 0, dataLength) + } + + /** + * Read a ManagedBuffer. 
+ */ + def read(conf: SparkConf, blockId: BlockId): ManagedBuffer = { + logInfo(s"Read $blockId") + val fallbackPath = new Path(conf.get(STORAGE_DECOMMISSION_FALLBACK_STORAGE_PATH).get) + val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf) + val fallbackFileSystem = FileSystem.get(fallbackPath.toUri, hadoopConf) + val appId = conf.getAppId + + val (shuffleId, mapId, startReduceId, endReduceId) = blockId match { + case id: ShuffleBlockId => + (id.shuffleId, id.mapId, id.reduceId, id.reduceId + 1) + case batchId: ShuffleBlockBatchId => + (batchId.shuffleId, batchId.mapId, batchId.startReduceId, batchId.endReduceId) + case _ => + throw new IllegalArgumentException("unexpected shuffle block id format: " + blockId) + } + + val name = ShuffleIndexBlockId(shuffleId, mapId, NOOP_REDUCE_ID).name + val indexFile = new Path(fallbackPath, s"$appId/$shuffleId/$name") + val start = startReduceId * 8L + val end = endReduceId * 8L + Utils.tryWithResource(fallbackFileSystem.open(indexFile)) { inputStream => + Utils.tryWithResource(new DataInputStream(inputStream)) { index => + index.skip(start) + val offset = index.readLong() + index.skip(end - (start + 8L)) + val nextOffset = index.readLong() + val name = ShuffleDataBlockId(shuffleId, mapId, NOOP_REDUCE_ID).name + val dataFile = new Path(fallbackPath, s"$appId/$shuffleId/$name") + val f = fallbackFileSystem.open(dataFile) + val size = nextOffset - 1 - offset + logDebug(s"To byte array $size") + val array = new Array[Byte](size.toInt) + val startTimeNs = System.nanoTime() + f.seek(offset) + f.read(array) + logDebug(s"Took ${(System.nanoTime() - startTimeNs) / (1000 * 1000)}ms") + f.close() + new NioManagedBuffer(ByteBuffer.wrap(array)) + } + } + } +} + diff --git a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala index e3b3fc5cc4565..fa4e46590aa5e 100644 --- a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala +++ b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala @@ -295,8 +295,9 @@ final class ShuffleBlockFetcherIterator( var hostLocalBlockBytes = 0L var remoteBlockBytes = 0L + val fallback = FallbackStorage.FALLBACK_BLOCK_MANAGER_ID.executorId for ((address, blockInfos) <- blocksByAddress) { - if (address.executorId == blockManager.blockManagerId.executorId) { + if (Seq(blockManager.blockManagerId.executorId, fallback).contains(address.executorId)) { checkBlockSizes(blockInfos) val mergedBlockInfos = mergeContinuousShuffleBlockIdsIfNeeded( blockInfos.map(info => FetchBlockInfo(info._1, info._2, info._3)), doBatchFetch) diff --git a/core/src/test/scala/org/apache/spark/storage/FallbackStorageSuite.scala b/core/src/test/scala/org/apache/spark/storage/FallbackStorageSuite.scala new file mode 100644 index 0000000000000..2eeae2ecad5eb --- /dev/null +++ b/core/src/test/scala/org/apache/spark/storage/FallbackStorageSuite.scala @@ -0,0 +1,269 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.storage + +import java.io.{DataOutputStream, FileOutputStream, IOException} +import java.nio.file.Files + +import scala.concurrent.duration._ + +import org.mockito.{ArgumentMatchers => mc} +import org.mockito.Mockito.{mock, times, verify, when} +import org.scalatest.concurrent.Eventually.{eventually, interval, timeout} + +import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkFunSuite, TestUtils} +import org.apache.spark.LocalSparkContext.withSpark +import org.apache.spark.internal.config._ +import org.apache.spark.launcher.SparkLauncher.{EXECUTOR_MEMORY, SPARK_MASTER} +import org.apache.spark.network.BlockTransferService +import org.apache.spark.network.buffer.ManagedBuffer +import org.apache.spark.scheduler.ExecutorDecommissionInfo +import org.apache.spark.scheduler.cluster.StandaloneSchedulerBackend +import org.apache.spark.shuffle.{IndexShuffleBlockResolver, ShuffleBlockInfo} +import org.apache.spark.shuffle.IndexShuffleBlockResolver.NOOP_REDUCE_ID +import org.apache.spark.util.Utils.tryWithResource + +class FallbackStorageSuite extends SparkFunSuite with LocalSparkContext { + + def getSparkConf(initialExecutor: Int = 1, minExecutor: Int = 1): SparkConf = { + new SparkConf(false) + .setAppName(getClass.getName) + .set(SPARK_MASTER, s"local-cluster[$initialExecutor,1,1024]") + .set(EXECUTOR_MEMORY, "1g") + .set(UI.UI_ENABLED, false) + .set(DYN_ALLOCATION_ENABLED, true) + .set(DYN_ALLOCATION_SHUFFLE_TRACKING_ENABLED, true) + .set(DYN_ALLOCATION_INITIAL_EXECUTORS, initialExecutor) + .set(DYN_ALLOCATION_MIN_EXECUTORS, minExecutor) + .set(DECOMMISSION_ENABLED, true) + .set(STORAGE_DECOMMISSION_ENABLED, true) + .set(STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED, true) + .set(STORAGE_DECOMMISSION_FALLBACK_STORAGE_PATH, + Files.createTempDirectory("tmp").toFile.getAbsolutePath + "/") + } + + test("fallback storage APIs - copy/exists") { + val conf = new SparkConf(false) + .set("spark.app.id", "testId") + .set(STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED, true) + .set(STORAGE_DECOMMISSION_FALLBACK_STORAGE_PATH, + Files.createTempDirectory("tmp").toFile.getAbsolutePath + "/") + val fallbackStorage = new FallbackStorage(conf) + val bmm = new BlockManagerMaster(new NoopRpcEndpointRef(conf), null, conf, false) + + val bm = mock(classOf[BlockManager]) + val dbm = new DiskBlockManager(conf, false) + when(bm.diskBlockManager).thenReturn(dbm) + when(bm.master).thenReturn(bmm) + val resolver = new IndexShuffleBlockResolver(conf, bm) + when(bm.migratableResolver).thenReturn(resolver) + + resolver.getIndexFile(1, 1L).createNewFile() + resolver.getDataFile(1, 1L).createNewFile() + + val indexFile = resolver.getIndexFile(1, 2L) + tryWithResource(new FileOutputStream(indexFile)) { fos => + tryWithResource(new DataOutputStream(fos)) { dos => + dos.writeLong(0) + dos.writeLong(4) + } + } + + val dataFile = resolver.getDataFile(1, 2L) + tryWithResource(new FileOutputStream(dataFile)) { fos => + tryWithResource(new DataOutputStream(fos)) { dos => + dos.writeLong(0) + } + } + + fallbackStorage.copy(ShuffleBlockInfo(1, 1L), bm) + 
fallbackStorage.copy(ShuffleBlockInfo(1, 2L), bm) + + assert(fallbackStorage.exists(1, ShuffleIndexBlockId(1, 1L, NOOP_REDUCE_ID).name)) + assert(fallbackStorage.exists(1, ShuffleDataBlockId(1, 1L, NOOP_REDUCE_ID).name)) + assert(fallbackStorage.exists(1, ShuffleIndexBlockId(1, 2L, NOOP_REDUCE_ID).name)) + assert(fallbackStorage.exists(1, ShuffleDataBlockId(1, 2L, NOOP_REDUCE_ID).name)) + + // The files for shuffle 1 and map 1 are empty intentionally. + intercept[java.io.EOFException] { + FallbackStorage.read(conf, ShuffleBlockId(1, 1L, 0)) + } + FallbackStorage.read(conf, ShuffleBlockId(1, 2L, 0)) + } + + test("migrate shuffle data to fallback storage") { + val conf = new SparkConf(false) + .set("spark.app.id", "testId") + .set(STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED, true) + .set(STORAGE_DECOMMISSION_FALLBACK_STORAGE_PATH, + Files.createTempDirectory("tmp").toFile.getAbsolutePath + "/") + + val ids = Set((1, 1L, 1)) + val bm = mock(classOf[BlockManager]) + val dbm = new DiskBlockManager(conf, false) + when(bm.diskBlockManager).thenReturn(dbm) + val indexShuffleBlockResolver = new IndexShuffleBlockResolver(conf, bm) + val indexFile = indexShuffleBlockResolver.getIndexFile(1, 1L) + val dataFile = indexShuffleBlockResolver.getDataFile(1, 1L) + indexFile.createNewFile() + dataFile.createNewFile() + + val resolver = mock(classOf[IndexShuffleBlockResolver]) + when(resolver.getStoredShuffles()) + .thenReturn(ids.map(triple => ShuffleBlockInfo(triple._1, triple._2)).toSeq) + ids.foreach { case (shuffleId: Int, mapId: Long, reduceId: Int) => + when(resolver.getMigrationBlocks(mc.any())) + .thenReturn(List( + (ShuffleIndexBlockId(shuffleId, mapId, reduceId), mock(classOf[ManagedBuffer])), + (ShuffleDataBlockId(shuffleId, mapId, reduceId), mock(classOf[ManagedBuffer])))) + when(resolver.getIndexFile(shuffleId, mapId)).thenReturn(indexFile) + when(resolver.getDataFile(shuffleId, mapId)).thenReturn(dataFile) + } + + when(bm.getPeers(mc.any())) + .thenReturn(Seq(FallbackStorage.FALLBACK_BLOCK_MANAGER_ID)) + val bmm = new BlockManagerMaster(new NoopRpcEndpointRef(conf), null, conf, false) + when(bm.master).thenReturn(bmm) + val blockTransferService = mock(classOf[BlockTransferService]) + when(blockTransferService.uploadBlockSync(mc.any(), mc.any(), mc.any(), mc.any(), mc.any(), + mc.any(), mc.any())).thenThrow(new IOException) + when(bm.blockTransferService).thenReturn(blockTransferService) + when(bm.migratableResolver).thenReturn(resolver) + when(bm.getMigratableRDDBlocks()).thenReturn(Seq()) + + val decommissioner = new BlockManagerDecommissioner(conf, bm) + + try { + decommissioner.start() + val fallbackStorage = new FallbackStorage(conf) + eventually(timeout(10.second), interval(1.seconds)) { + // uploadBlockSync is not used + verify(blockTransferService, times(1)) + .uploadBlockSync(mc.any(), mc.any(), mc.any(), mc.any(), mc.any(), mc.any(), mc.any()) + + Seq("shuffle_1_1_0.index", "shuffle_1_1_0.data").foreach { filename => + assert(fallbackStorage.exists(shuffleId = 1, filename)) + } + } + } finally { + decommissioner.stop() + } + } + + test("Upload from all decommissioned executors") { + sc = new SparkContext(getSparkConf(2, 2)) + withSpark(sc) { sc => + TestUtils.waitUntilExecutorsUp(sc, 2, 60000) + val rdd1 = sc.parallelize(1 to 10, 10) + val rdd2 = rdd1.map(x => (x % 2, 1)) + val rdd3 = rdd2.reduceByKey(_ + _) + assert(rdd3.count() === 2) + + // Decommission all + val sched = sc.schedulerBackend.asInstanceOf[StandaloneSchedulerBackend] + sc.getExecutorIds().foreach { + 
sched.decommissionExecutor(_, ExecutorDecommissionInfo(""), false) + } + + val files = Seq("shuffle_0_0_0.index", "shuffle_0_0_0.data") + val fallbackStorage = new FallbackStorage(sc.getConf) + // Uploading is not started yet. + files.foreach { file => assert(!fallbackStorage.exists(0, file)) } + + // Uploading is completed on decommissioned executors + eventually(timeout(20.seconds), interval(1.seconds)) { + files.foreach { file => assert(fallbackStorage.exists(0, file)) } + } + + // All executors are still alive. + assert(sc.getExecutorIds().size == 2) + } + } + + test("Upload multi stages") { + sc = new SparkContext(getSparkConf()) + withSpark(sc) { sc => + TestUtils.waitUntilExecutorsUp(sc, 1, 60000) + val rdd1 = sc.parallelize(1 to 10, 2) + val rdd2 = rdd1.map(x => (x % 2, 1)) + val rdd3 = rdd2.reduceByKey(_ + _) + val rdd4 = rdd3.sortByKey() + assert(rdd4.count() === 2) + + val shuffle0_files = Seq( + "shuffle_0_0_0.index", "shuffle_0_0_0.data", + "shuffle_0_1_0.index", "shuffle_0_1_0.data") + val shuffle1_files = Seq( + "shuffle_1_4_0.index", "shuffle_1_4_0.data", + "shuffle_1_5_0.index", "shuffle_1_5_0.data") + val fallbackStorage = new FallbackStorage(sc.getConf) + shuffle0_files.foreach { file => assert(!fallbackStorage.exists(0, file)) } + shuffle1_files.foreach { file => assert(!fallbackStorage.exists(1, file)) } + + // Decommission all + val sched = sc.schedulerBackend.asInstanceOf[StandaloneSchedulerBackend] + sc.getExecutorIds().foreach { + sched.decommissionExecutor(_, ExecutorDecommissionInfo(""), false) + } + + eventually(timeout(10.seconds), interval(1.seconds)) { + shuffle0_files.foreach { file => assert(fallbackStorage.exists(0, file)) } + shuffle1_files.foreach { file => assert(fallbackStorage.exists(1, file)) } + } + } + } + + test("Newly added executors should access old data from remote storage") { + sc = new SparkContext(getSparkConf(2, 0)) + withSpark(sc) { sc => + TestUtils.waitUntilExecutorsUp(sc, 2, 60000) + val rdd1 = sc.parallelize(1 to 10, 2) + val rdd2 = rdd1.map(x => (x % 2, 1)) + val rdd3 = rdd2.reduceByKey(_ + _) + assert(rdd3.collect() === Array((0, 5), (1, 5))) + + // Decommission all + val sched = sc.schedulerBackend.asInstanceOf[StandaloneSchedulerBackend] + sc.getExecutorIds().foreach { + sched.decommissionExecutor(_, ExecutorDecommissionInfo(""), false) + } + + // Make it sure that fallback storage are ready + val fallbackStorage = new FallbackStorage(sc.getConf) + eventually(timeout(10.seconds), interval(1.seconds)) { + Seq( + "shuffle_0_0_0.index", "shuffle_0_0_0.data", + "shuffle_0_1_0.index", "shuffle_0_1_0.data").foreach { file => + assert(fallbackStorage.exists(0, file)) + } + } + + // Since the data is safe, force to shrink down to zero executor + sc.getExecutorIds().foreach { id => + sched.killExecutor(id) + } + eventually(timeout(20.seconds), interval(1.seconds)) { + assert(sc.getExecutorIds().isEmpty) + } + + // Dynamic allocation will start new executors + assert(rdd3.collect() === Array((0, 5), (1, 5))) + assert(rdd3.sortByKey().count() == 2) + assert(sc.getExecutorIds().nonEmpty) + } + } +} From f5d2165c95fe83f24be9841807613950c1d5d6d0 Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Tue, 1 Dec 2020 06:44:15 +0900 Subject: [PATCH 0614/1009] [SPARK-33440][CORE] Use current timestamp with warning log in HadoopFSDelegationTokenProvider when the issue date for token is not set up properly ### What changes were proposed in this pull request? 
This PR proposes to use the current timestamp, with a warning log, when the issue date of a token is not set up properly. The next section explains the rationale in detail.

### Why are the changes needed?

Unfortunately, not every implementation respects the `issue date` in `AbstractDelegationTokenIdentifier`, which Spark relies on in its calculations. The default issue date is 0L, which is far from the actual issue date and breaks the calculation of the next renewal date under some circumstances, leading to a 0 interval (immediate) when rescheduling token renewal.

In HadoopFSDelegationTokenProvider, Spark calculates the token renewal interval as below:

https://github.com/apache/spark/blob/2c64b731ae6a976b0d75a95901db849b4a0e2393/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala#L123-L134

The interval is calculated as `token.renew() - identifier.getIssueDate`, which gives a correct interval as long as both `token.renew()` and `identifier.getIssueDate` produce correct values, but it goes wrong when `identifier.getIssueDate` returns 0L (the default value), like below:

```
20/10/13 06:34:19 INFO security.HadoopFSDelegationTokenProvider: Renewal interval is 1603175657000 for token S3ADelegationToken/IDBroker
20/10/13 06:34:19 INFO security.HadoopFSDelegationTokenProvider: Renewal interval is 86400048 for token HDFS_DELEGATION_TOKEN
```

We pick the minimum value as a safety guard (so in this case `86400048` is picked up), but here the safety guard has an unintended bad impact.

https://github.com/apache/spark/blob/2c64b731ae6a976b0d75a95901db849b4a0e2393/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala#L58-L71

Spark takes the interval calculated above (the "minimum" of the intervals), blindly adds it to each token's issue date to compute that token's next renewal date, and picks the "minimum" value again. In the problematic case, the value would be `86400048` (86400048 + 0), which is far smaller than the current timestamp.

https://github.com/apache/spark/blob/2c64b731ae6a976b0d75a95901db849b4a0e2393/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala#L228-L234

The next renewal date is then reduced by the current timestamp to get the interval, which is multiplied by the configured ratio to produce the final schedule interval. In the problematic case, this value goes negative.

https://github.com/apache/spark/blob/2c64b731ae6a976b0d75a95901db849b4a0e2393/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala#L180-L188

There is a safety guard that disallows a negative value, but it simply yields 0, which means "schedule immediately". That triggers the next calculation of the next renewal date and schedule interval, which leads to the same behavior, so the delegation token is updated immediately and continuously.

Since we fetch the token just before the calculation happens, the actual issue date is likely only slightly earlier, so it is not that dangerous to use the current timestamp as the issue date for a token whose issue date has not been set up properly. Still, it is better not to leave the token implementation as it is, so we log a warning message asking end users to consult the token implementer.

### Does this PR introduce _any_ user-facing change?

Yes. End users won't encounter the tight loop of token renewal scheduling after this PR. From the end users' perspective, there is nothing they need to change.
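To make the degenerate case concrete, here is a small illustration (not part of the patch) of the arithmetic described above. The `0.75` ratio is an assumed default for `spark.security.credentials.renewalRatio`; the other numbers follow the log excerpt.

```python
import time

# Broken token: issue date left at its 0L default.
issue_date_ms = 0
# Minimum renewal interval picked by HadoopFSDelegationTokenProvider (from the log above).
interval_ms = 86400048
# Next renewal date = issue date + interval, i.e. about one day after the epoch,
# which is decades in the past.
next_renewal_ms = issue_date_ms + interval_ms

now_ms = int(time.time() * 1000)
ratio = 0.75  # assumed default renewal ratio
delay_ms = int(ratio * (next_renewal_ms - now_ms))

# delay_ms is hugely negative; the manager clamps it to 0, so renewal is
# scheduled immediately, over and over again.
print(delay_ms)
```

### How was this patch tested?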
Manually tested with problematic environment. Closes #30366 from HeartSaVioR/SPARK-33440. Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../HadoopDelegationTokenManager.scala | 4 ++- .../HadoopFSDelegationTokenProvider.scala | 27 ++++++++++++++++--- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala b/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala index 3168c763df4df..6ce195b6c7a34 100644 --- a/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala +++ b/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala @@ -178,7 +178,7 @@ private[spark] class HadoopDelegationTokenManager( private def scheduleRenewal(delay: Long): Unit = { val _delay = math.max(0, delay) - logInfo(s"Scheduling renewal in ${UIUtils.formatDuration(delay)}.") + logInfo(s"Scheduling renewal in ${UIUtils.formatDuration(_delay)}.") val renewalTask = new Runnable() { override def run(): Unit = { @@ -230,6 +230,8 @@ private[spark] class HadoopDelegationTokenManager( val now = System.currentTimeMillis val ratio = sparkConf.get(CREDENTIALS_RENEWAL_INTERVAL_RATIO) val delay = (ratio * (nextRenewal - now)).toLong + logInfo(s"Calculated delay on renewal is $delay, based on next renewal $nextRenewal " + + s"and the ratio $ratio, and current time $now") scheduleRenewal(delay) creds } diff --git a/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala b/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala index a46864e2d3c9c..0dc6aa1d7ef30 100644 --- a/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala @@ -63,7 +63,8 @@ private[deploy] class HadoopFSDelegationTokenProvider val identifier = token .decodeIdentifier() .asInstanceOf[AbstractDelegationTokenIdentifier] - identifier.getIssueDate + interval + val tokenKind = token.getKind.toString + getIssueDate(tokenKind, identifier) + interval } if (nextRenewalDates.isEmpty) None else Some(nextRenewalDates.min) } @@ -126,13 +127,33 @@ private[deploy] class HadoopFSDelegationTokenProvider Try { val newExpiration = token.renew(hadoopConf) val identifier = token.decodeIdentifier().asInstanceOf[AbstractDelegationTokenIdentifier] - val interval = newExpiration - identifier.getIssueDate - logInfo(s"Renewal interval is $interval for token ${token.getKind.toString}") + val tokenKind = token.getKind.toString + val interval = newExpiration - getIssueDate(tokenKind, identifier) + logInfo(s"Renewal interval is $interval for token $tokenKind") interval }.toOption } if (renewIntervals.isEmpty) None else Some(renewIntervals.min) } + + private def getIssueDate(kind: String, identifier: AbstractDelegationTokenIdentifier): Long = { + val now = System.currentTimeMillis() + val issueDate = identifier.getIssueDate + if (issueDate > now) { + logWarning(s"Token $kind has set up issue date later than current time. (provided: " + + s"$issueDate / current timestamp: $now) Please make sure clocks are in sync between " + + "machines. 
If the issue is not a clock mismatch, consult token implementor to check " + + "whether issue date is valid.") + issueDate + } else if (issueDate > 0L) { + issueDate + } else { + logWarning(s"Token $kind has not set up issue date properly. (provided: $issueDate) " + + s"Using current timestamp ($now) as issue date instead. Consult token implementor to fix " + + "the behavior.") + now + } + } } private[deploy] object HadoopFSDelegationTokenProvider { From 596fbc1d292259c8850f026e2d7267056abee3bc Mon Sep 17 00:00:00 2001 From: Weichen Xu Date: Tue, 1 Dec 2020 09:52:19 +0900 Subject: [PATCH 0615/1009] [SPARK-33556][ML] Add array_to_vector function for dataframe column ### What changes were proposed in this pull request? Add array_to_vector function for dataframe column ### Why are the changes needed? Utility function for array to vector conversion. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? scala unit test & doctest. Closes #30498 from WeichenXu123/array_to_vec. Lead-authored-by: Weichen Xu Co-authored-by: Hyukjin Kwon Signed-off-by: HyukjinKwon --- .../scala/org/apache/spark/ml/functions.scala | 16 ++++++++- .../org/apache/spark/ml/FunctionsSuite.scala | 18 ++++++++-- python/docs/source/reference/pyspark.ml.rst | 1 + python/pyspark/ml/functions.py | 34 +++++++++++++++++++ python/pyspark/ml/functions.pyi | 2 ++ 5 files changed, 68 insertions(+), 3 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/functions.scala b/mllib/src/main/scala/org/apache/spark/ml/functions.scala index a0b6d11a46be9..43622a4f3edfb 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/functions.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/functions.scala @@ -18,7 +18,7 @@ package org.apache.spark.ml import org.apache.spark.annotation.Since -import org.apache.spark.ml.linalg.{SparseVector, Vector} +import org.apache.spark.ml.linalg.{SparseVector, Vector, Vectors} import org.apache.spark.mllib.linalg.{Vector => OldVector} import org.apache.spark.sql.Column import org.apache.spark.sql.functions.udf @@ -72,6 +72,20 @@ object functions { } } + private val arrayToVectorUdf = udf { array: Seq[Double] => + Vectors.dense(array.toArray) + } + + /** + * Converts a column of array of numeric type into a column of dense vectors in MLlib. + * @param v: the column of array<NumericType> type + * @return a column of type `org.apache.spark.ml.linalg.Vector` + * @since 3.1.0 + */ + def array_to_vector(v: Column): Column = { + arrayToVectorUdf(v) + } + private[ml] def checkNonNegativeWeight = udf { value: Double => require(value >= 0, s"illegal weight value: $value. weight must be >= 0.0.") diff --git a/mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala index 3dd9a7d8ec85d..21b823383d233 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala @@ -18,8 +18,8 @@ package org.apache.spark.ml import org.apache.spark.SparkException -import org.apache.spark.ml.functions.vector_to_array -import org.apache.spark.ml.linalg.Vectors +import org.apache.spark.ml.functions.{array_to_vector, vector_to_array} +import org.apache.spark.ml.linalg.{Vector, Vectors} import org.apache.spark.ml.util.MLTest import org.apache.spark.mllib.linalg.{Vectors => OldVectors} import org.apache.spark.sql.functions.col @@ -87,4 +87,18 @@ class FunctionsSuite extends MLTest { assert(thrown2.getMessage.contains( s"Unsupported dtype: float16. 
Valid values: float64, float32.")) } + + test("test array_to_vector") { + val df1 = Seq(Tuple1(Array(0.5, 1.5))).toDF("c1") + val resultVec = df1.select(array_to_vector(col("c1"))).collect()(0)(0).asInstanceOf[Vector] + assert(resultVec === Vectors.dense(Array(0.5, 1.5))) + + val df2 = Seq(Tuple1(Array(1.5f, 2.5f))).toDF("c1") + val resultVec2 = df2.select(array_to_vector(col("c1"))).collect()(0)(0).asInstanceOf[Vector] + assert(resultVec2 === Vectors.dense(Array(1.5, 2.5))) + + val df3 = Seq(Tuple1(Array(1, 2))).toDF("c1") + val resultVec3 = df3.select(array_to_vector(col("c1"))).collect()(0)(0).asInstanceOf[Vector] + assert(resultVec3 === Vectors.dense(Array(1.0, 2.0))) + } } diff --git a/python/docs/source/reference/pyspark.ml.rst b/python/docs/source/reference/pyspark.ml.rst index 5fafe5899f20b..2de0ff65a3ae8 100644 --- a/python/docs/source/reference/pyspark.ml.rst +++ b/python/docs/source/reference/pyspark.ml.rst @@ -196,6 +196,7 @@ ML Functions .. autosummary:: :toctree: api/ + array_to_vector vector_to_array diff --git a/python/pyspark/ml/functions.py b/python/pyspark/ml/functions.py index cf4a014d897fb..fb245a3d05827 100644 --- a/python/pyspark/ml/functions.py +++ b/python/pyspark/ml/functions.py @@ -69,6 +69,40 @@ def vector_to_array(col, dtype="float64"): sc._jvm.org.apache.spark.ml.functions.vector_to_array(_to_java_column(col), dtype)) +def array_to_vector(col): + """ + Converts a column of array of numeric type into a column of dense vectors in MLlib + + .. versionadded:: 3.1.0 + + Parameters + ---------- + col : :py:class:`pyspark.sql.Column` or str + Input column + + Returns + ------- + :py:class:`pyspark.sql.Column` + The converted column of MLlib dense vectors. + + Examples + -------- + >>> from pyspark.ml.functions import array_to_vector + >>> df1 = spark.createDataFrame([([1.5, 2.5],),], schema='v1 array') + >>> df1.select(array_to_vector('v1').alias('vec1')).collect() + [Row(vec1=DenseVector([1.5, 2.5]))] + >>> df2 = spark.createDataFrame([([1.5, 3.5],),], schema='v1 array') + >>> df2.select(array_to_vector('v1').alias('vec1')).collect() + [Row(vec1=DenseVector([1.5, 3.5]))] + >>> df3 = spark.createDataFrame([([1, 3],),], schema='v1 array') + >>> df3.select(array_to_vector('v1').alias('vec1')).collect() + [Row(vec1=DenseVector([1.0, 3.0]))] + """ + sc = SparkContext._active_spark_context + return Column( + sc._jvm.org.apache.spark.ml.functions.array_to_vector(_to_java_column(col))) + + def _test(): import doctest from pyspark.sql import SparkSession diff --git a/python/pyspark/ml/functions.pyi b/python/pyspark/ml/functions.pyi index 42650e742e781..12b44fc63b5b7 100644 --- a/python/pyspark/ml/functions.pyi +++ b/python/pyspark/ml/functions.pyi @@ -20,3 +20,5 @@ from pyspark import SparkContext as SparkContext, since as since # noqa: F401 from pyspark.sql.column import Column as Column def vector_to_array(col: Column) -> Column: ... + +def array_to_vector(col: Column) -> Column: ... From aeb3649fb9103a7541ef54f451c60fcd5a091934 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Tue, 1 Dec 2020 10:34:40 +0900 Subject: [PATCH 0616/1009] [SPARK-33613][PYTHON][TESTS] Replace deprecated APIs in pyspark tests ### What changes were proposed in this pull request? This replaces deprecated API usage in PySpark tests with the preferred APIs. These have been deprecated for some time and usage is not consistent within tests. - https://docs.python.org/3/library/unittest.html#deprecated-aliases ### Why are the changes needed? For consistency and eventual removal of deprecated APIs. 
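As a concrete illustration, here is a minimal sketch (not code from this patch; `DeprecatedAliasExample` is a hypothetical test case) of the renames applied throughout the tests:

```python
import unittest

import pandas as pd
# Deprecated import path replaced in the tests:
#   from pandas.util.testing import assert_frame_equal
from pandas.testing import assert_frame_equal


class DeprecatedAliasExample(unittest.TestCase):
    def test_preferred_apis(self):
        # assertEquals / assertRaisesRegexp are deprecated unittest aliases;
        # the preferred spellings are assertEqual / assertRaisesRegex.
        self.assertEqual(1 + 1, 2)
        with self.assertRaisesRegex(ValueError, "invalid literal"):
            int("not a number")
        assert_frame_equal(pd.DataFrame({"a": [1]}), pd.DataFrame({"a": [1]}))


if __name__ == "__main__":
    unittest.main()
```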
### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests Closes #30557 from BryanCutler/replace-deprecated-apis-in-tests. Authored-by: Bryan Cutler Signed-off-by: HyukjinKwon --- python/pyspark/ml/tests/test_feature.py | 2 +- python/pyspark/ml/tests/test_image.py | 6 +- python/pyspark/ml/tests/test_param.py | 2 +- python/pyspark/ml/tests/test_persistence.py | 2 +- python/pyspark/ml/tests/test_tuning.py | 4 +- python/pyspark/ml/tests/test_wrapper.py | 6 +- python/pyspark/sql/tests/test_arrow.py | 28 ++--- python/pyspark/sql/tests/test_catalog.py | 56 ++++----- python/pyspark/sql/tests/test_column.py | 10 +- python/pyspark/sql/tests/test_conf.py | 2 +- python/pyspark/sql/tests/test_dataframe.py | 78 ++++++------- python/pyspark/sql/tests/test_datasources.py | 10 +- python/pyspark/sql/tests/test_functions.py | 22 ++-- .../sql/tests/test_pandas_cogrouped_map.py | 14 +-- .../sql/tests/test_pandas_grouped_map.py | 32 +++--- python/pyspark/sql/tests/test_pandas_map.py | 8 +- python/pyspark/sql/tests/test_pandas_udf.py | 32 +++--- .../sql/tests/test_pandas_udf_grouped_agg.py | 16 +-- .../sql/tests/test_pandas_udf_scalar.py | 108 +++++++++--------- .../sql/tests/test_pandas_udf_typehints.py | 2 +- .../sql/tests/test_pandas_udf_window.py | 6 +- python/pyspark/sql/tests/test_types.py | 24 ++-- python/pyspark/sql/tests/test_udf.py | 28 ++--- python/pyspark/sql/tests/test_utils.py | 15 ++- python/pyspark/tests/test_profiler.py | 4 +- python/pyspark/tests/test_rdd.py | 30 ++--- python/pyspark/tests/test_worker.py | 2 +- 27 files changed, 274 insertions(+), 275 deletions(-) diff --git a/python/pyspark/ml/tests/test_feature.py b/python/pyspark/ml/tests/test_feature.py index 244110a986138..98b8ce6dfb95c 100644 --- a/python/pyspark/ml/tests/test_feature.py +++ b/python/pyspark/ml/tests/test_feature.py @@ -169,7 +169,7 @@ def test_count_vectorizer_from_vocab(self): # Test an empty vocabulary with QuietTest(self.sc): - with self.assertRaisesRegexp(Exception, "vocabSize.*invalid.*0"): + with self.assertRaisesRegex(Exception, "vocabSize.*invalid.*0"): CountVectorizerModel.from_vocabulary([], inputCol="words") # Test model with default settings can transform diff --git a/python/pyspark/ml/tests/test_image.py b/python/pyspark/ml/tests/test_image.py index 1001598779d48..00e4c95a84355 100644 --- a/python/pyspark/ml/tests/test_image.py +++ b/python/pyspark/ml/tests/test_image.py @@ -47,19 +47,19 @@ def test_read_images(self): self.assertEqual(ImageSchema.undefinedImageType, "Undefined") with QuietTest(self.sc): - self.assertRaisesRegexp( + self.assertRaisesRegex( TypeError, "image argument should be pyspark.sql.types.Row; however", lambda: ImageSchema.toNDArray("a")) with QuietTest(self.sc): - self.assertRaisesRegexp( + self.assertRaisesRegex( ValueError, "image argument should have attributes specified in", lambda: ImageSchema.toNDArray(Row(a=1))) with QuietTest(self.sc): - self.assertRaisesRegexp( + self.assertRaisesRegex( TypeError, "array argument should be numpy.ndarray; however, it got", lambda: ImageSchema.toImage("a")) diff --git a/python/pyspark/ml/tests/test_param.py b/python/pyspark/ml/tests/test_param.py index 4cddf50f36bdf..09fe21e9fdeca 100644 --- a/python/pyspark/ml/tests/test_param.py +++ b/python/pyspark/ml/tests/test_param.py @@ -308,7 +308,7 @@ def test_logistic_regression_check_thresholds(self): LogisticRegression ) - self.assertRaisesRegexp( + self.assertRaisesRegex( ValueError, "Logistic Regression getThreshold found inconsistent.*$", 
LogisticRegression, threshold=0.42, thresholds=[0.5, 0.5] diff --git a/python/pyspark/ml/tests/test_persistence.py b/python/pyspark/ml/tests/test_persistence.py index 826e6cd351d32..0bbcfcdf50e95 100644 --- a/python/pyspark/ml/tests/test_persistence.py +++ b/python/pyspark/ml/tests/test_persistence.py @@ -442,7 +442,7 @@ def test_default_read_write_default_params(self): del metadata['defaultParamMap'] metadataStr = json.dumps(metadata, separators=[',', ':']) loadedMetadata = reader._parseMetaData(metadataStr, ) - with self.assertRaisesRegexp(AssertionError, "`defaultParamMap` section not found"): + with self.assertRaisesRegex(AssertionError, "`defaultParamMap` section not found"): reader.getAndSetParams(lr, loadedMetadata) # Prior to 2.4.0, metadata doesn't have `defaultParamMap`. diff --git a/python/pyspark/ml/tests/test_tuning.py b/python/pyspark/ml/tests/test_tuning.py index 729e46419ae2c..ced32c07f245f 100644 --- a/python/pyspark/ml/tests/test_tuning.py +++ b/python/pyspark/ml/tests/test_tuning.py @@ -499,7 +499,7 @@ def test_invalid_user_specified_folds(self): evaluator=evaluator, numFolds=2, foldCol="fold") - with self.assertRaisesRegexp(Exception, "Fold number must be in range"): + with self.assertRaisesRegex(Exception, "Fold number must be in range"): cv.fit(dataset_with_folds) cv = CrossValidator(estimator=lr, @@ -507,7 +507,7 @@ def test_invalid_user_specified_folds(self): evaluator=evaluator, numFolds=4, foldCol="fold") - with self.assertRaisesRegexp(Exception, "The validation data at fold 3 is empty"): + with self.assertRaisesRegex(Exception, "The validation data at fold 3 is empty"): cv.fit(dataset_with_folds) diff --git a/python/pyspark/ml/tests/test_wrapper.py b/python/pyspark/ml/tests/test_wrapper.py index 31475299c7b98..8ed6a6bad95ed 100644 --- a/python/pyspark/ml/tests/test_wrapper.py +++ b/python/pyspark/ml/tests/test_wrapper.py @@ -54,7 +54,7 @@ def test_java_object_gets_detached(self): model.__del__() def condition(): - with self.assertRaisesRegexp(py4j.protocol.Py4JError, error_no_object): + with self.assertRaisesRegex(py4j.protocol.Py4JError, error_no_object): model._java_obj.toString() self.assertIn("LinearRegressionTrainingSummary", summary._java_obj.toString()) return True @@ -67,9 +67,9 @@ def condition(): pass def condition(): - with self.assertRaisesRegexp(py4j.protocol.Py4JError, error_no_object): + with self.assertRaisesRegex(py4j.protocol.Py4JError, error_no_object): model._java_obj.toString() - with self.assertRaisesRegexp(py4j.protocol.Py4JError, error_no_object): + with self.assertRaisesRegex(py4j.protocol.Py4JError, error_no_object): summary._java_obj.toString() return True diff --git a/python/pyspark/sql/tests/test_arrow.py b/python/pyspark/sql/tests/test_arrow.py index e764c42d88a31..bf80c62ea0542 100644 --- a/python/pyspark/sql/tests/test_arrow.py +++ b/python/pyspark/sql/tests/test_arrow.py @@ -34,7 +34,7 @@ if have_pandas: import pandas as pd - from pandas.util.testing import assert_frame_equal + from pandas.testing import assert_frame_equal if have_pyarrow: import pyarrow as pa # noqa: F401 @@ -137,7 +137,7 @@ def test_toPandas_fallback_disabled(self): df = self.spark.createDataFrame([(None,)], schema=schema) with QuietTest(self.sc): with self.warnings_lock: - with self.assertRaisesRegexp(Exception, 'Unsupported type'): + with self.assertRaisesRegex(Exception, 'Unsupported type'): df.toPandas() def test_null_conversion(self): @@ -214,7 +214,7 @@ def raise_exception(): exception_udf = udf(raise_exception, IntegerType()) df = df.withColumn("error", 
exception_udf()) with QuietTest(self.sc): - with self.assertRaisesRegexp(Exception, 'My error'): + with self.assertRaisesRegex(Exception, 'My error'): df.toPandas() def _createDataFrame_toggle(self, pdf, schema=None): @@ -228,7 +228,7 @@ def _createDataFrame_toggle(self, pdf, schema=None): def test_createDataFrame_toggle(self): pdf = self.create_pandas_data_frame() df_no_arrow, df_arrow = self._createDataFrame_toggle(pdf, schema=self.schema) - self.assertEquals(df_no_arrow.collect(), df_arrow.collect()) + self.assertEqual(df_no_arrow.collect(), df_arrow.collect()) def test_createDataFrame_respect_session_timezone(self): from datetime import timedelta @@ -258,7 +258,7 @@ def test_createDataFrame_respect_session_timezone(self): def test_createDataFrame_with_schema(self): pdf = self.create_pandas_data_frame() df = self.spark.createDataFrame(pdf, schema=self.schema) - self.assertEquals(self.schema, df.schema) + self.assertEqual(self.schema, df.schema) pdf_arrow = df.toPandas() assert_frame_equal(pdf_arrow, pdf) @@ -269,7 +269,7 @@ def test_createDataFrame_with_incorrect_schema(self): wrong_schema = StructType(fields) with self.sql_conf({"spark.sql.execution.pandas.convertToArrowArraySafely": False}): with QuietTest(self.sc): - with self.assertRaisesRegexp(Exception, "[D|d]ecimal.*got.*date"): + with self.assertRaisesRegex(Exception, "[D|d]ecimal.*got.*date"): self.spark.createDataFrame(pdf, schema=wrong_schema) def test_createDataFrame_with_names(self): @@ -277,23 +277,23 @@ def test_createDataFrame_with_names(self): new_names = list(map(str, range(len(self.schema.fieldNames())))) # Test that schema as a list of column names gets applied df = self.spark.createDataFrame(pdf, schema=list(new_names)) - self.assertEquals(df.schema.fieldNames(), new_names) + self.assertEqual(df.schema.fieldNames(), new_names) # Test that schema as tuple of column names gets applied df = self.spark.createDataFrame(pdf, schema=tuple(new_names)) - self.assertEquals(df.schema.fieldNames(), new_names) + self.assertEqual(df.schema.fieldNames(), new_names) def test_createDataFrame_column_name_encoding(self): pdf = pd.DataFrame({u'a': [1]}) columns = self.spark.createDataFrame(pdf).columns self.assertTrue(isinstance(columns[0], str)) - self.assertEquals(columns[0], 'a') + self.assertEqual(columns[0], 'a') columns = self.spark.createDataFrame(pdf, [u'b']).columns self.assertTrue(isinstance(columns[0], str)) - self.assertEquals(columns[0], 'b') + self.assertEqual(columns[0], 'b') def test_createDataFrame_with_single_data_type(self): with QuietTest(self.sc): - with self.assertRaisesRegexp(ValueError, ".*IntegerType.*not supported.*"): + with self.assertRaisesRegex(ValueError, ".*IntegerType.*not supported.*"): self.spark.createDataFrame(pd.DataFrame({"a": [1]}), schema="int") def test_createDataFrame_does_not_modify_input(self): @@ -311,7 +311,7 @@ def test_schema_conversion_roundtrip(self): from pyspark.sql.pandas.types import from_arrow_schema, to_arrow_schema arrow_schema = to_arrow_schema(self.schema) schema_rt = from_arrow_schema(arrow_schema) - self.assertEquals(self.schema, schema_rt) + self.assertEqual(self.schema, schema_rt) def test_createDataFrame_with_array_type(self): pdf = pd.DataFrame({"a": [[1, 2], [3, 4]], "b": [[u"x", u"y"], [u"y", u"z"]]}) @@ -420,7 +420,7 @@ def test_createDataFrame_fallback_enabled(self): def test_createDataFrame_fallback_disabled(self): with QuietTest(self.sc): - with self.assertRaisesRegexp(TypeError, 'Unsupported type'): + with self.assertRaisesRegex(TypeError, 'Unsupported type'): 
self.spark.createDataFrame( pd.DataFrame({"a": [[datetime.datetime(2015, 11, 1, 0, 30)]]}), "a: array") @@ -545,7 +545,7 @@ def tearDownClass(cls): cls.spark.stop() def test_exception_by_max_results(self): - with self.assertRaisesRegexp(Exception, "is bigger than"): + with self.assertRaisesRegex(Exception, "is bigger than"): self.spark.range(0, 10000, 1, 100).toPandas() diff --git a/python/pyspark/sql/tests/test_catalog.py b/python/pyspark/sql/tests/test_catalog.py index ca4e427a7db28..56e7c97020662 100644 --- a/python/pyspark/sql/tests/test_catalog.py +++ b/python/pyspark/sql/tests/test_catalog.py @@ -25,11 +25,11 @@ class CatalogTests(ReusedSQLTestCase): def test_current_database(self): spark = self.spark with self.database("some_db"): - self.assertEquals(spark.catalog.currentDatabase(), "default") + self.assertEqual(spark.catalog.currentDatabase(), "default") spark.sql("CREATE DATABASE some_db") spark.catalog.setCurrentDatabase("some_db") - self.assertEquals(spark.catalog.currentDatabase(), "some_db") - self.assertRaisesRegexp( + self.assertEqual(spark.catalog.currentDatabase(), "some_db") + self.assertRaisesRegex( AnalysisException, "does_not_exist", lambda: spark.catalog.setCurrentDatabase("does_not_exist")) @@ -38,10 +38,10 @@ def test_list_databases(self): spark = self.spark with self.database("some_db"): databases = [db.name for db in spark.catalog.listDatabases()] - self.assertEquals(databases, ["default"]) + self.assertEqual(databases, ["default"]) spark.sql("CREATE DATABASE some_db") databases = [db.name for db in spark.catalog.listDatabases()] - self.assertEquals(sorted(databases), ["default", "some_db"]) + self.assertEqual(sorted(databases), ["default", "some_db"]) def test_list_tables(self): from pyspark.sql.catalog import Table @@ -50,8 +50,8 @@ def test_list_tables(self): spark.sql("CREATE DATABASE some_db") with self.table("tab1", "some_db.tab2", "tab3_via_catalog"): with self.tempView("temp_tab"): - self.assertEquals(spark.catalog.listTables(), []) - self.assertEquals(spark.catalog.listTables("some_db"), []) + self.assertEqual(spark.catalog.listTables(), []) + self.assertEqual(spark.catalog.listTables("some_db"), []) spark.createDataFrame([(1, 1)]).createOrReplaceTempView("temp_tab") spark.sql("CREATE TABLE tab1 (name STRING, age INT) USING parquet") spark.sql("CREATE TABLE some_db.tab2 (name STRING, age INT) USING parquet") @@ -66,40 +66,40 @@ def test_list_tables(self): sorted(spark.catalog.listTables("default"), key=lambda t: t.name) tablesSomeDb = \ sorted(spark.catalog.listTables("some_db"), key=lambda t: t.name) - self.assertEquals(tables, tablesDefault) - self.assertEquals(len(tables), 3) - self.assertEquals(len(tablesSomeDb), 2) - self.assertEquals(tables[0], Table( + self.assertEqual(tables, tablesDefault) + self.assertEqual(len(tables), 3) + self.assertEqual(len(tablesSomeDb), 2) + self.assertEqual(tables[0], Table( name="tab1", database="default", description=None, tableType="MANAGED", isTemporary=False)) - self.assertEquals(tables[1], Table( + self.assertEqual(tables[1], Table( name="tab3_via_catalog", database="default", description=description, tableType="MANAGED", isTemporary=False)) - self.assertEquals(tables[2], Table( + self.assertEqual(tables[2], Table( name="temp_tab", database=None, description=None, tableType="TEMPORARY", isTemporary=True)) - self.assertEquals(tablesSomeDb[0], Table( + self.assertEqual(tablesSomeDb[0], Table( name="tab2", database="some_db", description=None, tableType="MANAGED", isTemporary=False)) - 
self.assertEquals(tablesSomeDb[1], Table( + self.assertEqual(tablesSomeDb[1], Table( name="temp_tab", database=None, description=None, tableType="TEMPORARY", isTemporary=True)) - self.assertRaisesRegexp( + self.assertRaisesRegex( AnalysisException, "does_not_exist", lambda: spark.catalog.listTables("does_not_exist")) @@ -119,12 +119,12 @@ def test_list_functions(self): self.assertTrue("to_timestamp" in functions) self.assertTrue("to_unix_timestamp" in functions) self.assertTrue("current_database" in functions) - self.assertEquals(functions["+"], Function( + self.assertEqual(functions["+"], Function( name="+", description=None, className="org.apache.spark.sql.catalyst.expressions.Add", isTemporary=True)) - self.assertEquals(functions, functionsDefault) + self.assertEqual(functions, functionsDefault) with self.function("func1", "some_db.func2"): spark.catalog.registerFunction("temp_func", lambda x: str(x)) @@ -141,7 +141,7 @@ def test_list_functions(self): self.assertTrue("temp_func" in newFunctionsSomeDb) self.assertTrue("func1" not in newFunctionsSomeDb) self.assertTrue("func2" in newFunctionsSomeDb) - self.assertRaisesRegexp( + self.assertRaisesRegex( AnalysisException, "does_not_exist", lambda: spark.catalog.listFunctions("does_not_exist")) @@ -158,16 +158,16 @@ def test_list_columns(self): columns = sorted(spark.catalog.listColumns("tab1"), key=lambda c: c.name) columnsDefault = \ sorted(spark.catalog.listColumns("tab1", "default"), key=lambda c: c.name) - self.assertEquals(columns, columnsDefault) - self.assertEquals(len(columns), 2) - self.assertEquals(columns[0], Column( + self.assertEqual(columns, columnsDefault) + self.assertEqual(len(columns), 2) + self.assertEqual(columns[0], Column( name="age", description=None, dataType="int", nullable=True, isPartition=False, isBucket=False)) - self.assertEquals(columns[1], Column( + self.assertEqual(columns[1], Column( name="name", description=None, dataType="string", @@ -176,26 +176,26 @@ def test_list_columns(self): isBucket=False)) columns2 = \ sorted(spark.catalog.listColumns("tab2", "some_db"), key=lambda c: c.name) - self.assertEquals(len(columns2), 2) - self.assertEquals(columns2[0], Column( + self.assertEqual(len(columns2), 2) + self.assertEqual(columns2[0], Column( name="nickname", description=None, dataType="string", nullable=True, isPartition=False, isBucket=False)) - self.assertEquals(columns2[1], Column( + self.assertEqual(columns2[1], Column( name="tolerance", description=None, dataType="float", nullable=True, isPartition=False, isBucket=False)) - self.assertRaisesRegexp( + self.assertRaisesRegex( AnalysisException, "tab2", lambda: spark.catalog.listColumns("tab2")) - self.assertRaisesRegexp( + self.assertRaisesRegex( AnalysisException, "does_not_exist", lambda: spark.catalog.listColumns("does_not_exist")) diff --git a/python/pyspark/sql/tests/test_column.py b/python/pyspark/sql/tests/test_column.py index 4a9c7106a12b0..2ae0a9bedd67d 100644 --- a/python/pyspark/sql/tests/test_column.py +++ b/python/pyspark/sql/tests/test_column.py @@ -47,7 +47,7 @@ def test_validate_column_types(self): self.assertTrue("Column" in _to_java_column(u"a").getClass().toString()) self.assertTrue("Column" in _to_java_column(self.spark.range(1).id).getClass().toString()) - self.assertRaisesRegexp( + self.assertRaisesRegex( TypeError, "Invalid argument, not a string or column", lambda: _to_java_column(1)) @@ -58,7 +58,7 @@ class A(): self.assertRaises(TypeError, lambda: _to_java_column(A())) self.assertRaises(TypeError, lambda: _to_java_column([])) - 
self.assertRaisesRegexp( + self.assertRaisesRegex( TypeError, "Invalid argument, not a string or column", lambda: udf(lambda x: x)(None)) @@ -79,9 +79,9 @@ def test_column_operators(self): cs.startswith('a'), cs.endswith('a'), ci.eqNullSafe(cs) self.assertTrue(all(isinstance(c, Column) for c in css)) self.assertTrue(isinstance(ci.cast(LongType()), Column)) - self.assertRaisesRegexp(ValueError, - "Cannot apply 'in' operator against a column", - lambda: 1 in cs) + self.assertRaisesRegex(ValueError, + "Cannot apply 'in' operator against a column", + lambda: 1 in cs) def test_column_accessor(self): from pyspark.sql.functions import col diff --git a/python/pyspark/sql/tests/test_conf.py b/python/pyspark/sql/tests/test_conf.py index 1cc0c1b7562c5..9222e2b8272d6 100644 --- a/python/pyspark/sql/tests/test_conf.py +++ b/python/pyspark/sql/tests/test_conf.py @@ -28,7 +28,7 @@ def test_conf(self): self.assertEqual(spark.conf.get("bogo"), "ta") self.assertEqual(spark.conf.get("bogo", "not.read"), "ta") self.assertEqual(spark.conf.get("not.set", "ta"), "ta") - self.assertRaisesRegexp(Exception, "not.set", lambda: spark.conf.get("not.set")) + self.assertRaisesRegex(Exception, "not.set", lambda: spark.conf.get("not.set")) spark.conf.unset("bogo") self.assertEqual(spark.conf.get("bogo", "colombia"), "colombia") diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py index d941707b8969f..e3977e8185180 100644 --- a/python/pyspark/sql/tests/test_dataframe.py +++ b/python/pyspark/sql/tests/test_dataframe.py @@ -343,7 +343,7 @@ def test_replace(self): self.spark.createDataFrame( [(u'Alice', 10, 80.1)], schema).replace({u"Alice": u"Bob", 10: 20}).first() - with self.assertRaisesRegexp( + with self.assertRaisesRegex( TypeError, 'value argument is required when to_replace is not a dictionary.'): self.spark.createDataFrame( @@ -390,7 +390,7 @@ def test_extended_hint_types(self): self.assertEqual(3, logical_plan.toString().count("itworks")) def test_sample(self): - self.assertRaisesRegexp( + self.assertRaisesRegex( TypeError, "should be a bool, float and number", lambda: self.spark.range(1).sample()) @@ -426,12 +426,12 @@ def test_toDF_with_schema_string(self): self.assertEqual(df.collect(), data) # number of fields must match. - self.assertRaisesRegexp(Exception, "Length of object", - lambda: rdd.toDF("key: int").collect()) + self.assertRaisesRegex(Exception, "Length of object", + lambda: rdd.toDF("key: int").collect()) # field types mismatch will cause exception at runtime. - self.assertRaisesRegexp(Exception, "FloatType can not accept", - lambda: rdd.toDF("key: float, value: string").collect()) + self.assertRaisesRegex(Exception, "FloatType can not accept", + lambda: rdd.toDF("key: float, value: string").collect()) # flat schema values will be wrapped into row. 
df = rdd.map(lambda row: row.key).toDF("int") @@ -491,15 +491,15 @@ def test_cache(self): spark.catalog.clearCache() self.assertFalse(spark.catalog.isCached("tab1")) self.assertFalse(spark.catalog.isCached("tab2")) - self.assertRaisesRegexp( + self.assertRaisesRegex( AnalysisException, "does_not_exist", lambda: spark.catalog.isCached("does_not_exist")) - self.assertRaisesRegexp( + self.assertRaisesRegex( AnalysisException, "does_not_exist", lambda: spark.catalog.cacheTable("does_not_exist")) - self.assertRaisesRegexp( + self.assertRaisesRegex( AnalysisException, "does_not_exist", lambda: spark.catalog.uncacheTable("does_not_exist")) @@ -523,12 +523,12 @@ def test_to_pandas(self): import numpy as np pdf = self._to_pandas() types = pdf.dtypes - self.assertEquals(types[0], np.int32) - self.assertEquals(types[1], np.object) - self.assertEquals(types[2], np.bool) - self.assertEquals(types[3], np.float32) - self.assertEquals(types[4], np.object) # datetime.date - self.assertEquals(types[5], 'datetime64[ns]') + self.assertEqual(types[0], np.int32) + self.assertEqual(types[1], np.object) + self.assertEqual(types[2], np.bool) + self.assertEqual(types[3], np.float32) + self.assertEqual(types[4], np.object) # datetime.date + self.assertEqual(types[5], 'datetime64[ns]') @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: ignore def test_to_pandas_with_duplicated_column_names(self): @@ -540,8 +540,8 @@ def test_to_pandas_with_duplicated_column_names(self): df = self.spark.sql(sql) pdf = df.toPandas() types = pdf.dtypes - self.assertEquals(types.iloc[0], np.int32) - self.assertEquals(types.iloc[1], np.int32) + self.assertEqual(types.iloc[0], np.int32) + self.assertEqual(types.iloc[1], np.int32) @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: ignore def test_to_pandas_on_cross_join(self): @@ -560,13 +560,13 @@ def test_to_pandas_on_cross_join(self): df = self.spark.sql(sql) pdf = df.toPandas() types = pdf.dtypes - self.assertEquals(types.iloc[0], np.int32) - self.assertEquals(types.iloc[1], np.int32) + self.assertEqual(types.iloc[0], np.int32) + self.assertEqual(types.iloc[1], np.int32) @unittest.skipIf(have_pandas, "Required Pandas was found.") def test_to_pandas_required_pandas_not_found(self): with QuietTest(self.sc): - with self.assertRaisesRegexp(ImportError, 'Pandas >= .* must be installed'): + with self.assertRaisesRegex(ImportError, 'Pandas >= .* must be installed'): self._to_pandas() @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: ignore @@ -577,9 +577,9 @@ def test_to_pandas_avoid_astype(self): data = [(1, "foo", 16777220), (None, "bar", None)] df = self.spark.createDataFrame(data, schema) types = df.toPandas().dtypes - self.assertEquals(types[0], np.float64) # doesn't convert to np.int32 due to NaN value. - self.assertEquals(types[1], np.object) - self.assertEquals(types[2], np.float64) + self.assertEqual(types[0], np.float64) # doesn't convert to np.int32 due to NaN value. 
+ self.assertEqual(types[1], np.object) + self.assertEqual(types[2], np.float64) @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: ignore def test_to_pandas_from_empty_dataframe(self): @@ -675,7 +675,7 @@ def test_create_dataframe_from_pandas_with_timestamp(self): @unittest.skipIf(have_pandas, "Required Pandas was found.") def test_create_dataframe_required_pandas_not_found(self): with QuietTest(self.sc): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( ImportError, "(Pandas >= .* must be installed|No module named '?pandas'?)"): import pandas as pd @@ -688,7 +688,7 @@ def test_create_dataframe_required_pandas_not_found(self): @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: ignore def test_create_dataframe_from_pandas_with_dst(self): import pandas as pd - from pandas.util.testing import assert_frame_equal + from pandas.testing import assert_frame_equal from datetime import datetime pdf = pd.DataFrame({'time': [datetime(2015, 10, 31, 22, 30)]}) @@ -724,7 +724,7 @@ def test_repr_behaviors(self): ||22222|22222| |+-----+-----+ |""" - self.assertEquals(re.sub(pattern, '', expected1), df.__repr__()) + self.assertEqual(re.sub(pattern, '', expected1), df.__repr__()) with self.sql_conf({"spark.sql.repl.eagerEval.truncate": 3}): expected2 = """+---+-----+ ||key|value| @@ -733,7 +733,7 @@ def test_repr_behaviors(self): ||222| 222| |+---+-----+ |""" - self.assertEquals(re.sub(pattern, '', expected2), df.__repr__()) + self.assertEqual(re.sub(pattern, '', expected2), df.__repr__()) with self.sql_conf({"spark.sql.repl.eagerEval.maxNumRows": 1}): expected3 = """+---+-----+ ||key|value| @@ -742,7 +742,7 @@ def test_repr_behaviors(self): |+---+-----+ |only showing top 1 row |""" - self.assertEquals(re.sub(pattern, '', expected3), df.__repr__()) + self.assertEqual(re.sub(pattern, '', expected3), df.__repr__()) # test when eager evaluation is enabled and _repr_html_ will be called with self.sql_conf({"spark.sql.repl.eagerEval.enabled": True}): @@ -752,7 +752,7 @@ def test_repr_behaviors(self): | |
                |<tr><td>22222</td><td>22222</td></tr>
      |""" - self.assertEquals(re.sub(pattern, '', expected1), df._repr_html_()) + self.assertEqual(re.sub(pattern, '', expected1), df._repr_html_()) with self.sql_conf({"spark.sql.repl.eagerEval.truncate": 3}): expected2 = """ | @@ -760,7 +760,7 @@ def test_repr_behaviors(self): | |
                |<tr><th>key</th><th>value</th></tr>
                |<tr><td>222</td><td>222</td></tr>
      |""" - self.assertEquals(re.sub(pattern, '', expected2), df._repr_html_()) + self.assertEqual(re.sub(pattern, '', expected2), df._repr_html_()) with self.sql_conf({"spark.sql.repl.eagerEval.maxNumRows": 1}): expected3 = """ | @@ -768,19 +768,19 @@ def test_repr_behaviors(self): |
                |<tr><th>key</th><th>value</th></tr>
      |only showing top 1 row |""" - self.assertEquals(re.sub(pattern, '', expected3), df._repr_html_()) + self.assertEqual(re.sub(pattern, '', expected3), df._repr_html_()) # test when eager evaluation is disabled and _repr_html_ will be called with self.sql_conf({"spark.sql.repl.eagerEval.enabled": False}): expected = "DataFrame[key: bigint, value: string]" - self.assertEquals(None, df._repr_html_()) - self.assertEquals(expected, df.__repr__()) + self.assertEqual(None, df._repr_html_()) + self.assertEqual(expected, df.__repr__()) with self.sql_conf({"spark.sql.repl.eagerEval.truncate": 3}): - self.assertEquals(None, df._repr_html_()) - self.assertEquals(expected, df.__repr__()) + self.assertEqual(None, df._repr_html_()) + self.assertEqual(expected, df.__repr__()) with self.sql_conf({"spark.sql.repl.eagerEval.maxNumRows": 1}): - self.assertEquals(None, df._repr_html_()) - self.assertEquals(expected, df.__repr__()) + self.assertEqual(None, df._repr_html_()) + self.assertEqual(expected, df.__repr__()) def test_to_local_iterator(self): df = self.spark.range(8, numPartitions=4) @@ -818,7 +818,7 @@ def test_to_local_iterator_not_fully_consumed(self): def test_same_semantics_error(self): with QuietTest(self.sc): - with self.assertRaisesRegexp(ValueError, "should be of DataFrame.*int"): + with self.assertRaisesRegex(ValueError, "should be of DataFrame.*int"): self.spark.range(10).sameSemantics(1) def test_input_files(self): @@ -830,7 +830,7 @@ def test_input_files(self): input_files_list = self.spark.read.parquet(tpath).inputFiles() # input files list should contain 10 entries - self.assertEquals(len(input_files_list), 10) + self.assertEqual(len(input_files_list), 10) # all file paths in list must contain tpath for file_path in input_files_list: self.assertTrue(tpath in file_path) diff --git a/python/pyspark/sql/tests/test_datasources.py b/python/pyspark/sql/tests/test_datasources.py index 9425494fb0d90..26a6c58dbad6b 100644 --- a/python/pyspark/sql/tests/test_datasources.py +++ b/python/pyspark/sql/tests/test_datasources.py @@ -107,7 +107,7 @@ def test_read_text_file_list(self): df = self.spark.read.text(['python/test_support/sql/text-test.txt', 'python/test_support/sql/text-test.txt']) count = df.count() - self.assertEquals(count, 4) + self.assertEqual(count, 4) def test_json_sampling_ratio(self): rdd = self.spark.sparkContext.range(0, 100, 1, 1) \ @@ -115,14 +115,14 @@ def test_json_sampling_ratio(self): schema = self.spark.read.option('inferSchema', True) \ .option('samplingRatio', 0.5) \ .json(rdd).schema - self.assertEquals(schema, StructType([StructField("a", LongType(), True)])) + self.assertEqual(schema, StructType([StructField("a", LongType(), True)])) def test_csv_sampling_ratio(self): rdd = self.spark.sparkContext.range(0, 100, 1, 1) \ .map(lambda x: '0.1' if x == 1 else str(x)) schema = self.spark.read.option('inferSchema', True)\ .csv(rdd, samplingRatio=0.5).schema - self.assertEquals(schema, StructType([StructField("_c0", IntegerType(), True)])) + self.assertEqual(schema, StructType([StructField("_c0", IntegerType(), True)])) def test_checking_csv_header(self): path = tempfile.mkdtemp() @@ -135,7 +135,7 @@ def test_checking_csv_header(self): StructField('f1', IntegerType(), nullable=True)]) df = self.spark.read.option('header', 'true').schema(schema)\ .csv(path, enforceSchema=False) - self.assertRaisesRegexp( + self.assertRaisesRegex( Exception, "CSV header does not conform to the schema", lambda: df.collect()) @@ -154,7 +154,7 @@ def test_ignore_column_of_all_nulls(self): 
StructField('b', LongType(), nullable=True), StructField('c', StringType(), nullable=True)]) readback = self.spark.read.json(path, dropFieldIfAllNull=True) - self.assertEquals(readback.schema, schema) + self.assertEqual(readback.schema, schema) finally: shutil.rmtree(path) diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 2858bdeca0d5a..58599a9fa42f5 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -185,7 +185,7 @@ def test_string_functions(self): ] df = self.spark.createDataFrame([['nick']], schema=['name']) - self.assertRaisesRegexp( + self.assertRaisesRegex( TypeError, "must be the same type", lambda: df.select(col('name').substr(0, lit(1)))) @@ -321,16 +321,16 @@ def test_sort_with_nulls_order(self): df = self.spark.createDataFrame( [('Tom', 80), (None, 60), ('Alice', 50)], ["name", "height"]) - self.assertEquals( + self.assertEqual( df.select(df.name).orderBy(functions.asc_nulls_first('name')).collect(), [Row(name=None), Row(name=u'Alice'), Row(name=u'Tom')]) - self.assertEquals( + self.assertEqual( df.select(df.name).orderBy(functions.asc_nulls_last('name')).collect(), [Row(name=u'Alice'), Row(name=u'Tom'), Row(name=None)]) - self.assertEquals( + self.assertEqual( df.select(df.name).orderBy(functions.desc_nulls_first('name')).collect(), [Row(name=None), Row(name=u'Tom'), Row(name=u'Alice')]) - self.assertEquals( + self.assertEqual( df.select(df.name).orderBy(functions.desc_nulls_last('name')).collect(), [Row(name=u'Tom'), Row(name=u'Alice'), Row(name=None)]) @@ -354,7 +354,7 @@ def test_slice(self): df = self.spark.createDataFrame([([1, 2, 3],), ([4, 5],)], ['x']) - self.assertEquals( + self.assertEqual( df.select(slice(df.x, 2, 2).alias("sliced")).collect(), df.select(slice(df.x, lit(2), lit(2)).alias("sliced")).collect(), ) @@ -364,7 +364,7 @@ def test_array_repeat(self): df = self.spark.range(1) - self.assertEquals( + self.assertEqual( df.select(array_repeat("id", 3)).toDF("val").collect(), df.select(array_repeat("id", lit(3))).toDF("val").collect(), ) @@ -580,14 +580,14 @@ def test_datetime_functions(self): from datetime import date df = self.spark.range(1).selectExpr("'2017-01-22' as dateCol") parse_result = df.select(functions.to_date(functions.col("dateCol"))).first() - self.assertEquals(date(2017, 1, 22), parse_result['to_date(dateCol)']) + self.assertEqual(date(2017, 1, 22), parse_result['to_date(dateCol)']) def test_assert_true(self): from pyspark.sql.functions import assert_true df = self.spark.range(3) - self.assertEquals( + self.assertEqual( df.select(assert_true(df.id < 3)).toDF("val").collect(), [Row(val=None), Row(val=None), Row(val=None)], ) @@ -604,7 +604,7 @@ def test_assert_true(self): with self.assertRaises(TypeError) as cm: df.select(assert_true(df.id < 2, 5)) - self.assertEquals( + self.assertEqual( "errMsg should be a Column or a str, got ", str(cm.exception) ) @@ -626,7 +626,7 @@ def test_raise_error(self): with self.assertRaises(TypeError) as cm: df.select(raise_error(None)) - self.assertEquals( + self.assertEqual( "errMsg should be a Column or a str, got ", str(cm.exception) ) diff --git a/python/pyspark/sql/tests/test_pandas_cogrouped_map.py b/python/pyspark/sql/tests/test_pandas_cogrouped_map.py index 4afc1dfcc1c6e..3c016e04adf2e 100644 --- a/python/pyspark/sql/tests/test_pandas_cogrouped_map.py +++ b/python/pyspark/sql/tests/test_pandas_cogrouped_map.py @@ -25,7 +25,7 @@ if have_pandas: import pandas as pd - from pandas.util.testing import 
assert_frame_equal + from pandas.testing import assert_frame_equal if have_pyarrow: import pyarrow as pa # noqa: F401 @@ -135,8 +135,8 @@ def test_mixed_scalar_udfs_followed_by_cogrouby_apply(self): .applyInPandas(lambda x, y: pd.DataFrame([(x.sum().sum(), y.sum().sum())]), 'sum1 int, sum2 int').collect() - self.assertEquals(result[0]['sum1'], 165) - self.assertEquals(result[0]['sum2'], 165) + self.assertEqual(result[0]['sum1'], 165) + self.assertEqual(result[0]['sum2'], 165) def test_with_key_left(self): self._test_with_key(self.data1, self.data1, isLeft=True) @@ -174,7 +174,7 @@ def test_wrong_return_type(self): left = self.data1 right = self.data2 with QuietTest(self.sc): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( NotImplementedError, 'Invalid return type.*ArrayType.*TimestampType'): left.groupby('id').cogroup(right.groupby('id')).applyInPandas( @@ -183,7 +183,7 @@ def test_wrong_return_type(self): def test_wrong_args(self): left = self.data1 right = self.data2 - with self.assertRaisesRegexp(ValueError, 'Invalid function'): + with self.assertRaisesRegex(ValueError, 'Invalid function'): left.groupby('id').cogroup(right.groupby('id')) \ .applyInPandas(lambda: 1, StructType([StructField("d", DoubleType())])) @@ -194,14 +194,14 @@ def test_case_insensitive_grouping_column(self): row = df1.groupby("ColUmn").cogroup( df1.groupby("COLUMN") ).applyInPandas(lambda r, l: r + l, "column long, value long").first() - self.assertEquals(row.asDict(), Row(column=2, value=2).asDict()) + self.assertEqual(row.asDict(), Row(column=2, value=2).asDict()) df2 = self.spark.createDataFrame([(1, 1)], ("column", "value")) row = df1.groupby("ColUmn").cogroup( df2.groupby("COLUMN") ).applyInPandas(lambda r, l: r + l, "column long, value long").first() - self.assertEquals(row.asDict(), Row(column=2, value=2).asDict()) + self.assertEqual(row.asDict(), Row(column=2, value=2).asDict()) @staticmethod def _test_with_key(left, right, isLeft): diff --git a/python/pyspark/sql/tests/test_pandas_grouped_map.py b/python/pyspark/sql/tests/test_pandas_grouped_map.py index a639a8d51f55c..64803a6574675 100644 --- a/python/pyspark/sql/tests/test_pandas_grouped_map.py +++ b/python/pyspark/sql/tests/test_pandas_grouped_map.py @@ -33,7 +33,7 @@ if have_pandas: import pandas as pd - from pandas.util.testing import assert_frame_equal + from pandas.testing import assert_frame_equal if have_pyarrow: import pyarrow as pa # noqa: F401 @@ -160,7 +160,7 @@ def test_array_type_correct(self): def test_register_grouped_map_udf(self): foo_udf = pandas_udf(lambda x: x, "id long", PandasUDFType.GROUPED_MAP) with QuietTest(self.sc): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( ValueError, 'f.*SQL_BATCHED_UDF.*SQL_SCALAR_PANDAS_UDF.*SQL_GROUPED_AGG_PANDAS_UDF.*'): self.spark.catalog.registerFunction("foo_udf", foo_udf) @@ -244,7 +244,7 @@ def test_datatype_string(self): def test_wrong_return_type(self): with QuietTest(self.sc): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( NotImplementedError, 'Invalid return type.*grouped map Pandas UDF.*ArrayType.*TimestampType'): pandas_udf( @@ -256,20 +256,20 @@ def test_wrong_args(self): df = self.data with QuietTest(self.sc): - with self.assertRaisesRegexp(ValueError, 'Invalid udf'): + with self.assertRaisesRegex(ValueError, 'Invalid udf'): df.groupby('id').apply(lambda x: x) - with self.assertRaisesRegexp(ValueError, 'Invalid udf'): + with self.assertRaisesRegex(ValueError, 'Invalid udf'): df.groupby('id').apply(udf(lambda x: x, DoubleType())) - with 
self.assertRaisesRegexp(ValueError, 'Invalid udf'): + with self.assertRaisesRegex(ValueError, 'Invalid udf'): df.groupby('id').apply(sum(df.v)) - with self.assertRaisesRegexp(ValueError, 'Invalid udf'): + with self.assertRaisesRegex(ValueError, 'Invalid udf'): df.groupby('id').apply(df.v + 1) - with self.assertRaisesRegexp(ValueError, 'Invalid function'): + with self.assertRaisesRegex(ValueError, 'Invalid function'): df.groupby('id').apply( pandas_udf(lambda: 1, StructType([StructField("d", DoubleType())]))) - with self.assertRaisesRegexp(ValueError, 'Invalid udf'): + with self.assertRaisesRegex(ValueError, 'Invalid udf'): df.groupby('id').apply(pandas_udf(lambda x, y: x, DoubleType())) - with self.assertRaisesRegexp(ValueError, 'Invalid udf.*GROUPED_MAP'): + with self.assertRaisesRegex(ValueError, 'Invalid udf.*GROUPED_MAP'): df.groupby('id').apply( pandas_udf(lambda x, y: x, DoubleType(), PandasUDFType.SCALAR)) @@ -284,7 +284,7 @@ def test_unsupported_types(self): for unsupported_type in unsupported_types: schema = StructType([StructField('id', LongType(), True), unsupported_type]) with QuietTest(self.sc): - with self.assertRaisesRegexp(NotImplementedError, common_err_msg): + with self.assertRaisesRegex(NotImplementedError, common_err_msg): pandas_udf(lambda x: x, schema, PandasUDFType.GROUPED_MAP) # Regression test for SPARK-23314 @@ -451,9 +451,9 @@ def invalid_positional_types(pdf): with self.sql_conf({"spark.sql.execution.pandas.convertToArrowArraySafely": False}): with QuietTest(self.sc): - with self.assertRaisesRegexp(Exception, "KeyError: 'id'"): + with self.assertRaisesRegex(Exception, "KeyError: 'id'"): grouped_df.apply(column_name_typo).collect() - with self.assertRaisesRegexp(Exception, "[D|d]ecimal.*got.*date"): + with self.assertRaisesRegex(Exception, "[D|d]ecimal.*got.*date"): grouped_df.apply(invalid_positional_types).collect() def test_positional_assignment_conf(self): @@ -482,7 +482,7 @@ def dummy_pandas_udf(df): # this was throwing an AnalysisException before SPARK-24208 res = df_with_pandas.alias('temp0').join(df_with_pandas.alias('temp1'), col('temp0.key') == col('temp1.key')) - self.assertEquals(res.count(), 5) + self.assertEqual(res.count(), 5) def test_mixed_scalar_udfs_followed_by_groupby_apply(self): df = self.spark.range(0, 10).toDF('v1') @@ -494,7 +494,7 @@ def test_mixed_scalar_udfs_followed_by_groupby_apply(self): 'sum int', PandasUDFType.GROUPED_MAP)) - self.assertEquals(result.collect()[0]['sum'], 165) + self.assertEqual(result.collect()[0]['sum'], 165) def test_grouped_with_empty_partition(self): data = [Row(id=1, x=2), Row(id=1, x=3), Row(id=2, x=4)] @@ -604,7 +604,7 @@ def my_pandas_udf(pdf): df = self.spark.createDataFrame([[1, 1]], ["column", "score"]) row = df.groupby('COLUMN').applyInPandas( my_pandas_udf, schema="column integer, score float").first() - self.assertEquals(row.asDict(), Row(column=1, score=0.5).asDict()) + self.assertEqual(row.asDict(), Row(column=1, score=0.5).asDict()) if __name__ == "__main__": diff --git a/python/pyspark/sql/tests/test_pandas_map.py b/python/pyspark/sql/tests/test_pandas_map.py index 3ca437f75fc23..d53face702201 100644 --- a/python/pyspark/sql/tests/test_pandas_map.py +++ b/python/pyspark/sql/tests/test_pandas_map.py @@ -61,7 +61,7 @@ def func(iterator): df = self.spark.range(10) actual = df.mapInPandas(func, 'id long').collect() expected = df.collect() - self.assertEquals(actual, expected) + self.assertEqual(actual, expected) def test_multiple_columns(self): data = [(1, "foo"), (2, None), (3, "bar"), (4, "bar")] 
@@ -75,7 +75,7 @@ def func(iterator): actual = df.mapInPandas(func, df.schema).collect() expected = df.collect() - self.assertEquals(actual, expected) + self.assertEqual(actual, expected) def test_different_output_length(self): def func(iterator): @@ -84,7 +84,7 @@ def func(iterator): df = self.spark.range(10) actual = df.repartition(1).mapInPandas(func, 'a long').collect() - self.assertEquals(set((r.a for r in actual)), set(range(100))) + self.assertEqual(set((r.a for r in actual)), set(range(100))) def test_empty_iterator(self): def empty_iter(_): @@ -110,7 +110,7 @@ def func(iterator): df = self.spark.range(10) actual = df.mapInPandas(func, 'id long').mapInPandas(func, 'id long').collect() expected = df.collect() - self.assertEquals(actual, expected) + self.assertEqual(actual, expected) if __name__ == "__main__": diff --git a/python/pyspark/sql/tests/test_pandas_udf.py b/python/pyspark/sql/tests/test_pandas_udf.py index cc742fc4267cb..975eb4680dd04 100644 --- a/python/pyspark/sql/tests/test_pandas_udf.py +++ b/python/pyspark/sql/tests/test_pandas_udf.py @@ -114,31 +114,31 @@ def test_udf_wrong_arg(self): @pandas_udf('blah') def foo(x): return x - with self.assertRaisesRegexp(ValueError, 'Invalid return type.*None'): + with self.assertRaisesRegex(ValueError, 'Invalid return type.*None'): @pandas_udf(functionType=PandasUDFType.SCALAR) def foo(x): return x - with self.assertRaisesRegexp(ValueError, 'Invalid function'): + with self.assertRaisesRegex(ValueError, 'Invalid function'): @pandas_udf('double', 100) def foo(x): return x - with self.assertRaisesRegexp(ValueError, '0-arg pandas_udfs.*not.*supported'): + with self.assertRaisesRegex(ValueError, '0-arg pandas_udfs.*not.*supported'): pandas_udf(lambda: 1, LongType(), PandasUDFType.SCALAR) - with self.assertRaisesRegexp(ValueError, '0-arg pandas_udfs.*not.*supported'): + with self.assertRaisesRegex(ValueError, '0-arg pandas_udfs.*not.*supported'): @pandas_udf(LongType(), PandasUDFType.SCALAR) def zero_with_type(): return 1 - with self.assertRaisesRegexp(TypeError, 'Invalid return type'): + with self.assertRaisesRegex(TypeError, 'Invalid return type'): @pandas_udf(returnType=PandasUDFType.GROUPED_MAP) def foo(df): return df - with self.assertRaisesRegexp(TypeError, 'Invalid return type'): + with self.assertRaisesRegex(TypeError, 'Invalid return type'): @pandas_udf(returnType='double', functionType=PandasUDFType.GROUPED_MAP) def foo(df): return df - with self.assertRaisesRegexp(ValueError, 'Invalid function'): + with self.assertRaisesRegex(ValueError, 'Invalid function'): @pandas_udf(returnType='k int, v double', functionType=PandasUDFType.GROUPED_MAP) def foo(k, v, w): return k @@ -154,14 +154,14 @@ def foofoo(x, y): df = self.spark.range(0, 100) # plain udf (test for SPARK-23754) - self.assertRaisesRegexp( + self.assertRaisesRegex( PythonException, exc_message, df.withColumn('v', udf(foo)('id')).collect ) # pandas scalar udf - self.assertRaisesRegexp( + self.assertRaisesRegex( PythonException, exc_message, df.withColumn( @@ -170,7 +170,7 @@ def foofoo(x, y): ) # pandas grouped map - self.assertRaisesRegexp( + self.assertRaisesRegex( PythonException, exc_message, df.groupBy('id').apply( @@ -178,7 +178,7 @@ def foofoo(x, y): ).collect ) - self.assertRaisesRegexp( + self.assertRaisesRegex( PythonException, exc_message, df.groupBy('id').apply( @@ -187,7 +187,7 @@ def foofoo(x, y): ) # pandas grouped agg - self.assertRaisesRegexp( + self.assertRaisesRegex( PythonException, exc_message, df.groupBy('id').agg( @@ -210,8 +210,8 @@ def udf(column): 
# Since 0.11.0, PyArrow supports the feature to raise an error for unsafe cast. with self.sql_conf({ "spark.sql.execution.pandas.convertToArrowArraySafely": True}): - with self.assertRaisesRegexp(Exception, - "Exception thrown when converting pandas.Series"): + with self.assertRaisesRegex(Exception, + "Exception thrown when converting pandas.Series"): df.select(['A']).withColumn('udf', udf('A')).collect() # Disabling Arrow safe type check. @@ -231,8 +231,8 @@ def udf(column): # When enabling safe type check, Arrow 0.11.0+ disallows overflow cast. with self.sql_conf({ "spark.sql.execution.pandas.convertToArrowArraySafely": True}): - with self.assertRaisesRegexp(Exception, - "Exception thrown when converting pandas.Series"): + with self.assertRaisesRegex(Exception, + "Exception thrown when converting pandas.Series"): df.withColumn('udf', udf('id')).collect() # Disabling safe type check, let Arrow do the cast anyway. diff --git a/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py b/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py index 2cbcf31f6e7b3..b49092ed70d04 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py +++ b/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py @@ -30,7 +30,7 @@ if have_pandas: import pandas as pd - from pandas.util.testing import assert_frame_equal + from pandas.testing import assert_frame_equal @unittest.skipIf( @@ -145,20 +145,20 @@ def test_basic(self): def test_unsupported_types(self): with QuietTest(self.sc): - with self.assertRaisesRegexp(NotImplementedError, 'not supported'): + with self.assertRaisesRegex(NotImplementedError, 'not supported'): pandas_udf( lambda x: x, ArrayType(ArrayType(TimestampType())), PandasUDFType.GROUPED_AGG) with QuietTest(self.sc): - with self.assertRaisesRegexp(NotImplementedError, 'not supported'): + with self.assertRaisesRegex(NotImplementedError, 'not supported'): @pandas_udf('mean double, std double', PandasUDFType.GROUPED_AGG) def mean_and_std_udf(v): return v.mean(), v.std() with QuietTest(self.sc): - with self.assertRaisesRegexp(NotImplementedError, 'not supported'): + with self.assertRaisesRegex(NotImplementedError, 'not supported'): @pandas_udf(ArrayType(TimestampType()), PandasUDFType.GROUPED_AGG) def mean_and_std_udf(v): return {v.mean(): v.std()} @@ -428,7 +428,7 @@ def test_array_type(self): array_udf = pandas_udf(lambda x: [1.0, 2.0], 'array', PandasUDFType.GROUPED_AGG) result1 = df.groupby('id').agg(array_udf(df['v']).alias('v2')) - self.assertEquals(result1.first()['v2'], [1.0, 2.0]) + self.assertEqual(result1.first()['v2'], [1.0, 2.0]) def test_invalid_args(self): df = self.data @@ -436,19 +436,19 @@ def test_invalid_args(self): mean_udf = self.pandas_agg_mean_udf with QuietTest(self.sc): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( AnalysisException, 'nor.*aggregate function'): df.groupby(df.id).agg(plus_one(df.v)).collect() with QuietTest(self.sc): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( AnalysisException, 'aggregate function.*argument.*aggregate function'): df.groupby(df.id).agg(mean_udf(mean_udf(df.v))).collect() with QuietTest(self.sc): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( AnalysisException, 'mixture.*aggregate function.*group aggregate pandas UDF'): df.groupby(df.id).agg(mean_udf(df.v), mean(df.v)).collect() diff --git a/python/pyspark/sql/tests/test_pandas_udf_scalar.py b/python/pyspark/sql/tests/test_pandas_udf_scalar.py index 5da5d043ceca4..2eb2dec00106e 100644 --- 
a/python/pyspark/sql/tests/test_pandas_udf_scalar.py +++ b/python/pyspark/sql/tests/test_pandas_udf_scalar.py @@ -133,7 +133,7 @@ def test_vectorized_udf_basic(self): long_f(col('long')), float_f(col('float')), double_f(col('double')), decimal_f('decimal'), bool_f(col('bool')), array_long_f('array_long')) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_register_nondeterministic_vectorized_udf_basic(self): random_pandas_udf = pandas_udf( @@ -169,7 +169,7 @@ def test_vectorized_udf_null_boolean(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: bool_f = pandas_udf(lambda x: x, BooleanType(), udf_type) res = df.select(bool_f(col('bool'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_null_byte(self): data = [(None,), (2,), (3,), (4,)] @@ -178,7 +178,7 @@ def test_vectorized_udf_null_byte(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: byte_f = pandas_udf(lambda x: x, ByteType(), udf_type) res = df.select(byte_f(col('byte'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_null_short(self): data = [(None,), (2,), (3,), (4,)] @@ -187,7 +187,7 @@ def test_vectorized_udf_null_short(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: short_f = pandas_udf(lambda x: x, ShortType(), udf_type) res = df.select(short_f(col('short'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_null_int(self): data = [(None,), (2,), (3,), (4,)] @@ -196,7 +196,7 @@ def test_vectorized_udf_null_int(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: int_f = pandas_udf(lambda x: x, IntegerType(), udf_type) res = df.select(int_f(col('int'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_null_long(self): data = [(None,), (2,), (3,), (4,)] @@ -205,7 +205,7 @@ def test_vectorized_udf_null_long(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: long_f = pandas_udf(lambda x: x, LongType(), udf_type) res = df.select(long_f(col('long'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_null_float(self): data = [(3.0,), (5.0,), (-1.0,), (None,)] @@ -214,7 +214,7 @@ def test_vectorized_udf_null_float(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: float_f = pandas_udf(lambda x: x, FloatType(), udf_type) res = df.select(float_f(col('float'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_null_double(self): data = [(3.0,), (5.0,), (-1.0,), (None,)] @@ -223,7 +223,7 @@ def test_vectorized_udf_null_double(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: double_f = pandas_udf(lambda x: x, DoubleType(), udf_type) res = df.select(double_f(col('double'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_null_decimal(self): data = [(Decimal(3.0),), (Decimal(5.0),), (Decimal(-1.0),), (None,)] @@ -232,7 +232,7 @@ def test_vectorized_udf_null_decimal(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: decimal_f = pandas_udf(lambda x: x, DecimalType(38, 18), udf_type) res = 
df.select(decimal_f(col('decimal'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_null_string(self): data = [("foo",), (None,), ("bar",), ("bar",)] @@ -241,7 +241,7 @@ def test_vectorized_udf_null_string(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: str_f = pandas_udf(lambda x: x, StringType(), udf_type) res = df.select(str_f(col('str'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_string_in_udf(self): df = self.spark.range(10) @@ -255,7 +255,7 @@ def iter_f(it): str_f = pandas_udf(f, StringType(), udf_type) actual = df.select(str_f(col('id'))) expected = df.select(col('id').cast('string')) - self.assertEquals(expected.collect(), actual.collect()) + self.assertEqual(expected.collect(), actual.collect()) def test_vectorized_udf_datatype_string(self): df = self.spark.range(10).select( @@ -279,7 +279,7 @@ def test_vectorized_udf_datatype_string(self): long_f(col('long')), float_f(col('float')), double_f(col('double')), decimal_f('decimal'), bool_f(col('bool'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_null_binary(self): data = [(bytearray(b"a"),), (None,), (bytearray(b"bb"),), (bytearray(b"ccc"),)] @@ -288,7 +288,7 @@ def test_vectorized_udf_null_binary(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: str_f = pandas_udf(lambda x: x, BinaryType(), udf_type) res = df.select(str_f(col('binary'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_array_type(self): data = [([1, 2],), ([3, 4],)] @@ -297,7 +297,7 @@ def test_vectorized_udf_array_type(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: array_f = pandas_udf(lambda x: x, ArrayType(IntegerType()), udf_type) result = df.select(array_f(col('array'))) - self.assertEquals(df.collect(), result.collect()) + self.assertEqual(df.collect(), result.collect()) def test_vectorized_udf_null_array(self): data = [([1, 2],), (None,), (None,), ([3, 4],), (None,)] @@ -306,7 +306,7 @@ def test_vectorized_udf_null_array(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: array_f = pandas_udf(lambda x: x, ArrayType(IntegerType()), udf_type) result = df.select(array_f(col('array'))) - self.assertEquals(df.collect(), result.collect()) + self.assertEqual(df.collect(), result.collect()) def test_vectorized_udf_struct_type(self): df = self.spark.range(10) @@ -375,7 +375,7 @@ def test_vectorized_udf_nested_struct(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: with QuietTest(self.sc): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( Exception, 'Invalid return type with scalar Pandas UDFs'): pandas_udf(lambda x: x, returnType=nested_type, functionType=udf_type) @@ -392,7 +392,7 @@ def test_vectorized_udf_map_type(self): else: map_f = pandas_udf(lambda x: x, MapType(StringType(), LongType()), udf_type) result = df.select(map_f(col('map'))) - self.assertEquals(df.collect(), result.collect()) + self.assertEqual(df.collect(), result.collect()) def test_vectorized_udf_complex(self): df = self.spark.range(10).select( @@ -422,7 +422,7 @@ def iter_mul(it): (iter_add, iter_power2, iter_mul)]: res = df.select(add(col('a'), col('b')), power2(col('a')), mul(col('b'), col('c'))) expected = df.select(expr('a + b'), expr('power(2, a)'), expr('b * 
c')) - self.assertEquals(expected.collect(), res.collect()) + self.assertEqual(expected.collect(), res.collect()) def test_vectorized_udf_exception(self): df = self.spark.range(10) @@ -435,14 +435,14 @@ def iter_raise_exception(it): for raise_exception in [scalar_raise_exception, iter_raise_exception]: with QuietTest(self.sc): - with self.assertRaisesRegexp(Exception, 'division( or modulo)? by zero'): + with self.assertRaisesRegex(Exception, 'division( or modulo)? by zero'): df.select(raise_exception(col('id'))).collect() def test_vectorized_udf_invalid_length(self): df = self.spark.range(10) raise_exception = pandas_udf(lambda _: pd.Series(1), LongType()) with QuietTest(self.sc): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( Exception, 'Result vector from pandas_udf was not the required length'): df.select(raise_exception(col('id'))).collect() @@ -453,7 +453,7 @@ def iter_udf_wong_output_size(it): yield pd.Series(1) with QuietTest(self.sc): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( Exception, "The length of output in Scalar iterator.*" "the length of output was 1"): @@ -469,7 +469,7 @@ def iter_udf_not_reading_all_input(it): with self.sql_conf({"spark.sql.execution.arrow.maxRecordsPerBatch": 3}): df1 = self.spark.range(10).repartition(1) with QuietTest(self.sc): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( Exception, "pandas iterator UDF should exhaust"): df1.select(iter_udf_not_reading_all_input(col('id'))).collect() @@ -486,7 +486,7 @@ def test_vectorized_udf_chained(self): for f, g in [(scalar_f, scalar_g), (iter_f, iter_g)]: res = df.select(g(f(col('id')))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_chained_struct_type(self): df = self.spark.range(10) @@ -517,7 +517,7 @@ def iter_f(it): def test_vectorized_udf_wrong_return_type(self): with QuietTest(self.sc): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: - with self.assertRaisesRegexp( + with self.assertRaisesRegex( NotImplementedError, 'Invalid return type.*scalar Pandas UDF.*ArrayType.*TimestampType'): pandas_udf(lambda x: x, ArrayType(TimestampType()), udf_type) @@ -529,7 +529,7 @@ def test_vectorized_udf_return_scalar(self): PandasUDFType.SCALAR_ITER) for f in [scalar_f, iter_f]: with QuietTest(self.sc): - with self.assertRaisesRegexp(Exception, 'Return.*type.*Series'): + with self.assertRaisesRegex(Exception, 'Return.*type.*Series'): df.select(f(col('id'))).collect() def test_vectorized_udf_decorator(self): @@ -545,14 +545,14 @@ def iter_identity(x): for identity in [scalar_identity, iter_identity]: res = df.select(identity(col('id'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_empty_partition(self): df = self.spark.createDataFrame(self.sc.parallelize([Row(id=1)], 2)) for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: f = pandas_udf(lambda x: x, LongType(), udf_type) res = df.select(f(col('id'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_struct_with_empty_partition(self): df = self.spark.createDataFrame(self.sc.parallelize([Row(id=1)], 2))\ @@ -585,16 +585,16 @@ def iter_f(it): for f in [scalar_f, iter_f]: res = df.select(f(col('id'), col('id'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_unsupported_types(self): with 
QuietTest(self.sc): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: - with self.assertRaisesRegexp( + with self.assertRaisesRegex( NotImplementedError, 'Invalid return type.*scalar Pandas UDF.*ArrayType.*TimestampType'): pandas_udf(lambda x: x, ArrayType(TimestampType()), udf_type) - with self.assertRaisesRegexp( + with self.assertRaisesRegex( NotImplementedError, 'Invalid return type.*scalar Pandas UDF.*ArrayType.StructType'): pandas_udf(lambda x: x, @@ -637,10 +637,10 @@ def iter_check_data(it): result = df.withColumn("check_data", check_data(col("idx"), col("date"), col("date_copy"))).collect() - self.assertEquals(len(data), len(result)) + self.assertEqual(len(data), len(result)) for i in range(len(result)): - self.assertEquals(data[i][1], result[i][1]) # "date" col - self.assertEquals(data[i][1], result[i][2]) # "date_copy" col + self.assertEqual(data[i][1], result[i][1]) # "date" col + self.assertEqual(data[i][1], result[i][2]) # "date_copy" col self.assertIsNone(result[i][3]) # "check_data" col def test_vectorized_udf_timestamps(self): @@ -686,10 +686,10 @@ def iter_check_data(it): result = df.withColumn("check_data", check_data(col("idx"), col("timestamp"), col("timestamp_copy"))).collect() # Check that collection values are correct - self.assertEquals(len(data), len(result)) + self.assertEqual(len(data), len(result)) for i in range(len(result)): - self.assertEquals(data[i][1], result[i][1]) # "timestamp" col - self.assertEquals(data[i][1], result[i][2]) # "timestamp_copy" col + self.assertEqual(data[i][1], result[i][1]) # "timestamp" col + self.assertEqual(data[i][1], result[i][2]) # "timestamp_copy" col self.assertIsNone(result[i][3]) # "check_data" col def test_vectorized_udf_return_timestamp_tz(self): @@ -713,7 +713,7 @@ def iter_gen_timestamps(it): i, ts = r ts_tz = pd.Timestamp(i, unit='D', tz='America/Los_Angeles').to_pydatetime() expected = spark_ts_t.fromInternal(spark_ts_t.toInternal(ts_tz)) - self.assertEquals(expected, ts) + self.assertEqual(expected, ts) def test_vectorized_udf_check_config(self): with self.sql_conf({"spark.sql.execution.arrow.maxRecordsPerBatch": 3}): @@ -799,9 +799,9 @@ def test_nondeterministic_vectorized_udf_in_aggregate(self): for random_udf in [self.nondeterministic_vectorized_udf, self.nondeterministic_vectorized_iter_udf]: with QuietTest(self.sc): - with self.assertRaisesRegexp(AnalysisException, 'nondeterministic'): + with self.assertRaisesRegex(AnalysisException, 'nondeterministic'): df.groupby(df.id).agg(sum(random_udf(df.id))).collect() - with self.assertRaisesRegexp(AnalysisException, 'nondeterministic'): + with self.assertRaisesRegex(AnalysisException, 'nondeterministic'): df.agg(sum(random_udf(df.id))).collect() def test_register_vectorized_udf_basic(self): @@ -825,8 +825,8 @@ def iter_original_add(it): res2 = self.spark.sql( "SELECT add1(t.a, t.b) FROM (SELECT id as a, id as b FROM range(10)) t") expected = df.select(expr('a + b')) - self.assertEquals(expected.collect(), res1.collect()) - self.assertEquals(expected.collect(), res2.collect()) + self.assertEqual(expected.collect(), res1.collect()) + self.assertEqual(expected.collect(), res2.collect()) def test_scalar_iter_udf_init(self): import numpy as np @@ -854,7 +854,7 @@ def test_close(batch_iter): finally: raise RuntimeError("reached finally block") with QuietTest(self.sc): - with self.assertRaisesRegexp(Exception, "reached finally block"): + with self.assertRaisesRegex(Exception, "reached finally block"): self.spark.range(1).select(test_close(col("id"))).collect() 
def test_scalar_iter_udf_close_early(self): @@ -905,7 +905,7 @@ def test_timestamp_dst(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: foo_udf = pandas_udf(lambda x: x, 'timestamp', udf_type) result = df.withColumn('time', foo_udf(df.time)) - self.assertEquals(df.collect(), result.collect()) + self.assertEqual(df.collect(), result.collect()) def test_udf_category_type(self): @@ -1003,11 +1003,11 @@ def f4_iter(it): df_chained_4 = df.withColumn('f4_f2_f1', f4(f2(f1(df['v'])))) df_chained_5 = df.withColumn('f4_f3_f1', f4(f3(f1(df['v'])))) - self.assertEquals(expected_chained_1, df_chained_1.collect()) - self.assertEquals(expected_chained_2, df_chained_2.collect()) - self.assertEquals(expected_chained_3, df_chained_3.collect()) - self.assertEquals(expected_chained_4, df_chained_4.collect()) - self.assertEquals(expected_chained_5, df_chained_5.collect()) + self.assertEqual(expected_chained_1, df_chained_1.collect()) + self.assertEqual(expected_chained_2, df_chained_2.collect()) + self.assertEqual(expected_chained_3, df_chained_3.collect()) + self.assertEqual(expected_chained_4, df_chained_4.collect()) + self.assertEqual(expected_chained_5, df_chained_5.collect()) # Test multiple mixed UDF expressions in a single projection df_multi_1 = df \ @@ -1045,8 +1045,8 @@ def f4_iter(it): .withColumn('f4_f3_f2', f4(f3(f2(col('v'))))) \ .withColumn('f4_f3_f2_f1', f4(f3(f2(f1(col('v')))))) - self.assertEquals(expected_multi, df_multi_1.collect()) - self.assertEquals(expected_multi, df_multi_2.collect()) + self.assertEqual(expected_multi, df_multi_1.collect()) + self.assertEqual(expected_multi, df_multi_2.collect()) def test_mixed_udf_and_sql(self): df = self.spark.range(0, 1).toDF('v') @@ -1107,7 +1107,7 @@ def f3i(it): .withColumn('f3_f1_f2', f3(f1(f2(df['v'])))) \ .withColumn('f3_f2_f1', f3(f2(f1(df['v'])))) - self.assertEquals(expected, df1.collect()) + self.assertEqual(expected, df1.collect()) # SPARK-24721 @unittest.skipIf(not test_compiled, test_not_compiled_message) # type: ignore @@ -1138,17 +1138,17 @@ def test_datasource_with_udf(self): for df in [filesource_df, datasource_df, datasource_v2_df]: result = df.withColumn('c', c1) expected = df.withColumn('c', lit(2)) - self.assertEquals(expected.collect(), result.collect()) + self.assertEqual(expected.collect(), result.collect()) for df in [filesource_df, datasource_df, datasource_v2_df]: result = df.withColumn('c', c2) expected = df.withColumn('c', col('i') + 1) - self.assertEquals(expected.collect(), result.collect()) + self.assertEqual(expected.collect(), result.collect()) for df in [filesource_df, datasource_df, datasource_v2_df]: for f in [f1, f2]: result = df.filter(f) - self.assertEquals(0, result.count()) + self.assertEqual(0, result.count()) finally: shutil.rmtree(path) diff --git a/python/pyspark/sql/tests/test_pandas_udf_typehints.py b/python/pyspark/sql/tests/test_pandas_udf_typehints.py index d9717da4d2fbd..e30f43181ae96 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_typehints.py +++ b/python/pyspark/sql/tests/test_pandas_udf_typehints.py @@ -29,7 +29,7 @@ if have_pandas: import pandas as pd import numpy as np - from pandas.util.testing import assert_frame_equal + from pandas.testing import assert_frame_equal @unittest.skipIf( diff --git a/python/pyspark/sql/tests/test_pandas_udf_window.py b/python/pyspark/sql/tests/test_pandas_udf_window.py index 5ad2ecd8f85d4..d861bcce9e8b8 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_window.py +++ b/python/pyspark/sql/tests/test_pandas_udf_window.py @@ -26,7 
+26,7 @@ from pyspark.testing.utils import QuietTest if have_pandas: - from pandas.util.testing import assert_frame_equal + from pandas.testing import assert_frame_equal @unittest.skipIf( @@ -241,14 +241,14 @@ def test_array_type(self): array_udf = pandas_udf(lambda x: [1.0, 2.0], 'array', PandasUDFType.GROUPED_AGG) result1 = df.withColumn('v2', array_udf(df['v']).over(w)) - self.assertEquals(result1.first()['v2'], [1.0, 2.0]) + self.assertEqual(result1.first()['v2'], [1.0, 2.0]) def test_invalid_args(self): df = self.data w = self.unbounded_window with QuietTest(self.sc): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( AnalysisException, '.*not supported within a window function'): foo_udf = pandas_udf(lambda x: x, 'v double', PandasUDFType.GROUPED_MAP) diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py index 6b5c1ad6c4e46..eb4caf05d1af0 100644 --- a/python/pyspark/sql/tests/test_types.py +++ b/python/pyspark/sql/tests/test_types.py @@ -180,7 +180,7 @@ def test_infer_schema_not_enough_names(self): self.assertEqual(df.columns, ['col1', '_2']) def test_infer_schema_fails(self): - with self.assertRaisesRegexp(TypeError, 'field a'): + with self.assertRaisesRegex(TypeError, 'field a'): self.spark.createDataFrame(self.spark.sparkContext.parallelize([[1, 1], ["x", 1]]), schema=["a", "b"], samplingRatio=0.99) @@ -578,18 +578,18 @@ def test_merge_type(self): ArrayType(LongType()), ArrayType(LongType()) ), ArrayType(LongType())) - with self.assertRaisesRegexp(TypeError, 'element in array'): + with self.assertRaisesRegex(TypeError, 'element in array'): _merge_type(ArrayType(LongType()), ArrayType(DoubleType())) self.assertEqual(_merge_type( MapType(StringType(), LongType()), MapType(StringType(), LongType()) ), MapType(StringType(), LongType())) - with self.assertRaisesRegexp(TypeError, 'key of map'): + with self.assertRaisesRegex(TypeError, 'key of map'): _merge_type( MapType(StringType(), LongType()), MapType(DoubleType(), LongType())) - with self.assertRaisesRegexp(TypeError, 'value of map'): + with self.assertRaisesRegex(TypeError, 'value of map'): _merge_type( MapType(StringType(), LongType()), MapType(StringType(), DoubleType())) @@ -598,7 +598,7 @@ def test_merge_type(self): StructType([StructField("f1", LongType()), StructField("f2", StringType())]), StructType([StructField("f1", LongType()), StructField("f2", StringType())]) ), StructType([StructField("f1", LongType()), StructField("f2", StringType())])) - with self.assertRaisesRegexp(TypeError, 'field f1'): + with self.assertRaisesRegex(TypeError, 'field f1'): _merge_type( StructType([StructField("f1", LongType()), StructField("f2", StringType())]), StructType([StructField("f1", DoubleType()), StructField("f2", StringType())])) @@ -607,7 +607,7 @@ def test_merge_type(self): StructType([StructField("f1", StructType([StructField("f2", LongType())]))]), StructType([StructField("f1", StructType([StructField("f2", LongType())]))]) ), StructType([StructField("f1", StructType([StructField("f2", LongType())]))])) - with self.assertRaisesRegexp(TypeError, 'field f2 in field f1'): + with self.assertRaisesRegex(TypeError, 'field f2 in field f1'): _merge_type( StructType([StructField("f1", StructType([StructField("f2", LongType())]))]), StructType([StructField("f1", StructType([StructField("f2", StringType())]))])) @@ -616,7 +616,7 @@ def test_merge_type(self): StructType([StructField("f1", ArrayType(LongType())), StructField("f2", StringType())]), StructType([StructField("f1", 
ArrayType(LongType())), StructField("f2", StringType())]) ), StructType([StructField("f1", ArrayType(LongType())), StructField("f2", StringType())])) - with self.assertRaisesRegexp(TypeError, 'element in array field f1'): + with self.assertRaisesRegex(TypeError, 'element in array field f1'): _merge_type( StructType([ StructField("f1", ArrayType(LongType())), @@ -635,7 +635,7 @@ def test_merge_type(self): ), StructType([ StructField("f1", MapType(StringType(), LongType())), StructField("f2", StringType())])) - with self.assertRaisesRegexp(TypeError, 'value of map field f1'): + with self.assertRaisesRegex(TypeError, 'value of map field f1'): _merge_type( StructType([ StructField("f1", MapType(StringType(), LongType())), @@ -648,7 +648,7 @@ def test_merge_type(self): StructType([StructField("f1", ArrayType(MapType(StringType(), LongType())))]), StructType([StructField("f1", ArrayType(MapType(StringType(), LongType())))]) ), StructType([StructField("f1", ArrayType(MapType(StringType(), LongType())))])) - with self.assertRaisesRegexp(TypeError, 'key of map element in array field f1'): + with self.assertRaisesRegex(TypeError, 'key of map element in array field f1'): _merge_type( StructType([StructField("f1", ArrayType(MapType(StringType(), LongType())))]), StructType([StructField("f1", ArrayType(MapType(DoubleType(), LongType())))]) @@ -734,7 +734,7 @@ def assertCollectSuccess(typecode, value): unsupported_types = all_types - set(supported_types) # test unsupported types for t in unsupported_types: - with self.assertRaisesRegexp(TypeError, "infer the type of the field myarray"): + with self.assertRaisesRegex(TypeError, "infer the type of the field myarray"): a = array.array(t) self.spark.createDataFrame([Row(myarray=a)]).collect() @@ -789,13 +789,13 @@ def test_invalid_create_row(self): class DataTypeVerificationTests(unittest.TestCase): def test_verify_type_exception_msg(self): - self.assertRaisesRegexp( + self.assertRaisesRegex( ValueError, "test_name", lambda: _make_type_verifier(StringType(), nullable=False, name="test_name")(None)) schema = StructType([StructField('a', StructType([StructField('b', IntegerType())]))]) - self.assertRaisesRegexp( + self.assertRaisesRegex( TypeError, "field b in field a", lambda: _make_type_verifier(schema)([["data"]])) diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py index 9a1c0edcce4ed..bfc55dff94540 100644 --- a/python/pyspark/sql/tests/test_udf.py +++ b/python/pyspark/sql/tests/test_udf.py @@ -98,7 +98,7 @@ def test_udf_registration_return_type_none(self): def test_udf_registration_return_type_not_none(self): with QuietTest(self.sc): - with self.assertRaisesRegexp(TypeError, "Invalid return type"): + with self.assertRaisesRegex(TypeError, "Invalid return type"): self.spark.catalog.registerFunction( "f", UserDefinedFunction(lambda x, y: len(x) + y, StringType()), StringType()) @@ -149,9 +149,9 @@ def test_nondeterministic_udf_in_aggregate(self): df = self.spark.range(10) with QuietTest(self.sc): - with self.assertRaisesRegexp(AnalysisException, "nondeterministic"): + with self.assertRaisesRegex(AnalysisException, "nondeterministic"): df.groupby('id').agg(sum(udf_random_col())).collect() - with self.assertRaisesRegexp(AnalysisException, "nondeterministic"): + with self.assertRaisesRegex(AnalysisException, "nondeterministic"): df.agg(sum(udf_random_col())).collect() def test_chained_udf(self): @@ -203,7 +203,7 @@ def test_udf_in_join_condition(self): # Cross join. 
df = left.join(right, f("a", "b")) with self.sql_conf({"spark.sql.crossJoin.enabled": False}): - with self.assertRaisesRegexp(AnalysisException, 'Detected implicit cartesian product'): + with self.assertRaisesRegex(AnalysisException, 'Detected implicit cartesian product'): df.collect() with self.sql_conf({"spark.sql.crossJoin.enabled": True}): self.assertEqual(df.collect(), [Row(a=1, b=1)]) @@ -238,7 +238,7 @@ def test_udf_not_supported_in_join_condition(self): f = udf(lambda a, b: a == b, BooleanType()) def runWithJoinType(join_type, type_string): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( AnalysisException, 'Using PythonUDF.*%s is not supported.' % type_string): left.join(right, [f("a", "b"), left.a1 == right.b1], join_type).collect() @@ -385,18 +385,18 @@ def test_register_java_udaf(self): def test_non_existed_udf(self): spark = self.spark - self.assertRaisesRegexp(AnalysisException, "Can not load class non_existed_udf", - lambda: spark.udf.registerJavaFunction("udf1", "non_existed_udf")) + self.assertRaisesRegex(AnalysisException, "Can not load class non_existed_udf", + lambda: spark.udf.registerJavaFunction("udf1", "non_existed_udf")) # This is to check if a deprecated 'SQLContext.registerJavaFunction' can call its alias. sqlContext = spark._wrapped - self.assertRaisesRegexp(AnalysisException, "Can not load class non_existed_udf", - lambda: sqlContext.registerJavaFunction("udf1", "non_existed_udf")) + self.assertRaisesRegex(AnalysisException, "Can not load class non_existed_udf", + lambda: sqlContext.registerJavaFunction("udf1", "non_existed_udf")) def test_non_existed_udaf(self): spark = self.spark - self.assertRaisesRegexp(AnalysisException, "Can not load class non_existed_udaf", - lambda: spark.udf.registerJavaUDAF("udaf1", "non_existed_udaf")) + self.assertRaisesRegex(AnalysisException, "Can not load class non_existed_udaf", + lambda: spark.udf.registerJavaUDAF("udaf1", "non_existed_udaf")) def test_udf_with_input_file_name(self): from pyspark.sql.functions import input_file_name @@ -587,17 +587,17 @@ def test_datasource_with_udf(self): for df in [filesource_df, datasource_df, datasource_v2_df]: result = df.withColumn('c', c1) expected = df.withColumn('c', lit(2)) - self.assertEquals(expected.collect(), result.collect()) + self.assertEqual(expected.collect(), result.collect()) for df in [filesource_df, datasource_df, datasource_v2_df]: result = df.withColumn('c', c2) expected = df.withColumn('c', col('i') + 1) - self.assertEquals(expected.collect(), result.collect()) + self.assertEqual(expected.collect(), result.collect()) for df in [filesource_df, datasource_df, datasource_v2_df]: for f in [f1, f2]: result = df.filter(f) - self.assertEquals(0, result.count()) + self.assertEqual(0, result.count()) finally: shutil.rmtree(path) diff --git a/python/pyspark/sql/tests/test_utils.py b/python/pyspark/sql/tests/test_utils.py index b08e17208d8af..005f0e892b60f 100644 --- a/python/pyspark/sql/tests/test_utils.py +++ b/python/pyspark/sql/tests/test_utils.py @@ -31,23 +31,22 @@ def test_capture_user_friendly_exception(self): try: self.spark.sql("select `中文字段`") except AnalysisException as e: - self.assertRegexpMatches(str(e), "cannot resolve '`中文字段`'") + self.assertRegex(str(e), "cannot resolve '`中文字段`'") def test_capture_parse_exception(self): self.assertRaises(ParseException, lambda: self.spark.sql("abc")) def test_capture_illegalargument_exception(self): - self.assertRaisesRegexp(IllegalArgumentException, "Setting negative mapred.reduce.tasks", - lambda: 
self.spark.sql("SET mapred.reduce.tasks=-1")) + self.assertRaisesRegex(IllegalArgumentException, "Setting negative mapred.reduce.tasks", + lambda: self.spark.sql("SET mapred.reduce.tasks=-1")) df = self.spark.createDataFrame([(1, 2)], ["a", "b"]) - self.assertRaisesRegexp(IllegalArgumentException, "1024 is not in the permitted values", - lambda: df.select(sha2(df.a, 1024)).collect()) + self.assertRaisesRegex(IllegalArgumentException, "1024 is not in the permitted values", + lambda: df.select(sha2(df.a, 1024)).collect()) try: df.select(sha2(df.a, 1024)).collect() except IllegalArgumentException as e: - self.assertRegexpMatches(e.desc, "1024 is not in the permitted values") - self.assertRegexpMatches(e.stackTrace, - "org.apache.spark.sql.functions") + self.assertRegex(e.desc, "1024 is not in the permitted values") + self.assertRegex(e.stackTrace, "org.apache.spark.sql.functions") if __name__ == "__main__": diff --git a/python/pyspark/tests/test_profiler.py b/python/pyspark/tests/test_profiler.py index de72a547b0844..e621321283dab 100644 --- a/python/pyspark/tests/test_profiler.py +++ b/python/pyspark/tests/test_profiler.py @@ -85,11 +85,11 @@ class ProfilerTests2(unittest.TestCase): def test_profiler_disabled(self): sc = SparkContext(conf=SparkConf().set("spark.python.profile", "false")) try: - self.assertRaisesRegexp( + self.assertRaisesRegex( RuntimeError, "'spark.python.profile' configuration must be set", lambda: sc.show_profiles()) - self.assertRaisesRegexp( + self.assertRaisesRegex( RuntimeError, "'spark.python.profile' configuration must be set", lambda: sc.dump_profiles("/tmp/abc")) diff --git a/python/pyspark/tests/test_rdd.py b/python/pyspark/tests/test_rdd.py index 47b8f10a5b05e..b17c039889a71 100644 --- a/python/pyspark/tests/test_rdd.py +++ b/python/pyspark/tests/test_rdd.py @@ -733,25 +733,25 @@ def stopit(*x): keyed_rdd = self.sc.parallelize((x % 2, x) for x in range(10)) msg = "Caught StopIteration thrown from user's code; failing the task" - self.assertRaisesRegexp(Py4JJavaError, msg, seq_rdd.map(stopit).collect) - self.assertRaisesRegexp(Py4JJavaError, msg, seq_rdd.filter(stopit).collect) - self.assertRaisesRegexp(Py4JJavaError, msg, seq_rdd.foreach, stopit) - self.assertRaisesRegexp(Py4JJavaError, msg, seq_rdd.reduce, stopit) - self.assertRaisesRegexp(Py4JJavaError, msg, seq_rdd.fold, 0, stopit) - self.assertRaisesRegexp(Py4JJavaError, msg, seq_rdd.foreach, stopit) - self.assertRaisesRegexp(Py4JJavaError, msg, - seq_rdd.cartesian(seq_rdd).flatMap(stopit).collect) + self.assertRaisesRegex(Py4JJavaError, msg, seq_rdd.map(stopit).collect) + self.assertRaisesRegex(Py4JJavaError, msg, seq_rdd.filter(stopit).collect) + self.assertRaisesRegex(Py4JJavaError, msg, seq_rdd.foreach, stopit) + self.assertRaisesRegex(Py4JJavaError, msg, seq_rdd.reduce, stopit) + self.assertRaisesRegex(Py4JJavaError, msg, seq_rdd.fold, 0, stopit) + self.assertRaisesRegex(Py4JJavaError, msg, seq_rdd.foreach, stopit) + self.assertRaisesRegex(Py4JJavaError, msg, + seq_rdd.cartesian(seq_rdd).flatMap(stopit).collect) # these methods call the user function both in the driver and in the executor # the exception raised is different according to where the StopIteration happens # RuntimeError is raised if in the driver # Py4JJavaError is raised if in the executor (wraps the RuntimeError raised in the worker) - self.assertRaisesRegexp((Py4JJavaError, RuntimeError), msg, - keyed_rdd.reduceByKeyLocally, stopit) - self.assertRaisesRegexp((Py4JJavaError, RuntimeError), msg, - seq_rdd.aggregate, 0, stopit, lambda *x: 
1) - self.assertRaisesRegexp((Py4JJavaError, RuntimeError), msg, - seq_rdd.aggregate, 0, lambda *x: 1, stopit) + self.assertRaisesRegex((Py4JJavaError, RuntimeError), msg, + keyed_rdd.reduceByKeyLocally, stopit) + self.assertRaisesRegex((Py4JJavaError, RuntimeError), msg, + seq_rdd.aggregate, 0, stopit, lambda *x: 1) + self.assertRaisesRegex((Py4JJavaError, RuntimeError), msg, + seq_rdd.aggregate, 0, lambda *x: 1, stopit) def test_overwritten_global_func(self): # Regression test for SPARK-27000 @@ -768,7 +768,7 @@ def fail(_): rdd = self.sc.range(10).map(fail) - with self.assertRaisesRegexp(Exception, "local iterator error"): + with self.assertRaisesRegex(Exception, "local iterator error"): for _ in rdd.toLocalIterator(): pass diff --git a/python/pyspark/tests/test_worker.py b/python/pyspark/tests/test_worker.py index d7a4b84e8dc41..51ebee4de7cec 100644 --- a/python/pyspark/tests/test_worker.py +++ b/python/pyspark/tests/test_worker.py @@ -165,7 +165,7 @@ def f(): self.sc.parallelize([1]).map(lambda x: f()).count() except Py4JJavaError as e: - self.assertRegexpMatches(str(e), "exception with 中") + self.assertRegex(str(e), "exception with 中") class WorkerReuseTest(PySparkTestCase): From 80161238fe9393aabd5fcd56752ff1e43f6989b1 Mon Sep 17 00:00:00 2001 From: Weichen Xu Date: Tue, 1 Dec 2020 09:36:42 +0800 Subject: [PATCH 0617/1009] [SPARK-33592] Fix: Pyspark ML Validator params in estimatorParamMaps may be lost after saving and reloading ### What changes were proposed in this pull request? Fix: Pyspark ML Validator params in estimatorParamMaps may be lost after saving and reloading When saving validator estimatorParamMaps, will check all nested stages in tuned estimator to get correct param parent. Two typical cases to manually test: ~~~python tokenizer = Tokenizer(inputCol="text", outputCol="words") hashingTF = HashingTF(inputCol=tokenizer.getOutputCol(), outputCol="features") lr = LogisticRegression() pipeline = Pipeline(stages=[tokenizer, hashingTF, lr]) paramGrid = ParamGridBuilder() \ .addGrid(hashingTF.numFeatures, [10, 100]) \ .addGrid(lr.maxIter, [100, 200]) \ .build() tvs = TrainValidationSplit(estimator=pipeline, estimatorParamMaps=paramGrid, evaluator=MulticlassClassificationEvaluator()) tvs.save(tvsPath) loadedTvs = TrainValidationSplit.load(tvsPath) # check `loadedTvs.getEstimatorParamMaps()` restored correctly. ~~~ ~~~python lr = LogisticRegression() ova = OneVsRest(classifier=lr) grid = ParamGridBuilder().addGrid(lr.maxIter, [100, 200]).build() evaluator = MulticlassClassificationEvaluator() tvs = TrainValidationSplit(estimator=ova, estimatorParamMaps=grid, evaluator=evaluator) tvs.save(tvsPath) loadedTvs = TrainValidationSplit.load(tvsPath) # check `loadedTvs.getEstimatorParamMaps()` restored correctly. ~~~ ### Why are the changes needed? Bug fix. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Closes #30539 from WeichenXu123/fix_tuning_param_maps_io. 
Authored-by: Weichen Xu Signed-off-by: Ruifeng Zheng --- dev/sparktestsupport/modules.py | 1 + python/pyspark/ml/classification.py | 46 +------------ python/pyspark/ml/param/__init__.py | 6 ++ python/pyspark/ml/pipeline.py | 53 +-------------- python/pyspark/ml/tests/test_tuning.py | 47 +++++++++++-- python/pyspark/ml/tests/test_util.py | 84 +++++++++++++++++++++++ python/pyspark/ml/tuning.py | 94 ++++++++++++++++++++++++-- python/pyspark/ml/util.py | 38 +++++++++++ python/pyspark/ml/util.pyi | 6 ++ 9 files changed, 268 insertions(+), 107 deletions(-) create mode 100644 python/pyspark/ml/tests/test_util.py diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index 868e4a5d23ed7..5d8b714711774 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -564,6 +564,7 @@ def __hash__(self): "pyspark.ml.tests.test_stat", "pyspark.ml.tests.test_training_summary", "pyspark.ml.tests.test_tuning", + "pyspark.ml.tests.test_util", "pyspark.ml.tests.test_wrapper", ], excluded_python_implementations=[ diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 50882fc895d6c..763038ede876a 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -36,7 +36,7 @@ from pyspark.ml.util import JavaMLWritable, JavaMLReadable, HasTrainingSummary from pyspark.ml.wrapper import JavaParams, \ JavaPredictor, JavaPredictionModel, JavaWrapper -from pyspark.ml.common import inherit_doc, _java2py, _py2java +from pyspark.ml.common import inherit_doc from pyspark.ml.linalg import Vectors from pyspark.sql import DataFrame from pyspark.sql.functions import udf, when @@ -2991,50 +2991,6 @@ def _to_java(self): _java_obj.setRawPredictionCol(self.getRawPredictionCol()) return _java_obj - def _make_java_param_pair(self, param, value): - """ - Makes a Java param pair. - """ - sc = SparkContext._active_spark_context - param = self._resolveParam(param) - _java_obj = JavaParams._new_java_obj("org.apache.spark.ml.classification.OneVsRest", - self.uid) - java_param = _java_obj.getParam(param.name) - if isinstance(value, JavaParams): - # used in the case of an estimator having another estimator as a parameter - # the reason why this is not in _py2java in common.py is that importing - # Estimator and Model in common.py results in a circular import with inherit_doc - java_value = value._to_java() - else: - java_value = _py2java(sc, value) - return java_param.w(java_value) - - def _transfer_param_map_to_java(self, pyParamMap): - """ - Transforms a Python ParamMap into a Java ParamMap. - """ - paramMap = JavaWrapper._new_java_obj("org.apache.spark.ml.param.ParamMap") - for param in self.params: - if param in pyParamMap: - pair = self._make_java_param_pair(param, pyParamMap[param]) - paramMap.put([pair]) - return paramMap - - def _transfer_param_map_from_java(self, javaParamMap): - """ - Transforms a Java ParamMap into a Python ParamMap. 
- """ - sc = SparkContext._active_spark_context - paramMap = dict() - for pair in javaParamMap.toList(): - param = pair.param() - if self.hasParam(str(param.name())): - if param.name() == "classifier": - paramMap[self.getParam(param.name())] = JavaParams._from_java(pair.value()) - else: - paramMap[self.getParam(param.name())] = _java2py(sc, pair.value()) - return paramMap - class OneVsRestModel(Model, _OneVsRestParams, JavaMLReadable, JavaMLWritable): """ diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py index f2381a4c42698..3eab6607aa7ee 100644 --- a/python/pyspark/ml/param/__init__.py +++ b/python/pyspark/ml/param/__init__.py @@ -437,6 +437,12 @@ def _resolveParam(self, param): else: raise ValueError("Cannot resolve %r as a param." % param) + def _testOwnParam(self, param_parent, param_name): + """ + Test the ownership. Return True or False + """ + return self.uid == param_parent and self.hasParam(param_name) + @staticmethod def _dummy(): """ diff --git a/python/pyspark/ml/pipeline.py b/python/pyspark/ml/pipeline.py index a6471a8dd1fe5..b0aa735709e8d 100644 --- a/python/pyspark/ml/pipeline.py +++ b/python/pyspark/ml/pipeline.py @@ -21,8 +21,8 @@ from pyspark.ml.param import Param, Params from pyspark.ml.util import MLReadable, MLWritable, JavaMLWriter, JavaMLReader, \ DefaultParamsReader, DefaultParamsWriter, MLWriter, MLReader, JavaMLWritable -from pyspark.ml.wrapper import JavaParams, JavaWrapper -from pyspark.ml.common import inherit_doc, _java2py, _py2java +from pyspark.ml.wrapper import JavaParams +from pyspark.ml.common import inherit_doc @inherit_doc @@ -190,55 +190,6 @@ def _to_java(self): return _java_obj - def _make_java_param_pair(self, param, value): - """ - Makes a Java param pair. - """ - sc = SparkContext._active_spark_context - param = self._resolveParam(param) - java_param = sc._jvm.org.apache.spark.ml.param.Param(param.parent, param.name, param.doc) - if isinstance(value, Params) and hasattr(value, "_to_java"): - # Convert JavaEstimator/JavaTransformer object or Estimator/Transformer object which - # implements `_to_java` method (such as OneVsRest, Pipeline object) to java object. - # used in the case of an estimator having another estimator as a parameter - # the reason why this is not in _py2java in common.py is that importing - # Estimator and Model in common.py results in a circular import with inherit_doc - java_value = value._to_java() - else: - java_value = _py2java(sc, value) - return java_param.w(java_value) - - def _transfer_param_map_to_java(self, pyParamMap): - """ - Transforms a Python ParamMap into a Java ParamMap. - """ - paramMap = JavaWrapper._new_java_obj("org.apache.spark.ml.param.ParamMap") - for param in self.params: - if param in pyParamMap: - pair = self._make_java_param_pair(param, pyParamMap[param]) - paramMap.put([pair]) - return paramMap - - def _transfer_param_map_from_java(self, javaParamMap): - """ - Transforms a Java ParamMap into a Python ParamMap. - """ - sc = SparkContext._active_spark_context - paramMap = dict() - for pair in javaParamMap.toList(): - param = pair.param() - if self.hasParam(str(param.name())): - java_obj = pair.value() - if sc._jvm.Class.forName("org.apache.spark.ml.PipelineStage").isInstance(java_obj): - # Note: JavaParams._from_java support both JavaEstimator/JavaTransformer class - # and Estimator/Transformer class which implements `_from_java` static method - # (such as OneVsRest, Pipeline class). 
- py_obj = JavaParams._from_java(java_obj) - else: - py_obj = _java2py(sc, java_obj) - paramMap[self.getParam(param.name())] = py_obj - return paramMap - @inherit_doc class PipelineWriter(MLWriter): diff --git a/python/pyspark/ml/tests/test_tuning.py b/python/pyspark/ml/tests/test_tuning.py index ced32c07f245f..ebd7457e4d30a 100644 --- a/python/pyspark/ml/tests/test_tuning.py +++ b/python/pyspark/ml/tests/test_tuning.py @@ -73,7 +73,21 @@ def test_addGrid(self): .build()) -class CrossValidatorTests(SparkSessionTestCase): +class ValidatorTestUtilsMixin: + def assert_param_maps_equal(self, paramMaps1, paramMaps2): + self.assertEqual(len(paramMaps1), len(paramMaps2)) + for paramMap1, paramMap2 in zip(paramMaps1, paramMaps2): + self.assertEqual(set(paramMap1.keys()), set(paramMap2.keys())) + for param in paramMap1.keys(): + v1 = paramMap1[param] + v2 = paramMap2[param] + if isinstance(v1, Params): + self.assertEqual(v1.uid, v2.uid) + else: + self.assertEqual(v1, v2) + + +class CrossValidatorTests(SparkSessionTestCase, ValidatorTestUtilsMixin): def test_copy(self): dataset = self.spark.createDataFrame([ @@ -256,7 +270,7 @@ def test_save_load_simple_estimator(self): loadedCV = CrossValidator.load(cvPath) self.assertEqual(loadedCV.getEstimator().uid, cv.getEstimator().uid) self.assertEqual(loadedCV.getEvaluator().uid, cv.getEvaluator().uid) - self.assertEqual(loadedCV.getEstimatorParamMaps(), cv.getEstimatorParamMaps()) + self.assert_param_maps_equal(loadedCV.getEstimatorParamMaps(), cv.getEstimatorParamMaps()) # test save/load of CrossValidatorModel cvModelPath = temp_path + "/cvModel" @@ -351,6 +365,7 @@ def test_save_load_nested_estimator(self): cvPath = temp_path + "/cv" cv.save(cvPath) loadedCV = CrossValidator.load(cvPath) + self.assert_param_maps_equal(loadedCV.getEstimatorParamMaps(), grid) self.assertEqual(loadedCV.getEstimator().uid, cv.getEstimator().uid) self.assertEqual(loadedCV.getEvaluator().uid, cv.getEvaluator().uid) @@ -367,6 +382,7 @@ def test_save_load_nested_estimator(self): cvModelPath = temp_path + "/cvModel" cvModel.save(cvModelPath) loadedModel = CrossValidatorModel.load(cvModelPath) + self.assert_param_maps_equal(loadedModel.getEstimatorParamMaps(), grid) self.assertEqual(loadedModel.bestModel.uid, cvModel.bestModel.uid) def test_save_load_pipeline_estimator(self): @@ -401,6 +417,11 @@ def test_save_load_pipeline_estimator(self): estimatorParamMaps=paramGrid, evaluator=MulticlassClassificationEvaluator(), numFolds=2) # use 3+ folds in practice + cvPath = temp_path + "/cv" + crossval.save(cvPath) + loadedCV = CrossValidator.load(cvPath) + self.assert_param_maps_equal(loadedCV.getEstimatorParamMaps(), paramGrid) + self.assertEqual(loadedCV.getEstimator().uid, crossval.getEstimator().uid) # Run cross-validation, and choose the best set of parameters. cvModel = crossval.fit(training) @@ -421,6 +442,11 @@ def test_save_load_pipeline_estimator(self): estimatorParamMaps=paramGrid, evaluator=MulticlassClassificationEvaluator(), numFolds=2) # use 3+ folds in practice + cv2Path = temp_path + "/cv2" + crossval2.save(cv2Path) + loadedCV2 = CrossValidator.load(cv2Path) + self.assert_param_maps_equal(loadedCV2.getEstimatorParamMaps(), paramGrid) + self.assertEqual(loadedCV2.getEstimator().uid, crossval2.getEstimator().uid) # Run cross-validation, and choose the best set of parameters. 
cvModel2 = crossval2.fit(training) @@ -511,7 +537,7 @@ def test_invalid_user_specified_folds(self): cv.fit(dataset_with_folds) -class TrainValidationSplitTests(SparkSessionTestCase): +class TrainValidationSplitTests(SparkSessionTestCase, ValidatorTestUtilsMixin): def test_fit_minimize_metric(self): dataset = self.spark.createDataFrame([ @@ -632,7 +658,8 @@ def test_save_load_simple_estimator(self): loadedTvs = TrainValidationSplit.load(tvsPath) self.assertEqual(loadedTvs.getEstimator().uid, tvs.getEstimator().uid) self.assertEqual(loadedTvs.getEvaluator().uid, tvs.getEvaluator().uid) - self.assertEqual(loadedTvs.getEstimatorParamMaps(), tvs.getEstimatorParamMaps()) + self.assert_param_maps_equal( + loadedTvs.getEstimatorParamMaps(), tvs.getEstimatorParamMaps()) tvsModelPath = temp_path + "/tvsModel" tvsModel.save(tvsModelPath) @@ -713,6 +740,7 @@ def test_save_load_nested_estimator(self): tvsPath = temp_path + "/tvs" tvs.save(tvsPath) loadedTvs = TrainValidationSplit.load(tvsPath) + self.assert_param_maps_equal(loadedTvs.getEstimatorParamMaps(), grid) self.assertEqual(loadedTvs.getEstimator().uid, tvs.getEstimator().uid) self.assertEqual(loadedTvs.getEvaluator().uid, tvs.getEvaluator().uid) @@ -728,6 +756,7 @@ def test_save_load_nested_estimator(self): tvsModelPath = temp_path + "/tvsModel" tvsModel.save(tvsModelPath) loadedModel = TrainValidationSplitModel.load(tvsModelPath) + self.assert_param_maps_equal(loadedModel.getEstimatorParamMaps(), grid) self.assertEqual(loadedModel.bestModel.uid, tvsModel.bestModel.uid) def test_save_load_pipeline_estimator(self): @@ -761,6 +790,11 @@ def test_save_load_pipeline_estimator(self): tvs = TrainValidationSplit(estimator=pipeline, estimatorParamMaps=paramGrid, evaluator=MulticlassClassificationEvaluator()) + tvsPath = temp_path + "/tvs" + tvs.save(tvsPath) + loadedTvs = TrainValidationSplit.load(tvsPath) + self.assert_param_maps_equal(loadedTvs.getEstimatorParamMaps(), paramGrid) + self.assertEqual(loadedTvs.getEstimator().uid, tvs.getEstimator().uid) # Run train validation split, and choose the best set of parameters. tvsModel = tvs.fit(training) @@ -780,6 +814,11 @@ def test_save_load_pipeline_estimator(self): tvs2 = TrainValidationSplit(estimator=nested_pipeline, estimatorParamMaps=paramGrid, evaluator=MulticlassClassificationEvaluator()) + tvs2Path = temp_path + "/tvs2" + tvs2.save(tvs2Path) + loadedTvs2 = TrainValidationSplit.load(tvs2Path) + self.assert_param_maps_equal(loadedTvs2.getEstimatorParamMaps(), paramGrid) + self.assertEqual(loadedTvs2.getEstimator().uid, tvs2.getEstimator().uid) # Run train validation split, and choose the best set of parameters. tvsModel2 = tvs2.fit(training) diff --git a/python/pyspark/ml/tests/test_util.py b/python/pyspark/ml/tests/test_util.py new file mode 100644 index 0000000000000..498a649e480a8 --- /dev/null +++ b/python/pyspark/ml/tests/test_util.py @@ -0,0 +1,84 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import unittest + +from pyspark.ml import Pipeline +from pyspark.ml.classification import LogisticRegression, OneVsRest +from pyspark.ml.feature import VectorAssembler +from pyspark.ml.linalg import Vectors +from pyspark.ml.util import MetaAlgorithmReadWrite +from pyspark.testing.mlutils import SparkSessionTestCase + + +class MetaAlgorithmReadWriteTests(SparkSessionTestCase): + + def test_getAllNestedStages(self): + def _check_uid_set_equal(stages, expected_stages): + uids = set(map(lambda x: x.uid, stages)) + expected_uids = set(map(lambda x: x.uid, expected_stages)) + self.assertEqual(uids, expected_uids) + + df1 = self.spark.createDataFrame([ + (Vectors.dense([1., 2.]), 1.0), + (Vectors.dense([-1., -2.]), 0.0), + ], ['features', 'label']) + df2 = self.spark.createDataFrame([ + (1., 2., 1.0), + (1., 2., 0.0), + ], ['a', 'b', 'label']) + vs = VectorAssembler(inputCols=['a', 'b'], outputCol='features') + lr = LogisticRegression() + pipeline = Pipeline(stages=[vs, lr]) + pipelineModel = pipeline.fit(df2) + ova = OneVsRest(classifier=lr) + ovaModel = ova.fit(df1) + + ova_pipeline = Pipeline(stages=[vs, ova]) + nested_pipeline = Pipeline(stages=[ova_pipeline]) + + _check_uid_set_equal( + MetaAlgorithmReadWrite.getAllNestedStages(pipeline), + [pipeline, vs, lr] + ) + _check_uid_set_equal( + MetaAlgorithmReadWrite.getAllNestedStages(pipelineModel), + [pipelineModel] + pipelineModel.stages + ) + _check_uid_set_equal( + MetaAlgorithmReadWrite.getAllNestedStages(ova), + [ova, lr] + ) + _check_uid_set_equal( + MetaAlgorithmReadWrite.getAllNestedStages(ovaModel), + [ovaModel, lr] + ovaModel.models + ) + _check_uid_set_equal( + MetaAlgorithmReadWrite.getAllNestedStages(nested_pipeline), + [nested_pipeline, ova_pipeline, vs, ova, lr] + ) + + +if __name__ == "__main__": + from pyspark.ml.tests.test_util import * # noqa: F401 + + try: + import xmlrunner # type: ignore[import] + testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) + except ImportError: + testRunner = None + unittest.main(testRunner=testRunner, verbosity=2) diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 6f4ad99484546..2b5a9857b0f18 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -26,8 +26,9 @@ from pyspark.ml.common import _py2java, _java2py from pyspark.ml.param import Params, Param, TypeConverters from pyspark.ml.param.shared import HasCollectSubModels, HasParallelism, HasSeed -from pyspark.ml.util import MLReadable, MLWritable, JavaMLWriter, JavaMLReader -from pyspark.ml.wrapper import JavaParams +from pyspark.ml.util import MLReadable, MLWritable, JavaMLWriter, JavaMLReader, \ + MetaAlgorithmReadWrite +from pyspark.ml.wrapper import JavaParams, JavaEstimator, JavaWrapper from pyspark.sql.functions import col, lit, rand, UserDefinedFunction from pyspark.sql.types import BooleanType @@ -64,6 +65,10 @@ def _parallelFitTasks(est, train, eva, validation, epm, collectSubModel): def singleTask(): index, model = next(modelIter) + # TODO: duplicate evaluator to take extra params from input + # Note: Supporting tuning params in evaluator need 
update method + # `MetaAlgorithmReadWrite.getAllNestedStages`, make it return + # all nested stages and evaluators metric = eva.evaluate(model.transform(validation, epm[index])) return index, metric, model if collectSubModel else None @@ -186,8 +191,16 @@ def _from_java_impl(cls, java_stage): # Load information from java_stage to the instance. estimator = JavaParams._from_java(java_stage.getEstimator()) evaluator = JavaParams._from_java(java_stage.getEvaluator()) - epms = [estimator._transfer_param_map_from_java(epm) - for epm in java_stage.getEstimatorParamMaps()] + if isinstance(estimator, JavaEstimator): + epms = [estimator._transfer_param_map_from_java(epm) + for epm in java_stage.getEstimatorParamMaps()] + elif MetaAlgorithmReadWrite.isMetaEstimator(estimator): + # Meta estimator such as Pipeline, OneVsRest + epms = _ValidatorSharedReadWrite.meta_estimator_transfer_param_maps_from_java( + estimator, java_stage.getEstimatorParamMaps()) + else: + raise ValueError('Unsupported estimator used in tuning: ' + str(estimator)) + return estimator, epms, evaluator def _to_java_impl(self): @@ -198,15 +211,82 @@ def _to_java_impl(self): gateway = SparkContext._gateway cls = SparkContext._jvm.org.apache.spark.ml.param.ParamMap - java_epms = gateway.new_array(cls, len(self.getEstimatorParamMaps())) - for idx, epm in enumerate(self.getEstimatorParamMaps()): - java_epms[idx] = self.getEstimator()._transfer_param_map_to_java(epm) + estimator = self.getEstimator() + if isinstance(estimator, JavaEstimator): + java_epms = gateway.new_array(cls, len(self.getEstimatorParamMaps())) + for idx, epm in enumerate(self.getEstimatorParamMaps()): + java_epms[idx] = self.getEstimator()._transfer_param_map_to_java(epm) + elif MetaAlgorithmReadWrite.isMetaEstimator(estimator): + # Meta estimator such as Pipeline, OneVsRest + java_epms = _ValidatorSharedReadWrite.meta_estimator_transfer_param_maps_to_java( + estimator, self.getEstimatorParamMaps()) + else: + raise ValueError('Unsupported estimator used in tuning: ' + str(estimator)) java_estimator = self.getEstimator()._to_java() java_evaluator = self.getEvaluator()._to_java() return java_estimator, java_epms, java_evaluator +class _ValidatorSharedReadWrite: + @staticmethod + def meta_estimator_transfer_param_maps_to_java(pyEstimator, pyParamMaps): + pyStages = MetaAlgorithmReadWrite.getAllNestedStages(pyEstimator) + stagePairs = list(map(lambda stage: (stage, stage._to_java()), pyStages)) + sc = SparkContext._active_spark_context + + paramMapCls = SparkContext._jvm.org.apache.spark.ml.param.ParamMap + javaParamMaps = SparkContext._gateway.new_array(paramMapCls, len(pyParamMaps)) + + for idx, pyParamMap in enumerate(pyParamMaps): + javaParamMap = JavaWrapper._new_java_obj("org.apache.spark.ml.param.ParamMap") + for pyParam, pyValue in pyParamMap.items(): + javaParam = None + for pyStage, javaStage in stagePairs: + if pyStage._testOwnParam(pyParam.parent, pyParam.name): + javaParam = javaStage.getParam(pyParam.name) + break + if javaParam is None: + raise ValueError('Resolve param in estimatorParamMaps failed: ' + str(pyParam)) + if isinstance(pyValue, Params) and hasattr(pyValue, "_to_java"): + javaValue = pyValue._to_java() + else: + javaValue = _py2java(sc, pyValue) + pair = javaParam.w(javaValue) + javaParamMap.put([pair]) + javaParamMaps[idx] = javaParamMap + return javaParamMaps + + @staticmethod + def meta_estimator_transfer_param_maps_from_java(pyEstimator, javaParamMaps): + pyStages = MetaAlgorithmReadWrite.getAllNestedStages(pyEstimator) + stagePairs = 
list(map(lambda stage: (stage, stage._to_java()), pyStages)) + sc = SparkContext._active_spark_context + pyParamMaps = [] + for javaParamMap in javaParamMaps: + pyParamMap = dict() + for javaPair in javaParamMap.toList(): + javaParam = javaPair.param() + pyParam = None + for pyStage, javaStage in stagePairs: + if pyStage._testOwnParam(javaParam.parent(), javaParam.name()): + pyParam = pyStage.getParam(javaParam.name()) + if pyParam is None: + raise ValueError('Resolve param in estimatorParamMaps failed: ' + + javaParam.parent() + '.' + javaParam.name()) + javaValue = javaPair.value() + if sc._jvm.Class.forName("org.apache.spark.ml.PipelineStage").isInstance(javaValue): + # Note: JavaParams._from_java support both JavaEstimator/JavaTransformer class + # and Estimator/Transformer class which implements `_from_java` static method + # (such as OneVsRest, Pipeline class). + pyValue = JavaParams._from_java(javaValue) + else: + pyValue = _java2py(sc, javaValue) + pyParamMap[pyParam] = pyValue + pyParamMaps.append(pyParamMap) + return pyParamMaps + + class _CrossValidatorParams(_ValidatorParams): """ Params for :py:class:`CrossValidator` and :py:class:`CrossValidatorModel`. diff --git a/python/pyspark/ml/util.py b/python/pyspark/ml/util.py index a7b5a79d75f5f..a34bfb53482a0 100644 --- a/python/pyspark/ml/util.py +++ b/python/pyspark/ml/util.py @@ -592,3 +592,41 @@ def summary(self): no summary exists. """ return (self._call_java("summary")) + + +class MetaAlgorithmReadWrite: + + @staticmethod + def isMetaEstimator(pyInstance): + from pyspark.ml import Estimator, Pipeline + from pyspark.ml.tuning import _ValidatorParams + from pyspark.ml.classification import OneVsRest + return isinstance(pyInstance, Pipeline) or isinstance(pyInstance, OneVsRest) or \ + (isinstance(pyInstance, Estimator) and isinstance(pyInstance, _ValidatorParams)) + + @staticmethod + def getAllNestedStages(pyInstance): + from pyspark.ml import Pipeline, PipelineModel + from pyspark.ml.tuning import _ValidatorParams + from pyspark.ml.classification import OneVsRest, OneVsRestModel + + # TODO: We need to handle `RFormulaModel.pipelineModel` here after Pyspark RFormulaModel + # support pipelineModel property. + if isinstance(pyInstance, Pipeline): + pySubStages = pyInstance.getStages() + elif isinstance(pyInstance, PipelineModel): + pySubStages = pyInstance.stages + elif isinstance(pyInstance, _ValidatorParams): + raise ValueError('PySpark does not support nested validator.') + elif isinstance(pyInstance, OneVsRest): + pySubStages = [pyInstance.getClassifier()] + elif isinstance(pyInstance, OneVsRestModel): + pySubStages = [pyInstance.getClassifier()] + pyInstance.models + else: + pySubStages = [] + + nestedStages = [] + for pySubStage in pySubStages: + nestedStages.extend(MetaAlgorithmReadWrite.getAllNestedStages(pySubStage)) + + return [pyInstance] + nestedStages diff --git a/python/pyspark/ml/util.pyi b/python/pyspark/ml/util.pyi index d0781b2e26ed5..e2496e181f14f 100644 --- a/python/pyspark/ml/util.pyi +++ b/python/pyspark/ml/util.pyi @@ -126,3 +126,9 @@ class HasTrainingSummary(Generic[S]): def hasSummary(self) -> bool: ... @property def summary(self) -> S: ... + +class MetaAlgorithmReadWrite: + @staticmethod + def isMetaEstimator(pyInstance: Any) -> bool: ... + @staticmethod + def getAllNestedStages(pyInstance: Any) -> list: ... 
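
The patch above routes estimator param maps through the new `_ValidatorSharedReadWrite` helpers (and `MetaAlgorithmReadWrite.getAllNestedStages`) whenever the tuned estimator is a meta estimator such as `Pipeline` or `OneVsRest`, so that params belonging to nested stages can be resolved when converting to and from the JVM. The following is a minimal sketch, not part of the patch, of the kind of workflow this targets: a `CrossValidator` whose estimator is a `OneVsRest` and whose param grid is built against the nested `LogisticRegression`, then persisted so the param maps must go through `_to_java_impl`. The local master, toy data, and save path are assumptions for illustration only.

```python
from pyspark.sql import SparkSession
from pyspark.ml.classification import LogisticRegression, OneVsRest
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
from pyspark.ml.linalg import Vectors
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder

spark = SparkSession.builder.master("local[2]").appName("cv-ova-sketch").getOrCreate()

# Toy training data; any labeled DataFrame with a 'features' vector column works.
train_df = spark.createDataFrame([
    (Vectors.dense([1.0, 2.0]), 0.0),
    (Vectors.dense([2.0, 1.0]), 0.0),
    (Vectors.dense([1.5, 1.5]), 0.0),
    (Vectors.dense([-1.0, -2.0]), 1.0),
    (Vectors.dense([-2.0, -1.0]), 1.0),
    (Vectors.dense([-1.5, -1.5]), 1.0),
], ["features", "label"])

lr = LogisticRegression(maxIter=5)
ova = OneVsRest(classifier=lr)

# The grid refers to params of the *nested* classifier, which is what
# meta_estimator_transfer_param_maps_to_java/_from_java have to resolve.
grid = ParamGridBuilder().addGrid(lr.regParam, [0.01, 0.1]).build()

cv = CrossValidator(estimator=ova,
                    estimatorParamMaps=grid,
                    evaluator=MulticlassClassificationEvaluator(),
                    numFolds=2)

cv_model = cv.fit(train_df)
# Persisting the fitted validator exercises the _to_java_impl path shown above.
cv_model.write().overwrite().save("/tmp/cv_ova_model")  # hypothetical path
```
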
From c50fcac00ea9b86aa6f6edb738e53ba476261027 Mon Sep 17 00:00:00 2001
From: Kousuke Saruta
Date: Tue, 1 Dec 2020 11:45:32 +0900
Subject: [PATCH 0618/1009] [SPARK-33607][SS][WEBUI] Input Rate timeline/histogram aren't rendered if built with Scala 2.13

### What changes were proposed in this pull request?

This PR fixes an issue where the histogram and timeline aren't rendered in the `Streaming Query Statistics` page when Spark is built with Scala 2.13.

![before-fix-the-issue](https://user-images.githubusercontent.com/4736016/100612855-f543d700-3356-11eb-90d9-ede57b8b3f4f.png)
![NaN_Error](https://user-images.githubusercontent.com/4736016/100612879-00970280-3357-11eb-97cf-43978bbe2d3a.png)

The reason is that [`maxRecordRate` can be `NaN`](https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala#L371) with Scala 2.13. The `NaN` is the result of [`query.recentProgress.map(_.inputRowsPerSecond).max`](https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala#L372) when the first element of `query.recentProgress.map(_.inputRowsPerSecond)` is `NaN`.

The comparison logic for the `Double` type was changed in Scala 2.13:
https://github.com/scala/bug/issues/12107
https://github.com/scala/scala/pull/6410

So this issue happens as of Scala 2.13.

The root cause of the `NaN` is [here](https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala#L164). This `NaN` appears to be an initial value of `inputTimeSec`, so I think `Double.PositiveInfinity` is more suitable than `NaN`, and this change resolves the issue.

### Why are the changes needed?

To make sure we can use the histogram/timeline with Scala 2.13.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

First, I built with the following commands.
```
$ dev/change-scala-version.sh 2.13
$ build/sbt -Phive -Phive-thriftserver -Pscala-2.13 package
```

Then, I ran the following query (brought from #30427).
```
import org.apache.spark.sql.streaming.Trigger

val query = spark
  .readStream
  .format("rate")
  .option("rowsPerSecond", 1000)
  .option("rampUpTime", "10s")
  .load()
  .selectExpr("*", "CAST(CAST(timestamp AS BIGINT) - CAST((RAND() * 100000) AS BIGINT) AS TIMESTAMP) AS tsMod")
  .selectExpr("tsMod", "mod(value, 100) as mod", "value")
  .withWatermark("tsMod", "10 seconds")
  .groupBy(window($"tsMod", "1 minute", "10 seconds"), $"mod")
  .agg(max("value").as("max_value"), min("value").as("min_value"), avg("value").as("avg_value"))
  .writeStream
  .format("console")
  .trigger(Trigger.ProcessingTime("5 seconds"))
  .outputMode("append")
  .start()
```

Finally, I confirmed that the timeline and histogram are rendered.

![after-fix-the-issue](https://user-images.githubusercontent.com/4736016/100612736-c9285600-3356-11eb-856d-7e53cc656c36.png)

Closes #30546 from sarutak/ss-nan.
Authored-by: Kousuke Saruta Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../apache/spark/sql/execution/streaming/ProgressReporter.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala index fe3f0e95b383c..57cb551bba17d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala @@ -161,7 +161,7 @@ trait ProgressReporter extends Logging { val inputTimeSec = if (lastTriggerStartTimestamp >= 0) { (currentTriggerStartTimestamp - lastTriggerStartTimestamp).toDouble / MILLIS_PER_SECOND } else { - Double.NaN + Double.PositiveInfinity } logDebug(s"Execution stats: $executionStats") From 2af2da5a4b1f5dbf0b55afd0b2514a52f03ffa94 Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Tue, 1 Dec 2020 13:11:14 +0900 Subject: [PATCH 0619/1009] [SPARK-30900][SS] FileStreamSource: Avoid reading compact metadata log twice if the query restarts from compact batch ### What changes were proposed in this pull request? This patch addresses the case where compact metadata file is read twice in FileStreamSource during restarting query. When restarting the query, there is a case which the query starts from compaction batch, and the batch has source metadata file to read. One case is that the previous query succeeded to read from inputs, but not finalized the batch for various reasons. The patch finds the latest compaction batch when restoring from metadata log, and put entries for the batch into the file entry cache which would avoid reading compact batch file twice. FileStreamSourceLog doesn't know about offset / commit metadata in checkpoint so doesn't know which exactly batch to start from, but in practice, only couple of latest batches are candidates to be started from when restarting query. This patch leverages the fact to skip calculation if possible. ### Why are the changes needed? Spark incurs unnecessary cost on reading the compact metadata file twice on some case, which may not be ignorable when the query has been processed huge number of files so far. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? New UT. Closes #27649 from HeartSaVioR/SPARK-30900. Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../streaming/FileStreamSource.scala | 2 +- .../streaming/FileStreamSourceLog.scala | 27 ++++++++ .../sql/streaming/FileStreamSourceSuite.scala | 64 +++++++++++++++++++ 3 files changed, 92 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala index 42401fe069551..e53c5a9c4024e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala @@ -104,7 +104,7 @@ class FileStreamSource( // Visible for testing and debugging in production. 
val seenFiles = new SeenFilesMap(maxFileAgeMs, fileNameOnly) - metadataLog.allFiles().foreach { entry => + metadataLog.restore().foreach { entry => seenFiles.add(entry.path, entry.timestamp) } seenFiles.purge() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSourceLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSourceLog.scala index 88a2326c9a02c..5fe9a39c91e0b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSourceLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSourceLog.scala @@ -36,6 +36,7 @@ class FileStreamSourceLog( extends CompactibleFileStreamLog[FileEntry](metadataLogVersion, sparkSession, path) { import CompactibleFileStreamLog._ + import FileStreamSourceLog._ // Configurations about metadata compaction protected override val defaultCompactInterval: Int = @@ -118,8 +119,34 @@ class FileStreamSourceLog( } batches } + + def restore(): Array[FileEntry] = { + val files = allFiles() + + // When restarting the query, there is a case which the query starts from compaction batch, + // and the batch has source metadata file to read. One case is that the previous query + // succeeded to read from inputs, but not finalized the batch for various reasons. + // The below code finds the latest compaction batch, and put entries for the batch into the + // file entry cache which would avoid reading compact batch file twice. + // It doesn't know about offset / commit metadata in checkpoint so doesn't know which exactly + // batch to start from, but in practice, only couple of latest batches are candidates to + // be started. We leverage the fact to skip calculation if possible. + files.lastOption.foreach { lastEntry => + val latestBatchId = lastEntry.batchId + val latestCompactedBatchId = getAllValidBatches(latestBatchId, compactInterval)(0) + if ((latestBatchId - latestCompactedBatchId) < PREV_NUM_BATCHES_TO_READ_IN_RESTORE) { + val logsForLatestCompactedBatch = files.filter { entry => + entry.batchId == latestCompactedBatchId + } + fileEntryCache.put(latestCompactedBatchId, logsForLatestCompactedBatch) + } + } + + files + } } object FileStreamSourceLog { val VERSION = 1 + val PREV_NUM_BATCHES_TO_READ_IN_RESTORE = 2 } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala index 718095003b096..3c74e316f260e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala @@ -1376,6 +1376,70 @@ class FileStreamSourceSuite extends FileStreamSourceTest { } } + test("restore from file stream source log") { + def createEntries(batchId: Long, count: Int): Array[FileEntry] = { + (1 to count).map { idx => + FileEntry(s"path_${batchId}_$idx", 10000 * batchId + count, batchId) + }.toArray + } + + withSQLConf(SQLConf.FILE_SOURCE_LOG_COMPACT_INTERVAL.key -> "5") { + def verifyBatchAvailabilityInCache( + fileEntryCache: java.util.LinkedHashMap[Long, Array[FileEntry]], + expectNotAvailable: Seq[Int], + expectAvailable: Seq[Int]): Unit = { + expectNotAvailable.foreach { batchId => + assert(!fileEntryCache.containsKey(batchId.toLong)) + } + expectAvailable.foreach { batchId => + assert(fileEntryCache.containsKey(batchId.toLong)) + } + } + withTempDir { chk => + val _fileEntryCache = 
PrivateMethod[java.util.LinkedHashMap[Long, Array[FileEntry]]]( + Symbol("fileEntryCache")) + + val metadata = new FileStreamSourceLog(FileStreamSourceLog.VERSION, spark, + chk.getCanonicalPath) + val fileEntryCache = metadata invokePrivate _fileEntryCache() + + (0 to 4).foreach { batchId => + metadata.add(batchId, createEntries(batchId, 100)) + } + val allFiles = metadata.allFiles() + + // batch 4 is a compact batch which logs would be cached in fileEntryCache + verifyBatchAvailabilityInCache(fileEntryCache, Seq(0, 1, 2, 3), Seq(4)) + + val metadata2 = new FileStreamSourceLog(FileStreamSourceLog.VERSION, spark, + chk.getCanonicalPath) + val fileEntryCache2 = metadata2 invokePrivate _fileEntryCache() + + // allFiles() doesn't restore the logs for the latest compact batch into file entry cache + assert(metadata2.allFiles() === allFiles) + verifyBatchAvailabilityInCache(fileEntryCache2, Seq(0, 1, 2, 3, 4), Seq.empty) + + // restore() will restore the logs for the latest compact batch into file entry cache + assert(metadata2.restore() === allFiles) + verifyBatchAvailabilityInCache(fileEntryCache2, Seq(0, 1, 2, 3), Seq(4)) + + (5 to 5 + FileStreamSourceLog.PREV_NUM_BATCHES_TO_READ_IN_RESTORE).foreach { batchId => + metadata2.add(batchId, createEntries(batchId, 100)) + } + + val metadata3 = new FileStreamSourceLog(FileStreamSourceLog.VERSION, spark, + chk.getCanonicalPath) + val fileEntryCache3 = metadata3 invokePrivate _fileEntryCache() + + // restore() will not restore the logs for the latest compact batch into file entry cache + // if the latest batch is too far from latest compact batch, because it's unlikely Spark + // will request the batch for the start point. + assert(metadata3.restore() === metadata2.allFiles()) + verifyBatchAvailabilityInCache(fileEntryCache3, Seq(0, 1, 2, 3, 4), Seq.empty) + } + } + } + test("get arbitrary batch from FileStreamSource") { withTempDirs { case (src, tmp) => withSQLConf( From 1a042cc414c0c720535798b9a1197fe8885d6f6e Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Tue, 1 Dec 2020 13:43:02 +0900 Subject: [PATCH 0620/1009] [SPARK-33530][CORE] Support --archives and spark.archives option natively ### What changes were proposed in this pull request? TL;DR: - This PR completes the support of archives in Spark itself instead of Yarn-only - It makes `--archives` option work in other cluster modes too and adds `spark.archives` configuration. - After this PR, PySpark users can leverage Conda to ship Python packages together as below: ```python conda create -y -n pyspark_env -c conda-forge pyarrow==2.0.0 pandas==1.1.4 conda-pack==0.5.0 conda activate pyspark_env conda pack -f -o pyspark_env.tar.gz PYSPARK_DRIVER_PYTHON=python PYSPARK_PYTHON=./environment/bin/python pyspark --archives pyspark_env.tar.gz#environment ``` - Issue a warning that undocumented and hidden behavior of partial archive handling in `spark.files` / `SparkContext.addFile` will be deprecated, and users can use `spark.archives` and `SparkContext.addArchive`. This PR proposes to add Spark's native `--archives` in Spark submit, and `spark.archives` configuration. Currently, both are supported only in Yarn mode: ```bash ./bin/spark-submit --help ``` ``` Options: ... Spark on YARN only: --queue QUEUE_NAME The YARN queue to submit to (Default: "default"). --archives ARCHIVES Comma separated list of archives to be extracted into the working directory of each executor. ``` This `archives` feature is useful often when you have to ship a directory and unpack into executors. 
One example is native libraries to use e.g. JNI. Another example is to ship Python packages together by Conda environment. Especially for Conda, PySpark currently does not have a nice way to ship a package that works in general, please see also https://hyukjin-spark.readthedocs.io/en/stable/user_guide/python_packaging.html#using-zipped-virtual-environment (PySpark new documentation demo for 3.1.0). The neatest way is arguably to use Conda environment by shipping zipped Conda environment but this is currently dependent on this archive feature. NOTE that we are able to use `spark.files` by relying on its undocumented behaviour that untars `tar.gz` but I don't think we should document such ways and promote people to more rely on it. Also, note that this PR does not target to add the feature parity of `spark.files.overwrite`, `spark.files.useFetchCache`, etc. yet. I documented that this is an experimental feature as well. ### Why are the changes needed? To complete the feature parity, and to provide a better support of shipping Python libraries together with Conda env. ### Does this PR introduce _any_ user-facing change? Yes, this makes `--archives` works in Spark instead of Yarn-only, and adds a new configuration `spark.archives`. ### How was this patch tested? I added unittests. Also, manually tested in standalone cluster, local-cluster, and local modes. Closes #30486 from HyukjinKwon/native-archive. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- .../scala/org/apache/spark/SparkContext.scala | 89 ++++++++++++++++--- .../scala/org/apache/spark/SparkEnv.scala | 5 +- .../org/apache/spark/deploy/SparkSubmit.scala | 3 + .../spark/deploy/SparkSubmitArguments.scala | 5 +- .../org/apache/spark/executor/Executor.scala | 50 ++++++++--- .../spark/internal/config/package.scala | 10 +++ .../spark/scheduler/TaskDescription.scala | 9 +- .../spark/scheduler/TaskSetManager.scala | 2 + .../scala/org/apache/spark/util/Utils.scala | 52 +++++++++-- .../org/apache/spark/SparkContextSuite.scala | 79 ++++++++++++++++ .../spark/deploy/SparkSubmitSuite.scala | 37 ++++++++ .../deploy/rest/SubmitRestProtocolSuite.scala | 3 + .../CoarseGrainedExecutorBackendSuite.scala | 2 +- .../apache/spark/executor/ExecutorSuite.scala | 1 + .../CoarseGrainedSchedulerBackendSuite.scala | 3 +- .../scheduler/EventLoggingListenerSuite.scala | 3 +- .../scheduler/TaskDescriptionSuite.scala | 6 ++ docs/configuration.md | 11 +++ project/MimaExcludes.scala | 1 + .../source/user_guide/python_packaging.rst | 27 +++--- ...esosFineGrainedSchedulerBackendSuite.scala | 2 + 21 files changed, 347 insertions(+), 53 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index b953592fa04dc..86f1d745d91d4 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -22,6 +22,7 @@ import java.net.URI import java.util.{Arrays, Locale, Properties, ServiceLoader, UUID} import java.util.concurrent.{ConcurrentHashMap, ConcurrentMap} import java.util.concurrent.atomic.{AtomicBoolean, AtomicInteger, AtomicReference} +import javax.ws.rs.core.UriBuilder import scala.collection.JavaConverters._ import scala.collection.Map @@ -39,7 +40,7 @@ import org.apache.hadoop.mapred.{FileInputFormat, InputFormat, JobConf, Sequence import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat, Job => NewHadoopJob} import org.apache.hadoop.mapreduce.lib.input.{FileInputFormat => NewFileInputFormat} -import 
org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.broadcast.Broadcast import org.apache.spark.deploy.{LocalSparkCluster, SparkHadoopUtil} import org.apache.spark.executor.{Executor, ExecutorMetrics, ExecutorMetricsSource} @@ -221,6 +222,7 @@ class SparkContext(config: SparkConf) extends Logging { private var _listenerBusStarted: Boolean = false private var _jars: Seq[String] = _ private var _files: Seq[String] = _ + private var _archives: Seq[String] = _ private var _shutdownHookRef: AnyRef = _ private var _statusStore: AppStatusStore = _ private var _heartbeater: Heartbeater = _ @@ -246,6 +248,7 @@ class SparkContext(config: SparkConf) extends Logging { def jars: Seq[String] = _jars def files: Seq[String] = _files + def archives: Seq[String] = _archives def master: String = _conf.get("spark.master") def deployMode: String = _conf.get(SUBMIT_DEPLOY_MODE) def appName: String = _conf.get("spark.app.name") @@ -278,6 +281,7 @@ class SparkContext(config: SparkConf) extends Logging { // Used to store a URL for each static file/jar together with the file's local timestamp private[spark] val addedFiles = new ConcurrentHashMap[String, Long]().asScala + private[spark] val addedArchives = new ConcurrentHashMap[String, Long]().asScala private[spark] val addedJars = new ConcurrentHashMap[String, Long]().asScala // Keeps track of all persisted RDDs @@ -422,6 +426,7 @@ class SparkContext(config: SparkConf) extends Logging { _jars = Utils.getUserJars(_conf) _files = _conf.getOption(FILES.key).map(_.split(",")).map(_.filter(_.nonEmpty)) .toSeq.flatten + _archives = _conf.getOption(ARCHIVES.key).map(Utils.stringToSeq).toSeq.flatten _eventLogDir = if (isEventLogEnabled) { @@ -506,6 +511,13 @@ class SparkContext(config: SparkConf) extends Logging { } } + if (archives != null) { + archives.foreach(file => addFile(file, false, true, isArchive = true)) + if (addedArchives.nonEmpty) { + _conf.set("spark.app.initial.archive.urls", addedArchives.keys.toSeq.mkString(",")) + } + } + _executorMemory = _conf.getOption(EXECUTOR_MEMORY.key) .orElse(Option(System.getenv("SPARK_EXECUTOR_MEMORY"))) .orElse(Option(System.getenv("SPARK_MEM")) @@ -1521,6 +1533,36 @@ class SparkContext(config: SparkConf) extends Logging { */ def listFiles(): Seq[String] = addedFiles.keySet.toSeq + /** + * :: Experimental :: + * Add an archive to be downloaded and unpacked with this Spark job on every node. + * + * If an archive is added during execution, it will not be available until the next TaskSet + * starts. + * + * @param path can be either a local file, a file in HDFS (or other Hadoop-supported + * filesystems), or an HTTP, HTTPS or FTP URI. To access the file in Spark jobs, + * use `SparkFiles.get(paths-to-files)` to find its download/unpacked location. + * The given path should be one of .zip, .tar, .tar.gz, .tgz and .jar. + * + * @note A path can be added only once. Subsequent additions of the same path are ignored. + * + * @since 3.1.0 + */ + @Experimental + def addArchive(path: String): Unit = { + addFile(path, false, false, isArchive = true) + } + + /** + * :: Experimental :: + * Returns a list of archive paths that are added to resources. + * + * @since 3.1.0 + */ + @Experimental + def listArchives(): Seq[String] = addedArchives.keySet.toSeq + /** * Add a file to be downloaded with this Spark job on every node. 
* @@ -1538,8 +1580,14 @@ class SparkContext(config: SparkConf) extends Logging { addFile(path, recursive, false) } - private def addFile(path: String, recursive: Boolean, addedOnSubmit: Boolean): Unit = { - val uri = new Path(path).toUri + private def addFile( + path: String, recursive: Boolean, addedOnSubmit: Boolean, isArchive: Boolean = false + ): Unit = { + val uri = if (!isArchive) { + new Path(path).toUri + } else { + Utils.resolveURI(path) + } val schemeCorrectedURI = uri.getScheme match { case null => new File(path).getCanonicalFile.toURI case "local" => @@ -1551,7 +1599,7 @@ class SparkContext(config: SparkConf) extends Logging { val hadoopPath = new Path(schemeCorrectedURI) val scheme = schemeCorrectedURI.getScheme - if (!Array("http", "https", "ftp").contains(scheme)) { + if (!Array("http", "https", "ftp").contains(scheme) && !isArchive) { val fs = hadoopPath.getFileSystem(hadoopConfiguration) val isDir = fs.getFileStatus(hadoopPath).isDirectory if (!isLocal && scheme == "file" && isDir) { @@ -1569,21 +1617,39 @@ class SparkContext(config: SparkConf) extends Logging { val key = if (!isLocal && scheme == "file") { env.rpcEnv.fileServer.addFile(new File(uri.getPath)) + } else if (uri.getScheme == null) { + schemeCorrectedURI.toString + } else if (isArchive) { + uri.toString } else { - if (uri.getScheme == null) { - schemeCorrectedURI.toString - } else { - path - } + path } + val timestamp = if (addedOnSubmit) startTime else System.currentTimeMillis - if (addedFiles.putIfAbsent(key, timestamp).isEmpty) { + if (!isArchive && addedFiles.putIfAbsent(key, timestamp).isEmpty) { logInfo(s"Added file $path at $key with timestamp $timestamp") // Fetch the file locally so that closures which are run on the driver can still use the // SparkFiles API to access files. Utils.fetchFile(uri.toString, new File(SparkFiles.getRootDirectory()), conf, env.securityManager, hadoopConfiguration, timestamp, useCache = false) postEnvironmentUpdate() + } else if ( + isArchive && + addedArchives.putIfAbsent( + UriBuilder.fromUri(new URI(key)).fragment(uri.getFragment).build().toString, + timestamp).isEmpty) { + logInfo(s"Added archive $path at $key with timestamp $timestamp") + val uriToDownload = UriBuilder.fromUri(new URI(key)).fragment(null).build() + val source = Utils.fetchFile(uriToDownload.toString, Utils.createTempDir(), conf, + env.securityManager, hadoopConfiguration, timestamp, useCache = false, shouldUntar = false) + val dest = new File( + SparkFiles.getRootDirectory(), + if (uri.getFragment != null) uri.getFragment else source.getName) + logInfo( + s"Unpacking an archive $path from ${source.getAbsolutePath} to ${dest.getAbsolutePath}") + Utils.deleteRecursively(dest) + Utils.unpack(source, dest) + postEnvironmentUpdate() } else { logWarning(s"The path $path has been added already. 
Overwriting of added paths " + "is not supported in the current version.") @@ -2495,8 +2561,9 @@ class SparkContext(config: SparkConf) extends Logging { val schedulingMode = getSchedulingMode.toString val addedJarPaths = addedJars.keys.toSeq val addedFilePaths = addedFiles.keys.toSeq + val addedArchivePaths = addedArchives.keys.toSeq val environmentDetails = SparkEnv.environmentDetails(conf, hadoopConfiguration, - schedulingMode, addedJarPaths, addedFilePaths) + schedulingMode, addedJarPaths, addedFilePaths, addedArchivePaths) val environmentUpdate = SparkListenerEnvironmentUpdate(environmentDetails) listenerBus.post(environmentUpdate) } diff --git a/core/src/main/scala/org/apache/spark/SparkEnv.scala b/core/src/main/scala/org/apache/spark/SparkEnv.scala index d543359f4dedf..9fc60ac3990fc 100644 --- a/core/src/main/scala/org/apache/spark/SparkEnv.scala +++ b/core/src/main/scala/org/apache/spark/SparkEnv.scala @@ -454,7 +454,8 @@ object SparkEnv extends Logging { hadoopConf: Configuration, schedulingMode: String, addedJars: Seq[String], - addedFiles: Seq[String]): Map[String, Seq[(String, String)]] = { + addedFiles: Seq[String], + addedArchives: Seq[String]): Map[String, Seq[(String, String)]] = { import Properties._ val jvmInformation = Seq( @@ -484,7 +485,7 @@ object SparkEnv extends Logging { .split(File.pathSeparator) .filterNot(_.isEmpty) .map((_, "System Classpath")) - val addedJarsAndFiles = (addedJars ++ addedFiles).map((_, "Added By User")) + val addedJarsAndFiles = (addedJars ++ addedFiles ++ addedArchives).map((_, "Added By User")) val classPaths = (addedJarsAndFiles ++ classPathEntries).sorted // Add Hadoop properties, it will not ignore configs including in Spark. Some spark diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 4aa393c514af6..a344bce7a0f3c 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -607,6 +607,8 @@ private[spark] class SparkSubmit extends Logging { confKey = CORES_MAX.key), OptionAssigner(args.files, LOCAL | STANDALONE | MESOS | KUBERNETES, ALL_DEPLOY_MODES, confKey = FILES.key), + OptionAssigner(args.archives, LOCAL | STANDALONE | MESOS | KUBERNETES, ALL_DEPLOY_MODES, + confKey = ARCHIVES.key), OptionAssigner(args.jars, LOCAL, CLIENT, confKey = JARS.key), OptionAssigner(args.jars, STANDALONE | MESOS | KUBERNETES, ALL_DEPLOY_MODES, confKey = JARS.key), @@ -796,6 +798,7 @@ private[spark] class SparkSubmit extends Logging { val pathConfigs = Seq( JARS.key, FILES.key, + ARCHIVES.key, "spark.yarn.dist.files", "spark.yarn.dist.archives", "spark.yarn.dist.jars") diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 3090a3b10a97c..9da1a73bba692 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -183,6 +183,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S name = Option(name).orElse(sparkProperties.get("spark.app.name")).orNull jars = Option(jars).orElse(sparkProperties.get(config.JARS.key)).orNull files = Option(files).orElse(sparkProperties.get(config.FILES.key)).orNull + archives = Option(archives).orElse(sparkProperties.get(config.ARCHIVES.key)).orNull pyFiles = 
Option(pyFiles).orElse(sparkProperties.get(config.SUBMIT_PYTHON_FILES.key)).orNull ivyRepoPath = sparkProperties.get("spark.jars.ivy").orNull ivySettingsPath = sparkProperties.get("spark.jars.ivySettings") @@ -512,6 +513,8 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S | --files FILES Comma-separated list of files to be placed in the working | directory of each executor. File paths of these files | in executors can be accessed via SparkFiles.get(fileName). + | --archives ARCHIVES Comma-separated list of archives to be extracted into the + | working directory of each executor. | | --conf, -c PROP=VALUE Arbitrary Spark configuration property. | --properties-file FILE Path to a file from which to load extra properties. If not @@ -562,8 +565,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S | | Spark on YARN only: | --queue QUEUE_NAME The YARN queue to submit to (Default: "default"). - | --archives ARCHIVES Comma separated list of archives to be extracted into the - | working directory of each executor. """.stripMargin ) diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index c81ac778a32d1..e7f1b8f3cf17a 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -26,6 +26,7 @@ import java.util.{Locale, Properties} import java.util.concurrent._ import java.util.concurrent.atomic.AtomicBoolean import javax.annotation.concurrent.GuardedBy +import javax.ws.rs.core.UriBuilder import scala.collection.JavaConverters._ import scala.collection.immutable @@ -78,6 +79,7 @@ private[spark] class Executor( // Each map holds the master's timestamp for the version of that file or JAR we got. private val currentFiles: HashMap[String, Long] = new HashMap[String, Long]() private val currentJars: HashMap[String, Long] = new HashMap[String, Long]() + private val currentArchives: HashMap[String, Long] = new HashMap[String, Long]() private val EMPTY_BYTE_BUFFER = ByteBuffer.wrap(new Array[Byte](0)) @@ -232,16 +234,17 @@ private[spark] class Executor( private val appStartTime = conf.getLong("spark.app.startTime", 0) // To allow users to distribute plugins and their required files - // specified by --jars and --files on application submission, those jars/files should be - // downloaded and added to the class loader via updateDependencies. - // This should be done before plugin initialization below + // specified by --jars, --files and --archives on application submission, those + // jars/files/archives should be downloaded and added to the class loader via + // updateDependencies. This should be done before plugin initialization below // because executors search plugins from the class loader and initialize them. 
- private val Seq(initialUserJars, initialUserFiles) = Seq("jar", "file").map { key => - conf.getOption(s"spark.app.initial.$key.urls").map { urls => - Map(urls.split(",").map(url => (url, appStartTime)): _*) - }.getOrElse(Map.empty) - } - updateDependencies(initialUserFiles, initialUserJars) + private val Seq(initialUserJars, initialUserFiles, initialUserArchives) = + Seq("jar", "file", "archive").map { key => + conf.getOption(s"spark.app.initial.$key.urls").map { urls => + Map(urls.split(",").map(url => (url, appStartTime)): _*) + }.getOrElse(Map.empty) + } + updateDependencies(initialUserFiles, initialUserJars, initialUserArchives) // Plugins need to load using a class loader that includes the executor's user classpath. // Plugins also needs to be initialized after the heartbeater started @@ -449,7 +452,8 @@ private[spark] class Executor( // requires access to properties contained within (e.g. for access control). Executor.taskDeserializationProps.set(taskDescription.properties) - updateDependencies(taskDescription.addedFiles, taskDescription.addedJars) + updateDependencies( + taskDescription.addedFiles, taskDescription.addedJars, taskDescription.addedArchives) task = ser.deserialize[Task[Any]]( taskDescription.serializedTask, Thread.currentThread.getContextClassLoader) task.localProperties = taskDescription.properties @@ -909,24 +913,42 @@ private[spark] class Executor( * Download any missing dependencies if we receive a new set of files and JARs from the * SparkContext. Also adds any new JARs we fetched to the class loader. */ - private def updateDependencies(newFiles: Map[String, Long], newJars: Map[String, Long]): Unit = { + private def updateDependencies( + newFiles: Map[String, Long], + newJars: Map[String, Long], + newArchives: Map[String, Long]): Unit = { lazy val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf) synchronized { // Fetch missing dependencies for ((name, timestamp) <- newFiles if currentFiles.getOrElse(name, -1L) < timestamp) { - logInfo("Fetching " + name + " with timestamp " + timestamp) + logInfo(s"Fetching $name with timestamp $timestamp") // Fetch file with useCache mode, close cache for local mode. Utils.fetchFile(name, new File(SparkFiles.getRootDirectory()), conf, env.securityManager, hadoopConf, timestamp, useCache = !isLocal) currentFiles(name) = timestamp } + for ((name, timestamp) <- newArchives if currentArchives.getOrElse(name, -1L) < timestamp) { + logInfo(s"Fetching $name with timestamp $timestamp") + val sourceURI = new URI(name) + val uriToDownload = UriBuilder.fromUri(sourceURI).fragment(null).build() + val source = Utils.fetchFile(uriToDownload.toString, Utils.createTempDir(), conf, + env.securityManager, hadoopConf, timestamp, useCache = !isLocal, shouldUntar = false) + val dest = new File( + SparkFiles.getRootDirectory(), + if (sourceURI.getFragment != null) sourceURI.getFragment else source.getName) + logInfo( + s"Unpacking an archive $name from ${source.getAbsolutePath} to ${dest.getAbsolutePath}") + Utils.deleteRecursively(dest) + Utils.unpack(source, dest) + currentArchives(name) = timestamp + } for ((name, timestamp) <- newJars) { val localName = new URI(name).getPath.split("/").last val currentTimeStamp = currentJars.get(name) .orElse(currentJars.get(localName)) .getOrElse(-1L) if (currentTimeStamp < timestamp) { - logInfo("Fetching " + name + " with timestamp " + timestamp) + logInfo(s"Fetching $name with timestamp $timestamp") // Fetch file with useCache mode, close cache for local mode. 
Utils.fetchFile(name, new File(SparkFiles.getRootDirectory()), conf, env.securityManager, hadoopConf, timestamp, useCache = !isLocal) @@ -934,7 +956,7 @@ private[spark] class Executor( // Add it to our class loader val url = new File(SparkFiles.getRootDirectory(), localName).toURI.toURL if (!urlClassLoader.getURLs().contains(url)) { - logInfo("Adding " + url + " to class loader") + logInfo(s"Adding $url to class loader") urlClassLoader.addURL(url) } } diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 093a0ecf58d32..6639f20a068d4 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -1813,6 +1813,16 @@ package object config { .toSequence .createWithDefault(Nil) + private[spark] val ARCHIVES = ConfigBuilder("spark.archives") + .version("3.1.0") + .doc("Comma-separated list of archives to be extracted into the working directory of each " + + "executor. .jar, .tar.gz, .tgz and .zip are supported. You can specify the directory " + + "name to unpack via adding '#' after the file name to unpack, for example, " + + "'file.zip#directory'. This configuration is experimental.") + .stringConf + .toSequence + .createWithDefault(Nil) + private[spark] val SUBMIT_DEPLOY_MODE = ConfigBuilder("spark.submit.deployMode") .version("1.5.0") .stringConf diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskDescription.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskDescription.scala index 863bf27088355..12b911d06153b 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskDescription.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskDescription.scala @@ -55,6 +55,7 @@ private[spark] class TaskDescription( val partitionId: Int, val addedFiles: Map[String, Long], val addedJars: Map[String, Long], + val addedArchives: Map[String, Long], val properties: Properties, val resources: immutable.Map[String, ResourceInformation], val serializedTask: ByteBuffer) { @@ -99,6 +100,9 @@ private[spark] object TaskDescription { // Write jars. serializeStringLongMap(taskDescription.addedJars, dataOut) + // Write archives. + serializeStringLongMap(taskDescription.addedArchives, dataOut) + // Write properties. dataOut.writeInt(taskDescription.properties.size()) taskDescription.properties.asScala.foreach { case (key, value) => @@ -167,6 +171,9 @@ private[spark] object TaskDescription { // Read jars. val taskJars = deserializeStringLongMap(dataIn) + // Read archives. + val taskArchives = deserializeStringLongMap(dataIn) + // Read properties. 
val properties = new Properties() val numProperties = dataIn.readInt() @@ -185,6 +192,6 @@ private[spark] object TaskDescription { val serializedTask = byteBuffer.slice() new TaskDescription(taskId, attemptNumber, executorId, name, index, partitionId, taskFiles, - taskJars, properties, resources, serializedTask) + taskJars, taskArchives, properties, resources, serializedTask) } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index 914fccc1a67cd..ad0791fa42931 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -63,6 +63,7 @@ private[spark] class TaskSetManager( // SPARK-21563 make a copy of the jars/files so they are consistent across the TaskSet private val addedJars = HashMap[String, Long](sched.sc.addedJars.toSeq: _*) private val addedFiles = HashMap[String, Long](sched.sc.addedFiles.toSeq: _*) + private val addedArchives = HashMap[String, Long](sched.sc.addedArchives.toSeq: _*) val maxResultSize = conf.get(config.MAX_RESULT_SIZE) @@ -493,6 +494,7 @@ private[spark] class TaskSetManager( task.partitionId, addedFiles, addedJars, + addedArchives, task.localProperties, taskResourceAssignments, serializedTask) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index accf3d7c0d333..ae4df146b0a4c 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -53,6 +53,7 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, FileUtil, Path} import org.apache.hadoop.io.compress.{CompressionCodecFactory, SplittableCompressionCodec} import org.apache.hadoop.security.UserGroupInformation +import org.apache.hadoop.util.{RunJar, StringUtils} import org.apache.hadoop.yarn.conf.YarnConfiguration import org.eclipse.jetty.util.MultiException import org.slf4j.Logger @@ -486,6 +487,10 @@ private[spark] object Utils extends Logging { * * Throws SparkException if the target file already exists and has different contents than * the requested file. + * + * If `shouldUntar` is true, it untars the given url if it is a tar.gz or tgz into `targetDir`. 
+ * This is a legacy behavior, and users should better use `spark.archives` configuration or + * `SparkContext.addArchive` */ def fetchFile( url: String, @@ -494,7 +499,8 @@ private[spark] object Utils extends Logging { securityMgr: SecurityManager, hadoopConf: Configuration, timestamp: Long, - useCache: Boolean): File = { + useCache: Boolean, + shouldUntar: Boolean = true): File = { val fileName = decodeFileNameInURI(new URI(url)) val targetFile = new File(targetDir, fileName) val fetchCacheEnabled = conf.getBoolean("spark.files.useFetchCache", defaultValue = true) @@ -535,13 +541,23 @@ private[spark] object Utils extends Logging { doFetchFile(url, targetDir, fileName, conf, securityMgr, hadoopConf) } - // Decompress the file if it's a .tar or .tar.gz - if (fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz")) { - logInfo("Untarring " + fileName) - executeAndGetOutput(Seq("tar", "-xzf", fileName), targetDir) - } else if (fileName.endsWith(".tar")) { - logInfo("Untarring " + fileName) - executeAndGetOutput(Seq("tar", "-xf", fileName), targetDir) + if (shouldUntar) { + // Decompress the file if it's a .tar or .tar.gz + if (fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz")) { + logWarning( + "Untarring behavior will be deprecated at spark.files and " + + "SparkContext.addFile. Consider using spark.archives or SparkContext.addArchive " + + "instead.") + logInfo("Untarring " + fileName) + executeAndGetOutput(Seq("tar", "-xzf", fileName), targetDir) + } else if (fileName.endsWith(".tar")) { + logWarning( + "Untarring behavior will be deprecated at spark.files and " + + "SparkContext.addFile. Consider using spark.archives or SparkContext.addArchive " + + "instead.") + logInfo("Untarring " + fileName) + executeAndGetOutput(Seq("tar", "-xf", fileName), targetDir) + } } // Make the file executable - That's necessary for scripts FileUtil.chmod(targetFile.getAbsolutePath, "a+x") @@ -555,6 +571,26 @@ private[spark] object Utils extends Logging { targetFile } + /** + * Unpacks an archive file into the specified directory. It expects .jar, .zip, .tar.gz, .tgz + * and .tar files. This behaves same as Hadoop's archive in distributed cache. This method is + * basically copied from `org.apache.hadoop.yarn.util.FSDownload.unpack`. + */ + def unpack(source: File, dest: File): Unit = { + val lowerSrc = StringUtils.toLowerCase(source.getName) + if (lowerSrc.endsWith(".jar")) { + RunJar.unJar(source, dest, RunJar.MATCH_ANY) + } else if (lowerSrc.endsWith(".zip")) { + FileUtil.unZip(source, dest) + } else if ( + lowerSrc.endsWith(".tar.gz") || lowerSrc.endsWith(".tgz") || lowerSrc.endsWith(".tar")) { + FileUtil.unTar(source, dest) + } else { + logWarning(s"Cannot unpack $source, just copying it to $dest.") + copyRecursive(source, dest) + } + } + /** Records the duration of running `body`. 
*/ def timeTakenMs[T](body: => T): (T, Long) = { val startTime = System.nanoTime() diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala index ebdf2f59a2770..55bfa70f21fc2 100644 --- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala @@ -160,6 +160,85 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu } } + test("SPARK-33530: basic case for addArchive and listArchives") { + withTempDir { dir => + val file1 = File.createTempFile("someprefix1", "somesuffix1", dir) + val file2 = File.createTempFile("someprefix2", "somesuffix2", dir) + val file3 = File.createTempFile("someprefix3", "somesuffix3", dir) + val file4 = File.createTempFile("someprefix4", "somesuffix4", dir) + + val jarFile = new File(dir, "test!@$jar.jar") + val zipFile = new File(dir, "test-zip.zip") + val relativePath1 = + s"${zipFile.getParent}/../${zipFile.getParentFile.getName}/${zipFile.getName}" + val relativePath2 = + s"${jarFile.getParent}/../${jarFile.getParentFile.getName}/${jarFile.getName}#zoo" + + try { + Files.write("somewords1", file1, StandardCharsets.UTF_8) + Files.write("somewords22", file2, StandardCharsets.UTF_8) + Files.write("somewords333", file3, StandardCharsets.UTF_8) + Files.write("somewords4444", file4, StandardCharsets.UTF_8) + val length1 = file1.length() + val length2 = file2.length() + val length3 = file1.length() + val length4 = file2.length() + + createJar(Seq(file1, file2), jarFile) + createJar(Seq(file3, file4), zipFile) + + sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local")) + sc.addArchive(jarFile.getAbsolutePath) + sc.addArchive(relativePath1) + sc.addArchive(s"${jarFile.getAbsolutePath}#foo") + sc.addArchive(s"${zipFile.getAbsolutePath}#bar") + sc.addArchive(relativePath2) + + sc.parallelize(Array(1), 1).map { x => + val gotten1 = new File(SparkFiles.get(jarFile.getName)) + val gotten2 = new File(SparkFiles.get(zipFile.getName)) + val gotten3 = new File(SparkFiles.get("foo")) + val gotten4 = new File(SparkFiles.get("bar")) + val gotten5 = new File(SparkFiles.get("zoo")) + + Seq(gotten1, gotten2, gotten3, gotten4, gotten5).foreach { gotten => + if (!gotten.exists()) { + throw new SparkException(s"The archive doesn't exist: ${gotten.getAbsolutePath}") + } + if (!gotten.isDirectory) { + throw new SparkException(s"The archive was not unpacked: ${gotten.getAbsolutePath}") + } + } + + // Jars + Seq(gotten1, gotten3, gotten5).foreach { gotten => + val actualLength1 = new File(gotten, file1.getName).length() + val actualLength2 = new File(gotten, file2.getName).length() + if (actualLength1 != length1 || actualLength2 != length2) { + s"Unpacked files have different lengths $actualLength1 and $actualLength2. at " + + s"${gotten.getAbsolutePath}. They should be $length1 and $length2." + } + } + + // Zip + Seq(gotten2, gotten4).foreach { gotten => + val actualLength3 = new File(gotten, file1.getName).length() + val actualLength4 = new File(gotten, file2.getName).length() + if (actualLength3 != length3 || actualLength4 != length4) { + s"Unpacked files have different lengths $actualLength3 and $actualLength4. at " + + s"${gotten.getAbsolutePath}. They should be $length3 and $length4." 
+ } + } + x + }.count() + assert(sc.listArchives().count(_.endsWith("test!@$jar.jar")) == 1) + assert(sc.listArchives().count(_.contains("test-zip.zip")) == 2) + } finally { + sc.stop() + } + } + } + test("add and list jar files") { val jarPath = Thread.currentThread().getContextClassLoader.getResource("TestUDTF.jar") try { diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index b5b3751439750..dcd35f3f6b93f 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -335,6 +335,43 @@ class SparkSubmitSuite sys.props("SPARK_SUBMIT") should be ("true") } + test("SPARK-33530: handles standalone mode with archives") { + val clArgs = Seq( + "--master", "spark://localhost:1234", + "--executor-memory", "5g", + "--executor-cores", "5", + "--class", "org.SomeClass", + "--jars", "one.jar,two.jar,three.jar", + "--driver-memory", "4g", + "--files", "file1.txt,file2.txt", + "--archives", "archive1.zip,archive2.jar", + "--num-executors", "6", + "--name", "beauty", + "--conf", "spark.ui.enabled=false", + "thejar.jar", + "arg1", "arg2") + val appArgs = new SparkSubmitArguments(clArgs) + val (childArgs, classpath, conf, mainClass) = submit.prepareSubmitEnvironment(appArgs) + val childArgsStr = childArgs.mkString(" ") + childArgsStr should include ("arg1 arg2") + mainClass should be ("org.SomeClass") + + classpath(0) should endWith ("thejar.jar") + classpath(1) should endWith ("one.jar") + classpath(2) should endWith ("two.jar") + classpath(3) should endWith ("three.jar") + + conf.get("spark.executor.memory") should be ("5g") + conf.get("spark.driver.memory") should be ("4g") + conf.get("spark.executor.cores") should be ("5") + conf.get("spark.jars") should include regex (".*one.jar,.*two.jar,.*three.jar") + conf.get("spark.files") should include regex (".*file1.txt,.*file2.txt") + conf.get("spark.archives") should include regex (".*archive1.zip,.*archive2.jar") + conf.get("spark.app.name") should be ("beauty") + conf.get(UI_ENABLED) should be (false) + sys.props("SPARK_SUBMIT") should be ("true") + } + test("handles standalone cluster mode") { testStandaloneCluster(useRest = true) } diff --git a/core/src/test/scala/org/apache/spark/deploy/rest/SubmitRestProtocolSuite.scala b/core/src/test/scala/org/apache/spark/deploy/rest/SubmitRestProtocolSuite.scala index d08052faa0043..9fdbf485e17d3 100644 --- a/core/src/test/scala/org/apache/spark/deploy/rest/SubmitRestProtocolSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/rest/SubmitRestProtocolSuite.scala @@ -98,6 +98,7 @@ class SubmitRestProtocolSuite extends SparkFunSuite { // optional fields conf.set(JARS, Seq("mayonnaise.jar", "ketchup.jar")) conf.set(FILES.key, "fireball.png") + conf.set(ARCHIVES.key, "fireballs.zip") conf.set("spark.driver.memory", s"${Utils.DEFAULT_DRIVER_MEM_MB}m") conf.set(DRIVER_CORES, 180) conf.set("spark.driver.extraJavaOptions", " -Dslices=5 -Dcolor=mostly_red") @@ -246,6 +247,7 @@ class SubmitRestProtocolSuite extends SparkFunSuite { | }, | "mainClass" : "org.apache.spark.examples.SparkPie", | "sparkProperties" : { + | "spark.archives" : "fireballs.zip", | "spark.driver.extraLibraryPath" : "pickle.jar", | "spark.jars" : "mayonnaise.jar,ketchup.jar", | "spark.driver.supervise" : "false", @@ -272,6 +274,7 @@ class SubmitRestProtocolSuite extends SparkFunSuite { | }, | "mainClass" : "org.apache.spark.examples.SparkPie", | 
"sparkProperties" : { + | "spark.archives" : "fireballs.zip", | "spark.driver.extraLibraryPath" : "pickle.jar", | "spark.jars" : "mayonnaise.jar,ketchup.jar", | "spark.driver.supervise" : "false", diff --git a/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala b/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala index 319dcfeecee24..810dcf0e61007 100644 --- a/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala +++ b/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala @@ -302,7 +302,7 @@ class CoarseGrainedExecutorBackendSuite extends SparkFunSuite // We don't really verify the data, just pass it around. val data = ByteBuffer.wrap(Array[Byte](1, 2, 3, 4)) val taskDescription = new TaskDescription(taskId, 2, "1", "TASK 1000000", 19, - 1, mutable.Map.empty, mutable.Map.empty, new Properties, + 1, mutable.Map.empty, mutable.Map.empty, mutable.Map.empty, new Properties, Map(GPU -> new ResourceInformation(GPU, Array("0", "1"))), data) val serializedTaskDescription = TaskDescription.encode(taskDescription) backend.executor = mock[Executor] diff --git a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala index 5b868604ecf94..7cf7a81a76133 100644 --- a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala +++ b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala @@ -519,6 +519,7 @@ class ExecutorSuite extends SparkFunSuite partitionId = 0, addedFiles = Map[String, Long](), addedJars = Map[String, Long](), + addedArchives = Map[String, Long](), properties = new Properties, resources = immutable.Map[String, ResourceInformation](), serializedTask) diff --git a/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala index 65d51e57ee308..7a74dd877a042 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala @@ -244,7 +244,8 @@ class CoarseGrainedSchedulerBackendSuite extends SparkFunSuite with LocalSparkCo val taskResources = Map(GPU -> new ResourceInformation(GPU, Array("0"))) var taskDescs: Seq[Seq[TaskDescription]] = Seq(Seq(new TaskDescription(1, 0, "1", - "t1", 0, 1, mutable.Map.empty[String, Long], mutable.Map.empty[String, Long], + "t1", 0, 1, mutable.Map.empty[String, Long], + mutable.Map.empty[String, Long], mutable.Map.empty[String, Long], new Properties(), taskResources, bytebuffer))) val ts = backend.getTaskSchedulerImpl() when(ts.resourceOffers(any[IndexedSeq[WorkerOffer]], any[Boolean])).thenReturn(taskDescs) diff --git a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala index 915035e9eb71c..c4a8bcbb26a1d 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala @@ -91,7 +91,8 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit .set(key, secretPassword) val hadoopconf = SparkHadoopUtil.get.newConfiguration(new SparkConf()) val eventLogger = new EventLoggingListener("test", None, testDirPath.toUri(), conf) - val envDetails = 
SparkEnv.environmentDetails(conf, hadoopconf, "FIFO", Seq.empty, Seq.empty) + val envDetails = SparkEnv.environmentDetails( + conf, hadoopconf, "FIFO", Seq.empty, Seq.empty, Seq.empty) val event = SparkListenerEnvironmentUpdate(envDetails) val redactedProps = eventLogger.redactEvent(event).environmentDetails("Spark Properties").toMap assert(redactedProps(key) == "*********(redacted)") diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskDescriptionSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskDescriptionSuite.scala index 5839532f11666..98b5bada27646 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskDescriptionSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskDescriptionSuite.scala @@ -33,6 +33,10 @@ class TaskDescriptionSuite extends SparkFunSuite { originalFiles.put("fileUrl1", 1824) originalFiles.put("fileUrl2", 2) + val originalArchives = new HashMap[String, Long]() + originalArchives.put("archiveUrl1", 1824) + originalArchives.put("archiveUrl2", 2) + val originalJars = new HashMap[String, Long]() originalJars.put("jar1", 3) @@ -70,6 +74,7 @@ class TaskDescriptionSuite extends SparkFunSuite { partitionId = 1, originalFiles, originalJars, + originalArchives, originalProperties, originalResources, taskBuffer @@ -87,6 +92,7 @@ class TaskDescriptionSuite extends SparkFunSuite { assert(decodedTaskDescription.partitionId === originalTaskDescription.partitionId) assert(decodedTaskDescription.addedFiles.equals(originalFiles)) assert(decodedTaskDescription.addedJars.equals(originalJars)) + assert(decodedTaskDescription.addedArchives.equals(originalArchives)) assert(decodedTaskDescription.properties.equals(originalTaskDescription.properties)) assert(equalResources(decodedTaskDescription.resources, originalTaskDescription.resources)) assert(decodedTaskDescription.serializedTask.equals(taskBuffer)) diff --git a/docs/configuration.md b/docs/configuration.md index 76494b04c9279..d4d8e47645921 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -784,6 +784,17 @@ Apart from these, the following properties are also available, and may be useful 2.3.0 + + spark.archives + + + Comma-separated list of archives to be extracted into the working directory of each executor. + .jar, .tar.gz, .tgz and .zip are supported. You can specify the directory name to unpack via + adding # after the file name to unpack, for example, file.zip#directory. + This configuration is experimental. 
+ + 3.1.0 + spark.pyspark.driver.python diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 5a66bfca27a27..9405927eb1cb5 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -41,6 +41,7 @@ object MimaExcludes { ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.shuffle.sort.io.LocalDiskShuffleMapOutputWriter.commitAllPartitions"), ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.shuffle.api.ShuffleMapOutputWriter.commitAllPartitions"), ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.shuffle.api.ShuffleMapOutputWriter.commitAllPartitions"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.SparkEnv.environmentDetails"), // mllib module ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionTrainingSummary.totalIterations"), ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionTrainingSummary.$init$"), diff --git a/python/docs/source/user_guide/python_packaging.rst b/python/docs/source/user_guide/python_packaging.rst index ef4d05a8eefea..0aff6dc1d16b4 100644 --- a/python/docs/source/user_guide/python_packaging.rst +++ b/python/docs/source/user_guide/python_packaging.rst @@ -77,8 +77,7 @@ Using Zipped Virtual Environment -------------------------------- The idea of zipped environments is to zip your whole `virtual environment `_, -ship it to the cluster, unzip it remotely and target the Python interpreter from inside this zipped environment. Note that this -is currently supported *only for YARN*. +ship it to the cluster, unzip it remotely and target the Python interpreter from inside this zipped environment. Zip Virtual Environment ~~~~~~~~~~~~~~~~~~~~~~~ @@ -92,16 +91,15 @@ Example with `conda-pack`: .. code-block:: bash - conda create -y -n conda_env -c conda-forge \ - pyspark==3.0.1 pyarrow==0.15.1 pandas==0.25.3 conda-pack==0.4.0 - conda activate conda_env - conda pack -f -o conda_env.tar.gz + conda create -y -n pyspark_env -c conda-forge pyarrow==2.0.0 pandas==1.1.4 conda-pack==0.5.0 + conda activate pyspark_env + conda pack -f -o pyspark_env.tar.gz Upload to Spark Executors ~~~~~~~~~~~~~~~~~~~~~~~~~ Unzipping will be done by Spark when using target ``--archives`` option in spark-submit -or setting ``spark.yarn.dist.archives`` configuration. +or setting ``spark.archives`` configuration. Example with ``spark-submit``: @@ -109,8 +107,7 @@ Example with ``spark-submit``: export PYSPARK_DRIVER_PYTHON=python export PYSPARK_PYTHON=./environment/bin/python - spark-submit --master=yarn --deploy-mode client \ - --archives conda_env.tar.gz#environment app.py + spark-submit --master=... --archives pyspark_env.tar.gz#environment app.py Example using ``SparkSession.builder``: @@ -121,11 +118,17 @@ Example using ``SparkSession.builder``: from app import main os.environ['PYSPARK_PYTHON'] = "./environment/bin/python" - builder = SparkSession.builder.master("yarn").config( - "spark.yarn.dist.archives", "conda_env.tar.gz#environment") - spark = builder.getOrCreate() + spark = SparkSession.builder.master("...").config("spark.archives", "pyspark_env.tar.gz#environment").getOrCreate() main(spark) +Example with ``pyspark`` shell: + +.. code-block:: bash + + export PYSPARK_DRIVER_PYTHON=python + export PYSPARK_PYTHON=./environment/bin/python + pyspark --master=... 
--archives pyspark_env.tar.gz#environment + Using PEX --------- diff --git a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackendSuite.scala b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackendSuite.scala index 6a6514569cf90..10030a20f0884 100644 --- a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackendSuite.scala +++ b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackendSuite.scala @@ -264,6 +264,7 @@ class MesosFineGrainedSchedulerBackendSuite partitionId = 0, addedFiles = mutable.Map.empty[String, Long], addedJars = mutable.Map.empty[String, Long], + addedArchives = mutable.Map.empty[String, Long], properties = new Properties(), resources = immutable.Map.empty[String, ResourceInformation], ByteBuffer.wrap(new Array[Byte](0))) @@ -377,6 +378,7 @@ class MesosFineGrainedSchedulerBackendSuite partitionId = 0, addedFiles = mutable.Map.empty[String, Long], addedJars = mutable.Map.empty[String, Long], + addedArchives = mutable.Map.empty[String, Long], properties = new Properties(), resources = immutable.Map.empty[String, ResourceInformation], ByteBuffer.wrap(new Array[Byte](0))) From 52e5cc46bc184bf582f9bc9ebcc5c8180222c421 Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Tue, 1 Dec 2020 14:42:48 +0900 Subject: [PATCH 0621/1009] [SPARK-27188][SS] FileStreamSink: provide a new option to have retention on output files ### What changes were proposed in this pull request? This patch proposes to provide a new option to specify time-to-live (TTL) for output file entries in FileStreamSink. TTL is defined via current timestamp - the last modified time for the file. This patch will filter out outdated output files in metadata while compacting batches (other batches don't have functionality to clean entries), which helps metadata to not grow linearly, as well as filtered out files will be "eventually" no longer seen in reader queries which leverage File(Stream)Source. ### Why are the changes needed? The metadata log greatly helps to easily achieve exactly-once but given the output path is open to arbitrary readers, there's no way to compact the metadata log, which ends up growing the metadata file as query runs for long time, especially for compacted batch. Lots of end users have been reporting the issue: see comments in [SPARK-24295](https://issues.apache.org/jira/browse/SPARK-24295) and [SPARK-29995](https://issues.apache.org/jira/browse/SPARK-29995), and [SPARK-30462](https://issues.apache.org/jira/browse/SPARK-30462). (There're some reports from end users which include their workarounds: SPARK-24295) ### Does this PR introduce any user-facing change? No, as the configuration is new and by default it is not applied. ### How was this patch tested? New UT. Closes #28363 from HeartSaVioR/SPARK-27188-v2. 
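As a rough illustration of the option this patch introduces (not part of the patch itself): the sketch below assumes only the documented `retention` key and duration-string format; the source, output path, and checkpoint location are placeholder values.

```scala
// Hypothetical usage sketch: enable the new file-sink retention option.
// Only the "retention" key and the duration format ("12h", "7d", ...) come
// from this patch; everything else here is an illustrative placeholder.
val query = spark.readStream
  .format("rate")                                      // placeholder streaming source
  .load()
  .writeStream
  .format("parquet")
  .option("path", "/tmp/file-sink-output")             // placeholder output directory
  .option("checkpointLocation", "/tmp/file-sink-ckpt") // placeholder checkpoint dir
  .option("retention", "7d")                           // metadata entries older than 7 days are dropped at compaction
  .start()
```

Note that entries are only filtered while compacting metadata batches, so the effect on reader queries is eventual rather than immediate.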
Lead-authored-by: Jungtaek Lim (HeartSaVioR) Co-authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../structured-streaming-programming-guide.md | 6 +- .../streaming/CompactibleFileStreamLog.scala | 8 +- .../execution/streaming/FileStreamSink.scala | 7 +- .../streaming/FileStreamSinkLog.scala | 25 +++++- .../streaming/FileStreamSinkLogSuite.scala | 77 +++++++++++-------- 5 files changed, 83 insertions(+), 40 deletions(-) diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md index c671d6b590626..6995ee2475aee 100644 --- a/docs/structured-streaming-programming-guide.md +++ b/docs/structured-streaming-programming-guide.md @@ -1874,7 +1874,11 @@ Here are the details of all the sinks in Spark. File Sink Append - path: path to the output directory, must be specified. + path: path to the output directory, must be specified.
      + retention: time to live (TTL) for output files. Output files whose batches were + committed earlier than the TTL will eventually be excluded from the metadata log, which means reader queries + that read the sink's output directory may not process them. The value can be provided as a duration string (like "12h", "7d", etc.). + By default it is disabled.

      For file-format-specific options, see the related methods in DataFrameWriter (Scala/Java/Python/ - filterInBatch(id)(shouldRetain).getOrElse { + filterInBatch(id)(shouldRetain(_, curTime)).getOrElse { throw new IllegalStateException( s"${batchIdToPath(id)} doesn't exist " + s"(latestId: $latestId, compactInterval: $compactInterval)") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala index ecaf4f8160a06..e1c9b82ec2ac9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala @@ -29,7 +29,7 @@ import org.apache.spark.sql.{DataFrame, SparkSession} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.execution.datasources.{BasicWriteJobStatsTracker, FileFormat, FileFormatWriter} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.util.SerializableConfiguration +import org.apache.spark.util.{SerializableConfiguration, Utils} object FileStreamSink extends Logging { // The name of the subdirectory that is used to store metadata about which files are valid. @@ -136,8 +136,9 @@ class FileStreamSink( private val basePath = new Path(path) private val logPath = getMetadataLogPath(basePath.getFileSystem(hadoopConf), basePath, sparkSession.sessionState.conf) - private val fileLog = - new FileStreamSinkLog(FileStreamSinkLog.VERSION, sparkSession, logPath.toString) + private val retention = options.get("retention").map(Utils.timeStringAsMs) + private val fileLog = new FileStreamSinkLog(FileStreamSinkLog.VERSION, sparkSession, + logPath.toString, retention) private def basicWriteJobStatsTracker: BasicWriteJobStatsTracker = { val serializableHadoopConf = new SerializableConfiguration(hadoopConf) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLog.scala index 5cb68e1ae956e..2d70d95c6850d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLog.scala @@ -81,7 +81,8 @@ object SinkFileStatus { class FileStreamSinkLog( metadataLogVersion: Int, sparkSession: SparkSession, - path: String) + path: String, + _retentionMs: Option[Long] = None) extends CompactibleFileStreamLog[SinkFileStatus](metadataLogVersion, sparkSession, path) { private implicit val formats = Serialization.formats(NoTypeHints) @@ -96,6 +97,28 @@ class FileStreamSinkLog( require(defaultCompactInterval > 0, s"Please set ${SQLConf.FILE_SINK_LOG_COMPACT_INTERVAL.key} (was $defaultCompactInterval) " + "to a positive value.") + + val retentionMs: Long = _retentionMs match { + case Some(retention) => + logInfo(s"Retention is set to $retention ms") + retention + + case _ => Long.MaxValue + } + + override def shouldRetain(log: SinkFileStatus, currentTime: Long): Boolean = { + if (retentionMs < Long.MaxValue) { + if (currentTime - log.modificationTime > retentionMs) { + logDebug(s"${log.path} excluded by retention - current time: $currentTime / " + + s"modification time: ${log.modificationTime} / retention: $retentionMs ms.") + false + } else { + true + } + } else { + true + } + } } object FileStreamSinkLog { diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala index 622d69e188821..d6707e7be71fc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala @@ -25,7 +25,7 @@ import java.util.concurrent.ConcurrentHashMap import scala.util.Random -import org.apache.hadoop.fs.{FSDataInputStream, Path, RawLocalFileSystem} +import org.apache.hadoop.fs.{FileSystem, FSDataInputStream, Path, RawLocalFileSystem} import org.apache.spark.SparkFunSuite import org.apache.spark.sql.internal.SQLConf @@ -39,7 +39,7 @@ class FileStreamSinkLogSuite extends SparkFunSuite with SharedSparkSession { test("shouldRetain") { withFileStreamSinkLog { sinkLog => val log = newFakeSinkFileStatus("/a/b/x", FileStreamSinkLog.ADD_ACTION) - assert(sinkLog.shouldRetain(log)) + assert(sinkLog.shouldRetain(log, System.currentTimeMillis())) } } @@ -129,6 +129,17 @@ class FileStreamSinkLogSuite extends SparkFunSuite with SharedSparkSession { } } + private def listBatchFiles(fs: FileSystem, sinkLog: FileStreamSinkLog): Set[String] = { + fs.listStatus(sinkLog.metadataPath).map(_.getPath.getName).filter { fileName => + try { + getBatchIdFromFileName(fileName) + true + } catch { + case _: NumberFormatException => false + } + }.toSet + } + test("delete expired file") { // Set FILE_SINK_LOG_CLEANUP_DELAY to 0 so that we can detect the deleting behaviour // deterministically and one min batches to retain @@ -138,18 +149,7 @@ class FileStreamSinkLogSuite extends SparkFunSuite with SharedSparkSession { SQLConf.MIN_BATCHES_TO_RETAIN.key -> "1") { withFileStreamSinkLog { sinkLog => val fs = sinkLog.metadataPath.getFileSystem(spark.sessionState.newHadoopConf()) - - def listBatchFiles(): Set[String] = { - fs.listStatus(sinkLog.metadataPath).map(_.getPath.getName).filter { fileName => - try { - getBatchIdFromFileName(fileName) - true - } catch { - case _: NumberFormatException => false - } - }.toSet - } - + def listBatchFiles(): Set[String] = this.listBatchFiles(fs, sinkLog) sinkLog.add(0, Array(newFakeSinkFileStatus("/a/b/0", FileStreamSinkLog.ADD_ACTION))) assert(Set("0") === listBatchFiles()) sinkLog.add(1, Array(newFakeSinkFileStatus("/a/b/1", FileStreamSinkLog.ADD_ACTION))) @@ -173,18 +173,7 @@ class FileStreamSinkLogSuite extends SparkFunSuite with SharedSparkSession { SQLConf.MIN_BATCHES_TO_RETAIN.key -> "2") { withFileStreamSinkLog { sinkLog => val fs = sinkLog.metadataPath.getFileSystem(spark.sessionState.newHadoopConf()) - - def listBatchFiles(): Set[String] = { - fs.listStatus(sinkLog.metadataPath).map(_.getPath.getName).filter { fileName => - try { - getBatchIdFromFileName(fileName) - true - } catch { - case _: NumberFormatException => false - } - }.toSet - } - + def listBatchFiles(): Set[String] = this.listBatchFiles(fs, sinkLog) sinkLog.add(0, Array(newFakeSinkFileStatus("/a/b/0", FileStreamSinkLog.ADD_ACTION))) assert(Set("0") === listBatchFiles()) sinkLog.add(1, Array(newFakeSinkFileStatus("/a/b/1", FileStreamSinkLog.ADD_ACTION))) @@ -205,6 +194,24 @@ class FileStreamSinkLogSuite extends SparkFunSuite with SharedSparkSession { } } + test("filter out outdated entries when compacting") { + val curTime = System.currentTimeMillis() + withFileStreamSinkLog(sinkLog => { + val logs = Seq( + newFakeSinkFileStatus("/a/b/x", FileStreamSinkLog.ADD_ACTION, 
curTime), + newFakeSinkFileStatus("/a/b/y", FileStreamSinkLog.ADD_ACTION, curTime), + newFakeSinkFileStatus("/a/b/z", FileStreamSinkLog.ADD_ACTION, curTime)) + logs.foreach { log => assert(sinkLog.shouldRetain(log, curTime)) } + + val logs2 = Seq( + newFakeSinkFileStatus("/a/b/m", FileStreamSinkLog.ADD_ACTION, curTime - 80000), + newFakeSinkFileStatus("/a/b/n", FileStreamSinkLog.ADD_ACTION, curTime - 120000)) + logs2.foreach { log => + assert(!sinkLog.shouldRetain(log, curTime)) + } + }, Some(60000)) + } + test("read Spark 2.1.0 log format") { assert(readFromResource("file-sink-log-version-2.1.0") === Seq( SinkFileStatus("/a/b/0", 1, false, 1, 1, 100, FileStreamSinkLog.ADD_ACTION), @@ -259,23 +266,29 @@ class FileStreamSinkLogSuite extends SparkFunSuite with SharedSparkSession { } /** - * Create a fake SinkFileStatus using path and action. Most of tests don't care about other fields - * in SinkFileStatus. + * Create a fake SinkFileStatus using path and action, and optionally modification time. + * Most of tests don't care about other fields in SinkFileStatus. */ - private def newFakeSinkFileStatus(path: String, action: String): SinkFileStatus = { + private def newFakeSinkFileStatus( + path: String, + action: String, + modificationTime: Long = Long.MaxValue): SinkFileStatus = { SinkFileStatus( path = path, size = 100L, isDir = false, - modificationTime = 100L, + modificationTime = modificationTime, blockReplication = 1, blockSize = 100L, action = action) } - private def withFileStreamSinkLog(f: FileStreamSinkLog => Unit): Unit = { + private def withFileStreamSinkLog( + f: FileStreamSinkLog => Unit, + ttl: Option[Long] = None): Unit = { withTempDir { file => - val sinkLog = new FileStreamSinkLog(FileStreamSinkLog.VERSION, spark, file.getCanonicalPath) + val sinkLog = new FileStreamSinkLog(FileStreamSinkLog.VERSION, spark, file.getCanonicalPath, + ttl) f(sinkLog) } } From 103481551979297729123aaa56896d182d74847f Mon Sep 17 00:00:00 2001 From: "zky.zhoukeyong" Date: Tue, 1 Dec 2020 11:07:16 +0000 Subject: [PATCH 0622/1009] [SPARK-33572][SQL] Datetime building should fail if the year, month, ..., second combination is invalid ### What changes were proposed in this pull request? Datetime building should fail if the year, month, ..., second combination is invalid, when ANSI mode is enabled. This patch should update MakeDate, MakeTimestamp and MakeInterval. ### Why are the changes needed? For ANSI mode. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added UT and Existing UT. Closes #30516 from waitinfuture/SPARK-33498. 
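A minimal sketch of the user-visible difference (not part of the patch): it assumes the behaviour is gated by `spark.sql.ansi.enabled`, as wired through the `failOnError` flag in the expression changes below, and the exception message is the one shown in the updated `postgreSQL/date.sql.out` results below.

```scala
// With ANSI mode disabled, an invalid calendar date still evaluates to NULL.
spark.conf.set("spark.sql.ansi.enabled", "false")
spark.sql("SELECT make_date(2013, 2, 30)").show()    // single row containing NULL

// With ANSI mode enabled, the same expression now fails at execution time.
spark.conf.set("spark.sql.ansi.enabled", "true")
spark.sql("SELECT make_date(2013, 2, 30)").collect()
// java.time.DateTimeException: Invalid date 'FEBRUARY 30'
```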
Lead-authored-by: zky.zhoukeyong Co-authored-by: waitinfuture Signed-off-by: Wenchen Fan --- .../expressions/datetimeExpressions.scala | 27 ++-- .../expressions/intervalExpressions.scala | 23 +++- .../expressions/DateExpressionsSuite.scala | 118 ++++++++++++------ .../IntervalExpressionsSuite.scala | 60 +++++++++ .../sql-tests/results/postgreSQL/date.sql.out | 15 ++- 5 files changed, 187 insertions(+), 56 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 1ff5833fb4dd6..bbf1e4657f351 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -1789,31 +1789,36 @@ private case class GetTimestamp( """, group = "datetime_funcs", since = "3.0.0") -case class MakeDate(year: Expression, month: Expression, day: Expression) +case class MakeDate(year: Expression, month: Expression, day: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant { + def this(year: Expression, month: Expression, day: Expression) = + this(year, month, day, SQLConf.get.ansiEnabled) + override def children: Seq[Expression] = Seq(year, month, day) override def inputTypes: Seq[AbstractDataType] = Seq(IntegerType, IntegerType, IntegerType) override def dataType: DataType = DateType - override def nullable: Boolean = true + override def nullable: Boolean = if (failOnError) children.exists(_.nullable) else true override def nullSafeEval(year: Any, month: Any, day: Any): Any = { try { val ld = LocalDate.of(year.asInstanceOf[Int], month.asInstanceOf[Int], day.asInstanceOf[Int]) localDateToDays(ld) } catch { - case _: java.time.DateTimeException => null + case _: java.time.DateTimeException if !failOnError => null } } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") + val failOnErrorBranch = if (failOnError) "throw e;" else s"${ev.isNull} = true;" nullSafeCodeGen(ctx, ev, (year, month, day) => { s""" try { ${ev.value} = $dtu.localDateToDays(java.time.LocalDate.of($year, $month, $day)); } catch (java.time.DateTimeException e) { - ${ev.isNull} = true; + $failOnErrorBranch }""" }) } @@ -1860,7 +1865,8 @@ case class MakeTimestamp( min: Expression, sec: Expression, timezone: Option[Expression] = None, - timeZoneId: Option[String] = None) + timeZoneId: Option[String] = None, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends SeptenaryExpression with TimeZoneAwareExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1871,7 +1877,7 @@ case class MakeTimestamp( hour: Expression, min: Expression, sec: Expression) = { - this(year, month, day, hour, min, sec, None, None) + this(year, month, day, hour, min, sec, None, None, SQLConf.get.ansiEnabled) } def this( @@ -1882,7 +1888,7 @@ case class MakeTimestamp( min: Expression, sec: Expression, timezone: Expression) = { - this(year, month, day, hour, min, sec, Some(timezone), None) + this(year, month, day, hour, min, sec, Some(timezone), None, SQLConf.get.ansiEnabled) } override def children: Seq[Expression] = Seq(year, month, day, hour, min, sec) ++ timezone @@ -1892,7 +1898,7 @@ case class MakeTimestamp( Seq(IntegerType, IntegerType, IntegerType, IntegerType, IntegerType, DecimalType(8, 6)) ++ 
timezone.map(_ => StringType) override def dataType: DataType = TimestampType - override def nullable: Boolean = true + override def nullable: Boolean = if (failOnError) children.exists(_.nullable) else true override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = copy(timeZoneId = Option(timeZoneId)) @@ -1926,7 +1932,7 @@ case class MakeTimestamp( } instantToMicros(ldt.atZone(zoneId).toInstant) } catch { - case _: DateTimeException => null + case _: DateTimeException if !failOnError => null } } @@ -1955,6 +1961,7 @@ case class MakeTimestamp( val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName) val d = Decimal.getClass.getName.stripSuffix("$") + val failOnErrorBranch = if (failOnError) "throw e;" else s"${ev.isNull} = true;" nullSafeCodeGen(ctx, ev, (year, month, day, hour, min, secAndNanos, timezone) => { val zoneId = timezone.map(tz => s"$dtu.getZoneId(${tz}.toString())").getOrElse(zid) s""" @@ -1978,7 +1985,7 @@ case class MakeTimestamp( java.time.Instant instant = ldt.atZone($zoneId).toInstant(); ${ev.value} = $dtu.instantToMicros(instant); } catch (java.time.DateTimeException e) { - ${ev.isNull} = true; + $failOnErrorBranch }""" }) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala index 8b92c619df626..6219457bba994 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala @@ -161,9 +161,20 @@ case class MakeInterval( days: Expression, hours: Expression, mins: Expression, - secs: Expression) + secs: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends SeptenaryExpression with ImplicitCastInputTypes with NullIntolerant { + def this( + years: Expression, + months: Expression, + weeks: Expression, + days: Expression, + hours: Expression, + mins: Expression, + sec: Expression) = { + this(years, months, weeks, days, hours, mins, sec, SQLConf.get.ansiEnabled) + } def this( years: Expression, months: Expression, @@ -171,7 +182,8 @@ case class MakeInterval( days: Expression, hours: Expression, mins: Expression) = { - this(years, months, weeks, days, hours, mins, Literal(Decimal(0, Decimal.MAX_LONG_DIGITS, 6))) + this(years, months, weeks, days, hours, mins, Literal(Decimal(0, Decimal.MAX_LONG_DIGITS, 6)), + SQLConf.get.ansiEnabled) } def this( years: Expression, @@ -195,7 +207,7 @@ case class MakeInterval( override def inputTypes: Seq[AbstractDataType] = Seq(IntegerType, IntegerType, IntegerType, IntegerType, IntegerType, IntegerType, DecimalType(Decimal.MAX_LONG_DIGITS, 6)) override def dataType: DataType = CalendarIntervalType - override def nullable: Boolean = true + override def nullable: Boolean = if (failOnError) children.exists(_.nullable) else true override def nullSafeEval( year: Any, @@ -215,7 +227,7 @@ case class MakeInterval( min.asInstanceOf[Int], sec.map(_.asInstanceOf[Decimal]).getOrElse(Decimal(0, Decimal.MAX_LONG_DIGITS, 6))) } catch { - case _: ArithmeticException => null + case _: ArithmeticException if !failOnError => null } } @@ -223,11 +235,12 @@ case class MakeInterval( nullSafeCodeGen(ctx, ev, (year, month, week, day, hour, min, sec) => { val iu = IntervalUtils.getClass.getName.stripSuffix("$") val secFrac = sec.getOrElse("0") + val faileOnErrorBranch = 
if (failOnError) "throw e;" else s"${ev.isNull} = true;" s""" try { ${ev.value} = $iu.makeInterval($year, $month, $week, $day, $hour, $min, $secFrac); } catch (java.lang.ArithmeticException e) { - ${ev.isNull} = true; + $faileOnErrorBranch } """ }) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index a3ffc1129fd5e..587ca0cdbed6e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} import java.text.{ParseException, SimpleDateFormat} -import java.time.{Instant, LocalDate, ZoneId} +import java.time.{DateTimeException, Instant, LocalDate, ZoneId} import java.time.format.DateTimeParseException import java.util.{Calendar, Locale, TimeZone} import java.util.concurrent.TimeUnit._ @@ -1014,49 +1014,97 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } test("creating values of DateType via make_date") { - checkEvaluation(MakeDate(Literal(2013), Literal(7), Literal(15)), Date.valueOf("2013-7-15")) - checkEvaluation(MakeDate(Literal.create(null, IntegerType), Literal(7), Literal(15)), null) - checkEvaluation(MakeDate(Literal(2019), Literal.create(null, IntegerType), Literal(19)), null) - checkEvaluation(MakeDate(Literal(2019), Literal(7), Literal.create(null, IntegerType)), null) - checkEvaluation(MakeDate(Literal(Int.MaxValue), Literal(13), Literal(19)), null) - checkEvaluation(MakeDate(Literal(2019), Literal(13), Literal(19)), null) - checkEvaluation(MakeDate(Literal(2019), Literal(7), Literal(32)), null) + Seq(true, false).foreach({ ansi => + withSQLConf(SQLConf.ANSI_ENABLED.key -> ansi.toString) { + checkEvaluation(MakeDate(Literal(2013), Literal(7), Literal(15)), Date.valueOf("2013-7-15")) + checkEvaluation(MakeDate(Literal.create(null, IntegerType), Literal(7), Literal(15)), null) + checkEvaluation(MakeDate(Literal(2019), Literal.create(null, IntegerType), Literal(19)), + null) + checkEvaluation(MakeDate(Literal(2019), Literal(7), Literal.create(null, IntegerType)), + null) + } + }) + + // ansi test + withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + checkExceptionInExpression[DateTimeException](MakeDate(Literal(Int.MaxValue), Literal(13), + Literal(19)), EmptyRow, "Invalid value for Year") + checkExceptionInExpression[DateTimeException](MakeDate(Literal(2019), + Literal(13), Literal(19)), EmptyRow, "Invalid value for Month") + checkExceptionInExpression[DateTimeException](MakeDate(Literal(2019), Literal(7), + Literal(32)), EmptyRow, "Invalid value for Day") + } + + // non-ansi test + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + checkEvaluation(MakeDate(Literal(Int.MaxValue), Literal(13), Literal(19)), null) + checkEvaluation(MakeDate(Literal(2019), Literal(13), Literal(19)), null) + checkEvaluation(MakeDate(Literal(2019), Literal(7), Literal(32)), null) + } } test("creating values of TimestampType via make_timestamp") { - var makeTimestampExpr = MakeTimestamp( - Literal(2013), Literal(7), Literal(15), Literal(8), Literal(15), - Literal(Decimal(BigDecimal(23.5), 8, 6)), Some(Literal(ZoneId.systemDefault().getId))) val expected = Timestamp.valueOf("2013-7-15 8:15:23.5") - checkEvaluation(makeTimestampExpr, expected) - 
checkEvaluation(makeTimestampExpr.copy(timezone = None), expected) - - checkEvaluation(makeTimestampExpr.copy(year = Literal.create(null, IntegerType)), null) - checkEvaluation(makeTimestampExpr.copy(year = Literal(Int.MaxValue)), null) - - checkEvaluation(makeTimestampExpr.copy(month = Literal.create(null, IntegerType)), null) - checkEvaluation(makeTimestampExpr.copy(month = Literal(13)), null) - - checkEvaluation(makeTimestampExpr.copy(day = Literal.create(null, IntegerType)), null) - checkEvaluation(makeTimestampExpr.copy(day = Literal(32)), null) - checkEvaluation(makeTimestampExpr.copy(hour = Literal.create(null, IntegerType)), null) - checkEvaluation(makeTimestampExpr.copy(hour = Literal(25)), null) + Seq(true, false).foreach { ansi => + withSQLConf(SQLConf.ANSI_ENABLED.key -> ansi.toString) { + var makeTimestampExpr = MakeTimestamp( + Literal(2013), Literal(7), Literal(15), Literal(8), Literal(15), + Literal(Decimal(BigDecimal(23.5), 8, 6)), Some(Literal(ZoneId.systemDefault().getId))) + checkEvaluation(makeTimestampExpr, expected) + checkEvaluation(makeTimestampExpr.copy(year = Literal.create(null, IntegerType)), null) + checkEvaluation(makeTimestampExpr.copy(month = Literal.create(null, IntegerType)), null) + checkEvaluation(makeTimestampExpr.copy(day = Literal.create(null, IntegerType)), null) + checkEvaluation(makeTimestampExpr.copy(hour = Literal.create(null, IntegerType)), null) + checkEvaluation(makeTimestampExpr.copy(min = Literal.create(null, IntegerType)), null) + checkEvaluation(makeTimestampExpr.copy(sec = Literal.create(null, DecimalType(8, 6))), null) + checkEvaluation(makeTimestampExpr.copy(timezone = None), expected) + + Seq( + (makeTimestampExpr.copy(year = Literal(Int.MaxValue)), "Invalid value for Year"), + (makeTimestampExpr.copy(month = Literal(13)), "Invalid value for Month"), + (makeTimestampExpr.copy(day = Literal(32)), "Invalid value for Day"), + (makeTimestampExpr.copy(hour = Literal(25)), "Invalid value for Hour"), + (makeTimestampExpr.copy(min = Literal(65)), "Invalid value for Min"), + (makeTimestampExpr.copy(sec = Literal(Decimal( + BigDecimal(70.0), 8, 6))), "Invalid value for Second") + ).foreach { entry => + if (ansi) { + checkExceptionInExpression[DateTimeException](entry._1, EmptyRow, entry._2) + } else { + checkEvaluation(entry._1, null) + } + } - checkEvaluation(makeTimestampExpr.copy(min = Literal.create(null, IntegerType)), null) - checkEvaluation(makeTimestampExpr.copy(min = Literal(65)), null) + makeTimestampExpr = MakeTimestamp(Literal(2019), Literal(6), Literal(30), + Literal(23), Literal(59), Literal(Decimal(BigDecimal(60.0), 8, 6))) + if (ansi) { + checkExceptionInExpression[DateTimeException](makeTimestampExpr.copy(sec = Literal( + Decimal(BigDecimal(60.5), 8, 6))), EmptyRow, "The fraction of sec must be zero") + } else { + checkEvaluation(makeTimestampExpr, Timestamp.valueOf("2019-07-01 00:00:00")) + } - checkEvaluation(makeTimestampExpr.copy(sec = Literal.create(null, DecimalType(8, 6))), null) - checkEvaluation(makeTimestampExpr.copy(sec = Literal(Decimal(BigDecimal(70.0), 8, 6))), null) + makeTimestampExpr = MakeTimestamp(Literal(2019), Literal(8), Literal(12), Literal(0), + Literal(0), Literal(Decimal(BigDecimal(58.000001), 8, 6))) + checkEvaluation(makeTimestampExpr, Timestamp.valueOf("2019-08-12 00:00:58.000001")) + } + } - makeTimestampExpr = MakeTimestamp(Literal(2019), Literal(6), Literal(30), - Literal(23), Literal(59), Literal(Decimal(BigDecimal(60.0), 8, 6))) - checkEvaluation(makeTimestampExpr, Timestamp.valueOf("2019-07-01 
00:00:00")) - checkEvaluation(makeTimestampExpr.copy(sec = Literal(Decimal(BigDecimal(60.5), 8, 6))), null) + // non-ansi test + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + val makeTimestampExpr = MakeTimestamp(Literal(2019), Literal(6), Literal(30), + Literal(23), Literal(59), Literal(Decimal(BigDecimal(60.0), 8, 6))) + checkEvaluation(makeTimestampExpr.copy(sec = Literal(Decimal(BigDecimal(60.5), 8, 6))), null) + } - makeTimestampExpr = MakeTimestamp(Literal(2019), Literal(8), Literal(12), - Literal(0), Literal(0), Literal(Decimal(BigDecimal(58.000001), 8, 6))) - checkEvaluation(makeTimestampExpr, Timestamp.valueOf("2019-08-12 00:00:58.000001")) + Seq(true, false).foreach { ansi => + withSQLConf(SQLConf.ANSI_ENABLED.key -> ansi.toString) { + val makeTimestampExpr = MakeTimestamp(Literal(2019), Literal(8), Literal(12), + Literal(0), Literal(0), Literal(Decimal(BigDecimal(58.000001), 8, 6))) + checkEvaluation(makeTimestampExpr, Timestamp.valueOf("2019-08-12 00:00:58.000001")) + } + } } test("ISO 8601 week-numbering year") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala index 6b7be4f1609a5..5c73a91de4f79 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala @@ -214,4 +214,64 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { millis = Int.MaxValue, micros = Int.MaxValue) } + + test("ANSI mode: make interval") { + def check( + years: Int = 0, + months: Int = 0, + weeks: Int = 0, + days: Int = 0, + hours: Int = 0, + minutes: Int = 0, + seconds: Int = 0, + millis: Int = 0, + micros: Int = 0): Unit = { + val secFrac = DateTimeTestUtils.secFrac(seconds, millis, micros) + val intervalExpr = MakeInterval(Literal(years), Literal(months), Literal(weeks), + Literal(days), Literal(hours), Literal(minutes), + Literal(Decimal(secFrac, Decimal.MAX_LONG_DIGITS, 6))) + val totalMonths = years * MONTHS_PER_YEAR + months + val totalDays = weeks * DAYS_PER_WEEK + days + val totalMicros = secFrac + minutes * MICROS_PER_MINUTE + hours * MICROS_PER_HOUR + val expected = new CalendarInterval(totalMonths, totalDays, totalMicros) + checkEvaluation(intervalExpr, expected) + } + + def checkException( + years: Int = 0, + months: Int = 0, + weeks: Int = 0, + days: Int = 0, + hours: Int = 0, + minutes: Int = 0, + seconds: Int = 0, + millis: Int = 0, + micros: Int = 0): Unit = { + val secFrac = DateTimeTestUtils.secFrac(seconds, millis, micros) + val intervalExpr = MakeInterval(Literal(years), Literal(months), Literal(weeks), + Literal(days), Literal(hours), Literal(minutes), + Literal(Decimal(secFrac, Decimal.MAX_LONG_DIGITS, 6))) + checkExceptionInExpression[ArithmeticException](intervalExpr, EmptyRow, "") + } + + withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + check(months = 0, days = 0, micros = 0) + check(years = -123) + check(weeks = 123) + check(millis = -123) + check(9999, 11, 0, 31, 23, 59, 59, 999, 999) + check(years = 10000, micros = -1) + check(-9999, -11, 0, -31, -23, -59, -59, -999, -999) + check(years = -10000, micros = 1) + check( + hours = Int.MaxValue, + minutes = Int.MaxValue, + seconds = Int.MaxValue, + millis = Int.MaxValue, + micros = Int.MaxValue) + + checkException(years = Int.MaxValue) + checkException(weeks = 
Int.MaxValue) + } + } } diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out index 151fa1e28d725..a959284750483 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out @@ -590,25 +590,28 @@ struct -- !query select make_date(2013, 2, 30) -- !query schema -struct +struct<> -- !query output -NULL +java.time.DateTimeException +Invalid date 'FEBRUARY 30' -- !query select make_date(2013, 13, 1) -- !query schema -struct +struct<> -- !query output -NULL +java.time.DateTimeException +Invalid value for MonthOfYear (valid values 1 - 12): 13 -- !query select make_date(2013, 11, -1) -- !query schema -struct +struct<> -- !query output -NULL +java.time.DateTimeException +Invalid value for DayOfMonth (valid values 1 - 28/31): -1 -- !query From e5bb2937f6682239e83605b65214dfca3bdd50e5 Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Tue, 1 Dec 2020 20:34:00 +0900 Subject: [PATCH 0623/1009] [SPARK-32032][SS] Avoid infinite wait in driver because of KafkaConsumer.poll(long) API ### What changes were proposed in this pull request? Deprecated `KafkaConsumer.poll(long)` API calls may cause infinite wait in the driver. In this PR I've added a new `AdminClient` based offset fetching which is turned off by default. There is a new flag named `spark.sql.streaming.kafka.useDeprecatedOffsetFetching` (default: `true`) which can be set to `false` to reach the newly added functionality. The Structured Streaming migration guide contains more information what migration consideration must be done. Please see the following [doc](https://docs.google.com/document/d/1gAh0pKgZUgyqO2Re3sAy-fdYpe_SxpJ6DkeXE8R1P7E/edit?usp=sharing) for further details. The PR contains the following changes: * Added `AdminClient` based offset fetching * GroupId prefix feature removed from driver but only in `AdminClient` based approach (`AdminClient` doesn't need any GroupId) * GroupId override feature removed from driver but only in `AdminClient` based approach (`AdminClient` doesn't need any GroupId) * Additional unit tests * Code comment changes * Minor bugfixes here and there * Removed Kafka auto topic creation feature but only in `AdminClient` based approach (please see doc for rationale). In short, it's super hidden, not sure anybody ever used in production + error prone. * Added documentation to `ss-migration-guide` and `structured-streaming-kafka-integration` ### Why are the changes needed? Driver may hang forever. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing + additional unit tests. Cluster test with simple Kafka topic to another topic query. Documentation: ``` cd docs/ SKIP_API=1 jekyll build ``` Manual webpage check. Closes #29729 from gaborgsomogyi/SPARK-32032. 
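For orientation, a minimal sketch of opting in to the new fetching path (not part of the patch): the flag name and its default of `true` come from this patch; the broker address and topic are placeholders, and it assumes the flag can be set like any other SQL conf (it can equally be passed via `--conf` at submit time).

```scala
// Switch the driver to the AdminClient-based offset fetching added here.
spark.conf.set("spark.sql.streaming.kafka.useDeprecatedOffsetFetching", "false")

val stream = spark.readStream
  .format("kafka")
  .option("kafka.bootstrap.servers", "broker1:9092")  // placeholder brokers
  .option("subscribe", "events")                      // placeholder topic
  .load()
```

With the new path, the driver only needs topic describe permission, since the AdminClient does not join a consumer group; executor-side behaviour (group prefix and override) is unchanged.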
Authored-by: Gabor Somogyi Signed-off-by: Jungtaek Lim (HeartSaVioR) --- docs/ss-migration-guide.md | 5 + .../structured-streaming-kafka-integration.md | 20 + .../spark/sql/kafka010/ConsumerStrategy.scala | 65 +- .../spark/sql/kafka010/KafkaBatch.scala | 2 +- .../sql/kafka010/KafkaOffsetReader.scala | 601 +---------------- .../sql/kafka010/KafkaOffsetReaderAdmin.scala | 573 ++++++++++++++++ .../kafka010/KafkaOffsetReaderConsumer.scala | 614 ++++++++++++++++++ .../spark/sql/kafka010/KafkaRelation.scala | 2 +- .../sql/kafka010/KafkaSourceProvider.scala | 6 +- .../sql/kafka010/ConsumerStrategySuite.scala | 147 +++++ .../kafka010/KafkaMicroBatchSourceSuite.scala | 42 +- .../sql/kafka010/KafkaOffsetReaderSuite.scala | 95 ++- .../sql/kafka010/KafkaRelationSuite.scala | 47 +- .../apache/spark/sql/internal/SQLConf.scala | 13 + 14 files changed, 1587 insertions(+), 645 deletions(-) create mode 100644 external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala create mode 100644 external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderConsumer.scala create mode 100644 external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/ConsumerStrategySuite.scala diff --git a/docs/ss-migration-guide.md b/docs/ss-migration-guide.md index d52b2e095fc76..480e5e2695a16 100644 --- a/docs/ss-migration-guide.md +++ b/docs/ss-migration-guide.md @@ -30,6 +30,11 @@ Please refer [Migration Guide: SQL, Datasets and DataFrame](sql-migration-guide. - In Spark 3.0 and before, for the queries that have stateful operation which can emit rows older than the current watermark plus allowed late record delay, which are "late rows" in downstream stateful operations and these rows can be discarded, Spark only prints a warning message. Since Spark 3.1, Spark will check for such queries with possible correctness issue and throw AnalysisException for it by default. For the users who understand the possible risk of correctness issue and still decide to run the query, please disable this check by setting the config `spark.sql.streaming.statefulOperator.checkCorrectness.enabled` to false. +- In Spark 3.0 and before Spark uses `KafkaConsumer` for offset fetching which could cause infinite wait in the driver. + In Spark 3.1 a new configuration option added `spark.sql.streaming.kafka.useDeprecatedOffsetFetching` (default: `true`) + which could be set to `false` allowing Spark to use new offset fetching mechanism using `AdminClient`. + For further details please see [Structured Streaming Kafka Integration](structured-streaming-kafka-integration.html#offset-fetching). + ## Upgrading from Structured Streaming 2.4 to 3.0 - In Spark 3.0, Structured Streaming forces the source schema into nullable when file-based datasources such as text, json, csv, parquet and orc are used via `spark.readStream(...)`. Previously, it respected the nullability in source schema; however, it caused issues tricky to debug with NPE. To restore the previous behavior, set `spark.sql.streaming.fileSource.schema.forceNullable` to `false`. diff --git a/docs/structured-streaming-kafka-integration.md b/docs/structured-streaming-kafka-integration.md index 0e4d167b58d6b..f92dd039d53b7 100644 --- a/docs/structured-streaming-kafka-integration.md +++ b/docs/structured-streaming-kafka-integration.md @@ -512,6 +512,26 @@ The following configurations are optional: +### Offset fetching + +In Spark 3.0 and before Spark uses KafkaConsumer for offset fetching which could cause infinite wait in the driver. 
+In Spark 3.1 a new configuration option added spark.sql.streaming.kafka.useDeprecatedOffsetFetching (default: true) +which could be set to `false` allowing Spark to use new offset fetching mechanism using AdminClient. +When the new mechanism used the following applies. + +First of all the new approach supports Kafka brokers `0.11.0.0+`. + +In Spark 3.0 and below, secure Kafka processing needed the following ACLs from driver perspective: +* Topic resource describe operation +* Topic resource read operation +* Group resource read operation + +Since Spark 3.1, offsets can be obtained with AdminClient instead of KafkaConsumer and for that the following ACLs needed from driver perspective: +* Topic resource describe operation + +Since AdminClient in driver is not connecting to consumer group, group.id based authorization will not work anymore (executors never done group based authorization). +Worth to mention executor side is behaving the exact same way like before (group prefix and override works). + ### Consumer Caching It's time-consuming to initialize Kafka consumers, especially in streaming scenarios where processing time is a key factor. diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/ConsumerStrategy.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/ConsumerStrategy.scala index 7bb829c282eba..a0331d7889e04 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/ConsumerStrategy.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/ConsumerStrategy.scala @@ -20,12 +20,15 @@ package org.apache.spark.sql.kafka010 import java.{util => ju} import scala.collection.JavaConverters._ +import scala.collection.mutable +import org.apache.kafka.clients.admin.Admin import org.apache.kafka.clients.consumer.{Consumer, KafkaConsumer} import org.apache.kafka.clients.consumer.internals.NoOpConsumerRebalanceListener import org.apache.kafka.common.TopicPartition -import org.apache.spark.kafka010.KafkaConfigUpdater +import org.apache.spark.internal.Logging +import org.apache.spark.kafka010.{KafkaConfigUpdater, KafkaRedactionUtil} /** * Subscribe allows you to subscribe to a fixed collection of topics. @@ -36,10 +39,20 @@ import org.apache.spark.kafka010.KafkaConfigUpdater * All three strategies have overloaded constructors that allow you to specify * the starting offset for a particular partition. */ -private[kafka010] sealed trait ConsumerStrategy { +private[kafka010] sealed trait ConsumerStrategy extends Logging { /** Create a [[KafkaConsumer]] and subscribe to topics according to a desired strategy */ def createConsumer(kafkaParams: ju.Map[String, Object]): Consumer[Array[Byte], Array[Byte]] + /** Creates an [[org.apache.kafka.clients.admin.AdminClient]] */ + def createAdmin(kafkaParams: ju.Map[String, Object]): Admin = { + val updatedKafkaParams = setAuthenticationConfigIfNeeded(kafkaParams) + logDebug(s"Admin params: ${KafkaRedactionUtil.redactParams(updatedKafkaParams.asScala.toSeq)}") + Admin.create(updatedKafkaParams) + } + + /** Returns the assigned or subscribed [[TopicPartition]] */ + def assignedTopicPartitions(admin: Admin): Set[TopicPartition] + /** * Updates the parameters with security if needed. * Added a function to hide internals and reduce code duplications because all strategy uses it. 
@@ -48,13 +61,24 @@ private[kafka010] sealed trait ConsumerStrategy { KafkaConfigUpdater("source", kafkaParams.asScala.toMap) .setAuthenticationConfigIfNeeded() .build() + + protected def retrieveAllPartitions(admin: Admin, topics: Set[String]): Set[TopicPartition] = { + admin.describeTopics(topics.asJava).all().get().asScala.filterNot(_._2.isInternal).flatMap { + case (topic, topicDescription) => + topicDescription.partitions().asScala.map { topicPartitionInfo => + val partition = topicPartitionInfo.partition() + logDebug(s"Partition found: $topic:$partition") + new TopicPartition(topic, partition) + } + }.toSet + } } /** * Specify a fixed collection of partitions. */ private[kafka010] case class AssignStrategy(partitions: Array[TopicPartition]) - extends ConsumerStrategy { + extends ConsumerStrategy with Logging { override def createConsumer( kafkaParams: ju.Map[String, Object]): Consumer[Array[Byte], Array[Byte]] = { val updatedKafkaParams = setAuthenticationConfigIfNeeded(kafkaParams) @@ -63,13 +87,20 @@ private[kafka010] case class AssignStrategy(partitions: Array[TopicPartition]) consumer } + override def assignedTopicPartitions(admin: Admin): Set[TopicPartition] = { + val topics = partitions.map(_.topic()).toSet + logDebug(s"Topics for assignment: $topics") + retrieveAllPartitions(admin, topics).filter(partitions.contains(_)) + } + override def toString: String = s"Assign[${partitions.mkString(", ")}]" } /** * Subscribe to a fixed collection of topics. */ -private[kafka010] case class SubscribeStrategy(topics: Seq[String]) extends ConsumerStrategy { +private[kafka010] case class SubscribeStrategy(topics: Seq[String]) + extends ConsumerStrategy with Logging { override def createConsumer( kafkaParams: ju.Map[String, Object]): Consumer[Array[Byte], Array[Byte]] = { val updatedKafkaParams = setAuthenticationConfigIfNeeded(kafkaParams) @@ -78,6 +109,10 @@ private[kafka010] case class SubscribeStrategy(topics: Seq[String]) extends Cons consumer } + override def assignedTopicPartitions(admin: Admin): Set[TopicPartition] = { + retrieveAllPartitions(admin, topics.toSet) + } + override def toString: String = s"Subscribe[${topics.mkString(", ")}]" } @@ -85,16 +120,30 @@ private[kafka010] case class SubscribeStrategy(topics: Seq[String]) extends Cons * Use a regex to specify topics of interest. 
*/ private[kafka010] case class SubscribePatternStrategy(topicPattern: String) - extends ConsumerStrategy { + extends ConsumerStrategy with Logging { + private val topicRegex = topicPattern.r + override def createConsumer( kafkaParams: ju.Map[String, Object]): Consumer[Array[Byte], Array[Byte]] = { val updatedKafkaParams = setAuthenticationConfigIfNeeded(kafkaParams) val consumer = new KafkaConsumer[Array[Byte], Array[Byte]](updatedKafkaParams) - consumer.subscribe( - ju.regex.Pattern.compile(topicPattern), - new NoOpConsumerRebalanceListener()) + consumer.subscribe(ju.regex.Pattern.compile(topicPattern), new NoOpConsumerRebalanceListener()) consumer } + override def assignedTopicPartitions(admin: Admin): Set[TopicPartition] = { + logDebug(s"Topic pattern: $topicPattern") + var topics = mutable.Seq.empty[String] + // listTopics is not listing internal topics by default so no filter needed + admin.listTopics().listings().get().asScala.foreach { topicListing => + val name = topicListing.name() + if (topicRegex.findFirstIn(name).isDefined) { + logDebug(s"Topic matches pattern: $name") + topics :+= name + } + } + retrieveAllPartitions(admin, topics.toSet) + } + override def toString: String = s"SubscribePattern[$topicPattern]" } diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatch.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatch.scala index a1b0f7d22216b..268719d6aed2c 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatch.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatch.scala @@ -48,7 +48,7 @@ private[kafka010] class KafkaBatch( // id. Hence, we should generate a unique id for each query. val uniqueGroupId = KafkaSourceProvider.batchUniqueGroupId(sourceOptions) - val kafkaOffsetReader = new KafkaOffsetReader( + val kafkaOffsetReader = KafkaOffsetReader.build( strategy, KafkaSourceProvider.kafkaParamsForDriver(specifiedKafkaParams), sourceOptions, diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala index adcc20c25cb5f..b1992c1dc6a0a 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala @@ -19,595 +19,62 @@ package org.apache.spark.sql.kafka010 import java.{util => ju} -import scala.collection.JavaConverters._ -import scala.collection.mutable.ArrayBuffer -import scala.util.control.NonFatal - -import org.apache.kafka.clients.consumer.{Consumer, ConsumerConfig, OffsetAndTimestamp} import org.apache.kafka.common.TopicPartition -import org.apache.spark.SparkEnv import org.apache.spark.internal.Logging -import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap -import org.apache.spark.util.{UninterruptibleThread, UninterruptibleThreadRunner} +import org.apache.spark.sql.internal.SQLConf /** - * This class uses Kafka's own [[org.apache.kafka.clients.consumer.KafkaConsumer]] API to - * read data offsets from Kafka. - * The [[ConsumerStrategy]] class defines which Kafka topics and partitions should be read - * by this source. These strategies directly correspond to the different consumption options - * in. 
This class is designed to return a configured - * [[org.apache.kafka.clients.consumer.KafkaConsumer]] that is used by the - * [[KafkaSource]] to query for the offsets. See the docs on - * [[org.apache.spark.sql.kafka010.ConsumerStrategy]] - * for more details. - * - * Note: This class is not ThreadSafe + * Base trait to fetch offsets from Kafka. The implementations are + * [[KafkaOffsetReaderConsumer]] and [[KafkaOffsetReaderAdmin]]. + * Please see the documentation and API description there. */ -private[kafka010] class KafkaOffsetReader( - consumerStrategy: ConsumerStrategy, - val driverKafkaParams: ju.Map[String, Object], - readerOptions: CaseInsensitiveMap[String], - driverGroupIdPrefix: String) extends Logging { - - /** - * [[UninterruptibleThreadRunner]] ensures that all - * [[org.apache.kafka.clients.consumer.KafkaConsumer]] communication called in an - * [[UninterruptibleThread]]. In the case of streaming queries, we are already running in an - * [[UninterruptibleThread]], however for batch mode this is not the case. - */ - val uninterruptibleThreadRunner = new UninterruptibleThreadRunner("Kafka Offset Reader") - - /** - * Place [[groupId]] and [[nextId]] here so that they are initialized before any consumer is - * created -- see SPARK-19564. - */ - private var groupId: String = null - private var nextId = 0 +private[kafka010] trait KafkaOffsetReader { - /** - * A KafkaConsumer used in the driver to query the latest Kafka offsets. This only queries the - * offsets and never commits them. - */ - @volatile protected var _consumer: Consumer[Array[Byte], Array[Byte]] = null - - protected def consumer: Consumer[Array[Byte], Array[Byte]] = synchronized { - assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) - if (_consumer == null) { - val newKafkaParams = new ju.HashMap[String, Object](driverKafkaParams) - if (driverKafkaParams.get(ConsumerConfig.GROUP_ID_CONFIG) == null) { - newKafkaParams.put(ConsumerConfig.GROUP_ID_CONFIG, nextGroupId()) - } - _consumer = consumerStrategy.createConsumer(newKafkaParams) - } - _consumer - } + // These are needed here because of KafkaSourceProviderSuite + private[kafka010] val maxOffsetFetchAttempts: Int + private[kafka010] val offsetFetchAttemptIntervalMs: Long - private[kafka010] val maxOffsetFetchAttempts = - readerOptions.getOrElse(KafkaSourceProvider.FETCH_OFFSET_NUM_RETRY, "3").toInt + // This is needed here because of KafkaContinuousStream + val driverKafkaParams: ju.Map[String, Object] - /** - * Number of partitions to read from Kafka. If this value is greater than the number of Kafka - * topicPartitions, we will split up the read tasks of the skewed partitions to multiple Spark - * tasks. The number of Spark tasks will be *approximately* `numPartitions`. It can be less or - * more depending on rounding errors or Kafka partitions that didn't receive any new data. - */ - private val minPartitions = - readerOptions.get(KafkaSourceProvider.MIN_PARTITIONS_OPTION_KEY).map(_.toInt) - - private val rangeCalculator = new KafkaOffsetRangeCalculator(minPartitions) - - private[kafka010] val offsetFetchAttemptIntervalMs = - readerOptions.getOrElse(KafkaSourceProvider.FETCH_OFFSET_RETRY_INTERVAL_MS, "1000").toLong - - /** - * Whether we should divide Kafka TopicPartitions with a lot of data into smaller Spark tasks. 
- */ - private def shouldDivvyUpLargePartitions(numTopicPartitions: Int): Boolean = { - minPartitions.map(_ > numTopicPartitions).getOrElse(false) - } - - private def nextGroupId(): String = { - groupId = driverGroupIdPrefix + "-" + nextId - nextId += 1 - groupId - } - - override def toString(): String = consumerStrategy.toString - - /** - * Closes the connection to Kafka, and cleans up state. - */ - def close(): Unit = { - if (_consumer != null) uninterruptibleThreadRunner.runUninterruptibly { stopConsumer() } - uninterruptibleThreadRunner.shutdown() - } - - /** - * @return The Set of TopicPartitions for a given topic - */ - def fetchTopicPartitions(): Set[TopicPartition] = uninterruptibleThreadRunner.runUninterruptibly { - assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) - // Poll to get the latest assigned partitions - consumer.poll(0) - val partitions = consumer.assignment() - consumer.pause(partitions) - partitions.asScala.toSet - } - - /** - * Fetch the partition offsets for the topic partitions that are indicated - * in the [[ConsumerStrategy]] and [[KafkaOffsetRangeLimit]]. - */ + def close(): Unit def fetchPartitionOffsets( offsetRangeLimit: KafkaOffsetRangeLimit, - isStartingOffsets: Boolean): Map[TopicPartition, Long] = { - def validateTopicPartitions(partitions: Set[TopicPartition], - partitionOffsets: Map[TopicPartition, Long]): Map[TopicPartition, Long] = { - assert(partitions == partitionOffsets.keySet, - "If startingOffsets contains specific offsets, you must specify all TopicPartitions.\n" + - "Use -1 for latest, -2 for earliest.\n" + - s"Specified: ${partitionOffsets.keySet} Assigned: ${partitions}") - logDebug(s"Partitions assigned to consumer: $partitions. Seeking to $partitionOffsets") - partitionOffsets - } - val partitions = fetchTopicPartitions() - // Obtain TopicPartition offsets with late binding support - offsetRangeLimit match { - case EarliestOffsetRangeLimit => partitions.map { - case tp => tp -> KafkaOffsetRangeLimit.EARLIEST - }.toMap - case LatestOffsetRangeLimit => partitions.map { - case tp => tp -> KafkaOffsetRangeLimit.LATEST - }.toMap - case SpecificOffsetRangeLimit(partitionOffsets) => - validateTopicPartitions(partitions, partitionOffsets) - case SpecificTimestampRangeLimit(partitionTimestamps) => - fetchSpecificTimestampBasedOffsets(partitionTimestamps, - failsOnNoMatchingOffset = isStartingOffsets).partitionToOffsets - } - } - - /** - * Resolves the specific offsets based on Kafka seek positions. - * This method resolves offset value -1 to the latest and -2 to the - * earliest Kafka seek position. - * - * @param partitionOffsets the specific offsets to resolve - * @param reportDataLoss callback to either report or log data loss depending on setting - */ + isStartingOffsets: Boolean): Map[TopicPartition, Long] def fetchSpecificOffsets( partitionOffsets: Map[TopicPartition, Long], - reportDataLoss: String => Unit): KafkaSourceOffset = { - val fnAssertParametersWithPartitions: ju.Set[TopicPartition] => Unit = { partitions => - assert(partitions.asScala == partitionOffsets.keySet, - "If startingOffsets contains specific offsets, you must specify all TopicPartitions.\n" + - "Use -1 for latest, -2 for earliest, if you don't care.\n" + - s"Specified: ${partitionOffsets.keySet} Assigned: ${partitions.asScala}") - logDebug(s"Partitions assigned to consumer: $partitions. 
Seeking to $partitionOffsets") - } - - val fnRetrievePartitionOffsets: ju.Set[TopicPartition] => Map[TopicPartition, Long] = { _ => - partitionOffsets - } - - val fnAssertFetchedOffsets: Map[TopicPartition, Long] => Unit = { fetched => - partitionOffsets.foreach { - case (tp, off) if off != KafkaOffsetRangeLimit.LATEST && - off != KafkaOffsetRangeLimit.EARLIEST => - if (fetched(tp) != off) { - reportDataLoss( - s"startingOffsets for $tp was $off but consumer reset to ${fetched(tp)}") - } - case _ => - // no real way to check that beginning or end is reasonable - } - } - - fetchSpecificOffsets0(fnAssertParametersWithPartitions, fnRetrievePartitionOffsets, - fnAssertFetchedOffsets) - } - + reportDataLoss: String => Unit): KafkaSourceOffset def fetchSpecificTimestampBasedOffsets( partitionTimestamps: Map[TopicPartition, Long], - failsOnNoMatchingOffset: Boolean): KafkaSourceOffset = { - val fnAssertParametersWithPartitions: ju.Set[TopicPartition] => Unit = { partitions => - assert(partitions.asScala == partitionTimestamps.keySet, - "If starting/endingOffsetsByTimestamp contains specific offsets, you must specify all " + - s"topics. Specified: ${partitionTimestamps.keySet} Assigned: ${partitions.asScala}") - logDebug(s"Partitions assigned to consumer: $partitions. Seeking to $partitionTimestamps") - } - - val fnRetrievePartitionOffsets: ju.Set[TopicPartition] => Map[TopicPartition, Long] = { _ => { - val converted = partitionTimestamps.map { case (tp, timestamp) => - tp -> java.lang.Long.valueOf(timestamp) - }.asJava - - val offsetForTime: ju.Map[TopicPartition, OffsetAndTimestamp] = - consumer.offsetsForTimes(converted) - - offsetForTime.asScala.map { case (tp, offsetAndTimestamp) => - if (failsOnNoMatchingOffset) { - assert(offsetAndTimestamp != null, "No offset matched from request of " + - s"topic-partition $tp and timestamp ${partitionTimestamps(tp)}.") - } - - if (offsetAndTimestamp == null) { - tp -> KafkaOffsetRangeLimit.LATEST - } else { - tp -> offsetAndTimestamp.offset() - } - }.toMap - } - } - - val fnAssertFetchedOffsets: Map[TopicPartition, Long] => Unit = { _ => } - - fetchSpecificOffsets0(fnAssertParametersWithPartitions, fnRetrievePartitionOffsets, - fnAssertFetchedOffsets) - } - - private def fetchSpecificOffsets0( - fnAssertParametersWithPartitions: ju.Set[TopicPartition] => Unit, - fnRetrievePartitionOffsets: ju.Set[TopicPartition] => Map[TopicPartition, Long], - fnAssertFetchedOffsets: Map[TopicPartition, Long] => Unit): KafkaSourceOffset = { - val fetched = partitionsAssignedToConsumer { - partitions => { - fnAssertParametersWithPartitions(partitions) - - val partitionOffsets = fnRetrievePartitionOffsets(partitions) - - partitionOffsets.foreach { - case (tp, KafkaOffsetRangeLimit.LATEST) => - consumer.seekToEnd(ju.Arrays.asList(tp)) - case (tp, KafkaOffsetRangeLimit.EARLIEST) => - consumer.seekToBeginning(ju.Arrays.asList(tp)) - case (tp, off) => consumer.seek(tp, off) - } - - partitionOffsets.map { - case (tp, _) => tp -> consumer.position(tp) - } - } - } - - fnAssertFetchedOffsets(fetched) - - KafkaSourceOffset(fetched) - } - - /** - * Fetch the earliest offsets for the topic partitions that are indicated - * in the [[ConsumerStrategy]]. 
- */ - def fetchEarliestOffsets(): Map[TopicPartition, Long] = partitionsAssignedToConsumer( - partitions => { - logDebug("Seeking to the beginning") - - consumer.seekToBeginning(partitions) - val partitionOffsets = partitions.asScala.map(p => p -> consumer.position(p)).toMap - logDebug(s"Got earliest offsets for partition : $partitionOffsets") - partitionOffsets - }, fetchingEarliestOffset = true) - - /** - * Fetch the latest offsets for the topic partitions that are indicated - * in the [[ConsumerStrategy]]. - * - * Kafka may return earliest offsets when we are requesting latest offsets if `poll` is called - * right before `seekToEnd` (KAFKA-7703). As a workaround, we will call `position` right after - * `poll` to wait until the potential offset request triggered by `poll(0)` is done. - * - * In addition, to avoid other unknown issues, we also use the given `knownOffsets` to audit the - * latest offsets returned by Kafka. If we find some incorrect offsets (a latest offset is less - * than an offset in `knownOffsets`), we will retry at most `maxOffsetFetchAttempts` times. When - * a topic is recreated, the latest offsets may be less than offsets in `knownOffsets`. We cannot - * distinguish this with KAFKA-7703, so we just return whatever we get from Kafka after retrying. - */ - def fetchLatestOffsets( - knownOffsets: Option[PartitionOffsetMap]): PartitionOffsetMap = - partitionsAssignedToConsumer { partitions => { - logDebug("Seeking to the end.") - - if (knownOffsets.isEmpty) { - consumer.seekToEnd(partitions) - partitions.asScala.map(p => p -> consumer.position(p)).toMap - } else { - var partitionOffsets: PartitionOffsetMap = Map.empty - - /** - * Compare `knownOffsets` and `partitionOffsets`. Returns all partitions that have incorrect - * latest offset (offset in `knownOffsets` is great than the one in `partitionOffsets`). - */ - def findIncorrectOffsets(): Seq[(TopicPartition, Long, Long)] = { - var incorrectOffsets = ArrayBuffer[(TopicPartition, Long, Long)]() - partitionOffsets.foreach { case (tp, offset) => - knownOffsets.foreach(_.get(tp).foreach { knownOffset => - if (knownOffset > offset) { - val incorrectOffset = (tp, knownOffset, offset) - incorrectOffsets += incorrectOffset - } - }) - } - incorrectOffsets.toSeq - } - - // Retry to fetch latest offsets when detecting incorrect offsets. We don't use - // `withRetriesWithoutInterrupt` to retry because: - // - // - `withRetriesWithoutInterrupt` will reset the consumer for each attempt but a fresh - // consumer has a much bigger chance to hit KAFKA-7703. - // - Avoid calling `consumer.poll(0)` which may cause KAFKA-7703. - var incorrectOffsets: Seq[(TopicPartition, Long, Long)] = Nil - var attempt = 0 - do { - consumer.seekToEnd(partitions) - partitionOffsets = partitions.asScala.map(p => p -> consumer.position(p)).toMap - attempt += 1 - - incorrectOffsets = findIncorrectOffsets() - if (incorrectOffsets.nonEmpty) { - logWarning("Found incorrect offsets in some partitions " + - s"(partition, previous offset, fetched offset): $incorrectOffsets") - if (attempt < maxOffsetFetchAttempts) { - logWarning("Retrying to fetch latest offsets because of incorrect offsets") - Thread.sleep(offsetFetchAttemptIntervalMs) - } - } - } while (incorrectOffsets.nonEmpty && attempt < maxOffsetFetchAttempts) - - logDebug(s"Got latest offsets for partition : $partitionOffsets") - partitionOffsets - } - } - } - - /** - * Fetch the earliest offsets for specific topic partitions. - * The return result may not contain some partitions if they are deleted. 
- */ - def fetchEarliestOffsets( - newPartitions: Seq[TopicPartition]): Map[TopicPartition, Long] = { - if (newPartitions.isEmpty) { - Map.empty[TopicPartition, Long] - } else { - partitionsAssignedToConsumer(partitions => { - // Get the earliest offset of each partition - consumer.seekToBeginning(partitions) - val partitionOffsets = newPartitions.filter { p => - // When deleting topics happen at the same time, some partitions may not be in - // `partitions`. So we need to ignore them - partitions.contains(p) - }.map(p => p -> consumer.position(p)).toMap - logDebug(s"Got earliest offsets for new partitions: $partitionOffsets") - partitionOffsets - }, fetchingEarliestOffset = true) - } - } - - /** - * Return the offset ranges for a Kafka batch query. If `minPartitions` is set, this method may - * split partitions to respect it. Since offsets can be early and late binding which are evaluated - * on the executors, in order to divvy up the partitions we need to perform some substitutions. We - * don't want to send exact offsets to the executors, because data may age out before we can - * consume the data. This method makes some approximate splitting, and replaces the special offset - * values in the final output. - */ + failsOnNoMatchingOffset: Boolean): KafkaSourceOffset + def fetchEarliestOffsets(): Map[TopicPartition, Long] + def fetchLatestOffsets(knownOffsets: Option[PartitionOffsetMap]): PartitionOffsetMap + def fetchEarliestOffsets(newPartitions: Seq[TopicPartition]): Map[TopicPartition, Long] def getOffsetRangesFromUnresolvedOffsets( startingOffsets: KafkaOffsetRangeLimit, - endingOffsets: KafkaOffsetRangeLimit): Seq[KafkaOffsetRange] = { - val fromPartitionOffsets = fetchPartitionOffsets(startingOffsets, isStartingOffsets = true) - val untilPartitionOffsets = fetchPartitionOffsets(endingOffsets, isStartingOffsets = false) - - // Obtain topicPartitions in both from and until partition offset, ignoring - // topic partitions that were added and/or deleted between the two above calls. 
- if (fromPartitionOffsets.keySet != untilPartitionOffsets.keySet) { - implicit val topicOrdering: Ordering[TopicPartition] = Ordering.by(t => t.topic()) - val fromTopics = fromPartitionOffsets.keySet.toList.sorted.mkString(",") - val untilTopics = untilPartitionOffsets.keySet.toList.sorted.mkString(",") - throw new IllegalStateException("different topic partitions " + - s"for starting offsets topics[${fromTopics}] and " + - s"ending offsets topics[${untilTopics}]") - } - - // Calculate offset ranges - val offsetRangesBase = untilPartitionOffsets.keySet.map { tp => - val fromOffset = fromPartitionOffsets.get(tp).getOrElse { - // This should not happen since topicPartitions contains all partitions not in - // fromPartitionOffsets - throw new IllegalStateException(s"$tp doesn't have a from offset") - } - val untilOffset = untilPartitionOffsets(tp) - KafkaOffsetRange(tp, fromOffset, untilOffset, None) - }.toSeq - - if (shouldDivvyUpLargePartitions(offsetRangesBase.size)) { - val fromOffsetsMap = - offsetRangesBase.map(range => (range.topicPartition, range.fromOffset)).toMap - val untilOffsetsMap = - offsetRangesBase.map(range => (range.topicPartition, range.untilOffset)).toMap - - // No need to report data loss here - val resolvedFromOffsets = fetchSpecificOffsets(fromOffsetsMap, _ => ()).partitionToOffsets - val resolvedUntilOffsets = fetchSpecificOffsets(untilOffsetsMap, _ => ()).partitionToOffsets - val ranges = offsetRangesBase.map(_.topicPartition).map { tp => - KafkaOffsetRange(tp, resolvedFromOffsets(tp), resolvedUntilOffsets(tp), preferredLoc = None) - } - val divvied = rangeCalculator.getRanges(ranges).groupBy(_.topicPartition) - divvied.flatMap { case (tp, splitOffsetRanges) => - if (splitOffsetRanges.length == 1) { - Seq(KafkaOffsetRange(tp, fromOffsetsMap(tp), untilOffsetsMap(tp), None)) - } else { - // the list can't be empty - val first = splitOffsetRanges.head.copy(fromOffset = fromOffsetsMap(tp)) - val end = splitOffsetRanges.last.copy(untilOffset = untilOffsetsMap(tp)) - Seq(first) ++ splitOffsetRanges.drop(1).dropRight(1) :+ end - } - }.toArray.toSeq - } else { - offsetRangesBase - } - } - - private def getSortedExecutorList(): Array[String] = { - def compare(a: ExecutorCacheTaskLocation, b: ExecutorCacheTaskLocation): Boolean = { - if (a.host == b.host) { - a.executorId > b.executorId - } else { - a.host > b.host - } - } - - val bm = SparkEnv.get.blockManager - bm.master.getPeers(bm.blockManagerId).toArray - .map(x => ExecutorCacheTaskLocation(x.host, x.executorId)) - .sortWith(compare) - .map(_.toString) - } - - /** - * Return the offset ranges for a Kafka streaming batch. If `minPartitions` is set, this method - * may split partitions to respect it. If any data lost issue is detected, `reportDataLoss` will - * be called. - */ + endingOffsets: KafkaOffsetRangeLimit): Seq[KafkaOffsetRange] def getOffsetRangesFromResolvedOffsets( fromPartitionOffsets: PartitionOffsetMap, untilPartitionOffsets: PartitionOffsetMap, - reportDataLoss: String => Unit): Seq[KafkaOffsetRange] = { - // Find the new partitions, and get their earliest offsets - val newPartitions = untilPartitionOffsets.keySet.diff(fromPartitionOffsets.keySet) - val newPartitionInitialOffsets = fetchEarliestOffsets(newPartitions.toSeq) - if (newPartitionInitialOffsets.keySet != newPartitions) { - // We cannot get from offsets for some partitions. It means they got deleted. 
- val deletedPartitions = newPartitions.diff(newPartitionInitialOffsets.keySet) - reportDataLoss( - s"Cannot find earliest offsets of ${deletedPartitions}. Some data may have been missed") - } - logInfo(s"Partitions added: $newPartitionInitialOffsets") - newPartitionInitialOffsets.filter(_._2 != 0).foreach { case (p, o) => - reportDataLoss( - s"Added partition $p starts from $o instead of 0. Some data may have been missed") - } - - val deletedPartitions = fromPartitionOffsets.keySet.diff(untilPartitionOffsets.keySet) - if (deletedPartitions.nonEmpty) { - val message = if (driverKafkaParams.containsKey(ConsumerConfig.GROUP_ID_CONFIG)) { - s"$deletedPartitions are gone. ${KafkaSourceProvider.CUSTOM_GROUP_ID_ERROR_MESSAGE}" - } else { - s"$deletedPartitions are gone. Some data may have been missed." - } - reportDataLoss(message) - } - - // Use the until partitions to calculate offset ranges to ignore partitions that have - // been deleted - val topicPartitions = untilPartitionOffsets.keySet.filter { tp => - // Ignore partitions that we don't know the from offsets. - newPartitionInitialOffsets.contains(tp) || fromPartitionOffsets.contains(tp) - }.toSeq - logDebug("TopicPartitions: " + topicPartitions.mkString(", ")) - - val fromOffsets = fromPartitionOffsets ++ newPartitionInitialOffsets - val untilOffsets = untilPartitionOffsets - val ranges = topicPartitions.map { tp => - val fromOffset = fromOffsets(tp) - val untilOffset = untilOffsets(tp) - if (untilOffset < fromOffset) { - reportDataLoss(s"Partition $tp's offset was changed from " + - s"$fromOffset to $untilOffset, some data may have been missed") - } - KafkaOffsetRange(tp, fromOffset, untilOffset, preferredLoc = None) - } - rangeCalculator.getRanges(ranges, getSortedExecutorList) - } - - private def partitionsAssignedToConsumer( - body: ju.Set[TopicPartition] => Map[TopicPartition, Long], - fetchingEarliestOffset: Boolean = false) - : Map[TopicPartition, Long] = uninterruptibleThreadRunner.runUninterruptibly { - - withRetriesWithoutInterrupt { - // Poll to get the latest assigned partitions - consumer.poll(0) - val partitions = consumer.assignment() - - if (!fetchingEarliestOffset) { - // Call `position` to wait until the potential offset request triggered by `poll(0)` is - // done. This is a workaround for KAFKA-7703, which an async `seekToBeginning` triggered by - // `poll(0)` may reset offsets that should have been set by another request. - partitions.asScala.map(p => p -> consumer.position(p)).foreach(_ => {}) - } - - consumer.pause(partitions) - logDebug(s"Partitions assigned to consumer: $partitions.") - body(partitions) - } - } - - /** - * Helper function that does multiple retries on a body of code that returns offsets. - * Retries are needed to handle transient failures. For e.g. race conditions between getting - * assignment and getting position while topics/partitions are deleted can cause NPEs. - * - * This method also makes sure `body` won't be interrupted to workaround a potential issue in - * `KafkaConsumer.poll`. 
(KAFKA-1894) - */ - private def withRetriesWithoutInterrupt( - body: => Map[TopicPartition, Long]): Map[TopicPartition, Long] = { - // Make sure `KafkaConsumer.poll` won't be interrupted (KAFKA-1894) - assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) + reportDataLoss: String => Unit): Seq[KafkaOffsetRange] +} - synchronized { - var result: Option[Map[TopicPartition, Long]] = None - var attempt = 1 - var lastException: Throwable = null - while (result.isEmpty && attempt <= maxOffsetFetchAttempts - && !Thread.currentThread().isInterrupted) { - Thread.currentThread match { - case ut: UninterruptibleThread => - // "KafkaConsumer.poll" may hang forever if the thread is interrupted (E.g., the query - // is stopped)(KAFKA-1894). Hence, we just make sure we don't interrupt it. - // - // If the broker addresses are wrong, or Kafka cluster is down, "KafkaConsumer.poll" may - // hang forever as well. This cannot be resolved in KafkaSource until Kafka fixes the - // issue. - ut.runUninterruptibly { - try { - result = Some(body) - } catch { - case NonFatal(e) => - lastException = e - logWarning(s"Error in attempt $attempt getting Kafka offsets: ", e) - attempt += 1 - Thread.sleep(offsetFetchAttemptIntervalMs) - resetConsumer() - } - } - case _ => - throw new IllegalStateException( - "Kafka APIs must be executed on a o.a.spark.util.UninterruptibleThread") - } - } - if (Thread.interrupted()) { - throw new InterruptedException() - } - if (result.isEmpty) { - assert(attempt > maxOffsetFetchAttempts) - assert(lastException != null) - throw lastException - } - result.get +private[kafka010] object KafkaOffsetReader extends Logging { + def build( + consumerStrategy: ConsumerStrategy, + driverKafkaParams: ju.Map[String, Object], + readerOptions: CaseInsensitiveMap[String], + driverGroupIdPrefix: String): KafkaOffsetReader = { + if (SQLConf.get.useDeprecatedKafkaOffsetFetching) { + logDebug("Creating old and deprecated Consumer based offset reader") + new KafkaOffsetReaderConsumer(consumerStrategy, driverKafkaParams, readerOptions, + driverGroupIdPrefix) + } else { + logDebug("Creating new Admin based offset reader") + new KafkaOffsetReaderAdmin(consumerStrategy, driverKafkaParams, readerOptions, + driverGroupIdPrefix) } } - - private def stopConsumer(): Unit = synchronized { - assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) - if (_consumer != null) _consumer.close() - } - - private def resetConsumer(): Unit = synchronized { - stopConsumer() - _consumer = null // will automatically get reinitialized again - } } diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala new file mode 100644 index 0000000000000..d5905795c626b --- /dev/null +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala @@ -0,0 +1,573 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.kafka010 + +import java.{util => ju} +import java.util.Locale + +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer +import scala.util.control.NonFatal + +import org.apache.kafka.clients.admin.{Admin, ListOffsetsOptions, OffsetSpec} +import org.apache.kafka.clients.consumer.ConsumerConfig +import org.apache.kafka.common.{IsolationLevel, TopicPartition} +import org.apache.kafka.common.requests.OffsetFetchResponse + +import org.apache.spark.SparkEnv +import org.apache.spark.internal.Logging +import org.apache.spark.scheduler.ExecutorCacheTaskLocation +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.util.{UninterruptibleThread, UninterruptibleThreadRunner} + +/** + * This class uses Kafka's own [[Admin]] API to read data offsets from Kafka. + * The [[ConsumerStrategy]] class defines which Kafka topics and partitions should be read + * by this source. These strategies directly correspond to the different consumption options + * in. This class is designed to return a configured [[Admin]] that is used by the + * [[KafkaSource]] to query for the offsets. See the docs on + * [[org.apache.spark.sql.kafka010.ConsumerStrategy]] + * for more details. + * + * Note: This class is not ThreadSafe + */ +private[kafka010] class KafkaOffsetReaderAdmin( + consumerStrategy: ConsumerStrategy, + override val driverKafkaParams: ju.Map[String, Object], + readerOptions: CaseInsensitiveMap[String], + driverGroupIdPrefix: String) extends KafkaOffsetReader with Logging { + + private[kafka010] val maxOffsetFetchAttempts = + readerOptions.getOrElse(KafkaSourceProvider.FETCH_OFFSET_NUM_RETRY, "3").toInt + + private[kafka010] val offsetFetchAttemptIntervalMs = + readerOptions.getOrElse(KafkaSourceProvider.FETCH_OFFSET_RETRY_INTERVAL_MS, "1000").toLong + + /** + * [[UninterruptibleThreadRunner]] ensures that all [[Admin]] communication called in an + * [[UninterruptibleThread]]. In the case of streaming queries, we are already running in an + * [[UninterruptibleThread]], however for batch mode this is not the case. + */ + val uninterruptibleThreadRunner = new UninterruptibleThreadRunner("Kafka Offset Reader") + + /** + * An AdminClient used in the driver to query the latest Kafka offsets. + * This only queries the offsets because AdminClient has no functionality to commit offsets like + * KafkaConsumer. 
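
The new reader resolves offsets through Kafka's `Admin` client rather than a driver-side `KafkaConsumer`. The sketch below shows the underlying `listOffsets` call in isolation, mirroring the `listOffsets` helper defined a bit further down in this file; the bootstrap address and topic-partition are placeholder values.

```
import java.util.Properties

import scala.collection.JavaConverters._

import org.apache.kafka.clients.admin.{Admin, AdminClientConfig, ListOffsetsOptions, OffsetSpec}
import org.apache.kafka.common.{IsolationLevel, TopicPartition}

object AdminOffsetLookupSketch {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    // Placeholder address; point this at a reachable broker before running.
    props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092")
    val admin = Admin.create(props)
    try {
      val tp = new TopicPartition("some-topic", 0)
      // OffsetSpec selects what to resolve: earliest(), latest(), or forTimestamp(ms).
      val params: java.util.Map[TopicPartition, OffsetSpec] =
        Map[TopicPartition, OffsetSpec](tp -> OffsetSpec.latest()).asJava
      val options = new ListOffsetsOptions(IsolationLevel.READ_UNCOMMITTED)
      // all().get() blocks until the broker answers; the reader wraps this in a helper.
      val resolved = admin.listOffsets(params, options).all().get().asScala
        .map { case (partition, info) => partition -> info.offset() }
      resolved.foreach { case (partition, offset) => println(s"$partition -> $offset") }
    } finally {
      admin.close()
    }
  }
}
```

Note that this path needs no generated `group.id` and no `poll(0)` call, which is why the Admin-based reader can drop the consumer-specific workarounds kept in `KafkaOffsetReaderConsumer` later in this patch.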
+ */ + @volatile protected var _admin: Admin = null + + protected def admin: Admin = synchronized { + assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) + if (_admin == null) { + _admin = consumerStrategy.createAdmin(driverKafkaParams) + } + _admin + } + + lazy val isolationLevel: IsolationLevel = { + Option(driverKafkaParams.get(ConsumerConfig.ISOLATION_LEVEL_CONFIG)) match { + case Some(s: String) => IsolationLevel.valueOf(s.toUpperCase(Locale.ROOT)) + case None => IsolationLevel.valueOf( + ConsumerConfig.DEFAULT_ISOLATION_LEVEL.toUpperCase(Locale.ROOT)) + case _ => throw new IllegalArgumentException(s"${ConsumerConfig.ISOLATION_LEVEL_CONFIG} " + + "must be either not defined or with type String") + } + } + + private lazy val listOffsetsOptions = new ListOffsetsOptions(isolationLevel) + + private def listOffsets(admin: Admin, listOffsetsParams: ju.Map[TopicPartition, OffsetSpec]) = { + admin.listOffsets(listOffsetsParams, listOffsetsOptions).all().get().asScala + .map(result => result._1 -> result._2.offset()).toMap + } + + /** + * Number of partitions to read from Kafka. If this value is greater than the number of Kafka + * topicPartitions, we will split up the read tasks of the skewed partitions to multiple Spark + * tasks. The number of Spark tasks will be *approximately* `numPartitions`. It can be less or + * more depending on rounding errors or Kafka partitions that didn't receive any new data. + */ + private val minPartitions = + readerOptions.get(KafkaSourceProvider.MIN_PARTITIONS_OPTION_KEY).map(_.toInt) + + private val rangeCalculator = new KafkaOffsetRangeCalculator(minPartitions) + + /** + * Whether we should divide Kafka TopicPartitions with a lot of data into smaller Spark tasks. + */ + private def shouldDivvyUpLargePartitions(numTopicPartitions: Int): Boolean = { + minPartitions.map(_ > numTopicPartitions).getOrElse(false) + } + + override def toString(): String = consumerStrategy.toString + + /** + * Closes the connection to Kafka, and cleans up state. + */ + override def close(): Unit = { + if (_admin != null) uninterruptibleThreadRunner.runUninterruptibly { stopAdmin() } + uninterruptibleThreadRunner.shutdown() + } + + /** + * Fetch the partition offsets for the topic partitions that are indicated + * in the [[ConsumerStrategy]] and [[KafkaOffsetRangeLimit]]. + */ + override def fetchPartitionOffsets( + offsetRangeLimit: KafkaOffsetRangeLimit, + isStartingOffsets: Boolean): Map[TopicPartition, Long] = { + def validateTopicPartitions(partitions: Set[TopicPartition], + partitionOffsets: Map[TopicPartition, Long]): Map[TopicPartition, Long] = { + assert(partitions == partitionOffsets.keySet, + "If startingOffsets contains specific offsets, you must specify all TopicPartitions.\n" + + "Use -1 for latest, -2 for earliest.\n" + + s"Specified: ${partitionOffsets.keySet} Assigned: ${partitions}") + logDebug(s"Assigned partitions: $partitions. 
Seeking to $partitionOffsets") + partitionOffsets + } + val partitions = uninterruptibleThreadRunner.runUninterruptibly { + consumerStrategy.assignedTopicPartitions(admin) + } + // Obtain TopicPartition offsets with late binding support + offsetRangeLimit match { + case EarliestOffsetRangeLimit => partitions.map { + case tp => tp -> KafkaOffsetRangeLimit.EARLIEST + }.toMap + case LatestOffsetRangeLimit => partitions.map { + case tp => tp -> KafkaOffsetRangeLimit.LATEST + }.toMap + case SpecificOffsetRangeLimit(partitionOffsets) => + validateTopicPartitions(partitions, partitionOffsets) + case SpecificTimestampRangeLimit(partitionTimestamps) => + fetchSpecificTimestampBasedOffsets(partitionTimestamps, + failsOnNoMatchingOffset = isStartingOffsets).partitionToOffsets + } + } + + /** + * Resolves the specific offsets based on Kafka seek positions. + * This method resolves offset value -1 to the latest and -2 to the + * earliest Kafka seek position. + * + * @param partitionOffsets the specific offsets to resolve + * @param reportDataLoss callback to either report or log data loss depending on setting + */ + override def fetchSpecificOffsets( + partitionOffsets: Map[TopicPartition, Long], + reportDataLoss: String => Unit): KafkaSourceOffset = { + val fnAssertParametersWithPartitions: ju.Set[TopicPartition] => Unit = { partitions => + assert(partitions.asScala == partitionOffsets.keySet, + "If startingOffsets contains specific offsets, you must specify all TopicPartitions.\n" + + "Use -1 for latest, -2 for earliest, if you don't care.\n" + + s"Specified: ${partitionOffsets.keySet} Assigned: ${partitions.asScala}") + logDebug(s"Assigned partitions: $partitions. Seeking to $partitionOffsets") + } + + val fnRetrievePartitionOffsets: ju.Set[TopicPartition] => Map[TopicPartition, Long] = { _ => + partitionOffsets + } + + fetchSpecificOffsets0(fnAssertParametersWithPartitions, fnRetrievePartitionOffsets) + } + + override def fetchSpecificTimestampBasedOffsets( + partitionTimestamps: Map[TopicPartition, Long], + failsOnNoMatchingOffset: Boolean): KafkaSourceOffset = { + val fnAssertParametersWithPartitions: ju.Set[TopicPartition] => Unit = { partitions => + assert(partitions.asScala == partitionTimestamps.keySet, + "If starting/endingOffsetsByTimestamp contains specific offsets, you must specify all " + + s"topics. Specified: ${partitionTimestamps.keySet} Assigned: ${partitions.asScala}") + logDebug(s"Assigned partitions: $partitions. 
Seeking to $partitionTimestamps") + } + + val fnRetrievePartitionOffsets: ju.Set[TopicPartition] => Map[TopicPartition, Long] = { _ => { + val listOffsetsParams = partitionTimestamps.map { case (tp, timestamp) => + tp -> OffsetSpec.forTimestamp(timestamp) + }.asJava + admin.listOffsets(listOffsetsParams, listOffsetsOptions).all().get().asScala.map { + case (tp, offsetSpec) => + if (failsOnNoMatchingOffset) { + assert(offsetSpec.offset() != OffsetFetchResponse.INVALID_OFFSET, "No offset " + + s"matched from request of topic-partition $tp and timestamp " + + s"${partitionTimestamps(tp)}.") + } + + if (offsetSpec.offset() == OffsetFetchResponse.INVALID_OFFSET) { + tp -> KafkaOffsetRangeLimit.LATEST + } else { + tp -> offsetSpec.offset() + } + }.toMap + } + } + + fetchSpecificOffsets0(fnAssertParametersWithPartitions, fnRetrievePartitionOffsets) + } + + private def fetchSpecificOffsets0( + fnAssertParametersWithPartitions: ju.Set[TopicPartition] => Unit, + fnRetrievePartitionOffsets: ju.Set[TopicPartition] => Map[TopicPartition, Long] + ): KafkaSourceOffset = { + val fetched = partitionsAssignedToConsumer { + partitions => { + fnAssertParametersWithPartitions(partitions) + + val partitionOffsets = fnRetrievePartitionOffsets(partitions) + + val listOffsetsParams = partitionOffsets.filter { case (_, off) => + off == KafkaOffsetRangeLimit.LATEST || off == KafkaOffsetRangeLimit.EARLIEST + }.map { case (tp, off) => + off match { + case KafkaOffsetRangeLimit.LATEST => + tp -> OffsetSpec.latest() + case KafkaOffsetRangeLimit.EARLIEST => + tp -> OffsetSpec.earliest() + } + } + val resolvedPartitionOffsets = listOffsets(admin, listOffsetsParams.asJava) + + partitionOffsets.map { case (tp, off) => + off match { + case KafkaOffsetRangeLimit.LATEST => + tp -> resolvedPartitionOffsets(tp) + case KafkaOffsetRangeLimit.EARLIEST => + tp -> resolvedPartitionOffsets(tp) + case _ => + tp -> off + } + } + } + } + + KafkaSourceOffset(fetched) + } + + /** + * Fetch the earliest offsets for the topic partitions that are indicated + * in the [[ConsumerStrategy]]. + */ + override def fetchEarliestOffsets(): Map[TopicPartition, Long] = partitionsAssignedToConsumer( + partitions => { + val listOffsetsParams = partitions.asScala.map(p => p -> OffsetSpec.earliest()).toMap.asJava + val partitionOffsets = listOffsets(admin, listOffsetsParams) + logDebug(s"Got earliest offsets for partitions: $partitionOffsets") + partitionOffsets + }) + + /** + * Fetch the latest offsets for the topic partitions that are indicated + * in the [[ConsumerStrategy]]. + * + * Kafka may return earliest offsets when we are requesting latest offsets if `poll` is called + * right before `seekToEnd` (KAFKA-7703). As a workaround, we will call `position` right after + * `poll` to wait until the potential offset request triggered by `poll(0)` is done. + * + * In addition, to avoid other unknown issues, we also use the given `knownOffsets` to audit the + * latest offsets returned by Kafka. If we find some incorrect offsets (a latest offset is less + * than an offset in `knownOffsets`), we will retry at most `maxOffsetFetchAttempts` times. When + * a topic is recreated, the latest offsets may be less than offsets in `knownOffsets`. We cannot + * distinguish this with KAFKA-7703, so we just return whatever we get from Kafka after retrying. 
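
The paragraph above describes how `fetchLatestOffsets` audits the offsets returned by Kafka against `knownOffsets` and retries when any partition appears to have moved backwards (the KAFKA-7703 symptom). The standalone sketch below distills that loop; `fetchOnce`, the attempt limit, and the sleep interval are illustrative stand-ins for the real `listOffsets` call and the `maxOffsetFetchAttempts` / `offsetFetchAttemptIntervalMs` settings.

```
// Illustrative retry loop: re-fetch latest offsets while any of them is lower than a
// previously known offset, up to a bounded number of attempts.
object LatestOffsetAuditSketch {
  type PartitionOffsets = Map[String, Long] // keyed by "topic-partition" for brevity

  def fetchLatestWithAudit(
      fetchOnce: () => PartitionOffsets,
      knownOffsets: Option[PartitionOffsets],
      maxAttempts: Int = 3,
      retryIntervalMs: Long = 1000L): PartitionOffsets = {

    // Partitions whose freshly fetched offset is behind the known offset.
    def incorrect(offsets: PartitionOffsets): Seq[(String, Long, Long)] =
      knownOffsets.toSeq.flatMap { known =>
        offsets.collect {
          case (tp, offset) if known.get(tp).exists(_ > offset) => (tp, known(tp), offset)
        }
      }

    var attempt = 1
    var offsets = fetchOnce()
    while (incorrect(offsets).nonEmpty && attempt < maxAttempts) {
      Thread.sleep(retryIntervalMs)
      offsets = fetchOnce()
      attempt += 1
    }
    // After exhausting retries the last answer is returned as-is: a recreated topic is
    // indistinguishable from KAFKA-7703 at this point, as the comment above explains.
    offsets
  }
}
```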
+ */ + override def fetchLatestOffsets( + knownOffsets: Option[PartitionOffsetMap]): PartitionOffsetMap = + partitionsAssignedToConsumer { partitions => { + val listOffsetsParams = partitions.asScala.map(_ -> OffsetSpec.latest()).toMap.asJava + if (knownOffsets.isEmpty) { + val partitionOffsets = listOffsets(admin, listOffsetsParams) + logDebug(s"Got latest offsets for partitions: $partitionOffsets") + partitionOffsets + } else { + var partitionOffsets: PartitionOffsetMap = Map.empty + + /** + * Compare `knownOffsets` and `partitionOffsets`. Returns all partitions that have incorrect + * latest offset (offset in `knownOffsets` is great than the one in `partitionOffsets`). + */ + def findIncorrectOffsets(): Seq[(TopicPartition, Long, Long)] = { + var incorrectOffsets = ArrayBuffer[(TopicPartition, Long, Long)]() + partitionOffsets.foreach { case (tp, offset) => + knownOffsets.foreach(_.get(tp).foreach { knownOffset => + if (knownOffset > offset) { + val incorrectOffset = (tp, knownOffset, offset) + incorrectOffsets += incorrectOffset + } + }) + } + // toSeq seems redundant but it's needed for Scala 2.13 + incorrectOffsets.toSeq + } + + // Retry to fetch latest offsets when detecting incorrect offsets. We don't use + // `withRetriesWithoutInterrupt` to retry because: + // + // - `withRetriesWithoutInterrupt` will reset the consumer for each attempt but a fresh + // consumer has a much bigger chance to hit KAFKA-7703. + // - Avoid calling `consumer.poll(0)` which may cause KAFKA-7703. + var incorrectOffsets: Seq[(TopicPartition, Long, Long)] = Nil + var attempt = 0 + do { + partitionOffsets = listOffsets(admin, listOffsetsParams) + attempt += 1 + + incorrectOffsets = findIncorrectOffsets() + if (incorrectOffsets.nonEmpty) { + logWarning("Found incorrect offsets in some partitions " + + s"(partition, previous offset, fetched offset): $incorrectOffsets") + if (attempt < maxOffsetFetchAttempts) { + logWarning("Retrying to fetch latest offsets because of incorrect offsets") + Thread.sleep(offsetFetchAttemptIntervalMs) + } + } + } while (incorrectOffsets.nonEmpty && attempt < maxOffsetFetchAttempts) + + logDebug(s"Got latest offsets for partitions: $partitionOffsets") + partitionOffsets + } + } + } + + /** + * Fetch the earliest offsets for specific topic partitions. + * The return result may not contain some partitions if they are deleted. + */ + override def fetchEarliestOffsets( + newPartitions: Seq[TopicPartition]): Map[TopicPartition, Long] = { + if (newPartitions.isEmpty) { + Map.empty[TopicPartition, Long] + } else { + partitionsAssignedToConsumer(partitions => { + // Get the earliest offset of each partition + val listOffsetsParams = newPartitions.filter { newPartition => + // When deleting topics happen at the same time, some partitions may not be in + // `partitions`. So we need to ignore them + partitions.contains(newPartition) + }.map(partition => partition -> OffsetSpec.earliest()).toMap.asJava + val partitionOffsets = listOffsets(admin, listOffsetsParams) + logDebug(s"Got earliest offsets for new partitions: $partitionOffsets") + partitionOffsets + }) + } + } + + /** + * Return the offset ranges for a Kafka batch query. If `minPartitions` is set, this method may + * split partitions to respect it. Since offsets can be early and late binding which are evaluated + * on the executors, in order to divvy up the partitions we need to perform some substitutions. We + * don't want to send exact offsets to the executors, because data may age out before we can + * consume the data. 
This method makes some approximate splitting, and replaces the special offset + * values in the final output. + */ + override def getOffsetRangesFromUnresolvedOffsets( + startingOffsets: KafkaOffsetRangeLimit, + endingOffsets: KafkaOffsetRangeLimit): Seq[KafkaOffsetRange] = { + val fromPartitionOffsets = fetchPartitionOffsets(startingOffsets, isStartingOffsets = true) + val untilPartitionOffsets = fetchPartitionOffsets(endingOffsets, isStartingOffsets = false) + + // Obtain topicPartitions in both from and until partition offset, ignoring + // topic partitions that were added and/or deleted between the two above calls. + if (fromPartitionOffsets.keySet != untilPartitionOffsets.keySet) { + implicit val topicOrdering: Ordering[TopicPartition] = Ordering.by(t => t.topic()) + val fromTopics = fromPartitionOffsets.keySet.toList.sorted.mkString(",") + val untilTopics = untilPartitionOffsets.keySet.toList.sorted.mkString(",") + throw new IllegalStateException("different topic partitions " + + s"for starting offsets topics[${fromTopics}] and " + + s"ending offsets topics[${untilTopics}]") + } + + // Calculate offset ranges + val offsetRangesBase = untilPartitionOffsets.keySet.map { tp => + val fromOffset = fromPartitionOffsets.get(tp).getOrElse { + // This should not happen since topicPartitions contains all partitions not in + // fromPartitionOffsets + throw new IllegalStateException(s"$tp doesn't have a from offset") + } + val untilOffset = untilPartitionOffsets(tp) + KafkaOffsetRange(tp, fromOffset, untilOffset, None) + }.toSeq + + if (shouldDivvyUpLargePartitions(offsetRangesBase.size)) { + val fromOffsetsMap = + offsetRangesBase.map(range => (range.topicPartition, range.fromOffset)).toMap + val untilOffsetsMap = + offsetRangesBase.map(range => (range.topicPartition, range.untilOffset)).toMap + + // No need to report data loss here + val resolvedFromOffsets = fetchSpecificOffsets(fromOffsetsMap, _ => ()).partitionToOffsets + val resolvedUntilOffsets = fetchSpecificOffsets(untilOffsetsMap, _ => ()).partitionToOffsets + val ranges = offsetRangesBase.map(_.topicPartition).map { tp => + KafkaOffsetRange(tp, resolvedFromOffsets(tp), resolvedUntilOffsets(tp), preferredLoc = None) + } + val divvied = rangeCalculator.getRanges(ranges).groupBy(_.topicPartition) + divvied.flatMap { case (tp, splitOffsetRanges) => + if (splitOffsetRanges.length == 1) { + Seq(KafkaOffsetRange(tp, fromOffsetsMap(tp), untilOffsetsMap(tp), None)) + } else { + // the list can't be empty + val first = splitOffsetRanges.head.copy(fromOffset = fromOffsetsMap(tp)) + val end = splitOffsetRanges.last.copy(untilOffset = untilOffsetsMap(tp)) + Seq(first) ++ splitOffsetRanges.drop(1).dropRight(1) :+ end + } + }.toArray.toSeq + } else { + offsetRangesBase + } + } + + private def getSortedExecutorList: Array[String] = { + def compare(a: ExecutorCacheTaskLocation, b: ExecutorCacheTaskLocation): Boolean = { + if (a.host == b.host) { + a.executorId > b.executorId + } else { + a.host > b.host + } + } + + val bm = SparkEnv.get.blockManager + bm.master.getPeers(bm.blockManagerId).toArray + .map(x => ExecutorCacheTaskLocation(x.host, x.executorId)) + .sortWith(compare) + .map(_.toString) + } + + /** + * Return the offset ranges for a Kafka streaming batch. If `minPartitions` is set, this method + * may split partitions to respect it. If any data lost issue is detected, `reportDataLoss` will + * be called. 
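
`getOffsetRangesFromUnresolvedOffsets` above leans on `KafkaOffsetRangeCalculator` to split large topic-partitions when `minPartitions` exceeds the number of Kafka partitions. That calculator is not part of this diff, so the sketch below only illustrates the general idea of slicing one offset range into roughly equal contiguous sub-ranges; the weighting logic of the real class may differ.

```
// Illustrative only: split a single [from, until) offset range into n contiguous
// sub-ranges of near-equal size, the basic building block behind minPartitions.
object RangeSplitSketch {
  final case class OffsetRange(topicPartition: String, fromOffset: Long, untilOffset: Long)

  def split(range: OffsetRange, n: Int): Seq[OffsetRange] = {
    require(n > 0, "need at least one slice")
    val size = range.untilOffset - range.fromOffset
    (0 until n).map { i =>
      // Integer arithmetic spreads any remainder across the last slices.
      val start = range.fromOffset + i * size / n
      val end = range.fromOffset + (i + 1) * size / n
      OffsetRange(range.topicPartition, start, end)
    }.filter(r => r.untilOffset > r.fromOffset)
  }

  def main(args: Array[String]): Unit = {
    // 10 offsets split three ways -> slice sizes 3, 3 and 4 (boundaries depend on rounding).
    split(OffsetRange("topic-0", 100L, 110L), 3).foreach(println)
  }
}
```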
+ */ + override def getOffsetRangesFromResolvedOffsets( + fromPartitionOffsets: PartitionOffsetMap, + untilPartitionOffsets: PartitionOffsetMap, + reportDataLoss: String => Unit): Seq[KafkaOffsetRange] = { + // Find the new partitions, and get their earliest offsets + val newPartitions = untilPartitionOffsets.keySet.diff(fromPartitionOffsets.keySet) + val newPartitionInitialOffsets = fetchEarliestOffsets(newPartitions.toSeq) + if (newPartitionInitialOffsets.keySet != newPartitions) { + // We cannot get from offsets for some partitions. It means they got deleted. + val deletedPartitions = newPartitions.diff(newPartitionInitialOffsets.keySet) + reportDataLoss( + s"Cannot find earliest offsets of ${deletedPartitions}. Some data may have been missed") + } + logInfo(s"Partitions added: $newPartitionInitialOffsets") + newPartitionInitialOffsets.filter(_._2 != 0).foreach { case (p, o) => + reportDataLoss( + s"Added partition $p starts from $o instead of 0. Some data may have been missed") + } + + val deletedPartitions = fromPartitionOffsets.keySet.diff(untilPartitionOffsets.keySet) + if (deletedPartitions.nonEmpty) { + val message = if (driverKafkaParams.containsKey(ConsumerConfig.GROUP_ID_CONFIG)) { + s"$deletedPartitions are gone. ${KafkaSourceProvider.CUSTOM_GROUP_ID_ERROR_MESSAGE}" + } else { + s"$deletedPartitions are gone. Some data may have been missed." + } + reportDataLoss(message) + } + + // Use the until partitions to calculate offset ranges to ignore partitions that have + // been deleted + val topicPartitions = untilPartitionOffsets.keySet.filter { tp => + // Ignore partitions that we don't know the from offsets. + newPartitionInitialOffsets.contains(tp) || fromPartitionOffsets.contains(tp) + }.toSeq + logDebug("TopicPartitions: " + topicPartitions.mkString(", ")) + + val fromOffsets = fromPartitionOffsets ++ newPartitionInitialOffsets + val untilOffsets = untilPartitionOffsets + val ranges = topicPartitions.map { tp => + val fromOffset = fromOffsets(tp) + val untilOffset = untilOffsets(tp) + if (untilOffset < fromOffset) { + reportDataLoss(s"Partition $tp's offset was changed from " + + s"$fromOffset to $untilOffset, some data may have been missed") + } + KafkaOffsetRange(tp, fromOffset, untilOffset, preferredLoc = None) + } + rangeCalculator.getRanges(ranges, getSortedExecutorList) + } + + private def partitionsAssignedToConsumer( + body: ju.Set[TopicPartition] => Map[TopicPartition, Long]) + : Map[TopicPartition, Long] = uninterruptibleThreadRunner.runUninterruptibly { + + withRetriesWithoutInterrupt { + val partitions = consumerStrategy.assignedTopicPartitions(admin).asJava + logDebug(s"Partitions assigned: $partitions.") + body(partitions) + } + } + + /** + * Helper function that does multiple retries on a body of code that returns offsets. + * Retries are needed to handle transient failures. For e.g. race conditions between getting + * assignment and getting position while topics/partitions are deleted can cause NPEs. + * + * This method also makes sure `body` won't be interrupted to workaround similar issues like in + * `KafkaConsumer.poll`. 
(KAFKA-1894) + */ + private def withRetriesWithoutInterrupt( + body: => Map[TopicPartition, Long]): Map[TopicPartition, Long] = { + assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) + + synchronized { + var result: Option[Map[TopicPartition, Long]] = None + var attempt = 1 + var lastException: Throwable = null + while (result.isEmpty && attempt <= maxOffsetFetchAttempts + && !Thread.currentThread().isInterrupted) { + Thread.currentThread match { + case ut: UninterruptibleThread => + ut.runUninterruptibly { + try { + result = Some(body) + } catch { + case NonFatal(e) => + lastException = e + logWarning(s"Error in attempt $attempt getting Kafka offsets: ", e) + attempt += 1 + Thread.sleep(offsetFetchAttemptIntervalMs) + resetAdmin() + } + } + case _ => + throw new IllegalStateException( + "Kafka APIs must be executed on a o.a.spark.util.UninterruptibleThread") + } + } + if (Thread.interrupted()) { + throw new InterruptedException() + } + if (result.isEmpty) { + assert(attempt > maxOffsetFetchAttempts) + assert(lastException != null) + throw lastException + } + result.get + } + } + + private def stopAdmin(): Unit = synchronized { + assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) + if (_admin != null) _admin.close() + } + + private def resetAdmin(): Unit = synchronized { + stopAdmin() + _admin = null // will automatically get reinitialized again + } +} diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderConsumer.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderConsumer.scala new file mode 100644 index 0000000000000..eca41c510f1f2 --- /dev/null +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderConsumer.scala @@ -0,0 +1,614 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.kafka010 + +import java.{util => ju} + +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer +import scala.util.control.NonFatal + +import org.apache.kafka.clients.consumer.{Consumer, ConsumerConfig, OffsetAndTimestamp} +import org.apache.kafka.common.TopicPartition + +import org.apache.spark.SparkEnv +import org.apache.spark.internal.Logging +import org.apache.spark.scheduler.ExecutorCacheTaskLocation +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.util.{UninterruptibleThread, UninterruptibleThreadRunner} + +/** + * This class uses Kafka's own [[org.apache.kafka.clients.consumer.KafkaConsumer]] API to + * read data offsets from Kafka. + * The [[ConsumerStrategy]] class defines which Kafka topics and partitions should be read + * by this source. 
These strategies directly correspond to the different consumption options + * in. This class is designed to return a configured + * [[org.apache.kafka.clients.consumer.KafkaConsumer]] that is used by the + * [[KafkaSource]] to query for the offsets. See the docs on + * [[org.apache.spark.sql.kafka010.ConsumerStrategy]] + * for more details. + * + * Note: This class is not ThreadSafe + */ +private[kafka010] class KafkaOffsetReaderConsumer( + consumerStrategy: ConsumerStrategy, + override val driverKafkaParams: ju.Map[String, Object], + readerOptions: CaseInsensitiveMap[String], + driverGroupIdPrefix: String) extends KafkaOffsetReader with Logging { + + /** + * [[UninterruptibleThreadRunner]] ensures that all + * [[org.apache.kafka.clients.consumer.KafkaConsumer]] communication called in an + * [[UninterruptibleThread]]. In the case of streaming queries, we are already running in an + * [[UninterruptibleThread]], however for batch mode this is not the case. + */ + val uninterruptibleThreadRunner = new UninterruptibleThreadRunner("Kafka Offset Reader") + + /** + * Place [[groupId]] and [[nextId]] here so that they are initialized before any consumer is + * created -- see SPARK-19564. + */ + private var groupId: String = null + private var nextId = 0 + + /** + * A KafkaConsumer used in the driver to query the latest Kafka offsets. This only queries the + * offsets and never commits them. + */ + @volatile protected var _consumer: Consumer[Array[Byte], Array[Byte]] = null + + protected def consumer: Consumer[Array[Byte], Array[Byte]] = synchronized { + assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) + if (_consumer == null) { + val newKafkaParams = new ju.HashMap[String, Object](driverKafkaParams) + if (driverKafkaParams.get(ConsumerConfig.GROUP_ID_CONFIG) == null) { + newKafkaParams.put(ConsumerConfig.GROUP_ID_CONFIG, nextGroupId()) + } + _consumer = consumerStrategy.createConsumer(newKafkaParams) + } + _consumer + } + + private[kafka010] val maxOffsetFetchAttempts = + readerOptions.getOrElse(KafkaSourceProvider.FETCH_OFFSET_NUM_RETRY, "3").toInt + + /** + * Number of partitions to read from Kafka. If this value is greater than the number of Kafka + * topicPartitions, we will split up the read tasks of the skewed partitions to multiple Spark + * tasks. The number of Spark tasks will be *approximately* `numPartitions`. It can be less or + * more depending on rounding errors or Kafka partitions that didn't receive any new data. + */ + private val minPartitions = + readerOptions.get(KafkaSourceProvider.MIN_PARTITIONS_OPTION_KEY).map(_.toInt) + + private val rangeCalculator = new KafkaOffsetRangeCalculator(minPartitions) + + private[kafka010] val offsetFetchAttemptIntervalMs = + readerOptions.getOrElse(KafkaSourceProvider.FETCH_OFFSET_RETRY_INTERVAL_MS, "1000").toLong + + /** + * Whether we should divide Kafka TopicPartitions with a lot of data into smaller Spark tasks. + */ + private def shouldDivvyUpLargePartitions(numTopicPartitions: Int): Boolean = { + minPartitions.map(_ > numTopicPartitions).getOrElse(false) + } + + private def nextGroupId(): String = { + groupId = driverGroupIdPrefix + "-" + nextId + nextId += 1 + groupId + } + + override def toString(): String = consumerStrategy.toString + + /** + * Closes the connection to Kafka, and cleans up state. 
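
Unlike the Admin-based reader, the consumer-based reader above still needs a `group.id` for its driver-side `KafkaConsumer`, and it generates a fresh one per consumer from `driverGroupIdPrefix` and `nextId` unless the user supplied one (see the SPARK-19564 note in the hunk). A small sketch of that parameter preparation, using plain maps instead of the real consumer strategy, is shown below.

```
import java.{util => ju}

import org.apache.kafka.clients.consumer.ConsumerConfig

// Illustrative only: build driver-side consumer params, injecting a generated group id
// when the caller did not configure one, mirroring the logic in the hunk above.
class DriverGroupIdSketch(driverGroupIdPrefix: String) {
  private var nextId = 0

  private def nextGroupId(): String = {
    val groupId = driverGroupIdPrefix + "-" + nextId
    nextId += 1
    groupId
  }

  def prepareParams(driverKafkaParams: ju.Map[String, Object]): ju.Map[String, Object] = {
    val newKafkaParams = new ju.HashMap[String, Object](driverKafkaParams)
    if (driverKafkaParams.get(ConsumerConfig.GROUP_ID_CONFIG) == null) {
      newKafkaParams.put(ConsumerConfig.GROUP_ID_CONFIG, nextGroupId())
    }
    newKafkaParams
  }
}
```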
+ */ + override def close(): Unit = { + if (_consumer != null) uninterruptibleThreadRunner.runUninterruptibly { stopConsumer() } + uninterruptibleThreadRunner.shutdown() + } + + /** + * @return The Set of TopicPartitions for a given topic + */ + private def fetchTopicPartitions(): Set[TopicPartition] = + uninterruptibleThreadRunner.runUninterruptibly { + assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) + // Poll to get the latest assigned partitions + consumer.poll(0) + val partitions = consumer.assignment() + consumer.pause(partitions) + partitions.asScala.toSet + } + + /** + * Fetch the partition offsets for the topic partitions that are indicated + * in the [[ConsumerStrategy]] and [[KafkaOffsetRangeLimit]]. + */ + override def fetchPartitionOffsets( + offsetRangeLimit: KafkaOffsetRangeLimit, + isStartingOffsets: Boolean): Map[TopicPartition, Long] = { + def validateTopicPartitions(partitions: Set[TopicPartition], + partitionOffsets: Map[TopicPartition, Long]): Map[TopicPartition, Long] = { + assert(partitions == partitionOffsets.keySet, + "If startingOffsets contains specific offsets, you must specify all TopicPartitions.\n" + + "Use -1 for latest, -2 for earliest.\n" + + s"Specified: ${partitionOffsets.keySet} Assigned: ${partitions}") + logDebug(s"Partitions assigned to consumer: $partitions. Seeking to $partitionOffsets") + partitionOffsets + } + val partitions = fetchTopicPartitions() + // Obtain TopicPartition offsets with late binding support + offsetRangeLimit match { + case EarliestOffsetRangeLimit => partitions.map { + case tp => tp -> KafkaOffsetRangeLimit.EARLIEST + }.toMap + case LatestOffsetRangeLimit => partitions.map { + case tp => tp -> KafkaOffsetRangeLimit.LATEST + }.toMap + case SpecificOffsetRangeLimit(partitionOffsets) => + validateTopicPartitions(partitions, partitionOffsets) + case SpecificTimestampRangeLimit(partitionTimestamps) => + fetchSpecificTimestampBasedOffsets(partitionTimestamps, + failsOnNoMatchingOffset = isStartingOffsets).partitionToOffsets + } + } + + /** + * Resolves the specific offsets based on Kafka seek positions. + * This method resolves offset value -1 to the latest and -2 to the + * earliest Kafka seek position. + * + * @param partitionOffsets the specific offsets to resolve + * @param reportDataLoss callback to either report or log data loss depending on setting + */ + override def fetchSpecificOffsets( + partitionOffsets: Map[TopicPartition, Long], + reportDataLoss: String => Unit): KafkaSourceOffset = { + val fnAssertParametersWithPartitions: ju.Set[TopicPartition] => Unit = { partitions => + assert(partitions.asScala == partitionOffsets.keySet, + "If startingOffsets contains specific offsets, you must specify all TopicPartitions.\n" + + "Use -1 for latest, -2 for earliest, if you don't care.\n" + + s"Specified: ${partitionOffsets.keySet} Assigned: ${partitions.asScala}") + logDebug(s"Partitions assigned to consumer: $partitions. 
Seeking to $partitionOffsets") + } + + val fnRetrievePartitionOffsets: ju.Set[TopicPartition] => Map[TopicPartition, Long] = { _ => + partitionOffsets + } + + val fnAssertFetchedOffsets: Map[TopicPartition, Long] => Unit = { fetched => + partitionOffsets.foreach { + case (tp, off) if off != KafkaOffsetRangeLimit.LATEST && + off != KafkaOffsetRangeLimit.EARLIEST => + if (fetched(tp) != off) { + reportDataLoss( + s"startingOffsets for $tp was $off but consumer reset to ${fetched(tp)}") + } + case _ => + // no real way to check that beginning or end is reasonable + } + } + + fetchSpecificOffsets0(fnAssertParametersWithPartitions, fnRetrievePartitionOffsets, + fnAssertFetchedOffsets) + } + + override def fetchSpecificTimestampBasedOffsets( + partitionTimestamps: Map[TopicPartition, Long], + failsOnNoMatchingOffset: Boolean): KafkaSourceOffset = { + val fnAssertParametersWithPartitions: ju.Set[TopicPartition] => Unit = { partitions => + assert(partitions.asScala == partitionTimestamps.keySet, + "If starting/endingOffsetsByTimestamp contains specific offsets, you must specify all " + + s"topics. Specified: ${partitionTimestamps.keySet} Assigned: ${partitions.asScala}") + logDebug(s"Partitions assigned to consumer: $partitions. Seeking to $partitionTimestamps") + } + + val fnRetrievePartitionOffsets: ju.Set[TopicPartition] => Map[TopicPartition, Long] = { _ => { + val converted = partitionTimestamps.map { case (tp, timestamp) => + tp -> java.lang.Long.valueOf(timestamp) + }.asJava + + val offsetForTime: ju.Map[TopicPartition, OffsetAndTimestamp] = + consumer.offsetsForTimes(converted) + + offsetForTime.asScala.map { case (tp, offsetAndTimestamp) => + if (failsOnNoMatchingOffset) { + assert(offsetAndTimestamp != null, "No offset matched from request of " + + s"topic-partition $tp and timestamp ${partitionTimestamps(tp)}.") + } + + if (offsetAndTimestamp == null) { + tp -> KafkaOffsetRangeLimit.LATEST + } else { + tp -> offsetAndTimestamp.offset() + } + }.toMap + } + } + + val fnAssertFetchedOffsets: Map[TopicPartition, Long] => Unit = { _ => } + + fetchSpecificOffsets0(fnAssertParametersWithPartitions, fnRetrievePartitionOffsets, + fnAssertFetchedOffsets) + } + + private def fetchSpecificOffsets0( + fnAssertParametersWithPartitions: ju.Set[TopicPartition] => Unit, + fnRetrievePartitionOffsets: ju.Set[TopicPartition] => Map[TopicPartition, Long], + fnAssertFetchedOffsets: Map[TopicPartition, Long] => Unit): KafkaSourceOffset = { + val fetched = partitionsAssignedToConsumer { + partitions => { + fnAssertParametersWithPartitions(partitions) + + val partitionOffsets = fnRetrievePartitionOffsets(partitions) + + partitionOffsets.foreach { + case (tp, KafkaOffsetRangeLimit.LATEST) => + consumer.seekToEnd(ju.Arrays.asList(tp)) + case (tp, KafkaOffsetRangeLimit.EARLIEST) => + consumer.seekToBeginning(ju.Arrays.asList(tp)) + case (tp, off) => consumer.seek(tp, off) + } + + partitionOffsets.map { + case (tp, _) => tp -> consumer.position(tp) + } + } + } + + fnAssertFetchedOffsets(fetched) + + KafkaSourceOffset(fetched) + } + + /** + * Fetch the earliest offsets for the topic partitions that are indicated + * in the [[ConsumerStrategy]]. 
+ */ + override def fetchEarliestOffsets(): Map[TopicPartition, Long] = partitionsAssignedToConsumer( + partitions => { + logDebug("Seeking to the beginning") + + consumer.seekToBeginning(partitions) + val partitionOffsets = partitions.asScala.map(p => p -> consumer.position(p)).toMap + logDebug(s"Got earliest offsets for partition : $partitionOffsets") + partitionOffsets + }, fetchingEarliestOffset = true) + + /** + * Fetch the latest offsets for the topic partitions that are indicated + * in the [[ConsumerStrategy]]. + * + * Kafka may return earliest offsets when we are requesting latest offsets if `poll` is called + * right before `seekToEnd` (KAFKA-7703). As a workaround, we will call `position` right after + * `poll` to wait until the potential offset request triggered by `poll(0)` is done. + * + * In addition, to avoid other unknown issues, we also use the given `knownOffsets` to audit the + * latest offsets returned by Kafka. If we find some incorrect offsets (a latest offset is less + * than an offset in `knownOffsets`), we will retry at most `maxOffsetFetchAttempts` times. When + * a topic is recreated, the latest offsets may be less than offsets in `knownOffsets`. We cannot + * distinguish this with KAFKA-7703, so we just return whatever we get from Kafka after retrying. + */ + override def fetchLatestOffsets( + knownOffsets: Option[PartitionOffsetMap]): PartitionOffsetMap = + partitionsAssignedToConsumer { partitions => { + logDebug("Seeking to the end.") + + if (knownOffsets.isEmpty) { + consumer.seekToEnd(partitions) + partitions.asScala.map(p => p -> consumer.position(p)).toMap + } else { + var partitionOffsets: PartitionOffsetMap = Map.empty + + /** + * Compare `knownOffsets` and `partitionOffsets`. Returns all partitions that have incorrect + * latest offset (offset in `knownOffsets` is great than the one in `partitionOffsets`). + */ + def findIncorrectOffsets(): Seq[(TopicPartition, Long, Long)] = { + var incorrectOffsets = ArrayBuffer[(TopicPartition, Long, Long)]() + partitionOffsets.foreach { case (tp, offset) => + knownOffsets.foreach(_.get(tp).foreach { knownOffset => + if (knownOffset > offset) { + val incorrectOffset = (tp, knownOffset, offset) + incorrectOffsets += incorrectOffset + } + }) + } + incorrectOffsets.toSeq + } + + // Retry to fetch latest offsets when detecting incorrect offsets. We don't use + // `withRetriesWithoutInterrupt` to retry because: + // + // - `withRetriesWithoutInterrupt` will reset the consumer for each attempt but a fresh + // consumer has a much bigger chance to hit KAFKA-7703. + // - Avoid calling `consumer.poll(0)` which may cause KAFKA-7703. + var incorrectOffsets: Seq[(TopicPartition, Long, Long)] = Nil + var attempt = 0 + do { + consumer.seekToEnd(partitions) + partitionOffsets = partitions.asScala.map(p => p -> consumer.position(p)).toMap + attempt += 1 + + incorrectOffsets = findIncorrectOffsets() + if (incorrectOffsets.nonEmpty) { + logWarning("Found incorrect offsets in some partitions " + + s"(partition, previous offset, fetched offset): $incorrectOffsets") + if (attempt < maxOffsetFetchAttempts) { + logWarning("Retrying to fetch latest offsets because of incorrect offsets") + Thread.sleep(offsetFetchAttemptIntervalMs) + } + } + } while (incorrectOffsets.nonEmpty && attempt < maxOffsetFetchAttempts) + + logDebug(s"Got latest offsets for partition : $partitionOffsets") + partitionOffsets + } + } + } + + /** + * Fetch the earliest offsets for specific topic partitions. 
+ * The return result may not contain some partitions if they are deleted. + */ + override def fetchEarliestOffsets( + newPartitions: Seq[TopicPartition]): Map[TopicPartition, Long] = { + if (newPartitions.isEmpty) { + Map.empty[TopicPartition, Long] + } else { + partitionsAssignedToConsumer(partitions => { + // Get the earliest offset of each partition + consumer.seekToBeginning(partitions) + val partitionOffsets = newPartitions.filter { p => + // When deleting topics happen at the same time, some partitions may not be in + // `partitions`. So we need to ignore them + partitions.contains(p) + }.map(p => p -> consumer.position(p)).toMap + logDebug(s"Got earliest offsets for new partitions: $partitionOffsets") + partitionOffsets + }, fetchingEarliestOffset = true) + } + } + + /** + * Return the offset ranges for a Kafka batch query. If `minPartitions` is set, this method may + * split partitions to respect it. Since offsets can be early and late binding which are evaluated + * on the executors, in order to divvy up the partitions we need to perform some substitutions. We + * don't want to send exact offsets to the executors, because data may age out before we can + * consume the data. This method makes some approximate splitting, and replaces the special offset + * values in the final output. + */ + override def getOffsetRangesFromUnresolvedOffsets( + startingOffsets: KafkaOffsetRangeLimit, + endingOffsets: KafkaOffsetRangeLimit): Seq[KafkaOffsetRange] = { + val fromPartitionOffsets = fetchPartitionOffsets(startingOffsets, isStartingOffsets = true) + val untilPartitionOffsets = fetchPartitionOffsets(endingOffsets, isStartingOffsets = false) + + // Obtain topicPartitions in both from and until partition offset, ignoring + // topic partitions that were added and/or deleted between the two above calls. 
+ if (fromPartitionOffsets.keySet != untilPartitionOffsets.keySet) { + implicit val topicOrdering: Ordering[TopicPartition] = Ordering.by(t => t.topic()) + val fromTopics = fromPartitionOffsets.keySet.toList.sorted.mkString(",") + val untilTopics = untilPartitionOffsets.keySet.toList.sorted.mkString(",") + throw new IllegalStateException("different topic partitions " + + s"for starting offsets topics[${fromTopics}] and " + + s"ending offsets topics[${untilTopics}]") + } + + // Calculate offset ranges + val offsetRangesBase = untilPartitionOffsets.keySet.map { tp => + val fromOffset = fromPartitionOffsets.get(tp).getOrElse { + // This should not happen since topicPartitions contains all partitions not in + // fromPartitionOffsets + throw new IllegalStateException(s"$tp doesn't have a from offset") + } + val untilOffset = untilPartitionOffsets(tp) + KafkaOffsetRange(tp, fromOffset, untilOffset, None) + }.toSeq + + if (shouldDivvyUpLargePartitions(offsetRangesBase.size)) { + val fromOffsetsMap = + offsetRangesBase.map(range => (range.topicPartition, range.fromOffset)).toMap + val untilOffsetsMap = + offsetRangesBase.map(range => (range.topicPartition, range.untilOffset)).toMap + + // No need to report data loss here + val resolvedFromOffsets = fetchSpecificOffsets(fromOffsetsMap, _ => ()).partitionToOffsets + val resolvedUntilOffsets = fetchSpecificOffsets(untilOffsetsMap, _ => ()).partitionToOffsets + val ranges = offsetRangesBase.map(_.topicPartition).map { tp => + KafkaOffsetRange(tp, resolvedFromOffsets(tp), resolvedUntilOffsets(tp), preferredLoc = None) + } + val divvied = rangeCalculator.getRanges(ranges).groupBy(_.topicPartition) + divvied.flatMap { case (tp, splitOffsetRanges) => + if (splitOffsetRanges.length == 1) { + Seq(KafkaOffsetRange(tp, fromOffsetsMap(tp), untilOffsetsMap(tp), None)) + } else { + // the list can't be empty + val first = splitOffsetRanges.head.copy(fromOffset = fromOffsetsMap(tp)) + val end = splitOffsetRanges.last.copy(untilOffset = untilOffsetsMap(tp)) + Seq(first) ++ splitOffsetRanges.drop(1).dropRight(1) :+ end + } + }.toArray.toSeq + } else { + offsetRangesBase + } + } + + private def getSortedExecutorList(): Array[String] = { + def compare(a: ExecutorCacheTaskLocation, b: ExecutorCacheTaskLocation): Boolean = { + if (a.host == b.host) { + a.executorId > b.executorId + } else { + a.host > b.host + } + } + + val bm = SparkEnv.get.blockManager + bm.master.getPeers(bm.blockManagerId).toArray + .map(x => ExecutorCacheTaskLocation(x.host, x.executorId)) + .sortWith(compare) + .map(_.toString) + } + + /** + * Return the offset ranges for a Kafka streaming batch. If `minPartitions` is set, this method + * may split partitions to respect it. If any data lost issue is detected, `reportDataLoss` will + * be called. + */ + override def getOffsetRangesFromResolvedOffsets( + fromPartitionOffsets: PartitionOffsetMap, + untilPartitionOffsets: PartitionOffsetMap, + reportDataLoss: String => Unit): Seq[KafkaOffsetRange] = { + // Find the new partitions, and get their earliest offsets + val newPartitions = untilPartitionOffsets.keySet.diff(fromPartitionOffsets.keySet) + val newPartitionInitialOffsets = fetchEarliestOffsets(newPartitions.toSeq) + if (newPartitionInitialOffsets.keySet != newPartitions) { + // We cannot get from offsets for some partitions. It means they got deleted. + val deletedPartitions = newPartitions.diff(newPartitionInitialOffsets.keySet) + reportDataLoss( + s"Cannot find earliest offsets of ${deletedPartitions}. 
Some data may have been missed") + } + logInfo(s"Partitions added: $newPartitionInitialOffsets") + newPartitionInitialOffsets.filter(_._2 != 0).foreach { case (p, o) => + reportDataLoss( + s"Added partition $p starts from $o instead of 0. Some data may have been missed") + } + + val deletedPartitions = fromPartitionOffsets.keySet.diff(untilPartitionOffsets.keySet) + if (deletedPartitions.nonEmpty) { + val message = if (driverKafkaParams.containsKey(ConsumerConfig.GROUP_ID_CONFIG)) { + s"$deletedPartitions are gone. ${KafkaSourceProvider.CUSTOM_GROUP_ID_ERROR_MESSAGE}" + } else { + s"$deletedPartitions are gone. Some data may have been missed." + } + reportDataLoss(message) + } + + // Use the until partitions to calculate offset ranges to ignore partitions that have + // been deleted + val topicPartitions = untilPartitionOffsets.keySet.filter { tp => + // Ignore partitions that we don't know the from offsets. + newPartitionInitialOffsets.contains(tp) || fromPartitionOffsets.contains(tp) + }.toSeq + logDebug("TopicPartitions: " + topicPartitions.mkString(", ")) + + val fromOffsets = fromPartitionOffsets ++ newPartitionInitialOffsets + val untilOffsets = untilPartitionOffsets + val ranges = topicPartitions.map { tp => + val fromOffset = fromOffsets(tp) + val untilOffset = untilOffsets(tp) + if (untilOffset < fromOffset) { + reportDataLoss(s"Partition $tp's offset was changed from " + + s"$fromOffset to $untilOffset, some data may have been missed") + } + KafkaOffsetRange(tp, fromOffset, untilOffset, preferredLoc = None) + } + rangeCalculator.getRanges(ranges, getSortedExecutorList) + } + + private def partitionsAssignedToConsumer( + body: ju.Set[TopicPartition] => Map[TopicPartition, Long], + fetchingEarliestOffset: Boolean = false) + : Map[TopicPartition, Long] = uninterruptibleThreadRunner.runUninterruptibly { + + withRetriesWithoutInterrupt { + // Poll to get the latest assigned partitions + consumer.poll(0) + val partitions = consumer.assignment() + + if (!fetchingEarliestOffset) { + // Call `position` to wait until the potential offset request triggered by `poll(0)` is + // done. This is a workaround for KAFKA-7703, which an async `seekToBeginning` triggered by + // `poll(0)` may reset offsets that should have been set by another request. + partitions.asScala.map(p => p -> consumer.position(p)).foreach(_ => {}) + } + + consumer.pause(partitions) + logDebug(s"Partitions assigned to consumer: $partitions.") + body(partitions) + } + } + + /** + * Helper function that does multiple retries on a body of code that returns offsets. + * Retries are needed to handle transient failures. For e.g. race conditions between getting + * assignment and getting position while topics/partitions are deleted can cause NPEs. + * + * This method also makes sure `body` won't be interrupted to workaround a potential issue in + * `KafkaConsumer.poll`. 
(KAFKA-1894) + */ + private def withRetriesWithoutInterrupt( + body: => Map[TopicPartition, Long]): Map[TopicPartition, Long] = { + // Make sure `KafkaConsumer.poll` won't be interrupted (KAFKA-1894) + assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) + + synchronized { + var result: Option[Map[TopicPartition, Long]] = None + var attempt = 1 + var lastException: Throwable = null + while (result.isEmpty && attempt <= maxOffsetFetchAttempts + && !Thread.currentThread().isInterrupted) { + Thread.currentThread match { + case ut: UninterruptibleThread => + // "KafkaConsumer.poll" may hang forever if the thread is interrupted (E.g., the query + // is stopped)(KAFKA-1894). Hence, we just make sure we don't interrupt it. + // + // If the broker addresses are wrong, or Kafka cluster is down, "KafkaConsumer.poll" may + // hang forever as well. This cannot be resolved in KafkaSource until Kafka fixes the + // issue. + ut.runUninterruptibly { + try { + result = Some(body) + } catch { + case NonFatal(e) => + lastException = e + logWarning(s"Error in attempt $attempt getting Kafka offsets: ", e) + attempt += 1 + Thread.sleep(offsetFetchAttemptIntervalMs) + resetConsumer() + } + } + case _ => + throw new IllegalStateException( + "Kafka APIs must be executed on a o.a.spark.util.UninterruptibleThread") + } + } + if (Thread.interrupted()) { + throw new InterruptedException() + } + if (result.isEmpty) { + assert(attempt > maxOffsetFetchAttempts) + assert(lastException != null) + throw lastException + } + result.get + } + } + + private def stopConsumer(): Unit = synchronized { + assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) + if (_consumer != null) _consumer.close() + } + + private def resetConsumer(): Unit = synchronized { + stopConsumer() + _consumer = null // will automatically get reinitialized again + } +} diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala index 69a66e2209773..ed3407c822b96 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala @@ -56,7 +56,7 @@ private[kafka010] class KafkaRelation( // id. Hence, we should generate a unique id for each query. 
val uniqueGroupId = KafkaSourceProvider.batchUniqueGroupId(sourceOptions) - val kafkaOffsetReader = new KafkaOffsetReader( + val kafkaOffsetReader = KafkaOffsetReader.build( strategy, KafkaSourceProvider.kafkaParamsForDriver(specifiedKafkaParams), sourceOptions, diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala index 3ace0874674b6..7299b182ae1cc 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala @@ -93,7 +93,7 @@ private[kafka010] class KafkaSourceProvider extends DataSourceRegister caseInsensitiveParameters, STARTING_OFFSETS_BY_TIMESTAMP_OPTION_KEY, STARTING_OFFSETS_OPTION_KEY, LatestOffsetRangeLimit) - val kafkaOffsetReader = new KafkaOffsetReader( + val kafkaOffsetReader = KafkaOffsetReader.build( strategy(caseInsensitiveParameters), kafkaParamsForDriver(specifiedKafkaParams), caseInsensitiveParameters, @@ -460,7 +460,7 @@ private[kafka010] class KafkaSourceProvider extends DataSourceRegister caseInsensitiveOptions, STARTING_OFFSETS_BY_TIMESTAMP_OPTION_KEY, STARTING_OFFSETS_OPTION_KEY, LatestOffsetRangeLimit) - val kafkaOffsetReader = new KafkaOffsetReader( + val kafkaOffsetReader = KafkaOffsetReader.build( strategy(caseInsensitiveOptions), kafkaParamsForDriver(specifiedKafkaParams), caseInsensitiveOptions, @@ -489,7 +489,7 @@ private[kafka010] class KafkaSourceProvider extends DataSourceRegister caseInsensitiveOptions, STARTING_OFFSETS_BY_TIMESTAMP_OPTION_KEY, STARTING_OFFSETS_OPTION_KEY, LatestOffsetRangeLimit) - val kafkaOffsetReader = new KafkaOffsetReader( + val kafkaOffsetReader = KafkaOffsetReader.build( strategy(caseInsensitiveOptions), kafkaParamsForDriver(specifiedKafkaParams), caseInsensitiveOptions, diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/ConsumerStrategySuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/ConsumerStrategySuite.scala new file mode 100644 index 0000000000000..939cf0bb36a8c --- /dev/null +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/ConsumerStrategySuite.scala @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.kafka010 + +import java.util.UUID + +import scala.collection.JavaConverters._ + +import org.apache.kafka.clients.CommonClientConfigs +import org.apache.kafka.clients.admin.Admin +import org.apache.kafka.common.TopicPartition +import org.mockito.Mockito.mock + +import org.apache.spark.{SparkConf, SparkEnv, SparkFunSuite} + +class ConsumerStrategySuite extends SparkFunSuite { + private var testUtils: KafkaTestUtils = _ + + private def doReturn(value: Any) = org.mockito.Mockito.doReturn(value, Seq.empty: _*) + + protected def newTopic(prefix: String = "topic") = s"$prefix-${UUID.randomUUID().toString}" + + private def setSparkEnv(settings: Iterable[(String, String)]): Unit = { + val conf = new SparkConf().setAll(settings) + val env = mock(classOf[SparkEnv]) + doReturn(conf).when(env).conf + SparkEnv.set(env) + } + + private def adminProps = { + Map[String, Object]( + CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG -> testUtils.brokerAddress + ).asJava + } + + private def admin(strategy: ConsumerStrategy): Admin = { + strategy.createAdmin(adminProps) + } + + override def beforeAll(): Unit = { + super.beforeAll() + testUtils = new KafkaTestUtils(Map.empty) + testUtils.setup() + setSparkEnv(Map.empty) + } + + override def afterAll(): Unit = { + if (testUtils != null) { + testUtils.teardown() + testUtils = null + } + super.afterAll() + } + + test("createAdmin must create admin properly") { + val strategy = AssignStrategy(Array.empty) + assert(strategy.createAdmin(adminProps) != null) + } + + test("AssignStrategy.assignedTopicPartitions must give back all assigned") { + val assignedTopic = newTopic() + testUtils.createTopic(assignedTopic, partitions = 3) + val otherExistingTopic = newTopic() + testUtils.createTopic(otherExistingTopic, partitions = 2) + + val partitions = Array( + new TopicPartition(assignedTopic, 0), + new TopicPartition(assignedTopic, 2) + ) + val strategy = AssignStrategy(partitions) + assert(strategy.assignedTopicPartitions(admin(strategy)) === partitions.toSet) + + testUtils.deleteTopic(assignedTopic) + testUtils.deleteTopic(otherExistingTopic) + } + + test("AssignStrategy.assignedTopicPartitions must skip invalid partitions") { + val assignedTopic = newTopic() + testUtils.createTopic(assignedTopic, partitions = 1) + + val partitions = Array(new TopicPartition(assignedTopic, 1)) + val strategy = AssignStrategy(partitions) + assert(strategy.assignedTopicPartitions(admin(strategy)) === Set.empty) + + testUtils.deleteTopic(assignedTopic) + } + + test("SubscribeStrategy.assignedTopicPartitions must give back all assigned") { + val subscribedTopic1 = newTopic() + testUtils.createTopic(subscribedTopic1, partitions = 2) + val subscribedTopic2 = newTopic() + testUtils.createTopic(subscribedTopic2, partitions = 2) + val otherExistingTopic = newTopic() + testUtils.createTopic(otherExistingTopic, partitions = 2) + + val partitions = Set( + new TopicPartition(subscribedTopic1, 0), + new TopicPartition(subscribedTopic1, 1), + new TopicPartition(subscribedTopic2, 0), + new TopicPartition(subscribedTopic2, 1) + ) + val strategy = SubscribeStrategy(Seq(subscribedTopic1, subscribedTopic2)) + assert(strategy.assignedTopicPartitions(admin(strategy)) === partitions) + + testUtils.deleteTopic(subscribedTopic1) + testUtils.deleteTopic(subscribedTopic2) + testUtils.deleteTopic(otherExistingTopic) + } + + test("SubscribePatternStrategy.assignedTopicPartitions must give back all assigned") { + val subscribePattern = "subscribePattern" + val subscribedTopic1 = 
newTopic(subscribePattern) + testUtils.createTopic(subscribedTopic1, partitions = 2) + val subscribedTopic2 = newTopic(subscribePattern) + testUtils.createTopic(subscribedTopic2, partitions = 2) + val otherExistingTopic = newTopic("other") + testUtils.createTopic(otherExistingTopic, partitions = 2) + + val partitions = Set( + new TopicPartition(subscribedTopic1, 0), + new TopicPartition(subscribedTopic1, 1), + new TopicPartition(subscribedTopic2, 0), + new TopicPartition(subscribedTopic2, 1) + ) + val strategy = SubscribePatternStrategy(s"$subscribePattern.*") + assert(strategy.assignedTopicPartitions(admin(strategy)) === partitions) + + testUtils.deleteTopic(subscribedTopic1) + testUtils.deleteTopic(subscribedTopic2) + testUtils.deleteTopic(otherExistingTopic) + } +} diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala index 08f673455d729..f2be8475151e3 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala @@ -608,7 +608,9 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { // in executors. val query = kafka.map(kv => kv._2.toInt).writeStream.foreach(new ForeachWriter[Int] { override def open(partitionId: Long, version: Long): Boolean = { + // Re-create topic since Kafka auto topic creation is not supported by Spark KafkaSourceSuite.globalTestUtils.deleteTopic(topic) + KafkaSourceSuite.globalTestUtils.createTopic(topic) true } @@ -690,19 +692,25 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { } test("allow group.id prefix") { - testGroupId("groupIdPrefix", (expected, actual) => { - assert(actual.exists(_.startsWith(expected)) && !actual.exists(_ === expected), - "Valid consumer groups don't contain the expected group id - " + - s"Valid consumer groups: $actual / expected group id: $expected") - }) + // Group ID prefix is only supported by consumer based offset reader + if (spark.conf.get(SQLConf.USE_DEPRECATED_KAFKA_OFFSET_FETCHING)) { + testGroupId("groupIdPrefix", (expected, actual) => { + assert(actual.exists(_.startsWith(expected)) && !actual.exists(_ === expected), + "Valid consumer groups don't contain the expected group id - " + + s"Valid consumer groups: $actual / expected group id: $expected") + }) + } } test("allow group.id override") { - testGroupId("kafka.group.id", (expected, actual) => { - assert(actual.exists(_ === expected), "Valid consumer groups don't " + - s"contain the expected group id - Valid consumer groups: $actual / " + - s"expected group id: $expected") - }) + // Group ID override is only supported by consumer based offset reader + if (spark.conf.get(SQLConf.USE_DEPRECATED_KAFKA_OFFSET_FETCHING)) { + testGroupId("kafka.group.id", (expected, actual) => { + assert(actual.exists(_ === expected), "Valid consumer groups don't " + + s"contain the expected group id - Valid consumer groups: $actual / " + + s"expected group id: $expected") + }) + } } private def testGroupId(groupIdKey: String, @@ -1121,6 +1129,20 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { } +class KafkaMicroBatchV1SourceWithAdminSuite extends KafkaMicroBatchV1SourceSuite { + override def beforeAll(): Unit = { + super.beforeAll() + 
spark.conf.set(SQLConf.USE_DEPRECATED_KAFKA_OFFSET_FETCHING.key, "false") + } +} + +class KafkaMicroBatchV2SourceWithAdminSuite extends KafkaMicroBatchV2SourceSuite { + override def beforeAll(): Unit = { + super.beforeAll() + spark.conf.set(SQLConf.USE_DEPRECATED_KAFKA_OFFSET_FETCHING.key, "false") + } +} + class KafkaMicroBatchV1SourceSuite extends KafkaMicroBatchSourceSuiteBase { override def beforeAll(): Unit = { super.beforeAll() diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderSuite.scala index ad22a56d9157f..d1e49b0e14314 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderSuite.scala @@ -17,13 +17,17 @@ package org.apache.spark.sql.kafka010 +import java.util.Locale import java.util.UUID import java.util.concurrent.atomic.AtomicInteger -import org.apache.kafka.common.TopicPartition +import org.apache.kafka.clients.CommonClientConfigs +import org.apache.kafka.clients.consumer.ConsumerConfig +import org.apache.kafka.common.{IsolationLevel, TopicPartition} import org.apache.spark.sql.QueryTest import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.kafka010.KafkaOffsetRangeLimit.{EARLIEST, LATEST} import org.apache.spark.sql.test.SharedSparkSession @@ -53,9 +57,9 @@ class KafkaOffsetReaderSuite extends QueryTest with SharedSparkSession with Kafk } private def createKafkaReader(topic: String, minPartitions: Option[Int]): KafkaOffsetReader = { - new KafkaOffsetReader( + KafkaOffsetReader.build( SubscribeStrategy(Seq(topic)), - org.apache.spark.sql.kafka010.KafkaSourceProvider.kafkaParamsForDriver( + KafkaSourceProvider.kafkaParamsForDriver( Map( "bootstrap.servers" -> testUtils.brokerAddress @@ -66,7 +70,39 @@ class KafkaOffsetReaderSuite extends QueryTest with SharedSparkSession with Kafk ) } - test("SPARK-30656: getOffsetRangesFromUnresolvedOffsets - using specific offsets") { + test("isolationLevel must give back default isolation level when not set") { + testIsolationLevel(None, + IsolationLevel.valueOf(ConsumerConfig.DEFAULT_ISOLATION_LEVEL.toUpperCase(Locale.ROOT))) + } + + test("isolationLevel must give back READ_UNCOMMITTED when set") { + testIsolationLevel(Some("read_uncommitted"), IsolationLevel.READ_UNCOMMITTED) + } + + test("isolationLevel must give back READ_COMMITTED when set") { + testIsolationLevel(Some("read_committed"), IsolationLevel.READ_COMMITTED) + } + + test("isolationLevel must throw exception when invalid isolation level set") { + intercept[IllegalArgumentException] { + testIsolationLevel(Some("intentionally_invalid"), IsolationLevel.READ_COMMITTED) + } + } + + private def testIsolationLevel(kafkaParam: Option[String], isolationLevel: IsolationLevel) = { + var kafkaParams = Map(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG -> testUtils.brokerAddress) + kafkaParam.foreach(p => kafkaParams ++= Map(ConsumerConfig.ISOLATION_LEVEL_CONFIG -> p)) + val reader = new KafkaOffsetReaderAdmin( + SubscribeStrategy(Seq()), + KafkaSourceProvider.kafkaParamsForDriver(kafkaParams), + CaseInsensitiveMap(Map.empty), + "" + ) + assert(reader.isolationLevel === isolationLevel) + } + + testWithAllOffsetFetchingSQLConf("SPARK-30656: getOffsetRangesFromUnresolvedOffsets - " + + "using specific offsets") { val topic 
= newTopic() testUtils.createTopic(topic, partitions = 1) testUtils.sendMessages(topic, (0 until 10).map(_.toString).toArray, Some(0)) @@ -74,14 +110,16 @@ class KafkaOffsetReaderSuite extends QueryTest with SharedSparkSession with Kafk val reader = createKafkaReader(topic, minPartitions = Some(3)) val startingOffsets = SpecificOffsetRangeLimit(Map(tp -> 1)) val endingOffsets = SpecificOffsetRangeLimit(Map(tp -> 4)) - val offsetRanges = reader.getOffsetRangesFromUnresolvedOffsets(startingOffsets, endingOffsets) - assert(offsetRanges === Seq( + val offsetRanges = reader.getOffsetRangesFromUnresolvedOffsets(startingOffsets, + endingOffsets) + assert(offsetRanges.sortBy(_.topicPartition.toString) === Seq( KafkaOffsetRange(tp, 1, 2, None), KafkaOffsetRange(tp, 2, 3, None), - KafkaOffsetRange(tp, 3, 4, None))) + KafkaOffsetRange(tp, 3, 4, None)).sortBy(_.topicPartition.toString)) } - test("SPARK-30656: getOffsetRangesFromUnresolvedOffsets - using special offsets") { + testWithAllOffsetFetchingSQLConf("SPARK-30656: getOffsetRangesFromUnresolvedOffsets - " + + "using special offsets") { val topic = newTopic() testUtils.createTopic(topic, partitions = 1) testUtils.sendMessages(topic, (0 until 4).map(_.toString).toArray, Some(0)) @@ -89,14 +127,16 @@ class KafkaOffsetReaderSuite extends QueryTest with SharedSparkSession with Kafk val reader = createKafkaReader(topic, minPartitions = Some(3)) val startingOffsets = EarliestOffsetRangeLimit val endingOffsets = LatestOffsetRangeLimit - val offsetRanges = reader.getOffsetRangesFromUnresolvedOffsets(startingOffsets, endingOffsets) - assert(offsetRanges === Seq( + val offsetRanges = reader.getOffsetRangesFromUnresolvedOffsets(startingOffsets, + endingOffsets) + assert(offsetRanges.sortBy(_.topicPartition.toString) === Seq( KafkaOffsetRange(tp, EARLIEST, 1, None), KafkaOffsetRange(tp, 1, 2, None), - KafkaOffsetRange(tp, 2, LATEST, None))) + KafkaOffsetRange(tp, 2, LATEST, None)).sortBy(_.topicPartition.toString)) } - test("SPARK-30656: getOffsetRangesFromUnresolvedOffsets - multiple topic partitions") { + testWithAllOffsetFetchingSQLConf("SPARK-30656: getOffsetRangesFromUnresolvedOffsets - " + + "multiple topic partitions") { val topic = newTopic() testUtils.createTopic(topic, partitions = 2) testUtils.sendMessages(topic, (0 until 100).map(_.toString).toArray, Some(0)) @@ -107,15 +147,16 @@ class KafkaOffsetReaderSuite extends QueryTest with SharedSparkSession with Kafk val startingOffsets = SpecificOffsetRangeLimit(Map(tp1 -> EARLIEST, tp2 -> EARLIEST)) val endingOffsets = SpecificOffsetRangeLimit(Map(tp1 -> LATEST, tp2 -> 3)) - val offsetRanges = reader.getOffsetRangesFromUnresolvedOffsets(startingOffsets, endingOffsets) - assert(offsetRanges === Seq( + val offsetRanges = reader.getOffsetRangesFromUnresolvedOffsets(startingOffsets, + endingOffsets) + assert(offsetRanges.sortBy(_.topicPartition.toString) === Seq( KafkaOffsetRange(tp2, EARLIEST, 3, None), KafkaOffsetRange(tp1, EARLIEST, 33, None), KafkaOffsetRange(tp1, 33, 66, None), - KafkaOffsetRange(tp1, 66, LATEST, None))) + KafkaOffsetRange(tp1, 66, LATEST, None)).sortBy(_.topicPartition.toString)) } - test("SPARK-30656: getOffsetRangesFromResolvedOffsets") { + testWithAllOffsetFetchingSQLConf("SPARK-30656: getOffsetRangesFromResolvedOffsets") { val topic = newTopic() testUtils.createTopic(topic, partitions = 2) testUtils.sendMessages(topic, (0 until 100).map(_.toString).toArray, Some(0)) @@ -130,10 +171,28 @@ class KafkaOffsetReaderSuite extends QueryTest with SharedSparkSession with Kafk 
fromPartitionOffsets, untilPartitionOffsets, _ => {}) - assert(offsetRanges === Seq( + assert(offsetRanges.sortBy(_.topicPartition.toString) === Seq( KafkaOffsetRange(tp1, 0, 33, None), KafkaOffsetRange(tp1, 33, 66, None), KafkaOffsetRange(tp1, 66, 100, None), - KafkaOffsetRange(tp2, 0, 3, None))) + KafkaOffsetRange(tp2, 0, 3, None)).sortBy(_.topicPartition.toString)) + } + + private def testWithAllOffsetFetchingSQLConf(name: String)(func: => Any): Unit = { + Seq("true", "false").foreach { useDeprecatedOffsetFetching => + val testName = s"$name with useDeprecatedOffsetFetching $useDeprecatedOffsetFetching" + executeFuncWithSQLConf(testName, useDeprecatedOffsetFetching, func) + } + } + + private def executeFuncWithSQLConf( + name: String, + useDeprecatedOffsetFetching: String, + func: => Any): Unit = { + test(name) { + withSQLConf(SQLConf.USE_DEPRECATED_KAFKA_OFFSET_FETCHING.key -> useDeprecatedOffsetFetching) { + func + } + } } } diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala index 6f5dc0bb081ba..16fa24a68abe2 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala @@ -22,8 +22,6 @@ import java.util.Locale import java.util.concurrent.atomic.AtomicInteger import scala.annotation.tailrec -import scala.collection.JavaConverters._ -import scala.util.Random import org.apache.kafka.clients.producer.ProducerRecord import org.apache.kafka.common.TopicPartition @@ -465,41 +463,6 @@ abstract class KafkaRelationSuiteBase extends QueryTest with SharedSparkSession testBadOptions("subscribePattern" -> "")("pattern to subscribe is empty") } - test("allow group.id prefix") { - testGroupId("groupIdPrefix", (expected, actual) => { - assert(actual.exists(_.startsWith(expected)) && !actual.exists(_ === expected), - "Valid consumer groups don't contain the expected group id - " + - s"Valid consumer groups: $actual / expected group id: $expected") - }) - } - - test("allow group.id override") { - testGroupId("kafka.group.id", (expected, actual) => { - assert(actual.exists(_ === expected), "Valid consumer groups don't " + - s"contain the expected group id - Valid consumer groups: $actual / " + - s"expected group id: $expected") - }) - } - - private def testGroupId(groupIdKey: String, - validateGroupId: (String, Iterable[String]) => Unit): Unit = { - // Tests code path KafkaSourceProvider.createRelation(.) 
- val topic = newTopic() - testUtils.createTopic(topic, partitions = 3) - testUtils.sendMessages(topic, (1 to 10).map(_.toString).toArray, Some(0)) - testUtils.sendMessages(topic, (11 to 20).map(_.toString).toArray, Some(1)) - testUtils.sendMessages(topic, (21 to 30).map(_.toString).toArray, Some(2)) - - val customGroupId = "id-" + Random.nextInt() - val df = createDF(topic, withOptions = Map(groupIdKey -> customGroupId)) - checkAnswer(df, (1 to 30).map(_.toString).toDF()) - - val consumerGroups = testUtils.listConsumerGroups() - val validGroups = consumerGroups.valid().get() - val validGroupsId = validGroups.asScala.map(_.groupId()) - validateGroupId(customGroupId, validGroupsId) - } - test("read Kafka transactional messages: read_committed") { val topic = newTopic() testUtils.createTopic(topic) @@ -622,6 +585,16 @@ abstract class KafkaRelationSuiteBase extends QueryTest with SharedSparkSession } } +class KafkaRelationSuiteWithAdminV1 extends KafkaRelationSuiteV1 { + override protected def sparkConf: SparkConf = + super.sparkConf.set(SQLConf.USE_DEPRECATED_KAFKA_OFFSET_FETCHING.key, "false") +} + +class KafkaRelationSuiteWithAdminV2 extends KafkaRelationSuiteV2 { + override protected def sparkConf: SparkConf = + super.sparkConf.set(SQLConf.USE_DEPRECATED_KAFKA_OFFSET_FETCHING.key, "false") +} + class KafkaRelationSuiteV1 extends KafkaRelationSuiteBase { override protected def sparkConf: SparkConf = super diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index b2c28ffa984a9..979ddebc637f0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1415,6 +1415,17 @@ object SQLConf { .booleanConf .createWithDefault(true) + val USE_DEPRECATED_KAFKA_OFFSET_FETCHING = + buildConf("spark.sql.streaming.kafka.useDeprecatedOffsetFetching") + .internal() + .doc("When true, the deprecated Consumer based offset fetching used which could cause " + + "infinite wait in Spark queries. Such cases query restart is the only workaround. " + + "For further details please see Offset Fetching chapter of Structured Streaming Kafka " + + "Integration Guide.") + .version("3.1.0") + .booleanConf + .createWithDefault(true) + val STATEFUL_OPERATOR_CHECK_CORRECTNESS_ENABLED = buildConf("spark.sql.streaming.statefulOperator.checkCorrectness.enabled") .internal() @@ -3065,6 +3076,8 @@ class SQLConf extends Serializable with Logging { def isUnsupportedOperationCheckEnabled: Boolean = getConf(UNSUPPORTED_OPERATION_CHECK_ENABLED) + def useDeprecatedKafkaOffsetFetching: Boolean = getConf(USE_DEPRECATED_KAFKA_OFFSET_FETCHING) + def statefulOperatorCorrectnessCheckEnabled: Boolean = getConf(STATEFUL_OPERATOR_CHECK_CORRECTNESS_ENABLED) From d38883c1d811f57e5b9f07b29730b7ac6a6731ca Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Tue, 1 Dec 2020 11:38:42 +0000 Subject: [PATCH 0624/1009] [SPARK-32405][SQL][FOLLOWUP] Throw Exception if provider is specified in JDBCTableCatalog create table ### What changes were proposed in this pull request? Throw Exception if JDBC Table Catalog has provider in create table. ### Why are the changes needed? JDBC Table Catalog doesn't support provider and we should throw Exception. Previously CREATE TABLE syntax forces people to specify a provider so we have to add a `USING_`. Now the problem was fix and we will throw Exception for provider. 
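For illustration, a minimal spark-shell style sketch of the behavior after this change (the `h2` catalog and the table names are assumptions borrowed from the test suites below; the provider name is arbitrary):
```
// CREATE TABLE without USING now works against a JDBC table catalog
spark.sql("CREATE TABLE h2.test.new_table(i INT, j STRING)")

// Specifying any provider is rejected
spark.sql("CREATE TABLE h2.test.new_table2(i INT, j STRING) USING parquet")
// org.apache.spark.sql.AnalysisException:
//   CREATE TABLE ... USING ... is not supported in JDBC catalog.
```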
### Does this PR introduce _any_ user-facing change? Yes. We throw Exception if a provider is specified in CREATE TABLE for JDBC Table catalog. ### How was this patch tested? Existing tests (remove `USING _`) Closes #30544 from huaxingao/followup. Authored-by: Huaxin Gao Signed-off-by: Wenchen Fan --- .../v2/jdbc/JDBCTableCatalog.scala | 3 ++- .../v2/jdbc/JDBCTableCatalogSuite.scala | 27 +++++++++---------- .../apache/spark/sql/jdbc/JDBCV2Suite.scala | 21 +++++---------- 3 files changed, 22 insertions(+), 29 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala index e96b37e05c762..63f802363f7c0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala @@ -126,8 +126,9 @@ class JDBCTableCatalog extends TableCatalog with Logging { properties.asScala.map { case (k, v) => k match { case "comment" => tableComment = v - // ToDo: have a follow up to fail provider once unify create table syntax PR is merged case "provider" => + throw new AnalysisException("CREATE TABLE ... USING ... is not supported in" + + " JDBC catalog.") case "owner" => // owner is ignored. It is default to current user name. case "location" => throw new AnalysisException("CREATE TABLE ... LOCATION ... is not supported in" + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala index 97dd92acc7805..9e9df7db1e1c6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala @@ -153,21 +153,20 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("create a table") { withTable("h2.test.new_table") { - // TODO (SPARK-32427): Omit USING in CREATE TABLE - sql("CREATE TABLE h2.test.new_table(i INT, j STRING) USING _") + sql("CREATE TABLE h2.test.new_table(i INT, j STRING)") checkAnswer( sql("SHOW TABLES IN h2.test"), Seq(Row("test", "people"), Row("test", "new_table"))) } withTable("h2.test.new_table") { - sql("CREATE TABLE h2.test.new_table(i INT, j STRING) USING _") + sql("CREATE TABLE h2.test.new_table(i INT, j STRING)") val msg = intercept[AnalysisException] { - sql("CREATE TABLE h2.test.new_table(i INT, j STRING) USING _") + sql("CREATE TABLE h2.test.new_table(i INT, j STRING)") }.getMessage assert(msg.contains("Table test.new_table already exists")) } val exp = intercept[NoSuchNamespaceException] { - sql("CREATE TABLE h2.bad_test.new_table(i INT, j STRING) USING _") + sql("CREATE TABLE h2.bad_test.new_table(i INT, j STRING)") } assert(exp.getMessage.contains("Failed table creation: bad_test.new_table")) assert(exp.cause.get.getMessage.contains("Schema \"bad_test\" not found")) @@ -176,7 +175,7 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("ALTER TABLE ... 
add column") { val tableName = "h2.test.alt_table" withTable(tableName) { - sql(s"CREATE TABLE $tableName (ID INTEGER) USING _") + sql(s"CREATE TABLE $tableName (ID INTEGER)") sql(s"ALTER TABLE $tableName ADD COLUMNS (C1 INTEGER, C2 STRING)") var t = spark.table(tableName) var expectedSchema = new StructType() @@ -206,7 +205,7 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("ALTER TABLE ... rename column") { val tableName = "h2.test.alt_table" withTable(tableName) { - sql(s"CREATE TABLE $tableName (id INTEGER, C0 INTEGER) USING _") + sql(s"CREATE TABLE $tableName (id INTEGER, C0 INTEGER)") sql(s"ALTER TABLE $tableName RENAME COLUMN id TO C") val t = spark.table(tableName) val expectedSchema = new StructType() @@ -231,7 +230,7 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("ALTER TABLE ... drop column") { val tableName = "h2.test.alt_table" withTable(tableName) { - sql(s"CREATE TABLE $tableName (C1 INTEGER, C2 INTEGER, c3 INTEGER) USING _") + sql(s"CREATE TABLE $tableName (C1 INTEGER, C2 INTEGER, c3 INTEGER)") sql(s"ALTER TABLE $tableName DROP COLUMN C1") sql(s"ALTER TABLE $tableName DROP COLUMN c3") val t = spark.table(tableName) @@ -255,7 +254,7 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("ALTER TABLE ... update column type") { val tableName = "h2.test.alt_table" withTable(tableName) { - sql(s"CREATE TABLE $tableName (ID INTEGER, deptno INTEGER) USING _") + sql(s"CREATE TABLE $tableName (ID INTEGER, deptno INTEGER)") sql(s"ALTER TABLE $tableName ALTER COLUMN id TYPE DOUBLE") sql(s"ALTER TABLE $tableName ALTER COLUMN deptno TYPE DOUBLE") val t = spark.table(tableName) @@ -284,7 +283,7 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("ALTER TABLE ... update column nullability") { val tableName = "h2.test.alt_table" withTable(tableName) { - sql(s"CREATE TABLE $tableName (ID INTEGER NOT NULL, deptno INTEGER NOT NULL) USING _") + sql(s"CREATE TABLE $tableName (ID INTEGER NOT NULL, deptno INTEGER NOT NULL)") sql(s"ALTER TABLE $tableName ALTER COLUMN ID DROP NOT NULL") sql(s"ALTER TABLE $tableName ALTER COLUMN deptno DROP NOT NULL") val t = spark.table(tableName) @@ -309,7 +308,7 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("ALTER TABLE ... 
update column comment not supported") { val tableName = "h2.test.alt_table" withTable(tableName) { - sql(s"CREATE TABLE $tableName (ID INTEGER) USING _") + sql(s"CREATE TABLE $tableName (ID INTEGER)") val exp = intercept[AnalysisException] { sql(s"ALTER TABLE $tableName ALTER COLUMN ID COMMENT 'test'") } @@ -333,7 +332,7 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("ALTER TABLE case sensitivity") { val tableName = "h2.test.alt_table" withTable(tableName) { - sql(s"CREATE TABLE $tableName (c1 INTEGER NOT NULL, c2 INTEGER) USING _") + sql(s"CREATE TABLE $tableName (c1 INTEGER NOT NULL, c2 INTEGER)") var t = spark.table(tableName) var expectedSchema = new StructType().add("c1", IntegerType).add("c2", IntegerType) assert(t.schema === expectedSchema) @@ -400,7 +399,7 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { withTable("h2.test.new_table") { val logAppender = new LogAppender("table comment") withLogAppender(logAppender) { - sql("CREATE TABLE h2.test.new_table(i INT, j STRING) USING _ COMMENT 'this is a comment'") + sql("CREATE TABLE h2.test.new_table(i INT, j STRING) COMMENT 'this is a comment'") } val createCommentWarning = logAppender.loggingEvents .filter(_.getLevel == Level.WARN) @@ -413,7 +412,7 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("CREATE TABLE with table property") { withTable("h2.test.new_table") { val m = intercept[AnalysisException] { - sql("CREATE TABLE h2.test.new_table(i INT, j STRING) USING _" + + sql("CREATE TABLE h2.test.new_table(i INT, j STRING)" + " TBLPROPERTIES('ENGINE'='tableEngineName')") }.cause.get.getMessage assert(m.contains("\"TABLEENGINENAME\" not found")) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index 3bcacd03b4a0d..e8157e552d754 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -111,7 +111,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession { test("read/write with partition info") { withTable("h2.test.abc") { - sql("CREATE TABLE h2.test.abc USING _ AS SELECT * FROM h2.test.people") + sql("CREATE TABLE h2.test.abc AS SELECT * FROM h2.test.people") val df1 = Seq(("evan", 3), ("cathy", 4), ("alex", 5)).toDF("NAME", "ID") val e = intercept[IllegalArgumentException] { df1.write @@ -148,11 +148,9 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession { Seq(Row("test", "people"), Row("test", "empty_table"))) } - // TODO (SPARK-32603): Operation not allowed: CREATE TABLE ... STORED AS ... 
does not support - // multi-part identifiers test("SQL API: create table as select") { withTable("h2.test.abc") { - sql("CREATE TABLE h2.test.abc USING _ AS SELECT * FROM h2.test.people") + sql("CREATE TABLE h2.test.abc AS SELECT * FROM h2.test.people") checkAnswer(sql("SELECT name, id FROM h2.test.abc"), Seq(Row("fred", 1), Row("mary", 2))) } } @@ -164,15 +162,14 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession { } } - // TODO (SPARK-32603): ParseException: mismatched input 'AS' expecting {'(', 'USING'} test("SQL API: replace table as select") { withTable("h2.test.abc") { intercept[CannotReplaceMissingTableException] { - sql("REPLACE TABLE h2.test.abc USING _ AS SELECT 1 as col") + sql("REPLACE TABLE h2.test.abc AS SELECT 1 as col") } - sql("CREATE OR REPLACE TABLE h2.test.abc USING _ AS SELECT 1 as col") + sql("CREATE OR REPLACE TABLE h2.test.abc AS SELECT 1 as col") checkAnswer(sql("SELECT col FROM h2.test.abc"), Row(1)) - sql("REPLACE TABLE h2.test.abc USING _ AS SELECT * FROM h2.test.people") + sql("REPLACE TABLE h2.test.abc AS SELECT * FROM h2.test.people") checkAnswer(sql("SELECT name, id FROM h2.test.abc"), Seq(Row("fred", 1), Row("mary", 2))) } } @@ -189,11 +186,9 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession { } } - // TODO (SPARK-32603): Operation not allowed: CREATE TABLE ... STORED AS ... does not support - // multi-part identifiers test("SQL API: insert and overwrite") { withTable("h2.test.abc") { - sql("CREATE TABLE h2.test.abc USING _ AS SELECT * FROM h2.test.people") + sql("CREATE TABLE h2.test.abc AS SELECT * FROM h2.test.people") sql("INSERT INTO h2.test.abc SELECT 'lucy', 3") checkAnswer( @@ -205,11 +200,9 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession { } } - // TODO (SPARK-32603): Operation not allowed: CREATE TABLE ... STORED AS ... does not support - // multi-part identifiers test("DataFrameWriterV2: insert and overwrite") { withTable("h2.test.abc") { - sql("CREATE TABLE h2.test.abc USING _ AS SELECT * FROM h2.test.people") + sql("CREATE TABLE h2.test.abc AS SELECT * FROM h2.test.people") // `DataFrameWriterV2` is by-name. sql("SELECT 3 AS ID, 'lucy' AS NAME").writeTo("h2.test.abc").append() From 9273d4250ddd5e011487a5a942c1b4d0f0412f78 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Tue, 1 Dec 2020 11:48:30 +0000 Subject: [PATCH 0625/1009] [SPARK-33045][SQL][FOLLOWUP] Support built-in function like_any and fix StackOverflowError issue ### What changes were proposed in this pull request? Spark already support `LIKE ANY` syntax, but it will throw `StackOverflowError` if there are many elements(more than 14378 elements). We should implement built-in function for LIKE ANY to fix this issue. Why the stack overflow can happen in the current approach ? The current approach uses reduceLeft to connect each `Like(e, p)`, this will lead the the call depth of the thread is too large, causing `StackOverflowError` problems. Why the fix in this PR can avoid the error? This PR support built-in function for `LIKE ANY` and avoid this issue. ### Why are the changes needed? 1.Fix the `StackOverflowError` issue. 2.Support built-in function `like_any`. ### Does this PR introduce _any_ user-facing change? 'No'. ### How was this patch tested? Jenkins test. Closes #30465 from beliefer/SPARK-33045-like_any-bak. 
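As a rough sketch of the failure mode being fixed (table name `t` and column `c` are made up for the example): before this patch the parser expanded `LIKE ANY` into a deeply nested `Or(Like(...), Like(...))` chain via `reduceLeft`, which could overflow the stack once the pattern list got large enough; with this patch the same query parses into a single `LikeAny` expression.
```
val patterns = (1 to 20000).map(i => s"'%p$i%'").mkString(", ")
// Before: deep Or/Like tree, prone to StackOverflowError at this size
// After: a single LikeAny expression over the pattern list
spark.sql(s"SELECT * FROM t WHERE c LIKE ANY ($patterns)")
```
Note that the optimized `LikeAny`/`NotLikeAny` path only applies when all pattern values are foldable string literals; otherwise the parser still falls back to the `Or` chain of `Like` expressions.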
Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/dsl/package.scala | 4 + .../expressions/regexpExpressions.scala | 98 ++++++++++++++++--- .../sql/catalyst/parser/AstBuilder.scala | 31 +++--- .../apache/spark/sql/internal/SQLConf.scala | 14 --- .../expressions/RegexpExpressionsSuite.scala | 26 +++++ .../parser/ExpressionParserSuite.scala | 12 +-- .../resources/sql-tests/inputs/like-all.sql | 2 - .../resources/sql-tests/inputs/like-any.sql | 2 + 8 files changed, 138 insertions(+), 51 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index 89cf97e76d798..2bcbdf6512389 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -107,6 +107,10 @@ package object dsl { LikeAll(expr, others.map(_.eval(EmptyRow).asInstanceOf[UTF8String])) def notLikeAll(others: Expression*): Expression = NotLikeAll(expr, others.map(_.eval(EmptyRow).asInstanceOf[UTF8String])) + def likeAny(others: Expression*): Expression = + LikeAny(expr, others.map(_.eval(EmptyRow).asInstanceOf[UTF8String])) + def notLikeAny(others: Expression*): Expression = + NotLikeAny(expr, others.map(_.eval(EmptyRow).asInstanceOf[UTF8String])) def contains(other: Expression): Expression = Contains(expr, other) def startsWith(other: Expression): Expression = StartsWith(expr, other) def endsWith(other: Expression): Expression = EndsWith(expr, other) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index b4d9921488d5f..0b94fe8b5d47e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -180,14 +180,12 @@ case class Like(left: Expression, right: Expression, escapeChar: Char) } } -/** - * Optimized version of LIKE ALL, when all pattern values are literal. 
- */ -abstract class LikeAllBase extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { +abstract class MultiLikeBase + extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { protected def patterns: Seq[UTF8String] - protected def isNotLikeAll: Boolean + protected def isNotSpecified: Boolean override def inputTypes: Seq[DataType] = StringType :: Nil @@ -195,27 +193,39 @@ abstract class LikeAllBase extends UnaryExpression with ImplicitCastInputTypes w override def nullable: Boolean = true - private lazy val hasNull: Boolean = patterns.contains(null) + protected lazy val hasNull: Boolean = patterns.contains(null) - private lazy val cache = patterns.filterNot(_ == null) + protected lazy val cache = patterns.filterNot(_ == null) .map(s => Pattern.compile(StringUtils.escapeLikeRegex(s.toString, '\\'))) - private lazy val matchFunc = if (isNotLikeAll) { + protected lazy val matchFunc = if (isNotSpecified) { (p: Pattern, inputValue: String) => !p.matcher(inputValue).matches() } else { (p: Pattern, inputValue: String) => p.matcher(inputValue).matches() } + protected def matches(exprValue: String): Any + override def eval(input: InternalRow): Any = { val exprValue = child.eval(input) if (exprValue == null) { null } else { - if (cache.forall(matchFunc(_, exprValue.toString))) { - if (hasNull) null else true - } else { - false - } + matches(exprValue.toString) + } + } +} + +/** + * Optimized version of LIKE ALL, when all pattern values are literal. + */ +abstract class LikeAllBase extends MultiLikeBase { + + override def matches(exprValue: String): Any = { + if (cache.forall(matchFunc(_, exprValue))) { + if (hasNull) null else true + } else { + false } } @@ -227,7 +237,7 @@ abstract class LikeAllBase extends UnaryExpression with ImplicitCastInputTypes w val valueArg = ctx.freshName("valueArg") val patternCache = ctx.addReferenceObj("patternCache", cache.asJava) - val checkNotMatchCode = if (isNotLikeAll) { + val checkNotMatchCode = if (isNotSpecified) { s"$pattern.matcher($valueArg.toString()).matches()" } else { s"!$pattern.matcher($valueArg.toString()).matches()" @@ -255,11 +265,67 @@ abstract class LikeAllBase extends UnaryExpression with ImplicitCastInputTypes w } case class LikeAll(child: Expression, patterns: Seq[UTF8String]) extends LikeAllBase { - override def isNotLikeAll: Boolean = false + override def isNotSpecified: Boolean = false } case class NotLikeAll(child: Expression, patterns: Seq[UTF8String]) extends LikeAllBase { - override def isNotLikeAll: Boolean = true + override def isNotSpecified: Boolean = true +} + +/** + * Optimized version of LIKE ANY, when all pattern values are literal. 
+ */ +abstract class LikeAnyBase extends MultiLikeBase { + + override def matches(exprValue: String): Any = { + if (cache.exists(matchFunc(_, exprValue))) { + true + } else { + if (hasNull) null else false + } + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val eval = child.genCode(ctx) + val patternClass = classOf[Pattern].getName + val javaDataType = CodeGenerator.javaType(child.dataType) + val pattern = ctx.freshName("pattern") + val valueArg = ctx.freshName("valueArg") + val patternCache = ctx.addReferenceObj("patternCache", cache.asJava) + + val checkMatchCode = if (isNotSpecified) { + s"!$pattern.matcher($valueArg.toString()).matches()" + } else { + s"$pattern.matcher($valueArg.toString()).matches()" + } + + ev.copy(code = + code""" + |${eval.code} + |boolean ${ev.isNull} = false; + |boolean ${ev.value} = false; + |if (${eval.isNull}) { + | ${ev.isNull} = true; + |} else { + | $javaDataType $valueArg = ${eval.value}; + | for ($patternClass $pattern: $patternCache) { + | if ($checkMatchCode) { + | ${ev.value} = true; + | break; + | } + | } + | if (!${ev.value} && $hasNull) ${ev.isNull} = true; + |} + """.stripMargin) + } +} + +case class LikeAny(child: Expression, patterns: Seq[UTF8String]) extends LikeAnyBase { + override def isNotSpecified: Boolean = false +} + +case class NotLikeAny(child: Expression, patterns: Seq[UTF8String]) extends LikeAnyBase { + override def isNotSpecified: Boolean = true } // scalastyle:off line.contains.tab diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index ff8b56f0b724b..3788e1631c3dd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1396,14 +1396,6 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg case other => Seq(other) } - def getLikeQuantifierExprs(expressions: java.util.List[ExpressionContext]): Seq[Expression] = { - if (expressions.isEmpty) { - throw new ParseException("Expected something between '(' and ')'.", ctx) - } else { - expressions.asScala.map(expression).map(p => invertIfNotDefined(new Like(e, p))).toSeq - } - } - // Create the predicate. ctx.kind.getType match { case SqlBaseParser.BETWEEN => @@ -1418,12 +1410,24 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg case SqlBaseParser.LIKE => Option(ctx.quantifier).map(_.getType) match { case Some(SqlBaseParser.ANY) | Some(SqlBaseParser.SOME) => - getLikeQuantifierExprs(ctx.expression).reduceLeft(Or) + validate(!ctx.expression.isEmpty, "Expected something between '(' and ')'.", ctx) + val expressions = expressionList(ctx.expression) + if (expressions.forall(_.foldable) && expressions.forall(_.dataType == StringType)) { + // If there are many pattern expressions, will throw StackOverflowError. + // So we use LikeAny or NotLikeAny instead. 
+ val patterns = expressions.map(_.eval(EmptyRow).asInstanceOf[UTF8String]) + ctx.NOT match { + case null => LikeAny(e, patterns.toSeq) + case _ => NotLikeAny(e, patterns.toSeq) + } + } else { + ctx.expression.asScala.map(expression) + .map(p => invertIfNotDefined(new Like(e, p))).toSeq.reduceLeft(Or) + } case Some(SqlBaseParser.ALL) => validate(!ctx.expression.isEmpty, "Expected something between '(' and ')'.", ctx) - val expressions = ctx.expression.asScala.map(expression) - if (expressions.size > SQLConf.get.optimizerLikeAllConversionThreshold && - expressions.forall(_.foldable) && expressions.forall(_.dataType == StringType)) { + val expressions = expressionList(ctx.expression) + if (expressions.forall(_.foldable) && expressions.forall(_.dataType == StringType)) { // If there are many pattern expressions, will throw StackOverflowError. // So we use LikeAll or NotLikeAll instead. val patterns = expressions.map(_.eval(EmptyRow).asInstanceOf[UTF8String]) @@ -1432,7 +1436,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg case _ => NotLikeAll(e, patterns.toSeq) } } else { - getLikeQuantifierExprs(ctx.expression).reduceLeft(And) + ctx.expression.asScala.map(expression) + .map(p => invertIfNotDefined(new Like(e, p))).toSeq.reduceLeft(And) } case _ => val escapeChar = Option(ctx.escapeChar).map(string).map { str => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 979ddebc637f0..a1d6f9f608873 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -216,18 +216,6 @@ object SQLConf { "for using switch statements in InSet must be non-negative and less than or equal to 600") .createWithDefault(400) - val OPTIMIZER_LIKE_ALL_CONVERSION_THRESHOLD = - buildConf("spark.sql.optimizer.likeAllConversionThreshold") - .internal() - .doc("Configure the maximum size of the pattern sequence in like all. Spark will convert " + - "the logical combination of like to avoid StackOverflowError. 
200 is an empirical value " + - "that will not cause StackOverflowError.") - .version("3.1.0") - .intConf - .checkValue(threshold => threshold >= 0, "The maximum size of pattern sequence " + - "in like all must be non-negative") - .createWithDefault(200) - val PLAN_CHANGE_LOG_LEVEL = buildConf("spark.sql.planChangeLog.level") .internal() .doc("Configures the log level for logging the change from the original plan to the new " + @@ -3048,8 +3036,6 @@ class SQLConf extends Serializable with Logging { def optimizerInSetSwitchThreshold: Int = getConf(OPTIMIZER_INSET_SWITCH_THRESHOLD) - def optimizerLikeAllConversionThreshold: Int = getConf(OPTIMIZER_LIKE_ALL_CONVERSION_THRESHOLD) - def planChangeLogLevel: String = getConf(PLAN_CHANGE_LOG_LEVEL) def planChangeRules: Option[String] = getConf(PLAN_CHANGE_LOG_RULES) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala index cc5ab5dc7b4e0..8d7501d952ecb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala @@ -72,6 +72,32 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { .notLikeAll(Literal.create(null, StringType), "%yoo%"), null) } + test("LIKE ANY") { + checkEvaluation(Literal.create(null, StringType).likeAny("%foo%", "%oo"), null) + checkEvaluation(Literal.create("foo", StringType).likeAny("%foo%", "%oo"), true) + checkEvaluation(Literal.create("foo", StringType).likeAny("%foo%", "%bar%"), true) + checkEvaluation(Literal.create("foo", StringType).likeAny("%fee%", "%bar%"), false) + checkEvaluation(Literal.create("foo", StringType) + .likeAny("%foo%", Literal.create(null, StringType)), true) + checkEvaluation(Literal.create("foo", StringType) + .likeAny(Literal.create(null, StringType), "%foo%"), true) + checkEvaluation(Literal.create("foo", StringType) + .likeAny("%feo%", Literal.create(null, StringType)), null) + checkEvaluation(Literal.create("foo", StringType) + .likeAny(Literal.create(null, StringType), "%feo%"), null) + checkEvaluation(Literal.create("foo", StringType).notLikeAny("tee", "%yoo%"), true) + checkEvaluation(Literal.create("foo", StringType).notLikeAny("%oo%", "%yoo%"), true) + checkEvaluation(Literal.create("foo", StringType).notLikeAny("%foo%", "%oo"), false) + checkEvaluation(Literal.create("foo", StringType) + .notLikeAny("%foo%", Literal.create(null, StringType)), null) + checkEvaluation(Literal.create("foo", StringType) + .notLikeAny(Literal.create(null, StringType), "%foo%"), null) + checkEvaluation(Literal.create("foo", StringType) + .notLikeAny("%yoo%", Literal.create(null, StringType)), true) + checkEvaluation(Literal.create("foo", StringType) + .notLikeAny(Literal.create(null, StringType), "%yoo%"), true) + } + test("LIKE Pattern") { // null handling diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index b1d0d044eaead..9f6a76b9228c5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -210,13 +210,13 @@ class ExpressionParserSuite extends AnalysisTest { test("(NOT) 
LIKE (ANY | SOME | ALL) expressions") { Seq("any", "some").foreach { quantifier => - assertEqual(s"a like $quantifier ('foo%', 'b%')", ('a like "foo%") || ('a like "b%")) - assertEqual(s"a not like $quantifier ('foo%', 'b%')", !('a like "foo%") || !('a like "b%")) - assertEqual(s"not (a like $quantifier ('foo%', 'b%'))", !(('a like "foo%") || ('a like "b%"))) + assertEqual(s"a like $quantifier ('foo%', 'b%')", 'a likeAny("foo%", "b%")) + assertEqual(s"a not like $quantifier ('foo%', 'b%')", 'a notLikeAny("foo%", "b%")) + assertEqual(s"not (a like $quantifier ('foo%', 'b%'))", !('a likeAny("foo%", "b%"))) } - assertEqual("a like all ('foo%', 'b%')", ('a like "foo%") && ('a like "b%")) - assertEqual("a not like all ('foo%', 'b%')", !('a like "foo%") && !('a like "b%")) - assertEqual("not (a like all ('foo%', 'b%'))", !(('a like "foo%") && ('a like "b%"))) + assertEqual("a like all ('foo%', 'b%')", 'a likeAll("foo%", "b%")) + assertEqual("a not like all ('foo%', 'b%')", 'a notLikeAll("foo%", "b%")) + assertEqual("not (a like all ('foo%', 'b%'))", !('a likeAll("foo%", "b%"))) Seq("any", "some", "all").foreach { quantifier => intercept(s"a like $quantifier()", "Expected something between '(' and ')'") diff --git a/sql/core/src/test/resources/sql-tests/inputs/like-all.sql b/sql/core/src/test/resources/sql-tests/inputs/like-all.sql index f83277376e680..51b689607e8e3 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/like-all.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/like-all.sql @@ -1,6 +1,4 @@ -- test cases for like all ---CONFIG_DIM1 spark.sql.optimizer.likeAllConversionThreshold=0 ---CONFIG_DIM1 spark.sql.optimizer.likeAllConversionThreshold=200 CREATE OR REPLACE TEMPORARY VIEW like_all_table AS SELECT * FROM (VALUES ('google', '%oo%'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/like-any.sql b/sql/core/src/test/resources/sql-tests/inputs/like-any.sql index 5758a2a494944..a6e9827d58d94 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/like-any.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/like-any.sql @@ -1,3 +1,5 @@ +-- test cases for like any + CREATE OR REPLACE TEMPORARY VIEW like_any_table AS SELECT * FROM (VALUES ('google', '%oo%'), ('facebook', '%oo%'), From cf4ad212b100901b7065f2db8c1688c83423141d Mon Sep 17 00:00:00 2001 From: Prakhar Jain Date: Tue, 1 Dec 2020 21:13:27 +0900 Subject: [PATCH 0626/1009] [SPARK-33503][SQL] Refactor SortOrder class to allow multiple childrens ### What changes were proposed in this pull request? This is a followup of #30302 . As part of this PR, sameOrderExpressions set is made part of children of SortOrder node - so that they don't need any special handling as done in #30302 . ### Why are the changes needed? sameOrderExpressions should get same treatment as child. So making them part of children helps in transforming them easily. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UTs Closes #30430 from prakharjain09/SPARK-33400-sortorder-refactor. 
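As an aside (not part of the patch), a minimal sketch of what the new shape of `SortOrder` means for callers: `sameOrderExpressions` is now a `Seq[Expression]` that is folded into `children`, so ordinary tree transformations reach it without the special handling removed here. Attribute names below are illustrative.

```scala
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions.{Ascending, SortOrder}

val a = 'a.int
val b = 'b.int

// sameOrderExpressions is a Seq now and becomes part of children.
val order = SortOrder(a, Ascending, sameOrderExpressions = Seq(b))
assert(order.children == Seq(a, b))
// A generic transform over children therefore rewrites `b` as well,
// which previously needed dedicated handling of the Set-typed field.
```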
Authored-by: Prakhar Jain Signed-off-by: Takeshi Yamamuro --- .../sql/catalyst/analysis/Analyzer.scala | 2 +- .../spark/sql/catalyst/dsl/package.scala | 4 +-- .../sql/catalyst/expressions/SortOrder.scala | 10 ++++--- .../sql/catalyst/parser/AstBuilder.scala | 2 +- .../scala/org/apache/spark/sql/Column.scala | 8 +++--- .../AliasAwareOutputExpression.scala | 6 +---- .../execution/joins/SortMergeJoinExec.scala | 9 ++++--- .../spark/sql/execution/PlannerSuite.scala | 26 +++++++++++++++++++ 8 files changed, 46 insertions(+), 21 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index abd38f2f9d940..6b06cf13262d4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1822,7 +1822,7 @@ class Analyzer(override val catalogManager: CatalogManager) val newOrders = orders map { case s @ SortOrder(UnresolvedOrdinal(index), direction, nullOrdering, _) => if (index > 0 && index <= child.output.size) { - SortOrder(child.output(index - 1), direction, nullOrdering, Set.empty) + SortOrder(child.output(index - 1), direction, nullOrdering, Seq.empty) } else { s.failAnalysis( s"ORDER BY position $index is not in select list " + diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index 2bcbdf6512389..5a778d2785a67 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -135,9 +135,9 @@ package object dsl { } def asc: SortOrder = SortOrder(expr, Ascending) - def asc_nullsLast: SortOrder = SortOrder(expr, Ascending, NullsLast, Set.empty) + def asc_nullsLast: SortOrder = SortOrder(expr, Ascending, NullsLast, Seq.empty) def desc: SortOrder = SortOrder(expr, Descending) - def desc_nullsFirst: SortOrder = SortOrder(expr, Descending, NullsFirst, Set.empty) + def desc_nullsFirst: SortOrder = SortOrder(expr, Descending, NullsFirst, Seq.empty) def as(alias: String): NamedExpression = Alias(expr, alias)() def as(alias: Symbol): NamedExpression = Alias(expr, alias.name)() } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala index 54259e713accd..d9923b5d022e0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala @@ -63,8 +63,10 @@ case class SortOrder( child: Expression, direction: SortDirection, nullOrdering: NullOrdering, - sameOrderExpressions: Set[Expression]) - extends UnaryExpression with Unevaluable { + sameOrderExpressions: Seq[Expression]) + extends Expression with Unevaluable { + + override def children: Seq[Expression] = child +: sameOrderExpressions override def checkInputDataTypes(): TypeCheckResult = { if (RowOrdering.isOrderable(dataType)) { @@ -83,7 +85,7 @@ case class SortOrder( def isAscending: Boolean = direction == Ascending def satisfies(required: SortOrder): Boolean = { - (sameOrderExpressions + child).exists(required.child.semanticEquals) && + children.exists(required.child.semanticEquals) && direction == required.direction && 
nullOrdering == required.nullOrdering } } @@ -92,7 +94,7 @@ object SortOrder { def apply( child: Expression, direction: SortDirection, - sameOrderExpressions: Set[Expression] = Set.empty): SortOrder = { + sameOrderExpressions: Seq[Expression] = Seq.empty): SortOrder = { new SortOrder(child, direction, direction.defaultNullOrdering, sameOrderExpressions) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 3788e1631c3dd..12c5e0de686fa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1910,7 +1910,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } else { direction.defaultNullOrdering } - SortOrder(expression(ctx.expression), direction, nullOrdering, Set.empty) + SortOrder(expression(ctx.expression), direction, nullOrdering, Seq.empty) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index b3e403ffa7382..95134d9111593 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -1228,7 +1228,7 @@ class Column(val expr: Expression) extends Logging { * @group expr_ops * @since 2.1.0 */ - def desc_nulls_first: Column = withExpr { SortOrder(expr, Descending, NullsFirst, Set.empty) } + def desc_nulls_first: Column = withExpr { SortOrder(expr, Descending, NullsFirst, Seq.empty) } /** * Returns a sort expression based on the descending order of the column, @@ -1244,7 +1244,7 @@ class Column(val expr: Expression) extends Logging { * @group expr_ops * @since 2.1.0 */ - def desc_nulls_last: Column = withExpr { SortOrder(expr, Descending, NullsLast, Set.empty) } + def desc_nulls_last: Column = withExpr { SortOrder(expr, Descending, NullsLast, Seq.empty) } /** * Returns a sort expression based on ascending order of the column. @@ -1275,7 +1275,7 @@ class Column(val expr: Expression) extends Logging { * @group expr_ops * @since 2.1.0 */ - def asc_nulls_first: Column = withExpr { SortOrder(expr, Ascending, NullsFirst, Set.empty) } + def asc_nulls_first: Column = withExpr { SortOrder(expr, Ascending, NullsFirst, Seq.empty) } /** * Returns a sort expression based on ascending order of the column, @@ -1291,7 +1291,7 @@ class Column(val expr: Expression) extends Logging { * @group expr_ops * @since 2.1.0 */ - def asc_nulls_last: Column = withExpr { SortOrder(expr, Ascending, NullsLast, Set.empty) } + def asc_nulls_last: Column = withExpr { SortOrder(expr, Ascending, NullsLast, Seq.empty) } /** * Prints the expression to the console for debugging purposes. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala index 3ba8745be995f..3cbe1654ea2cd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala @@ -65,11 +65,7 @@ trait AliasAwareOutputOrdering extends AliasAwareOutputExpression { final override def outputOrdering: Seq[SortOrder] = { if (hasAlias) { - orderingExpressions.map { sortOrder => - val newSortOrder = normalizeExpression(sortOrder).asInstanceOf[SortOrder] - val newSameOrderExpressions = newSortOrder.sameOrderExpressions.map(normalizeExpression) - newSortOrder.copy(sameOrderExpressions = newSameOrderExpressions) - } + orderingExpressions.map(normalizeExpression(_).asInstanceOf[SortOrder]) } else { orderingExpressions } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala index 6e59ad07d7168..eabbdc8ed3243 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala @@ -68,9 +68,9 @@ case class SortMergeJoinExec( val leftKeyOrdering = getKeyOrdering(leftKeys, left.outputOrdering) val rightKeyOrdering = getKeyOrdering(rightKeys, right.outputOrdering) leftKeyOrdering.zip(rightKeyOrdering).map { case (lKey, rKey) => - // Also add the right key and its `sameOrderExpressions` - SortOrder(lKey.child, Ascending, lKey.sameOrderExpressions + rKey.child ++ rKey - .sameOrderExpressions) + // Also add expressions from right side sort order + val sameOrderExpressions = ExpressionSet(lKey.sameOrderExpressions ++ rKey.children) + SortOrder(lKey.child, Ascending, sameOrderExpressions.toSeq) } // For left and right outer joins, the output is ordered by the streamed input's join keys. 
case LeftOuter => getKeyOrdering(leftKeys, left.outputOrdering) @@ -96,7 +96,8 @@ case class SortMergeJoinExec( val requiredOrdering = requiredOrders(keys) if (SortOrder.orderingSatisfies(childOutputOrdering, requiredOrdering)) { keys.zip(childOutputOrdering).map { case (key, childOrder) => - SortOrder(key, Ascending, childOrder.sameOrderExpressions + childOrder.child - key) + val sameOrderExpressionsSet = ExpressionSet(childOrder.children) - key + SortOrder(key, Ascending, sameOrderExpressionsSet.toSeq) } } else { requiredOrdering diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index 6de81cc414d7d..5e30f846307ae 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -1090,6 +1090,32 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } } + test("sort order doesn't have repeated expressions") { + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withTempView("t1", "t2") { + spark.range(10).repartition($"id").createTempView("t1") + spark.range(20).repartition($"id").createTempView("t2") + val planned = sql( + """ + | SELECT t12.id, t1.id + | FROM (SELECT t1.id FROM t1, t2 WHERE t1.id * 2 = t2.id) t12, t1 + | where 2 * t12.id = t1.id + """.stripMargin).queryExecution.executedPlan + + // t12 is already sorted on `t1.id * 2`. and we need to sort it on `2 * t12.id` + // for 2nd join. So sorting on t12 can be avoided + val sortNodes = planned.collect { case s: SortExec => s } + assert(sortNodes.size == 3) + val outputOrdering = planned.outputOrdering + assert(outputOrdering.size == 1) + // Sort order should have 3 childrens, not 4. This is because t1.id*2 and 2*t1.id are same + assert(outputOrdering.head.children.size == 3) + assert(outputOrdering.head.children.count(_.isInstanceOf[AttributeReference]) == 2) + assert(outputOrdering.head.children.count(_.isInstanceOf[Multiply]) == 1) + } + } + } + test("aliases to expressions should not be replaced") { withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { withTempView("df1", "df2") { From 478fb7f5280d8da2c68b858114eda358708e681b Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Tue, 1 Dec 2020 14:11:01 +0000 Subject: [PATCH 0627/1009] [SPARK-33608][SQL] Handle DELETE/UPDATE/MERGE in PullupCorrelatedPredicates ### What changes were proposed in this pull request? This PR adds logic to handle DELETE/UPDATE/MERGE plans in `PullupCorrelatedPredicates`. ### Why are the changes needed? Right now, `PullupCorrelatedPredicates` applies only to filters and unary nodes. As a result, correlated predicates in DELETE/UPDATE/MERGE are not rewritten. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? The PR adds 3 new test cases. Closes #30555 from aokolnychyi/spark-33608. 
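To illustrate the kind of statement this now covers (a sketch only, not taken from the patch; `target` and `source` are hypothetical v2 tables that support DELETE):

```scala
// The correlated predicate s.region = t.region is now pulled up by
// PullupCorrelatedPredicates, just as it would be for a plain Filter.
spark.sql("""
  DELETE FROM target t
  WHERE t.id IN (SELECT s.id FROM source s WHERE s.region = t.region)
""")
```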
Authored-by: Anton Okolnychyi Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/subquery.scala | 2 + .../PullupCorrelatedPredicatesSuite.scala | 64 ++++++++++++++++++- 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala index 11532d22204a4..3c2ee3149d317 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala @@ -328,6 +328,8 @@ object PullupCorrelatedPredicates extends Rule[LogicalPlan] with PredicateHelper // Only a few unary nodes (Project/Filter/Aggregate) can contain subqueries. case q: UnaryNode => rewriteSubQueries(q, q.children) + case s: SupportsSubquery => + rewriteSubQueries(s, s.children) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala index 17dfc7f3f18f7..ae9a694b50444 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.PlanTest -import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{Assignment, DeleteAction, DeleteFromTable, InsertAction, LocalRelation, LogicalPlan, MergeIntoTable, UpdateTable} import org.apache.spark.sql.catalyst.rules.RuleExecutor class PullupCorrelatedPredicatesSuite extends PlanTest { @@ -98,4 +98,66 @@ class PullupCorrelatedPredicatesSuite extends PlanTest { val doubleOptimized = Optimize.execute(optimized) comparePlans(optimized, doubleOptimized, false) } + + test("PullupCorrelatedPredicates should handle deletes") { + val subPlan = testRelation2.where('a === 'c).select('c) + val cond = InSubquery(Seq('a), ListQuery(subPlan)) + val deletePlan = DeleteFromTable(testRelation, Some(cond)).analyze + assert(deletePlan.resolved) + + val optimized = Optimize.execute(deletePlan) + assert(optimized.resolved) + + optimized match { + case DeleteFromTable(_, Some(s: InSubquery)) => + val outerRefs = SubExprUtils.getOuterReferences(s.query.plan) + assert(outerRefs.isEmpty, "should be no outer refs") + case other => + fail(s"unexpected logical plan: $other") + } + } + + test("PullupCorrelatedPredicates should handle updates") { + val subPlan = testRelation2.where('a === 'c).select('c) + val cond = InSubquery(Seq('a), ListQuery(subPlan)) + val updatePlan = UpdateTable(testRelation, Seq.empty, Some(cond)).analyze + assert(updatePlan.resolved) + + val optimized = Optimize.execute(updatePlan) + assert(optimized.resolved) + + optimized match { + case UpdateTable(_, _, Some(s: InSubquery)) => + val outerRefs = SubExprUtils.getOuterReferences(s.query.plan) + assert(outerRefs.isEmpty, "should be no outer refs") + case other => + fail(s"unexpected logical plan: $other") + } + } + + test("PullupCorrelatedPredicates should handle merge") { + val testRelation3 = LocalRelation('e.int, 'f.double) + val subPlan = 
testRelation3.where('a === 'e).select('e) + val cond = InSubquery(Seq('a), ListQuery(subPlan)) + + val mergePlan = MergeIntoTable( + testRelation, + testRelation2, + cond, + Seq(DeleteAction(None)), + Seq(InsertAction(None, Seq(Assignment('a, 'c), Assignment('b, 'd))))) + val analyzedMergePlan = mergePlan.analyze + assert(analyzedMergePlan.resolved) + + val optimized = Optimize.execute(analyzedMergePlan) + assert(optimized.resolved) + + optimized match { + case MergeIntoTable(_, _, s: InSubquery, _, _) => + val outerRefs = SubExprUtils.getOuterReferences(s.query.plan) + assert(outerRefs.isEmpty, "should be no outer refs") + case other => + fail(s"unexpected logical plan: $other") + } + } } From c24f2b2d6afb411fbfffb90fa87150f3b6912343 Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Tue, 1 Dec 2020 09:27:46 -0800 Subject: [PATCH 0628/1009] [SPARK-33612][SQL] Add dataSourceRewriteRules batch to Optimizer ### What changes were proposed in this pull request? This PR adds a new batch to the optimizer for executing rules that rewrite plans for data sources. ### Why are the changes needed? Right now, we have a special place in the optimizer where we construct v2 scans. As time shows, we need more rewrite rules that would be executed after the operator optimization and before any stats-related rules for v2 tables. Not all rules will be specific to reads. One option is to rename the current batch into something more generic but it would require changing quite some places. That's why it seems better to introduce a new batch and use it for all rewrites. The name is generic so that we don't limit ourselves to v2 data sources only. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? The change is trivial and SPARK-23889 will depend on it. Closes #30558 from aokolnychyi/spark-33612. Authored-by: Anton Okolnychyi Signed-off-by: Dongjoon Hyun --- .../spark/sql/catalyst/optimizer/Optimizer.scala | 9 +++++++++ .../spark/sql/internal/BaseSessionStateBuilder.scala | 11 +++++++++++ 2 files changed, 20 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 9eee7c2b914a4..b7c8f775b857f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -185,6 +185,9 @@ abstract class Optimizer(catalogManager: CatalogManager) RemoveLiteralFromGroupExpressions, RemoveRepetitionFromGroupExpressions) :: Nil ++ operatorOptimizationBatch) :+ + // This batch rewrites data source plans and should be run after the operator + // optimization batch and before any batches that depend on stats. + Batch("Data Source Rewrite Rules", Once, dataSourceRewriteRules: _*) :+ // This batch pushes filters and projections into scan nodes. Before this batch, the logical // plan may contain nodes that do not report stats. Anything that uses stats must run after // this batch. @@ -289,6 +292,12 @@ abstract class Optimizer(catalogManager: CatalogManager) */ def earlyScanPushDownRules: Seq[Rule[LogicalPlan]] = Nil + /** + * Override to provide additional rules for rewriting data source plans. Such rules will be + * applied after operator optimization rules and before any rules that depend on stats. 
+ */ + def dataSourceRewriteRules: Seq[Rule[LogicalPlan]] = Nil + /** * Returns (defaultBatches - (excludedRules - nonExcludableRules)), the rule batches that * eventually run in the Optimizer. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index a89a5de3b7e72..8101f9e291b44 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -241,6 +241,9 @@ abstract class BaseSessionStateBuilder( override def earlyScanPushDownRules: Seq[Rule[LogicalPlan]] = super.earlyScanPushDownRules ++ customEarlyScanPushDownRules + override def dataSourceRewriteRules: Seq[Rule[LogicalPlan]] = + super.dataSourceRewriteRules ++ customDataSourceRewriteRules + override def extendedOperatorOptimizationRules: Seq[Rule[LogicalPlan]] = super.extendedOperatorOptimizationRules ++ customOperatorOptimizationRules } @@ -264,6 +267,14 @@ abstract class BaseSessionStateBuilder( */ protected def customEarlyScanPushDownRules: Seq[Rule[LogicalPlan]] = Nil + /** + * Custom rules for rewriting data source plans to add to the Optimizer. Prefer overriding + * this instead of creating your own Optimizer. + * + * Note that this may NOT depend on the `optimizer` function. + */ + protected def customDataSourceRewriteRules: Seq[Rule[LogicalPlan]] = Nil + /** * Planner that converts optimized logical plans to physical plans. * From 5d0045eedf4b138c031accac2b1fa1e8d6f3f7c6 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Wed, 2 Dec 2020 01:36:41 +0800 Subject: [PATCH 0629/1009] [SPARK-33611][UI] Avoid encoding twice on the query parameter of rewritten proxy URL ### What changes were proposed in this pull request? When running Spark behind a reverse proxy(e.g. Nginx, Apache HTTP server), the request URL can be encoded twice if we pass the query string directly to the constructor of `java.net.URI`: ``` > val uri = "http://localhost:8081/test" > val query = "order%5B0%5D%5Bcolumn%5D=0" // query string of URL from the reverse proxy > val rewrittenURI = URI.create(uri.toString()) > new URI(rewrittenURI.getScheme(), rewrittenURI.getAuthority(), rewrittenURI.getPath(), query, rewrittenURI.getFragment()).toString result: http://localhost:8081/test?order%255B0%255D%255Bcolumn%255D=0 ``` In Spark's stage page, the URL of "/taskTable" contains query parameter order[0][dir]. After encoding twice, the query parameter becomes `order%255B0%255D%255Bdir%255D` and it will be decoded as `order%5B0%5D%5Bdir%5D` instead of `order[0][dir]`. As a result, there will be NullPointerException from https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala#L176 Other than that, the other parameter may not work as expected after encoded twice. This PR is to fix the bug by calling the method `URI.create(String URL)` directly. This convenience method can avoid encoding twice on the query parameter. ``` > val uri = "http://localhost:8081/test" > val query = "order%5B0%5D%5Bcolumn%5D=0" > URI.create(s"$uri?$query").toString result: http://localhost:8081/test?order%5B0%5D%5Bcolumn%5D=0 > URI.create(s"$uri?$query").getQuery result: order[0][column]=0 ``` ### Why are the changes needed? Fix a potential bug when Spark's reverse proxy is enabled. The bug itself is similar to https://github.com/apache/spark/pull/29271. 
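Concretely, after the fix the proxy rewrite preserves the original percent-encoding. The sketch below mirrors the unit test added in this patch (it runs in the `org.apache.spark.ui` test scope because `JettyUtils` is `private[spark]`):

```scala
val rewritten = JettyUtils.createProxyURI(
  "/worker-id", "http://localhost:8081", "/worker-id/json",
  "order%5B0%5D%5Bcolumn%5D=0")
// The query string is no longer encoded a second time.
assert(rewritten.toString == "http://localhost:8081/json?order%5B0%5D%5Bcolumn%5D=0")
```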
### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add a new unit test. Also, Manual UI testing for master, worker and app UI with an nginx proxy Spark config: ``` spark.ui.port 8080 spark.ui.reverseProxy=true spark.ui.reverseProxyUrl=/path/to/spark/ ``` nginx config: ``` server { listen 9000; set $SPARK_MASTER http://127.0.0.1:8080; # split spark UI path into prefix and local path within master UI location ~ ^(/path/to/spark/) { # strip prefix when forwarding request rewrite /path/to/spark(/.*) $1 break; #rewrite /path/to/spark/ "/" ; # forward to spark master UI proxy_pass $SPARK_MASTER; proxy_intercept_errors on; error_page 301 302 307 = handle_redirects; } location handle_redirects { set $saved_redirect_location '$upstream_http_location'; proxy_pass $saved_redirect_location; } } ``` Closes #30552 from gengliangwang/decodeProxyRedirect. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../scala/org/apache/spark/ui/JettyUtils.scala | 16 ++++++---------- .../test/scala/org/apache/spark/ui/UISuite.scala | 9 +++++++++ 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala index 2a3597e323543..663da0d33e20b 100644 --- a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala @@ -401,17 +401,13 @@ private[spark] object JettyUtils extends Logging { uri.append(rest) } - val rewrittenURI = URI.create(uri.toString()) - if (query != null) { - return new URI( - rewrittenURI.getScheme(), - rewrittenURI.getAuthority(), - rewrittenURI.getPath(), - query, - rewrittenURI.getFragment() - ).normalize() + val queryString = if (query == null) { + "" + } else { + s"?$query" } - rewrittenURI.normalize() + // SPARK-33611: use method `URI.create` to avoid percent-encoding twice on the query string. + URI.create(uri.toString() + queryString).normalize() } def createProxyLocationHeader( diff --git a/core/src/test/scala/org/apache/spark/ui/UISuite.scala b/core/src/test/scala/org/apache/spark/ui/UISuite.scala index 56026eaa0072b..c7e1dfe71d563 100644 --- a/core/src/test/scala/org/apache/spark/ui/UISuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/UISuite.scala @@ -216,6 +216,15 @@ class UISuite extends SparkFunSuite { assert(rewrittenURI === null) } + test("SPARK-33611: Avoid encoding twice on the query parameter of proxy rewrittenURI") { + val prefix = "/worker-id" + val target = "http://localhost:8081" + val path = "/worker-id/json" + val rewrittenURI = + JettyUtils.createProxyURI(prefix, target, path, "order%5B0%5D%5Bcolumn%5D=0") + assert(rewrittenURI.toString === "http://localhost:8081/json?order%5B0%5D%5Bcolumn%5D=0") + } + test("verify rewriting location header for reverse proxy") { val clientRequest = mock(classOf[HttpServletRequest]) var headerValue = "http://localhost:4040/jobs" From 5a1c5ac8073ab46c145146485c71cc6aceb8c5b8 Mon Sep 17 00:00:00 2001 From: zero323 Date: Tue, 1 Dec 2020 10:44:14 -0800 Subject: [PATCH 0630/1009] [SPARK-33622][R][ML] Add array_to_vector to SparkR ### What changes were proposed in this pull request? This PR adds `array_to_vector` to R API. ### Why are the changes needed? Feature parity. ### Does this PR introduce _any_ user-facing change? New function exposed in the public API. ### How was this patch tested? New unit test. Manual verification of the documentation examples. Closes #30561 from zero323/SPARK-33622. 
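For reference, the SparkR wrapper delegates to `org.apache.spark.ml.functions.array_to_vector` via `callJStatic`; a rough Scala equivalent of the documented R example (the data path is illustrative and a running `spark` session is assumed) is:

```scala
import org.apache.spark.ml.functions.{array_to_vector, vector_to_array}
import org.apache.spark.sql.functions.col

val df = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
df.withColumn("array", vector_to_array(col("features")))   // vector -> array
  .withColumn("vector", array_to_vector(col("array")))     // array -> vector
  .select("array", "vector")
  .show(1)
```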
Authored-by: zero323 Signed-off-by: Dongjoon Hyun --- R/pkg/NAMESPACE | 1 + R/pkg/R/functions.R | 26 +++++++++++++++++++++++++- R/pkg/R/generics.R | 4 ++++ R/pkg/tests/fulltests/test_sparkSQL.R | 3 ++- 4 files changed, 32 insertions(+), 2 deletions(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 91f6e6dc8a0e6..6ef2df5731e10 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -223,6 +223,7 @@ exportMethods("%<=>%", "array_remove", "array_repeat", "array_sort", + "array_to_vector", "array_transform", "arrays_overlap", "array_union", diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 99406443165d5..58d07a8d8fc2f 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -357,7 +357,13 @@ NULL #' @examples #' \dontrun{ #' df <- read.df("data/mllib/sample_libsvm_data.txt", source = "libsvm") -#' head(select(df, vector_to_array(df$features))) +#' head( +#' withColumn( +#' withColumn(df, "array", vector_to_array(df$features)), +#' "vector", +#' array_to_vector(column("array")) +#' ) +#' ) #' } NULL @@ -4609,6 +4615,24 @@ setMethod("timestamp_seconds", column(jc) }) +#' @details +#' \code{array_to_vector} Converts a column of array of numeric type into +#' a column of dense vectors in MLlib +#' +#' @rdname column_ml_functions +#' @aliases array_to_vector array_to_vector,Column-method +#' @note array_to_vector since 3.1.0 +setMethod("array_to_vector", + signature(x = "Column"), + function(x) { + jc <- callJStatic( + "org.apache.spark.ml.functions", + "array_to_vector", + x@jc + ) + column(jc) + }) + #' @details #' \code{vector_to_array} Converts a column of MLlib sparse/dense vectors into #' a column of dense arrays. diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 1fe6599bf1b97..fb830aa686f72 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -838,6 +838,10 @@ setGeneric("array_repeat", function(x, count) { standardGeneric("array_repeat") #' @name NULL setGeneric("array_sort", function(x) { standardGeneric("array_sort") }) +#' @rdname column_ml_functions +#' @name NULL +setGeneric("array_to_vector", function(x) { standardGeneric("array_to_vector") }) + #' @rdname column_collection_functions #' @name NULL setGeneric("array_transform", function(x, f) { standardGeneric("array_transform") }) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 833f77786c80b..c623f534f706c 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1425,7 +1425,8 @@ test_that("column functions", { c25 <- overlay(c1, c2, c3, c3) + overlay(c1, c2, c3) + overlay(c1, c2, 1) + overlay(c1, c2, 3, 4) c26 <- timestamp_seconds(c1) + vector_to_array(c) + - vector_to_array(c, "float32") + vector_to_array(c, "float64") + vector_to_array(c, "float32") + vector_to_array(c, "float64") + + array_to_vector(c) c27 <- nth_value("x", 1L) + nth_value("y", 2, TRUE) + nth_value(column("v"), 3) + nth_value(column("z"), 4L, FALSE) c28 <- asc_nulls_first(c1) + asc_nulls_last(c1) + From f71f34572d5510e50953ccd0191c833962b63a32 Mon Sep 17 00:00:00 2001 From: Thomas Graves Date: Wed, 2 Dec 2020 09:50:02 +0900 Subject: [PATCH 0631/1009] [SPARK-33544][SQL] Optimize size of CreateArray/CreateMap to be the size of its children ### What changes were proposed in this pull request? https://issues.apache.org/jira/browse/SPARK-32295 added in an optimization to insert a filter for not null and size > 0 when using inner explode/inline. 
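As a reminder of what that inferred filter looks like (a sketch, not code from either patch; run in spark-shell where `spark` and its implicits are available):

```scala
import org.apache.spark.sql.functions.explode
import spark.implicits._

val df = Seq(Seq(1, 2), null, Seq.empty[Int]).toDF("arr")
// SPARK-32295 effectively rewrites this into
//   df.where("arr IS NOT NULL AND size(arr) > 0").select(explode($"arr"))
// which is visible as a Filter node in the optimized plan:
df.select(explode($"arr")).explain(true)
```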
This is fine in most cases but the extra filter is not needed if the explode is with a create array and not using Literals (it already handles LIterals). When this happens you know that the values aren't null and it has a size. It already handles the empty array. The not null check is already optimized out because Createarray and createMap are not nullable, that leaves the size > 0 check. To handle that this PR makes it so that the size > 0 check gets optimized in ConstantFolding to be the size of the children in the array or map. That makes it a literal and then makes it ultimately be optimized out. ### Why are the changes needed? remove unneeded filter ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? Unit tests added and manually tested various cases Closes #30504 from tgravescs/SPARK-33544. Lead-authored-by: Thomas Graves Co-authored-by: Thomas Graves Co-authored-by: Hyukjin Kwon Signed-off-by: HyukjinKwon --- .../expressions/complexTypeCreator.scala | 12 ++++-- .../sql/catalyst/optimizer/expressions.scala | 13 ++++++ .../optimizer/ConstantFoldingSuite.scala | 36 ++++++++++++++++ .../InferFiltersFromGenerateSuite.scala | 41 ++++++++++++++++++- 4 files changed, 98 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 3958cfd0af2a3..f0f92e2d935f1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -30,6 +30,12 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String +/** + * Trait to indicate the expression doesn't have any side effects. This can be used + * to indicate its ok to optimize it out under certain circumstances. + */ +trait NoSideEffect + /** * Returns an Array containing the evaluation of all children expressions. 
*/ @@ -42,7 +48,7 @@ import org.apache.spark.unsafe.types.UTF8String """, since = "1.1.0") case class CreateArray(children: Seq[Expression], useStringTypeWhenEmpty: Boolean) - extends Expression { + extends Expression with NoSideEffect { def this(children: Seq[Expression]) = { this(children, SQLConf.get.getConf(SQLConf.LEGACY_CREATE_EMPTY_COLLECTION_USING_STRING_TYPE)) @@ -160,7 +166,7 @@ private [sql] object GenArrayData { """, since = "2.0.0") case class CreateMap(children: Seq[Expression], useStringTypeWhenEmpty: Boolean) - extends Expression { + extends Expression with NoSideEffect{ def this(children: Seq[Expression]) = { this(children, SQLConf.get.getConf(SQLConf.LEGACY_CREATE_EMPTY_COLLECTION_USING_STRING_TYPE)) @@ -379,7 +385,7 @@ object CreateStruct { """, since = "1.5.0") // scalastyle:on line.size.limit -case class CreateNamedStruct(children: Seq[Expression]) extends Expression { +case class CreateNamedStruct(children: Seq[Expression]) extends Expression with NoSideEffect { lazy val (nameExprs, valExprs) = children.grouped(2).map { case Seq(name, value) => (name, value) }.toList.unzip diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index d1eb3b07d3d5f..4725f49340451 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -41,6 +41,14 @@ import org.apache.spark.sql.types._ * equivalent [[Literal]] values. */ object ConstantFolding extends Rule[LogicalPlan] { + + private def hasNoSideEffect(e: Expression): Boolean = e match { + case _: Attribute => true + case _: Literal => true + case _: NoSideEffect => e.children.forall(hasNoSideEffect) + case _ => false + } + def apply(plan: LogicalPlan): LogicalPlan = plan transform { case q: LogicalPlan => q transformExpressionsDown { // Skip redundant folding of literals. This rule is technically not necessary. Placing this @@ -48,6 +56,11 @@ object ConstantFolding extends Rule[LogicalPlan] { // object and running eval unnecessarily. case l: Literal => l + case Size(c: CreateArray, _) if c.children.forall(hasNoSideEffect) => + Literal(c.children.length) + case Size(c: CreateMap, _) if c.children.forall(hasNoSideEffect) => + Literal(c.children.length / 2) + // Fold expressions that are foldable. 
case e if e.foldable => Literal.create(e.eval(EmptyRow), e.dataType) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala index 23ab6b2df3e64..fd9b58a7a06aa 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala @@ -263,4 +263,40 @@ class ConstantFoldingSuite extends PlanTest { comparePlans(optimized, correctAnswer) } + + test("SPARK-33544: Constant folding test with sideaffects") { + val originalQuery = + testRelation + .select('a) + .where(Size(CreateArray(Seq(AssertTrue(false)))) > 0) + + val optimized = Optimize.execute(originalQuery.analyze) + comparePlans(optimized, originalQuery.analyze) + } + + object OptimizeForCreate extends RuleExecutor[LogicalPlan] { + val batches = + Batch("AnalysisNodes", Once, + EliminateSubqueryAliases) :: + Batch("ConstantFolding", FixedPoint(4), + OptimizeIn, + ConstantFolding, + PruneFilters) :: Nil + } + + test("SPARK-33544: Constant folding test CreateArray") { + val originalQuery = + testRelation + .select('a) + .where(Size(CreateArray(Seq('a))) > 0) + + val optimized = OptimizeForCreate.execute(originalQuery.analyze) + + val correctAnswer = + testRelation + .select('a) + .analyze + + comparePlans(optimized, correctAnswer) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromGenerateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromGenerateSuite.scala index 3f83971aa9821..c6fa1bd6e415c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromGenerateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromGenerateSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.catalyst.optimizer +import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ @@ -33,7 +34,7 @@ class InferFiltersFromGenerateSuite extends PlanTest { val testRelation = LocalRelation('a.array(StructType(Seq( StructField("x", IntegerType), StructField("y", IntegerType) - )))) + ))), 'c1.string, 'c2.string) Seq(Explode(_), PosExplode(_), Inline(_)).foreach { f => val generator = f('a) @@ -72,4 +73,42 @@ class InferFiltersFromGenerateSuite extends PlanTest { comparePlans(optimized, originalQuery) } } + + // setup rules to test inferFilters with ConstantFolding to make sure + // the Filter rule added in inferFilters is removed again when doing + // explode with CreateArray/CreateMap + object OptimizeInferAndConstantFold extends RuleExecutor[LogicalPlan] { + val batches = + Batch("AnalysisNodes", Once, + EliminateSubqueryAliases) :: + Batch("Infer Filters", Once, InferFiltersFromGenerate) :: + Batch("ConstantFolding after", FixedPoint(4), + ConstantFolding, + NullPropagation, + PruneFilters) :: Nil + } + + Seq(Explode(_), PosExplode(_)).foreach { f => + val createArrayExplode = f(CreateArray(Seq('c1))) + test("Don't infer filters from CreateArray " + createArrayExplode) { + val originalQuery = testRelation.generate(createArrayExplode).analyze + val optimized = OptimizeInferAndConstantFold.execute(originalQuery) + comparePlans(optimized, 
originalQuery) + } + val createMapExplode = f(CreateMap(Seq('c1, 'c2))) + test("Don't infer filters from CreateMap " + createMapExplode) { + val originalQuery = testRelation.generate(createMapExplode).analyze + val optimized = OptimizeInferAndConstantFold.execute(originalQuery) + comparePlans(optimized, originalQuery) + } + } + + Seq(Inline(_)).foreach { f => + val createArrayStructExplode = f(CreateArray(Seq(CreateStruct(Seq('c1))))) + test("Don't infer filters from CreateArray " + createArrayStructExplode) { + val originalQuery = testRelation.generate(createArrayStructExplode).analyze + val optimized = OptimizeInferAndConstantFold.execute(originalQuery) + comparePlans(optimized, originalQuery) + } + } } From 51ebcd95a5f7e377245f302a91e90f9b3db9953e Mon Sep 17 00:00:00 2001 From: Cheng Su Date: Wed, 2 Dec 2020 10:17:00 +0900 Subject: [PATCH 0632/1009] [SPARK-32863][SS] Full outer stream-stream join ### What changes were proposed in this pull request? This PR is to add full outer stream-stream join, and the implementation of full outer join is: * For left side input row, check if there's a match on right side state store. * if there's a match, output the joined row, o.w. output nothing. Put the row in left side state store. * For right side input row, check if there's a match on left side state store. * if there's a match, output the joined row, o.w. output nothing. Put the row in right side state store. * State store eviction: evict rows from left/right side state store below watermark, and output rows never matched before (a combination of left outer and right outer join). ### Why are the changes needed? Enable more use cases for spark stream-stream join. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added unit tests in `UnsupportedOperationChecker.scala` and `StreamingJoinSuite.scala`. Closes #30395 from c21/stream-foj. 
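For reference, the user-facing pattern this enables is the existing stream-stream join API with `full_outer` as the join type. The sketch below follows the structured streaming guide's ad-monetization example; `clicks` and `impressions` are hypothetical streaming DataFrames and the watermark delays are made up:

```scala
import org.apache.spark.sql.functions.expr

val clicksWithWatermark = clicks.withWatermark("clickTime", "1 hour")
val impressionsWithWatermark = impressions.withWatermark("impressionTime", "2 hours")

// Rows that never find a match are emitted with nulls on the other side
// once the watermark guarantees no future match can arrive.
val joined = clicksWithWatermark.join(
  impressionsWithWatermark,
  expr("""
    clickAdId = impressionAdId AND
    clickTime >= impressionTime AND
    clickTime <= impressionTime + interval 1 hour
  """),
  "full_outer")
```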
Authored-by: Cheng Su Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../UnsupportedOperationChecker.scala | 71 +++--- .../analysis/UnsupportedOperationsSuite.scala | 16 +- .../StreamingSymmetricHashJoinExec.scala | 57 +++-- .../sql/streaming/StreamingJoinSuite.scala | 209 +++++++++++++++++- 4 files changed, 297 insertions(+), 56 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala index 7dcc6a81b48cd..ab7d90098bfd3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala @@ -287,7 +287,7 @@ object UnsupportedOperationChecker extends Logging { throwError("dropDuplicates is not supported after aggregation on a " + "streaming DataFrame/Dataset") - case Join(left, right, joinType, condition, _) => + case j @ Join(left, right, joinType, condition, _) => if (left.isStreaming && right.isStreaming && outputMode != InternalOutputModes.Append) { throwError("Join between two streaming DataFrames/Datasets is not supported" + s" in ${outputMode} output mode, only in Append output mode") @@ -298,8 +298,14 @@ object UnsupportedOperationChecker extends Logging { // no further validations needed case FullOuter => - if (left.isStreaming || right.isStreaming) { - throwError("Full outer joins with streaming DataFrames/Datasets are not supported") + if (left.isStreaming && !right.isStreaming) { + throwError("FullOuter joins with streaming DataFrames/Datasets on the left " + + "and a static DataFrame/Dataset on the right is not supported") + } else if (!left.isStreaming && right.isStreaming) { + throwError("FullOuter joins with streaming DataFrames/Datasets on the right " + + "and a static DataFrame/Dataset on the left is not supported") + } else if (left.isStreaming && right.isStreaming) { + checkForStreamStreamJoinWatermark(j) } case LeftAnti => @@ -315,40 +321,17 @@ object UnsupportedOperationChecker extends Logging { throwError(s"$joinType join with a streaming DataFrame/Dataset " + "on the right and a static DataFrame/Dataset on the left is not supported") } else if (left.isStreaming && right.isStreaming) { - val watermarkInJoinKeys = StreamingJoinHelper.isWatermarkInJoinKeys(subPlan) - - val hasValidWatermarkRange = - StreamingJoinHelper.getStateValueWatermark( - left.outputSet, right.outputSet, condition, Some(1000000)).isDefined - - if (!watermarkInJoinKeys && !hasValidWatermarkRange) { - throwError( - s"Stream-stream $joinType join between two streaming DataFrame/Datasets " + - "is not supported without a watermark in the join keys, or a watermark on " + - "the nullable side and an appropriate range condition") - } + checkForStreamStreamJoinWatermark(j) } // We support streaming right outer joins with static on the left always, and with // stream on both sides under the appropriate conditions. 
case RightOuter => if (left.isStreaming && !right.isStreaming) { - throwError("Right outer join with a streaming DataFrame/Dataset on the left and " + + throwError("RightOuter join with a streaming DataFrame/Dataset on the left and " + "a static DataFrame/DataSet on the right not supported") } else if (left.isStreaming && right.isStreaming) { - val isWatermarkInJoinKeys = StreamingJoinHelper.isWatermarkInJoinKeys(subPlan) - - // Check if the nullable side has a watermark, and there's a range condition which - // implies a state value watermark on the first side. - val hasValidWatermarkRange = - StreamingJoinHelper.getStateValueWatermark( - right.outputSet, left.outputSet, condition, Some(1000000)).isDefined - - if (!isWatermarkInJoinKeys && !hasValidWatermarkRange) { - throwError("Stream-stream outer join between two streaming DataFrame/Datasets " + - "is not supported without a watermark in the join keys, or a watermark on " + - "the nullable side and an appropriate range condition") - } + checkForStreamStreamJoinWatermark(j) } case NaturalJoin(_) | UsingJoin(_, _) => @@ -438,4 +421,34 @@ object UnsupportedOperationChecker extends Logging { throw new AnalysisException( msg, operator.origin.line, operator.origin.startPosition, Some(operator)) } + + private def checkForStreamStreamJoinWatermark(join: Join): Unit = { + val watermarkInJoinKeys = StreamingJoinHelper.isWatermarkInJoinKeys(join) + + // Check if the nullable side has a watermark, and there's a range condition which + // implies a state value watermark on the first side. + val hasValidWatermarkRange = join.joinType match { + case LeftOuter | LeftSemi => StreamingJoinHelper.getStateValueWatermark( + join.left.outputSet, join.right.outputSet, join.condition, Some(1000000)).isDefined + case RightOuter => StreamingJoinHelper.getStateValueWatermark( + join.right.outputSet, join.left.outputSet, join.condition, Some(1000000)).isDefined + case FullOuter => + Seq((join.left.outputSet, join.right.outputSet), + (join.right.outputSet, join.left.outputSet)).exists { + case (attributesToFindStateWatermarkFor, attributesWithEventWatermark) => + StreamingJoinHelper.getStateValueWatermark(attributesToFindStateWatermarkFor, + attributesWithEventWatermark, join.condition, Some(1000000)).isDefined + } + case _ => + throwError( + s"Join type ${join.joinType} is not supported with streaming DataFrame/Dataset")(join) + } + + if (!watermarkInJoinKeys && !hasValidWatermarkRange) { + throwError( + s"Stream-stream ${join.joinType} join between two streaming DataFrame/Datasets " + + "is not supported without a watermark in the join keys, or a watermark on " + + "the nullable side and an appropriate range condition")(join) + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala index 3be417de472c6..cdc3f4275414c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala @@ -408,13 +408,15 @@ class UnsupportedOperationsSuite extends SparkFunSuite with SQLHelper { streamStreamSupported = false, expectedMsg = "is not supported in Update output mode") - // Full outer joins: only batch-batch is allowed + // Full outer joins: stream-batch/batch-stream join are not allowed, + // and stream-stream join is allowed 'conditionally' - see below 
check testBinaryOperationInStreamingPlan( - "full outer join", + "FullOuter join", _.join(_, joinType = FullOuter), streamStreamSupported = false, batchStreamSupported = false, - streamBatchSupported = false) + streamBatchSupported = false, + expectedMsg = "FullOuter join") // Left outer, left semi, left anti join: *-stream not allowed Seq((LeftOuter, "LeftOuter join"), (LeftSemi, "LeftSemi join"), (LeftAnti, "LeftAnti join")) @@ -429,14 +431,14 @@ class UnsupportedOperationsSuite extends SparkFunSuite with SQLHelper { // Right outer joins: stream-* not allowed testBinaryOperationInStreamingPlan( - "right outer join", + "RightOuter join", _.join(_, joinType = RightOuter), streamBatchSupported = false, streamStreamSupported = false, - expectedMsg = "outer join") + expectedMsg = "RightOuter join") - // Left outer, right outer, left semi joins - Seq(LeftOuter, RightOuter, LeftSemi).foreach { joinType => + // Left outer, right outer, full outer, left semi joins + Seq(LeftOuter, RightOuter, FullOuter, LeftSemi).foreach { joinType => // Update mode not allowed assertNotSupportedInStreamingPlan( s"$joinType join with stream-stream relations and update mode", diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala index 8b69205530769..73d2f826f1126 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala @@ -165,8 +165,14 @@ case class StreamingSymmetricHashJoinExec( throw new IllegalArgumentException(errorMessageForJoinType) } + private def throwBadStateFormatVersionException(): Nothing = { + throw new IllegalStateException("Unexpected state format version! " + + s"version $stateFormatVersion") + } + require( - joinType == Inner || joinType == LeftOuter || joinType == RightOuter || joinType == LeftSemi, + joinType == Inner || joinType == LeftOuter || joinType == RightOuter || joinType == FullOuter || + joinType == LeftSemi, errorMessageForJoinType) require(leftKeys.map(_.dataType) == rightKeys.map(_.dataType)) @@ -186,6 +192,7 @@ case class StreamingSymmetricHashJoinExec( case _: InnerLike => left.output ++ right.output case LeftOuter => left.output ++ right.output.map(_.withNullability(true)) case RightOuter => left.output.map(_.withNullability(true)) ++ right.output + case FullOuter => (left.output ++ right.output).map(_.withNullability(true)) case LeftSemi => left.output case _ => throwBadJoinTypeException() } @@ -195,6 +202,7 @@ case class StreamingSymmetricHashJoinExec( PartitioningCollection(Seq(left.outputPartitioning, right.outputPartitioning)) case LeftOuter => left.outputPartitioning case RightOuter => right.outputPartitioning + case FullOuter => UnknownPartitioning(left.outputPartitioning.numPartitions) case LeftSemi => left.outputPartitioning case _ => throwBadJoinTypeException() } @@ -250,14 +258,14 @@ case class StreamingSymmetricHashJoinExec( // Join one side input using the other side's buffered/state rows. Here is how it is done. // // - `leftSideJoiner.storeAndJoinWithOtherSide(rightSideJoiner)` - // - Inner, Left Outer, Right Outer Join: generates all rows from matching new left input - // with stored right input, and also stores all the left input. 
+ // - Inner, Left Outer, Right Outer, Full Outer Join: generates all rows from matching + // new left input with stored right input, and also stores all the left input. // - Left Semi Join: generates all new left input rows from matching new left input with // stored right input, and also stores all the non-matched left input. // // - `rightSideJoiner.storeAndJoinWithOtherSide(leftSideJoiner)` - // - Inner, Left Outer, Right Outer Join: generates all rows from matching new right input - // with stored left input, and also stores all the right input. + // - Inner, Left Outer, Right Outer, Full Outer Join: generates all rows from matching + // new right input with stored left input, and also stores all the right input. // It also generates all rows from matching new left input with new right input, since // the new left input has become stored by that point. This tiny asymmetry is necessary // to avoid duplication. @@ -314,9 +322,7 @@ case class StreamingSymmetricHashJoinExec( stateFormatVersion match { case 1 => matchesWithRightSideState(new UnsafeRowPair(kv.key, kv.value)) case 2 => kv.matched - case _ => - throw new IllegalStateException("Unexpected state format version! " + - s"version $stateFormatVersion") + case _ => throwBadStateFormatVersionException() } }.map(pair => joinedRow.withLeft(pair.value).withRight(nullRight)) @@ -333,13 +339,23 @@ case class StreamingSymmetricHashJoinExec( stateFormatVersion match { case 1 => matchesWithLeftSideState(new UnsafeRowPair(kv.key, kv.value)) case 2 => kv.matched - case _ => - throw new IllegalStateException("Unexpected state format version! " + - s"version $stateFormatVersion") + case _ => throwBadStateFormatVersionException() } }.map(pair => joinedRow.withLeft(nullLeft).withRight(pair.value)) hashJoinOutputIter ++ outerOutputIter + case FullOuter => + lazy val isKeyToValuePairMatched = (kv: KeyToValuePair) => + stateFormatVersion match { + case 2 => kv.matched + case _ => throwBadStateFormatVersionException() + } + val leftSideOutputIter = leftSideJoiner.removeOldState().filterNot( + isKeyToValuePairMatched).map(pair => joinedRow.withLeft(pair.value).withRight(nullRight)) + val rightSideOutputIter = rightSideJoiner.removeOldState().filterNot( + isKeyToValuePairMatched).map(pair => joinedRow.withLeft(nullLeft).withRight(pair.value)) + + hashJoinOutputIter ++ leftSideOutputIter ++ rightSideOutputIter case _ => throwBadJoinTypeException() } @@ -372,16 +388,21 @@ case class StreamingSymmetricHashJoinExec( // For inner and left semi joins, we have to remove unnecessary state rows from both sides // if possible. // - // For outer joins, we have already removed unnecessary state rows from the outer side - // (e.g., left side for left outer join) while generating the outer "null" outputs. Now, we - // have to remove unnecessary state rows from the other side (e.g., right side for the left - // outer join) if possible. In all cases, nothing needs to be outputted, hence the removal - // needs to be done greedily by immediately consuming the returned iterator. + // For left outer and right outer joins, we have already removed unnecessary state rows from + // the outer side (e.g., left side for left outer join) while generating the outer "null" + // outputs. Now, we have to remove unnecessary state rows from the other side (e.g., right + // side for the left outer join) if possible. In all cases, nothing needs to be outputted, + // hence the removal needs to be done greedily by immediately consuming the returned + // iterator. 
+ // + // For full outer joins, we have already removed unnecessary states from both sides, so + // nothing needs to be outputted here. val cleanupIter = joinType match { case Inner | LeftSemi => leftSideJoiner.removeOldState() ++ rightSideJoiner.removeOldState() case LeftOuter => rightSideJoiner.removeOldState() case RightOuter => leftSideJoiner.removeOldState() + case FullOuter => Iterator.empty case _ => throwBadJoinTypeException() } while (cleanupIter.hasNext) { @@ -491,9 +512,9 @@ case class StreamingSymmetricHashJoinExec( } val generateFilteredJoinedRow: InternalRow => Iterator[InternalRow] = joinSide match { - case LeftSide if joinType == LeftOuter => + case LeftSide if joinType == LeftOuter || joinType == FullOuter => (row: InternalRow) => Iterator(generateJoinedRow(row, nullRight)) - case RightSide if joinType == RightOuter => + case RightSide if joinType == RightOuter || joinType == FullOuter => (row: InternalRow) => Iterator(generateJoinedRow(row, nullLeft)) case _ => (_: InternalRow) => Iterator.empty } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala index a25616af360b1..476abcbf5c241 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala @@ -99,7 +99,8 @@ abstract class StreamingJoinSuite } else if (joinType == "right_outer") { joined.select(right("key"), right("window.end").cast("long"), 'leftValue, 'rightValue) } else { - joined + joined.select(left("key"), left("window.end").cast("long"), 'leftValue, + right("key"), right("window.end").cast("long"), 'rightValue) } (leftInput, rightInput, select) @@ -128,7 +129,8 @@ abstract class StreamingJoinSuite } else if (joinType == "right_outer") { joined.select(right("key"), right("window.end").cast("long"), 'leftValue, 'rightValue) } else { - joined + joined.select(left("key"), left("window.end").cast("long"), 'leftValue, + right("key"), right("window.end").cast("long"), 'rightValue) } (leftInput, rightInput, select) @@ -1070,6 +1072,209 @@ class StreamingOuterJoinSuite extends StreamingJoinSuite { } } +class StreamingFullOuterJoinSuite extends StreamingJoinSuite { + + test("windowed full outer join") { + val (leftInput, rightInput, joined) = setupWindowedJoin("full_outer") + + testStream(joined)( + MultiAddData(leftInput, 1, 2, 3, 4, 5)(rightInput, 3, 4, 5, 6, 7), + CheckNewAnswer(Row(3, 10, 6, 9), Row(4, 10, 8, 12), Row(5, 10, 10, 15)), + // states + // left: 1, 2, 3, 4 ,5 + // right: 3, 4, 5, 6, 7 + assertNumStateRows(total = 10, updated = 10), + MultiAddData(leftInput, 21)(rightInput, 22), + // Watermark = 11, should remove rows having window=[0,10]. + CheckNewAnswer(Row(1, 10, 2, null), Row(2, 10, 4, null), Row(6, 10, null, 18), + Row(7, 10, null, 21)), + // states + // left: 21 + // right: 22 + // + // states evicted + // left: 1, 2, 3, 4 ,5 (below watermark) + // right: 3, 4, 5, 6, 7 (below watermark) + assertNumStateRows(total = 2, updated = 2), + AddData(leftInput, 22), + CheckNewAnswer(Row(22, 30, 44, 66)), + // states + // left: 21, 22 + // right: 22 + assertNumStateRows(total = 3, updated = 1), + StopStream, + StartStream(), + + AddData(leftInput, 1), + // Row not add as 1 < state key watermark = 12. 
+ CheckNewAnswer(), + // states + // left: 21, 22 + // right: 22 + assertNumStateRows(total = 3, updated = 0, droppedByWatermark = 1), + AddData(rightInput, 5), + // Row not add as 5 < state key watermark = 12. + CheckNewAnswer(), + // states + // left: 21, 22 + // right: 22 + assertNumStateRows(total = 3, updated = 0, droppedByWatermark = 1) + ) + } + + test("full outer early state exclusion on left") { + val (leftInput, rightInput, joined) = setupWindowedJoinWithLeftCondition("full_outer") + + testStream(joined)( + MultiAddData(leftInput, 1, 2, 3)(rightInput, 3, 4, 5), + // The left rows with leftValue <= 4 should generate their outer join rows now and + // not get added to the state. + CheckNewAnswer(Row(1, 10, 2, null, null, null), Row(2, 10, 4, null, null, null), + Row(3, 10, 6, 3, 10, "9")), + // states + // left: 3 + // right: 3, 4, 5 + assertNumStateRows(total = 4, updated = 4), + // Generate outer join result for all non-matched rows when the watermark advances. + MultiAddData(leftInput, 20)(rightInput, 21), + CheckNewAnswer(Row(null, null, null, 4, 10, "12"), Row(null, null, null, 5, 10, "15")), + // states + // left: 20 + // right: 21 + // + // states evicted + // left: 3 (below watermark) + // right: 3, 4, 5 (below watermark) + assertNumStateRows(total = 2, updated = 2), + AddData(rightInput, 20), + CheckNewAnswer(Row(20, 30, 40, 20, 30, "60")), + // states + // left: 20 + // right: 21, 20 + assertNumStateRows(total = 3, updated = 1) + ) + } + + test("full outer early state exclusion on right") { + val (leftInput, rightInput, joined) = setupWindowedJoinWithRightCondition("full_outer") + + testStream(joined)( + MultiAddData(leftInput, 3, 4, 5)(rightInput, 1, 2, 3), + // The right rows with rightValue <= 7 should generate their outer join rows now, + // and never be added to the state. + // The right row with rightValue = 9 > 7, hence joined and added to state. + CheckNewAnswer(Row(null, null, null, 1, 10, "3"), Row(null, null, null, 2, 10, "6"), + Row(3, 10, 6, 3, 10, "9")), + // states + // left: 3, 4, 5 + // right: 3 + assertNumStateRows(total = 4, updated = 4), + // Generate outer join result for all non-matched rows when the watermark advances. + MultiAddData(leftInput, 20)(rightInput, 21), + CheckNewAnswer(Row(4, 10, 8, null, null, null), Row(5, 10, 10, null, null, null)), + // states + // left: 20 + // right: 21 + // + // states evicted + // left: 3, 4, 5 (below watermark) + // right: 3 (below watermark) + assertNumStateRows(total = 2, updated = 2), + AddData(rightInput, 20), + CheckNewAnswer(Row(20, 30, 40, 20, 30, "60")), + // states + // left: 20 + // right: 21, 20 + assertNumStateRows(total = 3, updated = 1) + ) + } + + test("full outer join with watermark range condition") { + val (leftInput, rightInput, joined) = setupWindowedJoinWithRangeCondition("full_outer") + + testStream(joined)( + AddData(leftInput, (1, 5), (3, 5)), + CheckNewAnswer(), + // states + // left: (1, 5), (3, 5) + // right: nothing + assertNumStateRows(total = 2, updated = 2), + AddData(rightInput, (1, 10), (2, 5)), + // Match left row in the state. + CheckNewAnswer(Row(1, 1, 5, 10)), + // states + // left: (1, 5), (3, 5) + // right: (1, 10), (2, 5) + assertNumStateRows(total = 4, updated = 2), + AddData(rightInput, (1, 9)), + // Match left row in the state. + CheckNewAnswer(Row(1, 1, 5, 9)), + // states + // left: (1, 5), (3, 5) + // right: (1, 10), (2, 5), (1, 9) + assertNumStateRows(total = 5, updated = 1), + // Increase event time watermark to 20s by adding data with time = 30s on both inputs. 
+ AddData(leftInput, (1, 7), (1, 30)), + CheckNewAnswer(Row(1, 1, 7, 9), Row(1, 1, 7, 10)), + // states + // left: (1, 5), (3, 5), (1, 7), (1, 30) + // right: (1, 10), (2, 5), (1, 9) + assertNumStateRows(total = 7, updated = 2), + // Watermark = 30 - 10 = 20, no matched row. + // Generate outer join result for all non-matched rows when the watermark advances. + AddData(rightInput, (0, 30)), + CheckNewAnswer(Row(3, null, 5, null), Row(null, 2, null, 5)), + // states + // left: (1, 30) + // right: (0, 30) + // + // states evicted + // left: (1, 5), (3, 5), (1, 5) (below watermark = 20) + // right: (1, 10), (2, 5), (1, 9) (below watermark = 20) + assertNumStateRows(total = 2, updated = 1) + ) + } + + test("self full outer join") { + val (inputStream, query) = setupWindowedSelfJoin("full_outer") + + testStream(query)( + AddData(inputStream, (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L)), + CheckNewAnswer(Row(2, 2L, 2, 2L), Row(4, 4L, 4, 4L)), + // batch 1 - global watermark = 0 + // states + // left: (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L) + // right: (2, 2L), (4, 4L) + assertNumStateRows(total = 7, updated = 7), + AddData(inputStream, (6, 6L), (7, 7L), (8, 8L), (9, 9L), (10, 10L)), + CheckNewAnswer(Row(6, 6L, 6, 6L), Row(8, 8L, 8, 8L), Row(10, 10L, 10, 10L)), + // batch 2 - global watermark = 5 + // states + // left: (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L), (6, 6L), (7, 7L), (8, 8L), + // (9, 9L), (10, 10L) + // right: (6, 6L), (8, 8L), (10, 10L) + // + // states evicted + // left: nothing (it waits for 5 seconds more than watermark due to join condition) + // right: (2, 2L), (4, 4L) + assertNumStateRows(total = 13, updated = 8), + AddData(inputStream, (11, 11L), (12, 12L), (13, 13L), (14, 14L), (15, 15L)), + CheckNewAnswer(Row(12, 12L, 12, 12L), Row(14, 14L, 14, 14L), Row(1, 1L, null, null), + Row(3, 3L, null, null)), + // batch 3 - global watermark = 9 + // states + // left: (4, 4L), (5, 5L), (6, 6L), (7, 7L), (8, 8L), (9, 9L), (10, 10L), (11, 11L), + // (12, 12L), (13, 13L), (14, 14L), (15, 15L) + // right: (10, 10L), (12, 12L), (14, 14L) + // + // states evicted + // left: (1, 1L), (2, 2L), (3, 3L) + // right: (6, 6L), (8, 8L) + assertNumStateRows(total = 15, updated = 7) + ) + } +} + class StreamingLeftSemiJoinSuite extends StreamingJoinSuite { import testImplicits._ From a4788ee8c61e1373e6eded41bb57d84c68149968 Mon Sep 17 00:00:00 2001 From: Cheng Su Date: Wed, 2 Dec 2020 15:28:16 +0900 Subject: [PATCH 0633/1009] [MINOR][SS] Rename auxiliary protected methods in StreamingJoinSuite ### What changes were proposed in this pull request? Per request from https://github.com/apache/spark/pull/30395#issuecomment-735028698, here we remove `Windowed` from methods names `setupWindowedJoinWithRangeCondition` and `setupWindowedSelfJoin` as they don't join on time window. ### Why are the changes needed? There's no such official name for `windowed join`, so this is to help avoid confusion for future developers. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing unit tests. Closes #30563 from c21/stream-minor. 
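For context, the helpers being renamed here (used throughout the `StreamingFullOuterJoinSuite` added above) set up stream-stream joins roughly like the following user-level sketch. It is only an illustration: the stream names, column names, watermark delays and the range bound are assumptions, not code from this suite.

```scala
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.expr

// Hypothetical streaming inputs, e.g. built from MemoryStream and converted to DataFrames.
def fullOuterJoinWithRangeCondition(leftRaw: DataFrame, rightRaw: DataFrame): DataFrame = {
  val left = leftRaw.toDF("leftKey", "leftTime").withWatermark("leftTime", "10 seconds")
  val right = rightRaw.toDF("rightKey", "rightTime").withWatermark("rightTime", "10 seconds")

  // A stream-stream full outer join needs watermarks on both sides plus a time
  // constraint, so unmatched state can eventually be evicted and emitted with
  // nulls for the missing side (which is what the tests above assert).
  left.join(
    right,
    expr("leftKey = rightKey AND rightTime BETWEEN leftTime AND leftTime + INTERVAL 5 seconds"),
    "full_outer")
}
```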
Authored-by: Cheng Su Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../spark/sql/streaming/StreamingJoinSuite.scala | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala index 476abcbf5c241..d264886c8cf46 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala @@ -136,7 +136,7 @@ abstract class StreamingJoinSuite (leftInput, rightInput, select) } - protected def setupWindowedJoinWithRangeCondition(joinType: String) + protected def setupJoinWithRangeCondition(joinType: String) : (MemoryStream[(Int, Int)], MemoryStream[(Int, Int)], DataFrame) = { val leftInput = MemoryStream[(Int, Int)] @@ -167,7 +167,7 @@ abstract class StreamingJoinSuite (leftInput, rightInput, select) } - protected def setupWindowedSelfJoin(joinType: String) + protected def setupSelfJoin(joinType: String) : (MemoryStream[(Int, Long)], DataFrame) = { val inputStream = MemoryStream[(Int, Long)] @@ -750,7 +750,7 @@ class StreamingOuterJoinSuite extends StreamingJoinSuite { ("right_outer", Row(null, 2, null, 5)) ).foreach { case (joinType: String, outerResult) => test(s"${joinType.replaceAllLiterally("_", " ")} with watermark range condition") { - val (leftInput, rightInput, joined) = setupWindowedJoinWithRangeCondition(joinType) + val (leftInput, rightInput, joined) = setupJoinWithRangeCondition(joinType) testStream(joined)( AddData(leftInput, (1, 5), (3, 5)), @@ -830,7 +830,7 @@ class StreamingOuterJoinSuite extends StreamingJoinSuite { } test("SPARK-26187 self left outer join should not return outer nulls for already matched rows") { - val (inputStream, query) = setupWindowedSelfJoin("left_outer") + val (inputStream, query) = setupSelfJoin("left_outer") testStream(query)( AddData(inputStream, (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L)), @@ -1190,7 +1190,7 @@ class StreamingFullOuterJoinSuite extends StreamingJoinSuite { } test("full outer join with watermark range condition") { - val (leftInput, rightInput, joined) = setupWindowedJoinWithRangeCondition("full_outer") + val (leftInput, rightInput, joined) = setupJoinWithRangeCondition("full_outer") testStream(joined)( AddData(leftInput, (1, 5), (3, 5)), @@ -1236,7 +1236,7 @@ class StreamingFullOuterJoinSuite extends StreamingJoinSuite { } test("self full outer join") { - val (inputStream, query) = setupWindowedSelfJoin("full_outer") + val (inputStream, query) = setupSelfJoin("full_outer") testStream(query)( AddData(inputStream, (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L)), @@ -1394,7 +1394,7 @@ class StreamingLeftSemiJoinSuite extends StreamingJoinSuite { } test("left semi join with watermark range condition") { - val (leftInput, rightInput, joined) = setupWindowedJoinWithRangeCondition("left_semi") + val (leftInput, rightInput, joined) = setupJoinWithRangeCondition("left_semi") testStream(joined)( AddData(leftInput, (1, 5), (3, 5)), @@ -1439,7 +1439,7 @@ class StreamingLeftSemiJoinSuite extends StreamingJoinSuite { } test("self left semi join") { - val (inputStream, query) = setupWindowedSelfJoin("left_semi") + val (inputStream, query) = setupSelfJoin("left_semi") testStream(query)( AddData(inputStream, (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L)), From 290aa021796139e503454d315e5cd350f836ab42 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 2 Dec 2020 
18:23:48 +0900 Subject: [PATCH 0634/1009] [SPARK-33618][CORE] Use hadoop-client instead of hadoop-client-api to make hadoop-aws work ### What changes were proposed in this pull request? This reverts commit SPARK-33212 (cb3fa6c9368e64184a5f7b19688181d11de9511c) mostly with three exceptions: 1. `SparkSubmitUtils` was updated recently by SPARK-33580 2. `resource-managers/yarn/pom.xml` was updated recently by SPARK-33104 to add `hadoop-yarn-server-resourcemanager` test dependency. 3. Adjust `com.fasterxml.jackson.module:jackson-module-jaxb-annotations` dependency in K8s module which is updated recently by SPARK-33471. ### Why are the changes needed? According to [HADOOP-16080](https://issues.apache.org/jira/browse/HADOOP-16080) since Apache Hadoop 3.1.1, `hadoop-aws` doesn't work with `hadoop-client-api`. It fails at write operation like the following. **1. Spark distribution with `-Phadoop-cloud`** ```scala $ bin/spark-shell --conf spark.hadoop.fs.s3a.access.key=$AWS_ACCESS_KEY_ID --conf spark.hadoop.fs.s3a.secret.key=$AWS_SECRET_ACCESS_KEY 20/11/30 23:01:24 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties Setting default log level to "WARN". To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel). Spark context available as 'sc' (master = local[*], app id = local-1606806088715). Spark session available as 'spark'. Welcome to ____ __ / __/__ ___ _____/ /__ _\ \/ _ \/ _ `/ __/ '_/ /___/ .__/\_,_/_/ /_/\_\ version 3.1.0-SNAPSHOT /_/ Using Scala version 2.12.10 (OpenJDK 64-Bit Server VM, Java 1.8.0_272) Type in expressions to have them evaluated. Type :help for more information. scala> spark.read.parquet("s3a://dongjoon/users.parquet").show 20/11/30 23:01:34 WARN MetricsConfig: Cannot locate configuration: tried hadoop-metrics2-s3a-file-system.properties,hadoop-metrics2.properties +------+--------------+----------------+ | name|favorite_color|favorite_numbers| +------+--------------+----------------+ |Alyssa| null| [3, 9, 15, 20]| | Ben| red| []| +------+--------------+----------------+ scala> Seq(1).toDF.write.parquet("s3a://dongjoon/out.parquet") 20/11/30 23:02:14 ERROR Executor: Exception in task 0.0 in stage 2.0 (TID 2)/ 1] java.lang.NoSuchMethodError: org.apache.hadoop.util.SemaphoredDelegatingExecutor.(Lcom/google/common/util/concurrent/ListeningExecutorService;IZ)V ``` **2. Spark distribution without `-Phadoop-cloud`** ```scala $ bin/spark-shell --conf spark.hadoop.fs.s3a.access.key=$AWS_ACCESS_KEY_ID --conf spark.hadoop.fs.s3a.secret.key=$AWS_SECRET_ACCESS_KEY -c spark.eventLog.enabled=true -c spark.eventLog.dir=s3a://dongjoon/spark-events/ --packages org.apache.hadoop:hadoop-aws:3.2.0,org.apache.hadoop:hadoop-common:3.2.0 ... java.lang.NoSuchMethodError: org.apache.hadoop.util.SemaphoredDelegatingExecutor.(Lcom/google/common/util/concurrent/ListeningExecutorService;IZ)V at org.apache.hadoop.fs.s3a.S3AFileSystem.create(S3AFileSystem.java:772) ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CI. Closes #30508 from dongjoon-hyun/SPARK-33212-REVERT. 
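As a purely diagnostic sketch (not part of this patch), the mismatch can be made visible from a Spark shell by listing the constructors that are actually on the classpath; with the shaded `hadoop-client-api`/`hadoop-client-runtime` jars there is no constructor taking the plain Guava `ListeningExecutorService` that `hadoop-aws` was compiled against, which matches the `NoSuchMethodError` above:

```scala
// Diagnostic only; assumes hadoop-aws/S3A is on the classpath of a spark-shell session.
val ctors = Class
  .forName("org.apache.hadoop.util.SemaphoredDelegatingExecutor")
  .getDeclaredConstructors

// With the unshaded hadoop-client this includes a constructor taking
// com.google.common.util.concurrent.ListeningExecutorService; with the shaded
// client jars that exact signature is absent because Guava is relocated,
// so S3A write paths fail at runtime.
ctors.map(_.toGenericString).foreach(println)
```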
Authored-by: Dongjoon Hyun Signed-off-by: HyukjinKwon --- common/network-yarn/pom.xml | 8 +-- core/pom.xml | 16 +---- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 3 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 52 +++++++++++++- external/kafka-0-10-assembly/pom.xml | 8 +-- external/kafka-0-10-sql/pom.xml | 4 -- external/kafka-0-10-token-provider/pom.xml | 5 -- external/kinesis-asl-assembly/pom.xml | 8 +-- hadoop-cloud/pom.xml | 7 +- launcher/pom.xml | 9 +-- pom.xml | 57 +++------------- resource-managers/kubernetes/core/pom.xml | 9 +++ resource-managers/yarn/pom.xml | 67 +++++++------------ .../spark/deploy/yarn/ApplicationMaster.scala | 6 +- .../deploy/yarn/BaseYarnClusterSuite.scala | 10 --- sql/catalyst/pom.xml | 4 -- sql/hive/pom.xml | 5 -- .../hive/client/IsolatedClientLoader.scala | 19 +----- 18 files changed, 107 insertions(+), 190 deletions(-) diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 9938e5d769e12..0225db81925c5 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -65,13 +65,7 @@ org.apache.hadoop - ${hadoop-client-api.artifact} - ${hadoop.version} - - - org.apache.hadoop - ${hadoop-client-runtime.artifact} - ${hadoop.version} + hadoop-client org.slf4j diff --git a/core/pom.xml b/core/pom.xml index 9d2bf7dbe57a9..ce6f6ed9c7051 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -66,13 +66,7 @@ org.apache.hadoop - ${hadoop-client-api.artifact} - ${hadoop.version} - - - org.apache.hadoop - ${hadoop-client-runtime.artifact} - ${hadoop.version} + hadoop-client org.apache.spark @@ -183,14 +177,6 @@ org.apache.commons commons-text - - commons-io - commons-io - - - commons-collections - commons-collections - com.google.code.findbugs jsr305 diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 8802220726f78..a19558bc2a5e3 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -127,7 +127,7 @@ javax.inject/1//javax.inject-1.jar javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar javolution/5.5.1//javolution-5.5.1.jar -jaxb-api/2.2.11//jaxb-api-2.2.11.jar +jaxb-api/2.2.2//jaxb-api-2.2.2.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar jdo-api/3.0.1//jdo-api-3.0.1.jar @@ -226,6 +226,7 @@ spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar +stax-api/1.0-2//stax-api-1.0-2.jar stax-api/1.0.1//stax-api-1.0.1.jar stream/2.9.6//stream-2.9.6.jar super-csv/2.2.0//super-csv-2.2.0.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index d45eeea0ee92b..24283224dd37d 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -3,12 +3,14 @@ JLargeArrays/1.5//JLargeArrays-1.5.jar JTransforms/3.1//JTransforms-3.1.jar RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar ST4/4.0.4//ST4-4.0.4.jar +accessors-smart/1.2//accessors-smart-1.2.jar activation/1.1.1//activation-1.1.1.jar aircompressor/0.10//aircompressor-0.10.jar algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar antlr4-runtime/4.8-1//antlr4-runtime-4.8-1.jar aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar +aopalliance/1.0//aopalliance-1.0.jar arpack_combined_all/0.1//arpack_combined_all-0.1.jar 
arrow-format/2.0.0//arrow-format-2.0.0.jar arrow-memory-core/2.0.0//arrow-memory-core-2.0.0.jar @@ -25,12 +27,15 @@ breeze_2.12/1.0//breeze_2.12-1.0.jar cats-kernel_2.12/2.0.0-M4//cats-kernel_2.12-2.0.0-M4.jar chill-java/0.9.5//chill-java-0.9.5.jar chill_2.12/0.9.5//chill_2.12-0.9.5.jar +commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar commons-cli/1.2//commons-cli-1.2.jar commons-codec/1.10//commons-codec-1.10.jar commons-collections/3.2.2//commons-collections-3.2.2.jar commons-compiler/3.0.16//commons-compiler-3.0.16.jar commons-compress/1.20//commons-compress-1.20.jar +commons-configuration2/2.1.1//commons-configuration2-2.1.1.jar commons-crypto/1.1.0//commons-crypto-1.1.0.jar +commons-daemon/1.0.13//commons-daemon-1.0.13.jar commons-dbcp/1.4//commons-dbcp-1.4.jar commons-httpclient/3.1//commons-httpclient-3.1.jar commons-io/2.5//commons-io-2.5.jar @@ -50,13 +55,30 @@ datanucleus-api-jdo/4.2.4//datanucleus-api-jdo-4.2.4.jar datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar datanucleus-rdbms/4.1.19//datanucleus-rdbms-4.1.19.jar derby/10.12.1.1//derby-10.12.1.1.jar +dnsjava/2.1.7//dnsjava-2.1.7.jar dropwizard-metrics-hadoop-metrics2-reporter/0.1.2//dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar +ehcache/3.3.1//ehcache-3.3.1.jar flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar generex/1.0.2//generex-1.0.2.jar +geronimo-jcache_1.0_spec/1.0-alpha-1//geronimo-jcache_1.0_spec-1.0-alpha-1.jar gson/2.2.4//gson-2.2.4.jar guava/14.0.1//guava-14.0.1.jar -hadoop-client-api/3.2.0//hadoop-client-api-3.2.0.jar -hadoop-client-runtime/3.2.0//hadoop-client-runtime-3.2.0.jar +guice-servlet/4.0//guice-servlet-4.0.jar +guice/4.0//guice-4.0.jar +hadoop-annotations/3.2.0//hadoop-annotations-3.2.0.jar +hadoop-auth/3.2.0//hadoop-auth-3.2.0.jar +hadoop-client/3.2.0//hadoop-client-3.2.0.jar +hadoop-common/3.2.0//hadoop-common-3.2.0.jar +hadoop-hdfs-client/3.2.0//hadoop-hdfs-client-3.2.0.jar +hadoop-mapreduce-client-common/3.2.0//hadoop-mapreduce-client-common-3.2.0.jar +hadoop-mapreduce-client-core/3.2.0//hadoop-mapreduce-client-core-3.2.0.jar +hadoop-mapreduce-client-jobclient/3.2.0//hadoop-mapreduce-client-jobclient-3.2.0.jar +hadoop-yarn-api/3.2.0//hadoop-yarn-api-3.2.0.jar +hadoop-yarn-client/3.2.0//hadoop-yarn-client-3.2.0.jar +hadoop-yarn-common/3.2.0//hadoop-yarn-common-3.2.0.jar +hadoop-yarn-registry/3.2.0//hadoop-yarn-registry-3.2.0.jar +hadoop-yarn-server-common/3.2.0//hadoop-yarn-server-common-3.2.0.jar +hadoop-yarn-server-web-proxy/3.2.0//hadoop-yarn-server-web-proxy-3.2.0.jar hive-beeline/2.3.7//hive-beeline-2.3.7.jar hive-cli/2.3.7//hive-cli-2.3.7.jar hive-common/2.3.7//hive-common-2.3.7.jar @@ -86,6 +108,8 @@ jackson-core/2.10.0//jackson-core-2.10.0.jar jackson-databind/2.10.0//jackson-databind-2.10.0.jar jackson-dataformat-yaml/2.10.0//jackson-dataformat-yaml-2.10.0.jar jackson-datatype-jsr310/2.11.2//jackson-datatype-jsr310-2.11.2.jar +jackson-jaxrs-base/2.9.5//jackson-jaxrs-base-2.9.5.jar +jackson-jaxrs-json-provider/2.9.5//jackson-jaxrs-json-provider-2.9.5.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar jackson-module-jaxb-annotations/2.10.0//jackson-module-jaxb-annotations-2.10.0.jar jackson-module-paranamer/2.10.0//jackson-module-paranamer-2.10.0.jar @@ -98,11 +122,13 @@ jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar janino/3.0.16//janino-3.0.16.jar javassist/3.25.0-GA//javassist-3.25.0-GA.jar +javax.inject/1//javax.inject-1.jar javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar 
javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar javolution/5.5.1//javolution-5.5.1.jar jaxb-api/2.2.11//jaxb-api-2.2.11.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar +jcip-annotations/1.0-1//jcip-annotations-1.0-1.jar jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar jdo-api/3.0.1//jdo-api-3.0.1.jar jersey-client/2.30//jersey-client-2.30.jar @@ -116,14 +142,30 @@ jline/2.14.6//jline-2.14.6.jar joda-time/2.10.5//joda-time-2.10.5.jar jodd-core/3.5.2//jodd-core-3.5.2.jar jpam/1.1//jpam-1.1.jar +json-smart/2.3//json-smart-2.3.jar json/1.8//json-1.8.jar json4s-ast_2.12/3.7.0-M5//json4s-ast_2.12-3.7.0-M5.jar json4s-core_2.12/3.7.0-M5//json4s-core_2.12-3.7.0-M5.jar json4s-jackson_2.12/3.7.0-M5//json4s-jackson_2.12-3.7.0-M5.jar json4s-scalap_2.12/3.7.0-M5//json4s-scalap_2.12-3.7.0-M5.jar +jsp-api/2.1//jsp-api-2.1.jar jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.30//jul-to-slf4j-1.7.30.jar +kerb-admin/1.0.1//kerb-admin-1.0.1.jar +kerb-client/1.0.1//kerb-client-1.0.1.jar +kerb-common/1.0.1//kerb-common-1.0.1.jar +kerb-core/1.0.1//kerb-core-1.0.1.jar +kerb-crypto/1.0.1//kerb-crypto-1.0.1.jar +kerb-identity/1.0.1//kerb-identity-1.0.1.jar +kerb-server/1.0.1//kerb-server-1.0.1.jar +kerb-simplekdc/1.0.1//kerb-simplekdc-1.0.1.jar +kerb-util/1.0.1//kerb-util-1.0.1.jar +kerby-asn1/1.0.1//kerby-asn1-1.0.1.jar +kerby-config/1.0.1//kerby-config-1.0.1.jar +kerby-pkix/1.0.1//kerby-pkix-1.0.1.jar +kerby-util/1.0.1//kerby-util-1.0.1.jar +kerby-xdr/1.0.1//kerby-xdr-1.0.1.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar kubernetes-client/4.12.0//kubernetes-client-4.12.0.jar kubernetes-model-admissionregistration/4.12.0//kubernetes-model-admissionregistration-4.12.0.jar @@ -161,7 +203,9 @@ metrics-json/4.1.1//metrics-json-4.1.1.jar metrics-jvm/4.1.1//metrics-jvm-4.1.1.jar minlog/1.3.0//minlog-1.3.0.jar netty-all/4.1.51.Final//netty-all-4.1.51.Final.jar +nimbus-jose-jwt/4.41.1//nimbus-jose-jwt-4.41.1.jar objenesis/2.6//objenesis-2.6.jar +okhttp/2.7.5//okhttp-2.7.5.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar @@ -180,6 +224,7 @@ parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar protobuf-java/2.5.0//protobuf-java-2.5.0.jar py4j/0.10.9//py4j-0.10.9.jar pyrolite/4.30//pyrolite-4.30.jar +re2j/1.1//re2j-1.1.jar scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar scala-compiler/2.12.10//scala-compiler-2.12.10.jar scala-library/2.12.10//scala-library-2.12.10.jar @@ -197,12 +242,15 @@ spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar stax-api/1.0.1//stax-api-1.0.1.jar +stax2-api/3.1.4//stax2-api-3.1.4.jar stream/2.9.6//stream-2.9.6.jar super-csv/2.2.0//super-csv-2.2.0.jar threeten-extra/1.5.0//threeten-extra-1.5.0.jar +token-provider/1.0.1//token-provider-1.0.1.jar transaction-api/1.1//transaction-api-1.1.jar univocity-parsers/2.9.0//univocity-parsers-2.9.0.jar velocity/1.5//velocity-1.5.jar +woodstox-core/5.0.3//woodstox-core-5.0.3.jar xbean-asm7-shaded/4.15//xbean-asm7-shaded-4.15.jar xz/1.5//xz-1.5.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index b1e306c499385..d9d9fb7f55c77 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -71,15 +71,9 @@ org.apache.hadoop - ${hadoop-client-api.artifact} - ${hadoop.version} + hadoop-client provided - - org.apache.hadoop - 
${hadoop-client-runtime.artifact} - ${hadoop.version} - org.apache.avro avro-mapred diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 06a6bef005e69..95a99ac88412e 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -79,10 +79,6 @@ kafka-clients ${kafka.version} - - com.google.code.findbugs - jsr305 - org.apache.commons commons-pool2 diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index 1b0d6d322917f..941946f30e96f 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -58,11 +58,6 @@ mockito-core test - - org.apache.hadoop - ${hadoop-client-runtime.artifact} - ${hadoop.deps.scope} - org.apache.spark spark-tags_${scala.binary.version} diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 5a49358a84241..76ee5bb7b2f85 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -91,15 +91,9 @@ org.apache.hadoop - ${hadoop-client-api.artifact} - ${hadoop.version} + hadoop-client provided - - org.apache.hadoop - ${hadoop-client-runtime.artifact} - ${hadoop.version} - org.apache.avro avro-ipc diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index a5642a5a68fe4..8689e0b8a9ea8 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -58,15 +58,10 @@ org.apache.hadoop - ${hadoop-client-api.artifact} + hadoop-client ${hadoop.version} provided - - org.apache.hadoop - ${hadoop-client-runtime.artifact} - ${hadoop.version} - org.apache.hadoop - ${hadoop-client-api.artifact} - ${hadoop.version} - test - - - org.apache.hadoop - ${hadoop-client-runtime.artifact} - ${hadoop.version} + hadoop-client test diff --git a/pom.xml b/pom.xml index f0ad9b0167c32..4d6e3bbc95378 100644 --- a/pom.xml +++ b/pom.xml @@ -244,15 +244,6 @@ compile test - - hadoop-client-api - hadoop-client-runtime - hadoop-client-minicluster - - - org.apache.hadoop - hadoop-client-api - ${hadoop.version} - ${hadoop.deps.scope} - - - org.apache.hadoop - hadoop-client-runtime - ${hadoop.version} - ${hadoop.deps.scope} - - - org.apache.hadoop - hadoop-client-minicluster - ${yarn.version} - test - - org.apache.hadoop hadoop-client @@ -1688,14 +1654,6 @@ org.apache.ant ant
      - - org.apache.hadoop - hadoop-common - - - org.apache.hadoop - hadoop-auth - org.apache.zookeeper zookeeper @@ -2460,6 +2418,17 @@ + + enforce-no-duplicate-dependencies + + enforce + + + + + + + @@ -2919,7 +2888,6 @@ maven-shade-plugin false - false org.spark-project.spark:unused @@ -3181,9 +3149,6 @@ 2.7.4 2.7.1 2.4 - hadoop-client - hadoop-client - hadoop-client diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index edeb95fdba684..18e1c65e2e932 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -63,6 +63,10 @@ com.fasterxml.jackson.core * + + com.fasterxml.jackson.module + jackson-module-jaxb-annotations + com.fasterxml.jackson.dataformat jackson-dataformat-yaml @@ -81,6 +85,11 @@ jackson-dataformat-yaml ${fasterxml.jackson.version} + + com.fasterxml.jackson.module + jackson-module-jaxb-annotations + ${fasterxml.jackson.version} + diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index f6d6ddccc99c3..e9122ce202723 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -40,42 +40,6 @@ true - - hadoop-2.7 - - - org.apache.hadoop - hadoop-yarn-api - - - org.apache.hadoop - hadoop-yarn-common - - - org.apache.hadoop - hadoop-yarn-server-web-proxy - - - org.apache.hadoop - hadoop-yarn-client - - - org.apache.hadoop - hadoop-yarn-server-tests - tests - test - - - - org.apache.hadoop - hadoop-yarn-server-resourcemanager - test - - - @@ -105,20 +69,23 @@ org.apache.hadoop - ${hadoop-client-api.artifact} - ${hadoop.version} + hadoop-yarn-api org.apache.hadoop - ${hadoop-client-runtime.artifact} - ${hadoop.version} - ${hadoop.deps.scope} + hadoop-yarn-common org.apache.hadoop - ${hadoop-client-minicluster.artifact} - ${hadoop.version} - test + hadoop-yarn-server-web-proxy + + + org.apache.hadoop + hadoop-yarn-client + + + org.apache.hadoop + hadoop-client @@ -175,6 +142,18 @@ test + + org.apache.hadoop + hadoop-yarn-server-tests + tests + test + + + org.apache.hadoop + hadoop-yarn-server-resourcemanager + test + + org.mockito mockito-core diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index e23773229c560..be9a88ca9b1d6 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.yarn import java.io.{File, IOException} import java.lang.reflect.{InvocationTargetException, Modifier} -import java.net.{URI, URL, URLEncoder} +import java.net.{URI, URL} import java.security.PrivilegedExceptionAction import java.util.concurrent.{TimeoutException, TimeUnit} @@ -36,6 +36,7 @@ import org.apache.hadoop.yarn.api._ import org.apache.hadoop.yarn.api.records._ import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException +import org.apache.hadoop.yarn.server.webproxy.ProxyUriUtils import org.apache.hadoop.yarn.util.{ConverterUtils, Records} import org.apache.spark._ @@ -307,8 +308,7 @@ private[spark] class ApplicationMaster( // The client-mode AM doesn't listen for incoming connections, so report an invalid port. 
registerAM(Utils.localHostName, -1, sparkConf, sparkConf.getOption("spark.driver.appUIAddress"), appAttemptId) - val encodedAppId = URLEncoder.encode(appAttemptId.getApplicationId.toString, "UTF-8") - addAmIpFilter(Some(driverRef), s"/proxy/$encodedAppId") + addAmIpFilter(Some(driverRef), ProxyUriUtils.getPath(appAttemptId.getApplicationId)) createAllocator(driverRef, sparkConf, clientRpcEnv, appAttemptId, cachedResourcesConf) reporterThread.join() } catch { diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala index a813b9913f23b..20f5339c46fef 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala @@ -80,16 +80,6 @@ abstract class BaseYarnClusterSuite yarnConf.set("yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage", "100.0") - // capacity-scheduler.xml is missing in hadoop-client-minicluster so this is a workaround - yarnConf.set("yarn.scheduler.capacity.root.queues", "default") - yarnConf.setInt("yarn.scheduler.capacity.root.default.capacity", 100) - yarnConf.setFloat("yarn.scheduler.capacity.root.default.user-limit-factor", 1) - yarnConf.setInt("yarn.scheduler.capacity.root.default.maximum-capacity", 100) - yarnConf.set("yarn.scheduler.capacity.root.default.state", "RUNNING") - yarnConf.set("yarn.scheduler.capacity.root.default.acl_submit_applications", "*") - yarnConf.set("yarn.scheduler.capacity.root.default.acl_administer_queue", "*") - yarnConf.setInt("yarn.scheduler.capacity.node-locality-delay", -1) - yarnCluster = new MiniYARNCluster(getClass().getName(), 1, 1, 1) yarnCluster.init(yarnConf) yarnCluster.start() diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index af976fa1fa983..6b79eb722fcdd 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -104,10 +104,6 @@ org.antlr antlr4-runtime - - javax.xml.bind - jaxb-api - commons-codec commons-codec diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 4fca6264c0594..0453094cf8b7b 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -162,11 +162,6 @@ org.datanucleus datanucleus-core - - org.apache.hadoop - ${hadoop-client-runtime.artifact} - ${hadoop.deps.scope} - org.apache.thrift libthrift diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 9663e03ee6a74..c0758dcdfc879 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -112,24 +112,11 @@ private[hive] object IsolatedClientLoader extends Logging { hadoopVersion: String, ivyPath: Option[String], remoteRepos: String): Seq[URL] = { - val hadoopJarNames = if (hadoopVersion.startsWith("3")) { - Seq(s"org.apache.hadoop:hadoop-client-api:$hadoopVersion", - s"org.apache.hadoop:hadoop-client-runtime:$hadoopVersion") - } else { - Seq(s"org.apache.hadoop:hadoop-client:$hadoopVersion") - } val hiveArtifacts = version.extraDeps ++ Seq("hive-metastore", "hive-exec", "hive-common", "hive-serde") .map(a => s"org.apache.hive:$a:${version.fullVersion}") ++ - Seq("com.google.guava:guava:14.0.1") ++ hadoopJarNames - - val extraExclusions = if (hadoopVersion.startsWith("3")) { - // this 
introduced from lower version of Hive could conflict with jars in Hadoop 3.2+, so - // exclude here in favor of the ones in Hadoop 3.2+ - Seq("org.apache.hadoop:hadoop-auth") - } else { - Seq.empty - } + Seq("com.google.guava:guava:14.0.1", + s"org.apache.hadoop:hadoop-client:$hadoopVersion") val classpath = quietly { SparkSubmitUtils.resolveMavenCoordinates( @@ -137,7 +124,7 @@ private[hive] object IsolatedClientLoader extends Logging { SparkSubmitUtils.buildIvySettings( Some(remoteRepos), ivyPath), - exclusions = version.exclusions ++ extraExclusions) + exclusions = version.exclusions) } val allFiles = classpath.split(",").map(new File(_)).toSet From 084d38b64ecbcaa9fac47ffca5604cf2a72936fc Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Wed, 2 Dec 2020 18:41:49 +0900 Subject: [PATCH 0635/1009] [SPARK-33557][CORE][MESOS][TEST] Ensure the relationship between STORAGE_BLOCKMANAGER_HEARTBEAT_TIMEOUT and NETWORK_TIMEOUT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? As described in SPARK-33557, `HeartbeatReceiver` and `MesosCoarseGrainedSchedulerBackend` will always use `Network.NETWORK_TIMEOUT.defaultValueString` as value of `STORAGE_BLOCKMANAGER_HEARTBEAT_TIMEOUT` when we configure `NETWORK_TIMEOUT` without configure `STORAGE_BLOCKMANAGER_HEARTBEAT_TIMEOUT`, this is different from the relationship described in `configuration.md`. To fix this problem,the main change of this pr as follow: - Remove the explicitly default value of `STORAGE_BLOCKMANAGER_HEARTBEAT_TIMEOUT` - Use actual value of `NETWORK_TIMEOUT` as `STORAGE_BLOCKMANAGER_HEARTBEAT_TIMEOUT` when `STORAGE_BLOCKMANAGER_HEARTBEAT_TIMEOUT` not configured in `HeartbeatReceiver` and `MesosCoarseGrainedSchedulerBackend` ### Why are the changes needed? To ensure the relationship between `NETWORK_TIMEOUT` and `STORAGE_BLOCKMANAGER_HEARTBEAT_TIMEOUT` as we described in `configuration.md` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Pass the Jenkins or GitHub Action - Manual test configure `NETWORK_TIMEOUT` and `STORAGE_BLOCKMANAGER_HEARTBEAT_TIMEOUT` locally Closes #30547 from LuciferYang/SPARK-33557. 
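Put differently, the documented behaviour restored here is that an unset heartbeat timeout should track the configured `spark.network.timeout` rather than that config's default. A minimal sketch with plain `SparkConf` calls (illustrative only; the string key for `STORAGE_BLOCKMANAGER_HEARTBEAT_TIMEOUT` is assumed to be `spark.storage.blockManagerHeartbeatTimeoutMs`):

```scala
import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.network.timeout", "200s") // heartbeat timeout deliberately left unset

// Fallback as described in configuration.md: use the explicit heartbeat timeout
// if present, otherwise follow the actual network timeout value.
val heartbeatTimeoutMs =
  if (conf.contains("spark.storage.blockManagerHeartbeatTimeoutMs")) {
    conf.getTimeAsMs("spark.storage.blockManagerHeartbeatTimeoutMs")
  } else {
    conf.getTimeAsMs("spark.network.timeout", "120s")
  }

assert(heartbeatTimeoutMs == 200000L) // before this change the effective value stayed at the 120s default
```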
Authored-by: yangjie01 Signed-off-by: HyukjinKwon --- core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala | 4 +++- .../scala/org/apache/spark/internal/config/package.scala | 2 +- .../org/apache/spark/repl/ExecutorClassLoaderSuite.scala | 1 + .../cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala | 5 ++++- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala b/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala index 233ad884a721a..13ff075660cd7 100644 --- a/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala +++ b/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala @@ -80,7 +80,9 @@ private[spark] class HeartbeatReceiver(sc: SparkContext, clock: Clock) // executor ID -> timestamp of when the last heartbeat from this executor was received private val executorLastSeen = new HashMap[String, Long] - private val executorTimeoutMs = sc.conf.get(config.STORAGE_BLOCKMANAGER_HEARTBEAT_TIMEOUT) + private val executorTimeoutMs = sc.conf.get( + config.STORAGE_BLOCKMANAGER_HEARTBEAT_TIMEOUT + ).getOrElse(Utils.timeStringAsMs(s"${sc.conf.get(Network.NETWORK_TIMEOUT)}s")) private val checkTimeoutIntervalMs = sc.conf.get(Network.NETWORK_TIMEOUT_INTERVAL) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 6639f20a068d4..f6de5e4128ca5 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -504,7 +504,7 @@ package object config { .version("0.7.0") .withAlternative("spark.storage.blockManagerSlaveTimeoutMs") .timeConf(TimeUnit.MILLISECONDS) - .createWithDefaultString(Network.NETWORK_TIMEOUT.defaultValueString) + .createOptional private[spark] val STORAGE_CLEANUP_FILES_AFTER_EXECUTOR_EXIT = ConfigBuilder("spark.storage.cleanupFilesAfterExecutorExit") diff --git a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala index 386de19e919e6..23ea3fee2505b 100644 --- a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala +++ b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala @@ -231,6 +231,7 @@ class ExecutorClassLoaderSuite .setMaster("local") .setAppName("executor-class-loader-test") .set("spark.network.timeout", "11s") + .set("spark.network.timeoutInterval", "11s") .set("spark.repl.class.outputDir", tempDir1.getAbsolutePath) val sc = new SparkContext(conf) try { diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala index efcef09132f5b..6fedce61d8208 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala @@ -34,6 +34,7 @@ import org.apache.spark.{SecurityManager, SparkConf, SparkContext, SparkExceptio import org.apache.spark.deploy.mesos.config._ import org.apache.spark.deploy.security.HadoopDelegationTokenManager import org.apache.spark.internal.config +import org.apache.spark.internal.config.Network import org.apache.spark.internal.config.Tests.IS_TESTING import 
org.apache.spark.launcher.{LauncherBackend, SparkAppHandle} import org.apache.spark.network.netty.SparkTransportConf @@ -651,7 +652,9 @@ private[spark] class MesosCoarseGrainedSchedulerBackend( .registerDriverWithShuffleService( agent.hostname, externalShufflePort, - sc.conf.get(config.STORAGE_BLOCKMANAGER_HEARTBEAT_TIMEOUT), + sc.conf.get( + config.STORAGE_BLOCKMANAGER_HEARTBEAT_TIMEOUT + ).getOrElse(Utils.timeStringAsMs(s"${sc.conf.get(Network.NETWORK_TIMEOUT)}s")), sc.conf.get(config.EXECUTOR_HEARTBEAT_INTERVAL)) agent.shuffleRegistered = true } From 28dad1ba770e5b7f7cf542da1ae3f05975a969c6 Mon Sep 17 00:00:00 2001 From: neko Date: Wed, 2 Dec 2020 09:24:19 -0600 Subject: [PATCH 0636/1009] [SPARK-33504][CORE] The application log in the Spark history server contains sensitive attributes should be redacted ### What changes were proposed in this pull request? To make sure the sensitive attributes to be redacted in the history server log. ### Why are the changes needed? We found the secure attributes like password in SparkListenerJobStart and SparkListenerStageSubmitted events would not been redated, resulting in sensitive attributes can be viewd directly. The screenshot can be viewed in the attachment of JIRA spark-33504 ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? muntual test works well, I have also added unit testcase. Closes #30446 from akiyamaneko/eventlog_unredact. Authored-by: neko Signed-off-by: Thomas Graves --- .../scheduler/EventLoggingListener.scala | 24 ++++++- .../scheduler/EventLoggingListenerSuite.scala | 64 ++++++++++++++++++- 2 files changed, 85 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala b/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala index 1fda03f732636..d4e22d739098f 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala @@ -18,7 +18,9 @@ package org.apache.spark.scheduler import java.net.URI +import java.util.Properties +import scala.collection.JavaConverters._ import scala.collection.mutable import org.apache.hadoop.conf.Configuration @@ -103,7 +105,7 @@ private[spark] class EventLoggingListener( // Events that do not trigger a flush override def onStageSubmitted(event: SparkListenerStageSubmitted): Unit = { - logEvent(event) + logEvent(event.copy(properties = redactProperties(event.properties))) if (shouldLogStageExecutorMetrics) { // record the peak metrics for the new stage liveStageExecutorMetrics.put((event.stageInfo.stageId, event.stageInfo.attemptNumber()), @@ -156,7 +158,9 @@ private[spark] class EventLoggingListener( logEvent(event, flushLogger = true) } - override def onJobStart(event: SparkListenerJobStart): Unit = logEvent(event, flushLogger = true) + override def onJobStart(event: SparkListenerJobStart): Unit = { + logEvent(event.copy(properties = redactProperties(event.properties)), flushLogger = true) + } override def onJobEnd(event: SparkListenerJobEnd): Unit = logEvent(event, flushLogger = true) @@ -276,6 +280,22 @@ private[spark] class EventLoggingListener( logWriter.stop() } + private def redactProperties(properties: Properties): Properties = { + if (properties == null) { + return properties + } + val redactedProperties = new Properties + // properties may contain some custom local properties such as stage/job description + // only properties in sparkConf need to be redacted. 
+ val (globalProperties, localProperties) = properties.asScala.toSeq.partition { + case (key, _) => sparkConf.contains(key) + } + (Utils.redact(sparkConf, globalProperties) ++ localProperties).foreach { + case (key, value) => redactedProperties.setProperty(key, value) + } + redactedProperties + } + private[spark] def redactEvent( event: SparkListenerEnvironmentUpdate): SparkListenerEnvironmentUpdate = { // environmentDetails maps a string descriptor to a set of properties diff --git a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala index c4a8bcbb26a1d..7acb8451e3b38 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.scheduler import java.io.{File, InputStream} -import java.util.Arrays +import java.util.{Arrays, Properties} import scala.collection.immutable.Map import scala.collection.mutable @@ -98,6 +98,68 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit assert(redactedProps(key) == "*********(redacted)") } + test("Spark-33504 sensitive attributes redaction in properties") { + val (secretKey, secretPassword) = ("spark.executorEnv.HADOOP_CREDSTORE_PASSWORD", + "secret_password") + val (customKey, customValue) = ("parse_token", "secret_password") + + val conf = getLoggingConf(testDirPath, None).set(secretKey, secretPassword) + + val properties = new Properties() + properties.setProperty(secretKey, secretPassword) + properties.setProperty(customKey, customValue) + + val logName = "properties-reaction-test" + val eventLogger = new EventLoggingListener(logName, None, testDirPath.toUri(), conf) + val listenerBus = new LiveListenerBus(conf) + + val stageId = 1 + val jobId = 1 + val stageInfo = new StageInfo(stageId, 0, stageId.toString, 0, + Seq.empty, Seq.empty, "details", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) + + val events = Array(SparkListenerStageSubmitted(stageInfo, properties), + SparkListenerJobStart(jobId, 0, Seq(stageInfo), properties)) + + eventLogger.start() + listenerBus.start(Mockito.mock(classOf[SparkContext]), Mockito.mock(classOf[MetricsSystem])) + listenerBus.addToEventLogQueue(eventLogger) + events.foreach(event => listenerBus.post(event)) + listenerBus.stop() + eventLogger.stop() + + val logData = EventLogFileReader.openEventLog(new Path(eventLogger.logWriter.logPath), + fileSystem) + try { + val lines = readLines(logData) + val logStart = SparkListenerLogStart(SPARK_VERSION) + assert(lines.size === 3) + assert(lines(0).contains("SparkListenerLogStart")) + assert(lines(1).contains("SparkListenerStageSubmitted")) + assert(lines(2).contains("SparkListenerJobStart")) + + lines.foreach{ + line => JsonProtocol.sparkEventFromJson(parse(line)) match { + case logStartEvent: SparkListenerLogStart => + assert(logStartEvent == logStart) + + case stageSubmittedEvent: SparkListenerStageSubmitted => + assert(stageSubmittedEvent.properties.getProperty(secretKey) == "*********(redacted)") + assert(stageSubmittedEvent.properties.getProperty(customKey) == customValue) + + case jobStartEvent : SparkListenerJobStart => + assert(jobStartEvent.properties.getProperty(secretKey) == "*********(redacted)") + assert(jobStartEvent.properties.getProperty(customKey) == customValue) + + case _ => assert(false) + } + } + } finally { + logData.close() + } + } + 
test("Executor metrics update") { testStageExecutorMetricsEventLogging() } From df8d3f1bf779ce1a9f3520939ab85814f09b48b7 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 2 Dec 2020 16:03:08 +0000 Subject: [PATCH 0637/1009] [SPARK-33544][SQL][FOLLOW-UP] Rename NoSideEffect to NoThrow and clarify the documentation more ### What changes were proposed in this pull request? This PR is a followup of https://github.com/apache/spark/pull/30504. It proposes: - Rename `NoSideEffect` to `NoThrow`, and use `Expression.deterministic` together where it is used. - Clarify, in the docs in the expressions, that it means they don't throw exceptions ### Why are the changes needed? `NoSideEffect` virtually means that `Expression.eval` does not throw an exception, and the expressions are deterministic. It's best to be explicit so `NoThrow` was proposed - I looked if there's a similar name to represent this concept and borrowed the name of [nothrow](https://clang.llvm.org/docs/AttributeReference.html#nothrow). For determinism, we already have a way to note it under `Expression.deterministic`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually ran the existing unittests written. Closes #30570 from HyukjinKwon/SPARK-33544. Authored-by: HyukjinKwon Signed-off-by: Wenchen Fan --- .../expressions/complexTypeCreator.scala | 18 ++++++++++++------ .../sql/catalyst/optimizer/expressions.scala | 2 +- .../optimizer/ConstantFoldingSuite.scala | 2 +- .../InferFiltersFromGenerateSuite.scala | 6 +++--- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index f0f92e2d935f1..cb59fbda2b3b9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -31,10 +31,16 @@ import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String /** - * Trait to indicate the expression doesn't have any side effects. This can be used - * to indicate its ok to optimize it out under certain circumstances. + * Trait to indicate the expression does not throw an exception by itself when they are evaluated. + * For example, UDFs, [[AssertTrue]], etc can throw an exception when they are executed. + * In such case, it is necessary to call [[Expression.eval]], and the optimization rule should + * not ignore it. + * + * This trait can be used in an optimization rule such as + * [[org.apache.spark.sql.catalyst.optimizer.ConstantFolding]] to fold the expressions that + * do not need to execute, for example, `size(array(c0, c1, c2))`. */ -trait NoSideEffect +trait NoThrow /** * Returns an Array containing the evaluation of all children expressions. 
@@ -48,7 +54,7 @@ trait NoSideEffect """, since = "1.1.0") case class CreateArray(children: Seq[Expression], useStringTypeWhenEmpty: Boolean) - extends Expression with NoSideEffect { + extends Expression with NoThrow { def this(children: Seq[Expression]) = { this(children, SQLConf.get.getConf(SQLConf.LEGACY_CREATE_EMPTY_COLLECTION_USING_STRING_TYPE)) @@ -166,7 +172,7 @@ private [sql] object GenArrayData { """, since = "2.0.0") case class CreateMap(children: Seq[Expression], useStringTypeWhenEmpty: Boolean) - extends Expression with NoSideEffect{ + extends Expression with NoThrow { def this(children: Seq[Expression]) = { this(children, SQLConf.get.getConf(SQLConf.LEGACY_CREATE_EMPTY_COLLECTION_USING_STRING_TYPE)) @@ -385,7 +391,7 @@ object CreateStruct { """, since = "1.5.0") // scalastyle:on line.size.limit -case class CreateNamedStruct(children: Seq[Expression]) extends Expression with NoSideEffect { +case class CreateNamedStruct(children: Seq[Expression]) extends Expression with NoThrow { lazy val (nameExprs, valExprs) = children.grouped(2).map { case Seq(name, value) => (name, value) }.toList.unzip diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 4725f49340451..1b1e2ad71e7c8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -45,7 +45,7 @@ object ConstantFolding extends Rule[LogicalPlan] { private def hasNoSideEffect(e: Expression): Boolean = e match { case _: Attribute => true case _: Literal => true - case _: NoSideEffect => e.children.forall(hasNoSideEffect) + case _: NoThrow if e.deterministic => e.children.forall(hasNoSideEffect) case _ => false } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala index fd9b58a7a06aa..ae644c1110740 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala @@ -264,7 +264,7 @@ class ConstantFoldingSuite extends PlanTest { comparePlans(optimized, correctAnswer) } - test("SPARK-33544: Constant folding test with sideaffects") { + test("SPARK-33544: Constant folding test with side effects") { val originalQuery = testRelation .select('a) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromGenerateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromGenerateSuite.scala index c6fa1bd6e415c..93a1d414ed403 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromGenerateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromGenerateSuite.scala @@ -90,13 +90,13 @@ class InferFiltersFromGenerateSuite extends PlanTest { Seq(Explode(_), PosExplode(_)).foreach { f => val createArrayExplode = f(CreateArray(Seq('c1))) - test("Don't infer filters from CreateArray " + createArrayExplode) { + test("SPARK-33544: Don't infer filters from CreateArray " + createArrayExplode) { val originalQuery = testRelation.generate(createArrayExplode).analyze val optimized = OptimizeInferAndConstantFold.execute(originalQuery) 
comparePlans(optimized, originalQuery) @@ -105,7 +105,7 @@ class InferFiltersFromGenerateSuite extends PlanTest { Seq(Inline(_)).foreach { f => val createArrayStructExplode = f(CreateArray(Seq(CreateStruct(Seq('c1))))) - test("Don't infer filters from CreateArray " + createArrayStructExplode) { + test("SPARK-33544: Don't infer filters from CreateArray " + createArrayStructExplode) { val originalQuery = testRelation.generate(createArrayStructExplode).analyze val optimized = OptimizeInferAndConstantFold.execute(originalQuery) comparePlans(optimized, originalQuery) From 58583f7c3fdcac1232607a7ab4b0d052320ac3ea Mon Sep 17 00:00:00 2001 From: "xuewei.linxuewei" Date: Wed, 2 Dec 2020 16:10:45 +0000 Subject: [PATCH 0638/1009] [SPARK-33619][SQL] Fix GetMapValueUtil code generation error ### What changes were proposed in this pull request? Code gen bug fix for an issue introduced by SPARK-33460 ``` GetMapValueUtil s"""throw new NoSuchElementException("Key " + $eval2 + " does not exist.");""" SHOULD BE s"""throw new java.util.NoSuchElementException("Key " + $eval2 + " does not exist.");""" ``` The reason SPARK-33460 failed to detect this bug via UT is that `checkExceptionInExpression`, unlike `checkEvaluation`, did not try to evaluate the expression in BOTH `CODEGEN_ONLY` and `NO_CODEGEN` modes; this PR fixes that test helper as well. ### Why are the changes needed? Bug fix. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Added a new UT and ran existing UTs. Closes #30560 from leanken/leanken-SPARK-33619.
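The failure is easy to reproduce outside Spark: generated sources are compiled by Janino with no `import` statements beyond what the codegen framework registers, so a simple name like `NoSuchElementException` cannot be resolved while the fully qualified `java.util.NoSuchElementException` can. A minimal standalone sketch (assumes only a Janino dependency; it is not code from this patch):

```scala
import org.codehaus.janino.SimpleCompiler

val source =
  """public class Gen {
    |  public static void fail(Object key) {
    |    throw new %s("Key " + key + " does not exist.");
    |  }
    |}""".stripMargin

// The fully qualified exception name compiles fine.
new SimpleCompiler().cook(source.format("java.util.NoSuchElementException"))

// The simple name fails with Janino's
// `Cannot determine simple type name "NoSuchElementException"`,
// which is the message the new ExpressionEvalHelperSuite test asserts on.
// new SimpleCompiler().cook(source.format("NoSuchElementException"))
```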
Authored-by: xuewei.linxuewei Signed-off-by: Wenchen Fan --- .../expressions/complexTypeExtractors.scala | 2 +- .../expressions/datetimeExpressions.scala | 7 ++- .../expressions/intervalExpressions.scala | 14 +++--- .../expressions/ExpressionEvalHelper.scala | 49 ++++++------------- .../ExpressionEvalHelperSuite.scala | 25 +++++++++- .../IntervalExpressionsSuite.scala | 36 +++++++------- .../expressions/MathExpressionsSuite.scala | 5 +- 7 files changed, 70 insertions(+), 68 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala index 767650d022200..ef247efbe1a04 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala @@ -394,7 +394,7 @@ trait GetMapValueUtil extends BinaryExpression with ImplicitCastInputTypes { val keyJavaType = CodeGenerator.javaType(keyType) nullSafeCodeGen(ctx, ev, (eval1, eval2) => { val keyNotFoundBranch = if (failOnError) { - s"""throw new NoSuchElementException("Key " + $eval2 + " does not exist.");""" + s"""throw new java.util.NoSuchElementException("Key " + $eval2 + " does not exist.");""" } else { s"${ev.isNull} = true;" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index bbf1e4657f351..424887a13cb97 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -1789,8 +1789,11 @@ private case class GetTimestamp( """, group = "datetime_funcs", since = "3.0.0") -case class MakeDate(year: Expression, month: Expression, day: Expression, - failOnError: Boolean = SQLConf.get.ansiEnabled) +case class MakeDate( + year: Expression, + month: Expression, + day: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant { def this(year: Expression, month: Expression, day: Expression) = diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala index 6219457bba994..27067e17e7f45 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala @@ -166,13 +166,13 @@ case class MakeInterval( extends SeptenaryExpression with ImplicitCastInputTypes with NullIntolerant { def this( - years: Expression, - months: Expression, - weeks: Expression, - days: Expression, - hours: Expression, - mins: Expression, - sec: Expression) = { + years: Expression, + months: Expression, + weeks: Expression, + days: Expression, + hours: Expression, + mins: Expression, + sec: Expression) = { this(years, months, weeks, days, hours, mins, sec, SQLConf.get.ansiEnabled) } def this( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala index 
842c8f3243f2a..70eb391ad6e05 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala @@ -36,7 +36,6 @@ import org.apache.spark.sql.catalyst.plans.logical.{OneRowRelation, Project} import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, ArrayData, MapData} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -import org.apache.spark.util.Utils /** * A few helper functions for expression evaluation testing. Mixin this trait to use them. @@ -160,9 +159,14 @@ trait ExpressionEvalHelper extends ScalaCheckDrivenPropertyChecks with PlanTestB expectedErrMsg: String): Unit = { def checkException(eval: => Unit, testMode: String): Unit = { + val modes = Seq(CodegenObjectFactoryMode.CODEGEN_ONLY, CodegenObjectFactoryMode.NO_CODEGEN) withClue(s"($testMode)") { val errMsg = intercept[T] { - eval + for (fallbackMode <- modes) { + withSQLConf(SQLConf.CODEGEN_FACTORY_MODE.key -> fallbackMode.toString) { + eval + } + } }.getMessage if (errMsg == null) { if (expectedErrMsg != null) { @@ -192,22 +196,6 @@ trait ExpressionEvalHelper extends ScalaCheckDrivenPropertyChecks with PlanTestB expression.eval(inputRow) } - protected def generateProject( - generator: => Projection, - expression: Expression): Projection = { - try { - generator - } catch { - case e: Throwable => - fail( - s""" - |Code generation of $expression failed: - |$e - |${Utils.exceptionString(e)} - """.stripMargin) - } - } - protected def checkEvaluationWithoutCodegen( expression: Expression, expected: Any, @@ -244,9 +232,7 @@ trait ExpressionEvalHelper extends ScalaCheckDrivenPropertyChecks with PlanTestB protected def evaluateWithMutableProjection( expression: => Expression, inputRow: InternalRow = EmptyRow): Any = { - val plan = generateProject( - MutableProjection.create(Alias(expression, s"Optimized($expression)")() :: Nil), - expression) + val plan = MutableProjection.create(Alias(expression, s"Optimized($expression)")() :: Nil) plan.initialize(0) plan(inputRow).get(0, expression.dataType) @@ -292,11 +278,9 @@ trait ExpressionEvalHelper extends ScalaCheckDrivenPropertyChecks with PlanTestB // SPARK-16489 Explicitly doing code generation twice so code gen will fail if // some expression is reusing variable names across different instances. // This behavior is tested in ExpressionEvalHelperSuite. 
- val plan = generateProject( - UnsafeProjection.create( - Alias(expression, s"Optimized($expression)1")() :: - Alias(expression, s"Optimized($expression)2")() :: Nil), - expression) + val plan = UnsafeProjection.create( + Alias(expression, s"Optimized($expression)1")() :: + Alias(expression, s"Optimized($expression)2")() :: Nil) plan.initialize(0) plan(inputRow) @@ -319,16 +303,13 @@ trait ExpressionEvalHelper extends ScalaCheckDrivenPropertyChecks with PlanTestB checkEvaluationWithMutableProjection(expression, expected) checkEvaluationWithOptimization(expression, expected) - var plan = generateProject( - GenerateMutableProjection.generate(Alias(expression, s"Optimized($expression)")() :: Nil), - expression) + var plan: Projection = + GenerateMutableProjection.generate(Alias(expression, s"Optimized($expression)")() :: Nil) plan.initialize(0) var actual = plan(inputRow).get(0, expression.dataType) assert(checkResult(actual, expected, expression)) - plan = generateProject( - GenerateUnsafeProjection.generate(Alias(expression, s"Optimized($expression)")() :: Nil), - expression) + plan = GenerateUnsafeProjection.generate(Alias(expression, s"Optimized($expression)")() :: Nil) plan.initialize(0) val ref = new BoundReference(0, expression.dataType, nullable = true) actual = GenerateSafeProjection.generate(ref :: Nil)(plan(inputRow)).get(0, expression.dataType) @@ -456,9 +437,7 @@ trait ExpressionEvalHelper extends ScalaCheckDrivenPropertyChecks with PlanTestB } } - val plan = generateProject( - GenerateMutableProjection.generate(Alias(expr, s"Optimized($expr)")() :: Nil), - expr) + val plan = GenerateMutableProjection.generate(Alias(expr, s"Optimized($expr)")() :: Nil) val (codegen, codegenExc) = try { (Some(plan(inputRow).get(0, expr.dataType)), None) } catch { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelperSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelperSuite.scala index 54ef9641bee0d..3cc50da38906e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelperSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelperSuite.scala @@ -32,8 +32,8 @@ import org.apache.spark.sql.types.{DataType, IntegerType, MapType} */ class ExpressionEvalHelperSuite extends SparkFunSuite with ExpressionEvalHelper { - test("SPARK-16489 checkEvaluation should fail if expression reuses variable names") { - val e = intercept[RuntimeException] { checkEvaluation(BadCodegenExpression(), 10) } + test("SPARK-16489: checkEvaluation should fail if expression reuses variable names") { + val e = intercept[Exception] { checkEvaluation(BadCodegenExpression(), 10) } assert(e.getMessage.contains("some_variable")) } @@ -43,6 +43,12 @@ class ExpressionEvalHelperSuite extends SparkFunSuite with ExpressionEvalHelper } assert(e.getMessage.contains("and exprNullable was")) } + + test("SPARK-33619: make sure checkExceptionInExpression work as expected") { + checkExceptionInExpression[Exception]( + BadCodegenAndEvalExpression(), + "Cannot determine simple type name \"NoSuchElementException\"") + } } /** @@ -76,3 +82,18 @@ case class MapIncorrectDataTypeExpression() extends LeafExpression with CodegenF // since values includes null, valueContainsNull must be true override def dataType: DataType = MapType(IntegerType, IntegerType, valueContainsNull = false) } + +case class BadCodegenAndEvalExpression() extends LeafExpression { + override 
def nullable: Boolean = false + override def eval(input: InternalRow): Any = + throw new Exception("Cannot determine simple type name \"NoSuchElementException\"") + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + // it should be java.util.NoSuchElementException in generated code. + ev.copy(code = + code""" + |int ${ev.value} = 10; + |throw new NoSuchElementException("compile failed!"); + """.stripMargin) + } + override def dataType: DataType = IntegerType +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala index 5c73a91de4f79..950637c958426 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala @@ -217,15 +217,15 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("ANSI mode: make interval") { def check( - years: Int = 0, - months: Int = 0, - weeks: Int = 0, - days: Int = 0, - hours: Int = 0, - minutes: Int = 0, - seconds: Int = 0, - millis: Int = 0, - micros: Int = 0): Unit = { + years: Int = 0, + months: Int = 0, + weeks: Int = 0, + days: Int = 0, + hours: Int = 0, + minutes: Int = 0, + seconds: Int = 0, + millis: Int = 0, + micros: Int = 0): Unit = { val secFrac = DateTimeTestUtils.secFrac(seconds, millis, micros) val intervalExpr = MakeInterval(Literal(years), Literal(months), Literal(weeks), Literal(days), Literal(hours), Literal(minutes), @@ -238,15 +238,15 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } def checkException( - years: Int = 0, - months: Int = 0, - weeks: Int = 0, - days: Int = 0, - hours: Int = 0, - minutes: Int = 0, - seconds: Int = 0, - millis: Int = 0, - micros: Int = 0): Unit = { + years: Int = 0, + months: Int = 0, + weeks: Int = 0, + days: Int = 0, + hours: Int = 0, + minutes: Int = 0, + seconds: Int = 0, + millis: Int = 0, + micros: Int = 0): Unit = { val secFrac = DateTimeTestUtils.secFrac(seconds, millis, micros) val intervalExpr = MakeInterval(Literal(years), Literal(months), Literal(weeks), Literal(days), Literal(hours), Literal(minutes), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala index b4096f21bea3a..6d09e28362e11 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala @@ -138,9 +138,8 @@ class MathExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { expression: Expression, inputRow: InternalRow = EmptyRow): Unit = { - val plan = generateProject( - GenerateMutableProjection.generate(Alias(expression, s"Optimized($expression)")() :: Nil), - expression) + val plan = + GenerateMutableProjection.generate(Alias(expression, s"Optimized($expression)")() :: Nil) val actual = plan(inputRow).get(0, expression.dataType) if (!actual.asInstanceOf[Double].isNaN) { From 91182d6cce0a56a50801d530aff0c8e3aba59e27 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Wed, 2 Dec 2020 08:43:30 -0800 Subject: [PATCH 0639/1009] [SPARK-33626][K8S][TEST] Allow k8s integration tests to assert both driver and executor 
logs for expected log(s) ### What changes were proposed in this pull request? Allow k8s integration tests to assert both driver and executor logs for expected log(s) ### Why are the changes needed? Some of the tests will be able to provide full coverage of the use case, by asserting both driver and executor logs. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? TBD Closes #30568 from ScrapCodes/expectedDriverLogChanges. Authored-by: Prashant Sharma Signed-off-by: Dongjoon Hyun --- .../integrationtest/DecommissionSuite.scala | 6 ++-- .../k8s/integrationtest/DepsTestsSuite.scala | 2 +- .../k8s/integrationtest/KubernetesSuite.scala | 32 ++++++++++++++++--- .../integrationtest/PythonTestsSuite.scala | 6 ++-- .../k8s/integrationtest/RTestsSuite.scala | 2 +- .../SparkConfPropagateSuite.scala | 22 ++++++------- 6 files changed, 47 insertions(+), 23 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala index 9d7db04bb72b0..92f6a32cd156a 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala @@ -38,7 +38,7 @@ private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite => runSparkApplicationAndVerifyCompletion( appResource = PYSPARK_DECOMISSIONING, mainClass = "", - expectedLogOnCompletion = Seq( + expectedDriverLogOnCompletion = Seq( "Finished waiting, stopping Spark", "Decommission executors", "Final accumulator value is: 100"), @@ -69,7 +69,7 @@ private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite => runSparkApplicationAndVerifyCompletion( appResource = PYSPARK_DECOMISSIONING_CLEANUP, mainClass = "", - expectedLogOnCompletion = Seq( + expectedDriverLogOnCompletion = Seq( "Finished waiting, stopping Spark", "Decommission executors"), appArgs = Array.empty[String], @@ -104,7 +104,7 @@ private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite => runSparkApplicationAndVerifyCompletion( appResource = PYSPARK_SCALE, mainClass = "", - expectedLogOnCompletion = Seq( + expectedDriverLogOnCompletion = Seq( "Finished waiting, stopping Spark", "Decommission executors"), appArgs = Array.empty[String], diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala index 8f6e9cd8af740..760e9ba55d335 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala @@ -177,7 +177,7 @@ private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite => runSparkApplicationAndVerifyCompletion( appResource = pySparkFiles, mainClass = "", - expectedLogOnCompletion = Seq( + expectedDriverLogOnCompletion = Seq( "Python runtime version check is: True", "Python environment version check is: True", "Python runtime version check for executor is: True"), diff --git 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index cc226b341916d..193a02aad0cea 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -171,6 +171,7 @@ class KubernetesSuite extends SparkFunSuite appResource, SPARK_PI_MAIN_CLASS, Seq("Pi is roughly 3"), + Seq(), appArgs, driverPodChecker, executorPodChecker, @@ -192,6 +193,7 @@ class KubernetesSuite extends SparkFunSuite SPARK_DFS_READ_WRITE_TEST, Seq(s"Success! Local Word Count $wordCount and " + s"DFS Word Count $wordCount agree."), + Seq(), appArgs, driverPodChecker, executorPodChecker, @@ -212,6 +214,7 @@ class KubernetesSuite extends SparkFunSuite appResource, SPARK_REMOTE_MAIN_CLASS, Seq(s"Mounting of ${appArgs.head} was true"), + Seq(), appArgs, driverPodChecker, executorPodChecker, @@ -261,7 +264,8 @@ class KubernetesSuite extends SparkFunSuite protected def runSparkApplicationAndVerifyCompletion( appResource: String, mainClass: String, - expectedLogOnCompletion: Seq[String], + expectedDriverLogOnCompletion: Seq[String], + expectedExecutorLogOnCompletion: Seq[String] = Seq(), appArgs: Array[String], driverPodChecker: Pod => Unit, executorPodChecker: Pod => Unit, @@ -374,7 +378,6 @@ class KubernetesSuite extends SparkFunSuite .list() .getItems .get(0) - driverPodChecker(driverPod) // If we're testing decommissioning we an executors, but we should have an executor @@ -383,14 +386,35 @@ class KubernetesSuite extends SparkFunSuite execPods.values.nonEmpty should be (true) } execPods.values.foreach(executorPodChecker(_)) + + val execPod: Option[Pod] = if (expectedExecutorLogOnCompletion.nonEmpty) { + Some(kubernetesTestComponents.kubernetesClient + .pods() + .withLabel("spark-app-locator", appLocator) + .withLabel("spark-role", "executor") + .list() + .getItems + .get(0)) + } else { + None + } + Eventually.eventually(patienceTimeout, patienceInterval) { - expectedLogOnCompletion.foreach { e => + expectedDriverLogOnCompletion.foreach { e => assert(kubernetesTestComponents.kubernetesClient .pods() .withName(driverPod.getMetadata.getName) .getLog .contains(e), - s"The application did not complete, did not find str ${e}") + s"The application did not complete, driver log did not contain str ${e}") + } + expectedExecutorLogOnCompletion.foreach { e => + assert(kubernetesTestComponents.kubernetesClient + .pods() + .withName(execPod.get.getMetadata.getName) + .getLog + .contains(e), + s"The application did not complete, executor log did not contain str ${e}") } } execWatcher.close() diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PythonTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PythonTestsSuite.scala index bad6f1c1021ba..457a766cae124 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PythonTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PythonTestsSuite.scala @@ -27,7 +27,7 @@ private[spark] trait PythonTestsSuite { 
k8sSuite: KubernetesSuite => runSparkApplicationAndVerifyCompletion( appResource = PYSPARK_PI, mainClass = "", - expectedLogOnCompletion = Seq("Pi is roughly 3"), + expectedDriverLogOnCompletion = Seq("Pi is roughly 3"), appArgs = Array("5"), driverPodChecker = doBasicDriverPyPodCheck, executorPodChecker = doBasicExecutorPyPodCheck, @@ -41,7 +41,7 @@ private[spark] trait PythonTestsSuite { k8sSuite: KubernetesSuite => runSparkApplicationAndVerifyCompletion( appResource = PYSPARK_FILES, mainClass = "", - expectedLogOnCompletion = Seq( + expectedDriverLogOnCompletion = Seq( "Python runtime version check is: True", "Python environment version check is: True", "Python runtime version check for executor is: True"), @@ -61,7 +61,7 @@ private[spark] trait PythonTestsSuite { k8sSuite: KubernetesSuite => runSparkApplicationAndVerifyCompletion( appResource = PYSPARK_MEMORY_CHECK, mainClass = "", - expectedLogOnCompletion = Seq( + expectedDriverLogOnCompletion = Seq( "PySpark Worker Memory Check is: True"), appArgs = Array(s"$additionalMemoryInBytes"), driverPodChecker = doDriverMemoryCheck, diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/RTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/RTestsSuite.scala index b7c8886a15ae7..a22066c18064c 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/RTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/RTestsSuite.scala @@ -26,7 +26,7 @@ private[spark] trait RTestsSuite { k8sSuite: KubernetesSuite => runSparkApplicationAndVerifyCompletion( appResource = SPARK_R_DATAFRAME_TEST, mainClass = "", - expectedLogOnCompletion = Seq("name: string (nullable = true)", "1 Justin"), + expectedDriverLogOnCompletion = Seq("name: string (nullable = true)", "1 Justin"), appArgs = Array.empty[String], driverPodChecker = doBasicDriverRPodCheck, executorPodChecker = doBasicExecutorRPodCheck, diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala index 6d15201d19796..5d3b426598fdd 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala @@ -16,14 +16,11 @@ */ package org.apache.spark.deploy.k8s.integrationtest -import java.io.{BufferedWriter, File, FileWriter} +import java.io.File import java.net.URL +import java.nio.file.Files -import scala.io.{BufferedSource, Source} - -import io.fabric8.kubernetes.api.model._ - -import org.apache.spark.internal.config +import scala.io.Source private[spark] trait SparkConfPropagateSuite { k8sSuite: KubernetesSuite => import KubernetesSuite.{k8sTestTag, SPARK_PI_MAIN_CLASS} @@ -38,18 +35,21 @@ private[spark] trait SparkConfPropagateSuite { k8sSuite: KubernetesSuite => val logConfFilePath = s"${sparkHomeDir.toFile}/conf/log4j.properties" try { - val writer = new BufferedWriter(new FileWriter(logConfFilePath)) - writer.write(content) - writer.close() + Files.write(new File(logConfFilePath).toPath, 
content.getBytes) sparkAppConf.set("spark.driver.extraJavaOptions", "-Dlog4j.debug") + sparkAppConf.set("spark.executor.extraJavaOptions", "-Dlog4j.debug") + + val log4jExpectedLog = + s"log4j: Reading configuration from URL file:/opt/spark/conf/log4j.properties" runSparkApplicationAndVerifyCompletion( appResource = containerLocalSparkDistroExamplesJar, mainClass = SPARK_PI_MAIN_CLASS, - expectedLogOnCompletion = (Seq("DEBUG", - s"log4j: Reading configuration from URL file:/opt/spark/conf/log4j.properties", + expectedDriverLogOnCompletion = (Seq("DEBUG", + log4jExpectedLog, "Pi is roughly 3")), + expectedExecutorLogOnCompletion = Seq(log4jExpectedLog), appArgs = Array.empty[String], driverPodChecker = doBasicDriverPodCheck, executorPodChecker = doBasicExecutorPodCheck, From a082f4600b1cb814442beed1b578bc3430a257a7 Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Wed, 2 Dec 2020 17:51:22 +0000 Subject: [PATCH 0640/1009] [SPARK-33071][SPARK-33536][SQL] Avoid changing dataset_id of LogicalPlan in join() to not break DetectAmbiguousSelfJoin ### What changes were proposed in this pull request? Currently, `join()` uses `withPlan(logicalPlan)` for convenient to call some Dataset functions. But it leads to the `dataset_id` inconsistent between the `logicalPlan` and the original `Dataset`(because `withPlan(logicalPlan)` will create a new Dataset with the new id and reset the `dataset_id` with the new id of the `logicalPlan`). As a result, it breaks the rule `DetectAmbiguousSelfJoin`. In this PR, we propose to drop the usage of `withPlan` but use the `logicalPlan` directly so its `dataset_id` doesn't change. Besides, this PR also removes related metadata (`DATASET_ID_KEY`, `COL_POS_KEY`) when an `Alias` tries to construct its own metadata. Because the `Alias` is no longer a reference column after converting to an `Attribute`. To achieve that, we add a new field, `deniedMetadataKeys`, to indicate the metadata that needs to be removed. ### Why are the changes needed? For the query below, it returns the wrong result while it should throws ambiguous self join exception instead: ```scala val emp1 = Seq[TestData]( TestData(1, "sales"), TestData(2, "personnel"), TestData(3, "develop"), TestData(4, "IT")).toDS() val emp2 = Seq[TestData]( TestData(1, "sales"), TestData(2, "personnel"), TestData(3, "develop")).toDS() val emp3 = emp1.join(emp2, emp1("key") === emp2("key")).select(emp1("*")) emp1.join(emp3, emp1.col("key") === emp3.col("key"), "left_outer") .select(emp1.col("*"), emp3.col("key").as("e2")).show() // wrong result +---+---------+---+ |key| value| e2| +---+---------+---+ | 1| sales| 1| | 2|personnel| 2| | 3| develop| 3| | 4| IT| 4| +---+---------+---+ ``` This PR fixes the wrong behaviour. ### Does this PR introduce _any_ user-facing change? Yes, users hit the exception instead of the wrong result after this PR. ### How was this patch tested? Added a new unit test. Closes #30488 from Ngone51/fix-self-join. 
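Building on the `emp1`/`emp3` example above, a rough sketch of what a test would assert after this fix (illustrative only, assuming a ScalaTest context; the exact error message may differ):

```scala
// With the default spark.sql.analyzer.failAmbiguousSelfJoin=true, the query is now
// rejected by DetectAmbiguousSelfJoin instead of silently returning the wrong rows.
import org.apache.spark.sql.AnalysisException

val thrown = intercept[AnalysisException] {
  emp1.join(emp3, emp1.col("key") === emp3.col("key"), "left_outer")
    .select(emp1.col("*"), emp3.col("key").as("e2"))
    .collect()
}
assert(thrown.getMessage.toLowerCase.contains("ambiguous"))
```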
Authored-by: yi.wu Signed-off-by: Wenchen Fan --- .../catalyst/expressions/AliasHelper.scala | 3 +- .../expressions/namedExpressions.scala | 15 +++++-- .../scala/org/apache/spark/sql/Column.scala | 5 ++- .../scala/org/apache/spark/sql/Dataset.scala | 39 +++++++++++-------- .../spark/sql/DataFrameSelfJoinSuite.scala | 29 ++++++++++++++ .../sql/SparkSessionExtensionSuite.scala | 7 ++-- 6 files changed, 73 insertions(+), 25 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala index ec47875754a6f..c61eb68db5bfa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala @@ -89,7 +89,8 @@ trait AliasHelper { a.copy(child = trimAliases(a.child))( exprId = a.exprId, qualifier = a.qualifier, - explicitMetadata = Some(a.metadata)) + explicitMetadata = Some(a.metadata), + deniedMetadataKeys = a.deniedMetadataKeys) case a: MultiAlias => a.copy(child = trimAliases(a.child)) case other => trimAliases(other) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala index 2abd9d7bb4423..22aabd3c6b30b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala @@ -143,11 +143,14 @@ abstract class Attribute extends LeafExpression with NamedExpression with NullIn * fully qualified way. Consider the examples tableName.name, subQueryAlias.name. * tableName and subQueryAlias are possible qualifiers. * @param explicitMetadata Explicit metadata associated with this alias that overwrites child's. + * @param deniedMetadataKeys Keys of metadata entries that are supposed to be removed when + * inheriting the metadata from the child. */ case class Alias(child: Expression, name: String)( val exprId: ExprId = NamedExpression.newExprId, val qualifier: Seq[String] = Seq.empty, - val explicitMetadata: Option[Metadata] = None) + val explicitMetadata: Option[Metadata] = None, + val deniedMetadataKeys: Seq[String] = Seq.empty) extends UnaryExpression with NamedExpression { // Alias(Generator, xx) need to be transformed into Generate(generator, ...) 
@@ -167,7 +170,11 @@ case class Alias(child: Expression, name: String)( override def metadata: Metadata = { explicitMetadata.getOrElse { child match { - case named: NamedExpression => named.metadata + case named: NamedExpression => + val builder = new MetadataBuilder().withMetadata(named.metadata) + deniedMetadataKeys.foreach(builder.remove) + builder.build() + case _ => Metadata.empty } } @@ -194,7 +201,7 @@ case class Alias(child: Expression, name: String)( override def toString: String = s"$child AS $name#${exprId.id}$typeSuffix$delaySuffix" override protected final def otherCopyArgs: Seq[AnyRef] = { - exprId :: qualifier :: explicitMetadata :: Nil + exprId :: qualifier :: explicitMetadata :: deniedMetadataKeys :: Nil } override def hashCode(): Int = { @@ -205,7 +212,7 @@ case class Alias(child: Expression, name: String)( override def equals(other: Any): Boolean = other match { case a: Alias => name == a.name && exprId == a.exprId && child == a.child && qualifier == a.qualifier && - explicitMetadata == a.explicitMetadata + explicitMetadata == a.explicitMetadata && deniedMetadataKeys == a.deniedMetadataKeys case _ => false } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 95134d9111593..86ba81340272b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -1164,7 +1164,10 @@ class Column(val expr: Expression) extends Logging { * @since 2.0.0 */ def name(alias: String): Column = withExpr { - Alias(normalizedExpr(), alias)() + // SPARK-33536: The Alias is no longer a column reference after converting to an attribute. + // These denied metadata keys are used to strip the column reference related metadata for + // the Alias. So it won't be caught as a column reference in DetectAmbiguousSelfJoin. + Alias(expr, alias)(deniedMetadataKeys = Seq(Dataset.DATASET_ID_KEY, Dataset.COL_POS_KEY)) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 2c38a65ac2106..0716043bcf660 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -231,7 +231,8 @@ class Dataset[T] private[sql]( case _ => queryExecution.analyzed } - if (sparkSession.sessionState.conf.getConf(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED)) { + if (sparkSession.sessionState.conf.getConf(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED) && + plan.getTagValue(Dataset.DATASET_ID_TAG).isEmpty) { plan.setTagValue(Dataset.DATASET_ID_TAG, id) } plan @@ -259,15 +260,16 @@ class Dataset[T] private[sql]( private[sql] def resolve(colName: String): NamedExpression = { val resolver = sparkSession.sessionState.analyzer.resolver queryExecution.analyzed.resolveQuoted(colName, resolver) - .getOrElse { - val fields = schema.fieldNames - val extraMsg = if (fields.exists(resolver(_, colName))) { - s"; did you mean to quote the `$colName` column?" 
- } else "" - val fieldsStr = fields.mkString(", ") - val errorMsg = s"""Cannot resolve column name "$colName" among (${fieldsStr})${extraMsg}""" - throw new AnalysisException(errorMsg) - } + .getOrElse(throw resolveException(colName, schema.fieldNames)) + } + + private def resolveException(colName: String, fields: Array[String]): AnalysisException = { + val extraMsg = if (fields.exists(sparkSession.sessionState.analyzer.resolver(_, colName))) { + s"; did you mean to quote the `$colName` column?" + } else "" + val fieldsStr = fields.mkString(", ") + val errorMsg = s"""Cannot resolve column name "$colName" among (${fieldsStr})${extraMsg}""" + new AnalysisException(errorMsg) } private[sql] def numericColumns: Seq[Expression] = { @@ -1083,8 +1085,8 @@ class Dataset[T] private[sql]( } // If left/right have no output set intersection, return the plan. - val lanalyzed = withPlan(this.logicalPlan).queryExecution.analyzed - val ranalyzed = withPlan(right.logicalPlan).queryExecution.analyzed + val lanalyzed = this.queryExecution.analyzed + val ranalyzed = right.queryExecution.analyzed if (lanalyzed.outputSet.intersect(ranalyzed.outputSet).isEmpty) { return withPlan(plan) } @@ -1092,17 +1094,22 @@ class Dataset[T] private[sql]( // Otherwise, find the trivially true predicates and automatically resolves them to both sides. // By the time we get here, since we have already run analysis, all attributes should've been // resolved and become AttributeReference. + val resolver = sparkSession.sessionState.analyzer.resolver val cond = plan.condition.map { _.transform { case catalyst.expressions.EqualTo(a: AttributeReference, b: AttributeReference) if a.sameRef(b) => catalyst.expressions.EqualTo( - withPlan(plan.left).resolve(a.name), - withPlan(plan.right).resolve(b.name)) + plan.left.resolveQuoted(a.name, resolver) + .getOrElse(throw resolveException(a.name, plan.left.schema.fieldNames)), + plan.right.resolveQuoted(b.name, resolver) + .getOrElse(throw resolveException(b.name, plan.right.schema.fieldNames))) case catalyst.expressions.EqualNullSafe(a: AttributeReference, b: AttributeReference) if a.sameRef(b) => catalyst.expressions.EqualNullSafe( - withPlan(plan.left).resolve(a.name), - withPlan(plan.right).resolve(b.name)) + plan.left.resolveQuoted(a.name, resolver) + .getOrElse(throw resolveException(a.name, plan.left.schema.fieldNames)), + plan.right.resolveQuoted(b.name, resolver) + .getOrElse(throw resolveException(b.name, plan.right.schema.fieldNames))) }} withPlan { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala index 3b3b54f75da57..50846d9d12b97 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala @@ -21,6 +21,7 @@ import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions.{count, sum} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.test.SQLTestData.TestData class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession { import testImplicits._ @@ -219,4 +220,32 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession { Seq((1, 2), (1, 2), (2, 4), (2, 4)).map(Row.fromTuple)) } } + + test("SPARK-33071/SPARK-33536: Avoid changing dataset_id of LogicalPlan in join() " + + "to not break DetectAmbiguousSelfJoin") { + val emp1 = Seq[TestData]( + 
TestData(1, "sales"), + TestData(2, "personnel"), + TestData(3, "develop"), + TestData(4, "IT")).toDS() + val emp2 = Seq[TestData]( + TestData(1, "sales"), + TestData(2, "personnel"), + TestData(3, "develop")).toDS() + val emp3 = emp1.join(emp2, emp1("key") === emp2("key")).select(emp1("*")) + assertAmbiguousSelfJoin(emp1.join(emp3, emp1.col("key") === emp3.col("key"), + "left_outer").select(emp1.col("*"), emp3.col("key").as("e2"))) + } + + test("df.show() should also not change dataset_id of LogicalPlan") { + val df = Seq[TestData]( + TestData(1, "sales"), + TestData(2, "personnel"), + TestData(3, "develop"), + TestData(4, "IT")).toDF() + val ds_id1 = df.logicalPlan.getTagValue(Dataset.DATASET_ID_TAG) + df.show(0) + val ds_id2 = df.logicalPlan.getTagValue(Dataset.DATASET_ID_TAG) + assert(ds_id1 === ds_id2) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala index 12abd31b99e93..f02d2041dd7f3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala @@ -573,8 +573,9 @@ class ColumnarBoundReference(ordinal: Int, dataType: DataType, nullable: Boolean class ColumnarAlias(child: ColumnarExpression, name: String)( override val exprId: ExprId = NamedExpression.newExprId, override val qualifier: Seq[String] = Seq.empty, - override val explicitMetadata: Option[Metadata] = None) - extends Alias(child, name)(exprId, qualifier, explicitMetadata) + override val explicitMetadata: Option[Metadata] = None, + override val deniedMetadataKeys: Seq[String] = Seq.empty) + extends Alias(child, name)(exprId, qualifier, explicitMetadata, deniedMetadataKeys) with ColumnarExpression { override def columnarEval(batch: ColumnarBatch): Any = child.columnarEval(batch) @@ -711,7 +712,7 @@ case class PreRuleReplaceAddWithBrokenVersion() extends Rule[SparkPlan] { def replaceWithColumnarExpression(exp: Expression): ColumnarExpression = exp match { case a: Alias => new ColumnarAlias(replaceWithColumnarExpression(a.child), - a.name)(a.exprId, a.qualifier, a.explicitMetadata) + a.name)(a.exprId, a.qualifier, a.explicitMetadata, a.deniedMetadataKeys) case att: AttributeReference => new ColumnarAttributeReference(att.name, att.dataType, att.nullable, att.metadata)(att.exprId, att.qualifier) From b76c6b759c8dd549290aa174b62b8d34ea34aa3f Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Wed, 2 Dec 2020 12:44:39 -0800 Subject: [PATCH 0641/1009] [SPARK-33627][SQL] Add new function UNIX_SECONDS, UNIX_MILLIS and UNIX_MICROS ### What changes were proposed in this pull request? As https://github.com/apache/spark/pull/28534 adds functions from [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/timestamp_functions) for converting numbers to timestamp, this PR is to add functions UNIX_SECONDS, UNIX_MILLIS and UNIX_MICROS for converting timestamp to numbers. ### Why are the changes needed? 1. Symmetry of the conversion functions 2. Casting timestamp type to numeric types is disallowed in ANSI mode, we should provide functions for users to complete the conversion. ### Does this PR introduce _any_ user-facing change? 3 new functions UNIX_SECONDS, UNIX_MILLIS and UNIX_MICROS for converting timestamp to long type. ### How was this patch tested? Unit tests. Closes #30566 from gengliangwang/timestampLong. 
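A quick usage sketch of the three new functions, mirroring the examples added to the expression descriptions below (expected values shown as comments):

```scala
// unix_seconds and unix_millis truncate higher levels of precision (floor division);
// unix_micros is exact since timestamps have microsecond precision.
spark.sql("""
  SELECT
    unix_seconds(TIMESTAMP('1970-01-01 00:00:01Z')), -- 1
    unix_millis(TIMESTAMP('1970-01-01 00:00:01Z')),  -- 1000
    unix_micros(TIMESTAMP('1970-01-01 00:00:01Z'))   -- 1000000
""").show()
```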
Authored-by: Gengliang Wang Signed-off-by: Dongjoon Hyun --- .../catalyst/analysis/FunctionRegistry.scala | 3 + .../expressions/datetimeExpressions.scala | 73 +++++++++++++++++++ .../expressions/DateExpressionsSuite.scala | 45 ++++++++++++ .../sql-functions/sql-expression-schema.md | 5 +- .../resources/sql-tests/inputs/datetime.sql | 4 + .../sql-tests/results/ansi/datetime.sql.out | 26 ++++++- .../sql-tests/results/datetime-legacy.sql.out | 26 ++++++- .../sql-tests/results/datetime.sql.out | 26 ++++++- 8 files changed, 204 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 6fb9bed9625d5..5c2816a0baa95 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -430,6 +430,9 @@ object FunctionRegistry { expression[SecondsToTimestamp]("timestamp_seconds"), expression[MillisToTimestamp]("timestamp_millis"), expression[MicrosToTimestamp]("timestamp_micros"), + expression[UnixSeconds]("unix_seconds"), + expression[UnixMillis]("unix_millis"), + expression[UnixMicros]("unix_micros"), // collection functions expression[CreateArray]("array"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 424887a13cb97..60dc32c1571fe 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -524,6 +524,79 @@ case class MicrosToTimestamp(child: Expression) override def prettyName: String = "timestamp_micros" } +abstract class TimestampToLongBase extends UnaryExpression + with ExpectsInputTypes with NullIntolerant { + + protected def scaleFactor: Long + + override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType) + + override def dataType: DataType = LongType + + override def nullSafeEval(input: Any): Any = { + Math.floorDiv(input.asInstanceOf[Number].longValue(), scaleFactor) + } + + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + if (scaleFactor == 1) { + defineCodeGen(ctx, ev, c => c) + } else { + defineCodeGen(ctx, ev, c => s"java.lang.Math.floorDiv($c, ${scaleFactor}L)") + } + } +} + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(timestamp) - Returns the number of seconds since 1970-01-01 00:00:00 UTC. Truncates higher levels of precision.", + examples = """ + Examples: + > SELECT _FUNC_(TIMESTAMP('1970-01-01 00:00:01Z')); + 1 + """, + group = "datetime_funcs", + since = "3.1.0") +// scalastyle:on line.size.limit +case class UnixSeconds(child: Expression) extends TimestampToLongBase { + override def scaleFactor: Long = MICROS_PER_SECOND + + override def prettyName: String = "unix_seconds" +} + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(timestamp) - Returns the number of milliseconds since 1970-01-01 00:00:00 UTC. 
Truncates higher levels of precision.", + examples = """ + Examples: + > SELECT _FUNC_(TIMESTAMP('1970-01-01 00:00:01Z')); + 1000 + """, + group = "datetime_funcs", + since = "3.1.0") +// scalastyle:on line.size.limit +case class UnixMillis(child: Expression) extends TimestampToLongBase { + override def scaleFactor: Long = MICROS_PER_MILLIS + + override def prettyName: String = "unix_millis" +} + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(timestamp) - Returns the number of microseconds since 1970-01-01 00:00:00 UTC.", + examples = """ + Examples: + > SELECT _FUNC_(TIMESTAMP('1970-01-01 00:00:01Z')); + 1000000 + """, + group = "datetime_funcs", + since = "3.1.0") +// scalastyle:on line.size.limit +case class UnixMicros(child: Expression) extends TimestampToLongBase { + override def scaleFactor: Long = 1L + + override def prettyName: String = "unix_micros" +} + @ExpressionDescription( usage = "_FUNC_(date) - Returns the year component of the date/timestamp.", examples = """ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 587ca0cdbed6e..8a1a34276341d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -1245,6 +1245,51 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkResult(Int.MinValue.toLong - 100) } + test("UNIX_SECONDS") { + checkEvaluation(UnixSeconds(Literal(null, TimestampType)), null) + var timestamp = Literal(new Timestamp(0L)) + checkEvaluation(UnixSeconds(timestamp), 0L) + timestamp = Literal(new Timestamp(1000L)) + checkEvaluation(UnixSeconds(timestamp), 1L) + timestamp = Literal(new Timestamp(-1000L)) + checkEvaluation(UnixSeconds(timestamp), -1L) + // -1ms is considered to be in -1st second, as 0-999ms is in 0th second. 
+ timestamp = Literal(new Timestamp(-1L)) + checkEvaluation(UnixSeconds(timestamp), -1L) + timestamp = Literal(new Timestamp(-1000L)) + checkEvaluation(UnixSeconds(timestamp), -1L) + // Truncates higher levels of precision + timestamp = Literal(new Timestamp(1999L)) + checkEvaluation(UnixSeconds(timestamp), 1L) + } + + test("UNIX_MILLIS") { + checkEvaluation(UnixMillis(Literal(null, TimestampType)), null) + var timestamp = Literal(new Timestamp(0L)) + checkEvaluation(UnixMillis(timestamp), 0L) + timestamp = Literal(new Timestamp(1000L)) + checkEvaluation(UnixMillis(timestamp), 1000L) + timestamp = Literal(new Timestamp(-1000L)) + checkEvaluation(UnixMillis(timestamp), -1000L) + // Truncates higher levels of precision + val timestampWithNanos = new Timestamp(1000L) + timestampWithNanos.setNanos(999999) + checkEvaluation(UnixMillis(Literal(timestampWithNanos)), 1000L) + } + + test("UNIX_MICROS") { + checkEvaluation(UnixMicros(Literal(null, TimestampType)), null) + var timestamp = Literal(new Timestamp(0L)) + checkEvaluation(UnixMicros(timestamp), 0L) + timestamp = Literal(new Timestamp(1000L)) + checkEvaluation(UnixMicros(timestamp), 1000000L) + timestamp = Literal(new Timestamp(-1000L)) + checkEvaluation(UnixMicros(timestamp), -1000000L) + val timestampWithNanos = new Timestamp(1000L) + timestampWithNanos.setNanos(1000) // 1 microsecond + checkEvaluation(UnixMicros(Literal(timestampWithNanos)), 1000001L) + } + test("TIMESTAMP_SECONDS") { def testIntegralFunc(value: Number): Unit = { checkEvaluation( diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 0a54dff3a1cea..861062a1f7705 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,6 +1,6 @@ ## Summary - - Number of queries: 342 + - Number of queries: 345 - Number of expressions that missing example: 13 - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint,window ## Schema of Built-in Functions @@ -289,6 +289,9 @@ | org.apache.spark.sql.catalyst.expressions.UnaryMinus | negative | SELECT negative(1) | struct | | org.apache.spark.sql.catalyst.expressions.UnaryPositive | positive | SELECT positive(1) | struct<(+ 1):int> | | org.apache.spark.sql.catalyst.expressions.Unhex | unhex | SELECT decode(unhex('537061726B2053514C'), 'UTF-8') | struct | +| org.apache.spark.sql.catalyst.expressions.UnixMicros | unix_micros | SELECT unix_micros(TIMESTAMP('1970-01-01 00:00:01Z')) | struct | +| org.apache.spark.sql.catalyst.expressions.UnixMillis | unix_millis | SELECT unix_millis(TIMESTAMP('1970-01-01 00:00:01Z')) | struct | +| org.apache.spark.sql.catalyst.expressions.UnixSeconds | unix_seconds | SELECT unix_seconds(TIMESTAMP('1970-01-01 00:00:01Z')) | struct | | org.apache.spark.sql.catalyst.expressions.UnixTimestamp | unix_timestamp | SELECT unix_timestamp() | struct | | org.apache.spark.sql.catalyst.expressions.Upper | ucase | SELECT ucase('SparkSql') | struct | | org.apache.spark.sql.catalyst.expressions.Upper | upper | SELECT upper('SparkSql') | struct | diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql index 534e222b7c13e..c2ccb3ee0db06 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql @@ -14,6 +14,10 @@ select 
TIMESTAMP_MILLIS(-92233720368547758); select TIMESTAMP_SECONDS(0.1234567); -- truncation is OK for float/double select TIMESTAMP_SECONDS(0.1234567d), TIMESTAMP_SECONDS(FLOAT(0.1234567)); +-- UNIX_SECONDS, UNIX_MILLISECONDS and UNIX_MICROSECONDS +select UNIX_SECONDS(TIMESTAMP('2020-12-01 14:30:08Z')), UNIX_SECONDS(TIMESTAMP('2020-12-01 14:30:08.999999Z')), UNIX_SECONDS(null); +select UNIX_MILLIS(TIMESTAMP('2020-12-01 14:30:08Z')), UNIX_MILLIS(TIMESTAMP('2020-12-01 14:30:08.999999Z')), UNIX_MILLIS(null); +select UNIX_MICROS(TIMESTAMP('2020-12-01 14:30:08Z')), UNIX_MICROS(TIMESTAMP('2020-12-01 14:30:08.999999Z')), UNIX_MICROS(null); -- [SPARK-16836] current_date and current_timestamp literals select current_date = current_date(), current_timestamp = current_timestamp(); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out index 10669f14aa87b..9d99d3b870b3f 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 117 +-- Number of queries: 120 -- !query @@ -87,6 +87,30 @@ struct +-- !query output +1606833008 1606833008 NULL + + +-- !query +select UNIX_MILLIS(TIMESTAMP('2020-12-01 14:30:08Z')), UNIX_MILLIS(TIMESTAMP('2020-12-01 14:30:08.999999Z')), UNIX_MILLIS(null) +-- !query schema +struct +-- !query output +1606833008000 1606833008999 NULL + + +-- !query +select UNIX_MICROS(TIMESTAMP('2020-12-01 14:30:08Z')), UNIX_MICROS(TIMESTAMP('2020-12-01 14:30:08.999999Z')), UNIX_MICROS(null) +-- !query schema +struct +-- !query output +1606833008000000 1606833008999999 NULL + + -- !query select current_date = current_date(), current_timestamp = current_timestamp() -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out index 7c2c62a2db496..73e9823d96a73 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 117 +-- Number of queries: 120 -- !query @@ -87,6 +87,30 @@ struct +-- !query output +1606833008 1606833008 NULL + + +-- !query +select UNIX_MILLIS(TIMESTAMP('2020-12-01 14:30:08Z')), UNIX_MILLIS(TIMESTAMP('2020-12-01 14:30:08.999999Z')), UNIX_MILLIS(null) +-- !query schema +struct +-- !query output +1606833008000 1606833008999 NULL + + +-- !query +select UNIX_MICROS(TIMESTAMP('2020-12-01 14:30:08Z')), UNIX_MICROS(TIMESTAMP('2020-12-01 14:30:08.999999Z')), UNIX_MICROS(null) +-- !query schema +struct +-- !query output +1606833008000000 1606833008999999 NULL + + -- !query select current_date = current_date(), current_timestamp = current_timestamp() -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index 810ab6ef0cbfc..2c39c1291aa70 100755 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 117 +-- Number of queries: 120 -- !query @@ -87,6 +87,30 @@ struct +-- !query output +1606833008 1606833008 NULL + + +-- !query +select 
UNIX_MILLIS(TIMESTAMP('2020-12-01 14:30:08Z')), UNIX_MILLIS(TIMESTAMP('2020-12-01 14:30:08.999999Z')), UNIX_MILLIS(null) +-- !query schema +struct +-- !query output +1606833008000 1606833008999 NULL + + +-- !query +select UNIX_MICROS(TIMESTAMP('2020-12-01 14:30:08Z')), UNIX_MICROS(TIMESTAMP('2020-12-01 14:30:08.999999Z')), UNIX_MICROS(null) +-- !query schema +struct +-- !query output +1606833008000000 1606833008999999 NULL + + -- !query select current_date = current_date(), current_timestamp = current_timestamp() -- !query schema From 92bfbcb2e372e8fecfe65bc582c779d9df4036bb Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Wed, 2 Dec 2020 12:58:41 -0800 Subject: [PATCH 0642/1009] [SPARK-33631][DOCS][TEST] Clean up spark.core.connection.ack.wait.timeout from configuration.md ### What changes were proposed in this pull request? SPARK-9767 remove `ConnectionManager` and related files, the configuration `spark.core.connection.ack.wait.timeout` previously used by `ConnectionManager` is no longer used by other Spark code, but it still exists in the `configuration.md`. So this pr cleans up the useless configuration item spark.core.connection.ack.wait.timeout` from `configuration.md`. ### Why are the changes needed? Clean up useless configuration from `configuration.md`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass the Jenkins or GitHub Action Closes #30569 from LuciferYang/SPARK-33631. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- .../spark/storage/BlockManagerReplicationSuite.scala | 2 -- docs/configuration.md | 11 ----------- 2 files changed, 13 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerReplicationSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerReplicationSuite.scala index 0b673c580d71f..1e9b48102616f 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerReplicationSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerReplicationSuite.scala @@ -95,8 +95,6 @@ trait BlockManagerReplicationBehavior extends SparkFunSuite conf.set(MEMORY_STORAGE_FRACTION, 0.999) conf.set(STORAGE_UNROLL_MEMORY_THRESHOLD, 512L) - // to make a replication attempt to inactive store fail fast - conf.set("spark.core.connection.ack.wait.timeout", "1s") // to make cached peers refresh frequently conf.set(STORAGE_CACHED_PEERS_TTL, 10) diff --git a/docs/configuration.md b/docs/configuration.md index d4d8e47645921..21506e6901263 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1919,7 +1919,6 @@ Apart from these, the following properties are also available, and may be useful 120s Default timeout for all network interactions. This config will be used in place of - spark.core.connection.ack.wait.timeout, spark.storage.blockManagerHeartbeatTimeoutMs, spark.shuffle.io.connectionTimeout, spark.rpc.askTimeout or spark.rpc.lookupTimeout if they are not configured. @@ -1982,16 +1981,6 @@ Apart from these, the following properties are also available, and may be useful 1.4.0 - - spark.core.connection.ack.wait.timeout - spark.network.timeout - - How long for the connection to wait for ack to occur before timing - out and giving up. To avoid unwilling timeout caused by long pause like GC, - you can set larger value. 
- - 1.1.1 - spark.network.maxRemoteBlockSizeFetchToMem 200m From f94cb53a90558285541090d484a6ae9938fe02e8 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 3 Dec 2020 09:34:42 +0900 Subject: [PATCH 0643/1009] [MINOR][INFRA] Use the latest image for GitHub Action jobs ### What changes were proposed in this pull request? Currently, GitHub Action is using two docker images. ``` $ git grep dongjoon/apache-spark-github-action-image .github/workflows/build_and_test.yml: image: dongjoon/apache-spark-github-action-image:20201015 .github/workflows/build_and_test.yml: image: dongjoon/apache-spark-github-action-image:20201025 ``` This PR aims to make it consistent by using the latest one. ``` - image: dongjoon/apache-spark-github-action-image:20201015 + image: dongjoon/apache-spark-github-action-image:20201025 ``` ### Why are the changes needed? This is for better maintainability. The image size is almost the same. ``` $ docker images | grep 202010 dongjoon/apache-spark-github-action-image 20201025 37adfa3d226a 5 weeks ago 2.18GB dongjoon/apache-spark-github-action-image 20201015 ff6fee8dc36d 6 weeks ago 2.16GB ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the GitHub Action. Closes #30578 from dongjoon-hyun/SPARK-MINOR. Authored-by: Dongjoon Hyun Signed-off-by: HyukjinKwon --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index b2b6a38916eeb..a3bb083387f3e 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -153,7 +153,7 @@ jobs: name: "Build modules: ${{ matrix.modules }}" runs-on: ubuntu-20.04 container: - image: dongjoon/apache-spark-github-action-image:20201015 + image: dongjoon/apache-spark-github-action-image:20201025 strategy: fail-fast: false matrix: From 4f9667035886a67e6c9a4e8fad2efa390e87ca68 Mon Sep 17 00:00:00 2001 From: uncleGen Date: Wed, 2 Dec 2020 17:11:51 -0800 Subject: [PATCH 0644/1009] [SPARK-31953][SS] Add Spark Structured Streaming History Server Support ### What changes were proposed in this pull request? Add Spark Structured Streaming History Server Support. ### Why are the changes needed? Add a streaming query history server plugin. ![image](https://user-images.githubusercontent.com/7402327/84248291-d26cfe80-ab3b-11ea-86d2-98205fa2bcc4.png) ![image](https://user-images.githubusercontent.com/7402327/84248347-e44ea180-ab3b-11ea-81de-eefe207656f2.png) ![image](https://user-images.githubusercontent.com/7402327/84248396-f0d2fa00-ab3b-11ea-9b0d-e410115471b0.png) - Follow-ups - Query duration should not update in history UI. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Update UT. Closes #28781 from uncleGen/SPARK-31953. 
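For the new Structured Streaming tab to appear in the history server, the application has to write event logs that include the streaming query events; a minimal, hypothetical configuration sketch (paths and app name are placeholders):

```scala
import org.apache.spark.sql.SparkSession

// Enable event logging so StreamingQueryListener events end up in the event log
// and can be replayed by the history server plugin added in this patch.
// The directory is a placeholder; it should match spark.history.fs.logDirectory.
val spark = SparkSession.builder()
  .master("local[2]")
  .appName("streaming-history-ui-demo")
  .config("spark.eventLog.enabled", "true")
  .config("spark.eventLog.dir", "file:///tmp/spark-events")
  .getOrCreate()
```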
Lead-authored-by: uncleGen Co-authored-by: Genmao Yu Co-authored-by: Yuanjian Li Signed-off-by: Shixiong Zhu --- dev/.rat-excludes | 1 + ...apache.spark.status.AppHistoryServerPlugin | 1 + .../streaming/StreamingQueryListenerBus.scala | 26 ++- .../StreamingQueryHistoryServerPlugin.scala | 43 +++++ .../ui/StreamingQueryStatusStore.scala | 53 ++++++ .../spark/sql/internal/SharedState.scala | 8 +- .../sql/streaming/StreamingQueryManager.scala | 3 +- .../sql/streaming/ui/StreamingQueryPage.scala | 44 ++--- .../ui/StreamingQueryStatisticsPage.scala | 27 +-- .../ui/StreamingQueryStatusListener.scala | 166 +++++++++++------- .../sql/streaming/ui/StreamingQueryTab.scala | 3 +- .../spark/sql/streaming/ui/UIUtils.scala | 12 +- .../spark-events/local-1596020211915 | 160 +++++++++++++++++ .../apache/spark/deploy/history/Utils.scala | 40 +++++ .../ui/StreamingQueryHistorySuite.scala | 63 +++++++ .../ui/StreamingQueryPageSuite.scala | 42 +++-- .../StreamingQueryStatusListenerSuite.scala | 159 ++++++++++++++--- 17 files changed, 693 insertions(+), 158 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/ui/StreamingQueryHistoryServerPlugin.scala create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/ui/StreamingQueryStatusStore.scala create mode 100644 sql/core/src/test/resources/spark-events/local-1596020211915 create mode 100644 sql/core/src/test/scala/org/apache/spark/deploy/history/Utils.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryHistorySuite.scala diff --git a/dev/.rat-excludes b/dev/.rat-excludes index 7da330dfe1fbf..167cf224f92c2 100644 --- a/dev/.rat-excludes +++ b/dev/.rat-excludes @@ -123,6 +123,7 @@ SessionHandler.java GangliaReporter.java application_1578436911597_0052 config.properties +local-1596020211915 app-20200706201101-0003 py.typed _metadata diff --git a/sql/core/src/main/resources/META-INF/services/org.apache.spark.status.AppHistoryServerPlugin b/sql/core/src/main/resources/META-INF/services/org.apache.spark.status.AppHistoryServerPlugin index 0bba2f88b92a5..6771eef525307 100644 --- a/sql/core/src/main/resources/META-INF/services/org.apache.spark.status.AppHistoryServerPlugin +++ b/sql/core/src/main/resources/META-INF/services/org.apache.spark.status.AppHistoryServerPlugin @@ -1 +1,2 @@ org.apache.spark.sql.execution.ui.SQLHistoryServerPlugin +org.apache.spark.sql.execution.ui.StreamingQueryHistoryServerPlugin diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingQueryListenerBus.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingQueryListenerBus.scala index 1b8d69ffb7521..4b98acd16f6fc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingQueryListenerBus.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingQueryListenerBus.scala @@ -31,16 +31,21 @@ import org.apache.spark.util.ListenerBus * Spark listener bus, so that it can receive [[StreamingQueryListener.Event]]s and dispatch them * to StreamingQueryListeners. * - * Note that each bus and its registered listeners are associated with a single SparkSession + * Note 1: Each bus and its registered listeners are associated with a single SparkSession * and StreamingQueryManager. So this bus will dispatch events to registered listeners for only * those queries that were started in the associated SparkSession. 
+ * + * Note 2: To rebuild Structured Streaming UI in SHS, this bus will be registered into + * [[org.apache.spark.scheduler.ReplayListenerBus]]. We check `sparkListenerBus` defined or not to + * determine how to process [[StreamingQueryListener.Event]]. If false, it means this bus is used to + * replay all streaming query event from eventLog. */ -class StreamingQueryListenerBus(sparkListenerBus: LiveListenerBus) +class StreamingQueryListenerBus(sparkListenerBus: Option[LiveListenerBus]) extends SparkListener with ListenerBus[StreamingQueryListener, StreamingQueryListener.Event] { import StreamingQueryListener._ - sparkListenerBus.addToQueue(this, StreamingQueryListenerBus.STREAM_EVENT_QUERY) + sparkListenerBus.foreach(_.addToQueue(this, StreamingQueryListenerBus.STREAM_EVENT_QUERY)) /** * RunIds of active queries whose events are supposed to be forwarded by this ListenerBus @@ -67,11 +72,11 @@ class StreamingQueryListenerBus(sparkListenerBus: LiveListenerBus) event match { case s: QueryStartedEvent => activeQueryRunIds.synchronized { activeQueryRunIds += s.runId } - sparkListenerBus.post(s) + sparkListenerBus.foreach(bus => bus.post(s)) // post to local listeners to trigger callbacks postToAll(s) case _ => - sparkListenerBus.post(event) + sparkListenerBus.foreach(bus => bus.post(event)) } } @@ -95,7 +100,11 @@ class StreamingQueryListenerBus(sparkListenerBus: LiveListenerBus) // synchronously and the ones attached to LiveListenerBus asynchronously. Therefore, // we need to ignore QueryStartedEvent if this method is called within SparkListenerBus // thread - if (!LiveListenerBus.withinListenerThread.value || !e.isInstanceOf[QueryStartedEvent]) { + // + // When loaded by Spark History Server, we should process all event coming from replay + // listener bus. + if (sparkListenerBus.isEmpty || !LiveListenerBus.withinListenerThread.value || + !e.isInstanceOf[QueryStartedEvent]) { postToAll(e) } case _ => @@ -110,7 +119,10 @@ class StreamingQueryListenerBus(sparkListenerBus: LiveListenerBus) listener: StreamingQueryListener, event: StreamingQueryListener.Event): Unit = { def shouldReport(runId: UUID): Boolean = { - activeQueryRunIds.synchronized { activeQueryRunIds.contains(runId) } + // When loaded by Spark History Server, we should process all event coming from replay + // listener bus. + sparkListenerBus.isEmpty || + activeQueryRunIds.synchronized { activeQueryRunIds.contains(runId) } } event match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/StreamingQueryHistoryServerPlugin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/StreamingQueryHistoryServerPlugin.scala new file mode 100644 index 0000000000000..a127fa59b7433 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/StreamingQueryHistoryServerPlugin.scala @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.ui + +import org.apache.spark.SparkConf +import org.apache.spark.scheduler.SparkListener +import org.apache.spark.sql.execution.streaming.StreamingQueryListenerBus +import org.apache.spark.sql.streaming.ui.{StreamingQueryStatusListener, StreamingQueryTab} +import org.apache.spark.status.{AppHistoryServerPlugin, ElementTrackingStore} +import org.apache.spark.ui.SparkUI + +class StreamingQueryHistoryServerPlugin extends AppHistoryServerPlugin { + + override def createListeners(conf: SparkConf, store: ElementTrackingStore): Seq[SparkListener] = { + val listenerBus = new StreamingQueryListenerBus(None) + listenerBus.addListener(new StreamingQueryStatusListener(conf, store)) + Seq(listenerBus) + } + + override def setupUI(ui: SparkUI): Unit = { + val streamingQueryStatusStore = new StreamingQueryStatusStore(ui.store.store) + if (streamingQueryStatusStore.allQueryUIData.nonEmpty) { + new StreamingQueryTab(streamingQueryStatusStore, ui) + } + } + + override def displayOrder: Int = 1 +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/StreamingQueryStatusStore.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/StreamingQueryStatusStore.scala new file mode 100644 index 0000000000000..9eb14a6a63063 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/StreamingQueryStatusStore.scala @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.ui + +import java.util.UUID + +import org.apache.spark.sql.streaming.ui.{StreamingQueryData, StreamingQueryProgressWrapper, StreamingQueryUIData} +import org.apache.spark.status.KVUtils +import org.apache.spark.util.kvstore.KVStore + +/** + * Provides a view of a KVStore with methods that make it easy to query Streaming Query state. + * There's no state kept in this class, so it's ok to have multiple instances of it in an + * application. 
+ */ +class StreamingQueryStatusStore(store: KVStore) { + + def allQueryUIData: Seq[StreamingQueryUIData] = { + val view = store.view(classOf[StreamingQueryData]).index("startTimestamp").first(0L) + KVUtils.viewToSeq(view, Int.MaxValue)(_ => true).map(makeUIData) + } + + // visible for test + private[sql] def getQueryProgressData(runId: UUID): Seq[StreamingQueryProgressWrapper] = { + val view = store.view(classOf[StreamingQueryProgressWrapper]) + .index("runId").first(runId.toString).last(runId.toString) + KVUtils.viewToSeq(view, Int.MaxValue)(_ => true) + } + + private def makeUIData(summary: StreamingQueryData): StreamingQueryUIData = { + val runId = summary.runId.toString + val view = store.view(classOf[StreamingQueryProgressWrapper]) + .index("runId").first(runId).last(runId) + val recentProgress = KVUtils.viewToSeq(view, Int.MaxValue)(_ => true) + .map(_.progress).sortBy(_.timestamp).toArray + StreamingQueryUIData(summary, recentProgress) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala index 89aceacac6007..ea430db9f030f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala @@ -34,7 +34,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.execution.CacheManager import org.apache.spark.sql.execution.streaming.StreamExecution -import org.apache.spark.sql.execution.ui.{SQLAppStatusListener, SQLAppStatusStore, SQLTab} +import org.apache.spark.sql.execution.ui.{SQLAppStatusListener, SQLAppStatusStore, SQLTab, StreamingQueryStatusStore} import org.apache.spark.sql.internal.StaticSQLConf._ import org.apache.spark.sql.streaming.ui.{StreamingQueryStatusListener, StreamingQueryTab} import org.apache.spark.status.ElementTrackingStore @@ -111,9 +111,9 @@ private[sql] class SharedState( lazy val streamingQueryStatusListener: Option[StreamingQueryStatusListener] = { sparkContext.ui.flatMap { ui => if (conf.get(STREAMING_UI_ENABLED)) { - val statusListener = new StreamingQueryStatusListener(conf) - new StreamingQueryTab(statusListener, ui) - Some(statusListener) + val kvStore = sparkContext.statusStore.store.asInstanceOf[ElementTrackingStore] + new StreamingQueryTab(new StreamingQueryStatusStore(kvStore), ui) + Some(new StreamingQueryStatusListener(conf, kvStore)) } else { None } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala index ffdbe9d4e4915..b66037d00919d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala @@ -49,7 +49,8 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) extends Lo private[sql] val stateStoreCoordinator = StateStoreCoordinatorRef.forDriver(sparkSession.sparkContext.env) - private val listenerBus = new StreamingQueryListenerBus(sparkSession.sparkContext.listenerBus) + private val listenerBus = + new StreamingQueryListenerBus(Some(sparkSession.sparkContext.listenerBus)) @GuardedBy("activeQueriesSharedLock") private val activeQueries = new mutable.HashMap[UUID, StreamingQuery] diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPage.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPage.scala index b98fdf16eef31..96e498991e1bb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPage.scala @@ -40,8 +40,8 @@ private[ui] class StreamingQueryPage(parent: StreamingQueryTab) } private def generateStreamingQueryTable(request: HttpServletRequest): Seq[Node] = { - val (activeQueries, inactiveQueries) = parent.statusListener.allQueryStatus - .partition(_.isActive) + val (activeQueries, inactiveQueries) = + parent.store.allQueryUIData.partition(_.summary.isActive) val content = mutable.ListBuffer[Node]() // show active queries table only if there is at least one active query @@ -176,7 +176,7 @@ class StreamingQueryPagedTable( val streamingQuery = query.streamingUIData val statisticsLink = "%s/%s/statistics?id=%s" .format(SparkUIUtils.prependBaseUri(request, parent.basePath), parent.prefix, - streamingQuery.runId) + streamingQuery.summary.runId) def details(detail: Any): Seq[Node] = { if (isActive) { @@ -194,14 +194,14 @@ class StreamingQueryPagedTable( {UIUtils.getQueryName(streamingQuery)} {UIUtils.getQueryStatus(streamingQuery)} - {streamingQuery.id} - {streamingQuery.runId} - {SparkUIUtils.formatDate(streamingQuery.startTimestamp)} + {streamingQuery.summary.id} + {streamingQuery.summary.runId} + {SparkUIUtils.formatDate(streamingQuery.summary.startTimestamp)} {SparkUIUtils.formatDurationVerbose(query.duration)} {withNoProgress(streamingQuery, {query.avgInput.formatted("%.2f")}, "NaN")} {withNoProgress(streamingQuery, {query.avgProcess.formatted("%.2f")}, "NaN")} {withNoProgress(streamingQuery, {streamingQuery.lastProgress.batchId}, "NaN")} - {details(streamingQuery.exception.getOrElse("-"))} + {details(streamingQuery.summary.exception.getOrElse("-"))} } } @@ -222,32 +222,32 @@ class StreamingQueryDataSource(uiData: Seq[StreamingQueryUIData], sortColumn: St override def sliceData(from: Int, to: Int): Seq[StructuredStreamingRow] = data.slice(from, to) - private def streamingRow(query: StreamingQueryUIData): StructuredStreamingRow = { + private def streamingRow(uiData: StreamingQueryUIData): StructuredStreamingRow = { val duration = if (isActive) { - System.currentTimeMillis() - query.startTimestamp + System.currentTimeMillis() - uiData.summary.startTimestamp } else { - withNoProgress(query, { - val endTimeMs = query.lastProgress.timestamp - parseProgressTimestamp(endTimeMs) - query.startTimestamp + withNoProgress(uiData, { + val endTimeMs = uiData.lastProgress.timestamp + parseProgressTimestamp(endTimeMs) - uiData.summary.startTimestamp }, 0) } - val avgInput = (query.recentProgress.map(p => withNumberInvalid(p.inputRowsPerSecond)).sum / - query.recentProgress.length) + val avgInput = (uiData.recentProgress.map(p => withNumberInvalid(p.inputRowsPerSecond)).sum / + uiData.recentProgress.length) - val avgProcess = (query.recentProgress.map(p => - withNumberInvalid(p.processedRowsPerSecond)).sum / query.recentProgress.length) + val avgProcess = (uiData.recentProgress.map(p => + withNumberInvalid(p.processedRowsPerSecond)).sum / uiData.recentProgress.length) - StructuredStreamingRow(duration, avgInput, avgProcess, query) + StructuredStreamingRow(duration, avgInput, avgProcess, uiData) } private def ordering(sortColumn: String, desc: Boolean): Ordering[StructuredStreamingRow] = { val ordering: Ordering[StructuredStreamingRow] = sortColumn match { - case "Name" => Ordering.by(q => 
UIUtils.getQueryName(q.streamingUIData)) - case "Status" => Ordering.by(q => UIUtils.getQueryStatus(q.streamingUIData)) - case "ID" => Ordering.by(_.streamingUIData.id) - case "Run ID" => Ordering.by(_.streamingUIData.runId) - case "Start Time" => Ordering.by(_.streamingUIData.startTimestamp) + case "Name" => Ordering.by(row => UIUtils.getQueryName(row.streamingUIData)) + case "Status" => Ordering.by(row => UIUtils.getQueryStatus(row.streamingUIData)) + case "ID" => Ordering.by(_.streamingUIData.summary.id) + case "Run ID" => Ordering.by(_.streamingUIData.summary.runId) + case "Start Time" => Ordering.by(_.streamingUIData.summary.startTimestamp) case "Duration" => Ordering.by(_.duration) case "Avg Input /sec" => Ordering.by(_.avgInput) case "Avg Process /sec" => Ordering.by(_.avgProcess) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala index 24709ba470cde..97691d9d7e827 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala @@ -58,8 +58,8 @@ private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab) val parameterId = request.getParameter("id") require(parameterId != null && parameterId.nonEmpty, "Missing id parameter") - val query = parent.statusListener.allQueryStatus.find { case q => - q.runId.equals(UUID.fromString(parameterId)) + val query = parent.store.allQueryUIData.find { uiData => + uiData.summary.runId.equals(UUID.fromString(parameterId)) }.getOrElse(throw new IllegalArgumentException(s"Failed to find streaming query $parameterId")) val resources = generateLoadResources(request) @@ -109,34 +109,35 @@ private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab) } - def generateBasicInfo(query: StreamingQueryUIData): Seq[Node] = { - val duration = if (query.isActive) { - SparkUIUtils.formatDurationVerbose(System.currentTimeMillis() - query.startTimestamp) + def generateBasicInfo(uiData: StreamingQueryUIData): Seq[Node] = { + val duration = if (uiData.summary.isActive) { + val durationMs = System.currentTimeMillis() - uiData.summary.startTimestamp + SparkUIUtils.formatDurationVerbose(durationMs) } else { - withNoProgress(query, { - val end = query.lastProgress.timestamp - val start = query.recentProgress.head.timestamp + withNoProgress(uiData, { + val end = uiData.lastProgress.timestamp + val start = uiData.recentProgress.head.timestamp SparkUIUtils.formatDurationVerbose( parseProgressTimestamp(end) - parseProgressTimestamp(start)) }, "-") } - val name = UIUtils.getQueryName(query) - val numBatches = withNoProgress(query, { query.lastProgress.batchId + 1L }, 0) + val name = UIUtils.getQueryName(uiData) + val numBatches = withNoProgress(uiData, { uiData.lastProgress.batchId + 1L }, 0)
      Running batches for {duration} since - {SparkUIUtils.formatDate(query.startTimestamp)} + {SparkUIUtils.formatDate(uiData.summary.startTimestamp)} ({numBatches} completed batches)

      Name: {name}
      - Id: {query.id}
      - RunId: {query.runId}
      + Id: {uiData.summary.id}
      + RunId: {uiData.summary.runId}

      } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListener.scala index e331083b30024..fdd3754344108 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListener.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListener.scala @@ -20,102 +20,144 @@ package org.apache.spark.sql.streaming.ui import java.util.UUID import java.util.concurrent.ConcurrentHashMap -import scala.collection.JavaConverters._ import scala.collection.mutable +import com.fasterxml.jackson.annotation.JsonIgnore + import org.apache.spark.SparkConf import org.apache.spark.sql.internal.StaticSQLConf import org.apache.spark.sql.streaming.{StreamingQueryListener, StreamingQueryProgress} +import org.apache.spark.sql.streaming.ui.StreamingQueryProgressWrapper._ import org.apache.spark.sql.streaming.ui.UIUtils.parseProgressTimestamp +import org.apache.spark.status.{ElementTrackingStore, KVUtils} +import org.apache.spark.status.KVUtils.KVIndexParam +import org.apache.spark.util.kvstore.KVIndex /** * A customized StreamingQueryListener used in structured streaming UI, which contains all * UI data for both active and inactive query. - * TODO: Add support for history server. */ -private[sql] class StreamingQueryStatusListener(conf: SparkConf) extends StreamingQueryListener { - - /** - * We use runId as the key here instead of id in active query status map, - * because the runId is unique for every started query, even it its a restart. - */ - private[ui] val activeQueryStatus = new ConcurrentHashMap[UUID, StreamingQueryUIData]() - private[ui] val inactiveQueryStatus = new mutable.Queue[StreamingQueryUIData]() +private[sql] class StreamingQueryStatusListener( + conf: SparkConf, + store: ElementTrackingStore) extends StreamingQueryListener { private val streamingProgressRetention = conf.get(StaticSQLConf.STREAMING_UI_RETAINED_PROGRESS_UPDATES) private val inactiveQueryStatusRetention = conf.get(StaticSQLConf.STREAMING_UI_RETAINED_QUERIES) + store.addTrigger(classOf[StreamingQueryData], inactiveQueryStatusRetention) { count => + cleanupInactiveQueries(count) + } + + // Events from the same query run will never be processed concurrently, so it's safe to + // access `progressIds` without any protection. 
+ private val queryToProgress = new ConcurrentHashMap[UUID, mutable.Queue[String]]() + + private def cleanupInactiveQueries(count: Long): Unit = { + val view = store.view(classOf[StreamingQueryData]).index("active").first(false).last(false) + val inactiveQueries = KVUtils.viewToSeq(view, Int.MaxValue)(_ => true) + val numInactiveQueries = inactiveQueries.size + if (numInactiveQueries <= inactiveQueryStatusRetention) { + return + } + val toDelete = inactiveQueries.sortBy(_.endTimestamp.get) + .take(numInactiveQueries - inactiveQueryStatusRetention) + val runIds = toDelete.map { e => + store.delete(e.getClass, e.runId) + e.runId.toString + } + // Delete wrappers in one pass, as deleting them for each summary is slow + store.removeAllByIndexValues(classOf[StreamingQueryProgressWrapper], "runId", runIds) + } + override def onQueryStarted(event: StreamingQueryListener.QueryStartedEvent): Unit = { val startTimestamp = parseProgressTimestamp(event.timestamp) - activeQueryStatus.putIfAbsent(event.runId, - new StreamingQueryUIData(event.name, event.id, event.runId, startTimestamp)) + store.write(new StreamingQueryData( + event.name, + event.id, + event.runId, + isActive = true, + None, + startTimestamp + ), checkTriggers = true) } override def onQueryProgress(event: StreamingQueryListener.QueryProgressEvent): Unit = { - val batchTimestamp = parseProgressTimestamp(event.progress.timestamp) - val queryStatus = activeQueryStatus.getOrDefault( - event.progress.runId, - new StreamingQueryUIData(event.progress.name, event.progress.id, event.progress.runId, - batchTimestamp)) - queryStatus.updateProcess(event.progress, streamingProgressRetention) - } - - override def onQueryTerminated( - event: StreamingQueryListener.QueryTerminatedEvent): Unit = synchronized { - val queryStatus = activeQueryStatus.remove(event.runId) - if (queryStatus != null) { - queryStatus.queryTerminated(event) - inactiveQueryStatus += queryStatus - while (inactiveQueryStatus.length >= inactiveQueryStatusRetention) { - inactiveQueryStatus.dequeue() - } + val runId = event.progress.runId + val batchId = event.progress.batchId + val timestamp = event.progress.timestamp + if (!queryToProgress.containsKey(runId)) { + queryToProgress.put(runId, mutable.Queue.empty[String]) + } + val progressIds = queryToProgress.get(runId) + progressIds.enqueue(getUniqueId(runId, batchId, timestamp)) + store.write(new StreamingQueryProgressWrapper(event.progress)) + while (progressIds.length > streamingProgressRetention) { + val uniqueId = progressIds.dequeue + store.delete(classOf[StreamingQueryProgressWrapper], uniqueId) } } - def allQueryStatus: Seq[StreamingQueryUIData] = synchronized { - activeQueryStatus.values().asScala.toSeq ++ inactiveQueryStatus + override def onQueryTerminated( + event: StreamingQueryListener.QueryTerminatedEvent): Unit = { + val querySummary = store.read(classOf[StreamingQueryData], event.runId) + val curTime = System.currentTimeMillis() + store.write(new StreamingQueryData( + querySummary.name, + querySummary.id, + querySummary.runId, + isActive = false, + querySummary.exception, + querySummary.startTimestamp, + Some(curTime) + ), checkTriggers = true) + queryToProgress.remove(event.runId) } } +private[sql] class StreamingQueryData( + val name: String, + val id: UUID, + @KVIndexParam val runId: UUID, + @KVIndexParam("active") val isActive: Boolean, + val exception: Option[String], + @KVIndexParam("startTimestamp") val startTimestamp: Long, + val endTimestamp: Option[Long] = None) + /** * This class contains all message 
related to UI display, each instance corresponds to a single * [[org.apache.spark.sql.streaming.StreamingQuery]]. */ -private[ui] class StreamingQueryUIData( - val name: String, - val id: UUID, - val runId: UUID, - val startTimestamp: Long) { - - /** Holds the most recent query progress updates. */ - private val progressBuffer = new mutable.Queue[StreamingQueryProgress]() - - private var _isActive = true - private var _exception: Option[String] = None - - def isActive: Boolean = synchronized { _isActive } - - def exception: Option[String] = synchronized { _exception } - - def queryTerminated(event: StreamingQueryListener.QueryTerminatedEvent): Unit = synchronized { - _isActive = false - _exception = event.exception - } - - def updateProcess( - newProgress: StreamingQueryProgress, retentionNum: Int): Unit = progressBuffer.synchronized { - progressBuffer += newProgress - while (progressBuffer.length >= retentionNum) { - progressBuffer.dequeue() +private[sql] case class StreamingQueryUIData( + summary: StreamingQueryData, + recentProgress: Array[StreamingQueryProgress]) { + + def lastProgress: StreamingQueryProgress = { + if (recentProgress.nonEmpty) { + recentProgress.last + } else { + null } } +} - def recentProgress: Array[StreamingQueryProgress] = progressBuffer.synchronized { - progressBuffer.toArray - } +private[sql] class StreamingQueryProgressWrapper(val progress: StreamingQueryProgress) { + @JsonIgnore @KVIndex + private val uniqueId: String = getUniqueId(progress.runId, progress.batchId, progress.timestamp) - def lastProgress: StreamingQueryProgress = progressBuffer.synchronized { - progressBuffer.lastOption.orNull + @JsonIgnore @KVIndex("runId") + private def runIdIndex: String = progress.runId.toString +} + +private[sql] object StreamingQueryProgressWrapper { + /** + * Adding `timestamp` into unique id to support reporting `empty` query progress + * in which no data comes but with the same batchId. 
+ */ + def getUniqueId( + runId: UUID, + batchId: Long, + timestamp: String): String = { + s"${runId}_${batchId}_$timestamp" } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryTab.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryTab.scala index bb097ffc06912..65cad8f06cc1c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryTab.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryTab.scala @@ -17,10 +17,11 @@ package org.apache.spark.sql.streaming.ui import org.apache.spark.internal.Logging +import org.apache.spark.sql.execution.ui.StreamingQueryStatusStore import org.apache.spark.ui.{SparkUI, SparkUITab} private[sql] class StreamingQueryTab( - val statusListener: StreamingQueryStatusListener, + val store: StreamingQueryStatusStore, sparkUI: SparkUI) extends SparkUITab(sparkUI, "StreamingQuery") with Logging { override val name = "Structured Streaming" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/UIUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/UIUtils.scala index 1f7e65dede170..88a110fa9a329 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/UIUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/UIUtils.scala @@ -46,19 +46,19 @@ private[ui] object UIUtils { } } - def getQueryName(query: StreamingQueryUIData): String = { - if (query.name == null || query.name.isEmpty) { + def getQueryName(uiData: StreamingQueryUIData): String = { + if (uiData.summary.name == null || uiData.summary.name.isEmpty) { "" } else { - query.name + uiData.summary.name } } - def getQueryStatus(query: StreamingQueryUIData): String = { - if (query.isActive) { + def getQueryStatus(uiData: StreamingQueryUIData): String = { + if (uiData.summary.isActive) { "RUNNING" } else { - query.exception.map(_ => "FAILED").getOrElse("FINISHED") + uiData.summary.exception.map(_ => "FAILED").getOrElse("FINISHED") } } diff --git a/sql/core/src/test/resources/spark-events/local-1596020211915 b/sql/core/src/test/resources/spark-events/local-1596020211915 new file mode 100644 index 0000000000000..ff34bbc16ef3a --- /dev/null +++ b/sql/core/src/test/resources/spark-events/local-1596020211915 @@ -0,0 +1,160 @@ +{"Event":"SparkListenerLogStart","Spark Version":"3.1.0-SNAPSHOT"} +{"Event":"SparkListenerResourceProfileAdded","Resource Profile Id":0,"Executor Resource Requests":{"cores":{"Resource Name":"cores","Amount":1,"Discovery Script":"","Vendor":""},"memory":{"Resource Name":"memory","Amount":1024,"Discovery Script":"","Vendor":""}},"Task Resource Requests":{"cpus":{"Resource Name":"cpus","Amount":1.0}}} +{"Event":"SparkListenerExecutorAdded","Timestamp":1596020212090,"Executor ID":"driver","Executor Info":{"Host":"iZbp19vpr16ix621sdw476Z","Total Cores":4,"Log Urls":{},"Attributes":{},"Resources":{},"Resource Profile Id":0}} +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Port":39845},"Maximum Memory":384093388,"Timestamp":1596020212109,"Maximum Onheap Memory":384093388,"Maximum Offheap Memory":0} +{"Event":"SparkListenerEnvironmentUpdate","JVM Information":{"Java Home":"/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64/jre","Java Version":"1.8.0_252 (Oracle Corporation)","Scala Version":"version 2.12.10"},"Spark 
Properties":{"spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.driver.port":"46309","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","spark.app.name":"StructuredKafkaWordCount","spark.scheduler.mode":"FIFO","spark.submit.pyFiles":"","spark.executor.id":"driver","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"},"Hadoop Properties":{"yarn.resourcemanager.amlauncher.thread-count":"50","yarn.sharedcache.enabled":"false","fs.s3a.connection.maximum":"15","fs.s3a.impl":"org.apache.hadoop.fs.s3a.S3AFileSystem","yarn.app.mapreduce.am.scheduler.heartbeat.interval-ms":"1000","hadoop.security.kms.client.timeout":"60","hadoop.http.authentication.kerberos.principal":"HTTP/_HOST@LOCALHOST","mapreduce.framework.name":"local","yarn.sharedcache.uploader.server.thread-count":"50","yarn.nodemanager.linux-container-executor.nonsecure-mode.user-pattern":"^[_.A-Za-z0-9][-@_.A-Za-z0-9]{0,255}?[$]?$","tfile.fs.output.buffer.size":"262144","yarn.app.mapreduce.am.job.task.listener.thread-count":"30","hadoop.security.groups.cache.background.reload.threads":"3","yarn.resourcemanager.webapp.cross-origin.enabled":"false","fs.AbstractFileSystem.ftp.impl":"org.apache.hadoop.fs.ftp.FtpFs","fs.s3.block.size":"67108864","hadoop.registry.secure":"false","hadoop.shell.safely.delete.limit.num.files":"100","dfs.bytes-per-checksum":"512","fs.s3.buffer.dir":"${hadoop.tmp.dir}/s3","mapreduce.job.acl-view-job":" ","mapreduce.jobhistory.loadedjobs.cache.size":"5","mapreduce.input.fileinputformat.split.minsize":"0","yarn.resourcemanager.container.liveness-monitor.interval-ms":"600000","yarn.resourcemanager.client.thread-count":"50","io.seqfile.compress.blocksize":"1000000","yarn.sharedcache.checksum.algo.impl":"org.apache.hadoop.yarn.sharedcache.ChecksumSHA256Impl","yarn.nodemanager.amrmproxy.interceptor-class.pipeline":"org.apache.hadoop.yarn.server.nodemanager.amrmproxy.DefaultRequestInterceptor","yarn.timeline-service.entity-group-fs-store.leveldb-cache-read-cache-size":"10485760","mapreduce.reduce.shuffle.fetch.retry.interval-ms":"1000","mapreduce.task.profile.maps":"0-2","yarn.scheduler.include-port-in-node-name":"false","yarn.nodemanager.admin-env":"MALLOC_ARENA_MAX=$MALLOC_ARENA_MAX","yarn.resourcemanager.node-removal-untracked.timeout-ms":"60000","mapreduce.am.max-attempts":"2","hadoop.security.kms.client.failover.sleep.base.millis":"100","mapreduce.jobhistory.webapp.https.address":"0.0.0.0:19890","yarn.node-labels.fs-store.impl.class":"org.apache.hadoop.yarn.nodelabels.FileSystemNodeLabelsStore","fs.trash.checkpoint.interval":"0","mapreduce.job.map.output.collector.class":"org.apache.hadoop.mapred.MapTask$MapOutputBuffer","yarn.resourcemanager.node-ip-cache.expiry-interval-secs":"-1","hadoop.http.authentication.signature.secret.file":"*********(redacted)","hadoop.jetty.logs.serve.aliases":"true","yarn.timeline-service.handler-thread-count":"10","yarn.resourcemanager.max-completed-applications":"10000","yarn.resourcemanager.system-metrics-publisher.enabled":"false","yarn.sharedcache.webapp.address":"0.0.0.0:8788","yarn.resourcemanager.delegation.token.renew-interval":"*********(redacted)","yarn.sharedcache.nm.uploader.replication.factor":"10","hadoop.security.groups.negative-cache.secs":"30","yarn.app.mapreduce.task.container.log.backups":"0","mapreduce.reduce.skip.proc-count.auto-incr":"true","hadoop.se
curity.group.mapping.ldap.posix.attr.gid.name":"gidNumber","ipc.client.fallback-to-simple-auth-allowed":"false","yarn.client.failover-proxy-provider":"org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider","yarn.timeline-service.http-authentication.simple.anonymous.allowed":"true","ha.health-monitor.check-interval.ms":"1000","yarn.acl.reservation-enable":"false","yarn.resourcemanager.store.class":"org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore","yarn.app.mapreduce.am.hard-kill-timeout-ms":"10000","yarn.nodemanager.container-metrics.enable":"true","yarn.timeline-service.client.fd-clean-interval-secs":"60","yarn.nodemanager.docker-container-executor.exec-name":"/usr/bin/docker","yarn.resourcemanager.nodemanagers.heartbeat-interval-ms":"1000","mapred.child.java.opts":"-Xmx200m","hadoop.common.configuration.version":"0.23.0","yarn.nodemanager.remote-app-log-dir-suffix":"logs","yarn.nodemanager.windows-container.cpu-limit.enabled":"false","yarn.nodemanager.runtime.linux.docker.privileged-containers.allowed":"false","file.blocksize":"67108864","hadoop.registry.zk.retry.ceiling.ms":"60000","yarn.sharedcache.store.in-memory.initial-delay-mins":"10","mapreduce.jobhistory.principal":"jhs/_HOST@REALM.TLD","mapreduce.map.skip.proc-count.auto-incr":"true","mapreduce.task.profile.reduces":"0-2","yarn.timeline-service.webapp.https.address":"${yarn.timeline-service.hostname}:8190","yarn.resourcemanager.scheduler.address":"${yarn.resourcemanager.hostname}:8030","yarn.node-labels.enabled":"false","yarn.resourcemanager.webapp.ui-actions.enabled":"true","mapreduce.task.timeout":"600000","yarn.sharedcache.client-server.thread-count":"50","hadoop.security.crypto.cipher.suite":"AES/CTR/NoPadding","yarn.resourcemanager.connect.max-wait.ms":"900000","fs.defaultFS":"file:///","yarn.minicluster.use-rpc":"false","fs.har.impl.disable.cache":"true","io.compression.codec.bzip2.library":"system-native","mapreduce.shuffle.connection-keep-alive.timeout":"5","yarn.resourcemanager.webapp.https.address":"${yarn.resourcemanager.hostname}:8090","mapreduce.jobhistory.address":"0.0.0.0:10020","yarn.resourcemanager.nm-tokens.master-key-rolling-interval-secs":"*********(redacted)","yarn.is.minicluster":"false","yarn.nodemanager.address":"${yarn.nodemanager.hostname}:0","fs.AbstractFileSystem.s3a.impl":"org.apache.hadoop.fs.s3a.S3A","mapreduce.task.combine.progress.records":"10000","yarn.resourcemanager.am.max-attempts":"2","yarn.nodemanager.linux-container-executor.cgroups.hierarchy":"/hadoop-yarn","ipc.server.log.slow.rpc":"false","yarn.resourcemanager.node-labels.provider.fetch-interval-ms":"1800000","yarn.nodemanager.webapp.cross-origin.enabled":"false","yarn.app.mapreduce.am.job.committer.cancel-timeout":"60000","ftp.bytes-per-checksum":"512","yarn.nodemanager.resource.memory-mb":"-1","fs.s3a.fast.upload.active.blocks":"4","mapreduce.jobhistory.joblist.cache.size":"20000","fs.ftp.host":"0.0.0.0","yarn.resourcemanager.fs.state-store.num-retries":"0","yarn.resourcemanager.nodemanager-connect-retries":"10","hadoop.security.kms.client.encrypted.key.cache.low-watermark":"0.3f","yarn.timeline-service.client.max-retries":"30","dfs.ha.fencing.ssh.connect-timeout":"30000","yarn.log-aggregation-enable":"false","mapreduce.reduce.markreset.buffer.percent":"0.0","fs.AbstractFileSystem.viewfs.impl":"org.apache.hadoop.fs.viewfs.ViewFs","mapreduce.task.io.sort.factor":"10","yarn.nodemanager.amrmproxy.client.thread-count":"25","ha.failover-controller.new-active.rpc-timeout.ms":"60000","yarn.nodemanager.co
ntainer-localizer.java.opts":"-Xmx256m","mapreduce.jobhistory.datestring.cache.size":"200000","mapreduce.job.acl-modify-job":" ","yarn.nodemanager.windows-container.memory-limit.enabled":"false","yarn.timeline-service.webapp.address":"${yarn.timeline-service.hostname}:8188","yarn.app.mapreduce.am.job.committer.commit-window":"10000","yarn.nodemanager.container-manager.thread-count":"20","yarn.minicluster.fixed.ports":"false","yarn.cluster.max-application-priority":"0","yarn.timeline-service.ttl-enable":"true","mapreduce.jobhistory.recovery.store.fs.uri":"${hadoop.tmp.dir}/mapred/history/recoverystore","hadoop.caller.context.signature.max.size":"40","ha.zookeeper.session-timeout.ms":"10000","tfile.io.chunk.size":"1048576","mapreduce.job.speculative.slowtaskthreshold":"1.0","io.serializations":"org.apache.hadoop.io.serializer.WritableSerialization, org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization, org.apache.hadoop.io.serializer.avro.AvroReflectSerialization","hadoop.security.kms.client.failover.sleep.max.millis":"2000","hadoop.security.group.mapping.ldap.directory.search.timeout":"10000","fs.swift.impl":"org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem","yarn.nodemanager.local-cache.max-files-per-directory":"8192","hadoop.http.cross-origin.enabled":"false","mapreduce.map.sort.spill.percent":"0.80","yarn.timeline-service.entity-group-fs-store.scan-interval-seconds":"60","yarn.timeline-service.client.best-effort":"false","yarn.resourcemanager.webapp.delegation-token-auth-filter.enabled":"*********(redacted)","hadoop.security.group.mapping.ldap.posix.attr.uid.name":"uidNumber","fs.AbstractFileSystem.swebhdfs.impl":"org.apache.hadoop.fs.SWebHdfs","mapreduce.ifile.readahead":"true","yarn.timeline-service.leveldb-timeline-store.ttl-interval-ms":"300000","hadoop.security.kms.client.encrypted.key.cache.num.refill.threads":"2","yarn.resourcemanager.scheduler.class":"org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler","yarn.app.mapreduce.am.command-opts":"-Xmx1024m","mapreduce.cluster.local.dir":"${hadoop.tmp.dir}/mapred/local","io.mapfile.bloom.error.rate":"0.005","yarn.nodemanager.runtime.linux.allowed-runtimes":"default","yarn.sharedcache.store.class":"org.apache.hadoop.yarn.server.sharedcachemanager.store.InMemorySCMStore","ha.failover-controller.graceful-fence.rpc-timeout.ms":"5000","ftp.replication":"3","hadoop.security.uid.cache.secs":"14400","mapreduce.job.maxtaskfailures.per.tracker":"3","io.skip.checksum.errors":"false","yarn.app.mapreduce.client-am.ipc.max-retries-on-timeouts":"3","fs.s3a.connection.timeout":"200000","mapreduce.job.max.split.locations":"10","hadoop.registry.zk.session.timeout.ms":"60000","mapreduce.jvm.system-properties-to-log":"os.name,os.version,java.home,java.runtime.version,java.vendor,java.version,java.vm.name,java.class.path,java.io.tmpdir,user.dir,user.name","yarn.timeline-service.entity-group-fs-store.active-dir":"/tmp/entity-file-history/active","mapreduce.shuffle.transfer.buffer.size":"131072","yarn.timeline-service.client.retry-interval-ms":"1000","yarn.http.policy":"HTTP_ONLY","fs.s3a.socket.send.buffer":"8192","yarn.sharedcache.uploader.server.address":"0.0.0.0:8046","hadoop.http.authentication.token.validity":"*********(redacted)","mapreduce.shuffle.max.connections":"0","yarn.minicluster.yarn.nodemanager.resource.memory-mb":"4096","mapreduce.job.emit-timeline-data":"false","yarn.nodemanager.resource.system-reserved-memory-mb":"-1","hadoop.kerberos.min.seconds.before.relogin":"60","mapreduce.jobhistory.move.
thread-count":"3","yarn.resourcemanager.admin.client.thread-count":"1","yarn.dispatcher.drain-events.timeout":"300000","fs.s3a.buffer.dir":"${hadoop.tmp.dir}/s3a","hadoop.ssl.enabled.protocols":"TLSv1,SSLv2Hello,TLSv1.1,TLSv1.2","mapreduce.jobhistory.admin.address":"0.0.0.0:10033","yarn.log-aggregation-status.time-out.ms":"600000","mapreduce.shuffle.port":"13562","yarn.resourcemanager.max-log-aggregation-diagnostics-in-memory":"10","yarn.nodemanager.health-checker.interval-ms":"600000","ftp.blocksize":"67108864","yarn.nodemanager.log-container-debug-info.enabled":"false","yarn.client.max-cached-nodemanagers-proxies":"0","yarn.nodemanager.linux-container-executor.cgroups.delete-delay-ms":"20","yarn.nodemanager.delete.debug-delay-sec":"0","yarn.nodemanager.pmem-check-enabled":"true","yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage":"90.0","mapreduce.app-submission.cross-platform":"false","yarn.resourcemanager.work-preserving-recovery.scheduling-wait-ms":"10000","hadoop.security.groups.cache.secs":"300","yarn.resourcemanager.zk-retry-interval-ms":"1000","ipc.maximum.data.length":"67108864","mapreduce.shuffle.max.threads":"0","hadoop.security.authorization":"false","mapreduce.job.complete.cancel.delegation.tokens":"*********(redacted)","fs.s3a.paging.maximum":"5000","nfs.exports.allowed.hosts":"* rw","mapreduce.jobhistory.http.policy":"HTTP_ONLY","yarn.sharedcache.store.in-memory.check-period-mins":"720","s3native.replication":"3","hadoop.security.group.mapping.ldap.ssl":"false","yarn.client.application-client-protocol.poll-interval-ms":"200","ha.zookeeper.parent-znode":"/hadoop-ha","yarn.nodemanager.log-aggregation.policy.class":"org.apache.hadoop.yarn.server.nodemanager.containermanager.logaggregation.AllContainerLogAggregationPolicy","mapreduce.reduce.shuffle.merge.percent":"0.66","hadoop.security.group.mapping.ldap.search.filter.group":"(objectClass=group)","yarn.nodemanager.resourcemanager.minimum.version":"NONE","mapreduce.job.speculative.speculative-cap-running-tasks":"0.1","yarn.admin.acl":"*","yarn.nodemanager.recovery.supervised":"false","yarn.sharedcache.admin.thread-count":"1","yarn.resourcemanager.ha.automatic-failover.enabled":"true","mapreduce.reduce.skip.maxgroups":"0","mapreduce.reduce.shuffle.connect.timeout":"180000","yarn.resourcemanager.address":"${yarn.resourcemanager.hostname}:8032","ipc.client.ping":"true","mapreduce.task.local-fs.write-limit.bytes":"-1","fs.adl.oauth2.access.token.provider.type":"*********(redacted)","mapreduce.shuffle.ssl.file.buffer.size":"65536","yarn.resourcemanager.ha.automatic-failover.embedded":"true","hadoop.ssl.enabled":"false","fs.s3a.multipart.purge":"false","mapreduce.job.end-notification.max.attempts":"5","mapreduce.output.fileoutputformat.compress.codec":"org.apache.hadoop.io.compress.DefaultCodec","yarn.nodemanager.container-monitor.procfs-tree.smaps-based-rss.enabled":"false","ha.health-monitor.connect-retry-interval.ms":"1000","yarn.nodemanager.keytab":"/etc/krb5.keytab","mapreduce.jobhistory.keytab":"/etc/security/keytab/jhs.service.keytab","fs.s3a.threads.max":"10","mapreduce.reduce.shuffle.input.buffer.percent":"0.70","mapreduce.cluster.temp.dir":"${hadoop.tmp.dir}/mapred/temp","s3.replication":"3","yarn.nodemanager.node-labels.resync-interval-ms":"120000","hadoop.tmp.dir":"/tmp/hadoop-${user.name}","mapreduce.job.maps":"2","mapreduce.job.end-notification.max.retry.interval":"5000","yarn.log-aggregation.retain-check-interval-seconds":"-1","yarn.resourcemanager.resource-tracker.client.thread-count":"50","
yarn.timeline-service.leveldb-timeline-store.start-time-read-cache-size":"10000","yarn.resourcemanager.ha.automatic-failover.zk-base-path":"/yarn-leader-election","io.seqfile.local.dir":"${hadoop.tmp.dir}/io/local","mapreduce.client.submit.file.replication":"10","mapreduce.jobhistory.minicluster.fixed.ports":"false","fs.s3a.multipart.threshold":"2147483647","mapreduce.jobhistory.done-dir":"${yarn.app.mapreduce.am.staging-dir}/history/done","yarn.resourcemanager.zk-acl":"world:anyone:rwcda","ipc.client.idlethreshold":"4000","yarn.nodemanager.linux-container-executor.cgroups.strict-resource-usage":"false","mapreduce.reduce.input.buffer.percent":"0.0","yarn.nodemanager.amrmproxy.enable":"false","fs.ftp.host.port":"21","ipc.ping.interval":"60000","yarn.resourcemanager.history-writer.multi-threaded-dispatcher.pool-size":"10","yarn.resourcemanager.admin.address":"${yarn.resourcemanager.hostname}:8033","file.client-write-packet-size":"65536","ipc.client.kill.max":"10","mapreduce.reduce.speculative":"true","mapreduce.local.clientfactory.class.name":"org.apache.hadoop.mapred.LocalClientFactory","mapreduce.job.reducer.unconditional-preempt.delay.sec":"300","yarn.nodemanager.disk-health-checker.interval-ms":"120000","yarn.nodemanager.log.deletion-threads-count":"4","ipc.client.connection.maxidletime":"10000","mapreduce.task.io.sort.mb":"100","yarn.nodemanager.localizer.client.thread-count":"5","yarn.sharedcache.admin.address":"0.0.0.0:8047","yarn.nodemanager.localizer.cache.cleanup.interval-ms":"600000","hadoop.security.crypto.codec.classes.aes.ctr.nopadding":"org.apache.hadoop.crypto.OpensslAesCtrCryptoCodec, org.apache.hadoop.crypto.JceAesCtrCryptoCodec","fs.s3a.connection.ssl.enabled":"true","yarn.nodemanager.process-kill-wait.ms":"2000","mapreduce.job.hdfs-servers":"${fs.defaultFS}","hadoop.workaround.non.threadsafe.getpwuid":"true","fs.df.interval":"60000","fs.s3.sleepTimeSeconds":"10","fs.s3a.multiobjectdelete.enable":"true","yarn.sharedcache.cleaner.resource-sleep-ms":"0","yarn.nodemanager.disk-health-checker.min-healthy-disks":"0.25","hadoop.shell.missing.defaultFs.warning":"false","io.file.buffer.size":"65536","hadoop.security.group.mapping.ldap.search.attr.member":"member","hadoop.security.random.device.file.path":"/dev/urandom","hadoop.security.sensitive-config-keys":"*********(redacted)","hadoop.rpc.socket.factory.class.default":"org.apache.hadoop.net.StandardSocketFactory","yarn.intermediate-data-encryption.enable":"false","yarn.resourcemanager.connect.retry-interval.ms":"30000","yarn.scheduler.minimum-allocation-mb":"1024","yarn.app.mapreduce.am.staging-dir":"/tmp/hadoop-yarn/staging","mapreduce.reduce.shuffle.read.timeout":"180000","hadoop.http.cross-origin.max-age":"1800","fs.s3a.connection.establish.timeout":"5000","mapreduce.job.running.map.limit":"0","yarn.minicluster.control-resource-monitoring":"false","hadoop.ssl.require.client.cert":"false","hadoop.kerberos.kinit.command":"kinit","mapreduce.reduce.log.level":"INFO","hadoop.security.dns.log-slow-lookups.threshold.ms":"1000","mapreduce.job.ubertask.enable":"false","hadoop.caller.context.enabled":"false","yarn.nodemanager.vmem-pmem-ratio":"2.1","hadoop.rpc.protection":"authentication","ha.health-monitor.rpc-timeout.ms":"45000","s3native.stream-buffer-size":"4096","yarn.nodemanager.remote-app-log-dir":"/tmp/logs","yarn.nodemanager.resource.pcores-vcores-multiplier":"1.0","yarn.app.mapreduce.am.containerlauncher.threadpool-initial-size":"10","fs.s3n.multipart.uploads.enabled":"false","hadoop.security.crypto.buffer.size":"8192","yarn.
nodemanager.node-labels.provider.fetch-interval-ms":"600000","mapreduce.jobhistory.recovery.store.leveldb.path":"${hadoop.tmp.dir}/mapred/history/recoverystore","yarn.client.failover-retries-on-socket-timeouts":"0","hadoop.security.instrumentation.requires.admin":"false","yarn.nodemanager.delete.thread-count":"4","mapreduce.job.finish-when-all-reducers-done":"false","hadoop.registry.jaas.context":"Client","yarn.timeline-service.leveldb-timeline-store.path":"${hadoop.tmp.dir}/yarn/timeline","s3.blocksize":"67108864","io.map.index.interval":"128","mapreduce.job.counters.max":"120","yarn.timeline-service.store-class":"org.apache.hadoop.yarn.server.timeline.LeveldbTimelineStore","mapreduce.jobhistory.move.interval-ms":"180000","yarn.nodemanager.localizer.fetch.thread-count":"4","yarn.resourcemanager.scheduler.client.thread-count":"50","hadoop.ssl.hostname.verifier":"DEFAULT","yarn.timeline-service.leveldb-state-store.path":"${hadoop.tmp.dir}/yarn/timeline","mapreduce.job.classloader":"false","mapreduce.task.profile.map.params":"${mapreduce.task.profile.params}","ipc.client.connect.timeout":"20000","s3.stream-buffer-size":"4096","yarn.nm.liveness-monitor.expiry-interval-ms":"600000","yarn.resourcemanager.reservation-system.planfollower.time-step":"1000","s3native.bytes-per-checksum":"512","mapreduce.jobtracker.address":"local","yarn.nodemanager.recovery.enabled":"false","mapreduce.job.end-notification.retry.interval":"1000","fs.du.interval":"600000","hadoop.security.group.mapping.ldap.read.timeout.ms":"60000","hadoop.security.groups.cache.warn.after.ms":"5000","file.bytes-per-checksum":"512","yarn.node-labels.fs-store.retry-policy-spec":"2000, 500","hadoop.security.groups.cache.background.reload":"false","net.topology.script.number.args":"100","mapreduce.task.merge.progress.records":"10000","yarn.nodemanager.localizer.address":"${yarn.nodemanager.hostname}:8040","yarn.timeline-service.keytab":"/etc/krb5.keytab","mapreduce.reduce.shuffle.fetch.retry.timeout-ms":"30000","yarn.resourcemanager.rm.container-allocation.expiry-interval-ms":"600000","mapreduce.fileoutputcommitter.algorithm.version":"1","yarn.resourcemanager.work-preserving-recovery.enabled":"true","mapreduce.map.skip.maxrecords":"0","yarn.sharedcache.root-dir":"/sharedcache","hadoop.http.authentication.type":"simple","mapreduce.task.userlog.limit.kb":"0","yarn.resourcemanager.scheduler.monitor.enable":"false","fs.s3n.block.size":"67108864","ipc.client.connect.max.retries":"10","hadoop.registry.zk.retry.times":"5","mapreduce.jobtracker.staging.root.dir":"${hadoop.tmp.dir}/mapred/staging","yarn.nodemanager.resource-monitor.interval-ms":"3000","mapreduce.shuffle.listen.queue.size":"128","mapreduce.map.cpu.vcores":"1","yarn.timeline-service.client.fd-retain-secs":"300","hadoop.user.group.static.mapping.overrides":"dr.who=;","mapreduce.jobhistory.recovery.store.class":"org.apache.hadoop.mapreduce.v2.hs.HistoryServerFileSystemStateStoreService","yarn.resourcemanager.fail-fast":"${yarn.fail-fast}","yarn.resourcemanager.proxy-user-privileges.enabled":"false","mapreduce.job.reducer.preempt.delay.sec":"0","hadoop.util.hash.type":"murmur","yarn.app.mapreduce.client.job.max-retries":"0","mapreduce.reduce.shuffle.retry-delay.max.ms":"60000","hadoop.security.group.mapping.ldap.connection.timeout.ms":"60000","mapreduce.task.profile.params":"-agentlib:hprof=cpu=samples,heap=sites,force=n,thread=y,verbose=n,file=%s","yarn.app.mapreduce.shuffle.log.backups":"0","hadoop.registry.zk.retry.interval.ms":"1000","yarn.nodemanager.linux-container-executor.cgrou
ps.delete-timeout-ms":"1000","fs.AbstractFileSystem.file.impl":"org.apache.hadoop.fs.local.LocalFs","yarn.nodemanager.log-aggregation.roll-monitoring-interval-seconds":"-1","mapreduce.jobhistory.cleaner.interval-ms":"86400000","hadoop.registry.zk.quorum":"localhost:2181","mapreduce.output.fileoutputformat.compress":"false","yarn.resourcemanager.am-rm-tokens.master-key-rolling-interval-secs":"*********(redacted)","hadoop.ssl.server.conf":"ssl-server.xml","yarn.sharedcache.cleaner.initial-delay-mins":"10","mapreduce.client.completion.pollinterval":"5000","hadoop.ssl.keystores.factory.class":"org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory","yarn.app.mapreduce.am.resource.cpu-vcores":"1","yarn.timeline-service.enabled":"false","yarn.nodemanager.runtime.linux.docker.capabilities":"CHOWN,DAC_OVERRIDE,FSETID,FOWNER,MKNOD,NET_RAW,SETGID,SETUID,SETFCAP,SETPCAP,NET_BIND_SERVICE,SYS_CHROOT,KILL,AUDIT_WRITE","yarn.acl.enable":"false","yarn.timeline-service.entity-group-fs-store.done-dir":"/tmp/entity-file-history/done/","mapreduce.task.profile":"false","yarn.resourcemanager.fs.state-store.uri":"${hadoop.tmp.dir}/yarn/system/rmstore","yarn.nodemanager.linux-container-executor.nonsecure-mode.local-user":"nobody","yarn.resourcemanager.configuration.provider-class":"org.apache.hadoop.yarn.LocalConfigurationProvider","yarn.resourcemanager.configuration.file-system-based-store":"/yarn/conf","yarn.nodemanager.resource.percentage-physical-cpu-limit":"100","mapreduce.jobhistory.client.thread-count":"10","tfile.fs.input.buffer.size":"262144","mapreduce.client.progressmonitor.pollinterval":"1000","yarn.nodemanager.log-dirs":"${yarn.log.dir}/userlogs","fs.automatic.close":"true","fs.s3n.multipart.copy.block.size":"5368709120","yarn.nodemanager.hostname":"0.0.0.0","yarn.resourcemanager.zk-timeout-ms":"10000","ftp.stream-buffer-size":"4096","yarn.fail-fast":"false","hadoop.security.group.mapping.ldap.search.filter.user":"(&(objectClass=user)(sAMAccountName={0}))","yarn.timeline-service.address":"${yarn.timeline-service.hostname}:10200","mapreduce.job.ubertask.maxmaps":"9","fs.s3a.threads.keepalivetime":"60","mapreduce.task.files.preserve.failedtasks":"false","yarn.app.mapreduce.client.job.retry-interval":"2000","ha.failover-controller.graceful-fence.connection.retries":"1","yarn.resourcemanager.delegation.token.max-lifetime":"*********(redacted)","yarn.timeline-service.entity-group-fs-store.summary-store":"org.apache.hadoop.yarn.server.timeline.LeveldbTimelineStore","mapreduce.reduce.cpu.vcores":"1","fs.client.resolve.remote.symlinks":"true","yarn.nodemanager.webapp.https.address":"0.0.0.0:8044","hadoop.http.cross-origin.allowed-origins":"*","yarn.timeline-service.entity-group-fs-store.retain-seconds":"604800","yarn.resourcemanager.metrics.runtime.buckets":"60,300,1440","yarn.timeline-service.generic-application-history.max-applications":"10000","yarn.nodemanager.local-dirs":"${hadoop.tmp.dir}/nm-local-dir","mapreduce.shuffle.connection-keep-alive.enable":"false","yarn.node-labels.configuration-type":"centralized","fs.s3a.path.style.access":"false","yarn.nodemanager.aux-services.mapreduce_shuffle.class":"org.apache.hadoop.mapred.ShuffleHandler","yarn.sharedcache.store.in-memory.staleness-period-mins":"10080","fs.adl.impl":"org.apache.hadoop.fs.adl.AdlFileSystem","yarn.resourcemanager.nodemanager.minimum.version":"NONE","net.topology.impl":"org.apache.hadoop.net.NetworkTopology","io.map.index.skip":"0","yarn.scheduler.maximum-allocation-vcores":"4","hadoop.http.cross-origin.allowed-headers":"X-Requested-Wit
h,Content-Type,Accept,Origin","yarn.nodemanager.log-aggregation.compression-type":"none","yarn.timeline-service.version":"1.0f","yarn.ipc.rpc.class":"org.apache.hadoop.yarn.ipc.HadoopYarnProtoRPC","mapreduce.reduce.maxattempts":"4","hadoop.security.dns.log-slow-lookups.enabled":"false","mapreduce.job.committer.setup.cleanup.needed":"true","mapreduce.job.running.reduce.limit":"0","ipc.maximum.response.length":"134217728","mapreduce.job.token.tracking.ids.enabled":"*********(redacted)","hadoop.caller.context.max.size":"128","hadoop.registry.system.acls":"sasl:yarn@, sasl:mapred@, sasl:hdfs@","yarn.nodemanager.recovery.dir":"${hadoop.tmp.dir}/yarn-nm-recovery","fs.s3a.fast.upload.buffer":"disk","mapreduce.jobhistory.intermediate-done-dir":"${yarn.app.mapreduce.am.staging-dir}/history/done_intermediate","yarn.app.mapreduce.shuffle.log.separate":"true","fs.s3a.max.total.tasks":"5","fs.s3a.readahead.range":"64K","hadoop.http.authentication.simple.anonymous.allowed":"true","fs.s3a.fast.upload":"false","fs.s3a.attempts.maximum":"20","hadoop.registry.zk.connection.timeout.ms":"15000","yarn.resourcemanager.delegation-token-renewer.thread-count":"*********(redacted)","yarn.nodemanager.health-checker.script.timeout-ms":"1200000","yarn.timeline-service.leveldb-timeline-store.start-time-write-cache-size":"10000","mapreduce.map.log.level":"INFO","mapreduce.output.fileoutputformat.compress.type":"RECORD","yarn.resourcemanager.leveldb-state-store.path":"${hadoop.tmp.dir}/yarn/system/rmstore","hadoop.registry.rm.enabled":"false","mapreduce.ifile.readahead.bytes":"4194304","yarn.resourcemanager.fs.state-store.retry-policy-spec":"2000, 500","yarn.sharedcache.app-checker.class":"org.apache.hadoop.yarn.server.sharedcachemanager.RemoteAppChecker","yarn.nodemanager.linux-container-executor.nonsecure-mode.limit-users":"true","yarn.nodemanager.resource.detect-hardware-capabilities":"false","mapreduce.cluster.acls.enabled":"false","mapreduce.job.speculative.retry-after-no-speculate":"1000","yarn.resourcemanager.fs.state-store.retry-interval-ms":"1000","file.stream-buffer-size":"4096","mapreduce.map.output.compress.codec":"org.apache.hadoop.io.compress.DefaultCodec","mapreduce.map.speculative":"true","mapreduce.job.speculative.retry-after-speculate":"15000","yarn.nodemanager.linux-container-executor.cgroups.mount":"false","yarn.app.mapreduce.am.container.log.backups":"0","yarn.app.mapreduce.am.log.level":"INFO","mapreduce.job.reduce.slowstart.completedmaps":"0.05","yarn.timeline-service.http-authentication.type":"simple","hadoop.security.group.mapping.ldap.search.attr.group.name":"cn","yarn.timeline-service.client.internal-timers-ttl-secs":"420","fs.s3a.block.size":"32M","yarn.sharedcache.client-server.address":"0.0.0.0:8045","yarn.resourcemanager.hostname":"0.0.0.0","yarn.resourcemanager.delegation.key.update-interval":"86400000","mapreduce.reduce.shuffle.fetch.retry.enabled":"${yarn.nodemanager.recovery.enabled}","mapreduce.map.memory.mb":"1024","mapreduce.task.skip.start.attempts":"2","fs.AbstractFileSystem.hdfs.impl":"org.apache.hadoop.fs.Hdfs","yarn.nodemanager.disk-health-checker.enable":"true","ipc.client.tcpnodelay":"true","ipc.client.rpc-timeout.ms":"0","fs.s3.maxRetries":"4","ipc.client.low-latency":"false","mapreduce.input.lineinputformat.linespermap":"1","ipc.client.connect.max.retries.on.timeouts":"45","yarn.timeline-service.leveldb-timeline-store.read-cache-size":"104857600","fs.AbstractFileSystem.har.impl":"org.apache.hadoop.fs.HarFs","mapreduce.job.split.metainfo.maxsize":"10000000","yarn.am.liveness-m
onitor.expiry-interval-ms":"600000","yarn.resourcemanager.container-tokens.master-key-rolling-interval-secs":"*********(redacted)","yarn.timeline-service.entity-group-fs-store.app-cache-size":"10","fs.s3a.socket.recv.buffer":"8192","fs.s3n.multipart.uploads.block.size":"67108864","yarn.resourcemanager.resource-tracker.address":"${yarn.resourcemanager.hostname}:8031","yarn.nodemanager.node-labels.provider.fetch-timeout-ms":"1200000","yarn.resourcemanager.leveldb-state-store.compaction-interval-secs":"3600","mapreduce.client.output.filter":"FAILED","hadoop.http.filter.initializers":"org.apache.hadoop.http.lib.StaticUserWebFilter","mapreduce.reduce.memory.mb":"1024","s3native.client-write-packet-size":"65536","yarn.timeline-service.hostname":"0.0.0.0","file.replication":"1","yarn.nodemanager.container-metrics.unregister-delay-ms":"10000","yarn.nodemanager.container-metrics.period-ms":"-1","yarn.nodemanager.log.retain-seconds":"10800","yarn.timeline-service.entity-group-fs-store.cleaner-interval-seconds":"3600","yarn.resourcemanager.keytab":"/etc/krb5.keytab","hadoop.security.group.mapping.providers.combined":"true","mapreduce.reduce.merge.inmem.threshold":"1000","yarn.timeline-service.recovery.enabled":"false","yarn.sharedcache.nm.uploader.thread-count":"20","mapreduce.shuffle.ssl.enabled":"false","yarn.resourcemanager.state-store.max-completed-applications":"${yarn.resourcemanager.max-completed-applications}","mapreduce.job.speculative.minimum-allowed-tasks":"10","yarn.log-aggregation.retain-seconds":"-1","yarn.nodemanager.disk-health-checker.min-free-space-per-disk-mb":"0","mapreduce.jobhistory.max-age-ms":"604800000","hadoop.http.cross-origin.allowed-methods":"GET,POST,HEAD","mapreduce.jobhistory.webapp.address":"0.0.0.0:19888","mapreduce.jobtracker.system.dir":"${hadoop.tmp.dir}/mapred/system","yarn.client.nodemanager-connect.max-wait-ms":"180000","yarn.resourcemanager.webapp.address":"${yarn.resourcemanager.hostname}:8088","mapreduce.jobhistory.recovery.enable":"false","mapreduce.reduce.shuffle.parallelcopies":"5","fs.AbstractFileSystem.webhdfs.impl":"org.apache.hadoop.fs.WebHdfs","fs.trash.interval":"0","yarn.app.mapreduce.client.max-retries":"3","hadoop.security.authentication":"simple","mapreduce.task.profile.reduce.params":"${mapreduce.task.profile.params}","yarn.app.mapreduce.am.resource.mb":"1536","mapreduce.input.fileinputformat.list-status.num-threads":"1","yarn.nodemanager.container-executor.class":"org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor","io.mapfile.bloom.size":"1048576","yarn.timeline-service.ttl-ms":"604800000","yarn.nodemanager.resource.cpu-vcores":"-1","mapreduce.job.reduces":"1","fs.s3a.multipart.size":"100M","yarn.scheduler.minimum-allocation-vcores":"1","mapreduce.job.speculative.speculative-cap-total-tasks":"0.01","hadoop.ssl.client.conf":"ssl-client.xml","mapreduce.job.queuename":"default","ha.health-monitor.sleep-after-disconnect.ms":"1000","s3.bytes-per-checksum":"512","yarn.app.mapreduce.shuffle.log.limit.kb":"0","hadoop.security.group.mapping":"org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback","yarn.client.application-client-protocol.poll-timeout-ms":"-1","mapreduce.jobhistory.jhist.format":"json","yarn.resourcemanager.ha.enabled":"false","hadoop.http.staticuser.user":"dr.who","mapreduce.task.exit.timeout.check-interval-ms":"20000","mapreduce.task.exit.timeout":"60000","yarn.nodemanager.linux-container-executor.resources-handler.class":"org.apache.hadoop.yarn.server.nodemanager.util.DefaultLCEResourcesHandler","mapreduce.r
educe.shuffle.memory.limit.percent":"0.25","yarn.resourcemanager.reservation-system.enable":"false","s3.client-write-packet-size":"65536","mapreduce.map.output.compress":"false","ha.zookeeper.acl":"world:anyone:rwcda","ipc.server.max.connections":"0","yarn.scheduler.maximum-allocation-mb":"8192","yarn.resourcemanager.scheduler.monitor.policies":"org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy","yarn.sharedcache.cleaner.period-mins":"1440","yarn.app.mapreduce.am.container.log.limit.kb":"0","s3native.blocksize":"67108864","ipc.client.connect.retry.interval":"1000","yarn.resourcemanager.zk-state-store.parent-path":"/rmstore","mapreduce.jobhistory.cleaner.enable":"true","yarn.timeline-service.client.fd-flush-interval-secs":"10","hadoop.security.kms.client.encrypted.key.cache.expiry":"43200000","yarn.client.nodemanager-client-async.thread-pool-max-size":"500","mapreduce.map.maxattempts":"4","yarn.nodemanager.sleep-delay-before-sigkill.ms":"250","mapreduce.job.end-notification.retry.attempts":"0","yarn.nodemanager.resource.count-logical-processors-as-cores":"false","yarn.resourcemanager.zk-num-retries":"1000","hadoop.registry.zk.root":"/registry","adl.feature.ownerandgroup.enableupn":"false","mapreduce.job.reduce.shuffle.consumer.plugin.class":"org.apache.hadoop.mapreduce.task.reduce.Shuffle","yarn.resourcemanager.delayed.delegation-token.removal-interval-ms":"*********(redacted)","yarn.nodemanager.localizer.cache.target-size-mb":"10240","ftp.client-write-packet-size":"65536","fs.AbstractFileSystem.adl.impl":"org.apache.hadoop.fs.adl.Adl","yarn.client.failover-retries":"0","fs.s3a.multipart.purge.age":"86400","io.native.lib.available":"true","net.topology.node.switch.mapping.impl":"org.apache.hadoop.net.ScriptBasedMapping","yarn.nodemanager.amrmproxy.address":"0.0.0.0:8048","ipc.server.listen.queue.size":"128","map.sort.class":"org.apache.hadoop.util.QuickSort","fs.viewfs.rename.strategy":"SAME_MOUNTPOINT","hadoop.security.kms.client.authentication.retry-count":"1","fs.permissions.umask-mode":"022","yarn.nodemanager.vmem-check-enabled":"true","yarn.nodemanager.recovery.compaction-interval-secs":"3600","yarn.app.mapreduce.client-am.ipc.max-retries":"3","mapreduce.job.ubertask.maxreduces":"1","hadoop.security.kms.client.encrypted.key.cache.size":"500","hadoop.security.java.secure.random.algorithm":"SHA1PRNG","ha.failover-controller.cli-check.rpc-timeout.ms":"20000","mapreduce.jobhistory.jobname.limit":"50","yarn.client.nodemanager-connect.retry-interval-ms":"10000","yarn.timeline-service.state-store-class":"org.apache.hadoop.yarn.server.timeline.recovery.LeveldbTimelineStateStore","yarn.nodemanager.env-whitelist":"JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME","yarn.sharedcache.nested-level":"3","yarn.nodemanager.webapp.address":"${yarn.nodemanager.hostname}:8042","rpc.metrics.quantile.enable":"false","mapreduce.jobhistory.admin.acl":"*","yarn.resourcemanager.system-metrics-publisher.dispatcher.pool-size":"10","hadoop.http.authentication.kerberos.keytab":"${user.home}/hadoop.keytab","yarn.resourcemanager.recovery.enabled":"false"},"System Properties":{"java.io.tmpdir":"/tmp","line.separator":"\n","path.separator":":","sun.management.compiler":"HotSpot 64-Bit Tiered Compilers","SPARK_SUBMIT":"true","sun.cpu.endian":"little","java.specification.version":"1.8","java.vm.specification.name":"Java Virtual Machine Specification","java.vendor":"Oracle 
Corporation","java.vm.specification.version":"1.8","user.home":"/root","file.encoding.pkg":"sun.io","sun.nio.ch.bugLevel":"","sun.arch.data.model":"64","sun.boot.library.path":"/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64/jre/lib/amd64","user.dir":"/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8","java.library.path":"/usr/java/packages/lib/amd64:/usr/lib64:/lib64:/lib:/usr/lib","sun.cpu.isalist":"","os.arch":"amd64","java.vm.version":"25.252-b09","jetty.git.hash":"ab228fde9e55e9164c738d7fa121f8ac5acd51c9","java.endorsed.dirs":"/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64/jre/lib/endorsed","java.runtime.version":"1.8.0_252-b09","java.vm.info":"mixed mode","java.ext.dirs":"/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64/jre/lib/ext:/usr/java/packages/lib/ext","java.runtime.name":"OpenJDK Runtime Environment","file.separator":"/","java.class.version":"52.0","java.specification.name":"Java Platform API Specification","sun.boot.class.path":"/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64/jre/lib/resources.jar:/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64/jre/lib/rt.jar:/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64/jre/lib/sunrsasign.jar:/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64/jre/lib/jsse.jar:/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64/jre/lib/jce.jar:/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64/jre/lib/charsets.jar:/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64/jre/lib/jfr.jar:/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64/jre/classes","file.encoding":"UTF-8","user.timezone":"Asia/Shanghai","java.specification.vendor":"Oracle Corporation","sun.java.launcher":"SUN_STANDARD","os.version":"3.10.0-1127.10.1.el7.x86_64","sun.os.patch.level":"unknown","java.vm.specification.vendor":"Oracle Corporation","user.country":"US","sun.jnu.encoding":"UTF-8","user.language":"en","java.vendor.url":"http://java.oracle.com/","java.awt.printerjob":"sun.print.PSPrinterJob","java.awt.graphicsenv":"sun.awt.X11GraphicsEnvironment","awt.toolkit":"sun.awt.X11.XToolkit","os.name":"Linux","java.vm.vendor":"Oracle Corporation","java.vendor.url.bug":"http://bugreport.sun.com/bugreport/","user.name":"root","java.vm.name":"OpenJDK 64-Bit Server VM","sun.java.command":"org.apache.spark.deploy.SparkSubmit --master local[*] --conf spark.eventLog.dir=/tmp/spark-history --conf spark.eventLog.enabled=true --conf spark.sql.shuffle.partitions=2 --class org.apache.spark.examples.sql.streaming.StructuredKafkaWordCount ./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar 192.168.130.97:9092 subscribe test5","java.home":"/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64/jre","java.version":"1.8.0_252","sun.io.unicode.encoding":"UnicodeLittle"},"Classpath Entries":{"/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/metrics-graphite-4.1.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/nimbus-jose-jwt-4.41.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-vector-code-gen-2.3.7.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jackson-jaxrs-1.9.13.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jersey-server-2.30.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/pyrolite-4.30.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/conf/":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/json-smart-2.3.jar":"System 
Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/objenesis-2.5.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-auth-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jsp-api-2.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-unsafe_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-codec-1.10.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/protobuf-java-2.5.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/avro-1.8.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/guice-3.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/aopalliance-repackaged-2.6.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/transaction-api-1.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spire_2.12-0.17.0-M1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/gson-2.2.4.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/datanucleus-rdbms-4.1.19.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jackson-module-paranamer-2.10.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/libfb303-0.9.3.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-cli-1.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-tags_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/scala-library-2.12.10.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/xbean-asm7-shaded-4.15.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jersey-container-servlet-2.30.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hk2-api-2.6.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jakarta.xml.bind-api-2.3.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/okhttp-2.4.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/derby-10.12.1.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jackson-core-asl-1.9.13.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-collections-3.2.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/httpcore-4.4.12.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-beanutils-1.9.4.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spire-util_2.12-0.17.0-M1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-crypto-1.0.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-launcher_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/stax-api-1.0-2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/json4s-ast_2.12-3.6.6.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/lz4-java-1.7.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/scala-parser-combinators_2.12-1.1.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/parquet-format-2.4.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/parquet-column-1.10.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-logging-1.1.3.jar":"System 
Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/audience-annotations-0.5.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-jdbc-2.3.7.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-hive-thriftserver_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-cli-2.3.7.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/javolution-5.5.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/JLargeArrays-1.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-yarn-api-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/algebra_2.12-2.0.0-M2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-dbcp-1.4.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jakarta.ws.rs-api-2.1.6.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/threeten-extra-1.5.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-io-2.4.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/metrics-json-4.1.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/libthrift-0.12.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/compress-lzf-1.0.3.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/metrics-jmx-4.1.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jakarta.inject-2.6.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/stax-api-1.0.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-shims-common-2.3.7.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/curator-recipes-2.7.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/antlr4-runtime-4.7.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/slf4j-api-1.7.30.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/oro-2.0.8.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/arrow-memory-0.15.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jpam-1.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/velocity-1.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/orc-core-1.5.10.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-sql_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jackson-databind-2.10.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-text-1.6.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jersey-client-2.30.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/htrace-core4-4.0.1-incubating.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/json-1.8.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-graphx_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/avro-ipc-1.8.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/macro-compat_2.12-1.1.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jetty-util-6.1.26.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/core-1.1.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-shims-2.3.7.jar":"System 
Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/joda-time-2.10.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/parquet-encoding-1.10.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-llap-common-2.3.7.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-network-common_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/datanucleus-api-jdo-4.2.4.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/paranamer-2.8.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-shims-0.23-2.3.7.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/activation-1.1.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/curator-framework-2.7.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-compress-1.8.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-mapreduce-client-common-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/RoaringBitmap-0.7.45.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/ivy-2.4.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jackson-core-2.10.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-yarn-client-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-httpclient-3.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-yarn_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/parquet-common-1.10.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/zstd-jni-1.4.5-2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jersey-container-servlet-core-2.30.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/snappy-java-1.1.7.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/shapeless_2.12-2.3.3.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-pool-1.5.4.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/json4s-core_2.12-3.6.6.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/httpclient-4.5.6.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/api-util-1.0.0-M20.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/aircompressor-0.10.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-repl_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-yarn-common-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/leveldbjni-all-1.8.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jersey-hk2-2.30.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jta-1.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jetty-sslengine-6.1.26.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-net-3.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/datanucleus-core-4.1.17.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-yarn-server-web-proxy-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/breeze_2.12-1.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/orc-mapreduce-1.5.10.jar":"System 
Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jetty-6.1.26.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-core_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/xz-1.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/javax.inject-1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/scala-compiler-2.12.10.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/metrics-jvm-4.1.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/orc-shims-1.5.10.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jaxb-api-2.2.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jakarta.validation-api-2.0.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spire-macros_2.12-0.17.0-M1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/janino-3.1.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/osgi-resource-locator-1.0.3.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jcl-over-slf4j-1.7.30.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-mapreduce-client-app-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hk2-utils-2.6.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-sketch_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/JTransforms-3.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/kafka-clients-2.4.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/guice-servlet-3.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/okio-1.4.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-annotations-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-math3-3.4.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/json4s-scalap_2.12-3.6.6.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/bonecp-0.8.0.RELEASE.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-streaming_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/accessors-smart-1.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/guava-14.0.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/shims-0.7.45.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/ST4-4.0.4.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jackson-module-scala_2.12-2.10.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/scala-xml_2.12-1.2.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/chill-java-0.9.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-mapreduce-client-shuffle-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/cats-kernel_2.12-2.0.0-M4.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/stream-2.9.6.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-configuration-1.6.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jodd-core-3.5.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/scala-collection-compat_2.12-2.1.1.jar":"System 
Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-pool2-2.6.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jul-to-slf4j-1.7.30.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/xmlenc-0.52.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/flatbuffers-java-1.9.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-token-provider-kafka-0-10_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/json4s-jackson_2.12-3.6.6.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-compiler-3.1.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jline-2.14.6.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/breeze-macros_2.12-1.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/kryo-shaded-4.0.2.jar":"System Classpath","spark://iZbp19vpr16ix621sdw476Z:46309/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar":"Added By User","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-common-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-hive_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jersey-common-2.30.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/istack-commons-runtime-3.0.8.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/curator-client-2.7.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jackson-xc-1.9.13.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/zookeeper-3.4.14.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/parquet-hadoop-1.10.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jakarta.annotation-api-1.3.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-shims-scheduler-2.3.7.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/univocity-parsers-2.8.3.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-digester-1.8.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-mllib_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/arpack_combined_all-0.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-sql-kafka-0-10_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jackson-annotations-2.10.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hk2-locator-2.6.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-mapreduce-client-core-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/avro-mapred-1.8.2-hadoop2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-yarn-server-common-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/scala-reflect-2.12.10.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/super-csv-2.2.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-mapreduce-client-jobclient-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-client-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-common-2.3.7.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/apacheds-kerberos-codec-2.0.0-M15.jar":"System 
Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-exec-2.3.7-core.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/opencsv-2.3.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/api-asn1-api-1.0.0-M20.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-storage-api-2.7.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spire-platform_2.12-0.17.0-M1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/aopalliance-1.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/HikariCP-2.5.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-metastore-2.3.7.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/minlog-1.3.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/arrow-format-0.15.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jsr305-3.0.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-lang-2.6.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-lang3-3.9.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/javax.jdo-3.2.0-m3.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/apacheds-i18n-2.0.0-M15.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/javassist-3.25.0-GA.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jackson-mapper-asl-1.9.13.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/antlr-runtime-3.5.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/log4j-1.2.17.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-beeline-2.3.7.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/chill_2.12-0.9.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jdo-api-3.0.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-kvstore_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/metrics-core-4.1.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jaxb-runtime-2.3.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-mllib-local_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/py4j-0.10.9.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/javax.servlet-api-3.1.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-serde-2.3.7.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-hdfs-client-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-network-shuffle_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jcip-annotations-1.0-1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jersey-media-jaxb-2.30.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/parquet-jackson-1.10.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/slf4j-log4j12-1.7.30.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/netty-all-4.1.47.Final.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/arrow-vector-0.15.1.jar":"System 
Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-catalyst_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/machinist_2.12-0.6.8.jar":"System Classpath"}} +{"Event":"SparkListenerApplicationStart","App Name":"StructuredKafkaWordCount","App ID":"local-1596020211915","Timestamp":1596020210919,"User":"root"} +{"Event":"org.apache.spark.sql.streaming.StreamingQueryListener$QueryStartedEvent","id":"8d268dc2-bc9c-4be8-97a9-b135d2943028","runId":"e225d92f-2545-48f8-87a2-9c0309580f8a","name":null,"timestamp":"2020-07-29T10:56:55.947Z"} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":0,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 0","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48276}}, {\"test5\":{\"0\":48279}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, 
input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#142]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = f7faa1e9-69d9-41b4-9d77-919795af2413, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = f7faa1e9-69d9-41b4-9d77-919795af2413, opId = 0, ver = 0, numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@27fafcca\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@27fafcca","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 0, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 0, numPartitions = 2], 2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#66]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS 
value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":80,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":79,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":76,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":77,"metricType":"timing"},{"name":"peak memory","accumulatorId":75,"metricType":"size"},{"name":"number of output rows","accumulatorId":74,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":78,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":71,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":72,"metricType":"timing"},{"name":"peak memory","accumulatorId":70,"metricType":"size"},{"name":"number of output rows","accumulatorId":69,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":73,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":68,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":20,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":21,"metricType":"nsTiming"},{"name":"records read","accumulatorId":18,"metricType":"sum"},{"name":"local bytes read","accumulatorId":16,"metricType":"size"},{"name":"fetch wait time","accumulatorId":17,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":14,"metricType":"size"},{"name":"local blocks read","accumulatorId":13,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":12,"metricType":"sum"},{"name":"data size","accumulatorId":11,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":15,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":19,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":67,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":64,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":65,"metricType":"timing"},{"name":"peak memory","accumulatorId":63,"metricType":"size"},{"name":"number of output rows","accumulatorId":62,"metricType":"sum"},{"name":"avg hash probe bucket list 
iters","accumulatorId":66,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":61,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":51,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":52,"metricType":"sum"},{"name":"memory used by state","accumulatorId":57,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":59,"metricType":"sum"},{"name":"number of output rows","accumulatorId":50,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":58,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":60,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":56,"metricType":"timing"},{"name":"time to remove","accumulatorId":55,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":53,"metricType":"sum"},{"name":"time to update","accumulatorId":54,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":47,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":48,"metricType":"timing"},{"name":"peak memory","accumulatorId":46,"metricType":"size"},{"name":"number of output rows","accumulatorId":45,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":49,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":44,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020220179} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":1,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 0","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, 
timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48276}}, {\"test5\":{\"0\":48279}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#218]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 64a4779b-846a-4f20-9f5c-899a8dbf68d8, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 64a4779b-846a-4f20-9f5c-899a8dbf68d8, opId = 0, ver = 0, numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@27fafcca\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@27fafcca","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 0, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], 
functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 0, numPartitions = 2], 2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#66]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":80,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":79,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":76,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":77,"metricType":"timing"},{"name":"peak memory","accumulatorId":75,"metricType":"size"},{"name":"number of output rows","accumulatorId":74,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":78,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":71,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":72,"metricType":"timing"},{"name":"peak memory","accumulatorId":70,"metricType":"size"},{"name":"number of output rows","accumulatorId":69,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":73,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":68,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":20,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":21,"metricType":"nsTiming"},{"name":"records read","accumulatorId":18,"metricType":"sum"},{"name":"local bytes 
read","accumulatorId":16,"metricType":"size"},{"name":"fetch wait time","accumulatorId":17,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":14,"metricType":"size"},{"name":"local blocks read","accumulatorId":13,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":12,"metricType":"sum"},{"name":"data size","accumulatorId":11,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":15,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":19,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":67,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":64,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":65,"metricType":"timing"},{"name":"peak memory","accumulatorId":63,"metricType":"size"},{"name":"number of output rows","accumulatorId":62,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":66,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":61,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":51,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":52,"metricType":"sum"},{"name":"memory used by state","accumulatorId":57,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":59,"metricType":"sum"},{"name":"number of output rows","accumulatorId":50,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":58,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":60,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":56,"metricType":"timing"},{"name":"time to remove","accumulatorId":55,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":53,"metricType":"sum"},{"name":"time to update","accumulatorId":54,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":47,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":48,"metricType":"timing"},{"name":"peak memory","accumulatorId":46,"metricType":"size"},{"name":"number of output rows","accumulatorId":45,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":49,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":44,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020220258} +{"Event":"SparkListenerJobStart","Job ID":0,"Submission Time":1596020221633,"Stage Infos":[{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":6,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"8\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"15\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory 
Size":0,"Disk Size":0},{"RDD ID":1,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"20\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"16\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":5,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"9\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[4],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":4,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"14\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[3],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"DataSourceRDD","Scope":"{\"id\":\"20\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0},{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":11,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"0\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[10],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk 
Size":0},{"RDD ID":7,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"8\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":9,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"4\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[8],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":10,"Name":"StateStoreRDD","Scope":"{\"id\":\"3\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[9],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":8,"Name":"StateStoreRDD","Scope":"{\"id\":\"7\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[7],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[0],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0}],"Stage 
IDs":[0,1],"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 0","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"0","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"1","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":6,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"8\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"15\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"20\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD 
ID":2,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"16\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":5,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"9\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[4],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":4,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"14\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[3],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"DataSourceRDD","Scope":"{\"id\":\"20\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020221656,"Accumulables":[],"Resource Profile 
Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 0","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"0","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"1","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1596020221738,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1596020221738,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020222649,"Failed":false,"Killed":false,"Accumulables":[{"ID":21,"Name":"shuffle write time","Update":"9599308","Value":"9599308","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":20,"Name":"shuffle records written","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":19,"Name":"shuffle bytes written","Update":"168","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":11,"Name":"data size","Update":"128","Value":"128","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":68,"Name":"duration","Update":"296","Value":"296","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":69,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":70,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":72,"Name":"time in aggregation build","Update":"200","Value":"200","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":74,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":75,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":77,"Name":"time in aggregation build","Update":"190","Value":"190","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":79,"Name":"duration","Update":"336","Value":"336","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":80,"Name":"number of output rows","Update":"3","Value":"3","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":125,"Name":"internal.metrics.input.recordsRead","Update":3,"Value":3,"Internal":true,"Count Failed Values":true},{"ID":123,"Name":"internal.metrics.shuffle.write.writeTime","Update":9599308,"Value":9599308,"Internal":true,"Count Failed Values":true},{"ID":122,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":121,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":168,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":112,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":524288,"Internal":true,"Count Failed Values":true},{"ID":109,"Name":"internal.metrics.resultSerializationTime","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":108,"Name":"internal.metrics.jvmGCTime","Update":17,"Value":17,"Internal":true,"Count Failed Values":true},{"ID":107,"Name":"internal.metrics.resultSize","Update":2630,"Value":2630,"Internal":true,"Count Failed Values":true},{"ID":106,"Name":"internal.metrics.executorCpuTime","Update":466139164,"Value":466139164,"Internal":true,"Count Failed Values":true},{"ID":105,"Name":"internal.metrics.executorRunTime","Update":503,"Value":503,"Internal":true,"Count Failed Values":true},{"ID":104,"Name":"internal.metrics.executorDeserializeCpuTime","Update":301869581,"Value":301869581,"Internal":true,"Count Failed Values":true},{"ID":103,"Name":"internal.metrics.executorDeserializeTime","Update":361,"Value":361,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":361,"Executor Deserialize CPU Time":301869581,"Executor Run Time":503,"Executor CPU Time":466139164,"Peak Execution Memory":524288,"Result Size":2630,"JVM GC Time":17,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks 
Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":168,"Shuffle Write Time":9599308,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":3},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":6,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"8\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"15\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"20\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"16\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":5,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"9\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[4],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":4,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"14\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[3],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"DataSourceRDD","Scope":"{\"id\":\"20\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020221656,"Completion Time":1596020222661,"Accumulables":[{"ID":104,"Name":"internal.metrics.executorDeserializeCpuTime","Value":301869581,"Internal":true,"Count Failed Values":true},{"ID":122,"Name":"internal.metrics.shuffle.write.recordsWritten","Value":1,"Internal":true,"Count Failed Values":true},{"ID":77,"Name":"time in aggregation build","Value":"190","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":68,"Name":"duration","Value":"296","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":80,"Name":"number of output rows","Value":"3","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":125,"Name":"internal.metrics.input.recordsRead","Value":3,"Internal":true,"Count Failed Values":true},{"ID":107,"Name":"internal.metrics.resultSize","Value":2630,"Internal":true,"Count Failed Values":true},{"ID":74,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":11,"Name":"data size","Value":"128","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":20,"Name":"shuffle records written","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":106,"Name":"internal.metrics.executorCpuTime","Value":466139164,"Internal":true,"Count Failed Values":true},{"ID":109,"Name":"internal.metrics.resultSerializationTime","Value":1,"Internal":true,"Count Failed Values":true},{"ID":121,"Name":"internal.metrics.shuffle.write.bytesWritten","Value":168,"Internal":true,"Count Failed Values":true},{"ID":112,"Name":"internal.metrics.peakExecutionMemory","Value":524288,"Internal":true,"Count Failed Values":true},{"ID":103,"Name":"internal.metrics.executorDeserializeTime","Value":361,"Internal":true,"Count Failed Values":true},{"ID":79,"Name":"duration","Value":"336","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":70,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":19,"Name":"shuffle bytes written","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":123,"Name":"internal.metrics.shuffle.write.writeTime","Value":9599308,"Internal":true,"Count Failed Values":true},{"ID":105,"Name":"internal.metrics.executorRunTime","Value":503,"Internal":true,"Count Failed Values":true},{"ID":69,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":72,"Name":"time in aggregation build","Value":"200","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":108,"Name":"internal.metrics.jvmGCTime","Value":17,"Internal":true,"Count Failed Values":true},{"ID":21,"Name":"shuffle write 
time","Value":"9599308","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":75,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"}],"Resource Profile Id":0}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":11,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"0\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[10],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":7,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"8\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":9,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"4\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[8],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":10,"Name":"StateStoreRDD","Scope":"{\"id\":\"3\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[9],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":8,"Name":"StateStoreRDD","Scope":"{\"id\":\"7\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[7],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[0],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020222688,"Accumulables":[],"Resource Profile 
Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 0","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"0","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"1","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1596020222709,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":2,"Index":0,"Attempt":0,"Launch Time":1596020222713,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":2,"Index":0,"Attempt":0,"Launch Time":1596020222713,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020222954,"Failed":false,"Killed":false,"Accumulables":[{"ID":44,"Name":"duration","Update":"19","Value":"19","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":46,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":48,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":54,"Name":"time to update","Update":"14","Value":"14","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":55,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":56,"Name":"time to commit changes","Update":"50","Value":"50","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":58,"Name":"estimated size of state only on current version","Update":"64","Value":"64","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":57,"Name":"memory used by state","Update":"208","Value":"208","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":61,"Name":"duration","Update":"14","Value":"14","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":63,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":65,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":145,"Name":"internal.metrics.shuffle.read.recordsRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":144,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":143,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":142,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":141,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":140,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":139,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":137,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":524288,"Internal":true,"Count Failed Values":true},{"ID":134,"Name":"internal.metrics.resultSerializationTime","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":132,"Name":"internal.metrics.resultSize","Update":5354,"Value":5354,"Internal":true,"Count Failed Values":true},{"ID":131,"Name":"internal.metrics.executorCpuTime","Update":93367533,"Value":93367533,"Internal":true,"Count Failed Values":true},{"ID":130,"Name":"internal.metrics.executorRunTime","Update":203,"Value":203,"Internal":true,"Count Failed Values":true},{"ID":129,"Name":"internal.metrics.executorDeserializeCpuTime","Update":10308753,"Value":10308753,"Internal":true,"Count Failed Values":true},{"ID":128,"Name":"internal.metrics.executorDeserializeTime","Update":23,"Value":23,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":23,"Executor 
Deserialize CPU Time":10308753,"Executor Run Time":203,"Executor CPU Time":93367533,"Peak Execution Memory":524288,"Result Size":5354,"JVM GC Time":0,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1596020222709,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020222965,"Failed":false,"Killed":false,"Accumulables":[{"ID":44,"Name":"duration","Update":"33","Value":"52","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":49,"Name":"avg hash probe bucket list iters","Update":"10","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":45,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":46,"Name":"peak memory","Update":"4456448","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":48,"Name":"time in aggregation build","Update":"19","Value":"19","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":54,"Name":"time to update","Update":"28","Value":"42","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":53,"Name":"number of updated state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":55,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":56,"Name":"time to commit changes","Update":"31","Value":"81","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":58,"Name":"estimated size of state only on current version","Update":"424","Value":"488","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":50,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":57,"Name":"memory used by state","Update":"568","Value":"776","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":52,"Name":"number of total state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":61,"Name":"duration","Update":"28","Value":"42","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":62,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":63,"Name":"peak memory","Update":"262144","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":65,"Name":"time in aggregation build","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":67,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":13,"Name":"local blocks read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":17,"Name":"fetch wait 
time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":16,"Name":"local bytes read","Update":"168","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":18,"Name":"records read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":145,"Name":"internal.metrics.shuffle.read.recordsRead","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":144,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":143,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":168,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":142,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":141,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":140,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":139,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":137,"Name":"internal.metrics.peakExecutionMemory","Update":4718592,"Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":132,"Name":"internal.metrics.resultSize","Update":5574,"Value":10928,"Internal":true,"Count Failed Values":true},{"ID":131,"Name":"internal.metrics.executorCpuTime","Update":91355172,"Value":184722705,"Internal":true,"Count Failed Values":true},{"ID":130,"Name":"internal.metrics.executorRunTime","Update":205,"Value":408,"Internal":true,"Count Failed Values":true},{"ID":129,"Name":"internal.metrics.executorDeserializeCpuTime","Update":21029530,"Value":31338283,"Internal":true,"Count Failed Values":true},{"ID":128,"Name":"internal.metrics.executorDeserializeTime","Update":34,"Value":57,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":34,"Executor Deserialize CPU Time":21029530,"Executor Run Time":205,"Executor CPU Time":91355172,"Peak Execution Memory":4718592,"Result Size":5574,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":1,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":168,"Total Records Read":1},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":11,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"0\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at 
StructuredKafkaWordCount.scala:86","Parent IDs":[10],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":7,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"8\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":9,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"4\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[8],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":10,"Name":"StateStoreRDD","Scope":"{\"id\":\"3\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[9],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":8,"Name":"StateStoreRDD","Scope":"{\"id\":\"7\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[7],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[0],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020222688,"Completion Time":1596020222967,"Accumulables":[{"ID":137,"Name":"internal.metrics.peakExecutionMemory","Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":128,"Name":"internal.metrics.executorDeserializeTime","Value":57,"Internal":true,"Count Failed Values":true},{"ID":131,"Name":"internal.metrics.executorCpuTime","Value":184722705,"Internal":true,"Count Failed Values":true},{"ID":50,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":140,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Value":1,"Internal":true,"Count Failed 
Values":true},{"ID":53,"Name":"number of updated state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":62,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":17,"Name":"fetch wait time","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":134,"Name":"internal.metrics.resultSerializationTime","Value":1,"Internal":true,"Count Failed Values":true},{"ID":44,"Name":"duration","Value":"52","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":56,"Name":"time to commit changes","Value":"81","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":65,"Name":"time in aggregation build","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":142,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Value":0,"Internal":true,"Count Failed Values":true},{"ID":46,"Name":"peak memory","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":145,"Name":"internal.metrics.shuffle.read.recordsRead","Value":1,"Internal":true,"Count Failed Values":true},{"ID":55,"Name":"time to remove","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":49,"Name":"avg hash probe bucket list iters","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":67,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":139,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Value":0,"Internal":true,"Count Failed Values":true},{"ID":58,"Name":"estimated size of state only on current version","Value":"488","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":13,"Name":"local blocks read","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":130,"Name":"internal.metrics.executorRunTime","Value":408,"Internal":true,"Count Failed Values":true},{"ID":16,"Name":"local bytes read","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":52,"Name":"number of total state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":61,"Name":"duration","Value":"42","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":129,"Name":"internal.metrics.executorDeserializeCpuTime","Value":31338283,"Internal":true,"Count Failed Values":true},{"ID":132,"Name":"internal.metrics.resultSize","Value":10928,"Internal":true,"Count Failed Values":true},{"ID":141,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Value":0,"Internal":true,"Count Failed Values":true},{"ID":45,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":63,"Name":"peak memory","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":54,"Name":"time to update","Value":"42","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":144,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Value":0,"Internal":true,"Count Failed Values":true},{"ID":18,"Name":"records read","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":57,"Name":"memory used by state","Value":"776","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":48,"Name":"time in aggregation build","Value":"19","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":143,"Name":"internal.metrics.shuffle.read.localBytesRead","Value":168,"Internal":true,"Count Failed 
Values":true}],"Resource Profile Id":0}} +{"Event":"SparkListenerJobEnd","Job ID":0,"Completion Time":1596020222973,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":2,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 0","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nLocalTableScan (1)\n\n\n(1) LocalTableScan\nOutput [2]: [value#46, count#47]\nArguments: [value#46, count#47]\n\n","sparkPlanInfo":{"nodeName":"LocalTableScan","simpleString":"LocalTableScan [value#46, count#47]","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":153,"metricType":"sum"}]},"time":1596020223028} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":2,"time":1596020223062} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":1,"time":1596020223069} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":0,"time":1596020223069} +{"Event":"org.apache.spark.sql.streaming.StreamingQueryListener$QueryProgressEvent","progress":{"id":"8d268dc2-bc9c-4be8-97a9-b135d2943028","runId":"e225d92f-2545-48f8-87a2-9c0309580f8a","name":null,"timestamp":"2020-07-29T10:56:56.015Z","batchId":0,"batchDuration":7110,"durationMs":{"triggerExecution":7109,"queryPlanning":439,"getBatch":21,"latestOffset":3524,"addBatch":3011,"walCommit":35},"eventTime":{},"stateOperators":[{"numRowsTotal":1,"numRowsUpdated":1,"memoryUsedBytes":776,"numLateInputs":0,"customMetrics":{"stateOnCurrentVersionSizeBytes":488,"loadedMapCacheHitCount":0,"loadedMapCacheMissCount":0}}],"sources":[{"description":"KafkaV2[Subscribe[test5]]","startOffset":null,"endOffset":"{\"test5\":{\"0\":48279}}","numInputRows":3,"inputRowsPerSecond":"NaN","processedRowsPerSecond":0.42194092827004215}],"sink":{"description":"org.apache.spark.sql.execution.streaming.ConsoleTable$@514ba885","numOutputRows":1},"observedMetrics":{}}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":3,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 
1","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48279}}, {\"test5\":{\"0\":48642}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#373]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 1fb6b6c6-ced8-4f85-80af-1f3f4c424457, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id 
: 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 1fb6b6c6-ced8-4f85-80af-1f3f4c424457, opId = 0, ver = 0, numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@3a1eb73c\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@3a1eb73c","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 1, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 1, numPartitions = 2], 2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#297]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] 
class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":237,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":236,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":233,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":234,"metricType":"timing"},{"name":"peak memory","accumulatorId":232,"metricType":"size"},{"name":"number of output rows","accumulatorId":231,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":235,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":228,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":229,"metricType":"timing"},{"name":"peak memory","accumulatorId":227,"metricType":"size"},{"name":"number of output rows","accumulatorId":226,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":230,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":225,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":177,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":178,"metricType":"nsTiming"},{"name":"records read","accumulatorId":175,"metricType":"sum"},{"name":"local bytes read","accumulatorId":173,"metricType":"size"},{"name":"fetch wait time","accumulatorId":174,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":171,"metricType":"size"},{"name":"local blocks read","accumulatorId":170,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":169,"metricType":"sum"},{"name":"data size","accumulatorId":168,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":172,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":176,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":224,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":221,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":222,"metricType":"timing"},{"name":"peak memory","accumulatorId":220,"metricType":"size"},{"name":"number of output rows","accumulatorId":219,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":223,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":218,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":208,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":209,"metricType":"sum"},{"name":"memory used by state","accumulatorId":214,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":216,"metricType":"sum"},{"name":"number of output rows","accumulatorId":207,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":215,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":217,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":213,"metricType":"timing"},{"name":"time to 
remove","accumulatorId":212,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":210,"metricType":"sum"},{"name":"time to update","accumulatorId":211,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":204,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":205,"metricType":"timing"},{"name":"peak memory","accumulatorId":203,"metricType":"size"},{"name":"number of output rows","accumulatorId":202,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":206,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":201,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020223333} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":4,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 1","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48279}}, {\"test5\":{\"0\":48642}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: 
java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#449]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 7992c0a8-0641-440d-aaf7-ad453fe25c0a, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 7992c0a8-0641-440d-aaf7-ad453fe25c0a, opId = 0, ver = 0, numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@3a1eb73c\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@3a1eb73c","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 1, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 1, numPartitions = 2], 2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#297]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, 
StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":237,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":236,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":233,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":234,"metricType":"timing"},{"name":"peak memory","accumulatorId":232,"metricType":"size"},{"name":"number of output rows","accumulatorId":231,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":235,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":228,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":229,"metricType":"timing"},{"name":"peak memory","accumulatorId":227,"metricType":"size"},{"name":"number of output rows","accumulatorId":226,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":230,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":225,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":177,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":178,"metricType":"nsTiming"},{"name":"records read","accumulatorId":175,"metricType":"sum"},{"name":"local bytes read","accumulatorId":173,"metricType":"size"},{"name":"fetch wait time","accumulatorId":174,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":171,"metricType":"size"},{"name":"local blocks read","accumulatorId":170,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":169,"metricType":"sum"},{"name":"data size","accumulatorId":168,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":172,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":176,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":224,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":221,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":222,"metricType":"timing"},{"name":"peak memory","accumulatorId":220,"metricType":"size"},{"name":"number of output 
rows","accumulatorId":219,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":223,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":218,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":208,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":209,"metricType":"sum"},{"name":"memory used by state","accumulatorId":214,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":216,"metricType":"sum"},{"name":"number of output rows","accumulatorId":207,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":215,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":217,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":213,"metricType":"timing"},{"name":"time to remove","accumulatorId":212,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":210,"metricType":"sum"},{"name":"time to update","accumulatorId":211,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":204,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":205,"metricType":"timing"},{"name":"peak memory","accumulatorId":203,"metricType":"size"},{"name":"number of output rows","accumulatorId":202,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":206,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":201,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020223382} +{"Event":"SparkListenerJobStart","Job ID":1,"Submission Time":1596020223482,"Stage Infos":[{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":18,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"41\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[17],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":12,"Name":"DataSourceRDD","Scope":"{\"id\":\"53\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":13,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"53\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[12],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":15,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"48\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[14],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":14,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"49\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at 
StructuredKafkaWordCount.scala:86","Parent IDs":[13],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":16,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"47\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[15],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":17,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"42\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[16],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0},{"Stage ID":3,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":23,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"33\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[22],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":22,"Name":"StateStoreRDD","Scope":"{\"id\":\"36\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[21],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":19,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"41\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[18],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":21,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"37\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at 
StructuredKafkaWordCount.scala:86","Parent IDs":[20],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":20,"Name":"StateStoreRDD","Scope":"{\"id\":\"40\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[19],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[2],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0}],"Stage IDs":[2,3],"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 1","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"1","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"4","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":18,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"41\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[17],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":12,"Name":"DataSourceRDD","Scope":"{\"id\":\"53\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":13,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"53\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[12],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":15,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"48\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[14],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":14,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"49\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[13],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":16,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"47\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[15],"Storage 
Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":17,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"42\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[16],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020223485,"Accumulables":[],"Resource Profile Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at 
StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 1","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"1","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"4","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":3,"Index":0,"Attempt":0,"Launch Time":1596020223493,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":3,"Index":0,"Attempt":0,"Launch Time":1596020223493,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020223601,"Failed":false,"Killed":false,"Accumulables":[{"ID":178,"Name":"shuffle write time","Update":"837580","Value":"837580","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":177,"Name":"shuffle records written","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":176,"Name":"shuffle bytes written","Update":"169","Value":"169","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":168,"Name":"data size","Update":"128","Value":"128","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":225,"Name":"duration","Update":"84","Value":"84","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":226,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":227,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":229,"Name":"time in aggregation build","Update":"74","Value":"74","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":231,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":232,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":234,"Name":"time in aggregation build","Update":"68","Value":"68","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":236,"Name":"duration","Update":"84","Value":"84","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":237,"Name":"number of output rows","Update":"363","Value":"363","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":282,"Name":"internal.metrics.input.recordsRead","Update":363,"Value":363,"Internal":true,"Count Failed Values":true},{"ID":280,"Name":"internal.metrics.shuffle.write.writeTime","Update":837580,"Value":837580,"Internal":true,"Count Failed Values":true},{"ID":279,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":278,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":169,"Value":169,"Internal":true,"Count Failed 
Values":true},{"ID":269,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":524288,"Internal":true,"Count Failed Values":true},{"ID":264,"Name":"internal.metrics.resultSize","Update":2544,"Value":2544,"Internal":true,"Count Failed Values":true},{"ID":263,"Name":"internal.metrics.executorCpuTime","Update":95945587,"Value":95945587,"Internal":true,"Count Failed Values":true},{"ID":262,"Name":"internal.metrics.executorRunTime","Update":96,"Value":96,"Internal":true,"Count Failed Values":true},{"ID":261,"Name":"internal.metrics.executorDeserializeCpuTime","Update":7437557,"Value":7437557,"Internal":true,"Count Failed Values":true},{"ID":260,"Name":"internal.metrics.executorDeserializeTime","Update":7,"Value":7,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":7,"Executor Deserialize CPU Time":7437557,"Executor Run Time":96,"Executor CPU Time":95945587,"Peak Execution Memory":524288,"Result Size":2544,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":169,"Shuffle Write Time":837580,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":363},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":18,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"41\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[17],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":12,"Name":"DataSourceRDD","Scope":"{\"id\":\"53\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":13,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"53\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[12],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":15,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"48\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[14],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of 
Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":14,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"49\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[13],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":16,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"47\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[15],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":17,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"42\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[16],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020223485,"Completion Time":1596020223603,"Accumulables":[{"ID":227,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":236,"Name":"duration","Value":"84","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":176,"Name":"shuffle bytes written","Value":"169","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":262,"Name":"internal.metrics.executorRunTime","Value":96,"Internal":true,"Count Failed Values":true},{"ID":226,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":280,"Name":"internal.metrics.shuffle.write.writeTime","Value":837580,"Internal":true,"Count Failed Values":true},{"ID":229,"Name":"time in aggregation build","Value":"74","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":232,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":178,"Name":"shuffle write time","Value":"837580","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":225,"Name":"duration","Value":"84","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":261,"Name":"internal.metrics.executorDeserializeCpuTime","Value":7437557,"Internal":true,"Count Failed Values":true},{"ID":279,"Name":"internal.metrics.shuffle.write.recordsWritten","Value":1,"Internal":true,"Count Failed Values":true},{"ID":234,"Name":"time in aggregation build","Value":"68","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":264,"Name":"internal.metrics.resultSize","Value":2544,"Internal":true,"Count Failed Values":true},{"ID":282,"Name":"internal.metrics.input.recordsRead","Value":363,"Internal":true,"Count Failed Values":true},{"ID":237,"Name":"number of output rows","Value":"363","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":177,"Name":"shuffle records written","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":168,"Name":"data size","Value":"128","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":231,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":263,"Name":"internal.metrics.executorCpuTime","Value":95945587,"Internal":true,"Count Failed Values":true},{"ID":260,"Name":"internal.metrics.executorDeserializeTime","Value":7,"Internal":true,"Count Failed Values":true},{"ID":269,"Name":"internal.metrics.peakExecutionMemory","Value":524288,"Internal":true,"Count Failed Values":true},{"ID":278,"Name":"internal.metrics.shuffle.write.bytesWritten","Value":169,"Internal":true,"Count Failed Values":true}],"Resource Profile Id":0}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":3,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":23,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"33\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[22],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":22,"Name":"StateStoreRDD","Scope":"{\"id\":\"36\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[21],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":19,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"41\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[18],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":21,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"37\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[20],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":20,"Name":"StateStoreRDD","Scope":"{\"id\":\"40\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[19],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[2],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020223613,"Accumulables":[],"Resource Profile Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 1","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"1","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"4","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} 
+{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":4,"Index":0,"Attempt":0,"Launch Time":1596020223625,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":5,"Index":1,"Attempt":0,"Launch Time":1596020223626,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":4,"Index":0,"Attempt":0,"Launch Time":1596020223625,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020223717,"Failed":false,"Killed":false,"Accumulables":[{"ID":201,"Name":"duration","Update":"4","Value":"4","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":203,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":205,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":211,"Name":"time to update","Update":"6","Value":"6","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":212,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":213,"Name":"time to commit changes","Update":"38","Value":"38","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":215,"Name":"estimated size of state only on current version","Update":"88","Value":"88","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":216,"Name":"count of cache hit on states cache in provider","Update":"2","Value":"2","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":214,"Name":"memory used by state","Update":"376","Value":"376","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":218,"Name":"duration","Update":"6","Value":"6","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":220,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":222,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":302,"Name":"internal.metrics.shuffle.read.recordsRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":301,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":300,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":299,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":298,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":297,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":296,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed 
Values":true},{"ID":294,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":524288,"Internal":true,"Count Failed Values":true},{"ID":289,"Name":"internal.metrics.resultSize","Update":5311,"Value":5311,"Internal":true,"Count Failed Values":true},{"ID":288,"Name":"internal.metrics.executorCpuTime","Update":22954307,"Value":22954307,"Internal":true,"Count Failed Values":true},{"ID":287,"Name":"internal.metrics.executorRunTime","Update":77,"Value":77,"Internal":true,"Count Failed Values":true},{"ID":286,"Name":"internal.metrics.executorDeserializeCpuTime","Update":6627382,"Value":6627382,"Internal":true,"Count Failed Values":true},{"ID":285,"Name":"internal.metrics.executorDeserializeTime","Update":6,"Value":6,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":6,"Executor Deserialize CPU Time":6627382,"Executor Run Time":77,"Executor CPU Time":22954307,"Peak Execution Memory":524288,"Result Size":5311,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":5,"Index":1,"Attempt":0,"Launch Time":1596020223626,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020223720,"Failed":false,"Killed":false,"Accumulables":[{"ID":201,"Name":"duration","Update":"4","Value":"8","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":206,"Name":"avg hash probe bucket list iters","Update":"10","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":202,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":203,"Name":"peak memory","Update":"4456448","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":205,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":211,"Name":"time to update","Update":"18","Value":"24","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":210,"Name":"number of updated state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":212,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":213,"Name":"time to commit changes","Update":"30","Value":"68","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":215,"Name":"estimated size of state only on current version","Update":"368","Value":"456","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":207,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":216,"Name":"count of cache hit on states cache in provider","Update":"2","Value":"4","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":214,"Name":"memory used by state","Update":"840","Value":"1216","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":209,"Name":"number of total state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":218,"Name":"duration","Update":"19","Value":"25","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":219,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":220,"Name":"peak memory","Update":"262144","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":222,"Name":"time in aggregation build","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":224,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":170,"Name":"local blocks read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":174,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":173,"Name":"local bytes read","Update":"169","Value":"169","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":175,"Name":"records read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":302,"Name":"internal.metrics.shuffle.read.recordsRead","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":301,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":300,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":169,"Value":169,"Internal":true,"Count Failed Values":true},{"ID":299,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":298,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":297,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":296,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":294,"Name":"internal.metrics.peakExecutionMemory","Update":4718592,"Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":289,"Name":"internal.metrics.resultSize","Update":5574,"Value":10885,"Internal":true,"Count Failed Values":true},{"ID":288,"Name":"internal.metrics.executorCpuTime","Update":25907369,"Value":48861676,"Internal":true,"Count Failed Values":true},{"ID":287,"Name":"internal.metrics.executorRunTime","Update":82,"Value":159,"Internal":true,"Count Failed Values":true},{"ID":286,"Name":"internal.metrics.executorDeserializeCpuTime","Update":7573630,"Value":14201012,"Internal":true,"Count Failed Values":true},{"ID":285,"Name":"internal.metrics.executorDeserializeTime","Update":7,"Value":13,"Internal":true,"Count Failed Values":true}]},"Task Executor 
Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":7,"Executor Deserialize CPU Time":7573630,"Executor Run Time":82,"Executor CPU Time":25907369,"Peak Execution Memory":4718592,"Result Size":5574,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":1,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":169,"Total Records Read":1},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":3,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":23,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"33\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[22],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":22,"Name":"StateStoreRDD","Scope":"{\"id\":\"36\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[21],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":19,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"41\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[18],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":21,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"37\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[20],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":20,"Name":"StateStoreRDD","Scope":"{\"id\":\"40\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[19],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[2],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020223613,"Completion Time":1596020223724,"Accumulables":[{"ID":218,"Name":"duration","Value":"25","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":209,"Name":"number of total state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":173,"Name":"local bytes read","Value":"169","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":286,"Name":"internal.metrics.executorDeserializeCpuTime","Value":14201012,"Internal":true,"Count Failed Values":true},{"ID":298,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Value":0,"Internal":true,"Count Failed Values":true},{"ID":289,"Name":"internal.metrics.resultSize","Value":10885,"Internal":true,"Count Failed Values":true},{"ID":301,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Value":0,"Internal":true,"Count Failed Values":true},{"ID":175,"Name":"records read","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":211,"Name":"time to update","Value":"24","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":202,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":220,"Name":"peak memory","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":214,"Name":"memory used by state","Value":"1216","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":205,"Name":"time in aggregation build","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":300,"Name":"internal.metrics.shuffle.read.localBytesRead","Value":169,"Internal":true,"Count Failed Values":true},{"ID":294,"Name":"internal.metrics.peakExecutionMemory","Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":285,"Name":"internal.metrics.executorDeserializeTime","Value":13,"Internal":true,"Count Failed Values":true},{"ID":207,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":297,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Value":1,"Internal":true,"Count Failed Values":true},{"ID":288,"Name":"internal.metrics.executorCpuTime","Value":48861676,"Internal":true,"Count Failed Values":true},{"ID":216,"Name":"count of cache hit on states cache in provider","Value":"4","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":174,"Name":"fetch wait time","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":210,"Name":"number of updated state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":219,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":201,"Name":"duration","Value":"8","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":222,"Name":"time in aggregation build","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":213,"Name":"time to commit changes","Value":"68","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":299,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Value":0,"Internal":true,"Count Failed Values":true},{"ID":302,"Name":"internal.metrics.shuffle.read.recordsRead","Value":1,"Internal":true,"Count Failed Values":true},{"ID":212,"Name":"time to remove","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":203,"Name":"peak memory","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":170,"Name":"local blocks read","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":215,"Name":"estimated size of state only on current version","Value":"456","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":287,"Name":"internal.metrics.executorRunTime","Value":159,"Internal":true,"Count Failed Values":true},{"ID":206,"Name":"avg hash probe bucket list iters","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":224,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":296,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Value":0,"Internal":true,"Count Failed Values":true}],"Resource Profile Id":0}} +{"Event":"SparkListenerJobEnd","Job ID":1,"Completion Time":1596020223725,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":5,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 1","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nLocalTableScan (1)\n\n\n(1) LocalTableScan\nOutput [2]: [value#60, count#61]\nArguments: [value#60, count#61]\n\n","sparkPlanInfo":{"nodeName":"LocalTableScan","simpleString":"LocalTableScan [value#60, count#61]","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":310,"metricType":"sum"}]},"time":1596020223752} 
+{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":5,"time":1596020223761} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":4,"time":1596020223762} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":3,"time":1596020223762} +{"Event":"org.apache.spark.sql.streaming.StreamingQueryListener$QueryProgressEvent","progress":{"id":"8d268dc2-bc9c-4be8-97a9-b135d2943028","runId":"e225d92f-2545-48f8-87a2-9c0309580f8a","name":null,"timestamp":"2020-07-29T10:57:03.168Z","batchId":1,"batchDuration":622,"durationMs":{"triggerExecution":622,"queryPlanning":47,"getBatch":0,"latestOffset":7,"addBatch":478,"walCommit":59},"eventTime":{},"stateOperators":[{"numRowsTotal":1,"numRowsUpdated":1,"memoryUsedBytes":1216,"numLateInputs":0,"customMetrics":{"stateOnCurrentVersionSizeBytes":456,"loadedMapCacheHitCount":4,"loadedMapCacheMissCount":0}}],"sources":[{"description":"KafkaV2[Subscribe[test5]]","startOffset":"{\"test5\":{\"0\":48279}}","endOffset":"{\"test5\":{\"0\":48642}}","numInputRows":363,"inputRowsPerSecond":50.74793792814204,"processedRowsPerSecond":583.6012861736334}],"sink":{"description":"org.apache.spark.sql.execution.streaming.ConsoleTable$@514ba885","numOutputRows":1},"observedMetrics":{}}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":6,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 2","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48642}}, {\"test5\":{\"0\":48705}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, 
offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#604]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 39c861a0-0e30-4ca2-b363-495aff0f3f93, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 39c861a0-0e30-4ca2-b363-495aff0f3f93, opId = 0, ver = 0, numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@52d6c50a\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@52d6c50a","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 2, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 2, numPartitions = 2], 
2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#528]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":394,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":393,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":390,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":391,"metricType":"timing"},{"name":"peak memory","accumulatorId":389,"metricType":"size"},{"name":"number of output rows","accumulatorId":388,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":392,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":385,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":386,"metricType":"timing"},{"name":"peak memory","accumulatorId":384,"metricType":"size"},{"name":"number of output rows","accumulatorId":383,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":387,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":382,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":334,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":335,"metricType":"nsTiming"},{"name":"records read","accumulatorId":332,"metricType":"sum"},{"name":"local bytes read","accumulatorId":330,"metricType":"size"},{"name":"fetch wait time","accumulatorId":331,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":328,"metricType":"size"},{"name":"local blocks read","accumulatorId":327,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":326,"metricType":"sum"},{"name":"data 
size","accumulatorId":325,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":329,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":333,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":381,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":378,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":379,"metricType":"timing"},{"name":"peak memory","accumulatorId":377,"metricType":"size"},{"name":"number of output rows","accumulatorId":376,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":380,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":375,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":365,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":366,"metricType":"sum"},{"name":"memory used by state","accumulatorId":371,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":373,"metricType":"sum"},{"name":"number of output rows","accumulatorId":364,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":372,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":374,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":370,"metricType":"timing"},{"name":"time to remove","accumulatorId":369,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":367,"metricType":"sum"},{"name":"time to update","accumulatorId":368,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":361,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":362,"metricType":"timing"},{"name":"peak memory","accumulatorId":360,"metricType":"size"},{"name":"number of output rows","accumulatorId":359,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":363,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":358,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020223909} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":7,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 2","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48642}}, {\"test5\":{\"0\":48705}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#680]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = c2fd3b95-1ba6-4d3e-8b9c-0256dfd90973, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = c2fd3b95-1ba6-4d3e-8b9c-0256dfd90973, opId = 0, ver = 0, 
numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@52d6c50a\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@52d6c50a","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 2, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 2, numPartitions = 2], 2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#528]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output 
rows","accumulatorId":394,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":393,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":390,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":391,"metricType":"timing"},{"name":"peak memory","accumulatorId":389,"metricType":"size"},{"name":"number of output rows","accumulatorId":388,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":392,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":385,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":386,"metricType":"timing"},{"name":"peak memory","accumulatorId":384,"metricType":"size"},{"name":"number of output rows","accumulatorId":383,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":387,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":382,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":334,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":335,"metricType":"nsTiming"},{"name":"records read","accumulatorId":332,"metricType":"sum"},{"name":"local bytes read","accumulatorId":330,"metricType":"size"},{"name":"fetch wait time","accumulatorId":331,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":328,"metricType":"size"},{"name":"local blocks read","accumulatorId":327,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":326,"metricType":"sum"},{"name":"data size","accumulatorId":325,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":329,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":333,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":381,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":378,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":379,"metricType":"timing"},{"name":"peak memory","accumulatorId":377,"metricType":"size"},{"name":"number of output rows","accumulatorId":376,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":380,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":375,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":365,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":366,"metricType":"sum"},{"name":"memory used by state","accumulatorId":371,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":373,"metricType":"sum"},{"name":"number of output rows","accumulatorId":364,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":372,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":374,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":370,"metricType":"timing"},{"name":"time to remove","accumulatorId":369,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":367,"metricType":"sum"},{"name":"time to 
update","accumulatorId":368,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":361,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":362,"metricType":"timing"},{"name":"peak memory","accumulatorId":360,"metricType":"size"},{"name":"number of output rows","accumulatorId":359,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":363,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":358,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020224006} +{"Event":"SparkListenerJobStart","Job ID":2,"Submission Time":1596020224100,"Stage Infos":[{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":35,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"66\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[34],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":32,"Name":"StateStoreRDD","Scope":"{\"id\":\"73\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[31],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":34,"Name":"StateStoreRDD","Scope":"{\"id\":\"69\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[33],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":33,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"70\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[32],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":31,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"74\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[30],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[4],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0},{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":30,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"74\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[29],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":27,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"81\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[26],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":29,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"75\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[28],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":28,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"80\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[27],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":26,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"82\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[25],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":24,"Name":"DataSourceRDD","Scope":"{\"id\":\"86\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":25,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"86\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[24],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0}],"Stage IDs":[5,4],"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 2","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"2","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"7","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerStageSubmitted","Stage 
Info":{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":30,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"74\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[29],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":27,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"81\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[26],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":29,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"75\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[28],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":28,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"80\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[27],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":26,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"82\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[25],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":24,"Name":"DataSourceRDD","Scope":"{\"id\":\"86\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":25,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"86\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[24],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020224103,"Accumulables":[],"Resource Profile Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 2","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"2","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"7","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":6,"Index":0,"Attempt":0,"Launch Time":1596020224113,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt 
ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":6,"Index":0,"Attempt":0,"Launch Time":1596020224113,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020224174,"Failed":false,"Killed":false,"Accumulables":[{"ID":335,"Name":"shuffle write time","Update":"686296","Value":"686296","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":334,"Name":"shuffle records written","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":333,"Name":"shuffle bytes written","Update":"168","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":325,"Name":"data size","Update":"128","Value":"128","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":382,"Name":"duration","Update":"39","Value":"39","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":383,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":384,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":386,"Name":"time in aggregation build","Update":"32","Value":"32","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":388,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":389,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":391,"Name":"time in aggregation build","Update":"26","Value":"26","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":393,"Name":"duration","Update":"40","Value":"40","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":394,"Name":"number of output rows","Update":"63","Value":"63","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":439,"Name":"internal.metrics.input.recordsRead","Update":63,"Value":63,"Internal":true,"Count Failed Values":true},{"ID":437,"Name":"internal.metrics.shuffle.write.writeTime","Update":686296,"Value":686296,"Internal":true,"Count Failed Values":true},{"ID":436,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":435,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":168,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":426,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":524288,"Internal":true,"Count Failed Values":true},{"ID":421,"Name":"internal.metrics.resultSize","Update":2544,"Value":2544,"Internal":true,"Count Failed Values":true},{"ID":420,"Name":"internal.metrics.executorCpuTime","Update":33390843,"Value":33390843,"Internal":true,"Count Failed Values":true},{"ID":419,"Name":"internal.metrics.executorRunTime","Update":49,"Value":49,"Internal":true,"Count Failed Values":true},{"ID":418,"Name":"internal.metrics.executorDeserializeCpuTime","Update":4867521,"Value":4867521,"Internal":true,"Count Failed Values":true},{"ID":417,"Name":"internal.metrics.executorDeserializeTime","Update":8,"Value":8,"Internal":true,"Count Failed Values":true}]},"Task Executor 
Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":8,"Executor Deserialize CPU Time":4867521,"Executor Run Time":49,"Executor CPU Time":33390843,"Peak Execution Memory":524288,"Result Size":2544,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":168,"Shuffle Write Time":686296,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":63},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":30,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"74\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[29],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":27,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"81\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[26],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":29,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"75\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[28],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":28,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"80\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[27],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":26,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"82\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[25],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":24,"Name":"DataSourceRDD","Scope":"{\"id\":\"86\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk 
Size":0},{"RDD ID":25,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"86\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[24],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020224103,"Completion Time":1596020224175,"Accumulables":[{"ID":436,"Name":"internal.metrics.shuffle.write.recordsWritten","Value":1,"Internal":true,"Count Failed Values":true},{"ID":391,"Name":"time in aggregation build","Value":"26","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":382,"Name":"duration","Value":"39","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":418,"Name":"internal.metrics.executorDeserializeCpuTime","Value":4867521,"Internal":true,"Count Failed Values":true},{"ID":421,"Name":"internal.metrics.resultSize","Value":2544,"Internal":true,"Count Failed Values":true},{"ID":394,"Name":"number of output rows","Value":"63","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":439,"Name":"internal.metrics.input.recordsRead","Value":63,"Internal":true,"Count Failed Values":true},{"ID":388,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":334,"Name":"shuffle records written","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":325,"Name":"data size","Value":"128","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":420,"Name":"internal.metrics.executorCpuTime","Value":33390843,"Internal":true,"Count Failed Values":true},{"ID":426,"Name":"internal.metrics.peakExecutionMemory","Value":524288,"Internal":true,"Count Failed Values":true},{"ID":417,"Name":"internal.metrics.executorDeserializeTime","Value":8,"Internal":true,"Count Failed Values":true},{"ID":435,"Name":"internal.metrics.shuffle.write.bytesWritten","Value":168,"Internal":true,"Count Failed Values":true},{"ID":384,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":393,"Name":"duration","Value":"40","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":333,"Name":"shuffle bytes written","Value":"168","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":383,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":437,"Name":"internal.metrics.shuffle.write.writeTime","Value":686296,"Internal":true,"Count Failed Values":true},{"ID":419,"Name":"internal.metrics.executorRunTime","Value":49,"Internal":true,"Count Failed Values":true},{"ID":386,"Name":"time in aggregation build","Value":"32","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":335,"Name":"shuffle write time","Value":"686296","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":389,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"}],"Resource Profile Id":0}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":35,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"66\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[34],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":32,"Name":"StateStoreRDD","Scope":"{\"id\":\"73\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[31],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":34,"Name":"StateStoreRDD","Scope":"{\"id\":\"69\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[33],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":33,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"70\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[32],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":31,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"74\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[30],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[4],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020224179,"Accumulables":[],"Resource Profile Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 2","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"2","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"7","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":7,"Index":0,"Attempt":0,"Launch Time":1596020224187,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt 
ID":0,"Task Info":{"Task ID":8,"Index":1,"Attempt":0,"Launch Time":1596020224187,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":7,"Index":0,"Attempt":0,"Launch Time":1596020224187,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020224256,"Failed":false,"Killed":false,"Accumulables":[{"ID":358,"Name":"duration","Update":"3","Value":"3","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":360,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":362,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":368,"Name":"time to update","Update":"3","Value":"3","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":369,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":370,"Name":"time to commit changes","Update":"32","Value":"32","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":372,"Name":"estimated size of state only on current version","Update":"88","Value":"88","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":373,"Name":"count of cache hit on states cache in provider","Update":"4","Value":"4","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":371,"Name":"memory used by state","Update":"400","Value":"400","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":375,"Name":"duration","Update":"3","Value":"3","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":377,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":379,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":459,"Name":"internal.metrics.shuffle.read.recordsRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":458,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":457,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":456,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":455,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":454,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":453,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":451,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":524288,"Internal":true,"Count Failed Values":true},{"ID":446,"Name":"internal.metrics.resultSize","Update":5311,"Value":5311,"Internal":true,"Count Failed Values":true},{"ID":445,"Name":"internal.metrics.executorCpuTime","Update":17230622,"Value":17230622,"Internal":true,"Count Failed 
Values":true},{"ID":444,"Name":"internal.metrics.executorRunTime","Update":56,"Value":56,"Internal":true,"Count Failed Values":true},{"ID":443,"Name":"internal.metrics.executorDeserializeCpuTime","Update":5948051,"Value":5948051,"Internal":true,"Count Failed Values":true},{"ID":442,"Name":"internal.metrics.executorDeserializeTime","Update":6,"Value":6,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":6,"Executor Deserialize CPU Time":5948051,"Executor Run Time":56,"Executor CPU Time":17230622,"Peak Execution Memory":524288,"Result Size":5311,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":8,"Index":1,"Attempt":0,"Launch Time":1596020224187,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020224257,"Failed":false,"Killed":false,"Accumulables":[{"ID":358,"Name":"duration","Update":"4","Value":"7","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":363,"Name":"avg hash probe bucket list iters","Update":"10","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":359,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":360,"Name":"peak memory","Update":"4456448","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":362,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":368,"Name":"time to update","Update":"21","Value":"24","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":367,"Name":"number of updated state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":369,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":370,"Name":"time to commit changes","Update":"18","Value":"50","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":372,"Name":"estimated size of state only on current version","Update":"368","Value":"456","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":364,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":373,"Name":"count of cache hit on states cache in provider","Update":"4","Value":"8","Internal":true,"Count 
Failed Values":true,"Metadata":"sql"},{"ID":371,"Name":"memory used by state","Update":"784","Value":"1184","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":366,"Name":"number of total state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":375,"Name":"duration","Update":"22","Value":"25","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":376,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":377,"Name":"peak memory","Update":"262144","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":379,"Name":"time in aggregation build","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":381,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":327,"Name":"local blocks read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":331,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":330,"Name":"local bytes read","Update":"168","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":332,"Name":"records read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":459,"Name":"internal.metrics.shuffle.read.recordsRead","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":458,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":457,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":168,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":456,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":455,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":454,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":453,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":451,"Name":"internal.metrics.peakExecutionMemory","Update":4718592,"Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":446,"Name":"internal.metrics.resultSize","Update":5574,"Value":10885,"Internal":true,"Count Failed Values":true},{"ID":445,"Name":"internal.metrics.executorCpuTime","Update":23808555,"Value":41039177,"Internal":true,"Count Failed Values":true},{"ID":444,"Name":"internal.metrics.executorRunTime","Update":56,"Value":112,"Internal":true,"Count Failed Values":true},{"ID":443,"Name":"internal.metrics.executorDeserializeCpuTime","Update":6247106,"Value":12195157,"Internal":true,"Count Failed Values":true},{"ID":442,"Name":"internal.metrics.executorDeserializeTime","Update":6,"Value":12,"Internal":true,"Count Failed Values":true}]},"Task Executor 
Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":6,"Executor Deserialize CPU Time":6247106,"Executor Run Time":56,"Executor CPU Time":23808555,"Peak Execution Memory":4718592,"Result Size":5574,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":1,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":168,"Total Records Read":1},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":35,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"66\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[34],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":32,"Name":"StateStoreRDD","Scope":"{\"id\":\"73\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[31],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":34,"Name":"StateStoreRDD","Scope":"{\"id\":\"69\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[33],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":33,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"70\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[32],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":31,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"74\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[30],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[4],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020224179,"Completion Time":1596020224259,"Accumulables":[{"ID":442,"Name":"internal.metrics.executorDeserializeTime","Value":12,"Internal":true,"Count Failed Values":true},{"ID":451,"Name":"internal.metrics.peakExecutionMemory","Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":445,"Name":"internal.metrics.executorCpuTime","Value":41039177,"Internal":true,"Count Failed Values":true},{"ID":364,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":454,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Value":1,"Internal":true,"Count Failed Values":true},{"ID":373,"Name":"count of cache hit on states cache in provider","Value":"8","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":367,"Name":"number of updated state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":376,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":358,"Name":"duration","Value":"7","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":331,"Name":"fetch wait time","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":457,"Name":"internal.metrics.shuffle.read.localBytesRead","Value":168,"Internal":true,"Count Failed Values":true},{"ID":379,"Name":"time in aggregation build","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":370,"Name":"time to commit changes","Value":"50","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":456,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Value":0,"Internal":true,"Count Failed Values":true},{"ID":369,"Name":"time to remove","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":459,"Name":"internal.metrics.shuffle.read.recordsRead","Value":1,"Internal":true,"Count Failed Values":true},{"ID":360,"Name":"peak memory","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":381,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":453,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Value":0,"Internal":true,"Count Failed Values":true},{"ID":372,"Name":"estimated size of state only on current version","Value":"456","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":363,"Name":"avg hash probe bucket list iters","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":327,"Name":"local blocks read","Value":"1","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":444,"Name":"internal.metrics.executorRunTime","Value":112,"Internal":true,"Count Failed Values":true},{"ID":375,"Name":"duration","Value":"25","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":366,"Name":"number of total state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":330,"Name":"local bytes read","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":443,"Name":"internal.metrics.executorDeserializeCpuTime","Value":12195157,"Internal":true,"Count Failed Values":true},{"ID":455,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Value":0,"Internal":true,"Count Failed Values":true},{"ID":446,"Name":"internal.metrics.resultSize","Value":10885,"Internal":true,"Count Failed Values":true},{"ID":332,"Name":"records read","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":377,"Name":"peak memory","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":359,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":458,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Value":0,"Internal":true,"Count Failed Values":true},{"ID":368,"Name":"time to update","Value":"24","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":362,"Name":"time in aggregation build","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":371,"Name":"memory used by state","Value":"1184","Internal":true,"Count Failed Values":true,"Metadata":"sql"}],"Resource Profile Id":0}} +{"Event":"SparkListenerJobEnd","Job ID":2,"Completion Time":1596020224259,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":8,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 2","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nLocalTableScan (1)\n\n\n(1) LocalTableScan\nOutput [2]: [value#74, count#75]\nArguments: [value#74, count#75]\n\n","sparkPlanInfo":{"nodeName":"LocalTableScan","simpleString":"LocalTableScan [value#74, count#75]","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":467,"metricType":"sum"}]},"time":1596020224278} 
+{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":8,"time":1596020224287} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":7,"time":1596020224287} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":6,"time":1596020224288} +{"Event":"org.apache.spark.sql.streaming.StreamingQueryListener$QueryProgressEvent","progress":{"id":"8d268dc2-bc9c-4be8-97a9-b135d2943028","runId":"e225d92f-2545-48f8-87a2-9c0309580f8a","name":null,"timestamp":"2020-07-29T10:57:03.793Z","batchId":2,"batchDuration":522,"durationMs":{"triggerExecution":522,"queryPlanning":41,"getBatch":1,"latestOffset":3,"addBatch":421,"walCommit":27},"eventTime":{},"stateOperators":[{"numRowsTotal":1,"numRowsUpdated":1,"memoryUsedBytes":1184,"numLateInputs":0,"customMetrics":{"stateOnCurrentVersionSizeBytes":456,"loadedMapCacheHitCount":8,"loadedMapCacheMissCount":0}}],"sources":[{"description":"KafkaV2[Subscribe[test5]]","startOffset":"{\"test5\":{\"0\":48642}}","endOffset":"{\"test5\":{\"0\":48705}}","numInputRows":63,"inputRowsPerSecond":100.8,"processedRowsPerSecond":120.6896551724138}],"sink":{"description":"org.apache.spark.sql.execution.streaming.ConsoleTable$@514ba885","numOutputRows":1},"observedMetrics":{}}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":9,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 3","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48705}}, {\"test5\":{\"0\":48757}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, 
timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#835]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 8bb5d8a6-42f8-4141-8f25-e1b98f81aac4, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 8bb5d8a6-42f8-4141-8f25-e1b98f81aac4, opId = 0, ver = 0, numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@59b7c509\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@59b7c509","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 3, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 3, numPartitions = 2], 
2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#759]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":551,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":550,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":547,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":548,"metricType":"timing"},{"name":"peak memory","accumulatorId":546,"metricType":"size"},{"name":"number of output rows","accumulatorId":545,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":549,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":542,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":543,"metricType":"timing"},{"name":"peak memory","accumulatorId":541,"metricType":"size"},{"name":"number of output rows","accumulatorId":540,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":544,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":539,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":491,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":492,"metricType":"nsTiming"},{"name":"records read","accumulatorId":489,"metricType":"sum"},{"name":"local bytes read","accumulatorId":487,"metricType":"size"},{"name":"fetch wait time","accumulatorId":488,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":485,"metricType":"size"},{"name":"local blocks read","accumulatorId":484,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":483,"metricType":"sum"},{"name":"data 
size","accumulatorId":482,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":486,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":490,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":538,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":535,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":536,"metricType":"timing"},{"name":"peak memory","accumulatorId":534,"metricType":"size"},{"name":"number of output rows","accumulatorId":533,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":537,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":532,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":522,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":523,"metricType":"sum"},{"name":"memory used by state","accumulatorId":528,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":530,"metricType":"sum"},{"name":"number of output rows","accumulatorId":521,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":529,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":531,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":527,"metricType":"timing"},{"name":"time to remove","accumulatorId":526,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":524,"metricType":"sum"},{"name":"time to update","accumulatorId":525,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":518,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":519,"metricType":"timing"},{"name":"peak memory","accumulatorId":517,"metricType":"size"},{"name":"number of output rows","accumulatorId":516,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":520,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":515,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020224419} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":10,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 3","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48705}}, {\"test5\":{\"0\":48757}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#911]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 29402d2a-a5da-4bb1-8d1a-c6d1c2d998d5, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 29402d2a-a5da-4bb1-8d1a-c6d1c2d998d5, opId = 0, ver = 0, 
numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@59b7c509\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@59b7c509","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 3, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 3, numPartitions = 2], 2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#759]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output 
rows","accumulatorId":551,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":550,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":547,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":548,"metricType":"timing"},{"name":"peak memory","accumulatorId":546,"metricType":"size"},{"name":"number of output rows","accumulatorId":545,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":549,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":542,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":543,"metricType":"timing"},{"name":"peak memory","accumulatorId":541,"metricType":"size"},{"name":"number of output rows","accumulatorId":540,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":544,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":539,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":491,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":492,"metricType":"nsTiming"},{"name":"records read","accumulatorId":489,"metricType":"sum"},{"name":"local bytes read","accumulatorId":487,"metricType":"size"},{"name":"fetch wait time","accumulatorId":488,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":485,"metricType":"size"},{"name":"local blocks read","accumulatorId":484,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":483,"metricType":"sum"},{"name":"data size","accumulatorId":482,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":486,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":490,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":538,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":535,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":536,"metricType":"timing"},{"name":"peak memory","accumulatorId":534,"metricType":"size"},{"name":"number of output rows","accumulatorId":533,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":537,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":532,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":522,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":523,"metricType":"sum"},{"name":"memory used by state","accumulatorId":528,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":530,"metricType":"sum"},{"name":"number of output rows","accumulatorId":521,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":529,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":531,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":527,"metricType":"timing"},{"name":"time to remove","accumulatorId":526,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":524,"metricType":"sum"},{"name":"time to 
update","accumulatorId":525,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":518,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":519,"metricType":"timing"},{"name":"peak memory","accumulatorId":517,"metricType":"size"},{"name":"number of output rows","accumulatorId":516,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":520,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":515,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020224452} +{"Event":"SparkListenerJobStart","Job ID":3,"Submission Time":1596020224533,"Stage Infos":[{"Stage ID":6,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":42,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"107\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[41],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":38,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"115\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[37],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":36,"Name":"DataSourceRDD","Scope":"{\"id\":\"119\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":41,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"108\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[40],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":37,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"119\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[36],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":40,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"113\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[39],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":39,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"114\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[38],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0},{"Stage ID":7,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":47,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"99\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[46],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":46,"Name":"StateStoreRDD","Scope":"{\"id\":\"102\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[45],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":45,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"103\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[44],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":44,"Name":"StateStoreRDD","Scope":"{\"id\":\"106\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[43],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":43,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"107\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[42],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[6],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0}],"Stage IDs":[6,7],"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 3","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"3","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"10","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":6,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":42,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"107\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[41],"Storage Level":{"Use Disk":false,"Use 
Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":38,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"115\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[37],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":36,"Name":"DataSourceRDD","Scope":"{\"id\":\"119\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":41,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"108\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[40],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":37,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"119\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[36],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":40,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"113\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[39],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":39,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"114\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[38],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission 
Time":1596020224535,"Accumulables":[],"Resource Profile Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 3","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"3","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"10","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":6,"Stage Attempt ID":0,"Task Info":{"Task ID":9,"Index":0,"Attempt":0,"Launch Time":1596020224541,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":6,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":9,"Index":0,"Attempt":0,"Launch Time":1596020224541,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020224581,"Failed":false,"Killed":false,"Accumulables":[{"ID":492,"Name":"shuffle write time","Update":"643278","Value":"643278","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":491,"Name":"shuffle records written","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":490,"Name":"shuffle bytes written","Update":"168","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":482,"Name":"data size","Update":"128","Value":"128","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":539,"Name":"duration","Update":"20","Value":"20","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":540,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":541,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":543,"Name":"time in aggregation build","Update":"13","Value":"13","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":545,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":546,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":548,"Name":"time in aggregation build","Update":"9","Value":"9","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":550,"Name":"duration","Update":"20","Value":"20","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":551,"Name":"number of output rows","Update":"52","Value":"52","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":596,"Name":"internal.metrics.input.recordsRead","Update":52,"Value":52,"Internal":true,"Count Failed Values":true},{"ID":594,"Name":"internal.metrics.shuffle.write.writeTime","Update":643278,"Value":643278,"Internal":true,"Count Failed Values":true},{"ID":593,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":592,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":168,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":583,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":524288,"Internal":true,"Count Failed Values":true},{"ID":578,"Name":"internal.metrics.resultSize","Update":2544,"Value":2544,"Internal":true,"Count Failed Values":true},{"ID":577,"Name":"internal.metrics.executorCpuTime","Update":29099071,"Value":29099071,"Internal":true,"Count Failed Values":true},{"ID":576,"Name":"internal.metrics.executorRunTime","Update":29,"Value":29,"Internal":true,"Count Failed Values":true},{"ID":575,"Name":"internal.metrics.executorDeserializeCpuTime","Update":3091128,"Value":3091128,"Internal":true,"Count Failed Values":true},{"ID":574,"Name":"internal.metrics.executorDeserializeTime","Update":3,"Value":3,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":3,"Executor Deserialize CPU Time":3091128,"Executor Run Time":29,"Executor CPU Time":29099071,"Peak Execution Memory":524288,"Result Size":2544,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":168,"Shuffle Write Time":643278,"Shuffle Records Written":1},"Input Metrics":{"Bytes 
Read":0,"Records Read":52},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":6,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":42,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"107\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[41],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":38,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"115\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[37],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":36,"Name":"DataSourceRDD","Scope":"{\"id\":\"119\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":41,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"108\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[40],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":37,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"119\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[36],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":40,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"113\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[39],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":39,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"114\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[38],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020224535,"Completion Time":1596020224582,"Accumulables":[{"ID":550,"Name":"duration","Value":"20","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":541,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":577,"Name":"internal.metrics.executorCpuTime","Value":29099071,"Internal":true,"Count Failed Values":true},{"ID":490,"Name":"shuffle bytes written","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":576,"Name":"internal.metrics.executorRunTime","Value":29,"Internal":true,"Count Failed Values":true},{"ID":540,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":594,"Name":"internal.metrics.shuffle.write.writeTime","Value":643278,"Internal":true,"Count Failed Values":true},{"ID":543,"Name":"time in aggregation build","Value":"13","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":492,"Name":"shuffle write time","Value":"643278","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":546,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":539,"Name":"duration","Value":"20","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":575,"Name":"internal.metrics.executorDeserializeCpuTime","Value":3091128,"Internal":true,"Count Failed Values":true},{"ID":593,"Name":"internal.metrics.shuffle.write.recordsWritten","Value":1,"Internal":true,"Count Failed Values":true},{"ID":548,"Name":"time in aggregation build","Value":"9","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":578,"Name":"internal.metrics.resultSize","Value":2544,"Internal":true,"Count Failed Values":true},{"ID":596,"Name":"internal.metrics.input.recordsRead","Value":52,"Internal":true,"Count Failed Values":true},{"ID":551,"Name":"number of output rows","Value":"52","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":482,"Name":"data size","Value":"128","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":491,"Name":"shuffle records written","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":545,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":592,"Name":"internal.metrics.shuffle.write.bytesWritten","Value":168,"Internal":true,"Count Failed Values":true},{"ID":574,"Name":"internal.metrics.executorDeserializeTime","Value":3,"Internal":true,"Count Failed Values":true},{"ID":583,"Name":"internal.metrics.peakExecutionMemory","Value":524288,"Internal":true,"Count Failed Values":true}],"Resource Profile Id":0}} 
+{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":7,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":47,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"99\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[46],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":46,"Name":"StateStoreRDD","Scope":"{\"id\":\"102\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[45],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":45,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"103\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[44],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":44,"Name":"StateStoreRDD","Scope":"{\"id\":\"106\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[43],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":43,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"107\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[42],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[6],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020224588,"Accumulables":[],"Resource Profile 
Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 3","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"3","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"10","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":7,"Stage Attempt ID":0,"Task Info":{"Task ID":10,"Index":0,"Attempt":0,"Launch Time":1596020224596,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":7,"Stage Attempt ID":0,"Task Info":{"Task ID":11,"Index":1,"Attempt":0,"Launch Time":1596020224597,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":7,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":10,"Index":0,"Attempt":0,"Launch Time":1596020224596,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020224670,"Failed":false,"Killed":false,"Accumulables":[{"ID":515,"Name":"duration","Update":"3","Value":"3","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":517,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count 
Failed Values":true,"Metadata":"sql"},{"ID":519,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":525,"Name":"time to update","Update":"5","Value":"5","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":526,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":527,"Name":"time to commit changes","Update":"27","Value":"27","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":529,"Name":"estimated size of state only on current version","Update":"88","Value":"88","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":530,"Name":"count of cache hit on states cache in provider","Update":"6","Value":"6","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":528,"Name":"memory used by state","Update":"400","Value":"400","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":532,"Name":"duration","Update":"5","Value":"5","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":534,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":536,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":616,"Name":"internal.metrics.shuffle.read.recordsRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":615,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":614,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":613,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":612,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":611,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":610,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":608,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":524288,"Internal":true,"Count Failed Values":true},{"ID":603,"Name":"internal.metrics.resultSize","Update":5311,"Value":5311,"Internal":true,"Count Failed Values":true},{"ID":602,"Name":"internal.metrics.executorCpuTime","Update":19967906,"Value":19967906,"Internal":true,"Count Failed Values":true},{"ID":601,"Name":"internal.metrics.executorRunTime","Update":62,"Value":62,"Internal":true,"Count Failed Values":true},{"ID":600,"Name":"internal.metrics.executorDeserializeCpuTime","Update":4899567,"Value":4899567,"Internal":true,"Count Failed Values":true},{"ID":599,"Name":"internal.metrics.executorDeserializeTime","Update":4,"Value":4,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor 
Deserialize Time":4,"Executor Deserialize CPU Time":4899567,"Executor Run Time":62,"Executor CPU Time":19967906,"Peak Execution Memory":524288,"Result Size":5311,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":7,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":11,"Index":1,"Attempt":0,"Launch Time":1596020224597,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020224687,"Failed":false,"Killed":false,"Accumulables":[{"ID":515,"Name":"duration","Update":"4","Value":"7","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":520,"Name":"avg hash probe bucket list iters","Update":"10","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":516,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":517,"Name":"peak memory","Update":"4456448","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":519,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":525,"Name":"time to update","Update":"17","Value":"22","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":524,"Name":"number of updated state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":526,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":527,"Name":"time to commit changes","Update":"26","Value":"53","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":529,"Name":"estimated size of state only on current version","Update":"368","Value":"456","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":521,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":530,"Name":"count of cache hit on states cache in provider","Update":"6","Value":"12","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":528,"Name":"memory used by state","Update":"784","Value":"1184","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":523,"Name":"number of total state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":532,"Name":"duration","Update":"17","Value":"22","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":533,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":534,"Name":"peak memory","Update":"262144","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":536,"Name":"time in aggregation build","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":538,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":484,"Name":"local blocks read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":488,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":487,"Name":"local bytes read","Update":"168","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":489,"Name":"records read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":616,"Name":"internal.metrics.shuffle.read.recordsRead","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":615,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":614,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":168,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":613,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":612,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":611,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":610,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":608,"Name":"internal.metrics.peakExecutionMemory","Update":4718592,"Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":603,"Name":"internal.metrics.resultSize","Update":5574,"Value":10885,"Internal":true,"Count Failed Values":true},{"ID":602,"Name":"internal.metrics.executorCpuTime","Update":22402538,"Value":42370444,"Internal":true,"Count Failed Values":true},{"ID":601,"Name":"internal.metrics.executorRunTime","Update":79,"Value":141,"Internal":true,"Count Failed Values":true},{"ID":600,"Name":"internal.metrics.executorDeserializeCpuTime","Update":4671511,"Value":9571078,"Internal":true,"Count Failed Values":true},{"ID":599,"Name":"internal.metrics.executorDeserializeTime","Update":4,"Value":8,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":4,"Executor Deserialize CPU Time":4671511,"Executor Run Time":79,"Executor CPU Time":22402538,"Peak Execution Memory":4718592,"Result Size":5574,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":1,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":168,"Total Records Read":1},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":7,"Stage Attempt ID":0,"Stage Name":"start at 
StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":47,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"99\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[46],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":46,"Name":"StateStoreRDD","Scope":"{\"id\":\"102\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[45],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":45,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"103\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[44],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":44,"Name":"StateStoreRDD","Scope":"{\"id\":\"106\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[43],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":43,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"107\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[42],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[6],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020224588,"Completion Time":1596020224688,"Accumulables":[{"ID":523,"Name":"number of total state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":532,"Name":"duration","Value":"22","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":487,"Name":"local bytes read","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":517,"Name":"peak 
memory","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":600,"Name":"internal.metrics.executorDeserializeCpuTime","Value":9571078,"Internal":true,"Count Failed Values":true},{"ID":603,"Name":"internal.metrics.resultSize","Value":10885,"Internal":true,"Count Failed Values":true},{"ID":612,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Value":0,"Internal":true,"Count Failed Values":true},{"ID":516,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":615,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Value":0,"Internal":true,"Count Failed Values":true},{"ID":534,"Name":"peak memory","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":525,"Name":"time to update","Value":"22","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":489,"Name":"records read","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":528,"Name":"memory used by state","Value":"1184","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":519,"Name":"time in aggregation build","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":608,"Name":"internal.metrics.peakExecutionMemory","Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":599,"Name":"internal.metrics.executorDeserializeTime","Value":8,"Internal":true,"Count Failed Values":true},{"ID":521,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":530,"Name":"count of cache hit on states cache in provider","Value":"12","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":611,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Value":1,"Internal":true,"Count Failed Values":true},{"ID":602,"Name":"internal.metrics.executorCpuTime","Value":42370444,"Internal":true,"Count Failed Values":true},{"ID":488,"Name":"fetch wait time","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":515,"Name":"duration","Value":"7","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":524,"Name":"number of updated state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":533,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":614,"Name":"internal.metrics.shuffle.read.localBytesRead","Value":168,"Internal":true,"Count Failed Values":true},{"ID":536,"Name":"time in aggregation build","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":527,"Name":"time to commit changes","Value":"53","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":613,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Value":0,"Internal":true,"Count Failed Values":true},{"ID":616,"Name":"internal.metrics.shuffle.read.recordsRead","Value":1,"Internal":true,"Count Failed Values":true},{"ID":526,"Name":"time to remove","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":520,"Name":"avg hash probe bucket list iters","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":610,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Value":0,"Internal":true,"Count Failed Values":true},{"ID":601,"Name":"internal.metrics.executorRunTime","Value":141,"Internal":true,"Count Failed Values":true},{"ID":484,"Name":"local blocks read","Value":"1","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":538,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":529,"Name":"estimated size of state only on current version","Value":"456","Internal":true,"Count Failed Values":true,"Metadata":"sql"}],"Resource Profile Id":0}} +{"Event":"SparkListenerJobEnd","Job ID":3,"Completion Time":1596020224689,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":11,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 3","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nLocalTableScan (1)\n\n\n(1) LocalTableScan\nOutput [2]: [value#88, count#89]\nArguments: [value#88, count#89]\n\n","sparkPlanInfo":{"nodeName":"LocalTableScan","simpleString":"LocalTableScan [value#88, count#89]","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":624,"metricType":"sum"}]},"time":1596020224709} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":11,"time":1596020224713} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":10,"time":1596020224714} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":9,"time":1596020224714} +{"Event":"org.apache.spark.sql.streaming.StreamingQueryListener$QueryProgressEvent","progress":{"id":"8d268dc2-bc9c-4be8-97a9-b135d2943028","runId":"e225d92f-2545-48f8-87a2-9c0309580f8a","name":null,"timestamp":"2020-07-29T10:57:04.317Z","batchId":3,"batchDuration":415,"durationMs":{"triggerExecution":415,"queryPlanning":38,"getBatch":1,"latestOffset":3,"addBatch":332,"walCommit":21},"eventTime":{},"stateOperators":[{"numRowsTotal":1,"numRowsUpdated":1,"memoryUsedBytes":1184,"numLateInputs":0,"customMetrics":{"stateOnCurrentVersionSizeBytes":456,"loadedMapCacheHitCount":12,"loadedMapCacheMissCount":0}}],"sources":[{"description":"KafkaV2[Subscribe[test5]]","startOffset":"{\"test5\":{\"0\":48705}}","endOffset":"{\"test5\":{\"0\":48757}}","numInputRows":52,"inputRowsPerSecond":99.23664122137404,"processedRowsPerSecond":125.30120481927712}],"sink":{"description":"org.apache.spark.sql.execution.streaming.ConsoleTable$@514ba885","numOutputRows":1},"observedMetrics":{}}} 
+{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":12,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 4","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48757}}, {\"test5\":{\"0\":48799}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#1066]\n\n(10) 
StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 42efe357-12ef-4061-9b83-20bf4c29a257, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 42efe357-12ef-4061-9b83-20bf4c29a257, opId = 0, ver = 0, numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@1717338b\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@1717338b","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 4, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 4, numPartitions = 2], 2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#990]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, 
timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":708,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":707,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":704,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":705,"metricType":"timing"},{"name":"peak memory","accumulatorId":703,"metricType":"size"},{"name":"number of output rows","accumulatorId":702,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":706,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":699,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":700,"metricType":"timing"},{"name":"peak memory","accumulatorId":698,"metricType":"size"},{"name":"number of output rows","accumulatorId":697,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":701,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":696,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":648,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":649,"metricType":"nsTiming"},{"name":"records read","accumulatorId":646,"metricType":"sum"},{"name":"local bytes read","accumulatorId":644,"metricType":"size"},{"name":"fetch wait time","accumulatorId":645,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":642,"metricType":"size"},{"name":"local blocks read","accumulatorId":641,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":640,"metricType":"sum"},{"name":"data size","accumulatorId":639,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":643,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":647,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":695,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":692,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":693,"metricType":"timing"},{"name":"peak memory","accumulatorId":691,"metricType":"size"},{"name":"number of output rows","accumulatorId":690,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":694,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":689,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":679,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":680,"metricType":"sum"},{"name":"memory used by state","accumulatorId":685,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":687,"metricType":"sum"},{"name":"number of output rows","accumulatorId":678,"metricType":"sum"},{"name":"estimated size of state only on current 
version","accumulatorId":686,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":688,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":684,"metricType":"timing"},{"name":"time to remove","accumulatorId":683,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":681,"metricType":"sum"},{"name":"time to update","accumulatorId":682,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":675,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":676,"metricType":"timing"},{"name":"peak memory","accumulatorId":674,"metricType":"size"},{"name":"number of output rows","accumulatorId":673,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":677,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":672,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020224817} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":13,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 4","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48757}}, {\"test5\":{\"0\":48799}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, 
timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#1142]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 6fa28bd2-2924-4e01-8bbe-128888d2669b, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 6fa28bd2-2924-4e01-8bbe-128888d2669b, opId = 0, ver = 0, numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@1717338b\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@1717338b","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 4, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 4, numPartitions = 2], 2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#990]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], 
functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":708,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":707,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":704,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":705,"metricType":"timing"},{"name":"peak memory","accumulatorId":703,"metricType":"size"},{"name":"number of output rows","accumulatorId":702,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":706,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":699,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":700,"metricType":"timing"},{"name":"peak memory","accumulatorId":698,"metricType":"size"},{"name":"number of output rows","accumulatorId":697,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":701,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":696,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":648,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":649,"metricType":"nsTiming"},{"name":"records read","accumulatorId":646,"metricType":"sum"},{"name":"local bytes read","accumulatorId":644,"metricType":"size"},{"name":"fetch wait time","accumulatorId":645,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":642,"metricType":"size"},{"name":"local blocks read","accumulatorId":641,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":640,"metricType":"sum"},{"name":"data size","accumulatorId":639,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":643,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":647,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output 
rows","accumulatorId":695,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":692,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":693,"metricType":"timing"},{"name":"peak memory","accumulatorId":691,"metricType":"size"},{"name":"number of output rows","accumulatorId":690,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":694,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":689,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":679,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":680,"metricType":"sum"},{"name":"memory used by state","accumulatorId":685,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":687,"metricType":"sum"},{"name":"number of output rows","accumulatorId":678,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":686,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":688,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":684,"metricType":"timing"},{"name":"time to remove","accumulatorId":683,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":681,"metricType":"sum"},{"name":"time to update","accumulatorId":682,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":675,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":676,"metricType":"timing"},{"name":"peak memory","accumulatorId":674,"metricType":"size"},{"name":"number of output rows","accumulatorId":673,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":677,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":672,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020224849} +{"Event":"SparkListenerJobStart","Job ID":4,"Submission Time":1596020224928,"Stage Infos":[{"Stage ID":9,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":59,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"132\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[58],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":55,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"140\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[54],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":56,"Name":"StateStoreRDD","Scope":"{\"id\":\"139\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[55],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":57,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"136\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at 
StructuredKafkaWordCount.scala:86","Parent IDs":[56],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":58,"Name":"StateStoreRDD","Scope":"{\"id\":\"135\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[57],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[8],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0},{"Stage ID":8,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":54,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"140\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[53],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":53,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"141\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[52],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":51,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"147\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[50],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":49,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"152\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[48],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":52,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"146\",\"name\":\"MapPartitions\"}","Callsite":"start at 
StructuredKafkaWordCount.scala:86","Parent IDs":[51],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":50,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"148\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[49],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":48,"Name":"DataSourceRDD","Scope":"{\"id\":\"152\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0}],"Stage IDs":[9,8],"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 4","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"4","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"13","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":8,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":54,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"140\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[53],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":53,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"141\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[52],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":51,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"147\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[50],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":49,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"152\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[48],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":52,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"146\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[51],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":50,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"148\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent 
IDs":[49],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":48,"Name":"DataSourceRDD","Scope":"{\"id\":\"152\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020224929,"Accumulables":[],"Resource Profile Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at 
StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 4","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"4","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"13","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":8,"Stage Attempt ID":0,"Task Info":{"Task ID":12,"Index":0,"Attempt":0,"Launch Time":1596020224941,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":8,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":12,"Index":0,"Attempt":0,"Launch Time":1596020224941,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020224979,"Failed":false,"Killed":false,"Accumulables":[{"ID":649,"Name":"shuffle write time","Update":"572754","Value":"572754","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":648,"Name":"shuffle records written","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":647,"Name":"shuffle bytes written","Update":"168","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":639,"Name":"data size","Update":"128","Value":"128","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":696,"Name":"duration","Update":"19","Value":"19","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":697,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":698,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":700,"Name":"time in aggregation build","Update":"13","Value":"13","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":702,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":703,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":705,"Name":"time in aggregation build","Update":"9","Value":"9","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":707,"Name":"duration","Update":"19","Value":"19","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":708,"Name":"number of output rows","Update":"42","Value":"42","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":753,"Name":"internal.metrics.input.recordsRead","Update":42,"Value":42,"Internal":true,"Count Failed Values":true},{"ID":751,"Name":"internal.metrics.shuffle.write.writeTime","Update":572754,"Value":572754,"Internal":true,"Count Failed Values":true},{"ID":750,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":749,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":168,"Value":168,"Internal":true,"Count Failed 
Values":true},{"ID":740,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":524288,"Internal":true,"Count Failed Values":true},{"ID":735,"Name":"internal.metrics.resultSize","Update":2544,"Value":2544,"Internal":true,"Count Failed Values":true},{"ID":734,"Name":"internal.metrics.executorCpuTime","Update":27800373,"Value":27800373,"Internal":true,"Count Failed Values":true},{"ID":733,"Name":"internal.metrics.executorRunTime","Update":28,"Value":28,"Internal":true,"Count Failed Values":true},{"ID":732,"Name":"internal.metrics.executorDeserializeCpuTime","Update":4768103,"Value":4768103,"Internal":true,"Count Failed Values":true},{"ID":731,"Name":"internal.metrics.executorDeserializeTime","Update":4,"Value":4,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":4,"Executor Deserialize CPU Time":4768103,"Executor Run Time":28,"Executor CPU Time":27800373,"Peak Execution Memory":524288,"Result Size":2544,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":168,"Shuffle Write Time":572754,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":42},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":8,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":54,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"140\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[53],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":53,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"141\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[52],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":51,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"147\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[50],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":49,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"152\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[48],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of 
Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":52,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"146\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[51],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":50,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"148\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[49],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":48,"Name":"DataSourceRDD","Scope":"{\"id\":\"152\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020224929,"Completion Time":1596020224979,"Accumulables":[{"ID":732,"Name":"internal.metrics.executorDeserializeCpuTime","Value":4768103,"Internal":true,"Count Failed Values":true},{"ID":696,"Name":"duration","Value":"19","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":750,"Name":"internal.metrics.shuffle.write.recordsWritten","Value":1,"Internal":true,"Count Failed Values":true},{"ID":705,"Name":"time in aggregation build","Value":"9","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":735,"Name":"internal.metrics.resultSize","Value":2544,"Internal":true,"Count Failed Values":true},{"ID":708,"Name":"number of output rows","Value":"42","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":753,"Name":"internal.metrics.input.recordsRead","Value":42,"Internal":true,"Count Failed Values":true},{"ID":648,"Name":"shuffle records written","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":639,"Name":"data size","Value":"128","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":702,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":740,"Name":"internal.metrics.peakExecutionMemory","Value":524288,"Internal":true,"Count Failed Values":true},{"ID":731,"Name":"internal.metrics.executorDeserializeTime","Value":4,"Internal":true,"Count Failed Values":true},{"ID":749,"Name":"internal.metrics.shuffle.write.bytesWritten","Value":168,"Internal":true,"Count Failed Values":true},{"ID":698,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":734,"Name":"internal.metrics.executorCpuTime","Value":27800373,"Internal":true,"Count Failed Values":true},{"ID":707,"Name":"duration","Value":"19","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":647,"Name":"shuffle bytes written","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":733,"Name":"internal.metrics.executorRunTime","Value":28,"Internal":true,"Count Failed Values":true},{"ID":697,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":751,"Name":"internal.metrics.shuffle.write.writeTime","Value":572754,"Internal":true,"Count Failed Values":true},{"ID":700,"Name":"time in aggregation build","Value":"13","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":649,"Name":"shuffle write time","Value":"572754","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":703,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"}],"Resource Profile Id":0}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":9,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":59,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"132\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[58],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":55,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"140\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[54],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":56,"Name":"StateStoreRDD","Scope":"{\"id\":\"139\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[55],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":57,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"136\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[56],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":58,"Name":"StateStoreRDD","Scope":"{\"id\":\"135\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[57],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[8],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020224987,"Accumulables":[],"Resource Profile Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 4","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"4","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"13","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} 
+{"Event":"SparkListenerTaskStart","Stage ID":9,"Stage Attempt ID":0,"Task Info":{"Task ID":13,"Index":0,"Attempt":0,"Launch Time":1596020224994,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":9,"Stage Attempt ID":0,"Task Info":{"Task ID":14,"Index":1,"Attempt":0,"Launch Time":1596020224994,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":9,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":14,"Index":1,"Attempt":0,"Launch Time":1596020224994,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020225056,"Failed":false,"Killed":false,"Accumulables":[{"ID":672,"Name":"duration","Update":"3","Value":"3","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":677,"Name":"avg hash probe bucket list iters","Update":"10","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":673,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":674,"Name":"peak memory","Update":"4456448","Value":"4456448","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":676,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":682,"Name":"time to update","Update":"19","Value":"19","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":681,"Name":"number of updated state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":683,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":684,"Name":"time to commit changes","Update":"11","Value":"11","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":686,"Name":"estimated size of state only on current version","Update":"368","Value":"368","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":678,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":687,"Name":"count of cache hit on states cache in provider","Update":"8","Value":"8","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":685,"Name":"memory used by state","Update":"784","Value":"784","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":680,"Name":"number of total state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":689,"Name":"duration","Update":"19","Value":"19","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":690,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":691,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":693,"Name":"time in aggregation build","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":695,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":641,"Name":"local blocks read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":645,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":644,"Name":"local bytes read","Update":"168","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":646,"Name":"records read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":773,"Name":"internal.metrics.shuffle.read.recordsRead","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":772,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":771,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":168,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":770,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":769,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":768,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":767,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":765,"Name":"internal.metrics.peakExecutionMemory","Update":4718592,"Value":4718592,"Internal":true,"Count Failed Values":true},{"ID":760,"Name":"internal.metrics.resultSize","Update":5574,"Value":5574,"Internal":true,"Count Failed Values":true},{"ID":759,"Name":"internal.metrics.executorCpuTime","Update":19548688,"Value":19548688,"Internal":true,"Count Failed Values":true},{"ID":758,"Name":"internal.metrics.executorRunTime","Update":52,"Value":52,"Internal":true,"Count Failed Values":true},{"ID":757,"Name":"internal.metrics.executorDeserializeCpuTime","Update":5622533,"Value":5622533,"Internal":true,"Count Failed Values":true},{"ID":756,"Name":"internal.metrics.executorDeserializeTime","Update":5,"Value":5,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":5,"Executor Deserialize CPU Time":5622533,"Executor Run Time":52,"Executor CPU Time":19548688,"Peak Execution Memory":4718592,"Result Size":5574,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":1,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":168,"Total Records Read":1},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":9,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task 
Info":{"Task ID":13,"Index":0,"Attempt":0,"Launch Time":1596020224994,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020225058,"Failed":false,"Killed":false,"Accumulables":[{"ID":672,"Name":"duration","Update":"2","Value":"5","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":674,"Name":"peak memory","Update":"262144","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":676,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":682,"Name":"time to update","Update":"4","Value":"23","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":683,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":684,"Name":"time to commit changes","Update":"35","Value":"46","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":686,"Name":"estimated size of state only on current version","Update":"88","Value":"456","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":687,"Name":"count of cache hit on states cache in provider","Update":"8","Value":"16","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":685,"Name":"memory used by state","Update":"400","Value":"1184","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":689,"Name":"duration","Update":"4","Value":"23","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":691,"Name":"peak memory","Update":"262144","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":693,"Name":"time in aggregation build","Update":"0","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":773,"Name":"internal.metrics.shuffle.read.recordsRead","Update":0,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":772,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":771,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":0,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":770,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":769,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":768,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":0,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":767,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":765,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":760,"Name":"internal.metrics.resultSize","Update":5311,"Value":10885,"Internal":true,"Count Failed Values":true},{"ID":759,"Name":"internal.metrics.executorCpuTime","Update":16813539,"Value":36362227,"Internal":true,"Count Failed Values":true},{"ID":758,"Name":"internal.metrics.executorRunTime","Update":55,"Value":107,"Internal":true,"Count Failed Values":true},{"ID":757,"Name":"internal.metrics.executorDeserializeCpuTime","Update":4322992,"Value":9945525,"Internal":true,"Count Failed Values":true},{"ID":756,"Name":"internal.metrics.executorDeserializeTime","Update":4,"Value":9,"Internal":true,"Count Failed Values":true}]},"Task Executor 
Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":4,"Executor Deserialize CPU Time":4322992,"Executor Run Time":55,"Executor CPU Time":16813539,"Peak Execution Memory":524288,"Result Size":5311,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":9,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":59,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"132\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[58],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":55,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"140\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[54],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":56,"Name":"StateStoreRDD","Scope":"{\"id\":\"139\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[55],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":57,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"136\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[56],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":58,"Name":"StateStoreRDD","Scope":"{\"id\":\"135\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[57],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[8],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020224987,"Completion Time":1596020225059,"Accumulables":[{"ID":765,"Name":"internal.metrics.peakExecutionMemory","Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":756,"Name":"internal.metrics.executorDeserializeTime","Value":9,"Internal":true,"Count Failed Values":true},{"ID":678,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":759,"Name":"internal.metrics.executorCpuTime","Value":36362227,"Internal":true,"Count Failed Values":true},{"ID":768,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Value":1,"Internal":true,"Count Failed Values":true},{"ID":687,"Name":"count of cache hit on states cache in provider","Value":"16","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":681,"Name":"number of updated state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":771,"Name":"internal.metrics.shuffle.read.localBytesRead","Value":168,"Internal":true,"Count Failed Values":true},{"ID":690,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":672,"Name":"duration","Value":"5","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":645,"Name":"fetch wait time","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":684,"Name":"time to commit changes","Value":"46","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":693,"Name":"time in aggregation build","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":770,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Value":0,"Internal":true,"Count Failed Values":true},{"ID":683,"Name":"time to remove","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":773,"Name":"internal.metrics.shuffle.read.recordsRead","Value":1,"Internal":true,"Count Failed Values":true},{"ID":686,"Name":"estimated size of state only on current version","Value":"456","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":695,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":677,"Name":"avg hash probe bucket list iters","Value":"10","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":767,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Value":0,"Internal":true,"Count Failed Values":true},{"ID":641,"Name":"local blocks read","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":758,"Name":"internal.metrics.executorRunTime","Value":107,"Internal":true,"Count Failed Values":true},{"ID":644,"Name":"local bytes read","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":680,"Name":"number of total state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":689,"Name":"duration","Value":"23","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":674,"Name":"peak memory","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":757,"Name":"internal.metrics.executorDeserializeCpuTime","Value":9945525,"Internal":true,"Count Failed Values":true},{"ID":769,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Value":0,"Internal":true,"Count Failed Values":true},{"ID":760,"Name":"internal.metrics.resultSize","Value":10885,"Internal":true,"Count Failed Values":true},{"ID":772,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Value":0,"Internal":true,"Count Failed Values":true},{"ID":646,"Name":"records read","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":682,"Name":"time to update","Value":"23","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":691,"Name":"peak memory","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":673,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":676,"Name":"time in aggregation build","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":685,"Name":"memory used by state","Value":"1184","Internal":true,"Count Failed Values":true,"Metadata":"sql"}],"Resource Profile Id":0}} +{"Event":"SparkListenerJobEnd","Job ID":4,"Completion Time":1596020225059,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":14,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 4","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nLocalTableScan (1)\n\n\n(1) 
LocalTableScan\nOutput [2]: [value#102, count#103]\nArguments: [value#102, count#103]\n\n","sparkPlanInfo":{"nodeName":"LocalTableScan","simpleString":"LocalTableScan [value#102, count#103]","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":781,"metricType":"sum"}]},"time":1596020225079} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":14,"time":1596020225087} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":13,"time":1596020225087} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":12,"time":1596020225087} +{"Event":"org.apache.spark.sql.streaming.StreamingQueryListener$QueryProgressEvent","progress":{"id":"8d268dc2-bc9c-4be8-97a9-b135d2943028","runId":"e225d92f-2545-48f8-87a2-9c0309580f8a","name":null,"timestamp":"2020-07-29T10:57:04.734Z","batchId":4,"batchDuration":387,"durationMs":{"triggerExecution":387,"queryPlanning":30,"getBatch":1,"latestOffset":3,"addBatch":306,"walCommit":12},"eventTime":{},"stateOperators":[{"numRowsTotal":1,"numRowsUpdated":1,"memoryUsedBytes":1184,"numLateInputs":0,"customMetrics":{"stateOnCurrentVersionSizeBytes":456,"loadedMapCacheHitCount":16,"loadedMapCacheMissCount":0}}],"sources":[{"description":"KafkaV2[Subscribe[test5]]","startOffset":"{\"test5\":{\"0\":48757}}","endOffset":"{\"test5\":{\"0\":48799}}","numInputRows":42,"inputRowsPerSecond":100.71942446043165,"processedRowsPerSecond":108.52713178294573}],"sink":{"description":"org.apache.spark.sql.execution.streaming.ConsoleTable$@514ba885","numOutputRows":1},"observedMetrics":{}}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":15,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 5","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, 
timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48799}}, {\"test5\":{\"0\":48837}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#1297]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 9579cc6c-8827-43f7-9678-7747602e493e, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 9579cc6c-8827-43f7-9678-7747602e493e, opId = 0, ver = 0, numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@2c214312\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@2c214312","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 5, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], 
functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 5, numPartitions = 2], 2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#1221]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":865,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":864,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":861,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":862,"metricType":"timing"},{"name":"peak memory","accumulatorId":860,"metricType":"size"},{"name":"number of output rows","accumulatorId":859,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":863,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":856,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":857,"metricType":"timing"},{"name":"peak memory","accumulatorId":855,"metricType":"size"},{"name":"number of output rows","accumulatorId":854,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":858,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":853,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":805,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":806,"metricType":"nsTiming"},{"name":"records read","accumulatorId":803,"metricType":"sum"},{"name":"local bytes 
read","accumulatorId":801,"metricType":"size"},{"name":"fetch wait time","accumulatorId":802,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":799,"metricType":"size"},{"name":"local blocks read","accumulatorId":798,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":797,"metricType":"sum"},{"name":"data size","accumulatorId":796,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":800,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":804,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":852,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":849,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":850,"metricType":"timing"},{"name":"peak memory","accumulatorId":848,"metricType":"size"},{"name":"number of output rows","accumulatorId":847,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":851,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":846,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":836,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":837,"metricType":"sum"},{"name":"memory used by state","accumulatorId":842,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":844,"metricType":"sum"},{"name":"number of output rows","accumulatorId":835,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":843,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":845,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":841,"metricType":"timing"},{"name":"time to remove","accumulatorId":840,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":838,"metricType":"sum"},{"name":"time to update","accumulatorId":839,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":832,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":833,"metricType":"timing"},{"name":"peak memory","accumulatorId":831,"metricType":"size"},{"name":"number of output rows","accumulatorId":830,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":834,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":829,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020225211} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":16,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 5","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48799}}, {\"test5\":{\"0\":48837}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#1373]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = b800d96e-7584-4e8d-8df8-c9b901b7f2e2, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = b800d96e-7584-4e8d-8df8-c9b901b7f2e2, opId = 0, ver = 0, 
numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@2c214312\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@2c214312","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 5, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 5, numPartitions = 2], 2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#1221]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output 
rows","accumulatorId":865,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":864,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":861,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":862,"metricType":"timing"},{"name":"peak memory","accumulatorId":860,"metricType":"size"},{"name":"number of output rows","accumulatorId":859,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":863,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":856,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":857,"metricType":"timing"},{"name":"peak memory","accumulatorId":855,"metricType":"size"},{"name":"number of output rows","accumulatorId":854,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":858,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":853,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":805,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":806,"metricType":"nsTiming"},{"name":"records read","accumulatorId":803,"metricType":"sum"},{"name":"local bytes read","accumulatorId":801,"metricType":"size"},{"name":"fetch wait time","accumulatorId":802,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":799,"metricType":"size"},{"name":"local blocks read","accumulatorId":798,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":797,"metricType":"sum"},{"name":"data size","accumulatorId":796,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":800,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":804,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":852,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":849,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":850,"metricType":"timing"},{"name":"peak memory","accumulatorId":848,"metricType":"size"},{"name":"number of output rows","accumulatorId":847,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":851,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":846,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":836,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":837,"metricType":"sum"},{"name":"memory used by state","accumulatorId":842,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":844,"metricType":"sum"},{"name":"number of output rows","accumulatorId":835,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":843,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":845,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":841,"metricType":"timing"},{"name":"time to remove","accumulatorId":840,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":838,"metricType":"sum"},{"name":"time to 
update","accumulatorId":839,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":832,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":833,"metricType":"timing"},{"name":"peak memory","accumulatorId":831,"metricType":"size"},{"name":"number of output rows","accumulatorId":830,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":834,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":829,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020225270} +{"Event":"SparkListenerJobStart","Job ID":5,"Submission Time":1596020225342,"Stage Infos":[{"Stage ID":10,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":66,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"173\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[65],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":62,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"181\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[61],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":64,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"179\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[63],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":61,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"185\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[60],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":65,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"174\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[64],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":60,"Name":"DataSourceRDD","Scope":"{\"id\":\"185\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":63,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"180\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[62],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0},{"Stage ID":11,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":71,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"165\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[70],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":70,"Name":"StateStoreRDD","Scope":"{\"id\":\"168\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[69],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":69,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"169\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[68],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":67,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"173\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[66],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":68,"Name":"StateStoreRDD","Scope":"{\"id\":\"172\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[67],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[10],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0}],"Stage IDs":[10,11],"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 5","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"5","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"16","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":10,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":66,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"173\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[65],"Storage Level":{"Use Disk":false,"Use 
Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":62,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"181\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[61],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":64,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"179\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[63],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":61,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"185\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[60],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":65,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"174\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[64],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":60,"Name":"DataSourceRDD","Scope":"{\"id\":\"185\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":63,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"180\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[62],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission 
Time":1596020225343,"Accumulables":[],"Resource Profile Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 5","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"5","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"16","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":10,"Stage Attempt ID":0,"Task Info":{"Task ID":15,"Index":0,"Attempt":0,"Launch Time":1596020225359,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":10,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":15,"Index":0,"Attempt":0,"Launch Time":1596020225359,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020225400,"Failed":false,"Killed":false,"Accumulables":[{"ID":806,"Name":"shuffle write time","Update":"530930","Value":"530930","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":805,"Name":"shuffle records written","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":804,"Name":"shuffle bytes written","Update":"168","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":796,"Name":"data size","Update":"128","Value":"128","Internal":true,"Count 
Failed Values":true,"Metadata":"sql"},{"ID":853,"Name":"duration","Update":"21","Value":"21","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":854,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":855,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":857,"Name":"time in aggregation build","Update":"14","Value":"14","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":859,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":860,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":862,"Name":"time in aggregation build","Update":"9","Value":"9","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":864,"Name":"duration","Update":"21","Value":"21","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":865,"Name":"number of output rows","Update":"38","Value":"38","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":910,"Name":"internal.metrics.input.recordsRead","Update":38,"Value":38,"Internal":true,"Count Failed Values":true},{"ID":908,"Name":"internal.metrics.shuffle.write.writeTime","Update":530930,"Value":530930,"Internal":true,"Count Failed Values":true},{"ID":907,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":906,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":168,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":897,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":524288,"Internal":true,"Count Failed Values":true},{"ID":892,"Name":"internal.metrics.resultSize","Update":2544,"Value":2544,"Internal":true,"Count Failed Values":true},{"ID":891,"Name":"internal.metrics.executorCpuTime","Update":22440089,"Value":22440089,"Internal":true,"Count Failed Values":true},{"ID":890,"Name":"internal.metrics.executorRunTime","Update":29,"Value":29,"Internal":true,"Count Failed Values":true},{"ID":889,"Name":"internal.metrics.executorDeserializeCpuTime","Update":6808170,"Value":6808170,"Internal":true,"Count Failed Values":true},{"ID":888,"Name":"internal.metrics.executorDeserializeTime","Update":6,"Value":6,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":6,"Executor Deserialize CPU Time":6808170,"Executor Run Time":29,"Executor CPU Time":22440089,"Peak Execution Memory":524288,"Result Size":2544,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":168,"Shuffle Write Time":530930,"Shuffle Records Written":1},"Input 
Metrics":{"Bytes Read":0,"Records Read":38},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":10,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":66,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"173\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[65],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":62,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"181\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[61],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":64,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"179\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[63],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":61,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"185\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[60],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":65,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"174\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[64],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":60,"Name":"DataSourceRDD","Scope":"{\"id\":\"185\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":63,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"180\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[62],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020225343,"Completion Time":1596020225401,"Accumulables":[{"ID":855,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":891,"Name":"internal.metrics.executorCpuTime","Value":22440089,"Internal":true,"Count Failed Values":true},{"ID":864,"Name":"duration","Value":"21","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":804,"Name":"shuffle bytes written","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":908,"Name":"internal.metrics.shuffle.write.writeTime","Value":530930,"Internal":true,"Count Failed Values":true},{"ID":890,"Name":"internal.metrics.executorRunTime","Value":29,"Internal":true,"Count Failed Values":true},{"ID":857,"Name":"time in aggregation build","Value":"14","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":860,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":806,"Name":"shuffle write time","Value":"530930","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":854,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":853,"Name":"duration","Value":"21","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":862,"Name":"time in aggregation build","Value":"9","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":889,"Name":"internal.metrics.executorDeserializeCpuTime","Value":6808170,"Internal":true,"Count Failed Values":true},{"ID":907,"Name":"internal.metrics.shuffle.write.recordsWritten","Value":1,"Internal":true,"Count Failed Values":true},{"ID":892,"Name":"internal.metrics.resultSize","Value":2544,"Internal":true,"Count Failed Values":true},{"ID":910,"Name":"internal.metrics.input.recordsRead","Value":38,"Internal":true,"Count Failed Values":true},{"ID":865,"Name":"number of output rows","Value":"38","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":805,"Name":"shuffle records written","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":796,"Name":"data size","Value":"128","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":859,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":888,"Name":"internal.metrics.executorDeserializeTime","Value":6,"Internal":true,"Count Failed Values":true},{"ID":897,"Name":"internal.metrics.peakExecutionMemory","Value":524288,"Internal":true,"Count Failed Values":true},{"ID":906,"Name":"internal.metrics.shuffle.write.bytesWritten","Value":168,"Internal":true,"Count Failed Values":true}],"Resource Profile Id":0}} 
+{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":11,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":71,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"165\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[70],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":70,"Name":"StateStoreRDD","Scope":"{\"id\":\"168\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[69],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":69,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"169\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[68],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":67,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"173\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[66],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":68,"Name":"StateStoreRDD","Scope":"{\"id\":\"172\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[67],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[10],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020225410,"Accumulables":[],"Resource Profile 
Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 5","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"5","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"16","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":11,"Stage Attempt ID":0,"Task Info":{"Task ID":16,"Index":0,"Attempt":0,"Launch Time":1596020225417,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":11,"Stage Attempt ID":0,"Task Info":{"Task ID":17,"Index":1,"Attempt":0,"Launch Time":1596020225417,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":11,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":17,"Index":1,"Attempt":0,"Launch Time":1596020225417,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020225498,"Failed":false,"Killed":false,"Accumulables":[{"ID":829,"Name":"duration","Update":"3","Value":"3","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":834,"Name":"avg hash probe bucket list 
iters","Update":"10","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":830,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":831,"Name":"peak memory","Update":"4456448","Value":"4456448","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":833,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":839,"Name":"time to update","Update":"11","Value":"11","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":838,"Name":"number of updated state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":840,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":841,"Name":"time to commit changes","Update":"37","Value":"37","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":843,"Name":"estimated size of state only on current version","Update":"368","Value":"368","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":835,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":844,"Name":"count of cache hit on states cache in provider","Update":"10","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":842,"Name":"memory used by state","Update":"784","Value":"784","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":837,"Name":"number of total state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":846,"Name":"duration","Update":"11","Value":"11","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":847,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":848,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":850,"Name":"time in aggregation build","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":852,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":798,"Name":"local blocks read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":802,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":801,"Name":"local bytes read","Update":"168","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":803,"Name":"records read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":930,"Name":"internal.metrics.shuffle.read.recordsRead","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":929,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":928,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":168,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":927,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":926,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed 
Values":true},{"ID":925,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":924,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":922,"Name":"internal.metrics.peakExecutionMemory","Update":4718592,"Value":4718592,"Internal":true,"Count Failed Values":true},{"ID":917,"Name":"internal.metrics.resultSize","Update":5574,"Value":5574,"Internal":true,"Count Failed Values":true},{"ID":916,"Name":"internal.metrics.executorCpuTime","Update":17945299,"Value":17945299,"Internal":true,"Count Failed Values":true},{"ID":915,"Name":"internal.metrics.executorRunTime","Update":68,"Value":68,"Internal":true,"Count Failed Values":true},{"ID":914,"Name":"internal.metrics.executorDeserializeCpuTime","Update":3451032,"Value":3451032,"Internal":true,"Count Failed Values":true},{"ID":913,"Name":"internal.metrics.executorDeserializeTime","Update":3,"Value":3,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":3,"Executor Deserialize CPU Time":3451032,"Executor Run Time":68,"Executor CPU Time":17945299,"Peak Execution Memory":4718592,"Result Size":5574,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":1,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":168,"Total Records Read":1},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":11,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":16,"Index":0,"Attempt":0,"Launch Time":1596020225417,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020225509,"Failed":false,"Killed":false,"Accumulables":[{"ID":829,"Name":"duration","Update":"2","Value":"5","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":831,"Name":"peak memory","Update":"262144","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":833,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":839,"Name":"time to update","Update":"4","Value":"15","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":840,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":841,"Name":"time to commit changes","Update":"50","Value":"87","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":843,"Name":"estimated size of state only on current version","Update":"88","Value":"456","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":844,"Name":"count of cache hit on states cache in provider","Update":"10","Value":"20","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":842,"Name":"memory used by state","Update":"400","Value":"1184","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":846,"Name":"duration","Update":"4","Value":"15","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":848,"Name":"peak memory","Update":"262144","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":850,"Name":"time in aggregation build","Update":"0","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":930,"Name":"internal.metrics.shuffle.read.recordsRead","Update":0,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":929,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":928,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":0,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":927,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":926,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":925,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":0,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":924,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":922,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":917,"Name":"internal.metrics.resultSize","Update":5311,"Value":10885,"Internal":true,"Count Failed Values":true},{"ID":916,"Name":"internal.metrics.executorCpuTime","Update":15599091,"Value":33544390,"Internal":true,"Count Failed Values":true},{"ID":915,"Name":"internal.metrics.executorRunTime","Update":84,"Value":152,"Internal":true,"Count Failed Values":true},{"ID":914,"Name":"internal.metrics.executorDeserializeCpuTime","Update":4357806,"Value":7808838,"Internal":true,"Count Failed Values":true},{"ID":913,"Name":"internal.metrics.executorDeserializeTime","Update":4,"Value":7,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":4,"Executor Deserialize CPU Time":4357806,"Executor Run Time":84,"Executor CPU Time":15599091,"Peak Execution Memory":524288,"Result Size":5311,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records 
Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":11,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":71,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"165\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[70],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":70,"Name":"StateStoreRDD","Scope":"{\"id\":\"168\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[69],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":69,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"169\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[68],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":67,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"173\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[66],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":68,"Name":"StateStoreRDD","Scope":"{\"id\":\"172\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[67],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[10],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020225410,"Completion Time":1596020225514,"Accumulables":[{"ID":846,"Name":"duration","Value":"15","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":837,"Name":"number of total state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":801,"Name":"local 
bytes read","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":831,"Name":"peak memory","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":926,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Value":0,"Internal":true,"Count Failed Values":true},{"ID":917,"Name":"internal.metrics.resultSize","Value":10885,"Internal":true,"Count Failed Values":true},{"ID":830,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":848,"Name":"peak memory","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":803,"Name":"records read","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":839,"Name":"time to update","Value":"15","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":929,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Value":0,"Internal":true,"Count Failed Values":true},{"ID":833,"Name":"time in aggregation build","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":842,"Name":"memory used by state","Value":"1184","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":914,"Name":"internal.metrics.executorDeserializeCpuTime","Value":7808838,"Internal":true,"Count Failed Values":true},{"ID":922,"Name":"internal.metrics.peakExecutionMemory","Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":913,"Name":"internal.metrics.executorDeserializeTime","Value":7,"Internal":true,"Count Failed Values":true},{"ID":925,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Value":1,"Internal":true,"Count Failed Values":true},{"ID":844,"Name":"count of cache hit on states cache in provider","Value":"20","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":835,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":916,"Name":"internal.metrics.executorCpuTime","Value":33544390,"Internal":true,"Count Failed Values":true},{"ID":829,"Name":"duration","Value":"5","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":928,"Name":"internal.metrics.shuffle.read.localBytesRead","Value":168,"Internal":true,"Count Failed Values":true},{"ID":802,"Name":"fetch wait time","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":838,"Name":"number of updated state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":847,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":850,"Name":"time in aggregation build","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":841,"Name":"time to commit changes","Value":"87","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":927,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Value":0,"Internal":true,"Count Failed Values":true},{"ID":930,"Name":"internal.metrics.shuffle.read.recordsRead","Value":1,"Internal":true,"Count Failed Values":true},{"ID":840,"Name":"time to remove","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":834,"Name":"avg hash probe bucket list iters","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":852,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":798,"Name":"local blocks read","Value":"1","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":843,"Name":"estimated size of state only on current version","Value":"456","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":915,"Name":"internal.metrics.executorRunTime","Value":152,"Internal":true,"Count Failed Values":true},{"ID":924,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Value":0,"Internal":true,"Count Failed Values":true}],"Resource Profile Id":0}} +{"Event":"SparkListenerJobEnd","Job ID":5,"Completion Time":1596020225514,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":17,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 5","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nLocalTableScan (1)\n\n\n(1) LocalTableScan\nOutput [2]: [value#116, count#117]\nArguments: [value#116, count#117]\n\n","sparkPlanInfo":{"nodeName":"LocalTableScan","simpleString":"LocalTableScan [value#116, count#117]","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":938,"metricType":"sum"}]},"time":1596020225536} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":17,"time":1596020225541} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":16,"time":1596020225542} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":15,"time":1596020225542} 
+{"Event":"org.apache.spark.sql.streaming.StreamingQueryListener$QueryProgressEvent","progress":{"id":"8d268dc2-bc9c-4be8-97a9-b135d2943028","runId":"e225d92f-2545-48f8-87a2-9c0309580f8a","name":null,"timestamp":"2020-07-29T10:57:05.123Z","batchId":5,"batchDuration":437,"durationMs":{"triggerExecution":437,"queryPlanning":35,"getBatch":1,"latestOffset":3,"addBatch":361,"walCommit":18},"eventTime":{},"stateOperators":[{"numRowsTotal":1,"numRowsUpdated":1,"memoryUsedBytes":1184,"numLateInputs":0,"customMetrics":{"stateOnCurrentVersionSizeBytes":456,"loadedMapCacheHitCount":20,"loadedMapCacheMissCount":0}}],"sources":[{"description":"KafkaV2[Subscribe[test5]]","startOffset":"{\"test5\":{\"0\":48799}}","endOffset":"{\"test5\":{\"0\":48837}}","numInputRows":38,"inputRowsPerSecond":97.68637532133675,"processedRowsPerSecond":86.95652173913044}],"sink":{"description":"org.apache.spark.sql.execution.streaming.ConsoleTable$@514ba885","numOutputRows":1},"observedMetrics":{}}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":18,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 6","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48837}}, {\"test5\":{\"0\":48881}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) 
DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#1528]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 6a12c2d9-8d02-4241-93fc-f53da01bb454, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 6a12c2d9-8d02-4241-93fc-f53da01bb454, opId = 0, ver = 0, numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@27ec018d\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@27ec018d","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 6, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 6, numPartitions = 2], 2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#1452]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], 
functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":1022,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1021,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1018,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1019,"metricType":"timing"},{"name":"peak memory","accumulatorId":1017,"metricType":"size"},{"name":"number of output rows","accumulatorId":1016,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":1020,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1013,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1014,"metricType":"timing"},{"name":"peak memory","accumulatorId":1012,"metricType":"size"},{"name":"number of output rows","accumulatorId":1011,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":1015,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1010,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":962,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":963,"metricType":"nsTiming"},{"name":"records read","accumulatorId":960,"metricType":"sum"},{"name":"local bytes read","accumulatorId":958,"metricType":"size"},{"name":"fetch wait time","accumulatorId":959,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":956,"metricType":"size"},{"name":"local blocks read","accumulatorId":955,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":954,"metricType":"sum"},{"name":"data size","accumulatorId":953,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":957,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":961,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output 
rows","accumulatorId":1009,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1006,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1007,"metricType":"timing"},{"name":"peak memory","accumulatorId":1005,"metricType":"size"},{"name":"number of output rows","accumulatorId":1004,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":1008,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1003,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":993,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":994,"metricType":"sum"},{"name":"memory used by state","accumulatorId":999,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":1001,"metricType":"sum"},{"name":"number of output rows","accumulatorId":992,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":1000,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":1002,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":998,"metricType":"timing"},{"name":"time to remove","accumulatorId":997,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":995,"metricType":"sum"},{"name":"time to update","accumulatorId":996,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":989,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":990,"metricType":"timing"},{"name":"peak memory","accumulatorId":988,"metricType":"size"},{"name":"number of output rows","accumulatorId":987,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":991,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":986,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020225657} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":19,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 6","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 
(14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48837}}, {\"test5\":{\"0\":48881}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#1604]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 96456757-8d0b-46da-a006-9fe2cb6fc936, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 96456757-8d0b-46da-a006-9fe2cb6fc936, opId = 0, ver = 0, numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@27ec018d\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@27ec018d","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave 
[value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 6, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 6, numPartitions = 2], 2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#1452]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":1022,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1021,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1018,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1019,"metricType":"timing"},{"name":"peak memory","accumulatorId":1017,"metricType":"size"},{"name":"number of output rows","accumulatorId":1016,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":1020,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1013,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1014,"metricType":"timing"},{"name":"peak memory","accumulatorId":1012,"metricType":"size"},{"name":"number of output rows","accumulatorId":1011,"metricType":"sum"},{"name":"avg hash probe bucket list 
iters","accumulatorId":1015,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1010,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":962,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":963,"metricType":"nsTiming"},{"name":"records read","accumulatorId":960,"metricType":"sum"},{"name":"local bytes read","accumulatorId":958,"metricType":"size"},{"name":"fetch wait time","accumulatorId":959,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":956,"metricType":"size"},{"name":"local blocks read","accumulatorId":955,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":954,"metricType":"sum"},{"name":"data size","accumulatorId":953,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":957,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":961,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":1009,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1006,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1007,"metricType":"timing"},{"name":"peak memory","accumulatorId":1005,"metricType":"size"},{"name":"number of output rows","accumulatorId":1004,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":1008,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1003,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":993,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":994,"metricType":"sum"},{"name":"memory used by state","accumulatorId":999,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":1001,"metricType":"sum"},{"name":"number of output rows","accumulatorId":992,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":1000,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":1002,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":998,"metricType":"timing"},{"name":"time to remove","accumulatorId":997,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":995,"metricType":"sum"},{"name":"time to update","accumulatorId":996,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":989,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":990,"metricType":"timing"},{"name":"peak memory","accumulatorId":988,"metricType":"size"},{"name":"number of output rows","accumulatorId":987,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":991,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":986,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020225687} +{"Event":"SparkListenerJobStart","Job ID":6,"Submission Time":1596020225759,"Stage Infos":[{"Stage ID":12,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":78,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"206\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[77],"Storage Level":{"Use Disk":false,"Use 
Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":75,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"213\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[74],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":74,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"214\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[73],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":77,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"207\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[76],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":72,"Name":"DataSourceRDD","Scope":"{\"id\":\"218\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":73,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"218\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[72],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":76,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"212\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[75],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0},{"Stage ID":13,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":83,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"198\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[82],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":81,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"202\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[80],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":79,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"206\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[78],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":80,"Name":"StateStoreRDD","Scope":"{\"id\":\"205\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[79],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":82,"Name":"StateStoreRDD","Scope":"{\"id\":\"201\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[81],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[12],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0}],"Stage IDs":[12,13],"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 6","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"6","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"19","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":12,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":78,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"206\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[77],"Storage Level":{"Use Disk":false,"Use 
Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":75,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"213\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[74],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":74,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"214\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[73],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":77,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"207\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[76],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":72,"Name":"DataSourceRDD","Scope":"{\"id\":\"218\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":73,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"218\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[72],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":76,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"212\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[75],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission 
Time":1596020225760,"Accumulables":[],"Resource Profile Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 6","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"6","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"19","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":12,"Stage Attempt ID":0,"Task Info":{"Task ID":18,"Index":0,"Attempt":0,"Launch Time":1596020225766,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":12,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":18,"Index":0,"Attempt":0,"Launch Time":1596020225766,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020225796,"Failed":false,"Killed":false,"Accumulables":[{"ID":963,"Name":"shuffle write time","Update":"543836","Value":"543836","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":962,"Name":"shuffle records written","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":961,"Name":"shuffle bytes written","Update":"168","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":953,"Name":"data size","Update":"128","Value":"128","Internal":true,"Count 
Failed Values":true,"Metadata":"sql"},{"ID":1010,"Name":"duration","Update":"17","Value":"17","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1011,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1012,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1014,"Name":"time in aggregation build","Update":"11","Value":"11","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1016,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1017,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1019,"Name":"time in aggregation build","Update":"8","Value":"8","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1021,"Name":"duration","Update":"17","Value":"17","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1022,"Name":"number of output rows","Update":"44","Value":"44","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1067,"Name":"internal.metrics.input.recordsRead","Update":44,"Value":44,"Internal":true,"Count Failed Values":true},{"ID":1065,"Name":"internal.metrics.shuffle.write.writeTime","Update":543836,"Value":543836,"Internal":true,"Count Failed Values":true},{"ID":1064,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":1063,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":168,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":1054,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":524288,"Internal":true,"Count Failed Values":true},{"ID":1049,"Name":"internal.metrics.resultSize","Update":2544,"Value":2544,"Internal":true,"Count Failed Values":true},{"ID":1048,"Name":"internal.metrics.executorCpuTime","Update":23733439,"Value":23733439,"Internal":true,"Count Failed Values":true},{"ID":1047,"Name":"internal.metrics.executorRunTime","Update":23,"Value":23,"Internal":true,"Count Failed Values":true},{"ID":1046,"Name":"internal.metrics.executorDeserializeCpuTime","Update":3714406,"Value":3714406,"Internal":true,"Count Failed Values":true},{"ID":1045,"Name":"internal.metrics.executorDeserializeTime","Update":3,"Value":3,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":3,"Executor Deserialize CPU Time":3714406,"Executor Run Time":23,"Executor CPU Time":23733439,"Peak Execution Memory":524288,"Result Size":2544,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":168,"Shuffle Write Time":543836,"Shuffle Records 
Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":44},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":12,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":78,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"206\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[77],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":75,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"213\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[74],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":74,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"214\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[73],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":77,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"207\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[76],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":72,"Name":"DataSourceRDD","Scope":"{\"id\":\"218\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":73,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"218\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[72],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":76,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"212\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[75],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020225760,"Completion Time":1596020225797,"Accumulables":[{"ID":1064,"Name":"internal.metrics.shuffle.write.recordsWritten","Value":1,"Internal":true,"Count Failed Values":true},{"ID":1010,"Name":"duration","Value":"17","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1046,"Name":"internal.metrics.executorDeserializeCpuTime","Value":3714406,"Internal":true,"Count Failed Values":true},{"ID":1019,"Name":"time in aggregation build","Value":"8","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1067,"Name":"internal.metrics.input.recordsRead","Value":44,"Internal":true,"Count Failed Values":true},{"ID":1022,"Name":"number of output rows","Value":"44","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1049,"Name":"internal.metrics.resultSize","Value":2544,"Internal":true,"Count Failed Values":true},{"ID":1016,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":962,"Name":"shuffle records written","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":953,"Name":"data size","Value":"128","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1054,"Name":"internal.metrics.peakExecutionMemory","Value":524288,"Internal":true,"Count Failed Values":true},{"ID":1045,"Name":"internal.metrics.executorDeserializeTime","Value":3,"Internal":true,"Count Failed Values":true},{"ID":1063,"Name":"internal.metrics.shuffle.write.bytesWritten","Value":168,"Internal":true,"Count Failed Values":true},{"ID":1048,"Name":"internal.metrics.executorCpuTime","Value":23733439,"Internal":true,"Count Failed Values":true},{"ID":1012,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1021,"Name":"duration","Value":"17","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":961,"Name":"shuffle bytes written","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1065,"Name":"internal.metrics.shuffle.write.writeTime","Value":543836,"Internal":true,"Count Failed Values":true},{"ID":1047,"Name":"internal.metrics.executorRunTime","Value":23,"Internal":true,"Count Failed Values":true},{"ID":1014,"Name":"time in aggregation build","Value":"11","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":963,"Name":"shuffle write time","Value":"543836","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1017,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1011,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"}],"Resource Profile Id":0}} 
+{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":13,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":83,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"198\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[82],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":81,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"202\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[80],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":79,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"206\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[78],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":80,"Name":"StateStoreRDD","Scope":"{\"id\":\"205\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[79],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":82,"Name":"StateStoreRDD","Scope":"{\"id\":\"201\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[81],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[12],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020225801,"Accumulables":[],"Resource Profile 
Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 6","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"6","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"19","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":13,"Stage Attempt ID":0,"Task Info":{"Task ID":19,"Index":0,"Attempt":0,"Launch Time":1596020225808,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":13,"Stage Attempt ID":0,"Task Info":{"Task ID":20,"Index":1,"Attempt":0,"Launch Time":1596020225809,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":13,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":19,"Index":0,"Attempt":0,"Launch Time":1596020225808,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020225868,"Failed":false,"Killed":false,"Accumulables":[{"ID":986,"Name":"duration","Update":"3","Value":"3","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":988,"Name":"peak 
memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":990,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":996,"Name":"time to update","Update":"4","Value":"4","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":997,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":998,"Name":"time to commit changes","Update":"26","Value":"26","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1000,"Name":"estimated size of state only on current version","Update":"88","Value":"88","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1001,"Name":"count of cache hit on states cache in provider","Update":"12","Value":"12","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":999,"Name":"memory used by state","Update":"400","Value":"400","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1003,"Name":"duration","Update":"4","Value":"4","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1005,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1007,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1087,"Name":"internal.metrics.shuffle.read.recordsRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1086,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1085,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1084,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1083,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1082,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1081,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1079,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":524288,"Internal":true,"Count Failed Values":true},{"ID":1074,"Name":"internal.metrics.resultSize","Update":5311,"Value":5311,"Internal":true,"Count Failed Values":true},{"ID":1073,"Name":"internal.metrics.executorCpuTime","Update":17503528,"Value":17503528,"Internal":true,"Count Failed Values":true},{"ID":1072,"Name":"internal.metrics.executorRunTime","Update":50,"Value":50,"Internal":true,"Count Failed Values":true},{"ID":1071,"Name":"internal.metrics.executorDeserializeCpuTime","Update":4255703,"Value":4255703,"Internal":true,"Count Failed Values":true},{"ID":1070,"Name":"internal.metrics.executorDeserializeTime","Update":4,"Value":4,"Internal":true,"Count Failed Values":true}]},"Task Executor 
Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":4,"Executor Deserialize CPU Time":4255703,"Executor Run Time":50,"Executor CPU Time":17503528,"Peak Execution Memory":524288,"Result Size":5311,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":13,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":20,"Index":1,"Attempt":0,"Launch Time":1596020225809,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020225874,"Failed":false,"Killed":false,"Accumulables":[{"ID":986,"Name":"duration","Update":"2","Value":"5","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":991,"Name":"avg hash probe bucket list iters","Update":"10","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":987,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":988,"Name":"peak memory","Update":"4456448","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":990,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":996,"Name":"time to update","Update":"15","Value":"19","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":995,"Name":"number of updated state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":997,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":998,"Name":"time to commit changes","Update":"23","Value":"49","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1000,"Name":"estimated size of state only on current version","Update":"368","Value":"456","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":992,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1001,"Name":"count of cache hit on states cache in provider","Update":"12","Value":"24","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":999,"Name":"memory used by state","Update":"784","Value":"1184","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":994,"Name":"number of total state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1003,"Name":"duration","Update":"15","Value":"19","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":1004,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1005,"Name":"peak memory","Update":"262144","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1007,"Name":"time in aggregation build","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1009,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":955,"Name":"local blocks read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":959,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":958,"Name":"local bytes read","Update":"168","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":960,"Name":"records read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1087,"Name":"internal.metrics.shuffle.read.recordsRead","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":1086,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1085,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":168,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":1084,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1083,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1082,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":1081,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1079,"Name":"internal.metrics.peakExecutionMemory","Update":4718592,"Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":1074,"Name":"internal.metrics.resultSize","Update":5574,"Value":10885,"Internal":true,"Count Failed Values":true},{"ID":1073,"Name":"internal.metrics.executorCpuTime","Update":17516707,"Value":35020235,"Internal":true,"Count Failed Values":true},{"ID":1072,"Name":"internal.metrics.executorRunTime","Update":56,"Value":106,"Internal":true,"Count Failed Values":true},{"ID":1071,"Name":"internal.metrics.executorDeserializeCpuTime","Update":3750230,"Value":8005933,"Internal":true,"Count Failed Values":true},{"ID":1070,"Name":"internal.metrics.executorDeserializeTime","Update":3,"Value":7,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":3,"Executor Deserialize CPU Time":3750230,"Executor Run Time":56,"Executor CPU Time":17516707,"Peak Execution Memory":4718592,"Result Size":5574,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote 
Blocks Fetched":0,"Local Blocks Fetched":1,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":168,"Total Records Read":1},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":13,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":83,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"198\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[82],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":81,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"202\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[80],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":79,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"206\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[78],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":80,"Name":"StateStoreRDD","Scope":"{\"id\":\"205\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[79],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":82,"Name":"StateStoreRDD","Scope":"{\"id\":\"201\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[81],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[12],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020225801,"Completion Time":1596020225874,"Accumulables":[{"ID":1070,"Name":"internal.metrics.executorDeserializeTime","Value":7,"Internal":true,"Count Failed Values":true},{"ID":1079,"Name":"internal.metrics.peakExecutionMemory","Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":992,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1073,"Name":"internal.metrics.executorCpuTime","Value":35020235,"Internal":true,"Count Failed Values":true},{"ID":1082,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Value":1,"Internal":true,"Count Failed Values":true},{"ID":1001,"Name":"count of cache hit on states cache in provider","Value":"24","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":995,"Name":"number of updated state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1004,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":986,"Name":"duration","Value":"5","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":959,"Name":"fetch wait time","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1085,"Name":"internal.metrics.shuffle.read.localBytesRead","Value":168,"Internal":true,"Count Failed Values":true},{"ID":1007,"Name":"time in aggregation build","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":998,"Name":"time to commit changes","Value":"49","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1084,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Value":0,"Internal":true,"Count Failed Values":true},{"ID":997,"Name":"time to remove","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1087,"Name":"internal.metrics.shuffle.read.recordsRead","Value":1,"Internal":true,"Count Failed Values":true},{"ID":955,"Name":"local blocks read","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1081,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Value":0,"Internal":true,"Count Failed Values":true},{"ID":991,"Name":"avg hash probe bucket list iters","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1009,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1072,"Name":"internal.metrics.executorRunTime","Value":106,"Internal":true,"Count Failed Values":true},{"ID":1000,"Name":"estimated size of state only on current version","Value":"456","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":994,"Name":"number of total state 
rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1003,"Name":"duration","Value":"19","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":958,"Name":"local bytes read","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":988,"Name":"peak memory","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1074,"Name":"internal.metrics.resultSize","Value":10885,"Internal":true,"Count Failed Values":true},{"ID":1083,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Value":0,"Internal":true,"Count Failed Values":true},{"ID":960,"Name":"records read","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1086,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Value":0,"Internal":true,"Count Failed Values":true},{"ID":987,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1005,"Name":"peak memory","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":996,"Name":"time to update","Value":"19","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1071,"Name":"internal.metrics.executorDeserializeCpuTime","Value":8005933,"Internal":true,"Count Failed Values":true},{"ID":999,"Name":"memory used by state","Value":"1184","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":990,"Name":"time in aggregation build","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"}],"Resource Profile Id":0}} +{"Event":"SparkListenerJobEnd","Job ID":6,"Completion Time":1596020225875,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":20,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 6","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nLocalTableScan (1)\n\n\n(1) LocalTableScan\nOutput [2]: [value#130, count#131]\nArguments: [value#130, count#131]\n\n","sparkPlanInfo":{"nodeName":"LocalTableScan","simpleString":"LocalTableScan [value#130, count#131]","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":1095,"metricType":"sum"}]},"time":1596020225891} 
+{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":20,"time":1596020225896} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":19,"time":1596020225897} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":18,"time":1596020225897} +{"Event":"org.apache.spark.sql.streaming.StreamingQueryListener$QueryProgressEvent","progress":{"id":"8d268dc2-bc9c-4be8-97a9-b135d2943028","runId":"e225d92f-2545-48f8-87a2-9c0309580f8a","name":null,"timestamp":"2020-07-29T10:57:05.562Z","batchId":6,"batchDuration":351,"durationMs":{"triggerExecution":351,"queryPlanning":28,"getBatch":1,"latestOffset":6,"addBatch":273,"walCommit":25},"eventTime":{},"stateOperators":[{"numRowsTotal":1,"numRowsUpdated":1,"memoryUsedBytes":1184,"numLateInputs":0,"customMetrics":{"stateOnCurrentVersionSizeBytes":456,"loadedMapCacheHitCount":24,"loadedMapCacheMissCount":0}}],"sources":[{"description":"KafkaV2[Subscribe[test5]]","startOffset":"{\"test5\":{\"0\":48837}}","endOffset":"{\"test5\":{\"0\":48881}}","numInputRows":44,"inputRowsPerSecond":100.22779043280183,"processedRowsPerSecond":125.35612535612536}],"sink":{"description":"org.apache.spark.sql.execution.streaming.ConsoleTable$@514ba885","numOutputRows":1},"observedMetrics":{}}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":21,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 7","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48881}}, {\"test5\":{\"0\":48917}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, 
offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#1759]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = c0968891-bf48-4112-a19b-444014085d1d, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = c0968891-bf48-4112-a19b-444014085d1d, opId = 0, ver = 0, numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@6313b68e\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@6313b68e","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 7, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 7, numPartitions = 2], 
2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#1683]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":1179,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1178,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1175,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1176,"metricType":"timing"},{"name":"peak memory","accumulatorId":1174,"metricType":"size"},{"name":"number of output rows","accumulatorId":1173,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":1177,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1170,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1171,"metricType":"timing"},{"name":"peak memory","accumulatorId":1169,"metricType":"size"},{"name":"number of output rows","accumulatorId":1168,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":1172,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1167,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":1119,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":1120,"metricType":"nsTiming"},{"name":"records read","accumulatorId":1117,"metricType":"sum"},{"name":"local bytes read","accumulatorId":1115,"metricType":"size"},{"name":"fetch wait time","accumulatorId":1116,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":1113,"metricType":"size"},{"name":"local blocks read","accumulatorId":1112,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":1111,"metricType":"sum"},{"name":"data 
size","accumulatorId":1110,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":1114,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":1118,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":1166,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1163,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1164,"metricType":"timing"},{"name":"peak memory","accumulatorId":1162,"metricType":"size"},{"name":"number of output rows","accumulatorId":1161,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":1165,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1160,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":1150,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":1151,"metricType":"sum"},{"name":"memory used by state","accumulatorId":1156,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":1158,"metricType":"sum"},{"name":"number of output rows","accumulatorId":1149,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":1157,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":1159,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":1155,"metricType":"timing"},{"name":"time to remove","accumulatorId":1154,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":1152,"metricType":"sum"},{"name":"time to update","accumulatorId":1153,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1146,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1147,"metricType":"timing"},{"name":"peak memory","accumulatorId":1145,"metricType":"size"},{"name":"number of output rows","accumulatorId":1144,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":1148,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1143,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020225988} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":22,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 7","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48881}}, {\"test5\":{\"0\":48917}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#1835]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = e165b23b-1a6f-459f-9c51-288922bb2647, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = e165b23b-1a6f-459f-9c51-288922bb2647, opId = 0, ver = 0, 
numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@6313b68e\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@6313b68e","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 7, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 7, numPartitions = 2], 2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#1683]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output 
rows","accumulatorId":1179,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1178,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1175,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1176,"metricType":"timing"},{"name":"peak memory","accumulatorId":1174,"metricType":"size"},{"name":"number of output rows","accumulatorId":1173,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":1177,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1170,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1171,"metricType":"timing"},{"name":"peak memory","accumulatorId":1169,"metricType":"size"},{"name":"number of output rows","accumulatorId":1168,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":1172,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1167,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":1119,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":1120,"metricType":"nsTiming"},{"name":"records read","accumulatorId":1117,"metricType":"sum"},{"name":"local bytes read","accumulatorId":1115,"metricType":"size"},{"name":"fetch wait time","accumulatorId":1116,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":1113,"metricType":"size"},{"name":"local blocks read","accumulatorId":1112,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":1111,"metricType":"sum"},{"name":"data size","accumulatorId":1110,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":1114,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":1118,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":1166,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1163,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1164,"metricType":"timing"},{"name":"peak memory","accumulatorId":1162,"metricType":"size"},{"name":"number of output rows","accumulatorId":1161,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":1165,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1160,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":1150,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":1151,"metricType":"sum"},{"name":"memory used by state","accumulatorId":1156,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":1158,"metricType":"sum"},{"name":"number of output rows","accumulatorId":1149,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":1157,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":1159,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":1155,"metricType":"timing"},{"name":"time to remove","accumulatorId":1154,"metricType":"timing"},{"name":"number of updated state 
rows","accumulatorId":1152,"metricType":"sum"},{"name":"time to update","accumulatorId":1153,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1146,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1147,"metricType":"timing"},{"name":"peak memory","accumulatorId":1145,"metricType":"size"},{"name":"number of output rows","accumulatorId":1144,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":1148,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1143,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020226019} +{"Event":"SparkListenerJobStart","Job ID":7,"Submission Time":1596020226076,"Stage Infos":[{"Stage ID":15,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":95,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"231\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[94],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":93,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"235\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[92],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":91,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"239\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[90],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":94,"Name":"StateStoreRDD","Scope":"{\"id\":\"234\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[93],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":92,"Name":"StateStoreRDD","Scope":"{\"id\":\"238\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[91],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[14],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0},{"Stage ID":14,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":90,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"239\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[89],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":88,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"245\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[87],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":84,"Name":"DataSourceRDD","Scope":"{\"id\":\"251\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":85,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"251\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[84],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":89,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"240\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[88],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":86,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"247\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[85],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":87,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"246\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[86],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0}],"Stage IDs":[15,14],"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 7","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"7","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"22","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerStageSubmitted","Stage 
Info":{"Stage ID":14,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":90,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"239\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[89],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":88,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"245\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[87],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":84,"Name":"DataSourceRDD","Scope":"{\"id\":\"251\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":85,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"251\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[84],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":89,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"240\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[88],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":86,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"247\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[85],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":87,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"246\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[86],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020226077,"Accumulables":[],"Resource Profile Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 7","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"7","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"22","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":14,"Stage Attempt ID":0,"Task Info":{"Task ID":21,"Index":0,"Attempt":0,"Launch Time":1596020226086,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":14,"Stage 
Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":21,"Index":0,"Attempt":0,"Launch Time":1596020226086,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020226116,"Failed":false,"Killed":false,"Accumulables":[{"ID":1120,"Name":"shuffle write time","Update":"543034","Value":"543034","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1119,"Name":"shuffle records written","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1118,"Name":"shuffle bytes written","Update":"168","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1110,"Name":"data size","Update":"128","Value":"128","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1167,"Name":"duration","Update":"13","Value":"13","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1168,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1169,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1171,"Name":"time in aggregation build","Update":"8","Value":"8","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1173,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1174,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1176,"Name":"time in aggregation build","Update":"6","Value":"6","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1178,"Name":"duration","Update":"13","Value":"13","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1179,"Name":"number of output rows","Update":"36","Value":"36","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1224,"Name":"internal.metrics.input.recordsRead","Update":36,"Value":36,"Internal":true,"Count Failed Values":true},{"ID":1222,"Name":"internal.metrics.shuffle.write.writeTime","Update":543034,"Value":543034,"Internal":true,"Count Failed Values":true},{"ID":1221,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":1220,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":168,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":1211,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":524288,"Internal":true,"Count Failed Values":true},{"ID":1206,"Name":"internal.metrics.resultSize","Update":2544,"Value":2544,"Internal":true,"Count Failed Values":true},{"ID":1205,"Name":"internal.metrics.executorCpuTime","Update":19652237,"Value":19652237,"Internal":true,"Count Failed Values":true},{"ID":1204,"Name":"internal.metrics.executorRunTime","Update":19,"Value":19,"Internal":true,"Count Failed Values":true},{"ID":1203,"Name":"internal.metrics.executorDeserializeCpuTime","Update":2829254,"Value":2829254,"Internal":true,"Count Failed Values":true},{"ID":1202,"Name":"internal.metrics.executorDeserializeTime","Update":2,"Value":2,"Internal":true,"Count Failed Values":true}]},"Task Executor 
Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":2,"Executor Deserialize CPU Time":2829254,"Executor Run Time":19,"Executor CPU Time":19652237,"Peak Execution Memory":524288,"Result Size":2544,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":168,"Shuffle Write Time":543034,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":36},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":14,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":90,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"239\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[89],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":88,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"245\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[87],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":84,"Name":"DataSourceRDD","Scope":"{\"id\":\"251\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":85,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"251\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[84],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":89,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"240\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[88],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":86,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"247\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[85],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk 
Size":0},{"RDD ID":87,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"246\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[86],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020226077,"Completion Time":1596020226117,"Accumulables":[{"ID":1205,"Name":"internal.metrics.executorCpuTime","Value":19652237,"Internal":true,"Count Failed Values":true},{"ID":1178,"Name":"duration","Value":"13","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1169,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1118,"Name":"shuffle bytes written","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1204,"Name":"internal.metrics.executorRunTime","Value":19,"Internal":true,"Count Failed Values":true},{"ID":1222,"Name":"internal.metrics.shuffle.write.writeTime","Value":543034,"Internal":true,"Count Failed Values":true},{"ID":1171,"Name":"time in aggregation build","Value":"8","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1120,"Name":"shuffle write time","Value":"543034","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1174,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1168,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1203,"Name":"internal.metrics.executorDeserializeCpuTime","Value":2829254,"Internal":true,"Count Failed Values":true},{"ID":1167,"Name":"duration","Value":"13","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1221,"Name":"internal.metrics.shuffle.write.recordsWritten","Value":1,"Internal":true,"Count Failed Values":true},{"ID":1176,"Name":"time in aggregation build","Value":"6","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1206,"Name":"internal.metrics.resultSize","Value":2544,"Internal":true,"Count Failed Values":true},{"ID":1224,"Name":"internal.metrics.input.recordsRead","Value":36,"Internal":true,"Count Failed Values":true},{"ID":1179,"Name":"number of output rows","Value":"36","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":1119,"Name":"shuffle records written","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1110,"Name":"data size","Value":"128","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1173,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1202,"Name":"internal.metrics.executorDeserializeTime","Value":2,"Internal":true,"Count Failed Values":true},{"ID":1211,"Name":"internal.metrics.peakExecutionMemory","Value":524288,"Internal":true,"Count Failed Values":true},{"ID":1220,"Name":"internal.metrics.shuffle.write.bytesWritten","Value":168,"Internal":true,"Count Failed Values":true}],"Resource Profile Id":0}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":15,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":95,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"231\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[94],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":93,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"235\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[92],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":91,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"239\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[90],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":94,"Name":"StateStoreRDD","Scope":"{\"id\":\"234\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[93],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":92,"Name":"StateStoreRDD","Scope":"{\"id\":\"238\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[91],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[14],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020226120,"Accumulables":[],"Resource Profile Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 7","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"7","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"22","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":15,"Stage Attempt ID":0,"Task Info":{"Task ID":22,"Index":0,"Attempt":0,"Launch Time":1596020226128,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":15,"Stage 
Attempt ID":0,"Task Info":{"Task ID":23,"Index":1,"Attempt":0,"Launch Time":1596020226129,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":15,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":23,"Index":1,"Attempt":0,"Launch Time":1596020226129,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020226196,"Failed":false,"Killed":false,"Accumulables":[{"ID":1143,"Name":"duration","Update":"3","Value":"3","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1148,"Name":"avg hash probe bucket list iters","Update":"10","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1144,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1145,"Name":"peak memory","Update":"4456448","Value":"4456448","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1147,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1153,"Name":"time to update","Update":"21","Value":"21","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1152,"Name":"number of updated state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1154,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1155,"Name":"time to commit changes","Update":"19","Value":"19","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1157,"Name":"estimated size of state only on current version","Update":"368","Value":"368","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1149,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1158,"Name":"count of cache hit on states cache in provider","Update":"14","Value":"14","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1156,"Name":"memory used by state","Update":"784","Value":"784","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1151,"Name":"number of total state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1160,"Name":"duration","Update":"21","Value":"21","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1161,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1162,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1164,"Name":"time in aggregation build","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1166,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1112,"Name":"local blocks read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1116,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1115,"Name":"local bytes read","Update":"168","Value":"168","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":1117,"Name":"records read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1244,"Name":"internal.metrics.shuffle.read.recordsRead","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":1243,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1242,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":168,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":1241,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1240,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1239,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":1238,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1236,"Name":"internal.metrics.peakExecutionMemory","Update":4718592,"Value":4718592,"Internal":true,"Count Failed Values":true},{"ID":1231,"Name":"internal.metrics.resultSize","Update":5574,"Value":5574,"Internal":true,"Count Failed Values":true},{"ID":1230,"Name":"internal.metrics.executorCpuTime","Update":19415818,"Value":19415818,"Internal":true,"Count Failed Values":true},{"ID":1229,"Name":"internal.metrics.executorRunTime","Update":60,"Value":60,"Internal":true,"Count Failed Values":true},{"ID":1228,"Name":"internal.metrics.executorDeserializeCpuTime","Update":3845429,"Value":3845429,"Internal":true,"Count Failed Values":true},{"ID":1227,"Name":"internal.metrics.executorDeserializeTime","Update":3,"Value":3,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":3,"Executor Deserialize CPU Time":3845429,"Executor Run Time":60,"Executor CPU Time":19415818,"Peak Execution Memory":4718592,"Result Size":5574,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":1,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":168,"Total Records Read":1},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":15,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":22,"Index":0,"Attempt":0,"Launch Time":1596020226128,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020226204,"Failed":false,"Killed":false,"Accumulables":[{"ID":1143,"Name":"duration","Update":"2","Value":"5","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":1145,"Name":"peak memory","Update":"262144","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1147,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1153,"Name":"time to update","Update":"3","Value":"24","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1154,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1155,"Name":"time to commit changes","Update":"48","Value":"67","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1157,"Name":"estimated size of state only on current version","Update":"88","Value":"456","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1158,"Name":"count of cache hit on states cache in provider","Update":"14","Value":"28","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1156,"Name":"memory used by state","Update":"400","Value":"1184","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1160,"Name":"duration","Update":"3","Value":"24","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1162,"Name":"peak memory","Update":"262144","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1164,"Name":"time in aggregation build","Update":"0","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1244,"Name":"internal.metrics.shuffle.read.recordsRead","Update":0,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":1243,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1242,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":0,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":1241,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1240,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1239,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":0,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":1238,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1236,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":1231,"Name":"internal.metrics.resultSize","Update":5311,"Value":10885,"Internal":true,"Count Failed Values":true},{"ID":1230,"Name":"internal.metrics.executorCpuTime","Update":14652861,"Value":34068679,"Internal":true,"Count Failed Values":true},{"ID":1229,"Name":"internal.metrics.executorRunTime","Update":65,"Value":125,"Internal":true,"Count Failed Values":true},{"ID":1228,"Name":"internal.metrics.executorDeserializeCpuTime","Update":3933877,"Value":7779306,"Internal":true,"Count Failed Values":true},{"ID":1227,"Name":"internal.metrics.executorDeserializeTime","Update":3,"Value":6,"Internal":true,"Count Failed Values":true}]},"Task Executor 
Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":3,"Executor Deserialize CPU Time":3933877,"Executor Run Time":65,"Executor CPU Time":14652861,"Peak Execution Memory":524288,"Result Size":5311,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":15,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":95,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"231\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[94],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":93,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"235\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[92],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":91,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"239\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[90],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":94,"Name":"StateStoreRDD","Scope":"{\"id\":\"234\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[93],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":92,"Name":"StateStoreRDD","Scope":"{\"id\":\"238\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[91],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[14],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020226120,"Completion Time":1596020226204,"Accumulables":[{"ID":1115,"Name":"local bytes read","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1160,"Name":"duration","Value":"24","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1151,"Name":"number of total state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1145,"Name":"peak memory","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1154,"Name":"time to remove","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1231,"Name":"internal.metrics.resultSize","Value":10885,"Internal":true,"Count Failed Values":true},{"ID":1240,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Value":0,"Internal":true,"Count Failed Values":true},{"ID":1153,"Name":"time to update","Value":"24","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1162,"Name":"peak memory","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1144,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1243,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Value":0,"Internal":true,"Count Failed Values":true},{"ID":1117,"Name":"records read","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1228,"Name":"internal.metrics.executorDeserializeCpuTime","Value":7779306,"Internal":true,"Count Failed Values":true},{"ID":1147,"Name":"time in aggregation build","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1156,"Name":"memory used by state","Value":"1184","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1236,"Name":"internal.metrics.peakExecutionMemory","Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":1227,"Name":"internal.metrics.executorDeserializeTime","Value":6,"Internal":true,"Count Failed Values":true},{"ID":1158,"Name":"count of cache hit on states cache in provider","Value":"28","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1149,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1239,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Value":1,"Internal":true,"Count 
Failed Values":true},{"ID":1230,"Name":"internal.metrics.executorCpuTime","Value":34068679,"Internal":true,"Count Failed Values":true},{"ID":1152,"Name":"number of updated state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1242,"Name":"internal.metrics.shuffle.read.localBytesRead","Value":168,"Internal":true,"Count Failed Values":true},{"ID":1116,"Name":"fetch wait time","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1161,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1143,"Name":"duration","Value":"5","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1155,"Name":"time to commit changes","Value":"67","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1164,"Name":"time in aggregation build","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1241,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Value":0,"Internal":true,"Count Failed Values":true},{"ID":1244,"Name":"internal.metrics.shuffle.read.recordsRead","Value":1,"Internal":true,"Count Failed Values":true},{"ID":1148,"Name":"avg hash probe bucket list iters","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1157,"Name":"estimated size of state only on current version","Value":"456","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1166,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1238,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Value":0,"Internal":true,"Count Failed Values":true},{"ID":1229,"Name":"internal.metrics.executorRunTime","Value":125,"Internal":true,"Count Failed Values":true},{"ID":1112,"Name":"local blocks read","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"}],"Resource Profile Id":0}} +{"Event":"SparkListenerJobEnd","Job ID":7,"Completion Time":1596020226204,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":23,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 7","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nLocalTableScan (1)\n\n\n(1) LocalTableScan\nOutput [2]: 
[value#144, count#145]\nArguments: [value#144, count#145]\n\n","sparkPlanInfo":{"nodeName":"LocalTableScan","simpleString":"LocalTableScan [value#144, count#145]","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":1252,"metricType":"sum"}]},"time":1596020226221} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":23,"time":1596020226230} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":22,"time":1596020226231} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":21,"time":1596020226231} +{"Event":"org.apache.spark.sql.streaming.StreamingQueryListener$QueryProgressEvent","progress":{"id":"8d268dc2-bc9c-4be8-97a9-b135d2943028","runId":"e225d92f-2545-48f8-87a2-9c0309580f8a","name":null,"timestamp":"2020-07-29T10:57:05.916Z","batchId":7,"batchDuration":341,"durationMs":{"triggerExecution":341,"queryPlanning":24,"getBatch":0,"latestOffset":3,"addBatch":271,"walCommit":14},"eventTime":{},"stateOperators":[{"numRowsTotal":1,"numRowsUpdated":1,"memoryUsedBytes":1184,"numLateInputs":0,"customMetrics":{"stateOnCurrentVersionSizeBytes":456,"loadedMapCacheHitCount":28,"loadedMapCacheMissCount":0}}],"sources":[{"description":"KafkaV2[Subscribe[test5]]","startOffset":"{\"test5\":{\"0\":48881}}","endOffset":"{\"test5\":{\"0\":48917}}","numInputRows":36,"inputRowsPerSecond":101.69491525423729,"processedRowsPerSecond":105.57184750733137}],"sink":{"description":"org.apache.spark.sql.execution.streaming.ConsoleTable$@514ba885","numOutputRows":1},"observedMetrics":{}}} +{"Event":"SparkListenerApplicationEnd","Timestamp":1596020226301} diff --git a/sql/core/src/test/scala/org/apache/spark/deploy/history/Utils.scala b/sql/core/src/test/scala/org/apache/spark/deploy/history/Utils.scala new file mode 100644 index 0000000000000..f73305b1b001e --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/deploy/history/Utils.scala @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.deploy.history + +import org.apache.spark.SparkConf +import org.apache.spark.internal.config.History.HISTORY_LOG_DIR +import org.apache.spark.util.ManualClock + +object Utils { + def withFsHistoryProvider(logDir: String)(fn: FsHistoryProvider => Unit): Unit = { + var provider: FsHistoryProvider = null + try { + val clock = new ManualClock() + val conf = new SparkConf().set(HISTORY_LOG_DIR, logDir) + provider = new FsHistoryProvider(conf, clock) + provider.checkForLogs() + fn(provider) + } finally { + if (provider != null) { + provider.stop() + provider = null + } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryHistorySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryHistorySuite.scala new file mode 100644 index 0000000000000..160535ea4d048 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryHistorySuite.scala @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.spark.sql.streaming.ui + +import java.util.Locale +import javax.servlet.http.HttpServletRequest + +import org.mockito.Mockito.{mock, when} +import org.scalatest.BeforeAndAfter + +import org.apache.spark.deploy.history.{Utils => HsUtils} +import org.apache.spark.sql.execution.ui.StreamingQueryStatusStore +import org.apache.spark.sql.test.SharedSparkSession + +class StreamingQueryHistorySuite extends SharedSparkSession with BeforeAndAfter { + + test("support streaming query events") { + val logDir = Thread.currentThread().getContextClassLoader.getResource("spark-events").toString + HsUtils.withFsHistoryProvider(logDir) { provider => + val appUi = provider.getAppUI("local-1596020211915", None).getOrElse { + assert(false, "Failed to load event log of local-1596020211915.") + null + } + assert(appUi.ui.appName == "StructuredKafkaWordCount") + assert(appUi.ui.store.store.count(classOf[StreamingQueryData]) == 1) + assert(appUi.ui.store.store.count(classOf[StreamingQueryProgressWrapper]) == 8) + + val store = new StreamingQueryStatusStore(appUi.ui.store.store) + val tab = new StreamingQueryTab(store, appUi.ui) + val request = mock(classOf[HttpServletRequest]) + var html = new StreamingQueryPage(tab).render(request) + .toString().toLowerCase(Locale.ROOT) + // 81.39: Avg Input /sec + assert(html.contains("81.39")) + // 157.05: Avg Process /sec + assert(html.contains("157.05")) + + val id = "8d268dc2-bc9c-4be8-97a9-b135d2943028" + val runId = "e225d92f-2545-48f8-87a2-9c0309580f8a" + when(request.getParameter("id")).thenReturn(runId) + html = new StreamingQueryStatisticsPage(tab).render(request) + .toString().toLowerCase(Locale.ROOT) + assert(html.contains("8 completed batches")) + assert(html.contains(id)) + assert(html.contains(runId)) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala index c2b6688faf0e7..246fa1f7c9184 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala @@ -20,11 +20,13 @@ package org.apache.spark.sql.streaming.ui import java.util.{Locale, UUID} import javax.servlet.http.HttpServletRequest +import scala.xml.Node + import org.mockito.Mockito.{mock, when, RETURNS_SMART_NULLS} import org.scalatest.BeforeAndAfter -import scala.xml.Node import org.apache.spark.SparkConf +import org.apache.spark.sql.execution.ui.StreamingQueryStatusStore import org.apache.spark.sql.streaming.StreamingQueryProgress import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.ui.SparkUI @@ -35,26 +37,26 @@ class StreamingQueryPageSuite extends SharedSparkSession with BeforeAndAfter { val id = UUID.randomUUID() val request = mock(classOf[HttpServletRequest]) val tab = mock(classOf[StreamingQueryTab], RETURNS_SMART_NULLS) - val statusListener = mock(classOf[StreamingQueryStatusListener], RETURNS_SMART_NULLS) + val store = mock(classOf[StreamingQueryStatusStore], RETURNS_SMART_NULLS) when(tab.appName).thenReturn("testing") when(tab.headerTabs).thenReturn(Seq.empty) - when(tab.statusListener).thenReturn(statusListener) + when(tab.store).thenReturn(store) val streamQuery = createStreamQueryUIData(id) - when(statusListener.allQueryStatus).thenReturn(Seq(streamQuery)) + when(store.allQueryUIData).thenReturn(Seq(streamQuery)) var html = renderStreamingQueryPage(request, tab) 
.toString().toLowerCase(Locale.ROOT) assert(html.contains("active streaming queries (1)")) - when(streamQuery.isActive).thenReturn(false) - when(streamQuery.exception).thenReturn(None) + when(streamQuery.summary.isActive).thenReturn(false) + when(streamQuery.summary.exception).thenReturn(None) html = renderStreamingQueryPage(request, tab) .toString().toLowerCase(Locale.ROOT) assert(html.contains("completed streaming queries (1)")) assert(html.contains("finished")) - when(streamQuery.isActive).thenReturn(false) - when(streamQuery.exception).thenReturn(Option("exception in query")) + when(streamQuery.summary.isActive).thenReturn(false) + when(streamQuery.summary.exception).thenReturn(Option("exception in query")) html = renderStreamingQueryPage(request, tab) .toString().toLowerCase(Locale.ROOT) assert(html.contains("completed streaming queries (1)")) @@ -66,17 +68,20 @@ class StreamingQueryPageSuite extends SharedSparkSession with BeforeAndAfter { val id = UUID.randomUUID() val request = mock(classOf[HttpServletRequest]) val tab = mock(classOf[StreamingQueryTab], RETURNS_SMART_NULLS) - val statusListener = mock(classOf[StreamingQueryStatusListener], RETURNS_SMART_NULLS) + val store = mock(classOf[StreamingQueryStatusStore], RETURNS_SMART_NULLS) + when(request.getParameter("id")).thenReturn(id.toString) + when(tab.appName).thenReturn("testing") + when(tab.headerTabs).thenReturn(Seq.empty) + when(tab.store).thenReturn(store) val ui = mock(classOf[SparkUI]) when(request.getParameter("id")).thenReturn(id.toString) when(tab.appName).thenReturn("testing") when(tab.headerTabs).thenReturn(Seq.empty) - when(tab.statusListener).thenReturn(statusListener) when(ui.conf).thenReturn(new SparkConf()) when(tab.parent).thenReturn(ui) val streamQuery = createStreamQueryUIData(id) - when(statusListener.allQueryStatus).thenReturn(Seq(streamQuery)) + when(store.allQueryUIData).thenReturn(Seq(streamQuery)) val html = renderStreamingQueryStatisticsPage(request, tab) .toString().toLowerCase(Locale.ROOT) @@ -94,15 +99,18 @@ class StreamingQueryPageSuite extends SharedSparkSession with BeforeAndAfter { when(progress.batchId).thenReturn(2) when(progress.prettyJson).thenReturn("""{"a":1}""") + val summary = mock(classOf[StreamingQueryData], RETURNS_SMART_NULLS) + when(summary.isActive).thenReturn(true) + when(summary.name).thenReturn("query") + when(summary.id).thenReturn(id) + when(summary.runId).thenReturn(id) + when(summary.startTimestamp).thenReturn(1L) + when(summary.exception).thenReturn(None) + val streamQuery = mock(classOf[StreamingQueryUIData], RETURNS_SMART_NULLS) - when(streamQuery.isActive).thenReturn(true) - when(streamQuery.name).thenReturn("query") - when(streamQuery.id).thenReturn(id) - when(streamQuery.runId).thenReturn(id) - when(streamQuery.startTimestamp).thenReturn(1L) + when(streamQuery.summary).thenReturn(summary) when(streamQuery.lastProgress).thenReturn(progress) when(streamQuery.recentProgress).thenReturn(Array(progress)) - when(streamQuery.exception).thenReturn(None) streamQuery } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListenerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListenerSuite.scala index 6aa440e5609c5..91c55d5598a6b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListenerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListenerSuite.scala @@ -17,19 +17,28 @@ package 
org.apache.spark.sql.streaming.ui -import java.util.UUID +import java.text.SimpleDateFormat +import java.util.{Date, UUID} import org.mockito.Mockito.{mock, when, RETURNS_SMART_NULLS} +import org.scalatest.time.SpanSugar._ +import org.apache.spark.sql.catalyst.util.DateTimeUtils.getTimeZone +import org.apache.spark.sql.execution.ui.StreamingQueryStatusStore +import org.apache.spark.sql.internal.StaticSQLConf import org.apache.spark.sql.streaming.{StreamingQueryListener, StreamingQueryProgress, StreamTest} import org.apache.spark.sql.streaming +import org.apache.spark.status.ElementTrackingStore +import org.apache.spark.util.kvstore.InMemoryStore class StreamingQueryStatusListenerSuite extends StreamTest { test("onQueryStarted, onQueryProgress, onQueryTerminated") { - val listener = new StreamingQueryStatusListener(spark.sparkContext.conf) + val kvStore = new ElementTrackingStore(new InMemoryStore(), sparkConf) + val listener = new StreamingQueryStatusListener(spark.sparkContext.conf, kvStore) + val queryStore = new StreamingQueryStatusStore(kvStore) - // hanlde query started event + // handle query started event val id = UUID.randomUUID() val runId = UUID.randomUUID() val startEvent = new StreamingQueryListener.QueryStartedEvent( @@ -37,8 +46,9 @@ class StreamingQueryStatusListenerSuite extends StreamTest { listener.onQueryStarted(startEvent) // result checking - assert(listener.activeQueryStatus.size() == 1) - assert(listener.activeQueryStatus.get(runId).name == "test") + assert(queryStore.allQueryUIData.count(_.summary.isActive) == 1) + assert(queryStore.allQueryUIData.filter(_.summary.isActive).exists(uiData => + uiData.summary.runId == runId && uiData.summary.name.equals("test"))) // handle query progress event val progress = mock(classOf[StreamingQueryProgress], RETURNS_SMART_NULLS) @@ -53,28 +63,32 @@ class StreamingQueryStatusListenerSuite extends StreamTest { listener.onQueryProgress(processEvent) // result checking - val activeQuery = listener.activeQueryStatus.get(runId) - assert(activeQuery.isActive) - assert(activeQuery.recentProgress.length == 1) - assert(activeQuery.lastProgress.id == id) - assert(activeQuery.lastProgress.runId == runId) - assert(activeQuery.lastProgress.timestamp == "2001-10-01T01:00:00.100Z") - assert(activeQuery.lastProgress.inputRowsPerSecond == 10.0) - assert(activeQuery.lastProgress.processedRowsPerSecond == 12.0) - assert(activeQuery.lastProgress.batchId == 2) - assert(activeQuery.lastProgress.prettyJson == """{"a":1}""") + val activeQuery = + queryStore.allQueryUIData.filter(_.summary.isActive).find(_.summary.runId == runId) + assert(activeQuery.isDefined) + assert(activeQuery.get.summary.isActive) + assert(activeQuery.get.recentProgress.length == 1) + assert(activeQuery.get.lastProgress.id == id) + assert(activeQuery.get.lastProgress.runId == runId) + assert(activeQuery.get.lastProgress.timestamp == "2001-10-01T01:00:00.100Z") + assert(activeQuery.get.lastProgress.inputRowsPerSecond == 10.0) + assert(activeQuery.get.lastProgress.processedRowsPerSecond == 12.0) + assert(activeQuery.get.lastProgress.batchId == 2) + assert(activeQuery.get.lastProgress.prettyJson == """{"a":1}""") // handle terminate event val terminateEvent = new StreamingQueryListener.QueryTerminatedEvent(id, runId, None) listener.onQueryTerminated(terminateEvent) - assert(!listener.inactiveQueryStatus.head.isActive) - assert(listener.inactiveQueryStatus.head.runId == runId) - assert(listener.inactiveQueryStatus.head.id == id) + 
assert(!queryStore.allQueryUIData.filterNot(_.summary.isActive).head.summary.isActive) + assert(queryStore.allQueryUIData.filterNot(_.summary.isActive).head.summary.runId == runId) + assert(queryStore.allQueryUIData.filterNot(_.summary.isActive).head.summary.id == id) } test("same query start multiple times") { - val listener = new StreamingQueryStatusListener(spark.sparkContext.conf) + val kvStore = new ElementTrackingStore(new InMemoryStore(), sparkConf) + val listener = new StreamingQueryStatusListener(spark.sparkContext.conf, kvStore) + val queryStore = new StreamingQueryStatusStore(kvStore) // handle first time start val id = UUID.randomUUID() @@ -94,11 +108,106 @@ class StreamingQueryStatusListenerSuite extends StreamTest { listener.onQueryStarted(startEvent1) // result checking - assert(listener.activeQueryStatus.size() == 1) - assert(listener.inactiveQueryStatus.length == 1) - assert(listener.activeQueryStatus.containsKey(runId1)) - assert(listener.activeQueryStatus.get(runId1).id == id) - assert(listener.inactiveQueryStatus.head.runId == runId0) - assert(listener.inactiveQueryStatus.head.id == id) + assert(queryStore.allQueryUIData.count(_.summary.isActive) == 1) + assert(queryStore.allQueryUIData.filterNot(_.summary.isActive).length == 1) + assert(queryStore.allQueryUIData.filter(_.summary.isActive).exists(_.summary.runId == runId1)) + assert(queryStore.allQueryUIData.filter(_.summary.isActive).exists(uiData => + uiData.summary.runId == runId1 && uiData.summary.id == id)) + assert(queryStore.allQueryUIData.filterNot(_.summary.isActive).head.summary.runId == runId0) + assert(queryStore.allQueryUIData.filterNot(_.summary.isActive).head.summary.id == id) + } + + test("test small retained queries") { + val kvStore = new ElementTrackingStore(new InMemoryStore(), sparkConf) + val conf = spark.sparkContext.conf + conf.set(StaticSQLConf.STREAMING_UI_RETAINED_QUERIES.key, "2") + val listener = new StreamingQueryStatusListener(conf, kvStore) + val queryStore = new StreamingQueryStatusStore(kvStore) + + def addNewQuery(): (UUID, UUID) = { + val format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'") // ISO8601 + format.setTimeZone(getTimeZone("UTC")) + val id = UUID.randomUUID() + val runId = UUID.randomUUID() + val startEvent = new StreamingQueryListener.QueryStartedEvent( + id, runId, "test1", format.format(new Date(System.currentTimeMillis()))) + listener.onQueryStarted(startEvent) + (id, runId) + } + + def checkInactiveQueryStatus(numInactives: Int, targetInactives: Seq[UUID]): Unit = { + eventually(timeout(10.seconds)) { + val inactiveQueries = queryStore.allQueryUIData.filter(!_.summary.isActive) + assert(inactiveQueries.size == numInactives) + assert(inactiveQueries.map(_.summary.id).toSet == targetInactives.toSet) + } + } + + val (id1, runId1) = addNewQuery() + val (id2, runId2) = addNewQuery() + val (id3, runId3) = addNewQuery() + assert(queryStore.allQueryUIData.count(!_.summary.isActive) == 0) + + val terminateEvent1 = new StreamingQueryListener.QueryTerminatedEvent(id1, runId1, None) + listener.onQueryTerminated(terminateEvent1) + checkInactiveQueryStatus(1, Seq(id1)) + val terminateEvent2 = new StreamingQueryListener.QueryTerminatedEvent(id2, runId2, None) + listener.onQueryTerminated(terminateEvent2) + checkInactiveQueryStatus(2, Seq(id1, id2)) + val terminateEvent3 = new StreamingQueryListener.QueryTerminatedEvent(id3, runId3, None) + listener.onQueryTerminated(terminateEvent3) + checkInactiveQueryStatus(2, Seq(id2, id3)) + } + + test("test small retained progress") { + 
val kvStore = new ElementTrackingStore(new InMemoryStore(), sparkConf) + val conf = spark.sparkContext.conf + conf.set(StaticSQLConf.STREAMING_UI_RETAINED_PROGRESS_UPDATES.key, "5") + val listener = new StreamingQueryStatusListener(conf, kvStore) + val queryStore = new StreamingQueryStatusStore(kvStore) + + val id = UUID.randomUUID() + val runId = UUID.randomUUID() + val startEvent = new StreamingQueryListener.QueryStartedEvent( + id, runId, "test", "2016-12-05T20:54:20.827Z") + listener.onQueryStarted(startEvent) + + var batchId: Int = 0 + + def addQueryProgress(): Unit = { + val progress = mockProgressData(id, runId) + val processEvent = new streaming.StreamingQueryListener.QueryProgressEvent(progress) + listener.onQueryProgress(processEvent) + } + + def mockProgressData(id: UUID, runId: UUID): StreamingQueryProgress = { + val format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'") // ISO8601 + format.setTimeZone(getTimeZone("UTC")) + + val progress = mock(classOf[StreamingQueryProgress], RETURNS_SMART_NULLS) + when(progress.id).thenReturn(id) + when(progress.runId).thenReturn(runId) + when(progress.timestamp).thenReturn(format.format(new Date(System.currentTimeMillis()))) + when(progress.inputRowsPerSecond).thenReturn(10.0) + when(progress.processedRowsPerSecond).thenReturn(12.0) + when(progress.batchId).thenReturn(batchId) + when(progress.prettyJson).thenReturn("""{"a":1}""") + + batchId += 1 + progress + } + + def checkQueryProcessData(targetNum: Int): Unit = { + eventually(timeout(10.seconds)) { + assert(queryStore.getQueryProgressData(runId).size == targetNum) + } + } + + Array.tabulate(4) { _ => addQueryProgress() } + checkQueryProcessData(4) + addQueryProgress() + checkQueryProcessData(5) + addQueryProgress() + checkQueryProcessData(5) } } From 90d4d7d43ffd29ad780dc7c5588b7e55a73aba97 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Thu, 3 Dec 2020 09:31:46 +0800 Subject: [PATCH 0645/1009] [SPARK-33610][ML] Imputer transform skip duplicate head() job ### What changes were proposed in this pull request? on each call of `transform`, a head() job will be triggered, which can be skipped by using a lazy var. ### Why are the changes needed? avoiding duplicate head() jobs ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? existing tests Closes #30550 from zhengruifeng/imputer_transform. 
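For illustration, the caching pattern this patch applies can be sketched as follows. `CachedSurrogateModel` is a hypothetical stand-in for `ImputerModel`, not Spark's actual class; only the `@transient lazy val` body mirrors the code added in the diff below.

```scala
import org.apache.spark.sql.{DataFrame, Dataset}

// Sketch of the caching pattern: the surrogate row is fetched with a single head() job
// the first time it is needed and then reused by every later transform() call.
// CachedSurrogateModel is a hypothetical stand-in for ImputerModel.
class CachedSurrogateModel(surrogateDF: DataFrame) extends Serializable {

  // Evaluated at most once per instance; @transient keeps the cached map out of
  // serialization, so it is recomputed lazily after deserialization.
  @transient private lazy val surrogates: Map[String, Double] = {
    val row = surrogateDF.head()  // the only Spark job triggered for the surrogates
    row.schema.fieldNames.zipWithIndex
      .map { case (name, index) => (name, row.getDouble(index)) }
      .toMap
  }

  def transform(dataset: Dataset[_]): DataFrame = {
    // Repeated calls reuse the cached map; no duplicate head() jobs are triggered.
    println(s"imputing with surrogates: $surrogates")
    dataset.toDF()
  }
}
```

Marking the field `@transient` keeps the cached map out of the serialized model, while `lazy` defers the single `head()` job until the first `transform()` call.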
Authored-by: Ruifeng Zheng Signed-off-by: Ruifeng Zheng --- .../org/apache/spark/ml/feature/Imputer.scala | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala index 03ebe0299f63f..d0b6ab1ef2cbc 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala @@ -254,20 +254,25 @@ class ImputerModel private[ml] ( /** @group setParam */ def setOutputCols(value: Array[String]): this.type = set(outputCols, value) + @transient private lazy val surrogates = { + val row = surrogateDF.head() + row.schema.fieldNames.zipWithIndex + .map { case (name, index) => (name, row.getDouble(index)) } + .toMap + } + override def transform(dataset: Dataset[_]): DataFrame = { transformSchema(dataset.schema, logging = true) - val (inputColumns, outputColumns) = getInOutCols - val surrogates = surrogateDF.select(inputColumns.map(col): _*).head().toSeq - - - val newCols = inputColumns.zip(outputColumns).zip(surrogates).map { - case ((inputCol, outputCol), surrogate) => - val inputType = dataset.schema(inputCol).dataType - val ic = col(inputCol).cast(DoubleType) - when(ic.isNull, surrogate) - .when(ic === $(missingValue), surrogate) - .otherwise(ic) - .cast(inputType) + val (inputColumns, outputColumns) = getInOutCols() + + val newCols = inputColumns.map { inputCol => + val surrogate = surrogates(inputCol) + val inputType = dataset.schema(inputCol).dataType + val ic = col(inputCol).cast(DoubleType) + when(ic.isNull, surrogate) + .when(ic === $(missingValue), surrogate) + .otherwise(ic) + .cast(inputType) } dataset.withColumns(outputColumns, newCols).toDF() } From 878cc0e6e95f300a0a58c742654f53a28b30b174 Mon Sep 17 00:00:00 2001 From: Yuanjian Li Date: Wed, 2 Dec 2020 17:36:25 -0800 Subject: [PATCH 0646/1009] [SPARK-32896][SS][FOLLOW-UP] Rename the API to `toTable` ### What changes were proposed in this pull request? As the discussion in https://github.com/apache/spark/pull/30521#discussion_r531463427, rename the API to `toTable`. ### Why are the changes needed? Rename the API for further extension and accuracy. ### Does this PR introduce _any_ user-facing change? Yes, it's an API change but the new API is not released yet. ### How was this patch tested? Existing UT. Closes #30571 from xuanyuanking/SPARK-32896-follow. 
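A minimal usage sketch of the renamed API, assuming a Spark 3.1 session; the `rate` source, checkpoint path, and table name below are illustrative choices, not part of this patch.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.StreamingQuery

val spark = SparkSession.builder().appName("toTable-example").getOrCreate()

// DataStreamWriter.toTable starts the streaming query and writes to a catalog table,
// replacing the saveAsTable name that this follow-up renames.
val query: StreamingQuery = spark.readStream
  .format("rate")                                       // built-in testing source
  .load()
  .writeStream
  .option("checkpointLocation", "/tmp/rate-sink-ckpt")  // example path
  .toTable("rate_sink")                                 // was saveAsTable("rate_sink")

query.awaitTermination()
```

Only the method name changes here; starting the query and writing to the table behave the same as before the rename.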
Authored-by: Yuanjian Li Signed-off-by: Shixiong Zhu --- .../scala/org/apache/spark/sql/streaming/DataStreamWriter.scala | 2 +- .../spark/sql/streaming/test/DataStreamTableAPISuite.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index d67e175c24dd9..9e3599712fde5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -304,7 +304,7 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { * @since 3.1.0 */ @throws[TimeoutException] - def saveAsTable(tableName: String): StreamingQuery = { + def toTable(tableName: String): StreamingQuery = { this.source = SOURCE_NAME_TABLE this.tableName = tableName startInternal(None) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala index 062b1060bc601..bf850432d5c0e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala @@ -291,7 +291,7 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { val query = inputDF .writeStream .option("checkpointLocation", checkpointDir.getAbsolutePath) - .saveAsTable(tableIdentifier) + .toTable(tableIdentifier) inputData.addData(newInputs: _*) From 08809897554a48065c2280c709d7efba28fa441d Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 3 Dec 2020 10:57:14 +0900 Subject: [PATCH 0647/1009] [SPARK-22798][PYTHON][ML][FOLLOWUP] Add labelsArray to PySpark StringIndexer ### What changes were proposed in this pull request? This is a followup to add missing `labelsArray` to PySpark `StringIndexer`. ### Why are the changes needed? `labelsArray` is for multi-column case for `StringIndexer`. We should provide this accessor at PySpark side too. ### Does this PR introduce _any_ user-facing change? Yes, `labelsArray` was missing in PySpark `StringIndexer` in Spark 3.0. ### How was this patch tested? Unit test. Closes #30579 from viirya/SPARK-22798-followup. Authored-by: Liang-Chi Hsieh Signed-off-by: HyukjinKwon --- python/pyspark/ml/feature.py | 12 ++++++++++++ python/pyspark/ml/tests/test_feature.py | 1 + 2 files changed, 13 insertions(+) diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 8138f34d7a19e..7cfeabea4aa97 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -3852,9 +3852,21 @@ def from_arrays_of_labels(cls, arrayOfLabels, inputCols, outputCols=None, def labels(self): """ Ordered list of labels, corresponding to indices to be assigned. + + .. deprecated:: 3.1.0 + It will be removed in future versions. Use `labelsArray` method instead. """ return self._call_java("labels") + @property + @since("3.1.0") + def labelsArray(self): + """ + Array of ordered list of labels, corresponding to indices to be assigned + for each input column. 
+ """ + return self._call_java("labelsArray") + @inherit_doc class IndexToString(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): diff --git a/python/pyspark/ml/tests/test_feature.py b/python/pyspark/ml/tests/test_feature.py index 98b8ce6dfb95c..2cceb04338806 100644 --- a/python/pyspark/ml/tests/test_feature.py +++ b/python/pyspark/ml/tests/test_feature.py @@ -232,6 +232,7 @@ def test_string_indexer_from_labels(self): model = StringIndexerModel.from_labels(["a", "b", "c"], inputCol="label", outputCol="indexed", handleInvalid="keep") self.assertEqual(model.labels, ["a", "b", "c"]) + self.assertEqual(model.labelsArray, [("a", "b", "c")]) df1 = self.spark.createDataFrame([ (0, "a"), From 3b2ff16ee6e457daade0ecb9f96955c8ed73f2a5 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 3 Dec 2020 14:34:44 +0900 Subject: [PATCH 0648/1009] [SPARK-33636][PYTHON][ML][FOLLOWUP] Update since tag of labelsArray in StringIndexer ### What changes were proposed in this pull request? This is to update `labelsArray`'s since tag. ### Why are the changes needed? The original change was backported to branch-3.0 for 3.0.2 version. So it is better to update the since tag to reflect the fact. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A. Just tag change. Closes #30582 from viirya/SPARK-33636-followup. Authored-by: Liang-Chi Hsieh Signed-off-by: HyukjinKwon --- python/pyspark/ml/feature.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 7cfeabea4aa97..546c46383d340 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -3859,7 +3859,7 @@ def labels(self): return self._call_java("labels") @property - @since("3.1.0") + @since("3.0.2") def labelsArray(self): """ Array of ordered list of labels, corresponding to indices to be assigned From ff13f574e67ff9e2c38167368dc6190455e8ed7f Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 3 Dec 2020 14:04:08 +0000 Subject: [PATCH 0649/1009] [SPARK-20044][SQL] Add new function DATE_FROM_UNIX_DATE and UNIX_DATE ### What changes were proposed in this pull request? Add new functions DATE_FROM_UNIX_DATE and UNIX_DATE for conversion between Date type and Numeric types. ### Why are the changes needed? 1. Explicit conversion between Date type and Numeric types is disallowed in ANSI mode. We need to provide new functions for users to complete the conversion. 2. We have introduced new functions from Bigquery for conversion between Timestamp type and Numeric types: TIMESTAMP_SECONDS, TIMESTAMP_MILLIS, TIMESTAMP_MICROS , UNIX_SECONDS, UNIX_MILLIS, and UNIX_MICROS. It makes sense to add functions for conversion between Date type and Numeric types as well. ### Does this PR introduce _any_ user-facing change? Yes, two new datetime functions are added. ### How was this patch tested? Unit tests Closes #30588 from gengliangwang/dateToNumber. 
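A quick way to try the two new functions from a Scala session (assuming `spark` is an active `SparkSession`); the expected values in the comments come from the golden result files updated in the diff below.

```scala
// DATE_FROM_UNIX_DATE maps a day count since 1970-01-01 to a DATE; UNIX_DATE is its inverse.
spark.sql(
  """SELECT
    |  DATE_FROM_UNIX_DATE(0)      AS d0,    -- 1970-01-01
    |  DATE_FROM_UNIX_DATE(1000)   AS d1000, -- 1972-09-27
    |  UNIX_DATE(DATE'1970-01-01') AS n0,    -- 0
    |  UNIX_DATE(DATE'2020-12-04') AS n1     -- 18600
    |""".stripMargin).show()
```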
Authored-by: Gengliang Wang Signed-off-by: Wenchen Fan --- .../catalyst/analysis/FunctionRegistry.scala | 2 + .../expressions/datetimeExpressions.scala | 46 +++++++++++++++++++ .../expressions/DateExpressionsSuite.scala | 24 ++++++++++ .../sql-functions/sql-expression-schema.md | 4 +- .../resources/sql-tests/inputs/datetime.sql | 5 +- .../sql-tests/results/ansi/datetime.sql.out | 18 +++++++- .../sql-tests/results/datetime-legacy.sql.out | 18 +++++++- .../sql-tests/results/datetime.sql.out | 18 +++++++- 8 files changed, 130 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 5c2816a0baa95..3b46de539ce3d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -427,6 +427,8 @@ object FunctionRegistry { expression[MakeInterval]("make_interval"), expression[DatePart]("date_part"), expression[Extract]("extract"), + expression[DateFromUnixDate]("date_from_unix_date"), + expression[UnixDate]("unix_date"), expression[SecondsToTimestamp]("timestamp_seconds"), expression[MillisToTimestamp]("timestamp_millis"), expression[MicrosToTimestamp]("timestamp_micros"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 60dc32c1571fe..c20dd6148be3e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -400,6 +400,52 @@ case class DayOfYear(child: Expression) extends GetDateField { override val funcName = "getDayInYear" } +@ExpressionDescription( + usage = "_FUNC_(days) - Create date from the number of days since 1970-01-01.", + examples = """ + Examples: + > SELECT _FUNC_(1); + 1970-01-02 + """, + group = "datetime_funcs", + since = "3.1.0") +case class DateFromUnixDate(child: Expression) extends UnaryExpression + with ImplicitCastInputTypes with NullIntolerant { + override def inputTypes: Seq[AbstractDataType] = Seq(IntegerType) + + override def dataType: DataType = DateType + + override def nullSafeEval(input: Any): Any = input.asInstanceOf[Int] + + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = + defineCodeGen(ctx, ev, c => c) + + override def prettyName: String = "date_from_unix_date" +} + +@ExpressionDescription( + usage = "_FUNC_(date) - Returns the number of days since 1970-01-01.", + examples = """ + Examples: + > SELECT _FUNC_(DATE("1970-01-02")); + 1 + """, + group = "datetime_funcs", + since = "3.1.0") +case class UnixDate(child: Expression) extends UnaryExpression + with ExpectsInputTypes with NullIntolerant { + override def inputTypes: Seq[AbstractDataType] = Seq(DateType) + + override def dataType: DataType = IntegerType + + override def nullSafeEval(input: Any): Any = input.asInstanceOf[Int] + + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = + defineCodeGen(ctx, ev, c => c) + + override def prettyName: String = "unix_date" +} + abstract class IntegralToTimestampBase extends UnaryExpression with ExpectsInputTypes with NullIntolerant { diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 8a1a34276341d..79770505ec35d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -1245,6 +1245,30 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkResult(Int.MinValue.toLong - 100) } + test("DATE_FROM_UNIX_DATE") { + def testIntegralFunc(value: Number): Unit = { + checkEvaluation( + DateFromUnixDate(Literal(value.intValue())), + LocalDate.ofEpochDay(value.intValue())) + } + // test null input + checkEvaluation(DateFromUnixDate(Literal(null, IntegerType)), null) + // test integral input + testIntegralInput(testIntegralFunc) + } + + test("UNIX_DATE") { + def testIntegralFunc(value: Number): Unit = { + checkEvaluation( + UnixDate(Literal(LocalDate.ofEpochDay(value.intValue()))), + value.intValue()) + } + // test null input + checkEvaluation(UnixDate(Literal(null, DateType)), null) + // test various inputs + testIntegralInput(testIntegralFunc) + } + test("UNIX_SECONDS") { checkEvaluation(UnixSeconds(Literal(null, TimestampType)), null) var timestamp = Literal(new Timestamp(0L)) diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 861062a1f7705..a6d041a588a6d 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,6 +1,6 @@ ## Summary - - Number of queries: 345 + - Number of queries: 347 - Number of expressions that missing example: 13 - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint,window ## Schema of Built-in Functions @@ -91,6 +91,7 @@ | org.apache.spark.sql.catalyst.expressions.DateAdd | date_add | SELECT date_add('2016-07-30', 1) | struct | | org.apache.spark.sql.catalyst.expressions.DateDiff | datediff | SELECT datediff('2009-07-31', '2009-07-30') | struct | | org.apache.spark.sql.catalyst.expressions.DateFormatClass | date_format | SELECT date_format('2016-04-08', 'y') | struct | +| org.apache.spark.sql.catalyst.expressions.DateFromUnixDate | date_from_unix_date | SELECT date_from_unix_date(1) | struct | | org.apache.spark.sql.catalyst.expressions.DatePart | date_part | SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456') | struct | | org.apache.spark.sql.catalyst.expressions.DateSub | date_sub | SELECT date_sub('2016-07-30', 1) | struct | | org.apache.spark.sql.catalyst.expressions.DayOfMonth | day | SELECT day('2009-07-30') | struct | @@ -289,6 +290,7 @@ | org.apache.spark.sql.catalyst.expressions.UnaryMinus | negative | SELECT negative(1) | struct | | org.apache.spark.sql.catalyst.expressions.UnaryPositive | positive | SELECT positive(1) | struct<(+ 1):int> | | org.apache.spark.sql.catalyst.expressions.Unhex | unhex | SELECT decode(unhex('537061726B2053514C'), 'UTF-8') | struct | +| org.apache.spark.sql.catalyst.expressions.UnixDate | unix_date | SELECT unix_date(DATE("1970-01-02")) | struct | | org.apache.spark.sql.catalyst.expressions.UnixMicros | unix_micros | SELECT unix_micros(TIMESTAMP('1970-01-01 00:00:01Z')) | struct | | org.apache.spark.sql.catalyst.expressions.UnixMillis | unix_millis | SELECT 
unix_millis(TIMESTAMP('1970-01-01 00:00:01Z')) | struct | | org.apache.spark.sql.catalyst.expressions.UnixSeconds | unix_seconds | SELECT unix_seconds(TIMESTAMP('1970-01-01 00:00:01Z')) | struct | diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql index c2ccb3ee0db06..e35266a85d46b 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql @@ -18,7 +18,10 @@ select TIMESTAMP_SECONDS(0.1234567d), TIMESTAMP_SECONDS(FLOAT(0.1234567)); select UNIX_SECONDS(TIMESTAMP('2020-12-01 14:30:08Z')), UNIX_SECONDS(TIMESTAMP('2020-12-01 14:30:08.999999Z')), UNIX_SECONDS(null); select UNIX_MILLIS(TIMESTAMP('2020-12-01 14:30:08Z')), UNIX_MILLIS(TIMESTAMP('2020-12-01 14:30:08.999999Z')), UNIX_MILLIS(null); select UNIX_MICROS(TIMESTAMP('2020-12-01 14:30:08Z')), UNIX_MICROS(TIMESTAMP('2020-12-01 14:30:08.999999Z')), UNIX_MICROS(null); - +-- DATE_FROM_UNIX_DATE +select DATE_FROM_UNIX_DATE(0), DATE_FROM_UNIX_DATE(1000), DATE_FROM_UNIX_DATE(null); +-- UNIX_DATE +select UNIX_DATE(DATE('1970-01-01')), UNIX_DATE(DATE('2020-12-04')), UNIX_DATE(null); -- [SPARK-16836] current_date and current_timestamp literals select current_date = current_date(), current_timestamp = current_timestamp(); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out index 9d99d3b870b3f..18a751f573bc2 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 120 +-- Number of queries: 122 -- !query @@ -111,6 +111,22 @@ struct +-- !query output +1970-01-01 1972-09-27 NULL + + +-- !query +select UNIX_DATE(DATE('1970-01-01')), UNIX_DATE(DATE('2020-12-04')), UNIX_DATE(null) +-- !query schema +struct +-- !query output +0 18600 NULL + + -- !query select current_date = current_date(), current_timestamp = current_timestamp() -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out index 73e9823d96a73..be75f6fb994dd 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 120 +-- Number of queries: 122 -- !query @@ -111,6 +111,22 @@ struct +-- !query output +1970-01-01 1972-09-27 NULL + + +-- !query +select UNIX_DATE(DATE('1970-01-01')), UNIX_DATE(DATE('2020-12-04')), UNIX_DATE(null) +-- !query schema +struct +-- !query output +0 18600 NULL + + -- !query select current_date = current_date(), current_timestamp = current_timestamp() -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index 2c39c1291aa70..1e963ed16fd96 100755 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 120 +-- Number of queries: 122 -- !query @@ -111,6 +111,22 @@ struct +-- !query output +1970-01-01 1972-09-27 NULL + + +-- !query +select UNIX_DATE(DATE('1970-01-01')), 
UNIX_DATE(DATE('2020-12-04')), UNIX_DATE(null) +-- !query schema +struct +-- !query output +0 18600 NULL + + -- !query select current_date = current_date(), current_timestamp = current_timestamp() -- !query schema From 512fb32b38e4694abd9f667581cdd5e99dee811f Mon Sep 17 00:00:00 2001 From: luluorta Date: Thu, 3 Dec 2020 14:58:56 +0000 Subject: [PATCH 0650/1009] [SPARK-26218][SQL][FOLLOW UP] Fix the corner case of codegen when casting float to Integer ### What changes were proposed in this pull request? This is a followup of [#27151](https://github.com/apache/spark/pull/27151). It fixes the same issue for the codegen path. ### Why are the changes needed? Result corrupt. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added Unit test. Closes #30585 from luluorta/SPARK-26218. Authored-by: luluorta Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/expressions/Cast.scala | 52 +++++++------------ .../sql/catalyst/expressions/CastSuite.scala | 5 ++ 2 files changed, 24 insertions(+), 33 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 95f09d64c484b..1b2e2db932970 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -1393,25 +1393,19 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit """ } - private[this] def lowerAndUpperBound( - fractionType: String, - integralType: String): (String, String) = { - assert(fractionType == "float" || fractionType == "double") - val typeIndicator = fractionType.charAt(0) - val (min, max) = integralType.toLowerCase(Locale.ROOT) match { - case "long" => (Long.MinValue, Long.MaxValue) - case "int" => (Int.MinValue, Int.MaxValue) - case "short" => (Short.MinValue, Short.MaxValue) - case "byte" => (Byte.MinValue, Byte.MaxValue) + private[this] def lowerAndUpperBound(integralType: String): (String, String) = { + val (min, max, typeIndicator) = integralType.toLowerCase(Locale.ROOT) match { + case "long" => (Long.MinValue, Long.MaxValue, "L") + case "int" => (Int.MinValue, Int.MaxValue, "") + case "short" => (Short.MinValue, Short.MaxValue, "") + case "byte" => (Byte.MinValue, Byte.MaxValue, "") } (min.toString + typeIndicator, max.toString + typeIndicator) } - private[this] def castFractionToIntegralTypeCode( - fractionType: String, - integralType: String): CastFunction = { + private[this] def castFractionToIntegralTypeCode(integralType: String): CastFunction = { assert(ansiEnabled) - val (min, max) = lowerAndUpperBound(fractionType, integralType) + val (min, max) = lowerAndUpperBound(integralType) val mathClass = classOf[Math].getName // When casting floating values to integral types, Spark uses the method `Numeric.toInt` // Or `Numeric.toLong` directly. 
For positive floating values, it is equivalent to `Math.floor`; @@ -1449,12 +1443,10 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit (c, evPrim, evNull) => code"$evNull = true;" case TimestampType => castTimestampToIntegralTypeCode(ctx, "byte") case DecimalType() => castDecimalToIntegralTypeCode(ctx, "byte") - case _: ShortType | _: IntegerType | _: LongType if ansiEnabled => + case ShortType | IntegerType | LongType if ansiEnabled => castIntegralTypeToIntegralTypeExactCode("byte") - case _: FloatType if ansiEnabled => - castFractionToIntegralTypeCode("float", "byte") - case _: DoubleType if ansiEnabled => - castFractionToIntegralTypeCode("double", "byte") + case FloatType | DoubleType if ansiEnabled => + castFractionToIntegralTypeCode("byte") case x: NumericType => (c, evPrim, evNull) => code"$evPrim = (byte) $c;" } @@ -1482,12 +1474,10 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit (c, evPrim, evNull) => code"$evNull = true;" case TimestampType => castTimestampToIntegralTypeCode(ctx, "short") case DecimalType() => castDecimalToIntegralTypeCode(ctx, "short") - case _: IntegerType | _: LongType if ansiEnabled => + case IntegerType | LongType if ansiEnabled => castIntegralTypeToIntegralTypeExactCode("short") - case _: FloatType if ansiEnabled => - castFractionToIntegralTypeCode("float", "short") - case _: DoubleType if ansiEnabled => - castFractionToIntegralTypeCode("double", "short") + case FloatType | DoubleType if ansiEnabled => + castFractionToIntegralTypeCode("short") case x: NumericType => (c, evPrim, evNull) => code"$evPrim = (short) $c;" } @@ -1513,11 +1503,9 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit (c, evPrim, evNull) => code"$evNull = true;" case TimestampType => castTimestampToIntegralTypeCode(ctx, "int") case DecimalType() => castDecimalToIntegralTypeCode(ctx, "int") - case _: LongType if ansiEnabled => castIntegralTypeToIntegralTypeExactCode("int") - case _: FloatType if ansiEnabled => - castFractionToIntegralTypeCode("float", "int") - case _: DoubleType if ansiEnabled => - castFractionToIntegralTypeCode("double", "int") + case LongType if ansiEnabled => castIntegralTypeToIntegralTypeExactCode("int") + case FloatType | DoubleType if ansiEnabled => + castFractionToIntegralTypeCode("int") case x: NumericType => (c, evPrim, evNull) => code"$evPrim = (int) $c;" } @@ -1544,10 +1532,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case TimestampType => (c, evPrim, evNull) => code"$evPrim = (long) ${timestampToLongCode(c)};" case DecimalType() => castDecimalToIntegralTypeCode(ctx, "long") - case _: FloatType if ansiEnabled => - castFractionToIntegralTypeCode("float", "long") - case _: DoubleType if ansiEnabled => - castFractionToIntegralTypeCode("double", "long") + case FloatType | DoubleType if ansiEnabled => + castFractionToIntegralTypeCode("long") case x: NumericType => (c, evPrim, evNull) => code"$evPrim = (long) $c;" } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index d284c417042c1..35db25ec9342c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -975,6 +975,11 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { } } } + + test("SPARK-26218: 
Fix the corner case of codegen when casting float to Integer") { + checkExceptionInExpression[ArithmeticException]( + cast(cast(Literal("2147483648"), FloatType), IntegerType), "overflow") + } } /** From 0706e64c49f66431560cdbecb28adcda244c3342 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 3 Dec 2020 15:24:44 +0000 Subject: [PATCH 0651/1009] [SPARK-30098][SQL] Add a configuration to use default datasource as provider for CREATE TABLE command ### What changes were proposed in this pull request? For CRETE TABLE [AS SELECT] command, creates native Parquet table if neither USING nor STORE AS is specified and `spark.sql.legacy.createHiveTableByDefault` is false. This is a retry after we unify the CREATE TABLE syntax. It partially reverts https://github.com/apache/spark/commit/d2bec5e265e0aa4fa527c3f43cfe738cdbdc4598 This PR allows `CREATE EXTERNAL TABLE` when `LOCATION` is present. This was not allowed for data source tables before, which is an unnecessary behavior different with hive tables. ### Why are the changes needed? Changing from Hive text table to native Parquet table has many benefits: 1. be consistent with `DataFrameWriter.saveAsTable`. 2. better performance 3. better support for nested types (Hive text table doesn't work well with nested types, e.g. `insert into t values struct(null)` actually inserts a null value not `struct(null)` if `t` is a Hive text table, which leads to wrong result) 4. better interoperability as Parquet is a more popular open file format. ### Does this PR introduce _any_ user-facing change? No by default. If the config is set, the behavior change is described below: Behavior-wise, the change is very small as the native Parquet table is also Hive-compatible. All the Spark DDL commands that works for hive tables also works for native Parquet tables, with two exceptions: `ALTER TABLE SET [SERDE | SERDEPROPERTIES]` and `LOAD DATA`. char/varchar behavior has been taken care by https://github.com/apache/spark/pull/30412, and there is no behavior difference between data source and hive tables. One potential issue is `CREATE TABLE ... LOCATION ...` while users want to directly access the files later. It's more like a corner case and the legacy config should be good enough. Another potential issue is users may use Spark to create the table and then use Hive to add partitions with different serde. This is not allowed for Spark native tables. ### How was this patch tested? Re-enable the tests Closes #30554 from cloud-fan/create-table. 
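For illustration, a minimal sketch of the intended behavior with the new config (not taken from this patch; it assumes a Hive-enabled PySpark session and a throwaway table name):

```
# With the legacy flag off, CREATE TABLE without USING / STORED AS is expected
# to produce a native data source (Parquet) table instead of a Hive serde table.
spark.conf.set("spark.sql.legacy.createHiveTableByDefault", "false")
spark.sql("CREATE TABLE tbl_no_provider (id INT)")
# The reported provider should now be the default data source, e.g. 'parquet'.
spark.sql("DESCRIBE TABLE EXTENDED tbl_no_provider").where("col_name = 'Provider'").show()
```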
Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../apache/spark/sql/internal/SQLConf.scala | 9 +++++ .../analysis/ResolveSessionCatalog.scala | 13 +++++--- .../sql/connector/DataSourceV2SQLSuite.scala | 33 ++++++++++--------- .../command/PlanResolutionSuite.scala | 6 ++-- .../execution/HiveCompatibilitySuite.scala | 4 +++ .../sql/hive/HiveShowCreateTableSuite.scala | 18 +++++++++- .../apache/spark/sql/hive/InsertSuite.scala | 3 +- .../spark/sql/hive/QueryPartitionSuite.scala | 5 +-- .../spark/sql/hive/StatisticsSuite.scala | 27 ++++++++++----- .../spark/sql/hive/client/VersionsSuite.scala | 1 + .../sql/hive/execution/HiveDDLSuite.scala | 2 +- .../sql/hive/execution/HiveSerDeSuite.scala | 5 +-- .../hive/execution/HiveTableScanSuite.scala | 5 ++- .../sql/hive/execution/SQLQuerySuite.scala | 1 + .../apache/spark/sql/hive/test/TestHive.scala | 13 ++++---- 15 files changed, 100 insertions(+), 45 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index a1d6f9f608873..b32476a5af71a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2921,6 +2921,15 @@ object SQLConf { .stringConf .createWithDefault("") + val LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT = + buildConf("spark.sql.legacy.createHiveTableByDefault") + .internal() + .doc("When set to true, CREATE TABLE syntax without USING or STORED AS will use Hive " + + s"instead of the value of ${DEFAULT_DATA_SOURCE_NAME.key} as the table provider.") + .version("3.1.0") + .booleanConf + .createWithDefault(true) + /** * Holds information about keys that have been deprecated. * diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index f6005f4b413a2..f35fcdc07c372 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource} import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 -import org.apache.spark.sql.internal.HiveSerDe +import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} import org.apache.spark.sql.types.{MetadataBuilder, StructField, StructType} /** @@ -636,11 +636,16 @@ class ResolveSessionCatalog( (storageFormat, DDLUtils.HIVE_PROVIDER) } else { // If neither USING nor STORED AS/ROW FORMAT is specified, we create native data source - // tables if it's a CTAS and `conf.convertCTAS` is true. - // TODO: create native data source table by default for non-CTAS. - if (ctas && conf.convertCTAS) { + // tables if: + // 1. `LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT` is false, or + // 2. It's a CTAS and `conf.convertCTAS` is true. + val createHiveTableByDefault = conf.getConf(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT) + if (!createHiveTableByDefault || (ctas && conf.convertCTAS)) { (nonHiveStorageFormat, conf.defaultDataSourceName) } else { + logWarning("A Hive serde table will be created as there is no table provider " + + s"specified. 
You can set ${SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT.key} to false " + + "so that native data source table will be created instead.") (defaultHiveStorage, DDLUtils.HIVE_PROVIDER) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 583bc694dc3be..7635590ab462e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -266,22 +266,23 @@ class DataSourceV2SQLSuite checkAnswer(spark.internalCreateDataFrame(rdd, table.schema), Seq.empty) } - // TODO: ignored by SPARK-31707, restore the test after create table syntax unification - ignore("CreateTable: without USING clause") { - // unset this config to use the default v2 session catalog. - spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) - val testCatalog = catalog("testcat").asTableCatalog - - sql("CREATE TABLE testcat.t1 (id int)") - val t1 = testCatalog.loadTable(Identifier.of(Array(), "t1")) - // Spark shouldn't set the default provider for catalog plugins. - assert(!t1.properties.containsKey(TableCatalog.PROP_PROVIDER)) - - sql("CREATE TABLE t2 (id int)") - val t2 = spark.sessionState.catalogManager.v2SessionCatalog.asTableCatalog - .loadTable(Identifier.of(Array("default"), "t2")).asInstanceOf[V1Table] - // Spark should set the default provider as DEFAULT_DATA_SOURCE_NAME for the session catalog. - assert(t2.v1Table.provider == Some(conf.defaultDataSourceName)) + test("CreateTable: without USING clause") { + withSQLConf(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT.key -> "false") { + // unset this config to use the default v2 session catalog. + spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) + val testCatalog = catalog("testcat").asTableCatalog + + sql("CREATE TABLE testcat.t1 (id int)") + val t1 = testCatalog.loadTable(Identifier.of(Array(), "t1")) + // Spark shouldn't set the default provider for catalog plugins. + assert(!t1.properties.containsKey(TableCatalog.PROP_PROVIDER)) + + sql("CREATE TABLE t2 (id int)") + val t2 = spark.sessionState.catalogManager.v2SessionCatalog.asTableCatalog + .loadTable(Identifier.of(Array("default"), "t2")).asInstanceOf[V1Table] + // Spark should set the default provider as DEFAULT_DATA_SOURCE_NAME for the session catalog. + assert(t2.v1Table.provider == Some(conf.defaultDataSourceName)) + } } test("CreateTable/RepalceTable: invalid schema if has interval type") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 20cad721d3d0e..33515ad41e918 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -1588,7 +1588,7 @@ class PlanResolutionSuite extends AnalysisTest { .add("b", StringType) ) ) - compare("CREATE TABLE my_tab(a INT COMMENT 'test', b STRING) " + + compare("CREATE TABLE my_tab(a INT COMMENT 'test', b STRING) STORED AS textfile " + "PARTITIONED BY (c INT, d STRING COMMENT 'test2')", createTable( table = "my_tab", @@ -1616,7 +1616,7 @@ class PlanResolutionSuite extends AnalysisTest { ) // Partitioned by a StructType should be accepted by `SparkSqlParser` but will fail an analyze // rule in `AnalyzeCreateTable`. 
- compare("CREATE TABLE my_tab(a INT COMMENT 'test', b STRING) " + + compare("CREATE TABLE my_tab(a INT COMMENT 'test', b STRING) STORED AS textfile " + "PARTITIONED BY (nested STRUCT)", createTable( table = "my_tab", @@ -1890,7 +1890,7 @@ class PlanResolutionSuite extends AnalysisTest { } test("Test CTAS #3") { - val s3 = """CREATE TABLE page_view AS SELECT * FROM src""" + val s3 = """CREATE TABLE page_view STORED AS textfile AS SELECT * FROM src""" val (desc, exists) = extractTableDesc(s3) assert(exists == false) assert(desc.identifier.database == Some("default")) diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index 462206d8c546f..4ce1964a19bd9 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -40,6 +40,8 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { private val originalInMemoryPartitionPruning = TestHive.conf.inMemoryPartitionPruning private val originalCrossJoinEnabled = TestHive.conf.crossJoinEnabled private val originalSessionLocalTimeZone = TestHive.conf.sessionLocalTimeZone + private val originalCreateHiveTable = + TestHive.conf.getConf(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT) def testCases: Seq[(String, File)] = { hiveQueryDir.listFiles.map(f => f.getName.stripSuffix(".q") -> f) @@ -59,6 +61,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { // Fix session local timezone to America/Los_Angeles for those timezone sensitive tests // (timestamp_*) TestHive.setConf(SQLConf.SESSION_LOCAL_TIMEZONE, "America/Los_Angeles") + TestHive.setConf(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT, true) RuleExecutor.resetMetrics() } @@ -69,6 +72,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, originalInMemoryPartitionPruning) TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, originalCrossJoinEnabled) TestHive.setConf(SQLConf.SESSION_LOCAL_TIMEZONE, originalSessionLocalTimeZone) + TestHive.setConf(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT, originalCreateHiveTable) // For debugging dump some statistics about how much time was spent in various optimizer rules logWarning(RuleExecutor.dumpTimeSpent()) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala index 3e7c3e6799724..2fb67c793dc6a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala @@ -21,10 +21,26 @@ import org.apache.spark.sql.{AnalysisException, ShowCreateTableSuite} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.hive.test.TestHiveSingleton -import org.apache.spark.sql.internal.HiveSerDe +import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSingleton { + private var origCreateHiveTableConfig = false + + protected override def beforeAll(): Unit = { + super.beforeAll() + origCreateHiveTableConfig = + 
spark.conf.get(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT) + spark.conf.set(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT.key, true) + } + + protected override def afterAll(): Unit = { + spark.conf.set( + SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT.key, + origCreateHiveTableConfig) + super.afterAll() + } + test("view") { Seq(true, false).foreach { serde => withView("v1") { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala index ebc6cfb77d355..71750e6b3a516 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala @@ -277,7 +277,8 @@ class InsertSuite extends QueryTest with TestHiveSingleton with BeforeAndAfter test("Test partition mode = strict") { withSQLConf(("hive.exec.dynamic.partition.mode", "strict")) { withTable("partitioned") { - sql("CREATE TABLE partitioned (id bigint, data string) PARTITIONED BY (part string)") + sql("CREATE TABLE partitioned (id bigint, data string) USING hive " + + "PARTITIONED BY (part string)") val data = (1 to 10).map(i => (i, s"data-$i", if ((i % 2) == 0) "even" else "odd")) .toDF("id", "data", "part") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala index 483622b16762a..cec6ec1ee1275 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala @@ -38,7 +38,7 @@ class QueryPartitionSuite extends QueryTest with SQLTestUtils with TestHiveSingl testData.createOrReplaceTempView("testData") // create the table for test - sql(s"CREATE TABLE table_with_partition(key int,value string) " + + sql(s"CREATE TABLE table_with_partition(key int,value string) USING hive " + s"PARTITIONED by (ds string) location '${tmpDir.toURI}' ") sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='1') " + "SELECT key,value FROM testData") @@ -81,7 +81,8 @@ class QueryPartitionSuite extends QueryTest with SQLTestUtils with TestHiveSingl test("SPARK-21739: Cast expression should initialize timezoneId") { withTable("table_with_timestamp_partition") { - sql("CREATE TABLE table_with_timestamp_partition(value int) PARTITIONED BY (ts TIMESTAMP)") + sql("CREATE TABLE table_with_timestamp_partition(value int) USING hive " + + "PARTITIONED BY (ts TIMESTAMP)") sql("INSERT OVERWRITE TABLE table_with_timestamp_partition " + "PARTITION (ts = '2010-01-01 00:00:00.000') VALUES (1)") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 43d1ba04c561d..2ea98943011f4 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -165,7 +165,8 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto // Partitioned table val partTable = "part_table" withTable(partTable) { - sql(s"CREATE TABLE $partTable (key STRING, value STRING) PARTITIONED BY (ds STRING)") + sql(s"CREATE TABLE $partTable (key STRING, value STRING) USING hive " + + "PARTITIONED BY (ds STRING)") sql(s"INSERT INTO TABLE $partTable PARTITION (ds='2010-01-01') SELECT * FROM src") sql(s"INSERT INTO TABLE $partTable PARTITION (ds='2010-01-02') SELECT * FROM src") sql(s"INSERT INTO 
TABLE $partTable PARTITION (ds='2010-01-03') SELECT * FROM src") @@ -191,7 +192,8 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto SQLConf.PARALLEL_FILE_LISTING_IN_STATS_COMPUTATION.key -> "True") { val checkSizeTable = "checkSizeTable" withTable(checkSizeTable) { - sql(s"CREATE TABLE $checkSizeTable (key STRING, value STRING) PARTITIONED BY (ds STRING)") + sql(s"CREATE TABLE $checkSizeTable (key STRING, value STRING) USING hive " + + "PARTITIONED BY (ds STRING)") sql(s"INSERT INTO TABLE $checkSizeTable PARTITION (ds='2010-01-01') SELECT * FROM src") sql(s"INSERT INTO TABLE $checkSizeTable PARTITION (ds='2010-01-02') SELECT * FROM src") sql(s"INSERT INTO TABLE $checkSizeTable PARTITION (ds='2010-01-03') SELECT * FROM src") @@ -274,7 +276,8 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto test("SPARK-22745 - read Hive's statistics for partition") { val tableName = "hive_stats_part_table" withTable(tableName) { - sql(s"CREATE TABLE $tableName (key STRING, value STRING) PARTITIONED BY (ds STRING)") + sql(s"CREATE TABLE $tableName (key STRING, value STRING) USING hive " + + "PARTITIONED BY (ds STRING)") sql(s"INSERT INTO TABLE $tableName PARTITION (ds='2017-01-01') SELECT * FROM src") var partition = spark.sessionState.catalog .getPartition(TableIdentifier(tableName), Map("ds" -> "2017-01-01")) @@ -296,7 +299,8 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto val tableName = "analyzeTable_part" withTable(tableName) { withTempPath { path => - sql(s"CREATE TABLE $tableName (key STRING, value STRING) PARTITIONED BY (ds STRING)") + sql(s"CREATE TABLE $tableName (key STRING, value STRING) USING hive " + + "PARTITIONED BY (ds STRING)") val partitionDates = List("2010-01-01", "2010-01-02", "2010-01-03") partitionDates.foreach { ds => @@ -321,6 +325,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto sql( s""" |CREATE TABLE $sourceTableName (key STRING, value STRING) + |USING hive |PARTITIONED BY (ds STRING) |LOCATION '${path.toURI}' """.stripMargin) @@ -338,6 +343,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto sql( s""" |CREATE TABLE $tableName (key STRING, value STRING) + |USING hive |PARTITIONED BY (ds STRING) |LOCATION '${path.toURI}' """.stripMargin) @@ -371,7 +377,8 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto } withTable(tableName) { - sql(s"CREATE TABLE $tableName (key STRING, value STRING) PARTITIONED BY (ds STRING)") + sql(s"CREATE TABLE $tableName (key STRING, value STRING) USING hive " + + "PARTITIONED BY (ds STRING)") createPartition("2010-01-01", "SELECT '1', 'A' from src") createPartition("2010-01-02", "SELECT '1', 'A' from src UNION ALL SELECT '1', 'A' from src") @@ -424,7 +431,8 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto } withTable(tableName) { - sql(s"CREATE TABLE $tableName (key STRING, value STRING) PARTITIONED BY (ds STRING, hr INT)") + sql(s"CREATE TABLE $tableName (key STRING, value STRING) USING hive " + + "PARTITIONED BY (ds STRING, hr INT)") createPartition("2010-01-01", 10, "SELECT '1', 'A' from src") createPartition("2010-01-01", 11, "SELECT '1', 'A' from src") @@ -472,7 +480,8 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto } withTable(tableName) { - sql(s"CREATE TABLE $tableName (key STRING, value STRING) PARTITIONED BY (ds STRING, hr INT)") + sql(s"CREATE TABLE $tableName (key 
STRING, value STRING) USING hive " + + "PARTITIONED BY (ds STRING, hr INT)") createPartition("2010-01-01", 10, "SELECT '1', 'A' from src") createPartition("2010-01-01", 11, "SELECT '1', 'A' from src") @@ -961,7 +970,8 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto Seq(false, true).foreach { autoUpdate => withSQLConf(SQLConf.AUTO_SIZE_UPDATE_ENABLED.key -> autoUpdate.toString) { withTable(table) { - sql(s"CREATE TABLE $table (i INT, j STRING) PARTITIONED BY (ds STRING, hr STRING)") + sql(s"CREATE TABLE $table (i INT, j STRING) USING hive " + + "PARTITIONED BY (ds STRING, hr STRING)") // table has two partitions initially for (ds <- Seq("2008-04-08"); hr <- Seq("11", "12")) { sql(s"INSERT OVERWRITE TABLE $table PARTITION (ds='$ds',hr='$hr') SELECT 1, 'a'") @@ -1034,6 +1044,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto sql( s""" |CREATE TABLE $managedTable (key INT, value STRING) + |USING hive |PARTITIONED BY (ds STRING, hr STRING) """.stripMargin) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala index d9ba6dd80e4ef..684529aa330a7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala @@ -798,6 +798,7 @@ class VersionsSuite extends SparkFunSuite with Logging { versionSpark.sql( """ |CREATE TABLE tbl(c1 string) + |USING hive |PARTITIONED BY (ds STRING) """.stripMargin) versionSpark.sql("INSERT OVERWRITE TABLE tbl partition (ds='2') SELECT '1'") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 2dfb8bb552594..ce31e39985971 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -983,7 +983,7 @@ class HiveDDLSuite } test("alter table partition - storage information") { - sql("CREATE TABLE boxes (height INT, length INT) PARTITIONED BY (width INT)") + sql("CREATE TABLE boxes (height INT, length INT) STORED AS textfile PARTITIONED BY (width INT)") sql("INSERT OVERWRITE TABLE boxes PARTITION (width=4) SELECT 4, 4") val catalog = spark.sessionState.catalog val expectedSerde = "com.sparkbricks.serde.ColumnarSerDe" diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala index f723c9f80c2ab..d7129bcb37e69 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala @@ -88,7 +88,7 @@ class HiveSerDeSuite extends HiveComparisonTest with PlanTest with BeforeAndAfte test("Test the default fileformat for Hive-serde tables") { withSQLConf("hive.default.fileformat" -> "orc") { val (desc, exists) = extractTableDesc( - "CREATE TABLE IF NOT EXISTS fileformat_test (id int)") + "CREATE TABLE IF NOT EXISTS fileformat_test (id int) USING hive") assert(exists) assert(desc.storage.inputFormat == Some("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat")) assert(desc.storage.outputFormat == Some("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat")) @@ -96,7 +96,8 @@ class HiveSerDeSuite extends HiveComparisonTest with PlanTest 
with BeforeAndAfte } withSQLConf("hive.default.fileformat" -> "parquet") { - val (desc, exists) = extractTableDesc("CREATE TABLE IF NOT EXISTS fileformat_test (id int)") + val (desc, exists) = extractTableDesc( + "CREATE TABLE IF NOT EXISTS fileformat_test (id int) USING hive") assert(exists) val input = desc.storage.inputFormat val output = desc.storage.outputFormat diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala index 4a50621d89d4e..5b43f82f253ea 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala @@ -113,6 +113,7 @@ class HiveTableScanSuite extends HiveComparisonTest with SQLTestUtils with TestH sql( s""" |CREATE TABLE $table(id string) + |USING hive |PARTITIONED BY (p1 string,p2 string,p3 string,p4 string,p5 string) """.stripMargin) sql( @@ -157,6 +158,7 @@ class HiveTableScanSuite extends HiveComparisonTest with SQLTestUtils with TestH sql( s""" |CREATE TABLE $table(id string) + |USING hive |PARTITIONED BY (p1 string,p2 string,p3 string,p4 string,p5 string) """.stripMargin) sql( @@ -182,6 +184,7 @@ class HiveTableScanSuite extends HiveComparisonTest with SQLTestUtils with TestH sql( s""" |CREATE TABLE $table (id int) + |USING hive |PARTITIONED BY (a int, b int) """.stripMargin) val scan1 = getHiveTableScanExec(s"SELECT * FROM $table WHERE a = 1 AND b = 2") @@ -252,7 +255,7 @@ class HiveTableScanSuite extends HiveComparisonTest with SQLTestUtils with TestH test("SPARK-32069: Improve error message on reading unexpected directory") { withTable("t") { withTempDir { f => - sql(s"CREATE TABLE t(i LONG) LOCATION '${f.getAbsolutePath}'") + sql(s"CREATE TABLE t(i LONG) USING hive LOCATION '${f.getAbsolutePath}'") sql("INSERT INTO t VALUES(1)") val dir = new File(f.getCanonicalPath + "/data") dir.mkdir() diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 79b3c3efe531c..6b82b1267bc66 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -2026,6 +2026,7 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi sql( """ |CREATE TABLE part_table (c STRING) + |STORED AS textfile |PARTITIONED BY (d STRING) """.stripMargin) sql(s"LOAD DATA LOCAL INPATH '$path/part-r-000011' " + diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index a25c61c96f3d8..e996f2c6ec78f 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -327,20 +327,22 @@ private[hive] class TestHiveSparkSession( } if (loadTestTables) { + def createTableSQL(tblName: String): String = { + s"CREATE TABLE $tblName (key INT, value STRING) STORED AS textfile" + } // The test tables that are defined in the Hive QTestUtil. 
// /itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java // https://github.com/apache/hive/blob/branch-0.13/data/scripts/q_test_init.sql @transient val hiveQTestUtilTables: Seq[TestTable] = Seq( TestTable("src", - "CREATE TABLE src (key INT, value STRING) STORED AS TEXTFILE".cmd, + createTableSQL("src").cmd, s"LOAD DATA LOCAL INPATH '${quoteHiveFile("data/files/kv1.txt")}' INTO TABLE src".cmd), TestTable("src1", - "CREATE TABLE src1 (key INT, value STRING) STORED AS TEXTFILE".cmd, + createTableSQL("src1").cmd, s"LOAD DATA LOCAL INPATH '${quoteHiveFile("data/files/kv3.txt")}' INTO TABLE src1".cmd), TestTable("srcpart", () => { - "CREATE TABLE srcpart (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING)" - .cmd.apply() + s"${createTableSQL("srcpart")} PARTITIONED BY (ds STRING, hr STRING)".cmd.apply() for (ds <- Seq("2008-04-08", "2008-04-09"); hr <- Seq("11", "12")) { s""" |LOAD DATA LOCAL INPATH '${quoteHiveFile("data/files/kv1.txt")}' @@ -349,8 +351,7 @@ private[hive] class TestHiveSparkSession( } }), TestTable("srcpart1", () => { - "CREATE TABLE srcpart1 (key INT, value STRING) PARTITIONED BY (ds STRING, hr INT)" - .cmd.apply() + s"${createTableSQL("srcpart1")} PARTITIONED BY (ds STRING, hr INT)".cmd.apply() for (ds <- Seq("2008-04-08", "2008-04-09"); hr <- 11 to 12) { s""" |LOAD DATA LOCAL INPATH '${quoteHiveFile("data/files/kv1.txt")}' From bd711863fdcdde21a7d64de8a9b6b7a8bf7c19ec Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Fri, 4 Dec 2020 01:37:44 +0900 Subject: [PATCH 0652/1009] [SPARK-33629][PYTHON] Make spark.buffer.size configuration visible on driver side ### What changes were proposed in this pull request? `spark.buffer.size` not applied in driver from pyspark. In this PR I've fixed this issue. ### Why are the changes needed? Apply the mentioned config on driver side. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing unit tests + manually. Added the following code temporarily: ``` def local_connect_and_auth(port, auth_secret): ... sock.connect(sa) print("SPARK_BUFFER_SIZE: %d" % int(os.environ.get("SPARK_BUFFER_SIZE", 65536))) <- This is the addition sockfile = sock.makefile("rwb", int(os.environ.get("SPARK_BUFFER_SIZE", 65536))) ... ``` Test: ``` #Compile Spark echo "spark.buffer.size 10000" >> conf/spark-defaults.conf $ ./bin/pyspark Python 3.8.5 (default, Jul 21 2020, 10:48:26) [Clang 11.0.3 (clang-1103.0.32.62)] on darwin Type "help", "copyright", "credits" or "license" for more information. 20/12/03 13:38:13 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable Setting default log level to "WARN". To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel). 20/12/03 13:38:14 WARN SparkEnv: I/O encryption enabled without RPC encryption: keys will be visible on the wire. Welcome to ____ __ / __/__ ___ _____/ /__ _\ \/ _ \/ _ `/ __/ '_/ /__ / .__/\_,_/_/ /_/\_\ version 3.1.0-SNAPSHOT /_/ Using Python version 3.8.5 (default, Jul 21 2020 10:48:26) Spark context Web UI available at http://192.168.0.189:4040 Spark context available as 'sc' (master = local[*], app id = local-1606999094506). SparkSession available as 'spark'. >>> sc.setLogLevel("TRACE") >>> sc.parallelize([0, 2, 3, 4, 6], 5).glom().collect() ... SPARK_BUFFER_SIZE: 10000 ... [[0], [2], [3], [4], [6]] >>> ``` Closes #30592 from gaborgsomogyi/SPARK-33629. 
Authored-by: Gabor Somogyi Signed-off-by: HyukjinKwon --- .../main/scala/org/apache/spark/api/python/PythonUtils.scala | 4 ++++ python/pyspark/context.py | 2 ++ 2 files changed, 6 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala index 33849f6fcb65f..2f47d28f09103 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala @@ -89,4 +89,8 @@ private[spark] object PythonUtils { def getPythonAuthSocketTimeout(sc: JavaSparkContext): Long = { sc.conf.get(org.apache.spark.internal.config.Python.PYTHON_AUTH_SOCKET_TIMEOUT) } + + def getSparkBufferSize(sc: JavaSparkContext): Int = { + sc.conf.get(org.apache.spark.internal.config.BUFFER_SIZE) + } } diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 1c542fa897ece..3da535b026137 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -224,6 +224,8 @@ def _do_init(self, master, appName, sparkHome, pyFiles, environment, batchSize, self._encryption_enabled = self._jvm.PythonUtils.isEncryptionEnabled(self._jsc) os.environ["SPARK_AUTH_SOCKET_TIMEOUT"] = \ str(self._jvm.PythonUtils.getPythonAuthSocketTimeout(self._jsc)) + os.environ["SPARK_BUFFER_SIZE"] = \ + str(self._jvm.PythonUtils.getSparkBufferSize(self._jsc)) self.pythonExec = os.environ.get("PYSPARK_PYTHON", 'python') self.pythonVer = "%d.%d" % sys.version_info[:2] From aa13e207c9091e24aae1edcf3bb5cd35d3a27cbb Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Thu, 3 Dec 2020 09:12:30 -0800 Subject: [PATCH 0653/1009] [SPARK-33623][SQL] Add canDeleteWhere to SupportsDelete ### What changes were proposed in this pull request? This PR provides us with a way to check if a data source is going to reject the delete via `deleteWhere` at planning time. ### Why are the changes needed? The only way to support delete statements right now is to implement ``SupportsDelete``. According to its Javadoc, that interface is meant for cases when we can delete data without much effort (e.g. like deleting a complete partition in a Hive table). This PR actually provides us with a way to check if a data source is going to reject the delete via `deleteWhere` at planning time instead of just getting an exception during execution. In the future, we can use this functionality to decide whether Spark should rewrite this delete and execute a distributed query or it can just pass a set of filters. Consider an example of a partitioned Hive table. If we have a delete predicate like `part_col = '2020'`, we can just drop the matching partition to satisfy this delete. In this case, the data source should return `true` from `canDeleteWhere` and use the filters it accepts in `deleteWhere` to drop the partition. I consider this as a delete without significant effort. At the same time, if we have a delete predicate like `id = 10`, Hive tables would not be able to execute this delete using a metadata only operation without rewriting files. In that case, the data source should return `false` from `canDeleteWhere` and we should use a more sophisticated row-level API to find out which records should be removed (the API is yet to be discussed, but we need this PR as a basis). If we decide to support subqueries and all delete use cases by simply extending the existing API, this will mean all data sources will have to implement a lot of Spark logic to determine which records changed. 
I don't think we want to go that way as the Spark logic to determine which records should be deleted is independent of the underlying data source. So the assumption is that Spark will execute a plan to find which records must be deleted for data sources that return `false` from `canDeleteWhere`. ### Does this PR introduce _any_ user-facing change? Yes but it is backward compatible. ### How was this patch tested? This PR comes with a new test. Closes #30562 from aokolnychyi/spark-33623. Authored-by: Anton Okolnychyi Signed-off-by: Dongjoon Hyun --- .../sql/connector/catalog/SupportsDelete.java | 24 ++++++++++++++++++- .../spark/sql/connector/InMemoryTable.scala | 12 ++++++++++ .../datasources/v2/DataSourceV2Strategy.scala | 6 +++++ .../sql/connector/DataSourceV2SQLSuite.scala | 14 +++++++++++ 4 files changed, 55 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsDelete.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsDelete.java index 106f3283a62c8..261e5344be7b9 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsDelete.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsDelete.java @@ -28,8 +28,30 @@ */ @Evolving public interface SupportsDelete { + + /** + * Checks whether it is possible to delete data from a data source table that matches filter + * expressions. + *

      + * Rows should be deleted from the data source iff all of the filter expressions match. + * That is, the expressions must be interpreted as a set of filters that are ANDed together. + *

      + * Spark will call this method at planning time to check whether {@link #deleteWhere(Filter[])} + * would reject the delete operation because it requires significant effort. If this method + * returns false, Spark will not call {@link #deleteWhere(Filter[])} and will try to rewrite + * the delete operation and produce row-level changes if the data source table supports deleting + * individual records. + * + * @param filters filter expressions, used to select rows to delete when all expressions match + * @return true if the delete operation can be performed + */ + default boolean canDeleteWhere(Filter[] filters) { + return true; + } + /** - * Delete data from a data source table that matches filter expressions. + * Delete data from a data source table that matches filter expressions. Note that this method + * will be invoked only if {@link #canDeleteWhere(Filter[])} returns true. *

      * Rows are deleted from the data source iff all of the filter expressions match. That is, the * expressions must be interpreted as a set of filters that are ANDed together. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala index cfb044b428e41..c4c5835d9d1f5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala @@ -335,6 +335,10 @@ class InMemoryTable( } } + override def canDeleteWhere(filters: Array[Filter]): Boolean = { + InMemoryTable.supportsFilters(filters) + } + override def deleteWhere(filters: Array[Filter]): Unit = dataMap.synchronized { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.MultipartIdentifierHelper dataMap --= InMemoryTable.filtersToKeys(dataMap.keys, partCols.map(_.toSeq.quoted), filters) @@ -360,6 +364,14 @@ object InMemoryTable { } } + def supportsFilters(filters: Array[Filter]): Boolean = { + filters.flatMap(splitAnd).forall { + case _: EqualTo => true + case _: IsNotNull => true + case _ => false + } + } + private def extractValue( attr: String, partFieldNames: Seq[String], diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 0c7bc19ad054e..938ba77fede47 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -221,6 +221,12 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat throw new AnalysisException(s"Exec update failed:" + s" cannot translate expression to source filter: $f")) }).toArray + + if (!table.asDeletable.canDeleteWhere(filters)) { + throw new AnalysisException( + s"Cannot delete from table ${table.name} where ${filters.mkString("[", ", ", "]")}") + } + DeleteFromTableExec(table.asDeletable, filters) :: Nil case _ => throw new AnalysisException("DELETE is only supported with v2 tables.") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 7635590ab462e..6ef4fd1372a78 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -1812,6 +1812,20 @@ class DataSourceV2SQLSuite } } + test("DeleteFrom: delete with unsupported predicates") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + sql(s"CREATE TABLE $t (id bigint, data string, p int) USING foo") + sql(s"INSERT INTO $t VALUES (2L, 'a', 2), (2L, 'b', 3), (3L, 'c', 3)") + val exc = intercept[AnalysisException] { + sql(s"DELETE FROM $t WHERE id > 3 AND p > 3") + } + + assert(spark.table(t).count === 3) + assert(exc.getMessage.contains(s"Cannot delete from table $t")) + } + } + test("DeleteFrom: DELETE is only supported with v2 tables") { // unset this config to use the default v2 session catalog. 
spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) From 63f9d474b9ec4b66741fcca1d3c3865c32936a85 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 3 Dec 2020 09:22:53 -0800 Subject: [PATCH 0654/1009] [SPARK-33634][SQL][TESTS] Use Analyzer in PlanResolutionSuite ### What changes were proposed in this pull request? Instead of using several analyzer rules, this PR uses the actual analyzer to run tests in `PlanResolutionSuite`. ### Why are the changes needed? Make the test suite to match reality. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? test-only Closes #30574 from cloud-fan/test. Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun --- .../command/PlanResolutionSuite.scala | 35 ++++++++----------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 33515ad41e918..9b7222da55368 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -26,14 +26,16 @@ import org.mockito.invocation.InvocationOnMock import org.apache.spark.sql.{AnalysisException, SaveMode} import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, Analyzer, CTESubstitution, EmptyFunctionRegistry, NoSuchTableException, ResolveCatalogs, ResolvedTable, ResolveInlineTables, ResolveSessionCatalog, UnresolvedAttribute, UnresolvedRelation, UnresolvedSubqueryColumnAliases, UnresolvedV2Relation} +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, Analyzer, EmptyFunctionRegistry, NoSuchTableException, ResolvedTable, ResolveSessionCatalog, UnresolvedAttribute, UnresolvedRelation, UnresolvedSubqueryColumnAliases, UnresolvedV2Relation} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType, InMemoryCatalog, SessionCatalog} import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Expression, InSubquery, IntegerLiteral, ListQuery, StringLiteral} import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException} -import org.apache.spark.sql.catalyst.plans.logical.{AlterTable, Assignment, CreateTableAsSelect, CreateTableStatement, CreateV2Table, DeleteAction, DeleteFromTable, DescribeRelation, DropTable, InsertAction, InsertIntoStatement, LocalRelation, LogicalPlan, MergeIntoTable, OneRowRelation, Project, ShowTableProperties, SubqueryAlias, UpdateAction, UpdateTable} +import org.apache.spark.sql.catalyst.plans.logical.{AlterTable, AppendData, Assignment, CreateTableAsSelect, CreateTableStatement, CreateV2Table, DeleteAction, DeleteFromTable, DescribeRelation, DropTable, InsertAction, LocalRelation, LogicalPlan, MergeIntoTable, OneRowRelation, Project, ShowTableProperties, SubqueryAlias, UpdateAction, UpdateTable} +import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.FakeV2Provider import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogNotFoundException, Identifier, Table, TableCapability, TableCatalog, TableChange, V1Table} import org.apache.spark.sql.connector.catalog.TableChange.{UpdateColumnComment, UpdateColumnType} +import org.apache.spark.sql.connector.expressions.Transform import 
org.apache.spark.sql.execution.datasources.CreateTable import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} @@ -49,6 +51,7 @@ class PlanResolutionSuite extends AnalysisTest { private val table: Table = { val t = mock(classOf[Table]) when(t.schema()).thenReturn(new StructType().add("i", "int").add("s", "string")) + when(t.partitioning()).thenReturn(Array.empty[Transform]) t } @@ -151,22 +154,12 @@ class PlanResolutionSuite extends AnalysisTest { } else { catalogManagerWithoutDefault } - val analyzer = new Analyzer(catalogManager) - // TODO: run the analyzer directly. - val rules = Seq( - CTESubstitution, - ResolveInlineTables, - analyzer.ResolveRelations, - new ResolveCatalogs(catalogManager), - new ResolveSessionCatalog(catalogManager, _ == Seq("v"), _ => false), - analyzer.ResolveTables, - analyzer.ResolveReferences, - analyzer.ResolveSubqueryColumnAliases, - analyzer.ResolveReferences, - analyzer.ResolveAlterTableChanges) - rules.foldLeft(parsePlan(query)) { - case (plan, rule) => rule.apply(plan) + val analyzer = new Analyzer(catalogManager) { + override val extendedResolutionRules: Seq[Rule[LogicalPlan]] = Seq( + new ResolveSessionCatalog(catalogManager, _ == Seq("v"), _ => false)) } + // We don't check analysis here, as we expect the plan to be unresolved such as `CreateTable`. + analyzer.execute(CatalystSqlParser.parsePlan(query)) } private def parseResolveCompare(query: String, expected: LogicalPlan): Unit = @@ -1156,9 +1149,9 @@ class PlanResolutionSuite extends AnalysisTest { ("ALTER TABLE testcat.tab ALTER COLUMN i TYPE bigint", false), ("ALTER TABLE tab ALTER COLUMN i TYPE bigint", false), (s"ALTER TABLE $v2SessionCatalogTable ALTER COLUMN i TYPE bigint", true), - ("INSERT INTO TABLE tab VALUES (1)", false), - ("INSERT INTO TABLE testcat.tab VALUES (1)", false), - (s"INSERT INTO TABLE $v2SessionCatalogTable VALUES (1)", true), + ("INSERT INTO TABLE tab VALUES (1, 'a')", false), + ("INSERT INTO TABLE testcat.tab VALUES (1, 'a')", false), + (s"INSERT INTO TABLE $v2SessionCatalogTable VALUES (1, 'a')", true), ("DESC TABLE tab", false), ("DESC TABLE testcat.tab", false), (s"DESC TABLE $v2SessionCatalogTable", true), @@ -1183,7 +1176,7 @@ class PlanResolutionSuite extends AnalysisTest { case Project(_, AsDataSourceV2Relation(r)) => assert(r.catalog.exists(_ == catlogIdent)) assert(r.identifier.exists(_.name() == tableIdent)) - case InsertIntoStatement(r: DataSourceV2Relation, _, _, _, _, _) => + case AppendData(r: DataSourceV2Relation, _, _, _) => assert(r.catalog.exists(_ == catlogIdent)) assert(r.identifier.exists(_.name() == tableIdent)) case DescribeRelation(r: ResolvedTable, _, _) => From 7e759b2d95eb3592d62ec010297c39384173a93c Mon Sep 17 00:00:00 2001 From: Weichen Xu Date: Fri, 4 Dec 2020 08:35:50 +0800 Subject: [PATCH 0655/1009] [SPARK-33520][ML][PYSPARK] make CrossValidator/TrainValidateSplit/OneVsRest Reader/Writer support Python backend estimator/evaluator ### What changes were proposed in this pull request? make CrossValidator/TrainValidateSplit/OneVsRest Reader/Writer support Python backend estimator/model ### Why are the changes needed? Currently, pyspark support third-party library to define python backend estimator/evaluator, i.e., estimator that inherit `Estimator` instead of `JavaEstimator`, and only can be used in pyspark. 
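As a rough sketch, the round-trip this change is meant to enable looks like the following (modeled on the tests added below; `DummyLogisticRegression` is the test helper this patch adds to `pyspark.testing.mlutils`, while `dataset` and the paths are placeholders):

```
# Persist a CrossValidator (and its fitted model) whose estimator is a pure
# Python-backend estimator, i.e. one that does not wrap a Java object.
from pyspark.ml.evaluation import BinaryClassificationEvaluator
from pyspark.ml.tuning import CrossValidator, CrossValidatorModel, ParamGridBuilder
from pyspark.testing.mlutils import DummyLogisticRegression

lr = DummyLogisticRegression()  # Python-only Estimator, no Java wrapper
grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build()
cv = CrossValidator(estimator=lr, estimatorParamMaps=grid,
                    evaluator=BinaryClassificationEvaluator())

# Saving used to fail here, because the writer tried to convert the nested
# Python estimator into a Java instance via JavaMLWriter.
cv.save("/tmp/cv_python_backend")
loaded_cv = CrossValidator.load("/tmp/cv_python_backend")

cv_model = cv.fit(dataset)  # `dataset` is any labeled DataFrame
cv_model.save("/tmp/cv_model_python_backend")
loaded_model = CrossValidatorModel.load("/tmp/cv_model_python_backend")
```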
CrossValidator and TrainValidateSplit support tuning these python backend estimator, but cannot support saving/load, becase CrossValidator and TrainValidateSplit writer implementation is use JavaMLWriter, which require to convert nested estimator and evaluator into java instance. OneVsRest saving/load now only support java backend classifier due to similar issue. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Closes #30471 from WeichenXu123/support_pyio_tuning. Authored-by: Weichen Xu Signed-off-by: Weichen Xu --- python/pyspark/ml/classification.py | 128 ++++++- python/pyspark/ml/classification.pyi | 31 +- python/pyspark/ml/tests/test_persistence.py | 14 +- python/pyspark/ml/tests/test_tuning.py | 97 ++++-- python/pyspark/ml/tuning.py | 357 +++++++++++++++++++- python/pyspark/ml/tuning.pyi | 40 +++ python/pyspark/ml/util.py | 42 ++- python/pyspark/ml/util.pyi | 2 + python/pyspark/testing/mlutils.py | 87 +++++ 9 files changed, 739 insertions(+), 59 deletions(-) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 763038ede876a..0553a61c6c771 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -15,6 +15,7 @@ # limitations under the License. # +import os import operator import sys import uuid @@ -33,7 +34,9 @@ _HasVarianceImpurity, _TreeClassifierParams from pyspark.ml.regression import _FactorizationMachinesParams, DecisionTreeRegressionModel from pyspark.ml.base import _PredictorParams -from pyspark.ml.util import JavaMLWritable, JavaMLReadable, HasTrainingSummary +from pyspark.ml.util import DefaultParamsReader, DefaultParamsWriter, \ + JavaMLReadable, JavaMLReader, JavaMLWritable, JavaMLWriter, \ + MLReader, MLReadable, MLWriter, MLWritable, HasTrainingSummary from pyspark.ml.wrapper import JavaParams, \ JavaPredictor, JavaPredictionModel, JavaWrapper from pyspark.ml.common import inherit_doc @@ -2760,7 +2763,7 @@ def getClassifier(self): @inherit_doc -class OneVsRest(Estimator, _OneVsRestParams, HasParallelism, JavaMLReadable, JavaMLWritable): +class OneVsRest(Estimator, _OneVsRestParams, HasParallelism, MLReadable, MLWritable): """ Reduction of Multiclass Classification to Binary Classification. Performs reduction using one against all strategy. 
@@ -2991,8 +2994,73 @@ def _to_java(self): _java_obj.setRawPredictionCol(self.getRawPredictionCol()) return _java_obj + @classmethod + def read(cls): + return OneVsRestReader(cls) + + def write(self): + if isinstance(self.getClassifier(), JavaMLWritable): + return JavaMLWriter(self) + else: + return OneVsRestWriter(self) + + +class _OneVsRestSharedReadWrite: + @staticmethod + def saveImpl(instance, sc, path, extraMetadata=None): + skipParams = ['classifier'] + jsonParams = DefaultParamsWriter.extractJsonParams(instance, skipParams) + DefaultParamsWriter.saveMetadata(instance, path, sc, paramMap=jsonParams, + extraMetadata=extraMetadata) + classifierPath = os.path.join(path, 'classifier') + instance.getClassifier().save(classifierPath) + + @staticmethod + def loadClassifier(path, sc): + classifierPath = os.path.join(path, 'classifier') + return DefaultParamsReader.loadParamsInstance(classifierPath, sc) + + @staticmethod + def validateParams(instance): + elems_to_check = [instance.getClassifier()] + if isinstance(instance, OneVsRestModel): + elems_to_check.extend(instance.models) + + for elem in elems_to_check: + if not isinstance(elem, MLWritable): + raise ValueError(f'OneVsRest write will fail because it contains {elem.uid} ' + f'which is not writable.') + + +@inherit_doc +class OneVsRestReader(MLReader): + def __init__(self, cls): + super(OneVsRestReader, self).__init__() + self.cls = cls + + def load(self, path): + metadata = DefaultParamsReader.loadMetadata(path, self.sc) + if not DefaultParamsReader.isPythonParamsInstance(metadata): + return JavaMLReader(self.cls).load(path) + else: + classifier = _OneVsRestSharedReadWrite.loadClassifier(path, self.sc) + ova = OneVsRest(classifier=classifier)._resetUid(metadata['uid']) + DefaultParamsReader.getAndSetParams(ova, metadata, skipParams=['classifier']) + return ova + + +@inherit_doc +class OneVsRestWriter(MLWriter): + def __init__(self, instance): + super(OneVsRestWriter, self).__init__() + self.instance = instance + + def saveImpl(self, path): + _OneVsRestSharedReadWrite.validateParams(self.instance) + _OneVsRestSharedReadWrite.saveImpl(self.instance, self.sc, path) -class OneVsRestModel(Model, _OneVsRestParams, JavaMLReadable, JavaMLWritable): + +class OneVsRestModel(Model, _OneVsRestParams, MLReadable, MLWritable): """ Model fitted by OneVsRest. This stores the models resulting from training k binary classifiers: one for each class. 
@@ -3023,6 +3091,9 @@ def setRawPredictionCol(self, value): def __init__(self, models): super(OneVsRestModel, self).__init__() self.models = models + if not isinstance(models[0], JavaMLWritable): + return + # set java instance java_models = [model._to_java() for model in self.models] sc = SparkContext._active_spark_context java_models_array = JavaWrapper._new_java_array(java_models, @@ -3160,6 +3231,57 @@ def _to_java(self): _java_obj.set("weightCol", self.getWeightCol()) return _java_obj + @classmethod + def read(cls): + return OneVsRestModelReader(cls) + + def write(self): + if all(map(lambda elem: isinstance(elem, JavaMLWritable), + [self.getClassifier()] + self.models)): + return JavaMLWriter(self) + else: + return OneVsRestModelWriter(self) + + +@inherit_doc +class OneVsRestModelReader(MLReader): + def __init__(self, cls): + super(OneVsRestModelReader, self).__init__() + self.cls = cls + + def load(self, path): + metadata = DefaultParamsReader.loadMetadata(path, self.sc) + if not DefaultParamsReader.isPythonParamsInstance(metadata): + return JavaMLReader(self.cls).load(path) + else: + classifier = _OneVsRestSharedReadWrite.loadClassifier(path, self.sc) + numClasses = metadata['numClasses'] + subModels = [None] * numClasses + for idx in range(numClasses): + subModelPath = os.path.join(path, f'model_{idx}') + subModels[idx] = DefaultParamsReader.loadParamsInstance(subModelPath, self.sc) + ovaModel = OneVsRestModel(subModels)._resetUid(metadata['uid']) + ovaModel.set(ovaModel.classifier, classifier) + DefaultParamsReader.getAndSetParams(ovaModel, metadata, skipParams=['classifier']) + return ovaModel + + +@inherit_doc +class OneVsRestModelWriter(MLWriter): + def __init__(self, instance): + super(OneVsRestModelWriter, self).__init__() + self.instance = instance + + def saveImpl(self, path): + _OneVsRestSharedReadWrite.validateParams(self.instance) + instance = self.instance + numClasses = len(instance.models) + extraMetadata = {'numClasses': numClasses} + _OneVsRestSharedReadWrite.saveImpl(instance, self.sc, path, extraMetadata=extraMetadata) + for idx in range(numClasses): + subModelPath = os.path.join(path, f'model_{idx}') + instance.models[idx].save(subModelPath) + @inherit_doc class FMClassifier(_JavaProbabilisticClassifier, _FactorizationMachinesParams, JavaMLWritable, diff --git a/python/pyspark/ml/classification.pyi b/python/pyspark/ml/classification.pyi index c44176a13a69b..a4a3d21018ad9 100644 --- a/python/pyspark/ml/classification.pyi +++ b/python/pyspark/ml/classification.pyi @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. -from typing import Any, List, Optional +from typing import Any, List, Optional, Type from pyspark.ml._typing import JM, M, P, T, ParamMap import abc @@ -53,7 +53,8 @@ from pyspark.ml.tree import ( _TreeClassifierParams, _TreeEnsembleModel, ) -from pyspark.ml.util import HasTrainingSummary, JavaMLReadable, JavaMLWritable +from pyspark.ml.util import HasTrainingSummary, JavaMLReadable, JavaMLWritable, \ + MLReader, MLReadable, MLWriter, MLWritable from pyspark.ml.wrapper import JavaPredictionModel, JavaPredictor, JavaWrapper from pyspark.ml.linalg import Matrix, Vector @@ -797,8 +798,8 @@ class OneVsRest( Estimator[OneVsRestModel], _OneVsRestParams, HasParallelism, - JavaMLReadable[OneVsRest], - JavaMLWritable, + MLReadable[OneVsRest], + MLWritable, ): def __init__( self, @@ -832,7 +833,7 @@ class OneVsRest( def copy(self, extra: Optional[ParamMap] = ...) -> OneVsRest: ... 
class OneVsRestModel( - Model, _OneVsRestParams, JavaMLReadable[OneVsRestModel], JavaMLWritable + Model, _OneVsRestParams, MLReadable[OneVsRestModel], MLWritable ): models: List[Transformer] def __init__(self, models: List[Transformer]) -> None: ... @@ -841,6 +842,26 @@ class OneVsRestModel( def setRawPredictionCol(self, value: str) -> OneVsRestModel: ... def copy(self, extra: Optional[ParamMap] = ...) -> OneVsRestModel: ... +class OneVsRestWriter(MLWriter): + instance: OneVsRest + def __init__(self, instance: OneVsRest) -> None: ... + def saveImpl(self, path: str) -> None: ... + +class OneVsRestReader(MLReader[OneVsRest]): + cls: Type[OneVsRest] + def __init__(self, cls: Type[OneVsRest]) -> None: ... + def load(self, path: str) -> OneVsRest: ... + +class OneVsRestModelWriter(MLWriter): + instance: OneVsRestModel + def __init__(self, instance: OneVsRestModel) -> None: ... + def saveImpl(self, path: str) -> None: ... + +class OneVsRestModelReader(MLReader[OneVsRestModel]): + cls: Type[OneVsRestModel] + def __init__(self, cls: Type[OneVsRestModel]) -> None: ... + def load(self, path: str) -> OneVsRestModel: ... + class FMClassifier( _JavaProbabilisticClassifier[FMClassificationModel], _FactorizationMachinesParams, diff --git a/python/pyspark/ml/tests/test_persistence.py b/python/pyspark/ml/tests/test_persistence.py index 0bbcfcdf50e95..77a6c0309628a 100644 --- a/python/pyspark/ml/tests/test_persistence.py +++ b/python/pyspark/ml/tests/test_persistence.py @@ -237,6 +237,11 @@ def _compare_pipelines(self, m1, m2): self.assertEqual(len(m1.models), len(m2.models)) for x, y in zip(m1.models, m2.models): self._compare_pipelines(x, y) + elif isinstance(m1, Params): + # Test on python backend Estimator/Transformer/Model/Evaluator + self.assertEqual(len(m1.params), len(m2.params)) + for p in m1.params: + self._compare_params(m1, m2, p) else: raise RuntimeError("_compare_pipelines does not yet support type: %s" % type(m1)) @@ -326,14 +331,14 @@ def test_python_transformer_pipeline_persistence(self): except OSError: pass - def test_onevsrest(self): + def _run_test_onevsrest(self, LogisticRegressionCls): temp_path = tempfile.mkdtemp() df = self.spark.createDataFrame([(0.0, 0.5, Vectors.dense(1.0, 0.8)), (1.0, 0.5, Vectors.sparse(2, [], [])), (2.0, 1.0, Vectors.dense(0.5, 0.5))] * 10, ["label", "wt", "features"]) - lr = LogisticRegression(maxIter=5, regParam=0.01) + lr = LogisticRegressionCls(maxIter=5, regParam=0.01) ovr = OneVsRest(classifier=lr) def reload_and_compare(ovr, suffix): @@ -350,6 +355,11 @@ def reload_and_compare(ovr, suffix): reload_and_compare(OneVsRest(classifier=lr), "ovr") reload_and_compare(OneVsRest(classifier=lr).setWeightCol("wt"), "ovrw") + def test_onevsrest(self): + from pyspark.testing.mlutils import DummyLogisticRegression + self._run_test_onevsrest(LogisticRegression) + self._run_test_onevsrest(DummyLogisticRegression) + def test_decisiontree_classifier(self): dt = DecisionTreeClassifier(maxDepth=1) path = tempfile.mkdtemp() diff --git a/python/pyspark/ml/tests/test_tuning.py b/python/pyspark/ml/tests/test_tuning.py index ebd7457e4d30a..3cde34facbf9a 100644 --- a/python/pyspark/ml/tests/test_tuning.py +++ b/python/pyspark/ml/tests/test_tuning.py @@ -28,7 +28,8 @@ from pyspark.ml.tuning import CrossValidator, CrossValidatorModel, ParamGridBuilder, \ TrainValidationSplit, TrainValidationSplitModel from pyspark.sql.functions import rand -from pyspark.testing.mlutils import SparkSessionTestCase +from pyspark.testing.mlutils import DummyEvaluator, DummyLogisticRegression, \ + 
DummyLogisticRegressionModel, SparkSessionTestCase class HasInducedError(Params): @@ -201,7 +202,7 @@ def test_param_grid_type_coercion(self): for v in param.values(): assert(type(v) == float) - def test_save_load_trained_model(self): + def _run_test_save_load_trained_model(self, LogisticRegressionCls, LogisticRegressionModelCls): # This tests saving and loading the trained model only. # Save/load for CrossValidator will be added later: SPARK-13786 temp_path = tempfile.mkdtemp() @@ -212,7 +213,7 @@ def test_save_load_trained_model(self): (Vectors.dense([0.6]), 1.0), (Vectors.dense([1.0]), 1.0)] * 10, ["features", "label"]) - lr = LogisticRegression() + lr = LogisticRegressionCls() grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build() evaluator = BinaryClassificationEvaluator() cv = CrossValidator( @@ -228,7 +229,7 @@ def test_save_load_trained_model(self): lrModelPath = temp_path + "/lrModel" lrModel.save(lrModelPath) - loadedLrModel = LogisticRegressionModel.load(lrModelPath) + loadedLrModel = LogisticRegressionModelCls.load(lrModelPath) self.assertEqual(loadedLrModel.uid, lrModel.uid) self.assertEqual(loadedLrModel.intercept, lrModel.intercept) @@ -248,7 +249,12 @@ def test_save_load_trained_model(self): loadedCvModel.isSet(param) for param in loadedCvModel.params )) - def test_save_load_simple_estimator(self): + def test_save_load_trained_model(self): + self._run_test_save_load_trained_model(LogisticRegression, LogisticRegressionModel) + self._run_test_save_load_trained_model(DummyLogisticRegression, + DummyLogisticRegressionModel) + + def _run_test_save_load_simple_estimator(self, LogisticRegressionCls, evaluatorCls): temp_path = tempfile.mkdtemp() dataset = self.spark.createDataFrame( [(Vectors.dense([0.0]), 0.0), @@ -258,9 +264,9 @@ def test_save_load_simple_estimator(self): (Vectors.dense([1.0]), 1.0)] * 10, ["features", "label"]) - lr = LogisticRegression() + lr = LogisticRegressionCls() grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build() - evaluator = BinaryClassificationEvaluator() + evaluator = evaluatorCls() # test save/load of CrossValidator cv = CrossValidator(estimator=lr, estimatorParamMaps=grid, evaluator=evaluator) @@ -278,6 +284,12 @@ def test_save_load_simple_estimator(self): loadedModel = CrossValidatorModel.load(cvModelPath) self.assertEqual(loadedModel.bestModel.uid, cvModel.bestModel.uid) + def test_save_load_simple_estimator(self): + self._run_test_save_load_simple_estimator( + LogisticRegression, BinaryClassificationEvaluator) + self._run_test_save_load_simple_estimator( + DummyLogisticRegression, DummyEvaluator) + def test_parallel_evaluation(self): dataset = self.spark.createDataFrame( [(Vectors.dense([0.0]), 0.0), @@ -343,7 +355,7 @@ def checkSubModels(subModels): for j in range(len(grid)): self.assertEqual(cvModel.subModels[i][j].uid, cvModel3.subModels[i][j].uid) - def test_save_load_nested_estimator(self): + def _run_test_save_load_nested_estimator(self, LogisticRegressionCls): temp_path = tempfile.mkdtemp() dataset = self.spark.createDataFrame( [(Vectors.dense([0.0]), 0.0), @@ -353,9 +365,9 @@ def test_save_load_nested_estimator(self): (Vectors.dense([1.0]), 1.0)] * 10, ["features", "label"]) - ova = OneVsRest(classifier=LogisticRegression()) - lr1 = LogisticRegression().setMaxIter(100) - lr2 = LogisticRegression().setMaxIter(150) + ova = OneVsRest(classifier=LogisticRegressionCls()) + lr1 = LogisticRegressionCls().setMaxIter(100) + lr2 = LogisticRegressionCls().setMaxIter(150) grid = ParamGridBuilder().addGrid(ova.classifier, [lr1, 
lr2]).build() evaluator = MulticlassClassificationEvaluator() @@ -385,7 +397,11 @@ def test_save_load_nested_estimator(self): self.assert_param_maps_equal(loadedModel.getEstimatorParamMaps(), grid) self.assertEqual(loadedModel.bestModel.uid, cvModel.bestModel.uid) - def test_save_load_pipeline_estimator(self): + def test_save_load_nested_estimator(self): + self._run_test_save_load_nested_estimator(LogisticRegression) + self._run_test_save_load_nested_estimator(DummyLogisticRegression) + + def _run_test_save_load_pipeline_estimator(self, LogisticRegressionCls): temp_path = tempfile.mkdtemp() training = self.spark.createDataFrame([ (0, "a b c d e spark", 1.0), @@ -402,9 +418,9 @@ def test_save_load_pipeline_estimator(self): tokenizer = Tokenizer(inputCol="text", outputCol="words") hashingTF = HashingTF(inputCol=tokenizer.getOutputCol(), outputCol="features") - ova = OneVsRest(classifier=LogisticRegression()) - lr1 = LogisticRegression().setMaxIter(5) - lr2 = LogisticRegression().setMaxIter(10) + ova = OneVsRest(classifier=LogisticRegressionCls()) + lr1 = LogisticRegressionCls().setMaxIter(5) + lr2 = LogisticRegressionCls().setMaxIter(10) pipeline = Pipeline(stages=[tokenizer, hashingTF, ova]) @@ -464,6 +480,10 @@ def test_save_load_pipeline_estimator(self): original_nested_pipeline_model.stages): self.assertEqual(loadedStage.uid, originalStage.uid) + def test_save_load_pipeline_estimator(self): + self._run_test_save_load_pipeline_estimator(LogisticRegression) + self._run_test_save_load_pipeline_estimator(DummyLogisticRegression) + def test_user_specified_folds(self): from pyspark.sql import functions as F @@ -593,7 +613,7 @@ def test_fit_maximize_metric(self): "validationMetrics has the same size of grid parameter") self.assertEqual(1.0, max(validationMetrics)) - def test_save_load_trained_model(self): + def _run_test_save_load_trained_model(self, LogisticRegressionCls, LogisticRegressionModelCls): # This tests saving and loading the trained model only. # Save/load for TrainValidationSplit will be added later: SPARK-13786 temp_path = tempfile.mkdtemp() @@ -604,7 +624,7 @@ def test_save_load_trained_model(self): (Vectors.dense([0.6]), 1.0), (Vectors.dense([1.0]), 1.0)] * 10, ["features", "label"]) - lr = LogisticRegression() + lr = LogisticRegressionCls() grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build() evaluator = BinaryClassificationEvaluator() tvs = TrainValidationSplit( @@ -619,7 +639,7 @@ def test_save_load_trained_model(self): lrModelPath = temp_path + "/lrModel" lrModel.save(lrModelPath) - loadedLrModel = LogisticRegressionModel.load(lrModelPath) + loadedLrModel = LogisticRegressionModelCls.load(lrModelPath) self.assertEqual(loadedLrModel.uid, lrModel.uid) self.assertEqual(loadedLrModel.intercept, lrModel.intercept) @@ -636,7 +656,12 @@ def test_save_load_trained_model(self): loadedTvsModel.isSet(param) for param in loadedTvsModel.params )) - def test_save_load_simple_estimator(self): + def test_save_load_trained_model(self): + self._run_test_save_load_trained_model(LogisticRegression, LogisticRegressionModel) + self._run_test_save_load_trained_model(DummyLogisticRegression, + DummyLogisticRegressionModel) + + def _run_test_save_load_simple_estimator(self, LogisticRegressionCls, evaluatorCls): # This tests saving and loading the trained model only. 
# Save/load for TrainValidationSplit will be added later: SPARK-13786 temp_path = tempfile.mkdtemp() @@ -647,9 +672,9 @@ def test_save_load_simple_estimator(self): (Vectors.dense([0.6]), 1.0), (Vectors.dense([1.0]), 1.0)] * 10, ["features", "label"]) - lr = LogisticRegression() + lr = LogisticRegressionCls() grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build() - evaluator = BinaryClassificationEvaluator() + evaluator = evaluatorCls() tvs = TrainValidationSplit(estimator=lr, estimatorParamMaps=grid, evaluator=evaluator) tvsModel = tvs.fit(dataset) @@ -666,6 +691,12 @@ def test_save_load_simple_estimator(self): loadedModel = TrainValidationSplitModel.load(tvsModelPath) self.assertEqual(loadedModel.bestModel.uid, tvsModel.bestModel.uid) + def test_save_load_simple_estimator(self): + self._run_test_save_load_simple_estimator( + LogisticRegression, BinaryClassificationEvaluator) + self._run_test_save_load_simple_estimator( + DummyLogisticRegression, DummyEvaluator) + def test_parallel_evaluation(self): dataset = self.spark.createDataFrame( [(Vectors.dense([0.0]), 0.0), @@ -718,7 +749,7 @@ def test_expose_sub_models(self): for i in range(len(grid)): self.assertEqual(tvsModel.subModels[i].uid, tvsModel3.subModels[i].uid) - def test_save_load_nested_estimator(self): + def _run_test_save_load_nested_estimator(self, LogisticRegressionCls): # This tests saving and loading the trained model only. # Save/load for TrainValidationSplit will be added later: SPARK-13786 temp_path = tempfile.mkdtemp() @@ -729,9 +760,9 @@ def test_save_load_nested_estimator(self): (Vectors.dense([0.6]), 1.0), (Vectors.dense([1.0]), 1.0)] * 10, ["features", "label"]) - ova = OneVsRest(classifier=LogisticRegression()) - lr1 = LogisticRegression().setMaxIter(100) - lr2 = LogisticRegression().setMaxIter(150) + ova = OneVsRest(classifier=LogisticRegressionCls()) + lr1 = LogisticRegressionCls().setMaxIter(100) + lr2 = LogisticRegressionCls().setMaxIter(150) grid = ParamGridBuilder().addGrid(ova.classifier, [lr1, lr2]).build() evaluator = MulticlassClassificationEvaluator() @@ -759,7 +790,11 @@ def test_save_load_nested_estimator(self): self.assert_param_maps_equal(loadedModel.getEstimatorParamMaps(), grid) self.assertEqual(loadedModel.bestModel.uid, tvsModel.bestModel.uid) - def test_save_load_pipeline_estimator(self): + def test_save_load_nested_estimator(self): + self._run_test_save_load_nested_estimator(LogisticRegression) + self._run_test_save_load_nested_estimator(DummyLogisticRegression) + + def _run_test_save_load_pipeline_estimator(self, LogisticRegressionCls): temp_path = tempfile.mkdtemp() training = self.spark.createDataFrame([ (0, "a b c d e spark", 1.0), @@ -776,9 +811,9 @@ def test_save_load_pipeline_estimator(self): tokenizer = Tokenizer(inputCol="text", outputCol="words") hashingTF = HashingTF(inputCol=tokenizer.getOutputCol(), outputCol="features") - ova = OneVsRest(classifier=LogisticRegression()) - lr1 = LogisticRegression().setMaxIter(5) - lr2 = LogisticRegression().setMaxIter(10) + ova = OneVsRest(classifier=LogisticRegressionCls()) + lr1 = LogisticRegressionCls().setMaxIter(5) + lr2 = LogisticRegressionCls().setMaxIter(10) pipeline = Pipeline(stages=[tokenizer, hashingTF, ova]) @@ -836,6 +871,10 @@ def test_save_load_pipeline_estimator(self): original_nested_pipeline_model.stages): self.assertEqual(loadedStage.uid, originalStage.uid) + def test_save_load_pipeline_estimator(self): + self._run_test_save_load_pipeline_estimator(LogisticRegression) + 
self._run_test_save_load_pipeline_estimator(DummyLogisticRegression) + def test_copy(self): dataset = self.spark.createDataFrame([ (10, 10.0), diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 2b5a9857b0f18..2c083182de470 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -15,6 +15,7 @@ # limitations under the License. # +import os import sys import itertools from multiprocessing.pool import ThreadPool @@ -22,12 +23,13 @@ import numpy as np from pyspark import keyword_only, since, SparkContext -from pyspark.ml import Estimator, Model -from pyspark.ml.common import _py2java, _java2py +from pyspark.ml import Estimator, Transformer, Model +from pyspark.ml.common import inherit_doc, _py2java, _java2py +from pyspark.ml.evaluation import Evaluator from pyspark.ml.param import Params, Param, TypeConverters from pyspark.ml.param.shared import HasCollectSubModels, HasParallelism, HasSeed -from pyspark.ml.util import MLReadable, MLWritable, JavaMLWriter, JavaMLReader, \ - MetaAlgorithmReadWrite +from pyspark.ml.util import DefaultParamsReader, DefaultParamsWriter, MetaAlgorithmReadWrite, \ + MLReadable, MLReader, MLWritable, MLWriter, JavaMLReader, JavaMLWriter from pyspark.ml.wrapper import JavaParams, JavaEstimator, JavaWrapper from pyspark.sql.functions import col, lit, rand, UserDefinedFunction from pyspark.sql.types import BooleanType @@ -229,6 +231,7 @@ def _to_java_impl(self): class _ValidatorSharedReadWrite: + @staticmethod def meta_estimator_transfer_param_maps_to_java(pyEstimator, pyParamMaps): pyStages = MetaAlgorithmReadWrite.getAllNestedStages(pyEstimator) @@ -275,10 +278,8 @@ def meta_estimator_transfer_param_maps_from_java(pyEstimator, javaParamMaps): raise ValueError('Resolve param in estimatorParamMaps failed: ' + javaParam.parent() + '.' + javaParam.name()) javaValue = javaPair.value() - if sc._jvm.Class.forName("org.apache.spark.ml.PipelineStage").isInstance(javaValue): - # Note: JavaParams._from_java support both JavaEstimator/JavaTransformer class - # and Estimator/Transformer class which implements `_from_java` static method - # (such as OneVsRest, Pipeline class). 
+ if sc._jvm.Class.forName("org.apache.spark.ml.util.DefaultParamsWritable") \ + .isInstance(javaValue): pyValue = JavaParams._from_java(javaValue) else: pyValue = _java2py(sc, javaValue) @@ -286,6 +287,222 @@ def meta_estimator_transfer_param_maps_from_java(pyEstimator, javaParamMaps): pyParamMaps.append(pyParamMap) return pyParamMaps + @staticmethod + def is_java_convertible(instance): + allNestedStages = MetaAlgorithmReadWrite.getAllNestedStages(instance.getEstimator()) + evaluator_convertible = isinstance(instance.getEvaluator(), JavaParams) + estimator_convertible = all(map(lambda stage: hasattr(stage, '_to_java'), allNestedStages)) + return estimator_convertible and evaluator_convertible + + @staticmethod + def saveImpl(path, instance, sc, extraMetadata=None): + numParamsNotJson = 0 + jsonEstimatorParamMaps = [] + for paramMap in instance.getEstimatorParamMaps(): + jsonParamMap = [] + for p, v in paramMap.items(): + jsonParam = {'parent': p.parent, 'name': p.name} + if (isinstance(v, Estimator) and not MetaAlgorithmReadWrite.isMetaEstimator(v)) \ + or isinstance(v, Transformer) or isinstance(v, Evaluator): + relative_path = f'epm_{p.name}{numParamsNotJson}' + param_path = os.path.join(path, relative_path) + numParamsNotJson += 1 + v.save(param_path) + jsonParam['value'] = relative_path + jsonParam['isJson'] = False + elif isinstance(v, MLWritable): + raise RuntimeError( + "ValidatorSharedReadWrite.saveImpl does not handle parameters of type: " + "MLWritable that are not Estimaor/Evaluator/Transformer, and if parameter " + "is estimator, it cannot be meta estimator such as Validator or OneVsRest") + else: + jsonParam['value'] = v + jsonParam['isJson'] = True + jsonParamMap.append(jsonParam) + jsonEstimatorParamMaps.append(jsonParamMap) + + skipParams = ['estimator', 'evaluator', 'estimatorParamMaps'] + jsonParams = DefaultParamsWriter.extractJsonParams(instance, skipParams) + jsonParams['estimatorParamMaps'] = jsonEstimatorParamMaps + + DefaultParamsWriter.saveMetadata(instance, path, sc, extraMetadata, jsonParams) + evaluatorPath = os.path.join(path, 'evaluator') + instance.getEvaluator().save(evaluatorPath) + estimatorPath = os.path.join(path, 'estimator') + instance.getEstimator().save(estimatorPath) + + @staticmethod + def load(path, sc, metadata): + evaluatorPath = os.path.join(path, 'evaluator') + evaluator = DefaultParamsReader.loadParamsInstance(evaluatorPath, sc) + estimatorPath = os.path.join(path, 'estimator') + estimator = DefaultParamsReader.loadParamsInstance(estimatorPath, sc) + + uidToParams = MetaAlgorithmReadWrite.getUidMap(estimator) + uidToParams[evaluator.uid] = evaluator + + jsonEstimatorParamMaps = metadata['paramMap']['estimatorParamMaps'] + + estimatorParamMaps = [] + for jsonParamMap in jsonEstimatorParamMaps: + paramMap = {} + for jsonParam in jsonParamMap: + est = uidToParams[jsonParam['parent']] + param = getattr(est, jsonParam['name']) + if 'isJson' not in jsonParam or ('isJson' in jsonParam and jsonParam['isJson']): + value = jsonParam['value'] + else: + relativePath = jsonParam['value'] + valueSavedPath = os.path.join(path, relativePath) + value = DefaultParamsReader.loadParamsInstance(valueSavedPath, sc) + paramMap[param] = value + estimatorParamMaps.append(paramMap) + + return metadata, estimator, evaluator, estimatorParamMaps + + @staticmethod + def validateParams(instance): + estiamtor = instance.getEstimator() + evaluator = instance.getEvaluator() + uidMap = MetaAlgorithmReadWrite.getUidMap(estiamtor) + + for elem in [evaluator] + 
list(uidMap.values()): + if not isinstance(elem, MLWritable): + raise ValueError(f'Validator write will fail because it contains {elem.uid} ' + f'which is not writable.') + + estimatorParamMaps = instance.getEstimatorParamMaps() + paramErr = 'Validator save requires all Params in estimatorParamMaps to apply to ' \ + f'its Estimator, An extraneous Param was found: ' + for paramMap in estimatorParamMaps: + for param in paramMap: + if param.parent not in uidMap: + raise ValueError(paramErr + repr(param)) + + @staticmethod + def getValidatorModelWriterPersistSubModelsParam(writer): + if 'persistsubmodels' in writer.optionMap: + persistSubModelsParam = writer.optionMap['persistsubmodels'].lower() + if persistSubModelsParam == 'true': + return True + elif persistSubModelsParam == 'false': + return False + else: + raise ValueError( + f'persistSubModels option value {persistSubModelsParam} is invalid, ' + f"the possible values are True, 'True' or False, 'False'") + else: + return writer.instance.subModels is not None + + +_save_with_persist_submodels_no_submodels_found_err = \ + 'When persisting tuning models, you can only set persistSubModels to true if the tuning ' \ + 'was done with collectSubModels set to true. To save the sub-models, try rerunning fitting ' \ + 'with collectSubModels set to true.' + + +@inherit_doc +class CrossValidatorReader(MLReader): + + def __init__(self, cls): + super(CrossValidatorReader, self).__init__() + self.cls = cls + + def load(self, path): + metadata = DefaultParamsReader.loadMetadata(path, self.sc) + if not DefaultParamsReader.isPythonParamsInstance(metadata): + return JavaMLReader(self.cls).load(path) + else: + metadata, estimator, evaluator, estimatorParamMaps = \ + _ValidatorSharedReadWrite.load(path, self.sc, metadata) + cv = CrossValidator(estimator=estimator, + estimatorParamMaps=estimatorParamMaps, + evaluator=evaluator) + cv = cv._resetUid(metadata['uid']) + DefaultParamsReader.getAndSetParams(cv, metadata, skipParams=['estimatorParamMaps']) + return cv + + +@inherit_doc +class CrossValidatorWriter(MLWriter): + + def __init__(self, instance): + super(CrossValidatorWriter, self).__init__() + self.instance = instance + + def saveImpl(self, path): + _ValidatorSharedReadWrite.validateParams(self.instance) + _ValidatorSharedReadWrite.saveImpl(path, self.instance, self.sc) + + +@inherit_doc +class CrossValidatorModelReader(MLReader): + + def __init__(self, cls): + super(CrossValidatorModelReader, self).__init__() + self.cls = cls + + def load(self, path): + metadata = DefaultParamsReader.loadMetadata(path, self.sc) + if not DefaultParamsReader.isPythonParamsInstance(metadata): + return JavaMLReader(self.cls).load(path) + else: + metadata, estimator, evaluator, estimatorParamMaps = \ + _ValidatorSharedReadWrite.load(path, self.sc, metadata) + numFolds = metadata['paramMap']['numFolds'] + bestModelPath = os.path.join(path, 'bestModel') + bestModel = DefaultParamsReader.loadParamsInstance(bestModelPath, self.sc) + avgMetrics = metadata['avgMetrics'] + persistSubModels = ('persistSubModels' in metadata) and metadata['persistSubModels'] + + if persistSubModels: + subModels = [[None] * len(estimatorParamMaps)] * numFolds + for splitIndex in range(numFolds): + for paramIndex in range(len(estimatorParamMaps)): + modelPath = os.path.join( + path, 'subModels', f'fold{splitIndex}', f'{paramIndex}') + subModels[splitIndex][paramIndex] = \ + DefaultParamsReader.loadParamsInstance(modelPath, self.sc) + else: + subModels = None + + cvModel = CrossValidatorModel(bestModel, 
avgMetrics=avgMetrics, subModels=subModels) + cvModel = cvModel._resetUid(metadata['uid']) + cvModel.set(cvModel.estimator, estimator) + cvModel.set(cvModel.estimatorParamMaps, estimatorParamMaps) + cvModel.set(cvModel.evaluator, evaluator) + DefaultParamsReader.getAndSetParams( + cvModel, metadata, skipParams=['estimatorParamMaps']) + return cvModel + + +@inherit_doc +class CrossValidatorModelWriter(MLWriter): + + def __init__(self, instance): + super(CrossValidatorModelWriter, self).__init__() + self.instance = instance + + def saveImpl(self, path): + _ValidatorSharedReadWrite.validateParams(self.instance) + instance = self.instance + persistSubModels = _ValidatorSharedReadWrite \ + .getValidatorModelWriterPersistSubModelsParam(self) + extraMetadata = {'avgMetrics': instance.avgMetrics, + 'persistSubModels': persistSubModels} + _ValidatorSharedReadWrite.saveImpl(path, instance, self.sc, extraMetadata=extraMetadata) + bestModelPath = os.path.join(path, 'bestModel') + instance.bestModel.save(bestModelPath) + if persistSubModels: + if instance.subModels is None: + raise ValueError(_save_with_persist_submodels_no_submodels_found_err) + subModelsPath = os.path.join(path, 'subModels') + for splitIndex in range(instance.getNumFolds()): + splitPath = os.path.join(subModelsPath, f'fold{splitIndex}') + for paramIndex in range(len(instance.getEstimatorParamMaps())): + modelPath = os.path.join(splitPath, f'{paramIndex}') + instance.subModels[splitIndex][paramIndex].save(modelPath) + class _CrossValidatorParams(_ValidatorParams): """ @@ -553,13 +770,15 @@ def copy(self, extra=None): @since("2.3.0") def write(self): """Returns an MLWriter instance for this ML instance.""" - return JavaMLWriter(self) + if _ValidatorSharedReadWrite.is_java_convertible(self): + return JavaMLWriter(self) + return CrossValidatorWriter(self) @classmethod @since("2.3.0") def read(cls): """Returns an MLReader instance for this class.""" - return JavaMLReader(cls) + return CrossValidatorReader(cls) @classmethod def _from_java(cls, java_stage): @@ -662,13 +881,15 @@ def copy(self, extra=None): @since("2.3.0") def write(self): """Returns an MLWriter instance for this ML instance.""" - return JavaMLWriter(self) + if _ValidatorSharedReadWrite.is_java_convertible(self): + return JavaMLWriter(self) + return CrossValidatorModelWriter(self) @classmethod @since("2.3.0") def read(cls): """Returns an MLReader instance for this class.""" - return JavaMLReader(cls) + return CrossValidatorModelReader(cls) @classmethod def _from_java(cls, java_stage): @@ -738,6 +959,106 @@ def _to_java(self): return _java_obj +@inherit_doc +class TrainValidationSplitReader(MLReader): + + def __init__(self, cls): + super(TrainValidationSplitReader, self).__init__() + self.cls = cls + + def load(self, path): + metadata = DefaultParamsReader.loadMetadata(path, self.sc) + if not DefaultParamsReader.isPythonParamsInstance(metadata): + return JavaMLReader(self.cls).load(path) + else: + metadata, estimator, evaluator, estimatorParamMaps = \ + _ValidatorSharedReadWrite.load(path, self.sc, metadata) + tvs = TrainValidationSplit(estimator=estimator, + estimatorParamMaps=estimatorParamMaps, + evaluator=evaluator) + tvs = tvs._resetUid(metadata['uid']) + DefaultParamsReader.getAndSetParams(tvs, metadata, skipParams=['estimatorParamMaps']) + return tvs + + +@inherit_doc +class TrainValidationSplitWriter(MLWriter): + + def __init__(self, instance): + super(TrainValidationSplitWriter, self).__init__() + self.instance = instance + + def saveImpl(self, path): + 
_ValidatorSharedReadWrite.validateParams(self.instance) + _ValidatorSharedReadWrite.saveImpl(path, self.instance, self.sc) + + +@inherit_doc +class TrainValidationSplitModelReader(MLReader): + + def __init__(self, cls): + super(TrainValidationSplitModelReader, self).__init__() + self.cls = cls + + def load(self, path): + metadata = DefaultParamsReader.loadMetadata(path, self.sc) + if not DefaultParamsReader.isPythonParamsInstance(metadata): + return JavaMLReader(self.cls).load(path) + else: + metadata, estimator, evaluator, estimatorParamMaps = \ + _ValidatorSharedReadWrite.load(path, self.sc, metadata) + bestModelPath = os.path.join(path, 'bestModel') + bestModel = DefaultParamsReader.loadParamsInstance(bestModelPath, self.sc) + validationMetrics = metadata['validationMetrics'] + persistSubModels = ('persistSubModels' in metadata) and metadata['persistSubModels'] + + if persistSubModels: + subModels = [None] * len(estimatorParamMaps) + for paramIndex in range(len(estimatorParamMaps)): + modelPath = os.path.join(path, 'subModels', f'{paramIndex}') + subModels[paramIndex] = \ + DefaultParamsReader.loadParamsInstance(modelPath, self.sc) + else: + subModels = None + + tvsModel = TrainValidationSplitModel( + bestModel, validationMetrics=validationMetrics, subModels=subModels) + tvsModel = tvsModel._resetUid(metadata['uid']) + tvsModel.set(tvsModel.estimator, estimator) + tvsModel.set(tvsModel.estimatorParamMaps, estimatorParamMaps) + tvsModel.set(tvsModel.evaluator, evaluator) + DefaultParamsReader.getAndSetParams( + tvsModel, metadata, skipParams=['estimatorParamMaps']) + return tvsModel + + +@inherit_doc +class TrainValidationSplitModelWriter(MLWriter): + + def __init__(self, instance): + super(TrainValidationSplitModelWriter, self).__init__() + self.instance = instance + + def saveImpl(self, path): + _ValidatorSharedReadWrite.validateParams(self.instance) + instance = self.instance + persistSubModels = _ValidatorSharedReadWrite \ + .getValidatorModelWriterPersistSubModelsParam(self) + + extraMetadata = {'validationMetrics': instance.validationMetrics, + 'persistSubModels': persistSubModels} + _ValidatorSharedReadWrite.saveImpl(path, instance, self.sc, extraMetadata=extraMetadata) + bestModelPath = os.path.join(path, 'bestModel') + instance.bestModel.save(bestModelPath) + if persistSubModels: + if instance.subModels is None: + raise ValueError(_save_with_persist_submodels_no_submodels_found_err) + subModelsPath = os.path.join(path, 'subModels') + for paramIndex in range(len(instance.getEstimatorParamMaps())): + modelPath = os.path.join(subModelsPath, f'{paramIndex}') + instance.subModels[paramIndex].save(modelPath) + + class _TrainValidationSplitParams(_ValidatorParams): """ Params for :py:class:`TrainValidationSplit` and :py:class:`TrainValidationSplitModel`. 
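The sub-model handling above is driven by the `persistSubModels` writer option, resolved through `getValidatorModelWriterPersistSubModelsParam` and the new `MLWriter.option()` hook added in the `util.py` hunk further below. A hedged, self-contained sketch of the intended usage, reusing the dummy test classes this patch adds (paths are illustrative):

```python
import tempfile
from pyspark.sql import SparkSession
from pyspark.ml.linalg import Vectors
from pyspark.ml.tuning import ParamGridBuilder, TrainValidationSplit
from pyspark.testing.mlutils import DummyEvaluator, DummyLogisticRegression

spark = SparkSession.builder.getOrCreate()
dataset = spark.createDataFrame(
    [(Vectors.dense([0.0]), 0.0), (Vectors.dense([0.4]), 1.0),
     (Vectors.dense([0.5]), 0.0), (Vectors.dense([0.6]), 1.0),
     (Vectors.dense([1.0]), 1.0)] * 10, ["features", "label"])

lr = DummyLogisticRegression()
grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build()
tvs = TrainValidationSplit(estimator=lr, estimatorParamMaps=grid,
                           evaluator=DummyEvaluator(), collectSubModels=True)
tvs_model = tvs.fit(dataset)

base = tempfile.mkdtemp()
# Persist the sub-model trained for every param map; TrainValidationSplitModelWriter raises
# a ValueError if persistSubModels is true but no sub-models were collected during fitting.
tvs_model.write().option("persistSubModels", "true").save(base + "/with_sub_models")
# Skip the sub-models: bestModel, estimator, evaluator and metadata are still written.
tvs_model.write().option("persistSubModels", "false").save(base + "/best_only")
# Default when the option is unset: persist sub-models only if they were collected.
tvs_model.write().save(base + "/default")
```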
@@ -942,13 +1263,15 @@ def copy(self, extra=None): @since("2.3.0") def write(self): """Returns an MLWriter instance for this ML instance.""" - return JavaMLWriter(self) + if _ValidatorSharedReadWrite.is_java_convertible(self): + return JavaMLWriter(self) + return TrainValidationSplitWriter(self) @classmethod @since("2.3.0") def read(cls): """Returns an MLReader instance for this class.""" - return JavaMLReader(cls) + return TrainValidationSplitReader(cls) @classmethod def _from_java(cls, java_stage): @@ -1046,13 +1369,15 @@ def copy(self, extra=None): @since("2.3.0") def write(self): """Returns an MLWriter instance for this ML instance.""" - return JavaMLWriter(self) + if _ValidatorSharedReadWrite.is_java_convertible(self): + return JavaMLWriter(self) + return TrainValidationSplitModelWriter(self) @classmethod @since("2.3.0") def read(cls): """Returns an MLReader instance for this class.""" - return JavaMLReader(cls) + return TrainValidationSplitModelReader(cls) @classmethod def _from_java(cls, java_stage): diff --git a/python/pyspark/ml/tuning.pyi b/python/pyspark/ml/tuning.pyi index 63cd75f0e1d74..e5f153d49e9c6 100644 --- a/python/pyspark/ml/tuning.pyi +++ b/python/pyspark/ml/tuning.pyi @@ -183,3 +183,43 @@ class TrainValidationSplitModel( def write(self) -> MLWriter: ... @classmethod def read(cls: Type[TrainValidationSplitModel]) -> MLReader: ... + +class CrossValidatorWriter(MLWriter): + instance: CrossValidator + def __init__(self, instance: CrossValidator) -> None: ... + def saveImpl(self, path: str) -> None: ... + +class CrossValidatorReader(MLReader[CrossValidator]): + cls: Type[CrossValidator] + def __init__(self, cls: Type[CrossValidator]) -> None: ... + def load(self, path: str) -> CrossValidator: ... + +class CrossValidatorModelWriter(MLWriter): + instance: CrossValidatorModel + def __init__(self, instance: CrossValidatorModel) -> None: ... + def saveImpl(self, path: str) -> None: ... + +class CrossValidatorModelReader(MLReader[CrossValidatorModel]): + cls: Type[CrossValidatorModel] + def __init__(self, cls: Type[CrossValidatorModel]) -> None: ... + def load(self, path: str) -> CrossValidatorModel: ... + +class TrainValidationSplitWriter(MLWriter): + instance: TrainValidationSplit + def __init__(self, instance: TrainValidationSplit) -> None: ... + def saveImpl(self, path: str) -> None: ... + +class TrainValidationSplitReader(MLReader[TrainValidationSplit]): + cls: Type[TrainValidationSplit] + def __init__(self, cls: Type[TrainValidationSplit]) -> None: ... + def load(self, path: str) -> TrainValidationSplit: ... + +class TrainValidationSplitModelWriter(MLWriter): + instance: TrainValidationSplitModel + def __init__(self, instance: TrainValidationSplitModel) -> None: ... + def saveImpl(self, path: str) -> None: ... + +class TrainValidationSplitModelReader(MLReader[TrainValidationSplitModel]): + cls: Type[TrainValidationSplitModel] + def __init__(self, cls: Type[TrainValidationSplitModel]) -> None: ... + def load(self, path: str) -> TrainValidationSplitModel: ... 
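With the dispatch above, `write()` keeps returning `JavaMLWriter` whenever every nested stage and the evaluator are Java-backed, so existing workflows keep their on-disk format; only validators that contain Python-backend stages fall through to the new writers. A rough sketch of that new path, mirroring `_run_test_save_load_simple_estimator` in the updated tests (the path is illustrative):

```python
import tempfile
from pyspark.sql import SparkSession
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
from pyspark.testing.mlutils import DummyEvaluator, DummyLogisticRegression

spark = SparkSession.builder.getOrCreate()

# A pure-Python estimator/evaluator makes is_java_convertible False, so save() goes through
# CrossValidatorWriter: Python metadata, estimator/ and evaluator/ subdirectories, and the
# estimatorParamMaps encoded as JSON entries in the metadata.
lr = DummyLogisticRegression()
grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build()
cv = CrossValidator(estimator=lr, estimatorParamMaps=grid, evaluator=DummyEvaluator())

path = tempfile.mkdtemp() + "/python_cv"
cv.save(path)
loaded = CrossValidator.load(path)   # CrossValidatorReader rebuilds the Python instance
assert loaded.uid == cv.uid
assert loaded.getEstimator().uid == lr.uid
```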
diff --git a/python/pyspark/ml/util.py b/python/pyspark/ml/util.py index a34bfb53482a0..156e7f0fe65e6 100644 --- a/python/pyspark/ml/util.py +++ b/python/pyspark/ml/util.py @@ -106,6 +106,7 @@ class MLWriter(BaseReadWrite): def __init__(self): super(MLWriter, self).__init__() self.shouldOverwrite = False + self.optionMap = {} def _handleOverwrite(self, path): from pyspark.ml.wrapper import JavaWrapper @@ -132,6 +133,14 @@ def overwrite(self): self.shouldOverwrite = True return self + def option(self, key, value): + """ + Adds an option to the underlying MLWriter. See the documentation for the specific model's + writer for possible options. The option name (key) is case-insensitive. + """ + self.optionMap[key.lower()] = str(value) + return self + @inherit_doc class GeneralMLWriter(MLWriter): @@ -375,6 +384,13 @@ def __init__(self, instance): def saveImpl(self, path): DefaultParamsWriter.saveMetadata(self.instance, path, self.sc) + @staticmethod + def extractJsonParams(instance, skipParams): + paramMap = instance.extractParamMap() + jsonParams = {param.name: value for param, value in paramMap.items() + if param.name not in skipParams} + return jsonParams + @staticmethod def saveMetadata(instance, path, sc, extraMetadata=None, paramMap=None): """ @@ -530,15 +546,16 @@ def _parseMetaData(metadataStr, expectedClassName=""): return metadata @staticmethod - def getAndSetParams(instance, metadata): + def getAndSetParams(instance, metadata, skipParams=None): """ Extract Params from metadata, and set them in the instance. """ # Set user-supplied param values for paramName in metadata['paramMap']: param = instance.getParam(paramName) - paramValue = metadata['paramMap'][paramName] - instance.set(param, paramValue) + if skipParams is None or paramName not in skipParams: + paramValue = metadata['paramMap'][paramName] + instance.set(param, paramValue) # Set default param values majorAndMinorVersions = VersionUtils.majorMinorVersion(metadata['sparkVersion']) @@ -554,6 +571,10 @@ def getAndSetParams(instance, metadata): paramValue = metadata['defaultParamMap'][paramName] instance._setDefault(**{paramName: paramValue}) + @staticmethod + def isPythonParamsInstance(metadata): + return metadata['class'].startswith('pyspark.ml.') + @staticmethod def loadParamsInstance(path, sc): """ @@ -561,7 +582,10 @@ def loadParamsInstance(path, sc): This assumes the instance inherits from :py:class:`MLReadable`. """ metadata = DefaultParamsReader.loadMetadata(path, sc) - pythonClassName = metadata['class'].replace("org.apache.spark", "pyspark") + if DefaultParamsReader.isPythonParamsInstance(metadata): + pythonClassName = metadata['class'] + else: + pythonClassName = metadata['class'].replace("org.apache.spark", "pyspark") py_type = DefaultParamsReader.__get_class(pythonClassName) instance = py_type.load(path) return instance @@ -630,3 +654,13 @@ def getAllNestedStages(pyInstance): nestedStages.extend(MetaAlgorithmReadWrite.getAllNestedStages(pySubStage)) return [pyInstance] + nestedStages + + @staticmethod + def getUidMap(instance): + nestedStages = MetaAlgorithmReadWrite.getAllNestedStages(instance) + uidMap = {stage.uid: stage for stage in nestedStages} + if len(nestedStages) != len(uidMap): + raise RuntimeError(f'{instance.__class__.__module__}.{instance.__class__.__name__}' + f'.load found a compound estimator with stages with duplicate ' + f'UIDs. 
List of UIDs: {list(uidMap.keys())}.') + return uidMap diff --git a/python/pyspark/ml/util.pyi b/python/pyspark/ml/util.pyi index e2496e181f14f..db28c095a5568 100644 --- a/python/pyspark/ml/util.pyi +++ b/python/pyspark/ml/util.pyi @@ -132,3 +132,5 @@ class MetaAlgorithmReadWrite: def isMetaEstimator(pyInstance: Any) -> bool: ... @staticmethod def getAllNestedStages(pyInstance: Any) -> list: ... + @staticmethod + def getUidMap(instance: Any) -> dict: ... diff --git a/python/pyspark/testing/mlutils.py b/python/pyspark/testing/mlutils.py index a90a64e747dea..d6edf9d64af49 100644 --- a/python/pyspark/testing/mlutils.py +++ b/python/pyspark/testing/mlutils.py @@ -17,8 +17,12 @@ import numpy as np +from pyspark import keyword_only from pyspark.ml import Estimator, Model, Transformer, UnaryTransformer +from pyspark.ml.evaluation import Evaluator from pyspark.ml.param import Param, Params, TypeConverters +from pyspark.ml.param.shared import HasMaxIter, HasRegParam +from pyspark.ml.classification import Classifier, ClassificationModel from pyspark.ml.util import DefaultParamsReadable, DefaultParamsWritable from pyspark.ml.wrapper import _java2py # type: ignore from pyspark.sql import DataFrame, SparkSession @@ -161,3 +165,86 @@ def _fit(self, dataset): class MockModel(MockTransformer, Model, HasFake): pass + + +class _DummyLogisticRegressionParams(HasMaxIter, HasRegParam): + def setMaxIter(self, value): + return self._set(maxIter=value) + + def setRegParam(self, value): + return self._set(regParam=value) + + +# This is a dummy LogisticRegression used in test for python backend estimator/model +class DummyLogisticRegression(Classifier, _DummyLogisticRegressionParams, + DefaultParamsReadable, DefaultParamsWritable): + @keyword_only + def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="prediction", + maxIter=100, regParam=0.0, rawPredictionCol="rawPrediction"): + super(DummyLogisticRegression, self).__init__() + kwargs = self._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, *, featuresCol="features", labelCol="label", predictionCol="prediction", + maxIter=100, regParam=0.0, rawPredictionCol="rawPrediction"): + kwargs = self._input_kwargs + self._set(**kwargs) + return self + + def _fit(self, dataset): + # Do nothing but create a dummy model + return self._copyValues(DummyLogisticRegressionModel()) + + +class DummyLogisticRegressionModel(ClassificationModel, _DummyLogisticRegressionParams, + DefaultParamsReadable, DefaultParamsWritable): + + def __init__(self): + super(DummyLogisticRegressionModel, self).__init__() + + def _transform(self, dataset): + # A dummy transform impl which always predict label 1 + from pyspark.sql.functions import array, lit + from pyspark.ml.functions import array_to_vector + rawPredCol = self.getRawPredictionCol() + if rawPredCol: + dataset = dataset.withColumn( + rawPredCol, array_to_vector(array(lit(-100.0), lit(100.0)))) + predCol = self.getPredictionCol() + if predCol: + dataset = dataset.withColumn(predCol, lit(1.0)) + + return dataset + + @property + def numClasses(self): + # a dummy implementation for test. + return 2 + + @property + def intercept(self): + # a dummy implementation for test. + return 0.0 + + # This class only used in test. The following methods/properties are not used in tests. 
+ + @property + def coefficients(self): + raise NotImplementedError() + + def predictRaw(self, value): + raise NotImplementedError() + + def numFeatures(self): + raise NotImplementedError() + + def predict(self, value): + raise NotImplementedError() + + +class DummyEvaluator(Evaluator, DefaultParamsReadable, DefaultParamsWritable): + + def _evaluate(self, dataset): + # a dummy implementation for test. + return 1.0 From 85949588b71ed548a2e10d2e58183d9cce313a48 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 3 Dec 2020 16:43:15 -0800 Subject: [PATCH 0656/1009] [SPARK-33650][SQL] Fix the error from ALTER TABLE .. ADD/DROP PARTITION for non-supported partition management table ### What changes were proposed in this pull request? In the PR, I propose to change the order of post-analysis checks for the `ALTER TABLE .. ADD/DROP PARTITION` command, and perform the general check (does the table support partition management at all) before specific checks. ### Why are the changes needed? The error message for the table which doesn't support partition management can mislead users: ```java PartitionSpecs are not resolved;; 'AlterTableAddPartition [UnresolvedPartitionSpec(Map(id -> 1),None)], false +- ResolvedTable org.apache.spark.sql.connector.InMemoryTableCatalog2fd64b11, ns1.ns2.tbl, org.apache.spark.sql.connector.InMemoryTable5d3ff859 ``` because it says nothing about the root cause of the issue. ### Does this PR introduce _any_ user-facing change? Yes. After the change, the error message will be: ``` Table ns1.ns2.tbl can not alter partitions ``` ### How was this patch tested? By running the affected test suite `AlterTablePartitionV2SQLSuite`. Closes #30594 from MaxGekk/check-order-AlterTablePartition. Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/analysis/CheckAnalysis.scala | 6 +++--- .../AlterTablePartitionV2SQLSuite.scala | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 61ac6346ff944..64496a953861a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -996,12 +996,12 @@ trait CheckAnalysis extends PredicateHelper { private def checkAlterTablePartition( table: Table, parts: Seq[PartitionSpec]): Unit = { (table, parts) match { - case (_, parts) if parts.exists(_.isInstanceOf[UnresolvedPartitionSpec]) => - failAnalysis("PartitionSpecs are not resolved") - case (table, _) if !table.isInstanceOf[SupportsPartitionManagement] => failAnalysis(s"Table ${table.name()} can not alter partitions.") + case (_, parts) if parts.exists(_.isInstanceOf[UnresolvedPartitionSpec]) => + failAnalysis("PartitionSpecs are not resolved") + // Skip atomic partition tables case (_: SupportsAtomicPartitionManagement, _) => case (_: SupportsPartitionManagement, parts) if parts.size > 1 => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala index 3583eceec7559..47b5e5e54edde 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala @@ -245,4 +245,20 @@ class 
AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { assert(!partTable.partitionExists(expectedPartition)) } } + + test("SPARK-33650: add/drop partition into a table which doesn't support partition management") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING _") + Seq( + s"ALTER TABLE $t ADD PARTITION (id=1)", + s"ALTER TABLE $t DROP PARTITION (id=1)" + ).foreach { alterTable => + val errMsg = intercept[AnalysisException] { + spark.sql(alterTable) + }.getMessage + assert(errMsg.contains(s"Table $t can not alter partitions")) + } + } + } } From 29e415deac3c90936dd1466eab6b001b7f1f4959 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 4 Dec 2020 10:58:41 +0800 Subject: [PATCH 0657/1009] [SPARK-33649][SQL][DOC] Improve the doc of spark.sql.ansi.enabled ### What changes were proposed in this pull request? Improve the documentation of SQL configuration `spark.sql.ansi.enabled` ### Why are the changes needed? As there are more and more new features under the SQL configuration `spark.sql.ansi.enabled`, we should make it more clear about: 1. what exactly it is 2. where can users find all the features of the ANSI mode 3. whether all the features are exactly from the SQL standard ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? It's just doc change. Closes #30593 from gengliangwang/reviseAnsiDoc. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- docs/sql-ref-ansi-compliance.md | 3 ++- .../scala/org/apache/spark/sql/internal/SQLConf.scala | 11 ++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 4e19799ca75b9..c13ea2b167d93 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -21,7 +21,8 @@ license: | Since Spark 3.0, Spark SQL introduces two experimental options to comply with the SQL standard: `spark.sql.ansi.enabled` and `spark.sql.storeAssignmentPolicy` (See a table below for details). -When `spark.sql.ansi.enabled` is set to `true`, Spark SQL follows the standard in basic behaviours (e.g., arithmetic operations, type conversion, SQL functions and SQL parsing). +When `spark.sql.ansi.enabled` is set to `true`, Spark SQL uses an ANSI compliant dialect instead of being Hive compliant. For example, Spark will throw an exception at runtime instead of returning null results if the inputs to a SQL operator/function are invalid. Some ANSI dialect features may be not from the ANSI SQL standard directly, but their behaviors align with ANSI SQL's style. + Moreover, Spark SQL has an independent option to control implicit casting behaviours when inserting rows in a table. The casting behaviours are defined as store assignment rules in the standard. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index b32476a5af71a..07cd41b06de21 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2209,11 +2209,12 @@ object SQLConf { .createWithDefault(StoreAssignmentPolicy.ANSI.toString) val ANSI_ENABLED = buildConf("spark.sql.ansi.enabled") - .doc("When true, Spark tries to conform to the ANSI SQL specification: 1. Spark will " + - "throw an exception at runtime if the inputs to a SQL operator/function are invalid, " + - "e.g. 
overflow in arithmetic operations, out-of-range index when accessing array elements. " + - "2. Spark will forbid using the reserved keywords of ANSI SQL as identifiers in " + - "the SQL parser. 3. Spark will return NULL for null input for function `size`.") + .doc("When true, Spark SQL uses an ANSI compliant dialect instead of being Hive compliant. " + + "For example, Spark will throw an exception at runtime instead of returning null results " + + "when the inputs to a SQL operator/function are invalid." + + "For full details of this dialect, you can find them in the section \"ANSI Compliance\" of " + + "Spark's documentation. Some ANSI dialect features may be not from the ANSI SQL " + + "standard directly, but their behaviors align with ANSI SQL's style") .version("3.0.0") .booleanConf .createWithDefault(false) From e22ddb6740e73a5d1b4ef1ddd21e4241bf85f03c Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Fri, 4 Dec 2020 05:43:05 +0000 Subject: [PATCH 0658/1009] [SPARK-32405][SQL][FOLLOWUP] Remove USING _ in CREATE TABLE in JDBCTableCatalog docker tests ### What changes were proposed in this pull request? remove USING _ in CREATE TABLE in JDBCTableCatalog docker tests ### Why are the changes needed? Previously CREATE TABLE syntax forces users to specify a provider so we have to add a USING _ . Now the problem was fix and we need to remove it. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests Closes #30599 from huaxingao/remove_USING. Authored-by: Huaxin Gao Signed-off-by: Wenchen Fan --- .../spark/sql/jdbc/v2/DB2IntegrationSuite.scala | 4 ++-- .../sql/jdbc/v2/MsSqlServerIntegrationSuite.scala | 4 ++-- .../spark/sql/jdbc/v2/MySQLIntegrationSuite.scala | 6 +++--- .../spark/sql/jdbc/v2/OracleIntegrationSuite.scala | 2 +- .../spark/sql/jdbc/v2/PostgresIntegrationSuite.scala | 4 ++-- .../org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 12 ++++++------ 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala index 4b6461815d306..6f803b8f61dd4 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala @@ -59,7 +59,7 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { override def dataPreparation(conn: Connection): Unit = {} override def testUpdateColumnType(tbl: String): Unit = { - sql(s"CREATE TABLE $tbl (ID INTEGER) USING _") + sql(s"CREATE TABLE $tbl (ID INTEGER)") var t = spark.table(tbl) var expectedSchema = new StructType().add("ID", IntegerType) assert(t.schema === expectedSchema) @@ -75,7 +75,7 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { } override def testCreateTableWithProperty(tbl: String): Unit = { - sql(s"CREATE TABLE $tbl (ID INT) USING _" + + sql(s"CREATE TABLE $tbl (ID INT)" + s" TBLPROPERTIES('CCSID'='UNICODE')") var t = spark.table(tbl) var expectedSchema = new StructType().add("ID", IntegerType) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala index fd101607ad3ee..a7e257dbdc554 100644 --- 
a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala @@ -65,7 +65,7 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBC override def notSupportsTableComment: Boolean = true override def testUpdateColumnType(tbl: String): Unit = { - sql(s"CREATE TABLE $tbl (ID INTEGER) USING _") + sql(s"CREATE TABLE $tbl (ID INTEGER)") var t = spark.table(tbl) var expectedSchema = new StructType().add("ID", IntegerType) assert(t.schema === expectedSchema) @@ -81,7 +81,7 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBC } override def testUpdateColumnNullability(tbl: String): Unit = { - sql(s"CREATE TABLE $tbl (ID STRING NOT NULL) USING _") + sql(s"CREATE TABLE $tbl (ID STRING NOT NULL)") // Update nullability is unsupported for mssql db. val msg = intercept[AnalysisException] { sql(s"ALTER TABLE $tbl ALTER COLUMN ID DROP NOT NULL") diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala index a81399fc2a4f7..5f63fde7a0f58 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala @@ -67,7 +67,7 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { } override def testUpdateColumnType(tbl: String): Unit = { - sql(s"CREATE TABLE $tbl (ID INTEGER) USING _") + sql(s"CREATE TABLE $tbl (ID INTEGER)") var t = spark.table(tbl) var expectedSchema = new StructType().add("ID", IntegerType) assert(t.schema === expectedSchema) @@ -98,7 +98,7 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { } override def testUpdateColumnNullability(tbl: String): Unit = { - sql(s"CREATE TABLE $tbl (ID STRING NOT NULL) USING _") + sql(s"CREATE TABLE $tbl (ID STRING NOT NULL)") // Update nullability is unsupported for mysql db. 
val msg = intercept[AnalysisException] { sql(s"ALTER TABLE $tbl ALTER COLUMN ID DROP NOT NULL") @@ -108,7 +108,7 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { } override def testCreateTableWithProperty(tbl: String): Unit = { - sql(s"CREATE TABLE $tbl (ID INT) USING _" + + sql(s"CREATE TABLE $tbl (ID INT)" + s" TBLPROPERTIES('ENGINE'='InnoDB', 'DEFAULT CHARACTER SET'='utf8')") var t = spark.table(tbl) var expectedSchema = new StructType().add("ID", IntegerType) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala index 403f16aac6356..241c9c1409550 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala @@ -73,7 +73,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest override def dataPreparation(conn: Connection): Unit = {} override def testUpdateColumnType(tbl: String): Unit = { - sql(s"CREATE TABLE $tbl (ID INTEGER) USING _") + sql(s"CREATE TABLE $tbl (ID INTEGER)") var t = spark.table(tbl) var expectedSchema = new StructType().add("ID", DecimalType(10, 0)) assert(t.schema === expectedSchema) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala index df2c865e4d13b..a7fd9aa9a9868 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala @@ -52,7 +52,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTes override def dataPreparation(conn: Connection): Unit = {} override def testUpdateColumnType(tbl: String): Unit = { - sql(s"CREATE TABLE $tbl (ID INTEGER) USING _") + sql(s"CREATE TABLE $tbl (ID INTEGER)") var t = spark.table(tbl) var expectedSchema = new StructType().add("ID", IntegerType) assert(t.schema === expectedSchema) @@ -68,7 +68,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTes } override def testCreateTableWithProperty(tbl: String): Unit = { - sql(s"CREATE TABLE $tbl (ID INT) USING _" + + sql(s"CREATE TABLE $tbl (ID INT)" + s" TBLPROPERTIES('TABLESPACE'='pg_default')") var t = spark.table(tbl) var expectedSchema = new StructType().add("ID", IntegerType) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index e36555e514c9f..a2dd8375834bf 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -33,7 +33,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession { def notSupportsTableComment: Boolean = false def testUpdateColumnNullability(tbl: String): Unit = { - sql(s"CREATE TABLE $catalogName.alt_table (ID STRING NOT NULL) USING _") + sql(s"CREATE TABLE $catalogName.alt_table (ID STRING NOT NULL)") var t = 
spark.table(s"$catalogName.alt_table") // nullable is true in the expectedSchema because Spark always sets nullable to true // regardless of the JDBC metadata https://github.com/apache/spark/pull/18445 @@ -62,7 +62,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession { test("SPARK-33034: ALTER TABLE ... add new columns") { withTable(s"$catalogName.alt_table") { - sql(s"CREATE TABLE $catalogName.alt_table (ID STRING) USING _") + sql(s"CREATE TABLE $catalogName.alt_table (ID STRING)") var t = spark.table(s"$catalogName.alt_table") var expectedSchema = new StructType().add("ID", StringType) assert(t.schema === expectedSchema) @@ -89,7 +89,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession { test("SPARK-33034: ALTER TABLE ... drop column") { withTable(s"$catalogName.alt_table") { - sql(s"CREATE TABLE $catalogName.alt_table (C1 INTEGER, C2 STRING, c3 INTEGER) USING _") + sql(s"CREATE TABLE $catalogName.alt_table (C1 INTEGER, C2 STRING, c3 INTEGER)") sql(s"ALTER TABLE $catalogName.alt_table DROP COLUMN C1") sql(s"ALTER TABLE $catalogName.alt_table DROP COLUMN c3") val t = spark.table(s"$catalogName.alt_table") @@ -127,7 +127,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession { test("SPARK-33034: ALTER TABLE ... rename column") { withTable(s"$catalogName.alt_table") { sql(s"CREATE TABLE $catalogName.alt_table (ID STRING NOT NULL," + - s" ID1 STRING NOT NULL, ID2 STRING NOT NULL) USING _") + s" ID1 STRING NOT NULL, ID2 STRING NOT NULL)") testRenameColumn(s"$catalogName.alt_table") // Rename to already existing column val msg = intercept[AnalysisException] { @@ -157,7 +157,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession { withTable(s"$catalogName.new_table") { val logAppender = new LogAppender("table comment") withLogAppender(logAppender) { - sql(s"CREATE TABLE $catalogName.new_table(i INT) USING _ COMMENT 'this is a comment'") + sql(s"CREATE TABLE $catalogName.new_table(i INT) COMMENT 'this is a comment'") } val createCommentWarning = logAppender.loggingEvents .filter(_.getLevel == Level.WARN) @@ -170,7 +170,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession { test("CREATE TABLE with table property") { withTable(s"$catalogName.new_table") { val m = intercept[AnalysisException] { - sql(s"CREATE TABLE $catalogName.new_table (i INT) USING _ TBLPROPERTIES('a'='1')") + sql(s"CREATE TABLE $catalogName.new_table (i INT) TBLPROPERTIES('a'='1')") }.message assert(m.contains("Failed table creation")) testCreateTableWithProperty(s"$catalogName.new_table") From e02324f2dda3510dd229199e97c87ffdcc766a18 Mon Sep 17 00:00:00 2001 From: Linhong Liu Date: Fri, 4 Dec 2020 06:48:49 +0000 Subject: [PATCH 0659/1009] [SPARK-33142][SPARK-33647][SQL] Store SQL text for SQL temp view ### What changes were proposed in this pull request? Currently, in spark, the temp view is saved as its analyzed logical plan, while the permanent view is kept in HMS with its origin SQL text. As a result, permanent and temporary views have different behaviors in some cases. In this PR we store the SQL text for temporary view in order to unify the behavior between permanent and temporary views. ### Why are the changes needed? to unify the behavior between permanent and temporary views ### Does this PR introduce _any_ user-facing change? Yes, with this PR, the temporary view will be re-analyzed when it's referred. So if the underlying datasource changed, the view will also be updated. ### How was this patch tested? 
existing and newly added test cases Closes #30567 from linhongliu-db/SPARK-33142. Authored-by: Linhong Liu Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 43 ++-- .../sql/catalyst/analysis/CheckAnalysis.scala | 2 +- .../spark/sql/catalyst/analysis/view.scala | 4 +- .../sql/catalyst/catalog/SessionCatalog.scala | 57 +++-- .../sql/catalyst/catalog/interface.scala | 48 ++++ .../plans/logical/basicLogicalOperators.scala | 42 ++- .../apache/spark/sql/internal/SQLConf.scala | 11 + .../sql/catalyst/analysis/AnalysisSuite.scala | 1 + .../catalog/SessionCatalogSuite.scala | 4 +- .../command/AnalyzeColumnCommand.scala | 5 +- .../spark/sql/execution/command/views.scala | 239 ++++++++++++++---- .../sql-tests/results/describe.sql.out | 4 +- .../sql-tests/results/group-by-filter.sql.out | 56 ++-- .../results/postgreSQL/create_view.sql.out | 28 +- .../results/show-tblproperties.sql.out | 2 + .../invalid-correlation.sql.out | 7 +- .../apache/spark/sql/CachedTableSuite.scala | 22 -- .../spark/sql/execution/SQLViewSuite.scala | 84 ++++++ .../sql/execution/SQLViewTestSuite.scala | 203 +++++++++++++++ .../SparkGetColumnsOperation.scala | 2 +- 20 files changed, 691 insertions(+), 173 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 6b06cf13262d4..ebe1004872ef6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -105,7 +105,8 @@ object FakeV2SessionCatalog extends TableCatalog { case class AnalysisContext( catalogAndNamespace: Seq[String] = Nil, nestedViewDepth: Int = 0, - relationCache: mutable.Map[Seq[String], LogicalPlan] = mutable.Map.empty) + relationCache: mutable.Map[Seq[String], LogicalPlan] = mutable.Map.empty, + referredTempViewNames: Seq[Seq[String]] = Seq.empty) object AnalysisContext { private val value = new ThreadLocal[AnalysisContext]() { @@ -117,10 +118,14 @@ object AnalysisContext { private def set(context: AnalysisContext): Unit = value.set(context) - def withAnalysisContext[A](catalogAndNamespace: Seq[String])(f: => A): A = { + def withAnalysisContext[A]( + catalogAndNamespace: Seq[String], referredTempViewNames: Seq[Seq[String]])(f: => A): A = { val originContext = value.get() val context = AnalysisContext( - catalogAndNamespace, originContext.nestedViewDepth + 1, originContext.relationCache) + catalogAndNamespace, + originContext.nestedViewDepth + 1, + originContext.relationCache, + referredTempViewNames) set(context) try f finally { set(originContext) } } @@ -838,6 +843,7 @@ class Analyzer(override val catalogManager: CatalogManager) } private def isResolvingView: Boolean = AnalysisContext.get.catalogAndNamespace.nonEmpty + private def referredTempViewNames: Seq[Seq[String]] = AnalysisContext.get.referredTempViewNames /** * Resolve relations to temp views. This is not an actual rule, and is called by @@ -882,7 +888,7 @@ class Analyzer(override val catalogManager: CatalogManager) def lookupTempView( identifier: Seq[String], isStreaming: Boolean = false): Option[LogicalPlan] = { // Permanent View can't refer to temp views, no need to lookup at all. 
- if (isResolvingView) return None + if (isResolvingView && !referredTempViewNames.contains(identifier)) return None val tmpView = identifier match { case Seq(part1) => v1SessionCatalog.lookupTempView(part1) @@ -894,14 +900,14 @@ class Analyzer(override val catalogManager: CatalogManager) throw new AnalysisException(s"${identifier.quoted} is not a temp view of streaming " + s"logical plan, please use batch API such as `DataFrameReader.table` to read it.") } - tmpView + tmpView.map(ResolveRelations.resolveViews) } } // If we are resolving relations insides views, we need to expand single-part relation names with // the current catalog and namespace of when the view was created. private def expandRelationName(nameParts: Seq[String]): Seq[String] = { - if (!isResolvingView) return nameParts + if (!isResolvingView || referredTempViewNames.contains(nameParts)) return nameParts if (nameParts.length == 1) { AnalysisContext.get.catalogAndNamespace :+ nameParts.head @@ -1022,23 +1028,24 @@ class Analyzer(override val catalogManager: CatalogManager) // look at `AnalysisContext.catalogAndNamespace` when resolving relations with single-part name. // If `AnalysisContext.catalogAndNamespace` is non-empty, analyzer will expand single-part names // with it, instead of current catalog and namespace. - private def resolveViews(plan: LogicalPlan): LogicalPlan = plan match { + def resolveViews(plan: LogicalPlan): LogicalPlan = plan match { // The view's child should be a logical plan parsed from the `desc.viewText`, the variable // `viewText` should be defined, or else we throw an error on the generation of the View // operator. - case view @ View(desc, _, child) if !child.resolved => + case view @ View(desc, isTempView, _, child) if !child.resolved => // Resolve all the UnresolvedRelations and Views in the child. - val newChild = AnalysisContext.withAnalysisContext(desc.viewCatalogAndNamespace) { - if (AnalysisContext.get.nestedViewDepth > conf.maxNestedViewDepth) { - view.failAnalysis(s"The depth of view ${desc.identifier} exceeds the maximum " + - s"view resolution depth (${conf.maxNestedViewDepth}). Analysis is aborted to " + - s"avoid errors. Increase the value of ${SQLConf.MAX_NESTED_VIEW_DEPTH.key} to work " + - "around this.") - } - SQLConf.withExistingConf(View.effectiveSQLConf(desc.viewSQLConfigs)) { - executeSameContext(child) + val newChild = AnalysisContext.withAnalysisContext( + desc.viewCatalogAndNamespace, desc.viewReferredTempViewNames) { + if (AnalysisContext.get.nestedViewDepth > conf.maxNestedViewDepth) { + view.failAnalysis(s"The depth of view ${desc.identifier} exceeds the maximum " + + s"view resolution depth (${conf.maxNestedViewDepth}). Analysis is aborted to " + + s"avoid errors. 
Increase the value of ${SQLConf.MAX_NESTED_VIEW_DEPTH.key} to " + + "work around this.") + } + SQLConf.withExistingConf(View.effectiveSQLConf(desc.viewSQLConfigs, isTempView)) { + executeSameContext(child) + } } - } view.copy(child = newChild) case p @ SubqueryAlias(_, view: View) => p.copy(child = resolveViews(view)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 64496a953861a..11c4883992560 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -407,7 +407,7 @@ trait CheckAnalysis extends PredicateHelper { // output, nor with the query column names, throw an AnalysisException. // If the view's child output can't up cast to the view output, // throw an AnalysisException, too. - case v @ View(desc, output, child) if child.resolved && !v.sameOutput(child) => + case v @ View(desc, _, output, child) if child.resolved && !v.sameOutput(child) => val queryColumnNames = desc.viewQueryColumnNames val queryOutput = if (queryColumnNames.nonEmpty) { if (output.length != queryColumnNames.length) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/view.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/view.scala index 06de023098a1c..dfadf0a539948 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/view.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/view.scala @@ -56,7 +56,7 @@ object EliminateView extends Rule[LogicalPlan] with CastSupport { override def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { // The child has the different output attributes with the View operator. Adds a Project over // the child of the view. - case v @ View(desc, output, child) if child.resolved && !v.sameOutput(child) => + case v @ View(desc, _, output, child) if child.resolved && !v.sameOutput(child) => val resolver = conf.resolver val queryColumnNames = desc.viewQueryColumnNames val queryOutput = if (queryColumnNames.nonEmpty) { @@ -83,7 +83,7 @@ object EliminateView extends Rule[LogicalPlan] with CastSupport { // The child should have the same output attributes with the View operator, so we simply // remove the View operator. - case View(_, _, child) => + case View(_, _, _, child) => child } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 01bce079610ae..29481b85e9f2e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -606,7 +606,7 @@ class SessionCatalog( * Return a local temporary view exactly as it was stored. */ def getTempView(name: String): Option[LogicalPlan] = synchronized { - tempViews.get(formatTableName(name)) + tempViews.get(formatTableName(name)).map(getTempViewPlan) } def getTempViewNames(): Seq[String] = synchronized { @@ -617,7 +617,7 @@ class SessionCatalog( * Return a global temporary view exactly as it was stored. 
*/ def getGlobalTempView(name: String): Option[LogicalPlan] = { - globalTempViewManager.get(formatTableName(name)) + globalTempViewManager.get(formatTableName(name)).map(getTempViewPlan) } /** @@ -654,20 +654,25 @@ class SessionCatalog( def getTempViewOrPermanentTableMetadata(name: TableIdentifier): CatalogTable = synchronized { val table = formatTableName(name.table) if (name.database.isEmpty) { - getTempView(table).map { plan => - CatalogTable( - identifier = TableIdentifier(table), - tableType = CatalogTableType.VIEW, - storage = CatalogStorageFormat.empty, - schema = plan.output.toStructType) + getTempView(table).map { + case TemporaryViewRelation(metadata) => metadata + case plan => + CatalogTable( + identifier = TableIdentifier(table), + tableType = CatalogTableType.VIEW, + storage = CatalogStorageFormat.empty, + schema = plan.output.toStructType) }.getOrElse(getTableMetadata(name)) } else if (formatDatabaseName(name.database.get) == globalTempViewManager.database) { - globalTempViewManager.get(table).map { plan => - CatalogTable( - identifier = TableIdentifier(table, Some(globalTempViewManager.database)), - tableType = CatalogTableType.VIEW, - storage = CatalogStorageFormat.empty, - schema = plan.output.toStructType) + val a = globalTempViewManager.get(table) + globalTempViewManager.get(table).map { + case TemporaryViewRelation(metadata) => metadata + case plan => + CatalogTable( + identifier = TableIdentifier(table, Some(globalTempViewManager.database)), + tableType = CatalogTableType.VIEW, + storage = CatalogStorageFormat.empty, + schema = plan.output.toStructType) }.getOrElse(throw new NoSuchTableException(globalTempViewManager.database, table)) } else { getTableMetadata(name) @@ -777,13 +782,13 @@ class SessionCatalog( val table = formatTableName(name.table) if (db == globalTempViewManager.database) { globalTempViewManager.get(table).map { viewDef => - SubqueryAlias(table, db, viewDef) + SubqueryAlias(table, db, getTempViewPlan(viewDef)) }.getOrElse(throw new NoSuchTableException(db, table)) } else if (name.database.isDefined || !tempViews.contains(table)) { val metadata = externalCatalog.getTable(db, table) getRelation(metadata) } else { - SubqueryAlias(table, tempViews(table)) + SubqueryAlias(table, getTempViewPlan(tempViews(table))) } } } @@ -797,26 +802,24 @@ class SessionCatalog( val multiParts = Seq(CatalogManager.SESSION_CATALOG_NAME, db, table) if (metadata.tableType == CatalogTableType.VIEW) { - val viewText = metadata.viewText.getOrElse(sys.error("Invalid view without text.")) - val viewConfigs = metadata.viewSQLConfigs - val viewPlan = SQLConf.withExistingConf(View.effectiveSQLConf(viewConfigs)) { - parser.parsePlan(viewText) - } - - logDebug(s"'$viewText' will be used for the view($table) with configs: $viewConfigs.") // The relation is a view, so we wrap the relation by: // 1. Add a [[View]] operator over the relation to keep track of the view desc; // 2. Wrap the logical plan in a [[SubqueryAlias]] which tracks the name of the view. 
- val child = View( - desc = metadata, - output = metadata.schema.toAttributes, - child = viewPlan) + val child = View.fromCatalogTable(metadata, isTempView = false, parser) SubqueryAlias(multiParts, child) } else { SubqueryAlias(multiParts, UnresolvedCatalogRelation(metadata, options)) } } + def getTempViewPlan(plan: LogicalPlan): LogicalPlan = { + plan match { + case viewInfo: TemporaryViewRelation => + View.fromCatalogTable(viewInfo.tableMeta, isTempView = true, parser) + case v => v + } + } + def lookupTempView(table: String): Option[SubqueryAlias] = { val formattedTable = formatTableName(table) getTempView(formattedTable).map { view => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 621ad84f1f5ec..6743b052fb3a1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -25,6 +25,8 @@ import scala.collection.mutable import scala.util.control.NonFatal import org.apache.commons.lang3.StringUtils +import org.json4s.JsonAST.{JArray, JString} +import org.json4s.jackson.JsonMethods._ import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException @@ -337,6 +339,40 @@ case class CatalogTable( ) } + /** + * Return temporary view names the current view was referred. should be empty if the + * CatalogTable is not a Temporary View or created by older versions of Spark(before 3.1.0). + */ + def viewReferredTempViewNames: Seq[Seq[String]] = { + try { + properties.get(VIEW_REFERRED_TEMP_VIEW_NAMES).map { json => + parse(json).asInstanceOf[JArray].arr.map { namePartsJson => + namePartsJson.asInstanceOf[JArray].arr.map(_.asInstanceOf[JString].s) + } + }.getOrElse(Seq.empty) + } catch { + case e: Exception => + throw new AnalysisException( + "corrupted view referred temp view names in catalog", cause = Some(e)) + } + } + + /** + * Return temporary function names the current view was referred. should be empty if the + * CatalogTable is not a Temporary View or created by older versions of Spark(before 3.1.0). + */ + def viewReferredTempFunctionNames: Seq[String] = { + try { + properties.get(VIEW_REFERRED_TEMP_FUNCTION_NAMES).map { json => + parse(json).asInstanceOf[JArray].arr.map(_.asInstanceOf[JString].s) + }.getOrElse(Seq.empty) + } catch { + case e: Exception => + throw new AnalysisException( + "corrupted view referred temp functions names in catalog", cause = Some(e)) + } + } + /** Syntactic sugar to update a field in `storage`. */ def withNewStorage( locationUri: Option[URI] = storage.locationUri, @@ -432,6 +468,9 @@ object CatalogTable { val VIEW_QUERY_OUTPUT_PREFIX = VIEW_PREFIX + "query.out." val VIEW_QUERY_OUTPUT_NUM_COLUMNS = VIEW_QUERY_OUTPUT_PREFIX + "numCols" val VIEW_QUERY_OUTPUT_COLUMN_NAME_PREFIX = VIEW_QUERY_OUTPUT_PREFIX + "col." 
+ + val VIEW_REFERRED_TEMP_VIEW_NAMES = VIEW_PREFIX + "referredTempViewNames" + val VIEW_REFERRED_TEMP_FUNCTION_NAMES = VIEW_PREFIX + "referredTempFunctionsNames" } /** @@ -667,6 +706,15 @@ case class UnresolvedCatalogRelation( override def output: Seq[Attribute] = Nil } +/** + * A wrapper to store the temporary view info, will be kept in `SessionCatalog` + * and will be transformed to `View` during analysis + */ +case class TemporaryViewRelation(tableMeta: CatalogTable) extends LeafNode { + override lazy val resolved: Boolean = false + override def output: Seq[Attribute] = Nil +} + /** * A `LogicalPlan` that represents a hive table. * diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index a524ed4ff73e9..c8b7e8651686a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -18,10 +18,11 @@ package org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.AliasIdentifier -import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation +import org.apache.spark.sql.catalyst.analysis.{EliminateView, MultiInstanceRelation} import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression +import org.apache.spark.sql.catalyst.parser.ParserInterface import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, RangePartitioning, RoundRobinPartitioning} import org.apache.spark.sql.catalyst.util.truncatedString @@ -437,6 +438,7 @@ case class InsertIntoDir( */ case class View( desc: CatalogTable, + isTempView: Boolean, output: Seq[Attribute], child: LogicalPlan) extends LogicalPlan with MultiInstanceRelation { @@ -451,12 +453,31 @@ case class View( override def simpleString(maxFields: Int): String = { s"View (${desc.identifier}, ${output.mkString("[", ",", "]")})" } + + override def doCanonicalize(): LogicalPlan = { + def sameOutput( + outerProject: Seq[NamedExpression], innerProject: Seq[NamedExpression]): Boolean = { + outerProject.length == innerProject.length && + outerProject.zip(innerProject).forall { + case(outer, inner) => outer.name == inner.name && outer.dataType == inner.dataType + } + } + + val eliminated = EliminateView(this) match { + case Project(viewProjectList, child @ Project(queryProjectList, _)) + if sameOutput(viewProjectList, queryProjectList) => + child + case other => other + } + eliminated.canonicalized + } } object View { - def effectiveSQLConf(configs: Map[String, String]): SQLConf = { + def effectiveSQLConf(configs: Map[String, String], isTempView: Boolean): SQLConf = { val activeConf = SQLConf.get - if (activeConf.useCurrentSQLConfigsForView) return activeConf + // For temporary view, we always use captured sql configs + if (activeConf.useCurrentSQLConfigsForView && !isTempView) return activeConf val sqlConf = new SQLConf() for ((k, v) <- configs) { @@ -467,6 +488,21 @@ object View { sqlConf.setConf(SQLConf.MAX_NESTED_VIEW_DEPTH, activeConf.maxNestedViewDepth) sqlConf } + + def fromCatalogTable( + metadata: CatalogTable, isTempView: Boolean, parser: ParserInterface): View = { + val viewText = 
metadata.viewText.getOrElse(sys.error("Invalid view without text.")) + val viewConfigs = metadata.viewSQLConfigs + val viewPlan = + SQLConf.withExistingConf(effectiveSQLConf(viewConfigs, isTempView = isTempView)) { + parser.parsePlan(viewText) + } + View( + desc = metadata, + isTempView = isTempView, + output = metadata.schema.toAttributes, + child = viewPlan) + } } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 07cd41b06de21..496065f85fbbf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1489,6 +1489,15 @@ object SQLConf { .booleanConf .createWithDefault(false) + val STORE_ANALYZED_PLAN_FOR_VIEW = + buildConf("spark.sql.legacy.storeAnalyzedPlanForView") + .internal() + .doc("When true, analyzed plan instead of SQL text will be stored when creating " + + "temporary view") + .version("3.1.0") + .booleanConf + .createWithDefault(false) + val STREAMING_FILE_COMMIT_PROTOCOL_CLASS = buildConf("spark.sql.streaming.commitProtocolClass") .version("2.1.0") @@ -3435,6 +3444,8 @@ class SQLConf extends Serializable with Logging { def useCurrentSQLConfigsForView: Boolean = getConf(SQLConf.USE_CURRENT_SQL_CONFIGS_FOR_VIEW) + def storeAnalyzedPlanForView: Boolean = getConf(SQLConf.STORE_ANALYZED_PLAN_FOR_VIEW) + def starSchemaDetection: Boolean = getConf(STARSCHEMA_DETECTION) def starSchemaFTRatio: Double = getConf(STARSCHEMA_FACT_TABLE_RATIO) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 0afa811e5d590..f5bfdc5e695e0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -665,6 +665,7 @@ class AnalysisSuite extends AnalysisTest with Matchers { tableType = CatalogTableType.VIEW, storage = CatalogStorageFormat.empty, schema = StructType(Seq(StructField("a", IntegerType), StructField("b", StringType)))), + isTempView = false, output = Seq(Symbol("a").int, Symbol("b").string), child = relation) val tz = Option(conf.sessionLocalTimeZone) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala index f30ae70dceffa..98f9ce6fe9dbb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala @@ -646,7 +646,7 @@ abstract class SessionCatalogSuite extends AnalysisTest with Eventually { // Look up a view. 
catalog.setCurrentDatabase("default") - val view = View(desc = metadata, output = metadata.schema.toAttributes, + val view = View(desc = metadata, isTempView = false, output = metadata.schema.toAttributes, child = CatalystSqlParser.parsePlan(metadata.viewText.get)) comparePlans(catalog.lookupRelation(TableIdentifier("view1", Some("db3"))), SubqueryAlias(Seq(CatalogManager.SESSION_CATALOG_NAME, "db3", "view1"), view)) @@ -666,7 +666,7 @@ abstract class SessionCatalogSuite extends AnalysisTest with Eventually { assert(metadata.viewText.isDefined) assert(metadata.viewCatalogAndNamespace == Seq(CatalogManager.SESSION_CATALOG_NAME, "db2")) - val view = View(desc = metadata, output = metadata.schema.toAttributes, + val view = View(desc = metadata, isTempView = false, output = metadata.schema.toAttributes, child = CatalystSqlParser.parsePlan(metadata.viewText.get)) comparePlans(catalog.lookupRelation(TableIdentifier("view2", Some("db3"))), SubqueryAlias(Seq(CatalogManager.SESSION_CATALOG_NAME, "db3", "view2"), view)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala index 3b90f807b3138..641bd26c381ad 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala @@ -61,9 +61,10 @@ case class AnalyzeColumnCommand( private def analyzeColumnInCachedData(plan: LogicalPlan, sparkSession: SparkSession): Boolean = { val cacheManager = sparkSession.sharedState.cacheManager - cacheManager.lookupCachedData(plan).map { cachedData => + val planToLookup = sparkSession.sessionState.executePlan(plan).analyzed + cacheManager.lookupCachedData(planToLookup).map { cachedData => val columnsToAnalyze = getColumnsToAnalyze( - tableIdent, cachedData.plan, columnNames, allColumns) + tableIdent, cachedData.cachedRepresentation, columnNames, allColumns) cacheManager.analyzeColumnCacheQuery(sparkSession, cachedData, columnsToAnalyze) cachedData }.isDefined diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index a02f863a360f8..4ad5eddb83f43 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -19,16 +19,19 @@ package org.apache.spark.sql.execution.command import scala.collection.mutable +import org.json4s.JsonAST.{JArray, JString} +import org.json4s.jackson.JsonMethods._ + import org.apache.spark.sql.{AnalysisException, Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{GlobalTempView, LocalTempView, PersistedView, UnresolvedFunction, UnresolvedRelation, ViewType} -import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, SessionCatalog} +import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, SessionCatalog, TemporaryViewRelation} import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, SubqueryExpression} import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, View} import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper import 
org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} -import org.apache.spark.sql.types.{BooleanType, MetadataBuilder, StringType} +import org.apache.spark.sql.types.{BooleanType, MetadataBuilder, StringType, StructType} import org.apache.spark.sql.util.SchemaUtils /** @@ -107,26 +110,61 @@ case class CreateViewCommand( // When creating a permanent view, not allowed to reference temporary objects. // This should be called after `qe.assertAnalyzed()` (i.e., `child` can be resolved) - verifyTemporaryObjectsNotExists(catalog) + verifyTemporaryObjectsNotExists(catalog, isTemporary, name, child) if (viewType == LocalTempView) { - if (replace && catalog.getTempView(name.table).isDefined && - !catalog.getTempView(name.table).get.sameResult(child)) { + val samePlan = catalog.getTempView(name.table).exists { + // Don't perform sameResult check for View logical plan, since it's unresolved + case _: View => false + case other => other.sameResult(child) + } + if (replace && !samePlan) { logInfo(s"Try to uncache ${name.quotedString} before replacing.") + checkCyclicViewReference(analyzedPlan, Seq(name), name) CommandUtils.uncacheTableOrView(sparkSession, name.quotedString) } val aliasedPlan = aliasPlan(sparkSession, analyzedPlan) - catalog.createTempView(name.table, aliasedPlan, overrideIfExists = replace) + // If there is no sql text (e.g. from Dataset API), we will always store the analyzed plan + val tableDefinition = if (!conf.storeAnalyzedPlanForView && originalText.nonEmpty) { + TemporaryViewRelation( + prepareTemporaryView( + name, + sparkSession, + analyzedPlan, + aliasedPlan.schema, + originalText, + child)) + } else { + aliasedPlan + } + catalog.createTempView(name.table, tableDefinition, overrideIfExists = replace) } else if (viewType == GlobalTempView) { - if (replace && catalog.getGlobalTempView(name.table).isDefined && - !catalog.getGlobalTempView(name.table).get.sameResult(child)) { - val db = sparkSession.sessionState.conf.getConf(StaticSQLConf.GLOBAL_TEMP_DATABASE) - val globalTempView = TableIdentifier(name.table, Option(db)) - logInfo(s"Try to uncache ${globalTempView.quotedString} before replacing.") - CommandUtils.uncacheTableOrView(sparkSession, globalTempView.quotedString) + val db = sparkSession.sessionState.conf.getConf(StaticSQLConf.GLOBAL_TEMP_DATABASE) + val viewIdent = TableIdentifier(name.table, Option(db)) + val samePlan = catalog.getGlobalTempView(name.table).exists { + // Don't perform sameResult check for View logical plan, since it's unresolved + case _: View => false + case other => other.sameResult(child) + } + if (replace && !samePlan) { + logInfo(s"Try to uncache ${viewIdent.quotedString} before replacing.") + checkCyclicViewReference(analyzedPlan, Seq(viewIdent), viewIdent) + CommandUtils.uncacheTableOrView(sparkSession, viewIdent.quotedString) } val aliasedPlan = aliasPlan(sparkSession, analyzedPlan) - catalog.createGlobalTempView(name.table, aliasedPlan, overrideIfExists = replace) + val tableDefinition = if (!conf.storeAnalyzedPlanForView && originalText.nonEmpty) { + TemporaryViewRelation( + prepareTemporaryView( + viewIdent, + sparkSession, + analyzedPlan, + aliasedPlan.schema, + originalText, + child)) + } else { + aliasedPlan + } + catalog.createGlobalTempView(name.table, tableDefinition, overrideIfExists = replace) } else if (catalog.tableExists(name)) { val tableMetadata = catalog.getTableMetadata(name) if (allowExisting) { @@ -161,39 +199,6 @@ case class CreateViewCommand( Seq.empty[Row] } - /** - * Permanent views are not allowed to reference temp 
objects, including temp function and views - */ - private def verifyTemporaryObjectsNotExists(catalog: SessionCatalog): Unit = { - import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ - if (!isTemporary) { - // This func traverses the unresolved plan `child`. Below are the reasons: - // 1) Analyzer replaces unresolved temporary views by a SubqueryAlias with the corresponding - // logical plan. After replacement, it is impossible to detect whether the SubqueryAlias is - // added/generated from a temporary view. - // 2) The temp functions are represented by multiple classes. Most are inaccessible from this - // package (e.g., HiveGenericUDF). - def verify(child: LogicalPlan): Unit = { - child.collect { - // Disallow creating permanent views based on temporary views. - case UnresolvedRelation(nameParts, _, _) if catalog.isTempView(nameParts) => - throw new AnalysisException(s"Not allowed to create a permanent view $name by " + - s"referencing a temporary view ${nameParts.quoted}. " + - "Please create a temp view instead by CREATE TEMP VIEW") - case other if !other.resolved => other.expressions.flatMap(_.collect { - // Traverse subquery plan for any unresolved relations. - case e: SubqueryExpression => verify(e.plan) - // Disallow creating permanent views based on temporary UDFs. - case e: UnresolvedFunction if catalog.isTemporaryFunction(e.name) => - throw new AnalysisException(s"Not allowed to create a permanent view $name by " + - s"referencing a temporary function `${e.name}`") - }) - } - } - verify(child) - } - } - /** * If `userSpecifiedColumns` is defined, alias the analyzed plan to the user specified columns, * else return the analyzed plan directly. @@ -266,15 +271,26 @@ case class AlterViewAsCommand( qe.assertAnalyzed() val analyzedPlan = qe.analyzed - if (session.sessionState.catalog.alterTempViewDefinition(name, analyzedPlan)) { - // a local/global temp view has been altered, we are done. + if (session.sessionState.catalog.isTemporaryTable(name)) { + alterTemporaryView(session, analyzedPlan) } else { alterPermanentView(session, analyzedPlan) } - Seq.empty[Row] } + private def alterTemporaryView(session: SparkSession, analyzedPlan: LogicalPlan): Unit = { + val tableDefinition = if (conf.storeAnalyzedPlanForView) { + analyzedPlan + } else { + checkCyclicViewReference(analyzedPlan, Seq(name), name) + TemporaryViewRelation( + prepareTemporaryView( + name, session, analyzedPlan, analyzedPlan.schema, Some(originalText), query)) + } + session.sessionState.catalog.alterTempViewDefinition(name, tableDefinition) + } + private def alterPermanentView(session: SparkSession, analyzedPlan: LogicalPlan): Unit = { val viewMeta = session.sessionState.catalog.getTableMetadata(name) if (viewMeta.tableType != CatalogTableType.VIEW) { @@ -398,6 +414,34 @@ object ViewHelper { } } + /** + * Convert the temporary object names to `properties`. + */ + private def referredTempNamesToProps( + viewNames: Seq[Seq[String]], functionsNames: Seq[String]): Map[String, String] = { + val viewNamesJson = + JArray(viewNames.map(nameParts => JArray(nameParts.map(JString).toList)).toList) + val functionsNamesJson = JArray(functionsNames.map(JString).toList) + + val props = new mutable.HashMap[String, String] + props.put(VIEW_REFERRED_TEMP_VIEW_NAMES, compact(render(viewNamesJson))) + props.put(VIEW_REFERRED_TEMP_FUNCTION_NAMES, compact(render(functionsNamesJson))) + props.toMap + } + + /** + * Remove the temporary object names in `properties`. 
+ */ + private def removeReferredTempNames(properties: Map[String, String]): Map[String, String] = { + // We can't use `filterKeys` here, as the map returned by `filterKeys` is not serializable, + // while `CatalogTable` should be serializable. + properties.filterNot { case (key, _) => + key.startsWith(VIEW_REFERRED_TEMP_VIEW_NAMES) || + key.startsWith(VIEW_REFERRED_TEMP_FUNCTION_NAMES) + } + } + + /** * Generate the view properties in CatalogTable, including: * 1. view default database that is used to provide the default database name on view resolution. @@ -414,7 +458,9 @@ object ViewHelper { properties: Map[String, String], session: SparkSession, analyzedPlan: LogicalPlan, - fieldNames: Array[String]): Map[String, String] = { + fieldNames: Array[String], + tempViewNames: Seq[Seq[String]] = Seq.empty, + tempFunctionNames: Seq[String] = Seq.empty): Map[String, String] = { // for createViewCommand queryOutput may be different from fieldNames val queryOutput = analyzedPlan.schema.fieldNames @@ -427,10 +473,11 @@ object ViewHelper { // Generate the view default catalog and namespace, as well as captured SQL configs. val manager = session.sessionState.catalogManager - removeSQLConfigs(removeQueryColumnNames(properties)) ++ + removeReferredTempNames(removeSQLConfigs(removeQueryColumnNames(properties))) ++ catalogAndNamespaceToProps(manager.currentCatalog.name, manager.currentNamespace) ++ sqlConfigsToProps(conf) ++ - generateQueryColumnNames(queryOutput) + generateQueryColumnNames(queryOutput) ++ + referredTempNamesToProps(tempViewNames, tempFunctionNames) } /** @@ -481,4 +528,92 @@ object ViewHelper { } } } + + + /** + * Permanent views are not allowed to reference temp objects, including temp function and views + */ + def verifyTemporaryObjectsNotExists( + catalog: SessionCatalog, + isTemporary: Boolean, + name: TableIdentifier, + child: LogicalPlan): Unit = { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + if (!isTemporary) { + val (tempViews, tempFunctions) = collectTemporaryObjects(catalog, child) + tempViews.foreach { nameParts => + throw new AnalysisException(s"Not allowed to create a permanent view $name by " + + s"referencing a temporary view ${nameParts.quoted}. " + + "Please create a temp view instead by CREATE TEMP VIEW") + } + tempFunctions.foreach { funcName => + throw new AnalysisException(s"Not allowed to create a permanent view $name by " + + s"referencing a temporary function `${funcName}`") + } + } + } + + /** + * Collect all temporary views and functions and return the identifiers separately + * This func traverses the unresolved plan `child`. Below are the reasons: + * 1) Analyzer replaces unresolved temporary views by a SubqueryAlias with the corresponding + * logical plan. After replacement, it is impossible to detect whether the SubqueryAlias is + * added/generated from a temporary view. + * 2) The temp functions are represented by multiple classes. Most are inaccessible from this + * package (e.g., HiveGenericUDF). 
+ */ + private def collectTemporaryObjects( + catalog: SessionCatalog, child: LogicalPlan): (Seq[Seq[String]], Seq[String]) = { + def collectTempViews(child: LogicalPlan): Seq[Seq[String]] = { + child.collect { + case UnresolvedRelation(nameParts, _, _) if catalog.isTempView(nameParts) => + Seq(nameParts) + case plan if !plan.resolved => plan.expressions.flatMap(_.collect { + case e: SubqueryExpression => collectTempViews(e.plan) + }).flatten + }.flatten.distinct + } + + def collectTempFunctions(child: LogicalPlan): Seq[String] = { + child.collect { + case plan if !plan.resolved => plan.expressions.flatMap(_.collect { + case e: SubqueryExpression => collectTempFunctions(e.plan) + case e: UnresolvedFunction if catalog.isTemporaryFunction(e.name) => + Seq(e.name.funcName) + }).flatten + }.flatten.distinct + } + (collectTempViews(child), collectTempFunctions(child)) + } + + + /** + * Returns a [[CatalogTable]] that contains information for temporary view. + * Generate the view-specific properties(e.g. view default database, view query output + * column names) and store them as properties in the CatalogTable, and also creates + * the proper schema for the view. + */ + def prepareTemporaryView( + viewName: TableIdentifier, + session: SparkSession, + analyzedPlan: LogicalPlan, + viewSchema: StructType, + originalText: Option[String], + child: LogicalPlan): CatalogTable = { + + val catalog = session.sessionState.catalog + val (tempViews, tempFunctions) = collectTemporaryObjects(catalog, child) + // TBLPROPERTIES is not allowed for temporary view, so we don't use it for + // generating temporary view properties + val newProperties = generateViewProperties( + Map.empty, session, analyzedPlan, viewSchema.fieldNames, tempViews, tempFunctions) + + CatalogTable( + identifier = viewName, + tableType = CatalogTableType.VIEW, + storage = CatalogStorageFormat.empty, + schema = viewSchema, + viewText = originalText, + properties = newProperties) + } } diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out index 145c987ee5f61..2674d055ac450 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out @@ -477,7 +477,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Catalog and Namespace spark_catalog.default View Query Output Columns [a, b, c, d] -Table Properties [view.query.out.col.3=d, view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=4, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=c, view.catalogAndNamespace.part.1=default] +Table Properties [view.query.out.col.3=d, view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=4, view.referredTempViewNames=[], view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=c, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=default] -- !query @@ -501,7 +501,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Catalog and Namespace spark_catalog.default View Query Output Columns [a, b, c, d] -Table Properties [view.query.out.col.3=d, view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=4, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=c, view.catalogAndNamespace.part.1=default] +Table Properties [view.query.out.col.3=d, 
view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=4, view.referredTempViewNames=[], view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=c, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=default] -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/group-by-filter.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by-filter.sql.out index 89a4da116a6b3..149e031e8829c 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by-filter.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by-filter.sql.out @@ -795,13 +795,15 @@ IN/EXISTS predicate sub-queries can only be used in Filter/Join and a few comman : +- Project [state#x] : +- Filter (dept_id#x = outer(dept_id#x)) : +- SubqueryAlias dept -: +- Project [dept_id#x, dept_name#x, state#x] -: +- SubqueryAlias DEPT -: +- LocalRelation [dept_id#x, dept_name#x, state#x] +: +- View (`DEPT`, [dept_id#x,dept_name#x,state#x]) +: +- Project [dept_id#x, dept_name#x, state#x] +: +- SubqueryAlias DEPT +: +- LocalRelation [dept_id#x, dept_name#x, state#x] +- SubqueryAlias emp - +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - +- SubqueryAlias EMP - +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) + +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- SubqueryAlias EMP + +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] ; @@ -821,13 +823,15 @@ IN/EXISTS predicate sub-queries can only be used in Filter/Join and a few comman : +- Project [state#x] : +- Filter (dept_id#x = outer(dept_id#x)) : +- SubqueryAlias dept -: +- Project [dept_id#x, dept_name#x, state#x] -: +- SubqueryAlias DEPT -: +- LocalRelation [dept_id#x, dept_name#x, state#x] +: +- View (`DEPT`, [dept_id#x,dept_name#x,state#x]) +: +- Project [dept_id#x, dept_name#x, state#x] +: +- SubqueryAlias DEPT +: +- LocalRelation [dept_id#x, dept_name#x, state#x] +- SubqueryAlias emp - +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - +- SubqueryAlias EMP - +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) + +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- SubqueryAlias EMP + +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] ; @@ -846,13 +850,15 @@ IN/EXISTS predicate sub-queries can only be used in Filter/Join and a few comman : +- Distinct : +- Project [dept_id#x] : +- SubqueryAlias dept -: +- Project [dept_id#x, dept_name#x, state#x] -: +- SubqueryAlias DEPT -: +- LocalRelation [dept_id#x, dept_name#x, state#x] +: +- View (`DEPT`, [dept_id#x,dept_name#x,state#x]) +: +- Project [dept_id#x, dept_name#x, state#x] +: +- SubqueryAlias DEPT +: +- LocalRelation [dept_id#x, dept_name#x, state#x] +- SubqueryAlias emp - +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - +- SubqueryAlias EMP - +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) + +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- SubqueryAlias EMP + +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] ; @@ -871,13 +877,15 @@ IN/EXISTS predicate sub-queries can only be used in Filter/Join and a few comman : +- Distinct : +- Project [dept_id#x] : +- SubqueryAlias dept -: +- Project [dept_id#x, dept_name#x, 
state#x] -: +- SubqueryAlias DEPT -: +- LocalRelation [dept_id#x, dept_name#x, state#x] +: +- View (`DEPT`, [dept_id#x,dept_name#x,state#x]) +: +- Project [dept_id#x, dept_name#x, state#x] +: +- SubqueryAlias DEPT +: +- LocalRelation [dept_id#x, dept_name#x, state#x] +- SubqueryAlias emp - +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - +- SubqueryAlias EMP - +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) + +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- SubqueryAlias EMP + +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] ; diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out index 2fab32fa4b4eb..7d331f24b9215 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out @@ -257,7 +257,7 @@ View Text SELECT * FROM base_table View Original Text SELECT * FROM base_table View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -313,7 +313,7 @@ View Text SELECT * FROM base_table View Original Text SELECT * FROM base_table View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -359,7 +359,7 @@ View Original Text SELECT t1.a AS t1_a, t2.a AS t2_a WHERE t1.id = t2.id View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [t1_a, t2_a] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=t1_a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=t2_a, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=t1_a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=t2_a, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -413,7 +413,7 @@ View Text SELECT * FROM base_table WHERE id IN (SELECT 
id FROM base_t View Original Text SELECT * FROM base_table WHERE id IN (SELECT id FROM base_table2) View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -443,7 +443,7 @@ View Text SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_ View Original Text SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_table2) t2 View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [id, a] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=id, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=a, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=id, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=a, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -473,7 +473,7 @@ View Text SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_t View Original Text SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_table2) View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -503,7 +503,7 @@ View Text SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM ba View Original Text SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM base_table2) View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -533,7 +533,7 @@ View Text SELECT * FROM base_table WHERE EXISTS (SELECT 1) View Original 
Text SELECT * FROM base_table WHERE EXISTS (SELECT 1) View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -669,7 +669,7 @@ View Text SELECT * FROM t1 CROSS JOIN t2 View Original Text SELECT * FROM t1 CROSS JOIN t2 View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] -Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2] -- !query @@ -710,7 +710,7 @@ View Text SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2 View Original Text SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2 View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] -Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2] -- !query @@ -751,7 +751,7 @@ View Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 View Original Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] -Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, 
view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2] -- !query @@ -792,7 +792,7 @@ View Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.va View Original Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.value = 'xxx' View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] -Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2] -- !query @@ -894,7 +894,7 @@ BETWEEN (SELECT d FROM tbl2 WHERE c = 1) AND (SELECT e FROM tbl3 WHERE f = 2) AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f) View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [a, b] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2] -- !query @@ -933,7 +933,7 @@ AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f) AND NOT EXISTS (SELECT g FROM tbl4 LEFT JOIN tmptbl ON tbl4.h = tmptbl.j) View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [a, b] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2] -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/show-tblproperties.sql.out b/sql/core/src/test/resources/sql-tests/results/show-tblproperties.sql.out index eaaf894590d35..3fb948056dc01 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-tblproperties.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-tblproperties.sql.out @@ -64,6 +64,8 @@ view.catalogAndNamespace.part.0 spark_catalog view.catalogAndNamespace.part.1 default view.query.out.col.0 c1 view.query.out.numCols 1 +view.referredTempFunctionsNames [] +view.referredTempViewNames [] -- !query diff --git 
a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out index d703d4e9112e9..cd96eaf1b878b 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out @@ -111,7 +111,8 @@ org.apache.spark.sql.AnalysisException Expressions referencing the outer query are not supported outside of WHERE/HAVING clauses: Aggregate [min(outer(t2a#x)) AS min(outer(t2.`t2a`))#x] +- SubqueryAlias t3 - +- Project [t3a#x, t3b#x, t3c#x] - +- SubqueryAlias t3 - +- LocalRelation [t3a#x, t3b#x, t3c#x] + +- View (`t3`, [t3a#x,t3b#x,t3c#x]) + +- Project [t3a#x, t3b#x, t3c#x] + +- SubqueryAlias t3 + +- LocalRelation [t3a#x, t3b#x, t3c#x] ; diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala index ef3f4daa6dc6b..d0150616cd67e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala @@ -1239,26 +1239,4 @@ class CachedTableSuite extends QueryTest with SQLTestUtils } } } - - test("SPARK-33290: querying temporary view after REFRESH TABLE fails with FNFE") { - withTable("t") { - withTempPath { path => - withTempView("tempView1") { - Seq((1 -> "a")).toDF("i", "j").write.parquet(path.getCanonicalPath) - sql(s"CREATE TABLE t USING parquet LOCATION '${path.toURI}'") - sql("CREATE TEMPORARY VIEW tempView1 AS SELECT * FROM t") - checkAnswer(sql("SELECT * FROM tempView1"), Seq(Row(1, "a"))) - - Utils.deleteRecursively(path) - sql("REFRESH TABLE t") - checkAnswer(sql("SELECT * FROM t"), Seq.empty) - val exception = intercept[Exception] { - checkAnswer(sql("SELECT * FROM tempView1"), Seq.empty) - } - assert(exception.getMessage.contains("FileNotFoundException")) - assert(exception.getMessage.contains("REFRESH TABLE")) - } - } - } - } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 0b19f706836be..709d6321d199d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.execution +import org.apache.spark.SparkException import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.NoSuchTableException @@ -763,6 +764,89 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { } } + test("temporary view should ignore useCurrentSQLConfigsForView config") { + withTable("t") { + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + withTempView("v1") { + sql("CREATE TEMPORARY VIEW v1 AS SELECT 1/0") + withSQLConf( + USE_CURRENT_SQL_CONFIGS_FOR_VIEW.key -> "true", + ANSI_ENABLED.key -> "true") { + checkAnswer(sql("SELECT * FROM v1"), Seq(Row(null))) + } + } + } + } + + test("alter temporary view should follow current storeAnalyzedPlanForView config") { + withTable("t") { + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + withView("v1") { + withSQLConf(STORE_ANALYZED_PLAN_FOR_VIEW.key -> "true") { + sql("CREATE TEMPORARY VIEW v1 AS SELECT * FROM t") + Seq(4, 6, 
5).toDF("c1").write.mode("overwrite").format("parquet").saveAsTable("t") + val e = intercept[SparkException] { + sql("SELECT * FROM v1").collect() + }.getMessage + assert(e.contains("does not exist")) + } + + withSQLConf(STORE_ANALYZED_PLAN_FOR_VIEW.key -> "false") { + // alter view from legacy to non-legacy config + sql("ALTER VIEW v1 AS SELECT * FROM t") + Seq(1, 3, 5).toDF("c1").write.mode("overwrite").format("parquet").saveAsTable("t") + checkAnswer(sql("SELECT * FROM v1"), Seq(Row(1), Row(3), Row(5))) + } + + withSQLConf(STORE_ANALYZED_PLAN_FOR_VIEW.key -> "true") { + // alter view from non-legacy to legacy config + sql("ALTER VIEW v1 AS SELECT * FROM t") + Seq(2, 4, 6).toDF("c1").write.mode("overwrite").format("parquet").saveAsTable("t") + val e = intercept[SparkException] { + sql("SELECT * FROM v1").collect() + }.getMessage + assert(e.contains("does not exist")) + } + } + } + } + + test("local temp view refers global temp view") { + withGlobalTempView("v1") { + withTempView("v2") { + val globalTempDB = spark.sharedState.globalTempViewManager.database + sql("CREATE GLOBAL TEMPORARY VIEW v1 AS SELECT 1") + sql(s"CREATE TEMPORARY VIEW v2 AS SELECT * FROM ${globalTempDB}.v1") + checkAnswer(sql("SELECT * FROM v2"), Seq(Row(1))) + } + } + } + + test("global temp view refers local temp view") { + withTempView("v1") { + withGlobalTempView("v2") { + val globalTempDB = spark.sharedState.globalTempViewManager.database + sql("CREATE TEMPORARY VIEW v1 AS SELECT 1") + sql(s"CREATE GLOBAL TEMPORARY VIEW v2 AS SELECT * FROM v1") + checkAnswer(sql(s"SELECT * FROM ${globalTempDB}.v2"), Seq(Row(1))) + } + } + } + + test("creating local temp view should not affect existing table reference") { + withTable("t") { + withTempView("t") { + withGlobalTempView("v") { + val globalTempDB = spark.sharedState.globalTempViewManager.database + Seq(2).toDF("c1").write.format("parquet").saveAsTable("t") + sql("CREATE GLOBAL TEMPORARY VIEW v AS SELECT * FROM t") + sql("CREATE TEMPORARY VIEW t AS SELECT 1") + checkAnswer(sql(s"SELECT * FROM ${globalTempDB}.v"), Seq(Row(2))) + } + } + } + } + test("SPARK-33141: view should be parsed and analyzed with configs set when creating") { withTable("t") { withView("v1", "v2", "v3", "v4", "v5") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala new file mode 100644 index 0000000000000..fb9f5a73f6d9e --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution + +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.internal.SQLConf._ +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} + +/** + * A base suite contains a set of view related test cases for different kind of views + * Currently, the test cases in this suite should have same behavior across all kind of views + * TODO: Combine this with [[SQLViewSuite]] + */ +abstract class SQLViewTestSuite extends QueryTest with SQLTestUtils { + import testImplicits._ + + protected def viewTypeString: String + protected def formattedViewName(viewName: String): String + + def createView( + viewName: String, + sqlText: String, + columnNames: Seq[String] = Seq.empty, + replace: Boolean = false): String = { + val replaceString = if (replace) "OR REPLACE" else "" + val columnString = if (columnNames.nonEmpty) columnNames.mkString("(", ",", ")") else "" + sql(s"CREATE $replaceString $viewTypeString $viewName $columnString AS $sqlText") + formattedViewName(viewName) + } + + def checkViewOutput(viewName: String, expectedAnswer: Seq[Row]): Unit = { + checkAnswer(sql(s"SELECT * FROM $viewName"), expectedAnswer) + } + + test("change SQLConf should not change view behavior - caseSensitiveAnalysis") { + withTable("t") { + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + val viewName = createView("v1", "SELECT c1 FROM t", Seq("C1")) + withView(viewName) { + Seq("true", "false").foreach { flag => + withSQLConf(CASE_SENSITIVE.key -> flag) { + checkViewOutput(viewName, Seq(Row(2), Row(3), Row(1))) + } + } + } + } + } + + test("change SQLConf should not change view behavior - orderByOrdinal") { + withTable("t") { + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + val viewName = createView("v1", "SELECT c1 FROM t ORDER BY 1 ASC, c1 DESC", Seq("c1")) + withView(viewName) { + Seq("true", "false").foreach { flag => + withSQLConf(ORDER_BY_ORDINAL.key -> flag) { + checkViewOutput(viewName, Seq(Row(1), Row(2), Row(3))) + } + } + } + } + } + + test("change SQLConf should not change view behavior - groupByOrdinal") { + withTable("t") { + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + val viewName = createView("v1", "SELECT c1, count(c1) FROM t GROUP BY 1", Seq("c1", "count")) + withView(viewName) { + Seq("true", "false").foreach { flag => + withSQLConf(GROUP_BY_ORDINAL.key -> flag) { + checkViewOutput(viewName, Seq(Row(1, 1), Row(2, 1), Row(3, 1))) + } + } + } + } + } + + test("change SQLConf should not change view behavior - groupByAliases") { + withTable("t") { + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + val viewName = createView( + "v1", "SELECT c1 as a, count(c1) FROM t GROUP BY a", Seq("a", "count")) + withView(viewName) { + Seq("true", "false").foreach { flag => + withSQLConf(GROUP_BY_ALIASES.key -> flag) { + checkViewOutput(viewName, Seq(Row(1, 1), Row(2, 1), Row(3, 1))) + } + } + } + } + } + + test("change SQLConf should not change view behavior - ansiEnabled") { + withTable("t") { + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + val viewName = createView("v1", "SELECT 1/0", Seq("c1")) + withView(viewName) { + Seq("true", "false").foreach { flag => + withSQLConf(ANSI_ENABLED.key -> flag) { + checkViewOutput(viewName, Seq(Row(null))) + } + } + } + } + } + + test("change current database should not change view behavior") { + withTable("t") { + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + val 
viewName = createView("v1", "SELECT * from t") + withView(viewName) { + withTempDatabase { db => + sql(s"USE $db") + Seq(4, 5, 6).toDF("c1").write.format("parquet").saveAsTable("t") + checkViewOutput(viewName, Seq(Row(2), Row(3), Row(1))) + } + } + } + } + + test("view should read the new data if table is updated") { + withTable("t") { + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + val viewName = createView("v1", "SELECT c1 from t", Seq("c1")) + withView(viewName) { + Seq(9, 7, 8).toDF("c1").write.mode("overwrite").format("parquet").saveAsTable("t") + checkViewOutput(viewName, Seq(Row(9), Row(7), Row(8))) + } + } + } + + test("add column for table should not affect view output") { + withTable("t") { + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + val viewName = createView("v1", "SELECT * from t") + withView(viewName) { + sql("ALTER TABLE t ADD COLUMN (c2 INT)") + checkViewOutput(viewName, Seq(Row(2), Row(3), Row(1))) + } + } + } + + test("check cyclic view reference on CREATE OR REPLACE VIEW") { + withTable("t") { + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + val viewName1 = createView("v1", "SELECT * from t") + val viewName2 = createView("v2", s"SELECT * from $viewName1") + withView(viewName2, viewName1) { + val e = intercept[AnalysisException] { + createView("v1", s"SELECT * FROM $viewName2", replace = true) + }.getMessage + assert(e.contains("Recursive view")) + } + } + } + + test("check cyclic view reference on ALTER VIEW") { + withTable("t") { + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + val viewName1 = createView("v1", "SELECT * from t") + val viewName2 = createView("v2", s"SELECT * from $viewName1") + withView(viewName2, viewName1) { + val e = intercept[AnalysisException] { + sql(s"ALTER VIEW $viewName1 AS SELECT * FROM $viewName2") + }.getMessage + assert(e.contains("Recursive view")) + } + } + } +} + +class LocalTempViewTestSuite extends SQLViewTestSuite with SharedSparkSession { + override protected def viewTypeString: String = "TEMPORARY VIEW" + override protected def formattedViewName(viewName: String): String = viewName + +} + +class GlobalTempViewTestSuite extends SQLViewTestSuite with SharedSparkSession { + override protected def viewTypeString: String = "GLOBAL TEMPORARY VIEW" + override protected def formattedViewName(viewName: String): String = { + val globalTempDB = spark.sharedState.globalTempViewManager.database + s"$globalTempDB.$viewName" + } +} + +class PersistedViewTestSuite extends SQLViewTestSuite with SharedSparkSession { + override protected def viewTypeString: String = "VIEW" + override protected def formattedViewName(viewName: String): String = s"default.$viewName" +} diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala index 88aebb36633f6..66e6cf82922b7 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala @@ -105,7 +105,7 @@ private[hive] class SparkGetColumnsOperation( val databasePattern = Pattern.compile(CLIServiceUtils.patternToRegex(schemaName)) if (databasePattern.matcher(globalTempViewDb).matches()) { catalog.globalTempViewManager.listViewNames(tablePattern).foreach { globalTempView => - 
catalog.globalTempViewManager.get(globalTempView).foreach { plan => + catalog.getGlobalTempView(globalTempView).foreach { plan => addToRowSet(columnPattern, globalTempViewDb, globalTempView, plan.schema) } } From 15579ba1f82e321a694130d4c9db2a6524e9ae2e Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Fri, 4 Dec 2020 07:23:35 +0000 Subject: [PATCH 0660/1009] [SPARK-33430][SQL] Support namespaces in JDBC v2 Table Catalog ### What changes were proposed in this pull request? Add namespaces support in JDBC v2 Table Catalog by making ```JDBCTableCatalog``` extends```SupportsNamespaces``` ### Why are the changes needed? make v2 JDBC implementation complete ### Does this PR introduce _any_ user-facing change? Yes. Add the following to ```JDBCTableCatalog``` - listNamespaces - listNamespaces(String[] namespace) - namespaceExists(String[] namespace) - loadNamespaceMetadata(String[] namespace) - createNamespace - alterNamespace - dropNamespace ### How was this patch tested? Add new docker tests Closes #30473 from huaxingao/name_space. Authored-by: Huaxin Gao Signed-off-by: Wenchen Fan --- .../sql/jdbc/v2/PostgresNamespaceSuite.scala | 59 +++++++ .../sql/jdbc/v2/V2JDBCNamespaceTest.scala | 62 ++++++++ .../datasources/jdbc/JdbcUtils.scala | 49 ++++++ .../v2/jdbc/JDBCTableCatalog.scala | 144 +++++++++++++++++- .../apache/spark/sql/jdbc/JdbcDialects.scala | 12 +- 5 files changed, 317 insertions(+), 9 deletions(-) create mode 100644 external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala create mode 100644 external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala new file mode 100644 index 0000000000000..e534df84ce6fa --- /dev/null +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.jdbc.v2 + +import java.sql.Connection + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite} +import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.tags.DockerTest + +/** + * To run this test suite for a specific version (e.g., postgres:13.0): + * {{{ + * POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0 + * ./build/sbt -Pdocker-integration-tests "testOnly *v2.PostgresNamespaceSuite" + * }}} + */ +@DockerTest +class PostgresNamespaceSuite extends DockerJDBCIntegrationSuite with V2JDBCNamespaceTest { + override val db = new DatabaseOnDocker { + override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:13.0-alpine") + override val env = Map( + "POSTGRES_PASSWORD" -> "rootpass" + ) + override val usesIpc = false + override val jdbcPort = 5432 + override def getJdbcUrl(ip: String, port: Int): String = + s"jdbc:postgresql://$ip:$port/postgres?user=postgres&password=rootpass" + } + + val map = new CaseInsensitiveStringMap( + Map("url" -> db.getJdbcUrl(dockerIp, externalPort), + "driver" -> "org.postgresql.Driver").asJava) + + catalog.initialize("postgresql", map) + + override def dataPreparation(conn: Connection): Unit = {} + + override def builtinNamespaces: Array[Array[String]] = { + Array(Array("information_schema"), Array("pg_catalog"), Array("public")) + } +} diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala new file mode 100644 index 0000000000000..979b0784f0448 --- /dev/null +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.jdbc.v2 + +import scala.collection.JavaConverters._ + +import org.apache.log4j.Level + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.connector.catalog.NamespaceChange +import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.tags.DockerTest + +@DockerTest +private[v2] trait V2JDBCNamespaceTest extends SharedSparkSession { + val catalog = new JDBCTableCatalog() + + def builtinNamespaces: Array[Array[String]] + + test("listNamespaces: basic behavior") { + catalog.createNamespace(Array("foo"), Map("comment" -> "test comment").asJava) + assert(catalog.listNamespaces() === Array(Array("foo")) ++ builtinNamespaces) + assert(catalog.listNamespaces(Array("foo")) === Array()) + assert(catalog.namespaceExists(Array("foo")) === true) + + val logAppender = new LogAppender("catalog comment") + withLogAppender(logAppender) { + catalog.alterNamespace(Array("foo"), NamespaceChange + .setProperty("comment", "comment for foo")) + catalog.alterNamespace(Array("foo"), NamespaceChange.removeProperty("comment")) + } + val createCommentWarning = logAppender.loggingEvents + .filter(_.getLevel == Level.WARN) + .map(_.getRenderedMessage) + .exists(_.contains("catalog comment")) + assert(createCommentWarning === false) + + catalog.dropNamespace(Array("foo")) + assert(catalog.namespaceExists(Array("foo")) === false) + assert(catalog.listNamespaces() === builtinNamespaces) + val msg = intercept[AnalysisException] { + catalog.listNamespaces(Array("foo")) + }.getMessage + assert(msg.contains("Namespace 'foo' not found")) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index 5dd0d2bd74838..216fb02740500 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -927,6 +927,55 @@ object JdbcUtils extends Logging { } } + /** + * Creates a namespace. + */ + def createNamespace( + conn: Connection, + options: JDBCOptions, + namespace: String, + comment: String): Unit = { + val dialect = JdbcDialects.get(options.url) + executeStatement(conn, options, s"CREATE SCHEMA ${dialect.quoteIdentifier(namespace)}") + if (!comment.isEmpty) createNamespaceComment(conn, options, namespace, comment) + } + + def createNamespaceComment( + conn: Connection, + options: JDBCOptions, + namespace: String, + comment: String): Unit = { + val dialect = JdbcDialects.get(options.url) + try { + executeStatement( + conn, options, dialect.getSchemaCommentQuery(namespace, comment)) + } catch { + case e: Exception => + logWarning("Cannot create JDBC catalog comment. The catalog comment will be ignored.") + } + } + + def removeNamespaceComment( + conn: Connection, + options: JDBCOptions, + namespace: String): Unit = { + val dialect = JdbcDialects.get(options.url) + try { + executeStatement(conn, options, dialect.removeSchemaCommentQuery(namespace)) + } catch { + case e: Exception => + logWarning("Cannot drop JDBC catalog comment.") + } + } + + /** + * Drops a namespace from the JDBC database. 
+ */ + def dropNamespace(conn: Connection, options: JDBCOptions, namespace: String): Unit = { + val dialect = JdbcDialects.get(options.url) + executeStatement(conn, options, s"DROP SCHEMA ${dialect.quoteIdentifier(namespace)}") + } + private def executeStatement(conn: Connection, options: JDBCOptions, sql: String): Unit = { val statement = conn.createStatement try { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala index 63f802363f7c0..27558e5b0d61b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala @@ -17,13 +17,16 @@ package org.apache.spark.sql.execution.datasources.v2.jdbc import java.sql.{Connection, SQLException} +import java.util import scala.collection.JavaConverters._ +import scala.collection.mutable +import scala.collection.mutable.ArrayBuilder import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.analysis.{NoSuchNamespaceException, NoSuchTableException} -import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog, TableChange} +import org.apache.spark.sql.catalyst.analysis.{NamespaceAlreadyExistsException, NoSuchNamespaceException, NoSuchTableException} +import org.apache.spark.sql.connector.catalog.{Identifier, NamespaceChange, SupportsNamespaces, Table, TableCatalog, TableChange} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JdbcOptionsInWrite, JDBCRDD, JdbcUtils} import org.apache.spark.sql.internal.SQLConf @@ -31,7 +34,8 @@ import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcDialects} import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap -class JDBCTableCatalog extends TableCatalog with Logging { +class JDBCTableCatalog extends TableCatalog with SupportsNamespaces with Logging { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper private var catalogName: String = null private var options: JDBCOptions = _ @@ -125,12 +129,12 @@ class JDBCTableCatalog extends TableCatalog with Logging { if (!properties.isEmpty) { properties.asScala.map { case (k, v) => k match { - case "comment" => tableComment = v - case "provider" => + case TableCatalog.PROP_COMMENT => tableComment = v + case TableCatalog.PROP_PROVIDER => throw new AnalysisException("CREATE TABLE ... USING ... is not supported in" + " JDBC catalog.") - case "owner" => // owner is ignored. It is default to current user name. - case "location" => + case TableCatalog.PROP_OWNER => // owner is ignored. It is default to current user name. + case TableCatalog.PROP_LOCATION => throw new AnalysisException("CREATE TABLE ... LOCATION ... 
is not supported in" + " JDBC catalog.") case _ => tableProperties = tableProperties + " " + s"$k $v" @@ -171,6 +175,132 @@ class JDBCTableCatalog extends TableCatalog with Logging { } } + override def namespaceExists(namespace: Array[String]): Boolean = namespace match { + case Array(db) => + withConnection { conn => + val rs = conn.getMetaData.getSchemas(null, db) + while (rs.next()) { + if (rs.getString(1) == db) return true; + } + false + } + case _ => false + } + + override def listNamespaces(): Array[Array[String]] = { + withConnection { conn => + val schemaBuilder = ArrayBuilder.make[Array[String]] + val rs = conn.getMetaData.getSchemas() + while (rs.next()) { + schemaBuilder += Array(rs.getString(1)) + } + schemaBuilder.result + } + } + + override def listNamespaces(namespace: Array[String]): Array[Array[String]] = { + namespace match { + case Array() => + listNamespaces() + case Array(_) if namespaceExists(namespace) => + Array() + case _ => + throw new NoSuchNamespaceException(namespace) + } + } + + override def loadNamespaceMetadata(namespace: Array[String]): util.Map[String, String] = { + namespace match { + case Array(db) => + if (!namespaceExists(namespace)) throw new NoSuchNamespaceException(db) + mutable.HashMap[String, String]().asJava + + case _ => + throw new NoSuchNamespaceException(namespace) + } + } + + override def createNamespace( + namespace: Array[String], + metadata: util.Map[String, String]): Unit = namespace match { + case Array(db) if !namespaceExists(namespace) => + var comment = "" + if (!metadata.isEmpty) { + metadata.asScala.map { + case (k, v) => k match { + case SupportsNamespaces.PROP_COMMENT => comment = v + case SupportsNamespaces.PROP_OWNER => // ignore + case SupportsNamespaces.PROP_LOCATION => + throw new AnalysisException("CREATE NAMESPACE ... LOCATION ... 
is not supported in" + + " JDBC catalog.") + case _ => + throw new AnalysisException(s"CREATE NAMESPACE with property $k is not supported in" + + " JDBC catalog.") + } + } + } + withConnection { conn => + classifyException(s"Failed create name space: $db") { + JdbcUtils.createNamespace(conn, options, db, comment) + } + } + + case Array(_) => + throw new NamespaceAlreadyExistsException(namespace) + + case _ => + throw new IllegalArgumentException(s"Invalid namespace name: ${namespace.quoted}") + } + + override def alterNamespace(namespace: Array[String], changes: NamespaceChange*): Unit = { + namespace match { + case Array(db) => + changes.foreach { + case set: NamespaceChange.SetProperty => + if (set.property() == SupportsNamespaces.PROP_COMMENT) { + withConnection { conn => + JdbcUtils.createNamespaceComment(conn, options, db, set.value) + } + } else { + throw new AnalysisException(s"SET NAMESPACE with property ${set.property} " + + "is not supported in JDBC catalog.") + } + + case unset: NamespaceChange.RemoveProperty => + if (unset.property() == SupportsNamespaces.PROP_COMMENT) { + withConnection { conn => + JdbcUtils.removeNamespaceComment(conn, options, db) + } + } else { + throw new AnalysisException(s"Remove NAMESPACE property ${unset.property} " + + "is not supported in JDBC catalog.") + } + + case _ => + throw new AnalysisException(s"Unsupported NamespaceChange $changes in JDBC catalog.") + } + + case _ => + throw new NoSuchNamespaceException(namespace) + } + } + + override def dropNamespace(namespace: Array[String]): Boolean = namespace match { + case Array(db) if namespaceExists(namespace) => + if (listTables(Array(db)).nonEmpty) { + throw new IllegalStateException(s"Namespace ${namespace.quoted} is not empty") + } + withConnection { conn => + classifyException(s"Failed drop name space: $db") { + JdbcUtils.dropNamespace(conn, options, db) + true + } + } + + case _ => + throw new NoSuchNamespaceException(namespace) + } + private def checkNamespace(namespace: Array[String]): Unit = { // In JDBC there is no nested database/schema if (namespace.length > 1) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index b12882b72fb66..ead0a1aa3a243 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.jdbc -import java.sql.{Connection, Date, SQLFeatureNotSupportedException, Timestamp} +import java.sql.{Connection, Date, Timestamp} import scala.collection.mutable.ArrayBuilder @@ -232,7 +232,7 @@ abstract class JdbcDialect extends Serializable with Logging{ val name = updateNull.fieldNames updateClause += getUpdateColumnNullabilityQuery(tableName, name(0), updateNull.nullable()) case _ => - throw new SQLFeatureNotSupportedException(s"Unsupported TableChange $change") + throw new AnalysisException(s"Unsupported TableChange $change in JDBC catalog.") } } updateClause.result() @@ -270,6 +270,14 @@ abstract class JdbcDialect extends Serializable with Logging{ s"COMMENT ON TABLE $table IS '$comment'" } + def getSchemaCommentQuery(schema: String, comment: String): String = { + s"COMMENT ON SCHEMA ${quoteIdentifier(schema)} IS '$comment'" + } + + def removeSchemaCommentQuery(schema: String): String = { + s"COMMENT ON SCHEMA ${quoteIdentifier(schema)} IS NULL" + } + /** * Gets a dialect exception, classifies it and wraps it by `AnalysisException`. 
* @param message The error message to be placed to the returned exception. From e8380665c7e3aca446631964f49e09f264dee1c2 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 4 Dec 2020 16:24:41 +0900 Subject: [PATCH 0661/1009] [SPARK-33658][SQL] Suggest using Datetime conversion functions for invalid ANSI casting ### What changes were proposed in this pull request? Suggest users using Datetime conversion functions in the error message of invalid ANSI explicit casting. ### Why are the changes needed? In ANSI mode, explicit cast between DateTime types and Numeric types is not allowed. As of now, we have introduced new functions `UNIX_SECONDS`/`UNIX_MILLIS`/`UNIX_MICROS`/`UNIX_DATE`/`DATE_FROM_UNIX_DATE`, we can show suggestions to users so that they can complete these type conversions precisely and easily in ANSI mode. ### Does this PR introduce _any_ user-facing change? Yes, better error messages ### How was this patch tested? Unit test Closes #30603 from gengliangwang/improveErrorMsgOfExplicitCast. Authored-by: Gengliang Wang Signed-off-by: HyukjinKwon --- docs/sql-ref-ansi-compliance.md | 11 +++++++ .../spark/sql/catalyst/expressions/Cast.scala | 30 +++++++++++++++---- .../sql/catalyst/expressions/CastSuite.scala | 12 ++++++-- 3 files changed, 46 insertions(+), 7 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index c13ea2b167d93..c3e17dc22eed0 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -96,6 +96,10 @@ java.lang.NumberFormatException: invalid input syntax for type numeric: a SELECT CAST(2147483648L AS INT); java.lang.ArithmeticException: Casting 2147483648 to int causes overflow +SELECT CAST(DATE'2020-01-01' AS INT) +org.apache.spark.sql.AnalysisException: cannot resolve 'CAST(DATE '2020-01-01' AS INT)' due to data type mismatch: cannot cast date to int. +To convert values from date to int, you can use function UNIX_DATE instead. + -- `spark.sql.ansi.enabled=false` (This is a default behaviour) SELECT CAST('a' AS INT); +--------------+ @@ -111,6 +115,13 @@ SELECT CAST(2147483648L AS INT); | -2147483648| +-----------------------+ +SELECT CAST(DATE'2020-01-01' AS INT) ++------------------------------+ +|CAST(DATE '2020-01-01' AS INT)| ++------------------------------+ +| null| ++------------------------------+ + -- Examples of store assignment rules CREATE TABLE t (v INT); diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 1b2e2db932970..72bd9ca4d3d1c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -1894,6 +1894,19 @@ object AnsiCast { case _ => false } + // Show suggestion on how to complete the disallowed explicit casting with built-in type + // conversion functions. + private def suggestionOnConversionFunctions ( + from: DataType, + to: DataType, + functionNames: String): String = { + // scalastyle:off line.size.limit + s"""cannot cast ${from.catalogString} to ${to.catalogString}. + |To convert values from ${from.catalogString} to ${to.catalogString}, you can use $functionNames instead. 
+ |""".stripMargin + // scalastyle:on line.size.limit + } + def typeCheckFailureMessage( from: DataType, to: DataType, @@ -1901,12 +1914,19 @@ object AnsiCast { fallbackConfValue: String): String = (from, to) match { case (_: NumericType, TimestampType) => - // scalastyle:off line.size.limit - s""" - | cannot cast ${from.catalogString} to ${to.catalogString}. - | To convert values from ${from.catalogString} to ${to.catalogString}, you can use functions TIMESTAMP_SECONDS/TIMESTAMP_MILLIS/TIMESTAMP_MICROS instead. - |""".stripMargin + suggestionOnConversionFunctions(from, to, + "functions TIMESTAMP_SECONDS/TIMESTAMP_MILLIS/TIMESTAMP_MICROS") + + case (TimestampType, _: NumericType) => + suggestionOnConversionFunctions(from, to, "functions UNIX_SECONDS/UNIX_MILLIS/UNIX_MICROS") + + case (_: NumericType, DateType) => + suggestionOnConversionFunctions(from, to, "function DATE_FROM_UNIX_DATE") + + case (DateType, _: NumericType) => + suggestionOnConversionFunctions(from, to, "function UNIX_DATE") + // scalastyle:off line.size.limit case (_: ArrayType, StringType) => s""" | cannot cast ${from.catalogString} to ${to.catalogString} with ANSI mode on. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 35db25ec9342c..e46599dc19a8b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -850,18 +850,26 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { test("ANSI mode: disallow type conversions between Numeric types and Timestamp type") { import DataTypeTestUtils.numericTypes checkInvalidCastFromNumericType(TimestampType) + var errorMsg = + "you can use functions TIMESTAMP_SECONDS/TIMESTAMP_MILLIS/TIMESTAMP_MICROS instead" + verifyCastFailure(cast(Literal(0L), TimestampType), Some(errorMsg)) + val timestampLiteral = Literal(1L, TimestampType) + errorMsg = "you can use functions UNIX_SECONDS/UNIX_MILLIS/UNIX_MICROS instead." numericTypes.foreach { numericType => - verifyCastFailure(cast(timestampLiteral, numericType)) + verifyCastFailure(cast(timestampLiteral, numericType), Some(errorMsg)) } } test("ANSI mode: disallow type conversions between Numeric types and Date type") { import DataTypeTestUtils.numericTypes checkInvalidCastFromNumericType(DateType) + var errorMsg = "you can use function DATE_FROM_UNIX_DATE instead" + verifyCastFailure(cast(Literal(0L), DateType), Some(errorMsg)) val dateLiteral = Literal(1, DateType) + errorMsg = "you can use function UNIX_DATE instead" numericTypes.foreach { numericType => - verifyCastFailure(cast(dateLiteral, numericType)) + verifyCastFailure(cast(dateLiteral, numericType), Some(errorMsg)) } } From 94c144bdd05d6c751dcd907161e1b965e637f69c Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 4 Dec 2020 16:26:07 +0900 Subject: [PATCH 0662/1009] [SPARK-33571][SQL][DOCS] Add a ref to INT96 config from the doc for `spark.sql.legacy.parquet.datetimeRebaseModeInWrite/Read` ### What changes were proposed in this pull request? For the SQL configs `spark.sql.legacy.parquet.datetimeRebaseModeInWrite` and `spark.sql.legacy.parquet.datetimeRebaseModeInRead`, improve their descriptions by: 1. Explicitly document on which parquet types, those configs influence on 2. Refer to corresponding configs for `INT96` ### Why are the changes needed? 
To avoid user confusions like reposted in SPARK-33571, and make the config description more precise. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running `./dev/scalastyle`. Closes #30596 from MaxGekk/clarify-rebase-docs. Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- .../apache/spark/sql/internal/SQLConf.scala | 50 +++++++++++-------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 496065f85fbbf..4442581b77811 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2746,20 +2746,6 @@ object SQLConf { .booleanConf .createWithDefault(false) - val LEGACY_PARQUET_REBASE_MODE_IN_WRITE = - buildConf("spark.sql.legacy.parquet.datetimeRebaseModeInWrite") - .internal() - .doc("When LEGACY, Spark will rebase dates/timestamps from Proleptic Gregorian calendar " + - "to the legacy hybrid (Julian + Gregorian) calendar when writing Parquet files. " + - "When CORRECTED, Spark will not do rebase and write the dates/timestamps as it is. " + - "When EXCEPTION, which is the default, Spark will fail the writing if it sees " + - "ancient dates/timestamps that are ambiguous between the two calendars.") - .version("3.0.0") - .stringConf - .transform(_.toUpperCase(Locale.ROOT)) - .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) - .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString) - val LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE = buildConf("spark.sql.legacy.parquet.int96RebaseModeInWrite") .internal() @@ -2774,15 +2760,17 @@ object SQLConf { .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString) - val LEGACY_PARQUET_REBASE_MODE_IN_READ = - buildConf("spark.sql.legacy.parquet.datetimeRebaseModeInRead") + val LEGACY_PARQUET_REBASE_MODE_IN_WRITE = + buildConf("spark.sql.legacy.parquet.datetimeRebaseModeInWrite") .internal() - .doc("When LEGACY, Spark will rebase dates/timestamps from the legacy hybrid (Julian + " + - "Gregorian) calendar to Proleptic Gregorian calendar when reading Parquet files. " + - "When CORRECTED, Spark will not do rebase and read the dates/timestamps as it is. " + - "When EXCEPTION, which is the default, Spark will fail the reading if it sees " + - "ancient dates/timestamps that are ambiguous between the two calendars. This config is " + - "only effective if the writer info (like Spark, Hive) of the Parquet files is unknown.") + .doc("When LEGACY, Spark will rebase dates/timestamps from Proleptic Gregorian calendar " + + "to the legacy hybrid (Julian + Gregorian) calendar when writing Parquet files. " + + "When CORRECTED, Spark will not do rebase and write the dates/timestamps as it is. " + + "When EXCEPTION, which is the default, Spark will fail the writing if it sees " + + "ancient dates/timestamps that are ambiguous between the two calendars. " + + "This config influences on writes of the following parquet logical types: DATE, " + + "TIMESTAMP_MILLIS, TIMESTAMP_MICROS. 
The INT96 type has the separate config: " + + s"${LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key}.") .version("3.0.0") .stringConf .transform(_.toUpperCase(Locale.ROOT)) @@ -2804,6 +2792,24 @@ object SQLConf { .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString) + val LEGACY_PARQUET_REBASE_MODE_IN_READ = + buildConf("spark.sql.legacy.parquet.datetimeRebaseModeInRead") + .internal() + .doc("When LEGACY, Spark will rebase dates/timestamps from the legacy hybrid (Julian + " + + "Gregorian) calendar to Proleptic Gregorian calendar when reading Parquet files. " + + "When CORRECTED, Spark will not do rebase and read the dates/timestamps as it is. " + + "When EXCEPTION, which is the default, Spark will fail the reading if it sees " + + "ancient dates/timestamps that are ambiguous between the two calendars. This config is " + + "only effective if the writer info (like Spark, Hive) of the Parquet files is unknown. " + + "This config influences on reads of the following parquet logical types: DATE, " + + "TIMESTAMP_MILLIS, TIMESTAMP_MICROS. The INT96 type has the separate config: " + + s"${LEGACY_PARQUET_INT96_REBASE_MODE_IN_READ.key}.") + .version("3.0.0") + .stringConf + .transform(_.toUpperCase(Locale.ROOT)) + .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) + .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString) + val LEGACY_AVRO_REBASE_MODE_IN_WRITE = buildConf("spark.sql.legacy.avro.datetimeRebaseModeInWrite") .internal() From 325abf7957373161d2cf0921d35567235186d6eb Mon Sep 17 00:00:00 2001 From: Yuanjian Li Date: Fri, 4 Dec 2020 16:45:55 +0900 Subject: [PATCH 0663/1009] [SPARK-33577][SS] Add support for V1Table in stream writer table API and create table if not exist by default ### What changes were proposed in this pull request? After SPARK-32896, we have table API for stream writer but only support DataSource v2 tables. Here we add the following enhancements: - Create non-existing tables by default - Support both managed and external V1Tables ### Why are the changes needed? Make the API covers more use cases. Especially for the file provider based tables. ### Does this PR introduce _any_ user-facing change? Yes, new features added. ### How was this patch tested? Add new UTs. Closes #30521 from xuanyuanking/SPARK-33577. 
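For illustration, a minimal sketch of the extended API in use; the source, table name, and checkpoint path are made up for the example and assume an active `spark` session:

```scala
// Hypothetical usage sketch: stream into a table that does not exist yet.
// With this change the table is created on the fly with the given provider.
val query = spark.readStream
  .format("rate")                                   // built-in test source
  .load()
  .writeStream
  .format("parquet")
  .option("checkpointLocation", "/tmp/rate_sink_ckpt")
  .toTable("default.rate_sink")
```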
Authored-by: Yuanjian Li Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../sql/streaming/DataStreamWriter.scala | 101 ++++++++---- .../test/DataStreamTableAPISuite.scala | 151 ++++++++++++++---- 2 files changed, 188 insertions(+), 64 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index 9e3599712fde5..01e626e5436a4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -22,12 +22,16 @@ import java.util.concurrent.TimeoutException import scala.collection.JavaConverters._ +import org.apache.hadoop.fs.Path + import org.apache.spark.annotation.Evolving import org.apache.spark.api.java.function.VoidFunction2 import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType} +import org.apache.spark.sql.catalyst.plans.logical.CreateTableStatement import org.apache.spark.sql.catalyst.streaming.InternalOutputModes import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap -import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table, TableProvider} +import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table, TableProvider, V1Table, V2TableWithV1Fallback} import org.apache.spark.sql.connector.catalog.TableCapability._ import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.DataSource @@ -298,52 +302,85 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { /** * Starts the execution of the streaming query, which will continually output results to the given - * table as new data arrives. The returned [[StreamingQuery]] object can be used to interact with - * the stream. + * table as new data arrives. A new table will be created if the table not exists. The returned + * [[StreamingQuery]] object can be used to interact with the stream. * * @since 3.1.0 */ @throws[TimeoutException] def toTable(tableName: String): StreamingQuery = { - this.source = SOURCE_NAME_TABLE this.tableName = tableName - startInternal(None) - } - private def startInternal(path: Option[String]): StreamingQuery = { - if (source.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) { - throw new AnalysisException("Hive data source can only be used with tables, you can not " + - "write files of Hive data source directly.") - } + import df.sparkSession.sessionState.analyzer.CatalogAndIdentifier - if (source == SOURCE_NAME_TABLE) { - assertNotPartitioned(SOURCE_NAME_TABLE) + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + val originalMultipartIdentifier = df.sparkSession.sessionState.sqlParser + .parseMultipartIdentifier(tableName) + val CatalogAndIdentifier(catalog, identifier) = originalMultipartIdentifier - import df.sparkSession.sessionState.analyzer.CatalogAndIdentifier + // Currently we don't create a logical streaming writer node in logical plan, so cannot rely + // on analyzer to resolve it. Directly lookup only for temp view to provide clearer message. + // TODO (SPARK-27484): we should add the writing node before the plan is analyzed. 
+ if (df.sparkSession.sessionState.catalog.isTempView(originalMultipartIdentifier)) { + throw new AnalysisException(s"Temporary view $tableName doesn't support streaming write") + } + if (!catalog.asTableCatalog.tableExists(identifier)) { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ - val originalMultipartIdentifier = df.sparkSession.sessionState.sqlParser - .parseMultipartIdentifier(tableName) - val CatalogAndIdentifier(catalog, identifier) = originalMultipartIdentifier - - // Currently we don't create a logical streaming writer node in logical plan, so cannot rely - // on analyzer to resolve it. Directly lookup only for temp view to provide clearer message. - // TODO (SPARK-27484): we should add the writing node before the plan is analyzed. - if (df.sparkSession.sessionState.catalog.isTempView(originalMultipartIdentifier)) { - throw new AnalysisException(s"Temporary view $tableName doesn't support streaming write") - } + /** + * Note, currently the new table creation by this API doesn't fully cover the V2 table. + * TODO (SPARK-33638): Full support of v2 table creation + */ + val cmd = CreateTableStatement( + originalMultipartIdentifier, + df.schema.asNullable, + partitioningColumns.getOrElse(Nil).asTransforms.toSeq, + None, + Map.empty[String, String], + Some(source), + Map.empty[String, String], + extraOptions.get("path"), + None, + None, + external = false, + ifNotExists = false) + Dataset.ofRows(df.sparkSession, cmd) + } - val tableInstance = catalog.asTableCatalog.loadTable(identifier) + val tableInstance = catalog.asTableCatalog.loadTable(identifier) - import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ - val sink = tableInstance match { - case t: SupportsWrite if t.supports(STREAMING_WRITE) => t - case t => throw new AnalysisException(s"Table $tableName doesn't support streaming " + - s"write - $t") + def writeToV1Table(table: CatalogTable): StreamingQuery = { + if (table.tableType == CatalogTableType.VIEW) { + throw new AnalysisException(s"Streaming into views $tableName is not supported.") + } + require(table.provider.isDefined) + if (source != table.provider.get) { + throw new AnalysisException(s"The input source($source) is different from the table " + + s"$tableName's data source provider(${table.provider.get}).") } + format(table.provider.get) + .option("path", new Path(table.location).toString).start() + } - startQuery(sink, extraOptions) - } else if (source == SOURCE_NAME_MEMORY) { + import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ + tableInstance match { + case t: SupportsWrite if t.supports(STREAMING_WRITE) => startQuery(t, extraOptions) + case t: V2TableWithV1Fallback => + writeToV1Table(t.v1Table) + case t: V1Table => + writeToV1Table(t.v1Table) + case t => throw new AnalysisException(s"Table $tableName doesn't support streaming " + + s"write - $t") + } + } + + private def startInternal(path: Option[String]): StreamingQuery = { + if (source.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) { + throw new AnalysisException("Hive data source can only be used with tables, you can not " + + "write files of Hive data source directly.") + } + + if (source == SOURCE_NAME_MEMORY) { assertNotPartitioned(SOURCE_NAME_MEMORY) if (extraOptions.get("queryName").isEmpty) { throw new AnalysisException("queryName must be specified for memory sink") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala index bf850432d5c0e..0296366f3578b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala @@ -26,7 +26,7 @@ import org.scalatest.BeforeAndAfter import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 import org.apache.spark.sql.connector.{FakeV2Provider, InMemoryTableCatalog, InMemoryTableSessionCatalog} @@ -39,6 +39,7 @@ import org.apache.spark.sql.streaming.StreamTest import org.apache.spark.sql.streaming.sources.FakeScanBuilder import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.util.Utils class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { import testImplicits._ @@ -175,21 +176,24 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { test("write: write to table with custom catalog & no namespace") { val tableIdentifier = "testcat.table_name" - spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING foo") - checkAnswer(spark.table(tableIdentifier), Seq.empty) + withTable(tableIdentifier) { + spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING foo") + checkAnswer(spark.table(tableIdentifier), Seq.empty) - runTestWithStreamAppend(tableIdentifier) + runTestWithStreamAppend(tableIdentifier) + } } test("write: write to table with custom catalog & namespace") { spark.sql("CREATE NAMESPACE testcat.ns") - val tableIdentifier = "testcat.ns.table_name" - spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING foo") - checkAnswer(spark.table(tableIdentifier), Seq.empty) + withTable(tableIdentifier) { + spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING foo") + checkAnswer(spark.table(tableIdentifier), Seq.empty) - runTestWithStreamAppend(tableIdentifier) + runTestWithStreamAppend(tableIdentifier) + } } test("write: write to table with default session catalog") { @@ -200,35 +204,19 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { spark.sql("CREATE NAMESPACE ns") val tableIdentifier = "ns.table_name" - spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING $v2Source") - checkAnswer(spark.table(tableIdentifier), Seq.empty) + withTable(tableIdentifier) { + spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING $v2Source") + checkAnswer(spark.table(tableIdentifier), Seq.empty) - runTestWithStreamAppend(tableIdentifier) + runTestWithStreamAppend(tableIdentifier) + } } test("write: write to non-exist table with custom catalog") { val tableIdentifier = "testcat.nonexisttable" - spark.sql("CREATE NAMESPACE testcat.ns") - - withTempDir { checkpointDir => - val exc = intercept[NoSuchTableException] { - runStreamQueryAppendMode(tableIdentifier, checkpointDir, Seq.empty, Seq.empty) - } - assert(exc.getMessage.contains("nonexisttable")) - } - } - - test("write: write to file provider based table isn't allowed yet") { - val tableIdentifier = 
"table_name" - - spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING parquet") - checkAnswer(spark.table(tableIdentifier), Seq.empty) - withTempDir { checkpointDir => - val exc = intercept[AnalysisException] { - runStreamQueryAppendMode(tableIdentifier, checkpointDir, Seq.empty, Seq.empty) - } - assert(exc.getMessage.contains("doesn't support streaming write")) + withTable(tableIdentifier) { + runTestWithStreamAppend(tableIdentifier) } } @@ -262,8 +250,107 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { val exc = intercept[AnalysisException] { runStreamQueryAppendMode(viewIdentifier, checkpointDir, Seq.empty, Seq.empty) } - assert(exc.getMessage.contains("doesn't support streaming write")) + assert(exc.getMessage.contains(s"Streaming into views $viewIdentifier is not supported")) + } + } + + test("write: write to an external table") { + withTempDir { dir => + val tableName = "stream_test" + withTable(tableName) { + checkForStreamTable(Some(dir), tableName) + } + } + } + + test("write: write to a managed table") { + val tableName = "stream_test" + withTable(tableName) { + checkForStreamTable(None, tableName) + } + } + + test("write: write to an external table with existing path") { + withTempDir { dir => + val tableName = "stream_test" + withTable(tableName) { + // The file written by batch will not be seen after the table was written by a streaming + // query. This is because we loads files from the metadata log instead of listing them + // using HDFS API. + Seq(4, 5, 6).toDF("value").write.format("parquet") + .option("path", dir.getCanonicalPath).saveAsTable(tableName) + + checkForStreamTable(Some(dir), tableName) + } + } + } + + test("write: write to a managed table with existing path") { + val tableName = "stream_test" + withTable(tableName) { + // The file written by batch will not be seen after the table was written by a streaming + // query. This is because we loads files from the metadata log instead of listing them + // using HDFS API. + Seq(4, 5, 6).toDF("value").write.format("parquet").saveAsTable(tableName) + + checkForStreamTable(None, tableName) + } + } + + test("write: write to an external path and create table") { + withTempDir { dir => + val tableName = "stream_test" + withTable(tableName) { + // The file written by batch will not be seen after the table was written by a streaming + // query. This is because we loads files from the metadata log instead of listing them + // using HDFS API. 
+ Seq(4, 5, 6).toDF("value").write + .mode("append").format("parquet").save(dir.getCanonicalPath) + + checkForStreamTable(Some(dir), tableName) + } + } + } + + test("write: write to table with different format shouldn't be allowed") { + val tableName = "stream_test" + + spark.sql(s"CREATE TABLE $tableName (id bigint, data string) USING json") + checkAnswer(spark.table(tableName), Seq.empty) + + withTempDir { checkpointDir => + val exc = intercept[AnalysisException] { + runStreamQueryAppendMode(tableName, checkpointDir, Seq.empty, Seq.empty) + } + assert(exc.getMessage.contains("The input source(parquet) is different from the table " + + s"$tableName's data source provider(json)")) + } + } + + private def checkForStreamTable(dir: Option[File], tableName: String): Unit = { + val memory = MemoryStream[Int] + val dsw = memory.toDS().writeStream.format("parquet") + dir.foreach { output => + dsw.option("path", output.getCanonicalPath) + } + val sq = dsw + .option("checkpointLocation", Utils.createTempDir().getCanonicalPath) + .toTable(tableName) + memory.addData(1, 2, 3) + sq.processAllAvailable() + + checkDataset( + spark.table(tableName).as[Int], + 1, 2, 3) + val catalogTable = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName)) + val path = if (dir.nonEmpty) { + dir.get + } else { + new File(catalogTable.location) } + checkDataset( + spark.read.format("parquet").load(path.getCanonicalPath).as[Int], + 1, 2, 3) } private def runTestWithStreamAppend(tableIdentifier: String) = { From 91baab77f7e0a5102ac069846f0e2920bb2dd15a Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Thu, 3 Dec 2020 23:47:43 -0800 Subject: [PATCH 0664/1009] [SPARK-33656][TESTS] Add option to keep container after tests finish for DockerJDBCIntegrationSuites for debug ### What changes were proposed in this pull request? This PR add an option to keep container after DockerJDBCIntegrationSuites (e.g. DB2IntegrationSuite, PostgresIntegrationSuite) finish. By setting a system property `spark.test.docker.keepContainer` to `true`, we can use this option. ### Why are the changes needed? If some error occur during the tests, it would be useful to keep the container for debug. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? I confirmed that the container is kept after the test by the following commands. ``` # With sbt $ build/sbt -Dspark.test.docker.keepContainer=true -Pdocker-integration-tests -Phive -Phive-thriftserver package "testOnly org.apache.spark.sql.jdbc.MariaDBKrbIntegrationSuite" # With Maven $ build/mvn -Dspark.test.docker.keepContainer=true -Pdocker-integration-tests -Phive -Phive-thriftserver -Dtest=none -DwildcardSuites=org.apache.spark.sql.jdbc.MariaDBKrbIntegrationSuite test $ docker container ls ``` I also confirmed that there are no regression for all the subclasses of `DockerJDBCIntegrationSuite` with sbt/Maven. * MariaDBKrbIntegrationSuite * DB2KrbIntegrationSuite * PostgresKrbIntegrationSuite * MySQLIntegrationSuite * PostgresIntegrationSuite * DB2IntegrationSuite * MsSqlServerintegrationsuite * OracleIntegrationSuite * v2.MySQLIntegrationSuite * v2.PostgresIntegrationSuite * v2.DB2IntegrationSuite * v2.MsSqlServerIntegrationSuite * v2.OracleIntegrationSuite NOTE: `DB2IntegrationSuite`, `v2.DB2IntegrationSuite` and `DB2KrbIntegrationSuite` can fail due to the too much short connection timeout. It's a separate issue and I'll fix it in #30583 Closes #30601 from sarutak/keepContainer. 
Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .../sql/jdbc/DockerJDBCIntegrationSuite.scala | 39 ++++++++++++------- pom.xml | 2 + 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala index 00b7b413a964d..d6270313cabea 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala @@ -25,6 +25,7 @@ import scala.collection.JavaConverters._ import scala.util.control.NonFatal import com.spotify.docker.client._ +import com.spotify.docker.client.DockerClient.ListContainersParam import com.spotify.docker.client.exceptions.ImageNotFoundException import com.spotify.docker.client.messages.{ContainerConfig, HostConfig, PortBinding} import org.scalatest.concurrent.Eventually @@ -95,7 +96,9 @@ abstract class DockerJDBCIntegrationSuite extends SharedSparkSession with Eventu protected val dockerIp = DockerUtils.getDockerIp() val db: DatabaseOnDocker - val connectionTimeout = timeout(2.minutes) + val connectionTimeout = timeout(5.minutes) + val keepContainer = + sys.props.getOrElse("spark.test.docker.keepContainer", "false").toBoolean private var docker: DockerClient = _ // Configure networking (necessary for boot2docker / Docker Machine) @@ -176,20 +179,11 @@ abstract class DockerJDBCIntegrationSuite extends SharedSparkSession with Eventu override def afterAll(): Unit = { try { + cleanupContainer() + } finally { if (docker != null) { - try { - if (containerId != null) { - docker.killContainer(containerId) - docker.removeContainer(containerId) - } - } catch { - case NonFatal(e) => - logWarning(s"Could not stop container $containerId", e) - } finally { - docker.close() - } + docker.close() } - } finally { super.afterAll() } } @@ -205,4 +199,23 @@ abstract class DockerJDBCIntegrationSuite extends SharedSparkSession with Eventu * Prepare databases and tables for testing. */ def dataPreparation(connection: Connection): Unit + + private def cleanupContainer(): Unit = { + if (docker != null && containerId != null && !keepContainer) { + try { + docker.killContainer(containerId) + } catch { + case NonFatal(e) => + val exitContainerIds = + docker.listContainers(ListContainersParam.withStatusExited()).asScala.map(_.id()) + if (exitContainerIds.contains(containerId)) { + logWarning(s"Container $containerId already stopped") + } else { + logWarning(s"Could not stop container $containerId", e) + } + } finally { + docker.removeContainer(containerId) + } + } + } } diff --git a/pom.xml b/pom.xml index 4d6e3bbc95378..80097aec0f429 100644 --- a/pom.xml +++ b/pom.xml @@ -250,6 +250,7 @@ --> ${session.executionRootDirectory} + false 1g @@ -2626,6 +2627,7 @@ false true ${spark.test.webdriver.chrome.driver} + ${spark.test.docker.keepContainer} __not_used__ From 976e8970399a1a0fef4c826d4fdd1a138ca52c77 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Fri, 4 Dec 2020 00:12:04 -0800 Subject: [PATCH 0665/1009] [SPARK-33640][TESTS] Extend connection timeout to DB server for DB2IntegrationSuite and its variants ### What changes were proposed in this pull request? This PR extends the connection timeout to the DB server for DB2IntegrationSuite and its variants. 
The container image ibmcom/db2 creates a database when it starts up. The database creation can take over 2 minutes. DB2IntegrationSuite and its variants use the container image but the connection timeout is set to 2 minutes so these suites almost always fail. ### Why are the changes needed? To pass those suites. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? I confirmed the suites pass with the following commands. ``` $ build/sbt -Pdocker-integration-tests -Phive -Phive-thriftserver package "testOnly org.apache.spark.sql.jdbc.DB2IntegrationSuite" $ build/sbt -Pdocker-integration-tests -Phive -Phive-thriftserver package "testOnly org.apache.spark.sql.jdbc.v2.DB2IntegrationSuite" $ build/sbt -Pdocker-integration-tests -Phive -Phive-thriftserver package "testOnly org.apache.spark.sql.jdbc.DB2KrbIntegrationSuite" Closes #30583 from sarutak/extend-timeout-for-db2. Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala | 4 ++++ .../org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala | 3 +++ .../org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala | 4 ++++ 3 files changed, 11 insertions(+) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala index d086c8cdcc589..49ca91c50d25e 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala @@ -21,6 +21,8 @@ import java.math.BigDecimal import java.sql.{Connection, Date, Timestamp} import java.util.Properties +import org.scalatest.time.SpanSugar._ + import org.apache.spark.sql.Row import org.apache.spark.sql.types.{BooleanType, ByteType, ShortType, StructType} import org.apache.spark.tags.DockerTest @@ -51,6 +53,8 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite { s"jdbc:db2://$ip:$port/foo:user=db2inst1;password=rootpass;retrieveMessagesFromServerOnGetMessage=true;" //scalastyle:ignore } + override val connectionTimeout = timeout(3.minutes) + override def dataPreparation(conn: Connection): Unit = { conn.prepareStatement("CREATE TABLE tbl (x INTEGER, y VARCHAR(8))").executeUpdate() conn.prepareStatement("INSERT INTO tbl VALUES (42,'fred')").executeUpdate() diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala index 9c3a609b98bbe..5cbe6fab186a5 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala @@ -24,6 +24,7 @@ import javax.security.auth.login.Configuration import com.spotify.docker.client.messages.{ContainerConfig, HostConfig} import org.apache.hadoop.security.{SecurityUtil, UserGroupInformation} import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod.KERBEROS +import org.scalatest.time.SpanSugar._ import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions import org.apache.spark.sql.execution.datasources.jdbc.connection.{DB2ConnectionProvider, SecureConnectionProvider} @@ -76,6 +77,8 @@ class DB2KrbIntegrationSuite extends 
DockerKrbJDBCIntegrationSuite { } } + override val connectionTimeout = timeout(3.minutes) + override protected def setAuthentication(keytabFile: String, principal: String): Unit = { val config = new SecureConnectionProvider.JDBCConfiguration( Configuration.getConfiguration, "JaasClient", keytabFile, principal) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala index 6f803b8f61dd4..8cabf353c6fef 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.jdbc.v2 import java.sql.Connection +import org.scalatest.time.SpanSugar._ + import org.apache.spark.SparkConf import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog @@ -52,6 +54,8 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { s"jdbc:db2://$ip:$port/foo:user=db2inst1;password=rootpass;retrieveMessagesFromServerOnGetMessage=true;" //scalastyle:ignore } + override val connectionTimeout = timeout(3.minutes) + override def sparkConf: SparkConf = super.sparkConf .set("spark.sql.catalog.db2", classOf[JDBCTableCatalog].getName) .set("spark.sql.catalog.db2.url", db.getJdbcUrl(dockerIp, externalPort)) From 233a8494c8cc7bc8a4a9393ec512943749f11bef Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Fri, 4 Dec 2020 19:33:11 +0900 Subject: [PATCH 0666/1009] [SPARK-27237][SS] Introduce State schema validation among query restart ## What changes were proposed in this pull request? Please refer to the description of [SPARK-27237](https://issues.apache.org/jira/browse/SPARK-27237) for the rationale behind this patch. This patch introduces state schema validation by storing the key schema and value schema to a `schema` file (on the first run) and verifying that the new key and value schemas for state are compatible with the existing ones. To clarify the definition of "compatible": a state schema is "compatible" when the number of fields is the same and the data type of each field is the same - Spark has always allowed fields to be renamed. This patch prevents running a query whose state schema is incompatible, which reduces the chance of non-deterministic behavior (renaming a field can also be a smell of semantic incompatibility, but since end users may legitimately rename fields we cannot treat it as such), and it provides a more informative error message. ## How was this patch tested? Added UTs. Closes #24173 from HeartSaVioR/SPARK-27237.
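To make the compatibility rule concrete, here is a rough, illustration-only Scala sketch. The real check is `StateSchemaCompatibilityChecker` in the diff below, which also handles nested structs, arrays/maps and nullability; this sketch only shows the top-level idea (same field count, same data type per position, field names ignored), and the object name and `demo` helper are hypothetical:

```scala
import org.apache.spark.sql.types._

object StateSchemaSketch {
  // Positional comparison of data types; renames do not break compatibility.
  def compatible(stored: StructType, provided: StructType): Boolean =
    stored.length == provided.length &&
      stored.fields.zip(provided.fields).forall { case (s, p) =>
        s.dataType == p.dataType // a renamed field is still compatible
      }

  def demo(): Unit = {
    val old = new StructType().add("cnt", LongType).add("avg", DoubleType)
    val renamed = new StructType().add("count", LongType).add("average", DoubleType)
    val widened = new StructType().add("cnt", StringType).add("avg", DoubleType)
    assert(compatible(old, renamed))  // rename only -> restart is allowed
    assert(!compatible(old, widened)) // type change -> restart is rejected
  }
}
```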
Lead-authored-by: Jungtaek Lim (HeartSaVioR) Co-authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: HyukjinKwon --- .../apache/spark/sql/internal/SQLConf.scala | 10 + .../org/apache/spark/sql/types/DataType.scala | 38 ++- .../execution/streaming/HDFSMetadataLog.scala | 32 +-- .../streaming/MetadataVersionUtil.scala | 51 ++++ .../StateSchemaCompatibilityChecker.scala | 118 +++++++++ .../streaming/state/StateStore.scala | 36 ++- .../streaming/state/StateStoreConf.scala | 3 + ...StateSchemaCompatibilityCheckerSuite.scala | 230 ++++++++++++++++++ .../streaming/StreamingAggregationSuite.scala | 87 ++++++- ...ngStateStoreFormatCompatibilitySuite.scala | 21 +- 10 files changed, 582 insertions(+), 44 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetadataVersionUtil.scala create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityChecker.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityCheckerSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 4442581b77811..025478214e492 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1294,6 +1294,14 @@ object SQLConf { .createWithDefault( "org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider") + val STATE_SCHEMA_CHECK_ENABLED = + buildConf("spark.sql.streaming.stateStore.stateSchemaCheck") + .doc("When true, Spark will validate the state schema against schema on existing state and " + + "fail query if it's incompatible.") + .version("3.1.0") + .booleanConf + .createWithDefault(true) + val STATE_STORE_MIN_DELTAS_FOR_SNAPSHOT = buildConf("spark.sql.streaming.stateStore.minDeltasForSnapshot") .internal() @@ -3079,6 +3087,8 @@ class SQLConf extends Serializable with Logging { def stateStoreProviderClass: String = getConf(STATE_STORE_PROVIDER_CLASS) + def isStateSchemaCheckEnabled: Boolean = getConf(STATE_SCHEMA_CHECK_ENABLED) + def stateStoreMinDeltasForSnapshot: Int = getConf(STATE_STORE_MIN_DELTAS_FOR_SNAPSHOT) def stateStoreFormatValidationEnabled: Boolean = getConf(STATE_STORE_FORMAT_VALIDATION_ENABLED) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala index e4ee6eb377a4d..9e820f0796a96 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala @@ -307,21 +307,49 @@ object DataType { * of `fromField.nullable` and `toField.nullable` are false. */ private[sql] def equalsIgnoreCompatibleNullability(from: DataType, to: DataType): Boolean = { + equalsIgnoreCompatibleNullability(from, to, ignoreName = false) + } + + /** + * Compares two types, ignoring compatible nullability of ArrayType, MapType, StructType, and + * also the field name. It compares based on the position. + * + * Compatible nullability is defined as follows: + * - If `from` and `to` are ArrayTypes, `from` has a compatible nullability with `to` + * if and only if `to.containsNull` is true, or both of `from.containsNull` and + * `to.containsNull` are false. 
+ * - If `from` and `to` are MapTypes, `from` has a compatible nullability with `to` + * if and only if `to.valueContainsNull` is true, or both of `from.valueContainsNull` and + * `to.valueContainsNull` are false. + * - If `from` and `to` are StructTypes, `from` has a compatible nullability with `to` + * if and only if for all every pair of fields, `to.nullable` is true, or both + * of `fromField.nullable` and `toField.nullable` are false. + */ + private[sql] def equalsIgnoreNameAndCompatibleNullability( + from: DataType, + to: DataType): Boolean = { + equalsIgnoreCompatibleNullability(from, to, ignoreName = true) + } + + private def equalsIgnoreCompatibleNullability( + from: DataType, + to: DataType, + ignoreName: Boolean = false): Boolean = { (from, to) match { case (ArrayType(fromElement, fn), ArrayType(toElement, tn)) => - (tn || !fn) && equalsIgnoreCompatibleNullability(fromElement, toElement) + (tn || !fn) && equalsIgnoreCompatibleNullability(fromElement, toElement, ignoreName) case (MapType(fromKey, fromValue, fn), MapType(toKey, toValue, tn)) => (tn || !fn) && - equalsIgnoreCompatibleNullability(fromKey, toKey) && - equalsIgnoreCompatibleNullability(fromValue, toValue) + equalsIgnoreCompatibleNullability(fromKey, toKey, ignoreName) && + equalsIgnoreCompatibleNullability(fromValue, toValue, ignoreName) case (StructType(fromFields), StructType(toFields)) => fromFields.length == toFields.length && fromFields.zip(toFields).forall { case (fromField, toField) => - fromField.name == toField.name && + (ignoreName || fromField.name == toField.name) && (toField.nullable || !fromField.nullable) && - equalsIgnoreCompatibleNullability(fromField.dataType, toField.dataType) + equalsIgnoreCompatibleNullability(fromField.dataType, toField.dataType, ignoreName) } case (fromDataType, toDataType) => fromDataType == toDataType diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala index 893639a86c88c..b87a5b49eb6ea 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala @@ -267,36 +267,8 @@ class HDFSMetadataLog[T <: AnyRef : ClassTag](sparkSession: SparkSession, path: } } - /** - * Parse the log version from the given `text` -- will throw exception when the parsed version - * exceeds `maxSupportedVersion`, or when `text` is malformed (such as "xyz", "v", "v-1", - * "v123xyz" etc.) - */ - private[sql] def validateVersion(text: String, maxSupportedVersion: Int): Int = { - if (text.length > 0 && text(0) == 'v') { - val version = - try { - text.substring(1, text.length).toInt - } catch { - case _: NumberFormatException => - throw new IllegalStateException(s"Log file was malformed: failed to read correct log " + - s"version from $text.") - } - if (version > 0) { - if (version > maxSupportedVersion) { - throw new IllegalStateException(s"UnsupportedLogVersion: maximum supported log version " + - s"is v${maxSupportedVersion}, but encountered v$version. The log file was produced " + - s"by a newer version of Spark and cannot be read by this version. 
Please upgrade.") - } else { - return version - } - } - } - - // reaching here means we failed to read the correct log version - throw new IllegalStateException(s"Log file was malformed: failed to read correct log " + - s"version from $text.") - } + private[sql] def validateVersion(text: String, maxSupportedVersion: Int): Int = + MetadataVersionUtil.validateVersion(text, maxSupportedVersion) } object HDFSMetadataLog { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetadataVersionUtil.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetadataVersionUtil.scala new file mode 100644 index 0000000000000..548f2aa5d5c5b --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetadataVersionUtil.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.streaming + +object MetadataVersionUtil { + /** + * Parse the log version from the given `text` -- will throw exception when the parsed version + * exceeds `maxSupportedVersion`, or when `text` is malformed (such as "xyz", "v", "v-1", + * "v123xyz" etc.) + */ + def validateVersion(text: String, maxSupportedVersion: Int): Int = { + if (text.length > 0 && text(0) == 'v') { + val version = + try { + text.substring(1, text.length).toInt + } catch { + case _: NumberFormatException => + throw new IllegalStateException(s"Log file was malformed: failed to read correct log " + + s"version from $text.") + } + if (version > 0) { + if (version > maxSupportedVersion) { + throw new IllegalStateException(s"UnsupportedLogVersion: maximum supported log version " + + s"is v${maxSupportedVersion}, but encountered v$version. The log file was produced " + + s"by a newer version of Spark and cannot be read by this version. Please upgrade.") + } else { + return version + } + } + } + + // reaching here means we failed to read the correct log version + throw new IllegalStateException(s"Log file was malformed: failed to read correct log " + + s"version from $text.") + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityChecker.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityChecker.scala new file mode 100644 index 0000000000000..4ac12c089c0d3 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityChecker.scala @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.streaming.state + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path + +import org.apache.spark.internal.Logging +import org.apache.spark.sql.execution.streaming.{CheckpointFileManager, MetadataVersionUtil} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{DataType, StructType} + +case class StateSchemaNotCompatible(message: String) extends Exception(message) + +class StateSchemaCompatibilityChecker( + providerId: StateStoreProviderId, + hadoopConf: Configuration) extends Logging { + + private val storeCpLocation = providerId.storeId.storeCheckpointLocation() + private val fm = CheckpointFileManager.create(storeCpLocation, hadoopConf) + private val schemaFileLocation = schemaFile(storeCpLocation) + + fm.mkdirs(schemaFileLocation.getParent) + + def check(keySchema: StructType, valueSchema: StructType): Unit = { + if (fm.exists(schemaFileLocation)) { + logDebug(s"Schema file for provider $providerId exists. Comparing with provided schema.") + val (storedKeySchema, storedValueSchema) = readSchemaFile() + if (storedKeySchema.equals(keySchema) && storedValueSchema.equals(valueSchema)) { + // schema is exactly same + } else if (!schemasCompatible(storedKeySchema, keySchema) || + !schemasCompatible(storedValueSchema, valueSchema)) { + val errorMsg = "Provided schema doesn't match to the schema for existing state! " + + "Please note that Spark allow difference of field name: check count of fields " + + "and data type of each field.\n" + + s"- Provided key schema: $keySchema\n" + + s"- Provided value schema: $valueSchema\n" + + s"- Existing key schema: $storedKeySchema\n" + + s"- Existing value schema: $storedValueSchema\n" + + s"If you want to force running query without schema validation, please set " + + s"${SQLConf.STATE_SCHEMA_CHECK_ENABLED.key} to false.\n" + + "Please note running query with incompatible schema could cause indeterministic" + + " behavior." + logError(errorMsg) + throw StateSchemaNotCompatible(errorMsg) + } else { + logInfo("Detected schema change which is compatible. Allowing to put rows.") + } + } else { + // schema doesn't exist, create one now + logDebug(s"Schema file for provider $providerId doesn't exist. Creating one.") + createSchemaFile(keySchema, valueSchema) + } + } + + private def schemasCompatible(storedSchema: StructType, schema: StructType): Boolean = + DataType.equalsIgnoreNameAndCompatibleNullability(storedSchema, schema) + + private def readSchemaFile(): (StructType, StructType) = { + val inStream = fm.open(schemaFileLocation) + try { + val versionStr = inStream.readUTF() + // Currently we only support version 1, which we can simplify the version validation and + // the parse logic. 
+ val version = MetadataVersionUtil.validateVersion(versionStr, + StateSchemaCompatibilityChecker.VERSION) + require(version == 1) + + val keySchemaStr = inStream.readUTF() + val valueSchemaStr = inStream.readUTF() + + (StructType.fromString(keySchemaStr), StructType.fromString(valueSchemaStr)) + } catch { + case e: Throwable => + logError(s"Fail to read schema file from $schemaFileLocation", e) + throw e + } finally { + inStream.close() + } + } + + private def createSchemaFile(keySchema: StructType, valueSchema: StructType): Unit = { + val outStream = fm.createAtomic(schemaFileLocation, overwriteIfPossible = false) + try { + outStream.writeUTF(s"v${StateSchemaCompatibilityChecker.VERSION}") + outStream.writeUTF(keySchema.json) + outStream.writeUTF(valueSchema.json) + outStream.close() + } catch { + case e: Throwable => + logError(s"Fail to write schema file to $schemaFileLocation", e) + outStream.cancel() + throw e + } + } + + private def schemaFile(storeCpLocation: Path): Path = + new Path(new Path(storeCpLocation, "_metadata"), "schema") +} + +object StateSchemaCompatibilityChecker { + val VERSION = 1 +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala index 05bcee7b05c6f..ab67c19783ff7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala @@ -22,6 +22,7 @@ import java.util.concurrent.{ScheduledFuture, TimeUnit} import javax.annotation.concurrent.GuardedBy import scala.collection.mutable +import scala.util.Try import scala.util.control.NonFatal import org.apache.hadoop.conf.Configuration @@ -280,14 +281,14 @@ object StateStoreProvider { * Return a instance of the required provider, initialized with the given configurations. */ def createAndInit( - stateStoreId: StateStoreId, + providerId: StateStoreProviderId, keySchema: StructType, valueSchema: StructType, indexOrdinal: Option[Int], // for sorting the data storeConf: StateStoreConf, hadoopConf: Configuration): StateStoreProvider = { val provider = create(storeConf.providerClass) - provider.init(stateStoreId, keySchema, valueSchema, indexOrdinal, storeConf, hadoopConf) + provider.init(providerId.storeId, keySchema, valueSchema, indexOrdinal, storeConf, hadoopConf) provider } @@ -386,10 +387,14 @@ object StateStore extends Logging { val MAINTENANCE_INTERVAL_CONFIG = "spark.sql.streaming.stateStore.maintenanceInterval" val MAINTENANCE_INTERVAL_DEFAULT_SECS = 60 + val PARTITION_ID_TO_CHECK_SCHEMA = 0 @GuardedBy("loadedProviders") private val loadedProviders = new mutable.HashMap[StateStoreProviderId, StateStoreProvider]() + @GuardedBy("loadedProviders") + private val schemaValidated = new mutable.HashMap[StateStoreProviderId, Option[Throwable]]() + /** * Runs the `task` periodically and automatically cancels it if there is an exception. `onError` * will be called when an exception happens. 
@@ -467,10 +472,29 @@ object StateStore extends Logging { hadoopConf: Configuration): StateStoreProvider = { loadedProviders.synchronized { startMaintenanceIfNeeded() + + if (storeProviderId.storeId.partitionId == PARTITION_ID_TO_CHECK_SCHEMA) { + val result = schemaValidated.getOrElseUpdate(storeProviderId, { + val checker = new StateSchemaCompatibilityChecker(storeProviderId, hadoopConf) + // regardless of configuration, we check compatibility to at least write schema file + // if necessary + val ret = Try(checker.check(keySchema, valueSchema)).toEither.fold(Some(_), _ => None) + if (storeConf.stateSchemaCheckEnabled) { + ret + } else { + None + } + }) + + if (result.isDefined) { + throw result.get + } + } + val provider = loadedProviders.getOrElseUpdate( storeProviderId, StateStoreProvider.createAndInit( - storeProviderId.storeId, keySchema, valueSchema, indexOrdinal, storeConf, hadoopConf) + storeProviderId, keySchema, valueSchema, indexOrdinal, storeConf, hadoopConf) ) reportActiveStoreInstance(storeProviderId) provider @@ -482,6 +506,12 @@ object StateStore extends Logging { loadedProviders.remove(storeProviderId).foreach(_.close()) } + /** Unload all state store providers: unit test purpose */ + private[sql] def unloadAll(): Unit = loadedProviders.synchronized { + loadedProviders.keySet.foreach { key => unload(key) } + loadedProviders.clear() + } + /** Whether a state store provider is loaded or not */ def isLoaded(storeProviderId: StateStoreProviderId): Boolean = loadedProviders.synchronized { loadedProviders.contains(storeProviderId) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala index 11043bc81ae3f..23cb3be32c85a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala @@ -55,6 +55,9 @@ class StateStoreConf( /** The compression codec used to compress delta and snapshot files. */ val compressionCodec: String = sqlConf.stateStoreCompressionCodec + /** whether to validate state schema during query run. */ + val stateSchemaCheckEnabled = sqlConf.isStateSchemaCheckEnabled + /** * Additional configurations related to state store. This will capture all configs in * SQLConf that start with `spark.sql.streaming.stateStore.` and extraOptions for a specific diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityCheckerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityCheckerSuite.scala new file mode 100644 index 0000000000000..4eb7603b316aa --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityCheckerSuite.scala @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.streaming.state + +import java.util.UUID + +import scala.util.Random + +import org.apache.hadoop.conf.Configuration + +import org.apache.spark.sql.execution.streaming.state.StateStoreTestsHelper.newDir +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types._ + +class StateSchemaCompatibilityCheckerSuite extends SharedSparkSession { + + private val hadoopConf: Configuration = new Configuration() + private val opId = Random.nextInt(100000) + private val partitionId = StateStore.PARTITION_ID_TO_CHECK_SCHEMA + + private val structSchema = new StructType() + .add(StructField("nested1", IntegerType, nullable = true)) + .add(StructField("nested2", StringType, nullable = true)) + + private val keySchema = new StructType() + .add(StructField("key1", IntegerType, nullable = true)) + .add(StructField("key2", StringType, nullable = true)) + .add(StructField("key3", structSchema, nullable = true)) + + private val valueSchema = new StructType() + .add(StructField("value1", IntegerType, nullable = true)) + .add(StructField("value2", StringType, nullable = true)) + .add(StructField("value3", structSchema, nullable = true)) + + test("adding field to key should fail") { + val fieldAddedKeySchema = keySchema.add(StructField("newKey", IntegerType)) + verifyException(keySchema, valueSchema, fieldAddedKeySchema, valueSchema) + } + + test("adding field to value should fail") { + val fieldAddedValueSchema = valueSchema.add(StructField("newValue", IntegerType)) + verifyException(keySchema, valueSchema, keySchema, fieldAddedValueSchema) + } + + test("adding nested field in key should fail") { + val fieldAddedNestedSchema = structSchema.add(StructField("newNested", IntegerType)) + val newKeySchema = applyNewSchemaToNestedFieldInKey(fieldAddedNestedSchema) + verifyException(keySchema, valueSchema, newKeySchema, valueSchema) + } + + test("adding nested field in value should fail") { + val fieldAddedNestedSchema = structSchema.add(StructField("newNested", IntegerType)) + val newValueSchema = applyNewSchemaToNestedFieldInValue(fieldAddedNestedSchema) + verifyException(keySchema, valueSchema, keySchema, newValueSchema) + } + + test("removing field from key should fail") { + val fieldRemovedKeySchema = StructType(keySchema.dropRight(1)) + verifyException(keySchema, valueSchema, fieldRemovedKeySchema, valueSchema) + } + + test("removing field from value should fail") { + val fieldRemovedValueSchema = StructType(valueSchema.drop(1)) + verifyException(keySchema, valueSchema, keySchema, fieldRemovedValueSchema) + } + + test("removing nested field from key should fail") { + val fieldRemovedNestedSchema = StructType(structSchema.dropRight(1)) + val newKeySchema = applyNewSchemaToNestedFieldInKey(fieldRemovedNestedSchema) + verifyException(keySchema, valueSchema, newKeySchema, valueSchema) + } + + test("removing nested field from value should fail") { + val fieldRemovedNestedSchema = StructType(structSchema.drop(1)) + val newValueSchema = applyNewSchemaToNestedFieldInValue(fieldRemovedNestedSchema) + verifyException(keySchema, 
valueSchema, keySchema, newValueSchema) + } + + test("changing the type of field in key should fail") { + val typeChangedKeySchema = StructType(keySchema.map(_.copy(dataType = TimestampType))) + verifyException(keySchema, valueSchema, typeChangedKeySchema, valueSchema) + } + + test("changing the type of field in value should fail") { + val typeChangedValueSchema = StructType(valueSchema.map(_.copy(dataType = TimestampType))) + verifyException(keySchema, valueSchema, keySchema, typeChangedValueSchema) + } + + test("changing the type of nested field in key should fail") { + val typeChangedNestedSchema = StructType(structSchema.map(_.copy(dataType = TimestampType))) + val newKeySchema = applyNewSchemaToNestedFieldInKey(typeChangedNestedSchema) + verifyException(keySchema, valueSchema, newKeySchema, valueSchema) + } + + test("changing the type of nested field in value should fail") { + val typeChangedNestedSchema = StructType(structSchema.map(_.copy(dataType = TimestampType))) + val newValueSchema = applyNewSchemaToNestedFieldInValue(typeChangedNestedSchema) + verifyException(keySchema, valueSchema, keySchema, newValueSchema) + } + + test("changing the nullability of nullable to non-nullable in key should fail") { + val nonNullChangedKeySchema = StructType(keySchema.map(_.copy(nullable = false))) + verifyException(keySchema, valueSchema, nonNullChangedKeySchema, valueSchema) + } + + test("changing the nullability of nullable to non-nullable in value should fail") { + val nonNullChangedValueSchema = StructType(valueSchema.map(_.copy(nullable = false))) + verifyException(keySchema, valueSchema, keySchema, nonNullChangedValueSchema) + } + + test("changing the nullability of nullable to nonnullable in nested field in key should fail") { + val typeChangedNestedSchema = StructType(structSchema.map(_.copy(nullable = false))) + val newKeySchema = applyNewSchemaToNestedFieldInKey(typeChangedNestedSchema) + verifyException(keySchema, valueSchema, newKeySchema, valueSchema) + } + + test("changing the nullability of nullable to nonnullable in nested field in value should fail") { + val typeChangedNestedSchema = StructType(structSchema.map(_.copy(nullable = false))) + val newValueSchema = applyNewSchemaToNestedFieldInValue(typeChangedNestedSchema) + verifyException(keySchema, valueSchema, keySchema, newValueSchema) + } + + test("changing the name of field in key should be allowed") { + val newName: StructField => StructField = f => f.copy(name = f.name + "_new") + val fieldNameChangedKeySchema = StructType(keySchema.map(newName)) + verifySuccess(keySchema, valueSchema, fieldNameChangedKeySchema, valueSchema) + } + + test("changing the name of field in value should be allowed") { + val newName: StructField => StructField = f => f.copy(name = f.name + "_new") + val fieldNameChangedValueSchema = StructType(valueSchema.map(newName)) + verifySuccess(keySchema, valueSchema, keySchema, fieldNameChangedValueSchema) + } + + test("changing the name of nested field in key should be allowed") { + val newName: StructField => StructField = f => f.copy(name = f.name + "_new") + val newNestedFieldsSchema = StructType(structSchema.map(newName)) + val fieldNameChangedKeySchema = applyNewSchemaToNestedFieldInKey(newNestedFieldsSchema) + verifySuccess(keySchema, valueSchema, fieldNameChangedKeySchema, valueSchema) + } + + test("changing the name of nested field in value should be allowed") { + val newName: StructField => StructField = f => f.copy(name = f.name + "_new") + val newNestedFieldsSchema = 
StructType(structSchema.map(newName)) + val fieldNameChangedValueSchema = applyNewSchemaToNestedFieldInValue(newNestedFieldsSchema) + verifySuccess(keySchema, valueSchema, keySchema, fieldNameChangedValueSchema) + } + + private def applyNewSchemaToNestedFieldInKey(newNestedSchema: StructType): StructType = { + applyNewSchemaToNestedField(keySchema, newNestedSchema, "key3") + } + + private def applyNewSchemaToNestedFieldInValue(newNestedSchema: StructType): StructType = { + applyNewSchemaToNestedField(valueSchema, newNestedSchema, "value3") + } + + private def applyNewSchemaToNestedField( + originSchema: StructType, + newNestedSchema: StructType, + fieldName: String): StructType = { + val newFields = originSchema.map { field => + if (field.name == fieldName) { + field.copy(dataType = newNestedSchema) + } else { + field + } + } + StructType(newFields) + } + + private def runSchemaChecker( + dir: String, + queryId: UUID, + newKeySchema: StructType, + newValueSchema: StructType): Unit = { + // in fact, Spark doesn't support online state schema change, so need to check + // schema only once for each running of JVM + val providerId = StateStoreProviderId( + StateStoreId(dir, opId, partitionId), queryId) + + new StateSchemaCompatibilityChecker(providerId, hadoopConf) + .check(newKeySchema, newValueSchema) + } + + private def verifyException( + oldKeySchema: StructType, + oldValueSchema: StructType, + newKeySchema: StructType, + newValueSchema: StructType): Unit = { + val dir = newDir() + val queryId = UUID.randomUUID() + runSchemaChecker(dir, queryId, oldKeySchema, oldValueSchema) + + val e = intercept[StateSchemaNotCompatible] { + runSchemaChecker(dir, queryId, newKeySchema, newValueSchema) + } + + e.getMessage.contains("Provided schema doesn't match to the schema for existing state!") + e.getMessage.contains(newKeySchema.json) + e.getMessage.contains(newValueSchema.json) + e.getMessage.contains(oldKeySchema.json) + e.getMessage.contains(oldValueSchema.json) + } + + private def verifySuccess( + oldKeySchema: StructType, + oldValueSchema: StructType, + newKeySchema: StructType, + newValueSchema: StructType): Unit = { + val dir = newDir() + val queryId = UUID.randomUUID() + runSchemaChecker(dir, queryId, oldKeySchema, oldValueSchema) + runSchemaChecker(dir, queryId, newKeySchema, newValueSchema) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala index 0524e29662014..491b0d8b2c26c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala @@ -20,6 +20,8 @@ package org.apache.spark.sql.streaming import java.io.File import java.util.{Locale, TimeZone} +import scala.annotation.tailrec + import org.apache.commons.io.FileUtils import org.scalatest.Assertions @@ -33,7 +35,7 @@ import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} import org.apache.spark.sql.execution.exchange.Exchange import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.sources.MemorySink -import org.apache.spark.sql.execution.streaming.state.StreamingAggregationStateManager +import org.apache.spark.sql.execution.streaming.state.{StateSchemaNotCompatible, StateStore, StreamingAggregationStateManager} import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import 
org.apache.spark.sql.streaming.OutputMode._ @@ -753,6 +755,89 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { ) } + testQuietlyWithAllStateVersions("changing schema of state when restarting query", + (SQLConf.STATE_STORE_FORMAT_VALIDATION_ENABLED.key, "false")) { + withTempDir { tempDir => + val (inputData, aggregated) = prepareTestForChangingSchemaOfState(tempDir) + + // if we don't have verification phase on state schema, modified query would throw NPE with + // stack trace which end users would not easily understand + + testStream(aggregated, Update())( + StartStream(checkpointLocation = tempDir.getAbsolutePath), + AddData(inputData, 21), + ExpectFailure[SparkException] { e => + val stateSchemaExc = findStateSchemaNotCompatible(e) + assert(stateSchemaExc.isDefined) + val msg = stateSchemaExc.get.getMessage + assert(msg.contains("Provided schema doesn't match to the schema for existing state")) + // other verifications are presented in StateStoreSuite + } + ) + } + } + + testQuietlyWithAllStateVersions("changing schema of state when restarting query -" + + " schema check off", + (SQLConf.STATE_SCHEMA_CHECK_ENABLED.key, "false"), + (SQLConf.STATE_STORE_FORMAT_VALIDATION_ENABLED.key, "false")) { + withTempDir { tempDir => + val (inputData, aggregated) = prepareTestForChangingSchemaOfState(tempDir) + + testStream(aggregated, Update())( + StartStream(checkpointLocation = tempDir.getAbsolutePath), + AddData(inputData, 21), + ExpectFailure[SparkException] { e => + val stateSchemaExc = findStateSchemaNotCompatible(e) + // it would bring other error in runtime, but it shouldn't check schema in any way + assert(stateSchemaExc.isEmpty) + } + ) + } + } + + private def prepareTestForChangingSchemaOfState( + tempDir: File): (MemoryStream[Int], DataFrame) = { + val inputData = MemoryStream[Int] + val aggregated = inputData.toDF() + .selectExpr("value % 10 AS id", "value") + .groupBy($"id") + .agg( + sum("value").as("sum_value"), + avg("value").as("avg_value"), + max("value").as("max_value")) + + testStream(aggregated, Update())( + StartStream(checkpointLocation = tempDir.getAbsolutePath), + AddData(inputData, 1, 11), + CheckLastBatch((1L, 12L, 6.0, 11)), + StopStream + ) + + StateStore.unloadAll() + + val inputData2 = MemoryStream[Int] + val aggregated2 = inputData2.toDF() + .selectExpr("value % 10 AS id", "value") + .groupBy($"id") + .agg( + sum("value").as("sum_value"), + avg("value").as("avg_value"), + collect_list("value").as("values")) + + inputData2.addData(1, 11) + + (inputData2, aggregated2) + } + + @tailrec + private def findStateSchemaNotCompatible(exc: Throwable): Option[StateSchemaNotCompatible] = { + exc match { + case e1: StateSchemaNotCompatible => Some(e1) + case e1 if e1.getCause != null => findStateSchemaNotCompatible(e1.getCause) + case _ => None + } + } /** Add blocks of data to the `BlockRDDBackedSource`. 
*/ case class AddBlockData(source: BlockRDDBackedSource, data: Seq[Int]*) extends AddData { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingStateStoreFormatCompatibilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingStateStoreFormatCompatibilitySuite.scala index 33f6b02acb6dd..1032d6c5b6ff2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingStateStoreFormatCompatibilitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingStateStoreFormatCompatibilitySuite.scala @@ -19,12 +19,15 @@ package org.apache.spark.sql.streaming import java.io.File +import scala.annotation.tailrec + import org.apache.commons.io.FileUtils import org.apache.spark.SparkException import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.streaming.InternalOutputModes.Complete import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.execution.streaming.state.{InvalidUnsafeRowException, StateSchemaNotCompatible} import org.apache.spark.sql.functions._ import org.apache.spark.util.Utils @@ -239,11 +242,19 @@ class StreamingStateStoreFormatCompatibilitySuite extends StreamTest { CheckAnswer(Row(0, 20, Seq(0, 2, 4, 6, 8)), Row(1, 25, Seq(1, 3, 5, 7, 9))) */ AddData(inputData, 10 to 19: _*), - ExpectFailure[SparkException](e => { - // Check the exception message to make sure the state store format changing. - assert(e.getCause.getCause.getMessage.contains( - "The streaming query failed by state format invalidation.")) - }) + ExpectFailure[SparkException] { e => + assert(findStateSchemaException(e)) + } ) } + + @tailrec + private def findStateSchemaException(exc: Throwable): Boolean = { + exc match { + case _: StateSchemaNotCompatible => true + case _: InvalidUnsafeRowException => true + case e1 if e1.getCause != null => findStateSchemaException(e1.getCause) + case _ => false + } + } } From 990bee9c58ea9abd8c4f04f20c78c6d5b720406a Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Fri, 4 Dec 2020 19:37:03 +0900 Subject: [PATCH 0667/1009] [SPARK-33615][K8S] Make 'spark.archives' working in Kubernates ### What changes were proposed in this pull request? This PR proposes to make `spark.archives` configuration working in Kubernates. It works without a problem in standalone cluster but there seems a bug in Kubernates. It fails to fetch the file on the driver side as below: ``` 20/12/03 13:33:53 INFO SparkContext: Added JAR file:/tmp/spark-75004286-c83a-4369-b624-14c5d2d2a748/spark-examples_2.12-3.1.0-SNAPSHOT.jar at spark://spark-test-app-48ae737628cee6f8-driver-svc.spark-integration-test.svc:7078/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar with timestamp 1607002432558 20/12/03 13:33:53 INFO SparkContext: Added archive file:///tmp/tmp4542734800151332666.txt.tar.gz#test_tar_gz at spark://spark-test-app-48ae737628cee6f8-driver-svc.spark-integration-test.svc:7078/files/tmp4542734800151332666.txt.tar.gz with timestamp 1607002432558 20/12/03 13:33:53 INFO TransportClientFactory: Successfully created connection to spark-test-app-48ae737628cee6f8-driver-svc.spark-integration-test.svc/172.17.0.4:7078 after 83 ms (47 ms spent in bootstraps) 20/12/03 13:33:53 INFO Utils: Fetching spark://spark-test-app-48ae737628cee6f8-driver-svc.spark-integration-test.svc:7078/files/tmp4542734800151332666.txt.tar.gz to /tmp/spark-66573e24-27a3-427c-99f4-36f06d9e9cd5/fetchFileTemp2665785666227461849.tmp 20/12/03 13:33:53 ERROR SparkContext: Error initializing SparkContext. 
java.lang.RuntimeException: Stream '/files/tmp4542734800151332666.txt.tar.gz' was not found. at org.apache.spark.network.client.TransportResponseHandler.handle(TransportResponseHandler.java:242) at org.apache.spark.network.server.TransportChannelHandler.channelRead0(TransportChannelHandler.java:142) at org.apache.spark.network.server.TransportChannelHandler.channelRead0(TransportChannelHandler.java:53) ``` This is because `spark.archives` was not actually added on the driver side correctly. The changes here fix it by adding and resolving URIs correctly. ### Why are the changes needed? `spark.archives` feature can be leveraged for many things such as Conda support. We should make it working in Kubernates as well. This is a bug fix too. ### Does this PR introduce _any_ user-facing change? No, this feature is not out yet. ### How was this patch tested? I manually tested with Minikube 1.15.1. For an environment issue (?), I had to use a custom namespace, service account and roles. `default` service account does not work for me and complains it doesn't have permissions to get/list pods, etc. ```bash minikube delete minikube start --cpus 12 --memory 16384 kubectl create namespace spark-integration-test cat < Signed-off-by: HyukjinKwon --- .../scala/org/apache/spark/SparkContext.scala | 4 +++- .../org/apache/spark/deploy/SparkSubmit.scala | 13 +++++++++-- docs/running-on-kubernetes.md | 2 +- .../k8s/features/BasicDriverFeatureStep.scala | 22 ++++++++++++++++--- .../k8s/integrationtest/DepsTestsSuite.scala | 12 ++++++++++ .../deploy/k8s/integrationtest/Utils.scala | 22 +++++++++++++++++++ .../org/apache/spark/deploy/yarn/Client.scala | 1 + 7 files changed, 69 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 86f1d745d91d4..17ceb5f1887c6 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1639,7 +1639,9 @@ class SparkContext(config: SparkConf) extends Logging { UriBuilder.fromUri(new URI(key)).fragment(uri.getFragment).build().toString, timestamp).isEmpty) { logInfo(s"Added archive $path at $key with timestamp $timestamp") - val uriToDownload = UriBuilder.fromUri(new URI(key)).fragment(null).build() + // If the scheme is file, use URI to simply copy instead of downloading. + val uriToUse = if (!isLocal && scheme == "file") uri else new URI(key) + val uriToDownload = UriBuilder.fromUri(uriToUse).fragment(null).build() val source = Utils.fetchFile(uriToDownload.toString, Utils.createTempDir(), conf, env.securityManager, hadoopConfiguration, timestamp, useCache = false, shouldUntar = false) val dest = new File( diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index a344bce7a0f3c..ea293f03a2169 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -24,6 +24,7 @@ import java.security.PrivilegedExceptionAction import java.text.ParseException import java.util.{ServiceLoader, UUID} import java.util.jar.JarInputStream +import javax.ws.rs.core.UriBuilder import scala.annotation.tailrec import scala.collection.JavaConverters._ @@ -387,10 +388,18 @@ private[spark] class SparkSubmit extends Logging { // Executors will get the jars from the Spark file server. 
// Explicitly download the related files here args.jars = renameResourcesToLocalFS(args.jars, localJars) - val localFiles = Option(args.files).map { + val filesLocalFiles = Option(args.files).map { downloadFileList(_, targetDir, sparkConf, hadoopConf, secMgr) }.orNull - args.files = renameResourcesToLocalFS(args.files, localFiles) + val archiveLocalFiles = Option(args.archives).map { uri => + val resolvedUri = Utils.resolveURI(uri) + val downloadedUri = downloadFileList( + UriBuilder.fromUri(resolvedUri).fragment(null).build().toString, + targetDir, sparkConf, hadoopConf, secMgr) + UriBuilder.fromUri(downloadedUri).fragment(resolvedUri.getFragment).build().toString + }.orNull + args.files = renameResourcesToLocalFS(args.files, filesLocalFiles) + args.archives = renameResourcesToLocalFS(args.archives, archiveLocalFiles) args.pyFiles = renameResourcesToLocalFS(args.pyFiles, localPyFiles) } } diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 71b7df8176d1b..e735c7493486e 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -222,7 +222,7 @@ The app jar file will be uploaded to the S3 and then when the driver is launched to the driver pod and will be added to its classpath. Spark will generate a subdir under the upload path with a random name to avoid conflicts with spark apps running in parallel. User could manage the subdirs created according to his needs. -The client scheme is supported for the application jar, and dependencies specified by properties `spark.jars` and `spark.files`. +The client scheme is supported for the application jar, and dependencies specified by properties `spark.jars`, `spark.files` and `spark.archives`. Important: all client-side dependencies will be uploaded to the given path with a flat directory structure so file names must be unique otherwise files will be overwritten. 
Also make sure in the derived k8s image default ivy dir diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala index f5ba261c8f405..cec8272beed57 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala @@ -16,6 +16,8 @@ */ package org.apache.spark.deploy.k8s.features +import javax.ws.rs.core.UriBuilder + import scala.collection.JavaConverters._ import scala.collection.mutable @@ -159,11 +161,25 @@ private[spark] class BasicDriverFeatureStep(conf: KubernetesDriverConf) KUBERNETES_DRIVER_SUBMIT_CHECK.key -> "true", MEMORY_OVERHEAD_FACTOR.key -> overheadFactor.toString) // try upload local, resolvable files to a hadoop compatible file system - Seq(JARS, FILES, SUBMIT_PYTHON_FILES).foreach { key => - val value = conf.get(key).filter(uri => KubernetesUtils.isLocalAndResolvable(uri)) + Seq(JARS, FILES, ARCHIVES, SUBMIT_PYTHON_FILES).foreach { key => + val uris = conf.get(key).filter(uri => KubernetesUtils.isLocalAndResolvable(uri)) + val value = { + if (key == ARCHIVES) { + uris.map(UriBuilder.fromUri(_).fragment(null).build()).map(_.toString) + } else { + uris + } + } val resolved = KubernetesUtils.uploadAndTransformFileUris(value, Some(conf.sparkConf)) if (resolved.nonEmpty) { - additionalProps.put(key.key, resolved.mkString(",")) + val resolvedValue = if (key == ARCHIVES) { + uris.zip(resolved).map { case (uri, r) => + UriBuilder.fromUri(r).fragment(new java.net.URI(uri).getFragment).build().toString + } + } else { + resolved + } + additionalProps.put(key.key, resolvedValue.mkString(",")) } } additionalProps.toMap diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala index 760e9ba55d335..a15f7ffa134b8 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala @@ -163,6 +163,18 @@ private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite => }) } + test("SPARK-33615: Launcher client archives", k8sTestTag, MinikubeTag) { + tryDepsTest { + val fileName = Utils.createTempFile(FILE_CONTENTS, HOST_PATH) + Utils.createTarGzFile(s"$HOST_PATH/$fileName", s"$HOST_PATH/$fileName.tar.gz") + sparkAppConf.set("spark.archives", s"$HOST_PATH/$fileName.tar.gz#test_tar_gz") + val examplesJar = Utils.getTestFileAbsolutePath(getExamplesJarName(), sparkHomeDir) + runSparkRemoteCheckAndVerifyCompletion(appResource = examplesJar, + appArgs = Array(s"test_tar_gz/$fileName"), + timeout = Option(DEPS_TIMEOUT)) + } + } + test("Launcher python client dependencies using a zip file", k8sTestTag, MinikubeTag) { val inDepsFile = Utils.getTestFileAbsolutePath("py_container_checks.py", sparkHomeDir) val outDepsFile = s"${inDepsFile.substring(0, inDepsFile.lastIndexOf("."))}.zip" diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala 
b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala index ee44cb5f85835..519443130008b 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala @@ -25,6 +25,8 @@ import scala.collection.JavaConverters._ import io.fabric8.kubernetes.client.dsl.ExecListener import okhttp3.Response +import org.apache.commons.compress.archivers.tar.{TarArchiveEntry, TarArchiveOutputStream} +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream import org.apache.commons.compress.utils.IOUtils import org.apache.commons.io.output.ByteArrayOutputStream import org.apache.hadoop.util.VersionInfo @@ -149,4 +151,24 @@ object Utils extends Logging { IOUtils.closeQuietly(fis) IOUtils.closeQuietly(zipOut) } + + def createTarGzFile(inFile: String, outFile: String): Unit = { + val fileToTarGz = new File(inFile) + Utils.tryWithResource( + new FileInputStream(fileToTarGz) + ) { fis => + Utils.tryWithResource( + new TarArchiveOutputStream( + new GzipCompressorOutputStream( + new FileOutputStream( + new File(outFile)))) + ) { tOut => + val tarEntry = new TarArchiveEntry(fileToTarGz, fileToTarGz.getName) + tOut.putArchiveEntry(tarEntry) + IOUtils.copy(fis, tOut) + tOut.closeArchiveEntry() + tOut.finish() + } + } + } } diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index d252e8368a0c4..7f791e02a392b 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -1629,6 +1629,7 @@ private[spark] class YarnClusterApplication extends SparkApplication { // so remove them from sparkConf here for yarn mode. conf.remove(JARS) conf.remove(FILES) + conf.remove(ARCHIVES) new Client(new ClientArguments(args), conf, null).run() } From acc211d2cf0e6ab94f6578e1eb488766fd20fa4e Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 4 Dec 2020 14:01:15 +0000 Subject: [PATCH 0668/1009] [SPARK-33141][SQL][FOLLOW-UP] Store the max nested view depth in AnalysisContext ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/30289. It removes the hack in `View.effectiveSQLConf`, by putting the max nested view depth in `AnalysisContext`. Then we don't get the max nested view depth from the active SQLConf, which keeps changing during nested view resolution. ### Why are the changes needed? remove hacks. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? If I just remove the hack, `SimpleSQLViewSuite.restrict the nested level of a view` fails. With this fix, it passes again. Closes #30575 from cloud-fan/view. 
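The essence of the change is that the nested-view depth limit is captured once, when resolution of the outermost view starts, and then carried through the per-query `AnalysisContext` instead of being re-read from the (changing) active conf at each nesting level. A simplified, hypothetical Scala sketch of that idea follows; the names `NestedViewDepthSketch`, `ViewResolutionCtx` and `enterView` are illustrative and do not appear in the analyzer (see the diff below for the actual code):

```scala
object NestedViewDepthSketch {
  // depth: current nesting level; maxDepth == -1 means "limit not captured yet".
  case class ViewResolutionCtx(depth: Int = 0, maxDepth: Int = -1)

  def enterView(ctx: ViewResolutionCtx, confMaxNestedViewDepth: => Int): ViewResolutionCtx = {
    // Only the outermost view reads the configured limit; nested views inherit it.
    val max = if (ctx.maxDepth == -1) confMaxNestedViewDepth else ctx.maxDepth
    val next = ViewResolutionCtx(ctx.depth + 1, max)
    require(next.depth <= next.maxDepth,
      s"The depth of the view exceeds the maximum view resolution depth ($max).")
    next
  }
}
```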
Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 49 ++++++++++++------- .../plans/logical/basicLogicalOperators.scala | 3 -- .../spark/sql/execution/SQLViewSuite.scala | 25 ---------- .../sql/execution/SQLViewTestSuite.scala | 32 +++++++++--- 4 files changed, 57 insertions(+), 52 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index ebe1004872ef6..6769dc895d32e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -87,8 +87,8 @@ object FakeV2SessionCatalog extends TableCatalog { } /** - * Provides a way to keep state during the analysis, this enables us to decouple the concerns - * of analysis environment from the catalog. + * Provides a way to keep state during the analysis, mostly for resolving views. This enables us to + * decouple the concerns of analysis environment from the catalog. * The state that is kept here is per-query. * * Note this is thread local. @@ -98,13 +98,21 @@ object FakeV2SessionCatalog extends TableCatalog { * views. * @param nestedViewDepth The nested depth in the view resolution, this enables us to limit the * depth of nested views. + * @param maxNestedViewDepth The maximum allowed depth of nested view resolution. * @param relationCache A mapping from qualified table names to resolved relations. This can ensure * that the table is resolved only once if a table is used multiple times * in a query. + * @param referredTempViewNames All the temp view names referred by the current view we are + * resolving. It's used to make sure the relation resolution is + * consistent between view creation and view resolution. For example, + * if `t` was a permanent table when the current view was created, it + * should still be a permanent table when resolving the current view, + * even if a temp view `t` has been created. */ case class AnalysisContext( catalogAndNamespace: Seq[String] = Nil, nestedViewDepth: Int = 0, + maxNestedViewDepth: Int = -1, relationCache: mutable.Map[Seq[String], LogicalPlan] = mutable.Map.empty, referredTempViewNames: Seq[Seq[String]] = Seq.empty) @@ -118,14 +126,20 @@ object AnalysisContext { private def set(context: AnalysisContext): Unit = value.set(context) - def withAnalysisContext[A]( - catalogAndNamespace: Seq[String], referredTempViewNames: Seq[Seq[String]])(f: => A): A = { + def withAnalysisContext[A](viewDesc: CatalogTable)(f: => A): A = { val originContext = value.get() + val maxNestedViewDepth = if (originContext.maxNestedViewDepth == -1) { + // Here we start to resolve views, get `maxNestedViewDepth` from configs. + SQLConf.get.maxNestedViewDepth + } else { + originContext.maxNestedViewDepth + } val context = AnalysisContext( - catalogAndNamespace, + viewDesc.viewCatalogAndNamespace, originContext.nestedViewDepth + 1, + maxNestedViewDepth, originContext.relationCache, - referredTempViewNames) + viewDesc.viewReferredTempViewNames) set(context) try f finally { set(originContext) } } @@ -1034,18 +1048,19 @@ class Analyzer(override val catalogManager: CatalogManager) // operator. case view @ View(desc, isTempView, _, child) if !child.resolved => // Resolve all the UnresolvedRelations and Views in the child. 
- val newChild = AnalysisContext.withAnalysisContext( - desc.viewCatalogAndNamespace, desc.viewReferredTempViewNames) { - if (AnalysisContext.get.nestedViewDepth > conf.maxNestedViewDepth) { - view.failAnalysis(s"The depth of view ${desc.identifier} exceeds the maximum " + - s"view resolution depth (${conf.maxNestedViewDepth}). Analysis is aborted to " + - s"avoid errors. Increase the value of ${SQLConf.MAX_NESTED_VIEW_DEPTH.key} to " + - "work around this.") - } - SQLConf.withExistingConf(View.effectiveSQLConf(desc.viewSQLConfigs, isTempView)) { - executeSameContext(child) - } + val newChild = AnalysisContext.withAnalysisContext(desc) { + val nestedViewDepth = AnalysisContext.get.nestedViewDepth + val maxNestedViewDepth = AnalysisContext.get.maxNestedViewDepth + if (nestedViewDepth > maxNestedViewDepth) { + view.failAnalysis(s"The depth of view ${desc.identifier} exceeds the maximum " + + s"view resolution depth ($maxNestedViewDepth). Analysis is aborted to " + + s"avoid errors. Increase the value of ${SQLConf.MAX_NESTED_VIEW_DEPTH.key} to " + + "work around this.") + } + SQLConf.withExistingConf(View.effectiveSQLConf(desc.viewSQLConfigs, isTempView)) { + executeSameContext(child) } + } view.copy(child = newChild) case p @ SubqueryAlias(_, view: View) => p.copy(child = resolveViews(view)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index c8b7e8651686a..aa7151ad36850 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -483,9 +483,6 @@ object View { for ((k, v) <- configs) { sqlConf.settings.put(k, v) } - // We should respect the current maxNestedViewDepth cause the view resolving are executed - // from top to down. - sqlConf.setConf(SQLConf.MAX_NESTED_VIEW_DEPTH, activeConf.maxNestedViewDepth) sqlConf } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 709d6321d199d..c4303f0f1e19d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -704,31 +704,6 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { } } - test("restrict the nested level of a view") { - val viewNames = Array.range(0, 11).map(idx => s"view$idx") - withView(viewNames: _*) { - sql("CREATE VIEW view0 AS SELECT * FROM jt") - Array.range(0, 10).foreach { idx => - sql(s"CREATE VIEW view${idx + 1} AS SELECT * FROM view$idx") - } - - withSQLConf(MAX_NESTED_VIEW_DEPTH.key -> "10") { - val e = intercept[AnalysisException] { - sql("SELECT * FROM view10") - }.getMessage - assert(e.contains("The depth of view `default`.`view0` exceeds the maximum view " + - "resolution depth (10). Analysis is aborted to avoid errors. 
Increase the value " + - s"of ${MAX_NESTED_VIEW_DEPTH.key} to work around this.")) - } - - val e = intercept[IllegalArgumentException] { - withSQLConf(MAX_NESTED_VIEW_DEPTH.key -> "0") {} - }.getMessage - assert(e.contains("The maximum depth of a view reference in a nested view must be " + - "positive.")) - } - } - test("permanent view should be case-preserving") { withView("v") { sql("CREATE VIEW v AS SELECT 1 as aBc") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala index fb9f5a73f6d9e..3cffc5bc21ab6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala @@ -121,7 +121,7 @@ abstract class SQLViewTestSuite extends QueryTest with SQLTestUtils { test("change current database should not change view behavior") { withTable("t") { Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") - val viewName = createView("v1", "SELECT * from t") + val viewName = createView("v1", "SELECT * FROM t") withView(viewName) { withTempDatabase { db => sql(s"USE $db") @@ -135,7 +135,7 @@ abstract class SQLViewTestSuite extends QueryTest with SQLTestUtils { test("view should read the new data if table is updated") { withTable("t") { Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") - val viewName = createView("v1", "SELECT c1 from t", Seq("c1")) + val viewName = createView("v1", "SELECT c1 FROM t", Seq("c1")) withView(viewName) { Seq(9, 7, 8).toDF("c1").write.mode("overwrite").format("parquet").saveAsTable("t") checkViewOutput(viewName, Seq(Row(9), Row(7), Row(8))) @@ -146,7 +146,7 @@ abstract class SQLViewTestSuite extends QueryTest with SQLTestUtils { test("add column for table should not affect view output") { withTable("t") { Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") - val viewName = createView("v1", "SELECT * from t") + val viewName = createView("v1", "SELECT * FROM t") withView(viewName) { sql("ALTER TABLE t ADD COLUMN (c2 INT)") checkViewOutput(viewName, Seq(Row(2), Row(3), Row(1))) @@ -157,8 +157,8 @@ abstract class SQLViewTestSuite extends QueryTest with SQLTestUtils { test("check cyclic view reference on CREATE OR REPLACE VIEW") { withTable("t") { Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") - val viewName1 = createView("v1", "SELECT * from t") - val viewName2 = createView("v2", s"SELECT * from $viewName1") + val viewName1 = createView("v1", "SELECT * FROM t") + val viewName2 = createView("v2", s"SELECT * FROM $viewName1") withView(viewName2, viewName1) { val e = intercept[AnalysisException] { createView("v1", s"SELECT * FROM $viewName2", replace = true) @@ -171,8 +171,8 @@ abstract class SQLViewTestSuite extends QueryTest with SQLTestUtils { test("check cyclic view reference on ALTER VIEW") { withTable("t") { Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") - val viewName1 = createView("v1", "SELECT * from t") - val viewName2 = createView("v2", s"SELECT * from $viewName1") + val viewName1 = createView("v1", "SELECT * FROM t") + val viewName2 = createView("v2", s"SELECT * FROM $viewName1") withView(viewName2, viewName1) { val e = intercept[AnalysisException] { sql(s"ALTER VIEW $viewName1 AS SELECT * FROM $viewName2") @@ -181,6 +181,24 @@ abstract class SQLViewTestSuite extends QueryTest with SQLTestUtils { } } } + + test("restrict the nested level of a view") { + val viewNames = 
scala.collection.mutable.ArrayBuffer.empty[String] + val view0 = createView("view0", "SELECT 1") + viewNames += view0 + for (i <- 1 to 10) { + viewNames += createView(s"view$i", s"SELECT * FROM ${viewNames.last}") + } + withView(viewNames.reverse: _*) { + withSQLConf(MAX_NESTED_VIEW_DEPTH.key -> "10") { + val e = intercept[AnalysisException] { + sql(s"SELECT * FROM ${viewNames.last}") + }.getMessage + assert(e.contains("exceeds the maximum view resolution depth (10)")) + assert(e.contains(s"Increase the value of ${MAX_NESTED_VIEW_DEPTH.key}")) + } + } + } } class LocalTempViewTestSuite extends SQLViewTestSuite with SharedSparkSession {
From d671e053e9806d6b4e43a39f5018aa9718790160 Mon Sep 17 00:00:00 2001 From: german Date: Sat, 5 Dec 2020 06:51:54 +0900 Subject: [PATCH 0669/1009] [SPARK-33660][DOCS][SS] Fix Kafka Headers Documentation ### What changes were proposed in this pull request? Update the Kafka headers documentation: the type is no longer a map but an array [jira](https://issues.apache.org/jira/browse/SPARK-33660) ### Why are the changes needed? To help users ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? It is only documentation Closes #30605 from Gschiavon/SPARK-33660-fix-kafka-headers-documentation. Authored-by: german Signed-off-by: Jungtaek Lim (HeartSaVioR) --- docs/structured-streaming-kafka-integration.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/structured-streaming-kafka-integration.md b/docs/structured-streaming-kafka-integration.md index f92dd039d53b7..5336695478c14 100644 --- a/docs/structured-streaming-kafka-integration.md +++ b/docs/structured-streaming-kafka-integration.md @@ -61,7 +61,7 @@ val df = spark .option("includeHeaders", "true") .load() df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)", "headers") - .as[(String, String, Map)] + .as[(String, String, Array[(String, Array[Byte])])] // Subscribe to multiple topics val df = spark
From de9818f043c1ebcda321077633f93072feba601f Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 4 Dec 2020 14:10:42 -0800 Subject: [PATCH 0670/1009] [SPARK-33662][BUILD] Setting version to 3.2.0-SNAPSHOT ### What changes were proposed in this pull request? This PR aims to update `master` branch version to 3.2.0-SNAPSHOT. ### Why are the changes needed? Start to prepare Apache Spark 3.2.0. ### Does this PR introduce _any_ user-facing change? N/A. ### How was this patch tested? Pass the CIs. Closes #30606 from dongjoon-hyun/SPARK-3.2.
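Returning briefly to the Kafka headers documentation fix above (SPARK-33660), a hedged usage sketch of the corrected typing; the topic and bootstrap-server values are placeholders, and only the `selectExpr`/`.as[...]` lines come from the documented example:
```scala
// Sketch only: with includeHeaders=true the "headers" column is an array of
// (key, value) pairs, so a typed Dataset uses Array[(String, Array[Byte])]
// rather than a Map, which is exactly what the documentation now shows.
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().getOrCreate()
import spark.implicits._

val typed = spark.readStream
  .format("kafka")
  .option("kafka.bootstrap.servers", "host1:9092")   // placeholder
  .option("subscribe", "topic1")                      // placeholder
  .option("includeHeaders", "true")
  .load()
  .selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)", "headers")
  .as[(String, String, Array[(String, Array[Byte])])]
```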
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 4 ++-- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- project/MimaExcludes.scala | 5 +++++ python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 40 files changed, 45 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 2047f0d75ca18..20433362459d9 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.1.0 +Version: 3.2.0 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), diff --git a/assembly/pom.xml b/assembly/pom.xml index d17abe857ade5..6aa97710f7307 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 39cdc6d6d6cd3..4ade8c2032b24 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index d328a7de0a762..0318f60d546e7 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 562a1d495cc8a..6be6df993478d 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 0225db81925c5..7aff79ea91d72 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 72a2c4ceb43b6..b5a6775366a47 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index ea16dadca40cb..e51357d97faab 100644 --- a/common/tags/pom.xml +++ 
b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 769e2518b1fd4..b22400575dd02 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index ce6f6ed9c7051..84ca852d1f30a 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 026b3dd804690..a8d42e483d17d 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 3.1.0-SNAPSHOT -SPARK_VERSION_SHORT: 3.1.0 +SPARK_VERSION: 3.2.0-SNAPSHOT +SPARK_VERSION_SHORT: 3.2.0 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.10" MESOS_VERSION: 1.0.0 diff --git a/examples/pom.xml b/examples/pom.xml index 8b632cef6d44d..3d7713f10402f 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../pom.xml diff --git a/external/avro/pom.xml b/external/avro/pom.xml index 98036846eb2a8..a8614c4ff76ab 100644 --- a/external/avro/pom.xml +++ b/external/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index b240dd281823a..808f48f18e1ff 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index d9d9fb7f55c77..2359e99f657f9 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 95a99ac88412e..843f16067463f 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index 941946f30e96f..dbe2ab92a28e7 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 024fdb26d5bf4..69c5862fdbb2d 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 76ee5bb7b2f85..22259b08141da 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index 7e80bd28c19e8..b54ad71eba305 
100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index 728b489da6785..bbb71035c3e19 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 38836db01553a..3ed68c0652711 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 8689e0b8a9ea8..03910ba091997 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index a2550ac939e83..5da2a496e9eb8 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 851af8d52a3ee..2a2c373242201 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 9eacf380e17f2..f5b5a979e35b8 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 80097aec0f429..1d7704055898b 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT pom Spark Project Parent POM http://spark.apache.org/ diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 9405927eb1cb5..33e65c9def41b 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -34,6 +34,10 @@ import com.typesafe.tools.mima.core.ProblemFilters._ */ object MimaExcludes { + // Exclude rules for 3.2.x + lazy val v32excludes = v31excludes ++ Seq( + ) + // Exclude rules for 3.1.x lazy val v31excludes = v30excludes ++ Seq( // mima plugin update caused new incompatibilities to be detected @@ -1742,6 +1746,7 @@ object MimaExcludes { } def excludes(version: String) = version match { + case v if v.startsWith("3.2") => v32excludes case v if v.startsWith("3.1") => v31excludes case v if v.startsWith("3.0") => v30excludes case v if v.startsWith("2.4") => v24excludes diff --git a/python/pyspark/version.py b/python/pyspark/version.py index e8da19fc44185..935795190797f 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -16,4 +16,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "3.1.0.dev0" +__version__ = "3.2.0.dev0" diff --git a/repl/pom.xml b/repl/pom.xml index a1079e7a6fe6a..a982af21d86f9 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 18e1c65e2e932..44df4e1da5331 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 258d3dfc3df9d..bc680077ead8a 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 54a8d66ea1ad6..b9b3642498992 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index e9122ce202723..1d3856742f520 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 6b79eb722fcdd..0553438a1ad4a 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 3f088e420a9a3..5ab66bd5aac8a 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 9cd8adb6cb4df..dd6d21e3cbdac 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 0453094cf8b7b..27d2756c741ef 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index 53b49dd320e94..bd8d352092e73 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 6e806413ef261..8fe8ab358d60c 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../pom.xml From b6b45bc695706201693572bfb87bcee310548945 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 4 Dec 2020 15:04:18 -0800 Subject: [PATCH 0671/1009] [SPARK-33141][SQL][FOLLOW-UP] Fix Scala 2.13 compilation ### What changes were proposed in this pull request? This PR aims to fix Scala 2.13 compilation. ### Why are the changes needed? To recover Scala 2.13. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? 
Pass GitHub Action Scala 2.13 build job. Closes #30611 from dongjoon-hyun/SPARK-33141. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala index 3cffc5bc21ab6..f6172e3b65050 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala @@ -189,7 +189,7 @@ abstract class SQLViewTestSuite extends QueryTest with SQLTestUtils { for (i <- 1 to 10) { viewNames += createView(s"view$i", s"SELECT * FROM ${viewNames.last}") } - withView(viewNames.reverse: _*) { + withView(viewNames.reverse.toSeq: _*) { withSQLConf(MAX_NESTED_VIEW_DEPTH.key -> "10") { val e = intercept[AnalysisException] { sql(s"SELECT * FROM ${viewNames.last}") From 960d6af75d5ef29b1efcf0d03e7db840270382e6 Mon Sep 17 00:00:00 2001 From: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Date: Fri, 4 Dec 2020 15:15:19 -0800 Subject: [PATCH 0672/1009] [SPARK-33472][SQL][FOLLOW-UP] Update RemoveRedundantSorts comment ### What changes were proposed in this pull request? This PR is a follow-up for #30373 that updates the comment for RemoveRedundantSorts in QueryExecution. ### Why are the changes needed? To update an incorrect comment. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A Closes #30584 from allisonwang-db/spark-33472-followup. Authored-by: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/sql/execution/QueryExecution.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index 040d1f36ed8a5..0531dd210e539 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -344,7 +344,7 @@ object QueryExecution { PlanSubqueries(sparkSession), RemoveRedundantProjects, EnsureRequirements, - // `RemoveRedundantSorts` needs to be added before `EnsureRequirements` to guarantee the same + // `RemoveRedundantSorts` needs to be added after `EnsureRequirements` to guarantee the same // number of partitions when instantiating PartitioningCollection. RemoveRedundantSorts, DisableUnnecessaryBucketedScan, From 1b4e35d1a8acf7b744e11b9ac9ca8f81de6db5e5 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 4 Dec 2020 16:48:31 -0800 Subject: [PATCH 0673/1009] [SPARK-33651][SQL] Allow CREATE EXTERNAL TABLE with LOCATION for data source tables ### What changes were proposed in this pull request? This PR removes the restriction and allows CREATE EXTERNAL TABLE with LOCATION for data source tables. It also moves the check from the analyzer rule `ResolveSessionCatalog` to `SessionCatalog`, so that v2 session catalog can overwrite it. ### Why are the changes needed? It's an unnecessary behavior difference that Hive serde table can be created with `CREATE EXTERNAL TABLE` if LOCATION is present, while data source table doesn't allow `CREATE EXTERNAL TABLE` at all. ### Does this PR introduce _any_ user-facing change? 
Yes, now `CREATE EXTERNAL TABLE ... USING ... LOCATION ...` is allowed. ### How was this patch tested? new tests Closes #30595 from cloud-fan/minor. Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/catalog/SessionCatalog.scala | 5 ++++ .../analysis/ResolveSessionCatalog.scala | 12 +-------- .../datasources/v2/V2SessionCatalog.scala | 7 +++++- .../DataSourceV2SQLSessionCatalogSuite.scala | 8 ++++++ .../connector/TestV2SessionCatalogBase.scala | 24 +++++++++++++++--- .../command/PlanResolutionSuite.scala | 14 ++++++----- .../sources/CreateTableAsSelectSuite.scala | 25 ++++++++----------- .../spark/sql/sources/InsertSuite.scala | 2 +- .../sql/hive/MetastoreDataSourcesSuite.scala | 5 ++-- 9 files changed, 64 insertions(+), 38 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 29481b85e9f2e..0cdbc1a234c66 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -318,6 +318,11 @@ class SessionCatalog( tableDefinition: CatalogTable, ignoreIfExists: Boolean, validateLocation: Boolean = true): Unit = { + val isExternal = tableDefinition.tableType == CatalogTableType.EXTERNAL + if (isExternal && tableDefinition.storage.locationUri.isEmpty) { + throw new AnalysisException(s"CREATE EXTERNAL TABLE must be accompanied by LOCATION") + } + val db = formatDatabaseName(tableDefinition.identifier.database.getOrElse(getCurrentDatabase)) val table = formatTableName(tableDefinition.identifier.table) val tableIdentifier = TableIdentifier(table, Some(db)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index f35fcdc07c372..a87ed4b6275d8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -662,17 +662,7 @@ class ResolveSessionCatalog( comment: Option[String], storageFormat: CatalogStorageFormat, external: Boolean): CatalogTable = { - if (external) { - if (DDLUtils.isHiveTable(Some(provider))) { - if (location.isEmpty) { - throw new AnalysisException(s"CREATE EXTERNAL TABLE must be accompanied by LOCATION") - } - } else { - throw new AnalysisException(s"Operation not allowed: CREATE EXTERNAL TABLE ... 
USING") - } - } - - val tableType = if (location.isDefined) { + val tableType = if (external || location.isDefined) { CatalogTableType.EXTERNAL } else { CatalogTableType.MANAGED diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala index a0bc65d3f9057..87f5366354fa0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala @@ -87,7 +87,12 @@ class V2SessionCatalog(catalog: SessionCatalog) val location = Option(properties.get(TableCatalog.PROP_LOCATION)) val storage = DataSource.buildStorageFormatFromOptions(toOptions(tableProperties.toMap)) .copy(locationUri = location.map(CatalogUtils.stringToURI)) - val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED + val isExternal = properties.containsKey(TableCatalog.PROP_EXTERNAL) + val tableType = if (isExternal || location.isDefined) { + CatalogTableType.EXTERNAL + } else { + CatalogTableType.MANAGED + } val tableDesc = CatalogTable( identifier = ident.asTableIdentifier, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSessionCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSessionCatalogSuite.scala index cf00b3b5e4410..c973e2ba30004 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSessionCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSessionCatalogSuite.scala @@ -79,4 +79,12 @@ class DataSourceV2SQLSessionCatalogSuite Row("keyX", s"Table default.$t1 does not have property: keyX")) } } + + test("SPARK-33651: allow CREATE EXTERNAL TABLE without LOCATION") { + withTable("t") { + val prop = TestV2SessionCatalogBase.SIMULATE_ALLOW_EXTERNAL_PROPERTY + "=true" + // The following should not throw AnalysisException. 
+ sql(s"CREATE EXTERNAL TABLE t (i INT) USING $v2Format TBLPROPERTIES($prop)") + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/TestV2SessionCatalogBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/TestV2SessionCatalogBase.scala index f57edb9eb220c..bf2749d1afc53 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/TestV2SessionCatalogBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/TestV2SessionCatalogBase.scala @@ -24,7 +24,7 @@ import java.util.concurrent.atomic.AtomicBoolean import scala.collection.JavaConverters._ import org.apache.spark.sql.catalyst.catalog.CatalogTableType -import org.apache.spark.sql.connector.catalog.{DelegatingCatalogExtension, Identifier, Table, V1Table} +import org.apache.spark.sql.connector.catalog.{DelegatingCatalogExtension, Identifier, Table, TableCatalog, V1Table} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.types.StructType @@ -70,8 +70,22 @@ private[connector] trait TestV2SessionCatalogBase[T <: Table] extends Delegating schema: StructType, partitions: Array[Transform], properties: util.Map[String, String]): Table = { - val created = super.createTable(ident, schema, partitions, properties) - val t = newTable(created.name(), schema, partitions, properties) + val key = TestV2SessionCatalogBase.SIMULATE_ALLOW_EXTERNAL_PROPERTY + val propsWithLocation = if (properties.containsKey(key)) { + // Always set a location so that CREATE EXTERNAL TABLE won't fail with LOCATION not specified. + if (!properties.containsKey(TableCatalog.PROP_LOCATION)) { + val newProps = new util.HashMap[String, String]() + newProps.putAll(properties) + newProps.put(TableCatalog.PROP_LOCATION, "file:/abc") + newProps + } else { + properties + } + } else { + properties + } + val created = super.createTable(ident, schema, partitions, propsWithLocation) + val t = newTable(created.name(), schema, partitions, propsWithLocation) addTable(ident, t) t } @@ -90,3 +104,7 @@ private[connector] trait TestV2SessionCatalogBase[T <: Table] extends Delegating tableCreated.set(false) } } + +object TestV2SessionCatalogBase { + val SIMULATE_ALLOW_EXTERNAL_PROPERTY = "spark.sql.test.simulateAllowExternal" +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 9b7222da55368..38719311f1aef 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -1714,14 +1714,16 @@ class PlanResolutionSuite extends AnalysisTest { } } - test("create hive external table - location must be specified") { - val exc = intercept[AnalysisException] { - parseAndResolve("CREATE EXTERNAL TABLE my_tab STORED AS parquet") + test("create hive external table") { + val withoutLoc = "CREATE EXTERNAL TABLE my_tab STORED AS parquet" + parseAndResolve(withoutLoc) match { + case ct: CreateTable => + assert(ct.tableDesc.tableType == CatalogTableType.EXTERNAL) + assert(ct.tableDesc.storage.locationUri.isEmpty) } - assert(exc.getMessage.contains("CREATE EXTERNAL TABLE must be accompanied by LOCATION")) - val query = "CREATE EXTERNAL TABLE my_tab STORED AS parquet LOCATION '/something/anything'" - parseAndResolve(query) match { + val withLoc = "CREATE EXTERNAL TABLE my_tab STORED AS parquet LOCATION '/something/anything'" + 
parseAndResolve(withLoc) match { case ct: CreateTable => assert(ct.tableDesc.tableType == CatalogTableType.EXTERNAL) assert(ct.tableDesc.storage.locationUri == Some(new URI("/something/anything"))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala index 00c599065ce31..9464f7e4c1241 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala @@ -22,7 +22,7 @@ import java.io.File import org.apache.spark.SparkException import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.catalog.BucketSpec +import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogTableType} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.internal.SQLConf.BUCKETING_MAX_BUCKETS import org.apache.spark.sql.test.SharedSparkSession @@ -170,20 +170,17 @@ class CreateTableAsSelectSuite extends DataSourceTest with SharedSparkSession { } } - test("disallows CREATE EXTERNAL TABLE ... USING ... AS query") { + test("SPARK-33651: allow CREATE EXTERNAL TABLE ... USING ... if location is specified") { withTable("t") { - val error = intercept[AnalysisException] { - sql( - s""" - |CREATE EXTERNAL TABLE t USING PARQUET - |OPTIONS (PATH '${path.toURI}') - |AS SELECT 1 AS a, 2 AS b - """.stripMargin - ) - }.getMessage - - assert(error.contains("Operation not allowed") && - error.contains("CREATE EXTERNAL TABLE ...")) + sql( + s""" + |CREATE EXTERNAL TABLE t USING PARQUET + |OPTIONS (PATH '${path.toURI}') + |AS SELECT 1 AS a, 2 AS b + """.stripMargin) + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t")) + assert(table.tableType == CatalogTableType.EXTERNAL) + assert(table.location.toString == path.toURI.toString.stripSuffix("/")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index aaf8765c04425..bfd04ffaaf754 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -845,7 +845,7 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { .add("s", StringType, false) val newTable = CatalogTable( identifier = TableIdentifier("test_table", None), - tableType = CatalogTableType.EXTERNAL, + tableType = CatalogTableType.MANAGED, storage = CatalogStorageFormat( locationUri = None, inputFormat = None, diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index 41a26344f7c21..0593dbe7f6653 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -711,7 +711,7 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv identifier = TableIdentifier("wide_schema"), tableType = CatalogTableType.EXTERNAL, storage = CatalogStorageFormat.empty.copy( - properties = Map("path" -> tempDir.getCanonicalPath) + locationUri = Some(tempDir.toURI) ), schema = schema, provider = Some("json") @@ -1076,7 +1076,8 @@ class MetastoreDataSourcesSuite extends QueryTest 
with SQLTestUtils with TestHiv identifier = TableIdentifier("skip_hive_metadata", Some("default")), tableType = CatalogTableType.EXTERNAL, storage = CatalogStorageFormat.empty.copy( - properties = Map("path" -> tempPath.getCanonicalPath, "skipHiveMetadata" -> "true") + locationUri = Some(tempPath.toURI), + properties = Map("skipHiveMetadata" -> "true") ), schema = schema, provider = Some("parquet") From 154f6044033d1a3b4c19c64b206b168bf919cb3b Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Sun, 6 Dec 2020 12:03:14 +0900 Subject: [PATCH 0674/1009] [MINOR] Fix string interpolation in CommandUtils.scala and KafkaDataConsumer.scala ### What changes were proposed in this pull request? This PR proposes to fix a string interpolation in `CommandUtils.scala` and `KafkaDataConsumer.scala`. ### Why are the changes needed? To fix a string interpolation bug. ### Does this PR introduce _any_ user-facing change? Yes, the string will be correctly constructed. ### How was this patch tested? Existing tests since they were used in exception/log messages. Closes #30609 from imback82/fix_cache_str_interporlation. Authored-by: Terry Kim Signed-off-by: HyukjinKwon --- .../apache/spark/sql/kafka010/consumer/KafkaDataConsumer.scala | 2 +- .../org/apache/spark/sql/execution/command/CommandUtils.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/consumer/KafkaDataConsumer.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/consumer/KafkaDataConsumer.scala index f2bf7cd1360ec..649430d434a73 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/consumer/KafkaDataConsumer.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/consumer/KafkaDataConsumer.scala @@ -276,7 +276,7 @@ private[kafka010] class KafkaDataConsumer( val fetchedData = getOrRetrieveFetchedData(offset) logDebug(s"Get $groupId $topicPartition nextOffset ${fetchedData.nextOffsetInFetchedData} " + - "requested $offset") + s"requested $offset") // The following loop is basically for `failOnDataLoss = false`. When `failOnDataLoss` is // `false`, first, we will try to fetch the record at `offset`. If no such record exists, then diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala index f86f62bbf853b..15a735be8043f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala @@ -391,7 +391,7 @@ object CommandUtils extends Logging { try { sparkSession.catalog.uncacheTable(name) } catch { - case NonFatal(e) => logWarning("Exception when attempting to uncache $name", e) + case NonFatal(e) => logWarning(s"Exception when attempting to uncache $name", e) } } } From 6317ba29a1bb1b7198fe8df71ddefcf47a55bd51 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Sat, 5 Dec 2020 23:04:55 -0800 Subject: [PATCH 0675/1009] [SPARK-33668][K8S][TEST] Fix flaky test "Verify logging configuration is picked from the provided ### What changes were proposed in this pull request? Fix flaky test "Verify logging configuration is picked from the provided SPARK_CONF_DIR/log4j.properties." The test is flaking, with multiple flaked instances - the reason for the failure has been similar to: ``` The code passed to eventually never returned normally. 
Attempted 109 times over 3.0079882413999997 minutes. Last failure message: Failure executing: GET at: https://192.168.39.167:8443/api/v1/namespaces/b37fc72a991b49baa68a2eaaa1516463/pods/spark-pi-97a9bc76308e7fe3-exec-1/log?pretty=false. Message: pods "spark-pi-97a9bc76308e7fe3-exec-1" not found. Received status: Status(apiVersion=v1, code=404, details=StatusDetails(causes=[], group=null, kind=pods, name=spark-pi-97a9bc76308e7fe3-exec-1, retryAfterSeconds=null, uid=null, additionalProperties={}), kind=Status, message=pods "spark-pi-97a9bc76308e7fe3-exec-1" not found, metadata=ListMeta(_continue=null, remainingItemCount=null, resourceVersion=null, selfLink=null, additionalProperties={}), reason=NotFound, status=Failure, additionalProperties={}).. (KubernetesSuite.scala:402) ``` https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/36854/console https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/36852/console https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/36850/console https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/36848/console From the above failures, it seems, that executor finishes too quickly and is removed by spark before the test can complete. So, in order to mitigate this situation, one way is to turn on the flag "spark.kubernetes.executor.deleteOnTermination" ### Why are the changes needed? Fixes a flaky test. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests. May be a few runs of jenkins integration test, may reveal if the problem is resolved or not. Closes #30616 from ScrapCodes/SPARK-33668/fix-flaky-k8s-integration-test. Authored-by: Prashant Sharma Signed-off-by: Dongjoon Hyun --- .../k8s/integrationtest/KubernetesSuite.scala | 18 ++++++++++++++++++ .../SparkConfPropagateSuite.scala | 1 + 2 files changed, 19 insertions(+) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index 193a02aad0cea..7b2a2d0820238 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -158,6 +158,7 @@ class KubernetesSuite extends SparkFunSuite kubernetesTestComponents.deleteNamespace() } deleteDriverPod() + deleteExecutorPod(appLocator) } protected def runSparkPiAndVerifyCompletion( @@ -508,6 +509,23 @@ class KubernetesSuite extends SparkFunSuite .get() == null) } } + + private def deleteExecutorPod(appLocator: String): Unit = { + kubernetesTestComponents + .kubernetesClient + .pods() + .withLabel("spark-app-locator", appLocator) + .withLabel("spark-role", "executor") + .delete() + Eventually.eventually(TIMEOUT, INTERVAL) { + assert(kubernetesTestComponents.kubernetesClient + .pods() + .withLabel("spark-app-locator", appLocator) + .withLabel("spark-role", "executor") + .list() + .getItems.isEmpty) + } + } } private[spark] object KubernetesSuite { diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala index 
5d3b426598fdd..0bc632716fa8b 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala @@ -39,6 +39,7 @@ private[spark] trait SparkConfPropagateSuite { k8sSuite: KubernetesSuite => sparkAppConf.set("spark.driver.extraJavaOptions", "-Dlog4j.debug") sparkAppConf.set("spark.executor.extraJavaOptions", "-Dlog4j.debug") + sparkAppConf.set("spark.kubernetes.executor.deleteOnTermination", "false") val log4jExpectedLog = s"log4j: Reading configuration from URL file:/opt/spark/conf/log4j.properties" From e857e06452c2cf478beb31367f76d6950b660ebb Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Sun, 6 Dec 2020 01:14:22 -0800 Subject: [PATCH 0676/1009] [SPARK-33652][SQL] DSv2: DeleteFrom should refresh cache ### What changes were proposed in this pull request? This changes `DeleteFromTableExec` to also refresh caches referencing the original table, by passing the `refreshCache` callback to the class. Note that in order to construct the callback, I have to change `DataSourceV2ScanRelation` to contain a `DataSourceV2Relation` instead of a `Table`. ### Why are the changes needed? Currently DSv2 delete from table doesn't refresh caches. This could lead to correctness issue if the staled cache is queried later. ### Does this PR introduce _any_ user-facing change? Yes. Now delete from table in v2 also refreshes cache. ### How was this patch tested? Added a test case. Closes #30597 from sunchao/SPARK-33652. Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../datasources/v2/DataSourceV2Relation.scala | 6 +++--- .../scala/org/apache/spark/sql/Dataset.scala | 4 ++-- .../datasources/v2/DataSourceV2Strategy.scala | 5 +++-- .../datasources/v2/DeleteFromTableExec.scala | 4 +++- .../datasources/v2/V2ScanRelationPushDown.scala | 2 +- .../sql/connector/DataSourceV2SQLSuite.scala | 16 ++++++++++++++++ 6 files changed, 28 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala index 4debdd380e6b4..513fce0aba10c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala @@ -111,16 +111,16 @@ case class DataSourceV2Relation( * plan. This ensures that the stats that are used by the optimizer account for the filters and * projection that will be pushed down. 
* - * @param table a DSv2 [[Table]] + * @param relation a [[DataSourceV2Relation]] * @param scan a DSv2 [[Scan]] * @param output the output attributes of this relation */ case class DataSourceV2ScanRelation( - table: Table, + relation: DataSourceV2Relation, scan: Scan, output: Seq[AttributeReference]) extends LeafNode with NamedRelation { - override def name: String = table.name() + override def name: String = relation.table.name() override def simpleString(maxFields: Int): String = { s"RelationV2${truncatedString(output, "[", ", ", "]", maxFields)} $name" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 0716043bcf660..05d6647afd958 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -53,7 +53,7 @@ import org.apache.spark.sql.execution.aggregate.TypedAggregateExpression import org.apache.spark.sql.execution.arrow.{ArrowBatchStreamWriter, ArrowConverters} import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.LogicalRelation -import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2ScanRelation, FileTable} +import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, DataSourceV2ScanRelation, FileTable} import org.apache.spark.sql.execution.python.EvaluatePython import org.apache.spark.sql.execution.stat.StatFunctions import org.apache.spark.sql.internal.SQLConf @@ -3464,7 +3464,7 @@ class Dataset[T] private[sql]( fr.inputFiles case r: HiveTableRelation => r.tableMeta.storage.locationUri.map(_.toString).toArray - case DataSourceV2ScanRelation(table: FileTable, _, _) => + case DataSourceV2ScanRelation(DataSourceV2Relation(table: FileTable, _, _, _, _), _, _) => table.fileIndex.inputFiles }.flatten files.toSet.toArray diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 938ba77fede47..5289d359f7809 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -208,7 +208,8 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case DeleteFromTable(relation, condition) => relation match { - case DataSourceV2ScanRelation(table, _, output) => + case DataSourceV2ScanRelation(r, _, output) => + val table = r.table if (condition.exists(SubqueryExpression.hasSubquery)) { throw new AnalysisException( s"Delete by condition with subquery is not supported: $condition") @@ -227,7 +228,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat s"Cannot delete from table ${table.name} where ${filters.mkString("[", ", ", "]")}") } - DeleteFromTableExec(table.asDeletable, filters) :: Nil + DeleteFromTableExec(table.asDeletable, filters, refreshCache(r)) :: Nil case _ => throw new AnalysisException("DELETE is only supported with v2 tables.") } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DeleteFromTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DeleteFromTableExec.scala index afebbfd01db22..f0a45c249dc10 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DeleteFromTableExec.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DeleteFromTableExec.scala @@ -24,10 +24,12 @@ import org.apache.spark.sql.sources.Filter case class DeleteFromTableExec( table: SupportsDelete, - condition: Array[Filter]) extends V2CommandExec { + condition: Array[Filter], + refreshCache: () => Unit) extends V2CommandExec { override protected def run(): Seq[InternalRow] = { table.deleteWhere(condition) + refreshCache() Seq.empty } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala index b168e848f0b6f..d2180566790ac 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala @@ -64,7 +64,7 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] { case _ => scan } - val scanRelation = DataSourceV2ScanRelation(relation.table, wrappedScan, output) + val scanRelation = DataSourceV2ScanRelation(relation, wrappedScan, output) val projectionOverSchema = ProjectionOverSchema(output.toStructType) val projectionFunc = (expr: Expression) => expr transformDown { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 6ef4fd1372a78..6838a7644a29f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -1841,6 +1841,22 @@ class DataSourceV2SQLSuite } } + test("SPARK-33652: DeleteFrom should refresh caches referencing the table") { + val t = "testcat.ns1.ns2.tbl" + val view = "view" + withTable(t) { + withTempView(view) { + sql(s"CREATE TABLE $t (id bigint, data string, p int) USING foo PARTITIONED BY (id, p)") + sql(s"INSERT INTO $t VALUES (2L, 'a', 2), (2L, 'b', 3), (3L, 'c', 3)") + sql(s"CACHE TABLE view AS SELECT id FROM $t") + assert(spark.table(view).count() == 3) + + sql(s"DELETE FROM $t WHERE id = 2") + assert(spark.table(view).count() == 1) + } + } + } + test("UPDATE TABLE") { val t = "testcat.ns1.ns2.tbl" withTable(t) { From 5250841537d7a8c54fb451748e2a21d3bcc5d966 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Sun, 6 Dec 2020 01:22:24 -0800 Subject: [PATCH 0677/1009] [SPARK-33256][PYTHON][DOCS] Clarify PySpark follows NumPy documentation style ### What changes were proposed in this pull request? This PR adds few lines about docstring style to document that PySpark follows [NumPy documentation style](https://numpydoc.readthedocs.io/en/latest/format.html). We all completed the migration to NumPy documentation style at SPARK-32085. Ideally we should have a page like https://pandas.pydata.org/docs/development/contributing_docstring.html but I would like to leave it as a future work. ### Why are the changes needed? To tell developers that PySpark now follows NumPy documentation style. ### Does this PR introduce _any_ user-facing change? No, it's a change in unreleased branches yet. ### How was this patch tested? Manually tested via `make clean html` under `python/docs`: ![Screen Shot 2020-12-06 at 1 34 50 PM](https://user-images.githubusercontent.com/6477701/101271623-d5ce0380-37c7-11eb-93ac-da73caa50c37.png) Closes #30622 from HyukjinKwon/SPARK-33256. 
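Circling back to SPARK-33652 above (DSv2 DELETE FROM refreshing caches), the user-visible effect can be restated outside the test harness as a hedged sketch; it mirrors the regression test added in that patch, and the `testcat` catalog and `foo` provider are assumptions taken from that suite's configuration rather than generally available names:
```scala
// Sketch only (assumes a session where "testcat" is registered as an in-memory v2
// catalog, as in DataSourceV2SQLSuite): deleting rows now also recaches plans that
// reference the table, so the cached query sees the post-DELETE data.
spark.sql("CREATE TABLE testcat.ns1.ns2.tbl (id bigint, data string, p int) " +
  "USING foo PARTITIONED BY (id, p)")
spark.sql("INSERT INTO testcat.ns1.ns2.tbl VALUES (2L, 'a', 2), (2L, 'b', 3), (3L, 'c', 3)")
spark.sql("CACHE TABLE cached_ids AS SELECT id FROM testcat.ns1.ns2.tbl")
assert(spark.table("cached_ids").count() == 3)
spark.sql("DELETE FROM testcat.ns1.ns2.tbl WHERE id = 2")
assert(spark.table("cached_ids").count() == 1)  // before this change the stale cache kept 3 rows
```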
Authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- python/docs/source/development/contributing.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/docs/source/development/contributing.rst b/python/docs/source/development/contributing.rst index 2b62c953e0786..a41b8a1a1de9e 100644 --- a/python/docs/source/development/contributing.rst +++ b/python/docs/source/development/contributing.rst @@ -123,11 +123,12 @@ Annotations can be validated using ``dev/lint-python`` script or by invoking myp -Code Style Guide ----------------- +Code and Docstring Guide +---------------------------------- Please follow the style of the existing codebase as is, which is virtually PEP 8 with one exception: lines can be up to 100 characters in length, not 79. +For the docstring style, PySpark follows `NumPy documentation style `_. Note that the method and variable names in PySpark are the similar case is ``threading`` library in Python itself where the APIs were inspired by Java. PySpark also follows `camelCase` for exposed APIs that match with Scala and Java. From 48297818f37a8e02cc02ba6fa9ec04fe37540aca Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Sun, 6 Dec 2020 02:56:08 -0800 Subject: [PATCH 0678/1009] [SPARK-33667][SQL] Respect the `spark.sql.caseSensitive` config while resolving partition spec in v1 `SHOW PARTITIONS` ### What changes were proposed in this pull request? Preprocess the partition spec passed to the V1 SHOW PARTITIONS implementation `ShowPartitionsCommand`, and normalize the passed spec according to the partition columns w.r.t the case sensitivity flag **spark.sql.caseSensitive**. ### Why are the changes needed? V1 SHOW PARTITIONS is case sensitive in fact, and doesn't respect the SQL config **spark.sql.caseSensitive** which is false by default, for instance: ```sql spark-sql> CREATE TABLE tbl1 (price int, qty int, year int, month int) > USING parquet > PARTITIONED BY (year, month); spark-sql> INSERT INTO tbl1 PARTITION(year = 2015, month = 1) SELECT 1, 1; spark-sql> SHOW PARTITIONS tbl1 PARTITION(YEAR = 2015, Month = 1); Error in query: Non-partitioning column(s) [YEAR, Month] are specified for SHOW PARTITIONS; ``` The `SHOW PARTITIONS` command must show the partition `year = 2015, month = 1` specified by `YEAR = 2015, Month = 1`. ### Does this PR introduce _any_ user-facing change? Yes. After the changes, the command above works as expected: ```sql spark-sql> SHOW PARTITIONS tbl1 PARTITION(YEAR = 2015, Month = 1); year=2015/month=1 ``` ### How was this patch tested? By running the affected test suites: - `v1/ShowPartitionsSuite` - `v2/ShowPartitionsSuite` Closes #30615 from MaxGekk/show-partitions-case-sensitivity-test. 
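For readers unfamiliar with the helper reused in the `tables.scala` diff below, here is a simplified, illustrative sketch of what the spec normalization does; the real implementation is `PartitioningUtils.normalizePartitionSpec`, and the names and exception type here are simplified assumptions:
```scala
// Simplified sketch, not the actual Spark implementation: each key of the user-supplied
// partition spec is matched against the table's partition columns with the session
// resolver (case-insensitive when spark.sql.caseSensitive=false) and rewritten to the
// column's canonical name; unknown keys are rejected.
type Resolver = (String, String) => Boolean

def normalizePartitionSpec(
    spec: Map[String, String],
    partitionColumns: Seq[String],
    resolver: Resolver): Map[String, String] = {
  spec.map { case (key, value) =>
    val normalizedKey = partitionColumns
      .find(col => resolver(col, key))
      .getOrElse(throw new IllegalArgumentException(
        s"$key is not a valid partition column"))
    normalizedKey -> value
  }
}

// With the default case-insensitive resolver, PARTITION(YEAR = 2015, Month = 1)
// normalizes to Map("year" -> "2015", "month" -> "1").
```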
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../spark/sql/execution/command/tables.scala | 18 ++++++------ .../command/ShowPartitionsSuiteBase.scala | 28 +++++++++++++++++-- .../command/v1/ShowPartitionsSuite.scala | 4 --- .../command/v2/ShowPartitionsSuite.scala | 4 --- 4 files changed, 34 insertions(+), 20 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 9e3ca3c321a54..59adb7dd7e319 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -1006,20 +1006,18 @@ case class ShowPartitionsCommand( DDLUtils.verifyPartitionProviderIsHive(sparkSession, table, "SHOW PARTITIONS") /** - * Validate the partitioning spec by making sure all the referenced columns are + * Normalizes the partition spec w.r.t the partition columns and case sensitivity settings, + * and validates the spec by making sure all the referenced columns are * defined as partitioning columns in table definition. An AnalysisException exception is * thrown if the partitioning spec is invalid. */ - if (spec.isDefined) { - val badColumns = spec.get.keySet.filterNot(table.partitionColumnNames.contains) - if (badColumns.nonEmpty) { - val badCols = badColumns.mkString("[", ", ", "]") - throw new AnalysisException( - s"Non-partitioning column(s) $badCols are specified for SHOW PARTITIONS") - } - } + val normalizedSpec = spec.map(partitionSpec => PartitioningUtils.normalizePartitionSpec( + partitionSpec, + table.partitionColumnNames, + table.identifier.quotedString, + sparkSession.sessionState.conf.resolver)) - val partNames = catalog.listPartitionNames(tableName, spec) + val partNames = catalog.listPartitionNames(tableName, normalizedSpec) partNames.map(Row(_)) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala index 82457f96a3003..b695decdb3ec9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala @@ -21,6 +21,7 @@ import org.scalactic.source.Position import org.scalatest.Tag import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types.{StringType, StructType} @@ -28,7 +29,6 @@ trait ShowPartitionsSuiteBase extends QueryTest with SQLTestUtils { protected def version: String protected def catalog: String protected def defaultUsing: String - protected def wrongPartitionColumnsError(columns: String*): String // Gets the schema of `SHOW PARTITIONS` private val showSchema: StructType = new StructType().add("partition", StringType, false) protected def runShowPartitionsSql(sqlText: String, expected: Seq[Row]): Unit = { @@ -94,7 +94,7 @@ trait ShowPartitionsSuiteBase extends QueryTest with SQLTestUtils { val errMsg = intercept[AnalysisException] { sql(s"SHOW PARTITIONS $table PARTITION(abcd=2015, xyz=1)") }.getMessage - assert(errMsg.contains(wrongPartitionColumnsError("abcd", "xyz"))) + assert(errMsg.contains("abcd is not a valid partition column")) } } } @@ -149,4 +149,28 @@ trait ShowPartitionsSuiteBase extends QueryTest with SQLTestUtils { } } } + + 
test("SPARK-33667: case sensitivity of partition spec") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + val t = s"$catalog.ns.part_table" + withTable(t) { + sql(s""" + |CREATE TABLE $t (price int, qty int, year int, month int) + |$defaultUsing + |PARTITIONED BY (year, month)""".stripMargin) + sql(s"INSERT INTO $t PARTITION(year = 2015, month = 1) SELECT 1, 1") + Seq( + true -> "PARTITION(year = 2015, month = 1)", + false -> "PARTITION(YEAR = 2015, Month = 1)" + ).foreach { case (caseSensitive, partitionSpec) => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + runShowPartitionsSql( + s"SHOW PARTITIONS $t $partitionSpec", + Row("year=2015/month=1") :: Nil) + } + } + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala index 2b2bc9e63dc82..c752a5f358bb9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala @@ -27,10 +27,6 @@ trait ShowPartitionsSuiteBase extends command.ShowPartitionsSuiteBase { override def catalog: String = CatalogManager.SESSION_CATALOG_NAME override def defaultUsing: String = "USING parquet" - override protected def wrongPartitionColumnsError(columns: String*): String = { - s"Non-partitioning column(s) ${columns.mkString("[", ", ", "]")} are specified" - } - test("show everything in the default database") { val table = "dateTable" withTable(table) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala index ca47a713ad604..55985a335c94b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala @@ -32,10 +32,6 @@ class ShowPartitionsSuite extends command.ShowPartitionsSuiteBase with SharedSpa .set(s"spark.sql.catalog.$catalog", classOf[InMemoryPartitionTableCatalog].getName) .set(s"spark.sql.catalog.non_part_$catalog", classOf[InMemoryTableCatalog].getName) - override protected def wrongPartitionColumnsError(columns: String*): String = { - s"${columns.head} is not a valid partition column" - } - test("a table does not support partitioning") { val table = s"non_part_$catalog.tab1" withTable(table) { From b94ecf0734b829878956d98b74323e0c80822fec Mon Sep 17 00:00:00 2001 From: Xiao Li Date: Sun, 6 Dec 2020 22:36:34 +0800 Subject: [PATCH 0679/1009] [SPARK-33674][TEST] Show Slowpoke notifications in SBT tests ### What changes were proposed in this pull request? This PR is to show Slowpoke notifications in the log when running tests using SBT. For example, the test case "zero sized blocks" in ExternalShuffleServiceSuite enters the infinite loop. After this change, the log file will have a notification message every 5 minute when the test case running longer than two minutes. Below is an example message. 
``` [info] ExternalShuffleServiceSuite: [info] - groupByKey without compression (101 milliseconds) [info] - shuffle non-zero block size (3 seconds, 186 milliseconds) [info] - shuffle serializer (3 seconds, 189 milliseconds) [info] *** Test still running after 2 minute, 1 seconds: suite name: ExternalShuffleServiceSuite, test name: zero sized blocks. [info] *** Test still running after 7 minute, 1 seconds: suite name: ExternalShuffleServiceSuite, test name: zero sized blocks. [info] *** Test still running after 12 minutes, 1 seconds: suite name: ExternalShuffleServiceSuite, test name: zero sized blocks. [info] *** Test still running after 17 minutes, 1 seconds: suite name: ExternalShuffleServiceSuite, test name: zero sized blocks. ``` ### Why are the changes needed? When the tests/code has bug and enters the infinite loop, it is hard to tell which test cases hit some issues from the log, especially when we are running the tests in parallel. It would be nice to show the Slowpoke notifications. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manual testing in my local dev environment. Closes #30621 from gatorsmile/addSlowpoke. Authored-by: Xiao Li Signed-off-by: Yuming Wang --- project/SparkBuild.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index a5951e0452943..23fb73d228e01 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -1055,6 +1055,9 @@ object TestSettings { }.getOrElse(Nil): _*), // Show full stack trace and duration in test cases. testOptions in Test += Tests.Argument("-oDF"), + // Slowpoke notifications: receive notifications every 5 minute of tests that have been running + // longer than two minutes. + testOptions in Test += Tests.Argument(TestFrameworks.ScalaTest, "-W", "120", "300"), testOptions in Test += Tests.Argument(TestFrameworks.JUnit, "-v", "-a"), // Enable Junit testing. libraryDependencies += "com.novocode" % "junit-interface" % "0.11" % "test", From 119539fd493af5ed0e37af79320787f145eaf3f1 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Mon, 7 Dec 2020 09:48:16 +0900 Subject: [PATCH 0680/1009] [SPARK-33663][SQL] Uncaching should not be called on non-existing temp views ### What changes were proposed in this pull request? 
This PR proposes to fix a misleading logs in the following scenario when uncaching is called on non-existing views: ``` scala> sql("CREATE TABLE table USING parquet AS SELECT 2") res0: org.apache.spark.sql.DataFrame = [] scala> val df = spark.table("table") df: org.apache.spark.sql.DataFrame = [2: int] scala> df.createOrReplaceTempView("t2") 20/12/04 10:16:24 WARN CommandUtils: Exception when attempting to uncache $name org.apache.spark.sql.AnalysisException: Table or view not found: t2;; 'UnresolvedRelation [t2], [], false at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42) at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis$1(CheckAnalysis.scala:113) at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis$1$adapted(CheckAnalysis.scala:93) at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:183) at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis(CheckAnalysis.scala:93) at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis$(CheckAnalysis.scala:90) at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:152) at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:172) at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:214) at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:169) at org.apache.spark.sql.execution.QueryExecution.$anonfun$analyzed$1(QueryExecution.scala:73) at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111) at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:138) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:768) at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:138) at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:73) at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:71) at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:63) at org.apache.spark.sql.Dataset$.$anonfun$ofRows$1(Dataset.scala:90) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:768) at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:88) at org.apache.spark.sql.DataFrameReader.table(DataFrameReader.scala:889) at org.apache.spark.sql.SparkSession.table(SparkSession.scala:589) at org.apache.spark.sql.internal.CatalogImpl.uncacheTable(CatalogImpl.scala:476) at org.apache.spark.sql.execution.command.CommandUtils$.uncacheTableOrView(CommandUtils.scala:392) at org.apache.spark.sql.execution.command.CreateViewCommand.run(views.scala:124) ``` Since `t2` does not exist yet, it shouldn't try to uncache. ### Why are the changes needed? To fix misleading message. ### Does this PR introduce _any_ user-facing change? Yes, the above message will not be displayed if the view doesn't exist yet. ### How was this patch tested? Manually tested since this is a log message printed. Closes #30608 from imback82/fix_cache_message. 
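For illustration, a short spark-shell sketch of the effect (it reuses the `table` and `t2` names from the scenario above; only the uncache and logging behavior changes):

```scala
// Assumes a spark-shell session and the `table` created in the scenario above.
val df = spark.table("table")

// `t2` does not exist yet, so no uncache is attempted and the misleading
// "Table or view not found: t2" warning is no longer logged.
df.createOrReplaceTempView("t2")

// Replacing `t2` with the same plan: still nothing to uncache.
df.createOrReplaceTempView("t2")

// Replacing `t2` with a different plan: this is the only case where the old
// view's cached data needs to be invalidated before the replacement.
spark.range(10).createOrReplaceTempView("t2")
```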
Authored-by: Terry Kim Signed-off-by: HyukjinKwon --- .../spark/sql/execution/command/views.scala | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index 4ad5eddb83f43..06b1e03adea50 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -113,12 +113,12 @@ case class CreateViewCommand( verifyTemporaryObjectsNotExists(catalog, isTemporary, name, child) if (viewType == LocalTempView) { - val samePlan = catalog.getTempView(name.table).exists { - // Don't perform sameResult check for View logical plan, since it's unresolved - case _: View => false - case other => other.sameResult(child) + val shouldUncache = replace && catalog.getTempView(name.table).exists { + // Uncache View logical plan without checking the same result check, since it's unresolved. + case _: View => true + case other => !other.sameResult(child) } - if (replace && !samePlan) { + if (shouldUncache) { logInfo(s"Try to uncache ${name.quotedString} before replacing.") checkCyclicViewReference(analyzedPlan, Seq(name), name) CommandUtils.uncacheTableOrView(sparkSession, name.quotedString) @@ -141,12 +141,12 @@ case class CreateViewCommand( } else if (viewType == GlobalTempView) { val db = sparkSession.sessionState.conf.getConf(StaticSQLConf.GLOBAL_TEMP_DATABASE) val viewIdent = TableIdentifier(name.table, Option(db)) - val samePlan = catalog.getGlobalTempView(name.table).exists { - // Don't perform sameResult check for View logical plan, since it's unresolved - case _: View => false - case other => other.sameResult(child) + val shouldUncache = replace && catalog.getGlobalTempView(name.table).exists { + // Uncache View logical plan without checking the same result check, since it's unresolved. + case _: View => true + case other => !other.sameResult(child) } - if (replace && !samePlan) { + if (shouldUncache) { logInfo(s"Try to uncache ${viewIdent.quotedString} before replacing.") checkCyclicViewReference(analyzedPlan, Seq(viewIdent), viewIdent) CommandUtils.uncacheTableOrView(sparkSession, viewIdent.quotedString) From e32de29bcee6073a2d2b9bb4e5930459eaf460d9 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 7 Dec 2020 10:05:28 +0900 Subject: [PATCH 0681/1009] [SPARK-33675][INFRA] Add GitHub Action job to publish snapshot ### What changes were proposed in this pull request? This PR aims to add `GitHub Action` job to publish daily snapshot for **master** branch. - https://repository.apache.org/content/groups/snapshots/org/apache/spark/spark-core_2.12/3.2.0-SNAPSHOT/ For the other branches, I'll make adjusted backports. - For `branch-3.1`, we can specify the checkout `ref` to `branch-3.1`. - For `branch-2.4` and `branch-3.0`, we can publish at every commit since the traffic is low. - https://github.com/apache/spark/pull/30630 (branch-3.0) - https://github.com/apache/spark/pull/30629 (branch-2.4 LTS) ### Why are the changes needed? After this series of jobs, this will reduce our maintenance burden permanently from AmpLab Jenkins by removing the following completely. https://amplab.cs.berkeley.edu/jenkins/view/Spark%20Packaging/ For now, AmpLab Jenkins doesn't have a job for `branch-3.1`. We can do it by ourselves by `GitHub Action`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? 
The snapshot publishing is tested here at PR trigger. Since this PR adds a scheduled job, we cannot test in this PR. - https://github.com/dongjoon-hyun/spark/runs/1505792859 Apache Infra team finished the setup here. - https://issues.apache.org/jira/browse/INFRA-21167 Closes #30623 from dongjoon-hyun/SPARK-33675. Authored-by: Dongjoon Hyun Signed-off-by: HyukjinKwon --- .github/workflows/publish_snapshot.yml | 30 ++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 .github/workflows/publish_snapshot.yml diff --git a/.github/workflows/publish_snapshot.yml b/.github/workflows/publish_snapshot.yml new file mode 100644 index 0000000000000..9871680f73891 --- /dev/null +++ b/.github/workflows/publish_snapshot.yml @@ -0,0 +1,30 @@ +name: Publish Snapshot + +on: + schedule: + - cron: '0 0 * * *' + +jobs: + publish-snapshot: + runs-on: ubuntu-latest + steps: + - name: Checkout Spark repository + uses: actions/checkout@master + - name: Cache Maven local repository + uses: actions/cache@v2 + with: + path: ~/.m2/repository + key: snapshot-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + snapshot-maven- + - name: Install Java 8 + uses: actions/setup-java@v1 + with: + java-version: 8 + - name: Publish snapshot + env: + ASF_USERNAME: ${{ secrets.NEXUS_USER }} + ASF_PASSWORD: ${{ secrets.NEXUS_PW }} + GPG_KEY: "not_used" + GPG_PASSPHRASE: "not_used" + run: ./dev/create-release/release-build.sh publish-snapshot From 29096a8869c95221dc75ce7fd3d098680bef4f55 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 7 Dec 2020 10:21:04 +0900 Subject: [PATCH 0682/1009] [SPARK-33670][SQL] Verify the partition provider is Hive in v1 SHOW TABLE EXTENDED ### What changes were proposed in this pull request? Invoke the check `DDLUtils.verifyPartitionProviderIsHive()` from V1 implementation of `SHOW TABLE EXTENDED` when partition specs are specified. This PR is some kind of follow up https://github.com/apache/spark/pull/16373 and https://github.com/apache/spark/pull/15515. ### Why are the changes needed? To output an user friendly error with recommendation like **" ... partition metadata is not stored in the Hive metastore. To import this information into the metastore, run `msck repair table tableName` "** instead of silently output an empty result. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? By running the affected test suites, in particular: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "hive/test:testOnly *PartitionProviderCompatibilitySuite" ``` Closes #30618 from MaxGekk/show-table-extended-verifyPartitionProviderIsHive. Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- .../spark/sql/execution/command/tables.scala | 3 +++ .../execution/command/v1/ShowTablesSuite.scala | 18 ++++++++++++++++-- .../PartitionProviderCompatibilitySuite.scala | 14 ++++++++++---- 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 59adb7dd7e319..54660ced8d834 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -879,6 +879,9 @@ case class ShowTablesCommand( // Note: tableIdentifierPattern should be non-empty, otherwise a [[ParseException]] // should have been thrown by the sql parser. 
val table = catalog.getTableMetadata(TableIdentifier(tableIdentifierPattern.get, Some(db))) + + DDLUtils.verifyPartitionProviderIsHive(sparkSession, table, "SHOW TABLE EXTENDED") + val tableIdent = table.identifier val normalizedSpec = PartitioningUtils.normalizePartitionSpec( partitionSpec.get, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala index 8f29f9f276138..3db880c776365 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.command.v1 -import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.{AnalysisException, Row, SaveMode} import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution.command import org.apache.spark.sql.internal.SQLConf @@ -111,4 +111,18 @@ trait ShowTablesSuiteBase extends command.ShowTablesSuiteBase { } } -class ShowTablesSuite extends ShowTablesSuiteBase with SharedSparkSession +class ShowTablesSuite extends ShowTablesSuiteBase with SharedSparkSession { + test("SPARK-33670: show partitions from a datasource table") { + import testImplicits._ + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + sql(s"USE $catalog.ns") + val t = "part_datasrc" + withTable(t) { + val df = (1 to 3).map(i => (i, s"val_$i", i * 2)).toDF("a", "b", "c") + df.write.partitionBy("a").format("parquet").mode(SaveMode.Overwrite).saveAsTable(t) + assert(sql(s"SHOW TABLE EXTENDED LIKE '$t' PARTITION(a = 1)").count() === 1) + } + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala index 80afc9d8f44bc..e1b0637963b75 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala @@ -53,7 +53,8 @@ class PartitionProviderCompatibilitySuite s"ALTER TABLE $tableName PARTITION (partCol=1) SET LOCATION '/foo'", s"ALTER TABLE $tableName DROP PARTITION (partCol=1)", s"DESCRIBE $tableName PARTITION (partCol=1)", - s"SHOW PARTITIONS $tableName") + s"SHOW PARTITIONS $tableName", + s"SHOW TABLE EXTENDED LIKE '$tableName' PARTITION (partCol=1)") withSQLConf(SQLConf.HIVE_MANAGE_FILESOURCE_PARTITIONS.key -> "true") { for (cmd <- unsupportedCommands) { @@ -124,10 +125,15 @@ class PartitionProviderCompatibilitySuite } // disabled withSQLConf(SQLConf.HIVE_MANAGE_FILESOURCE_PARTITIONS.key -> "false") { - val e = intercept[AnalysisException] { - spark.sql(s"show partitions test") + Seq( + "SHOW PARTITIONS test", + "SHOW TABLE EXTENDED LIKE 'test' PARTITION (partCol=1)" + ).foreach { showPartitions => + val e = intercept[AnalysisException] { + spark.sql(showPartitions) + } + assert(e.getMessage.contains("filesource partition management is disabled")) } - assert(e.getMessage.contains("filesource partition management is disabled")) spark.sql("refresh table test") assert(spark.sql("select * from test").count() == 5) } From e88f0d4a2436cc47c8bf8ed2a739eab728ea3d81 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Sun, 6 Dec 2020 17:57:19 -0800 Subject: [PATCH 0683/1009] [SPARK-33683][INFRA] Remove 
-Djava.version=11 from Scala 2.13 build in GitHub Actions ### What changes were proposed in this pull request? This PR removes `-Djava.version=11` from the build command for Scala 2.13 in the GitHub Actions' job. In the GitHub Actions' job, the build command for Scala 2.13 is defined as follows. ``` ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Djava.version=11 -Pscala-2.13 compile test:compile ``` Though, Scala 2.13 build uses Java 8 rather than 11 so let's remove `-Djava.version=11`. ### Why are the changes needed? To build with consistent configuration. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Should be done by GitHub Actions' workflow. Closes #30633 from sarutak/scala-213-java11. Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index a3bb083387f3e..72b2caf907151 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -414,7 +414,7 @@ jobs: - name: Build with SBT run: | ./dev/change-scala-version.sh 2.13 - ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Djava.version=11 -Pscala-2.13 compile test:compile + ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pscala-2.13 compile test:compile hadoop-2: name: Hadoop 2 build with SBT From 73412ffb3a857acda5dab41d7be3f7ae627f6eaf Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 6 Dec 2020 19:34:54 -0800 Subject: [PATCH 0684/1009] [SPARK-33680][SQL][TESTS] Fix PrunePartitionSuiteBase/BucketedReadWithHiveSupportSuite not to depend on the default conf ### What changes were proposed in this pull request? This PR updates `PrunePartitionSuiteBase/BucketedReadWithHiveSupportSuite` to have the require conf explicitly. ### Why are the changes needed? The unit test should not depend on the default configurations. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? According to https://github.com/apache/spark/pull/30628 , this seems to be the only ones. Pass the CIs. Closes #30631 from dongjoon-hyun/SPARK-CONF-AGNO. 
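The change follows the usual pattern of pinning the configuration a test relies on instead of inheriting the build's default; a minimal sketch of that pattern (the test body is a placeholder, and it assumes a suite that mixes in `SQLTestUtils` for `withSQLConf`):

```scala
// Pin spark.sql.adaptive.enabled explicitly so the plan assertions do not depend on
// whichever default the build or environment happens to use.
import org.apache.spark.sql.internal.SQLConf

test("SPARK-28169: Convert scan predicate condition to CNF") {
  withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") {
    // ... the existing partition-pruning assertions run unchanged inside this block ...
  }
}
```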
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../execution/PrunePartitionSuiteBase.scala | 81 ++++++++++--------- .../BucketedReadWithHiveSupportSuite.scala | 4 +- 2 files changed, 45 insertions(+), 40 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala index 8e35cd034311d..bc170fcd59026 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala @@ -21,6 +21,7 @@ import org.apache.spark.sql.QueryTest import org.apache.spark.sql.catalyst.expressions.{AttributeReference, BinaryOperator, Expression, IsNotNull, Literal} import org.apache.spark.sql.execution.{FileSourceScanExec, SparkPlan} import org.apache.spark.sql.hive.test.TestHiveSingleton +import org.apache.spark.sql.internal.SQLConf.ADAPTIVE_EXECUTION_ENABLED import org.apache.spark.sql.test.SQLTestUtils abstract class PrunePartitionSuiteBase extends QueryTest with SQLTestUtils with TestHiveSingleton { @@ -28,48 +29,50 @@ abstract class PrunePartitionSuiteBase extends QueryTest with SQLTestUtils with protected def format: String test("SPARK-28169: Convert scan predicate condition to CNF") { - withTempView("temp") { - withTable("t") { - sql( - s""" - |CREATE TABLE t(i INT, p STRING) - |USING $format - |PARTITIONED BY (p)""".stripMargin) - - spark.range(0, 1000, 1).selectExpr("id as col") - .createOrReplaceTempView("temp") - - for (part <- Seq(1, 2, 3, 4)) { + withSQLConf(ADAPTIVE_EXECUTION_ENABLED.key -> "false") { + withTempView("temp") { + withTable("t") { sql( s""" - |INSERT OVERWRITE TABLE t PARTITION (p='$part') - |SELECT col FROM temp""".stripMargin) - } + |CREATE TABLE t(i INT, p STRING) + |USING $format + |PARTITIONED BY (p)""".stripMargin) - assertPrunedPartitions( - "SELECT * FROM t WHERE p = '1' OR (p = '2' AND i = 1)", 2, - "((`p` = '1') || (`p` = '2'))") - assertPrunedPartitions( - "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (i = 1 OR p = '2')", 4, - "") - assertPrunedPartitions( - "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (p = '3' AND i = 3 )", 2, - "((`p` = '1') || (`p` = '3'))") - assertPrunedPartitions( - "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (p = '2' OR p = '3')", 3, - "((`p` = '1') || ((`p` = '2') || (`p` = '3')))") - assertPrunedPartitions( - "SELECT * FROM t", 4, - "") - assertPrunedPartitions( - "SELECT * FROM t WHERE p = '1' AND i = 2", 1, - "(`p` = '1')") - assertPrunedPartitions( - """ - |SELECT i, COUNT(1) FROM ( - |SELECT * FROM t WHERE p = '1' OR (p = '2' AND i = 1) - |) tmp GROUP BY i - """.stripMargin, 2, "((`p` = '1') || (`p` = '2'))") + spark.range(0, 1000, 1).selectExpr("id as col") + .createOrReplaceTempView("temp") + + for (part <- Seq(1, 2, 3, 4)) { + sql( + s""" + |INSERT OVERWRITE TABLE t PARTITION (p='$part') + |SELECT col FROM temp""".stripMargin) + } + + assertPrunedPartitions( + "SELECT * FROM t WHERE p = '1' OR (p = '2' AND i = 1)", 2, + "((`p` = '1') || (`p` = '2'))") + assertPrunedPartitions( + "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (i = 1 OR p = '2')", 4, + "") + assertPrunedPartitions( + "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (p = '3' AND i = 3 )", 2, + "((`p` = '1') || (`p` = '3'))") + assertPrunedPartitions( + "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (p = '2' OR p = '3')", 3, + "((`p` = '1') || ((`p` = '2') || (`p` = '3')))") + assertPrunedPartitions( + 
"SELECT * FROM t", 4, + "") + assertPrunedPartitions( + "SELECT * FROM t WHERE p = '1' AND i = 2", 1, + "(`p` = '1')") + assertPrunedPartitions( + """ + |SELECT i, COUNT(1) FROM ( + |SELECT * FROM t WHERE p = '1' OR (p = '2' AND i = 1) + |) tmp GROUP BY i + """.stripMargin, 2, "((`p` = '1') || (`p` = '2'))") + } } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/BucketedReadWithHiveSupportSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/BucketedReadWithHiveSupportSuite.scala index 35dab79ff6dff..07901351fc0fc 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/BucketedReadWithHiveSupportSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/BucketedReadWithHiveSupportSuite.scala @@ -17,10 +17,12 @@ package org.apache.spark.sql.sources +import org.apache.spark.sql.execution.adaptive.DisableAdaptiveExecutionSuite import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION -class BucketedReadWithHiveSupportSuite extends BucketedReadSuite with TestHiveSingleton { +class BucketedReadWithHiveSupportSuite + extends BucketedReadSuite with DisableAdaptiveExecutionSuite with TestHiveSingleton { protected override def beforeAll(): Unit = { super.beforeAll() assert(spark.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "hive") From d48ef34911b8928b66df92399119caebb24616d4 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Sun, 6 Dec 2020 23:02:36 -0800 Subject: [PATCH 0685/1009] [SPARK-33684][BUILD] Upgrade httpclient from 4.5.6 to 4.5.13 ### What changes were proposed in this pull request? This PR upgrades `commons.httpclient` from `4.5.6` to `4.5.13`. 4.5.6 is released over 2 years ago and now we can use more stable `4.5.13`. https://archive.apache.org/dist/httpcomponents/httpclient/RELEASE_NOTES-4.5.x.txt ### Why are the changes needed? To follow the more stable release. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Should be done by the existing tests. Closes #30634 from sarutak/upgrade-httpclient. 
Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index a19558bc2a5e3..401050a60e493 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -99,7 +99,7 @@ hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar hk2-utils/2.6.1//hk2-utils-2.6.1.jar htrace-core/3.1.0-incubating//htrace-core-3.1.0-incubating.jar -httpclient/4.5.6//httpclient-4.5.6.jar +httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.12//httpcore-4.4.12.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.4.0//ivy-2.4.0.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 24283224dd37d..b0f8935843281 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -98,7 +98,7 @@ hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar hk2-utils/2.6.1//hk2-utils-2.6.1.jar htrace-core4/4.1.0-incubating//htrace-core4-4.1.0-incubating.jar -httpclient/4.5.6//httpclient-4.5.6.jar +httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.12//httpcore-4.4.12.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.4.0//ivy-2.4.0.jar diff --git a/pom.xml b/pom.xml index 1d7704055898b..364dec688b38b 100644 --- a/pom.xml +++ b/pom.xml @@ -155,7 +155,7 @@ 0.12.8 - 4.5.6 + 4.5.13 4.4.12 3.1 From 87c056088e853d475f1507e296ad06480862e8a7 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Sun, 6 Dec 2020 23:22:52 -0800 Subject: [PATCH 0686/1009] [SPARK-33671][SQL] Remove VIEW checks from V1 table commands ### What changes were proposed in this pull request? Remove VIEW checks from the following V1 commands: - `SHOW PARTITIONS` - `TRUNCATE TABLE` - `LOAD DATA` The checks are performed earlier at: https://github.com/apache/spark/blob/acc211d2cf0e6ab94f6578e1eb488766fd20fa4e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala#L885-L889 ### Why are the changes needed? To improve code maintenance, and remove dead codes. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By existing test suites like `v1/ShowPartitionsSuite`. 1. LOAD DATA: https://github.com/apache/spark/blob/acc211d2cf0e6ab94f6578e1eb488766fd20fa4e/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala#L176-L179 2. TRUNCATE TABLE: https://github.com/apache/spark/blob/acc211d2cf0e6ab94f6578e1eb488766fd20fa4e/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala#L180-L183 3. SHOW PARTITIONS: - v1/ShowPartitionsSuite Closes #30620 from MaxGekk/show-table-check-view. 
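A quick spark-shell check of why the removed guards were unreachable (the exact error wording below is assumed and abbreviated; the point is that the failure already happens during analysis):

```scala
// Assumes a spark-shell session. Table resolution in the analyzer rejects views before
// ShowPartitionsCommand, TruncateTableCommand or LoadDataCommand ever run, so the
// command-level VIEW checks deleted here could never fire.
spark.range(5).createOrReplaceTempView("v")
spark.sql("SHOW PARTITIONS v")
// org.apache.spark.sql.AnalysisException: v is a temp view. 'SHOW PARTITIONS' expects a table.
// (wording abbreviated; raised by the analyzer, not by ShowPartitionsCommand)
```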
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../apache/spark/sql/execution/command/tables.scala | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 54660ced8d834..640051384e94c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -311,9 +311,6 @@ case class LoadDataCommand( sparkSession.sessionState.conf.resolver) } - if (targetTable.tableType == CatalogTableType.VIEW) { - throw new AnalysisException(s"Target table in LOAD DATA cannot be a view: $tableIdentwithDB") - } if (DDLUtils.isDatasourceTable(targetTable)) { throw new AnalysisException( s"LOAD DATA is not supported for datasource tables: $tableIdentwithDB") @@ -452,10 +449,6 @@ case class TruncateTableCommand( throw new AnalysisException( s"Operation not allowed: TRUNCATE TABLE on external tables: $tableIdentWithDB") } - if (table.tableType == CatalogTableType.VIEW) { - throw new AnalysisException( - s"Operation not allowed: TRUNCATE TABLE on views: $tableIdentWithDB") - } if (table.partitionColumnNames.isEmpty && partitionSpec.isDefined) { throw new AnalysisException( s"Operation not allowed: TRUNCATE TABLE ... PARTITION is not supported " + @@ -995,11 +988,7 @@ case class ShowPartitionsCommand( * Validate and throws an [[AnalysisException]] exception under the following conditions: * 1. If the table is not partitioned. * 2. If it is a datasource table. - * 3. If it is a view. */ - if (table.tableType == VIEW) { - throw new AnalysisException(s"SHOW PARTITIONS is not allowed on a view: $tableIdentWithDB") - } if (table.partitionColumnNames.isEmpty) { throw new AnalysisException( From 26c0493318c2a3e5b74ff3829de88605aff8e832 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 7 Dec 2020 08:14:36 +0000 Subject: [PATCH 0687/1009] [SPARK-33676][SQL] Require exact matching of partition spec to the schema in V2 `ALTER TABLE .. ADD/DROP PARTITION` ### What changes were proposed in this pull request? Check that partitions specs passed to v2 `ALTER TABLE .. ADD/DROP PARTITION` exactly match to the partition schema (all partition fields from the schema are specified in partition specs). ### Why are the changes needed? 1. To have the same behavior as V1 `ALTER TABLE .. ADD/DROP PARTITION` that output the error: ```sql spark-sql> create table tab1 (id int, a int, b int) using parquet partitioned by (a, b); spark-sql> ALTER TABLE tab1 ADD PARTITION (A='9'); Error in query: Partition spec is invalid. The spec (a) must match the partition spec (a, b) defined in table '`default`.`tab1`'; ``` 2. To prevent future errors caused by not fully specified partition specs. ### Does this PR introduce _any_ user-facing change? Yes. The V2 implementation of `ALTER TABLE .. ADD/DROP PARTITION` output the same error as V1 commands. ### How was this patch tested? By running the test suite with new UT: ``` $ build/sbt "test:testOnly *AlterTablePartitionV2SQLSuite" ``` Closes #30624 from MaxGekk/add-partition-full-spec. 
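Through a v2 catalog the same contract now holds; a sketch mirroring the new test (the `testpart` catalog and the `foo` provider come from the test fixtures and are not generally available):

```scala
// Assumes the in-memory partition catalog registered as `testpart` in the test suite.
spark.sql(
  """CREATE TABLE testpart.ns1.ns2.tbl (id bigint, part0 int, part1 string)
    |USING foo
    |PARTITIONED BY (part0, part1)""".stripMargin)

// Only part0 is given, so both ADD and DROP are rejected with the same error as v1:
spark.sql("ALTER TABLE testpart.ns1.ns2.tbl ADD PARTITION (part0 = 1)")
// org.apache.spark.sql.AnalysisException: Partition spec is invalid. The spec (part0)
// must match the partition spec (part0, part1) defined in table ...
```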
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../analysis/ResolvePartitionSpec.scala | 20 +++++++++++++++---- .../sql/catalyst/catalog/SessionCatalog.scala | 15 ++++++-------- .../spark/sql/util/PartitioningUtils.scala | 18 +++++++++++++++++ .../AlterTablePartitionV2SQLSuite.scala | 20 +++++++++++++++++++ 4 files changed, 60 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala index 38991a9e24fa8..feb05d3b6926b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.connector.catalog.SupportsPartitionManagement import org.apache.spark.sql.types._ -import org.apache.spark.sql.util.PartitioningUtils.normalizePartitionSpec +import org.apache.spark.sql.util.PartitioningUtils.{normalizePartitionSpec, requireExactMatchedPartitionSpec} /** * Resolve [[UnresolvedPartitionSpec]] to [[ResolvedPartitionSpec]] in partition related commands. @@ -35,11 +35,21 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { case r @ AlterTableAddPartition( ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs, _) => - r.copy(parts = resolvePartitionSpecs(table.name, partSpecs, table.partitionSchema())) + val partitionSchema = table.partitionSchema() + r.copy(parts = resolvePartitionSpecs( + table.name, + partSpecs, + partitionSchema, + requireExactMatchedPartitionSpec(table.name, _, partitionSchema.fieldNames))) case r @ AlterTableDropPartition( ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs, _, _, _) => - r.copy(parts = resolvePartitionSpecs(table.name, partSpecs, table.partitionSchema())) + val partitionSchema = table.partitionSchema() + r.copy(parts = resolvePartitionSpecs( + table.name, + partSpecs, + partitionSchema, + requireExactMatchedPartitionSpec(table.name, _, partitionSchema.fieldNames))) case r @ ShowPartitions(ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs) => r.copy(pattern = resolvePartitionSpecs( @@ -51,7 +61,8 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { private def resolvePartitionSpecs( tableName: String, partSpecs: Seq[PartitionSpec], - partSchema: StructType): Seq[ResolvedPartitionSpec] = + partSchema: StructType, + checkSpec: TablePartitionSpec => Unit = _ => ()): Seq[ResolvedPartitionSpec] = partSpecs.map { case unresolvedPartSpec: UnresolvedPartitionSpec => val normalizedSpec = normalizePartitionSpec( @@ -59,6 +70,7 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { partSchema.map(_.name), tableName, conf.resolver) + checkSpec(normalizedSpec) val partitionNames = normalizedSpec.keySet val requestedFields = partSchema.filter(field => partitionNames.contains(field.name)) ResolvedPartitionSpec( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 0cdbc1a234c66..a2ab756382488 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -43,7 +43,7 @@ import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.GLOBAL_TEMP_DATABASE import org.apache.spark.sql.types.StructType -import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.sql.util.{CaseInsensitiveStringMap, PartitioningUtils} import org.apache.spark.util.Utils object SessionCatalog { @@ -1167,14 +1167,11 @@ class SessionCatalog( private def requireExactMatchedPartitionSpec( specs: Seq[TablePartitionSpec], table: CatalogTable): Unit = { - val defined = table.partitionColumnNames.sorted - specs.foreach { s => - if (s.keys.toSeq.sorted != defined) { - throw new AnalysisException( - s"Partition spec is invalid. The spec (${s.keys.mkString(", ")}) must match " + - s"the partition spec (${table.partitionColumnNames.mkString(", ")}) defined in " + - s"table '${table.identifier}'") - } + specs.foreach { spec => + PartitioningUtils.requireExactMatchedPartitionSpec( + table.identifier.toString, + spec, + table.partitionColumnNames) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/PartitioningUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/PartitioningUtils.scala index 586aa6c59164f..e473e1d1b7ff3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/PartitioningUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/PartitioningUtils.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.util import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.Resolver +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec object PartitioningUtils { /** @@ -44,4 +45,21 @@ object PartitioningUtils { normalizedPartSpec.toMap } + + /** + * Verify if the input partition spec exactly matches the existing defined partition spec + * The columns must be the same but the orders could be different. + */ + def requireExactMatchedPartitionSpec( + tableName: String, + spec: TablePartitionSpec, + partitionColumnNames: Seq[String]): Unit = { + val defined = partitionColumnNames.sorted + if (spec.keys.toSeq.sorted != defined) { + throw new AnalysisException( + s"Partition spec is invalid. The spec (${spec.keys.mkString(", ")}) must match " + + s"the partition spec (${partitionColumnNames.mkString(", ")}) defined in " + + s"table '$tableName'") + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala index 47b5e5e54edde..45d47c6d8681c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala @@ -261,4 +261,24 @@ class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { } } } + + test("SPARK-33676: not fully specified partition spec") { + val t = "testpart.ns1.ns2.tbl" + withTable(t) { + sql(s""" + |CREATE TABLE $t (id bigint, part0 int, part1 string) + |USING foo + |PARTITIONED BY (part0, part1)""".stripMargin) + Seq( + s"ALTER TABLE $t ADD PARTITION (part0 = 1)", + s"ALTER TABLE $t DROP PARTITION (part0 = 1)" + ).foreach { alterTable => + val errMsg = intercept[AnalysisException] { + sql(alterTable) + }.getMessage + assert(errMsg.contains("Partition spec is invalid. 
" + + "The spec (part0) must match the partition spec (part0, part1)")) + } + } + } } From 1e0c006748c031d5277ba3b906b0bbf68e6bc893 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Mon, 7 Dec 2020 21:36:52 +0900 Subject: [PATCH 0688/1009] [SPARK-33617][SQL] Add default parallelism configuration for Spark SQL queries ### What changes were proposed in this pull request? This pr add default parallelism configuration(`spark.sql.default.parallelism`) for Spark SQL and make it effective for `LocalTableScan`. ### Why are the changes needed? Avoid generating small files for INSERT INTO TABLE from VALUES, for example: ```sql CREATE TABLE t1(id int) USING parquet; INSERT INTO TABLE t1 VALUES (1), (2), (3), (4), (5), (6), (7), (8); ``` Before this pr: ``` -rw-r--r-- 1 root root 421 Dec 1 01:54 part-00000-4d5a3a89-2995-4328-b2ae-908febbbaf4a-c000.snappy.parquet -rw-r--r-- 1 root root 421 Dec 1 01:54 part-00001-4d5a3a89-2995-4328-b2ae-908febbbaf4a-c000.snappy.parquet -rw-r--r-- 1 root root 421 Dec 1 01:54 part-00002-4d5a3a89-2995-4328-b2ae-908febbbaf4a-c000.snappy.parquet -rw-r--r-- 1 root root 421 Dec 1 01:54 part-00003-4d5a3a89-2995-4328-b2ae-908febbbaf4a-c000.snappy.parquet -rw-r--r-- 1 root root 421 Dec 1 01:54 part-00004-4d5a3a89-2995-4328-b2ae-908febbbaf4a-c000.snappy.parquet -rw-r--r-- 1 root root 421 Dec 1 01:54 part-00005-4d5a3a89-2995-4328-b2ae-908febbbaf4a-c000.snappy.parquet -rw-r--r-- 1 root root 421 Dec 1 01:54 part-00006-4d5a3a89-2995-4328-b2ae-908febbbaf4a-c000.snappy.parquet -rw-r--r-- 1 root root 421 Dec 1 01:54 part-00007-4d5a3a89-2995-4328-b2ae-908febbbaf4a-c000.snappy.parquet -rw-r--r-- 1 root root 0 Dec 1 01:54 _SUCCESS ``` After this pr and set `spark.sql.files.minPartitionNum` to 1: ``` -rw-r--r-- 1 root root 452 Dec 1 01:59 part-00000-6de50c79-e305-4f8d-b6ae-39f46b2619c6-c000.snappy.parquet -rw-r--r-- 1 root root 0 Dec 1 01:59 _SUCCESS ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #30559 from wangyum/SPARK-33617. 
Lead-authored-by: Yuming Wang Co-authored-by: Yuming Wang Signed-off-by: HyukjinKwon --- .../scala/org/apache/spark/sql/internal/SQLConf.scala | 10 ++++++++++ .../main/scala/org/apache/spark/sql/SparkSession.scala | 6 ++++-- .../spark/sql/execution/LocalTableScanExec.scala | 4 +++- .../execution/adaptive/CoalesceShufflePartitions.scala | 2 +- .../spark/sql/execution/basicPhysicalOperators.scala | 3 ++- .../org/apache/spark/sql/execution/command/ddl.scala | 3 ++- .../sql/execution/datasources/FilePartition.scala | 1 + .../sql/execution/datasources/SchemaMergeUtils.scala | 3 ++- .../apache/spark/sql/execution/SparkPlanSuite.scala | 9 +++++++++ 9 files changed, 34 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 025478214e492..ea30832008b56 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -374,6 +374,14 @@ object SQLConf { .booleanConf .createWithDefault(true) + val DEFAULT_PARALLELISM = buildConf("spark.sql.default.parallelism") + .doc("The number of parallelism for Spark SQL, the default value is " + + "`spark.default.parallelism`.") + .version("3.2.0") + .intConf + .checkValue(_ > 0, "The value of spark.sql.default.parallelism must be positive.") + .createOptional + val SHUFFLE_PARTITIONS = buildConf("spark.sql.shuffle.partitions") .doc("The default number of partitions to use when shuffling data for joins or aggregations. " + "Note: For structured streaming, this configuration cannot be changed between query " + @@ -3160,6 +3168,8 @@ class SQLConf extends Serializable with Logging { def cacheVectorizedReaderEnabled: Boolean = getConf(CACHE_VECTORIZED_READER_ENABLED) + def defaultParallelism: Option[Int] = getConf(DEFAULT_PARALLELISM) + def defaultNumShufflePartitions: Int = getConf(SHUFFLE_PARTITIONS) def numShufflePartitions: Int = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index db5ad52977c71..3a9b06940b769 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -519,7 +519,8 @@ class SparkSession private( * @since 2.0.0 */ def range(start: Long, end: Long): Dataset[java.lang.Long] = { - range(start, end, step = 1, numPartitions = sparkContext.defaultParallelism) + range(start, end, step = 1, + numPartitions = sqlContext.conf.defaultParallelism.getOrElse(sparkContext.defaultParallelism)) } /** @@ -529,7 +530,8 @@ class SparkSession private( * @since 2.0.0 */ def range(start: Long, end: Long, step: Long): Dataset[java.lang.Long] = { - range(start, end, step, numPartitions = sparkContext.defaultParallelism) + range(start, end, step, + numPartitions = sqlContext.conf.defaultParallelism.getOrElse(sparkContext.defaultParallelism)) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala index b452213cd6cc7..02a8f46824241 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala @@ -49,7 +49,9 @@ case class LocalTableScanExec( if (rows.isEmpty) { sqlContext.sparkContext.emptyRDD } else { - val 
numSlices = math.min(unsafeRows.length, sqlContext.sparkContext.defaultParallelism) + val numSlices = math.min( + unsafeRows.length, + conf.defaultParallelism.getOrElse(sqlContext.sparkContext.defaultParallelism)) sqlContext.sparkContext.parallelize(unsafeRows, numSlices) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala index 0f482142227d2..6149bd214e540 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala @@ -67,7 +67,7 @@ case class CoalesceShufflePartitions(session: SparkSession) extends CustomShuffl // We fall back to Spark default parallelism if the minimum number of coalesced partitions // is not set, so to avoid perf regressions compared to no coalescing. val minPartitionNum = conf.getConf(SQLConf.COALESCE_PARTITIONS_MIN_PARTITION_NUM) - .getOrElse(session.sparkContext.defaultParallelism) + .orElse(conf.defaultParallelism).getOrElse(session.sparkContext.defaultParallelism) val partitionSpecs = ShufflePartitionsUtil.coalescePartitions( validMetrics.toArray, advisoryTargetSize = conf.getConf(SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala index 006fa0fba4138..80a4090ce03f3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala @@ -382,7 +382,8 @@ case class RangeExec(range: org.apache.spark.sql.catalyst.plans.logical.Range) val start: Long = range.start val end: Long = range.end val step: Long = range.step - val numSlices: Int = range.numSlices.getOrElse(sparkContext.defaultParallelism) + val numSlices: Int = range.numSlices.orElse(sqlContext.conf.defaultParallelism) + .getOrElse(sparkContext.defaultParallelism) val numElements: BigInt = range.numElements val isEmptyRange: Boolean = start == end || (start < end ^ 0 < step) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 27ad62026c9b5..69425cfed285f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -738,7 +738,8 @@ case class AlterTableRecoverPartitionsCommand( // Set the number of parallelism to prevent following file listing from generating many tasks // in case of large #defaultParallelism. val numParallelism = Math.min(serializedPaths.length, - Math.min(spark.sparkContext.defaultParallelism, 10000)) + Math.min(spark.sessionState.conf.defaultParallelism + .getOrElse(spark.sparkContext.defaultParallelism), 10000)) // gather the fast stats for all the partitions otherwise Hive metastore will list all the // files for all the new partitions in sequential way, which is super slow. 
logInfo(s"Gather the fast stats in parallel using $numParallelism tasks.") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FilePartition.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FilePartition.scala index 864130bbd87b7..1b35db8d0873c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FilePartition.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FilePartition.scala @@ -89,6 +89,7 @@ object FilePartition extends Logging { val defaultMaxSplitBytes = sparkSession.sessionState.conf.filesMaxPartitionBytes val openCostInBytes = sparkSession.sessionState.conf.filesOpenCostInBytes val minPartitionNum = sparkSession.sessionState.conf.filesMinPartitionNum + .orElse(sparkSession.sessionState.conf.defaultParallelism) .getOrElse(sparkSession.sparkContext.defaultParallelism) val totalBytes = selectedPartitions.flatMap(_.files.map(_.getLen + openCostInBytes)).sum val bytesPerCore = totalBytes / minPartitionNum diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaMergeUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaMergeUtils.scala index 28097c35401c9..54d79898bb81b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaMergeUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaMergeUtils.scala @@ -57,7 +57,8 @@ object SchemaMergeUtils extends Logging { // Set the number of partitions to prevent following schema reads from generating many tasks // in case of a small number of orc files. val numParallelism = Math.min(Math.max(partialFileStatusInfo.size, 1), - sparkSession.sparkContext.defaultParallelism) + sparkSession.sessionState.conf.defaultParallelism + .getOrElse(sparkSession.sparkContext.defaultParallelism)) val ignoreCorruptFiles = sparkSession.sessionState.conf.ignoreCorruptFiles diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala index 56fff1107ae39..254855247ced3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala @@ -88,4 +88,13 @@ class SparkPlanSuite extends QueryTest with SharedSparkSession { test("SPARK-30780 empty LocalTableScan should use RDD without partitions") { assert(LocalTableScanExec(Nil, Nil).execute().getNumPartitions == 0) } + + test("SPARK-33617: spark.sql.default.parallelism effective for LocalTableScan") { + Seq(1, 4).foreach { minPartitionNum => + withSQLConf(SQLConf.DEFAULT_PARALLELISM.key -> minPartitionNum.toString) { + val df = spark.sql("SELECT * FROM VALUES (1), (2), (3), (4), (5), (6), (7), (8)") + assert(df.rdd.partitions.length === minPartitionNum) + } + } + } } From d730b6bdaa92f2ca19cc8852ac58035e28d47a4f Mon Sep 17 00:00:00 2001 From: Linhong Liu Date: Mon, 7 Dec 2020 13:25:43 +0000 Subject: [PATCH 0689/1009] [SPARK-32680][SQL] Don't Preprocess V2 CTAS with Unresolved Query ### What changes were proposed in this pull request? The analyzer rule `PreprocessTableCreation` will preprocess table creation related logical plan. But for CTAS, if the sub-query can't be resolved, preprocess it will cause "Invalid call to toAttribute on unresolved object" (instead of a user-friendly error msg: "table or view not found"). This PR fixes this wrongly preprocess for CTAS using V2 catalog. 
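A minimal reproduction sketch, mirroring the added test (the `FakeV2Provider` class is a test-only source from `DataFrameSuite`, so the class name here is illustrative):

```scala
// Before the fix, PreprocessTableCreation preprocessed the V2 CTAS plan even though its
// child query was still unresolved, surfacing an internal error; with the rule now
// skipping unresolved children, the normal analysis error is reported instead.
val v2Source = "org.apache.spark.sql.connector.FakeV2Provider"  // test-only provider
spark.sql(s"CREATE TABLE t USING $v2Source AS SELECT * FROM nonexist")
// Before: org.apache.spark.sql.catalyst.analysis.UnresolvedException:
//         Invalid call to toAttribute on unresolved object
// After:  org.apache.spark.sql.AnalysisException: Table or view not found: nonexist
```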
### Why are the changes needed? bug fix ### Does this PR introduce _any_ user-facing change? The error message for CTAS with a non-exists table changed from: `UnresolvedException: Invalid call to toAttribute on unresolved object, tree: xxx` to `AnalysisException: Table or view not found: xxx` ### How was this patch tested? added test Closes #30637 from linhongliu-db/fix-ctas. Authored-by: Linhong Liu Signed-off-by: Wenchen Fan --- .../apache/spark/sql/execution/datasources/rules.scala | 2 +- .../test/scala/org/apache/spark/sql/DataFrameSuite.scala | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index 2cc78258378ab..b9866e415c9b1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -239,7 +239,7 @@ case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[Logi c.copy(tableDesc = normalizedTable.copy(schema = reorderedSchema)) } - case create: V2CreateTablePlan => + case create: V2CreateTablePlan if create.childrenResolved => val schema = create.tableSchema val partitioning = create.partitioning val identifier = create.tableName diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index d34dcb4fe0c01..a45bf12e8f841 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -36,6 +36,7 @@ import org.apache.spark.sql.catalyst.expressions.Uuid import org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, OneRowRelation} import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.connector.FakeV2Provider import org.apache.spark.sql.execution.{FilterExec, QueryExecution, WholeStageCodegenExec} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.aggregate.HashAggregateExec @@ -2451,6 +2452,14 @@ class DataFrameSuite extends QueryTest assert(e.getMessage.contains("Table or view not found:")) } + test("SPARK-32680: Don't analyze CTAS with unresolved query") { + val v2Source = classOf[FakeV2Provider].getName + val e = intercept[AnalysisException] { + sql(s"CREATE TABLE t USING $v2Source AS SELECT * from nonexist") + } + assert(e.getMessage.contains("Table or view not found:")) + } + test("CalendarInterval reflection support") { val df = Seq((1, new CalendarInterval(1, 2, 3))).toDF("a", "b") checkAnswer(df.selectExpr("b"), Row(new CalendarInterval(1, 2, 3))) From da72b87374a7be5416b99ed016dc2fc9da0ed88a Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 7 Dec 2020 13:40:15 +0000 Subject: [PATCH 0690/1009] [SPARK-33641][SQL] Invalidate new char/varchar types in public APIs that produce incorrect results ### What changes were proposed in this pull request? In this PR, we suppose to narrow the use cases of the char/varchar data types, of which are invalid now or later ### Why are the changes needed? 1. 
```scala
scala> spark.udf.register("abcd", () => "12345", org.apache.spark.sql.types.VarcharType(2))
scala> spark.sql("select abcd()").show
scala.MatchError: CharType(2) (of class org.apache.spark.sql.types.VarcharType)
  at org.apache.spark.sql.catalyst.encoders.RowEncoder$.externalDataTypeFor(RowEncoder.scala:215)
  at org.apache.spark.sql.catalyst.encoders.RowEncoder$.externalDataTypeForInput(RowEncoder.scala:212)
  at org.apache.spark.sql.catalyst.expressions.objects.ValidateExternalType.<init>(objects.scala:1741)
  at org.apache.spark.sql.catalyst.encoders.RowEncoder$.$anonfun$serializerFor$3(RowEncoder.scala:175)
  at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:245)
  at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
  at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
  at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198)
  at scala.collection.TraversableLike.flatMap(TraversableLike.scala:245)
  at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:242)
  at scala.collection.mutable.ArrayOps$ofRef.flatMap(ArrayOps.scala:198)
  at org.apache.spark.sql.catalyst.encoders.RowEncoder$.serializerFor(RowEncoder.scala:171)
  at org.apache.spark.sql.catalyst.encoders.RowEncoder$.apply(RowEncoder.scala:66)
  at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:99)
  at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:768)
  at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:96)
  at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:611)
  at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:768)
  at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:606)
  ... 47 elided
```
2. spark.createDataFrame
```
scala> spark.createDataFrame(spark.read.text("README.md").rdd, new org.apache.spark.sql.types.StructType().add("c", "char(1)")).show
+--------------------+
| c|
+--------------------+
| # Apache Spark|
| |
|Spark is a unifie...|
|high-level APIs i...|
|supports general ...|
|rich set of highe...|
|MLlib for machine...|
|and Structured St...|
| | |
spark.read.schema("a varchar(2)").text("./README.md").show(100)
+--------------------+
| a|
+--------------------+
| # Apache Spark|
| |
|Spark is a unifie...|
|high-level APIs i...|
|supports general ...|
```
4. etc

### Does this PR introduce _any_ user-facing change?
No, we intend to avoid a potential breaking change.

### How was this patch tested?
New tests.

Closes #30586 from yaooqinn/SPARK-33641.
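To summarize the new behavior, here is a short sketch based on the tests added in `CharVarcharTestSuite` below; the config key and error text are taken from this patch, but the snippet itself is illustrative rather than a transcript of a real session:
```scala
import org.apache.spark.sql.types.{CharType, StructType, VarcharType}

// After this patch, public APIs that accept a data type reject char/varchar up front with:
//   AnalysisException: char/varchar type can only be used in the table schema ...
spark.read.schema(new StructType().add("id", CharType(5)))
spark.udf.register("testchar", () => "B", VarcharType(1))

// The Spark 3.0 behavior (silently treating char/varchar as string in these APIs)
// can be restored with the legacy flag introduced by this patch:
spark.conf.set("spark.sql.legacy.charVarcharAsString", "true")
```
Note that `CAST` is handled more leniently: `replaceCharVarcharWithStringForCast` only logs a warning and falls back to string, so casts to char/varchar keep working.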
Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../sql/catalyst/expressions/ExprUtils.scala | 6 +- .../sql/catalyst/parser/AstBuilder.scala | 19 +-- .../sql/catalyst/parser/ParseDriver.scala | 5 - .../sql/catalyst/parser/ParserInterface.scala | 6 - .../sql/catalyst/util/CharVarcharUtils.scala | 38 +++++- .../apache/spark/sql/internal/SQLConf.scala | 13 ++ .../apache/spark/sql/types/VarcharType.scala | 2 +- .../catalyst/parser/DataTypeParserSuite.scala | 14 +-- .../parser/TableSchemaParserSuite.scala | 4 +- .../spark/sql/types/DataTypeSuite.scala | 10 ++ .../scala/org/apache/spark/sql/Column.scala | 2 +- .../apache/spark/sql/DataFrameReader.scala | 7 +- .../org/apache/spark/sql/SparkSession.scala | 10 +- .../apache/spark/sql/UDFRegistration.scala | 73 +++++++---- .../datasources/jdbc/JdbcUtils.scala | 7 +- .../org/apache/spark/sql/functions.scala | 12 +- .../spark/sql/CharVarcharTestSuite.scala | 114 ++++++++++++------ .../sql/SparkSessionExtensionSuite.scala | 3 - .../spark/sql/jdbc/JDBCWriteSuite.scala | 5 +- .../sql/hive/client/HiveClientImpl.scala | 2 +- 20 files changed, 226 insertions(+), 126 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala index 56bd3d7026d52..b45bbe417caf4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala @@ -21,7 +21,7 @@ import java.text.{DecimalFormat, DecimalFormatSymbols, ParsePosition} import java.util.Locale import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.util.ArrayBasedMapData +import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, CharVarcharUtils} import org.apache.spark.sql.types.{DataType, MapType, StringType, StructType} import org.apache.spark.unsafe.types.UTF8String @@ -30,7 +30,9 @@ object ExprUtils { def evalTypeExpr(exp: Expression): DataType = { if (exp.foldable) { exp.eval() match { - case s: UTF8String if s != null => DataType.fromDDL(s.toString) + case s: UTF8String if s != null => + val dataType = DataType.fromDDL(s.toString) + CharVarcharUtils.failIfHasCharVarchar(dataType) case _ => throw new AnalysisException( s"The expression '${exp.sql}' is not a valid schema string.") } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 12c5e0de686fa..a22383c62bf74 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -95,19 +95,14 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } override def visitSingleDataType(ctx: SingleDataTypeContext): DataType = withOrigin(ctx) { - visitSparkDataType(ctx.dataType) + typedVisit[DataType](ctx.dataType) } override def visitSingleTableSchema(ctx: SingleTableSchemaContext): StructType = { - val schema = CharVarcharUtils.replaceCharVarcharWithStringInSchema( - StructType(visitColTypeList(ctx.colTypeList))) + val schema = StructType(visitColTypeList(ctx.colTypeList)) withOrigin(ctx)(schema) } - def parseRawDataType(ctx: SingleDataTypeContext): DataType = withOrigin(ctx) { - typedVisit[DataType](ctx.dataType()) - } - /* 
******************************************************************************************** * Plan parsing * ******************************************************************************************** */ @@ -1550,7 +1545,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * Create a [[Cast]] expression. */ override def visitCast(ctx: CastContext): Expression = withOrigin(ctx) { - Cast(expression(ctx.expression), visitSparkDataType(ctx.dataType)) + val rawDataType = typedVisit[DataType](ctx.dataType()) + val dataType = CharVarcharUtils.replaceCharVarcharWithStringForCast(rawDataType) + Cast(expression(ctx.expression), dataType) } /** @@ -2229,12 +2226,6 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg /* ******************************************************************************************** * DataType parsing * ******************************************************************************************** */ - /** - * Create a Spark DataType. - */ - private def visitSparkDataType(ctx: DataTypeContext): DataType = { - CharVarcharUtils.replaceCharVarcharWithString(typedVisit(ctx)) - } /** * Resolve/create a primitive type. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index ac3fbbf6b0512..d08be467f96cc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -39,11 +39,6 @@ abstract class AbstractSqlParser extends ParserInterface with SQLConfHelper with astBuilder.visitSingleDataType(parser.singleDataType()) } - /** Similar to `parseDataType`, but without CHAR/VARCHAR replacement. */ - override def parseRawDataType(sqlText: String): DataType = parse(sqlText) { parser => - astBuilder.parseRawDataType(parser.singleDataType()) - } - /** Creates Expression for a given SQL string. */ override def parseExpression(sqlText: String): Expression = parse(sqlText) { parser => astBuilder.visitSingleExpression(parser.singleExpression()) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala index d724933bc1029..77e357ad073da 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala @@ -70,10 +70,4 @@ trait ParserInterface { */ @throws[ParseException]("Text cannot be parsed to a DataType") def parseDataType(sqlText: String): DataType - - /** - * Parse a string to a raw [[DataType]] without CHAR/VARCHAR replacement. 
- */ - @throws[ParseException]("Text cannot be parsed to a DataType") - def parseRawDataType(sqlText: String): DataType } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala index 0cbe5abdbbd7a..b551d9699f360 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala @@ -19,11 +19,14 @@ package org.apache.spark.sql.catalyst.util import scala.collection.mutable +import org.apache.spark.internal.Logging +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.parser.CatalystSqlParser +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -object CharVarcharUtils { +object CharVarcharUtils extends Logging { private val CHAR_VARCHAR_TYPE_STRING_METADATA_KEY = "__CHAR_VARCHAR_TYPE_STRING" @@ -52,6 +55,19 @@ object CharVarcharUtils { dt.existsRecursively(f => f.isInstanceOf[CharType] || f.isInstanceOf[VarcharType]) } + /** + * Validate the given [[DataType]] to fail if it is char or varchar types or contains nested ones + */ + def failIfHasCharVarchar(dt: DataType): DataType = { + if (!SQLConf.get.charVarcharAsString && hasCharVarchar(dt)) { + throw new AnalysisException("char/varchar type can only be used in the table schema. " + + s"You can set ${SQLConf.LEGACY_CHAR_VARCHAR_AS_STRING.key} to true, so that Spark" + + s" treat them as string type as same as Spark 3.0 and earlier") + } else { + replaceCharVarcharWithString(dt) + } + } + /** * Replaces CharType/VarcharType with StringType recursively in the given data type. */ @@ -69,6 +85,24 @@ object CharVarcharUtils { case _ => dt } + /** + * Replaces CharType/VarcharType with StringType recursively in the given data type, with a + * warning message if it has char or varchar types + */ + def replaceCharVarcharWithStringForCast(dt: DataType): DataType = { + if (SQLConf.get.charVarcharAsString) { + replaceCharVarcharWithString(dt) + } else if (hasCharVarchar(dt)) { + logWarning("The Spark cast operator does not support char/varchar type and simply treats" + + " them as string type. Please use string type directly to avoid confusion. Otherwise," + + s" you can set ${SQLConf.LEGACY_CHAR_VARCHAR_AS_STRING.key} to true, so that Spark treat" + + s" them as string type as same as Spark 3.0 and earlier") + replaceCharVarcharWithString(dt) + } else { + dt + } + } + /** * Removes the metadata entry that contains the original type string of CharType/VarcharType from * the given attribute's metadata. 
@@ -85,7 +119,7 @@ object CharVarcharUtils { */ def getRawType(metadata: Metadata): Option[DataType] = { if (metadata.contains(CHAR_VARCHAR_TYPE_STRING_METADATA_KEY)) { - Some(CatalystSqlParser.parseRawDataType( + Some(CatalystSqlParser.parseDataType( metadata.getString(CHAR_VARCHAR_TYPE_STRING_METADATA_KEY))) } else { None diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index ea30832008b56..69f04e11ff0bc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2962,6 +2962,17 @@ object SQLConf { .booleanConf .createWithDefault(true) + val LEGACY_CHAR_VARCHAR_AS_STRING = + buildConf("spark.sql.legacy.charVarcharAsString") + .internal() + .doc("When true, Spark will not fail if user uses char and varchar type directly in those" + + " APIs that accept or parse data types as parameters, e.g." + + " `SparkSession.read.schema(...)`, `SparkSession.udf.register(...)` but treat them as" + + " string type as Spark 3.0 and earlier.") + .version("3.1.0") + .booleanConf + .createWithDefault(false) + /** * Holds information about keys that have been deprecated. * @@ -3612,6 +3623,8 @@ class SQLConf extends Serializable with Logging { def disabledJdbcConnectionProviders: String = getConf(SQLConf.DISABLED_JDBC_CONN_PROVIDER_LIST) + def charVarcharAsString: Boolean = getConf(SQLConf.LEGACY_CHAR_VARCHAR_AS_STRING) + /** ********************** SQLConf functionality methods ************ */ /** Set Spark SQL configuration properties. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/VarcharType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/VarcharType.scala index 8d78640c1e125..2e30820ef0a05 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/VarcharType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/VarcharType.scala @@ -32,6 +32,6 @@ case class VarcharType(length: Int) extends AtomicType { override def defaultSize: Int = length override def typeName: String = s"varchar($length)" - override def toString: String = s"CharType($length)" + override def toString: String = s"VarcharType($length)" private[spark] override def asNullable: VarcharType = this } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala index 655b1d26d6c90..b9f984001523a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala @@ -56,10 +56,10 @@ class DataTypeParserSuite extends SparkFunSuite { checkDataType("DATE", DateType) checkDataType("timestamp", TimestampType) checkDataType("string", StringType) - checkDataType("ChaR(5)", StringType) - checkDataType("ChaRacter(5)", StringType) - checkDataType("varchAr(20)", StringType) - checkDataType("cHaR(27)", StringType) + checkDataType("ChaR(5)", CharType(5)) + checkDataType("ChaRacter(5)", CharType(5)) + checkDataType("varchAr(20)", VarcharType(20)) + checkDataType("cHaR(27)", CharType(27)) checkDataType("BINARY", BinaryType) checkDataType("void", NullType) checkDataType("interval", CalendarIntervalType) @@ -103,9 +103,9 @@ class DataTypeParserSuite extends SparkFunSuite { StructType( StructField("deciMal", 
DecimalType.USER_DEFAULT, true) :: StructField("anotherDecimal", DecimalType(5, 2), true) :: Nil), true) :: - StructField("MAP", MapType(TimestampType, StringType), true) :: + StructField("MAP", MapType(TimestampType, VarcharType(10)), true) :: StructField("arrAy", ArrayType(DoubleType, true), true) :: - StructField("anotherArray", ArrayType(StringType, true), true) :: Nil) + StructField("anotherArray", ArrayType(CharType(9), true), true) :: Nil) ) // Use backticks to quote column names having special characters. checkDataType( @@ -113,7 +113,7 @@ class DataTypeParserSuite extends SparkFunSuite { StructType( StructField("x+y", IntegerType, true) :: StructField("!@#$%^&*()", StringType, true) :: - StructField("1_2.345<>:\"", StringType, true) :: Nil) + StructField("1_2.345<>:\"", VarcharType(20), true) :: Nil) ) // Empty struct. checkDataType("strUCt<>", StructType(Nil)) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala index 95851d44b4747..5519f016e48d3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.catalyst.parser import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.types._ class TableSchemaParserSuite extends SparkFunSuite { @@ -69,8 +68,7 @@ class TableSchemaParserSuite extends SparkFunSuite { StructField("arrAy", ArrayType(DoubleType)) :: StructField("anotherArray", ArrayType(CharType(9))) :: Nil)) :: Nil) - assert(parse(tableSchemaString) === - CharVarcharUtils.replaceCharVarcharWithStringInSchema(expectedDataType)) + assert(parse(tableSchemaString) === expectedDataType) } // Negative cases diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala index 9442a3e87fc72..8c2e5db6e9364 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala @@ -249,6 +249,12 @@ class DataTypeSuite extends SparkFunSuite { checkDataTypeFromJson(MapType(IntegerType, ArrayType(DoubleType), false)) checkDataTypeFromDDL(MapType(IntegerType, ArrayType(DoubleType), false)) + checkDataTypeFromJson(CharType(1)) + checkDataTypeFromDDL(CharType(1)) + + checkDataTypeFromJson(VarcharType(10)) + checkDataTypeFromDDL(VarcharType(11)) + val metadata = new MetadataBuilder() .putString("name", "age") .build() @@ -310,6 +316,10 @@ class DataTypeSuite extends SparkFunSuite { checkDefaultSize(MapType(IntegerType, StringType, true), 24) checkDefaultSize(MapType(IntegerType, ArrayType(DoubleType), false), 12) checkDefaultSize(structType, 20) + checkDefaultSize(CharType(5), 5) + checkDefaultSize(CharType(100), 100) + checkDefaultSize(VarcharType(5), 5) + checkDefaultSize(VarcharType(10), 10) def checkEqualsIgnoreCompatibleNullability( from: DataType, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 86ba81340272b..4ef23d7e31c59 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -1185,7 +1185,7 @@ class Column(val expr: 
Expression) extends Logging { * @since 1.3.0 */ def cast(to: DataType): Column = withExpr { - Cast(expr, CharVarcharUtils.replaceCharVarcharWithString(to)) + Cast(expr, CharVarcharUtils.replaceCharVarcharWithStringForCast(to)) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index 007df183ee353..b94c42a2c9544 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -73,7 +73,8 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * @since 1.4.0 */ def schema(schema: StructType): DataFrameReader = { - this.userSpecifiedSchema = Option(CharVarcharUtils.replaceCharVarcharWithStringInSchema(schema)) + val replaced = CharVarcharUtils.failIfHasCharVarchar(schema).asInstanceOf[StructType] + this.userSpecifiedSchema = Option(replaced) this } @@ -89,7 +90,9 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * @since 2.3.0 */ def schema(schemaString: String): DataFrameReader = { - this.userSpecifiedSchema = Option(StructType.fromDDL(schemaString)) + val rawSchema = StructType.fromDDL(schemaString) + val schema = CharVarcharUtils.failIfHasCharVarchar(rawSchema).asInstanceOf[StructType] + this.userSpecifiedSchema = Option(schema) this } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index 3a9b06940b769..a2c9406f6becf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -38,6 +38,7 @@ import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.encoders._ import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, Range} +import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.connector.ExternalCommandRunner import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.command.ExternalCommandExecutor @@ -347,9 +348,10 @@ class SparkSession private( */ @DeveloperApi def createDataFrame(rowRDD: RDD[Row], schema: StructType): DataFrame = withActive { + val replaced = CharVarcharUtils.failIfHasCharVarchar(schema).asInstanceOf[StructType] // TODO: use MutableProjection when rowRDD is another DataFrame and the applied // schema differs from the existing schema on any field data type. 
- val encoder = RowEncoder(schema) + val encoder = RowEncoder(replaced) val toRow = encoder.createSerializer() val catalystRows = rowRDD.map(toRow) internalCreateDataFrame(catalystRows.setName(rowRDD.name), schema) @@ -365,7 +367,8 @@ class SparkSession private( */ @DeveloperApi def createDataFrame(rowRDD: JavaRDD[Row], schema: StructType): DataFrame = { - createDataFrame(rowRDD.rdd, schema) + val replaced = CharVarcharUtils.failIfHasCharVarchar(schema).asInstanceOf[StructType] + createDataFrame(rowRDD.rdd, replaced) } /** @@ -378,7 +381,8 @@ class SparkSession private( */ @DeveloperApi def createDataFrame(rows: java.util.List[Row], schema: StructType): DataFrame = withActive { - Dataset.ofRows(self, LocalRelation.fromExternalRows(schema.toAttributes, rows.asScala.toSeq)) + val replaced = CharVarcharUtils.failIfHasCharVarchar(schema).asInstanceOf[StructType] + Dataset.ofRows(self, LocalRelation.fromExternalRows(replaced.toAttributes, rows.asScala.toSeq)) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala index cceb38558946e..237cfe18ed855 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala @@ -30,6 +30,7 @@ import org.apache.spark.sql.catalyst.{JavaTypeInference, ScalaReflection} import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.{Expression, ScalaUDF} +import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.execution.aggregate.ScalaUDAF import org.apache.spark.sql.execution.python.UserDefinedPythonFunction import org.apache.spark.sql.expressions.{SparkUserDefinedFunction, UserDefinedAggregateFunction, UserDefinedAggregator, UserDefinedFunction} @@ -162,9 +163,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends | * @since $version | */ |def register(name: String, f: UDF$i[$extTypeArgs], returnType: DataType): Unit = { + | val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) | val func = $funcCall | def builder(e: Seq[Expression]) = if (e.length == $i) { - | ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + | ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) | } else { | throw new AnalysisException("Invalid number of arguments for function " + name + | ". Expected: $i; Found: " + e.length) @@ -753,9 +755,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 2.3.0 */ def register(name: String, f: UDF0[_], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = () => f.asInstanceOf[UDF0[Any]].call() def builder(e: Seq[Expression]) = if (e.length == 0) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 0; Found: " + e.length) @@ -768,9 +771,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF1[_, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF1[Any, Any]].call(_: Any) def builder(e: Seq[Expression]) = if (e.length == 1) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 1; Found: " + e.length) @@ -783,9 +787,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF2[_, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF2[Any, Any, Any]].call(_: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 2) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 2; Found: " + e.length) @@ -798,9 +803,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF3[_, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF3[Any, Any, Any, Any]].call(_: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 3) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 3; Found: " + e.length) @@ -813,9 +819,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF4[_, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF4[Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 4) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 4; Found: " + e.length) @@ -828,9 +835,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF5[_, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF5[Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 5) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 5; Found: " + e.length) @@ -843,9 +851,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF6[_, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF6[Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 6) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 6; Found: " + e.length) @@ -858,9 +867,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF7[_, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF7[Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 7) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 7; Found: " + e.length) @@ -873,9 +883,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF8[_, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF8[Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 8) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 8; Found: " + e.length) @@ -888,9 +899,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF9[_, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF9[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 9) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 9; Found: " + e.length) @@ -903,9 +915,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF10[_, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF10[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 10) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 10; Found: " + e.length) @@ -918,9 +931,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF11[_, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF11[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 11) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 11; Found: " + e.length) @@ -933,9 +947,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF12[_, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF12[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 12) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 12; Found: " + e.length) @@ -948,9 +963,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF13[_, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF13[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 13) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 13; Found: " + e.length) @@ -963,9 +979,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF14[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF14[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 14) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 14; Found: " + e.length) @@ -978,9 +995,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF15[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF15[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 15) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 15; Found: " + e.length) @@ -993,9 +1011,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF16[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF16[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 16) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 16; Found: " + e.length) @@ -1008,9 +1027,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF17[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF17[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 17) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 17; Found: " + e.length) @@ -1023,9 +1043,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF18[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF18[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 18) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 18; Found: " + e.length) @@ -1038,9 +1059,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF19[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF19[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 19) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 19; Found: " + e.length) @@ -1053,9 +1075,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF20[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF20[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 20) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 20; Found: " + e.length) @@ -1068,9 +1091,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF21[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF21[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 21) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 21; Found: " + e.length) @@ -1083,9 +1107,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF22[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF22[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 22) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 22; Found: " + e.length) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index 216fb02740500..f997e57b23206 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.analysis.Resolver import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.SpecificInternalRow import org.apache.spark.sql.catalyst.parser.CatalystSqlParser -import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CharVarcharUtils, DateTimeUtils, GenericArrayData} +import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils, GenericArrayData} import org.apache.spark.sql.connector.catalog.TableChange import org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcDialects, JdbcType} @@ -761,10 +761,7 @@ object JdbcUtils extends Logging { schema: StructType, caseSensitive: Boolean, createTableColumnTypes: String): Map[String, String] = { - val parsedSchema = CatalystSqlParser.parseTableSchema(createTableColumnTypes) - val userSchema = StructType(parsedSchema.map { field => - field.copy(dataType = CharVarcharUtils.getRawType(field.metadata).getOrElse(field.dataType)) - }) + val userSchema = CatalystSqlParser.parseTableSchema(createTableColumnTypes) val nameEquality = if (caseSensitive) { org.apache.spark.sql.catalyst.analysis.caseSensitiveResolution } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 9861d21d3a430..5b1ee2deefc10 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -29,7 +29,7 @@ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.plans.logical.{BROADCAST, HintInfo, ResolvedHint} -import org.apache.spark.sql.catalyst.util.TimestampFormatter +import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, TimestampFormatter} import org.apache.spark.sql.execution.SparkSqlParser import org.apache.spark.sql.expressions.{Aggregator, SparkUserDefinedFunction, UserDefinedAggregator, UserDefinedFunction} import org.apache.spark.sql.internal.SQLConf @@ -4009,7 +4009,7 @@ object functions { * @since 2.2.0 */ def from_json(e: Column, schema: DataType, options: Map[String, String]): Column = withExpr { - JsonToStructs(schema, options, e.expr) + JsonToStructs(CharVarcharUtils.failIfHasCharVarchar(schema), options, e.expr) } /** @@ -4040,8 +4040,9 @@ object functions { * @group collection_funcs * @since 2.2.0 */ - def from_json(e: Column, schema: DataType, options: java.util.Map[String, String]): Column = - from_json(e, schema, options.asScala.toMap) + def from_json(e: Column, schema: DataType, options: java.util.Map[String, String]): Column = { + from_json(e, CharVarcharUtils.failIfHasCharVarchar(schema), options.asScala.toMap) + } /** * Parses a column containing a JSON string into a `StructType` with the specified schema. 
@@ -4393,7 +4394,8 @@ object functions { * @since 3.0.0 */ def from_csv(e: Column, schema: StructType, options: Map[String, String]): Column = withExpr { - CsvToStructs(schema, options, e.expr) + val replaced = CharVarcharUtils.failIfHasCharVarchar(schema).asInstanceOf[StructType] + CsvToStructs(replaced, options, e.expr) } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index abb13270d20e7..fcd334be7a6f7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources.SimpleInsertSource import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} -import org.apache.spark.sql.types.{ArrayType, CharType, DataType, MapType, StringType, StructField, StructType} +import org.apache.spark.sql.types._ // The base trait for char/varchar tests that need to be run with different table implementations. trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { @@ -435,55 +435,91 @@ class BasicCharVarcharTestSuite extends QueryTest with SharedSparkSession { assert(df.schema.map(_.dataType) == Seq(StringType)) } - assertNoCharType(spark.range(1).select($"id".cast("char(5)"))) - assertNoCharType(spark.range(1).select($"id".cast(CharType(5)))) - assertNoCharType(spark.range(1).selectExpr("CAST(id AS CHAR(5))")) - assertNoCharType(sql("SELECT CAST(id AS CHAR(5)) FROM range(1)")) + val logAppender = new LogAppender("The Spark cast operator does not support char/varchar" + + " type and simply treats them as string type. 
Please use string type directly to avoid" + + " confusion.") + withLogAppender(logAppender) { + assertNoCharType(spark.range(1).select($"id".cast("char(5)"))) + assertNoCharType(spark.range(1).select($"id".cast(CharType(5)))) + assertNoCharType(spark.range(1).selectExpr("CAST(id AS CHAR(5))")) + assertNoCharType(sql("SELECT CAST(id AS CHAR(5)) FROM range(1)")) + } } - test("user-specified schema in functions") { - val df = sql("""SELECT from_json('{"a": "str"}', 'a CHAR(5)')""") - checkAnswer(df, Row(Row("str"))) - val schema = df.schema.head.dataType.asInstanceOf[StructType] - assert(schema.map(_.dataType) == Seq(StringType)) + def failWithInvalidCharUsage[T](fn: => T): Unit = { + val e = intercept[AnalysisException](fn) + assert(e.getMessage contains "char/varchar type can only be used in the table schema") } - test("user-specified schema in DataFrameReader: file source from Dataset") { - val ds = spark.range(10).map(_.toString) - val df1 = spark.read.schema(new StructType().add("id", CharType(5))).csv(ds) - assert(df1.schema.map(_.dataType) == Seq(StringType)) - val df2 = spark.read.schema("id char(5)").csv(ds) - assert(df2.schema.map(_.dataType) == Seq(StringType)) + test("invalidate char/varchar in functions") { + failWithInvalidCharUsage(sql("""SELECT from_json('{"a": "str"}', 'a CHAR(5)')""")) + withSQLConf((SQLConf.LEGACY_CHAR_VARCHAR_AS_STRING.key, "true")) { + val df = sql("""SELECT from_json('{"a": "str"}', 'a CHAR(5)')""") + checkAnswer(df, Row(Row("str"))) + val schema = df.schema.head.dataType.asInstanceOf[StructType] + assert(schema.map(_.dataType) == Seq(StringType)) + } } - test("user-specified schema in DataFrameReader: DSV1") { - def checkSchema(df: DataFrame): Unit = { - val relations = df.queryExecution.analyzed.collect { - case l: LogicalRelation => l.relation - } - assert(relations.length == 1) - assert(relations.head.schema.map(_.dataType) == Seq(StringType)) + test("invalidate char/varchar in SparkSession createDataframe") { + val df = spark.range(10).map(_.toString).toDF() + val schema = new StructType().add("id", CharType(5)) + failWithInvalidCharUsage(spark.createDataFrame(df.collectAsList(), schema)) + failWithInvalidCharUsage(spark.createDataFrame(df.rdd, schema)) + failWithInvalidCharUsage(spark.createDataFrame(df.toJavaRDD, schema)) + withSQLConf((SQLConf.LEGACY_CHAR_VARCHAR_AS_STRING.key, "true")) { + val df1 = spark.createDataFrame(df.collectAsList(), schema) + checkAnswer(df1, df) + assert(df1.schema.head.dataType === StringType) } - - checkSchema(spark.read.schema(new StructType().add("id", CharType(5))) - .format(classOf[SimpleInsertSource].getName).load()) - checkSchema(spark.read.schema("id char(5)") - .format(classOf[SimpleInsertSource].getName).load()) } - test("user-specified schema in DataFrameReader: DSV2") { - def checkSchema(df: DataFrame): Unit = { - val tables = df.queryExecution.analyzed.collect { - case d: DataSourceV2Relation => d.table + test("invalidate char/varchar in spark.read.schema") { + failWithInvalidCharUsage(spark.read.schema(new StructType().add("id", CharType(5)))) + failWithInvalidCharUsage(spark.read.schema("id char(5)")) + withSQLConf((SQLConf.LEGACY_CHAR_VARCHAR_AS_STRING.key, "true")) { + val ds = spark.range(10).map(_.toString) + val df1 = spark.read.schema(new StructType().add("id", CharType(5))).csv(ds) + assert(df1.schema.map(_.dataType) == Seq(StringType)) + val df2 = spark.read.schema("id char(5)").csv(ds) + assert(df2.schema.map(_.dataType) == Seq(StringType)) + + def checkSchema(df: DataFrame): Unit = { + val 
schemas = df.queryExecution.analyzed.collect { + case l: LogicalRelation => l.relation.schema + case d: DataSourceV2Relation => d.table.schema() + } + assert(schemas.length == 1) + assert(schemas.head.map(_.dataType) == Seq(StringType)) } - assert(tables.length == 1) - assert(tables.head.schema.map(_.dataType) == Seq(StringType)) - } - checkSchema(spark.read.schema(new StructType().add("id", CharType(5))) - .format(classOf[SchemaRequiredDataSource].getName).load()) - checkSchema(spark.read.schema("id char(5)") - .format(classOf[SchemaRequiredDataSource].getName).load()) + // user-specified schema in DataFrameReader: DSV1 + checkSchema(spark.read.schema(new StructType().add("id", CharType(5))) + .format(classOf[SimpleInsertSource].getName).load()) + checkSchema(spark.read.schema("id char(5)") + .format(classOf[SimpleInsertSource].getName).load()) + + // user-specified schema in DataFrameReader: DSV2 + checkSchema(spark.read.schema(new StructType().add("id", CharType(5))) + .format(classOf[SchemaRequiredDataSource].getName).load()) + checkSchema(spark.read.schema("id char(5)") + .format(classOf[SchemaRequiredDataSource].getName).load()) + } + } + + test("invalidate char/varchar in udf's result type") { + failWithInvalidCharUsage(spark.udf.register("testchar", () => "B", VarcharType(1))) + failWithInvalidCharUsage(spark.udf.register("testchar2", (x: String) => x, VarcharType(1))) + withSQLConf((SQLConf.LEGACY_CHAR_VARCHAR_AS_STRING.key, "true")) { + spark.udf.register("testchar", () => "B", VarcharType(1)) + spark.udf.register("testchar2", (x: String) => x, VarcharType(1)) + val df1 = spark.sql("select testchar()") + checkAnswer(df1, Row("B")) + assert(df1.schema.head.dataType === StringType) + val df2 = spark.sql("select testchar2('abc')") + checkAnswer(df2, Row("abc")) + assert(df2.schema.head.dataType === StringType) + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala index f02d2041dd7f3..ea276bcec0f78 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala @@ -384,9 +384,6 @@ case class MyParser(spark: SparkSession, delegate: ParserInterface) extends Pars override def parseDataType(sqlText: String): DataType = delegate.parseDataType(sqlText) - - override def parseRawDataType(sqlText: String): DataType = - delegate.parseRawDataType(sqlText) } object MyExtensions { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala index fb46c2ff4c0ea..1a28523cc939f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala @@ -390,14 +390,13 @@ class JDBCWriteSuite extends SharedSparkSession with BeforeAndAfter { .foldLeft(new StructType())((schema, colType) => schema.add(colType._1, colType._2)) val createTableColTypes = colTypes.map { case (col, dataType) => s"$col $dataType" }.mkString(", ") - val df = spark.createDataFrame(sparkContext.parallelize(Seq(Row.empty)), schema) val expectedSchemaStr = colTypes.map { case (col, dataType) => s""""$col" $dataType """ }.mkString(", ") assert(JdbcUtils.schemaString( - df.schema, - df.sqlContext.conf.caseSensitiveAnalysis, + schema, + spark.sqlContext.conf.caseSensitiveAnalysis, url1, Option(createTableColTypes)) == 
expectedSchemaStr) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index bada131c8ba6d..34befb8a6f965 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -985,7 +985,7 @@ private[hive] object HiveClientImpl extends Logging { /** Get the Spark SQL native DataType from Hive's FieldSchema. */ private def getSparkSQLDataType(hc: FieldSchema): DataType = { try { - CatalystSqlParser.parseRawDataType(hc.getType) + CatalystSqlParser.parseDataType(hc.getType) } catch { case e: ParseException => throw new SparkException( From c62b84a0432e51fd10e628088ee311dc3be73d2f Mon Sep 17 00:00:00 2001 From: Josh Soref Date: Mon, 7 Dec 2020 08:40:29 -0600 Subject: [PATCH 0691/1009] [MINOR] Spelling sql not core ### What changes were proposed in this pull request? This PR intends to fix typos in the sub-modules: * `sql/catalyst` * `sql/hive-thriftserver` * `sql/hive` Split per srowen https://github.com/apache/spark/pull/30323#issuecomment-728981618 NOTE: The misspellings have been reported at https://github.com/jsoref/spark/commit/706a726f87a0bbf5e31467fae9015218773db85b#commitcomment-44064356 ### Why are the changes needed? Misspelled words make it harder to read / understand content. ### Does this PR introduce _any_ user-facing change? There are various fixes to documentation, etc... ### How was this patch tested? No testing was performed Closes #30532 from jsoref/spelling-sql-not-core. Authored-by: Josh Soref Signed-off-by: Sean Owen --- .../src/main/scala/org/apache/spark/sql/Row.scala | 6 +++--- .../apache/spark/sql/catalyst/StructFilters.scala | 2 +- .../spark/sql/catalyst/analysis/Analyzer.scala | 2 +- .../spark/sql/catalyst/analysis/CheckAnalysis.scala | 2 +- .../sql/catalyst/analysis/StreamingJoinHelper.scala | 4 ++-- .../analysis/UpdateAttributeNullability.scala | 2 +- .../spark/sql/catalyst/catalog/SessionCatalog.scala | 2 +- .../spark/sql/catalyst/csv/CSVInferSchema.scala | 2 +- .../sql/catalyst/encoders/ExpressionEncoder.scala | 2 +- .../spark/sql/catalyst/expressions/AliasHelper.scala | 2 +- .../spark/sql/catalyst/expressions/ScalaUDF.scala | 4 ++-- .../catalyst/expressions/aggregate/Percentile.scala | 6 +++--- .../spark/sql/catalyst/expressions/arithmetic.scala | 2 +- .../catalyst/expressions/codegen/CodeGenerator.scala | 2 +- .../expressions/codegen/GenerateSafeProjection.scala | 2 +- .../apache/spark/sql/catalyst/expressions/hash.scala | 4 ++-- .../catalyst/expressions/higherOrderFunctions.scala | 2 +- .../sql/catalyst/expressions/jsonExpressions.scala | 12 ++++++------ .../sql/catalyst/expressions/regexpExpressions.scala | 2 +- .../sql/catalyst/expressions/windowExpressions.scala | 2 +- .../catalyst/optimizer/NestedColumnAliasing.scala | 2 +- .../spark/sql/catalyst/optimizer/Optimizer.scala | 4 ++-- .../optimizer/PushDownLeftSemiAntiJoin.scala | 2 +- .../spark/sql/catalyst/optimizer/expressions.scala | 2 +- .../spark/sql/catalyst/optimizer/subquery.scala | 6 +++--- .../spark/sql/catalyst/parser/ParserUtils.scala | 2 +- .../apache/spark/sql/catalyst/plans/QueryPlan.scala | 2 +- .../sql/catalyst/plans/logical/LogicalPlan.scala | 2 +- .../sql/catalyst/plans/logical/PlanHelper.scala | 2 +- .../plans/logical/basicLogicalOperators.scala | 2 +- .../sql/catalyst/plans/physical/partitioning.scala | 2 +- .../sql/catalyst/util/DateTimeFormatterHelper.scala | 
4 ++-- .../spark/sql/catalyst/util/DateTimeUtils.scala | 2 +- .../spark/sql/catalyst/util/QuantileSummaries.scala | 6 +++--- .../org/apache/spark/sql/internal/SQLConf.scala | 6 +++--- .../org/apache/spark/sql/RandomDataGenerator.scala | 6 +++--- .../spark/sql/catalyst/analysis/AnalysisSuite.scala | 2 +- .../analysis/ResolveGroupingAnalyticsSuite.scala | 4 ++-- .../sql/catalyst/analysis/TypeCoercionSuite.scala | 2 +- .../analysis/UnsupportedOperationsSuite.scala | 2 +- .../catalyst/expressions/CodeGenerationSuite.scala | 4 ++-- .../sql/catalyst/expressions/ComplexTypeSuite.scala | 4 ++-- .../expressions/ConditionalExpressionSuite.scala | 4 ++-- .../catalyst/expressions/ExpressionEvalHelper.scala | 2 +- .../expressions/ObjectExpressionsSuite.scala | 4 ++-- .../expressions/StringExpressionsSuite.scala | 2 +- .../expressions/aggregate/PercentileSuite.scala | 8 ++++---- .../expressions/codegen/CodeBlockSuite.scala | 2 +- .../sql/catalyst/optimizer/SetOperationSuite.scala | 8 ++++---- .../spark/sql/catalyst/parser/DDLParserSuite.scala | 2 +- .../sql/catalyst/parser/DataTypeParserSuite.scala | 4 ++-- .../spark/sql/catalyst/parser/ErrorParserSuite.scala | 2 +- .../sql/catalyst/parser/ExpressionParserSuite.scala | 4 ++-- .../catalyst/parser/TableIdentifierParserSuite.scala | 2 +- .../spark/sql/catalyst/util/UnsafeArraySuite.scala | 8 ++++---- .../apache/hive/service/cli/ColumnDescriptor.java | 2 +- .../org/apache/hive/service/cli/GetInfoValue.java | 2 +- .../service/cli/operation/GetColumnsOperation.java | 2 +- .../hive/service/cli/session/HiveSessionImpl.java | 4 ++-- .../service/cli/thrift/ThriftHttpCLIService.java | 2 +- .../spark/sql/hive/thriftserver/DummyListeners.scala | 2 +- .../sql/hive/thriftserver/SparkSQLEnvSuite.scala | 2 +- .../sql/hive/execution/HiveCompatibilitySuite.scala | 12 ++++++------ .../apache/spark/sql/hive/HiveMetastoreCatalog.scala | 2 +- .../spark/sql/hive/client/HiveClientImpl.scala | 4 ++-- .../execution/HiveScriptTransformationExec.scala | 2 +- .../sql/hive/execution/InsertIntoHiveTable.scala | 2 +- .../hive/execution/PruneHiveTablePartitions.scala | 2 +- .../queries/clientpositive/auto_sortmerge_join_13.q | 6 +++--- .../clientpositive/bucketsortoptimize_insert_3.q | 4 ++-- .../src/test/queries/clientpositive/smb_mapjoin_20.q | 2 +- .../org/apache/spark/sql/hive/InsertSuite.scala | 4 ++-- .../org/apache/spark/sql/hive/StatisticsSuite.scala | 2 +- .../execution/HiveScriptTransformationSuite.scala | 4 ++-- .../spark/sql/hive/execution/SQLQuerySuite.scala | 8 ++++---- 75 files changed, 128 insertions(+), 128 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala index 88c672f1cdf85..d43c57ed0f5c8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala @@ -351,7 +351,7 @@ trait Row extends Serializable { /** * Returns the value at position i. * For primitive types if value is null it returns 'zero value' specific for primitive - * ie. 0 for Int - use isNullAt to ensure that value is not null + * i.e. 0 for Int - use isNullAt to ensure that value is not null * * @throws ClassCastException when data type does not match. */ @@ -360,7 +360,7 @@ trait Row extends Serializable { /** * Returns the value of a given fieldName. * For primitive types if value is null it returns 'zero value' specific for primitive - * ie. 0 for Int - use isNullAt to ensure that value is not null + * i.e. 
0 for Int - use isNullAt to ensure that value is not null * * @throws UnsupportedOperationException when schema is not defined. * @throws IllegalArgumentException when fieldName do not exist. @@ -381,7 +381,7 @@ trait Row extends Serializable { /** * Returns a Map consisting of names and values for the requested fieldNames * For primitive types if value is null it returns 'zero value' specific for primitive - * ie. 0 for Int - use isNullAt to ensure that value is not null + * i.e. 0 for Int - use isNullAt to ensure that value is not null * * @throws UnsupportedOperationException when schema is not defined. * @throws IllegalArgumentException when fieldName do not exist. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/StructFilters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/StructFilters.scala index fed1b323f5773..ff67b6fccfae9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/StructFilters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/StructFilters.scala @@ -51,7 +51,7 @@ abstract class StructFilters(pushedFilters: Seq[sources.Filter], schema: StructT /** * Resets states of pushed down filters. The method must be called before - * precessing any new row otherwise `skipRow()` may return wrong result. + * processing any new row otherwise `skipRow()` may return wrong result. */ def reset(): Unit diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 6769dc895d32e..6541961f5613e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1495,7 +1495,7 @@ class Analyzer(override val catalogManager: CatalogManager) val rightRes = rightAttributes .map(x => resolveExpressionBottomUp(x, right).asInstanceOf[Attribute]) f.copy(leftAttributes = leftRes, rightAttributes = rightRes) - // intersect/except will be rewritten to join at the begininng of optimizer. Here we need to + // intersect/except will be rewritten to join at the beginning of optimizer. Here we need to // deduplicate the right side plan, so that we won't produce an invalid self-join later. 
case i @ Intersect(left, right, _) if !i.duplicateResolved => i.copy(right = dedupRight(left, right)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 11c4883992560..9f5eefc744135 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -579,7 +579,7 @@ trait CheckAnalysis extends PredicateHelper { case showPartitions: ShowPartitions => checkShowPartitions(showPartitions) - case _ => // Fallbacks to the following checks + case _ => // Falls back to the following checks } operator match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala index cddc3a44f4d9d..d8e200d6b01e4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala @@ -55,7 +55,7 @@ object StreamingJoinHelper extends PredicateHelper with Logging { * given the join condition and the event time watermark. This is how it works. * - The condition is split into conjunctive predicates, and we find the predicates of the * form `leftTime + c1 < rightTime + c2` (or <=, >, >=). - * - We canoncalize the predicate and solve it with the event time watermark value to find the + * - We canonicalize the predicate and solve it with the event time watermark value to find the * value of the state watermark. * This function is supposed to make best-effort attempt to get the state watermark. If there is * any error, it will return None. @@ -94,7 +94,7 @@ object StreamingJoinHelper extends PredicateHelper with Logging { // The generated the state watermark cleanup expression is inclusive of the state watermark. // If state watermark is W, all state where timestamp <= W will be cleaned up. - // Now when the canonicalized join condition solves to leftTime >= W, we dont want to clean + // Now when the canonicalized join condition solves to leftTime >= W, we don't want to clean // up leftTime <= W. Rather we should clean up leftTime <= W - 1. Hence the -1 below. val stateWatermark = predicate match { case LessThan(l, r) => getStateWatermarkSafely(l, r) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UpdateAttributeNullability.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UpdateAttributeNullability.scala index 3eae34da7e502..5004108d348b6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UpdateAttributeNullability.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UpdateAttributeNullability.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.rules.Rule * Updates nullability of Attributes in a resolved LogicalPlan by using the nullability of * corresponding Attributes of its children output Attributes. This step is needed because * users can use a resolved AttributeReference in the Dataset API and outer joins - * can change the nullability of an AttribtueReference. Without this rule, a nullable column's + * can change the nullability of an AttributeReference. 
Without this rule, a nullable column's * nullable field can be actually set as non-nullable, which cause illegal optimization * (e.g., NULL propagation) and wrong answers. * See SPARK-13484 and SPARK-13801 for the concrete queries of this case. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index a2ab756382488..4c32870abe621 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -1615,7 +1615,7 @@ class SessionCatalog( } /** - * Validate the new locatoin before renaming a managed table, which should be non-existent. + * Validate the new location before renaming a managed table, which should be non-existent. */ private def validateNewLocationOfRename( oldName: TableIdentifier, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala index 56677d7d97af2..fd9e30d155148 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala @@ -143,7 +143,7 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable { // The conversion can fail when the `field` is not a form of number. val bigDecimal = decimalParser(field) // Because many other formats do not support decimal, it reduces the cases for - // decimals by disallowing values having scale (eg. `1.1`). + // decimals by disallowing values having scale (e.g. `1.1`). if (bigDecimal.scale <= 0) { // `DecimalType` conversion can fail when // 1. The precision is bigger than 38. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala index 9ab38044e6a88..80a0374ae1f26 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala @@ -189,7 +189,7 @@ object ExpressionEncoder { } /** - * Function that serializesa an object of type `T` to an [[InternalRow]]. This class is not + * Function that serializes an object of type `T` to an [[InternalRow]]. This class is not * thread-safe. Note that multiple calls to `apply(..)` return the same actual [[InternalRow]] * object. Thus, the caller should copy the result before making another call if required. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala index c61eb68db5bfa..ad6cf959a69c6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala @@ -64,7 +64,7 @@ trait AliasHelper { /** * Replace all attributes, that reference an alias, with the aliased expression, - * but keep the name of the outmost attribute. + * but keep the name of the outermost attribute. 
*/ protected def replaceAliasButKeepName( expr: NamedExpression, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala index 0a69d5aa6b9ad..4a89d24e5f635 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala @@ -1145,7 +1145,7 @@ case class ScalaUDF( val resultConverter = s"$convertersTerm[${children.length}]" val boxedType = CodeGenerator.boxedType(dataType) - val funcInvokation = if (isPrimitive(dataType) + val funcInvocation = if (isPrimitive(dataType) // If the output is nullable, the returned value must be unwrapped from the Option && !nullable) { s"$resultTerm = ($boxedType)$getFuncResult" @@ -1156,7 +1156,7 @@ case class ScalaUDF( s""" |$boxedType $resultTerm = null; |try { - | $funcInvokation; + | $funcInvocation; |} catch (Exception e) { | throw new org.apache.spark.SparkException($errorMsgTerm, e); |} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Percentile.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Percentile.scala index 0eba61c741133..b808083152cd3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Percentile.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Percentile.scala @@ -191,13 +191,13 @@ case class Percentile( val sortedCounts = buffer.toSeq.sortBy(_._1)( child.dataType.asInstanceOf[NumericType].ordering.asInstanceOf[Ordering[AnyRef]]) - val accumlatedCounts = sortedCounts.scanLeft((sortedCounts.head._1, 0L)) { + val accumulatedCounts = sortedCounts.scanLeft((sortedCounts.head._1, 0L)) { case ((key1, count1), (key2, count2)) => (key2, count1 + count2) }.tail - val maxPosition = accumlatedCounts.last._2 - 1 + val maxPosition = accumulatedCounts.last._2 - 1 percentages.map { percentile => - getPercentile(accumlatedCounts, maxPosition * percentile) + getPercentile(accumulatedCounts, maxPosition * percentile) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index c69edccc696bb..3fbb798f1fd53 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -216,7 +216,7 @@ abstract class BinaryArithmetic extends BinaryOperator with NullIntolerant { case DoubleType | FloatType => // When Double/Float overflows, there can be 2 cases: // - precision loss: according to SQL standard, the number is truncated; - // - returns (+/-)Infinite: same behavior also other DBs have (eg. Postgres) + // - returns (+/-)Infinite: same behavior also other DBs have (e.g. 
Postgres) nullSafeCodeGen(ctx, ev, (eval1, eval2) => { s""" |${ev.value} = $eval1 $symbol $eval2; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 1ff4a93cf0acd..638878b312dc5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -175,7 +175,7 @@ class CodegenContext extends Logging { mutable.ArrayBuffer.empty[(String, String)] /** - * The mapping between mutable state types and corrseponding compacted arrays. + * The mapping between mutable state types and corresponding compacted arrays. * The keys are java type string. The values are [[MutableStateArrays]] which encapsulates * the compacted arrays for the mutable states with the same java type. * diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala index e285398ba1958..4efcca0017eaa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, ArrayData, Generic import org.apache.spark.sql.types._ /** - * Java can not access Projection (in package object) + * Java cannot access Projection (in package object) */ abstract class BaseProjection extends Projection {} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala index 64360827fb794..ce177f50956f0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala @@ -907,7 +907,7 @@ object HiveHashFunction extends InterpretedHashFunction { * - year, month (stored as HiveIntervalYearMonth) * - day, hour, minute, second, nanosecond (stored as HiveIntervalDayTime) * - * eg. (INTERVAL '30' YEAR + INTERVAL '-23' DAY) fails in Hive + * e.g. (INTERVAL '30' YEAR + INTERVAL '-23' DAY) fails in Hive * * This method mimics HiveIntervalDayTime.hashCode() in Hive. * @@ -919,7 +919,7 @@ object HiveHashFunction extends InterpretedHashFunction { * * - Spark's [[CalendarInterval]] has precision upto microseconds but Hive's * HiveIntervalDayTime can store data with precision upto nanoseconds. So, any input intervals - * with nanosecond values will lead to wrong output hashes (ie. non adherent with Hive output) + * with nanosecond values will lead to wrong output hashes (i.e. 
non adherent with Hive output) */ def hashCalendarInterval(calendarInterval: CalendarInterval): Long = { val totalMicroSeconds = calendarInterval.days * MICROS_PER_DAY + calendarInterval.microseconds diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala index 4454afb6c099b..d1dabe732c882 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala @@ -128,7 +128,7 @@ trait HigherOrderFunction extends Expression with ExpectsInputTypes { def argumentTypes: Seq[AbstractDataType] /** - * All arguments have been resolved. This means that the types and nullabilty of (most of) the + * All arguments have been resolved. This means that the types and nullability of (most of) the * lambda function arguments is known, and that we can start binding the lambda functions. */ lazy val argumentsResolved: Boolean = arguments.forall(_.resolved) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index a363615d3afe0..c22b68890a0d6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -808,10 +808,10 @@ case class SchemaOfJson( } /** - * A function that returns the number of elements in the outmost JSON array. + * A function that returns the number of elements in the outermost JSON array. */ @ExpressionDescription( - usage = "_FUNC_(jsonArray) - Returns the number of elements in the outmost JSON array.", + usage = "_FUNC_(jsonArray) - Returns the number of elements in the outermost JSON array.", arguments = """ Arguments: * jsonArray - A JSON array. `NULL` is returned in case of any other valid JSON string, @@ -877,13 +877,13 @@ case class LengthOfJsonArray(child: Expression) extends UnaryExpression } /** - * A function which returns all the keys of the outmost JSON object. + * A function which returns all the keys of the outermost JSON object. */ @ExpressionDescription( - usage = "_FUNC_(json_object) - Returns all the keys of the outmost JSON object as an array.", + usage = "_FUNC_(json_object) - Returns all the keys of the outermost JSON object as an array.", arguments = """ Arguments: - * json_object - A JSON object. If a valid JSON object is given, all the keys of the outmost + * json_object - A JSON object. If a valid JSON object is given, all the keys of the outermost object will be returned as an array. If it is any other valid JSON string, an invalid JSON string or an empty string, the function returns null. 
""", @@ -921,7 +921,7 @@ case class JsonObjectKeys(child: Expression) extends UnaryExpression with Codege if (parser.nextToken() == null || parser.currentToken() != JsonToken.START_OBJECT) { return null } - // Parse the JSON string to get all the keys of outmost JSON object + // Parse the JSON string to get all the keys of outermost JSON object getJsonKeys(parser, input) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index 0b94fe8b5d47e..28c9aefb42837 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -93,7 +93,7 @@ abstract class StringRegexExpression extends BinaryExpression Since Spark 2.0, string literals are unescaped in our SQL parser. For example, in order to match "\abc", the pattern should be "\\abc". - When SQL config 'spark.sql.parser.escapedStringLiterals' is enabled, it fallbacks + When SQL config 'spark.sql.parser.escapedStringLiterals' is enabled, it falls back to Spark 1.6 behavior regarding string literal parsing. For example, if the config is enabled, the pattern to match "\abc" should be "\abc". * escape - an character added since Spark 3.0. The default escape character is the '\'. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index b6dd817794723..43ecbd6a83fdb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -173,7 +173,7 @@ sealed trait WindowFrame extends Expression with Unevaluable { case object UnspecifiedFrame extends WindowFrame /** - * A specified Window Frame. The val lower/uppper can be either a foldable [[Expression]] or a + * A specified Window Frame. The val lower/upper can be either a foldable [[Expression]] or a * [[SpecialFrameBoundary]]. */ case class SpecifiedWindowFrame( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala index b053bf6d61e6b..0be2792bfd7db 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala @@ -227,7 +227,7 @@ object NestedColumnAliasing { } /** - * This prunes unnessary nested columns from `Generate` and optional `Project` on top + * This prunes unnecessary nested columns from `Generate` and optional `Project` on top * of it. */ object GeneratorNestedColumnAliasing { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index b7c8f775b857f..aa8540fb44556 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -853,7 +853,7 @@ object CollapseWindow extends Rule[LogicalPlan] { * of the child window expression, transpose them. 
*/ object TransposeWindow extends Rule[LogicalPlan] { - private def compatibleParititions(ps1 : Seq[Expression], ps2: Seq[Expression]): Boolean = { + private def compatiblePartitions(ps1 : Seq[Expression], ps2: Seq[Expression]): Boolean = { ps1.length < ps2.length && ps2.take(ps1.length).permutations.exists(ps1.zip(_).forall { case (l, r) => l.semanticEquals(r) }) @@ -864,7 +864,7 @@ object TransposeWindow extends Rule[LogicalPlan] { if w1.references.intersect(w2.windowOutputSet).isEmpty && w1.expressions.forall(_.deterministic) && w2.expressions.forall(_.deterministic) && - compatibleParititions(ps1, ps2) => + compatiblePartitions(ps1, ps2) => Project(w1.output, Window(we2, ps2, os2, Window(we1, ps1, os1, grandChild))) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushDownLeftSemiAntiJoin.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushDownLeftSemiAntiJoin.scala index 50fe0192d6f26..286b447cdb5a8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushDownLeftSemiAntiJoin.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushDownLeftSemiAntiJoin.scala @@ -172,7 +172,7 @@ object PushDownLeftSemiAntiJoin extends Rule[LogicalPlan] with PredicateHelper { * TODO: * Currently this rule can push down the left semi or left anti joins to either * left or right leg of the child join. This matches the behaviour of `PushPredicateThroughJoin` - * when the lefi semi or left anti join is in expression form. We need to explore the possibility + * when the left semi or left anti join is in expression form. We need to explore the possibility * to push the left semi/anti joins to both legs of join if the join condition refers to * both left and right legs of the child join. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 1b1e2ad71e7c8..4cdaf10dd3c60 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -70,7 +70,7 @@ object ConstantFolding extends Rule[LogicalPlan] { /** * Substitutes [[Attribute Attributes]] which can be statically evaluated with their corresponding * value in conjunctive [[Expression Expressions]] - * eg. + * e.g. * {{{ * SELECT * FROM table WHERE i = 5 AND j = i + 3 * ==> SELECT * FROM table WHERE i = 5 AND j = 8 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala index 3c2ee3149d317..9d023b7f11401 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala @@ -63,7 +63,7 @@ object RewritePredicateSubquery extends Rule[LogicalPlan] with PredicateHelper { // the produced join then becomes unresolved and break structural integrity. We should // de-duplicate conflicting attributes. // SPARK-26078: it may also happen that the subquery has conflicting attributes with the outer - // values. In this case, the resulting join would contain trivially true conditions (eg. + // values. In this case, the resulting join would contain trivially true conditions (e.g. // id#3 = id#3) which cannot be de-duplicated after. 
In this method, if there are conflicting // attributes in the join condition, the subquery's conflicting attributes are changed using // a projection which aliases them and resolves the problem. @@ -174,7 +174,7 @@ object RewritePredicateSubquery extends Rule[LogicalPlan] with PredicateHelper { val inConditions = values.zip(sub.output).map(EqualTo.tupled) // To handle a null-aware predicate not-in-subquery in nested conditions // (e.g., `v > 0 OR t1.id NOT IN (SELECT id FROM t2)`), we transform - // `inConditon` (t1.id=t2.id) into `(inCondition) OR ISNULL(inCondition)`. + // `inCondition` (t1.id=t2.id) into `(inCondition) OR ISNULL(inCondition)`. // // For example, `SELECT * FROM t1 WHERE v > 0 OR t1.id NOT IN (SELECT id FROM t2)` // is transformed into a plan below; @@ -567,7 +567,7 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] with AliasHelpe subqueryRoot = Project(projList ++ havingInputs, subqueryRoot) case s @ SubqueryAlias(alias, _) => subqueryRoot = SubqueryAlias(alias, subqueryRoot) - case op => sys.error(s"Unexpected operator $op in corelated subquery") + case op => sys.error(s"Unexpected operator $op in correlated subquery") } // CASE WHEN alwaysTrue IS NULL THEN resultOnZeroTups diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala index 1f32620e54902..948b94a7e9d66 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala @@ -127,7 +127,7 @@ object ParserUtils { } } - /** Unescape baskslash-escaped string enclosed by quotes. */ + /** Unescape backslash-escaped string enclosed by quotes. */ def unescapeSQLString(b: String): String = { var enclosure: Character = null val sb = new StringBuilder(b.length()) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index 864ca4f57483d..e0839a34ae589 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -397,7 +397,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] case ar: AttributeReference if allAttributes.indexOf(ar.exprId) == -1 => // Top level `AttributeReference` may also be used for output like `Alias`, we should - // normalize the epxrId too. + // normalize the exprId too. id += 1 ar.withExprId(ExprId(id)).canonicalized diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index ad5c3fd74e9b5..1a9c9d14e3eed 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -136,7 +136,7 @@ abstract class LogicalPlan def outputOrdering: Seq[SortOrder] = Nil /** - * Returns true iff `other`'s output is semantically the same, ie.: + * Returns true iff `other`'s output is semantically the same, i.e.: * - it contains the same number of `Attribute`s; * - references are the same; * - the order is equal too. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/PlanHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/PlanHelper.scala index 63348f766a5b1..5ec488efc328c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/PlanHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/PlanHelper.scala @@ -29,7 +29,7 @@ object PlanHelper { /** * Check if there's any expression in this query plan operator that is * - A WindowExpression but the plan is not Window - * - An AggregateExpresion but the plan is not Aggregate or Window + * - An AggregateExpression but the plan is not Aggregate or Window * - A Generator but the plan is not Generate * Returns the list of invalid expressions that this operator hosts. This can happen when * 1. The input query from users contain invalid expressions. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index aa7151ad36850..0e4bfa4dc34da 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -675,7 +675,7 @@ object Expand { val numAttributes = attrMap.size assert(numAttributes <= GroupingID.dataType.defaultSize * 8) val mask = if (numAttributes != 64) (1L << numAttributes) - 1 else 0xFFFFFFFFFFFFFFFFL - // Calculate the attrbute masks of selected grouping set. For example, if we have GroupBy + // Calculate the attribute masks of selected grouping set. For example, if we have GroupBy // attributes (a, b, c, d), grouping set (a, c) will produce the following sequence: // (15, 7, 13), whose binary form is (1111, 0111, 1101) val masks = (mask +: groupingSetAttrs.map(attrMap).map(index => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala index 17e1cb416fc8a..c4002aa441a50 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala @@ -164,7 +164,7 @@ trait Partitioning { * i.e. the current dataset does not need to be re-partitioned for the `required` * Distribution (it is possible that tuples within a partition need to be reorganized). * - * A [[Partitioning]] can never satisfy a [[Distribution]] if its `numPartitions` does't match + * A [[Partitioning]] can never satisfy a [[Distribution]] if its `numPartitions` doesn't match * [[Distribution.requiredNumPartitions]]. */ final def satisfies(required: Distribution): Boolean = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala index f02b2d08c0935..eac34c8f076a1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala @@ -226,8 +226,8 @@ private object DateTimeFormatterHelper { // string at res(0). 
So when the first element here is empty string we do not need append `'` // literal to the DateTimeFormatterBuilder. case ("", idx) if idx != 0 => builder.appendLiteral("'") - case (pattenPart, idx) if idx % 2 == 0 => - var rest = pattenPart + case (patternPart, idx) if idx % 2 == 0 => + var rest = patternPart while (rest.nonEmpty) { rest match { case extractor(prefix, secondFraction, suffix) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 87cf3c93ba26e..0543ef99f8947 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -189,7 +189,7 @@ object DateTimeUtils { * precision, so this conversion is lossy. */ def microsToMillis(micros: Long): Long = { - // When the timestamp is negative i.e before 1970, we need to adjust the millseconds portion. + // When the timestamp is negative i.e before 1970, we need to adjust the milliseconds portion. // Example - 1965-01-01 10:11:12.123456 is represented as (-157700927876544) in micro precision. // In millis precision the above needs to be represented as (-157700927877). Math.floorDiv(micros, MICROS_PER_MILLIS) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/QuantileSummaries.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/QuantileSummaries.scala index ae7066d87d530..addf1408a33a8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/QuantileSummaries.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/QuantileSummaries.scala @@ -173,13 +173,13 @@ class QuantileSummaries( // Take the case of the sample `10` from `b`. In the original stream, it could have appeared // right after `0` (as expressed by `g=1`) or right before `20`, so `delta=99+0-1=98`. // In the GK algorithm's style of working in terms of maximum bounds, one can observe that the - // maximum additional uncertainty over samples comming from `b` is `max(g_a + delta_a) = + // maximum additional uncertainty over samples coming from `b` is `max(g_a + delta_a) = // floor(2 * eps_a * n_a)`. Likewise, additional uncertainty over samples from `a` is // `floor(2 * eps_b * n_b)`. // Only samples that interleave the other side are affected. That means that samples from // one side that are lesser (or greater) than all samples from the other side are just copied - // unmodifed. - // If the merging instances have different `relativeError`, the resulting instance will cary + // unmodified. + // If the merging instances have different `relativeError`, the resulting instance will carry // the largest one: `eps_ab = max(eps_a, eps_b)`. // The main invariant of the GK algorithm is kept: // `max(g_ab + delta_ab) <= floor(2 * eps_ab * (n_a + n_b))` since diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 69f04e11ff0bc..e8e1120cbb884 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1145,7 +1145,7 @@ object SQLConf { val CODEGEN_FACTORY_MODE = buildConf("spark.sql.codegen.factoryMode") .doc("This config determines the fallback behavior of several codegen generators " + - "during tests. 
`FALLBACK` means trying codegen first and then fallbacking to " + + "during tests. `FALLBACK` means trying codegen first and then falling back to " + "interpreted if any compile error happens. Disabling fallback if `CODEGEN_ONLY`. " + "`NO_CODEGEN` skips codegen and goes interpreted path always. Note that " + "this config works only for tests.") @@ -1570,7 +1570,7 @@ object SQLConf { val JSON_EXPRESSION_OPTIMIZATION = buildConf("spark.sql.optimizer.enableJsonExpressionOptimization") .doc("Whether to optimize JSON expressions in SQL optimizer. It includes pruning " + - "unnecessary columns from from_json, simplifing from_json + to_json, to_json + " + + "unnecessary columns from from_json, simplifying from_json + to_json, to_json + " + "named_struct(from_json.col1, from_json.col2, ....).") .version("3.1.0") .booleanConf @@ -2058,7 +2058,7 @@ object SQLConf { buildConf("spark.sql.decimalOperations.allowPrecisionLoss") .internal() .doc("When true (default), establishing the result type of an arithmetic operation " + - "happens according to Hive behavior and SQL ANSI 2011 specification, ie. rounding the " + + "happens according to Hive behavior and SQL ANSI 2011 specification, i.e. rounding the " + "decimal part of the result if an exact representation is not possible. Otherwise, NULL " + "is returned in those cases, as previously.") .version("2.3.1") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala index 9fa27c7df3832..4badcbaa89aa4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala @@ -204,7 +204,7 @@ object RandomDataGenerator { specialDates.map(java.sql.Date.valueOf)) } case TimestampType => - def uniformMicorsRand(rand: Random): Long = { + def uniformMicrosRand(rand: Random): Long = { var milliseconds = rand.nextLong() % 253402329599999L // -62135740800000L is the number of milliseconds before January 1, 1970, 00:00:00 GMT // for "0001-01-01 00:00:00.000000". We need to find a @@ -225,7 +225,7 @@ object RandomDataGenerator { if (SQLConf.get.getConf(SQLConf.DATETIME_JAVA8API_ENABLED)) { randomNumeric[Instant]( rand, - (rand: Random) => DateTimeUtils.microsToInstant(uniformMicorsRand(rand)), + (rand: Random) => DateTimeUtils.microsToInstant(uniformMicrosRand(rand)), specialTs.map { s => val ldt = LocalDateTime.parse(s.replace(" ", "T")) ldt.atZone(ZoneId.systemDefault()).toInstant @@ -235,7 +235,7 @@ object RandomDataGenerator { rand, (rand: Random) => { // DateTimeUtils.toJavaTimestamp takes microsecond. - val ts = DateTimeUtils.toJavaTimestamp(uniformMicorsRand(rand)) + val ts = DateTimeUtils.toJavaTimestamp(uniformMicrosRand(rand)) // The generated `ts` is based on the hybrid calendar Julian + Gregorian since // 1582-10-15 but it should be valid in Proleptic Gregorian calendar too which is used // by Spark SQL since version 3.0 (see SPARK-26651). 
We try to convert `ts` to diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index f5bfdc5e695e0..61186c178b083 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -427,7 +427,7 @@ class AnalysisSuite extends AnalysisTest with Matchers { checkAnalysis(plan, expected) } - test("SPARK-12102: Ignore nullablity when comparing two sides of case") { + test("SPARK-12102: Ignore nullability when comparing two sides of case") { val relation = LocalRelation(Symbol("a").struct(Symbol("x").int), Symbol("b").struct(Symbol("x").int.withNullability(false))) val plan = relation.select( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala index 249e7a49a0a90..cdfae14138290 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala @@ -160,7 +160,7 @@ class ResolveGroupingAnalyticsSuite extends AnalysisTest { } test("grouping function") { - // GrouingSets + // GroupingSets val originalPlan = GroupingSets(Seq(Seq(), Seq(unresolved_a), Seq(unresolved_a, unresolved_b)), Seq(unresolved_a, unresolved_b), r1, Seq(unresolved_a, unresolved_b, UnresolvedAlias(count(unresolved_c)), @@ -200,7 +200,7 @@ class ResolveGroupingAnalyticsSuite extends AnalysisTest { } test("grouping_id") { - // GrouingSets + // GroupingSets val originalPlan = GroupingSets(Seq(Seq(), Seq(unresolved_a), Seq(unresolved_a, unresolved_b)), Seq(unresolved_a, unresolved_b), r1, Seq(unresolved_a, unresolved_b, UnresolvedAlias(count(unresolved_c)), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala index 1e5bc271ab270..5c4d45b5394f7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala @@ -260,7 +260,7 @@ class TypeCoercionSuite extends AnalysisTest { // Tests that its not possible to setup implicit casts between two map types when // source map's key type is integer and the target map's key type are either Binary, - // Boolean, Date, Timestamp, Array, Struct, CaleandarIntervalType or NullType + // Boolean, Date, Timestamp, Array, Struct, CalendarIntervalType or NullType nonCastableTargetTypes.foreach { targetType => shouldNotCast(sourceType, targetType) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala index cdc3f4275414c..fa779477cccab 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala @@ -887,7 +887,7 @@ class UnsupportedOperationsSuite extends SparkFunSuite with SQLHelper { } } - /** Assert that the logical plan is supported 
for continuous procsssing mode */ + /** Assert that the logical plan is supported for continuous processing mode */ def assertSupportedForContinuousProcessing( name: String, plan: LogicalPlan, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala index adaabfe4d32bb..bca8c56a1071e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala @@ -527,7 +527,7 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper { .exists(_.getRenderedMessage().contains("Generated method too long"))) } - test("SPARK-28916: subexrepssion elimination can cause 64kb code limit on UnsafeProjection") { + test("SPARK-28916: subexpression elimination can cause 64kb code limit on UnsafeProjection") { val numOfExprs = 10000 val exprs = (0 to numOfExprs).flatMap(colIndex => Seq(Add(BoundReference(colIndex, DoubleType, true), @@ -554,7 +554,7 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper { // Expecting result: // "((scala.math.LowPriorityOrderingImplicits$$anon$3) references[0] /* comparator */)" - // Using lenient assertions to be resilient to annonymous class numbering changes + // Using lenient assertions to be resilient to anonymous class numbering changes assert(!refTerm.contains("null")) assert(refTerm.contains("scala.math.LowPriorityOrderingImplicits$$anon$")) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala index 3d6f6937e780b..57abdb4de229f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala @@ -425,14 +425,14 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { def checkErrorMessage( childDataType: DataType, fieldDataType: DataType, - errorMesage: String): Unit = { + errorMessage: String): Unit = { val e = intercept[org.apache.spark.sql.AnalysisException] { ExtractValue( Literal.create(null, childDataType), Literal.create(null, fieldDataType), _ == _) } - assert(e.getMessage().contains(errorMesage)) + assert(e.getMessage().contains(errorMessage)) } checkErrorMessage(structType, IntegerType, "Field name should be String Literal") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ConditionalExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ConditionalExpressionSuite.scala index 87e34aca510f5..ee6f89a155ae0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ConditionalExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ConditionalExpressionSuite.scala @@ -212,8 +212,8 @@ class ConditionalExpressionSuite extends SparkFunSuite with ExpressionEvalHelper test("case key when - internal pattern matching expects a List while apply takes a Seq") { val indexedSeq = IndexedSeq(Literal(1), Literal(42), Literal(42), Literal(1)) - val caseKeyWhaen = CaseKeyWhen(Literal(12), indexedSeq) - assert(caseKeyWhaen.branches == + val caseKeyWhen = CaseKeyWhen(Literal(12), indexedSeq) + 
assert(caseKeyWhen.branches == IndexedSeq((Literal(12) === Literal(1), Literal(42)), (Literal(12) === Literal(42), Literal(1)))) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala index 70eb391ad6e05..26d98157807cd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala @@ -451,7 +451,7 @@ trait ExpressionEvalHelper extends ScalaCheckDrivenPropertyChecks with PlanTestB if (interpret.isDefined && codegen.isDefined && !compareResults(interpret.get, codegen.get)) { fail(s"Incorrect evaluation: $expr, interpret: ${interpret.get}, codegen: ${codegen.get}") } else if (interpretExc.isDefined && codegenExc.isEmpty) { - fail(s"Incorrect evaluation: $expr, interpet threw exception ${interpretExc.get}") + fail(s"Incorrect evaluation: $expr, interpret threw exception ${interpretExc.get}") } else if (interpretExc.isEmpty && codegenExc.isDefined) { fail(s"Incorrect evaluation: $expr, codegen threw exception ${codegenExc.get}") } else if (interpretExc.isDefined && codegenExc.isDefined diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala index bc2b93e5390da..d425d0ba42186 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala @@ -212,9 +212,9 @@ class ObjectExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { val initializeWithNonexistingMethod = InitializeJavaBean( Literal.fromObject(new java.util.LinkedList[Int]), - Map("nonexisting" -> Literal(1))) + Map("nonexistent" -> Literal(1))) checkExceptionInExpression[Exception](initializeWithNonexistingMethod, - """A method named "nonexisting" is not declared in any enclosing class """ + + """A method named "nonexistent" is not declared in any enclosing class """ + "nor any supertype") val initializeWithWrongParamType = InitializeJavaBean( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index 730574a4b9846..78e9cf82a28b1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -118,7 +118,7 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { testElt(null, 1, null, "world") testElt(null, null, "hello", "world") - // Invalid ranages + // Invalid ranges testElt(null, 3, "hello", "world") testElt(null, 0, "hello", "world") testElt(null, -1, "hello", "world") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PercentileSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PercentileSuite.scala index 972db7fa30a91..d6e6142b07a3f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PercentileSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PercentileSuite.scala @@ -66,20 +66,20 @@ class PercentileSuite extends SparkFunSuite { // Test with row with frequency. Second and third columns are frequency in Int and Long val countForFrequencyTest = 1000 val rowsWithFrequency = (1 to countForFrequencyTest).map(x => Seq(x, x):+ x.toLong) - val expectedPercentilesWithFrquency = Seq(1.0, 500.0, 707.0, 866.0, 1000.0) + val expectedPercentilesWithFrequency = Seq(1.0, 500.0, 707.0, 866.0, 1000.0) val frequencyExpressionInt = BoundReference(1, IntegerType, nullable = false) val aggInt = new Percentile(childExpression, percentageExpression, frequencyExpressionInt) - runTest(aggInt, rowsWithFrequency, expectedPercentilesWithFrquency) + runTest(aggInt, rowsWithFrequency, expectedPercentilesWithFrequency) val frequencyExpressionLong = BoundReference(2, LongType, nullable = false) val aggLong = new Percentile(childExpression, percentageExpression, frequencyExpressionLong) - runTest(aggLong, rowsWithFrequency, expectedPercentilesWithFrquency) + runTest(aggLong, rowsWithFrequency, expectedPercentilesWithFrequency) // Run test with Flatten data val flattenRows = (1 to countForFrequencyTest).flatMap(current => (1 to current).map(y => current )).map(Seq(_)) - runTest(agg, flattenRows, expectedPercentilesWithFrquency) + runTest(agg, flattenRows, expectedPercentilesWithFrequency) } private def runTest(agg: Percentile, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala index d660afb7f8a05..9d4c5986300c5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala @@ -115,7 +115,7 @@ class CodeBlockSuite extends SparkFunSuite { assert(exprValues === Set(isNull1, value1, isNull2, value2, literal)) } - test("Throws exception when interpolating unexcepted object in code block") { + test("Throws exception when interpolating unexpected object in code block") { val obj = Tuple2(1, 1) val e = intercept[IllegalArgumentException] { code"$obj" diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SetOperationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SetOperationSuite.scala index 2eea840e21a31..8543b62fd8bdd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SetOperationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SetOperationSuite.scala @@ -154,11 +154,11 @@ class SetOperationSuite extends PlanTest { .union(testRelation2.select(Literal(-1L).as("vcol"), 'd, 'e, 'f)) .groupBy('a, 'b, 'c)('a, 'b, 'c, sum('vcol).as("sum")) .where(GreaterThan('sum, Literal(0L))).analyze - val multiplerAttr = planFragment.output.last + val multiplierAttr = planFragment.output.last val output = planFragment.output.dropRight(1) val expectedPlan = Project(output, Generate( - ReplicateRows(Seq(multiplerAttr) ++ output), + ReplicateRows(Seq(multiplierAttr) ++ output), Nil, false, None, @@ -183,11 +183,11 @@ class SetOperationSuite extends PlanTest { .select('a, 'b, 'c, If(GreaterThan('vcol1_count, 'vcol2_count), 'vcol2_count, 'vcol1_count).as("min_count")) .analyze - val multiplerAttr = planFragment.output.last + val multiplierAttr = planFragment.output.last val 
output = planFragment.output.dropRight(1) val expectedPlan = Project(output, Generate( - ReplicateRows(Seq(multiplerAttr) ++ output), + ReplicateRows(Seq(multiplierAttr) ++ output), Nil, false, None, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 0f1b4a3ea918c..e98ec6a667a73 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -958,7 +958,7 @@ class DDLParserSuite extends AnalysisTest { Some(first()))) } - test("alter table: mutiple property changes are not allowed") { + test("alter table: multiple property changes are not allowed") { intercept[ParseException] { parsePlan("ALTER TABLE table_name ALTER COLUMN a.b.c " + "TYPE bigint COMMENT 'new comment'")} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala index b9f984001523a..46ad5d1dec7e4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala @@ -124,8 +124,8 @@ class DataTypeParserSuite extends SparkFunSuite { unsupported("struct") test("Do not print empty parentheses for no params") { - assert(intercept("unkwon").getMessage.contains("unkwon is not supported")) - assert(intercept("unkwon(1,2,3)").getMessage.contains("unkwon(1,2,3) is not supported")) + assert(intercept("unknown").getMessage.contains("unknown is not supported")) + assert(intercept("unknown(1,2,3)").getMessage.contains("unknown(1,2,3) is not supported")) } // DataType parser accepts certain reserved keywords. 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala index 00b6828c08b38..99051d692451b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala @@ -77,7 +77,7 @@ class ErrorParserSuite extends AnalysisTest { } test("SPARK-21136: misleading error message due to problematic antlr grammar") { - intercept("select * from a left joinn b on a.id = b.id", "missing 'JOIN' at 'joinn'") + intercept("select * from a left join_ b on a.id = b.id", "missing 'JOIN' at 'join_'") intercept("select * from test where test.t is like 'test'", "mismatched input 'is' expecting") intercept("SELECT * FROM test WHERE x NOT NULL", "mismatched input 'NOT' expecting") } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index 9f6a76b9228c5..0b304a799cdc5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -590,7 +590,7 @@ class ExpressionParserSuite extends AnalysisTest { // tests that have different result regarding the conf if (escape) { - // When SQLConf.ESCAPED_STRING_LITERALS is enabled, string literal parsing fallbacks to + // When SQLConf.ESCAPED_STRING_LITERALS is enabled, string literal parsing falls back to // Spark 1.6 behavior. // 'LIKE' string literals. @@ -780,7 +780,7 @@ class ExpressionParserSuite extends AnalysisTest { val complexName = FunctionIdentifier("`ba`r", Some("`fo`o")) assertEqual(complexName.quotedString, UnresolvedAttribute("`fo`o.`ba`r")) intercept(complexName.unquotedString, "mismatched input") - // Function identifier contains countious backticks should be treated correctly. + // Function identifier contains continuous backticks should be treated correctly. val complexName2 = FunctionIdentifier("ba``r", Some("fo``o")) assertEqual(complexName2.quotedString, UnresolvedAttribute("fo``o.ba``r")) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala index f037ce7b9e793..bad3e0d79dd12 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala @@ -355,7 +355,7 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLKeywordUtils { assert(complexName === parseTableIdentifier("```d``b``1`.```weird``table``name`")) assert(complexName === parseTableIdentifier(complexName.quotedString)) intercept[ParseException](parseTableIdentifier(complexName.unquotedString)) - // Table identifier contains countious backticks should be treated correctly. + // Table identifier contains continuous backticks should be treated correctly. 
val complexName2 = TableIdentifier("x``y", Some("d``b")) assert(complexName2 === parseTableIdentifier(complexName2.quotedString)) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala index 2e190c6ba6d4b..5729b02dc4926 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala @@ -55,7 +55,7 @@ class UnsafeArraySuite extends SparkFunSuite { BigDecimal("1.2345678901234567890123456").setScale(21, BigDecimal.RoundingMode.FLOOR), BigDecimal("2.3456789012345678901234567").setScale(21, BigDecimal.RoundingMode.FLOOR)) - val calenderintervalArray = Array( + val calendarintervalArray = Array( new CalendarInterval(3, 2, 321), new CalendarInterval(1, 2, 123)) val intMultiDimArray = Array(Array(1), Array(2, 20), Array(3, 30, 300)) @@ -142,12 +142,12 @@ class UnsafeArraySuite extends SparkFunSuite { val schema = new StructType().add("array", ArrayType(CalendarIntervalType)) val encoder = RowEncoder(schema).resolveAndBind() - val externalRow = Row(calenderintervalArray) + val externalRow = Row(calendarintervalArray) val ir = encoder.createSerializer().apply(externalRow) val unsafeCalendar = ir.getArray(0) assert(unsafeCalendar.isInstanceOf[UnsafeArrayData]) - assert(unsafeCalendar.numElements == calenderintervalArray.length) - calenderintervalArray.zipWithIndex.map { case (e, i) => + assert(unsafeCalendar.numElements == calendarintervalArray.length) + calendarintervalArray.zipWithIndex.map { case (e, i) => assert(unsafeCalendar.getInterval(i) == e) } diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java index d8e61a87e7f62..b2ef1c7722ef8 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java @@ -49,7 +49,7 @@ public ColumnDescriptor(TColumnDesc tColumnDesc) { public static ColumnDescriptor newPrimitiveColumnDescriptor(String name, String comment, Type type, int position) { // Current usage looks like it's only for metadata columns, but if that changes then - // this method may need to require a type qualifiers aruments. + // this method may need to require a type qualifiers arguments. 
return new ColumnDescriptor(name, comment, new TypeDescriptor(type), position); } diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/GetInfoValue.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/GetInfoValue.java index 2b2359cc13c0f..bf3c6b27ea81d 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/GetInfoValue.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/GetInfoValue.java @@ -52,7 +52,7 @@ public GetInfoValue(TGetInfoValue tGetInfoValue) { stringValue = tGetInfoValue.getStringValue(); break; default: - throw new IllegalArgumentException("Unreconigzed TGetInfoValue"); + throw new IllegalArgumentException("Unrecognized TGetInfoValue"); } } diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java index c25c742d392b3..59630672847e4 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java @@ -103,7 +103,7 @@ public class GetColumnsOperation extends MetadataOperation { "Schema of table that is the scope of a reference attribute " + "(null if the DATA_TYPE isn't REF)") .addPrimitiveColumn("SCOPE_TABLE", Type.STRING_TYPE, - "Table name that this the scope of a reference attribure " + "Table name that this the scope of a reference attribute " + "(null if the DATA_TYPE isn't REF)") .addPrimitiveColumn("SOURCE_DATA_TYPE", Type.SMALLINT_TYPE, "Source type of a distinct type or user-generated Ref type, " diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java index 1b3e8fe6bfb9d..f47a4388f7bea 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java @@ -175,9 +175,9 @@ protected BufferedReader loadFile(String fileName) throws IOException { @Override protected int processCmd(String cmd) { int rc = 0; - String cmd_trimed = cmd.trim(); + String cmd_trimmed = cmd.trim(); try { - executeStatementInternal(cmd_trimed, null, false, 0); + executeStatementInternal(cmd_trimmed, null, false, 0); } catch (HiveSQLException e) { rc = -1; LOG.warn("Failed to execute HQL command in global .hiverc file.", e); diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java index ab9ed5b1f371e..13fc552a9a42e 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java @@ -137,7 +137,7 @@ protected void initializeServer() { httpServer.setHandler(context); context.addServlet(new ServletHolder(thriftHttpServlet), httpPath); - // TODO: check defaults: maxTimeout, keepalive, maxBodySize, bodyRecieveDuration, etc. + // TODO: check defaults: maxTimeout, keepalive, maxBodySize, bodyReceiveDuration, etc. 
// Finally, start the server httpServer.start(); // In case HIVE_SERVER2_THRIFT_HTTP_PORT or hive.server2.thrift.http.port is configured with diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/DummyListeners.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/DummyListeners.scala index 4564c2209a931..820859b65925b 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/DummyListeners.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/DummyListeners.scala @@ -17,7 +17,7 @@ /** * These classes in this package are intentionally placed to the outer package of spark, - * because IsolatedClientLoader leverages Spark classloader for shared classess including + * because IsolatedClientLoader leverages Spark classloader for shared classes including * spark package, and the test should fail if Spark initializes these listeners with * IsolatedClientLoader. */ diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnvSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnvSuite.scala index f28faea2be868..f2bb337e4a826 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnvSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnvSuite.scala @@ -42,7 +42,7 @@ class SparkSQLEnvSuite extends SparkFunSuite { QUERY_EXECUTION_LISTENERS.key -> classOf[DummyQueryExecutionListener].getCanonicalName, STREAMING_QUERY_LISTENERS.key -> classOf[DummyStreamingQueryListener].getCanonicalName, WAREHOUSE_PATH.key -> TestHiveContext.makeWarehouseDir().toURI.getPath, - // The issue occured from "maven" and list of custom jars, but providing list of custom + // The issue occurred from "maven" and list of custom jars, but providing list of custom // jars to initialize HiveClient isn't trivial, so just use "maven". HIVE_METASTORE_JARS.key -> "maven", HIVE_METASTORE_VERSION.key -> null, diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index 4ce1964a19bd9..c263932c2f535 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -56,7 +56,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, true) // Ensures that cross joins are enabled so that we can test them TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, true) - // Ensures that the table insertion behaivor is consistent with Hive + // Ensures that the table insertion behavior is consistent with Hive TestHive.setConf(SQLConf.STORE_ASSIGNMENT_POLICY, StoreAssignmentPolicy.LEGACY.toString) // Fix session local timezone to America/Los_Angeles for those timezone sensitive tests // (timestamp_*) @@ -305,7 +305,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { // Unsupported underscore syntax. "inputddl5", - // Thift is broken... + // Thrift is broken... 
"inputddl8", // Hive changed ordering of ddl: @@ -496,7 +496,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "drop_partitions_filter2", "drop_partitions_filter3", - // The following failes due to truncate table + // The following fails due to truncate table "truncate_table", // We do not support DFS command. @@ -716,7 +716,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "groupby_multi_insert_common_distinct", "groupby_multi_single_reducer2", "groupby_multi_single_reducer3", - "groupby_mutli_insert_common_distinct", + "groupby_multi_insert_common_distinct", "groupby_neg_float", "groupby_ppd", "groupby_ppr", @@ -958,8 +958,8 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "subq2", "subquery_exists", "subquery_exists_having", - "subquery_notexists", - "subquery_notexists_having", + "subquery_nonexistent", + "subquery_nonexistent_having", "subquery_in_having", "tablename_with_select", "timestamp_comparison", diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index a89243c331c7b..e02589e5cad00 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -332,7 +332,7 @@ private[hive] object HiveMetastoreCatalog { metastoreSchema: StructType, inferredSchema: StructType): StructType = try { // scalastyle:off caselocale - // Find any nullable fields in mestastore schema that are missing from the inferred schema. + // Find any nullable fields in metastore schema that are missing from the inferred schema. val metastoreFields = metastoreSchema.map(f => f.name.toLowerCase -> f).toMap val missingNullables = metastoreFields .filterKeys(!inferredSchema.map(_.name.toLowerCase).contains(_)) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 34befb8a6f965..b4ebf153fc178 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -367,14 +367,14 @@ private[hive] class HiveClientImpl( override def getDatabase(dbName: String): CatalogDatabase = withHiveState { Option(client.getDatabase(dbName)).map { d => - val paras = Option(d.getParameters).map(_.asScala.toMap).getOrElse(Map()) ++ + val params = Option(d.getParameters).map(_.asScala.toMap).getOrElse(Map()) ++ Map(PROP_OWNER -> shim.getDatabaseOwnerName(d)) CatalogDatabase( name = d.getName, description = Option(d.getDescription).getOrElse(""), locationUri = CatalogUtils.stringToURI(d.getLocationUri), - properties = paras) + properties = params) }.getOrElse(throw new NoSuchDatabaseException(dbName)) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala index 4096916a100c3..26baff3d83eec 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala @@ -92,7 +92,7 @@ case class HiveScriptTransformationExec( scriptOutputWritable.readFields(scriptOutputStream) } catch { case _: EOFException => - // This 
means that the stdout of `proc` (ie. TRANSFORM process) has exhausted. + // This means that the stdout of `proc` (i.e. TRANSFORM process) has exhausted. // Ideally the proc should *not* be alive at this point but // there can be a lag between EOF being written out and the process // being terminated. So explicitly waiting for the process to be done. diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala index 3c3f31ac2994a..63e46880376e1 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala @@ -172,7 +172,7 @@ case class InsertIntoHiveTable( table.bucketSpec match { case Some(bucketSpec) => // Writes to bucketed hive tables are allowed only if user does not care about maintaining - // table's bucketing ie. both "hive.enforce.bucketing" and "hive.enforce.sorting" are + // table's bucketing i.e. both "hive.enforce.bucketing" and "hive.enforce.sorting" are // set to false val enforceBucketingConfig = "hive.enforce.bucketing" val enforceSortingConfig = "hive.enforce.sorting" diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitions.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitions.scala index cd07199e48ed7..3fa8449c3cb01 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitions.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitions.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.execution.datasources.DataSourceStrategy * the hive table relation will be updated based on pruned partitions. * * This rule is executed in optimization phase, so the statistics can be updated before physical - * planning, which is useful for some spark strategy, eg. + * planning, which is useful for some spark strategy, e.g. * [[org.apache.spark.sql.execution.SparkStrategies.JoinSelection]]. * * TODO: merge this with PruneFileSourcePartitions after we completely make hive as a data source. 
diff --git a/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q b/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q index 28bbc2d8f1a3e..df5334c785f6a 100644 --- a/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q +++ b/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q @@ -19,7 +19,7 @@ set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.auto.convert.join=true; --- A SMB join followed by a mutli-insert +-- A SMB join followed by a multi-insert explain from ( SELECT a.key key1, a.value value1, b.key key2, b.value value2 @@ -41,7 +41,7 @@ select * from dest2 order by k1, k2; set hive.auto.convert.join.noconditionaltask=true; set hive.auto.convert.join.noconditionaltask.size=200; --- A SMB join followed by a mutli-insert +-- A SMB join followed by a multi-insert explain from ( SELECT a.key key1, a.value value1, b.key key2, b.value value2 @@ -61,7 +61,7 @@ select * from dest1 order by k1, k2; select * from dest2 order by k1, k2; set hive.auto.convert.sortmerge.join.to.mapjoin=true; --- A SMB join followed by a mutli-insert +-- A SMB join followed by a multi-insert explain from ( SELECT a.key key1, a.value value1, b.key key2, b.value value2 diff --git a/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_3.q b/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_3.q index 91e97de62c82f..843ba4a3dbacd 100644 --- a/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_3.q +++ b/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_3.q @@ -18,7 +18,7 @@ FROM src INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *; -- Insert data into the bucketed table by selecting from another bucketed table --- The bucketing positions dont match - although the actual bucketing do. +-- The bucketing positions don't match - although the actual bucketing do. 
-- This should be a map-only operation EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') @@ -37,7 +37,7 @@ CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS; -- Insert data into the bucketed table by selecting from another bucketed table --- The bucketing positions dont match - this should be a map-reduce operation +-- The bucketing positions don't match - this should be a map-reduce operation EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT x.key, x.value from diff --git a/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/smb_mapjoin_20.q b/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/smb_mapjoin_20.q index f70e7d5c86237..4c56cad2411fc 100644 --- a/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/smb_mapjoin_20.q +++ b/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/smb_mapjoin_20.q @@ -32,7 +32,7 @@ CREATE TABLE test_table3 (key STRING, value1 int, value2 string) PARTITIONED BY CLUSTERED BY (value1) SORTED BY (value1) INTO 2 BUCKETS; -- Insert data into the bucketed table by selecting from another bucketed table --- This should be a map-only operation, although the bucketing positions dont match +-- This should be a map-only operation, although the bucketing positions don't match EXPLAIN INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1'; diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala index 71750e6b3a516..b715f484fa02a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala @@ -35,7 +35,7 @@ import org.apache.spark.util.Utils case class TestData(key: Int, value: String) -case class ThreeCloumntable(key: Int, value: String, key1: String) +case class ThreeColumnTable(key: Int, value: String, key1: String) class InsertSuite extends QueryTest with TestHiveSingleton with BeforeAndAfter with SQLTestUtils with PrivateMethodTester { @@ -764,7 +764,7 @@ class InsertSuite extends QueryTest with TestHiveSingleton with BeforeAndAfter val path = dir.toURI.getPath val e = intercept[AnalysisException] { - sql(s"INSERT OVERWRITE LOCAL DIRECTORY '${path}' TABLE notexists") + sql(s"INSERT OVERWRITE LOCAL DIRECTORY '${path}' TABLE nonexistent") }.getMessage assert(e.contains("Table or view not found")) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 2ea98943011f4..2e98a76c52488 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -735,7 +735,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto } } - test("analyze column command paramaters validation") { + test("analyze column command parameters validation") { val e1 = intercept[IllegalArgumentException] { AnalyzeColumnCommand(TableIdentifier("test"), Option(Seq("c1")), true).run(spark) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala index 1018ae5b68895..0876709c31899 100644 --- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala @@ -372,7 +372,7 @@ class HiveScriptTransformationSuite extends BaseScriptTransformationSuite with T } } - test("SPARK-32400: TRANSFORM doesn't support CalenderIntervalType/UserDefinedType (hive serde)") { + test("SPARK-32400: TRANSFORM doesn't support CalendarIntervalType/UserDefinedType (hive serde)") { assume(TestUtils.testCommandAvailable("/bin/bash")) withTempView("v") { val df = Seq( @@ -410,7 +410,7 @@ class HiveScriptTransformationSuite extends BaseScriptTransformationSuite with T } test("SPARK-32400: TRANSFORM doesn't support" + - " CalenderIntervalType/UserDefinedType end to end (hive serde)") { + " CalendarIntervalType/UserDefinedType end to end (hive serde)") { assume(TestUtils.testCommandAvailable("/bin/bash")) withTempView("v") { val df = Seq( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 6b82b1267bc66..3370695245fd0 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -228,7 +228,7 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi checkAnswer(sql(s"SHOW functions $db.temp_abs"), Row("temp_abs")) checkAnswer(sql(s"SHOW functions `$db`.`temp_abs`"), Row("temp_abs")) checkAnswer(sql(s"SHOW functions `$db`.`temp_abs`"), Row("temp_abs")) - checkAnswer(sql("SHOW functions `a function doens't exist`"), Nil) + checkAnswer(sql("SHOW functions `a function doesn't exist`"), Nil) checkAnswer(sql("SHOW functions `temp_weekofyea*`"), Row("temp_weekofyear")) // this probably will failed if we add more function with `sha` prefixing. @@ -768,7 +768,7 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi sql("SELECT * FROM nested").collect().toSeq) intercept[AnalysisException] { - sql("CREATE TABLE test_ctas_1234 AS SELECT * from notexists").collect() + sql("CREATE TABLE test_ctas_1234 AS SELECT * from nonexistent").collect() } } } @@ -1739,12 +1739,12 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi |SELECT 'blarr' """.stripMargin) - // project list is the same order of paritioning columns in table definition + // project list is the same order of partitioning columns in table definition checkAnswer( sql(s"SELECT p1, p2, p3, p4, p5, c1 FROM $table"), Row("a", "b", "c", "d", "e", "blarr") :: Nil) - // project list does not have the same order of paritioning columns in table definition + // project list does not have the same order of partitioning columns in table definition checkAnswer( sql(s"SELECT p2, p3, p4, p1, p5, c1 FROM $table"), Row("b", "c", "d", "a", "e", "blarr") :: Nil) From 6aff215077e2cdf9cec187c827da63c067514e4e Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 7 Dec 2020 10:50:31 -0800 Subject: [PATCH 0692/1009] [SPARK-33693][SQL] deprecate spark.sql.hive.convertCTAS ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/30554 . Now we have a new config for converting CREATE TABLE, we don't need the old config that only works for CTAS. ### Why are the changes needed? It's confusing for having two config while one can cover another completely. 
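As a migration reference, here is a minimal sketch (not part of this patch; it assumes a Spark shell where `spark` and the `SQLConf` object are in scope) of moving off the deprecated CTAS-only flag onto the broader legacy flag named in the deprecation message below:

```scala
import org.apache.spark.sql.internal.SQLConf

// Hypothetical migration: rather than toggling the deprecated spark.sql.hive.convertCTAS,
// make plain CREATE TABLE (and therefore CTAS) default to data source tables.
spark.conf.set(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT.key, "false")
spark.sql("CREATE TABLE t AS SELECT 1 AS id")  // resolved as a data source table, not a Hive serde table
```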
### Does this PR introduce _any_ user-facing change? no, it's deprecating not removing. ### How was this patch tested? N/A Closes #30651 from cloud-fan/minor. Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun --- .../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index e8e1120cbb884..bc62213bdb740 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -3005,7 +3005,9 @@ object SQLConf { s"Use '${ADVISORY_PARTITION_SIZE_IN_BYTES.key}' instead of it."), DeprecatedConfig(OPTIMIZER_METADATA_ONLY.key, "3.0", "Avoid to depend on this optimization to prevent a potential correctness issue. " + - "If you must use, use 'SparkSessionExtensions' instead to inject it as a custom rule.") + "If you must use, use 'SparkSessionExtensions' instead to inject it as a custom rule."), + DeprecatedConfig(CONVERT_CTAS.key, "3.1", + s"Set '${LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT.key}' to false instead.") ) Map(configs.map { cfg => cfg.key -> cfg } : _*) From c0874ba9f13b9802eef4418490020692e37652ba Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 7 Dec 2020 13:35:37 -0800 Subject: [PATCH 0693/1009] [SPARK-33480][SQL][FOLLOWUP] do not expose user data in error message ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/30412. This PR updates the error message of char/varchar table insertion length check, to not expose user data. ### Why are the changes needed? This is risky to expose user data in the error message, especially the string data, as it may contain sensitive data. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? updated tests Closes #30653 from cloud-fan/minor2. 
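For illustration, a REPL-style sketch of the new message shape (it mirrors the updated tests below; the table name and `parquet` format are arbitrary choices, not mandated by the patch):

```scala
// Sketch: an over-length insert into a CHAR(5) column still fails, but the error now
// reports only the input length instead of echoing the value itself.
spark.sql("CREATE TABLE t(c CHAR(5)) USING parquet")
spark.sql("INSERT INTO t VALUES ('123456')")
// => org.apache.spark.SparkException whose cause message contains:
//    "input string of length 6 exceeds char type length limitation: 5"
```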
Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/util/CharVarcharUtils.scala | 6 ++--- .../spark/sql/CharVarcharTestSuite.scala | 26 +++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala index b551d9699f360..e42e384e4b86b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala @@ -183,9 +183,9 @@ object CharVarcharUtils extends Logging { private def raiseError(expr: Expression, typeName: String, length: Int): Expression = { val errorMsg = Concat(Seq( - Literal("input string '"), - expr, - Literal(s"' exceeds $typeName type length limitation: $length"))) + Literal("input string of length "), + Cast(Length(expr), StringType), + Literal(s" exceeds $typeName type length limitation: $length"))) Cast(RaiseError(errorMsg), StringType) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index fcd334be7a6f7..b0f1198e46440 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -190,7 +190,7 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { checkAnswer(spark.table("t"), Row(null)) val e = intercept[SparkException](sql("INSERT INTO t VALUES ('123456')")) assert(e.getCause.getMessage.contains( - s"input string '123456' exceeds $typeName type length limitation: 5")) + s"input string of length 6 exceeds $typeName type length limitation: 5")) } } @@ -203,7 +203,7 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { checkAnswer(spark.table("t"), Row(1, null)) val e = intercept[SparkException](sql("INSERT INTO t VALUES (1, '123456')")) assert(e.getCause.getMessage.contains( - s"input string '123456' exceeds $typeName type length limitation: 5")) + s"input string of length 6 exceeds $typeName type length limitation: 5")) } } } @@ -215,7 +215,7 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { checkAnswer(spark.table("t"), Row(Row(null))) val e = intercept[SparkException](sql("INSERT INTO t SELECT struct('123456')")) assert(e.getCause.getMessage.contains( - s"input string '123456' exceeds $typeName type length limitation: 5")) + s"input string of length 6 exceeds $typeName type length limitation: 5")) } } @@ -226,7 +226,7 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { checkAnswer(spark.table("t"), Row(Seq(null))) val e = intercept[SparkException](sql("INSERT INTO t VALUES (array('a', '123456'))")) assert(e.getCause.getMessage.contains( - s"input string '123456' exceeds $typeName type length limitation: 5")) + s"input string of length 6 exceeds $typeName type length limitation: 5")) } } @@ -235,7 +235,7 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { sql(s"CREATE TABLE t(c MAP<$typeName(5), STRING>) USING $format") val e = intercept[SparkException](sql("INSERT INTO t VALUES (map('123456', 'a'))")) assert(e.getCause.getMessage.contains( - s"input string '123456' exceeds $typeName type length limitation: 5")) + s"input string of length 6 exceeds $typeName type length limitation: 5")) } } @@ -246,7 +246,7 @@ trait CharVarcharTestSuite extends 
QueryTest with SQLTestUtils { checkAnswer(spark.table("t"), Row(Map("a" -> null))) val e = intercept[SparkException](sql("INSERT INTO t VALUES (map('a', '123456'))")) assert(e.getCause.getMessage.contains( - s"input string '123456' exceeds $typeName type length limitation: 5")) + s"input string of length 6 exceeds $typeName type length limitation: 5")) } } @@ -255,10 +255,10 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { sql(s"CREATE TABLE t(c MAP<$typeName(5), $typeName(5)>) USING $format") val e1 = intercept[SparkException](sql("INSERT INTO t VALUES (map('123456', 'a'))")) assert(e1.getCause.getMessage.contains( - s"input string '123456' exceeds $typeName type length limitation: 5")) + s"input string of length 6 exceeds $typeName type length limitation: 5")) val e2 = intercept[SparkException](sql("INSERT INTO t VALUES (map('a', '123456'))")) assert(e2.getCause.getMessage.contains( - s"input string '123456' exceeds $typeName type length limitation: 5")) + s"input string of length 6 exceeds $typeName type length limitation: 5")) } } @@ -269,7 +269,7 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { checkAnswer(spark.table("t"), Row(Row(Seq(null)))) val e = intercept[SparkException](sql("INSERT INTO t SELECT struct(array('123456'))")) assert(e.getCause.getMessage.contains( - s"input string '123456' exceeds $typeName type length limitation: 5")) + s"input string of length 6 exceeds $typeName type length limitation: 5")) } } @@ -280,7 +280,7 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { checkAnswer(spark.table("t"), Row(Seq(Row(null)))) val e = intercept[SparkException](sql("INSERT INTO t VALUES (array(struct('123456')))")) assert(e.getCause.getMessage.contains( - s"input string '123456' exceeds $typeName type length limitation: 5")) + s"input string of length 6 exceeds $typeName type length limitation: 5")) } } @@ -291,7 +291,7 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { checkAnswer(spark.table("t"), Row(Seq(Seq(null)))) val e = intercept[SparkException](sql("INSERT INTO t VALUES (array(array('123456')))")) assert(e.getCause.getMessage.contains( - s"input string '123456' exceeds $typeName type length limitation: 5")) + s"input string of length 6 exceeds $typeName type length limitation: 5")) } } @@ -313,10 +313,10 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { checkAnswer(spark.table("t"), Row("1234 ", "1234")) val e1 = intercept[SparkException](sql("INSERT INTO t VALUES (123456, 1)")) assert(e1.getCause.getMessage.contains( - "input string '123456' exceeds char type length limitation: 5")) + "input string of length 6 exceeds char type length limitation: 5")) val e2 = intercept[SparkException](sql("INSERT INTO t VALUES (1, 123456)")) assert(e2.getCause.getMessage.contains( - "input string '123456' exceeds varchar type length limitation: 5")) + "input string of length 6 exceeds varchar type length limitation: 5")) } } From 02508b68ecc56658a13d89bf798c5ef824ba2cdc Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Mon, 7 Dec 2020 15:32:10 -0800 Subject: [PATCH 0694/1009] [SPARK-33621][SQL] Add a way to inject data source rewrite rules ### What changes were proposed in this pull request? This PR adds a way to inject data source rewrite rules. ### Why are the changes needed? Right now `SparkSessionExtensions` allow us to inject optimization rules but they are added to operator optimization batch. There are cases when users need to run rules after the operator optimization batch (e.g. 
cases when a rule relies on the fact that expressions have been optimized). Currently, this is not possible. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? This PR comes with a new test. Closes #30577 from aokolnychyi/spark-33621-v3. Authored-by: Anton Okolnychyi Signed-off-by: Dongjoon Hyun --- .../spark/sql/SparkSessionExtensions.scala | 16 ++++++++++++++++ .../sql/internal/BaseSessionStateBuilder.scala | 4 +++- .../spark/sql/SparkSessionExtensionSuite.scala | 6 ++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala index 6952f4bfd0566..d5d969032a5e1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala @@ -40,6 +40,7 @@ import org.apache.spark.sql.execution.{ColumnarRule, SparkPlan} *
 * <ul>
 * <li>Analyzer Rules.</li>
 * <li>Check Analysis Rules.</li>
 * <li>Optimizer Rules.</li>
+ * <li>Data Source Rewrite Rules.</li>
 * <li>Planning Strategies.</li>
 * <li>Customized Parser.</li>
 * <li>(External) Catalog listeners.</li>
    • @@ -199,6 +200,21 @@ class SparkSessionExtensions { optimizerRules += builder } + private[this] val dataSourceRewriteRules = mutable.Buffer.empty[RuleBuilder] + + private[sql] def buildDataSourceRewriteRules(session: SparkSession): Seq[Rule[LogicalPlan]] = { + dataSourceRewriteRules.map(_.apply(session)).toSeq + } + + /** + * Inject an optimizer `Rule` builder that rewrites data source plans into the [[SparkSession]]. + * The injected rules will be executed after the operator optimization batch and before rules + * that depend on stats. + */ + def injectDataSourceRewriteRule(builder: RuleBuilder): Unit = { + dataSourceRewriteRules += builder + } + private[this] val plannerStrategyBuilders = mutable.Buffer.empty[StrategyBuilder] private[sql] def buildPlannerStrategies(session: SparkSession): Seq[Strategy] = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index 8101f9e291b44..f51ee11091d02 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -273,7 +273,9 @@ abstract class BaseSessionStateBuilder( * * Note that this may NOT depend on the `optimizer` function. */ - protected def customDataSourceRewriteRules: Seq[Rule[LogicalPlan]] = Nil + protected def customDataSourceRewriteRules: Seq[Rule[LogicalPlan]] = { + extensions.buildDataSourceRewriteRules(session) + } /** * Planner that converts optimized logical plans to physical plans. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala index ea276bcec0f78..576ad26505d27 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala @@ -88,6 +88,12 @@ class SparkSessionExtensionSuite extends SparkFunSuite { } } + test("SPARK-33621: inject data source rewrite rule") { + withSession(Seq(_.injectDataSourceRewriteRule(MyRule))) { session => + assert(session.sessionState.optimizer.dataSourceRewriteRules.contains(MyRule(session))) + } + } + test("inject spark planner strategy") { withSession(Seq(_.injectPlannerStrategy(MySparkStrategy))) { session => assert(session.sessionState.planner.strategies.contains(MySparkStrategy(session))) From e4d1c10760800563d2a30410b46e5b0cd2671c4d Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Tue, 8 Dec 2020 09:35:36 +0800 Subject: [PATCH 0695/1009] [SPARK-32320][PYSPARK] Remove mutable default arguments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is bad practice, and might lead to unexpected behaviour: https://florimond.dev/blog/articles/2018/08/python-mutable-defaults-are-the-source-of-all-evil/ ``` fokkodriesprongFan spark % grep -R "={}" python | grep def python/pyspark/resource/profile.py: def __init__(self, _java_resource_profile=None, _exec_req={}, _task_req={}): python/pyspark/sql/functions.py:def from_json(col, schema, options={}): python/pyspark/sql/functions.py:def to_json(col, options={}): python/pyspark/sql/functions.py:def schema_of_json(json, options={}): python/pyspark/sql/functions.py:def schema_of_csv(csv, options={}): python/pyspark/sql/functions.py:def to_csv(col, options={}): python/pyspark/sql/functions.py:def from_csv(col, schema, 
options={}): python/pyspark/sql/avro/functions.py:def from_avro(data, jsonFormatSchema, options={}): ``` ``` fokkodriesprongFan spark % grep -R "=\[\]" python | grep def python/pyspark/ml/tuning.py: def __init__(self, bestModel, avgMetrics=[], subModels=None): python/pyspark/ml/tuning.py: def __init__(self, bestModel, validationMetrics=[], subModels=None): ``` ### What changes were proposed in this pull request? Removing the mutable default arguments. ### Why are the changes needed? Removing the mutable default arguments, and changing the signature to `Optional[...]`. ### Does this PR introduce _any_ user-facing change? No 👍 ### How was this patch tested? Using the Flake8 bugbear code analysis plugin. Closes #29122 from Fokko/SPARK-32320. Authored-by: Fokko Driesprong Signed-off-by: Ruifeng Zheng --- dev/sparktestsupport/modules.py | 9 +++++---- dev/tox.ini | 2 +- python/mypy.ini | 2 ++ python/pyspark/ml/regression.py | 4 ++-- python/pyspark/ml/tuning.py | 8 ++++---- python/pyspark/ml/tuning.pyi | 4 ++-- python/pyspark/resource/profile.py | 6 +++--- python/pyspark/resource/profile.pyi | 6 +++--- python/pyspark/sql/avro/functions.py | 4 ++-- python/pyspark/sql/avro/functions.pyi | 4 ++-- python/pyspark/sql/functions.py | 18 ++++++++++-------- python/pyspark/sql/functions.pyi | 12 ++++++------ 12 files changed, 42 insertions(+), 37 deletions(-) diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index 5d8b714711774..87bfbdf64a49f 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -31,9 +31,10 @@ class Module(object): files have changed. """ - def __init__(self, name, dependencies, source_file_regexes, build_profile_flags=(), environ={}, - sbt_test_goals=(), python_test_goals=(), excluded_python_implementations=(), - test_tags=(), should_run_r_tests=False, should_run_build_tests=False): + def __init__(self, name, dependencies, source_file_regexes, build_profile_flags=(), + environ=None, sbt_test_goals=(), python_test_goals=(), + excluded_python_implementations=(), test_tags=(), should_run_r_tests=False, + should_run_build_tests=False): """ Define a new module. 
@@ -62,7 +63,7 @@ def __init__(self, name, dependencies, source_file_regexes, build_profile_flags= self.source_file_prefixes = source_file_regexes self.sbt_test_goals = sbt_test_goals self.build_profile_flags = build_profile_flags - self.environ = environ + self.environ = environ or {} self.python_test_goals = python_test_goals self.excluded_python_implementations = excluded_python_implementations self.test_tags = test_tags diff --git a/dev/tox.ini b/dev/tox.ini index 7edf7d597fb58..43cd5877dfdb8 100644 --- a/dev/tox.ini +++ b/dev/tox.ini @@ -19,6 +19,6 @@ max-line-length=100 exclude=python/pyspark/cloudpickle/*.py,shared.py,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/* [flake8] -select = E901,E999,F821,F822,F823,F401,F405 +select = E901,E999,F821,F822,F823,F401,F405,B006 exclude = python/pyspark/cloudpickle/*.py,shared.py*,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/*,python/out,python/pyspark/sql/pandas/functions.pyi,python/pyspark/sql/column.pyi,python/pyspark/worker.pyi,python/pyspark/java_gateway.pyi max-line-length = 100 diff --git a/python/mypy.ini b/python/mypy.ini index 5103452a053be..ad4fcf7f317f0 100644 --- a/python/mypy.ini +++ b/python/mypy.ini @@ -102,6 +102,8 @@ disallow_untyped_defs = False ; Ignore errors in embedded third party code +no_implicit_optional = True + [mypy-pyspark.cloudpickle.*] ignore_errors = True diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index d37654a7388f5..8ecb68458ffbc 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -1801,7 +1801,7 @@ class AFTSurvivalRegression(_JavaRegressor, _AFTSurvivalRegressionParams, @keyword_only def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="prediction", fitIntercept=True, maxIter=100, tol=1E-6, censorCol="censor", - quantileProbabilities=list([0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99]), + quantileProbabilities=list([0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99]), # noqa: B005 quantilesCol=None, aggregationDepth=2, maxBlockSizeInMB=0.0): """ __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ @@ -1819,7 +1819,7 @@ def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="p @since("1.6.0") def setParams(self, *, featuresCol="features", labelCol="label", predictionCol="prediction", fitIntercept=True, maxIter=100, tol=1E-6, censorCol="censor", - quantileProbabilities=list([0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99]), + quantileProbabilities=list([0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99]), # noqa: B005 quantilesCol=None, aggregationDepth=2, maxBlockSizeInMB=0.0): """ setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 2c083182de470..2bddfe822f29e 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -835,13 +835,13 @@ class CrossValidatorModel(Model, _CrossValidatorParams, MLReadable, MLWritable): .. versionadded:: 1.4.0 """ - def __init__(self, bestModel, avgMetrics=[], subModels=None): + def __init__(self, bestModel, avgMetrics=None, subModels=None): super(CrossValidatorModel, self).__init__() #: best model from cross validation self.bestModel = bestModel #: Average cross-validation metrics for each paramMap in #: CrossValidator.estimatorParamMaps, in the corresponding order. 
- self.avgMetrics = avgMetrics + self.avgMetrics = avgMetrics or [] #: sub model list from cross validation self.subModels = subModels @@ -1323,12 +1323,12 @@ class TrainValidationSplitModel(Model, _TrainValidationSplitParams, MLReadable, .. versionadded:: 2.0.0 """ - def __init__(self, bestModel, validationMetrics=[], subModels=None): + def __init__(self, bestModel, validationMetrics=None, subModels=None): super(TrainValidationSplitModel, self).__init__() #: best model from train validation split self.bestModel = bestModel #: evaluated validation metrics - self.validationMetrics = validationMetrics + self.validationMetrics = validationMetrics or [] #: sub models from train validation split self.subModels = subModels diff --git a/python/pyspark/ml/tuning.pyi b/python/pyspark/ml/tuning.pyi index e5f153d49e9c6..912abd4d7124a 100644 --- a/python/pyspark/ml/tuning.pyi +++ b/python/pyspark/ml/tuning.pyi @@ -104,7 +104,7 @@ class CrossValidatorModel( def __init__( self, bestModel: Model, - avgMetrics: List[float] = ..., + avgMetrics: Optional[List[float]] = ..., subModels: Optional[List[List[Model]]] = ..., ) -> None: ... def copy(self, extra: Optional[ParamMap] = ...) -> CrossValidatorModel: ... @@ -171,7 +171,7 @@ class TrainValidationSplitModel( def __init__( self, bestModel: Model, - validationMetrics: List[float] = ..., + validationMetrics: Optional[List[float]] = ..., subModels: Optional[List[Model]] = ..., ) -> None: ... def setEstimator(self, value: Estimator) -> TrainValidationSplitModel: ... diff --git a/python/pyspark/resource/profile.py b/python/pyspark/resource/profile.py index 1c59a1c4a123c..38a68bc74d97e 100644 --- a/python/pyspark/resource/profile.py +++ b/python/pyspark/resource/profile.py @@ -34,13 +34,13 @@ class ResourceProfile(object): This API is evolving. """ - def __init__(self, _java_resource_profile=None, _exec_req={}, _task_req={}): + def __init__(self, _java_resource_profile=None, _exec_req=None, _task_req=None): if _java_resource_profile is not None: self._java_resource_profile = _java_resource_profile else: self._java_resource_profile = None - self._executor_resource_requests = _exec_req - self._task_resource_requests = _task_req + self._executor_resource_requests = _exec_req or {} + self._task_resource_requests = _task_req or {} @property def id(self): diff --git a/python/pyspark/resource/profile.pyi b/python/pyspark/resource/profile.pyi index 04838692436df..c8f23a5cac370 100644 --- a/python/pyspark/resource/profile.pyi +++ b/python/pyspark/resource/profile.pyi @@ -22,7 +22,7 @@ from pyspark.resource.requests import ( # noqa: F401 TaskResourceRequest as TaskResourceRequest, TaskResourceRequests as TaskResourceRequests, ) -from typing import overload, Dict, Union +from typing import overload, Dict, Union, Optional from py4j.java_gateway import JavaObject # type: ignore[import] class ResourceProfile: @@ -35,8 +35,8 @@ class ResourceProfile: def __init__( self, _java_resource_profile: None = ..., - _exec_req: Dict[str, ExecutorResourceRequest] = ..., - _task_req: Dict[str, TaskResourceRequest] = ..., + _exec_req: Optional[Dict[str, ExecutorResourceRequest]] = ..., + _task_req: Optional[Dict[str, TaskResourceRequest]] = ..., ) -> None: ... @property def id(self) -> int: ... 
diff --git a/python/pyspark/sql/avro/functions.py b/python/pyspark/sql/avro/functions.py index ce322814e34f8..7e4ceb20cd2c4 100644 --- a/python/pyspark/sql/avro/functions.py +++ b/python/pyspark/sql/avro/functions.py @@ -25,7 +25,7 @@ from pyspark.util import _print_missing_jar -def from_avro(data, jsonFormatSchema, options={}): +def from_avro(data, jsonFormatSchema, options=None): """ Converts a binary column of Avro format into its corresponding catalyst value. The specified schema must match the read data, otherwise the behavior is undefined: @@ -70,7 +70,7 @@ def from_avro(data, jsonFormatSchema, options={}): sc = SparkContext._active_spark_context try: jc = sc._jvm.org.apache.spark.sql.avro.functions.from_avro( - _to_java_column(data), jsonFormatSchema, options) + _to_java_column(data), jsonFormatSchema, options or {}) except TypeError as e: if str(e) == "'JavaPackage' object is not callable": _print_missing_jar("Avro", "avro", "avro", sc.version) diff --git a/python/pyspark/sql/avro/functions.pyi b/python/pyspark/sql/avro/functions.pyi index 4c2e3814a9e94..49881335d8fcc 100644 --- a/python/pyspark/sql/avro/functions.pyi +++ b/python/pyspark/sql/avro/functions.pyi @@ -16,12 +16,12 @@ # specific language governing permissions and limitations # under the License. -from typing import Dict +from typing import Dict, Optional from pyspark.sql._typing import ColumnOrName from pyspark.sql.column import Column def from_avro( - data: ColumnOrName, jsonFormatSchema: str, options: Dict[str, str] = ... + data: ColumnOrName, jsonFormatSchema: str, options: Optional[Dict[str, str]] = ... ) -> Column: ... def to_avro(data: ColumnOrName, jsonFormatSchema: str = ...) -> Column: ... diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 4dc3129fd6bc2..f612d2d0366f2 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -80,8 +80,10 @@ def _invoke_binary_math_function(name, col1, col2): ) -def _options_to_str(options): - return {key: to_str(value) for (key, value) in options.items()} +def _options_to_str(options=None): + if options: + return {key: to_str(value) for (key, value) in options.items()} + return {} def lit(col): @@ -3454,7 +3456,7 @@ def json_tuple(col, *fields): return Column(jc) -def from_json(col, schema, options={}): +def from_json(col, schema, options=None): """ Parses a column containing a JSON string into a :class:`MapType` with :class:`StringType` as keys type, :class:`StructType` or :class:`ArrayType` with @@ -3510,7 +3512,7 @@ def from_json(col, schema, options={}): return Column(jc) -def to_json(col, options={}): +def to_json(col, options=None): """ Converts a column containing a :class:`StructType`, :class:`ArrayType` or a :class:`MapType` into a JSON string. Throws an exception, in the case of an unsupported type. @@ -3557,7 +3559,7 @@ def to_json(col, options={}): return Column(jc) -def schema_of_json(json, options={}): +def schema_of_json(json, options=None): """ Parses a JSON string and infers its schema in DDL format. @@ -3594,7 +3596,7 @@ def schema_of_json(json, options={}): return Column(jc) -def schema_of_csv(csv, options={}): +def schema_of_csv(csv, options=None): """ Parses a CSV string and infers its schema in DDL format. @@ -3627,7 +3629,7 @@ def schema_of_csv(csv, options={}): return Column(jc) -def to_csv(col, options={}): +def to_csv(col, options=None): """ Converts a column containing a :class:`StructType` into a CSV string. Throws an exception, in the case of an unsupported type. 
@@ -4038,7 +4040,7 @@ def sequence(start, stop, step=None): _to_java_column(start), _to_java_column(stop), _to_java_column(step))) -def from_csv(col, schema, options={}): +def from_csv(col, schema, options=None): """ Parses a column containing a CSV string to a row with the specified schema. Returns `null`, in the case of an unparseable string. diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi index 50e178df9996f..acb17a2657d00 100644 --- a/python/pyspark/sql/functions.pyi +++ b/python/pyspark/sql/functions.pyi @@ -196,12 +196,12 @@ def json_tuple(col: ColumnOrName, *fields: str) -> Column: ... def from_json( col: ColumnOrName, schema: Union[ArrayType, StructType, Column, str], - options: Dict[str, str] = ..., + options: Optional[Dict[str, str]] = ..., ) -> Column: ... -def to_json(col: ColumnOrName, options: Dict[str, str] = ...) -> Column: ... -def schema_of_json(json: ColumnOrName, options: Dict[str, str] = ...) -> Column: ... -def schema_of_csv(csv: ColumnOrName, options: Dict[str, str] = ...) -> Column: ... -def to_csv(col: ColumnOrName, options: Dict[str, str] = ...) -> Column: ... +def to_json(col: ColumnOrName, options: Optional[Dict[str, str]] = ...) -> Column: ... +def schema_of_json(json: ColumnOrName, options: Optional[Dict[str, str]] = ...) -> Column: ... +def schema_of_csv(csv: ColumnOrName, options: Optional[Dict[str, str]] = ...) -> Column: ... +def to_csv(col: ColumnOrName, options: Optional[Dict[str, str]] = ...) -> Column: ... def size(col: ColumnOrName) -> Column: ... def array_min(col: ColumnOrName) -> Column: ... def array_max(col: ColumnOrName) -> Column: ... @@ -223,7 +223,7 @@ def sequence( def from_csv( col: ColumnOrName, schema: Union[StructType, Column, str], - options: Dict[str, str] = ..., + options: Optional[Dict[str, str]] = ..., ) -> Column: ... @overload def transform(col: ColumnOrName, f: Callable[[Column], Column]) -> Column: ... From b2a79306ef7b330c5bf4dc1337ed80ebd6e08d0c Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 7 Dec 2020 18:59:15 -0800 Subject: [PATCH 0696/1009] [SPARK-33680][SQL][TESTS][FOLLOWUP] Fix more test suites to have explicit confs ### What changes were proposed in this pull request? This is a follow-up for SPARK-33680 to remove the assumption on the default value of `spark.sql.adaptive.enabled` . ### Why are the changes needed? According to the test result https://github.com/apache/spark/pull/30628#issuecomment-739866168, the [previous run](https://github.com/apache/spark/pull/30628#issuecomment-739641105) didn't run all tests. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #30655 from dongjoon-hyun/SPARK-33680. 
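A condensed sketch of the pattern this follow-up applies across the touched suites (suite boilerplate and the actual plan assertions are elided):

```scala
// Plan-shape tests now pin the configs they depend on instead of assuming the
// default value of spark.sql.adaptive.enabled.
withSQLConf(
    SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1",
    SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") {
  // ... assertions on the executed plan that would differ under AQE ...
}
```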
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../spark/sql/DataFrameAggregateSuite.scala | 4 +- .../apache/spark/sql/DataFrameJoinSuite.scala | 4 +- .../org/apache/spark/sql/JoinSuite.scala | 9 ++- .../spark/sql/execution/PlannerSuite.scala | 73 +++++++++++++------ .../spark/sql/sources/BucketedReadSuite.scala | 5 +- .../SqlResourceWithActualMetricsSuite.scala | 11 ++- 6 files changed, 74 insertions(+), 32 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala index d4e64aa03df0e..78983a4bd1a29 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -1001,7 +1001,9 @@ class DataFrameAggregateSuite extends QueryTest Seq(true, false).foreach { value => test(s"SPARK-31620: agg with subquery (whole-stage-codegen = $value)") { - withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> value.toString) { + withSQLConf( + SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> value.toString, + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { withTempView("t1", "t2") { sql("create temporary view t1 as select * from values (1, 2) as t1(a, b)") sql("create temporary view t2 as select * from values (3, 4) as t2(c, d)") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala index 14d03a30453ac..c317f562c65dc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala @@ -335,7 +335,9 @@ class DataFrameJoinSuite extends QueryTest withTempDatabase { dbName => withTable(table1Name, table2Name) { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { spark.range(50).write.saveAsTable(s"$dbName.$table1Name") spark.range(100).write.saveAsTable(s"$dbName.$table2Name") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala index 8755dccb801c2..a728e5cc17001 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala @@ -1107,6 +1107,7 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan test("SPARK-32330: Preserve shuffled hash join build side partitioning") { withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false", SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "50", SQLConf.SHUFFLE_PARTITIONS.key -> "2", SQLConf.PREFER_SORTMERGEJOIN.key -> "false") { @@ -1130,6 +1131,7 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan // Test broadcast hash join withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false", SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "50") { Seq("inner", "left_outer").foreach(joinType => { val plan = df1.join(df2, $"k1" === $"k2", joinType) @@ -1146,6 +1148,7 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan // Test shuffled hash join withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false", SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "50", SQLConf.SHUFFLE_PARTITIONS.key -> "2", SQLConf.PREFER_SORTMERGEJOIN.key -> "false") { @@ -1253,6 +1256,7 @@ class 
JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan withSQLConf( // Set broadcast join threshold and number of shuffle partitions, // as shuffled hash join depends on these two configs. + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false", SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "80", SQLConf.SHUFFLE_PARTITIONS.key -> "2") { val smjDF = df1.join(df2, joinExprs, "full") @@ -1284,7 +1288,9 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan ) inputDFs.foreach { case (df1, df2, joinType) => // Test broadcast hash join - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "200") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "200", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { val bhjCodegenDF = df1.join(df2, $"k1" === $"k2", joinType) assert(bhjCodegenDF.queryExecution.executedPlan.collect { case WholeStageCodegenExec(_ : BroadcastHashJoinExec) => true @@ -1305,6 +1311,7 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan // Set broadcast join threshold and number of shuffle partitions, // as shuffled hash join depends on these two configs. SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "50", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false", SQLConf.SHUFFLE_PARTITIONS.key -> "2") { val shjCodegenDF = df1.join(df2, $"k1" === $"k2", joinType) assert(shjCodegenDF.queryExecution.executedPlan.collect { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index 5e30f846307ae..4e01d1c06f64e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -877,7 +877,9 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } test("aliases in the project should not introduce extra shuffle") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { withTempView("df1", "df2") { spark.range(10).selectExpr("id AS key", "0").repartition($"key").createTempView("df1") spark.range(20).selectExpr("id AS key", "0").repartition($"key").createTempView("df2") @@ -897,7 +899,9 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { test("SPARK-33399: aliases should be handled properly in PartitioningCollection output" + " partitioning") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { withTempView("t1", "t2", "t3") { spark.range(10).repartition($"id").createTempView("t1") spark.range(20).repartition($"id").createTempView("t2") @@ -927,7 +931,9 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } test("SPARK-33399: aliases should be handled properly in HashPartitioning") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { withTempView("t1", "t2", "t3") { spark.range(10).repartition($"id").createTempView("t1") spark.range(20).repartition($"id").createTempView("t2") @@ -955,7 +961,9 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } test("SPARK-33399: alias handling should happen 
properly for RangePartitioning") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { val df = spark.range(1, 100) .select(col("id").as("id1")).groupBy("id1").count() // Plan for this will be Range -> ProjectWithAlias -> HashAggregate -> HashAggregate @@ -976,7 +984,9 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { test("SPARK-33399: aliased should be handled properly " + "for partitioning and sortorder involving complex expressions") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { withTempView("t1", "t2", "t3") { spark.range(10).select(col("id").as("id1")).createTempView("t1") spark.range(20).select(col("id").as("id2")).createTempView("t2") @@ -1014,7 +1024,9 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } test("SPARK-33399: alias handling should happen properly for SinglePartition") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { val df = spark.range(1, 100, 1, 1) .select(col("id").as("id1")).groupBy("id1").count() val planned = df.queryExecution.executedPlan @@ -1031,7 +1043,9 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { test("SPARK-33399: No extra exchanges in case of" + " [Inner Join -> Project with aliases -> HashAggregate]") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { withTempView("t1", "t2") { spark.range(10).repartition($"id").createTempView("t1") spark.range(20).repartition($"id").createTempView("t2") @@ -1060,7 +1074,9 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } test("SPARK-33400: Normalization of sortOrder should take care of sameOrderExprs") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { withTempView("t1", "t2", "t3") { spark.range(10).repartition($"id").createTempView("t1") spark.range(20).repartition($"id").createTempView("t2") @@ -1091,7 +1107,9 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } test("sort order doesn't have repeated expressions") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { withTempView("t1", "t2") { spark.range(10).repartition($"id").createTempView("t1") spark.range(20).repartition($"id").createTempView("t2") @@ -1117,7 +1135,9 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } test("aliases to expressions should not be replaced") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { withTempView("df1", "df2") { spark.range(10).selectExpr("id AS key", "0").repartition($"key").createTempView("df1") spark.range(20).selectExpr("id AS key", "0").repartition($"key").createTempView("df2") @@ 
-1143,7 +1163,9 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } test("aliases in the aggregate expressions should not introduce extra shuffle") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { val t1 = spark.range(10).selectExpr("floor(id/4) as k1") val t2 = spark.range(20).selectExpr("floor(id/4) as k2") @@ -1160,7 +1182,9 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } test("aliases in the object hash/sort aggregate expressions should not introduce extra shuffle") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { Seq(true, false).foreach { useObjectHashAgg => withSQLConf(SQLConf.USE_OBJECT_HASH_AGG.key -> useObjectHashAgg.toString) { val t1 = spark.range(10).selectExpr("floor(id/4) as k1") @@ -1185,21 +1209,22 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } test("aliases in the sort aggregate expressions should not introduce extra sort") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { - withSQLConf(SQLConf.USE_OBJECT_HASH_AGG.key -> "false") { - val t1 = spark.range(10).selectExpr("floor(id/4) as k1") - val t2 = spark.range(20).selectExpr("floor(id/4) as k2") + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false", + SQLConf.USE_OBJECT_HASH_AGG.key -> "false") { + val t1 = spark.range(10).selectExpr("floor(id/4) as k1") + val t2 = spark.range(20).selectExpr("floor(id/4) as k2") - val agg1 = t1.groupBy("k1").agg(collect_list("k1")).withColumnRenamed("k1", "k3") - val agg2 = t2.groupBy("k2").agg(collect_list("k2")) + val agg1 = t1.groupBy("k1").agg(collect_list("k1")).withColumnRenamed("k1", "k3") + val agg2 = t2.groupBy("k2").agg(collect_list("k2")) - val planned = agg1.join(agg2, $"k3" === $"k2").queryExecution.executedPlan - assert(planned.collect { case s: SortAggregateExec => s }.nonEmpty) + val planned = agg1.join(agg2, $"k3" === $"k2").queryExecution.executedPlan + assert(planned.collect { case s: SortAggregateExec => s }.nonEmpty) - // We expect two SortExec nodes on each side of join. - val sorts = planned.collect { case s: SortExec => s } - assert(sorts.size == 4) - } + // We expect two SortExec nodes on each side of join. 
+ val sorts = planned.collect { case s: SortExec => s } + assert(sorts.size == 4) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala index 167e87dd3d5cb..0ff9303421ade 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning import org.apache.spark.sql.execution.{FileSourceScanExec, SortExec, SparkPlan} -import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec +import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, DisableAdaptiveExecutionSuite} import org.apache.spark.sql.execution.datasources.BucketingUtils import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec import org.apache.spark.sql.execution.joins.SortMergeJoinExec @@ -39,7 +39,8 @@ import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} import org.apache.spark.util.Utils import org.apache.spark.util.collection.BitSet -class BucketedReadWithoutHiveSupportSuite extends BucketedReadSuite with SharedSparkSession { +class BucketedReadWithoutHiveSupportSuite + extends BucketedReadSuite with DisableAdaptiveExecutionSuite with SharedSparkSession { protected override def beforeAll(): Unit = { super.beforeAll() assert(spark.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "in-memory") diff --git a/sql/core/src/test/scala/org/apache/spark/status/api/v1/sql/SqlResourceWithActualMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/status/api/v1/sql/SqlResourceWithActualMetricsSuite.scala index 0c0e3ac90510e..1510e8957f9ae 100644 --- a/sql/core/src/test/scala/org/apache/spark/status/api/v1/sql/SqlResourceWithActualMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/status/api/v1/sql/SqlResourceWithActualMetricsSuite.scala @@ -26,7 +26,9 @@ import org.json4s.jackson.JsonMethods import org.apache.spark.SparkConf import org.apache.spark.deploy.history.HistoryServerSuite.getContentAndCode import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.execution.metric.SQLMetricsTestUtils +import org.apache.spark.sql.internal.SQLConf.ADAPTIVE_EXECUTION_ENABLED import org.apache.spark.sql.test.SharedSparkSession case class Person(id: Int, name: String, age: Int) @@ -35,7 +37,8 @@ case class Salary(personId: Int, salary: Double) /** * Sql Resource Public API Unit Tests running query and extracting the metrics. */ -class SqlResourceWithActualMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils { +class SqlResourceWithActualMetricsSuite + extends SharedSparkSession with SQLMetricsTestUtils with SQLHelper { import testImplicits._ @@ -52,8 +55,10 @@ class SqlResourceWithActualMetricsSuite extends SharedSparkSession with SQLMetri test("Check Sql Rest Api Endpoints") { // Materalize result DataFrame - val count = getDF().count() - assert(count == 2, s"Expected Query Count is 2 but received: $count") + withSQLConf(ADAPTIVE_EXECUTION_ENABLED.key -> "false") { + val count = getDF().count() + assert(count == 2, s"Expected Query Count is 2 but received: $count") + } // Spark apps launched by local-mode seems not having `attemptId` as default // so UT is just added for existing endpoints. 
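Where an entire suite depends on adaptive execution being off, the `BucketedReadSuite` change above takes the suite-level route instead of repeating the conf in each test: it mixes in `DisableAdaptiveExecutionSuite`. A hedged sketch of that approach (the suite name is hypothetical, and this is not an excerpt from the patch):
```
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.execution.adaptive.DisableAdaptiveExecutionSuite
import org.apache.spark.sql.test.SharedSparkSession

// The mixin is intended to run every test in the suite with adaptive execution
// disabled, so individual tests need no per-test withSQLConf entry.
class MyPlanShapeSuite extends QueryTest with SharedSparkSession
  with DisableAdaptiveExecutionSuite {
  // tests that assert on exact physical plan shapes go here
}
```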
From ebd8b9357af296b8859e65577ab1e16593fab50d Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Tue, 8 Dec 2020 11:04:29 +0800 Subject: [PATCH 0697/1009] [SPARK-33609][ML] word2vec reduce broadcast size ### What changes were proposed in this pull request? 1, directly use float vectors instead of converting to double vectors, this is about 2x faster than using vec.axpy; 2, mark `wordList` and `wordVecNorms` lazy 3, avoid slicing in computation of `wordVecNorms` ### Why are the changes needed? halve broadcast size ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? existing testsuites Closes #30548 from zhengruifeng/w2v_float32_transform. Lead-authored-by: Ruifeng Zheng Co-authored-by: zhengruifeng Signed-off-by: Ruifeng Zheng --- .../apache/spark/ml/feature/Word2Vec.scala | 32 +++++++++++-------- .../apache/spark/mllib/feature/Word2Vec.scala | 27 +++++++--------- 2 files changed, 31 insertions(+), 28 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala index 9b5f5a619e02c..0b9c1b570d943 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala @@ -285,27 +285,33 @@ class Word2VecModel private[ml] ( @Since("2.0.0") override def transform(dataset: Dataset[_]): DataFrame = { val outputSchema = transformSchema(dataset.schema, logging = true) - val vectors = wordVectors.getVectors - .mapValues(vv => Vectors.dense(vv.map(_.toDouble))) - .map(identity).toMap // mapValues doesn't return a serializable map (SI-7005) - val bVectors = dataset.sparkSession.sparkContext.broadcast(vectors) - val d = $(vectorSize) - val emptyVec = Vectors.sparse(d, Array.emptyIntArray, Array.emptyDoubleArray) - val word2Vec = udf { sentence: Seq[String] => + + val bcModel = dataset.sparkSession.sparkContext.broadcast(this.wordVectors) + val size = $(vectorSize) + val emptyVec = Vectors.sparse(size, Array.emptyIntArray, Array.emptyDoubleArray) + val transformer = udf { sentence: Seq[String] => if (sentence.isEmpty) { emptyVec } else { - val sum = Vectors.zeros(d) + val wordIndices = bcModel.value.wordIndex + val wordVectors = bcModel.value.wordVectors + val array = Array.ofDim[Double](size) + var count = 0 sentence.foreach { word => - bVectors.value.get(word).foreach { v => - BLAS.axpy(1.0, v, sum) + wordIndices.get(word).foreach { index => + val offset = index * size + var i = 0 + while (i < size) { array(i) += wordVectors(offset + i); i += 1 } } + count += 1 } - BLAS.scal(1.0 / sentence.size, sum) - sum + val vec = Vectors.dense(array) + BLAS.scal(1.0 / count, vec) + vec } } - dataset.withColumn($(outputCol), word2Vec(col($(inputCol))), + + dataset.withColumn($(outputCol), transformer(col($(inputCol))), outputSchema($(outputCol)).metadata) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala index eeb583f84ca8b..8a6317a910146 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala @@ -502,22 +502,15 @@ class Word2VecModel private[spark] ( private val vectorSize = wordVectors.length / numWords // wordList: Ordered list of words obtained from wordIndex. 
- private val wordList: Array[String] = { - val (wl, _) = wordIndex.toSeq.sortBy(_._2).unzip - wl.toArray + private lazy val wordList: Array[String] = { + wordIndex.toSeq.sortBy(_._2).iterator.map(_._1).toArray } // wordVecNorms: Array of length numWords, each value being the Euclidean norm // of the wordVector. - private val wordVecNorms: Array[Float] = { - val wordVecNorms = new Array[Float](numWords) - var i = 0 - while (i < numWords) { - val vec = wordVectors.slice(i * vectorSize, i * vectorSize + vectorSize) - wordVecNorms(i) = blas.snrm2(vectorSize, vec, 1) - i += 1 - } - wordVecNorms + private lazy val wordVecNorms: Array[Float] = { + val size = vectorSize + Array.tabulate(numWords)(i => blas.snrm2(size, wordVectors, i * size, 1)) } @Since("1.5.0") @@ -538,9 +531,13 @@ class Word2VecModel private[spark] ( @Since("1.1.0") def transform(word: String): Vector = { wordIndex.get(word) match { - case Some(ind) => - val vec = wordVectors.slice(ind * vectorSize, ind * vectorSize + vectorSize) - Vectors.dense(vec.map(_.toDouble)) + case Some(index) => + val size = vectorSize + val offset = index * size + val array = Array.ofDim[Double](size) + var i = 0 + while (i < size) { array(i) = wordVectors(offset + i); i += 1 } + Vectors.dense(array) case None => throw new IllegalStateException(s"$word not in vocabulary") } From 8bcebfa59a64123f014c01bc4fb5de8d9624f8f4 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Mon, 7 Dec 2020 19:09:59 -0800 Subject: [PATCH 0698/1009] [SPARK-33698][BUILD][TESTS] Fix the build error of OracleIntegrationSuite for Scala 2.13 ### What changes were proposed in this pull request? This PR fixes a build error of `OracleIntegrationSuite` with Scala 2.13. ### Why are the changes needed? Build should pass with Scala 2.13. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? I confirmed that the build pass with the following command. ``` $ build/sbt -Pdocker-integration-tests -Pscala-2.13 "docker-integration-tests/test:compile" ``` Closes #30660 from sarutak/fix-docker-integration-tests-for-scala-2.13. Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala index 60eb1c055a38e..3937d62afacc2 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala @@ -401,7 +401,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark val values = rows(0) assert(values.getDecimal(0).equals(new java.math.BigDecimal("12312321321321312312312312123"))) assert(values.getInt(1).equals(1)) - assert(values.getBoolean(2).equals(false)) + assert(values.getBoolean(2) == false) } test("SPARK-22303: handle BINARY_DOUBLE and BINARY_FLOAT as DoubleType and FloatType") { From 5aefc49b0f7047f2c928c18b371098314c2f59f0 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Tue, 8 Dec 2020 03:54:16 +0000 Subject: [PATCH 0699/1009] [SPARK-33664][SQL] Migrate ALTER TABLE ... RENAME TO to use UnresolvedTableOrView to resolve identifier ### What changes were proposed in this pull request? This PR proposes to migrate `ALTER [TABLE|ViEW] ... 
RENAME TO` to use `UnresolvedTableOrView` to resolve the table/view identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). ### Why are the changes needed? To use `UnresolvedTableOrView` for table/view resolution. Note that `AlterTableRenameCommand` internally resolves to a temp view first, so there is no resolution behavior change with this PR. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Updated existing tests. Closes #30610 from imback82/rename_v2. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/ResolveCatalogs.scala | 6 ------ .../spark/sql/catalyst/parser/AstBuilder.scala | 12 ++++++++---- .../sql/catalyst/plans/logical/statements.scala | 8 -------- .../sql/catalyst/plans/logical/v2Commands.scala | 10 ++++++---- .../sql/catalyst/parser/DDLParserSuite.scala | 10 ++++++++-- .../analysis/ResolveSessionCatalog.scala | 3 +-- .../datasources/v2/DataSourceV2Strategy.scala | 8 ++++++-- .../sql/connector/DataSourceV2SQLSuite.scala | 13 ++++++++++--- .../v2/jdbc/JDBCTableCatalogSuite.scala | 16 +++++++--------- 9 files changed, 46 insertions(+), 40 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala index a90de697bc084..6d89414ba106d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala @@ -133,12 +133,6 @@ class ResolveCatalogs(val catalogManager: CatalogManager) s"Can not specify catalog `${catalog.name}` for view ${tbl.quoted} " + s"because view support in catalog has not been implemented yet") - case RenameTableStatement(NonSessionCatalogAndTable(catalog, oldName), newNameParts, isView) => - if (isView) { - throw new AnalysisException("Renaming view is not supported in v2 catalogs.") - } - RenameTable(catalog.asTableCatalog, oldName.asIdentifier, newNameParts.asIdentifier) - case c @ CreateTableStatement( NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _, _) => assertNoNullTypeInSchema(c.tableSchema) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index a22383c62bf74..42c67ac963cbe 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3834,7 +3834,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } /** - * Create a [[RenameTableStatement]] command. + * Create a [[RenameTable]] command. 
* * For example: * {{{ @@ -3843,10 +3843,14 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * }}} */ override def visitRenameTable(ctx: RenameTableContext): LogicalPlan = withOrigin(ctx) { - RenameTableStatement( - visitMultipartIdentifier(ctx.from), + val isView = ctx.VIEW != null + val relationStr = if (isView) "VIEW" else "TABLE" + RenameTable( + UnresolvedTableOrView( + visitMultipartIdentifier(ctx.from), + s"ALTER $relationStr ... RENAME TO"), visitMultipartIdentifier(ctx.to), - ctx.VIEW != null) + isView) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index 1763547792e35..8f0889bbcebd8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -338,14 +338,6 @@ case class AlterViewAsStatement( originalText: String, query: LogicalPlan) extends ParsedStatement -/** - * ALTER TABLE ... RENAME TO command, as parsed from SQL. - */ -case class RenameTableStatement( - oldName: Seq[String], - newName: Seq[String], - isView: Boolean) extends ParsedStatement - /** * A DROP VIEW statement, as parsed from SQL. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 67056470418fe..6f35364cce131 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -456,12 +456,14 @@ case class AlterTable( } /** - * The logical plan of the ALTER TABLE RENAME command. + * The logical plan of the ALTER [TABLE|VIEW] ... RENAME TO command. */ case class RenameTable( - catalog: TableCatalog, - oldIdent: Identifier, - newIdent: Identifier) extends Command + child: LogicalPlan, + newName: Seq[String], + isView: Boolean) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} /** * The logical plan of the SHOW TABLE command. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index e98ec6a667a73..f925be8617b47 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1103,10 +1103,16 @@ class DDLParserSuite extends AnalysisTest { test("alter table/view: rename table/view") { comparePlans( parsePlan("ALTER TABLE a.b.c RENAME TO x.y.z"), - RenameTableStatement(Seq("a", "b", "c"), Seq("x", "y", "z"), isView = false)) + RenameTable( + UnresolvedTableOrView(Seq("a", "b", "c"), "ALTER TABLE ... RENAME TO"), + Seq("x", "y", "z"), + isView = false)) comparePlans( parsePlan("ALTER VIEW a.b.c RENAME TO x.y.z"), - RenameTableStatement(Seq("a", "b", "c"), Seq("x", "y", "z"), isView = true)) + RenameTable( + UnresolvedTableOrView(Seq("a", "b", "c"), "ALTER VIEW ... 
RENAME TO"), + Seq("x", "y", "z"), + isView = true)) } test("describe table column") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index a87ed4b6275d8..7e5f39e398a6b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -237,8 +237,7 @@ class ResolveSessionCatalog( } AlterDatabaseSetLocationCommand(ns.head, location) - // v1 RENAME TABLE supports temp view. - case RenameTableStatement(TempViewOrV1Table(oldName), newName, isView) => + case RenameTable(ResolvedV1TableOrViewIdentifier(oldName), newName, isView) => AlterTableRenameCommand(oldName.asTableIdentifier, newName.asTableIdentifier, isView) // Use v1 command to describe (temp) view, as v2 catalog doesn't support view yet. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 5289d359f7809..075d2a43dce4e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -257,8 +257,12 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case AlterTable(catalog, ident, _, changes) => AlterTableExec(catalog, ident, changes) :: Nil - case RenameTable(catalog, oldIdent, newIdent) => - RenameTableExec(catalog, oldIdent, newIdent) :: Nil + case RenameTable(ResolvedTable(catalog, oldIdent, _), newIdent, isView) => + if (isView) { + throw new AnalysisException( + "Cannot rename a table with ALTER VIEW. Please use ALTER TABLE instead.") + } + RenameTableExec(catalog, oldIdent, newIdent.asIdentifier) :: Nil case AlterNamespaceSetProperties(ResolvedNamespace(catalog, ns), properties) => AlterNamespaceSetPropertiesExec(catalog.asNamespaceCatalog, ns, properties) :: Nil diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 6838a7644a29f..2673577aecf36 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -1975,10 +1975,16 @@ class DataSourceV2SQLSuite test("AlterTable: rename table basic test") { withTable("testcat.ns1.new") { - sql(s"CREATE TABLE testcat.ns1.ns2.old USING foo AS SELECT id, data FROM source") + sql("CREATE TABLE testcat.ns1.ns2.old USING foo AS SELECT id, data FROM source") checkAnswer(sql("SHOW TABLES FROM testcat.ns1.ns2"), Seq(Row("ns1.ns2", "old"))) - sql(s"ALTER TABLE testcat.ns1.ns2.old RENAME TO ns1.new") + val e = intercept[AnalysisException] { + sql("ALTER VIEW testcat.ns1.ns2.old RENAME TO ns1.new") + } + assert(e.getMessage.contains( + "Cannot rename a table with ALTER VIEW. 
Please use ALTER TABLE instead")) + + sql("ALTER TABLE testcat.ns1.ns2.old RENAME TO ns1.new") checkAnswer(sql("SHOW TABLES FROM testcat.ns1.ns2"), Seq.empty) checkAnswer(sql("SHOW TABLES FROM testcat.ns1"), Seq(Row("ns1", "new"))) } @@ -1988,7 +1994,8 @@ class DataSourceV2SQLSuite val e = intercept[AnalysisException] { sql(s"ALTER VIEW testcat.ns.tbl RENAME TO ns.view") } - assert(e.getMessage.contains("Renaming view is not supported in v2 catalogs")) + assert(e.getMessage.contains( + "Table or view not found for 'ALTER VIEW ... RENAME TO': testcat.ns.tbl")) } test("ANALYZE TABLE") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala index 9e9df7db1e1c6..e764f71867426 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala @@ -23,7 +23,7 @@ import org.apache.log4j.Level import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, QueryTest, Row} -import org.apache.spark.sql.catalyst.analysis.{NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.analysis.{NoSuchNamespaceException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession @@ -106,18 +106,16 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { Seq(Row("test", "dst_table"), Row("test", "people"))) } // Rename not existing table or namespace - val exp1 = intercept[NoSuchTableException] { - sql(s"ALTER TABLE h2.test.not_existing_table RENAME TO test.dst_table") + val exp1 = intercept[AnalysisException] { + sql("ALTER TABLE h2.test.not_existing_table RENAME TO test.dst_table") } assert(exp1.getMessage.contains( - "Failed table renaming from test.not_existing_table to test.dst_table")) - assert(exp1.cause.get.getMessage.contains("Table \"not_existing_table\" not found")) - val exp2 = intercept[NoSuchNamespaceException] { - sql(s"ALTER TABLE h2.bad_test.not_existing_table RENAME TO test.dst_table") + "Table or view not found for 'ALTER TABLE ... RENAME TO': h2.test.not_existing_table")) + val exp2 = intercept[AnalysisException] { + sql("ALTER TABLE h2.bad_test.not_existing_table RENAME TO test.dst_table") } assert(exp2.getMessage.contains( - "Failed table renaming from bad_test.not_existing_table to test.dst_table")) - assert(exp2.cause.get.getMessage.contains("Schema \"bad_test\" not found")) + "Table or view not found for 'ALTER TABLE ... RENAME TO': h2.bad_test.not_existing_table")) // Rename to an existing table withTable("h2.test.dst_table") { withConnection { conn => From 3a6546d3858e7c184f36cb6c4fd454f2142460f0 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 8 Dec 2020 14:11:39 +0900 Subject: [PATCH 0700/1009] [MINOR][INFRA] Add -Pdocker-integration-tests to GitHub Action Scala 2.13 build job ### What changes were proposed in this pull request? This aims to add `-Pdocker-integration-tests` at GitHub Action job for Scala 2.13 compilation. ### Why are the changes needed? We fixed Scala 2.13 compilation of this module at https://github.com/apache/spark/pull/30660 . This PR will prevent accidental regression at that module. 
### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GitHub Action Scala 2.13 job. Closes #30661 from dongjoon-hyun/SPARK-DOCKER-IT. Authored-by: Dongjoon Hyun Signed-off-by: Kousuke Saruta --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 72b2caf907151..e40d6362fd23f 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -414,7 +414,7 @@ jobs: - name: Build with SBT run: | ./dev/change-scala-version.sh 2.13 - ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pscala-2.13 compile test:compile + ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pscala-2.13 compile test:compile hadoop-2: name: Hadoop 2 build with SBT From 031c5ef280e0cba8c4718a6457a44b6cccb17f46 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 7 Dec 2020 23:10:35 -0800 Subject: [PATCH 0701/1009] [SPARK-33679][SQL] Enable spark.sql.adaptive.enabled by default ### What changes were proposed in this pull request? This PR aims to enable `spark.sql.adaptive.enabled` by default for Apache Spark **3.2.0**. ### Why are the changes needed? By switching the default for Apache Spark 3.2, the whole community can focus more on the stabilizing this feature in the various situation more seriously. ### Does this PR introduce _any_ user-facing change? Yes, but this is an improvement and it's supposed to have no bugs. ### How was this patch tested? Pass the CIs. Closes #30628 from dongjoon-hyun/SPARK-33679. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- docs/sql-migration-guide.md | 4 ++++ .../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 2c86e7a932637..65a769da70aea 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -22,6 +22,10 @@ license: | * Table of contents {:toc} +## Upgrading from Spark SQL 3.1 to 3.2 + + - In Spark 3.2, `spark.sql.adaptive.enabled` is enabled by default. To restore the behavior before Spark 3.2, you can set `spark.sql.adaptive.enabled` to `false`. + ## Upgrading from Spark SQL 3.0 to 3.1 - In Spark 3.1, statistical aggregation function includes `std`, `stddev`, `stddev_samp`, `variance`, `var_samp`, `skewness`, `kurtosis`, `covar_samp`, `corr` will return `NULL` instead of `Double.NaN` when `DivideByZero` occurs during expression evaluation, for example, when `stddev_samp` applied on a single element set. In Spark version 3.0 and earlier, it will return `Double.NaN` in such case. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.statisticalAggregate` to `true`. 
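As the new migration-guide entry added above says, the previous behavior can be restored by turning the flag off explicitly. A minimal illustration (not part of this patch; standard `SparkSession` APIs):
```
// Restore the pre-3.2 behavior for the current session:
spark.conf.set("spark.sql.adaptive.enabled", "false")

// or equivalently through SQL:
spark.sql("SET spark.sql.adaptive.enabled=false")

// or once, when the session is built:
// SparkSession.builder().config("spark.sql.adaptive.enabled", "false").getOrCreate()
```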
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index bc62213bdb740..11fe6c7894f76 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -404,7 +404,7 @@ object SQLConf { "middle of query execution, based on accurate runtime statistics.") .version("1.6.0") .booleanConf - .createWithDefault(false) + .createWithDefault(true) val ADAPTIVE_EXECUTION_FORCE_APPLY = buildConf("spark.sql.adaptive.forceApply") .internal() From 99613cd5815b2de12274027dee0c0a6c0c57bd95 Mon Sep 17 00:00:00 2001 From: luluorta Date: Tue, 8 Dec 2020 20:45:25 +0900 Subject: [PATCH 0702/1009] [SPARK-33677][SQL] Skip LikeSimplification rule if pattern contains any escapeChar ### What changes were proposed in this pull request? `LikeSimplification` rule does not work correctly for many cases that have patterns containing escape characters, for example: `SELECT s LIKE 'm%aca' ESCAPE '%' FROM t` `SELECT s LIKE 'maacaa' ESCAPE 'a' FROM t` For simpilicy, this PR makes this rule just be skipped if `pattern` contains any `escapeChar`. ### Why are the changes needed? Result corrupt. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added Unit test. Closes #30625 from luluorta/SPARK-33677. Authored-by: luluorta Signed-off-by: Takeshi Yamamuro --- .../sql/catalyst/optimizer/expressions.scala | 18 ++++--- .../optimizer/LikeSimplificationSuite.scala | 48 +++++++++++++++++++ .../org/apache/spark/sql/SQLQuerySuite.scala | 14 ++++++ 3 files changed, 74 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 4cdaf10dd3c60..7666c4a53e5dd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -543,27 +543,33 @@ object LikeSimplification extends Rule[LogicalPlan] { private val equalTo = "([^_%]*)".r def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { - case Like(input, Literal(pattern, StringType), escapeChar) => + case l @ Like(input, Literal(pattern, StringType), escapeChar) => if (pattern == null) { // If pattern is null, return null value directly, since "col like null" == null. Literal(null, BooleanType) } else { - val escapeStr = String.valueOf(escapeChar) pattern.toString match { - case startsWith(prefix) if !prefix.endsWith(escapeStr) => + // There are three different situations when pattern containing escapeChar: + // 1. pattern contains invalid escape sequence, e.g. 'm\aca' + // 2. pattern contains escaped wildcard character, e.g. 'ma\%ca' + // 3. pattern contains escaped escape character, e.g. 'ma\\ca' + // Although there are patterns can be optimized if we handle the escape first, we just + // skip this rule if pattern contains any escapeChar for simplicity. + case p if p.contains(escapeChar) => l + case startsWith(prefix) => StartsWith(input, Literal(prefix)) case endsWith(postfix) => EndsWith(input, Literal(postfix)) // 'a%a' pattern is basically same with 'a%' && '%a'. // However, the additional `Length` condition is required to prevent 'a' match 'a%a'. 
- case startsAndEndsWith(prefix, postfix) if !prefix.endsWith(escapeStr) => + case startsAndEndsWith(prefix, postfix) => And(GreaterThanOrEqual(Length(input), Literal(prefix.length + postfix.length)), And(StartsWith(input, Literal(prefix)), EndsWith(input, Literal(postfix)))) - case contains(infix) if !infix.endsWith(escapeStr) => + case contains(infix) => Contains(input, Literal(infix)) case equalTo(str) => EqualTo(input, Literal(str)) - case _ => Like(input, Literal.create(pattern, StringType), escapeChar) + case _ => l } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala index 436f62e4225c8..1812dce0da426 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala @@ -116,4 +116,52 @@ class LikeSimplificationSuite extends PlanTest { val optimized2 = Optimize.execute(originalQuery2.analyze) comparePlans(optimized2, originalQuery2.analyze) } + + test("SPARK-33677: LikeSimplification should be skipped if pattern contains any escapeChar") { + val originalQuery1 = + testRelation + .where(('a like "abc%") || ('a like "\\abc%")) + val optimized1 = Optimize.execute(originalQuery1.analyze) + val correctAnswer1 = testRelation + .where(StartsWith('a, "abc") || ('a like "\\abc%")) + .analyze + comparePlans(optimized1, correctAnswer1) + + val originalQuery2 = + testRelation + .where(('a like "%xyz") || ('a like "%xyz\\")) + val optimized2 = Optimize.execute(originalQuery2.analyze) + val correctAnswer2 = testRelation + .where(EndsWith('a, "xyz") || ('a like "%xyz\\")) + .analyze + comparePlans(optimized2, correctAnswer2) + + val originalQuery3 = + testRelation + .where(('a like ("@bc%def", '@')) || ('a like "abc%def")) + val optimized3 = Optimize.execute(originalQuery3.analyze) + val correctAnswer3 = testRelation + .where(('a like ("@bc%def", '@')) || + (Length('a) >= 6 && (StartsWith('a, "abc") && EndsWith('a, "def")))) + .analyze + comparePlans(optimized3, correctAnswer3) + + val originalQuery4 = + testRelation + .where(('a like "%mn%") || ('a like ("%mn%", '%'))) + val optimized4 = Optimize.execute(originalQuery4.analyze) + val correctAnswer4 = testRelation + .where(Contains('a, "mn") || ('a like ("%mn%", '%'))) + .analyze + comparePlans(optimized4, correctAnswer4) + + val originalQuery5 = + testRelation + .where(('a like "abc") || ('a like ("abbc", 'b'))) + val optimized5 = Optimize.execute(originalQuery5.analyze) + val correctAnswer5 = testRelation + .where(('a === "abc") || ('a like ("abbc", 'b'))) + .analyze + comparePlans(optimized5, correctAnswer5) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 727482e551a8b..2eeb729ece3fb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -3718,6 +3718,20 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } } + + test("SPARK-33677: LikeSimplification should be skipped if pattern contains any escapeChar") { + withTempView("df") { + Seq("m@ca").toDF("s").createOrReplaceTempView("df") + + val e = intercept[AnalysisException] { + sql("SELECT s LIKE 'm%@ca' ESCAPE '%' FROM df").collect() + } + 
assert(e.message.contains("the pattern 'm%@ca' is invalid, " + + "the escape character is not allowed to precede '@'")) + + checkAnswer(sql("SELECT s LIKE 'm@@ca' ESCAPE '@' FROM df"), Row(true)) + } + } } case class Foo(bar: Option[String]) From 2b30dde24972f7123b7ee14583fdce72e9ee955f Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 8 Dec 2020 12:08:22 +0000 Subject: [PATCH 0703/1009] [SPARK-33688][SQL] Migrate SHOW TABLE EXTENDED to new resolution framework ### What changes were proposed in this pull request? 1. Remove old statement `ShowTableStatement` 2. Introduce new command `ShowTableExtended` for `SHOW TABLE EXTENDED`. This PR is the first step of new V2 implementation of `SHOW TABLE EXTENDED`, see SPARK-33393. ### Why are the changes needed? This is a part of effort to make the relation lookup behavior consistent: SPARK-29900. ### Does this PR introduce _any_ user-facing change? The changes should not affect V1 tables. For V2, Spark outputs the error: ``` SHOW TABLE EXTENDED is not supported for v2 tables. ``` ### How was this patch tested? By running `SHOW TABLE EXTENDED` tests: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *ShowTablesSuite" ``` Closes #30645 from MaxGekk/show-table-extended-statement. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/parser/SqlBase.g4 | 2 +- .../sql/catalyst/analysis/Analyzer.scala | 2 ++ .../sql/catalyst/parser/AstBuilder.scala | 15 +++++++---- .../catalyst/plans/logical/statements.scala | 9 ------- .../catalyst/plans/logical/v2Commands.scala | 20 ++++++++++++-- .../analysis/ResolveSessionCatalog.scala | 20 +++++++++----- .../datasources/v2/DataSourceV2Strategy.scala | 3 +++ .../command/ShowTablesParserSuite.scala | 27 ++++++++++++------- .../command/v2/ShowTablesSuite.scala | 7 +++-- 9 files changed, 67 insertions(+), 38 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index a23994f456f75..b08451d8a6cfa 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -198,7 +198,7 @@ statement | SHOW TABLES ((FROM | IN) multipartIdentifier)? (LIKE? pattern=STRING)? #showTables | SHOW TABLE EXTENDED ((FROM | IN) ns=multipartIdentifier)? - LIKE pattern=STRING partitionSpec? #showTable + LIKE pattern=STRING partitionSpec? #showTableExtended | SHOW TBLPROPERTIES table=multipartIdentifier ('(' key=tablePropertyKey ')')? 
#showTblProperties | SHOW COLUMNS (FROM | IN) table=multipartIdentifier diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 6541961f5613e..680ec982b2112 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -847,6 +847,8 @@ class Analyzer(override val catalogManager: CatalogManager) def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { case s @ ShowTables(UnresolvedNamespace(Seq()), _) => s.copy(namespace = ResolvedNamespace(currentCatalog, catalogManager.currentNamespace)) + case s @ ShowTableExtended(UnresolvedNamespace(Seq()), _, _) => + s.copy(namespace = ResolvedNamespace(currentCatalog, catalogManager.currentNamespace)) case s @ ShowViews(UnresolvedNamespace(Seq()), _) => s.copy(namespace = ResolvedNamespace(currentCatalog, catalogManager.currentNamespace)) case UnresolvedNamespace(Seq()) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 42c67ac963cbe..b6bd3b77fc874 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3190,13 +3190,18 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } /** - * Create a [[ShowTableStatement]] command. + * Create a [[ShowTableExtended]] command. */ - override def visitShowTable(ctx: ShowTableContext): LogicalPlan = withOrigin(ctx) { - ShowTableStatement( - Option(ctx.ns).map(visitMultipartIdentifier), + override def visitShowTableExtended( + ctx: ShowTableExtendedContext): LogicalPlan = withOrigin(ctx) { + val multiPart = Option(ctx.multipartIdentifier).map(visitMultipartIdentifier) + val partitionKeys = Option(ctx.partitionSpec).map { specCtx => + UnresolvedPartitionSpec(visitNonOptionalPartitionSpec(specCtx), None) + } + ShowTableExtended( + UnresolvedNamespace(multiPart.getOrElse(Seq.empty[String])), string(ctx.pattern), - Option(ctx.partitionSpec).map(visitNonOptionalPartitionSpec)) + partitionKeys) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index 8f0889bbcebd8..402ae657d1709 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -377,15 +377,6 @@ case class InsertIntoStatement( override def children: Seq[LogicalPlan] = query :: Nil } -/** - * A SHOW TABLE EXTENDED statement, as parsed from SQL. - */ -case class ShowTableStatement( - namespace: Option[Seq[String]], - pattern: String, - partitionSpec: Option[TablePartitionSpec]) - extends ParsedStatement - /** * A CREATE NAMESPACE statement, as parsed from SQL. 
*/ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 6f35364cce131..72ba9cf6db0e2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, ColumnChange} import org.apache.spark.sql.connector.expressions.Transform -import org.apache.spark.sql.types.{DataType, MetadataBuilder, StringType, StructType} +import org.apache.spark.sql.types.{BooleanType, DataType, MetadataBuilder, StringType, StructType} /** * Base trait for DataSourceV2 write commands @@ -466,7 +466,7 @@ case class RenameTable( } /** - * The logical plan of the SHOW TABLE command. + * The logical plan of the SHOW TABLES command. */ case class ShowTables( namespace: LogicalPlan, @@ -478,6 +478,22 @@ case class ShowTables( AttributeReference("tableName", StringType, nullable = false)()) } +/** + * The logical plan of the SHOW TABLE EXTENDED command. + */ +case class ShowTableExtended( + namespace: LogicalPlan, + pattern: String, + partitionSpec: Option[PartitionSpec]) extends Command { + override def children: Seq[LogicalPlan] = namespace :: Nil + + override val output: Seq[Attribute] = Seq( + AttributeReference("namespace", StringType, nullable = false)(), + AttributeReference("tableName", StringType, nullable = false)(), + AttributeReference("isTemporary", BooleanType, nullable = false)(), + AttributeReference("information", StringType, nullable = false)()) +} + /** * The logical plan of the SHOW VIEWS command. * diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 7e5f39e398a6b..4c7e6fefd9759 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -383,14 +383,20 @@ class ResolveSessionCatalog( } ShowTablesCommand(Some(ns.head), pattern) - case ShowTableStatement(ns, pattern, partitionsSpec) => - val db = ns match { - case Some(ns) if ns.length != 1 => - throw new AnalysisException( - s"The database name is not valid: ${ns.quoted}") - case _ => ns.map(_.head) + case ShowTableExtended( + SessionCatalogAndNamespace(_, ns), + pattern, + partitionSpec @ (None | Some(UnresolvedPartitionSpec(_, _)))) => + assert(ns.nonEmpty) + if (ns.length != 1) { + throw new AnalysisException( + s"The database name is not valid: ${ns.quoted}") } - ShowTablesCommand(db, Some(pattern), true, partitionsSpec) + ShowTablesCommand( + databaseName = Some(ns.head), + tableIdentifierPattern = Some(pattern), + isExtended = true, + partitionSpec.map(_.asInstanceOf[UnresolvedPartitionSpec].spec)) // ANALYZE TABLE works on permanent views if the views are cached. 
case AnalyzeTable(ResolvedV1TableOrViewIdentifier(ident), partitionSpec, noScan) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 075d2a43dce4e..5f67b39b95c35 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -295,6 +295,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case r @ ShowTables(ResolvedNamespace(catalog, ns), pattern) => ShowTablesExec(r.output, catalog.asTableCatalog, ns, pattern) :: Nil + case _: ShowTableExtended => + throw new AnalysisException("SHOW TABLE EXTENDED is not supported for v2 tables.") + case SetCatalogAndNamespace(catalogManager, catalogName, ns) => SetCatalogAndNamespaceExec(catalogManager, catalogName, ns) :: Nil diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesParserSuite.scala index 16f3dea8d75ef..d68e1233f7ab2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesParserSuite.scala @@ -17,9 +17,9 @@ package org.apache.spark.sql.execution.command -import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedNamespace} +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedNamespace, UnresolvedPartitionSpec} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan -import org.apache.spark.sql.catalyst.plans.logical.{ShowTables, ShowTableStatement} +import org.apache.spark.sql.catalyst.plans.logical.{ShowTableExtended, ShowTables} import org.apache.spark.sql.test.SharedSparkSession class ShowTablesParserSuite extends AnalysisTest with SharedSparkSession { @@ -52,25 +52,32 @@ class ShowTablesParserSuite extends AnalysisTest with SharedSparkSession { test("show table extended") { comparePlans( parsePlan("SHOW TABLE EXTENDED LIKE '*test*'"), - ShowTableStatement(None, "*test*", None)) + ShowTableExtended(UnresolvedNamespace(Seq.empty[String]), "*test*", None)) comparePlans( parsePlan(s"SHOW TABLE EXTENDED FROM $catalog.ns1.ns2 LIKE '*test*'"), - ShowTableStatement(Some(Seq(catalog, "ns1", "ns2")), "*test*", None)) + ShowTableExtended(UnresolvedNamespace(Seq(catalog, "ns1", "ns2")), "*test*", None)) comparePlans( parsePlan(s"SHOW TABLE EXTENDED IN $catalog.ns1.ns2 LIKE '*test*'"), - ShowTableStatement(Some(Seq(catalog, "ns1", "ns2")), "*test*", None)) + ShowTableExtended(UnresolvedNamespace(Seq(catalog, "ns1", "ns2")), "*test*", None)) comparePlans( parsePlan("SHOW TABLE EXTENDED LIKE '*test*' PARTITION(ds='2008-04-09', hr=11)"), - ShowTableStatement(None, "*test*", Some(Map("ds" -> "2008-04-09", "hr" -> "11")))) + ShowTableExtended( + UnresolvedNamespace(Seq.empty[String]), + "*test*", + Some(UnresolvedPartitionSpec(Map("ds" -> "2008-04-09", "hr" -> "11"))))) comparePlans( parsePlan(s"SHOW TABLE EXTENDED FROM $catalog.ns1.ns2 LIKE '*test*' " + "PARTITION(ds='2008-04-09')"), - ShowTableStatement(Some(Seq(catalog, "ns1", "ns2")), "*test*", - Some(Map("ds" -> "2008-04-09")))) + ShowTableExtended( + UnresolvedNamespace(Seq(catalog, "ns1", "ns2")), + "*test*", + Some(UnresolvedPartitionSpec(Map("ds" -> 
"2008-04-09"))))) comparePlans( parsePlan(s"SHOW TABLE EXTENDED IN $catalog.ns1.ns2 LIKE '*test*' " + "PARTITION(ds='2008-04-09')"), - ShowTableStatement(Some(Seq(catalog, "ns1", "ns2")), "*test*", - Some(Map("ds" -> "2008-04-09")))) + ShowTableExtended( + UnresolvedNamespace(Seq(catalog, "ns1", "ns2")), + "*test*", + Some(UnresolvedPartitionSpec(Map("ds" -> "2008-04-09"))))) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala index aff1729a000b6..370c8358e64da 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.command.v2 import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, Row} -import org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException import org.apache.spark.sql.connector.InMemoryTableCatalog import org.apache.spark.sql.execution.command import org.apache.spark.sql.test.SharedSparkSession @@ -74,7 +73,7 @@ class ShowTablesSuite extends command.ShowTablesSuiteBase with SharedSparkSessio val e = intercept[AnalysisException] { sql(sqlCommand) } - assert(e.message.contains(s"The database name is not valid: ${namespace}")) + assert(e.message.contains(s"SHOW TABLE EXTENDED is not supported for v2 tables")) } val namespace = s"$catalog.ns1.ns2" @@ -101,10 +100,10 @@ class ShowTablesSuite extends command.ShowTablesSuiteBase with SharedSparkSessio val table = "people" withTable(s"$catalog.$table") { sql(s"CREATE TABLE $catalog.$table (name STRING, id INT) $defaultUsing") - val errMsg = intercept[NoSuchDatabaseException] { + val errMsg = intercept[AnalysisException] { sql(s"SHOW TABLE EXTENDED FROM $catalog LIKE '*$table*'").collect() }.getMessage - assert(errMsg.contains(s"Database '$catalog' not found")) + assert(errMsg.contains("SHOW TABLE EXTENDED is not supported for v2 tables")) } } } From c05ee06f5b711dd261dc94a01b4ba4ffccdf2ea0 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Tue, 8 Dec 2020 14:07:58 +0000 Subject: [PATCH 0704/1009] [SPARK-33685][SQL] Migrate DROP VIEW command to use UnresolvedView to resolve the identifier ### What changes were proposed in this pull request? This PR introduces `UnresolvedView` in the resolution framework to resolve the identifier. This PR then migrates `DROP VIEW` to use `UnresolvedView` to resolve the table/view identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). ### Why are the changes needed? To use `UnresolvedView` for view resolution. Note that there is no resolution behavior change with this PR. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Updated existing tests. Closes #30636 from imback82/drop_view_v2. 
Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 17 ++++++++-- .../sql/catalyst/analysis/CheckAnalysis.scala | 13 ++++++-- .../catalyst/analysis/ResolveCatalogs.scala | 5 --- ...cala => ResolveCommandsWithIfExists.scala} | 14 +++++---- .../catalyst/analysis/v2ResolutionPlans.scala | 13 ++++++++ .../sql/catalyst/parser/AstBuilder.scala | 9 ++++-- .../catalyst/plans/logical/statements.scala | 7 ----- .../catalyst/plans/logical/v2Commands.scala | 15 +++++++-- .../sql/catalyst/parser/DDLParserSuite.scala | 17 ++++++---- .../analysis/ResolveSessionCatalog.scala | 5 ++- .../datasources/v2/DataSourceV2Strategy.scala | 2 +- .../sql/connector/DataSourceV2SQLSuite.scala | 14 ++++----- .../sql/execution/command/DDLSuite.scala | 5 ++- .../command/PlanResolutionSuite.scala | 31 ++++++++++++++++--- .../sql/hive/execution/HiveDDLSuite.scala | 3 +- 15 files changed, 118 insertions(+), 52 deletions(-) rename sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/{ResolveNoopDropTable.scala => ResolveCommandsWithIfExists.scala} (63%) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 680ec982b2112..6b0cf4be7de74 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -150,7 +150,7 @@ object AnalysisContext { * [[UnresolvedRelation]]s into fully typed objects using information in a [[SessionCatalog]]. */ class Analyzer(override val catalogManager: CatalogManager) - extends RuleExecutor[LogicalPlan] with CheckAnalysis with LookupCatalog with SQLConfHelper { + extends RuleExecutor[LogicalPlan] with CheckAnalysis with SQLConfHelper { private val v1SessionCatalog: SessionCatalog = catalogManager.v1SessionCatalog @@ -277,7 +277,7 @@ class Analyzer(override val catalogManager: CatalogManager) TypeCoercion.typeCoercionRules ++ extendedResolutionRules : _*), Batch("Post-Hoc Resolution", Once, - Seq(ResolveNoopDropTable) ++ + Seq(ResolveCommandsWithIfExists) ++ postHocResolutionRules: _*), Batch("Normalize Alter Table", Once, ResolveAlterTableChanges), Batch("Remove Unresolved Hints", Once, @@ -889,6 +889,11 @@ class Analyzer(override val catalogManager: CatalogManager) u.failAnalysis(s"${ident.quoted} is a temp view. '$cmd' expects a table") } u + case u @ UnresolvedView(ident, _, _) => + lookupTempView(ident).map { _ => + ResolvedView(ident.asIdentifier, isTemp = true) + } + .getOrElse(u) case u @ UnresolvedTableOrView(ident, cmd, allowTempView) => lookupTempView(ident) .map { _ => @@ -1113,6 +1118,14 @@ class Analyzer(override val catalogManager: CatalogManager) case table => table }.getOrElse(u) + case u @ UnresolvedView(identifier, cmd, relationTypeMismatchHint) => + lookupTableOrView(identifier).map { + case v: ResolvedView => v + case _ => + u.failAnalysis(s"${identifier.quoted} is a table. '$cmd' expects a view." 
+ + relationTypeMismatchHint.map(" " + _).getOrElse("")) + }.getOrElse(u) + case u @ UnresolvedTableOrView(identifier, _, _) => lookupTableOrView(identifier).getOrElse(u) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 9f5eefc744135..39cdea2bd4d2a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.optimizer.BooleanSimplification import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, TypeUtils} -import org.apache.spark.sql.connector.catalog.{SupportsAtomicPartitionManagement, SupportsPartitionManagement, Table} +import org.apache.spark.sql.connector.catalog.{LookupCatalog, SupportsAtomicPartitionManagement, SupportsPartitionManagement, Table} import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, After, ColumnPosition, DeleteColumn, RenameColumn, UpdateColumnComment, UpdateColumnNullability, UpdateColumnPosition, UpdateColumnType} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -34,7 +34,7 @@ import org.apache.spark.sql.types._ /** * Throws user facing errors when passed invalid queries that fail to analyze. */ -trait CheckAnalysis extends PredicateHelper { +trait CheckAnalysis extends PredicateHelper with LookupCatalog { protected def isView(nameParts: Seq[String]): Boolean @@ -104,6 +104,15 @@ trait CheckAnalysis extends PredicateHelper { case u: UnresolvedTable => u.failAnalysis(s"Table not found for '${u.commandName}': ${u.multipartIdentifier.quoted}") + case u @ UnresolvedView(NonSessionCatalogAndIdentifier(catalog, ident), cmd, _) => + u.failAnalysis( + s"Cannot specify catalog `${catalog.name}` for view ${ident.quoted} " + + "because view support in v2 catalog has not been implemented yet. 
" + + s"$cmd expects a view.") + + case u: UnresolvedView => + u.failAnalysis(s"View not found for '${u.commandName}': ${u.multipartIdentifier.quoted}") + case u: UnresolvedTableOrView => val viewStr = if (u.allowTempView) "view" else "permanent view" u.failAnalysis( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala index 6d89414ba106d..b4dfee1330036 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala @@ -187,11 +187,6 @@ class ResolveCatalogs(val catalogManager: CatalogManager) writeOptions = c.writeOptions, orCreate = c.orCreate) - case DropViewStatement(NonSessionCatalogAndTable(catalog, viewName), _) => - throw new AnalysisException( - s"Can not specify catalog `${catalog.name}` for view ${viewName.quoted} " + - s"because view support in catalog has not been implemented yet") - case c @ CreateNamespaceStatement(CatalogAndNamespace(catalog, ns), _, _) if !isSessionCatalog(catalog) => CreateNamespace(catalog.asNamespaceCatalog, ns, c.ifNotExists, c.properties) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveNoopDropTable.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCommandsWithIfExists.scala similarity index 63% rename from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveNoopDropTable.scala rename to sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCommandsWithIfExists.scala index f9da9174f85e6..196a07a7f9904 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveNoopDropTable.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCommandsWithIfExists.scala @@ -17,17 +17,19 @@ package org.apache.spark.sql.catalyst.analysis -import org.apache.spark.sql.catalyst.plans.logical.{DropTable, LogicalPlan, NoopDropTable} +import org.apache.spark.sql.catalyst.plans.logical.{DropTable, DropView, LogicalPlan, NoopCommand} import org.apache.spark.sql.catalyst.rules.Rule /** - * A rule for handling [[DropTable]] logical plan when the table or temp view is not resolved. - * If "ifExists" flag is set to true, the plan is resolved to [[NoopDropTable]], - * which is a no-op command. + * A rule for handling commands when the table or temp view is not resolved. + * These commands support a flag, "ifExists", so that they do not fail when a relation is not + * resolved. If the "ifExists" flag is set to true. 
the plan is resolved to [[NoopCommand]], */ -object ResolveNoopDropTable extends Rule[LogicalPlan] { +object ResolveCommandsWithIfExists extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { case DropTable(u: UnresolvedTableOrView, ifExists, _) if ifExists => - NoopDropTable(u.multipartIdentifier) + NoopCommand("DROP TABLE", u.multipartIdentifier) + case DropView(u: UnresolvedView, ifExists) if ifExists => + NoopCommand("DROP VIEW", u.multipartIdentifier) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala index 1518f064d78db..2737b5d58bf42 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala @@ -45,6 +45,19 @@ case class UnresolvedTable( override def output: Seq[Attribute] = Nil } +/** + * Holds the name of a view that has yet to be looked up in a catalog. It will be resolved to + * [[ResolvedView]] during analysis. + */ +case class UnresolvedView( + multipartIdentifier: Seq[String], + commandName: String, + relationTypeMismatchHint: Option[String] = None) extends LeafNode { + override lazy val resolved: Boolean = false + + override def output: Seq[Attribute] = Nil +} + /** * Holds the name of a table or view that has yet to be looked up in a catalog. It will * be resolved to [[ResolvedTable]] or [[ResolvedView]] during analysis. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index b6bd3b77fc874..89b81ec1d83aa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3155,11 +3155,14 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } /** - * Create a [[DropViewStatement]] command. + * Create a [[DropView]] command. */ override def visitDropView(ctx: DropViewContext): AnyRef = withOrigin(ctx) { - DropViewStatement( - visitMultipartIdentifier(ctx.multipartIdentifier()), + DropView( + UnresolvedView( + visitMultipartIdentifier(ctx.multipartIdentifier()), + "DROP VIEW", + Some("Please use DROP TABLE instead.")), ctx.EXISTS != null) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index 402ae657d1709..c4ac8ea8f2e69 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -338,13 +338,6 @@ case class AlterViewAsStatement( originalText: String, query: LogicalPlan) extends ParsedStatement -/** - * A DROP VIEW statement, as parsed from SQL. - */ -case class DropViewStatement( - viewName: Seq[String], - ifExists: Boolean) extends ParsedStatement - /** * An INSERT INTO statement, as parsed from SQL. 
* diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 72ba9cf6db0e2..1e17c51137a55 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -419,9 +419,11 @@ case class DropTable( } /** - * The logical plan for handling non-existing table for DROP TABLE command. + * The logical plan for no-op command handling non-existing table. */ -case class NoopDropTable(multipartIdentifier: Seq[String]) extends Command +case class NoopCommand( + commandName: String, + multipartIdentifier: Seq[String]) extends Command /** * The logical plan of the ALTER TABLE command. @@ -724,3 +726,12 @@ case class ShowPartitions( override val output: Seq[Attribute] = Seq( AttributeReference("partition", StringType, nullable = false)()) } + +/** + * The logical plan of the DROP VIEW command. + */ +case class DropView( + child: LogicalPlan, + ifExists: Boolean) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index f925be8617b47..d5b27d9ad25cf 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.parser import java.util.Locale import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedNamespace, UnresolvedPartitionSpec, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView} +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedNamespace, UnresolvedPartitionSpec, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView, UnresolvedView} import org.apache.spark.sql.catalyst.catalog.{ArchiveResource, BucketSpec, FileResource, FunctionResource, JarResource} import org.apache.spark.sql.catalyst.expressions.{EqualTo, Literal} import org.apache.spark.sql.catalyst.plans.logical._ @@ -721,13 +721,18 @@ class DDLParserSuite extends AnalysisTest { } test("drop view") { + val cmd = "DROP VIEW" + val hint = Some("Please use DROP TABLE instead.") parseCompare(s"DROP VIEW testcat.db.view", - DropViewStatement(Seq("testcat", "db", "view"), ifExists = false)) - parseCompare(s"DROP VIEW db.view", DropViewStatement(Seq("db", "view"), ifExists = false)) + DropView(UnresolvedView(Seq("testcat", "db", "view"), cmd, hint), ifExists = false)) + parseCompare(s"DROP VIEW db.view", + DropView(UnresolvedView(Seq("db", "view"), cmd, hint), ifExists = false)) parseCompare(s"DROP VIEW IF EXISTS db.view", - DropViewStatement(Seq("db", "view"), ifExists = true)) - parseCompare(s"DROP VIEW view", DropViewStatement(Seq("view"), ifExists = false)) - parseCompare(s"DROP VIEW IF EXISTS view", DropViewStatement(Seq("view"), ifExists = true)) + DropView(UnresolvedView(Seq("db", "view"), cmd, hint), ifExists = true)) + parseCompare(s"DROP VIEW view", + DropView(UnresolvedView(Seq("view"), cmd, 
hint), ifExists = false)) + parseCompare(s"DROP VIEW IF EXISTS view", + DropView(UnresolvedView(Seq("view"), cmd, hint), ifExists = true)) } private def testCreateOrReplaceDdl( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 4c7e6fefd9759..657764832a931 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -352,9 +352,8 @@ class ResolveSessionCatalog( } DropTableCommand(r.identifier.asTableIdentifier, ifExists, isView = false, purge = purge) - // v1 DROP TABLE supports temp view. - case DropViewStatement(TempViewOrV1Table(name), ifExists) => - DropTableCommand(name.asTableIdentifier, ifExists, isView = true, purge = false) + case DropView(r: ResolvedView, ifExists) => + DropTableCommand(r.identifier.asTableIdentifier, ifExists, isView = true, purge = false) case c @ CreateNamespaceStatement(CatalogAndNamespace(catalog, ns), _, _) if isSessionCatalog(catalog) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 5f67b39b95c35..7e2a485dcb4cc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -251,7 +251,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case DropTable(r: ResolvedTable, ifExists, purge) => DropTableExec(r.catalog, r.identifier, ifExists, purge, invalidateCache(r)) :: Nil - case _: NoopDropTable => + case _: NoopCommand => LocalTableScanExec(Nil, Nil) :: Nil case AlterTable(catalog, ident, _, changes) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 2673577aecf36..9a8c3e3cf1a11 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2594,6 +2594,13 @@ class DataSourceV2SQLSuite } } + test("DROP VIEW is not supported for v2 catalogs") { + assertAnalysisError( + "DROP VIEW testcat.v", + "Cannot specify catalog `testcat` for view v because view support in v2 catalog " + + "has not been implemented yet. 
DROP VIEW expects a view.") + } + private def testNotSupportedV2Command( sqlCommand: String, sqlParams: String, @@ -2612,13 +2619,6 @@ class DataSourceV2SQLSuite assert(e.message.contains(s"$sqlCommand is only supported with v1 tables")) } - private def testV1CommandSupportingTempView(sqlCommand: String, sqlParams: String): Unit = { - val e = intercept[AnalysisException] { - sql(s"$sqlCommand $sqlParams") - } - assert(e.message.contains(s"$sqlCommand is only supported with temp views or v1 tables")) - } - private def assertAnalysisError(sqlStatement: String, expectedError: String): Unit = { val errMsg = intercept[AnalysisException] { sql(sqlStatement) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 4f79e71419a10..b3cd9f1057a70 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -1363,12 +1363,11 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { createDatabase(catalog, "dbx") createTable(catalog, tableIdent) assert(catalog.listTables("dbx") == Seq(tableIdent)) - val e = intercept[AnalysisException] { sql("DROP VIEW dbx.tab1") } - assert( - e.getMessage.contains("Cannot drop a table with DROP VIEW. Please use DROP TABLE instead")) + assert(e.getMessage.contains( + "dbx.tab1 is a table. 'DROP VIEW' expects a view. Please use DROP TABLE instead.")) } protected def testSetProperties(isDatasourceTable: Boolean): Unit = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 38719311f1aef..5147a8485ea25 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -78,6 +78,14 @@ class PlanResolutionSuite extends AnalysisTest { V1Table(t) } + private val view: V1Table = { + val t = mock(classOf[CatalogTable]) + when(t.schema).thenReturn(new StructType().add("i", "int").add("s", "string")) + when(t.tableType).thenReturn(CatalogTableType.VIEW) + when(t.provider).thenReturn(Some(v1Format)) + V1Table(t) + } + private val testCat: TableCatalog = { val newCatalog = mock(classOf[TableCatalog]) when(newCatalog.loadTable(any())).thenAnswer((invocation: InvocationOnMock) => { @@ -101,6 +109,7 @@ class PlanResolutionSuite extends AnalysisTest { case "v2Table" => table case "v2Table1" => table case "v2TableWithAcceptAnySchemaCapability" => tableWithAcceptAnySchemaCapability + case "view" => view case name => throw new NoSuchTableException(name) } }) @@ -148,7 +157,10 @@ class PlanResolutionSuite extends AnalysisTest { manager } - def parseAndResolve(query: String, withDefault: Boolean = false): LogicalPlan = { + def parseAndResolve( + query: String, + withDefault: Boolean = false, + checkAnalysis: Boolean = false): LogicalPlan = { val catalogManager = if (withDefault) { catalogManagerWithDefault } else { @@ -158,8 +170,13 @@ class PlanResolutionSuite extends AnalysisTest { override val extendedResolutionRules: Seq[Rule[LogicalPlan]] = Seq( new ResolveSessionCatalog(catalogManager, _ == Seq("v"), _ => false)) } - // We don't check analysis here, as we expect the plan to be unresolved such as `CreateTable`. 
- analyzer.execute(CatalystSqlParser.parsePlan(query)) + // We don't check analysis here by default, as we expect the plan to be unresolved + // such as `CreateTable`. + val analyzed = analyzer.execute(CatalystSqlParser.parsePlan(query)) + if (checkAnalysis) { + analyzer.checkAnalysis(analyzed) + } + analyzed } private def parseResolveCompare(query: String, expected: LogicalPlan): Unit = @@ -677,6 +694,8 @@ class PlanResolutionSuite extends AnalysisTest { val viewIdent1 = TableIdentifier("view", Option("db")) val viewName2 = "view" val viewIdent2 = TableIdentifier("view", Option("default")) + val tempViewName = "v" + val tempViewIdent = TableIdentifier("v") parseResolveCompare(s"DROP VIEW $viewName1", DropTableCommand(viewIdent1, ifExists = false, isView = true, purge = false)) @@ -686,11 +705,15 @@ class PlanResolutionSuite extends AnalysisTest { DropTableCommand(viewIdent2, ifExists = false, isView = true, purge = false)) parseResolveCompare(s"DROP VIEW IF EXISTS $viewName2", DropTableCommand(viewIdent2, ifExists = true, isView = true, purge = false)) + parseResolveCompare(s"DROP VIEW $tempViewName", + DropTableCommand(tempViewIdent, ifExists = false, isView = true, purge = false)) + parseResolveCompare(s"DROP VIEW IF EXISTS $tempViewName", + DropTableCommand(tempViewIdent, ifExists = true, isView = true, purge = false)) } test("drop view in v2 catalog") { intercept[AnalysisException] { - parseAndResolve("DROP VIEW testcat.db.view") + parseAndResolve("DROP VIEW testcat.db.view", checkAnalysis = true) }.getMessage.toLowerCase(Locale.ROOT).contains( "view support in catalog has not been implemented") } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index ce31e39985971..d6a4d76386889 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -1048,7 +1048,8 @@ class HiveDDLSuite val message = intercept[AnalysisException] { sql("DROP VIEW tab1") }.getMessage - assert(message.contains("Cannot drop a table with DROP VIEW. Please use DROP TABLE instead")) + assert(message.contains( + "tab1 is a table. 'DROP VIEW' expects a view. Please use DROP TABLE instead.")) } } From a093d6feefb0e086d19c86ae53bf92df12ccf2fa Mon Sep 17 00:00:00 2001 From: Josh Soref Date: Tue, 8 Dec 2020 08:57:13 -0600 Subject: [PATCH 0705/1009] [MINOR] Spelling sql/core ### What changes were proposed in this pull request? This PR intends to fix typos in the sub-modules: * `sql/core` Split per srowen https://github.com/apache/spark/pull/30323#issuecomment-728981618 NOTE: The misspellings have been reported at https://github.com/jsoref/spark/commit/706a726f87a0bbf5e31467fae9015218773db85b#commitcomment-44064356 ### Why are the changes needed? Misspelled words make it harder to read / understand content. ### Does this PR introduce _any_ user-facing change? There are various fixes to documentation, etc... ### How was this patch tested? No testing was performed Closes #30531 from jsoref/spelling-sql-core. 
Authored-by: Josh Soref Signed-off-by: Sean Owen --- .../sql/execution/ui/static/spark-sql-viz.js | 8 ++--- .../scala/org/apache/spark/sql/Dataset.scala | 10 +++--- .../sql/execution/DataSourceScanExec.scala | 6 ++-- .../spark/sql/execution/ExplainUtils.scala | 8 ++--- .../ExternalAppendOnlyUnsafeRowArray.scala | 2 +- .../spark/sql/execution/SparkSqlParser.scala | 14 ++++---- .../sql/execution/WholeStageCodegenExec.scala | 2 +- .../adaptive/AdaptiveSparkPlanHelper.scala | 2 +- .../InsertIntoDataSourceDirCommand.scala | 2 +- .../spark/sql/execution/command/ddl.scala | 4 +-- .../spark/sql/execution/command/tables.scala | 2 +- .../execution/datasources/DataSource.scala | 2 +- .../datasources/FileFormatDataWriter.scala | 14 ++++---- .../datasources/FileFormatWriter.scala | 2 +- .../datasources/PartitioningUtils.scala | 2 +- .../v2/WriteToDataSourceV2Exec.scala | 2 +- .../sql/execution/joins/HashedRelation.scala | 4 +-- .../execution/python/ExtractPythonUDFs.scala | 6 ++-- .../streaming/CompactibleFileStreamLog.scala | 2 +- .../execution/streaming/StreamExecution.scala | 2 +- .../FlatMapGroupsWithStateExecHelper.scala | 2 +- .../apache/spark/sql/internal/HiveSerDe.scala | 2 +- .../sql/streaming/DataStreamWriter.scala | 4 +-- .../sql/Java8DatasetAggregatorSuite.java | 16 +++++----- .../spark/sql/JavaDatasetAggregatorSuite.java | 24 +++++++------- .../ansi/decimalArithmeticOperations.sql | 2 +- .../inputs/postgreSQL/create_view.sql | 2 +- .../apache/spark/sql/CachedTableSuite.scala | 8 ++--- .../org/apache/spark/sql/DataFrameSuite.scala | 2 +- .../apache/spark/sql/DatasetCacheSuite.scala | 13 ++++---- .../spark/sql/DatasetPrimitiveSuite.scala | 8 ++--- .../org/apache/spark/sql/DatasetSuite.scala | 32 +++++++++---------- .../apache/spark/sql/DateFunctionsSuite.scala | 6 ++-- .../org/apache/spark/sql/SQLQuerySuite.scala | 6 ++-- .../apache/spark/sql/SQLQueryTestSuite.scala | 10 +++--- .../sql/SparkSessionExtensionSuite.scala | 18 +++++------ .../apache/spark/sql/TPCDSTableStats.scala | 2 +- .../sql/connector/DataSourceV2SQLSuite.scala | 12 +++---- .../execution/SQLWindowFunctionSuite.scala | 2 +- .../sql/execution/SparkSqlParserSuite.scala | 2 +- .../execution/WholeStageCodegenSuite.scala | 4 +-- .../adaptive/AdaptiveQueryExecSuite.scala | 8 ++--- .../arrow/ArrowConvertersSuite.scala | 2 +- .../sql/execution/command/DDLSuite.scala | 12 +++---- .../command/PlanResolutionSuite.scala | 16 +++++----- .../datasources/DataSourceSuite.scala | 4 +-- .../datasources/SchemaPruningSuite.scala | 8 ++--- .../ParquetInteroperabilitySuite.scala | 2 +- .../ParquetPartitionDiscoverySuite.scala | 4 +-- .../parquet/ParquetQuerySuite.scala | 4 +-- .../exchange/EnsureRequirementsSuite.scala | 2 +- .../execution/metric/SQLMetricsSuite.scala | 2 +- .../streaming/HDFSMetadataLogSuite.scala | 2 +- .../sql/execution/ui/SparkPlanInfoSuite.scala | 6 ++-- .../internal/ExecutorSideSQLConfSuite.scala | 4 +-- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 12 +++---- .../spark/sql/sources/BucketedReadSuite.scala | 18 ++++++----- .../sources/CreateTableAsSelectSuite.scala | 2 +- .../spark/sql/sources/TableScanSuite.scala | 6 ++-- .../sql/streaming/FileStreamSourceSuite.scala | 4 +-- .../spark/sql/streaming/StreamSuite.scala | 8 ++--- .../test/DataStreamTableAPISuite.scala | 8 ++--- .../apache/spark/sql/test/SQLTestData.scala | 4 +-- .../spark/sql/test/SharedSparkSession.scala | 2 +- 64 files changed, 208 insertions(+), 205 deletions(-) diff --git 
a/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js index 301183f749a84..d1def1b0a42ff 100644 --- a/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js +++ b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js @@ -87,14 +87,14 @@ function preprocessGraphLayout(g) { var node = g.node(nodes[i]); node.padding = "5"; - var firstSearator; + var firstSeparator; var secondSeparator; var splitter; if (node.isCluster) { - firstSearator = secondSeparator = labelSeparator; + firstSeparator = secondSeparator = labelSeparator; splitter = "\\n"; } else { - firstSearator = ""; + firstSeparator = ""; secondSeparator = ""; splitter = "
      "; } @@ -104,7 +104,7 @@ function preprocessGraphLayout(g) { if (newTexts) { node.label = node.label.replace( newTexts[0], - newTexts[1] + firstSearator + newTexts[2] + secondSeparator + newTexts[3]); + newTexts[1] + firstSeparator + newTexts[2] + secondSeparator + newTexts[3]); } }); } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 05d6647afd958..6afbbce3ff8d4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -1363,7 +1363,7 @@ class Dataset[T] private[sql]( // Attach the dataset id and column position to the column reference, so that we can detect // ambiguous self-join correctly. See the rule `DetectAmbiguousSelfJoin`. // This must be called before we return a `Column` that contains `AttributeReference`. - // Note that, the metadata added here are only avaiable in the analyzer, as the analyzer rule + // Note that, the metadata added here are only available in the analyzer, as the analyzer rule // `DetectAmbiguousSelfJoin` will remove it. private def addDataFrameIdToCol(expr: NamedExpression): NamedExpression = { val newExpr = expr transform { @@ -1665,10 +1665,10 @@ class Dataset[T] private[sql]( * See [[RelationalGroupedDataset]] for all the available aggregate functions. * * {{{ - * // Compute the average for all numeric columns rolluped by department and group. + * // Compute the average for all numeric columns rolled up by department and group. * ds.rollup($"department", $"group").avg() * - * // Compute the max age and average salary, rolluped by department and gender. + * // Compute the max age and average salary, rolled up by department and gender. * ds.rollup($"department", $"gender").agg(Map( * "salary" -> "avg", * "age" -> "max" @@ -1794,10 +1794,10 @@ class Dataset[T] private[sql]( * (i.e. cannot construct expressions). * * {{{ - * // Compute the average for all numeric columns rolluped by department and group. + * // Compute the average for all numeric columns rolled up by department and group. * ds.rollup("department", "group").avg() * - * // Compute the max age and average salary, rolluped by department and gender. + * // Compute the max age and average salary, rolled up by department and gender. * ds.rollup($"department", $"gender").agg(Map( * "salary" -> "avg", * "age" -> "max" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index 44636beeec7fc..df3b9f2a4e9cb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -284,7 +284,7 @@ case class FileSourceScanExec( // // Sort ordering would be over the prefix subset of `sort columns` being read // from the table. - // eg. + // e.g. 
// Assume (col0, col2, col3) are the columns read from the table // If sort columns are (col0, col1), then sort ordering would be considered as (col0) // If sort columns are (col1, col0), then sort ordering would be empty as per rule #2 @@ -379,12 +379,12 @@ case class FileSourceScanExec( case (key, _) if (key.equals("Location")) => val location = relation.location val numPaths = location.rootPaths.length - val abbreviatedLoaction = if (numPaths <= 1) { + val abbreviatedLocation = if (numPaths <= 1) { location.rootPaths.mkString("[", ", ", "]") } else { "[" + location.rootPaths.head + s", ... ${numPaths - 1} entries]" } - s"$key: ${location.getClass.getSimpleName} ${redact(abbreviatedLoaction)}" + s"$key: ${location.getClass.getSimpleName} ${redact(abbreviatedLocation)}" case (key, value) => s"$key: ${redact(value)}" } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala index b54bd6a579b66..20e6fb6f96eaa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala @@ -28,14 +28,14 @@ import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, AdaptiveS object ExplainUtils extends AdaptiveSparkPlanHelper { /** * Given a input physical plan, performs the following tasks. - * 1. Computes the operator id for current operator and records it in the operaror + * 1. Computes the operator id for current operator and records it in the operator * by setting a tag. * 2. Computes the whole stage codegen id for current operator and records it in the * operator by setting a tag. * 3. Generate the two part explain output for this plan. * 1. First part explains the operator tree with each operator tagged with an unique * identifier. - * 2. Second part explans each operator in a verbose manner. + * 2. Second part explains each operator in a verbose manner. * * Note : This function skips over subqueries. They are handled by its caller. * @@ -117,7 +117,7 @@ object ExplainUtils extends AdaptiveSparkPlanHelper { } /** - * Traverses the supplied input plan in a bottem-up fashion does the following : + * Traverses the supplied input plan in a bottom-up fashion does the following : * 1. produces a map : operator identifier -> operator * 2. Records the operator id via setting a tag in the operator. * Note : @@ -210,7 +210,7 @@ object ExplainUtils extends AdaptiveSparkPlanHelper { /** * Given a input plan, returns an array of tuples comprising of : - * 1. Hosting opeator id. + * 1. Hosting operator id. * 2. Hosting expression * 3. Subquery plan */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArray.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArray.scala index 993627847c08c..c5e5de588ba9d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArray.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArray.scala @@ -87,7 +87,7 @@ private[sql] class ExternalAppendOnlyUnsafeRowArray( def isEmpty: Boolean = numRows == 0 /** - * Clears up resources (eg. memory) held by the backing storage + * Clears up resources (e.g. 
memory) held by the backing storage */ def clear(): Unit = { if (spillableArray != null) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index c82e3818b48cc..7a31b0dcdd43d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -386,25 +386,25 @@ class SparkSqlAstBuilder extends AstBuilder { * - '/path/to/fileOrJar' */ override def visitManageResource(ctx: ManageResourceContext): LogicalPlan = withOrigin(ctx) { - val mayebePaths = if (ctx.STRING != null) string(ctx.STRING) else remainder(ctx.identifier).trim + val maybePaths = if (ctx.STRING != null) string(ctx.STRING) else remainder(ctx.identifier).trim ctx.op.getType match { case SqlBaseParser.ADD => ctx.identifier.getText.toLowerCase(Locale.ROOT) match { - case "file" => AddFileCommand(mayebePaths) - case "jar" => AddJarCommand(mayebePaths) + case "file" => AddFileCommand(maybePaths) + case "jar" => AddJarCommand(maybePaths) case other => operationNotAllowed(s"ADD with resource type '$other'", ctx) } case SqlBaseParser.LIST => ctx.identifier.getText.toLowerCase(Locale.ROOT) match { case "files" | "file" => - if (mayebePaths.length > 0) { - ListFilesCommand(mayebePaths.split("\\s+")) + if (maybePaths.length > 0) { + ListFilesCommand(maybePaths.split("\\s+")) } else { ListFilesCommand() } case "jars" | "jar" => - if (mayebePaths.length > 0) { - ListJarsCommand(mayebePaths.split("\\s+")) + if (maybePaths.length > 0) { + ListJarsCommand(maybePaths.split("\\s+")) } else { ListJarsCommand() } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index b2963457e22db..c6ea99cfdad7b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -670,7 +670,7 @@ case class WholeStageCodegenExec(child: SparkPlan)(val codegenStageId: Int) } ${ctx.registerComment( - s"""Codegend pipeline for stage (id=$codegenStageId) + s"""Codegened pipeline for stage (id=$codegenStageId) |${this.treeString.trim}""".stripMargin, "wsc_codegenPipeline")} ${ctx.registerComment(s"codegenStageId=$codegenStageId", "wsc_codegenStageId", true)} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanHelper.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanHelper.scala index 6ba375910a4eb..eecfa40e8d0bd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanHelper.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanHelper.scala @@ -115,7 +115,7 @@ trait AdaptiveSparkPlanHelper { /** * Returns a sequence containing the subqueries in this plan, also including the (nested) - * subquries in its children + * subqueries in its children */ def subqueriesAll(p: SparkPlan): Seq[SparkPlan] = { val subqueries = flatMap(p)(_.subqueries) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/InsertIntoDataSourceDirCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/InsertIntoDataSourceDirCommand.scala index 08d31fdda2dc8..d065bc0dab4cd 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/InsertIntoDataSourceDirCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/InsertIntoDataSourceDirCommand.scala @@ -36,7 +36,7 @@ import org.apache.spark.sql.execution.datasources._ * @param storage storage format used to describe how the query result is stored. * @param provider the data source type to be used * @param query the logical plan representing data to write to - * @param overwrite whthere overwrites existing directory + * @param overwrite whether overwrites existing directory */ case class InsertIntoDataSourceDirCommand( storage: CatalogStorageFormat, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 69425cfed285f..6d631e044e917 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -89,8 +89,8 @@ case class CreateDatabaseCommand( * A command for users to remove a database from the system. * * 'ifExists': - * - true, if database_name does't exist, no action - * - false (default), if database_name does't exist, a warning message will be issued + * - true, if database_name doesn't exist, no action + * - false (default), if database_name doesn't exist, a warning message will be issued * 'cascade': * - true, the dependent objects are automatically dropped before dropping database. * - false (default), it is in the Restrict mode. The database cannot be dropped if diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 640051384e94c..431a103063c68 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -352,7 +352,7 @@ case class LoadDataCommand( // entire string will be considered while making a Path instance,this is mainly done // by considering the wild card scenario in mind.as per old logic query param is // been considered while creating URI instance and if path contains wild card char '?' - // the remaining charecters after '?' will be removed while forming URI instance + // the remaining characters after '?' will be removed while forming URI instance LoadDataCommand.makeQualified(defaultFS, uriPath, loadPath) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index 34ded5d456d09..4783789b91f3e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -211,7 +211,7 @@ case class DataSource( s"Unable to infer schema for $format. It must be specified manually.") } - // We just print a waring message if the data schema and partition schema have the duplicate + // We just print a warning message if the data schema and partition schema have the duplicate // columns. This is because we allow users to do so in the previous Spark releases and // we have the existing tests for the cases (e.g., `ParquetHadoopFsRelationSuite`). // See SPARK-18108 and SPARK-21144 for related discussions. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatDataWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatDataWriter.scala index edb49d3f90ca3..6de9b1d7cea4b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatDataWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatDataWriter.scala @@ -167,7 +167,7 @@ class DynamicPartitionDataWriter( private var fileCounter: Int = _ private var recordsInFile: Long = _ - private var currentPartionValues: Option[UnsafeRow] = None + private var currentPartitionValues: Option[UnsafeRow] = None private var currentBucketId: Option[Int] = None /** Extracts the partition values out of an input row. */ @@ -247,11 +247,11 @@ class DynamicPartitionDataWriter( val nextPartitionValues = if (isPartitioned) Some(getPartitionValues(record)) else None val nextBucketId = if (isBucketed) Some(getBucketId(record)) else None - if (currentPartionValues != nextPartitionValues || currentBucketId != nextBucketId) { + if (currentPartitionValues != nextPartitionValues || currentBucketId != nextBucketId) { // See a new partition or bucket - write to a new partition dir (or a new bucket file). - if (isPartitioned && currentPartionValues != nextPartitionValues) { - currentPartionValues = Some(nextPartitionValues.get.copy()) - statsTrackers.foreach(_.newPartition(currentPartionValues.get)) + if (isPartitioned && currentPartitionValues != nextPartitionValues) { + currentPartitionValues = Some(nextPartitionValues.get.copy()) + statsTrackers.foreach(_.newPartition(currentPartitionValues.get)) } if (isBucketed) { currentBucketId = nextBucketId @@ -259,7 +259,7 @@ class DynamicPartitionDataWriter( } fileCounter = 0 - newOutputWriter(currentPartionValues, currentBucketId) + newOutputWriter(currentPartitionValues, currentBucketId) } else if (description.maxRecordsPerFile > 0 && recordsInFile >= description.maxRecordsPerFile) { // Exceeded the threshold in terms of the number of records per file. @@ -268,7 +268,7 @@ class DynamicPartitionDataWriter( assert(fileCounter < MAX_FILE_COUNTER, s"File counter $fileCounter is beyond max value $MAX_FILE_COUNTER") - newOutputWriter(currentPartionValues, currentBucketId) + newOutputWriter(currentPartitionValues, currentBucketId) } val outputRow = getOutputRow(record) currentWriter.write(outputRow) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala index a71aeb47872ce..48ebd6f0c610f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala @@ -164,7 +164,7 @@ object FileFormatWriter extends Logging { SQLExecution.checkSQLExecutionId(sparkSession) - // propagate the decription UUID into the jobs, so that committers + // propagate the description UUID into the jobs, so that committers // get an ID guaranteed to be unique. 
job.getConfiguration.set("spark.sql.sources.writeJobUUID", description.uuid) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala index ea437d200eaab..69123ee7af5b9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala @@ -453,7 +453,7 @@ object PartitioningUtils { val decimalTry = Try { // `BigDecimal` conversion can fail when the `field` is not a form of number. val bigDecimal = new JBigDecimal(raw) - // It reduces the cases for decimals by disallowing values having scale (eg. `1.1`). + // It reduces the cases for decimals by disallowing values having scale (e.g. `1.1`). require(bigDecimal.scale <= 0) // `DecimalType` conversion can fail when // 1. The precision is bigger than 38. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala index 47aad2bcb2c56..f5f77d38b8716 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala @@ -168,7 +168,7 @@ case class ReplaceTableAsSelectExec( * A new table will be created using the schema of the query, and rows from the query are appended. * If the table exists, its contents and schema should be replaced with the schema and the contents * of the query. This implementation is atomic. The table replacement is staged, and the commit - * operation at the end should perform tne replacement of the table's metadata and contents. If the + * operation at the end should perform the replacement of the table's metadata and contents. If the * write fails, the table is instructed to roll back staged changes and any previously written table * is left untouched. 
*/ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala index 3c5ed40551206..a91cc0782e1f8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala @@ -426,9 +426,9 @@ private[joins] class UnsafeHashedRelation( readBuffer(valuesBuffer, 0, valuesSize) val loc = binaryMap.lookup(keyBuffer, Platform.BYTE_ARRAY_OFFSET, keySize) - val putSuceeded = loc.append(keyBuffer, Platform.BYTE_ARRAY_OFFSET, keySize, + val putSucceeded = loc.append(keyBuffer, Platform.BYTE_ARRAY_OFFSET, keySize, valuesBuffer, Platform.BYTE_ARRAY_OFFSET, valuesSize) - if (!putSuceeded) { + if (!putSucceeded) { binaryMap.free() throw new IOException("Could not allocate memory to grow BytesToBytesMap") } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala index dab2723d25726..b79bcd176b7b7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala @@ -102,7 +102,7 @@ object ExtractGroupingPythonUDFFromAggregate extends Rule[LogicalPlan] { case p: PythonUDF => // This is just a sanity check, the rule PullOutNondeterministic should // already pull out those nondeterministic expressions. - assert(p.udfDeterministic, "Non-determinstic PythonUDFs should not appear " + + assert(p.udfDeterministic, "Non-deterministic PythonUDFs should not appear " + "in grouping expression") val canonicalized = p.canonicalized.asInstanceOf[PythonUDF] if (attributeMap.contains(canonicalized)) { @@ -174,7 +174,7 @@ object ExtractPythonUDFs extends Rule[LogicalPlan] with PredicateHelper { } private def collectEvaluableUDFsFromExpressions(expressions: Seq[Expression]): Seq[PythonUDF] = { - // If fisrt UDF is SQL_SCALAR_PANDAS_ITER_UDF, then only return this UDF, + // If first UDF is SQL_SCALAR_PANDAS_ITER_UDF, then only return this UDF, // otherwise check if subsequent UDFs are of the same type as the first UDF. (since we can only // extract UDFs of the same eval type) @@ -268,7 +268,7 @@ object ExtractPythonUDFs extends Rule[LogicalPlan] with PredicateHelper { case PythonEvalType.SQL_SCALAR_PANDAS_UDF | PythonEvalType.SQL_SCALAR_PANDAS_ITER_UDF => ArrowEvalPython(validUdfs, resultAttrs, child, evalType) case _ => - throw new AnalysisException("Unexcepted UDF evalType") + throw new AnalysisException("Unexpected UDF evalType") } attributeMap ++= validUdfs.map(canonicalizeDeterministic).zip(resultAttrs) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLog.scala index 3c76306f20cd7..835c7c4d5261f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLog.scala @@ -288,7 +288,7 @@ abstract class CompactibleFileStreamLog[T <: AnyRef : ClassTag]( /** * Delete expired log entries that proceed the currentBatchId and retain - * sufficient minimum number of batches (given by minBatchsToRetain). 
This + * sufficient minimum number of batches (given by minBatchesToRetain). This * equates to retaining the earliest compaction log that proceeds * batch id position currentBatchId + 1 - minBatchesToRetain. All log entries * prior to the earliest compaction log proceeding that position will be removed. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index d6be33c76e937..6b0d33b819a20 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -685,6 +685,6 @@ object StreamExecution { /** * A special thread to run the stream query. Some codes require to run in the QueryExecutionThread - * and will use `classOf[QueryxecutionThread]` to check. + * and will use `classOf[QueryExecutionThread]` to check. */ abstract class QueryExecutionThread(name: String) extends UninterruptibleThread(name) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/FlatMapGroupsWithStateExecHelper.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/FlatMapGroupsWithStateExecHelper.scala index 0a16a3819b778..cc785ee4247c4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/FlatMapGroupsWithStateExecHelper.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/FlatMapGroupsWithStateExecHelper.scala @@ -77,7 +77,7 @@ object FlatMapGroupsWithStateExecHelper { // =========================== Private implementations of StateManager =========================== // =============================================================================================== - /** Commmon methods for StateManager implementations */ + /** Common methods for StateManager implementations */ private abstract class StateManagerImplBase(shouldStoreTimestamp: Boolean) extends StateManager { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala index 64b7e7fe7923a..cfcfeabbf1f6e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala @@ -65,7 +65,7 @@ object HiveSerDe { outputFormat = Option("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat"), serde = Option("org.apache.hadoop.hive.serde2.avro.AvroSerDe"))) - // `HiveSerDe` in `serdeMap` should be dintinct. + // `HiveSerDe` in `serdeMap` should be distinct. 
val serdeInverseMap: Map[HiveSerDe, String] = serdeMap.flatMap { case ("sequencefile", _) => None case ("rcfile", _) => None diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index 01e626e5436a4..9e8dff37bcfd2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -387,8 +387,8 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { } val sink = new MemorySink() val resultDf = Dataset.ofRows(df.sparkSession, new MemoryPlan(sink, df.schema.toAttributes)) - val recoverFromChkpoint = outputMode == OutputMode.Complete() - val query = startQuery(sink, extraOptions, recoverFromCheckpoint = recoverFromChkpoint) + val recoverFromCheckpoint = outputMode == OutputMode.Complete() + val query = startQuery(sink, extraOptions, recoverFromCheckpoint = recoverFromCheckpoint) resultDf.createOrReplaceTempView(query.name) query } else if (source == SOURCE_NAME_FOREACH) { diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/Java8DatasetAggregatorSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/Java8DatasetAggregatorSuite.java index dd3755d3f904e..de88f80eb53b8 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/Java8DatasetAggregatorSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/Java8DatasetAggregatorSuite.java @@ -34,43 +34,43 @@ public class Java8DatasetAggregatorSuite extends JavaDatasetAggregatorSuiteBase @Test public void testTypedAggregationAverage() { KeyValueGroupedDataset> grouped = generateGroupedDataset(); - Dataset> agged = grouped.agg( + Dataset> aggregated = grouped.agg( org.apache.spark.sql.expressions.javalang.typed.avg(v -> (double)(v._2() * 2))); Assert.assertEquals( Arrays.asList(new Tuple2<>("a", 3.0), new Tuple2<>("b", 6.0)), - agged.collectAsList()); + aggregated.collectAsList()); } @SuppressWarnings("deprecation") @Test public void testTypedAggregationCount() { KeyValueGroupedDataset> grouped = generateGroupedDataset(); - Dataset> agged = grouped.agg( + Dataset> aggregated = grouped.agg( org.apache.spark.sql.expressions.javalang.typed.count(v -> v)); Assert.assertEquals( Arrays.asList(new Tuple2<>("a", 2L), new Tuple2<>("b", 1L)), - agged.collectAsList()); + aggregated.collectAsList()); } @SuppressWarnings("deprecation") @Test public void testTypedAggregationSumDouble() { KeyValueGroupedDataset> grouped = generateGroupedDataset(); - Dataset> agged = grouped.agg( + Dataset> aggregated = grouped.agg( org.apache.spark.sql.expressions.javalang.typed.sum(v -> (double)v._2())); Assert.assertEquals( Arrays.asList(new Tuple2<>("a", 3.0), new Tuple2<>("b", 3.0)), - agged.collectAsList()); + aggregated.collectAsList()); } @SuppressWarnings("deprecation") @Test public void testTypedAggregationSumLong() { KeyValueGroupedDataset> grouped = generateGroupedDataset(); - Dataset> agged = grouped.agg( + Dataset> aggregated = grouped.agg( org.apache.spark.sql.expressions.javalang.typed.sumLong(v -> (long)v._2())); Assert.assertEquals( Arrays.asList(new Tuple2<>("a", 3L), new Tuple2<>("b", 3L)), - agged.collectAsList()); + aggregated.collectAsList()); } } diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetAggregatorSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetAggregatorSuite.java index 8a90624f2070b..979b7751fa9a8 100644 --- 
a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetAggregatorSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetAggregatorSuite.java @@ -38,18 +38,18 @@ public class JavaDatasetAggregatorSuite extends JavaDatasetAggregatorSuiteBase { public void testTypedAggregationAnonClass() { KeyValueGroupedDataset> grouped = generateGroupedDataset(); - Dataset> agged = grouped.agg(new IntSumOf().toColumn()); + Dataset> aggregated = grouped.agg(new IntSumOf().toColumn()); Assert.assertEquals( Arrays.asList(new Tuple2<>("a", 3), new Tuple2<>("b", 3)), - agged.collectAsList()); + aggregated.collectAsList()); - Dataset> agged2 = grouped.agg(new IntSumOf().toColumn()) + Dataset> aggregated2 = grouped.agg(new IntSumOf().toColumn()) .as(Encoders.tuple(Encoders.STRING(), Encoders.INT())); Assert.assertEquals( Arrays.asList( new Tuple2<>("a", 3), new Tuple2<>("b", 3)), - agged2.collectAsList()); + aggregated2.collectAsList()); } static class IntSumOf extends Aggregator, Integer, Integer> { @@ -88,43 +88,43 @@ public Encoder outputEncoder() { @Test public void testTypedAggregationAverage() { KeyValueGroupedDataset> grouped = generateGroupedDataset(); - Dataset> agged = grouped.agg( + Dataset> aggregated = grouped.agg( org.apache.spark.sql.expressions.javalang.typed.avg(value -> value._2() * 2.0)); Assert.assertEquals( Arrays.asList(new Tuple2<>("a", 3.0), new Tuple2<>("b", 6.0)), - agged.collectAsList()); + aggregated.collectAsList()); } @SuppressWarnings("deprecation") @Test public void testTypedAggregationCount() { KeyValueGroupedDataset> grouped = generateGroupedDataset(); - Dataset> agged = grouped.agg( + Dataset> aggregated = grouped.agg( org.apache.spark.sql.expressions.javalang.typed.count(value -> value)); Assert.assertEquals( Arrays.asList(new Tuple2<>("a", 2L), new Tuple2<>("b", 1L)), - agged.collectAsList()); + aggregated.collectAsList()); } @SuppressWarnings("deprecation") @Test public void testTypedAggregationSumDouble() { KeyValueGroupedDataset> grouped = generateGroupedDataset(); - Dataset> agged = grouped.agg( + Dataset> aggregated = grouped.agg( org.apache.spark.sql.expressions.javalang.typed.sum(value -> (double) value._2())); Assert.assertEquals( Arrays.asList(new Tuple2<>("a", 3.0), new Tuple2<>("b", 3.0)), - agged.collectAsList()); + aggregated.collectAsList()); } @SuppressWarnings("deprecation") @Test public void testTypedAggregationSumLong() { KeyValueGroupedDataset> grouped = generateGroupedDataset(); - Dataset> agged = grouped.agg( + Dataset> aggregated = grouped.agg( org.apache.spark.sql.expressions.javalang.typed.sumLong(value -> (long) value._2())); Assert.assertEquals( Arrays.asList(new Tuple2<>("a", 3L), new Tuple2<>("b", 3L)), - agged.collectAsList()); + aggregated.collectAsList()); } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/ansi/decimalArithmeticOperations.sql b/sql/core/src/test/resources/sql-tests/inputs/ansi/decimalArithmeticOperations.sql index d190f38345d6b..d843847e6a149 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/ansi/decimalArithmeticOperations.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/ansi/decimalArithmeticOperations.sql @@ -1,6 +1,6 @@ -- SPARK-23179: SQL ANSI 2011 states that in case of overflow during arithmetic operations, -- an exception should be thrown instead of returning NULL. --- This is what most of the SQL DBs do (eg. SQLServer, DB2). +-- This is what most of the SQL DBs do (e.g. SQLServer, DB2). 
-- tests for decimals handling in operations create table decimals_test(id int, a decimal(38,18), b decimal(38,18)) using parquet; diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/create_view.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/create_view.sql index 21ffd85f7d01f..2889941c1fcc1 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/create_view.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/create_view.sql @@ -636,7 +636,7 @@ DESC TABLE vv6; -- Check cases involving dropped/altered columns in a function's rowtype result -- --- Skip the tests below because Spark does't support PostgreSQL-specific UDFs/transactions +-- Skip the tests below because Spark doesn't support PostgreSQL-specific UDFs/transactions -- create table tt14t (f1 text, f2 text, f3 text, f4 text); -- insert into tt14t values('foo', 'bar', 'baz', '42'); -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala index d0150616cd67e..3765093f83bc2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala @@ -835,7 +835,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils } } - test("SPARK-19993 nested subquery caching and scalar + predicate subqueris") { + test("SPARK-19993 nested subquery caching and scalar + predicate subqueries") { withTempView("t1", "t2", "t3", "t4") { Seq(1).toDF("c1").createOrReplaceTempView("t1") Seq(2).toDF("c1").createOrReplaceTempView("t2") @@ -886,17 +886,17 @@ class CachedTableSuite extends QueryTest with SQLTestUtils } private def checkIfNoJobTriggered[T](f: => T): T = { - var numJobTrigered = 0 + var numJobTriggered = 0 val jobListener = new SparkListener { override def onJobStart(jobStart: SparkListenerJobStart): Unit = { - numJobTrigered += 1 + numJobTriggered += 1 } } sparkContext.addSparkListener(jobListener) try { val result = f sparkContext.listenerBus.waitUntilEmpty() - assert(numJobTrigered === 0) + assert(numJobTriggered === 0) result } finally { sparkContext.removeSparkListener(jobListener) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index a45bf12e8f841..4fecd625031ba 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -805,7 +805,7 @@ class DataFrameSuite extends QueryTest assert(df2.drop("`a.b`").columns.size == 2) } - test("drop(name: String) search and drop all top level columns that matchs the name") { + test("drop(name: String) search and drop all top level columns that matches the name") { val df1 = Seq((1, 2)).toDF("a", "b") val df2 = Seq((3, 4)).toDF("a", "b") checkAnswer(df1.crossJoin(df2), Row(1, 2, 3, 4)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala index 5c144dad23c30..009ccb9a45354 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala @@ -102,18 +102,19 @@ class DatasetCacheSuite extends QueryTest test("persist and then groupBy columns asKey, map") { val ds = Seq(("a", 10), ("a", 20), ("b", 1), ("b", 2), ("c", 1)).toDS() val grouped = ds.groupByKey(_._1) - val agged = grouped.mapGroups { 
(g, iter) => (g, iter.map(_._2).sum) } - agged.persist() + val aggregated = grouped.mapGroups { (g, iter) => (g, iter.map(_._2).sum) } + aggregated.persist() checkDataset( - agged.filter(_._1 == "b"), + aggregated.filter(_._1 == "b"), ("b", 3)) - assertCached(agged.filter(_._1 == "b")) + assertCached(aggregated.filter(_._1 == "b")) ds.unpersist(blocking = true) assert(ds.storageLevel == StorageLevel.NONE, "The Dataset ds should not be cached.") - agged.unpersist(blocking = true) - assert(agged.storageLevel == StorageLevel.NONE, "The Dataset agged should not be cached.") + aggregated.unpersist(blocking = true) + assert(aggregated.storageLevel == StorageLevel.NONE, + "The Dataset aggregated should not be cached.") } test("persist and then withColumn") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala index ac51634febc99..8547d96e0f457 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala @@ -170,23 +170,23 @@ class DatasetPrimitiveSuite extends QueryTest with SharedSparkSession { test("groupBy function, map") { val ds = Seq(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11).toDS() val grouped = ds.groupByKey(_ % 2) - val agged = grouped.mapGroups { (g, iter) => + val aggregated = grouped.mapGroups { (g, iter) => val name = if (g == 0) "even" else "odd" (name, iter.size) } checkDatasetUnorderly( - agged, + aggregated, ("even", 5), ("odd", 6)) } test("groupBy function, flatMap") { val ds = Seq("a", "b", "c", "xyz", "hello").toDS() val grouped = ds.groupByKey(_.length) - val agged = grouped.flatMapGroups { (g, iter) => Iterator(g.toString, iter.mkString) } + val aggregated = grouped.flatMapGroups { (g, iter) => Iterator(g.toString, iter.mkString) } checkDatasetUnorderly( - agged, + aggregated, "1", "abc", "3", "xyz", "5", "hello") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 953a58760cd5c..67e3ad6a80642 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -528,42 +528,42 @@ class DatasetSuite extends QueryTest test("groupBy function, map") { val ds = Seq(("a", 10), ("a", 20), ("b", 1), ("b", 2), ("c", 1)).toDS() val grouped = ds.groupByKey(v => (v._1, "word")) - val agged = grouped.mapGroups { (g, iter) => (g._1, iter.map(_._2).sum) } + val aggregated = grouped.mapGroups { (g, iter) => (g._1, iter.map(_._2).sum) } checkDatasetUnorderly( - agged, + aggregated, ("a", 30), ("b", 3), ("c", 1)) } test("groupBy function, flatMap") { val ds = Seq(("a", 10), ("a", 20), ("b", 1), ("b", 2), ("c", 1)).toDS() val grouped = ds.groupByKey(v => (v._1, "word")) - val agged = grouped.flatMapGroups { (g, iter) => + val aggregated = grouped.flatMapGroups { (g, iter) => Iterator(g._1, iter.map(_._2).sum.toString) } checkDatasetUnorderly( - agged, + aggregated, "a", "30", "b", "3", "c", "1") } test("groupBy function, mapValues, flatMap") { val ds = Seq(("a", 10), ("a", 20), ("b", 1), ("b", 2), ("c", 1)).toDS() val keyValue = ds.groupByKey(_._1).mapValues(_._2) - val agged = keyValue.mapGroups { (g, iter) => (g, iter.sum) } - checkDataset(agged, ("a", 30), ("b", 3), ("c", 1)) + val aggregated = keyValue.mapGroups { (g, iter) => (g, iter.sum) } + checkDataset(aggregated, ("a", 30), ("b", 3), ("c", 1)) val 
keyValue1 = ds.groupByKey(t => (t._1, "key")).mapValues(t => (t._2, "value")) - val agged1 = keyValue1.mapGroups { (g, iter) => (g._1, iter.map(_._1).sum) } - checkDataset(agged1, ("a", 30), ("b", 3), ("c", 1)) + val aggregated1 = keyValue1.mapGroups { (g, iter) => (g._1, iter.map(_._1).sum) } + checkDataset(aggregated1, ("a", 30), ("b", 3), ("c", 1)) } test("groupBy function, reduce") { val ds = Seq("abc", "xyz", "hello").toDS() - val agged = ds.groupByKey(_.length).reduceGroups(_ + _) + val aggregated = ds.groupByKey(_.length).reduceGroups(_ + _) checkDatasetUnorderly( - agged, + aggregated, 3 -> "abcxyz", 5 -> "hello") } @@ -914,11 +914,11 @@ class DatasetSuite extends QueryTest test("grouping key and grouped value has field with same name") { val ds = Seq(ClassData("a", 1), ClassData("a", 2)).toDS() - val agged = ds.groupByKey(d => ClassNullableData(d.a, null)).mapGroups { + val aggregated = ds.groupByKey(d => ClassNullableData(d.a, null)).mapGroups { (key, values) => key.a + values.map(_.b).sum } - checkDataset(agged, "a3") + checkDataset(aggregated, "a3") } test("cogroup's left and right side has field with same name") { @@ -1286,7 +1286,7 @@ class DatasetSuite extends QueryTest Route("b", "c", 6)) val ds = sparkContext.parallelize(data).toDF.as[Route] - val grped = ds.map(r => GroupedRoutes(r.src, r.dest, Seq(r))) + val grouped = ds.map(r => GroupedRoutes(r.src, r.dest, Seq(r))) .groupByKey(r => (r.src, r.dest)) .reduceGroups { (g1: GroupedRoutes, g2: GroupedRoutes) => GroupedRoutes(g1.src, g1.dest, g1.routes ++ g2.routes) @@ -1303,7 +1303,7 @@ class DatasetSuite extends QueryTest implicit def ordering[GroupedRoutes]: Ordering[GroupedRoutes] = (x: GroupedRoutes, y: GroupedRoutes) => x.toString.compareTo(y.toString) - checkDatasetUnorderly(grped, expected: _*) + checkDatasetUnorderly(grouped, expected: _*) } test("SPARK-18189: Fix serialization issue in KeyValueGroupedDataset") { @@ -1383,7 +1383,7 @@ class DatasetSuite extends QueryTest } } } else { - // Local checkpoints dont require checkpoint_dir + // Local checkpoints don't require checkpoint_dir f } } @@ -1474,7 +1474,7 @@ class DatasetSuite extends QueryTest } test("SPARK-18717: code generation works for both scala.collection.Map" + - " and scala.collection.imutable.Map") { + " and scala.collection.immutable.Map") { val ds = Seq(WithImmutableMap("hi", Map(42L -> "foo"))).toDS checkDataset(ds.map(t => t), WithImmutableMap("hi", Map(42L -> "foo"))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala index 9caa4c0377009..d7bbf597ff983 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala @@ -454,7 +454,7 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession { assert(e.getCause.isInstanceOf[IllegalArgumentException]) assert(e.getMessage.contains("You may get a different result due to the upgrading of Spark")) - // february + // February val x1 = "2016-02-29" val x2 = "2017-02-29" val df1 = Seq(x1, x2).toDF("x") @@ -629,7 +629,7 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession { e.getMessage.contains("You may get a different result due to the upgrading of Spark")) } - // february + // February val y1 = "2016-02-29" val y2 = "2017-02-29" val ts5 = Timestamp.valueOf("2016-02-29 00:00:00") @@ -680,7 +680,7 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession { 
checkAnswer(df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd mm:HH:ss')"), Seq( Row(secs(ts4.getTime)), Row(null), Row(secs(ts3.getTime)), Row(null))) - // february + // February val y1 = "2016-02-29" val y2 = "2017-02-29" val ts5 = Timestamp.valueOf("2016-02-29 00:00:00") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 2eeb729ece3fb..ebfe8bdd7a749 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -1316,7 +1316,7 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark ) } - test("oder by asc by default when not specify ascending and descending") { + test("order by asc by default when not specify ascending and descending") { checkAnswer( sql("SELECT a, b FROM testData2 ORDER BY a desc, b"), Seq(Row(3, 1), Row(3, 2), Row(2, 1), Row(2, 2), Row(1, 1), Row(1, 2)) @@ -2812,7 +2812,7 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } - test("SRARK-22266: the same aggregate function was calculated multiple times") { + test("SPARK-22266: the same aggregate function was calculated multiple times") { val query = "SELECT a, max(b+1), max(b+1) + 1 FROM testData2 GROUP BY a" val df = sql(query) val physical = df.queryExecution.sparkPlan @@ -3092,7 +3092,7 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark assert(scan.isInstanceOf[ParquetScan]) assert(scan.asInstanceOf[ParquetScan].pushedFilters === filters) case _ => - fail(s"unknow format $format") + fail(s"unknown format $format") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 36e55c0994f18..02c6fba9725d3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -278,18 +278,18 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper val allCode = importedCode ++ code val tempQueries = if (allCode.exists(_.trim.startsWith("--QUERY-DELIMITER"))) { // Although the loop is heavy, only used for bracketed comments test. 
- val querys = new ArrayBuffer[String] + val queries = new ArrayBuffer[String] val otherCodes = new ArrayBuffer[String] var tempStr = "" var start = false for (c <- allCode) { if (c.trim.startsWith("--QUERY-DELIMITER-START")) { start = true - querys ++= splitWithSemicolon(otherCodes.toSeq) + queries ++= splitWithSemicolon(otherCodes.toSeq) otherCodes.clear() } else if (c.trim.startsWith("--QUERY-DELIMITER-END")) { start = false - querys += s"\n${tempStr.stripSuffix(";")}" + queries += s"\n${tempStr.stripSuffix(";")}" tempStr = "" } else if (start) { tempStr += s"\n$c" @@ -298,9 +298,9 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper } } if (otherCodes.nonEmpty) { - querys ++= splitWithSemicolon(otherCodes.toSeq) + queries ++= splitWithSemicolon(otherCodes.toSeq) } - querys.toSeq + queries.toSeq } else { splitWithSemicolon(allCode).toSeq } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala index 576ad26505d27..5e1c6ba92803d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala @@ -166,13 +166,13 @@ class SparkSessionExtensionSuite extends SparkFunSuite { // inject rule that will run during AQE query stage optimization and will verify that the // custom tags were written in the preparation phase extensions.injectColumnar(session => - MyColumarRule(MyNewQueryStageRule(), MyNewQueryStageRule())) + MyColumnarRule(MyNewQueryStageRule(), MyNewQueryStageRule())) } withSession(extensions) { session => session.sessionState.conf.setConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED, true) assert(session.sessionState.queryStagePrepRules.contains(MyQueryStagePrepRule())) assert(session.sessionState.columnarRules.contains( - MyColumarRule(MyNewQueryStageRule(), MyNewQueryStageRule()))) + MyColumnarRule(MyNewQueryStageRule(), MyNewQueryStageRule()))) import session.sqlContext.implicits._ val data = Seq((100L), (200L), (300L)).toDF("vals").repartition(1) val df = data.selectExpr("vals + 1") @@ -205,12 +205,12 @@ class SparkSessionExtensionSuite extends SparkFunSuite { val extensions = create { extensions => extensions.injectColumnar(session => - MyColumarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule())) + MyColumnarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule())) } withSession(extensions) { session => session.sessionState.conf.setConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED, enableAQE) assert(session.sessionState.columnarRules.contains( - MyColumarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule()))) + MyColumnarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule()))) import session.sqlContext.implicits._ // perform a join to inject a broadcast exchange val left = Seq((1, 50L), (2, 100L), (3, 150L)).toDF("l1", "l2") @@ -244,12 +244,12 @@ class SparkSessionExtensionSuite extends SparkFunSuite { .config(COLUMN_BATCH_SIZE.key, 2) .withExtensions { extensions => extensions.injectColumnar(session => - MyColumarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule())) } + MyColumnarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule())) } .getOrCreate() try { assert(session.sessionState.columnarRules.contains( - MyColumarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule()))) + MyColumnarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule()))) import session.sqlContext.implicits._ val input = Seq((100L), (200L), (300L)) @@ 
-277,7 +277,7 @@ class SparkSessionExtensionSuite extends SparkFunSuite { assert(session.sessionState.functionRegistry .lookupFunction(MyExtensions.myFunction._1).isDefined) assert(session.sessionState.columnarRules.contains( - MyColumarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule()))) + MyColumnarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule()))) } finally { stop(session) } @@ -824,7 +824,7 @@ case class MyPostRule() extends Rule[SparkPlan] { } } -case class MyColumarRule(pre: Rule[SparkPlan], post: Rule[SparkPlan]) extends ColumnarRule { +case class MyColumnarRule(pre: Rule[SparkPlan], post: Rule[SparkPlan]) extends ColumnarRule { override def preColumnarTransitions: Rule[SparkPlan] = pre override def postColumnarTransitions: Rule[SparkPlan] = post } @@ -838,7 +838,7 @@ class MyExtensions extends (SparkSessionExtensions => Unit) { e.injectOptimizerRule(MyRule) e.injectParser(MyParser) e.injectFunction(MyExtensions.myFunction) - e.injectColumnar(session => MyColumarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule())) + e.injectColumnar(session => MyColumnarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule())) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSTableStats.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSTableStats.scala index f39b4b8b56c2e..ee9cf7b67225f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSTableStats.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSTableStats.scala @@ -376,7 +376,7 @@ object TPCDSTableStats { "s_closed_date_sk" -> CatalogColumnStat(Some(70L), Some("2450823"), Some("2451313"), Some(296), Some(4), Some(4), None, CatalogColumnStat.VERSION), "s_store_id" -> CatalogColumnStat(Some(210L), None, None, Some(0), Some(16), Some(16), None, CatalogColumnStat.VERSION), "s_geography_class" -> CatalogColumnStat(Some(1L), None, None, Some(3), Some(7), Some(7), None, CatalogColumnStat.VERSION), - "s_tax_precentage" -> CatalogColumnStat(Some(12L), Some("0.00"), Some("0.11"), Some(5), Some(8), Some(8), None, CatalogColumnStat.VERSION) + "s_tax_percentage" -> CatalogColumnStat(Some(12L), Some("0.00"), Some("0.11"), Some(5), Some(8), Some(8), None, CatalogColumnStat.VERSION) )), "store_returns" -> CatalogStatistics(4837573440L, Some(28795080L), Map( "sr_item_sk" -> CatalogColumnStat(Some(197284L), Some("1"), Some("204000"), Some(0), Some(8), Some(8), None, CatalogColumnStat.VERSION), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 9a8c3e3cf1a11..b1d61658b8a8b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -285,7 +285,7 @@ class DataSourceV2SQLSuite } } - test("CreateTable/RepalceTable: invalid schema if has interval type") { + test("CreateTable/ReplaceTable: invalid schema if has interval type") { Seq("CREATE", "REPLACE").foreach { action => val e1 = intercept[AnalysisException]( sql(s"$action TABLE table_name (id int, value interval) USING $v2Format")) @@ -1360,9 +1360,9 @@ class DataSourceV2SQLSuite test("ShowNamespaces: default v2 catalog doesn't support namespace") { spark.conf.set( - "spark.sql.catalog.testcat_no_namspace", + "spark.sql.catalog.testcat_no_namespace", classOf[BasicInMemoryTableCatalog].getName) - spark.conf.set(SQLConf.DEFAULT_CATALOG.key, "testcat_no_namspace") + 
spark.conf.set(SQLConf.DEFAULT_CATALOG.key, "testcat_no_namespace") val exception = intercept[AnalysisException] { sql("SHOW NAMESPACES") @@ -1373,11 +1373,11 @@ class DataSourceV2SQLSuite test("ShowNamespaces: v2 catalog doesn't support namespace") { spark.conf.set( - "spark.sql.catalog.testcat_no_namspace", + "spark.sql.catalog.testcat_no_namespace", classOf[BasicInMemoryTableCatalog].getName) val exception = intercept[AnalysisException] { - sql("SHOW NAMESPACES in testcat_no_namspace") + sql("SHOW NAMESPACES in testcat_no_namespace") } assert(exception.getMessage.contains("does not support namespaces")) @@ -2268,7 +2268,7 @@ class DataSourceV2SQLSuite val e = intercept[AnalysisException] { // Since the following multi-part name starts with `globalTempDB`, it is resolved to - // the session catalog, not the `gloabl_temp` v2 catalog. + // the session catalog, not the `global_temp` v2 catalog. sql(s"CREATE TABLE $globalTempDB.ns1.ns2.tbl (id bigint, data string) USING json") } assert(e.message.contains( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLWindowFunctionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLWindowFunctionSuite.scala index 67ec1028f1998..eec396b2e3998 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLWindowFunctionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLWindowFunctionSuite.scala @@ -372,7 +372,7 @@ class SQLWindowFunctionSuite extends QueryTest with SharedSparkSession { spark.catalog.dropTempView("nums") } - test("window function: mutiple window expressions specified by range in a single expression") { + test("window function: multiple window expressions specified by range in a single expression") { val nums = sparkContext.parallelize(1 to 10).map(x => (x, x % 2)).toDF("x", "y") nums.createOrReplaceTempView("nums") withTempView("nums") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index 1a826c00c81f2..81ba09f206b92 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -184,7 +184,7 @@ class SparkSqlParserSuite extends AnalysisTest { intercept("REFRESH", "Resource paths cannot be empty in REFRESH statements") } - test("SPARK-33118 CREATE TMEPORARY TABLE with LOCATION") { + test("SPARK-33118 CREATE TEMPORARY TABLE with LOCATION") { assertEqual("CREATE TEMPORARY TABLE t USING parquet OPTIONS (path '/data/tmp/testspark1')", CreateTempViewUsing(TableIdentifier("t", None), None, false, false, "parquet", Map("path" -> "/data/tmp/testspark1"))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala index fe40d7dce344d..eb5643df4c752 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala @@ -398,8 +398,8 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession // Case2: The parent of a LocalTableScanExec supports WholeStageCodegen. // In this case, the LocalTableScanExec should be within a WholeStageCodegen domain // and no more InputAdapter is inserted as the direct parent of the LocalTableScanExec. 
- val aggedDF = Seq(1, 2, 3).toDF.groupBy("value").sum() - val executedPlan = aggedDF.queryExecution.executedPlan + val aggregatedDF = Seq(1, 2, 3).toDF.groupBy("value").sum() + val executedPlan = aggregatedDF.queryExecution.executedPlan // HashAggregateExec supports WholeStageCodegen and it's the parent of // LocalTableScanExec so LocalTableScanExec should be within a WholeStageCodegen domain. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index 45ba2202d83d3..69f1565c2f8de 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -755,9 +755,9 @@ class AdaptiveQueryExecSuite Utils.deleteRecursively(tableDir) df1.write.parquet(tableDir.getAbsolutePath) - val agged = spark.table("bucketed_table").groupBy("i").count() + val aggregated = spark.table("bucketed_table").groupBy("i").count() val error = intercept[Exception] { - agged.count() + aggregated.count() } assert(error.getCause().toString contains "Invalid bucket file") assert(error.getSuppressed.size === 0) @@ -962,9 +962,9 @@ class AdaptiveQueryExecSuite withSQLConf(SQLConf.UI_EXPLAIN_MODE.key -> mode, SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true", SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "80") { - val dfApdaptive = sql("SELECT * FROM testData JOIN testData2 ON key = a WHERE value = '1'") + val dfAdaptive = sql("SELECT * FROM testData JOIN testData2 ON key = a WHERE value = '1'") try { - checkAnswer(dfApdaptive, Row(1, "1", 1, 1) :: Row(1, "1", 1, 2) :: Nil) + checkAnswer(dfAdaptive, Row(1, "1", 1, 1) :: Row(1, "1", 1, 2) :: Nil) spark.sparkContext.listenerBus.waitUntilEmpty() assert(checkDone) } finally { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala index 1e6e59456c887..d861bbbf67b1c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala @@ -1210,7 +1210,7 @@ class ArrowConvertersSuite extends SharedSparkSession { testQuietly("interval is unsupported for arrow") { val e = intercept[SparkException] { - calenderIntervalData.toDF().toArrowBatchRdd.collect() + calendarIntervalData.toDF().toArrowBatchRdd.collect() } assert(e.getCause.isInstanceOf[UnsupportedOperationException]) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index b3cd9f1057a70..82d3e2dfe2212 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -549,9 +549,9 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { import testImplicits._ val df = sparkContext.parallelize(1 to 10).map(i => (i, i.toString)).toDF("num", "str") - // Case 1: with partitioning columns but no schema: Option("inexistentColumns") + // Case 1: with partitioning columns but no schema: Option("nonexistentColumns") // Case 2: without schema and partitioning columns: None - Seq(Option("inexistentColumns"), None).foreach { partitionCols => + 
Seq(Option("nonexistentColumns"), None).foreach { partitionCols => withTempPath { pathToPartitionedTable => df.write.format("parquet").partitionBy("num") .save(pathToPartitionedTable.getCanonicalPath) @@ -589,9 +589,9 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { import testImplicits._ val df = sparkContext.parallelize(1 to 10).map(i => (i, i.toString)).toDF("num", "str") - // Case 1: with partitioning columns but no schema: Option("inexistentColumns") + // Case 1: with partitioning columns but no schema: Option("nonexistentColumns") // Case 2: without schema and partitioning columns: None - Seq(Option("inexistentColumns"), None).foreach { partitionCols => + Seq(Option("nonexistentColumns"), None).foreach { partitionCols => withTempPath { pathToNonPartitionedTable => df.write.format("parquet").save(pathToNonPartitionedTable.getCanonicalPath) checkSchemaInCreatedDataSourceTable( @@ -608,7 +608,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { import testImplicits._ val df = sparkContext.parallelize(1 to 10).map(i => (i, i.toString)).toDF("num", "str") - // Case 1: with partitioning columns but no schema: Option("inexistentColumns") + // Case 1: with partitioning columns but no schema: Option("nonexistentColumns") // Case 2: without schema and partitioning columns: None Seq(Option("num"), None).foreach { partitionCols => withTempPath { pathToNonPartitionedTable => @@ -1910,7 +1910,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { |OPTIONS ( | path '${tempDir.getCanonicalPath}' |) - |CLUSTERED BY (inexistentColumnA) SORTED BY (inexistentColumnB) INTO 2 BUCKETS + |CLUSTERED BY (nonexistentColumnA) SORTED BY (nonexistentColumnB) INTO 2 BUCKETS """.stripMargin) } assert(e.message == "Cannot specify bucketing information if the table schema is not " + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 5147a8485ea25..758540f1a42f5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -1187,26 +1187,26 @@ class PlanResolutionSuite extends AnalysisTest { ) } - DSV2ResolutionTests.foreach { case (sql, isSessionCatlog) => + DSV2ResolutionTests.foreach { case (sql, isSessionCatalog) => test(s"Data source V2 relation resolution '$sql'") { val parsed = parseAndResolve(sql, withDefault = true) - val catlogIdent = if (isSessionCatlog) v2SessionCatalog else testCat - val tableIdent = if (isSessionCatlog) "v2Table" else "tab" + val catalogIdent = if (isSessionCatalog) v2SessionCatalog else testCat + val tableIdent = if (isSessionCatalog) "v2Table" else "tab" parsed match { case AlterTable(_, _, r: DataSourceV2Relation, _) => - assert(r.catalog.exists(_ == catlogIdent)) + assert(r.catalog.exists(_ == catalogIdent)) assert(r.identifier.exists(_.name() == tableIdent)) case Project(_, AsDataSourceV2Relation(r)) => - assert(r.catalog.exists(_ == catlogIdent)) + assert(r.catalog.exists(_ == catalogIdent)) assert(r.identifier.exists(_.name() == tableIdent)) case AppendData(r: DataSourceV2Relation, _, _, _) => - assert(r.catalog.exists(_ == catlogIdent)) + assert(r.catalog.exists(_ == catalogIdent)) assert(r.identifier.exists(_.name() == tableIdent)) case DescribeRelation(r: ResolvedTable, _, _) => - assert(r.catalog == catlogIdent) + assert(r.catalog == catalogIdent) 
assert(r.identifier.name() == tableIdent) case ShowTableProperties(r: ResolvedTable, _) => - assert(r.catalog == catlogIdent) + assert(r.catalog == catalogIdent) assert(r.identifier.name() == tableIdent) case ShowTablePropertiesCommand(t: TableIdentifier, _) => assert(t.identifier == tableIdent) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala index dc97b7a55ee9a..6ba3d2723412b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala @@ -141,14 +141,14 @@ class DataSourceSuite extends SharedSparkSession with PrivateMethodTester { } test("Data source options should be propagated in method checkAndGlobPathIfNecessary") { - val dataSourceOptions = Map("fs.defaultFS" -> "nonexistsFs://nonexistsFs") + val dataSourceOptions = Map("fs.defaultFS" -> "nonexistentFs://nonexistentFs") val dataSource = DataSource(spark, "parquet", Seq("/path3"), options = dataSourceOptions) val checkAndGlobPathIfNecessary = PrivateMethod[Seq[Path]]('checkAndGlobPathIfNecessary) val message = intercept[java.io.IOException] { dataSource invokePrivate checkAndGlobPathIfNecessary(false, false) }.getMessage - val expectMessage = "No FileSystem for scheme nonexistsFs" + val expectMessage = "No FileSystem for scheme nonexistentFs" assert(message.filterNot(Set(':', '"').contains) == expectMessage) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala index 2b5cb27d59ad9..c90732183cb7a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala @@ -623,9 +623,9 @@ abstract class SchemaPruningSuite spark.read.format(dataSourceName).schema(schema).load(path + "/contacts") .createOrReplaceTempView("contacts") - val departmentScahem = "`depId` INT,`depName` STRING,`contactId` INT, " + + val departmentSchema = "`depId` INT,`depName` STRING,`contactId` INT, " + "`employer` STRUCT<`id`: INT, `company`: STRUCT<`name`: STRING, `address`: STRING>>" - spark.read.format(dataSourceName).schema(departmentScahem).load(path + "/departments") + spark.read.format(dataSourceName).schema(departmentSchema).load(path + "/departments") .createOrReplaceTempView("departments") testThunk @@ -651,9 +651,9 @@ abstract class SchemaPruningSuite spark.read.format(dataSourceName).schema(schema).load(path + "/contacts") .createOrReplaceTempView("contacts") - val departmentScahem = "`depId` INT,`depName` STRING,`contactId` INT, " + + val departmentSchema = "`depId` INT,`depName` STRING,`contactId` INT, " + "`employer` STRUCT<`id`: INT, `company`: STRUCT<`name`: STRING, `address`: STRING>>" - spark.read.format(dataSourceName).schema(departmentScahem).load(path + "/departments") + spark.read.format(dataSourceName).schema(departmentSchema).load(path + "/departments") .createOrReplaceTempView("departments") testThunk diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala index 8c5f7bed7c50d..2fe5953cbe12e 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala @@ -183,7 +183,7 @@ class ParquetInteroperabilitySuite extends ParquetCompatibilityTest with SharedS val oneBlockColumnMeta = oneBlockMeta.getColumns().get(0) // This is the important assert. Column stats are written, but they are ignored // when the data is read back as mentioned above, b/c int96 is unsigned. This - // assert makes sure this holds even if we change parquet versions (if eg. there + // assert makes sure this holds even if we change parquet versions (if e.g. there // were ever statistics even on unsigned columns). assert(!oneBlockColumnMeta.getStatistics.hasNonNullValue) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala index 5c41614c45b6f..400f4d8e1b156 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala @@ -1157,7 +1157,7 @@ class ParquetV1PartitionDiscoverySuite extends ParquetPartitionDiscoverySuite { test("SPARK-21463: MetadataLogFileIndex should respect userSpecifiedSchema for partition cols") { withTempDir { tempDir => val output = new File(tempDir, "output").toString - val checkpoint = new File(tempDir, "chkpoint").toString + val checkpoint = new File(tempDir, "checkpoint").toString try { val stream = MemoryStream[(String, Int)] val df = stream.toDS().toDF("time", "value") @@ -1303,7 +1303,7 @@ class ParquetV2PartitionDiscoverySuite extends ParquetPartitionDiscoverySuite { test("SPARK-21463: MetadataLogFileIndex should respect userSpecifiedSchema for partition cols") { withTempDir { tempDir => val output = new File(tempDir, "output").toString - val checkpoint = new File(tempDir, "chkpoint").toString + val checkpoint = new File(tempDir, "checkpoint").toString try { val stream = MemoryStream[(String, Int)] val df = stream.toDS().toDF("time", "value") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala index 05d305a9b52ba..8f85fe3c52583 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala @@ -857,7 +857,7 @@ class ParquetV1QuerySuite extends ParquetQuerySuite { val df = spark.range(10).select(Seq.tabulate(11) {i => ('id + i).as(s"c$i")} : _*) df.write.mode(SaveMode.Overwrite).parquet(path) - // donot return batch, because whole stage codegen is disabled for wide table (>200 columns) + // do not return batch - whole stage codegen is disabled for wide table (>200 columns) val df2 = spark.read.parquet(path) val fileScan2 = df2.queryExecution.sparkPlan.find(_.isInstanceOf[FileSourceScanExec]).get assert(!fileScan2.asInstanceOf[FileSourceScanExec].supportsColumnar) @@ -890,7 +890,7 @@ class ParquetV2QuerySuite extends ParquetQuerySuite { val df = spark.range(10).select(Seq.tabulate(11) {i => ('id + i).as(s"c$i")} : _*) 
df.write.mode(SaveMode.Overwrite).parquet(path) - // donot return batch, because whole stage codegen is disabled for wide table (>200 columns) + // do not return batch - whole stage codegen is disabled for wide table (>200 columns) val df2 = spark.read.parquet(path) val fileScan2 = df2.queryExecution.sparkPlan.find(_.isInstanceOf[BatchScanExec]).get val parquetScan2 = fileScan2.asInstanceOf[BatchScanExec].scan.asInstanceOf[ParquetScan] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala index 296cbc3f3ad52..061799f439e5b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala @@ -60,7 +60,7 @@ class EnsureRequirementsSuite extends SharedSparkSession { case other => fail(other.toString) } - // Both sides are PartitioningCollection, but left side cannot be reorderd to match + // Both sides are PartitioningCollection, but left side cannot be reordered to match // and it should fall back to the right side. val smjExec3 = SortMergeJoinExec( exprA :: exprC :: Nil, exprB :: exprA :: Nil, Inner, None, plan1, plan1) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala index b4f921efcac81..21d17f40abb34 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala @@ -181,7 +181,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils assert(probes.toDouble > 1.0) } else { val mainValue = probes.split("\n").apply(1).stripPrefix("(").stripSuffix(")") - // Extract min, med, max from the string and strip off everthing else. + // Extract min, med, max from the string and strip off everything else. 
val index = mainValue.indexOf(" (", 0) mainValue.slice(0, index).split(", ").foreach { probe => assert(probe.toDouble > 1.0) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala index 67dd88cbab63b..980d532dd4779 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala @@ -199,7 +199,7 @@ class HDFSMetadataLogSuite extends SharedSparkSession { intercept[IllegalStateException](verifyBatchIds(Seq(2, 3, 4), Some(1L), Some(5L))) intercept[IllegalStateException](verifyBatchIds(Seq(1, 2, 4, 5), Some(1L), Some(5L))) - // Related to SPARK-26629, this capatures the behavior for verifyBatchIds when startId > endId + // Related to SPARK-26629, this captures the behavior for verifyBatchIds when startId > endId intercept[IllegalStateException](verifyBatchIds(Seq(), Some(2L), Some(1L))) intercept[AssertionError](verifyBatchIds(Seq(2), Some(2L), Some(1L))) intercept[AssertionError](verifyBatchIds(Seq(1), Some(2L), Some(1L))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SparkPlanInfoSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SparkPlanInfoSuite.scala index a702e00ff9f92..dfc64a41d9f86 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SparkPlanInfoSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SparkPlanInfoSuite.scala @@ -24,10 +24,10 @@ class SparkPlanInfoSuite extends SharedSparkSession{ import testImplicits._ - def vaidateSparkPlanInfo(sparkPlanInfo: SparkPlanInfo): Unit = { + def validateSparkPlanInfo(sparkPlanInfo: SparkPlanInfo): Unit = { sparkPlanInfo.nodeName match { case "InMemoryTableScan" => assert(sparkPlanInfo.children.length == 1) - case _ => sparkPlanInfo.children.foreach(vaidateSparkPlanInfo) + case _ => sparkPlanInfo.children.foreach(validateSparkPlanInfo) } } @@ -39,6 +39,6 @@ class SparkPlanInfoSuite extends SharedSparkSession{ val planInfoResult = SparkPlanInfo.fromSparkPlan(dfWithCache.queryExecution.executedPlan) - vaidateSparkPlanInfo(planInfoResult) + validateSparkPlanInfo(planInfoResult) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala index 567524ac75c2e..13b22dba1168b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala @@ -108,7 +108,7 @@ class ExecutorSideSQLConfSuite extends SparkFunSuite with SQLTestUtils { .queryExecution.executedPlan) assert(res.length == 2) assert(res.forall { case (_, code, _) => - (code.contains("* Codegend pipeline") == flag) && + (code.contains("* Codegened pipeline") == flag) && (code.contains("// input[") == flag) }) } @@ -175,7 +175,7 @@ class ExecutorSideSQLConfSuite extends SparkFunSuite with SQLTestUtils { df.hint("broadcast") } - // set local propert and assert + // set local property and assert val df2 = generateBroadcastDataFrame(confKey, confValue1) spark.sparkContext.setLocalProperty(confKey, confValue1) val checks = df1.join(df2).collect() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index f0b19071a969b..ede5fe538a028 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -1418,7 +1418,7 @@ class JDBCSuite extends QueryTest } test("SPARK-24327 verify and normalize a partition column based on a JDBC resolved schema") { - def testJdbcParitionColumn(partColName: String, expectedColumnName: String): Unit = { + def testJdbcPartitionColumn(partColName: String, expectedColumnName: String): Unit = { val df = spark.read.format("jdbc") .option("url", urlWithUserAndPass) .option("dbtable", "TEST.PARTITION") @@ -1439,16 +1439,16 @@ class JDBCSuite extends QueryTest } } - testJdbcParitionColumn("THEID", "THEID") - testJdbcParitionColumn("\"THEID\"", "THEID") + testJdbcPartitionColumn("THEID", "THEID") + testJdbcPartitionColumn("\"THEID\"", "THEID") withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { - testJdbcParitionColumn("ThEiD", "THEID") + testJdbcPartitionColumn("ThEiD", "THEID") } - testJdbcParitionColumn("THE ID", "THE ID") + testJdbcPartitionColumn("THE ID", "THE ID") def testIncorrectJdbcPartitionColumn(partColName: String): Unit = { val errMsg = intercept[AnalysisException] { - testJdbcParitionColumn(partColName, "THEID") + testJdbcPartitionColumn(partColName, "THEID") }.getMessage assert(errMsg.contains(s"User-defined partition column $partColName not found " + "in the JDBC relation:")) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala index 0ff9303421ade..4ae8cdbeb4f1e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala @@ -639,13 +639,14 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils { withTable("bucketed_table") { df1.write.format("parquet").bucketBy(8, "i", "j").saveAsTable("bucketed_table") val tbl = spark.table("bucketed_table") - val agged = tbl.groupBy("i", "j").agg(max("k")) + val aggregated = tbl.groupBy("i", "j").agg(max("k")) checkAnswer( - agged.sort("i", "j"), + aggregated.sort("i", "j"), df1.groupBy("i", "j").agg(max("k")).sort("i", "j")) - assert(agged.queryExecution.executedPlan.find(_.isInstanceOf[ShuffleExchangeExec]).isEmpty) + assert( + aggregated.queryExecution.executedPlan.find(_.isInstanceOf[ShuffleExchangeExec]).isEmpty) } } @@ -679,13 +680,14 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils { withTable("bucketed_table") { df1.write.format("parquet").bucketBy(8, "i").saveAsTable("bucketed_table") val tbl = spark.table("bucketed_table") - val agged = tbl.groupBy("i", "j").agg(max("k")) + val aggregated = tbl.groupBy("i", "j").agg(max("k")) checkAnswer( - agged.sort("i", "j"), + aggregated.sort("i", "j"), df1.groupBy("i", "j").agg(max("k")).sort("i", "j")) - assert(agged.queryExecution.executedPlan.find(_.isInstanceOf[ShuffleExchangeExec]).isEmpty) + assert( + aggregated.queryExecution.executedPlan.find(_.isInstanceOf[ShuffleExchangeExec]).isEmpty) } } @@ -806,9 +808,9 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils { Utils.deleteRecursively(tableDir) df1.write.parquet(tableDir.getAbsolutePath) - val agged = spark.table("bucketed_table").groupBy("i").count() + val aggregated = spark.table("bucketed_table").groupBy("i").count() val error = intercept[Exception] { - agged.count() 
+ aggregated.count() } assert(error.getCause().toString contains "Invalid bucket file") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala index 9464f7e4c1241..9a7c7e0edc409 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala @@ -234,7 +234,7 @@ class CreateTableAsSelectSuite extends DataSourceTest with SharedSparkSession { } } - test("create table using as select - with overriden max number of buckets") { + test("create table using as select - with overridden max number of buckets") { def createTableSql(numBuckets: Int): String = s""" |CREATE TABLE t USING PARQUET diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala index ca3e714665818..0da6b487e31ee 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala @@ -359,7 +359,7 @@ class TableScanSuite extends DataSourceTest with SharedSparkSession { val schemaNotMatch = intercept[Exception] { sql( s""" - |CREATE $tableType relationProvierWithSchema (i int) + |CREATE $tableType relationProviderWithSchema (i int) |USING org.apache.spark.sql.sources.SimpleScanSource |OPTIONS ( | From '1', @@ -373,7 +373,7 @@ class TableScanSuite extends DataSourceTest with SharedSparkSession { val schemaNeeded = intercept[Exception] { sql( s""" - |CREATE $tableType schemaRelationProvierWithoutSchema + |CREATE $tableType schemaRelationProviderWithoutSchema |USING org.apache.spark.sql.sources.AllDataTypesScanSource |OPTIONS ( | From '1', @@ -387,7 +387,7 @@ class TableScanSuite extends DataSourceTest with SharedSparkSession { test("read the data source tables that do not extend SchemaRelationProvider") { Seq("TEMPORARY VIEW", "TABLE").foreach { tableType => - val tableName = "relationProvierWithSchema" + val tableName = "relationProviderWithSchema" withTable (tableName) { sql( s""" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala index 3c74e316f260e..b240d2058a018 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala @@ -1946,9 +1946,9 @@ class FileStreamSourceSuite extends FileStreamSourceTest { test("SourceFileArchiver - fail when base archive path matches source pattern") { val fakeFileSystem = new FakeFileSystem("fake") - def assertThrowIllegalArgumentException(sourcePatttern: Path, baseArchivePath: Path): Unit = { + def assertThrowIllegalArgumentException(sourcePattern: Path, baseArchivePath: Path): Unit = { intercept[IllegalArgumentException] { - new SourceFileArchiver(fakeFileSystem, sourcePatttern, fakeFileSystem, baseArchivePath) + new SourceFileArchiver(fakeFileSystem, sourcePattern, fakeFileSystem, baseArchivePath) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala index e64d5f6f3587e..ed284df10aced 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala @@ -1064,13 +1064,13 @@ class StreamSuite extends StreamTest { } test("SPARK-30657: streaming limit should not apply on limits on state subplans") { - val streanData = MemoryStream[Int] - val streamingDF = streanData.toDF().toDF("value") + val streamData = MemoryStream[Int] + val streamingDF = streamData.toDF().toDF("value") val staticDF = spark.createDataset(Seq(1)).toDF("value").orderBy("value") testStream(streamingDF.join(staticDF.limit(1), "value"))( - AddData(streanData, 1, 2, 3), + AddData(streamData, 1, 2, 3), CheckAnswer(Row(1)), - AddData(streanData, 1, 3, 5), + AddData(streamData, 1, 3, 5), CheckAnswer(Row(1), Row(1))) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala index 0296366f3578b..9cf649605ed1c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala @@ -107,12 +107,12 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { } test("read: read table without streaming capability support") { - val tableIdentifer = "testcat.table_name" + val tableIdentifier = "testcat.table_name" - spark.sql(s"CREATE TABLE $tableIdentifer (id bigint, data string) USING foo") + spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING foo") intercept[AnalysisException] { - spark.readStream.table(tableIdentifer) + spark.readStream.table(tableIdentifier) }.message.contains("does not support either micro-batch or continuous scan") } @@ -213,7 +213,7 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { } test("write: write to non-exist table with custom catalog") { - val tableIdentifier = "testcat.nonexisttable" + val tableIdentifier = "testcat.nonexistenttable" withTable(tableIdentifier) { runTestWithStreamAppend(tableIdentifier) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala index c51faaf10f5dd..a1fd4a0215b1f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala @@ -169,10 +169,10 @@ private[sql] trait SQLTestData { self => rdd } - protected lazy val calenderIntervalData: RDD[IntervalData] = { + protected lazy val calendarIntervalData: RDD[IntervalData] = { val rdd = spark.sparkContext.parallelize( IntervalData(new CalendarInterval(1, 1, 1)) :: Nil) - rdd.toDF().createOrReplaceTempView("calenderIntervalData") + rdd.toDF().createOrReplaceTempView("calendarIntervalData") rdd } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala index cfc92a780308d..ed2e309fa075a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} trait SharedSparkSession extends SQLTestUtils with SharedSparkSessionBase { /** - * Suites extending [[SharedSparkSession]] are sharing resources (eg. SparkSession) in their + * Suites extending [[SharedSparkSession]] are sharing resources (e.g. 
SparkSession) in their * tests. That trait initializes the spark session in its [[beforeAll()]] implementation before * the automatic thread snapshot is performed, so the audit code could fail to report threads * leaked by that shared session. From c001dd49e4e9bb42f18618afe710e401b2df3afb Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 8 Dec 2020 10:43:41 -0800 Subject: [PATCH 0706/1009] [SPARK-33675][INFRA][FOLLOWUP] Schedule branch-3.1 snapshot at master branch ### What changes were proposed in this pull request? Currently, `master`/`branch-3.0`/`branch-2.4` snapshot publishing is successfully migrated from Jenkins to `GitHub Action`. - https://github.com/apache/spark/actions?query=workflow%3A%22Publish+Snapshot%22 This PR aims to schedule `branch-3.1` snapshot at `master` branch. ### Why are the changes needed? This is because it turns out that `GitHub Action Schedule` works only at `master` branch. (the default branch). - https://docs.github.com/en/free-pro-teamlatest/actions/reference/events-that-trigger-workflows#scheduled-events ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? The matrix triggering is tested at the forked branch. - https://github.com/dongjoon-hyun/spark/runs/1519015974 Closes #30674 from dongjoon-hyun/SPARK-SCHEDULE-3.1. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .github/workflows/publish_snapshot.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/publish_snapshot.yml b/.github/workflows/publish_snapshot.yml index 9871680f73891..504d702fd1f22 100644 --- a/.github/workflows/publish_snapshot.yml +++ b/.github/workflows/publish_snapshot.yml @@ -7,9 +7,17 @@ on: jobs: publish-snapshot: runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + branch: + - master + - branch-3.1 steps: - name: Checkout Spark repository uses: actions/checkout@master + with: + ref: ${{ matrix.branch }} - name: Cache Maven local repository uses: actions/cache@v2 with: @@ -27,4 +35,5 @@ jobs: ASF_PASSWORD: ${{ secrets.NEXUS_PW }} GPG_KEY: "not_used" GPG_PASSPHRASE: "not_used" + GIT_REF: ${{ matrix.branch }} run: ./dev/create-release/release-build.sh publish-snapshot From 6fd234503cf1e85715ccd3bda42f29dae1daa71b Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 8 Dec 2020 11:41:35 -0800 Subject: [PATCH 0707/1009] [SPARK-32110][SQL] normalize special floating numbers in HyperLogLog++ ### What changes were proposed in this pull request? Currently, Spark treats 0.0 and -0.0 semantically equal, while it still retains the difference between them so that users can see -0.0 when displaying the data set. The comparison expressions in Spark take care of the special floating numbers and implement the correct semantic. However, Spark doesn't always use these comparison expressions to compare values, and we need to normalize the special floating numbers before comparing them in these places: 1. GROUP BY 2. join keys 3. window partition keys This PR fixes one more place that compares values without using comparison expressions: HyperLogLog++ ### Why are the changes needed? Fix the query result ### Does this PR introduce _any_ user-facing change? Yes, the result of HyperLogLog++ becomes correct now. ### How was this patch tested? a new test case, and a few more test cases that pass before this PR to improve test coverage. Closes #30673 from cloud-fan/bug. 
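For illustration, a minimal standalone sketch of the normalization described above (using a hypothetical `normalizeDouble` helper, not the actual patched Spark code): -0.0 is mapped to 0.0 and every NaN bit pattern to the canonical NaN, so semantically equal doubles feed identical bits into the hash used by HyperLogLog++.

```scala
// Hedged sketch mirroring the intent of the double normalizer described above.
def normalizeDouble(d: Double): Double =
  if (d.isNaN) Double.NaN     // collapse all NaN bit patterns into the canonical NaN
  else if (d == -0.0d) 0.0d   // collapse -0.0 into 0.0 (0.0 maps to itself)
  else d

// After normalization, 0.0 and -0.0 have the same bit pattern, so they hash identically.
assert(java.lang.Double.doubleToLongBits(normalizeDouble(-0.0d)) ==
  java.lang.Double.doubleToLongBits(normalizeDouble(0.0d)))
```

Since `approx_count_distinct` is the user-facing function backed by HyperLogLog++, this is why a column containing both 0.0 and -0.0 should now be counted as a single distinct value.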
Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun --- .../optimizer/NormalizeFloatingNumbers.scala | 45 +++++----- .../util/HyperLogLogPlusPlusHelper.scala | 8 +- .../catalyst/expressions/PredicateSuite.scala | 90 +++++++++++++++++++ .../aggregate/HyperLogLogPlusPlusSuite.scala | 24 ++++- 4 files changed, 144 insertions(+), 23 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala index 4434c29cbb3c4..ac8766cd74367 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala @@ -143,6 +143,28 @@ object NormalizeFloatingNumbers extends Rule[LogicalPlan] { case _ => throw new IllegalStateException(s"fail to normalize $expr") } + + val FLOAT_NORMALIZER: Any => Any = (input: Any) => { + val f = input.asInstanceOf[Float] + if (f.isNaN) { + Float.NaN + } else if (f == -0.0f) { + 0.0f + } else { + f + } + } + + val DOUBLE_NORMALIZER: Any => Any = (input: Any) => { + val d = input.asInstanceOf[Double] + if (d.isNaN) { + Double.NaN + } else if (d == -0.0d) { + 0.0d + } else { + d + } + } } case class NormalizeNaNAndZero(child: Expression) extends UnaryExpression with ExpectsInputTypes { @@ -152,27 +174,8 @@ case class NormalizeNaNAndZero(child: Expression) extends UnaryExpression with E override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(FloatType, DoubleType)) private lazy val normalizer: Any => Any = child.dataType match { - case FloatType => (input: Any) => { - val f = input.asInstanceOf[Float] - if (f.isNaN) { - Float.NaN - } else if (f == -0.0f) { - 0.0f - } else { - f - } - } - - case DoubleType => (input: Any) => { - val d = input.asInstanceOf[Double] - if (d.isNaN) { - Double.NaN - } else if (d == -0.0d) { - 0.0d - } else { - d - } - } + case FloatType => NormalizeFloatingNumbers.FLOAT_NORMALIZER + case DoubleType => NormalizeFloatingNumbers.DOUBLE_NORMALIZER } override def nullSafeEval(input: Any): Any = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/HyperLogLogPlusPlusHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/HyperLogLogPlusPlusHelper.scala index ea619c6a7666c..6471a746f2edf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/HyperLogLogPlusPlusHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/HyperLogLogPlusPlusHelper.scala @@ -22,6 +22,7 @@ import java.util import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.XxHash64Function +import org.apache.spark.sql.catalyst.optimizer.NormalizeFloatingNumbers.{DOUBLE_NORMALIZER, FLOAT_NORMALIZER} import org.apache.spark.sql.types._ // A helper class for HyperLogLogPlusPlus. @@ -88,7 +89,12 @@ class HyperLogLogPlusPlusHelper(relativeSD: Double) extends Serializable { * * Variable names in the HLL++ paper match variable names in the code. */ - def update(buffer: InternalRow, bufferOffset: Int, value: Any, dataType: DataType): Unit = { + def update(buffer: InternalRow, bufferOffset: Int, _value: Any, dataType: DataType): Unit = { + val value = dataType match { + case FloatType => FLOAT_NORMALIZER.apply(_value) + case DoubleType => DOUBLE_NORMALIZER.apply(_value) + case _ => _value + } // Create the hashed value 'x'. 
val x = XxHash64Function.hash(value, dataType, 42L) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala index a36baec1a0b99..6f75623dc59ae 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala @@ -554,4 +554,94 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(GreaterThan(Literal(Float.NaN), Literal(Float.NaN)), false) checkEvaluation(GreaterThan(Literal(0.0F), Literal(-0.0F)), false) } + + test("SPARK-32110: compare special double/float values in array") { + def createUnsafeDoubleArray(d: Double): Literal = { + Literal(UnsafeArrayData.fromPrimitiveArray(Array(d)), ArrayType(DoubleType)) + } + def createSafeDoubleArray(d: Double): Literal = { + Literal(new GenericArrayData(Array(d)), ArrayType(DoubleType)) + } + def createUnsafeFloatArray(d: Double): Literal = { + Literal(UnsafeArrayData.fromPrimitiveArray(Array(d.toFloat)), ArrayType(FloatType)) + } + def createSafeFloatArray(d: Double): Literal = { + Literal(new GenericArrayData(Array(d.toFloat)), ArrayType(FloatType)) + } + def checkExpr( + exprBuilder: (Expression, Expression) => Expression, + left: Double, + right: Double, + expected: Any): Unit = { + // test double + checkEvaluation( + exprBuilder(createUnsafeDoubleArray(left), createUnsafeDoubleArray(right)), expected) + checkEvaluation( + exprBuilder(createUnsafeDoubleArray(left), createSafeDoubleArray(right)), expected) + checkEvaluation( + exprBuilder(createSafeDoubleArray(left), createSafeDoubleArray(right)), expected) + // test float + checkEvaluation( + exprBuilder(createUnsafeFloatArray(left), createUnsafeFloatArray(right)), expected) + checkEvaluation( + exprBuilder(createUnsafeFloatArray(left), createSafeFloatArray(right)), expected) + checkEvaluation( + exprBuilder(createSafeFloatArray(left), createSafeFloatArray(right)), expected) + } + + checkExpr(EqualTo, Double.NaN, Double.NaN, true) + checkExpr(EqualTo, Double.NaN, Double.PositiveInfinity, false) + checkExpr(EqualTo, 0.0, -0.0, true) + checkExpr(GreaterThan, Double.NaN, Double.PositiveInfinity, true) + checkExpr(GreaterThan, Double.NaN, Double.NaN, false) + checkExpr(GreaterThan, 0.0, -0.0, false) + } + + test("SPARK-32110: compare special double/float values in struct") { + def createUnsafeDoubleRow(d: Double): Literal = { + val dt = new StructType().add("d", "double") + val converter = UnsafeProjection.create(dt) + val unsafeRow = converter.apply(InternalRow(d)) + Literal(unsafeRow, dt) + } + def createSafeDoubleRow(d: Double): Literal = { + Literal(InternalRow(d), new StructType().add("d", "double")) + } + def createUnsafeFloatRow(d: Double): Literal = { + val dt = new StructType().add("f", "float") + val converter = UnsafeProjection.create(dt) + val unsafeRow = converter.apply(InternalRow(d.toFloat)) + Literal(unsafeRow, dt) + } + def createSafeFloatRow(d: Double): Literal = { + Literal(InternalRow(d.toFloat), new StructType().add("f", "float")) + } + def checkExpr( + exprBuilder: (Expression, Expression) => Expression, + left: Double, + right: Double, + expected: Any): Unit = { + // test double + checkEvaluation( + exprBuilder(createUnsafeDoubleRow(left), createUnsafeDoubleRow(right)), expected) + checkEvaluation( + exprBuilder(createUnsafeDoubleRow(left), createSafeDoubleRow(right)), 
expected) + checkEvaluation( + exprBuilder(createSafeDoubleRow(left), createSafeDoubleRow(right)), expected) + // test float + checkEvaluation( + exprBuilder(createUnsafeFloatRow(left), createUnsafeFloatRow(right)), expected) + checkEvaluation( + exprBuilder(createUnsafeFloatRow(left), createSafeFloatRow(right)), expected) + checkEvaluation( + exprBuilder(createSafeFloatRow(left), createSafeFloatRow(right)), expected) + } + + checkExpr(EqualTo, Double.NaN, Double.NaN, true) + checkExpr(EqualTo, Double.NaN, Double.PositiveInfinity, false) + checkExpr(EqualTo, 0.0, -0.0, true) + checkExpr(GreaterThan, Double.NaN, Double.PositiveInfinity, true) + checkExpr(GreaterThan, Double.NaN, Double.NaN, false) + checkExpr(GreaterThan, 0.0, -0.0, false) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlusSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlusSuite.scala index 98fd04c9cca91..1afccea5aef15 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlusSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlusSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.catalyst.expressions.aggregate +import java.lang.{Double => JDouble} import java.util.Random import scala.collection.mutable @@ -24,7 +25,7 @@ import scala.collection.mutable import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{BoundReference, SpecificInternalRow} -import org.apache.spark.sql.types.{DataType, IntegerType} +import org.apache.spark.sql.types.{DataType, DoubleType, IntegerType} class HyperLogLogPlusPlusSuite extends SparkFunSuite { @@ -153,4 +154,25 @@ class HyperLogLogPlusPlusSuite extends SparkFunSuite { // Check if the buffers are equal. assert(buffer2 == buffer1a, "Buffers should be equal") } + + test("SPARK-32110: add 0.0 and -0.0") { + val (hll, input, buffer) = createEstimator(0.05, DoubleType) + input.setDouble(0, 0.0) + hll.update(buffer, input) + input.setDouble(0, -0.0) + hll.update(buffer, input) + evaluateEstimate(hll, buffer, 1); + } + + test("SPARK-32110: add NaN") { + val (hll, input, buffer) = createEstimator(0.05, DoubleType) + input.setDouble(0, Double.NaN) + hll.update(buffer, input) + val specialNaN = JDouble.longBitsToDouble(0x7ff1234512345678L) + assert(JDouble.isNaN(specialNaN)) + assert(JDouble.doubleToRawLongBits(Double.NaN) != JDouble.doubleToRawLongBits(specialNaN)) + input.setDouble(0, specialNaN) + hll.update(buffer, input) + evaluateEstimate(hll, buffer, 1); + } } From 3ac70f169d653f22bd04ec7bb6ebb49696807bb2 Mon Sep 17 00:00:00 2001 From: Nicholas Marion Date: Tue, 8 Dec 2020 12:11:06 -0800 Subject: [PATCH 0708/1009] [SPARK-33695][BUILD] Upgrade to jackson to 2.10.5 and jackson-databind to 2.10.5.1 ### What changes were proposed in this pull request? Upgrade the jackson dependencies to 2.10.5 and jackson-databind to 2.10.5.1 ### Why are the changes needed? Jackson dependency has vulnerability CVE-2020-25649. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing unit tests. Closes #30656 from n-marion/SPARK-33695_upgrade-jackson. 
Authored-by: Nicholas Marion Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 16 ++++++++-------- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 16 ++++++++-------- pom.xml | 5 +++-- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 401050a60e493..3a54dbd6232e3 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -103,17 +103,17 @@ httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.12//httpcore-4.4.12.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.4.0//ivy-2.4.0.jar -jackson-annotations/2.10.0//jackson-annotations-2.10.0.jar +jackson-annotations/2.10.5//jackson-annotations-2.10.5.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar -jackson-core/2.10.0//jackson-core-2.10.0.jar -jackson-databind/2.10.0//jackson-databind-2.10.0.jar -jackson-dataformat-yaml/2.10.0//jackson-dataformat-yaml-2.10.0.jar +jackson-core/2.10.5//jackson-core-2.10.5.jar +jackson-databind/2.10.5.1//jackson-databind-2.10.5.1.jar +jackson-dataformat-yaml/2.10.5//jackson-dataformat-yaml-2.10.5.jar jackson-datatype-jsr310/2.11.2//jackson-datatype-jsr310-2.11.2.jar jackson-jaxrs/1.9.13//jackson-jaxrs-1.9.13.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar -jackson-module-jaxb-annotations/2.10.0//jackson-module-jaxb-annotations-2.10.0.jar -jackson-module-paranamer/2.10.0//jackson-module-paranamer-2.10.0.jar -jackson-module-scala_2.12/2.10.0//jackson-module-scala_2.12-2.10.0.jar +jackson-module-jaxb-annotations/2.10.5//jackson-module-jaxb-annotations-2.10.5.jar +jackson-module-paranamer/2.10.5//jackson-module-paranamer-2.10.5.jar +jackson-module-scala_2.12/2.10.5//jackson-module-scala_2.12-2.10.5.jar jackson-xc/1.9.13//jackson-xc-1.9.13.jar jakarta.activation-api/1.2.1//jakarta.activation-api-1.2.1.jar jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar @@ -220,7 +220,7 @@ shapeless_2.12/2.3.3//shapeless_2.12-2.3.3.jar shims/0.9.0//shims-0.9.0.jar slf4j-api/1.7.30//slf4j-api-1.7.30.jar slf4j-log4j12/1.7.30//slf4j-log4j12-1.7.30.jar -snakeyaml/1.24//snakeyaml-1.24.jar +snakeyaml/1.26//snakeyaml-1.26.jar snappy-java/1.1.8//snappy-java-1.1.8.jar spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index b0f8935843281..67bcc7a8ed902 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -102,18 +102,18 @@ httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.12//httpcore-4.4.12.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.4.0//ivy-2.4.0.jar -jackson-annotations/2.10.0//jackson-annotations-2.10.0.jar +jackson-annotations/2.10.5//jackson-annotations-2.10.5.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar -jackson-core/2.10.0//jackson-core-2.10.0.jar -jackson-databind/2.10.0//jackson-databind-2.10.0.jar -jackson-dataformat-yaml/2.10.0//jackson-dataformat-yaml-2.10.0.jar +jackson-core/2.10.5//jackson-core-2.10.5.jar +jackson-databind/2.10.5.1//jackson-databind-2.10.5.1.jar +jackson-dataformat-yaml/2.10.5//jackson-dataformat-yaml-2.10.5.jar jackson-datatype-jsr310/2.11.2//jackson-datatype-jsr310-2.11.2.jar jackson-jaxrs-base/2.9.5//jackson-jaxrs-base-2.9.5.jar jackson-jaxrs-json-provider/2.9.5//jackson-jaxrs-json-provider-2.9.5.jar 
jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar -jackson-module-jaxb-annotations/2.10.0//jackson-module-jaxb-annotations-2.10.0.jar -jackson-module-paranamer/2.10.0//jackson-module-paranamer-2.10.0.jar -jackson-module-scala_2.12/2.10.0//jackson-module-scala_2.12-2.10.0.jar +jackson-module-jaxb-annotations/2.10.5//jackson-module-jaxb-annotations-2.10.5.jar +jackson-module-paranamer/2.10.5//jackson-module-paranamer-2.10.5.jar +jackson-module-scala_2.12/2.10.5//jackson-module-scala_2.12-2.10.5.jar jakarta.activation-api/1.2.1//jakarta.activation-api-1.2.1.jar jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar @@ -235,7 +235,7 @@ shapeless_2.12/2.3.3//shapeless_2.12-2.3.3.jar shims/0.9.0//shims-0.9.0.jar slf4j-api/1.7.30//slf4j-api-1.7.30.jar slf4j-log4j12/1.7.30//slf4j-log4j12-1.7.30.jar -snakeyaml/1.24//snakeyaml-1.24.jar +snakeyaml/1.26//snakeyaml-1.26.jar snappy-java/1.1.8//snappy-java-1.1.8.jar spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar diff --git a/pom.xml b/pom.xml index 364dec688b38b..23eb16a7db472 100644 --- a/pom.xml +++ b/pom.xml @@ -169,7 +169,8 @@ true 1.9.13 - 2.10.0 + 2.10.5 + 2.10.5.1 1.1.8 1.1.2 1.10 @@ -773,7 +774,7 @@ com.fasterxml.jackson.core jackson-databind - ${fasterxml.jackson.version} + ${fasterxml.jackson-databind.version} com.fasterxml.jackson.core From f021f6d3c72e1c84637798b4ddcb7e208fdfbf46 Mon Sep 17 00:00:00 2001 From: Weichen Xu Date: Wed, 9 Dec 2020 11:18:09 +0800 Subject: [PATCH 0709/1009] [MINOR][ML] Increase Bounded MLOR (without regularization) test error tolerance ### What changes were proposed in this pull request? Improve LogisticRegression test error tolerance ### Why are the changes needed? When we switch BLAS version, some of the tests will fail due to too strict error tolerance in test. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A Closes #30587 from WeichenXu123/fix_lor_test. Authored-by: Weichen Xu Signed-off-by: Weichen Xu --- .../LogisticRegressionSuite.scala | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index d0b282db1ece8..d2814b420e017 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -1548,9 +1548,9 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { val interceptsExpected1 = Vectors.dense( 1.0000152482448372, 3.591773288423673, 5.079685953744937) - checkCoefficientsEquivalent(model1.coefficientMatrix, coefficientsExpected1) + checkBoundedMLORCoefficientsEquivalent(model1.coefficientMatrix, coefficientsExpected1) assert(model1.interceptVector ~== interceptsExpected1 relTol 0.01) - checkCoefficientsEquivalent(model2.coefficientMatrix, coefficientsExpected1) + checkBoundedMLORCoefficientsEquivalent(model2.coefficientMatrix, coefficientsExpected1) assert(model2.interceptVector ~== interceptsExpected1 relTol 0.01) // Bound constrained optimization with bound on both side. 
@@ -1585,9 +1585,9 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { isTransposed = true) val interceptsExpected3 = Vectors.dense(1.0, 2.0, 2.0) - checkCoefficientsEquivalent(model3.coefficientMatrix, coefficientsExpected3) + checkBoundedMLORCoefficientsEquivalent(model3.coefficientMatrix, coefficientsExpected3) assert(model3.interceptVector ~== interceptsExpected3 relTol 0.01) - checkCoefficientsEquivalent(model4.coefficientMatrix, coefficientsExpected3) + checkBoundedMLORCoefficientsEquivalent(model4.coefficientMatrix, coefficientsExpected3) assert(model4.interceptVector ~== interceptsExpected3 relTol 0.01) // Bound constrained optimization with infinite bound on both side. @@ -1621,9 +1621,9 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { val interceptsExpected5 = Vectors.dense( -2.2231282183460723, 0.3669496747012527, 1.856178543644802) - checkCoefficientsEquivalent(model5.coefficientMatrix, coefficientsExpected5) + checkBoundedMLORCoefficientsEquivalent(model5.coefficientMatrix, coefficientsExpected5) assert(model5.interceptVector ~== interceptsExpected5 relTol 0.01) - checkCoefficientsEquivalent(model6.coefficientMatrix, coefficientsExpected5) + checkBoundedMLORCoefficientsEquivalent(model6.coefficientMatrix, coefficientsExpected5) assert(model6.interceptVector ~== interceptsExpected5 relTol 0.01) } @@ -1719,9 +1719,9 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { 1.7524631428961193, 1.2292565990448736, 1.3433784431904323, 1.5846063017678864), isTransposed = true) - checkCoefficientsEquivalent(model1.coefficientMatrix, coefficientsExpected) + checkBoundedMLORCoefficientsEquivalent(model1.coefficientMatrix, coefficientsExpected) assert(model1.interceptVector.toArray === Array.fill(3)(0.0)) - checkCoefficientsEquivalent(model2.coefficientMatrix, coefficientsExpected) + checkBoundedMLORCoefficientsEquivalent(model2.coefficientMatrix, coefficientsExpected) assert(model2.interceptVector.toArray === Array.fill(3)(0.0)) } @@ -2953,16 +2953,17 @@ object LogisticRegressionSuite { } /** + * Note: This method is only used in Bounded MLOR (without regularization) test * When no regularization is applied, the multinomial coefficients lack identifiability * because we do not use a pivot class. We can add any constant value to the coefficients * and get the same likelihood. If fitting under bound constrained optimization, we don't * choose the mean centered coefficients like what we do for unbound problems, since they * may out of the bounds. We use this function to check whether two coefficients are equivalent. */ - def checkCoefficientsEquivalent(coefficients1: Matrix, coefficients2: Matrix): Unit = { + def checkBoundedMLORCoefficientsEquivalent(coefficients1: Matrix, coefficients2: Matrix): Unit = { coefficients1.colIter.zip(coefficients2.colIter).foreach { case (col1: Vector, col2: Vector) => (col1.asBreeze - col2.asBreeze).toArray.toSeq.sliding(2).foreach { - case Seq(v1, v2) => assert(v1 ~= v2 absTol 1E-3) + case Seq(v1, v2) => assert(v1 ~= v2 absTol 1E-2) } } } From 29fed23ba16d580e6247b6e70e9c9eef0698aa95 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Wed, 9 Dec 2020 05:06:37 +0000 Subject: [PATCH 0710/1009] [SPARK-33703][SQL] Migrate MSCK REPAIR TABLE to use UnresolvedTable to resolve the identifier ### What changes were proposed in this pull request? This PR proposes to migrate `MSCK REPAIR TABLE` to use `UnresolvedTable` to resolve the table identifier. 
This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). Note that `MSCK REPAIR TABLE` is not supported for v2 tables. ### Why are the changes needed? The PR makes the resolution consistent behavior consistent. For example, ```scala sql("CREATE DATABASE test") sql("CREATE TABLE spark_catalog.test.t (id bigint, val string) USING csv PARTITIONED BY (id)") sql("CREATE TEMPORARY VIEW t AS SELECT 2") sql("USE spark_catalog.test") sql("MSCK REPAIR TABLE t") // works fine ``` , but after this PR: ``` sql("MSCK REPAIR TABLE t") org.apache.spark.sql.AnalysisException: t is a temp view. 'MSCK REPAIR TABLE' expects a table; line 1 pos 0 ``` , which is the consistent behavior with other commands. ### Does this PR introduce _any_ user-facing change? After this PR, `MSCK REPAIR TABLE t` in the above example is resolved to a temp view `t` first instead of `spark_catalog.test.t`. ### How was this patch tested? Updated existing tests. Closes #30664 from imback82/repair_table_V2. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../apache/spark/sql/catalyst/parser/AstBuilder.scala | 5 +++-- .../spark/sql/catalyst/plans/logical/statements.scala | 5 ----- .../spark/sql/catalyst/plans/logical/v2Commands.scala | 7 +++++++ .../spark/sql/catalyst/parser/DDLParserSuite.scala | 2 +- .../sql/catalyst/analysis/ResolveSessionCatalog.scala | 7 ++----- .../execution/datasources/v2/DataSourceV2Strategy.scala | 3 +++ .../spark/sql/connector/DataSourceV2SQLSuite.scala | 9 +-------- 7 files changed, 17 insertions(+), 21 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 89b81ec1d83aa..7787e199d3770 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3547,7 +3547,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } /** - * Create a [[RepairTableStatement]]. + * Create a [[RepairTable]]. 
* * For example: * {{{ @@ -3555,7 +3555,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * }}} */ override def visitRepairTable(ctx: RepairTableContext): LogicalPlan = withOrigin(ctx) { - RepairTableStatement(visitMultipartIdentifier(ctx.multipartIdentifier())) + RepairTable( + UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier()), "MSCK REPAIR TABLE")) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index c4ac8ea8f2e69..b731b8a2fd8fd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -383,11 +383,6 @@ case class CreateNamespaceStatement( */ case class UseStatement(isNamespaceSet: Boolean, nameParts: Seq[String]) extends ParsedStatement -/** - * A REPAIR TABLE statement, as parsed from SQL - */ -case class RepairTableStatement(tableName: Seq[String]) extends ParsedStatement - /** * A TRUNCATE TABLE statement, as parsed from SQL */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 1e17c51137a55..e014048f723f5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -735,3 +735,10 @@ case class DropView( ifExists: Boolean) extends Command { override def children: Seq[LogicalPlan] = child :: Nil } + +/** + * The logical plan of the MSCK REPAIR TABLE command. 
+ */ +case class RepairTable(child: LogicalPlan) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index d5b27d9ad25cf..947154eae12c8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1952,7 +1952,7 @@ class DDLParserSuite extends AnalysisTest { test("MSCK REPAIR TABLE") { comparePlans( parsePlan("MSCK REPAIR TABLE a.b.c"), - RepairTableStatement(Seq("a", "b", "c"))) + RepairTable(UnresolvedTable(Seq("a", "b", "c"), "MSCK REPAIR TABLE"))) } test("LOAD DATA INTO table") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 657764832a931..817a63aa9aa6e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -408,11 +408,8 @@ class ResolveSessionCatalog( case AnalyzeColumn(ResolvedV1TableOrViewIdentifier(ident), columnNames, allColumns) => AnalyzeColumnCommand(ident.asTableIdentifier, columnNames, allColumns) - case RepairTableStatement(tbl) => - val v1TableName = parseV1Table(tbl, "MSCK REPAIR TABLE") - AlterTableRecoverPartitionsCommand( - v1TableName.asTableIdentifier, - "MSCK REPAIR TABLE") + case RepairTable(ResolvedV1TableIdentifier(ident)) => + AlterTableRecoverPartitionsCommand(ident.asTableIdentifier, "MSCK REPAIR TABLE") case LoadData(ResolvedV1TableIdentifier(ident), path, isLocal, isOverwrite, partition) => LoadDataCommand( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 7e2a485dcb4cc..37a4dcf081be4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -341,6 +341,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat table, pattern.map(_.asInstanceOf[ResolvedPartitionSpec])) :: Nil + case RepairTable(_: ResolvedTable) => + throw new AnalysisException("MSCK REPAIR TABLE is not supported for v2 tables.") + case _ => Nil } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index b1d61658b8a8b..9020065449cef 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2011,7 +2011,7 @@ class DataSourceV2SQLSuite val t = "testcat.ns1.ns2.tbl" withTable(t) { spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo") - testV1Command("MSCK REPAIR TABLE", t) + testNotSupportedV2Command("MSCK REPAIR TABLE", t) } } @@ -2612,13 +2612,6 @@ class DataSourceV2SQLSuite assert(e.message.contains(s"$cmdStr is not supported for v2 tables")) } - private def testV1Command(sqlCommand: String, sqlParams: String): Unit = { - val e = 
intercept[AnalysisException] { - sql(s"$sqlCommand $sqlParams") - } - assert(e.message.contains(s"$sqlCommand is only supported with v1 tables")) - } - private def assertAnalysisError(sqlStatement: String, expectedError: String): Unit = { val errMsg = intercept[AnalysisException] { sql(sqlStatement) From c88eddac3bf860d04bba91fc913f8b2069a94153 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Wed, 9 Dec 2020 06:44:10 +0000 Subject: [PATCH 0711/1009] [SPARK-33641][SQL][DOC][FOLLOW-UP] Add migration guide for CHAR VARCHAR types ### What changes were proposed in this pull request? Add migration guide for CHAR VARCHAR types ### Why are the changes needed? for migration ### Does this PR introduce _any_ user-facing change? doc change ### How was this patch tested? passing ci Closes #30654 from yaooqinn/SPARK-33641-F. Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- docs/sql-migration-guide.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 65a769da70aea..164bfd42d6e4a 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -58,6 +58,8 @@ license: | - In Spark 3.1, creating or altering a view will capture runtime SQL configs and store them as view properties. These configs will be applied during the parsing and analysis phases of the view resolution. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.useCurrentConfigsForView` to `true`. + - Since Spark 3.1, CHAR/CHARACTER and VARCHAR types are supported in the table schema. Table scan/insertion will respect the char/varchar semantic. If char/varchar is used in places other than table schema, an exception will be thrown (CAST is an exception that simply treats char/varchar as string like before). To restore the behavior before Spark 3.1, which treats them as STRING types and ignores a length parameter, e.g. `CHAR(4)`, you can set `spark.sql.legacy.charVarcharAsString` to `true`. + ## Upgrading from Spark SQL 3.0 to 3.0.1 - In Spark 3.0, JSON datasource and JSON function `schema_of_json` infer TimestampType from string values if they match to the pattern defined by the JSON option `timestampFormat`. Since version 3.0.1, the timestamp type inference is disabled by default. Set the JSON option `inferTimestamp` to `true` to enable such type inference. From 48f93af9f3d40de5bf087eb1a06c1b9954b2ad76 Mon Sep 17 00:00:00 2001 From: suqilong Date: Wed, 9 Dec 2020 01:21:13 -0600 Subject: [PATCH 0712/1009] [SPARK-33669] Wrong error message from YARN application state monitor when sc.stop in yarn client mode ### What changes were proposed in this pull request? This change make InterruptedIOException to be treated as InterruptedException when closing YarnClientSchedulerBackend, which doesn't log error with "YARN application has exited unexpectedly xxx" ### Why are the changes needed? For YarnClient mode, when stopping YarnClientSchedulerBackend, it first tries to interrupt Yarn application monitor thread. In MonitorThread.run() it catches InterruptedException to gracefully response to stopping request. But client.monitorApplication method also throws InterruptedIOException when the hadoop rpc call is calling. In this case, MonitorThread will not know it is interrupted, a Yarn App failed is returned with "Failed to contact YARN for application xxxxx; YARN application has exited unexpectedly with state xxxxx" is logged with error level. which confuse user a lot. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? 
very simple patch, seems no need? Closes #30617 from sqlwindspeaker/yarn-client-interrupt-monitor. Authored-by: suqilong Signed-off-by: Mridul Muralidharan gmail.com> --- .../src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 2 +- .../spark/scheduler/cluster/YarnClientSchedulerBackend.scala | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 7f791e02a392b..618faef2d58b3 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -1069,7 +1069,7 @@ private[spark] class Client( logError(s"Application $appId not found.") cleanupStagingDir() return YarnAppReport(YarnApplicationState.KILLED, FinalApplicationStatus.KILLED, None) - case NonFatal(e) => + case NonFatal(e) if !e.isInstanceOf[InterruptedIOException] => val msg = s"Failed to contact YARN for application $appId." logError(msg, e) // Don't necessarily clean up staging dir because status is unknown diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala index cb0de5a0d50b4..8a55e612ce719 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala @@ -17,6 +17,8 @@ package org.apache.spark.scheduler.cluster +import java.io.InterruptedIOException + import scala.collection.mutable.ArrayBuffer import org.apache.hadoop.yarn.api.records.YarnApplicationState @@ -121,7 +123,8 @@ private[spark] class YarnClientSchedulerBackend( allowInterrupt = false sc.stop() } catch { - case e: InterruptedException => logInfo("Interrupting monitor thread") + case _: InterruptedException | _: InterruptedIOException => + logInfo("Interrupting monitor thread") } } From a713a7eee3e7f76df6210a6e215ffc0d67ec71f2 Mon Sep 17 00:00:00 2001 From: Dooyoung Hwang Date: Wed, 9 Dec 2020 18:35:24 +0900 Subject: [PATCH 0713/1009] [SPARK-33655][SQL] Improve performance of processing FETCH_PRIOR ### What changes were proposed in this pull request? Currently, when a client requests FETCH_PRIOR to Thriftserver, Thriftserver reiterates from the start position. Because Thriftserver caches a query result with an array when THRIFTSERVER_INCREMENTAL_COLLECT feature is off, FETCH_PRIOR can be implemented without reiterating the result. A trait FeatureIterator is added in order to separate the implementation for iterator and an array. Also, FeatureIterator supports moves cursor with absolute position, which will be useful for the implementation of FETCH_RELATIVE, FETCH_ABSOLUTE. ### Why are the changes needed? For better performance of Thriftserver. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? FetchIteratorSuite Closes #30600 from Dooyoung-Hwang/refactor_with_fetch_iterator. 
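A worksheet-style usage sketch of the fetch contract added below (illustrative only; `ArrayFetchIterator` is `private[hive]`, so real callers live in that package):

```scala
// FETCH_NEXT advances the fetch block; FETCH_PRIOR rewinds relative to the
// previous block start and serves rows from the cached array without re-collecting.
val iter = new ArrayFetchIterator[Int]((0 until 10).toArray)

iter.fetchNext()                       // FETCH_NEXT: block starts at row 0
(0 until 5).map(_ => iter.next())      // rows 0..4

iter.fetchNext()                       // next block starts at row 5
(0 until 3).map(_ => iter.next())      // rows 5..7

iter.fetchPrior(2)                     // FETCH_PRIOR: previous block start (5) minus 2 = 3
(0 until 3).map(_ => iter.next())      // rows 3..5, no rewind-from-zero re-iteration
```

The `FetchIteratorSuite` added in this patch exercises exactly these transitions.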
Authored-by: Dooyoung Hwang Signed-off-by: HyukjinKwon --- .../sql/hive/thriftserver/FetchIterator.scala | 107 ++++++++++++++ .../SparkExecuteStatementOperation.scala | 69 ++------- .../thriftserver/FetchIteratorSuite.scala | 134 ++++++++++++++++++ 3 files changed, 256 insertions(+), 54 deletions(-) create mode 100644 sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/FetchIterator.scala create mode 100644 sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/FetchIteratorSuite.scala diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/FetchIterator.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/FetchIterator.scala new file mode 100644 index 0000000000000..b9db657952b56 --- /dev/null +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/FetchIterator.scala @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.thriftserver + +private[hive] sealed trait FetchIterator[A] extends Iterator[A] { + /** + * Begin a fetch block, forward from the current position. + * Resets the fetch start offset. + */ + def fetchNext(): Unit + + /** + * Begin a fetch block, moving the iterator back by offset from the start of the previous fetch + * block start. + * Resets the fetch start offset. + * + * @param offset the amount to move a fetch start position toward the prior direction. + */ + def fetchPrior(offset: Long): Unit = fetchAbsolute(getFetchStart - offset) + + /** + * Begin a fetch block, moving the iterator to the given position. + * Resets the fetch start offset. + * + * @param pos index to move a position of iterator. 
+ */ + def fetchAbsolute(pos: Long): Unit + + def getFetchStart: Long + + def getPosition: Long +} + +private[hive] class ArrayFetchIterator[A](src: Array[A]) extends FetchIterator[A] { + private var fetchStart: Long = 0 + + private var position: Long = 0 + + override def fetchNext(): Unit = fetchStart = position + + override def fetchAbsolute(pos: Long): Unit = { + position = (pos max 0) min src.length + fetchStart = position + } + + override def getFetchStart: Long = fetchStart + + override def getPosition: Long = position + + override def hasNext: Boolean = position < src.length + + override def next(): A = { + position += 1 + src(position.toInt - 1) + } +} + +private[hive] class IterableFetchIterator[A](iterable: Iterable[A]) extends FetchIterator[A] { + private var iter: Iterator[A] = iterable.iterator + + private var fetchStart: Long = 0 + + private var position: Long = 0 + + override def fetchNext(): Unit = fetchStart = position + + override def fetchAbsolute(pos: Long): Unit = { + val newPos = pos max 0 + if (newPos < position) resetPosition() + while (position < newPos && hasNext) next() + fetchStart = position + } + + override def getFetchStart: Long = fetchStart + + override def getPosition: Long = position + + override def hasNext: Boolean = iter.hasNext + + override def next(): A = { + position += 1 + iter.next() + } + + private def resetPosition(): Unit = { + if (position != 0) { + iter = iterable.iterator + position = 0 + fetchStart = 0 + } + } +} diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala index f7a4be9591818..c4ae035e1f836 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala @@ -69,13 +69,7 @@ private[hive] class SparkExecuteStatementOperation( private var result: DataFrame = _ - // We cache the returned rows to get iterators again in case the user wants to use FETCH_FIRST. - // This is only used when `spark.sql.thriftServer.incrementalCollect` is set to `false`. - // In case of `true`, this will be `None` and FETCH_FIRST will trigger re-execution. - private var resultList: Option[Array[SparkRow]] = _ - private var previousFetchEndOffset: Long = 0 - private var previousFetchStartOffset: Long = 0 - private var iter: Iterator[SparkRow] = _ + private var iter: FetchIterator[SparkRow] = _ private var dataTypes: Array[DataType] = _ private lazy val resultSchema: TableSchema = { @@ -148,43 +142,14 @@ private[hive] class SparkExecuteStatementOperation( setHasResultSet(true) val resultRowSet: RowSet = RowSetFactory.create(getResultSetSchema, getProtocolVersion, false) - // Reset iter when FETCH_FIRST or FETCH_PRIOR - if ((order.equals(FetchOrientation.FETCH_FIRST) || - order.equals(FetchOrientation.FETCH_PRIOR)) && previousFetchEndOffset != 0) { - // Reset the iterator to the beginning of the query. - iter = if (sqlContext.getConf(SQLConf.THRIFTSERVER_INCREMENTAL_COLLECT.key).toBoolean) { - resultList = None - result.toLocalIterator.asScala - } else { - if (resultList.isEmpty) { - resultList = Some(result.collect()) - } - resultList.get.iterator - } - } - - var resultOffset = { - if (order.equals(FetchOrientation.FETCH_FIRST)) { - logInfo(s"FETCH_FIRST request with $statementId. 
Resetting to resultOffset=0") - 0 - } else if (order.equals(FetchOrientation.FETCH_PRIOR)) { - // TODO: FETCH_PRIOR should be handled more efficiently than rewinding to beginning and - // reiterating. - val targetOffset = math.max(previousFetchStartOffset - maxRowsL, 0) - logInfo(s"FETCH_PRIOR request with $statementId. Resetting to resultOffset=$targetOffset") - var off = 0 - while (off < targetOffset && iter.hasNext) { - iter.next() - off += 1 - } - off - } else { // FETCH_NEXT - previousFetchEndOffset - } + if (order.equals(FetchOrientation.FETCH_FIRST)) { + iter.fetchAbsolute(0) + } else if (order.equals(FetchOrientation.FETCH_PRIOR)) { + iter.fetchPrior(maxRowsL) + } else { + iter.fetchNext() } - - resultRowSet.setStartOffset(resultOffset) - previousFetchStartOffset = resultOffset + resultRowSet.setStartOffset(iter.getPosition) if (!iter.hasNext) { resultRowSet } else { @@ -206,11 +171,9 @@ private[hive] class SparkExecuteStatementOperation( } resultRowSet.addRow(row.toArray.asInstanceOf[Array[Object]]) curRow += 1 - resultOffset += 1 } - previousFetchEndOffset = resultOffset log.info(s"Returning result set with ${curRow} rows from offsets " + - s"[$previousFetchStartOffset, $previousFetchEndOffset) with $statementId") + s"[${iter.getFetchStart}, ${iter.getPosition}) with $statementId") resultRowSet } } @@ -326,14 +289,12 @@ private[hive] class SparkExecuteStatementOperation( logDebug(result.queryExecution.toString()) HiveThriftServer2.eventManager.onStatementParsed(statementId, result.queryExecution.toString()) - iter = { - if (sqlContext.getConf(SQLConf.THRIFTSERVER_INCREMENTAL_COLLECT.key).toBoolean) { - resultList = None - result.toLocalIterator.asScala - } else { - resultList = Some(result.collect()) - resultList.get.iterator - } + iter = if (sqlContext.getConf(SQLConf.THRIFTSERVER_INCREMENTAL_COLLECT.key).toBoolean) { + new IterableFetchIterator[SparkRow](new Iterable[SparkRow] { + override def iterator: Iterator[SparkRow] = result.toLocalIterator.asScala + }) + } else { + new ArrayFetchIterator[SparkRow](result.collect()) } dataTypes = result.schema.fields.map(_.dataType) } catch { diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/FetchIteratorSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/FetchIteratorSuite.scala new file mode 100644 index 0000000000000..0fbdb8a9050c8 --- /dev/null +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/FetchIteratorSuite.scala @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive.thriftserver + +import org.apache.spark.SparkFunSuite + +class FetchIteratorSuite extends SparkFunSuite { + + private def getRows(fetchIter: FetchIterator[Int], maxRowCount: Int): Seq[Int] = { + for (_ <- 0 until maxRowCount if fetchIter.hasNext) yield fetchIter.next() + } + + test("SPARK-33655: Test fetchNext and fetchPrior") { + val testData = 0 until 10 + + def iteratorTest(fetchIter: FetchIterator[Int]): Unit = { + fetchIter.fetchNext() + assert(fetchIter.getFetchStart == 0) + assert(fetchIter.getPosition == 0) + assertResult(0 until 2)(getRows(fetchIter, 2)) + assert(fetchIter.getFetchStart == 0) + assert(fetchIter.getPosition == 2) + + fetchIter.fetchNext() + assert(fetchIter.getFetchStart == 2) + assert(fetchIter.getPosition == 2) + assertResult(2 until 3)(getRows(fetchIter, 1)) + assert(fetchIter.getFetchStart == 2) + assert(fetchIter.getPosition == 3) + + fetchIter.fetchPrior(2) + assert(fetchIter.getFetchStart == 0) + assert(fetchIter.getPosition == 0) + assertResult(0 until 3)(getRows(fetchIter, 3)) + assert(fetchIter.getFetchStart == 0) + assert(fetchIter.getPosition == 3) + + fetchIter.fetchNext() + assert(fetchIter.getFetchStart == 3) + assert(fetchIter.getPosition == 3) + assertResult(3 until 8)(getRows(fetchIter, 5)) + assert(fetchIter.getFetchStart == 3) + assert(fetchIter.getPosition == 8) + + fetchIter.fetchPrior(2) + assert(fetchIter.getFetchStart == 1) + assert(fetchIter.getPosition == 1) + assertResult(1 until 4)(getRows(fetchIter, 3)) + assert(fetchIter.getFetchStart == 1) + assert(fetchIter.getPosition == 4) + + fetchIter.fetchNext() + assert(fetchIter.getFetchStart == 4) + assert(fetchIter.getPosition == 4) + assertResult(4 until 10)(getRows(fetchIter, 10)) + assert(fetchIter.getFetchStart == 4) + assert(fetchIter.getPosition == 10) + + fetchIter.fetchNext() + assert(fetchIter.getFetchStart == 10) + assert(fetchIter.getPosition == 10) + assertResult(Seq.empty[Int])(getRows(fetchIter, 10)) + assert(fetchIter.getFetchStart == 10) + assert(fetchIter.getPosition == 10) + + fetchIter.fetchPrior(20) + assert(fetchIter.getFetchStart == 0) + assert(fetchIter.getPosition == 0) + assertResult(0 until 3)(getRows(fetchIter, 3)) + assert(fetchIter.getFetchStart == 0) + assert(fetchIter.getPosition == 3) + } + iteratorTest(new ArrayFetchIterator[Int](testData.toArray)) + iteratorTest(new IterableFetchIterator[Int](testData)) + } + + test("SPARK-33655: Test fetchAbsolute") { + val testData = 0 until 10 + + def iteratorTest(fetchIter: FetchIterator[Int]): Unit = { + fetchIter.fetchNext() + assert(fetchIter.getFetchStart == 0) + assert(fetchIter.getPosition == 0) + assertResult(0 until 5)(getRows(fetchIter, 5)) + assert(fetchIter.getFetchStart == 0) + assert(fetchIter.getPosition == 5) + + fetchIter.fetchAbsolute(2) + assert(fetchIter.getFetchStart == 2) + assert(fetchIter.getPosition == 2) + assertResult(2 until 5)(getRows(fetchIter, 3)) + assert(fetchIter.getFetchStart == 2) + assert(fetchIter.getPosition == 5) + + fetchIter.fetchAbsolute(7) + assert(fetchIter.getFetchStart == 7) + assert(fetchIter.getPosition == 7) + assertResult(7 until 8)(getRows(fetchIter, 1)) + assert(fetchIter.getFetchStart == 7) + assert(fetchIter.getPosition == 8) + + fetchIter.fetchAbsolute(20) + assert(fetchIter.getFetchStart == 10) + assert(fetchIter.getPosition == 10) + assertResult(Seq.empty[Int])(getRows(fetchIter, 1)) + assert(fetchIter.getFetchStart == 10) + assert(fetchIter.getPosition == 10) + + fetchIter.fetchAbsolute(0) + assert(fetchIter.getFetchStart 
== 0) + assert(fetchIter.getPosition == 0) + assertResult(0 until 3)(getRows(fetchIter, 3)) + assert(fetchIter.getFetchStart == 0) + assert(fetchIter.getPosition == 3) + } + iteratorTest(new ArrayFetchIterator[Int](testData.toArray)) + iteratorTest(new IterableFetchIterator[Int](testData)) + } +} From 9959d49942d334b03a05c43299f3949a48e5fa17 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Wed, 9 Dec 2020 19:47:20 +0900 Subject: [PATCH 0714/1009] [SPARK-33719][DOC] Add make_date/make_timestamp/make_interval into the doc of ANSI Compliance ### What changes were proposed in this pull request? Add make_date/make_timestamp/make_interval into the doc of ANSI Compliance ### Why are the changes needed? Users can know that these functions throw runtime exceptions under ANSI mode if the result is not valid. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Build doc and check it in browser: ![image](https://user-images.githubusercontent.com/1097932/101608930-34a79e80-39bb-11eb-9294-9d9b8c3f6faa.png) Closes #30683 from gengliangwang/improveDoc. Authored-by: Gengliang Wang Signed-off-by: HyukjinKwon --- docs/sql-ref-ansi-compliance.md | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index c3e17dc22eed0..08ba07aa8de63 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -144,14 +144,18 @@ SELECT * FROM t; The behavior of some SQL functions can be different under ANSI mode (`spark.sql.ansi.enabled=true`). - `size`: This function returns null for null input. - - `element_at`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices. - - `element_at`: This function throws `NoSuchElementException` if key does not exist in map. + - `element_at`: + - This function throws `ArrayIndexOutOfBoundsException` if using invalid indices. + - This function throws `NoSuchElementException` if key does not exist in map. - `elt`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices. - `parse_url`: This function throws `IllegalArgumentException` if an input string is not a valid url. - - `to_date` This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid. - - `to_timestamp` This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid. - - `unix_timestamp` This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid. - - `to_unix_timestamp` This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid. + - `to_date`: This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid. + - `to_timestamp`: This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid. + - `unix_timestamp`: This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid. + - `to_unix_timestamp`: This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid. + - `make_date`: This function should fail with an exception if the result date is invalid. + - `make_timestamp`: This function should fail with an exception if the result timestamp is invalid. 
+ - `make_interval`: This function should fail with an exception if the result interval is invalid. ### SQL Operators From b5399d4ef1c4e3df9d01a07e76bede41d7255d1c Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 9 Dec 2020 20:26:18 +0900 Subject: [PATCH 0715/1009] [SPARK-33071][SPARK-33536][SQL][FOLLOW-UP] Rename deniedMetadataKeys to nonInheritableMetadataKeys in Alias ### What changes were proposed in this pull request? This PR is a followup of https://github.com/apache/spark/pull/30488. This PR proposes to rename `Alias.deniedMetadataKeys` to `Alias.nonInheritableMetadataKeys` to make it less confusing. ### Why are the changes needed? To make it easier to maintain and read. ### Does this PR introduce _any_ user-facing change? No. This is rather a code cleanup. ### How was this patch tested? Ran the unittests written in the previous PR manually. Jenkins and GitHub Actions in this PR should also test them. Closes #30682 from HyukjinKwon/SPARK-33071-SPARK-33536. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- .../sql/catalyst/expressions/AliasHelper.scala | 2 +- .../expressions/namedExpressions.scala | 18 +++++++++++------- .../scala/org/apache/spark/sql/Column.scala | 9 +++++---- .../spark/sql/SparkSessionExtensionSuite.scala | 6 +++--- 4 files changed, 20 insertions(+), 15 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala index ad6cf959a69c6..1f3f762662252 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala @@ -90,7 +90,7 @@ trait AliasHelper { exprId = a.exprId, qualifier = a.qualifier, explicitMetadata = Some(a.metadata), - deniedMetadataKeys = a.deniedMetadataKeys) + nonInheritableMetadataKeys = a.nonInheritableMetadataKeys) case a: MultiAlias => a.copy(child = trimAliases(a.child)) case other => trimAliases(other) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala index 22aabd3c6b30b..badc2ecc9cb28 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala @@ -143,14 +143,14 @@ abstract class Attribute extends LeafExpression with NamedExpression with NullIn * fully qualified way. Consider the examples tableName.name, subQueryAlias.name. * tableName and subQueryAlias are possible qualifiers. * @param explicitMetadata Explicit metadata associated with this alias that overwrites child's. - * @param deniedMetadataKeys Keys of metadata entries that are supposed to be removed when - * inheriting the metadata from the child. + * @param nonInheritableMetadataKeys Keys of metadata entries that are supposed to be removed when + * inheriting the metadata from the child. */ case class Alias(child: Expression, name: String)( val exprId: ExprId = NamedExpression.newExprId, val qualifier: Seq[String] = Seq.empty, val explicitMetadata: Option[Metadata] = None, - val deniedMetadataKeys: Seq[String] = Seq.empty) + val nonInheritableMetadataKeys: Seq[String] = Seq.empty) extends UnaryExpression with NamedExpression { // Alias(Generator, xx) need to be transformed into Generate(generator, ...) 
@@ -172,7 +172,7 @@ case class Alias(child: Expression, name: String)( child match { case named: NamedExpression => val builder = new MetadataBuilder().withMetadata(named.metadata) - deniedMetadataKeys.foreach(builder.remove) + nonInheritableMetadataKeys.foreach(builder.remove) builder.build() case _ => Metadata.empty @@ -181,7 +181,10 @@ case class Alias(child: Expression, name: String)( } def newInstance(): NamedExpression = - Alias(child, name)(qualifier = qualifier, explicitMetadata = explicitMetadata) + Alias(child, name)( + qualifier = qualifier, + explicitMetadata = explicitMetadata, + nonInheritableMetadataKeys = nonInheritableMetadataKeys) override def toAttribute: Attribute = { if (resolved) { @@ -201,7 +204,7 @@ case class Alias(child: Expression, name: String)( override def toString: String = s"$child AS $name#${exprId.id}$typeSuffix$delaySuffix" override protected final def otherCopyArgs: Seq[AnyRef] = { - exprId :: qualifier :: explicitMetadata :: deniedMetadataKeys :: Nil + exprId :: qualifier :: explicitMetadata :: nonInheritableMetadataKeys :: Nil } override def hashCode(): Int = { @@ -212,7 +215,8 @@ case class Alias(child: Expression, name: String)( override def equals(other: Any): Boolean = other match { case a: Alias => name == a.name && exprId == a.exprId && child == a.child && qualifier == a.qualifier && - explicitMetadata == a.explicitMetadata && deniedMetadataKeys == a.deniedMetadataKeys + explicitMetadata == a.explicitMetadata && + nonInheritableMetadataKeys == a.nonInheritableMetadataKeys case _ => false } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 4ef23d7e31c59..539ef8dfe2665 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -1164,10 +1164,11 @@ class Column(val expr: Expression) extends Logging { * @since 2.0.0 */ def name(alias: String): Column = withExpr { - // SPARK-33536: The Alias is no longer a column reference after converting to an attribute. - // These denied metadata keys are used to strip the column reference related metadata for - // the Alias. So it won't be caught as a column reference in DetectAmbiguousSelfJoin. - Alias(expr, alias)(deniedMetadataKeys = Seq(Dataset.DATASET_ID_KEY, Dataset.COL_POS_KEY)) + // SPARK-33536: an alias is no longer a column reference. Therefore, + // we should not inherit the column reference related metadata in an alias + // so that it is not caught as a column reference in DetectAmbiguousSelfJoin. 
+ Alias(expr, alias)( + nonInheritableMetadataKeys = Seq(Dataset.DATASET_ID_KEY, Dataset.COL_POS_KEY)) } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala index 5e1c6ba92803d..7c19f98b762f4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala @@ -577,8 +577,8 @@ class ColumnarAlias(child: ColumnarExpression, name: String)( override val exprId: ExprId = NamedExpression.newExprId, override val qualifier: Seq[String] = Seq.empty, override val explicitMetadata: Option[Metadata] = None, - override val deniedMetadataKeys: Seq[String] = Seq.empty) - extends Alias(child, name)(exprId, qualifier, explicitMetadata, deniedMetadataKeys) + override val nonInheritableMetadataKeys: Seq[String] = Seq.empty) + extends Alias(child, name)(exprId, qualifier, explicitMetadata, nonInheritableMetadataKeys) with ColumnarExpression { override def columnarEval(batch: ColumnarBatch): Any = child.columnarEval(batch) @@ -715,7 +715,7 @@ case class PreRuleReplaceAddWithBrokenVersion() extends Rule[SparkPlan] { def replaceWithColumnarExpression(exp: Expression): ColumnarExpression = exp match { case a: Alias => new ColumnarAlias(replaceWithColumnarExpression(a.child), - a.name)(a.exprId, a.qualifier, a.explicitMetadata, a.deniedMetadataKeys) + a.name)(a.exprId, a.qualifier, a.explicitMetadata, a.nonInheritableMetadataKeys) case att: AttributeReference => new ColumnarAttributeReference(att.name, att.dataType, att.nullable, att.metadata)(att.exprId, att.qualifier) From fa9ce1d4e893e3a32bc05e4d95241d32710deb54 Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Wed, 9 Dec 2020 11:42:54 -0800 Subject: [PATCH 0716/1009] [SPARK-33722][SQL] Handle DELETE in ReplaceNullWithFalseInPredicate ### What changes were proposed in this pull request? This PR adds `DeleteFromTable` to supported plans in `ReplaceNullWithFalseInPredicate`. ### Why are the changes needed? This change allows Spark to optimize delete conditions like we optimize filters. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? This PR extends the existing test cases to also cover `DeleteFromTable`. Closes #30688 from aokolnychyi/spark-33722. 
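A minimal sketch of the effect, assuming a v2 table `cat.db.events` in a catalog that supports row-level deletes (the catalog, table, and column names here are hypothetical and not from the patch):

```
// The IF branch below can only evaluate to NULL or FALSE, so once DeleteFromTable is
// covered by ReplaceNullWithFalseInPredicate the condition folds to FALSE and the
// DELETE becomes a no-op, matching the existing behavior for filter and join conditions.
spark.sql(
  """DELETE FROM cat.db.events
    |WHERE IF(status = 'open', NULL, FALSE)""".stripMargin)
```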
Authored-by: Anton Okolnychyi Signed-off-by: Dongjoon Hyun --- .../ReplaceNullWithFalseInPredicate.scala | 3 ++- ...ReplaceNullWithFalseInPredicateSuite.scala | 23 ++++++++++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala index ef3de4738c75c..698ece4f9e69f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.expressions.{And, ArrayExists, ArrayFilter, CaseWhen, Expression, If} import org.apache.spark.sql.catalyst.expressions.{LambdaFunction, Literal, MapFilter, Or} import org.apache.spark.sql.catalyst.expressions.Literal.FalseLiteral -import org.apache.spark.sql.catalyst.plans.logical.{Filter, Join, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, Filter, Join, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.types.BooleanType import org.apache.spark.util.Utils @@ -53,6 +53,7 @@ object ReplaceNullWithFalseInPredicate extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transform { case f @ Filter(cond, _) => f.copy(condition = replaceNullWithFalse(cond)) case j @ Join(_, _, _, Some(cond), _) => j.copy(condition = Some(replaceNullWithFalse(cond))) + case d @ DeleteFromTable(_, Some(cond)) => d.copy(condition = Some(replaceNullWithFalse(cond))) case p: LogicalPlan => p transformExpressions { case i @ If(pred, _, _) => i.copy(predicate = replaceNullWithFalse(pred)) case cw @ CaseWhen(branches, _) => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala index eb52c5b74772c..6fc31c94e47eb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions.{And, ArrayExists, ArrayFilter, ArrayTransform, CaseWhen, Expression, GreaterThan, If, LambdaFunction, Literal, MapFilter, NamedExpression, Or, UnresolvedNamedLambdaVariable} import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest} -import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{BooleanType, IntegerType} @@ -48,6 +48,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { test("replace null inside filter and join conditions") { testFilter(originalCond = Literal(null, BooleanType), expectedCond = FalseLiteral) testJoin(originalCond = Literal(null, BooleanType), expectedCond = FalseLiteral) + testDelete(originalCond 
= Literal(null, BooleanType), expectedCond = FalseLiteral) } test("Not expected type - replaceNullWithFalse") { @@ -64,6 +65,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { Literal(null, BooleanType)) testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) + testDelete(originalCond, expectedCond = FalseLiteral) } test("replace nulls in nested expressions in branches of If") { @@ -73,6 +75,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { UnresolvedAttribute("b") && Literal(null, BooleanType)) testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) + testDelete(originalCond, expectedCond = FalseLiteral) } test("replace null in elseValue of CaseWhen") { @@ -83,6 +86,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { val expectedCond = CaseWhen(branches, FalseLiteral) testFilter(originalCond, expectedCond) testJoin(originalCond, expectedCond) + testDelete(originalCond, expectedCond) } test("replace null in branch values of CaseWhen") { @@ -92,6 +96,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { val originalCond = CaseWhen(branches, Literal(null)) testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) + testDelete(originalCond, expectedCond = FalseLiteral) } test("replace null in branches of If inside CaseWhen") { @@ -108,6 +113,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond) testJoin(originalCond, expectedCond) + testDelete(originalCond, expectedCond) } test("replace null in complex CaseWhen expressions") { @@ -127,6 +133,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond) testJoin(originalCond, expectedCond) + testDelete(originalCond, expectedCond) } test("replace null in Or") { @@ -134,12 +141,14 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { val expectedCond = UnresolvedAttribute("b") testFilter(originalCond, expectedCond) testJoin(originalCond, expectedCond) + testDelete(originalCond, expectedCond) } test("replace null in And") { val originalCond = And(UnresolvedAttribute("b"), Literal(null)) testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) + testDelete(originalCond, expectedCond = FalseLiteral) } test("replace nulls in nested And/Or expressions") { @@ -148,6 +157,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { Or(Literal(null), And(Literal(null), And(UnresolvedAttribute("b"), Literal(null))))) testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) + testDelete(originalCond, expectedCond = FalseLiteral) } test("replace null in And inside branches of If") { @@ -157,6 +167,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { And(UnresolvedAttribute("b"), Literal(null, BooleanType))) testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) + testDelete(originalCond, expectedCond = FalseLiteral) } test("replace null in branches of If inside And") { @@ -168,6 +179,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { And(FalseLiteral, UnresolvedAttribute("b")))) testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) + testDelete(originalCond, expectedCond = FalseLiteral) } test("replace null in 
branches of If inside another If") { @@ -177,6 +189,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { Literal(null)) testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) + testDelete(originalCond, expectedCond = FalseLiteral) } test("replace null in CaseWhen inside another CaseWhen") { @@ -184,6 +197,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { val originalCond = CaseWhen(Seq(nestedCaseWhen -> TrueLiteral), Literal(null)) testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) + testDelete(originalCond, expectedCond = FalseLiteral) } test("inability to replace null in non-boolean branches of If") { @@ -196,6 +210,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { FalseLiteral) testFilter(originalCond = condition, expectedCond = condition) testJoin(originalCond = condition, expectedCond = condition) + testDelete(originalCond = condition, expectedCond = condition) } test("inability to replace null in non-boolean values of CaseWhen") { @@ -210,6 +225,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { val condition = CaseWhen(branches) testFilter(originalCond = condition, expectedCond = condition) testJoin(originalCond = condition, expectedCond = condition) + testDelete(originalCond = condition, expectedCond = condition) } test("inability to replace null in non-boolean branches of If inside another If") { @@ -222,6 +238,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { FalseLiteral) testFilter(originalCond = condition, expectedCond = condition) testJoin(originalCond = condition, expectedCond = condition) + testDelete(originalCond = condition, expectedCond = condition) } test("replace null in If used as a join condition") { @@ -353,6 +370,10 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { test((rel, exp) => rel.select(exp), originalExpr, expectedExpr) } + private def testDelete(originalCond: Expression, expectedCond: Expression): Unit = { + test((rel, expr) => DeleteFromTable(rel, Some(expr)), originalCond, expectedCond) + } + private def testHigherOrderFunc( argument: Expression, createExpr: (Expression, Expression) => Expression, From 667f64f447a75141b091c361acebdc363bfe9288 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 9 Dec 2020 14:26:53 -0800 Subject: [PATCH 0717/1009] [SPARK-33725][BUILD] Upgrade snappy-java to 1.1.8.2 ### What changes were proposed in this pull request? This upgrades snappy-java to 1.1.8.2. ### Why are the changes needed? Minor version upgrade that includes: - [Fixed](https://github.com/xerial/snappy-java/pull/265) an initialization issue when using a recent Mac OS X version - Support Apple Silicon (M1, Mac-aarch64) - Fixed the pure-java Snappy fallback logic when no native library for your platform is found. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Closes #30690 from viirya/upgrade-snappy. 
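As an illustrative sanity check of the upgraded dependency (not part of the patch), a simple compress/uncompress round trip against the snappy-java API:

```
import org.xerial.snappy.Snappy

// Round-trip a small payload; on platforms without a native snappy library,
// the pure-java fallback fixed in 1.1.8.2 is exercised instead.
val input = "snappy-java 1.1.8.2".getBytes("UTF-8")
val compressed = Snappy.compress(input)
val restored = Snappy.uncompress(compressed)
assert(java.util.Arrays.equals(input, restored))
```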
Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 3a54dbd6232e3..b731c643aabe7 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -221,7 +221,7 @@ shims/0.9.0//shims-0.9.0.jar slf4j-api/1.7.30//slf4j-api-1.7.30.jar slf4j-log4j12/1.7.30//slf4j-log4j12-1.7.30.jar snakeyaml/1.26//snakeyaml-1.26.jar -snappy-java/1.1.8//snappy-java-1.1.8.jar +snappy-java/1.1.8.2//snappy-java-1.1.8.2.jar spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 67bcc7a8ed902..84b44342280a5 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -236,7 +236,7 @@ shims/0.9.0//shims-0.9.0.jar slf4j-api/1.7.30//slf4j-api-1.7.30.jar slf4j-log4j12/1.7.30//slf4j-log4j12-1.7.30.jar snakeyaml/1.26//snakeyaml-1.26.jar -snappy-java/1.1.8//snappy-java-1.1.8.jar +snappy-java/1.1.8.2//snappy-java-1.1.8.2.jar spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar diff --git a/pom.xml b/pom.xml index 23eb16a7db472..f449bf7928ecc 100644 --- a/pom.xml +++ b/pom.xml @@ -171,7 +171,7 @@ 1.9.13 2.10.5 2.10.5.1 - 1.1.8 + 1.1.8.2 1.1.2 1.10 1.20 From 991b7977b5006e1e0d02b7d67a3e0fc50f5a9f66 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Thu, 10 Dec 2020 11:35:55 +0900 Subject: [PATCH 0718/1009] [SPARK-33727][K8S] Fall back from gnupg.net to openpgp.org ### What changes were proposed in this pull request? While building R docker image if we can't fetch the key from gnupg.net fall back to openpgp.org ### Why are the changes needed? gnupg.net key servers are flaky and sometimes fail to resolve or return keys. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Tried to add key on my desktop, it failed, then tried to add key with openpgp.org and it succeed. Closes #30696 from holdenk/SPARK-33727-gnupg-server-is-flaky. 
Authored-by: Holden Karau Signed-off-by: HyukjinKwon --- .../docker/src/main/dockerfiles/spark/bindings/R/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile index bd645e40677d0..f63f2d0d58e22 100644 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile @@ -29,7 +29,7 @@ RUN mkdir ${SPARK_HOME}/R RUN \ echo "deb http://cloud.r-project.org/bin/linux/debian buster-cran35/" >> /etc/apt/sources.list && \ apt install -y gnupg && \ - apt-key adv --keyserver keys.gnupg.net --recv-key 'E19F5F87128899B192B1A2C2AD5F960A256A04AF' && \ + (apt-key adv --keyserver keys.gnupg.net --recv-key 'E19F5F87128899B192B1A2C2AD5F960A256A04AF' || apt-key adv --keyserver keys.openpgp.org --recv-key 'E19F5F87128899B192B1A2C2AD5F960A256A04AF') && \ apt-get update && \ apt install -y -t buster-cran35 r-base r-base-dev && \ rm -rf /var/cache/apt/* From 1c7f5f1ac7ecf0390410d2da6f3b1a615a5a71cc Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Wed, 9 Dec 2020 20:42:10 -0800 Subject: [PATCH 0719/1009] [SPARK-33724][K8S] Add decom script as a configuration param ### What changes were proposed in this pull request? Makes the location of the decommission script used in Kubernetes for graceful shutdown configurable. ### Why are the changes needed? Some environments don't use the Spark image builder and instead mount the decompressed Spark distro. In those envs configuring the location of the decommissioning script is required. ### Does this PR introduce _any_ user-facing change? New configuration parameter. ### How was this patch tested? Existing decommissioning integration test. Closes #30694 from holdenk/SPARK-33724-allow-decommissioning-script-location-to-be-configured. 
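A usage sketch for the new setting, assuming the decommissioning script is mounted into the executor image at an illustrative path (the path and application name below are placeholders):

```
import org.apache.spark.sql.SparkSession

// Point the executor preStop hook at a custom decommissioning script instead of
// the built-in /opt/decom.sh default.
val spark = SparkSession.builder()
  .appName("decom-script-example")
  .config("spark.kubernetes.decommission.script", "/opt/spark/sbin/decom.sh")
  .getOrCreate()
```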
Authored-by: Holden Karau Signed-off-by: Dongjoon Hyun --- .../main/scala/org/apache/spark/deploy/k8s/Config.scala | 8 ++++++++ .../deploy/k8s/features/BasicExecutorFeatureStep.scala | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index 41194f3a2676f..c28d6fd405ae1 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -24,6 +24,14 @@ import org.apache.spark.internal.config.ConfigBuilder private[spark] object Config extends Logging { + val DECOMMISSION_SCRIPT = + ConfigBuilder("spark.kubernetes.decommission.script") + .doc("The location of the script to use for graceful decommissioning") + .version("3.2.0") + .stringConf + .createWithDefault("/opt/decom.sh") + + val KUBERNETES_CONTEXT = ConfigBuilder("spark.kubernetes.context") .doc("The desired context from your K8S config file used to configure the K8S " + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala index 250dd8238d9ea..4398f545917bf 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala @@ -230,7 +230,7 @@ private[spark] class BasicExecutorFeatureStep( new ContainerBuilder(containerWithLimitCores).withNewLifecycle() .withNewPreStop() .withNewExec() - .addToCommand("/opt/decom.sh") + .addToCommand(kubernetesConf.get(DECOMMISSION_SCRIPT)) .endExec() .endPreStop() .endLifecycle() From af37c7f4115a2edf46a304f90db0aec4d3edde16 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 10 Dec 2020 04:54:52 +0000 Subject: [PATCH 0720/1009] [SPARK-33558][SQL][TESTS] Unify v1 and v2 ALTER TABLE .. ADD PARTITION tests ### What changes were proposed in this pull request? 1. Move the `ALTER TABLE .. ADD PARTITION` parsing tests to `AlterTableAddPartitionParserSuite` 2. Place v1 tests for `ALTER TABLE .. ADD PARTITION` from `DDLSuite` and v2 tests from `AlterTablePartitionV2SQLSuite` to the common trait `AlterTableAddPartitionSuiteBase`, so, the tests will run for V1, Hive V1 and V2 DS. ### Why are the changes needed? - The unification will allow to run common `ALTER TABLE .. ADD PARTITION` tests for both DSv1 and Hive DSv1, DSv2 - We can detect missing features and differences between DSv1 and DSv2 implementations. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running new test suites: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableAddPartitionSuite" ``` Closes #30685 from MaxGekk/unify-alter-table-add-partition-tests. 
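For context, a rough sketch of the statement shapes the unified suites exercise across the V1, Hive V1, and V2 catalog implementations (namespace, table, and location names are placeholders):

```
// Create a partitioned table, add a partition idempotently, then inspect it;
// the unified suites assert the same observable behavior for each catalog implementation.
spark.sql("CREATE TABLE ns.tbl (id BIGINT, data STRING) USING parquet PARTITIONED BY (id)")
spark.sql("ALTER TABLE ns.tbl ADD IF NOT EXISTS PARTITION (id = 1) LOCATION 'loc'")
spark.sql("SHOW PARTITIONS ns.tbl").show()
```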
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../analysis/ResolvePartitionSpec.scala | 2 +- .../sql/catalyst/parser/DDLParserSuite.scala | 27 --- .../AlterTablePartitionV2SQLSuite.scala | 152 ++------------ .../AlterTableAddPartitionParserSuite.scala | 51 +++++ .../AlterTableAddPartitionSuiteBase.scala | 187 ++++++++++++++++++ .../sql/execution/command/DDLSuite.scala | 61 ------ .../v1/AlterTableAddPartitionSuite.scala | 64 ++++++ .../v2/AlterTableAddPartitionSuite.scala | 89 +++++++++ .../sql/hive/execution/HiveDDLSuite.scala | 4 - .../command/AlterTableAddPartitionSuite.scala | 46 +++++ 10 files changed, 450 insertions(+), 233 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionParserSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddPartitionSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala index feb05d3b6926b..099ac6172c9e6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala @@ -81,7 +81,7 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { resolvedPartitionSpec } - private def convertToPartIdent( + private[sql] def convertToPartIdent( partitionSpec: TablePartitionSpec, schema: Seq[StructField]): InternalRow = { val partValues = schema.map { part => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 947154eae12c8..e194e7112b1d4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -2042,33 +2042,6 @@ class DDLParserSuite extends AnalysisTest { AlterTableRecoverPartitionsStatement(Seq("a", "b", "c"))) } - test("alter table: add partition") { - val sql1 = - """ - |ALTER TABLE a.b.c ADD IF NOT EXISTS PARTITION - |(dt='2008-08-08', country='us') LOCATION 'location1' PARTITION - |(dt='2009-09-09', country='uk') - """.stripMargin - val sql2 = "ALTER TABLE a.b.c ADD PARTITION (dt='2008-08-08') LOCATION 'loc'" - - val parsed1 = parsePlan(sql1) - val parsed2 = parsePlan(sql2) - - val expected1 = AlterTableAddPartition( - UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... ADD PARTITION ..."), - Seq( - UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us"), Some("location1")), - UnresolvedPartitionSpec(Map("dt" -> "2009-09-09", "country" -> "uk"), None)), - ifNotExists = true) - val expected2 = AlterTableAddPartition( - UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... 
ADD PARTITION ..."), - Seq(UnresolvedPartitionSpec(Map("dt" -> "2008-08-08"), Some("loc"))), - ifNotExists = false) - - comparePlans(parsed1, expected1) - comparePlans(parsed2, expected2) - } - test("alter view: add partition (not supported)") { assertUnsupported( """ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala index 45d47c6d8681c..570976965ec7c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala @@ -17,16 +17,12 @@ package org.apache.spark.sql.connector -import java.time.{LocalDate, LocalDateTime} - import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionsException, PartitionsAlreadyExistException} -import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, DateTimeUtils} +import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionsException import org.apache.spark.sql.connector.catalog.{CatalogV2Implicits, Identifier} import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.unsafe.types.UTF8String class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { @@ -45,66 +41,6 @@ class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { } } - test("ALTER TABLE ADD PARTITION") { - val t = "testpart.ns1.ns2.tbl" - withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") - spark.sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") - - val partTable = catalog("testpart").asTableCatalog - .loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")).asInstanceOf[InMemoryPartitionTable] - assert(partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) - - val partMetadata = partTable.loadPartitionMetadata(InternalRow.fromSeq(Seq(1))) - assert(partMetadata.containsKey("location")) - assert(partMetadata.get("location") == "loc") - } - } - - test("ALTER TABLE ADD PARTITIONS") { - val t = "testpart.ns1.ns2.tbl" - withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") - spark.sql( - s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc' PARTITION (id=2) LOCATION 'loc1'") - - val partTable = catalog("testpart").asTableCatalog - .loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")).asInstanceOf[InMemoryPartitionTable] - assert(partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) - assert(partTable.partitionExists(InternalRow.fromSeq(Seq(2)))) - - val partMetadata = partTable.loadPartitionMetadata(InternalRow.fromSeq(Seq(1))) - assert(partMetadata.containsKey("location")) - assert(partMetadata.get("location") == "loc") - - val partMetadata1 = partTable.loadPartitionMetadata(InternalRow.fromSeq(Seq(2))) - assert(partMetadata1.containsKey("location")) - assert(partMetadata1.get("location") == "loc1") - } - } - - test("ALTER TABLE ADD PARTITIONS: partition already exists") { - val t = "testpart.ns1.ns2.tbl" - withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") - spark.sql( - s"ALTER TABLE $t ADD PARTITION (id=2) LOCATION 'loc1'") - - assertThrows[PartitionsAlreadyExistException]( - spark.sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'" + - " PARTITION (id=2) 
LOCATION 'loc1'")) - - val partTable = catalog("testpart").asTableCatalog - .loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")).asInstanceOf[InMemoryPartitionTable] - assert(!partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) - - spark.sql(s"ALTER TABLE $t ADD IF NOT EXISTS PARTITION (id=1) LOCATION 'loc'" + - " PARTITION (id=2) LOCATION 'loc1'") - assert(partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) - assert(partTable.partitionExists(InternalRow.fromSeq(Seq(2)))) - } - } - test("ALTER TABLE RENAME PARTITION") { val t = "testcat.ns1.ns2.tbl" withTable(t) { @@ -173,7 +109,7 @@ class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { val errMsg = intercept[AnalysisException] { - spark.sql(s"ALTER TABLE $t ADD PARTITION (ID=1) LOCATION 'loc1'") + spark.sql(s"ALTER TABLE $t DROP PARTITION (ID=1)") }.getMessage assert(errMsg.contains(s"ID is not a valid partition column in table $t")) } @@ -192,73 +128,14 @@ class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { } } - test("SPARK-33521: universal type conversions of partition values") { - val t = "testpart.ns1.ns2.tbl" - withTable(t) { - sql(s""" - |CREATE TABLE $t ( - | part0 tinyint, - | part1 smallint, - | part2 int, - | part3 bigint, - | part4 float, - | part5 double, - | part6 string, - | part7 boolean, - | part8 date, - | part9 timestamp - |) USING foo - |PARTITIONED BY (part0, part1, part2, part3, part4, part5, part6, part7, part8, part9) - |""".stripMargin) - val partTable = catalog("testpart").asTableCatalog - .loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")) - .asPartitionable - val expectedPartition = InternalRow.fromSeq(Seq[Any]( - -1, // tinyint - 0, // smallint - 1, // int - 2, // bigint - 3.14F, // float - 3.14D, // double - UTF8String.fromString("abc"), // string - true, // boolean - LocalDate.parse("2020-11-23").toEpochDay, - DateTimeUtils.instantToMicros( - LocalDateTime.parse("2020-11-23T22:13:10.123456").atZone(DateTimeTestUtils.LA).toInstant) - )) - assert(!partTable.partitionExists(expectedPartition)) - val partSpec = """ - | part0 = -1, - | part1 = 0, - | part2 = 1, - | part3 = 2, - | part4 = 3.14, - | part5 = 3.14, - | part6 = 'abc', - | part7 = true, - | part8 = '2020-11-23', - | part9 = '2020-11-23T22:13:10.123456' - |""".stripMargin - sql(s"ALTER TABLE $t ADD PARTITION ($partSpec) LOCATION 'loc1'") - assert(partTable.partitionExists(expectedPartition)) - sql(s" ALTER TABLE $t DROP PARTITION ($partSpec)") - assert(!partTable.partitionExists(expectedPartition)) - } - } - - test("SPARK-33650: add/drop partition into a table which doesn't support partition management") { + test("SPARK-33650: drop partition into a table which doesn't support partition management") { val t = "testcat.ns1.ns2.tbl" withTable(t) { spark.sql(s"CREATE TABLE $t (id bigint, data string) USING _") - Seq( - s"ALTER TABLE $t ADD PARTITION (id=1)", - s"ALTER TABLE $t DROP PARTITION (id=1)" - ).foreach { alterTable => - val errMsg = intercept[AnalysisException] { - spark.sql(alterTable) - }.getMessage - assert(errMsg.contains(s"Table $t can not alter partitions")) - } + val errMsg = intercept[AnalysisException] { + spark.sql(s"ALTER TABLE $t DROP PARTITION (id=1)") + }.getMessage + assert(errMsg.contains(s"Table $t can not alter partitions")) } } @@ -269,16 +146,11 @@ class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { |CREATE TABLE $t (id bigint, part0 int, part1 
string) |USING foo |PARTITIONED BY (part0, part1)""".stripMargin) - Seq( - s"ALTER TABLE $t ADD PARTITION (part0 = 1)", - s"ALTER TABLE $t DROP PARTITION (part0 = 1)" - ).foreach { alterTable => - val errMsg = intercept[AnalysisException] { - sql(alterTable) - }.getMessage - assert(errMsg.contains("Partition spec is invalid. " + - "The spec (part0) must match the partition spec (part0, part1)")) - } + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t DROP PARTITION (part0 = 1)") + }.getMessage + assert(errMsg.contains("Partition spec is invalid. " + + "The spec (part0) must match the partition spec (part0, part1)")) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionParserSuite.scala new file mode 100644 index 0000000000000..5ebca8f651604 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionParserSuite.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedPartitionSpec, UnresolvedTable} +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan +import org.apache.spark.sql.catalyst.plans.logical.AlterTableAddPartition +import org.apache.spark.sql.test.SharedSparkSession + +class AlterTableAddPartitionParserSuite extends AnalysisTest with SharedSparkSession { + test("add partition if not exists") { + val sql = """ + |ALTER TABLE a.b.c ADD IF NOT EXISTS PARTITION + |(dt='2008-08-08', country='us') LOCATION 'location1' PARTITION + |(dt='2009-09-09', country='uk')""".stripMargin + val parsed = parsePlan(sql) + val expected = AlterTableAddPartition( + UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... ADD PARTITION ..."), + Seq( + UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us"), Some("location1")), + UnresolvedPartitionSpec(Map("dt" -> "2009-09-09", "country" -> "uk"), None)), + ifNotExists = true) + comparePlans(parsed, expected) + } + + test("add partition") { + val sql = "ALTER TABLE a.b.c ADD PARTITION (dt='2008-08-08') LOCATION 'loc'" + val parsed = parsePlan(sql) + val expected = AlterTableAddPartition( + UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... 
ADD PARTITION ..."), + Seq(UnresolvedPartitionSpec(Map("dt" -> "2008-08-08"), Some("loc"))), + ifNotExists = false) + + comparePlans(parsed, expected) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala new file mode 100644 index 0000000000000..0cf0b395f139b --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.scalactic.source.Position +import org.scalatest.Tag + +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec +import org.apache.spark.sql.execution.datasources.PartitioningUtils +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SQLTestUtils + +trait AlterTableAddPartitionSuiteBase extends QueryTest with SQLTestUtils { + protected def version: String + protected def catalog: String + protected def defaultUsing: String + + override def test(testName: String, testTags: Tag*)(testFun: => Any) + (implicit pos: Position): Unit = { + super.test(s"ALTER TABLE .. 
ADD PARTITION $version: " + testName, testTags: _*)(testFun) + } + + protected def checkPartitions(t: String, expected: Map[String, String]*): Unit = { + val partitions = sql(s"SHOW PARTITIONS $t") + .collect() + .toSet + .map((row: Row) => row.getString(0)) + .map(PartitioningUtils.parsePathFragment) + assert(partitions === expected.toSet) + } + protected def checkLocation(t: String, spec: TablePartitionSpec, expected: String): Unit + + protected def withNsTable(ns: String, tableName: String, cat: String = catalog) + (f: String => Unit): Unit = { + val nsCat = s"$cat.$ns" + withNamespace(nsCat) { + sql(s"CREATE NAMESPACE $nsCat") + val t = s"$nsCat.$tableName" + withTable(t) { + f(t) + } + } + } + + test("one partition") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + Seq("", "IF NOT EXISTS").foreach { exists => + sql(s"ALTER TABLE $t ADD $exists PARTITION (id=1) LOCATION 'loc'") + + checkPartitions(t, Map("id" -> "1")) + checkLocation(t, Map("id" -> "1"), "loc") + } + } + } + + test("multiple partitions") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + Seq("", "IF NOT EXISTS").foreach { exists => + sql(s""" + |ALTER TABLE $t ADD $exists + |PARTITION (id=1) LOCATION 'loc' + |PARTITION (id=2) LOCATION 'loc1'""".stripMargin) + + checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) + checkLocation(t, Map("id" -> "1"), "loc") + checkLocation(t, Map("id" -> "2"), "loc1") + } + } + } + + test("multi-part partition") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, a int, b string) $defaultUsing PARTITIONED BY (a, b)") + Seq("", "IF NOT EXISTS").foreach { exists => + sql(s"ALTER TABLE $t ADD $exists PARTITION (a=2, b='abc')") + + checkPartitions(t, Map("a" -> "2", "b" -> "abc")) + } + } + } + + test("table to alter does not exist") { + withNsTable("ns", "does_not_exist") { t => + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t ADD IF NOT EXISTS PARTITION (a='4', b='9')") + }.getMessage + assert(errMsg.contains("Table not found")) + } + } + + test("case sensitivity in resolving partition specs") { + withNsTable("ns", "tbl") { t => + spark.sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val errMsg = intercept[AnalysisException] { + spark.sql(s"ALTER TABLE $t ADD PARTITION (ID=1) LOCATION 'loc1'") + }.getMessage + assert(errMsg.contains("ID is not a valid partition column")) + } + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + spark.sql(s"ALTER TABLE $t ADD PARTITION (ID=1) LOCATION 'loc1'") + checkPartitions(t, Map("id" -> "1")) + checkLocation(t, Map("id" -> "1"), "loc1") + } + } + } + + test("SPARK-33521: universal type conversions of partition values") { + withNsTable("ns", "tbl") { t => + sql(s""" + |CREATE TABLE $t ( + | id int, + | part0 tinyint, + | part1 smallint, + | part2 int, + | part3 bigint, + | part4 float, + | part5 double, + | part6 string, + | part7 boolean, + | part8 date, + | part9 timestamp + |) $defaultUsing + |PARTITIONED BY (part0, part1, part2, part3, part4, part5, part6, part7, part8, part9) + |""".stripMargin) + val partSpec = """ + | part0 = -1, + | part1 = 0, + | part2 = 1, + | part3 = 2, + | part4 = 3.14, + | part5 = 3.14, + | part6 = 'abc', + | part7 = true, + | part8 = '2020-11-23', + | part9 = '2020-11-23 22:13:10.123456' + |""".stripMargin + sql(s"ALTER TABLE $t ADD PARTITION 
($partSpec) LOCATION 'loc1'") + val expected = Map( + "part0" -> "-1", + "part1" -> "0", + "part2" -> "1", + "part3" -> "2", + "part4" -> "3.14", + "part5" -> "3.14", + "part6" -> "abc", + "part7" -> "true", + "part8" -> "2020-11-23", + "part9" -> "2020-11-23 22:13:10.123456") + checkPartitions(t, expected) + sql(s"ALTER TABLE $t DROP PARTITION ($partSpec)") + checkPartitions(t) // no partitions + } + } + + test("SPARK-33676: not fully specified partition spec") { + withNsTable("ns", "tbl") { t => + sql(s""" + |CREATE TABLE $t (id bigint, part0 int, part1 string) + |$defaultUsing + |PARTITIONED BY (part0, part1)""".stripMargin) + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t ADD PARTITION (part0 = 1)") + }.getMessage + assert(errMsg.contains("Partition spec is invalid. " + + "The spec (part0) must match the partition spec (part0, part1)")) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 82d3e2dfe2212..05e0f4f4a538c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -334,10 +334,6 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { testChangeColumn(isDatasourceTable = true) } - test("alter table: add partition (datasource table)") { - testAddPartitions(isDatasourceTable = true) - } - test("alter table: drop partition (datasource table)") { testDropPartitions(isDatasourceTable = true) } @@ -1621,63 +1617,6 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { } } - protected def testAddPartitions(isDatasourceTable: Boolean): Unit = { - if (!isUsingHiveMetastore) { - assert(isDatasourceTable, "InMemoryCatalog only supports data source tables") - } - val catalog = spark.sessionState.catalog - val tableIdent = TableIdentifier("tab1", Some("dbx")) - val part1 = Map("a" -> "1", "b" -> "5") - val part2 = Map("a" -> "2", "b" -> "6") - val part3 = Map("a" -> "3", "b" -> "7") - val part4 = Map("a" -> "4", "b" -> "8") - val part5 = Map("a" -> "9", "b" -> "9") - createDatabase(catalog, "dbx") - createTable(catalog, tableIdent, isDatasourceTable) - createTablePartition(catalog, part1, tableIdent) - assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == Set(part1)) - - // basic add partition - sql("ALTER TABLE dbx.tab1 ADD IF NOT EXISTS " + - "PARTITION (a='2', b='6') LOCATION 'paris' PARTITION (a='3', b='7')") - assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == Set(part1, part2, part3)) - assert(catalog.getPartition(tableIdent, part1).storage.locationUri.isDefined) - - val tableLocation = catalog.getTableMetadata(tableIdent).storage.locationUri - assert(tableLocation.isDefined) - val partitionLocation = makeQualifiedPath( - new Path(tableLocation.get.toString, "paris").toString) - - assert(catalog.getPartition(tableIdent, part2).storage.locationUri == Option(partitionLocation)) - assert(catalog.getPartition(tableIdent, part3).storage.locationUri.isDefined) - - // add partitions without explicitly specifying database - catalog.setCurrentDatabase("dbx") - sql("ALTER TABLE tab1 ADD IF NOT EXISTS PARTITION (a='4', b='8')") - assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == - Set(part1, part2, part3, part4)) - - // table to alter does not exist - intercept[AnalysisException] { - sql("ALTER TABLE does_not_exist ADD IF NOT EXISTS PARTITION (a='4', b='9')") - } - - // 
partition to add already exists - intercept[AnalysisException] { - sql("ALTER TABLE tab1 ADD PARTITION (a='4', b='8')") - } - - // partition to add already exists when using IF NOT EXISTS - sql("ALTER TABLE tab1 ADD IF NOT EXISTS PARTITION (a='4', b='8')") - assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == - Set(part1, part2, part3, part4)) - - // partition spec in ADD PARTITION should be case insensitive by default - sql("ALTER TABLE tab1 ADD PARTITION (A='9', B='9')") - assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == - Set(part1, part2, part3, part4, part5)) - } - protected def testDropPartitions(isDatasourceTable: Boolean): Unit = { if (!isUsingHiveMetastore) { assert(isDatasourceTable, "InMemoryCatalog only supports data source tables") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala new file mode 100644 index 0000000000000..295ce1d3da13f --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command.v1 + +import org.apache.spark.sql.catalyst.analysis.PartitionsAlreadyExistException +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec +import org.apache.spark.sql.connector.catalog.CatalogManager +import org.apache.spark.sql.execution.command +import org.apache.spark.sql.test.SharedSparkSession + +trait AlterTableAddPartitionSuiteBase extends command.AlterTableAddPartitionSuiteBase { + override def version: String = "V1" + override def catalog: String = CatalogManager.SESSION_CATALOG_NAME + override def defaultUsing: String = "USING parquet" + + override protected def checkLocation( + t: String, + spec: TablePartitionSpec, + expected: String): Unit = { + val tablePath = t.split('.') + val tableName = tablePath.last + val ns = tablePath.init.mkString(".") + val partSpec = spec.map { case (key, value) => s"$key = $value"}.mkString(", ") + val information = sql(s"SHOW TABLE EXTENDED IN $ns LIKE '$tableName' PARTITION($partSpec)") + .select("information") + .first().getString(0) + val location = information.split("\\r?\\n").filter(_.startsWith("Location:")).head + assert(location.endsWith(expected)) + } +} + +class AlterTableAddPartitionSuite extends AlterTableAddPartitionSuiteBase with SharedSparkSession { + test("partition already exists") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + sql(s"ALTER TABLE $t ADD PARTITION (id=2) LOCATION 'loc1'") + + val errMsg = intercept[PartitionsAlreadyExistException] { + sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'" + + " PARTITION (id=2) LOCATION 'loc1'") + }.getMessage + assert(errMsg.contains("The following partitions already exists")) + + sql(s"ALTER TABLE $t ADD IF NOT EXISTS PARTITION (id=1) LOCATION 'loc'" + + " PARTITION (id=2) LOCATION 'loc1'") + checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala new file mode 100644 index 0000000000000..b15235d17671a --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command.v2 + +import org.apache.spark.SparkConf +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.analysis.{PartitionsAlreadyExistException, ResolvePartitionSpec} +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec +import org.apache.spark.sql.connector.{InMemoryPartitionTable, InMemoryPartitionTableCatalog, InMemoryTableCatalog} +import org.apache.spark.sql.connector.catalog.{CatalogV2Implicits, Identifier} +import org.apache.spark.sql.execution.command +import org.apache.spark.sql.test.SharedSparkSession + +class AlterTableAddPartitionSuite + extends command.AlterTableAddPartitionSuiteBase + with SharedSparkSession { + + import CatalogV2Implicits._ + + override def version: String = "V2" + override def catalog: String = "test_catalog" + override def defaultUsing: String = "USING _" + + override def sparkConf: SparkConf = super.sparkConf + .set(s"spark.sql.catalog.$catalog", classOf[InMemoryPartitionTableCatalog].getName) + .set(s"spark.sql.catalog.non_part_$catalog", classOf[InMemoryTableCatalog].getName) + + override protected def checkLocation( + t: String, + spec: TablePartitionSpec, + expected: String): Unit = { + val tablePath = t.split('.') + val catalogName = tablePath.head + val namespaceWithTable = tablePath.tail + val namespaces = namespaceWithTable.init + val tableName = namespaceWithTable.last + val catalogPlugin = spark.sessionState.catalogManager.catalog(catalogName) + val partTable = catalogPlugin.asTableCatalog + .loadTable(Identifier.of(namespaces, tableName)) + .asInstanceOf[InMemoryPartitionTable] + val ident = ResolvePartitionSpec.convertToPartIdent(spec, partTable.partitionSchema.fields) + val partMetadata = partTable.loadPartitionMetadata(ident) + + assert(partMetadata.containsKey("location")) + assert(partMetadata.get("location") === expected) + } + + test("partition already exists") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + sql(s"ALTER TABLE $t ADD PARTITION (id=2) LOCATION 'loc1'") + + val errMsg = intercept[PartitionsAlreadyExistException] { + sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'" + + " PARTITION (id=2) LOCATION 'loc1'") + }.getMessage + assert(errMsg.contains("The following partitions already exists")) + + sql(s"ALTER TABLE $t ADD IF NOT EXISTS PARTITION (id=1) LOCATION 'loc'" + + " PARTITION (id=2) LOCATION 'loc1'") + checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) + } + } + + test("SPARK-33650: add partition into a table which doesn't support partition management") { + withNsTable("ns", "tbl", s"non_part_$catalog") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing") + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t ADD PARTITION (id=1)") + }.getMessage + assert(errMsg.contains(s"Table $t can not alter partitions")) + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index d6a4d76386889..070fdf55deb38 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -166,10 +166,6 @@ class HiveCatalogedDDLSuite extends DDLSuite with TestHiveSingleton with BeforeA testDropPartitions(isDatasourceTable = false) } - test("alter table: add partition") { - 
testAddPartitions(isDatasourceTable = false) - } - test("drop table") { testDropTable(isDatasourceTable = false) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddPartitionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddPartitionSuite.scala new file mode 100644 index 0000000000000..ef0ec8d9bd69f --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddPartitionSuite.scala @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.execution.command + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.execution.command.v1 +import org.apache.spark.sql.hive.test.TestHiveSingleton + +class AlterTableAddPartitionSuite + extends v1.AlterTableAddPartitionSuiteBase + with TestHiveSingleton { + override def version: String = "Hive V1" + override def defaultUsing: String = "USING HIVE" + + test("partition already exists") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + sql(s"ALTER TABLE $t ADD PARTITION (id=2) LOCATION 'loc1'") + + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'" + + " PARTITION (id=2) LOCATION 'loc1'") + }.getMessage + assert(errMsg.contains("already exists")) + + sql(s"ALTER TABLE $t ADD IF NOT EXISTS PARTITION (id=1) LOCATION 'loc'" + + " PARTITION (id=2) LOCATION 'loc1'") + checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) + } + } +} From b112e2bfa619d028004cbc7fb8ec1363689729a7 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Thu, 10 Dec 2020 05:18:34 +0000 Subject: [PATCH 0721/1009] [SPARK-33714][SQL] Migrate ALTER VIEW ... SET/UNSET TBLPROPERTIES commands to use UnresolvedView to resolve the identifier ### What changes were proposed in this pull request? This PR adds `allowTemp` flag to `UnresolvedView` so that `Analyzer` can check whether to resolve temp views or not. This PR also migrates `ALTER VIEW ... SET/UNSET TBLPROPERTIES` to use `UnresolvedView` to resolve the table/view identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). ### Why are the changes needed? To use `UnresolvedView` for view resolution. One benefit is that the exception message is better for `ALTER VIEW ... SET/UNSET TBLPROPERTIES`. 
Before, if a temp view is passed, you will just get `NoSuchTableException` with `Table or view 'tmpView' not found in database 'default'`. But with this PR, you will get more description exception message: `tmpView is a temp view. ALTER VIEW ... SET TBLPROPERTIES expects a permanent view`. ### Does this PR introduce _any_ user-facing change? The exception message changes as describe above. ### How was this patch tested? Updated existing tests. Closes #30676 from imback82/alter_view_set_unset_properties. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 7 +++-- .../sql/catalyst/analysis/CheckAnalysis.scala | 2 +- .../catalyst/analysis/ResolveCatalogs.scala | 12 -------- .../catalyst/analysis/v2ResolutionPlans.scala | 1 + .../sql/catalyst/parser/AstBuilder.scala | 26 +++++++++++++---- .../catalyst/plans/logical/statements.scala | 15 ---------- .../catalyst/plans/logical/v2Commands.scala | 19 +++++++++++++ .../sql/catalyst/parser/DDLParserSuite.scala | 28 +++++++++++-------- .../analysis/ResolveSessionCatalog.scala | 9 +++--- .../sql/connector/DataSourceV2SQLSuite.scala | 28 +++++++++++++++---- .../spark/sql/execution/SQLViewSuite.scala | 8 ++++-- .../command/PlanResolutionSuite.scala | 8 +++--- .../sql/hive/execution/HiveDDLSuite.scala | 10 +++++-- 13 files changed, 108 insertions(+), 65 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 6b0cf4be7de74..7d1edbae9cea5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -889,8 +889,11 @@ class Analyzer(override val catalogManager: CatalogManager) u.failAnalysis(s"${ident.quoted} is a temp view. '$cmd' expects a table") } u - case u @ UnresolvedView(ident, _, _) => + case u @ UnresolvedView(ident, cmd, allowTemp, _) => lookupTempView(ident).map { _ => + if (!allowTemp) { + u.failAnalysis(s"${ident.quoted} is a temp view. '$cmd' expects a permanent view.") + } ResolvedView(ident.asIdentifier, isTemp = true) } .getOrElse(u) @@ -1118,7 +1121,7 @@ class Analyzer(override val catalogManager: CatalogManager) case table => table }.getOrElse(u) - case u @ UnresolvedView(identifier, cmd, relationTypeMismatchHint) => + case u @ UnresolvedView(identifier, cmd, _, relationTypeMismatchHint) => lookupTableOrView(identifier).map { case v: ResolvedView => v case _ => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 39cdea2bd4d2a..119e17196a454 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -104,7 +104,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { case u: UnresolvedTable => u.failAnalysis(s"Table not found for '${u.commandName}': ${u.multipartIdentifier.quoted}") - case u @ UnresolvedView(NonSessionCatalogAndIdentifier(catalog, ident), cmd, _) => + case u @ UnresolvedView(NonSessionCatalogAndIdentifier(catalog, ident), cmd, _, _) => u.failAnalysis( s"Cannot specify catalog `${catalog.name}` for view ${ident.quoted} " + "because view support in v2 catalog has not been implemented yet. 
" + diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala index b4dfee1330036..14dccd86d2240 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala @@ -121,18 +121,6 @@ class ResolveCatalogs(val catalogManager: CatalogManager) val changes = Seq(TableChange.setProperty(TableCatalog.PROP_LOCATION, newLoc)) createAlterTable(nameParts, catalog, tbl, changes) - case AlterViewSetPropertiesStatement( - NonSessionCatalogAndTable(catalog, tbl), props) => - throw new AnalysisException( - s"Can not specify catalog `${catalog.name}` for view ${tbl.quoted} " + - s"because view support in catalog has not been implemented yet") - - case AlterViewUnsetPropertiesStatement( - NonSessionCatalogAndTable(catalog, tbl), keys, ifExists) => - throw new AnalysisException( - s"Can not specify catalog `${catalog.name}` for view ${tbl.quoted} " + - s"because view support in catalog has not been implemented yet") - case c @ CreateTableStatement( NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _, _) => assertNoNullTypeInSchema(c.tableSchema) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala index 2737b5d58bf42..940fd6085dc98 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala @@ -52,6 +52,7 @@ case class UnresolvedTable( case class UnresolvedView( multipartIdentifier: Seq[String], commandName: String, + allowTemp: Boolean = true, relationTypeMismatchHint: Option[String] = None) extends LeafNode { override lazy val resolved: Boolean = false diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 7787e199d3770..3c06a7665a0e2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3161,8 +3161,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg DropView( UnresolvedView( visitMultipartIdentifier(ctx.multipartIdentifier()), - "DROP VIEW", - Some("Please use DROP TABLE instead.")), + commandName = "DROP VIEW", + allowTemp = true, + relationTypeMismatchHint = Some("Please use DROP TABLE instead.")), ctx.EXISTS != null) } @@ -3399,7 +3400,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } /** - * Parse [[AlterViewSetPropertiesStatement]] or [[AlterTableSetPropertiesStatement]] commands. + * Parse [[AlterViewSetProperties]] or [[AlterTableSetPropertiesStatement]] commands. * * For example: * {{{ @@ -3413,14 +3414,20 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg val properties = visitPropertyKeyValues(ctx.tablePropertyList) val cleanedTableProperties = cleanTableProperties(ctx, properties) if (ctx.VIEW != null) { - AlterViewSetPropertiesStatement(identifier, cleanedTableProperties) + AlterViewSetProperties( + UnresolvedView( + identifier, + commandName = "ALTER VIEW ... 
SET TBLPROPERTIES", + allowTemp = false, + relationTypeMismatchHint = Some("Please use ALTER TABLE instead.")), + cleanedTableProperties) } else { AlterTableSetPropertiesStatement(identifier, cleanedTableProperties) } } /** - * Parse [[AlterViewUnsetPropertiesStatement]] or [[AlterTableUnsetPropertiesStatement]] commands. + * Parse [[AlterViewUnsetProperties]] or [[AlterTableUnsetPropertiesStatement]] commands. * * For example: * {{{ @@ -3436,7 +3443,14 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg val ifExists = ctx.EXISTS != null if (ctx.VIEW != null) { - AlterViewUnsetPropertiesStatement(identifier, cleanedProperties, ifExists) + AlterViewUnsetProperties( + UnresolvedView( + identifier, + commandName = "ALTER VIEW ... UNSET TBLPROPERTIES", + allowTemp = false, + relationTypeMismatchHint = Some("Please use ALTER TABLE instead.")), + cleanedProperties, + ifExists) } else { AlterTableUnsetPropertiesStatement(identifier, cleanedProperties, ifExists) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index b731b8a2fd8fd..d628bc914dba7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -315,21 +315,6 @@ case class AlterTableSerDePropertiesStatement( serdeProperties: Option[Map[String, String]], partitionSpec: Option[TablePartitionSpec]) extends ParsedStatement -/** - * ALTER VIEW ... SET TBLPROPERTIES command, as parsed from SQL. - */ -case class AlterViewSetPropertiesStatement( - viewName: Seq[String], - properties: Map[String, String]) extends ParsedStatement - -/** - * ALTER VIEW ... UNSET TBLPROPERTIES command, as parsed from SQL. - */ -case class AlterViewUnsetPropertiesStatement( - viewName: Seq[String], - propertyKeys: Seq[String], - ifExists: Boolean) extends ParsedStatement - /** * ALTER VIEW ... Query command, as parsed from SQL. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index e014048f723f5..7d62dde67733b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -742,3 +742,22 @@ case class DropView( case class RepairTable(child: LogicalPlan) extends Command { override def children: Seq[LogicalPlan] = child :: Nil } + +/** + * The logical plan of the ALTER VIEW ... SET TBLPROPERTIES command. + */ +case class AlterViewSetProperties( + child: LogicalPlan, + properties: Map[String, String]) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} + +/** + * The logical plan of the ALTER VIEW ... UNSET TBLPROPERTIES command. 
+ */ +case class AlterViewUnsetProperties( + child: LogicalPlan, + propertyKeys: Seq[String], + ifExists: Boolean) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index e194e7112b1d4..af5e48d922a16 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -724,15 +724,15 @@ class DDLParserSuite extends AnalysisTest { val cmd = "DROP VIEW" val hint = Some("Please use DROP TABLE instead.") parseCompare(s"DROP VIEW testcat.db.view", - DropView(UnresolvedView(Seq("testcat", "db", "view"), cmd, hint), ifExists = false)) + DropView(UnresolvedView(Seq("testcat", "db", "view"), cmd, true, hint), ifExists = false)) parseCompare(s"DROP VIEW db.view", - DropView(UnresolvedView(Seq("db", "view"), cmd, hint), ifExists = false)) + DropView(UnresolvedView(Seq("db", "view"), cmd, true, hint), ifExists = false)) parseCompare(s"DROP VIEW IF EXISTS db.view", - DropView(UnresolvedView(Seq("db", "view"), cmd, hint), ifExists = true)) + DropView(UnresolvedView(Seq("db", "view"), cmd, true, hint), ifExists = true)) parseCompare(s"DROP VIEW view", - DropView(UnresolvedView(Seq("view"), cmd, hint), ifExists = false)) + DropView(UnresolvedView(Seq("view"), cmd, true, hint), ifExists = false)) parseCompare(s"DROP VIEW IF EXISTS view", - DropView(UnresolvedView(Seq("view"), cmd, hint), ifExists = true)) + DropView(UnresolvedView(Seq("view"), cmd, true, hint), ifExists = true)) } private def testCreateOrReplaceDdl( @@ -764,16 +764,22 @@ class DDLParserSuite extends AnalysisTest { "'comment' = 'new_comment')" val sql2_view = "ALTER VIEW table_name UNSET TBLPROPERTIES ('comment', 'test')" val sql3_view = "ALTER VIEW table_name UNSET TBLPROPERTIES IF EXISTS ('comment', 'test')" + val hint = Some("Please use ALTER TABLE instead.") comparePlans(parsePlan(sql1_view), - AlterViewSetPropertiesStatement( - Seq("table_name"), Map("test" -> "test", "comment" -> "new_comment"))) + AlterViewSetProperties( + UnresolvedView(Seq("table_name"), "ALTER VIEW ... SET TBLPROPERTIES", false, hint), + Map("test" -> "test", "comment" -> "new_comment"))) comparePlans(parsePlan(sql2_view), - AlterViewUnsetPropertiesStatement( - Seq("table_name"), Seq("comment", "test"), ifExists = false)) + AlterViewUnsetProperties( + UnresolvedView(Seq("table_name"), "ALTER VIEW ... UNSET TBLPROPERTIES", false, hint), + Seq("comment", "test"), + ifExists = false)) comparePlans(parsePlan(sql3_view), - AlterViewUnsetPropertiesStatement( - Seq("table_name"), Seq("comment", "test"), ifExists = true)) + AlterViewUnsetProperties( + UnresolvedView(Seq("table_name"), "ALTER VIEW ... 
UNSET TBLPROPERTIES", false, hint), + Seq("comment", "test"), + ifExists = true)) } // ALTER TABLE table_name SET TBLPROPERTIES ('comment' = new_comment); diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 817a63aa9aa6e..6e06cb3a1f928 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -209,12 +209,11 @@ class ResolveSessionCatalog( createAlterTable(nameParts, catalog, tbl, changes) } - // ALTER VIEW should always use v1 command if the resolved catalog is session catalog. - case AlterViewSetPropertiesStatement(SessionCatalogAndTable(_, tbl), props) => - AlterTableSetPropertiesCommand(tbl.asTableIdentifier, props, isView = true) + case AlterViewSetProperties(ResolvedView(ident, _), props) => + AlterTableSetPropertiesCommand(ident.asTableIdentifier, props, isView = true) - case AlterViewUnsetPropertiesStatement(SessionCatalogAndTable(_, tbl), keys, ifExists) => - AlterTableUnsetPropertiesCommand(tbl.asTableIdentifier, keys, ifExists, isView = true) + case AlterViewUnsetProperties(ResolvedView(ident, _), keys, ifExists) => + AlterTableUnsetPropertiesCommand(ident.asTableIdentifier, keys, ifExists, isView = true) case d @ DescribeNamespace(SessionCatalogAndNamespace(_, ns), _) => if (ns.length != 1) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 9020065449cef..8e1e8f88f219f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2594,11 +2594,29 @@ class DataSourceV2SQLSuite } } - test("DROP VIEW is not supported for v2 catalogs") { - assertAnalysisError( - "DROP VIEW testcat.v", - "Cannot specify catalog `testcat` for view v because view support in v2 catalog " + - "has not been implemented yet. DROP VIEW expects a view.") + test("View commands are not supported in v2 catalogs") { + def validateViewCommand( + sql: String, + catalogName: String, + viewName: String, + cmdName: String): Unit = { + assertAnalysisError( + sql, + s"Cannot specify catalog `$catalogName` for view $viewName because view support " + + s"in v2 catalog has not been implemented yet. $cmdName expects a view.") + } + + validateViewCommand("DROP VIEW testcat.v", "testcat", "v", "DROP VIEW") + validateViewCommand( + "ALTER VIEW testcat.v SET TBLPROPERTIES ('key' = 'val')", + "testcat", + "v", + "ALTER VIEW ... SET TBLPROPERTIES") + validateViewCommand( + "ALTER VIEW testcat.v UNSET TBLPROPERTIES ('key')", + "testcat", + "v", + "ALTER VIEW ... 
UNSET TBLPROPERTIES") } private def testNotSupportedV2Command( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index c4303f0f1e19d..7595ae0ec7a53 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -127,8 +127,12 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { val viewName = "testView" withTempView(viewName) { spark.range(10).createTempView(viewName) - assertNoSuchTable(s"ALTER VIEW $viewName SET TBLPROPERTIES ('p' = 'an')") - assertNoSuchTable(s"ALTER VIEW $viewName UNSET TBLPROPERTIES ('p')") + assertAnalysisError( + s"ALTER VIEW $viewName SET TBLPROPERTIES ('p' = 'an')", + "testView is a temp view. 'ALTER VIEW ... SET TBLPROPERTIES' expects a permanent view.") + assertAnalysisError( + s"ALTER VIEW $viewName UNSET TBLPROPERTIES ('p')", + "testView is a temp view. 'ALTER VIEW ... UNSET TBLPROPERTIES' expects a permanent view.") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 758540f1a42f5..70cbfa194313f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -721,16 +721,16 @@ class PlanResolutionSuite extends AnalysisTest { // ALTER VIEW view_name SET TBLPROPERTIES ('comment' = new_comment); // ALTER VIEW view_name UNSET TBLPROPERTIES [IF EXISTS] ('comment', 'key'); test("alter view: alter view properties") { - val sql1_view = "ALTER VIEW table_name SET TBLPROPERTIES ('test' = 'test', " + + val sql1_view = "ALTER VIEW view SET TBLPROPERTIES ('test' = 'test', " + "'comment' = 'new_comment')" - val sql2_view = "ALTER VIEW table_name UNSET TBLPROPERTIES ('comment', 'test')" - val sql3_view = "ALTER VIEW table_name UNSET TBLPROPERTIES IF EXISTS ('comment', 'test')" + val sql2_view = "ALTER VIEW view UNSET TBLPROPERTIES ('comment', 'test')" + val sql3_view = "ALTER VIEW view UNSET TBLPROPERTIES IF EXISTS ('comment', 'test')" val parsed1_view = parseAndResolve(sql1_view) val parsed2_view = parseAndResolve(sql2_view) val parsed3_view = parseAndResolve(sql3_view) - val tableIdent = TableIdentifier("table_name", Some("default")) + val tableIdent = TableIdentifier("view", Some("default")) val expected1_view = AlterTableSetPropertiesCommand( tableIdent, Map("test" -> "test", "comment" -> "new_comment"), isView = true) val expected2_view = AlterTableUnsetPropertiesCommand( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 070fdf55deb38..9f75f8797fe37 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -875,11 +875,17 @@ class HiveDDLSuite assertErrorForAlterTableOnView(s"ALTER TABLE $oldViewName RENAME TO $newViewName") - assertErrorForAlterViewOnTable(s"ALTER VIEW $tabName SET TBLPROPERTIES ('p' = 'an')") + assertAnalysisError( + s"ALTER VIEW $tabName SET TBLPROPERTIES ('p' = 'an')", + s"$tabName is a table. 'ALTER VIEW ... SET TBLPROPERTIES' expects a view. 
" + + "Please use ALTER TABLE instead.") assertErrorForAlterTableOnView(s"ALTER TABLE $oldViewName SET TBLPROPERTIES ('p' = 'an')") - assertErrorForAlterViewOnTable(s"ALTER VIEW $tabName UNSET TBLPROPERTIES ('p')") + assertAnalysisError( + s"ALTER VIEW $tabName UNSET TBLPROPERTIES ('p')", + s"$tabName is a table. 'ALTER VIEW ... UNSET TBLPROPERTIES' expects a view. " + + "Please use ALTER TABLE instead.") assertErrorForAlterTableOnView(s"ALTER TABLE $oldViewName UNSET TBLPROPERTIES ('p')") From 795db05bf6911aa2a66eea57460409a238957b40 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Wed, 9 Dec 2020 22:04:09 -0800 Subject: [PATCH 0722/1009] [SPARK-33732][K8S][TESTS] Kubernetes integration tests doesn't work with Minikube 1.9+ ### What changes were proposed in this pull request? This PR changes `Minikube.scala` for Kubernetes integration tests to work with Minikube 1.9+. `Minikube.scala` assumes that `apiserver.key` and `apiserver.crt` are in `~/.minikube/`. But as of Minikube 1.9, they are in `~/.minikube/profiles/`. ### Why are the changes needed? Currently, Kubernetes integration tests doesn't work with Minikube 1.9+. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? I confirmed the following test passes. ``` $ build/sbt -Pkubernetes -Pkubernetes-integration-tests package 'kubernetes-integration-tests/testOnly -- -z "SparkPi with no"' ``` Closes #30700 from sarutak/minikube-1.9. Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .../backend/minikube/Minikube.scala | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala index 547427f96d7ec..c33875243c598 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala @@ -16,7 +16,7 @@ */ package org.apache.spark.deploy.k8s.integrationtest.backend.minikube -import java.nio.file.Paths +import java.nio.file.{Files, Paths} import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient} @@ -68,15 +68,23 @@ private[spark] object Minikube extends Logging { def getKubernetesClient: DefaultKubernetesClient = { val kubernetesMaster = s"https://${getMinikubeIp}:8443" val userHome = System.getProperty("user.home") + val minikubeBasePath = Paths.get(userHome, MINIKUBE_PATH).toString + val profileDir = if (Files.exists(Paths.get(minikubeBasePath, "apiserver.crt"))) { + // For Minikube <1.9 + "" + } else { + // For Minikube >=1.9 + Paths.get("profiles", executeMinikube("profile")(0)).toString + } + val apiServerCertPath = Paths.get(minikubeBasePath, profileDir, "apiserver.crt") + val apiServerKeyPath = Paths.get(minikubeBasePath, profileDir, "apiserver.key") val kubernetesConf = new ConfigBuilder() .withApiVersion("v1") .withMasterUrl(kubernetesMaster) .withCaCertFile( Paths.get(userHome, MINIKUBE_PATH, "ca.crt").toFile.getAbsolutePath) - .withClientCertFile( - Paths.get(userHome, MINIKUBE_PATH, "apiserver.crt").toFile.getAbsolutePath) - .withClientKeyFile( - Paths.get(userHome, MINIKUBE_PATH, "apiserver.key").toFile.getAbsolutePath) + 
.withClientCertFile(apiServerCertPath.toFile.getAbsolutePath) + .withClientKeyFile(apiServerKeyPath.toFile.getAbsolutePath) .build() new DefaultKubernetesClient(kubernetesConf) } @@ -120,7 +128,7 @@ private[spark] object Minikube extends Logging { def executeMinikube(action: String, args: String*): Seq[String] = { ProcessUtils.executeProcess( - Array("bash", "-c", s"minikube $action ${args.mkString(" ")}"), + Array("bash", "-c", s"MINIKUBE_IN_STYLE=true minikube $action ${args.mkString(" ")}"), MINIKUBE_STARTUP_TIMEOUT_SECONDS).filter{x => !x.contains("There is a newer version of minikube") && !x.contains("https://github.com/kubernetes") From cef28c2c51d06506afd8a5f5ac725a1a0fd53b6d Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Thu, 10 Dec 2020 08:38:24 +0000 Subject: [PATCH 0723/1009] [SPARK-32670][SQL][FOLLOWUP] Group exception messages in Catalyst Analyzer in one file ### What changes were proposed in this pull request? This PR follows up https://github.com/apache/spark/pull/29497. Because https://github.com/apache/spark/pull/29497 just give us an example to group all `AnalysisExcpetion` in Analyzer into QueryCompilationErrors. This PR group other `AnalysisExcpetion` into QueryCompilationErrors. ### Why are the changes needed? It will largely help with standardization of error messages and its maintenance. ### Does this PR introduce _any_ user-facing change? No. Error messages remain unchanged. ### How was this patch tested? No new tests - pass all original tests to make sure it doesn't break any existing behavior. Closes #30564 from beliefer/SPARK-32670-followup. Lead-authored-by: gengjiaan Co-authored-by: Jiaan Geng Co-authored-by: beliefer Signed-off-by: Wenchen Fan --- .../spark/sql/QueryCompilationErrors.scala | 169 +++++++++++++++++- .../sql/catalyst/analysis/Analyzer.scala | 122 ++++++------- 2 files changed, 217 insertions(+), 74 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala index c680502cb328f..87387b18dbab4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala @@ -18,9 +18,13 @@ package org.apache.spark.sql.errors import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.expressions.{Expression, GroupingID} +import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} +import org.apache.spark.sql.catalyst.analysis.ResolvedView +import org.apache.spark.sql.catalyst.expressions.{Alias, Expression, GroupingID, NamedExpression, SpecifiedWindowFrame, WindowFrame, WindowFunction, WindowSpecDefinition} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.trees.TreeNode import org.apache.spark.sql.catalyst.util.toPrettySQL +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.catalog.TableChange import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{AbstractDataType, DataType, StructType} @@ -31,6 +35,7 @@ import org.apache.spark.sql.types.{AbstractDataType, DataType, StructType} * org.apache.spark.sql.catalyst.analysis.Analyzer. 
*/ object QueryCompilationErrors { + def groupingIDMismatchError(groupingID: GroupingID, groupByExprs: Seq[Expression]): Throwable = { new AnalysisException( s"Columns of grouping_id (${groupingID.groupByExprs.mkString(",")}) " + @@ -159,6 +164,166 @@ object QueryCompilationErrors { s"Couldn't find the reference column for $after at $parentName") } -} + def windowSpecificationNotDefinedError(windowName: String): Throwable = { + new AnalysisException(s"Window specification $windowName is not defined in the WINDOW clause.") + } + + def selectExprNotInGroupByError(expr: Expression, groupByAliases: Seq[Alias]): Throwable = { + new AnalysisException(s"$expr doesn't show up in the GROUP BY list $groupByAliases") + } + + def groupingMustWithGroupingSetsOrCubeOrRollupError(): Throwable = { + new AnalysisException("grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup") + } + + def pandasUDFAggregateNotSupportedInPivotError(): Throwable = { + new AnalysisException("Pandas UDF aggregate expressions are currently not supported in pivot.") + } + + def aggregateExpressionRequiredForPivotError(sql: String): Throwable = { + new AnalysisException(s"Aggregate expression required for pivot, but '$sql' " + + "did not appear in any aggregate function.") + } + + def expectTableNotTempViewError(quoted: String, cmd: String, t: TreeNode[_]): Throwable = { + new AnalysisException(s"$quoted is a temp view. '$cmd' expects a table", + t.origin.line, t.origin.startPosition) + } + + def expectTableOrPermanentViewNotTempViewError( + quoted: String, cmd: String, t: TreeNode[_]): Throwable = { + new AnalysisException(s"$quoted is a temp view. '$cmd' expects a table or permanent view.", + t.origin.line, t.origin.startPosition) + } + + def viewDepthExceedsMaxResolutionDepthError( + identifier: TableIdentifier, maxNestedViewDepth: Int, t: TreeNode[_]): Throwable = { + new AnalysisException(s"The depth of view $identifier exceeds the maximum " + + s"view resolution depth ($maxNestedViewDepth). Analysis is aborted to " + + s"avoid errors. Increase the value of ${SQLConf.MAX_NESTED_VIEW_DEPTH.key} to work " + + "around this.", t.origin.line, t.origin.startPosition) + } + + def insertIntoViewNotAllowedError(identifier: TableIdentifier, t: TreeNode[_]): Throwable = { + new AnalysisException(s"Inserting into a view is not allowed. View: $identifier.", + t.origin.line, t.origin.startPosition) + } + + def writeIntoViewNotAllowedError(identifier: TableIdentifier, t: TreeNode[_]): Throwable = { + new AnalysisException(s"Writing into a view is not allowed. View: $identifier.", + t.origin.line, t.origin.startPosition) + } + + def writeIntoV1TableNotAllowedError(identifier: TableIdentifier, t: TreeNode[_]): Throwable = { + new AnalysisException(s"Cannot write into v1 table: $identifier.", + t.origin.line, t.origin.startPosition) + } + + def expectTableNotViewError(v: ResolvedView, cmd: String, t: TreeNode[_]): Throwable = { + val viewStr = if (v.isTemp) "temp view" else "view" + new AnalysisException(s"${v.identifier.quoted} is a $viewStr. 
'$cmd' expects a table.", + t.origin.line, t.origin.startPosition) + } + + def starNotAllowedWhenGroupByOrdinalPositionUsedError(): Throwable = { + new AnalysisException( + "Star (*) is not allowed in select list when GROUP BY ordinal position is used") + } + + def invalidStarUsageError(prettyName: String): Throwable = { + new AnalysisException(s"Invalid usage of '*' in $prettyName") + } + + def orderByPositionRangeError(index: Int, size: Int, t: TreeNode[_]): Throwable = { + new AnalysisException(s"ORDER BY position $index is not in select list " + + s"(valid range is [1, $size])", t.origin.line, t.origin.startPosition) + } + + def groupByPositionRangeError(index: Int, size: Int, t: TreeNode[_]): Throwable = { + new AnalysisException(s"GROUP BY position $index is not in select list " + + s"(valid range is [1, $size])", t.origin.line, t.origin.startPosition) + } + def generatorNotExpectedError(name: FunctionIdentifier, classCanonicalName: String): Throwable = { + new AnalysisException(s"$name is expected to be a generator. However, " + + s"its class is $classCanonicalName, which is not a generator.") + } + def distinctOrFilterOnlyWithAggregateFunctionError(prettyName: String): Throwable = { + new AnalysisException("DISTINCT or FILTER specified, " + + s"but $prettyName is not an aggregate function") + } + + def nonDeterministicFilterInAggregateError(): Throwable = { + new AnalysisException("FILTER expression is non-deterministic, " + + "it cannot be used in aggregate functions") + } + + def aliasNumberNotMatchColumnNumberError( + columnSize: Int, outputSize: Int, t: TreeNode[_]): Throwable = { + new AnalysisException("Number of column aliases does not match number of columns. " + + s"Number of column aliases: $columnSize; " + + s"number of columns: $outputSize.", t.origin.line, t.origin.startPosition) + } + + def aliasesNumberNotMatchUDTFOutputError( + aliasesSize: Int, aliasesNames: String): Throwable = { + new AnalysisException("The number of aliases supplied in the AS clause does not " + + s"match the number of columns output by the UDTF expected $aliasesSize " + + s"aliases but got $aliasesNames ") + } + + def windowAggregateFunctionWithFilterNotSupportedError(): Throwable = { + new AnalysisException("window aggregate function with filter predicate is not supported yet.") + } + + def windowFunctionInsideAggregateFunctionNotAllowedError(): Throwable = { + new AnalysisException("It is not allowed to use a window function inside an aggregate " + + "function. Please use the inner window function in a sub-query.") + } + + def expressionWithoutWindowExpressionError(expr: NamedExpression): Throwable = { + new AnalysisException(s"$expr does not have any WindowExpression.") + } + + def expressionWithMultiWindowExpressionsError( + expr: NamedExpression, distinctWindowSpec: Seq[WindowSpecDefinition]): Throwable = { + new AnalysisException(s"$expr has multiple Window Specifications ($distinctWindowSpec)." 
+ + "Please file a bug report with this error message, stack trace, and the query.") + } + + def windowFunctionNotAllowedError(clauseName: String): Throwable = { + new AnalysisException(s"It is not allowed to use window functions inside $clauseName clause") + } + + def cannotSpecifyWindowFrameError(prettyName: String): Throwable = { + new AnalysisException(s"Cannot specify window frame for $prettyName function") + } + + def windowFrameNotMatchRequiredFrameError( + f: SpecifiedWindowFrame, required: WindowFrame): Throwable = { + new AnalysisException(s"Window Frame $f must match the required frame $required") + } + + def windowFunctionWithWindowFrameNotOrderedError(wf: WindowFunction): Throwable = { + new AnalysisException(s"Window function $wf requires window to be ordered, please add " + + s"ORDER BY clause. For example SELECT $wf(value_expr) OVER (PARTITION BY window_partition " + + "ORDER BY window_ordering) from table") + } + + def cannotResolveUserSpecifiedColumnsError(col: String, t: TreeNode[_]): Throwable = { + new AnalysisException(s"Cannot resolve column name $col", t.origin.line, t.origin.startPosition) + } + + def writeTableWithMismatchedColumnsError( + columnSize: Int, outputSize: Int, t: TreeNode[_]): Throwable = { + new AnalysisException("Cannot write to table due to mismatched user specified column " + + s"size($columnSize) and data column size($outputSize)", t.origin.line, t.origin.startPosition) + } + + def multiTimeWindowExpressionsNotSupportedError(t: TreeNode[_]): Throwable = { + new AnalysisException("Multiple time window expressions would result in a cartesian product " + + "of rows, therefore they are currently not supported.", t.origin.line, t.origin.startPosition) + } + +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 7d1edbae9cea5..0d719b1f53365 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -368,10 +368,8 @@ class Analyzer(override val catalogManager: CatalogManager) // Lookup WindowSpecDefinitions. This rule works with unresolved children. case WithWindowDefinition(windowDefinitions, child) => child.resolveExpressions { case UnresolvedWindowExpression(c, WindowSpecReference(windowName)) => - val errorMessage = - s"Window specification $windowName is not defined in the WINDOW clause." - val windowSpecDefinition = - windowDefinitions.getOrElse(windowName, failAnalysis(errorMessage)) + val windowSpecDefinition = windowDefinitions.getOrElse(windowName, + throw QueryCompilationErrors.windowSpecificationNotDefinedError(windowName)) WindowExpression(c, windowSpecDefinition) } } @@ -515,7 +513,7 @@ class Analyzer(override val catalogManager: CatalogManager) val groupingSetsAttributes = selectedGroupByExprs.map { groupingSetExprs => groupingSetExprs.map { expr => val alias = groupByAliases.find(_.child.semanticEquals(expr)).getOrElse( - failAnalysis(s"$expr doesn't show up in the GROUP BY list $groupByAliases")) + throw QueryCompilationErrors.selectExprNotInGroupByError(expr, groupByAliases)) // Map alias to expanded attribute. 
expandedAttributes.find(_.semanticEquals(alias.toAttribute)).getOrElse( alias.toAttribute) @@ -619,11 +617,11 @@ class Analyzer(override val catalogManager: CatalogManager) val gid = a.groupingExpressions.last if (!gid.isInstanceOf[AttributeReference] || gid.asInstanceOf[AttributeReference].name != VirtualColumn.groupingIdName) { - failAnalysis(s"grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup") + throw QueryCompilationErrors.groupingMustWithGroupingSetsOrCubeOrRollupError() } a.groupingExpressions.take(a.groupingExpressions.length - 1) }.getOrElse { - failAnalysis(s"grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup") + throw QueryCompilationErrors.groupingMustWithGroupingSetsOrCubeOrRollupError() } } @@ -833,11 +831,9 @@ class Analyzer(override val catalogManager: CatalogManager) private def checkValidAggregateExpression(expr: Expression): Unit = expr match { case _: AggregateExpression => // OK and leave the argument check to CheckAnalysis. case expr: PythonUDF if PythonUDF.isGroupedAggPandasUDF(expr) => - failAnalysis("Pandas UDF aggregate expressions are currently not supported in pivot.") + throw QueryCompilationErrors.pandasUDFAggregateNotSupportedInPivotError() case e: Attribute => - failAnalysis( - s"Aggregate expression required for pivot, but '${e.sql}' " + - s"did not appear in any aggregate function.") + throw QueryCompilationErrors.aggregateExpressionRequiredForPivotError(e.sql) case e => e.children.foreach(checkValidAggregateExpression) } } @@ -886,7 +882,7 @@ class Analyzer(override val catalogManager: CatalogManager) } case u @ UnresolvedTable(ident, cmd) => lookupTempView(ident).foreach { _ => - u.failAnalysis(s"${ident.quoted} is a temp view. '$cmd' expects a table") + throw QueryCompilationErrors.expectTableNotTempViewError(ident.quoted, cmd, u) } u case u @ UnresolvedView(ident, cmd, allowTemp, _) => @@ -901,8 +897,8 @@ class Analyzer(override val catalogManager: CatalogManager) lookupTempView(ident) .map { _ => if (!allowTempView) { - u.failAnalysis( - s"${ident.quoted} is a temp view. '$cmd' expects a table or permanent view.") + throw QueryCompilationErrors.expectTableOrPermanentViewNotTempViewError( + ident.quoted, cmd, u) } ResolvedView(ident.asIdentifier, isTemp = true) } @@ -1062,10 +1058,8 @@ class Analyzer(override val catalogManager: CatalogManager) val nestedViewDepth = AnalysisContext.get.nestedViewDepth val maxNestedViewDepth = AnalysisContext.get.maxNestedViewDepth if (nestedViewDepth > maxNestedViewDepth) { - view.failAnalysis(s"The depth of view ${desc.identifier} exceeds the maximum " + - s"view resolution depth ($maxNestedViewDepth). Analysis is aborted to " + - s"avoid errors. Increase the value of ${SQLConf.MAX_NESTED_VIEW_DEPTH.key} to " + - "work around this.") + throw QueryCompilationErrors.viewDepthExceedsMaxResolutionDepthError( + desc.identifier, maxNestedViewDepth, view) } SQLConf.withExistingConf(View.effectiveSQLConf(desc.viewSQLConfigs, isTempView)) { executeSameContext(child) @@ -1087,7 +1081,7 @@ class Analyzer(override val catalogManager: CatalogManager) EliminateSubqueryAliases(relation) match { case v: View => - table.failAnalysis(s"Inserting into a view is not allowed. 
View: ${v.desc.identifier}.") + throw QueryCompilationErrors.insertIntoViewNotAllowedError(v.desc.identifier, table) case other => i.copy(table = other) } @@ -1098,10 +1092,11 @@ class Analyzer(override val catalogManager: CatalogManager) lookupRelation(u.multipartIdentifier, u.options, false) .map(EliminateSubqueryAliases(_)) .map { - case v: View => write.failAnalysis( - s"Writing into a view is not allowed. View: ${v.desc.identifier}.") - case u: UnresolvedCatalogRelation => write.failAnalysis( - "Cannot write into v1 table: " + u.tableMeta.identifier) + case v: View => throw QueryCompilationErrors.writeIntoViewNotAllowedError( + v.desc.identifier, write) + case u: UnresolvedCatalogRelation => + throw QueryCompilationErrors.writeIntoV1TableNotAllowedError( + u.tableMeta.identifier, write) case r: DataSourceV2Relation => write.withNewTable(r) case other => throw new IllegalStateException( "[BUG] unexpected plan returned by `lookupRelation`: " + other) @@ -1115,9 +1110,7 @@ class Analyzer(override val catalogManager: CatalogManager) case u @ UnresolvedTable(identifier, cmd) => lookupTableOrView(identifier).map { - case v: ResolvedView => - val viewStr = if (v.isTemp) "temp view" else "view" - u.failAnalysis(s"${v.identifier.quoted} is a $viewStr. '$cmd' expects a table.") + case v: ResolvedView => throw QueryCompilationErrors.expectTableNotViewError(v, cmd, u) case table => table }.getOrElse(u) @@ -1488,8 +1481,7 @@ class Analyzer(override val catalogManager: CatalogManager) // If the aggregate function argument contains Stars, expand it. case a: Aggregate if containsStar(a.aggregateExpressions) => if (a.groupingExpressions.exists(_.isInstanceOf[UnresolvedOrdinal])) { - failAnalysis( - "Star (*) is not allowed in select list when GROUP BY ordinal position is used") + throw QueryCompilationErrors.starNotAllowedWhenGroupByOrdinalPositionUsedError() } else { a.copy(aggregateExpressions = buildExpandedProjectList(a.aggregateExpressions, a.child)) } @@ -1502,7 +1494,7 @@ class Analyzer(override val catalogManager: CatalogManager) } ) case g: Generate if containsStar(g.generator.children) => - failAnalysis("Invalid usage of '*' in explode/json_tuple/UDTF") + throw QueryCompilationErrors.invalidStarUsageError("explode/json_tuple/UDTF") // To resolve duplicate expression IDs for Join and Intersect case j @ Join(left, right, _, _, _) if !j.duplicateResolved => @@ -1762,7 +1754,7 @@ class Analyzer(override val catalogManager: CatalogManager) }) // count(*) has been replaced by count(1) case o if containsStar(o.children) => - failAnalysis(s"Invalid usage of '*' in expression '${o.prettyName}'") + throw QueryCompilationErrors.invalidStarUsageError(s"expression '${o.prettyName}'") } } } @@ -1864,9 +1856,7 @@ class Analyzer(override val catalogManager: CatalogManager) if (index > 0 && index <= child.output.size) { SortOrder(child.output(index - 1), direction, nullOrdering, Seq.empty) } else { - s.failAnalysis( - s"ORDER BY position $index is not in select list " + - s"(valid range is [1, ${child.output.size}])") + throw QueryCompilationErrors.orderByPositionRangeError(index, child.output.size, s) } case o => o } @@ -1880,9 +1870,7 @@ class Analyzer(override val catalogManager: CatalogManager) case u @ UnresolvedOrdinal(index) if index > 0 && index <= aggs.size => aggs(index - 1) case ordinal @ UnresolvedOrdinal(index) => - ordinal.failAnalysis( - s"GROUP BY position $index is not in select list " + - s"(valid range is [1, ${aggs.size}])") + throw QueryCompilationErrors.groupByPositionRangeError(index, 
aggs.size, ordinal) case o => o } Aggregate(newGroups, aggs, child) @@ -2089,9 +2077,8 @@ class Analyzer(override val catalogManager: CatalogManager) withPosition(u) { v1SessionCatalog.lookupFunction(name, children) match { case generator: Generator => generator - case other => - failAnalysis(s"$name is expected to be a generator. However, " + - s"its class is ${other.getClass.getCanonicalName}, which is not a generator.") + case other => throw QueryCompilationErrors.generatorNotExpectedError( + name, other.getClass.getCanonicalName) } } case u @ UnresolvedFunction(funcId, arguments, isDistinct, filter) => @@ -2102,22 +2089,21 @@ class Analyzer(override val catalogManager: CatalogManager) // AggregateExpression. case wf: AggregateWindowFunction => if (isDistinct || filter.isDefined) { - failAnalysis("DISTINCT or FILTER specified, " + - s"but ${wf.prettyName} is not an aggregate function") + throw QueryCompilationErrors.distinctOrFilterOnlyWithAggregateFunctionError( + wf.prettyName) } else { wf } // We get an aggregate function, we need to wrap it in an AggregateExpression. case agg: AggregateFunction => if (filter.isDefined && !filter.get.deterministic) { - failAnalysis("FILTER expression is non-deterministic, " + - "it cannot be used in aggregate functions") + throw QueryCompilationErrors.nonDeterministicFilterInAggregateError } AggregateExpression(agg, Complete, isDistinct, filter) // This function is not an aggregate function, just return the resolved one. case other if (isDistinct || filter.isDefined) => - failAnalysis("DISTINCT or FILTER specified, " + - s"but ${other.prettyName} is not an aggregate function") + throw QueryCompilationErrors.distinctOrFilterOnlyWithAggregateFunctionError( + other.prettyName) case e: String2TrimExpression if arguments.size == 2 => if (trimWarningEnabled.get) { log.warn("Two-parameter TRIM/LTRIM/RTRIM function signatures are deprecated." + @@ -2256,9 +2242,8 @@ class Analyzer(override val catalogManager: CatalogManager) // Checks if the number of the aliases equals to the number of output columns // in the subquery. if (columnNames.size != outputAttrs.size) { - u.failAnalysis("Number of column aliases does not match number of columns. 
" + - s"Number of column aliases: ${columnNames.size}; " + - s"number of columns: ${outputAttrs.size}.") + throw QueryCompilationErrors.aliasNumberNotMatchColumnNumberError( + columnNames.size, outputAttrs.size, u) } val aliases = outputAttrs.zip(columnNames).map { case (attr, aliasName) => Alias(attr, aliasName)() @@ -2649,10 +2634,8 @@ class Analyzer(override val catalogManager: CatalogManager) } else if (names.isEmpty) { elementAttrs } else { - failAnalysis( - "The number of aliases supplied in the AS clause does not match the number of columns " + - s"output by the UDTF expected ${elementAttrs.size} aliases but got " + - s"${names.mkString(",")} ") + throw QueryCompilationErrors.aliasesNumberNotMatchUDTFOutputError( + elementAttrs.size, names.mkString(",")) } } } @@ -2761,8 +2744,7 @@ class Analyzer(override val catalogManager: CatalogManager) wsc.copy(partitionSpec = newPartitionSpec, orderSpec = newOrderSpec) case WindowExpression(ae: AggregateExpression, _) if ae.filter.isDefined => - failAnalysis( - "window aggregate function with filter predicate is not supported yet.") + throw QueryCompilationErrors.windowAggregateFunctionWithFilterNotSupportedError // Extract Windowed AggregateExpression case we @ WindowExpression( @@ -2775,8 +2757,7 @@ class Analyzer(override val catalogManager: CatalogManager) WindowExpression(newAgg, spec) case AggregateExpression(aggFunc, _, _, _, _) if hasWindowFunction(aggFunc.children) => - failAnalysis("It is not allowed to use a window function inside an aggregate " + - "function. Please use the inner window function in a sub-query.") + throw QueryCompilationErrors.windowFunctionInsideAggregateFunctionNotAllowedError // Extracts AggregateExpression. For example, for SUM(x) - Sum(y) OVER (...), // we need to extract SUM(x). @@ -2840,12 +2821,12 @@ class Analyzer(override val catalogManager: CatalogManager) // We do a final check and see if we only have a single Window Spec defined in an // expressions. if (distinctWindowSpec.isEmpty) { - failAnalysis(s"$expr does not have any WindowExpression.") + throw QueryCompilationErrors.expressionWithoutWindowExpressionError(expr) } else if (distinctWindowSpec.length > 1) { // newExpressionsWithWindowFunctions only have expressions with a single // WindowExpression. If we reach here, we have a bug. - failAnalysis(s"$expr has multiple Window Specifications ($distinctWindowSpec)." + - s"Please file a bug report with this error message, stack trace, and the query.") + throw QueryCompilationErrors.expressionWithMultiWindowExpressionsError( + expr, distinctWindowSpec) } else { val spec = distinctWindowSpec.head val specKey = (spec.partitionSpec, spec.orderSpec, WindowFunctionType.functionType(expr)) @@ -2873,10 +2854,10 @@ class Analyzer(override val catalogManager: CatalogManager) def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperatorsDown { case Filter(condition, _) if hasWindowFunction(condition) => - failAnalysis("It is not allowed to use window functions inside WHERE clause") + throw QueryCompilationErrors.windowFunctionNotAllowedError("WHERE") case UnresolvedHaving(condition, _) if hasWindowFunction(condition) => - failAnalysis("It is not allowed to use window functions inside HAVING clause") + throw QueryCompilationErrors.windowFunctionNotAllowedError("HAVING") // Aggregate with Having clause. This rule works with an unresolved Aggregate because // a resolved Aggregate will not have Window Functions. 
@@ -3076,10 +3057,10 @@ class Analyzer(override val catalogManager: CatalogManager) def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions { case WindowExpression(wf: FrameLessOffsetWindowFunction, WindowSpecDefinition(_, _, f: SpecifiedWindowFrame)) if wf.frame != f => - failAnalysis(s"Cannot specify window frame for ${wf.prettyName} function") + throw QueryCompilationErrors.cannotSpecifyWindowFrameError(wf.prettyName) case WindowExpression(wf: WindowFunction, WindowSpecDefinition(_, _, f: SpecifiedWindowFrame)) if wf.frame != UnspecifiedFrame && wf.frame != f => - failAnalysis(s"Window Frame $f must match the required frame ${wf.frame}") + throw QueryCompilationErrors.windowFrameNotMatchRequiredFrameError(f, wf.frame) case WindowExpression(wf: WindowFunction, s @ WindowSpecDefinition(_, _, UnspecifiedFrame)) if wf.frame != UnspecifiedFrame => WindowExpression(wf, s.copy(frameSpecification = wf.frame)) @@ -3100,9 +3081,7 @@ class Analyzer(override val catalogManager: CatalogManager) object ResolveWindowOrder extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions { case WindowExpression(wf: WindowFunction, spec) if spec.orderSpec.isEmpty => - failAnalysis(s"Window function $wf requires window to be ordered, please add ORDER BY " + - s"clause. For example SELECT $wf(value_expr) OVER (PARTITION BY window_partition " + - s"ORDER BY window_ordering) from table") + throw QueryCompilationErrors.windowFunctionWithWindowFrameNotOrderedError(wf) case WindowExpression(rank: RankLike, spec) if spec.resolved => val order = spec.orderSpec.map(_.child) WindowExpression(rank.withOrder(order), spec) @@ -3169,7 +3148,8 @@ class Analyzer(override val catalogManager: CatalogManager) i.userSpecifiedCols.map { col => i.table.resolve(Seq(col), resolver) - .getOrElse(i.table.failAnalysis(s"Cannot resolve column name $col")) + .getOrElse(throw QueryCompilationErrors.cannotResolveUserSpecifiedColumnsError( + col, i.table)) } } @@ -3178,9 +3158,8 @@ class Analyzer(override val catalogManager: CatalogManager) cols: Seq[NamedExpression], query: LogicalPlan): LogicalPlan = { if (cols.size != query.output.size) { - query.failAnalysis( - s"Cannot write to table due to mismatched user specified column size(${cols.size}) and" + - s" data column size(${query.output.size})") + throw QueryCompilationErrors.writeTableWithMismatchedColumnsError( + cols.size, query.output.size, query) } val nameToQueryExpr = cols.zip(query.output).toMap // Static partition columns in the table output should not appear in the column list @@ -3760,8 +3739,7 @@ object TimeWindowing extends Rule[LogicalPlan] { renamedPlan.withNewChildren(substitutedPlan :: Nil) } } else if (numWindowExpr > 1) { - p.failAnalysis("Multiple time window expressions would result in a cartesian product " + - "of rows, therefore they are currently not supported.") + throw QueryCompilationErrors.multiTimeWindowExpressionsNotSupportedError(p) } else { p // Return unchanged. Analyzer will throw exception later } From 1554977670ffa452242b1433f0bff44c88c35722 Mon Sep 17 00:00:00 2001 From: Linhong Liu Date: Thu, 10 Dec 2020 09:14:07 +0000 Subject: [PATCH 0724/1009] [SPARK-33692][SQL] View should use captured catalog and namespace to lookup function ### What changes were proposed in this pull request? Using the view captured catalog and namespace to lookup function, so the view referred functions won't be overridden by newly created function with the same name, but different database or function type (i.e. 
temporary function) ### Why are the changes needed? bug fix, without this PR, changing database or create a temporary function with the same name may cause failure when querying a view. ### Does this PR introduce _any_ user-facing change? Yes, bug fix. ### How was this patch tested? newly added and existing test cases. Closes #30662 from linhongliu-db/SPARK-33692. Lead-authored-by: Linhong Liu Co-authored-by: Linhong Liu <67896261+linhongliu-db@users.noreply.github.com> Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 6 +++-- .../sql/catalyst/catalog/SessionCatalog.scala | 26 +++++++++++++++--- .../sql/execution/SQLViewTestSuite.scala | 27 +++++++++++++++++++ 3 files changed, 54 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 0d719b1f53365..74edd65fd0479 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -114,7 +114,8 @@ case class AnalysisContext( nestedViewDepth: Int = 0, maxNestedViewDepth: Int = -1, relationCache: mutable.Map[Seq[String], LogicalPlan] = mutable.Map.empty, - referredTempViewNames: Seq[Seq[String]] = Seq.empty) + referredTempViewNames: Seq[Seq[String]] = Seq.empty, + referredTempFunctionNames: Seq[String] = Seq.empty) object AnalysisContext { private val value = new ThreadLocal[AnalysisContext]() { @@ -139,7 +140,8 @@ object AnalysisContext { originContext.nestedViewDepth + 1, maxNestedViewDepth, originContext.relationCache, - viewDesc.viewReferredTempViewNames) + viewDesc.viewReferredTempViewNames, + viewDesc.viewReferredTempFunctionNames) set(context) try f finally { set(originContext) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 4c32870abe621..7c805bdb4b6f1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -1484,16 +1484,36 @@ class SessionCatalog( def lookupFunction( name: FunctionIdentifier, children: Seq[Expression]): Expression = synchronized { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ // Note: the implementation of this function is a little bit convoluted. // We probably shouldn't use a single FunctionRegistry to register all three kinds of functions // (built-in, temp, and external). if (name.database.isEmpty && functionRegistry.functionExists(name)) { - // This function has been already loaded into the function registry. - return functionRegistry.lookupFunction(name, children) + val referredTempFunctionNames = AnalysisContext.get.referredTempFunctionNames + val isResolvingView = AnalysisContext.get.catalogAndNamespace.nonEmpty + // Lookup the function as a temporary or a built-in function (i.e. without database) and + // 1. if we are not resolving view, we don't care about the function type and just return it. + // 2. if we are resolving view, only return a temp function if it's referred by this view. + if (!isResolvingView || + !isTemporaryFunction(name) || + referredTempFunctionNames.contains(name.funcName)) { + // This function has been already loaded into the function registry. 
+ return functionRegistry.lookupFunction(name, children) + } + } + + // Get the database from AnalysisContext if it's defined, otherwise, use current database + val currentDatabase = AnalysisContext.get.catalogAndNamespace match { + case Seq() => getCurrentDatabase + case Seq(_, db) => db + case Seq(catalog, namespace @ _*) => + throw new AnalysisException( + s"V2 catalog does not support functions yet. " + + s"catalog: ${catalog}, namespace: '${namespace.quoted}'") } // If the name itself is not qualified, add the current database to it. - val database = formatDatabaseName(name.database.getOrElse(getCurrentDatabase)) + val database = formatDatabaseName(name.database.getOrElse(currentDatabase)) val qualifiedName = name.copy(database = Some(database)) if (functionRegistry.functionExists(qualifiedName)) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala index f6172e3b65050..3a7a63ed45ce3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala @@ -199,6 +199,33 @@ abstract class SQLViewTestSuite extends QueryTest with SQLTestUtils { } } } + + test("SPARK-33692: view should use captured catalog and namespace to lookup function") { + val avgFuncClass = "test.org.apache.spark.sql.MyDoubleAvg" + val sumFuncClass = "test.org.apache.spark.sql.MyDoubleSum" + val functionName = "test_udf" + withTempDatabase { dbName => + withUserDefinedFunction( + s"default.$functionName" -> false, + s"$dbName.$functionName" -> false, + functionName -> true) { + // create a function in default database + sql("USE DEFAULT") + sql(s"CREATE FUNCTION $functionName AS '$avgFuncClass'") + // create a view using a function in 'default' database + val viewName = createView("v1", s"SELECT $functionName(col1) FROM VALUES (1), (2), (3)") + // create function in another database with the same function name + sql(s"USE $dbName") + sql(s"CREATE FUNCTION $functionName AS '$sumFuncClass'") + // create temporary function with the same function name + sql(s"CREATE TEMPORARY FUNCTION $functionName AS '$sumFuncClass'") + withView(viewName) { + // view v1 should still using function defined in `default` database + checkViewOutput(viewName, Seq(Row(102.0))) + } + } + } + } } class LocalTempViewTestSuite extends SQLViewTestSuite with SharedSparkSession { From 31e0baca30f21f71353a27b827c2acd0e25bd9d8 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 10 Dec 2020 16:32:24 -0800 Subject: [PATCH 0725/1009] [SPARK-33740][SQL] hadoop configs in hive-site.xml can overrides pre-existing hadoop ones ### What changes were proposed in this pull request? org.apache.hadoop.conf.Configuration#setIfUnset will ignore those with defaults too ### Why are the changes needed? fix a regression ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? new tests Closes #30709 from yaooqinn/SPARK-33740. 
Authored-by: Kent Yao Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/internal/SharedState.scala | 13 ++++++++++--- sql/core/src/test/resources/hive-site.xml | 5 +++++ .../spark/sql/internal/SharedStateSuite.scala | 11 +++++++++++ 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala index ea430db9f030f..fd34077aba963 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala @@ -228,14 +228,21 @@ object SharedState extends Logging { sparkConf: SparkConf, hadoopConf: Configuration, initialConfigs: scala.collection.Map[String, String] = Map.empty): Unit = { + + def containsInSparkConf(key: String): Boolean = { + sparkConf.contains(key) || sparkConf.contains("spark.hadoop." + key) || + (key.startsWith("hive") && sparkConf.contains("spark." + key)) + } + val hiveWarehouseKey = "hive.metastore.warehouse.dir" - val configFile = Utils.getContextOrSparkClassLoader.getResource("hive-site.xml") + val configFile = Utils.getContextOrSparkClassLoader.getResourceAsStream("hive-site.xml") if (configFile != null) { logInfo(s"loading hive config file: $configFile") val hadoopConfTemp = new Configuration() + hadoopConfTemp.clear() hadoopConfTemp.addResource(configFile) - hadoopConfTemp.asScala.foreach { entry => - hadoopConf.setIfUnset(entry.getKey, entry.getValue) + for (entry <- hadoopConfTemp.asScala if !containsInSparkConf(entry.getKey)) { + hadoopConf.set(entry.getKey, entry.getValue) } } val sparkWarehouseOption = diff --git a/sql/core/src/test/resources/hive-site.xml b/sql/core/src/test/resources/hive-site.xml index 17297b3e22a7e..4bf6189b73ca9 100644 --- a/sql/core/src/test/resources/hive-site.xml +++ b/sql/core/src/test/resources/hive-site.xml @@ -23,4 +23,9 @@ true Internal marker for test. + + hadoop.tmp.dir + /tmp/hive_one + default is /tmp/hadoop-${user.name} and will be overridden + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/SharedStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/SharedStateSuite.scala index 81bf15342423c..60a899b89e731 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/SharedStateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/SharedStateSuite.scala @@ -52,4 +52,15 @@ class SharedStateSuite extends SharedSparkSession { assert(conf.isInstanceOf[Configuration]) assert(conf.asInstanceOf[Configuration].get("fs.defaultFS") == "file:///") } + + test("SPARK-33740: hadoop configs in hive-site.xml can overrides pre-existing hadoop ones") { + val conf = new SparkConf() + val hadoopConf = new Configuration() + SharedState.loadHiveConfFile(conf, hadoopConf, Map.empty) + assert(hadoopConf.get("hadoop.tmp.dir") === "/tmp/hive_one") + hadoopConf.clear() + SharedState.loadHiveConfFile( + conf.set("spark.hadoop.hadoop.tmp.dir", "noop"), hadoopConf, Map.empty) + assert(hadoopConf.get("hadoop.tmp.dir") === null) + } } From fab2995972761503563fa2aa547c67047c51bd33 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 10 Dec 2020 17:49:56 -0800 Subject: [PATCH 0726/1009] [SPARK-33742][SQL] Throw PartitionsAlreadyExistException from HiveExternalCatalog.createPartitions() ### What changes were proposed in this pull request? Throw `PartitionsAlreadyExistException` from `createPartitions()` in Hive external catalog when a partition exists. 
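As a rough sketch of the unified behavior (a hypothetical session against the Hive catalog, adapted from the shared `AlterTableAddPartitionSuiteBase` test in this patch; the table and location names are only examples):

```
spark.sql("CREATE TABLE tbl (id bigint, data string) USING HIVE PARTITIONED BY (id)")
spark.sql("ALTER TABLE tbl ADD PARTITION (id=2) LOCATION 'loc1'")
spark.sql("ALTER TABLE tbl ADD PARTITION (id=1) LOCATION 'loc' PARTITION (id=2) LOCATION 'loc1'")
// now throws org.apache.spark.sql.catalyst.analysis.PartitionsAlreadyExistException
// with a message containing "The following partitions already exists",
// matching the V1/V2 in-memory catalogs
```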
Currently, `HiveExternalCatalog.createPartitions()` throws `AlreadyExistsException` wrapped by `AnalysisException`. In the PR, I propose to catch `AlreadyExistsException` in `HiveClientImpl` and replace it by `PartitionsAlreadyExistException`. ### Why are the changes needed? The behaviour of Hive external catalog deviates from V1/V2 in-memory catalogs that throw `PartitionsAlreadyExistException`. To improve user experience with Spark SQL, it would be better to throw the same exception. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? By running existing test suites: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableAddPartitionSuite" ``` Closes #30711 from MaxGekk/hive-partition-exception. Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../AlterTableAddPartitionSuiteBase.scala | 18 +++++++++++++++ .../v1/AlterTableAddPartitionSuite.scala | 20 +--------------- .../v2/AlterTableAddPartitionSuite.scala | 19 +-------------- .../sql/hive/client/HiveClientImpl.scala | 15 ++++++++++-- .../spark/sql/hive/client/VersionsSuite.scala | 23 ++++++++++++++++++- .../command/AlterTableAddPartitionSuite.scala | 18 --------------- 6 files changed, 55 insertions(+), 58 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala index 0cf0b395f139b..9d2c58b7e4351 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala @@ -21,6 +21,7 @@ import org.scalactic.source.Position import org.scalatest.Tag import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.catalyst.analysis.PartitionsAlreadyExistException import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.execution.datasources.PartitioningUtils import org.apache.spark.sql.internal.SQLConf @@ -184,4 +185,21 @@ trait AlterTableAddPartitionSuiteBase extends QueryTest with SQLTestUtils { "The spec (part0) must match the partition spec (part0, part1)")) } } + + test("partition already exists") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + sql(s"ALTER TABLE $t ADD PARTITION (id=2) LOCATION 'loc1'") + + val errMsg = intercept[PartitionsAlreadyExistException] { + sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'" + + " PARTITION (id=2) LOCATION 'loc1'") + }.getMessage + assert(errMsg.contains("The following partitions already exists")) + + sql(s"ALTER TABLE $t ADD IF NOT EXISTS PARTITION (id=1) LOCATION 'loc'" + + " PARTITION (id=2) LOCATION 'loc1'") + checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala index 295ce1d3da13f..b29564e1d81b6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.execution.command.v1 -import 
org.apache.spark.sql.catalyst.analysis.PartitionsAlreadyExistException import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution.command @@ -44,21 +43,4 @@ trait AlterTableAddPartitionSuiteBase extends command.AlterTableAddPartitionSuit } } -class AlterTableAddPartitionSuite extends AlterTableAddPartitionSuiteBase with SharedSparkSession { - test("partition already exists") { - withNsTable("ns", "tbl") { t => - sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") - sql(s"ALTER TABLE $t ADD PARTITION (id=2) LOCATION 'loc1'") - - val errMsg = intercept[PartitionsAlreadyExistException] { - sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'" + - " PARTITION (id=2) LOCATION 'loc1'") - }.getMessage - assert(errMsg.contains("The following partitions already exists")) - - sql(s"ALTER TABLE $t ADD IF NOT EXISTS PARTITION (id=1) LOCATION 'loc'" + - " PARTITION (id=2) LOCATION 'loc1'") - checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) - } - } -} +class AlterTableAddPartitionSuite extends AlterTableAddPartitionSuiteBase with SharedSparkSession diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala index b15235d17671a..09921c8d8a5eb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.command.v2 import org.apache.spark.SparkConf import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.analysis.{PartitionsAlreadyExistException, ResolvePartitionSpec} +import org.apache.spark.sql.catalyst.analysis.ResolvePartitionSpec import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.connector.{InMemoryPartitionTable, InMemoryPartitionTableCatalog, InMemoryTableCatalog} import org.apache.spark.sql.connector.catalog.{CatalogV2Implicits, Identifier} @@ -60,23 +60,6 @@ class AlterTableAddPartitionSuite assert(partMetadata.get("location") === expected) } - test("partition already exists") { - withNsTable("ns", "tbl") { t => - sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") - sql(s"ALTER TABLE $t ADD PARTITION (id=2) LOCATION 'loc1'") - - val errMsg = intercept[PartitionsAlreadyExistException] { - sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'" + - " PARTITION (id=2) LOCATION 'loc1'") - }.getMessage - assert(errMsg.contains("The following partitions already exists")) - - sql(s"ALTER TABLE $t ADD IF NOT EXISTS PARTITION (id=1) LOCATION 'loc'" + - " PARTITION (id=2) LOCATION 'loc1'") - checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) - } - } - test("SPARK-33650: add partition into a table which doesn't support partition management") { withNsTable("ns", "tbl", s"non_part_$catalog") { t => sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing") diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index b4ebf153fc178..0b19e5e6e8c84 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.hive.client import java.io.{File, PrintStream} import java.lang.{Iterable => JIterable} +import java.lang.reflect.InvocationTargetException import java.nio.charset.StandardCharsets.UTF_8 import java.util.{Locale, Map => JMap} import java.util.concurrent.TimeUnit._ @@ -48,7 +49,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.metrics.source.HiveCatalogMetrics import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchPartitionException} +import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchPartitionException, PartitionsAlreadyExistException} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.Expression @@ -598,7 +599,17 @@ private[hive] class HiveClientImpl( table: String, parts: Seq[CatalogTablePartition], ignoreIfExists: Boolean): Unit = withHiveState { - shim.createPartitions(client, db, table, parts, ignoreIfExists) + def replaceExistException(e: Throwable): Unit = e match { + case _: HiveException if e.getCause.isInstanceOf[AlreadyExistsException] => + throw new PartitionsAlreadyExistException(db, table, parts.map(_.spec)) + case _ => throw e + } + try { + shim.createPartitions(client, db, table, parts, ignoreIfExists) + } catch { + case e: InvocationTargetException => replaceExistException(e.getCause) + case e: Throwable => replaceExistException(e) + } } override def dropPartitions( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala index 684529aa330a7..b5500eaf47158 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala @@ -33,7 +33,7 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.internal.Logging import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchPermanentFunctionException} +import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchPermanentFunctionException, PartitionsAlreadyExistException} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Literal} import org.apache.spark.sql.catalyst.util.quietly @@ -594,6 +594,27 @@ class VersionsSuite extends SparkFunSuite with Logging { assert(client.getPartitionOption("default", "src_part", spec).isEmpty) } + test(s"$version: createPartitions if already exists") { + val partitions = Seq(CatalogTablePartition( + Map("key1" -> "101", "key2" -> "102"), + storageFormat)) + try { + client.createPartitions("default", "src_part", partitions, ignoreIfExists = false) + val errMsg = intercept[PartitionsAlreadyExistException] { + client.createPartitions("default", "src_part", partitions, ignoreIfExists = false) + }.getMessage + assert(errMsg.contains("partitions already exists")) + } finally { + client.dropPartitions( + "default", + "src_part", + partitions.map(_.spec), + ignoreIfNotExists = true, + purge = false, + retainData = false) + } + } + 
/////////////////////////////////////////////////////////////////////////// // Function related API /////////////////////////////////////////////////////////////////////////// diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddPartitionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddPartitionSuite.scala index ef0ec8d9bd69f..73776c3ef79fa 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddPartitionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddPartitionSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.hive.execution.command -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.execution.command.v1 import org.apache.spark.sql.hive.test.TestHiveSingleton @@ -26,21 +25,4 @@ class AlterTableAddPartitionSuite with TestHiveSingleton { override def version: String = "Hive V1" override def defaultUsing: String = "USING HIVE" - - test("partition already exists") { - withNsTable("ns", "tbl") { t => - sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") - sql(s"ALTER TABLE $t ADD PARTITION (id=2) LOCATION 'loc1'") - - val errMsg = intercept[AnalysisException] { - sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'" + - " PARTITION (id=2) LOCATION 'loc1'") - }.getMessage - assert(errMsg.contains("already exists")) - - sql(s"ALTER TABLE $t ADD IF NOT EXISTS PARTITION (id=1) LOCATION 'loc'" + - " PARTITION (id=2) LOCATION 'loc1'") - checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) - } - } } From 1ba1732beb8e01edfc4f658d9da4eaabf68ed7cf Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 10 Dec 2020 19:15:01 -0800 Subject: [PATCH 0727/1009] [SPARK-33295][BUILD] Upgrade ORC to 1.6.6 ### What changes were proposed in this pull request? This PR aims to upgrade Apache ORC to 1.6.6 for Apache Spark 3.2.0. ### Why are the changes needed? This brings the latest bug fixes and features. Apache Iceberg is already using Apache ORC 1.6.6. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #30715 from dongjoon-hyun/SPARK-33295. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 9 +++++---- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 9 +++++---- pom.xml | 2 +- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index b731c643aabe7..3f1199478bc67 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -4,8 +4,9 @@ JTransforms/3.1//JTransforms-3.1.jar RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar ST4/4.0.4//ST4-4.0.4.jar activation/1.1.1//activation-1.1.1.jar -aircompressor/0.10//aircompressor-0.10.jar +aircompressor/0.16//aircompressor-0.16.jar algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar +annotations/17.0.0//annotations-17.0.0.jar antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar antlr4-runtime/4.8-1//antlr4-runtime-4.8-1.jar aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar @@ -195,9 +196,9 @@ objenesis/2.6//objenesis-2.6.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar -orc-core/1.5.12//orc-core-1.5.12.jar -orc-mapreduce/1.5.12//orc-mapreduce-1.5.12.jar -orc-shims/1.5.12//orc-shims-1.5.12.jar +orc-core/1.6.6//orc-core-1.6.6.jar +orc-mapreduce/1.6.6//orc-mapreduce-1.6.6.jar +orc-shims/1.6.6//orc-shims-1.6.6.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 84b44342280a5..d16235339897e 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -5,8 +5,9 @@ RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar ST4/4.0.4//ST4-4.0.4.jar accessors-smart/1.2//accessors-smart-1.2.jar activation/1.1.1//activation-1.1.1.jar -aircompressor/0.10//aircompressor-0.10.jar +aircompressor/0.16//aircompressor-0.16.jar algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar +annotations/17.0.0//annotations-17.0.0.jar antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar antlr4-runtime/4.8-1//antlr4-runtime-4.8-1.jar aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar @@ -209,9 +210,9 @@ okhttp/2.7.5//okhttp-2.7.5.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar -orc-core/1.5.12//orc-core-1.5.12.jar -orc-mapreduce/1.5.12//orc-mapreduce-1.5.12.jar -orc-shims/1.5.12//orc-shims-1.5.12.jar +orc-core/1.6.6//orc-core-1.6.6.jar +orc-mapreduce/1.6.6//orc-mapreduce-1.6.6.jar +orc-shims/1.6.6//orc-shims-1.6.6.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar diff --git a/pom.xml b/pom.xml index f449bf7928ecc..8aaa4a504ef0c 100644 --- a/pom.xml +++ b/pom.xml @@ -136,7 +136,7 @@ 2.6.0 10.12.1.1 1.10.1 - 1.5.12 + 1.6.6 9.4.28.v20200408 3.1.0 0.9.5 From cd7a30641f25f99452b7eb46ee2b3c5d59b2c542 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Fri, 11 Dec 2020 14:15:56 +0900 Subject: [PATCH 0728/1009] [SPARK-33749][BUILD][PYTHON] Exclude target directory in pycodestyle and flake8 ### What changes were proposed in this pull request? Once you build and ran K8S tests, Python lint fails as below: ```bash $ ./dev/lint-python ``` Before this PR: ``` starting python compilation test... python compilation succeeded. downloading pycodestyle from https://raw.githubusercontent.com/PyCQA/pycodestyle/2.6.0/pycodestyle.py... starting pycodestyle test... 
pycodestyle checks failed: ./resource-managers/kubernetes/integration-tests/target/spark-dist-unpacked/python/pyspark/cloudpickle/cloudpickle.py:15:101: E501 line too long (105 > 100 characters) ./resource-managers/kubernetes/integration-tests/target/spark-dist-unpacked/python/docs/source/conf.py:60:101: E501 line too long (124 > 100 characters) ... ``` After this PR: ``` starting python compilation test... python compilation succeeded. downloading pycodestyle from https://raw.githubusercontent.com/PyCQA/pycodestyle/2.6.0/pycodestyle.py... starting pycodestyle test... pycodestyle checks passed. starting flake8 test... flake8 checks passed. starting mypy test... mypy checks passed. starting sphinx-build tests... sphinx-build checks passed. ``` This PR excludes target directory to avoid such cases in the future. ### Why are the changes needed? To make it easier to run linters ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? Manually tested va running `./dev/lint-python`. Closes #30718 from HyukjinKwon/SPARK-33749. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- dev/tox.ini | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/tox.ini b/dev/tox.ini index 43cd5877dfdb8..68e875f4c54ed 100644 --- a/dev/tox.ini +++ b/dev/tox.ini @@ -16,9 +16,9 @@ [pycodestyle] ignore=E226,E241,E305,E402,E722,E731,E741,W503,W504 max-line-length=100 -exclude=python/pyspark/cloudpickle/*.py,shared.py,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/* +exclude=*/target/*,python/pyspark/cloudpickle/*.py,shared.py,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/* [flake8] select = E901,E999,F821,F822,F823,F401,F405,B006 -exclude = python/pyspark/cloudpickle/*.py,shared.py*,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/*,python/out,python/pyspark/sql/pandas/functions.pyi,python/pyspark/sql/column.pyi,python/pyspark/worker.pyi,python/pyspark/java_gateway.pyi +exclude = */target/*,python/pyspark/cloudpickle/*.py,shared.py*,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/*,python/out,python/pyspark/sql/pandas/functions.pyi,python/pyspark/sql/column.pyi,python/pyspark/worker.pyi,python/pyspark/java_gateway.pyi max-line-length = 100 From 7895ea1f50700b56930b3841f16c44442d26e719 Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Fri, 11 Dec 2020 14:41:15 +0900 Subject: [PATCH 0729/1009] [SPARK-32910][SS] Remove UninterruptibleThread usage from KafkaOffsetReaderAdmin ### What changes were proposed in this pull request? The Kafka offset reader which uses `AdminClient` still uses `UninterruptibleThread` to call it. Since there is no evidence that `AdminClient` suffers from similar issues like [KAFKA-1894](https://issues.apache.org/jira/browse/KAFKA-1894) I'm removing `UninterruptibleThread` usage. In order to put the `AdminClient` under stress and make sure it works I've created the following standalone application: https://github.com/gaborgsomogyi/kafka-admin-interruption What this PR contains: * Removed `UninterruptibleThread` from `KafkaOffsetReaderAdmin` * Removed/modified comments which are not true * Adapted `KafkaRelationSuite` * Renamed `partitionsAssignedToConsumer` to `partitionsAssignedToAdmin` ### Why are the changes needed? `KafkaOffsetReaderAdmin` doesn't need `UninterruptibleThread` usage. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing unit tests + manually with simple Kafka to Kafka query. 
Closes #30668 from gaborgsomogyi/SPARK-32910. Authored-by: Gabor Somogyi Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../sql/kafka010/KafkaOffsetReaderAdmin.scala | 81 ++++++------------- .../sql/kafka010/KafkaRelationSuite.scala | 23 ++---- 2 files changed, 30 insertions(+), 74 deletions(-) diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala index d5905795c626b..f9a714c37cb9e 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala @@ -33,7 +33,6 @@ import org.apache.spark.SparkEnv import org.apache.spark.internal.Logging import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap -import org.apache.spark.util.{UninterruptibleThread, UninterruptibleThreadRunner} /** * This class uses Kafka's own [[Admin]] API to read data offsets from Kafka. @@ -58,13 +57,6 @@ private[kafka010] class KafkaOffsetReaderAdmin( private[kafka010] val offsetFetchAttemptIntervalMs = readerOptions.getOrElse(KafkaSourceProvider.FETCH_OFFSET_RETRY_INTERVAL_MS, "1000").toLong - /** - * [[UninterruptibleThreadRunner]] ensures that all [[Admin]] communication called in an - * [[UninterruptibleThread]]. In the case of streaming queries, we are already running in an - * [[UninterruptibleThread]], however for batch mode this is not the case. - */ - val uninterruptibleThreadRunner = new UninterruptibleThreadRunner("Kafka Offset Reader") - /** * An AdminClient used in the driver to query the latest Kafka offsets. * This only queries the offsets because AdminClient has no functionality to commit offsets like @@ -73,7 +65,6 @@ private[kafka010] class KafkaOffsetReaderAdmin( @volatile protected var _admin: Admin = null protected def admin: Admin = synchronized { - assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) if (_admin == null) { _admin = consumerStrategy.createAdmin(driverKafkaParams) } @@ -121,8 +112,7 @@ private[kafka010] class KafkaOffsetReaderAdmin( * Closes the connection to Kafka, and cleans up state. */ override def close(): Unit = { - if (_admin != null) uninterruptibleThreadRunner.runUninterruptibly { stopAdmin() } - uninterruptibleThreadRunner.shutdown() + stopAdmin() } /** @@ -141,9 +131,7 @@ private[kafka010] class KafkaOffsetReaderAdmin( logDebug(s"Assigned partitions: $partitions. Seeking to $partitionOffsets") partitionOffsets } - val partitions = uninterruptibleThreadRunner.runUninterruptibly { - consumerStrategy.assignedTopicPartitions(admin) - } + val partitions = consumerStrategy.assignedTopicPartitions(admin) // Obtain TopicPartition offsets with late binding support offsetRangeLimit match { case EarliestOffsetRangeLimit => partitions.map { @@ -224,7 +212,7 @@ private[kafka010] class KafkaOffsetReaderAdmin( fnAssertParametersWithPartitions: ju.Set[TopicPartition] => Unit, fnRetrievePartitionOffsets: ju.Set[TopicPartition] => Map[TopicPartition, Long] ): KafkaSourceOffset = { - val fetched = partitionsAssignedToConsumer { + val fetched = partitionsAssignedToAdmin { partitions => { fnAssertParametersWithPartitions(partitions) @@ -262,7 +250,7 @@ private[kafka010] class KafkaOffsetReaderAdmin( * Fetch the earliest offsets for the topic partitions that are indicated * in the [[ConsumerStrategy]]. 
*/ - override def fetchEarliestOffsets(): Map[TopicPartition, Long] = partitionsAssignedToConsumer( + override def fetchEarliestOffsets(): Map[TopicPartition, Long] = partitionsAssignedToAdmin( partitions => { val listOffsetsParams = partitions.asScala.map(p => p -> OffsetSpec.earliest()).toMap.asJava val partitionOffsets = listOffsets(admin, listOffsetsParams) @@ -274,19 +262,16 @@ private[kafka010] class KafkaOffsetReaderAdmin( * Fetch the latest offsets for the topic partitions that are indicated * in the [[ConsumerStrategy]]. * - * Kafka may return earliest offsets when we are requesting latest offsets if `poll` is called - * right before `seekToEnd` (KAFKA-7703). As a workaround, we will call `position` right after - * `poll` to wait until the potential offset request triggered by `poll(0)` is done. - * - * In addition, to avoid other unknown issues, we also use the given `knownOffsets` to audit the + * In order to avoid unknown issues, we use the given `knownOffsets` to audit the * latest offsets returned by Kafka. If we find some incorrect offsets (a latest offset is less * than an offset in `knownOffsets`), we will retry at most `maxOffsetFetchAttempts` times. When * a topic is recreated, the latest offsets may be less than offsets in `knownOffsets`. We cannot - * distinguish this with KAFKA-7703, so we just return whatever we get from Kafka after retrying. + * distinguish this with issues like KAFKA-7703, so we just return whatever we get from Kafka + * after retrying. */ override def fetchLatestOffsets( knownOffsets: Option[PartitionOffsetMap]): PartitionOffsetMap = - partitionsAssignedToConsumer { partitions => { + partitionsAssignedToAdmin { partitions => { val listOffsetsParams = partitions.asScala.map(_ -> OffsetSpec.latest()).toMap.asJava if (knownOffsets.isEmpty) { val partitionOffsets = listOffsets(admin, listOffsetsParams) @@ -314,11 +299,10 @@ private[kafka010] class KafkaOffsetReaderAdmin( } // Retry to fetch latest offsets when detecting incorrect offsets. We don't use - // `withRetriesWithoutInterrupt` to retry because: + // `withRetries` to retry because: // - // - `withRetriesWithoutInterrupt` will reset the consumer for each attempt but a fresh - // consumer has a much bigger chance to hit KAFKA-7703. - // - Avoid calling `consumer.poll(0)` which may cause KAFKA-7703. + // - `withRetries` will reset the admin for each attempt but a fresh + // admin has a much bigger chance to hit KAFKA-7703 like issues. 
var incorrectOffsets: Seq[(TopicPartition, Long, Long)] = Nil var attempt = 0 do { @@ -351,7 +335,7 @@ private[kafka010] class KafkaOffsetReaderAdmin( if (newPartitions.isEmpty) { Map.empty[TopicPartition, Long] } else { - partitionsAssignedToConsumer(partitions => { + partitionsAssignedToAdmin(partitions => { // Get the earliest offset of each partition val listOffsetsParams = newPartitions.filter { newPartition => // When deleting topics happen at the same time, some partitions may not be in @@ -501,11 +485,11 @@ private[kafka010] class KafkaOffsetReaderAdmin( rangeCalculator.getRanges(ranges, getSortedExecutorList) } - private def partitionsAssignedToConsumer( + private def partitionsAssignedToAdmin( body: ju.Set[TopicPartition] => Map[TopicPartition, Long]) - : Map[TopicPartition, Long] = uninterruptibleThreadRunner.runUninterruptibly { + : Map[TopicPartition, Long] = { - withRetriesWithoutInterrupt { + withRetries { val partitions = consumerStrategy.assignedTopicPartitions(admin).asJava logDebug(s"Partitions assigned: $partitions.") body(partitions) @@ -516,37 +500,23 @@ private[kafka010] class KafkaOffsetReaderAdmin( * Helper function that does multiple retries on a body of code that returns offsets. * Retries are needed to handle transient failures. For e.g. race conditions between getting * assignment and getting position while topics/partitions are deleted can cause NPEs. - * - * This method also makes sure `body` won't be interrupted to workaround similar issues like in - * `KafkaConsumer.poll`. (KAFKA-1894) */ - private def withRetriesWithoutInterrupt( - body: => Map[TopicPartition, Long]): Map[TopicPartition, Long] = { - assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) - + private def withRetries(body: => Map[TopicPartition, Long]): Map[TopicPartition, Long] = { synchronized { var result: Option[Map[TopicPartition, Long]] = None var attempt = 1 var lastException: Throwable = null while (result.isEmpty && attempt <= maxOffsetFetchAttempts && !Thread.currentThread().isInterrupted) { - Thread.currentThread match { - case ut: UninterruptibleThread => - ut.runUninterruptibly { - try { - result = Some(body) - } catch { - case NonFatal(e) => - lastException = e - logWarning(s"Error in attempt $attempt getting Kafka offsets: ", e) - attempt += 1 - Thread.sleep(offsetFetchAttemptIntervalMs) - resetAdmin() - } - } - case _ => - throw new IllegalStateException( - "Kafka APIs must be executed on a o.a.spark.util.UninterruptibleThread") + try { + result = Some(body) + } catch { + case NonFatal(e) => + lastException = e + logWarning(s"Error in attempt $attempt getting Kafka offsets: ", e) + attempt += 1 + Thread.sleep(offsetFetchAttemptIntervalMs) + resetAdmin() } } if (Thread.interrupted()) { @@ -562,7 +532,6 @@ private[kafka010] class KafkaOffsetReaderAdmin( } private def stopAdmin(): Unit = synchronized { - assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) if (_admin != null) _admin.close() } diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala index 16fa24a68abe2..6e9d8de9fa5be 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala @@ -21,13 +21,10 @@ import java.nio.charset.StandardCharsets.UTF_8 import java.util.Locale import 
java.util.concurrent.atomic.AtomicInteger -import scala.annotation.tailrec - import org.apache.kafka.clients.producer.ProducerRecord import org.apache.kafka.common.TopicPartition -import org.apache.spark.SparkConf -import org.apache.spark.SparkException +import org.apache.spark.{SparkConf, TestUtils} import org.apache.spark.sql.{DataFrameReader, QueryTest} import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation @@ -270,7 +267,9 @@ abstract class KafkaRelationSuiteBase extends QueryTest with SharedSparkSession test("no matched offset for timestamp - startingOffsets") { val (topic, timestamps) = prepareTimestampRelatedUnitTest - val e = intercept[SparkException] { + // KafkaOffsetReaderConsumer and KafkaOffsetReaderAdmin both throws AssertionError + // but the UninterruptibleThread used by KafkaOffsetReaderConsumer wraps it with SparkException + val e = intercept[Throwable] { verifyTimestampRelatedQueryResult({ df => // partition 2 will make query fail val startTopicTimestamps = Map( @@ -283,19 +282,7 @@ abstract class KafkaRelationSuiteBase extends QueryTest with SharedSparkSession }, topic, Seq.empty) } - @tailrec - def assertionErrorInExceptionChain(e: Throwable): Boolean = { - if (e.isInstanceOf[AssertionError]) { - true - } else if (e.getCause == null) { - false - } else { - assertionErrorInExceptionChain(e.getCause) - } - } - - assert(assertionErrorInExceptionChain(e), - "Cannot find expected AssertionError in chained exceptions") + TestUtils.assertExceptionMsg(e, "No offset matched from request") } test("no matched offset for timestamp - endingOffsets") { From 24d7e45d31181a24a37261480fcd45a9a97db659 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Fri, 11 Dec 2020 05:52:33 +0000 Subject: [PATCH 0730/1009] [SPARK-33527][SQL] Extend the function of decode so as consistent with mainstream databases ### What changes were proposed in this pull request? In Spark, decode(bin, charset) - Decodes the first argument using the second argument character set. Unfortunately this is NOT what any other SQL vendor understands `DECODE` to do. `DECODE` generally is a short hand for a simple case expression: ``` SELECT DECODE(c1, 1, 'Hello', 2, 'World', '!') FROM (VALUES (1), (2), (3)) AS T(c1) => (Hello), (World) (!) ``` There are some mainstream database support the syntax. **Oracle** https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/DECODE.html#GUID-39341D91-3442-4730-BD34-D3CF5D4701CE **Vertica** https://www.vertica.com/docs/9.2.x/HTML/Content/Authoring/SQLReferenceManual/Functions/String/DECODE.htm?tocpath=SQL%20Reference%20Manual%7CSQL%20Functions%7CString%20Functions%7C_____10 **DB2** https://www.ibm.com/support/knowledgecenter/SSGU8G_14.1.0/com.ibm.sqls.doc/ids_sqs_1447.htm **Redshift** https://docs.aws.amazon.com/redshift/latest/dg/r_DECODE_expression.html **Pig** https://pig.apache.org/docs/latest/api/org/apache/pig/piggybank/evaluation/decode/Decode.html **Teradata** https://docs.teradata.com/reader/756LNiPSFdY~4JcCCcR5Cw/jtCpCycpEaXESG4d63kMjg **Snowflake** https://docs.snowflake.com/en/sql-reference/functions/decode.html ### Why are the changes needed? It is very useful. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? Jenkins test. Closes #30479 from beliefer/SPARK-33527. 
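To make the equivalence concrete, a small spark-shell sketch (illustrative only, not taken from the patch's test files; both queries should return the same value):
```
// The multi-argument decode is shorthand for a simple CASE expression with an
// optional default; both of these return "San Francisco".
spark.sql("SELECT decode(2, 1, 'Southlake', 2, 'San Francisco', 'Non domestic')").show()
spark.sql("SELECT CASE 2 WHEN 1 THEN 'Southlake' WHEN 2 THEN 'San Francisco' " +
  "ELSE 'Non domestic' END").show()
```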
Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: Wenchen Fan --- .../expressions/stringExpressions.scala | 62 ++++++++++++++++- .../expressions/CodeGenerationSuite.scala | 2 +- .../expressions/StringExpressionsSuite.scala | 14 ++-- .../org/apache/spark/sql/functions.scala | 2 +- .../sql-tests/inputs/string-functions.sql | 10 +++ .../results/ansi/string-functions.sql.out | 68 ++++++++++++++++++- .../results/string-functions.sql.out | 68 ++++++++++++++++++- 7 files changed, 214 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 9f92181b34df1..ae29cfe8119f6 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -26,6 +26,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.commons.codec.binary.{Base64 => CommonsBase64} +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, TypeCheckResult} import org.apache.spark.sql.catalyst.expressions.codegen._ @@ -2082,6 +2083,65 @@ case class UnBase64(child: Expression) } } +object Decode { + def createExpr(params: Seq[Expression]): Expression = { + params.length match { + case 0 | 1 => + throw new AnalysisException("Invalid number of arguments for function decode. " + + s"Expected: 2; Found: ${params.length}") + case 2 => StringDecode(params.head, params.last) + case _ => + val input = params.head + val other = params.tail + val itr = other.iterator + var default: Expression = Literal.create(null, StringType) + val branches = ArrayBuffer.empty[(Expression, Expression)] + while (itr.hasNext) { + val search = itr.next + if (itr.hasNext) { + val condition = EqualTo(input, search) + branches += ((condition, itr.next)) + } else { + default = search + } + } + CaseWhen(branches.seq, default) + } + } +} + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ + |_FUNC_(bin, charset) - Decodes the first argument using the second argument character set. + | + |_FUNC_(expr, search, result [, search, result ] ... [, default]) - Decode compares expr + | to each search value one by one. If expr is equal to a search, returns the corresponding result. + | If no match is found, then Oracle returns default. If default is omitted, returns null. 
+ """, + examples = """ + Examples: + > SELECT _FUNC_(encode('abc', 'utf-8'), 'utf-8'); + abc + > SELECT _FUNC_(2, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle', 'Non domestic'); + San Francisco + > SELECT _FUNC_(6, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle', 'Non domestic'); + Non domestic + > SELECT _FUNC_(6, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle'); + NULL + """, + since = "3.2.0") +// scalastyle:on line.size.limit +case class Decode(params: Seq[Expression], child: Expression) extends RuntimeReplaceable { + + def this(params: Seq[Expression]) = { + this(params, Decode.createExpr(params)) + } + + override def flatArguments: Iterator[Any] = Iterator(params) + override def exprsReplaced: Seq[Expression] = params +} + /** * Decodes the first argument into a String using the provided character set * (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16'). @@ -2097,7 +2157,7 @@ case class UnBase64(child: Expression) """, since = "1.5.0") // scalastyle:on line.size.limit -case class Decode(bin: Expression, charset: Expression) +case class StringDecode(bin: Expression, charset: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { override def left: Expression = bin diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala index bca8c56a1071e..b118dba9e3711 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala @@ -104,7 +104,7 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper { test("SPARK-22543: split large if expressions into blocks due to JVM code size limit") { var strExpr: Expression = Literal("abc") for (_ <- 1 to 150) { - strExpr = Decode(Encode(strExpr, "utf-8"), "utf-8") + strExpr = StringDecode(Encode(strExpr, "utf-8"), "utf-8") } val expressions = Seq(If(EqualTo(strExpr, strExpr), strExpr, strExpr)) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index 78e9cf82a28b1..11ef1e98c82ae 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -349,23 +349,23 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { // scalastyle:off // non ascii characters are not allowed in the code, so we disable the scalastyle here. 
checkEvaluation( - Decode(Encode(Literal("大千世界"), Literal("UTF-16LE")), Literal("UTF-16LE")), "大千世界") + StringDecode(Encode(Literal("大千世界"), Literal("UTF-16LE")), Literal("UTF-16LE")), "大千世界") checkEvaluation( - Decode(Encode(a, Literal("utf-8")), Literal("utf-8")), "大千世界", create_row("大千世界")) + StringDecode(Encode(a, Literal("utf-8")), Literal("utf-8")), "大千世界", create_row("大千世界")) checkEvaluation( - Decode(Encode(a, Literal("utf-8")), Literal("utf-8")), "", create_row("")) + StringDecode(Encode(a, Literal("utf-8")), Literal("utf-8")), "", create_row("")) // scalastyle:on checkEvaluation(Encode(a, Literal("utf-8")), null, create_row(null)) checkEvaluation(Encode(Literal.create(null, StringType), Literal("utf-8")), null) checkEvaluation(Encode(a, Literal.create(null, StringType)), null, create_row("")) - checkEvaluation(Decode(b, Literal("utf-8")), null, create_row(null)) - checkEvaluation(Decode(Literal.create(null, BinaryType), Literal("utf-8")), null) - checkEvaluation(Decode(b, Literal.create(null, StringType)), null, create_row(null)) + checkEvaluation(StringDecode(b, Literal("utf-8")), null, create_row(null)) + checkEvaluation(StringDecode(Literal.create(null, BinaryType), Literal("utf-8")), null) + checkEvaluation(StringDecode(b, Literal.create(null, StringType)), null, create_row(null)) // Test escaping of charset GenerateUnsafeProjection.generate(Encode(a, Literal("\"quote")) :: Nil) - GenerateUnsafeProjection.generate(Decode(b, Literal("\"quote")) :: Nil) + GenerateUnsafeProjection.generate(StringDecode(b, Literal("\"quote")) :: Nil) } test("initcap unit test") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 5b1ee2deefc10..ede2b52930a17 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -2438,7 +2438,7 @@ object functions { * @since 1.5.0 */ def decode(value: Column, charset: String): Column = withExpr { - Decode(value.expr, lit(charset).expr) + StringDecode(value.expr, lit(charset).expr) } /** diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql index f5ed2036dc8ac..80b4b8ca8cd54 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql @@ -53,3 +53,13 @@ SELECT trim(TRAILING 'xy' FROM 'TURNERyxXxy'); -- Check lpad/rpad with invalid length parameter SELECT lpad('hi', 'invalid_length'); SELECT rpad('hi', 'invalid_length'); + +-- decode +select decode(); +select decode(encode('abc', 'utf-8')); +select decode(encode('abc', 'utf-8'), 'utf-8'); +select decode(1, 1, 'Southlake'); +select decode(2, 1, 'Southlake'); +select decode(2, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle', 'Non domestic'); +select decode(6, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle', 'Non domestic'); +select decode(6, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle'); \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out index d5c0acb40bb1e..3164d462f8464 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out @@ -1,5 +1,5 @@ -- 
Automatically generated by SQLQueryTestSuite --- Number of queries: 36 +-- Number of queries: 44 -- !query @@ -294,3 +294,69 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: invalid_length + + +-- !query +select decode() +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Invalid number of arguments for function decode. Expected: 2; Found: 0;; line 1 pos 7 + + +-- !query +select decode(encode('abc', 'utf-8')) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Invalid number of arguments for function decode. Expected: 2; Found: 1;; line 1 pos 7 + + +-- !query +select decode(encode('abc', 'utf-8'), 'utf-8') +-- !query schema +struct +-- !query output +abc + + +-- !query +select decode(1, 1, 'Southlake') +-- !query schema +struct +-- !query output +Southlake + + +-- !query +select decode(2, 1, 'Southlake') +-- !query schema +struct +-- !query output +NULL + + +-- !query +select decode(2, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle', 'Non domestic') +-- !query schema +struct +-- !query output +San Francisco + + +-- !query +select decode(6, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle', 'Non domestic') +-- !query schema +struct +-- !query output +Non domestic + + +-- !query +select decode(6, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle') +-- !query schema +struct +-- !query output +NULL \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index 20c31b140b009..020a095d72e85 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 36 +-- Number of queries: 44 -- !query @@ -290,3 +290,69 @@ SELECT rpad('hi', 'invalid_length') struct -- !query output NULL + + +-- !query +select decode() +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Invalid number of arguments for function decode. Expected: 2; Found: 0;; line 1 pos 7 + + +-- !query +select decode(encode('abc', 'utf-8')) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Invalid number of arguments for function decode. 
Expected: 2; Found: 1;; line 1 pos 7 + + +-- !query +select decode(encode('abc', 'utf-8'), 'utf-8') +-- !query schema +struct +-- !query output +abc + + +-- !query +select decode(1, 1, 'Southlake') +-- !query schema +struct +-- !query output +Southlake + + +-- !query +select decode(2, 1, 'Southlake') +-- !query schema +struct +-- !query output +NULL + + +-- !query +select decode(2, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle', 'Non domestic') +-- !query schema +struct +-- !query output +San Francisco + + +-- !query +select decode(6, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle', 'Non domestic') +-- !query schema +struct +-- !query output +Non domestic + + +-- !query +select decode(6, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle') +-- !query schema +struct +-- !query output +NULL \ No newline at end of file From 8ac86a4c318ddc99d0a979baefd197da2ce1c2b5 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 10 Dec 2020 22:32:23 -0800 Subject: [PATCH 0731/1009] [SPARK-33750][SQL][TESTS] Use `hadoop-3.2` distribution in HiveExternalCatalogVersionsSuite ### What changes were proposed in this pull request? This PR aims to use `hadoop-3.2` distribution in HiveExternalCatalogVersionsSuite if available. ### Why are the changes needed? Apache Spark 3.1 is using Hadoop 3 by default. We need to focus on Hadoop 3 more to prepare the future. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #30722 from dongjoon-hyun/SPARK-33750. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../spark/sql/hive/HiveExternalCatalogVersionsSuite.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index cf070f4611f3b..07d8dacf98252 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -93,7 +93,11 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { mirrors.distinct :+ "https://archive.apache.org/dist" :+ PROCESS_TABLES.releaseMirror logInfo(s"Trying to download Spark $version from $sites") for (site <- sites) { - val filename = s"spark-$version-bin-hadoop2.7.tgz" + val filename = if (version.startsWith("3")) { + s"spark-$version-bin-hadoop3.2.tgz" + } else { + s"spark-$version-bin-hadoop2.7.tgz" + } val url = s"$site/spark/spark-$version/$filename" logInfo(s"Downloading Spark $version from $url") try { From c05f6f98b6b06019d99d6a92b61b877afa822d0b Mon Sep 17 00:00:00 2001 From: Josh Soref Date: Fri, 11 Dec 2020 06:49:45 +0000 Subject: [PATCH 0732/1009] [MINOR][SQL] Spelling: enabled - legacy_setops_precedence_enbled ### What changes were proposed in this pull request? Replace `legacy_setops_precedence_enbled` with `legacy_setops_precedence_enabled` Alternatively, `legacy_setops_precedence_enabled` could be added, and `legacy_setops_precedence_enbled` retained, and if set the code could honor it and warn about the deprecated spelling. ### Why are the changes needed? `enabled` is misspelled in `legacy_setops_precedence_enbled` ### Does this PR introduce _any_ user-facing change? Yes. It would break current consumers. 
Examples include: * https://www.programmersought.com/article/87752082924/ * https://github.com/fugue-project/fugue/blob/125d873c38e18b5f09b032bd01ac47a0c6739ddc/fugue_sql/_antlr/fugue_sqlLexer.py * https://github.com/search?q=legacy_setops_precedence_enbled&type=code ### How was this patch tested? It's been included in #30323 for a while (and is now split out here) Closes #30677 from jsoref/spelling-enabled. Authored-by: Josh Soref Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/catalyst/parser/SqlBase.g4 | 8 ++++---- .../apache/spark/sql/catalyst/parser/ParseDriver.scala | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index b08451d8a6cfa..d2908a555858d 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -21,7 +21,7 @@ grammar SqlBase; * When false, INTERSECT is given the greater precedence over the other set * operations (UNION, EXCEPT and MINUS) as per the SQL standard. */ - public boolean legacy_setops_precedence_enbled = false; + public boolean legacy_setops_precedence_enabled = false; /** * When false, a literal with an exponent would be converted into @@ -466,11 +466,11 @@ multiInsertQueryBody queryTerm : queryPrimary #queryTermDefault - | left=queryTerm {legacy_setops_precedence_enbled}? + | left=queryTerm {legacy_setops_precedence_enabled}? operator=(INTERSECT | UNION | EXCEPT | SETMINUS) setQuantifier? right=queryTerm #setOperation - | left=queryTerm {!legacy_setops_precedence_enbled}? + | left=queryTerm {!legacy_setops_precedence_enabled}? operator=INTERSECT setQuantifier? right=queryTerm #setOperation - | left=queryTerm {!legacy_setops_precedence_enbled}? + | left=queryTerm {!legacy_setops_precedence_enabled}? operator=(UNION | EXCEPT | SETMINUS) setQuantifier? right=queryTerm #setOperation ; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index d08be467f96cc..deaa3c9cd725f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -96,7 +96,7 @@ abstract class AbstractSqlParser extends ParserInterface with SQLConfHelper with parser.addParseListener(PostProcessor) parser.removeErrorListeners() parser.addErrorListener(ParseErrorListener) - parser.legacy_setops_precedence_enbled = conf.setOpsPrecedenceEnforced + parser.legacy_setops_precedence_enabled = conf.setOpsPrecedenceEnforced parser.legacy_exponent_literal_as_decimal_enabled = conf.exponentLiteralAsDecimalEnabled parser.SQL_standard_keyword_behavior = conf.ansiEnabled From d662b95535f12ebbc671a283b19291f63d2a2b8c Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Fri, 11 Dec 2020 01:52:13 -0800 Subject: [PATCH 0733/1009] [SPARK-33754][K8S][DOCS] Update kubernetes/integration-tests/README.md to follow the default Hadoop profile updated ### What changes were proposed in this pull request? This PR updates `kubernetes/integration-tests/README.md`. ### Why are the changes needed? To follow the current Hadoop profile (hadoop-3.2). ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? 
I have confirmed that the integration tests pass with the following command for both Hadoop 3.2 an 2.7. ``` build/mvn integration-test -am -pl :spark-kubernetes-integration-tests_2.12 \ -Pkubernetes \ -Pkubernetes-integration-tests \ -Dspark.kubernetes.test.imageTag=${IMAGE_TAG} \ -Dspark.kubernetes.test.imageRepo=docker.io/kubespark \ -Dspark.kubernetes.test.namespace=default \ -Dspark.kubernetes.test.deployMode=minikube \ -Dtest.include.tags=k8s ``` Closes #30726 from sarutak/update-kube-integ-readme. Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- resource-managers/kubernetes/integration-tests/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/README.md b/resource-managers/kubernetes/integration-tests/README.md index 6409c227ec287..67d03ec5b48a2 100644 --- a/resource-managers/kubernetes/integration-tests/README.md +++ b/resource-managers/kubernetes/integration-tests/README.md @@ -17,9 +17,9 @@ To run tests with Java 11 instead of Java 8, use `--java-image-tag` to specify t ./dev/dev-run-integration-tests.sh --java-image-tag 11-jre-slim -To run tests with Hadoop 3.2 instead of Hadoop 2.7, use `--hadoop-profile`. +To run tests with Hadoop 2.7 instead of Hadoop 3.2, use `--hadoop-profile`. - ./dev/dev-run-integration-tests.sh --hadoop-profile hadoop-3.2 + ./dev/dev-run-integration-tests.sh --hadoop-profile hadoop-2.7 The minimum tested version of Minikube is 0.23.0. The kube-dns addon must be enabled. Minikube should run with a minimum of 4 CPUs and 6G of memory: @@ -126,7 +126,7 @@ If you prefer to run just the integration tests directly, then you can customise properties to Maven. For example: mvn integration-test -am -pl :spark-kubernetes-integration-tests_2.12 \ - -Pkubernetes -Pkubernetes-integration-tests \ + -Pkubernetes -Pkubernetes-integration-tests \ -Phadoop-2.7 -Dhadoop.version=2.7.4 \ -Dspark.kubernetes.test.sparkTgz=spark-3.0.0-SNAPSHOT-bin-example.tgz \ -Dspark.kubernetes.test.imageTag=sometag \ From 8377aca60a4f326f2d1533c5e570518fb7de2895 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Fri, 11 Dec 2020 01:53:41 -0800 Subject: [PATCH 0734/1009] [SPARK-33527][SQL][FOLLOWUP] Fix the scala 2.13 build failure ### What changes were proposed in this pull request? This PR fixes the Scala 2.13 build failure brought by #30479 . ### Why are the changes needed? To pass Scala 2.13 build. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Should be done byGitHub Actions. Closes #30727 from sarutak/fix-scala213-build-failure. 
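For context, a small sketch of why the extra `.toSeq` in the diff below is needed (illustrative only, not part of the patch): in Scala 2.13 the default `scala.Seq` is `scala.collection.immutable.Seq`, so the mutable `ArrayBuffer` built up in `Decode.createExpr` no longer satisfies a `Seq[...]` parameter without an explicit conversion.
```
import scala.collection.mutable.ArrayBuffer

def takesSeq(xs: Seq[Int]): Int = xs.sum

val buf = ArrayBuffer(1, 2, 3)
takesSeq(buf.toSeq)   // compiles on both Scala 2.12 and 2.13
// takesSeq(buf)      // compiles on 2.12 only; on 2.13 a mutable buffer is not a scala.Seq
```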
Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .../spark/sql/catalyst/expressions/stringExpressions.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index ae29cfe8119f6..0207b7b55c5af 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -2105,7 +2105,7 @@ object Decode { default = search } } - CaseWhen(branches.seq, default) + CaseWhen(branches.seq.toSeq, default) } } } From 8f5db716fae1162e411750cd5d5380a399d410ae Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Fri, 11 Dec 2020 12:39:58 +0000 Subject: [PATCH 0735/1009] [SPARK-33654][SQL] Migrate CACHE TABLE to use UnresolvedRelation to resolve identifier ### What changes were proposed in this pull request? This PR proposes to migrate `CACHE TABLE` to use `UnresolvedRelation` to resolve the table/view identifier in Analyzer as discussed https://github.com/apache/spark/pull/30403/files#r532360022. ### Why are the changes needed? To resolve the table in the analyzer. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests Closes #30598 from imback82/cache_v2. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 14 +++ .../sql/catalyst/analysis/CheckAnalysis.scala | 3 + .../sql/catalyst/parser/AstBuilder.scala | 29 ++++++ .../catalyst/plans/logical/v2Commands.scala | 19 +++- .../sql/catalyst/parser/DDLParserSuite.scala | 31 +++++++ .../spark/sql/execution/SparkSqlParser.scala | 25 ------ .../spark/sql/execution/command/cache.scala | 52 +---------- .../datasources/v2/CacheTableExec.scala | 89 +++++++++++++++++++ .../datasources/v2/DataSourceV2Strategy.scala | 6 ++ .../sql/connector/DataSourceV2SQLSuite.scala | 9 +- .../sql/execution/SparkSqlParserSuite.scala | 29 ------ .../apache/spark/sql/hive/test/TestHive.scala | 5 +- 12 files changed, 197 insertions(+), 114 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 74edd65fd0479..0ceb4226b0f52 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -871,6 +871,10 @@ class Analyzer(override val catalogManager: CatalogManager) lookupTempView(ident) .map(view => i.copy(table = view)) .getOrElse(i) + case c @ CacheTable(UnresolvedRelation(ident, _, false), _, _, _) => + lookupTempView(ident) + .map(view => c.copy(table = view)) + .getOrElse(c) // TODO (SPARK-27484): handle streaming write commands when we have them. 
case write: V2WriteCommand => write.table match { @@ -996,6 +1000,11 @@ class Analyzer(override val catalogManager: CatalogManager) .map(v2Relation => i.copy(table = v2Relation)) .getOrElse(i) + case c @ CacheTable(u @ UnresolvedRelation(_, _, false), _, _, _) => + lookupV2Relation(u.multipartIdentifier, u.options, false) + .map(v2Relation => c.copy(table = v2Relation)) + .getOrElse(c) + // TODO (SPARK-27484): handle streaming write commands when we have them. case write: V2WriteCommand => write.table match { @@ -1087,6 +1096,11 @@ class Analyzer(override val catalogManager: CatalogManager) case other => i.copy(table = other) } + case c @ CacheTable(u @ UnresolvedRelation(_, _, false), _, _, _) => + lookupRelation(u.multipartIdentifier, u.options, false) + .map(v2Relation => c.copy(table = v2Relation)) + .getOrElse(c) + // TODO (SPARK-27484): handle streaming write commands when we have them. case write: V2WriteCommand => write.table match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 119e17196a454..5d4dc21810281 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -124,6 +124,9 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { case InsertIntoStatement(u: UnresolvedRelation, _, _, _, _, _) => failAnalysis(s"Table not found: ${u.multipartIdentifier.quoted}") + case CacheTable(u: UnresolvedRelation, _, _, _) => + failAnalysis(s"Table or view not found for `CACHE TABLE`: ${u.multipartIdentifier.quoted}") + // TODO (SPARK-27484): handle streaming write commands when we have them. case write: V2WriteCommand if write.table.isInstanceOf[UnresolvedRelation] => val tblName = write.table.asInstanceOf[UnresolvedRelation].multipartIdentifier diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 3c06a7665a0e2..a6df7690c7e47 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3604,6 +3604,35 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg ctx.SERDE != null) } + /** + * Create a [[CacheTable]] or [[CacheTableAsSelect]]. 
+ * + * For example: + * {{{ + * CACHE [LAZY] TABLE multi_part_name + * [OPTIONS tablePropertyList] [[AS] query] + * }}} + */ + override def visitCacheTable(ctx: CacheTableContext): LogicalPlan = withOrigin(ctx) { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + + val query = Option(ctx.query).map(plan) + val tableName = visitMultipartIdentifier(ctx.multipartIdentifier) + if (query.isDefined && tableName.length > 1) { + val catalogAndNamespace = tableName.init + throw new ParseException("It is not allowed to add catalog/namespace " + + s"prefix ${catalogAndNamespace.quoted} to " + + "the table name in CACHE TABLE AS SELECT", ctx) + } + val options = Option(ctx.options).map(visitPropertyKeyValues).getOrElse(Map.empty) + val isLazy = ctx.LAZY != null + if (query.isDefined) { + CacheTableAsSelect(tableName.head, query.get, isLazy, options) + } else { + CacheTable(UnresolvedRelation(tableName), tableName, isLazy, options) + } + } + /** * Create a [[TruncateTable]] command. * diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 7d62dde67733b..1a37630a48461 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -711,7 +711,6 @@ case class TruncateTable( override def children: Seq[LogicalPlan] = child :: Nil } - /** * The logical plan of the SHOW PARTITIONS command. */ @@ -761,3 +760,21 @@ case class AlterViewUnsetProperties( ifExists: Boolean) extends Command { override def children: Seq[LogicalPlan] = child :: Nil } + +/** + * The logical plan of the CACHE TABLE command. + */ +case class CacheTable( + table: LogicalPlan, + multipartIdentifier: Seq[String], + isLazy: Boolean, + options: Map[String, String]) extends Command + +/** + * The logical plan of the CACHE TABLE ... AS SELECT command. 
+ */ +case class CacheTableAsSelect( + tempViewName: String, + plan: LogicalPlan, + isLazy: Boolean, + options: Map[String, String]) extends Command diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index af5e48d922a16..b860571df0791 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -2001,6 +2001,37 @@ class DDLParserSuite extends AnalysisTest { asSerde = true)) } + test("CACHE TABLE") { + comparePlans( + parsePlan("CACHE TABLE a.b.c"), + CacheTable( + UnresolvedRelation(Seq("a", "b", "c")), Seq("a", "b", "c"), false, Map.empty)) + + comparePlans( + parsePlan("CACHE TABLE t AS SELECT * FROM testData"), + CacheTableAsSelect( + "t", + Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("testData"))), + false, + Map.empty)) + + comparePlans( + parsePlan("CACHE LAZY TABLE a.b.c"), + CacheTable( + UnresolvedRelation(Seq("a", "b", "c")), Seq("a", "b", "c"), true, Map.empty)) + + comparePlans( + parsePlan("CACHE LAZY TABLE a.b.c OPTIONS('storageLevel' 'DISK_ONLY')"), + CacheTable( + UnresolvedRelation(Seq("a", "b", "c")), + Seq("a", "b", "c"), + true, + Map("storageLevel" -> "DISK_ONLY"))) + + intercept("CACHE TABLE a.b.c AS SELECT * FROM testData", + "It is not allowed to add catalog/namespace prefix a.b") + } + test("TRUNCATE table") { comparePlans( parsePlan("TRUNCATE TABLE a.b.c"), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 7a31b0dcdd43d..ba5874c21f6c4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -192,31 +192,6 @@ class SparkSqlAstBuilder extends AstBuilder { unquotedPath } - /** - * Create a [[CacheTableCommand]]. - * - * For example: - * {{{ - * CACHE [LAZY] TABLE multi_part_name - * [OPTIONS tablePropertyList] [[AS] query] - * }}} - */ - override def visitCacheTable(ctx: CacheTableContext): LogicalPlan = withOrigin(ctx) { - import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ - - val query = Option(ctx.query).map(plan) - val tableName = visitMultipartIdentifier(ctx.multipartIdentifier) - if (query.isDefined && tableName.length > 1) { - val catalogAndNamespace = tableName.init - throw new ParseException("It is not allowed to add catalog/namespace " + - s"prefix ${catalogAndNamespace.quoted} to " + - "the table name in CACHE TABLE AS SELECT", ctx) - } - val options = Option(ctx.options).map(visitPropertyKeyValues).getOrElse(Map.empty) - CacheTableCommand(tableName, query, ctx.LAZY != null, options) - } - - /** * Create an [[UncacheTableCommand]] logical plan. 
*/ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala index 3f0945d1e817b..3f85a1b0f99d6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala @@ -17,57 +17,9 @@ package org.apache.spark.sql.execution.command -import java.util.Locale - -import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, Row, SparkSession} -import org.apache.spark.sql.catalyst.plans.QueryPlan -import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, LogicalPlan} -import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.{AnalysisException, DataFrame, Row, SparkSession} +import org.apache.spark.sql.catalyst.plans.logical.IgnoreCachedData import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.MultipartIdentifierHelper -import org.apache.spark.storage.StorageLevel - -case class CacheTableCommand( - multipartIdentifier: Seq[String], - plan: Option[LogicalPlan], - isLazy: Boolean, - options: Map[String, String]) extends RunnableCommand { - require(plan.isEmpty || multipartIdentifier.length == 1, - "Namespace name is not allowed in CACHE TABLE AS SELECT") - - override def innerChildren: Seq[QueryPlan[_]] = plan.toSeq - - override def run(sparkSession: SparkSession): Seq[Row] = { - val tableName = multipartIdentifier.quoted - plan.foreach { logicalPlan => - Dataset.ofRows(sparkSession, logicalPlan).createTempView(tableName) - } - - val storageLevelKey = "storagelevel" - val storageLevelValue = - CaseInsensitiveMap(options).get(storageLevelKey).map(_.toUpperCase(Locale.ROOT)) - val withoutStorageLevel = options.filterKeys(_.toLowerCase(Locale.ROOT) != storageLevelKey) - if (withoutStorageLevel.nonEmpty) { - logWarning(s"Invalid options: ${withoutStorageLevel.mkString(", ")}") - } - - val table = sparkSession.table(tableName) - if (storageLevelValue.nonEmpty) { - sparkSession.sharedState.cacheManager.cacheQuery( - table, - Some(tableName), - StorageLevel.fromString(storageLevelValue.get)) - } else { - sparkSession.sharedState.cacheManager.cacheQuery(table, Some(tableName)) - } - - if (!isLazy) { - // Performs eager caching - table.count() - } - - Seq.empty[Row] - } -} case class UncacheTableCommand( multipartIdentifier: Seq[String], diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala new file mode 100644 index 0000000000000..85107dfc9b2ef --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import java.util.Locale + +import org.apache.spark.sql.{DataFrame, Dataset} +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.MultipartIdentifierHelper +import org.apache.spark.storage.StorageLevel + +trait BaseCacheTableExec extends V2CommandExec { + def relationName: String + def dataFrameToCache: DataFrame + def isLazy: Boolean + def options: Map[String, String] + + override def run(): Seq[InternalRow] = { + val storageLevelKey = "storagelevel" + val storageLevelValue = + CaseInsensitiveMap(options).get(storageLevelKey).map(_.toUpperCase(Locale.ROOT)) + val withoutStorageLevel = options.filterKeys(_.toLowerCase(Locale.ROOT) != storageLevelKey) + if (withoutStorageLevel.nonEmpty) { + logWarning(s"Invalid options: ${withoutStorageLevel.mkString(", ")}") + } + + val sparkSession = sqlContext.sparkSession + val df = dataFrameToCache + if (storageLevelValue.nonEmpty) { + sparkSession.sharedState.cacheManager.cacheQuery( + df, + Some(relationName), + StorageLevel.fromString(storageLevelValue.get)) + } else { + sparkSession.sharedState.cacheManager.cacheQuery(df, Some(relationName)) + } + + if (!isLazy) { + // Performs eager caching + df.count() + } + + Seq.empty + } + + override def output: Seq[Attribute] = Seq.empty +} + +case class CacheTableExec( + relation: LogicalPlan, + multipartIdentifier: Seq[String], + override val isLazy: Boolean, + override val options: Map[String, String]) extends BaseCacheTableExec { + override def relationName: String = multipartIdentifier.quoted + + override def dataFrameToCache: DataFrame = Dataset.ofRows(sqlContext.sparkSession, relation) +} + +case class CacheTableAsSelectExec( + tempViewName: String, + query: LogicalPlan, + override val isLazy: Boolean, + override val options: Map[String, String]) extends BaseCacheTableExec { + override def relationName: String = tempViewName + + override def dataFrameToCache: DataFrame = { + val sparkSession = sqlContext.sparkSession + Dataset.ofRows(sparkSession, query).createTempView(tempViewName) + sparkSession.table(tempViewName) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 37a4dcf081be4..7d278c33b97fc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -344,6 +344,12 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case RepairTable(_: ResolvedTable) => throw new AnalysisException("MSCK REPAIR TABLE is not supported for v2 tables.") + case r: CacheTable => + CacheTableExec(r.table, r.multipartIdentifier, r.isLazy, r.options) :: Nil + + case r: CacheTableAsSelect => + CacheTableAsSelectExec(r.tempViewName, r.plan, r.isLazy, r.options) :: Nil + case _ => Nil } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 8e1e8f88f219f..bc570efb70bdf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -778,6 +778,7 @@ class DataSourceV2SQLSuite checkAnswer(sql(s"SELECT * FROM $t"), spark.table("source")) checkAnswer(sql(s"SELECT * FROM $view"), spark.table("source").select("id")) + assert(!spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) sql(s"DROP TABLE $t") assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) } @@ -2619,15 +2620,11 @@ class DataSourceV2SQLSuite "ALTER VIEW ... UNSET TBLPROPERTIES") } - private def testNotSupportedV2Command( - sqlCommand: String, - sqlParams: String, - sqlCommandInMessage: Option[String] = None): Unit = { + private def testNotSupportedV2Command(sqlCommand: String, sqlParams: String): Unit = { val e = intercept[AnalysisException] { sql(s"$sqlCommand $sqlParams") } - val cmdStr = sqlCommandInMessage.getOrElse(sqlCommand) - assert(e.message.contains(s"$cmdStr is not supported for v2 tables")) + assert(e.message.contains(s"$sqlCommand is not supported for v2 tables")) } private def assertAnalysisError(sqlStatement: String, expectedError: String): Unit = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index 81ba09f206b92..009c5b3705d2f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -339,35 +339,6 @@ class SparkSqlParserSuite extends AnalysisTest { "LINES TERMINATED BY only supports newline '\\n' right now") } - test("CACHE TABLE") { - assertEqual( - "CACHE TABLE a.b.c", - CacheTableCommand(Seq("a", "b", "c"), None, false, Map.empty)) - - assertEqual( - "CACHE TABLE t AS SELECT * FROM testData", - CacheTableCommand( - Seq("t"), - Some(Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("testData")))), - false, - Map.empty)) - - assertEqual( - "CACHE LAZY TABLE a.b.c", - CacheTableCommand(Seq("a", "b", "c"), None, true, Map.empty)) - - assertEqual( - "CACHE LAZY TABLE a.b.c OPTIONS('storageLevel' 'DISK_ONLY')", - CacheTableCommand( - Seq("a", "b", "c"), - None, - true, - Map("storageLevel" -> "DISK_ONLY"))) - - intercept("CACHE TABLE a.b.c AS SELECT * FROM testData", - "It is not allowed to add catalog/namespace prefix a.b") - } - test("UNCACHE TABLE") { assertEqual( "UNCACHE TABLE a.b.c", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index e996f2c6ec78f..ff5b9e453a482 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -39,10 +39,9 @@ import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode import org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation -import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation} +import org.apache.spark.sql.catalyst.plans.logical.{CacheTable, LogicalPlan, OneRowRelation} import 
org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.execution.{QueryExecution, SQLExecution} -import org.apache.spark.sql.execution.command.CacheTableCommand import org.apache.spark.sql.hive._ import org.apache.spark.sql.hive.client.HiveClient import org.apache.spark.sql.internal.{SessionState, SharedState, SQLConf, WithTestConf} @@ -597,7 +596,7 @@ private[hive] class TestHiveQueryExecution( override lazy val analyzed: LogicalPlan = sparkSession.withActive { val describedTables = logical match { - case CacheTableCommand(tbl, _, _, _) => tbl.asTableIdentifier :: Nil + case CacheTable(_, tbl, _, _) => tbl.asTableIdentifier :: Nil case _ => Nil } From 8b97b19ffad7ec78e4b1f05cb1168ef79dc647b2 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 11 Dec 2020 12:48:40 +0000 Subject: [PATCH 0736/1009] [SPARK-33706][SQL] Require fully specified partition identifier in partitionExists() ### What changes were proposed in this pull request? 1. Check that the partition identifier passed to `SupportsPartitionManagement.partitionExists()` is fully specified (specifies all values of partition fields). 2. Remove the custom implementation of `partitionExists()` from `InMemoryPartitionTable`, and re-use the default implementation from `SupportsPartitionManagement`. ### Why are the changes needed? The method is supposed to check existence of one partition but currently it can return `true` for partially specified partition. This can lead to incorrect commands behavior, for instance the commands could modify or place data in the middle of partition path. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running existing test suites: ``` $ build/sbt "test:testOnly *AlterTablePartitionV2SQLSuite" $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *SupportsPartitionManagementSuite" ``` Closes #30667 from MaxGekk/check-len-partitionExists. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../catalog/SupportsPartitionManagement.java | 14 +++++++---- .../connector/InMemoryPartitionTable.scala | 3 --- .../SupportsPartitionManagementSuite.scala | 23 +++++++++++++++++-- 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java index 9d898f2f477e1..cf86c44e9563b 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java @@ -17,7 +17,6 @@ package org.apache.spark.sql.connector.catalog; -import java.util.Arrays; import java.util.Map; import org.apache.spark.annotation.Experimental; @@ -76,13 +75,18 @@ void createPartition( /** * Test whether a partition exists using an {@link InternalRow ident} from the table. 
* - * @param ident a partition identifier + * @param ident a partition identifier which must contain all partition fields in order * @return true if the partition exists, false otherwise */ default boolean partitionExists(InternalRow ident) { - String[] partitionNames = partitionSchema().names(); - String[] requiredNames = Arrays.copyOfRange(partitionNames, 0, ident.numFields()); - return listPartitionIdentifiers(requiredNames, ident).length > 0; + String[] partitionNames = partitionSchema().names(); + if (ident.numFields() == partitionNames.length) { + return listPartitionIdentifiers(partitionNames, ident).length > 0; + } else { + throw new IllegalArgumentException("The number of fields (" + ident.numFields() + + ") in the partition identifier is not equal to the partition schema length (" + + partitionNames.length + "). The identifier might not refer to one partition."); + } } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala index 6a8432e635310..e29c78c59f769 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala @@ -83,9 +83,6 @@ class InMemoryPartitionTable( } } - override def partitionExists(ident: InternalRow): Boolean = - memoryTablePartitions.containsKey(ident) - override protected def addPartitionKey(key: Seq[Any]): Unit = { memoryTablePartitions.put(InternalRow.fromSeq(key), Map.empty[String, String].asJava) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala index 9de0fe6108c99..dc2df546d6bfd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala @@ -145,7 +145,7 @@ class SupportsPartitionManagementSuite extends SparkFunSuite { assert(!hasPartitions(partTable)) } - test("listPartitionByNames") { + private def createMultiPartTable(): InMemoryPartitionTable = { val partCatalog = new InMemoryPartitionTableCatalog partCatalog.initialize("test", CaseInsensitiveStringMap.empty()) val table = partCatalog.createTable( @@ -156,8 +156,8 @@ class SupportsPartitionManagementSuite extends SparkFunSuite { .add("part1", StringType), Array(LogicalExpressions.identity(ref("part0")), LogicalExpressions.identity(ref("part1"))), util.Collections.emptyMap[String, String]) - val partTable = table.asInstanceOf[InMemoryPartitionTable] + val partTable = table.asInstanceOf[InMemoryPartitionTable] Seq( InternalRow(0, "abc"), InternalRow(0, "def"), @@ -165,6 +165,12 @@ class SupportsPartitionManagementSuite extends SparkFunSuite { partTable.createPartition(partIdent, new util.HashMap[String, String]()) } + partTable + } + + test("listPartitionByNames") { + val partTable = createMultiPartTable() + Seq( (Array("part0", "part1"), InternalRow(0, "abc")) -> Set(InternalRow(0, "abc")), (Array("part0"), InternalRow(0)) -> Set(InternalRow(0, "abc"), InternalRow(0, "def")), @@ -185,4 +191,17 @@ class SupportsPartitionManagementSuite extends SparkFunSuite { intercept[AssertionError](partTable.listPartitionIdentifiers(names, idents)) } } + + test("partitionExists") { + val partTable = 
createMultiPartTable() + + assert(partTable.partitionExists(InternalRow(0, "def"))) + assert(!partTable.partitionExists(InternalRow(-1, "def"))) + assert(!partTable.partitionExists(InternalRow("abc", "def"))) + + val errMsg = intercept[IllegalArgumentException] { + partTable.partitionExists(InternalRow(0)) + }.getMessage + assert(errMsg.contains("The identifier might not refer to one partition")) + } } From 5bab27e00bcad31400c952149ffd0389f841a992 Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Sat, 12 Dec 2020 00:52:33 +0900 Subject: [PATCH 0737/1009] [SPARK-33526][SQL] Add config to control if cancel invoke interrupt task on thriftserver ### What changes were proposed in this pull request? This PR add a new config `spark.sql.thriftServer.forceCancel` to give user a way to interrupt task when cancel statement. ### Why are the changes needed? After [#29933](https://github.com/apache/spark/pull/29933), we support cancel query if timeout, but the default behavior of `SparkContext.cancelJobGroups` won't interrupt task and just let task finish by itself. In some case it's dangerous, e.g., data skew or exists a heavily shuffle. A task will hold in a long time after do cancel and the resource will not release. ### Does this PR introduce _any_ user-facing change? Yes, a new config. ### How was this patch tested? Add test. Closes #30481 from ulysses-you/SPARK-33526. Lead-authored-by: ulysses-you Co-authored-by: ulysses-you Signed-off-by: HyukjinKwon --- .../apache/spark/sql/internal/SQLConf.scala | 12 +++++- .../SparkExecuteStatementOperation.scala | 6 ++- .../ThriftServerWithSparkContextSuite.scala | 37 +++++++++++++++++++ 3 files changed, 52 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 11fe6c7894f76..2220d6f441e8e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -922,13 +922,23 @@ object SQLConf { .booleanConf .createWithDefault(false) + val THRIFTSERVER_FORCE_CANCEL = + buildConf("spark.sql.thriftServer.interruptOnCancel") + .doc("When true, all running tasks will be interrupted if one cancels a query. " + + "When false, all running tasks will remain until finished.") + .version("3.2.0") + .booleanConf + .createWithDefault(false) + val THRIFTSERVER_QUERY_TIMEOUT = buildConf("spark.sql.thriftServer.queryTimeout") .doc("Set a query duration timeout in seconds in Thrift Server. If the timeout is set to " + "a positive value, a running query will be cancelled automatically when the timeout is " + "exceeded, otherwise the query continues to run till completion. If timeout values are " + "set for each statement via `java.sql.Statement.setQueryTimeout` and they are smaller " + - "than this configuration value, they take precedence.") + "than this configuration value, they take precedence. 
If you set this timeout and prefer" + + "to cancel the queries right away without waiting task to finish, consider enabling" + + s"${THRIFTSERVER_FORCE_CANCEL.key} together.") .version("3.1.0") .timeConf(TimeUnit.SECONDS) .createWithDefault(0L) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala index c4ae035e1f836..8ca0ab91a73f7 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala @@ -63,6 +63,8 @@ private[hive] class SparkExecuteStatementOperation( } } + private val forceCancel = sqlContext.conf.getConf(SQLConf.THRIFTSERVER_FORCE_CANCEL) + private val substitutorStatement = SQLConf.withExistingConf(sqlContext.conf) { new VariableSubstitution().substitute(statement) } @@ -125,7 +127,7 @@ private[hive] class SparkExecuteStatementOperation( def getNextRowSet(order: FetchOrientation, maxRowsL: Long): RowSet = withLocalProperties { try { - sqlContext.sparkContext.setJobGroup(statementId, substitutorStatement) + sqlContext.sparkContext.setJobGroup(statementId, substitutorStatement, forceCancel) getNextRowSetInternal(order, maxRowsL) } finally { sqlContext.sparkContext.clearJobGroup() @@ -284,7 +286,7 @@ private[hive] class SparkExecuteStatementOperation( parentSession.getSessionState.getConf.setClassLoader(executionHiveClassLoader) } - sqlContext.sparkContext.setJobGroup(statementId, substitutorStatement) + sqlContext.sparkContext.setJobGroup(statementId, substitutorStatement, forceCancel) result = sqlContext.sql(statement) logDebug(result.queryExecution.toString()) HiveThriftServer2.eventManager.onStatementParsed(statementId, diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala index fd3a638c4fa44..036eb5850695e 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala @@ -18,9 +18,14 @@ package org.apache.spark.sql.hive.thriftserver import java.sql.SQLException +import java.util.concurrent.atomic.AtomicBoolean import org.apache.hive.service.cli.HiveSQLException +import org.apache.spark.TaskKilled +import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd} +import org.apache.spark.sql.internal.SQLConf + trait ThriftServerWithSparkContextSuite extends SharedThriftServer { test("the scratch dir will be deleted during server start but recreated with new operation") { @@ -79,6 +84,38 @@ trait ThriftServerWithSparkContextSuite extends SharedThriftServer { "java.lang.NumberFormatException: invalid input syntax for type numeric: 1.2")) } } + + test("SPARK-33526: Add config to control if cancel invoke interrupt task on thriftserver") { + withJdbcStatement { statement => + val forceCancel = new AtomicBoolean(false) + val listener = new SparkListener { + override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { + assert(taskEnd.reason.isInstanceOf[TaskKilled]) + if (forceCancel.get()) { + assert(System.currentTimeMillis() - 
taskEnd.taskInfo.launchTime < 1000) + } else { + // avoid accuracy, we check 2s instead of 3s. + assert(System.currentTimeMillis() - taskEnd.taskInfo.launchTime >= 2000) + } + } + } + + spark.sparkContext.addSparkListener(listener) + try { + statement.execute(s"SET ${SQLConf.THRIFTSERVER_QUERY_TIMEOUT.key}=1") + Seq(true, false).foreach { force => + statement.execute(s"SET ${SQLConf.THRIFTSERVER_FORCE_CANCEL.key}=$force") + forceCancel.set(force) + val e1 = intercept[SQLException] { + statement.execute("select java_method('java.lang.Thread', 'sleep', 3000L)") + }.getMessage + assert(e1.contains("Query timed out")) + } + } finally { + spark.sparkContext.removeSparkListener(listener) + } + } + } } From 29cc5b3f235ff178cf888f16877e6e0fd44253cc Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Sat, 12 Dec 2020 00:53:31 +0900 Subject: [PATCH 0738/1009] [MINOR][INFRA] Add kubernetes-integration-tests to GitHub Actions for Scala 2.13 build ### What changes were proposed in this pull request? This PR adds `kubernetes-integration-tests` to GitHub Actions for Scala 2.13 build. ### Why are the changes needed? Now that the build pass with `kubernetes-integration-tests` and Scala 2.13, it's better to keep it build-able. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Should be done by GitHub Actions. I also confirmed that the build passes with the following command. ``` $ build/sbt -Pscala-2.13 -Pkubernetes -Pkubernetes-integration-tests compile test:compile ``` Closes #30731 from sarutak/github-actions-k8s. Authored-by: Kousuke Saruta Signed-off-by: HyukjinKwon --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index e40d6362fd23f..426401203fc77 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -414,7 +414,7 @@ jobs: - name: Build with SBT run: | ./dev/change-scala-version.sh 2.13 - ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pscala-2.13 compile test:compile + ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pkubernetes-integration-tests -Pscala-2.13 compile test:compile hadoop-2: name: Hadoop 2 build with SBT From fb2e3af4b5d92398d57e61b766466cc7efd9d7cb Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Sat, 12 Dec 2020 00:54:40 +0900 Subject: [PATCH 0739/1009] [SPARK-33757][INFRA][R] Fix the R dependencies build error on GitHub Actions and AppVeyor ### What changes were proposed in this pull request? This PR fixes the R dependencies build error on GitHub Actions and AppVeyor. The reason seems that `usethis` package is updated 2020/12/10. https://cran.r-project.org/web/packages/usethis/index.html ### Why are the changes needed? To keep the build clean. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Should be done by GitHub Actions. Closes #30737 from sarutak/fix-r-dependencies-build-error. 
Authored-by: Kousuke Saruta Signed-off-by: HyukjinKwon --- .github/workflows/build_and_test.yml | 3 +++ appveyor.yml | 3 +++ 2 files changed, 6 insertions(+) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 426401203fc77..30199eaa41999 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -336,6 +336,9 @@ jobs: - name: Install R linter dependencies and SparkR run: | sudo apt-get install -y libcurl4-openssl-dev + # dependencies for usethis 1.6.3. + sudo Rscript -e "install.packages(c('clipr', 'cli', 'crayon', 'desc', 'fs', 'gh', 'glue', 'purrr', 'rematch2', 'rlang', 'rprojroot', 'whisker', 'withr', 'yaml', 'git2r', 'rstudioapi'), repos='https://cloud.r-project.org/')" + sudo Rscript -e "install.packages('https://cran.r-project.org/src/contrib/Archive/usethis/usethis_1.6.3.tar.gz', repos=NULL, type='source')" sudo Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')" sudo Rscript -e "devtools::install_github('jimhester/lintr@v2.0.0')" ./R/install-dev.sh diff --git a/appveyor.yml b/appveyor.yml index c40b23c8341eb..b6a42a02d1ac9 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -41,6 +41,9 @@ cache: install: # Install maven and dependencies - ps: .\dev\appveyor-install-dependencies.ps1 + # usethis and its dependencies + - cmd: Rscript -e "install.packages(c('clipr', 'cli', 'crayon', 'desc', 'fs', 'gh', 'glue', 'purrr', 'rematch2', 'rlang', 'rprojroot', 'whisker', 'withr', 'yaml', 'git2r', 'rstudioapi'), repos='https://cloud.r-project.org/')" + - cmd: Rscript -e "install.packages('https://cran.r-project.org/src/contrib/Archive/usethis/usethis_1.6.3.tar.gz', repos=NULL, type='source')" # Required package for R unit tests. xml2 is required to use jUnit reporter in testthat. - cmd: Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'e1071', 'survival', 'arrow', 'xml2'), repos='https://cloud.r-project.org/')" - cmd: Rscript -e "pkg_list <- as.data.frame(installed.packages()[,c(1, 3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]" From be09d37398f6b62c853e961df64b94b34fd3389d Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Fri, 11 Dec 2020 14:43:51 -0800 Subject: [PATCH 0740/1009] [SPARK-33729][SQL] When refreshing cache, Spark should not use cached plan when recaching data ### What changes were proposed in this pull request? This fixes `CatalogImpl.refreshTable` by using a new logical plan when recache the target table. ### Why are the changes needed? In `CatalogImpl.refreshTable`, we currently recache the target table via: ```scala sparkSession.sharedState.cacheManager.cacheQuery(table, cacheName, cacheLevel) ``` However, here `table` is generated before the `tableRelationCache` in `SessionCatalog` is invalidated, and therefore it still refers to old and staled logical plan, which is incorrect. ### Does this PR introduce _any_ user-facing change? Yes, this fix behavior when a table is refreshed. ### How was this patch tested? Added a unit test. Closes #30699 from sunchao/SPARK-33729. 
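To make the staleness scenario concrete, here is a hedged end-to-end sketch that loosely mirrors the regression test added by this patch (the path, table name and local-mode session settings are placeholders, not taken from the patch):

```scala
import org.apache.spark.sql.SparkSession

object RefreshCachedTableSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("refresh-cached-table-sketch")
      .getOrCreate()
    import spark.implicits._

    val dir = "/tmp/refresh_table_sketch" // assumed scratch location
    Seq(1 -> "a").toDF("i", "j").write.mode("overwrite").parquet(dir)
    spark.sql(s"CREATE TABLE t (i INT, j STRING) USING parquet LOCATION '$dir'")
    spark.sql("CACHE TABLE t")

    // New data files arrive in the table location after the table was cached.
    Seq(2 -> "b").toDF("i", "j").write.mode("append").parquet(dir)

    // With this fix, REFRESH TABLE rebuilds the cache from a fresh logical
    // plan, so the appended row should show up in subsequent reads.
    spark.sql("REFRESH TABLE t")
    spark.table("t").show()

    spark.stop()
  }
}
```

The point of the fix, per the description above, is that the recache step previously reused the logical plan captured before the relation cache was invalidated, which is why the fresh `spark.table(tableIdent)` lookup matters.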
Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../spark/sql/internal/CatalogImpl.scala | 6 +++- .../apache/spark/sql/CachedTableSuite.scala | 33 +++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala index 3e216415c2815..8008a21804f7b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala @@ -538,8 +538,12 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { val cacheName = cache.get.cachedRepresentation.cacheBuilder.tableName val cacheLevel = cache.get.cachedRepresentation.cacheBuilder.storageLevel + // creates a new logical plan since the old table refers to old relation which + // should be refreshed + val newTable = sparkSession.table(tableIdent) + // recache with the same name and cache level. - sparkSession.sharedState.cacheManager.cacheQuery(table, cacheName, cacheLevel) + sparkSession.sharedState.cacheManager.cacheQuery(newTable, cacheName, cacheLevel) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala index 3765093f83bc2..a3a6d6721c993 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala @@ -17,6 +17,9 @@ package org.apache.spark.sql +import java.io.{File, FilenameFilter} +import java.nio.file.{Files, Paths} + import scala.collection.mutable.HashSet import scala.concurrent.duration._ @@ -1239,4 +1242,34 @@ class CachedTableSuite extends QueryTest with SQLTestUtils } } } + + test("SPARK-33729: REFRESH TABLE should not use cached/stale plan") { + def moveParquetFiles(src: File, dst: File): Unit = { + src.listFiles(new FilenameFilter { + override def accept(dir: File, name: String): Boolean = name.endsWith("parquet") + }).foreach { f => + Files.move(f.toPath, Paths.get(dst.getAbsolutePath, f.getName)) + } + // cleanup the rest of the files + src.listFiles().foreach(_.delete()) + src.delete() + } + + withTable("t") { + withTempDir { dir => + val path1 = new File(dir, "path1") + Seq((1 -> "a")).toDF("i", "j").write.parquet(path1.getCanonicalPath) + moveParquetFiles(path1, dir) + sql(s"CREATE TABLE t (i INT, j STRING) USING parquet LOCATION '${dir.toURI}'") + sql("CACHE TABLE t") + checkAnswer(sql("SELECT * FROM t"), Row(1, "a") :: Nil) + + val path2 = new File(dir, "path2") + Seq(2 -> "b").toDF("i", "j").write.parquet(path2.getCanonicalPath) + moveParquetFiles(path2, dir) + sql("REFRESH TABLE t") + checkAnswer(sql("SELECT * FROM t"), Row(1, "a") :: Row(2, "b") :: Nil) + } + } + } } From e2cdfcebd9b39a1104b34d8eafafbcdc6acf5d3e Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Sun, 13 Dec 2020 10:41:47 +0900 Subject: [PATCH 0741/1009] [SPARK-32447][CORE][PYTHON][FOLLOW-UP] Fix other occurrences of 'python' to 'python3' ### What changes were proposed in this pull request? This PR proposes to change python to python3 in several places missed. ### Why are the changes needed? To use Python 3 by default safely. ### Does this PR introduce _any_ user-facing change? Yes, it will uses `python3` as its default Python interpreter. ### How was this patch tested? It was tested together in https://github.com/apache/spark/pull/30735. The test cases there will verify this change together. 
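For readers skimming this change, here is a simplified Scala sketch of the interpreter lookup order involved (the helper below is illustrative only, not the Spark code; the `spark.pyspark.*` conf keys and the `PYSPARK_*` environment variables are the standard ones this patch touches):

```scala
object PythonExecResolutionSketch {
  // Simplified stand-in for the resolution order used on the driver side:
  // explicit Spark conf first, then environment variables, then the
  // hard-coded fallback, which this patch moves from "python" to "python3".
  def resolvePythonExec(conf: Map[String, String], env: Map[String, String]): String =
    conf.get("spark.pyspark.driver.python")
      .orElse(conf.get("spark.pyspark.python"))
      .orElse(env.get("PYSPARK_DRIVER_PYTHON"))
      .orElse(env.get("PYSPARK_PYTHON"))
      .getOrElse("python3")

  def main(args: Array[String]): Unit = {
    // With nothing configured, the default interpreter is now python3.
    println(resolvePythonExec(Map.empty, Map.empty))
    // An explicit setting still wins over the fallback.
    println(resolvePythonExec(Map("spark.pyspark.python" -> "/opt/py/bin/python"), Map.empty))
  }
}
```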
Closes #30750 from HyukjinKwon/SPARK-32447. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala | 2 +- .../org/apache/spark/launcher/SparkSubmitCommandBuilder.java | 2 +- python/pyspark/context.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala index 7ad92da4e055a..c3f73ed745da4 100644 --- a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala @@ -44,7 +44,7 @@ object PythonRunner { .orElse(sparkConf.get(PYSPARK_PYTHON)) .orElse(sys.env.get("PYSPARK_DRIVER_PYTHON")) .orElse(sys.env.get("PYSPARK_PYTHON")) - .getOrElse("python") + .getOrElse("python3") // Format python file paths before adding them to the PYTHONPATH val formattedPythonFile = formatPath(pythonFile) diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java index d6ed1e3a3532d..b2c12973bcabd 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java @@ -336,7 +336,7 @@ private List buildPySparkShellCommand(Map env) throws IO conf.get(SparkLauncher.PYSPARK_PYTHON), System.getenv("PYSPARK_DRIVER_PYTHON"), System.getenv("PYSPARK_PYTHON"), - "python")); + "python3")); String pyOpts = System.getenv("PYSPARK_DRIVER_PYTHON_OPTS"); if (conf.containsKey(SparkLauncher.PYSPARK_PYTHON)) { // pass conf spark.pyspark.python to python by environment variable. diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 3da535b026137..79fdd22ab13fd 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -227,7 +227,7 @@ def _do_init(self, master, appName, sparkHome, pyFiles, environment, batchSize, os.environ["SPARK_BUFFER_SIZE"] = \ str(self._jvm.PythonUtils.getSparkBufferSize(self._jsc)) - self.pythonExec = os.environ.get("PYSPARK_PYTHON", 'python') + self.pythonExec = os.environ.get("PYSPARK_PYTHON", 'python3') self.pythonVer = "%d.%d" % sys.version_info[:2] # Broadcast's __reduce__ method stores Broadcast instances here. From 0277fddaef17b615354c735a2c89cdced5f1d8f6 Mon Sep 17 00:00:00 2001 From: linzebing Date: Sun, 13 Dec 2020 22:00:05 +0900 Subject: [PATCH 0742/1009] [MINOR][UI] Correct JobPage's skipped/pending tableHeaderId ### What changes were proposed in this pull request? Current Spark Web UI job page's header link of pending/skipped stages is inconsistent with their statuses. See the picture below: ![image](https://user-images.githubusercontent.com/9404831/101998894-1e843180-3c8c-11eb-8d94-10df9edb68e7.png) ### Why are the changes needed? The code determining the `pendingOrSkippedTableId` has the wrong logic. As explained in the code: > If the job is completed, then any pending stages are displayed as "skipped" [code pointer](https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala#L266) This PR fixes the logic for `pendingOrSkippedTableId` which aligns with the stage statuses. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Verified that header link is consistent with stage status with the fix. Closes #30749 from linzebing/ui_bug. 
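The rule being restored is small enough to state as a sketch (not the UI code itself; the function and assertions below are illustrative):

```scala
object StageTableHeaderSketch {
  // The rule from the description above: once a job has completed, stages
  // that never ran are shown as "skipped"; while the job is still running
  // they are shown as "pending". The table header anchor should match that.
  def pendingOrSkippedTableId(jobIsComplete: Boolean): String =
    if (jobIsComplete) "skipped" else "pending"

  def main(args: Array[String]): Unit = {
    assert(pendingOrSkippedTableId(jobIsComplete = true) == "skipped")
    assert(pendingOrSkippedTableId(jobIsComplete = false) == "pending")
    println("header ids line up with stage statuses")
  }
}
```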
Authored-by: linzebing Signed-off-by: Kousuke Saruta --- core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala index c40e1bc248a49..1dfbce82c852b 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala @@ -284,9 +284,9 @@ private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIP val pendingOrSkippedTableId = if (isComplete) { - "pending" - } else { "skipped" + } else { + "pending" } val activeStagesTable = From 99848e530f8528283bb21afac2f89984924f2235 Mon Sep 17 00:00:00 2001 From: Nicholas Marion Date: Sun, 13 Dec 2020 14:36:54 -0800 Subject: [PATCH 0743/1009] [SPARK-33762][BUILD] Upgrade commons-codec to 1.15 ### What changes were proposed in this pull request? ### Why are the changes needed? Open Source scans are reporting a potential encoding/decoding issue related to versions of commons-codec prior to 1.13. Commit referenced: https://github.com/apache/commons-codec/commit/48b615756d1d770091ea3322eefc08011ee8b113 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #30740 from n-marion/SPARK-33762_upgrade-commons-codec. Authored-by: Nicholas Marion Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 3f1199478bc67..03ea28271b683 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -33,7 +33,7 @@ chill-java/0.9.5//chill-java-0.9.5.jar chill_2.12/0.9.5//chill_2.12-0.9.5.jar commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar commons-cli/1.2//commons-cli-1.2.jar -commons-codec/1.10//commons-codec-1.10.jar +commons-codec/1.15//commons-codec-1.15.jar commons-collections/3.2.2//commons-collections-3.2.2.jar commons-compiler/3.0.16//commons-compiler-3.0.16.jar commons-compress/1.20//commons-compress-1.20.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index d16235339897e..6dd7f87ba1578 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -30,7 +30,7 @@ chill-java/0.9.5//chill-java-0.9.5.jar chill_2.12/0.9.5//chill_2.12-0.9.5.jar commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar commons-cli/1.2//commons-cli-1.2.jar -commons-codec/1.10//commons-codec-1.10.jar +commons-codec/1.15//commons-codec-1.15.jar commons-collections/3.2.2//commons-collections-3.2.2.jar commons-compiler/3.0.16//commons-compiler-3.0.16.jar commons-compress/1.20//commons-compress-1.20.jar diff --git a/pom.xml b/pom.xml index 8aaa4a504ef0c..f087dba9abb00 100644 --- a/pom.xml +++ b/pom.xml @@ -173,7 +173,7 @@ 2.10.5.1 1.1.8.2 1.1.2 - 1.10 + 1.15 1.20 2.5 From 01b73ae6388279514d61c14a9dc9718a34dad465 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sun, 13 Dec 2020 14:40:55 -0800 Subject: [PATCH 0744/1009] [SPARK-33766][BUILD] Upgrade Jackson to 2.11.4 ### What changes were proposed in this pull request? This pr upgrade Jackson to 2.11.4. Jackson Release 2.11: https://github.com/FasterXML/jackson/wiki/Jackson-Release-2.11 ### Why are the changes needed? 
Make it easy to upgrade dependency because Jackson 2.10 is not compatible with 2.11: ``` com.fasterxml.jackson.databind.JsonMappingException: Scala module 2.10.5 requires Jackson Databind version >= 2.10.0 and < 2.11.0 ``` [Avro](https://issues.apache.org/jira/browse/AVRO-2967) has upgraded Jackson to 2.11.3. [Parquet](https://issues.apache.org/jira/browse/PARQUET-1895) has upgraded Jackson to 2.11.2. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing test. Closes #30746 from wangyum/SPARK-33766. Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 14 +++++++------- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 14 +++++++------- pom.xml | 5 ++--- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 03ea28271b683..c2caef3ae58d9 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -104,17 +104,17 @@ httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.12//httpcore-4.4.12.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.4.0//ivy-2.4.0.jar -jackson-annotations/2.10.5//jackson-annotations-2.10.5.jar +jackson-annotations/2.11.4//jackson-annotations-2.11.4.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar -jackson-core/2.10.5//jackson-core-2.10.5.jar -jackson-databind/2.10.5.1//jackson-databind-2.10.5.1.jar -jackson-dataformat-yaml/2.10.5//jackson-dataformat-yaml-2.10.5.jar +jackson-core/2.11.4//jackson-core-2.11.4.jar +jackson-databind/2.11.4//jackson-databind-2.11.4.jar +jackson-dataformat-yaml/2.11.4//jackson-dataformat-yaml-2.11.4.jar jackson-datatype-jsr310/2.11.2//jackson-datatype-jsr310-2.11.2.jar jackson-jaxrs/1.9.13//jackson-jaxrs-1.9.13.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar -jackson-module-jaxb-annotations/2.10.5//jackson-module-jaxb-annotations-2.10.5.jar -jackson-module-paranamer/2.10.5//jackson-module-paranamer-2.10.5.jar -jackson-module-scala_2.12/2.10.5//jackson-module-scala_2.12-2.10.5.jar +jackson-module-jaxb-annotations/2.11.4//jackson-module-jaxb-annotations-2.11.4.jar +jackson-module-paranamer/2.11.4//jackson-module-paranamer-2.11.4.jar +jackson-module-scala_2.12/2.11.4//jackson-module-scala_2.12-2.11.4.jar jackson-xc/1.9.13//jackson-xc-1.9.13.jar jakarta.activation-api/1.2.1//jakarta.activation-api-1.2.1.jar jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 6dd7f87ba1578..87e7a3c2ae1a7 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -103,18 +103,18 @@ httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.12//httpcore-4.4.12.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.4.0//ivy-2.4.0.jar -jackson-annotations/2.10.5//jackson-annotations-2.10.5.jar +jackson-annotations/2.11.4//jackson-annotations-2.11.4.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar -jackson-core/2.10.5//jackson-core-2.10.5.jar -jackson-databind/2.10.5.1//jackson-databind-2.10.5.1.jar -jackson-dataformat-yaml/2.10.5//jackson-dataformat-yaml-2.10.5.jar +jackson-core/2.11.4//jackson-core-2.11.4.jar +jackson-databind/2.11.4//jackson-databind-2.11.4.jar +jackson-dataformat-yaml/2.11.4//jackson-dataformat-yaml-2.11.4.jar jackson-datatype-jsr310/2.11.2//jackson-datatype-jsr310-2.11.2.jar 
jackson-jaxrs-base/2.9.5//jackson-jaxrs-base-2.9.5.jar jackson-jaxrs-json-provider/2.9.5//jackson-jaxrs-json-provider-2.9.5.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar -jackson-module-jaxb-annotations/2.10.5//jackson-module-jaxb-annotations-2.10.5.jar -jackson-module-paranamer/2.10.5//jackson-module-paranamer-2.10.5.jar -jackson-module-scala_2.12/2.10.5//jackson-module-scala_2.12-2.10.5.jar +jackson-module-jaxb-annotations/2.11.4//jackson-module-jaxb-annotations-2.11.4.jar +jackson-module-paranamer/2.11.4//jackson-module-paranamer-2.11.4.jar +jackson-module-scala_2.12/2.11.4//jackson-module-scala_2.12-2.11.4.jar jakarta.activation-api/1.2.1//jakarta.activation-api-1.2.1.jar jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar diff --git a/pom.xml b/pom.xml index f087dba9abb00..09d48a6592ab9 100644 --- a/pom.xml +++ b/pom.xml @@ -169,8 +169,7 @@ true 1.9.13 - 2.10.5 - 2.10.5.1 + 2.11.4 1.1.8.2 1.1.2 1.15 @@ -774,7 +773,7 @@ com.fasterxml.jackson.core jackson-databind - ${fasterxml.jackson-databind.version} + ${fasterxml.jackson.version} com.fasterxml.jackson.core From 94bc2d61a2598d995df8eb79fe450b0e5f6d7582 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sun, 13 Dec 2020 14:52:26 -0800 Subject: [PATCH 0745/1009] [SPARK-33589][SQL][FOLLOWUP] Replace Throwable with NonFatal ### What changes were proposed in this pull request? This pr replace `Throwable` with `NonFatal`. ### Why are the changes needed? Improve code. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? N/A Closes #30744 from wangyum/SPARK-33589-2. Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- .../sql/hive/thriftserver/SparkSQLSessionManager.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala index 0c092abb37f3e..89aaa31c35790 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.hive.thriftserver +import scala.util.control.NonFatal + import org.apache.hadoop.hive.conf.HiveConf import org.apache.hive.service.cli.{HiveSQLException, SessionHandle} import org.apache.hive.service.cli.session.SessionManager @@ -73,12 +75,12 @@ private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, sqlContext: sparkSqlOperationManager.sessionToContexts.put(sessionHandle, ctx) sessionHandle } catch { - case e: Exception => + case NonFatal(e) => try { closeSession(sessionHandle) } catch { - case t: Throwable => - logWarning("Error closing session", t) + case NonFatal(inner) => + logWarning("Error closing session", inner) } throw new HiveSQLException("Failed to open new session: " + e, e) } From 45af3c96889eba1958055206f10524299d0be61c Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sun, 13 Dec 2020 14:57:09 -0800 Subject: [PATCH 0746/1009] [SPARK-33764][SS] Make state store maintenance interval as SQL config ### What changes were proposed in this pull request? Currently the maintenance interval is hard-coded in `StateStore`. This patch proposes to make it as SQL config. ### Why are the changes needed? 
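For readers of the migration note above, a minimal way to observe the new behaviour described there (same sample value as in the commit message; the local-mode session settings are illustrative):

```scala
import org.apache.spark.sql.SparkSession

object ShowEscapingSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[1]")
      .appName("show-escaping-sketch")
      .getOrCreate()
    import spark.implicits._

    // With this change, the embedded \n and \t are printed escaped
    // (e.g. aaa\nbbb\t\tccccc) instead of breaking the table layout.
    Seq("aaa\nbbb\t\tccccc").toDF("value").show()

    spark.stop()
  }
}
```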
Currently the maintenance interval is hard-coded in `StateStore`. For consistency reason, it should be placed together with other SS configs together. SQLConf also has a better way to have doc and default value setting. ### Does this PR introduce _any_ user-facing change? Yes. Previously users use Spark config to set the maintenance interval. Now they could use SQL config to set it. ### How was this patch tested? Unit test. Closes #30741 from viirya/maintenance-interval-sqlconfig. Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../apache/spark/sql/internal/SQLConf.scala | 13 ++++++++++ .../streaming/state/StateStore.scala | 26 ++++++++----------- .../streaming/state/StateStoreConf.scala | 3 +++ .../streaming/state/StateStoreSuite.scala | 4 +-- 4 files changed, 29 insertions(+), 17 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 2220d6f441e8e..078928391f560 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1378,6 +1378,17 @@ object SQLConf { .intConf .createWithDefault(2) + val STREAMING_MAINTENANCE_INTERVAL = + buildConf("spark.sql.streaming.stateStore.maintenanceInterval") + .internal() + .doc("The interval in milliseconds between triggering maintenance tasks in StateStore. " + + "The maintenance task executes background maintenance task in all the loaded store " + + "providers if they are still the active instances according to the coordinator. If not, " + + "inactive instances of store providers will be closed.") + .version("2.0.0") + .timeConf(TimeUnit.MILLISECONDS) + .createWithDefault(TimeUnit.MINUTES.toMillis(1)) // 1 minute + val STATE_STORE_COMPRESSION_CODEC = buildConf("spark.sql.streaming.stateStore.compression.codec") .internal() @@ -3218,6 +3229,8 @@ class SQLConf extends Serializable with Logging { def maxBatchesToRetainInMemory: Int = getConf(MAX_BATCHES_TO_RETAIN_IN_MEMORY) + def streamingMaintenanceInterval: Long = getConf(STREAMING_MAINTENANCE_INTERVAL) + def stateStoreCompressionCodec: String = getConf(STATE_STORE_COMPRESSION_CODEC) def parquetFilterPushDown: Boolean = getConf(PARQUET_FILTER_PUSHDOWN_ENABLED) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala index ab67c19783ff7..f87a2fb30cddc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala @@ -385,8 +385,6 @@ class UnsafeRowPair(var key: UnsafeRow = null, var value: UnsafeRow = null) { */ object StateStore extends Logging { - val MAINTENANCE_INTERVAL_CONFIG = "spark.sql.streaming.stateStore.maintenanceInterval" - val MAINTENANCE_INTERVAL_DEFAULT_SECS = 60 val PARTITION_ID_TO_CHECK_SCHEMA = 0 @GuardedBy("loadedProviders") @@ -471,7 +469,7 @@ object StateStore extends Logging { storeConf: StateStoreConf, hadoopConf: Configuration): StateStoreProvider = { loadedProviders.synchronized { - startMaintenanceIfNeeded() + startMaintenanceIfNeeded(storeConf) if (storeProviderId.storeId.partitionId == PARTITION_ID_TO_CHECK_SCHEMA) { val result = schemaValidated.getOrElseUpdate(storeProviderId, { @@ -534,19 +532,17 @@ object StateStore extends Logging { } /** Start the periodic maintenance 
task if not already started and if Spark active */ - private def startMaintenanceIfNeeded(): Unit = loadedProviders.synchronized { - val env = SparkEnv.get - if (env != null && !isMaintenanceRunning) { - val periodMs = env.conf.getTimeAsMs( - MAINTENANCE_INTERVAL_CONFIG, s"${MAINTENANCE_INTERVAL_DEFAULT_SECS}s") - maintenanceTask = new MaintenanceTask( - periodMs, - task = { doMaintenance() }, - onError = { loadedProviders.synchronized { loadedProviders.clear() } } - ) - logInfo("State Store maintenance task started") + private def startMaintenanceIfNeeded(storeConf: StateStoreConf): Unit = + loadedProviders.synchronized { + if (SparkEnv.get != null && !isMaintenanceRunning) { + maintenanceTask = new MaintenanceTask( + storeConf.maintenanceInterval, + task = { doMaintenance() }, + onError = { loadedProviders.synchronized { loadedProviders.clear() } } + ) + logInfo("State Store maintenance task started") + } } - } /** * Execute background maintenance task in all the loaded store providers if they are still diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala index 23cb3be32c85a..58af8272d1c09 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala @@ -58,6 +58,9 @@ class StateStoreConf( /** whether to validate state schema during query run. */ val stateSchemaCheckEnabled = sqlConf.isStateSchemaCheckEnabled + /** The interval of maintenance tasks. */ + val maintenanceInterval = sqlConf.streamingMaintenanceInterval + /** * Additional configurations related to state store. 
This will capture all configs in * SQLConf that start with `spark.sql.streaming.stateStore.` and extraOptions for a specific diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala index 0c2083ab98ade..d4cd3cdc39fd7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala @@ -390,8 +390,6 @@ class StateStoreSuite extends StateStoreSuiteBase[HDFSBackedStateStoreProvider] val conf = new SparkConf() .setMaster("local") .setAppName("test") - // Make maintenance thread do snapshots and cleanups very fast - .set(StateStore.MAINTENANCE_INTERVAL_CONFIG, "10ms") // Make sure that when SparkContext stops, the StateStore maintenance thread 'quickly' // fails to talk to the StateStoreCoordinator and unloads all the StateStores .set(RPC_NUM_RETRIES, 1) @@ -400,6 +398,8 @@ class StateStoreSuite extends StateStoreSuiteBase[HDFSBackedStateStoreProvider] val storeProviderId = StateStoreProviderId(StateStoreId(dir, opId, 0), UUID.randomUUID) val sqlConf = new SQLConf() sqlConf.setConf(SQLConf.MIN_BATCHES_TO_RETAIN, 2) + // Make maintenance thread do snapshots and cleanups very fast + sqlConf.setConf(SQLConf.STREAMING_MAINTENANCE_INTERVAL, 10L) val storeConf = StateStoreConf(sqlConf) val hadoopConf = new Configuration() val provider = newStoreProvider(storeProviderId.storeId) From 8197ee3b15265d39f05f192934b7d7e661713eaa Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Sun, 13 Dec 2020 15:04:23 -0800 Subject: [PATCH 0747/1009] [SPARK-33690][SQL] Escape meta-characters in showString ### What changes were proposed in this pull request? This PR intends to escape meta-characters (e.g., \n and \t) in `Dataset.showString`. Before this PR: ``` scala> Seq("aaa\nbbb\t\tccccc").toDF("value").show() +--------------+ | value| +--------------+ |aaa bbb ccccc| +--------------+ ``` After this PR: ``` +-----------------+ | value| +-----------------+ |aaa\nbbb\t\tccccc| +-----------------+ ``` ### Why are the changes needed? For better output. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added a unit test. Closes #30647 from maropu/EscapeMetaInShow. Authored-by: Takeshi Yamamuro Signed-off-by: Dongjoon Hyun --- docs/sql-migration-guide.md | 2 + .../scala/org/apache/spark/sql/Dataset.scala | 4 +- .../org/apache/spark/sql/DataFrameSuite.scala | 38 +++++++++++++++++++ .../org/apache/spark/sql/ExplainSuite.scala | 8 ++-- 4 files changed, 47 insertions(+), 5 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 164bfd42d6e4a..484823b7c07ab 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -26,6 +26,8 @@ license: | - In Spark 3.2, `spark.sql.adaptive.enabled` is enabled by default. To restore the behavior before Spark 3.2, you can set `spark.sql.adaptive.enabled` to `false`. + - In Spark 3.2, the meta-characters `\n` and `\t` are escaped in the `show()` action. In Spark 3.1 or earlier, the two metacharacters are output as it is. 
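As a quick illustration of the migration note above, a minimal spark-shell sketch; the expected rendering is taken from the `DataFrameSuite` test added by this patch, and `show(truncate = false)` corresponds to the `showString(..., truncate = 0)` call used there:

```scala
// Spark 3.2+: newline and tab characters inside string cells are escaped by show(),
// so one logical row no longer spills across several console lines.
import spark.implicits._

Seq("aaa\nbbb\tccc").toDF("value").show(truncate = false)
// +-------------+
// |value        |
// +-------------+
// |aaa\nbbb\tccc|
// +-------------+
```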
+ ## Upgrading from Spark SQL 3.0 to 3.1 - In Spark 3.1, statistical aggregation function includes `std`, `stddev`, `stddev_samp`, `variance`, `var_samp`, `skewness`, `kurtosis`, `covar_samp`, `corr` will return `NULL` instead of `Double.NaN` when `DivideByZero` occurs during expression evaluation, for example, when `stddev_samp` applied on a single element set. In Spark version 3.0 and earlier, it will return `Double.NaN` in such case. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.statisticalAggregate` to `true`. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 6afbbce3ff8d4..5c273591360cd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -308,7 +308,9 @@ class Dataset[T] private[sql]( val str = cell match { case null => "null" case binary: Array[Byte] => binary.map("%02X".format(_)).mkString("[", " ", "]") - case _ => cell.toString + case _ => + // Escapes meta-characters not to break the `showString` format + cell.toString.replaceAll("\n", "\\\\n").replaceAll("\t", "\\\\t") } if (truncate > 0 && str.length > truncate) { // do not show ellipses for strings shorter than 4 characters. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 4fecd625031ba..d777cd45b61ee 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -1235,6 +1235,44 @@ class DataFrameSuite extends QueryTest assert(df.showString(10, vertical = true) === expectedAnswer) } + test("SPARK-33690: showString: escape meta-characters") { + val df1 = Seq("aaa\nbbb\tccc").toDF("value") + assert(df1.showString(1, truncate = 0) === + """+-------------+ + ||value | + |+-------------+ + ||aaa\nbbb\tccc| + |+-------------+ + |""".stripMargin) + + val df2 = Seq(Seq("aaa\nbbb\tccc")).toDF("value") + assert(df2.showString(1, truncate = 0) === + """+---------------+ + ||value | + |+---------------+ + ||[aaa\nbbb\tccc]| + |+---------------+ + |""".stripMargin) + + val df3 = Seq(Map("aaa\nbbb\tccc" -> "aaa\nbbb\tccc")).toDF("value") + assert(df3.showString(1, truncate = 0) === + """+--------------------------------+ + ||value | + |+--------------------------------+ + ||{aaa\nbbb\tccc -> aaa\nbbb\tccc}| + |+--------------------------------+ + |""".stripMargin) + + val df4 = Seq("aaa\nbbb\tccc").toDF("value").selectExpr("named_struct('v', value)") + assert(df4.showString(1, truncate = 0) === + """+----------------------+ + ||named_struct(v, value)| + |+----------------------+ + ||{aaa\nbbb\tccc} | + |+----------------------+ + |""".stripMargin) + } + test("SPARK-7319 showString") { val expectedAnswer = """+---+-----+ ||key|value| diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala index ddc4f1dab8e63..7d3285da25a5d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala @@ -261,11 +261,11 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite "PartitionFilters: \\[isnotnull\\(k#xL\\), dynamicpruningexpression\\(k#xL " + "IN subquery#x\\)\\]" val expected_pattern3 = - "Location: InMemoryFileIndex \\[.*org.apache.spark.sql.ExplainSuite" + - 
"/df2/.*, ... 99 entries\\]" + "Location: InMemoryFileIndex \\[\\S*org.apache.spark.sql.ExplainSuite" + + "/df2/\\S*, ... 99 entries\\]" val expected_pattern4 = - "Location: InMemoryFileIndex \\[.*org.apache.spark.sql.ExplainSuite" + - "/df1/.*, ... 999 entries\\]" + "Location: InMemoryFileIndex \\[\\S*org.apache.spark.sql.ExplainSuite" + + "/df1/\\S*, ... 999 entries\\]" withNormalizedExplain(sqlText) { normalizedOutput => assert(expected_pattern1.r.findAllMatchIn(normalizedOutput).length == 1) assert(expected_pattern2.r.findAllMatchIn(normalizedOutput).length == 1) From 6e862792fbc6c0916ad04f1c23dc4acbc5f5a53b Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Mon, 14 Dec 2020 10:22:37 +0900 Subject: [PATCH 0748/1009] [SPARK-33723][SQL] ANSI mode: Casting String to Date should throw exception on parse error ### What changes were proposed in this pull request? Currently, when casting a string as timestamp type in ANSI mode, Spark throws a runtime exception on parsing error. However, the result for casting a string to date is always null. We should throw an exception on parsing error as well. ### Why are the changes needed? Add missing feature for ANSI mode ### Does this PR introduce _any_ user-facing change? Yes for ANSI mode, Casting string to date will throw an exception on parsing error ### How was this patch tested? Unit test Closes #30687 from gengliangwang/castDate. Authored-by: Gengliang Wang Signed-off-by: HyukjinKwon --- docs/sql-ref-ansi-compliance.md | 1 + .../spark/sql/catalyst/expressions/Cast.scala | 27 ++++++++---- .../sql/catalyst/util/DateTimeUtils.scala | 11 +++-- .../sql/catalyst/expressions/CastSuite.scala | 41 +++++++++++++++---- .../resources/sql-tests/inputs/datetime.sql | 5 ++- .../sql-tests/results/ansi/datetime.sql.out | 11 ++++- .../sql-tests/results/datetime-legacy.sql.out | 10 ++++- .../sql-tests/results/datetime.sql.out | 10 ++++- 8 files changed, 92 insertions(+), 24 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 08ba07aa8de63..8201fd707275d 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -163,6 +163,7 @@ The behavior of some SQL operators can be different under ANSI mode (`spark.sql. - `array_col[index]`: This operator throws `ArrayIndexOutOfBoundsException` if using invalid indices. - `map_col[key]`: This operator throws `NoSuchElementException` if key does not exist in map. - `CAST(string_col AS TIMESTAMP)`: This operator should fail with an exception if the input string can't be parsed. + - `CAST(string_col AS DATE)`: This operator should fail with an exception if the input string can't be parsed. 
### SQL Keywords diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 72bd9ca4d3d1c..e1ece732cf15d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -499,7 +499,11 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit // DateConverter private[this] def castToDate(from: DataType): Any => Any = from match { case StringType => - buildCast[UTF8String](_, s => DateTimeUtils.stringToDate(s, zoneId).orNull) + if (ansiEnabled) { + buildCast[UTF8String](_, s => DateTimeUtils.stringToDateAnsi(s, zoneId)) + } else { + buildCast[UTF8String](_, s => DateTimeUtils.stringToDate(s, zoneId).orNull) + } case TimestampType => // throw valid precision more than seconds, according to Hive. // Timestamp.nanos is in 0 to 999,999,999, no more than a second. @@ -1135,15 +1139,22 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit val intOpt = ctx.freshVariable("intOpt", classOf[Option[Integer]]) val zid = getZoneId() (c, evPrim, evNull) => - code""" - scala.Option $intOpt = - org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToDate($c, $zid); - if ($intOpt.isDefined()) { - $evPrim = ((Integer) $intOpt.get()).intValue(); + if (ansiEnabled) { + code""" + $evPrim = org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToDateAnsi($c, $zid); + """ } else { - $evNull = true; + code""" + scala.Option $intOpt = + org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToDate($c, $zid); + if ($intOpt.isDefined()) { + $evPrim = ((Integer) $intOpt.get()).intValue(); + } else { + $evNull = true; + } + """ } - """ + case TimestampType => val zid = getZoneId() (c, evPrim, evNull) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 0543ef99f8947..780d2bad1bab2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -365,11 +365,8 @@ object DateTimeUtils { } def stringToTimestampAnsi(s: UTF8String, timeZoneId: ZoneId): Long = { - val timestamp = stringToTimestamp(s, timeZoneId) - if (timestamp.isEmpty) { + stringToTimestamp(s, timeZoneId).getOrElse { throw new DateTimeException(s"Cannot cast $s to TimestampType.") - } else { - timestamp.get } } @@ -466,6 +463,12 @@ object DateTimeUtils { } } + def stringToDateAnsi(s: UTF8String, zoneId: ZoneId): Int = { + stringToDate(s, zoneId).getOrElse { + throw new DateTimeException(s"Cannot cast $s to DateType.") + } + } + // Gets the local date-time parts (year, month, day and time) of the instant expressed as the // number of microseconds since the epoch at the given time zone ID. 
private def getLocalDateTime(micros: Long, zoneId: ZoneId): LocalDateTime = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index e46599dc19a8b..c4dd5c412401b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -32,6 +32,7 @@ import org.apache.spark.sql.catalyst.analysis.TypeCoercionSuite import org.apache.spark.sql.catalyst.expressions.aggregate.{CollectList, CollectSet} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext import org.apache.spark.sql.catalyst.util.DateTimeConstants._ +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import org.apache.spark.sql.catalyst.util.DateTimeUtils._ import org.apache.spark.sql.internal.SQLConf @@ -93,12 +94,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(Cast(Literal("2015-03-18 123142"), DateType), new Date(c.getTimeInMillis)) checkEvaluation(Cast(Literal("2015-03-18T123123"), DateType), new Date(c.getTimeInMillis)) checkEvaluation(Cast(Literal("2015-03-18T"), DateType), new Date(c.getTimeInMillis)) - - checkEvaluation(Cast(Literal("2015-03-18X"), DateType), null) - checkEvaluation(Cast(Literal("2015/03/18"), DateType), null) - checkEvaluation(Cast(Literal("2015.03.18"), DateType), null) - checkEvaluation(Cast(Literal("20150318"), DateType), null) - checkEvaluation(Cast(Literal("2015-031-8"), DateType), null) } test("cast string to timestamp") { @@ -962,7 +957,7 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { test("ANSI mode: cast string to timestamp with parse error") { val activeConf = conf - new ParVector(ALL_TIMEZONES.toVector).foreach { zid => + DateTimeTestUtils.outstandingZoneIds.foreach { zid => def checkCastWithParseError(str: String): Unit = { checkExceptionInExpression[DateTimeException]( cast(Literal(str), TimestampType, Option(zid.getId)), @@ -984,6 +979,30 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { } } + test("ANSI mode: cast string to date with parse error") { + val activeConf = conf + DateTimeTestUtils.outstandingZoneIds.foreach { zid => + def checkCastWithParseError(str: String): Unit = { + checkExceptionInExpression[DateTimeException]( + cast(Literal(str), DateType, Option(zid.getId)), + s"Cannot cast $str to DateType.") + } + + SQLConf.withExistingConf(activeConf) { + checkCastWithParseError("12345") + checkCastWithParseError("12345-12-18") + checkCastWithParseError("2015-13-18") + checkCastWithParseError("2015-03-128") + checkCastWithParseError("2015/03/18") + checkCastWithParseError("2015.03.18") + checkCastWithParseError("20150318") + checkCastWithParseError("2015-031-8") + checkCastWithParseError("2015-03-18ABC") + checkCastWithParseError("abdef") + } + } + } + test("SPARK-26218: Fix the corner case of codegen when casting float to Integer") { checkExceptionInExpression[ArithmeticException]( cast(cast(Literal("2147483648"), FloatType), IntegerType), "overflow") @@ -1026,6 +1045,14 @@ class CastSuite extends CastSuiteBase { checkEvaluation(cast(123, DecimalType(2, 0)), null) } + test("cast string to date #2") { + checkEvaluation(Cast(Literal("2015-03-18X"), DateType), null) + checkEvaluation(Cast(Literal("2015/03/18"), DateType), null) + 
checkEvaluation(Cast(Literal("2015.03.18"), DateType), null) + checkEvaluation(Cast(Literal("20150318"), DateType), null) + checkEvaluation(Cast(Literal("2015-031-8"), DateType), null) + } + test("casting to fixed-precision decimals") { assert(cast(123, DecimalType.USER_DEFAULT).nullable === false) assert(cast(10.03f, DecimalType.SYSTEM_DEFAULT).nullable) diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql index e35266a85d46b..acfd1f50e14c9 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql @@ -161,7 +161,7 @@ select from_json('{"d":"26/October/2015"}', 'd Date', map('dateFormat', 'dd/MMMM select from_csv('26/October/2015', 't Timestamp', map('timestampFormat', 'dd/MMMMM/yyyy')); select from_csv('26/October/2015', 'd Date', map('dateFormat', 'dd/MMMMM/yyyy')); --- Timestamp type parse error +-- Datetime types parse error select to_date("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS"); select to_date("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS"); select to_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS"); @@ -170,4 +170,5 @@ select unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS"); select unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS"); select to_unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS"); select to_unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS"); -select cast("Unparseable" as timestamp) +select cast("Unparseable" as timestamp); +select cast("Unparseable" as date); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out index 18a751f573bc2..400c8d6c3c84f 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 122 +-- Number of queries: 123 -- !query @@ -1060,3 +1060,12 @@ struct<> -- !query output java.time.DateTimeException Cannot cast Unparseable to TimestampType. + + +-- !query +select cast("Unparseable" as date) +-- !query schema +struct<> +-- !query output +java.time.DateTimeException +Cannot cast Unparseable to DateType. 
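To make the new golden-file results above concrete, a small spark-shell sketch; the only assumption is that ANSI mode is toggled through the standard `spark.sql.ansi.enabled` flag under which the `ansi/` result files are generated:

```scala
// Default (non-ANSI) behaviour: an unparseable date string silently becomes NULL.
spark.conf.set("spark.sql.ansi.enabled", false)
spark.sql("SELECT CAST('Unparseable' AS DATE)").show()   // prints a single NULL row

// ANSI mode: the same cast now fails at runtime, mirroring the timestamp behaviour.
spark.conf.set("spark.sql.ansi.enabled", true)
spark.sql("SELECT CAST('Unparseable' AS DATE)").show()
// java.time.DateTimeException: Cannot cast Unparseable to DateType.
```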
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out index be75f6fb994dd..7e4ea78bf46b9 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 122 +-- Number of queries: 123 -- !query @@ -1013,3 +1013,11 @@ select cast("Unparseable" as timestamp) struct -- !query output NULL + + +-- !query +select cast("Unparseable" as date) +-- !query schema +struct +-- !query output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index 1e963ed16fd96..01db4c1c11fe4 100755 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 122 +-- Number of queries: 123 -- !query @@ -1021,3 +1021,11 @@ select cast("Unparseable" as timestamp) struct -- !query output NULL + + +-- !query +select cast("Unparseable" as date) +-- !query schema +struct +-- !query output +NULL From b135db3b1a5c0b2170e98b97f6160bcf55903799 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Sun, 13 Dec 2020 17:27:39 -0800 Subject: [PATCH 0749/1009] [SPARK-33757][INFRA][R][FOLLOWUP] Provide more simple solution ### What changes were proposed in this pull request? This PR proposes a better solution for the R build failure on GitHub Actions. The issue is solved in #30737 but I noticed the following two things. * We can use the latest `usethis` if we install additional libraries on the GitHub Actions environment. * For tests on AppVeyor, `usethis` is not necessary, so I partially revert the previous change. ### Why are the changes needed? For more simple solution. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Confirmed on GitHub Actions and AppVeyor on my account. Closes #30753 from sarutak/followup-SPARK-33757. Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 5 +---- appveyor.yml | 3 --- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 30199eaa41999..f133a4132b2a5 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -335,10 +335,7 @@ jobs: r-version: 4.0 - name: Install R linter dependencies and SparkR run: | - sudo apt-get install -y libcurl4-openssl-dev - # dependencies for usethis 1.6.3. 
- sudo Rscript -e "install.packages(c('clipr', 'cli', 'crayon', 'desc', 'fs', 'gh', 'glue', 'purrr', 'rematch2', 'rlang', 'rprojroot', 'whisker', 'withr', 'yaml', 'git2r', 'rstudioapi'), repos='https://cloud.r-project.org/')" - sudo Rscript -e "install.packages('https://cran.r-project.org/src/contrib/Archive/usethis/usethis_1.6.3.tar.gz', repos=NULL, type='source')" + sudo apt-get install -y libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev sudo Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')" sudo Rscript -e "devtools::install_github('jimhester/lintr@v2.0.0')" ./R/install-dev.sh diff --git a/appveyor.yml b/appveyor.yml index b6a42a02d1ac9..c40b23c8341eb 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -41,9 +41,6 @@ cache: install: # Install maven and dependencies - ps: .\dev\appveyor-install-dependencies.ps1 - # usethis and its dependencies - - cmd: Rscript -e "install.packages(c('clipr', 'cli', 'crayon', 'desc', 'fs', 'gh', 'glue', 'purrr', 'rematch2', 'rlang', 'rprojroot', 'whisker', 'withr', 'yaml', 'git2r', 'rstudioapi'), repos='https://cloud.r-project.org/')" - - cmd: Rscript -e "install.packages('https://cran.r-project.org/src/contrib/Archive/usethis/usethis_1.6.3.tar.gz', repos=NULL, type='source')" # Required package for R unit tests. xml2 is required to use jUnit reporter in testthat. - cmd: Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'e1071', 'survival', 'arrow', 'xml2'), repos='https://cloud.r-project.org/')" - cmd: Rscript -e "pkg_list <- as.data.frame(installed.packages()[,c(1, 3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]" From 4d47ac4b4b20a475c2f416c7d614318b31323041 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 14 Dec 2020 05:14:38 +0000 Subject: [PATCH 0750/1009] [SPARK-33705][SQL][TEST] Fix HiveThriftHttpServerSuite flakiness ### What changes were proposed in this pull request? TO FIX flaky tests: https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/132345/testReport/ ``` org.apache.spark.sql.hive.thriftserver.HiveThriftHttpServerSuite.JDBC query execution org.apache.spark.sql.hive.thriftserver.HiveThriftHttpServerSuite.Checks Hive version org.apache.spark.sql.hive.thriftserver.HiveThriftHttpServerSuite.SPARK-24829 Checks cast as float ``` The root cause here is a jar conflict issue. `NewCookie.isHttpOnly` is not defined in the `jsr311-api.jar` which conflicts The transitive artifact `jsr311-api.jar` of `hadoop-client` is excluded at the maven side. See https://issues.apache.org/jira/browse/SPARK-27179. The Jenkins PR builder and Github Action use `SBT` as the compiler tool. First, the exclusion rule from maven is not followed by sbt, so I was able to see `jsr311-api.jar` from maven cache to be added to the classpath directly. **This seems to be a bug of `sbt-pom-reader` plugin but I'm not that sure.** Then I added an `ExcludeRule` for the `hive-thriftserver` module at the SBT side and did see the `jsr311-api.jar` gone, but the CI jobs still failed with the same error. I added a trace log in ThriftHttpServlet ```s ERROR ThriftHttpServlet: !!!!!!!!! Suspect???????? ---> file:/home/jenkins/workspace/SparkPullRequestBuilder/assembly/target/scala-2.12/jars/jsr311-api-1.1.1.jar ``` And the log pointed out that the assembly phase copied it to `assembly/target/scala-2.12/jars/` which will be added to the classpath too. With the help of SBT `dependencyTree` tool, I saw the `jsr311-api` again as a transitive of `jersery-core` from `yarn` module with a `test` scope. 
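The workaround, described next, relies on sbt's exclusion rules. As a rough, hedged sketch of what such a project-wide exclusion looks like in sbt's Scala DSL (the concrete rules land in the SparkBuild.scala hunk further down in this patch):

```scala
// sbt (Scala DSL) sketch: drop the jersey-1 era artifacts from every module's classpath.
// The actual change applies these rules in SparkBuild.scala and then re-allows the
// artifacts for the yarn module only, which still needs jersey-1 for its tests.
excludeDependencies ++= Seq(
  ExclusionRule(organization = "com.sun.jersey"),
  ExclusionRule("javax.servlet", "javax.servlet-api"),
  ExclusionRule("javax.ws.rs", "jsr311-api"))
```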
So **This seems to be another bug from the SBT side of the `sbt-assembly` plugin.** It copied a test scope transitive artifact to the assembly output. In this PR, I defined some rules in SparkBuild.scala to bypass the potential bugs from the SBT side. First, exclude the `jsr311` from all over the project and then add it back separately to the YARN module for SBT. Additionally, the HiveThriftServerSuites was reflected for reducing flakiness too, but not related to the bugs I have found so far. ### Why are the changes needed? fix test here ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? passing jenkins and ga Closes #30643 from yaooqinn/HiveThriftHttpServerSuite. Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- LICENSE-binary | 2 +- core/pom.xml | 6 +- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 1 + pom.xml | 2 +- project/SparkBuild.scala | 27 ++- resource-managers/yarn/pom.xml | 7 - .../HiveThriftServer2Suites.scala | 199 +++++++++++------- .../thriftserver/JdbcConnectionUriSuite.scala | 70 ------ .../SparkMetadataOperationSuite.scala | 2 +- ...arkThriftServerProtocolVersionsSuite.scala | 2 +- .../hive/thriftserver/UISeleniumSuite.scala | 6 +- 12 files changed, 156 insertions(+), 170 deletions(-) delete mode 100644 sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/JdbcConnectionUriSuite.scala diff --git a/LICENSE-binary b/LICENSE-binary index d363661b1cc7e..2a5434e14a3f5 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -521,7 +521,6 @@ Common Development and Distribution License (CDDL) 1.1 ------------------------------------------------------ javax.el:javax.el-api https://javaee.github.io/uel-ri/ -javax.servlet:javax.servlet-api https://javaee.github.io/servlet-spec/ javax.servlet.jsp:jsp-api javax.transaction:jta http://www.oracle.com/technetwork/java/index.html javax.xml.bind:jaxb-api https://github.com/javaee/jaxb-v2 @@ -553,6 +552,7 @@ Eclipse Public License (EPL) 2.0 -------------------------------- jakarta.annotation:jakarta-annotation-api https://projects.eclipse.org/projects/ee4j.ca +jakarta.servlet:jakarta.servlet-api https://projects.eclipse.org/projects/ee4j.servlet jakarta.ws.rs:jakarta.ws.rs-api https://github.com/eclipse-ee4j/jaxrs-api org.glassfish.hk2.external:jakarta.inject diff --git a/core/pom.xml b/core/pom.xml index 84ca852d1f30a..1f24c5273ad0b 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -161,9 +161,9 @@ compile - javax.servlet - javax.servlet-api - ${javaxservlet.version} + jakarta.servlet + jakarta.servlet-api + ${jakartaservlet.version} org.apache.commons diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index c2caef3ae58d9..ceea496d3f1dc 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -119,6 +119,7 @@ jackson-xc/1.9.13//jackson-xc-1.9.13.jar jakarta.activation-api/1.2.1//jakarta.activation-api-1.2.1.jar jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar +jakarta.servlet-api/4.0.3//jakarta.servlet-api-4.0.3.jar jakarta.validation-api/2.0.2//jakarta.validation-api-2.0.2.jar jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar @@ -126,7 +127,6 @@ janino/3.0.16//janino-3.0.16.jar javassist/3.25.0-GA//javassist-3.25.0-GA.jar javax.inject/1//javax.inject-1.jar javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar 
-javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar javolution/5.5.1//javolution-5.5.1.jar jaxb-api/2.2.2//jaxb-api-2.2.2.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 87e7a3c2ae1a7..d1b811bd73607 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -118,6 +118,7 @@ jackson-module-scala_2.12/2.11.4//jackson-module-scala_2.12-2.11.4.jar jakarta.activation-api/1.2.1//jakarta.activation-api-1.2.1.jar jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar +jakarta.servlet-api/4.0.3//jakarta.servlet-api-4.0.3.jar jakarta.validation-api/2.0.2//jakarta.validation-api-2.0.2.jar jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar diff --git a/pom.xml b/pom.xml index 09d48a6592ab9..78d1fe7d54350 100644 --- a/pom.xml +++ b/pom.xml @@ -138,7 +138,7 @@ 1.10.1 1.6.6 9.4.28.v20200408 - 3.1.0 + 4.0.3 0.9.5 2.4.0 2.0.8 diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 23fb73d228e01..a28c2b55b3789 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -395,6 +395,8 @@ object SparkBuild extends PomBuild { enable(KubernetesIntegrationTests.settings)(kubernetesIntegrationTests) + enable(YARN.settings)(yarn) + /** * Adds the ability to run the spark shell directly from SBT without building an assembly * jar. @@ -654,7 +656,21 @@ object DependencyOverrides { */ object ExcludedDependencies { lazy val settings = Seq( - libraryDependencies ~= { libs => libs.filterNot(_.name == "groovy-all") } + libraryDependencies ~= { libs => libs.filterNot(_.name == "groovy-all") }, + // SPARK-33705: Due to sbt compiler issues, it brings exclusions defined in maven pom back to + // the classpath directly and assemble test scope artifacts to assembly/target/scala-xx/jars, + // which is also will be added to the classpath of some unit tests that will build a subprocess + // to run `spark-submit`, e.g. HiveThriftServer2Test. + // + // These artifacts are for the jersey-1 API but Spark use jersey-2 ones, so it cause test + // flakiness w/ jar conflicts issues. + // + // Also jersey-1 is only used by yarn module(see resource-managers/yarn/pom.xml) for testing + // purpose only. Here we exclude them from the whole project scope and add them w/ yarn only. 
+ excludeDependencies ++= Seq( + ExclusionRule(organization = "com.sun.jersey"), + ExclusionRule("javax.servlet", "javax.servlet-api"), + ExclusionRule("javax.ws.rs", "jsr311-api")) ) } @@ -758,6 +774,15 @@ object Hive { ) } +object YARN { + lazy val settings = Seq( + excludeDependencies --= Seq( + ExclusionRule(organization = "com.sun.jersey"), + ExclusionRule("javax.servlet", "javax.servlet-api"), + ExclusionRule("javax.ws.rs", "jsr311-api")) + ) +} + object Assembly { import sbtassembly.AssemblyUtils._ import sbtassembly.AssemblyPlugin.autoImport._ diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index 1d3856742f520..c0ce1c8e151ed 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -88,13 +88,6 @@ hadoop-client - - jakarta.servlet - jakarta.servlet-api - 4.0.3 - test - - com.google.guava diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala index 5bf7892478082..bd0db743b8d4c 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala @@ -29,7 +29,7 @@ import scala.collection.mutable.ArrayBuffer import scala.concurrent.{ExecutionContext, Future, Promise} import scala.concurrent.duration._ import scala.io.Source -import scala.util.{Random, Try} +import scala.util.Try import com.google.common.io.Files import org.apache.hadoop.hive.conf.HiveConf.ConfVars @@ -41,6 +41,7 @@ import org.apache.hive.service.rpc.thrift.TCLIService.Client import org.apache.thrift.protocol.TBinaryProtocol import org.apache.thrift.transport.TSocket import org.scalatest.BeforeAndAfterAll +import org.scalatest.concurrent.Eventually._ import org.apache.spark.{SparkException, SparkFunSuite} import org.apache.spark.internal.Logging @@ -60,7 +61,7 @@ object TestData { val smallKvWithNull = getTestDataFilePath("small_kv_with_null.txt") } -class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { +class HiveThriftBinaryServerSuite extends HiveThriftServer2Test { override def mode: ServerMode.Value = ServerMode.binary private def withCLIServiceClient(f: ThriftCLIServiceClient => Unit): Unit = { @@ -935,7 +936,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { } } -class SingleSessionSuite extends HiveThriftJdbcTest { +class SingleSessionSuite extends HiveThriftServer2TestBase { override def mode: ServerMode.Value = ServerMode.binary override protected def extraConf: Seq[String] = @@ -1046,7 +1047,7 @@ class SingleSessionSuite extends HiveThriftJdbcTest { } } -class HiveThriftCleanUpScratchDirSuite extends HiveThriftJdbcTest{ +class HiveThriftCleanUpScratchDirSuite extends HiveThriftServer2TestBase { var tempScratchDir: File = _ override protected def beforeAll(): Unit = { @@ -1079,7 +1080,7 @@ class HiveThriftCleanUpScratchDirSuite extends HiveThriftJdbcTest{ } } -class HiveThriftHttpServerSuite extends HiveThriftJdbcTest { +class HiveThriftHttpServerSuite extends HiveThriftServer2Test { override def mode: ServerMode.Value = ServerMode.http test("JDBC query execution") { @@ -1122,63 +1123,7 @@ object ServerMode extends Enumeration { val binary, http = Value } -abstract class HiveThriftJdbcTest extends HiveThriftServer2Test { - Utils.classForName(classOf[HiveDriver].getCanonicalName) - - private def jdbcUri 
= if (mode == ServerMode.http) { - s"""jdbc:hive2://localhost:$serverPort/ - |default? - |hive.server2.transport.mode=http; - |hive.server2.thrift.http.path=cliservice; - |${hiveConfList}#${hiveVarList} - """.stripMargin.split("\n").mkString.trim - } else { - s"jdbc:hive2://localhost:$serverPort/?${hiveConfList}#${hiveVarList}" - } - - def withMultipleConnectionJdbcStatement(tableNames: String*)(fs: (Statement => Unit)*): Unit = { - val user = System.getProperty("user.name") - val connections = fs.map { _ => DriverManager.getConnection(jdbcUri, user, "") } - val statements = connections.map(_.createStatement()) - - try { - statements.zip(fs).foreach { case (s, f) => f(s) } - } finally { - tableNames.foreach { name => - // TODO: Need a better way to drop the view. - if (name.toUpperCase(Locale.ROOT).startsWith("VIEW")) { - statements(0).execute(s"DROP VIEW IF EXISTS $name") - } else { - statements(0).execute(s"DROP TABLE IF EXISTS $name") - } - } - statements.foreach(_.close()) - connections.foreach(_.close()) - } - } - - def withDatabase(dbNames: String*)(fs: (Statement => Unit)*): Unit = { - val user = System.getProperty("user.name") - val connections = fs.map { _ => DriverManager.getConnection(jdbcUri, user, "") } - val statements = connections.map(_.createStatement()) - - try { - statements.zip(fs).foreach { case (s, f) => f(s) } - } finally { - dbNames.foreach { name => - statements(0).execute(s"DROP DATABASE IF EXISTS $name") - } - statements.foreach(_.close()) - connections.foreach(_.close()) - } - } - - def withJdbcStatement(tableNames: String*)(f: Statement => Unit): Unit = { - withMultipleConnectionJdbcStatement(tableNames: _*)(f) - } -} - -abstract class HiveThriftServer2Test extends SparkFunSuite with BeforeAndAfterAll with Logging { +abstract class HiveThriftServer2TestBase extends SparkFunSuite with BeforeAndAfterAll with Logging { def mode: ServerMode.Value private val CLASS_NAME = HiveThriftServer2.getClass.getCanonicalName.stripSuffix("$") @@ -1207,7 +1152,7 @@ abstract class HiveThriftServer2Test extends SparkFunSuite with BeforeAndAfterAl protected def extraConf: Seq[String] = Nil - protected def serverStartCommand(port: Int) = { + protected def serverStartCommand(): Seq[String] = { val portConf = if (mode == ServerMode.binary) { ConfVars.HIVE_SERVER2_THRIFT_PORT } else { @@ -1220,7 +1165,7 @@ abstract class HiveThriftServer2Test extends SparkFunSuite with BeforeAndAfterAl val tempLog4jConf = Utils.createTempDir().getCanonicalPath Files.write( - """log4j.rootCategory=DEBUG, console + """log4j.rootCategory=INFO, console |log4j.appender.console=org.apache.log4j.ConsoleAppender |log4j.appender.console.target=System.err |log4j.appender.console.layout=org.apache.log4j.PatternLayout @@ -1240,7 +1185,7 @@ abstract class HiveThriftServer2Test extends SparkFunSuite with BeforeAndAfterAl | --hiveconf ${ConfVars.HIVE_SERVER2_TRANSPORT_MODE}=$mode | --hiveconf ${ConfVars.HIVE_SERVER2_LOGGING_OPERATION_LOG_LOCATION}=$operationLogPath | --hiveconf ${ConfVars.LOCALSCRATCHDIR}=$lScratchDir - | --hiveconf $portConf=$port + | --hiveconf $portConf=0 | --driver-class-path $driverClassPath | --driver-java-options -Dlog4j.debug | --conf spark.ui.enabled=false @@ -1262,7 +1207,7 @@ abstract class HiveThriftServer2Test extends SparkFunSuite with BeforeAndAfterAl val SERVER_STARTUP_TIMEOUT = 3.minutes - private def startThriftServer(port: Int, attempt: Int) = { + private def startThriftServer(attempt: Int) = { warehousePath = Utils.createTempDir() warehousePath.delete() metastorePath = 
Utils.createTempDir() @@ -1274,18 +1219,16 @@ abstract class HiveThriftServer2Test extends SparkFunSuite with BeforeAndAfterAl logPath = null logTailingProcess = null - val command = serverStartCommand(port) + val command = serverStartCommand() diagnosisBuffer ++= s""" |### Attempt $attempt ### |HiveThriftServer2 command line: $command - |Listening port: $port + |Listening port: 0 |System user: $user """.stripMargin.split("\n") - logInfo(s"Trying to start HiveThriftServer2: port=$port, mode=$mode, attempt=$attempt") - logPath = { val lines = Utils.executeAndGetOutput( command = command, @@ -1312,7 +1255,11 @@ abstract class HiveThriftServer2Test extends SparkFunSuite with BeforeAndAfterAl // Ensures that the following "tail" command won't fail. logPath.createNewFile() - val successLines = Seq(THRIFT_BINARY_SERVICE_LIVE, THRIFT_HTTP_SERVICE_LIVE) + val successLine = if (mode == ServerMode.http) { + THRIFT_HTTP_SERVICE_LIVE + } else { + THRIFT_BINARY_SERVICE_LIVE + } logTailingProcess = { val command = s"/usr/bin/env tail -n +0 -f ${logPath.getCanonicalPath}".split(" ") @@ -1321,14 +1268,15 @@ abstract class HiveThriftServer2Test extends SparkFunSuite with BeforeAndAfterAl val captureOutput = (line: String) => diagnosisBuffer.synchronized { diagnosisBuffer += line - successLines.foreach { r => - if (line.contains(r)) { - serverStarted.trySuccess(()) - } + if (line.contains(successLine)) { + listeningPort = line.split(" on port ")(1).split(' ').head.toInt + logInfo(s"Started HiveThriftServer2: port=$listeningPort, mode=$mode, attempt=$attempt") + serverStarted.trySuccess(()) + () } } - val process = builder.start() + val process = builder.start() new ProcessOutputCapturer(process.getInputStream, captureOutput).start() new ProcessOutputCapturer(process.getErrorStream, captureOutput).start() @@ -1379,16 +1327,18 @@ abstract class HiveThriftServer2Test extends SparkFunSuite with BeforeAndAfterAl override protected def beforeAll(): Unit = { super.beforeAll() - // Chooses a random port between 10000 and 19999 - listeningPort = 10000 + Random.nextInt(10000) diagnosisBuffer.clear() // Retries up to 3 times with different port numbers if the server fails to start - (1 to 3).foldLeft(Try(startThriftServer(listeningPort, 0))) { case (started, attempt) => + (1 to 3).foldLeft(Try(startThriftServer(0))) { case (started, attempt) => started.orElse { - listeningPort += 1 stopThriftServer() - Try(startThriftServer(listeningPort, attempt)) + Try { + startThriftServer(attempt) + eventually(timeout(30.seconds), interval(1.seconds)) { + withJdbcStatement() { _.execute("SELECT 1") } + } + } } }.recover { case cause: Throwable => @@ -1407,4 +1357,91 @@ abstract class HiveThriftServer2Test extends SparkFunSuite with BeforeAndAfterAl super.afterAll() } } + + Utils.classForName(classOf[HiveDriver].getCanonicalName) + + protected def jdbcUri(database: String = "default"): String = if (mode == ServerMode.http) { + s"""jdbc:hive2://localhost:$serverPort/ + |$database? + |hive.server2.transport.mode=http; + |hive.server2.thrift.http.path=cliservice; + |${hiveConfList}#${hiveVarList} + """.stripMargin.split("\n").mkString.trim + } else { + s"jdbc:hive2://localhost:$serverPort/$database?${hiveConfList}#${hiveVarList}" + } + + private def tryCaptureSysLog(f: => Unit): Unit = { + try f catch { + case e: Exception => + // Dump the HiveThriftServer2 log if error occurs, e.g. getConnection failure. 
+ dumpLogs() + throw e + } + } + + def withMultipleConnectionJdbcStatement( + tableNames: String*)(fs: (Statement => Unit)*): Unit = tryCaptureSysLog { + val user = System.getProperty("user.name") + val connections = fs.map { _ => DriverManager.getConnection(jdbcUri(), user, "") } + val statements = connections.map(_.createStatement()) + + try { + statements.zip(fs).foreach { case (s, f) => f(s) } + } finally { + tableNames.foreach { name => + // TODO: Need a better way to drop the view. + if (name.toUpperCase(Locale.ROOT).startsWith("VIEW")) { + statements(0).execute(s"DROP VIEW IF EXISTS $name") + } else { + statements(0).execute(s"DROP TABLE IF EXISTS $name") + } + } + statements.foreach(_.close()) + connections.foreach(_.close()) + } + } + + def withDatabase(dbNames: String*)(fs: (Statement => Unit)*): Unit = tryCaptureSysLog { + val user = System.getProperty("user.name") + val connections = fs.map { _ => DriverManager.getConnection(jdbcUri(), user, "") } + val statements = connections.map(_.createStatement()) + + try { + statements.zip(fs).foreach { case (s, f) => f(s) } + } finally { + dbNames.foreach { name => + statements(0).execute(s"DROP DATABASE IF EXISTS $name") + } + statements.foreach(_.close()) + connections.foreach(_.close()) + } + } + + def withJdbcStatement(tableNames: String*)(f: Statement => Unit): Unit = { + withMultipleConnectionJdbcStatement(tableNames: _*)(f) + } +} + +/** + * Common tests for both binary and http mode thrift server + * TODO: SPARK-31914: Move common tests from subclasses to this trait + */ +abstract class HiveThriftServer2Test extends HiveThriftServer2TestBase { + test("SPARK-17819: Support default database in connection URIs") { + withDatabase("spark17819") { statement => + statement.execute(s"CREATE DATABASE IF NOT EXISTS spark17819") + val jdbcStr = jdbcUri("spark17819") + val connection = DriverManager.getConnection(jdbcStr, user, "") + val statementN = connection.createStatement() + try { + val resultSet = statementN.executeQuery("select current_database()") + resultSet.next() + assert(resultSet.getString(1) === "spark17819") + } finally { + statementN.close() + connection.close() + } + } + } } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/JdbcConnectionUriSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/JdbcConnectionUriSuite.scala deleted file mode 100644 index fb8a7e273ae44..0000000000000 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/JdbcConnectionUriSuite.scala +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.hive.thriftserver - -import java.sql.DriverManager - -import org.apache.hive.jdbc.HiveDriver - -import org.apache.spark.util.Utils - -class JdbcConnectionUriSuite extends HiveThriftServer2Test { - Utils.classForName(classOf[HiveDriver].getCanonicalName) - - override def mode: ServerMode.Value = ServerMode.binary - - val JDBC_TEST_DATABASE = "jdbc_test_database" - val USER = System.getProperty("user.name") - val PASSWORD = "" - - override protected def beforeAll(): Unit = { - super.beforeAll() - - val jdbcUri = s"jdbc:hive2://localhost:$serverPort/" - val connection = DriverManager.getConnection(jdbcUri, USER, PASSWORD) - val statement = connection.createStatement() - statement.execute(s"CREATE DATABASE $JDBC_TEST_DATABASE") - connection.close() - } - - override protected def afterAll(): Unit = { - try { - val jdbcUri = s"jdbc:hive2://localhost:$serverPort/" - val connection = DriverManager.getConnection(jdbcUri, USER, PASSWORD) - val statement = connection.createStatement() - statement.execute(s"DROP DATABASE $JDBC_TEST_DATABASE") - connection.close() - } finally { - super.afterAll() - } - } - - test("SPARK-17819 Support default database in connection URIs") { - val jdbcUri = s"jdbc:hive2://localhost:$serverPort/$JDBC_TEST_DATABASE" - val connection = DriverManager.getConnection(jdbcUri, USER, PASSWORD) - val statement = connection.createStatement() - try { - val resultSet = statement.executeQuery("select current_database()") - resultSet.next() - assert(resultSet.getString(1) === JDBC_TEST_DATABASE) - } finally { - statement.close() - connection.close() - } - } -} diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala index b413b46adcaa1..bb7448293f559 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.types._ import org.apache.spark.util.VersionUtils -class SparkMetadataOperationSuite extends HiveThriftJdbcTest { +class SparkMetadataOperationSuite extends HiveThriftServer2TestBase { override def mode: ServerMode.Value = ServerMode.binary diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala index 52cf429441d16..fd4d7231e8989 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala @@ -31,7 +31,7 @@ import org.apache.thrift.transport.TSocket import org.apache.spark.sql.catalyst.util.NumberConverter import org.apache.spark.unsafe.types.UTF8String -class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { +class SparkThriftServerProtocolVersionsSuite extends HiveThriftServer2TestBase { override def mode: ServerMode.Value = ServerMode.binary diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala 
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala index d0b829c240327..2d0edb8eb8d48 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala @@ -32,7 +32,7 @@ import org.scalatestplus.selenium.WebBrowser import org.apache.spark.ui.SparkUICssErrorHandler class UISeleniumSuite - extends HiveThriftJdbcTest + extends HiveThriftServer2TestBase with WebBrowser with Matchers with BeforeAndAfterAll { implicit var webDriver: WebDriver = _ @@ -57,7 +57,7 @@ class UISeleniumSuite } } - override protected def serverStartCommand(port: Int) = { + override protected def serverStartCommand(): Seq[String] = { val portConf = if (mode == ServerMode.binary) { ConfVars.HIVE_SERVER2_THRIFT_PORT } else { @@ -71,7 +71,7 @@ class UISeleniumSuite | --hiveconf ${ConfVars.METASTOREWAREHOUSE}=$warehousePath | --hiveconf ${ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST}=localhost | --hiveconf ${ConfVars.HIVE_SERVER2_TRANSPORT_MODE}=$mode - | --hiveconf $portConf=$port + | --hiveconf $portConf=0 | --driver-class-path ${sys.props("java.class.path")} | --conf spark.ui.enabled=true | --conf spark.ui.port=$uiPort From 9160d59ae379910ca3bbd04ee25d336afff28abd Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 14 Dec 2020 15:56:46 +0900 Subject: [PATCH 0751/1009] [SPARK-33770][SQL][TESTS] Fix the `ALTER TABLE .. DROP PARTITION` tests that delete files out of partition path ### What changes were proposed in this pull request? Modify the tests that add partitions with `LOCATION`, and where the number of nested folders in `LOCATION` doesn't match to the number of partitioned columns. In that case, `ALTER TABLE .. DROP PARTITION` tries to access (delete) folder out of the "base" path in `LOCATION`. The problem belongs to Hive's MetaStore method `drop_partition_common`: https://github.com/apache/hive/blob/8696c82d07d303b6dbb69b4d443ab6f2b241b251/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java#L4876 which tries to delete empty partition sub-folders recursively starting from the most deeper partition sub-folder up to the base folder. In the case when the number of sub-folder is not equal to the number of partitioned columns `part_vals.size()`, the method will try to list and delete folders out of the base path. ### Why are the changes needed? To fix test failures like https://github.com/apache/spark/pull/30643#issuecomment-743774733: ``` org.apache.spark.sql.hive.execution.command.AlterTableAddPartitionSuite.ALTER TABLE .. ADD PARTITION Hive V1: SPARK-33521: universal type conversions of partition values sbt.ForkMain$ForkError: org.apache.spark.sql.AnalysisException: org.apache.hadoop.hive.ql.metadata.HiveException: File file:/home/jenkins/workspace/SparkPullRequestBuilder/target/tmp/spark-832cb19c-65fd-41f3-ae0b-937d76c07897 does not exist; at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:112) at org.apache.spark.sql.hive.HiveExternalCatalog.dropPartitions(HiveExternalCatalog.scala:1014) ... 
Caused by: sbt.ForkMain$ForkError: org.apache.hadoop.hive.metastore.api.MetaException: File file:/home/jenkins/workspace/SparkPullRequestBuilder/target/tmp/spark-832cb19c-65fd-41f3-ae0b-937d76c07897 does not exist at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.drop_partition_with_environment_context(HiveMetaStore.java:3381) at sun.reflect.GeneratedMethodAccessor304.invoke(Unknown Source) ``` The issue can be reproduced by the following steps: 1. Create a base folder, for example: `/Users/maximgekk/tmp/part-location` 2. Create a sub-folder in the base folder and drop permissions for it: ``` $ mkdir /Users/maximgekk/tmp/part-location/aaa $ chmod a-rwx /Users/maximgekk/tmp/part-location/aaa $ ls -al /Users/maximgekk/tmp/part-location total 0 drwxr-xr-x 3 maximgekk staff 96 Dec 13 18:42 . drwxr-xr-x 33 maximgekk staff 1056 Dec 13 18:32 .. d--------- 2 maximgekk staff 64 Dec 13 18:42 aaa ``` 3. Create a table with a partition folder in the base folder: ```sql spark-sql> create table tbl (id int) partitioned by (part0 int, part1 int); spark-sql> alter table tbl add partition (part0=1,part1=2) location '/Users/maximgekk/tmp/part-location/tbl'; ``` 4. Try to drop this partition: ``` spark-sql> alter table tbl drop partition (part0=1,part1=2); 20/12/13 18:46:07 ERROR HiveClientImpl: ====================== Attempt to drop the partition specs in table 'tbl' database 'default': Map(part0 -> 1, part1 -> 2) In this attempt, the following partitions have been dropped successfully: The remaining partitions have not been dropped: [1, 2] ====================== Error in query: org.apache.hadoop.hive.ql.metadata.HiveException: Error accessing file:/Users/maximgekk/tmp/part-location/aaa; org.apache.spark.sql.AnalysisException: org.apache.hadoop.hive.ql.metadata.HiveException: Error accessing file:/Users/maximgekk/tmp/part-location/aaa; ``` The command fails because it tries to access the sub-folder `aaa`, which is outside the partition path `/Users/maximgekk/tmp/part-location/tbl`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running the affected tests from local IDEA, which does not have access to folders outside the partition paths. Closes #30752 from MaxGekk/fix-drop-partition-location. 
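The recurring pattern in the test fixes below is to give a partition a LOCATION whose directory nesting mirrors the partition columns. A hedged, self-contained sketch of that layout; the table name, column names and dates are purely illustrative:

```scala
import java.io.File
import java.nio.file.Files

// Nest the custom LOCATION as <base>/ds=.../hr=... so it has exactly as many levels as
// there are partition columns; Hive's recursive cleanup of empty folders then stays
// inside `base` instead of walking up past it, which is what broke the original tests.
val base    = Files.createTempDirectory("part-location").toFile
val partDir = new File(new File(base, "ds=2008-04-09"), "hr=11")
partDir.mkdirs()

spark.sql("CREATE TABLE tbl (id INT) PARTITIONED BY (ds STRING, hr STRING)")
spark.sql(s"ALTER TABLE tbl ADD PARTITION (ds='2008-04-09', hr='11') LOCATION '${partDir.toURI}'")
spark.sql("ALTER TABLE tbl DROP PARTITION (ds='2008-04-09', hr='11')")  // stays within base
```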
Lead-authored-by: Max Gekk Co-authored-by: Maxim Gekk Signed-off-by: HyukjinKwon --- .../sql/catalyst/catalog/ExternalCatalogSuite.scala | 9 +++++++-- .../command/AlterTableAddPartitionSuiteBase.scala | 2 +- .../org/apache/spark/sql/hive/StatisticsSuite.scala | 12 ++++++++---- .../spark/sql/hive/execution/HiveDDLSuite.scala | 4 ++-- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala index 55712d0da518d..d310538e302de 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala @@ -408,8 +408,8 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEac partitionColumnNames = Seq("partCol1", "partCol2")) catalog.createTable(table, ignoreIfExists = false) - val newLocationPart1 = newUriForDatabase() - val newLocationPart2 = newUriForDatabase() + val newLocationPart1 = newUriForPartition(Seq("p1=1", "p2=2")) + val newLocationPart2 = newUriForPartition(Seq("p1=3", "p2=4")) val partition1 = CatalogTablePartition(Map("partCol1" -> "1", "partCol2" -> "2"), @@ -991,6 +991,11 @@ abstract class CatalogTestUtils { def newUriForDatabase(): URI = new URI(Utils.createTempDir().toURI.toString.stripSuffix("/")) + def newUriForPartition(parts: Seq[String]): URI = { + val path = parts.foldLeft(Utils.createTempDir())(new java.io.File(_, _)) + new URI(path.toURI.toString.stripSuffix("/")) + } + def newDb(name: String): CatalogDatabase = { CatalogDatabase(name, name + " description", newUriForDatabase(), Map.empty) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala index 9d2c58b7e4351..2457bb9f8b57c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala @@ -154,7 +154,7 @@ trait AlterTableAddPartitionSuiteBase extends QueryTest with SQLTestUtils { | part8 = '2020-11-23', | part9 = '2020-11-23 22:13:10.123456' |""".stripMargin - sql(s"ALTER TABLE $t ADD PARTITION ($partSpec) LOCATION 'loc1'") + sql(s"ALTER TABLE $t ADD PARTITION ($partSpec)") val expected = Map( "part0" -> "-1", "part1" -> "0", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 2e98a76c52488..5357f4b63d794 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -983,12 +983,16 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto assert(fetched1.get.colStats.size == 2) withTempPaths(numPaths = 2) { case Seq(dir1, dir2) => - val file1 = new File(dir1 + "/data") + val partDir1 = new File(new File(dir1, "ds=2008-04-09"), "hr=11") + val file1 = new File(partDir1, "data") + file1.getParentFile.mkdirs() Utils.tryWithResource(new PrintWriter(file1)) { writer => writer.write("1,a") } - val file2 = new File(dir2 + "/data") + val partDir2 = new File(new File(dir2, "ds=2008-04-09"), "hr=12") + val file2 = new 
File(partDir2, "data") + file2.getParentFile.mkdirs() Utils.tryWithResource(new PrintWriter(file2)) { writer => writer.write("1,a") } @@ -997,8 +1001,8 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto sql( s""" |ALTER TABLE $table ADD - |PARTITION (ds='2008-04-09', hr='11') LOCATION '${dir1.toURI.toString}' - |PARTITION (ds='2008-04-09', hr='12') LOCATION '${dir2.toURI.toString}' + |PARTITION (ds='2008-04-09', hr='11') LOCATION '${partDir1.toURI.toString}' + |PARTITION (ds='2008-04-09', hr='12') LOCATION '${partDir1.toURI.toString}' """.stripMargin) if (autoUpdate) { val fetched2 = checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = None) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 9f75f8797fe37..a6c40851b1c4e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -601,8 +601,8 @@ class HiveDDLSuite val tab = "tab_with_partitions" withTempDir { tmpDir => val basePath = new File(tmpDir.getCanonicalPath) - val part1Path = new File(basePath + "/part1") - val part2Path = new File(basePath + "/part2") + val part1Path = new File(new File(basePath, "part10"), "part11") + val part2Path = new File(new File(basePath, "part20"), "part21") val dirSet = part1Path :: part2Path :: Nil // Before data insertion, all the directory are empty From 817f58ddcb775dacbe1b4b2b99056a74a56f65e9 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 14 Dec 2020 08:16:33 +0000 Subject: [PATCH 0752/1009] [SPARK-33768][SQL] Remove `retainData` from `AlterTableDropPartition` ### What changes were proposed in this pull request? Remove the `retainData` parameter from the logical node `AlterTableDropPartition`. ### Why are the changes needed? The `AlterTableDropPartition` command reflects the sql statement (see SqlBase.g4): ``` | ALTER (TABLE | VIEW) multipartIdentifier DROP (IF EXISTS)? partitionSpec (',' partitionSpec)* PURGE? #dropTablePartitions ``` but Spark doesn't allow to specify data retention. So, the parameter can be removed to improve code maintenance. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running the test suite `DDLParserSuite`. Closes #30748 from MaxGekk/remove-retainData. 
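For reference, a minimal sketch of the statement forms the quoted `#dropTablePartitions` rule accepts (table and partition values are illustrative). None of them can express data retention, which is why the flag is dropped from the logical plan and the v1 path below simply hard-codes `retainData = false`.

```sql
ALTER TABLE tbl DROP IF EXISTS PARTITION (dt='2008-08-08', country='us');
ALTER TABLE tbl DROP PARTITION (dt='2008-08-08'), PARTITION (dt='2009-09-09') PURGE;
```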
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../apache/spark/sql/catalyst/analysis/CheckAnalysis.scala | 2 +- .../spark/sql/catalyst/analysis/ResolvePartitionSpec.scala | 2 +- .../org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 3 +-- .../spark/sql/catalyst/plans/logical/v2Commands.scala | 3 +-- .../apache/spark/sql/catalyst/parser/DDLParserSuite.scala | 6 ++---- .../spark/sql/catalyst/analysis/ResolveSessionCatalog.scala | 4 ++-- .../sql/execution/datasources/v2/DataSourceV2Strategy.scala | 2 +- 7 files changed, 9 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 5d4dc21810281..c8e137e9c18ac 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -586,7 +586,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { case AlterTableAddPartition(ResolvedTable(_, _, table), parts, _) => checkAlterTablePartition(table, parts) - case AlterTableDropPartition(ResolvedTable(_, _, table), parts, _, _, _) => + case AlterTableDropPartition(ResolvedTable(_, _, table), parts, _, _) => checkAlterTablePartition(table, parts) case showPartitions: ShowPartitions => checkShowPartitions(showPartitions) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala index 099ac6172c9e6..35e4820cd710b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala @@ -43,7 +43,7 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { requireExactMatchedPartitionSpec(table.name, _, partitionSchema.fieldNames))) case r @ AlterTableDropPartition( - ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs, _, _, _) => + ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs, _, _) => val partitionSchema = table.partitionSchema() r.copy(parts = resolvePartitionSpecs( table.name, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index a6df7690c7e47..a7bb2179767c8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3791,8 +3791,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg "ALTER TABLE ... 
DROP PARTITION ..."), partSpecs.toSeq, ifExists = ctx.EXISTS != null, - purge = ctx.PURGE != null, - retainData = false) + purge = ctx.PURGE != null) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 1a37630a48461..9446fe383dd9d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -666,8 +666,7 @@ case class AlterTableDropPartition( child: LogicalPlan, parts: Seq[PartitionSpec], ifExists: Boolean, - purge: Boolean, - retainData: Boolean) extends Command { + purge: Boolean) extends Command { override lazy val resolved: Boolean = childrenResolved && parts.forall(_.isInstanceOf[ResolvedPartitionSpec]) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index b860571df0791..481d7504dda3d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -2143,8 +2143,7 @@ class DDLParserSuite extends AnalysisTest { UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us")), UnresolvedPartitionSpec(Map("dt" -> "2009-09-09", "country" -> "uk"))), ifExists = true, - purge = false, - retainData = false) + purge = false) val expected2_table = expected1_table.copy(ifExists = false) val expected1_purge = expected1_table.copy(purge = true) @@ -2157,8 +2156,7 @@ class DDLParserSuite extends AnalysisTest { UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... 
DROP PARTITION ..."), Seq(UnresolvedPartitionSpec(Map("ds" -> "2017-06-10"))), ifExists = true, - purge = false, - retainData = false) + purge = false) val parsed3_table = parsePlan(sql3_table) comparePlans(parsed3_table, expected3_table) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 6e06cb3a1f928..2449f73112bf4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -469,13 +469,13 @@ class ResolveSessionCatalog( to) case AlterTableDropPartition( - ResolvedV1TableIdentifier(ident), specs, ifExists, purge, retainData) => + ResolvedV1TableIdentifier(ident), specs, ifExists, purge) => AlterTableDropPartitionCommand( ident.asTableIdentifier, specs.asUnresolvedPartitionSpecs.map(_.spec), ifExists, purge, - retainData) + retainData = false) case AlterTableSerDePropertiesStatement(tbl, serdeClassName, serdeProperties, partitionSpec) => val v1TableName = parseV1Table(tbl, "ALTER TABLE SerDe Properties") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 7d278c33b97fc..ea6ac6ca92aa0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -316,7 +316,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat table, parts.asResolvedPartitionSpecs, ignoreIfExists) :: Nil case AlterTableDropPartition( - ResolvedTable(_, _, table: SupportsPartitionManagement), parts, ignoreIfNotExists, _, _) => + ResolvedTable(_, _, table: SupportsPartitionManagement), parts, ignoreIfNotExists, _) => AlterTableDropPartitionExec( table, parts.asResolvedPartitionSpecs, ignoreIfNotExists) :: Nil From e7fe92f12991ce4ccc101c2cc01354201c9c5384 Mon Sep 17 00:00:00 2001 From: "xuewei.linxuewei" Date: Mon, 14 Dec 2020 08:27:18 +0000 Subject: [PATCH 0753/1009] [SPARK-33546][SQL] Enable row format file format validation in CREATE TABLE LIKE ### What changes were proposed in this pull request? [SPARK-33546] stated the there are three inconsistency behaviors for CREATE TABLE LIKE. 1. CREATE TABLE LIKE does not validate the user-specified hive serde. e.g., STORED AS PARQUET can't be used with ROW FORMAT SERDE. 2. CREATE TABLE LIKE requires STORED AS and ROW FORMAT SERDE to be specified together, which is not necessary. 3. CREATE TABLE LIKE does not respect the default hive serde. This PR fix No.1, and after investigate, No.2 and No.3 turn out not to be issue. Within Hive. CREATE TABLE abc ... ROW FORMAT SERDE 'xxx.xxx.SerdeClass' (Without Stored as) will have following result. Using the user specific SerdeClass and fetch default input/output format from default textfile format. ``` SerDe Library: xxx.xxx.SerdeClass InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat ``` But for CREATE TABLE dst LIKE src ROW FORMAT SERDE 'xxx.xxx.SerdeClass' (Without Stored as) will just ignore user specific SerdeClass and using (input, output, serdeClass) from src table. 
It's better to just throw an exception on such ambiguous behavior, so No.2 is not an issue, but in the PR, we add some comments. For No.3, in fact, CreateTableLikeCommand is using following logical to try to follow src table's storageFormat if current fileFormat.inputFormat is empty ``` val newStorage = if (fileFormat.inputFormat.isDefined) { fileFormat } else { sourceTableDesc.storage.copy(locationUri = fileFormat.locationUri) } ``` If we try to fill the new target table with HiveSerDe.getDefaultStorage if file format and row format is not explicity spefified, it will break the CREATE TABLE LIKE semantic. ### Why are the changes needed? Bug Fix. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Added UT and Existing UT. Closes #30705 from leanken/leanken-SPARK-33546. Authored-by: xuewei.linxuewei Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/AstBuilder.scala | 5 +- .../spark/sql/execution/SparkSqlParser.scala | 9 +- .../sql/hive/execution/HiveDDLSuite.scala | 130 ++++++++++++++---- 3 files changed, 108 insertions(+), 36 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index a7bb2179767c8..660d617a07b44 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2956,9 +2956,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg protected def getSerdeInfo( rowFormatCtx: Seq[RowFormatContext], createFileFormatCtx: Seq[CreateFileFormatContext], - ctx: ParserRuleContext, - skipCheck: Boolean = false): Option[SerdeInfo] = { - if (!skipCheck) validateRowFormatFileFormat(rowFormatCtx, createFileFormatCtx, ctx) + ctx: ParserRuleContext): Option[SerdeInfo] = { + validateRowFormatFileFormat(rowFormatCtx, createFileFormatCtx, ctx) val rowFormatSerdeInfo = rowFormatCtx.map(visitRowFormat) val fileFormatSerdeInfo = createFileFormatCtx.map(visitCreateFileFormat) (fileFormatSerdeInfo ++ rowFormatSerdeInfo).reduceLeftOption((l, r) => l.merge(r)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index ba5874c21f6c4..3ca3461dfbd47 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -447,14 +447,16 @@ class SparkSqlAstBuilder extends AstBuilder { checkDuplicateClauses(ctx.TBLPROPERTIES, "TBLPROPERTIES", ctx) val provider = ctx.tableProvider.asScala.headOption.map(_.multipartIdentifier.getText) val location = visitLocationSpecList(ctx.locationSpec()) - // TODO: Do not skip serde check for CREATE TABLE LIKE. val serdeInfo = getSerdeInfo( - ctx.rowFormat.asScala.toSeq, ctx.createFileFormat.asScala.toSeq, ctx, skipCheck = true) + ctx.rowFormat.asScala.toSeq, ctx.createFileFormat.asScala.toSeq, ctx) if (provider.isDefined && serdeInfo.isDefined) { operationNotAllowed(s"CREATE TABLE LIKE ... USING ... ${serdeInfo.get.describe}", ctx) } - // TODO: remove this restriction as it seems unnecessary. 
+ // For "CREATE TABLE dst LIKE src ROW FORMAT SERDE xxx" which doesn't specify the file format, + // it's a bit weird to use the default file format, but it's also weird to get file format + // from the source table while the serde class is user-specified. + // Here we require both serde and format to be specified, to avoid confusion. serdeInfo match { case Some(SerdeInfo(storedAs, formatClasses, serde, _)) => if (storedAs.isEmpty && formatClasses.isEmpty && serde.isDefined) { @@ -463,7 +465,6 @@ class SparkSqlAstBuilder extends AstBuilder { case _ => } - // TODO: also look at `HiveSerDe.getDefaultStorage`. val storage = toStorageFormat(location, serdeInfo, ctx) val properties = Option(ctx.tableProps).map(visitPropertyKeyValues).getOrElse(Map.empty) CreateTableLikeCommand( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index a6c40851b1c4e..b686d040b9644 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.hive.execution import java.io.File import java.net.URI +import java.util.Locale import org.apache.hadoop.fs.Path import org.apache.parquet.format.converter.ParquetMetadataConverter.NO_FILTER @@ -2771,7 +2772,7 @@ class HiveDDLSuite test("Create Table LIKE with row format") { val catalog = spark.sessionState.catalog - withTable("sourceHiveTable", "sourceDsTable", "targetHiveTable1", "targetHiveTable2") { + withTable("sourceHiveTable", "sourceDsTable") { sql("CREATE TABLE sourceHiveTable(a INT, b INT) STORED AS PARQUET") sql("CREATE TABLE sourceDsTable(a INT, b INT) USING PARQUET") @@ -2817,34 +2818,6 @@ class HiveDDLSuite """.stripMargin) }.getMessage assert(e.contains("Operation not allowed: CREATE TABLE LIKE ... USING ... 
STORED AS")) - - // row format works with STORED AS hive format (from hive table) - spark.sql( - """ - |CREATE TABLE targetHiveTable1 LIKE sourceHiveTable STORED AS PARQUET - |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - |WITH SERDEPROPERTIES ('test' = 'test') - """.stripMargin) - var table = catalog.getTableMetadata(TableIdentifier("targetHiveTable1")) - assert(table.provider === Some("hive")) - assert(table.storage.inputFormat === - Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat")) - assert(table.storage.serde === Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) - assert(table.storage.properties("test") == "test") - - // row format works with STORED AS hive format (from datasource table) - spark.sql( - """ - |CREATE TABLE targetHiveTable2 LIKE sourceDsTable STORED AS PARQUET - |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - |WITH SERDEPROPERTIES ('test' = 'test') - """.stripMargin) - table = catalog.getTableMetadata(TableIdentifier("targetHiveTable2")) - assert(table.provider === Some("hive")) - assert(table.storage.inputFormat === - Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat")) - assert(table.storage.serde === Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) - assert(table.storage.properties("test") == "test") } } @@ -2872,4 +2845,103 @@ class HiveDDLSuite assert(sql("SELECT * FROM t2 WHERE c = 'A'").collect().isEmpty) } } + + test("SPARK-33546: CREATE TABLE LIKE should validate row format & file format") { + val catalog = spark.sessionState.catalog + withTable("sourceHiveTable", "sourceDsTable") { + sql("CREATE TABLE sourceHiveTable(a INT, b INT) STORED AS PARQUET") + sql("CREATE TABLE sourceDsTable(a INT, b INT) USING PARQUET") + + // ROW FORMAT SERDE ... STORED AS [SEQUENCEFILE | RCFILE | TEXTFILE] + val allowSerdeFileFormats = Seq("TEXTFILE", "SEQUENCEFILE", "RCFILE") + Seq("sourceHiveTable", "sourceDsTable").foreach { sourceTable => + allowSerdeFileFormats.foreach { format => + withTable("targetTable") { + spark.sql( + s""" + |CREATE TABLE targetTable LIKE $sourceTable + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + |STORED AS $format + """.stripMargin) + + val expectedSerde = HiveSerDe.sourceToSerDe(format) + val table = catalog.getTableMetadata(TableIdentifier("targetTable", Some("default"))) + assert(table.provider === Some("hive")) + assert(table.storage.inputFormat === Some(expectedSerde.get.inputFormat.get)) + assert(table.storage.outputFormat === Some(expectedSerde.get.outputFormat.get)) + assert(table.storage.serde === + Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) + } + } + + // negative case + hiveFormats.filterNot(allowSerdeFileFormats.contains(_)).foreach { format => + withTable("targetTable") { + val ex = intercept[AnalysisException] { + spark.sql( + s""" + |CREATE TABLE targetTable LIKE $sourceTable + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + |STORED AS $format + """.stripMargin) + }.getMessage + assert(ex.contains( + s"ROW FORMAT SERDE is incompatible with format '${format.toLowerCase(Locale.ROOT)}'")) + } + } + } + + // ROW FORMAT DELIMITED ... 
STORED AS TEXTFILE + Seq("sourceHiveTable", "sourceDsTable").foreach { sourceTable => + withTable("targetTable") { + spark.sql( + s""" + |CREATE TABLE targetTable LIKE $sourceTable + |ROW FORMAT DELIMITED + |STORED AS TEXTFILE + """.stripMargin) + + val expectedSerde = HiveSerDe.sourceToSerDe("TEXTFILE") + val table = catalog.getTableMetadata(TableIdentifier("targetTable", Some("default"))) + assert(table.provider === Some("hive")) + assert(table.storage.inputFormat === Some(expectedSerde.get.inputFormat.get)) + assert(table.storage.outputFormat === Some(expectedSerde.get.outputFormat.get)) + assert(table.storage.serde === Some(expectedSerde.get.serde.get)) + + // negative case + val ex = intercept[AnalysisException] { + spark.sql( + s""" + |CREATE TABLE targetTable LIKE $sourceTable + |ROW FORMAT DELIMITED + |STORED AS PARQUET + """.stripMargin) + }.getMessage + assert(ex.contains("ROW FORMAT DELIMITED is only compatible with 'textfile'")) + } + } + + // ROW FORMAT ... STORED AS INPUTFORMAT ... OUTPUTFORMAT ... + hiveFormats.foreach { tableType => + val expectedSerde = HiveSerDe.sourceToSerDe(tableType) + Seq("sourceHiveTable", "sourceDsTable").foreach { sourceTable => + withTable("targetTable") { + spark.sql( + s""" + |CREATE TABLE targetTable LIKE $sourceTable + |ROW FORMAT SERDE '${expectedSerde.get.serde.get}' + |STORED AS INPUTFORMAT '${expectedSerde.get.inputFormat.get}' + |OUTPUTFORMAT '${expectedSerde.get.outputFormat.get}' + """.stripMargin) + + val table = catalog.getTableMetadata(TableIdentifier("targetTable", Some("default"))) + assert(table.provider === Some("hive")) + assert(table.storage.inputFormat === Some(expectedSerde.get.inputFormat.get)) + assert(table.storage.outputFormat === Some(expectedSerde.get.outputFormat.get)) + assert(table.storage.serde === Some(expectedSerde.get.serde.get)) + } + } + } + } + } } From b7c82101352078fb10ab1822bc745c8b4fbb2590 Mon Sep 17 00:00:00 2001 From: Linhong Liu Date: Mon, 14 Dec 2020 08:31:50 +0000 Subject: [PATCH 0754/1009] [SPARK-33142][SPARK-33647][SQL][FOLLOW-UP] Add docs and test cases ### What changes were proposed in this pull request? Addressed comments in PR #30567, including: 1. add test case for SPARK-33647 and SPARK-33142 2. add migration guide 3. add `getRawTempView` and `getRawGlobalTempView` to return the raw view info (i.e. TemporaryViewRelation) 4. other minor code clean ### Why are the changes needed? Code clean and more test cases ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing and newly added test cases Closes #30666 from linhongliu-db/SPARK-33142-followup. 
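A rough SQL sketch of the behavior exercised by the new `SQLViewTestSuite` case below (database and table names are illustrative): the view captures the catalog and namespace at creation time, so changing the current database afterwards does not affect what it resolves. As the migration guide entry below notes, `spark.sql.legacy.storeAnalyzedPlanForView` can be set to `true` to restore the old temporary view behavior.

```sql
USE default;
CREATE TABLE t USING parquet AS SELECT 1 AS c1;
CREATE TEMPORARY VIEW v1 AS SELECT * FROM t;  -- captures database `default`
USE other_db;
CREATE TABLE t USING parquet AS SELECT 2 AS c1;
SELECT * FROM v1;                             -- still reads default.t and returns 1
```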
Lead-authored-by: Linhong Liu Co-authored-by: Linhong Liu <67896261+linhongliu-db@users.noreply.github.com> Signed-off-by: Wenchen Fan --- docs/sql-migration-guide.md | 4 +- .../sql/catalyst/catalog/SessionCatalog.scala | 44 +++++++++++++++---- .../plans/logical/basicLogicalOperators.scala | 16 ------- .../spark/sql/execution/command/views.scala | 16 ++----- .../apache/spark/sql/CachedTableSuite.scala | 13 ++++++ .../spark/sql/execution/SQLViewSuite.scala | 14 ------ .../sql/execution/SQLViewTestSuite.scala | 24 +++++++++- 7 files changed, 79 insertions(+), 52 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 484823b7c07ab..4b6c2266387f5 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -58,7 +58,9 @@ license: | - In Spark 3.1, refreshing a table will trigger an uncache operation for all other caches that reference the table, even if the table itself is not cached. In Spark 3.0 the operation will only be triggered if the table itself is cached. - - In Spark 3.1, creating or altering a view will capture runtime SQL configs and store them as view properties. These configs will be applied during the parsing and analysis phases of the view resolution. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.useCurrentConfigsForView` to `true`. + - In Spark 3.1, creating or altering a permanent view will capture runtime SQL configs and store them as view properties. These configs will be applied during the parsing and analysis phases of the view resolution. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.useCurrentConfigsForView` to `true`. + + - In Spark 3.1, the temporary view will have same behaviors with the permanent view, i.e. capture and store runtime SQL configs, SQL text, catalog and namespace. The capatured view properties will be applied during the parsing and analysis phases of the view resolution. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.storeAnalyzedPlanForView` to `true`. - Since Spark 3.1, CHAR/CHARACTER and VARCHAR types are supported in the table schema. Table scan/insertion will respect the char/varchar semantic. If char/varchar is used in places other than table schema, an exception will be thrown (CAST is an exception that simply treats char/varchar as string like before). To restore the behavior before Spark 3.1, which treats them as STRING types and ignores a length parameter, e.g. `CHAR(4)`, you can set `spark.sql.legacy.charVarcharAsString` to `true`. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 7c805bdb4b6f1..9814f4b3aa75b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -610,8 +610,16 @@ class SessionCatalog( /** * Return a local temporary view exactly as it was stored. 
*/ + def getRawTempView(name: String): Option[LogicalPlan] = synchronized { + tempViews.get(formatTableName(name)) + } + + /** + * Generate a [[View]] operator from the view description if the view stores sql text, + * otherwise, it is same to `getRawTempView` + */ def getTempView(name: String): Option[LogicalPlan] = synchronized { - tempViews.get(formatTableName(name)).map(getTempViewPlan) + getRawTempView(name).map(getTempViewPlan) } def getTempViewNames(): Seq[String] = synchronized { @@ -621,8 +629,16 @@ class SessionCatalog( /** * Return a global temporary view exactly as it was stored. */ + def getRawGlobalTempView(name: String): Option[LogicalPlan] = { + globalTempViewManager.get(formatTableName(name)) + } + + /** + * Generate a [[View]] operator from the view description if the view stores sql text, + * otherwise, it is same to `getRawGlobalTempView` + */ def getGlobalTempView(name: String): Option[LogicalPlan] = { - globalTempViewManager.get(formatTableName(name)).map(getTempViewPlan) + getRawGlobalTempView(name).map(getTempViewPlan) } /** @@ -659,7 +675,7 @@ class SessionCatalog( def getTempViewOrPermanentTableMetadata(name: TableIdentifier): CatalogTable = synchronized { val table = formatTableName(name.table) if (name.database.isEmpty) { - getTempView(table).map { + tempViews.get(table).map { case TemporaryViewRelation(metadata) => metadata case plan => CatalogTable( @@ -669,7 +685,6 @@ class SessionCatalog( schema = plan.output.toStructType) }.getOrElse(getTableMetadata(name)) } else if (formatDatabaseName(name.database.get) == globalTempViewManager.database) { - val a = globalTempViewManager.get(table) globalTempViewManager.get(table).map { case TemporaryViewRelation(metadata) => metadata case plan => @@ -810,21 +825,34 @@ class SessionCatalog( // The relation is a view, so we wrap the relation by: // 1. Add a [[View]] operator over the relation to keep track of the view desc; // 2. Wrap the logical plan in a [[SubqueryAlias]] which tracks the name of the view. 
- val child = View.fromCatalogTable(metadata, isTempView = false, parser) - SubqueryAlias(multiParts, child) + SubqueryAlias(multiParts, fromCatalogTable(metadata, isTempView = false)) } else { SubqueryAlias(multiParts, UnresolvedCatalogRelation(metadata, options)) } } - def getTempViewPlan(plan: LogicalPlan): LogicalPlan = { + private def getTempViewPlan(plan: LogicalPlan): LogicalPlan = { plan match { case viewInfo: TemporaryViewRelation => - View.fromCatalogTable(viewInfo.tableMeta, isTempView = true, parser) + fromCatalogTable(viewInfo.tableMeta, isTempView = true) case v => v } } + private def fromCatalogTable(metadata: CatalogTable, isTempView: Boolean): View = { + val viewText = metadata.viewText.getOrElse(sys.error("Invalid view without text.")) + val viewConfigs = metadata.viewSQLConfigs + val viewPlan = + SQLConf.withExistingConf(View.effectiveSQLConf(viewConfigs, isTempView = isTempView)) { + parser.parsePlan(viewText) + } + View( + desc = metadata, + isTempView = isTempView, + output = metadata.schema.toAttributes, + child = viewPlan) + } + def lookupTempView(table: String): Option[SubqueryAlias] = { val formattedTable = formatTableName(table) getTempView(formattedTable).map { view => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 0e4bfa4dc34da..91fb77574a0ca 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -22,7 +22,6 @@ import org.apache.spark.sql.catalyst.analysis.{EliminateView, MultiInstanceRelat import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression -import org.apache.spark.sql.catalyst.parser.ParserInterface import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, RangePartitioning, RoundRobinPartitioning} import org.apache.spark.sql.catalyst.util.truncatedString @@ -485,21 +484,6 @@ object View { } sqlConf } - - def fromCatalogTable( - metadata: CatalogTable, isTempView: Boolean, parser: ParserInterface): View = { - val viewText = metadata.viewText.getOrElse(sys.error("Invalid view without text.")) - val viewConfigs = metadata.viewSQLConfigs - val viewPlan = - SQLConf.withExistingConf(effectiveSQLConf(viewConfigs, isTempView = isTempView)) { - parser.parsePlan(viewText) - } - View( - desc = metadata, - isTempView = isTempView, - output = metadata.schema.toAttributes, - child = viewPlan) - } } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index 06b1e03adea50..6f32f9d2bfcbe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -113,12 +113,8 @@ case class CreateViewCommand( verifyTemporaryObjectsNotExists(catalog, isTemporary, name, child) if (viewType == LocalTempView) { - val shouldUncache = replace && catalog.getTempView(name.table).exists { - // Uncache View logical plan without checking the same result check, since it's unresolved. 
- case _: View => true - case other => !other.sameResult(child) - } - if (shouldUncache) { + if (replace && catalog.getRawTempView(name.table).isDefined && + !catalog.getRawTempView(name.table).get.sameResult(child)) { logInfo(s"Try to uncache ${name.quotedString} before replacing.") checkCyclicViewReference(analyzedPlan, Seq(name), name) CommandUtils.uncacheTableOrView(sparkSession, name.quotedString) @@ -141,12 +137,8 @@ case class CreateViewCommand( } else if (viewType == GlobalTempView) { val db = sparkSession.sessionState.conf.getConf(StaticSQLConf.GLOBAL_TEMP_DATABASE) val viewIdent = TableIdentifier(name.table, Option(db)) - val shouldUncache = replace && catalog.getGlobalTempView(name.table).exists { - // Uncache View logical plan without checking the same result check, since it's unresolved. - case _: View => true - case other => !other.sameResult(child) - } - if (shouldUncache) { + if (replace && catalog.getRawGlobalTempView(name.table).isDefined && + !catalog.getRawGlobalTempView(name.table).get.sameResult(child)) { logInfo(s"Try to uncache ${viewIdent.quotedString} before replacing.") checkCyclicViewReference(analyzedPlan, Seq(viewIdent), viewIdent) CommandUtils.uncacheTableOrView(sparkSession, viewIdent.quotedString) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala index a3a6d6721c993..af8d72309bdea 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala @@ -1272,4 +1272,17 @@ class CachedTableSuite extends QueryTest with SQLTestUtils } } } + + test("SPARK-33647: cache table support for permanent view") { + withView("v1") { + spark.catalog.clearCache() + sql("create or replace view v1 as select 1") + sql("cache table v1") + assert(spark.sharedState.cacheManager.lookupCachedData(sql("select 1")).isDefined) + sql("create or replace view v1 as select 1, 2") + assert(spark.sharedState.cacheManager.lookupCachedData(sql("select 1")).isEmpty) + sql("cache table v1") + assert(spark.sharedState.cacheManager.lookupCachedData(sql("select 1, 2")).isDefined) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 7595ae0ec7a53..50db986490033 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -812,20 +812,6 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { } } - test("creating local temp view should not affect existing table reference") { - withTable("t") { - withTempView("t") { - withGlobalTempView("v") { - val globalTempDB = spark.sharedState.globalTempViewManager.database - Seq(2).toDF("c1").write.format("parquet").saveAsTable("t") - sql("CREATE GLOBAL TEMPORARY VIEW v AS SELECT * FROM t") - sql("CREATE TEMPORARY VIEW t AS SELECT 1") - checkAnswer(sql(s"SELECT * FROM ${globalTempDB}.v"), Seq(Row(2))) - } - } - } - } - test("SPARK-33141: view should be parsed and analyzed with configs set when creating") { withTable("t") { withView("v1", "v2", "v3", "v4", "v5") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala index 3a7a63ed45ce3..8c3d92358a975 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala @@ -200,6 +200,29 @@ abstract class SQLViewTestSuite extends QueryTest with SQLTestUtils { } } + test("view should use captured catalog and namespace to resolve relation") { + withTempDatabase { dbName => + withTable("default.t", s"$dbName.t") { + withTempView("t") { + // create a table in default database + sql("USE DEFAULT") + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + // create a view refer the created table in default database + val viewName = createView("v1", "SELECT * FROM t") + // using another database to create a table with same name + sql(s"USE $dbName") + Seq(4, 5, 6).toDF("c1").write.format("parquet").saveAsTable("t") + // create a temporary view with the same name + sql("CREATE TEMPORARY VIEW t AS SELECT 1") + withView(viewName) { + // view v1 should still refer the table defined in `default` database + checkViewOutput(viewName, Seq(Row(2), Row(3), Row(1))) + } + } + } + } + } + test("SPARK-33692: view should use captured catalog and namespace to lookup function") { val avgFuncClass = "test.org.apache.spark.sql.MyDoubleAvg" val sumFuncClass = "test.org.apache.spark.sql.MyDoubleSum" @@ -231,7 +254,6 @@ abstract class SQLViewTestSuite extends QueryTest with SQLTestUtils { class LocalTempViewTestSuite extends SQLViewTestSuite with SharedSparkSession { override protected def viewTypeString: String = "TEMPORARY VIEW" override protected def formattedViewName(viewName: String): String = viewName - } class GlobalTempViewTestSuite extends SQLViewTestSuite with SharedSparkSession { From a84c8d842ca027ab0f1b641146e81fc2782d150d Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Mon, 14 Dec 2020 08:39:01 +0000 Subject: [PATCH 0755/1009] [SPARK-33751][SQL] Migrate ALTER VIEW ... AS command to use UnresolvedView to resolve the identifier ### What changes were proposed in this pull request? This PR migrates `ALTER VIEW ... AS` to use `UnresolvedView` to resolve the view identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). The `TempViewOrV1Table` extractor in `ResolveSessionCatalog.scala` can now be removed as well. ### Why are the changes needed? To use `UnresolvedView` for view resolution. ### Does this PR introduce _any_ user-facing change? The exception message changes if a table is found instead of view: ``` // OLD `tab1` is not a view" ``` ``` // NEW "tab1 is a table. 'ALTER VIEW ... AS' expects a view." ``` ### How was this patch tested? Updated existing tests. Closes #30723 from imback82/alter_view_as_statement. 
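A short sketch of the user-visible difference described above, matching the updated `SQLViewSuite` expectation (the table name is illustrative):

```sql
CREATE TABLE tab1 (id INT) USING parquet;
ALTER VIEW tab1 AS SELECT 1;
-- before: `tab1` is not a view
-- after:  tab1 is a table. 'ALTER VIEW ... AS' expects a view.
```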
Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/AstBuilder.scala | 8 +++++--- .../catalyst/plans/logical/statements.scala | 8 -------- .../catalyst/plans/logical/v2Commands.scala | 10 ++++++++++ .../sql/catalyst/parser/DDLParserSuite.scala | 6 ++++-- .../analysis/ResolveSessionCatalog.scala | 19 ++----------------- .../spark/sql/execution/command/views.scala | 3 --- .../sql/connector/DataSourceV2SQLSuite.scala | 13 +++++-------- .../spark/sql/execution/SQLViewSuite.scala | 11 ++++++++--- 8 files changed, 34 insertions(+), 44 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 660d617a07b44..1bebf025cc795 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3869,7 +3869,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } /** - * Alter the query of a view. This creates a [[AlterViewAsStatement]] + * Alter the query of a view. This creates a [[AlterViewAs]] * * For example: * {{{ @@ -3877,8 +3877,10 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * }}} */ override def visitAlterViewQuery(ctx: AlterViewQueryContext): LogicalPlan = withOrigin(ctx) { - AlterViewAsStatement( - visitMultipartIdentifier(ctx.multipartIdentifier), + AlterViewAs( + UnresolvedView( + visitMultipartIdentifier(ctx.multipartIdentifier), + "ALTER VIEW ... AS"), originalText = source(ctx.query), query = plan(ctx.query)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index d628bc914dba7..a0e11962f9c05 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -315,14 +315,6 @@ case class AlterTableSerDePropertiesStatement( serdeProperties: Option[Map[String, String]], partitionSpec: Option[TablePartitionSpec]) extends ParsedStatement -/** - * ALTER VIEW ... Query command, as parsed from SQL. - */ -case class AlterViewAsStatement( - viewName: Seq[String], - originalText: String, - query: LogicalPlan) extends ParsedStatement - /** * An INSERT INTO statement, as parsed from SQL. * diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 9446fe383dd9d..0f35674055dc4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -741,6 +741,16 @@ case class RepairTable(child: LogicalPlan) extends Command { override def children: Seq[LogicalPlan] = child :: Nil } +/** + * The logical plan of the ALTER VIEW ... AS command. + */ +case class AlterViewAs( + child: LogicalPlan, + originalText: String, + query: LogicalPlan) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} + /** * The logical plan of the ALTER VIEW ... SET TBLPROPERTIES command. 
*/ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 481d7504dda3d..e8bbc6b22a819 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -2254,8 +2254,10 @@ class DDLParserSuite extends AnalysisTest { test("alter view: AS Query") { val parsed = parsePlan("ALTER VIEW a.b.c AS SELECT 1") - val expected = AlterViewAsStatement( - Seq("a", "b", "c"), "SELECT 1", parsePlan("SELECT 1")) + val expected = AlterViewAs( + UnresolvedView(Seq("a", "b", "c"), "ALTER VIEW ... AS", true, None), + "SELECT 1", + parsePlan("SELECT 1")) comparePlans(parsed, expected) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 2449f73112bf4..83dda7db09ac2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -485,10 +485,9 @@ class ResolveSessionCatalog( serdeProperties, partitionSpec) - case AlterViewAsStatement(name, originalText, query) => - val viewName = parseTempViewOrV1Table(name, "ALTER VIEW QUERY") + case AlterViewAs(ResolvedView(ident, _), originalText, query) => AlterViewAsCommand( - viewName.asTableIdentifier, + ident.asTableIdentifier, originalText, query) @@ -582,12 +581,6 @@ class ResolveSessionCatalog( case _ => throw new AnalysisException(s"$sql is only supported with v1 tables.") } - private def parseTempViewOrV1Table( - nameParts: Seq[String], sql: String): Seq[String] = nameParts match { - case TempViewOrV1Table(name) => name - case _ => throw new AnalysisException(s"$sql is only supported with temp views or v1 tables.") - } - private def getStorageFormatAndProvider( provider: Option[String], options: Map[String, String], @@ -688,14 +681,6 @@ class ResolveSessionCatalog( } } - object TempViewOrV1Table { - def unapply(nameParts: Seq[String]): Option[Seq[String]] = nameParts match { - case _ if isTempView(nameParts) => Some(nameParts) - case SessionCatalogAndIdentifier(_, tbl) => Some(tbl.asMultipartIdentifier) - case _ => None - } - } - object SessionCatalogAndNamespace { def unapply(resolved: ResolvedNamespace): Option[(CatalogPlugin, Seq[String])] = if (isSessionCatalog(resolved.catalog)) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index 6f32f9d2bfcbe..6401167458a3e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -285,9 +285,6 @@ case class AlterViewAsCommand( private def alterPermanentView(session: SparkSession, analyzedPlan: LogicalPlan): Unit = { val viewMeta = session.sessionState.catalog.getTableMetadata(name) - if (viewMeta.tableType != CatalogTableType.VIEW) { - throw new AnalysisException(s"${viewMeta.identifier} is not a view.") - } // Detect cyclic view reference on ALTER VIEW. 
val viewIdent = viewMeta.identifier diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index bc570efb70bdf..0c65e530f67da 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2105,14 +2105,6 @@ class DataSourceV2SQLSuite } } - test("ALTER VIEW AS QUERY") { - val v = "testcat.ns1.ns2.v" - val e = intercept[AnalysisException] { - sql(s"ALTER VIEW $v AS SELECT 1") - } - assert(e.message.contains("ALTER VIEW QUERY is only supported with temp views or v1 tables")) - } - test("CREATE VIEW") { val v = "testcat.ns1.ns2.v" val e = intercept[AnalysisException] { @@ -2618,6 +2610,11 @@ class DataSourceV2SQLSuite "testcat", "v", "ALTER VIEW ... UNSET TBLPROPERTIES") + validateViewCommand( + "ALTER VIEW testcat.v AS SELECT 1", + "testcat", + "v", + "ALTER VIEW ... AS") } private def testNotSupportedV2Command(sqlCommand: String, sqlParams: String): Unit = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 50db986490033..c60b61a111c3f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -111,7 +111,7 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { e = intercept[AnalysisException] { sql("ALTER VIEW tab1 AS SELECT * FROM jt") }.getMessage - assert(e.contains("`tab1` is not a view")) + assert(e.contains("tab1 is a table. 'ALTER VIEW ... AS' expects a view.")) } } @@ -448,8 +448,13 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { } test("should not allow ALTER VIEW AS when the view does not exist") { - assertNoSuchTable("ALTER VIEW testView AS SELECT 1, 2") - assertNoSuchTable("ALTER VIEW default.testView AS SELECT 1, 2") + assertAnalysisError( + "ALTER VIEW testView AS SELECT 1, 2", + "View not found for 'ALTER VIEW ... AS': testView") + + assertAnalysisError( + "ALTER VIEW default.testView AS SELECT 1, 2", + "View not found for 'ALTER VIEW ... AS': default.testView") } test("ALTER VIEW AS should try to alter temp view first if view name has no database part") { From cd0356df9e3cb8e8690a216b8adfac75bcf1365f Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 14 Dec 2020 17:51:40 +0800 Subject: [PATCH 0756/1009] [SPARK-33673][SQL] Avoid push down partition filters to ParquetScan for DataSourceV2 ### What changes were proposed in this pull request? As described in SPARK-33673, some test suites in `ParquetV2SchemaPruningSuite` will failed when set `parquet.version` to 1.11.1 because Parquet will return empty results for non-existent column since PARQUET-1765. This pr change to use `readDataSchema()` instead of `schema` to build `pushedParquetFilters` in `ParquetScanBuilder` to avoid push down partition filters to `ParquetScan` for `DataSourceV2` ### Why are the changes needed? Prepare for upgrade using Parquet 1.11.1. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? 
- Pass the Jenkins or GitHub Action - Manual test as follows: ``` mvn -Dtest=none -DwildcardSuites=org.apache.spark.sql.execution.datasources.parquet.ParquetV2SchemaPruningSuite -Dparquet.version=1.11.1 test -pl sql/core -am ``` **Before** ``` Run completed in 3 minutes, 13 seconds. Total number of tests run: 134 Suites: completed 2, aborted 0 Tests: succeeded 120, failed 14, canceled 0, ignored 0, pending 0 *** 14 TESTS FAILED *** ``` **After** ``` Run completed in 3 minutes, 46 seconds. Total number of tests run: 134 Suites: completed 2, aborted 0 Tests: succeeded 134, failed 0, canceled 0, ignored 0, pending 0 All tests passed. ``` Closes #30652 from LuciferYang/SPARK-33673. Authored-by: yangjie01 Signed-off-by: Yuming Wang --- .../execution/datasources/v2/parquet/ParquetScanBuilder.scala | 2 +- sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala index 2f861356e9499..44053830defe5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala @@ -50,7 +50,7 @@ case class ParquetScanBuilder( val pushDownInFilterThreshold = sqlConf.parquetFilterPushDownInFilterThreshold val isCaseSensitive = sqlConf.caseSensitiveAnalysis val parquetSchema = - new SparkToParquetSchemaConverter(sparkSession.sessionState.conf).convert(schema) + new SparkToParquetSchemaConverter(sparkSession.sessionState.conf).convert(readDataSchema()) val parquetFilters = new ParquetFilters(parquetSchema, pushDownDate, pushDownTimestamp, pushDownDecimal, pushDownStringStartWith, pushDownInFilterThreshold, isCaseSensitive) parquetFilters.convertibleFilters(this.filters).toArray diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala index 7d3285da25a5d..75372c5437f25 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala @@ -367,7 +367,7 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite val basePath = dir.getCanonicalPath + "/" + fmt val pushFilterMaps = Map ( "parquet" -> - "|PushedFilers: \\[.*\\(id\\), .*\\(value\\), .*\\(id,1\\), .*\\(value,2\\)\\]", + "|PushedFilers: \\[IsNotNull\\(value\\), GreaterThan\\(value,2\\)\\]", "orc" -> "|PushedFilers: \\[.*\\(id\\), .*\\(value\\), .*\\(id,1\\), .*\\(value,2\\)\\]", "csv" -> From bf2c88ccaebd8e27d9fc27c55c9955129541d3e1 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Mon, 14 Dec 2020 02:09:59 -0800 Subject: [PATCH 0757/1009] [SPARK-33716][K8S] Fix potential race condition during pod termination ### What changes were proposed in this pull request? Check that the pod state is not pending or running even if there is a deletion timestamp. ### Why are the changes needed? This can occur when the pod state and deletion timestamp are not updated by etcd in sync & we get a pod snapshot during an inconsistent view. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manual testing with local version of Minikube on an overloaded computer that caused out of sync updates. 
Closes #30693 from holdenk/SPARK-33716-decommissioning-race-condition-during-pod-snapshot. Authored-by: Holden Karau Signed-off-by: Dongjoon Hyun --- .../spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala index be75311bc3d4a..e81d213699e32 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala @@ -93,7 +93,8 @@ object ExecutorPodsSnapshot extends Logging { ( pod.getStatus == null || pod.getStatus.getPhase == null || - pod.getStatus.getPhase.toLowerCase(Locale.ROOT) != "terminating" + (pod.getStatus.getPhase.toLowerCase(Locale.ROOT) != "terminating" && + pod.getStatus.getPhase.toLowerCase(Locale.ROOT) != "running") )) } } From 5f9a7fea06cbbb6bf2b40cc9b3aa4d539c996301 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Mon, 14 Dec 2020 14:32:08 +0000 Subject: [PATCH 0758/1009] [SPARK-33428][SQL] Conv UDF use BigInt to avoid Long value overflow ### What changes were proposed in this pull request? Use Long value store encode value will overflow and return unexpected result, use BigInt to replace Long value and make logical more simple. ### Why are the changes needed? Fix value overflow issue ### Does this PR introduce _any_ user-facing change? People can sue `conf` function to convert value big then LONG.MAX_VALUE ### How was this patch tested? Added UT #### BenchMark ``` /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.spark.sql.execution.benchmark import scala.util.Random import org.apache.spark.benchmark.Benchmark import org.apache.spark.sql.functions._ object ConvFuncBenchMark extends SqlBasedBenchmark { val charset = Array[String]("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z") def constructString(from: Int, length: Int): String = { val chars = charset.slice(0, from) (0 to length).map(x => { val v = Random.nextInt(from) chars(v) }).mkString("") } private def doBenchmark(cardinality: Long, length: Int, from: Int, toBase: Int): Unit = { spark.range(cardinality) .withColumn("str", lit(constructString(from, length))) .select(conv(col("str"), from, toBase)) .noop() } /** * Main process of the whole benchmark. * Implementations of this method are supposed to use the wrapper method `runBenchmark` * for each benchmark scenario. 
*/ override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { val N = 1000000L val benchmark = new Benchmark("conv", N, output = output) benchmark.addCase("length 10 from 2 to 16") { _ => doBenchmark(N, 10, 2, 16) } benchmark.addCase("length 10 from 2 to 10") { _ => doBenchmark(N, 10, 2, 10) } benchmark.addCase("length 10 from 10 to 16") { _ => doBenchmark(N, 10, 10, 16) } benchmark.addCase("length 10 from 10 to 36") { _ => doBenchmark(N, 10, 10, 36) } benchmark.addCase("length 10 from 16 to 10") { _ => doBenchmark(N, 10, 10, 10) } benchmark.addCase("length 10 from 16 to 36") { _ => doBenchmark(N, 10, 16, 36) } benchmark.addCase("length 10 from 36 to 10") { _ => doBenchmark(N, 10, 36, 10) } benchmark.addCase("length 10 from 36 to 16") { _ => doBenchmark(N, 10, 36, 16) } // benchmark.addCase("length 20 from 10 to 16") { _ => doBenchmark(N, 20, 10, 16) } benchmark.addCase("length 20 from 10 to 36") { _ => doBenchmark(N, 20, 10, 36) } benchmark.addCase("length 30 from 10 to 16") { _ => doBenchmark(N, 30, 10, 16) } benchmark.addCase("length 30 from 10 to 36") { _ => doBenchmark(N, 30, 10, 36) } // benchmark.addCase("length 20 from 16 to 10") { _ => doBenchmark(N, 20, 16, 10) } benchmark.addCase("length 20 from 16 to 36") { _ => doBenchmark(N, 20, 16, 36) } benchmark.addCase("length 30 from 16 to 10") { _ => doBenchmark(N, 30, 16, 10) } benchmark.addCase("length 30 from 16 to 36") { _ => doBenchmark(N, 30, 16, 36) } benchmark.run() } } ``` Result with patch : ``` Java HotSpot(TM) 64-Bit Server VM 1.8.0_191-b12 on Mac OS X 10.14.6 Intel(R) Core(TM) i5-8259U CPU 2.30GHz conv: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ length 10 from 2 to 16 54 73 18 18.7 53.6 1.0X length 10 from 2 to 10 43 47 5 23.5 42.5 1.3X length 10 from 10 to 16 39 47 12 25.5 39.2 1.4X length 10 from 10 to 36 38 42 3 26.5 37.7 1.4X length 10 from 16 to 10 39 41 3 25.7 38.9 1.4X length 10 from 16 to 36 36 41 4 27.6 36.3 1.5X length 10 from 36 to 10 38 40 2 26.3 38.0 1.4X length 10 from 36 to 16 37 39 2 26.8 37.2 1.4X length 20 from 10 to 16 36 39 2 27.4 36.5 1.5X length 20 from 10 to 36 37 39 2 27.2 36.8 1.5X length 30 from 10 to 16 37 39 2 27.0 37.0 1.4X length 30 from 10 to 36 36 38 2 27.5 36.3 1.5X length 20 from 16 to 10 35 38 2 28.3 35.4 1.5X length 20 from 16 to 36 34 38 3 29.2 34.3 1.6X length 30 from 16 to 10 38 40 2 26.3 38.1 1.4X length 30 from 16 to 36 37 38 1 27.2 36.8 1.5X ``` Result without patch: ``` Java HotSpot(TM) 64-Bit Server VM 1.8.0_191-b12 on Mac OS X 10.14.6 Intel(R) Core(TM) i5-8259U CPU 2.30GHz conv: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ length 10 from 2 to 16 66 101 29 15.1 66.1 1.0X length 10 from 2 to 10 50 55 5 20.2 49.5 1.3X length 10 from 10 to 16 46 51 5 21.8 45.9 1.4X length 10 from 10 to 36 43 48 4 23.4 42.7 1.5X length 10 from 16 to 10 44 47 4 22.9 43.7 1.5X length 10 from 16 to 36 40 44 2 24.7 40.5 1.6X length 10 from 36 to 10 40 44 4 25.0 40.1 1.6X length 10 from 36 to 16 41 43 2 24.3 41.2 1.6X length 20 from 10 to 16 39 41 2 25.7 38.9 1.7X length 20 from 10 to 36 40 42 2 24.9 40.2 1.6X length 30 from 10 to 16 39 40 1 25.9 38.6 1.7X length 30 from 10 to 36 40 41 1 25.0 40.0 1.7X length 20 from 16 to 10 40 41 1 25.1 39.8 1.7X length 20 from 16 to 36 40 42 2 25.2 39.7 1.7X length 30 from 16 to 
10 39 42 2 25.6 39.0 1.7X length 30 from 16 to 36 39 40 2 25.7 38.8 1.7X ``` Closes #30350 from AngersZhuuuu/SPARK-33428. Authored-by: angerszhu Signed-off-by: Wenchen Fan --- .../sql/catalyst/util/NumberConverter.scala | 64 ++++--------------- .../expressions/MathExpressionsSuite.scala | 6 +- .../catalyst/util/NumberConverterSuite.scala | 4 +- .../apache/spark/sql/MathFunctionsSuite.scala | 2 +- .../execution/HiveCompatibilitySuite.scala | 4 +- 5 files changed, 23 insertions(+), 57 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala index 7dbdd1ef1cdc5..8c9157784e7e0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala @@ -21,64 +21,37 @@ import org.apache.spark.unsafe.types.UTF8String object NumberConverter { - /** - * Divide x by m as if x is an unsigned 64-bit integer. Examples: - * unsignedLongDiv(-1, 2) == Long.MAX_VALUE unsignedLongDiv(6, 3) == 2 - * unsignedLongDiv(0, 5) == 0 - * - * @param x is treated as unsigned - * @param m is treated as signed - */ - private def unsignedLongDiv(x: Long, m: Int): Long = { - if (x >= 0) { - x / m - } else { - // Let uval be the value of the unsigned long with the same bits as x - // Two's complement => x = uval - 2*MAX - 2 - // => uval = x + 2*MAX + 2 - // Now, use the fact: (a+b)/c = a/c + b/c + (a%c+b%c)/c - x / m + 2 * (Long.MaxValue / m) + 2 / m + (x % m + 2 * (Long.MaxValue % m) + 2 % m) / m - } - } - /** * Decode v into value[]. * - * @param v is treated as an unsigned 64-bit integer + * @param v is treated as an BigInt * @param radix must be between MIN_RADIX and MAX_RADIX */ - private def decode(v: Long, radix: Int, value: Array[Byte]): Unit = { + private def decode(v: BigInt, radix: Int, value: Array[Byte]): Unit = { var tmpV = v java.util.Arrays.fill(value, 0.asInstanceOf[Byte]) var i = value.length - 1 while (tmpV != 0) { - val q = unsignedLongDiv(tmpV, radix) - value(i) = (tmpV - q * radix).asInstanceOf[Byte] + val q = tmpV / radix + value(i) = (tmpV - q * radix).byteValue tmpV = q i -= 1 } } /** - * Convert value[] into a long. On overflow, return -1 (as mySQL does). If a - * negative digit is found, ignore the suffix starting there. + * Convert value[] into a BigInt. If a negative digit is found, + * ignore the suffix starting there. * * @param radix must be between MIN_RADIX and MAX_RADIX * @param fromPos is the first element that should be considered * @return the result should be treated as an unsigned 64-bit integer. 
*/ - private def encode(radix: Int, fromPos: Int, value: Array[Byte]): Long = { - var v: Long = 0L - val bound = unsignedLongDiv(-1 - radix, radix) // Possible overflow once + private def encode(radix: Int, fromPos: Int, value: Array[Byte]): BigInt = { + var v: BigInt = BigInt(0) var i = fromPos while (i < value.length && value(i) >= 0) { - if (v >= bound) { - // Check for overflow - if (unsignedLongDiv(-1 - value(i), radix) < v) { - return -1 - } - } - v = v * radix + value(i) + v = (v * radix) + BigInt(value(i)) i += 1 } v @@ -129,7 +102,7 @@ object NumberConverter { return null } - var (negative, first) = if (n(0) == '-') (true, 1) else (false, 0) + val (negative, first) = if (n(0) == '-') (true, 1) else (false, 0) // Copy the digits in the right side of the array val temp = new Array[Byte](64) @@ -140,19 +113,8 @@ object NumberConverter { } char2byte(fromBase, temp.length - n.length + first, temp) - // Do the conversion by going through a 64 bit integer - var v = encode(fromBase, temp.length - n.length + first, temp) - if (negative && toBase > 0) { - if (v < 0) { - v = -1 - } else { - v = -v - } - } - if (toBase < 0 && v < 0) { - v = -v - negative = true - } + // Do the conversion by going through a BigInt + val v: BigInt = encode(fromBase, temp.length - n.length + first, temp) decode(v, Math.abs(toBase), temp) // Find the first non-zero digit or the last digits if all are zero. @@ -163,7 +125,7 @@ object NumberConverter { byte2char(Math.abs(toBase), firstNonZeroPos, temp) var resultStartPos = firstNonZeroPos - if (negative && toBase < 0) { + if (negative) { resultStartPos = firstNonZeroPos - 1 temp(resultStartPos) = '-' } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala index 6d09e28362e11..4c4df9ef83de9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala @@ -158,7 +158,7 @@ class MathExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("conv") { checkEvaluation(Conv(Literal("3"), Literal(10), Literal(2)), "11") checkEvaluation(Conv(Literal("-15"), Literal(10), Literal(-16)), "-F") - checkEvaluation(Conv(Literal("-15"), Literal(10), Literal(16)), "FFFFFFFFFFFFFFF1") + checkEvaluation(Conv(Literal("-15"), Literal(10), Literal(16)), "-F") checkEvaluation(Conv(Literal("big"), Literal(36), Literal(16)), "3A48") checkEvaluation(Conv(Literal.create(null, StringType), Literal(36), Literal(16)), null) checkEvaluation(Conv(Literal("3"), Literal.create(null, IntegerType), Literal(16)), null) @@ -168,10 +168,12 @@ class MathExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation( Conv(Literal(""), Literal(10), Literal(16)), null) checkEvaluation( - Conv(Literal("9223372036854775807"), Literal(36), Literal(16)), "FFFFFFFFFFFFFFFF") + Conv(Literal("9223372036854775807"), Literal(36), Literal(16)), "12DDAC15F246BAF8C0D551AC7") // If there is an invalid digit in the number, the longest valid prefix should be converted. 
checkEvaluation( Conv(Literal("11abc"), Literal(10), Literal(16)), "B") + checkEvaluation(Conv(Literal("c8dcdfb41711fc9a1f17928001d7fd61"), Literal(16), Literal(10)), + "266992441711411603393340504520074460513") } test("e") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala index ec73f4518737d..ee7057d914b21 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala @@ -34,9 +34,9 @@ class NumberConverterSuite extends SparkFunSuite { test("convert") { checkConv("3", 10, 2, "11") checkConv("-15", 10, -16, "-F") - checkConv("-15", 10, 16, "FFFFFFFFFFFFFFF1") + checkConv("-15", 10, 16, "-F") checkConv("big", 36, 16, "3A48") - checkConv("9223372036854775807", 36, 16, "FFFFFFFFFFFFFFFF") + checkConv("9223372036854775807", 36, 16, "12DDAC15F246BAF8C0D551AC7") checkConv("11abc", 10, 16, "B") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala index cd92976571230..87526b130d4c6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala @@ -200,7 +200,7 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { checkAnswer(df.selectExpr("""conv("100", 2, 10)"""), Row("4")) checkAnswer(df.selectExpr("""conv("-10", 16, -10)"""), Row("-16")) checkAnswer( - df.selectExpr("""conv("9223372036854775807", 36, -16)"""), Row("-1")) // for overflow + df.selectExpr("""conv("9223372036854775807", 36, -16)"""), Row("12DDAC15F246BAF8C0D551AC7")) } test("floor") { diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index c263932c2f535..e7c702baba752 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -525,6 +525,9 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "udf_xpath_short", "udf_xpath_string", + // [SPARK-33428][SQL] CONV UDF use BigInt to avoid Long value overflow + "udf_conv", + // These tests DROP TABLE that don't exist (but do not specify IF EXISTS) "alter_rename_partition1", "date_1", @@ -1003,7 +1006,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "udf_concat_insert1", "udf_concat_insert2", "udf_concat_ws", - "udf_conv", "udf_cos", "udf_count", "udf_date_add", From 839d6899adafd9a0695667656d00220d4665895d Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Mon, 14 Dec 2020 14:35:24 +0000 Subject: [PATCH 0759/1009] [SPARK-33733][SQL] PullOutNondeterministic should check and collect deterministic field ### What changes were proposed in this pull request? The deterministic field is wider than `NonDerterministic`, we should keep same range between pull out and check analysis. ### Why are the changes needed? 
For example ``` select * from values(1), (4) as t(c1) order by java_method('java.lang.Math', 'abs', c1) ``` We will get exception since `java_method` deterministic field is false but not a `NonDeterministic` ``` Exception in thread "main" org.apache.spark.sql.AnalysisException: nondeterministic expressions are only allowed in Project, Filter, Aggregate or Window, found: java_method('java.lang.Math', 'abs', t.`c1`) ASC NULLS FIRST in operator Sort [java_method(java.lang.Math, abs, c1#1) ASC NULLS FIRST], true ;; ``` ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Add test. Closes #30703 from ulysses-you/SPARK-33733. Authored-by: ulysses-you Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 5 ++++- .../expressions/CallMethodViaReflection.scala | 6 ++--- .../sql/catalyst/analysis/AnalysisSuite.scala | 22 +++++++++++++++++++ 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 0ceb4226b0f52..a46f2e3168c6b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2960,7 +2960,10 @@ class Analyzer(override val catalogManager: CatalogManager) private def getNondeterToAttr(exprs: Seq[Expression]): Map[Expression, NamedExpression] = { exprs.filterNot(_.deterministic).flatMap { expr => - val leafNondeterministic = expr.collect { case n: Nondeterministic => n } + val leafNondeterministic = expr.collect { + case n: Nondeterministic => n + case udf: UserDefinedExpression if !udf.deterministic => udf + } leafNondeterministic.distinct.map { e => val ne = e match { case n: NamedExpression => n diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala index 4bd6418789aa7..0979a18ac97bb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala @@ -54,7 +54,7 @@ import org.apache.spark.util.Utils """, since = "2.0.0") case class CallMethodViaReflection(children: Seq[Expression]) - extends Expression with CodegenFallback { + extends Nondeterministic with CodegenFallback { override def prettyName: String = getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("reflect") @@ -77,11 +77,11 @@ case class CallMethodViaReflection(children: Seq[Expression]) } } - override lazy val deterministic: Boolean = false override def nullable: Boolean = true override val dataType: DataType = StringType + override protected def initializeInternal(partitionIndex: Int): Unit = {} - override def eval(input: InternalRow): Any = { + override protected def evalInternal(input: InternalRow): Any = { var i = 0 while (i < argExprs.length) { buffer(i) = argExprs(i).eval(input).asInstanceOf[Object] diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 61186c178b083..b206bc9f84f18 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -984,4 +984,26 @@ class AnalysisSuite extends AnalysisTest with Matchers { s"please set '${SQLConf.ANALYZER_MAX_ITERATIONS.key}' to a larger value.")) } } + + test("SPARK-33733: PullOutNondeterministic should check and collect deterministic field") { + val reflect = + CallMethodViaReflection(Seq("java.lang.Math", "abs", testRelation.output.head)) + val udf = ScalaUDF( + (s: String) => s, + StringType, + Literal.create(null, StringType) :: Nil, + Option(ExpressionEncoder[String]().resolveAndBind()) :: Nil, + udfDeterministic = false) + + Seq(reflect, udf).foreach { e: Expression => + val plan = Sort(Seq(e.asc), false, testRelation) + val projected = Alias(e, "_nondeterministic")() + val expect = + Project(testRelation.output, + Sort(Seq(projected.toAttribute.asc), false, + Project(testRelation.output :+ projected, + testRelation))) + checkAnalysis(plan, expect) + } + } } From 82aca7eb8f2501dceaf610f1aaa86082153ef5ee Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Mon, 14 Dec 2020 10:54:18 -0800 Subject: [PATCH 0760/1009] [SPARK-33779][SQL] DataSource V2: API to request distribution and ordering on write ### What changes were proposed in this pull request? This PR adds connector interfaces proposed in the [design doc](https://docs.google.com/document/d/1X0NsQSryvNmXBY9kcvfINeYyKC-AahZarUqg3nS1GQs/edit#) for SPARK-23889. **Note**: This PR contains a subset of changes discussed in PR #29066. ### Why are the changes needed? Data sources should be able to request a specific distribution and ordering of data on write. In particular, these scenarios are considered useful: - global sort - cluster data and sort within partitions - local sort within partitions - no sort Please see the design doc above for a more detailed explanation of requirements. ### Does this PR introduce _any_ user-facing change? This PR introduces public changes to the DS V2 by adding a logical write abstraction as we have on the read path as well as additional interfaces to represent distribution and ordering of data (please see the doc for more info). The existing `Distribution` interface in `read` package is read-specific and not flexible enough like discussed in the design doc. The current proposal is to evolve these interfaces separately until they converge. ### How was this patch tested? This patch adds only interfaces. Closes #30706 from aokolnychyi/spark-23889-interfaces. 
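To make the proposed interfaces concrete, here is a minimal connector-side sketch, assuming a hypothetical `ClusteredSortedWrite` class and illustrative column names ("date", "id") that are not part of this patch. It implements the new `Write` and `RequiresDistributionAndOrdering` interfaces to ask Spark to cluster records by date and sort them by id within each partition before they reach the data source:

```
// Sketch only: class and column names are illustrative, not part of this patch.
import org.apache.spark.sql.connector.distributions.{Distribution, Distributions}
import org.apache.spark.sql.connector.expressions.{Expression, Expressions, NullOrdering, SortDirection, SortOrder}
import org.apache.spark.sql.connector.write.{BatchWrite, RequiresDistributionAndOrdering, Write}

class ClusteredSortedWrite extends Write with RequiresDistributionAndOrdering {

  // co-locate rows that share the same "date" value in one partition
  override def requiredDistribution(): Distribution =
    Distributions.clustered(Array[Expression](Expressions.column("date")))

  // sort rows by "id" within each partition before they are handed to the data source
  override def requiredOrdering(): Array[SortOrder] = Array(
    Expressions.sort(Expressions.column("id"), SortDirection.ASCENDING, NullOrdering.NULLS_FIRST))

  // a real connector would return its BatchWrite here
  override def toBatch(): BatchWrite =
    throw new UnsupportedOperationException("sketch only")
}
```

The factory helpers used here (`Distributions.clustered`, `Expressions.sort`, `Expressions.column`) are the ones exposed by this patch, so a connector does not need any private expression classes to state its requirements.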
Authored-by: Anton Okolnychyi Signed-off-by: Ryan Blue --- .../distributions/ClusteredDistribution.java | 35 +++++++ .../connector/distributions/Distribution.java | 28 ++++++ .../distributions/Distributions.java | 56 +++++++++++ .../distributions/OrderedDistribution.java | 35 +++++++ .../UnspecifiedDistribution.java | 28 ++++++ .../connector/expressions/Expressions.java | 11 +++ .../connector/expressions/NullOrdering.java | 42 ++++++++ .../connector/expressions/SortDirection.java | 42 ++++++++ .../sql/connector/expressions/SortOrder.java | 43 +++++++++ .../RequiresDistributionAndOrdering.java | 57 +++++++++++ .../spark/sql/connector/write/Write.java | 65 +++++++++++++ .../sql/connector/write/WriteBuilder.java | 39 ++++++-- .../distributions/distributions.scala | 59 ++++++++++++ .../connector/expressions/expressions.scala | 96 +++++++++++++++++++ 14 files changed, 626 insertions(+), 10 deletions(-) create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/ClusteredDistribution.java create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/Distribution.java create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/Distributions.java create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/OrderedDistribution.java create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/UnspecifiedDistribution.java create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/NullOrdering.java create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/SortDirection.java create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/SortOrder.java create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RequiresDistributionAndOrdering.java create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/Write.java create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/connector/distributions/distributions.scala diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/ClusteredDistribution.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/ClusteredDistribution.java new file mode 100644 index 0000000000000..dcc3d191461ce --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/ClusteredDistribution.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.connector.distributions; + +import org.apache.spark.annotation.Experimental; +import org.apache.spark.sql.connector.expressions.Expression; + +/** + * A distribution where tuples that share the same values for clustering expressions are co-located + * in the same partition. + * + * @since 3.2.0 + */ +@Experimental +public interface ClusteredDistribution extends Distribution { + /** + * Returns clustering expressions. + */ + Expression[] clustering(); +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/Distribution.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/Distribution.java new file mode 100644 index 0000000000000..95d68ea2d1abe --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/Distribution.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.distributions; + +import org.apache.spark.annotation.Experimental; + +/** + * An interface that defines how data is distributed across partitions. + * + * @since 3.2.0 + */ +@Experimental +public interface Distribution {} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/Distributions.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/Distributions.java new file mode 100644 index 0000000000000..da5d6f8c81a3f --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/Distributions.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.distributions; + +import org.apache.spark.annotation.Experimental; +import org.apache.spark.sql.connector.expressions.Expression; +import org.apache.spark.sql.connector.expressions.SortOrder; + +/** + * Helper methods to create distributions to pass into Spark. 
+ * + * @since 3.2.0 + */ +@Experimental +public class Distributions { + private Distributions() { + } + + /** + * Creates a distribution where no promises are made about co-location of data. + */ + public static UnspecifiedDistribution unspecified() { + return LogicalDistributions.unspecified(); + } + + /** + * Creates a distribution where tuples that share the same values for clustering expressions are + * co-located in the same partition. + */ + public static ClusteredDistribution clustered(Expression[] clustering) { + return LogicalDistributions.clustered(clustering); + } + + /** + * Creates a distribution where tuples have been ordered across partitions according + * to ordering expressions, but not necessarily within a given partition. + */ + public static OrderedDistribution ordered(SortOrder[] ordering) { + return LogicalDistributions.ordered(ordering); + } +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/OrderedDistribution.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/OrderedDistribution.java new file mode 100644 index 0000000000000..3456178d8e64f --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/OrderedDistribution.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.distributions; + +import org.apache.spark.annotation.Experimental; +import org.apache.spark.sql.connector.expressions.SortOrder; + +/** + * A distribution where tuples have been ordered across partitions according + * to ordering expressions, but not necessarily within a given partition. + * + * @since 3.2.0 + */ +@Experimental +public interface OrderedDistribution extends Distribution { + /** + * Returns ordering expressions. + */ + SortOrder[] ordering(); +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/UnspecifiedDistribution.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/UnspecifiedDistribution.java new file mode 100644 index 0000000000000..ea18d8906cfd0 --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/UnspecifiedDistribution.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.distributions; + +import org.apache.spark.annotation.Experimental; + +/** + * A distribution where no promises are made about co-location of data. + * + * @since 3.2.0 + */ +@Experimental +public interface UnspecifiedDistribution extends Distribution {} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Expressions.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Expressions.java index 791dc969ab008..984de6258f84b 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Expressions.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Expressions.java @@ -164,4 +164,15 @@ public static Transform hours(String column) { return LogicalExpressions.hours(Expressions.column(column)); } + /** + * Create a sort expression. + * + * @param expr an expression to produce values to sort + * @param direction direction of the sort + * @param nullOrder null order of the sort + * @return a SortOrder + */ + public static SortOrder sort(Expression expr, SortDirection direction, NullOrdering nullOrder) { + return LogicalExpressions.sort(expr, direction, nullOrder); + } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/NullOrdering.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/NullOrdering.java new file mode 100644 index 0000000000000..669d1c8443b15 --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/NullOrdering.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.expressions; + +import org.apache.spark.annotation.Experimental; + +/** + * A null order used in sorting expressions. 
+ * + * @since 3.2.0 + */ +@Experimental +public enum NullOrdering { + NULLS_FIRST, NULLS_LAST; + + @Override + public String toString() { + switch (this) { + case NULLS_FIRST: + return "NULLS FIRST"; + case NULLS_LAST: + return "NULLS LAST"; + default: + throw new IllegalArgumentException("Unexpected null order: " + this); + } + } +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/SortDirection.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/SortDirection.java new file mode 100644 index 0000000000000..6946032832d18 --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/SortDirection.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.expressions; + +import org.apache.spark.annotation.Experimental; + +/** + * A sort direction used in sorting expressions. + * + * @since 3.2.0 + */ +@Experimental +public enum SortDirection { + ASCENDING, DESCENDING; + + @Override + public String toString() { + switch (this) { + case ASCENDING: + return "ASC"; + case DESCENDING: + return "DESC"; + default: + throw new IllegalArgumentException("Unexpected sort direction: " + this); + } + } +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/SortOrder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/SortOrder.java new file mode 100644 index 0000000000000..72252457df26e --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/SortOrder.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.expressions; + +import org.apache.spark.annotation.Experimental; + +/** + * Represents a sort order in the public expression API. + * + * @since 3.2.0 + */ +@Experimental +public interface SortOrder extends Expression { + /** + * Returns the sort expression. 
+ */ + Expression expression(); + + /** + * Returns the sort direction. + */ + SortDirection direction(); + + /** + * Returns the null ordering. + */ + NullOrdering nullOrdering(); +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RequiresDistributionAndOrdering.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RequiresDistributionAndOrdering.java new file mode 100644 index 0000000000000..91fd02aae883c --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RequiresDistributionAndOrdering.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.write; + +import org.apache.spark.annotation.Experimental; +import org.apache.spark.sql.connector.distributions.Distribution; +import org.apache.spark.sql.connector.distributions.UnspecifiedDistribution; +import org.apache.spark.sql.connector.expressions.SortOrder; + +/** + * A write that requires a specific distribution and ordering of data. + * + * @since 3.2.0 + */ +@Experimental +public interface RequiresDistributionAndOrdering extends Write { + /** + * Returns the distribution required by this write. + *

<p>
+ * Spark will distribute incoming records across partitions to satisfy the required distribution
+ * before passing the records to the data source table on write.
+ *

<p>
+ * Implementations may return {@link UnspecifiedDistribution} if they don't require any specific
+ * distribution of data on write.
+ *
+ * @return the required distribution
+ */
+ Distribution requiredDistribution();
+
+ /**
+ * Returns the ordering required by this write.
+ *

<p>
+ * Spark will order incoming records within partitions to satisfy the required ordering
+ * before passing those records to the data source table on write.
+ *

      + * Implementations may return an empty array if they don't require any specific ordering of data + * on write. + * + * @return the required ordering + */ + SortOrder[] requiredOrdering(); +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/Write.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/Write.java new file mode 100644 index 0000000000000..873680415d447 --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/Write.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.write; + +import org.apache.spark.annotation.Evolving; +import org.apache.spark.sql.connector.catalog.Table; +import org.apache.spark.sql.connector.catalog.TableCapability; +import org.apache.spark.sql.connector.write.streaming.StreamingWrite; + +/** + * A logical representation of a data source write. + *

      + * This logical representation is shared between batch and streaming write. Data sources must + * implement the corresponding methods in this interface to match what the table promises + * to support. For example, {@link #toBatch()} must be implemented if the {@link Table} that + * creates this {@link Write} returns {@link TableCapability#BATCH_WRITE} support in its + * {@link Table#capabilities()}. + * + * @since 3.2.0 + */ +@Evolving +public interface Write { + + /** + * Returns the description associated with this write. + */ + default String description() { + return this.getClass().toString(); + } + + /** + * Returns a {@link BatchWrite} to write data to batch source. By default this method throws + * exception, data sources must overwrite this method to provide an implementation, if the + * {@link Table} that creates this write returns {@link TableCapability#BATCH_WRITE} support in + * its {@link Table#capabilities()}. + */ + default BatchWrite toBatch() { + throw new UnsupportedOperationException(description() + ": Batch write is not supported"); + } + + /** + * Returns a {@link StreamingWrite} to write data to streaming source. By default this method + * throws exception, data sources must overwrite this method to provide an implementation, if the + * {@link Table} that creates this write returns {@link TableCapability#STREAMING_WRITE} support + * in its {@link Table#capabilities()}. + */ + default StreamingWrite toStreaming() { + throw new UnsupportedOperationException(description() + ": Streaming write is not supported"); + } +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/WriteBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/WriteBuilder.java index 5398ca46e9777..bf344185118a9 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/WriteBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/WriteBuilder.java @@ -23,10 +23,10 @@ import org.apache.spark.sql.connector.write.streaming.StreamingWrite; /** - * An interface for building the {@link BatchWrite}. Implementations can mix in some interfaces to + * An interface for building the {@link Write}. Implementations can mix in some interfaces to * support different ways to write data to data sources. * - * Unless modified by a mixin interface, the {@link BatchWrite} configured by this builder is to + * Unless modified by a mixin interface, the {@link Write} configured by this builder is to * append data without affecting existing data. * * @since 3.0.0 @@ -35,22 +35,41 @@ public interface WriteBuilder { /** - * Returns a {@link BatchWrite} to write data to batch source. By default this method throws - * exception, data sources must overwrite this method to provide an implementation, if the - * {@link Table} that creates this write returns {@link TableCapability#BATCH_WRITE} support in - * its {@link Table#capabilities()}. + * Returns a logical {@link Write} shared between batch and streaming. + * + * @since 3.2.0 */ + default Write build() { + return new Write() { + @Override + public BatchWrite toBatch() { + return buildForBatch(); + } + + @Override + public StreamingWrite toStreaming() { + return buildForStreaming(); + } + }; + } + + /** + * Returns a {@link BatchWrite} to write data to batch source. + * + * @deprecated use {@link #build()} instead. 
+ */ + @Deprecated default BatchWrite buildForBatch() { throw new UnsupportedOperationException(getClass().getName() + " does not support batch write"); } /** - * Returns a {@link StreamingWrite} to write data to streaming source. By default this method - * throws exception, data sources must overwrite this method to provide an implementation, if the - * {@link Table} that creates this write returns {@link TableCapability#STREAMING_WRITE} support - * in its {@link Table#capabilities()}. + * Returns a {@link StreamingWrite} to write data to streaming source. + * + * @deprecated use {@link #build()} instead. */ + @Deprecated default StreamingWrite buildForStreaming() { throw new UnsupportedOperationException(getClass().getName() + " does not support streaming write"); diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/distributions/distributions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/distributions/distributions.scala new file mode 100644 index 0000000000000..599f82b4dc528 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/distributions/distributions.scala @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.connector.distributions + +import org.apache.spark.sql.connector.expressions.{Expression, SortOrder} + +private[sql] object LogicalDistributions { + + def unspecified(): UnspecifiedDistribution = { + UnspecifiedDistributionImpl + } + + def clustered(clustering: Array[Expression]): ClusteredDistribution = { + ClusteredDistributionImpl(clustering) + } + + def ordered(ordering: Array[SortOrder]): OrderedDistribution = { + OrderedDistributionImpl(ordering) + } +} + +private[sql] object UnspecifiedDistributionImpl extends UnspecifiedDistribution { + override def toString: String = "UnspecifiedDistribution" +} + +private[sql] final case class ClusteredDistributionImpl( + clusteringExprs: Seq[Expression]) extends ClusteredDistribution { + + override def clustering: Array[Expression] = clusteringExprs.toArray + + override def toString: String = { + s"ClusteredDistribution(${clusteringExprs.map(_.describe).mkString(", ")})" + } +} + +private[sql] final case class OrderedDistributionImpl( + orderingExprs: Seq[SortOrder]) extends OrderedDistribution { + + override def ordering: Array[SortOrder] = orderingExprs.toArray + + override def toString: String = { + s"OrderedDistribution(${orderingExprs.map(_.describe).mkString(", ")})" + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/expressions/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/expressions/expressions.scala index 321ea14d376b4..2863d94d198b2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/expressions/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/expressions/expressions.scala @@ -54,6 +54,13 @@ private[sql] object LogicalExpressions { def days(reference: NamedReference): DaysTransform = DaysTransform(reference) def hours(reference: NamedReference): HoursTransform = HoursTransform(reference) + + def sort( + reference: Expression, + direction: SortDirection, + nullOrdering: NullOrdering): SortOrder = { + SortValue(reference, direction, nullOrdering) + } } /** @@ -110,6 +117,18 @@ private[sql] final case class BucketTransform( } private[sql] object BucketTransform { + def unapply(expr: Expression): Option[(Int, FieldReference)] = expr match { + case transform: Transform => + transform match { + case BucketTransform(n, FieldReference(parts)) => + Some((n, FieldReference(parts))) + case _ => + None + } + case _ => + None + } + def unapply(transform: Transform): Option[(Int, NamedReference)] = transform match { case NamedTransform("bucket", Seq( Lit(value: Int, IntegerType), @@ -170,6 +189,18 @@ private[sql] final case class IdentityTransform( } private[sql] object IdentityTransform { + def unapply(expr: Expression): Option[FieldReference] = expr match { + case transform: Transform => + transform match { + case IdentityTransform(ref) => + Some(ref) + case _ => + None + } + case _ => + None + } + def unapply(transform: Transform): Option[FieldReference] = transform match { case NamedTransform("identity", Seq(Ref(parts))) => Some(FieldReference(parts)) @@ -185,6 +216,18 @@ private[sql] final case class YearsTransform( } private[sql] object YearsTransform { + def unapply(expr: Expression): Option[FieldReference] = expr match { + case transform: Transform => + transform match { + case YearsTransform(ref) => + Some(ref) + case _ => + None + } + case _ => + None + } + def unapply(transform: Transform): Option[FieldReference] = transform match { case NamedTransform("years", Seq(Ref(parts))) => 
Some(FieldReference(parts)) @@ -200,6 +243,18 @@ private[sql] final case class MonthsTransform( } private[sql] object MonthsTransform { + def unapply(expr: Expression): Option[FieldReference] = expr match { + case transform: Transform => + transform match { + case MonthsTransform(ref) => + Some(ref) + case _ => + None + } + case _ => + None + } + def unapply(transform: Transform): Option[FieldReference] = transform match { case NamedTransform("months", Seq(Ref(parts))) => Some(FieldReference(parts)) @@ -215,6 +270,18 @@ private[sql] final case class DaysTransform( } private[sql] object DaysTransform { + def unapply(expr: Expression): Option[FieldReference] = expr match { + case transform: Transform => + transform match { + case DaysTransform(ref) => + Some(ref) + case _ => + None + } + case _ => + None + } + def unapply(transform: Transform): Option[FieldReference] = transform match { case NamedTransform("days", Seq(Ref(parts))) => Some(FieldReference(parts)) @@ -230,6 +297,18 @@ private[sql] final case class HoursTransform( } private[sql] object HoursTransform { + def unapply(expr: Expression): Option[FieldReference] = expr match { + case transform: Transform => + transform match { + case HoursTransform(ref) => + Some(ref) + case _ => + None + } + case _ => + None + } + def unapply(transform: Transform): Option[FieldReference] = transform match { case NamedTransform("hours", Seq(Ref(parts))) => Some(FieldReference(parts)) @@ -261,3 +340,20 @@ private[sql] object FieldReference { LogicalExpressions.parseReference(column) } } + +private[sql] final case class SortValue( + expression: Expression, + direction: SortDirection, + nullOrdering: NullOrdering) extends SortOrder { + + override def describe(): String = s"$expression $direction $nullOrdering" +} + +private[sql] object SortValue { + def unapply(expr: Expression): Option[(Expression, SortDirection, NullOrdering)] = expr match { + case sort: SortOrder => + Some((sort.expression, sort.direction, sort.nullOrdering)) + case _ => + None + } +} From bb60fb1bbd97b70d60e42a0435e15862c3e3f97e Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Mon, 14 Dec 2020 11:39:42 -0800 Subject: [PATCH 0761/1009] [SPARK-33779][SQL][FOLLOW-UP] Fix Java Linter error ### What changes were proposed in this pull request? This PR removes unused imports. ### Why are the changes needed? These changes are required to fix the build. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Via `dev/lint-java`. Closes #30767 from aokolnychyi/fix-linter. 
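Since both this follow-up and its parent commit touch `WriteBuilder`, here is a short hedged sketch of how a connector might adopt the new `build()` entry point instead of the now-deprecated `buildForBatch()` (the builder class name is hypothetical, not part of either patch):

```
// Sketch only: MyWriteBuilder is a hypothetical connector class.
import org.apache.spark.sql.connector.write.{BatchWrite, Write, WriteBuilder}

class MyWriteBuilder extends WriteBuilder {
  // Return a logical Write; Spark calls toBatch()/toStreaming() on it as needed.
  override def build(): Write = new Write {
    override def toBatch(): BatchWrite = {
      // a real connector would construct and return its BatchWrite here
      throw new UnsupportedOperationException("sketch only")
    }
  }
}
```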
Authored-by: Anton Okolnychyi Signed-off-by: Dongjoon Hyun --- .../java/org/apache/spark/sql/connector/write/WriteBuilder.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/WriteBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/WriteBuilder.java index bf344185118a9..0c72f31af1c22 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/WriteBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/WriteBuilder.java @@ -18,8 +18,6 @@ package org.apache.spark.sql.connector.write; import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.catalog.Table; -import org.apache.spark.sql.connector.catalog.TableCapability; import org.apache.spark.sql.connector.write.streaming.StreamingWrite; /** From 5885cc15cae9c9780530e235d2bd4bd6beda5dbb Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Mon, 14 Dec 2020 12:05:28 -0800 Subject: [PATCH 0762/1009] [SPARK-33261][K8S] Add a developer API for custom feature steps ### What changes were proposed in this pull request? Add a developer API for custom driver & executor feature steps. ### Why are the changes needed? While we allow templates for the basis of pod creation, some deployments need more flexibility in how the pods are configured. This adds a developer API for custom deployments. ### Does this PR introduce _any_ user-facing change? New developer API. ### How was this patch tested? Extended tests to verify custom step is applied when configured. Closes #30206 from holdenk/SPARK-33261-allow-people-to-extend-pod-feature-steps. Authored-by: Holden Karau Signed-off-by: Holden Karau --- .../org/apache/spark/deploy/k8s/Config.scala | 20 +++++ .../apache/spark/deploy/k8s/SparkPod.scala | 11 ++- .../KubernetesFeatureConfigStep.scala | 7 +- .../k8s/submit/KubernetesDriverBuilder.scala | 8 +- .../k8s/KubernetesExecutorBuilder.scala | 8 +- .../spark/deploy/k8s/PodBuilderSuite.scala | 76 +++++++++++++++++++ .../submit/KubernetesDriverBuilderSuite.scala | 5 +- .../k8s/KubernetesExecutorBuilderSuite.scala | 4 + 8 files changed, 134 insertions(+), 5 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index c28d6fd405ae1..40609aef1e9d8 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -219,6 +219,26 @@ private[spark] object Config extends Logging { .stringConf .createOptional + val KUBERNETES_DRIVER_POD_FEATURE_STEPS = + ConfigBuilder("spark.kubernetes.driver.pod.featureSteps") + .doc("Class names of an extra driver pod feature step implementing " + + "KubernetesFeatureConfigStep. This is a developer API. Comma separated. " + + "Runs after all of Spark internal feature steps.") + .version("3.2.0") + .stringConf + .toSequence + .createWithDefault(Nil) + + val KUBERNETES_EXECUTOR_POD_FEATURE_STEPS = + ConfigBuilder("spark.kubernetes.executor.pod.featureSteps") + .doc("Class name of an extra executor pod feature step implementing " + + "KubernetesFeatureConfigStep. This is a developer API. Comma separated. 
" + + "Runs after all of Spark internal feature steps.") + .version("3.2.0") + .stringConf + .toSequence + .createWithDefault(Nil) + val KUBERNETES_ALLOCATION_BATCH_SIZE = ConfigBuilder("spark.kubernetes.allocation.batch.size") .doc("Number of pods to launch at once in each round of executor allocation.") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkPod.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkPod.scala index fd1196368a7ff..c2298e7ca77c6 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkPod.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkPod.scala @@ -18,7 +18,16 @@ package org.apache.spark.deploy.k8s import io.fabric8.kubernetes.api.model.{Container, ContainerBuilder, Pod, PodBuilder} -private[spark] case class SparkPod(pod: Pod, container: Container) { +import org.apache.spark.annotation.{DeveloperApi, Unstable} + +/** + * :: DeveloperApi :: + * + * Represents a SparkPod consisting of pod and the container within the pod. + */ +@Unstable +@DeveloperApi +case class SparkPod(pod: Pod, container: Container) { /** * Convenience method to apply a series of chained transformations to a pod. diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KubernetesFeatureConfigStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KubernetesFeatureConfigStep.scala index 58cdaa3cadd6b..3fec92644b956 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KubernetesFeatureConfigStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KubernetesFeatureConfigStep.scala @@ -18,13 +18,18 @@ package org.apache.spark.deploy.k8s.features import io.fabric8.kubernetes.api.model.HasMetadata +import org.apache.spark.annotation.{DeveloperApi, Unstable} import org.apache.spark.deploy.k8s.SparkPod /** + * :: DeveloperApi :: + * * A collection of functions that together represent a "feature" in pods that are launched for * Spark drivers and executors. */ -private[spark] trait KubernetesFeatureConfigStep { +@Unstable +@DeveloperApi +trait KubernetesFeatureConfigStep { /** * Apply modifications on the given pod in accordance to this feature. 
This can include attaching diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala index 43639a3b7dc1b..3b38dd6e4feef 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala @@ -22,6 +22,7 @@ import io.fabric8.kubernetes.client.KubernetesClient import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.features._ +import org.apache.spark.util.Utils private[spark] class KubernetesDriverBuilder { @@ -37,6 +38,11 @@ private[spark] class KubernetesDriverBuilder { } .getOrElse(SparkPod.initialPod()) + val userFeatures = conf.get(Config.KUBERNETES_DRIVER_POD_FEATURE_STEPS) + .map { className => + Utils.classForName(className).newInstance().asInstanceOf[KubernetesFeatureConfigStep] + } + val features = Seq( new BasicDriverFeatureStep(conf), new DriverKubernetesCredentialsFeatureStep(conf), @@ -48,7 +54,7 @@ private[spark] class KubernetesDriverBuilder { new HadoopConfDriverFeatureStep(conf), new KerberosConfDriverFeatureStep(conf), new PodTemplateConfigMapStep(conf), - new LocalDirsFeatureStep(conf)) + new LocalDirsFeatureStep(conf)) ++ userFeatures val spec = KubernetesDriverSpec( initialPod, diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala index 5388d185489f2..43328c72a6fdd 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala @@ -24,6 +24,7 @@ import org.apache.spark.SecurityManager import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.features._ import org.apache.spark.resource.ResourceProfile +import org.apache.spark.util.Utils private[spark] class KubernetesExecutorBuilder { @@ -41,13 +42,18 @@ private[spark] class KubernetesExecutorBuilder { } .getOrElse(SparkPod.initialPod()) + val userFeatures = conf.get(Config.KUBERNETES_EXECUTOR_POD_FEATURE_STEPS) + .map { className => + Utils.classForName(className).newInstance().asInstanceOf[KubernetesFeatureConfigStep] + } + val features = Seq( new BasicExecutorFeatureStep(conf, secMgr, resourceProfile), new ExecutorKubernetesCredentialsFeatureStep(conf), new MountSecretsFeatureStep(conf), new EnvSecretsFeatureStep(conf), new MountVolumesFeatureStep(conf), - new LocalDirsFeatureStep(conf)) + new LocalDirsFeatureStep(conf)) ++ userFeatures val spec = KubernetesExecutorSpec( initialPod, diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala index 4d4c4baeb12c0..21a5b7a6486fd 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala @@ -26,12 +26,15 @@ import org.mockito.Mockito.{mock, never, verify, when} import 
scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkException, SparkFunSuite} +import org.apache.spark.deploy.k8s.features.KubernetesFeatureConfigStep import org.apache.spark.internal.config.ConfigEntry abstract class PodBuilderSuite extends SparkFunSuite { protected def templateFileConf: ConfigEntry[_] + protected def userFeatureStepsConf: ConfigEntry[_] + protected def buildPod(sparkConf: SparkConf, client: KubernetesClient): SparkPod private val baseConf = new SparkConf(false) @@ -50,6 +53,19 @@ abstract class PodBuilderSuite extends SparkFunSuite { verifyPod(pod) } + test("configure a custom test step") { + val client = mockKubernetesClient() + val sparkConf = baseConf.clone() + .set(userFeatureStepsConf.key, + "org.apache.spark.deploy.k8s.TestStepTwo," + + "org.apache.spark.deploy.k8s.TestStep") + .set(templateFileConf.key, "template-file.yaml") + val pod = buildPod(sparkConf, client) + verifyPod(pod) + assert(pod.container.getVolumeMounts.asScala.exists(_.getName == "so_long")) + assert(pod.container.getVolumeMounts.asScala.exists(_.getName == "so_long_two")) + } + test("complain about misconfigured pod template") { val client = mockKubernetesClient( new PodBuilder() @@ -173,3 +189,63 @@ abstract class PodBuilderSuite extends SparkFunSuite { } } + +/** + * A test user feature step. + */ +class TestStep extends KubernetesFeatureConfigStep { + import io.fabric8.kubernetes.api.model._ + + override def configurePod(pod: SparkPod): SparkPod = { + val localDirVolumes = Seq(new VolumeBuilder().withName("so_long").build()) + val localDirVolumeMounts = Seq( + new VolumeMountBuilder().withName("so_long") + .withMountPath("and_thanks_for_all_the_fish") + .build() + ) + + val podWithLocalDirVolumes = new PodBuilder(pod.pod) + .editSpec() + .addToVolumes(localDirVolumes: _*) + .endSpec() + .build() + val containerWithLocalDirVolumeMounts = new ContainerBuilder(pod.container) + .addNewEnv() + .withName("CUSTOM_SPARK_LOCAL_DIRS") + .withValue("fishyfishyfishy") + .endEnv() + .addToVolumeMounts(localDirVolumeMounts: _*) + .build() + SparkPod(podWithLocalDirVolumes, containerWithLocalDirVolumeMounts) + } +} + +/** + * A test user feature step. 
+ */ +class TestStepTwo extends KubernetesFeatureConfigStep { + import io.fabric8.kubernetes.api.model._ + + override def configurePod(pod: SparkPod): SparkPod = { + val localDirVolumes = Seq(new VolumeBuilder().withName("so_long_two").build()) + val localDirVolumeMounts = Seq( + new VolumeMountBuilder().withName("so_long_two") + .withMountPath("and_thanks_for_all_the_fish_eh") + .build() + ) + + val podWithLocalDirVolumes = new PodBuilder(pod.pod) + .editSpec() + .addToVolumes(localDirVolumes: _*) + .endSpec() + .build() + val containerWithLocalDirVolumeMounts = new ContainerBuilder(pod.container) + .addNewEnv() + .withName("CUSTOM_SPARK_LOCAL_DIRS_TWO") + .withValue("fishyfishyfishyTWO") + .endEnv() + .addToVolumeMounts(localDirVolumeMounts: _*) + .build() + SparkPod(podWithLocalDirVolumes, containerWithLocalDirVolumeMounts) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilderSuite.scala index 6518c91a1a1fd..f9802ff967f82 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilderSuite.scala @@ -28,9 +28,12 @@ class KubernetesDriverBuilderSuite extends PodBuilderSuite { Config.KUBERNETES_DRIVER_PODTEMPLATE_FILE } + override protected def userFeatureStepsConf: ConfigEntry[_] = { + Config.KUBERNETES_DRIVER_POD_FEATURE_STEPS + } + override protected def buildPod(sparkConf: SparkConf, client: KubernetesClient): SparkPod = { val conf = KubernetesTestConf.createDriverConf(sparkConf = sparkConf) new KubernetesDriverBuilder().buildFromFeatures(conf, client).pod } - } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala index c64b733102dc8..ec60c6fc0bf82 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala @@ -29,6 +29,10 @@ class KubernetesExecutorBuilderSuite extends PodBuilderSuite { Config.KUBERNETES_EXECUTOR_PODTEMPLATE_FILE } + override protected def userFeatureStepsConf: ConfigEntry[_] = { + Config.KUBERNETES_EXECUTOR_POD_FEATURE_STEPS + } + override protected def buildPod(sparkConf: SparkConf, client: KubernetesClient): SparkPod = { sparkConf.set("spark.driver.host", "https://driver.host.com") val conf = KubernetesTestConf.createExecutorConf(sparkConf = sparkConf) From 412d86e711188ff1bd8a6387524131aa3c200503 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Mon, 14 Dec 2020 13:34:23 -0800 Subject: [PATCH 0763/1009] [SPARK-33771][SQL][TESTS] Fix Invalid value for HourOfAmPm when testing on JDK 14 ### What changes were proposed in this pull request? This pr fix invalid value for HourOfAmPm when testing on JDK 14. ### Why are the changes needed? Run test on JDK 14. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? N/A Closes #30754 from wangyum/SPARK-33771. 
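For context, a rough standalone sketch (not part of this patch) of the JDK behavior difference the updated test accounts for; the pattern and locale below are illustrative only:

```scala
import java.time.format.DateTimeFormatter
import java.util.Locale

// 'K' is hour-of-am-pm with valid range 0-11, so "12 AM" is out of range for "KK".
val fmt = DateTimeFormatter.ofPattern("yyyy-MM-dd KK a", Locale.US)

// On JDK 13 and later (after JDK-8223773) this throws a DateTimeException subclass
// complaining about an invalid HourOfAmPm value; on older JDKs it parses leniently,
// which is what the previous assertion relied on.
fmt.parse("2009-12-12 12 AM")
```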
Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/util/TimestampFormatterSuite.scala | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala index 103b7a2eded28..c65fec29bc6b8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.util import java.time.{DateTimeException, Instant, LocalDateTime, LocalTime} import java.util.concurrent.TimeUnit +import org.apache.commons.lang3.{JavaVersion, SystemUtils} import org.scalatest.matchers.should.Matchers._ import org.apache.spark.SparkUpgradeException @@ -355,9 +356,14 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite { val micros1 = formatter.parse("2009-12-12 00 am") assert(micros1 === date(2009, 12, 12)) + // JDK-8223773: DateTimeFormatter Fails to throw an Exception on Invalid HOUR_OF_AMPM // For `KK`, "12:00:00 am" is the same as "00:00:00 pm". - val micros2 = formatter.parse("2009-12-12 12 am") - assert(micros2 === date(2009, 12, 12, 12)) + if (SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_13)) { + intercept[DateTimeException](formatter.parse("2009-12-12 12 am")) + } else { + val micros2 = formatter.parse("2009-12-12 12 am") + assert(micros2 === date(2009, 12, 12, 12)) + } val micros3 = formatter.parse("2009-12-12 00 pm") assert(micros3 === date(2009, 12, 12, 12)) From f156718587fc33b9bf8e5abc4ae1f6fa0a5da887 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 14 Dec 2020 14:28:47 -0800 Subject: [PATCH 0764/1009] [SPARK-33777][SQL] Sort output of V2 SHOW PARTITIONS ### What changes were proposed in this pull request? List partitions returned by the V2 `SHOW PARTITIONS` command in alphabetical order. ### Why are the changes needed? To have the same behavior as: 1. V1 in-memory catalog, see https://github.com/apache/spark/blob/a28ed86a387b286745b30cd4d90b3d558205a5a7/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala#L546 2. V1 Hive catalogs, see https://github.com/apache/spark/blob/fab2995972761503563fa2aa547c67047c51bd33/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala#L715 ### Does this PR introduce _any_ user-facing change? Yes, after the changes, V2 SHOW PARTITIONS sorts its output. ### How was this patch tested? Added new UT to the base trait `ShowPartitionsSuiteBase` which contains tests for V1 and V2. Closes #30764 from MaxGekk/sort-show-partitions. 
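As a rough illustration of the intended behavior (the catalog, namespace, table, and provider names below are placeholders, not taken from this patch):

```scala
// Assumes a DSv2 catalog registered as "testcat" whose provider supports partitions.
sql("CREATE TABLE testcat.ns.tbl (id INT, part STRING) USING parquet PARTITIONED BY (part)")
sql("ALTER TABLE testcat.ns.tbl ADD PARTITION (part = 'b')")
sql("ALTER TABLE testcat.ns.tbl ADD PARTITION (part = 'a')")

// With this change the V2 output is returned in alphabetical order, matching the
// V1 in-memory and Hive catalogs: "part=a" is listed before "part=b" even though
// the partitions were added in the opposite order.
sql("SHOW PARTITIONS testcat.ns.tbl").show()
```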
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../datasources/v2/ShowPartitionsExec.scala | 5 +++-- .../command/ShowPartitionsSuiteBase.scala | 17 +++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala index c4b6aa805d58f..416dce6fa28c6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala @@ -49,7 +49,7 @@ case class ShowPartitionsExec( val len = schema.length val partitions = new Array[String](len) val timeZoneId = SQLConf.get.sessionLocalTimeZone - partitionIdentifiers.map { row => + val output = partitionIdentifiers.map { row => var i = 0 while (i < len) { val dataType = schema(i).dataType @@ -59,7 +59,8 @@ case class ShowPartitionsExec( partitions(i) = escapePathName(schema(i).name) + "=" + escapePathName(partValueStr) i += 1 } - InternalRow(UTF8String.fromString(partitions.mkString("/"))) + partitions.mkString("/") } + output.sorted.map(p => InternalRow(UTF8String.fromString(p))) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala index b695decdb3ec9..56c6e5a325745 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala @@ -173,4 +173,21 @@ trait ShowPartitionsSuiteBase extends QueryTest with SQLTestUtils { } } } + + test("SPARK-33777: sorted output") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + val table = s"$catalog.ns.dateTable" + withTable(table) { + sql(s""" + |CREATE TABLE $table (id int, part string) + |$defaultUsing + |PARTITIONED BY (part)""".stripMargin) + sql(s"ALTER TABLE $table ADD PARTITION(part = 'b')") + sql(s"ALTER TABLE $table ADD PARTITION(part = 'a')") + val partitions = sql(s"show partitions $table") + assert(partitions.first().getString(0) === "part=a") + } + } + } } From 49d3256497cb47d03a3167a550fb9857bd3afdbd Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Mon, 14 Dec 2020 15:18:50 -0800 Subject: [PATCH 0765/1009] [SPARK-33653][SQL] DSv2: REFRESH TABLE should recache the table itself ### What changes were proposed in this pull request? This changes DSv2 refresh table semantics to also recache the target table itself. ### Why are the changes needed? Currently "REFRESH TABLE" in DSv2 only invalidate all caches referencing the table. With #30403 merged which adds support for caching a DSv2 table, we should also recache the target table itself to make the behavior consistent with DSv1. ### Does this PR introduce _any_ user-facing change? Yes, now refreshing table in DSv2 also recache the target table itself. ### How was this patch tested? Added coverage of this new behavior in the existing UT for v2 refresh table command Closes #30742 from sunchao/SPARK-33653. 
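As a short sketch of the new semantics, mirroring the test added below (the catalog and table names are placeholders; "foo" is the test suite's fake provider):

```scala
// Assumes a DSv2 catalog registered as "testcat"; names are illustrative only.
sql("CREATE TABLE testcat.ns.t (id BIGINT) USING foo")
sql("CACHE TABLE testcat.ns.t")
sql("REFRESH TABLE testcat.ns.t")

// Previously REFRESH TABLE only uncached the table; with this change it is cached
// again afterwards, keeping the original cache name and storage level.
assert(spark.sharedState.cacheManager.lookupCachedData(spark.table("testcat.ns.t")).isDefined)
```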
Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../datasources/v2/DataSourceV2Strategy.scala | 16 +++++++++++++--- .../datasources/v2/RefreshTableExec.scala | 1 - .../sql/connector/DataSourceV2SQLSuite.scala | 19 +++++++++++++++++++ 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index ea6ac6ca92aa0..1dd9f551ff8c9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.collection.JavaConverters._ -import org.apache.spark.sql.{AnalysisException, SparkSession, Strategy} +import org.apache.spark.sql.{AnalysisException, Dataset, SparkSession, Strategy} import org.apache.spark.sql.catalyst.analysis.{ResolvedNamespace, ResolvedPartitionSpec, ResolvedTable} import org.apache.spark.sql.catalyst.expressions.{And, Expression, NamedExpression, PredicateHelper, SubqueryExpression} import org.apache.spark.sql.catalyst.planning.PhysicalOperation @@ -56,9 +56,19 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat session.sharedState.cacheManager.recacheByPlan(session, r) } - private def invalidateCache(r: ResolvedTable)(): Unit = { + private def invalidateCache(r: ResolvedTable, recacheTable: Boolean = false)(): Unit = { val v2Relation = DataSourceV2Relation.create(r.table, Some(r.catalog), Some(r.identifier)) + val cache = session.sharedState.cacheManager.lookupCachedData(v2Relation) session.sharedState.cacheManager.uncacheQuery(session, v2Relation, cascade = true) + if (recacheTable && cache.isDefined) { + // save the cache name and cache level for recreation + val cacheName = cache.get.cachedRepresentation.cacheBuilder.tableName + val cacheLevel = cache.get.cachedRepresentation.cacheBuilder.storageLevel + + // recache with the same name and cache level. 
+ val ds = Dataset.ofRows(session, v2Relation) + session.sharedState.cacheManager.cacheQuery(ds, cacheName, cacheLevel) + } } override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { @@ -137,7 +147,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat } case RefreshTable(r: ResolvedTable) => - RefreshTableExec(r.catalog, r.identifier, invalidateCache(r)) :: Nil + RefreshTableExec(r.catalog, r.identifier, invalidateCache(r, recacheTable = true)) :: Nil case ReplaceTable(catalog, ident, schema, parts, props, orCreate) => val propsWithOwner = CatalogV2Util.withDefaultOwnership(props) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RefreshTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RefreshTableExec.scala index 994583c1e338f..e66f0a18a1326 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RefreshTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RefreshTableExec.scala @@ -29,7 +29,6 @@ case class RefreshTableExec( catalog.invalidateTable(ident) // invalidate all caches referencing the given table - // TODO(SPARK-33437): re-cache the table itself once we support caching a DSv2 table invalidateCache() Seq.empty diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 0c65e530f67da..638f06d618833 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -1749,6 +1749,25 @@ class DataSourceV2SQLSuite } } + test("SPARK-33653: REFRESH TABLE should recache the target table itself") { + val tblName = "testcat.ns.t" + withTable(tblName) { + sql(s"CREATE TABLE $tblName (id bigint) USING foo") + + // if the table is not cached, refreshing it should not recache it + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(tblName)).isEmpty) + sql(s"REFRESH TABLE $tblName") + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(tblName)).isEmpty) + + sql(s"CACHE TABLE $tblName") + + // after caching & refreshing the table should be recached + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(tblName)).isDefined) + sql(s"REFRESH TABLE $tblName") + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(tblName)).isDefined) + } + } + test("REPLACE TABLE: v1 table") { val e = intercept[AnalysisException] { sql(s"CREATE OR REPLACE TABLE tbl (a int) USING ${classOf[SimpleScanSource].getName}") From a99a47ca1df689377dbfbf4dd7258f59aee2be44 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Tue, 15 Dec 2020 08:56:45 +0900 Subject: [PATCH 0766/1009] [SPARK-33748][K8S] Respect environment variables and configurations for Python executables ### What changes were proposed in this pull request? This PR proposes: - Respect `PYSPARK_PYTHON` and `PYSPARK_DRIVER_PYTHON` environment variables, or `spark.pyspark.python` and `spark.pyspark.driver.python` configurations in Kubernetes just like other cluster types in Spark. - Deprecate `spark.kubernetes.pyspark.pythonVersion` and guide users to set the environment variables and configurations for Python executables. NOTE that `spark.kubernetes.pyspark.pythonVersion` is already a no-op configuration without this PR. Default is `3` and other values are disallowed.
- In order for Python executable settings to be consistently used, fix the `spark.archives` option to unpack into the current working directory in the driver of Kubernetes cluster mode. This behaviour is identical to Yarn's cluster mode. By doing this, users can leverage Conda or virtualenv in cluster mode as below: ```bash conda create -y -n pyspark_conda_env -c conda-forge pyarrow pandas conda-pack conda activate pyspark_conda_env conda pack -f -o pyspark_conda_env.tar.gz PYSPARK_PYTHON=./environment/bin/python spark-submit --archives pyspark_conda_env.tar.gz#environment app.py ``` - Removed several unused or useless pieces of code such as `extractS3Key` and `renameResourcesToLocalFS` ### Why are the changes needed? - To provide consistent support of PySpark by using `PYSPARK_PYTHON` and `PYSPARK_DRIVER_PYTHON` environment variables, or `spark.pyspark.python` and `spark.pyspark.driver.python` configurations. - To provide Conda and virtualenv support via `spark.archives` options. ### Does this PR introduce _any_ user-facing change? Yes: - `spark.kubernetes.pyspark.pythonVersion` is deprecated. - `PYSPARK_PYTHON` and `PYSPARK_DRIVER_PYTHON` environment variables, and `spark.pyspark.python` and `spark.pyspark.driver.python` configurations are respected. ### How was this patch tested? Manually tested via: ```bash minikube delete minikube start --cpus 12 --memory 16384 kubectl create namespace spark-integration-test cat < Signed-off-by: HyukjinKwon --- .../org/apache/spark/deploy/SparkSubmit.scala | 54 ++++++------ docs/running-on-kubernetes.md | 5 +- .../org/apache/spark/deploy/k8s/Config.scala | 16 +++- .../apache/spark/deploy/k8s/Constants.scala | 3 +- .../features/DriverCommandFeatureStep.scala | 37 ++++++-- .../DriverCommandFeatureStepSuite.scala | 57 +++++++++++-- .../src/main/dockerfiles/spark/entrypoint.sh | 10 +-- .../k8s/integrationtest/DepsTestsSuite.scala | 85 ++++++++++++++----- .../k8s/integrationtest/KubernetesSuite.scala | 6 +- .../KubernetesTestComponents.scala | 5 +- .../k8s/integrationtest/ProcessUtils.scala | 5 +- .../deploy/k8s/integrationtest/Utils.scala | 9 +- .../tests/py_container_checks.py | 2 +- .../tests/python_executable_check.py | 40 +++++++++ 14 files changed, 256 insertions(+), 78 deletions(-) create mode 100644 resource-managers/kubernetes/integration-tests/tests/python_executable_check.py diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index ea293f03a2169..bb3a20dce2da4 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -31,7 +31,6 @@ import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import scala.util.{Properties, Try} -import org.apache.commons.io.FilenameUtils import org.apache.commons.lang3.StringUtils import org.apache.hadoop.conf.{Configuration => HadoopConfiguration} import org.apache.hadoop.fs.{FileSystem, Path} @@ -387,20 +386,40 @@ private[spark] class SparkSubmit extends Logging { // Replace with the downloaded local jar path to avoid propagating hadoop compatible uris. // Executors will get the jars from the Spark file server.
// Explicitly download the related files here - args.jars = renameResourcesToLocalFS(args.jars, localJars) + args.jars = localJars val filesLocalFiles = Option(args.files).map { downloadFileList(_, targetDir, sparkConf, hadoopConf, secMgr) }.orNull - val archiveLocalFiles = Option(args.archives).map { uri => - val resolvedUri = Utils.resolveURI(uri) - val downloadedUri = downloadFileList( - UriBuilder.fromUri(resolvedUri).fragment(null).build().toString, + val archiveLocalFiles = Option(args.archives).map { uris => + val resolvedUris = Utils.stringToSeq(uris).map(Utils.resolveURI) + val localArchives = downloadFileList( + resolvedUris.map( + UriBuilder.fromUri(_).fragment(null).build().toString).mkString(","), targetDir, sparkConf, hadoopConf, secMgr) - UriBuilder.fromUri(downloadedUri).fragment(resolvedUri.getFragment).build().toString + + // SPARK-33748: this mimics the behaviour of Yarn cluster mode. If the driver is running + // in cluster mode, the archives should be available in the driver's current working + // directory too. + Utils.stringToSeq(localArchives).map(Utils.resolveURI).zip(resolvedUris).map { + case (localArchive, resolvedUri) => + val source = new File(localArchive.getPath) + val dest = new File( + ".", + if (resolvedUri.getFragment != null) resolvedUri.getFragment else source.getName) + logInfo( + s"Unpacking an archive $resolvedUri " + + s"from ${source.getAbsolutePath} to ${dest.getAbsolutePath}") + Utils.deleteRecursively(dest) + Utils.unpack(source, dest) + + // Keep the URIs of local files with the given fragments. + UriBuilder.fromUri( + localArchive).fragment(resolvedUri.getFragment).build().toString + }.mkString(",") }.orNull - args.files = renameResourcesToLocalFS(args.files, filesLocalFiles) - args.archives = renameResourcesToLocalFS(args.archives, archiveLocalFiles) - args.pyFiles = renameResourcesToLocalFS(args.pyFiles, localPyFiles) + args.files = filesLocalFiles + args.archives = archiveLocalFiles + args.pyFiles = localPyFiles } } @@ -836,21 +855,6 @@ private[spark] class SparkSubmit extends Logging { (childArgs.toSeq, childClasspath.toSeq, sparkConf, childMainClass) } - private def renameResourcesToLocalFS(resources: String, localResources: String): String = { - if (resources != null && localResources != null) { - val localResourcesSeq = Utils.stringToSeq(localResources) - Utils.stringToSeq(resources).map { resource => - val filenameRemote = FilenameUtils.getName(new URI(resource).getPath) - localResourcesSeq.find { localUri => - val filenameLocal = FilenameUtils.getName(new URI(localUri).getPath) - filenameRemote == filenameLocal - }.getOrElse(resource) - }.mkString(",") - } else { - resources - } - } - // [SPARK-20328]. HadoopRDD calls into a Hadoop library that fetches delegation tokens with // renewer set to the YARN ResourceManager. Since YARN isn't configured in Mesos or Kubernetes // mode, we must trick it into thinking we're YARN. diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index e735c7493486e..93c6f94790abc 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1087,7 +1087,10 @@ See the [configuration page](configuration.html) for information on Spark config spark.kubernetes.pyspark.pythonVersion "3" - This sets the major Python version of the docker image used to run the driver and executor containers. Can be 3. + This sets the major Python version of the docker image used to run the driver and executor containers. + It can be only "3". 
This configuration was deprecated from Spark 3.1.0, and is effectively no-op. + Users should set 'spark.pyspark.python' and 'spark.pyspark.driver.python' configurations or + 'PYSPARK_PYTHON' and 'PYSPARK_DRIVER_PYTHON' environment variables. 2.4.0 diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index 40609aef1e9d8..6939de4697979 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -20,6 +20,7 @@ import java.util.concurrent.TimeUnit import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.{PYSPARK_DRIVER_PYTHON, PYSPARK_PYTHON} import org.apache.spark.internal.config.ConfigBuilder private[spark] object Config extends Logging { @@ -321,12 +322,19 @@ private[spark] object Config extends Logging { val PYSPARK_MAJOR_PYTHON_VERSION = ConfigBuilder("spark.kubernetes.pyspark.pythonVersion") - .doc("This sets the major Python version. Only 3 is available for Python3.") + .doc( + s"(Deprecated since Spark 3.1, please set '${PYSPARK_PYTHON.key}' and " + + s"'${PYSPARK_DRIVER_PYTHON.key}' configurations or $ENV_PYSPARK_PYTHON and " + + s"$ENV_PYSPARK_DRIVER_PYTHON environment variables instead.)") .version("2.4.0") .stringConf - .checkValue(pv => List("3").contains(pv), - "Ensure that major Python version is Python3") - .createWithDefault("3") + .checkValue("3" == _, + "Python 2 was dropped from Spark 3.1, and only 3 is allowed in " + + "this configuration. Note that this configuration was deprecated in Spark 3.1. 
" + + s"Please set '${PYSPARK_PYTHON.key}' and '${PYSPARK_DRIVER_PYTHON.key}' " + + s"configurations or $ENV_PYSPARK_PYTHON and $ENV_PYSPARK_DRIVER_PYTHON environment " + + "variables instead.") + .createOptional val KUBERNETES_KERBEROS_KRB5_FILE = ConfigBuilder("spark.kubernetes.kerberos.krb5.path") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Constants.scala index 4014a964ed950..543ca12594763 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Constants.scala @@ -74,7 +74,8 @@ private[spark] object Constants { val ENV_HADOOP_TOKEN_FILE_LOCATION = "HADOOP_TOKEN_FILE_LOCATION" // BINDINGS - val ENV_PYSPARK_MAJOR_PYTHON_VERSION = "PYSPARK_MAJOR_PYTHON_VERSION" + val ENV_PYSPARK_PYTHON = "PYSPARK_PYTHON" + val ENV_PYSPARK_DRIVER_PYTHON = "PYSPARK_DRIVER_PYTHON" // Pod spec templates val EXECUTOR_POD_SPEC_TEMPLATE_FILE_NAME = "pod-spec-template.yml" diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStep.scala index d49381ba897d4..8015a1af3e17d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStep.scala @@ -24,6 +24,8 @@ import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.submit._ +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.{PYSPARK_DRIVER_PYTHON, PYSPARK_PYTHON} import org.apache.spark.launcher.SparkLauncher /** @@ -31,7 +33,7 @@ import org.apache.spark.launcher.SparkLauncher * executors can also find the app code. */ private[spark] class DriverCommandFeatureStep(conf: KubernetesDriverConf) - extends KubernetesFeatureConfigStep { + extends KubernetesFeatureConfigStep with Logging { override def configurePod(pod: SparkPod): SparkPod = { conf.mainAppResource match { @@ -70,12 +72,37 @@ private[spark] class DriverCommandFeatureStep(conf: KubernetesDriverConf) SparkPod(pod.pod, driverContainer) } + // Exposed for testing purpose. + private[spark] def environmentVariables: Map[String, String] = sys.env + private def configureForPython(pod: SparkPod, res: String): SparkPod = { + if (conf.get(PYSPARK_MAJOR_PYTHON_VERSION).isDefined) { + logWarning( + s"${PYSPARK_MAJOR_PYTHON_VERSION.key} was deprecated in Spark 3.1. 
" + + s"Please set '${PYSPARK_PYTHON.key}' and '${PYSPARK_DRIVER_PYTHON.key}' " + + s"configurations or $ENV_PYSPARK_PYTHON and $ENV_PYSPARK_DRIVER_PYTHON environment " + + "variables instead.") + } + val pythonEnvs = - Seq(new EnvVarBuilder() - .withName(ENV_PYSPARK_MAJOR_PYTHON_VERSION) - .withValue(conf.get(PYSPARK_MAJOR_PYTHON_VERSION)) - .build()) + Seq( + conf.get(PYSPARK_PYTHON) + .orElse(environmentVariables.get(ENV_PYSPARK_PYTHON)).map { value => + new EnvVarBuilder() + .withName(ENV_PYSPARK_PYTHON) + .withValue(value) + .build() + }, + conf.get(PYSPARK_DRIVER_PYTHON) + .orElse(conf.get(PYSPARK_PYTHON)) + .orElse(environmentVariables.get(ENV_PYSPARK_DRIVER_PYTHON)) + .orElse(environmentVariables.get(ENV_PYSPARK_PYTHON)).map { value => + new EnvVarBuilder() + .withName(ENV_PYSPARK_DRIVER_PYTHON) + .withValue(value) + .build() + } + ).flatten // re-write primary resource to be the remote one and upload the related file val newResName = KubernetesUtils diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala index a44d465e35087..ebbb42f225c51 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala @@ -22,6 +22,7 @@ import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.submit._ +import org.apache.spark.internal.config.{PYSPARK_DRIVER_PYTHON, PYSPARK_PYTHON} class DriverCommandFeatureStepSuite extends SparkFunSuite { @@ -50,12 +51,51 @@ class DriverCommandFeatureStepSuite extends SparkFunSuite { "--properties-file", SPARK_CONF_PATH, "--class", KubernetesTestConf.MAIN_CLASS, mainResource, "5", "7", "9")) + } + + test("python executable precedence") { + val mainResource = "local:/main.py" - val envs = spec.pod.container.getEnv.asScala - .map { env => (env.getName, env.getValue) } - .toMap - val expected = Map(ENV_PYSPARK_MAJOR_PYTHON_VERSION -> "3") - assert(envs === expected) + val pythonExecutables = Seq( + (Some("conf_py"), Some("conf_driver_py"), Some("env_py"), Some("env_driver_py")), + (Some("conf_py"), None, Some("env_py"), Some("env_driver_py")), + (None, None, Some("env_py"), Some("env_driver_py")), + (None, None, Some("env_py"), None) + ) + + val expectedResults = Seq( + ("conf_py", "conf_driver_py"), + ("conf_py", "conf_py"), + ("env_py", "env_driver_py"), + ("env_py", "env_py") + ) + + pythonExecutables.zip(expectedResults).foreach { case (pythonExecutable, expected) => + val sparkConf = new SparkConf(false) + val (confPy, confDriverPy, envPy, envDriverPy) = pythonExecutable + confPy.foreach(sparkConf.set(PYSPARK_PYTHON, _)) + confDriverPy.foreach(sparkConf.set(PYSPARK_DRIVER_PYTHON, _)) + val pythonEnvs = Map( + ( + envPy.map(v => ENV_PYSPARK_PYTHON -> v :: Nil) ++ + envDriverPy.map(v => ENV_PYSPARK_DRIVER_PYTHON -> v :: Nil) + ).flatten.toArray: _*) + + val spec = applyFeatureStep( + PythonMainAppResource(mainResource), + conf = sparkConf, + appArgs = Array("foo"), + env = pythonEnvs) + + val envs = spec.pod.container.getEnv.asScala + .map { env => (env.getName, env.getValue) } + .toMap + + val (expectedEnvPy, expectedDriverPy) = expected + assert(envs === 
Map( + ENV_PYSPARK_PYTHON -> expectedEnvPy, + ENV_PYSPARK_DRIVER_PYTHON -> expectedDriverPy)) + } } test("R resource") { @@ -123,13 +163,16 @@ class DriverCommandFeatureStepSuite extends SparkFunSuite { resource: MainAppResource, conf: SparkConf = new SparkConf(false), appArgs: Array[String] = Array(), - proxyUser: Option[String] = None): KubernetesDriverSpec = { + proxyUser: Option[String] = None, + env: Map[String, String] = Map.empty[String, String]): KubernetesDriverSpec = { val kubernetesConf = KubernetesTestConf.createDriverConf( sparkConf = conf, mainAppResource = resource, appArgs = appArgs, proxyUser = proxyUser) - val step = new DriverCommandFeatureStep(kubernetesConf) + val step = new DriverCommandFeatureStep(kubernetesConf) { + private[spark] override val environmentVariables: Map[String, String] = env + } val pod = step.configurePod(SparkPod.initialPod()) val props = step.getAdditionalPodSystemProperties() KubernetesDriverSpec(pod, Nil, props) diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh index c837e00d2e468..f722471906bfb 100755 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh @@ -44,11 +44,11 @@ if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH" fi -if [ "$PYSPARK_MAJOR_PYTHON_VERSION" == "3" ]; then - pyv3="$(python3 -V 2>&1)" - export PYTHON_VERSION="${pyv3:7}" - export PYSPARK_PYTHON="python3" - export PYSPARK_DRIVER_PYTHON="python3" +if ! [ -z ${PYSPARK_PYTHON+x} ]; then + export PYSPARK_PYTHON +fi +if ! [ -z ${PYSPARK_DRIVER_PYTHON+x} ]; then + export PYSPARK_DRIVER_PYTHON fi # If HADOOP_HOME is set and SPARK_DIST_CLASSPATH is not set, set it here so Hadoop jars are available to the executor. 
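The executable precedence implemented above in `DriverCommandFeatureStep` can be summarized with a small standalone sketch (not Spark code; the helper name and parameters are made up for illustration):

```scala
// Spark confs take priority over environment variables, and the driver-side
// executable falls back to the executor-side one when not set explicitly.
def resolvePythonExecutables(
    confPython: Option[String],        // spark.pyspark.python
    confDriverPython: Option[String],  // spark.pyspark.driver.python
    envPython: Option[String],         // PYSPARK_PYTHON
    envDriverPython: Option[String]    // PYSPARK_DRIVER_PYTHON
): (Option[String], Option[String]) = {
  val executorPython = confPython.orElse(envPython)
  val driverPython = confDriverPython
    .orElse(confPython)
    .orElse(envDriverPython)
    .orElse(envPython)
  (executorPython, driverPython)
}

// Matches the expectations in DriverCommandFeatureStepSuite, e.g.:
// resolvePythonExecutables(Some("conf_py"), None, Some("env_py"), Some("env_driver_py"))
//   == (Some("conf_py"), Some("conf_py"))
```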
diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala index a15f7ffa134b8..0d15e0325758d 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala @@ -32,6 +32,7 @@ import org.apache.spark.deploy.k8s.integrationtest.DepsTestsSuite.{DEPS_TIMEOUT, import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite.{INTERVAL, MinikubeTag, TIMEOUT} import org.apache.spark.deploy.k8s.integrationtest.Utils.getExamplesJarName import org.apache.spark.deploy.k8s.integrationtest.backend.minikube.Minikube +import org.apache.spark.internal.config.{ARCHIVES, PYSPARK_DRIVER_PYTHON, PYSPARK_PYTHON} private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite => import KubernetesSuite.k8sTestTag @@ -135,7 +136,7 @@ private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite => .create(minioStatefulSet)) } - private def deleteMinioStorage(): Unit = { + private def deleteMinioStorage(): Unit = { kubernetesTestComponents .kubernetesClient .apps() @@ -167,7 +168,7 @@ private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite => tryDepsTest { val fileName = Utils.createTempFile(FILE_CONTENTS, HOST_PATH) Utils.createTarGzFile(s"$HOST_PATH/$fileName", s"$HOST_PATH/$fileName.tar.gz") - sparkAppConf.set("spark.archives", s"$HOST_PATH/$fileName.tar.gz#test_tar_gz") + sparkAppConf.set(ARCHIVES.key, s"$HOST_PATH/$fileName.tar.gz#test_tar_gz") val examplesJar = Utils.getTestFileAbsolutePath(getExamplesJarName(), sparkHomeDir) runSparkRemoteCheckAndVerifyCompletion(appResource = examplesJar, appArgs = Array(s"test_tar_gz/$fileName"), @@ -175,40 +176,81 @@ private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite => } } + test( + "SPARK-33748: Launcher python client respecting PYSPARK_PYTHON", k8sTestTag, MinikubeTag) { + val fileName = Utils.createTempFile( + """ + |#!/usr/bin/env bash + |export IS_CUSTOM_PYTHON=1 + |python3 "$@" + """.stripMargin, HOST_PATH) + Utils.createTarGzFile(s"$HOST_PATH/$fileName", s"$HOST_PATH/$fileName.tgz") + sparkAppConf.set(ARCHIVES.key, s"$HOST_PATH/$fileName.tgz#test_env") + val pySparkFiles = Utils.getTestFileAbsolutePath("python_executable_check.py", sparkHomeDir) + testPython(pySparkFiles, + Seq( + s"PYSPARK_PYTHON: ./test_env/$fileName", + s"PYSPARK_DRIVER_PYTHON: ./test_env/$fileName", + "Custom Python used on executor: True", + "Custom Python used on driver: True"), + env = Map("PYSPARK_PYTHON" -> s"./test_env/$fileName")) + } + + test( + "SPARK-33748: Launcher python client respecting " + + s"${PYSPARK_PYTHON.key} and ${PYSPARK_DRIVER_PYTHON.key}", k8sTestTag, MinikubeTag) { + val fileName = Utils.createTempFile( + """ + |#!/usr/bin/env bash + |export IS_CUSTOM_PYTHON=1 + |python3 "$@" + """.stripMargin, HOST_PATH) + Utils.createTarGzFile(s"$HOST_PATH/$fileName", s"$HOST_PATH/$fileName.tgz") + sparkAppConf.set(ARCHIVES.key, s"$HOST_PATH/$fileName.tgz#test_env") + sparkAppConf.set(PYSPARK_PYTHON.key, s"./test_env/$fileName") + sparkAppConf.set(PYSPARK_DRIVER_PYTHON.key, "python3") + val pySparkFiles = Utils.getTestFileAbsolutePath("python_executable_check.py", sparkHomeDir) + testPython(pySparkFiles, + Seq( + s"PYSPARK_PYTHON: 
./test_env/$fileName", + "PYSPARK_DRIVER_PYTHON: python3", + "Custom Python used on executor: True", + "Custom Python used on driver: False")) + } + test("Launcher python client dependencies using a zip file", k8sTestTag, MinikubeTag) { + val pySparkFiles = Utils.getTestFileAbsolutePath("pyfiles.py", sparkHomeDir) val inDepsFile = Utils.getTestFileAbsolutePath("py_container_checks.py", sparkHomeDir) val outDepsFile = s"${inDepsFile.substring(0, inDepsFile.lastIndexOf("."))}.zip" Utils.createZipFile(inDepsFile, outDepsFile) - testPythonDeps(outDepsFile) + testPython( + pySparkFiles, + Seq( + "Python runtime version check is: True", + "Python environment version check is: True", + "Python runtime version check for executor is: True"), + Some(outDepsFile)) } - private def testPythonDeps(depsFile: String): Unit = { - tryDepsTest({ - val pySparkFiles = Utils.getTestFileAbsolutePath("pyfiles.py", sparkHomeDir) + private def testPython( + pySparkFiles: String, + expectedDriverLogs: Seq[String], + depsFile: Option[String] = None, + env: Map[String, String] = Map.empty[String, String]): Unit = { + tryDepsTest { setPythonSparkConfProperties(sparkAppConf) runSparkApplicationAndVerifyCompletion( appResource = pySparkFiles, mainClass = "", - expectedDriverLogOnCompletion = Seq( - "Python runtime version check is: True", - "Python environment version check is: True", - "Python runtime version check for executor is: True"), + expectedDriverLogOnCompletion = expectedDriverLogs, appArgs = Array("python3"), driverPodChecker = doBasicDriverPyPodCheck, executorPodChecker = doBasicExecutorPyPodCheck, appLocator = appLocator, isJVM = false, - pyFiles = Option(depsFile)) }) - } - - private def extractS3Key(data: String, key: String): String = { - data.split("\n") - .filter(_.contains(key)) - .head - .split(":") - .last - .trim - .replaceAll("[,|\"]", "") + pyFiles = depsFile, + env = env) + } } private def createS3Bucket(accessKey: String, secretKey: String, endPoint: String): Unit = { @@ -269,7 +311,6 @@ private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite => private def setPythonSparkConfProperties(conf: SparkAppConf): Unit = { sparkAppConf.set("spark.kubernetes.container.image", pyImage) - .set("spark.kubernetes.pyspark.pythonVersion", "3") } private def tryDepsTest(runTest: => Unit): Unit = { diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index 7b2a2d0820238..494c82512adaf 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -274,7 +274,8 @@ class KubernetesSuite extends SparkFunSuite isJVM: Boolean, pyFiles: Option[String] = None, executorPatience: Option[(Option[Interval], Option[Timeout])] = None, - decommissioningTest: Boolean = false): Unit = { + decommissioningTest: Boolean = false, + env: Map[String, String] = Map.empty[String, String]): Unit = { // scalastyle:on argcount val appArguments = SparkAppArguments( @@ -370,7 +371,8 @@ class KubernetesSuite extends SparkFunSuite TIMEOUT.value.toSeconds.toInt, sparkHomeDir, isJVM, - pyFiles) + pyFiles, + env) val driverPod = kubernetesTestComponents.kubernetesClient .pods() diff --git 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala index 0bf01e6b66427..0392008fff2f5 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala @@ -109,7 +109,8 @@ private[spark] object SparkAppLauncher extends Logging { timeoutSecs: Int, sparkHomeDir: Path, isJVM: Boolean, - pyFiles: Option[String] = None): Unit = { + pyFiles: Option[String] = None, + env: Map[String, String] = Map.empty[String, String]): Unit = { val sparkSubmitExecutable = sparkHomeDir.resolve(Paths.get("bin", "spark-submit")) logInfo(s"Launching a spark app with arguments $appArguments and conf $appConf") val preCommandLine = if (isJVM) { @@ -130,6 +131,6 @@ private[spark] object SparkAppLauncher extends Logging { commandLine ++= appArguments.appArgs } logInfo(s"Launching a spark app with command line: ${commandLine.mkString(" ")}") - ProcessUtils.executeProcess(commandLine.toArray, timeoutSecs) + ProcessUtils.executeProcess(commandLine.toArray, timeoutSecs, env = env) } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ProcessUtils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ProcessUtils.scala index a1ecd48e747ea..cc05990893e36 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ProcessUtils.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ProcessUtils.scala @@ -19,6 +19,7 @@ package org.apache.spark.deploy.k8s.integrationtest import java.nio.charset.StandardCharsets import java.util.concurrent.TimeUnit +import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import scala.io.Source @@ -32,8 +33,10 @@ object ProcessUtils extends Logging { def executeProcess( fullCommand: Array[String], timeout: Long, - dumpErrors: Boolean = true): Seq[String] = { + dumpErrors: Boolean = true, + env: Map[String, String] = Map.empty[String, String]): Seq[String] = { val pb = new ProcessBuilder().command(fullCommand: _*) + pb.environment().putAll(env.asJava) pb.redirectErrorStream(true) val proc = pb.start() val outputLines = new ArrayBuffer[String] diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala index 519443130008b..cc258533c2c8d 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala @@ -153,6 +153,7 @@ object Utils extends Logging { } def createTarGzFile(inFile: String, outFile: String): Unit = { + val oFile = new File(outFile) val fileToTarGz = new File(inFile) Utils.tryWithResource( new FileInputStream(fileToTarGz) @@ -160,15 +161,19 @@ object Utils extends Logging { 
Utils.tryWithResource( new TarArchiveOutputStream( new GzipCompressorOutputStream( - new FileOutputStream( - new File(outFile)))) + new FileOutputStream(oFile))) ) { tOut => val tarEntry = new TarArchiveEntry(fileToTarGz, fileToTarGz.getName) + // Each entry does not keep the file permission from the input file. + // Setting permissions in the input file do not work. Just simply set + // to 777. + tarEntry.setMode(0x81ff) tOut.putArchiveEntry(tarEntry) IOUtils.copy(fis, tOut) tOut.closeArchiveEntry() tOut.finish() } } + oFile.deleteOnExit() } } diff --git a/resource-managers/kubernetes/integration-tests/tests/py_container_checks.py b/resource-managers/kubernetes/integration-tests/tests/py_container_checks.py index f6b3be2806c82..e6c0137c0405f 100644 --- a/resource-managers/kubernetes/integration-tests/tests/py_container_checks.py +++ b/resource-managers/kubernetes/integration-tests/tests/py_container_checks.py @@ -24,7 +24,7 @@ def version_check(python_env, major_python_version): These are various tests to test the Python container image. This file will be distributed via --py-files in the e2e tests. """ - env_version = os.environ.get('PYSPARK_PYTHON') + env_version = os.environ.get('PYSPARK_PYTHON', 'python3') print("Python runtime version check is: " + str(sys.version_info[0] == major_python_version)) diff --git a/resource-managers/kubernetes/integration-tests/tests/python_executable_check.py b/resource-managers/kubernetes/integration-tests/tests/python_executable_check.py new file mode 100644 index 0000000000000..89fd2aacab1a3 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/tests/python_executable_check.py @@ -0,0 +1,40 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os + +from pyspark.sql import SparkSession + + +if __name__ == "__main__": + spark = SparkSession \ + .builder \ + .appName("PythonExecutableTest") \ + .getOrCreate() + + # Check python executable at executors + is_custom_python_executor = spark.range(1).rdd.map( + lambda _: "IS_CUSTOM_PYTHON" in os.environ).first() + + print("PYSPARK_PYTHON: %s" % os.environ.get("PYSPARK_PYTHON")) + print("PYSPARK_DRIVER_PYTHON: %s" % os.environ.get("PYSPARK_DRIVER_PYTHON")) + + print("Custom Python used on executor: %s" % is_custom_python_executor) + + is_custom_python_driver = "IS_CUSTOM_PYTHON" in os.environ + print("Custom Python used on driver: %s" % is_custom_python_driver) + + spark.stop() From 366beda54a2911e59a994bfed9fb84a97aa2ab8b Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Tue, 15 Dec 2020 05:23:39 +0000 Subject: [PATCH 0767/1009] [SPARK-33785][SQL] Migrate ALTER TABLE ... RECOVER PARTITIONS to use UnresolvedTable to resolve the identifier ### What changes were proposed in this pull request? This PR proposes to migrate `ALTER TABLE ... 
RECOVER PARTITIONS` to use `UnresolvedTable` to resolve the table identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). Note that `ALTER TABLE ... RECOVER PARTITIONS` is not supported for v2 tables. ### Why are the changes needed? The PR makes the resolution behavior consistent. For example, ```scala sql("CREATE DATABASE test") sql("CREATE TABLE spark_catalog.test.t (id bigint, val string) USING csv PARTITIONED BY (id)") sql("CREATE TEMPORARY VIEW t AS SELECT 2") sql("USE spark_catalog.test") sql("ALTER TABLE t RECOVER PARTITIONS") // works fine ``` , but after this PR: ``` sql("ALTER TABLE t RECOVER PARTITIONS") org.apache.spark.sql.AnalysisException: t is a temp view. 'ALTER TABLE ... RECOVER PARTITIONS' expects a table; line 1 pos 0 ``` , which is consistent with the behavior of other commands. ### Does this PR introduce _any_ user-facing change? After this PR, `ALTER TABLE t RECOVER PARTITIONS` in the above example is resolved to a temp view `t` first instead of `spark_catalog.test.t`. ### How was this patch tested? Updated existing tests. Closes #30773 from imback82/alter_table_recover_part_v2.
RECOVER PARTITIONS")) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index a0e11962f9c05..c8395f375b4ed 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -292,12 +292,6 @@ case class AlterTableSetLocationStatement( partitionSpec: Option[TablePartitionSpec], location: String) extends ParsedStatement -/** - * ALTER TABLE ... RECOVER PARTITIONS command, as parsed from SQL. - */ -case class AlterTableRecoverPartitionsStatement( - tableName: Seq[String]) extends ParsedStatement - /** * ALTER TABLE ... RENAME PARTITION command, as parsed from SQL. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 0f35674055dc4..2091d92eb67c9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -673,6 +673,13 @@ case class AlterTableDropPartition( override def children: Seq[LogicalPlan] = child :: Nil } +/** + * The logical plan of the ALTER TABLE ... RECOVER PARTITIONS command. + */ +case class AlterTableRecoverPartitions(child: LogicalPlan) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} + /** * The logical plan of the LOAD DATA INTO TABLE command. */ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index e8bbc6b22a819..9862a087dd93f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -2076,7 +2076,8 @@ class DDLParserSuite extends AnalysisTest { test("alter table: recover partitions") { comparePlans( parsePlan("ALTER TABLE a.b.c RECOVER PARTITIONS"), - AlterTableRecoverPartitionsStatement(Seq("a", "b", "c"))) + AlterTableRecoverPartitions( + UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... 
RECOVER PARTITIONS"))) } test("alter view: add partition (not supported)") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 83dda7db09ac2..802068de10d16 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -449,10 +449,9 @@ class ResolveSessionCatalog( } ShowColumnsCommand(db, v1TableName) - case AlterTableRecoverPartitionsStatement(tbl) => - val v1TableName = parseV1Table(tbl, "ALTER TABLE RECOVER PARTITIONS") + case AlterTableRecoverPartitions(ResolvedV1TableIdentifier(ident)) => AlterTableRecoverPartitionsCommand( - v1TableName.asTableIdentifier, + ident.asTableIdentifier, "ALTER TABLE RECOVER PARTITIONS") case AlterTableAddPartition(ResolvedV1TableIdentifier(ident), partSpecsAndLocs, ifNotExists) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 1dd9f551ff8c9..6020e42b21900 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -330,6 +330,10 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat AlterTableDropPartitionExec( table, parts.asResolvedPartitionSpecs, ignoreIfNotExists) :: Nil + case AlterTableRecoverPartitions(_: ResolvedTable) => + throw new AnalysisException( + "ALTER TABLE ... RECOVER PARTITIONS is not supported for v2 tables.") + case LoadData(_: ResolvedTable, _, _, _, _) => throw new AnalysisException("LOAD DATA is not supported for v2 tables.") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala index 570976965ec7c..cd80867000932 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala @@ -37,7 +37,8 @@ class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { val e = intercept[AnalysisException] { sql(s"ALTER TABLE $t RECOVER PARTITIONS") } - assert(e.message.contains("ALTER TABLE RECOVER PARTITIONS is only supported with v1 tables")) + assert(e.message.contains( + "ALTER TABLE ... 
RECOVER PARTITIONS is not supported for v2 tables.")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index c60b61a111c3f..1a248fc18988a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -144,7 +144,9 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { assertNoSuchTable(s"ALTER TABLE $viewName PARTITION (a=1, b=2) SET SERDE 'whatever'") assertNoSuchTable(s"ALTER TABLE $viewName SET SERDEPROPERTIES ('p' = 'an')") assertNoSuchTable(s"ALTER TABLE $viewName PARTITION (a='4') RENAME TO PARTITION (a='5')") - assertNoSuchTable(s"ALTER TABLE $viewName RECOVER PARTITIONS") + assertAnalysisError( + s"ALTER TABLE $viewName RECOVER PARTITIONS", + s"$viewName is a temp view. 'ALTER TABLE ... RECOVER PARTITIONS' expects a table") // For v2 ALTER TABLE statements, we have better error message saying view is not supported. assertAnalysisError( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index b686d040b9644..488b52aa7bd45 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -899,7 +899,9 @@ class HiveDDLSuite assertErrorForAlterTableOnView( s"ALTER TABLE $oldViewName PARTITION (a=1, b=2) SET SERDEPROPERTIES ('x' = 'y')") - assertErrorForAlterTableOnView(s"ALTER TABLE $oldViewName RECOVER PARTITIONS") + assertAnalysisError( + s"ALTER TABLE $oldViewName RECOVER PARTITIONS", + s"$oldViewName is a view. 'ALTER TABLE ... RECOVER PARTITIONS' expects a table.") assertErrorForAlterTableOnView( s"ALTER TABLE $oldViewName PARTITION (a='1') RENAME TO PARTITION (a='100')") From 141e26d65ba92c96ce1aeaf4d93dc0bfbafda902 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 15 Dec 2020 05:36:57 +0000 Subject: [PATCH 0768/1009] [SPARK-33767][SQL][TESTS] Unify v1 and v2 ALTER TABLE .. DROP PARTITION tests ### What changes were proposed in this pull request? 1. Move the `ALTER TABLE .. DROP PARTITION` parsing tests to `AlterTableDropPartitionParserSuite` 2. Place v1 tests for `ALTER TABLE .. DROP PARTITION` from `DDLSuite` and v2 tests from `AlterTablePartitionV2SQLSuite` to the common trait `AlterTableDropPartitionSuiteBase`, so, the tests will run for V1, Hive V1 and V2 DS. ### Why are the changes needed? - The unification will allow to run common `ALTER TABLE .. DROP PARTITION` tests for both DSv1 and Hive DSv1, DSv2 - We can detect missing features and differences between DSv1 and DSv2 implementations. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running new test suites: ``` $ build/sbt -Phive -Phive-thriftserver "test:testOnly *AlterTableDropPartitionParserSuite" $ build/sbt -Phive -Phive-thriftserver "test:testOnly *AlterTableDropPartitionSuite" ``` Closes #30747 from MaxGekk/unify-alter-table-drop-partition-tests. 
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/DDLParserSuite.scala | 50 +----- .../AlterTablePartitionV2SQLSuite.scala | 112 ------------- .../AlterTableDropPartitionParserSuite.scala | 88 +++++++++++ .../AlterTableDropPartitionSuiteBase.scala | 149 ++++++++++++++++++ .../sql/execution/command/DDLSuite.scala | 57 ------- .../v1/AlterTableDropPartitionSuite.scala | 52 ++++++ .../v2/AlterTableDropPartitionSuite.scala | 66 ++++++++ .../sql/hive/execution/HiveDDLSuite.scala | 4 - .../AlterTableDropPartitionSuite.scala | 48 ++++++ 9 files changed, 404 insertions(+), 222 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionParserSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableDropPartitionSuite.scala diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 9862a087dd93f..2b3fc6f71a5c0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.parser import java.util.Locale import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedNamespace, UnresolvedPartitionSpec, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView, UnresolvedView} +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedNamespace, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView, UnresolvedView} import org.apache.spark.sql.catalyst.catalog.{ArchiveResource, BucketSpec, FileResource, FunctionResource, JarResource} import org.apache.spark.sql.catalyst.expressions.{EqualTo, Literal} import org.apache.spark.sql.catalyst.plans.logical._ @@ -2115,54 +2115,6 @@ class DDLParserSuite extends AnalysisTest { comparePlans(parsed2, expected2) } - // ALTER TABLE table_name DROP [IF EXISTS] PARTITION spec1[, PARTITION spec2, ...] - // ALTER VIEW table_name DROP [IF EXISTS] PARTITION spec1[, PARTITION spec2, ...] 
- test("alter table: drop partition") { - val sql1_table = - """ - |ALTER TABLE table_name DROP IF EXISTS PARTITION - |(dt='2008-08-08', country='us'), PARTITION (dt='2009-09-09', country='uk') - """.stripMargin - val sql2_table = - """ - |ALTER TABLE table_name DROP PARTITION - |(dt='2008-08-08', country='us'), PARTITION (dt='2009-09-09', country='uk') - """.stripMargin - val sql1_view = sql1_table.replace("TABLE", "VIEW") - val sql2_view = sql2_table.replace("TABLE", "VIEW") - - val parsed1_table = parsePlan(sql1_table) - val parsed2_table = parsePlan(sql2_table) - val parsed1_purge = parsePlan(sql1_table + " PURGE") - - assertUnsupported(sql1_view) - assertUnsupported(sql2_view) - - val expected1_table = AlterTableDropPartition( - UnresolvedTable(Seq("table_name"), "ALTER TABLE ... DROP PARTITION ..."), - Seq( - UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us")), - UnresolvedPartitionSpec(Map("dt" -> "2009-09-09", "country" -> "uk"))), - ifExists = true, - purge = false) - val expected2_table = expected1_table.copy(ifExists = false) - val expected1_purge = expected1_table.copy(purge = true) - - comparePlans(parsed1_table, expected1_table) - comparePlans(parsed2_table, expected2_table) - comparePlans(parsed1_purge, expected1_purge) - - val sql3_table = "ALTER TABLE a.b.c DROP IF EXISTS PARTITION (ds='2017-06-10')" - val expected3_table = AlterTableDropPartition( - UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... DROP PARTITION ..."), - Seq(UnresolvedPartitionSpec(Map("ds" -> "2017-06-10"))), - ifExists = true, - purge = false) - - val parsed3_table = parsePlan(sql3_table) - comparePlans(parsed3_table, expected3_table) - } - test("show current namespace") { comparePlans( parsePlan("SHOW CURRENT NAMESPACE"), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala index cd80867000932..ac4d055eb0e60 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala @@ -18,18 +18,8 @@ package org.apache.spark.sql.connector import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionsException -import org.apache.spark.sql.connector.catalog.{CatalogV2Implicits, Identifier} -import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits -import org.apache.spark.sql.internal.SQLConf class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { - - import CatalogV2Implicits._ - import DataSourceV2Implicits._ - - test("ALTER TABLE RECOVER PARTITIONS") { val t = "testcat.ns1.ns2.tbl" withTable(t) { @@ -52,106 +42,4 @@ class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { assert(e.message.contains("ALTER TABLE RENAME PARTITION is only supported with v1 tables")) } } - - test("ALTER TABLE DROP PARTITION") { - val t = "testpart.ns1.ns2.tbl" - withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") - spark.sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") - spark.sql(s"ALTER TABLE $t DROP PARTITION (id=1)") - - val partTable = - catalog("testpart").asTableCatalog.loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")) - assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(1)))) - } - } - - test("ALTER TABLE DROP 
PARTITIONS") { - val t = "testpart.ns1.ns2.tbl" - withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") - spark.sql(s"ALTER TABLE $t ADD IF NOT EXISTS PARTITION (id=1) LOCATION 'loc'" + - " PARTITION (id=2) LOCATION 'loc1'") - spark.sql(s"ALTER TABLE $t DROP PARTITION (id=1), PARTITION (id=2)") - - val partTable = - catalog("testpart").asTableCatalog.loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")) - assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(1)))) - assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(2)))) - assert( - partTable.asPartitionable.listPartitionIdentifiers(Array.empty, InternalRow.empty).isEmpty) - } - } - - test("ALTER TABLE DROP PARTITIONS: partition not exists") { - val t = "testpart.ns1.ns2.tbl" - withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") - spark.sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") - - assertThrows[NoSuchPartitionsException]( - spark.sql(s"ALTER TABLE $t DROP PARTITION (id=1), PARTITION (id=2)")) - - val partTable = - catalog("testpart").asTableCatalog.loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")) - assert(partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(1)))) - - spark.sql(s"ALTER TABLE $t DROP IF EXISTS PARTITION (id=1), PARTITION (id=2)") - assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(1)))) - assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(2)))) - assert( - partTable.asPartitionable.listPartitionIdentifiers(Array.empty, InternalRow.empty).isEmpty) - } - } - - test("case sensitivity in resolving partition specs") { - val t = "testpart.ns1.ns2.tbl" - withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - val errMsg = intercept[AnalysisException] { - spark.sql(s"ALTER TABLE $t DROP PARTITION (ID=1)") - }.getMessage - assert(errMsg.contains(s"ID is not a valid partition column in table $t")) - } - - val partTable = catalog("testpart").asTableCatalog - .loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")) - .asPartitionable - assert(!partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) - - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { - spark.sql(s"ALTER TABLE $t ADD PARTITION (ID=1) LOCATION 'loc1'") - assert(partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) - spark.sql(s"ALTER TABLE $t DROP PARTITION (Id=1)") - assert(!partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) - } - } - } - - test("SPARK-33650: drop partition into a table which doesn't support partition management") { - val t = "testcat.ns1.ns2.tbl" - withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING _") - val errMsg = intercept[AnalysisException] { - spark.sql(s"ALTER TABLE $t DROP PARTITION (id=1)") - }.getMessage - assert(errMsg.contains(s"Table $t can not alter partitions")) - } - } - - test("SPARK-33676: not fully specified partition spec") { - val t = "testpart.ns1.ns2.tbl" - withTable(t) { - sql(s""" - |CREATE TABLE $t (id bigint, part0 int, part1 string) - |USING foo - |PARTITIONED BY (part0, part1)""".stripMargin) - val errMsg = intercept[AnalysisException] { - sql(s"ALTER TABLE $t DROP PARTITION (part0 = 1)") - }.getMessage - assert(errMsg.contains("Partition spec is invalid. 
" + - "The spec (part0) must match the partition spec (part0, part1)")) - } - } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionParserSuite.scala new file mode 100644 index 0000000000000..53edd5854f289 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionParserSuite.scala @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedPartitionSpec, UnresolvedTable} +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.catalyst.plans.logical.AlterTableDropPartition +import org.apache.spark.sql.test.SharedSparkSession + +class AlterTableDropPartitionParserSuite extends AnalysisTest with SharedSparkSession { + test("drop partition") { + val sql = """ + |ALTER TABLE table_name DROP PARTITION + |(dt='2008-08-08', country='us'), PARTITION (dt='2009-09-09', country='uk') + """.stripMargin + val expected = AlterTableDropPartition( + UnresolvedTable(Seq("table_name"), "ALTER TABLE ... DROP PARTITION ..."), + Seq( + UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us")), + UnresolvedPartitionSpec(Map("dt" -> "2009-09-09", "country" -> "uk"))), + ifExists = false, + purge = false) + + comparePlans(parsePlan(sql), expected) + } + + test("drop partition if exists") { + val sql = """ + |ALTER TABLE table_name DROP IF EXISTS + |PARTITION (dt='2008-08-08', country='us'), + |PARTITION (dt='2009-09-09', country='uk') + """.stripMargin + val expected = AlterTableDropPartition( + UnresolvedTable(Seq("table_name"), "ALTER TABLE ... DROP PARTITION ..."), + Seq( + UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us")), + UnresolvedPartitionSpec(Map("dt" -> "2009-09-09", "country" -> "uk"))), + ifExists = true, + purge = false) + comparePlans(parsePlan(sql), expected) + } + + test("drop partition in a table with multi-part identifier") { + val sql = "ALTER TABLE a.b.c DROP IF EXISTS PARTITION (ds='2017-06-10')" + val expected = AlterTableDropPartition( + UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... DROP PARTITION ..."), + Seq(UnresolvedPartitionSpec(Map("ds" -> "2017-06-10"))), + ifExists = true, + purge = false) + + comparePlans(parsePlan(sql), expected) + } + + test("drop partition with PURGE") { + val sql = "ALTER TABLE table_name DROP PARTITION (p=1) PURGE" + val expected = AlterTableDropPartition( + UnresolvedTable(Seq("table_name"), "ALTER TABLE ... 
DROP PARTITION ..."), + Seq(UnresolvedPartitionSpec(Map("p" -> "1"))), + ifExists = false, + purge = true) + + comparePlans(parsePlan(sql), expected) + } + + test("drop partition from view") { + val sql = "ALTER VIEW table_name DROP PARTITION (p=1)" + val errMsg = intercept[ParseException] { + parsePlan(sql) + }.getMessage + assert(errMsg.contains("Operation not allowed")) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala new file mode 100644 index 0000000000000..ed479e2824fb7 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.scalactic.source.Position +import org.scalatest.Tag + +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.execution.datasources.PartitioningUtils +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SQLTestUtils + +trait AlterTableDropPartitionSuiteBase extends QueryTest with SQLTestUtils { + protected def version: String + protected def catalog: String + protected def defaultUsing: String + + protected def notFullPartitionSpecErr: String + + override def test(testName: String, testTags: Tag*)(testFun: => Any) + (implicit pos: Position): Unit = { + super.test(s"ALTER TABLE .. 
DROP PARTITION $version: " + testName, testTags: _*)(testFun) + } + + protected def withNsTable(ns: String, tableName: String, cat: String = catalog) + (f: String => Unit): Unit = { + val nsCat = s"$cat.$ns" + withNamespace(nsCat) { + sql(s"CREATE NAMESPACE $nsCat") + val t = s"$nsCat.$tableName" + withTable(t) { + f(t) + } + } + } + + protected def checkPartitions(t: String, expected: Map[String, String]*): Unit = { + val partitions = sql(s"SHOW PARTITIONS $t") + .collect() + .toSet + .map((row: Row) => row.getString(0)) + .map(PartitioningUtils.parsePathFragment) + assert(partitions === expected.toSet) + } + + protected def checkDropPartition( + t: String, + ifExists: String, + specs: Map[String, Any]*): Unit = { + checkPartitions(t, specs.map(_.mapValues(_.toString).toMap): _*) + val specStr = specs.map( + _.map { + case (k, v: String) => s"$k = '$v'" + case (k, v) => s"$k = $v" + }.mkString("PARTITION (", ", ", ")")) + .mkString(", ") + sql(s"ALTER TABLE $t DROP $ifExists $specStr") + checkPartitions(t) + } + + test("single partition") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + Seq("", "IF EXISTS").foreach { ifExists => + sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") + checkDropPartition(t, ifExists, Map("id" -> 1)) + } + } + } + + test("multiple partitions") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + Seq("", "IF EXISTS").foreach { ifExists => + sql(s""" + |ALTER TABLE $t ADD + |PARTITION (id=1) LOCATION 'loc' + |PARTITION (id=2) LOCATION 'loc1'""".stripMargin) + checkDropPartition(t, ifExists, Map("id" -> 1), Map("id" -> 2)) + } + } + } + + test("multi-part partition") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, a int, b string) $defaultUsing PARTITIONED BY (a, b)") + Seq("", "IF EXISTS").foreach { ifExists => + sql(s"ALTER TABLE $t ADD PARTITION (a = 2, b = 'abc')") + checkDropPartition(t, ifExists, Map("a" -> 2, "b" -> "abc")) + } + } + } + + test("table to alter does not exist") { + withNsTable("ns", "does_not_exist") { t => + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t DROP PARTITION (a='4', b='9')") + }.getMessage + assert(errMsg.contains("Table not found")) + } + } + + test("case sensitivity in resolving partition specs") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t DROP PARTITION (ID=1)") + }.getMessage + assert(errMsg.contains("ID is not a valid partition column")) + } + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + Seq("", "IF EXISTS").foreach { ifExists => + sql(s"ALTER TABLE $t ADD PARTITION (ID=1) LOCATION 'loc1'") + checkDropPartition(t, ifExists, Map("id" -> 1)) + } + } + } + } + + test("SPARK-33676: not fully specified partition spec") { + withNsTable("ns", "tbl") { t => + sql(s""" + |CREATE TABLE $t (id bigint, part0 int, part1 string) + |$defaultUsing + |PARTITIONED BY (part0, part1)""".stripMargin) + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t DROP PARTITION (part0 = 1)") + }.getMessage + assert(errMsg.contains(notFullPartitionSpecErr)) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala 
index 05e0f4f4a538c..d6474ae7d5f00 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -334,10 +334,6 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { testChangeColumn(isDatasourceTable = true) } - test("alter table: drop partition (datasource table)") { - testDropPartitions(isDatasourceTable = true) - } - test("alter table: rename partition (datasource table)") { testRenamePartitions(isDatasourceTable = true) } @@ -1617,59 +1613,6 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { } } - protected def testDropPartitions(isDatasourceTable: Boolean): Unit = { - if (!isUsingHiveMetastore) { - assert(isDatasourceTable, "InMemoryCatalog only supports data source tables") - } - val catalog = spark.sessionState.catalog - val tableIdent = TableIdentifier("tab1", Some("dbx")) - val part1 = Map("a" -> "1", "b" -> "5") - val part2 = Map("a" -> "2", "b" -> "6") - val part3 = Map("a" -> "3", "b" -> "7") - val part4 = Map("a" -> "4", "b" -> "8") - val part5 = Map("a" -> "9", "b" -> "9") - createDatabase(catalog, "dbx") - createTable(catalog, tableIdent, isDatasourceTable) - createTablePartition(catalog, part1, tableIdent) - createTablePartition(catalog, part2, tableIdent) - createTablePartition(catalog, part3, tableIdent) - createTablePartition(catalog, part4, tableIdent) - createTablePartition(catalog, part5, tableIdent) - assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == - Set(part1, part2, part3, part4, part5)) - - // basic drop partition - sql("ALTER TABLE dbx.tab1 DROP IF EXISTS PARTITION (a='4', b='8'), PARTITION (a='3', b='7')") - assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == Set(part1, part2, part5)) - - // drop partitions without explicitly specifying database - catalog.setCurrentDatabase("dbx") - sql("ALTER TABLE tab1 DROP IF EXISTS PARTITION (a='2', b ='6')") - assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == Set(part1, part5)) - - // table to alter does not exist - intercept[AnalysisException] { - sql("ALTER TABLE does_not_exist DROP IF EXISTS PARTITION (a='2')") - } - - // partition to drop does not exist - intercept[AnalysisException] { - sql("ALTER TABLE tab1 DROP PARTITION (a='300')") - } - - // partition to drop does not exist when using IF EXISTS - sql("ALTER TABLE tab1 DROP IF EXISTS PARTITION (a='300')") - assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == Set(part1, part5)) - - // partition spec in DROP PARTITION should be case insensitive by default - sql("ALTER TABLE tab1 DROP PARTITION (A='1', B='5')") - assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == Set(part5)) - - // use int literal as partition value for int type partition column - sql("ALTER TABLE tab1 DROP PARTITION (a=9, b=9)") - assert(catalog.listPartitions(tableIdent).isEmpty) - } - protected def testRenamePartitions(isDatasourceTable: Boolean): Unit = { if (!isUsingHiveMetastore) { assert(isDatasourceTable, "InMemoryCatalog only supports data source tables") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala new file mode 100644 index 0000000000000..5ad182bc689b9 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache 
Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command.v1 + +import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionsException +import org.apache.spark.sql.connector.catalog.CatalogManager +import org.apache.spark.sql.execution.command +import org.apache.spark.sql.test.SharedSparkSession + +trait AlterTableDropPartitionSuiteBase extends command.AlterTableDropPartitionSuiteBase { + override def version: String = "V1" + override def catalog: String = CatalogManager.SESSION_CATALOG_NAME + override def defaultUsing: String = "USING parquet" + + override protected val notFullPartitionSpecErr = "The following partitions not found in table" +} + +class AlterTableDropPartitionSuite + extends AlterTableDropPartitionSuiteBase + with SharedSparkSession { + + test("partition not exists") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") + + val errMsg = intercept[NoSuchPartitionsException] { + sql(s"ALTER TABLE $t DROP PARTITION (id=1), PARTITION (id=2)") + }.getMessage + assert(errMsg.contains("partitions not found in table")) + + checkPartitions(t, Map("id" -> "1")) + sql(s"ALTER TABLE $t DROP IF EXISTS PARTITION (id=1), PARTITION (id=2)") + checkPartitions(t) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala new file mode 100644 index 0000000000000..608e7d7c98f6f --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command.v2 + +import org.apache.spark.SparkConf +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionsException +import org.apache.spark.sql.connector.{InMemoryPartitionTableCatalog, InMemoryTableCatalog} +import org.apache.spark.sql.execution.command +import org.apache.spark.sql.test.SharedSparkSession + +class AlterTableDropPartitionSuite + extends command.AlterTableDropPartitionSuiteBase + with SharedSparkSession { + + override def version: String = "V2" + override def catalog: String = "test_catalog" + override def defaultUsing: String = "USING _" + + override protected val notFullPartitionSpecErr = "Partition spec is invalid" + + override def sparkConf: SparkConf = super.sparkConf + .set(s"spark.sql.catalog.$catalog", classOf[InMemoryPartitionTableCatalog].getName) + .set(s"spark.sql.catalog.non_part_$catalog", classOf[InMemoryTableCatalog].getName) + + test("partition not exists") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") + + val errMsg = intercept[NoSuchPartitionsException] { + sql(s"ALTER TABLE $t DROP PARTITION (id=1), PARTITION (id=2)") + }.getMessage + assert(errMsg.contains("partitions not found in table")) + + checkPartitions(t, Map("id" -> "1")) + sql(s"ALTER TABLE $t DROP IF EXISTS PARTITION (id=1), PARTITION (id=2)") + checkPartitions(t) + } + } + + test("SPARK-33650: drop partition into a table which doesn't support partition management") { + withNsTable("ns", "tbl", s"non_part_$catalog") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing") + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t DROP PARTITION (id=1)") + }.getMessage + assert(errMsg.contains("can not alter partitions")) + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 488b52aa7bd45..f8a5c7f57eec5 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -163,10 +163,6 @@ class HiveCatalogedDDLSuite extends DDLSuite with TestHiveSingleton with BeforeA testRenamePartitions(isDatasourceTable = false) } - test("alter table: drop partition") { - testDropPartitions(isDatasourceTable = false) - } - test("drop table") { testDropTable(isDatasourceTable = false) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableDropPartitionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableDropPartitionSuite.scala new file mode 100644 index 0000000000000..fe26466cdad62 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableDropPartitionSuite.scala @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.execution.command + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.execution.command.v1 +import org.apache.spark.sql.hive.test.TestHiveSingleton + +class AlterTableDropPartitionSuite + extends v1.AlterTableDropPartitionSuiteBase + with TestHiveSingleton { + + override def version: String = "Hive V1" + override def defaultUsing: String = "USING HIVE" + + override protected val notFullPartitionSpecErr = "No partition is dropped" + + test("partition not exists") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") + + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t DROP PARTITION (id=1), PARTITION (id=2)") + }.getMessage + assert(errMsg.contains("No partition is dropped")) + + checkPartitions(t, Map("id" -> "1")) + sql(s"ALTER TABLE $t DROP IF EXISTS PARTITION (id=1), PARTITION (id=2)") + checkPartitions(t) + } + } +} From 03042529e3c7bfd03185e5d751086173766926c3 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 15 Dec 2020 18:29:28 +0900 Subject: [PATCH 0769/1009] [SPARK-33273][SQL] Fix a race condition in subquery execution ### What changes were proposed in this pull request? If we call `SubqueryExec.executeTake`, it will call `SubqueryExec.execute` which will trigger the codegen of the query plan and create an RDD. However, `SubqueryExec` already has a thread (`SubqueryExec.relationFuture`) to execute the query plan, which means we have 2 threads triggering codegen of the same query plan at the same time. Spark codegen is not thread-safe, as we have places like `HashAggregateExec.bufferVars` that is a shared variable. The bug in `SubqueryExec` may lead to correctness bugs. Since https://issues.apache.org/jira/browse/SPARK-33119, `ScalarSubquery` will call `SubqueryExec.executeTake`, so flaky tests start to appear. This PR fixes the bug by reimplementing https://github.com/apache/spark/pull/30016 . We should pass the number of rows we want to collect to `SubqueryExec` at planning time, so that we can use `executeTake` inside `SubqueryExec.relationFuture`, and the caller side should always call `SubqueryExec.executeCollect`. This PR also adds checks so that we can make sure only `SubqueryExec.executeCollect` is called. ### Why are the changes needed? fix correctness bug. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? run `build/sbt "sql/testOnly *SQLQueryTestSuite -- -z scalar-subquery-select"` more than 10 times. Previously it fails, now it passes. Closes #30765 from cloud-fan/bug. 
Authored-by: Wenchen Fan Signed-off-by: HyukjinKwon --- .../adaptive/InsertAdaptiveSparkPlan.scala | 3 +- .../execution/basicPhysicalOperators.scala | 35 +++++++++++++++---- .../apache/spark/sql/execution/subquery.scala | 6 ++-- 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala index f8478f860b2d5..cd0503fb8a147 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala @@ -120,7 +120,8 @@ case class InsertAdaptiveSparkPlan( if !subqueryMap.contains(exprId.id) => val executedPlan = compileSubquery(p) verifyAdaptivePlan(executedPlan, p) - val subquery = SubqueryExec(s"subquery#${exprId.id}", executedPlan) + val subquery = SubqueryExec.createForScalarSubquery( + s"subquery#${exprId.id}", executedPlan) subqueryMap.put(exprId.id, subquery) case expressions.InSubquery(_, ListQuery(query, _, exprId, _)) if !subqueryMap.contains(exprId.id) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala index 80a4090ce03f3..fcf77e588fc60 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala @@ -765,7 +765,7 @@ abstract class BaseSubqueryExec extends SparkPlan { /** * Physical plan for a subquery. */ -case class SubqueryExec(name: String, child: SparkPlan) +case class SubqueryExec(name: String, child: SparkPlan, maxNumRows: Option[Int] = None) extends BaseSubqueryExec with UnaryExecNode { override lazy val metrics = Map( @@ -784,7 +784,11 @@ case class SubqueryExec(name: String, child: SparkPlan) SQLExecution.withExecutionId(sqlContext.sparkSession, executionId) { val beforeCollect = System.nanoTime() // Note that we use .executeCollect() because we don't want to convert data to Scala types - val rows: Array[InternalRow] = child.executeCollect() + val rows: Array[InternalRow] = if (maxNumRows.isDefined) { + child.executeTake(maxNumRows.get) + } else { + child.executeCollect() + } val beforeBuild = System.nanoTime() longMetric("collectTime") += NANOSECONDS.toMillis(beforeBuild - beforeCollect) val dataSize = rows.map(_.asInstanceOf[UnsafeRow].getSizeInBytes.toLong).sum @@ -797,28 +801,45 @@ case class SubqueryExec(name: String, child: SparkPlan) } protected override def doCanonicalize(): SparkPlan = { - SubqueryExec("Subquery", child.canonicalized) + SubqueryExec("Subquery", child.canonicalized, maxNumRows) } protected override def doPrepare(): Unit = { relationFuture } + // `SubqueryExec` should only be used by calling `executeCollect`. It launches a new thread to + // collect the result of `child`. We should not trigger codegen of `child` again in other threads, + // as generating code is not thread-safe. 
+ override def executeCollect(): Array[InternalRow] = { + ThreadUtils.awaitResult(relationFuture, Duration.Inf) + } + protected override def doExecute(): RDD[InternalRow] = { - child.execute() + throw new IllegalStateException("SubqueryExec.doExecute should never be called") } - override def executeCollect(): Array[InternalRow] = { - ThreadUtils.awaitResult(relationFuture, Duration.Inf) + override def executeTake(n: Int): Array[InternalRow] = { + throw new IllegalStateException("SubqueryExec.executeTake should never be called") + } + + override def executeTail(n: Int): Array[InternalRow] = { + throw new IllegalStateException("SubqueryExec.executeTail should never be called") } - override def stringArgs: Iterator[Any] = super.stringArgs ++ Iterator(s"[id=#$id]") + override def stringArgs: Iterator[Any] = Iterator(name, child) ++ Iterator(s"[id=#$id]") } object SubqueryExec { private[execution] val executionContext = ExecutionContext.fromExecutorService( ThreadUtils.newDaemonCachedThreadPool("subquery", SQLConf.get.getConf(StaticSQLConf.SUBQUERY_MAX_THREAD_THRESHOLD))) + + def createForScalarSubquery(name: String, child: SparkPlan): SubqueryExec = { + // Scalar subquery needs only one row. We require 2 rows here to validate if the scalar query is + // invalid(return more than one row). We don't need all the rows as it may OOM. + SubqueryExec(name, child, maxNumRows = Some(2)) + } } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala index 5e222d2e48769..0080b73575de1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala @@ -80,8 +80,7 @@ case class ScalarSubquery( @volatile private var updated: Boolean = false def updateResult(): Unit = { - // Only return the first two rows as an array to avoid Driver OOM. - val rows = plan.executeTake(2) + val rows = plan.executeCollect() if (rows.length > 1) { sys.error(s"more than one row returned by a subquery used as an expression:\n$plan") } @@ -178,7 +177,8 @@ case class PlanSubqueries(sparkSession: SparkSession) extends Rule[SparkPlan] { case subquery: expressions.ScalarSubquery => val executedPlan = QueryExecution.prepareExecutedPlan(sparkSession, subquery.plan) ScalarSubquery( - SubqueryExec(s"scalar-subquery#${subquery.exprId.id}", executedPlan), + SubqueryExec.createForScalarSubquery( + s"scalar-subquery#${subquery.exprId.id}", executedPlan), subquery.exprId) case expressions.InSubquery(values, ListQuery(query, _, exprId, _)) => val expr = if (values.length == 1) { From 20f6d63bc109284f6f9daf5da20cb2fef560628a Mon Sep 17 00:00:00 2001 From: Chongguang LIU Date: Tue, 15 Dec 2020 18:55:48 +0900 Subject: [PATCH 0770/1009] [SPARK-33769][SQL] Improve the next-day function of the sql component to deal with Column type ### What changes were proposed in this pull request? The proposition of this pull request is described in this JIRA ticket: [https://issues.apache.org/jira/browse/SPARK-33769](url) It proposes to improve the next-day function of the sql component to deal with Column type for the parameter dayOfWeek. ### Why are the changes needed? It makes this functionality easier to use. Actually the signature of this function is: > def next_day(date: Column, dayOfWeek: String): Column. It accepts the dayOfWeek parameter as a String. However in some cases, the dayOfWeek is in a Column, so a different value for each row of the dataframe. 
A current workaround is to use the NextDay function like this: > NextDay(dateCol.expr, dayOfWeekCol.expr). The proposition is to add another signature for this function: > def next_day(date: Column, dayOfWeek: Column): Column In fact, this is already the case for some other functions in this Scala object, for example: > def date_sub(start: Column, days: Int): Column = date_sub(start, lit(days)) > def date_sub(start: Column, days: Column): Column = withExpr { DateSub(start.expr, days.expr) } or > def add_months(startDate: Column, numMonths: Int): Column = add_months(startDate, lit(numMonths)) > def add_months(startDate: Column, numMonths: Column): Column = withExpr { > AddMonths(startDate.expr, numMonths.expr) > } This pull request applies the same idea to the next_day function. ### Does this PR introduce _any_ user-facing change? Yes. With this pull request, users of Spark will have a new signature of the function: > def next_day(date: Column, dayOfWeek: Column): Column But the existing function signature should still work: > def next_day(date: Column, dayOfWeek: String): Column So this change should be backward compatible. ### How was this patch tested? The unit tests of the next_day function have been enhanced. They test the dayOfWeek parameter both as a String and as a Column. I also added a test case for the existing signature where the dayOfWeek is an invalid String. This should return null. Closes #30761 from chongguang/SPARK-33769. Authored-by: Chongguang LIU Signed-off-by: HyukjinKwon --- .../org/apache/spark/sql/functions.scala | 22 +++++++++++++++++-- .../apache/spark/sql/DateFunctionsSuite.scala | 18 +++++++++++---- 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index ede2b52930a17..4defcb836a978 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -3074,8 +3074,26 @@ object functions { * @group datetime_funcs * @since 1.5.0 */ - def next_day(date: Column, dayOfWeek: String): Column = withExpr { - NextDay(date.expr, lit(dayOfWeek).expr) + def next_day(date: Column, dayOfWeek: String): Column = next_day(date, lit(dayOfWeek)) + + /** + * Returns the first date which is later than the value of the `date` column that is on the + * specified day of the week. + * + * For example, `next_day('2015-07-27', "Sunday")` returns 2015-08-02 because that is the first + * Sunday after 2015-07-27. + * + * @param date A date, timestamp or string. If a string, the data must be in a format that + * can be cast to a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS` + * @param dayOfWeek A column of the day of week. 
Case insensitive, and accepts: "Mon", "Tue", + * "Wed", "Thu", "Fri", "Sat", "Sun" + * @return A date, or null if `date` was a string that could not be cast to a date or if + * `dayOfWeek` was an invalid value + * @group datetime_funcs + * @since 3.2.0 + */ + def next_day(date: Column, dayOfWeek: Column): Column = withExpr { + NextDay(date.expr, dayOfWeek.expr) } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala index d7bbf597ff983..b545d6097d71d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala @@ -372,11 +372,21 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession { val df1 = Seq(("mon", "2015-07-23"), ("tuesday", "2015-07-20")).toDF("dow", "d") val df2 = Seq(("th", "2015-07-23 00:11:22"), ("xx", "2015-07-24 11:22:33")).toDF("dow", "t") checkAnswer( - df1.select(next_day(col("d"), "MONDAY")), - Seq(Row(Date.valueOf("2015-07-27")), Row(Date.valueOf("2015-07-27")))) + df1.select( + next_day(col("d"), "MONDAY"), + next_day(col("d"), col("dow")), + next_day(col("d"), "NonValidDay")), + Seq( + Row(Date.valueOf("2015-07-27"), Date.valueOf("2015-07-27"), null), + Row(Date.valueOf("2015-07-27"), Date.valueOf("2015-07-21"), null))) checkAnswer( - df2.select(next_day(col("t"), "th")), - Seq(Row(Date.valueOf("2015-07-30")), Row(Date.valueOf("2015-07-30")))) + df2.select( + next_day(col("t"), "th"), + next_day(col("t"), col("dow")), + next_day(col("t"), "NonValidDay")), + Seq( + Row(Date.valueOf("2015-07-30"), Date.valueOf("2015-07-30"), null), + Row(Date.valueOf("2015-07-30"), null, null))) } def checkExceptionMessage(df: DataFrame): Unit = { From 58cb2bae747a09caff194007b5c40f19b84f7c40 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Tue, 15 Dec 2020 19:20:01 +0900 Subject: [PATCH 0771/1009] [SPARK-33752][SQL] Avoid the getSimpleMessage of AnalysisException adds semicolon repeatedly ### What changes were proposed in this pull request? The current `getSimpleMessage` of `AnalysisException` may append a semicolon repeatedly. An example is shown below: `select decode()` The output will be: ``` org.apache.spark.sql.AnalysisException Invalid number of arguments for function decode. Expected: 2; Found: 0;; line 1 pos 7 ``` ### Why are the changes needed? Fixes a bug where a semicolon is appended repeatedly. ### Does this PR introduce _any_ user-facing change? Yes. The message of AnalysisException will be correct. ### How was this patch tested? Jenkins test. Closes #30724 from beliefer/SPARK-33752. 
Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: HyukjinKwon --- .../apache/spark/sql/AnalysisException.scala | 4 +- .../sql-tests/results/ansi/datetime.sql.out | 4 +- .../results/ansi/parse-schema-string.sql.out | 4 +- .../results/ansi/string-functions.sql.out | 6 +- .../sql-tests/results/change-column.sql.out | 10 +- .../results/columnresolution-negative.sql.out | 2 +- .../resources/sql-tests/results/count.sql.out | 2 +- .../sql-tests/results/csv-functions.sql.out | 12 +- .../sql-tests/results/cte-nested.sql.out | 16 +- .../sql-tests/results/datetime-legacy.sql.out | 4 +- .../sql-tests/results/datetime.sql.out | 4 +- .../results/describe-table-column.sql.out | 4 +- .../sql-tests/results/describe.sql.out | 8 +- .../sql-tests/results/except-all.sql.out | 4 +- .../sql-tests/results/extract.sql.out | 10 +- .../sql-tests/results/group-analytics.sql.out | 12 +- .../sql-tests/results/group-by-filter.sql.out | 10 +- .../results/group-by-ordinal.sql.out | 6 +- .../sql-tests/results/group-by.sql.out | 18 +-- .../sql-tests/results/grouping_set.sql.out | 2 +- .../sql-tests/results/having.sql.out | 2 +- .../sql-tests/results/intersect-all.sql.out | 4 +- .../sql-tests/results/json-functions.sql.out | 12 +- .../resources/sql-tests/results/limit.sql.out | 12 +- .../resources/sql-tests/results/pivot.sql.out | 14 +- .../postgreSQL/aggregates_part1.sql.out | 2 +- .../postgreSQL/aggregates_part3.sql.out | 2 +- .../results/postgreSQL/create_view.sql.out | 28 ++-- .../results/postgreSQL/limit.sql.out | 2 +- .../results/postgreSQL/numeric.sql.out | 2 +- .../results/postgreSQL/select_having.sql.out | 2 +- .../results/postgreSQL/strings.sql.out | 16 +- .../results/postgreSQL/window_part3.sql.out | 10 +- .../sql-tests/results/postgreSQL/with.sql.out | 2 +- .../results/regexp-functions.sql.out | 4 +- .../sql-tests/results/show-tables.sql.out | 8 +- .../sql-tests/results/show-views.sql.out | 2 +- .../sql-tests/results/show_columns.sql.out | 2 +- .../results/string-functions.sql.out | 6 +- .../subquery/in-subquery/in-basic.sql.out | 2 +- .../invalid-correlation.sql.out | 9 +- .../subq-input-typecheck.sql.out | 10 +- .../native/widenSetOperationTypes.sql.out | 140 +++++++++--------- .../postgreSQL/udf-aggregates_part1.sql.out | 2 +- .../postgreSQL/udf-aggregates_part3.sql.out | 2 +- .../udf/postgreSQL/udf-select_having.sql.out | 2 +- .../results/udf/udf-except-all.sql.out | 4 +- .../results/udf/udf-group-analytics.sql.out | 12 +- .../results/udf/udf-group-by.sql.out | 18 +-- .../results/udf/udf-intersect-all.sql.out | 4 +- .../sql-tests/results/udf/udf-pivot.sql.out | 14 +- .../sql-tests/results/udf/udf-window.sql.out | 2 +- .../sql-tests/results/window.sql.out | 8 +- .../spark/sql/ColumnExpressionSuite.scala | 2 +- .../apache/spark/sql/SQLInsertTestSuite.scala | 4 +- .../command/ShowTablesSuiteBase.scala | 2 +- .../spark/sql/internal/SQLConfSuite.scala | 4 +- .../sql/sources/BucketedWriteSuite.scala | 10 +- .../sql/sources/PartitionedWriteSuite.scala | 2 +- .../sql/streaming/FileStreamSourceSuite.scala | 2 +- 60 files changed, 264 insertions(+), 265 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala index f5c87677ab9eb..1dfbff5c6df5b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala @@ -48,9 +48,11 @@ class AnalysisException protected[sql] ( // Outputs an 
exception without the logical plan. // For testing only - def getSimpleMessage: String = { + def getSimpleMessage: String = if (line.isDefined || startPosition.isDefined) { val lineAnnotation = line.map(l => s" line $l").getOrElse("") val positionAnnotation = startPosition.map(p => s" pos $p").getOrElse("") s"$message;$lineAnnotation$positionAnnotation" + } else { + message } } diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out index 400c8d6c3c84f..3e307a92c10f0 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out @@ -453,7 +453,7 @@ select date_add('2011-11-11', '1.2') struct<> -- !query output org.apache.spark.sql.AnalysisException -The second argument of 'date_add' function needs to be an integer.; +The second argument of 'date_add' function needs to be an integer. -- !query @@ -494,7 +494,7 @@ select date_sub(date'2011-11-11', '1.2') struct<> -- !query output org.apache.spark.sql.AnalysisException -The second argument of 'date_sub' function needs to be an integer.; +The second argument of 'date_sub' function needs to be an integer. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/parse-schema-string.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/parse-schema-string.sql.out index e12d988a57672..bfbf11d54489c 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/parse-schema-string.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/parse-schema-string.sql.out @@ -21,7 +21,7 @@ no viable alternative at input 'create'(line 1, pos 0) == SQL == create INT ^^^ -;; line 1 pos 7 +; line 1 pos 7 -- !query @@ -51,7 +51,7 @@ no viable alternative at input 'create'(line 1, pos 0) == SQL == create INT ^^^ -;; line 1 pos 7 +; line 1 pos 7 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out index 3164d462f8464..dd085a6437e13 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out @@ -302,7 +302,7 @@ select decode() struct<> -- !query output org.apache.spark.sql.AnalysisException -Invalid number of arguments for function decode. Expected: 2; Found: 0;; line 1 pos 7 +Invalid number of arguments for function decode. Expected: 2; Found: 0; line 1 pos 7 -- !query @@ -311,7 +311,7 @@ select decode(encode('abc', 'utf-8')) struct<> -- !query output org.apache.spark.sql.AnalysisException -Invalid number of arguments for function decode. Expected: 2; Found: 1;; line 1 pos 7 +Invalid number of arguments for function decode. 
Expected: 2; Found: 1; line 1 pos 7 -- !query @@ -359,4 +359,4 @@ select decode(6, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattl -- !query schema struct -- !query output -NULL \ No newline at end of file +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/change-column.sql.out b/sql/core/src/test/resources/sql-tests/results/change-column.sql.out index b1a32ad1f63e9..96b28d734f5a7 100644 --- a/sql/core/src/test/resources/sql-tests/results/change-column.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/change-column.sql.out @@ -50,7 +50,7 @@ ALTER TABLE test_change RENAME COLUMN a TO a1 struct<> -- !query output org.apache.spark.sql.AnalysisException -RENAME COLUMN is only supported with v2 tables.; +RENAME COLUMN is only supported with v2 tables. -- !query @@ -69,7 +69,7 @@ ALTER TABLE test_change CHANGE a TYPE STRING struct<> -- !query output org.apache.spark.sql.AnalysisException -ALTER TABLE CHANGE COLUMN is not supported for changing column 'a' with type 'IntegerType' to 'a' with type 'StringType'; +ALTER TABLE CHANGE COLUMN is not supported for changing column 'a' with type 'IntegerType' to 'a' with type 'StringType' -- !query @@ -88,7 +88,7 @@ ALTER TABLE test_change CHANGE a AFTER b struct<> -- !query output org.apache.spark.sql.AnalysisException -ALTER COLUMN ... FIRST | ALTER is only supported with v2 tables.; +ALTER COLUMN ... FIRST | ALTER is only supported with v2 tables. -- !query @@ -97,7 +97,7 @@ ALTER TABLE test_change CHANGE b FIRST struct<> -- !query output org.apache.spark.sql.AnalysisException -ALTER COLUMN ... FIRST | ALTER is only supported with v2 tables.; +ALTER COLUMN ... FIRST | ALTER is only supported with v2 tables. -- !query @@ -176,7 +176,7 @@ ALTER TABLE test_change CHANGE invalid_col TYPE INT struct<> -- !query output org.apache.spark.sql.AnalysisException -Can't find column `invalid_col` given table data columns [`a`, `b`, `c`]; +Can't find column `invalid_col` given table data columns [`a`, `b`, `c`] -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out b/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out index 04ddfe0ac128c..ea321638b219e 100644 --- a/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out @@ -195,7 +195,7 @@ SELECT t1.x.y.* FROM t1 struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 't1.x.y.*' given input columns 'i1'; +cannot resolve 't1.x.y.*' given input columns 'i1' -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/count.sql.out b/sql/core/src/test/resources/sql-tests/results/count.sql.out index 64614b5b67784..ffd75d6a09e1c 100644 --- a/sql/core/src/test/resources/sql-tests/results/count.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/count.sql.out @@ -125,4 +125,4 @@ SELECT count() FROM testData struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'count()' due to data type mismatch: count requires at least one argument.; line 1 pos 7 \ No newline at end of file +cannot resolve 'count()' due to data type mismatch: count requires at least one argument.; line 1 pos 7 diff --git a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out index ed2341f71a1b0..2131487f3500a 100644 --- 
a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out @@ -24,7 +24,7 @@ select from_csv('1', 1) struct<> -- !query output org.apache.spark.sql.AnalysisException -The expression '1' is not a valid schema string.;; line 1 pos 7 +The expression '1' is not a valid schema string.; line 1 pos 7 -- !query @@ -46,7 +46,7 @@ DataType invalidtype is not supported.(line 1, pos 2) == SQL == a InvalidType --^^^ -;; line 1 pos 7 +; line 1 pos 7 -- !query @@ -55,7 +55,7 @@ select from_csv('1', 'a INT', named_struct('mode', 'PERMISSIVE')) struct<> -- !query output org.apache.spark.sql.AnalysisException -Must use a map() function for options;; line 1 pos 7 +Must use a map() function for options; line 1 pos 7 -- !query @@ -64,7 +64,7 @@ select from_csv('1', 'a INT', map('mode', 1)) struct<> -- !query output org.apache.spark.sql.AnalysisException -A type of keys and values in map() must be string, but got map;; line 1 pos 7 +A type of keys and values in map() must be string, but got map; line 1 pos 7 -- !query @@ -148,7 +148,7 @@ select to_csv(named_struct('a', 1, 'b', 2), named_struct('mode', 'PERMISSIVE')) struct<> -- !query output org.apache.spark.sql.AnalysisException -Must use a map() function for options;; line 1 pos 7 +Must use a map() function for options; line 1 pos 7 -- !query @@ -157,4 +157,4 @@ select to_csv(named_struct('a', 1, 'b', 2), map('mode', 1)) struct<> -- !query output org.apache.spark.sql.AnalysisException -A type of keys and values in map() must be string, but got map;; line 1 pos 7 +A type of keys and values in map() must be string, but got map; line 1 pos 7 diff --git a/sql/core/src/test/resources/sql-tests/results/cte-nested.sql.out b/sql/core/src/test/resources/sql-tests/results/cte-nested.sql.out index 2f736c7b4978f..a8db4599dafcc 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte-nested.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte-nested.sql.out @@ -48,7 +48,7 @@ SELECT * FROM t2 struct<> -- !query output org.apache.spark.sql.AnalysisException -Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.; +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228. -- !query @@ -85,7 +85,7 @@ SELECT * FROM t2 struct<> -- !query output org.apache.spark.sql.AnalysisException -Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.; +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228. -- !query @@ -139,7 +139,7 @@ SELECT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. 
See more details in SPARK-28228.; +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228. -- !query @@ -154,7 +154,7 @@ SELECT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.; +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228. -- !query @@ -170,7 +170,7 @@ SELECT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.; +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228. -- !query @@ -184,7 +184,7 @@ WHERE c IN ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.; +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228. -- !query @@ -213,7 +213,7 @@ SELECT * FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Name aBc is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.; +Name aBc is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228. -- !query @@ -226,4 +226,4 @@ SELECT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Name aBc is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.; +Name aBc is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228. 
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out index 7e4ea78bf46b9..ed54b72111ed5 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out @@ -430,7 +430,7 @@ select date_add('2011-11-11', '1.2') struct<> -- !query output org.apache.spark.sql.AnalysisException -The second argument of 'date_add' function needs to be an integer.; +The second argument of 'date_add' function needs to be an integer. -- !query @@ -471,7 +471,7 @@ select date_sub(date'2011-11-11', '1.2') struct<> -- !query output org.apache.spark.sql.AnalysisException -The second argument of 'date_sub' function needs to be an integer.; +The second argument of 'date_sub' function needs to be an integer. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index 01db4c1c11fe4..213895dcb4bcb 100755 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -430,7 +430,7 @@ select date_add('2011-11-11', '1.2') struct<> -- !query output org.apache.spark.sql.AnalysisException -The second argument of 'date_add' function needs to be an integer.; +The second argument of 'date_add' function needs to be an integer. -- !query @@ -471,7 +471,7 @@ select date_sub(date'2011-11-11', '1.2') struct<> -- !query output org.apache.spark.sql.AnalysisException -The second argument of 'date_sub' function needs to be an integer.; +The second argument of 'date_sub' function needs to be an integer. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/describe-table-column.sql.out b/sql/core/src/test/resources/sql-tests/results/describe-table-column.sql.out index c6d3d45879eb1..22ef8e13c36a8 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe-table-column.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe-table-column.sql.out @@ -77,7 +77,7 @@ DESC desc_col_temp_view key1 struct<> -- !query output org.apache.spark.sql.AnalysisException -Column key1 does not exist; +Column key1 does not exist -- !query @@ -188,7 +188,7 @@ DESC FORMATTED desc_complex_col_table col.x struct<> -- !query output org.apache.spark.sql.AnalysisException -DESC TABLE COLUMN command does not support nested data types: col.x; +DESC TABLE COLUMN command does not support nested data types: col.x -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out index 2674d055ac450..ebec2e1976b15 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out @@ -332,7 +332,7 @@ struct<> org.apache.spark.sql.catalyst.analysis.NoSuchPartitionException Partition not found in table 't' database 'default': c -> Us -d -> 2; +d -> 2 -- !query @@ -341,7 +341,7 @@ DESC t PARTITION (c='Us') struct<> -- !query output org.apache.spark.sql.AnalysisException -Partition spec is invalid. The spec (c) must match the partition spec (c, d) defined in table '`default`.`t`'; +Partition spec is invalid. 
The spec (c) must match the partition spec (c, d) defined in table '`default`.`t`' -- !query @@ -431,7 +431,7 @@ DESC temp_v PARTITION (c='Us', d=1) struct<> -- !query output org.apache.spark.sql.AnalysisException -DESC PARTITION is not allowed on a temporary view: temp_v; +DESC PARTITION is not allowed on a temporary view: temp_v -- !query @@ -510,7 +510,7 @@ DESC v PARTITION (c='Us', d=1) struct<> -- !query output org.apache.spark.sql.AnalysisException -DESC PARTITION is not allowed on a view: v; +DESC PARTITION is not allowed on a view: v -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/except-all.sql.out b/sql/core/src/test/resources/sql-tests/results/except-all.sql.out index 601ff8f024214..a1fe952e2c032 100644 --- a/sql/core/src/test/resources/sql-tests/results/except-all.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/except-all.sql.out @@ -141,7 +141,7 @@ SELECT array(1) struct<> -- !query output org.apache.spark.sql.AnalysisException -ExceptAll can only be performed on tables with the compatible column types. array <> int at the first column of the second table; +ExceptAll can only be performed on tables with the compatible column types. array <> int at the first column of the second table -- !query @@ -213,7 +213,7 @@ SELECT k, v FROM tab4 struct<> -- !query output org.apache.spark.sql.AnalysisException -ExceptAll can only be performed on tables with the same number of columns, but the first table has 1 columns and the second table has 2 columns; +ExceptAll can only be performed on tables with the same number of columns, but the first table has 1 columns and the second table has 2 columns -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/extract.sql.out b/sql/core/src/test/resources/sql-tests/results/extract.sql.out index 9d3fe5d17fafa..5415b2c30a308 100644 --- a/sql/core/src/test/resources/sql-tests/results/extract.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/extract.sql.out @@ -320,7 +320,7 @@ select extract(not_supported from c) from t struct<> -- !query output org.apache.spark.sql.AnalysisException -Literals of type 'not_supported' are currently not supported for the string type.;; line 1 pos 7 +Literals of type 'not_supported' are currently not supported for the string type.; line 1 pos 7 -- !query @@ -329,7 +329,7 @@ select extract(not_supported from i) from t struct<> -- !query output org.apache.spark.sql.AnalysisException -Literals of type 'not_supported' are currently not supported for the interval type.;; line 1 pos 7 +Literals of type 'not_supported' are currently not supported for the interval type.; line 1 pos 7 -- !query @@ -642,7 +642,7 @@ select date_part('not_supported', c) from t struct<> -- !query output org.apache.spark.sql.AnalysisException -Literals of type 'not_supported' are currently not supported for the string type.;; line 1 pos 7 +Literals of type 'not_supported' are currently not supported for the string type.; line 1 pos 7 -- !query @@ -651,7 +651,7 @@ select date_part(c, c) from t struct<> -- !query output org.apache.spark.sql.AnalysisException -The field parameter needs to be a foldable string value.;; line 1 pos 7 +The field parameter needs to be a foldable string value.; line 1 pos 7 -- !query @@ -668,7 +668,7 @@ select date_part(i, i) from t struct<> -- !query output org.apache.spark.sql.AnalysisException -The field parameter needs to be a foldable string value.;; line 1 pos 7 +The field parameter needs to be a foldable string value.; line 1 pos 7 -- !query diff --git 
a/sql/core/src/test/resources/sql-tests/results/group-analytics.sql.out b/sql/core/src/test/resources/sql-tests/results/group-analytics.sql.out index c4f9ea1fe026a..b820fb49b09ba 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-analytics.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-analytics.sql.out @@ -210,7 +210,7 @@ SELECT course, year, GROUPING(course) FROM courseSales GROUP BY course, year struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping() can only be used with GroupingSets/Cube/Rollup; +grouping() can only be used with GroupingSets/Cube/Rollup -- !query @@ -219,7 +219,7 @@ SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY course, struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping_id() can only be used with GroupingSets/Cube/Rollup; +grouping_id() can only be used with GroupingSets/Cube/Rollup -- !query @@ -255,7 +255,7 @@ SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING(cours struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup; +grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup -- !query @@ -264,7 +264,7 @@ SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING_ID(co struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup; +grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup -- !query @@ -319,7 +319,7 @@ SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING(cou struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup; +grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup -- !query @@ -328,7 +328,7 @@ SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING_ID( struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup; +grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/group-by-filter.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by-filter.sql.out index 149e031e8829c..55a41907dd3b4 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by-filter.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by-filter.sql.out @@ -51,7 +51,7 @@ SELECT a, COUNT(b) FILTER (WHERE a >= 2) FROM testData struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping expressions sequence is empty, and 'testdata.`a`' is not an aggregate function. Wrap '(count(testdata.`b`) FILTER (WHERE (testdata.`a` >= 2)) AS `count(b) FILTER (WHERE (a >= 2))`)' in windowing function(s) or wrap 'testdata.`a`' in first() (or first_value) if you don't care which value you get.; +grouping expressions sequence is empty, and 'testdata.`a`' is not an aggregate function. Wrap '(count(testdata.`b`) FILTER (WHERE (testdata.`a` >= 2)) AS `count(b) FILTER (WHERE (a >= 2))`)' in windowing function(s) or wrap 'testdata.`a`' in first() (or first_value) if you don't care which value you get. 
-- !query @@ -231,7 +231,7 @@ SELECT a, COUNT(b) FILTER (WHERE a != 2) FROM testData GROUP BY b struct<> -- !query output org.apache.spark.sql.AnalysisException -expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; +expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get. -- !query @@ -711,7 +711,7 @@ SELECT a + 2, COUNT(b) FILTER (WHERE b IN (1, 2)) FROM testData GROUP BY a + 1 struct<> -- !query output org.apache.spark.sql.AnalysisException -expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; +expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get. -- !query @@ -804,7 +804,6 @@ IN/EXISTS predicate sub-queries can only be used in Filter/Join and a few comman +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +- SubqueryAlias EMP +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] -; -- !query @@ -832,7 +831,6 @@ IN/EXISTS predicate sub-queries can only be used in Filter/Join and a few comman +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +- SubqueryAlias EMP +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] -; -- !query @@ -859,7 +857,6 @@ IN/EXISTS predicate sub-queries can only be used in Filter/Join and a few comman +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +- SubqueryAlias EMP +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] -; -- !query @@ -886,7 +883,6 @@ IN/EXISTS predicate sub-queries can only be used in Filter/Join and a few comman +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +- SubqueryAlias EMP +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] -; -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out index bf9f606a2224e..fedc7205ae559 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out @@ -122,7 +122,7 @@ select a, b, sum(b) from data group by 3 struct<> -- !query output org.apache.spark.sql.AnalysisException -aggregate functions are not allowed in GROUP BY, but found sum(CAST(data.`b` AS BIGINT)); +aggregate functions are not allowed in GROUP BY, but found sum(CAST(data.`b` AS BIGINT)) -- !query @@ -131,7 +131,7 @@ select a, b, sum(b) + 2 from data group by 3 struct<> -- !query output org.apache.spark.sql.AnalysisException -aggregate functions are not allowed in GROUP BY, but found (sum(CAST(data.`b` AS BIGINT)) + CAST(2 AS BIGINT)); +aggregate functions are not allowed in GROUP BY, but found (sum(CAST(data.`b` AS BIGINT)) + CAST(2 AS BIGINT)) -- !query @@ -155,7 +155,7 @@ select * from data group by a, b, 1 struct<> -- !query output org.apache.spark.sql.AnalysisException -Star (*) is not allowed in select list when GROUP BY ordinal position is used; +Star (*) is not allowed in select list when GROUP BY ordinal position is used -- !query diff --git 
a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index 5d9553f804059..75bda87b37642 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -18,7 +18,7 @@ SELECT a, COUNT(b) FROM testData struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping expressions sequence is empty, and 'testdata.`a`' is not an aggregate function. Wrap '(count(testdata.`b`) AS `count(b)`)' in windowing function(s) or wrap 'testdata.`a`' in first() (or first_value) if you don't care which value you get.; +grouping expressions sequence is empty, and 'testdata.`a`' is not an aggregate function. Wrap '(count(testdata.`b`) AS `count(b)`)' in windowing function(s) or wrap 'testdata.`a`' in first() (or first_value) if you don't care which value you get. -- !query @@ -46,7 +46,7 @@ SELECT a, COUNT(b) FROM testData GROUP BY b struct<> -- !query output org.apache.spark.sql.AnalysisException -expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; +expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get. -- !query @@ -110,7 +110,7 @@ SELECT a + 2, COUNT(b) FROM testData GROUP BY a + 1 struct<> -- !query output org.apache.spark.sql.AnalysisException -expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; +expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get. -- !query @@ -167,7 +167,7 @@ SELECT COUNT(b) AS k FROM testData GROUP BY k struct<> -- !query output org.apache.spark.sql.AnalysisException -aggregate functions are not allowed in GROUP BY, but found count(testdata.`b`); +aggregate functions are not allowed in GROUP BY, but found count(testdata.`b`) -- !query @@ -185,7 +185,7 @@ SELECT k AS a, COUNT(v) FROM testDataHasSameNameWithAlias GROUP BY a struct<> -- !query output org.apache.spark.sql.AnalysisException -expression 'testdatahassamenamewithalias.`k`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; +expression 'testdatahassamenamewithalias.`k`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get. -- !query @@ -274,7 +274,7 @@ SELECT id FROM range(10) HAVING id > 0 struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping expressions sequence is empty, and '`id`' is not an aggregate function. Wrap '()' in windowing function(s) or wrap '`id`' in first() (or first_value) if you don't care which value you get.; +grouping expressions sequence is empty, and '`id`' is not an aggregate function. Wrap '()' in windowing function(s) or wrap '`id`' in first() (or first_value) if you don't care which value you get. -- !query @@ -548,7 +548,7 @@ org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. 
Expression in where clause: [(count(1) > 1L)] -Invalid expressions: [count(1)]; +Invalid expressions: [count(1)] -- !query @@ -560,7 +560,7 @@ org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. Expression in where clause: [((count(1) + 1L) > 1L)] -Invalid expressions: [count(1)]; +Invalid expressions: [count(1)] -- !query @@ -572,7 +572,7 @@ org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. Expression in where clause: [(((test_agg.`k` = 1) OR (test_agg.`k` = 2)) OR (((count(1) + 1L) > 1L) OR (max(test_agg.`k`) > 1)))] -Invalid expressions: [count(1), max(test_agg.`k`)]; +Invalid expressions: [count(1), max(test_agg.`k`)] -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out b/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out index 7089e10cdef27..e1f94ddd02fe5 100644 --- a/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out @@ -165,7 +165,7 @@ SELECT c1 FROM (values (1,2), (3,2)) t(c1, c2) GROUP BY GROUPING SETS (()) struct<> -- !query output org.apache.spark.sql.AnalysisException -expression '`c1`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; +expression '`c1`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/having.sql.out b/sql/core/src/test/resources/sql-tests/results/having.sql.out index 6508143e6f9fe..237015d06ce81 100644 --- a/sql/core/src/test/resources/sql-tests/results/having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/having.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 9 +-- Number of queries: 13 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/intersect-all.sql.out b/sql/core/src/test/resources/sql-tests/results/intersect-all.sql.out index b99f63393cc4d..caba8c6942c55 100644 --- a/sql/core/src/test/resources/sql-tests/results/intersect-all.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/intersect-all.sql.out @@ -98,7 +98,7 @@ SELECT array(1), 2 struct<> -- !query output org.apache.spark.sql.AnalysisException -IntersectAll can only be performed on tables with the compatible column types. array <> int at the first column of the second table; +IntersectAll can only be performed on tables with the compatible column types. 
array <> int at the first column of the second table -- !query @@ -109,7 +109,7 @@ SELECT k, v FROM tab2 struct<> -- !query output org.apache.spark.sql.AnalysisException -IntersectAll can only be performed on tables with the same number of columns, but the first table has 1 columns and the second table has 2 columns; +IntersectAll can only be performed on tables with the same number of columns, but the first table has 1 columns and the second table has 2 columns -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out index 838e4607d0324..b14e3e1558fb0 100644 --- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out @@ -72,7 +72,7 @@ select to_json(named_struct('a', 1, 'b', 2), named_struct('mode', 'PERMISSIVE')) struct<> -- !query output org.apache.spark.sql.AnalysisException -Must use a map() function for options;; line 1 pos 7 +Must use a map() function for options; line 1 pos 7 -- !query @@ -81,7 +81,7 @@ select to_json(named_struct('a', 1, 'b', 2), map('mode', 1)) struct<> -- !query output org.apache.spark.sql.AnalysisException -A type of keys and values in map() must be string, but got map;; line 1 pos 7 +A type of keys and values in map() must be string, but got map; line 1 pos 7 -- !query @@ -115,7 +115,7 @@ select from_json('{"a":1}', 1) struct<> -- !query output org.apache.spark.sql.AnalysisException -The expression '1' is not a valid schema string.;; line 1 pos 7 +The expression '1' is not a valid schema string.; line 1 pos 7 -- !query @@ -137,7 +137,7 @@ DataType invalidtype is not supported.(line 1, pos 2) == SQL == a InvalidType --^^^ -;; line 1 pos 7 +; line 1 pos 7 -- !query @@ -146,7 +146,7 @@ select from_json('{"a":1}', 'a INT', named_struct('mode', 'PERMISSIVE')) struct<> -- !query output org.apache.spark.sql.AnalysisException -Must use a map() function for options;; line 1 pos 7 +Must use a map() function for options; line 1 pos 7 -- !query @@ -155,7 +155,7 @@ select from_json('{"a":1}', 'a INT', map('mode', 1)) struct<> -- !query output org.apache.spark.sql.AnalysisException -A type of keys and values in map() must be string, but got map;; line 1 pos 7 +A type of keys and values in map() must be string, but got map; line 1 pos 7 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/limit.sql.out b/sql/core/src/test/resources/sql-tests/results/limit.sql.out index 074e7a6d28c47..8e324628c6299 100644 --- a/sql/core/src/test/resources/sql-tests/results/limit.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/limit.sql.out @@ -53,7 +53,7 @@ SELECT * FROM testdata LIMIT -1 struct<> -- !query output org.apache.spark.sql.AnalysisException -The limit expression must be equal to or greater than 0, but got -1; +The limit expression must be equal to or greater than 0, but got -1 -- !query @@ -62,7 +62,7 @@ SELECT * FROM testData TABLESAMPLE (-1 ROWS) struct<> -- !query output org.apache.spark.sql.AnalysisException -The limit expression must be equal to or greater than 0, but got -1; +The limit expression must be equal to or greater than 0, but got -1 -- !query @@ -79,7 +79,7 @@ SELECT * FROM testdata LIMIT CAST(NULL AS INT) struct<> -- !query output org.apache.spark.sql.AnalysisException -The evaluated limit expression must not be null, but got CAST(NULL AS INT); +The evaluated limit expression must not be null, but got CAST(NULL AS INT) -- !query @@ -88,7 +88,7 
@@ SELECT * FROM testdata LIMIT key > 3 struct<> -- !query output org.apache.spark.sql.AnalysisException -The limit expression must evaluate to a constant value, but got (spark_catalog.default.testdata.`key` > 3); +The limit expression must evaluate to a constant value, but got (spark_catalog.default.testdata.`key` > 3) -- !query @@ -97,7 +97,7 @@ SELECT * FROM testdata LIMIT true struct<> -- !query output org.apache.spark.sql.AnalysisException -The limit expression must be integer type, but got boolean; +The limit expression must be integer type, but got boolean -- !query @@ -106,7 +106,7 @@ SELECT * FROM testdata LIMIT 'a' struct<> -- !query output org.apache.spark.sql.AnalysisException -The limit expression must be integer type, but got string; +The limit expression must be integer type, but got string -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/pivot.sql.out b/sql/core/src/test/resources/sql-tests/results/pivot.sql.out index bb0d452fa04a1..968319fbb7efe 100644 --- a/sql/core/src/test/resources/sql-tests/results/pivot.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/pivot.sql.out @@ -202,7 +202,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Aggregate expression required for pivot, but 'coursesales.`earnings`' did not appear in any aggregate function.; +Aggregate expression required for pivot, but 'coursesales.`earnings`' did not appear in any aggregate function. -- !query @@ -217,7 +217,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Aggregate expression required for pivot, but '__auto_generated_subquery_name.`year`' did not appear in any aggregate function.; +Aggregate expression required for pivot, but '__auto_generated_subquery_name.`year`' did not appear in any aggregate function. -- !query @@ -262,7 +262,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -It is not allowed to use an aggregate function in the argument of another aggregate function. Please use the inner aggregate function in a sub-query.; +It is not allowed to use an aggregate function in the argument of another aggregate function. Please use the inner aggregate function in a sub-query. -- !query @@ -313,7 +313,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Invalid pivot value 'dotNET': value data type string does not match pivot column data type struct; +Invalid pivot value 'dotNET': value data type string does not match pivot column data type struct -- !query @@ -339,7 +339,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Literal expressions required for pivot values, found 'course#x'; +Literal expressions required for pivot values, found 'course#x' -- !query @@ -458,7 +458,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Invalid pivot column 'm#x'. Pivot columns must be comparable.; +Invalid pivot column 'm#x'. Pivot columns must be comparable. -- !query @@ -475,7 +475,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Invalid pivot column 'named_struct(course, course#x, m, m#x)'. Pivot columns must be comparable.; +Invalid pivot column 'named_struct(course, course#x, m, m#x)'. Pivot columns must be comparable. 
-- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part1.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part1.sql.out index 212365f92946c..cc8f99ff4f453 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part1.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part1.sql.out @@ -382,7 +382,7 @@ org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. Expression in where clause: [(sum(DISTINCT CAST((outer(a.`four`) + b.`four`) AS BIGINT)) = CAST(b.`four` AS BIGINT))] -Invalid expressions: [sum(DISTINCT CAST((outer(a.`four`) + b.`four`) AS BIGINT))]; +Invalid expressions: [sum(DISTINCT CAST((outer(a.`four`) + b.`four`) AS BIGINT))] -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part3.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part3.sql.out index e1f735e5fe1dc..86ebb575ebce9 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part3.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part3.sql.out @@ -8,7 +8,7 @@ select max(min(unique1)) from tenk1 struct<> -- !query output org.apache.spark.sql.AnalysisException -It is not allowed to use an aggregate function in the argument of another aggregate function. Please use the inner aggregate function in a sub-query.; +It is not allowed to use an aggregate function in the argument of another aggregate function. Please use the inner aggregate function in a sub-query. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out index 7d331f24b9215..1ac7c4a4069b3 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out @@ -56,7 +56,7 @@ CREATE VIEW key_dependent_view AS struct<> -- !query output org.apache.spark.sql.AnalysisException -expression 'spark_catalog.default.view_base_table.`data`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; +expression 'spark_catalog.default.view_base_table.`data`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get. -- !query @@ -266,7 +266,7 @@ CREATE VIEW v1_temp AS SELECT * FROM temp_table struct<> -- !query output org.apache.spark.sql.AnalysisException -Not allowed to create a permanent view `temp_view_test`.`v1_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW; +Not allowed to create a permanent view `temp_view_test`.`v1_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW -- !query @@ -322,7 +322,7 @@ CREATE VIEW temp_view_test.v3_temp AS SELECT * FROM temp_table struct<> -- !query output org.apache.spark.sql.AnalysisException -Not allowed to create a permanent view `temp_view_test`.`v3_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW; +Not allowed to create a permanent view `temp_view_test`.`v3_temp` by referencing a temporary view temp_table. 
Please create a temp view instead by CREATE TEMP VIEW -- !query @@ -371,7 +371,7 @@ CREATE VIEW v4_temp AS struct<> -- !query output org.apache.spark.sql.AnalysisException -Not allowed to create a permanent view `temp_view_test`.`v4_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW; +Not allowed to create a permanent view `temp_view_test`.`v4_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW -- !query @@ -383,7 +383,7 @@ CREATE VIEW v5_temp AS struct<> -- !query output org.apache.spark.sql.AnalysisException -Not allowed to create a permanent view `temp_view_test`.`v5_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW; +Not allowed to create a permanent view `temp_view_test`.`v5_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW -- !query @@ -542,7 +542,7 @@ CREATE VIEW v6_temp AS SELECT * FROM base_table WHERE id IN (SELECT id FROM temp struct<> -- !query output org.apache.spark.sql.AnalysisException -Not allowed to create a permanent view `temp_view_test`.`v6_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW; +Not allowed to create a permanent view `temp_view_test`.`v6_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW -- !query @@ -551,7 +551,7 @@ CREATE VIEW v7_temp AS SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM tem struct<> -- !query output org.apache.spark.sql.AnalysisException -Not allowed to create a permanent view `temp_view_test`.`v7_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW; +Not allowed to create a permanent view `temp_view_test`.`v7_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW -- !query @@ -560,7 +560,7 @@ CREATE VIEW v8_temp AS SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM temp struct<> -- !query output org.apache.spark.sql.AnalysisException -Not allowed to create a permanent view `temp_view_test`.`v8_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW; +Not allowed to create a permanent view `temp_view_test`.`v8_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW -- !query @@ -569,7 +569,7 @@ CREATE VIEW v9_temp AS SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM struct<> -- !query output org.apache.spark.sql.AnalysisException -Not allowed to create a permanent view `temp_view_test`.`v9_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW; +Not allowed to create a permanent view `temp_view_test`.`v9_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW -- !query @@ -678,7 +678,7 @@ CREATE VIEW temporal1 AS SELECT * FROM t1 CROSS JOIN tt struct<> -- !query output org.apache.spark.sql.AnalysisException -Not allowed to create a permanent view `testviewschm2`.`temporal1` by referencing a temporary view tt. Please create a temp view instead by CREATE TEMP VIEW; +Not allowed to create a permanent view `testviewschm2`.`temporal1` by referencing a temporary view tt. 
Please create a temp view instead by CREATE TEMP VIEW -- !query @@ -719,7 +719,7 @@ CREATE VIEW temporal2 AS SELECT * FROM t1 INNER JOIN tt ON t1.num = tt.num2 struct<> -- !query output org.apache.spark.sql.AnalysisException -Not allowed to create a permanent view `testviewschm2`.`temporal2` by referencing a temporary view tt. Please create a temp view instead by CREATE TEMP VIEW; +Not allowed to create a permanent view `testviewschm2`.`temporal2` by referencing a temporary view tt. Please create a temp view instead by CREATE TEMP VIEW -- !query @@ -760,7 +760,7 @@ CREATE VIEW temporal3 AS SELECT * FROM t1 LEFT JOIN tt ON t1.num = tt.num2 struct<> -- !query output org.apache.spark.sql.AnalysisException -Not allowed to create a permanent view `testviewschm2`.`temporal3` by referencing a temporary view tt. Please create a temp view instead by CREATE TEMP VIEW; +Not allowed to create a permanent view `testviewschm2`.`temporal3` by referencing a temporary view tt. Please create a temp view instead by CREATE TEMP VIEW -- !query @@ -801,7 +801,7 @@ CREATE VIEW temporal4 AS SELECT * FROM t1 LEFT JOIN tt ON t1.num = tt.num2 AND t struct<> -- !query output org.apache.spark.sql.AnalysisException -Not allowed to create a permanent view `testviewschm2`.`temporal4` by referencing a temporary view tt. Please create a temp view instead by CREATE TEMP VIEW; +Not allowed to create a permanent view `testviewschm2`.`temporal4` by referencing a temporary view tt. Please create a temp view instead by CREATE TEMP VIEW -- !query @@ -810,7 +810,7 @@ CREATE VIEW temporal5 AS SELECT * FROM t1 WHERE num IN (SELECT num FROM t1 WHERE struct<> -- !query output org.apache.spark.sql.AnalysisException -Not allowed to create a permanent view `testviewschm2`.`temporal5` by referencing a temporary view tt. Please create a temp view instead by CREATE TEMP VIEW; +Not allowed to create a permanent view `testviewschm2`.`temporal5` by referencing a temporary view tt. 
Please create a temp view instead by CREATE TEMP VIEW -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/limit.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/limit.sql.out index 2c8bc31dbc6ca..b0f3482f0a282 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/limit.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/limit.sql.out @@ -59,7 +59,7 @@ select * from int8_tbl limit (case when random() < 0.5 then bigint(null) end) struct<> -- !query output org.apache.spark.sql.AnalysisException -The limit expression must evaluate to a constant value, but got CASE WHEN (`_nondeterministic` < CAST(0.5BD AS DOUBLE)) THEN CAST(NULL AS BIGINT) END; +The limit expression must evaluate to a constant value, but got CASE WHEN (`_nondeterministic` < CAST(0.5BD AS DOUBLE)) THEN CAST(NULL AS BIGINT) END -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out index fc2961a072e9f..fdad837e14b61 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out @@ -3830,7 +3830,7 @@ INSERT INTO num_result SELECT t1.id, t2.id, t1.val, t2.val, t1.val * t2.val struct<> -- !query output org.apache.spark.sql.AnalysisException -`default`.`num_result` requires that the data to be inserted have the same number of columns as the target table: target table has 3 column(s) but the inserted data has 5 column(s), including 0 partition column(s) having constant value(s).; +`default`.`num_result` requires that the data to be inserted have the same number of columns as the target table: target table has 3 column(s) but the inserted data has 5 column(s), including 0 partition column(s) having constant value(s). -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out index e4b7f3b1f5e88..f504e4b6c6dad 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out @@ -143,7 +143,7 @@ SELECT a FROM test_having HAVING min(a) < max(a) struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping expressions sequence is empty, and 'spark_catalog.default.test_having.`a`' is not an aggregate function. Wrap '(min(spark_catalog.default.test_having.`a`) AS `min(a#x)`, max(spark_catalog.default.test_having.`a`) AS `max(a#x)`)' in windowing function(s) or wrap 'spark_catalog.default.test_having.`a`' in first() (or first_value) if you don't care which value you get.; +grouping expressions sequence is empty, and 'spark_catalog.default.test_having.`a`' is not an aggregate function. Wrap '(min(spark_catalog.default.test_having.`a`) AS `min(a#x)`, max(spark_catalog.default.test_having.`a`) AS `max(a#x)`)' in windowing function(s) or wrap 'spark_catalog.default.test_having.`a`' in first() (or first_value) if you don't care which value you get. 
-- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/strings.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/strings.sql.out index e8a3a9b9731a6..13cc8a8754025 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/strings.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/strings.sql.out @@ -446,7 +446,7 @@ SELECT 'maca' LIKE 'm%aca' ESCAPE '%' AS `true` struct<> -- !query output org.apache.spark.sql.AnalysisException -the pattern 'm%aca' is invalid, the escape character is not allowed to precede 'a'; +the pattern 'm%aca' is invalid, the escape character is not allowed to precede 'a' -- !query @@ -455,7 +455,7 @@ SELECT 'maca' NOT LIKE 'm%aca' ESCAPE '%' AS `false` struct<> -- !query output org.apache.spark.sql.AnalysisException -the pattern 'm%aca' is invalid, the escape character is not allowed to precede 'a'; +the pattern 'm%aca' is invalid, the escape character is not allowed to precede 'a' -- !query @@ -464,7 +464,7 @@ SELECT 'ma%a' LIKE 'm%a%%a' ESCAPE '%' AS `true` struct<> -- !query output org.apache.spark.sql.AnalysisException -the pattern 'm%a%%a' is invalid, the escape character is not allowed to precede 'a'; +the pattern 'm%a%%a' is invalid, the escape character is not allowed to precede 'a' -- !query @@ -473,7 +473,7 @@ SELECT 'ma%a' NOT LIKE 'm%a%%a' ESCAPE '%' AS `false` struct<> -- !query output org.apache.spark.sql.AnalysisException -the pattern 'm%a%%a' is invalid, the escape character is not allowed to precede 'a'; +the pattern 'm%a%%a' is invalid, the escape character is not allowed to precede 'a' -- !query @@ -482,7 +482,7 @@ SELECT 'bear' LIKE 'b_ear' ESCAPE '_' AS `true` struct<> -- !query output org.apache.spark.sql.AnalysisException -the pattern 'b_ear' is invalid, the escape character is not allowed to precede 'e'; +the pattern 'b_ear' is invalid, the escape character is not allowed to precede 'e' -- !query @@ -491,7 +491,7 @@ SELECT 'bear' NOT LIKE 'b_ear' ESCAPE '_' AS `false` struct<> -- !query output org.apache.spark.sql.AnalysisException -the pattern 'b_ear' is invalid, the escape character is not allowed to precede 'e'; +the pattern 'b_ear' is invalid, the escape character is not allowed to precede 'e' -- !query @@ -500,7 +500,7 @@ SELECT 'be_r' LIKE 'b_e__r' ESCAPE '_' AS `true` struct<> -- !query output org.apache.spark.sql.AnalysisException -the pattern 'b_e__r' is invalid, the escape character is not allowed to precede 'e'; +the pattern 'b_e__r' is invalid, the escape character is not allowed to precede 'e' -- !query @@ -509,7 +509,7 @@ SELECT 'be_r' NOT LIKE 'b_e__r' ESCAPE '_' AS `false` struct<> -- !query output org.apache.spark.sql.AnalysisException -the pattern 'b_e__r' is invalid, the escape character is not allowed to precede 'e'; +the pattern 'b_e__r' is invalid, the escape character is not allowed to precede 'e' -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out index 0e177f7ea82bd..88aee38c4504e 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out @@ -295,7 +295,7 @@ SELECT * FROM empsalary WHERE row_number() OVER (ORDER BY salary) < 10 struct<> -- !query output org.apache.spark.sql.AnalysisException -It is not allowed to use window functions inside WHERE clause; +It is not allowed to use window functions inside 
WHERE clause -- !query @@ -307,7 +307,7 @@ org.apache.spark.sql.AnalysisException The query operator `Join` contains one or more unsupported expression types Aggregate, Window or Generate. -Invalid expressions: [row_number() OVER (ORDER BY spark_catalog.default.empsalary.`salary` ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)]; +Invalid expressions: [row_number() OVER (ORDER BY spark_catalog.default.empsalary.`salary` ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)] -- !query @@ -319,7 +319,7 @@ org.apache.spark.sql.AnalysisException The query operator `Aggregate` contains one or more unsupported expression types Aggregate, Window or Generate. -Invalid expressions: [RANK() OVER (ORDER BY 1 ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)]; +Invalid expressions: [RANK() OVER (ORDER BY 1 ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)] -- !query @@ -342,7 +342,7 @@ SELECT * FROM empsalary WHERE (rank() OVER (ORDER BY random())) > 10 struct<> -- !query output org.apache.spark.sql.AnalysisException -It is not allowed to use window functions inside WHERE clause; +It is not allowed to use window functions inside WHERE clause -- !query @@ -351,7 +351,7 @@ SELECT * FROM empsalary WHERE rank() OVER (ORDER BY random()) struct<> -- !query output org.apache.spark.sql.AnalysisException -It is not allowed to use window functions inside WHERE clause; +It is not allowed to use window functions inside WHERE clause -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/with.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/with.sql.out index badafc9e659e2..1432bcce42e76 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/with.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/with.sql.out @@ -385,7 +385,7 @@ WITH test AS (SELECT 42) INSERT INTO test VALUES (1) struct<> -- !query output org.apache.spark.sql.AnalysisException -Table not found: test; +Table not found: test -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out index 8d471a5bb1c87..f2a4131818bfb 100644 --- a/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 37 +-- Number of queries: 40 -- !query @@ -333,4 +333,4 @@ SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', null) -- !query schema struct -- !query output -NULL \ No newline at end of file +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out index 60c5e6d5642b7..611b0b750c2cd 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out @@ -206,7 +206,7 @@ SHOW TABLE EXTENDED LIKE 'show_t*' PARTITION(c='Us', d=1) struct<> -- !query output org.apache.spark.sql.catalyst.analysis.NoSuchTableException -Table or view 'show_t*' not found in database 'showdb'; +Table or view 'show_t*' not found in database 'showdb' -- !query @@ -215,7 +215,7 @@ SHOW TABLE EXTENDED LIKE 'show_t1' PARTITION(c='Us') struct<> -- !query output org.apache.spark.sql.AnalysisException -Partition spec is invalid. 
The spec (c) must match the partition spec (c, d) defined in table '`showdb`.`show_t1`'; +Partition spec is invalid. The spec (c) must match the partition spec (c, d) defined in table '`showdb`.`show_t1`' -- !query @@ -224,7 +224,7 @@ SHOW TABLE EXTENDED LIKE 'show_t1' PARTITION(a='Us', d=1) struct<> -- !query output org.apache.spark.sql.AnalysisException -a is not a valid partition column in table `showdb`.`show_t1`.; +a is not a valid partition column in table `showdb`.`show_t1`. -- !query @@ -235,7 +235,7 @@ struct<> org.apache.spark.sql.catalyst.analysis.NoSuchPartitionException Partition not found in table 'show_t1' database 'showdb': c -> Ch -d -> 1; +d -> 1 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/show-views.sql.out b/sql/core/src/test/resources/sql-tests/results/show-views.sql.out index d88790d8b5ec8..c80f8fab433fb 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-views.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-views.sql.out @@ -142,7 +142,7 @@ SHOW VIEWS IN wrongdb LIKE 'view_*' struct<> -- !query output org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException -Database 'wrongdb' not found; +Database 'wrongdb' not found -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out b/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out index 03df876133aa4..851e848ed4ec6 100644 --- a/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out @@ -112,7 +112,7 @@ SHOW COLUMNS IN showdb.showcolumn1 FROM baddb struct<> -- !query output org.apache.spark.sql.AnalysisException -SHOW COLUMNS with conflicting databases: 'baddb' != 'showdb'; +SHOW COLUMNS with conflicting databases: 'baddb' != 'showdb' -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index 020a095d72e85..74627e7786997 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -298,7 +298,7 @@ select decode() struct<> -- !query output org.apache.spark.sql.AnalysisException -Invalid number of arguments for function decode. Expected: 2; Found: 0;; line 1 pos 7 +Invalid number of arguments for function decode. Expected: 2; Found: 0; line 1 pos 7 -- !query @@ -307,7 +307,7 @@ select decode(encode('abc', 'utf-8')) struct<> -- !query output org.apache.spark.sql.AnalysisException -Invalid number of arguments for function decode. Expected: 2; Found: 1;; line 1 pos 7 +Invalid number of arguments for function decode. Expected: 2; Found: 1; line 1 pos 7 -- !query @@ -355,4 +355,4 @@ select decode(6, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattl -- !query schema struct -- !query output -NULL \ No newline at end of file +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-basic.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-basic.sql.out index a33f78abf27f9..639fe1775d2dc 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-basic.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-basic.sql.out @@ -49,7 +49,7 @@ number of columns in the output of subquery. Left side columns: [tab_a.`a1`, tab_a.`b1`]. 
Right side columns: -[`named_struct(a2, a2, b2, b2)`].; +[`named_struct(a2, a2, b2, b2)`]. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out index cd96eaf1b878b..e77afd886aeab 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out @@ -46,7 +46,7 @@ AND t2b = (SELECT max(avg) struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping expressions sequence is empty, and 't2.`t2b`' is not an aggregate function. Wrap '(avg(CAST(t2.`t2b` AS BIGINT)) AS `avg`)' in windowing function(s) or wrap 't2.`t2b`' in first() (or first_value) if you don't care which value you get.; +grouping expressions sequence is empty, and 't2.`t2b`' is not an aggregate function. Wrap '(avg(CAST(t2.`t2b` AS BIGINT)) AS `avg`)' in windowing function(s) or wrap 't2.`t2b`' in first() (or first_value) if you don't care which value you get. -- !query @@ -63,7 +63,7 @@ WHERE t1a IN (SELECT min(t2a) struct<> -- !query output org.apache.spark.sql.AnalysisException -Resolved attribute(s) t2b#x missing from min(t2a)#x,t2c#x in operator !Filter t2c#x IN (list#x [t2b#x]).; +Resolved attribute(s) t2b#x missing from min(t2a)#x,t2c#x in operator !Filter t2c#x IN (list#x [t2b#x]). -- !query @@ -78,7 +78,7 @@ HAVING EXISTS (SELECT t2a struct<> -- !query output org.apache.spark.sql.AnalysisException -Found an aggregate expression in a correlated predicate that has both outer and local references, which is not supported yet. Aggregate expression: min((t1.`t1a` + t2.`t2a`)), Outer references: t1.`t1a`, Local references: t2.`t2a`.; +Found an aggregate expression in a correlated predicate that has both outer and local references, which is not supported yet. Aggregate expression: min((t1.`t1a` + t2.`t2a`)), Outer references: t1.`t1a`, Local references: t2.`t2a`. -- !query @@ -94,7 +94,7 @@ WHERE t1a IN (SELECT t2a struct<> -- !query output org.apache.spark.sql.AnalysisException -Found an aggregate expression in a correlated predicate that has both outer and local references, which is not supported yet. Aggregate expression: min((t2.`t2a` + t3.`t3a`)), Outer references: t2.`t2a`, Local references: t3.`t3a`.; +Found an aggregate expression in a correlated predicate that has both outer and local references, which is not supported yet. Aggregate expression: min((t2.`t2a` + t3.`t3a`)), Outer references: t2.`t2a`, Local references: t3.`t3a`. 
-- !query @@ -115,4 +115,3 @@ Aggregate [min(outer(t2a#x)) AS min(outer(t2.`t2a`))#x] +- Project [t3a#x, t3b#x, t3c#x] +- SubqueryAlias t3 +- LocalRelation [t3a#x, t3b#x, t3c#x] -; diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out index 776598127075b..a470775308092 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out @@ -64,7 +64,7 @@ FROM t1 struct<> -- !query output org.apache.spark.sql.AnalysisException -Scalar subquery must return only one column, but got 2; +Scalar subquery must return only one column, but got 2 -- !query @@ -79,7 +79,7 @@ FROM t1 struct<> -- !query output org.apache.spark.sql.AnalysisException -Scalar subquery must return only one column, but got 2; +Scalar subquery must return only one column, but got 2 -- !query @@ -100,7 +100,7 @@ number of columns in the output of subquery. Left side columns: [t1.`t1a`]. Right side columns: -[t2.`t2a`, t2.`t2b`].; +[t2.`t2a`, t2.`t2b`]. -- !query @@ -121,7 +121,7 @@ number of columns in the output of subquery. Left side columns: [t1.`t1a`, t1.`t1b`]. Right side columns: -[t2.`t2a`].; +[t2.`t2a`]. -- !query @@ -143,4 +143,4 @@ Mismatched columns: Left side: [double, string, string]. Right side: -[timestamp, string, bigint].; +[timestamp, string, bigint]. diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/widenSetOperationTypes.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/widenSetOperationTypes.sql.out index 89b1cdb3e353d..a527b20dc04ff 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/widenSetOperationTypes.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/widenSetOperationTypes.sql.out @@ -88,7 +88,7 @@ SELECT cast(1 as tinyint) FROM t UNION SELECT cast('2' as binary) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. binary <> tinyint at the first column of the second table; +Union can only be performed on tables with the compatible column types. binary <> tinyint at the first column of the second table -- !query @@ -97,7 +97,7 @@ SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as boolean) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. boolean <> tinyint at the first column of the second table; +Union can only be performed on tables with the compatible column types. boolean <> tinyint at the first column of the second table -- !query @@ -106,7 +106,7 @@ SELECT cast(1 as tinyint) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as ti struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. timestamp <> tinyint at the first column of the second table; +Union can only be performed on tables with the compatible column types. 
timestamp <> tinyint at the first column of the second table -- !query @@ -115,7 +115,7 @@ SELECT cast(1 as tinyint) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. date <> tinyint at the first column of the second table; +Union can only be performed on tables with the compatible column types. date <> tinyint at the first column of the second table -- !query @@ -196,7 +196,7 @@ SELECT cast(1 as smallint) FROM t UNION SELECT cast('2' as binary) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. binary <> smallint at the first column of the second table; +Union can only be performed on tables with the compatible column types. binary <> smallint at the first column of the second table -- !query @@ -205,7 +205,7 @@ SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as boolean) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. boolean <> smallint at the first column of the second table; +Union can only be performed on tables with the compatible column types. boolean <> smallint at the first column of the second table -- !query @@ -214,7 +214,7 @@ SELECT cast(1 as smallint) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. timestamp <> smallint at the first column of the second table; +Union can only be performed on tables with the compatible column types. timestamp <> smallint at the first column of the second table -- !query @@ -223,7 +223,7 @@ SELECT cast(1 as smallint) FROM t UNION SELECT cast('2017-12-11 09:30:00' as dat struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. date <> smallint at the first column of the second table; +Union can only be performed on tables with the compatible column types. date <> smallint at the first column of the second table -- !query @@ -304,7 +304,7 @@ SELECT cast(1 as int) FROM t UNION SELECT cast('2' as binary) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. binary <> int at the first column of the second table; +Union can only be performed on tables with the compatible column types. binary <> int at the first column of the second table -- !query @@ -313,7 +313,7 @@ SELECT cast(1 as int) FROM t UNION SELECT cast(2 as boolean) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. boolean <> int at the first column of the second table; +Union can only be performed on tables with the compatible column types. boolean <> int at the first column of the second table -- !query @@ -322,7 +322,7 @@ SELECT cast(1 as int) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timest struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. timestamp <> int at the first column of the second table; +Union can only be performed on tables with the compatible column types. 
timestamp <> int at the first column of the second table -- !query @@ -331,7 +331,7 @@ SELECT cast(1 as int) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FR struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. date <> int at the first column of the second table; +Union can only be performed on tables with the compatible column types. date <> int at the first column of the second table -- !query @@ -412,7 +412,7 @@ SELECT cast(1 as bigint) FROM t UNION SELECT cast('2' as binary) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. binary <> bigint at the first column of the second table; +Union can only be performed on tables with the compatible column types. binary <> bigint at the first column of the second table -- !query @@ -421,7 +421,7 @@ SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as boolean) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. boolean <> bigint at the first column of the second table; +Union can only be performed on tables with the compatible column types. boolean <> bigint at the first column of the second table -- !query @@ -430,7 +430,7 @@ SELECT cast(1 as bigint) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as tim struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. timestamp <> bigint at the first column of the second table; +Union can only be performed on tables with the compatible column types. timestamp <> bigint at the first column of the second table -- !query @@ -439,7 +439,7 @@ SELECT cast(1 as bigint) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. date <> bigint at the first column of the second table; +Union can only be performed on tables with the compatible column types. date <> bigint at the first column of the second table -- !query @@ -520,7 +520,7 @@ SELECT cast(1 as float) FROM t UNION SELECT cast('2' as binary) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. binary <> float at the first column of the second table; +Union can only be performed on tables with the compatible column types. binary <> float at the first column of the second table -- !query @@ -529,7 +529,7 @@ SELECT cast(1 as float) FROM t UNION SELECT cast(2 as boolean) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. boolean <> float at the first column of the second table; +Union can only be performed on tables with the compatible column types. boolean <> float at the first column of the second table -- !query @@ -538,7 +538,7 @@ SELECT cast(1 as float) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as time struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. timestamp <> float at the first column of the second table; +Union can only be performed on tables with the compatible column types. 
timestamp <> float at the first column of the second table -- !query @@ -547,7 +547,7 @@ SELECT cast(1 as float) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. date <> float at the first column of the second table; +Union can only be performed on tables with the compatible column types. date <> float at the first column of the second table -- !query @@ -628,7 +628,7 @@ SELECT cast(1 as double) FROM t UNION SELECT cast('2' as binary) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. binary <> double at the first column of the second table; +Union can only be performed on tables with the compatible column types. binary <> double at the first column of the second table -- !query @@ -637,7 +637,7 @@ SELECT cast(1 as double) FROM t UNION SELECT cast(2 as boolean) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. boolean <> double at the first column of the second table; +Union can only be performed on tables with the compatible column types. boolean <> double at the first column of the second table -- !query @@ -646,7 +646,7 @@ SELECT cast(1 as double) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as tim struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. timestamp <> double at the first column of the second table; +Union can only be performed on tables with the compatible column types. timestamp <> double at the first column of the second table -- !query @@ -655,7 +655,7 @@ SELECT cast(1 as double) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. date <> double at the first column of the second table; +Union can only be performed on tables with the compatible column types. date <> double at the first column of the second table -- !query @@ -736,7 +736,7 @@ SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast('2' as binary) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. binary <> decimal(10,0) at the first column of the second table; +Union can only be performed on tables with the compatible column types. binary <> decimal(10,0) at the first column of the second table -- !query @@ -745,7 +745,7 @@ SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as boolean) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. boolean <> decimal(10,0) at the first column of the second table; +Union can only be performed on tables with the compatible column types. boolean <> decimal(10,0) at the first column of the second table -- !query @@ -754,7 +754,7 @@ SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast('2017-12-11 09:30:00.0 struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. timestamp <> decimal(10,0) at the first column of the second table; +Union can only be performed on tables with the compatible column types. 
timestamp <> decimal(10,0) at the first column of the second table -- !query @@ -763,7 +763,7 @@ SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast('2017-12-11 09:30:00' struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. date <> decimal(10,0) at the first column of the second table; +Union can only be performed on tables with the compatible column types. date <> decimal(10,0) at the first column of the second table -- !query @@ -844,7 +844,7 @@ SELECT cast(1 as string) FROM t UNION SELECT cast('2' as binary) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. binary <> string at the first column of the second table; +Union can only be performed on tables with the compatible column types. binary <> string at the first column of the second table -- !query @@ -853,7 +853,7 @@ SELECT cast(1 as string) FROM t UNION SELECT cast(2 as boolean) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. boolean <> string at the first column of the second table; +Union can only be performed on tables with the compatible column types. boolean <> string at the first column of the second table -- !query @@ -880,7 +880,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as tinyint) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. tinyint <> binary at the first column of the second table; +Union can only be performed on tables with the compatible column types. tinyint <> binary at the first column of the second table -- !query @@ -889,7 +889,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as smallint) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. smallint <> binary at the first column of the second table; +Union can only be performed on tables with the compatible column types. smallint <> binary at the first column of the second table -- !query @@ -898,7 +898,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as int) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. int <> binary at the first column of the second table; +Union can only be performed on tables with the compatible column types. int <> binary at the first column of the second table -- !query @@ -907,7 +907,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as bigint) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. bigint <> binary at the first column of the second table; +Union can only be performed on tables with the compatible column types. bigint <> binary at the first column of the second table -- !query @@ -916,7 +916,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as float) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. float <> binary at the first column of the second table; +Union can only be performed on tables with the compatible column types. 
float <> binary at the first column of the second table -- !query @@ -925,7 +925,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as double) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. double <> binary at the first column of the second table; +Union can only be performed on tables with the compatible column types. double <> binary at the first column of the second table -- !query @@ -934,7 +934,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. decimal(10,0) <> binary at the first column of the second table; +Union can only be performed on tables with the compatible column types. decimal(10,0) <> binary at the first column of the second table -- !query @@ -943,7 +943,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as string) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. string <> binary at the first column of the second table; +Union can only be performed on tables with the compatible column types. string <> binary at the first column of the second table -- !query @@ -961,7 +961,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as boolean) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. boolean <> binary at the first column of the second table; +Union can only be performed on tables with the compatible column types. boolean <> binary at the first column of the second table -- !query @@ -970,7 +970,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. timestamp <> binary at the first column of the second table; +Union can only be performed on tables with the compatible column types. timestamp <> binary at the first column of the second table -- !query @@ -979,7 +979,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast('2017-12-11 09:30:00' as dat struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. date <> binary at the first column of the second table; +Union can only be performed on tables with the compatible column types. date <> binary at the first column of the second table -- !query @@ -988,7 +988,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as tinyint) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. tinyint <> boolean at the first column of the second table; +Union can only be performed on tables with the compatible column types. tinyint <> boolean at the first column of the second table -- !query @@ -997,7 +997,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as smallint) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. smallint <> boolean at the first column of the second table; +Union can only be performed on tables with the compatible column types. 
smallint <> boolean at the first column of the second table -- !query @@ -1006,7 +1006,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as int) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. int <> boolean at the first column of the second table; +Union can only be performed on tables with the compatible column types. int <> boolean at the first column of the second table -- !query @@ -1015,7 +1015,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as bigint) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. bigint <> boolean at the first column of the second table; +Union can only be performed on tables with the compatible column types. bigint <> boolean at the first column of the second table -- !query @@ -1024,7 +1024,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as float) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. float <> boolean at the first column of the second table; +Union can only be performed on tables with the compatible column types. float <> boolean at the first column of the second table -- !query @@ -1033,7 +1033,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as double) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. double <> boolean at the first column of the second table; +Union can only be performed on tables with the compatible column types. double <> boolean at the first column of the second table -- !query @@ -1042,7 +1042,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. decimal(10,0) <> boolean at the first column of the second table; +Union can only be performed on tables with the compatible column types. decimal(10,0) <> boolean at the first column of the second table -- !query @@ -1051,7 +1051,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as string) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. string <> boolean at the first column of the second table; +Union can only be performed on tables with the compatible column types. string <> boolean at the first column of the second table -- !query @@ -1060,7 +1060,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast('2' as binary) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. binary <> boolean at the first column of the second table; +Union can only be performed on tables with the compatible column types. binary <> boolean at the first column of the second table -- !query @@ -1077,7 +1077,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as ti struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. timestamp <> boolean at the first column of the second table; +Union can only be performed on tables with the compatible column types. 
timestamp <> boolean at the first column of the second table -- !query @@ -1086,7 +1086,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. date <> boolean at the first column of the second table; +Union can only be performed on tables with the compatible column types. date <> boolean at the first column of the second table -- !query @@ -1095,7 +1095,7 @@ SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. tinyint <> timestamp at the first column of the second table; +Union can only be performed on tables with the compatible column types. tinyint <> timestamp at the first column of the second table -- !query @@ -1104,7 +1104,7 @@ SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. smallint <> timestamp at the first column of the second table; +Union can only be performed on tables with the compatible column types. smallint <> timestamp at the first column of the second table -- !query @@ -1113,7 +1113,7 @@ SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. int <> timestamp at the first column of the second table; +Union can only be performed on tables with the compatible column types. int <> timestamp at the first column of the second table -- !query @@ -1122,7 +1122,7 @@ SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. bigint <> timestamp at the first column of the second table; +Union can only be performed on tables with the compatible column types. bigint <> timestamp at the first column of the second table -- !query @@ -1131,7 +1131,7 @@ SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. float <> timestamp at the first column of the second table; +Union can only be performed on tables with the compatible column types. float <> timestamp at the first column of the second table -- !query @@ -1140,7 +1140,7 @@ SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. double <> timestamp at the first column of the second table; +Union can only be performed on tables with the compatible column types. double <> timestamp at the first column of the second table -- !query @@ -1149,7 +1149,7 @@ SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. decimal(10,0) <> timestamp at the first column of the second table; +Union can only be performed on tables with the compatible column types. 
decimal(10,0) <> timestamp at the first column of the second table -- !query @@ -1167,7 +1167,7 @@ SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast('2' a struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. binary <> timestamp at the first column of the second table; +Union can only be performed on tables with the compatible column types. binary <> timestamp at the first column of the second table -- !query @@ -1176,7 +1176,7 @@ SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. boolean <> timestamp at the first column of the second table; +Union can only be performed on tables with the compatible column types. boolean <> timestamp at the first column of the second table -- !query @@ -1203,7 +1203,7 @@ SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as tinyint struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. tinyint <> date at the first column of the second table; +Union can only be performed on tables with the compatible column types. tinyint <> date at the first column of the second table -- !query @@ -1212,7 +1212,7 @@ SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as smallin struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. smallint <> date at the first column of the second table; +Union can only be performed on tables with the compatible column types. smallint <> date at the first column of the second table -- !query @@ -1221,7 +1221,7 @@ SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as int) FR struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. int <> date at the first column of the second table; +Union can only be performed on tables with the compatible column types. int <> date at the first column of the second table -- !query @@ -1230,7 +1230,7 @@ SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as bigint) struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. bigint <> date at the first column of the second table; +Union can only be performed on tables with the compatible column types. bigint <> date at the first column of the second table -- !query @@ -1239,7 +1239,7 @@ SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as float) struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. float <> date at the first column of the second table; +Union can only be performed on tables with the compatible column types. float <> date at the first column of the second table -- !query @@ -1248,7 +1248,7 @@ SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as double) struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. double <> date at the first column of the second table; +Union can only be performed on tables with the compatible column types. 
double <> date at the first column of the second table -- !query @@ -1257,7 +1257,7 @@ SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as decimal struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. decimal(10,0) <> date at the first column of the second table; +Union can only be performed on tables with the compatible column types. decimal(10,0) <> date at the first column of the second table -- !query @@ -1275,7 +1275,7 @@ SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast('2' as binar struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. binary <> date at the first column of the second table; +Union can only be performed on tables with the compatible column types. binary <> date at the first column of the second table -- !query @@ -1284,7 +1284,7 @@ SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as boolean struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. boolean <> date at the first column of the second table; +Union can only be performed on tables with the compatible column types. boolean <> date at the first column of the second table -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part1.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part1.sql.out index a428a7a9c923b..0eb21d386378d 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part1.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part1.sql.out @@ -373,7 +373,7 @@ org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. Expression in where clause: [(sum(DISTINCT CAST((outer(a.`four`) + b.`four`) AS BIGINT)) = CAST(CAST(udf(ansi_cast(four as string)) AS INT) AS BIGINT))] -Invalid expressions: [sum(DISTINCT CAST((outer(a.`four`) + b.`four`) AS BIGINT))]; +Invalid expressions: [sum(DISTINCT CAST((outer(a.`four`) + b.`four`) AS BIGINT))] -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part3.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part3.sql.out index f491d9b9ba3a8..17b77a8a7aea9 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part3.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part3.sql.out @@ -8,7 +8,7 @@ select udf(max(min(unique1))) from tenk1 struct<> -- !query output org.apache.spark.sql.AnalysisException -It is not allowed to use an aggregate function in the argument of another aggregate function. Please use the inner aggregate function in a sub-query.; +It is not allowed to use an aggregate function in the argument of another aggregate function. Please use the inner aggregate function in a sub-query. 
-- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out index 89fc36a0da827..e3d7eb169e818 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out @@ -143,7 +143,7 @@ SELECT udf(a) FROM test_having HAVING udf(min(a)) < udf(max(a)) struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping expressions sequence is empty, and 'spark_catalog.default.test_having.`a`' is not an aggregate function. Wrap '(min(spark_catalog.default.test_having.`a`) AS `min(a#x)`, max(spark_catalog.default.test_having.`a`) AS `max(a#x)`)' in windowing function(s) or wrap 'spark_catalog.default.test_having.`a`' in first() (or first_value) if you don't care which value you get.; +grouping expressions sequence is empty, and 'spark_catalog.default.test_having.`a`' is not an aggregate function. Wrap '(min(spark_catalog.default.test_having.`a`) AS `min(a#x)`, max(spark_catalog.default.test_having.`a`) AS `max(a#x)`)' in windowing function(s) or wrap 'spark_catalog.default.test_having.`a`' in first() (or first_value) if you don't care which value you get. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-except-all.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-except-all.sql.out index 2613120e004df..7a4ae72fac97b 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-except-all.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-except-all.sql.out @@ -141,7 +141,7 @@ SELECT array(1) struct<> -- !query output org.apache.spark.sql.AnalysisException -ExceptAll can only be performed on tables with the compatible column types. array <> int at the first column of the second table; +ExceptAll can only be performed on tables with the compatible column types. 
array <> int at the first column of the second table -- !query @@ -213,7 +213,7 @@ SELECT k, v FROM tab4 struct<> -- !query output org.apache.spark.sql.AnalysisException -ExceptAll can only be performed on tables with the same number of columns, but the first table has 1 columns and the second table has 2 columns; +ExceptAll can only be performed on tables with the same number of columns, but the first table has 1 columns and the second table has 2 columns -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-analytics.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-analytics.sql.out index f4cf4196298c1..15620e34f2be8 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-analytics.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-analytics.sql.out @@ -210,7 +210,7 @@ SELECT course, udf(year), GROUPING(course) FROM courseSales GROUP BY course, udf struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping() can only be used with GroupingSets/Cube/Rollup; +grouping() can only be used with GroupingSets/Cube/Rollup -- !query @@ -219,7 +219,7 @@ SELECT course, udf(year), GROUPING_ID(course, year) FROM courseSales GROUP BY ud struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping_id() can only be used with GroupingSets/Cube/Rollup; +grouping_id() can only be used with GroupingSets/Cube/Rollup -- !query @@ -255,7 +255,7 @@ SELECT course, udf(year) FROM courseSales GROUP BY udf(course), year HAVING GROU struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup; +grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup -- !query @@ -264,7 +264,7 @@ SELECT course, udf(udf(year)) FROM courseSales GROUP BY course, year HAVING GROU struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup; +grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup -- !query @@ -319,7 +319,7 @@ SELECT course, udf(year) FROM courseSales GROUP BY course, udf(year) ORDER BY GR struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup; +grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup -- !query @@ -328,7 +328,7 @@ SELECT course, udf(year) FROM courseSales GROUP BY course, udf(year) ORDER BY GR struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup; +grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out index da5256f5c0453..18a7708c40685 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out @@ -18,7 +18,7 @@ SELECT udf(a), udf(COUNT(b)) FROM testData struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping expressions sequence is empty, and 'testdata.`a`' is not an aggregate function. 
Wrap '(CAST(udf(cast(count(b) as string)) AS BIGINT) AS `CAST(udf(cast(count(b) as string)) AS BIGINT)`)' in windowing function(s) or wrap 'testdata.`a`' in first() (or first_value) if you don't care which value you get.; +grouping expressions sequence is empty, and 'testdata.`a`' is not an aggregate function. Wrap '(CAST(udf(cast(count(b) as string)) AS BIGINT) AS `CAST(udf(cast(count(b) as string)) AS BIGINT)`)' in windowing function(s) or wrap 'testdata.`a`' in first() (or first_value) if you don't care which value you get. -- !query @@ -46,7 +46,7 @@ SELECT udf(a), udf(COUNT(udf(b))) FROM testData GROUP BY b struct<> -- !query output org.apache.spark.sql.AnalysisException -expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; +expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get. -- !query @@ -110,7 +110,7 @@ SELECT udf(a + 2), udf(COUNT(b)) FROM testData GROUP BY a + 1 struct<> -- !query output org.apache.spark.sql.AnalysisException -expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; +expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get. -- !query @@ -167,7 +167,7 @@ SELECT udf(COUNT(b)) AS k FROM testData GROUP BY k struct<> -- !query output org.apache.spark.sql.AnalysisException -aggregate functions are not allowed in GROUP BY, but found CAST(udf(cast(count(b) as string)) AS BIGINT); +aggregate functions are not allowed in GROUP BY, but found CAST(udf(cast(count(b) as string)) AS BIGINT) -- !query @@ -185,7 +185,7 @@ SELECT k AS a, udf(COUNT(udf(v))) FROM testDataHasSameNameWithAlias GROUP BY udf struct<> -- !query output org.apache.spark.sql.AnalysisException -expression 'testdatahassamenamewithalias.`k`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; +expression 'testdatahassamenamewithalias.`k`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get. -- !query @@ -274,7 +274,7 @@ SELECT udf(id) FROM range(10) HAVING id > 0 struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping expressions sequence is empty, and '`id`' is not an aggregate function. Wrap '()' in windowing function(s) or wrap '`id`' in first() (or first_value) if you don't care which value you get.; +grouping expressions sequence is empty, and '`id`' is not an aggregate function. Wrap '()' in windowing function(s) or wrap '`id`' in first() (or first_value) if you don't care which value you get. -- !query @@ -496,7 +496,7 @@ org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. Expression in where clause: [(count(1) > 1L)] -Invalid expressions: [count(1)]; +Invalid expressions: [count(1)] -- !query @@ -508,7 +508,7 @@ org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. 
Expression in where clause: [((count(1) + 1L) > 1L)] -Invalid expressions: [count(1)]; +Invalid expressions: [count(1)] -- !query @@ -520,4 +520,4 @@ org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. Expression in where clause: [(((test_agg.`k` = 1) OR (test_agg.`k` = 2)) OR (((count(1) + 1L) > 1L) OR (max(test_agg.`k`) > 1)))] -Invalid expressions: [count(1), max(test_agg.`k`)]; +Invalid expressions: [count(1), max(test_agg.`k`)] diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-intersect-all.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-intersect-all.sql.out index b3735ae153267..e225a3df596c0 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-intersect-all.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-intersect-all.sql.out @@ -98,7 +98,7 @@ SELECT array(1), udf(2) struct<> -- !query output org.apache.spark.sql.AnalysisException -IntersectAll can only be performed on tables with the compatible column types. array <> int at the first column of the second table; +IntersectAll can only be performed on tables with the compatible column types. array <> int at the first column of the second table -- !query @@ -109,7 +109,7 @@ SELECT udf(k), udf(v) FROM tab2 struct<> -- !query output org.apache.spark.sql.AnalysisException -IntersectAll can only be performed on tables with the same number of columns, but the first table has 1 columns and the second table has 2 columns; +IntersectAll can only be performed on tables with the same number of columns, but the first table has 1 columns and the second table has 2 columns -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-pivot.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-pivot.sql.out index 414435e6b781d..bcec61470d4a4 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-pivot.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-pivot.sql.out @@ -202,7 +202,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Aggregate expression required for pivot, but 'coursesales.`earnings`' did not appear in any aggregate function.; +Aggregate expression required for pivot, but 'coursesales.`earnings`' did not appear in any aggregate function. -- !query @@ -217,7 +217,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Aggregate expression required for pivot, but '__auto_generated_subquery_name.`year`' did not appear in any aggregate function.; +Aggregate expression required for pivot, but '__auto_generated_subquery_name.`year`' did not appear in any aggregate function. -- !query @@ -262,7 +262,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -It is not allowed to use an aggregate function in the argument of another aggregate function. Please use the inner aggregate function in a sub-query.; +It is not allowed to use an aggregate function in the argument of another aggregate function. Please use the inner aggregate function in a sub-query. 
-- !query @@ -313,7 +313,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Invalid pivot value 'dotNET': value data type string does not match pivot column data type struct; +Invalid pivot value 'dotNET': value data type string does not match pivot column data type struct -- !query @@ -339,7 +339,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Literal expressions required for pivot values, found 'course#x'; +Literal expressions required for pivot values, found 'course#x' -- !query @@ -424,7 +424,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Invalid pivot column 'm#x'. Pivot columns must be comparable.; +Invalid pivot column 'm#x'. Pivot columns must be comparable. -- !query @@ -441,7 +441,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Invalid pivot column 'named_struct(course, course#x, m, m#x)'. Pivot columns must be comparable.; +Invalid pivot column 'named_struct(course, course#x, m, m#x)'. Pivot columns must be comparable. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-window.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-window.sql.out index 928b9ebb12364..6d97800904971 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-window.sql.out @@ -321,7 +321,7 @@ SELECT udf(val), cate, row_number() OVER(PARTITION BY cate) FROM testData ORDER struct<> -- !query output org.apache.spark.sql.AnalysisException -Window function row_number() requires window to be ordered, please add ORDER BY clause. For example SELECT row_number()(value_expr) OVER (PARTITION BY window_partition ORDER BY window_ordering) from table; +Window function row_number() requires window to be ordered, please add ORDER BY clause. For example SELECT row_number()(value_expr) OVER (PARTITION BY window_partition ORDER BY window_ordering) from table -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out b/sql/core/src/test/resources/sql-tests/results/window.sql.out index df2ad96649186..c904c43ac84ed 100644 --- a/sql/core/src/test/resources/sql-tests/results/window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out @@ -19,6 +19,7 @@ struct<> -- !query output + -- !query CREATE OR REPLACE TEMPORARY VIEW basic_pays AS SELECT * FROM VALUES ('Diane Murphy','Accounting',8435), @@ -44,6 +45,7 @@ struct<> -- !query output + -- !query SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val ROWS CURRENT ROW) FROM testData ORDER BY cate, val @@ -345,7 +347,7 @@ SELECT val, cate, row_number() OVER(PARTITION BY cate) FROM testData ORDER BY ca struct<> -- !query output org.apache.spark.sql.AnalysisException -Window function row_number() requires window to be ordered, please add ORDER BY clause. For example SELECT row_number()(value_expr) OVER (PARTITION BY window_partition ORDER BY window_ordering) from table; +Window function row_number() requires window to be ordered, please add ORDER BY clause. For example SELECT row_number()(value_expr) OVER (PARTITION BY window_partition ORDER BY window_ordering) from table -- !query @@ -414,7 +416,7 @@ FROM testData ORDER BY cate, val struct<> -- !query output org.apache.spark.sql.AnalysisException -window aggregate function with filter predicate is not supported yet.; +window aggregate function with filter predicate is not supported yet. 
-- !query @@ -773,4 +775,4 @@ WINDOW ^^^ w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING), w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING) -ORDER BY salary DESC \ No newline at end of file +ORDER BY salary DESC diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala index 937de92bcaba6..01b1508d034c3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala @@ -2220,7 +2220,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { structLevel1 .select($"a".dropFields("c").as("a")) .select($"a".withField("z", $"a.c")).as("a") - }.getMessage should include("No such struct field c in a, b;") + }.getMessage should include("No such struct field c in a, b") } test("nestedDf should generate nested DataFrames") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala index e454f0e6d540f..12394a92aed44 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala @@ -155,7 +155,7 @@ trait SQLInsertTestSuite extends QueryTest with SQLTestUtils { val cols = Seq("c1", "c2", "c3") createTable("t1", cols, Seq("int", "long", "string")) val e1 = intercept[AnalysisException](sql(s"INSERT INTO t1 (c1, c2, c2) values(1, 2, 3)")) - assert(e1.getMessage === "Found duplicate column(s) in the column list: `c2`;") + assert(e1.getMessage === "Found duplicate column(s) in the column list: `c2`") } } @@ -164,7 +164,7 @@ trait SQLInsertTestSuite extends QueryTest with SQLTestUtils { val cols = Seq("c1", "c2", "c3") createTable("t1", cols, Seq("int", "long", "string")) val e1 = intercept[AnalysisException](sql(s"INSERT INTO t1 (c1, c2, c4) values(1, 2, 3)")) - assert(e1.getMessage === "Cannot resolve column name c4;") + assert(e1.getMessage === "Cannot resolve column name c4") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala index d7659e25d2c41..58427183eeed5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala @@ -62,7 +62,7 @@ trait ShowTablesSuiteBase extends QueryTest with SQLTestUtils { val msg = intercept[NoSuchNamespaceException] { runShowTablesSql(s"SHOW TABLES IN $catalog.unknown", Seq()) }.getMessage - assert(msg.matches("(Database|Namespace) 'unknown' not found;")) + assert(msg.matches("(Database|Namespace) 'unknown' not found")) } test("show tables with a pattern") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala index 580e7df6ef63e..1ea2d4fd0b32c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala @@ -190,7 +190,7 @@ class SQLConfSuite extends QueryTest with SharedSparkSession { assert(spark.conf.get("spark.app.id") === appId, "Should not change spark core ones") // spark core conf w/ entry registered val e1 = 
intercept[AnalysisException](sql("RESET spark.executor.cores")) - assert(e1.getMessage === "Cannot modify the value of a Spark config: spark.executor.cores;") + assert(e1.getMessage === "Cannot modify the value of a Spark config: spark.executor.cores") // user defined settings sql("SET spark.abc=xyz") @@ -217,7 +217,7 @@ class SQLConfSuite extends QueryTest with SharedSparkSession { // static sql configs val e2 = intercept[AnalysisException](sql(s"RESET ${StaticSQLConf.WAREHOUSE_PATH.key}")) assert(e2.getMessage === - s"Cannot modify the value of a static config: ${StaticSQLConf.WAREHOUSE_PATH.key};") + s"Cannot modify the value of a static config: ${StaticSQLConf.WAREHOUSE_PATH.key}") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedWriteSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedWriteSuite.scala index a410f32d4af7e..0a5feda1bd533 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedWriteSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedWriteSuite.scala @@ -88,7 +88,7 @@ abstract class BucketedWriteSuite extends QueryTest with SQLTestUtils { val e = intercept[AnalysisException] { df.write.sortBy("j").saveAsTable("tt") } - assert(e.getMessage == "sortBy must be used together with bucketBy;") + assert(e.getMessage == "sortBy must be used together with bucketBy") } test("sorting by non-orderable column") { @@ -102,7 +102,7 @@ abstract class BucketedWriteSuite extends QueryTest with SQLTestUtils { val e = intercept[AnalysisException] { df.write.bucketBy(2, "i").parquet("/tmp/path") } - assert(e.getMessage == "'save' does not support bucketBy right now;") + assert(e.getMessage == "'save' does not support bucketBy right now") } test("write bucketed and sorted data using save()") { @@ -111,7 +111,7 @@ abstract class BucketedWriteSuite extends QueryTest with SQLTestUtils { val e = intercept[AnalysisException] { df.write.bucketBy(2, "i").sortBy("i").parquet("/tmp/path") } - assert(e.getMessage == "'save' does not support bucketBy and sortBy right now;") + assert(e.getMessage == "'save' does not support bucketBy and sortBy right now") } test("write bucketed data using insertInto()") { @@ -120,7 +120,7 @@ abstract class BucketedWriteSuite extends QueryTest with SQLTestUtils { val e = intercept[AnalysisException] { df.write.bucketBy(2, "i").insertInto("tt") } - assert(e.getMessage == "'insertInto' does not support bucketBy right now;") + assert(e.getMessage == "'insertInto' does not support bucketBy right now") } test("write bucketed and sorted data using insertInto()") { @@ -129,7 +129,7 @@ abstract class BucketedWriteSuite extends QueryTest with SQLTestUtils { val e = intercept[AnalysisException] { df.write.bucketBy(2, "i").sortBy("i").insertInto("tt") } - assert(e.getMessage == "'insertInto' does not support bucketBy and sortBy right now;") + assert(e.getMessage == "'insertInto' does not support bucketBy and sortBy right now") } private lazy val df = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala index 52825a155e46a..b9266429f81a5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala @@ -162,7 +162,7 @@ class PartitionedWriteSuite extends QueryTest with SharedSparkSession { withTempPath { f => val e = intercept[AnalysisException]( 
Seq((3, 2)).toDF("a", "b").write.partitionBy("b", "b").csv(f.getAbsolutePath)) - assert(e.getMessage.contains("Found duplicate column(s) b, b: `b`;")) + assert(e.getMessage.contains("Found duplicate column(s) b, b: `b`")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala index b240d2058a018..6b9fa9c968fb4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala @@ -413,7 +413,7 @@ class FileStreamSourceSuite extends FileStreamSourceTest { createFileStreamSourceAndGetSchema( format = Some("json"), path = Some(src.getCanonicalPath), schema = None) } - assert("Unable to infer schema for JSON. It must be specified manually.;" === e.getMessage) + assert("Unable to infer schema for JSON. It must be specified manually." === e.getMessage) } } } From 23083aa594360938c611a45794405d81e59ecaf1 Mon Sep 17 00:00:00 2001 From: Prakhar Jain Date: Tue, 15 Dec 2020 13:46:58 +0000 Subject: [PATCH 0772/1009] [SPARK-33758][SQL] Prune unrequired partitionings from AliasAwareOutputPartitionings when some columns are dropped from projection ### What changes were proposed in this pull request? This PR tries to prune the unrequired output partitionings in cases when the columns are dropped from Project/Aggregates etc. ### Why are the changes needed? Consider this query: select t1.id from t1 JOIN t2 on t1.id = t2.id This query will have top level Project node which will just project t1.id. But the outputPartitioning of this project node will be: PartitioningCollection(HashPartitioning(t1.id), HashPartitioning(t2.id)). But since we are not propagating t2.id column, so we can drop HashPartitioning(t2.id) from the output partitioning of Project node. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added UTs. Closes #30762 from prakharjain09/SPARK-33758-prune-partitioning. Authored-by: Prakhar Jain Signed-off-by: Wenchen Fan --- .../AliasAwareOutputExpression.scala | 22 +++++++- .../spark/sql/execution/PlannerSuite.scala | 54 ++++++++++++++----- 2 files changed, 61 insertions(+), 15 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala index 3cbe1654ea2cd..23a9527a1b349 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeMap, AttributeReference, Expression, NamedExpression, SortOrder} -import org.apache.spark.sql.catalyst.plans.physical.Partitioning +import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, PartitioningCollection, UnknownPartitioning} /** * A trait that provides functionality to handle aliases in the `outputExpressions`. 
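A quick way to see the effect described in this commit message is a spark-shell session; the sketch below is illustrative only and mirrors the new PlannerSuite test added in this patch (the view names, config settings, and expected partitionings are borrowed from that test, so it assumes a build containing this change):

```scala
// Minimal sketch of the SPARK-33758 behavior; not part of the patch itself.
// Disable broadcast joins and AQE so the sort-merge join keeps its hash partitionings.
import spark.implicits._
spark.conf.set("spark.sql.autoBroadcastJoinThreshold", "-1")
spark.conf.set("spark.sql.adaptive.enabled", "false")

spark.range(10).repartition($"id").createOrReplaceTempView("t1")
spark.range(20).repartition($"id").createOrReplaceTempView("t2")

// Both join keys are projected, so the top-level Project keeps both partitionings:
// PartitioningCollection(HashPartitioning(t1id), HashPartitioning(t2id)).
val both = spark.sql("SELECT t1.id AS t1id, t2.id AS t2id FROM t1 JOIN t2 ON t1.id = t2.id")
println(both.queryExecution.executedPlan.outputPartitioning)

// Only t1.id survives the projection, so HashPartitioning(t2.id) is pruned and a
// single HashPartitioning(t1id) remains.
val onlyT1 = spark.sql("SELECT t1.id AS t1id FROM t1 JOIN t2 ON t1.id = t2.id")
println(onlyT1.queryExecution.executedPlan.outputPartitioning)
```

Without this change, the second plan would still report the full `PartitioningCollection`, including `HashPartitioning(t2.id)`, even though `t2.id` is no longer part of the project's output.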
@@ -44,7 +44,7 @@ trait AliasAwareOutputExpression extends UnaryExecNode { */ trait AliasAwareOutputPartitioning extends AliasAwareOutputExpression { final override def outputPartitioning: Partitioning = { - if (hasAlias) { + val normalizedOutputPartitioning = if (hasAlias) { child.outputPartitioning match { case e: Expression => normalizeExpression(e).asInstanceOf[Partitioning] @@ -53,6 +53,24 @@ trait AliasAwareOutputPartitioning extends AliasAwareOutputExpression { } else { child.outputPartitioning } + + flattenPartitioning(normalizedOutputPartitioning).filter { + case hashPartitioning: HashPartitioning => hashPartitioning.references.subsetOf(outputSet) + case _ => true + } match { + case Seq() => UnknownPartitioning(child.outputPartitioning.numPartitions) + case Seq(singlePartitioning) => singlePartitioning + case seqWithMultiplePartitionings => PartitioningCollection(seqWithMultiplePartitionings) + } + } + + private def flattenPartitioning(partitioning: Partitioning): Seq[Partitioning] = { + partitioning match { + case PartitioningCollection(childPartitionings) => + childPartitionings.flatMap(flattenPartitioning) + case rest => + rest +: Nil + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index 4e01d1c06f64e..924776ae3ae60 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -921,10 +921,10 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { val projects = planned.collect { case p: ProjectExec => p } assert(projects.exists(_.outputPartitioning match { - case PartitioningCollection(Seq(HashPartitioning(Seq(k1: AttributeReference), _), - HashPartitioning(Seq(k2: AttributeReference), _))) if k1.name == "t1id" => + case HashPartitioning(Seq(k1: AttributeReference), _) if k1.name == "t1id" => true - case _ => false + case _ => + false })) } } @@ -1008,17 +1008,11 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { val projects = planned.collect { case p: ProjectExec => p } assert(projects.exists(_.outputPartitioning match { - case PartitioningCollection(Seq(HashPartitioning(Seq(Multiply(ar1, _, _)), _), - HashPartitioning(Seq(Multiply(ar2, _, _)), _))) => - Seq(ar1, ar2) match { - case Seq(ar1: AttributeReference, ar2: AttributeReference) => - ar1.name == "t1id" && ar2.name == "id2" - case _ => - false - } - case _ => false + case HashPartitioning(Seq(Multiply(ar1: AttributeReference, _, _)), _) => + ar1.name == "t1id" + case _ => + false })) - } } } @@ -1234,6 +1228,40 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { val numPartitions = range.rdd.getNumPartitions assert(numPartitions == 0) } + + test("SPARK-33758: Prune unnecessary output partitioning") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { + withTempView("t1", "t2") { + spark.range(10).repartition($"id").createTempView("t1") + spark.range(20).repartition($"id").createTempView("t2") + val planned = sql( + """ + | SELECT t1.id as t1id, t2.id as t2id + | FROM t1, t2 + | WHERE t1.id = t2.id + """.stripMargin).queryExecution.executedPlan + + assert(planned.outputPartitioning match { + case PartitioningCollection(Seq(HashPartitioning(Seq(k1: AttributeReference), _), + HashPartitioning(Seq(k2: AttributeReference), _))) => + k1.name == 
"t1id" && k2.name == "t2id" + }) + + val planned2 = sql( + """ + | SELECT t1.id as t1id + | FROM t1, t2 + | WHERE t1.id = t2.id + """.stripMargin).queryExecution.executedPlan + assert(planned2.outputPartitioning match { + case HashPartitioning(Seq(k1: AttributeReference), _) if k1.name == "t1id" => + true + }) + } + } + } } // Used for unit-testing EnsureRequirements From 40c37d69fd003ed6079ee8c139dba5c15915c568 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 15 Dec 2020 14:16:43 +0000 Subject: [PATCH 0773/1009] [SPARK-33617][SQL][FOLLOWUP] refine the default parallelism SQL config ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/30559 . The default parallelism config in Spark core is not good, as it's unclear where it applies. To not inherit this problem in Spark SQL, this PR refines the default parallelism SQL config, to make it clear that it only applies to leaf nodes. ### Why are the changes needed? Make the config clearer. ### Does this PR introduce _any_ user-facing change? It changes an unreleased config. ### How was this patch tested? existing tests Closes #30736 from cloud-fan/follow. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../scala/org/apache/spark/sql/internal/SQLConf.scala | 11 +++++------ .../scala/org/apache/spark/sql/SparkSession.scala | 10 ++++++---- .../spark/sql/execution/LocalTableScanExec.scala | 3 +-- .../adaptive/CoalesceShufflePartitions.scala | 2 +- .../spark/sql/execution/basicPhysicalOperators.scala | 3 +-- .../org/apache/spark/sql/execution/command/ddl.scala | 3 +-- .../sql/execution/datasources/FilePartition.scala | 3 +-- .../sql/execution/datasources/SchemaMergeUtils.scala | 3 +-- .../apache/spark/sql/execution/SparkPlanSuite.scala | 4 ++-- 9 files changed, 19 insertions(+), 23 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 078928391f560..fd6a30ac6a81c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -374,12 +374,13 @@ object SQLConf { .booleanConf .createWithDefault(true) - val DEFAULT_PARALLELISM = buildConf("spark.sql.default.parallelism") - .doc("The number of parallelism for Spark SQL, the default value is " + - "`spark.default.parallelism`.") + val LEAF_NODE_DEFAULT_PARALLELISM = buildConf("spark.sql.leafNodeDefaultParallelism") + .doc("The default parallelism of Spark SQL leaf nodes that produce data, such as the file " + + "scan node, the local data scan node, the range node, etc. 
The default value of this " + + "config is 'SparkContext#defaultParallelism'.") .version("3.2.0") .intConf - .checkValue(_ > 0, "The value of spark.sql.default.parallelism must be positive.") + .checkValue(_ > 0, "The value of spark.sql.leafNodeDefaultParallelism must be positive.") .createOptional val SHUFFLE_PARTITIONS = buildConf("spark.sql.shuffle.partitions") @@ -3202,8 +3203,6 @@ class SQLConf extends Serializable with Logging { def cacheVectorizedReaderEnabled: Boolean = getConf(CACHE_VECTORIZED_READER_ENABLED) - def defaultParallelism: Option[Int] = getConf(DEFAULT_PARALLELISM) - def defaultNumShufflePartitions: Int = getConf(SHUFFLE_PARTITIONS) def numShufflePartitions: Int = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index a2c9406f6becf..20a2649322ae0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -523,8 +523,7 @@ class SparkSession private( * @since 2.0.0 */ def range(start: Long, end: Long): Dataset[java.lang.Long] = { - range(start, end, step = 1, - numPartitions = sqlContext.conf.defaultParallelism.getOrElse(sparkContext.defaultParallelism)) + range(start, end, step = 1, numPartitions = leafNodeDefaultParallelism) } /** @@ -534,8 +533,7 @@ class SparkSession private( * @since 2.0.0 */ def range(start: Long, end: Long, step: Long): Dataset[java.lang.Long] = { - range(start, end, step, - numPartitions = sqlContext.conf.defaultParallelism.getOrElse(sparkContext.defaultParallelism)) + range(start, end, step, numPartitions = leafNodeDefaultParallelism) } /** @@ -775,6 +773,10 @@ class SparkSession private( SparkSession.setActiveSession(old) } } + + private[sql] def leafNodeDefaultParallelism: Int = { + conf.get(SQLConf.LEAF_NODE_DEFAULT_PARALLELISM).getOrElse(sparkContext.defaultParallelism) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala index 02a8f46824241..054daa54d1153 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala @@ -50,8 +50,7 @@ case class LocalTableScanExec( sqlContext.sparkContext.emptyRDD } else { val numSlices = math.min( - unsafeRows.length, - conf.defaultParallelism.getOrElse(sqlContext.sparkContext.defaultParallelism)) + unsafeRows.length, sqlContext.sparkSession.leafNodeDefaultParallelism) sqlContext.sparkContext.parallelize(unsafeRows, numSlices) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala index 6149bd214e540..0f482142227d2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala @@ -67,7 +67,7 @@ case class CoalesceShufflePartitions(session: SparkSession) extends CustomShuffl // We fall back to Spark default parallelism if the minimum number of coalesced partitions // is not set, so to avoid perf regressions compared to no coalescing. 
val minPartitionNum = conf.getConf(SQLConf.COALESCE_PARTITIONS_MIN_PARTITION_NUM) - .orElse(conf.defaultParallelism).getOrElse(session.sparkContext.defaultParallelism) + .getOrElse(session.sparkContext.defaultParallelism) val partitionSpecs = ShufflePartitionsUtil.coalescePartitions( validMetrics.toArray, advisoryTargetSize = conf.getConf(SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala index fcf77e588fc60..d74d0bf733c27 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala @@ -382,8 +382,7 @@ case class RangeExec(range: org.apache.spark.sql.catalyst.plans.logical.Range) val start: Long = range.start val end: Long = range.end val step: Long = range.step - val numSlices: Int = range.numSlices.orElse(sqlContext.conf.defaultParallelism) - .getOrElse(sparkContext.defaultParallelism) + val numSlices: Int = range.numSlices.getOrElse(sqlContext.sparkSession.leafNodeDefaultParallelism) val numElements: BigInt = range.numElements val isEmptyRange: Boolean = start == end || (start < end ^ 0 < step) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 6d631e044e917..604de860f04c0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -738,8 +738,7 @@ case class AlterTableRecoverPartitionsCommand( // Set the number of parallelism to prevent following file listing from generating many tasks // in case of large #defaultParallelism. val numParallelism = Math.min(serializedPaths.length, - Math.min(spark.sessionState.conf.defaultParallelism - .getOrElse(spark.sparkContext.defaultParallelism), 10000)) + Math.min(spark.sparkContext.defaultParallelism, 10000)) // gather the fast stats for all the partitions otherwise Hive metastore will list all the // files for all the new partitions in sequential way, which is super slow. 
logInfo(s"Gather the fast stats in parallel using $numParallelism tasks.") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FilePartition.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FilePartition.scala index 1b35db8d0873c..a4d16a0fd2bb2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FilePartition.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FilePartition.scala @@ -89,8 +89,7 @@ object FilePartition extends Logging { val defaultMaxSplitBytes = sparkSession.sessionState.conf.filesMaxPartitionBytes val openCostInBytes = sparkSession.sessionState.conf.filesOpenCostInBytes val minPartitionNum = sparkSession.sessionState.conf.filesMinPartitionNum - .orElse(sparkSession.sessionState.conf.defaultParallelism) - .getOrElse(sparkSession.sparkContext.defaultParallelism) + .getOrElse(sparkSession.leafNodeDefaultParallelism) val totalBytes = selectedPartitions.flatMap(_.files.map(_.getLen + openCostInBytes)).sum val bytesPerCore = totalBytes / minPartitionNum diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaMergeUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaMergeUtils.scala index 54d79898bb81b..28097c35401c9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaMergeUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaMergeUtils.scala @@ -57,8 +57,7 @@ object SchemaMergeUtils extends Logging { // Set the number of partitions to prevent following schema reads from generating many tasks // in case of a small number of orc files. val numParallelism = Math.min(Math.max(partialFileStatusInfo.size, 1), - sparkSession.sessionState.conf.defaultParallelism - .getOrElse(sparkSession.sparkContext.defaultParallelism)) + sparkSession.sparkContext.defaultParallelism) val ignoreCorruptFiles = sparkSession.sessionState.conf.ignoreCorruptFiles diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala index 254855247ced3..dfec6bccb0c58 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala @@ -89,9 +89,9 @@ class SparkPlanSuite extends QueryTest with SharedSparkSession { assert(LocalTableScanExec(Nil, Nil).execute().getNumPartitions == 0) } - test("SPARK-33617: spark.sql.default.parallelism effective for LocalTableScan") { + test("SPARK-33617: change default parallelism of LocalTableScan") { Seq(1, 4).foreach { minPartitionNum => - withSQLConf(SQLConf.DEFAULT_PARALLELISM.key -> minPartitionNum.toString) { + withSQLConf(SQLConf.LEAF_NODE_DEFAULT_PARALLELISM.key -> minPartitionNum.toString) { val df = spark.sql("SELECT * FROM VALUES (1), (2), (3), (4), (5), (6), (7), (8)") assert(df.rdd.partitions.length === minPartitionNum) } From 4d56d438386049b5f481ec83b69e3c89807be201 Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Tue, 15 Dec 2020 13:50:58 -0800 Subject: [PATCH 0774/1009] [SPARK-33735][SQL] Handle UPDATE in ReplaceNullWithFalseInPredicate ### What changes were proposed in this pull request? This PR adds `UpdateTable` to supported plans in `ReplaceNullWithFalseInPredicate`. ### Why are the changes needed? This change allows Spark to optimize update conditions like we optimize filters. 
### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? This PR extends the existing test cases to also cover `UpdateTable`. Closes #30787 from aokolnychyi/spark-33735. Authored-by: Anton Okolnychyi Signed-off-by: Dongjoon Hyun --- .../ReplaceNullWithFalseInPredicate.scala | 3 ++- ...ReplaceNullWithFalseInPredicateSuite.scala | 23 ++++++++++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala index 698ece4f9e69f..4a71dba663b38 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.expressions.{And, ArrayExists, ArrayFilter, CaseWhen, Expression, If} import org.apache.spark.sql.catalyst.expressions.{LambdaFunction, Literal, MapFilter, Or} import org.apache.spark.sql.catalyst.expressions.Literal.FalseLiteral -import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, Filter, Join, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, Filter, Join, LogicalPlan, UpdateTable} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.types.BooleanType import org.apache.spark.util.Utils @@ -54,6 +54,7 @@ object ReplaceNullWithFalseInPredicate extends Rule[LogicalPlan] { case f @ Filter(cond, _) => f.copy(condition = replaceNullWithFalse(cond)) case j @ Join(_, _, _, Some(cond), _) => j.copy(condition = Some(replaceNullWithFalse(cond))) case d @ DeleteFromTable(_, Some(cond)) => d.copy(condition = Some(replaceNullWithFalse(cond))) + case u @ UpdateTable(_, _, Some(cond)) => u.copy(condition = Some(replaceNullWithFalse(cond))) case p: LogicalPlan => p transformExpressions { case i @ If(pred, _, _) => i.copy(predicate = replaceNullWithFalse(pred)) case cw @ CaseWhen(branches, _) => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala index 6fc31c94e47eb..00433a5490574 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions.{And, ArrayExists, ArrayFilter, ArrayTransform, CaseWhen, Expression, GreaterThan, If, LambdaFunction, Literal, MapFilter, NamedExpression, Or, UnresolvedNamedLambdaVariable} import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest} -import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, LocalRelation, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, LocalRelation, LogicalPlan, UpdateTable} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{BooleanType, IntegerType} @@ -49,6 +49,7 @@ class 
ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond = Literal(null, BooleanType), expectedCond = FalseLiteral) testJoin(originalCond = Literal(null, BooleanType), expectedCond = FalseLiteral) testDelete(originalCond = Literal(null, BooleanType), expectedCond = FalseLiteral) + testUpdate(originalCond = Literal(null, BooleanType), expectedCond = FalseLiteral) } test("Not expected type - replaceNullWithFalse") { @@ -66,6 +67,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) testDelete(originalCond, expectedCond = FalseLiteral) + testUpdate(originalCond, expectedCond = FalseLiteral) } test("replace nulls in nested expressions in branches of If") { @@ -76,6 +78,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) testDelete(originalCond, expectedCond = FalseLiteral) + testUpdate(originalCond, expectedCond = FalseLiteral) } test("replace null in elseValue of CaseWhen") { @@ -87,6 +90,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond) testJoin(originalCond, expectedCond) testDelete(originalCond, expectedCond) + testUpdate(originalCond, expectedCond) } test("replace null in branch values of CaseWhen") { @@ -97,6 +101,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) testDelete(originalCond, expectedCond = FalseLiteral) + testUpdate(originalCond, expectedCond = FalseLiteral) } test("replace null in branches of If inside CaseWhen") { @@ -114,6 +119,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond) testJoin(originalCond, expectedCond) testDelete(originalCond, expectedCond) + testUpdate(originalCond, expectedCond) } test("replace null in complex CaseWhen expressions") { @@ -134,6 +140,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond) testJoin(originalCond, expectedCond) testDelete(originalCond, expectedCond) + testUpdate(originalCond, expectedCond) } test("replace null in Or") { @@ -142,6 +149,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond) testJoin(originalCond, expectedCond) testDelete(originalCond, expectedCond) + testUpdate(originalCond, expectedCond) } test("replace null in And") { @@ -149,6 +157,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) testDelete(originalCond, expectedCond = FalseLiteral) + testUpdate(originalCond, expectedCond = FalseLiteral) } test("replace nulls in nested And/Or expressions") { @@ -158,6 +167,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) testDelete(originalCond, expectedCond = FalseLiteral) + testUpdate(originalCond, expectedCond = FalseLiteral) } test("replace null in And inside branches of If") { @@ -168,6 +178,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) testDelete(originalCond, 
expectedCond = FalseLiteral) + testUpdate(originalCond, expectedCond = FalseLiteral) } test("replace null in branches of If inside And") { @@ -180,6 +191,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) testDelete(originalCond, expectedCond = FalseLiteral) + testUpdate(originalCond, expectedCond = FalseLiteral) } test("replace null in branches of If inside another If") { @@ -190,6 +202,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) testDelete(originalCond, expectedCond = FalseLiteral) + testUpdate(originalCond, expectedCond = FalseLiteral) } test("replace null in CaseWhen inside another CaseWhen") { @@ -198,6 +211,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) testDelete(originalCond, expectedCond = FalseLiteral) + testUpdate(originalCond, expectedCond = FalseLiteral) } test("inability to replace null in non-boolean branches of If") { @@ -211,6 +225,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond = condition, expectedCond = condition) testJoin(originalCond = condition, expectedCond = condition) testDelete(originalCond = condition, expectedCond = condition) + testUpdate(originalCond = condition, expectedCond = condition) } test("inability to replace null in non-boolean values of CaseWhen") { @@ -226,6 +241,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond = condition, expectedCond = condition) testJoin(originalCond = condition, expectedCond = condition) testDelete(originalCond = condition, expectedCond = condition) + testUpdate(originalCond = condition, expectedCond = condition) } test("inability to replace null in non-boolean branches of If inside another If") { @@ -239,6 +255,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond = condition, expectedCond = condition) testJoin(originalCond = condition, expectedCond = condition) testDelete(originalCond = condition, expectedCond = condition) + testUpdate(originalCond = condition, expectedCond = condition) } test("replace null in If used as a join condition") { @@ -374,6 +391,10 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { test((rel, expr) => DeleteFromTable(rel, Some(expr)), originalCond, expectedCond) } + private def testUpdate(originalCond: Expression, expectedCond: Expression): Unit = { + test((rel, expr) => UpdateTable(rel, Seq.empty, Some(expr)), originalCond, expectedCond) + } + private def testHigherOrderFunc( argument: Expression, createExpr: (Expression, Expression) => Expression, From 87c58367cd8b1815feef754695631ce08c3cde8b Mon Sep 17 00:00:00 2001 From: David McWhorter Date: Tue, 15 Dec 2020 14:00:38 -0800 Subject: [PATCH 0775/1009] [SPARK-22256][MESOS] Introduce spark.mesos.driver.memoryOverhead ### What changes were proposed in this pull request? This is a simple change to support allocating a specified amount of overhead memory for the driver's mesos container. This is already supported for executors. ### Why are the changes needed? This is needed to keep the driver process from exceeding memory limits and being killed off when running on mesos. ### Does this PR introduce _any_ user-facing change? 
Yes, it adds a `spark.mesos.driver.memoryOverhead` configuration option. Documentation changes for this option are included in the PR. ### How was this patch tested? Test cases covering allocation of driver memory overhead are included in the changes. ### Other notes This is a second attempt to get this change reviewed, accepted and merged. The original pull request was closed as stale back in January: https://github.com/apache/spark/pull/21006. For this pull request, I took the original change by pmackles, rebased it onto the current master branch, and added a test case that was requested in the original code review. I'm happy to make any further edits or do anything needed so that this can be included in a future spark release. I keep having to build custom spark distributions so that we can use spark within our mesos clusters. Closes #30739 from dmcwhorter/dmcwhorter-SPARK-22256. Lead-authored-by: David McWhorter Co-authored-by: Paul Mackles Signed-off-by: Dongjoon Hyun --- docs/running-on-mesos.md | 9 ++ .../apache/spark/deploy/mesos/config.scala | 8 ++ .../cluster/mesos/MesosClusterScheduler.scala | 4 +- .../cluster/mesos/MesosSchedulerUtils.scala | 17 +++- .../mesos/MesosClusterSchedulerSuite.scala | 82 +++++++++++++++++-- 5 files changed, 108 insertions(+), 12 deletions(-) diff --git a/docs/running-on-mesos.md b/docs/running-on-mesos.md index 8c0bac1815bbd..364def8923392 100644 --- a/docs/running-on-mesos.md +++ b/docs/running-on-mesos.md @@ -480,6 +480,15 @@ See the [configuration page](configuration.html) for information on Spark config 1.1.1 + + spark.mesos.driver.memoryOverhead + driver memory * 0.10, with minimum of 384 + + The amount of additional memory, specified in MB, to be allocated to the driver. By default, + the overhead will be larger of either 384 or 10% of spark.driver.memory. If set, + the final overhead will be this value. Only applies to cluster mode. + + spark.mesos.uris (none) diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala index 5927af176062d..38df43d71b897 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala @@ -183,6 +183,14 @@ package object config { .stringConf .createOptional + private[spark] val DRIVER_MEMORY_OVERHEAD = + ConfigBuilder("spark.mesos.driver.memoryOverhead") + .doc("The amount of additional memory, specified in MB, to be allocated to the driver. " + + "By default, the overhead will be larger of either 384 or 10% of spark.driver.memory. 
" + + "Only applies to cluster mode.") + .intConf + .createOptional + private[spark] val EXECUTOR_URI = ConfigBuilder("spark.executor.uri").version("0.8.0").stringConf.createOptional diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala index b18737cf6126d..c7e0869e4bd5c 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala @@ -614,7 +614,7 @@ private[spark] class MesosClusterScheduler( val (remainingResources, cpuResourcesToUse) = partitionResources(offer.remainingResources, "cpus", desc.cores) val (finalResources, memResourcesToUse) = - partitionResources(remainingResources.asJava, "mem", desc.mem) + partitionResources(remainingResources.asJava, "mem", driverContainerMemory(desc)) offer.remainingResources = finalResources.asJava val appName = desc.conf.get("spark.app.name") @@ -646,7 +646,7 @@ private[spark] class MesosClusterScheduler( tasks: mutable.HashMap[OfferID, ArrayBuffer[TaskInfo]]): Unit = { for (submission <- candidates) { val driverCpu = submission.cores - val driverMem = submission.mem + val driverMem = driverContainerMemory(submission) val driverConstraints = parseConstraintString(submission.conf.get(config.DRIVER_CONSTRAINTS)) logTrace(s"Finding offer to launch driver with cpu: $driverCpu, mem: $driverMem, " + diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala index 8dbb70b616df1..38f83df00e428 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala @@ -36,7 +36,7 @@ import org.apache.mesos.protobuf.GeneratedMessageV3 import org.apache.spark.{SparkConf, SparkContext, SparkException} import org.apache.spark.TaskState -import org.apache.spark.deploy.mesos.{config => mesosConfig} +import org.apache.spark.deploy.mesos.{config => mesosConfig, MesosDriverDescription} import org.apache.spark.internal.Logging import org.apache.spark.internal.config.{Status => _, _} import org.apache.spark.util.Utils @@ -405,6 +405,21 @@ trait MesosSchedulerUtils extends Logging { sc.executorMemory } + /** + * Return the amount of memory to allocate to each driver, taking into account + * container overheads. 
+ * + * @param driverDesc used to get driver memory + * @return memory requirement defined as `DRIVER_MEMORY_OVERHEAD` if set in the config, + * otherwise the larger of `MEMORY_OVERHEAD_MINIMUM (=384MB)` or + * `MEMORY_OVERHEAD_FRACTION (=0.1) * driverMemory` + */ + def driverContainerMemory(driverDesc: MesosDriverDescription): Int = { + val defaultMem = math.max(MEMORY_OVERHEAD_FRACTION * driverDesc.mem, MEMORY_OVERHEAD_MINIMUM) + driverDesc.conf.get(mesosConfig.DRIVER_MEMORY_OVERHEAD).getOrElse(defaultMem.toInt) + + driverDesc.mem + } + def setupUris(uris: Seq[String], builder: CommandInfo.Builder, useFetcherCache: Boolean = false): Unit = { diff --git a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterSchedulerSuite.scala b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterSchedulerSuite.scala index 146a135afd795..9a1862d32dc13 100644 --- a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterSchedulerSuite.scala +++ b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterSchedulerSuite.scala @@ -105,7 +105,8 @@ class MesosClusterSchedulerSuite extends SparkFunSuite with LocalSparkContext wi val response = scheduler.submitDriver( new MesosDriverDescription("d1", "jar", 1200, 1.5, true, command, - Map((config.EXECUTOR_HOME.key, "test"), ("spark.app.name", "test")), + Map((config.EXECUTOR_HOME.key, "test"), ("spark.app.name", "test"), + (config.DRIVER_MEMORY_OVERHEAD.key, "0")), "s1", new Date())) assert(response.success) @@ -200,6 +201,60 @@ class MesosClusterSchedulerSuite extends SparkFunSuite with LocalSparkContext wi }) } + test("SPARK-22256: supports spark.mesos.driver.memoryOverhead with 384mb default") { + setScheduler() + + val mem = 1000 + val cpu = 1 + + val response = scheduler.submitDriver( + new MesosDriverDescription("d1", "jar", mem, cpu, true, + command, + Map("spark.mesos.executor.home" -> "test", + "spark.app.name" -> "test"), + "s1", + new Date())) + assert(response.success) + + val offer = Utils.createOffer("o1", "s1", mem*2, cpu) + scheduler.resourceOffers(driver, List(offer).asJava) + val tasks = Utils.verifyTaskLaunched(driver, "o1") + // 1384.0 + val taskMem = tasks.head.getResourcesList + .asScala + .filter(_.getName.equals("mem")) + .map(_.getScalar.getValue) + .head + assert(1384.0 === taskMem) + } + + test("SPARK-22256: supports spark.mesos.driver.memoryOverhead with 10% default") { + setScheduler() + + val mem = 10000 + val cpu = 1 + + val response = scheduler.submitDriver( + new MesosDriverDescription("d1", "jar", mem, cpu, true, + command, + Map("spark.mesos.executor.home" -> "test", + "spark.app.name" -> "test"), + "s1", + new Date())) + assert(response.success) + + val offer = Utils.createOffer("o1", "s1", mem*2, cpu) + scheduler.resourceOffers(driver, List(offer).asJava) + val tasks = Utils.verifyTaskLaunched(driver, "o1") + // 11000.0 + val taskMem = tasks.head.getResourcesList + .asScala + .filter(_.getName.equals("mem")) + .map(_.getScalar.getValue) + .head + assert(11000.0 === taskMem) + } + test("supports spark.mesos.driverEnv.*") { setScheduler() @@ -211,7 +266,9 @@ class MesosClusterSchedulerSuite extends SparkFunSuite with LocalSparkContext wi command, Map(config.EXECUTOR_HOME.key -> "test", "spark.app.name" -> "test", - config.DRIVER_ENV_PREFIX + "TEST_ENV" -> "TEST_VAL"), + config.DRIVER_ENV_PREFIX + "TEST_ENV" -> "TEST_VAL", + config.DRIVER_MEMORY_OVERHEAD.key -> "0" + ), "s1", new 
Date())) assert(response.success) @@ -236,7 +293,8 @@ class MesosClusterSchedulerSuite extends SparkFunSuite with LocalSparkContext wi Map(config.EXECUTOR_HOME.key -> "test", "spark.app.name" -> "test", config.NETWORK_NAME.key -> "test-network-name", - config.NETWORK_LABELS.key -> "key1:val1,key2:val2"), + config.NETWORK_LABELS.key -> "key1:val1,key2:val2", + config.DRIVER_MEMORY_OVERHEAD.key -> "0"), "s1", new Date())) @@ -266,7 +324,8 @@ class MesosClusterSchedulerSuite extends SparkFunSuite with LocalSparkContext wi command, Map(config.EXECUTOR_HOME.key -> "test", config.ENABLE_FETCHER_CACHE.key -> "true", - "spark.app.name" -> "test"), + "spark.app.name" -> "test", + config.DRIVER_MEMORY_OVERHEAD.key -> "0"), "s1", new Date())) @@ -290,7 +349,8 @@ class MesosClusterSchedulerSuite extends SparkFunSuite with LocalSparkContext wi new MesosDriverDescription("d1", "jar", mem, cpu, true, command, Map(config.EXECUTOR_HOME.key -> "test", - "spark.app.name" -> "test"), + "spark.app.name" -> "test", + config.DRIVER_MEMORY_OVERHEAD.key -> "0"), "s1", new Date())) @@ -315,7 +375,8 @@ class MesosClusterSchedulerSuite extends SparkFunSuite with LocalSparkContext wi command, Map(config.EXECUTOR_HOME.key -> "test", config.ENABLE_FETCHER_CACHE.key -> "false", - "spark.app.name" -> "test"), + "spark.app.name" -> "test", + config.DRIVER_MEMORY_OVERHEAD.key -> "0"), "s1", new Date())) @@ -349,7 +410,8 @@ class MesosClusterSchedulerSuite extends SparkFunSuite with LocalSparkContext wi command, Map(config.EXECUTOR_HOME.key -> "test", "spark.app.name" -> "test", - config.DRIVER_CONSTRAINTS.key -> driverConstraints), + config.DRIVER_CONSTRAINTS.key -> driverConstraints, + config.DRIVER_MEMORY_OVERHEAD.key -> "0"), "s1", new Date())) assert(response.success) @@ -387,7 +449,8 @@ class MesosClusterSchedulerSuite extends SparkFunSuite with LocalSparkContext wi command, Map(config.EXECUTOR_HOME.key -> "test", "spark.app.name" -> "test", - config.DRIVER_LABELS.key -> "key:value"), + config.DRIVER_LABELS.key -> "key:value", + config.DRIVER_MEMORY_OVERHEAD.key -> "0"), "s1", new Date())) @@ -745,7 +808,8 @@ class MesosClusterSchedulerSuite extends SparkFunSuite with LocalSparkContext wi true, command, Map(config.EXECUTOR_HOME.key -> "test", - "spark.app.name" -> "test") ++ + "spark.app.name" -> "test", + config.DRIVER_MEMORY_OVERHEAD.key -> "0") ++ addlSparkConfVars, "s1", new Date()) From 3dfdcf4f92ef5e739f15c22c93d673bb2233e617 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 16 Dec 2020 10:03:48 +0900 Subject: [PATCH 0776/1009] [SPARK-33788][SQL] Throw NoSuchPartitionsException from HiveExternalCatalog.dropPartitions() ### What changes were proposed in this pull request? Throw `NoSuchPartitionsException` from `ALTER TABLE .. DROP TABLE` for not existing partitions of a table in V1 Hive external catalog. ### Why are the changes needed? The behaviour of Hive external catalog deviates from V1/V2 in-memory catalogs that throw `NoSuchPartitionsException`. To improve user experience with Spark SQL, it would be better to throw the same exception. ### Does this PR introduce _any_ user-facing change? Yes, the command throws `NoSuchPartitionsException` instead of the general exception `AnalysisException`. ### How was this patch tested? By running tests for `ALTER TABLE .. DROP PARTITION`: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableDropPartitionSuite" ``` Closes #30778 from MaxGekk/hive-drop-partition-exception. 
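For illustration only (table and column names are invented, and the snippet is written in the ScalaTest style of the suites above, so `sql` and `intercept` come from the test harness), the user-visible difference is the exception type when some of the partitions to drop do not exist in a Hive-backed table:

```scala
import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionsException

sql("CREATE TABLE tbl (id BIGINT, data STRING) USING HIVE PARTITIONED BY (id)")
sql("ALTER TABLE tbl ADD PARTITION (id = 1) LOCATION 'loc'")

// Previously this raised a generic AnalysisException ("No partition is dropped...");
// now it raises NoSuchPartitionsException, matching the V1/V2 in-memory catalogs.
intercept[NoSuchPartitionsException] {
  sql("ALTER TABLE tbl DROP PARTITION (id = 1), PARTITION (id = 2)")
}

// IF EXISTS still silently skips the missing spec and drops the existing partition.
sql("ALTER TABLE tbl DROP IF EXISTS PARTITION (id = 1), PARTITION (id = 2)")
```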
Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- .../AlterTableDropPartitionSuiteBase.scala | 17 ++++++++++++++++ .../v1/AlterTableDropPartitionSuite.scala | 20 +------------------ .../v2/AlterTableDropPartitionSuite.scala | 17 ---------------- .../sql/hive/client/HiveClientImpl.scala | 6 ++---- .../AlterTableDropPartitionSuite.scala | 19 ------------------ 5 files changed, 20 insertions(+), 59 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala index ed479e2824fb7..338f13ace891c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala @@ -21,6 +21,7 @@ import org.scalactic.source.Position import org.scalatest.Tag import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionsException import org.apache.spark.sql.execution.datasources.PartitioningUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils @@ -146,4 +147,20 @@ trait AlterTableDropPartitionSuiteBase extends QueryTest with SQLTestUtils { assert(errMsg.contains(notFullPartitionSpecErr)) } } + + test("partition not exists") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") + + val errMsg = intercept[NoSuchPartitionsException] { + sql(s"ALTER TABLE $t DROP PARTITION (id=1), PARTITION (id=2)") + }.getMessage + assert(errMsg.contains("partitions not found in table")) + + checkPartitions(t, Map("id" -> "1")) + sql(s"ALTER TABLE $t DROP IF EXISTS PARTITION (id=1), PARTITION (id=2)") + checkPartitions(t) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala index 5ad182bc689b9..e655debc2fdde 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.execution.command.v1 -import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionsException import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution.command import org.apache.spark.sql.test.SharedSparkSession @@ -32,21 +31,4 @@ trait AlterTableDropPartitionSuiteBase extends command.AlterTableDropPartitionSu class AlterTableDropPartitionSuite extends AlterTableDropPartitionSuiteBase - with SharedSparkSession { - - test("partition not exists") { - withNsTable("ns", "tbl") { t => - sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") - sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") - - val errMsg = intercept[NoSuchPartitionsException] { - sql(s"ALTER TABLE $t DROP PARTITION (id=1), PARTITION (id=2)") - }.getMessage - assert(errMsg.contains("partitions not found in table")) - - checkPartitions(t, Map("id" -> "1")) - sql(s"ALTER TABLE $t DROP IF EXISTS PARTITION (id=1), PARTITION (id=2)") - checkPartitions(t) - } - } -} + with SharedSparkSession diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala index 608e7d7c98f6f..9dc1cad5a002d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.command.v2 import org.apache.spark.SparkConf import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionsException import org.apache.spark.sql.connector.{InMemoryPartitionTableCatalog, InMemoryTableCatalog} import org.apache.spark.sql.execution.command import org.apache.spark.sql.test.SharedSparkSession @@ -38,22 +37,6 @@ class AlterTableDropPartitionSuite .set(s"spark.sql.catalog.$catalog", classOf[InMemoryPartitionTableCatalog].getName) .set(s"spark.sql.catalog.non_part_$catalog", classOf[InMemoryTableCatalog].getName) - test("partition not exists") { - withNsTable("ns", "tbl") { t => - sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") - sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") - - val errMsg = intercept[NoSuchPartitionsException] { - sql(s"ALTER TABLE $t DROP PARTITION (id=1), PARTITION (id=2)") - }.getMessage - assert(errMsg.contains("partitions not found in table")) - - checkPartitions(t, Map("id" -> "1")) - sql(s"ALTER TABLE $t DROP IF EXISTS PARTITION (id=1), PARTITION (id=2)") - checkPartitions(t) - } - } - test("SPARK-33650: drop partition into a table which doesn't support partition management") { withNsTable("ns", "tbl", s"non_part_$catalog") { t => sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing") diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 0b19e5e6e8c84..6a964a0ce3613 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -49,7 +49,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.metrics.source.HiveCatalogMetrics import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchPartitionException, PartitionsAlreadyExistException} +import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchPartitionException, NoSuchPartitionsException, PartitionsAlreadyExistException} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.Expression @@ -630,9 +630,7 @@ private[hive] class HiveClientImpl( // (b='1', c='1') and (b='1', c='2'), a partial spec of (b='1') will match both. val parts = client.getPartitions(hiveTable, s.asJava).asScala if (parts.isEmpty && !ignoreIfNotExists) { - throw new AnalysisException( - s"No partition is dropped. 
One partition spec '$s' does not exist in table '$table' " + - s"database '$db'") + throw new NoSuchPartitionsException(db, table, Seq(s)) } parts.map(_.getValues) }.distinct diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableDropPartitionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableDropPartitionSuite.scala index fe26466cdad62..9c7d76a0caa08 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableDropPartitionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableDropPartitionSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.hive.execution.command -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.execution.command.v1 import org.apache.spark.sql.hive.test.TestHiveSingleton @@ -27,22 +26,4 @@ class AlterTableDropPartitionSuite override def version: String = "Hive V1" override def defaultUsing: String = "USING HIVE" - - override protected val notFullPartitionSpecErr = "No partition is dropped" - - test("partition not exists") { - withNsTable("ns", "tbl") { t => - sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") - sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") - - val errMsg = intercept[AnalysisException] { - sql(s"ALTER TABLE $t DROP PARTITION (id=1), PARTITION (id=2)") - }.getMessage - assert(errMsg.contains("No partition is dropped")) - - checkPartitions(t, Map("id" -> "1")) - sql(s"ALTER TABLE $t DROP IF EXISTS PARTITION (id=1), PARTITION (id=2)") - checkPartitions(t) - } - } } From dd042f58e7a0fd2289f6889c324c0d5e4c18ad7f Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Wed, 16 Dec 2020 10:07:35 +0900 Subject: [PATCH 0777/1009] [SPARK-33796][DOCS] Show hidden text from the left menu of Spark Doc ### What changes were proposed in this pull request? If the text in the left menu of Spark is too long, it will be hidden. ![sql1](https://user-images.githubusercontent.com/1097932/102249583-5ae7a580-3eb7-11eb-813c-f2e2fe019d28.jpeg) This PR is to fix the style issue. ### Why are the changes needed? Improve the UI of Spark documentation. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manual test After changes: ![sql2](https://user-images.githubusercontent.com/1097932/102249603-5fac5980-3eb7-11eb-806d-4e7b8248e6b6.jpeg) Closes #30786 from gengliangwang/fixDocStyle. Authored-by: Gengliang Wang Signed-off-by: HyukjinKwon --- docs/css/main.css | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/css/main.css b/docs/css/main.css index 271113c904d26..309ad7b3bdf0b 100755 --- a/docs/css/main.css +++ b/docs/css/main.css @@ -333,10 +333,6 @@ a.anchorjs-link:hover { text-decoration: none; } overflow-y: scroll; } -.left-menu { - width: 399px; -} - .left-menu h3 { margin-left: 10px; line-height: 30px; From ddff94fd32f85072cbc5c752c337f3b89ae00bed Mon Sep 17 00:00:00 2001 From: Sander Goos Date: Wed, 16 Dec 2020 11:26:54 +0900 Subject: [PATCH 0778/1009] [SPARK-33793][TESTS] Introduce withExecutor to ensure proper cleanup in tests ### What changes were proposed in this pull request? This PR introduces a helper method `withExecutor` that handles the creation of an Executor object and ensures that it is always stopped in a finally block. The tests in ExecutorSuite have been refactored to use this method. ### Why are the changes needed? 
Recently an issue was discovered that leaked Executors (which are not explicitly stopped after a test) can cause other tests to fail due to the JVM being killed after 10 min. It is therefore crucial that tests always stop the Executor. By introducing this helper method, a simple pattern is established that can be easily adopted in new tests, which reduces the risk of regressions. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Run the ExecutorSuite locally. Closes #30783 from sander-goos/SPARK-33793-close-executors. Authored-by: Sander Goos Signed-off-by: HyukjinKwon --- .../apache/spark/executor/ExecutorSuite.scala | 99 ++++++++++--------- 1 file changed, 54 insertions(+), 45 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala index 7cf7a81a76133..97ffb36062dbc 100644 --- a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala +++ b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.executor import java.io.{Externalizable, ObjectInput, ObjectOutput} import java.lang.Thread.UncaughtExceptionHandler +import java.net.URL import java.nio.ByteBuffer import java.util.Properties import java.util.concurrent.{ConcurrentHashMap, CountDownLatch, TimeUnit} @@ -53,7 +54,7 @@ import org.apache.spark.scheduler.{DirectTaskResult, FakeTask, ResultTask, Task, import org.apache.spark.serializer.{JavaSerializer, SerializerInstance, SerializerManager} import org.apache.spark.shuffle.FetchFailedException import org.apache.spark.storage.{BlockManager, BlockManagerId} -import org.apache.spark.util.{LongAccumulator, ThreadUtils, UninterruptibleThread} +import org.apache.spark.util.{LongAccumulator, SparkUncaughtExceptionHandler, ThreadUtils, UninterruptibleThread} class ExecutorSuite extends SparkFunSuite with LocalSparkContext with MockitoSugar with Eventually with PrivateMethodTester { @@ -64,6 +65,33 @@ class ExecutorSuite extends SparkFunSuite super.afterEach() } + /** + * Creates an Executor with the provided arguments, is then passed to `f` + * and will be stopped after `f` returns. 
+ */ + def withExecutor( + executorId: String, + executorHostname: String, + env: SparkEnv, + userClassPath: Seq[URL] = Nil, + isLocal: Boolean = true, + uncaughtExceptionHandler: UncaughtExceptionHandler + = new SparkUncaughtExceptionHandler, + resources: immutable.Map[String, ResourceInformation] + = immutable.Map.empty[String, ResourceInformation])(f: Executor => Unit): Unit = { + var executor: Executor = null + try { + executor = new Executor(executorId, executorHostname, env, userClassPath, isLocal, + uncaughtExceptionHandler, resources) + + f(executor) + } finally { + if (executor != null) { + executor.stop() + } + } + } + test("SPARK-15963: Catch `TaskKilledException` correctly in Executor.TaskRunner") { // mock some objects to make Executor.launchTask() happy val conf = new SparkConf @@ -116,10 +144,8 @@ class ExecutorSuite extends SparkFunSuite } }) - var executor: Executor = null - try { - executor = new Executor("id", "localhost", env, userClassPath = Nil, isLocal = true, - resources = immutable.Map.empty[String, ResourceInformation]) + withExecutor("id", "localhost", env) { executor => + // the task will be launched in a dedicated worker thread executor.launchTask(mockExecutorBackend, taskDescription) @@ -139,11 +165,6 @@ class ExecutorSuite extends SparkFunSuite assert(executorSuiteHelper.testFailedReason.toErrorString === "TaskKilled (test)") assert(executorSuiteHelper.taskState === TaskState.KILLED) } - finally { - if (executor != null) { - executor.stop() - } - } } test("SPARK-19276: Handle FetchFailedExceptions that are hidden by user exceptions") { @@ -255,25 +276,24 @@ class ExecutorSuite extends SparkFunSuite confs.foreach { case (k, v) => conf.set(k, v) } val serializer = new JavaSerializer(conf) val env = createMockEnv(conf, serializer) - val executor = - new Executor("id", "localhost", SparkEnv.get, userClassPath = Nil, isLocal = true, - resources = immutable.Map.empty[String, ResourceInformation]) - val executorClass = classOf[Executor] - - // Save all heartbeats sent into an ArrayBuffer for verification - val heartbeats = ArrayBuffer[Heartbeat]() - val mockReceiver = mock[RpcEndpointRef] - when(mockReceiver.askSync(any[Heartbeat], any[RpcTimeout])(any)) - .thenAnswer((invocation: InvocationOnMock) => { - val args = invocation.getArguments() - heartbeats += args(0).asInstanceOf[Heartbeat] - HeartbeatResponse(false) - }) - val receiverRef = executorClass.getDeclaredField("heartbeatReceiverRef") - receiverRef.setAccessible(true) - receiverRef.set(executor, mockReceiver) + withExecutor("id", "localhost", SparkEnv.get) { executor => + val executorClass = classOf[Executor] + + // Save all heartbeats sent into an ArrayBuffer for verification + val heartbeats = ArrayBuffer[Heartbeat]() + val mockReceiver = mock[RpcEndpointRef] + when(mockReceiver.askSync(any[Heartbeat], any[RpcTimeout])(any)) + .thenAnswer((invocation: InvocationOnMock) => { + val args = invocation.getArguments() + heartbeats += args(0).asInstanceOf[Heartbeat] + HeartbeatResponse(false) + }) + val receiverRef = executorClass.getDeclaredField("heartbeatReceiverRef") + receiverRef.setAccessible(true) + receiverRef.set(executor, mockReceiver) - f(executor, heartbeats) + f(executor, heartbeats) + } } private def heartbeatZeroAccumulatorUpdateTest(dropZeroMetrics: Boolean): Unit = { @@ -354,10 +374,7 @@ class ExecutorSuite extends SparkFunSuite val taskDescription = createResultTaskDescription(serializer, taskBinary, rdd, 0) val mockBackend = mock[ExecutorBackend] - var executor: Executor = null - try { - 
executor = new Executor("id", "localhost", SparkEnv.get, userClassPath = Nil, isLocal = true, - resources = immutable.Map.empty[String, ResourceInformation]) + withExecutor("id", "localhost", SparkEnv.get) { executor => executor.launchTask(mockBackend, taskDescription) // Ensure that the executor's metricsPoller is polled so that values are recorded for @@ -368,10 +385,6 @@ class ExecutorSuite extends SparkFunSuite eventually(timeout(5.seconds), interval(10.milliseconds)) { assert(executor.numRunningTasks === 0) } - } finally { - if (executor != null) { - executor.stop() - } } // Verify that peak values for task metrics get sent in the TaskResult @@ -535,12 +548,11 @@ class ExecutorSuite extends SparkFunSuite poll: Boolean = false): (TaskFailedReason, UncaughtExceptionHandler) = { val mockBackend = mock[ExecutorBackend] val mockUncaughtExceptionHandler = mock[UncaughtExceptionHandler] - var executor: Executor = null val timedOut = new AtomicBoolean(false) - try { - executor = new Executor("id", "localhost", SparkEnv.get, userClassPath = Nil, isLocal = true, - uncaughtExceptionHandler = mockUncaughtExceptionHandler, - resources = immutable.Map.empty[String, ResourceInformation]) + + withExecutor("id", "localhost", SparkEnv.get, + uncaughtExceptionHandler = mockUncaughtExceptionHandler) { executor => + // the task will be launched in a dedicated worker thread executor.launchTask(mockBackend, taskDescription) if (killTask) { @@ -573,11 +585,8 @@ class ExecutorSuite extends SparkFunSuite assert(executor.numRunningTasks === 0) } assert(!timedOut.get(), "timed out waiting to be ready to kill tasks") - } finally { - if (executor != null) { - executor.stop() - } } + val orderedMock = inOrder(mockBackend) val statusCaptor = ArgumentCaptor.forClass(classOf[ByteBuffer]) orderedMock.verify(mockBackend) From 62be2483d7d78e61fd2f77929cf41c76eff17869 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Wed, 16 Dec 2020 05:37:56 +0000 Subject: [PATCH 0779/1009] [SPARK-33765][SQL] Migrate UNCACHE TABLE to use UnresolvedRelation to resolve identifier ### What changes were proposed in this pull request? This PR proposes to migrate `UNCACHE TABLE` to use `UnresolvedRelation` to resolve the table/view identifier in Analyzer as discussed https://github.com/apache/spark/pull/30403/files#r532360022. ### Why are the changes needed? To resolve the table/view in the analyzer. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Updated existing tests Closes #30743 from imback82/uncache_v2. 
Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 16 +++++++++++- .../sql/catalyst/analysis/CheckAnalysis.scala | 5 +++- .../ResolveCommandsWithIfExists.scala | 4 ++- .../sql/catalyst/parser/AstBuilder.scala | 9 +++++++ .../catalyst/plans/logical/v2Commands.scala | 8 ++++++ .../sql/catalyst/parser/DDLParserSuite.scala | 10 +++++++ .../spark/sql/execution/SparkSqlParser.scala | 9 ------- .../spark/sql/execution/command/cache.scala | 26 +------------------ .../datasources/DataSourceStrategy.scala | 16 +++++++++++- .../datasources/v2/CacheTableExec.scala | 12 +++++++++ .../datasources/v2/DataSourceV2Strategy.scala | 3 +++ .../sql/execution/SparkSqlParserSuite.scala | 10 ------- .../spark/sql/hive/HiveStrategies.scala | 12 ++++++++- .../spark/sql/hive/CachedTableSuite.scala | 2 +- 14 files changed, 92 insertions(+), 50 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index a46f2e3168c6b..0bef6998b177d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -875,6 +875,10 @@ class Analyzer(override val catalogManager: CatalogManager) lookupTempView(ident) .map(view => c.copy(table = view)) .getOrElse(c) + case c @ UncacheTable(UnresolvedRelation(ident, _, false), _, _) => + lookupTempView(ident) + .map(view => c.copy(table = view, isTempView = true)) + .getOrElse(c) // TODO (SPARK-27484): handle streaming write commands when we have them. case write: V2WriteCommand => write.table match { @@ -1005,6 +1009,11 @@ class Analyzer(override val catalogManager: CatalogManager) .map(v2Relation => c.copy(table = v2Relation)) .getOrElse(c) + case c @ UncacheTable(u @ UnresolvedRelation(_, _, false), _, _) => + lookupV2Relation(u.multipartIdentifier, u.options, false) + .map(v2Relation => c.copy(table = v2Relation)) + .getOrElse(c) + // TODO (SPARK-27484): handle streaming write commands when we have them. case write: V2WriteCommand => write.table match { @@ -1098,7 +1107,12 @@ class Analyzer(override val catalogManager: CatalogManager) case c @ CacheTable(u @ UnresolvedRelation(_, _, false), _, _, _) => lookupRelation(u.multipartIdentifier, u.options, false) - .map(v2Relation => c.copy(table = v2Relation)) + .map(relation => c.copy(table = EliminateSubqueryAliases(relation))) + .getOrElse(c) + + case c @ UncacheTable(u @ UnresolvedRelation(_, _, false), _, _) => + lookupRelation(u.multipartIdentifier, u.options, false) + .map(relation => c.copy(table = EliminateSubqueryAliases(relation))) .getOrElse(c) // TODO (SPARK-27484): handle streaming write commands when we have them. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index c8e137e9c18ac..30467685d75a9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -125,7 +125,10 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { failAnalysis(s"Table not found: ${u.multipartIdentifier.quoted}") case CacheTable(u: UnresolvedRelation, _, _, _) => - failAnalysis(s"Table or view not found for `CACHE TABLE`: ${u.multipartIdentifier.quoted}") + failAnalysis(s"Table or view not found: ${u.multipartIdentifier.quoted}") + + case UncacheTable(u: UnresolvedRelation, _, _) => + failAnalysis(s"Table or view not found: ${u.multipartIdentifier.quoted}") // TODO (SPARK-27484): handle streaming write commands when we have them. case write: V2WriteCommand if write.table.isInstanceOf[UnresolvedRelation] => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCommandsWithIfExists.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCommandsWithIfExists.scala index 196a07a7f9904..60f86b31a4bdf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCommandsWithIfExists.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCommandsWithIfExists.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.analysis -import org.apache.spark.sql.catalyst.plans.logical.{DropTable, DropView, LogicalPlan, NoopCommand} +import org.apache.spark.sql.catalyst.plans.logical.{DropTable, DropView, LogicalPlan, NoopCommand, UncacheTable} import org.apache.spark.sql.catalyst.rules.Rule /** @@ -31,5 +31,7 @@ object ResolveCommandsWithIfExists extends Rule[LogicalPlan] { NoopCommand("DROP TABLE", u.multipartIdentifier) case DropView(u: UnresolvedView, ifExists) if ifExists => NoopCommand("DROP VIEW", u.multipartIdentifier) + case UncacheTable(u: UnresolvedRelation, ifExists, _) if ifExists => + NoopCommand("UNCACHE TABLE", u.multipartIdentifier) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 0284d5d01ba96..426dff343818b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3632,6 +3632,15 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } } + /** + * Create an [[UncacheTable]] logical plan. + */ + override def visitUncacheTable(ctx: UncacheTableContext): LogicalPlan = withOrigin(ctx) { + UncacheTable( + UnresolvedRelation(visitMultipartIdentifier(ctx.multipartIdentifier)), + ctx.EXISTS != null) + } + /** * Create a [[TruncateTable]] command. 
* diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 2091d92eb67c9..d13ad977910d9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -794,3 +794,11 @@ case class CacheTableAsSelect( plan: LogicalPlan, isLazy: Boolean, options: Map[String, String]) extends Command + +/** + * The logical plan of the UNCACHE TABLE command. + */ +case class UncacheTable( + table: LogicalPlan, + ifExists: Boolean, + isTempView: Boolean = false) extends Command diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 2b3fc6f71a5c0..9bea6517156ae 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -2032,6 +2032,16 @@ class DDLParserSuite extends AnalysisTest { "It is not allowed to add catalog/namespace prefix a.b") } + test("UNCACHE TABLE") { + comparePlans( + parsePlan("UNCACHE TABLE a.b.c"), + UncacheTable(UnresolvedRelation(Seq("a", "b", "c")), ifExists = false)) + + comparePlans( + parsePlan("UNCACHE TABLE IF EXISTS a.b.c"), + UncacheTable(UnresolvedRelation(Seq("a", "b", "c")), ifExists = true)) + } + test("TRUNCATE table") { comparePlans( parsePlan("TRUNCATE TABLE a.b.c"), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 3ca3461dfbd47..722ca6f992064 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -192,15 +192,6 @@ class SparkSqlAstBuilder extends AstBuilder { unquotedPath } - /** - * Create an [[UncacheTableCommand]] logical plan. - */ - override def visitUncacheTable(ctx: UncacheTableContext): LogicalPlan = withOrigin(ctx) { - UncacheTableCommand( - visitMultipartIdentifier(ctx.multipartIdentifier), - ctx.EXISTS != null) - } - /** * Create a [[ClearCacheCommand]] logical plan. 
*/ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala index 3f85a1b0f99d6..2f72af7f4b512 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala @@ -17,32 +17,8 @@ package org.apache.spark.sql.execution.command -import org.apache.spark.sql.{AnalysisException, DataFrame, Row, SparkSession} +import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.plans.logical.IgnoreCachedData -import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.MultipartIdentifierHelper - -case class UncacheTableCommand( - multipartIdentifier: Seq[String], - ifExists: Boolean) extends RunnableCommand { - - override def run(sparkSession: SparkSession): Seq[Row] = { - val tableName = multipartIdentifier.quoted - table(sparkSession, tableName).foreach { table => - val cascade = !sparkSession.sessionState.catalog.isTempView(multipartIdentifier) - sparkSession.sharedState.cacheManager.uncacheQuery(table, cascade) - } - Seq.empty[Row] - } - - private def table(sparkSession: SparkSession, name: String): Option[DataFrame] = { - try { - Some(sparkSession.table(name)) - } catch { - case ex: AnalysisException if ifExists && ex.getMessage.contains("Table or view not found") => - None - } - } -} /** * Clear all cached data from the in-memory cache. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index e4f001d61a767..a097017222b57 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.ScanOperation -import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoDir, InsertIntoStatement, LogicalPlan, Project} +import org.apache.spark.sql.catalyst.plans.logical.{CacheTable, InsertIntoDir, InsertIntoStatement, LogicalPlan, Project, UncacheTable} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 import org.apache.spark.sql.connector.catalog.SupportsRead @@ -283,6 +283,20 @@ class FindDataSourceTable(sparkSession: SparkSession) extends Rule[LogicalPlan] case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta, _, false), _, _, _, _, _) => i.copy(table = DDLUtils.readHiveTable(tableMeta)) + case c @ CacheTable(UnresolvedCatalogRelation(tableMeta, options, false), _, _, _) + if DDLUtils.isDatasourceTable(tableMeta) => + c.copy(table = readDataSourceTable(tableMeta, options)) + + case c @ CacheTable(UnresolvedCatalogRelation(tableMeta, _, false), _, _, _) => + c.copy(table = DDLUtils.readHiveTable(tableMeta)) + + case u @ UncacheTable(UnresolvedCatalogRelation(tableMeta, options, false), _, _) + if DDLUtils.isDatasourceTable(tableMeta) => + u.copy(table = readDataSourceTable(tableMeta, options)) + + case u @ UncacheTable(UnresolvedCatalogRelation(tableMeta, _, false), _, _) => + u.copy(table = DDLUtils.readHiveTable(tableMeta)) + case UnresolvedCatalogRelation(tableMeta, options, false) if 
DDLUtils.isDatasourceTable(tableMeta) => readDataSourceTable(tableMeta, options) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala index 85107dfc9b2ef..2d8e5b5e286b8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala @@ -87,3 +87,15 @@ case class CacheTableAsSelectExec( sparkSession.table(tempViewName) } } + +case class UncacheTableExec( + relation: LogicalPlan, + cascade: Boolean) extends V2CommandExec { + override def run(): Seq[InternalRow] = { + val sparkSession = sqlContext.sparkSession + sparkSession.sharedState.cacheManager.uncacheQuery(sparkSession, relation, cascade) + Seq.empty + } + + override def output: Seq[Attribute] = Seq.empty +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 6020e42b21900..120fa5288dda9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -364,6 +364,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case r: CacheTableAsSelect => CacheTableAsSelectExec(r.tempViewName, r.plan, r.isLazy, r.options) :: Nil + case r: UncacheTable => + UncacheTableExec(r.table, cascade = !r.isTempView) :: Nil + case _ => Nil } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index 009c5b3705d2f..f1788e9c31af8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -339,16 +339,6 @@ class SparkSqlParserSuite extends AnalysisTest { "LINES TERMINATED BY only supports newline '\\n' right now") } - test("UNCACHE TABLE") { - assertEqual( - "UNCACHE TABLE a.b.c", - UncacheTableCommand(Seq("a", "b", "c"), ifExists = false)) - - assertEqual( - "UNCACHE TABLE IF EXISTS a.b.c", - UncacheTableCommand(Seq("a", "b", "c"), ifExists = true)) - } - test("CLEAR CACHE") { assertEqual("CLEAR CACHE", ClearCacheCommand) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index ff7dc58829fa1..e10233d2573c9 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning._ -import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoDir, InsertIntoStatement, LogicalPlan, ScriptTransformation, Statistics} +import org.apache.spark.sql.catalyst.plans.logical.{CacheTable, InsertIntoDir, InsertIntoStatement, LogicalPlan, ScriptTransformation, Statistics, UncacheTable} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.catalog.CatalogV2Util.assertNoNullTypeInSchema import 
org.apache.spark.sql.execution._ @@ -231,6 +231,16 @@ case class RelationConversions( assertNoNullTypeInSchema(query.schema) OptimizedCreateHiveTableAsSelectCommand( tableDesc, query, query.output.map(_.name), mode) + + // Cache table + case c @ CacheTable(relation: HiveTableRelation, _, _, _) + if DDLUtils.isHiveTable(relation.tableMeta) && isConvertible(relation) => + c.copy(table = metastoreCatalog.convert(relation)) + + // Uncache table + case u @ UncacheTable(relation: HiveTableRelation, _, _) + if DDLUtils.isHiveTable(relation.tableMeta) && isConvertible(relation) => + u.copy(table = metastoreCatalog.convert(relation)) } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala index 81c3f271b18d4..6cb98e92e36fa 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala @@ -113,7 +113,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleto e = intercept[AnalysisException] { sql("UNCACHE TABLE nonexistentTable") }.getMessage - assert(e.contains(s"$expectedErrorMsg nonexistentTable")) + assert(e.contains("Table or view not found: nonexistentTable")) sql("UNCACHE TABLE IF EXISTS nonexistentTable") } From ef7f6903b4fa28c554a1f0b58b9da194979b61ee Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Wed, 16 Dec 2020 05:45:44 +0000 Subject: [PATCH 0780/1009] [SPARK-33786][SQL] The storage level for a cache should be respected when a table name is altered ### What changes were proposed in this pull request? This PR proposes to retain the cache's storage level when a table name is altered by `ALTER TABLE ... RENAME TO ...`. ### Why are the changes needed? Currently, when a table name is altered, the table's cache is refreshed (if exists), but the storage level is not retained. For example: ```scala def getStorageLevel(tableName: String): StorageLevel = { val table = spark.table(tableName) val cachedData = spark.sharedState.cacheManager.lookupCachedData(table).get cachedData.cachedRepresentation.cacheBuilder.storageLevel } Seq(1 -> "a").toDF("i", "j").write.parquet(path.getCanonicalPath) sql(s"CREATE TABLE old USING parquet LOCATION '${path.toURI}'") sql("CACHE TABLE old OPTIONS('storageLevel' 'MEMORY_ONLY')") val oldStorageLevel = getStorageLevel("old") sql("ALTER TABLE old RENAME TO new") val newStorageLevel = getStorageLevel("new") ``` `oldStorageLevel` will be `StorageLevel(memory, deserialized, 1 replicas)` whereas `newStorageLevel` will be `StorageLevel(disk, memory, deserialized, 1 replicas)`, which is the default storage level. ### Does this PR introduce _any_ user-facing change? Yes, now the storage level for the cache will be retained. ### How was this patch tested? Added a unit test. Closes #30774 from imback82/alter_table_rename_cache_fix. 
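For readers skimming the diff below, the core of the change in `AlterTableRenameCommand` can be read as the following sketch (condensed from the patch itself; `sparkSession` is the active session, `catalog` its session catalog, and `oldName`/`newName` the table identifiers handled by the command):

```scala
// Remember the storage level of the old table's cache entry, if any.
val optStorageLevel = sparkSession.sharedState.cacheManager
  .lookupCachedData(sparkSession.table(oldName.unquotedString))
  .map(_.cachedRepresentation.cacheBuilder.storageLevel)

// Uncache only if the table was cached, then rename as before.
if (optStorageLevel.isDefined) {
  CommandUtils.uncacheTableOrView(sparkSession, oldName.unquotedString)
}
catalog.refreshTable(oldName)
catalog.renameTable(oldName, newName)

// Re-cache under the new name with the remembered storage level
// instead of falling back to the default MEMORY_AND_DISK.
optStorageLevel.foreach { storageLevel =>
  sparkSession.catalog.cacheTable(newName.unquotedString, storageLevel)
}
```

This also replaces the previous `Try(sparkSession.catalog.isCached(...))` probe: a defined `optStorageLevel` already tells us whether the old table was cached.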
Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../spark/sql/execution/command/tables.scala | 14 ++++++------- .../apache/spark/sql/CachedTableSuite.scala | 20 +++++++++++++++++++ 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 431a103063c68..cf2a6ffb2c682 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -21,7 +21,6 @@ import java.net.{URI, URISyntaxException} import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer -import scala.util.Try import scala.util.control.NonFatal import org.apache.hadoop.fs.{FileContext, FsConstants, Path} @@ -193,18 +192,19 @@ case class AlterTableRenameCommand( } else { val table = catalog.getTableMetadata(oldName) DDLUtils.verifyAlterTableType(catalog, table, isView) - // If an exception is thrown here we can just assume the table is uncached; - // this can happen with Hive tables when the underlying catalog is in-memory. - val wasCached = Try(sparkSession.catalog.isCached(oldName.unquotedString)).getOrElse(false) - if (wasCached) { + // If `optStorageLevel` is defined, the old table was cached. + val optCachedData = sparkSession.sharedState.cacheManager.lookupCachedData( + sparkSession.table(oldName.unquotedString)) + val optStorageLevel = optCachedData.map(_.cachedRepresentation.cacheBuilder.storageLevel) + if (optStorageLevel.isDefined) { CommandUtils.uncacheTableOrView(sparkSession, oldName.unquotedString) } // Invalidate the table last, otherwise uncaching the table would load the logical plan // back into the hive metastore cache catalog.refreshTable(oldName) catalog.renameTable(oldName, newName) - if (wasCached) { - sparkSession.catalog.cacheTable(newName.unquotedString) + optStorageLevel.foreach { storageLevel => + sparkSession.catalog.cacheTable(newName.unquotedString, storageLevel) } } Seq.empty[Row] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala index af8d72309bdea..11eba933284f8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala @@ -1285,4 +1285,24 @@ class CachedTableSuite extends QueryTest with SQLTestUtils assert(spark.sharedState.cacheManager.lookupCachedData(sql("select 1, 2")).isDefined) } } + + test("SPARK-33786: Cache's storage level should be respected when a table name is altered.") { + withTable("old", "new") { + withTempPath { path => + def getStorageLevel(tableName: String): StorageLevel = { + val table = spark.table(tableName) + val cachedData = spark.sharedState.cacheManager.lookupCachedData(table).get + cachedData.cachedRepresentation.cacheBuilder.storageLevel + } + Seq(1 -> "a").toDF("i", "j").write.parquet(path.getCanonicalPath) + sql(s"CREATE TABLE old USING parquet LOCATION '${path.toURI}'") + sql("CACHE TABLE old OPTIONS('storageLevel' 'MEMORY_ONLY')") + val oldStorageLevel = getStorageLevel("old") + + sql("ALTER TABLE old RENAME TO new") + val newStorageLevel = getStorageLevel("new") + assert(oldStorageLevel === newStorageLevel) + } + } + } } From 12f3715ed7e0cd06131272845c3d04f4ad1b441c Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 16 Dec 2020 00:10:13 -0800 Subject: [PATCH 0781/1009] 
[MINOR][DOCS] Fix Jenkins job badge image and link in README.md ### What changes were proposed in this pull request? This PR proposes to fix the Jenkins job badge: Before: ![Screen Shot 2020-12-16 at 4 14 14 PM](https://user-images.githubusercontent.com/6477701/102316960-2c9ebe80-3fba-11eb-878d-07ae735fb3a6.png) After: ![Screen Shot 2020-12-16 at 4 14 09 PM](https://user-images.githubusercontent.com/6477701/102316956-2a3c6480-3fba-11eb-9fa4-b8312edb8a1a.png) ### Why are the changes needed? To make people can easily check the status of builds. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? Manually tested via using GitHub. Closes #30797 from HyukjinKwon/minor-readme. Authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d7931263b0fc7..aa7d1dd338be0 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ and Structured Streaming for stream processing. -[![Jenkins Build](https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-2.7-hive-2.3/badge/icon)](https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-2.7-hive-2.3) +[![Jenkins Build](https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-3.2/badge/icon)](https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-3.2) [![AppVeyor Build](https://img.shields.io/appveyor/ci/ApacheSoftwareFoundation/spark/master.svg?style=plastic&logo=appveyor)](https://ci.appveyor.com/project/ApacheSoftwareFoundation/spark) [![PySpark Coverage](https://img.shields.io/badge/dynamic/xml.svg?label=pyspark%20coverage&url=https%3A%2F%2Fspark-test.github.io%2Fpyspark-coverage-site&query=%2Fhtml%2Fbody%2Fdiv%5B1%5D%2Fdiv%2Fh1%2Fspan&colorB=brightgreen&style=plastic)](https://spark-test.github.io/pyspark-coverage-site) From 888a274a88560ebe3c43ff9f003c296751d0c207 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 16 Dec 2020 17:20:03 +0900 Subject: [PATCH 0782/1009] [SPARK-33802][INFRA] Override name and email address explicitly when updating PySpark coverage ### What changes were proposed in this pull request? The current Jenkins job fails as below (https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-3.2/1726/console) ``` Generating HTML files for PySpark coverage under /home/jenkins/workspace/spark-master-test-sbt-hadoop-3.2/python/test_coverage/htmlcov /home/jenkins/workspace/spark-master-test-sbt-hadoop-3.2 Cloning into 'pyspark-coverage-site'... *** Please tell me who you are. Run git config --global user.email "youexample.com" git config --global user.name "Your Name" to set your account's default identity. Omit --global to set the identity only in this repository. ``` This PR proposes to set both when committing to the coverage site. ### Why are the changes needed? To make the coverage site keep working. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? Manually tested in the console but it has to be merged to test in the Jenkins environment. Closes #30796 from HyukjinKwon/SPARK-33802. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- dev/run-tests.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index 37a15a758d898..e271b4dec6c74 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -521,10 +521,11 @@ def post_python_tests_results(): # 6. Commit current HTMLs. 
run_cmd([ "git", + "-c user.name='Apache Spark Test Account'", + "-c user.email='sparktestacc@gmail.com'", "commit", "-am", - "Coverage report at latest commit in Apache Spark", - '--author="Apache Spark Test Account "']) + "Coverage report at latest commit in Apache Spark"]) # 7. Delete the old branch. run_cmd(["git", "branch", "-D", "gh-pages"]) # 8. Rename the temporary branch to master. From 7845865b8d5c03a4daf82588be0ff2ebb90152a7 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 16 Dec 2020 13:42:30 +0000 Subject: [PATCH 0783/1009] [SPARK-33803][SQL] Sort table properties by key in DESCRIBE TABLE command ### What changes were proposed in this pull request? This PR proposes to sort table properties in DESCRIBE TABLE command. This is consistent with DSv2 command as well: https://github.com/apache/spark/blob/e3058ba17cb4512537953eb4ded884e24ee93ba2/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala#L63 This PR fixes the test case in Scala 2.13 build as well where the table properties have different order in the map. ### Why are the changes needed? To keep the deterministic and pretty output, and fix the tests in Scala 2.13 build. See https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-maven-hadoop-3.2-scala-2.13/49/testReport/junit/org.apache.spark.sql/SQLQueryTestSuite/describe_sql/ ``` describe.sql Expected "...spark_catalog, view.[query.out.col.2=c, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=default]]", but got "...spark_catalog, view.[catalogAndNamespace.part.1=default, view.query.out.col.2=c, view.referredTempFunctionsNames=[]]]" Result did not match for query #29 DESC FORMATTED v ``` ### Does this PR introduce _any_ user-facing change? Yes, it will change the text output from `DESCRIBE [EXTENDED|FORMATTED] table_name`. Now the table properties are sorted by its key. ### How was this patch tested? Related unittests were fixed accordingly. Closes #30799 from HyukjinKwon/SPARK-33803. 
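The rendering change itself is a one-liner; as a standalone sketch (property values borrowed from the `describe.sql.out` golden file below, variable names illustrative):

```scala
// Table properties as stored in the catalog. Map iteration order is an
// implementation detail and differs between Scala 2.12 and 2.13 for larger
// maps, which is what made the golden files flaky.
val properties = Map("t" -> "test", "e" -> "3")

// Sort by key before rendering so DESCRIBE output is deterministic.
val tableProperties = properties.toSeq.sortBy(_._1)
  .map(p => p._1 + "=" + p._2)
  .mkString("[", ", ", "]")
// tableProperties == "[e=3, t=test]"
```

The DSv2 `DescribeTableExec` already renders properties sorted by key, so the V1 output now matches it.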
Authored-by: HyukjinKwon Signed-off-by: Wenchen Fan --- .../sql/catalyst/catalog/interface.scala | 3 +- .../sql-tests/results/describe.sql.out | 8 +++--- .../results/postgreSQL/create_view.sql.out | 28 +++++++++---------- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 6743b052fb3a1..9876ee375cfa6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -388,7 +388,8 @@ case class CatalogTable( def toLinkedHashMap: mutable.LinkedHashMap[String, String] = { val map = new mutable.LinkedHashMap[String, String]() - val tableProperties = properties.map(p => p._1 + "=" + p._2).mkString("[", ", ", "]") + val tableProperties = properties.toSeq.sortBy(_._1) + .map(p => p._1 + "=" + p._2).mkString("[", ", ", "]") val partitionColumns = partitionColumnNames.map(quoteIdentifier).mkString("[", ", ", "]") val lastAccess = { if (lastAccessTime <= 0) "UNKNOWN" else new Date(lastAccessTime).toString diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out index ebec2e1976b15..93b0cc3fe97e1 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out @@ -130,7 +130,7 @@ Num Buckets 2 Bucket Columns [`a`] Sort Columns [`b`] Comment table_comment -Table Properties [t=test, e=3] +Table Properties [e=3, t=test] Location [not included in comparison]/{warehouse_dir}/t Storage Properties [a=1, b=2] Partition Provider Catalog @@ -162,7 +162,7 @@ Num Buckets 2 Bucket Columns [`a`] Sort Columns [`b`] Comment table_comment -Table Properties [t=test, e=3] +Table Properties [e=3, t=test] Location [not included in comparison]/{warehouse_dir}/t Storage Properties [a=1, b=2] Partition Provider Catalog @@ -477,7 +477,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Catalog and Namespace spark_catalog.default View Query Output Columns [a, b, c, d] -Table Properties [view.query.out.col.3=d, view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=4, view.referredTempViewNames=[], view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=c, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=default] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=default, view.query.out.col.0=a, view.query.out.col.1=b, view.query.out.col.2=c, view.query.out.col.3=d, view.query.out.numCols=4, view.referredTempFunctionsNames=[], view.referredTempViewNames=[]] -- !query @@ -501,7 +501,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Catalog and Namespace spark_catalog.default View Query Output Columns [a, b, c, d] -Table Properties [view.query.out.col.3=d, view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=4, view.referredTempViewNames=[], view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=c, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=default] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, 
view.catalogAndNamespace.part.1=default, view.query.out.col.0=a, view.query.out.col.1=b, view.query.out.col.2=c, view.query.out.col.3=d, view.query.out.numCols=4, view.referredTempFunctionsNames=[], view.referredTempViewNames=[]] -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out index 1ac7c4a4069b3..c05c9abbcee31 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out @@ -257,7 +257,7 @@ View Text SELECT * FROM base_table View Original Text SELECT * FROM base_table View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=a, view.query.out.col.1=id, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query @@ -313,7 +313,7 @@ View Text SELECT * FROM base_table View Original Text SELECT * FROM base_table View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=a, view.query.out.col.1=id, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query @@ -359,7 +359,7 @@ View Original Text SELECT t1.a AS t1_a, t2.a AS t2_a WHERE t1.id = t2.id View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [t1_a, t2_a] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=t1_a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=t2_a, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=t1_a, view.query.out.col.1=t2_a, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query @@ -413,7 +413,7 @@ View Text SELECT * FROM base_table WHERE id IN (SELECT id FROM base_t View Original Text SELECT * FROM base_table WHERE id IN (SELECT id FROM base_table2) View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties 
[view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=a, view.query.out.col.1=id, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query @@ -443,7 +443,7 @@ View Text SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_ View Original Text SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_table2) t2 View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [id, a] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=id, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=a, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=id, view.query.out.col.1=a, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query @@ -473,7 +473,7 @@ View Text SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_t View Original Text SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_table2) View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=a, view.query.out.col.1=id, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query @@ -503,7 +503,7 @@ View Text SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM ba View Original Text SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM base_table2) View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=a, view.query.out.col.1=id, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query @@ -533,7 +533,7 @@ View Text 
SELECT * FROM base_table WHERE EXISTS (SELECT 1) View Original Text SELECT * FROM base_table WHERE EXISTS (SELECT 1) View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=a, view.query.out.col.1=id, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query @@ -669,7 +669,7 @@ View Text SELECT * FROM t1 CROSS JOIN t2 View Original Text SELECT * FROM t1 CROSS JOIN t2 View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] -Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2, view.query.out.col.0=num, view.query.out.col.1=name, view.query.out.col.2=num2, view.query.out.col.3=value, view.query.out.numCols=4, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query @@ -710,7 +710,7 @@ View Text SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2 View Original Text SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2 View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] -Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2, view.query.out.col.0=num, view.query.out.col.1=name, view.query.out.col.2=num2, view.query.out.col.3=value, view.query.out.numCols=4, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query @@ -751,7 +751,7 @@ View Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 View Original Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] -Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, 
view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2, view.query.out.col.0=num, view.query.out.col.1=name, view.query.out.col.2=num2, view.query.out.col.3=value, view.query.out.numCols=4, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query @@ -792,7 +792,7 @@ View Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.va View Original Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.value = 'xxx' View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] -Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2, view.query.out.col.0=num, view.query.out.col.1=name, view.query.out.col.2=num2, view.query.out.col.3=value, view.query.out.numCols=4, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query @@ -894,7 +894,7 @@ BETWEEN (SELECT d FROM tbl2 WHERE c = 1) AND (SELECT e FROM tbl3 WHERE f = 2) AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f) View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [a, b] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2, view.query.out.col.0=a, view.query.out.col.1=b, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query @@ -933,7 +933,7 @@ AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f) AND NOT EXISTS (SELECT g FROM tbl4 LEFT JOIN tmptbl ON tbl4.h = tmptbl.j) View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [a, b] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2, view.query.out.col.0=a, view.query.out.col.1=b, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query From 9d9d4a8e122cf1137edeca857e925f7e76c1ace2 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 16 Dec 2020 13:49:49 
+0000 Subject: [PATCH 0784/1009] [SPARK-33789][SQL][TESTS] Refactor unified V1 and V2 datasource tests ### What changes were proposed in this pull request? 1. Move common utility functions such as `test()`, `withNsTable()` and `checkPartitions()` to `DDLCommandTestUtils`. 2. Place common settings such as `version`, `catalog`, `defaultUsing`, `sparkConf` to `CommandSuiteBase`. ### Why are the changes needed? To improve code maintenance of the unified tests. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running the affected test suites: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *ShowPartitionsSuite" $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *ShowTablesSuite" $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableAddPartitionSuite" $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableDropPartitionSuite" ``` Closes #30779 from MaxGekk/refactor-unified-tests. Lead-authored-by: Max Gekk Co-authored-by: Maxim Gekk Signed-off-by: Wenchen Fan --- .../AlterTableAddPartitionSuiteBase.scala | 54 ++---- .../AlterTableDropPartitionSuiteBase.scala | 53 ++---- .../command/DDLCommandTestUtils.scala | 60 +++++++ .../command/ShowPartitionsSuiteBase.scala | 167 +++++++----------- .../command/ShowTablesSuiteBase.scala | 45 ++--- .../v1/AlterTableAddPartitionSuite.scala | 8 +- .../v1/AlterTableDropPartitionSuite.scala | 8 +- .../command/v1/CommandSuiteBase.scala | 27 +++ .../command/v1/ShowPartitionsSuite.scala | 8 +- .../command/v1/ShowTablesSuite.scala | 39 ++-- .../v2/AlterTableAddPartitionSuite.scala | 16 +- .../v2/AlterTableDropPartitionSuite.scala | 15 +- .../command/v2/CommandSuiteBase.scala | 32 ++++ .../command/v2/ShowPartitionsSuite.scala | 13 +- .../command/v2/ShowTablesSuite.scala | 11 +- .../command/AlterTableAddPartitionSuite.scala | 6 +- .../AlterTableDropPartitionSuite.scala | 7 +- .../execution/command/CommandSuiteBase.scala | 27 +++ .../command/ShowPartitionsSuite.scala | 6 +- .../execution/command/ShowTablesSuite.scala | 6 +- 20 files changed, 274 insertions(+), 334 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala index 2457bb9f8b57c..1c1d802b991f5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala @@ -17,50 +17,18 @@ package org.apache.spark.sql.execution.command -import org.scalactic.source.Position -import org.scalatest.Tag - -import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.{AnalysisException, QueryTest} import org.apache.spark.sql.catalyst.analysis.PartitionsAlreadyExistException import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec -import org.apache.spark.sql.execution.datasources.PartitioningUtils import org.apache.spark.sql.internal.SQLConf 
-import org.apache.spark.sql.test.SQLTestUtils - -trait AlterTableAddPartitionSuiteBase extends QueryTest with SQLTestUtils { - protected def version: String - protected def catalog: String - protected def defaultUsing: String - override def test(testName: String, testTags: Tag*)(testFun: => Any) - (implicit pos: Position): Unit = { - super.test(s"ALTER TABLE .. ADD PARTITION $version: " + testName, testTags: _*)(testFun) - } +trait AlterTableAddPartitionSuiteBase extends QueryTest with DDLCommandTestUtils { + override val command = "ALTER TABLE .. ADD PARTITION" - protected def checkPartitions(t: String, expected: Map[String, String]*): Unit = { - val partitions = sql(s"SHOW PARTITIONS $t") - .collect() - .toSet - .map((row: Row) => row.getString(0)) - .map(PartitioningUtils.parsePathFragment) - assert(partitions === expected.toSet) - } protected def checkLocation(t: String, spec: TablePartitionSpec, expected: String): Unit - protected def withNsTable(ns: String, tableName: String, cat: String = catalog) - (f: String => Unit): Unit = { - val nsCat = s"$cat.$ns" - withNamespace(nsCat) { - sql(s"CREATE NAMESPACE $nsCat") - val t = s"$nsCat.$tableName" - withTable(t) { - f(t) - } - } - } - test("one partition") { - withNsTable("ns", "tbl") { t => + withNamespaceAndTable("ns", "tbl") { t => sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") Seq("", "IF NOT EXISTS").foreach { exists => sql(s"ALTER TABLE $t ADD $exists PARTITION (id=1) LOCATION 'loc'") @@ -72,7 +40,7 @@ trait AlterTableAddPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("multiple partitions") { - withNsTable("ns", "tbl") { t => + withNamespaceAndTable("ns", "tbl") { t => sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") Seq("", "IF NOT EXISTS").foreach { exists => sql(s""" @@ -88,7 +56,7 @@ trait AlterTableAddPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("multi-part partition") { - withNsTable("ns", "tbl") { t => + withNamespaceAndTable("ns", "tbl") { t => sql(s"CREATE TABLE $t (id bigint, a int, b string) $defaultUsing PARTITIONED BY (a, b)") Seq("", "IF NOT EXISTS").foreach { exists => sql(s"ALTER TABLE $t ADD $exists PARTITION (a=2, b='abc')") @@ -99,7 +67,7 @@ trait AlterTableAddPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("table to alter does not exist") { - withNsTable("ns", "does_not_exist") { t => + withNamespaceAndTable("ns", "does_not_exist") { t => val errMsg = intercept[AnalysisException] { sql(s"ALTER TABLE $t ADD IF NOT EXISTS PARTITION (a='4', b='9')") }.getMessage @@ -108,7 +76,7 @@ trait AlterTableAddPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("case sensitivity in resolving partition specs") { - withNsTable("ns", "tbl") { t => + withNamespaceAndTable("ns", "tbl") { t => spark.sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { val errMsg = intercept[AnalysisException] { @@ -125,7 +93,7 @@ trait AlterTableAddPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("SPARK-33521: universal type conversions of partition values") { - withNsTable("ns", "tbl") { t => + withNamespaceAndTable("ns", "tbl") { t => sql(s""" |CREATE TABLE $t ( | id int, @@ -173,7 +141,7 @@ trait AlterTableAddPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("SPARK-33676: not fully specified partition spec") { - withNsTable("ns", "tbl") { t => + withNamespaceAndTable("ns", "tbl") { t => sql(s""" |CREATE 
TABLE $t (id bigint, part0 int, part1 string) |$defaultUsing @@ -187,7 +155,7 @@ trait AlterTableAddPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("partition already exists") { - withNsTable("ns", "tbl") { t => + withNamespaceAndTable("ns", "tbl") { t => sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") sql(s"ALTER TABLE $t ADD PARTITION (id=2) LOCATION 'loc1'") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala index 338f13ace891c..433f24c75083c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala @@ -17,48 +17,15 @@ package org.apache.spark.sql.execution.command -import org.scalactic.source.Position -import org.scalatest.Tag - -import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.{AnalysisException, QueryTest} import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionsException -import org.apache.spark.sql.execution.datasources.PartitioningUtils import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.test.SQLTestUtils -trait AlterTableDropPartitionSuiteBase extends QueryTest with SQLTestUtils { - protected def version: String - protected def catalog: String - protected def defaultUsing: String +trait AlterTableDropPartitionSuiteBase extends QueryTest with DDLCommandTestUtils { + override val command = "ALTER TABLE .. DROP PARTITION" protected def notFullPartitionSpecErr: String - override def test(testName: String, testTags: Tag*)(testFun: => Any) - (implicit pos: Position): Unit = { - super.test(s"ALTER TABLE .. 
DROP PARTITION $version: " + testName, testTags: _*)(testFun) - } - - protected def withNsTable(ns: String, tableName: String, cat: String = catalog) - (f: String => Unit): Unit = { - val nsCat = s"$cat.$ns" - withNamespace(nsCat) { - sql(s"CREATE NAMESPACE $nsCat") - val t = s"$nsCat.$tableName" - withTable(t) { - f(t) - } - } - } - - protected def checkPartitions(t: String, expected: Map[String, String]*): Unit = { - val partitions = sql(s"SHOW PARTITIONS $t") - .collect() - .toSet - .map((row: Row) => row.getString(0)) - .map(PartitioningUtils.parsePathFragment) - assert(partitions === expected.toSet) - } - protected def checkDropPartition( t: String, ifExists: String, @@ -75,7 +42,7 @@ trait AlterTableDropPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("single partition") { - withNsTable("ns", "tbl") { t => + withNamespaceAndTable("ns", "tbl") { t => sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") Seq("", "IF EXISTS").foreach { ifExists => sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") @@ -85,7 +52,7 @@ trait AlterTableDropPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("multiple partitions") { - withNsTable("ns", "tbl") { t => + withNamespaceAndTable("ns", "tbl") { t => sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") Seq("", "IF EXISTS").foreach { ifExists => sql(s""" @@ -98,7 +65,7 @@ trait AlterTableDropPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("multi-part partition") { - withNsTable("ns", "tbl") { t => + withNamespaceAndTable("ns", "tbl") { t => sql(s"CREATE TABLE $t (id bigint, a int, b string) $defaultUsing PARTITIONED BY (a, b)") Seq("", "IF EXISTS").foreach { ifExists => sql(s"ALTER TABLE $t ADD PARTITION (a = 2, b = 'abc')") @@ -108,7 +75,7 @@ trait AlterTableDropPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("table to alter does not exist") { - withNsTable("ns", "does_not_exist") { t => + withNamespaceAndTable("ns", "does_not_exist") { t => val errMsg = intercept[AnalysisException] { sql(s"ALTER TABLE $t DROP PARTITION (a='4', b='9')") }.getMessage @@ -117,7 +84,7 @@ trait AlterTableDropPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("case sensitivity in resolving partition specs") { - withNsTable("ns", "tbl") { t => + withNamespaceAndTable("ns", "tbl") { t => sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { val errMsg = intercept[AnalysisException] { @@ -136,7 +103,7 @@ trait AlterTableDropPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("SPARK-33676: not fully specified partition spec") { - withNsTable("ns", "tbl") { t => + withNamespaceAndTable("ns", "tbl") { t => sql(s""" |CREATE TABLE $t (id bigint, part0 int, part1 string) |$defaultUsing @@ -149,7 +116,7 @@ trait AlterTableDropPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("partition not exists") { - withNsTable("ns", "tbl") { t => + withNamespaceAndTable("ns", "tbl") { t => sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala new file mode 100644 index 0000000000000..a4129fe1ffee5 --- /dev/null +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.scalactic.source.Position +import org.scalatest.Tag + +import org.apache.spark.sql.Row +import org.apache.spark.sql.execution.datasources.PartitioningUtils +import org.apache.spark.sql.test.SQLTestUtils + +trait DDLCommandTestUtils extends SQLTestUtils { + // The version of the catalog under testing such as "V1", "V2", "Hive V1". + protected def version: String + // Name of the command as SQL statement, for instance "SHOW PARTITIONS" + protected def command: String + protected def catalog: String + protected def defaultUsing: String + + override def test(testName: String, testTags: Tag*)(testFun: => Any) + (implicit pos: Position): Unit = { + super.test(s"$command $version: " + testName, testTags: _*)(testFun) + } + + protected def withNamespaceAndTable(ns: String, tableName: String, cat: String = catalog) + (f: String => Unit): Unit = { + val nsCat = s"$cat.$ns" + withNamespace(nsCat) { + sql(s"CREATE NAMESPACE $nsCat") + val t = s"$nsCat.$tableName" + withTable(t) { + f(t) + } + } + } + + protected def checkPartitions(t: String, expected: Map[String, String]*): Unit = { + val partitions = sql(s"SHOW PARTITIONS $t") + .collect() + .toSet + .map((row: Row) => row.getString(0)) + .map(PartitioningUtils.parsePathFragment) + assert(partitions === expected.toSet) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala index 56c6e5a325745..d66c6191fbfa2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala @@ -17,18 +17,12 @@ package org.apache.spark.sql.execution.command -import org.scalactic.source.Position -import org.scalatest.Tag - import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types.{StringType, StructType} -trait ShowPartitionsSuiteBase extends QueryTest with SQLTestUtils { - protected def version: String - protected def catalog: String - protected def defaultUsing: String +trait ShowPartitionsSuiteBase extends QueryTest with DDLCommandTestUtils { + override val command = "SHOW PARTITIONS" // Gets the schema of `SHOW PARTITIONS` private val showSchema: StructType = new StructType().add("partition", StringType, false) protected def runShowPartitionsSql(sqlText: String, expected: Seq[Row]): 
Unit = { @@ -37,11 +31,6 @@ trait ShowPartitionsSuiteBase extends QueryTest with SQLTestUtils { checkAnswer(df, expected) } - override def test(testName: String, testTags: Tag*)(testFun: => Any) - (implicit pos: Position): Unit = { - super.test(s"SHOW PARTITIONS $version: " + testName, testTags: _*)(testFun) - } - protected def createDateTable(table: String): Unit = { sql(s""" |CREATE TABLE $table (price int, qty int, year int, month int) @@ -72,122 +61,94 @@ trait ShowPartitionsSuiteBase extends QueryTest with SQLTestUtils { } test("show partitions of non-partitioned table") { - withNamespace(s"$catalog.ns") { - sql(s"CREATE NAMESPACE $catalog.ns") - val table = s"$catalog.ns.not_partitioned_table" - withTable(table) { - sql(s"CREATE TABLE $table (col1 int) $defaultUsing") - val errMsg = intercept[AnalysisException] { - sql(s"SHOW PARTITIONS $table") - }.getMessage - assert(errMsg.contains("not allowed on a table that is not partitioned")) - } + withNamespaceAndTable("ns", "not_partitioned_table") { t => + sql(s"CREATE TABLE $t (col1 int) $defaultUsing") + val errMsg = intercept[AnalysisException] { + sql(s"SHOW PARTITIONS $t") + }.getMessage + assert(errMsg.contains("not allowed on a table that is not partitioned")) } } test("non-partitioning columns") { - withNamespace(s"$catalog.ns") { - sql(s"CREATE NAMESPACE $catalog.ns") - val table = s"$catalog.ns.dateTable" - withTable(table) { - createDateTable(table) - val errMsg = intercept[AnalysisException] { - sql(s"SHOW PARTITIONS $table PARTITION(abcd=2015, xyz=1)") - }.getMessage - assert(errMsg.contains("abcd is not a valid partition column")) - } + withNamespaceAndTable("ns", "dateTable") { t => + createDateTable(t) + val errMsg = intercept[AnalysisException] { + sql(s"SHOW PARTITIONS $t PARTITION(abcd=2015, xyz=1)") + }.getMessage + assert(errMsg.contains("abcd is not a valid partition column")) } } test("show everything") { - withNamespace(s"$catalog.ns") { - sql(s"CREATE NAMESPACE $catalog.ns") - val table = s"$catalog.ns.dateTable" - withTable(table) { - createDateTable(table) - runShowPartitionsSql( - s"show partitions $table", - Row("year=2015/month=1") :: - Row("year=2015/month=2") :: - Row("year=2016/month=2") :: - Row("year=2016/month=3") :: Nil) - } + withNamespaceAndTable("ns", "dateTable") { t => + createDateTable(t) + runShowPartitionsSql( + s"show partitions $t", + Row("year=2015/month=1") :: + Row("year=2015/month=2") :: + Row("year=2016/month=2") :: + Row("year=2016/month=3") :: Nil) } } test("filter by partitions") { - withNamespace(s"$catalog.ns") { - sql(s"CREATE NAMESPACE $catalog.ns") - val table = s"$catalog.ns.dateTable" - withTable(table) { - createDateTable(table) - runShowPartitionsSql( - s"show partitions $table PARTITION(year=2015)", - Row("year=2015/month=1") :: - Row("year=2015/month=2") :: Nil) - runShowPartitionsSql( - s"show partitions $table PARTITION(year=2015, month=1)", - Row("year=2015/month=1") :: Nil) - runShowPartitionsSql( - s"show partitions $table PARTITION(month=2)", - Row("year=2015/month=2") :: - Row("year=2016/month=2") :: Nil) - } + withNamespaceAndTable("ns", "dateTable") { t => + createDateTable(t) + runShowPartitionsSql( + s"show partitions $t PARTITION(year=2015)", + Row("year=2015/month=1") :: + Row("year=2015/month=2") :: Nil) + runShowPartitionsSql( + s"show partitions $t PARTITION(year=2015, month=1)", + Row("year=2015/month=1") :: Nil) + runShowPartitionsSql( + s"show partitions $t PARTITION(month=2)", + Row("year=2015/month=2") :: + Row("year=2016/month=2") :: Nil) } } 
test("show everything more than 5 part keys") { - withNamespace(s"$catalog.ns") { - sql(s"CREATE NAMESPACE $catalog.ns") - val table = s"$catalog.ns.wideTable" - withTable(table) { - createWideTable(table) - runShowPartitionsSql( - s"show partitions $table", - Row("year=2016/month=3/hour=10/minute=10/sec=10/extra=1") :: - Row("year=2016/month=4/hour=10/minute=10/sec=10/extra=1") :: Nil) - } + withNamespaceAndTable("ns", "wideTable") { t => + createWideTable(t) + runShowPartitionsSql( + s"show partitions $t", + Row("year=2016/month=3/hour=10/minute=10/sec=10/extra=1") :: + Row("year=2016/month=4/hour=10/minute=10/sec=10/extra=1") :: Nil) } } test("SPARK-33667: case sensitivity of partition spec") { - withNamespace(s"$catalog.ns") { - sql(s"CREATE NAMESPACE $catalog.ns") - val t = s"$catalog.ns.part_table" - withTable(t) { - sql(s""" - |CREATE TABLE $t (price int, qty int, year int, month int) - |$defaultUsing - |PARTITIONED BY (year, month)""".stripMargin) - sql(s"INSERT INTO $t PARTITION(year = 2015, month = 1) SELECT 1, 1") - Seq( - true -> "PARTITION(year = 2015, month = 1)", - false -> "PARTITION(YEAR = 2015, Month = 1)" - ).foreach { case (caseSensitive, partitionSpec) => - withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { - runShowPartitionsSql( - s"SHOW PARTITIONS $t $partitionSpec", - Row("year=2015/month=1") :: Nil) - } + withNamespaceAndTable("ns", "part_table") { t => + sql(s""" + |CREATE TABLE $t (price int, qty int, year int, month int) + |$defaultUsing + |PARTITIONED BY (year, month)""".stripMargin) + sql(s"INSERT INTO $t PARTITION(year = 2015, month = 1) SELECT 1, 1") + Seq( + true -> "PARTITION(year = 2015, month = 1)", + false -> "PARTITION(YEAR = 2015, Month = 1)" + ).foreach { case (caseSensitive, partitionSpec) => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + runShowPartitionsSql( + s"SHOW PARTITIONS $t $partitionSpec", + Row("year=2015/month=1") :: Nil) } } } } test("SPARK-33777: sorted output") { - withNamespace(s"$catalog.ns") { - sql(s"CREATE NAMESPACE $catalog.ns") - val table = s"$catalog.ns.dateTable" - withTable(table) { - sql(s""" - |CREATE TABLE $table (id int, part string) - |$defaultUsing - |PARTITIONED BY (part)""".stripMargin) - sql(s"ALTER TABLE $table ADD PARTITION(part = 'b')") - sql(s"ALTER TABLE $table ADD PARTITION(part = 'a')") - val partitions = sql(s"show partitions $table") - assert(partitions.first().getString(0) === "part=a") - } + withNamespaceAndTable("ns", "dateTable") { t => + sql(s""" + |CREATE TABLE $t (id int, part string) + |$defaultUsing + |PARTITIONED BY (part)""".stripMargin) + sql(s"ALTER TABLE $t ADD PARTITION(part = 'b')") + sql(s"ALTER TABLE $t ADD PARTITION(part = 'a')") + val partitions = sql(s"show partitions $t") + assert(partitions.first().getString(0) === "part=a") } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala index 58427183eeed5..5b729a4eb1c85 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala @@ -17,21 +17,15 @@ package org.apache.spark.sql.execution.command -import org.scalactic.source.Position -import org.scalatest.Tag - import org.apache.spark.sql.{QueryTest, Row} import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException import 
org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types.StructType -trait ShowTablesSuiteBase extends QueryTest with SQLTestUtils { - protected def version: String - protected def catalog: String +trait ShowTablesSuiteBase extends QueryTest with DDLCommandTestUtils { + override val command = "SHOW TABLES" protected def defaultNamespace: Seq[String] - protected def defaultUsing: String case class ShowRow(namespace: String, table: String, isTemporary: Boolean) protected def getRows(showRows: Seq[ShowRow]): Seq[Row] // Gets the schema of `SHOW TABLES` @@ -43,18 +37,10 @@ trait ShowTablesSuiteBase extends QueryTest with SQLTestUtils { checkAnswer(df, getRows(expected)) } - override def test(testName: String, testTags: Tag*)(testFun: => Any) - (implicit pos: Position): Unit = { - super.test(s"SHOW TABLES $version: " + testName, testTags: _*)(testFun) - } - test("show an existing table") { - withNamespace(s"$catalog.ns") { - sql(s"CREATE NAMESPACE $catalog.ns") - withTable(s"$catalog.ns.table") { - sql(s"CREATE TABLE $catalog.ns.table (name STRING, id INT) $defaultUsing") - runShowTablesSql(s"SHOW TABLES IN $catalog.ns", Seq(ShowRow("ns", "table", false))) - } + withNamespaceAndTable("ns", "table") { t => + sql(s"CREATE TABLE $t (name STRING, id INT) $defaultUsing") + runShowTablesSql(s"SHOW TABLES IN $catalog.ns", Seq(ShowRow("ns", "table", false))) } } @@ -117,20 +103,17 @@ trait ShowTablesSuiteBase extends QueryTest with SQLTestUtils { } test("change current catalog and namespace with USE statements") { - withNamespace(s"$catalog.ns") { - sql(s"CREATE NAMESPACE $catalog.ns") - withTable(s"$catalog.ns.table") { - sql(s"CREATE TABLE $catalog.ns.table (name STRING, id INT) $defaultUsing") + withNamespaceAndTable("ns", "table") { t => + sql(s"CREATE TABLE $t (name STRING, id INT) $defaultUsing") - sql(s"USE $catalog") - // No table is matched since the current namespace is not ["ns"] - assert(defaultNamespace != Seq("ns")) - runShowTablesSql("SHOW TABLES", Seq()) + sql(s"USE $catalog") + // No table is matched since the current namespace is not ["ns"] + assert(defaultNamespace != Seq("ns")) + runShowTablesSql("SHOW TABLES", Seq()) - // Update the current namespace to match "ns.tbl". - sql(s"USE $catalog.ns") - runShowTablesSql("SHOW TABLES", Seq(ShowRow("ns", "table", false))) - } + // Update the current namespace to match "ns.tbl". 
+ sql(s"USE $catalog.ns") + runShowTablesSql("SHOW TABLES", Seq(ShowRow("ns", "table", false))) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala index b29564e1d81b6..1b7c90067e3f5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala @@ -18,15 +18,9 @@ package org.apache.spark.sql.execution.command.v1 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec -import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution.command -import org.apache.spark.sql.test.SharedSparkSession trait AlterTableAddPartitionSuiteBase extends command.AlterTableAddPartitionSuiteBase { - override def version: String = "V1" - override def catalog: String = CatalogManager.SESSION_CATALOG_NAME - override def defaultUsing: String = "USING parquet" - override protected def checkLocation( t: String, spec: TablePartitionSpec, @@ -43,4 +37,4 @@ trait AlterTableAddPartitionSuiteBase extends command.AlterTableAddPartitionSuit } } -class AlterTableAddPartitionSuite extends AlterTableAddPartitionSuiteBase with SharedSparkSession +class AlterTableAddPartitionSuite extends AlterTableAddPartitionSuiteBase with CommandSuiteBase diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala index e655debc2fdde..737af96f5abe3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala @@ -17,18 +17,12 @@ package org.apache.spark.sql.execution.command.v1 -import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution.command -import org.apache.spark.sql.test.SharedSparkSession trait AlterTableDropPartitionSuiteBase extends command.AlterTableDropPartitionSuiteBase { - override def version: String = "V1" - override def catalog: String = CatalogManager.SESSION_CATALOG_NAME - override def defaultUsing: String = "USING parquet" - override protected val notFullPartitionSpecErr = "The following partitions not found in table" } class AlterTableDropPartitionSuite extends AlterTableDropPartitionSuiteBase - with SharedSparkSession + with CommandSuiteBase diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala new file mode 100644 index 0000000000000..323f9c9365a11 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command.v1 + +import org.apache.spark.sql.connector.catalog.CatalogManager +import org.apache.spark.sql.test.SharedSparkSession + +trait CommandSuiteBase extends SharedSparkSession { + def version: String = "V1" + def catalog: String = CatalogManager.SESSION_CATALOG_NAME + def defaultUsing: String = "USING parquet" +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala index c752a5f358bb9..8acd24f0e3956 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala @@ -18,15 +18,9 @@ package org.apache.spark.sql.execution.command.v1 import org.apache.spark.sql.{AnalysisException, Row, SaveMode} -import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution.command -import org.apache.spark.sql.test.SharedSparkSession trait ShowPartitionsSuiteBase extends command.ShowPartitionsSuiteBase { - override def version: String = "V1" - override def catalog: String = CatalogManager.SESSION_CATALOG_NAME - override def defaultUsing: String = "USING parquet" - test("show everything in the default database") { val table = "dateTable" withTable(table) { @@ -69,7 +63,7 @@ trait ShowPartitionsSuiteBase extends command.ShowPartitionsSuiteBase { } } -class ShowPartitionsSuite extends ShowPartitionsSuiteBase with SharedSparkSession { +class ShowPartitionsSuite extends ShowPartitionsSuiteBase with CommandSuiteBase { // The test is placed here because it fails with `USING HIVE`: // org.apache.spark.sql.AnalysisException: // Hive data source can only be used with tables, you can't use it with CREATE TEMP VIEW USING diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala index 3db880c776365..12b4df269e157 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala @@ -18,17 +18,12 @@ package org.apache.spark.sql.execution.command.v1 import org.apache.spark.sql.{AnalysisException, Row, SaveMode} -import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution.command import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{BooleanType, StringType, StructType} trait ShowTablesSuiteBase extends command.ShowTablesSuiteBase { - override def version: String = "V1" - override def catalog: String = CatalogManager.SESSION_CATALOG_NAME override def defaultNamespace: Seq[String] = Seq("default") - override def defaultUsing: String = "USING parquet" override def showSchema: StructType = { new StructType() .add("database", StringType, nullable = false) @@ -87,31 +82,27 @@ 
trait ShowTablesSuiteBase extends command.ShowTablesSuiteBase { } test("case sensitivity of partition spec") { - withNamespace(s"$catalog.ns") { - sql(s"CREATE NAMESPACE $catalog.ns") - val t = s"$catalog.ns.part_table" - withTable(t) { - sql(s""" - |CREATE TABLE $t (price int, qty int, year int, month int) - |$defaultUsing - |partitioned by (year, month)""".stripMargin) - sql(s"INSERT INTO $t PARTITION(year = 2015, month = 1) SELECT 1, 1") - Seq( - true -> "PARTITION(year = 2015, month = 1)", - false -> "PARTITION(YEAR = 2015, Month = 1)" - ).foreach { case (caseSensitive, partitionSpec) => - withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { - val df = sql(s"SHOW TABLE EXTENDED LIKE 'part_table' $partitionSpec") - val information = df.select("information").first().getString(0) - assert(information.contains("Partition Values: [year=2015, month=1]")) - } + withNamespaceAndTable("ns", "part_table") { t => + sql(s""" + |CREATE TABLE $t (price int, qty int, year int, month int) + |$defaultUsing + |partitioned by (year, month)""".stripMargin) + sql(s"INSERT INTO $t PARTITION(year = 2015, month = 1) SELECT 1, 1") + Seq( + true -> "PARTITION(year = 2015, month = 1)", + false -> "PARTITION(YEAR = 2015, Month = 1)" + ).foreach { case (caseSensitive, partitionSpec) => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + val df = sql(s"SHOW TABLE EXTENDED LIKE 'part_table' $partitionSpec") + val information = df.select("information").first().getString(0) + assert(information.contains("Partition Values: [year=2015, month=1]")) } } } } } -class ShowTablesSuite extends ShowTablesSuiteBase with SharedSparkSession { +class ShowTablesSuite extends ShowTablesSuiteBase with CommandSuiteBase { test("SPARK-33670: show partitions from a datasource table") { import testImplicits._ withNamespace(s"$catalog.ns") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala index 09921c8d8a5eb..b8ecb87ae7595 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala @@ -17,29 +17,19 @@ package org.apache.spark.sql.execution.command.v2 -import org.apache.spark.SparkConf import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.ResolvePartitionSpec import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec -import org.apache.spark.sql.connector.{InMemoryPartitionTable, InMemoryPartitionTableCatalog, InMemoryTableCatalog} +import org.apache.spark.sql.connector.InMemoryPartitionTable import org.apache.spark.sql.connector.catalog.{CatalogV2Implicits, Identifier} import org.apache.spark.sql.execution.command -import org.apache.spark.sql.test.SharedSparkSession class AlterTableAddPartitionSuite extends command.AlterTableAddPartitionSuiteBase - with SharedSparkSession { + with CommandSuiteBase { import CatalogV2Implicits._ - override def version: String = "V2" - override def catalog: String = "test_catalog" - override def defaultUsing: String = "USING _" - - override def sparkConf: SparkConf = super.sparkConf - .set(s"spark.sql.catalog.$catalog", classOf[InMemoryPartitionTableCatalog].getName) - .set(s"spark.sql.catalog.non_part_$catalog", classOf[InMemoryTableCatalog].getName) - override protected def checkLocation( t: 
String, spec: TablePartitionSpec, @@ -61,7 +51,7 @@ class AlterTableAddPartitionSuite } test("SPARK-33650: add partition into a table which doesn't support partition management") { - withNsTable("ns", "tbl", s"non_part_$catalog") { t => + withNamespaceAndTable("ns", "tbl", s"non_part_$catalog") { t => sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing") val errMsg = intercept[AnalysisException] { sql(s"ALTER TABLE $t ADD PARTITION (id=1)") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala index 9dc1cad5a002d..ffbfe3f695935 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala @@ -17,28 +17,17 @@ package org.apache.spark.sql.execution.command.v2 -import org.apache.spark.SparkConf import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.connector.{InMemoryPartitionTableCatalog, InMemoryTableCatalog} import org.apache.spark.sql.execution.command -import org.apache.spark.sql.test.SharedSparkSession class AlterTableDropPartitionSuite extends command.AlterTableDropPartitionSuiteBase - with SharedSparkSession { - - override def version: String = "V2" - override def catalog: String = "test_catalog" - override def defaultUsing: String = "USING _" + with CommandSuiteBase { override protected val notFullPartitionSpecErr = "Partition spec is invalid" - override def sparkConf: SparkConf = super.sparkConf - .set(s"spark.sql.catalog.$catalog", classOf[InMemoryPartitionTableCatalog].getName) - .set(s"spark.sql.catalog.non_part_$catalog", classOf[InMemoryTableCatalog].getName) - test("SPARK-33650: drop partition into a table which doesn't support partition management") { - withNsTable("ns", "tbl", s"non_part_$catalog") { t => + withNamespaceAndTable("ns", "tbl", s"non_part_$catalog") { t => sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing") val errMsg = intercept[AnalysisException] { sql(s"ALTER TABLE $t DROP PARTITION (id=1)") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala new file mode 100644 index 0000000000000..b1f6a5b318a32 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command.v2 + +import org.apache.spark.SparkConf +import org.apache.spark.sql.connector.{InMemoryPartitionTableCatalog, InMemoryTableCatalog} +import org.apache.spark.sql.test.SharedSparkSession + +trait CommandSuiteBase extends SharedSparkSession { + def version: String = "V2" + def catalog: String = "test_catalog" + def defaultUsing: String = "USING _" + + override def sparkConf: SparkConf = super.sparkConf + .set(s"spark.sql.catalog.$catalog", classOf[InMemoryPartitionTableCatalog].getName) + .set(s"spark.sql.catalog.non_part_$catalog", classOf[InMemoryTableCatalog].getName) +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala index 55985a335c94b..e52c60d0f9a95 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala @@ -17,21 +17,10 @@ package org.apache.spark.sql.execution.command.v2 -import org.apache.spark.SparkConf import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.connector.{InMemoryPartitionTableCatalog, InMemoryTableCatalog} import org.apache.spark.sql.execution.command -import org.apache.spark.sql.test.SharedSparkSession - -class ShowPartitionsSuite extends command.ShowPartitionsSuiteBase with SharedSparkSession { - override def version: String = "V2" - override def catalog: String = "test_catalog" - override def defaultUsing: String = "USING _" - - override def sparkConf: SparkConf = super.sparkConf - .set(s"spark.sql.catalog.$catalog", classOf[InMemoryPartitionTableCatalog].getName) - .set(s"spark.sql.catalog.non_part_$catalog", classOf[InMemoryTableCatalog].getName) +class ShowPartitionsSuite extends command.ShowPartitionsSuiteBase with CommandSuiteBase { test("a table does not support partitioning") { val table = s"non_part_$catalog.tab1" withTable(table) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala index 370c8358e64da..cef5eac703ee7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala @@ -17,18 +17,12 @@ package org.apache.spark.sql.execution.command.v2 -import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, Row} -import org.apache.spark.sql.connector.InMemoryTableCatalog import org.apache.spark.sql.execution.command -import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{StringType, StructType} -class ShowTablesSuite extends command.ShowTablesSuiteBase with SharedSparkSession { - override def version: String = "V2" - override def catalog: String = "test_catalog" +class ShowTablesSuite extends command.ShowTablesSuiteBase with CommandSuiteBase { override def defaultNamespace: Seq[String] = Nil - override def defaultUsing: String = "USING _" override def showSchema: StructType = { new StructType() .add("namespace", StringType, nullable = false) @@ -40,9 +34,6 @@ class ShowTablesSuite extends command.ShowTablesSuiteBase with SharedSparkSessio } } - override def sparkConf: SparkConf = super.sparkConf - .set(s"spark.sql.catalog.$catalog", classOf[InMemoryTableCatalog].getName) 
- // The test fails for V1 catalog with the error: // org.apache.spark.sql.AnalysisException: // The namespace in session catalog must have exactly one name part: spark_catalog.n1.n2.db diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddPartitionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddPartitionSuite.scala index 73776c3ef79fa..2a996c3f4690c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddPartitionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddPartitionSuite.scala @@ -18,11 +18,7 @@ package org.apache.spark.sql.hive.execution.command import org.apache.spark.sql.execution.command.v1 -import org.apache.spark.sql.hive.test.TestHiveSingleton class AlterTableAddPartitionSuite extends v1.AlterTableAddPartitionSuiteBase - with TestHiveSingleton { - override def version: String = "Hive V1" - override def defaultUsing: String = "USING HIVE" -} + with CommandSuiteBase diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableDropPartitionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableDropPartitionSuite.scala index 9c7d76a0caa08..a4f9ab0b0433c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableDropPartitionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableDropPartitionSuite.scala @@ -18,12 +18,7 @@ package org.apache.spark.sql.hive.execution.command import org.apache.spark.sql.execution.command.v1 -import org.apache.spark.sql.hive.test.TestHiveSingleton class AlterTableDropPartitionSuite extends v1.AlterTableDropPartitionSuiteBase - with TestHiveSingleton { - - override def version: String = "Hive V1" - override def defaultUsing: String = "USING HIVE" -} + with CommandSuiteBase diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala new file mode 100644 index 0000000000000..3f603fd6c7ddf --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive.execution.command + +import org.apache.spark.sql.connector.catalog.CatalogManager +import org.apache.spark.sql.hive.test.TestHiveSingleton + +trait CommandSuiteBase extends TestHiveSingleton { + def version: String = "Hive V1" + def catalog: String = CatalogManager.SESSION_CATALOG_NAME + def defaultUsing: String = "USING HIVE" +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowPartitionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowPartitionsSuite.scala index a92478faf0e16..fa8ac4ccaa089 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowPartitionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowPartitionsSuite.scala @@ -18,9 +18,5 @@ package org.apache.spark.sql.hive.execution.command import org.apache.spark.sql.execution.command.v1 -import org.apache.spark.sql.hive.test.TestHiveSingleton -class ShowPartitionsSuite extends v1.ShowPartitionsSuiteBase with TestHiveSingleton { - override def version: String = "Hive V1" - override def defaultUsing: String = "USING HIVE" -} +class ShowPartitionsSuite extends v1.ShowPartitionsSuiteBase with CommandSuiteBase diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala index 836f080d28e75..8c00b3fe7f7ca 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala @@ -18,9 +18,5 @@ package org.apache.spark.sql.hive.execution.command import org.apache.spark.sql.execution.command.v1 -import org.apache.spark.sql.hive.test.TestHiveSingleton -class ShowTablesSuite extends v1.ShowTablesSuiteBase with TestHiveSingleton { - override def version: String = "Hive V1" - override def defaultUsing: String = "USING HIVE" -} +class ShowTablesSuite extends v1.ShowTablesSuiteBase with CommandSuiteBase From 205d8e40bc8446c5953c9a082ffaede3029d1d53 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Wed, 16 Dec 2020 14:36:38 +0000 Subject: [PATCH 0785/1009] [SPARK-32991][SQL] [FOLLOWUP] Reset command relies on session initials first ### What changes were proposed in this pull request? As a follow-up of https://github.com/apache/spark/pull/30045, we modify the RESET command here to respect the session initial configs per session first then fall back to the `SharedState` conf, which makes each session could maintain a different copy of initial configs for resetting. ### Why are the changes needed? to make reset command saner. ### Does this PR introduce _any_ user-facing change? yes, RESET will respect session initials first not always go to the system defaults ### How was this patch tested? add new tests Closes #30642 from yaooqinn/SPARK-32991-F. 
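To make the new RESET precedence concrete, here is a hedged sketch of the behaviour described above from user code (the config key and values are placeholders, not taken from the patch):

```scala
import org.apache.spark.sql.SparkSession

// Illustrative only: a non-static option passed at builder time becomes this session's
// initial value, so RESET now falls back to it before consulting the global SharedState conf.
val spark = SparkSession.builder()
  .master("local")
  .config("spark.sql.some.custom.key", "session-initial")   // hypothetical config key
  .getOrCreate()

spark.sql("SET spark.sql.some.custom.key=changed")
assert(spark.conf.get("spark.sql.some.custom.key") == "changed")

spark.sql("RESET")
// Previously RESET went back to the system defaults; now the session initials win first.
assert(spark.conf.get("spark.sql.some.custom.key") == "session-initial")
```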
Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../apache/spark/sql/internal/SQLConf.scala | 25 ++++++- .../org/apache/spark/sql/SparkSession.scala | 2 +- .../sql/execution/command/SetCommand.scala | 12 ++-- .../internal/BaseSessionStateBuilder.scala | 24 ++----- .../spark/sql/internal/SharedState.scala | 7 +- .../spark/sql/SparkSessionBuilderSuite.scala | 71 ++++++++++++++++--- 6 files changed, 104 insertions(+), 37 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index fd6a30ac6a81c..3f0fd70a6eae6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -29,7 +29,7 @@ import scala.util.matching.Regex import org.apache.hadoop.fs.Path -import org.apache.spark.{SparkContext, TaskContext} +import org.apache.spark.{SparkConf, SparkContext, TaskContext} import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ import org.apache.spark.internal.config.{IGNORE_MISSING_FILES => SPARK_IGNORE_MISSING_FILES} @@ -77,6 +77,29 @@ object SQLConf { } } + /** + * Merge all non-static configs to the SQLConf. For example, when the 1st [[SparkSession]] and + * the global [[SharedState]] have been initialized, all static configs have taken affect and + * should not be set to other values. Other later created sessions should respect all static + * configs and only be able to change non-static configs. + */ + private[sql] def mergeNonStaticSQLConfigs( + sqlConf: SQLConf, + configs: Map[String, String]): Unit = { + for ((k, v) <- configs if !staticConfKeys.contains(k)) { + sqlConf.setConfString(k, v) + } + } + + /** + * Extract entries from `SparkConf` and put them in the `SQLConf` + */ + private[sql] def mergeSparkConf(sqlConf: SQLConf, sparkConf: SparkConf): Unit = { + sparkConf.getAll.foreach { case (k, v) => + sqlConf.setConfString(k, v) + } + } + /** * Default config. Only used when there is no active SparkSession for the thread. * See [[get]] for more information. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index 20a2649322ae0..0fada5500edde 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -83,7 +83,7 @@ class SparkSession private( @transient private val existingSharedState: Option[SharedState], @transient private val parentSessionState: Option[SessionState], @transient private[sql] val extensions: SparkSessionExtensions, - @transient private val initialSessionOptions: Map[String, String]) + @transient private[sql] val initialSessionOptions: Map[String, String]) extends Serializable with Closeable with Logging { self => // The call site where this SparkSession was constructed. 
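The two helpers added to `SQLConf` above are what the `ResetCommand` and `BaseSessionStateBuilder` hunks below build on. A simplified sketch of how they layer the configs when a session conf is (re)built — not the actual builder code, and it assumes the caller lives under `org.apache.spark.sql` so the `private[sql]` helpers are visible:

```scala
import org.apache.spark.SparkConf
import org.apache.spark.sql.internal.SQLConf

// Sketch only: parameter names are illustrative.
def rebuildSessionConf(
    sharedStateConf: SparkConf,                    // conf captured when the 1st session started
    laterSparkConf: Map[String, String],           // entries added to SparkConf afterwards
    initialSessionOptions: Map[String, String]): SQLConf = {
  val conf = new SQLConf
  // Take every entry (static and non-static) from the shared initial conf first ...
  SQLConf.mergeSparkConf(conf, sharedStateConf)
  // ... then layer on only the non-static keys added later, and the per-session initials last.
  SQLConf.mergeNonStaticSQLConfigs(conf, laterSparkConf)
  SQLConf.mergeNonStaticSQLConfigs(conf, initialSessionOptions)
  conf
}
```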
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala index 00accedf21556..7d92e6e189fb2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala @@ -172,16 +172,18 @@ object SetCommand { case class ResetCommand(config: Option[String]) extends RunnableCommand with IgnoreCachedData { override def run(sparkSession: SparkSession): Seq[Row] = { - val defaults = sparkSession.sharedState.conf + val globalInitialConfigs = sparkSession.sharedState.conf config match { case Some(key) => sparkSession.conf.unset(key) - defaults.getOption(key).foreach(sparkSession.conf.set(key, _)) + sparkSession.initialSessionOptions.get(key) + .orElse(globalInitialConfigs.getOption(key)) + .foreach(sparkSession.conf.set(key, _)) case None => sparkSession.sessionState.conf.clear() - defaults.getAll.foreach { case (k, v) => - sparkSession.sessionState.conf.setConfString(k, v) - } + SQLConf.mergeSparkConf(sparkSession.sessionState.conf, globalInitialConfigs) + SQLConf.mergeNonStaticSQLConfigs(sparkSession.sessionState.conf, + sparkSession.initialSessionOptions) } Seq.empty[Row] } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index f51ee11091d02..8fb351a2a3b2b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -16,7 +16,6 @@ */ package org.apache.spark.sql.internal -import org.apache.spark.SparkConf import org.apache.spark.annotation.Unstable import org.apache.spark.sql.{ExperimentalMethods, SparkSession, UDFRegistration, _} import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry, ResolveSessionCatalog} @@ -73,15 +72,6 @@ abstract class BaseSessionStateBuilder( */ protected def extensions: SparkSessionExtensions = session.extensions - /** - * Extract entries from `SparkConf` and put them in the `SQLConf` - */ - protected def mergeSparkConf(sqlConf: SQLConf, sparkConf: SparkConf): Unit = { - sparkConf.getAll.foreach { case (k, v) => - sqlConf.setConfString(k, v) - } - } - /** * SQL-specific key-value configurations. 
* @@ -92,15 +82,15 @@ abstract class BaseSessionStateBuilder( parentState.map { s => val cloned = s.conf.clone() if (session.sparkContext.conf.get(StaticSQLConf.SQL_LEGACY_SESSION_INIT_WITH_DEFAULTS)) { - mergeSparkConf(cloned, session.sparkContext.conf) + SQLConf.mergeSparkConf(cloned, session.sparkContext.conf) } cloned }.getOrElse { val conf = new SQLConf - mergeSparkConf(conf, session.sparkContext.conf) - options.foreach { - case (k, v) => conf.setConfString(k, v) - } + SQLConf.mergeSparkConf(conf, session.sharedState.conf) + // the later added configs to spark conf shall be respected too + SQLConf.mergeNonStaticSQLConfigs(conf, session.sparkContext.conf.getAll.toMap) + SQLConf.mergeNonStaticSQLConfigs(conf, session.initialSessionOptions) conf } } @@ -374,7 +364,7 @@ private[sql] trait WithTestConf { self: BaseSessionStateBuilder => parentState.map { s => val cloned = s.conf.clone() if (session.sparkContext.conf.get(StaticSQLConf.SQL_LEGACY_SESSION_INIT_WITH_DEFAULTS)) { - mergeSparkConf(conf, session.sparkContext.conf) + SQLConf.mergeSparkConf(conf, session.sparkContext.conf) } cloned }.getOrElse { @@ -386,7 +376,7 @@ private[sql] trait WithTestConf { self: BaseSessionStateBuilder => overrideConfigurations.foreach { case (key, value) => setConfString(key, value) } } } - mergeSparkConf(conf, session.sparkContext.conf) + SQLConf.mergeSparkConf(conf, session.sparkContext.conf) conf } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala index fd34077aba963..6018afb0dce46 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala @@ -67,11 +67,12 @@ private[sql] class SharedState( case (k, _) if k == "hive.metastore.warehouse.dir" || k == WAREHOUSE_PATH.key => logWarning(s"Not allowing to set ${WAREHOUSE_PATH.key} or hive.metastore.warehouse.dir " + s"in SparkSession's options, it should be set statically for cross-session usages") - case (k, v) => - logDebug(s"Applying initial SparkSession options to SparkConf/HadoopConf: $k -> $v") + case (k, v) if SQLConf.staticConfKeys.contains(k) => + logDebug(s"Applying static initial session options to SparkConf: $k -> $v") confClone.set(k, v) + case (k, v) => + logDebug(s"Applying other initial session options to HadoopConf: $k -> $v") hadoopConfClone.set(k, v) - } (confClone, hadoopConfClone) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala index 1fbce512f976d..e53976854070d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala @@ -24,6 +24,7 @@ import org.apache.spark.internal.config.EXECUTOR_ALLOW_SPARK_CONTEXT import org.apache.spark.internal.config.UI.UI_ENABLED import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf._ +import org.apache.spark.util.ThreadUtils /** * Test cases for the builder pattern of [[SparkSession]]. 
@@ -305,21 +306,18 @@ class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { // newly specified values val sharedWH = spark.sharedState.conf.get(wh) val sharedTD = spark.sharedState.conf.get(td) - val sharedCustom = spark.sharedState.conf.get(custom) assert(sharedWH === "./data2", "The warehouse dir in shared state should be determined by the 1st created spark session") assert(sharedTD === "alice", "Static sql configs in shared state should be determined by the 1st created spark session") - assert(sharedCustom === "kyao", - "Dynamic sql configs in shared state should be determined by the 1st created spark session") + assert(spark.sharedState.conf.getOption(custom).isEmpty, + "Dynamic sql configs is session specific") assert(spark.conf.get(wh) === sharedWH, "The warehouse dir in session conf and shared state conf should be consistent") assert(spark.conf.get(td) === sharedTD, "Static sql configs in session conf and shared state conf should be consistent") - assert(spark.conf.get(custom) === sharedCustom, - "Dynamic sql configs in session conf and shared state conf should be consistent before" + - " setting to new ones") + assert(spark.conf.get(custom) === "kyao", "Dynamic sql configs is session specific") spark.sql("RESET") @@ -327,12 +325,65 @@ class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { "The warehouse dir in shared state should be respect after RESET") assert(spark.conf.get(td) === sharedTD, "Static sql configs in shared state should be respect after RESET") - assert(spark.conf.get(custom) === sharedCustom, - "Dynamic sql configs in shared state should be respect after RESET") + assert(spark.conf.get(custom) === "kyao", + "Dynamic sql configs in session initial map should be respect after RESET") - val spark2 = SparkSession.builder().getOrCreate() + val spark2 = SparkSession.builder() + .config(wh, "./data3") + .config(custom, "kyaoo").getOrCreate() assert(spark2.conf.get(wh) === sharedWH) assert(spark2.conf.get(td) === sharedTD) - assert(spark2.conf.get(custom) === sharedCustom) + assert(spark2.conf.get(custom) === "kyaoo") + } + + test("SPARK-32991: RESET should work properly with multi threads") { + val wh = "spark.sql.warehouse.dir" + val td = "spark.sql.globalTempDatabase" + val custom = "spark.sql.custom" + val spark = ThreadUtils.runInNewThread("new session 0", false) { + SparkSession.builder() + .master("local") + .config(wh, "./data0") + .config(td, "bob") + .config(custom, "c0") + .getOrCreate() + } + + spark.sql(s"SET $custom=c1") + assert(spark.conf.get(custom) === "c1") + spark.sql("RESET") + assert(spark.conf.get(wh) === "./data0", + "The warehouse dir in shared state should be respect after RESET") + assert(spark.conf.get(td) === "bob", + "Static sql configs in shared state should be respect after RESET") + assert(spark.conf.get(custom) === "c0", + "Dynamic sql configs in shared state should be respect after RESET") + + val spark1 = ThreadUtils.runInNewThread("new session 1", false) { + SparkSession.builder().getOrCreate() + } + + assert(spark === spark1) + + // TODO: SPARK-33718: After clear sessions, the SharedState will be unreachable, then all + // the new static will take effect. 
+ SparkSession.clearDefaultSession() + val spark2 = ThreadUtils.runInNewThread("new session 2", false) { + SparkSession.builder() + .master("local") + .config(wh, "./data1") + .config(td, "alice") + .config(custom, "c2") + .getOrCreate() + } + + assert(spark2 !== spark) + spark2.sql(s"SET $custom=c1") + assert(spark2.conf.get(custom) === "c1") + spark2.sql("RESET") + assert(spark2.conf.get(wh) === "./data1") + assert(spark2.conf.get(td) === "alice") + assert(spark2.conf.get(custom) === "c2") + } } From ddda32b156e4c2e2ba1d1ed37cf34fb2f26d769e Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 16 Dec 2020 23:42:34 +0900 Subject: [PATCH 0786/1009] [SPARK-33802][INFRA][FOLLOW-UP] Separate arguments properly for -c option in git command for PySpark coverage ### What changes were proposed in this pull request? This PR proposes to separate arguments properly for `-c` options. Otherwise, the space is considered as its part of argument: ``` Cloning into 'pyspark-coverage-site'... unknown option: -c user.name='Apache Spark Test Account' usage: git [--version] [--help] [-C ] [-c =] [--exec-path[=]] [--html-path] [--man-path] [--info-path] [-p | --paginate | -P | --no-pager] [--no-replace-objects] [--bare] [--git-dir=] [--work-tree=] [--namespace=] [] [error] running git -c user.name='Apache Spark Test Account' -c user.email='sparktestaccgmail.com' commit -am Coverage report at latest commit in Apache Spark ; received return code 129 ``` ### Why are the changes needed? To make the build pass (https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-3.2/1728/console). ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? ```python >>> from sparktestsupport.shellutils import run_cmd >>> run_cmd([ ... "git", ... "-c", ... "user.name='Apache Spark Test Account'", ... "-c", ... "user.email='sparktestaccgmail.com'", ... "commit", ... "-am", ... "Coverage report at latest commit in Apache Spark"]) [SPARK-33802-followup 80d2565a511] Coverage report at latest commit in Apache Spark 1 file changed, 1 insertion(+), 1 deletion(-) CompletedProcess(args=['git', '-c', "user.name='Apache Spark Test Account'", '-c', "user.email='sparktestaccgmail.com'", 'commit', '-am', 'Coverage report at latest commit in Apache Spark'], returncode=0) ``` I cannot run e2e test because it requires the env to have Jenkins secret. Closes #30804 from HyukjinKwon/SPARK-33802-followup. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- dev/run-tests.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index e271b4dec6c74..d9d1ac85d5cd9 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -521,8 +521,10 @@ def post_python_tests_results(): # 6. Commit current HTMLs. run_cmd([ "git", - "-c user.name='Apache Spark Test Account'", - "-c user.email='sparktestacc@gmail.com'", + "-c", + "user.name='Apache Spark Test Account'", + "-c", + "user.email='sparktestacc@gmail.com'", "commit", "-am", "Coverage report at latest commit in Apache Spark"]) From 8666d1c39cb6d49e4aa3cd0b9342b82405541aed Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Wed, 16 Dec 2020 15:56:50 +0000 Subject: [PATCH 0787/1009] [SPARK-33800][SQL] Remove command name in AnalysisException message when a relation is not resolved ### What changes were proposed in this pull request? 
Based on the discussion https://github.com/apache/spark/pull/30743#discussion_r543124594, this PR proposes to remove the command name in AnalysisException message when a relation is not resolved. For some of the commands that use `UnresolvedTable`, `UnresolvedView`, and `UnresolvedTableOrView` to resolve an identifier, when the identifier cannot be resolved, the exception will be something like `Table or view not found for 'SHOW TBLPROPERTIES': badtable`. The command name (`SHOW TBLPROPERTIES` in this case) should be dropped to be consistent with other existing commands. ### Why are the changes needed? To make the exception message consistent. ### Does this PR introduce _any_ user-facing change? Yes, the exception message will be changed from ``` Table or view not found for 'SHOW TBLPROPERTIES': badtable ``` to ``` Table or view not found: badtable ``` for commands that use `UnresolvedTable`, `UnresolvedView`, and `UnresolvedTableOrView` to resolve an identifier. ### How was this patch tested? Updated existing tests. Closes #30794 from imback82/remove_cmd_from_exception_msg. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/analysis/CheckAnalysis.scala | 6 +++--- .../test/resources/sql-tests/results/show_columns.sql.out | 8 ++++---- .../org/apache/spark/sql/StatisticsCollectionSuite.scala | 2 +- .../apache/spark/sql/connector/DataSourceV2SQLSuite.scala | 4 ++-- .../org/apache/spark/sql/execution/SQLViewSuite.scala | 4 ++-- .../datasources/v2/jdbc/JDBCTableCatalogSuite.scala | 8 ++++---- .../spark/sql/hive/execution/HiveCommandSuite.scala | 2 +- 7 files changed, 17 insertions(+), 17 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 30467685d75a9..c5a63546c01e3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -102,7 +102,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { u.failAnalysis(s"Namespace not found: ${u.multipartIdentifier.quoted}") case u: UnresolvedTable => - u.failAnalysis(s"Table not found for '${u.commandName}': ${u.multipartIdentifier.quoted}") + u.failAnalysis(s"Table not found: ${u.multipartIdentifier.quoted}") case u @ UnresolvedView(NonSessionCatalogAndIdentifier(catalog, ident), cmd, _, _) => u.failAnalysis( @@ -111,12 +111,12 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { s"$cmd expects a view.") case u: UnresolvedView => - u.failAnalysis(s"View not found for '${u.commandName}': ${u.multipartIdentifier.quoted}") + u.failAnalysis(s"View not found: ${u.multipartIdentifier.quoted}") case u: UnresolvedTableOrView => val viewStr = if (u.allowTempView) "view" else "permanent view" u.failAnalysis( - s"Table or $viewStr not found for '${u.commandName}': ${u.multipartIdentifier.quoted}") + s"Table or $viewStr not found: ${u.multipartIdentifier.quoted}") case u: UnresolvedRelation => u.failAnalysis(s"Table or view not found: ${u.multipartIdentifier.quoted}") diff --git a/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out b/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out index 851e848ed4ec6..3535b30d29c44 100644 --- a/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out @@ -94,7 +94,7 @@ SHOW 
COLUMNS IN badtable FROM showdb struct<> -- !query output org.apache.spark.sql.AnalysisException -Table or view not found for 'SHOW COLUMNS': showdb.badtable; line 1 pos 0 +Table or view not found: showdb.badtable; line 1 pos 0 -- !query @@ -130,7 +130,7 @@ SHOW COLUMNS IN showdb.showcolumn3 struct<> -- !query output org.apache.spark.sql.AnalysisException -Table or view not found for 'SHOW COLUMNS': showdb.showcolumn3; line 1 pos 0 +Table or view not found: showdb.showcolumn3; line 1 pos 0 -- !query @@ -139,7 +139,7 @@ SHOW COLUMNS IN showcolumn3 FROM showdb struct<> -- !query output org.apache.spark.sql.AnalysisException -Table or view not found for 'SHOW COLUMNS': showdb.showcolumn3; line 1 pos 0 +Table or view not found: showdb.showcolumn3; line 1 pos 0 -- !query @@ -148,7 +148,7 @@ SHOW COLUMNS IN showcolumn4 struct<> -- !query output org.apache.spark.sql.AnalysisException -Table or view not found for 'SHOW COLUMNS': showcolumn4; line 1 pos 0 +Table or view not found: showcolumn4; line 1 pos 0 -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index 3fc679f6b9fc7..3b53a5324445b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -542,7 +542,7 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared val errMsg1 = intercept[AnalysisException] { sql(s"ANALYZE TABLE $globalTempDB.gTempView COMPUTE STATISTICS FOR COLUMNS id") }.getMessage - assert(errMsg1.contains("Table or view not found for 'ANALYZE TABLE ... FOR COLUMNS ...': " + + assert(errMsg1.contains("Table or view not found: " + s"$globalTempDB.gTempView")) // Analyzes in a global temporary view sql("CREATE GLOBAL TEMP VIEW gTempView AS SELECT * FROM range(1, 30)") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 638f06d618833..b49a692d26173 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -731,7 +731,7 @@ class DataSourceV2SQLSuite val ex = intercept[AnalysisException] { sql("DROP TABLE testcat.db.notbl") } - assert(ex.getMessage.contains("Table or view not found for 'DROP TABLE': testcat.db.notbl")) + assert(ex.getMessage.contains("Table or view not found: testcat.db.notbl")) sql("DROP TABLE IF EXISTS testcat.db.notbl") } @@ -2015,7 +2015,7 @@ class DataSourceV2SQLSuite sql(s"ALTER VIEW testcat.ns.tbl RENAME TO ns.view") } assert(e.getMessage.contains( - "Table or view not found for 'ALTER VIEW ... 
RENAME TO': testcat.ns.tbl")) + "Table or view not found: testcat.ns.tbl")) } test("ANALYZE TABLE") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 1a248fc18988a..586b31643049f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -452,11 +452,11 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { test("should not allow ALTER VIEW AS when the view does not exist") { assertAnalysisError( "ALTER VIEW testView AS SELECT 1, 2", - "View not found for 'ALTER VIEW ... AS': testView") + "View not found: testView") assertAnalysisError( "ALTER VIEW default.testView AS SELECT 1, 2", - "View not found for 'ALTER VIEW ... AS': default.testView") + "View not found: default.testView") } test("ALTER VIEW AS should try to alter temp view first if view name has no database part") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala index e764f71867426..2fd976e0b9e17 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala @@ -81,9 +81,9 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { checkAnswer(sql("SHOW TABLES IN h2.test"), Seq(Row("test", "people"))) Seq( "h2.test.not_existing_table" -> - "Table or view not found for 'DROP TABLE': h2.test.not_existing_table", + "Table or view not found: h2.test.not_existing_table", "h2.bad_test.not_existing_table" -> - "Table or view not found for 'DROP TABLE': h2.bad_test.not_existing_table" + "Table or view not found: h2.bad_test.not_existing_table" ).foreach { case (table, expectedMsg) => val msg = intercept[AnalysisException] { sql(s"DROP TABLE $table") @@ -110,12 +110,12 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { sql("ALTER TABLE h2.test.not_existing_table RENAME TO test.dst_table") } assert(exp1.getMessage.contains( - "Table or view not found for 'ALTER TABLE ... RENAME TO': h2.test.not_existing_table")) + "Table or view not found: h2.test.not_existing_table")) val exp2 = intercept[AnalysisException] { sql("ALTER TABLE h2.bad_test.not_existing_table RENAME TO test.dst_table") } assert(exp2.getMessage.contains( - "Table or view not found for 'ALTER TABLE ... 
RENAME TO': h2.bad_test.not_existing_table")) + "Table or view not found: h2.bad_test.not_existing_table")) // Rename to an existing table withTable("h2.test.dst_table") { withConnection { conn => diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala index 4feb970ea6f1a..d3398842afb21 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala @@ -137,7 +137,7 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto val message = intercept[AnalysisException] { sql("SHOW TBLPROPERTIES badtable") }.getMessage - assert(message.contains("Table or view not found for 'SHOW TBLPROPERTIES': badtable")) + assert(message.contains("Table or view not found: badtable")) // When key is not found, a row containing the error is returned. checkAnswer( From 3d0323401f7a3e4369a3d3f4ff98f15d19e8a643 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 16 Dec 2020 08:34:22 -0800 Subject: [PATCH 0788/1009] [SPARK-33810][TESTS] Reenable test cases disabled in SPARK-31732 ### What changes were proposed in this pull request? The test failures were due to machine being slow in Jenkins. We switched to Ubuntu 20 if I am not wrong. Looks like all machines are functioning properly unlike the past, and the tests pass without a problem anymore. This PR proposes to enable them back. ### Why are the changes needed? To restore test coverage. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? Jenkins jobs in this PR show the flakiness. Closes #30798 from HyukjinKwon/do-not-merge-test. 
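The mechanics of the re-enablement are just the ScalaTest `ignore`/`test` toggle used in the diffs that follow; a minimal standalone illustration (suite and test names invented for this sketch):

```scala
import org.scalatest.funsuite.AnyFunSuite

// `ignore` keeps a test compiled but skips it at run time, so re-enabling a test is simply
// renaming `ignore(...)` back to `test(...)` as the hunks below do.
class ReenableExampleSuite extends AnyFunSuite {
  ignore("previously flaky check") {               // reported as ignored, body never runs
    assert(1 + 1 == 2)
  }

  test("previously flaky check, re-enabled") {     // runs on every build again
    assert(1 + 1 == 2)
  }
}
```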
Authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- .../spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala | 3 +-- .../org/apache/spark/sql/kafka010/KafkaRelationSuite.scala | 3 +-- .../spark/streaming/kafka010/DirectKafkaStreamSuite.scala | 6 ++---- .../org/apache/spark/streaming/StreamingContextSuite.scala | 3 +-- 4 files changed, 5 insertions(+), 10 deletions(-) diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala index f2be8475151e3..62ba459070c2b 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala @@ -349,8 +349,7 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { ) } - // TODO (SPARK-31731): re-enable it - ignore("subscribing topic by pattern with topic deletions") { + test("subscribing topic by pattern with topic deletions") { val topicPrefix = newTopic() val topic = topicPrefix + "-seems" val topic2 = topicPrefix + "-bad" diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala index 6e9d8de9fa5be..9cec37e708dbb 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala @@ -174,8 +174,7 @@ abstract class KafkaRelationSuiteBase extends QueryTest with SharedSparkSession ("3", Seq(("e", "f".getBytes(UTF_8)), ("e", "g".getBytes(UTF_8))))).toDF) } - // TODO (SPARK-31729): re-enable it - ignore("timestamp provided for starting and ending") { + test("timestamp provided for starting and ending") { val (topic, timestamps) = prepareTimestampRelatedUnitTest // timestamp both presented: starting "first" ending "finalized" diff --git a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala index 72cf3e8118228..2b7fef1e0fde3 100644 --- a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala +++ b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala @@ -332,8 +332,7 @@ class DirectKafkaStreamSuite } // Test to verify the offset ranges can be recovered from the checkpoints - // TODO (SPARK-31722): re-enable it - ignore("offset recovery") { + test("offset recovery") { val topic = "recovery" kafkaTestUtils.createTopic(topic) testDir = Utils.createTempDir() @@ -420,8 +419,7 @@ class DirectKafkaStreamSuite } // Test to verify the offsets can be recovered from Kafka - // TODO (SPARK-31722): re-enable it - ignore("offset recovery from kafka") { + test("offset recovery from kafka") { val topic = "recoveryfromkafka" kafkaTestUtils.createTopic(topic) diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala index 4eff464dcdafb..1d6637861511f 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala +++ 
b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala @@ -293,8 +293,7 @@ class StreamingContextSuite } } - // TODO (SPARK-31728): re-enable it - ignore("stop gracefully") { + test("stop gracefully") { val conf = new SparkConf().setMaster(master).setAppName(appName) conf.set("spark.dummyTimeConfig", "3600s") val sc = new SparkContext(conf) From 728a1298afa78c6acd7cdc4c21ee441120c34716 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Wed, 16 Dec 2020 14:09:28 -0800 Subject: [PATCH 0789/1009] [SPARK-33806][SQL] limit partition num to 1 when distributing by foldable expressions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Using a DISTRIBUTE BY clause with a literal to coalesce partitions seems to be a very popular pattern in pure SQL data processing. For example: ``` insert into table src select * from values (1), (2), (3) t(a) distribute by 1 ``` Users may want the final output to be one single data file, but that is not always what happens. Spark always creates a file for partition 0 whether it contains data or not, so when all the data goes to a partition with index > 0, there will always be 2 files and part-00000 will be empty. In addition, a lot of unnecessary empty tasks will be launched. When users repeat the insert statement daily, hourly, or minutely, this causes small-file issues. ``` spark-sql> set spark.sql.shuffle.partitions=3;drop table if exists test2;create table test2 using parquet as select * from values (1), (2), (3) t(a) distribute by 1; kentyaohulk  ~/spark   SPARK-33806  tree /Users/kentyao/Downloads/spark/spark-3.1.0-SNAPSHOT-bin-20201202/spark-warehouse/test2/ -s /Users/kentyao/Downloads/spark/spark-3.1.0-SNAPSHOT-bin-20201202/spark-warehouse/test2/ ├── [ 0] _SUCCESS ├── [ 298] part-00000-5dc19733-9405-414b-9681-d25c4d3e9ee6-c000.snappy.parquet └── [ 426] part-00001-5dc19733-9405-414b-9681-d25c4d3e9ee6-c000.snappy.parquet ``` To avoid this, there are a few options: 1. use `distribute by null` to let the data go to partition 0 2. set spark.sql.adaptive.enabled to true for Spark to automatically coalesce 3. use hints instead of `distribute by` 4. set spark.sql.shuffle.partitions to 1 In this PR, we set the partition number to 1 in this particular case. ### Why are the changes needed? 1. avoid small-file issues 2. avoid unnecessary empty tasks when adaptive execution is disabled ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? New test Closes #30800 from yaooqinn/SPARK-33806.
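For readers who want to verify the new behavior locally, here is a minimal spark-shell sketch (assuming a build that includes this patch and an active `spark` session); it mirrors the test added in this PR by checking that the optimized plan of a `DISTRIBUTE BY` over a foldable expression reports a single partition:

```scala
import org.apache.spark.sql.catalyst.plans.logical.RepartitionByExpression

// Use a shuffle partition number larger than 1 so the effect of the change is visible.
spark.conf.set("spark.sql.shuffle.partitions", "5")

val plan = spark.sql("SELECT * FROM VALUES (1), (2), (3) t(a) DISTRIBUTE BY 1")
  .queryExecution.optimizedPlan

// With this change, a foldable DISTRIBUTE BY expression collapses to a single partition.
val partitionNums = plan.collect { case r: RepartitionByExpression => r.numPartitions }
assert(partitionNums.nonEmpty && partitionNums.forall(_ == 1))
```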
Authored-by: Kent Yao Signed-off-by: Dongjoon Hyun --- .../plans/logical/basicLogicalOperators.scala | 11 ++++++++++- .../org/apache/spark/sql/SQLQuerySuite.scala | 15 ++++++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 91fb77574a0ca..8c111aa750809 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -1017,7 +1017,16 @@ case class RepartitionByExpression( child: LogicalPlan, optNumPartitions: Option[Int]) extends RepartitionOperation { - val numPartitions = optNumPartitions.getOrElse(SQLConf.get.numShufflePartitions) + val numPartitions = if (optNumPartitions.nonEmpty) { + optNumPartitions.get + } else { + if (partitionExpressions.forall(_.foldable)) { + 1 + } else { + SQLConf.get.numShufflePartitions + } + } + require(numPartitions > 0, s"Number of partitions ($numPartitions) must be positive.") val partitioning: Partitioning = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index ebfe8bdd7a749..112b1a7210cb4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart} import org.apache.spark.sql.catalyst.expressions.GenericRow import org.apache.spark.sql.catalyst.expressions.aggregate.{Complete, Partial} import org.apache.spark.sql.catalyst.optimizer.{ConvertToLocalRelation, NestedColumnAliasingSuite} -import org.apache.spark.sql.catalyst.plans.logical.Project +import org.apache.spark.sql.catalyst.plans.logical.{Project, RepartitionByExpression} import org.apache.spark.sql.catalyst.util.StringUtils import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAggregateExec, SortAggregateExec} @@ -3732,6 +3732,19 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark checkAnswer(sql("SELECT s LIKE 'm@@ca' ESCAPE '@' FROM df"), Row(true)) } } + + test("limit partition num to 1 when distributing by foldable expressions") { + withSQLConf((SQLConf.SHUFFLE_PARTITIONS.key, "5")) { + Seq(1, "1, 2", null, "version()").foreach { expr => + val plan = sql(s"select * from values (1), (2), (3) t(a) distribute by $expr") + .queryExecution.optimizedPlan + val res = plan.collect { + case r: RepartitionByExpression if r.numPartitions == 1 => true + } + assert(res.nonEmpty) + } + } + } } case class Foo(bar: Option[String]) From e7e29fd0affe81a24959ecc0286ec4c85f319722 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Wed, 16 Dec 2020 14:13:02 -0800 Subject: [PATCH 0790/1009] [SPARK-33514][SQL][FOLLOW-UP] Remove unused TruncateTableStatement case class ### What changes were proposed in this pull request? This PR removes unused `TruncateTableStatement`: https://github.com/apache/spark/pull/30457#discussion_r544433820 ### Why are the changes needed? To remove unused `TruncateTableStatement` from #30457. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Not needed. 
Closes #30811 from imback82/remove_truncate_table_stmt. Authored-by: Terry Kim Signed-off-by: Dongjoon Hyun --- .../spark/sql/catalyst/plans/logical/statements.scala | 7 ------- 1 file changed, 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index c8395f375b4ed..58776f549d817 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -354,13 +354,6 @@ case class CreateNamespaceStatement( */ case class UseStatement(isNamespaceSet: Boolean, nameParts: Seq[String]) extends ParsedStatement -/** - * A TRUNCATE TABLE statement, as parsed from SQL - */ -case class TruncateTableStatement( - tableName: Seq[String], - partitionSpec: Option[TablePartitionSpec]) extends ParsedStatement - /** * A SHOW CURRENT NAMESPACE statement, as parsed from SQL */ From 477046c63fab281570d26a183be4b0b8b77ac41a Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Wed, 16 Dec 2020 15:06:25 -0800 Subject: [PATCH 0791/1009] [SPARK-33775][BUILD] Suppress sbt compilation warnings in Scala 2.13 ### What changes were proposed in this pull request? There are too many compilation warnings in Scala 2.13. This PR adds some `-Wconf:msg= regexes` rules to `SparkBuild.scala` to suppress compilation warnings, so that the suppressed warnings will not be printed to the console. The suppressed compilation warnings include: - All warnings related to `method\value\type\object\trait\inheritance` deprecated since 2.13 - All warnings related to `Widening conversion from XXX to YYY is deprecated because it loses precision` - Auto-application to `()` is deprecated. Supply the empty argument list `()` explicitly to invoke method `methodName`, or remove the empty argument list from its definition (Java-defined methods are exempt). In Scala 3, an unapplied method like this will be eta-expanded into a function. - method with a single empty parameter list overrides method without any parameter list - method without a parameter list overrides a method with a single empty one The compilation warnings that are not suppressed include: - Unicode escapes in triple quoted strings are deprecated, use the literal character instead. - view bounds are deprecated - symbol literal is deprecated ### Why are the changes needed? Suppress unimportant compilation warnings in Scala 2.13. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass the Jenkins or GitHub Action builds. Closes #30760 from LuciferYang/SPARK-33775.
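As a quick illustration of how these `-Wconf` filters behave, here is a minimal, hypothetical `build.sbt` fragment (not part of this patch; Spark adds the equivalent rules in `project/SparkBuild.scala`, as shown in the diff below) that silences one of the message patterns listed above while leaving other warnings at their default level:

```scala
// Hypothetical build.sbt fragment; requires Scala 2.13.2+ where the compiler supports -Wconf.
scalacOptions ++= Seq(
  // msg=<regex> matches the warning text; the trailing ":s" action silences matching warnings.
  // Other actions include ":w" (warning), ":e" (error) and ":wv" (warning-verbose).
  "-Wconf:msg=Auto-application to \\`\\(\\)\\` is deprecated:s",
  // Warnings that match no filter keep their default behavior.
  "-Wconf:any:w"
)
```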
Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- project/SparkBuild.scala | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index a28c2b55b3789..3098060478f40 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -238,7 +238,15 @@ object SparkBuild extends PomBuild { "-Wconf:cat=other-match-analysis&site=org.apache.spark.sql.catalyst.catalog.SessionCatalog.lookupFunction.catalogFunction:wv", "-Wconf:cat=other-pure-statement&site=org.apache.spark.streaming.util.FileBasedWriteAheadLog.readAll.readFile:wv", "-Wconf:cat=other-pure-statement&site=org.apache.spark.scheduler.OutputCommitCoordinatorSuite..futureAction:wv", - "-Wconf:cat=other-pure-statement&site=org.apache.spark.sql.streaming.sources.StreamingDataSourceV2Suite.testPositiveCase.\\$anonfun:wv" + "-Wconf:cat=other-pure-statement&site=org.apache.spark.sql.streaming.sources.StreamingDataSourceV2Suite.testPositiveCase.\\$anonfun:wv", + // SPARK-33775 Suppress compilation warnings that contain the following contents. + // TODO(SPARK-33805): Undo the corresponding deprecated usage suppression rule after + // fixed. + "-Wconf:msg=^(?=.*?method|value|type|object|trait|inheritance)(?=.*?deprecated)(?=.*?since 2.13).+$:s", + "-Wconf:msg=^(?=.*?Widening conversion from)(?=.*?is deprecated because it loses precision).+$:s", + "-Wconf:msg=Auto-application to \\`\\(\\)\\` is deprecated:s", + "-Wconf:msg=method with a single empty parameter list overrides method without any parameter list:s", + "-Wconf:msg=method without a parameter list overrides a method with a single empty one:s" ) } } From 0c129001201ccb63ae96f576b6f354da84024fb3 Mon Sep 17 00:00:00 2001 From: sychen Date: Thu, 17 Dec 2020 11:36:31 +0900 Subject: [PATCH 0792/1009] [SPARK-33790][CORE] Reduce the rpc call of getFileStatus in SingleFileEventLogFileReader ### What changes were proposed in this pull request? `FsHistoryProvider#checkForLogs` already has `FileStatus` when constructing `SingleFileEventLogFileReader`, and there is no need to get the `FileStatus` again when `SingleFileEventLogFileReader#fileSizeForLastIndex`. ### Why are the changes needed? This can reduce a lot of rpc calls and improve the speed of the history server. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? exist ut Closes #30780 from cxzl25/SPARK-33790. 
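The essence of the change, shown in isolation as a simplified sketch (the class and members below are illustrative stand-ins, not the actual Spark classes), is to accept an optional, already-fetched `FileStatus` and fall back to a `getFileStatus` RPC only when the caller did not supply one:

```scala
import org.apache.hadoop.fs.{FileStatus, FileSystem, Path}

// Simplified illustration of the reuse pattern applied in this patch.
class EventLogReaderSketch(
    fs: FileSystem,
    path: Path,
    maybeStatus: Option[FileStatus] = None) {

  // Reuse the status the caller already has; the RPC runs only on the fallback path,
  // and at most once, since the lazy val caches the result.
  private lazy val status: FileStatus = maybeStatus.getOrElse(fs.getFileStatus(path))

  def fileSize: Long = status.getLen
}
```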
Authored-by: sychen Signed-off-by: Jungtaek Lim --- .../apache/spark/deploy/history/EventLogFileReaders.scala | 7 ++++--- project/MimaExcludes.scala | 5 ++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala index 9f63a6441a838..5a34f0b71edef 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala @@ -116,7 +116,7 @@ object EventLogFileReader { def apply(fs: FileSystem, status: FileStatus): Option[EventLogFileReader] = { if (isSingleEventLog(status)) { - Some(new SingleFileEventLogFileReader(fs, status.getPath)) + Some(new SingleFileEventLogFileReader(fs, status.getPath, Option(status))) } else if (isRollingEventLogs(status)) { Some(new RollingEventLogFilesFileReader(fs, status.getPath)) } else { @@ -166,8 +166,9 @@ object EventLogFileReader { */ class SingleFileEventLogFileReader( fs: FileSystem, - path: Path) extends EventLogFileReader(fs, path) { - private lazy val status = fileSystem.getFileStatus(rootPath) + path: Path, + maybeStatus: Option[FileStatus] = None) extends EventLogFileReader(fs, path) { + private lazy val status = maybeStatus.getOrElse(fileSystem.getFileStatus(rootPath)) override def lastIndex: Option[Long] = None diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 33e65c9def41b..8f47d51799dd5 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -109,7 +109,10 @@ object MimaExcludes { ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.weightCol"), // [SPARK-32879] Pass SparkSession.Builder options explicitly to SparkSession - ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SparkSession.this") + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SparkSession.this"), + + // [SPARK-33790][CORE] Reduce the rpc call of getFileStatus in SingleFileEventLogFileReader + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.deploy.history.SingleFileEventLogFileReader.this") ) // Exclude rules for 3.0.x From 0c19497222c26818ecdde527601c12c757acb4ad Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Thu, 17 Dec 2020 05:25:51 +0000 Subject: [PATCH 0793/1009] [SPARK-33815][SQL] Migrate ALTER TABLE ... SET [SERDE|SERDEPROPERTIES] to use UnresolvedTable to resolve the identifier ### What changes were proposed in this pull request? This PR proposes to migrate `ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]` to use `UnresolvedTable` to resolve the table identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). Note that `ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]` is not supported for v2 tables. ### Why are the changes needed? The PR makes the resolution behavior consistent.
For example, ```scala sql("CREATE DATABASE test") sql("CREATE TABLE spark_catalog.test.t (id bigint, val string) USING csv PARTITIONED BY (id)") sql("CREATE TEMPORARY VIEW t AS SELECT 2") sql("USE spark_catalog.test") sql("ALTER TABLE t SET SERDE 'serdename'") // works fine ``` , but after this PR: ``` sql("ALTER TABLE t SET SERDE 'serdename'") org.apache.spark.sql.AnalysisException: t is a temp view. 'ALTER TABLE ... SET [SERDE|SERDEPROPERTIES\' expects a table; line 1 pos 0 ``` , which is the consistent behavior with other commands. ### Does this PR introduce _any_ user-facing change? After this PR, `t` in the above example is resolved to a temp view first instead of `spark_catalog.test.t`. ### How was this patch tested? Updated existing tests. Closes #30813 from imback82/alter_table_serde_v2. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/AstBuilder.scala | 8 +++-- .../catalyst/plans/logical/statements.scala | 9 ----- .../catalyst/plans/logical/v2Commands.scala | 11 ++++++ .../sql/catalyst/parser/DDLParserSuite.scala | 34 +++++++++++-------- .../analysis/ResolveSessionCatalog.scala | 9 +++-- .../datasources/v2/DataSourceV2Strategy.scala | 4 +++ .../sql/connector/DataSourceV2SQLSuite.scala | 3 +- .../spark/sql/execution/SQLViewSuite.scala | 12 +++++-- .../sql/hive/execution/HiveDDLSuite.scala | 13 ++++--- 9 files changed, 66 insertions(+), 37 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 426dff343818b..94589688953d7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3806,7 +3806,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } /** - * Create an [[AlterTableSerDePropertiesStatement]] + * Create an [[AlterTableSerDeProperties]] * * For example: * {{{ @@ -3816,8 +3816,10 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * }}} */ override def visitSetTableSerDe(ctx: SetTableSerDeContext): LogicalPlan = withOrigin(ctx) { - AlterTableSerDePropertiesStatement( - visitMultipartIdentifier(ctx.multipartIdentifier), + AlterTableSerDeProperties( + UnresolvedTable( + visitMultipartIdentifier(ctx.multipartIdentifier), + "ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]"), Option(ctx.STRING).map(string), Option(ctx.tablePropertyList).map(visitPropertyKeyValues), // TODO a partition spec is allowed to have optional values. This is currently violated. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index 58776f549d817..59239f6e041a5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -300,15 +300,6 @@ case class AlterTableRenamePartitionStatement( from: TablePartitionSpec, to: TablePartitionSpec) extends ParsedStatement -/** - * ALTER TABLE ... 
SERDEPROPERTIES command, as parsed from SQL - */ -case class AlterTableSerDePropertiesStatement( - tableName: Seq[String], - serdeClassName: Option[String], - serdeProperties: Option[Map[String, String]], - partitionSpec: Option[TablePartitionSpec]) extends ParsedStatement - /** * An INSERT INTO statement, as parsed from SQL. * diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index d13ad977910d9..fa67d311c39c3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -777,6 +777,17 @@ case class AlterViewUnsetProperties( override def children: Seq[LogicalPlan] = child :: Nil } +/** + * The logical plan of the ALTER TABLE ... SET [SERDE|SERDEPROPERTIES] command. + */ +case class AlterTableSerDeProperties( + child: LogicalPlan, + serdeClassName: Option[String], + serdeProperties: Option[Map[String, String]], + partitionSpec: Option[TablePartitionSpec]) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} + /** * The logical plan of the CACHE TABLE command. */ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 9bea6517156ae..5eb0c9a39f1e6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -2134,8 +2134,11 @@ class DDLParserSuite extends AnalysisTest { test("alter table: SerDe properties") { val sql1 = "ALTER TABLE table_name SET SERDE 'org.apache.class'" val parsed1 = parsePlan(sql1) - val expected1 = AlterTableSerDePropertiesStatement( - Seq("table_name"), Some("org.apache.class"), None, None) + val expected1 = AlterTableSerDeProperties( + UnresolvedTable(Seq("table_name"), "ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]"), + Some("org.apache.class"), + None, + None) comparePlans(parsed1, expected1) val sql2 = @@ -2144,8 +2147,8 @@ class DDLParserSuite extends AnalysisTest { |WITH SERDEPROPERTIES ('columns'='foo,bar', 'field.delim' = ',') """.stripMargin val parsed2 = parsePlan(sql2) - val expected2 = AlterTableSerDePropertiesStatement( - Seq("table_name"), + val expected2 = AlterTableSerDeProperties( + UnresolvedTable(Seq("table_name"), "ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]"), Some("org.apache.class"), Some(Map("columns" -> "foo,bar", "field.delim" -> ",")), None) @@ -2157,8 +2160,11 @@ class DDLParserSuite extends AnalysisTest { |SET SERDEPROPERTIES ('columns'='foo,bar', 'field.delim' = ',') """.stripMargin val parsed3 = parsePlan(sql3) - val expected3 = AlterTableSerDePropertiesStatement( - Seq("table_name"), None, Some(Map("columns" -> "foo,bar", "field.delim" -> ",")), None) + val expected3 = AlterTableSerDeProperties( + UnresolvedTable(Seq("table_name"), "ALTER TABLE ... 
SET [SERDE|SERDEPROPERTIES]"), + None, + Some(Map("columns" -> "foo,bar", "field.delim" -> ",")), + None) comparePlans(parsed3, expected3) val sql4 = @@ -2168,8 +2174,8 @@ class DDLParserSuite extends AnalysisTest { |WITH SERDEPROPERTIES ('columns'='foo,bar', 'field.delim' = ',') """.stripMargin val parsed4 = parsePlan(sql4) - val expected4 = AlterTableSerDePropertiesStatement( - Seq("table_name"), + val expected4 = AlterTableSerDeProperties( + UnresolvedTable(Seq("table_name"), "ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]"), Some("org.apache.class"), Some(Map("columns" -> "foo,bar", "field.delim" -> ",")), Some(Map("test" -> "1", "dt" -> "2008-08-08", "country" -> "us"))) @@ -2181,8 +2187,8 @@ class DDLParserSuite extends AnalysisTest { |SET SERDEPROPERTIES ('columns'='foo,bar', 'field.delim' = ',') """.stripMargin val parsed5 = parsePlan(sql5) - val expected5 = AlterTableSerDePropertiesStatement( - Seq("table_name"), + val expected5 = AlterTableSerDeProperties( + UnresolvedTable(Seq("table_name"), "ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]"), None, Some(Map("columns" -> "foo,bar", "field.delim" -> ",")), Some(Map("test" -> "1", "dt" -> "2008-08-08", "country" -> "us"))) @@ -2194,8 +2200,8 @@ class DDLParserSuite extends AnalysisTest { |WITH SERDEPROPERTIES ('columns'='foo,bar', 'field.delim' = ',') """.stripMargin val parsed6 = parsePlan(sql6) - val expected6 = AlterTableSerDePropertiesStatement( - Seq("a", "b", "c"), + val expected6 = AlterTableSerDeProperties( + UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]"), Some("org.apache.class"), Some(Map("columns" -> "foo,bar", "field.delim" -> ",")), None) @@ -2207,8 +2213,8 @@ class DDLParserSuite extends AnalysisTest { |SET SERDEPROPERTIES ('columns'='foo,bar', 'field.delim' = ',') """.stripMargin val parsed7 = parsePlan(sql7) - val expected7 = AlterTableSerDePropertiesStatement( - Seq("a", "b", "c"), + val expected7 = AlterTableSerDeProperties( + UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... 
SET [SERDE|SERDEPROPERTIES]"), None, Some(Map("columns" -> "foo,bar", "field.delim" -> ",")), Some(Map("test" -> "1", "dt" -> "2008-08-08", "country" -> "us"))) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 802068de10d16..1426d28cbbf88 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -476,10 +476,13 @@ class ResolveSessionCatalog( purge, retainData = false) - case AlterTableSerDePropertiesStatement(tbl, serdeClassName, serdeProperties, partitionSpec) => - val v1TableName = parseV1Table(tbl, "ALTER TABLE SerDe Properties") + case AlterTableSerDeProperties( + ResolvedV1TableIdentifier(ident), + serdeClassName, + serdeProperties, + partitionSpec) => AlterTableSerDePropertiesCommand( - v1TableName.asTableIdentifier, + ident.asTableIdentifier, serdeClassName, serdeProperties, partitionSpec) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 120fa5288dda9..f9c89051e421a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -334,6 +334,10 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat throw new AnalysisException( "ALTER TABLE ... RECOVER PARTITIONS is not supported for v2 tables.") + case AlterTableSerDeProperties(_: ResolvedTable, _, _, _) => + throw new AnalysisException( + "ALTER TABLE ... SET [SERDE|SERDEPROPERTIES] is not supported for v2 tables.") + case LoadData(_: ResolvedTable, _, _, _, _) => throw new AnalysisException("LOAD DATA is not supported for v2 tables.") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index b49a692d26173..b335dc31a3037 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2120,7 +2120,8 @@ class DataSourceV2SQLSuite val e = intercept[AnalysisException] { sql(s"ALTER TABLE $t SET SERDEPROPERTIES ('columns'='foo,bar', 'field.delim' = ',')") } - assert(e.message.contains("ALTER TABLE SerDe Properties is only supported with v1 tables")) + assert(e.message.contains( + "ALTER TABLE ... 
SET [SERDE|SERDEPROPERTIES] is not supported for v2 tables")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 586b31643049f..6d65fddb1be62 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -140,9 +140,15 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { val viewName = "testView" withTempView(viewName) { spark.range(10).createTempView(viewName) - assertNoSuchTable(s"ALTER TABLE $viewName SET SERDE 'whatever'") - assertNoSuchTable(s"ALTER TABLE $viewName PARTITION (a=1, b=2) SET SERDE 'whatever'") - assertNoSuchTable(s"ALTER TABLE $viewName SET SERDEPROPERTIES ('p' = 'an')") + assertAnalysisError( + s"ALTER TABLE $viewName SET SERDE 'whatever'", + s"$viewName is a temp view. 'ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]' expects a table") + assertAnalysisError( + s"ALTER TABLE $viewName PARTITION (a=1, b=2) SET SERDE 'whatever'", + s"$viewName is a temp view. 'ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]' expects a table") + assertAnalysisError( + s"ALTER TABLE $viewName SET SERDEPROPERTIES ('p' = 'an')", + s"$viewName is a temp view. 'ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]' expects a table") assertNoSuchTable(s"ALTER TABLE $viewName PARTITION (a='4') RENAME TO PARTITION (a='5')") assertAnalysisError( s"ALTER TABLE $viewName RECOVER PARTITIONS", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index f8a5c7f57eec5..aac4b88d9e3f8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -888,12 +888,17 @@ class HiveDDLSuite assertErrorForAlterTableOnView(s"ALTER TABLE $oldViewName SET LOCATION '/path/to/home'") - assertErrorForAlterTableOnView(s"ALTER TABLE $oldViewName SET SERDE 'whatever'") + assertAnalysisError( + s"ALTER TABLE $oldViewName SET SERDE 'whatever'", + s"$oldViewName is a view. 'ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]' expects a table.") - assertErrorForAlterTableOnView(s"ALTER TABLE $oldViewName SET SERDEPROPERTIES ('x' = 'y')") + assertAnalysisError( + s"ALTER TABLE $oldViewName SET SERDEPROPERTIES ('x' = 'y')", + s"$oldViewName is a view. 'ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]' expects a table.") - assertErrorForAlterTableOnView( - s"ALTER TABLE $oldViewName PARTITION (a=1, b=2) SET SERDEPROPERTIES ('x' = 'y')") + assertAnalysisError( + s"ALTER TABLE $oldViewName PARTITION (a=1, b=2) SET SERDEPROPERTIES ('x' = 'y')", + s"$oldViewName is a view. 'ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]' expects a table.") assertAnalysisError( s"ALTER TABLE $oldViewName RECOVER PARTITIONS", From 1e85707738a830d33598ca267a6740b3f06b1861 Mon Sep 17 00:00:00 2001 From: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Date: Thu, 17 Dec 2020 05:47:44 +0000 Subject: [PATCH 0794/1009] [SPARK-33697][SQL] RemoveRedundantProjects should require column ordering by default ### What changes were proposed in this pull request? This PR changes the rule `RemoveRedundantProjects` from by default passing column ordering requirements from parent nodes to always require column orders regardless of the requirements from parent nodes unless otherwise specified. 
More specifically, instead of excluding a few nodes like GenerateExec, UnionExec that are known to require children columns to be ordered, the rule now includes a whitelist of nodes that allow passing through the ordering requirements from their parents. ### Why are the changes needed? Currently, this rule passes through ordering requirements from parents directly to children except for a few excluded nodes. This incorrectly removes the necessary project nodes below a UnionExec since it is not excluded. An earlier PR also fixed a similar issue for GenerateExec (SPARK-32861). In order to prevent similar issues, the rule should be changed to always require column ordering except for a few specific nodes that we know for sure can pass through the requirements. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit tests Closes #30659 from allisonwang-db/spark-33697-remove-project-union. Authored-by: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Signed-off-by: Wenchen Fan --- .../execution/RemoveRedundantProjects.scala | 24 +++++++--- .../RemoveRedundantProjectsSuite.scala | 47 +++++++++++++++++++ 2 files changed, 65 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala index bbe3f50492d9f..bfb6e805c0541 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala @@ -22,6 +22,8 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.{Final, PartialMerge} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.aggregate.BaseAggregateExec import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanExecBase +import org.apache.spark.sql.execution.joins.BaseJoinExec +import org.apache.spark.sql.execution.window.WindowExec import org.apache.spark.sql.internal.SQLConf /** @@ -61,15 +63,25 @@ object RemoveRedundantProjects extends Rule[SparkPlan] { val keepOrdering = a.aggregateExpressions .exists(ae => ae.mode.equals(Final) || ae.mode.equals(PartialMerge)) a.mapChildren(removeProject(_, keepOrdering)) - // GenerateExec requires column ordering since it binds input rows directly with its - // requiredChildOutput without using child's output schema. - case g: GenerateExec => g.mapChildren(removeProject(_, true)) - // JoinExec ordering requirement will inherit from its parent. If there is no ProjectExec in - // its ancestors, JoinExec should require output columns to be ordered. - case o => o.mapChildren(removeProject(_, requireOrdering)) + case o => + val required = if (canPassThrough(o)) requireOrdering else true + o.mapChildren(removeProject(_, requireOrdering = required)) } } + /** + * Check if the given node can pass the ordering requirement from its parent. + */ + private def canPassThrough(plan: SparkPlan): Boolean = plan match { + case _: FilterExec => true + // JoinExec ordering requirement should inherit from its parent. If there is no ProjectExec in + // its ancestors, JoinExec should require output columns to be ordered, and vice versa. + case _: BaseJoinExec => true + case _: WindowExec => true + case _: ExpandExec => true + case _ => false + } + /** * Check if the nullability change is positive. 
It catches the case when the project output * attribute is not nullable, but the child output attribute is nullable. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala index 2de9d21abca82..040c5189abcb6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala @@ -166,6 +166,53 @@ abstract class RemoveRedundantProjectsSuiteBase assertProjectExec(query, 0, 1) } } + + test("SPARK-33697: UnionExec should require column ordering") { + withTable("t1", "t2") { + spark.range(-10, 20) + .selectExpr( + "id", + "date_add(date '1950-01-01', cast(id as int)) as datecol", + "cast(id as string) strcol") + .write.mode("overwrite").format("parquet").saveAsTable("t1") + spark.range(-10, 20) + .selectExpr( + "cast(id as string) strcol", + "id", + "date_add(date '1950-01-01', cast(id as int)) as datecol") + .write.mode("overwrite").format("parquet").saveAsTable("t2") + + val queryTemplate = + """ + |SELECT DISTINCT datecol, strcol FROM + |( + |(SELECT datecol, id, strcol from t1) + | %s + |(SELECT datecol, id, strcol from t2) + |) + |""".stripMargin + + Seq(("UNION", 2, 2), ("UNION ALL", 1, 2)).foreach { case (setOperation, enabled, disabled) => + val query = queryTemplate.format(setOperation) + assertProjectExec(query, enabled = enabled, disabled = disabled) + } + } + } + + test("SPARK-33697: remove redundant projects under expand") { + val query = + """ + |SELECT t1.key, t2.key, sum(t1.a) AS s1, sum(t2.b) AS s2 FROM + |(SELECT a, key FROM testView) t1 + |JOIN + |(SELECT b, key FROM testView) t2 + |ON t1.key = t2.key + |GROUP BY t1.key, t2.key GROUPING SETS(t1.key, t2.key) + |ORDER BY t1.key, t2.key, s1, s2 + |LIMIT 10 + |""".stripMargin + assertProjectExec(query, 0, 3) + } } class RemoveRedundantProjectsSuite extends RemoveRedundantProjectsSuiteBase From b1950cc9162999c2200a0a988fa28aee640fb459 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 17 Dec 2020 15:49:40 +0900 Subject: [PATCH 0795/1009] [SPARK-33821][BUILD] Upgrade SBT to 1.4.5 ### What changes were proposed in this pull request? This PR aims to upgrade SBT to 1.4.5 to support Apple Silicon. ### Why are the changes needed? The following is the release note including `sbt 1.4.5 adds support for Apple silicon (AArch64 also called ARM64)`. - https://github.com/sbt/sbt/releases/tag/v1.4.5 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #30817 from dongjoon-hyun/SPARK-33821. Authored-by: Dongjoon Hyun Signed-off-by: HyukjinKwon --- project/build.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/build.properties b/project/build.properties index c92de941c10be..35ee6fea6d336 100644 --- a/project/build.properties +++ b/project/build.properties @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -sbt.version=1.4.4 +sbt.version=1.4.5 From ed09673fb941830c15e5e5ad748be9de4755935c Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 17 Dec 2020 15:51:04 +0900 Subject: [PATCH 0796/1009] [SPARK-33819][CORE] SingleFileEventLogFileReader/RollingEventLogFilesFileReader should be `package private` ### What changes were proposed in this pull request? 
This PR aims to convert `EventLogFileReader`'s derived classes into `package private`. - SingleFileEventLogFileReader - RollingEventLogFilesFileReader `EventLogFileReader` itself is used in `scheduler` module during tests. ### Why are the changes needed? This classes were designed to be internal. This PR hides it explicitly to reduce the maintenance burden. ### Does this PR introduce _any_ user-facing change? Yes, but these were exposed accidentally. ### How was this patch tested? Pass CIs. Closes #30814 from dongjoon-hyun/SPARK-33790. Authored-by: Dongjoon Hyun Signed-off-by: HyukjinKwon --- .../apache/spark/deploy/history/EventLogFileReaders.scala | 8 +++++--- project/MimaExcludes.scala | 5 +---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala index 5a34f0b71edef..b4771c80a175f 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala @@ -164,12 +164,14 @@ object EventLogFileReader { * FileNotFoundException could occur if the log file is renamed before getting the * status of log file. */ -class SingleFileEventLogFileReader( +private[history] class SingleFileEventLogFileReader( fs: FileSystem, path: Path, - maybeStatus: Option[FileStatus] = None) extends EventLogFileReader(fs, path) { + maybeStatus: Option[FileStatus]) extends EventLogFileReader(fs, path) { private lazy val status = maybeStatus.getOrElse(fileSystem.getFileStatus(rootPath)) + def this(fs: FileSystem, path: Path) = this(fs, path, None) + override def lastIndex: Option[Long] = None override def fileSizeForLastIndex: Long = status.getLen @@ -204,7 +206,7 @@ class SingleFileEventLogFileReader( * This reader lists the files only once; if caller would like to play with updated list, * it needs to create another reader instance. */ -class RollingEventLogFilesFileReader( +private[history] class RollingEventLogFilesFileReader( fs: FileSystem, path: Path) extends EventLogFileReader(fs, path) { import RollingEventLogFilesWriter._ diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 8f47d51799dd5..33e65c9def41b 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -109,10 +109,7 @@ object MimaExcludes { ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.weightCol"), // [SPARK-32879] Pass SparkSession.Builder options explicitly to SparkSession - ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SparkSession.this"), - - // [SPARK-33790][CORE] Reduce the rpc call of getFileStatus in SingleFileEventLogFileReader - ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.deploy.history.SingleFileEventLogFileReader.this") + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SparkSession.this") ) // Exclude rules for 3.0.x From 12b69cc27caa476a9a29844f8d096f08263ba6ef Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 17 Dec 2020 17:20:45 +0900 Subject: [PATCH 0797/1009] [SPARK-26199][SPARK-31517][R] Fix strategy for handling ... names in mutate ### What changes were proposed in this pull request? Change the strategy for how the varargs are handled in the default `mutate` method ### Why are the changes needed? 
Bugfix -- `deparse` + `sapply` not working as intended due to `width.cutoff` ### Does this PR introduce any user-facing change? Yes, bugfix. Shouldn't change any working code. ### How was this patch tested? None! yet. Closes #28386 from MichaelChirico/r-mutate-deparse. Lead-authored-by: Michael Chirico Co-authored-by: Michael Chirico Signed-off-by: HyukjinKwon --- R/pkg/R/DataFrame.R | 18 ++++++++++-------- R/pkg/tests/fulltests/test_sparkSQL.R | 15 +++++++++++++++ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 31a651ea1279b..8ca338f09969b 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2277,16 +2277,17 @@ setMethod("mutate", # For named arguments, use the names for arguments as the column names # For unnamed arguments, use the argument symbols as the column names - args <- sapply(substitute(list(...))[-1], deparse) ns <- names(cols) - if (!is.null(ns)) { - lapply(seq_along(args), function(i) { - if (ns[[i]] != "") { - args[[i]] <<- ns[[i]] - } + if (is.null(ns)) ns <- rep("", length(cols)) + named_idx <- nzchar(ns) + if (!all(named_idx)) { + # SPARK-31517: deparse uses width.cutoff on wide input and the + # output is length>1, so need to collapse it to scalar + colsub <- substitute(list(...))[-1L] + ns[!named_idx] <- sapply(which(!named_idx), function(ii) { + paste(gsub("^\\s*|\\s*$", "", deparse(colsub[[ii]])), collapse = " ") }) } - ns <- args # The last column of the same name in the specific columns takes effect deDupCols <- list() @@ -3444,7 +3445,8 @@ setMethod("as.data.frame", #' @note attach since 1.6.0 setMethod("attach", signature(what = "SparkDataFrame"), - function(what, pos = 2L, name = deparse(substitute(what), backtick = FALSE), + function(what, pos = 2L, + name = paste(deparse(substitute(what), backtick = FALSE), collapse = " "), warn.conflicts = TRUE) { args <- as.list(environment()) # capture all parameters - this must be the first line newEnv <- assignNewEnv(args$what) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index c623f534f706c..ebf08b9559379 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -2884,6 +2884,15 @@ test_that("mutate(), transform(), rename() and names()", { expect_equal(nrow(result), 153) expect_equal(ncol(result), 2) detach(airquality) + + # ensure long inferred names are handled without error (SPARK-26199) + # test implicitly assumes eval(formals(deparse)$width.cutoff) = 60 + # (which has always been true as of 2020-11-15) + newDF <- mutate( + df, + df$age + 12345678901234567890 + 12345678901234567890 + 12345678901234 + ) + expect_match(tail(columns(newDF), 1L), "234567890", fixed = TRUE) }) test_that("read/write ORC files", { @@ -3273,6 +3282,12 @@ test_that("attach() on a DataFrame", { stat3 <- summary(df[, "age", drop = F]) expect_equal(collect(stat3)[8, "age"], "30") expect_error(age) + + # attach method uses deparse(); ensure no errors from a very long input + abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnop <- df # nolint + attach(abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnop) + expect_true(any(grepl("abcdefghijklmnopqrstuvwxyz", search()))) + detach("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnop") }) test_that("with() on a DataFrame", { From 34e4d87023535c086a0aa43fe194f794b41e09b7 Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Thu, 17 Dec 2020 08:52:09 -0600 Subject: [PATCH 0798/1009] [SPARK-33774][UI][CORE] 
Back to Master" returns 500 error in Standalone cluster ### What changes were proposed in this pull request? Initiate the `masterWebUiUrl` with the `webUi. webUrl` instead of the `masterPublicAddress`. ### Why are the changes needed? Since [SPARK-21642](https://issues.apache.org/jira/browse/SPARK-21642), `WebUI` has changed from `localHostName` to `localCanonicalHostName` as the hostname to set up the web UI. However, the `masterPublicAddress` is from `RpcEnv`'s host address, which still uses `localHostName`. As a result, it returns the wrong Master web URL to the Worker. ### Does this PR introduce _any_ user-facing change? Yes, when users click "Back to Master" in the Worker page: Before this PR: WeChat4acbfd163f51c76a5f9bc388c7479785 After this PR: ![image](https://user-images.githubusercontent.com/16397174/102058016-d438b700-3e29-11eb-8641-a23a6b2f542e.png) (Return to the Master page successfully.) ### How was this patch tested? Tested manually. Closes #30759 from Ngone51/fix-back-to-master. Authored-by: yi.wu Signed-off-by: Sean Owen --- core/src/main/scala/org/apache/spark/deploy/master/Master.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index cccd3da323774..9f1b36ad1c8c1 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -143,7 +143,7 @@ private[deploy] class Master( logInfo(s"Running Spark version ${org.apache.spark.SPARK_VERSION}") webUi = new MasterWebUI(this, webUiPort) webUi.bind() - masterWebUiUrl = s"${webUi.scheme}$masterPublicAddress:${webUi.boundPort}" + masterWebUiUrl = webUi.webUrl if (reverseProxy) { val uiReverseProxyUrl = conf.get(UI_REVERSE_PROXY_URL).map(_.stripSuffix("/")) if (uiReverseProxyUrl.nonEmpty) { From 8c81cf7d71baf34dfafe54835a90cc19e7293561 Mon Sep 17 00:00:00 2001 From: suqilong Date: Thu, 17 Dec 2020 08:56:45 -0600 Subject: [PATCH 0799/1009] [SPARK-22769] Do not log rpc post message error when sparkEnv is already stopped MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? When driver stopping, pending rpc requests will cause error like: > 17/12/12 18:30:16 ERROR TransportRequestHandler: Error while invoking RpcHandler#receive() for one-way message. org.apache.spark.SparkException: Could not find CoarseGrainedScheduler. at org.apache.spark.rpc.netty.Dispatcher.postMessage(Dispatcher.scala:154) at org.apache.spark.rpc.netty.Dispatcher.postOneWayMessage(Dispatcher.scala:134) at org.apache.spark.rpc.netty.NettyRpcHandler.receive(NettyRpcEnv.scala:570) at org.apache.spark.network.server.TransportRequestHandler.processOneWayMessage(TransportRequestHandler.java:180) at org.apache.spark.network.server.TransportRequestHandler.handle(TransportRequestHandler.java:109) at org.apache.spark.network.server.TransportChannelHandler.channelRead0(TransportChannelHandler.java:119) at org.apache.spark.network.server.TransportChannelHandler.channelRead0(TransportChannelHandler.java:51) Or like: > 17/12/12 18:20:44 INFO MemoryStore: MemoryStore cleared 17/12/12 18:20:44 INFO BlockManager: BlockManager stopped 17/12/12 18:20:44 INFO BlockManagerMaster: BlockManagerMaster stopped 17/12/12 18:20:44 ERROR TransportRequestHandler: Error while invoking RpcHandler#receive() for one-way message. 
org.apache.spark.rpc.RpcEnvStoppedException: RpcEnv already stopped. at org.apache.spark.rpc.netty.Dispatcher.postMessage(Dispatcher.scala:152) at org.apache.spark.rpc.netty.Dispatcher.postOneWayMessage(Dispatcher.scala:134) at org.apache.spark.rpc.netty.NettyRpcHandler.receive(NettyRpcEnv.scala:570) These are because CoarseGrainedScheduler and rpcEnv are already stopped, they're not error. The related issue SPARK-22769 was opened on 2017, but the author didn't finish the pull request, so reopen this issue. ### How was this patch tested? Existing tests Closes #30658 from sqlwindspeaker/donot-log-rpc-error. Authored-by: suqilong Signed-off-by: Sean Owen --- .../scala/org/apache/spark/rpc/netty/Dispatcher.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rpc/netty/Dispatcher.scala b/core/src/main/scala/org/apache/spark/rpc/netty/Dispatcher.scala index 4a9f551646fc7..14198743c4801 100644 --- a/core/src/main/scala/org/apache/spark/rpc/netty/Dispatcher.scala +++ b/core/src/main/scala/org/apache/spark/rpc/netty/Dispatcher.scala @@ -24,7 +24,7 @@ import scala.collection.JavaConverters._ import scala.concurrent.Promise import scala.util.control.NonFatal -import org.apache.spark.SparkException +import org.apache.spark.{SparkEnv, SparkException} import org.apache.spark.internal.Logging import org.apache.spark.network.client.RpcResponseCallback import org.apache.spark.rpc._ @@ -147,13 +147,15 @@ private[netty] class Dispatcher(nettyEnv: NettyRpcEnv, numUsableCores: Int) exte /** Posts a one-way message. */ def postOneWayMessage(message: RequestMessage): Unit = { postMessage(message.receiver.name, OneWayMessage(message.senderAddress, message.content), - (e) => e match { + { // SPARK-31922: in local cluster mode, there's always a RpcEnvStoppedException when // stop is called due to some asynchronous message handling. We catch the exception // and log it at debug level to avoid verbose error message when user stop a local // cluster in spark shell. case re: RpcEnvStoppedException => logDebug(s"Message $message dropped. ${re.getMessage}") - case _ => throw e + case e if SparkEnv.get.isStopped => + logWarning(s"Message $message dropped due to sparkEnv is stopped. ${e.getMessage}") + case e => throw e }) } From 15616f499aca93c98a71732add2a80de863d3d5f Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Thu, 17 Dec 2020 09:28:17 -0800 Subject: [PATCH 0800/1009] [SPARK-33173][CORE][TESTS][FOLLOWUP] Use `local[2]` and AtomicInteger ### What changes were proposed in this pull request? Use `local[2]` to let tasks launch at the same time. And change counters (`numOnTaskXXX`) to `AtomicInteger` type to ensure thread safe. ### Why are the changes needed? The test is still flaky after the fix https://github.com/apache/spark/pull/30072. See: https://github.com/apache/spark/pull/30728/checks?check_run_id=1557987642 And it's easy to reproduce if you test it multiple times (e.g. 100) locally. The test sets up a stage with 2 tasks to run on an executor with 1 core. So these 2 tasks have to be launched one by one. The task-2 will be launched after task-1 fails. However, since we don't retry failed task in local mode (MAX_LOCAL_TASK_FAILURES = 1), the stage will abort right away after task-1 fail and cancels the running task-2 at the same time. There's a chance that task-2 gets canceled before calling `PluginContainer.onTaskStart`, which leads to the test failure. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? 
Tested manually after the fix and the test is no longer flaky. Closes #30823 from Ngone51/debug-flaky-spark-33088. Authored-by: yi.wu Signed-off-by: Dongjoon Hyun --- .../plugin/PluginContainerSuite.scala | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala b/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala index 15966e2744491..9ef81d30ff196 100644 --- a/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala +++ b/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.internal.plugin import java.io.File import java.nio.charset.StandardCharsets import java.util.{Map => JMap} +import java.util.concurrent.atomic.AtomicInteger import scala.collection.JavaConverters._ import scala.concurrent.duration._ @@ -138,15 +139,15 @@ class PluginContainerSuite extends SparkFunSuite with BeforeAndAfterEach with Lo sc = new SparkContext(conf) sc.parallelize(1 to 10, 2).count() - assert(TestSparkPlugin.executorPlugin.numOnTaskStart == 2) - assert(TestSparkPlugin.executorPlugin.numOnTaskSucceeded == 2) - assert(TestSparkPlugin.executorPlugin.numOnTaskFailed == 0) + assert(TestSparkPlugin.executorPlugin.numOnTaskStart.get() == 2) + assert(TestSparkPlugin.executorPlugin.numOnTaskSucceeded.get() == 2) + assert(TestSparkPlugin.executorPlugin.numOnTaskFailed.get() == 0) } test("SPARK-33088: executor failed tasks trigger plugin calls") { val conf = new SparkConf() .setAppName(getClass().getName()) - .set(SparkLauncher.SPARK_MASTER, "local[1]") + .set(SparkLauncher.SPARK_MASTER, "local[2]") .set(PLUGINS, Seq(classOf[TestSparkPlugin].getName())) sc = new SparkContext(conf) @@ -157,9 +158,9 @@ class PluginContainerSuite extends SparkFunSuite with BeforeAndAfterEach with Lo } eventually(timeout(10.seconds), interval(100.millis)) { - assert(TestSparkPlugin.executorPlugin.numOnTaskStart == 2) - assert(TestSparkPlugin.executorPlugin.numOnTaskSucceeded == 0) - assert(TestSparkPlugin.executorPlugin.numOnTaskFailed == 2) + assert(TestSparkPlugin.executorPlugin.numOnTaskStart.get() == 2) + assert(TestSparkPlugin.executorPlugin.numOnTaskSucceeded.get() == 0) + assert(TestSparkPlugin.executorPlugin.numOnTaskFailed.get() == 2) } } @@ -343,9 +344,9 @@ private class TestDriverPlugin extends DriverPlugin { private class TestExecutorPlugin extends ExecutorPlugin { - var numOnTaskStart: Int = 0 - var numOnTaskSucceeded: Int = 0 - var numOnTaskFailed: Int = 0 + val numOnTaskStart = new AtomicInteger(0) + val numOnTaskSucceeded = new AtomicInteger(0) + val numOnTaskFailed = new AtomicInteger(0) override def init(ctx: PluginContext, extraConf: JMap[String, String]): Unit = { ctx.metricRegistry().register("executorMetric", new Gauge[Int] { @@ -355,15 +356,15 @@ private class TestExecutorPlugin extends ExecutorPlugin { } override def onTaskStart(): Unit = { - numOnTaskStart += 1 + numOnTaskStart.incrementAndGet() } override def onTaskSucceeded(): Unit = { - numOnTaskSucceeded += 1 + numOnTaskSucceeded.incrementAndGet() } override def onTaskFailed(failureReason: TaskFailedReason): Unit = { - numOnTaskFailed += 1 + numOnTaskFailed.incrementAndGet() } } From 51ef4430dcbc934d43315ee6bdc851c9be84a1f2 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Thu, 17 Dec 2020 16:16:05 -0800 Subject: [PATCH 0801/1009] [SPARK-33822][SQL] Use the `CastSupport.cast` method in HashJoin ### What changes were proposed in 
this pull request? This PR intends to fix the bug that throws a unsupported exception when running [the TPCDS q5](https://github.com/apache/spark/blob/master/sql/core/src/test/resources/tpcds/q5.sql) with AQE enabled ([this option is enabled by default now via SPARK-33679](https://github.com/apache/spark/commit/031c5ef280e0cba8c4718a6457a44b6cccb17f46)): ``` java.lang.UnsupportedOperationException: BroadcastExchange does not support the execute() code path. at org.apache.spark.sql.execution.exchange.BroadcastExchangeExec.doExecute(BroadcastExchangeExec.scala:189) at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:180) at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:176) at org.apache.spark.sql.execution.exchange.ReusedExchangeExec.doExecute(Exchange.scala:60) at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:180) at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:176) at org.apache.spark.sql.execution.adaptive.QueryStageExec.doExecute(QueryStageExec.scala:115) at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:180) at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:176) at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:321) at org.apache.spark.sql.execution.SparkPlan.executeCollectIterator(SparkPlan.scala:397) at org.apache.spark.sql.execution.exchange.BroadcastExchangeExec.$anonfun$relationFuture$1(BroadcastExchangeExec.scala:118) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withThreadLocalCaptured$1(SQLExecution.scala:185) at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264) ... ``` I've checked the AQE code and I found `EnsureRequirements` wrongly puts `BroadcastExchange` on a top of `BroadcastQueryStage` in the `reOptimize` phase as follows: ``` +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#2183] +- BroadcastQueryStage 2 +- ReusedExchange [d_date_sk#1086], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#1963] ``` A root cause is that a `Cast` class in a required child's distribution does not have a `timeZoneId` field (`timeZoneId=None`), and a `Cast` class in `child.outputPartitioning` has it. So, this difference can make the distribution requirement check fail in `EnsureRequirements`: https://github.com/apache/spark/blob/1e85707738a830d33598ca267a6740b3f06b1861/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala#L47-L50 The `Cast` class that does not have a `timeZoneId` field is generated in the `HashJoin` object. To fix this issue, this PR proposes to use the `CastSupport.cast` method there. 
### Why are the changes needed? Bugfix. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually checked that q5 passed. Closes #30818 from maropu/BugfixInAQE. Authored-by: Takeshi Yamamuro Signed-off-by: Dongjoon Hyun --- .../spark/sql/execution/joins/HashJoin.scala | 13 ++++---- .../execution/joins/BroadcastJoinSuite.scala | 33 +++++++++++-------- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala index 0c75eda7a4ce2..53bd591d98a2e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala @@ -17,7 +17,8 @@ package org.apache.spark.sql.execution.joins -import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.{InternalRow, SQLConfHelper} +import org.apache.spark.sql.catalyst.analysis.CastSupport import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.BindReferences.bindReferences import org.apache.spark.sql.catalyst.expressions.codegen._ @@ -756,7 +757,7 @@ trait HashJoin extends BaseJoinExec with CodegenSupport { protected def prepareRelation(ctx: CodegenContext): HashedRelationInfo } -object HashJoin { +object HashJoin extends CastSupport with SQLConfHelper { /** * Try to rewrite the key as LongType so we can use getLong(), if they key can fit with a long. * @@ -771,14 +772,14 @@ object HashJoin { } var keyExpr: Expression = if (keys.head.dataType != LongType) { - Cast(keys.head, LongType) + cast(keys.head, LongType) } else { keys.head } keys.tail.foreach { e => val bits = e.dataType.defaultSize * 8 keyExpr = BitwiseOr(ShiftLeft(keyExpr, Literal(bits)), - BitwiseAnd(Cast(e, LongType), Literal((1L << bits) - 1))) + BitwiseAnd(cast(e, LongType), Literal((1L << bits) - 1))) } keyExpr :: Nil } @@ -791,13 +792,13 @@ object HashJoin { // jump over keys that have a higher index value than the required key if (keys.size == 1) { assert(index == 0) - Cast(BoundReference(0, LongType, nullable = false), keys(index).dataType) + cast(BoundReference(0, LongType, nullable = false), keys(index).dataType) } else { val shiftedBits = keys.slice(index + 1, keys.size).map(_.dataType.defaultSize * 8).sum val mask = (1L << (keys(index).dataType.defaultSize * 8)) - 1 // build the schema for unpacking the required key - Cast(BitwiseAnd( + cast(BitwiseAnd( ShiftRightUnsigned(BoundReference(0, LongType, nullable = false), Literal(shiftedBits)), Literal(mask)), keys(index).dataType) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala index 044e9ace6243f..98a1089709b92 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala @@ -242,33 +242,40 @@ abstract class BroadcastJoinSuiteBase extends QueryTest with SQLTestUtils assert(HashJoin.rewriteKeyExpr(l :: l :: Nil) === l :: l :: Nil) assert(HashJoin.rewriteKeyExpr(l :: i :: Nil) === l :: i :: Nil) - assert(HashJoin.rewriteKeyExpr(i :: Nil) === Cast(i, LongType) :: Nil) + assert(HashJoin.rewriteKeyExpr(i :: Nil) === + Cast(i, LongType, Some(conf.sessionLocalTimeZone)) :: Nil) assert(HashJoin.rewriteKeyExpr(i :: l :: 
Nil) === i :: l :: Nil) assert(HashJoin.rewriteKeyExpr(i :: i :: Nil) === - BitwiseOr(ShiftLeft(Cast(i, LongType), Literal(32)), - BitwiseAnd(Cast(i, LongType), Literal((1L << 32) - 1))) :: Nil) + BitwiseOr(ShiftLeft(Cast(i, LongType, Some(conf.sessionLocalTimeZone)), Literal(32)), + BitwiseAnd(Cast(i, LongType, Some(conf.sessionLocalTimeZone)), Literal((1L << 32) - 1))) :: + Nil) assert(HashJoin.rewriteKeyExpr(i :: i :: i :: Nil) === i :: i :: i :: Nil) - assert(HashJoin.rewriteKeyExpr(s :: Nil) === Cast(s, LongType) :: Nil) + assert(HashJoin.rewriteKeyExpr(s :: Nil) === + Cast(s, LongType, Some(conf.sessionLocalTimeZone)) :: Nil) assert(HashJoin.rewriteKeyExpr(s :: l :: Nil) === s :: l :: Nil) assert(HashJoin.rewriteKeyExpr(s :: s :: Nil) === - BitwiseOr(ShiftLeft(Cast(s, LongType), Literal(16)), - BitwiseAnd(Cast(s, LongType), Literal((1L << 16) - 1))) :: Nil) + BitwiseOr(ShiftLeft(Cast(s, LongType, Some(conf.sessionLocalTimeZone)), Literal(16)), + BitwiseAnd(Cast(s, LongType, Some(conf.sessionLocalTimeZone)), Literal((1L << 16) - 1))) :: + Nil) assert(HashJoin.rewriteKeyExpr(s :: s :: s :: Nil) === BitwiseOr(ShiftLeft( - BitwiseOr(ShiftLeft(Cast(s, LongType), Literal(16)), - BitwiseAnd(Cast(s, LongType), Literal((1L << 16) - 1))), + BitwiseOr(ShiftLeft(Cast(s, LongType, Some(conf.sessionLocalTimeZone)), Literal(16)), + BitwiseAnd(Cast(s, LongType, Some(conf.sessionLocalTimeZone)), Literal((1L << 16) - 1))), Literal(16)), - BitwiseAnd(Cast(s, LongType), Literal((1L << 16) - 1))) :: Nil) + BitwiseAnd(Cast(s, LongType, Some(conf.sessionLocalTimeZone)), Literal((1L << 16) - 1))) :: + Nil) assert(HashJoin.rewriteKeyExpr(s :: s :: s :: s :: Nil) === BitwiseOr(ShiftLeft( BitwiseOr(ShiftLeft( - BitwiseOr(ShiftLeft(Cast(s, LongType), Literal(16)), - BitwiseAnd(Cast(s, LongType), Literal((1L << 16) - 1))), + BitwiseOr(ShiftLeft(Cast(s, LongType, Some(conf.sessionLocalTimeZone)), Literal(16)), + BitwiseAnd(Cast(s, LongType, Some(conf.sessionLocalTimeZone)), + Literal((1L << 16) - 1))), Literal(16)), - BitwiseAnd(Cast(s, LongType), Literal((1L << 16) - 1))), + BitwiseAnd(Cast(s, LongType, Some(conf.sessionLocalTimeZone)), Literal((1L << 16) - 1))), Literal(16)), - BitwiseAnd(Cast(s, LongType), Literal((1L << 16) - 1))) :: Nil) + BitwiseAnd(Cast(s, LongType, Some(conf.sessionLocalTimeZone)), Literal((1L << 16) - 1))) :: + Nil) assert(HashJoin.rewriteKeyExpr(s :: s :: s :: s :: s :: Nil) === s :: s :: s :: s :: s :: Nil) From 6315118676c99ccef2566c50ab9873de8876e468 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Fri, 18 Dec 2020 10:03:07 +0900 Subject: [PATCH 0802/1009] [SPARK-33824][PYTHON][DOCS] Restructure and improve Python package management page ### What changes were proposed in this pull request? This PR proposes to restructure and refine the Python dependency management page. I recently wrote a blog post, which will be published soon, and decided to contribute some of the contents back to the PySpark documentation. FWIW, it has been reviewed by some tech writers and engineers. I built the site to make the review easier: https://hyukjin-spark.readthedocs.io/en/stable/user_guide/python_packaging.html ### Why are the changes needed? For better documentation. ### Does this PR introduce _any_ user-facing change? It's a doc change, but only in unreleased branches for now. ### How was this patch tested? I manually built the docs as: ```bash cd python/docs make clean html open ``` Closes #30822 from HyukjinKwon/SPARK-33824.
Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- .../source/user_guide/python_packaging.rst | 200 +++++++++++------- 1 file changed, 125 insertions(+), 75 deletions(-) diff --git a/python/docs/source/user_guide/python_packaging.rst b/python/docs/source/user_guide/python_packaging.rst index 0aff6dc1d16b4..71d8e532f9361 100644 --- a/python/docs/source/user_guide/python_packaging.rst +++ b/python/docs/source/user_guide/python_packaging.rst @@ -17,7 +17,7 @@ ========================= -3rd Party Python Packages +Python Package Management ========================= When you want to run your PySpark application on a cluster such as YARN, Kubernetes, Mesos, etc., you need to make @@ -51,10 +51,11 @@ Here is the script ``app.py`` from the previous example that will be executed on main(SparkSession.builder.getOrCreate()) -There are multiple ways to ship the dependencies to the cluster: +There are multiple ways to manage Python dependencies in the cluster: - Using PySpark Native Features -- Using Zipped Virtual Environment +- Using Conda +- Using Virtualenv - Using PEX @@ -62,54 +63,51 @@ Using PySpark Native Features ----------------------------- PySpark allows to upload Python files (``.py``), zipped Python packages (``.zip``), and Egg files (``.egg``) -to the executors by setting the configuration setting ``spark.submit.pyFiles`` or by directly -calling :meth:`pyspark.SparkContext.addPyFile`. +to the executors by: -This is an easy way to ship additional custom Python code to the cluster. You can just add individual files or zip whole -packages and upload them. Using :meth:`pyspark.SparkContext.addPyFile` allows to upload code -even after having started your job. +- Setting the configuration setting ``spark.submit.pyFiles`` +- Setting ``--py-files`` option in Spark scripts +- Directly calling :meth:`pyspark.SparkContext.addPyFile` in applications -Note that it doesn't allow to add packages built as `Wheels `_ and therefore doesn't -allow to include dependencies with native code. +This is a straightforward method to ship additional custom Python code to the cluster. You can just add individual files or zip whole +packages and upload them. Using :meth:`pyspark.SparkContext.addPyFile` allows to upload code even after having started your job. +However, it does not allow to add packages built as `Wheels `_ and therefore +does not allow to include dependencies with native code. -Using Zipped Virtual Environment --------------------------------- -The idea of zipped environments is to zip your whole `virtual environment `_, -ship it to the cluster, unzip it remotely and target the Python interpreter from inside this zipped environment. +Using Conda +----------- -Zip Virtual Environment -~~~~~~~~~~~~~~~~~~~~~~~ +`Conda `_ is one of the most widely-used Python package management systems. PySpark users can directly +use a Conda environment to ship their third-party Python packages by leveraging +`conda-pack `_ which is a command line tool creating +relocatable Conda environments. -You can zip the virtual environment on your own or use tools for doing this: - -* `conda-pack `_ for conda environments -* `venv-pack `_ for virtual environments - -Example with `conda-pack`: +The example below creates a Conda environment to use on both the driver and executor and packs +it into an archive file. This archive file captures the Conda environment for Python and stores +both Python interpreter and all its relevant dependencies. .. 
code-block:: bash - conda create -y -n pyspark_env -c conda-forge pyarrow==2.0.0 pandas==1.1.4 conda-pack==0.5.0 - conda activate pyspark_env - conda pack -f -o pyspark_env.tar.gz - -Upload to Spark Executors -~~~~~~~~~~~~~~~~~~~~~~~~~ + conda create -y -n pyspark_conda_env -c conda-forge pyarrow pandas conda-pack + conda activate pyspark_conda_env + conda pack -f -o pyspark_conda_env.tar.gz -Unzipping will be done by Spark when using target ``--archives`` option in spark-submit -or setting ``spark.archives`` configuration. +After that, you can ship it together with scripts or in the code by using the ``--archives`` option +or ``spark.archives`` configuration (``spark.yarn.dist.archives`` in YARN). It automatically unpacks the archive on executors. -Example with ``spark-submit``: +In the case of a ``spark-submit`` script, you can use it as follows: .. code-block:: bash export PYSPARK_DRIVER_PYTHON=python export PYSPARK_PYTHON=./environment/bin/python - spark-submit --master=... --archives pyspark_env.tar.gz#environment app.py + spark-submit --archives pyspark_conda_env.tar.gz#environment app.py -Example using ``SparkSession.builder``: +Note that ``PYSPARK_DRIVER_PYTHON`` above is not required for cluster modes in YARN or Kubernetes. + +If you’re on a regular Python shell or notebook, you can try it as shown below: .. code-block:: python @@ -118,67 +116,117 @@ Example using ``SparkSession.builder``: from app import main os.environ['PYSPARK_PYTHON'] = "./environment/bin/python" - spark = SparkSession.builder.master("...").config("spark.archives", "pyspark_env.tar.gz#environment").getOrCreate() + spark = SparkSession.builder.config( + "spark.archives", # 'spark.yarn.dist.archives' in YARN. + "pyspark_conda_env.tar.gz#environment").getOrCreate() main(spark) -Example with ``pyspark`` shell: +For a pyspark shell: .. code-block:: bash export PYSPARK_DRIVER_PYTHON=python export PYSPARK_PYTHON=./environment/bin/python - pyspark --master=... --archives pyspark_env.tar.gz#environment + pyspark --archives pyspark_conda_env.tar.gz#environment -Using PEX ---------- +Using Virtualenv +---------------- -`PEX `_ is a library for generating ``.pex`` (Python EXecutable) files. -A PEX file is a self-contained executable Python environment. It can be seen as the Python equivalent of Java uber-JARs (a.k.a. fat JARs). +`Virtualenv `_ is a Python tool to create isolated Python environments. +Since Python 3.3, a subset of its features has been integrated into Python as a standard library under +the `venv `_ module. PySpark users can use virtualenv to manage +Python dependencies in their clusters by using `venv-pack `_ +in a similar way as conda-pack. -You need to build the PEX file somewhere with all your requirements and then upload it to each Spark executor. +A virtual environment to use on both driver and executor can be created as demonstrated below. +It packs the current virtual environment to an archive file, and It self-contains both Python interpreter +and the dependencies. -Using CLI to Build PEX file -~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code-block:: bash - pex pyspark==3.0.1 pyarrow==0.15.1 pandas==0.25.3 -o myarchive.pex + python -m venv pyspark_venv + source pyspark_venv/bin/activate + pip install pyarrow pandas venv-pack + venv-pack -o pyspark_venv.tar.gz +You can directly pass/unpack the archive file and enable the environment on executors by leveraging +the ``--archives`` option or ``spark.archives`` configuration (``spark.yarn.dist.archives`` in YARN). 
-Invoking the PEX file will by default invoke the Python interpreter. pyarrow, pandas and pyspark will be included in the PEX file. +For ``spark-submit``, you can use it by running the command as follows. Also, notice that +``PYSPARK_DRIVER_PYTHON`` is not necessary in Kubernetes or YARN cluster modes. .. code-block:: bash - ./myarchive.pex - Python 3.6.6 (default, Jan 26 2019, 16:53:05) - (InteractiveConsole) - >>> import pyarrow - >>> import pandas - >>> import pyspark - >>> + export PYSPARK_DRIVER_PYTHON=python + export PYSPARK_PYTHON=./environment/bin/python + spark-submit --archives pyspark_venv.tar.gz#environment app.py -This can also be done directly with the Python API. For more information on how to build PEX files, -please refer to `Building .pex files `_ +For regular Python shells or notebooks: -Upload to Spark Executors -~~~~~~~~~~~~~~~~~~~~~~~~~ +.. code-block:: bash -The upload can be done by setting ``--files`` option in spark-submit or setting ``spark.files`` configuration (``spark.yarn.dist.files`` on YARN) -and changing the ``PYSPARK_PYTHON`` environment variable to change the Python interpreter to the PEX executable on each executor. + import os + from pyspark.sql import SparkSession + from app import main -.. - TODO: we should also document the way on other cluster modes. + os.environ['PYSPARK_PYTHON'] = "./environment/bin/python" + spark = SparkSession.builder.config( + "spark.archives", # 'spark.yarn.dist.archives' in YARN. + "pyspark_venv.tar.gz#environment").getOrCreate() + main(spark) -Example with ``spark-submit`` on YARN: +In the case of a pyspark shell: .. code-block:: bash export PYSPARK_DRIVER_PYTHON=python - export PYSPARK_PYTHON=./myarchive.pex - spark-submit --master=yarn --deploy-mode client --files myarchive.pex app.py + export PYSPARK_PYTHON=./environment/bin/python + pyspark --archives pyspark_venv.tar.gz#environment + + +Using PEX +--------- -Example using ``SparkSession.builder`` on YARN: +PySpark can also use `PEX `_ to ship the Python packages +together. PEX is a tool that creates a self-contained Python environment. This is similar +to Conda or virtualenv, but a ``.pex`` file is executable by itself. + +The following example creates a ``.pex`` file for the driver and executor to use. +The file contains the Python dependencies specified with the ``pex`` command. + +.. code-block:: bash + + pip install pyarrow pandas pex + pex pyspark pyarrow pandas -o pyspark_pex_env.pex + +This file behaves similarly with a regular Python interpreter. + +.. code-block:: bash + + ./pyspark_pex_env.pex -c "import pandas; print(pandas.__version__)" + 1.1.5 + +However, ``.pex`` file does not include a Python interpreter itself under the hood so all +nodes in a cluster should have the same Python interpreter installed. + +In order to transfer and use the ``.pex`` file in a cluster, you should ship it via the +``spark.files`` configuration (``spark.yarn.dist.files`` in YARN) or ``--files`` option because they are regular files instead +of directories or archive files. + +For application submission, you run the commands as shown below. +Note that ``PYSPARK_DRIVER_PYTHON`` is not needed for cluster modes in YARN or Kubernetes, +and you may also need to set ``PYSPARK_PYTHON`` environment variable on +the AppMaster ``--conf spark.yarn.appMasterEnv.PYSPARK_PYTHON=./myarchive.pex`` in YARN cluster mode. + +.. 
code-block:: bash + + export PYSPARK_DRIVER_PYTHON=python + export PYSPARK_PYTHON=./pyspark_pex_env.pex + spark-submit --files pyspark_pex_env.pex app.py + +For regular Python shells or notebooks: .. code-block:: python @@ -186,19 +234,21 @@ Example using ``SparkSession.builder`` on YARN: from pyspark.sql import SparkSession from app import main - os.environ['PYSPARK_PYTHON']="./myarchive.pex" - builder = SparkSession.builder - builder.master("yarn") \ - .config("spark.submit.deployMode", "client") \ - .config("spark.yarn.dist.files", "myarchive.pex") - spark = builder.getOrCreate() + os.environ['PYSPARK_PYTHON'] = "./pyspark_pex_env.pex" + spark = SparkSession.builder.config( + "spark.files", # 'spark.yarn.dist.files' in YARN. + "pyspark_pex_env.pex").getOrCreate() main(spark) -Notes -~~~~~ +For the interactive pyspark shell, the commands are almost the same: -* The Python interpreter that has been used to generate the PEX file must be available on each executor. PEX doesn't include the Python interpreter. +.. code-block:: bash -* In YARN cluster mode you may also need to set ``PYSPARK_PYTHON`` environment variable on the AppMaster ``--conf spark.yarn.appMasterEnv.PYSPARK_PYTHON=./myarchive.pex``. + export PYSPARK_DRIVER_PYTHON=python + export PYSPARK_PYTHON=./pyspark_pex_env.pex + pyspark --files pyspark_pex_env.pex -* An end-to-end Docker example for deploying a standalone PySpark with ``SparkSession.builder`` and PEX can be found `here `_ - it uses cluster-pack, a library on top of PEX that automatizes the the intermediate step of having to create & upload the PEX manually. +An end-to-end Docker example for deploying a standalone PySpark with ``SparkSession.builder`` and PEX +can be found `here `_ +- it uses cluster-pack, a library on top of PEX that automatizes the the intermediate step of having +to create & upload the PEX manually. From 42e1831ebb19be15921a2ac612dfdac47639edeb Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 18 Dec 2020 10:48:51 +0900 Subject: [PATCH 0803/1009] [SPARK-33797][SS][DOCS] Update SS doc about State Store and task locality ### What changes were proposed in this pull request? This updates SS documentation to document about State Store and task locality. ### Why are the changes needed? During running some tests for structured streaming, I found state store locality becomes an issue sometimes and it is not very straightforward for end-users. It'd be great if we can document it. ### Does this PR introduce _any_ user-facing change? No, only doc change. ### How was this patch tested? No, only doc change. Closes #30789 from viirya/ss-statestore-doc. Authored-by: Liang-Chi Hsieh Signed-off-by: Jungtaek Lim --- .../structured-streaming-programming-guide.md | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md index 6995ee2475aee..bea38ed7d805d 100644 --- a/docs/structured-streaming-programming-guide.md +++ b/docs/structured-streaming-programming-guide.md @@ -1689,6 +1689,28 @@ hence the number is not same as the number of original input rows. You'd like to There's a known workaround: split your streaming query into multiple queries per stateful operator, and ensure end-to-end exactly once per query. Ensuring end-to-end exactly once for the last query is optional. +### State Store and task locality + +The stateful operations store states for events in state stores of executors. 
State stores occupy resources such as memory and disk space to store the states. +So it is more efficient to keep a state store provider running in the same executor across different streaming batches. +Changing the location of a state store provider requires the extra overhead of loading checkpointed states. The overhead of loading state from checkpoint depends +on the external storage and the size of the state, which tends to hurt the latency of micro-batch run. For some use cases such as processing very large state data, +loading new state store providers from checkpointed states can be very time-consuming and inefficient. + +The stateful operations in Structured Streaming queries rely on the preferred location feature of Spark's RDD to run the state store provider on the same executor. +If in the next batch the corresponding state store provider is scheduled on this executor again, it could reuse the previous states and save the time of loading checkpointed states. + +However, generally the preferred location is not a hard requirement and it is still possible that Spark schedules tasks to the executors other than the preferred ones. +In this case, Spark will load state store providers from checkpointed states on new executors. The state store providers run in the previous batch will not be unloaded immediately. +Spark runs a maintenance task which checks and unloads the state store providers that are inactive on the executors. + +By changing the Spark configurations related to task scheduling, for example `spark.locality.wait`, users can configure Spark how long to wait to launch a data-local task. +For stateful operations in Structured Streaming, it can be used to let state store providers running on the same executors across batches. + +Specifically for built-in HDFS state store provider, users can check the state store metrics such as `loadedMapCacheHitCount` and `loadedMapCacheMissCount`. Ideally, +it is best if cache missing count is minimized that means Spark won't waste too much time on loading checkpointed state. +User can increase Spark locality waiting configurations to avoid loading state store providers in different executors across batches. + ## Starting Streaming Queries Once you have defined the final result DataFrame/Dataset, all that is left is for you to start the streaming computation. To do that, you have to use the `DataStreamWriter` ([Scala](api/scala/org/apache/spark/sql/streaming/DataStreamWriter.html)/[Java](api/java/org/apache/spark/sql/streaming/DataStreamWriter.html)/[Python](api/python/pyspark.sql.html#pyspark.sql.streaming.DataStreamWriter) docs) From 131a23d88a56280d47584aed93bc8fb617550717 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Thu, 17 Dec 2020 19:09:57 -0800 Subject: [PATCH 0804/1009] [SPARK-33831][UI] Update to jetty 9.4.34 ### What changes were proposed in this pull request? Update Jetty to 9.4.34 ### Why are the changes needed? Picks up fixes and improvements, including a possible CVE fix. https://github.com/eclipse/jetty.project/releases/tag/jetty-9.4.33.v20201020 https://github.com/eclipse/jetty.project/releases/tag/jetty-9.4.34.v20201102 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #30828 from srowen/SPARK-33831. 
Authored-by: Sean Owen Signed-off-by: Dongjoon Hyun --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 78d1fe7d54350..72e285bb2ba6e 100644 --- a/pom.xml +++ b/pom.xml @@ -137,7 +137,7 @@ 10.12.1.1 1.10.1 1.6.6 - 9.4.28.v20200408 + 9.4.34.v20201102 4.0.3 0.9.5 2.4.0 From 0f1a18370a1a95a2b7943519584af7a0dff42ae8 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Fri, 18 Dec 2020 04:30:15 +0000 Subject: [PATCH 0805/1009] [SPARK-33817][SQL] CACHE TABLE uses a logical plan when caching a query to avoid creating a dataframe ### What changes were proposed in this pull request? This PR proposes to update `CACHE TABLE` to use a `LogicalPlan` when caching a query to avoid creating a `DataFrame` as suggested here: https://github.com/apache/spark/pull/30743#discussion_r543123190 For reference, `UNCACHE TABLE` also uses `LogicalPlan`: https://github.com/apache/spark/blob/0c129001201ccb63ae96f576b6f354da84024fb3/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala#L91-L98 ### Why are the changes needed? To avoid creating an unnecessary dataframe and make it consistent with `uncacheQuery` used in `UNCACHE TABLE`. ### Does this PR introduce _any_ user-facing change? No, just internal changes. ### How was this patch tested? Existing tests since this is an internal refactoring change. Closes #30815 from imback82/cache_with_logical_plan. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 8 +++- .../spark/sql/execution/CacheManager.scala | 26 ++++++++++++- .../datasources/v2/CacheTableExec.scala | 38 ++++++++++++------- .../datasources/v2/DataSourceV2Strategy.scala | 5 +-- 4 files changed, 57 insertions(+), 20 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 0bef6998b177d..10c8ac58840f2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1107,12 +1107,16 @@ class Analyzer(override val catalogManager: CatalogManager) case c @ CacheTable(u @ UnresolvedRelation(_, _, false), _, _, _) => lookupRelation(u.multipartIdentifier, u.options, false) - .map(relation => c.copy(table = EliminateSubqueryAliases(relation))) + .map(resolveViews) + .map(EliminateSubqueryAliases(_)) + .map(relation => c.copy(table = relation)) .getOrElse(c) case c @ UncacheTable(u @ UnresolvedRelation(_, _, false), _, _) => lookupRelation(u.multipartIdentifier, u.options, false) - .map(relation => c.copy(table = EliminateSubqueryAliases(relation))) + .map(resolveViews) + .map(EliminateSubqueryAliases(_)) + .map(relation => c.copy(table = relation)) .getOrElse(c) // TODO (SPARK-27484): handle streaming write commands when we have them. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index f163d85914bc9..b3671945e5891 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -88,12 +88,34 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { query: Dataset[_], tableName: Option[String] = None, storageLevel: StorageLevel = MEMORY_AND_DISK): Unit = { - val planToCache = query.logicalPlan + cacheQuery(query.sparkSession, query.logicalPlan, tableName, storageLevel) + } + + /** + * Caches the data produced by the given [[LogicalPlan]]. + * Unlike `RDD.cache()`, the default storage level is set to be `MEMORY_AND_DISK` because + * recomputing the in-memory columnar representation of the underlying table is expensive. + */ + def cacheQuery( + spark: SparkSession, + planToCache: LogicalPlan, + tableName: Option[String]): Unit = { + cacheQuery(spark, planToCache, tableName, MEMORY_AND_DISK) + } + + /** + * Caches the data produced by the given [[LogicalPlan]]. + */ + def cacheQuery( + spark: SparkSession, + planToCache: LogicalPlan, + tableName: Option[String], + storageLevel: StorageLevel): Unit = { if (lookupCachedData(planToCache).nonEmpty) { logWarning("Asked to cache already cached data.") } else { val sessionWithConfigsOff = SparkSession.getOrCloneSessionWithConfigsOff( - query.sparkSession, forceDisableConfigs) + spark, forceDisableConfigs) val inMemoryRelation = sessionWithConfigsOff.withActive { val qe = sessionWithConfigsOff.sessionState.executePlan(planToCache) InMemoryRelation( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala index 2d8e5b5e286b8..4a7152232e8fa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.datasources.v2 import java.util.Locale -import org.apache.spark.sql.{DataFrame, Dataset} +import org.apache.spark.sql.{DataFrame, Dataset, SparkSession} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan @@ -29,10 +29,13 @@ import org.apache.spark.storage.StorageLevel trait BaseCacheTableExec extends V2CommandExec { def relationName: String - def dataFrameToCache: DataFrame + def planToCache: LogicalPlan + def dataFrameForCachedPlan: DataFrame def isLazy: Boolean def options: Map[String, String] + protected val sparkSession: SparkSession = sqlContext.sparkSession + override def run(): Seq[InternalRow] = { val storageLevelKey = "storagelevel" val storageLevelValue = @@ -42,20 +45,22 @@ trait BaseCacheTableExec extends V2CommandExec { logWarning(s"Invalid options: ${withoutStorageLevel.mkString(", ")}") } - val sparkSession = sqlContext.sparkSession - val df = dataFrameToCache if (storageLevelValue.nonEmpty) { sparkSession.sharedState.cacheManager.cacheQuery( - df, + sparkSession, + planToCache, Some(relationName), StorageLevel.fromString(storageLevelValue.get)) } else { - sparkSession.sharedState.cacheManager.cacheQuery(df, Some(relationName)) + sparkSession.sharedState.cacheManager.cacheQuery( + sparkSession, + 
planToCache, + Some(relationName)) } if (!isLazy) { - // Performs eager caching - df.count() + // Performs eager caching. + dataFrameForCachedPlan.count() } Seq.empty @@ -69,9 +74,13 @@ case class CacheTableExec( multipartIdentifier: Seq[String], override val isLazy: Boolean, override val options: Map[String, String]) extends BaseCacheTableExec { - override def relationName: String = multipartIdentifier.quoted + override lazy val relationName: String = multipartIdentifier.quoted + + override lazy val planToCache: LogicalPlan = relation - override def dataFrameToCache: DataFrame = Dataset.ofRows(sqlContext.sparkSession, relation) + override lazy val dataFrameForCachedPlan: DataFrame = { + Dataset.ofRows(sparkSession, planToCache) + } } case class CacheTableAsSelectExec( @@ -79,11 +88,14 @@ case class CacheTableAsSelectExec( query: LogicalPlan, override val isLazy: Boolean, override val options: Map[String, String]) extends BaseCacheTableExec { - override def relationName: String = tempViewName + override lazy val relationName: String = tempViewName - override def dataFrameToCache: DataFrame = { - val sparkSession = sqlContext.sparkSession + override lazy val planToCache: LogicalPlan = { Dataset.ofRows(sparkSession, query).createTempView(tempViewName) + dataFrameForCachedPlan.logicalPlan + } + + override lazy val dataFrameForCachedPlan: DataFrame = { sparkSession.table(tempViewName) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index f9c89051e421a..c40d2ab9cba4e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.collection.JavaConverters._ -import org.apache.spark.sql.{AnalysisException, Dataset, SparkSession, Strategy} +import org.apache.spark.sql.{AnalysisException, SparkSession, Strategy} import org.apache.spark.sql.catalyst.analysis.{ResolvedNamespace, ResolvedPartitionSpec, ResolvedTable} import org.apache.spark.sql.catalyst.expressions.{And, Expression, NamedExpression, PredicateHelper, SubqueryExpression} import org.apache.spark.sql.catalyst.planning.PhysicalOperation @@ -66,8 +66,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat val cacheLevel = cache.get.cachedRepresentation.cacheBuilder.storageLevel // recache with the same name and cache level. - val ds = Dataset.ofRows(session, v2Relation) - session.sharedState.cacheManager.cacheQuery(ds, cacheName, cacheLevel) + session.sharedState.cacheManager.cacheQuery(session, v2Relation, cacheName, cacheLevel) } } From 25c6cc25f74e8a24aa424f6596a574f26ae80e1d Mon Sep 17 00:00:00 2001 From: angerszhu Date: Fri, 18 Dec 2020 14:24:53 +0900 Subject: [PATCH 0806/1009] [SPARK-26341][WEBUI] Expose executor memory metrics at the stage level, in the Stages tab ### What changes were proposed in this pull request? Expose executor memory metrics at the stage level, in the Stages tab, Current like below, and I am not sure which column we will truly need. 
![image](https://user-images.githubusercontent.com/46485123/101170248-2256f900-3679-11eb-8c34-794fcf8e94a8.png) ![image](https://user-images.githubusercontent.com/46485123/101170359-4dd9e380-3679-11eb-984b-b0430f236160.png) ![image](https://user-images.githubusercontent.com/46485123/101314915-86a1d480-3894-11eb-9b6f-8050d326e11f.png) ### Why are the changes needed? User can know executor jvm usage more directly in SparkUI ### Does this PR introduce any user-facing change? User can know executor jvm usage more directly in SparkUI ### How was this patch tested? Manual Tested Closes #30573 from AngersZhuuuu/SPARK-26341. Authored-by: angerszhu Signed-off-by: Kousuke Saruta --- .../org/apache/spark/ui/static/stagepage.js | 163 +++++++++++++++--- .../spark/ui/static/stagespage-template.html | 4 + .../spark/status/AppStatusListener.scala | 3 + ...xcludeOnFailure_for_stage_expectation.json | 44 +++++ ...eOnFailure_node_for_stage_expectation.json | 110 ++++++++++++ .../one_stage_attempt_json_expectation.json | 22 +++ .../one_stage_json_expectation.json | 22 +++ ...age_with_accumulable_json_expectation.json | 22 +++ 8 files changed, 365 insertions(+), 25 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/stagepage.js b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js index 2877aa819ab9e..336edff509300 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/stagepage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js @@ -243,23 +243,39 @@ function createRowMetadataForColumn(colKey, data, checkboxId) { } function reselectCheckboxesBasedOnTaskTableState() { - var allChecked = true; + var taskSummaryHasSelected = false; + var executorSummaryHasSelected = false; + var allTaskSummaryChecked = true; + var allExecutorSummaryChecked = true; var taskSummaryMetricsTableCurrentFilteredArray = taskSummaryMetricsTableCurrentStateArray.slice(); if (typeof taskTableSelector !== 'undefined' && taskSummaryMetricsTableCurrentStateArray.length > 0) { for (var k = 0; k < optionalColumns.length; k++) { if (taskTableSelector.column(optionalColumns[k]).visible()) { + taskSummaryHasSelected = true; $("#box-"+optionalColumns[k]).prop('checked', true); taskSummaryMetricsTableCurrentStateArray.push(taskSummaryMetricsTableArray.filter(row => (row.checkboxId).toString() == optionalColumns[k])[0]); taskSummaryMetricsTableCurrentFilteredArray = taskSummaryMetricsTableCurrentStateArray.slice(); } else { - allChecked = false; + allTaskSummaryChecked = false; } } - if (allChecked) { - $("#box-0").prop('checked', true); - } createDataTableForTaskSummaryMetricsTable(taskSummaryMetricsTableCurrentFilteredArray); } + + if (typeof executorSummaryTableSelector !== 'undefined') { + for (var k = 0; k < executorOptionalColumns.length; k++) { + if (executorSummaryTableSelector.column(executorOptionalColumns[k]).visible()) { + executorSummaryHasSelected = true; + $("#executor-box-"+executorOptionalColumns[k]).prop('checked', true); + } else { + allExecutorSummaryChecked = false; + } + } + } + + if ((taskSummaryHasSelected || executorSummaryHasSelected) && allTaskSummaryChecked && allExecutorSummaryChecked) { + $("#box-0").prop('checked', true); + } } function getStageAttemptId() { @@ -278,6 +294,9 @@ var taskSummaryMetricsDataTable; var optionalColumns = [11, 12, 13, 14, 15, 16, 17, 21]; var taskTableSelector; +var executorOptionalColumns = [15, 16, 17, 18]; +var executorSummaryTableSelector; + $(document).ready(function () { setDataTableDefaults(); @@ -288,14 +307,18 @@ 
$(document).ready(function () { "" + "

      " + "
      Select All
      " + - "
      Scheduler Delay
      " + - "
      Task Deserialization Time
      " + - "
      Shuffle Read Blocked Time
      " + - "
      Shuffle Remote Reads
      " + - "
      Shuffle Write Time
      " + - "
      Result Serialization Time
      " + - "
      Getting Result Time
      " + - "
      Peak Execution Memory
      " + + "
      Scheduler Delay
      " + + "
      Task Deserialization Time
      " + + "
      Shuffle Read Blocked Time
      " + + "
      Shuffle Remote Reads
      " + + "
      Shuffle Write Time
      " + + "
      Result Serialization Time
      " + + "
      Getting Result Time
      " + + "
      Peak Execution Memory
      " + + "
      Peak JVM Memory OnHeap / OffHeap
      " + + "
      Peak Execution Memory OnHeap / OffHeap
      " + + "
      Peak Storage Memory OnHeap / OffHeap
      " + + "
      Peak Pool Memory Direct / Mapped
      " + "
      "); $('#scheduler_delay').attr("data-toggle", "tooltip") @@ -463,15 +486,95 @@ $(document).ready(function () { data : function (row, type) { return typeof row.diskBytesSpilled != 'undefined' ? formatBytes(row.diskBytesSpilled, type) : ""; } + }, + { + data : function (row, type) { + var peakMemoryMetrics = row.peakMemoryMetrics; + if (typeof peakMemoryMetrics !== 'undefined') { + if (type !== 'display') + return peakMemoryMetrics.JVMHeapMemory; + else + return (formatBytes(peakMemoryMetrics.JVMHeapMemory, type) + ' / ' + + formatBytes(peakMemoryMetrics.JVMOffHeapMemory, type)); + } else { + if (type !== 'display') { + return 0; + } else { + return '0.0 B / 0.0 B'; + } + } + + } + }, + { + data : function (row, type) { + var peakMemoryMetrics = row.peakMemoryMetrics + if (typeof peakMemoryMetrics !== 'undefined') { + if (type !== 'display') + return peakMemoryMetrics.OnHeapExecutionMemory; + else + return (formatBytes(peakMemoryMetrics.OnHeapExecutionMemory, type) + ' / ' + + formatBytes(peakMemoryMetrics.OffHeapExecutionMemory, type)); + } else { + if (type !== 'display') { + return 0; + } else { + return '0.0 B / 0.0 B'; + } + } + } + }, + { + data : function (row, type) { + var peakMemoryMetrics = row.peakMemoryMetrics + if (typeof peakMemoryMetrics !== 'undefined') { + if (type !== 'display') + return peakMemoryMetrics.OnHeapStorageMemory; + else + return (formatBytes(peakMemoryMetrics.OnHeapStorageMemory, type) + ' / ' + + formatBytes(peakMemoryMetrics.OffHeapStorageMemory, type)); + } else { + if (type !== 'display') { + return 0; + } else { + return '0.0 B / 0.0 B'; + } + } + } + }, + { + data : function (row, type) { + var peakMemoryMetrics = row.peakMemoryMetrics + if (typeof peakMemoryMetrics !== 'undefined') { + if (type !== 'display') + return peakMemoryMetrics.DirectPoolMemory; + else + return (formatBytes(peakMemoryMetrics.DirectPoolMemory, type) + ' / ' + + formatBytes(peakMemoryMetrics.MappedPoolMemory, type)); + } else { + if (type !== 'display') { + return 0; + } else { + return '0.0 B / 0.0 B'; + } + } + } } ], + "columnDefs": [ + { "visible": false, "targets": 15 }, + { "visible": false, "targets": 16 }, + { "visible": false, "targets": 17 }, + { "visible": false, "targets": 18 } + ], + "deferRender": true, "order": [[0, "asc"]], "bAutoWidth": false, "oLanguage": { "sEmptyTable": "No data to show yet" } }; - var executorSummaryTableSelector = + executorSummaryTableSelector = $("#summary-executor-table").DataTable(executorSummaryConf); $('#parent-container [data-toggle="tooltip"]').tooltip(); @@ -923,30 +1026,40 @@ $(document).ready(function () { var para = $(this).attr('data-column'); if (para == "0") { var allColumns = taskTableSelector.columns(optionalColumns); + var executorAllColumns = executorSummaryTableSelector.columns(executorOptionalColumns); if ($(this).is(":checked")) { $(".toggle-vis").prop('checked', true); allColumns.visible(true); + executorAllColumns.visible(true); createDataTableForTaskSummaryMetricsTable(taskSummaryMetricsTableArray); } else { $(".toggle-vis").prop('checked', false); allColumns.visible(false); + executorAllColumns.visible(false); var taskSummaryMetricsTableFilteredArray = taskSummaryMetricsTableArray.filter(row => row.checkboxId < 11); createDataTableForTaskSummaryMetricsTable(taskSummaryMetricsTableFilteredArray); } } else { - var column = taskTableSelector.column(para); - // Toggle the visibility - column.visible(!column.visible()); - var taskSummaryMetricsTableFilteredArray = []; - if ($(this).is(":checked")) { - 
taskSummaryMetricsTableCurrentStateArray.push(taskSummaryMetricsTableArray.filter(row => (row.checkboxId).toString() == para)[0]); - taskSummaryMetricsTableFilteredArray = taskSummaryMetricsTableCurrentStateArray.slice(); - } else { - taskSummaryMetricsTableFilteredArray = - taskSummaryMetricsTableCurrentStateArray.filter(row => (row.checkboxId).toString() != para); + var dataMetricsType = $(this).attr("data-metrics-type"); + if (dataMetricsType === 'task') { + var column = taskTableSelector.column(para); + // Toggle the visibility + column.visible(!column.visible()); + var taskSummaryMetricsTableFilteredArray = []; + if ($(this).is(":checked")) { + taskSummaryMetricsTableCurrentStateArray.push(taskSummaryMetricsTableArray.filter(row => (row.checkboxId).toString() == para)[0]); + taskSummaryMetricsTableFilteredArray = taskSummaryMetricsTableCurrentStateArray.slice(); + } else { + taskSummaryMetricsTableFilteredArray = + taskSummaryMetricsTableCurrentStateArray.filter(row => (row.checkboxId).toString() != para); + } + createDataTableForTaskSummaryMetricsTable(taskSummaryMetricsTableFilteredArray); + } + if (dataMetricsType === "executor") { + var column = executorSummaryTableSelector.column(para); + column.visible(!column.visible()); } - createDataTableForTaskSummaryMetricsTable(taskSummaryMetricsTableFilteredArray); } }); diff --git a/core/src/main/resources/org/apache/spark/ui/static/stagespage-template.html b/core/src/main/resources/org/apache/spark/ui/static/stagespage-template.html index 9b40d0dc4a230..b938158b77027 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/stagespage-template.html +++ b/core/src/main/resources/org/apache/spark/ui/static/stagespage-template.html @@ -59,6 +59,10 @@

      Aggregated Metrics by Executor

      Shuffle Write Size / Records Spill (Memory) Spill (Disk) + Peak JVM Memory OnHeap / OffHeap + Peak Execution Memory OnHeap / OffHeap + Peak Storage Memory OnHeap / OffHeap + Peak Pool Memory Direct / Mapped diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala index 5b0c1dc389af0..0722095cc6533 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala @@ -687,6 +687,9 @@ private[spark] class AppStatusListener( stage.killedSummary = killedTasksSummary(event.reason, stage.killedSummary) } stage.activeTasksPerExecutor(event.taskInfo.executorId) -= 1 + + stage.executorSummary(event.taskInfo.executorId).peakExecutorMetrics + .compareAndUpdatePeakValues(event.taskExecutorMetrics) // [SPARK-24415] Wait for all tasks to finish before removing stage from live list val removeStage = stage.activeTasks == 0 && diff --git a/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_for_stage_expectation.json b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_for_stage_expectation.json index a69940fa5a1a5..ab9a8b7ef885f 100644 --- a/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_for_stage_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_for_stage_expectation.json @@ -698,6 +698,28 @@ "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, "isBlacklistedForStage" : true, + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "isExcludedForStage" : true }, "1" : { @@ -716,6 +738,28 @@ "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, "isBlacklistedForStage" : false, + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "isExcludedForStage" : false } }, diff --git a/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_node_for_stage_expectation.json b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_node_for_stage_expectation.json index bda9caedbbe81..1c569c19894fd 100644 --- a/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_node_for_stage_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_node_for_stage_expectation.json @@ -806,6 +806,28 @@ "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, "isBlacklistedForStage" : true, + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + 
"JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "isExcludedForStage" : true }, "5" : { @@ -824,6 +846,28 @@ "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, "isBlacklistedForStage" : true, + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "isExcludedForStage" : true }, "1" : { @@ -842,6 +886,28 @@ "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, "isBlacklistedForStage" : false, + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "isExcludedForStage" : false }, "2" : { @@ -860,6 +926,28 @@ "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, "isBlacklistedForStage" : false, + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "isExcludedForStage" : false }, "3" : { @@ -878,6 +966,28 @@ "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, "isBlacklistedForStage" : true, + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "isExcludedForStage" : true } }, 
diff --git a/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json index 41e54c68858ad..b1eab0d7ac196 100644 --- a/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json @@ -460,6 +460,28 @@ "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, "isBlacklistedForStage" : false, + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "isExcludedForStage" : false } }, diff --git a/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json index 7a6685a609523..6dfdd27cd7d8f 100644 --- a/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json @@ -460,6 +460,28 @@ "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, "isBlacklistedForStage" : false, + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "isExcludedForStage" : false } }, diff --git a/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json index 066b6a4f884a7..a2cfd9d42cc99 100644 --- a/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json @@ -504,6 +504,28 @@ "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, "isBlacklistedForStage" : false, + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "isExcludedForStage" : false } }, From b0da2bcd464b24d58e2ce56d4f93f1f9527839ff Mon Sep 17 00:00:00 
2001 From: Kousuke Saruta Date: Fri, 18 Dec 2020 15:10:13 +0900 Subject: [PATCH 0807/1009] [MINOR][INFRA] Add -Pspark-ganglia-lgpl to the build definition with Scala 2.13 on GitHub Actions ### What changes were proposed in this pull request? This PR adds `-Pspark-ganglia-lgpl` to the build definition with Scala 2.13 on GitHub Actions. ### Why are the changes needed? To keep the code buildable with Scala 2.13. With this change, all the sub-modules seem to be buildable with Scala 2.13. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? I confirmed that the Scala 2.13 build passes with the following commands. ``` $ ./dev/change-scala-version.sh 2.13 $ build/sbt -Pspark-ganglia-lgpl -Pscala-2.13 compile test:compile ``` Closes #30834 from sarutak/ganglia-scala-2.13. Authored-by: Kousuke Saruta Signed-off-by: HyukjinKwon --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index f133a4132b2a5..0048bc7ffba0d 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -414,7 +414,7 @@ jobs: - name: Build with SBT run: | ./dev/change-scala-version.sh 2.13 - ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pkubernetes-integration-tests -Pscala-2.13 compile test:compile + ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pkubernetes-integration-tests -Pspark-ganglia-lgpl -Pscala-2.13 compile test:compile hadoop-2: name: Hadoop 2 build with SBT From 0603913c666bae1a9640f2f1469fe50bc59e461d Mon Sep 17 00:00:00 2001 From: angerszhu Date: Fri, 18 Dec 2020 00:01:13 -0800 Subject: [PATCH 0808/1009] [SPARK-33593][SQL] Vector reader got incorrect data with binary partition value ### What changes were proposed in this pull request? Currently, when the Parquet vectorized reader is enabled, using a binary type as a partition column returns an incorrect value, as the UT below shows: ```scala test("Parquet vector reader incorrect with binary partition value") { Seq(false, true).foreach(tag => { withSQLConf("spark.sql.parquet.enableVectorizedReader" -> tag.toString) { withTable("t1") { sql( """CREATE TABLE t1(name STRING, id BINARY, part BINARY) | USING PARQUET PARTITIONED BY (part)""".stripMargin) sql(s"INSERT INTO t1 PARTITION(part = 'Spark SQL') VALUES('a', X'537061726B2053514C')") if (tag) { checkAnswer(sql("SELECT name, cast(id as string), cast(part as string) FROM t1"), Row("a", "Spark SQL", "")) } else { checkAnswer(sql("SELECT name, cast(id as string), cast(part as string) FROM t1"), Row("a", "Spark SQL", "Spark SQL")) } } } }) } ``` ### Why are the changes needed? Fixes a data correctness issue. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added UT Closes #30824 from AngersZhuuuu/SPARK-33593.
Authored-by: angerszhu Signed-off-by: Dongjoon Hyun --- .../vectorized/ColumnVectorUtils.java | 5 ++ .../org/apache/spark/sql/SQLQuerySuite.scala | 26 +++++++ .../orc/OrcColumnarBatchReaderSuite.scala | 77 ++++++++++++++++++- .../datasources/parquet/ParquetIOSuite.scala | 9 ++- 4 files changed, 114 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java index bce6aa28c42a1..25aabcd086289 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java @@ -54,6 +54,8 @@ public static void populate(WritableColumnVector col, InternalRow row, int field } else { if (t == DataTypes.BooleanType) { col.putBooleans(0, capacity, row.getBoolean(fieldIdx)); + } else if (t == DataTypes.BinaryType) { + col.putByteArray(0, row.getBinary(fieldIdx)); } else if (t == DataTypes.ByteType) { col.putBytes(0, capacity, row.getByte(fieldIdx)); } else if (t == DataTypes.ShortType) { @@ -94,6 +96,9 @@ public static void populate(WritableColumnVector col, InternalRow row, int field col.putInts(0, capacity, row.getInt(fieldIdx)); } else if (t instanceof TimestampType) { col.putLongs(0, capacity, row.getLong(fieldIdx)); + } else { + throw new RuntimeException(String.format("DataType %s is not supported" + + " in column vectorized reader.", t.sql())); } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 112b1a7210cb4..b7cec55245564 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -3745,6 +3745,32 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } } + + test("SPARK-33593: Vector reader got incorrect data with binary partition value") { + Seq("false", "true").foreach(value => { + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> value) { + withTable("t1") { + sql( + """CREATE TABLE t1(name STRING, id BINARY, part BINARY) + |USING PARQUET PARTITIONED BY (part)""".stripMargin) + sql("INSERT INTO t1 PARTITION(part = 'Spark SQL') VALUES('a', X'537061726B2053514C')") + checkAnswer(sql("SELECT name, cast(id as string), cast(part as string) FROM t1"), + Row("a", "Spark SQL", "Spark SQL")) + } + } + + withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> value) { + withTable("t2") { + sql( + """CREATE TABLE t2(name STRING, id BINARY, part BINARY) + |USING ORC PARTITIONED BY (part)""".stripMargin) + sql("INSERT INTO t2 PARTITION(part = 'Spark SQL') VALUES('a', X'537061726B2053514C')") + checkAnswer(sql("SELECT name, cast(id as string), cast(part as string) FROM t2"), + Row("a", "Spark SQL", "Spark SQL")) + } + } + }) + } } case class Foo(bar: Option[String]) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala index 719bf91e1786b..bfcef46339908 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala @@ -17,16 +17,29 @@ package 
org.apache.spark.sql.execution.datasources.orc +import java.io.File + +import org.apache.hadoop.fs.Path +import org.apache.hadoop.mapreduce.{JobID, TaskAttemptID, TaskID, TaskType} +import org.apache.hadoop.mapreduce.lib.input.FileSplit +import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl import org.apache.orc.TypeDescription import org.apache.spark.sql.QueryTest import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.GenericInternalRow +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase import org.apache.spark.sql.execution.vectorized.{OnHeapColumnVector, WritableColumnVector} import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types.{StructField, StructType} +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.unsafe.types.UTF8String.fromString class OrcColumnarBatchReaderSuite extends QueryTest with SharedSparkSession { + + import testImplicits._ + private val dataSchema = StructType.fromDDL("col1 int, col2 int") private val partitionSchema = StructType.fromDDL("p1 string, p2 string") private val partitionValues = InternalRow(fromString("partValue1"), fromString("partValue2")) @@ -77,4 +90,66 @@ class OrcColumnarBatchReaderSuite extends QueryTest with SharedSparkSession { assert(p1.getUTF8String(0) === partitionValues.getUTF8String(0)) } } + + test("SPARK-33593: partition column types") { + withTempPath { dir => + Seq(1).toDF().repartition(1).write.orc(dir.getCanonicalPath) + + val dataTypes = + Seq(StringType, BooleanType, ByteType, BinaryType, ShortType, IntegerType, LongType, + FloatType, DoubleType, DecimalType(25, 5), DateType, TimestampType) + + val constantValues = + Seq( + UTF8String.fromString("a string"), + true, + 1.toByte, + "Spark SQL".getBytes, + 2.toShort, + 3, + Long.MaxValue, + 0.25.toFloat, + 0.75D, + Decimal("1234.23456"), + DateTimeUtils.fromJavaDate(java.sql.Date.valueOf("2015-01-01")), + DateTimeUtils.fromJavaTimestamp(java.sql.Timestamp.valueOf("2015-01-01 23:50:59.123"))) + + dataTypes.zip(constantValues).foreach { case (dt, v) => + val schema = StructType(StructField("col1", IntegerType) :: StructField("pcol", dt) :: Nil) + val partitionValues = new GenericInternalRow(Array(v)) + val file = new File(SpecificParquetRecordReaderBase.listDirectory(dir).get(0)) + val fileSplit = new FileSplit(new Path(file.getCanonicalPath), 0L, file.length, Array.empty) + val taskConf = sqlContext.sessionState.newHadoopConf() + val orcFileSchema = TypeDescription.fromString(schema.simpleString) + val vectorizedReader = new OrcColumnarBatchReader(4096) + val attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0) + val taskAttemptContext = new TaskAttemptContextImpl(taskConf, attemptId) + + try { + vectorizedReader.initialize(fileSplit, taskAttemptContext) + vectorizedReader.initBatch( + orcFileSchema, + schema.toArray, + Array(0, -1), + Array(-1, 0), + partitionValues) + vectorizedReader.nextKeyValue() + val row = vectorizedReader.getCurrentValue.getRow(0) + + // Use `GenericMutableRow` by explicitly copying rather than `ColumnarBatch` + // in order to use get(...) method which is not implemented in `ColumnarBatch`. 
+ val actual = row.copy().get(1, dt) + val expected = v + if (dt.isInstanceOf[BinaryType]) { + assert(actual.asInstanceOf[Array[Byte]] + sameElements expected.asInstanceOf[Array[Byte]]) + } else { + assert(actual == expected) + } + } finally { + vectorizedReader.close() + } + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala index d13b3e58a30ff..c69f2e6911ba3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala @@ -790,7 +790,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession Seq(1).toDF().repartition(1).write.parquet(dir.getCanonicalPath) val dataTypes = - Seq(StringType, BooleanType, ByteType, ShortType, IntegerType, LongType, + Seq(StringType, BooleanType, ByteType, BinaryType, ShortType, IntegerType, LongType, FloatType, DoubleType, DecimalType(25, 5), DateType, TimestampType) val constantValues = @@ -798,6 +798,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession UTF8String.fromString("a string"), true, 1.toByte, + "Spark SQL".getBytes, 2.toShort, 3, Long.MaxValue, @@ -825,7 +826,11 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession // in order to use get(...) method which is not implemented in `ColumnarBatch`. val actual = row.copy().get(1, dt) val expected = v - assert(actual == expected) + if (dt.isInstanceOf[BinaryType]) { + assert(actual.asInstanceOf[Array[Byte]] sameElements expected.asInstanceOf[Array[Byte]]) + } else { + assert(actual == expected) + } } finally { vectorizedReader.close() } From bc46d273e0ae0d13d0e31e30e39198ac19dcd27b Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Fri, 18 Dec 2020 20:27:16 +0900 Subject: [PATCH 0809/1009] [SPARK-33840][DOCS] Add spark.sql.files.minPartitionNum to performence tuning doc ### What changes were proposed in this pull request? Add `spark.sql.files.minPartitionNum` and it's description to sql-performence-tuning.md. ### Why are the changes needed? Help user to find it. ### Does this PR introduce _any_ user-facing change? Yes, it's the doc. ### How was this patch tested? Pass CI. Closes #30838 from ulysses-you/SPARK-33840. Authored-by: ulysses-you Signed-off-by: HyukjinKwon --- docs/sql-performance-tuning.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/sql-performance-tuning.md b/docs/sql-performance-tuning.md index 49b32e7562e0b..e99af41635c9d 100644 --- a/docs/sql-performance-tuning.md +++ b/docs/sql-performance-tuning.md @@ -85,6 +85,16 @@ that these options will be deprecated in future release as more optimizations ar 2.0.0 + + spark.sql.files.minPartitionNum + Default Parallelism + + The suggested (not guaranteed) minimum number of split file partitions. If not set, the default + value is `spark.default.parallelism`. This configuration is effective only when using file-based + sources such as Parquet, JSON and ORC. + + 3.1.0 + spark.sql.broadcastTimeout 300 From 06b1bbbbab8cab2ce77d255a3287a2aacdb2df78 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Fri, 18 Dec 2020 13:20:58 +0000 Subject: [PATCH 0810/1009] [SPARK-33798][SQL] Add new rule to push down the foldable expressions through CaseWhen/If ### What changes were proposed in this pull request? 
This pr add a new rule(`PushFoldableIntoBranches`) to push down the foldable expressions through `CaseWhen/If`. This is a real case from production: ```sql create table t1 using parquet as select * from range(100); create table t2 using parquet as select * from range(200); create temp view v1 as select 'a' as event_type, * from t1 union all select CASE WHEN id = 1 THEN 'b' WHEN id = 3 THEN 'c' end as event_type, * from t2 explain select * from v1 where event_type = 'a'; ``` Before this PR: ``` == Physical Plan == Union :- *(1) Project [a AS event_type#30533, id#30535L] : +- *(1) ColumnarToRow : +- FileScan parquet default.t1[id#30535L] Batched: true, DataFilters: [], Format: Parquet +- *(2) Project [CASE WHEN (id#30536L = 1) THEN b WHEN (id#30536L = 3) THEN c END AS event_type#30534, id#30536L] +- *(2) Filter (CASE WHEN (id#30536L = 1) THEN b WHEN (id#30536L = 3) THEN c END = a) +- *(2) ColumnarToRow +- FileScan parquet default.t2[id#30536L] Batched: true, DataFilters: [(CASE WHEN (id#30536L = 1) THEN b WHEN (id#30536L = 3) THEN c END = a)], Format: Parquet ``` After this PR: ``` == Physical Plan == *(1) Project [a AS event_type#8, id#4L] +- *(1) ColumnarToRow +- FileScan parquet default.t1[id#4L] Batched: true, DataFilters: [], Format: Parquet ``` ### Why are the changes needed? Improve query performance. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #30790 from wangyum/SPARK-33798. Authored-by: Yuming Wang Signed-off-by: Wenchen Fan --- .../sql/catalyst/expressions/Expression.scala | 5 + .../sql/catalyst/optimizer/Optimizer.scala | 1 + .../sql/catalyst/optimizer/expressions.scala | 44 +++- .../PushFoldableIntoBranchesSuite.scala | 225 ++++++++++++++++++ 4 files changed, 274 insertions(+), 1 deletion(-) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 1d23953484046..65f89bbdd0599 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -636,6 +636,11 @@ abstract class BinaryExpression extends Expression { } +object BinaryExpression { + def unapply(e: BinaryExpression): Option[(Expression, Expression)] = Some((e.left, e.right)) +} + + /** * A [[BinaryExpression]] that is an operator, with two properties: * diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index aa8540fb44556..fdb9c5b4821dd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -99,6 +99,7 @@ abstract class Optimizer(catalogManager: CatalogManager) LikeSimplification, BooleanSimplification, SimplifyConditionals, + PushFoldableIntoBranches, RemoveDispensableExpressions, SimplifyBinaryComparison, ReplaceNullWithFalseInPredicate, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 7666c4a53e5dd..e6730c9275a1e 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -21,7 +21,7 @@ import scala.collection.immutable.HashSet import scala.collection.mutable.{ArrayBuffer, Stack} import org.apache.spark.sql.catalyst.analysis._ -import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.{BinaryExpression, _} import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull @@ -528,6 +528,48 @@ object SimplifyConditionals extends Rule[LogicalPlan] with PredicateHelper { } +/** + * Push the foldable expression into (if / case) branches. + */ +object PushFoldableIntoBranches extends Rule[LogicalPlan] with PredicateHelper { + + // To be conservative here: it's only a guaranteed win if all but at most only one branch + // end up being not foldable. + private def atMostOneUnfoldable(exprs: Seq[Expression]): Boolean = { + val (foldables, others) = exprs.partition(_.foldable) + foldables.nonEmpty && others.length < 2 + } + + def apply(plan: LogicalPlan): LogicalPlan = plan transform { + case q: LogicalPlan => q transformExpressionsUp { + case b @ BinaryExpression(i @ If(_, trueValue, falseValue), right) + if right.foldable && atMostOneUnfoldable(Seq(trueValue, falseValue)) => + i.copy( + trueValue = b.makeCopy(Array(trueValue, right)), + falseValue = b.makeCopy(Array(falseValue, right))) + + case b @ BinaryExpression(left, i @ If(_, trueValue, falseValue)) + if left.foldable && atMostOneUnfoldable(Seq(trueValue, falseValue)) => + i.copy( + trueValue = b.makeCopy(Array(left, trueValue)), + falseValue = b.makeCopy(Array(left, falseValue))) + + case b @ BinaryExpression(c @ CaseWhen(branches, elseValue), right) + if right.foldable && atMostOneUnfoldable(branches.map(_._2) ++ elseValue) => + c.copy( + branches.map(e => e.copy(_2 = b.makeCopy(Array(e._2, right)))), + elseValue.map(e => b.makeCopy(Array(e, right)))) + + case b @ BinaryExpression(left, c @ CaseWhen(branches, elseValue)) + if left.foldable && atMostOneUnfoldable(branches.map(_._2) ++ elseValue) => + c.copy( + branches.map(e => e.copy(_2 = b.makeCopy(Array(left, e._2)))), + elseValue.map(e => b.makeCopy(Array(left, e)))) + } + } +} + + /** * Simplifies LIKE expressions that do not need full regular expressions to evaluate the condition. * For example, when the expression is just checking to see if a string starts with a given diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala new file mode 100644 index 0000000000000..43360af46ffb3 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala @@ -0,0 +1,225 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import java.sql.Date + +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules._ +import org.apache.spark.sql.types.{BooleanType, IntegerType} + + +class PushFoldableIntoBranchesSuite + extends PlanTest with ExpressionEvalHelper with PredicateHelper { + + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = Batch("PushFoldableIntoBranches", FixedPoint(50), + BooleanSimplification, ConstantFolding, SimplifyConditionals, PushFoldableIntoBranches) :: Nil + } + + private val relation = LocalRelation('a.int, 'b.int, 'c.boolean) + private val a = EqualTo(UnresolvedAttribute("a"), Literal(100)) + private val b = UnresolvedAttribute("b") + private val c = EqualTo(UnresolvedAttribute("c"), Literal(true)) + private val ifExp = If(a, Literal(2), Literal(3)) + private val caseWhen = CaseWhen(Seq((a, Literal(1)), (c, Literal(2))), Some(Literal(3))) + + protected def assertEquivalent(e1: Expression, e2: Expression): Unit = { + val correctAnswer = Project(Alias(e2, "out")() :: Nil, relation).analyze + val actual = Optimize.execute(Project(Alias(e1, "out")() :: Nil, relation).analyze) + comparePlans(actual, correctAnswer) + } + + test("Push down EqualTo through If") { + assertEquivalent(EqualTo(ifExp, Literal(4)), FalseLiteral) + assertEquivalent(EqualTo(ifExp, Literal(3)), If(a, FalseLiteral, TrueLiteral)) + + // Push down at most one not foldable expressions. + assertEquivalent( + EqualTo(If(a, b, Literal(2)), Literal(2)), + If(a, EqualTo(b, Literal(2)), TrueLiteral)) + assertEquivalent( + EqualTo(If(a, b, b + 1), Literal(2)), + EqualTo(If(a, b, b + 1), Literal(2))) + + // Push down non-deterministic expressions. + val nonDeterministic = If(LessThan(Rand(1), Literal(0.5)), Literal(1), Literal(2)) + assert(!nonDeterministic.deterministic) + assertEquivalent(EqualTo(nonDeterministic, Literal(2)), + If(LessThan(Rand(1), Literal(0.5)), FalseLiteral, TrueLiteral)) + assertEquivalent(EqualTo(nonDeterministic, Literal(3)), + If(LessThan(Rand(1), Literal(0.5)), FalseLiteral, FalseLiteral)) + + // Handle Null values. 
+ assertEquivalent( + EqualTo(If(a, Literal(null, IntegerType), Literal(1)), Literal(1)), + If(a, Literal(null, BooleanType), TrueLiteral)) + assertEquivalent( + EqualTo(If(a, Literal(null, IntegerType), Literal(1)), Literal(2)), + If(a, Literal(null, BooleanType), FalseLiteral)) + assertEquivalent( + EqualTo(If(a, Literal(1), Literal(2)), Literal(null, IntegerType)), + Literal(null, BooleanType)) + assertEquivalent( + EqualTo(If(a, Literal(null, IntegerType), Literal(null, IntegerType)), Literal(1)), + Literal(null, BooleanType)) + } + + test("Push down other BinaryComparison through If") { + assertEquivalent(EqualNullSafe(ifExp, Literal(4)), FalseLiteral) + assertEquivalent(GreaterThan(ifExp, Literal(4)), FalseLiteral) + assertEquivalent(GreaterThanOrEqual(ifExp, Literal(4)), FalseLiteral) + assertEquivalent(LessThan(ifExp, Literal(4)), TrueLiteral) + assertEquivalent(LessThanOrEqual(ifExp, Literal(4)), TrueLiteral) + } + + test("Push down other BinaryOperator through If") { + assertEquivalent(Add(ifExp, Literal(4)), If(a, Literal(6), Literal(7))) + assertEquivalent(Subtract(ifExp, Literal(4)), If(a, Literal(-2), Literal(-1))) + assertEquivalent(Multiply(ifExp, Literal(4)), If(a, Literal(8), Literal(12))) + assertEquivalent(Pmod(ifExp, Literal(4)), If(a, Literal(2), Literal(3))) + assertEquivalent(Remainder(ifExp, Literal(4)), If(a, Literal(2), Literal(3))) + assertEquivalent(Divide(If(a, Literal(2.0), Literal(3.0)), Literal(1.0)), + If(a, Literal(2.0), Literal(3.0))) + assertEquivalent(And(If(a, FalseLiteral, TrueLiteral), TrueLiteral), + If(a, FalseLiteral, TrueLiteral)) + assertEquivalent(Or(If(a, FalseLiteral, TrueLiteral), TrueLiteral), TrueLiteral) + } + + test("Push down other BinaryExpression through If") { + assertEquivalent(BRound(If(a, Literal(1.23), Literal(1.24)), Literal(1)), Literal(1.2)) + assertEquivalent(StartsWith(If(a, Literal("ab"), Literal("ac")), Literal("a")), TrueLiteral) + assertEquivalent(FindInSet(If(a, Literal("ab"), Literal("ac")), Literal("a")), Literal(0)) + assertEquivalent( + AddMonths(If(a, Literal(Date.valueOf("2020-01-01")), Literal(Date.valueOf("2021-01-01"))), + Literal(1)), + If(a, Literal(Date.valueOf("2020-02-01")), Literal(Date.valueOf("2021-02-01")))) + } + + test("Push down EqualTo through CaseWhen") { + assertEquivalent(EqualTo(caseWhen, Literal(4)), FalseLiteral) + assertEquivalent(EqualTo(caseWhen, Literal(3)), + CaseWhen(Seq((a, FalseLiteral), (c, FalseLiteral)), Some(TrueLiteral))) + assertEquivalent( + EqualTo(CaseWhen(Seq((a, Literal(1)), (c, Literal(2))), None), Literal(4)), + CaseWhen(Seq((a, FalseLiteral), (c, FalseLiteral)), None)) + + assertEquivalent( + And(EqualTo(caseWhen, Literal(5)), EqualTo(caseWhen, Literal(6))), + FalseLiteral) + + // Push down at most one branch is not foldable expressions. + assertEquivalent(EqualTo(CaseWhen(Seq((a, b), (c, Literal(1))), None), Literal(1)), + CaseWhen(Seq((a, EqualTo(b, Literal(1))), (c, TrueLiteral)), None)) + assertEquivalent(EqualTo(CaseWhen(Seq((a, b), (c, b + 1)), None), Literal(1)), + EqualTo(CaseWhen(Seq((a, b), (c, b + 1)), None), Literal(1))) + assertEquivalent(EqualTo(CaseWhen(Seq((a, b)), None), Literal(1)), + EqualTo(CaseWhen(Seq((a, b)), None), Literal(1))) + + // Push down non-deterministic expressions. 
+ val nonDeterministic = + CaseWhen(Seq((LessThan(Rand(1), Literal(0.5)), Literal(1))), Some(Literal(2))) + assert(!nonDeterministic.deterministic) + assertEquivalent(EqualTo(nonDeterministic, Literal(2)), + CaseWhen(Seq((LessThan(Rand(1), Literal(0.5)), FalseLiteral)), Some(TrueLiteral))) + assertEquivalent(EqualTo(nonDeterministic, Literal(3)), + CaseWhen(Seq((LessThan(Rand(1), Literal(0.5)), FalseLiteral)), Some(FalseLiteral))) + + // Handle Null values. + assertEquivalent( + EqualTo(CaseWhen(Seq((a, Literal(null, IntegerType))), Some(Literal(1))), Literal(2)), + CaseWhen(Seq((a, Literal(null, BooleanType))), Some(FalseLiteral))) + assertEquivalent( + EqualTo(CaseWhen(Seq((a, Literal(1))), Some(Literal(2))), Literal(null, IntegerType)), + Literal(null, BooleanType)) + assertEquivalent( + EqualTo(CaseWhen(Seq((a, Literal(null, IntegerType))), Some(Literal(1))), Literal(1)), + CaseWhen(Seq((a, Literal(null, BooleanType))), Some(TrueLiteral))) + assertEquivalent( + EqualTo(CaseWhen(Seq((a, Literal(null, IntegerType))), Some(Literal(null, IntegerType))), + Literal(1)), + Literal(null, BooleanType)) + assertEquivalent( + EqualTo(CaseWhen(Seq((a, Literal(null, IntegerType))), Some(Literal(null, IntegerType))), + Literal(null, IntegerType)), + Literal(null, BooleanType)) + } + + test("Push down other BinaryComparison through CaseWhen") { + assertEquivalent(EqualNullSafe(caseWhen, Literal(4)), FalseLiteral) + assertEquivalent(GreaterThan(caseWhen, Literal(4)), FalseLiteral) + assertEquivalent(GreaterThanOrEqual(caseWhen, Literal(4)), FalseLiteral) + assertEquivalent(LessThan(caseWhen, Literal(4)), TrueLiteral) + assertEquivalent(LessThanOrEqual(caseWhen, Literal(4)), TrueLiteral) + } + + test("Push down other BinaryOperator through CaseWhen") { + assertEquivalent(Add(caseWhen, Literal(4)), + CaseWhen(Seq((a, Literal(5)), (c, Literal(6))), Some(Literal(7)))) + assertEquivalent(Subtract(caseWhen, Literal(4)), + CaseWhen(Seq((a, Literal(-3)), (c, Literal(-2))), Some(Literal(-1)))) + assertEquivalent(Multiply(caseWhen, Literal(4)), + CaseWhen(Seq((a, Literal(4)), (c, Literal(8))), Some(Literal(12)))) + assertEquivalent(Pmod(caseWhen, Literal(4)), + CaseWhen(Seq((a, Literal(1)), (c, Literal(2))), Some(Literal(3)))) + assertEquivalent(Remainder(caseWhen, Literal(4)), + CaseWhen(Seq((a, Literal(1)), (c, Literal(2))), Some(Literal(3)))) + assertEquivalent(Divide(CaseWhen(Seq((a, Literal(1.0)), (c, Literal(2.0))), Some(Literal(3.0))), + Literal(1.0)), + CaseWhen(Seq((a, Literal(1.0)), (c, Literal(2.0))), Some(Literal(3.0)))) + assertEquivalent(And(CaseWhen(Seq((a, FalseLiteral), (c, TrueLiteral)), Some(TrueLiteral)), + TrueLiteral), + CaseWhen(Seq((a, FalseLiteral), (c, TrueLiteral)), Some(TrueLiteral))) + assertEquivalent(Or(CaseWhen(Seq((a, FalseLiteral), (c, TrueLiteral)), Some(TrueLiteral)), + TrueLiteral), TrueLiteral) + } + + test("Push down other BinaryExpression through CaseWhen") { + assertEquivalent( + BRound(CaseWhen(Seq((a, Literal(1.23)), (c, Literal(1.24))), Some(Literal(1.25))), + Literal(1)), + Literal(1.2)) + assertEquivalent( + StartsWith(CaseWhen(Seq((a, Literal("ab")), (c, Literal("ac"))), Some(Literal("ad"))), + Literal("a")), + TrueLiteral) + assertEquivalent( + FindInSet(CaseWhen(Seq((a, Literal("ab")), (c, Literal("ac"))), Some(Literal("ad"))), + Literal("a")), + Literal(0)) + assertEquivalent( + AddMonths(CaseWhen(Seq((a, Literal(Date.valueOf("2020-01-01"))), + (c, Literal(Date.valueOf("2021-01-01")))), + Some(Literal(Date.valueOf("2022-01-01")))), + Literal(1)), + 
CaseWhen(Seq((a, Literal(Date.valueOf("2020-02-01"))), + (c, Literal(Date.valueOf("2021-02-01")))), + Some(Literal(Date.valueOf("2022-02-01"))))) + } + + test("Push down BinaryExpression through If/CaseWhen backwards") { + assertEquivalent(EqualTo(Literal(4), ifExp), FalseLiteral) + assertEquivalent(EqualTo(Literal(4), caseWhen), FalseLiteral) + } +} From f23912880269723f02eadc2af4b2816c957c2357 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Fri, 18 Dec 2020 13:47:31 +0000 Subject: [PATCH 0811/1009] [SPARK-33597][SQL] Support REGEXP_LIKE for consistent with mainstream databases ### What changes were proposed in this pull request? There are a lot of mainstream databases support regex function `REGEXP_LIKE`. Currently, Spark supports `RLike` and we just need add a new alias `REGEXP_LIKE` for it. **Oracle** https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Pattern-matching-Conditions.html#GUID-D2124F3A-C6E4-4CCA-A40E-2FFCABFD8E19 **Presto** https://prestodb.io/docs/current/functions/regexp.html **Vertica** https://www.vertica.com/docs/9.2.x/HTML/Content/Authoring/SQLReferenceManual/Functions/RegularExpressions/REGEXP_LIKE.htm?tocpath=SQL%20Reference%20Manual%7CSQL%20Functions%7CRegular%20Expression%20Functions%7C_____5 **Snowflake** https://docs.snowflake.com/en/sql-reference/functions/regexp_like.html **Additional modifications** 1. Because test case named `check outputs of expression examples` in ExpressionInfoSuite executes the example SQL of built-in function, so the below SQL be executed: `SELECT '%SystemDrive%\Users\John' regexp_like '%SystemDrive%\\Users.*'` But Spark SQL not supports this syntax yet. 2. Another reason: `SELECT '%SystemDrive%\Users\John' _FUNC_ '%SystemDrive%\\Users.*';` is an SQL syntax, not the usecase for function `RLike`. As the above reason, this PR changes the example SQL of `RLike`. ### Why are the changes needed? No ### Does this PR introduce _any_ user-facing change? Make the behavior of Spark SQL consistent with mainstream databases. ### How was this patch tested? Jenkins test Closes #30543 from beliefer/SPARK-33597. 
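For reference, a quick usage sketch of the new alias (mirroring the queries added to regexp-functions.sql below; a running `spark` session and the default parser settings are assumed):
```scala
// regexp_like(str, regexp) is registered as an alias of RLike, i.e. str RLIKE regexp.
spark.sql("SELECT regexp_like('1a 2b 14m', '\\\\d+b')").show()   // true
spark.sql("SELECT regexp_like('1a 2b 14m', '[a-z]+b')").show()   // false
```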
Lead-authored-by: gengjiaan Co-authored-by: beliefer Co-authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../catalyst/analysis/FunctionRegistry.scala | 1 + .../expressions/regexpExpressions.scala | 13 +++++++------ .../sql-functions/sql-expression-schema.md | 5 +++-- .../sql-tests/inputs/regexp-functions.sql | 6 +++++- .../sql-tests/results/regexp-functions.sql.out | 18 +++++++++++++++++- 5 files changed, 33 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 3b46de539ce3d..4e2f01ac2db93 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -356,6 +356,7 @@ object FunctionRegistry { expression[RegExpExtract]("regexp_extract"), expression[RegExpExtractAll]("regexp_extract_all"), expression[RegExpReplace]("regexp_replace"), + expression[RLike]("regexp_like", true), expression[StringRepeat]("repeat"), expression[StringReplace]("replace"), expression[Overlay]("overlay"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index 28c9aefb42837..3a421f5075a6f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -68,8 +68,6 @@ abstract class StringRegexExpression extends BinaryExpression matches(regex, input1.asInstanceOf[UTF8String].toString) } } - - override def sql: String = s"${left.sql} ${prettyName.toUpperCase(Locale.ROOT)} ${right.sql}" } // scalastyle:off line.contains.tab @@ -134,6 +132,8 @@ case class Like(left: Expression, right: Expression, escapeChar: Char) case c => s"$left LIKE $right ESCAPE '$c'" } + override def sql: String = s"${left.sql} ${prettyName.toUpperCase(Locale.ROOT)} ${right.sql}" + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val patternClass = classOf[Pattern].getName val escapeFunc = StringUtils.getClass.getName.stripSuffix("$") + ".escapeLikeRegex" @@ -330,7 +330,7 @@ case class NotLikeAny(child: Expression, patterns: Seq[UTF8String]) extends Like // scalastyle:off line.contains.tab @ExpressionDescription( - usage = "str _FUNC_ regexp - Returns true if `str` matches `regexp`, or false otherwise.", + usage = "_FUNC_(str, regexp) - Returns true if `str` matches `regexp`, or false otherwise.", arguments = """ Arguments: * str - a string expression @@ -348,11 +348,11 @@ case class NotLikeAny(child: Expression, patterns: Seq[UTF8String]) extends Like Examples: > SET spark.sql.parser.escapedStringLiterals=true; spark.sql.parser.escapedStringLiterals true - > SELECT '%SystemDrive%\Users\John' _FUNC_ '%SystemDrive%\\Users.*'; + > SELECT _FUNC_('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*'); true > SET spark.sql.parser.escapedStringLiterals=false; spark.sql.parser.escapedStringLiterals false - > SELECT '%SystemDrive%\\Users\\John' _FUNC_ '%SystemDrive%\\\\Users.*'; + > SELECT _FUNC_('%SystemDrive%\\Users\\John', '%SystemDrive%\\\\Users.*'); true """, note = """ @@ -364,7 +364,8 @@ case class RLike(left: Expression, right: Expression) extends StringRegexExpress override def escape(v: String): String = v 
override def matches(regex: Pattern, str: String): Boolean = regex.matcher(str).find(0) - override def toString: String = s"$left RLIKE $right" + override def toString: String = s"RLIKE($left, $right)" + override def sql: String = s"${prettyName.toUpperCase(Locale.ROOT)}(${left.sql}, ${right.sql})" override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val patternClass = classOf[Pattern].getName diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index a6d041a588a6d..c681730569978 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,6 +1,6 @@ ## Summary - - Number of queries: 347 + - Number of queries: 348 - Number of expressions that missing example: 13 - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint,window ## Schema of Built-in Functions @@ -211,7 +211,8 @@ | org.apache.spark.sql.catalyst.expressions.Pow | pow | SELECT pow(2, 3) | struct | | org.apache.spark.sql.catalyst.expressions.Pow | power | SELECT power(2, 3) | struct | | org.apache.spark.sql.catalyst.expressions.Quarter | quarter | SELECT quarter('2016-08-31') | struct | -| org.apache.spark.sql.catalyst.expressions.RLike | rlike | SELECT '%SystemDrive%\Users\John' rlike '%SystemDrive%\\Users.*' | struct<%SystemDrive%UsersJohn RLIKE %SystemDrive%\Users.*:boolean> | +| org.apache.spark.sql.catalyst.expressions.RLike | regexp_like | SELECT regexp_like('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct | +| org.apache.spark.sql.catalyst.expressions.RLike | rlike | SELECT rlike('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct | | org.apache.spark.sql.catalyst.expressions.RaiseError | raise_error | SELECT raise_error('custom error message') | struct | | org.apache.spark.sql.catalyst.expressions.Rand | rand | SELECT rand() | struct | | org.apache.spark.sql.catalyst.expressions.Rand | random | SELECT random() | struct | diff --git a/sql/core/src/test/resources/sql-tests/inputs/regexp-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/regexp-functions.sql index 3f3eaaae9ee4e..12b34ff7d54b1 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/regexp-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/regexp-functions.sql @@ -42,4 +42,8 @@ SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something', 8); SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', 26); SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', 27); SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', 30); -SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', null); \ No newline at end of file +SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', null); + +-- regexp_like +SELECT regexp_like('1a 2b 14m', '\\d+b'); +SELECT regexp_like('1a 2b 14m', '[a-z]+b'); \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out index f2a4131818bfb..60b3e7dbb74f1 100644 --- a/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 40 
+-- Number of queries: 42 -- !query @@ -334,3 +334,19 @@ SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', null) struct -- !query output NULL + + +-- !query +SELECT regexp_like('1a 2b 14m', '\\d+b') +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT regexp_like('1a 2b 14m', '[a-z]+b') +-- !query schema +struct +-- !query output +false \ No newline at end of file From 6dca2e5d35c0b1604d0264250872b87bd0b832f6 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Fri, 18 Dec 2020 14:12:35 +0000 Subject: [PATCH 0812/1009] [SPARK-33599][SQL] Group exception messages in catalyst/analysis ### What changes were proposed in this pull request? This PR group exception messages in `/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis`. ### Why are the changes needed? It will largely help with standardization of error messages and its maintenance. ### Does this PR introduce _any_ user-facing change? No. Error messages remain unchanged. ### How was this patch tested? No new tests - pass all original tests to make sure it doesn't break any existing behavior. Closes #30717 from beliefer/SPARK-33599. Lead-authored-by: gengjiaan Co-authored-by: beliefer Co-authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../spark/sql/QueryCompilationErrors.scala | 187 +++++++++++++++++- .../spark/sql/QueryExecutionErrors.scala | 59 ++++++ .../sql/catalyst/analysis/Analyzer.scala | 24 +-- .../sql/catalyst/analysis/CheckAnalysis.scala | 22 +-- .../catalyst/analysis/FunctionRegistry.scala | 20 +- .../catalyst/analysis/ResolveCatalogs.scala | 5 +- .../sql/catalyst/analysis/ResolveHints.scala | 14 +- .../sql/catalyst/analysis/unresolved.scala | 16 +- .../analysis/ResolveSessionCatalog.scala | 79 +++----- 9 files changed, 310 insertions(+), 116 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/QueryExecutionErrors.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala index 87387b18dbab4..3ef17ab7aed0a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala @@ -19,20 +19,19 @@ package org.apache.spark.sql.errors import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.ResolvedView -import org.apache.spark.sql.catalyst.expressions.{Alias, Expression, GroupingID, NamedExpression, SpecifiedWindowFrame, WindowFrame, WindowFunction, WindowSpecDefinition} -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.analysis.{ResolvedNamespace, ResolvedView} +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, Expression, GroupingID, NamedExpression, SpecifiedWindowFrame, WindowFrame, WindowFunction, WindowSpecDefinition} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SerdeInfo} import org.apache.spark.sql.catalyst.trees.TreeNode import org.apache.spark.sql.catalyst.util.toPrettySQL +import org.apache.spark.sql.connector.catalog.{TableChange, V1Table} import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ -import org.apache.spark.sql.connector.catalog.TableChange import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{AbstractDataType, DataType, StructType} /** * Object for grouping all error 
messages of the query compilation. - * Currently it includes all AnalysisExcpetions created and thrown directly in - * org.apache.spark.sql.catalyst.analysis.Analyzer. + * Currently it includes all [[AnalysisException]]s. */ object QueryCompilationErrors { @@ -185,6 +184,11 @@ object QueryCompilationErrors { "did not appear in any aggregate function.") } + def writeIntoTempViewNotAllowedError(quoted: String): Throwable = { + new AnalysisException("Cannot write into temp view " + + s"$quoted as it's not a data source v2 relation.") + } + def expectTableNotTempViewError(quoted: String, cmd: String, t: TreeNode[_]): Throwable = { new AnalysisException(s"$quoted is a temp view. '$cmd' expects a table", t.origin.line, t.origin.startPosition) @@ -196,6 +200,11 @@ object QueryCompilationErrors { t.origin.line, t.origin.startPosition) } + def readNonStreamingTempViewError(quoted: String): Throwable = { + new AnalysisException(s"$quoted is not a temp view of streaming " + + "logical plan, please use batch API such as `DataFrameReader.table` to read it.") + } + def viewDepthExceedsMaxResolutionDepthError( identifier: TableIdentifier, maxNestedViewDepth: Int, t: TreeNode[_]): Throwable = { new AnalysisException(s"The depth of view $identifier exceeds the maximum " + @@ -225,6 +234,11 @@ object QueryCompilationErrors { t.origin.line, t.origin.startPosition) } + def permanentViewNotSupportedByStreamingReadingAPIError(quoted: String): Throwable = { + new AnalysisException(s"$quoted is a permanent view, which is not supported by " + + "streaming reading API such as `DataStreamReader.table` yet.") + } + def starNotAllowedWhenGroupByOrdinalPositionUsedError(): Throwable = { new AnalysisException( "Star (*) is not allowed in select list when GROUP BY ordinal position is used") @@ -326,4 +340,165 @@ object QueryCompilationErrors { "of rows, therefore they are currently not supported.", t.origin.line, t.origin.startPosition) } + def viewOutputNumberMismatchQueryColumnNamesError( + output: Seq[Attribute], queryColumnNames: Seq[String]): Throwable = { + new AnalysisException( + s"The view output ${output.mkString("[", ",", "]")} doesn't have the same" + + "number of columns with the query column names " + + s"${queryColumnNames.mkString("[", ",", "]")}") + } + + def attributeNotFoundError(colName: String, child: LogicalPlan): Throwable = { + new AnalysisException( + s"Attribute with name '$colName' is not found in " + + s"'${child.output.map(_.name).mkString("(", ",", ")")}'") + } + + def cannotUpCastAsAttributeError( + fromAttr: Attribute, toAttr: Attribute): Throwable = { + new AnalysisException(s"Cannot up cast ${fromAttr.sql} from " + + s"${fromAttr.dataType.catalogString} to ${toAttr.dataType.catalogString} " + + "as it may truncate") + } + + def functionUndefinedError(name: FunctionIdentifier): Throwable = { + new AnalysisException(s"undefined function $name") + } + + def invalidFunctionArgumentNumberError( + validParametersCount: Seq[Int], name: String, params: Seq[Class[Expression]]): Throwable = { + val invalidArgumentsMsg = if (validParametersCount.length == 0) { + s"Invalid arguments for function $name" + } else { + val expectedNumberOfParameters = if (validParametersCount.length == 1) { + validParametersCount.head.toString + } else { + validParametersCount.init.mkString("one of ", ", ", " and ") + + validParametersCount.last + } + s"Invalid number of arguments for function $name. 
" + + s"Expected: $expectedNumberOfParameters; Found: ${params.length}" + } + new AnalysisException(invalidArgumentsMsg) + } + + def functionAcceptsOnlyOneArgumentError(name: String): Throwable = { + new AnalysisException(s"Function $name accepts only one argument") + } + + def alterV2TableSetLocationWithPartitionNotSupportedError(): Throwable = { + new AnalysisException("ALTER TABLE SET LOCATION does not support partition for v2 tables.") + } + + def joinStrategyHintParameterNotSupportedError(unsupported: Any): Throwable = { + new AnalysisException("Join strategy hint parameter " + + s"should be an identifier or string but was $unsupported (${unsupported.getClass}") + } + + def invalidHintParameterError( + hintName: String, invalidParams: Seq[Any]): Throwable = { + new AnalysisException(s"$hintName Hint parameter should include columns, but " + + s"${invalidParams.mkString(", ")} found") + } + + def invalidCoalesceHintParameterError(hintName: String): Throwable = { + new AnalysisException(s"$hintName Hint expects a partition number as a parameter") + } + + def attributeNameSyntaxError(name: String): Throwable = { + new AnalysisException(s"syntax error in attribute name: $name") + } + + def starExpandDataTypeNotSupportedError(attributes: Seq[String]): Throwable = { + new AnalysisException(s"Can only star expand struct data types. Attribute: `$attributes`") + } + + def cannotResolveStarExpandGivenInputColumnsError( + targetString: String, columns: String): Throwable = { + new AnalysisException(s"cannot resolve '$targetString.*' given input columns '$columns'") + } + + def addColumnWithV1TableCannotSpecifyNotNullError(): Throwable = { + new AnalysisException("ADD COLUMN with v1 tables cannot specify NOT NULL.") + } + + def replaceColumnsOnlySupportedWithV2TableError(): Throwable = { + new AnalysisException("REPLACE COLUMNS is only supported with v2 tables.") + } + + def alterQualifiedColumnOnlySupportedWithV2TableError(): Throwable = { + new AnalysisException("ALTER COLUMN with qualified column is only supported with v2 tables.") + } + + def alterColumnWithV1TableCannotSpecifyNotNullError(): Throwable = { + new AnalysisException("ALTER COLUMN with v1 tables cannot specify NOT NULL.") + } + + def alterOnlySupportedWithV2TableError(): Throwable = { + new AnalysisException("ALTER COLUMN ... FIRST | ALTER is only supported with v2 tables.") + } + + def alterColumnCannotFindColumnInV1TableError(colName: String, v1Table: V1Table): Throwable = { + new AnalysisException( + s"ALTER COLUMN cannot find column $colName in v1 table. " + + s"Available: ${v1Table.schema.fieldNames.mkString(", ")}") + } + + def renameColumnOnlySupportedWithV2TableError(): Throwable = { + new AnalysisException("RENAME COLUMN is only supported with v2 tables.") + } + + def dropColumnOnlySupportedWithV2TableError(): Throwable = { + new AnalysisException("DROP COLUMN is only supported with v2 tables.") + } + + def invalidDatabaseNameError(quoted: String): Throwable = { + new AnalysisException(s"The database name is not valid: $quoted") + } + + def replaceTableOnlySupportedWithV2TableError(): Throwable = { + new AnalysisException("REPLACE TABLE is only supported with v2 tables.") + } + + def replaceTableAsSelectOnlySupportedWithV2TableError(): Throwable = { + new AnalysisException("REPLACE TABLE AS SELECT is only supported with v2 tables.") + } + + def cannotDropViewWithDropTableError(): Throwable = { + new AnalysisException("Cannot drop a view with DROP TABLE. 
Please use DROP VIEW instead") + } + + def showColumnsWithConflictDatabasesError( + db: Seq[String], v1TableName: TableIdentifier): Throwable = { + new AnalysisException("SHOW COLUMNS with conflicting databases: " + + s"'${db.head}' != '${v1TableName.database.get}'") + } + + def externalCatalogNotSupportShowViewsError(resolved: ResolvedNamespace): Throwable = { + new AnalysisException(s"Catalog ${resolved.catalog.name} doesn't support " + + "SHOW VIEWS, only SessionCatalog supports this command.") + } + + def unsupportedFunctionNameError(quoted: String): Throwable = { + new AnalysisException(s"Unsupported function name '$quoted'") + } + + def sqlOnlySupportedWithV1TablesError(sql: String): Throwable = { + new AnalysisException(s"$sql is only supported with v1 tables.") + } + + def cannotCreateTableWithBothProviderAndSerdeError( + provider: Option[String], maybeSerdeInfo: Option[SerdeInfo]): Throwable = { + new AnalysisException( + s"Cannot create table with both USING $provider and ${maybeSerdeInfo.get.describe}") + } + + def invalidFileFormatForStoredAsError(serdeInfo: SerdeInfo): Throwable = { + new AnalysisException( + s"STORED AS with file format '${serdeInfo.storedAs.get}' is invalid.") + } + + def commandNotSupportNestedColumnError(command: String, quoted: String): Throwable = { + new AnalysisException(s"$command does not support nested column: $quoted") + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryExecutionErrors.scala new file mode 100644 index 0000000000000..65d280ab10037 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryExecutionErrors.scala @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.errors + +import org.apache.spark.sql.catalyst.analysis.UnresolvedGenerator +import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan} + +/** + * Object for grouping all error messages of the query runtime. + * Currently it includes all [[SparkException]]s and RuntimeExceptions(e.g. + * UnsupportedOperationException, IllegalStateException). 
+ */ +object QueryExecutionErrors { + + def columnChangeUnsupportedError(): Throwable = { + new UnsupportedOperationException("Please add an implementation for a column change here") + } + + def unexpectedPlanReturnError(plan: LogicalPlan, methodName: String): Throwable = { + new IllegalStateException(s"[BUG] unexpected plan returned by `$methodName`: $plan") + } + + def logicalHintOperatorNotRemovedDuringAnalysisError(): Throwable = { + new IllegalStateException( + "Internal error: logical hint operator should have been removed during analysis") + } + + def logicalPlanHaveOutputOfCharOrVarcharError(leaf: LeafNode): Throwable = { + new IllegalStateException( + s"[BUG] logical plan should not have output of char/varchar type: $leaf") + } + + def cannotEvaluateGeneratorError(generator: UnresolvedGenerator): Throwable = { + new UnsupportedOperationException(s"Cannot evaluate expression: $generator") + } + + def cannotGenerateCodeForGeneratorError(generator: UnresolvedGenerator): Throwable = { + new UnsupportedOperationException(s"Cannot generate code for expression: $generator") + } + + def cannotTerminateGeneratorError(generator: UnresolvedGenerator): Throwable = { + new UnsupportedOperationException(s"Cannot terminate expression: $generator") + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 10c8ac58840f2..1a5f33443d8e3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -44,7 +44,7 @@ import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, After, ColumnChange, ColumnPosition, DeleteColumn, RenameColumn, UpdateColumnComment, UpdateColumnNullability, UpdateColumnPosition, UpdateColumnType} import org.apache.spark.sql.connector.expressions.{FieldReference, IdentityTransform, Transform} -import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.{PartitionOverwriteMode, StoreAssignmentPolicy} @@ -885,8 +885,7 @@ class Analyzer(override val catalogManager: CatalogManager) case UnresolvedRelation(ident, _, false) => lookupTempView(ident).map(EliminateSubqueryAliases(_)).map { case r: DataSourceV2Relation => write.withNewTable(r) - case _ => throw new AnalysisException("Cannot write into temp view " + - s"${ident.quoted} as it's not a data source v2 relation.") + case _ => throw QueryCompilationErrors.writeIntoTempViewNotAllowedError(ident.quoted) }.getOrElse(write) case _ => write } @@ -927,8 +926,7 @@ class Analyzer(override val catalogManager: CatalogManager) } if (isStreaming && tmpView.nonEmpty && !tmpView.get.isStreaming) { - throw new AnalysisException(s"${identifier.quoted} is not a temp view of streaming " + - s"logical plan, please use batch API such as `DataFrameReader.table` to read it.") + throw QueryCompilationErrors.readNonStreamingTempViewError(identifier.quoted) } tmpView.map(ResolveRelations.resolveViews) } @@ -1020,8 +1018,8 @@ class Analyzer(override val catalogManager: CatalogManager) case u: UnresolvedRelation if !u.isStreaming => 
lookupV2Relation(u.multipartIdentifier, u.options, false).map { case r: DataSourceV2Relation => write.withNewTable(r) - case other => throw new IllegalStateException( - "[BUG] unexpected plan returned by `lookupV2Relation`: " + other) + case other => + throw QueryExecutionErrors.unexpectedPlanReturnError(other, "lookupV2Relation") }.getOrElse(write) case _ => write } @@ -1132,8 +1130,8 @@ class Analyzer(override val catalogManager: CatalogManager) throw QueryCompilationErrors.writeIntoV1TableNotAllowedError( u.tableMeta.identifier, write) case r: DataSourceV2Relation => write.withNewTable(r) - case other => throw new IllegalStateException( - "[BUG] unexpected plan returned by `lookupRelation`: " + other) + case other => + throw QueryExecutionErrors.unexpectedPlanReturnError(other, "lookupRelation") }.getOrElse(write) case _ => write } @@ -1187,9 +1185,8 @@ class Analyzer(override val catalogManager: CatalogManager) case v1Table: V1Table => if (isStreaming) { if (v1Table.v1Table.tableType == CatalogTableType.VIEW) { - throw new AnalysisException(s"${identifier.quoted} is a permanent view, " + - "which is not supported by streaming reading API such as " + - "`DataStreamReader.table` yet.") + throw QueryCompilationErrors.permanentViewNotSupportedByStreamingReadingAPIError( + identifier.quoted) } SubqueryAlias( catalog.name +: ident.asMultipartIdentifier, @@ -3546,8 +3543,7 @@ class Analyzer(override val catalogManager: CatalogManager) case column: ColumnChange => // This is informational for future developers - throw new UnsupportedOperationException( - "Please add an implementation for a column change here") + throw QueryExecutionErrors.columnChangeUnsupportedError case other => Some(other) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index c5a63546c01e3..c0cdcdf2d9577 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, TypeUtils} import org.apache.spark.sql.connector.catalog.{LookupCatalog, SupportsAtomicPartitionManagement, SupportsPartitionManagement, Table} import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, After, ColumnPosition, DeleteColumn, RenameColumn, UpdateColumnComment, UpdateColumnNullability, UpdateColumnPosition, UpdateColumnType} +import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -95,8 +96,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { case p if p.analyzed => // Skip already analyzed sub-plans case leaf: LeafNode if leaf.output.map(_.dataType).exists(CharVarcharUtils.hasCharVarchar) => - throw new IllegalStateException( - "[BUG] logical plan should not have output of char/varchar type: " + leaf) + throw QueryExecutionErrors.logicalPlanHaveOutputOfCharOrVarcharError(leaf) case u: UnresolvedNamespace => u.failAnalysis(s"Namespace not found: ${u.multipartIdentifier.quoted}") @@ -428,18 +428,14 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { if (output.length != queryColumnNames.length) { // If the view output doesn't have the same number of columns with the query column // names, throw an 
AnalysisException. - throw new AnalysisException( - s"The view output ${output.mkString("[", ",", "]")} doesn't have the same" + - "number of columns with the query column names " + - s"${queryColumnNames.mkString("[", ",", "]")}") + throw QueryCompilationErrors.viewOutputNumberMismatchQueryColumnNamesError( + output, queryColumnNames) } val resolver = SQLConf.get.resolver queryColumnNames.map { colName => child.output.find { attr => resolver(attr.name, colName) - }.getOrElse(throw new AnalysisException( - s"Attribute with name '$colName' is not found in " + - s"'${child.output.map(_.name).mkString("(", ",", ")")}'")) + }.getOrElse(throw QueryCompilationErrors.attributeNotFoundError(colName, child)) } } else { child.output @@ -451,9 +447,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { // output, so we should cast the attribute to the dataType of the view output // attribute. Will throw an AnalysisException if the cast is not a up-cast. if (!Cast.canUpCast(originAttr.dataType, attr.dataType)) { - throw new AnalysisException(s"Cannot up cast ${originAttr.sql} from " + - s"${originAttr.dataType.catalogString} to ${attr.dataType.catalogString} " + - "as it may truncate\n") + throw QueryCompilationErrors.cannotUpCastAsAttributeError( + originAttr, attr) } case _ => } @@ -671,8 +666,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { """.stripMargin) case _: UnresolvedHint => - throw new IllegalStateException( - "Internal error: logical hint operator should have been removed during analysis") + throw QueryExecutionErrors.logicalHintOperatorNotRemovedDuringAnalysisError case f @ Filter(condition, _) if PlanHelper.specialExpressionsInUnsupportedOperator(f).nonEmpty => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 4e2f01ac2db93..1a1b619336d54 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.expressions.xml._ import org.apache.spark.sql.catalyst.trees.TreeNodeTag +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.types._ @@ -115,7 +116,7 @@ class SimpleFunctionRegistry extends FunctionRegistry with Logging { override def lookupFunction(name: FunctionIdentifier, children: Seq[Expression]): Expression = { val func = synchronized { functionBuilders.get(normalizeFuncName(name)).map(_._2).getOrElse { - throw new AnalysisException(s"undefined function $name") + throw QueryCompilationErrors.functionUndefinedError(name) } } func(children) @@ -623,19 +624,8 @@ object FunctionRegistry { val validParametersCount = constructors .filter(_.getParameterTypes.forall(_ == classOf[Expression])) .map(_.getParameterCount).distinct.sorted - val invalidArgumentsMsg = if (validParametersCount.length == 0) { - s"Invalid arguments for function $name" - } else { - val expectedNumberOfParameters = if (validParametersCount.length == 1) { - validParametersCount.head.toString - } else { - validParametersCount.init.mkString("one of ", ", ", " and ") + - validParametersCount.last - } - s"Invalid number of arguments for function $name. 
" + - s"Expected: $expectedNumberOfParameters; Found: ${params.length}" - } - throw new AnalysisException(invalidArgumentsMsg) + throw QueryCompilationErrors.invalidFunctionArgumentNumberError( + validParametersCount, name, params) } try { val exp = f.newInstance(expressions : _*).asInstanceOf[Expression] @@ -663,7 +653,7 @@ object FunctionRegistry { dataType: DataType): (String, (ExpressionInfo, FunctionBuilder)) = { val builder = (args: Seq[Expression]) => { if (args.size != 1) { - throw new AnalysisException(s"Function $name accepts only one argument") + throw QueryCompilationErrors.functionAcceptsOnlyOneArgumentError(name) } Cast(args.head, dataType) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala index 14dccd86d2240..0249711101899 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala @@ -17,10 +17,10 @@ package org.apache.spark.sql.catalyst.analysis -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, LookupCatalog, TableCatalog, TableChange} +import org.apache.spark.sql.errors.QueryCompilationErrors /** * Resolves catalogs from the multi-part identifiers in SQL statements, and convert the statements @@ -115,8 +115,7 @@ class ResolveCatalogs(val catalogManager: CatalogManager) case AlterTableSetLocationStatement( nameParts @ NonSessionCatalogAndTable(catalog, tbl), partitionSpec, newLoc) => if (partitionSpec.nonEmpty) { - throw new AnalysisException( - "ALTER TABLE SET LOCATION does not support partition for v2 tables.") + throw QueryCompilationErrors.alterV2TableSetLocationWithPartitionNotSupportedError } val changes = Seq(TableChange.setProperty(TableCatalog.PROP_LOCATION, newLoc)) createAlterTable(nameParts, catalog, tbl, changes) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala index b44ca20e74bb0..ab7a59d4588ea 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala @@ -21,11 +21,11 @@ import java.util.Locale import scala.collection.mutable -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions.{Ascending, Expression, IntegerLiteral, SortOrder} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.CurrentOrigin +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf @@ -153,8 +153,8 @@ object ResolveHints { val relationNamesInHint = h.parameters.map { case tableName: String => UnresolvedAttribute.parseAttributeName(tableName) case tableId: UnresolvedAttribute => tableId.nameParts - case unsupported => throw new AnalysisException("Join strategy hint parameter " + - s"should be an identifier or string but was $unsupported (${unsupported.getClass}") + case unsupported => + throw QueryCompilationErrors.joinStrategyHintParameterNotSupportedError(unsupported) }.toSet val 
relationsInHintWithMatch = new mutable.HashSet[Seq[String]] val applied = applyJoinStrategyHint( @@ -193,8 +193,7 @@ object ResolveHints { """.stripMargin) val invalidParams = partitionExprs.filter(!_.isInstanceOf[UnresolvedAttribute]) if (invalidParams.nonEmpty) { - throw new AnalysisException(s"$hintName Hint parameter should include columns, but " + - s"${invalidParams.mkString(", ")} found") + throw QueryCompilationErrors.invalidHintParameterError(hintName, invalidParams) } RepartitionByExpression( partitionExprs.map(_.asInstanceOf[Expression]), hint.child, numPartitions) @@ -207,7 +206,7 @@ object ResolveHints { Repartition(numPartitions, shuffle, hint.child) // The "COALESCE" hint (shuffle = false) must have a partition number only case _ if !shuffle => - throw new AnalysisException(s"$hintName Hint expects a partition number as a parameter") + throw QueryCompilationErrors.invalidCoalesceHintParameterError(hintName) case param @ Seq(IntegerLiteral(numPartitions), _*) if shuffle => createRepartitionByExpression(Some(numPartitions), param.tail) @@ -229,8 +228,7 @@ object ResolveHints { numPartitions: Option[Int], partitionExprs: Seq[Any]): RepartitionByExpression = { val invalidParams = partitionExprs.filter(!_.isInstanceOf[UnresolvedAttribute]) if (invalidParams.nonEmpty) { - throw new AnalysisException(s"$hintName Hint parameter should include columns, but " + - s"${invalidParams.mkString(", ")} found") + throw QueryCompilationErrors.invalidHintParameterError(hintName, invalidParams) } val sortOrder = partitionExprs.map { case expr: SortOrder => expr diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index efc9e971df72a..8a73208d42e20 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.analysis -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{FunctionIdentifier, InternalRow, TableIdentifier} import org.apache.spark.sql.catalyst.errors.TreeNodeException import org.apache.spark.sql.catalyst.expressions._ @@ -27,6 +26,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Unary import org.apache.spark.sql.catalyst.trees.TreeNode import org.apache.spark.sql.catalyst.util.quoteIdentifier import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} +import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.types.{DataType, Metadata, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -193,7 +193,7 @@ object UnresolvedAttribute { * Escape character is not supported now, so we can't use backtick inside name part. 
*/ def parseAttributeName(name: String): Seq[String] = { - def e = new AnalysisException(s"syntax error in attribute name: $name") + def e = QueryCompilationErrors.attributeNameSyntaxError(name) val nameParts = scala.collection.mutable.ArrayBuffer.empty[String] val tmp = scala.collection.mutable.ArrayBuffer.empty[Char] var inBacktick = false @@ -245,13 +245,13 @@ case class UnresolvedGenerator(name: FunctionIdentifier, children: Seq[Expressio override def toString: String = s"'$name(${children.mkString(", ")})" override def eval(input: InternalRow = null): TraversableOnce[InternalRow] = - throw new UnsupportedOperationException(s"Cannot evaluate expression: $this") + throw QueryExecutionErrors.cannotEvaluateGeneratorError(this) override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = - throw new UnsupportedOperationException(s"Cannot generate code for expression: $this") + throw QueryExecutionErrors.cannotGenerateCodeForGeneratorError(this) override def terminate(): TraversableOnce[InternalRow] = - throw new UnsupportedOperationException(s"Cannot terminate expression: $this") + throw QueryExecutionErrors.cannotTerminateGeneratorError(this) } case class UnresolvedFunction( @@ -358,13 +358,13 @@ case class UnresolvedStar(target: Option[Seq[String]]) extends Star with Unevalu } case _ => - throw new AnalysisException("Can only star expand struct data types. Attribute: `" + - target.get + "`") + throw QueryCompilationErrors.starExpandDataTypeNotSupportedError(target.get) } } else { val from = input.inputSet.map(_.name).mkString(", ") val targetString = target.get.mkString(".") - throw new AnalysisException(s"cannot resolve '$targetString.*' given input columns '$from'") + throw QueryCompilationErrors.cannotResolveStarExpandGivenInputColumnsError( + targetString, from) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 1426d28cbbf88..723647a4a9207 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -17,13 +17,14 @@ package org.apache.spark.sql.catalyst.analysis -import org.apache.spark.sql.{AnalysisException, SaveMode} +import org.apache.spark.sql.SaveMode import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType, CatalogUtils} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, CatalogV2Util, Identifier, LookupCatalog, SupportsNamespaces, TableCatalog, TableChange, V1Table} import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource} import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 @@ -54,8 +55,7 @@ class ResolveSessionCatalog( cols.foreach { c => assertTopLevelColumn(c.name, "AlterTableAddColumnsCommand") if (!c.nullable) { - throw new AnalysisException( - "ADD COLUMN with v1 tables cannot specify NOT NULL.") + throw QueryCompilationErrors.addColumnWithV1TableCannotSpecifyNotNullError } } 
AlterTableAddColumnsCommand(tbl.asTableIdentifier, cols.map(convertToStructField)) @@ -76,7 +76,7 @@ class ResolveSessionCatalog( cols.foreach(c => failNullType(c.dataType)) val changes: Seq[TableChange] = loadTable(catalog, tbl.asIdentifier) match { case Some(_: V1Table) => - throw new AnalysisException("REPLACE COLUMNS is only supported with v2 tables.") + throw QueryCompilationErrors.replaceColumnsOnlySupportedWithV2TableError case Some(table) => // REPLACE COLUMNS deletes all the existing columns and adds new columns specified. val deleteChanges = table.schema.fieldNames.map { name => @@ -101,16 +101,13 @@ class ResolveSessionCatalog( loadTable(catalog, tbl.asIdentifier).collect { case v1Table: V1Table => if (a.column.length > 1) { - throw new AnalysisException( - "ALTER COLUMN with qualified column is only supported with v2 tables.") + throw QueryCompilationErrors.alterQualifiedColumnOnlySupportedWithV2TableError } if (a.nullable.isDefined) { - throw new AnalysisException( - "ALTER COLUMN with v1 tables cannot specify NOT NULL.") + throw QueryCompilationErrors.alterColumnWithV1TableCannotSpecifyNotNullError } if (a.position.isDefined) { - throw new AnalysisException("" + - "ALTER COLUMN ... FIRST | ALTER is only supported with v2 tables.") + throw QueryCompilationErrors.alterOnlySupportedWithV2TableError } val builder = new MetadataBuilder // Add comment to metadata @@ -120,9 +117,8 @@ class ResolveSessionCatalog( v1Table.schema.findNestedField(Seq(colName), resolver = conf.resolver) .map(_._2.dataType) .getOrElse { - throw new AnalysisException( - s"ALTER COLUMN cannot find column ${quoteIfNeeded(colName)} in v1 table. " + - s"Available: ${v1Table.schema.fieldNames.mkString(", ")}") + throw QueryCompilationErrors.alterColumnCannotFindColumnInV1TableError( + quoteIfNeeded(colName), v1Table) } } val newColumn = StructField( @@ -156,7 +152,7 @@ class ResolveSessionCatalog( nameParts @ SessionCatalogAndTable(catalog, tbl), col, newName) => loadTable(catalog, tbl.asIdentifier).collect { case v1Table: V1Table => - throw new AnalysisException("RENAME COLUMN is only supported with v2 tables.") + throw QueryCompilationErrors.renameColumnOnlySupportedWithV2TableError }.getOrElse { val changes = Seq(TableChange.renameColumn(col.toArray, newName)) createAlterTable(nameParts, catalog, tbl, changes) @@ -166,7 +162,7 @@ class ResolveSessionCatalog( nameParts @ SessionCatalogAndTable(catalog, tbl), cols) => loadTable(catalog, tbl.asIdentifier).collect { case v1Table: V1Table => - throw new AnalysisException("DROP COLUMN is only supported with v2 tables.") + throw QueryCompilationErrors.dropColumnOnlySupportedWithV2TableError }.getOrElse { val changes = cols.map(col => TableChange.deleteColumn(col.toArray)) createAlterTable(nameParts, catalog, tbl, changes) @@ -202,8 +198,7 @@ class ResolveSessionCatalog( AlterTableSetLocationCommand(tbl.asTableIdentifier, partitionSpec, newLoc) }.getOrElse { if (partitionSpec.nonEmpty) { - throw new AnalysisException( - "ALTER TABLE SET LOCATION does not support partition for v2 tables.") + throw QueryCompilationErrors.alterV2TableSetLocationWithPartitionNotSupportedError } val changes = Seq(TableChange.setProperty(TableCatalog.PROP_LOCATION, newLoc)) createAlterTable(nameParts, catalog, tbl, changes) @@ -217,22 +212,19 @@ class ResolveSessionCatalog( case d @ DescribeNamespace(SessionCatalogAndNamespace(_, ns), _) => if (ns.length != 1) { - throw new AnalysisException( - s"The database name is not valid: ${ns.quoted}") + throw 
QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) } DescribeDatabaseCommand(ns.head, d.extended) case AlterNamespaceSetProperties(SessionCatalogAndNamespace(_, ns), properties) => if (ns.length != 1) { - throw new AnalysisException( - s"The database name is not valid: ${ns.quoted}") + throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) } AlterDatabasePropertiesCommand(ns.head, properties) case AlterNamespaceSetLocation(SessionCatalogAndNamespace(_, ns), location) => if (ns.length != 1) { - throw new AnalysisException( - s"The database name is not valid: ${ns.quoted}") + throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) } AlterDatabaseSetLocationCommand(ns.head, location) @@ -308,7 +300,7 @@ class ResolveSessionCatalog( assertNoNullTypeInSchema(c.tableSchema) val provider = c.provider.getOrElse(conf.defaultDataSourceName) if (!isV2Provider(provider)) { - throw new AnalysisException("REPLACE TABLE is only supported with v2 tables.") + throw QueryCompilationErrors.replaceTableOnlySupportedWithV2TableError } else { ReplaceTable( catalog.asTableCatalog, @@ -327,7 +319,7 @@ class ResolveSessionCatalog( } val provider = c.provider.getOrElse(conf.defaultDataSourceName) if (!isV2Provider(provider)) { - throw new AnalysisException("REPLACE TABLE AS SELECT is only supported with v2 tables.") + throw QueryCompilationErrors.replaceTableAsSelectOnlySupportedWithV2TableError } else { ReplaceTableAsSelect( catalog.asTableCatalog, @@ -346,8 +338,7 @@ class ResolveSessionCatalog( // v1 DROP TABLE supports temp view. case DropTable(r: ResolvedView, ifExists, purge) => if (!r.isTemp) { - throw new AnalysisException( - "Cannot drop a view with DROP TABLE. Please use DROP VIEW instead") + throw QueryCompilationErrors.cannotDropViewWithDropTableError } DropTableCommand(r.identifier.asTableIdentifier, ifExists, isView = false, purge = purge) @@ -357,8 +348,7 @@ class ResolveSessionCatalog( case c @ CreateNamespaceStatement(CatalogAndNamespace(catalog, ns), _, _) if isSessionCatalog(catalog) => if (ns.length != 1) { - throw new AnalysisException( - s"The database name is not valid: ${ns.quoted}") + throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) } val comment = c.properties.get(SupportsNamespaces.PROP_COMMENT) @@ -368,16 +358,14 @@ class ResolveSessionCatalog( case d @ DropNamespace(SessionCatalogAndNamespace(_, ns), _, _) => if (ns.length != 1) { - throw new AnalysisException( - s"The database name is not valid: ${ns.quoted}") + throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) } DropDatabaseCommand(ns.head, d.ifExists, d.cascade) case ShowTables(SessionCatalogAndNamespace(_, ns), pattern) => assert(ns.nonEmpty) if (ns.length != 1) { - throw new AnalysisException( - s"The database name is not valid: ${ns.quoted}") + throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) } ShowTablesCommand(Some(ns.head), pattern) @@ -387,8 +375,7 @@ class ResolveSessionCatalog( partitionSpec @ (None | Some(UnresolvedPartitionSpec(_, _)))) => assert(ns.nonEmpty) if (ns.length != 1) { - throw new AnalysisException( - s"The database name is not valid: ${ns.quoted}") + throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) } ShowTablesCommand( databaseName = Some(ns.head), @@ -442,9 +429,7 @@ class ResolveSessionCatalog( val resolver = conf.resolver val db = ns match { case Some(db) if v1TableName.database.exists(!resolver(_, db.head)) => - throw new AnalysisException( - "SHOW COLUMNS with conflicting databases: " + - s"'${db.head}' != 
'${v1TableName.database.get}'") + throw QueryCompilationErrors.showColumnsWithConflictDatabasesError(db, v1TableName) case _ => ns.map(_.head) } ShowColumnsCommand(db, v1TableName) @@ -520,12 +505,11 @@ class ResolveSessionCatalog( // Fallback to v1 ShowViewsCommand since there is no view API in v2 catalog assert(ns.nonEmpty) if (ns.length != 1) { - throw new AnalysisException(s"The database name is not valid: ${ns.quoted}") + throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) } ShowViewsCommand(ns.head, pattern) case _ => - throw new AnalysisException(s"Catalog ${resolved.catalog.name} doesn't support " + - "SHOW VIEWS, only SessionCatalog supports this command.") + throw QueryCompilationErrors.externalCatalogNotSupportShowViewsError(resolved) } case ShowTableProperties(ResolvedV1TableOrViewIdentifier(ident), propertyKey) => @@ -551,7 +535,7 @@ class ResolveSessionCatalog( if (isTemp) { // temp func doesn't belong to any catalog and we shouldn't resolve catalog in the name. val database = if (nameParts.length > 2) { - throw new AnalysisException(s"Unsupported function name '${nameParts.quoted}'") + throw QueryCompilationErrors.unsupportedFunctionNameError(nameParts.quoted) } else if (nameParts.length == 2) { Some(nameParts.head) } else { @@ -580,7 +564,7 @@ class ResolveSessionCatalog( private def parseV1Table(tableName: Seq[String], sql: String): Seq[String] = tableName match { case SessionCatalogAndTable(_, tbl) => tbl - case _ => throw new AnalysisException(s"$sql is only supported with v1 tables.") + case _ => throw QueryCompilationErrors.sqlOnlySupportedWithV1TablesError(sql) } private def getStorageFormatAndProvider( @@ -599,8 +583,8 @@ class ResolveSessionCatalog( if (provider.isDefined) { // The parser guarantees that USING and STORED AS/ROW FORMAT won't co-exist. if (maybeSerdeInfo.isDefined) { - throw new AnalysisException( - s"Cannot create table with both USING $provider and ${maybeSerdeInfo.get.describe}") + throw QueryCompilationErrors.cannotCreateTableWithBothProviderAndSerdeError( + provider, maybeSerdeInfo) } (nonHiveStorageFormat, provider.get) } else if (maybeSerdeInfo.isDefined) { @@ -616,8 +600,7 @@ class ResolveSessionCatalog( // User specified serde takes precedence over the one inferred from file format. serde = serdeInfo.serde.orElse(hiveSerde.serde).orElse(defaultHiveStorage.serde), properties = serdeInfo.serdeProperties ++ defaultHiveStorage.properties) - case _ => throw new AnalysisException( - s"STORED AS with file format '${serdeInfo.storedAs.get}' is invalid.") + case _ => throw QueryCompilationErrors.invalidFileFormatForStoredAsError(serdeInfo) } } else { defaultHiveStorage.copy( @@ -709,7 +692,7 @@ class ResolveSessionCatalog( private def assertTopLevelColumn(colName: Seq[String], command: String): Unit = { if (colName.length > 1) { - throw new AnalysisException(s"$command does not support nested column: ${colName.quoted}") + throw QueryCompilationErrors.commandNotSupportNestedColumnError(command, colName.quoted) } } From c17c76dd1647953f9bdb7135ba08a9b9f25460c9 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 18 Dec 2020 11:23:38 -0800 Subject: [PATCH 0813/1009] [SPARK-33599][SQL][FOLLOWUP] FIX Github Action with unidoc ### What changes were proposed in this pull request? FIX Github Action with unidoc ### Why are the changes needed? FIX Github Action with unidoc ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? Pass GA Closes #30846 from yaooqinn/SPARK-33599. 
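The hunks above route ad-hoc `throw new AnalysisException(...)` / `IllegalStateException` calls through the error-grouping objects whose scaladoc is touched just below. The bodies of those helpers are not part of these hunks, so the sketch below only reconstructs the pattern from one replaced call site; the method name matches the call sites above, but its signature and body are assumptions, not the actual Spark source.

```
import org.apache.spark.sql.AnalysisException

// Illustrative sketch of the error-grouping pattern; the real helpers live in
// QueryCompilationErrors / QueryExecutionErrors and may differ in detail.
object CompilationErrorsSketch {
  // Old call site: throw new AnalysisException(s"The database name is not valid: ${ns.quoted}")
  // New call site: throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted)
  def invalidDatabaseNameError(quotedName: String): Throwable =
    new AnalysisException(s"The database name is not valid: $quotedName")
}
```

Centralizing message construction this way keeps the wording of each error in one place, and call sites reduce to a single `throw QueryCompilationErrors.xxxError(...)` as in the hunks above.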
Authored-by: Kent Yao Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/sql/QueryCompilationErrors.scala | 2 +- .../main/scala/org/apache/spark/sql/QueryExecutionErrors.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala index 3ef17ab7aed0a..51a2cb0cb4d92 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.types.{AbstractDataType, DataType, StructType} /** * Object for grouping all error messages of the query compilation. - * Currently it includes all [[AnalysisException]]s. + * Currently it includes all AnalysisExceptions. */ object QueryCompilationErrors { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryExecutionErrors.scala index 65d280ab10037..d24e61c699241 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryExecutionErrors.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan} /** * Object for grouping all error messages of the query runtime. - * Currently it includes all [[SparkException]]s and RuntimeExceptions(e.g. + * Currently it includes all SparkExceptions and RuntimeExceptions(e.g. * UnsupportedOperationException, IllegalStateException). */ object QueryExecutionErrors { From 554600c2af0dbc8979955807658fafef5dc66c08 Mon Sep 17 00:00:00 2001 From: Vlad Glinsky Date: Fri, 18 Dec 2020 13:26:19 -0800 Subject: [PATCH 0814/1009] [SPARK-33841][CORE] Fix issue with jobs disappearing intermittently from the SHS under high load ### What changes were proposed in this pull request? Mark SHS event log entries that were `processing` at the beginning of the `checkForLogs` run as not stale and check for this mark before deleting an event log. This fixes the issue when a particular job was displayed in the SHS and disappeared after some time, but then, in several minutes showed up again. ### Why are the changes needed? The issue is caused by [SPARK-29043](https://issues.apache.org/jira/browse/SPARK-29043), which is designated to improve the concurrent performance of the History Server. The [change](https://github.com/apache/spark/pull/25797/files#) breaks the ["app deletion" logic](https://github.com/apache/spark/pull/25797/files#diff-128a6af0d78f4a6180774faedb335d6168dfc4defff58f5aa3021fc1bd767bc0R563) because of missing proper synchronization for `processing` event log entries. 
Since SHS now [filters out](https://github.com/apache/spark/pull/25797/files#diff-128a6af0d78f4a6180774faedb335d6168dfc4defff58f5aa3021fc1bd767bc0R462) all `processing` event log entries, such entries do not have a chance to be [updated with the new `lastProcessed`](https://github.com/apache/spark/pull/25797/files#diff-128a6af0d78f4a6180774faedb335d6168dfc4defff58f5aa3021fc1bd767bc0R472) time and thus any entity that completes processing right after [filtering](https://github.com/apache/spark/pull/25797/files#diff-128a6af0d78f4a6180774faedb335d6168dfc4defff58f5aa3021fc1bd767bc0R462) and before [the check for stale entities](https://github.com/apache/spark/pull/25797/files#diff-128a6af0d78f4a6180774faedb335d6168dfc4defff58f5aa3021fc1bd767bc0R560) will be identified as stale and will be deleted from the UI until the next `checkForLogs` run. This is because [updated `lastProcessed` time is used as criteria](https://github.com/apache/spark/pull/25797/files#diff-128a6af0d78f4a6180774faedb335d6168dfc4defff58f5aa3021fc1bd767bc0R557), and event log entries that missed to be updated with a new time, will match that criteria. The issue can be reproduced by generating a big number of event logs and uploading them to the SHS event log directory on S3. Essentially, around 236(26.7 MB) copies of an event log directory were created using [shs-monitor](https://github.com/vladhlinsky/shs-monitor/tree/spark-master) script. Strange behavior of SHS counting the total number of applications was noticed - at first, the number was increasing as expected, but with the next page refresh, the total number of applications decreased. No errors were logged by SHS. 58 entities are displayed at `17:35:35`: ![1-58-entries-at-17-35](https://user-images.githubusercontent.com/61428392/102648949-1129e400-4171-11eb-9463-ed1454a8f6b2.png) 25 entities are displayed at `17:36:40`: ![2-25-entries-at-17-36](https://user-images.githubusercontent.com/61428392/102648974-1c7d0f80-4171-11eb-95d8-78c2bb37a168.png) ### Does this PR introduce _any_ user-facing change? Yes, SHS users won't face the behavior when the number of displayed applications decreases periodically. ### How was this patch tested? Tested using [shs-monitor](https://github.com/vladhlinsky/shs-monitor/tree/spark-master) script: * Build SHS with the proposed change * Download Hadoop AWS and AWS Java SDK * Prepare S3 bucket and user for programmatic access, grant required roles to the user. Get access key and secret key * Configure SHS to read event logs from S3 * Start [monitor](https://github.com/vladhlinsky/shs-monitor/blob/spark-master/monitor.sh) script to query SHS API * Run 5 [producers](https://github.com/vladhlinsky/shs-monitor/blob/spark-master/producer.sh) for ~5 mins, create 125(14.2 MB) event log directory copies * Wait for SHS to load all the applications * Verify that the number of loaded applications increases continuously over time For more details, please refer to the [shs-monitor](https://github.com/vladhlinsky/shs-monitor/tree/spark-master) repository. > This version of the reproduction uses event log directories instead of single files, since recent optimization > [SPARK-33790](https://issues.apache.org/jira/browse/SPARK-33790) makes it hard to reproduce the issue with single event log files. Closes #30845 from vladhlinsky/SPARK-33841. 
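A condensed sketch of the guard described above may help; it is simplified from the `FsHistoryProvider` change in the diff below, and everything except `isProcessing` (a real provider helper) is a placeholder name used only for illustration.

```
import scala.collection.mutable

// Simplified illustration of the fix: remember which entries were still being
// processed when the scan started, and never classify those as stale.
def checkForLogsSketch(
    scannedPaths: Seq[String],
    isProcessing: String => Boolean,
    stalePaths: Seq[String],
    delete: String => Unit): Unit = {
  // Entries still processing now will not receive a fresh `lastProcessed` time
  // in this run, so record them explicitly instead of silently skipping them.
  val notStale = mutable.HashSet[String]()
  val updated = scannedPaths.filter { path =>
    if (isProcessing(path)) { notStale.add(path); false } else true
  }
  // ... `updated` entries are processed as before (elided in this sketch) ...
  // Even if a remembered entry finished processing between the filter above and
  // this point, it must not be deleted until the next checkForLogs run.
  stalePaths.filterNot(isProcessing).filterNot(p => notStale.contains(p)).foreach(delete)
}
```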
Authored-by: Vlad Glinsky Signed-off-by: Dongjoon Hyun --- .../deploy/history/FsHistoryProvider.scala | 26 ++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index e6df260bdeaa3..d35d8606eb4b4 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -474,9 +474,21 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) val newLastScanTime = clock.getTimeMillis() logDebug(s"Scanning $logDir with lastScanTime==$lastScanTime") + // Mark entries that are processing as not stale. Such entries do not have a chance to be + // updated with the new 'lastProcessed' time and thus any entity that completes processing + // right after this check and before the check for stale entities will be identified as stale + // and will be deleted from the UI until the next 'checkForLogs' run. + val notStale = mutable.HashSet[String]() val updated = Option(fs.listStatus(new Path(logDir))).map(_.toSeq).getOrElse(Nil) .filter { entry => isAccessible(entry.getPath) } - .filter { entry => !isProcessing(entry.getPath) } + .filter { entry => + if (isProcessing(entry.getPath)) { + notStale.add(entry.getPath.toString()) + false + } else { + true + } + } .flatMap { entry => EventLogFileReader(fs, entry) } .filter { reader => try { @@ -576,12 +588,14 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) .last(newLastScanTime - 1) .asScala .toList - stale.filterNot(isProcessing).foreach { log => - log.appId.foreach { appId => - cleanAppData(appId, log.attemptId, log.logPath) - listing.delete(classOf[LogInfo], log.logPath) + stale.filterNot(isProcessing) + .filterNot(info => notStale.contains(info.logPath)) + .foreach { log => + log.appId.foreach { appId => + cleanAppData(appId, log.attemptId, log.logPath) + listing.delete(classOf[LogInfo], log.logPath) + } } - } lastScanTime.set(newLastScanTime) } catch { From de234eec8febce99ede5ef9ae2301e36739a0f85 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sat, 19 Dec 2020 14:35:28 +0900 Subject: [PATCH 0815/1009] [SPARK-33812][SQL] Split the histogram column stats when saving to hive metastore as table property ### What changes were proposed in this pull request? Hive metastore has a limitation for the table property length. To work around it, Spark split the schema json string into several parts when saving to hive metastore as table properties. We need to do the same for histogram column stats as it can go very big. This PR refactors the table property splitting code, so that we can share it between the schema json string and histogram column stats. ### Why are the changes needed? To be able to analyze table when histogram data is big. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing test and new tests Closes #30809 from cloud-fan/cbo. 
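The property layout produced by the shared splitting code can be pictured with a toy round trip; the real implementation is `CatalogTable.splitLargeTableProp` / `readLargeTableProp` in the diff below, and the key name and sizes here are illustrative only.

```
import scala.collection.mutable

// Toy version of the `key` / `key.numParts` / `key.part.N` layout used below.
def split(key: String, value: String, threshold: Int): Map[String, String] = {
  val props = mutable.Map.empty[String, String]
  if (value.length <= threshold) {
    props(key) = value // small values stay in a single table property
  } else {
    val parts = value.grouped(threshold).toSeq
    props(s"$key.numParts") = parts.length.toString
    parts.zipWithIndex.foreach { case (part, i) => props(s"$key.part.$i") = part }
  }
  props.toMap
}

// Toy reader; the real readLargeTableProp also validates that no part is missing.
def read(props: Map[String, String], key: String): Option[String] =
  props.get(key).orElse {
    props.get(s"$key.numParts").map { n =>
      (0 until n.toInt).map(i => props(s"$key.part.$i")).mkString
    }
  }

// A long serialized histogram survives the round trip unchanged:
val stored = split("col.histogram", "x" * 12000, threshold = 4000)
assert(read(stored, "col.histogram").contains("x" * 12000))
```

With a 4000-character threshold the 12000-character value above is stored as `col.histogram.numParts = 3` plus `col.histogram.part.0..2`, which is the same scheme already used for the schema JSON string and, after this change, for histogram column stats.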
Authored-by: Wenchen Fan Signed-off-by: HyukjinKwon --- .../sql/catalyst/catalog/interface.scala | 54 +++++++++++++++- .../apache/spark/sql/internal/SQLConf.scala | 14 ++++- .../apache/spark/sql/RuntimeConfigSuite.scala | 4 +- .../spark/sql/StatisticsCollectionSuite.scala | 9 +++ .../spark/sql/internal/SQLConfSuite.scala | 16 ++--- .../spark/sql/hive/HiveExternalCatalog.scala | 61 +++---------------- .../sql/hive/client/HiveClientImpl.scala | 6 +- .../sql/hive/MetastoreDataSourcesSuite.scala | 4 +- 8 files changed, 97 insertions(+), 71 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 9876ee375cfa6..5cb237688f875 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -472,6 +472,51 @@ object CatalogTable { val VIEW_REFERRED_TEMP_VIEW_NAMES = VIEW_PREFIX + "referredTempViewNames" val VIEW_REFERRED_TEMP_FUNCTION_NAMES = VIEW_PREFIX + "referredTempFunctionsNames" + + def splitLargeTableProp( + key: String, + value: String, + addProp: (String, String) => Unit, + defaultThreshold: Int): Unit = { + val threshold = SQLConf.get.getConf(SQLConf.HIVE_TABLE_PROPERTY_LENGTH_THRESHOLD) + .getOrElse(defaultThreshold) + if (value.length <= threshold) { + addProp(key, value) + } else { + val parts = value.grouped(threshold).toSeq + addProp(s"$key.numParts", parts.length.toString) + parts.zipWithIndex.foreach { case (part, index) => + addProp(s"$key.part.$index", part) + } + } + } + + def readLargeTableProp(props: Map[String, String], key: String): Option[String] = { + props.get(key).orElse { + if (props.filterKeys(_.startsWith(key)).isEmpty) { + None + } else { + val numParts = props.get(s"$key.numParts") + val errorMessage = s"Cannot read table property '$key' as it's corrupted." 
+ if (numParts.isEmpty) { + throw new AnalysisException(errorMessage) + } else { + val parts = (0 until numParts.get.toInt).map { index => + props.getOrElse(s"$key.part.$index", { + throw new AnalysisException( + s"$errorMessage Missing part $index, ${numParts.get} parts are expected.") + }) + } + Some(parts.mkString) + } + } + } + } + + def isLargeTableProp(originalKey: String, propKey: String): Boolean = { + propKey == originalKey || propKey == s"$originalKey.numParts" || + propKey.startsWith(s"$originalKey.part.") + } } /** @@ -546,7 +591,11 @@ case class CatalogColumnStat( min.foreach { v => map.put(s"${colName}.${CatalogColumnStat.KEY_MIN_VALUE}", v) } max.foreach { v => map.put(s"${colName}.${CatalogColumnStat.KEY_MAX_VALUE}", v) } histogram.foreach { h => - map.put(s"${colName}.${CatalogColumnStat.KEY_HISTOGRAM}", HistogramSerializer.serialize(h)) + CatalogTable.splitLargeTableProp( + s"$colName.${CatalogColumnStat.KEY_HISTOGRAM}", + HistogramSerializer.serialize(h), + map.put, + 4000) } map.toMap } @@ -650,7 +699,8 @@ object CatalogColumnStat extends Logging { nullCount = map.get(s"${colName}.${KEY_NULL_COUNT}").map(v => BigInt(v.toLong)), avgLen = map.get(s"${colName}.${KEY_AVG_LEN}").map(_.toLong), maxLen = map.get(s"${colName}.${KEY_MAX_LEN}").map(_.toLong), - histogram = map.get(s"${colName}.${KEY_HISTOGRAM}").map(HistogramSerializer.deserialize), + histogram = CatalogTable.readLargeTableProp(map, s"$colName.$KEY_HISTOGRAM") + .map(HistogramSerializer.deserialize), version = map(s"${colName}.${KEY_VERSION}").toInt )) } catch { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 3f0fd70a6eae6..b5547319f0ab3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -905,6 +905,16 @@ object SQLConf { .checkValues(HiveCaseSensitiveInferenceMode.values.map(_.toString)) .createWithDefault(HiveCaseSensitiveInferenceMode.NEVER_INFER.toString) + val HIVE_TABLE_PROPERTY_LENGTH_THRESHOLD = + buildConf("spark.sql.hive.tablePropertyLengthThreshold") + .internal() + .doc("The maximum length allowed in a single cell when storing Spark-specific information " + + "in Hive's metastore as table properties. Currently it covers 2 things: the schema's " + + "JSON string, the histogram of column statistics.") + .version("3.2.0") + .intConf + .createOptional + val OPTIMIZER_METADATA_ONLY = buildConf("spark.sql.optimizer.metadataOnly") .internal() .doc("When true, enable the metadata-only query optimization that use the table's metadata " + @@ -3052,7 +3062,9 @@ object SQLConf { "Avoid to depend on this optimization to prevent a potential correctness issue. 
" + "If you must use, use 'SparkSessionExtensions' instead to inject it as a custom rule."), DeprecatedConfig(CONVERT_CTAS.key, "3.1", - s"Set '${LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT.key}' to false instead.") + s"Set '${LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT.key}' to false instead."), + DeprecatedConfig("spark.sql.sources.schemaStringLengthThreshold", "3.2", + s"Use '${HIVE_TABLE_PROPERTY_LENGTH_THRESHOLD.key}' instead.") ) Map(configs.map { cfg => cfg.key -> cfg } : _*) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/RuntimeConfigSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/RuntimeConfigSuite.scala index 720d570ca8384..4052130720811 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/RuntimeConfigSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/RuntimeConfigSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql import org.apache.spark.SparkFunSuite import org.apache.spark.internal.config import org.apache.spark.sql.internal.SQLConf.CHECKPOINT_LOCATION -import org.apache.spark.sql.internal.StaticSQLConf.SCHEMA_STRING_LENGTH_THRESHOLD +import org.apache.spark.sql.internal.StaticSQLConf.GLOBAL_TEMP_DATABASE class RuntimeConfigSuite extends SparkFunSuite { @@ -62,7 +62,7 @@ class RuntimeConfigSuite extends SparkFunSuite { val conf = newConf() // SQL configs - assert(!conf.isModifiable(SCHEMA_STRING_LENGTH_THRESHOLD.key)) + assert(!conf.isModifiable(GLOBAL_TEMP_DATABASE.key)) assert(conf.isModifiable(CHECKPOINT_LOCATION.key)) // Core configs assert(!conf.isModifiable(config.CPUS_PER_TASK.key)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index 3b53a5324445b..cc3d8375db32f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -174,6 +174,15 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared } } + test("SPARK-33812: column stats round trip serialization with splitting histogram property") { + withSQLConf(SQLConf.HIVE_TABLE_PROPERTY_LENGTH_THRESHOLD.key -> "10") { + statsWithHgms.foreach { case (k, v) => + val roundtrip = CatalogColumnStat.fromMap("t", k, v.toMap(k)) + assert(roundtrip == Some(v)) + } + } + } + test("analyze column command - result verification") { // (data.head.productArity - 1) because the last column does not support stats collection. 
assert(stats.size == data.head.productArity - 1) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala index 1ea2d4fd0b32c..e699c972268a9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala @@ -282,23 +282,23 @@ class SQLConfSuite extends QueryTest with SharedSparkSession { } test("static SQL conf comes from SparkConf") { - val previousValue = sparkContext.conf.get(SCHEMA_STRING_LENGTH_THRESHOLD) + val previousValue = sparkContext.conf.get(GLOBAL_TEMP_DATABASE) try { - sparkContext.conf.set(SCHEMA_STRING_LENGTH_THRESHOLD, 2000) + sparkContext.conf.set(GLOBAL_TEMP_DATABASE, "a") val newSession = new SparkSession(sparkContext) - assert(newSession.conf.get(SCHEMA_STRING_LENGTH_THRESHOLD) == 2000) + assert(newSession.conf.get(GLOBAL_TEMP_DATABASE) == "a") checkAnswer( - newSession.sql(s"SET ${SCHEMA_STRING_LENGTH_THRESHOLD.key}"), - Row(SCHEMA_STRING_LENGTH_THRESHOLD.key, "2000")) + newSession.sql(s"SET ${GLOBAL_TEMP_DATABASE.key}"), + Row(GLOBAL_TEMP_DATABASE.key, "a")) } finally { - sparkContext.conf.set(SCHEMA_STRING_LENGTH_THRESHOLD, previousValue) + sparkContext.conf.set(GLOBAL_TEMP_DATABASE, previousValue) } } test("cannot set/unset static SQL conf") { - val e1 = intercept[AnalysisException](sql(s"SET ${SCHEMA_STRING_LENGTH_THRESHOLD.key}=10")) + val e1 = intercept[AnalysisException](sql(s"SET ${GLOBAL_TEMP_DATABASE.key}=10")) assert(e1.message.contains("Cannot modify the value of a static config")) - val e2 = intercept[AnalysisException](spark.conf.unset(SCHEMA_STRING_LENGTH_THRESHOLD.key)) + val e2 = intercept[AnalysisException](spark.conf.unset(GLOBAL_TEMP_DATABASE.key)) assert(e2.message.contains("Cannot modify the value of a static config")) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 54c237f78cb9c..b4aa073893df8 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -429,18 +429,8 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat val properties = new mutable.HashMap[String, String] properties.put(CREATED_SPARK_VERSION, table.createVersion) - - // Serialized JSON schema string may be too long to be stored into a single metastore table - // property. In this case, we split the JSON string and store each part as a separate table - // property. - val threshold = conf.get(SCHEMA_STRING_LENGTH_THRESHOLD) - val schemaJsonString = schema.json - // Split the JSON string. 
- val parts = schemaJsonString.grouped(threshold).toSeq - properties.put(DATASOURCE_SCHEMA_NUMPARTS, parts.size.toString) - parts.zipWithIndex.foreach { case (part, index) => - properties.put(s"$DATASOURCE_SCHEMA_PART_PREFIX$index", part) - } + CatalogTable.splitLargeTableProp( + DATASOURCE_SCHEMA, schema.json, properties.put, conf.get(SCHEMA_STRING_LENGTH_THRESHOLD)) if (partitionColumns.nonEmpty) { properties.put(DATASOURCE_SCHEMA_NUMPARTCOLS, partitionColumns.length.toString) @@ -744,8 +734,8 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat case None if table.tableType == VIEW => // If this is a view created by Spark 2.2 or higher versions, we should restore its schema // from table properties. - if (table.properties.contains(DATASOURCE_SCHEMA_NUMPARTS)) { - table = table.copy(schema = getSchemaFromTableProperties(table)) + CatalogTable.readLargeTableProp(table.properties, DATASOURCE_SCHEMA).foreach { schemaJson => + table = table.copy(schema = DataType.fromJson(schemaJson).asInstanceOf[StructType]) } // No provider in table properties, which means this is a Hive serde table. @@ -795,8 +785,9 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat // If this is a Hive serde table created by Spark 2.1 or higher versions, we should restore its // schema from table properties. - if (table.properties.contains(DATASOURCE_SCHEMA_NUMPARTS)) { - val schemaFromTableProps = getSchemaFromTableProperties(table) + val schemaJson = CatalogTable.readLargeTableProp(table.properties, DATASOURCE_SCHEMA) + if (schemaJson.isDefined) { + val schemaFromTableProps = DataType.fromJson(schemaJson.get).asInstanceOf[StructType] val partColumnNames = getPartitionColumnsFromTableProperties(table) val reorderedSchema = reorderSchema(schema = schemaFromTableProps, partColumnNames) @@ -836,7 +827,8 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat storageWithLocation.properties.filterKeys(!HIVE_GENERATED_STORAGE_PROPERTIES(_)).toMap) val partitionProvider = table.properties.get(TABLE_PARTITION_PROVIDER) - val schemaFromTableProps = getSchemaFromTableProperties(table) + val schemaFromTableProps = CatalogTable.readLargeTableProp(table.properties, DATASOURCE_SCHEMA) + .map(json => DataType.fromJson(json).asInstanceOf[StructType]).getOrElse(new StructType()) val partColumnNames = getPartitionColumnsFromTableProperties(table) val reorderedSchema = reorderSchema(schema = schemaFromTableProps, partColumnNames) @@ -1340,7 +1332,6 @@ object HiveExternalCatalog { val DATASOURCE_PROVIDER = DATASOURCE_PREFIX + "provider" val DATASOURCE_SCHEMA = DATASOURCE_PREFIX + "schema" val DATASOURCE_SCHEMA_PREFIX = DATASOURCE_SCHEMA + "." - val DATASOURCE_SCHEMA_NUMPARTS = DATASOURCE_SCHEMA_PREFIX + "numParts" val DATASOURCE_SCHEMA_NUMPARTCOLS = DATASOURCE_SCHEMA_PREFIX + "numPartCols" val DATASOURCE_SCHEMA_NUMSORTCOLS = DATASOURCE_SCHEMA_PREFIX + "numSortCols" val DATASOURCE_SCHEMA_NUMBUCKETS = DATASOURCE_SCHEMA_PREFIX + "numBuckets" @@ -1373,40 +1364,6 @@ object HiveExternalCatalog { val EMPTY_DATA_SCHEMA = new StructType() .add("col", "array", nullable = true, comment = "from deserializer") - // A persisted data source table always store its schema in the catalog. - private def getSchemaFromTableProperties(metadata: CatalogTable): StructType = { - val errorMessage = "Could not read schema from the hive metastore because it is corrupted." 
- val props = metadata.properties - val schema = props.get(DATASOURCE_SCHEMA) - if (schema.isDefined) { - // Originally, we used `spark.sql.sources.schema` to store the schema of a data source table. - // After SPARK-6024, we removed this flag. - // Although we are not using `spark.sql.sources.schema` any more, we need to still support. - DataType.fromJson(schema.get).asInstanceOf[StructType] - } else if (props.filterKeys(_.startsWith(DATASOURCE_SCHEMA_PREFIX)).isEmpty) { - // If there is no schema information in table properties, it means the schema of this table - // was empty when saving into metastore, which is possible in older version(prior to 2.1) of - // Spark. We should respect it. - new StructType() - } else { - val numSchemaParts = props.get(DATASOURCE_SCHEMA_NUMPARTS) - if (numSchemaParts.isDefined) { - val parts = (0 until numSchemaParts.get.toInt).map { index => - val part = metadata.properties.get(s"$DATASOURCE_SCHEMA_PART_PREFIX$index").orNull - if (part == null) { - throw new AnalysisException(errorMessage + - s" (missing part $index of the schema, ${numSchemaParts.get} parts are expected).") - } - part - } - // Stick all parts back to a single schema string. - DataType.fromJson(parts.mkString).asInstanceOf[StructType] - } else { - throw new AnalysisException(errorMessage) - } - } - } - private def getColumnNamesByType( props: Map[String, String], colType: String, diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 6a964a0ce3613..e779a80f7c323 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -57,7 +57,7 @@ import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException} import org.apache.spark.sql.connector.catalog.SupportsNamespaces._ import org.apache.spark.sql.execution.QueryExecutionException import org.apache.spark.sql.hive.HiveExternalCatalog -import org.apache.spark.sql.hive.HiveExternalCatalog.{DATASOURCE_SCHEMA, DATASOURCE_SCHEMA_NUMPARTS, DATASOURCE_SCHEMA_PART_PREFIX} +import org.apache.spark.sql.hive.HiveExternalCatalog.DATASOURCE_SCHEMA import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.util.{CircularBuffer, Utils} @@ -580,9 +580,7 @@ private[hive] class HiveClientImpl( val it = oldTable.getParameters.entrySet.iterator while (it.hasNext) { val entry = it.next() - val isSchemaProp = entry.getKey.startsWith(DATASOURCE_SCHEMA_PART_PREFIX) || - entry.getKey == DATASOURCE_SCHEMA || entry.getKey == DATASOURCE_SCHEMA_NUMPARTS - if (isSchemaProp) { + if (CatalogTable.isLargeTableProp(DATASOURCE_SCHEMA, entry.getKey)) { it.remove() } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index 0593dbe7f6653..ecbb104070b70 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -1338,7 +1338,7 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv val e = intercept[AnalysisException] { sharedState.externalCatalog.getTable("default", "t") }.getMessage - assert(e.contains(s"Could not read schema from the hive metastore because it is corrupted")) + assert(e.contains("Cannot 
read table property 'spark.sql.sources.schema' as it's corrupted")) withDebugMode { val tableMeta = sharedState.externalCatalog.getTable("default", "t") @@ -1355,7 +1355,7 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv val newSession = sparkSession.newSession() newSession.sql("CREATE TABLE abc(i int) USING json") val tableMeta = newSession.sessionState.catalog.getTableMetadata(TableIdentifier("abc")) - assert(tableMeta.properties(DATASOURCE_SCHEMA_NUMPARTS).toInt == 1) + assert(tableMeta.properties.contains(DATASOURCE_SCHEMA)) assert(tableMeta.properties(DATASOURCE_PROVIDER) == "json") } } From 44563a0412257645e0053ee2c44d6eb3447e9d4f Mon Sep 17 00:00:00 2001 From: zhengruifeng Date: Sat, 19 Dec 2020 08:43:48 -0600 Subject: [PATCH 0816/1009] [SPARK-33518][ML] Improve performance of ML ALS recommendForAll by GEMV ### What changes were proposed in this pull request? There were a lot of works on improving ALS's recommendForAll For now, I found that it maybe futhermore optimized by 1, using GEMV and sharing a pre-allocated buffer per task; 2, using guava.ordering instead of BoundedPriorityQueue; ### Why are the changes needed? In my test, using `f2jBLAS.sgemv`, it is about 2.3X faster than existing impl. |Impl| Master | GEMM | GEMV | GEMV + array aggregator | GEMV + guava ordering + array aggregator | GEMV + guava ordering| |------|----------|------------|----------|------------|------------|------------| |Duration|341229|363741|191201|189790|148417|147222| ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? existing testsuites Closes #30468 from zhengruifeng/als_rec_opt. Authored-by: zhengruifeng Signed-off-by: Sean Owen --- .../apache/spark/ml/recommendation/ALS.scala | 53 ++++++++++++------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala index 088f6a682be82..1b856bda45e24 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala @@ -27,6 +27,7 @@ import scala.util.{Sorting, Try} import scala.util.hashing.byteswap64 import com.github.fommil.netlib.BLAS.{getInstance => blas} +import com.google.common.collect.{Ordering => GuavaOrdering} import org.apache.hadoop.fs.Path import org.json4s.DefaultFormats import org.json4s.JsonDSL._ @@ -47,7 +48,7 @@ import org.apache.spark.sql.{DataFrame, Dataset} import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ import org.apache.spark.storage.StorageLevel -import org.apache.spark.util.{BoundedPriorityQueue, Utils} +import org.apache.spark.util.Utils import org.apache.spark.util.collection.{OpenHashMap, OpenHashSet, SortDataFormat, Sorter} import org.apache.spark.util.random.XORShiftRandom @@ -456,30 +457,39 @@ class ALSModel private[ml] ( num: Int, blockSize: Int): DataFrame = { import srcFactors.sparkSession.implicits._ + import scala.collection.JavaConverters._ val srcFactorsBlocked = blockify(srcFactors.as[(Int, Array[Float])], blockSize) val dstFactorsBlocked = blockify(dstFactors.as[(Int, Array[Float])], blockSize) val ratings = srcFactorsBlocked.crossJoin(dstFactorsBlocked) - .as[(Seq[(Int, Array[Float])], Seq[(Int, Array[Float])])] - .flatMap { case (srcIter, dstIter) => - val m = srcIter.size - val n = math.min(dstIter.size, num) - val output = new Array[(Int, Int, Float)](m * n) - var i = 0 - val pq = new 
BoundedPriorityQueue[(Int, Float)](num)(Ordering.by(_._2)) - srcIter.foreach { case (srcId, srcFactor) => - dstIter.foreach { case (dstId, dstFactor) => - // We use F2jBLAS which is faster than a call to native BLAS for vector dot product - val score = BLAS.f2jBLAS.sdot(rank, srcFactor, 1, dstFactor, 1) - pq += dstId -> score + .as[(Array[Int], Array[Float], Array[Int], Array[Float])] + .mapPartitions { iter => + var scores: Array[Float] = null + var idxOrd: GuavaOrdering[Int] = null + iter.flatMap { case (srcIds, srcMat, dstIds, dstMat) => + require(srcMat.length == srcIds.length * rank) + require(dstMat.length == dstIds.length * rank) + val m = srcIds.length + val n = dstIds.length + if (scores == null || scores.length < n) { + scores = Array.ofDim[Float](n) + idxOrd = new GuavaOrdering[Int] { + override def compare(left: Int, right: Int): Int = { + Ordering[Float].compare(scores(left), scores(right)) + } + } } - pq.foreach { case (dstId, score) => - output(i) = (srcId, dstId, score) - i += 1 + + Iterator.range(0, m).flatMap { i => + // buffer = i-th vec in srcMat * dstMat + BLAS.f2jBLAS.sgemv("T", rank, n, 1.0F, dstMat, 0, rank, + srcMat, i * rank, 1, 0.0F, scores, 0, 1) + + val srcId = srcIds(i) + idxOrd.greatestOf(Iterator.range(0, n).asJava, num).asScala + .iterator.map { j => (srcId, dstIds(j), scores(j)) } } - pq.clear() } - output.toSeq } // We'll force the IDs to be Int. Unfortunately this converts IDs to Int in the output. val topKAggregator = new TopByKeyAggregator[Int, Int, Float](num, Ordering.by(_._2)) @@ -499,9 +509,12 @@ class ALSModel private[ml] ( */ private def blockify( factors: Dataset[(Int, Array[Float])], - blockSize: Int): Dataset[Seq[(Int, Array[Float])]] = { + blockSize: Int): Dataset[(Array[Int], Array[Float])] = { import factors.sparkSession.implicits._ - factors.mapPartitions(_.grouped(blockSize)) + factors.mapPartitions { iter => + iter.grouped(blockSize) + .map(block => (block.map(_._1).toArray, block.flatMap(_._2).toArray)) + } } } From 00642ee19e6969ca7996fb44d16d001fcf17b407 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sat, 19 Dec 2020 06:59:44 -0800 Subject: [PATCH 0817/1009] [SPARK-33843][BUILD] Upgrade to Zstd 1.4.8 ### What changes were proposed in this pull request? This PR aims to upgrade Zstd library to 1.4.8. ### Why are the changes needed? This will bring Zstd 1.4.7 and 1.4.8 improvement and bug fixes and the following from `zstd-jni`. - https://github.com/facebook/zstd/releases/tag/v1.4.7 - https://github.com/facebook/zstd/releases/tag/v1.4.8 - https://github.com/luben/zstd-jni/issues/153 (Apple M1 architecture) ### Does this PR introduce _any_ user-facing change? This will unblock Apple Silicon usage. ### How was this patch tested? Pass the CIs. Closes #30848 from dongjoon-hyun/SPARK-33843. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index ceea496d3f1dc..199a0d1a31751 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -242,4 +242,4 @@ xmlenc/0.52//xmlenc-0.52.jar xz/1.5//xz-1.5.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar zookeeper/3.4.14//zookeeper-3.4.14.jar -zstd-jni/1.4.5-6//zstd-jni-1.4.5-6.jar +zstd-jni/1.4.8-1//zstd-jni-1.4.8-1.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index d1b811bd73607..42e1634b6e66c 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -257,4 +257,4 @@ xbean-asm7-shaded/4.15//xbean-asm7-shaded-4.15.jar xz/1.5//xz-1.5.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar zookeeper/3.4.14//zookeeper-3.4.14.jar -zstd-jni/1.4.5-6//zstd-jni-1.4.5-6.jar +zstd-jni/1.4.8-1//zstd-jni-1.4.8-1.jar diff --git a/pom.xml b/pom.xml index 72e285bb2ba6e..4781f981a5949 100644 --- a/pom.xml +++ b/pom.xml @@ -695,7 +695,7 @@ com.github.luben zstd-jni - 1.4.5-6 + 1.4.8-1 com.clearspring.analytics From dd44ba5460c3850c87e93c2c126d980cb1b3a8b4 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Sat, 19 Dec 2020 08:00:09 -0800 Subject: [PATCH 0818/1009] [SPARK-32976][SQL][FOLLOWUP] SET and RESTORE hive.exec.dynamic.partition.mode for HiveSQLInsertTestSuite to avoid flakiness ### What changes were proposed in this pull request? As https://github.com/apache/spark/pull/29893#discussion_r545303780 mentioned: > We need to set spark.conf.set("hive.exec.dynamic.partition.mode", "nonstrict") before executing this suite; otherwise, test("insert with column list - follow table output order + partitioned table") will fail. The reason why it does not fail because some test cases [running before this suite] do not change the default value of hive.exec.dynamic.partition.mode back to strict. However, the order of test suite execution is not deterministic. ### Why are the changes needed? avoid flakiness in tests ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing tests Closes #30843 from yaooqinn/SPARK-32976-F. 
Authored-by: Kent Yao Signed-off-by: Dongjoon Hyun --- .../spark/sql/hive/HiveSQLInsertTestSuite.scala | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSQLInsertTestSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSQLInsertTestSuite.scala index 49b005bca938e..0b1d511f08511 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSQLInsertTestSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSQLInsertTestSuite.scala @@ -21,5 +21,20 @@ import org.apache.spark.sql.SQLInsertTestSuite import org.apache.spark.sql.hive.test.TestHiveSingleton class HiveSQLInsertTestSuite extends SQLInsertTestSuite with TestHiveSingleton { + + private val originalPartitionMode = spark.conf.getOption("hive.exec.dynamic.partition.mode") + + override protected def beforeAll(): Unit = { + super.beforeAll() + spark.conf.set("hive.exec.dynamic.partition.mode", "nonstrict") + } + + override protected def afterAll(): Unit = { + originalPartitionMode + .map(v => spark.conf.set("hive.exec.dynamic.partition.mode", v)) + .getOrElse(spark.conf.unset("hive.exec.dynamic.partition.mode")) + super.afterAll() + } + override def format: String = "hive OPTIONS(fileFormat='parquet')" } From 06075d849e07a97f7aba0dceece57ed45cbae040 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Sat, 19 Dec 2020 08:32:58 -0800 Subject: [PATCH 0819/1009] [SPARK-33829][SQL] Renaming v2 tables should recreate the cache ### What changes were proposed in this pull request? Currently, renaming v2 tables does not invalidate/recreate the cache, leading to an incorrect behavior (cache not being used) when v2 tables are renamed. This PR fixes the behavior. ### Why are the changes needed? Fixing a bug since the cache associated with the renamed table is not being cleaned up/recreated. ### Does this PR introduce _any_ user-facing change? Yes, now when a v2 table is renamed, cache is correctly updated. ### How was this patch tested? Added a new test Closes #30825 from imback82/rename_recreate_cache_v2. 
Authored-by: Terry Kim Signed-off-by: Dongjoon Hyun --- .../datasources/v2/DataSourceV2Strategy.scala | 31 +++++++++++++------ .../datasources/v2/RenameTableExec.scala | 17 +++++++++- .../sql/connector/DataSourceV2SQLSuite.scala | 19 ++++++++++++ 3 files changed, 57 insertions(+), 10 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index c40d2ab9cba4e..50bcf81f1ba2d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.execution.datasources.DataSourceStrategy import org.apache.spark.sql.execution.streaming.continuous.{WriteToContinuousDataSource, WriteToContinuousDataSourceExec} import org.apache.spark.sql.sources.{BaseRelation, TableScan} import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.storage.StorageLevel class DataSourceV2Strategy(session: SparkSession) extends Strategy with PredicateHelper { @@ -56,17 +57,24 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat session.sharedState.cacheManager.recacheByPlan(session, r) } - private def invalidateCache(r: ResolvedTable, recacheTable: Boolean = false)(): Unit = { + // Invalidates the cache associated with the given table. If the invalidated cache matches the + // given table, the cache's storage level is returned. + private def invalidateCache( + r: ResolvedTable, + recacheTable: Boolean = false)(): Option[StorageLevel] = { val v2Relation = DataSourceV2Relation.create(r.table, Some(r.catalog), Some(r.identifier)) val cache = session.sharedState.cacheManager.lookupCachedData(v2Relation) session.sharedState.cacheManager.uncacheQuery(session, v2Relation, cascade = true) - if (recacheTable && cache.isDefined) { - // save the cache name and cache level for recreation - val cacheName = cache.get.cachedRepresentation.cacheBuilder.tableName + if (cache.isDefined) { val cacheLevel = cache.get.cachedRepresentation.cacheBuilder.storageLevel - - // recache with the same name and cache level. - session.sharedState.cacheManager.cacheQuery(session, v2Relation, cacheName, cacheLevel) + if (recacheTable) { + val cacheName = cache.get.cachedRepresentation.cacheBuilder.tableName + // recache with the same name and cache level. + session.sharedState.cacheManager.cacheQuery(session, v2Relation, cacheName, cacheLevel) + } + Some(cacheLevel) + } else { + None } } @@ -266,12 +274,17 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case AlterTable(catalog, ident, _, changes) => AlterTableExec(catalog, ident, changes) :: Nil - case RenameTable(ResolvedTable(catalog, oldIdent, _), newIdent, isView) => + case RenameTable(r @ ResolvedTable(catalog, oldIdent, _), newIdent, isView) => if (isView) { throw new AnalysisException( "Cannot rename a table with ALTER VIEW. 
Please use ALTER TABLE instead.") } - RenameTableExec(catalog, oldIdent, newIdent.asIdentifier) :: Nil + RenameTableExec( + catalog, + oldIdent, + newIdent.asIdentifier, + invalidateCache(r), + session.sharedState.cacheManager.cacheQuery) :: Nil case AlterNamespaceSetProperties(ResolvedNamespace(catalog, ns), properties) => AlterNamespaceSetPropertiesExec(catalog.asNamespaceCatalog, ns, properties) :: Nil diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RenameTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RenameTableExec.scala index a650607d5f129..a71dd33a88ba9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RenameTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RenameTableExec.scala @@ -17,9 +17,12 @@ package org.apache.spark.sql.execution.datasources.v2 +import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} +import org.apache.spark.storage.StorageLevel /** * Physical plan node for renaming a table. @@ -27,14 +30,26 @@ import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} case class RenameTableExec( catalog: TableCatalog, oldIdent: Identifier, - newIdent: Identifier) extends V2CommandExec { + newIdent: Identifier, + invalidateCache: () => Option[StorageLevel], + cacheTable: (SparkSession, LogicalPlan, Option[String], StorageLevel) => Unit) + extends V2CommandExec { override def output: Seq[Attribute] = Seq.empty override protected def run(): Seq[InternalRow] = { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper + + val optOldStorageLevel = invalidateCache() catalog.invalidateTable(oldIdent) + catalog.renameTable(oldIdent, newIdent) + optOldStorageLevel.foreach { oldStorageLevel => + val tbl = catalog.loadTable(newIdent) + val newRelation = DataSourceV2Relation.create(tbl, Some(catalog), Some(newIdent)) + cacheTable(sqlContext.sparkSession, newRelation, Some(newIdent.quoted), oldStorageLevel) + } Seq.empty } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index b335dc31a3037..4fdb32c24f104 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -37,6 +37,7 @@ import org.apache.spark.sql.internal.connector.SimpleTableProvider import org.apache.spark.sql.sources.SimpleScanSource import org.apache.spark.sql.types.{BooleanType, LongType, StringType, StructField, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.storage.StorageLevel import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils @@ -863,6 +864,24 @@ class DataSourceV2SQLSuite } } + test("SPARK-33829: Renaming a table should recreate a cache while retaining the old cache info") { + withTable("testcat.ns.old", "testcat.ns.new") { + def getStorageLevel(tableName: String): StorageLevel = { + val table = spark.table(tableName) + val optCachedData = spark.sharedState.cacheManager.lookupCachedData(table) + assert(optCachedData.isDefined) + 
optCachedData.get.cachedRepresentation.cacheBuilder.storageLevel + } + sql("CREATE TABLE testcat.ns.old USING foo AS SELECT id, data FROM source") + sql("CACHE TABLE testcat.ns.old OPTIONS('storageLevel' 'MEMORY_ONLY')") + val oldStorageLevel = getStorageLevel("testcat.ns.old") + + sql("ALTER TABLE testcat.ns.old RENAME TO ns.new") + val newStorageLevel = getStorageLevel("testcat.ns.new") + assert(oldStorageLevel === newStorageLevel) + } + } + test("Relation: basic") { val t1 = "testcat.ns1.ns2.tbl" withTable(t1) { From 37c4cd8f05316227465ff9cccbba063779827660 Mon Sep 17 00:00:00 2001 From: Ammar Al-Batool Date: Sat, 19 Dec 2020 14:53:40 -0600 Subject: [PATCH 0820/1009] [MINOR][DOCS] Fix typos in ScalaDocs for DataStreamWriter#foreachBatch The title is pretty self-explanatory. ### What changes were proposed in this pull request? Fixing typos in the docs for `foreachBatch` functions. ### Why are the changes needed? To fix typos in JavaDoc/ScalaDoc. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Yes. Closes #30782 from ammar1x/patch-1. Lead-authored-by: Ammar Al-Batool Co-authored-by: Ammar Al-Batool Signed-off-by: Sean Owen --- .../spark/sql/streaming/DataStreamWriter.scala | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index 9e8dff37bcfd2..2703119ce1167 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -494,12 +494,13 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { * :: Experimental :: * * (Scala-specific) Sets the output of the streaming query to be processed using the provided - * function. This is supported only the in the micro-batch execution modes (that is, when the + * function. This is supported only in the micro-batch execution modes (that is, when the * trigger is not continuous). In every micro-batch, the provided function will be called in * every micro-batch with (i) the output rows as a Dataset and (ii) the batch identifier. - * The batchId can be used deduplicate and transactionally write the output + * The batchId can be used to deduplicate and transactionally write the output * (that is, the provided Dataset) to external systems. The output Dataset is guaranteed - * to exactly same for the same batchId (assuming all operations are deterministic in the query). + * to be exactly the same for the same batchId (assuming all operations are deterministic + * in the query). * * @since 2.4.0 */ @@ -515,12 +516,13 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { * :: Experimental :: * * (Java-specific) Sets the output of the streaming query to be processed using the provided - * function. This is supported only the in the micro-batch execution modes (that is, when the + * function. This is supported only in the micro-batch execution modes (that is, when the * trigger is not continuous). In every micro-batch, the provided function will be called in * every micro-batch with (i) the output rows as a Dataset and (ii) the batch identifier. - * The batchId can be used deduplicate and transactionally write the output + * The batchId can be used to deduplicate and transactionally write the output * (that is, the provided Dataset) to external systems. 
The output Dataset is guaranteed - * to exactly same for the same batchId (assuming all operations are deterministic in the query). + * to be exactly the same for the same batchId (assuming all operations are deterministic + * in the query). * * @since 2.4.0 */ From 70da86a085b61a0981c3f9fc6dbd897716472642 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Sat, 19 Dec 2020 14:10:20 -0800 Subject: [PATCH 0821/1009] [SPARK-33850][SQL] EXPLAIN FORMATTED doesn't show the plan for subqueries if AQE is enabled ### What changes were proposed in this pull request? This PR fixes an issue that when AQE is enabled, EXPLAIN FORMATTED doesn't show the plan for subqueries. ```scala val df = spark.range(1, 100) df.createTempView("df") spark.sql("SELECT (SELECT min(id) AS v FROM df)").explain("FORMATTED") == Physical Plan == AdaptiveSparkPlan (3) +- Project (2) +- Scan OneRowRelation (1) (1) Scan OneRowRelation Output: [] Arguments: ParallelCollectionRDD[0] at explain at :24, OneRowRelation, UnknownPartitioning(0) (2) Project Output [1]: [Subquery subquery#3, [id=#20] AS scalarsubquery()#5L] Input: [] (3) AdaptiveSparkPlan Output [1]: [scalarsubquery()#5L] Arguments: isFinalPlan=false ``` After this change, the plan for the subquerie is shown. ```scala == Physical Plan == * Project (2) +- * Scan OneRowRelation (1) (1) Scan OneRowRelation [codegen id : 1] Output: [] Arguments: ParallelCollectionRDD[0] at explain at :24, OneRowRelation, UnknownPartitioning(0) (2) Project [codegen id : 1] Output [1]: [Subquery scalar-subquery#3, [id=#24] AS scalarsubquery()#5L] Input: [] ===== Subqueries ===== Subquery:1 Hosting operator id = 2 Hosting Expression = Subquery scalar-subquery#3, [id=#24] * HashAggregate (6) +- Exchange (5) +- * HashAggregate (4) +- * Range (3) (3) Range [codegen id : 1] Output [1]: [id#0L] Arguments: Range (1, 100, step=1, splits=Some(12)) (4) HashAggregate [codegen id : 1] Input [1]: [id#0L] Keys: [] Functions [1]: [partial_min(id#0L)] Aggregate Attributes [1]: [min#7L] Results [1]: [min#8L] (5) Exchange Input [1]: [min#8L] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#20] (6) HashAggregate [codegen id : 2] Input [1]: [min#8L] Keys: [] Functions [1]: [min(id#0L)] Aggregate Attributes [1]: [min(id#0L)#4L] Results [1]: [min(id#0L)#4L AS v#2L] ``` ### Why are the changes needed? For better debuggability. ### Does this PR introduce _any_ user-facing change? Yes. Users can see the formatted plan for subqueries. ### How was this patch tested? New test. Closes #30855 from sarutak/fix-aqe-explain. 
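The two-line fix that follows is easier to read knowing the general pattern: under AQE the physical plan is wrapped in an `AdaptiveSparkPlanExec`, and any traversal that stops at the wrapper misses everything inside it, including subquery expressions. A hedged sketch of that unwrapping idea, using a toy operator count rather than the real `getSubqueries` logic:

```scala
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec

// Descend through the AQE wrapper into its executedPlan instead of treating the
// wrapper as a leaf; the actual patch adds the same case to ExplainUtils.getSubqueries.
def countOperators(plan: SparkPlan): Int = plan match {
  case a: AdaptiveSparkPlanExec => countOperators(a.executedPlan)
  case p                        => 1 + p.children.map(countOperators).sum
}
```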
Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .../spark/sql/execution/ExplainUtils.scala | 2 + .../sql-tests/results/explain-aqe.sql.out | 263 ++++++++++++++++++ .../org/apache/spark/sql/ExplainSuite.scala | 22 ++ 3 files changed, 287 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala index 20e6fb6f96eaa..f47542ca59bc7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala @@ -218,6 +218,8 @@ object ExplainUtils extends AdaptiveSparkPlanHelper { plan: => QueryPlan[_], subqueries: ArrayBuffer[(SparkPlan, Expression, BaseSubqueryExec)]): Unit = { plan.foreach { + case a: AdaptiveSparkPlanExec => + getSubqueries(a.executedPlan, subqueries) case p: SparkPlan => p.expressions.foreach (_.collect { case e: PlanExpression[_] => diff --git a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out index 578b0a807fc52..d68989524d486 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out @@ -407,6 +407,101 @@ Condition : (((isnotnull(key#x) AND isnotnull(val#x)) AND (key#x = Subquery subq Output [2]: [key#x, val#x] Arguments: isFinalPlan=false +===== Subqueries ===== + +Subquery:1 Hosting operator id = 2 Hosting Expression = Subquery subquery#x, [id=#x] +AdaptiveSparkPlan (10) ++- HashAggregate (9) + +- Exchange (8) + +- HashAggregate (7) + +- Project (6) + +- Filter (5) + +- Scan parquet default.explain_temp2 (4) + + +(4) Scan parquet default.explain_temp2 +Output [2]: [key#x, val#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp2] +PushedFilters: [IsNotNull(key), IsNotNull(val), EqualTo(val,2)] +ReadSchema: struct + +(5) Filter +Input [2]: [key#x, val#x] +Condition : (((isnotnull(key#x) AND isnotnull(val#x)) AND (key#x = Subquery subquery#x, [id=#x])) AND (val#x = 2)) + +(6) Project +Output [1]: [key#x] +Input [2]: [key#x, val#x] + +(7) HashAggregate +Input [1]: [key#x] +Keys: [] +Functions [1]: [partial_max(key#x)] +Aggregate Attributes [1]: [max#x] +Results [1]: [max#x] + +(8) Exchange +Input [1]: [max#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] + +(9) HashAggregate +Input [1]: [max#x] +Keys: [] +Functions [1]: [max(key#x)] +Aggregate Attributes [1]: [max(key#x)#x] +Results [1]: [max(key#x)#x AS max(key)#x] + +(10) AdaptiveSparkPlan +Output [1]: [max(key)#x] +Arguments: isFinalPlan=false + +Subquery:2 Hosting operator id = 5 Hosting Expression = Subquery subquery#x, [id=#x] +AdaptiveSparkPlan (17) ++- HashAggregate (16) + +- Exchange (15) + +- HashAggregate (14) + +- Project (13) + +- Filter (12) + +- Scan parquet default.explain_temp3 (11) + + +(11) Scan parquet default.explain_temp3 +Output [2]: [key#x, val#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp3] +PushedFilters: [IsNotNull(val), GreaterThan(val,0)] +ReadSchema: struct + +(12) Filter +Input [2]: [key#x, val#x] +Condition : (isnotnull(val#x) AND (val#x > 0)) + +(13) Project +Output [1]: [key#x] +Input [2]: [key#x, val#x] + +(14) HashAggregate +Input [1]: [key#x] +Keys: [] +Functions [1]: [partial_max(key#x)] +Aggregate Attributes [1]: [max#x] +Results [1]: [max#x] + +(15) Exchange +Input [1]: [max#x] +Arguments: SinglePartition, 
ENSURE_REQUIREMENTS, [id=#x] + +(16) HashAggregate +Input [1]: [max#x] +Keys: [] +Functions [1]: [max(key#x)] +Aggregate Attributes [1]: [max(key#x)#x] +Results [1]: [max(key#x)#x AS max(key)#x] + +(17) AdaptiveSparkPlan +Output [1]: [max(key)#x] +Arguments: isFinalPlan=false -- !query EXPLAIN FORMATTED @@ -442,6 +537,101 @@ Condition : ((key#x = Subquery subquery#x, [id=#x]) OR (cast(key#x as double) = Output [2]: [key#x, val#x] Arguments: isFinalPlan=false +===== Subqueries ===== + +Subquery:1 Hosting operator id = 2 Hosting Expression = Subquery subquery#x, [id=#x] +AdaptiveSparkPlan (10) ++- HashAggregate (9) + +- Exchange (8) + +- HashAggregate (7) + +- Project (6) + +- Filter (5) + +- Scan parquet default.explain_temp2 (4) + + +(4) Scan parquet default.explain_temp2 +Output [2]: [key#x, val#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp2] +PushedFilters: [IsNotNull(val), GreaterThan(val,0)] +ReadSchema: struct + +(5) Filter +Input [2]: [key#x, val#x] +Condition : (isnotnull(val#x) AND (val#x > 0)) + +(6) Project +Output [1]: [key#x] +Input [2]: [key#x, val#x] + +(7) HashAggregate +Input [1]: [key#x] +Keys: [] +Functions [1]: [partial_max(key#x)] +Aggregate Attributes [1]: [max#x] +Results [1]: [max#x] + +(8) Exchange +Input [1]: [max#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] + +(9) HashAggregate +Input [1]: [max#x] +Keys: [] +Functions [1]: [max(key#x)] +Aggregate Attributes [1]: [max(key#x)#x] +Results [1]: [max(key#x)#x AS max(key)#x] + +(10) AdaptiveSparkPlan +Output [1]: [max(key)#x] +Arguments: isFinalPlan=false + +Subquery:2 Hosting operator id = 2 Hosting Expression = Subquery subquery#x, [id=#x] +AdaptiveSparkPlan (17) ++- HashAggregate (16) + +- Exchange (15) + +- HashAggregate (14) + +- Project (13) + +- Filter (12) + +- Scan parquet default.explain_temp3 (11) + + +(11) Scan parquet default.explain_temp3 +Output [2]: [key#x, val#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp3] +PushedFilters: [IsNotNull(val), GreaterThan(val,0)] +ReadSchema: struct + +(12) Filter +Input [2]: [key#x, val#x] +Condition : (isnotnull(val#x) AND (val#x > 0)) + +(13) Project +Output [1]: [key#x] +Input [2]: [key#x, val#x] + +(14) HashAggregate +Input [1]: [key#x] +Keys: [] +Functions [1]: [partial_avg(cast(key#x as bigint))] +Aggregate Attributes [2]: [sum#x, count#xL] +Results [2]: [sum#x, count#xL] + +(15) Exchange +Input [2]: [sum#x, count#xL] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] + +(16) HashAggregate +Input [2]: [sum#x, count#xL] +Keys: [] +Functions [1]: [avg(cast(key#x as bigint))] +Aggregate Attributes [1]: [avg(cast(key#x as bigint))#x] +Results [1]: [avg(cast(key#x as bigint))#x AS avg(key)#x] + +(17) AdaptiveSparkPlan +Output [1]: [avg(key)#x] +Arguments: isFinalPlan=false -- !query EXPLAIN FORMATTED @@ -470,6 +660,79 @@ Input: [] Output [1]: [(scalarsubquery() + scalarsubquery())#x] Arguments: isFinalPlan=false +===== Subqueries ===== + +Subquery:1 Hosting operator id = 2 Hosting Expression = Subquery subquery#x, [id=#x] +AdaptiveSparkPlan (8) ++- HashAggregate (7) + +- Exchange (6) + +- HashAggregate (5) + +- Scan parquet default.explain_temp1 (4) + + +(4) Scan parquet default.explain_temp1 +Output [1]: [key#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp1] +ReadSchema: struct + +(5) HashAggregate +Input [1]: [key#x] +Keys: [] +Functions [1]: [partial_avg(cast(key#x as bigint))] +Aggregate Attributes [2]: [sum#x, count#xL] +Results 
[2]: [sum#x, count#xL] + +(6) Exchange +Input [2]: [sum#x, count#xL] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] + +(7) HashAggregate +Input [2]: [sum#x, count#xL] +Keys: [] +Functions [1]: [avg(cast(key#x as bigint))] +Aggregate Attributes [1]: [avg(cast(key#x as bigint))#x] +Results [1]: [avg(cast(key#x as bigint))#x AS avg(key)#x] + +(8) AdaptiveSparkPlan +Output [1]: [avg(key)#x] +Arguments: isFinalPlan=false + +Subquery:2 Hosting operator id = 2 Hosting Expression = Subquery subquery#x, [id=#x] +AdaptiveSparkPlan (13) ++- HashAggregate (12) + +- Exchange (11) + +- HashAggregate (10) + +- Scan parquet default.explain_temp1 (9) + + +(9) Scan parquet default.explain_temp1 +Output [1]: [key#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp1] +ReadSchema: struct + +(10) HashAggregate +Input [1]: [key#x] +Keys: [] +Functions [1]: [partial_avg(cast(key#x as bigint))] +Aggregate Attributes [2]: [sum#x, count#xL] +Results [2]: [sum#x, count#xL] + +(11) Exchange +Input [2]: [sum#x, count#xL] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] + +(12) HashAggregate +Input [2]: [sum#x, count#xL] +Keys: [] +Functions [1]: [avg(cast(key#x as bigint))] +Aggregate Attributes [1]: [avg(cast(key#x as bigint))#x] +Results [1]: [avg(cast(key#x as bigint))#x AS avg(key)#x] + +(13) AdaptiveSparkPlan +Output [1]: [avg(key)#x] +Arguments: isFinalPlan=false -- !query EXPLAIN FORMATTED diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala index 75372c5437f25..0ec57c2fcb5ad 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala @@ -277,6 +277,28 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite } } + test("SPARK-33850: explain formatted - check presence of subquery in case of AQE") { + withTable("df1") { + withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { + withTable("df1") { + spark.range(1, 100) + .write + .format("parquet") + .mode("overwrite") + .saveAsTable("df1") + + val sqlText = "EXPLAIN FORMATTED SELECT (SELECT min(id) FROM df1) as v" + val expected_pattern1 = + "Subquery:1 Hosting operator id = 2 Hosting Expression = Subquery subquery#x" + + withNormalizedExplain(sqlText) { normalizedOutput => + assert(expected_pattern1.r.findAllMatchIn(normalizedOutput).length == 1) + } + } + } + } + } + test("Support ExplainMode in Dataset.explain") { val df1 = Seq((1, 2), (2, 3)).toDF("k", "v1") val df2 = Seq((2, 3), (1, 1)).toDF("k", "v2") From 2b6ef5606bec1a4547c8e850440bf12cc3422e1d Mon Sep 17 00:00:00 2001 From: William Hyun Date: Sat, 19 Dec 2020 14:19:44 -0800 Subject: [PATCH 0822/1009] [SPARK-33854][BUILD] Use ListBuffer instead of Stack in SparkBuild.scala ### What changes were proposed in this pull request? This PR aims to use ListBuffer instead of Stack in SparkBuild.scala to remove deprecation warning. ### Why are the changes needed? Stack is deprecated in Scala 2.12.0. ```scala % build/sbt compile ... [warn] /Users/william/spark/project/SparkBuild.scala:1112:25: class Stack in package mutable is deprecated (since 2.12.0): Stack is an inelegant and potentially poorly-performing wrapper around List. Use a List assigned to a var instead. [warn] val stack = new Stack[File]() ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual. Closes #30860 from williamhyun/SPARK-33854. 
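The mechanical translation from the deprecated `Stack` is small but worth seeing in isolation, since `ListBuffer` has no push/pop: `prepend` plays the role of `push` and `remove(0)` the role of `pop`, preserving the LIFO order the directory-creation loop relies on. A standalone sketch (paths are illustrative, not the SparkBuild code):

```scala
import scala.collection.mutable.ListBuffer

val pending = new ListBuffer[String]()
pending.prepend("a/b/c") // was stack.push(...): deepest missing directory first
pending.prepend("a/b")
pending.prepend("a")     // pushed last, so removed first

while (pending.nonEmpty) {
  val dir = pending.remove(0) // was stack.pop(): shallowest ancestor comes out first
  println(s"mkdir $dir")      // prints a, then a/b, then a/b/c
}
```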
Authored-by: William Hyun Signed-off-by: Dongjoon Hyun --- project/SparkBuild.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 3098060478f40..aa3e2cd65e185 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -23,7 +23,7 @@ import java.util.Locale import scala.io.Source import scala.util.Properties import scala.collection.JavaConverters._ -import scala.collection.mutable.Stack +import scala.collection.mutable.ListBuffer import sbt._ import sbt.Classpaths.publishTask @@ -1109,14 +1109,14 @@ object TestSettings { // Because File.mkdirs() can fail if multiple callers are trying to create the same // parent directory, this code tries to create parents one at a time, and avoids // failures when the directories have been created by somebody else. - val stack = new Stack[File]() + val stack = new ListBuffer[File]() while (!dir.isDirectory()) { - stack.push(dir) + stack.prepend(dir) dir = dir.getParentFile() } while (stack.nonEmpty) { - val d = stack.pop() + val d = stack.remove(0) require(d.mkdir() || d.isDirectory(), s"Failed to create directory $d") } } From df2314b63aaf4992ac86ea0b68dae8554b066828 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Sat, 19 Dec 2020 14:37:15 -0800 Subject: [PATCH 0823/1009] [SPARK-33852][SQL][TESTS] Use assertAnalysisError in HiveDDLSuite.scala ### What changes were proposed in this pull request? `HiveDDLSuite` has many of the following patterns: ```scala val e = intercept[AnalysisException] { sql(sqlString) } assert(e.message.contains(exceptionMessage)) ``` However, there already exists `assertAnalysisError` helper function which does exactly the same thing. ### Why are the changes needed? To refactor code to simplify. ### Does this PR introduce _any_ user-facing change? No, just refactoring the test code. ### How was this patch tested? Existing tests Closes #30857 from imback82/hive_ddl_suite_use_assertAnalysisError. Authored-by: Terry Kim Signed-off-by: Dongjoon Hyun --- .../sql/hive/execution/HiveDDLSuite.scala | 363 ++++++++---------- 1 file changed, 157 insertions(+), 206 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index aac4b88d9e3f8..34f127bade95b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -451,15 +451,15 @@ class HiveDDLSuite withTable("tab1", "tab2") { (("a", "b") :: Nil).toDF().write.json(tempDir.getCanonicalPath) - var e = intercept[AnalysisException] { sql("CREATE TABLE tab1 USING hive") }.getMessage - assert(e.contains("Unable to infer the schema. The schema specification is required to " + - "create the table `default`.`tab1`")) + assertAnalysisError( + "CREATE TABLE tab1 USING hive", + "Unable to infer the schema. The schema specification is required to " + + "create the table `default`.`tab1`") - e = intercept[AnalysisException] { - sql(s"CREATE TABLE tab2 USING hive location '${tempDir.getCanonicalPath}'") - }.getMessage - assert(e.contains("Unable to infer the schema. The schema specification is required to " + - "create the table `default`.`tab2`")) + assertAnalysisError( + s"CREATE TABLE tab2 USING hive location '${tempDir.getCanonicalPath}'", + "Unable to infer the schema. 
The schema specification is required to " + + "create the table `default`.`tab2`") } } } @@ -581,17 +581,15 @@ class HiveDDLSuite } test("create table: partition column names exist in table definition") { - val e = intercept[AnalysisException] { - sql("CREATE TABLE tbl(a int) PARTITIONED BY (a string)") - } - assert(e.message == "Found duplicate column(s) in the table definition of `default`.`tbl`: `a`") + assertAnalysisError( + "CREATE TABLE tbl(a int) PARTITIONED BY (a string)", + "Found duplicate column(s) in the table definition of `default`.`tbl`: `a`") } test("create partitioned table without specifying data type for the partition columns") { - val e = intercept[AnalysisException] { - sql("CREATE TABLE tbl(a int) PARTITIONED BY (b) STORED AS parquet") - } - assert(e.message.contains("partition column b is not defined in table")) + assertAnalysisError( + "CREATE TABLE tbl(a int) PARTITIONED BY (b) STORED AS parquet", + "partition column b is not defined in table") } test("add/drop partition with location - managed table") { @@ -643,11 +641,10 @@ class HiveDDLSuite test("SPARK-19129: drop partition with a empty string will drop the whole table") { val df = spark.createDataFrame(Seq((0, "a"), (1, "b"))).toDF("partCol1", "name") df.write.mode("overwrite").partitionBy("partCol1").saveAsTable("partitionedTable") - val e = intercept[AnalysisException] { - spark.sql("alter table partitionedTable drop partition(partCol1='')") - }.getMessage - assert(e.contains("Partition spec is invalid. The spec ([partCol1=]) contains an empty " + - "partition column value")) + assertAnalysisError( + "alter table partitionedTable drop partition(partCol1='')", + "Partition spec is invalid. The spec ([partCol1=]) contains an empty " + + "partition column value") } test("add/drop partitions - external table") { @@ -692,11 +689,10 @@ class HiveDDLSuite // After data insertion, all the directory are not empty assert(dirSet.forall(dir => dir.listFiles.nonEmpty)) - val message = intercept[AnalysisException] { - sql(s"ALTER TABLE $externalTab DROP PARTITION (ds='2008-04-09', unknownCol='12')") - } - assert(message.getMessage.contains("unknownCol is not a valid partition column in table " + - "`default`.`exttable_with_partitions`")) + assertAnalysisError( + s"ALTER TABLE $externalTab DROP PARTITION (ds='2008-04-09', unknownCol='12')", + "unknownCol is not a valid partition column in table " + + "`default`.`exttable_with_partitions`") sql( s""" @@ -798,11 +794,9 @@ class HiveDDLSuite sql(s"ALTER VIEW $viewName UNSET TBLPROPERTIES ('p')") checkProperties(Map()) - val message = intercept[AnalysisException] { - sql(s"ALTER VIEW $viewName UNSET TBLPROPERTIES ('p')") - }.getMessage - assert(message.contains( - "Attempted to unset non-existent property 'p' in table '`default`.`view1`'")) + assertAnalysisError( + s"ALTER VIEW $viewName UNSET TBLPROPERTIES ('p')", + "Attempted to unset non-existent property 'p' in table '`default`.`view1`'") } } } @@ -825,10 +819,9 @@ class HiveDDLSuite test("create table - SET TBLPROPERTIES EXTERNAL to TRUE") { val tabName = "tab1" withTable(tabName) { - val message = intercept[AnalysisException] { - sql(s"CREATE TABLE $tabName (height INT, length INT) TBLPROPERTIES('EXTERNAL'='TRUE')") - }.getMessage - assert(message.contains("Cannot set or change the preserved property key: 'EXTERNAL'")) + assertAnalysisError( + s"CREATE TABLE $tabName (height INT, length INT) TBLPROPERTIES('EXTERNAL'='TRUE')", + "Cannot set or change the preserved property key: 'EXTERNAL'") } } @@ -839,10 +832,9 @@ class 
HiveDDLSuite sql(s"CREATE TABLE $tabName (height INT, length INT)") assert( catalog.getTableMetadata(TableIdentifier(tabName)).tableType == CatalogTableType.MANAGED) - val message = intercept[AnalysisException] { - sql(s"ALTER TABLE $tabName SET TBLPROPERTIES ('EXTERNAL' = 'TRUE')") - }.getMessage - assert(message.contains("Cannot set or change the preserved property key: 'EXTERNAL'")) + assertAnalysisError( + s"ALTER TABLE $tabName SET TBLPROPERTIES ('EXTERNAL' = 'TRUE')", + "Cannot set or change the preserved property key: 'EXTERNAL'") // The table type is not changed to external assert( catalog.getTableMetadata(TableIdentifier(tabName)).tableType == CatalogTableType.MANAGED) @@ -1051,11 +1043,9 @@ class HiveDDLSuite test("drop table using drop view") { withTable("tab1") { sql("CREATE TABLE tab1(c1 int)") - val message = intercept[AnalysisException] { - sql("DROP VIEW tab1") - }.getMessage - assert(message.contains( - "tab1 is a table. 'DROP VIEW' expects a view. Please use DROP TABLE instead.")) + assertAnalysisError( + "DROP VIEW tab1", + "tab1 is a table. 'DROP VIEW' expects a view. Please use DROP TABLE instead.") } } @@ -1064,10 +1054,9 @@ class HiveDDLSuite spark.range(10).write.saveAsTable("tab1") withView("view1") { sql("CREATE VIEW view1 AS SELECT * FROM tab1") - val message = intercept[AnalysisException] { - sql("DROP TABLE view1") - }.getMessage - assert(message.contains("Cannot drop a view with DROP TABLE. Please use DROP VIEW instead")) + assertAnalysisError( + "DROP TABLE view1", + "Cannot drop a view with DROP TABLE. Please use DROP VIEW instead") } } } @@ -1221,10 +1210,9 @@ class HiveDDLSuite sql(s"USE default") val sqlDropDatabase = s"DROP DATABASE $dbName ${if (cascade) "CASCADE" else "RESTRICT"}" if (tableExists && !cascade) { - val message = intercept[AnalysisException] { - sql(sqlDropDatabase) - }.getMessage - assert(message.contains(s"Database $dbName is not empty. One or more tables exist.")) + assertAnalysisError( + sqlDropDatabase, + s"Database $dbName is not empty. One or more tables exist.") // the database directory was not removed assert(fs.exists(new Path(expectedDBLocation))) } else { @@ -1253,17 +1241,15 @@ class HiveDDLSuite test("drop default database") { Seq("true", "false").foreach { caseSensitive => withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive) { - var message = intercept[AnalysisException] { - sql("DROP DATABASE default") - }.getMessage - assert(message.contains("Can not drop default database")) + assertAnalysisError( + "DROP DATABASE default", + "Can not drop default database") // SQLConf.CASE_SENSITIVE does not affect the result // because the Hive metastore is not case sensitive. 
- message = intercept[AnalysisException] { - sql("DROP DATABASE DeFault") - }.getMessage - assert(message.contains("Can not drop default database")) + assertAnalysisError( + "DROP DATABASE DeFault", + "Can not drop default database") } } } @@ -1653,10 +1639,9 @@ class HiveDDLSuite } // When tableExists is not invoked, we still can get an AnalysisException - val e = intercept[AnalysisException] { - sql(s"DESCRIBE $indexTabName") - }.getMessage - assert(e.contains("Hive index table is not supported.")) + assertAnalysisError( + s"DESCRIBE $indexTabName", + "Hive index table is not supported.") } finally { client.runSqlHive(s"DROP INDEX IF EXISTS $indexName ON $tabName") } @@ -1726,20 +1711,17 @@ class HiveDDLSuite sql("CREATE TABLE tbl(a INT) STORED AS parquet") Seq(DATASOURCE_PREFIX, STATISTICS_PREFIX).foreach { forbiddenPrefix => - val e = intercept[AnalysisException] { - sql(s"ALTER TABLE tbl SET TBLPROPERTIES ('${forbiddenPrefix}foo' = 'loser')") - } - assert(e.getMessage.contains(forbiddenPrefix + "foo")) + assertAnalysisError( + s"ALTER TABLE tbl SET TBLPROPERTIES ('${forbiddenPrefix}foo' = 'loser')", + s"${forbiddenPrefix}foo") - val e2 = intercept[AnalysisException] { - sql(s"ALTER TABLE tbl UNSET TBLPROPERTIES ('${forbiddenPrefix}foo')") - } - assert(e2.getMessage.contains(forbiddenPrefix + "foo")) + assertAnalysisError( + s"ALTER TABLE tbl UNSET TBLPROPERTIES ('${forbiddenPrefix}foo')", + s"${forbiddenPrefix}foo") - val e3 = intercept[AnalysisException] { - sql(s"CREATE TABLE tbl2 (a INT) TBLPROPERTIES ('${forbiddenPrefix}foo'='anything')") - } - assert(e3.getMessage.contains(forbiddenPrefix + "foo")) + assertAnalysisError( + s"CREATE TABLE tbl2 (a INT) TBLPROPERTIES ('${forbiddenPrefix}foo'='anything')", + s"${forbiddenPrefix}foo") } } } @@ -1759,10 +1741,9 @@ class HiveDDLSuite assert(spark.table("rectangles").collect().isEmpty) // not supported since the table is not partitioned - val e = intercept[AnalysisException] { - sql("TRUNCATE TABLE rectangles PARTITION (width=1)") - } - assert(e.message.contains("Operation not allowed")) + assertAnalysisError( + "TRUNCATE TABLE rectangles PARTITION (width=1)", + "Operation not allowed") } } } @@ -1800,10 +1781,9 @@ class HiveDDLSuite } // throw exception if the column in partition spec is not a partition column. 
- val e = intercept[AnalysisException] { - sql("TRUNCATE TABLE partTable PARTITION (unknown=1)") - } - assert(e.message.contains("unknown is not a valid partition column")) + assertAnalysisError( + "TRUNCATE TABLE partTable PARTITION (unknown=1)", + "unknown is not a valid partition column") } } @@ -2161,10 +2141,9 @@ class HiveDDLSuite assert(loc.listFiles().length >= 1) checkAnswer(spark.table("t"), Row("1") :: Nil) } else { - val e = intercept[AnalysisException] { - spark.sql("INSERT INTO TABLE t SELECT 1") - }.getMessage - assert(e.contains("java.net.URISyntaxException: Relative path in absolute URI: a:b")) + assertAnalysisError( + "INSERT INTO TABLE t SELECT 1", + "java.net.URISyntaxException: Relative path in absolute URI: a:b") } } @@ -2203,15 +2182,13 @@ class HiveDDLSuite Row("1", "2") :: Row("1", "2017-03-03 12:13%3A14") :: Nil) } } else { - val e = intercept[AnalysisException] { - spark.sql("INSERT INTO TABLE t1 PARTITION(b=2) SELECT 1") - }.getMessage - assert(e.contains("java.net.URISyntaxException: Relative path in absolute URI: a:b")) - - val e1 = intercept[AnalysisException] { - spark.sql("INSERT INTO TABLE t1 PARTITION(b='2017-03-03 12:13%3A14') SELECT 1") - }.getMessage - assert(e1.contains("java.net.URISyntaxException: Relative path in absolute URI: a:b")) + assertAnalysisError( + "INSERT INTO TABLE t1 PARTITION(b=2) SELECT 1", + "java.net.URISyntaxException: Relative path in absolute URI: a:b") + + assertAnalysisError( + "INSERT INTO TABLE t1 PARTITION(b='2017-03-03 12:13%3A14') SELECT 1", + "java.net.URISyntaxException: Relative path in absolute URI: a:b") } } } @@ -2296,30 +2273,26 @@ class HiveDDLSuite sql("CREATE TABLE tab (c1 int) PARTITIONED BY (c2 int) STORED AS PARQUET") if (!caseSensitive) { // duplicating partitioning column name - val e1 = intercept[AnalysisException] { - sql("ALTER TABLE tab ADD COLUMNS (C2 string)") - }.getMessage - assert(e1.contains("Found duplicate column(s)")) + assertAnalysisError( + "ALTER TABLE tab ADD COLUMNS (C2 string)", + "Found duplicate column(s)") // duplicating data column name - val e2 = intercept[AnalysisException] { - sql("ALTER TABLE tab ADD COLUMNS (C1 string)") - }.getMessage - assert(e2.contains("Found duplicate column(s)")) + assertAnalysisError( + "ALTER TABLE tab ADD COLUMNS (C1 string)", + "Found duplicate column(s)") } else { // hive catalog will still complains that c1 is duplicate column name because hive // identifiers are case insensitive. - val e1 = intercept[AnalysisException] { - sql("ALTER TABLE tab ADD COLUMNS (C2 string)") - }.getMessage - assert(e1.contains("HiveException")) + assertAnalysisError( + "ALTER TABLE tab ADD COLUMNS (C2 string)", + "HiveException") // hive catalog will still complains that c1 is duplicate column name because hive // identifiers are case insensitive. 
- val e2 = intercept[AnalysisException] { - sql("ALTER TABLE tab ADD COLUMNS (C1 string)") - }.getMessage - assert(e2.contains("HiveException")) + assertAnalysisError( + "ALTER TABLE tab ADD COLUMNS (C1 string)", + "HiveException") } } } @@ -2341,58 +2314,49 @@ class HiveDDLSuite // Forbid CTAS with null type withTable("t1", "t2", "t3") { - val e1 = intercept[AnalysisException] { - spark.sql("CREATE TABLE t1 USING PARQUET AS SELECT null as null_col") - }.getMessage - assert(e1.contains("Cannot create tables with null type")) + assertAnalysisError( + "CREATE TABLE t1 USING PARQUET AS SELECT null as null_col", + "Cannot create tables with null type") - val e2 = intercept[AnalysisException] { - spark.sql("CREATE TABLE t2 AS SELECT null as null_col") - }.getMessage - assert(e2.contains("Cannot create tables with null type")) + assertAnalysisError( + "CREATE TABLE t2 AS SELECT null as null_col", + "Cannot create tables with null type") - val e3 = intercept[AnalysisException] { - spark.sql("CREATE TABLE t3 STORED AS PARQUET AS SELECT null as null_col") - }.getMessage - assert(e3.contains("Cannot create tables with null type")) + assertAnalysisError( + "CREATE TABLE t3 STORED AS PARQUET AS SELECT null as null_col", + "Cannot create tables with null type") } // Forbid Replace table AS SELECT with null type withTable("t") { val v2Source = classOf[FakeV2Provider].getName - val e = intercept[AnalysisException] { - spark.sql(s"CREATE OR REPLACE TABLE t USING $v2Source AS SELECT null as null_col") - }.getMessage - assert(e.contains("Cannot create tables with null type")) + assertAnalysisError( + s"CREATE OR REPLACE TABLE t USING $v2Source AS SELECT null as null_col", + "Cannot create tables with null type") } // Forbid creating table with VOID type in Spark withTable("t1", "t2", "t3", "t4") { - val e1 = intercept[AnalysisException] { - spark.sql(s"CREATE TABLE t1 (v VOID) USING PARQUET") - }.getMessage - assert(e1.contains("Cannot create tables with null type")) - val e2 = intercept[AnalysisException] { - spark.sql(s"CREATE TABLE t2 (v VOID) USING hive") - }.getMessage - assert(e2.contains("Cannot create tables with null type")) - val e3 = intercept[AnalysisException] { - spark.sql(s"CREATE TABLE t3 (v VOID)") - }.getMessage - assert(e3.contains("Cannot create tables with null type")) - val e4 = intercept[AnalysisException] { - spark.sql(s"CREATE TABLE t4 (v VOID) STORED AS PARQUET") - }.getMessage - assert(e4.contains("Cannot create tables with null type")) + assertAnalysisError( + "CREATE TABLE t1 (v VOID) USING PARQUET", + "Cannot create tables with null type") + assertAnalysisError( + "CREATE TABLE t2 (v VOID) USING hive", + "Cannot create tables with null type") + assertAnalysisError( + "CREATE TABLE t3 (v VOID)", + "Cannot create tables with null type") + assertAnalysisError( + "CREATE TABLE t4 (v VOID) STORED AS PARQUET", + "Cannot create tables with null type") } // Forbid Replace table with VOID type withTable("t") { val v2Source = classOf[FakeV2Provider].getName - val e = intercept[AnalysisException] { - spark.sql(s"CREATE OR REPLACE TABLE t (v VOID) USING $v2Source") - }.getMessage - assert(e.contains("Cannot create tables with null type")) + assertAnalysisError( + s"CREATE OR REPLACE TABLE t (v VOID) USING $v2Source", + "Cannot create tables with null type") } // Make sure spark.catalog.createTable with null type will fail @@ -2626,9 +2590,9 @@ class HiveDDLSuite test("load command for non local invalid path validation") { withTable("tbl") { sql("CREATE TABLE tbl(i INT, j STRING) USING 
hive") - val e = intercept[AnalysisException]( - sql("load data inpath '/doesnotexist.csv' into table tbl")) - assert(e.message.contains("LOAD DATA input path does not exist")) + assertAnalysisError( + "load data inpath '/doesnotexist.csv' into table tbl", + "LOAD DATA input path does not exist") } } @@ -2780,47 +2744,39 @@ class HiveDDLSuite sql("CREATE TABLE sourceDsTable(a INT, b INT) USING PARQUET") // row format doesn't work in create targetDsTable - var e = intercept[AnalysisException] { - spark.sql( - """ - |CREATE TABLE targetDsTable LIKE sourceHiveTable USING PARQUET - |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - """.stripMargin) - }.getMessage - assert(e.contains("Operation not allowed: CREATE TABLE LIKE ... USING ... ROW FORMAT SERDE")) + assertAnalysisError( + """ + |CREATE TABLE targetDsTable LIKE sourceHiveTable USING PARQUET + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + """.stripMargin, + "Operation not allowed: CREATE TABLE LIKE ... USING ... ROW FORMAT SERDE") // row format doesn't work with provider hive - e = intercept[AnalysisException] { - spark.sql( - """ - |CREATE TABLE targetHiveTable LIKE sourceHiveTable USING hive - |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - |WITH SERDEPROPERTIES ('test' = 'test') - """.stripMargin) - }.getMessage - assert(e.contains("Operation not allowed: CREATE TABLE LIKE ... USING ... ROW FORMAT SERDE")) + assertAnalysisError( + """ + |CREATE TABLE targetHiveTable LIKE sourceHiveTable USING hive + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + |WITH SERDEPROPERTIES ('test' = 'test') + """.stripMargin, + "Operation not allowed: CREATE TABLE LIKE ... USING ... ROW FORMAT SERDE") // row format doesn't work without 'STORED AS' - e = intercept[AnalysisException] { - spark.sql( - """ - |CREATE TABLE targetDsTable LIKE sourceDsTable - |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - |WITH SERDEPROPERTIES ('test' = 'test') - """.stripMargin) - }.getMessage - assert(e.contains("'ROW FORMAT' must be used with 'STORED AS'")) + assertAnalysisError( + """ + |CREATE TABLE targetDsTable LIKE sourceDsTable + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + |WITH SERDEPROPERTIES ('test' = 'test') + """.stripMargin, + "'ROW FORMAT' must be used with 'STORED AS'") // 'INPUTFORMAT' and 'OUTPUTFORMAT' conflict with 'USING' - e = intercept[AnalysisException] { - spark.sql( - """ - |CREATE TABLE targetDsTable LIKE sourceDsTable USING format - |STORED AS INPUTFORMAT 'inFormat' OUTPUTFORMAT 'outFormat' - |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - """.stripMargin) - }.getMessage - assert(e.contains("Operation not allowed: CREATE TABLE LIKE ... USING ... STORED AS")) + assertAnalysisError( + """ + |CREATE TABLE targetDsTable LIKE sourceDsTable USING format + |STORED AS INPUTFORMAT 'inFormat' OUTPUTFORMAT 'outFormat' + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + """.stripMargin, + "Operation not allowed: CREATE TABLE LIKE ... USING ... 
STORED AS") } } @@ -2880,16 +2836,13 @@ class HiveDDLSuite // negative case hiveFormats.filterNot(allowSerdeFileFormats.contains(_)).foreach { format => withTable("targetTable") { - val ex = intercept[AnalysisException] { - spark.sql( - s""" - |CREATE TABLE targetTable LIKE $sourceTable - |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - |STORED AS $format - """.stripMargin) - }.getMessage - assert(ex.contains( - s"ROW FORMAT SERDE is incompatible with format '${format.toLowerCase(Locale.ROOT)}'")) + assertAnalysisError( + s""" + |CREATE TABLE targetTable LIKE $sourceTable + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + |STORED AS $format + """.stripMargin, + s"ROW FORMAT SERDE is incompatible with format '${format.toLowerCase(Locale.ROOT)}'") } } } @@ -2912,15 +2865,13 @@ class HiveDDLSuite assert(table.storage.serde === Some(expectedSerde.get.serde.get)) // negative case - val ex = intercept[AnalysisException] { - spark.sql( - s""" - |CREATE TABLE targetTable LIKE $sourceTable - |ROW FORMAT DELIMITED - |STORED AS PARQUET - """.stripMargin) - }.getMessage - assert(ex.contains("ROW FORMAT DELIMITED is only compatible with 'textfile'")) + assertAnalysisError( + s""" + |CREATE TABLE targetTable LIKE $sourceTable + |ROW FORMAT DELIMITED + |STORED AS PARQUET + """.stripMargin, + "ROW FORMAT DELIMITED is only compatible with 'textfile'") } } From 13391683e7a863671d3d719dc81e20ec2a870725 Mon Sep 17 00:00:00 2001 From: Xianjin YE Date: Sun, 20 Dec 2020 08:51:17 -0600 Subject: [PATCH 0824/1009] [SPARK-33756][SQL] Make BytesToBytesMap's MapIterator idempotent ### What changes were proposed in this pull request? Make MapIterator of BytesToBytesMap `hasNext` method idempotent ### Why are the changes needed? The `hasNext` maybe called multiple times, if not guarded, second call of hasNext method after reaching the end of iterator will throw NoSuchElement exception. ### Does this PR introduce _any_ user-facing change? NO. ### How was this patch tested? Update a unit test to cover this case. Closes #30728 from advancedxy/SPARK-33756. 
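The contract being restored here is the standard one for iterators: once exhausted, `hasNext` must keep returning `false` on repeated calls rather than failing because the spill-file list has already been drained. A tiny Scala sketch of the property the updated test asserts (the helper is illustrative, not part of the patch):

```scala
// Drain an iterator, then verify that asking again is harmless: hasNext must be
// idempotent at the end, with no exception and no further side effects.
def assertHasNextIdempotentAtEnd[T](it: Iterator[T]): Unit = {
  while (it.hasNext) it.next()
  assert(!it.hasNext)
  assert(!it.hasNext) // deliberate second call, mirroring the updated unit test
}
```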
Authored-by: Xianjin YE Signed-off-by: Sean Owen --- .../org/apache/spark/unsafe/map/BytesToBytesMap.java | 10 ++++++---- .../spark/unsafe/map/AbstractBytesToBytesMapSuite.java | 2 ++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java index d7940fc08e1a5..f474c30b8b3d8 100644 --- a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java +++ b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java @@ -393,10 +393,12 @@ public void remove() { } private void handleFailedDelete() { - // remove the spill file from disk - File file = spillWriters.removeFirst().getFile(); - if (file != null && file.exists() && !file.delete()) { - logger.error("Was unable to delete spill file {}", file.getAbsolutePath()); + if (spillWriters.size() > 0) { + // remove the spill file from disk + File file = spillWriters.removeFirst().getFile(); + if (file != null && file.exists() && !file.delete()) { + logger.error("Was unable to delete spill file {}", file.getAbsolutePath()); + } } } } diff --git a/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java b/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java index f4e952f465e54..f35176a69d94b 100644 --- a/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java +++ b/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java @@ -576,6 +576,8 @@ public void spillInIterator() throws IOException { iter2.next(); } assertFalse(iter2.hasNext()); + // calls hasNext twice deliberately, make sure it's idempotent + assertFalse(iter2.hasNext()); } finally { map.free(); for (File spillFile : spillFilesCreated) { From 3c8be3983cd390306e9abbfe078536a08881a5d6 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Mon, 21 Dec 2020 09:40:42 +0900 Subject: [PATCH 0825/1009] [SPARK-33850][SQL][FOLLOWUP] Improve and cleanup the test code ### What changes were proposed in this pull request? This PR mainly improves and cleans up the test code introduced in #30855 based on the comment. The test code is actually taken from another test `explain formatted - check presence of subquery in case of DPP` so this PR cleans the code too ( removed unnecessary `withTable`). ### Why are the changes needed? To keep the test code clean. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? `ExplainSuite` passes. Closes #30861 from sarutak/followup-SPARK-33850. 
Authored-by: Kousuke Saruta Signed-off-by: Takeshi Yamamuro --- .../org/apache/spark/sql/ExplainSuite.scala | 25 +++++++------------ 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala index 0ec57c2fcb5ad..8b7459fddb59a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala @@ -233,7 +233,6 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true", SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "false", SQLConf.EXCHANGE_REUSE_ENABLED.key -> "false") { - withTable("df1", "df2") { spark.range(1000).select(col("id"), col("id").as("k")) .write .partitionBy("k") @@ -273,27 +272,21 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite assert(expected_pattern4.r.findAllMatchIn(normalizedOutput).length == 1) } } - } } } test("SPARK-33850: explain formatted - check presence of subquery in case of AQE") { - withTable("df1") { - withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { - withTable("df1") { - spark.range(1, 100) - .write - .format("parquet") - .mode("overwrite") - .saveAsTable("df1") + withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { + withTempView("df") { + val df = spark.range(1, 100) + df.createTempView("df") - val sqlText = "EXPLAIN FORMATTED SELECT (SELECT min(id) FROM df1) as v" - val expected_pattern1 = - "Subquery:1 Hosting operator id = 2 Hosting Expression = Subquery subquery#x" + val sqlText = "EXPLAIN FORMATTED SELECT (SELECT min(id) FROM df) as v" + val expected_pattern = + "Subquery:1 Hosting operator id = 2 Hosting Expression = Subquery subquery#x" - withNormalizedExplain(sqlText) { normalizedOutput => - assert(expected_pattern1.r.findAllMatchIn(normalizedOutput).length == 1) - } + withNormalizedExplain(sqlText) { normalizedOutput => + assert(expected_pattern.r.findAllMatchIn(normalizedOutput).length == 1) } } } From 8e2633962f789a6ba5eb9448596f6ac4b7b1c2ff Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Sun, 20 Dec 2020 17:38:47 -0800 Subject: [PATCH 0826/1009] [SPARK-26341][WEBUI][FOLLOWUP] Update stage memory metrics on stage end ### What changes were proposed in this pull request? This is a followup PR for #30573 . After this change applied, stage memory metrics will be updated on stage end. ### Why are the changes needed? After #30573, executor memory metrics is updated on stage end but stage memory metrics is not updated. It's better to update both metrics like `updateStageLevelPeakExecutorMetrics` does. ### Does this PR introduce _any_ user-facing change? Yes. stage memory metrics is updated more accurately. ### How was this patch tested? After I run a job and visited `/api/v1//stages`, I confirmed `peakExecutorMemory` metrics is shown even though the life time of each stage is very short . I also modify the json files for `HistoryServerSuite`. Closes #30858 from sarutak/followup-SPARK-26341. 
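The one-line change that follows folds each finished task's executor-metric snapshot into the stage-level peaks, the same element-wise maximum already kept per executor. A simplified sketch of that peak-tracking idea, using plain arrays instead of the real `ExecutorMetrics` type:

```scala
// Compare a new snapshot against the running peaks and keep the larger value per
// metric, returning whether anything changed; this mirrors the shape of
// compareAndUpdatePeakValues without depending on Spark internals.
def compareAndUpdatePeaks(peaks: Array[Long], snapshot: Array[Long]): Boolean = {
  var updated = false
  var i = 0
  while (i < peaks.length && i < snapshot.length) {
    if (snapshot(i) > peaks(i)) {
      peaks(i) = snapshot(i)
      updated = true
    }
    i += 1
  }
  updated
}
```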
Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .../spark/status/AppStatusListener.scala | 1 + .../complete_stage_list_json_expectation.json | 72 +++++++++++++- ...xcludeOnFailure_for_stage_expectation.json | 24 ++++- ...eOnFailure_node_for_stage_expectation.json | 24 ++++- .../failed_stage_list_json_expectation.json | 24 ++++- .../one_stage_attempt_json_expectation.json | 24 ++++- .../one_stage_json_expectation.json | 24 ++++- .../stage_list_json_expectation.json | 96 ++++++++++++++++++- ...ist_with_accumulable_json_expectation.json | 24 ++++- ...age_with_accumulable_json_expectation.json | 24 ++++- 10 files changed, 323 insertions(+), 14 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala index 0722095cc6533..bf19897e51fb3 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala @@ -688,6 +688,7 @@ private[spark] class AppStatusListener( } stage.activeTasksPerExecutor(event.taskInfo.executorId) -= 1 + stage.peakExecutorMetrics.compareAndUpdatePeakValues(event.taskExecutorMetrics) stage.executorSummary(event.taskInfo.executorId).peakExecutorMetrics .compareAndUpdatePeakValues(event.taskExecutorMetrics) // [SPARK-24415] Wait for all tasks to finish before removing stage from live list diff --git a/core/src/test/resources/HistoryServerExpectations/complete_stage_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/complete_stage_list_json_expectation.json index a452488294547..f04543e037c48 100644 --- a/core/src/test/resources/HistoryServerExpectations/complete_stage_list_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/complete_stage_list_json_expectation.json @@ -42,7 +42,29 @@ "rddIds" : [ 6, 5 ], "accumulatorUpdates" : [ ], "killedTasksSummary" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } }, { "status" : "COMPLETE", "stageId" : 1, @@ -87,7 +109,29 @@ "rddIds" : [ 1, 0 ], "accumulatorUpdates" : [ ], "killedTasksSummary" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } }, { "status" : "COMPLETE", "stageId" : 0, @@ -132,5 +176,27 @@ "rddIds" : [ 0 ], "accumulatorUpdates" : [ ], "killedTasksSummary" : { }, - 
"resourceProfileId" : 0 + "resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } } ] diff --git a/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_for_stage_expectation.json b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_for_stage_expectation.json index ab9a8b7ef885f..dcad8a6895ed8 100644 --- a/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_for_stage_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_for_stage_expectation.json @@ -764,5 +764,27 @@ } }, "killedTasksSummary" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } } diff --git a/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_node_for_stage_expectation.json b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_node_for_stage_expectation.json index 1c569c19894fd..2ab1546bd4a86 100644 --- a/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_node_for_stage_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_node_for_stage_expectation.json @@ -992,5 +992,27 @@ } }, "killedTasksSummary" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } } diff --git a/core/src/test/resources/HistoryServerExpectations/failed_stage_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/failed_stage_list_json_expectation.json index c38741646c64b..5573cf98db26a 100644 --- a/core/src/test/resources/HistoryServerExpectations/failed_stage_list_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/failed_stage_list_json_expectation.json @@ -43,5 +43,27 @@ "rddIds" : [ 3, 2 ], "accumulatorUpdates" : [ ], "killedTasksSummary" : { }, - "resourceProfileId" : 0 + 
"resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } } ] diff --git a/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json index b1eab0d7ac196..9edb518132e87 100644 --- a/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json @@ -486,5 +486,27 @@ } }, "killedTasksSummary" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } } diff --git a/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json index 6dfdd27cd7d8f..9e661bdf8a034 100644 --- a/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json @@ -486,5 +486,27 @@ } }, "killedTasksSummary" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } } ] diff --git a/core/src/test/resources/HistoryServerExpectations/stage_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_list_json_expectation.json index a31c907221388..d109c73b46133 100644 --- a/core/src/test/resources/HistoryServerExpectations/stage_list_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/stage_list_json_expectation.json @@ -42,7 +42,29 @@ "rddIds" : [ 6, 5 ], "accumulatorUpdates" : [ ], "killedTasksSummary" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + 
"OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } }, { "status" : "FAILED", "stageId" : 2, @@ -88,7 +110,29 @@ "rddIds" : [ 3, 2 ], "accumulatorUpdates" : [ ], "killedTasksSummary" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } }, { "status" : "COMPLETE", "stageId" : 1, @@ -133,7 +177,29 @@ "rddIds" : [ 1, 0 ], "accumulatorUpdates" : [ ], "killedTasksSummary" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } }, { "status" : "COMPLETE", "stageId" : 0, @@ -178,5 +244,27 @@ "rddIds" : [ 0 ], "accumulatorUpdates" : [ ], "killedTasksSummary" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } } ] diff --git a/core/src/test/resources/HistoryServerExpectations/stage_list_with_accumulable_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_list_with_accumulable_json_expectation.json index 08089d4f3f65b..7901c4f93367b 100644 --- a/core/src/test/resources/HistoryServerExpectations/stage_list_with_accumulable_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/stage_list_with_accumulable_json_expectation.json @@ -46,5 +46,27 @@ "value" : "5050" } ], "killedTasksSummary" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + 
"OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } } ] diff --git a/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json index a2cfd9d42cc99..a5958e0a093f1 100644 --- a/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json @@ -530,5 +530,27 @@ } }, "killedTasksSummary" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } } From 1c7b79c0578c76629ac68a7e180f33e40aa380d8 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Mon, 21 Dec 2020 04:58:56 +0000 Subject: [PATCH 0827/1009] [SPARK-33856][SQL] Migrate ALTER TABLE ... RENAME TO PARTITION to use UnresolvedTable to resolve the identifier ### What changes were proposed in this pull request? This PR proposes to migrate `ALTER TABLE ... RENAME TO PARTITION` to use `UnresolvedTable` to resolve the table identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). Note that `ALTER TABLE ... RENAME TO PARTITION` is not supported for v2 tables. ### Why are the changes needed? The PR makes the resolution consistent behavior consistent. For example, ``` sql("CREATE DATABASE test") sql("CREATE TABLE spark_catalog.test.t (id bigint, val string) USING csv PARTITIONED BY (id)") sql("CREATE TEMPORARY VIEW t AS SELECT 2") sql("USE spark_catalog.test") sql("ALTER TABLE t PARTITION (id=1) RENAME TO PARTITION (id=2)") // works fine assuming id=1 exists. ``` , but after this PR: ``` sql("ALTER TABLE t PARTITION (id=1) RENAME TO PARTITION (id=2)") org.apache.spark.sql.AnalysisException: t is a temp view. 'ALTER TABLE ... RENAME TO PARTITION' expects a table; line 1 pos 0 ``` , which is the consistent behavior with other commands. ### Does this PR introduce _any_ user-facing change? After this PR, `ALTER TABLE` in the above example is resolved to a temp view `t` first instead of `spark_catalog.test.t`. ### How was this patch tested? Updated existing tests. Closes #30862 from imback82/alter_table_rename_partition_v2. 
Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/CheckAnalysis.scala | 3 +++ .../analysis/ResolvePartitionSpec.scala | 11 +++++++++- .../sql/catalyst/parser/AstBuilder.scala | 10 ++++++---- .../catalyst/plans/logical/statements.scala | 8 -------- .../catalyst/plans/logical/v2Commands.scala | 13 ++++++++++++ .../sql/catalyst/parser/DDLParserSuite.scala | 14 ++++++------- .../analysis/ResolveSessionCatalog.scala | 6 +++--- .../datasources/v2/DataSourceV2Strategy.scala | 4 ++++ .../AlterTablePartitionV2SQLSuite.scala | 20 +++++++++++++------ .../spark/sql/execution/SQLViewSuite.scala | 4 +++- .../sql/execution/command/DDLSuite.scala | 5 +++-- .../sql/hive/execution/HiveDDLSuite.scala | 5 +++-- 12 files changed, 69 insertions(+), 34 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index c0cdcdf2d9577..472de096b2f22 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -587,6 +587,9 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { case AlterTableDropPartition(ResolvedTable(_, _, table), parts, _, _) => checkAlterTablePartition(table, parts) + case AlterTableRenamePartition(ResolvedTable(_, _, table), from, _) => + checkAlterTablePartition(table, Seq(from)) + case showPartitions: ShowPartitions => checkShowPartitions(showPartitions) case _ => // Falls back to the following checks diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala index 35e4820cd710b..2c2bea6f89d49 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Cast, Literal} -import org.apache.spark.sql.catalyst.plans.logical.{AlterTableAddPartition, AlterTableDropPartition, LogicalPlan, ShowPartitions} +import org.apache.spark.sql.catalyst.plans.logical.{AlterTableAddPartition, AlterTableDropPartition, AlterTableRenamePartition, LogicalPlan, ShowPartitions} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.connector.catalog.SupportsPartitionManagement @@ -51,6 +51,15 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { partitionSchema, requireExactMatchedPartitionSpec(table.name, _, partitionSchema.fieldNames))) + case r @ AlterTableRenamePartition( + ResolvedTable(_, _, table: SupportsPartitionManagement), from, _) => + val partitionSchema = table.partitionSchema() + r.copy(from = resolvePartitionSpecs( + table.name, + Seq(from), + partitionSchema, + requireExactMatchedPartitionSpec(table.name, _, partitionSchema.fieldNames)).head) + case r @ ShowPartitions(ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs) => r.copy(pattern = resolvePartitionSpecs( table.name, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 94589688953d7..9c265544f3227 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3761,7 +3761,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } /** - * Create an [[AlterTableRenamePartitionStatement]] + * Create an [[AlterTableRenamePartition]] * * For example: * {{{ @@ -3770,9 +3770,11 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg */ override def visitRenameTablePartition( ctx: RenameTablePartitionContext): LogicalPlan = withOrigin(ctx) { - AlterTableRenamePartitionStatement( - visitMultipartIdentifier(ctx.multipartIdentifier), - visitNonOptionalPartitionSpec(ctx.from), + AlterTableRenamePartition( + UnresolvedTable( + visitMultipartIdentifier(ctx.multipartIdentifier), + "ALTER TABLE ... RENAME TO PARTITION"), + UnresolvedPartitionSpec(visitNonOptionalPartitionSpec(ctx.from)), visitNonOptionalPartitionSpec(ctx.to)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index 59239f6e041a5..f6d141ded384a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -292,14 +292,6 @@ case class AlterTableSetLocationStatement( partitionSpec: Option[TablePartitionSpec], location: String) extends ParsedStatement -/** - * ALTER TABLE ... RENAME PARTITION command, as parsed from SQL. - */ -case class AlterTableRenamePartitionStatement( - tableName: Seq[String], - from: TablePartitionSpec, - to: TablePartitionSpec) extends ParsedStatement - /** * An INSERT INTO statement, as parsed from SQL. * diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index fa67d311c39c3..87d81d5330574 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -673,6 +673,19 @@ case class AlterTableDropPartition( override def children: Seq[LogicalPlan] = child :: Nil } +/** + * The logical plan of the ALTER TABLE ... RENAME TO PARTITION command. + */ +case class AlterTableRenamePartition( + child: LogicalPlan, + from: PartitionSpec, + to: TablePartitionSpec) extends Command { + override lazy val resolved: Boolean = + childrenResolved && from.isInstanceOf[ResolvedPartitionSpec] + + override def children: Seq[LogicalPlan] = child :: Nil +} + /** * The logical plan of the ALTER TABLE ... RECOVER PARTITIONS command. 
*/ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 5eb0c9a39f1e6..330a01be4bfb3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.parser import java.util.Locale import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedNamespace, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView, UnresolvedView} +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedNamespace, UnresolvedPartitionSpec, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView, UnresolvedView} import org.apache.spark.sql.catalyst.catalog.{ArchiveResource, BucketSpec, FileResource, FunctionResource, JarResource} import org.apache.spark.sql.catalyst.expressions.{EqualTo, Literal} import org.apache.spark.sql.catalyst.plans.logical._ @@ -2106,9 +2106,9 @@ class DDLParserSuite extends AnalysisTest { |RENAME TO PARTITION (dt='2008-09-09', country='uk') """.stripMargin val parsed1 = parsePlan(sql1) - val expected1 = AlterTableRenamePartitionStatement( - Seq("table_name"), - Map("dt" -> "2008-08-08", "country" -> "us"), + val expected1 = AlterTableRenamePartition( + UnresolvedTable(Seq("table_name"), "ALTER TABLE ... RENAME TO PARTITION"), + UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us")), Map("dt" -> "2008-09-09", "country" -> "uk")) comparePlans(parsed1, expected1) @@ -2118,9 +2118,9 @@ class DDLParserSuite extends AnalysisTest { |RENAME TO PARTITION (ds='2018-06-10') """.stripMargin val parsed2 = parsePlan(sql2) - val expected2 = AlterTableRenamePartitionStatement( - Seq("a", "b", "c"), - Map("ds" -> "2017-06-10"), + val expected2 = AlterTableRenamePartition( + UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... 
RENAME TO PARTITION"), + UnresolvedPartitionSpec(Map("ds" -> "2017-06-10")), Map("ds" -> "2018-06-10")) comparePlans(parsed2, expected2) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 723647a4a9207..66d1c406a5603 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -445,10 +445,10 @@ class ResolveSessionCatalog( partSpecsAndLocs.asUnresolvedPartitionSpecs.map(spec => (spec.spec, spec.location)), ifNotExists) - case AlterTableRenamePartitionStatement(tbl, from, to) => - val v1TableName = parseV1Table(tbl, "ALTER TABLE RENAME PARTITION") + case AlterTableRenamePartition( + ResolvedV1TableIdentifier(ident), UnresolvedPartitionSpec(from, _), to) => AlterTableRenamePartitionCommand( - v1TableName.asTableIdentifier, + ident.asTableIdentifier, from, to) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 50bcf81f1ba2d..635117a9932ac 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -342,6 +342,10 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat AlterTableDropPartitionExec( table, parts.asResolvedPartitionSpecs, ignoreIfNotExists) :: Nil + case AlterTableRenamePartition(_: ResolvedTable, _: ResolvedPartitionSpec, _) => + throw new AnalysisException( + "ALTER TABLE ... RENAME TO PARTITION is not supported for v2 tables.") + case AlterTableRecoverPartitions(_: ResolvedTable) => throw new AnalysisException( "ALTER TABLE ... 
RECOVER PARTITIONS is not supported for v2 tables.") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala index ac4d055eb0e60..bdf2fa5b7ac96 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala @@ -33,13 +33,21 @@ class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { } test("ALTER TABLE RENAME PARTITION") { - val t = "testcat.ns1.ns2.tbl" - withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") - val e = intercept[AnalysisException] { - sql(s"ALTER TABLE $t PARTITION (id=1) RENAME TO PARTITION (id=2)") + val nonPartTbl = "testcat.ns1.ns2.tbl" + val partTbl = "testpart.ns1.ns2.tbl" + withTable(nonPartTbl, partTbl) { + spark.sql(s"CREATE TABLE $nonPartTbl (id bigint, data string) USING foo PARTITIONED BY (id)") + val e1 = intercept[AnalysisException] { + sql(s"ALTER TABLE $nonPartTbl PARTITION (id=1) RENAME TO PARTITION (id=2)") + } + assert(e1.message.contains(s"Table $nonPartTbl can not alter partitions")) + + spark.sql(s"CREATE TABLE $partTbl (id bigint, data string) USING foo PARTITIONED BY (id)") + val e2 = intercept[AnalysisException] { + sql(s"ALTER TABLE $partTbl PARTITION (id=1) RENAME TO PARTITION (id=2)") } - assert(e.message.contains("ALTER TABLE RENAME PARTITION is only supported with v1 tables")) + assert(e2.message.contains( + "ALTER TABLE ... RENAME TO PARTITION is not supported for v2 tables.")) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 6d65fddb1be62..9b84e0fe4bcb7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -149,7 +149,9 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { assertAnalysisError( s"ALTER TABLE $viewName SET SERDEPROPERTIES ('p' = 'an')", s"$viewName is a temp view. 'ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]' expects a table") - assertNoSuchTable(s"ALTER TABLE $viewName PARTITION (a='4') RENAME TO PARTITION (a='5')") + assertAnalysisError( + s"ALTER TABLE $viewName PARTITION (a='4') RENAME TO PARTITION (a='5')", + s"$viewName is a temp view. 'ALTER TABLE ... RENAME TO PARTITION' expects a table") assertAnalysisError( s"ALTER TABLE $viewName RECOVER PARTITIONS", s"$viewName is a temp view. 'ALTER TABLE ... 
RECOVER PARTITIONS' expects a table") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index d6474ae7d5f00..7a6076d6d9576 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -29,7 +29,7 @@ import org.apache.spark.internal.config import org.apache.spark.internal.config.RDD_PARALLEL_LISTING_THRESHOLD import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode} import org.apache.spark.sql.catalyst.{FunctionIdentifier, QualifiedTableName, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchDatabaseException, NoSuchFunctionException, NoSuchPartitionException, NoSuchTableException, TempTableAlreadyExistsException} +import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchDatabaseException, NoSuchFunctionException, NoSuchPartitionException, TempTableAlreadyExistsException} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.connector.catalog.SupportsNamespaces.PROP_OWNER @@ -1642,9 +1642,10 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { Set(Map("a" -> "10", "b" -> "p"), Map("a" -> "20", "b" -> "c"), Map("a" -> "3", "b" -> "p"))) // table to alter does not exist - intercept[NoSuchTableException] { + val e = intercept[AnalysisException] { sql("ALTER TABLE does_not_exist PARTITION (c='3') RENAME TO PARTITION (c='333')") } + assert(e.getMessage.contains("Table not found: does_not_exist")) // partition to rename does not exist intercept[NoSuchPartitionException] { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 34f127bade95b..e55b2d390a5d9 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -896,8 +896,9 @@ class HiveDDLSuite s"ALTER TABLE $oldViewName RECOVER PARTITIONS", s"$oldViewName is a view. 'ALTER TABLE ... RECOVER PARTITIONS' expects a table.") - assertErrorForAlterTableOnView( - s"ALTER TABLE $oldViewName PARTITION (a='1') RENAME TO PARTITION (a='100')") + assertAnalysisError( + s"ALTER TABLE $oldViewName PARTITION (a='1') RENAME TO PARTITION (a='100')", + s"$oldViewName is a view. 'ALTER TABLE ... RENAME TO PARTITION' expects a table.") assertAnalysisError( s"ALTER TABLE $oldViewName ADD IF NOT EXISTS PARTITION (a='4', b='8')", From b313a1e9e6360bb0ac939cb47083b9c4d21e614c Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 21 Dec 2020 08:34:12 +0000 Subject: [PATCH 0828/1009] [SPARK-33849][SQL][TESTS] Unify v1 and v2 DROP TABLE tests ### What changes were proposed in this pull request? 1. Move the `DROP TABLE` parsing tests to `DropTableParserSuite` 2. Place the v1 tests for `DROP TABLE` from `DDLSuite` and v2 tests from `DataSourceV2SQLSuite` to the common trait `DropTableSuiteBase`, so, the tests will run for V1, Hive V1 and V2 DS. ### Why are the changes needed? - The unification will allow to run common `DROP TABLE` tests for both DSv1 and Hive DSv1, DSv2 - We can detect missing features and differences between DSv1 and DSv2 implementations. ### Does this PR introduce _any_ user-facing change? 
No ### How was this patch tested? By running new test suites: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *DropTableParserSuite" $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *DropTableSuite" ``` Closes #30854 from MaxGekk/unify-drop-table-tests. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/DDLParserSuite.scala | 26 ----- .../sql/connector/DataSourceV2SQLSuite.scala | 75 -------------- .../sql/execution/command/DDLSuite.scala | 21 ---- .../command/DropTableParserSuite.scala | 55 +++++++++++ .../command/DropTableSuiteBase.scala | 99 +++++++++++++++++++ .../execution/command/v1/DropTableSuite.scala | 62 ++++++++++++ .../execution/command/v2/DropTableSuite.scala | 74 ++++++++++++++ .../sql/hive/execution/HiveDDLSuite.scala | 4 - .../execution/command/DropTableSuite.scala | 22 +++++ 9 files changed, 312 insertions(+), 126 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropTableParserSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropTableSuiteBase.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DropTableSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/DropTableSuite.scala diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 330a01be4bfb3..d408019053fb7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -694,32 +694,6 @@ class DDLParserSuite extends AnalysisTest { } } - test("drop table") { - parseCompare("DROP TABLE testcat.ns1.ns2.tbl", - DropTable( - UnresolvedTableOrView(Seq("testcat", "ns1", "ns2", "tbl"), "DROP TABLE"), - ifExists = false, - purge = false)) - parseCompare(s"DROP TABLE db.tab", - DropTable( - UnresolvedTableOrView(Seq("db", "tab"), "DROP TABLE"), ifExists = false, purge = false)) - parseCompare(s"DROP TABLE IF EXISTS db.tab", - DropTable( - UnresolvedTableOrView(Seq("db", "tab"), "DROP TABLE"), ifExists = true, purge = false)) - parseCompare(s"DROP TABLE tab", - DropTable( - UnresolvedTableOrView(Seq("tab"), "DROP TABLE"), ifExists = false, purge = false)) - parseCompare(s"DROP TABLE IF EXISTS tab", - DropTable( - UnresolvedTableOrView(Seq("tab"), "DROP TABLE"), ifExists = true, purge = false)) - parseCompare(s"DROP TABLE tab PURGE", - DropTable( - UnresolvedTableOrView(Seq("tab"), "DROP TABLE"), ifExists = false, purge = true)) - parseCompare(s"DROP TABLE IF EXISTS tab PURGE", - DropTable( - UnresolvedTableOrView(Seq("tab"), "DROP TABLE"), ifExists = true, purge = true)) - } - test("drop view") { val cmd = "DROP VIEW" val hint = Some("Please use DROP TABLE instead.") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 4fdb32c24f104..ed4ea567e4f65 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -711,81 +711,6 @@ class DataSourceV2SQLSuite assert(t2.v1Table.provider == 
Some(conf.defaultDataSourceName)) } - test("DropTable: basic") { - val tableName = "testcat.ns1.ns2.tbl" - val ident = Identifier.of(Array("ns1", "ns2"), "tbl") - sql(s"CREATE TABLE $tableName USING foo AS SELECT id, data FROM source") - assert(catalog("testcat").asTableCatalog.tableExists(ident) === true) - sql(s"DROP TABLE $tableName") - assert(catalog("testcat").asTableCatalog.tableExists(ident) === false) - } - - test("DropTable: table qualified with the session catalog name") { - val ident = Identifier.of(Array("default"), "tbl") - sql("CREATE TABLE tbl USING json AS SELECT 1 AS i") - assert(catalog("spark_catalog").asTableCatalog.tableExists(ident) === true) - sql("DROP TABLE spark_catalog.default.tbl") - assert(catalog("spark_catalog").asTableCatalog.tableExists(ident) === false) - } - - test("DropTable: if exists") { - val ex = intercept[AnalysisException] { - sql("DROP TABLE testcat.db.notbl") - } - assert(ex.getMessage.contains("Table or view not found: testcat.db.notbl")) - sql("DROP TABLE IF EXISTS testcat.db.notbl") - } - - test("DropTable: purge option") { - withTable("testcat.ns.t") { - sql("CREATE TABLE testcat.ns.t (id bigint) USING foo") - val ex = intercept[UnsupportedOperationException] { - sql ("DROP TABLE testcat.ns.t PURGE") - } - // The default TableCatalog.dropTable implementation doesn't support the purge option. - assert(ex.getMessage.contains("Purge option is not supported")) - } - } - - test("SPARK-33174: DROP TABLE should resolve to a temporary view first") { - withTable("testcat.ns.t") { - withTempView("t") { - sql("CREATE TABLE testcat.ns.t (id bigint) USING foo") - sql("CREATE TEMPORARY VIEW t AS SELECT 2") - sql("USE testcat.ns") - - // Check the temporary view 't' exists. - runShowTablesSql( - "SHOW TABLES FROM spark_catalog.default LIKE 't'", - Seq(Row("", "t", true)), - expectV2Catalog = false) - sql("DROP TABLE t") - // Verify that the temporary view 't' is resolved first and dropped. 
- runShowTablesSql( - "SHOW TABLES FROM spark_catalog.default LIKE 't'", - Nil, - expectV2Catalog = false) - } - } - } - - test("SPARK-33305: DROP TABLE should also invalidate cache") { - val t = "testcat.ns.t" - val view = "view" - withTable(t) { - withTempView(view) { - sql(s"CREATE TABLE $t USING foo AS SELECT id, data FROM source") - sql(s"CACHE TABLE $view AS SELECT id FROM $t") - checkAnswer(sql(s"SELECT * FROM $t"), spark.table("source")) - checkAnswer(sql(s"SELECT * FROM $view"), spark.table("source").select("id")) - - assert(!spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) - sql(s"DROP TABLE $t") - assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) - } - } - } - test("SPARK-33492: ReplaceTableAsSelect (atomic or non-atomic) should invalidate cache") { Seq("testcat.ns.t", "testcat_atomic.ns.t").foreach { t => val view = "view" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 7a6076d6d9576..f92a93d54b1cb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -338,10 +338,6 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { testRenamePartitions(isDatasourceTable = true) } - test("drop table - data source table") { - testDropTable(isDatasourceTable = true) - } - test("the qualified path of a database is stored in the catalog") { val catalog = spark.sessionState.catalog @@ -1332,23 +1328,6 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { assert(catalog.listTables("default") == Nil) } - protected def testDropTable(isDatasourceTable: Boolean): Unit = { - if (!isUsingHiveMetastore) { - assert(isDatasourceTable, "InMemoryCatalog only supports data source tables") - } - val catalog = spark.sessionState.catalog - val tableIdent = TableIdentifier("tab1", Some("dbx")) - createDatabase(catalog, "dbx") - createTable(catalog, tableIdent, isDatasourceTable) - assert(catalog.listTables("dbx") == Seq(tableIdent)) - sql("DROP TABLE dbx.tab1") - assert(catalog.listTables("dbx") == Nil) - sql("DROP TABLE IF EXISTS dbx.tab1") - intercept[AnalysisException] { - sql("DROP TABLE dbx.tab1") - } - } - test("drop view") { val catalog = spark.sessionState.catalog val tableIdent = TableIdentifier("tab1", Some("dbx")) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropTableParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropTableParserSuite.scala new file mode 100644 index 0000000000000..f88fff8ed326e --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropTableParserSuite.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedTableOrView} +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan +import org.apache.spark.sql.catalyst.plans.logical.{DropTable, LogicalPlan} +import org.apache.spark.sql.test.SharedSparkSession + +class DropTableParserSuite extends AnalysisTest with SharedSparkSession { + private def parseCompare(sql: String, expected: LogicalPlan): Unit = { + comparePlans(parsePlan(sql), expected, checkAnalysis = false) + } + + test("drop table") { + parseCompare("DROP TABLE testcat.ns1.ns2.tbl", + DropTable( + UnresolvedTableOrView(Seq("testcat", "ns1", "ns2", "tbl"), "DROP TABLE"), + ifExists = false, + purge = false)) + parseCompare(s"DROP TABLE db.tab", + DropTable( + UnresolvedTableOrView(Seq("db", "tab"), "DROP TABLE"), ifExists = false, purge = false)) + parseCompare(s"DROP TABLE IF EXISTS db.tab", + DropTable( + UnresolvedTableOrView(Seq("db", "tab"), "DROP TABLE"), ifExists = true, purge = false)) + parseCompare(s"DROP TABLE tab", + DropTable( + UnresolvedTableOrView(Seq("tab"), "DROP TABLE"), ifExists = false, purge = false)) + parseCompare(s"DROP TABLE IF EXISTS tab", + DropTable( + UnresolvedTableOrView(Seq("tab"), "DROP TABLE"), ifExists = true, purge = false)) + parseCompare(s"DROP TABLE tab PURGE", + DropTable( + UnresolvedTableOrView(Seq("tab"), "DROP TABLE"), ifExists = false, purge = true)) + parseCompare(s"DROP TABLE IF EXISTS tab PURGE", + DropTable( + UnresolvedTableOrView(Seq("tab"), "DROP TABLE"), ifExists = true, purge = true)) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropTableSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropTableSuiteBase.scala new file mode 100644 index 0000000000000..dd620d3bd7aa4 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropTableSuiteBase.scala @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} + +trait DropTableSuiteBase extends QueryTest with DDLCommandTestUtils { + override val command = "DROP TABLE" + + protected def createTable(tableName: String): Unit = { + sql(s"CREATE TABLE $tableName (c int) $defaultUsing") + sql(s"INSERT INTO $tableName SELECT 0") + } + + protected def checkTables(namespace: String, expectedTables: String*): Unit = { + val tables = sql(s"SHOW TABLES IN $catalog.$namespace").select("tableName") + val rows = expectedTables.map(Row(_)) + checkAnswer(tables, rows) + } + + test("basic") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + + createTable(s"$catalog.ns.tbl") + checkTables("ns", "tbl") + + sql(s"DROP TABLE $catalog.ns.tbl") + checkTables("ns") // no tables + } + } + + test("try to drop a nonexistent table") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + checkTables("ns") // no tables + + val errMsg = intercept[AnalysisException] { + sql(s"DROP TABLE $catalog.ns.tbl") + }.getMessage + assert(errMsg.contains("Table or view not found")) + } + } + + test("with IF EXISTS") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + + createTable(s"$catalog.ns.tbl") + checkTables("ns", "tbl") + sql(s"DROP TABLE IF EXISTS $catalog.ns.tbl") + checkTables("ns") + + // It must not throw any exceptions + sql(s"DROP TABLE IF EXISTS $catalog.ns.tbl") + checkTables("ns") + } + } + + test("SPARK-33174: DROP TABLE should resolve to a temporary view first") { + withNamespaceAndTable("ns", "t") { t => + withTempView("t") { + sql(s"CREATE TABLE $t (id bigint) $defaultUsing") + sql("CREATE TEMPORARY VIEW t AS SELECT 2") + sql(s"USE $catalog.ns") + try { + // Check the temporary view 't' exists. + checkAnswer( + sql("SHOW TABLES FROM spark_catalog.default LIKE 't'") + .select("tableName", "isTemporary"), + Row("t", true)) + sql("DROP TABLE t") + // Verify that the temporary view 't' is resolved first and dropped. + checkAnswer( + sql("SHOW TABLES FROM spark_catalog.default LIKE 't'") + .select("tableName", "isTemporary"), + Seq.empty) + } finally { + sql(s"USE spark_catalog") + } + } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DropTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DropTableSuite.scala new file mode 100644 index 0000000000000..4a6956e9ad82d --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DropTableSuite.scala @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command.v1 + +import org.apache.spark.sql.execution.command + +trait DropTableSuiteBase extends command.DropTableSuiteBase { + test("purge option") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + + createTable(s"$catalog.ns.tbl") + checkTables("ns", "tbl") + + sql(s"DROP TABLE $catalog.ns.tbl PURGE") + checkTables("ns") // no tables + } + } +} + +class DropTableSuite extends DropTableSuiteBase with CommandSuiteBase { + // The test fails in Hive External catalog with: + // org.apache.spark.sql.AnalysisException: + // spark_catalog.ns.tbl is not a valid TableIdentifier as it has more than 2 name parts. + test("SPARK-33305: DROP TABLE should also invalidate cache") { + val t = s"$catalog.ns.tbl" + val view = "view" + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + withTempView(view, "source") { + val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") + df.createOrReplaceTempView("source") + sql(s"CREATE TABLE $t $defaultUsing AS SELECT id, data FROM source") + sql(s"CACHE TABLE $view AS SELECT id FROM $t") + checkAnswer(sql(s"SELECT * FROM $t"), spark.table("source").collect()) + checkAnswer( + sql(s"SELECT * FROM $view"), + spark.table("source").select("id").collect()) + + assert(!spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) + sql(s"DROP TABLE $t") + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) + } + } + } +} + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala new file mode 100644 index 0000000000000..a36df8df4dd06 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command.v2 + +import org.apache.spark.sql.Row +import org.apache.spark.sql.connector.InMemoryTableSessionCatalog +import org.apache.spark.sql.execution.command +import org.apache.spark.sql.internal.SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION + +class DropTableSuite extends command.DropTableSuiteBase with CommandSuiteBase { + test("purge option") { + withNamespaceAndTable("ns", "tbl") { t => + createTable(t) + val errMsg = intercept[UnsupportedOperationException] { + sql(s"DROP TABLE $catalog.ns.tbl PURGE") + }.getMessage + // The default TableCatalog.dropTable implementation doesn't support the purge option. 
+ assert(errMsg.contains("Purge option is not supported")) + } + } + + test("table qualified with the session catalog name") { + withSQLConf( + V2_SESSION_CATALOG_IMPLEMENTATION.key -> classOf[InMemoryTableSessionCatalog].getName) { + + sql("CREATE TABLE tbl USING json AS SELECT 1 AS i") + checkAnswer( + sql("SHOW TABLES IN spark_catalog.default").select("tableName"), + Row("tbl")) + + sql("DROP TABLE spark_catalog.default.tbl") + checkAnswer( + sql("SHOW TABLES IN spark_catalog.default").select("tableName"), + Seq.empty) + } + } + + test("SPARK-33305: DROP TABLE should also invalidate cache") { + val t = s"$catalog.ns.tbl" + val view = "view" + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + withTempView(view, "source") { + val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") + df.createOrReplaceTempView("source") + sql(s"CREATE TABLE $t $defaultUsing AS SELECT id, data FROM source") + sql(s"CACHE TABLE $view AS SELECT id FROM $t") + checkAnswer(sql(s"SELECT * FROM $t"), spark.table("source").collect()) + checkAnswer( + sql(s"SELECT * FROM $view"), + spark.table("source").select("id").collect()) + + assert(!spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) + sql(s"DROP TABLE $t") + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) + } + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index e55b2d390a5d9..f13c8704f3b5b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -163,10 +163,6 @@ class HiveCatalogedDDLSuite extends DDLSuite with TestHiveSingleton with BeforeA testRenamePartitions(isDatasourceTable = false) } - test("drop table") { - testDropTable(isDatasourceTable = false) - } - test("alter datasource table add columns - orc") { testAddColumn("orc") } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/DropTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/DropTableSuite.scala new file mode 100644 index 0000000000000..b2a404d7206a6 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/DropTableSuite.scala @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive.execution.command + +import org.apache.spark.sql.execution.command.v1 + +class DropTableSuite extends v1.DropTableSuiteBase with CommandSuiteBase From 8d4d43319191ada0e07e3b27abe41929aa3eefe5 Mon Sep 17 00:00:00 2001 From: Jungtaek Lim Date: Mon, 21 Dec 2020 19:42:59 +0900 Subject: [PATCH 0829/1009] [SPARK-33836][SS][PYTHON] Expose DataStreamReader.table and DataStreamWriter.toTable ### What changes were proposed in this pull request? This PR proposes to expose `DataStreamReader.table` (SPARK-32885) and `DataStreamWriter.toTable` (SPARK-32896) to PySpark, which are the only way to read and write with table in Structured Streaming. ### Why are the changes needed? Please refer SPARK-32885 and SPARK-32896 for rationalizations of these public APIs. This PR only exposes them to PySpark. ### Does this PR introduce _any_ user-facing change? Yes, PySpark users will be able to read and write with table in Structured Streaming query. ### How was this patch tested? Manually tested. > v1 table >> create table A and ingest to the table A ``` spark.sql(""" create table table_pyspark_parquet ( value long, `timestamp` timestamp ) USING parquet """) df = spark.readStream.format('rate').option('rowsPerSecond', 100).load() query = df.writeStream.toTable('table_pyspark_parquet', checkpointLocation='/tmp/checkpoint5') query.lastProgress query.stop() ``` >> read table A and ingest to the table B which doesn't exist ``` df2 = spark.readStream.table('table_pyspark_parquet') query2 = df2.writeStream.toTable('table_pyspark_parquet_nonexist', format='parquet', checkpointLocation='/tmp/checkpoint2') query2.lastProgress query2.stop() ``` >> select tables ``` spark.sql("DESCRIBE TABLE table_pyspark_parquet").show() spark.sql("SELECT * FROM table_pyspark_parquet").show() spark.sql("DESCRIBE TABLE table_pyspark_parquet_nonexist").show() spark.sql("SELECT * FROM table_pyspark_parquet_nonexist").show() ``` > v2 table (leveraging Apache Iceberg as it provides V2 table and custom catalog as well) >> create table A and ingest to the table A ``` spark.sql(""" create table iceberg_catalog.default.table_pyspark_v2table ( value long, `timestamp` timestamp ) USING iceberg """) df = spark.readStream.format('rate').option('rowsPerSecond', 100).load() query = df.select('value', 'timestamp').writeStream.toTable('iceberg_catalog.default.table_pyspark_v2table', checkpointLocation='/tmp/checkpoint_v2table_1') query.lastProgress query.stop() ``` >> ingest to the non-exist table B ``` df2 = spark.readStream.format('rate').option('rowsPerSecond', 100).load() query2 = df2.select('value', 'timestamp').writeStream.toTable('iceberg_catalog.default.table_pyspark_v2table_nonexist', checkpointLocation='/tmp/checkpoint_v2table_2') query2.lastProgress query2.stop() ``` >> ingest to the non-exist table C partitioned by `value % 10` ``` df3 = spark.readStream.format('rate').option('rowsPerSecond', 100).load() df3a = df3.selectExpr('value', 'timestamp', 'value % 10 AS partition').repartition('partition') query3 = df3a.writeStream.partitionBy('partition').toTable('iceberg_catalog.default.table_pyspark_v2table_nonexist_partitioned', checkpointLocation='/tmp/checkpoint_v2table_3') query3.lastProgress query3.stop() ``` >> select tables ``` spark.sql("DESCRIBE TABLE iceberg_catalog.default.table_pyspark_v2table").show() spark.sql("SELECT * FROM iceberg_catalog.default.table_pyspark_v2table").show() spark.sql("DESCRIBE TABLE iceberg_catalog.default.table_pyspark_v2table_nonexist").show() spark.sql("SELECT * FROM 
iceberg_catalog.default.table_pyspark_v2table_nonexist").show() spark.sql("DESCRIBE TABLE iceberg_catalog.default.table_pyspark_v2table_nonexist_partitioned").show() spark.sql("SELECT * FROM iceberg_catalog.default.table_pyspark_v2table_nonexist_partitioned").show() ``` Closes #30835 from HeartSaVioR/SPARK-33836. Lead-authored-by: Jungtaek Lim Co-authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: HyukjinKwon --- python/pyspark/sql/streaming.py | 105 ++++++++++++++++++++- python/pyspark/sql/streaming.pyi | 10 ++ python/pyspark/sql/tests/test_streaming.py | 26 +++++ 3 files changed, 139 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py index 365b5f38694a7..2c9c1f06274ce 100644 --- a/python/pyspark/sql/streaming.py +++ b/python/pyspark/sql/streaming.py @@ -953,6 +953,36 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non else: raise TypeError("path can be only a single string") + def table(self, tableName): + """Define a Streaming DataFrame on a Table. The DataSource corresponding to the table should + support streaming mode. + + .. versionadded:: 3.1.0 + + Parameters + ---------- + tableName : str + string, for the name of the table. + + Returns + -------- + :class:`DataFrame` + + Notes + ----- + This API is evolving. + + Examples + -------- + >>> csv_sdf = spark.readStream.table('input_table') # doctest: +SKIP + >>> csv_sdf.isStreaming # doctest: +SKIP + True + """ + if isinstance(tableName, str): + return self._df(self._jreader.table(tableName)) + else: + raise TypeError("tableName can be only a single string") + class DataStreamWriter(object): """ @@ -987,7 +1017,7 @@ def outputMode(self, outputMode): * `append`: Only the new rows in the streaming DataFrame/Dataset will be written to the sink * `complete`: All the rows in the streaming DataFrame/Dataset will be written to the sink - every time these is some updates + every time these are some updates * `update`: only the rows that were updated in the streaming DataFrame/Dataset will be written to the sink every time there are some updates. If the query doesn't contain aggregations, it will be equivalent to `append` mode. @@ -1416,7 +1446,7 @@ def start(self, path=None, format=None, outputMode=None, partitionBy=None, query * `append`: Only the new rows in the streaming DataFrame/Dataset will be written to the sink * `complete`: All the rows in the streaming DataFrame/Dataset will be written to the - sink every time these is some updates + sink every time these are some updates * `update`: only the rows that were updated in the streaming DataFrame/Dataset will be written to the sink every time there are some updates. If the query doesn't contain aggregations, it will be equivalent to `append` mode. @@ -1464,6 +1494,77 @@ def start(self, path=None, format=None, outputMode=None, partitionBy=None, query else: return self._sq(self._jwrite.start(path)) + def toTable(self, tableName, format=None, outputMode=None, partitionBy=None, queryName=None, + **options): + """ + Starts the execution of the streaming query, which will continually output results to the + given table as new data arrives. + + A new table will be created if the table not exists. The returned + :class:`StreamingQuery` object can be used to interact with the stream. + + .. versionadded:: 3.1.0 + + Parameters + ---------- + tableName : str + string, for the name of the table. + format : str, optional + the format used to save. 
+ outputMode : str, optional + specifies how data of a streaming DataFrame/Dataset is written to a + streaming sink. + + * `append`: Only the new rows in the streaming DataFrame/Dataset will be written to the + sink + * `complete`: All the rows in the streaming DataFrame/Dataset will be written to the + sink every time these are some updates + * `update`: only the rows that were updated in the streaming DataFrame/Dataset will be + written to the sink every time there are some updates. If the query doesn't contain + aggregations, it will be equivalent to `append` mode. + partitionBy : str or list, optional + names of partitioning columns + queryName : str, optional + unique name for the query + **options : dict + All other string options. You may want to provide a `checkpointLocation`. + + Notes + ----- + This API is evolving. + + Examples + -------- + >>> sq = sdf.writeStream.format('parquet').queryName('this_query').option( + ... 'checkpointLocation', '/tmp/checkpoint').toTable('output_table') # doctest: +SKIP + >>> sq.isActive # doctest: +SKIP + True + >>> sq.name # doctest: +SKIP + 'this_query' + >>> sq.stop() # doctest: +SKIP + >>> sq.isActive # doctest: +SKIP + False + >>> sq = sdf.writeStream.trigger(processingTime='5 seconds').toTable( + ... 'output_table', queryName='that_query', outputMode="append", format='parquet', + ... checkpointLocation='/tmp/checkpoint') # doctest: +SKIP + >>> sq.name # doctest: +SKIP + 'that_query' + >>> sq.isActive # doctest: +SKIP + True + >>> sq.stop() # doctest: +SKIP + """ + # TODO(SPARK-33659): document the current behavior for DataStreamWriter.toTable API + self.options(**options) + if outputMode is not None: + self.outputMode(outputMode) + if partitionBy is not None: + self.partitionBy(partitionBy) + if format is not None: + self.format(format) + if queryName is not None: + self.queryName(queryName) + return self._sq(self._jwrite.toTable(tableName)) + def _test(): import doctest diff --git a/python/pyspark/sql/streaming.pyi b/python/pyspark/sql/streaming.pyi index 829610ad3b94b..1d05483c012f1 100644 --- a/python/pyspark/sql/streaming.pyi +++ b/python/pyspark/sql/streaming.pyi @@ -151,6 +151,7 @@ class DataStreamReader(OptionUtils): recursiveFileLookup: Optional[Union[bool, str]] = ..., unescapedQuoteHandling: Optional[str] = ..., ) -> DataFrame: ... + def table(self, tableName: str) -> DataFrame: ... class DataStreamWriter: def __init__(self, df: DataFrame) -> None: ... @@ -185,3 +186,12 @@ class DataStreamWriter: def foreachBatch( self, func: Callable[[DataFrame, int], None] ) -> DataStreamWriter: ... + def toTable( + self, + tableName: str, + format: Optional[str] = ..., + outputMode: Optional[str] = ..., + partitionBy: Optional[Union[str, List[str]]] = ..., + queryName: Optional[str] = ..., + **options: OptionalPrimitiveType + ) -> StreamingQuery: ... 
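For context, the Python `table` and `toTable` methods added above are thin wrappers over the JVM `DataStreamReader.table` and `DataStreamWriter.toTable` APIs from SPARK-32885/SPARK-32896 (the diff delegates to `self._jreader.table(...)` and `self._jwrite.toTable(...)`). Below is a minimal Scala sketch of the same round trip, loosely mirroring the manual test in the PR description; the table name `demo_stream_sink`, the checkpoint path, and the app name are illustrative, and it assumes a Spark 3.1-era build with the built-in `rate` source:

```
import org.apache.spark.sql.SparkSession

object StreamTableRoundTrip {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("stream-table-sketch").getOrCreate()

    // Target table, created up front so both the writer and the reader can refer to it.
    spark.sql(
      "CREATE TABLE IF NOT EXISTS demo_stream_sink (value long, `timestamp` timestamp) USING parquet")

    // Write side: continuously append the built-in rate source into the table.
    val query = spark.readStream
      .format("rate")
      .option("rowsPerSecond", "10")
      .load()
      .selectExpr("value", "timestamp")
      .writeStream
      .option("checkpointLocation", "/tmp/demo_stream_sink_checkpoint")
      .toTable("demo_stream_sink")

    // Read side: define a streaming DataFrame on the same table.
    val echoed = spark.readStream.table("demo_stream_sink")
    assert(echoed.isStreaming)

    query.stop()
    spark.stop()
  }
}
```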
diff --git a/python/pyspark/sql/tests/test_streaming.py b/python/pyspark/sql/tests/test_streaming.py index 28a50f9575a0a..44bfb2a7447ca 100644 --- a/python/pyspark/sql/tests/test_streaming.py +++ b/python/pyspark/sql/tests/test_streaming.py @@ -19,7 +19,9 @@ import shutil import tempfile import time +from random import randint +from pyspark.sql import Row from pyspark.sql.functions import lit from pyspark.sql.types import StructType, StructField, IntegerType, StringType from pyspark.testing.sqlutils import ReusedSQLTestCase @@ -569,6 +571,30 @@ def collectBatch(df, id): if q: q.stop() + def test_streaming_read_from_table(self): + input_table_name = "sample_input_table_%d" % randint(0, 100000000) + self.spark.sql("CREATE TABLE %s (value string) USING parquet" % input_table_name) + self.spark.sql("INSERT INTO %s VALUES ('aaa'), ('bbb'), ('ccc')" % input_table_name) + df = self.spark.readStream.table(input_table_name) + self.assertTrue(df.isStreaming) + q = df.writeStream.format('memory').queryName('this_query').start() + q.processAllAvailable() + q.stop() + result = self.spark.sql("SELECT * FROM this_query ORDER BY value").collect() + self.assertEqual([Row(value='aaa'), Row(value='bbb'), Row(value='ccc')], result) + + def test_streaming_write_to_table(self): + output_table_name = "sample_output_table_%d" % randint(0, 100000000) + tmpPath = tempfile.mkdtemp() + shutil.rmtree(tmpPath) + df = self.spark.readStream.format("rate").option("rowsPerSecond", 10).load() + q = df.writeStream.toTable(output_table_name, format='parquet', checkpointLocation=tmpPath) + self.assertTrue(q.isActive) + time.sleep(3) + q.stop() + result = self.spark.sql("SELECT value FROM %s" % output_table_name).collect() + self.assertTrue(len(result) > 0) + if __name__ == "__main__": import unittest From f4e1069bb835e3e132f7758e5842af79f26cd162 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Mon, 21 Dec 2020 03:29:00 -0800 Subject: [PATCH 0830/1009] [SPARK-33853][SQL] EXPLAIN CODEGEN and BenchmarkQueryTest don't show subquery code ### What changes were proposed in this pull request? This PR fixes an issue that `EXPLAIN CODEGEN` and `BenchmarkQueryTest` don't show the corresponding code for subqueries. The following example is about `EXPLAIN CODEGEN`. ``` spark.conf.set("spark.sql.adaptive.enabled", "false") val df = spark.range(1, 100) df.createTempView("df") spark.sql("SELECT (SELECT min(id) AS v FROM df)").explain("CODEGEN") scala> spark.sql("SELECT (SELECT min(id) AS v FROM df)").explain("CODEGEN") Found 1 WholeStageCodegen subtrees. 
== Subtree 1 / 1 (maxMethodCodeSize:55; maxConstantPoolSize:97(0.15% used); numInnerClasses:0) == *(1) Project [Subquery scalar-subquery#3, [id=#24] AS scalarsubquery()#5L] : +- Subquery scalar-subquery#3, [id=#24] : +- *(2) HashAggregate(keys=[], functions=[min(id#0L)], output=[v#2L]) : +- Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#20] : +- *(1) HashAggregate(keys=[], functions=[partial_min(id#0L)], output=[min#8L]) : +- *(1) Range (1, 100, step=1, splits=12) +- *(1) Scan OneRowRelation[] Generated code: /* 001 */ public Object generate(Object[] references) { /* 002 */ return new GeneratedIteratorForCodegenStage1(references); /* 003 */ } /* 004 */ /* 005 */ // codegenStageId=1 /* 006 */ final class GeneratedIteratorForCodegenStage1 extends org.apache.spark.sql.execution.BufferedRowIterator { /* 007 */ private Object[] references; /* 008 */ private scala.collection.Iterator[] inputs; /* 009 */ private scala.collection.Iterator rdd_input_0; /* 010 */ private org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[] project_mutableStateArray_0 = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[1]; /* 011 */ /* 012 */ public GeneratedIteratorForCodegenStage1(Object[] references) { /* 013 */ this.references = references; /* 014 */ } /* 015 */ /* 016 */ public void init(int index, scala.collection.Iterator[] inputs) { /* 017 */ partitionIndex = index; /* 018 */ this.inputs = inputs; /* 019 */ rdd_input_0 = inputs[0]; /* 020 */ project_mutableStateArray_0[0] = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(1, 0); /* 021 */ /* 022 */ } /* 023 */ /* 024 */ private void project_doConsume_0() throws java.io.IOException { /* 025 */ // common sub-expressions /* 026 */ /* 027 */ project_mutableStateArray_0[0].reset(); /* 028 */ /* 029 */ if (false) { /* 030 */ project_mutableStateArray_0[0].setNullAt(0); /* 031 */ } else { /* 032 */ project_mutableStateArray_0[0].write(0, 1L); /* 033 */ } /* 034 */ append((project_mutableStateArray_0[0].getRow())); /* 035 */ /* 036 */ } /* 037 */ /* 038 */ protected void processNext() throws java.io.IOException { /* 039 */ while ( rdd_input_0.hasNext()) { /* 040 */ InternalRow rdd_row_0 = (InternalRow) rdd_input_0.next(); /* 041 */ ((org.apache.spark.sql.execution.metric.SQLMetric) references[0] /* numOutputRows */).add(1); /* 042 */ project_doConsume_0(); /* 043 */ if (shouldStop()) return; /* 044 */ } /* 045 */ } /* 046 */ /* 047 */ } ``` After this change, the corresponding code for subqueries are shown. ``` Found 3 WholeStageCodegen subtrees. 
== Subtree 1 / 3 (maxMethodCodeSize:282; maxConstantPoolSize:206(0.31% used); numInnerClasses:0) == *(1) HashAggregate(keys=[], functions=[partial_min(id#0L)], output=[min#8L]) +- *(1) Range (1, 100, step=1, splits=12) Generated code: /* 001 */ public Object generate(Object[] references) { /* 002 */ return new GeneratedIteratorForCodegenStage1(references); /* 003 */ } /* 004 */ /* 005 */ // codegenStageId=1 /* 006 */ final class GeneratedIteratorForCodegenStage1 extends org.apache.spark.sql.execution.BufferedRowIterator { /* 007 */ private Object[] references; /* 008 */ private scala.collection.Iterator[] inputs; /* 009 */ private boolean agg_initAgg_0; /* 010 */ private boolean agg_bufIsNull_0; /* 011 */ private long agg_bufValue_0; /* 012 */ private boolean range_initRange_0; /* 013 */ private long range_nextIndex_0; /* 014 */ private TaskContext range_taskContext_0; /* 015 */ private InputMetrics range_inputMetrics_0; /* 016 */ private long range_batchEnd_0; /* 017 */ private long range_numElementsTodo_0; /* 018 */ private boolean agg_agg_isNull_2_0; /* 019 */ private org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[] range_mutableStateArray_0 = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[3]; /* 020 */ /* 021 */ public GeneratedIteratorForCodegenStage1(Object[] references) { /* 022 */ this.references = references; /* 023 */ } /* 024 */ /* 025 */ public void init(int index, scala.collection.Iterator[] inputs) { /* 026 */ partitionIndex = index; /* 027 */ this.inputs = inputs; /* 028 */ /* 029 */ range_taskContext_0 = TaskContext.get(); /* 030 */ range_inputMetrics_0 = range_taskContext_0.taskMetrics().inputMetrics(); /* 031 */ range_mutableStateArray_0[0] = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(1, 0); /* 032 */ range_mutableStateArray_0[1] = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(1, 0); /* 033 */ range_mutableStateArray_0[2] = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(1, 0); /* 034 */ /* 035 */ } /* 036 */ /* 037 */ private void agg_doAggregateWithoutKey_0() throws java.io.IOException { /* 038 */ // initialize aggregation buffer /* 039 */ agg_bufIsNull_0 = true; /* 040 */ agg_bufValue_0 = -1L; /* 041 */ /* 042 */ // initialize Range /* 043 */ if (!range_initRange_0) { /* 044 */ range_initRange_0 = true; /* 045 */ initRange(partitionIndex); /* 046 */ } /* 047 */ /* 048 */ while (true) { /* 049 */ if (range_nextIndex_0 == range_batchEnd_0) { /* 050 */ long range_nextBatchTodo_0; /* 051 */ if (range_numElementsTodo_0 > 1000L) { /* 052 */ range_nextBatchTodo_0 = 1000L; /* 053 */ range_numElementsTodo_0 -= 1000L; /* 054 */ } else { /* 055 */ range_nextBatchTodo_0 = range_numElementsTodo_0; /* 056 */ range_numElementsTodo_0 = 0; /* 057 */ if (range_nextBatchTodo_0 == 0) break; /* 058 */ } /* 059 */ range_batchEnd_0 += range_nextBatchTodo_0 * 1L; /* 060 */ } /* 061 */ /* 062 */ int range_localEnd_0 = (int)((range_batchEnd_0 - range_nextIndex_0) / 1L); /* 063 */ for (int range_localIdx_0 = 0; range_localIdx_0 < range_localEnd_0; range_localIdx_0++) { /* 064 */ long range_value_0 = ((long)range_localIdx_0 * 1L) + range_nextIndex_0; /* 065 */ /* 066 */ agg_doConsume_0(range_value_0); /* 067 */ /* 068 */ // shouldStop check is eliminated /* 069 */ } /* 070 */ range_nextIndex_0 = range_batchEnd_0; /* 071 */ ((org.apache.spark.sql.execution.metric.SQLMetric) references[0] /* numOutputRows */).add(range_localEnd_0); /* 072 */ 
range_inputMetrics_0.incRecordsRead(range_localEnd_0); /* 073 */ range_taskContext_0.killTaskIfInterrupted(); /* 074 */ } /* 075 */ /* 076 */ } /* 077 */ /* 078 */ private void initRange(int idx) { /* 079 */ java.math.BigInteger index = java.math.BigInteger.valueOf(idx); /* 080 */ java.math.BigInteger numSlice = java.math.BigInteger.valueOf(12L); /* 081 */ java.math.BigInteger numElement = java.math.BigInteger.valueOf(99L); /* 082 */ java.math.BigInteger step = java.math.BigInteger.valueOf(1L); /* 083 */ java.math.BigInteger start = java.math.BigInteger.valueOf(1L); /* 084 */ long partitionEnd; /* 085 */ /* 086 */ java.math.BigInteger st = index.multiply(numElement).divide(numSlice).multiply(step).add(start); /* 087 */ if (st.compareTo(java.math.BigInteger.valueOf(Long.MAX_VALUE)) > 0) { /* 088 */ range_nextIndex_0 = Long.MAX_VALUE; /* 089 */ } else if (st.compareTo(java.math.BigInteger.valueOf(Long.MIN_VALUE)) < 0) { /* 090 */ range_nextIndex_0 = Long.MIN_VALUE; /* 091 */ } else { /* 092 */ range_nextIndex_0 = st.longValue(); /* 093 */ } /* 094 */ range_batchEnd_0 = range_nextIndex_0; /* 095 */ /* 096 */ java.math.BigInteger end = index.add(java.math.BigInteger.ONE).multiply(numElement).divide(numSlice) /* 097 */ .multiply(step).add(start); /* 098 */ if (end.compareTo(java.math.BigInteger.valueOf(Long.MAX_VALUE)) > 0) { /* 099 */ partitionEnd = Long.MAX_VALUE; /* 100 */ } else if (end.compareTo(java.math.BigInteger.valueOf(Long.MIN_VALUE)) < 0) { /* 101 */ partitionEnd = Long.MIN_VALUE; /* 102 */ } else { /* 103 */ partitionEnd = end.longValue(); /* 104 */ } /* 105 */ /* 106 */ java.math.BigInteger startToEnd = java.math.BigInteger.valueOf(partitionEnd).subtract( /* 107 */ java.math.BigInteger.valueOf(range_nextIndex_0)); /* 108 */ range_numElementsTodo_0 = startToEnd.divide(step).longValue(); /* 109 */ if (range_numElementsTodo_0 < 0) { /* 110 */ range_numElementsTodo_0 = 0; /* 111 */ } else if (startToEnd.remainder(step).compareTo(java.math.BigInteger.valueOf(0L)) != 0) { /* 112 */ range_numElementsTodo_0++; /* 113 */ } /* 114 */ } /* 115 */ /* 116 */ private void agg_doConsume_0(long agg_expr_0_0) throws java.io.IOException { /* 117 */ // do aggregate /* 118 */ // common sub-expressions /* 119 */ /* 120 */ // evaluate aggregate functions and update aggregation buffers /* 121 */ /* 122 */ agg_agg_isNull_2_0 = true; /* 123 */ long agg_value_2 = -1L; /* 124 */ /* 125 */ if (!agg_bufIsNull_0 && (agg_agg_isNull_2_0 || /* 126 */ agg_value_2 > agg_bufValue_0)) { /* 127 */ agg_agg_isNull_2_0 = false; /* 128 */ agg_value_2 = agg_bufValue_0; /* 129 */ } /* 130 */ /* 131 */ if (!false && (agg_agg_isNull_2_0 || /* 132 */ agg_value_2 > agg_expr_0_0)) { /* 133 */ agg_agg_isNull_2_0 = false; /* 134 */ agg_value_2 = agg_expr_0_0; /* 135 */ } /* 136 */ /* 137 */ agg_bufIsNull_0 = agg_agg_isNull_2_0; /* 138 */ agg_bufValue_0 = agg_value_2; /* 139 */ /* 140 */ } /* 141 */ /* 142 */ protected void processNext() throws java.io.IOException { /* 143 */ while (!agg_initAgg_0) { /* 144 */ agg_initAgg_0 = true; /* 145 */ long agg_beforeAgg_0 = System.nanoTime(); /* 146 */ agg_doAggregateWithoutKey_0(); /* 147 */ ((org.apache.spark.sql.execution.metric.SQLMetric) references[2] /* aggTime */).add((System.nanoTime() - agg_beforeAgg_0) / 1000000); /* 148 */ /* 149 */ // output the result /* 150 */ /* 151 */ ((org.apache.spark.sql.execution.metric.SQLMetric) references[1] /* numOutputRows */).add(1); /* 152 */ range_mutableStateArray_0[2].reset(); /* 153 */ /* 154 */ range_mutableStateArray_0[2].zeroOutNullBytes(); 
/* 155 */ /* 156 */ if (agg_bufIsNull_0) { /* 157 */ range_mutableStateArray_0[2].setNullAt(0); /* 158 */ } else { /* 159 */ range_mutableStateArray_0[2].write(0, agg_bufValue_0); /* 160 */ } /* 161 */ append((range_mutableStateArray_0[2].getRow())); /* 162 */ } /* 163 */ } /* 164 */ /* 165 */ } ``` ### Why are the changes needed? For better debuggability. ### Does this PR introduce _any_ user-facing change? Yes. After this change, users can see subquery code by `EXPLAIN CODEGEN`. ### How was this patch tested? New test. Closes #30859 from sarutak/explain-codegen-subqueries. Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .../spark/sql/execution/debug/package.scala | 15 ++++++++++----- .../apache/spark/sql/BenchmarkQueryTest.scala | 14 ++++++++++---- .../org/apache/spark/sql/ExplainSuite.scala | 16 ++++++++++++++++ 3 files changed, 36 insertions(+), 9 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala index 6c40104e52a5f..3cbebca14f7dc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala @@ -107,12 +107,17 @@ package object debug { */ def codegenStringSeq(plan: SparkPlan): Seq[(String, String, ByteCodeStats)] = { val codegenSubtrees = new collection.mutable.HashSet[WholeStageCodegenExec]() - plan transform { - case s: WholeStageCodegenExec => - codegenSubtrees += s - s - case s => s + + def findSubtrees(plan: SparkPlan): Unit = { + plan foreach { + case s: WholeStageCodegenExec => + codegenSubtrees += s + case s => + s.subqueries.foreach(findSubtrees) + } } + + findSubtrees(plan) codegenSubtrees.toSeq.sortBy(_.codegenStageId).map { subtree => val (_, source) = subtree.doCodeGen() val codeStats = try { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala index 2c3b37a1498ec..d58bf2c6260b1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala @@ -63,11 +63,17 @@ abstract class BenchmarkQueryTest extends QueryTest with SharedSparkSession { protected def checkGeneratedCode(plan: SparkPlan, checkMethodCodeSize: Boolean = true): Unit = { val codegenSubtrees = new collection.mutable.HashSet[WholeStageCodegenExec]() - plan foreach { - case s: WholeStageCodegenExec => - codegenSubtrees += s - case _ => + + def findSubtrees(plan: SparkPlan): Unit = { + plan foreach { + case s: WholeStageCodegenExec => + codegenSubtrees += s + case s => + s.subqueries.foreach(findSubtrees) + } } + + findSubtrees(plan) codegenSubtrees.toSeq.foreach { subtree => val code = subtree.doCodeGen()._2 val (_, ByteCodeStats(maxMethodCodeSize, _, _)) = try { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala index 8b7459fddb59a..bf100c0205efa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala @@ -228,6 +228,22 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite } } + test("SPARK-33853: explain codegen - check presence of subquery") { + withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") { + withTempView("df") { + val df1 = spark.range(1, 100) + 
df1.createTempView("df") + + val sqlText = "EXPLAIN CODEGEN SELECT (SELECT min(id) FROM df)" + val expectedText = "Found 3 WholeStageCodegen subtrees." + + withNormalizedExplain(sqlText) { normalizedOutput => + assert(normalizedOutput.contains(expectedText)) + } + } + } + } + test("explain formatted - check presence of subquery in case of DPP") { withTable("df1", "df2") { withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true", From cdd1752ad1bbb03b817870e1ad6b1d9cbda734a1 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 21 Dec 2020 03:37:30 -0800 Subject: [PATCH 0831/1009] [SPARK-33862][SQL] Throw `PartitionAlreadyExistsException` if the target partition exists while renaming ### What changes were proposed in this pull request? Throw `PartitionAlreadyExistsException` from `ALTER TABLE .. RENAME TO PARTITION` for a table from Hive V1 External Catalog in the case when the target partition already exists. ### Why are the changes needed? 1. To have the same behavior of V1 In-Memory and Hive External Catalog. 2. To not propagate internal Hive's exceptions to users. ### Does this PR introduce _any_ user-facing change? Yes. After the changes, the partition renaming command throws `PartitionAlreadyExistsException` for tables from the Hive catalog. ### How was this patch tested? Added new UT: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *HiveCatalogedDDLSuite" ``` Closes #30866 from MaxGekk/throw-PartitionAlreadyExistsException. Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/execution/command/DDLSuite.scala | 8 +++++++- .../org/apache/spark/sql/hive/client/HiveClientImpl.scala | 5 ++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index f92a93d54b1cb..49184d0a2e0d0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -29,7 +29,7 @@ import org.apache.spark.internal.config import org.apache.spark.internal.config.RDD_PARALLEL_LISTING_THRESHOLD import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode} import org.apache.spark.sql.catalyst.{FunctionIdentifier, QualifiedTableName, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchDatabaseException, NoSuchFunctionException, NoSuchPartitionException, TempTableAlreadyExistsException} +import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchDatabaseException, NoSuchFunctionException, NoSuchPartitionException, PartitionAlreadyExistsException, TempTableAlreadyExistsException} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.connector.catalog.SupportsNamespaces.PROP_OWNER @@ -1635,6 +1635,12 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { sql("ALTER TABLE tab1 PARTITION (A='10', B='p') RENAME TO PARTITION (A='1', B='p')") assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == Set(Map("a" -> "1", "b" -> "p"), Map("a" -> "20", "b" -> "c"), Map("a" -> "3", "b" -> "p"))) + + // target partition already exists + val errMsg = intercept[PartitionAlreadyExistsException] { + sql("ALTER TABLE tab1 PARTITION (a='1', b='p') RENAME TO PARTITION (a='20', b='c')") + }.getMessage + assert(errMsg.contains("Partition already 
exists")) } protected def testChangeColumn(isDatasourceTable: Boolean): Unit = { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index e779a80f7c323..40bcdefbc351e 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -49,7 +49,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.metrics.source.HiveCatalogMetrics import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchPartitionException, NoSuchPartitionsException, PartitionsAlreadyExistException} +import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchPartitionException, NoSuchPartitionsException, PartitionAlreadyExistsException, PartitionsAlreadyExistException} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.Expression @@ -665,6 +665,9 @@ private[hive] class HiveClientImpl( val catalogTable = getTable(db, table) val hiveTable = toHiveTable(catalogTable, Some(userName)) specs.zip(newSpecs).foreach { case (oldSpec, newSpec) => + if (client.getPartition(hiveTable, newSpec.asJava, false) != null) { + throw new PartitionAlreadyExistsException(db, table, newSpec) + } val hivePart = getPartitionOption(catalogTable, oldSpec) .map { p => toHivePartition(p.copy(spec = newSpec), hiveTable) } .getOrElse { throw new NoSuchPartitionException(db, table, oldSpec) } From b4bea1aa8972cdfd8901757a0ed990a20fca620f Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 21 Dec 2020 20:59:33 +0900 Subject: [PATCH 0832/1009] [SPARK-28863][SQL][FOLLOWUP] Make sure optimized plan will not be re-analyzed ### What changes were proposed in this pull request? It's a known issue that re-analyzing an optimized plan can lead to various issues. We made several attempts to avoid it from happening, but the current solution `AlreadyOptimized` is still not 100% safe, as people can inject catalyst rules to call analyzer directly. This PR proposes a simpler and safer idea: we set the `analyzed` flag to true after optimization, and analyzer will skip processing plans whose `analyzed` flag is true. ### Why are the changes needed? make the code simpler and safer ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing tests. Closes #30777 from cloud-fan/ds. 
Authored-by: Wenchen Fan Signed-off-by: HyukjinKwon --- .../sql/catalyst/analysis/Analyzer.scala | 1 + .../plans/logical/AnalysisHelper.scala | 7 +- .../sql/execution/AlreadyOptimized.scala | 37 -------- .../spark/sql/execution/QueryExecution.scala | 7 +- .../datasources/v2/V1FallbackWriters.scala | 7 +- .../sql/execution/AlreadyOptimizedSuite.scala | 85 ------------------- 6 files changed, 16 insertions(+), 128 deletions(-) delete mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/AlreadyOptimized.scala delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/AlreadyOptimizedSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 1a5f33443d8e3..8d8e00b80c506 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -168,6 +168,7 @@ class Analyzer(override val catalogManager: CatalogManager) } def executeAndCheck(plan: LogicalPlan, tracker: QueryPlanningTracker): LogicalPlan = { + if (plan.analyzed) return plan AnalysisHelper.markInAnalyzer { val analyzed = executeAndTrack(plan, tracker) try { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala index 2c6a716a2ed48..ffd1f784e4670 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala @@ -46,7 +46,7 @@ trait AnalysisHelper extends QueryPlan[LogicalPlan] { self: LogicalPlan => * This should only be called by * [[org.apache.spark.sql.catalyst.analysis.CheckAnalysis]]. */ - private[catalyst] def setAnalyzed(): Unit = { + private[sql] def setAnalyzed(): Unit = { if (!_analyzed) { _analyzed = true children.foreach(_.setAnalyzed()) @@ -180,6 +180,11 @@ trait AnalysisHelper extends QueryPlan[LogicalPlan] { self: LogicalPlan => super.transformAllExpressions(rule) } + override def clone(): LogicalPlan = { + val cloned = super.clone() + if (analyzed) cloned.setAnalyzed() + cloned + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/AlreadyOptimized.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/AlreadyOptimized.scala deleted file mode 100644 index e40b1141b43eb..0000000000000 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/AlreadyOptimized.scala +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.execution - -import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} -import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan - -/** Query execution that skips re-analysis and optimize. */ -class AlreadyOptimizedExecution( - session: SparkSession, - plan: LogicalPlan) extends QueryExecution(session, plan) { - override lazy val analyzed: LogicalPlan = plan - override lazy val optimizedPlan: LogicalPlan = plan -} - -object AlreadyOptimized { - def dataFrame(sparkSession: SparkSession, optimized: LogicalPlan): DataFrame = { - val qe = new AlreadyOptimizedExecution(sparkSession, optimized) - new Dataset[Row](qe, RowEncoder(qe.analyzed.schema)) - } -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index 0531dd210e539..1d5a884d6e181 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -84,7 +84,12 @@ class QueryExecution( lazy val optimizedPlan: LogicalPlan = executePhase(QueryPlanningTracker.OPTIMIZATION) { // clone the plan to avoid sharing the plan instance between different stages like analyzing, // optimizing and planning. - sparkSession.sessionState.optimizer.executeAndTrack(withCachedData.clone(), tracker) + val plan = sparkSession.sessionState.optimizer.executeAndTrack(withCachedData.clone(), tracker) + // We do not want optimized plans to be re-analyzed as literals that have been constant folded + // and such can cause issues during analysis. While `clone` should maintain the `analyzed` state + // of the LogicalPlan, we set the plan as analyzed here as well out of paranoia. + plan.setAnalyzed() + plan } private def assertOptimized(): Unit = optimizedPlan diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala index 9d2cea9fbaff3..080e977121efb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala @@ -20,12 +20,13 @@ package org.apache.spark.sql.execution.datasources.v2 import java.util.UUID import org.apache.spark.SparkException +import org.apache.spark.sql.Dataset import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.connector.catalog.SupportsWrite import org.apache.spark.sql.connector.write.{LogicalWriteInfoImpl, SupportsOverwrite, SupportsTruncate, V1WriteBuilder, WriteBuilder} -import org.apache.spark.sql.execution.{AlreadyOptimized, SparkPlan} +import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.sources.{AlwaysTrue, Filter, InsertableRelation} import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -118,9 +119,7 @@ trait SupportsV1Write extends SparkPlan { protected def writeWithV1( relation: InsertableRelation, refreshCache: () => Unit = () => ()): Seq[InternalRow] = { - val session = sqlContext.sparkSession - // The `plan` is already optimized, we should not analyze and optimize it again. 
- relation.insert(AlreadyOptimized.dataFrame(session, plan), overwrite = false) + relation.insert(Dataset.ofRows(sqlContext.sparkSession, plan), overwrite = false) refreshCache() Nil diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/AlreadyOptimizedSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/AlreadyOptimizedSuite.scala deleted file mode 100644 index c266aa92f01cc..0000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/AlreadyOptimizedSuite.scala +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.execution - -import org.apache.spark.sql.QueryTest -import org.apache.spark.sql.execution.adaptive.EnableAdaptiveExecutionSuite -import org.apache.spark.sql.test.SharedSparkSession - -class AlreadyOptimizedSuite extends QueryTest with SharedSparkSession { - - import testImplicits._ - - test("simple execution") { - val df = spark.range(10) - val planned = AlreadyOptimized.dataFrame(spark, df.queryExecution.optimizedPlan) - - checkAnswer(planned, df.toDF().collect()) - } - - test("planning on top works - projection") { - val df = spark.range(10) - val planned = AlreadyOptimized.dataFrame(spark, df.queryExecution.optimizedPlan) - - checkAnswer( - planned.withColumn("data", 'id + 1), - df.withColumn("data", 'id + 1).collect()) - } - - test("planning on top works - filter") { - val df = spark.range(10) - val planned = AlreadyOptimized.dataFrame(spark, df.queryExecution.optimizedPlan) - - checkAnswer(planned.where('id < 5), df.where('id < 5).toDF().collect()) - } - - test("planning on top works - aggregate") { - val df = spark.range(10) - val planned = AlreadyOptimized.dataFrame(spark, df.queryExecution.optimizedPlan) - - checkAnswer(planned.groupBy('id).count(), df.groupBy('id).count().collect()) - } - - test("planning on top works - joins") { - val df = spark.range(10) - val planned = AlreadyOptimized.dataFrame(spark, df.queryExecution.optimizedPlan) - - val plannedLeft = planned.alias("l") - val dfLeft = df.alias("l") - val plannedRight = planned.alias("r") - val dfRight = df.alias("r") - - checkAnswer( - plannedLeft.where('id < 3).join(plannedRight, Seq("id")), - dfLeft.where('id < 3).join(dfRight, Seq("id")).collect()) - - checkAnswer( - plannedLeft.where('id < 3).join(plannedRight, plannedLeft("id") === plannedRight("id")), - dfLeft.where('id < 3).join(dfRight, dfLeft("id") === dfRight("id")).collect()) - - checkAnswer( - plannedLeft.join(plannedRight, Seq("id")).where('id < 3), - dfLeft.join(dfRight, Seq("id")).where('id < 3).collect()) - - checkAnswer( - plannedLeft.join(plannedRight, plannedLeft("id") === plannedRight("id")).where($"l.id" < 3), - dfLeft.join(dfRight, dfLeft("id") === dfRight("id")).where($"l.id" < 
3).collect()) - } -} - -class AlreadyOptimizedAQESuite extends AlreadyOptimizedSuite with EnableAdaptiveExecutionSuite From 4b19f49dd01168c006bc5d8a506a1ef3c36c721d Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Mon, 21 Dec 2020 04:15:29 -0800 Subject: [PATCH 0833/1009] [SPARK-33845][SQL] Remove unnecessary if when trueValue and falseValue are foldable boolean types ### What changes were proposed in this pull request? Improve `SimplifyConditionals`. Simplify `If(cond, TrueLiteral, FalseLiteral)` to `cond`. Simplify `If(cond, FalseLiteral, TrueLiteral)` to `Not(cond)`. The use case is: ```sql create table t1 using parquet as select id from range(10); select if (id > 2, false, true) from t1; ``` Before this pr: ``` == Physical Plan == *(1) Project [if ((id#1L > 2)) false else true AS (IF((id > CAST(2 AS BIGINT)), false, true))#2] +- *(1) ColumnarToRow +- FileScan parquet default.t1[id#1L] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex[file:/Users/yumwang/opensource/spark/spark-warehouse/org.apache.spark.sql.DataF..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct ``` After this pr: ``` == Physical Plan == *(1) Project [(id#1L <= 2) AS (IF((id > CAST(2 AS BIGINT)), false, true))#2] +- *(1) ColumnarToRow +- FileScan parquet default.t1[id#1L] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex[file:/Users/yumwang/opensource/spark/spark-warehouse/org.apache.spark.sql.DataF..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct ``` ### Why are the changes needed? Improve query performance. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #30849 from wangyum/SPARK-33798-2. Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/optimizer/expressions.scala | 2 ++ .../PushFoldableIntoBranchesSuite.scala | 7 ++--- ...ReplaceNullWithFalseInPredicateSuite.scala | 31 +++++++++++-------- .../optimizer/SimplifyConditionalSuite.scala | 16 ++++++++++ 4 files changed, 39 insertions(+), 17 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index e6730c9275a1e..ac2caaeb15357 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -475,6 +475,8 @@ object SimplifyConditionals extends Rule[LogicalPlan] with PredicateHelper { case If(TrueLiteral, trueValue, _) => trueValue case If(FalseLiteral, _, falseValue) => falseValue case If(Literal(null, _), _, falseValue) => falseValue + case If(cond, TrueLiteral, FalseLiteral) => cond + case If(cond, FalseLiteral, TrueLiteral) => Not(cond) case If(cond, trueValue, falseValue) if cond.deterministic && trueValue.semanticEquals(falseValue) => trueValue case If(cond, l @ Literal(null, _), FalseLiteral) if !cond.nullable => And(cond, l) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala index 43360af46ffb3..de4f4be8ec333 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala @@ -53,7 +53,7 @@ class 
PushFoldableIntoBranchesSuite test("Push down EqualTo through If") { assertEquivalent(EqualTo(ifExp, Literal(4)), FalseLiteral) - assertEquivalent(EqualTo(ifExp, Literal(3)), If(a, FalseLiteral, TrueLiteral)) + assertEquivalent(EqualTo(ifExp, Literal(3)), Not(a)) // Push down at most one not foldable expressions. assertEquivalent( @@ -67,7 +67,7 @@ class PushFoldableIntoBranchesSuite val nonDeterministic = If(LessThan(Rand(1), Literal(0.5)), Literal(1), Literal(2)) assert(!nonDeterministic.deterministic) assertEquivalent(EqualTo(nonDeterministic, Literal(2)), - If(LessThan(Rand(1), Literal(0.5)), FalseLiteral, TrueLiteral)) + GreaterThanOrEqual(Rand(1), Literal(0.5))) assertEquivalent(EqualTo(nonDeterministic, Literal(3)), If(LessThan(Rand(1), Literal(0.5)), FalseLiteral, FalseLiteral)) @@ -102,8 +102,7 @@ class PushFoldableIntoBranchesSuite assertEquivalent(Remainder(ifExp, Literal(4)), If(a, Literal(2), Literal(3))) assertEquivalent(Divide(If(a, Literal(2.0), Literal(3.0)), Literal(1.0)), If(a, Literal(2.0), Literal(3.0))) - assertEquivalent(And(If(a, FalseLiteral, TrueLiteral), TrueLiteral), - If(a, FalseLiteral, TrueLiteral)) + assertEquivalent(And(If(a, FalseLiteral, TrueLiteral), TrueLiteral), Not(a)) assertEquivalent(Or(If(a, FalseLiteral, TrueLiteral), TrueLiteral), TrueLiteral) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala index 00433a5490574..5da71c31e1990 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions.{And, ArrayExists, ArrayFilter, ArrayTransform, CaseWhen, Expression, GreaterThan, If, LambdaFunction, Literal, MapFilter, NamedExpression, Or, UnresolvedNamedLambdaVariable} +import org.apache.spark.sql.catalyst.expressions.{And, ArrayExists, ArrayFilter, ArrayTransform, CaseWhen, Expression, GreaterThan, If, LambdaFunction, LessThanOrEqual, Literal, MapFilter, NamedExpression, Or, UnresolvedNamedLambdaVariable} import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest} import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, LocalRelation, LogicalPlan, UpdateTable} @@ -236,12 +236,13 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { Literal(2) === nestedCaseWhen, TrueLiteral, FalseLiteral) - val branches = Seq((UnresolvedAttribute("i") > Literal(10)) -> branchValue) - val condition = CaseWhen(branches) - testFilter(originalCond = condition, expectedCond = condition) - testJoin(originalCond = condition, expectedCond = condition) - testDelete(originalCond = condition, expectedCond = condition) - testUpdate(originalCond = condition, expectedCond = condition) + val condition = CaseWhen(Seq((UnresolvedAttribute("i") > Literal(10)) -> branchValue)) + val expectedCond = + CaseWhen(Seq((UnresolvedAttribute("i") > Literal(10)) -> (Literal(2) === nestedCaseWhen))) + testFilter(originalCond = condition, expectedCond = 
expectedCond) + testJoin(originalCond = condition, expectedCond = expectedCond) + testDelete(originalCond = condition, expectedCond = expectedCond) + testUpdate(originalCond = condition, expectedCond = expectedCond) } test("inability to replace null in non-boolean branches of If inside another If") { @@ -252,10 +253,14 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { Literal(3)), TrueLiteral, FalseLiteral) - testFilter(originalCond = condition, expectedCond = condition) - testJoin(originalCond = condition, expectedCond = condition) - testDelete(originalCond = condition, expectedCond = condition) - testUpdate(originalCond = condition, expectedCond = condition) + val expectedCond = Literal(5) > If( + UnresolvedAttribute("i") === Literal(15), + Literal(null, IntegerType), + Literal(3)) + testFilter(originalCond = condition, expectedCond = expectedCond) + testJoin(originalCond = condition, expectedCond = expectedCond) + testDelete(originalCond = condition, expectedCond = expectedCond) + testUpdate(originalCond = condition, expectedCond = expectedCond) } test("replace null in If used as a join condition") { @@ -405,9 +410,9 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { val lambda1 = LambdaFunction( function = If(cond, Literal(null, BooleanType), TrueLiteral), arguments = lambdaArgs) - // the optimized lambda body is: if(arg > 0, false, true) + // the optimized lambda body is: if(arg > 0, false, true) => arg <= 0 val lambda2 = LambdaFunction( - function = If(cond, FalseLiteral, TrueLiteral), + function = LessThanOrEqual(condArg, Literal(0)), arguments = lambdaArgs) testProjection( originalExpr = createExpr(argument, lambda1) as 'x, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala index bac962ced4618..328fc107e1c1b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala @@ -199,4 +199,20 @@ class SimplifyConditionalSuite extends PlanTest with ExpressionEvalHelper with P If(Factorial(5) > 100L, b, nullLiteral).eval(EmptyRow)) } } + + test("SPARK-33845: remove unnecessary if when the outputs are boolean type") { + assertEquivalent( + If(IsNotNull(UnresolvedAttribute("a")), TrueLiteral, FalseLiteral), + IsNotNull(UnresolvedAttribute("a"))) + assertEquivalent( + If(IsNotNull(UnresolvedAttribute("a")), FalseLiteral, TrueLiteral), + IsNull(UnresolvedAttribute("a"))) + + assertEquivalent( + If(GreaterThan(Rand(0), UnresolvedAttribute("a")), TrueLiteral, FalseLiteral), + GreaterThan(Rand(0), UnresolvedAttribute("a"))) + assertEquivalent( + If(GreaterThan(Rand(0), UnresolvedAttribute("a")), FalseLiteral, TrueLiteral), + LessThanOrEqual(Rand(0), UnresolvedAttribute("a"))) + } } From 69aa727ff495f6698fe9b37e952dfaf36f1dd5eb Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Mon, 21 Dec 2020 04:24:04 -0800 Subject: [PATCH 0834/1009] [SPARK-33124][SQL] Fills missing group tags and re-categorizes all the group tags for built-in functions ### What changes were proposed in this pull request? This PR proposes to fill missing group tags and re-categorize all the group tags for built-in functions. 
New groups below are added in this PR: - binary_funcs - bitwise_funcs - collection_funcs - predicate_funcs - conditional_funcs - conversion_funcs - csv_funcs - generator_funcs - hash_funcs - lambda_funcs - math_funcs - misc_funcs - string_funcs - struct_funcs - xml_funcs A basic policy to re-categorize functions is that functions in the same file are categorized into the same group. For example, all the functions in `hash.scala` are categorized into `hash_funcs`. But, there are some exceptional/ambiguous cases when categorizing them. Here are some special notes: - All the aggregate functions are categorized into `agg_funcs`. - `array_funcs` and `map_funcs` are sub-groups of `collection_funcs`. For example, `array_contains` is used only for arrays, so it is assigned to `array_funcs`. On the other hand, `reverse` is used for both arrays and strings, so it is assigned to `collection_funcs`. - Some functions logically belong to multiple groups. In this case, these functions are categorized based on the file that they belong to. For example, `schema_of_csv` can be grouped into both `csv_funcs` and `struct_funcs` in terms of input types, but it is assigned to `csv_funcs` because it belongs to the `csvExpressions.scala` file that holds the other CSV-related functions. - Functions in `nullExpressions.scala`, `complexTypeCreator.scala`, `randomExpressions.scala`, and `regexExpressions.scala` are categorized based on their functionalities. For example: - `isnull` in `nullExpressions` is assigned to `predicate_funcs` because this is a predicate function. - `array` in `complexTypeCreator.scala` is assigned to `array_funcs`based on its output type (The other functions in `array_funcs` are categorized based on their input types though). A category list (after this PR) is as follows (the list below includes the exprs that already have a group tag in the current master): |group|name|class| |-----|----|-----| |agg_funcs|any|org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr| |agg_funcs|approx_count_distinct|org.apache.spark.sql.catalyst.expressions.aggregate.HyperLogLogPlusPlus| |agg_funcs|approx_percentile|org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile| |agg_funcs|avg|org.apache.spark.sql.catalyst.expressions.aggregate.Average| |agg_funcs|bit_and|org.apache.spark.sql.catalyst.expressions.aggregate.BitAndAgg| |agg_funcs|bit_or|org.apache.spark.sql.catalyst.expressions.aggregate.BitOrAgg| |agg_funcs|bit_xor|org.apache.spark.sql.catalyst.expressions.aggregate.BitXorAgg| |agg_funcs|bool_and|org.apache.spark.sql.catalyst.expressions.aggregate.BoolAnd| |agg_funcs|bool_or|org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr| |agg_funcs|collect_list|org.apache.spark.sql.catalyst.expressions.aggregate.CollectList| |agg_funcs|collect_set|org.apache.spark.sql.catalyst.expressions.aggregate.CollectSet| |agg_funcs|corr|org.apache.spark.sql.catalyst.expressions.aggregate.Corr| |agg_funcs|count_if|org.apache.spark.sql.catalyst.expressions.aggregate.CountIf| |agg_funcs|count_min_sketch|org.apache.spark.sql.catalyst.expressions.aggregate.CountMinSketchAgg| |agg_funcs|count|org.apache.spark.sql.catalyst.expressions.aggregate.Count| |agg_funcs|covar_pop|org.apache.spark.sql.catalyst.expressions.aggregate.CovPopulation| |agg_funcs|covar_samp|org.apache.spark.sql.catalyst.expressions.aggregate.CovSample| |agg_funcs|cube|org.apache.spark.sql.catalyst.expressions.Cube| |agg_funcs|every|org.apache.spark.sql.catalyst.expressions.aggregate.BoolAnd| 
|agg_funcs|first_value|org.apache.spark.sql.catalyst.expressions.aggregate.First| |agg_funcs|first|org.apache.spark.sql.catalyst.expressions.aggregate.First| |agg_funcs|grouping_id|org.apache.spark.sql.catalyst.expressions.GroupingID| |agg_funcs|grouping|org.apache.spark.sql.catalyst.expressions.Grouping| |agg_funcs|kurtosis|org.apache.spark.sql.catalyst.expressions.aggregate.Kurtosis| |agg_funcs|last_value|org.apache.spark.sql.catalyst.expressions.aggregate.Last| |agg_funcs|last|org.apache.spark.sql.catalyst.expressions.aggregate.Last| |agg_funcs|max_by|org.apache.spark.sql.catalyst.expressions.aggregate.MaxBy| |agg_funcs|max|org.apache.spark.sql.catalyst.expressions.aggregate.Max| |agg_funcs|mean|org.apache.spark.sql.catalyst.expressions.aggregate.Average| |agg_funcs|min_by|org.apache.spark.sql.catalyst.expressions.aggregate.MinBy| |agg_funcs|min|org.apache.spark.sql.catalyst.expressions.aggregate.Min| |agg_funcs|percentile_approx|org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile| |agg_funcs|percentile|org.apache.spark.sql.catalyst.expressions.aggregate.Percentile| |agg_funcs|rollup|org.apache.spark.sql.catalyst.expressions.Rollup| |agg_funcs|skewness|org.apache.spark.sql.catalyst.expressions.aggregate.Skewness| |agg_funcs|some|org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr| |agg_funcs|stddev_pop|org.apache.spark.sql.catalyst.expressions.aggregate.StddevPop| |agg_funcs|stddev_samp|org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp| |agg_funcs|stddev|org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp| |agg_funcs|std|org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp| |agg_funcs|sum|org.apache.spark.sql.catalyst.expressions.aggregate.Sum| |agg_funcs|var_pop|org.apache.spark.sql.catalyst.expressions.aggregate.VariancePop| |agg_funcs|var_samp|org.apache.spark.sql.catalyst.expressions.aggregate.VarianceSamp| |agg_funcs|variance|org.apache.spark.sql.catalyst.expressions.aggregate.VarianceSamp| |array_funcs|array_contains|org.apache.spark.sql.catalyst.expressions.ArrayContains| |array_funcs|array_distinct|org.apache.spark.sql.catalyst.expressions.ArrayDistinct| |array_funcs|array_except|org.apache.spark.sql.catalyst.expressions.ArrayExcept| |array_funcs|array_intersect|org.apache.spark.sql.catalyst.expressions.ArrayIntersect| |array_funcs|array_join|org.apache.spark.sql.catalyst.expressions.ArrayJoin| |array_funcs|array_max|org.apache.spark.sql.catalyst.expressions.ArrayMax| |array_funcs|array_min|org.apache.spark.sql.catalyst.expressions.ArrayMin| |array_funcs|array_position|org.apache.spark.sql.catalyst.expressions.ArrayPosition| |array_funcs|array_remove|org.apache.spark.sql.catalyst.expressions.ArrayRemove| |array_funcs|array_repeat|org.apache.spark.sql.catalyst.expressions.ArrayRepeat| |array_funcs|array_union|org.apache.spark.sql.catalyst.expressions.ArrayUnion| |array_funcs|arrays_overlap|org.apache.spark.sql.catalyst.expressions.ArraysOverlap| |array_funcs|arrays_zip|org.apache.spark.sql.catalyst.expressions.ArraysZip| |array_funcs|array|org.apache.spark.sql.catalyst.expressions.CreateArray| |array_funcs|flatten|org.apache.spark.sql.catalyst.expressions.Flatten| |array_funcs|sequence|org.apache.spark.sql.catalyst.expressions.Sequence| |array_funcs|shuffle|org.apache.spark.sql.catalyst.expressions.Shuffle| |array_funcs|slice|org.apache.spark.sql.catalyst.expressions.Slice| |array_funcs|sort_array|org.apache.spark.sql.catalyst.expressions.SortArray| 
|bitwise_funcs|&|org.apache.spark.sql.catalyst.expressions.BitwiseAnd| |bitwise_funcs|^|org.apache.spark.sql.catalyst.expressions.BitwiseXor| |bitwise_funcs|bit_count|org.apache.spark.sql.catalyst.expressions.BitwiseCount| |bitwise_funcs|shiftrightunsigned|org.apache.spark.sql.catalyst.expressions.ShiftRightUnsigned| |bitwise_funcs|shiftright|org.apache.spark.sql.catalyst.expressions.ShiftRight| |bitwise_funcs|~|org.apache.spark.sql.catalyst.expressions.BitwiseNot| |collection_funcs|cardinality|org.apache.spark.sql.catalyst.expressions.Size| |collection_funcs|concat|org.apache.spark.sql.catalyst.expressions.Concat| |collection_funcs|reverse|org.apache.spark.sql.catalyst.expressions.Reverse| |collection_funcs|size|org.apache.spark.sql.catalyst.expressions.Size| |conditional_funcs|coalesce|org.apache.spark.sql.catalyst.expressions.Coalesce| |conditional_funcs|ifnull|org.apache.spark.sql.catalyst.expressions.IfNull| |conditional_funcs|if|org.apache.spark.sql.catalyst.expressions.If| |conditional_funcs|nanvl|org.apache.spark.sql.catalyst.expressions.NaNvl| |conditional_funcs|nullif|org.apache.spark.sql.catalyst.expressions.NullIf| |conditional_funcs|nvl2|org.apache.spark.sql.catalyst.expressions.Nvl2| |conditional_funcs|nvl|org.apache.spark.sql.catalyst.expressions.Nvl| |conditional_funcs|when|org.apache.spark.sql.catalyst.expressions.CaseWhen| |conversion_funcs|bigint|org.apache.spark.sql.catalyst.expressions.Cast| |conversion_funcs|binary|org.apache.spark.sql.catalyst.expressions.Cast| |conversion_funcs|boolean|org.apache.spark.sql.catalyst.expressions.Cast| |conversion_funcs|cast|org.apache.spark.sql.catalyst.expressions.Cast| |conversion_funcs|date|org.apache.spark.sql.catalyst.expressions.Cast| |conversion_funcs|decimal|org.apache.spark.sql.catalyst.expressions.Cast| |conversion_funcs|double|org.apache.spark.sql.catalyst.expressions.Cast| |conversion_funcs|float|org.apache.spark.sql.catalyst.expressions.Cast| |conversion_funcs|int|org.apache.spark.sql.catalyst.expressions.Cast| |conversion_funcs|smallint|org.apache.spark.sql.catalyst.expressions.Cast| |conversion_funcs|string|org.apache.spark.sql.catalyst.expressions.Cast| |conversion_funcs|timestamp|org.apache.spark.sql.catalyst.expressions.Cast| |conversion_funcs|tinyint|org.apache.spark.sql.catalyst.expressions.Cast| |csv_funcs|from_csv|org.apache.spark.sql.catalyst.expressions.CsvToStructs| |csv_funcs|schema_of_csv|org.apache.spark.sql.catalyst.expressions.SchemaOfCsv| |csv_funcs|to_csv|org.apache.spark.sql.catalyst.expressions.StructsToCsv| |datetime_funcs|add_months|org.apache.spark.sql.catalyst.expressions.AddMonths| |datetime_funcs|current_date|org.apache.spark.sql.catalyst.expressions.CurrentDate| |datetime_funcs|current_timestamp|org.apache.spark.sql.catalyst.expressions.CurrentTimestamp| |datetime_funcs|current_timezone|org.apache.spark.sql.catalyst.expressions.CurrentTimeZone| |datetime_funcs|date_add|org.apache.spark.sql.catalyst.expressions.DateAdd| |datetime_funcs|date_format|org.apache.spark.sql.catalyst.expressions.DateFormatClass| |datetime_funcs|date_from_unix_date|org.apache.spark.sql.catalyst.expressions.DateFromUnixDate| |datetime_funcs|date_part|org.apache.spark.sql.catalyst.expressions.DatePart| |datetime_funcs|date_sub|org.apache.spark.sql.catalyst.expressions.DateSub| |datetime_funcs|date_trunc|org.apache.spark.sql.catalyst.expressions.TruncTimestamp| |datetime_funcs|datediff|org.apache.spark.sql.catalyst.expressions.DateDiff| |datetime_funcs|dayofmonth|org.apache.spark.sql.catalyst.expressions.DayOfMonth| 
|datetime_funcs|dayofweek|org.apache.spark.sql.catalyst.expressions.DayOfWeek| |datetime_funcs|dayofyear|org.apache.spark.sql.catalyst.expressions.DayOfYear| |datetime_funcs|day|org.apache.spark.sql.catalyst.expressions.DayOfMonth| |datetime_funcs|extract|org.apache.spark.sql.catalyst.expressions.Extract| |datetime_funcs|from_unixtime|org.apache.spark.sql.catalyst.expressions.FromUnixTime| |datetime_funcs|from_utc_timestamp|org.apache.spark.sql.catalyst.expressions.FromUTCTimestamp| |datetime_funcs|hour|org.apache.spark.sql.catalyst.expressions.Hour| |datetime_funcs|last_day|org.apache.spark.sql.catalyst.expressions.LastDay| |datetime_funcs|make_date|org.apache.spark.sql.catalyst.expressions.MakeDate| |datetime_funcs|make_interval|org.apache.spark.sql.catalyst.expressions.MakeInterval| |datetime_funcs|make_timestamp|org.apache.spark.sql.catalyst.expressions.MakeTimestamp| |datetime_funcs|minute|org.apache.spark.sql.catalyst.expressions.Minute| |datetime_funcs|months_between|org.apache.spark.sql.catalyst.expressions.MonthsBetween| |datetime_funcs|month|org.apache.spark.sql.catalyst.expressions.Month| |datetime_funcs|next_day|org.apache.spark.sql.catalyst.expressions.NextDay| |datetime_funcs|now|org.apache.spark.sql.catalyst.expressions.Now| |datetime_funcs|quarter|org.apache.spark.sql.catalyst.expressions.Quarter| |datetime_funcs|second|org.apache.spark.sql.catalyst.expressions.Second| |datetime_funcs|timestamp_micros|org.apache.spark.sql.catalyst.expressions.MicrosToTimestamp| |datetime_funcs|timestamp_millis|org.apache.spark.sql.catalyst.expressions.MillisToTimestamp| |datetime_funcs|timestamp_seconds|org.apache.spark.sql.catalyst.expressions.SecondsToTimestamp| |datetime_funcs|to_date|org.apache.spark.sql.catalyst.expressions.ParseToDate| |datetime_funcs|to_timestamp|org.apache.spark.sql.catalyst.expressions.ParseToTimestamp| |datetime_funcs|to_unix_timestamp|org.apache.spark.sql.catalyst.expressions.ToUnixTimestamp| |datetime_funcs|to_utc_timestamp|org.apache.spark.sql.catalyst.expressions.ToUTCTimestamp| |datetime_funcs|trunc|org.apache.spark.sql.catalyst.expressions.TruncDate| |datetime_funcs|unix_date|org.apache.spark.sql.catalyst.expressions.UnixDate| |datetime_funcs|unix_micros|org.apache.spark.sql.catalyst.expressions.UnixMicros| |datetime_funcs|unix_millis|org.apache.spark.sql.catalyst.expressions.UnixMillis| |datetime_funcs|unix_seconds|org.apache.spark.sql.catalyst.expressions.UnixSeconds| |datetime_funcs|unix_timestamp|org.apache.spark.sql.catalyst.expressions.UnixTimestamp| |datetime_funcs|weekday|org.apache.spark.sql.catalyst.expressions.WeekDay| |datetime_funcs|weekofyear|org.apache.spark.sql.catalyst.expressions.WeekOfYear| |datetime_funcs|year|org.apache.spark.sql.catalyst.expressions.Year| |generator_funcs|explode_outer|org.apache.spark.sql.catalyst.expressions.Explode| |generator_funcs|explode|org.apache.spark.sql.catalyst.expressions.Explode| |generator_funcs|inline_outer|org.apache.spark.sql.catalyst.expressions.Inline| |generator_funcs|inline|org.apache.spark.sql.catalyst.expressions.Inline| |generator_funcs|posexplode_outer|org.apache.spark.sql.catalyst.expressions.PosExplode| |generator_funcs|posexplode|org.apache.spark.sql.catalyst.expressions.PosExplode| |generator_funcs|stack|org.apache.spark.sql.catalyst.expressions.Stack| |hash_funcs|crc32|org.apache.spark.sql.catalyst.expressions.Crc32| |hash_funcs|hash|org.apache.spark.sql.catalyst.expressions.Murmur3Hash| |hash_funcs|md5|org.apache.spark.sql.catalyst.expressions.Md5| 
|hash_funcs|sha1|org.apache.spark.sql.catalyst.expressions.Sha1|
|hash_funcs|sha2|org.apache.spark.sql.catalyst.expressions.Sha2|
|hash_funcs|sha|org.apache.spark.sql.catalyst.expressions.Sha1|
|hash_funcs|xxhash64|org.apache.spark.sql.catalyst.expressions.XxHash64|
|json_funcs|from_json|org.apache.spark.sql.catalyst.expressions.JsonToStructs|
|json_funcs|get_json_object|org.apache.spark.sql.catalyst.expressions.GetJsonObject|
|json_funcs|json_array_length|org.apache.spark.sql.catalyst.expressions.LengthOfJsonArray|
|json_funcs|json_object_keys|org.apache.spark.sql.catalyst.expressions.JsonObjectKeys|
|json_funcs|json_tuple|org.apache.spark.sql.catalyst.expressions.JsonTuple|
|json_funcs|schema_of_json|org.apache.spark.sql.catalyst.expressions.SchemaOfJson|
|json_funcs|to_json|org.apache.spark.sql.catalyst.expressions.StructsToJson|
|lambda_funcs|aggregate|org.apache.spark.sql.catalyst.expressions.ArrayAggregate|
|lambda_funcs|array_sort|org.apache.spark.sql.catalyst.expressions.ArraySort|
|lambda_funcs|exists|org.apache.spark.sql.catalyst.expressions.ArrayExists|
|lambda_funcs|filter|org.apache.spark.sql.catalyst.expressions.ArrayFilter|
|lambda_funcs|forall|org.apache.spark.sql.catalyst.expressions.ArrayForAll|
|lambda_funcs|map_filter|org.apache.spark.sql.catalyst.expressions.MapFilter|
|lambda_funcs|map_zip_with|org.apache.spark.sql.catalyst.expressions.MapZipWith|
|lambda_funcs|transform_keys|org.apache.spark.sql.catalyst.expressions.TransformKeys|
|lambda_funcs|transform_values|org.apache.spark.sql.catalyst.expressions.TransformValues|
|lambda_funcs|transform|org.apache.spark.sql.catalyst.expressions.ArrayTransform|
|lambda_funcs|zip_with|org.apache.spark.sql.catalyst.expressions.ZipWith|
|map_funcs|element_at|org.apache.spark.sql.catalyst.expressions.ElementAt|
|map_funcs|map_concat|org.apache.spark.sql.catalyst.expressions.MapConcat|
|map_funcs|map_entries|org.apache.spark.sql.catalyst.expressions.MapEntries|
|map_funcs|map_from_arrays|org.apache.spark.sql.catalyst.expressions.MapFromArrays|
|map_funcs|map_from_entries|org.apache.spark.sql.catalyst.expressions.MapFromEntries|
|map_funcs|map_keys|org.apache.spark.sql.catalyst.expressions.MapKeys|
|map_funcs|map_values|org.apache.spark.sql.catalyst.expressions.MapValues|
|map_funcs|map|org.apache.spark.sql.catalyst.expressions.CreateMap|
|map_funcs|str_to_map|org.apache.spark.sql.catalyst.expressions.StringToMap|
|math_funcs|%|org.apache.spark.sql.catalyst.expressions.Remainder|
|math_funcs|*|org.apache.spark.sql.catalyst.expressions.Multiply|
|math_funcs|+|org.apache.spark.sql.catalyst.expressions.Add|
|math_funcs|-|org.apache.spark.sql.catalyst.expressions.Subtract|
|math_funcs|/|org.apache.spark.sql.catalyst.expressions.Divide|
|math_funcs|abs|org.apache.spark.sql.catalyst.expressions.Abs|
|math_funcs|acosh|org.apache.spark.sql.catalyst.expressions.Acosh|
|math_funcs|acos|org.apache.spark.sql.catalyst.expressions.Acos|
|math_funcs|asinh|org.apache.spark.sql.catalyst.expressions.Asinh|
|math_funcs|asin|org.apache.spark.sql.catalyst.expressions.Asin|
|math_funcs|atan2|org.apache.spark.sql.catalyst.expressions.Atan2|
|math_funcs|atanh|org.apache.spark.sql.catalyst.expressions.Atanh|
|math_funcs|atan|org.apache.spark.sql.catalyst.expressions.Atan|
|math_funcs|bin|org.apache.spark.sql.catalyst.expressions.Bin|
|math_funcs|bround|org.apache.spark.sql.catalyst.expressions.BRound|
|math_funcs|cbrt|org.apache.spark.sql.catalyst.expressions.Cbrt|
|math_funcs|ceiling|org.apache.spark.sql.catalyst.expressions.Ceil|
|math_funcs|ceil|org.apache.spark.sql.catalyst.expressions.Ceil|
|math_funcs|conv|org.apache.spark.sql.catalyst.expressions.Conv|
|math_funcs|cosh|org.apache.spark.sql.catalyst.expressions.Cosh|
|math_funcs|cos|org.apache.spark.sql.catalyst.expressions.Cos|
|math_funcs|cot|org.apache.spark.sql.catalyst.expressions.Cot|
|math_funcs|degrees|org.apache.spark.sql.catalyst.expressions.ToDegrees|
|math_funcs|div|org.apache.spark.sql.catalyst.expressions.IntegralDivide|
|math_funcs|expm1|org.apache.spark.sql.catalyst.expressions.Expm1|
|math_funcs|exp|org.apache.spark.sql.catalyst.expressions.Exp|
|math_funcs|e|org.apache.spark.sql.catalyst.expressions.EulerNumber|
|math_funcs|factorial|org.apache.spark.sql.catalyst.expressions.Factorial|
|math_funcs|floor|org.apache.spark.sql.catalyst.expressions.Floor|
|math_funcs|greatest|org.apache.spark.sql.catalyst.expressions.Greatest|
|math_funcs|hex|org.apache.spark.sql.catalyst.expressions.Hex|
|math_funcs|hypot|org.apache.spark.sql.catalyst.expressions.Hypot|
|math_funcs|least|org.apache.spark.sql.catalyst.expressions.Least|
|math_funcs|ln|org.apache.spark.sql.catalyst.expressions.Log|
|math_funcs|log10|org.apache.spark.sql.catalyst.expressions.Log10|
|math_funcs|log1p|org.apache.spark.sql.catalyst.expressions.Log1p|
|math_funcs|log2|org.apache.spark.sql.catalyst.expressions.Log2|
|math_funcs|log|org.apache.spark.sql.catalyst.expressions.Logarithm|
|math_funcs|mod|org.apache.spark.sql.catalyst.expressions.Remainder|
|math_funcs|negative|org.apache.spark.sql.catalyst.expressions.UnaryMinus|
|math_funcs|pi|org.apache.spark.sql.catalyst.expressions.Pi|
|math_funcs|pmod|org.apache.spark.sql.catalyst.expressions.Pmod|
|math_funcs|positive|org.apache.spark.sql.catalyst.expressions.UnaryPositive|
|math_funcs|power|org.apache.spark.sql.catalyst.expressions.Pow|
|math_funcs|pow|org.apache.spark.sql.catalyst.expressions.Pow|
|math_funcs|radians|org.apache.spark.sql.catalyst.expressions.ToRadians|
|math_funcs|randn|org.apache.spark.sql.catalyst.expressions.Randn|
|math_funcs|random|org.apache.spark.sql.catalyst.expressions.Rand|
|math_funcs|rand|org.apache.spark.sql.catalyst.expressions.Rand|
|math_funcs|rint|org.apache.spark.sql.catalyst.expressions.Rint|
|math_funcs|round|org.apache.spark.sql.catalyst.expressions.Round|
|math_funcs|shiftleft|org.apache.spark.sql.catalyst.expressions.ShiftLeft|
|math_funcs|signum|org.apache.spark.sql.catalyst.expressions.Signum|
|math_funcs|sign|org.apache.spark.sql.catalyst.expressions.Signum|
|math_funcs|sinh|org.apache.spark.sql.catalyst.expressions.Sinh|
|math_funcs|sin|org.apache.spark.sql.catalyst.expressions.Sin|
|math_funcs|sqrt|org.apache.spark.sql.catalyst.expressions.Sqrt|
|math_funcs|tanh|org.apache.spark.sql.catalyst.expressions.Tanh|
|math_funcs|tan|org.apache.spark.sql.catalyst.expressions.Tan|
|math_funcs|unhex|org.apache.spark.sql.catalyst.expressions.Unhex|
|math_funcs|width_bucket|org.apache.spark.sql.catalyst.expressions.WidthBucket|
|misc_funcs|assert_true|org.apache.spark.sql.catalyst.expressions.AssertTrue|
|misc_funcs|current_catalog|org.apache.spark.sql.catalyst.expressions.CurrentCatalog|
|misc_funcs|current_database|org.apache.spark.sql.catalyst.expressions.CurrentDatabase|
|misc_funcs|input_file_block_length|org.apache.spark.sql.catalyst.expressions.InputFileBlockLength|
|misc_funcs|input_file_block_start|org.apache.spark.sql.catalyst.expressions.InputFileBlockStart|
|misc_funcs|input_file_name|org.apache.spark.sql.catalyst.expressions.InputFileName|
|misc_funcs|java_method|org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection|
|misc_funcs|monotonically_increasing_id|org.apache.spark.sql.catalyst.expressions.MonotonicallyIncreasingID|
|misc_funcs|raise_error|org.apache.spark.sql.catalyst.expressions.RaiseError|
|misc_funcs|reflect|org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection|
|misc_funcs|spark_partition_id|org.apache.spark.sql.catalyst.expressions.SparkPartitionID|
|misc_funcs|typeof|org.apache.spark.sql.catalyst.expressions.TypeOf|
|misc_funcs|uuid|org.apache.spark.sql.catalyst.expressions.Uuid|
|misc_funcs|version|org.apache.spark.sql.catalyst.expressions.SparkVersion|
|predicate_funcs|!|org.apache.spark.sql.catalyst.expressions.Not|
|predicate_funcs|<=>|org.apache.spark.sql.catalyst.expressions.EqualNullSafe|
|predicate_funcs|<=|org.apache.spark.sql.catalyst.expressions.LessThanOrEqual|
|predicate_funcs|<|org.apache.spark.sql.catalyst.expressions.LessThan|
|predicate_funcs|==|org.apache.spark.sql.catalyst.expressions.EqualTo|
|predicate_funcs|=|org.apache.spark.sql.catalyst.expressions.EqualTo|
|predicate_funcs|>=|org.apache.spark.sql.catalyst.expressions.GreaterThanOrEqual|
|predicate_funcs|>|org.apache.spark.sql.catalyst.expressions.GreaterThan|
|predicate_funcs|and|org.apache.spark.sql.catalyst.expressions.And|
|predicate_funcs|in|org.apache.spark.sql.catalyst.expressions.In|
|predicate_funcs|isnan|org.apache.spark.sql.catalyst.expressions.IsNaN|
|predicate_funcs|isnotnull|org.apache.spark.sql.catalyst.expressions.IsNotNull|
|predicate_funcs|isnull|org.apache.spark.sql.catalyst.expressions.IsNull|
|predicate_funcs|like|org.apache.spark.sql.catalyst.expressions.Like|
|predicate_funcs|not|org.apache.spark.sql.catalyst.expressions.Not|
|predicate_funcs|or|org.apache.spark.sql.catalyst.expressions.Or|
|predicate_funcs|regexp_like|org.apache.spark.sql.catalyst.expressions.RLike|
|predicate_funcs|rlike|org.apache.spark.sql.catalyst.expressions.RLike|
|string_funcs|ascii|org.apache.spark.sql.catalyst.expressions.Ascii|
|string_funcs|base64|org.apache.spark.sql.catalyst.expressions.Base64|
|string_funcs|bit_length|org.apache.spark.sql.catalyst.expressions.BitLength|
|string_funcs|char_length|org.apache.spark.sql.catalyst.expressions.Length|
|string_funcs|character_length|org.apache.spark.sql.catalyst.expressions.Length|
|string_funcs|char|org.apache.spark.sql.catalyst.expressions.Chr|
|string_funcs|chr|org.apache.spark.sql.catalyst.expressions.Chr|
|string_funcs|concat_ws|org.apache.spark.sql.catalyst.expressions.ConcatWs|
|string_funcs|decode|org.apache.spark.sql.catalyst.expressions.Decode|
|string_funcs|elt|org.apache.spark.sql.catalyst.expressions.Elt|
|string_funcs|encode|org.apache.spark.sql.catalyst.expressions.Encode|
|string_funcs|find_in_set|org.apache.spark.sql.catalyst.expressions.FindInSet|
|string_funcs|format_number|org.apache.spark.sql.catalyst.expressions.FormatNumber|
|string_funcs|format_string|org.apache.spark.sql.catalyst.expressions.FormatString|
|string_funcs|initcap|org.apache.spark.sql.catalyst.expressions.InitCap|
|string_funcs|instr|org.apache.spark.sql.catalyst.expressions.StringInstr|
|string_funcs|lcase|org.apache.spark.sql.catalyst.expressions.Lower|
|string_funcs|left|org.apache.spark.sql.catalyst.expressions.Left|
|string_funcs|length|org.apache.spark.sql.catalyst.expressions.Length|
|string_funcs|levenshtein|org.apache.spark.sql.catalyst.expressions.Levenshtein|
|string_funcs|locate|org.apache.spark.sql.catalyst.expressions.StringLocate|
|string_funcs|lower|org.apache.spark.sql.catalyst.expressions.Lower|
|string_funcs|lpad|org.apache.spark.sql.catalyst.expressions.StringLPad|
|string_funcs|ltrim|org.apache.spark.sql.catalyst.expressions.StringTrimLeft|
|string_funcs|octet_length|org.apache.spark.sql.catalyst.expressions.OctetLength|
|string_funcs|overlay|org.apache.spark.sql.catalyst.expressions.Overlay|
|string_funcs|parse_url|org.apache.spark.sql.catalyst.expressions.ParseUrl|
|string_funcs|position|org.apache.spark.sql.catalyst.expressions.StringLocate|
|string_funcs|printf|org.apache.spark.sql.catalyst.expressions.FormatString|
|string_funcs|regexp_extract_all|org.apache.spark.sql.catalyst.expressions.RegExpExtractAll|
|string_funcs|regexp_extract|org.apache.spark.sql.catalyst.expressions.RegExpExtract|
|string_funcs|regexp_replace|org.apache.spark.sql.catalyst.expressions.RegExpReplace|
|string_funcs|repeat|org.apache.spark.sql.catalyst.expressions.StringRepeat|
|string_funcs|replace|org.apache.spark.sql.catalyst.expressions.StringReplace|
|string_funcs|right|org.apache.spark.sql.catalyst.expressions.Right|
|string_funcs|rpad|org.apache.spark.sql.catalyst.expressions.StringRPad|
|string_funcs|rtrim|org.apache.spark.sql.catalyst.expressions.StringTrimRight|
|string_funcs|sentences|org.apache.spark.sql.catalyst.expressions.Sentences|
|string_funcs|soundex|org.apache.spark.sql.catalyst.expressions.SoundEx|
|string_funcs|space|org.apache.spark.sql.catalyst.expressions.StringSpace|
|string_funcs|split|org.apache.spark.sql.catalyst.expressions.StringSplit|
|string_funcs|substring_index|org.apache.spark.sql.catalyst.expressions.SubstringIndex|
|string_funcs|substring|org.apache.spark.sql.catalyst.expressions.Substring|
|string_funcs|substr|org.apache.spark.sql.catalyst.expressions.Substring|
|string_funcs|translate|org.apache.spark.sql.catalyst.expressions.StringTranslate|
|string_funcs|trim|org.apache.spark.sql.catalyst.expressions.StringTrim|
|string_funcs|ucase|org.apache.spark.sql.catalyst.expressions.Upper|
|string_funcs|unbase64|org.apache.spark.sql.catalyst.expressions.UnBase64|
|string_funcs|upper|org.apache.spark.sql.catalyst.expressions.Upper|
|struct_funcs|named_struct|org.apache.spark.sql.catalyst.expressions.CreateNamedStruct|
|struct_funcs|struct|org.apache.spark.sql.catalyst.expressions.CreateNamedStruct|
|window_funcs|cume_dist|org.apache.spark.sql.catalyst.expressions.CumeDist|
|window_funcs|dense_rank|org.apache.spark.sql.catalyst.expressions.DenseRank|
|window_funcs|lag|org.apache.spark.sql.catalyst.expressions.Lag|
|window_funcs|lead|org.apache.spark.sql.catalyst.expressions.Lead|
|window_funcs|nth_value|org.apache.spark.sql.catalyst.expressions.NthValue|
|window_funcs|ntile|org.apache.spark.sql.catalyst.expressions.NTile|
|window_funcs|percent_rank|org.apache.spark.sql.catalyst.expressions.PercentRank|
|window_funcs|rank|org.apache.spark.sql.catalyst.expressions.Rank|
|window_funcs|row_number|org.apache.spark.sql.catalyst.expressions.RowNumber|
|xml_funcs|xpath_boolean|org.apache.spark.sql.catalyst.expressions.xml.XPathBoolean|
|xml_funcs|xpath_double|org.apache.spark.sql.catalyst.expressions.xml.XPathDouble|
|xml_funcs|xpath_float|org.apache.spark.sql.catalyst.expressions.xml.XPathFloat|
|xml_funcs|xpath_int|org.apache.spark.sql.catalyst.expressions.xml.XPathInt|
|xml_funcs|xpath_long|org.apache.spark.sql.catalyst.expressions.xml.XPathLong|
|xml_funcs|xpath_number|org.apache.spark.sql.catalyst.expressions.xml.XPathDouble|
|xml_funcs|xpath_short|org.apache.spark.sql.catalyst.expressions.xml.XPathShort|
|xml_funcs|xpath_string|org.apache.spark.sql.catalyst.expressions.xml.XPathString|
|xml_funcs|xpath|org.apache.spark.sql.catalyst.expressions.xml.XPathList|

Closes #30040

NOTE: The original author of this PR is tanelk, so the credit should be given to tanelk.

### Why are the changes needed?

To improve the built-in function documentation.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Added a test to `ExpressionInfoSuite` to check that expressions have a valid group tag.

Closes #30867 from maropu/pr30040.

Lead-authored-by: Takeshi Yamamuro
Co-authored-by: tanel.kiis@gmail.com
Signed-off-by: Dongjoon Hyun
---
 .../catalyst/expressions/ExpressionInfo.java  | 7 +-
 .../catalyst/analysis/FunctionRegistry.scala  | 3 +-
 .../expressions/CallMethodViaReflection.scala | 3 +-
 .../spark/sql/catalyst/expressions/Cast.scala | 3 +-
 .../MonotonicallyIncreasingID.scala           | 3 +-
 .../expressions/SparkPartitionID.scala        | 3 +-
 .../sql/catalyst/expressions/arithmetic.scala | 36 +++--
 .../expressions/bitwiseExpressions.scala      | 15 +-
 .../expressions/collectionOperations.scala    | 10 +-
 .../expressions/complexTypeCreator.scala      | 17 ++-
 .../expressions/conditionalExpressions.scala  | 6 +-
 .../catalyst/expressions/csvExpressions.scala | 9 +-
 .../expressions/datetimeExpressions.scala     | 2 +
 .../sql/catalyst/expressions/generators.scala | 12 +-
 .../sql/catalyst/expressions/grouping.scala   | 12 +-
 .../spark/sql/catalyst/expressions/hash.scala | 21 ++-
 .../expressions/higherOrderFunctions.scala    | 31 ++--
 .../catalyst/expressions/inputFileBlock.scala | 9 +-
 .../expressions/intervalExpressions.scala     | 3 +-
 .../expressions/mathExpressions.scala         | 132 ++++++++++++------
 .../spark/sql/catalyst/expressions/misc.scala | 21 ++-
 .../expressions/nullExpressions.scala         | 27 ++--
 .../sql/catalyst/expressions/predicates.scala | 30 ++--
 .../expressions/randomExpressions.scala       | 6 +-
 .../expressions/regexpExpressions.scala       | 18 ++-
 .../expressions/stringExpressions.scala       | 114 ++++++++++-----
 .../sql/catalyst/expressions/xml/xpath.scala  | 24 ++--
 .../sql/expressions/ExpressionInfoSuite.scala | 7 +-
 28 files changed, 391 insertions(+), 193 deletions(-)

diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java
index a500822b21f02..0975f831bbbe2 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java
@@ -40,8 +40,11 @@ public class ExpressionInfo {
     private String deprecated;
 
     private static final Set<String> validGroups =
-        new HashSet<>(Arrays.asList("agg_funcs", "array_funcs", "datetime_funcs",
-            "json_funcs", "map_funcs", "window_funcs"));
+        new HashSet<>(Arrays.asList("agg_funcs", "array_funcs", "binary_funcs", "bitwise_funcs",
+            "collection_funcs", "predicate_funcs", "conditional_funcs", "conversion_funcs",
+            "csv_funcs", "datetime_funcs", "generator_funcs", "hash_funcs", "json_funcs",
+            "lambda_funcs", "map_funcs", "math_funcs", "misc_funcs", "string_funcs", "struct_funcs",
+            "window_funcs", "xml_funcs"));
 
     public String getClassName() {
         return className;
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 1a1b619336d54..912357b47934d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -660,7 +660,8 @@ object FunctionRegistry { val clazz = scala.reflect.classTag[Cast].runtimeClass val usage = "_FUNC_(expr) - Casts the value `expr` to the target data type `_FUNC_`." val expressionInfo = - new ExpressionInfo(clazz.getCanonicalName, null, name, usage, "", "", "", "", "2.0.1", "") + new ExpressionInfo(clazz.getCanonicalName, null, name, usage, "", "", "", + "conversion_funcs", "2.0.1", "") (name, (expressionInfo, builder)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala index 0979a18ac97bb..0de17d420f0c9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala @@ -52,7 +52,8 @@ import org.apache.spark.util.Utils > SELECT _FUNC_('java.util.UUID', 'fromString', 'a5cf6c42-0c85-418f-af6c-3e4e5b1328f2'); a5cf6c42-0c85-418f-af6c-3e4e5b1328f2 """, - since = "2.0.0") + since = "2.0.0", + group = "misc_funcs") case class CallMethodViaReflection(children: Seq[Expression]) extends Nondeterministic with CodegenFallback { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index e1ece732cf15d..d19a51b339020 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -1755,7 +1755,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit > SELECT _FUNC_('10' as int); 10 """, - since = "1.0.0") + since = "1.0.0", + group = "conversion_funcs") case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String] = None) extends CastBase { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala index 8b04c1aa513f9..f228b36ecd472 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala @@ -46,7 +46,8 @@ import org.apache.spark.sql.types.{DataType, LongType} > SELECT _FUNC_(); 0 """, - since = "1.4.0") + since = "1.4.0", + group = "misc_funcs") case class MonotonicallyIncreasingID() extends LeafExpression with Stateful { /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SparkPartitionID.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SparkPartitionID.scala index 242735b4aebd3..2de89da2318f7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SparkPartitionID.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SparkPartitionID.scala @@ -32,7 +32,8 @@ import org.apache.spark.sql.types.{DataType, IntegerType} > SELECT _FUNC_(); 0 """, - since = "1.4.0") + since = "1.4.0", + group = "misc_funcs") case class SparkPartitionID() extends LeafExpression with Nondeterministic { override def nullable: Boolean = false diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index 3fbb798f1fd53..03dfddbdf7e6e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -33,7 +33,8 @@ import org.apache.spark.unsafe.types.CalendarInterval > SELECT _FUNC_(1); -1 """, - since = "1.0.0") + since = "1.0.0", + group = "math_funcs") case class UnaryMinus( child: Expression, failOnError: Boolean = SQLConf.get.ansiEnabled) @@ -105,7 +106,8 @@ case class UnaryMinus( > SELECT _FUNC_(1); 1 """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") case class UnaryPositive(child: Expression) extends UnaryExpression with ExpectsInputTypes with NullIntolerant { @@ -133,7 +135,8 @@ case class UnaryPositive(child: Expression) > SELECT _FUNC_(-1); 1 """, - since = "1.2.0") + since = "1.2.0", + group = "math_funcs") case class Abs(child: Expression) extends UnaryExpression with ExpectsInputTypes with NullIntolerant { @@ -236,7 +239,8 @@ object BinaryArithmetic { > SELECT 1 _FUNC_ 2; 3 """, - since = "1.0.0") + since = "1.0.0", + group = "math_funcs") case class Add( left: Expression, right: Expression, @@ -274,7 +278,8 @@ case class Add( > SELECT 2 _FUNC_ 1; 1 """, - since = "1.0.0") + since = "1.0.0", + group = "math_funcs") case class Subtract( left: Expression, right: Expression, @@ -312,7 +317,8 @@ case class Subtract( > SELECT 2 _FUNC_ 3; 6 """, - since = "1.0.0") + since = "1.0.0", + group = "math_funcs") case class Multiply( left: Expression, right: Expression, @@ -436,7 +442,8 @@ trait DivModLike extends BinaryArithmetic { > SELECT 2L _FUNC_ 2L; 1.0 """, - since = "1.0.0") + since = "1.0.0", + group = "math_funcs") // scalastyle:on line.size.limit case class Divide( left: Expression, @@ -465,7 +472,8 @@ case class Divide( > SELECT 3 _FUNC_ 2; 1 """, - since = "3.0.0") + since = "3.0.0", + group = "math_funcs") // scalastyle:on line.size.limit case class IntegralDivide( left: Expression, @@ -512,7 +520,8 @@ case class IntegralDivide( > SELECT MOD(2, 1.8); 0.2 """, - since = "1.0.0") + since = "1.0.0", + group = "math_funcs") case class Remainder( left: Expression, right: Expression, @@ -565,7 +574,8 @@ case class Remainder( > SELECT _FUNC_(-10, 3); 2 """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") case class Pmod( left: Expression, right: Expression, @@ -750,7 +760,8 @@ case class Pmod( > SELECT _FUNC_(10, 9, 2, 4, 3); 2 """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") case class Least(children: Seq[Expression]) extends ComplexTypeMergingExpression { override def nullable: Boolean = children.forall(_.nullable) @@ -824,7 +835,8 @@ case class Least(children: Seq[Expression]) extends ComplexTypeMergingExpression > SELECT _FUNC_(10, 9, 2, 4, 3); 10 """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") case class Greatest(children: Seq[Expression]) extends ComplexTypeMergingExpression { override def nullable: Boolean = children.forall(_.nullable) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala index 33ce60875c600..752af4eeeafb5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala @@ -33,7 +33,8 @@ import org.apache.spark.sql.types._ > SELECT 3 _FUNC_ 5; 1 """, - since = "1.4.0") + since = "1.4.0", + group = "bitwise_funcs") case class BitwiseAnd(left: Expression, right: Expression) extends BinaryArithmetic { protected override val failOnError: Boolean = false @@ -68,7 +69,8 @@ case class BitwiseAnd(left: Expression, right: Expression) extends BinaryArithme > SELECT 3 _FUNC_ 5; 7 """, - since = "1.4.0") + since = "1.4.0", + group = "bitwise_funcs") case class BitwiseOr(left: Expression, right: Expression) extends BinaryArithmetic { protected override val failOnError: Boolean = false @@ -103,7 +105,8 @@ case class BitwiseOr(left: Expression, right: Expression) extends BinaryArithmet > SELECT 3 _FUNC_ 5; 6 """, - since = "1.4.0") + since = "1.4.0", + group = "bitwise_funcs") case class BitwiseXor(left: Expression, right: Expression) extends BinaryArithmetic { protected override val failOnError: Boolean = false @@ -136,7 +139,8 @@ case class BitwiseXor(left: Expression, right: Expression) extends BinaryArithme > SELECT _FUNC_ 0; -1 """, - since = "1.4.0") + since = "1.4.0", + group = "bitwise_funcs") case class BitwiseNot(child: Expression) extends UnaryExpression with ExpectsInputTypes with NullIntolerant { @@ -174,7 +178,8 @@ case class BitwiseNot(child: Expression) > SELECT _FUNC_(0); 0 """, - since = "3.0.0") + since = "3.0.0", + group = "bitwise_funcs") case class BitwiseCount(child: Expression) extends UnaryExpression with ExpectsInputTypes with NullIntolerant { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 0765bfdd78fa6..33794467fb338 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -90,7 +90,8 @@ trait BinaryArrayExpressionWithImplicitCast extends BinaryExpression > SELECT _FUNC_(NULL); -1 """, - since = "1.5.0") + since = "1.5.0", + group = "collection_funcs") case class Size(child: Expression, legacySizeOfNull: Boolean) extends UnaryExpression with ExpectsInputTypes { @@ -1018,7 +1019,7 @@ case class Shuffle(child: Expression, randomSeed: Option[Long] = None) > SELECT _FUNC_(array(2, 1, 4, 3)); [3,4,1,2] """, - group = "array_funcs", + group = "collection_funcs", since = "1.5.0", note = """ Reverse logic for arrays is available since 2.4.0. @@ -1922,7 +1923,8 @@ case class ArrayPosition(left: Expression, right: Expression) > SELECT _FUNC_(map(1, 'a', 2, 'b'), 2); b """, - since = "2.4.0") + since = "2.4.0", + group = "map_funcs") case class ElementAt( left: Expression, right: Expression, @@ -2097,7 +2099,7 @@ case class ElementAt( note = """ Concat logic for arrays is available since 2.4.0. 
""", - group = "array_funcs", + group = "collection_funcs", since = "1.5.0") case class Concat(children: Seq[Expression]) extends ComplexTypeMergingExpression { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index cb59fbda2b3b9..d29da3ad2a4e4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -52,7 +52,8 @@ trait NoThrow > SELECT _FUNC_(1, 2, 3); [1,2,3] """, - since = "1.1.0") + since = "1.1.0", + group = "array_funcs") case class CreateArray(children: Seq[Expression], useStringTypeWhenEmpty: Boolean) extends Expression with NoThrow { @@ -170,7 +171,8 @@ private [sql] object GenArrayData { > SELECT _FUNC_(1.0, '2', 3.0, '4'); {1.0:"2",3.0:"4"} """, - since = "2.0.0") + since = "2.0.0", + group = "map_funcs") case class CreateMap(children: Seq[Expression], useStringTypeWhenEmpty: Boolean) extends Expression with NoThrow { @@ -271,7 +273,8 @@ object CreateMap { > SELECT _FUNC_(array(1.0, 3.0), array('2', '4')); {1.0:"2",3.0:"4"} """, - since = "2.4.0") + since = "2.4.0", + group = "map_funcs") case class MapFromArrays(left: Expression, right: Expression) extends BinaryExpression with ExpectsInputTypes with NullIntolerant { @@ -369,7 +372,7 @@ object CreateStruct { | {"col1":1,"col2":2,"col3":3} | """.stripMargin, "", - "", + "struct_funcs", "1.4.0", "") ("struct", (info, this.create)) @@ -389,7 +392,8 @@ object CreateStruct { > SELECT _FUNC_("a", 1, "b", 2, "c", 3); {"a":1,"b":2,"c":3} """, - since = "1.5.0") + since = "1.5.0", + group = "struct_funcs") // scalastyle:on line.size.limit case class CreateNamedStruct(children: Seq[Expression]) extends Expression with NoThrow { lazy val (nameExprs, valExprs) = children.grouped(2).map { @@ -495,7 +499,8 @@ case class CreateNamedStruct(children: Seq[Expression]) extends Expression with > SELECT _FUNC_('a'); {"a":null} """, - since = "2.0.1") + since = "2.0.1", + group = "map_funcs") // scalastyle:on line.size.limit case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: Expression) extends TernaryExpression with ExpectsInputTypes with NullIntolerant { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala index 84065d07e2b4d..7b0be8eb24097 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala @@ -31,7 +31,8 @@ import org.apache.spark.sql.types._ > SELECT _FUNC_(1 < 2, 'a', 'b'); a """, - since = "1.0.0") + since = "1.0.0", + group = "conditional_funcs") // scalastyle:on line.size.limit case class If(predicate: Expression, trueValue: Expression, falseValue: Expression) extends ComplexTypeMergingExpression { @@ -118,7 +119,8 @@ case class If(predicate: Expression, trueValue: Expression, falseValue: Expressi > SELECT CASE WHEN 1 < 0 THEN 1 WHEN 2 < 0 THEN 2.0 END; NULL """, - since = "1.0.1") + since = "1.0.1", + group = "conditional_funcs") // scalastyle:on line.size.limit case class CaseWhen( branches: Seq[(Expression, Expression)], diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala index 6fad272aa4557..8978d55b98251 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala @@ -44,7 +44,8 @@ import org.apache.spark.unsafe.types.UTF8String > SELECT _FUNC_('26/08/2015', 'time Timestamp', map('timestampFormat', 'dd/MM/yyyy')); {"time":2015-08-26 00:00:00} """, - since = "3.0.0") + since = "3.0.0", + group = "csv_funcs") // scalastyle:on line.size.limit case class CsvToStructs( schema: StructType, @@ -146,7 +147,8 @@ case class CsvToStructs( > SELECT _FUNC_('1,abc'); STRUCT<`_c0`: INT, `_c1`: STRING> """, - since = "3.0.0") + since = "3.0.0", + group = "csv_funcs") case class SchemaOfCsv( child: Expression, options: Map[String, String]) @@ -205,7 +207,8 @@ case class SchemaOfCsv( > SELECT _FUNC_(named_struct('time', to_timestamp('2015-08-26', 'yyyy-MM-dd')), map('timestampFormat', 'dd/MM/yyyy')); 26/08/2015 """, - since = "3.0.0") + since = "3.0.0", + group = "csv_funcs") // scalastyle:on line.size.limit case class StructsToCsv( options: Map[String, String], diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index c20dd6148be3e..99f80e9078aae 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -697,6 +697,7 @@ case class Month(child: Expression) extends GetDateField { > SELECT _FUNC_('2009-07-30'); 30 """, + group = "datetime_funcs", since = "1.5.0") case class DayOfMonth(child: Expression) extends GetDateField { override val func = DateTimeUtils.getDayOfMonth @@ -2247,6 +2248,7 @@ case class DatePart(field: Expression, source: Expression, child: Expression) note = """ The _FUNC_ function is equivalent to `date_part(field, source)`. 
""", + group = "datetime_funcs", since = "3.0.0") // scalastyle:on line.size.limit case class Extract(field: Expression, source: Expression, child: Expression) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala index ad6e365f76fa9..c5122b6490ae6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala @@ -136,7 +136,8 @@ case class UserDefinedGenerator( 1 2 3 NULL """, - since = "2.0.0") + since = "2.0.0", + group = "generator_funcs") // scalastyle:on line.size.limit line.contains.tab case class Stack(children: Seq[Expression]) extends Generator { @@ -362,7 +363,8 @@ abstract class ExplodeBase extends UnaryExpression with CollectionGenerator with 10 20 """, - since = "1.0.0") + since = "1.0.0", + group = "generator_funcs") // scalastyle:on line.size.limit case class Explode(child: Expression) extends ExplodeBase { override val position: Boolean = false @@ -386,7 +388,8 @@ case class Explode(child: Expression) extends ExplodeBase { 0 10 1 20 """, - since = "2.0.0") + since = "2.0.0", + group = "generator_funcs") // scalastyle:on line.size.limit line.contains.tab case class PosExplode(child: Expression) extends ExplodeBase { override val position = true @@ -404,7 +407,8 @@ case class PosExplode(child: Expression) extends ExplodeBase { 1 a 2 b """, - since = "2.0.0") + since = "2.0.0", + group = "generator_funcs") // scalastyle:on line.size.limit line.contains.tab case class Inline(child: Expression) extends UnaryExpression with CollectionGenerator { override val inline: Boolean = true diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala index ac0f6b86ccd96..f843c1a2d3594 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala @@ -56,7 +56,8 @@ trait GroupingSet extends Expression with CodegenFallback { Bob NULL 1 NULL 5 1 """, - since = "2.0.0") + since = "2.0.0", + group = "agg_funcs") // scalastyle:on line.size.limit line.contains.tab case class Cube(groupByExprs: Seq[Expression]) extends GroupingSet {} @@ -75,7 +76,8 @@ case class Cube(groupByExprs: Seq[Expression]) extends GroupingSet {} NULL NULL 2 Bob NULL 1 """, - since = "2.0.0") + since = "2.0.0", + group = "agg_funcs") // scalastyle:on line.size.limit line.contains.tab case class Rollup(groupByExprs: Seq[Expression]) extends GroupingSet {} @@ -96,7 +98,8 @@ case class Rollup(groupByExprs: Seq[Expression]) extends GroupingSet {} Bob 0 5 NULL 1 7 """, - since = "2.0.0") + since = "2.0.0", + group = "agg_funcs") // scalastyle:on line.size.limit line.contains.tab case class Grouping(child: Expression) extends Expression with Unevaluable { @transient @@ -133,7 +136,8 @@ case class Grouping(child: Expression) extends Expression with Unevaluable { Input columns should match with grouping columns exactly, or empty (means all the grouping columns). 
""", - since = "2.0.0") + since = "2.0.0", + group = "agg_funcs") // scalastyle:on line.size.limit line.contains.tab case class GroupingID(groupByExprs: Seq[Expression]) extends Expression with Unevaluable { @transient diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala index ce177f50956f0..9738559b6d67a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala @@ -53,7 +53,8 @@ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} > SELECT _FUNC_('Spark'); 8cde774d6f7333752ed72cacddb05126 """, - since = "1.5.0") + since = "1.5.0", + group = "hash_funcs") case class Md5(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -89,7 +90,8 @@ case class Md5(child: Expression) > SELECT _FUNC_('Spark', 256); 529bc3b07127ecb7e53a4dcf1991d9152c24537d919178022b2c42657f79a26b """, - since = "1.5.0") + since = "1.5.0", + group = "hash_funcs") // scalastyle:on line.size.limit case class Sha2(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant with Serializable { @@ -163,7 +165,8 @@ case class Sha2(left: Expression, right: Expression) > SELECT _FUNC_('Spark'); 85f5955f4b27a9a4c2aab6ffe5d7189fc298b92c """, - since = "1.5.0") + since = "1.5.0", + group = "hash_funcs") case class Sha1(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -192,7 +195,8 @@ case class Sha1(child: Expression) > SELECT _FUNC_('Spark'); 1557323817 """, - since = "1.5.0") + since = "1.5.0", + group = "hash_funcs") case class Crc32(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -580,7 +584,8 @@ abstract class InterpretedHashFunction { > SELECT _FUNC_('Spark', array(123), 2); -1321691492 """, - since = "2.0.0") + since = "2.0.0", + group = "hash_funcs") case class Murmur3Hash(children: Seq[Expression], seed: Int) extends HashExpression[Int] { def this(arguments: Seq[Expression]) = this(arguments, 42) @@ -619,7 +624,8 @@ object Murmur3HashFunction extends InterpretedHashFunction { > SELECT _FUNC_('Spark', array(123), 2); 5602566077635097486 """, - since = "3.0.0") + since = "3.0.0", + group = "hash_funcs") case class XxHash64(children: Seq[Expression], seed: Long) extends HashExpression[Long] { def this(arguments: Seq[Expression]) = this(arguments, 42L) @@ -653,7 +659,8 @@ object XxHash64Function extends InterpretedHashFunction { */ @ExpressionDescription( usage = "_FUNC_(expr1, expr2, ...) 
- Returns a hash value of the arguments.", - since = "2.2.0") + since = "2.2.0", + group = "hash_funcs") case class HiveHash(children: Seq[Expression]) extends HashExpression[Int] { override val seed = 0 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala index d1dabe732c882..7ad62312250d6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala @@ -239,7 +239,8 @@ trait MapBasedSimpleHigherOrderFunction extends SimpleHigherOrderFunction { > SELECT _FUNC_(array(1, 2, 3), (x, i) -> x + i); [1,3,5] """, - since = "2.4.0") + since = "2.4.0", + group = "lambda_funcs") case class ArrayTransform( argument: Expression, function: Expression) @@ -309,7 +310,8 @@ case class ArrayTransform( > SELECT _FUNC_(array('b', 'd', null, 'c', 'a')); ["a","b","c","d",null] """, - since = "2.4.0") + since = "2.4.0", + group = "lambda_funcs") // scalastyle:on line.size.limit case class ArraySort( argument: Expression, @@ -403,7 +405,8 @@ object ArraySort { > SELECT _FUNC_(map(1, 0, 2, 2, 3, -1), (k, v) -> k > v); {1:0,3:-1} """, - since = "3.0.0") + since = "3.0.0", + group = "lambda_funcs") case class MapFilter( argument: Expression, function: Expression) @@ -458,6 +461,7 @@ case class MapFilter( [0,2,3] """, since = "2.4.0", + group = "lambda_funcs", note = """ The inner function may use the index argument since 3.0.0. """) @@ -525,7 +529,8 @@ case class ArrayFilter( > SELECT _FUNC_(array(1, 2, 3), x -> x IS NULL); false """, - since = "2.4.0") + since = "2.4.0", + group = "lambda_funcs") case class ArrayExists( argument: Expression, function: Expression, @@ -609,7 +614,8 @@ object ArrayExists { > SELECT _FUNC_(array(2, null, 8), x -> x % 2 == 0); NULL """, - since = "3.0.0") + since = "3.0.0", + group = "lambda_funcs") case class ArrayForAll( argument: Expression, function: Expression) @@ -679,7 +685,8 @@ case class ArrayForAll( > SELECT _FUNC_(array(1, 2, 3), 0, (acc, x) -> acc + x, acc -> acc * 10); 60 """, - since = "2.4.0") + since = "2.4.0", + group = "lambda_funcs") case class ArrayAggregate( argument: Expression, zero: Expression, @@ -766,7 +773,8 @@ case class ArrayAggregate( > SELECT _FUNC_(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v); {2:1,4:2,6:3} """, - since = "3.0.0") + since = "3.0.0", + group = "lambda_funcs") case class TransformKeys( argument: Expression, function: Expression) @@ -818,7 +826,8 @@ case class TransformKeys( > SELECT _FUNC_(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v); {1:2,2:4,3:6} """, - since = "3.0.0") + since = "3.0.0", + group = "lambda_funcs") case class TransformValues( argument: Expression, function: Expression) @@ -869,7 +878,8 @@ case class TransformValues( > SELECT _FUNC_(map(1, 'a', 2, 'b'), map(1, 'x', 2, 'y'), (k, v1, v2) -> concat(v1, v2)); {1:"ax",2:"by"} """, - since = "3.0.0") + since = "3.0.0", + group = "lambda_funcs") case class MapZipWith(left: Expression, right: Expression, function: Expression) extends HigherOrderFunction with CodegenFallback { @@ -1047,7 +1057,8 @@ case class MapZipWith(left: Expression, right: Expression, function: Expression) > SELECT _FUNC_(array('a', 'b', 'c'), array('d', 'e', 'f'), (x, y) -> concat(x, y)); ["ad","be","cf"] """, - since = "2.4.0") + since = "2.4.0", + group = 
"lambda_funcs") // scalastyle:on line.size.limit case class ZipWith(left: Expression, right: Expression, function: Expression) extends HigherOrderFunction with CodegenFallback { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/inputFileBlock.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/inputFileBlock.scala index e9426223092de..6cd88367aa9a0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/inputFileBlock.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/inputFileBlock.scala @@ -32,7 +32,8 @@ import org.apache.spark.unsafe.types.UTF8String > SELECT _FUNC_(); """, - since = "1.5.0") + since = "1.5.0", + group = "misc_funcs") // scalastyle:on whitespace.end.of.line case class InputFileName() extends LeafExpression with Nondeterministic { @@ -64,7 +65,8 @@ case class InputFileName() extends LeafExpression with Nondeterministic { > SELECT _FUNC_(); -1 """, - since = "2.2.0") + since = "2.2.0", + group = "misc_funcs") case class InputFileBlockStart() extends LeafExpression with Nondeterministic { override def nullable: Boolean = false @@ -93,7 +95,8 @@ case class InputFileBlockStart() extends LeafExpression with Nondeterministic { > SELECT _FUNC_(); -1 """, - since = "2.2.0") + since = "2.2.0", + group = "misc_funcs") case class InputFileBlockLength() extends LeafExpression with Nondeterministic { override def nullable: Boolean = false diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala index 27067e17e7f45..fd07aff867abf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala @@ -152,7 +152,8 @@ case class DivideInterval( > SELECT _FUNC_(0, 1, 0, 1, 0, 0, 100.000001); 1 months 1 days 1 minutes 40.000001 seconds """, - since = "3.0.0") + since = "3.0.0", + group = "datetime_funcs") // scalastyle:on line.size.limit case class MakeInterval( years: Expression, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala index 931365fb25a1e..43281c2dc3c2f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala @@ -148,7 +148,8 @@ abstract class BinaryMathExpression(f: (Double, Double) => Double, name: String) > SELECT _FUNC_(); 2.718281828459045 """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") case class EulerNumber() extends LeafMathExpression(math.E, "E") /** @@ -162,7 +163,8 @@ case class EulerNumber() extends LeafMathExpression(math.E, "E") > SELECT _FUNC_(); 3.141592653589793 """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") case class Pi() extends LeafMathExpression(math.Pi, "PI") //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -183,7 +185,8 @@ case class Pi() extends LeafMathExpression(math.Pi, "PI") > SELECT _FUNC_(2); NaN """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Acos(child: Expression) extends UnaryMathExpression(math.acos, "ACOS") 
@ExpressionDescription( @@ -198,7 +201,8 @@ case class Acos(child: Expression) extends UnaryMathExpression(math.acos, "ACOS" > SELECT _FUNC_(2); NaN """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Asin(child: Expression) extends UnaryMathExpression(math.asin, "ASIN") @ExpressionDescription( @@ -211,7 +215,8 @@ case class Asin(child: Expression) extends UnaryMathExpression(math.asin, "ASIN" > SELECT _FUNC_(0); 0.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Atan(child: Expression) extends UnaryMathExpression(math.atan, "ATAN") @ExpressionDescription( @@ -221,7 +226,8 @@ case class Atan(child: Expression) extends UnaryMathExpression(math.atan, "ATAN" > SELECT _FUNC_(27.0); 3.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Cbrt(child: Expression) extends UnaryMathExpression(math.cbrt, "CBRT") @ExpressionDescription( @@ -233,7 +239,8 @@ case class Cbrt(child: Expression) extends UnaryMathExpression(math.cbrt, "CBRT" > SELECT _FUNC_(5); 5 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Ceil(child: Expression) extends UnaryMathExpression(math.ceil, "CEIL") { override def dataType: DataType = child.dataType match { case dt @ DecimalType.Fixed(_, 0) => dt @@ -276,7 +283,8 @@ case class Ceil(child: Expression) extends UnaryMathExpression(math.ceil, "CEIL" > SELECT _FUNC_(0); 1.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Cos(child: Expression) extends UnaryMathExpression(math.cos, "COS") @ExpressionDescription( @@ -293,7 +301,8 @@ case class Cos(child: Expression) extends UnaryMathExpression(math.cos, "COS") > SELECT _FUNC_(0); 1.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Cosh(child: Expression) extends UnaryMathExpression(math.cosh, "COSH") @ExpressionDescription( @@ -307,7 +316,8 @@ case class Cosh(child: Expression) extends UnaryMathExpression(math.cosh, "COSH" > SELECT _FUNC_(0); NaN """, - since = "3.0.0") + since = "3.0.0", + group = "math_funcs") case class Acosh(child: Expression) extends UnaryMathExpression((x: Double) => StrictMath.log(x + math.sqrt(x * x - 1.0)), "ACOSH") { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { @@ -332,7 +342,8 @@ case class Acosh(child: Expression) > SELECT _FUNC_(-10, 16, -10); -16 """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") case class Conv(numExpr: Expression, fromBaseExpr: Expression, toBaseExpr: Expression) extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -368,7 +379,8 @@ case class Conv(numExpr: Expression, fromBaseExpr: Expression, toBaseExpr: Expre > SELECT _FUNC_(0); 1.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Exp(child: Expression) extends UnaryMathExpression(StrictMath.exp, "EXP") { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { defineCodeGen(ctx, ev, c => s"java.lang.StrictMath.exp($c)") @@ -382,7 +394,8 @@ case class Exp(child: Expression) extends UnaryMathExpression(StrictMath.exp, "E > SELECT _FUNC_(0); 0.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Expm1(child: Expression) extends UnaryMathExpression(StrictMath.expm1, "EXPM1") { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { defineCodeGen(ctx, ev, c => s"java.lang.StrictMath.expm1($c)") @@ -398,7 +411,8 @@ case class Expm1(child: Expression) extends UnaryMathExpression(StrictMath.expm1 > SELECT _FUNC_(5); 
5 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Floor(child: Expression) extends UnaryMathExpression(math.floor, "FLOOR") { override def dataType: DataType = child.dataType match { case dt @ DecimalType.Fixed(_, 0) => dt @@ -465,7 +479,8 @@ object Factorial { > SELECT _FUNC_(5); 120 """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") case class Factorial(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -506,7 +521,8 @@ case class Factorial(child: Expression) > SELECT _FUNC_(1); 0.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Log(child: Expression) extends UnaryLogExpression(StrictMath.log, "LOG") { override def prettyName: String = getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("ln") } @@ -518,7 +534,8 @@ case class Log(child: Expression) extends UnaryLogExpression(StrictMath.log, "LO > SELECT _FUNC_(2); 1.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Log2(child: Expression) extends UnaryLogExpression((x: Double) => StrictMath.log(x) / StrictMath.log(2), "LOG2") { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { @@ -541,7 +558,8 @@ case class Log2(child: Expression) > SELECT _FUNC_(10); 1.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Log10(child: Expression) extends UnaryLogExpression(StrictMath.log10, "LOG10") @ExpressionDescription( @@ -551,7 +569,8 @@ case class Log10(child: Expression) extends UnaryLogExpression(StrictMath.log10, > SELECT _FUNC_(0); 0.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Log1p(child: Expression) extends UnaryLogExpression(StrictMath.log1p, "LOG1P") { protected override val yAsymptote: Double = -1.0 } @@ -564,7 +583,8 @@ case class Log1p(child: Expression) extends UnaryLogExpression(StrictMath.log1p, > SELECT _FUNC_(12.3456); 12.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") // scalastyle:on line.size.limit case class Rint(child: Expression) extends UnaryMathExpression(math.rint, "ROUND") { override def funcName: String = "rint" @@ -578,7 +598,8 @@ case class Rint(child: Expression) extends UnaryMathExpression(math.rint, "ROUND > SELECT _FUNC_(40); 1.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Signum(child: Expression) extends UnaryMathExpression(math.signum, "SIGNUM") @ExpressionDescription( @@ -592,7 +613,8 @@ case class Signum(child: Expression) extends UnaryMathExpression(math.signum, "S > SELECT _FUNC_(0); 0.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Sin(child: Expression) extends UnaryMathExpression(math.sin, "SIN") @ExpressionDescription( @@ -608,7 +630,8 @@ case class Sin(child: Expression) extends UnaryMathExpression(math.sin, "SIN") > SELECT _FUNC_(0); 0.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Sinh(child: Expression) extends UnaryMathExpression(math.sinh, "SINH") @ExpressionDescription( @@ -620,7 +643,8 @@ case class Sinh(child: Expression) extends UnaryMathExpression(math.sinh, "SINH" > SELECT _FUNC_(0); 0.0 """, - since = "3.0.0") + since = "3.0.0", + group = "math_funcs") case class Asinh(child: Expression) extends UnaryMathExpression((x: Double) => x match { case Double.NegativeInfinity => Double.NegativeInfinity @@ -639,7 +663,8 @@ case class Asinh(child: Expression) > SELECT _FUNC_(4); 2.0 """, - since = "1.1.1") + since = "1.1.1", + group = "math_funcs") case 
class Sqrt(child: Expression) extends UnaryMathExpression(math.sqrt, "SQRT") @ExpressionDescription( @@ -655,7 +680,8 @@ case class Sqrt(child: Expression) extends UnaryMathExpression(math.sqrt, "SQRT" > SELECT _FUNC_(0); 0.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Tan(child: Expression) extends UnaryMathExpression(math.tan, "TAN") @ExpressionDescription( @@ -671,7 +697,8 @@ case class Tan(child: Expression) extends UnaryMathExpression(math.tan, "TAN") > SELECT _FUNC_(1); 0.6420926159343306 """, - since = "2.3.0") + since = "2.3.0", + group = "math_funcs") case class Cot(child: Expression) extends UnaryMathExpression((x: Double) => 1 / math.tan(x), "COT") { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { @@ -693,7 +720,8 @@ case class Cot(child: Expression) > SELECT _FUNC_(0); 0.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Tanh(child: Expression) extends UnaryMathExpression(math.tanh, "TANH") @ExpressionDescription( @@ -707,7 +735,8 @@ case class Tanh(child: Expression) extends UnaryMathExpression(math.tanh, "TANH" > SELECT _FUNC_(2); NaN """, - since = "3.0.0") + since = "3.0.0", + group = "math_funcs") case class Atanh(child: Expression) // SPARK-28519: more accurate express for 1/2 * ln((1 + x) / (1 - x)) extends UnaryMathExpression((x: Double) => @@ -729,7 +758,8 @@ case class Atanh(child: Expression) > SELECT _FUNC_(3.141592653589793); 180.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class ToDegrees(child: Expression) extends UnaryMathExpression(math.toDegrees, "DEGREES") { override def funcName: String = "toDegrees" } @@ -745,7 +775,8 @@ case class ToDegrees(child: Expression) extends UnaryMathExpression(math.toDegre > SELECT _FUNC_(180); 3.141592653589793 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class ToRadians(child: Expression) extends UnaryMathExpression(math.toRadians, "RADIANS") { override def funcName: String = "toRadians" } @@ -762,7 +793,8 @@ case class ToRadians(child: Expression) extends UnaryMathExpression(math.toRadia > SELECT _FUNC_(13.3); 1101 """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") // scalastyle:on line.size.limit case class Bin(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant with Serializable { @@ -864,7 +896,8 @@ object Hex { > SELECT _FUNC_('Spark SQL'); 537061726B2053514C """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") case class Hex(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -901,7 +934,8 @@ case class Hex(child: Expression) > SELECT decode(_FUNC_('537061726B2053514C'), 'UTF-8'); Spark SQL """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") case class Unhex(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -947,7 +981,8 @@ case class Unhex(child: Expression) > SELECT _FUNC_(0, 0); 0.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Atan2(left: Expression, right: Expression) extends BinaryMathExpression(math.atan2, "ATAN2") { @@ -968,7 +1003,8 @@ case class Atan2(left: Expression, right: Expression) > SELECT _FUNC_(2, 3); 8.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Pow(left: Expression, right: Expression) extends BinaryMathExpression(StrictMath.pow, "POWER") { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { @@ -990,7 
+1026,8 @@ case class Pow(left: Expression, right: Expression) > SELECT _FUNC_(2, 1); 4 """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") case class ShiftLeft(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1025,7 +1062,8 @@ case class ShiftLeft(left: Expression, right: Expression) > SELECT _FUNC_(4, 1); 2 """, - since = "1.5.0") + since = "1.5.0", + group = "bitwise_funcs") case class ShiftRight(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1060,7 +1098,8 @@ case class ShiftRight(left: Expression, right: Expression) > SELECT _FUNC_(4, 1); 2 """, - since = "1.5.0") + since = "1.5.0", + group = "bitwise_funcs") case class ShiftRightUnsigned(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1088,7 +1127,8 @@ case class ShiftRightUnsigned(left: Expression, right: Expression) > SELECT _FUNC_(3, 4); 5.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Hypot(left: Expression, right: Expression) extends BinaryMathExpression(math.hypot, "HYPOT") @@ -1106,7 +1146,8 @@ case class Hypot(left: Expression, right: Expression) > SELECT _FUNC_(10, 100); 2.0 """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") case class Logarithm(left: Expression, right: Expression) extends BinaryMathExpression((c1, c2) => StrictMath.log(c2) / StrictMath.log(c1), "LOG") { @@ -1337,7 +1378,8 @@ abstract class RoundBase(child: Expression, scale: Expression, > SELECT _FUNC_(2.5, 0); 3 """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") // scalastyle:on line.size.limit case class Round(child: Expression, scale: Expression) extends RoundBase(child, scale, BigDecimal.RoundingMode.HALF_UP, "ROUND_HALF_UP") @@ -1358,7 +1400,8 @@ case class Round(child: Expression, scale: Expression) > SELECT _FUNC_(2.5, 0); 2 """, - since = "2.0.0") + since = "2.0.0", + group = "math_funcs") // scalastyle:on line.size.limit case class BRound(child: Expression, scale: Expression) extends RoundBase(child, scale, BigDecimal.RoundingMode.HALF_EVEN, "ROUND_HALF_EVEN") @@ -1434,7 +1477,8 @@ object WidthBucket { > SELECT _FUNC_(-0.9, 5.2, 0.5, 2); 3 """, - since = "3.1.0") + since = "3.1.0", + group = "math_funcs") case class WidthBucket( value: Expression, minValue: Expression, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index 4e71c8c103889..34a64dddd30fa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -63,7 +63,8 @@ case class PrintToStderr(child: Expression) extends UnaryExpression { java.lang.RuntimeException custom error message """, - since = "3.1.0") + since = "3.1.0", + group = "misc_funcs") case class RaiseError(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { override def foldable: Boolean = false @@ -108,7 +109,8 @@ case class RaiseError(child: Expression) extends UnaryExpression with ImplicitCa > SELECT _FUNC_(0 < 1); NULL """, - since = "2.0.0") + since = "2.0.0", + group = "misc_funcs") case class AssertTrue(left: Expression, right: Expression, child: Expression) extends RuntimeReplaceable { @@ -140,7 +142,8 @@ object AssertTrue { > SELECT _FUNC_(); default """, - since = 
"1.6.0") + since = "1.6.0", + group = "misc_funcs") case class CurrentDatabase() extends LeafExpression with Unevaluable { override def dataType: DataType = StringType override def nullable: Boolean = false @@ -157,7 +160,8 @@ case class CurrentDatabase() extends LeafExpression with Unevaluable { > SELECT _FUNC_(); spark_catalog """, - since = "3.1.0") + since = "3.1.0", + group = "misc_funcs") case class CurrentCatalog() extends LeafExpression with Unevaluable { override def dataType: DataType = StringType override def nullable: Boolean = false @@ -175,7 +179,8 @@ case class CurrentCatalog() extends LeafExpression with Unevaluable { note = """ The function is non-deterministic. """, - since = "2.3.0") + since = "2.3.0", + group = "misc_funcs") // scalastyle:on line.size.limit case class Uuid(randomSeed: Option[Long] = None) extends LeafExpression with Stateful with ExpressionWithRandomSeed { @@ -221,7 +226,8 @@ case class Uuid(randomSeed: Option[Long] = None) extends LeafExpression with Sta > SELECT _FUNC_(); 3.1.0 a6d6ea3efedbad14d99c24143834cd4e2e52fb40 """, - since = "3.0.0") + since = "3.0.0", + group = "misc_funcs") // scalastyle:on line.size.limit case class SparkVersion() extends LeafExpression with CodegenFallback { override def nullable: Boolean = false @@ -242,7 +248,8 @@ case class SparkVersion() extends LeafExpression with CodegenFallback { > SELECT _FUNC_(array(1)); array """, - since = "3.0.0") + since = "3.0.0", + group = "misc_funcs") case class TypeOf(child: Expression) extends UnaryExpression { override def nullable: Boolean = false override def foldable: Boolean = true diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala index 09ae2186b2429..4d7582fbd23b7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala @@ -43,7 +43,8 @@ import org.apache.spark.sql.types._ > SELECT _FUNC_(NULL, 1, NULL); 1 """, - since = "1.0.0") + since = "1.0.0", + group = "conditional_funcs") // scalastyle:on line.size.limit case class Coalesce(children: Seq[Expression]) extends ComplexTypeMergingExpression { @@ -129,7 +130,8 @@ case class Coalesce(children: Seq[Expression]) extends ComplexTypeMergingExpress > SELECT _FUNC_(NULL, array('2')); ["2"] """, - since = "2.0.0") + since = "2.0.0", + group = "conditional_funcs") case class IfNull(left: Expression, right: Expression, child: Expression) extends RuntimeReplaceable { @@ -149,7 +151,8 @@ case class IfNull(left: Expression, right: Expression, child: Expression) > SELECT _FUNC_(2, 2); NULL """, - since = "2.0.0") + since = "2.0.0", + group = "conditional_funcs") case class NullIf(left: Expression, right: Expression, child: Expression) extends RuntimeReplaceable { @@ -169,7 +172,8 @@ case class NullIf(left: Expression, right: Expression, child: Expression) > SELECT _FUNC_(NULL, array('2')); ["2"] """, - since = "2.0.0") + since = "2.0.0", + group = "conditional_funcs") case class Nvl(left: Expression, right: Expression, child: Expression) extends RuntimeReplaceable { def this(left: Expression, right: Expression) = { @@ -189,7 +193,8 @@ case class Nvl(left: Expression, right: Expression, child: Expression) extends R > SELECT _FUNC_(NULL, 2, 1); 1 """, - since = "2.0.0") + since = "2.0.0", + group = "conditional_funcs") // scalastyle:on line.size.limit case 
class Nvl2(expr1: Expression, expr2: Expression, expr3: Expression, child: Expression) extends RuntimeReplaceable { @@ -213,7 +218,8 @@ case class Nvl2(expr1: Expression, expr2: Expression, expr3: Expression, child: > SELECT _FUNC_(cast('NaN' as double)); true """, - since = "1.5.0") + since = "1.5.0", + group = "predicate_funcs") case class IsNaN(child: Expression) extends UnaryExpression with Predicate with ImplicitCastInputTypes { @@ -256,7 +262,8 @@ case class IsNaN(child: Expression) extends UnaryExpression > SELECT _FUNC_(cast('NaN' as double), 123); 123.0 """, - since = "1.5.0") + since = "1.5.0", + group = "conditional_funcs") case class NaNvl(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes { @@ -317,7 +324,8 @@ case class NaNvl(left: Expression, right: Expression) > SELECT _FUNC_(1); false """, - since = "1.0.0") + since = "1.0.0", + group = "predicate_funcs") case class IsNull(child: Expression) extends UnaryExpression with Predicate { override def nullable: Boolean = false @@ -344,7 +352,8 @@ case class IsNull(child: Expression) extends UnaryExpression with Predicate { > SELECT _FUNC_(1); true """, - since = "1.0.0") + since = "1.0.0", + group = "predicate_funcs") case class IsNotNull(child: Expression) extends UnaryExpression with Predicate { override def nullable: Boolean = false diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 53ac3560bc3b3..250d3fee94cb3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -255,7 +255,8 @@ trait PredicateHelper extends AliasHelper with Logging { > SELECT _FUNC_ NULL; NULL """, - since = "1.0.0") + since = "1.0.0", + group = "predicate_funcs") case class Not(child: Expression) extends UnaryExpression with Predicate with ImplicitCastInputTypes with NullIntolerant { @@ -358,7 +359,8 @@ case class InSubquery(values: Seq[Expression], query: ListQuery) > SELECT named_struct('a', 1, 'b', 2) _FUNC_(named_struct('a', 1, 'b', 2), named_struct('a', 1, 'b', 3)); true """, - since = "1.0.0") + since = "1.0.0", + group = "predicate_funcs") // scalastyle:on line.size.limit case class In(value: Expression, list: Seq[Expression]) extends Predicate { @@ -594,7 +596,8 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with > SELECT false _FUNC_ NULL; false """, - since = "1.0.0") + since = "1.0.0", + group = "predicate_funcs") case class And(left: Expression, right: Expression) extends BinaryOperator with Predicate { override def inputType: AbstractDataType = BooleanType @@ -676,7 +679,8 @@ case class And(left: Expression, right: Expression) extends BinaryOperator with > SELECT false _FUNC_ NULL; NULL """, - since = "1.0.0") + since = "1.0.0", + group = "predicate_funcs") case class Or(left: Expression, right: Expression) extends BinaryOperator with Predicate { override def inputType: AbstractDataType = BooleanType @@ -810,7 +814,8 @@ object Equality { > SELECT NULL _FUNC_ NULL; NULL """, - since = "1.0.0") + since = "1.0.0", + group = "predicate_funcs") case class EqualTo(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { @@ -854,7 +859,8 @@ case class EqualTo(left: Expression, right: Expression) > SELECT NULL _FUNC_ NULL; true """, - since = "1.1.0") + since = "1.1.0", + 
group = "predicate_funcs") case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComparison { override def symbol: String = "<=>" @@ -912,7 +918,8 @@ case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComp > SELECT 1 _FUNC_ NULL; NULL """, - since = "1.0.0") + since = "1.0.0", + group = "predicate_funcs") case class LessThan(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { @@ -943,7 +950,8 @@ case class LessThan(left: Expression, right: Expression) > SELECT 1 _FUNC_ NULL; NULL """, - since = "1.0.0") + since = "1.0.0", + group = "predicate_funcs") case class LessThanOrEqual(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { @@ -974,7 +982,8 @@ case class LessThanOrEqual(left: Expression, right: Expression) > SELECT 1 _FUNC_ NULL; NULL """, - since = "1.0.0") + since = "1.0.0", + group = "predicate_funcs") case class GreaterThan(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { @@ -1005,7 +1014,8 @@ case class GreaterThan(left: Expression, right: Expression) > SELECT 1 _FUNC_ NULL; NULL """, - since = "1.0.0") + since = "1.0.0", + group = "predicate_funcs") case class GreaterThanOrEqual(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala index 6a945173803b7..0fa4d6c315041 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala @@ -81,7 +81,8 @@ trait ExpressionWithRandomSeed { note = """ The function is non-deterministic in general case. """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") // scalastyle:on line.size.limit case class Rand(child: Expression, hideSeed: Boolean = false) extends RDG with ExpressionWithRandomSeed { @@ -132,7 +133,8 @@ object Rand { note = """ The function is non-deterministic in general case. """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") // scalastyle:on line.size.limit case class Randn(child: Expression, hideSeed: Boolean = false) extends RDG with ExpressionWithRandomSeed { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index 3a421f5075a6f..dae954a579eb3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -116,7 +116,8 @@ abstract class StringRegexExpression extends BinaryExpression note = """ Use RLIKE to match with standard regular expressions. """, - since = "1.0.0") + since = "1.0.0", + group = "predicate_funcs") // scalastyle:on line.contains.tab case class Like(left: Expression, right: Expression, escapeChar: Char) extends StringRegexExpression { @@ -358,7 +359,8 @@ case class NotLikeAny(child: Expression, patterns: Seq[UTF8String]) extends Like note = """ Use LIKE to match with simple string pattern. 
""", - since = "1.0.0") + since = "1.0.0", + group = "predicate_funcs") // scalastyle:on line.contains.tab case class RLike(left: Expression, right: Expression) extends StringRegexExpression { @@ -436,7 +438,8 @@ case class RLike(left: Expression, right: Expression) extends StringRegexExpress > SELECT _FUNC_('oneAtwoBthreeC', '[ABC]', 2); ["one","twoBthreeC"] """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class StringSplit(str: Expression, regex: Expression, limit: Expression) extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -494,7 +497,8 @@ case class StringSplit(str: Expression, regex: Expression, limit: Expression) > SELECT _FUNC_('100-200', '(\\d+)', 'num'); num-num """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") // scalastyle:on line.size.limit case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expression, pos: Expression) extends QuaternaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -687,7 +691,8 @@ abstract class RegExpExtractBase > SELECT _FUNC_('100-200', '(\\d+)-(\\d+)', 1); 100 """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class RegExpExtract(subject: Expression, regexp: Expression, idx: Expression) extends RegExpExtractBase { def this(s: Expression, r: Expression) = this(s, r, Literal(1)) @@ -787,7 +792,8 @@ case class RegExpExtract(subject: Expression, regexp: Expression, idx: Expressio > SELECT _FUNC_('100-200, 300-400', '(\\d+)-(\\d+)', 1); ["100","300"] """, - since = "3.1.0") + since = "3.1.0", + group = "string_funcs") case class RegExpExtractAll(subject: Expression, regexp: Expression, idx: Expression) extends RegExpExtractBase { def this(s: Expression, r: Expression) = this(s, r, Literal(1)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 0207b7b55c5af..6caf4395090f1 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -56,7 +56,8 @@ import org.apache.spark.unsafe.types.{ByteArray, UTF8String} > SELECT _FUNC_(' ', 'Spark', 'SQL'); Spark SQL """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") // scalastyle:on line.size.limit case class ConcatWs(children: Seq[Expression]) extends Expression with ImplicitCastInputTypes { @@ -244,7 +245,8 @@ case class ConcatWs(children: Seq[Expression]) > SELECT _FUNC_(1, 'scala', 'java'); scala """, - since = "2.0.0") + since = "2.0.0", + group = "string_funcs") // scalastyle:on line.size.limit case class Elt( children: Seq[Expression], @@ -389,7 +391,8 @@ trait String2StringExpression extends ImplicitCastInputTypes { > SELECT _FUNC_('SparkSql'); SPARKSQL """, - since = "1.0.1") + since = "1.0.1", + group = "string_funcs") case class Upper(child: Expression) extends UnaryExpression with String2StringExpression with NullIntolerant { @@ -412,7 +415,8 @@ case class Upper(child: Expression) > SELECT _FUNC_('SparkSql'); sparksql """, - since = "1.0.1") + since = "1.0.1", + group = "string_funcs") case class Lower(child: Expression) extends UnaryExpression with String2StringExpression with NullIntolerant { @@ -490,7 +494,8 @@ case class EndsWith(left: Expression, right: Expression) extends StringPredicate > SELECT _FUNC_('ABCabc', 'abc', 'DEF'); ABCDEF """, - since = 
"2.3.0") + since = "2.3.0", + group = "string_funcs") // scalastyle:on line.size.limit case class StringReplace(srcExpr: Expression, searchExpr: Expression, replaceExpr: Expression) extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -570,7 +575,8 @@ object Overlay { > SELECT _FUNC_(encode('Spark SQL', 'utf-8') PLACING encode('tructured', 'utf-8') FROM 2 FOR 4); Structured SQL """, - since = "3.0.0") + since = "3.0.0", + group = "string_funcs") // scalastyle:on line.size.limit case class Overlay(input: Expression, replace: Expression, pos: Expression, len: Expression) extends QuaternaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -657,7 +663,8 @@ object StringTranslate { > SELECT _FUNC_('AaBbCc', 'abc', '123'); A1B2C3 """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") // scalastyle:on line.size.limit case class StringTranslate(srcExpr: Expression, matchingExpr: Expression, replaceExpr: Expression) extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -722,7 +729,8 @@ case class StringTranslate(srcExpr: Expression, matchingExpr: Expression, replac > SELECT _FUNC_('ab','abc,b,ab,c,def'); 3 """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") // scalastyle:on line.size.limit case class FindInSet(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -830,7 +838,8 @@ object StringTrim { > SELECT _FUNC_(TRAILING 'SL' FROM 'SSparkSQLS'); SSparkSQ """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class StringTrim( srcStr: Expression, trimStr: Option[Expression] = None) @@ -923,7 +932,8 @@ object StringTrimLeft { > SELECT _FUNC_(' SparkSQL '); SparkSQL """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class StringTrimLeft( srcStr: Expression, trimStr: Option[Expression] = None) @@ -1017,7 +1027,8 @@ object StringTrimRight { > SELECT _FUNC_(' SparkSQL '); SparkSQL """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") // scalastyle:on line.size.limit case class StringTrimRight( srcStr: Expression, @@ -1094,7 +1105,8 @@ case class StringTrimRight( > SELECT _FUNC_('SparkSQL', 'SQL'); 6 """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") // scalastyle:on line.size.limit case class StringInstr(str: Expression, substr: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1136,7 +1148,8 @@ case class StringInstr(str: Expression, substr: Expression) > SELECT _FUNC_('www.apache.org', '.', 2); www.apache """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") // scalastyle:on line.size.limit case class SubstringIndex(strExpr: Expression, delimExpr: Expression, countExpr: Expression) extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1176,7 +1189,8 @@ case class SubstringIndex(strExpr: Expression, delimExpr: Expression, countExpr: > SELECT POSITION('bar' IN 'foobarbar'); 4 """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") // scalastyle:on line.size.limit case class StringLocate(substr: Expression, str: Expression, start: Expression) extends TernaryExpression with ImplicitCastInputTypes { @@ -1266,7 +1280,8 @@ case class StringLocate(substr: Expression, str: Expression, start: Expression) > SELECT _FUNC_('hi', 5); hi """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class StringLPad(str: Expression, len: Expression, pad: Expression = Literal(" ")) 
extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1307,7 +1322,8 @@ case class StringLPad(str: Expression, len: Expression, pad: Expression = Litera > SELECT _FUNC_('hi', 5); hi """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class StringRPad(str: Expression, len: Expression, pad: Expression = Literal(" ")) extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1357,7 +1373,8 @@ object ParseUrl { > SELECT _FUNC_('http://spark.apache.org/path?query=1', 'QUERY', 'query'); 1 """, - since = "2.0.0") + since = "2.0.0", + group = "string_funcs") case class ParseUrl(children: Seq[Expression], failOnError: Boolean = SQLConf.get.ansiEnabled) extends Expression with ExpectsInputTypes with CodegenFallback { def this(children: Seq[Expression]) = this(children, SQLConf.get.ansiEnabled) @@ -1512,7 +1529,8 @@ case class ParseUrl(children: Seq[Expression], failOnError: Boolean = SQLConf.ge > SELECT _FUNC_("Hello World %d %s", 100, "days"); Hello World 100 days """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") // scalastyle:on line.size.limit case class FormatString(children: Expression*) extends Expression with ImplicitCastInputTypes { @@ -1601,7 +1619,8 @@ case class FormatString(children: Expression*) extends Expression with ImplicitC > SELECT _FUNC_('sPark sql'); Spark Sql """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class InitCap(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1628,7 +1647,8 @@ case class InitCap(child: Expression) > SELECT _FUNC_('123', 2); 123123 """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class StringRepeat(str: Expression, times: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1658,7 +1678,8 @@ case class StringRepeat(str: Expression, times: Expression) > SELECT concat(_FUNC_(2), '1'); 1 """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class StringSpace(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1706,7 +1727,8 @@ case class StringSpace(child: Expression) > SELECT _FUNC_('Spark SQL' FROM 5 FOR 1); k """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") // scalastyle:on line.size.limit case class Substring(str: Expression, pos: Expression, len: Expression) extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1753,7 +1775,8 @@ case class Substring(str: Expression, pos: Expression, len: Expression) > SELECT _FUNC_('Spark SQL', 3); SQL """, - since = "2.3.0") + since = "2.3.0", + group = "string_funcs") // scalastyle:on line.size.limit case class Right(str: Expression, len: Expression, child: Expression) extends RuntimeReplaceable { def this(str: Expression, len: Expression) = { @@ -1776,7 +1799,8 @@ case class Right(str: Expression, len: Expression, child: Expression) extends Ru > SELECT _FUNC_('Spark SQL', 3); Spa """, - since = "2.3.0") + since = "2.3.0", + group = "string_funcs") // scalastyle:on line.size.limit case class Left(str: Expression, len: Expression, child: Expression) extends RuntimeReplaceable { def this(str: Expression, len: Expression) = { @@ -1803,7 +1827,8 @@ case class Left(str: Expression, len: Expression, child: Expression) extends Run > SELECT CHARACTER_LENGTH('Spark SQL '); 10 """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") // scalastyle:on line.size.limit case class 
Length(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1833,7 +1858,8 @@ case class Length(child: Expression) > SELECT _FUNC_('Spark SQL'); 72 """, - since = "2.3.0") + since = "2.3.0", + group = "string_funcs") case class BitLength(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { override def dataType: DataType = IntegerType @@ -1865,7 +1891,8 @@ case class BitLength(child: Expression) > SELECT _FUNC_('Spark SQL'); 9 """, - since = "2.3.0") + since = "2.3.0", + group = "string_funcs") case class OctetLength(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { override def dataType: DataType = IntegerType @@ -1896,7 +1923,8 @@ case class OctetLength(child: Expression) > SELECT _FUNC_('kitten', 'sitting'); 3 """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class Levenshtein(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1922,7 +1950,8 @@ case class Levenshtein(left: Expression, right: Expression) extends BinaryExpres > SELECT _FUNC_('Miller'); M460 """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class SoundEx(child: Expression) extends UnaryExpression with ExpectsInputTypes with NullIntolerant { @@ -1949,7 +1978,8 @@ case class SoundEx(child: Expression) > SELECT _FUNC_(2); 50 """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class Ascii(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1991,7 +2021,8 @@ case class Ascii(child: Expression) > SELECT _FUNC_(65); A """, - since = "2.3.0") + since = "2.3.0", + group = "string_funcs") // scalastyle:on line.size.limit case class Chr(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -2036,7 +2067,8 @@ case class Chr(child: Expression) > SELECT _FUNC_('Spark SQL'); U3BhcmsgU1FM """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class Base64(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -2065,7 +2097,8 @@ case class Base64(child: Expression) > SELECT _FUNC_('U3BhcmsgU1FM'); Spark SQL """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class UnBase64(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -2130,7 +2163,8 @@ object Decode { > SELECT _FUNC_(6, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle'); NULL """, - since = "3.2.0") + since = "3.2.0", + group = "string_funcs") // scalastyle:on line.size.limit case class Decode(params: Seq[Expression], child: Expression) extends RuntimeReplaceable { @@ -2155,7 +2189,8 @@ case class Decode(params: Seq[Expression], child: Expression) extends RuntimeRep > SELECT _FUNC_(encode('abc', 'utf-8'), 'utf-8'); abc """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") // scalastyle:on line.size.limit case class StringDecode(bin: Expression, charset: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -2195,7 +2230,8 @@ case class StringDecode(bin: Expression, charset: Expression) > SELECT _FUNC_('abc', 'utf-8'); abc """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") // scalastyle:on line.size.limit case class Encode(value: Expression, charset: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant 
{ @@ -2240,7 +2276,8 @@ case class Encode(value: Expression, charset: Expression) > SELECT _FUNC_(12332.123456, '##################.###'); 12332.123 """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class FormatNumber(x: Expression, d: Expression) extends BinaryExpression with ExpectsInputTypes with NullIntolerant { @@ -2411,7 +2448,8 @@ case class FormatNumber(x: Expression, d: Expression) > SELECT _FUNC_('Hi there! Good morning.'); [["Hi","there"],["Good","morning"]] """, - since = "2.0.0") + since = "2.0.0", + group = "string_funcs") case class Sentences( str: Expression, language: Expression = Literal(""), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/xpath.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/xpath.scala index 5f10667c55d79..b8fc830f18183 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/xpath.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/xpath.scala @@ -64,7 +64,8 @@ abstract class XPathExtract > SELECT _FUNC_('1','a/b'); true """, - since = "2.0.0") + since = "2.0.0", + group = "xml_funcs") // scalastyle:on line.size.limit case class XPathBoolean(xml: Expression, path: Expression) extends XPathExtract { @@ -84,7 +85,8 @@ case class XPathBoolean(xml: Expression, path: Expression) extends XPathExtract > SELECT _FUNC_('12', 'sum(a/b)'); 3 """, - since = "2.0.0") + since = "2.0.0", + group = "xml_funcs") // scalastyle:on line.size.limit case class XPathShort(xml: Expression, path: Expression) extends XPathExtract { override def prettyName: String = "xpath_short" @@ -104,7 +106,8 @@ case class XPathShort(xml: Expression, path: Expression) extends XPathExtract { > SELECT _FUNC_('12', 'sum(a/b)'); 3 """, - since = "2.0.0") + since = "2.0.0", + group = "xml_funcs") // scalastyle:on line.size.limit case class XPathInt(xml: Expression, path: Expression) extends XPathExtract { override def prettyName: String = "xpath_int" @@ -124,7 +127,8 @@ case class XPathInt(xml: Expression, path: Expression) extends XPathExtract { > SELECT _FUNC_('12', 'sum(a/b)'); 3 """, - since = "2.0.0") + since = "2.0.0", + group = "xml_funcs") // scalastyle:on line.size.limit case class XPathLong(xml: Expression, path: Expression) extends XPathExtract { override def prettyName: String = "xpath_long" @@ -144,7 +148,8 @@ case class XPathLong(xml: Expression, path: Expression) extends XPathExtract { > SELECT _FUNC_('12', 'sum(a/b)'); 3.0 """, - since = "2.0.0") + since = "2.0.0", + group = "xml_funcs") // scalastyle:on line.size.limit case class XPathFloat(xml: Expression, path: Expression) extends XPathExtract { override def prettyName: String = "xpath_float" @@ -164,7 +169,8 @@ case class XPathFloat(xml: Expression, path: Expression) extends XPathExtract { > SELECT _FUNC_('12', 'sum(a/b)'); 3.0 """, - since = "2.0.0") + since = "2.0.0", + group = "xml_funcs") // scalastyle:on line.size.limit case class XPathDouble(xml: Expression, path: Expression) extends XPathExtract { override def prettyName: String = @@ -185,7 +191,8 @@ case class XPathDouble(xml: Expression, path: Expression) extends XPathExtract { > SELECT _FUNC_('bcc','a/c'); cc """, - since = "2.0.0") + since = "2.0.0", + group = "xml_funcs") // scalastyle:on line.size.limit case class XPathString(xml: Expression, path: Expression) extends XPathExtract { override def prettyName: String = "xpath_string" @@ -205,7 +212,8 @@ case class XPathString(xml: Expression, path: 
Expression) extends XPathExtract { > SELECT _FUNC_('b1b2b3c1c2','a/b/text()'); ["b1","b2","b3"] """, - since = "2.0.0") + since = "2.0.0", + group = "xml_funcs") // scalastyle:on line.size.limit case class XPathList(xml: Expression, path: Expression) extends XPathExtract { override def prettyName: String = "xpath" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala index 6085c1f2cccb0..438fd2351ab9f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala @@ -43,6 +43,10 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession { test("group info in ExpressionInfo") { val info = spark.sessionState.catalog.lookupFunctionInfo(FunctionIdentifier("sum")) assert(info.getGroup === "agg_funcs") + Seq("agg_funcs", "array_funcs", "binary_funcs", "bitwise_funcs", "collection_funcs", + "predicate_funcs", "conditional_funcs", "conversion_funcs", "csv_funcs", "datetime_funcs", + "generator_funcs", "hash_funcs", "json_funcs", "lambda_funcs", "map_funcs", "math_funcs", + "misc_funcs", "string_funcs", "struct_funcs", "window_funcs", "xml_funcs") Seq("agg_funcs", "array_funcs", "datetime_funcs", "json_funcs", "map_funcs", "window_funcs") .foreach { groupName => @@ -106,7 +110,7 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession { } test("SPARK-32870: Default expressions in FunctionRegistry should have their " + - "usage, examples and since filled") { + "usage, examples, since, and group filled") { val ignoreSet = Set( // Explicitly inherits NonSQLExpression, and has no ExpressionDescription "org.apache.spark.sql.catalyst.expressions.TimeWindow", @@ -121,6 +125,7 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession { assert(info.getExamples.startsWith("\n Examples:\n")) assert(info.getExamples.endsWith("\n ")) assert(info.getSince.matches("[0-9]+\\.[0-9]+\\.[0-9]+")) + assert(info.getGroup.nonEmpty) if (info.getArguments.nonEmpty) { assert(info.getArguments.startsWith("\n Arguments:\n")) From 661ac10901dcdf7d7bd87ef9487f7a045b786573 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 21 Dec 2020 14:06:31 +0000 Subject: [PATCH 0835/1009] [SPARK-33838][SQL][DOCS] Comment the `PURGE` option in the DropTable and in AlterTableDropPartition commands ### What changes were proposed in this pull request? Add comments for the `PURGE` option to the logical nodes `DropTable` and `AlterTableDropPartition`. ### Why are the changes needed? To improve code maintenance. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running `./dev/scalastyle` Closes #30837 from MaxGekk/comment-purge-logical-node. 
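For illustration only, a minimal spark-shell sketch of the commands whose `PURGE` semantics the new comments describe; the table names `t` and `pt` are hypothetical managed tables, and with `PURGE` the catalog removes the data directly, skipping the trash even when one is configured:

```scala
// Minimal sketch (spark-shell); `t` and `pt` are hypothetical managed tables.
// PURGE asks the catalog to drop the data bypassing any configured trash.
spark.sql("DROP TABLE IF EXISTS t PURGE")
spark.sql("ALTER TABLE pt DROP IF EXISTS PARTITION (p = 1) PURGE")
```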
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../sql/catalyst/plans/logical/v2Commands.scala | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 87d81d5330574..b3b538ac8b327 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -410,6 +410,14 @@ case class Assignment(key: Expression, value: Expression) extends Expression wit /** * The logical plan of the DROP TABLE command. + * + * If the `PURGE` option is set, the table catalog must remove table data by skipping the trash + * even when the catalog has configured one. The option is applicable only for managed tables. + * + * The syntax of this command is: + * {{{ + * DROP TABLE [IF EXISTS] table [PURGE]; + * }}} */ case class DropTable( child: LogicalPlan, @@ -657,9 +665,12 @@ case class AlterTableAddPartition( * The logical plan of the ALTER TABLE DROP PARTITION command. * This may remove the data and metadata for this partition. * + * If the `PURGE` option is set, the table catalog must remove partition data by skipping the trash + * even when the catalog has configured one. The option is applicable only for managed tables. + * * The syntax of this command is: * {{{ - * ALTER TABLE table DROP [IF EXISTS] PARTITION spec1[, PARTITION spec2, ...]; + * ALTER TABLE table DROP [IF EXISTS] PARTITION spec1[, PARTITION spec2, ...] [PURGE]; * }}} */ case class AlterTableDropPartition( From 1c7760568263235eaa363e8c650c67132c3dcd7a Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Mon, 21 Dec 2020 10:25:23 -0800 Subject: [PATCH 0836/1009] [SPARK-33848][SQL] Push the UnaryExpression into (if / case) branches ### What changes were proposed in this pull request? This pr push the `UnaryExpression` into (if / case) branches. The use case is: ```sql create table t1 using parquet as select id from range(10); explain select id from t1 where (CASE WHEN id = 1 THEN '1' WHEN id = 3 THEN '2' end) > 3; ``` Before this pr: ``` == Physical Plan == *(1) Filter (cast(CASE WHEN (id#1L = 1) THEN 1 WHEN (id#1L = 3) THEN 2 END as int) > 3) +- *(1) ColumnarToRow +- FileScan parquet default.t1[id#1L] Batched: true, DataFilters: [(cast(CASE WHEN (id#1L = 1) THEN 1 WHEN (id#1L = 3) THEN 2 END as int) > 3)], Format: Parquet, Location: InMemoryFileIndex[file:/Users/yumwang/opensource/spark/spark-warehouse/org.apache.spark.sql.DataF..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct ``` After this pr: ``` == Physical Plan == LocalTableScan , [id#1L] ``` This change can also improve this case: https://github.com/apache/spark/blob/a78d6ce376edf2a8836e01f47b9dff5371058d4c/sql/core/src/test/resources/tpcds/q62.sql#L5-L22 ### Why are the changes needed? Improve query performance. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #30853 from wangyum/SPARK-33848. 
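For illustration, a minimal spark-shell sketch of the use case above; it assumes the `t1` table from the description, and per the plans shown above the filter should fold away to a `LocalTableScan` once the cast is pushed into the CASE branches:

```scala
// Minimal sketch (spark-shell), reusing the t1 table from the description above.
spark.sql("create table t1 using parquet as select id from range(10)")
// After this change the cast is pushed into each CASE branch, the comparison
// against 3 constant-folds to false in every branch, and the filter is removed.
spark.sql(
  "select id from t1 where (CASE WHEN id = 1 THEN '1' WHEN id = 3 THEN '2' end) > 3"
).explain()
```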
Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/expressions/Expression.scala | 6 +++ .../sql/catalyst/optimizer/expressions.scala | 29 ++++++++++---- .../PushFoldableIntoBranchesSuite.scala | 39 ++++++++++++++++++- .../approved-plans-v1_4/q21.sf100/explain.txt | 12 +++--- .../q21.sf100/simplified.txt | 2 +- .../approved-plans-v1_4/q21/explain.txt | 12 +++--- .../approved-plans-v1_4/q21/simplified.txt | 2 +- .../approved-plans-v1_4/q50.sf100/explain.txt | 14 +++---- .../q50.sf100/simplified.txt | 2 +- .../approved-plans-v1_4/q50/explain.txt | 10 ++--- .../approved-plans-v1_4/q50/simplified.txt | 2 +- .../approved-plans-v1_4/q62.sf100/explain.txt | 10 ++--- .../q62.sf100/simplified.txt | 2 +- .../approved-plans-v1_4/q62/explain.txt | 10 ++--- .../approved-plans-v1_4/q62/simplified.txt | 2 +- .../approved-plans-v1_4/q97.sf100/explain.txt | 14 +++---- .../q97.sf100/simplified.txt | 2 +- .../approved-plans-v1_4/q97/explain.txt | 14 +++---- .../approved-plans-v1_4/q97/simplified.txt | 2 +- .../approved-plans-v1_4/q99.sf100/explain.txt | 10 ++--- .../q99.sf100/simplified.txt | 2 +- .../approved-plans-v1_4/q99/explain.txt | 10 ++--- .../approved-plans-v1_4/q99/simplified.txt | 2 +- 23 files changed, 133 insertions(+), 77 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 65f89bbdd0599..1d316bcf811d7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -536,6 +536,12 @@ abstract class UnaryExpression extends Expression { } } + +object UnaryExpression { + def unapply(e: UnaryExpression): Option[Expression] = Some(e.child) +} + + /** * An expression with two inputs and one output. The output is by default evaluated to null * if any input is evaluated to null. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index ac2caaeb15357..47b968f6ebdd7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -544,29 +544,42 @@ object PushFoldableIntoBranches extends Rule[LogicalPlan] with PredicateHelper { def apply(plan: LogicalPlan): LogicalPlan = plan transform { case q: LogicalPlan => q transformExpressionsUp { + case a: Alias => a // Skip an alias. 
+ case u @ UnaryExpression(i @ If(_, trueValue, falseValue)) + if atMostOneUnfoldable(Seq(trueValue, falseValue)) => + i.copy( + trueValue = u.withNewChildren(Array(trueValue)), + falseValue = u.withNewChildren(Array(falseValue))) + + case u @ UnaryExpression(c @ CaseWhen(branches, elseValue)) + if atMostOneUnfoldable(branches.map(_._2) ++ elseValue) => + c.copy( + branches.map(e => e.copy(_2 = u.withNewChildren(Array(e._2)))), + elseValue.map(e => u.withNewChildren(Array(e)))) + case b @ BinaryExpression(i @ If(_, trueValue, falseValue), right) if right.foldable && atMostOneUnfoldable(Seq(trueValue, falseValue)) => i.copy( - trueValue = b.makeCopy(Array(trueValue, right)), - falseValue = b.makeCopy(Array(falseValue, right))) + trueValue = b.withNewChildren(Array(trueValue, right)), + falseValue = b.withNewChildren(Array(falseValue, right))) case b @ BinaryExpression(left, i @ If(_, trueValue, falseValue)) if left.foldable && atMostOneUnfoldable(Seq(trueValue, falseValue)) => i.copy( - trueValue = b.makeCopy(Array(left, trueValue)), - falseValue = b.makeCopy(Array(left, falseValue))) + trueValue = b.withNewChildren(Array(left, trueValue)), + falseValue = b.withNewChildren(Array(left, falseValue))) case b @ BinaryExpression(c @ CaseWhen(branches, elseValue), right) if right.foldable && atMostOneUnfoldable(branches.map(_._2) ++ elseValue) => c.copy( - branches.map(e => e.copy(_2 = b.makeCopy(Array(e._2, right)))), - elseValue.map(e => b.makeCopy(Array(e, right)))) + branches.map(e => e.copy(_2 = b.withNewChildren(Array(e._2, right)))), + elseValue.map(e => b.withNewChildren(Array(e, right)))) case b @ BinaryExpression(left, c @ CaseWhen(branches, elseValue)) if left.foldable && atMostOneUnfoldable(branches.map(_._2) ++ elseValue) => c.copy( - branches.map(e => e.copy(_2 = b.makeCopy(Array(left, e._2)))), - elseValue.map(e => b.makeCopy(Array(left, e)))) + branches.map(e => e.copy(_2 = b.withNewChildren(Array(left, e._2)))), + elseValue.map(e => b.withNewChildren(Array(left, e)))) } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala index de4f4be8ec333..02307a52ebb89 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLite import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ -import org.apache.spark.sql.types.{BooleanType, IntegerType} +import org.apache.spark.sql.types.{BooleanType, IntegerType, StringType} class PushFoldableIntoBranchesSuite @@ -221,4 +221,41 @@ class PushFoldableIntoBranchesSuite assertEquivalent(EqualTo(Literal(4), ifExp), FalseLiteral) assertEquivalent(EqualTo(Literal(4), caseWhen), FalseLiteral) } + + test("SPARK-33848: Push down cast through If/CaseWhen") { + assertEquivalent(If(a, Literal(2), Literal(3)).cast(StringType), + If(a, Literal("2"), Literal("3"))) + assertEquivalent(If(a, b, Literal(3)).cast(StringType), + If(a, b.cast(StringType), Literal("3"))) + assertEquivalent(If(a, b, b + 1).cast(StringType), + If(a, b, b + 1).cast(StringType)) + + assertEquivalent( + CaseWhen(Seq((a, Literal(1))), Some(Literal(3))).cast(StringType), + 
CaseWhen(Seq((a, Literal("1"))), Some(Literal("3")))) + assertEquivalent( + CaseWhen(Seq((a, Literal(1))), Some(b)).cast(StringType), + CaseWhen(Seq((a, Literal("1"))), Some(b.cast(StringType)))) + assertEquivalent( + CaseWhen(Seq((a, b)), Some(b + 1)).cast(StringType), + CaseWhen(Seq((a, b)), Some(b + 1)).cast(StringType)) + } + + test("SPARK-33848: Push down abs through If/CaseWhen") { + assertEquivalent(Abs(If(a, Literal(-2), Literal(-3))), If(a, Literal(2), Literal(3))) + assertEquivalent( + Abs(CaseWhen(Seq((a, Literal(-1))), Some(Literal(-3)))), + CaseWhen(Seq((a, Literal(1))), Some(Literal(3)))) + } + + test("SPARK-33848: Push down cast with binary expression through If/CaseWhen") { + assertEquivalent(EqualTo(If(a, Literal(2), Literal(3)).cast(StringType), Literal("4")), + FalseLiteral) + assertEquivalent( + EqualTo(CaseWhen(Seq((a, Literal(1))), Some(Literal(3))).cast(StringType), Literal("4")), + FalseLiteral) + assertEquivalent( + EqualTo(CaseWhen(Seq((a, Literal(1)), (c, Literal(2))), None).cast(StringType), Literal("4")), + CaseWhen(Seq((a, FalseLiteral), (c, FalseLiteral)), None)) + } } diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/explain.txt index 9de369f611d0e..094e7aac5cbbd 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/explain.txt @@ -130,24 +130,24 @@ Input [6]: [inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_id#6, d_date#10, (23) HashAggregate [codegen id : 4] Input [4]: [inv_quantity_on_hand#4, w_warehouse_name#13, i_item_id#6, d_date#10] Keys [2]: [w_warehouse_name#13, i_item_id#6] -Functions [2]: [partial_sum(cast(CASE WHEN (d_date#10 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (d_date#10 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))] +Functions [2]: [partial_sum(CASE WHEN (d_date#10 < 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END), partial_sum(CASE WHEN (d_date#10 >= 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END)] Aggregate Attributes [2]: [sum#15, sum#16] Results [4]: [w_warehouse_name#13, i_item_id#6, sum#17, sum#18] (24) Exchange Input [4]: [w_warehouse_name#13, i_item_id#6, sum#17, sum#18] -Arguments: hashpartitioning(w_warehouse_name#13, i_item_id#6, 5), true, [id=#19] +Arguments: hashpartitioning(w_warehouse_name#13, i_item_id#6, 5), ENSURE_REQUIREMENTS, [id=#19] (25) HashAggregate [codegen id : 5] Input [4]: [w_warehouse_name#13, i_item_id#6, sum#17, sum#18] Keys [2]: [w_warehouse_name#13, i_item_id#6] -Functions [2]: [sum(cast(CASE WHEN (d_date#10 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint)), sum(cast(CASE WHEN (d_date#10 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))] -Aggregate Attributes [2]: [sum(cast(CASE WHEN (d_date#10 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#20, sum(cast(CASE WHEN (d_date#10 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#21] -Results [4]: [w_warehouse_name#13, i_item_id#6, sum(cast(CASE WHEN (d_date#10 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#20 AS inv_before#22, sum(cast(CASE WHEN (d_date#10 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#21 AS inv_after#23] +Functions [2]: [sum(CASE WHEN (d_date#10 < 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END), 
sum(CASE WHEN (d_date#10 >= 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END)] +Aggregate Attributes [2]: [sum(CASE WHEN (d_date#10 < 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END)#20, sum(CASE WHEN (d_date#10 >= 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END)#21] +Results [4]: [w_warehouse_name#13, i_item_id#6, sum(CASE WHEN (d_date#10 < 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END)#20 AS inv_before#22, sum(CASE WHEN (d_date#10 >= 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END)#21 AS inv_after#23] (26) Filter [codegen id : 5] Input [4]: [w_warehouse_name#13, i_item_id#6, inv_before#22, inv_after#23] -Condition : ((CASE WHEN (inv_before#22 > 0) THEN (cast(inv_after#23 as double) / cast(inv_before#22 as double)) ELSE null END >= 0.666667) AND (CASE WHEN (inv_before#22 > 0) THEN (cast(inv_after#23 as double) / cast(inv_before#22 as double)) ELSE null END <= 1.5)) +Condition : (CASE WHEN (inv_before#22 > 0) THEN ((cast(inv_after#23 as double) / cast(inv_before#22 as double)) >= 0.666667) ELSE false END AND CASE WHEN (inv_before#22 > 0) THEN ((cast(inv_after#23 as double) / cast(inv_before#22 as double)) <= 1.5) ELSE false END) (27) TakeOrderedAndProject Input [4]: [w_warehouse_name#13, i_item_id#6, inv_before#22, inv_after#23] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/simplified.txt index 0ee47d05af65b..3da4f967ccbd3 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/simplified.txt @@ -1,7 +1,7 @@ TakeOrderedAndProject [w_warehouse_name,i_item_id,inv_before,inv_after] WholeStageCodegen (5) Filter [inv_before,inv_after] - HashAggregate [w_warehouse_name,i_item_id,sum,sum] [sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),inv_before,inv_after,sum,sum] + HashAggregate [w_warehouse_name,i_item_id,sum,sum] [sum(CASE WHEN (d_date < 11027) THEN cast(inv_quantity_on_hand as bigint) ELSE 0 END),sum(CASE WHEN (d_date >= 11027) THEN cast(inv_quantity_on_hand as bigint) ELSE 0 END),inv_before,inv_after,sum,sum] InputAdapter Exchange [w_warehouse_name,i_item_id] #1 WholeStageCodegen (4) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt index 788d1affde1b8..8edf52683fe7d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt @@ -130,24 +130,24 @@ Input [6]: [inv_date_sk#1, inv_quantity_on_hand#4, w_warehouse_name#6, i_item_id (23) HashAggregate [codegen id : 4] Input [4]: [inv_quantity_on_hand#4, w_warehouse_name#6, i_item_id#9, d_date#13] Keys [2]: [w_warehouse_name#6, i_item_id#9] -Functions [2]: [partial_sum(cast(CASE WHEN (d_date#13 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (d_date#13 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))] +Functions [2]: [partial_sum(CASE WHEN (d_date#13 < 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END), partial_sum(CASE WHEN (d_date#13 >= 11027) THEN 
cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END)] Aggregate Attributes [2]: [sum#15, sum#16] Results [4]: [w_warehouse_name#6, i_item_id#9, sum#17, sum#18] (24) Exchange Input [4]: [w_warehouse_name#6, i_item_id#9, sum#17, sum#18] -Arguments: hashpartitioning(w_warehouse_name#6, i_item_id#9, 5), true, [id=#19] +Arguments: hashpartitioning(w_warehouse_name#6, i_item_id#9, 5), ENSURE_REQUIREMENTS, [id=#19] (25) HashAggregate [codegen id : 5] Input [4]: [w_warehouse_name#6, i_item_id#9, sum#17, sum#18] Keys [2]: [w_warehouse_name#6, i_item_id#9] -Functions [2]: [sum(cast(CASE WHEN (d_date#13 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint)), sum(cast(CASE WHEN (d_date#13 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))] -Aggregate Attributes [2]: [sum(cast(CASE WHEN (d_date#13 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#20, sum(cast(CASE WHEN (d_date#13 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#21] -Results [4]: [w_warehouse_name#6, i_item_id#9, sum(cast(CASE WHEN (d_date#13 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#20 AS inv_before#22, sum(cast(CASE WHEN (d_date#13 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#21 AS inv_after#23] +Functions [2]: [sum(CASE WHEN (d_date#13 < 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END), sum(CASE WHEN (d_date#13 >= 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END)] +Aggregate Attributes [2]: [sum(CASE WHEN (d_date#13 < 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END)#20, sum(CASE WHEN (d_date#13 >= 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END)#21] +Results [4]: [w_warehouse_name#6, i_item_id#9, sum(CASE WHEN (d_date#13 < 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END)#20 AS inv_before#22, sum(CASE WHEN (d_date#13 >= 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END)#21 AS inv_after#23] (26) Filter [codegen id : 5] Input [4]: [w_warehouse_name#6, i_item_id#9, inv_before#22, inv_after#23] -Condition : ((CASE WHEN (inv_before#22 > 0) THEN (cast(inv_after#23 as double) / cast(inv_before#22 as double)) ELSE null END >= 0.666667) AND (CASE WHEN (inv_before#22 > 0) THEN (cast(inv_after#23 as double) / cast(inv_before#22 as double)) ELSE null END <= 1.5)) +Condition : (CASE WHEN (inv_before#22 > 0) THEN ((cast(inv_after#23 as double) / cast(inv_before#22 as double)) >= 0.666667) ELSE false END AND CASE WHEN (inv_before#22 > 0) THEN ((cast(inv_after#23 as double) / cast(inv_before#22 as double)) <= 1.5) ELSE false END) (27) TakeOrderedAndProject Input [4]: [w_warehouse_name#6, i_item_id#9, inv_before#22, inv_after#23] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt index 9b5483bd7191b..b9729a8c80968 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt @@ -1,7 +1,7 @@ TakeOrderedAndProject [w_warehouse_name,i_item_id,inv_before,inv_after] WholeStageCodegen (5) Filter [inv_before,inv_after] - HashAggregate [w_warehouse_name,i_item_id,sum,sum] [sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),inv_before,inv_after,sum,sum] + HashAggregate [w_warehouse_name,i_item_id,sum,sum] [sum(CASE WHEN (d_date 
< 11027) THEN cast(inv_quantity_on_hand as bigint) ELSE 0 END),sum(CASE WHEN (d_date >= 11027) THEN cast(inv_quantity_on_hand as bigint) ELSE 0 END),inv_before,inv_after,sum,sum] InputAdapter Exchange [w_warehouse_name,i_item_id] #1 WholeStageCodegen (4) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/explain.txt index 741ee50f800ec..69678ef86a0fc 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/explain.txt @@ -106,7 +106,7 @@ Input [16]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, s (16) Exchange Input [14]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] -Arguments: hashpartitioning(cast(ss_ticket_number#5 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_customer_sk#3 as bigint), 5), true, [id=#20] +Arguments: hashpartitioning(cast(ss_ticket_number#5 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_customer_sk#3 as bigint), 5), ENSURE_REQUIREMENTS, [id=#20] (17) Sort [codegen id : 4] Input [14]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] @@ -159,7 +159,7 @@ Input [5]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_ (28) Exchange Input [4]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24] -Arguments: hashpartitioning(sr_ticket_number#24, sr_item_sk#22, sr_customer_sk#23, 5), true, [id=#29] +Arguments: hashpartitioning(sr_ticket_number#24, sr_item_sk#22, sr_customer_sk#23, 5), ENSURE_REQUIREMENTS, [id=#29] (29) Sort [codegen id : 7] Input [4]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24] @@ -177,20 +177,20 @@ Input [18]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number (32) HashAggregate [codegen id : 8] Input [12]: [ss_sold_date_sk#1, sr_returned_date_sk#21, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] Keys [10]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] -Functions [5]: [partial_sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] 
+Functions [5]: [partial_sum(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END)] Aggregate Attributes [5]: [sum#30, sum#31, sum#32, sum#33, sum#34] Results [15]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, sum#35, sum#36, sum#37, sum#38, sum#39] (33) Exchange Input [15]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, sum#35, sum#36, sum#37, sum#38, sum#39] -Arguments: hashpartitioning(s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, 5), true, [id=#40] +Arguments: hashpartitioning(s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, 5), ENSURE_REQUIREMENTS, [id=#40] (34) HashAggregate [codegen id : 9] Input [15]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, sum#35, sum#36, sum#37, sum#38, sum#39] Keys [10]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] -Functions [5]: [sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [5]: [sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint))#41, sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint))#42, sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint))#43, sum(cast(CASE WHEN (((sr_returned_date_sk#21 - 
cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint))#44, sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))#45] -Results [15]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint))#41 AS 30 days #46, sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint))#42 AS 31 - 60 days #47, sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint))#43 AS 61 - 90 days #48, sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint))#44 AS 91 - 120 days #49, sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))#45 AS >120 days #50] +Functions [5]: [sum(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END)#41, sum(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END)#42, sum(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END)#43, sum(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END)#44, sum(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END)#45] +Results [15]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, sum(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END)#41 AS 30 days #46, sum(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END)#42 AS 31 - 60 days #47, sum(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) 
THEN 1 ELSE 0 END)#43 AS 61 - 90 days #48, sum(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END)#44 AS 91 - 120 days #49, sum(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END)#45 AS >120 days #50] (35) TakeOrderedAndProject Input [15]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, 30 days #46, 31 - 60 days #47, 61 - 90 days #48, 91 - 120 days #49, >120 days #50] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/simplified.txt index be11a69176810..02ab8c946fd31 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/simplified.txt @@ -1,6 +1,6 @@ TakeOrderedAndProject [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] WholeStageCodegen (9) - HashAggregate [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,sum,sum,sum,sum,sum] [sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint)),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] + HashAggregate [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,sum,sum,sum,sum,sum] [sum(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END),sum(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END),sum(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END),sum(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END),sum(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] InputAdapter Exchange [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] #1 WholeStageCodegen (8) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt index e083affa7261d..ecbd3ab5d3471 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt @@ -162,20 +162,20 @@ Input [13]: [ss_sold_date_sk#1, sr_returned_date_sk#6, s_store_name#12, s_compan (29) HashAggregate [codegen id : 5] Input [12]: [ss_sold_date_sk#1, sr_returned_date_sk#6, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] Keys [10]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] -Functions [5]: [partial_sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] +Functions [5]: [partial_sum(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END)] Aggregate Attributes [5]: [sum#29, sum#30, sum#31, sum#32, sum#33] Results [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, sum#34, sum#35, sum#36, sum#37, sum#38] (30) Exchange Input [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, sum#34, sum#35, sum#36, sum#37, sum#38] -Arguments: hashpartitioning(s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, 5), true, [id=#39] +Arguments: hashpartitioning(s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, 5), ENSURE_REQUIREMENTS, [id=#39] (31) HashAggregate [codegen id : 6] Input [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, 
s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, sum#34, sum#35, sum#36, sum#37, sum#38] Keys [10]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] -Functions [5]: [sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [5]: [sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint))#40, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint))#41, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint))#42, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint))#43, sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))#44] -Results [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint))#40 AS 30 days #45, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint))#41 AS 31 - 60 days #46, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint))#42 AS 61 - 90 days #47, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint))#43 AS 91 - 120 days #48, sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))#44 AS >120 days #49] +Functions [5]: [sum(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#6 - 
cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END)#40, sum(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END)#41, sum(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END)#42, sum(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END)#43, sum(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END)#44] +Results [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, sum(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END)#40 AS 30 days #45, sum(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END)#41 AS 31 - 60 days #46, sum(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END)#42 AS 61 - 90 days #47, sum(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END)#43 AS 91 - 120 days #48, sum(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END)#44 AS >120 days #49] (32) TakeOrderedAndProject Input [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, 30 days #45, 31 - 60 days #46, 61 - 90 days #47, 91 - 120 days #48, >120 days #49] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt index 43e7773855595..4ab50bf6c135d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt @@ -1,6 +1,6 @@ TakeOrderedAndProject [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] WholeStageCodegen (6) - HashAggregate [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,sum,sum,sum,sum,sum] [sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as 
bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint)),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] + HashAggregate [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,sum,sum,sum,sum,sum] [sum(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END),sum(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END),sum(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END),sum(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END),sum(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] InputAdapter Exchange [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] #1 WholeStageCodegen (5) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/explain.txt index b74dfb49c9f03..90e48794201c4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/explain.txt @@ -162,20 +162,20 @@ Input [7]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_warehouse_sk#5, web_name#10 (29) HashAggregate [codegen id : 5] Input [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, w_warehouse_name#16, sm_type#13, web_name#10] Keys [3]: [substr(w_warehouse_name#16, 1, 20) AS substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10] -Functions [5]: [partial_sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] +Functions [5]: [partial_sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 
END), partial_sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)] Aggregate Attributes [5]: [sum#19, sum#20, sum#21, sum#22, sum#23] Results [8]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10, sum#24, sum#25, sum#26, sum#27, sum#28] (30) Exchange Input [8]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10, sum#24, sum#25, sum#26, sum#27, sum#28] -Arguments: hashpartitioning(substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10, 5), true, [id=#29] +Arguments: hashpartitioning(substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10, 5), ENSURE_REQUIREMENTS, [id=#29] (31) HashAggregate [codegen id : 6] Input [8]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10, sum#24, sum#25, sum#26, sum#27, sum#28] Keys [3]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10] -Functions [5]: [sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [5]: [sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33, sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34] -Results [8]: [substr(w_warehouse_name#16, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#13, web_name#10, sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30 AS 30 days #36, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31 AS 31 - 60 days #37, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32 AS 61 - 90 days #38, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33 AS 91 - 120 days #39, sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34 AS >120 days #40] +Functions [5]: [sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND 
((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END)#30, sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END)#31, sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END)#32, sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END)#33, sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)#34] +Results [8]: [substr(w_warehouse_name#16, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#13, web_name#10, sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END)#30 AS 30 days #36, sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END)#31 AS 31 - 60 days #37, sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END)#32 AS 61 - 90 days #38, sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END)#33 AS 91 - 120 days #39, sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)#34 AS >120 days #40] (32) TakeOrderedAndProject Input [8]: [substr(w_warehouse_name, 1, 20)#35, sm_type#13, web_name#10, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/simplified.txt index 9b16b44792ca4..a2e1d28e1b911 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/simplified.txt @@ -1,6 +1,6 @@ TakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,web_name,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] WholeStageCodegen (6) - HashAggregate [substr(w_warehouse_name, 1, 20),sm_type,web_name,sum,sum,sum,sum,sum] [sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),substr(w_warehouse_name, 1, 20),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] + HashAggregate [substr(w_warehouse_name, 1, 20),sm_type,web_name,sum,sum,sum,sum,sum] [sum(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END),sum(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END),sum(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) AND ((ws_ship_date_sk - 
ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END),sum(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END),sum(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END),substr(w_warehouse_name, 1, 20),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] InputAdapter Exchange [substr(w_warehouse_name, 1, 20),sm_type,web_name] #1 WholeStageCodegen (5) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/explain.txt index 05ce467c349a3..b6c467d0e9863 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/explain.txt @@ -162,20 +162,20 @@ Input [6]: [ws_sold_date_sk#1, ws_ship_date_sk#2, w_warehouse_name#7, sm_type#10 (29) HashAggregate [codegen id : 5] Input [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, w_warehouse_name#7, sm_type#10, web_name#13] Keys [3]: [substr(w_warehouse_name#7, 1, 20) AS substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13] -Functions [5]: [partial_sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] +Functions [5]: [partial_sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)] Aggregate Attributes [5]: [sum#19, sum#20, sum#21, sum#22, sum#23] Results [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] (30) Exchange Input [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] -Arguments: hashpartitioning(substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13, 5), true, [id=#29] +Arguments: hashpartitioning(substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13, 5), ENSURE_REQUIREMENTS, [id=#29] (31) HashAggregate [codegen id : 6] Input [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] Keys [3]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13] -Functions [5]: [sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 
60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [5]: [sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33, sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34] -Results [8]: [substr(w_warehouse_name#7, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#10, web_name#13, sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30 AS 30 days #36, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31 AS 31 - 60 days #37, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32 AS 61 - 90 days #38, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33 AS 91 - 120 days #39, sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34 AS >120 days #40] +Functions [5]: [sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END)#30, sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END)#31, sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END)#32, sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END)#33, sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)#34] +Results [8]: [substr(w_warehouse_name#7, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#10, web_name#13, sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END)#30 AS 30 days #36, sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 
END)#31 AS 31 - 60 days #37, sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END)#32 AS 61 - 90 days #38, sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END)#33 AS 91 - 120 days #39, sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)#34 AS >120 days #40] (32) TakeOrderedAndProject Input [8]: [substr(w_warehouse_name, 1, 20)#35, sm_type#10, web_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt index 803326b2afd30..017ba3adcefe9 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt @@ -1,6 +1,6 @@ TakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,web_name,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] WholeStageCodegen (6) - HashAggregate [substr(w_warehouse_name, 1, 20),sm_type,web_name,sum,sum,sum,sum,sum] [sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),substr(w_warehouse_name, 1, 20),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] + HashAggregate [substr(w_warehouse_name, 1, 20),sm_type,web_name,sum,sum,sum,sum,sum] [sum(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END),sum(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END),sum(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END),sum(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END),sum(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END),substr(w_warehouse_name, 1, 20),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] InputAdapter Exchange [substr(w_warehouse_name, 1, 20),sm_type,web_name] #1 WholeStageCodegen (5) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt index e904ad94dd8fa..fadad48be3d6c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt @@ -84,7 +84,7 @@ Results [2]: [ss_customer_sk#3, ss_item_sk#2] (12) Exchange Input [2]: [ss_customer_sk#3, ss_item_sk#2] -Arguments: hashpartitioning(ss_customer_sk#3, ss_item_sk#2, 5), true, [id=#7] +Arguments: 
hashpartitioning(ss_customer_sk#3, ss_item_sk#2, 5), ENSURE_REQUIREMENTS, [id=#7] (13) HashAggregate [codegen id : 3] Input [2]: [ss_customer_sk#3, ss_item_sk#2] @@ -132,7 +132,7 @@ Results [2]: [cs_bill_customer_sk#11, cs_item_sk#12] (22) Exchange Input [2]: [cs_bill_customer_sk#11, cs_item_sk#12] -Arguments: hashpartitioning(cs_bill_customer_sk#11, cs_item_sk#12, 5), true, [id=#13] +Arguments: hashpartitioning(cs_bill_customer_sk#11, cs_item_sk#12, 5), ENSURE_REQUIREMENTS, [id=#13] (23) HashAggregate [codegen id : 6] Input [2]: [cs_bill_customer_sk#11, cs_item_sk#12] @@ -157,18 +157,18 @@ Input [4]: [customer_sk#8, item_sk#9, customer_sk#14, item_sk#15] (27) HashAggregate [codegen id : 7] Input [2]: [customer_sk#8, customer_sk#14] Keys: [] -Functions [3]: [partial_sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))] +Functions [3]: [partial_sum(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)] Aggregate Attributes [3]: [sum#16, sum#17, sum#18] Results [3]: [sum#19, sum#20, sum#21] (28) Exchange Input [3]: [sum#19, sum#20, sum#21] -Arguments: SinglePartition, true, [id=#22] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#22] (29) HashAggregate [codegen id : 8] Input [3]: [sum#19, sum#20, sum#21] Keys: [] -Functions [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25] -Results [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23 AS store_only#26, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24 AS catalog_only#27, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25 AS store_and_catalog#28] +Functions [3]: [sum(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END), sum(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END), sum(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)] +Aggregate Attributes [3]: [sum(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END)#23, sum(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)#24, sum(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)#25] +Results [3]: [sum(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END)#23 AS store_only#26, sum(CASE WHEN 
(isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)#24 AS catalog_only#27, sum(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)#25 AS store_and_catalog#28] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt index c5921a11cd889..dc149c443c20f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt @@ -1,5 +1,5 @@ WholeStageCodegen (8) - HashAggregate [sum,sum,sum] [sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_only,catalog_only,store_and_catalog,sum,sum,sum] + HashAggregate [sum,sum,sum] [sum(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END),sum(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END),sum(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END),store_only,catalog_only,store_and_catalog,sum,sum,sum] InputAdapter Exchange #1 WholeStageCodegen (7) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt index e904ad94dd8fa..fadad48be3d6c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt @@ -84,7 +84,7 @@ Results [2]: [ss_customer_sk#3, ss_item_sk#2] (12) Exchange Input [2]: [ss_customer_sk#3, ss_item_sk#2] -Arguments: hashpartitioning(ss_customer_sk#3, ss_item_sk#2, 5), true, [id=#7] +Arguments: hashpartitioning(ss_customer_sk#3, ss_item_sk#2, 5), ENSURE_REQUIREMENTS, [id=#7] (13) HashAggregate [codegen id : 3] Input [2]: [ss_customer_sk#3, ss_item_sk#2] @@ -132,7 +132,7 @@ Results [2]: [cs_bill_customer_sk#11, cs_item_sk#12] (22) Exchange Input [2]: [cs_bill_customer_sk#11, cs_item_sk#12] -Arguments: hashpartitioning(cs_bill_customer_sk#11, cs_item_sk#12, 5), true, [id=#13] +Arguments: hashpartitioning(cs_bill_customer_sk#11, cs_item_sk#12, 5), ENSURE_REQUIREMENTS, [id=#13] (23) HashAggregate [codegen id : 6] Input [2]: [cs_bill_customer_sk#11, cs_item_sk#12] @@ -157,18 +157,18 @@ Input [4]: [customer_sk#8, item_sk#9, customer_sk#14, item_sk#15] (27) HashAggregate [codegen id : 7] Input [2]: [customer_sk#8, customer_sk#14] Keys: [] -Functions [3]: [partial_sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))] +Functions [3]: [partial_sum(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)] Aggregate Attributes [3]: [sum#16, sum#17, sum#18] 
Results [3]: [sum#19, sum#20, sum#21] (28) Exchange Input [3]: [sum#19, sum#20, sum#21] -Arguments: SinglePartition, true, [id=#22] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#22] (29) HashAggregate [codegen id : 8] Input [3]: [sum#19, sum#20, sum#21] Keys: [] -Functions [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25] -Results [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23 AS store_only#26, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24 AS catalog_only#27, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25 AS store_and_catalog#28] +Functions [3]: [sum(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END), sum(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END), sum(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)] +Aggregate Attributes [3]: [sum(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END)#23, sum(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)#24, sum(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)#25] +Results [3]: [sum(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END)#23 AS store_only#26, sum(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)#24 AS catalog_only#27, sum(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)#25 AS store_and_catalog#28] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt index c5921a11cd889..dc149c443c20f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt @@ -1,5 +1,5 @@ WholeStageCodegen (8) - HashAggregate [sum,sum,sum] [sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_only,catalog_only,store_and_catalog,sum,sum,sum] + HashAggregate [sum,sum,sum] [sum(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END),sum(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END),sum(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END),store_only,catalog_only,store_and_catalog,sum,sum,sum] InputAdapter Exchange #1 WholeStageCodegen (7) diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/explain.txt index 34eba382992c3..5d9c5794ae33b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/explain.txt @@ -162,20 +162,20 @@ Input [7]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_warehouse_sk#5, sm_type#10, (29) HashAggregate [codegen id : 5] Input [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, w_warehouse_name#16, sm_type#10, cc_name#13] Keys [3]: [substr(w_warehouse_name#16, 1, 20) AS substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13] -Functions [5]: [partial_sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] +Functions [5]: [partial_sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)] Aggregate Attributes [5]: [sum#19, sum#20, sum#21, sum#22, sum#23] Results [8]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] (30) Exchange Input [8]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] -Arguments: hashpartitioning(substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13, 5), true, [id=#29] +Arguments: hashpartitioning(substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13, 5), ENSURE_REQUIREMENTS, [id=#29] (31) HashAggregate [codegen id : 6] Input [8]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] Keys [3]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13] -Functions [5]: [sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] -Aggregate 
Attributes [5]: [sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33, sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34] -Results [8]: [substr(w_warehouse_name#16, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30 AS 30 days #36, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31 AS 31 - 60 days #37, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32 AS 61 - 90 days #38, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33 AS 91 - 120 days #39, sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34 AS >120 days #40] +Functions [5]: [sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END)#30, sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END)#31, sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END)#32, sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END)#33, sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)#34] +Results [8]: [substr(w_warehouse_name#16, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END)#30 AS 30 days #36, sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END)#31 AS 31 - 60 days #37, sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END)#32 AS 61 - 90 days #38, sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END)#33 AS 91 - 120 days #39, sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)#34 AS >120 days #40] (32) 
TakeOrderedAndProject Input [8]: [substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/simplified.txt index b25b16136992c..3526a87fad82e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/simplified.txt @@ -1,6 +1,6 @@ TakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,cc_name,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] WholeStageCodegen (6) - HashAggregate [substr(w_warehouse_name, 1, 20),sm_type,cc_name,sum,sum,sum,sum,sum] [sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),substr(w_warehouse_name, 1, 20),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] + HashAggregate [substr(w_warehouse_name, 1, 20),sm_type,cc_name,sum,sum,sum,sum,sum] [sum(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END),sum(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END),sum(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END),sum(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END),sum(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END),substr(w_warehouse_name, 1, 20),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] InputAdapter Exchange [substr(w_warehouse_name, 1, 20),sm_type,cc_name] #1 WholeStageCodegen (5) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt index 595cb2984ab75..b7dcf12fb7166 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt @@ -162,20 +162,20 @@ Input [6]: [cs_sold_date_sk#1, cs_ship_date_sk#2, w_warehouse_name#7, sm_type#10 (29) HashAggregate [codegen id : 5] Input [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, w_warehouse_name#7, sm_type#10, cc_name#13] Keys [3]: [substr(w_warehouse_name#7, 1, 20) AS substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13] -Functions [5]: [partial_sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) 
AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] +Functions [5]: [partial_sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)] Aggregate Attributes [5]: [sum#19, sum#20, sum#21, sum#22, sum#23] Results [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] (30) Exchange Input [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] -Arguments: hashpartitioning(substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13, 5), true, [id=#29] +Arguments: hashpartitioning(substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13, 5), ENSURE_REQUIREMENTS, [id=#29] (31) HashAggregate [codegen id : 6] Input [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] Keys [3]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13] -Functions [5]: [sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [5]: [sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33, sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34] -Results [8]: [substr(w_warehouse_name#7, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30 AS 30 days #36, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31 AS 31 - 60 days #37, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - 
cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32 AS 61 - 90 days #38, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33 AS 91 - 120 days #39, sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34 AS >120 days #40] +Functions [5]: [sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END)#30, sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END)#31, sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END)#32, sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END)#33, sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)#34] +Results [8]: [substr(w_warehouse_name#7, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END)#30 AS 30 days #36, sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END)#31 AS 31 - 60 days #37, sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END)#32 AS 61 - 90 days #38, sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END)#33 AS 91 - 120 days #39, sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)#34 AS >120 days #40] (32) TakeOrderedAndProject Input [8]: [substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt index 9ebaaac52930a..79f7b4f13350d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt @@ -1,6 +1,6 @@ TakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,cc_name,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] WholeStageCodegen (6) - HashAggregate [substr(w_warehouse_name, 1, 20),sm_type,cc_name,sum,sum,sum,sum,sum] [sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk 
- cs_sold_date_sk) > 60) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),substr(w_warehouse_name, 1, 20),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] + HashAggregate [substr(w_warehouse_name, 1, 20),sm_type,cc_name,sum,sum,sum,sum,sum] [sum(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END),sum(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END),sum(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END),sum(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END),sum(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END),substr(w_warehouse_name, 1, 20),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] InputAdapter Exchange [substr(w_warehouse_name, 1, 20),sm_type,cc_name] #1 WholeStageCodegen (5) From 38bbccab7560f2cfd00f9f85ca800434efe950b4 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Mon, 21 Dec 2020 11:11:25 -0800 Subject: [PATCH 0837/1009] [SPARK-33869][PYTHON][SQL][TESTS] Have a separate metastore directory for each PySpark test job ### What changes were proposed in this pull request? This PR proposes to have its own metastore directory to avoid potential conflict in catalog operations. ### Why are the changes needed? To make PySpark tests less flaky. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? Manually tested by trying some sleeps in https://github.com/apache/spark/pull/30873. Closes #30875 from HyukjinKwon/SPARK-33869. Authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- python/run-tests.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python/run-tests.py b/python/run-tests.py index 34800b0e9fa54..a13828d81f04f 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -83,12 +83,17 @@ def run_individual_python_test(target_dir, test_name, pyspark_python): tmp_dir = os.path.join(target_dir, str(uuid.uuid4())) os.mkdir(tmp_dir) env["TMPDIR"] = tmp_dir + metastore_dir = os.path.join(tmp_dir, str(uuid.uuid4())) + while os.path.isdir(metastore_dir): + metastore_dir = os.path.join(metastore_dir, str(uuid.uuid4())) + os.mkdir(metastore_dir) # Also override the JVM's temp directory by setting driver and executor options. java_options = "-Djava.io.tmpdir={0} -Dio.netty.tryReflectionSetAccessible=true".format(tmp_dir) spark_args = [ "--conf", "spark.driver.extraJavaOptions='{0}'".format(java_options), "--conf", "spark.executor.extraJavaOptions='{0}'".format(java_options), + "--conf", "spark.sql.warehouse.dir='{0}'".format(metastore_dir), "pyspark-shell" ] env["PYSPARK_SUBMIT_ARGS"] = " ".join(spark_args) From 4106731fdd508c1af6e15b4f9dc2bb139e047174 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Tue, 22 Dec 2020 06:27:27 +0900 Subject: [PATCH 0838/1009] [SPARK-33836][SS][PYTHON][FOLLOW-UP] Use test utils and clean up doctests in table and toTable ### What changes were proposed in this pull request? This PR proposes to: - Make doctests simpler to show the usage (since we're not running them now). - Use the test utils to drop the tables if exists. 
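As a rough, illustrative sketch of the two streaming table APIs whose doctests are cleaned up here (the table names and checkpoint path are made up; `spark` is the session provided by the PySpark shell, and the input table is assumed to exist):

```python
# Illustrative only -- not the exact doctest content.
sdf = spark.readStream.table("input_table")            # DataStreamReader.table

query = (sdf.writeStream
            .format("parquet")
            .option("checkpointLocation", "/tmp/checkpoint")
            .toTable("output_table"))                   # DataStreamWriter.toTable
query.stop()
```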
### Why are the changes needed? Better docs and code readability. ### Does this PR introduce _any_ user-facing change? No, dev-only. It includes some doc changes in unreleased branches. ### How was this patch tested? Manually tested. ```bash cd python ./run-tests --python-executable=python3.9,python3.8 --testnames "pyspark.sql.tests.test_streaming StreamingTests" ``` Closes #30873 from HyukjinKwon/SPARK-33836. Authored-by: HyukjinKwon Signed-off-by: Jungtaek Lim --- python/pyspark/sql/streaming.py | 28 +++++---------- python/pyspark/sql/tests/test_streaming.py | 40 ++++++++++------------ 2 files changed, 28 insertions(+), 40 deletions(-) diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py index 2c9c1f06274ce..5f122293f4a0a 100644 --- a/python/pyspark/sql/streaming.py +++ b/python/pyspark/sql/streaming.py @@ -974,9 +974,7 @@ def table(self, tableName): Examples -------- - >>> csv_sdf = spark.readStream.table('input_table') # doctest: +SKIP - >>> csv_sdf.isStreaming # doctest: +SKIP - True + >>> spark.readStream.table('input_table') # doctest: +SKIP """ if isinstance(tableName, str): return self._df(self._jreader.table(tableName)) @@ -1535,23 +1533,15 @@ def toTable(self, tableName, format=None, outputMode=None, partitionBy=None, que Examples -------- - >>> sq = sdf.writeStream.format('parquet').queryName('this_query').option( - ... 'checkpointLocation', '/tmp/checkpoint').toTable('output_table') # doctest: +SKIP - >>> sq.isActive # doctest: +SKIP - True - >>> sq.name # doctest: +SKIP - 'this_query' - >>> sq.stop() # doctest: +SKIP - >>> sq.isActive # doctest: +SKIP - False - >>> sq = sdf.writeStream.trigger(processingTime='5 seconds').toTable( - ... 'output_table', queryName='that_query', outputMode="append", format='parquet', + >>> sdf.writeStream.format('parquet').queryName('query').toTable('output_table') + ... # doctest: +SKIP + + >>> sdf.writeStream.trigger(processingTime='5 seconds').toTable( + ... 'output_table', + ... queryName='that_query', + ... outputMode="append", + ... format='parquet', ... 
checkpointLocation='/tmp/checkpoint') # doctest: +SKIP - >>> sq.name # doctest: +SKIP - 'that_query' - >>> sq.isActive # doctest: +SKIP - True - >>> sq.stop() # doctest: +SKIP """ # TODO(SPARK-33659): document the current behavior for DataStreamWriter.toTable API self.options(**options) diff --git a/python/pyspark/sql/tests/test_streaming.py b/python/pyspark/sql/tests/test_streaming.py index 44bfb2a7447ca..44cdde0f2e8a9 100644 --- a/python/pyspark/sql/tests/test_streaming.py +++ b/python/pyspark/sql/tests/test_streaming.py @@ -19,7 +19,6 @@ import shutil import tempfile import time -from random import randint from pyspark.sql import Row from pyspark.sql.functions import lit @@ -572,28 +571,27 @@ def collectBatch(df, id): q.stop() def test_streaming_read_from_table(self): - input_table_name = "sample_input_table_%d" % randint(0, 100000000) - self.spark.sql("CREATE TABLE %s (value string) USING parquet" % input_table_name) - self.spark.sql("INSERT INTO %s VALUES ('aaa'), ('bbb'), ('ccc')" % input_table_name) - df = self.spark.readStream.table(input_table_name) - self.assertTrue(df.isStreaming) - q = df.writeStream.format('memory').queryName('this_query').start() - q.processAllAvailable() - q.stop() - result = self.spark.sql("SELECT * FROM this_query ORDER BY value").collect() - self.assertEqual([Row(value='aaa'), Row(value='bbb'), Row(value='ccc')], result) + with self.table("input_table", "this_query"): + self.spark.sql("CREATE TABLE input_table (value string) USING parquet") + self.spark.sql("INSERT INTO input_table VALUES ('aaa'), ('bbb'), ('ccc')") + df = self.spark.readStream.table("input_table") + self.assertTrue(df.isStreaming) + q = df.writeStream.format('memory').queryName('this_query').start() + q.processAllAvailable() + q.stop() + result = self.spark.sql("SELECT * FROM this_query ORDER BY value").collect() + self.assertEqual( + set([Row(value='aaa'), Row(value='bbb'), Row(value='ccc')]), set(result)) def test_streaming_write_to_table(self): - output_table_name = "sample_output_table_%d" % randint(0, 100000000) - tmpPath = tempfile.mkdtemp() - shutil.rmtree(tmpPath) - df = self.spark.readStream.format("rate").option("rowsPerSecond", 10).load() - q = df.writeStream.toTable(output_table_name, format='parquet', checkpointLocation=tmpPath) - self.assertTrue(q.isActive) - time.sleep(3) - q.stop() - result = self.spark.sql("SELECT value FROM %s" % output_table_name).collect() - self.assertTrue(len(result) > 0) + with self.table("output_table"), tempfile.TemporaryDirectory() as tmpdir: + df = self.spark.readStream.format("rate").option("rowsPerSecond", 10).load() + q = df.writeStream.toTable("output_table", format='parquet', checkpointLocation=tmpdir) + self.assertTrue(q.isActive) + time.sleep(3) + q.stop() + result = self.spark.sql("SELECT value FROM output_table").collect() + self.assertTrue(len(result) > 0) if __name__ == "__main__": From 0bf3828ac42ca994daa296a3ce20e511db568321 Mon Sep 17 00:00:00 2001 From: Kyle Krueger Date: Mon, 21 Dec 2020 14:17:09 -0800 Subject: [PATCH 0839/1009] [MINOR] update dstream.py with more accurate exceptions ### What changes were proposed in this pull request? Reopened from https://github.com/apache/spark/pull/27525. The exception messages for dstream.py when using windows were improved to be specific about what sliding duration is important. ### Why are the changes needed? The batch interval of dstreams are improperly named as sliding windows. 
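As a quick, hedged illustration of the constraint behind these messages (host, port, and durations are made up): both the window duration and the slide duration passed to `window()` must be multiples of the parent DStream's batch interval.

```python
# Minimal sketch, assuming a local Spark installation; values are illustrative.
from pyspark import SparkContext
from pyspark.streaming import StreamingContext

sc = SparkContext("local[2]", "window-example")
ssc = StreamingContext(sc, batchDuration=5)            # 5-second batch interval

lines = ssc.socketTextStream("localhost", 9999)
# 30 and 10 are multiples of the 5-second batch interval, so this passes
# _validate_window_param; window(30, 7) would raise the ValueError shown below.
windowed = lines.window(windowDuration=30, slideDuration=10)
```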
The term sliding window is also used to reference the new window of a dstream collected over a window of rdds in a parent dstream. We should probably fix the naming convention of sliding window used in the dstream class, but for now this more explicit exception message may reduce confusion. ### Does this PR introduce any user-facing change? No ### How was this patch tested? It wasn't, since this is only a change to the exception message. Closes #30871 from kykrueger/kykrueger-patch-1. Authored-by: Kyle Krueger Signed-off-by: Dongjoon Hyun --- python/pyspark/streaming/dstream.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/pyspark/streaming/dstream.py b/python/pyspark/streaming/dstream.py index afb85709c771c..6ef164ae5a11a 100644 --- a/python/pyspark/streaming/dstream.py +++ b/python/pyspark/streaming/dstream.py @@ -421,10 +421,12 @@ def slice(self, begin, end): def _validate_window_param(self, window, slide): duration = self._jdstream.dstream().slideDuration().milliseconds() if int(window * 1000) % duration != 0: - raise ValueError("windowDuration must be multiple of the slide duration (%d ms)" + raise ValueError("windowDuration must be multiple of the parent " + "dstream's slide (batch) duration (%d ms)" % duration) if slide and int(slide * 1000) % duration != 0: - raise ValueError("slideDuration must be multiple of the slide duration (%d ms)" + raise ValueError("slideDuration must be multiple of the parent " + "dstream's slide (batch) duration (%d ms)" % duration) def window(self, windowDuration, slideDuration=None): From f62e957b31a281c542514c27da32ccda8e4bda46 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 21 Dec 2020 16:35:04 -0800 Subject: [PATCH 0840/1009] [SPARK-33873][CORE][TESTS] Test all compression codecs with encrypted spilling ### What changes were proposed in this pull request? This PR aims to test all compression codecs for encrypted spilling. ### Why are the changes needed? To improve test coverage. Currently, only `CompressionCodec.DEFAULT_COMPRESSION_CODEC` is under testing. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs with the updated test cases. Closes #30879 from dongjoon-hyun/SPARK-33873.
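For background on the configuration pair these tests exercise, a hedged sketch (the codec value is just one supported option, not a recommendation from this patch):

```python
# Illustrative only: enabling I/O encryption together with an explicit compression codec
# for data that Spark spills to disk or shuffles.
from pyspark.sql import SparkSession

spark = (SparkSession.builder
         .config("spark.io.encryption.enabled", "true")
         .config("spark.io.compression.codec", "zstd")
         .getOrCreate())
```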
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../util/collection/ExternalAppendOnlyMapSuite.scala | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala index 83595ba22aa57..81a145906d33c 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala @@ -220,13 +220,13 @@ class ExternalAppendOnlyMapSuite extends SparkFunSuite testSimpleSpilling() } - test("spilling with compression") { + private def testSimpleSpillingForAllCodecs(encrypt: Boolean) { // Keep track of which compression codec we're using to report in test failure messages var lastCompressionCodec: Option[String] = None try { allCompressionCodecs.foreach { c => lastCompressionCodec = Some(c) - testSimpleSpilling(Some(c)) + testSimpleSpilling(Some(c), encrypt) } } catch { // Include compression codec used in test failure message @@ -241,8 +241,12 @@ class ExternalAppendOnlyMapSuite extends SparkFunSuite } } + test("spilling with compression") { + testSimpleSpillingForAllCodecs(encrypt = false) + } + test("spilling with compression and encryption") { - testSimpleSpilling(Some(CompressionCodec.DEFAULT_COMPRESSION_CODEC), encrypt = true) + testSimpleSpillingForAllCodecs(encrypt = true) } /** From 7466031632c5f1771cad3f3131bc1a3e52be173a Mon Sep 17 00:00:00 2001 From: angerszhu Date: Tue, 22 Dec 2020 11:37:59 +0900 Subject: [PATCH 0841/1009] [SPARK-32106][SQL] Implement script transform in sql/core ### What changes were proposed in this pull request? * Implement `SparkScriptTransformationExec` based on `BaseScriptTransformationExec` * Implement `SparkScriptTransformationWriterThread` based on `BaseScriptTransformationWriterThread` for writing data * Add rule `SparkScripts` to support converting a script LogicalPlan to a SparkPlan in Spark SQL (without hive mode) * Add `SparkScriptTransformationSuite` to test Spark-specific cases * Add tests in `SQLQueryTestSuite` We will also close #29085. ### Why are the changes needed? To support using Script Transform without Hive. ### Does this PR introduce _any_ user-facing change? Yes. Users can use Script Transformation without Hive in no-serde mode, for example: **default no serde** ``` SELECT TRANSFORM(a, b, c) USING 'cat' AS (a int, b string, c long) FROM testData ``` **no serde with an explicit ROW FORMAT DELIMITED spec** ``` SELECT TRANSFORM(a, b, c) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' COLLECTION ITEMS TERMINATED BY '\u0002' MAP KEYS TERMINATED BY '\u0003' LINES TERMINATED BY '\n' NULL DEFINED AS 'null' USING 'cat' AS (a, b, c) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' COLLECTION ITEMS TERMINATED BY '\u0004' MAP KEYS TERMINATED BY '\u0005' LINES TERMINATED BY '\n' NULL DEFINED AS 'NULL' FROM testData ``` ### How was this patch tested? Added UT Closes #29414 from AngersZhuuuu/SPARK-32106-MINOR.
Authored-by: angerszhu Signed-off-by: Takeshi Yamamuro --- .../sql/catalyst/parser/AstBuilder.scala | 52 ++- .../sql/catalyst/parser/PlanParserSuite.scala | 113 +++++- .../spark/sql/execution/SparkPlanner.scala | 1 + .../SparkScriptTransformationExec.scala | 91 +++++ .../spark/sql/execution/SparkSqlParser.scala | 115 +++--- .../spark/sql/execution/SparkStrategies.scala | 14 + .../resources/sql-tests/inputs/transform.sql | 195 ++++++++++ .../sql-tests/results/transform.sql.out | 357 ++++++++++++++++++ .../apache/spark/sql/SQLQueryTestSuite.scala | 5 +- .../SparkScriptTransformationSuite.scala | 102 +++++ .../HiveScriptTransformationExec.scala | 2 + 11 files changed, 982 insertions(+), 65 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/SparkScriptTransformationExec.scala create mode 100644 sql/core/src/test/resources/sql-tests/inputs/transform.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/transform.sql.out create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/SparkScriptTransformationSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 9c265544f3227..2af84fa079d97 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -743,8 +743,33 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg selectClause.hints.asScala.foldRight(withWindow)(withHints) } + // Script Transform's input/output format. + type ScriptIOFormat = + (Seq[(String, String)], Option[String], Seq[(String, String)], Option[String]) + + protected def getRowFormatDelimited(ctx: RowFormatDelimitedContext): ScriptIOFormat = { + // TODO we should use the visitRowFormatDelimited function here. However HiveScriptIOSchema + // expects a seq of pairs in which the old parsers' token names are used as keys. + // Transforming the result of visitRowFormatDelimited would be quite a bit messier than + // retrieving the key value pairs ourselves. + val entries = entry("TOK_TABLEROWFORMATFIELD", ctx.fieldsTerminatedBy) ++ + entry("TOK_TABLEROWFORMATCOLLITEMS", ctx.collectionItemsTerminatedBy) ++ + entry("TOK_TABLEROWFORMATMAPKEYS", ctx.keysTerminatedBy) ++ + entry("TOK_TABLEROWFORMATNULL", ctx.nullDefinedAs) ++ + Option(ctx.linesSeparatedBy).toSeq.map { token => + val value = string(token) + validate( + value == "\n", + s"LINES TERMINATED BY only supports newline '\\n' right now: $value", + ctx) + "TOK_TABLEROWFORMATLINES" -> value + } + + (entries, None, Seq.empty, None) + } + /** - * Create a (Hive based) [[ScriptInputOutputSchema]]. + * Create a [[ScriptInputOutputSchema]]. 
*/ protected def withScriptIOSchema( ctx: ParserRuleContext, @@ -753,7 +778,30 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg outRowFormat: RowFormatContext, recordReader: Token, schemaLess: Boolean): ScriptInputOutputSchema = { - throw new ParseException("Script Transform is not supported", ctx) + + def format(fmt: RowFormatContext): ScriptIOFormat = fmt match { + case c: RowFormatDelimitedContext => + getRowFormatDelimited(c) + + case c: RowFormatSerdeContext => + throw new ParseException("TRANSFORM with serde is only supported in hive mode", ctx) + + // SPARK-32106: When there is no definition about format, we return empty result + // to use a built-in default Serde in SparkScriptTransformationExec. + case null => + (Nil, None, Seq.empty, None) + } + + val (inFormat, inSerdeClass, inSerdeProps, reader) = format(inRowFormat) + + val (outFormat, outSerdeClass, outSerdeProps, writer) = format(outRowFormat) + + ScriptInputOutputSchema( + inFormat, outFormat, + inSerdeClass, outSerdeClass, + inSerdeProps, outSerdeProps, + reader, writer, + schemaLess) } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 6fef18babedb6..54018198f619d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.IntegerType +import org.apache.spark.sql.types.{IntegerType, LongType, StringType} /** * Parser test cases for rules defined in [[CatalystSqlParser]] / [[AstBuilder]]. 
@@ -1031,4 +1031,115 @@ class PlanParserSuite extends AnalysisTest { assertEqual("select a, b from db.c;;;", table("db", "c").select('a, 'b)) assertEqual("select a, b from db.c; ;; ;", table("db", "c").select('a, 'b)) } + + test("SPARK-32106: TRANSFORM plan") { + // verify schema less + assertEqual( + """ + |SELECT TRANSFORM(a, b, c) + |USING 'cat' + |FROM testData + """.stripMargin, + ScriptTransformation( + Seq('a, 'b, 'c), + "cat", + Seq(AttributeReference("key", StringType)(), + AttributeReference("value", StringType)()), + UnresolvedRelation(TableIdentifier("testData")), + ScriptInputOutputSchema(List.empty, List.empty, None, None, + List.empty, List.empty, None, None, true)) + ) + + // verify without output schema + assertEqual( + """ + |SELECT TRANSFORM(a, b, c) + |USING 'cat' AS (a, b, c) + |FROM testData + """.stripMargin, + ScriptTransformation( + Seq('a, 'b, 'c), + "cat", + Seq(AttributeReference("a", StringType)(), + AttributeReference("b", StringType)(), + AttributeReference("c", StringType)()), + UnresolvedRelation(TableIdentifier("testData")), + ScriptInputOutputSchema(List.empty, List.empty, None, None, + List.empty, List.empty, None, None, false))) + + // verify with output schema + assertEqual( + """ + |SELECT TRANSFORM(a, b, c) + |USING 'cat' AS (a int, b string, c long) + |FROM testData + """.stripMargin, + ScriptTransformation( + Seq('a, 'b, 'c), + "cat", + Seq(AttributeReference("a", IntegerType)(), + AttributeReference("b", StringType)(), + AttributeReference("c", LongType)()), + UnresolvedRelation(TableIdentifier("testData")), + ScriptInputOutputSchema(List.empty, List.empty, None, None, + List.empty, List.empty, None, None, false))) + + // verify with ROW FORMAT DELIMETED + assertEqual( + """ + |SELECT TRANSFORM(a, b, c) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + | COLLECTION ITEMS TERMINATED BY '\u0002' + | MAP KEYS TERMINATED BY '\u0003' + | LINES TERMINATED BY '\n' + | NULL DEFINED AS 'null' + | USING 'cat' AS (a, b, c) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + | COLLECTION ITEMS TERMINATED BY '\u0004' + | MAP KEYS TERMINATED BY '\u0005' + | LINES TERMINATED BY '\n' + | NULL DEFINED AS 'NULL' + |FROM testData + """.stripMargin, + ScriptTransformation( + Seq('a, 'b, 'c), + "cat", + Seq(AttributeReference("a", StringType)(), + AttributeReference("b", StringType)(), + AttributeReference("c", StringType)()), + UnresolvedRelation(TableIdentifier("testData")), + ScriptInputOutputSchema( + Seq(("TOK_TABLEROWFORMATFIELD", "\t"), + ("TOK_TABLEROWFORMATCOLLITEMS", "\u0002"), + ("TOK_TABLEROWFORMATMAPKEYS", "\u0003"), + ("TOK_TABLEROWFORMATNULL", "null"), + ("TOK_TABLEROWFORMATLINES", "\n")), + Seq(("TOK_TABLEROWFORMATFIELD", "\t"), + ("TOK_TABLEROWFORMATCOLLITEMS", "\u0004"), + ("TOK_TABLEROWFORMATMAPKEYS", "\u0005"), + ("TOK_TABLEROWFORMATNULL", "NULL"), + ("TOK_TABLEROWFORMATLINES", "\n")), None, None, + List.empty, List.empty, None, None, false))) + + // verify with ROW FORMAT SERDE + intercept( + """ + |SELECT TRANSFORM(a, b, c) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' + | WITH SERDEPROPERTIES( + | "separatorChar" = "\t", + | "quoteChar" = "'", + | "escapeChar" = "\\") + | USING 'cat' AS (a, b, c) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' + | WITH SERDEPROPERTIES( + | "separatorChar" = "\t", + | "quoteChar" = "'", + | "escapeChar" = "\\") + |FROM testData + """.stripMargin, + "TRANSFORM with serde is only supported in hive mode") + } } diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanner.scala index c88fcecc9983b..6994aaf47dfba 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanner.scala @@ -43,6 +43,7 @@ class SparkPlanner(val session: SparkSession, val experimentalMethods: Experimen Window :: JoinSelection :: InMemoryScans :: + SparkScripts :: BasicOperators :: Nil) /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkScriptTransformationExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkScriptTransformationExec.scala new file mode 100644 index 0000000000000..75c91667012a3 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkScriptTransformationExec.scala @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution + +import java.io._ + +import org.apache.hadoop.conf.Configuration + +import org.apache.spark.TaskContext +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.types._ +import org.apache.spark.util.CircularBuffer + +/** + * Transforms the input by forking and running the specified script. + * + * @param input the set of expression that should be passed to the script. + * @param script the command that should be executed. + * @param output the attributes that are produced by the script. + * @param child logical plan whose output is transformed. + * @param ioschema the class set that defines how to handle input/output data. + */ +case class SparkScriptTransformationExec( + input: Seq[Expression], + script: String, + output: Seq[Attribute], + child: SparkPlan, + ioschema: ScriptTransformationIOSchema) + extends BaseScriptTransformationExec { + + override def processIterator( + inputIterator: Iterator[InternalRow], + hadoopConf: Configuration): Iterator[InternalRow] = { + + val (outputStream, proc, inputStream, stderrBuffer) = initProc + + val outputProjection = new InterpretedProjection(inputExpressionsWithoutSerde, child.output) + + // This new thread will consume the ScriptTransformation's input rows and write them to the + // external process. That process's output will be read by this current thread. 
+ val writerThread = SparkScriptTransformationWriterThread( + inputIterator.map(outputProjection), + inputExpressionsWithoutSerde.map(_.dataType), + ioschema, + outputStream, + proc, + stderrBuffer, + TaskContext.get(), + hadoopConf + ) + + val outputIterator = + createOutputIteratorWithoutSerde(writerThread, inputStream, proc, stderrBuffer) + + writerThread.start() + + outputIterator + } +} + +case class SparkScriptTransformationWriterThread( + iter: Iterator[InternalRow], + inputSchema: Seq[DataType], + ioSchema: ScriptTransformationIOSchema, + outputStream: OutputStream, + proc: Process, + stderrBuffer: CircularBuffer, + taskContext: TaskContext, + conf: Configuration) + extends BaseScriptTransformationWriterThread { + + override def processRows(): Unit = { + processRowsWithoutSerde() + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 722ca6f992064..e530b4c9407a6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -36,6 +36,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeConstants import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.internal.{HiveSerDe, SQLConf, VariableSubstitution} +import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION /** * Concrete parser for Spark SQL statements. @@ -478,70 +479,62 @@ class SparkSqlAstBuilder extends AstBuilder { "Unsupported operation: Used defined record reader/writer classes.", ctx) } - // Decode and input/output format. - type Format = (Seq[(String, String)], Option[String], Seq[(String, String)], Option[String]) - def format( - fmt: RowFormatContext, - configKey: String, - defaultConfigValue: String): Format = fmt match { - case c: RowFormatDelimitedContext => - // TODO we should use the visitRowFormatDelimited function here. However HiveScriptIOSchema - // expects a seq of pairs in which the old parsers' token names are used as keys. - // Transforming the result of visitRowFormatDelimited would be quite a bit messier than - // retrieving the key value pairs ourselves. - val entries = entry("TOK_TABLEROWFORMATFIELD", c.fieldsTerminatedBy) ++ - entry("TOK_TABLEROWFORMATCOLLITEMS", c.collectionItemsTerminatedBy) ++ - entry("TOK_TABLEROWFORMATMAPKEYS", c.keysTerminatedBy) ++ - entry("TOK_TABLEROWFORMATNULL", c.nullDefinedAs) ++ - Option(c.linesSeparatedBy).toSeq.map { token => - val value = string(token) - validate( - value == "\n", - s"LINES TERMINATED BY only supports newline '\\n' right now: $value", - c) - "TOK_TABLEROWFORMATLINES" -> value + if (!conf.getConf(CATALOG_IMPLEMENTATION).equals("hive")) { + super.withScriptIOSchema( + ctx, + inRowFormat, + recordWriter, + outRowFormat, + recordReader, + schemaLess) + } else { + def format( + fmt: RowFormatContext, + configKey: String, + defaultConfigValue: String): ScriptIOFormat = fmt match { + case c: RowFormatDelimitedContext => + getRowFormatDelimited(c) + + case c: RowFormatSerdeContext => + // Use a serde format. 
+ val SerdeInfo(None, None, Some(name), props) = visitRowFormatSerde(c) + + // SPARK-10310: Special cases LazySimpleSerDe + val recordHandler = if (name == "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") { + Option(conf.getConfString(configKey, defaultConfigValue)) + } else { + None } + (Seq.empty, Option(name), props.toSeq, recordHandler) + + case null => + // Use default (serde) format. + val name = conf.getConfString("hive.script.serde", + "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") + val props = Seq( + "field.delim" -> "\t", + "serialization.last.column.takes.rest" -> "true") + val recordHandler = Option(conf.getConfString(configKey, defaultConfigValue)) + (Nil, Option(name), props, recordHandler) + } - (entries, None, Seq.empty, None) - - case c: RowFormatSerdeContext => - // Use a serde format. - val SerdeInfo(None, None, Some(name), props) = visitRowFormatSerde(c) - - // SPARK-10310: Special cases LazySimpleSerDe - val recordHandler = if (name == "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") { - Option(conf.getConfString(configKey, defaultConfigValue)) - } else { - None - } - (Seq.empty, Option(name), props.toSeq, recordHandler) - - case null => - // Use default (serde) format. - val name = conf.getConfString("hive.script.serde", - "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") - val props = Seq( - "field.delim" -> "\t", - "serialization.last.column.takes.rest" -> "true") - val recordHandler = Option(conf.getConfString(configKey, defaultConfigValue)) - (Nil, Option(name), props, recordHandler) + val (inFormat, inSerdeClass, inSerdeProps, reader) = + format( + inRowFormat, "hive.script.recordreader", + "org.apache.hadoop.hive.ql.exec.TextRecordReader") + + val (outFormat, outSerdeClass, outSerdeProps, writer) = + format( + outRowFormat, "hive.script.recordwriter", + "org.apache.hadoop.hive.ql.exec.TextRecordWriter") + + ScriptInputOutputSchema( + inFormat, outFormat, + inSerdeClass, outSerdeClass, + inSerdeProps, outSerdeProps, + reader, writer, + schemaLess) } - - val (inFormat, inSerdeClass, inSerdeProps, reader) = - format( - inRowFormat, "hive.script.recordreader", "org.apache.hadoop.hive.ql.exec.TextRecordReader") - - val (outFormat, outSerdeClass, outSerdeProps, writer) = - format( - outRowFormat, "hive.script.recordwriter", - "org.apache.hadoop.hive.ql.exec.TextRecordWriter") - - ScriptInputOutputSchema( - inFormat, outFormat, - inSerdeClass, outSerdeClass, - inSerdeProps, outSerdeProps, - reader, writer, - schemaLess) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index f5f77b03c2b1b..a8d788f59d271 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -594,6 +594,20 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { } } + object SparkScripts extends Strategy { + def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { + case logical.ScriptTransformation(input, script, output, child, ioschema) => + SparkScriptTransformationExec( + input, + script, + output, + planLater(child), + ScriptTransformationIOSchema(ioschema) + ) :: Nil + case _ => Nil + } + } + object BasicOperators extends Strategy { def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { case d: DataWritingCommand => DataWritingCommandExec(d, planLater(d.query)) :: Nil diff --git 
a/sql/core/src/test/resources/sql-tests/inputs/transform.sql b/sql/core/src/test/resources/sql-tests/inputs/transform.sql new file mode 100644 index 0000000000000..65b060eca3a62 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/transform.sql @@ -0,0 +1,195 @@ +-- Test data. +CREATE OR REPLACE TEMPORARY VIEW t AS SELECT * FROM VALUES +('1', true, unhex('537061726B2053514C'), tinyint(1), 1, smallint(100), bigint(1), float(1.0), 1.0, Decimal(1.0), timestamp('1997-01-02'), date('2000-04-01')), +('2', false, unhex('537061726B2053514C'), tinyint(2), 2, smallint(200), bigint(2), float(2.0), 2.0, Decimal(2.0), timestamp('1997-01-02 03:04:05'), date('2000-04-02')), +('3', true, unhex('537061726B2053514C'), tinyint(3), 3, smallint(300), bigint(3), float(3.0), 3.0, Decimal(3.0), timestamp('1997-02-10 17:32:01-08'), date('2000-04-03')) +AS t(a, b, c, d, e, f, g, h, i, j, k, l); + +SELECT TRANSFORM(a) +USING 'cat' AS (a) +FROM t; + +-- with non-exist command +SELECT TRANSFORM(a) +USING 'some_non_existent_command' AS (a) +FROM t; + +-- with non-exist file +SELECT TRANSFORM(a) +USING 'python some_non_existent_file' AS (a) +FROM t; + +-- common supported data types between no serde and serde transform +SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( + SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) + USING 'cat' AS ( + a string, + b boolean, + c binary, + d tinyint, + e int, + f smallint, + g long, + h float, + i double, + j decimal(38, 18), + k timestamp, + l date) + FROM t +) tmp; + +-- common supported data types between no serde and serde transform +SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( + SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) + USING 'cat' AS ( + a string, + b string, + c string, + d string, + e string, + f string, + g string, + h string, + i string, + j string, + k string, + l string) + FROM t +) tmp; + +-- SPARK-32388 handle schema less +SELECT TRANSFORM(a) +USING 'cat' +FROM t; + +SELECT TRANSFORM(a, b) +USING 'cat' +FROM t; + +SELECT TRANSFORM(a, b, c) +USING 'cat' +FROM t; + +-- return null when return string incompatible (no serde) +SELECT TRANSFORM(a, b, c, d, e, f, g, h, i) +USING 'cat' AS (a int, b short, c long, d byte, e float, f double, g decimal(38, 18), h date, i timestamp) +FROM VALUES +('a','','1231a','a','213.21a','213.21a','0a.21d','2000-04-01123','1997-0102 00:00:') tmp(a, b, c, d, e, f, g, h, i); + +-- SPARK-28227: transform can't run with aggregation +SELECT TRANSFORM(b, max(a), sum(f)) +USING 'cat' AS (a, b) +FROM t +GROUP BY b; + +-- transform use MAP +MAP a, b USING 'cat' AS (a, b) FROM t; + +-- transform use REDUCE +REDUCE a, b USING 'cat' AS (a, b) FROM t; + +-- transform with defined row format delimit +SELECT TRANSFORM(a, b, c, null) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '@' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' +USING 'cat' AS (a, b, c, d) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '@' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' +FROM t; + +SELECT TRANSFORM(a, b, c, null) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '@' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' +USING 'cat' AS (d) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '@' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' +FROM t; + +-- transform with defined row format delimit handle schema with correct type +SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( + SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + LINES 
TERMINATED BY '\n' + NULL DEFINED AS 'NULL' + USING 'cat' AS ( + a string, + b boolean, + c binary, + d tinyint, + e int, + f smallint, + g long, + h float, + i double, + j decimal(38, 18), + k timestamp, + l date) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' + FROM t +) tmp; + +-- transform with defined row format delimit handle schema with wrong type +SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( + SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' + USING 'cat' AS ( + a string, + b long, + c binary, + d tinyint, + e int, + f smallint, + g long, + h float, + i double, + j decimal(38, 18), + k int, + l long) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' + FROM t +) tmp; + +-- transform with defined row format delimit LINE TERMINATED BY only support '\n' +SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( + SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + LINES TERMINATED BY '@' + NULL DEFINED AS 'NULL' + USING 'cat' AS ( + a string, + b string, + c string, + d string, + e string, + f string, + g string, + h string, + i string, + j string, + k string, + l string) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + LINES TERMINATED BY '@' + NULL DEFINED AS 'NULL' + FROM t +) tmp; diff --git a/sql/core/src/test/resources/sql-tests/results/transform.sql.out b/sql/core/src/test/resources/sql-tests/results/transform.sql.out new file mode 100644 index 0000000000000..83ab5cb729c24 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/transform.sql.out @@ -0,0 +1,357 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 18 + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW t AS SELECT * FROM VALUES +('1', true, unhex('537061726B2053514C'), tinyint(1), 1, smallint(100), bigint(1), float(1.0), 1.0, Decimal(1.0), timestamp('1997-01-02'), date('2000-04-01')), +('2', false, unhex('537061726B2053514C'), tinyint(2), 2, smallint(200), bigint(2), float(2.0), 2.0, Decimal(2.0), timestamp('1997-01-02 03:04:05'), date('2000-04-02')), +('3', true, unhex('537061726B2053514C'), tinyint(3), 3, smallint(300), bigint(3), float(3.0), 3.0, Decimal(3.0), timestamp('1997-02-10 17:32:01-08'), date('2000-04-03')) +AS t(a, b, c, d, e, f, g, h, i, j, k, l) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT TRANSFORM(a) +USING 'cat' AS (a) +FROM t +-- !query schema +struct +-- !query output +1 +2 +3 + + +-- !query +SELECT TRANSFORM(a) +USING 'some_non_existent_command' AS (a) +FROM t +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkException +Subprocess exited with status 127. Error: /bin/bash: some_non_existent_command: command not found + + +-- !query +SELECT TRANSFORM(a) +USING 'python some_non_existent_file' AS (a) +FROM t +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkException +Subprocess exited with status 2. 
Error: python: can't open file 'some_non_existent_file': [Errno 2] No such file or directory + + +-- !query +SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( + SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) + USING 'cat' AS ( + a string, + b boolean, + c binary, + d tinyint, + e int, + f smallint, + g long, + h float, + i double, + j decimal(38, 18), + k timestamp, + l date) + FROM t +) tmp +-- !query schema +struct +-- !query output +1 true Spark SQL 1 1 100 1 1.0 1.0 1.000000000000000000 1997-01-02 00:00:00 2000-04-01 +2 false Spark SQL 2 2 200 2 2.0 2.0 2.000000000000000000 1997-01-02 03:04:05 2000-04-02 +3 true Spark SQL 3 3 300 3 3.0 3.0 3.000000000000000000 1997-02-10 17:32:01 2000-04-03 + + +-- !query +SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( + SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) + USING 'cat' AS ( + a string, + b string, + c string, + d string, + e string, + f string, + g string, + h string, + i string, + j string, + k string, + l string) + FROM t +) tmp +-- !query schema +struct +-- !query output +1 true Spark SQL 1 1 100 1 1.0 1.0 1 1997-01-02 00:00:00 2000-04-01 +2 false Spark SQL 2 2 200 2 2.0 2.0 2 1997-01-02 03:04:05 2000-04-02 +3 true Spark SQL 3 3 300 3 3.0 3.0 3 1997-02-10 17:32:01 2000-04-03 + + +-- !query +SELECT TRANSFORM(a) +USING 'cat' +FROM t +-- !query schema +struct +-- !query output +1 NULL +2 NULL +3 NULL + + +-- !query +SELECT TRANSFORM(a, b) +USING 'cat' +FROM t +-- !query schema +struct +-- !query output +1 true +2 false +3 true + + +-- !query +SELECT TRANSFORM(a, b, c) +USING 'cat' +FROM t +-- !query schema +struct +-- !query output +1 true +2 false +3 true + + +-- !query +SELECT TRANSFORM(a, b, c, d, e, f, g, h, i) +USING 'cat' AS (a int, b short, c long, d byte, e float, f double, g decimal(38, 18), h date, i timestamp) +FROM VALUES +('a','','1231a','a','213.21a','213.21a','0a.21d','2000-04-01123','1997-0102 00:00:') tmp(a, b, c, d, e, f, g, h, i) +-- !query schema +struct +-- !query output +NULL NULL NULL NULL NULL NULL NULL NULL NULL + + +-- !query +SELECT TRANSFORM(b, max(a), sum(f)) +USING 'cat' AS (a, b) +FROM t +GROUP BY b +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +mismatched input 'GROUP' expecting {, ';'}(line 4, pos 0) + +== SQL == +SELECT TRANSFORM(b, max(a), sum(f)) +USING 'cat' AS (a, b) +FROM t +GROUP BY b +^^^ + + +-- !query +MAP a, b USING 'cat' AS (a, b) FROM t +-- !query schema +struct +-- !query output +1 true +2 false +3 true + + +-- !query +REDUCE a, b USING 'cat' AS (a, b) FROM t +-- !query schema +struct +-- !query output +1 true +2 false +3 true + + +-- !query +SELECT TRANSFORM(a, b, c, null) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '@' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' +USING 'cat' AS (a, b, c, d) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '@' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' +FROM t +-- !query schema +struct +-- !query output +1 true Spark SQL null +2 false Spark SQL null +3 true Spark SQL null + + +-- !query +SELECT TRANSFORM(a, b, c, null) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '@' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' +USING 'cat' AS (d) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '@' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' +FROM t +-- !query schema +struct +-- !query output +1 +2 +3 + + +-- !query +SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( + SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) + ROW 
FORMAT DELIMITED + FIELDS TERMINATED BY ',' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' + USING 'cat' AS ( + a string, + b boolean, + c binary, + d tinyint, + e int, + f smallint, + g long, + h float, + i double, + j decimal(38, 18), + k timestamp, + l date) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' + FROM t +) tmp +-- !query schema +struct +-- !query output +1 true Spark SQL 1 1 100 1 1.0 1.0 1.000000000000000000 1997-01-02 00:00:00 2000-04-01 +2 false Spark SQL 2 2 200 2 2.0 2.0 2.000000000000000000 1997-01-02 03:04:05 2000-04-02 +3 true Spark SQL 3 3 300 3 3.0 3.0 3.000000000000000000 1997-02-10 17:32:01 2000-04-03 + + +-- !query +SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( + SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' + USING 'cat' AS ( + a string, + b long, + c binary, + d tinyint, + e int, + f smallint, + g long, + h float, + i double, + j decimal(38, 18), + k int, + l long) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' + FROM t +) tmp +-- !query schema +struct +-- !query output +1 NULL Spark SQL 1 1 100 1 1.0 1.0 1.000000000000000000 NULL NULL +2 NULL Spark SQL 2 2 200 2 2.0 2.0 2.000000000000000000 NULL NULL +3 NULL Spark SQL 3 3 300 3 3.0 3.0 3.000000000000000000 NULL NULL + + +-- !query +SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( + SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + LINES TERMINATED BY '@' + NULL DEFINED AS 'NULL' + USING 'cat' AS ( + a string, + b string, + c string, + d string, + e string, + f string, + g string, + h string, + i string, + j string, + k string, + l string) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + LINES TERMINATED BY '@' + NULL DEFINED AS 'NULL' + FROM t +) tmp +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +LINES TERMINATED BY only supports newline '\n' right now: @(line 3, pos 4) + +== SQL == +SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( + SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) + ROW FORMAT DELIMITED +----^^^ + FIELDS TERMINATED BY ',' + LINES TERMINATED BY '@' + NULL DEFINED AS 'NULL' + USING 'cat' AS ( + a string, + b string, + c string, + d string, + e string, + f string, + g string, + h string, + i string, + j string, + k string, + l string) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + LINES TERMINATED BY '@' + NULL DEFINED AS 'NULL' + FROM t +) tmp diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 02c6fba9725d3..eb2caa61e1590 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -24,7 +24,7 @@ import java.util.Locale import scala.collection.mutable.ArrayBuffer import scala.util.control.NonFatal -import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.{SparkConf, SparkException, TestUtils} import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.SQLHelper @@ -260,6 +260,9 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper 
newLine.startsWith("--") && !newLine.startsWith("--QUERY-DELIMITER") } + // SPARK-32106 Since we add SQL test 'transform.sql' will use `cat` command, + // here we need to check command available + assume(TestUtils.testCommandAvailable("/bin/bash")) val input = fileToString(new File(testCase.inputFile)) val (comments, code) = splitCommentsAndCodes(input) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkScriptTransformationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkScriptTransformationSuite.scala new file mode 100644 index 0000000000000..6ff7c5d6d2f3a --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkScriptTransformationSuite.scala @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution + +import org.apache.spark.{SparkException, TestUtils} +import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.test.SharedSparkSession + +class SparkScriptTransformationSuite extends BaseScriptTransformationSuite with SharedSparkSession { + import testImplicits._ + + override def createScriptTransformationExec( + input: Seq[Expression], + script: String, + output: Seq[Attribute], + child: SparkPlan, + ioschema: ScriptTransformationIOSchema): BaseScriptTransformationExec = { + SparkScriptTransformationExec( + input = input, + script = script, + output = output, + child = child, + ioschema = ioschema + ) + } + + test("SPARK-32106: TRANSFORM with serde without hive should throw exception") { + assume(TestUtils.testCommandAvailable("/bin/bash")) + withTempView("v") { + val df = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") + df.createTempView("v") + + val e = intercept[ParseException] { + sql( + """ + |SELECT TRANSFORM (a) + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + |USING 'cat' AS (a) + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + |FROM v + """.stripMargin) + }.getMessage + assert(e.contains("TRANSFORM with serde is only supported in hive mode")) + } + } + + test("SPARK-32106: TRANSFORM doesn't support ArrayType/MapType/StructType " + + "as output data type (no serde)") { + assume(TestUtils.testCommandAvailable("/bin/bash")) + // check for ArrayType + val e1 = intercept[SparkException] { + sql( + """ + |SELECT TRANSFORM(a) + |USING 'cat' AS (a array) + |FROM VALUES (array(1, 1), map('1', 1), struct(1, 'a')) t(a, b, c) + """.stripMargin).collect() + }.getMessage + assert(e1.contains("SparkScriptTransformation without serde does not support" + + " ArrayType as output data type")) + + // check for MapType + val e2 = intercept[SparkException] { + sql( + """ + |SELECT 
TRANSFORM(b) + |USING 'cat' AS (b map) + |FROM VALUES (array(1, 1), map('1', 1), struct(1, 'a')) t(a, b, c) + """.stripMargin).collect() + }.getMessage + assert(e2.contains("SparkScriptTransformation without serde does not support" + + " MapType as output data type")) + + // check for StructType + val e3 = intercept[SparkException] { + sql( + """ + |SELECT TRANSFORM(c) + |USING 'cat' AS (c struct) + |FROM VALUES (array(1, 1), map('1', 1), struct(1, 'a')) t(a, b, c) + """.stripMargin).collect() + }.getMessage + assert(e3.contains("SparkScriptTransformation without serde does not support" + + " StructType as output data type")) + } +} diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala index 26baff3d83eec..4b03cff5e8c8e 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala @@ -45,6 +45,8 @@ import org.apache.spark.util.{CircularBuffer, Utils} * @param input the set of expression that should be passed to the script. * @param script the command that should be executed. * @param output the attributes that are produced by the script. + * @param child logical plan whose output is transformed. + * @param ioschema the class set that defines how to handle input/output data. */ case class HiveScriptTransformationExec( input: Seq[Expression], From f5fd10b1bc519cc05c98f5235fda3d59155cda9d Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 22 Dec 2020 03:07:26 +0000 Subject: [PATCH 0842/1009] [SPARK-33834][SQL] Verify ALTER TABLE CHANGE COLUMN with Char and Varchar ### What changes were proposed in this pull request? Verify ALTER TABLE CHANGE COLUMN with Char and Varchar and avoid unexpected change For v1 table, changing type is not allowed, we fix a regression that uses the replaced string instead of the original char/varchar type when altering char/varchar columns For v2 table, char/varchar to string, char(x) to char(x), char(x)/varchar(x) to varchar(y) if x <=y are valid cases, other changes are invalid ### Why are the changes needed? Verify ALTER TABLE CHANGE COLUMN with Char and Varchar and avoid unexpected change ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? new test Closes #30833 from yaooqinn/SPARK-33834. 
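As a rough illustration of the v2 rules described above (the table name and the `USING foo` provider below are hypothetical placeholders for a v2 source, not taken from this patch):
```
CREATE TABLE t (c CHAR(4)) USING foo;

ALTER TABLE t CHANGE COLUMN c TYPE CHAR(4);     -- OK: same char length
ALTER TABLE t CHANGE COLUMN c TYPE STRING;      -- OK: char/varchar to string
ALTER TABLE t CHANGE COLUMN c TYPE VARCHAR(5);  -- OK: char(4) to varchar(5), since 4 <= 5
ALTER TABLE t CHANGE COLUMN c TYPE VARCHAR(3);  -- fails: char(4) cannot be cast to varchar(3)
ALTER TABLE t CHANGE COLUMN c TYPE CHAR(5);     -- fails: changing the char length is invalid
```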
Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 3 +- .../sql/catalyst/analysis/CheckAnalysis.scala | 18 +- .../sql/catalyst/catalog/SessionCatalog.scala | 18 +- .../spark/sql/execution/command/ddl.scala | 2 +- .../command/CharVarcharDDLTestBase.scala | 159 ++++++++++++++++++ .../spark/sql/HiveCharVarcharTestSuite.scala | 24 +++ 6 files changed, 216 insertions(+), 8 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/CharVarcharDDLTestBase.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 8d8e00b80c506..ba24914cb6835 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -3481,7 +3481,8 @@ class Analyzer(override val catalogManager: CatalogManager) Some(typeChange) } else { val (fieldNames, field) = fieldOpt.get - if (field.dataType == typeChange.newDataType()) { + val dt = CharVarcharUtils.getRawType(field.metadata).getOrElse(field.dataType) + if (dt == typeChange.newDataType()) { // The user didn't want the field to change, so remove this change None } else { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 472de096b2f22..a4dfbe85abfd7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -523,7 +523,12 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { TypeUtils.failWithIntervalType(add.dataType()) colsToAdd(parentName) = fieldsAdded :+ add.fieldNames().last case update: UpdateColumnType => - val field = findField("update", update.fieldNames) + val field = { + val f = findField("update", update.fieldNames) + CharVarcharUtils.getRawType(f.metadata) + .map(dt => f.copy(dataType = dt)) + .getOrElse(f) + } val fieldName = update.fieldNames.quoted update.newDataType match { case _: StructType => @@ -544,7 +549,16 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { case _ => // update is okay } - if (!Cast.canUpCast(field.dataType, update.newDataType)) { + + // We don't need to handle nested types here which shall fail before + def canAlterColumnType(from: DataType, to: DataType): Boolean = (from, to) match { + case (CharType(l1), CharType(l2)) => l1 == l2 + case (CharType(l1), VarcharType(l2)) => l1 <= l2 + case (VarcharType(l1), VarcharType(l2)) => l1 <= l2 + case _ => Cast.canUpCast(from, to) + } + + if (!canAlterColumnType(field.dataType, update.newDataType)) { alter.failAnalysis( s"Cannot update ${table.name} field $fieldName: " + s"${field.dataType.simpleString} cannot be cast to " + diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 9814f4b3aa75b..9b542d6bd95ce 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -470,18 +470,28 @@ class SessionCatalog( /** * Retrieve the metadata of an existing permanent table/view. 
If no database is specified, * assume the table/view is in the current database. + * We replace char/varchar with "annotated" string type in the table schema, as the query + * engine doesn't support char/varchar yet. */ @throws[NoSuchDatabaseException] @throws[NoSuchTableException] def getTableMetadata(name: TableIdentifier): CatalogTable = { + val t = getTableRawMetadata(name) + t.copy(schema = CharVarcharUtils.replaceCharVarcharWithStringInSchema(t.schema)) + } + + /** + * Retrieve the metadata of an existing permanent table/view. If no database is specified, + * assume the table/view is in the current database. + */ + @throws[NoSuchDatabaseException] + @throws[NoSuchTableException] + def getTableRawMetadata(name: TableIdentifier): CatalogTable = { val db = formatDatabaseName(name.database.getOrElse(getCurrentDatabase)) val table = formatTableName(name.table) requireDbExists(db) requireTableExists(TableIdentifier(table, Some(db))) - val t = externalCatalog.getTable(db, table) - // We replace char/varchar with "annotated" string type in the table schema, as the query - // engine doesn't support char/varchar yet. - t.copy(schema = CharVarcharUtils.replaceCharVarcharWithStringInSchema(t.schema)) + externalCatalog.getTable(db, table) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 604de860f04c0..9300e25b8650e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -342,7 +342,7 @@ case class AlterTableChangeColumnCommand( // TODO: support change column name/dataType/metadata/position. override def run(sparkSession: SparkSession): Seq[Row] = { val catalog = sparkSession.sessionState.catalog - val table = catalog.getTableMetadata(tableName) + val table = catalog.getTableRawMetadata(tableName) val resolver = sparkSession.sessionState.conf.resolver DDLUtils.verifyAlterTableType(catalog, table, isView = false) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CharVarcharDDLTestBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CharVarcharDDLTestBase.scala new file mode 100644 index 0000000000000..748dd7ee10c34 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CharVarcharDDLTestBase.scala @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{AnalysisException, QueryTest} +import org.apache.spark.sql.catalyst.util.CharVarcharUtils +import org.apache.spark.sql.connector.InMemoryPartitionTableCatalog +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.types._ + +trait CharVarcharDDLTestBase extends QueryTest with SQLTestUtils { + + def format: String + + def checkColType(f: StructField, dt: DataType): Unit = { + assert(f.dataType == CharVarcharUtils.replaceCharVarcharWithString(dt)) + assert(CharVarcharUtils.getRawType(f.metadata).contains(dt)) + } + + test("allow to change column for char(x) to char(y), x == y") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c CHAR(4)) USING $format") + sql("ALTER TABLE t CHANGE COLUMN c TYPE CHAR(4)") + checkColType(spark.table("t").schema(1), CharType(4)) + } + } + + test("not allow to change column for char(x) to char(y), x != y") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c CHAR(4)) USING $format") + val e = intercept[AnalysisException] { + sql("ALTER TABLE t CHANGE COLUMN c TYPE CHAR(5)") + } + val v1 = e.getMessage contains "'CharType(4)' to 'c' with type 'CharType(5)'" + val v2 = e.getMessage contains "char(4) cannot be cast to char(5)" + assert(v1 || v2) + } + } + + test("not allow to change column from string to char type") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c STRING) USING $format") + val e = intercept[AnalysisException] { + sql("ALTER TABLE t CHANGE COLUMN c TYPE CHAR(5)") + } + val v1 = e.getMessage contains "'StringType' to 'c' with type 'CharType(5)'" + val v2 = e.getMessage contains "string cannot be cast to char(5)" + assert(v1 || v2) + } + } + + test("not allow to change column from int to char type") { + withTable("t") { + sql(s"CREATE TABLE t(i int, c CHAR(4)) USING $format") + val e = intercept[AnalysisException] { + sql("ALTER TABLE t CHANGE COLUMN i TYPE CHAR(5)") + } + val v1 = e.getMessage contains "'IntegerType' to 'i' with type 'CharType(5)'" + val v2 = e.getMessage contains "int cannot be cast to char(5)" + assert(v1 || v2) + } + } + + test("allow to change column for varchar(x) to varchar(y), x == y") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c VARCHAR(4)) USING $format") + sql("ALTER TABLE t CHANGE COLUMN c TYPE VARCHAR(4)") + checkColType(spark.table("t").schema(1), VarcharType(4)) + } + } + + test("not allow to change column for varchar(x) to varchar(y), x > y") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c VARCHAR(4)) USING $format") + val e = intercept[AnalysisException] { + sql("ALTER TABLE t CHANGE COLUMN c TYPE VARCHAR(3)") + } + val v1 = e.getMessage contains "'VarcharType(4)' to 'c' with type 'VarcharType(3)'" + val v2 = e.getMessage contains "varchar(4) cannot be cast to varchar(3)" + assert(v1 || v2) + } + } +} + +class FileSourceCharVarcharDDLTestSuite extends CharVarcharDDLTestBase with SharedSparkSession { + override def format: String = "parquet" + override protected def sparkConf: SparkConf = { + super.sparkConf.set(SQLConf.USE_V1_SOURCE_LIST, "parquet") + } +} + +class DSV2CharVarcharDDLTestSuite extends CharVarcharDDLTestBase + with SharedSparkSession { + override def format: String = "foo" + protected override def sparkConf = { + super.sparkConf + .set("spark.sql.catalog.testcat", classOf[InMemoryPartitionTableCatalog].getName) + .set(SQLConf.DEFAULT_CATALOG.key, "testcat") + } + 
+ test("allow to change change column from char to string type") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c CHAR(4)) USING $format") + sql("ALTER TABLE t CHANGE COLUMN c TYPE STRING") + assert(spark.table("t").schema(1).dataType === StringType) + } + } + + test("allow to change column from char(x) to varchar(y) type x <= y") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c CHAR(4)) USING $format") + sql("ALTER TABLE t CHANGE COLUMN c TYPE VARCHAR(4)") + checkColType(spark.table("t").schema(1), VarcharType(4)) + } + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c CHAR(4)) USING $format") + sql("ALTER TABLE t CHANGE COLUMN c TYPE VARCHAR(5)") + checkColType(spark.table("t").schema(1), VarcharType(5)) + } + } + + test("allow to change column from varchar(x) to varchar(y) type x <= y") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c VARCHAR(4)) USING $format") + sql("ALTER TABLE t CHANGE COLUMN c TYPE VARCHAR(4)") + checkColType(spark.table("t").schema(1), VarcharType(4)) + sql("ALTER TABLE t CHANGE COLUMN c TYPE VARCHAR(5)") + checkColType(spark.table("t").schema(1), VarcharType(5)) + + } + } + + test("not allow to change column from char(x) to varchar(y) type x > y") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c CHAR(4)) USING $format") + val e = intercept[AnalysisException] { + sql("ALTER TABLE t CHANGE COLUMN c TYPE VARCHAR(3)") + } + assert(e.getMessage contains "char(4) cannot be cast to varchar(3)") + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala index 55d305fda4f96..f48cfb8dfb899 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql +import org.apache.spark.sql.execution.command.CharVarcharDDLTestBase import org.apache.spark.sql.hive.test.TestHiveSingleton class HiveCharVarcharTestSuite extends CharVarcharTestSuite with TestHiveSingleton { @@ -41,3 +42,26 @@ class HiveCharVarcharTestSuite extends CharVarcharTestSuite with TestHiveSinglet super.afterAll() } } + +class HiveCharVarcharDDLTestSuite extends CharVarcharDDLTestBase with TestHiveSingleton { + + // The default Hive serde doesn't support nested null values. + override def format: String = "hive OPTIONS(fileFormat='parquet')" + + private var originalPartitionMode = "" + + override protected def beforeAll(): Unit = { + super.beforeAll() + originalPartitionMode = spark.conf.get("hive.exec.dynamic.partition.mode", "") + spark.conf.set("hive.exec.dynamic.partition.mode", "nonstrict") + } + + override protected def afterAll(): Unit = { + if (originalPartitionMode == "") { + spark.conf.unset("hive.exec.dynamic.partition.mode") + } else { + spark.conf.set("hive.exec.dynamic.partition.mode", originalPartitionMode) + } + super.afterAll() + } +} From 16ae3a5c12f1bbd6c9f5f735bfad0cf51fdf2182 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 21 Dec 2020 19:48:58 -0800 Subject: [PATCH 0843/1009] [MINOR][CORE] Remove unused variable CompressionCodec.DEFAULT_COMPRESSION_CODEC ### What changes were proposed in this pull request? This PR removed an unused variable `CompressionCodec.DEFAULT_COMPRESSION_CODEC`. ### Why are the changes needed? Apache Spark 3.0.0 centralized this default value to `IO_COMPRESSION_CODEC.defaultValue` via [SPARK-26462](https://github.com/apache/spark/pull/23447). 
We had better remove this variable to avoid any potential confusion in the future. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CI compilation. Closes #30880 from dongjoon-hyun/minor. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- core/src/main/scala/org/apache/spark/io/CompressionCodec.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala index 5205a2d568ac3..fa663a32d4929 100644 --- a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala +++ b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala @@ -107,7 +107,6 @@ private[spark] object CompressionCodec { } val FALLBACK_COMPRESSION_CODEC = "snappy" - val DEFAULT_COMPRESSION_CODEC = "lz4" val ALL_COMPRESSION_CODECS = shortCompressionCodecNames.values.toSeq } From b88745565b96ba1f9ec55b369a4aefab77684981 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 21 Dec 2020 20:24:23 -0800 Subject: [PATCH 0844/1009] [SPARK-33700][SQL] Avoid file meta reading when enableFilterPushDown is true and filters is empty for Orc ### What changes were proposed in this pull request? Orc support filter push down optimization, but this optimization will read file meta from external storage even if filters is empty. This pr add a extra `filters.nonEmpty` when `spark.sql.orc.filterPushdown` is true ### Why are the changes needed? Orc filters push down operation should only triggered when `filters.nonEmpty` is true ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass the Jenkins or GitHub Action Closes #30663 from LuciferYang/pushdownfilter-when-filter-nonempty. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- .../spark/sql/execution/datasources/orc/OrcFileFormat.scala | 2 +- .../datasources/v2/orc/OrcPartitionReaderFactory.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala index 2671682e18f31..83504d8c4458a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala @@ -184,7 +184,7 @@ class OrcFileFormat Iterator.empty } else { // ORC predicate pushdown - if (orcFilterPushDown) { + if (orcFilterPushDown && filters.nonEmpty) { OrcUtils.readCatalystSchema(filePath, conf, ignoreCorruptFiles).foreach { fileSchema => OrcFilters.createFilter(fileSchema, filters).foreach { f => OrcInputFormat.setSearchArgument(conf, f, fileSchema.fieldNames) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala index b0ddee0a6b336..6f9a3ae4c67fe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala @@ -68,7 +68,7 @@ case class OrcPartitionReaderFactory( } private def pushDownPredicates(filePath: Path, conf: Configuration): Unit = { - if (orcFilterPushDown) { + if (orcFilterPushDown && filters.nonEmpty) { OrcUtils.readCatalystSchema(filePath, 
conf, ignoreCorruptFiles).foreach { fileSchema => OrcFilters.createFilter(fileSchema, filters).foreach { f => OrcInputFormat.setSearchArgument(conf, f, fileSchema.fieldNames) From 1dd63dccd893162f8ef969e42273a794ad73e49c Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Tue, 22 Dec 2020 15:10:46 +0900 Subject: [PATCH 0845/1009] [SPARK-33860][SQL] Make CatalystTypeConverters.convertToCatalyst match special Array value ### What changes were proposed in this pull request? Add some case to match Array whose element type is primitive. ### Why are the changes needed? We will get exception when use `Literal.create(Array(1, 2, 3), ArrayType(IntegerType))` . ``` Exception in thread "main" java.lang.IllegalArgumentException: requirement failed: Literal must have a corresponding value to array, but class int[] found. at scala.Predef$.require(Predef.scala:281) at org.apache.spark.sql.catalyst.expressions.Literal$.validateLiteralValue(literals.scala:215) at org.apache.spark.sql.catalyst.expressions.Literal.(literals.scala:292) at org.apache.spark.sql.catalyst.expressions.Literal$.create(literals.scala:140) ``` And same problem with other array whose element is primitive. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Add test. Closes #30868 from ulysses-you/SPARK-33860. Authored-by: ulysses-you Signed-off-by: HyukjinKwon --- .../spark/sql/catalyst/CatalystTypeConverters.scala | 4 +++- .../catalyst/expressions/LiteralExpressionSuite.scala | 11 +++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala index 971d61518c026..907b5877b3ac0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala @@ -457,7 +457,9 @@ object CatalystTypeConverters { case d: JavaBigDecimal => new DecimalConverter(DecimalType(d.precision, d.scale)).toCatalyst(d) case seq: Seq[Any] => new GenericArrayData(seq.map(convertToCatalyst).toArray) case r: Row => InternalRow(r.toSeq.map(convertToCatalyst): _*) - case arr: Array[Any] => new GenericArrayData(arr.map(convertToCatalyst)) + case arr: Array[Byte] => arr + case arr: Array[Char] => StringConverter.toCatalyst(arr) + case arr: Array[_] => new GenericArrayData(arr.map(convertToCatalyst)) case map: Map[_, _] => ArrayBasedMapData( map, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala index 7a482641def3d..1440f1e3a0668 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala @@ -326,4 +326,15 @@ class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { assert(literalStr === expected) } } + + test("SPARK-33860: Make CatalystTypeConverters.convertToCatalyst match special Array value") { + assert(Literal(Array(1, 2, 3)) == Literal.create(Array(1, 2, 3), ArrayType(IntegerType))) + assert(Literal(Array(1L, 2L, 3L)) == Literal.create(Array(1L, 2L, 3L), ArrayType(LongType))) + assert(Literal(Array(1D, 2D, 3D)) == Literal.create(Array(1D, 2D, 3D), ArrayType(DoubleType))) + 
assert(Literal("123") == Literal.create(Array('1', '2', '3'), StringType)) + assert(Literal(Array(1.toByte, 2.toByte, 3.toByte)) == + Literal.create(Array(1.toByte, 2.toByte, 3.toByte), BinaryType)) + assert(Literal(Array("1", "2", "3")) == + Literal.create(Array("1", "2", "3"), ArrayType(StringType))) + } } From 2562183987684c94f1ef5552495c342a10e2ed3d Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Tue, 22 Dec 2020 08:23:56 +0000 Subject: [PATCH 0846/1009] [SPARK-33808][SQL] DataSource V2: Build logical writes in the optimizer ### What changes were proposed in this pull request? This PR adds logic to build logical writes introduced in SPARK-33779. Note: This PR contains a subset of changes discussed in PR #29066. ### Why are the changes needed? These changes are the next step as discussed in the [design doc](https://docs.google.com/document/d/1X0NsQSryvNmXBY9kcvfINeYyKC-AahZarUqg3nS1GQs/edit#) for SPARK-23889. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #30806 from aokolnychyi/spark-33808. Authored-by: Anton Okolnychyi Signed-off-by: Wenchen Fan --- project/MimaExcludes.scala | 2 + .../connector/catalog/TableCapability.java | 2 +- .../catalyst/plans/logical/v2Commands.scala | 10 +- .../spark/sql/connector/write/V1Write.java | 33 +++++++ .../sql/connector/write/V1WriteBuilder.java | 45 --------- .../spark/sql/execution/SparkOptimizer.scala | 7 +- .../datasources/v2/DataSourceV2Strategy.scala | 56 ++++++----- .../datasources/v2/TableCapabilityCheck.scala | 6 +- .../datasources/v2/V1FallbackWriters.scala | 66 +++---------- .../execution/datasources/v2/V2Writes.scala | 95 +++++++++++++++++++ .../v2/WriteToDataSourceV2Exec.scala | 72 +++++--------- .../v2/jdbc/JDBCWriteBuilder.scala | 6 +- .../sql/connector/V1WriteFallbackSuite.scala | 12 +-- .../command/PlanResolutionSuite.scala | 2 +- 14 files changed, 223 insertions(+), 191 deletions(-) create mode 100644 sql/core/src/main/java/org/apache/spark/sql/connector/write/V1Write.java delete mode 100644 sql/core/src/main/java/org/apache/spark/sql/connector/write/V1WriteBuilder.java create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2Writes.scala diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 33e65c9def41b..ba879c03795d1 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -36,6 +36,8 @@ object MimaExcludes { // Exclude rules for 3.2.x lazy val v32excludes = v31excludes ++ Seq( + // [SPARK-33808][SQL] DataSource V2: Build logical writes in the optimizer + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.connector.write.V1WriteBuilder") ) // Exclude rules for 3.1.x diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCapability.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCapability.java index 68161d7225fcf..5bb42fb4b313d 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCapability.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCapability.java @@ -96,7 +96,7 @@ public enum TableCapability { /** * Signals that the table supports append writes using the V1 InsertableRelation interface. *
<p>
      - * Tables that return this capability must create a V1WriteBuilder and may also support additional + * Tables that return this capability must create a V1Write and may also support additional * write modes, like {@link #TRUNCATE}, and {@link #OVERWRITE_BY_FILTER}, but cannot support * {@link #OVERWRITE_DYNAMIC}. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index b3b538ac8b327..02fb3a86db5d5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, ColumnChange} import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.connector.write.Write import org.apache.spark.sql.types.{BooleanType, DataType, MetadataBuilder, StringType, StructType} /** @@ -65,7 +66,8 @@ case class AppendData( table: NamedRelation, query: LogicalPlan, writeOptions: Map[String, String], - isByName: Boolean) extends V2WriteCommand { + isByName: Boolean, + write: Option[Write] = None) extends V2WriteCommand { override def withNewQuery(newQuery: LogicalPlan): AppendData = copy(query = newQuery) override def withNewTable(newTable: NamedRelation): AppendData = copy(table = newTable) } @@ -94,7 +96,8 @@ case class OverwriteByExpression( deleteExpr: Expression, query: LogicalPlan, writeOptions: Map[String, String], - isByName: Boolean) extends V2WriteCommand { + isByName: Boolean, + write: Option[Write] = None) extends V2WriteCommand { override lazy val resolved: Boolean = { table.resolved && query.resolved && outputResolved && deleteExpr.resolved } @@ -132,7 +135,8 @@ case class OverwritePartitionsDynamic( table: NamedRelation, query: LogicalPlan, writeOptions: Map[String, String], - isByName: Boolean) extends V2WriteCommand { + isByName: Boolean, + write: Option[Write] = None) extends V2WriteCommand { override def withNewQuery(newQuery: LogicalPlan): OverwritePartitionsDynamic = { copy(query = newQuery) } diff --git a/sql/core/src/main/java/org/apache/spark/sql/connector/write/V1Write.java b/sql/core/src/main/java/org/apache/spark/sql/connector/write/V1Write.java new file mode 100644 index 0000000000000..a299967ee8bcf --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/connector/write/V1Write.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.connector.write; + +import org.apache.spark.annotation.Unstable; +import org.apache.spark.sql.connector.catalog.TableCapability; +import org.apache.spark.sql.sources.InsertableRelation; + +/** + * A logical write that should be executed using V1 InsertableRelation interface. + *
<p>
      + * Tables that have {@link TableCapability#V1_BATCH_WRITE} in the list of their capabilities + * must build {@link V1Write}. + */ +@Unstable +public interface V1Write extends Write { + InsertableRelation toInsertableRelation(); +} diff --git a/sql/core/src/main/java/org/apache/spark/sql/connector/write/V1WriteBuilder.java b/sql/core/src/main/java/org/apache/spark/sql/connector/write/V1WriteBuilder.java deleted file mode 100644 index 89b567b5231ac..0000000000000 --- a/sql/core/src/main/java/org/apache/spark/sql/connector/write/V1WriteBuilder.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.connector.write; - -import org.apache.spark.annotation.Unstable; -import org.apache.spark.sql.sources.InsertableRelation; - -/** - * A trait that should be implemented by V1 DataSources that would like to leverage the DataSource - * V2 write code paths. The InsertableRelation will be used only to Append data. Other - * instances of the [[WriteBuilder]] interface such as [[SupportsOverwrite]], [[SupportsTruncate]] - * should be extended as well to support additional operations other than data appends. - * - * This interface is designed to provide Spark DataSources time to migrate to DataSource V2 and - * will be removed in a future Spark release. - * - * @since 3.0.0 - */ -@Unstable -public interface V1WriteBuilder extends WriteBuilder { - /** - * Creates an InsertableRelation that allows appending a DataFrame to a - * a destination (using data source-specific parameters). The insert method will only be - * called with `overwrite=false`. The DataSource should implement the overwrite behavior as - * part of the [[SupportsOverwrite]], and [[SupportsTruncate]] interfaces. 
- * - * @since 3.0.0 - */ - InsertableRelation buildForV1Write(); -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala index 33b86a2b5340c..dde5dc2be0556 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution.datasources.PruneFileSourcePartitions import org.apache.spark.sql.execution.datasources.SchemaPruning -import org.apache.spark.sql.execution.datasources.v2.V2ScanRelationPushDown +import org.apache.spark.sql.execution.datasources.v2.{V2ScanRelationPushDown, V2Writes} import org.apache.spark.sql.execution.dynamicpruning.{CleanupDynamicPruningFilters, PartitionPruning} import org.apache.spark.sql.execution.python.{ExtractGroupingPythonUDFFromAggregate, ExtractPythonUDFFromAggregate, ExtractPythonUDFs} @@ -37,7 +37,7 @@ class SparkOptimizer( override def earlyScanPushDownRules: Seq[Rule[LogicalPlan]] = // TODO: move SchemaPruning into catalyst - SchemaPruning :: V2ScanRelationPushDown :: PruneFileSourcePartitions :: Nil + SchemaPruning :: V2ScanRelationPushDown :: V2Writes :: PruneFileSourcePartitions :: Nil override def defaultBatches: Seq[Batch] = (preOptimizationBatches ++ super.defaultBatches :+ Batch("Optimize Metadata Only Query", Once, OptimizeMetadataOnlyQuery(catalog)) :+ @@ -70,7 +70,8 @@ class SparkOptimizer( ExtractPythonUDFFromJoinCondition.ruleName :+ ExtractPythonUDFFromAggregate.ruleName :+ ExtractGroupingPythonUDFFromAggregate.ruleName :+ ExtractPythonUDFs.ruleName :+ - V2ScanRelationPushDown.ruleName + V2ScanRelationPushDown.ruleName :+ + V2Writes.ruleName /** * Optimization batches that are executed before the regular optimization batches (also before diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 635117a9932ac..0c92945dc6ca5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -24,8 +24,9 @@ import org.apache.spark.sql.catalyst.analysis.{ResolvedNamespace, ResolvedPartit import org.apache.spark.sql.catalyst.expressions.{And, Expression, NamedExpression, PredicateHelper, SubqueryExpression} import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.connector.catalog.{CatalogV2Util, StagingTableCatalog, SupportsNamespaces, SupportsPartitionManagement, TableCapability, TableCatalog, TableChange} +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, StagingTableCatalog, SupportsNamespaces, SupportsPartitionManagement, SupportsWrite, TableCapability, TableCatalog, TableChange} import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, MicroBatchStream} +import org.apache.spark.sql.connector.write.V1Write import org.apache.spark.sql.execution.{FilterExec, LeafExecNode, LocalTableScanExec, ProjectExec, RowDataSourceScanExec, SparkPlan} import org.apache.spark.sql.execution.datasources.DataSourceStrategy import 
org.apache.spark.sql.execution.streaming.continuous.{WriteToContinuousDataSource, WriteToContinuousDataSourceExec} @@ -195,33 +196,42 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat orCreate = orCreate) :: Nil } - case AppendData(r: DataSourceV2Relation, query, writeOptions, _) => - r.table.asWritable match { - case v1 if v1.supports(TableCapability.V1_BATCH_WRITE) => - AppendDataExecV1(v1, writeOptions.asOptions, query, refreshCache(r)) :: Nil - case v2 => - AppendDataExec(v2, writeOptions.asOptions, planLater(query), refreshCache(r)) :: Nil + case AppendData(r @ DataSourceV2Relation(v1: SupportsWrite, _, _, _, _), query, writeOptions, + _, Some(write)) if v1.supports(TableCapability.V1_BATCH_WRITE) => + write match { + case v1Write: V1Write => + AppendDataExecV1(v1, writeOptions.asOptions, query, refreshCache(r), v1Write) :: Nil + case v2Write => + throw new AnalysisException( + s"Table ${v1.name} declares ${TableCapability.V1_BATCH_WRITE} capability but " + + s"${v2Write.getClass.getName} is not an instance of ${classOf[V1Write].getName}") } - case OverwriteByExpression(r: DataSourceV2Relation, deleteExpr, query, writeOptions, _) => - // fail if any filter cannot be converted. correctness depends on removing all matching data. - val filters = splitConjunctivePredicates(deleteExpr).map { - filter => DataSourceStrategy.translateFilter(deleteExpr, - supportNestedPredicatePushdown = true).getOrElse( - throw new AnalysisException(s"Cannot translate expression to source filter: $filter")) - }.toArray - r.table.asWritable match { - case v1 if v1.supports(TableCapability.V1_BATCH_WRITE) => - OverwriteByExpressionExecV1(v1, filters, writeOptions.asOptions, - query, refreshCache(r)) :: Nil - case v2 => - OverwriteByExpressionExec(v2, filters, - writeOptions.asOptions, planLater(query), refreshCache(r)) :: Nil + case AppendData(r @ DataSourceV2Relation(v2: SupportsWrite, _, _, _, _), query, writeOptions, + _, Some(write)) => + AppendDataExec(v2, writeOptions.asOptions, planLater(query), refreshCache(r), write) :: Nil + + case OverwriteByExpression(r @ DataSourceV2Relation(v1: SupportsWrite, _, _, _, _), _, query, + writeOptions, _, Some(write)) if v1.supports(TableCapability.V1_BATCH_WRITE) => + write match { + case v1Write: V1Write => + OverwriteByExpressionExecV1( + v1, writeOptions.asOptions, query, refreshCache(r), v1Write) :: Nil + case v2Write => + throw new AnalysisException( + s"Table ${v1.name} declares ${TableCapability.V1_BATCH_WRITE} capability but " + + s"${v2Write.getClass.getName} is not an instance of ${classOf[V1Write].getName}") } - case OverwritePartitionsDynamic(r: DataSourceV2Relation, query, writeOptions, _) => + case OverwriteByExpression(r @ DataSourceV2Relation(v2: SupportsWrite, _, _, _, _), _, query, + writeOptions, _, Some(write)) => + OverwriteByExpressionExec( + v2, writeOptions.asOptions, planLater(query), refreshCache(r), write) :: Nil + + case OverwritePartitionsDynamic(r: DataSourceV2Relation, query, writeOptions, _, Some(write)) => OverwritePartitionsDynamicExec( - r.table.asWritable, writeOptions.asOptions, planLater(query), refreshCache(r)) :: Nil + r.table.asWritable, writeOptions.asOptions, planLater(query), + refreshCache(r), write) :: Nil case DeleteFromTable(relation, condition) => relation match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala index 
cb4a2994de1f4..f697aba46d0df 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala @@ -49,14 +49,14 @@ object TableCapabilityCheck extends (LogicalPlan => Unit) { // TODO: check STREAMING_WRITE capability. It's not doable now because we don't have a // a logical plan for streaming write. - case AppendData(r: DataSourceV2Relation, _, _, _) if !supportsBatchWrite(r.table) => + case AppendData(r: DataSourceV2Relation, _, _, _, _) if !supportsBatchWrite(r.table) => failAnalysis(s"Table ${r.table.name()} does not support append in batch mode.") - case OverwritePartitionsDynamic(r: DataSourceV2Relation, _, _, _) + case OverwritePartitionsDynamic(r: DataSourceV2Relation, _, _, _, _) if !r.table.supports(BATCH_WRITE) || !r.table.supports(OVERWRITE_DYNAMIC) => failAnalysis(s"Table ${r.table.name()} does not support dynamic overwrite in batch mode.") - case OverwriteByExpression(r: DataSourceV2Relation, expr, _, _, _) => + case OverwriteByExpression(r: DataSourceV2Relation, expr, _, _, _, _) => expr match { case Literal(true, BooleanType) => if (!supportsBatchWrite(r.table) || diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala index 080e977121efb..3363172a85286 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala @@ -17,17 +17,14 @@ package org.apache.spark.sql.execution.datasources.v2 -import java.util.UUID - -import org.apache.spark.SparkException import org.apache.spark.sql.Dataset import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.connector.catalog.SupportsWrite -import org.apache.spark.sql.connector.write.{LogicalWriteInfoImpl, SupportsOverwrite, SupportsTruncate, V1WriteBuilder, WriteBuilder} +import org.apache.spark.sql.connector.write.V1Write import org.apache.spark.sql.execution.SparkPlan -import org.apache.spark.sql.sources.{AlwaysTrue, Filter, InsertableRelation} +import org.apache.spark.sql.sources.InsertableRelation import org.apache.spark.sql.util.CaseInsensitiveStringMap /** @@ -39,12 +36,8 @@ case class AppendDataExecV1( table: SupportsWrite, writeOptions: CaseInsensitiveStringMap, plan: LogicalPlan, - refreshCache: () => Unit) extends V1FallbackWriters { - - override protected def run(): Seq[InternalRow] = { - writeWithV1(newWriteBuilder().buildForV1Write(), refreshCache = refreshCache) - } -} + refreshCache: () => Unit, + write: V1Write) extends V1FallbackWriters /** * Physical plan node for overwrite into a v2 table with V1 write interfaces. 
Note that when this @@ -59,29 +52,10 @@ case class AppendDataExecV1( */ case class OverwriteByExpressionExecV1( table: SupportsWrite, - deleteWhere: Array[Filter], writeOptions: CaseInsensitiveStringMap, plan: LogicalPlan, - refreshCache: () => Unit) extends V1FallbackWriters { - - private def isTruncate(filters: Array[Filter]): Boolean = { - filters.length == 1 && filters(0).isInstanceOf[AlwaysTrue] - } - - override protected def run(): Seq[InternalRow] = { - newWriteBuilder() match { - case builder: SupportsTruncate if isTruncate(deleteWhere) => - writeWithV1(builder.truncate().asV1Builder.buildForV1Write(), refreshCache = refreshCache) - - case builder: SupportsOverwrite => - writeWithV1(builder.overwrite(deleteWhere).asV1Builder.buildForV1Write(), - refreshCache = refreshCache) - - case _ => - throw new SparkException(s"Table does not support overwrite by expression: $table") - } - } -} + refreshCache: () => Unit, + write: V1Write) extends V1FallbackWriters /** Some helper interfaces that use V2 write semantics through the V1 writer interface. */ sealed trait V1FallbackWriters extends V2CommandExec with SupportsV1Write { @@ -90,23 +64,13 @@ sealed trait V1FallbackWriters extends V2CommandExec with SupportsV1Write { def table: SupportsWrite def writeOptions: CaseInsensitiveStringMap + def refreshCache: () => Unit + def write: V1Write - protected implicit class toV1WriteBuilder(builder: WriteBuilder) { - def asV1Builder: V1WriteBuilder = builder match { - case v1: V1WriteBuilder => v1 - case other => throw new IllegalStateException( - s"The returned writer ${other} was no longer a V1WriteBuilder.") - } - } - - protected def newWriteBuilder(): V1WriteBuilder = { - val info = LogicalWriteInfoImpl( - queryId = UUID.randomUUID().toString, - schema = plan.schema, - options = writeOptions) - val writeBuilder = table.newWriteBuilder(info) - - writeBuilder.asV1Builder + override def run(): Seq[InternalRow] = { + val writtenRows = writeWithV1(write.toInsertableRelation) + refreshCache() + writtenRows } } @@ -116,12 +80,8 @@ sealed trait V1FallbackWriters extends V2CommandExec with SupportsV1Write { trait SupportsV1Write extends SparkPlan { def plan: LogicalPlan - protected def writeWithV1( - relation: InsertableRelation, - refreshCache: () => Unit = () => ()): Seq[InternalRow] = { + protected def writeWithV1(relation: InsertableRelation): Seq[InternalRow] = { relation.insert(Dataset.ofRows(sqlContext.sparkSession, plan), overwrite = false) - refreshCache() - Nil } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2Writes.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2Writes.scala new file mode 100644 index 0000000000000..a8e0731edf14c --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2Writes.scala @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import java.util.UUID + +import org.apache.spark.SparkException +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.expressions.PredicateHelper +import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.connector.catalog.Table +import org.apache.spark.sql.connector.write.{LogicalWriteInfoImpl, SupportsDynamicOverwrite, SupportsOverwrite, SupportsTruncate, WriteBuilder} +import org.apache.spark.sql.execution.datasources.DataSourceStrategy +import org.apache.spark.sql.sources.{AlwaysTrue, Filter} + +/** + * A rule that constructs logical writes. + */ +object V2Writes extends Rule[LogicalPlan] with PredicateHelper { + + import DataSourceV2Implicits._ + + override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { + case a @ AppendData(r: DataSourceV2Relation, query, options, _, None) => + val writeBuilder = newWriteBuilder(r.table, query, options) + val write = writeBuilder.build() + a.copy(write = Some(write)) + + case o @ OverwriteByExpression(r: DataSourceV2Relation, deleteExpr, query, options, _, None) => + // fail if any filter cannot be converted. correctness depends on removing all matching data. 
+ val filters = splitConjunctivePredicates(deleteExpr).flatMap { pred => + val filter = DataSourceStrategy.translateFilter(pred, supportNestedPredicatePushdown = true) + if (filter.isEmpty) { + throw new AnalysisException(s"Cannot translate expression to source filter: $pred") + } + filter + }.toArray + + val table = r.table + val writeBuilder = newWriteBuilder(table, query, options) + val write = writeBuilder match { + case builder: SupportsTruncate if isTruncate(filters) => + builder.truncate().build() + case builder: SupportsOverwrite => + builder.overwrite(filters).build() + case _ => + throw new SparkException(s"Table does not support overwrite by expression: $table") + } + + o.copy(write = Some(write)) + + case o @ OverwritePartitionsDynamic(r: DataSourceV2Relation, query, options, _, None) => + val table = r.table + val writeBuilder = newWriteBuilder(table, query, options) + val write = writeBuilder match { + case builder: SupportsDynamicOverwrite => + builder.overwriteDynamicPartitions().build() + case _ => + throw new SparkException(s"Table does not support dynamic partition overwrite: $table") + } + o.copy(write = Some(write)) + } + + private def isTruncate(filters: Array[Filter]): Boolean = { + filters.length == 1 && filters(0).isInstanceOf[AlwaysTrue] + } + + private def newWriteBuilder( + table: Table, + query: LogicalPlan, + writeOptions: Map[String, String]): WriteBuilder = { + + val info = LogicalWriteInfoImpl( + queryId = UUID.randomUUID().toString, + query.schema, + writeOptions.asOptions) + table.asWritable.newWriteBuilder(info) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala index f5f77d38b8716..e0887d52cc376 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala @@ -33,9 +33,8 @@ import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.connector.catalog.{Identifier, StagedTable, StagingTableCatalog, SupportsWrite, Table, TableCatalog} import org.apache.spark.sql.connector.expressions.Transform -import org.apache.spark.sql.connector.write.{BatchWrite, DataWriterFactory, LogicalWriteInfoImpl, PhysicalWriteInfoImpl, SupportsDynamicOverwrite, SupportsOverwrite, SupportsTruncate, V1WriteBuilder, WriteBuilder, WriterCommitMessage} +import org.apache.spark.sql.connector.write.{BatchWrite, DataWriterFactory, LogicalWriteInfoImpl, PhysicalWriteInfoImpl, V1Write, Write, WriteBuilder, WriterCommitMessage} import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} -import org.apache.spark.sql.sources.{AlwaysTrue, Filter} import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.{LongAccumulator, Utils} @@ -216,14 +215,8 @@ case class AppendDataExec( table: SupportsWrite, writeOptions: CaseInsensitiveStringMap, query: SparkPlan, - refreshCache: () => Unit) extends V2TableWriteExec with BatchWriteHelper { - - override protected def run(): Seq[InternalRow] = { - val writtenRows = writeWithV2(newWriteBuilder().buildForBatch()) - refreshCache() - writtenRows - } -} + refreshCache: () => Unit, + write: Write) extends V2ExistingTableWriteExec with BatchWriteHelper /** * Physical plan node for overwrite into a v2 table. 
@@ -237,31 +230,10 @@ case class AppendDataExec( */ case class OverwriteByExpressionExec( table: SupportsWrite, - deleteWhere: Array[Filter], writeOptions: CaseInsensitiveStringMap, query: SparkPlan, - refreshCache: () => Unit) extends V2TableWriteExec with BatchWriteHelper { - - private def isTruncate(filters: Array[Filter]): Boolean = { - filters.length == 1 && filters(0).isInstanceOf[AlwaysTrue] - } - - override protected def run(): Seq[InternalRow] = { - val writtenRows = newWriteBuilder() match { - case builder: SupportsTruncate if isTruncate(deleteWhere) => - writeWithV2(builder.truncate().buildForBatch()) - - case builder: SupportsOverwrite => - writeWithV2(builder.overwrite(deleteWhere).buildForBatch()) - - case _ => - throw new SparkException(s"Table does not support overwrite by expression: $table") - } - refreshCache() - writtenRows - } -} - + refreshCache: () => Unit, + write: Write) extends V2ExistingTableWriteExec with BatchWriteHelper /** * Physical plan node for dynamic partition overwrite into a v2 table. @@ -276,20 +248,8 @@ case class OverwritePartitionsDynamicExec( table: SupportsWrite, writeOptions: CaseInsensitiveStringMap, query: SparkPlan, - refreshCache: () => Unit) extends V2TableWriteExec with BatchWriteHelper { - - override protected def run(): Seq[InternalRow] = { - val writtenRows = newWriteBuilder() match { - case builder: SupportsDynamicOverwrite => - writeWithV2(builder.overwriteDynamicPartitions().buildForBatch()) - - case _ => - throw new SparkException(s"Table does not support dynamic partition overwrite: $table") - } - refreshCache() - writtenRows - } -} + refreshCache: () => Unit, + write: Write) extends V2ExistingTableWriteExec with BatchWriteHelper case class WriteToDataSourceV2Exec( batchWrite: BatchWrite, @@ -319,6 +279,17 @@ trait BatchWriteHelper { } } +trait V2ExistingTableWriteExec extends V2TableWriteExec { + def refreshCache: () => Unit + def write: Write + + override protected def run(): Seq[InternalRow] = { + val writtenRows = writeWithV2(write.toBatch) + refreshCache() + writtenRows + } +} + /** * The base physical plan for writing data into data source v2. 
*/ @@ -477,9 +448,10 @@ private[v2] trait TableWriteExecHelper extends V2TableWriteExec with SupportsV1W writeOptions) val writeBuilder = table.newWriteBuilder(info) - val writtenRows = writeBuilder match { - case v1: V1WriteBuilder => writeWithV1(v1.buildForV1Write()) - case v2 => writeWithV2(v2.buildForBatch()) + val write = writeBuilder.build() + val writtenRows = write match { + case v1: V1Write => writeWithV1(v1.toInsertableRelation) + case v2 => writeWithV2(v2.toBatch) } table match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCWriteBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCWriteBuilder.scala index a9f7a32bf4c69..0e6c72c2cc331 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCWriteBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCWriteBuilder.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources.InsertableRelation import org.apache.spark.sql.types.StructType -case class JDBCWriteBuilder(schema: StructType, options: JdbcOptionsInWrite) extends V1WriteBuilder +case class JDBCWriteBuilder(schema: StructType, options: JdbcOptionsInWrite) extends WriteBuilder with SupportsTruncate { private var isTruncate = false @@ -33,8 +33,8 @@ case class JDBCWriteBuilder(schema: StructType, options: JdbcOptionsInWrite) ext this } - override def buildForV1Write(): InsertableRelation = new InsertableRelation { - override def insert(data: DataFrame, overwrite: Boolean): Unit = { + override def build(): V1Write = new V1Write { + override def toInsertableRelation: InsertableRelation = (data: DataFrame, _: Boolean) => { // TODO (SPARK-32595): do truncate and append atomically. 
if (isTruncate) { val conn = JdbcUtils.createConnectionFactory(options)() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala index cba7dd35fb3bc..45ddc6a6fcfc6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.connector.catalog.{Identifier, SupportsRead, SupportsWrite, Table, TableCapability} import org.apache.spark.sql.connector.expressions.{FieldReference, IdentityTransform, Transform} import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, V1Scan} -import org.apache.spark.sql.connector.write.{LogicalWriteInfo, LogicalWriteInfoImpl, SupportsOverwrite, SupportsTruncate, V1WriteBuilder, WriteBuilder} +import org.apache.spark.sql.connector.write.{LogicalWriteInfo, LogicalWriteInfoImpl, SupportsOverwrite, SupportsTruncate, V1Write, WriteBuilder} import org.apache.spark.sql.execution.datasources.DataSourceUtils import org.apache.spark.sql.functions.lit import org.apache.spark.sql.internal.SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION @@ -311,7 +311,8 @@ class InMemoryV1Provider if (mode == SaveMode.Overwrite) { writer.asInstanceOf[SupportsTruncate].truncate() } - writer.asInstanceOf[V1WriteBuilder].buildForV1Write().insert(data, overwrite = false) + val write = writer.build() + write.asInstanceOf[V1Write].toInsertableRelation.insert(data, overwrite = false) getRelation } } @@ -348,7 +349,6 @@ class InMemoryTableWithV1Fallback( private class FallbackWriteBuilder(options: CaseInsensitiveStringMap) extends WriteBuilder - with V1WriteBuilder with SupportsTruncate with SupportsOverwrite { @@ -371,9 +371,9 @@ class InMemoryTableWithV1Fallback( partIndexes.map(row.get) } - override def buildForV1Write(): InsertableRelation = { - new InsertableRelation { - override def insert(data: DataFrame, overwrite: Boolean): Unit = { + override def build(): V1Write = new V1Write { + override def toInsertableRelation: InsertableRelation = { + (data: DataFrame, overwrite: Boolean) => { assert(!overwrite, "V1 write fallbacks cannot be called with overwrite=true") val rows = data.collect() rows.groupBy(getPartitionValues).foreach { case (partition, elements) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 70cbfa194313f..6571e27b928bb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -1199,7 +1199,7 @@ class PlanResolutionSuite extends AnalysisTest { case Project(_, AsDataSourceV2Relation(r)) => assert(r.catalog.exists(_ == catalogIdent)) assert(r.identifier.exists(_.name() == tableIdent)) - case AppendData(r: DataSourceV2Relation, _, _, _) => + case AppendData(r: DataSourceV2Relation, _, _, _, _) => assert(r.catalog.exists(_ == catalogIdent)) assert(r.identifier.exists(_.name() == tableIdent)) case DescribeRelation(r: ResolvedTable, _, _) => From 7bbcbb84c266b6ff418cd2c3361aa7350299d0ae Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Tue, 22 Dec 2020 08:29:22 +0000 Subject: [PATCH 0847/1009] [SPARK-33784][SQL] Rename dataSourceRewriteRules batch ### What 
changes were proposed in this pull request? This PR tries to rename `dataSourceRewriteRules` into something more generic. ### Why are the changes needed? These changes are needed to address the post-review discussion [here](https://github.com/apache/spark/pull/30558#discussion_r533885837). ### Does this PR introduce _any_ user-facing change? Yes but the changes haven't been released yet. ### How was this patch tested? Existing tests. Closes #30808 from aokolnychyi/spark-33784. Authored-by: Anton Okolnychyi Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/Optimizer.scala | 12 ++++++------ .../spark/sql/SparkSessionExtensions.scala | 18 +++++++++--------- .../sql/internal/BaseSessionStateBuilder.scala | 12 ++++++------ .../spark/sql/SparkSessionExtensionSuite.scala | 6 +++--- 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index fdb9c5b4821dd..61bcf9038b845 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -186,9 +186,9 @@ abstract class Optimizer(catalogManager: CatalogManager) RemoveLiteralFromGroupExpressions, RemoveRepetitionFromGroupExpressions) :: Nil ++ operatorOptimizationBatch) :+ - // This batch rewrites data source plans and should be run after the operator - // optimization batch and before any batches that depend on stats. - Batch("Data Source Rewrite Rules", Once, dataSourceRewriteRules: _*) :+ + // This batch rewrites plans after the operator optimization and + // before any batches that depend on stats. + Batch("Pre CBO Rules", Once, preCBORules: _*) :+ // This batch pushes filters and projections into scan nodes. Before this batch, the logical // plan may contain nodes that do not report stats. Anything that uses stats must run after // this batch. @@ -294,10 +294,10 @@ abstract class Optimizer(catalogManager: CatalogManager) def earlyScanPushDownRules: Seq[Rule[LogicalPlan]] = Nil /** - * Override to provide additional rules for rewriting data source plans. Such rules will be - * applied after operator optimization rules and before any rules that depend on stats. + * Override to provide additional rules for rewriting plans after operator optimization rules and + * before any cost-based optimization rules that depend on stats. */ - def dataSourceRewriteRules: Seq[Rule[LogicalPlan]] = Nil + def preCBORules: Seq[Rule[LogicalPlan]] = Nil /** * Returns (defaultBatches - (excludedRules - nonExcludableRules)), the rule batches that diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala index d5d969032a5e1..074906a971b1b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala @@ -40,7 +40,7 @@ import org.apache.spark.sql.execution.{ColumnarRule, SparkPlan} *
 * <li>Analyzer Rules.</li>
 * <li>Check Analysis Rules.</li>
 * <li>Optimizer Rules.</li>
- * <li>Data Source Rewrite Rules.</li>
+ * <li>Pre CBO Rules.</li>
 * <li>Planning Strategies.</li>
 * <li>Customized Parser.</li>
 * <li>(External) Catalog listeners.</li>
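As a usage illustration of the renamed hook defined in the hunk that follows, a hypothetical extension could register a pre-CBO rule like this (`MyPreCBORule` and `MyExtensions` are placeholder names; such a class is typically registered through the `spark.sql.extensions` configuration):

```scala
import org.apache.spark.sql.{SparkSession, SparkSessionExtensions}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule

// A no-op rule; rules injected here run once, after the operator optimization batch and
// before any cost-based optimization batches that depend on stats.
case class MyPreCBORule(spark: SparkSession) extends Rule[LogicalPlan] {
  override def apply(plan: LogicalPlan): LogicalPlan = plan
}

class MyExtensions extends (SparkSessionExtensions => Unit) {
  override def apply(extensions: SparkSessionExtensions): Unit = {
    extensions.injectPreCBORule(MyPreCBORule) // previously injectDataSourceRewriteRule
  }
}
```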
    • @@ -200,19 +200,19 @@ class SparkSessionExtensions { optimizerRules += builder } - private[this] val dataSourceRewriteRules = mutable.Buffer.empty[RuleBuilder] + private[this] val preCBORules = mutable.Buffer.empty[RuleBuilder] - private[sql] def buildDataSourceRewriteRules(session: SparkSession): Seq[Rule[LogicalPlan]] = { - dataSourceRewriteRules.map(_.apply(session)).toSeq + private[sql] def buildPreCBORules(session: SparkSession): Seq[Rule[LogicalPlan]] = { + preCBORules.map(_.apply(session)).toSeq } /** - * Inject an optimizer `Rule` builder that rewrites data source plans into the [[SparkSession]]. - * The injected rules will be executed after the operator optimization batch and before rules - * that depend on stats. + * Inject an optimizer `Rule` builder that rewrites logical plans into the [[SparkSession]]. + * The injected rules will be executed once after the operator optimization batch and + * before any cost-based optimization rules that depend on stats. */ - def injectDataSourceRewriteRule(builder: RuleBuilder): Unit = { - dataSourceRewriteRules += builder + def injectPreCBORule(builder: RuleBuilder): Unit = { + preCBORules += builder } private[this] val plannerStrategyBuilders = mutable.Buffer.empty[StrategyBuilder] diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index 8fb351a2a3b2b..6b84f0e636c1c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -231,8 +231,8 @@ abstract class BaseSessionStateBuilder( override def earlyScanPushDownRules: Seq[Rule[LogicalPlan]] = super.earlyScanPushDownRules ++ customEarlyScanPushDownRules - override def dataSourceRewriteRules: Seq[Rule[LogicalPlan]] = - super.dataSourceRewriteRules ++ customDataSourceRewriteRules + override def preCBORules: Seq[Rule[LogicalPlan]] = + super.preCBORules ++ customPreCBORules override def extendedOperatorOptimizationRules: Seq[Rule[LogicalPlan]] = super.extendedOperatorOptimizationRules ++ customOperatorOptimizationRules @@ -258,13 +258,13 @@ abstract class BaseSessionStateBuilder( protected def customEarlyScanPushDownRules: Seq[Rule[LogicalPlan]] = Nil /** - * Custom rules for rewriting data source plans to add to the Optimizer. Prefer overriding - * this instead of creating your own Optimizer. + * Custom rules for rewriting plans after operator optimization and before CBO. + * Prefer overriding this instead of creating your own Optimizer. * * Note that this may NOT depend on the `optimizer` function. 
*/ - protected def customDataSourceRewriteRules: Seq[Rule[LogicalPlan]] = { - extensions.buildDataSourceRewriteRules(session) + protected def customPreCBORules: Seq[Rule[LogicalPlan]] = { + extensions.buildPreCBORules(session) } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala index 7c19f98b762f4..35d2513835611 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala @@ -88,9 +88,9 @@ class SparkSessionExtensionSuite extends SparkFunSuite { } } - test("SPARK-33621: inject data source rewrite rule") { - withSession(Seq(_.injectDataSourceRewriteRule(MyRule))) { session => - assert(session.sessionState.optimizer.dataSourceRewriteRules.contains(MyRule(session))) + test("SPARK-33621: inject a pre CBO rule") { + withSession(Seq(_.injectPreCBORule(MyRule))) { session => + assert(session.sessionState.optimizer.preCBORules.contains(MyRule(session))) } } From 43a562035cd79083d06d9422a66488dba801066a Mon Sep 17 00:00:00 2001 From: Jacob Kim Date: Tue, 22 Dec 2020 17:55:16 +0900 Subject: [PATCH 0848/1009] [SPARK-33846][SQL] Include Comments for a nested schema in StructType.toDDL ### What changes were proposed in this pull request? ```scala val nestedStruct = new StructType() .add(StructField("b", StringType).withComment("Nested comment")) val struct = new StructType() .add(StructField("a", nestedStruct).withComment("comment")) struct.toDDL ``` Currently, returns: ``` `a` STRUCT<`b`: STRING> COMMENT 'comment'` ``` With this PR, the code above returns: ``` `a` STRUCT<`b`: STRING COMMENT 'Nested comment'> COMMENT 'comment'` ``` ### Why are the changes needed? My team is using nested columns as first citizens, and I thought it would be nice to have comments for nested columns. ### Does this PR introduce _any_ user-facing change? Now, when users call something like this, ```scala spark.table("foo.bar").schema.fields.map(_.toDDL).mkString(", ") ``` they will get comments for the nested columns. ### How was this patch tested? I added unit tests under `org.apache.spark.sql.types.StructTypeSuite`. They test if nested StructType's comment is included in the DDL string. Closes #30851 from jacobhjkim/structtype-toddl. Authored-by: Jacob Kim Signed-off-by: HyukjinKwon --- .../apache/spark/sql/types/StructField.scala | 19 ++++++---- .../apache/spark/sql/types/StructType.scala | 7 +--- .../spark/sql/types/StructTypeSuite.scala | 38 +++++++++++++++++++ 3 files changed, 52 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala index 93478af425955..f0e17b24c80a9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala @@ -82,17 +82,22 @@ case class StructField( if (metadata.contains("comment")) Option(metadata.getString("comment")) else None } + private def getDDLComment = getComment() + .map(escapeSingleQuotedString) + .map(" COMMENT '" + _ + "'") + .getOrElse("") + + /** + * Returns a string containing a schema in SQL format. For example the following value: + * `StructField("eventId", IntegerType)` will be converted to `eventId`: INT. 
+ */ + private[sql] def sql = s"${quoteIdentifier(name)}: ${dataType.sql}$getDDLComment" + /** * Returns a string containing a schema in DDL format. For example, the following value: * `StructField("eventId", IntegerType)` will be converted to `eventId` INT. * * @since 2.4.0 */ - def toDDL: String = { - val comment = getComment() - .map(escapeSingleQuotedString) - .map(" COMMENT '" + _ + "'") - - s"${quoteIdentifier(name)} ${dataType.sql}${comment.getOrElse("")}" - } + def toDDL: String = s"${quoteIdentifier(name)} ${dataType.sql}$getDDLComment" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala index c5e76c160ff46..a223344e921ee 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala @@ -29,7 +29,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.Resolver import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, InterpretedOrdering} import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, LegacyTypeStringParser} -import org.apache.spark.sql.catalyst.util.{quoteIdentifier, truncatedString, StringUtils} +import org.apache.spark.sql.catalyst.util.{truncatedString, StringUtils} import org.apache.spark.sql.catalyst.util.StringUtils.StringConcat import org.apache.spark.sql.internal.SQLConf @@ -445,10 +445,7 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru stringConcat.toString } - override def sql: String = { - val fieldTypes = fields.map(f => s"${quoteIdentifier(f.name)}: ${f.dataType.sql}") - s"STRUCT<${fieldTypes.mkString(", ")}>" - } + override def sql: String = s"STRUCT<${fields.map(_.sql).mkString(", ")}>" /** * Returns a string containing a schema in DDL format. 
For example, the following value: diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala index 645e65f06508d..be06a31788f17 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala @@ -73,6 +73,44 @@ class StructTypeSuite extends SparkFunSuite with SQLHelper { assert(struct.toDDL == """`b` BOOLEAN COMMENT 'Field\'s comment'""") } + private val nestedStruct = new StructType() + .add(StructField("a", new StructType() + .add(StructField("b", new StructType() + .add(StructField("c", StringType + ).withComment("Deep Nested comment")) + ).withComment("Nested comment")) + ).withComment("comment")) + + test("SPARK-33846: toDDL should output nested field's comment") { + val ddl = "`a` STRUCT<`b`: STRUCT<`c`: STRING COMMENT 'Deep Nested comment'> " + + "COMMENT 'Nested comment'> COMMENT 'comment'" + assert(nestedStruct.toDDL == ddl) + } + + test("SPARK-33846: fromDDL should parse nested field's comment") { + val ddl = "`a` STRUCT<`b`: STRUCT<`c`: STRING COMMENT 'Deep Nested comment'> " + + "COMMENT 'Nested comment'> COMMENT 'comment'" + assert(StructType.fromDDL(ddl) == nestedStruct) + } + + test("SPARK-33846: round trip toDDL -> fromDDL - nested struct") { + assert(StructType.fromDDL(nestedStruct.toDDL) == nestedStruct) + } + + private val structWithEmptyString = new StructType() + .add(StructField("a b", StringType).withComment("comment")) + + test("SPARK-33846: empty string in a column's name should be respected by toDDL") { + val ddl = "`a b` STRING COMMENT 'comment'" + + assert(structWithEmptyString.toDDL == ddl) + } + + test("SPARK-33846: empty string in a column's name should be respected by fromDDL") { + val ddl = "`a b` STRING COMMENT 'comment'" + + assert(StructType.fromDDL(ddl) == structWithEmptyString) + } test("Print up to the given level") { val schema = StructType.fromDDL( From 84bf07bbd77e42495d36a6b1e0f592184a12022f Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 22 Dec 2020 12:37:16 +0000 Subject: [PATCH 0849/1009] [SPARK-33878][SQL][TESTS] Fix resolving of `spark_catalog` in v1 Hive catalog tests ### What changes were proposed in this pull request? 1. Recognize `spark_catalog` as the default session catalog in the checks of `TestHiveQueryExecution`. 2. Move v2 and v1 in-memory catalog test `"SPARK-33305: DROP TABLE should also invalidate cache"` to the common trait `command/DropTableSuiteBase`, and run it with v1 Hive external catalog. ### Why are the changes needed? To run In-memory catalog tests in Hive catalog. ### Does this PR introduce _any_ user-facing change? No, the changes influence only on tests. ### How was this patch tested? By running the affected test suites for `DROP TABLE`: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *DropTableSuite" ``` Closes #30883 from MaxGekk/fix-spark_catalog-hive-tests. 
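The identifier handling added to `TestHiveQueryExecution` in the diff below can be sketched as follows. This is a simplified stand-in rather than the actual helper: the real code relies on `CatalogManager.SESSION_CATALOG_NAME` (which is `spark_catalog`) and `asTableIdentifier`, which also rejects identifiers that still have more than two parts.

```scala
import org.apache.spark.sql.catalyst.TableIdentifier

// Strip a leading session catalog name so that `spark_catalog.ns.tbl` and `ns.tbl`
// resolve to the same v1 TableIdentifier.
def toV1Identifier(parts: Seq[String]): TableIdentifier = {
  val normalized =
    if (parts.length > 1 && parts.head.equalsIgnoreCase("spark_catalog")) parts.tail else parts
  TableIdentifier(normalized.last, normalized.init.lastOption)
}

assert(toV1Identifier(Seq("spark_catalog", "ns", "tbl")) == TableIdentifier("tbl", Some("ns")))
assert(toV1Identifier(Seq("ns", "tbl")) == TableIdentifier("tbl", Some("ns")))
```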
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../command/DropTableSuiteBase.scala | 22 +++++++++++++++ .../execution/command/v1/DropTableSuite.scala | 27 +------------------ .../execution/command/v2/DropTableSuite.scala | 22 --------------- .../apache/spark/sql/hive/test/TestHive.scala | 10 ++++--- 4 files changed, 30 insertions(+), 51 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropTableSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropTableSuiteBase.scala index dd620d3bd7aa4..9cba67f04a351 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropTableSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropTableSuiteBase.scala @@ -96,4 +96,26 @@ trait DropTableSuiteBase extends QueryTest with DDLCommandTestUtils { } } } + + test("SPARK-33305: DROP TABLE should also invalidate cache") { + val t = s"$catalog.ns.tbl" + val view = "view" + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + withTempView(view, "source") { + val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") + df.createOrReplaceTempView("source") + sql(s"CREATE TABLE $t $defaultUsing AS SELECT id, data FROM source") + sql(s"CACHE TABLE $view AS SELECT id FROM $t") + checkAnswer(sql(s"SELECT * FROM $t"), spark.table("source").collect()) + checkAnswer( + sql(s"SELECT * FROM $view"), + spark.table("source").select("id").collect()) + + assert(!spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) + sql(s"DROP TABLE $t") + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DropTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DropTableSuite.scala index 4a6956e9ad82d..530d18cb6f7b0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DropTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DropTableSuite.scala @@ -33,30 +33,5 @@ trait DropTableSuiteBase extends command.DropTableSuiteBase { } } -class DropTableSuite extends DropTableSuiteBase with CommandSuiteBase { - // The test fails in Hive External catalog with: - // org.apache.spark.sql.AnalysisException: - // spark_catalog.ns.tbl is not a valid TableIdentifier as it has more than 2 name parts. 
- test("SPARK-33305: DROP TABLE should also invalidate cache") { - val t = s"$catalog.ns.tbl" - val view = "view" - withNamespace(s"$catalog.ns") { - sql(s"CREATE NAMESPACE $catalog.ns") - withTempView(view, "source") { - val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") - df.createOrReplaceTempView("source") - sql(s"CREATE TABLE $t $defaultUsing AS SELECT id, data FROM source") - sql(s"CACHE TABLE $view AS SELECT id FROM $t") - checkAnswer(sql(s"SELECT * FROM $t"), spark.table("source").collect()) - checkAnswer( - sql(s"SELECT * FROM $view"), - spark.table("source").select("id").collect()) - - assert(!spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) - sql(s"DROP TABLE $t") - assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) - } - } - } -} +class DropTableSuite extends DropTableSuiteBase with CommandSuiteBase diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala index a36df8df4dd06..16283d5ad6644 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala @@ -49,26 +49,4 @@ class DropTableSuite extends command.DropTableSuiteBase with CommandSuiteBase { Seq.empty) } } - - test("SPARK-33305: DROP TABLE should also invalidate cache") { - val t = s"$catalog.ns.tbl" - val view = "view" - withNamespace(s"$catalog.ns") { - sql(s"CREATE NAMESPACE $catalog.ns") - withTempView(view, "source") { - val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") - df.createOrReplaceTempView("source") - sql(s"CREATE TABLE $t $defaultUsing AS SELECT id, data FROM source") - sql(s"CACHE TABLE $view AS SELECT id FROM $t") - checkAnswer(sql(s"SELECT * FROM $t"), spark.table("source").collect()) - checkAnswer( - sql(s"SELECT * FROM $view"), - spark.table("source").select("id").collect()) - - assert(!spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) - sql(s"DROP TABLE $t") - assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) - } - } - } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index ff5b9e453a482..962efa8303f9b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -40,6 +40,7 @@ import org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode import org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation import org.apache.spark.sql.catalyst.plans.logical.{CacheTable, LogicalPlan, OneRowRelation} +import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.execution.{QueryExecution, SQLExecution} import org.apache.spark.sql.hive._ @@ -601,9 +602,12 @@ private[hive] class TestHiveQueryExecution( } // Make sure any test tables referenced are loaded. 
- val referencedTables = - describedTables ++ - logical.collect { case UnresolvedRelation(ident, _, _) => ident.asTableIdentifier } + val referencedTables = describedTables ++ logical.collect { + case UnresolvedRelation(ident, _, _) => + if (ident.length > 1 && ident.head.equalsIgnoreCase(CatalogManager.SESSION_CATALOG_NAME)) { + ident.tail.asTableIdentifier + } else ident.asTableIdentifier + } val resolver = sparkSession.sessionState.conf.resolver val referencedTestTables = referencedTables.flatMap { tbl => val testTableOpt = sparkSession.testTables.keys.find(resolver(_, tbl.table)) From 6da5cdf1dbfc35cee0ce32aa9e44c0b4187373d9 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 22 Dec 2020 14:24:12 +0000 Subject: [PATCH 0850/1009] [SPARK-33876][SQL] Add length-check for reading char/varchar from tables w/ a external location ### What changes were proposed in this pull request? This PR adds the length check to the existing ApplyCharPadding rule. Tables will have external locations when users execute SET LOCATION or CREATE TABLE ... LOCATION. If the location contains over length values we should FAIL ON READ. ### Why are the changes needed? ```sql spark-sql> INSERT INTO t2 VALUES ('1', 'b12345'); Time taken: 0.141 seconds spark-sql> alter table t set location '/tmp/hive_one/t2'; Time taken: 0.095 seconds spark-sql> select * from t; 1 b1234 ``` the above case should fail rather than implicitly applying truncation ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? new tests Closes #30882 from yaooqinn/SPARK-33876. Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../sql/catalyst/util/CharVarcharUtils.scala | 29 +++++++--- ...PaddingAndLengthCheckForCharVarchar.scala} | 20 ++++--- .../internal/BaseSessionStateBuilder.scala | 2 +- .../spark/sql/CharVarcharTestSuite.scala | 55 +++++++++++++++++++ .../sql/hive/HiveSessionStateBuilder.scala | 2 +- 5 files changed, 89 insertions(+), 19 deletions(-) rename sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/{ApplyCharTypePadding.scala => PaddingAndLengthCheckForCharVarchar.scala} (86%) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala index e42e384e4b86b..cfdc50d1defb5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala @@ -127,25 +127,36 @@ object CharVarcharUtils extends Logging { } /** - * Returns expressions to apply read-side char type padding for the given attributes. String - * values should be right-padded to N characters if it's from a CHAR(N) column/field. + * Returns expressions to apply read-side char type padding for the given attributes. + * + * For a CHAR(N) column/field and the length of string value is M + * If M > N, raise runtime error + * If M <= N, the value should be right-padded to N characters. + * + * For a VARCHAR(N) column/field and the length of string value is M + * If M > N, raise runtime error + * If M <= N, the value should be remained. 
*/ - def charTypePadding(output: Seq[AttributeReference]): Seq[NamedExpression] = { + def paddingWithLengthCheck(output: Seq[AttributeReference]): Seq[NamedExpression] = { output.map { attr => getRawType(attr.metadata).filter { rawType => - rawType.existsRecursively(_.isInstanceOf[CharType]) + rawType.existsRecursively(dt => dt.isInstanceOf[CharType] || dt.isInstanceOf[VarcharType]) }.map { rawType => - Alias(charTypePadding(attr, rawType), attr.name)(explicitMetadata = Some(attr.metadata)) + Alias(paddingWithLengthCheck(attr, rawType), attr.name)( + explicitMetadata = Some(attr.metadata)) }.getOrElse(attr) } } - private def charTypePadding(expr: Expression, dt: DataType): Expression = dt match { - case CharType(length) => StringRPad(expr, Literal(length)) + private def paddingWithLengthCheck(expr: Expression, dt: DataType): Expression = dt match { + case CharType(length) => StringRPad(stringLengthCheck(expr, dt), Literal(length)) + + case VarcharType(_) => stringLengthCheck(expr, dt) case StructType(fields) => val struct = CreateNamedStruct(fields.zipWithIndex.flatMap { case (f, i) => - Seq(Literal(f.name), charTypePadding(GetStructField(expr, i, Some(f.name)), f.dataType)) + Seq(Literal(f.name), + paddingWithLengthCheck(GetStructField(expr, i, Some(f.name)), f.dataType)) }) if (expr.nullable) { If(IsNull(expr), Literal(null, struct.dataType), struct) @@ -166,7 +177,7 @@ object CharVarcharUtils extends Logging { private def charTypePaddingInArray( arr: Expression, et: DataType, containsNull: Boolean): Expression = { val param = NamedLambdaVariable("x", replaceCharVarcharWithString(et), containsNull) - val func = LambdaFunction(charTypePadding(param, et), Seq(param)) + val func = LambdaFunction(paddingWithLengthCheck(param, et), Seq(param)) ArrayTransform(arr, func) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ApplyCharTypePadding.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PaddingAndLengthCheckForCharVarchar.scala similarity index 86% rename from sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ApplyCharTypePadding.scala rename to sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PaddingAndLengthCheckForCharVarchar.scala index 35bb86f178eb1..f268d5185000f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ApplyCharTypePadding.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PaddingAndLengthCheckForCharVarchar.scala @@ -27,17 +27,21 @@ import org.apache.spark.sql.types.{CharType, StringType} import org.apache.spark.unsafe.types.UTF8String /** - * This rule applies char type padding in two places: - * 1. When reading values from column/field of type CHAR(N), right-pad the values to length N. - * 2. When comparing char type column/field with string literal or char type column/field, - * right-pad the shorter one to the longer length. + * This rule performs char type padding and length check for both char and varchar. + * + * When reading values from column/field of type CHAR(N) or VARCHAR(N), the underlying string value + * might be over length (e.g. tables w/ external locations), it will fail in this case. + * Otherwise, right-pad the values to length N for CHAR(N) and remain the same for VARCHAR(N). + * + * When comparing char type column/field with string literal or char type column/field, + * right-pad the shorter one to the longer length. 
*/ -object ApplyCharTypePadding extends Rule[LogicalPlan] { +object PaddingAndLengthCheckForCharVarchar extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = { val padded = plan.resolveOperatorsUpWithNewOutput { case r: LogicalRelation => - val projectList = CharVarcharUtils.charTypePadding(r.output) + val projectList = CharVarcharUtils.paddingWithLengthCheck(r.output) if (projectList == r.output) { r -> Nil } else { @@ -47,7 +51,7 @@ object ApplyCharTypePadding extends Rule[LogicalPlan] { } case r: DataSourceV2Relation => - val projectList = CharVarcharUtils.charTypePadding(r.output) + val projectList = CharVarcharUtils.paddingWithLengthCheck(r.output) if (projectList == r.output) { r -> Nil } else { @@ -57,7 +61,7 @@ object ApplyCharTypePadding extends Rule[LogicalPlan] { } case r: HiveTableRelation => - val projectList = CharVarcharUtils.charTypePadding(r.output) + val projectList = CharVarcharUtils.paddingWithLengthCheck(r.output) if (projectList == r.output) { r -> Nil } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index 6b84f0e636c1c..34b9af12607ed 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -179,7 +179,7 @@ abstract class BaseSessionStateBuilder( PreprocessTableCreation(session) +: PreprocessTableInsertion +: DataSourceAnalysis +: - ApplyCharTypePadding +: + PaddingAndLengthCheckForCharVarchar +: customPostHocResolutionRules override val extendedCheckRules: Seq[LogicalPlan => Unit] = diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index b0f1198e46440..d7b84a0971e0c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -528,6 +528,61 @@ class FileSourceCharVarcharTestSuite extends CharVarcharTestSuite with SharedSpa override protected def sparkConf: SparkConf = { super.sparkConf.set(SQLConf.USE_V1_SOURCE_LIST, "parquet") } + + test("create table w/ location and fit length values") { + Seq("char", "varchar").foreach { typ => + withTempPath { dir => + withTable("t") { + sql("SELECT '12' as col").write.format(format).save(dir.toString) + sql(s"CREATE TABLE t (col $typ(2)) using $format LOCATION '$dir'") + val df = sql("select * from t") + checkAnswer(sql("select * from t"), Row("12")) + } + } + } + } + + test("create table w/ location and over length values") { + Seq("char", "varchar").foreach { typ => + withTempPath { dir => + withTable("t") { + sql("SELECT '123456' as col").write.format(format).save(dir.toString) + sql(s"CREATE TABLE t (col $typ(2)) using $format LOCATION '$dir'") + val e = intercept[SparkException] { sql("select * from t").collect() } + assert(e.getCause.getMessage.contains( + s"input string of length 6 exceeds $typ type length limitation: 2")) + } + } + } + } + + test("alter table set location w/ fit length values") { + Seq("char", "varchar").foreach { typ => + withTempPath { dir => + withTable("t") { + sql("SELECT '12' as col").write.format(format).save(dir.toString) + sql(s"CREATE TABLE t (col $typ(2)) using $format") + sql(s"ALTER TABLE t SET LOCATION '$dir'") + checkAnswer(spark.table("t"), Row("12")) + } + } + } + } + + 
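The read-side semantics these new tests exercise can be summarized with a small plain-Scala sketch; Spark builds equivalent Catalyst expressions (a length check followed by `rpad` for CHAR) rather than calling code like this, and the error message mirrors the one asserted in these tests.

```scala
// Simplified model of the read-side length check for CHAR(n)/VARCHAR(n) values.
def readChar(value: String, n: Int): String = {
  if (value == null) {
    null
  } else if (value.length > n) {
    throw new RuntimeException(
      s"input string of length ${value.length} exceeds char type length limitation: $n")
  } else {
    value + " " * (n - value.length) // VARCHAR(n) would return `value` unchanged here
  }
}
```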
test("alter table set location w/ over length values") { + Seq("char", "varchar").foreach { typ => + withTempPath { dir => + withTable("t") { + sql("SELECT '123456' as col").write.format(format).save(dir.toString) + sql(s"CREATE TABLE t (col $typ(2)) using $format") + sql(s"ALTER TABLE t SET LOCATION '$dir'") + val e = intercept[SparkException] { spark.table("t").collect() } + assert(e.getCause.getMessage.contains( + s"input string of length 6 exceeds $typ type length limitation: 2")) + } + } + } + } } class DSV2CharVarcharTestSuite extends CharVarcharTestSuite diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala index da37b61688951..5963a71f55035 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala @@ -90,7 +90,7 @@ class HiveSessionStateBuilder( PreprocessTableCreation(session) +: PreprocessTableInsertion +: DataSourceAnalysis +: - ApplyCharTypePadding +: + PaddingAndLengthCheckForCharVarchar +: HiveAnalysis +: customPostHocResolutionRules From 1d450250eb1db7e4f40451f369db830a8f01ec15 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 23 Dec 2020 00:22:42 +0900 Subject: [PATCH 0851/1009] [BUILD][MINOR] Do not publish snapshots from forks ### What changes were proposed in this pull request? The GitHub workflow `Publish Snapshot` publishes master and 3.1 branch via Nexus. For this, the workflow uses `secrets.NEXUS_USER` and `secrets.NEXUS_PW` secrets. These are not available in forks where this workflow fails every day: - https://github.com/G-Research/spark/actions/runs/431626797 - https://github.com/G-Research/spark/actions/runs/433153049 - https://github.com/G-Research/spark/actions/runs/434680048 - https://github.com/G-Research/spark/actions/runs/436958780 ### Why are the changes needed? Avoid attempting to publish snapshots from forked repositories. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Code review only. Closes #30884 from EnricoMi/branch-do-not-publish-snapshots-from-forks. Authored-by: Enrico Minack Signed-off-by: HyukjinKwon --- .github/workflows/publish_snapshot.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/publish_snapshot.yml b/.github/workflows/publish_snapshot.yml index 504d702fd1f22..c5dbc8d057964 100644 --- a/.github/workflows/publish_snapshot.yml +++ b/.github/workflows/publish_snapshot.yml @@ -6,6 +6,7 @@ on: jobs: publish-snapshot: + if: github.repository == 'apache/spark' runs-on: ubuntu-latest strategy: fail-fast: false From 303b8c87737fdff83c96855084c16d6504b0b50f Mon Sep 17 00:00:00 2001 From: Erik Krogen Date: Tue, 22 Dec 2020 09:55:33 -0800 Subject: [PATCH 0852/1009] [SPARK-23862][SQL] Support Java enums from Scala Dataset API ### What changes were proposed in this pull request? Add support for Java Enums (`java.lang.Enum`) from the Scala typed Dataset APIs. This involves adding an implicit for `Encoder` creation in `SQLImplicits`, and updating `ScalaReflection` to handle Java Enums on the serialization and deserialization pathways. Enums are mapped to a `StringType` which is just the name of the Enum value. ### Why are the changes needed? In [SPARK-21255](https://issues.apache.org/jira/browse/SPARK-21255), support for (de)serialization of Java Enums was added, but only when called from Java code. 
It is common for Scala code to rely on Java libraries that are out of control of the Scala developer. Today, if there is a dependency on some Java code which defines an Enum, it would be necessary to define a corresponding Scala class. This change brings closer feature parity between Scala and Java APIs. ### Does this PR introduce _any_ user-facing change? Yes, previously something like: ``` val ds = Seq(MyJavaEnum.VALUE1, MyJavaEnum.VALUE2).toDS // or val ds = Seq(CaseClass(MyJavaEnum.VALUE1), CaseClass(MyJavaEnum.VALUE2)).toDS ``` would fail. Now, it will succeed. ### How was this patch tested? Additional unit tests are added in `DatasetSuite`. Tests include validating top-level enums, enums inside of case classes, enums inside of arrays, and validating that the Enum is stored as the expected string. Closes #30877 from xkrogen/xkrogen-SPARK-23862-scalareflection-java-enums. Lead-authored-by: Erik Krogen Co-authored-by: Fangshi Li Signed-off-by: Dongjoon Hyun --- .../spark/sql/catalyst/ScalaReflection.scala | 9 ++++++ .../sql/catalyst/SerializerBuildHelper.scala | 3 ++ .../org/apache/spark/sql/SQLImplicits.scala | 4 +++ .../org/apache/spark/sql/DatasetSuite.scala | 31 +++++++++++++++++++ 4 files changed, 47 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index 53c7f17ee6b2e..361c3476f5941 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -232,6 +232,11 @@ object ScalaReflection extends ScalaReflection { case t if isSubtype(t, localTypeOf[java.time.Instant]) => createDeserializerForInstant(path) + case t if isSubtype(t, localTypeOf[java.lang.Enum[_]]) => + createDeserializerForTypesSupportValueOf( + Invoke(path, "toString", ObjectType(classOf[String]), returnNullable = false), + getClassFromType(t)) + case t if isSubtype(t, localTypeOf[java.sql.Timestamp]) => createDeserializerForSqlTimestamp(path) @@ -526,6 +531,9 @@ object ScalaReflection extends ScalaReflection { case t if isSubtype(t, localTypeOf[java.math.BigInteger]) => createSerializerForJavaBigInteger(inputObject) + case t if isSubtype(t, localTypeOf[java.lang.Enum[_]]) => + createSerializerForJavaEnum(inputObject) + case t if isSubtype(t, localTypeOf[scala.math.BigInt]) => createSerializerForScalaBigInt(inputObject) @@ -749,6 +757,7 @@ object ScalaReflection extends ScalaReflection { case t if isSubtype(t, localTypeOf[java.lang.Short]) => Schema(ShortType, nullable = true) case t if isSubtype(t, localTypeOf[java.lang.Byte]) => Schema(ByteType, nullable = true) case t if isSubtype(t, localTypeOf[java.lang.Boolean]) => Schema(BooleanType, nullable = true) + case t if isSubtype(t, localTypeOf[java.lang.Enum[_]]) => Schema(StringType, nullable = true) case t if isSubtype(t, definitions.IntTpe) => Schema(IntegerType, nullable = false) case t if isSubtype(t, definitions.LongTpe) => Schema(LongType, nullable = false) case t if isSubtype(t, definitions.DoubleTpe) => Schema(DoubleType, nullable = false) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SerializerBuildHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SerializerBuildHelper.scala index 85acaa11230b4..0554f0f76708b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SerializerBuildHelper.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SerializerBuildHelper.scala @@ -74,6 +74,9 @@ object SerializerBuildHelper { returnNullable = false) } + def createSerializerForJavaEnum(inputObject: Expression): Expression = + createSerializerForString(Invoke(inputObject, "name", ObjectType(classOf[String]))) + def createSerializerForSqlTimestamp(inputObject: Expression): Expression = { StaticInvoke( DateTimeUtils.getClass, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala index 71cbc3ab14d97..1135c8848bc23 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala @@ -88,6 +88,10 @@ abstract class SQLImplicits extends LowPrioritySQLImplicits { /** @since 3.0.0 */ implicit def newInstantEncoder: Encoder[java.time.Instant] = Encoders.INSTANT + /** @since 3.2.0 */ + implicit def newJavaEnumEncoder[A <: java.lang.Enum[_] : TypeTag]: Encoder[A] = + ExpressionEncoder() + // Boxed primitives /** @since 2.0.0 */ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 67e3ad6a80642..3a169e487827a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -1693,6 +1693,33 @@ class DatasetSuite extends QueryTest checkDataset(ds1.select("_2._2"), ds2.select("_2._2").collect(): _*) } + test("SPARK-23862: Spark ExpressionEncoder should support Java Enum type from Scala") { + val saveModeSeq = + Seq(SaveMode.Append, SaveMode.Overwrite, SaveMode.ErrorIfExists, SaveMode.Ignore, null) + assert(saveModeSeq.toDS().collect().toSeq === saveModeSeq) + assert(saveModeSeq.toDS().schema === new StructType().add("value", StringType, nullable = true)) + + val saveModeCaseSeq = saveModeSeq.map(SaveModeCase.apply) + assert(saveModeCaseSeq.toDS().collect().toSet === saveModeCaseSeq.toSet) + assert(saveModeCaseSeq.toDS().schema === + new StructType().add("mode", StringType, nullable = true)) + + val saveModeArrayCaseSeq = + Seq(SaveModeArrayCase(Array()), SaveModeArrayCase(saveModeSeq.toArray)) + val collected = saveModeArrayCaseSeq.toDS().collect() + assert(collected.length === 2) + val sortedByLength = collected.sortBy(_.modes.length) + assert(sortedByLength(0).modes === Array()) + assert(sortedByLength(1).modes === saveModeSeq.toArray) + assert(saveModeArrayCaseSeq.toDS().schema === + new StructType().add("modes", ArrayType(StringType, containsNull = true), nullable = true)) + + // Enum is stored as string, so it is possible to convert to/from string + val stringSeq = saveModeSeq.map(Option.apply).map(_.map(_.toString).orNull) + assert(stringSeq.toDS().as[SaveMode].collect().toSet === saveModeSeq.toSet) + assert(saveModeSeq.toDS().as[String].collect().toSet === stringSeq.toSet) + } + test("SPARK-24571: filtering of string values by char literal") { val df = Seq("Amsterdam", "San Francisco", "X").toDF("city") checkAnswer(df.where($"city" === 'X'), Seq(Row("X"))) @@ -2053,3 +2080,7 @@ case class CircularReferenceClassD(map: Map[String, CircularReferenceClassE]) case class CircularReferenceClassE(id: String, list: List[CircularReferenceClassD]) case class SpecialCharClass(`field.1`: String, `field 2`: String) + +/** Used to test Java Enums from Scala code */ +case class SaveModeCase(mode: SaveMode) +case class SaveModeArrayCase(modes: Array[SaveMode]) 
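A minimal sketch of how the new implicit encoder above is used from the Scala API; the session setup is illustrative and the commented output is approximate.

```scala
import org.apache.spark.sql.{SaveMode, SparkSession}

object JavaEnumEncoderDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[1]")
      .appName("java-enum-encoder")
      .getOrCreate()
    import spark.implicits._

    // Java enums are encoded as their names, so the schema is a single nullable string column.
    val ds = Seq(SaveMode.Append, SaveMode.Overwrite).toDS()
    ds.printSchema() // root
                     //  |-- value: string (nullable = true)
    ds.show()        // rows: "Append", "Overwrite"

    // Round trip through strings also works, since the storage type is StringType.
    val modes = Seq("Append", "Overwrite").toDS().as[SaveMode].collect()
    assert(modes.toSet == Set(SaveMode.Append, SaveMode.Overwrite))

    spark.stop()
  }
}
```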
From ec1560af251d2c3580f5bccfabc750f1c7af09df Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 23 Dec 2020 11:47:13 +0900 Subject: [PATCH 0853/1009] [SPARK-33364][SQL][FOLLOWUP] Refine the catalog v2 API to purge a table ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/30267 Inspired by https://github.com/apache/spark/pull/30886, it's better to have 2 methods `def dropTable` and `def purgeTable`, than `def dropTable(ident)` and `def dropTable(ident, purge)`. ### Why are the changes needed? 1. make the APIs orthogonal. Previously, `def dropTable(ident, purge)` calls `def dropTable(ident)` and is a superset. 2. simplifies the catalog implementation a little bit. Now the `if (purge) ... else ...` check is done at the Spark side. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? existing tests Closes #30890 from cloud-fan/purgeTable. Authored-by: Wenchen Fan Signed-off-by: HyukjinKwon --- .../catalog/DelegatingCatalogExtension.java | 5 +++++ .../sql/connector/catalog/TableCatalog.java | 17 +++++++---------- .../connector/catalog/TableCatalogSuite.scala | 5 +++++ .../datasources/v2/DropTableExec.scala | 2 +- .../execution/command/v2/DropTableSuite.scala | 4 ++-- 5 files changed, 20 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/DelegatingCatalogExtension.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/DelegatingCatalogExtension.java index d07d299d65a58..34f07b12b3666 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/DelegatingCatalogExtension.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/DelegatingCatalogExtension.java @@ -99,6 +99,11 @@ public boolean dropTable(Identifier ident) { return asTableCatalog().dropTable(ident); } + @Override + public boolean purgeTable(Identifier ident) { + return asTableCatalog().purgeTable(ident); + } + @Override public void renameTable( Identifier oldIdent, diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java index 52a74ab9dd9f5..4163d86bcc54b 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java @@ -173,26 +173,23 @@ Table alterTable( boolean dropTable(Identifier ident); /** - * Drop a table in the catalog with an option to purge. + * Drop a table in the catalog and completely remove its data by skipping a trash even if it is + * supported. *
<p>
   * If the catalog supports views and contains a view for the identifier and not a table, this
   * must not drop the view and must return false.
   * <p>
      - * If the catalog supports the option to purge a table, this method must be overridden. - * The default implementation falls back to {@link #dropTable(Identifier)} dropTable} if the - * purge option is set to false. Otherwise, it throws {@link UnsupportedOperationException}. + * If the catalog supports to purge a table, this method should be overridden. + * The default implementation throws {@link UnsupportedOperationException}. * * @param ident a table identifier - * @param purge whether a table should be purged * @return true if a table was deleted, false if no table exists for the identifier + * @throws UnsupportedOperationException If table purging is not supported * * @since 3.1.0 */ - default boolean dropTable(Identifier ident, boolean purge) { - if (purge) { - throw new UnsupportedOperationException("Purge option is not supported."); - } - return dropTable(ident); + default boolean purgeTable(Identifier ident) throws UnsupportedOperationException { + throw new UnsupportedOperationException("Purge table is not supported."); } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/TableCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/TableCatalogSuite.scala index dab20911bbdc7..ef342e7ec5539 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/TableCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/TableCatalogSuite.scala @@ -643,6 +643,11 @@ class TableCatalogSuite extends SparkFunSuite { assert(!catalog.tableExists(testIdent)) } + test("purgeTable") { + val catalog = newCatalog() + intercept[UnsupportedOperationException](catalog.purgeTable(testIdent)) + } + test("renameTable") { val catalog = newCatalog() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala index f89b89096772a..100eaf9021863 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala @@ -35,7 +35,7 @@ case class DropTableExec( override def run(): Seq[InternalRow] = { if (catalog.tableExists(ident)) { invalidateCache() - catalog.dropTable(ident, purge) + if (purge) catalog.purgeTable(ident) else catalog.dropTable(ident) } else if (!ifExists) { throw new NoSuchTableException(ident) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala index 16283d5ad6644..a272f649288f6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala @@ -29,8 +29,8 @@ class DropTableSuite extends command.DropTableSuiteBase with CommandSuiteBase { val errMsg = intercept[UnsupportedOperationException] { sql(s"DROP TABLE $catalog.ns.tbl PURGE") }.getMessage - // The default TableCatalog.dropTable implementation doesn't support the purge option. - assert(errMsg.contains("Purge option is not supported")) + // The default TableCatalog.purgeTable implementation throws an exception. 
+ assert(errMsg.contains("Purge table is not supported")) } } From a3dd8dacee8f6b316be90500f9fd8ec8997a5784 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 22 Dec 2020 19:46:37 -0800 Subject: [PATCH 0854/1009] [SPARK-33877][SQL] SQL reference documents for INSERT w/ a column list We support a column list of INSERT for Spark v3.1.0 (See: SPARK-32976 (https://github.com/apache/spark/pull/29893)). So, this PR targets at documenting it in the SQL documents. ### What changes were proposed in this pull request? improve doc ### Why are the changes needed? ### Does this PR introduce _any_ user-facing change? doc ### How was this patch tested? passing GA doc gen. ![image](https://user-images.githubusercontent.com/8326978/102954876-8994fa00-450f-11eb-81f9-931af6d1f69b.png) ![image](https://user-images.githubusercontent.com/8326978/102954900-99acd980-450f-11eb-9733-115ad37d2319.png) ![image](https://user-images.githubusercontent.com/8326978/102954935-af220380-450f-11eb-9aaa-fdae0725d41e.png) ![image](https://user-images.githubusercontent.com/8326978/102954949-bc3ef280-450f-11eb-8a0d-d7b688efa7bb.png) Closes #30888 from yaooqinn/SPARK-33877. Authored-by: Kent Yao Signed-off-by: Dongjoon Hyun --- docs/sql-ref-syntax-dml-insert-into.md | 41 +++++++++++++++++- ...l-ref-syntax-dml-insert-overwrite-table.md | 43 ++++++++++++++++++- 2 files changed, 80 insertions(+), 4 deletions(-) diff --git a/docs/sql-ref-syntax-dml-insert-into.md b/docs/sql-ref-syntax-dml-insert-into.md index 39d15808d033e..96a95b1a629e9 100644 --- a/docs/sql-ref-syntax-dml-insert-into.md +++ b/docs/sql-ref-syntax-dml-insert-into.md @@ -26,7 +26,7 @@ The `INSERT INTO` statement inserts new rows into a table. The inserted rows can ### Syntax ```sql -INSERT INTO [ TABLE ] table_identifier [ partition_spec ] +INSERT INTO [ TABLE ] table_identifier [ partition_spec ] [ ( column_list ) ] { VALUES ( { value | NULL } [ , ... ] ) [ , ( ... ) ] | query } ``` @@ -40,11 +40,20 @@ INSERT INTO [ TABLE ] table_identifier [ partition_spec ] * **partition_spec** - An optional parameter that specifies a comma separated list of key and value pairs + An optional parameter that specifies a comma-separated list of key and value pairs for partitions. **Syntax:** `PARTITION ( partition_col_name = partition_col_val [ , ... ] )` +* **column_list** + + An optional parameter that specifies a comma-separated list of columns belonging to the `table_identifier` table. + + **Note:**The current behaviour has some limitations: + - All specified columns should exist in the table and not be duplicated from each other. It includes all columns except the static partition columns. + - The size of the column list should be exactly the size of the data from `VALUES` clause or query. + - The order of the column list is alterable and determines how the data from `VALUES` clause or query to be inserted by position. + * **VALUES ( { value `|` NULL } [ , ... ] ) [ , ( ... ) ]** Specifies the values to be inserted. Either an explicitly specified value or a NULL can be inserted. 
@@ -198,6 +207,34 @@ SELECT * FROM students; +-------------+--------------------------+----------+ ``` +#### Insert with a column list + +```sql +INSERT INTO students (address, name, student_id) VALUES + ('Hangzhou, China', 'Kent Yao', 11215016); + +SELECT * FROM students WHERE name = 'Kent Yao'; ++---------+----------------------+----------+ +| name| address|student_id| ++---------+----------------------+----------+ +|Kent Yao | Hangzhou, China| 11215016| ++---------+----------------------+----------+ +``` + +#### Insert with both a partition spec and a column list + +```sql +INSERT INTO students PARTITION (student_id = 11215017) (address, name) VALUES + ('Hangzhou, China', 'Kent Yao Jr.'); + +SELECT * FROM students WHERE student_id = 11215017; ++------------+----------------------+----------+ +| name| address|student_id| ++------------+----------------------+----------+ +|Kent Yao Jr.| Hangzhou, China| 11215017| ++------------+----------------------+----------+ +``` + ### Related Statements * [INSERT OVERWRITE statement](sql-ref-syntax-dml-insert-overwrite-table.html) diff --git a/docs/sql-ref-syntax-dml-insert-overwrite-table.md b/docs/sql-ref-syntax-dml-insert-overwrite-table.md index 638dcb34bb1d2..f2413fb72464f 100644 --- a/docs/sql-ref-syntax-dml-insert-overwrite-table.md +++ b/docs/sql-ref-syntax-dml-insert-overwrite-table.md @@ -26,7 +26,7 @@ The `INSERT OVERWRITE` statement overwrites the existing data in the table using ### Syntax ```sql -INSERT OVERWRITE [ TABLE ] table_identifier [ partition_spec [ IF NOT EXISTS ] ] +INSERT OVERWRITE [ TABLE ] table_identifier [ partition_spec [ IF NOT EXISTS ] ] [ ( column_list ) ] { VALUES ( { value | NULL } [ , ... ] ) [ , ( ... ) ] | query } ``` @@ -40,11 +40,22 @@ INSERT OVERWRITE [ TABLE ] table_identifier [ partition_spec [ IF NOT EXISTS ] ] * **partition_spec** - An optional parameter that specifies a comma separated list of key and value pairs + An optional parameter that specifies a comma-separated list of key and value pairs for partitions. **Syntax:** `PARTITION ( partition_col_name [ = partition_col_val ] [ , ... ] )` +* **column_list** + + An optional parameter that specifies a comma-separated list of columns belonging to the `table_identifier` table. + + **Note** + + The current behaviour has some limitations: + - All specified columns should exist in the table and not be duplicated from each other. It includes all columns except the static partition columns. + - The size of the column list should be exactly the size of the data from `VALUES` clause or query. + - The order of the column list is alterable and determines how the data from `VALUES` clause or query to be inserted by position. + * **VALUES ( { value `|` NULL } [ , ... ] ) [ , ( ... ) ]** Specifies the values to be inserted. Either an explicitly specified value or a NULL can be inserted. 
@@ -169,6 +180,34 @@ SELECT * FROM students; +-----------+-------------------------+----------+ ``` +#### Insert with a column list + +```sql +INSERT OVERWRITE students (address, name, student_id) VALUES + ('Hangzhou, China', 'Kent Yao', 11215016); + +SELECT * FROM students WHERE name = 'Kent Yao'; ++---------+----------------------+----------+ +| name| address|student_id| ++---------+----------------------+----------+ +|Kent Yao | Hangzhou, China| 11215016| ++---------+----------------------+----------+ +``` + +#### Insert with both a partition spec and a column list + +```sql +INSERT OVERWRITE students PARTITION (student_id = 11215016) (address, name) VALUES + ('Hangzhou, China', 'Kent Yao Jr.'); + +SELECT * FROM students WHERE student_id = 11215016; ++------------+----------------------+----------+ +| name| address|student_id| ++------------+----------------------+----------+ +|Kent Yao Jr.| Hangzhou, China| 11215016| ++------------+----------------------+----------+ +``` + ### Related Statements * [INSERT INTO statement](sql-ref-syntax-dml-insert-into.html) From ea37717f7c709a86985e006a192bf040f8958da3 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Wed, 23 Dec 2020 13:50:05 +0900 Subject: [PATCH 0855/1009] [SPARK-32106][SQL][FOLLOWUP] Fix flaky tests in transform.sql ### What changes were proposed in this pull request? This PR intends to fix flaky GitHub Actions (GA) tests below in `transform.sql` (this flakiness does not seem to happen in the Jenkins tests): - https://github.com/apache/spark/runs/1592987501 - https://github.com/apache/spark/runs/1593196242 - https://github.com/apache/spark/runs/1595496305 - https://github.com/apache/spark/runs/1596309555 This is because the error message is different between test runs in GA (the error message seems to be truncated indeterministically) ,e.g., ``` # https://github.com/apache/spark/runs/1592987501 Expected "...h status 127. Error:[ /bin/bash: some_non_existent_command: command not found]", but got "...h status 127. Error:[]" Result did not match for query #2 # https://github.com/apache/spark/runs/1593196242 Expected "...istent_command: comm[and not found]", but got "...istent_command: comm[]" Result did not match for query #2 ``` The root cause of this indeterministic behaviour happening only in GA is not clear though, this test throws SparkException consistently even in GA. So, this PR proposes to make the test just check if it will be thrown when running it. This PR comes from the dongjoon-hyun comment: https://github.com/apache/spark/pull/29414/files#r547414513 ### Why are the changes needed? Bugfix. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added tests. Closes #30896 from maropu/SPARK-32106-FOLLOWUP. 
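Restated as a standalone sketch, the idea of the fix is to stop asserting on the shell-dependent message and assert only the exception type. The suite name and scaffolding below are hypothetical stand-ins; the real test is added to `BaseScriptTransformationSuite` in the diff that follows.

```scala
import org.apache.spark.SparkException
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.test.SharedSparkSession

// Hypothetical suite used only to illustrate the pattern; see the actual change below.
class TransformFailureSuite extends QueryTest with SharedSparkSession {
  test("non-existent TRANSFORM command: assert only the exception type") {
    // The shell's error text varies by environment (and gets truncated in GA logs),
    // so the assertion stops at the exception type instead of matching the message.
    intercept[SparkException] {
      spark.sql(
        """SELECT TRANSFORM(a)
          |USING 'some_non_existent_command' AS (a)
          |FROM VALUES (1) t(a)
        """.stripMargin).collect()
    }
  }
}
```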
Authored-by: Takeshi Yamamuro Signed-off-by: HyukjinKwon --- .../resources/sql-tests/inputs/transform.sql | 10 -------- .../sql-tests/results/transform.sql.out | 24 +------------------ .../BaseScriptTransformationSuite.scala | 20 ++++++++++++++++ 3 files changed, 21 insertions(+), 33 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/transform.sql b/sql/core/src/test/resources/sql-tests/inputs/transform.sql index 65b060eca3a62..3f39700a95913 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/transform.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/transform.sql @@ -9,16 +9,6 @@ SELECT TRANSFORM(a) USING 'cat' AS (a) FROM t; --- with non-exist command -SELECT TRANSFORM(a) -USING 'some_non_existent_command' AS (a) -FROM t; - --- with non-exist file -SELECT TRANSFORM(a) -USING 'python some_non_existent_file' AS (a) -FROM t; - -- common supported data types between no serde and serde transform SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) diff --git a/sql/core/src/test/resources/sql-tests/results/transform.sql.out b/sql/core/src/test/resources/sql-tests/results/transform.sql.out index 83ab5cb729c24..3267a7625a7d9 100644 --- a/sql/core/src/test/resources/sql-tests/results/transform.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/transform.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 18 +-- Number of queries: 16 -- !query @@ -26,28 +26,6 @@ struct 3 --- !query -SELECT TRANSFORM(a) -USING 'some_non_existent_command' AS (a) -FROM t --- !query schema -struct<> --- !query output -org.apache.spark.SparkException -Subprocess exited with status 127. Error: /bin/bash: some_non_existent_command: command not found - - --- !query -SELECT TRANSFORM(a) -USING 'python some_non_existent_file' AS (a) -FROM t --- !query schema -struct<> --- !query output -org.apache.spark.SparkException -Subprocess exited with status 2. Error: python: can't open file 'some_non_existent_file': [Errno 2] No such file or directory - - -- !query SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala index 81f292809df4a..863657a7862a6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala @@ -420,6 +420,26 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU 'b.cast("string").as("b"), lit(null), lit(null)).collect()) } + + test("SPARK-32106: TRANSFORM with non-existent command/file") { + Seq( + s""" + |SELECT TRANSFORM(a) + |USING 'some_non_existent_command' AS (a) + |FROM VALUES (1) t(a) + """.stripMargin, + s""" + |SELECT TRANSFORM(a) + |USING 'python some_non_existent_file' AS (a) + |FROM VALUES (1) t(a) + """.stripMargin).foreach { query => + intercept[SparkException] { + // Since an error message is shell-dependent, this test just checks + // if the expected exception will be thrown. 
+ sql(query).collect() + } + } + } } case class ExceptionInjectingOperator(child: SparkPlan) extends UnaryExecNode { From 90d6f8600117da33bbb570dee6d893cfd8d35263 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 22 Dec 2020 21:59:53 -0800 Subject: [PATCH 0856/1009] [SPARK-33870][CORE] Enable spark.storage.replication.proactive by default ### What changes were proposed in this pull request? This PR aims to enable `spark.storage.replication.proactive` by default for Apache Spark 3.2.0. ### Why are the changes needed? `spark.storage.replication.proactive` is added by SPARK-15355 at Apache Spark 2.2.0 and has been helpful when the block manager loss occurs frequently like K8s environment. ### Does this PR introduce _any_ user-facing change? Yes, this will make the Spark jobs more robust. ### How was this patch tested? Pass the existing UTs. Closes #30876 from dongjoon-hyun/SPARK-33870. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../main/scala/org/apache/spark/internal/config/package.scala | 2 +- docs/core-migration-guide.md | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index f6de5e4128ca5..cbf4a971e3d0d 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -384,7 +384,7 @@ package object config { "get the replication level of the block to the initial number") .version("2.2.0") .booleanConf - .createWithDefault(false) + .createWithDefault(true) private[spark] val STORAGE_MEMORY_MAP_THRESHOLD = ConfigBuilder("spark.storage.memoryMapThreshold") diff --git a/docs/core-migration-guide.md b/docs/core-migration-guide.md index 11d3e0019617f..822975b4edf27 100644 --- a/docs/core-migration-guide.md +++ b/docs/core-migration-guide.md @@ -22,6 +22,10 @@ license: | * Table of contents {:toc} +## Upgrading from Core 3.1 to 3.2 + +- Since Spark 3.2, `spark.storage.replication.proactive` is enabled by default which means Spark tries to replenish in case of the loss of cached RDD block replicas due to executor failures. To restore the behavior before Spark 3.2, you can set `spark.storage.replication.proactive` to `false`. + ## Upgrading from Core 3.0 to 3.1 - In Spark 3.0 and below, `SparkContext` can be created in executors. Since Spark 3.1, an exception will be thrown when creating `SparkContext` in executors. You can allow it by setting the configuration `spark.executor.allowSparkContext` when creating `SparkContext` in executors. From e853f068f6c8f9c2aebad37115b0fad1191650ee Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Tue, 22 Dec 2020 22:43:03 -0800 Subject: [PATCH 0857/1009] [SPARK-33526][SQL][FOLLOWUP] Fix flaky test due to timeout and fix docs ### What changes were proposed in this pull request? Make test stable and fix docs. ### Why are the changes needed? Query timeout sometime since we set an another config after set query timeout. 
``` sbt.ForkMain$ForkError: java.sql.SQLTimeoutException: Query timed out after 0 seconds at org.apache.hive.jdbc.HiveStatement.waitForOperationToComplete(HiveStatement.java:381) at org.apache.hive.jdbc.HiveStatement.execute(HiveStatement.java:254) at org.apache.spark.sql.hive.thriftserver.ThriftServerWithSparkContextSuite.$anonfun$$init$$13(ThriftServerWithSparkContextSuite.scala:107) at org.apache.spark.sql.hive.thriftserver.ThriftServerWithSparkContextSuite.$anonfun$$init$$13$adapted(ThriftServerWithSparkContextSuite.scala:106) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.hive.thriftserver.ThriftServerWithSparkContextSuite.$anonfun$$init$$12(ThriftServerWithSparkContextSuite.scala:106) at org.apache.spark.sql.hive.thriftserver.ThriftServerWithSparkContextSuite.$anonfun$$init$$12$adapted(ThriftServerWithSparkContextSuite.scala:89) at org.apache.spark.sql.hive.thriftserver.SharedThriftServer.$anonfun$withJdbcStatement$4(SharedThriftServer.scala:95) at org.apache.spark.sql.hive.thriftserver.SharedThriftServer.$anonfun$withJdbcStatement$4$adapted(SharedThriftServer.scala:95) ``` The reason is: 1. we execute `set spark.sql.thriftServer.queryTimeout = 1`, then all the option will be limited in 1s. 2. we execute `set spark.sql.thriftServer.interruptOnCancel = false/true`. This sql will get timeout exception if there is something hung within 1s. It's not our expected. Reset the timeout before we do the step2 can avoid this problem. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Fix test. Closes #30897 from ulysses-you/SPARK-33526-followup. Authored-by: ulysses-you Signed-off-by: Dongjoon Hyun --- .../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 4 ++-- .../thriftserver/ThriftServerWithSparkContextSuite.scala | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index b5547319f0ab3..d14d136a81e7f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -970,8 +970,8 @@ object SQLConf { "a positive value, a running query will be cancelled automatically when the timeout is " + "exceeded, otherwise the query continues to run till completion. If timeout values are " + "set for each statement via `java.sql.Statement.setQueryTimeout` and they are smaller " + - "than this configuration value, they take precedence. If you set this timeout and prefer" + - "to cancel the queries right away without waiting task to finish, consider enabling" + + "than this configuration value, they take precedence. 
If you set this timeout and prefer " + + "to cancel the queries right away without waiting task to finish, consider enabling " + s"${THRIFTSERVER_FORCE_CANCEL.key} together.") .version("3.1.0") .timeConf(TimeUnit.SECONDS) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala index 036eb5850695e..3598f966b6259 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala @@ -102,14 +102,15 @@ trait ThriftServerWithSparkContextSuite extends SharedThriftServer { spark.sparkContext.addSparkListener(listener) try { - statement.execute(s"SET ${SQLConf.THRIFTSERVER_QUERY_TIMEOUT.key}=1") Seq(true, false).foreach { force => + statement.setQueryTimeout(0) statement.execute(s"SET ${SQLConf.THRIFTSERVER_FORCE_CANCEL.key}=$force") + statement.setQueryTimeout(1) forceCancel.set(force) - val e1 = intercept[SQLException] { + val e = intercept[SQLException] { statement.execute("select java_method('java.lang.Thread', 'sleep', 3000L)") }.getMessage - assert(e1.contains("Query timed out")) + assert(e.contains("Query timed out")) } } finally { spark.sparkContext.removeSparkListener(listener) From 2287f56a3e105e04cf4e86283eaee12f270c09a7 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Wed, 23 Dec 2020 16:14:27 +0900 Subject: [PATCH 0858/1009] [SPARK-33879][SQL] Char Varchar values fails w/ match error as partition columns ### What changes were proposed in this pull request? ```sql spark-sql> select * from t10 where c0='abcd'; 20/12/22 15:43:38 ERROR SparkSQLDriver: Failed in [select * from t10 where c0='abcd'] scala.MatchError: CharType(10) (of class org.apache.spark.sql.types.CharType) at org.apache.spark.sql.catalyst.expressions.CastBase.cast(Cast.scala:815) at org.apache.spark.sql.catalyst.expressions.CastBase.cast$lzycompute(Cast.scala:842) at org.apache.spark.sql.catalyst.expressions.CastBase.cast(Cast.scala:842) at org.apache.spark.sql.catalyst.expressions.CastBase.nullSafeEval(Cast.scala:844) at org.apache.spark.sql.catalyst.expressions.UnaryExpression.eval(Expression.scala:476) at org.apache.spark.sql.catalyst.catalog.CatalogTablePartition.$anonfun$toRow$2(interface.scala:164) at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238) at scala.collection.Iterator.foreach(Iterator.scala:941) at scala.collection.Iterator.foreach$(Iterator.scala:941) at scala.collection.AbstractIterator.foreach(Iterator.scala:1429) at scala.collection.IterableLike.foreach(IterableLike.scala:74) at scala.collection.IterableLike.foreach$(IterableLike.scala:73) at org.apache.spark.sql.types.StructType.foreach(StructType.scala:102) at scala.collection.TraversableLike.map(TraversableLike.scala:238) at scala.collection.TraversableLike.map$(TraversableLike.scala:231) at org.apache.spark.sql.types.StructType.map(StructType.scala:102) at org.apache.spark.sql.catalyst.catalog.CatalogTablePartition.toRow(interface.scala:158) at org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils$.$anonfun$prunePartitionsByFilter$3(ExternalCatalogUtils.scala:157) at org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils$.$anonfun$prunePartitionsByFilter$3$adapted(ExternalCatalogUtils.scala:156) ``` c0 is a partition column, it fails in the 
partition pruning rule In this PR, we relace char/varchar w/ string type before the CAST happends ### Why are the changes needed? bugfix, see the case above ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? yes, new tests Closes #30887 from yaooqinn/SPARK-33879. Authored-by: Kent Yao Signed-off-by: HyukjinKwon --- .../catalog/ExternalCatalogUtils.scala | 4 +++- .../spark/sql/CharVarcharTestSuite.scala | 20 +++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala index ae3b75dc3334b..00445a1614257 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.Resolver import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{And, AttributeReference, BoundReference, Expression, Predicate} +import org.apache.spark.sql.catalyst.util.CharVarcharUtils object ExternalCatalogUtils { // This duplicates default value of Hive `ConfVars.DEFAULTPARTITIONNAME`, since catalyst doesn't @@ -135,7 +136,8 @@ object ExternalCatalogUtils { if (predicates.isEmpty) { inputPartitions } else { - val partitionSchema = catalogTable.partitionSchema + val partitionSchema = CharVarcharUtils.replaceCharVarcharWithStringInSchema( + catalogTable.partitionSchema) val partitionColumnNames = catalogTable.partitionColumnNames.toSet val nonPartitionPruningPredicates = predicates.filterNot { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index d7b84a0971e0c..8ab8c37d5e790 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -356,6 +356,26 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { } } + test("char type comparison: partition pruning") { + withTable("t") { + sql(s"CREATE TABLE t(i INT, c1 CHAR(2), c2 VARCHAR(5)) USING $format PARTITIONED BY (c1, c2)") + sql("INSERT INTO t VALUES (1, 'a', 'a')") + Seq(("c1 = 'a'", true), + ("'a' = c1", true), + ("c1 = 'a '", true), + ("c1 > 'a'", false), + ("c1 IN ('a', 'b')", true), + ("c2 = 'a '", false), + ("c2 = 'a'", true), + ("c2 IN ('a', 'b')", true)).foreach { case (con, res) => + val df = spark.table("t") + withClue(con) { + checkAnswer(df.where(con), df.where(res.toString)) + } + } + } + } + test("char type comparison: join") { withTable("t1", "t2") { sql(s"CREATE TABLE t1(c CHAR(2)) USING $format") From d98c216e1959e276877c3d0a9562cc4cdd8b41bb Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 23 Dec 2020 18:04:28 +0900 Subject: [PATCH 0859/1009] [SPARK-31960][YARN][DOCS][FOLLOW-UP] Document the behaviour change of Hadoop's classpath propagation in migration guide ### What changes were proposed in this pull request? This PR is a followup of https://github.com/apache/spark/pull/28788, and proposes to update migration guide. ### Why are the changes needed? To tell users about the behaviour change. ### Does this PR introduce _any_ user-facing change? 
Yes, it updates migration guides for users. ### How was this patch tested? GitHub Actions' documentation build should test it. Closes #30903 from HyukjinKwon/SPARK-31960-followup. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- docs/core-migration-guide.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/core-migration-guide.md b/docs/core-migration-guide.md index 822975b4edf27..ec7c3ab9cb568 100644 --- a/docs/core-migration-guide.md +++ b/docs/core-migration-guide.md @@ -30,6 +30,8 @@ license: | - In Spark 3.0 and below, `SparkContext` can be created in executors. Since Spark 3.1, an exception will be thrown when creating `SparkContext` in executors. You can allow it by setting the configuration `spark.executor.allowSparkContext` when creating `SparkContext` in executors. +- In Spark 3.0 and below, Spark propagated the Hadoop classpath from `yarn.application.classpath` and `mapreduce.application.classpath` into the Spark application submitted to YARN when Spark distribution is with the built-in Hadoop. Since Spark 3.1, it does not propagate anymore when the Spark distribution is with the built-in Hadoop in order to prevent the failure from the different transitive dependencies picked up from the Hadoop cluster such as Guava and Jackson. To restore the behavior before Spark 3.1, you can set `spark.yarn.populateHadoopClasspath` to `true`. + ## Upgrading from Core 2.4 to 3.0 - The `org.apache.spark.ExecutorPlugin` interface and related configuration has been replaced with From 34bfb3a31d505a08e15454214d8f78933310ebb3 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 23 Dec 2020 09:09:48 +0000 Subject: [PATCH 0860/1009] [SPARK-33787][SQL] Allow partition purge for v2 tables ### What changes were proposed in this pull request? 1. Add new methods `purgePartition()`/`purgePartitions()` to the interfaces `SupportsPartitionManagement`/`SupportsAtomicPartitionManagement`. 2. Default implementation of new methods throw the exception `UnsupportedOperationException`. 3. Add tests for new methods to `SupportsPartitionManagementSuite`/`SupportsAtomicPartitionManagementSuite`. 4. Add `ALTER TABLE .. DROP PARTITION` tests for DS v1 and v2. Closes #30776 Closes #30821 ### Why are the changes needed? Currently, the `PURGE` option that user can set in `ALTER TABLE .. DROP PARTITION` is completely ignored. We should pass this flag to the catalog implementation, so, the catalog should decide how to handle the flag. ### Does this PR introduce _any_ user-facing change? The changes can impact on behavior of `ALTER TABLE .. DROP PARTITION` for v2 tables. ### How was this patch tested? By running the affected test suites, for instance: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableDropPartitionSuite" ``` Closes #30886 from MaxGekk/purge-partition. 
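For connector authors, the new hook can be sketched roughly as below. This is an illustration under stated assumptions, not code from the patch: the class name and the `deletePermanently` helper are invented, and everything else about the table is left abstract.

```scala
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.connector.catalog.SupportsAtomicPartitionManagement

// Rough sketch of a v2 table that opts in to partition purging.
abstract class PurgeAwarePartitionTable extends SupportsAtomicPartitionManagement {

  // Hypothetical storage hook that removes partition data without moving it to a trash area.
  protected def deletePermanently(ident: InternalRow): Unit

  // `ALTER TABLE ... DROP PARTITION ... PURGE` reaches this method for atomic v2 tables;
  // implementations that keep the default simply throw UnsupportedOperationException.
  override def purgePartitions(idents: Array[InternalRow]): Boolean = {
    // Remove the data permanently first (bypassing any trash location), then drop the
    // partition metadata through the existing atomic API. A production implementation
    // must also honour the contract of rolling back safely when a partition is missing.
    idents.foreach(deletePermanently)
    dropPartitions(idents)
  }
}
```

At the SQL layer the `PURGE` keyword simply flows through to these methods, so a catalog that keeps the defaults surfaces `UnsupportedOperationException`, which is exactly what the new v2 test in this patch asserts.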
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../SupportsAtomicPartitionManagement.java | 23 +++++++++++++++++++ .../catalog/SupportsPartitionManagement.java | 19 +++++++++++++++ ...pportsAtomicPartitionManagementSuite.scala | 14 +++++++++++ .../SupportsPartitionManagementSuite.scala | 10 ++++++++ .../v2/AlterTableDropPartitionExec.scala | 9 +++++--- .../datasources/v2/DataSourceV2Strategy.scala | 4 ++-- .../v1/AlterTableDropPartitionSuite.scala | 10 ++++++++ .../v2/AlterTableDropPartitionSuite.scala | 15 ++++++++++++ 8 files changed, 99 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsAtomicPartitionManagement.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsAtomicPartitionManagement.java index 754203125cdc2..665946fcf3e2a 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsAtomicPartitionManagement.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsAtomicPartitionManagement.java @@ -21,6 +21,7 @@ import org.apache.spark.annotation.Experimental; import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionException; import org.apache.spark.sql.catalyst.analysis.PartitionAlreadyExistsException; import org.apache.spark.sql.catalyst.analysis.PartitionsAlreadyExistException; @@ -33,6 +34,9 @@ * add an array of partitions and any data they contain to the table * ${@link #dropPartitions}: * remove an array of partitions and any data they contain from the table + * ${@link #purgePartitions}: + * remove an array of partitions and any data they contain from the table by skipping + * a trash even if it is supported * * @since 3.1.0 */ @@ -82,4 +86,23 @@ void createPartitions( * @return true if partitions were deleted, false if any partition not exists */ boolean dropPartitions(InternalRow[] idents); + + /** + * Drop an array of partitions atomically from table, and completely remove partitions data + * by skipping a trash even if it is supported. + *

      + * If any partition doesn't exists, + * the operation of purgePartitions need to be safely rolled back. + * + * @param idents an array of partition identifiers + * @return true if partitions were deleted, false if any partition not exists + * @throws NoSuchPartitionException If any partition identifier to alter doesn't exist + * @throws UnsupportedOperationException If partition purging is not supported + * + * @since 3.2.0 + */ + default boolean purgePartitions(InternalRow[] idents) + throws NoSuchPartitionException, UnsupportedOperationException { + throw new UnsupportedOperationException("Partition purge is not supported"); + } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java index cf86c44e9563b..409ab3f5f9335 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java @@ -36,6 +36,9 @@ * add a partition and any data it contains to the table * ${@link #dropPartition}: * remove a partition and any data it contains from the table + * ${@link #purgePartition}: + * remove a partition and any data it contains from the table by skipping a trash + * even if it is supported. * ${@link #replacePartitionMetadata}: * point a partition to a new location, which will swap one location's data for the other * @@ -72,6 +75,22 @@ void createPartition( */ boolean dropPartition(InternalRow ident); + /** + * Drop a partition from the table and completely remove partition data by skipping a trash + * even if it is supported. + * + * @param ident a partition identifier + * @return true if a partition was deleted, false if no partition exists for the identifier + * @throws NoSuchPartitionException If the partition identifier to alter doesn't exist + * @throws UnsupportedOperationException If partition purging is not supported + * + * @since 3.2.0 + */ + default boolean purgePartition(InternalRow ident) + throws NoSuchPartitionException, UnsupportedOperationException { + throw new UnsupportedOperationException("Partition purge is not supported"); + } + /** * Test whether a partition exists using an {@link InternalRow ident} from the table. 
* diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsAtomicPartitionManagementSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsAtomicPartitionManagementSuite.scala index ad2631650b7ef..d8a0b56928b34 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsAtomicPartitionManagementSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsAtomicPartitionManagementSuite.scala @@ -110,6 +110,20 @@ class SupportsAtomicPartitionManagementSuite extends SparkFunSuite { assert(!hasPartitions(partTable)) } + test("purgePartitions") { + val table = catalog.loadTable(ident) + val partTable = new InMemoryAtomicPartitionTable( + table.name(), table.schema(), table.partitioning(), table.properties()) + val partIdents = Array(InternalRow.apply("3"), InternalRow.apply("4")) + partTable.createPartitions( + partIdents, + Array(new util.HashMap[String, String](), new util.HashMap[String, String]())) + val errMsg = intercept[UnsupportedOperationException] { + partTable.purgePartitions(partIdents) + }.getMessage + assert(errMsg.contains("purge is not supported")) + } + test("dropPartitions failed if partition not exists") { val table = catalog.loadTable(ident) val partTable = new InMemoryAtomicPartitionTable( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala index dc2df546d6bfd..31494c7c2dd50 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala @@ -85,6 +85,16 @@ class SupportsPartitionManagementSuite extends SparkFunSuite { assert(!hasPartitions(partTable)) } + test("purgePartition") { + val table = catalog.loadTable(ident) + val partTable = new InMemoryPartitionTable( + table.name(), table.schema(), table.partitioning(), table.properties()) + val errMsg = intercept[UnsupportedOperationException] { + partTable.purgePartition(InternalRow.apply("3")) + }.getMessage + assert(errMsg.contains("purge is not supported")) + } + test("replacePartitionMetadata") { val table = catalog.loadTable(ident) val partTable = new InMemoryPartitionTable( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala index c7a68ecb2bbee..90714c3c726f3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala @@ -28,7 +28,8 @@ import org.apache.spark.sql.connector.catalog.{SupportsAtomicPartitionManagement case class AlterTableDropPartitionExec( table: SupportsPartitionManagement, partSpecs: Seq[ResolvedPartitionSpec], - ignoreIfNotExists: Boolean) extends V2CommandExec { + ignoreIfNotExists: Boolean, + purge: Boolean) extends V2CommandExec { import DataSourceV2Implicits._ override def output: Seq[Attribute] = Seq.empty @@ -45,9 +46,11 @@ case class AlterTableDropPartitionExec( existsPartIdents match { case Seq() => // Nothing will be done case Seq(partIdent) => - table.dropPartition(partIdent) + if (purge) 
table.purgePartition(partIdent) else table.dropPartition(partIdent) case _ if table.isInstanceOf[SupportsAtomicPartitionManagement] => - table.asAtomicPartitionable.dropPartitions(existsPartIdents.toArray) + val idents = existsPartIdents.toArray + val atomicTable = table.asAtomicPartitionable + if (purge) atomicTable.purgePartitions(idents) else atomicTable.dropPartitions(idents) case _ => throw new UnsupportedOperationException( s"Nonatomic partition table ${table.name()} can not drop multiple partitions.") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 0c92945dc6ca5..4667bb7cca998 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -348,9 +348,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat table, parts.asResolvedPartitionSpecs, ignoreIfExists) :: Nil case AlterTableDropPartition( - ResolvedTable(_, _, table: SupportsPartitionManagement), parts, ignoreIfNotExists, _) => + ResolvedTable(_, _, table: SupportsPartitionManagement), parts, ignoreIfNotExists, purge) => AlterTableDropPartitionExec( - table, parts.asResolvedPartitionSpecs, ignoreIfNotExists) :: Nil + table, parts.asResolvedPartitionSpecs, ignoreIfNotExists, purge) :: Nil case AlterTableRenamePartition(_: ResolvedTable, _: ResolvedPartitionSpec, _) => throw new AnalysisException( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala index 737af96f5abe3..12a99933f6633 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala @@ -21,6 +21,16 @@ import org.apache.spark.sql.execution.command trait AlterTableDropPartitionSuiteBase extends command.AlterTableDropPartitionSuiteBase { override protected val notFullPartitionSpecErr = "The following partitions not found in table" + + test("purge partition data") { + withNamespaceAndTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + sql(s"ALTER TABLE $t ADD PARTITION (id = 1)") + checkPartitions(t, Map("id" -> "1")) + sql(s"ALTER TABLE $t DROP PARTITION (id = 1) PURGE") + checkPartitions(t) // no partitions + } + } } class AlterTableDropPartitionSuite diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala index ffbfe3f695935..e2762f0439cb3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala @@ -35,4 +35,19 @@ class AlterTableDropPartitionSuite assert(errMsg.contains("can not alter partitions")) } } + + test("purge partition data") { + withNamespaceAndTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + sql(s"ALTER TABLE $t ADD PARTITION (id=1)") + try { + val 
errMsg = intercept[UnsupportedOperationException] { + sql(s"ALTER TABLE $t DROP PARTITION (id=1) PURGE") + }.getMessage + assert(errMsg.contains("purge is not supported")) + } finally { + sql(s"ALTER TABLE $t DROP PARTITION (id=1)") + } + } + } } From f421c172d976bf6844b44b0ab9d1e1fa55f380e3 Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Wed, 23 Dec 2020 09:20:49 +0000 Subject: [PATCH 0861/1009] [SPARK-33497][SQL] Override maxRows in some LogicalPlan ### What changes were proposed in this pull request? This PR aims to override maxRows method in these follow `LogicalPlan`: * `ReturnAnswer` * `Join` * `Range` * `Sample` * `RepartitionOperation` * `Deduplicate` * `LocalRelation` * `Window` ### Why are the changes needed? 1. Logically, we know the max rows info with these `LogicalPlan`. 2. Before this PR, we already have some max rows with `LogicalPlan`, so we can eliminate limit with more case if we expand more. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Add test. Closes #30443 from ulysses-you/SPARK-33497. Lead-authored-by: ulysses-you Co-authored-by: ulysses-you Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/dsl/package.scala | 13 ++ .../plans/logical/LocalRelation.scala | 2 + .../plans/logical/basicLogicalOperators.scala | 35 ++- .../optimizer/CombiningLimitsSuite.scala | 129 +++++++++++- .../optimizer/EliminateSortsSuite.scala | 11 +- .../optimizer/LimitPushdownSuite.scala | 13 +- .../approved-plans-v1_4/q28.sf100/explain.txt | 169 ++++++++------- .../q28.sf100/simplified.txt | 125 ++++++----- .../approved-plans-v1_4/q28/explain.txt | 169 ++++++++------- .../approved-plans-v1_4/q28/simplified.txt | 125 ++++++----- .../approved-plans-v1_4/q61.sf100/explain.txt | 147 ++++++------- .../q61.sf100/simplified.txt | 187 ++++++++-------- .../approved-plans-v1_4/q61/explain.txt | 153 +++++++------- .../approved-plans-v1_4/q61/simplified.txt | 199 +++++++++--------- .../approved-plans-v1_4/q90.sf100/explain.txt | 109 +++++----- .../q90.sf100/simplified.txt | 143 +++++++------ .../approved-plans-v1_4/q90/explain.txt | 109 +++++----- .../approved-plans-v1_4/q90/simplified.txt | 141 +++++++------ .../spark/sql/streaming/StreamSuite.scala | 2 +- 19 files changed, 1091 insertions(+), 890 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index 5a778d2785a67..6371fd942597e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -248,6 +248,9 @@ package object dsl { override def expr: Expression = Literal(s) def attr: UnresolvedAttribute = analysis.UnresolvedAttribute(s) } + implicit class DslAttr(attr: UnresolvedAttribute) extends ImplicitAttribute { + def s: String = attr.name + } abstract class ImplicitAttribute extends ImplicitOperators { def s: String @@ -456,6 +459,16 @@ package object dsl { def hint(name: String, parameters: Any*): LogicalPlan = UnresolvedHint(name, parameters, logicalPlan) + + def sample( + lowerBound: Double, + upperBound: Double, + withReplacement: Boolean, + seed: Long): LogicalPlan = { + Sample(lowerBound, upperBound, withReplacement, seed, logicalPlan) + } + + def deduplicate(colNames: Attribute*): LogicalPlan = Deduplicate(colNames, logicalPlan) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala index 8c4828a4cef23..7f5a78b6217b4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala @@ -91,4 +91,6 @@ case class LocalRelation( " AS " + inlineTableName + output.map(_.name).mkString("(", ", ", ")") } + + override def maxRows: Option[Long] = Some(data.length.toLong) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 8c111aa750809..97bc0083276bc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -37,6 +37,7 @@ import org.apache.spark.util.random.RandomSampler * at the top of the logical query plan. */ case class ReturnAnswer(child: LogicalPlan) extends UnaryNode { + override def maxRows: Option[Long] = child.maxRows override def output: Seq[Attribute] = child.output } @@ -326,6 +327,25 @@ case class Join( hint: JoinHint) extends BinaryNode with PredicateHelper { + override def maxRows: Option[Long] = { + joinType match { + case Inner | Cross | FullOuter | LeftOuter | RightOuter + if left.maxRows.isDefined && right.maxRows.isDefined => + val maxRows = BigInt(left.maxRows.get) * BigInt(right.maxRows.get) + if (maxRows.isValidLong) { + Some(maxRows.toLong) + } else { + None + } + + case LeftSemi | LeftAnti => + left.maxRows + + case _ => + None + } + } + override def output: Seq[Attribute] = { joinType match { case j: ExistenceJoin => @@ -574,6 +594,14 @@ case class Range( s"Range ($start, $end, step=$step, splits=$numSlices)" } + override def maxRows: Option[Long] = { + if (numElements.isValidLong) { + Some(numElements.toLong) + } else { + None + } + } + override def computeStats(): Statistics = { Statistics(sizeInBytes = LongType.defaultSize * numElements) } @@ -635,7 +663,7 @@ case class Window( partitionSpec: Seq[Expression], orderSpec: Seq[SortOrder], child: LogicalPlan) extends UnaryNode { - + override def maxRows: Option[Long] = child.maxRows override def output: Seq[Attribute] = child.output ++ windowExpressions.map(_.toAttribute) @@ -974,6 +1002,7 @@ case class Sample( s"Sampling fraction ($fraction) must be on interval [0, 1] without replacement") } + override def maxRows: Option[Long] = child.maxRows override def output: Seq[Attribute] = child.output } @@ -991,6 +1020,7 @@ case class Distinct(child: LogicalPlan) extends UnaryNode { abstract class RepartitionOperation extends UnaryNode { def shuffle: Boolean def numPartitions: Int + override final def maxRows: Option[Long] = child.maxRows override def output: Seq[Attribute] = child.output } @@ -1050,7 +1080,6 @@ case class RepartitionByExpression( } } - override def maxRows: Option[Long] = child.maxRows override def shuffle: Boolean = true } @@ -1083,7 +1112,7 @@ case class OneRowRelation() extends LeafNode { case class Deduplicate( keys: Seq[Attribute], child: LogicalPlan) extends UnaryNode { - + override def maxRows: Option[Long] = child.maxRows override def output: Seq[Attribute] = child.output } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala index 70f130f834c68..11f908ac180bc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala @@ -17,9 +17,11 @@ package org.apache.spark.sql.catalyst.optimizer +import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ @@ -39,7 +41,16 @@ class CombiningLimitsSuite extends PlanTest { SimplifyConditionals) :: Nil } - val testRelation = LocalRelation('a.int, 'b.int, 'c.int) + val testRelation = LocalRelation.fromExternalRows( + Seq("a".attr.int, "b".attr.int, "c".attr.int), + 1.to(10).map(_ => Row(1, 2, 3)) + ) + val testRelation2 = LocalRelation.fromExternalRows( + Seq("x".attr.int, "y".attr.int, "z".attr.int), + Seq(Row(1, 2, 3), Row(2, 3, 4)) + ) + val testRelation3 = RelationWithoutMaxRows(Seq("i".attr.int)) + val testRelation4 = LongMaxRelation(Seq("j".attr.int)) test("limits: combines two limits") { val originalQuery = @@ -117,4 +128,118 @@ class CombiningLimitsSuite extends PlanTest { testRelation.select().groupBy()(count(1)).orderBy(count(1).asc).analyze) comparePlans(optimized4, expected4) } + + test("SPARK-33497: Eliminate Limit if LocalRelation max rows not larger than Limit") { + checkPlanAndMaxRow( + testRelation.select().limit(10), + testRelation.select(), + 10 + ) + } + + test("SPARK-33497: Eliminate Limit if Range max rows not larger than Limit") { + checkPlanAndMaxRow( + Range(0, 100, 1, None).select().limit(200), + Range(0, 100, 1, None).select(), + 100 + ) + checkPlanAndMaxRow( + Range(-1, Long.MaxValue, 1, None).select().limit(1), + Range(-1, Long.MaxValue, 1, None).select().limit(1), + 1 + ) + } + + test("SPARK-33497: Eliminate Limit if Sample max rows not larger than Limit") { + checkPlanAndMaxRow( + testRelation.select().sample(0, 0.2, false, 1).limit(10), + testRelation.select().sample(0, 0.2, false, 1), + 10 + ) + } + + test("SPARK-33497: Eliminate Limit if Deduplicate max rows not larger than Limit") { + checkPlanAndMaxRow( + testRelation.deduplicate("a".attr).limit(10), + testRelation.deduplicate("a".attr), + 10 + ) + } + + test("SPARK-33497: Eliminate Limit if Repartition max rows not larger than Limit") { + checkPlanAndMaxRow( + testRelation.repartition(2).limit(10), + testRelation.repartition(2), + 10 + ) + checkPlanAndMaxRow( + testRelation.distribute("a".attr)(2).limit(10), + testRelation.distribute("a".attr)(2), + 10 + ) + } + + test("SPARK-33497: Eliminate Limit if Join max rows not larger than Limit") { + Seq(Inner, FullOuter, LeftOuter, RightOuter).foreach { joinType => + checkPlanAndMaxRow( + testRelation.join(testRelation2, joinType).limit(20), + testRelation.join(testRelation2, joinType), + 20 + ) + checkPlanAndMaxRow( + testRelation.join(testRelation2, joinType).limit(10), + testRelation.join(testRelation2, joinType).limit(10), + 10 + ) + // without maxRow + checkPlanAndMaxRow( + testRelation.join(testRelation3, joinType).limit(100), + testRelation.join(testRelation3, joinType).limit(100), + 100 + ) + // maxRow is not valid long + checkPlanAndMaxRow( + 
testRelation.join(testRelation4, joinType).limit(100), + testRelation.join(testRelation4, joinType).limit(100), + 100 + ) + } + + Seq(LeftSemi, LeftAnti).foreach { joinType => + checkPlanAndMaxRow( + testRelation.join(testRelation2, joinType).limit(5), + testRelation.join(testRelation2.select(), joinType).limit(5), + 5 + ) + checkPlanAndMaxRow( + testRelation.join(testRelation2, joinType).limit(10), + testRelation.join(testRelation2.select(), joinType), + 10 + ) + } + } + + test("SPARK-33497: Eliminate Limit if Window max rows not larger than Limit") { + checkPlanAndMaxRow( + testRelation.window( + Seq(count(1).as("c")), Seq("a".attr), Seq("b".attr.asc)).limit(20), + testRelation.window( + Seq(count(1).as("c")), Seq("a".attr), Seq("b".attr.asc)), + 10 + ) + } + + private def checkPlanAndMaxRow( + optimized: LogicalPlan, expected: LogicalPlan, expectedMaxRow: Long): Unit = { + comparePlans(Optimize.execute(optimized.analyze), expected.analyze) + assert(expected.maxRows.get == expectedMaxRow) + } +} + +case class RelationWithoutMaxRows(output: Seq[Attribute]) extends LeafNode { + override def maxRows: Option[Long] = None +} + +case class LongMaxRelation(output: Seq[Attribute]) extends LeafNode { + override def maxRows: Option[Long] = Some(Long.MaxValue) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala index 62deebd930752..01ecbd808c251 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.api.python.PythonEvalType +import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.analysis.AnalysisTest import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ @@ -48,8 +49,14 @@ class EliminateSortsSuite extends AnalysisTest { Batch("Limit PushDown", FixedPoint(10), LimitPushDown) :: Nil } - val testRelation = LocalRelation('a.int, 'b.int, 'c.int) - val testRelationB = LocalRelation('d.int) + val testRelation = LocalRelation.fromExternalRows( + Seq("a".attr.int, "b".attr.int, "c".attr.int), + 1.to(12).map(_ => Row(1, 2, 3)) + ) + val testRelationB = LocalRelation.fromExternalRows( + Seq("d".attr.int), + 1.to(12).map(_ => Row(1)) + ) test("Empty order by clause") { val x = testRelation diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala index e365e3300096e..bb23b63c03cea 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.catalyst.optimizer +import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ @@ -38,8 +39,12 @@ class LimitPushdownSuite extends PlanTest { BooleanSimplification) :: Nil } - private val testRelation = LocalRelation('a.int, 'b.int, 'c.int) - private val testRelation2 = LocalRelation('d.int, 'e.int, 'f.int) + private val testRelation = 
LocalRelation.fromExternalRows( + Seq("a".attr.int, "b".attr.int, "c".attr.int), + 1.to(6).map(_ => Row(1, 2, 3))) + private val testRelation2 = LocalRelation.fromExternalRows( + Seq("d".attr.int, "e".attr.int, "f".attr.int), + 1.to(6).map(_ => Row(1, 2, 3))) private val x = testRelation.subquery('x) private val y = testRelation.subquery('y) @@ -148,7 +153,7 @@ class LimitPushdownSuite extends PlanTest { } test("full outer join where neither side is limited and left side has larger statistics") { - val xBig = testRelation.copy(data = Seq.fill(2)(null)).subquery('x) + val xBig = testRelation.copy(data = Seq.fill(10)(null)).subquery('x) assert(xBig.stats.sizeInBytes > y.stats.sizeInBytes) val originalQuery = xBig.join(y, FullOuter).limit(1).analyze val optimized = Optimize.execute(originalQuery) @@ -157,7 +162,7 @@ class LimitPushdownSuite extends PlanTest { } test("full outer join where neither side is limited and right side has larger statistics") { - val yBig = testRelation.copy(data = Seq.fill(2)(null)).subquery('y) + val yBig = testRelation.copy(data = Seq.fill(10)(null)).subquery('y) assert(x.stats.sizeInBytes < yBig.stats.sizeInBytes) val originalQuery = x.join(yBig, FullOuter).limit(1).analyze val optimized = Optimize.execute(originalQuery) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.sf100/explain.txt index 9788040bbe6de..a0f029c9b9325 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.sf100/explain.txt @@ -1,82 +1,81 @@ == Physical Plan == -CollectLimit (71) -+- BroadcastNestedLoopJoin Inner BuildRight (70) - :- BroadcastNestedLoopJoin Inner BuildRight (58) - : :- BroadcastNestedLoopJoin Inner BuildRight (46) - : : :- BroadcastNestedLoopJoin Inner BuildRight (34) - : : : :- BroadcastNestedLoopJoin Inner BuildRight (22) - : : : : :- * HashAggregate (10) - : : : : : +- Exchange (9) - : : : : : +- * HashAggregate (8) - : : : : : +- * HashAggregate (7) - : : : : : +- Exchange (6) - : : : : : +- * HashAggregate (5) - : : : : : +- * Project (4) - : : : : : +- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.store_sales (1) - : : : : +- BroadcastExchange (21) - : : : : +- * HashAggregate (20) - : : : : +- Exchange (19) - : : : : +- * HashAggregate (18) - : : : : +- * HashAggregate (17) - : : : : +- Exchange (16) - : : : : +- * HashAggregate (15) - : : : : +- * Project (14) - : : : : +- * Filter (13) - : : : : +- * ColumnarToRow (12) - : : : : +- Scan parquet default.store_sales (11) - : : : +- BroadcastExchange (33) - : : : +- * HashAggregate (32) - : : : +- Exchange (31) - : : : +- * HashAggregate (30) - : : : +- * HashAggregate (29) - : : : +- Exchange (28) - : : : +- * HashAggregate (27) - : : : +- * Project (26) - : : : +- * Filter (25) - : : : +- * ColumnarToRow (24) - : : : +- Scan parquet default.store_sales (23) - : : +- BroadcastExchange (45) - : : +- * HashAggregate (44) - : : +- Exchange (43) - : : +- * HashAggregate (42) - : : +- * HashAggregate (41) - : : +- Exchange (40) - : : +- * HashAggregate (39) - : : +- * Project (38) - : : +- * Filter (37) - : : +- * ColumnarToRow (36) - : : +- Scan parquet default.store_sales (35) - : +- BroadcastExchange (57) - : +- * HashAggregate (56) - : +- Exchange (55) - : +- * HashAggregate (54) - : +- * HashAggregate (53) - : +- Exchange (52) - : 
+- * HashAggregate (51) - : +- * Project (50) - : +- * Filter (49) - : +- * ColumnarToRow (48) - : +- Scan parquet default.store_sales (47) - +- BroadcastExchange (69) - +- * HashAggregate (68) - +- Exchange (67) - +- * HashAggregate (66) - +- * HashAggregate (65) - +- Exchange (64) - +- * HashAggregate (63) - +- * Project (62) - +- * Filter (61) - +- * ColumnarToRow (60) - +- Scan parquet default.store_sales (59) +BroadcastNestedLoopJoin Inner BuildRight (70) +:- BroadcastNestedLoopJoin Inner BuildRight (58) +: :- BroadcastNestedLoopJoin Inner BuildRight (46) +: : :- BroadcastNestedLoopJoin Inner BuildRight (34) +: : : :- BroadcastNestedLoopJoin Inner BuildRight (22) +: : : : :- * HashAggregate (10) +: : : : : +- Exchange (9) +: : : : : +- * HashAggregate (8) +: : : : : +- * HashAggregate (7) +: : : : : +- Exchange (6) +: : : : : +- * HashAggregate (5) +: : : : : +- * Project (4) +: : : : : +- * Filter (3) +: : : : : +- * ColumnarToRow (2) +: : : : : +- Scan parquet default.store_sales (1) +: : : : +- BroadcastExchange (21) +: : : : +- * HashAggregate (20) +: : : : +- Exchange (19) +: : : : +- * HashAggregate (18) +: : : : +- * HashAggregate (17) +: : : : +- Exchange (16) +: : : : +- * HashAggregate (15) +: : : : +- * Project (14) +: : : : +- * Filter (13) +: : : : +- * ColumnarToRow (12) +: : : : +- Scan parquet default.store_sales (11) +: : : +- BroadcastExchange (33) +: : : +- * HashAggregate (32) +: : : +- Exchange (31) +: : : +- * HashAggregate (30) +: : : +- * HashAggregate (29) +: : : +- Exchange (28) +: : : +- * HashAggregate (27) +: : : +- * Project (26) +: : : +- * Filter (25) +: : : +- * ColumnarToRow (24) +: : : +- Scan parquet default.store_sales (23) +: : +- BroadcastExchange (45) +: : +- * HashAggregate (44) +: : +- Exchange (43) +: : +- * HashAggregate (42) +: : +- * HashAggregate (41) +: : +- Exchange (40) +: : +- * HashAggregate (39) +: : +- * Project (38) +: : +- * Filter (37) +: : +- * ColumnarToRow (36) +: : +- Scan parquet default.store_sales (35) +: +- BroadcastExchange (57) +: +- * HashAggregate (56) +: +- Exchange (55) +: +- * HashAggregate (54) +: +- * HashAggregate (53) +: +- Exchange (52) +: +- * HashAggregate (51) +: +- * Project (50) +: +- * Filter (49) +: +- * ColumnarToRow (48) +: +- Scan parquet default.store_sales (47) ++- BroadcastExchange (69) + +- * HashAggregate (68) + +- Exchange (67) + +- * HashAggregate (66) + +- * HashAggregate (65) + +- Exchange (64) + +- * HashAggregate (63) + +- * Project (62) + +- * Filter (61) + +- * ColumnarToRow (60) + +- Scan parquet default.store_sales (59) (1) Scan parquet default.store_sales Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,0), LessThanOrEqual(ss_quantity,5)] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,0), LessThanOrEqual(ss_quantity,5), Or(Or(And(GreaterThanOrEqual(ss_list_price,8.00),LessThanOrEqual(ss_list_price,18.00)),And(GreaterThanOrEqual(ss_coupon_amt,459.00),LessThanOrEqual(ss_coupon_amt,1459.00))),And(GreaterThanOrEqual(ss_wholesale_cost,57.00),LessThanOrEqual(ss_wholesale_cost,77.00)))] ReadSchema: struct (2) ColumnarToRow [codegen id : 1] @@ -84,7 +83,7 @@ Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4 (3) Filter [codegen id : 1] Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : 
(((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 0)) AND (ss_quantity#1 <= 5)) AND ((((ss_list_price#3 >= 8.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 18.00)) OR ((ss_coupon_amt#4 >= 459.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 1459.00))) OR ((ss_wholesale_cost#2 >= 57.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 77.00)))) +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 0)) AND (ss_quantity#1 <= 5)) AND ((((ss_list_price#3 >= 8.00) AND (ss_list_price#3 <= 18.00)) OR ((ss_coupon_amt#4 >= 459.00) AND (ss_coupon_amt#4 <= 1459.00))) OR ((ss_wholesale_cost#2 >= 57.00) AND (ss_wholesale_cost#2 <= 77.00)))) (4) Project [codegen id : 1] Output [1]: [ss_list_price#3] @@ -130,7 +129,7 @@ Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#5 / 100.0) as decimal(11 Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,6), LessThanOrEqual(ss_quantity,10)] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,6), LessThanOrEqual(ss_quantity,10), Or(Or(And(GreaterThanOrEqual(ss_list_price,90.00),LessThanOrEqual(ss_list_price,100.00)),And(GreaterThanOrEqual(ss_coupon_amt,2323.00),LessThanOrEqual(ss_coupon_amt,3323.00))),And(GreaterThanOrEqual(ss_wholesale_cost,31.00),LessThanOrEqual(ss_wholesale_cost,51.00)))] ReadSchema: struct (12) ColumnarToRow [codegen id : 4] @@ -138,7 +137,7 @@ Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4 (13) Filter [codegen id : 4] Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 6)) AND (ss_quantity#1 <= 10)) AND ((((ss_list_price#3 >= 90.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 100.00)) OR ((ss_coupon_amt#4 >= 2323.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 3323.00))) OR ((ss_wholesale_cost#2 >= 31.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 51.00)))) +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 6)) AND (ss_quantity#1 <= 10)) AND ((((ss_list_price#3 >= 90.00) AND (ss_list_price#3 <= 100.00)) OR ((ss_coupon_amt#4 >= 2323.00) AND (ss_coupon_amt#4 <= 3323.00))) OR ((ss_wholesale_cost#2 >= 31.00) AND (ss_wholesale_cost#2 <= 51.00)))) (14) Project [codegen id : 4] Output [1]: [ss_list_price#3] @@ -191,7 +190,7 @@ Join condition: None Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,11), LessThanOrEqual(ss_quantity,15)] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,11), LessThanOrEqual(ss_quantity,15), Or(Or(And(GreaterThanOrEqual(ss_list_price,142.00),LessThanOrEqual(ss_list_price,152.00)),And(GreaterThanOrEqual(ss_coupon_amt,12214.00),LessThanOrEqual(ss_coupon_amt,13214.00))),And(GreaterThanOrEqual(ss_wholesale_cost,79.00),LessThanOrEqual(ss_wholesale_cost,99.00)))] ReadSchema: struct (24) ColumnarToRow [codegen id : 7] @@ -199,7 +198,7 @@ Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4 (25) Filter [codegen id : 7] Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 11)) AND (ss_quantity#1 <= 15)) AND ((((ss_list_price#3 >= 142.00) 
AND (cast(ss_list_price#3 as decimal(12,2)) <= 152.00)) OR ((ss_coupon_amt#4 >= 12214.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 13214.00))) OR ((ss_wholesale_cost#2 >= 79.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 99.00)))) +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 11)) AND (ss_quantity#1 <= 15)) AND ((((ss_list_price#3 >= 142.00) AND (ss_list_price#3 <= 152.00)) OR ((ss_coupon_amt#4 >= 12214.00) AND (ss_coupon_amt#4 <= 13214.00))) OR ((ss_wholesale_cost#2 >= 79.00) AND (ss_wholesale_cost#2 <= 99.00)))) (26) Project [codegen id : 7] Output [1]: [ss_list_price#3] @@ -252,7 +251,7 @@ Join condition: None Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,16), LessThanOrEqual(ss_quantity,20)] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,16), LessThanOrEqual(ss_quantity,20), Or(Or(And(GreaterThanOrEqual(ss_list_price,135.00),LessThanOrEqual(ss_list_price,145.00)),And(GreaterThanOrEqual(ss_coupon_amt,6071.00),LessThanOrEqual(ss_coupon_amt,7071.00))),And(GreaterThanOrEqual(ss_wholesale_cost,38.00),LessThanOrEqual(ss_wholesale_cost,58.00)))] ReadSchema: struct (36) ColumnarToRow [codegen id : 10] @@ -260,7 +259,7 @@ Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4 (37) Filter [codegen id : 10] Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 16)) AND (ss_quantity#1 <= 20)) AND ((((ss_list_price#3 >= 135.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 145.00)) OR ((ss_coupon_amt#4 >= 6071.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 7071.00))) OR ((ss_wholesale_cost#2 >= 38.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 58.00)))) +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 16)) AND (ss_quantity#1 <= 20)) AND ((((ss_list_price#3 >= 135.00) AND (ss_list_price#3 <= 145.00)) OR ((ss_coupon_amt#4 >= 6071.00) AND (ss_coupon_amt#4 <= 7071.00))) OR ((ss_wholesale_cost#2 >= 38.00) AND (ss_wholesale_cost#2 <= 58.00)))) (38) Project [codegen id : 10] Output [1]: [ss_list_price#3] @@ -313,7 +312,7 @@ Join condition: None Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,25)] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,25), Or(Or(And(GreaterThanOrEqual(ss_list_price,122.00),LessThanOrEqual(ss_list_price,132.00)),And(GreaterThanOrEqual(ss_coupon_amt,836.00),LessThanOrEqual(ss_coupon_amt,1836.00))),And(GreaterThanOrEqual(ss_wholesale_cost,17.00),LessThanOrEqual(ss_wholesale_cost,37.00)))] ReadSchema: struct (48) ColumnarToRow [codegen id : 13] @@ -321,7 +320,7 @@ Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4 (49) Filter [codegen id : 13] Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 21)) AND (ss_quantity#1 <= 25)) AND ((((ss_list_price#3 >= 122.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 132.00)) OR ((ss_coupon_amt#4 >= 836.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 1836.00))) OR 
((ss_wholesale_cost#2 >= 17.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 37.00)))) +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 21)) AND (ss_quantity#1 <= 25)) AND ((((ss_list_price#3 >= 122.00) AND (ss_list_price#3 <= 132.00)) OR ((ss_coupon_amt#4 >= 836.00) AND (ss_coupon_amt#4 <= 1836.00))) OR ((ss_wholesale_cost#2 >= 17.00) AND (ss_wholesale_cost#2 <= 37.00)))) (50) Project [codegen id : 13] Output [1]: [ss_list_price#3] @@ -374,7 +373,7 @@ Join condition: None Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,26), LessThanOrEqual(ss_quantity,30)] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,26), LessThanOrEqual(ss_quantity,30), Or(Or(And(GreaterThanOrEqual(ss_list_price,154.00),LessThanOrEqual(ss_list_price,164.00)),And(GreaterThanOrEqual(ss_coupon_amt,7326.00),LessThanOrEqual(ss_coupon_amt,8326.00))),And(GreaterThanOrEqual(ss_wholesale_cost,7.00),LessThanOrEqual(ss_wholesale_cost,27.00)))] ReadSchema: struct (60) ColumnarToRow [codegen id : 16] @@ -382,7 +381,7 @@ Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4 (61) Filter [codegen id : 16] Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 26)) AND (ss_quantity#1 <= 30)) AND ((((ss_list_price#3 >= 154.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 164.00)) OR ((ss_coupon_amt#4 >= 7326.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 8326.00))) OR ((ss_wholesale_cost#2 >= 7.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 27.00)))) +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 26)) AND (ss_quantity#1 <= 30)) AND ((((ss_list_price#3 >= 154.00) AND (ss_list_price#3 <= 164.00)) OR ((ss_coupon_amt#4 >= 7326.00) AND (ss_coupon_amt#4 <= 8326.00))) OR ((ss_wholesale_cost#2 >= 7.00) AND (ss_wholesale_cost#2 <= 27.00)))) (62) Project [codegen id : 16] Output [1]: [ss_list_price#3] @@ -431,7 +430,3 @@ Arguments: IdentityBroadcastMode, [id=#81] (70) BroadcastNestedLoopJoin Join condition: None -(71) CollectLimit -Input [18]: [B1_LP#14, B1_CNT#15, B1_CNTD#16, B2_LP#26, B2_CNT#27, B2_CNTD#28, B3_LP#39, B3_CNT#40, B3_CNTD#41, B4_LP#52, B4_CNT#53, B4_CNTD#54, B5_LP#65, B5_CNT#66, B5_CNTD#67, B6_LP#78, B6_CNT#79, B6_CNTD#80] -Arguments: 100 - diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.sf100/simplified.txt index d896002b0965d..77afa321d3ee4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.sf100/simplified.txt @@ -1,107 +1,106 @@ -CollectLimit +BroadcastNestedLoopJoin BroadcastNestedLoopJoin BroadcastNestedLoopJoin BroadcastNestedLoopJoin BroadcastNestedLoopJoin - BroadcastNestedLoopJoin - WholeStageCodegen (3) - HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B1_LP,B1_CNT,B1_CNTD,sum,count,count,count] - InputAdapter - Exchange #1 - WholeStageCodegen (2) - HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] - HashAggregate 
[ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] - InputAdapter - Exchange [ss_list_price] #2 - WholeStageCodegen (1) - HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] - Project [ss_list_price] - Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] - BroadcastExchange #3 - WholeStageCodegen (6) - HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B2_LP,B2_CNT,B2_CNTD,sum,count,count,count] - InputAdapter - Exchange #4 - WholeStageCodegen (5) - HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] - HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] - InputAdapter - Exchange [ss_list_price] #5 - WholeStageCodegen (4) - HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] - Project [ss_list_price] - Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] - BroadcastExchange #6 - WholeStageCodegen (9) - HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B3_LP,B3_CNT,B3_CNTD,sum,count,count,count] + WholeStageCodegen (3) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B1_LP,B1_CNT,B1_CNTD,sum,count,count,count] + InputAdapter + Exchange #1 + WholeStageCodegen (2) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + InputAdapter + Exchange [ss_list_price] #2 + WholeStageCodegen (1) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + Project [ss_list_price] + Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + BroadcastExchange #3 + WholeStageCodegen (6) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B2_LP,B2_CNT,B2_CNTD,sum,count,count,count] InputAdapter - Exchange #7 - WholeStageCodegen (8) + Exchange #4 + WholeStageCodegen (5) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] InputAdapter - Exchange [ss_list_price] #8 - WholeStageCodegen (7) + Exchange [ss_list_price] #5 + WholeStageCodegen (4) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] ColumnarToRow InputAdapter Scan parquet default.store_sales 
[ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] - BroadcastExchange #9 - WholeStageCodegen (12) - HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B4_LP,B4_CNT,B4_CNTD,sum,count,count,count] + BroadcastExchange #6 + WholeStageCodegen (9) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B3_LP,B3_CNT,B3_CNTD,sum,count,count,count] InputAdapter - Exchange #10 - WholeStageCodegen (11) + Exchange #7 + WholeStageCodegen (8) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] InputAdapter - Exchange [ss_list_price] #11 - WholeStageCodegen (10) + Exchange [ss_list_price] #8 + WholeStageCodegen (7) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] ColumnarToRow InputAdapter Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] - BroadcastExchange #12 - WholeStageCodegen (15) - HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B5_LP,B5_CNT,B5_CNTD,sum,count,count,count] + BroadcastExchange #9 + WholeStageCodegen (12) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B4_LP,B4_CNT,B4_CNTD,sum,count,count,count] InputAdapter - Exchange #13 - WholeStageCodegen (14) + Exchange #10 + WholeStageCodegen (11) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] InputAdapter - Exchange [ss_list_price] #14 - WholeStageCodegen (13) + Exchange [ss_list_price] #11 + WholeStageCodegen (10) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] ColumnarToRow InputAdapter Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] - BroadcastExchange #15 - WholeStageCodegen (18) - HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B6_LP,B6_CNT,B6_CNTD,sum,count,count,count] + BroadcastExchange #12 + WholeStageCodegen (15) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B5_LP,B5_CNT,B5_CNTD,sum,count,count,count] InputAdapter - Exchange #16 - WholeStageCodegen (17) + Exchange #13 + WholeStageCodegen (14) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] InputAdapter - Exchange [ss_list_price] #17 - WholeStageCodegen (16) + Exchange [ss_list_price] #14 + WholeStageCodegen (13) HashAggregate [ss_list_price] 
[avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] ColumnarToRow InputAdapter Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + BroadcastExchange #15 + WholeStageCodegen (18) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B6_LP,B6_CNT,B6_CNTD,sum,count,count,count] + InputAdapter + Exchange #16 + WholeStageCodegen (17) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + InputAdapter + Exchange [ss_list_price] #17 + WholeStageCodegen (16) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + Project [ss_list_price] + Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt index 9788040bbe6de..a0f029c9b9325 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt @@ -1,82 +1,81 @@ == Physical Plan == -CollectLimit (71) -+- BroadcastNestedLoopJoin Inner BuildRight (70) - :- BroadcastNestedLoopJoin Inner BuildRight (58) - : :- BroadcastNestedLoopJoin Inner BuildRight (46) - : : :- BroadcastNestedLoopJoin Inner BuildRight (34) - : : : :- BroadcastNestedLoopJoin Inner BuildRight (22) - : : : : :- * HashAggregate (10) - : : : : : +- Exchange (9) - : : : : : +- * HashAggregate (8) - : : : : : +- * HashAggregate (7) - : : : : : +- Exchange (6) - : : : : : +- * HashAggregate (5) - : : : : : +- * Project (4) - : : : : : +- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.store_sales (1) - : : : : +- BroadcastExchange (21) - : : : : +- * HashAggregate (20) - : : : : +- Exchange (19) - : : : : +- * HashAggregate (18) - : : : : +- * HashAggregate (17) - : : : : +- Exchange (16) - : : : : +- * HashAggregate (15) - : : : : +- * Project (14) - : : : : +- * Filter (13) - : : : : +- * ColumnarToRow (12) - : : : : +- Scan parquet default.store_sales (11) - : : : +- BroadcastExchange (33) - : : : +- * HashAggregate (32) - : : : +- Exchange (31) - : : : +- * HashAggregate (30) - : : : +- * HashAggregate (29) - : : : +- Exchange (28) - : : : +- * HashAggregate (27) - : : : +- * Project (26) - : : : +- * Filter (25) - : : : +- * ColumnarToRow (24) - : : : +- Scan parquet default.store_sales (23) - : : +- BroadcastExchange (45) - : : +- * HashAggregate (44) - : : +- Exchange (43) - : : +- * HashAggregate (42) - : : +- * HashAggregate (41) - : : +- Exchange (40) - : : +- * HashAggregate (39) - : : +- * Project (38) - : : +- * Filter (37) - : : +- * ColumnarToRow (36) - : : +- Scan parquet default.store_sales (35) - : +- BroadcastExchange (57) - : +- * HashAggregate (56) - : +- Exchange (55) - : +- * HashAggregate (54) - : +- * HashAggregate (53) - : +- Exchange (52) - : +- * HashAggregate (51) - : +- * 
Project (50) - : +- * Filter (49) - : +- * ColumnarToRow (48) - : +- Scan parquet default.store_sales (47) - +- BroadcastExchange (69) - +- * HashAggregate (68) - +- Exchange (67) - +- * HashAggregate (66) - +- * HashAggregate (65) - +- Exchange (64) - +- * HashAggregate (63) - +- * Project (62) - +- * Filter (61) - +- * ColumnarToRow (60) - +- Scan parquet default.store_sales (59) +BroadcastNestedLoopJoin Inner BuildRight (70) +:- BroadcastNestedLoopJoin Inner BuildRight (58) +: :- BroadcastNestedLoopJoin Inner BuildRight (46) +: : :- BroadcastNestedLoopJoin Inner BuildRight (34) +: : : :- BroadcastNestedLoopJoin Inner BuildRight (22) +: : : : :- * HashAggregate (10) +: : : : : +- Exchange (9) +: : : : : +- * HashAggregate (8) +: : : : : +- * HashAggregate (7) +: : : : : +- Exchange (6) +: : : : : +- * HashAggregate (5) +: : : : : +- * Project (4) +: : : : : +- * Filter (3) +: : : : : +- * ColumnarToRow (2) +: : : : : +- Scan parquet default.store_sales (1) +: : : : +- BroadcastExchange (21) +: : : : +- * HashAggregate (20) +: : : : +- Exchange (19) +: : : : +- * HashAggregate (18) +: : : : +- * HashAggregate (17) +: : : : +- Exchange (16) +: : : : +- * HashAggregate (15) +: : : : +- * Project (14) +: : : : +- * Filter (13) +: : : : +- * ColumnarToRow (12) +: : : : +- Scan parquet default.store_sales (11) +: : : +- BroadcastExchange (33) +: : : +- * HashAggregate (32) +: : : +- Exchange (31) +: : : +- * HashAggregate (30) +: : : +- * HashAggregate (29) +: : : +- Exchange (28) +: : : +- * HashAggregate (27) +: : : +- * Project (26) +: : : +- * Filter (25) +: : : +- * ColumnarToRow (24) +: : : +- Scan parquet default.store_sales (23) +: : +- BroadcastExchange (45) +: : +- * HashAggregate (44) +: : +- Exchange (43) +: : +- * HashAggregate (42) +: : +- * HashAggregate (41) +: : +- Exchange (40) +: : +- * HashAggregate (39) +: : +- * Project (38) +: : +- * Filter (37) +: : +- * ColumnarToRow (36) +: : +- Scan parquet default.store_sales (35) +: +- BroadcastExchange (57) +: +- * HashAggregate (56) +: +- Exchange (55) +: +- * HashAggregate (54) +: +- * HashAggregate (53) +: +- Exchange (52) +: +- * HashAggregate (51) +: +- * Project (50) +: +- * Filter (49) +: +- * ColumnarToRow (48) +: +- Scan parquet default.store_sales (47) ++- BroadcastExchange (69) + +- * HashAggregate (68) + +- Exchange (67) + +- * HashAggregate (66) + +- * HashAggregate (65) + +- Exchange (64) + +- * HashAggregate (63) + +- * Project (62) + +- * Filter (61) + +- * ColumnarToRow (60) + +- Scan parquet default.store_sales (59) (1) Scan parquet default.store_sales Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,0), LessThanOrEqual(ss_quantity,5)] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,0), LessThanOrEqual(ss_quantity,5), Or(Or(And(GreaterThanOrEqual(ss_list_price,8.00),LessThanOrEqual(ss_list_price,18.00)),And(GreaterThanOrEqual(ss_coupon_amt,459.00),LessThanOrEqual(ss_coupon_amt,1459.00))),And(GreaterThanOrEqual(ss_wholesale_cost,57.00),LessThanOrEqual(ss_wholesale_cost,77.00)))] ReadSchema: struct (2) ColumnarToRow [codegen id : 1] @@ -84,7 +83,7 @@ Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4 (3) Filter [codegen id : 1] Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 0)) AND 
(ss_quantity#1 <= 5)) AND ((((ss_list_price#3 >= 8.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 18.00)) OR ((ss_coupon_amt#4 >= 459.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 1459.00))) OR ((ss_wholesale_cost#2 >= 57.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 77.00)))) +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 0)) AND (ss_quantity#1 <= 5)) AND ((((ss_list_price#3 >= 8.00) AND (ss_list_price#3 <= 18.00)) OR ((ss_coupon_amt#4 >= 459.00) AND (ss_coupon_amt#4 <= 1459.00))) OR ((ss_wholesale_cost#2 >= 57.00) AND (ss_wholesale_cost#2 <= 77.00)))) (4) Project [codegen id : 1] Output [1]: [ss_list_price#3] @@ -130,7 +129,7 @@ Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#5 / 100.0) as decimal(11 Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,6), LessThanOrEqual(ss_quantity,10)] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,6), LessThanOrEqual(ss_quantity,10), Or(Or(And(GreaterThanOrEqual(ss_list_price,90.00),LessThanOrEqual(ss_list_price,100.00)),And(GreaterThanOrEqual(ss_coupon_amt,2323.00),LessThanOrEqual(ss_coupon_amt,3323.00))),And(GreaterThanOrEqual(ss_wholesale_cost,31.00),LessThanOrEqual(ss_wholesale_cost,51.00)))] ReadSchema: struct (12) ColumnarToRow [codegen id : 4] @@ -138,7 +137,7 @@ Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4 (13) Filter [codegen id : 4] Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 6)) AND (ss_quantity#1 <= 10)) AND ((((ss_list_price#3 >= 90.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 100.00)) OR ((ss_coupon_amt#4 >= 2323.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 3323.00))) OR ((ss_wholesale_cost#2 >= 31.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 51.00)))) +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 6)) AND (ss_quantity#1 <= 10)) AND ((((ss_list_price#3 >= 90.00) AND (ss_list_price#3 <= 100.00)) OR ((ss_coupon_amt#4 >= 2323.00) AND (ss_coupon_amt#4 <= 3323.00))) OR ((ss_wholesale_cost#2 >= 31.00) AND (ss_wholesale_cost#2 <= 51.00)))) (14) Project [codegen id : 4] Output [1]: [ss_list_price#3] @@ -191,7 +190,7 @@ Join condition: None Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,11), LessThanOrEqual(ss_quantity,15)] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,11), LessThanOrEqual(ss_quantity,15), Or(Or(And(GreaterThanOrEqual(ss_list_price,142.00),LessThanOrEqual(ss_list_price,152.00)),And(GreaterThanOrEqual(ss_coupon_amt,12214.00),LessThanOrEqual(ss_coupon_amt,13214.00))),And(GreaterThanOrEqual(ss_wholesale_cost,79.00),LessThanOrEqual(ss_wholesale_cost,99.00)))] ReadSchema: struct (24) ColumnarToRow [codegen id : 7] @@ -199,7 +198,7 @@ Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4 (25) Filter [codegen id : 7] Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 11)) AND (ss_quantity#1 <= 15)) AND ((((ss_list_price#3 >= 142.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 152.00)) 
OR ((ss_coupon_amt#4 >= 12214.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 13214.00))) OR ((ss_wholesale_cost#2 >= 79.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 99.00)))) +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 11)) AND (ss_quantity#1 <= 15)) AND ((((ss_list_price#3 >= 142.00) AND (ss_list_price#3 <= 152.00)) OR ((ss_coupon_amt#4 >= 12214.00) AND (ss_coupon_amt#4 <= 13214.00))) OR ((ss_wholesale_cost#2 >= 79.00) AND (ss_wholesale_cost#2 <= 99.00)))) (26) Project [codegen id : 7] Output [1]: [ss_list_price#3] @@ -252,7 +251,7 @@ Join condition: None Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,16), LessThanOrEqual(ss_quantity,20)] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,16), LessThanOrEqual(ss_quantity,20), Or(Or(And(GreaterThanOrEqual(ss_list_price,135.00),LessThanOrEqual(ss_list_price,145.00)),And(GreaterThanOrEqual(ss_coupon_amt,6071.00),LessThanOrEqual(ss_coupon_amt,7071.00))),And(GreaterThanOrEqual(ss_wholesale_cost,38.00),LessThanOrEqual(ss_wholesale_cost,58.00)))] ReadSchema: struct (36) ColumnarToRow [codegen id : 10] @@ -260,7 +259,7 @@ Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4 (37) Filter [codegen id : 10] Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 16)) AND (ss_quantity#1 <= 20)) AND ((((ss_list_price#3 >= 135.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 145.00)) OR ((ss_coupon_amt#4 >= 6071.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 7071.00))) OR ((ss_wholesale_cost#2 >= 38.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 58.00)))) +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 16)) AND (ss_quantity#1 <= 20)) AND ((((ss_list_price#3 >= 135.00) AND (ss_list_price#3 <= 145.00)) OR ((ss_coupon_amt#4 >= 6071.00) AND (ss_coupon_amt#4 <= 7071.00))) OR ((ss_wholesale_cost#2 >= 38.00) AND (ss_wholesale_cost#2 <= 58.00)))) (38) Project [codegen id : 10] Output [1]: [ss_list_price#3] @@ -313,7 +312,7 @@ Join condition: None Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,25)] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,25), Or(Or(And(GreaterThanOrEqual(ss_list_price,122.00),LessThanOrEqual(ss_list_price,132.00)),And(GreaterThanOrEqual(ss_coupon_amt,836.00),LessThanOrEqual(ss_coupon_amt,1836.00))),And(GreaterThanOrEqual(ss_wholesale_cost,17.00),LessThanOrEqual(ss_wholesale_cost,37.00)))] ReadSchema: struct (48) ColumnarToRow [codegen id : 13] @@ -321,7 +320,7 @@ Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4 (49) Filter [codegen id : 13] Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 21)) AND (ss_quantity#1 <= 25)) AND ((((ss_list_price#3 >= 122.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 132.00)) OR ((ss_coupon_amt#4 >= 836.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 1836.00))) OR ((ss_wholesale_cost#2 >= 17.00) AND 
(cast(ss_wholesale_cost#2 as decimal(12,2)) <= 37.00)))) +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 21)) AND (ss_quantity#1 <= 25)) AND ((((ss_list_price#3 >= 122.00) AND (ss_list_price#3 <= 132.00)) OR ((ss_coupon_amt#4 >= 836.00) AND (ss_coupon_amt#4 <= 1836.00))) OR ((ss_wholesale_cost#2 >= 17.00) AND (ss_wholesale_cost#2 <= 37.00)))) (50) Project [codegen id : 13] Output [1]: [ss_list_price#3] @@ -374,7 +373,7 @@ Join condition: None Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,26), LessThanOrEqual(ss_quantity,30)] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,26), LessThanOrEqual(ss_quantity,30), Or(Or(And(GreaterThanOrEqual(ss_list_price,154.00),LessThanOrEqual(ss_list_price,164.00)),And(GreaterThanOrEqual(ss_coupon_amt,7326.00),LessThanOrEqual(ss_coupon_amt,8326.00))),And(GreaterThanOrEqual(ss_wholesale_cost,7.00),LessThanOrEqual(ss_wholesale_cost,27.00)))] ReadSchema: struct (60) ColumnarToRow [codegen id : 16] @@ -382,7 +381,7 @@ Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4 (61) Filter [codegen id : 16] Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 26)) AND (ss_quantity#1 <= 30)) AND ((((ss_list_price#3 >= 154.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 164.00)) OR ((ss_coupon_amt#4 >= 7326.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 8326.00))) OR ((ss_wholesale_cost#2 >= 7.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 27.00)))) +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 26)) AND (ss_quantity#1 <= 30)) AND ((((ss_list_price#3 >= 154.00) AND (ss_list_price#3 <= 164.00)) OR ((ss_coupon_amt#4 >= 7326.00) AND (ss_coupon_amt#4 <= 8326.00))) OR ((ss_wholesale_cost#2 >= 7.00) AND (ss_wholesale_cost#2 <= 27.00)))) (62) Project [codegen id : 16] Output [1]: [ss_list_price#3] @@ -431,7 +430,3 @@ Arguments: IdentityBroadcastMode, [id=#81] (70) BroadcastNestedLoopJoin Join condition: None -(71) CollectLimit -Input [18]: [B1_LP#14, B1_CNT#15, B1_CNTD#16, B2_LP#26, B2_CNT#27, B2_CNTD#28, B3_LP#39, B3_CNT#40, B3_CNTD#41, B4_LP#52, B4_CNT#53, B4_CNTD#54, B5_LP#65, B5_CNT#66, B5_CNTD#67, B6_LP#78, B6_CNT#79, B6_CNTD#80] -Arguments: 100 - diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt index d896002b0965d..77afa321d3ee4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt @@ -1,107 +1,106 @@ -CollectLimit +BroadcastNestedLoopJoin BroadcastNestedLoopJoin BroadcastNestedLoopJoin BroadcastNestedLoopJoin BroadcastNestedLoopJoin - BroadcastNestedLoopJoin - WholeStageCodegen (3) - HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B1_LP,B1_CNT,B1_CNTD,sum,count,count,count] - InputAdapter - Exchange #1 - WholeStageCodegen (2) - HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] - HashAggregate [ss_list_price] 
[avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] - InputAdapter - Exchange [ss_list_price] #2 - WholeStageCodegen (1) - HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] - Project [ss_list_price] - Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] - BroadcastExchange #3 - WholeStageCodegen (6) - HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B2_LP,B2_CNT,B2_CNTD,sum,count,count,count] - InputAdapter - Exchange #4 - WholeStageCodegen (5) - HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] - HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] - InputAdapter - Exchange [ss_list_price] #5 - WholeStageCodegen (4) - HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] - Project [ss_list_price] - Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] - BroadcastExchange #6 - WholeStageCodegen (9) - HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B3_LP,B3_CNT,B3_CNTD,sum,count,count,count] + WholeStageCodegen (3) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B1_LP,B1_CNT,B1_CNTD,sum,count,count,count] + InputAdapter + Exchange #1 + WholeStageCodegen (2) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + InputAdapter + Exchange [ss_list_price] #2 + WholeStageCodegen (1) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + Project [ss_list_price] + Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + BroadcastExchange #3 + WholeStageCodegen (6) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B2_LP,B2_CNT,B2_CNTD,sum,count,count,count] InputAdapter - Exchange #7 - WholeStageCodegen (8) + Exchange #4 + WholeStageCodegen (5) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] InputAdapter - Exchange [ss_list_price] #8 - WholeStageCodegen (7) + Exchange [ss_list_price] #5 + WholeStageCodegen (4) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] ColumnarToRow InputAdapter Scan parquet default.store_sales 
[ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] - BroadcastExchange #9 - WholeStageCodegen (12) - HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B4_LP,B4_CNT,B4_CNTD,sum,count,count,count] + BroadcastExchange #6 + WholeStageCodegen (9) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B3_LP,B3_CNT,B3_CNTD,sum,count,count,count] InputAdapter - Exchange #10 - WholeStageCodegen (11) + Exchange #7 + WholeStageCodegen (8) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] InputAdapter - Exchange [ss_list_price] #11 - WholeStageCodegen (10) + Exchange [ss_list_price] #8 + WholeStageCodegen (7) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] ColumnarToRow InputAdapter Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] - BroadcastExchange #12 - WholeStageCodegen (15) - HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B5_LP,B5_CNT,B5_CNTD,sum,count,count,count] + BroadcastExchange #9 + WholeStageCodegen (12) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B4_LP,B4_CNT,B4_CNTD,sum,count,count,count] InputAdapter - Exchange #13 - WholeStageCodegen (14) + Exchange #10 + WholeStageCodegen (11) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] InputAdapter - Exchange [ss_list_price] #14 - WholeStageCodegen (13) + Exchange [ss_list_price] #11 + WholeStageCodegen (10) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] ColumnarToRow InputAdapter Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] - BroadcastExchange #15 - WholeStageCodegen (18) - HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B6_LP,B6_CNT,B6_CNTD,sum,count,count,count] + BroadcastExchange #12 + WholeStageCodegen (15) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B5_LP,B5_CNT,B5_CNTD,sum,count,count,count] InputAdapter - Exchange #16 - WholeStageCodegen (17) + Exchange #13 + WholeStageCodegen (14) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] InputAdapter - Exchange [ss_list_price] #17 - WholeStageCodegen (16) + Exchange [ss_list_price] #14 + WholeStageCodegen (13) HashAggregate [ss_list_price] 
[avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] ColumnarToRow InputAdapter Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + BroadcastExchange #15 + WholeStageCodegen (18) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B6_LP,B6_CNT,B6_CNTD,sum,count,count,count] + InputAdapter + Exchange #16 + WholeStageCodegen (17) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + InputAdapter + Exchange [ss_list_price] #17 + WholeStageCodegen (16) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + Project [ss_list_price] + Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/explain.txt index 58a60763b2b57..5574e5b16c578 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/explain.txt @@ -1,73 +1,74 @@ == Physical Plan == -TakeOrderedAndProject (69) -+- * Project (68) - +- BroadcastNestedLoopJoin Inner BuildRight (67) - :- * HashAggregate (47) - : +- Exchange (46) - : +- * HashAggregate (45) - : +- * Project (44) - : +- * BroadcastHashJoin Inner BuildRight (43) - : :- * Project (31) - : : +- * BroadcastHashJoin Inner BuildRight (30) - : : :- * Project (24) - : : : +- * BroadcastHashJoin Inner BuildRight (23) - : : : :- * Project (17) - : : : : +- * BroadcastHashJoin Inner BuildRight (16) - : : : : :- * Project (10) - : : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : : :- * Filter (3) - : : : : : : +- * ColumnarToRow (2) - : : : : : : +- Scan parquet default.store_sales (1) - : : : : : +- BroadcastExchange (8) - : : : : : +- * Project (7) - : : : : : +- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.date_dim (4) - : : : : +- BroadcastExchange (15) - : : : : +- * Project (14) - : : : : +- * Filter (13) - : : : : +- * ColumnarToRow (12) - : : : : +- Scan parquet default.item (11) - : : : +- BroadcastExchange (22) - : : : +- * Project (21) - : : : +- * Filter (20) - : : : +- * ColumnarToRow (19) - : : : +- Scan parquet default.promotion (18) - : : +- BroadcastExchange (29) - : : +- * Project (28) - : : +- * Filter (27) - : : +- * ColumnarToRow (26) - : : +- Scan parquet default.store (25) - : +- BroadcastExchange (42) - : +- * Project (41) - : +- * BroadcastHashJoin Inner BuildRight (40) - : :- * Filter (34) - : : +- * ColumnarToRow (33) - : : +- Scan parquet default.customer (32) - : +- BroadcastExchange (39) - : +- * Project (38) - : +- * Filter (37) - : +- * ColumnarToRow (36) - : +- Scan parquet default.customer_address (35) - +- BroadcastExchange (66) - +- * HashAggregate (65) - +- Exchange (64) - +- * HashAggregate (63) - +- * Project (62) - +- * BroadcastHashJoin 
Inner BuildRight (61) - :- * Project (59) - : +- * BroadcastHashJoin Inner BuildRight (58) - : :- * Project (56) - : : +- * BroadcastHashJoin Inner BuildRight (55) - : : :- * Project (53) - : : : +- * BroadcastHashJoin Inner BuildRight (52) - : : : :- * Filter (50) - : : : : +- * ColumnarToRow (49) - : : : : +- Scan parquet default.store_sales (48) - : : : +- ReusedExchange (51) - : : +- ReusedExchange (54) - : +- ReusedExchange (57) - +- ReusedExchange (60) +* Sort (70) ++- Exchange (69) + +- * Project (68) + +- BroadcastNestedLoopJoin Inner BuildRight (67) + :- * HashAggregate (47) + : +- Exchange (46) + : +- * HashAggregate (45) + : +- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (31) + : : +- * BroadcastHashJoin Inner BuildRight (30) + : : :- * Project (24) + : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : :- * Project (17) + : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : :- * Project (10) + : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.store_sales (1) + : : : : : +- BroadcastExchange (8) + : : : : : +- * Project (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.date_dim (4) + : : : : +- BroadcastExchange (15) + : : : : +- * Project (14) + : : : : +- * Filter (13) + : : : : +- * ColumnarToRow (12) + : : : : +- Scan parquet default.item (11) + : : : +- BroadcastExchange (22) + : : : +- * Project (21) + : : : +- * Filter (20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet default.promotion (18) + : : +- BroadcastExchange (29) + : : +- * Project (28) + : : +- * Filter (27) + : : +- * ColumnarToRow (26) + : : +- Scan parquet default.store (25) + : +- BroadcastExchange (42) + : +- * Project (41) + : +- * BroadcastHashJoin Inner BuildRight (40) + : :- * Filter (34) + : : +- * ColumnarToRow (33) + : : +- Scan parquet default.customer (32) + : +- BroadcastExchange (39) + : +- * Project (38) + : +- * Filter (37) + : +- * ColumnarToRow (36) + : +- Scan parquet default.customer_address (35) + +- BroadcastExchange (66) + +- * HashAggregate (65) + +- Exchange (64) + +- * HashAggregate (63) + +- * Project (62) + +- * BroadcastHashJoin Inner BuildRight (61) + :- * Project (59) + : +- * BroadcastHashJoin Inner BuildRight (58) + : :- * Project (56) + : : +- * BroadcastHashJoin Inner BuildRight (55) + : : :- * Project (53) + : : : +- * BroadcastHashJoin Inner BuildRight (52) + : : : :- * Filter (50) + : : : : +- * ColumnarToRow (49) + : : : : +- Scan parquet default.store_sales (48) + : : : +- ReusedExchange (51) + : : +- ReusedExchange (54) + : +- ReusedExchange (57) + +- ReusedExchange (60) (1) Scan parquet default.store_sales @@ -375,7 +376,11 @@ Join condition: None Output [3]: [promotions#32, total#37, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#32 as decimal(15,4))) / promote_precision(cast(total#37 as decimal(15,4)))), DecimalType(35,20), true)) * 100.00000000000000000000), DecimalType(38,19), true) AS (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] Input [2]: [promotions#32, total#37] -(69) TakeOrderedAndProject +(69) Exchange Input [3]: [promotions#32, total#37, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS 
DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] -Arguments: 100, [promotions#32 ASC NULLS FIRST, total#37 ASC NULLS FIRST], [promotions#32, total#37, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] +Arguments: rangepartitioning(promotions#32 ASC NULLS FIRST, total#37 ASC NULLS FIRST, 5), true, [id=#40] + +(70) Sort [codegen id : 17] +Input [3]: [promotions#32, total#37, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] +Arguments: [promotions#32 ASC NULLS FIRST, total#37 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/simplified.txt index 87f2b3ae03746..1ebad2d825be6 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/simplified.txt @@ -1,101 +1,104 @@ -TakeOrderedAndProject [promotions,total,(CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))] - WholeStageCodegen (16) - Project [promotions,total] - InputAdapter - BroadcastNestedLoopJoin - WholeStageCodegen (8) - HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),promotions,sum] - InputAdapter - Exchange #1 - WholeStageCodegen (7) - HashAggregate [ss_ext_sales_price] [sum,sum] - Project [ss_ext_sales_price] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_customer_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_customer_sk,ss_store_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_store_sk,ss_promo_sk,ss_sold_date_sk,ss_customer_sk,ss_item_sk] +WholeStageCodegen (17) + Sort [promotions,total] + InputAdapter + Exchange [promotions,total] #1 + WholeStageCodegen (16) + Project [promotions,total] + InputAdapter + BroadcastNestedLoopJoin + WholeStageCodegen (8) + HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),promotions,sum] + InputAdapter + Exchange #2 + WholeStageCodegen (7) + HashAggregate [ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_customer_sk,ss_store_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk,ss_promo_sk,ss_sold_date_sk,ss_customer_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price] + 
InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [i_item_sk] + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_category] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [p_promo_sk] + Filter [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_promo_sk,p_channel_dmail,p_channel_email,p_channel_tv] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Project [s_store_sk] + Filter [s_gmt_offset,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_gmt_offset] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Project [c_customer_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Filter [c_customer_sk,c_current_addr_sk] ColumnarToRow InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price] + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] InputAdapter - BroadcastExchange #2 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] + BroadcastExchange #8 + WholeStageCodegen (5) + Project [ca_address_sk] + Filter [ca_gmt_offset,ca_address_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (2) - Project [i_item_sk] - Filter [i_category,i_item_sk] + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] + BroadcastExchange #9 + WholeStageCodegen (15) + HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),total,sum] + InputAdapter + Exchange #10 + WholeStageCodegen (14) + HashAggregate [ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_customer_sk,ss_store_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk,ss_sold_date_sk,ss_customer_sk,ss_item_sk] ColumnarToRow InputAdapter - Scan parquet default.item [i_item_sk,i_category] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (3) - Project [p_promo_sk] - Filter [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk] - ColumnarToRow + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] InputAdapter - Scan parquet default.promotion [p_promo_sk,p_channel_dmail,p_channel_email,p_channel_tv] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (4) - Project [s_store_sk] - Filter [s_gmt_offset,s_store_sk] - ColumnarToRow + ReusedExchange [d_date_sk] #3 InputAdapter - Scan parquet default.store [s_store_sk,s_gmt_offset] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (6) - Project [c_customer_sk] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Filter [c_customer_sk,c_current_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_current_addr_sk] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (5) - Project [ca_address_sk] - Filter 
[ca_gmt_offset,ca_address_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] - BroadcastExchange #8 - WholeStageCodegen (15) - HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),total,sum] - InputAdapter - Exchange #9 - WholeStageCodegen (14) - HashAggregate [ss_ext_sales_price] [sum,sum] - Project [ss_ext_sales_price] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_customer_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_customer_sk,ss_store_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_store_sk,ss_sold_date_sk,ss_customer_sk,ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] - InputAdapter - ReusedExchange [d_date_sk] #2 - InputAdapter - ReusedExchange [i_item_sk] #3 - InputAdapter - ReusedExchange [s_store_sk] #5 - InputAdapter - ReusedExchange [c_customer_sk] #6 + ReusedExchange [i_item_sk] #4 + InputAdapter + ReusedExchange [s_store_sk] #6 + InputAdapter + ReusedExchange [c_customer_sk] #7 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt index f56f48726c4ad..8025461181031 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt @@ -1,76 +1,77 @@ == Physical Plan == -TakeOrderedAndProject (72) -+- * Project (71) - +- BroadcastNestedLoopJoin Inner BuildRight (70) - :- * HashAggregate (47) - : +- Exchange (46) - : +- * HashAggregate (45) - : +- * Project (44) - : +- * BroadcastHashJoin Inner BuildRight (43) - : :- * Project (37) - : : +- * BroadcastHashJoin Inner BuildRight (36) - : : :- * Project (30) - : : : +- * BroadcastHashJoin Inner BuildRight (29) - : : : :- * Project (24) - : : : : +- * BroadcastHashJoin Inner BuildRight (23) - : : : : :- * Project (17) - : : : : : +- * BroadcastHashJoin Inner BuildRight (16) - : : : : : :- * Project (10) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : : : :- * Filter (3) - : : : : : : : +- * ColumnarToRow (2) - : : : : : : : +- Scan parquet default.store_sales (1) - : : : : : : +- BroadcastExchange (8) - : : : : : : +- * Project (7) - : : : : : : +- * Filter (6) - : : : : : : +- * ColumnarToRow (5) - : : : : : : +- Scan parquet default.store (4) - : : : : : +- BroadcastExchange (15) - : : : : : +- * Project (14) - : : : : : +- * Filter (13) - : : : : : +- * ColumnarToRow (12) - : : : : : +- Scan parquet default.promotion (11) - : : : : +- BroadcastExchange (22) - : : : : +- * Project (21) - : : : : +- * Filter (20) - : : : : +- * ColumnarToRow (19) - : : : : +- Scan parquet default.date_dim (18) - : : : +- BroadcastExchange (28) - : : : +- * Filter (27) - : : : +- * ColumnarToRow (26) - : : : +- Scan parquet default.customer (25) - : : +- BroadcastExchange (35) - : : +- * Project (34) - : : +- * Filter (33) - : : +- * ColumnarToRow (32) - : : +- Scan parquet default.customer_address (31) - : +- BroadcastExchange (42) - : +- * Project (41) - : +- * Filter (40) - : +- * ColumnarToRow (39) - : +- Scan parquet default.item (38) - +- BroadcastExchange (69) - +- * HashAggregate (68) - +- Exchange (67) - +- * 
HashAggregate (66) - +- * Project (65) - +- * BroadcastHashJoin Inner BuildRight (64) - :- * Project (62) - : +- * BroadcastHashJoin Inner BuildRight (61) - : :- * Project (59) - : : +- * BroadcastHashJoin Inner BuildRight (58) - : : :- * Project (56) - : : : +- * BroadcastHashJoin Inner BuildRight (55) - : : : :- * Project (53) - : : : : +- * BroadcastHashJoin Inner BuildRight (52) - : : : : :- * Filter (50) - : : : : : +- * ColumnarToRow (49) - : : : : : +- Scan parquet default.store_sales (48) - : : : : +- ReusedExchange (51) - : : : +- ReusedExchange (54) - : : +- ReusedExchange (57) - : +- ReusedExchange (60) - +- ReusedExchange (63) +* Sort (73) ++- Exchange (72) + +- * Project (71) + +- BroadcastNestedLoopJoin Inner BuildRight (70) + :- * HashAggregate (47) + : +- Exchange (46) + : +- * HashAggregate (45) + : +- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (37) + : : +- * BroadcastHashJoin Inner BuildRight (36) + : : :- * Project (30) + : : : +- * BroadcastHashJoin Inner BuildRight (29) + : : : :- * Project (24) + : : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : : :- * Project (17) + : : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : : :- * Project (10) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet default.store_sales (1) + : : : : : : +- BroadcastExchange (8) + : : : : : : +- * Project (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet default.store (4) + : : : : : +- BroadcastExchange (15) + : : : : : +- * Project (14) + : : : : : +- * Filter (13) + : : : : : +- * ColumnarToRow (12) + : : : : : +- Scan parquet default.promotion (11) + : : : : +- BroadcastExchange (22) + : : : : +- * Project (21) + : : : : +- * Filter (20) + : : : : +- * ColumnarToRow (19) + : : : : +- Scan parquet default.date_dim (18) + : : : +- BroadcastExchange (28) + : : : +- * Filter (27) + : : : +- * ColumnarToRow (26) + : : : +- Scan parquet default.customer (25) + : : +- BroadcastExchange (35) + : : +- * Project (34) + : : +- * Filter (33) + : : +- * ColumnarToRow (32) + : : +- Scan parquet default.customer_address (31) + : +- BroadcastExchange (42) + : +- * Project (41) + : +- * Filter (40) + : +- * ColumnarToRow (39) + : +- Scan parquet default.item (38) + +- BroadcastExchange (69) + +- * HashAggregate (68) + +- Exchange (67) + +- * HashAggregate (66) + +- * Project (65) + +- * BroadcastHashJoin Inner BuildRight (64) + :- * Project (62) + : +- * BroadcastHashJoin Inner BuildRight (61) + : :- * Project (59) + : : +- * BroadcastHashJoin Inner BuildRight (58) + : : :- * Project (56) + : : : +- * BroadcastHashJoin Inner BuildRight (55) + : : : :- * Project (53) + : : : : +- * BroadcastHashJoin Inner BuildRight (52) + : : : : :- * Filter (50) + : : : : : +- * ColumnarToRow (49) + : : : : : +- Scan parquet default.store_sales (48) + : : : : +- ReusedExchange (51) + : : : +- ReusedExchange (54) + : : +- ReusedExchange (57) + : +- ReusedExchange (60) + +- ReusedExchange (63) (1) Scan parquet default.store_sales @@ -390,7 +391,11 @@ Join condition: None Output [3]: [promotions#32, total#37, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#32 as decimal(15,4))) / promote_precision(cast(total#37 as decimal(15,4)))), DecimalType(35,20), true)) * 100.00000000000000000000), DecimalType(38,19), true) AS (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) 
/ CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] Input [2]: [promotions#32, total#37] -(72) TakeOrderedAndProject +(72) Exchange Input [3]: [promotions#32, total#37, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] -Arguments: 100, [promotions#32 ASC NULLS FIRST, total#37 ASC NULLS FIRST], [promotions#32, total#37, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] +Arguments: rangepartitioning(promotions#32 ASC NULLS FIRST, total#37 ASC NULLS FIRST, 5), true, [id=#40] + +(73) Sort [codegen id : 17] +Input [3]: [promotions#32, total#37, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] +Arguments: [promotions#32 ASC NULLS FIRST, total#37 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/simplified.txt index da75651673cfe..3b476544403e0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/simplified.txt @@ -1,105 +1,108 @@ -TakeOrderedAndProject [promotions,total,(CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))] - WholeStageCodegen (16) - Project [promotions,total] - InputAdapter - BroadcastNestedLoopJoin - WholeStageCodegen (8) - HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),promotions,sum] - InputAdapter - Exchange #1 - WholeStageCodegen (7) - HashAggregate [ss_ext_sales_price] [sum,sum] - Project [ss_ext_sales_price] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_ext_sales_price] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [ss_item_sk,ss_ext_sales_price,c_current_addr_sk] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_promo_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Filter [ss_store_sk,ss_promo_sk,ss_sold_date_sk,ss_customer_sk,ss_item_sk] +WholeStageCodegen (17) + Sort [promotions,total] + InputAdapter + Exchange [promotions,total] #1 + WholeStageCodegen (16) + Project [promotions,total] + InputAdapter + BroadcastNestedLoopJoin + WholeStageCodegen (8) + HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),promotions,sum] + InputAdapter + Exchange #2 + WholeStageCodegen (7) + HashAggregate [ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_ext_sales_price,c_current_addr_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + 
Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_promo_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Filter [ss_store_sk,ss_promo_sk,ss_sold_date_sk,ss_customer_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [s_store_sk] + Filter [s_gmt_offset,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_gmt_offset] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [p_promo_sk] + Filter [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_promo_sk,p_channel_dmail,p_channel_email,p_channel_tv] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Filter [c_customer_sk,c_current_addr_sk] ColumnarToRow InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen (1) - Project [s_store_sk] - Filter [s_gmt_offset,s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_gmt_offset] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (2) - Project [p_promo_sk] - Filter [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk] + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + Project [ca_address_sk] + Filter [ca_gmt_offset,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + Project [i_item_sk] + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_category] + BroadcastExchange #9 + WholeStageCodegen (15) + HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),total,sum] + InputAdapter + Exchange #10 + WholeStageCodegen (14) + HashAggregate [ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_ext_sales_price,c_current_addr_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Filter [ss_store_sk,ss_sold_date_sk,ss_customer_sk,ss_item_sk] ColumnarToRow InputAdapter - Scan parquet default.promotion [p_promo_sk,p_channel_dmail,p_channel_email,p_channel_tv] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (3) - Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] InputAdapter - Scan parquet 
default.date_dim [d_date_sk,d_year,d_moy] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (4) - Filter [c_customer_sk,c_current_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_current_addr_sk] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (5) - Project [ca_address_sk] - Filter [ca_gmt_offset,ca_address_sk] - ColumnarToRow + ReusedExchange [s_store_sk] #3 + InputAdapter + ReusedExchange [d_date_sk] #5 InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (6) - Project [i_item_sk] - Filter [i_category,i_item_sk] - ColumnarToRow + ReusedExchange [c_customer_sk,c_current_addr_sk] #6 InputAdapter - Scan parquet default.item [i_item_sk,i_category] - BroadcastExchange #8 - WholeStageCodegen (15) - HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),total,sum] - InputAdapter - Exchange #9 - WholeStageCodegen (14) - HashAggregate [ss_ext_sales_price] [sum,sum] - Project [ss_ext_sales_price] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_ext_sales_price] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [ss_item_sk,ss_ext_sales_price,c_current_addr_sk] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Filter [ss_store_sk,ss_sold_date_sk,ss_customer_sk,ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] - InputAdapter - ReusedExchange [s_store_sk] #2 - InputAdapter - ReusedExchange [d_date_sk] #4 - InputAdapter - ReusedExchange [c_customer_sk,c_current_addr_sk] #5 - InputAdapter - ReusedExchange [ca_address_sk] #6 - InputAdapter - ReusedExchange [i_item_sk] #7 + ReusedExchange [ca_address_sk] #7 + InputAdapter + ReusedExchange [i_item_sk] #8 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/explain.txt index 3f787bfb99b67..e279902a125c5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/explain.txt @@ -1,54 +1,55 @@ == Physical Plan == -TakeOrderedAndProject (50) -+- * Project (49) - +- BroadcastNestedLoopJoin Inner BuildRight (48) - :- * HashAggregate (27) - : +- Exchange (26) - : +- * HashAggregate (25) - : +- * Project (24) - : +- * BroadcastHashJoin Inner BuildRight (23) - : :- * Project (17) - : : +- * BroadcastHashJoin Inner BuildRight (16) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.web_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.web_page (4) - : : +- BroadcastExchange (15) - : : +- * Project (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.household_demographics (11) - : +- BroadcastExchange (22) - : +- * Project (21) - : +- * Filter (20) - : +- * ColumnarToRow (19) - : +- Scan parquet default.time_dim (18) - +- BroadcastExchange (47) - +- * 
HashAggregate (46) - +- Exchange (45) - +- * HashAggregate (44) - +- * Project (43) - +- * BroadcastHashJoin Inner BuildRight (42) - :- * Project (36) - : +- * BroadcastHashJoin Inner BuildRight (35) - : :- * Project (33) - : : +- * BroadcastHashJoin Inner BuildRight (32) - : : :- * Filter (30) - : : : +- * ColumnarToRow (29) - : : : +- Scan parquet default.web_sales (28) - : : +- ReusedExchange (31) - : +- ReusedExchange (34) - +- BroadcastExchange (41) - +- * Project (40) - +- * Filter (39) - +- * ColumnarToRow (38) - +- Scan parquet default.time_dim (37) +* Sort (51) ++- Exchange (50) + +- * Project (49) + +- BroadcastNestedLoopJoin Inner BuildRight (48) + :- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.web_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.web_page (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.household_demographics (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.time_dim (18) + +- BroadcastExchange (47) + +- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Project (33) + : : +- * BroadcastHashJoin Inner BuildRight (32) + : : :- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.web_sales (28) + : : +- ReusedExchange (31) + : +- ReusedExchange (34) + +- BroadcastExchange (41) + +- * Project (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet default.time_dim (37) (1) Scan parquet default.web_sales @@ -274,7 +275,11 @@ Join condition: None Output [1]: [CheckOverflow((promote_precision(cast(amc#17 as decimal(15,4))) / promote_precision(cast(pmc#23 as decimal(15,4)))), DecimalType(35,20), true) AS am_pm_ratio#25] Input [2]: [amc#17, pmc#23] -(50) TakeOrderedAndProject +(50) Exchange Input [1]: [am_pm_ratio#25] -Arguments: 100, [am_pm_ratio#25 ASC NULLS FIRST], [am_pm_ratio#25] +Arguments: rangepartitioning(am_pm_ratio#25 ASC NULLS FIRST, 5), true, [id=#26] + +(51) Sort [codegen id : 12] +Input [1]: [am_pm_ratio#25] +Arguments: [am_pm_ratio#25 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/simplified.txt index 1fe0442eab13f..5b33a90675699 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/simplified.txt @@ -1,74 +1,77 @@ -TakeOrderedAndProject [am_pm_ratio] - WholeStageCodegen (11) - Project [amc,pmc] - InputAdapter - BroadcastNestedLoopJoin - WholeStageCodegen (5) - HashAggregate [count] [count(1),amc,count] - InputAdapter - Exchange #1 - WholeStageCodegen (4) - HashAggregate [count,count] - Project - BroadcastHashJoin [ws_sold_time_sk,t_time_sk] - Project 
[ws_sold_time_sk] - BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] - Project [ws_sold_time_sk,ws_ship_hdemo_sk] - BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] - Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen (1) - Project [wp_web_page_sk] - Filter [wp_char_count,wp_web_page_sk] +WholeStageCodegen (12) + Sort [am_pm_ratio] + InputAdapter + Exchange [am_pm_ratio] #1 + WholeStageCodegen (11) + Project [amc,pmc] + InputAdapter + BroadcastNestedLoopJoin + WholeStageCodegen (5) + HashAggregate [count] [count(1),amc,count] + InputAdapter + Exchange #2 + WholeStageCodegen (4) + HashAggregate [count,count] + Project + BroadcastHashJoin [ws_sold_time_sk,t_time_sk] + Project [ws_sold_time_sk] + BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] + Project [ws_sold_time_sk,ws_ship_hdemo_sk] + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [wp_web_page_sk] + Filter [wp_char_count,wp_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_page [wp_web_page_sk,wp_char_count] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [hd_demo_sk] + Filter [hd_dep_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [t_time_sk] + Filter [t_hour,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_time_sk,t_hour] + BroadcastExchange #6 + WholeStageCodegen (10) + HashAggregate [count] [count(1),pmc,count] + InputAdapter + Exchange #7 + WholeStageCodegen (9) + HashAggregate [count,count] + Project + BroadcastHashJoin [ws_sold_time_sk,t_time_sk] + Project [ws_sold_time_sk] + BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] + Project [ws_sold_time_sk,ws_ship_hdemo_sk] + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] ColumnarToRow InputAdapter - Scan parquet default.web_page [wp_web_page_sk,wp_char_count] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (2) - Project [hd_demo_sk] - Filter [hd_dep_count,hd_demo_sk] - ColumnarToRow + Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] InputAdapter - Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (3) - Project [t_time_sk] - Filter [t_hour,t_time_sk] - ColumnarToRow + ReusedExchange [wp_web_page_sk] #3 InputAdapter - Scan parquet default.time_dim [t_time_sk,t_hour] - BroadcastExchange #5 - WholeStageCodegen (10) - HashAggregate [count] [count(1),pmc,count] - InputAdapter - Exchange #6 - WholeStageCodegen (9) - HashAggregate [count,count] - Project - BroadcastHashJoin [ws_sold_time_sk,t_time_sk] - Project [ws_sold_time_sk] - BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] - Project [ws_sold_time_sk,ws_ship_hdemo_sk] - BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] - Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] - InputAdapter - ReusedExchange 
[wp_web_page_sk] #2 - InputAdapter - ReusedExchange [hd_demo_sk] #3 - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (8) - Project [t_time_sk] - Filter [t_hour,t_time_sk] - ColumnarToRow - InputAdapter - Scan parquet default.time_dim [t_time_sk,t_hour] + ReusedExchange [hd_demo_sk] #4 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (8) + Project [t_time_sk] + Filter [t_hour,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_time_sk,t_hour] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt index 550bf89ce3b99..7a21808803aaa 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt @@ -1,54 +1,55 @@ == Physical Plan == -TakeOrderedAndProject (50) -+- * Project (49) - +- BroadcastNestedLoopJoin Inner BuildRight (48) - :- * HashAggregate (27) - : +- Exchange (26) - : +- * HashAggregate (25) - : +- * Project (24) - : +- * BroadcastHashJoin Inner BuildRight (23) - : :- * Project (17) - : : +- * BroadcastHashJoin Inner BuildRight (16) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.web_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.household_demographics (4) - : : +- BroadcastExchange (15) - : : +- * Project (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.time_dim (11) - : +- BroadcastExchange (22) - : +- * Project (21) - : +- * Filter (20) - : +- * ColumnarToRow (19) - : +- Scan parquet default.web_page (18) - +- BroadcastExchange (47) - +- * HashAggregate (46) - +- Exchange (45) - +- * HashAggregate (44) - +- * Project (43) - +- * BroadcastHashJoin Inner BuildRight (42) - :- * Project (40) - : +- * BroadcastHashJoin Inner BuildRight (39) - : :- * Project (33) - : : +- * BroadcastHashJoin Inner BuildRight (32) - : : :- * Filter (30) - : : : +- * ColumnarToRow (29) - : : : +- Scan parquet default.web_sales (28) - : : +- ReusedExchange (31) - : +- BroadcastExchange (38) - : +- * Project (37) - : +- * Filter (36) - : +- * ColumnarToRow (35) - : +- Scan parquet default.time_dim (34) - +- ReusedExchange (41) +* Sort (51) ++- Exchange (50) + +- * Project (49) + +- BroadcastNestedLoopJoin Inner BuildRight (48) + :- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.web_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.household_demographics (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.time_dim (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.web_page (18) + +- BroadcastExchange (47) + +- * HashAggregate (46) + +- Exchange (45) + 
+- * HashAggregate (44) + +- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (40) + : +- * BroadcastHashJoin Inner BuildRight (39) + : :- * Project (33) + : : +- * BroadcastHashJoin Inner BuildRight (32) + : : :- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.web_sales (28) + : : +- ReusedExchange (31) + : +- BroadcastExchange (38) + : +- * Project (37) + : +- * Filter (36) + : +- * ColumnarToRow (35) + : +- Scan parquet default.time_dim (34) + +- ReusedExchange (41) (1) Scan parquet default.web_sales @@ -274,7 +275,11 @@ Join condition: None Output [1]: [CheckOverflow((promote_precision(cast(amc#17 as decimal(15,4))) / promote_precision(cast(pmc#23 as decimal(15,4)))), DecimalType(35,20), true) AS am_pm_ratio#25] Input [2]: [amc#17, pmc#23] -(50) TakeOrderedAndProject +(50) Exchange Input [1]: [am_pm_ratio#25] -Arguments: 100, [am_pm_ratio#25 ASC NULLS FIRST], [am_pm_ratio#25] +Arguments: rangepartitioning(am_pm_ratio#25 ASC NULLS FIRST, 5), true, [id=#26] + +(51) Sort [codegen id : 12] +Input [1]: [am_pm_ratio#25] +Arguments: [am_pm_ratio#25 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/simplified.txt index 121d84d9dde2f..bf3cfc9cbc037 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/simplified.txt @@ -1,74 +1,77 @@ -TakeOrderedAndProject [am_pm_ratio] - WholeStageCodegen (11) - Project [amc,pmc] - InputAdapter - BroadcastNestedLoopJoin - WholeStageCodegen (5) - HashAggregate [count] [count(1),amc,count] - InputAdapter - Exchange #1 - WholeStageCodegen (4) - HashAggregate [count,count] - Project - BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] - Project [ws_web_page_sk] - BroadcastHashJoin [ws_sold_time_sk,t_time_sk] - Project [ws_sold_time_sk,ws_web_page_sk] - BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] - Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen (1) - Project [hd_demo_sk] - Filter [hd_dep_count,hd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (2) - Project [t_time_sk] - Filter [t_hour,t_time_sk] - ColumnarToRow - InputAdapter - Scan parquet default.time_dim [t_time_sk,t_hour] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (3) - Project [wp_web_page_sk] - Filter [wp_char_count,wp_web_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_page [wp_web_page_sk,wp_char_count] - BroadcastExchange #5 - WholeStageCodegen (10) - HashAggregate [count] [count(1),pmc,count] - InputAdapter - Exchange #6 - WholeStageCodegen (9) - HashAggregate [count,count] - Project - BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] - Project [ws_web_page_sk] - BroadcastHashJoin [ws_sold_time_sk,t_time_sk] - Project [ws_sold_time_sk,ws_web_page_sk] - BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] - Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] - ColumnarToRow +WholeStageCodegen (12) + Sort [am_pm_ratio] + InputAdapter + Exchange [am_pm_ratio] #1 + WholeStageCodegen (11) + Project [amc,pmc] + InputAdapter + 
BroadcastNestedLoopJoin + WholeStageCodegen (5) + HashAggregate [count] [count(1),amc,count] + InputAdapter + Exchange #2 + WholeStageCodegen (4) + HashAggregate [count,count] + Project + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project [ws_web_page_sk] + BroadcastHashJoin [ws_sold_time_sk,t_time_sk] + Project [ws_sold_time_sk,ws_web_page_sk] + BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] + Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] InputAdapter - Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + BroadcastExchange #3 + WholeStageCodegen (1) + Project [hd_demo_sk] + Filter [hd_dep_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] InputAdapter - ReusedExchange [hd_demo_sk] #2 + BroadcastExchange #4 + WholeStageCodegen (2) + Project [t_time_sk] + Filter [t_hour,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_time_sk,t_hour] InputAdapter - BroadcastExchange #7 - WholeStageCodegen (7) - Project [t_time_sk] - Filter [t_hour,t_time_sk] + BroadcastExchange #5 + WholeStageCodegen (3) + Project [wp_web_page_sk] + Filter [wp_char_count,wp_web_page_sk] ColumnarToRow InputAdapter - Scan parquet default.time_dim [t_time_sk,t_hour] - InputAdapter - ReusedExchange [wp_web_page_sk] #4 + Scan parquet default.web_page [wp_web_page_sk,wp_char_count] + BroadcastExchange #6 + WholeStageCodegen (10) + HashAggregate [count] [count(1),pmc,count] + InputAdapter + Exchange #7 + WholeStageCodegen (9) + HashAggregate [count,count] + Project + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project [ws_web_page_sk] + BroadcastHashJoin [ws_sold_time_sk,t_time_sk] + Project [ws_sold_time_sk,ws_web_page_sk] + BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] + Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + InputAdapter + ReusedExchange [hd_demo_sk] #3 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (7) + Project [t_time_sk] + Filter [t_hour,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_time_sk,t_hour] + InputAdapter + ReusedExchange [wp_web_page_sk] #5 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala index ed284df10aced..440fe997ae133 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala @@ -1130,7 +1130,7 @@ class StreamSuite extends StreamTest { verifyLocalLimit(inputDF.dropDuplicates().repartition(1).limit(1), expectStreamingLimit = false) // Should be LocalLimitExec in the first place, not from optimization of StreamingLocalLimitExec - val staticDF = spark.range(1).toDF("value").limit(1) + val staticDF = spark.range(2).toDF("value").limit(1) verifyLocalLimit(inputDF.toDF("value").join(staticDF, "value"), expectStreamingLimit = false) verifyLocalLimit( From cc23581e2645c91fa8d6e6c81dc87b4221718bb1 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 23 Dec 2020 12:19:07 +0000 Subject: [PATCH 0862/1009] [SPARK-33858][SQL][TESTS] Unify v1 and v2 ALTER TABLE .. RENAME PARTITION tests ### What changes were proposed in this pull request? 1. Move the `ALTER TABLE .. 
RENAME PARTITION` parsing tests to `AlterTableRenamePartitionParserSuite` 2. Place the v1 tests for `ALTER TABLE .. RENAME PARTITION` from `DDLSuite` to `v1.AlterTableRenamePartitionSuite` and v2 tests from `AlterTablePartitionV2SQLSuite` to `v2.AlterTableRenamePartitionSuite`, so, the tests will run for V1, Hive V1 and V2 DS. ### Why are the changes needed? - The unification will allow to run common `ALTER TABLE .. RENAME PARTITION` tests for both DSv1 and Hive DSv1, DSv2 - We can detect missing features and differences between DSv1 and DSv2 implementations. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running new test suites: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableRenamePartitionParserSuite" $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableRenamePartitionSuite" ``` Closes #30863 from MaxGekk/unify-rename-partition-tests. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/DDLParserSuite.scala | 28 +-- .../AlterTablePartitionV2SQLSuite.scala | 19 -- ...AlterTableRenamePartitionParserSuite.scala | 51 ++++++ .../AlterTableRenamePartitionSuiteBase.scala | 24 +++ .../command/DDLCommandTestUtils.scala | 18 ++ .../sql/execution/command/DDLSuite.scala | 57 +----- .../command/ShowPartitionsSuiteBase.scala | 18 -- .../v1/AlterTableRenamePartitionSuite.scala | 169 ++++++++++++++++++ .../v2/AlterTableRenamePartitionSuite.scala | 37 ++++ .../sql/hive/execution/HiveDDLSuite.scala | 4 - .../AlterTableRenamePartitionSuite.scala | 24 +++ 11 files changed, 325 insertions(+), 124 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionParserSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableRenamePartitionSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableRenamePartitionSuite.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableRenamePartitionSuite.scala diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index d408019053fb7..4612e72a54510 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.parser import java.util.Locale import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedNamespace, UnresolvedPartitionSpec, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView, UnresolvedView} +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedNamespace, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView, UnresolvedView} import org.apache.spark.sql.catalyst.catalog.{ArchiveResource, BucketSpec, FileResource, FunctionResource, JarResource} import org.apache.spark.sql.catalyst.expressions.{EqualTo, Literal} import 
org.apache.spark.sql.catalyst.plans.logical._ @@ -2073,32 +2073,6 @@ class DDLParserSuite extends AnalysisTest { """.stripMargin) } - test("alter table: rename partition") { - val sql1 = - """ - |ALTER TABLE table_name PARTITION (dt='2008-08-08', country='us') - |RENAME TO PARTITION (dt='2008-09-09', country='uk') - """.stripMargin - val parsed1 = parsePlan(sql1) - val expected1 = AlterTableRenamePartition( - UnresolvedTable(Seq("table_name"), "ALTER TABLE ... RENAME TO PARTITION"), - UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us")), - Map("dt" -> "2008-09-09", "country" -> "uk")) - comparePlans(parsed1, expected1) - - val sql2 = - """ - |ALTER TABLE a.b.c PARTITION (ds='2017-06-10') - |RENAME TO PARTITION (ds='2018-06-10') - """.stripMargin - val parsed2 = parsePlan(sql2) - val expected2 = AlterTableRenamePartition( - UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... RENAME TO PARTITION"), - UnresolvedPartitionSpec(Map("ds" -> "2017-06-10")), - Map("ds" -> "2018-06-10")) - comparePlans(parsed2, expected2) - } - test("show current namespace") { comparePlans( parsePlan("SHOW CURRENT NAMESPACE"), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala index bdf2fa5b7ac96..f8d4a0970ff89 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala @@ -31,23 +31,4 @@ class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { "ALTER TABLE ... RECOVER PARTITIONS is not supported for v2 tables.")) } } - - test("ALTER TABLE RENAME PARTITION") { - val nonPartTbl = "testcat.ns1.ns2.tbl" - val partTbl = "testpart.ns1.ns2.tbl" - withTable(nonPartTbl, partTbl) { - spark.sql(s"CREATE TABLE $nonPartTbl (id bigint, data string) USING foo PARTITIONED BY (id)") - val e1 = intercept[AnalysisException] { - sql(s"ALTER TABLE $nonPartTbl PARTITION (id=1) RENAME TO PARTITION (id=2)") - } - assert(e1.message.contains(s"Table $nonPartTbl can not alter partitions")) - - spark.sql(s"CREATE TABLE $partTbl (id bigint, data string) USING foo PARTITIONED BY (id)") - val e2 = intercept[AnalysisException] { - sql(s"ALTER TABLE $partTbl PARTITION (id=1) RENAME TO PARTITION (id=2)") - } - assert(e2.message.contains( - "ALTER TABLE ... RENAME TO PARTITION is not supported for v2 tables.")) - } - } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionParserSuite.scala new file mode 100644 index 0000000000000..db6506c85bcec --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionParserSuite.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedPartitionSpec, UnresolvedTable} +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan +import org.apache.spark.sql.catalyst.plans.logical.AlterTableRenamePartition +import org.apache.spark.sql.test.SharedSparkSession + +class AlterTableRenamePartitionParserSuite extends AnalysisTest with SharedSparkSession { + test("rename a partition with single part") { + val sql = """ + |ALTER TABLE a.b.c PARTITION (ds='2017-06-10') + |RENAME TO PARTITION (ds='2018-06-10') + """.stripMargin + val parsed = parsePlan(sql) + val expected = AlterTableRenamePartition( + UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... RENAME TO PARTITION"), + UnresolvedPartitionSpec(Map("ds" -> "2017-06-10")), + Map("ds" -> "2018-06-10")) + comparePlans(parsed, expected) + } + + test("rename a partition with multi parts") { + val sql = """ + |ALTER TABLE table_name PARTITION (dt='2008-08-08', country='us') + |RENAME TO PARTITION (dt='2008-09-09', country='uk') + """.stripMargin + val parsed = parsePlan(sql) + val expected = AlterTableRenamePartition( + UnresolvedTable(Seq("table_name"), "ALTER TABLE ... RENAME TO PARTITION"), + UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us")), + Map("dt" -> "2008-09-09", "country" -> "uk")) + comparePlans(parsed, expected) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala new file mode 100644 index 0000000000000..a29cf6cabba49 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.QueryTest + +trait AlterTableRenamePartitionSuiteBase extends QueryTest with DDLCommandTestUtils { + override val command = "ALTER TABLE .. 
RENAME PARTITION" +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala index a4129fe1ffee5..6ea2fea41f284 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala @@ -57,4 +57,22 @@ trait DDLCommandTestUtils extends SQLTestUtils { .map(PartitioningUtils.parsePathFragment) assert(partitions === expected.toSet) } + + protected def createWideTable(table: String): Unit = { + sql(s""" + |CREATE TABLE $table ( + | price int, qty int, + | year int, month int, hour int, minute int, sec int, extra int) + |$defaultUsing + |PARTITIONED BY (year, month, hour, minute, sec, extra) + |""".stripMargin) + sql(s""" + |INSERT INTO $table + |PARTITION(year = 2016, month = 3, hour = 10, minute = 10, sec = 10, extra = 1) SELECT 3, 3 + |""".stripMargin) + sql(s""" + |ALTER TABLE $table + |ADD PARTITION(year = 2016, month = 4, hour = 10, minute = 10, sec = 10, extra = 1) + |""".stripMargin) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 49184d0a2e0d0..4e2b67e532933 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -29,7 +29,7 @@ import org.apache.spark.internal.config import org.apache.spark.internal.config.RDD_PARALLEL_LISTING_THRESHOLD import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode} import org.apache.spark.sql.catalyst.{FunctionIdentifier, QualifiedTableName, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchDatabaseException, NoSuchFunctionException, NoSuchPartitionException, PartitionAlreadyExistsException, TempTableAlreadyExistsException} +import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchDatabaseException, NoSuchFunctionException, NoSuchPartitionException, TempTableAlreadyExistsException} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.connector.catalog.SupportsNamespaces.PROP_OWNER @@ -334,10 +334,6 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { testChangeColumn(isDatasourceTable = true) } - test("alter table: rename partition (datasource table)") { - testRenamePartitions(isDatasourceTable = true) - } - test("the qualified path of a database is stored in the catalog") { val catalog = spark.sessionState.catalog @@ -1592,57 +1588,6 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { } } - protected def testRenamePartitions(isDatasourceTable: Boolean): Unit = { - if (!isUsingHiveMetastore) { - assert(isDatasourceTable, "InMemoryCatalog only supports data source tables") - } - val catalog = spark.sessionState.catalog - val tableIdent = TableIdentifier("tab1", Some("dbx")) - val part1 = Map("a" -> "1", "b" -> "q") - val part2 = Map("a" -> "2", "b" -> "c") - val part3 = Map("a" -> "3", "b" -> "p") - createDatabase(catalog, "dbx") - createTable(catalog, tableIdent, isDatasourceTable) - createTablePartition(catalog, part1, tableIdent) - createTablePartition(catalog, part2, tableIdent) - createTablePartition(catalog, part3, tableIdent) - 
assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == Set(part1, part2, part3)) - - // basic rename partition - sql("ALTER TABLE dbx.tab1 PARTITION (a='1', b='q') RENAME TO PARTITION (a='100', b='p')") - sql("ALTER TABLE dbx.tab1 PARTITION (a='2', b='c') RENAME TO PARTITION (a='20', b='c')") - assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == - Set(Map("a" -> "100", "b" -> "p"), Map("a" -> "20", "b" -> "c"), Map("a" -> "3", "b" -> "p"))) - - // rename without explicitly specifying database - catalog.setCurrentDatabase("dbx") - sql("ALTER TABLE tab1 PARTITION (a='100', b='p') RENAME TO PARTITION (a='10', b='p')") - assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == - Set(Map("a" -> "10", "b" -> "p"), Map("a" -> "20", "b" -> "c"), Map("a" -> "3", "b" -> "p"))) - - // table to alter does not exist - val e = intercept[AnalysisException] { - sql("ALTER TABLE does_not_exist PARTITION (c='3') RENAME TO PARTITION (c='333')") - } - assert(e.getMessage.contains("Table not found: does_not_exist")) - - // partition to rename does not exist - intercept[NoSuchPartitionException] { - sql("ALTER TABLE tab1 PARTITION (a='not_found', b='1') RENAME TO PARTITION (a='1', b='2')") - } - - // partition spec in RENAME PARTITION should be case insensitive by default - sql("ALTER TABLE tab1 PARTITION (A='10', B='p') RENAME TO PARTITION (A='1', B='p')") - assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == - Set(Map("a" -> "1", "b" -> "p"), Map("a" -> "20", "b" -> "c"), Map("a" -> "3", "b" -> "p"))) - - // target partition already exists - val errMsg = intercept[PartitionAlreadyExistsException] { - sql("ALTER TABLE tab1 PARTITION (a='1', b='p') RENAME TO PARTITION (a='20', b='c')") - }.getMessage - assert(errMsg.contains("Partition already exists")) - } - protected def testChangeColumn(isDatasourceTable: Boolean): Unit = { if (!isUsingHiveMetastore) { assert(isDatasourceTable, "InMemoryCatalog only supports data source tables") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala index d66c6191fbfa2..83808ab82d3b2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala @@ -42,24 +42,6 @@ trait ShowPartitionsSuiteBase extends QueryTest with DDLCommandTestUtils { sql(s"ALTER TABLE $table ADD PARTITION(year = 2016, month = 3)") } - protected def createWideTable(table: String): Unit = { - sql(s""" - |CREATE TABLE $table ( - | price int, qty int, - | year int, month int, hour int, minute int, sec int, extra int) - |$defaultUsing - |PARTITIONED BY (year, month, hour, minute, sec, extra) - |""".stripMargin) - sql(s""" - |INSERT INTO $table - |PARTITION(year = 2016, month = 3, hour = 10, minute = 10, sec = 10, extra = 1) SELECT 3, 3 - |""".stripMargin) - sql(s""" - |ALTER TABLE $table - |ADD PARTITION(year = 2016, month = 4, hour = 10, minute = 10, sec = 10, extra = 1) - |""".stripMargin) - } - test("show partitions of non-partitioned table") { withNamespaceAndTable("ns", "not_partitioned_table") { t => sql(s"CREATE TABLE $t (col1 int) $defaultUsing") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableRenamePartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableRenamePartitionSuite.scala new file mode 100644 
index 0000000000000..89d5e5f4635d0 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableRenamePartitionSuite.scala @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command.v1 + +import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, PartitionAlreadyExistsException} +import org.apache.spark.sql.execution.command +import org.apache.spark.sql.internal.SQLConf + +trait AlterTableRenamePartitionSuiteBase extends command.AlterTableRenamePartitionSuiteBase { + protected def createSinglePartTable(t: String): Unit = { + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + sql(s"INSERT INTO $t PARTITION (id = 1) SELECT 'abc'") + } + + test("rename without explicitly specifying database") { + val t = "tbl" + withTable(t) { + createSinglePartTable(t) + checkPartitions(t, Map("id" -> "1")) + + sql(s"ALTER TABLE $t PARTITION (id = 1) RENAME TO PARTITION (id = 2)") + checkPartitions(t, Map("id" -> "2")) + checkAnswer(sql(s"SELECT id, data FROM $t"), Row(2, "abc")) + } + } + + test("table to alter does not exist") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $catalog.ns.no_tbl PARTITION (id=1) RENAME TO PARTITION (id=2)") + }.getMessage + assert(errMsg.contains("Table not found")) + } + } + + test("partition to rename does not exist") { + withNamespaceAndTable("ns", "tbl") { t => + createSinglePartTable(t) + checkPartitions(t, Map("id" -> "1")) + val errMsg = intercept[NoSuchPartitionException] { + sql(s"ALTER TABLE $t PARTITION (id = 3) RENAME TO PARTITION (id = 2)") + }.getMessage + assert(errMsg.contains("Partition not found in table")) + } + } + + test("target partition exists") { + withNamespaceAndTable("ns", "tbl") { t => + createSinglePartTable(t) + sql(s"INSERT INTO $t PARTITION (id = 2) SELECT 'def'") + checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) + val errMsg = intercept[PartitionAlreadyExistsException] { + sql(s"ALTER TABLE $t PARTITION (id = 1) RENAME TO PARTITION (id = 2)") + }.getMessage + assert(errMsg.contains("Partition already exists")) + } + } + + test("single part partition") { + withNamespaceAndTable("ns", "tbl") { t => + createSinglePartTable(t) + checkPartitions(t, Map("id" -> "1")) + + sql(s"ALTER TABLE $t PARTITION (id = 1) RENAME TO PARTITION (id = 2)") + checkPartitions(t, Map("id" -> "2")) + checkAnswer(sql(s"SELECT id, data FROM $t"), Row(2, "abc")) + } + } + + test("multi part partition") { + withNamespaceAndTable("ns", "tbl") { t => + createWideTable(t) + checkPartitions(t, + Map( + "year" -> "2016", + "month" -> "3", + 
"hour" -> "10", + "minute" -> "10", + "sec" -> "10", + "extra" -> "1"), + Map( + "year" -> "2016", + "month" -> "4", + "hour" -> "10", + "minute" -> "10", + "sec" -> "10", + "extra" -> "1")) + + sql(s""" + |ALTER TABLE $t + |PARTITION ( + | year = 2016, month = 3, hour = 10, minute = 10, sec = 10, extra = 1 + |) RENAME TO PARTITION ( + | year = 2016, month = 3, hour = 10, minute = 10, sec = 123, extra = 1 + |)""".stripMargin) + checkPartitions(t, + Map( + "year" -> "2016", + "month" -> "3", + "hour" -> "10", + "minute" -> "10", + "sec" -> "123", + "extra" -> "1"), + Map( + "year" -> "2016", + "month" -> "4", + "hour" -> "10", + "minute" -> "10", + "sec" -> "10", + "extra" -> "1")) + checkAnswer(sql(s"SELECT month, sec, price FROM $t"), Row(3, 123, 3)) + } + } + + test("with location") { + withNamespaceAndTable("ns", "tbl") { t => + createSinglePartTable(t) + sql(s"ALTER TABLE $t ADD PARTITION (id = 2) LOCATION 'loc1'") + sql(s"INSERT INTO $t PARTITION (id = 2) SELECT 'def'") + checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) + + sql(s"ALTER TABLE $t PARTITION (id = 2) RENAME TO PARTITION (id = 3)") + checkPartitions(t, Map("id" -> "1"), Map("id" -> "3")) + checkAnswer(sql(s"SELECT id, data FROM $t"), Seq(Row(1, "abc"), Row(3, "def"))) + } + } + + test("partition spec in RENAME PARTITION should be case insensitive") { + withNamespaceAndTable("ns", "tbl") { t => + createSinglePartTable(t) + checkPartitions(t, Map("id" -> "1")) + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t PARTITION (ID = 1) RENAME TO PARTITION (id = 2)") + }.getMessage + assert(errMsg.contains("ID is not a valid partition column")) + checkPartitions(t, Map("id" -> "1")) + } + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + sql(s"ALTER TABLE $t PARTITION (ID = 1) RENAME TO PARTITION (id = 2)") + checkPartitions(t, Map("id" -> "2")) + checkAnswer(sql(s"SELECT id, data FROM $t"), Row(2, "abc")) + } + } + } +} + +class AlterTableRenamePartitionSuite + extends AlterTableRenamePartitionSuiteBase + with CommandSuiteBase diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableRenamePartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableRenamePartitionSuite.scala new file mode 100644 index 0000000000000..026f1dcc33a1a --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableRenamePartitionSuite.scala @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command.v2 + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.execution.command + +class AlterTableRenamePartitionSuite + extends command.AlterTableRenamePartitionSuiteBase + with CommandSuiteBase { + + // TODO(SPARK-33859): Support V2 ALTER TABLE .. RENAME PARTITION + test("single part partition") { + withNamespaceAndTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t PARTITION (id=1) RENAME TO PARTITION (id=2)") + }.getMessage + assert(errMsg.contains("ALTER TABLE ... RENAME TO PARTITION is not supported for v2 tables")) + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index f13c8704f3b5b..b8a37a84735e3 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -159,10 +159,6 @@ class HiveCatalogedDDLSuite extends DDLSuite with TestHiveSingleton with BeforeA testChangeColumn(isDatasourceTable = false) } - test("alter table: rename partition") { - testRenamePartitions(isDatasourceTable = false) - } - test("alter datasource table add columns - orc") { testAddColumn("orc") } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableRenamePartitionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableRenamePartitionSuite.scala new file mode 100644 index 0000000000000..86edab74ab998 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableRenamePartitionSuite.scala @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.execution.command + +import org.apache.spark.sql.execution.command.v1 + +class AlterTableRenamePartitionSuite + extends v1.AlterTableRenamePartitionSuiteBase + with CommandSuiteBase From 303df64b466b7734b3c497955d1cca3e34fb663e Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 23 Dec 2020 14:34:01 +0000 Subject: [PATCH 0863/1009] [SPARK-33889][SQL] Fix NPE from `SHOW PARTITIONS` on V2 tables ### What changes were proposed in this pull request? At `ShowPartitionsExec.run()`, check that a row returned by `listPartitionIdentifiers()` contains a `null` field, and convert it to `"null"`. ### Why are the changes needed? Because `SHOW PARTITIONS` throws NPE on V2 table with `null` partition values. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? Added new UT to `v2.ShowPartitionsSuite`. 
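For context only, the intended rendering rule can be sketched outside Spark as a tiny standalone program; `PartitionSpecRenderingSketch`, `render`, and the `String` value type below are illustrative assumptions and not the `ShowPartitionsExec` code touched by this patch:

```scala
// Illustrative sketch only (not the patched Spark code): a null partition value
// is rendered as the literal string "null", while an empty string stays empty,
// when building a "col=value" partition spec string.
object PartitionSpecRenderingSketch {
  def render(name: String, value: String): String = {
    val valueStr = if (value == null) "null" else value
    s"$name=$valueStr"
  }

  def main(args: Array[String]): Unit = {
    println(render("part", null)) // prints: part=null
    println(render("part", ""))   // prints: part=
  }
}
```
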
Closes #30904 from MaxGekk/fix-npe-show-partitions. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../datasources/v2/ShowPartitionsExec.scala | 6 +++--- .../command/v2/ShowPartitionsSuite.scala | 16 +++++++++++++++- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala index 416dce6fa28c6..ac24094f9089e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala @@ -53,9 +53,9 @@ case class ShowPartitionsExec( var i = 0 while (i < len) { val dataType = schema(i).dataType - val partValue = row.get(i, dataType) - val partValueStr = Cast(Literal(partValue, dataType), StringType, Some(timeZoneId)) - .eval().toString + val partValueUTF8String = + Cast(Literal(row.get(i, dataType), dataType), StringType, Some(timeZoneId)).eval() + val partValueStr = if (partValueUTF8String == null) "null" else partValueUTF8String.toString partitions(i) = escapePathName(schema(i).name) + "=" + escapePathName(partValueStr) i += 1 } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala index e52c60d0f9a95..ed0a7dff62440 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.command.v2 -import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.{AnalysisException, Row, SaveMode} import org.apache.spark.sql.execution.command class ShowPartitionsSuite extends command.ShowPartitionsSuiteBase with CommandSuiteBase { @@ -34,4 +34,18 @@ class ShowPartitionsSuite extends command.ShowPartitionsSuiteBase with CommandSu "SHOW PARTITIONS cannot run for a table which does not support partitioning")) } } + + test("SPARK-33889: null and empty string as partition values") { + import testImplicits._ + withNamespaceAndTable("ns", "tbl") { t => + val df = Seq((0, ""), (1, null)).toDF("a", "part") + df.write + .partitionBy("part") + .format("parquet") + .mode(SaveMode.Overwrite) + .saveAsTable(t) + + runShowPartitionsSql(s"SHOW PARTITIONS $t", Row("part=") :: Row("part=null") :: Nil) + } + } } From 7ffcfcf7db57fb62941130e0c7bf61bca08aa758 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Wed, 23 Dec 2020 14:35:46 +0000 Subject: [PATCH 0864/1009] [SPARK-33847][SQL] Simplify CaseWhen if elseValue is None ### What changes were proposed in this pull request? 1. Enhance `ReplaceNullWithFalseInPredicate` to replace None of elseValue inside `CaseWhen` with `FalseLiteral` if all branches are `FalseLiteral` . 
The use case is: ```sql create table t1 using parquet as select id from range(10); explain select id from t1 where (CASE WHEN id = 1 THEN 'a' WHEN id = 3 THEN 'b' end) = 'c'; ``` Before this pr: ``` == Physical Plan == *(1) Filter CASE WHEN (id#1L = 1) THEN false WHEN (id#1L = 3) THEN false END +- *(1) ColumnarToRow +- FileScan parquet default.t1[id#1L] Batched: true, DataFilters: [CASE WHEN (id#1L = 1) THEN false WHEN (id#1L = 3) THEN false END], Format: Parquet, Location: InMemoryFileIndex[file:/Users/yumwang/opensource/spark/spark-warehouse/org.apache.spark.sql.DataF..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct ``` After this pr: ``` == Physical Plan == LocalTableScan , [id#1L] ``` 2. Enhance `SimplifyConditionals` if elseValue is None and all outputs are null. ### Why are the changes needed? Improve query performance. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #30852 from wangyum/SPARK-33847. Authored-by: Yuming Wang Signed-off-by: Wenchen Fan --- .../ReplaceNullWithFalseInPredicate.scala | 8 +++-- .../sql/catalyst/optimizer/expressions.scala | 4 +++ .../PushFoldableIntoBranchesSuite.scala | 11 +++++++ ...ReplaceNullWithFalseInPredicateSuite.scala | 33 +++++++++++++++++++ .../optimizer/SimplifyConditionalSuite.scala | 8 +++++ ...ullWithFalseInPredicateEndToEndSuite.scala | 21 +++++++++--- 6 files changed, 78 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala index 4a71dba663b38..92401131e8b82 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala @@ -93,8 +93,12 @@ object ReplaceNullWithFalseInPredicate extends Rule[LogicalPlan] { val newBranches = cw.branches.map { case (cond, value) => replaceNullWithFalse(cond) -> replaceNullWithFalse(value) } - val newElseValue = cw.elseValue.map(replaceNullWithFalse) - CaseWhen(newBranches, newElseValue) + if (newBranches.forall(_._2 == FalseLiteral) && cw.elseValue.isEmpty) { + FalseLiteral + } else { + val newElseValue = cw.elseValue.map(replaceNullWithFalse) + CaseWhen(newBranches, newElseValue) + } case i @ If(pred, trueVal, falseVal) if i.dataType == BooleanType => If(replaceNullWithFalse(pred), replaceNullWithFalse(trueVal), replaceNullWithFalse(falseVal)) case e if e.dataType == BooleanType => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 47b968f6ebdd7..f01df5e5e6768 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -525,6 +525,10 @@ object SimplifyConditionals extends Rule[LogicalPlan] with PredicateHelper { } else { e.copy(branches = branches.take(i).map(branch => (branch._1, elseValue))) } + + case e @ CaseWhen(branches, None) + if branches.forall(_._2.semanticEquals(Literal(null, e.dataType))) => + Literal(null, e.dataType) } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala index 02307a52ebb89..2d826e7b55a68 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala @@ -258,4 +258,15 @@ class PushFoldableIntoBranchesSuite EqualTo(CaseWhen(Seq((a, Literal(1)), (c, Literal(2))), None).cast(StringType), Literal("4")), CaseWhen(Seq((a, FalseLiteral), (c, FalseLiteral)), None)) } + + test("SPARK-33847: Remove the CaseWhen if elseValue is empty and other outputs are null") { + Seq(a, LessThan(Rand(1), Literal(0.5))).foreach { condition => + assertEquivalent( + EqualTo(CaseWhen(Seq((condition, Literal.create(null, IntegerType)))), Literal(2)), + Literal.create(null, BooleanType)) + assertEquivalent( + EqualTo(CaseWhen(Seq((condition, Literal("str")))).cast(IntegerType), Literal(2)), + Literal.create(null, BooleanType)) + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala index 5da71c31e1990..f49e6921fd46a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala @@ -380,6 +380,39 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testProjection(originalExpr = column, expectedExpr = column) } + test("replace None of elseValue inside CaseWhen if all branches are FalseLiteral") { + val allFalseBranches = Seq( + (UnresolvedAttribute("i") < Literal(10)) -> FalseLiteral, + (UnresolvedAttribute("i") > Literal(40)) -> FalseLiteral) + val allFalseCond = CaseWhen(allFalseBranches) + + val nonAllFalseBranches = Seq( + (UnresolvedAttribute("i") < Literal(10)) -> FalseLiteral, + (UnresolvedAttribute("i") > Literal(40)) -> TrueLiteral) + val nonAllFalseCond = CaseWhen(nonAllFalseBranches, FalseLiteral) + + testFilter(allFalseCond, FalseLiteral) + testJoin(allFalseCond, FalseLiteral) + testDelete(allFalseCond, FalseLiteral) + testUpdate(allFalseCond, FalseLiteral) + + testFilter(nonAllFalseCond, nonAllFalseCond) + testJoin(nonAllFalseCond, nonAllFalseCond) + testDelete(nonAllFalseCond, nonAllFalseCond) + testUpdate(nonAllFalseCond, nonAllFalseCond) + } + + test("replace None of elseValue inside CaseWhen if all branches are null") { + val allNullBranches = Seq( + (UnresolvedAttribute("i") < Literal(10)) -> Literal.create(null, BooleanType), + (UnresolvedAttribute("i") > Literal(40)) -> Literal.create(null, BooleanType)) + val allFalseCond = CaseWhen(allNullBranches) + testFilter(allFalseCond, FalseLiteral) + testJoin(allFalseCond, FalseLiteral) + testDelete(allFalseCond, FalseLiteral) + testUpdate(allFalseCond, FalseLiteral) + } + private def testFilter(originalCond: Expression, expectedCond: Expression): Unit = { test((rel, exp) => rel.where(exp), originalCond, expectedCond) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala index 328fc107e1c1b..1876be21dea4b 100644 --- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala @@ -215,4 +215,12 @@ class SimplifyConditionalSuite extends PlanTest with ExpressionEvalHelper with P If(GreaterThan(Rand(0), UnresolvedAttribute("a")), FalseLiteral, TrueLiteral), LessThanOrEqual(Rand(0), UnresolvedAttribute("a"))) } + + test("SPARK-33847: Remove the CaseWhen if elseValue is empty and other outputs are null") { + Seq(GreaterThan('a, 1), GreaterThan(Rand(0), 1)).foreach { condition => + assertEquivalent( + CaseWhen((condition, Literal.create(null, IntegerType)) :: Nil, None), + Literal.create(null, IntegerType)) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ReplaceNullWithFalseInPredicateEndToEndSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ReplaceNullWithFalseInPredicateEndToEndSuite.scala index bdbb741f24bc6..739b4052ee90d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ReplaceNullWithFalseInPredicateEndToEndSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ReplaceNullWithFalseInPredicateEndToEndSuite.scala @@ -27,6 +27,12 @@ import org.apache.spark.sql.types.BooleanType class ReplaceNullWithFalseInPredicateEndToEndSuite extends QueryTest with SharedSparkSession { import testImplicits._ + private def checkPlanIsEmptyLocalScan(df: DataFrame): Unit = + df.queryExecution.executedPlan match { + case s: LocalTableScanExec => assert(s.rows.isEmpty) + case p => fail(s"$p is not LocalTableScanExec") + } + test("SPARK-25860: Replace Literal(null, _) with FalseLiteral whenever possible") { withTable("t1", "t2") { Seq((1, true), (2, false)).toDF("l", "b").write.saveAsTable("t1") @@ -64,11 +70,6 @@ class ReplaceNullWithFalseInPredicateEndToEndSuite extends QueryTest with Shared checkAnswer(df1.where("IF(l > 10, false, b OR null)"), Row(1, true)) } - - def checkPlanIsEmptyLocalScan(df: DataFrame): Unit = df.queryExecution.executedPlan match { - case s: LocalTableScanExec => assert(s.rows.isEmpty) - case p => fail(s"$p is not LocalTableScanExec") - } } test("SPARK-26107: Replace Literal(null, _) with FalseLiteral in higher-order functions") { @@ -112,4 +113,14 @@ class ReplaceNullWithFalseInPredicateEndToEndSuite extends QueryTest with Shared assertNoLiteralNullInPlan(q3) } } + + test("SPARK-33847: replace None of elseValue inside CaseWhen to FalseLiteral") { + withTable("t1") { + Seq((1, 1), (2, 2)).toDF("a", "b").write.saveAsTable("t1") + val t1 = spark.table("t1") + val q1 = t1.filter("(CASE WHEN a > 1 THEN 1 END) = 0") + checkAnswer(q1, Seq.empty) + checkPlanIsEmptyLocalScan(q1) + } + } } From 47d1aa4e93f668774fd0b16c780d3b1f6200bcd8 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 23 Dec 2020 23:43:21 +0900 Subject: [PATCH 0865/1009] [SPARK-33891][DOCS][CORE] Update dynamic allocation related documents ### What changes were proposed in this pull request? This PR aims to update the followings. - Remove the outdated requirement for `spark.shuffle.service.enabled` in `configuration.md` - Dynamic allocation section in `job-scheduling.md` ### Why are the changes needed? To make the document up-to-date. ### Does this PR introduce _any_ user-facing change? No, it's a documentation update. ### How was this patch tested? Manual. 
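As a quick illustration of the two setups described in the updated job-scheduling section below, a minimal sketch using the configuration keys referenced in the diff:

```scala
import org.apache.spark.SparkConf

// Option 1: dynamic allocation with shuffle tracking; no external shuffle service needed.
val withShuffleTracking = new SparkConf()
  .set("spark.dynamicAllocation.enabled", "true")
  .set("spark.dynamicAllocation.shuffleTracking.enabled", "true")

// Option 2: dynamic allocation backed by an external shuffle service,
// which must be set up on each worker node first.
val withExternalShuffleService = new SparkConf()
  .set("spark.dynamicAllocation.enabled", "true")
  .set("spark.shuffle.service.enabled", "true")
```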
**BEFORE** ![Screen Shot 2020-12-23 at 2 22 04 AM](https://user-images.githubusercontent.com/9700541/102986441-ae647f80-44c5-11eb-97a3-87c2d368952a.png) ![Screen Shot 2020-12-23 at 2 22 34 AM](https://user-images.githubusercontent.com/9700541/102986473-bcb29b80-44c5-11eb-8eae-6802001c6dfa.png) **AFTER** ![Screen Shot 2020-12-23 at 2 25 36 AM](https://user-images.githubusercontent.com/9700541/102986767-2df24e80-44c6-11eb-8540-e74856a4c313.png) ![Screen Shot 2020-12-23 at 2 21 13 AM](https://user-images.githubusercontent.com/9700541/102986366-8e34c080-44c5-11eb-8054-1efd07c9458c.png) Closes #30906 from dongjoon-hyun/SPARK-33891. Authored-by: Dongjoon Hyun Signed-off-by: HyukjinKwon --- docs/configuration.md | 3 +-- docs/job-scheduling.md | 17 +++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 21506e6901263..fe1fc3e47369b 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -928,8 +928,7 @@ Apart from these, the following properties are also available, and may be useful false Enables the external shuffle service. This service preserves the shuffle files written by - executors so the executors can be safely removed. This must be enabled if - spark.dynamicAllocation.enabled is "true". The external shuffle service + executors so the executors can be safely removed. The external shuffle service must be set up in order to enable it. See dynamic allocation configuration and setup documentation for more information. diff --git a/docs/job-scheduling.md b/docs/job-scheduling.md index 7c7385b325a7f..f2b77cdfcd2c3 100644 --- a/docs/job-scheduling.md +++ b/docs/job-scheduling.md @@ -79,18 +79,19 @@ are no longer used and request them again later when there is demand. This featu useful if multiple applications share resources in your Spark cluster. This feature is disabled by default and available on all coarse-grained cluster managers, i.e. -[standalone mode](spark-standalone.html), [YARN mode](running-on-yarn.html), and -[Mesos coarse-grained mode](running-on-mesos.html#mesos-run-modes). +[standalone mode](spark-standalone.html), [YARN mode](running-on-yarn.html), +[Mesos coarse-grained mode](running-on-mesos.html#mesos-run-modes) and [K8s mode](running-on-kubernetes.html). + ### Configuration and Setup -There are two requirements for using this feature. First, your application must set -`spark.dynamicAllocation.enabled` to `true`. Second, you must set up an *external shuffle service* -on each worker node in the same cluster and set `spark.shuffle.service.enabled` to true in your -application. The purpose of the external shuffle service is to allow executors to be removed +There are two ways for using this feature. +First, your application must set both `spark.dynamicAllocation.enabled` and `spark.dynamicAllocation.shuffleTracking.enabled` to `true`. +Second, your application must set both `spark.dynamicAllocation.enabled` and `spark.shuffle.service.enabled` to `true` +after you set up an *external shuffle service* on each worker node in the same cluster. +The purpose of the shuffle tracking or the external shuffle service is to allow executors to be removed without deleting shuffle files written by them (more detail described -[below](job-scheduling.html#graceful-decommission-of-executors)). The way to set up this service -varies across cluster managers: +[below](job-scheduling.html#graceful-decommission-of-executors)). 
While it is simple to enable shuffle tracking, the way to set up the external shuffle service varies across cluster managers: In standalone mode, simply start your workers with `spark.shuffle.service.enabled` set to `true`. From 0677c39009de0830d995da77332f0756c76d6b56 Mon Sep 17 00:00:00 2001 From: Chandni Singh Date: Wed, 23 Dec 2020 12:42:18 -0600 Subject: [PATCH 0866/1009] [SPARK-32916][SHUFFLE][TEST-MAVEN][TEST-HADOOP2.7] Ensure the number of chunks in meta file and index file are equal ### What changes were proposed in this pull request? 1. Fixes for bugs in `RemoteBlockPushResolver` where the number of chunks in the meta file and the index file become inconsistent due to exceptions while writing to either file. This Java class was introduced in https://github.com/apache/spark/pull/30062. - If writing to the index file fails, the position of the meta file is not reset. This means that the number of chunks in the meta file is inconsistent with the index file. - During exception handling while writing to the index/meta file, we just set the pointer back to the start position. If the files are closed right after this, that does not get rid of any of the extra bytes already written to them. 2. Adds an IOException threshold. If the `RemoteBlockPushResolver` encounters more IOExceptions than this threshold while updating the data/meta/index file of a shuffle partition, it responds to the client with the exception `IOExceptions exceeded the threshold` so that the client can stop pushing data for this shuffle partition (a brief client-side sketch follows this commit message). 3. When the update to metadata fails, the exception is not propagated back to the client. This results in an increased size of the current chunk. However, with (2) in place, the current chunk will still be of a manageable size. ### Why are the changes needed? This fix is needed for the bugs mentioned above. 1. Writing to the meta file was moved after writing to the index file. This fixes the issue because if there is an exception while writing to the meta file, the index file position is not updated. With this change, if there is an exception while writing to the index file, none of the files are effectively updated, and the same holds vice versa. 2. The data/index/meta files are truncated to their tracked lengths when the partition is finalized. 3. When the number of IOExceptions has reached the threshold, it is most likely that future blocks will also hit the same issue, so it is better to let the clients know so that they can stop pushing blocks for that partition. 4. When just the meta update fails, the client retries pushing a block that was already successfully merged into the data file. This can be avoided by letting the chunk grow slightly. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added unit tests for all the bugs and the threshold. Closes #30433 from otterc/SPARK-32916-followup.
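A minimal client-side sketch of the contract described above: the block pusher can recognize the new error-message prefix (added to `ErrorHandler.BlockPushErrorHandler` in the diff below) and stop pushing to that shuffle partition. The helper name is illustrative only.

```scala
// A minimal sketch: recognizing the new server response on the client side.
// The prefix mirrors IOEXCEPTIONS_EXCEEDED_THRESHOLD_PREFIX introduced in this patch.
val ioExceptionsExceededPrefix = "IOExceptions exceeded the threshold"

def shouldStopPushingPartition(error: Throwable): Boolean =
  error.getMessage != null && error.getMessage.contains(ioExceptionsExceededPrefix)
```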
Authored-by: Chandni Singh Signed-off-by: Mridul Muralidharan --- .../spark/network/util/TransportConf.java | 10 + .../spark/network/shuffle/ErrorHandler.java | 9 + .../shuffle/RemoteBlockPushResolver.java | 301 ++++++++++---- .../shuffle/RemoteBlockPushResolverSuite.java | 380 ++++++++++++++++++ 4 files changed, 629 insertions(+), 71 deletions(-) diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java b/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java index fd287b022618b..d305dfa8e83cf 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java @@ -398,4 +398,14 @@ public long mergedIndexCacheSize() { return JavaUtils.byteStringAsBytes( conf.get("spark.shuffle.server.mergedIndexCacheSize", "100m")); } + + /** + * The threshold for the number of IOExceptions while merging shuffle blocks to a shuffle partition. + * When the number of IOExceptions while writing to the merged shuffle data/index/meta file exceeds + * this threshold, the shuffle server will respond back to the client to stop pushing shuffle + * blocks for this shuffle partition. + */ + public int ioExceptionsThresholdDuringMerge() { + return conf.getInt("spark.shuffle.server.ioExceptionsThresholdDuringMerge", 4); + } } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java index d13a0272744a0..968777fba785d 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java @@ -71,6 +71,15 @@ class BlockPushErrorHandler implements ErrorHandler { public static final String BLOCK_APPEND_COLLISION_DETECTED_MSG_PREFIX = "Couldn't find an opportunity to write block"; + /** + * String constant used for generating exception messages indicating the server encountered + * IOExceptions multiple times, greater than the configured threshold, while trying to merge + * shuffle blocks of the same shuffle partition. When the client receives this response, + * it will stop pushing any more blocks for the same shuffle partition. + */ + public static final String IOEXCEPTIONS_EXCEEDED_THRESHOLD_PREFIX = + "IOExceptions exceeded the threshold"; + @Override public boolean shouldRetryError(Throwable t) { // If it is a connection time out or a connection closed exception, no need to retry.
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java index 76abb05c99bb4..0e2355646465d 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java @@ -17,15 +17,16 @@ package org.apache.spark.network.shuffle; +import java.io.DataOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; -import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Iterator; @@ -45,6 +46,8 @@ import com.google.common.cache.LoadingCache; import com.google.common.cache.Weigher; import com.google.common.collect.Maps; +import com.google.common.primitives.Ints; +import com.google.common.primitives.Longs; import org.roaringbitmap.RoaringBitmap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -78,6 +81,7 @@ public class RemoteBlockPushResolver implements MergedShuffleFileManager { private final Executor directoryCleaner; private final TransportConf conf; private final int minChunkSize; + private final int ioExceptionsThresholdDuringMerge; private final ErrorHandler.BlockPushErrorHandler errorHandler; @SuppressWarnings("UnstableApiUsage") @@ -92,6 +96,7 @@ public RemoteBlockPushResolver(TransportConf conf) { // Add `spark` prefix because it will run in NM in Yarn mode. NettyUtils.createThreadFactory("spark-shuffle-merged-shuffle-directory-cleaner")); this.minChunkSize = conf.minChunkSizeInMergedShuffleFile(); + this.ioExceptionsThresholdDuringMerge = conf.ioExceptionsThresholdDuringMerge(); CacheLoader indexCacheLoader = new CacheLoader() { public ShuffleIndexInformation load(File file) throws IOException { @@ -132,7 +137,7 @@ private AppShufflePartitionInfo getOrCreateAppShufflePartitionInfo( if (dataFile.exists()) { return null; } else { - return new AppShufflePartitionInfo(appShuffleId, reduceId, dataFile, indexFile, metaFile); + return newAppShufflePartitionInfo(appShuffleId, reduceId, dataFile, indexFile, metaFile); } } catch (IOException e) { logger.error( @@ -146,6 +151,17 @@ private AppShufflePartitionInfo getOrCreateAppShufflePartitionInfo( }); } + @VisibleForTesting + AppShufflePartitionInfo newAppShufflePartitionInfo( + AppShuffleId appShuffleId, + int reduceId, + File dataFile, + File indexFile, + File metaFile) throws IOException { + return new AppShufflePartitionInfo(appShuffleId, reduceId, dataFile, + new MergeShuffleFile(indexFile), new MergeShuffleFile(metaFile)); + } + @Override public MergedBlockMeta getMergedBlockMeta(String appId, int shuffleId, int reduceId) { AppShuffleId appShuffleId = new AppShuffleId(appId, shuffleId); @@ -370,26 +386,19 @@ public MergeStatuses finalizeShuffleMerge(FinalizeShuffleMerge msg) throws IOExc new MergeStatuses(msg.shuffleId, new RoaringBitmap[0], new int[0], new long[0]); } else { Collection partitionsToFinalize = shufflePartitions.values(); - int totalPartitions = partitionsToFinalize.size(); - RoaringBitmap[] bitmaps = new RoaringBitmap[totalPartitions]; - int[] reduceIds = new int[totalPartitions]; - long[] sizes = new long[totalPartitions]; + List bitmaps = new 
ArrayList<>(partitionsToFinalize.size()); + List reduceIds = new ArrayList<>(partitionsToFinalize.size()); + List sizes = new ArrayList<>(partitionsToFinalize.size()); Iterator partitionsIter = partitionsToFinalize.iterator(); - int idx = 0; while (partitionsIter.hasNext()) { AppShufflePartitionInfo partition = partitionsIter.next(); synchronized (partition) { - // Get rid of any partial block data at the end of the file. This could either - // be due to failure or a request still being processed when the shuffle - // merge gets finalized. try { - partition.dataChannel.truncate(partition.getPosition()); - if (partition.getPosition() != partition.getLastChunkOffset()) { - partition.updateChunkInfo(partition.getPosition(), partition.lastMergedMapIndex); - } - bitmaps[idx] = partition.mapTracker; - reduceIds[idx] = partition.reduceId; - sizes[idx++] = partition.getPosition(); + // This can throw IOException which will marks this shuffle partition as not merged. + partition.finalizePartition(); + bitmaps.add(partition.mapTracker); + reduceIds.add(partition.reduceId); + sizes.add(partition.getLastChunkOffset()); } catch (IOException ioe) { logger.warn("Exception while finalizing shuffle partition {} {} {}", msg.appId, msg.shuffleId, partition.reduceId, ioe); @@ -401,7 +410,9 @@ public MergeStatuses finalizeShuffleMerge(FinalizeShuffleMerge msg) throws IOExc } } } - mergeStatuses = new MergeStatuses(msg.shuffleId, bitmaps, reduceIds, sizes); + mergeStatuses = new MergeStatuses(msg.shuffleId, + bitmaps.toArray(new RoaringBitmap[bitmaps.size()]), Ints.toArray(reduceIds), + Longs.toArray(sizes)); } partitions.remove(appShuffleId); logger.info("Finalized shuffle {} from Application {}.", msg.shuffleId, msg.appId); @@ -450,6 +461,7 @@ private PushBlockStreamCallback( this.streamId = streamId; this.partitionInfo = Preconditions.checkNotNull(partitionInfo); this.mapIndex = mapIndex; + abortIfNecessary(); } @Override @@ -466,11 +478,11 @@ public String getID() { private void writeBuf(ByteBuffer buf) throws IOException { while (buf.hasRemaining()) { if (partitionInfo.isEncounteredFailure()) { - long updatedPos = partitionInfo.getPosition() + length; + long updatedPos = partitionInfo.getDataFilePos() + length; logger.debug( "{} shuffleId {} reduceId {} encountered failure current pos {} updated pos {}", partitionInfo.appShuffleId.appId, partitionInfo.appShuffleId.shuffleId, - partitionInfo.reduceId, partitionInfo.getPosition(), updatedPos); + partitionInfo.reduceId, partitionInfo.getDataFilePos(), updatedPos); length += partitionInfo.dataChannel.write(buf, updatedPos); } else { length += partitionInfo.dataChannel.write(buf); @@ -510,15 +522,35 @@ private boolean isDuplicateBlock() { * This is only invoked when the stream is able to write. The stream first writes any deferred * block parts buffered in memory. */ - private void writeAnyDeferredBufs() throws IOException { - if (deferredBufs != null && !deferredBufs.isEmpty()) { - for (ByteBuffer deferredBuf : deferredBufs) { - writeBuf(deferredBuf); - } + private void writeDeferredBufs() throws IOException { + for (ByteBuffer deferredBuf : deferredBufs) { + writeBuf(deferredBuf); + } + deferredBufs = null; + } + + /** + * This throws RuntimeException if the number of IOExceptions have exceeded threshold. 
+ */ + private void abortIfNecessary() { + if (partitionInfo.shouldAbort(mergeManager.ioExceptionsThresholdDuringMerge)) { deferredBufs = null; + throw new RuntimeException(String.format("%s when merging %s", + ErrorHandler.BlockPushErrorHandler.IOEXCEPTIONS_EXCEEDED_THRESHOLD_PREFIX, + streamId)); } } + /** + * This increments the number of IOExceptions and throws RuntimeException if it exceeds the + * threshold which will abort the merge of a particular shuffle partition. + */ + private void incrementIOExceptionsAndAbortIfNecessary() { + // Update the count of IOExceptions + partitionInfo.incrementIOExceptions(); + abortIfNecessary(); + } + @Override public void onData(String streamId, ByteBuffer buf) throws IOException { // When handling the block data using StreamInterceptor, it can help to reduce the amount @@ -556,6 +588,7 @@ public void onData(String streamId, ByteBuffer buf) throws IOException { deferredBufs = null; return; } + abortIfNecessary(); logger.trace("{} shuffleId {} reduceId {} onData writable", partitionInfo.appShuffleId.appId, partitionInfo.appShuffleId.shuffleId, partitionInfo.reduceId); @@ -565,8 +598,17 @@ public void onData(String streamId, ByteBuffer buf) throws IOException { // If we got here, it's safe to write the block data to the merged shuffle file. We // first write any deferred block. - writeAnyDeferredBufs(); - writeBuf(buf); + try { + if (deferredBufs != null && !deferredBufs.isEmpty()) { + writeDeferredBufs(); + } + writeBuf(buf); + } catch (IOException ioe) { + incrementIOExceptionsAndAbortIfNecessary(); + // If the above doesn't throw a RuntimeException, then we propagate the IOException + // back to the client so the block could be retried. + throw ioe; + } // If we got here, it means we successfully write the current chunk of block to merged // shuffle file. If we encountered failure while writing the previous block, we should // reset the file channel position and the status of partitionInfo to indicate that we @@ -574,7 +616,7 @@ public void onData(String streamId, ByteBuffer buf) throws IOException { // position tracked by partitionInfo here. That is only updated while the entire block // is successfully written to merged shuffle file. if (partitionInfo.isEncounteredFailure()) { - partitionInfo.dataChannel.position(partitionInfo.getPosition() + length); + partitionInfo.dataChannel.position(partitionInfo.getDataFilePos() + length); partitionInfo.setEncounteredFailure(false); } } else { @@ -636,15 +678,33 @@ public void onComplete(String streamId) throws IOException { return; } if (partitionInfo.getCurrentMapIndex() < 0) { - writeAnyDeferredBufs(); + try { + if (deferredBufs != null && !deferredBufs.isEmpty()) { + abortIfNecessary(); + writeDeferredBufs(); + } + } catch (IOException ioe) { + incrementIOExceptionsAndAbortIfNecessary(); + // If the above doesn't throw a RuntimeException, then we propagate the IOException + // back to the client so the block could be retried. 
+ throw ioe; + } } - long updatedPos = partitionInfo.getPosition() + length; + long updatedPos = partitionInfo.getDataFilePos() + length; boolean indexUpdated = false; if (updatedPos - partitionInfo.getLastChunkOffset() >= mergeManager.minChunkSize) { - partitionInfo.updateChunkInfo(updatedPos, mapIndex); - indexUpdated = true; + try { + partitionInfo.updateChunkInfo(updatedPos, mapIndex); + indexUpdated = true; + } catch (IOException ioe) { + incrementIOExceptionsAndAbortIfNecessary(); + // If the above doesn't throw a RuntimeException, then we do not propagate the + // IOException to the client. This may increase the chunk size however the increase is + // still limited because of the limit on the number of IOExceptions for a + // particular shuffle partition. + } } - partitionInfo.setPosition(updatedPos); + partitionInfo.setDataFilePos(updatedPos); partitionInfo.setCurrentMapIndex(-1); // update merged results @@ -687,6 +747,11 @@ public void onFailure(String streamId, Throwable throwable) throws IOException { } } } + + @VisibleForTesting + AppShufflePartitionInfo getPartitionInfo() { + return partitionInfo; + } } /** @@ -736,7 +801,7 @@ public static class AppShufflePartitionInfo { // The merged shuffle data file channel public FileChannel dataChannel; // Location offset of the last successfully merged block for this shuffle partition - private long position; + private long dataFilePos; // Indicating whether failure was encountered when merging the previous block private boolean encounteredFailure; // Track the map index whose block is being merged for this shuffle partition @@ -744,44 +809,46 @@ public static class AppShufflePartitionInfo { // Bitmap tracking which mapper's blocks have been merged for this shuffle partition private RoaringBitmap mapTracker; // The index file for a particular merged shuffle contains the chunk offsets. - private RandomAccessFile indexFile; + private MergeShuffleFile indexFile; // The meta file for a particular merged shuffle contains all the map indices that belong to // every chunk. The entry per chunk is a serialized bitmap. 
- private RandomAccessFile metaFile; + private MergeShuffleFile metaFile; // The offset for the last chunk tracked in the index file for this shuffle partition private long lastChunkOffset; private int lastMergedMapIndex = -1; // Bitmap tracking which mapper's blocks are in the current shuffle chunk private RoaringBitmap chunkTracker; + private int numIOExceptions = 0; + private boolean indexMetaUpdateFailed; AppShufflePartitionInfo( AppShuffleId appShuffleId, int reduceId, File dataFile, - File indexFile, - File metaFile) throws IOException { + MergeShuffleFile indexFile, + MergeShuffleFile metaFile) throws IOException { this.appShuffleId = Preconditions.checkNotNull(appShuffleId, "app shuffle id"); this.reduceId = reduceId; this.dataChannel = new FileOutputStream(dataFile).getChannel(); - this.indexFile = new RandomAccessFile(indexFile, "rw"); - this.metaFile = new RandomAccessFile(metaFile, "rw"); + this.indexFile = indexFile; + this.metaFile = metaFile; this.currentMapIndex = -1; // Writing 0 offset so that we can reuse ShuffleIndexInformation.getIndex() updateChunkInfo(0L, -1); - this.position = 0; + this.dataFilePos = 0; this.encounteredFailure = false; this.mapTracker = new RoaringBitmap(); this.chunkTracker = new RoaringBitmap(); } - public long getPosition() { - return position; + public long getDataFilePos() { + return dataFilePos; } - public void setPosition(long position) { + public void setDataFilePos(long dataFilePos) { logger.trace("{} shuffleId {} reduceId {} current pos {} update pos {}", appShuffleId.appId, - appShuffleId.shuffleId, reduceId, this.position, position); - this.position = position; + appShuffleId.shuffleId, reduceId, this.dataFilePos, dataFilePos); + this.dataFilePos = dataFilePos; } boolean isEncounteredFailure() { @@ -825,25 +892,29 @@ void resetChunkTracker() { * @param mapIndex the map index to be added to chunk tracker. */ void updateChunkInfo(long chunkOffset, int mapIndex) throws IOException { - long idxStartPos = -1; try { - // update the chunk tracker to meta file before index file + logger.trace("{} shuffleId {} reduceId {} index current {} updated {}", + appShuffleId.appId, appShuffleId.shuffleId, reduceId, this.lastChunkOffset, chunkOffset); + if (indexMetaUpdateFailed) { + indexFile.getChannel().position(indexFile.getPos()); + } + indexFile.getDos().writeLong(chunkOffset); + // Chunk bitmap should be written to the meta file after the index file because if there are + // any exceptions during writing the offset to the index file, meta file should not be + // updated. If the update to the index file is successful but the update to meta file isn't + // then the index file position is not updated. writeChunkTracker(mapIndex); - idxStartPos = indexFile.getFilePointer(); - logger.trace("{} shuffleId {} reduceId {} updated index current {} updated {}", - appShuffleId.appId, appShuffleId.shuffleId, reduceId, this.lastChunkOffset, - chunkOffset); - indexFile.writeLong(chunkOffset); + indexFile.updatePos(8); + this.lastChunkOffset = chunkOffset; + indexMetaUpdateFailed = false; } catch (IOException ioe) { - if (idxStartPos != -1) { - // reset the position to avoid corrupting index files during exception. 
- logger.warn("{} shuffleId {} reduceId {} reset index to position {}", - appShuffleId.appId, appShuffleId.shuffleId, reduceId, idxStartPos); - indexFile.seek(idxStartPos); - } + logger.warn("{} shuffleId {} reduceId {} update to index/meta failed", appShuffleId.appId, + appShuffleId.shuffleId, reduceId); + indexMetaUpdateFailed = true; + // Any exception here is propagated to the caller and the caller can decide whether to + // abort or not. throw ioe; } - this.lastChunkOffset = chunkOffset; } private void writeChunkTracker(int mapIndex) throws IOException { @@ -851,17 +922,38 @@ private void writeChunkTracker(int mapIndex) throws IOException { return; } chunkTracker.add(mapIndex); - long metaStartPos = metaFile.getFilePointer(); - try { - logger.trace("{} shuffleId {} reduceId {} mapIndex {} write chunk to meta file", - appShuffleId.appId, appShuffleId.shuffleId, reduceId, mapIndex); - chunkTracker.serialize(metaFile); - } catch (IOException ioe) { - logger.warn("{} shuffleId {} reduceId {} mapIndex {} reset position of meta file to {}", - appShuffleId.appId, appShuffleId.shuffleId, reduceId, mapIndex, metaStartPos); - metaFile.seek(metaStartPos); - throw ioe; + logger.trace("{} shuffleId {} reduceId {} mapIndex {} write chunk to meta file", + appShuffleId.appId, appShuffleId.shuffleId, reduceId, mapIndex); + if (indexMetaUpdateFailed) { + metaFile.getChannel().position(metaFile.getPos()); + } + chunkTracker.serialize(metaFile.getDos()); + metaFile.updatePos(metaFile.getChannel().position() - metaFile.getPos()); + } + + private void incrementIOExceptions() { + numIOExceptions++; + } + + private boolean shouldAbort(int ioExceptionsThresholdDuringMerge) { + return numIOExceptions > ioExceptionsThresholdDuringMerge; + } + + private void finalizePartition() throws IOException { + if (dataFilePos != lastChunkOffset) { + try { + updateChunkInfo(dataFilePos, lastMergedMapIndex); + } catch (IOException ioe) { + // Any exceptions here while updating the meta files can be ignored. If the files + // aren't successfully updated they will be truncated. + } } + // Get rid of any partial block data at the end of the file. This could either + // be due to failure, or a request still being processed when the shuffle + // merge gets finalized, or any exceptions while updating index/meta files. + dataChannel.truncate(lastChunkOffset); + indexFile.getChannel().truncate(indexFile.getPos()); + metaFile.getChannel().truncate(metaFile.getPos()); } void closeAllFiles() { @@ -877,7 +969,6 @@ void closeAllFiles() { } if (metaFile != null) { try { - // if the stream is closed, channel get's closed as well. 
metaFile.close(); } catch (IOException ioe) { logger.warn("Error closing meta file for {} shuffleId {} reduceId {}", @@ -902,6 +993,26 @@ void closeAllFiles() { protected void finalize() throws Throwable { closeAllFiles(); } + + @VisibleForTesting + MergeShuffleFile getIndexFile() { + return indexFile; + } + + @VisibleForTesting + MergeShuffleFile getMetaFile() { + return metaFile; + } + + @VisibleForTesting + FileChannel getDataChannel() { + return dataChannel; + } + + @VisibleForTesting + int getNumIOExceptions() { + return numIOExceptions; + } } /** @@ -931,4 +1042,52 @@ private AppPathsInfo( } } } + + @VisibleForTesting + static class MergeShuffleFile { + private FileChannel channel; + private DataOutputStream dos; + private long pos; + + @VisibleForTesting + MergeShuffleFile(File file) throws IOException { + FileOutputStream fos = new FileOutputStream(file); + channel = fos.getChannel(); + dos = new DataOutputStream(fos); + } + + @VisibleForTesting + MergeShuffleFile(FileChannel channel, DataOutputStream dos) { + this.channel = channel; + this.dos = dos; + } + + private void updatePos(long numBytes) { + pos += numBytes; + } + + void close() throws IOException { + try { + dos.close(); + } finally { + dos = null; + channel = null; + } + } + + @VisibleForTesting + DataOutputStream getDos() { + return dos; + } + + @VisibleForTesting + FileChannel getChannel() { + return channel; + } + + @VisibleForTesting + long getPos() { + return pos; + } + } } diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java index 0f200dc721963..8c6f7434748ec 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java @@ -17,9 +17,12 @@ package org.apache.spark.network.shuffle; +import java.io.DataOutputStream; import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -42,6 +45,7 @@ import org.apache.spark.network.buffer.FileSegmentManagedBuffer; import org.apache.spark.network.client.StreamCallbackWithID; +import org.apache.spark.network.shuffle.RemoteBlockPushResolver.MergeShuffleFile; import org.apache.spark.network.shuffle.protocol.ExecutorShuffleInfo; import org.apache.spark.network.shuffle.protocol.FinalizeShuffleMerge; import org.apache.spark.network.shuffle.protocol.MergeStatuses; @@ -411,6 +415,347 @@ void deleteExecutorDirs(Path[] dirs) { } } + @Test + public void testRecoverIndexFileAfterIOExceptions() throws IOException { + useTestFiles(true, false); + RemoteBlockPushResolver.PushBlockStreamCallback callback1 = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 0, 0, 0)); + callback1.onData(callback1.getID(), ByteBuffer.wrap(new byte[4])); + callback1.onComplete(callback1.getID()); + RemoteBlockPushResolver.AppShufflePartitionInfo partitionInfo = callback1.getPartitionInfo(); + // Close the index stream so it throws IOException + TestMergeShuffleFile testIndexFile = (TestMergeShuffleFile) partitionInfo.getIndexFile(); + testIndexFile.close(); + StreamCallbackWithID callback2 = pushResolver.receiveBlockDataAsStream( + new 
PushBlockStream(TEST_APP, 0, 1, 0, 0)); + callback2.onData(callback2.getID(), ByteBuffer.wrap(new byte[5])); + // This will complete without any IOExceptions because number of IOExceptions are less than + // the threshold but the update to index file will be unsuccessful. + callback2.onComplete(callback2.getID()); + assertEquals("index position", 16, testIndexFile.getPos()); + // Restore the index stream so it can write successfully again. + testIndexFile.restore(); + StreamCallbackWithID callback3 = pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 2, 0, 0)); + callback3.onData(callback3.getID(), ByteBuffer.wrap(new byte[2])); + callback3.onComplete(callback3.getID()); + assertEquals("index position", 24, testIndexFile.getPos()); + MergeStatuses statuses = pushResolver.finalizeShuffleMerge( + new FinalizeShuffleMerge(TEST_APP, 0)); + validateMergeStatuses(statuses, new int[] {0}, new long[] {11}); + MergedBlockMeta blockMeta = pushResolver.getMergedBlockMeta(TEST_APP, 0, 0); + validateChunks(TEST_APP, 0, 0, blockMeta, new int[] {4, 7}, new int[][] {{0}, {1, 2}}); + } + + @Test + public void testRecoverIndexFileAfterIOExceptionsInFinalize() throws IOException { + useTestFiles(true, false); + RemoteBlockPushResolver.PushBlockStreamCallback callback1 = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 0, 0, 0)); + callback1.onData(callback1.getID(), ByteBuffer.wrap(new byte[4])); + callback1.onComplete(callback1.getID()); + RemoteBlockPushResolver.AppShufflePartitionInfo partitionInfo = callback1.getPartitionInfo(); + // Close the index stream so it throws IOException + TestMergeShuffleFile testIndexFile = (TestMergeShuffleFile) partitionInfo.getIndexFile(); + testIndexFile.close(); + StreamCallbackWithID callback2 = pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 1, 0, 0)); + callback2.onData(callback2.getID(), ByteBuffer.wrap(new byte[5])); + // This will complete without any IOExceptions because number of IOExceptions are less than + // the threshold but the update to index file will be unsuccessful. + callback2.onComplete(callback2.getID()); + assertEquals("index position", 16, testIndexFile.getPos()); + // The last update to index was unsuccessful however any further updates will be successful. + // Restore the index stream so it can write successfully again. 
+ testIndexFile.restore(); + MergeStatuses statuses = pushResolver.finalizeShuffleMerge( + new FinalizeShuffleMerge(TEST_APP, 0)); + assertEquals("index position", 24, testIndexFile.getPos()); + validateMergeStatuses(statuses, new int[] {0}, new long[] {9}); + MergedBlockMeta blockMeta = pushResolver.getMergedBlockMeta(TEST_APP, 0, 0); + validateChunks(TEST_APP, 0, 0, blockMeta, new int[] {4, 5}, new int[][] {{0}, {1}}); + } + + @Test + public void testRecoverMetaFileAfterIOExceptions() throws IOException { + useTestFiles(false, true); + RemoteBlockPushResolver.PushBlockStreamCallback callback1 = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 0, 0, 0)); + callback1.onData(callback1.getID(), ByteBuffer.wrap(new byte[4])); + callback1.onComplete(callback1.getID()); + RemoteBlockPushResolver.AppShufflePartitionInfo partitionInfo = callback1.getPartitionInfo(); + // Close the meta stream so it throws IOException + TestMergeShuffleFile testMetaFile = (TestMergeShuffleFile) partitionInfo.getMetaFile(); + long metaPosBeforeClose = testMetaFile.getPos(); + testMetaFile.close(); + StreamCallbackWithID callback2 = pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 1, 0, 0)); + callback2.onData(callback2.getID(), ByteBuffer.wrap(new byte[5])); + // This will complete without any IOExceptions because number of IOExceptions are less than + // the threshold but the update to index and meta file will be unsuccessful. + callback2.onComplete(callback2.getID()); + assertEquals("index position", 16, partitionInfo.getIndexFile().getPos()); + assertEquals("meta position", metaPosBeforeClose, testMetaFile.getPos()); + // Restore the meta stream so it can write successfully again. 
+ testMetaFile.restore(); + StreamCallbackWithID callback3 = pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 2, 0, 0)); + callback3.onData(callback3.getID(), ByteBuffer.wrap(new byte[2])); + callback3.onComplete(callback3.getID()); + assertEquals("index position", 24, partitionInfo.getIndexFile().getPos()); + assertTrue("meta position", testMetaFile.getPos() > metaPosBeforeClose); + MergeStatuses statuses = pushResolver.finalizeShuffleMerge( + new FinalizeShuffleMerge(TEST_APP, 0)); + validateMergeStatuses(statuses, new int[] {0}, new long[] {11}); + MergedBlockMeta blockMeta = pushResolver.getMergedBlockMeta(TEST_APP, 0, 0); + validateChunks(TEST_APP, 0, 0, blockMeta, new int[] {4, 7}, new int[][] {{0}, {1, 2}}); + } + + @Test + public void testRecoverMetaFileAfterIOExceptionsInFinalize() throws IOException { + useTestFiles(false, true); + RemoteBlockPushResolver.PushBlockStreamCallback callback1 = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 0, 0, 0)); + callback1.onData(callback1.getID(), ByteBuffer.wrap(new byte[4])); + callback1.onComplete(callback1.getID()); + RemoteBlockPushResolver.AppShufflePartitionInfo partitionInfo = callback1.getPartitionInfo(); + // Close the meta stream so it throws IOException + TestMergeShuffleFile testMetaFile = (TestMergeShuffleFile) partitionInfo.getMetaFile(); + long metaPosBeforeClose = testMetaFile.getPos(); + testMetaFile.close(); + StreamCallbackWithID callback2 = pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 1, 0, 0)); + callback2.onData(callback2.getID(), ByteBuffer.wrap(new byte[5])); + // This will complete without any IOExceptions because number of IOExceptions are less than + // the threshold but the update to index and meta file will be unsuccessful. + callback2.onComplete(callback2.getID()); + MergeShuffleFile indexFile = partitionInfo.getIndexFile(); + assertEquals("index position", 16, indexFile.getPos()); + assertEquals("meta position", metaPosBeforeClose, testMetaFile.getPos()); + // Restore the meta stream so it can write successfully again. 
+ testMetaFile.restore(); + MergeStatuses statuses = pushResolver.finalizeShuffleMerge( + new FinalizeShuffleMerge(TEST_APP, 0)); + assertEquals("index position", 24, indexFile.getPos()); + assertTrue("meta position", testMetaFile.getPos() > metaPosBeforeClose); + validateMergeStatuses(statuses, new int[] {0}, new long[] {9}); + MergedBlockMeta blockMeta = pushResolver.getMergedBlockMeta(TEST_APP, 0, 0); + validateChunks(TEST_APP, 0, 0, blockMeta, new int[] {4, 5}, new int[][] {{0}, {1}}); + } + + @Test (expected = RuntimeException.class) + public void testIOExceptionsExceededThreshold() throws IOException { + RemoteBlockPushResolver.PushBlockStreamCallback callback = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 0, 0, 0)); + RemoteBlockPushResolver.AppShufflePartitionInfo partitionInfo = callback.getPartitionInfo(); + callback.onData(callback.getID(), ByteBuffer.wrap(new byte[4])); + callback.onComplete(callback.getID()); + // Close the data stream so it throws continuous IOException + partitionInfo.getDataChannel().close(); + for (int i = 1; i < 5; i++) { + RemoteBlockPushResolver.PushBlockStreamCallback callback1 = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, i, 0, 0)); + try { + callback1.onData(callback1.getID(), ByteBuffer.wrap(new byte[2])); + } catch (IOException ioe) { + // this will throw IOException so the client can retry. + callback1.onFailure(callback1.getID(), ioe); + } + } + assertEquals(4, partitionInfo.getNumIOExceptions()); + // After 4 IOException, the server will respond with IOExceptions exceeded threshold + try { + RemoteBlockPushResolver.PushBlockStreamCallback callback2 = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 5, 0, 0)); + callback2.onData(callback.getID(), ByteBuffer.wrap(new byte[1])); + } catch (Throwable t) { + assertEquals("IOExceptions exceeded the threshold when merging shufflePush_0_5_0", + t.getMessage()); + throw t; + } + } + + @Test (expected = RuntimeException.class) + public void testIOExceptionsDuringMetaUpdateIncreasesExceptionCount() throws IOException { + useTestFiles(true, false); + RemoteBlockPushResolver.PushBlockStreamCallback callback = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 0, 0, 0)); + RemoteBlockPushResolver.AppShufflePartitionInfo partitionInfo = callback.getPartitionInfo(); + callback.onData(callback.getID(), ByteBuffer.wrap(new byte[4])); + callback.onComplete(callback.getID()); + TestMergeShuffleFile testIndexFile = (TestMergeShuffleFile) partitionInfo.getIndexFile(); + testIndexFile.close(); + for (int i = 1; i < 5; i++) { + RemoteBlockPushResolver.PushBlockStreamCallback callback1 = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, i, 0, 0)); + callback1.onData(callback1.getID(), ByteBuffer.wrap(new byte[5])); + // This will complete without any exceptions but the exception count is increased. + callback1.onComplete(callback1.getID()); + } + assertEquals(4, partitionInfo.getNumIOExceptions()); + // After 4 IOException, the server will respond with IOExceptions exceeded threshold for any + // new request for this partition. 
+ try { + RemoteBlockPushResolver.PushBlockStreamCallback callback2 = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 5, 0, 0)); + callback2.onData(callback2.getID(), ByteBuffer.wrap(new byte[4])); + callback2.onComplete(callback2.getID()); + } catch (Throwable t) { + assertEquals("IOExceptions exceeded the threshold when merging shufflePush_0_5_0", + t.getMessage()); + throw t; + } + } + + @Test (expected = RuntimeException.class) + public void testRequestForAbortedShufflePartitionThrowsException() { + try { + testIOExceptionsDuringMetaUpdateIncreasesExceptionCount(); + } catch (Throwable t) { + // No more blocks can be merged to this partition. + } + try { + pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 10, 0, 0)); + } catch (Throwable t) { + assertEquals("IOExceptions exceeded the threshold when merging shufflePush_0_10_0", + t.getMessage()); + throw t; + } + } + + @Test (expected = RuntimeException.class) + public void testPendingBlockIsAbortedImmediately() throws IOException { + useTestFiles(true, false); + RemoteBlockPushResolver.PushBlockStreamCallback callback = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 0, 0, 0)); + RemoteBlockPushResolver.AppShufflePartitionInfo partitionInfo = callback.getPartitionInfo(); + TestMergeShuffleFile testIndexFile = (TestMergeShuffleFile) partitionInfo.getIndexFile(); + testIndexFile.close(); + for (int i = 1; i < 6; i++) { + RemoteBlockPushResolver.PushBlockStreamCallback callback1 = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, i, 0, 0)); + try { + callback1.onData(callback1.getID(), ByteBuffer.wrap(new byte[5])); + // This will complete without any exceptions but the exception count is increased. + callback1.onComplete(callback1.getID()); + } catch (Throwable t) { + callback1.onFailure(callback1.getID(), t); + } + } + assertEquals(5, partitionInfo.getNumIOExceptions()); + // The server will respond with IOExceptions exceeded threshold for any additional attempts + // to write. + try { + callback.onData(callback.getID(), ByteBuffer.wrap(new byte[4])); + } catch (Throwable t) { + assertEquals("IOExceptions exceeded the threshold when merging shufflePush_0_0_0", + t.getMessage()); + throw t; + } + } + + @Test (expected = RuntimeException.class) + public void testWritingPendingBufsIsAbortedImmediatelyDuringComplete() throws IOException { + useTestFiles(true, false); + RemoteBlockPushResolver.PushBlockStreamCallback callback = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 0, 0, 0)); + RemoteBlockPushResolver.AppShufflePartitionInfo partitionInfo = callback.getPartitionInfo(); + TestMergeShuffleFile testIndexFile = (TestMergeShuffleFile) partitionInfo.getIndexFile(); + testIndexFile.close(); + for (int i = 1; i < 5; i++) { + RemoteBlockPushResolver.PushBlockStreamCallback callback1 = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, i, 0, 0)); + try { + callback1.onData(callback1.getID(), ByteBuffer.wrap(new byte[5])); + // This will complete without any exceptions but the exception count is increased. 
+ callback1.onComplete(callback1.getID()); + } catch (Throwable t) { + callback1.onFailure(callback1.getID(), t); + } + } + assertEquals(4, partitionInfo.getNumIOExceptions()); + RemoteBlockPushResolver.PushBlockStreamCallback callback2 = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 5, 0, 0)); + callback2.onData(callback2.getID(), ByteBuffer.wrap(new byte[5])); + // This is deferred + callback.onData(callback.getID(), ByteBuffer.wrap(new byte[4])); + // Callback2 completes which will throw another exception. + try { + callback2.onComplete(callback2.getID()); + } catch (Throwable t) { + callback2.onFailure(callback2.getID(), t); + } + assertEquals(5, partitionInfo.getNumIOExceptions()); + // Restore index file so that any further writes to it are successful and any exceptions are + // due to IOExceptions exceeding threshold. + testIndexFile.restore(); + try { + callback.onComplete(callback.getID()); + } catch (Throwable t) { + assertEquals("IOExceptions exceeded the threshold when merging shufflePush_0_0_0", + t.getMessage()); + throw t; + } + } + + @Test + public void testFailureWhileTruncatingFiles() throws IOException { + useTestFiles(true, false); + PushBlock[] pushBlocks = new PushBlock[] { + new PushBlock(0, 0, 0, ByteBuffer.wrap(new byte[2])), + new PushBlock(0, 1, 0, ByteBuffer.wrap(new byte[3])), + new PushBlock(0, 0, 1, ByteBuffer.wrap(new byte[5])), + new PushBlock(0, 1, 1, ByteBuffer.wrap(new byte[3])) + }; + pushBlockHelper(TEST_APP, pushBlocks); + RemoteBlockPushResolver.PushBlockStreamCallback callback = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 2, 0, 0)); + callback.onData(callback.getID(), ByteBuffer.wrap(new byte[2])); + callback.onComplete(callback.getID()); + RemoteBlockPushResolver.AppShufflePartitionInfo partitionInfo = callback.getPartitionInfo(); + TestMergeShuffleFile testIndexFile = (TestMergeShuffleFile) partitionInfo.getIndexFile(); + // Close the index file so truncate throws IOException + testIndexFile.close(); + MergeStatuses statuses = pushResolver.finalizeShuffleMerge( + new FinalizeShuffleMerge(TEST_APP, 0)); + validateMergeStatuses(statuses, new int[] {1}, new long[] {8}); + MergedBlockMeta meta = pushResolver.getMergedBlockMeta(TEST_APP, 0, 1); + validateChunks(TEST_APP, 0, 1, meta, new int[]{5, 3}, new int[][]{{0},{1}}); + } + + private void useTestFiles(boolean useTestIndexFile, boolean useTestMetaFile) throws IOException { + pushResolver = new RemoteBlockPushResolver(conf) { + @Override + AppShufflePartitionInfo newAppShufflePartitionInfo(AppShuffleId appShuffleId, int reduceId, + File dataFile, File indexFile, File metaFile) throws IOException { + MergeShuffleFile mergedIndexFile = useTestIndexFile ? new TestMergeShuffleFile(indexFile) + : new MergeShuffleFile(indexFile); + MergeShuffleFile mergedMetaFile = useTestMetaFile ? 
new TestMergeShuffleFile(metaFile) : + new MergeShuffleFile(metaFile); + return new AppShufflePartitionInfo(appShuffleId, reduceId, dataFile, mergedIndexFile, + mergedMetaFile); + } + }; + registerExecutor(TEST_APP, prepareLocalDirs(localDirs)); + } + private Path[] createLocalDirs(int numLocalDirs) throws IOException { Path[] localDirs = new Path[numLocalDirs]; for (int i = 0; i < localDirs.length; i++) { @@ -493,4 +838,39 @@ private static class PushBlock { this.buffer = buffer; } } + + private static class TestMergeShuffleFile extends MergeShuffleFile { + private DataOutputStream activeDos; + private File file; + private FileChannel channel; + + private TestMergeShuffleFile(File file) throws IOException { + super(null, null); + this.file = file; + FileOutputStream fos = new FileOutputStream(file); + channel = fos.getChannel(); + activeDos = new DataOutputStream(fos); + } + + @Override + DataOutputStream getDos() { + return activeDos; + } + + @Override + FileChannel getChannel() { + return channel; + } + + @Override + void close() throws IOException { + activeDos.close(); + } + + void restore() throws IOException { + FileOutputStream fos = new FileOutputStream(file, true); + channel = fos.getChannel(); + activeDos = new DataOutputStream(fos); + } + } } From 5c9b421c3711ba373b4d5cbbd83a8ece91291ed0 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Wed, 23 Dec 2020 14:48:01 -0800 Subject: [PATCH 0867/1009] [SPARK-33277][PYSPARK][SQL] Use ContextAwareIterator to stop consuming after the task ends ### What changes were proposed in this pull request? This is a retry of #30177. This is not a complete fix, but it would take long time to complete (#30242). As discussed offline, at least using `ContextAwareIterator` should be helpful enough for many cases. As the Python evaluation consumes the parent iterator in a separate thread, it could consume more data from the parent even after the task ends and the parent is closed. Thus, we should use `ContextAwareIterator` to stop consuming after the task ends. ### Why are the changes needed? Python/Pandas UDF right after off-heap vectorized reader could cause executor crash. E.g.,: ```py spark.range(0, 100000, 1, 1).write.parquet(path) spark.conf.set("spark.sql.columnVector.offheap.enabled", True) def f(x): return 0 fUdf = udf(f, LongType()) spark.read.parquet(path).select(fUdf('id')).head() ``` This is because, the Python evaluation consumes the parent iterator in a separate thread and it consumes more data from the parent even after the task ends and the parent is closed. If an off-heap column vector exists in the parent iterator, it could cause segmentation fault which crashes the executor. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added tests, and manually. Closes #30899 from ueshin/issues/SPARK-33277/context_aware_iterator. 
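A minimal sketch of the wrapping pattern this change applies in `EvalPythonExec` and `MapInPandasExec` (the function name here is a placeholder for the operator's per-partition processing):

```scala
// A minimal sketch: wrap the parent iterator so that consumption stops once the
// task has completed or been interrupted, even if a separate thread keeps reading.
import org.apache.spark.{ContextAwareIterator, TaskContext}
import org.apache.spark.sql.catalyst.InternalRow

def processPartition(iter: Iterator[InternalRow]): Iterator[InternalRow] = {
  val context = TaskContext.get()
  // The wrapper reports hasNext = false as soon as the task is completed or interrupted,
  // so a consumer running in another thread stops pulling from the parent iterator.
  new ContextAwareIterator(context, iter)
}
```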
Authored-by: Takuya UESHIN Signed-off-by: Dongjoon Hyun --- .../apache/spark/ContextAwareIterator.scala | 40 +++++++++++++++++++ .../sql/execution/python/EvalPythonExec.scala | 5 ++- .../execution/python/MapInPandasExec.scala | 9 +++-- 3 files changed, 48 insertions(+), 6 deletions(-) create mode 100644 core/src/main/scala/org/apache/spark/ContextAwareIterator.scala diff --git a/core/src/main/scala/org/apache/spark/ContextAwareIterator.scala b/core/src/main/scala/org/apache/spark/ContextAwareIterator.scala new file mode 100644 index 0000000000000..c4d0dd8aceab0 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/ContextAwareIterator.scala @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark + +import org.apache.spark.annotation.DeveloperApi + +/** + * :: DeveloperApi :: + * A TaskContext aware iterator. + * + * As the Python evaluation consumes the parent iterator in a separate thread, + * it could consume more data from the parent even after the task ends and the parent is closed. + * If an off-heap access exists in the parent iterator, it could cause segmentation fault + * which crashes the executor. + * Thus, we should use [[ContextAwareIterator]] to stop consuming after the task ends. + */ +@DeveloperApi +class ContextAwareIterator[+T](val context: TaskContext, val delegate: Iterator[T]) + extends Iterator[T] { + + override def hasNext: Boolean = + !context.isCompleted() && !context.isInterrupted() && delegate.hasNext + + override def next(): T = delegate.next() +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala index 7c476ab03c002..fca43e454bff5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala @@ -21,7 +21,7 @@ import java.io.File import scala.collection.mutable.ArrayBuffer -import org.apache.spark.{SparkEnv, TaskContext} +import org.apache.spark.{ContextAwareIterator, SparkEnv, TaskContext} import org.apache.spark.api.python.ChainedPythonFunctions import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow @@ -89,6 +89,7 @@ trait EvalPythonExec extends UnaryExecNode { inputRDD.mapPartitions { iter => val context = TaskContext.get() + val contextAwareIterator = new ContextAwareIterator(context, iter) // The queue used to buffer input rows so we can drain it to // combine input with output from Python. @@ -120,7 +121,7 @@ trait EvalPythonExec extends UnaryExecNode { }.toSeq) // Add rows to queue to join later with the result. 
- val projectedRowIter = iter.map { inputRow => + val projectedRowIter = contextAwareIterator.map { inputRow => queue.add(inputRow.asInstanceOf[UnsafeRow]) projection(inputRow) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/MapInPandasExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/MapInPandasExec.scala index 2bb808119c0ae..71f51f1abc6f5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/MapInPandasExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/MapInPandasExec.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.python import scala.collection.JavaConverters._ -import org.apache.spark.TaskContext +import org.apache.spark.{ContextAwareIterator, TaskContext} import org.apache.spark.api.python.{ChainedPythonFunctions, PythonEvalType} import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow @@ -61,16 +61,17 @@ case class MapInPandasExec( val pythonRunnerConf = ArrowUtils.getPythonRunnerConfMap(conf) val outputTypes = child.schema + val context = TaskContext.get() + val contextAwareIterator = new ContextAwareIterator(context, inputIter) + // Here we wrap it via another row so that Python sides understand it // as a DataFrame. - val wrappedIter = inputIter.map(InternalRow(_)) + val wrappedIter = contextAwareIterator.map(InternalRow(_)) // DO NOT use iter.grouped(). See BatchIterator. val batchIter = if (batchSize > 0) new BatchIterator(wrappedIter, batchSize) else Iterator(wrappedIter) - val context = TaskContext.get() - val columnarBatchIter = new ArrowPythonRunner( chainedFunc, PythonEvalType.SQL_MAP_PANDAS_ITER_UDF, From d467d817260d6ca605c34f493e68d0877209170f Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 23 Dec 2020 15:31:56 -0800 Subject: [PATCH 0868/1009] [SPARK-33893][CORE] Exclude fallback block manager from executorList ### What changes were proposed in this pull request? This PR aims to exclude fallback block manager from `executorList` function. ### Why are the changes needed? When a fallback storage is used, the executors UI tab hangs because the executor list REST API result doesn't have `peakMemoryMetrics` of `ExecutorMetrics`. The root cause is that the block manager id used by fallback storage is included in the API result and it doesn't have `peakMemoryMetrics` because it's populated during HeartBeat reporting. We should hide it. ### Does this PR introduce _any_ user-facing change? No. This is a bug fix on UI. ### How was this patch tested? Manual. Run the following and visit Spark `executors` tab UI with browser. ``` bin/spark-shell -c spark.storage.decommission.fallbackStorage.path=file:///tmp/spark-storage/ ``` Closes #30911 from dongjoon-hyun/SPARK-33893. 
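The fix in the diff below is a one-line filter; as a rough illustration of the idea (the real constant, FallbackStorage.FALLBACK_BLOCK_MANAGER_ID, is internal to Spark, so a placeholder id is used here), entries registered by the fallback storage never receive peakMemoryMetrics through heartbeats and are therefore dropped before the executor list reaches the REST API and the UI.

```scala
import org.apache.spark.status.api.v1.ExecutorSummary

// Illustration only: `fallbackExecutorId` stands in for the executor id carried by the
// synthetic block manager that fallback storage registers.
def visibleExecutors(
    summaries: Seq[ExecutorSummary],
    fallbackExecutorId: String): Seq[ExecutorSummary] =
  summaries.filter(_.id != fallbackExecutorId)
```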
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../main/scala/org/apache/spark/status/AppStatusStore.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala index affa85b76cf19..b9cc9145feb4d 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala @@ -24,6 +24,7 @@ import scala.collection.mutable.HashMap import org.apache.spark.{JobExecutionStatus, SparkConf} import org.apache.spark.status.api.v1 +import org.apache.spark.storage.FallbackStorage.FALLBACK_BLOCK_MANAGER_ID import org.apache.spark.ui.scope._ import org.apache.spark.util.Utils import org.apache.spark.util.kvstore.{InMemoryStore, KVStore} @@ -88,7 +89,7 @@ private[spark] class AppStatusStore( } else { base } - filtered.asScala.map(_.info).toSeq + filtered.asScala.map(_.info).filter(_.id != FALLBACK_BLOCK_MANAGER_ID.executorId).toSeq } def executorSummary(executorId: String): v1.ExecutorSummary = { From 368a2c341d8f3315c759e1c2362439534a9d44e7 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Wed, 23 Dec 2020 15:38:32 -0800 Subject: [PATCH 0869/1009] [SPARK-33877][SQL][FOLLOWUP] SQL reference documents for INSERT w/ a column list ### What changes were proposed in this pull request? followup of https://github.com/apache/spark/commit/a3dd8dacee8f6b316be90500f9fd8ec8997a5784 via suggestion https://github.com/apache/spark/pull/30888#discussion_r547822642 ### Why are the changes needed? doc improvement ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? passing GA doc Closes #30909 from yaooqinn/SPARK-33877-F. Authored-by: Kent Yao Signed-off-by: Dongjoon Hyun --- docs/sql-ref-syntax-dml-insert-into.md | 3 +-- docs/sql-ref-syntax-dml-insert-overwrite-table.md | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/sql-ref-syntax-dml-insert-into.md b/docs/sql-ref-syntax-dml-insert-into.md index 96a95b1a629e9..15400780289e2 100644 --- a/docs/sql-ref-syntax-dml-insert-into.md +++ b/docs/sql-ref-syntax-dml-insert-into.md @@ -47,12 +47,11 @@ INSERT INTO [ TABLE ] table_identifier [ partition_spec ] [ ( column_list ) ] * **column_list** - An optional parameter that specifies a comma-separated list of columns belonging to the `table_identifier` table. + An optional parameter that specifies a comma-separated list of columns belonging to the `table_identifier` table. Spark will reorder the columns of the input query to match the table schema according to the specified column list. **Note:**The current behaviour has some limitations: - All specified columns should exist in the table and not be duplicated from each other. It includes all columns except the static partition columns. - The size of the column list should be exactly the size of the data from `VALUES` clause or query. - - The order of the column list is alterable and determines how the data from `VALUES` clause or query to be inserted by position. * **VALUES ( { value `|` NULL } [ , ... ] ) [ , ( ... 
) ]** diff --git a/docs/sql-ref-syntax-dml-insert-overwrite-table.md b/docs/sql-ref-syntax-dml-insert-overwrite-table.md index f2413fb72464f..5fd0880fe45d7 100644 --- a/docs/sql-ref-syntax-dml-insert-overwrite-table.md +++ b/docs/sql-ref-syntax-dml-insert-overwrite-table.md @@ -47,14 +47,13 @@ INSERT OVERWRITE [ TABLE ] table_identifier [ partition_spec [ IF NOT EXISTS ] ] * **column_list** - An optional parameter that specifies a comma-separated list of columns belonging to the `table_identifier` table. + An optional parameter that specifies a comma-separated list of columns belonging to the `table_identifier` table. Spark will reorder the columns of the input query to match the table schema according to the specified column list. **Note** The current behaviour has some limitations: - All specified columns should exist in the table and not be duplicated from each other. It includes all columns except the static partition columns. - The size of the column list should be exactly the size of the data from `VALUES` clause or query. - - The order of the column list is alterable and determines how the data from `VALUES` clause or query to be inserted by position. * **VALUES ( { value `|` NULL } [ , ... ] ) [ , ( ... ) ]** From 61881bb6988aa0320b4bacfabbc0ee6f05f287cb Mon Sep 17 00:00:00 2001 From: offthewall123 Date: Wed, 23 Dec 2020 20:01:53 -0600 Subject: [PATCH 0870/1009] [SPARK-33835][CORE] Refector AbstractCommandBuilder.buildJavaCommand: use firstNonEmpty ### What changes were proposed in this pull request? refector AbstractCommandBuilder.buildJavaCommand: use firstNonEmpty ### Why are the changes needed? For better code understanding, and firstNonEmpty can detect javaHome = " ", an empty string. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? End to End. Closes #30831 from offthewall123/refector_AbstractCommandBuilder. Authored-by: offthewall123 Signed-off-by: Sean Owen --- .../spark/launcher/AbstractCommandBuilder.java | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java index 778fd46b91fa1..24ad9cbdba087 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java +++ b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java @@ -92,17 +92,13 @@ abstract List buildCommand(Map env) List buildJavaCommand(String extraClassPath) throws IOException { List cmd = new ArrayList<>(); - String[] candidateJavaHomes = new String[] { - javaHome, + String firstJavaHome = firstNonEmpty(javaHome, childEnv.get("JAVA_HOME"), System.getenv("JAVA_HOME"), - System.getProperty("java.home") - }; - for (String javaHome : candidateJavaHomes) { - if (javaHome != null) { - cmd.add(join(File.separator, javaHome, "bin", "java")); - break; - } + System.getProperty("java.home")); + + if (firstJavaHome != null) { + cmd.add(join(File.separator, firstJavaHome, "bin", "java")); } // Load extra JAVA_OPTS from conf/java-opts, if it exists. From 86c1cfc5791dae5f2ee8ccd5095dbeb2243baba6 Mon Sep 17 00:00:00 2001 From: Yuanjian Li Date: Thu, 24 Dec 2020 12:44:37 +0900 Subject: [PATCH 0871/1009] [SPARK-33659][SS] Document the current behavior for DataStreamWriter.toTable API ### What changes were proposed in this pull request? 
Follow up work for #30521, document the following behaviors in the API doc: - Figure out the effects when configurations are (provider/partitionBy) conflicting with the existing table. - Document the lack of functionality on creating a v2 table, and guide that the users should ensure a table is created in prior to avoid the behavior unintended/insufficient table is being created. ### Why are the changes needed? We didn't have full support for the V2 table created in the API now. (TODO SPARK-33638) ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Document only. Closes #30885 from xuanyuanking/SPARK-33659. Authored-by: Yuanjian Li Signed-off-by: HyukjinKwon --- python/pyspark/sql/streaming.py | 13 ++++++++++--- .../spark/sql/streaming/DataStreamWriter.scala | 14 ++++++++++++-- .../streaming/test/DataStreamTableAPISuite.scala | 6 +++--- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py index 5f122293f4a0a..51941a6269074 100644 --- a/python/pyspark/sql/streaming.py +++ b/python/pyspark/sql/streaming.py @@ -1498,8 +1498,7 @@ def toTable(self, tableName, format=None, outputMode=None, partitionBy=None, que Starts the execution of the streaming query, which will continually output results to the given table as new data arrives. - A new table will be created if the table not exists. The returned - :class:`StreamingQuery` object can be used to interact with the stream. + The returned :class:`StreamingQuery` object can be used to interact with the stream. .. versionadded:: 3.1.0 @@ -1531,6 +1530,15 @@ def toTable(self, tableName, format=None, outputMode=None, partitionBy=None, que ----- This API is evolving. + For v1 table, partitioning columns provided by `partitionBy` will be respected no matter + the table exists or not. A new table will be created if the table not exists. + + For v2 table, `partitionBy` will be ignored if the table already exists. `partitionBy` will + be respected only if the v2 table does not exist. Besides, the v2 table created by this API + lacks some functionalities (e.g., customized properties, options, and serde info). If you + need them, please create the v2 table manually before the execution to avoid creating a + table with incomplete information. + Examples -------- >>> sdf.writeStream.format('parquet').queryName('query').toTable('output_table') @@ -1543,7 +1551,6 @@ def toTable(self, tableName, format=None, outputMode=None, partitionBy=None, que ... format='parquet', ... checkpointLocation='/tmp/checkpoint') # doctest: +SKIP """ - # TODO(SPARK-33659): document the current behavior for DataStreamWriter.toTable API self.options(**options) if outputMode is not None: self.outputMode(outputMode) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index 2703119ce1167..1be09e0e5f97e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -302,11 +302,21 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { /** * Starts the execution of the streaming query, which will continually output results to the given - * table as new data arrives. A new table will be created if the table not exists. The returned - * [[StreamingQuery]] object can be used to interact with the stream. + * table as new data arrives. 
The returned [[StreamingQuery]] object can be used to interact with + * the stream. + * + * For v1 table, partitioning columns provided by `partitionBy` will be respected no matter the + * table exists or not. A new table will be created if the table not exists. + * + * For v2 table, `partitionBy` will be ignored if the table already exists. `partitionBy` will be + * respected only if the v2 table does not exist. Besides, the v2 table created by this API lacks + * some functionalities (e.g., customized properties, options, and serde info). If you need them, + * please create the v2 table manually before the execution to avoid creating a table with + * incomplete information. * * @since 3.1.0 */ + @Evolving @throws[TimeoutException] def toTable(tableName: String): StreamingQuery = { this.tableName = tableName diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala index 9cf649605ed1c..4c5c5e63cecb6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala @@ -275,7 +275,7 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { val tableName = "stream_test" withTable(tableName) { // The file written by batch will not be seen after the table was written by a streaming - // query. This is because we loads files from the metadata log instead of listing them + // query. This is because we load files from the metadata log instead of listing them // using HDFS API. Seq(4, 5, 6).toDF("value").write.format("parquet") .option("path", dir.getCanonicalPath).saveAsTable(tableName) @@ -289,7 +289,7 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { val tableName = "stream_test" withTable(tableName) { // The file written by batch will not be seen after the table was written by a streaming - // query. This is because we loads files from the metadata log instead of listing them + // query. This is because we load files from the metadata log instead of listing them // using HDFS API. Seq(4, 5, 6).toDF("value").write.format("parquet").saveAsTable(tableName) @@ -302,7 +302,7 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { val tableName = "stream_test" withTable(tableName) { // The file written by batch will not be seen after the table was written by a streaming - // query. This is because we loads files from the metadata log instead of listing them + // query. This is because we load files from the metadata log instead of listing them // using HDFS API. Seq(4, 5, 6).toDF("value").write .mode("append").format("parquet").save(dir.getCanonicalPath) From f1d37972910d94c713c6a7cb7bd6ea2b52576d00 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Thu, 24 Dec 2020 05:21:39 +0000 Subject: [PATCH 0872/1009] [SPARK-33886][SQL] UnresolvedTable should retain SQL text position for DDL commands ### What changes were proposed in this pull request? Currently, there are many DDL commands where the position of the unresolved identifiers are incorrect: ``` scala> sql("MSCK REPAIR TABLE unknown") org.apache.spark.sql.AnalysisException: Table not found: unknown; line 1 pos 0; ``` , whereas the `pos` should be 18. 
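For reference, the expected value is simply the zero-based offset of the identifier within the statement text, which is also how the new test below computes it:

```scala
// "MSCK REPAIR TABLE " is 18 characters long, so the unresolved identifier starts at
// line 1, position 18 rather than position 0.
val stmt = "MSCK REPAIR TABLE unknown"
assert(stmt.indexOf("unknown") == 18)
```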
This PR proposes to fix this issue for commands using `UnresolvedTable`: ``` MSCK REPAIR TABLE t LOAD DATA LOCAL INPATH 'filepath' INTO TABLE t TRUNCATE TABLE t SHOW PARTITIONS t ALTER TABLE t RECOVER PARTITIONS ALTER TABLE t ADD PARTITION (p=1) ALTER TABLE t PARTITION (p=1) RENAME TO PARTITION (p=2) ALTER TABLE t DROP PARTITION (p=1) ALTER TABLE t SET SERDEPROPERTIES ('a'='b') COMMENT ON TABLE t IS 'hello'" ``` ### Why are the changes needed? To fix a bug. ### Does this PR introduce _any_ user-facing change? Yes, now the above example will print the following: ``` org.apache.spark.sql.AnalysisException: Table not found: unknown; line 1 pos 18; ``` ### How was this patch tested? Add a new suite of tests. Closes #30900 from imback82/position_Fix. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/AstBuilder.scala | 41 ++++++++++-------- .../AnalysisExceptionPositionSuite.scala | 43 +++++++++++++++++++ 2 files changed, 67 insertions(+), 17 deletions(-) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisExceptionPositionSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 2af84fa079d97..9ac7b06d0a132 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2160,6 +2160,15 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } } + /** + * Create an [[UnresolvedTable]] from a multi-part identifier context. + */ + private def createUnresolvedTable( + ctx: MultipartIdentifierContext, + commandName: String): LogicalPlan = withOrigin(ctx) { + UnresolvedTable(visitMultipartIdentifier(ctx), commandName) + } + /** * Create a [[CalendarInterval]] literal expression. Two syntaxes are supported: * - multiple unit value pairs, for instance: interval 2 months 2 days. 
@@ -3616,8 +3625,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * }}} */ override def visitRepairTable(ctx: RepairTableContext): LogicalPlan = withOrigin(ctx) { - RepairTable( - UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier()), "MSCK REPAIR TABLE")) + RepairTable(createUnresolvedTable(ctx.multipartIdentifier, "MSCK REPAIR TABLE")) } /** @@ -3631,7 +3639,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg */ override def visitLoadData(ctx: LoadDataContext): LogicalPlan = withOrigin(ctx) { LoadData( - child = UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier), "LOAD DATA"), + child = createUnresolvedTable(ctx.multipartIdentifier, "LOAD DATA"), path = string(ctx.path), isLocal = ctx.LOCAL != null, isOverwrite = ctx.OVERWRITE != null, @@ -3699,7 +3707,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg */ override def visitTruncateTable(ctx: TruncateTableContext): LogicalPlan = withOrigin(ctx) { TruncateTable( - UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier), "TRUNCATE TABLE"), + createUnresolvedTable(ctx.multipartIdentifier, "TRUNCATE TABLE"), Option(ctx.partitionSpec).map(visitNonOptionalPartitionSpec)) } @@ -3719,7 +3727,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg UnresolvedPartitionSpec(visitNonOptionalPartitionSpec(specCtx), None) } ShowPartitions( - UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier()), "SHOW PARTITIONS"), + createUnresolvedTable(ctx.multipartIdentifier(), "SHOW PARTITIONS"), partitionKeys) } @@ -3772,8 +3780,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg override def visitRecoverPartitions( ctx: RecoverPartitionsContext): LogicalPlan = withOrigin(ctx) { AlterTableRecoverPartitions( - UnresolvedTable( - visitMultipartIdentifier(ctx.multipartIdentifier), + createUnresolvedTable( + ctx.multipartIdentifier, "ALTER TABLE ... RECOVER PARTITIONS")) } @@ -3801,8 +3809,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg UnresolvedPartitionSpec(spec, location) } AlterTableAddPartition( - UnresolvedTable( - visitMultipartIdentifier(ctx.multipartIdentifier), + createUnresolvedTable( + ctx.multipartIdentifier, "ALTER TABLE ... ADD PARTITION ..."), specsAndLocs.toSeq, ctx.EXISTS != null) @@ -3819,8 +3827,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg override def visitRenameTablePartition( ctx: RenameTablePartitionContext): LogicalPlan = withOrigin(ctx) { AlterTableRenamePartition( - UnresolvedTable( - visitMultipartIdentifier(ctx.multipartIdentifier), + createUnresolvedTable( + ctx.multipartIdentifier, "ALTER TABLE ... RENAME TO PARTITION"), UnresolvedPartitionSpec(visitNonOptionalPartitionSpec(ctx.from)), visitNonOptionalPartitionSpec(ctx.to)) @@ -3847,8 +3855,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg val partSpecs = ctx.partitionSpec.asScala.map(visitNonOptionalPartitionSpec) .map(spec => UnresolvedPartitionSpec(spec)) AlterTableDropPartition( - UnresolvedTable( - visitMultipartIdentifier(ctx.multipartIdentifier), + createUnresolvedTable( + ctx.multipartIdentifier, "ALTER TABLE ... 
DROP PARTITION ..."), partSpecs.toSeq, ifExists = ctx.EXISTS != null, @@ -3867,8 +3875,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg */ override def visitSetTableSerDe(ctx: SetTableSerDeContext): LogicalPlan = withOrigin(ctx) { AlterTableSerDeProperties( - UnresolvedTable( - visitMultipartIdentifier(ctx.multipartIdentifier), + createUnresolvedTable( + ctx.multipartIdentifier, "ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]"), Option(ctx.STRING).map(string), Option(ctx.tablePropertyList).map(visitPropertyKeyValues), @@ -4084,7 +4092,6 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg case SqlBaseParser.NULL => "" case _ => string(ctx.STRING) } - val nameParts = visitMultipartIdentifier(ctx.multipartIdentifier) - CommentOnTable(UnresolvedTable(nameParts, "COMMENT ON TABLE"), comment) + CommentOnTable(createUnresolvedTable(ctx.multipartIdentifier, "COMMENT ON TABLE"), comment) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisExceptionPositionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisExceptionPositionSuite.scala new file mode 100644 index 0000000000000..276cb4b5987f6 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisExceptionPositionSuite.scala @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.analysis + +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan + +class AnalysisExceptionPositionSuite extends AnalysisTest { + test("SPARK-33886: UnresolvedTable should retain sql text position") { + verifyTablePosition("MSCK REPAIR TABLE unknown", "unknown") + verifyTablePosition("LOAD DATA LOCAL INPATH 'filepath' INTO TABLE unknown", "unknown") + verifyTablePosition("TRUNCATE TABLE unknown", "unknown") + verifyTablePosition("SHOW PARTITIONS unknown", "unknown") + verifyTablePosition("ALTER TABLE unknown RECOVER PARTITIONS", "unknown") + verifyTablePosition("ALTER TABLE unknown ADD PARTITION (p=1)", "unknown") + verifyTablePosition("ALTER TABLE unknown PARTITION (p=1) RENAME TO PARTITION (p=2)", "unknown") + verifyTablePosition("ALTER TABLE unknown DROP PARTITION (p=1)", "unknown") + verifyTablePosition("ALTER TABLE unknown SET SERDEPROPERTIES ('a'='b')", "unknown") + verifyTablePosition("COMMENT ON TABLE unknown IS 'hello'", "unknown") + } + + private def verifyTablePosition(sql: String, table: String): Unit = { + val expectedPos = sql.indexOf(table) + assert(expectedPos != -1) + assertAnalysisError( + parsePlan(sql), + Seq(s"Table not found: $table; line 1 pos $expectedPos")) + } +} From d7dc42d5f6bbe861c7e4ac1bb49e0830af5e19f4 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 24 Dec 2020 07:40:38 +0000 Subject: [PATCH 0873/1009] [SPARK-33895][SQL] Char and Varchar fail in MetaOperation of ThriftServer ### What changes were proposed in this pull request? ``` Caused by: java.lang.IllegalArgumentException: Unrecognized type name: CHAR(10) at org.apache.spark.sql.hive.thriftserver.SparkGetColumnsOperation.toJavaSQLType(SparkGetColumnsOperation.scala:187) at org.apache.spark.sql.hive.thriftserver.SparkGetColumnsOperation.$anonfun$addToRowSet$1(SparkGetColumnsOperation.scala:203) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.hive.thriftserver.SparkGetColumnsOperation.addToRowSet(SparkGetColumnsOperation.scala:195) at org.apache.spark.sql.hive.thriftserver.SparkGetColumnsOperation.$anonfun$runInternal$4(SparkGetColumnsOperation.scala:99) at org.apache.spark.sql.hive.thriftserver.SparkGetColumnsOperation.$anonfun$runInternal$4$adapted(SparkGetColumnsOperation.scala:98) at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62) at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49) ``` meta operation is targeting raw table schema, we need to handle these types there. ### Why are the changes needed? bugfix, see the above case ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? new tests locally ![image](https://user-images.githubusercontent.com/8326978/103069196-cdfcc480-45f9-11eb-9c6a-d4c42123c6e3.png) Closes #30914 from yaooqinn/SPARK-33895. 
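The root cause is that these metadata operations read the raw table schema, where char/varchar columns still appear as CHAR(n)/VARCHAR(n) instead of being replaced by StringType, so the JDBC type mapping has to cover them. A partial sketch of the mapping the diff below adds (only a few branches shown; the real method in SparkGetColumnsOperation handles every type):

```scala
import java.sql.Types
import org.apache.spark.sql.types._

// Partial sketch: translate Catalyst types from a raw table schema into JDBC type codes,
// including the CHAR(n)/VARCHAR(n) cases that previously threw IllegalArgumentException.
def toJdbcType(dt: DataType): Int = dt match {
  case StringType     => Types.VARCHAR
  case VarcharType(_) => Types.VARCHAR
  case CharType(_)    => Types.CHAR
  case IntegerType    => Types.INTEGER
  case _              => Types.OTHER // placeholder for the remaining branches
}
```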
Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../hive/thriftserver/SparkGetColumnsOperation.scala | 3 +++ .../hive/thriftserver/SparkGetTypeInfoOperation.scala | 2 +- .../thriftserver/SparkMetadataOperationSuite.scala | 10 +++++++--- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala index 66e6cf82922b7..1f9c05c330ace 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala @@ -133,6 +133,7 @@ private[hive] class SparkGetColumnsOperation( case dt @ (BooleanType | _: NumericType | DateType | TimestampType | CalendarIntervalType | NullType) => Some(dt.defaultSize) + case CharType(n) => Some(n) case StructType(fields) => val sizeArr = fields.map(f => getColumnSize(f.dataType)) if (sizeArr.contains(None)) { @@ -176,6 +177,8 @@ private[hive] class SparkGetColumnsOperation( case DoubleType => java.sql.Types.DOUBLE case _: DecimalType => java.sql.Types.DECIMAL case StringType => java.sql.Types.VARCHAR + case VarcharType(_) => java.sql.Types.VARCHAR + case CharType(_) => java.sql.Types.CHAR case BinaryType => java.sql.Types.BINARY case DateType => java.sql.Types.DATE case TimestampType => java.sql.Types.TIMESTAMP diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala index 26b5f8ad8cee1..bd6feeaff08e8 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala @@ -99,6 +99,6 @@ private[hive] object SparkGetTypeInfoUtil { TINYINT_TYPE, SMALLINT_TYPE, INT_TYPE, BIGINT_TYPE, FLOAT_TYPE, DOUBLE_TYPE, DECIMAL_TYPE, DATE_TYPE, TIMESTAMP_TYPE, - ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE) + ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE, CHAR_TYPE, VARCHAR_TYPE) } } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala index bb7448293f559..897ea00975a05 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala @@ -283,6 +283,8 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase { .add("c14", "timestamp", nullable = false, "14") .add("c15", "struct", nullable = true, "15") .add("c16", "binary", nullable = false, "16") + .add("c17", "char(255)", nullable = true, "17") + .add("c18", "varchar(1024)", nullable = false, "18") val ddl = s""" @@ -299,7 +301,8 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase { import java.sql.Types._ val expectedJavaTypes = Seq(BOOLEAN, TINYINT, SMALLINT, INTEGER, BIGINT, FLOAT, DOUBLE, - DECIMAL, DECIMAL, VARCHAR, ARRAY, ARRAY, JAVA_OBJECT, DATE, TIMESTAMP, STRUCT, BINARY) + DECIMAL, DECIMAL, VARCHAR, ARRAY, ARRAY, JAVA_OBJECT, DATE, TIMESTAMP, STRUCT, 
BINARY, + CHAR, VARCHAR) var pos = 0 @@ -313,7 +316,8 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase { val colSize = rowSet.getInt("COLUMN_SIZE") schema(pos).dataType match { - case StringType | BinaryType | _: ArrayType | _: MapType => assert(colSize === 0) + case StringType | BinaryType | _: ArrayType | _: MapType | _: VarcharType => + assert(colSize === 0) case o => assert(colSize === o.defaultSize) } @@ -342,7 +346,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase { pos += 1 } - assert(pos === 17, "all columns should have been verified") + assert(pos === 19, "all columns should have been verified") } } From 32d4a2b06220861efda1058b26d9a2ed3a1b2c74 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 24 Dec 2020 08:10:28 +0000 Subject: [PATCH 0874/1009] [SPARK-33861][SQL] Simplify conditional in predicate ### What changes were proposed in this pull request? This pr simplify conditional in predicate, after this change we can push down the filter to datasource: Expression | After simplify -- | -- IF(cond, trueVal, false) | AND(cond, trueVal) IF(cond, trueVal, true) | OR(NOT(cond), trueVal) IF(cond, false, falseVal) | AND(NOT(cond), elseVal) IF(cond, true, falseVal) | OR(cond, elseVal) CASE WHEN cond THEN trueVal ELSE false END | AND(cond, trueVal) CASE WHEN cond THEN trueVal END | AND(cond, trueVal) CASE WHEN cond THEN trueVal ELSE null END | AND(cond, trueVal) CASE WHEN cond THEN trueVal ELSE true END | OR(NOT(cond), trueVal) CASE WHEN cond THEN false ELSE elseVal END | AND(NOT(cond), elseVal) CASE WHEN cond THEN false END | false CASE WHEN cond THEN true ELSE elseVal END | OR(cond, elseVal) CASE WHEN cond THEN true END | cond ### Why are the changes needed? Improve query performance. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #30865 from wangyum/SPARK-33861. 
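One point worth spelling out: the rewrite is applied only in predicate position (Filter, Join conditions, and the conditions of DELETE/UPDATE in this rule), where a NULL result discards a row exactly like false does, so replacing the conditional with plain AND/OR/NOT cannot change which rows survive; in a projection the original conditional is kept, as the new tests verify. A small end-to-end illustration, assuming columns named `i` and `b`:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.expr

// Both filters keep exactly the same rows; the second form consists of plain boolean
// connectives, which is what allows the condition to be pushed down to a data source.
val spark = SparkSession.builder().master("local[1]").appName("demo").getOrCreate()
import spark.implicits._

val df = Seq((5, true), (20, true), (20, false)).toDF("i", "b")
val viaConditional = df.where(expr("IF(i > 10, b, false)"))
val viaConjunction = df.where(expr("i > 10 AND b"))
assert(viaConditional.collect().toSet == viaConjunction.collect().toSet)
```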
Authored-by: Yuming Wang Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/Optimizer.scala | 1 + .../SimplifyConditionalsInPredicate.scala | 82 ++++++ ...SimplifyConditionalsInPredicateSuite.scala | 237 ++++++++++++++++++ .../q34.sf100/explain.txt | 12 +- .../q34.sf100/simplified.txt | 2 +- .../approved-plans-modified/q34/explain.txt | 8 +- .../q34/simplified.txt | 2 +- .../q73.sf100/explain.txt | 8 +- .../q73.sf100/simplified.txt | 2 +- .../approved-plans-modified/q73/explain.txt | 8 +- .../q73/simplified.txt | 2 +- .../approved-plans-v1_4/q34.sf100/explain.txt | 12 +- .../q34.sf100/simplified.txt | 2 +- .../approved-plans-v1_4/q34/explain.txt | 8 +- .../approved-plans-v1_4/q34/simplified.txt | 2 +- .../approved-plans-v1_4/q73.sf100/explain.txt | 12 +- .../q73.sf100/simplified.txt | 2 +- .../approved-plans-v1_4/q73/explain.txt | 8 +- .../approved-plans-v1_4/q73/simplified.txt | 2 +- .../approved-plans-v2_7/q34.sf100/explain.txt | 12 +- .../q34.sf100/simplified.txt | 2 +- .../approved-plans-v2_7/q34/explain.txt | 8 +- .../approved-plans-v2_7/q34/simplified.txt | 2 +- 23 files changed, 378 insertions(+), 58 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 61bcf9038b845..7b9b99bba5574 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -103,6 +103,7 @@ abstract class Optimizer(catalogManager: CatalogManager) RemoveDispensableExpressions, SimplifyBinaryComparison, ReplaceNullWithFalseInPredicate, + SimplifyConditionalsInPredicate, PruneFilters, SimplifyCasts, SimplifyCaseConversionExpressions, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala new file mode 100644 index 0000000000000..1ea85085bccdb --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.expressions.{And, CaseWhen, Expression, If, Literal, Not, Or} +import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.types.BooleanType + +/** + * A rule that converts conditional expressions to predicate expressions, if possible, in the + * search condition of the WHERE/HAVING/ON(JOIN) clauses, which contain an implicit Boolean operator + * "(search condition) = TRUE". After this converting, we can potentially push the filter down to + * the data source. + * + * Supported cases are: + * - IF(cond, trueVal, false) => AND(cond, trueVal) + * - IF(cond, trueVal, true) => OR(NOT(cond), trueVal) + * - IF(cond, false, falseVal) => AND(NOT(cond), elseVal) + * - IF(cond, true, falseVal) => OR(cond, elseVal) + * - CASE WHEN cond THEN trueVal ELSE false END => AND(cond, trueVal) + * - CASE WHEN cond THEN trueVal END => AND(cond, trueVal) + * - CASE WHEN cond THEN trueVal ELSE null END => AND(cond, trueVal) + * - CASE WHEN cond THEN trueVal ELSE true END => OR(NOT(cond), trueVal) + * - CASE WHEN cond THEN false ELSE elseVal END => AND(NOT(cond), elseVal) + * - CASE WHEN cond THEN false END => false + * - CASE WHEN cond THEN true ELSE elseVal END => OR(cond, elseVal) + * - CASE WHEN cond THEN true END => cond + */ +object SimplifyConditionalsInPredicate extends Rule[LogicalPlan] { + + def apply(plan: LogicalPlan): LogicalPlan = plan transform { + case f @ Filter(cond, _) => f.copy(condition = simplifyConditional(cond)) + case j @ Join(_, _, _, Some(cond), _) => j.copy(condition = Some(simplifyConditional(cond))) + case d @ DeleteFromTable(_, Some(cond)) => d.copy(condition = Some(simplifyConditional(cond))) + case u @ UpdateTable(_, _, Some(cond)) => u.copy(condition = Some(simplifyConditional(cond))) + } + + private def simplifyConditional(e: Expression): Expression = e match { + case And(left, right) => And(simplifyConditional(left), simplifyConditional(right)) + case Or(left, right) => Or(simplifyConditional(left), simplifyConditional(right)) + case If(cond, trueValue, FalseLiteral) => And(cond, trueValue) + case If(cond, trueValue, TrueLiteral) => Or(Not(cond), trueValue) + case If(cond, FalseLiteral, falseValue) => And(Not(cond), falseValue) + case If(cond, TrueLiteral, falseValue) => Or(cond, falseValue) + case CaseWhen(Seq((cond, trueValue)), + Some(FalseLiteral) | Some(Literal(null, BooleanType)) | None) => + And(cond, trueValue) + case CaseWhen(Seq((cond, trueValue)), Some(TrueLiteral)) => + Or(Not(cond), trueValue) + case CaseWhen(Seq((_, FalseLiteral)), Some(FalseLiteral) | None) => + FalseLiteral + case CaseWhen(Seq((cond, FalseLiteral)), Some(elseValue)) => + And(Not(cond), elseValue) + case CaseWhen(Seq((cond, TrueLiteral)), Some(FalseLiteral) | None) => + cond + case CaseWhen(Seq((cond, TrueLiteral)), Some(elseValue)) => + Or(cond, elseValue) + case e if e.dataType == BooleanType => e + case e => + assert(e.dataType != BooleanType, + "Expected a Boolean type expression in SimplifyConditionalsInPredicate, " + + s"but got the type `${e.dataType.catalogString}` in `${e.sql}`.") + e + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala new file mode 
100644 index 0000000000000..1f3c24bdbb664 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala @@ -0,0 +1,237 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions.{And, CaseWhen, Expression, If, IsNotNull, Literal, Or} +import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} +import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest} +import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, LocalRelation, LogicalPlan, UpdateTable} +import org.apache.spark.sql.catalyst.rules.RuleExecutor +import org.apache.spark.sql.types.{BooleanType, IntegerType} + +class SimplifyConditionalsInPredicateSuite extends PlanTest { + + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = + Batch("SimplifyConditionalsInPredicate", FixedPoint(10), + NullPropagation, + ConstantFolding, + BooleanSimplification, + SimplifyConditionals, + SimplifyConditionalsInPredicate) :: Nil + } + + private val testRelation = + LocalRelation('i.int, 'b.boolean, 'a.array(IntegerType), 'm.map(IntegerType, IntegerType)) + private val anotherTestRelation = LocalRelation('d.int) + + test("IF(cond, trueVal, false) => AND(cond, trueVal)") { + val originalCond = If( + UnresolvedAttribute("i") > Literal(10), + UnresolvedAttribute("b"), + FalseLiteral) + val expectedCond = And( + UnresolvedAttribute("i") > Literal(10), + UnresolvedAttribute("b")) + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("IF(cond, trueVal, true) => OR(NOT(cond), trueVal)") { + val originalCond = If( + UnresolvedAttribute("i") > Literal(10), + UnresolvedAttribute("b"), + TrueLiteral) + val expectedCond = Or( + UnresolvedAttribute("i") <= Literal(10), + UnresolvedAttribute("b")) + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("IF(cond, false, falseVal) => AND(NOT(cond), elseVal)") { + val originalCond = If( + UnresolvedAttribute("i") > Literal(10), + FalseLiteral, + 
UnresolvedAttribute("b")) + val expectedCond = And( + UnresolvedAttribute("i") <= Literal(10), + UnresolvedAttribute("b")) + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("IF(cond, true, falseVal) => OR(cond, elseVal)") { + val originalCond = If( + UnresolvedAttribute("i") > Literal(10), + TrueLiteral, + UnresolvedAttribute("b")) + val expectedCond = Or( + UnresolvedAttribute("i") > Literal(10), + UnresolvedAttribute("b")) + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("CASE WHEN cond THEN trueVal ELSE false END => AND(cond, trueVal)") { + Seq(Some(FalseLiteral), None, Some(Literal(null, BooleanType))).foreach { elseExp => + val originalCond = CaseWhen( + Seq((UnresolvedAttribute("i") > Literal(10), UnresolvedAttribute("b"))), + elseExp) + val expectedCond = And( + UnresolvedAttribute("i") > Literal(10), + UnresolvedAttribute("b")) + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + } + + test("CASE WHEN cond THEN trueVal ELSE true END => OR(NOT(cond), trueVal)") { + val originalCond = CaseWhen( + Seq((UnresolvedAttribute("i") > Literal(10), UnresolvedAttribute("b"))), + TrueLiteral) + val expectedCond = Or( + UnresolvedAttribute("i") <= Literal(10), + UnresolvedAttribute("b")) + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("CASE WHEN cond THEN false ELSE elseVal END => AND(NOT(cond), elseVal)") { + val originalCond = CaseWhen( + Seq((UnresolvedAttribute("i") > Literal(10), FalseLiteral)), + UnresolvedAttribute("b")) + val expectedCond = And( + UnresolvedAttribute("i") <= Literal(10), + UnresolvedAttribute("b")) + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("CASE WHEN cond THEN false END => false") { + val originalCond = CaseWhen( + Seq((UnresolvedAttribute("i") > Literal(10), FalseLiteral))) + testFilter(originalCond, expectedCond = FalseLiteral) + testJoin(originalCond, expectedCond = FalseLiteral) + testDelete(originalCond, expectedCond = FalseLiteral) + testUpdate(originalCond, expectedCond = FalseLiteral) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("CASE WHEN cond THEN true ELSE elseVal END => OR(cond, elseVal)") { + val originalCond = CaseWhen( + Seq((UnresolvedAttribute("i") > Literal(10), TrueLiteral)), + UnresolvedAttribute("b")) + val expectedCond = Or( + UnresolvedAttribute("i") > Literal(10), + UnresolvedAttribute("b")) + 
testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("CASE WHEN cond THEN true END => cond") { + val originalCond = CaseWhen( + Seq((UnresolvedAttribute("i") > Literal(10), TrueLiteral))) + val expectedCond = UnresolvedAttribute("i") > Literal(10) + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("Simplify conditional in conditions of CaseWhen inside another CaseWhen") { + val nestedCaseWhen = CaseWhen( + Seq((UnresolvedAttribute("i") > Literal(10)) -> UnresolvedAttribute("b")), + FalseLiteral) + val originalCond = CaseWhen(Seq(IsNotNull(nestedCaseWhen) -> FalseLiteral)) + val expectedCond = FalseLiteral + + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("Not expected type - SimplifyConditionalsInPredicate") { + val e = intercept[AnalysisException] { + testFilter(originalCond = Literal(null, IntegerType), expectedCond = FalseLiteral) + }.getMessage + assert(e.contains("'CAST(NULL AS INT)' of type int is not a boolean")) + } + + private def testFilter(originalCond: Expression, expectedCond: Expression): Unit = { + test((rel, exp) => rel.where(exp), originalCond, expectedCond) + } + + private def testJoin(originalCond: Expression, expectedCond: Expression): Unit = { + test((rel, exp) => rel.join(anotherTestRelation, Inner, Some(exp)), originalCond, expectedCond) + } + + private def testProjection(originalExpr: Expression, expectedExpr: Expression): Unit = { + test((rel, exp) => rel.select(exp), originalExpr, expectedExpr) + } + + private def testDelete(originalCond: Expression, expectedCond: Expression): Unit = { + test((rel, expr) => DeleteFromTable(rel, Some(expr)), originalCond, expectedCond) + } + + private def testUpdate(originalCond: Expression, expectedCond: Expression): Unit = { + test((rel, expr) => UpdateTable(rel, Seq.empty, Some(expr)), originalCond, expectedCond) + } + + private def test( + func: (LogicalPlan, Expression) => LogicalPlan, + originalExpr: Expression, + expectedExpr: Expression): Unit = { + + val originalPlan = func(testRelation, originalExpr).analyze + val optimizedPlan = Optimize.execute(originalPlan) + val expectedPlan = func(testRelation, expectedExpr).analyze + comparePlans(optimizedPlan, expectedPlan) + } +} diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/explain.txt index ac1fca4f67a02..547806128e64a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/explain.txt @@ -120,7 +120,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: 
true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -128,7 +128,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = Unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.2)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = Unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) > 1.2)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -156,7 +156,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 5] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -171,7 +171,7 @@ Condition : ((cnt#22 >= 15) AND (cnt#22 <= 20)) (29) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] -Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#23] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#23] (30) Sort [codegen id : 6] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] @@ -193,7 +193,7 @@ Condition : isnotnull(c_customer_sk#24) (34) Exchange Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] -Arguments: hashpartitioning(c_customer_sk#24, 5), true, [id=#29] +Arguments: hashpartitioning(c_customer_sk#24, 5), ENSURE_REQUIREMENTS, [id=#29] (35) Sort [codegen id : 8] Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] @@ -210,7 +210,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#24, c_sa (38) Exchange Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, c_salutation#25 ASC NULLS FIRST, c_preferred_cust_flag#28 DESC NULLS LAST, 5), true, [id=#30] +Arguments: rangepartitioning(c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, c_salutation#25 ASC NULLS FIRST, c_preferred_cust_flag#28 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#30] (39) Sort [codegen id : 10] Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/simplified.txt index d9b416ddba9ef..c9945cda67746 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/simplified.txt @@ -47,7 +47,7 @@ WholeStageCodegen (10) BroadcastExchange #6 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/explain.txt index 898d37403d6a0..74bbb52c55fbc 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/explain.txt @@ -117,7 +117,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -125,7 +125,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = Unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.2)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = Unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) > 1.2)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -153,7 +153,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 6] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -195,7 +195,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#23, c_sa (35) Exchange Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] 
-Arguments: rangepartitioning(c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS LAST, 5), true, [id=#29] +Arguments: rangepartitioning(c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#29] (36) Sort [codegen id : 7] Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/simplified.txt index 5af07f1d4ddef..4484587f65355 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/simplified.txt @@ -41,7 +41,7 @@ WholeStageCodegen (7) BroadcastExchange #5 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/explain.txt index 25da173c8ecde..51b480ef64ab2 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/explain.txt @@ -117,7 +117,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -125,7 +125,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = Unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.0)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = Unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) > 1.0)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -153,7 +153,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: 
[ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 5] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -195,7 +195,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#24, c_sa (35) Exchange Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), true, [id=#29] +Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#29] (36) Sort [codegen id : 7] Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/simplified.txt index 7496388d3430c..8695f9da17114 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/simplified.txt @@ -44,7 +44,7 @@ WholeStageCodegen (7) BroadcastExchange #6 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/explain.txt index e420b656c3ad0..56ad4f4d926eb 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/explain.txt @@ -117,7 +117,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -125,7 +125,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = Unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.0)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = 
Unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) > 1.0)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -153,7 +153,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 6] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -195,7 +195,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#23, c_sa (35) Exchange Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), true, [id=#29] +Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#29] (36) Sort [codegen id : 7] Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/simplified.txt index 46b7241565719..5e49f6cb603d5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/simplified.txt @@ -41,7 +41,7 @@ WholeStageCodegen (7) BroadcastExchange #5 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/explain.txt index 17bb0e7e71d27..6fa9bb85f0b79 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/explain.txt @@ -120,7 +120,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -128,7 +128,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN 
(hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.2)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) > 1.2)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -156,7 +156,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 5] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -171,7 +171,7 @@ Condition : ((cnt#22 >= 15) AND (cnt#22 <= 20)) (29) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] -Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#23] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#23] (30) Sort [codegen id : 6] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] @@ -193,7 +193,7 @@ Condition : isnotnull(c_customer_sk#24) (34) Exchange Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] -Arguments: hashpartitioning(c_customer_sk#24, 5), true, [id=#29] +Arguments: hashpartitioning(c_customer_sk#24, 5), ENSURE_REQUIREMENTS, [id=#29] (35) Sort [codegen id : 8] Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] @@ -210,7 +210,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#24, c_sa (38) Exchange Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, c_salutation#25 ASC NULLS FIRST, c_preferred_cust_flag#28 DESC NULLS LAST, 5), true, [id=#30] +Arguments: rangepartitioning(c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, c_salutation#25 ASC NULLS FIRST, c_preferred_cust_flag#28 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#30] (39) Sort [codegen id : 10] Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/simplified.txt index d9b416ddba9ef..c9945cda67746 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/simplified.txt @@ -47,7 +47,7 @@ WholeStageCodegen (10) BroadcastExchange #6 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt index 18f465caea20d..1aea77422b14f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt @@ -117,7 +117,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -125,7 +125,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.2)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) > 1.2)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -153,7 +153,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 6] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -195,7 +195,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#23, c_sa (35) Exchange Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS LAST, 5), true, [id=#29] +Arguments: rangepartitioning(c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#29] (36) Sort [codegen id : 7] Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt index 5af07f1d4ddef..4484587f65355 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt @@ -41,7 +41,7 @@ WholeStageCodegen (7) BroadcastExchange #5 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/explain.txt index 4af604ca3f65f..f88f1f48ac2b7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/explain.txt @@ -120,7 +120,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -128,7 +128,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.0)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) > 1.0)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -156,7 +156,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 5] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -171,7 +171,7 @@ Condition : ((cnt#22 >= 1) AND (cnt#22 <= 5)) (29) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] -Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#23] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#23] (30) Sort [codegen id : 6] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] @@ -193,7 +193,7 @@ Condition : isnotnull(c_customer_sk#24) (34) Exchange Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] -Arguments: 
hashpartitioning(c_customer_sk#24, 5), true, [id=#29] +Arguments: hashpartitioning(c_customer_sk#24, 5), ENSURE_REQUIREMENTS, [id=#29] (35) Sort [codegen id : 8] Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] @@ -210,7 +210,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#24, c_sa (38) Exchange Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), true, [id=#30] +Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#30] (39) Sort [codegen id : 10] Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/simplified.txt index af8527f155c8e..9de2f2ab4cd68 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/simplified.txt @@ -47,7 +47,7 @@ WholeStageCodegen (10) BroadcastExchange #6 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt index f4565c3edb172..43c73f3c7af61 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt @@ -117,7 +117,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -125,7 +125,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.0)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / 
cast(hd_vehicle_count#16 as double)) > 1.0)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -153,7 +153,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 6] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -195,7 +195,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#23, c_sa (35) Exchange Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), true, [id=#29] +Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#29] (36) Sort [codegen id : 7] Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt index 46b7241565719..5e49f6cb603d5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt @@ -41,7 +41,7 @@ WholeStageCodegen (7) BroadcastExchange #5 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/explain.txt index c7b8685b64bea..5d8f0d04161bf 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/explain.txt @@ -120,7 +120,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -128,7 +128,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as 
double)) ELSE null END > 1.2)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) > 1.2)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -156,7 +156,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 5] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -171,7 +171,7 @@ Condition : ((cnt#22 >= 15) AND (cnt#22 <= 20)) (29) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] -Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#23] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#23] (30) Sort [codegen id : 6] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] @@ -193,7 +193,7 @@ Condition : isnotnull(c_customer_sk#24) (34) Exchange Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] -Arguments: hashpartitioning(c_customer_sk#24, 5), true, [id=#29] +Arguments: hashpartitioning(c_customer_sk#24, 5), ENSURE_REQUIREMENTS, [id=#29] (35) Sort [codegen id : 8] Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] @@ -210,7 +210,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#24, c_sa (38) Exchange Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, c_salutation#25 ASC NULLS FIRST, c_preferred_cust_flag#28 DESC NULLS LAST, ss_ticket_number#5 ASC NULLS FIRST, 5), true, [id=#30] +Arguments: rangepartitioning(c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, c_salutation#25 ASC NULLS FIRST, c_preferred_cust_flag#28 DESC NULLS LAST, ss_ticket_number#5 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#30] (39) Sort [codegen id : 10] Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/simplified.txt index 451659e2c617c..244478fd68825 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/simplified.txt @@ -47,7 +47,7 @@ WholeStageCodegen (10) BroadcastExchange #6 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/explain.txt index 
01b5f46bd5dd4..e588993073a91 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/explain.txt @@ -117,7 +117,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -125,7 +125,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.2)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) > 1.2)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -153,7 +153,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 6] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -195,7 +195,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#23, c_sa (35) Exchange Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS LAST, ss_ticket_number#5 ASC NULLS FIRST, 5), true, [id=#29] +Arguments: rangepartitioning(c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS LAST, ss_ticket_number#5 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#29] (36) Sort [codegen id : 7] Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/simplified.txt index 8aa32fed5a176..22cab3a42862f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/simplified.txt 
@@ -41,7 +41,7 @@ WholeStageCodegen (7) BroadcastExchange #5 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] From 3e9821edfd636d2bc8be8f9cc5fc87be48bebc79 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Thu, 24 Dec 2020 08:13:48 +0000 Subject: [PATCH 0875/1009] [SPARK-33443][SQL] LEAD/LAG should support [ IGNORE NULLS | RESPECT NULLS ] ### What changes were proposed in this pull request? The mainstream databases support `[ IGNORE NULLS | RESPECT NULLS ]` for `LEAD`/`LAG`/`NTH_VALUE`/`FIRST_VALUE`/`LAST_VALUE`, but the current implementation of `LEAD`/`LAG` doesn't support this syntax. **Oracle** https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/LEAD.html#GUID-0A0481F1-E98F-4535-A739-FCCA8D1B5B77 **Presto** https://prestodb.io/docs/current/functions/window.html **Redshift** https://docs.aws.amazon.com/redshift/latest/dg/r_WF_LEAD.html **DB2** https://www.ibm.com/support/knowledgecenter/SSGU8G_14.1.0/com.ibm.sqls.doc/ids_sqs_1513.htm **Teradata** https://docs.teradata.com/r/756LNiPSFdY~4JcCCcR5Cw/GjCT6l7trjkIEjt~7Dhx4w **Snowflake** https://docs.snowflake.com/en/sql-reference/functions/lead.html https://docs.snowflake.com/en/sql-reference/functions/lag.html ### Why are the changes needed? Supporting `[ IGNORE NULLS | RESPECT NULLS ]` for `LEAD`/`LAG` is very useful. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Jenkins test. Closes #30387 from beliefer/SPARK-33443. Lead-authored-by: gengjiaan Co-authored-by: beliefer Co-authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../expressions/windowExpressions.scala | 16 ++- .../sql/execution/window/WindowExecBase.scala | 29 ++-- .../window/WindowFunctionFrame.scala | 132 ++++++++++++++++-- .../org/apache/spark/sql/functions.scala | 40 +++++- .../sql/DataFrameWindowFunctionsSuite.scala | 55 ++++++++ 5 files changed, 238 insertions(+), 34 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index 43ecbd6a83fdb..b167499620c0f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -387,8 +387,6 @@ abstract class FrameLessOffsetWindowFunction override def nullable: Boolean = default == null || default.nullable || input.nullable - override val ignoreNulls = false - override lazy val frame: WindowFrame = fakeFrame override def checkInputDataTypes(): TypeCheckResult = { @@ -443,9 +441,13 @@ abstract class FrameLessOffsetWindowFunction since = "2.0.0", group = "window_funcs") // scalastyle:on line.size.limit line.contains.tab -case class Lead(input: Expression, offset: Expression, default: Expression) +case class Lead( + input: Expression, offset: Expression, default: Expression, ignoreNulls: Boolean) extends FrameLessOffsetWindowFunction { + def this(input: Expression, offset: Expression, default: Expression) = + this(input, offset, default, false) + def this(input: Expression, offset: Expression) = this(input, offset, Literal(null)) def this(input: Expression) = this(input, Literal(1)) @@ -485,10 +487,14 @@ case class Lead(input: 
Expression, offset: Expression, default: Expression) since = "2.0.0", group = "window_funcs") // scalastyle:on line.size.limit line.contains.tab -case class Lag(input: Expression, inputOffset: Expression, default: Expression) +case class Lag( + input: Expression, inputOffset: Expression, default: Expression, ignoreNulls: Boolean) extends FrameLessOffsetWindowFunction { - def this(input: Expression, offset: Expression) = this(input, offset, Literal(null)) + def this(input: Expression, inputOffset: Expression, default: Expression) = + this(input, inputOffset, default, false) + + def this(input: Expression, inputOffset: Expression) = this(input, inputOffset, Literal(null)) def this(input: Expression) = this(input, Literal(1)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala index 9832e5cd74ae7..5d999cb143f50 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala @@ -119,13 +119,21 @@ trait WindowExecBase extends UnaryExecNode { * [[WindowExpression]]s and factory function for the [[WindowFrameFunction]]. */ protected lazy val windowFrameExpressionFactoryPairs = { - type FrameKey = (String, FrameType, Expression, Expression) + type FrameKey = (String, FrameType, Expression, Expression, Seq[Expression]) type ExpressionBuffer = mutable.Buffer[Expression] val framedFunctions = mutable.Map.empty[FrameKey, (ExpressionBuffer, ExpressionBuffer)] // Add a function and its function to the map for a given frame. def collect(tpe: String, fr: SpecifiedWindowFrame, e: Expression, fn: Expression): Unit = { - val key = (tpe, fr.frameType, fr.lower, fr.upper) + val key = fn match { + // This branch is used for Lead/Lag to support ignoring null. + // All window frames move in rows. If there are multiple Leads or Lags acting on a row + // and operating on different input expressions, they should not be moved uniformly + // by row. Therefore, we put these functions in different window frames. + case f: FrameLessOffsetWindowFunction if f.ignoreNulls => + (tpe, fr.frameType, fr.lower, fr.upper, f.children.map(_.canonicalized)) + case _ => (tpe, fr.frameType, fr.lower, fr.upper, Nil) + } val (es, fns) = framedFunctions.getOrElseUpdate( key, (ArrayBuffer.empty[Expression], ArrayBuffer.empty[Expression])) es += e @@ -183,7 +191,7 @@ trait WindowExecBase extends UnaryExecNode { // Create the factory to produce WindowFunctionFrame. 
val factory = key match { // Frameless offset Frame - case ("FRAME_LESS_OFFSET", _, IntegerLiteral(offset), _) => + case ("FRAME_LESS_OFFSET", _, IntegerLiteral(offset), _, expr) => target: InternalRow => new FrameLessOffsetWindowFunctionFrame( target, @@ -193,8 +201,9 @@ trait WindowExecBase extends UnaryExecNode { child.output, (expressions, schema) => MutableProjection.create(expressions, schema), - offset) - case ("UNBOUNDED_OFFSET", _, IntegerLiteral(offset), _) => + offset, + expr.nonEmpty) + case ("UNBOUNDED_OFFSET", _, IntegerLiteral(offset), _, _) => target: InternalRow => { new UnboundedOffsetWindowFunctionFrame( target, @@ -206,7 +215,7 @@ trait WindowExecBase extends UnaryExecNode { MutableProjection.create(expressions, schema), offset) } - case ("UNBOUNDED_PRECEDING_OFFSET", _, IntegerLiteral(offset), _) => + case ("UNBOUNDED_PRECEDING_OFFSET", _, IntegerLiteral(offset), _, _) => target: InternalRow => { new UnboundedPrecedingOffsetWindowFunctionFrame( target, @@ -220,13 +229,13 @@ trait WindowExecBase extends UnaryExecNode { } // Entire Partition Frame. - case ("AGGREGATE", _, UnboundedPreceding, UnboundedFollowing) => + case ("AGGREGATE", _, UnboundedPreceding, UnboundedFollowing, _) => target: InternalRow => { new UnboundedWindowFunctionFrame(target, processor) } // Growing Frame. - case ("AGGREGATE", frameType, UnboundedPreceding, upper) => + case ("AGGREGATE", frameType, UnboundedPreceding, upper, _) => target: InternalRow => { new UnboundedPrecedingWindowFunctionFrame( target, @@ -235,7 +244,7 @@ trait WindowExecBase extends UnaryExecNode { } // Shrinking Frame. - case ("AGGREGATE", frameType, lower, UnboundedFollowing) => + case ("AGGREGATE", frameType, lower, UnboundedFollowing, _) => target: InternalRow => { new UnboundedFollowingWindowFunctionFrame( target, @@ -244,7 +253,7 @@ trait WindowExecBase extends UnaryExecNode { } // Moving Frame. - case ("AGGREGATE", frameType, lower, upper) => + case ("AGGREGATE", frameType, lower, upper, _) => target: InternalRow => { new SlidingWindowFunctionFrame( target, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala index 2a4b957c35426..0408deb4b8a41 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala @@ -97,13 +97,15 @@ abstract class OffsetWindowFunctionFrameBase( /** Index of the input row currently used for output. */ protected var inputIndex = 0 + /** Attributes of the input row currently used for output. */ + protected val inputAttrs = inputSchema.map(_.withNullability(true)) + /** * Create the projection used when the offset row exists. * Please note that this project always respect null input values (like PostgreSQL). */ protected val projection = { // Collect the expressions and bind them. - val inputAttrs = inputSchema.map(_.withNullability(true)) val boundExpressions = Seq.fill(ordinal)(NoOp) ++ bindReferences( expressions.toSeq.map(_.input), inputAttrs) @@ -114,7 +116,6 @@ abstract class OffsetWindowFunctionFrameBase( /** Create the projection used when the offset row DOES NOT exists. */ protected val fillDefaultValue = { // Collect the expressions and bind them. 
- val inputAttrs: AttributeSeq = inputSchema.map(_.withNullability(true)) val boundExpressions = Seq.fill(ordinal)(NoOp) ++ expressions.toSeq.map { e => if (e.default == null || e.default.foldable && e.default.eval() == null) { // The default value is null. @@ -147,31 +148,132 @@ class FrameLessOffsetWindowFunctionFrame( expressions: Array[OffsetWindowFunction], inputSchema: Seq[Attribute], newMutableProjection: (Seq[Expression], Seq[Attribute]) => MutableProjection, - offset: Int) + offset: Int, + ignoreNulls: Boolean = false) extends OffsetWindowFunctionFrameBase( target, ordinal, expressions, inputSchema, newMutableProjection, offset) { + /** Holder the UnsafeRow where the input operator by function is not null. */ + private var nextSelectedRow = EmptyRow + + // The number of rows skipped to get the next UnsafeRow where the input operator by function + // is not null. + private var skippedNonNullCount = 0 + + /** Create the projection to determine whether input is null. */ + private val project = UnsafeProjection.create(Seq(IsNull(expressions.head.input)), inputSchema) + + /** Check if the output value of the first index is null. */ + private def nullCheck(row: InternalRow): Boolean = project(row).getBoolean(0) + + /** find the offset row whose input is not null */ + private def findNextRowWithNonNullInput(): Unit = { + while (skippedNonNullCount < offset && inputIndex < input.length) { + val r = WindowFunctionFrame.getNextOrNull(inputIterator) + if (!nullCheck(r)) { + nextSelectedRow = r + skippedNonNullCount += 1 + } + inputIndex += 1 + } + } + override def prepare(rows: ExternalAppendOnlyUnsafeRowArray): Unit = { input = rows inputIterator = input.generateIterator() // drain the first few rows if offset is larger than zero inputIndex = 0 - while (inputIndex < offset) { - if (inputIterator.hasNext) inputIterator.next() - inputIndex += 1 + if (ignoreNulls) { + findNextRowWithNonNullInput() + } else { + while (inputIndex < offset) { + if (inputIterator.hasNext) inputIterator.next() + inputIndex += 1 + } + inputIndex = offset } - inputIndex = offset + } + + private val doWrite = if (ignoreNulls && offset > 0) { + // For illustration, here is one example: the input data contains nine rows, + // and the input values of each row are: null, x, null, null, y, null, z, v, null. + // We use lead(input, 2) with IGNORE NULLS and the process is as follows: + // 1. current row -> null, next selected row -> y, output: y; + // 2. current row -> x, next selected row -> z, output: z; + // 3. current row -> null, next selected row -> z, output: z; + // 4. current row -> null, next selected row -> z, output: z; + // 5. current row -> y, next selected row -> v, output: v; + // 6. current row -> null, next selected row -> v, output: v; + // 7. current row -> z, next selected row -> empty, output: null; + // ... next selected row is empty, all following return null. + (current: InternalRow) => + if (nextSelectedRow == EmptyRow) { + // Use default values since the offset row whose input value is not null does not exist. + fillDefaultValue(current) + } else { + if (nullCheck(current)) { + projection(nextSelectedRow) + } else { + skippedNonNullCount -= 1 + findNextRowWithNonNullInput() + if (skippedNonNullCount == offset) { + projection(nextSelectedRow) + } else { + // Use default values since the offset row whose input value is not null does not exist. 
+ fillDefaultValue(current) + nextSelectedRow = EmptyRow + } + } + } + } else if (ignoreNulls && offset < 0) { + // For illustration, here is one example: the input data contains nine rows, + // and the input values of each row are: null, x, null, null, y, null, z, v, null. + // We use lag(input, 1) with IGNORE NULLS and the process is as follows: + // 1. current row -> null, next selected row -> empty, output: null; + // 2. current row -> x, next selected row -> empty, output: null; + // 3. current row -> null, next selected row -> x, output: x; + // 4. current row -> null, next selected row -> x, output: x; + // 5. current row -> y, next selected row -> x, output: x; + // 6. current row -> null, next selected row -> y, output: y; + // 7. current row -> z, next selected row -> y, output: y; + // 8. current row -> v, next selected row -> z, output: z; + // 9. current row -> null, next selected row -> v, output: v; + val absOffset = Math.abs(offset) + (current: InternalRow) => + if (skippedNonNullCount == absOffset) { + nextSelectedRow = EmptyRow + skippedNonNullCount -= 1 + while (nextSelectedRow == EmptyRow && inputIndex < input.length) { + val r = WindowFunctionFrame.getNextOrNull(inputIterator) + if (!nullCheck(r)) { + nextSelectedRow = r + } + inputIndex += 1 + } + } + if (nextSelectedRow == EmptyRow) { + // Use default values since the offset row whose input value is not null does not exist. + fillDefaultValue(current) + } else { + projection(nextSelectedRow) + } + if (!nullCheck(current)) { + skippedNonNullCount += 1 + } + } else { + (current: InternalRow) => + if (inputIndex >= 0 && inputIndex < input.length) { + val r = WindowFunctionFrame.getNextOrNull(inputIterator) + projection(r) + } else { + // Use default values since the offset row does not exist. + fillDefaultValue(current) + } + inputIndex += 1 } override def write(index: Int, current: InternalRow): Unit = { - if (inputIndex >= 0 && inputIndex < input.length) { - val r = WindowFunctionFrame.getNextOrNull(inputIterator) - projection(r) - } else { - // Use default values since the offset row does not exist. - fillDefaultValue(current) - } - inputIndex += 1 + doWrite(current) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 4defcb836a978..764e08862a09e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -937,8 +937,24 @@ object functions { * @group window_funcs * @since 1.4.0 */ - def lag(e: Column, offset: Int, defaultValue: Any): Column = withExpr { - Lag(e.expr, Literal(offset), Literal(defaultValue)) + def lag(e: Column, offset: Int, defaultValue: Any): Column = { + lag(e, offset, defaultValue, false) + } + + /** + * Window function: returns the value that is `offset` rows before the current row, and + * `defaultValue` if there is less than `offset` rows before the current row. `ignoreNulls` + * determines whether null values of row are included in or eliminated from the calculation. + * For example, an `offset` of one will return the previous row at any given point in the + * window partition. + * + * This is equivalent to the LAG function in SQL. 
+ * + * @group window_funcs + * @since 3.2.0 + */ + def lag(e: Column, offset: Int, defaultValue: Any, ignoreNulls: Boolean): Column = withExpr { + Lag(e.expr, Literal(offset), Literal(defaultValue), ignoreNulls) } /** @@ -989,8 +1005,24 @@ object functions { * @group window_funcs * @since 1.4.0 */ - def lead(e: Column, offset: Int, defaultValue: Any): Column = withExpr { - Lead(e.expr, Literal(offset), Literal(defaultValue)) + def lead(e: Column, offset: Int, defaultValue: Any): Column = { + lead(e, offset, defaultValue, false) + } + + /** + * Window function: returns the value that is `offset` rows after the current row, and + * `defaultValue` if there is less than `offset` rows after the current row. `ignoreNulls` + * determines whether null values of row are included in or eliminated from the calculation. + * The default value of `ignoreNulls` is false. For example, an `offset` of one will return + * the next row at any given point in the window partition. + * + * This is equivalent to the LEAD function in SQL. + * + * @group window_funcs + * @since 3.2.0 + */ + def lead(e: Column, offset: Int, defaultValue: Any, ignoreNulls: Boolean): Column = withExpr { + Lead(e.expr, Literal(offset), Literal(defaultValue), ignoreNulls) } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index 207b2963f0b3b..3568ad3a7343d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -700,6 +700,61 @@ class DataFrameWindowFunctionsSuite extends QueryTest Row("b", 3, null, null, null))) } + test("lead/lag with ignoreNulls") { + val nullStr: String = null + val df = Seq( + ("a", 0, nullStr), + ("a", 1, "x"), + ("b", 2, nullStr), + ("c", 3, nullStr), + ("a", 4, "y"), + ("b", 5, nullStr), + ("a", 6, "z"), + ("a", 7, "v"), + ("a", 8, nullStr)). 
+ toDF("key", "order", "value") + val window = Window.orderBy($"order") + checkAnswer( + df.select( + $"key", + $"order", + $"value", + lead($"value", 1).over(window), + lead($"value", 2).over(window), + lead($"value", 0, null, true).over(window), + lead($"value", 1, null, true).over(window), + lead($"value", 2, null, true).over(window), + lead($"value", 3, null, true).over(window), + lead(concat($"value", $"key"), 1, null, true).over(window), + lag($"value", 1).over(window), + lag($"value", 2).over(window), + lag($"value", 0, null, true).over(window), + lag($"value", 1, null, true).over(window), + lag($"value", 2, null, true).over(window), + lag($"value", 3, null, true).over(window), + lag(concat($"value", $"key"), 1, null, true).over(window)) + .orderBy($"order"), + Seq( + Row("a", 0, null, "x", null, null, "x", "y", "z", "xa", + null, null, null, null, null, null, null), + Row("a", 1, "x", null, null, "x", "y", "z", "v", "ya", + null, null, "x", null, null, null, null), + Row("b", 2, null, null, "y", null, "y", "z", "v", "ya", + "x", null, null, "x", null, null, "xa"), + Row("c", 3, null, "y", null, null, "y", "z", "v", "ya", + null, "x", null, "x", null, null, "xa"), + Row("a", 4, "y", null, "z", "y", "z", "v", null, "za", + null, null, "y", "x", null, null, "xa"), + Row("b", 5, null, "z", "v", null, "z", "v", null, "za", + "y", null, null, "y", "x", null, "ya"), + Row("a", 6, "z", "v", null, "z", "v", null, null, "va", + null, "y", "z", "y", "x", null, "ya"), + Row("a", 7, "v", null, null, "v", null, null, null, null, + "z", null, "v", "z", "y", "x", "za"), + Row("a", 8, null, null, null, null, null, null, null, null, + "v", "z", null, "v", "z", "y", "va"))) + } + test("SPARK-12989 ExtractWindowExpressions treats alias as regular attribute") { val src = Seq((0, 3, 5)).toDF("a", "b", "c") .withColumn("Data", struct("a", "b")) From 54a67842e678a54e976160c5ad249767165fab0f Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 24 Dec 2020 08:54:53 +0000 Subject: [PATCH 0876/1009] [SPARK-33881][SQL][TESTS] Check null and empty string as partition values in DS v1 and v2 tests ### What changes were proposed in this pull request? Add tests to check handling `null` and `''` (empty string) as partition values in commands `SHOW PARTITIONS`, `ALTER TABLE .. ADD PARTITION`, `ALTER TABLE .. DROP PARTITION`. ### Why are the changes needed? To improve test coverage. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running the modified test suites: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *.ShowPartitionsSuite" $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *.AlterTableAddPartitionSuite" $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *.AlterTableDropPartitionSuite" ``` Closes #30893 from MaxGekk/partition-value-empty-string. 
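For reference, the partition-value handling exercised by the new tests boils down to statements of roughly this shape (the table name `tbl` is illustrative only, not taken from the patch):
```
CREATE TABLE tbl (col1 INT, p1 STRING) USING parquet PARTITIONED BY (p1);
ALTER TABLE tbl ADD PARTITION (p1 = '');   -- v1 command: rejected ("empty partition column value"); v2 command: accepted
ALTER TABLE tbl DROP PARTITION (p1 = '');
SHOW PARTITIONS tbl;                       -- v1 reports null/empty values as part=__HIVE_DEFAULT_PARTITION__
```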
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../v1/AlterTableAddPartitionSuite.scala | 12 ++++++++++ .../v1/AlterTableDropPartitionSuite.scala | 15 +++++++++++- .../command/v1/ShowPartitionsSuite.scala | 19 +++++++++++++++ .../v2/AlterTableAddPartitionSuite.scala | 8 +++++++ .../v2/AlterTableDropPartitionSuite.scala | 9 +++++++ .../command/v2/ShowPartitionsSuite.scala | 8 ++++++- .../command/ShowPartitionsSuite.scala | 24 ++++++++++++++++++- 7 files changed, 92 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala index 1b7c90067e3f5..a749b1e3dd14d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.execution.command.v1 +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.execution.command @@ -35,6 +36,17 @@ trait AlterTableAddPartitionSuiteBase extends command.AlterTableAddPartitionSuit val location = information.split("\\r?\\n").filter(_.startsWith("Location:")).head assert(location.endsWith(expected)) } + + test("empty string as partition value") { + withNamespaceAndTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (col1 INT, p1 STRING) $defaultUsing PARTITIONED BY (p1)") + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t ADD PARTITION (p1 = '')") + }.getMessage + assert(errMsg.contains("Partition spec is invalid. " + + "The spec ([p1=]) contains an empty partition column value")) + } + } } class AlterTableAddPartitionSuite extends AlterTableAddPartitionSuiteBase with CommandSuiteBase diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala index 12a99933f6633..71032eefee2bb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.execution.command.v1 +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.execution.command trait AlterTableDropPartitionSuiteBase extends command.AlterTableDropPartitionSuiteBase { @@ -35,4 +36,16 @@ trait AlterTableDropPartitionSuiteBase extends command.AlterTableDropPartitionSu class AlterTableDropPartitionSuite extends AlterTableDropPartitionSuiteBase - with CommandSuiteBase + with CommandSuiteBase { + + test("empty string as partition value") { + withNamespaceAndTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (col1 INT, p1 STRING) $defaultUsing PARTITIONED BY (p1)") + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t DROP PARTITION (p1 = '')") + }.getMessage + assert(errMsg.contains("Partition spec is invalid. 
" + + "The spec ([p1=]) contains an empty partition column value")) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala index 8acd24f0e3956..5d992d18890e4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala @@ -93,4 +93,23 @@ class ShowPartitionsSuite extends ShowPartitionsSuiteBase with CommandSuiteBase assert(sql("SHOW PARTITIONS part_datasrc").count() == 3) } } + + test("null and empty string as partition values") { + import testImplicits._ + withTable("t") { + val df = Seq((0, ""), (1, null)).toDF("a", "part") + df.write + .partitionBy("part") + .format("parquet") + .mode(SaveMode.Overwrite) + .saveAsTable("t") + + runShowPartitionsSql( + "SHOW PARTITIONS t", + Row("part=__HIVE_DEFAULT_PARTITION__") :: Nil) + checkAnswer(spark.table("t"), + Row(0, null) :: + Row(1, null) :: Nil) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala index b8ecb87ae7595..b0d0f6ced9346 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala @@ -59,4 +59,12 @@ class AlterTableAddPartitionSuite assert(errMsg.contains(s"Table $t can not alter partitions")) } } + + test("empty string as partition value") { + withNamespaceAndTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (col1 INT, p1 STRING) $defaultUsing PARTITIONED BY (p1)") + sql(s"ALTER TABLE $t ADD PARTITION (p1 = '')") + checkPartitions(t, Map("p1" -> "")) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala index e2762f0439cb3..97ef10e256515 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala @@ -50,4 +50,13 @@ class AlterTableDropPartitionSuite } } } + + test("empty string as partition value") { + withNamespaceAndTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (col1 INT, p1 STRING) $defaultUsing PARTITIONED BY (p1)") + sql(s"ALTER TABLE $t ADD PARTITION (p1 = '')") + sql(s"ALTER TABLE $t DROP PARTITION (p1 = '')") + checkPartitions(t) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala index ed0a7dff62440..431f64baf4b78 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala @@ -45,7 +45,13 @@ class ShowPartitionsSuite extends command.ShowPartitionsSuiteBase with CommandSu .mode(SaveMode.Overwrite) .saveAsTable(t) - runShowPartitionsSql(s"SHOW PARTITIONS $t", Row("part=") :: Row("part=null") :: Nil) + runShowPartitionsSql( + s"SHOW PARTITIONS $t", + Row("part=") :: + 
Row("part=null") :: Nil) + checkAnswer(spark.table(t), + Row(0, "") :: + Row(1, null) :: Nil) } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowPartitionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowPartitionsSuite.scala index fa8ac4ccaa089..eaac8f5e8146c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowPartitionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowPartitionsSuite.scala @@ -17,6 +17,28 @@ package org.apache.spark.sql.hive.execution.command +import org.apache.spark.sql.{Row, SaveMode} import org.apache.spark.sql.execution.command.v1 -class ShowPartitionsSuite extends v1.ShowPartitionsSuiteBase with CommandSuiteBase +class ShowPartitionsSuite extends v1.ShowPartitionsSuiteBase with CommandSuiteBase { + test("null and empty string as partition values") { + import testImplicits._ + withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") { + withTable("t") { + val df = Seq((0, ""), (1, null)).toDF("a", "part") + df.write + .partitionBy("part") + .format("hive") + .mode(SaveMode.Overwrite) + .saveAsTable("t") + + runShowPartitionsSql( + "SHOW PARTITIONS t", + Row("part=__HIVE_DEFAULT_PARTITION__") :: Nil) + checkAnswer(spark.table("t"), + Row(0, "__HIVE_DEFAULT_PARTITION__") :: + Row(1, "__HIVE_DEFAULT_PARTITION__") :: Nil) + } + } + } +} From 29cca68e9e55fae8389378de6f30d0dfa7a74010 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 24 Dec 2020 08:56:02 +0000 Subject: [PATCH 0877/1009] [SPARK-33892][SQL] Display char/varchar in DESC and SHOW CREATE TABLE ### What changes were proposed in this pull request? Display char/varchar in - DESC table - DESC column - SHOW CREATE TABLE ### Why are the changes needed? show the correct definition for users ### Does this PR introduce _any_ user-facing change? yes, char/varchar column's will print char/varchar instead of string ### How was this patch tested? new tests Closes #30908 from yaooqinn/SPARK-33892. 
Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../spark/sql/execution/command/tables.scala | 12 ++++---- .../v2/ShowTablePropertiesExec.scala | 4 +-- .../datasources/v2/V2CommandExec.scala | 4 +++ .../spark/sql/CharVarcharTestSuite.scala | 29 +++++++++++++++++++ .../spark/sql/HiveCharVarcharTestSuite.scala | 9 ++++++ 5 files changed, 50 insertions(+), 8 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index cf2a6ffb2c682..0fcf8f2717041 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.DescribeCommandSchema import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier, CaseInsensitiveMap} +import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier, CaseInsensitiveMap, CharVarcharUtils} import org.apache.spark.sql.execution.datasources.DataSource import org.apache.spark.sql.execution.datasources.csv.CSVFileFormat import org.apache.spark.sql.execution.datasources.json.JsonFileFormat @@ -631,7 +631,7 @@ case class DescribeTableCommand( } describeSchema(catalog.lookupRelation(table).schema, result, header = false) } else { - val metadata = catalog.getTableMetadata(table) + val metadata = catalog.getTableRawMetadata(table) if (metadata.schema.isEmpty) { // In older version(prior to 2.1) of Spark, the table schema can be empty and should be // inferred at runtime. We should still support it. @@ -782,9 +782,11 @@ case class DescribeColumnCommand( None } + val dataType = CharVarcharUtils.getRawType(field.metadata) + .getOrElse(field.dataType).catalogString val buffer = ArrayBuffer[Row]( Row("col_name", field.name), - Row("data_type", field.dataType.catalogString), + Row("data_type", dataType), Row("comment", comment.getOrElse("NULL")) ) if (isExtended) { @@ -1111,7 +1113,7 @@ case class ShowCreateTableCommand(table: TableIdentifier) throw new AnalysisException( s"SHOW CREATE TABLE is not supported on a temporary view: ${table.identifier}") } else { - val tableMetadata = catalog.getTableMetadata(table) + val tableMetadata = catalog.getTableRawMetadata(table) // TODO: [SPARK-28692] unify this after we unify the // CREATE TABLE syntax for hive serde and data source table. 
@@ -1262,7 +1264,7 @@ case class ShowCreateTableAsSerdeCommand(table: TableIdentifier) override def run(sparkSession: SparkSession): Seq[Row] = { val catalog = sparkSession.sessionState.catalog - val tableMetadata = catalog.getTableMetadata(table) + val tableMetadata = catalog.getTableRawMetadata(table) val stmt = if (DDLUtils.isDatasourceTable(tableMetadata)) { throw new AnalysisException( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala index 7ceee1edee180..6d3a94ef15631 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet, GenericRowWithSchema} +import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Table} /** @@ -30,8 +30,6 @@ case class ShowTablePropertiesExec( catalogTable: Table, propertyKey: Option[String]) extends V2CommandExec { - override def producedAttributes: AttributeSet = AttributeSet(output) - override protected def run(): Seq[InternalRow] = { import scala.collection.JavaConverters._ val toRow = RowEncoder(schema).resolveAndBind().createSerializer() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala index 7738f26dfd266..6b193674cc71a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.AttributeSet import org.apache.spark.sql.execution.SparkPlan /** @@ -55,4 +56,7 @@ abstract class V2CommandExec extends SparkPlan { } override def children: Seq[SparkPlan] = Nil + + override def producedAttributes: AttributeSet = outputSet + } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index 8ab8c37d5e790..9d4b7c4f82ed2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -443,6 +443,14 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { ("c1 IN (c2)", true))) } } + + test("SPARK-33892: DESCRIBE TABLE w/ char/varchar") { + withTable("t") { + sql(s"CREATE TABLE t(v VARCHAR(3), c CHAR(5)) USING $format") + checkAnswer(sql("desc t").selectExpr("data_type").where("data_type like '%char%'"), + Seq(Row("char(5)"), Row("varchar(3)"))) + } + } } // Some basic char/varchar tests which doesn't rely on table implementation. 
@@ -603,6 +611,27 @@ class FileSourceCharVarcharTestSuite extends CharVarcharTestSuite with SharedSpa } } } + + // TODO(SPARK-33875): Move these tests to super after DESCRIBE COLUMN v2 implemented + test("SPARK-33892: DESCRIBE COLUMN w/ char/varchar") { + withTable("t") { + sql(s"CREATE TABLE t(v VARCHAR(3), c CHAR(5)) USING $format") + checkAnswer(sql("desc t v").selectExpr("info_value").where("info_value like '%char%'"), + Row("varchar(3)")) + checkAnswer(sql("desc t c").selectExpr("info_value").where("info_value like '%char%'"), + Row("char(5)")) + } + } + + // TODO(SPARK-33898): Move these tests to super after SHOW CREATE TABLE for v2 implemented + test("SPARK-33892: SHOW CREATE TABLE w/ char/varchar") { + withTable("t") { + sql(s"CREATE TABLE t(v VARCHAR(3), c CHAR(5)) USING $format") + val rest = sql("SHOW CREATE TABLE t").head().getString(0) + assert(rest.contains("VARCHAR(3)")) + assert(rest.contains("CHAR(5)")) + } + } } class DSV2CharVarcharTestSuite extends CharVarcharTestSuite diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala index f48cfb8dfb899..bb7918c881c7e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala @@ -41,6 +41,15 @@ class HiveCharVarcharTestSuite extends CharVarcharTestSuite with TestHiveSinglet } super.afterAll() } + + test("SPARK-33892: SHOW CREATE TABLE AS SERDE w/ char/varchar") { + withTable("t") { + sql(s"CREATE TABLE t(v VARCHAR(3), c CHAR(5)) USING $format") + val rest = sql("SHOW CREATE TABLE t AS SERDE").head().getString(0) + assert(rest.contains("VARCHAR(3)")) + assert(rest.contains("CHAR(5)")) + } + } } class HiveCharVarcharDDLTestSuite extends CharVarcharDDLTestBase with TestHiveSingleton { From 700f5ab65c1c84522302ce92d176adf229c34daa Mon Sep 17 00:00:00 2001 From: sychen Date: Fri, 25 Dec 2020 00:54:26 +0900 Subject: [PATCH 0878/1009] [SPARK-33900][WEBUI] Show shuffle read size / records correctly when only remotebytesread is available ### What changes were proposed in this pull request? Shuffle Read Size / Records can also be displayed in remoteBytesRead>0 localBytesRead=0. current: ![image](https://user-images.githubusercontent.com/3898450/103079421-c4ca2280-460e-11eb-9e2f-49d35b5d324d.png) fix: ![image](https://user-images.githubusercontent.com/3898450/103079439-cc89c700-460e-11eb-9a41-6b2882980d11.png) ### Why are the changes needed? At present, the page only displays the data of Shuffle Read Size / Records when localBytesRead>0. When there is only remote reading, metrics cannot be seen on the stage page. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? manual test Closes #30916 from cxzl25/SPARK-33900. 
Authored-by: sychen Signed-off-by: Kousuke Saruta --- .../src/main/resources/org/apache/spark/ui/static/stagepage.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/stagepage.js b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js index 336edff509300..ebb79f542168d 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/stagepage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js @@ -946,7 +946,8 @@ $(document).ready(function () { }, { data : function (row, type) { - if (row.taskMetrics && row.taskMetrics.shuffleReadMetrics && row.taskMetrics.shuffleReadMetrics.localBytesRead > 0) { + if (row.taskMetrics && row.taskMetrics.shuffleReadMetrics && + (row.taskMetrics.shuffleReadMetrics.localBytesRead > 0 || row.taskMetrics.shuffleReadMetrics.remoteBytesRead > 0)) { var totalBytesRead = parseInt(row.taskMetrics.shuffleReadMetrics.localBytesRead) + parseInt(row.taskMetrics.shuffleReadMetrics.remoteBytesRead); if (type === 'display') { return formatBytes(totalBytesRead, type) + " / " + row.taskMetrics.shuffleReadMetrics.recordsRead; From 9c30116fb428f87543155323617cf5fb700e84cd Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Thu, 24 Dec 2020 14:30:34 -0800 Subject: [PATCH 0879/1009] [SPARK-33857][SQL] Unify the default seed of random functions ### What changes were proposed in this pull request? Unify the seed of random functions: 1. Add a placeholder expression `UnresolvedSeed` as the default seed. 2. Change the default seed of `Rand`, `Randn`, `Uuid` and `Shuffle` to `UnresolvedSeed`. 3. Replace `UnresolvedSeed` with a real seed in the `ResolveRandomSeed` rule. ### Why are the changes needed? `Uuid` and `Shuffle` use the `ResolveRandomSeed` rule to set the seed if the user doesn't give a seed value, while `Rand` and `Randn` do this at construction time. It's better to unify the default seed handling on the Analyzer side, since `ExpressionWithRandomSeed` is already used for streaming queries. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass existing tests and add a new test. Closes #30864 from ulysses-you/SPARK-33857.
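As a rough sketch of what the rule does (assumption: spark-shell with a `SparkSession` bound to `spark`): seedless random functions written in SQL are parsed with the `UnresolvedSeed` placeholder, and `ResolveRandomSeed` fills in a concrete `Literal` seed during analysis.
```
// rand() and uuid() without an explicit seed start out with UnresolvedSeed; in the
// analyzed plan the placeholder has been replaced by a literal seed.
val df = spark.sql("SELECT rand() AS r, uuid() AS u")
println(df.queryExecution.analyzed.numberedTreeString)
```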
Authored-by: ulysses-you Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/analysis/Analyzer.scala | 4 ++-- .../sql/catalyst/analysis/unresolved.scala | 9 +++++++ .../expressions/collectionOperations.scala | 4 +++- .../spark/sql/catalyst/expressions/misc.scala | 3 +++ .../expressions/randomExpressions.scala | 24 ++++++++++--------- .../sql/catalyst/analysis/AnalysisSuite.scala | 12 ++++++++++ 6 files changed, 42 insertions(+), 14 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index ba24914cb6835..8af692d9fe008 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -3000,8 +3000,8 @@ class Analyzer(override val catalogManager: CatalogManager) override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { case p if p.resolved => p case p => p transformExpressionsUp { - case Uuid(None) => Uuid(Some(random.nextLong())) - case Shuffle(child, None) => Shuffle(child, Some(random.nextLong())) + case e: ExpressionWithRandomSeed if e.seedExpression == UnresolvedSeed => + e.withNewSeed(random.nextLong()) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 8a73208d42e20..84614886348aa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -561,3 +561,12 @@ case class UnresolvedHaving( override lazy val resolved: Boolean = false override def output: Seq[Attribute] = child.output } + +/** + * A place holder expression used in random functions, will be replaced after analyze. 
+ */ +case object UnresolvedSeed extends LeafExpression with Unevaluable { + override def nullable: Boolean = throw new UnresolvedException(this, "nullable") + override def dataType: DataType = throw new UnresolvedException(this, "dataType") + override lazy val resolved = false +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 33794467fb338..17b45bc44a28e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -23,7 +23,7 @@ import scala.collection.mutable import scala.reflect.ClassTag import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion} +import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion, UnresolvedSeed} import org.apache.spark.sql.catalyst.expressions.ArraySortLike.NullOrder import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ @@ -943,6 +943,8 @@ case class Shuffle(child: Expression, randomSeed: Option[Long] = None) def this(child: Expression) = this(child, None) + override def seedExpression: Expression = randomSeed.map(Literal.apply).getOrElse(UnresolvedSeed) + override def withNewSeed(seed: Long): Shuffle = copy(randomSeed = Some(seed)) override lazy val resolved: Boolean = diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index 34a64dddd30fa..4ad4c4d61f10c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.{SPARK_REVISION, SPARK_VERSION_SHORT} import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.UnresolvedSeed import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.util.RandomUUIDGenerator @@ -187,6 +188,8 @@ case class Uuid(randomSeed: Option[Long] = None) extends LeafExpression with Sta def this() = this(None) + override def seedExpression: Expression = randomSeed.map(Literal.apply).getOrElse(UnresolvedSeed) + override def withNewSeed(seed: Long): Uuid = Uuid(Some(seed)) override lazy val resolved: Boolean = randomSeed.isDefined diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala index 0fa4d6c315041..630c934f79533 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala @@ -19,10 +19,10 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.UnresolvedSeed import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode, FalseLiteral} 
import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.types._ -import org.apache.spark.util.Utils import org.apache.spark.util.random.XORShiftRandom /** @@ -32,7 +32,8 @@ import org.apache.spark.util.random.XORShiftRandom * * Since this expression is stateful, it cannot be a case object. */ -abstract class RDG extends UnaryExpression with ExpectsInputTypes with Stateful { +abstract class RDG extends UnaryExpression with ExpectsInputTypes with Stateful + with ExpressionWithRandomSeed { /** * Record ID within each partition. By being transient, the Random Number Generator is * reset every time we serialize and deserialize and initialize it. @@ -43,7 +44,9 @@ abstract class RDG extends UnaryExpression with ExpectsInputTypes with Stateful rng = new XORShiftRandom(seed + partitionIndex) } - @transient protected lazy val seed: Long = child match { + override def seedExpression: Expression = child + + @transient protected lazy val seed: Long = seedExpression match { case Literal(s, IntegerType) => s.asInstanceOf[Int] case Literal(s, LongType) => s.asInstanceOf[Long] case _ => throw new AnalysisException( @@ -62,6 +65,7 @@ abstract class RDG extends UnaryExpression with ExpectsInputTypes with Stateful * Usually the random seed needs to be renewed at each execution under streaming queries. */ trait ExpressionWithRandomSeed { + def seedExpression: Expression def withNewSeed(seed: Long): Expression } @@ -84,14 +88,13 @@ trait ExpressionWithRandomSeed { since = "1.5.0", group = "math_funcs") // scalastyle:on line.size.limit -case class Rand(child: Expression, hideSeed: Boolean = false) - extends RDG with ExpressionWithRandomSeed { +case class Rand(child: Expression, hideSeed: Boolean = false) extends RDG { - def this() = this(Literal(Utils.random.nextLong(), LongType), true) + def this() = this(UnresolvedSeed, true) def this(child: Expression) = this(child, false) - override def withNewSeed(seed: Long): Rand = Rand(Literal(seed, LongType)) + override def withNewSeed(seed: Long): Rand = Rand(Literal(seed, LongType), hideSeed) override protected def evalInternal(input: InternalRow): Double = rng.nextDouble() @@ -136,14 +139,13 @@ object Rand { since = "1.5.0", group = "math_funcs") // scalastyle:on line.size.limit -case class Randn(child: Expression, hideSeed: Boolean = false) - extends RDG with ExpressionWithRandomSeed { +case class Randn(child: Expression, hideSeed: Boolean = false) extends RDG { - def this() = this(Literal(Utils.random.nextLong(), LongType), true) + def this() = this(UnresolvedSeed, true) def this(child: Expression) = this(child, false) - override def withNewSeed(seed: Long): Randn = Randn(Literal(seed, LongType)) + override def withNewSeed(seed: Long): Randn = Randn(Literal(seed, LongType), hideSeed) override protected def evalInternal(input: InternalRow): Double = rng.nextGaussian() diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index b206bc9f84f18..f66871ee75ecc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -1006,4 +1006,16 @@ class AnalysisSuite extends AnalysisTest with Matchers { checkAnalysis(plan, expect) } } + + test("SPARK-33857: Unify the default seed of random functions") { + Seq(new Rand(), new Randn(), Shuffle(Literal(Array(1))), Uuid()).foreach 
{ r => + assert(r.seedExpression == UnresolvedSeed) + val p = getAnalyzer.execute(Project(Seq(r.as("r")), testRelation)) + assert( + p.asInstanceOf[Project].projectList.head.asInstanceOf[Alias] + .child.asInstanceOf[ExpressionWithRandomSeed] + .seedExpression.isInstanceOf[Literal] + ) + } + } } From 65a9ac2ff4d902976bf3ef89d1d3e29c1e6d5414 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Thu, 24 Dec 2020 14:44:16 -0800 Subject: [PATCH 0880/1009] [SPARK-30027][SQL] Support codegen for aggregate filters in HashAggregateExec ### What changes were proposed in this pull request? This pr intends to support code generation for `HashAggregateExec` with filters. Quick benchmark results: ``` $ ./bin/spark-shell --master=local[1] --conf spark.driver.memory=8g --conf spark.sql.shuffle.partitions=1 -v scala> spark.range(100000000).selectExpr("id % 3 as k1", "id % 5 as k2", "rand() as v1", "rand() as v2").write.saveAsTable("t") scala> sql("SELECT k1, k2, AVG(v1) FILTER (WHERE v2 > 0.5) FROM t GROUP BY k1, k2").write.format("noop").mode("overwrite").save() >> Before this PR Elapsed time: 16.170697619s >> After this PR Elapsed time: 6.7825313s ``` The query above is compiled into code below; ``` ... /* 285 */ private void agg_doAggregate_avg_0(boolean agg_exprIsNull_2_0, org.apache.spark.sql.catalyst.InternalRow agg_unsafeRowAggBuffer_0, double agg_expr_2_0) throws java.io.IOException { /* 286 */ // evaluate aggregate function for avg /* 287 */ boolean agg_isNull_10 = true; /* 288 */ double agg_value_12 = -1.0; /* 289 */ boolean agg_isNull_11 = agg_unsafeRowAggBuffer_0.isNullAt(0); /* 290 */ double agg_value_13 = agg_isNull_11 ? /* 291 */ -1.0 : (agg_unsafeRowAggBuffer_0.getDouble(0)); /* 292 */ if (!agg_isNull_11) { /* 293 */ agg_agg_isNull_12_0 = true; /* 294 */ double agg_value_14 = -1.0; /* 295 */ do { /* 296 */ if (!agg_exprIsNull_2_0) { /* 297 */ agg_agg_isNull_12_0 = false; /* 298 */ agg_value_14 = agg_expr_2_0; /* 299 */ continue; /* 300 */ } /* 301 */ /* 302 */ if (!false) { /* 303 */ agg_agg_isNull_12_0 = false; /* 304 */ agg_value_14 = 0.0D; /* 305 */ continue; /* 306 */ } /* 307 */ /* 308 */ } while (false); /* 309 */ /* 310 */ agg_isNull_10 = false; // resultCode could change nullability. /* 311 */ /* 312 */ agg_value_12 = agg_value_13 + agg_value_14; /* 313 */ /* 314 */ } /* 315 */ boolean agg_isNull_15 = false; /* 316 */ long agg_value_17 = -1L; /* 317 */ if (!false && agg_exprIsNull_2_0) { /* 318 */ boolean agg_isNull_18 = agg_unsafeRowAggBuffer_0.isNullAt(1); /* 319 */ long agg_value_20 = agg_isNull_18 ? /* 320 */ -1L : (agg_unsafeRowAggBuffer_0.getLong(1)); /* 321 */ agg_isNull_15 = agg_isNull_18; /* 322 */ agg_value_17 = agg_value_20; /* 323 */ } else { /* 324 */ boolean agg_isNull_19 = true; /* 325 */ long agg_value_21 = -1L; /* 326 */ boolean agg_isNull_20 = agg_unsafeRowAggBuffer_0.isNullAt(1); /* 327 */ long agg_value_22 = agg_isNull_20 ? /* 328 */ -1L : (agg_unsafeRowAggBuffer_0.getLong(1)); /* 329 */ if (!agg_isNull_20) { /* 330 */ agg_isNull_19 = false; // resultCode could change nullability. 
/* 331 */ /* 332 */ agg_value_21 = agg_value_22 + 1L; /* 333 */ /* 334 */ } /* 335 */ agg_isNull_15 = agg_isNull_19; /* 336 */ agg_value_17 = agg_value_21; /* 337 */ } /* 338 */ // update unsafe row buffer /* 339 */ if (!agg_isNull_10) { /* 340 */ agg_unsafeRowAggBuffer_0.setDouble(0, agg_value_12); /* 341 */ } else { /* 342 */ agg_unsafeRowAggBuffer_0.setNullAt(0); /* 343 */ } /* 344 */ /* 345 */ if (!agg_isNull_15) { /* 346 */ agg_unsafeRowAggBuffer_0.setLong(1, agg_value_17); /* 347 */ } else { /* 348 */ agg_unsafeRowAggBuffer_0.setNullAt(1); /* 349 */ } /* 350 */ } ... ``` ### Why are the changes needed? For high performance. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Existing tests. Closes #27019 from maropu/AggregateFilterCodegen. Authored-by: Takeshi Yamamuro Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/expressions/predicates.scala | 18 +++ .../aggregate/HashAggregateExec.scala | 100 +++++++------- .../execution/basicPhysicalOperators.scala | 130 ++++++++++-------- .../sql-tests/inputs/group-by-filter.sql | 5 +- .../sql-tests/results/explain.sql.out | 4 +- 5 files changed, 151 insertions(+), 106 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 250d3fee94cb3..c61d24758617c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -242,6 +242,24 @@ trait PredicateHelper extends AliasHelper with Logging { None } } + + // If one expression and its children are null intolerant, it is null intolerant. + protected def isNullIntolerant(expr: Expression): Boolean = expr match { + case e: NullIntolerant => e.children.forall(isNullIntolerant) + case _ => false + } + + protected def outputWithNullability( + output: Seq[Attribute], + nonNullAttrExprIds: Seq[ExprId]): Seq[Attribute] = { + output.map { a => + if (a.nullable && nonNullAttrExprIds.contains(a.exprId)) { + a.withNullability(false) + } else { + a + } + } + } } @ExpressionDescription( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala index 52d0450afb181..cdad9de00620b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala @@ -53,7 +53,8 @@ case class HashAggregateExec( resultExpressions: Seq[NamedExpression], child: SparkPlan) extends BaseAggregateExec - with BlockingOperatorWithCodegen { + with BlockingOperatorWithCodegen + with GeneratePredicateHelper { require(HashAggregateExec.supportsAggregate(aggregateBufferAttributes)) @@ -131,10 +132,8 @@ case class HashAggregateExec( override def usedInputs: AttributeSet = inputSet override def supportCodegen: Boolean = { - // ImperativeAggregate and filter predicate are not supported right now - // TODO: SPARK-30027 Support codegen for filter exprs in HashAggregateExec - !(aggregateExpressions.exists(_.aggregateFunction.isInstanceOf[ImperativeAggregate]) || - aggregateExpressions.exists(_.filter.isDefined)) + // ImperativeAggregate are not supported right now + !aggregateExpressions.exists(_.aggregateFunction.isInstanceOf[ImperativeAggregate]) } override def inputRDDs(): 
Seq[RDD[InternalRow]] = { @@ -254,7 +253,7 @@ case class HashAggregateExec( aggNames: Seq[String], aggBufferUpdatingExprs: Seq[Seq[Expression]], aggCodeBlocks: Seq[Block], - subExprs: Map[Expression, SubExprEliminationState]): Option[String] = { + subExprs: Map[Expression, SubExprEliminationState]): Option[Seq[String]] = { val exprValsInSubExprs = subExprs.flatMap { case (_, s) => s.value :: s.isNull :: Nil } if (exprValsInSubExprs.exists(_.isInstanceOf[SimpleExprValue])) { // `SimpleExprValue`s cannot be used as an input variable for split functions, so @@ -293,7 +292,7 @@ case class HashAggregateExec( val inputVariables = args.map(_.variableName).mkString(", ") s"$doAggFuncName($inputVariables);" } - Some(splitCodes.mkString("\n").trim) + Some(splitCodes) } else { val errMsg = "Failed to split aggregate code into small functions because the parameter " + "length of at least one split function went over the JVM limit: " + @@ -308,6 +307,39 @@ case class HashAggregateExec( } } + private def generateEvalCodeForAggFuncs( + ctx: CodegenContext, + input: Seq[ExprCode], + inputAttrs: Seq[Attribute], + boundUpdateExprs: Seq[Seq[Expression]], + aggNames: Seq[String], + aggCodeBlocks: Seq[Block], + subExprs: SubExprCodes): String = { + val aggCodes = if (conf.codegenSplitAggregateFunc && + aggCodeBlocks.map(_.length).sum > conf.methodSplitThreshold) { + val maybeSplitCodes = splitAggregateExpressions( + ctx, aggNames, boundUpdateExprs, aggCodeBlocks, subExprs.states) + + maybeSplitCodes.getOrElse(aggCodeBlocks.map(_.code)) + } else { + aggCodeBlocks.map(_.code) + } + + aggCodes.zip(aggregateExpressions.map(ae => (ae.mode, ae.filter))).map { + case (aggCode, (Partial | Complete, Some(condition))) => + // Note: wrap in "do { } while(false);", so the generated checks can jump out + // with "continue;" + s""" + |do { + | ${generatePredicateCode(ctx, condition, inputAttrs, input)} + | $aggCode + |} while(false); + """.stripMargin + case (aggCode, _) => + aggCode + }.mkString("\n") + } + private def doConsumeWithoutKeys(ctx: CodegenContext, input: Seq[ExprCode]): String = { // only have DeclarativeAggregate val functions = aggregateExpressions.map(_.aggregateFunction.asInstanceOf[DeclarativeAggregate]) @@ -354,24 +386,14 @@ case class HashAggregateExec( """.stripMargin } - val codeToEvalAggFunc = if (conf.codegenSplitAggregateFunc && - aggCodeBlocks.map(_.length).sum > conf.methodSplitThreshold) { - val maybeSplitCode = splitAggregateExpressions( - ctx, aggNames, boundUpdateExprs, aggCodeBlocks, subExprs.states) - - maybeSplitCode.getOrElse { - aggCodeBlocks.fold(EmptyBlock)(_ + _).code - } - } else { - aggCodeBlocks.fold(EmptyBlock)(_ + _).code - } - + val codeToEvalAggFuncs = generateEvalCodeForAggFuncs( + ctx, input, inputAttrs, boundUpdateExprs, aggNames, aggCodeBlocks, subExprs) s""" |// do aggregate |// common sub-expressions |$effectiveCodes |// evaluate aggregate functions and update aggregation buffers - |$codeToEvalAggFunc + |$codeToEvalAggFuncs """.stripMargin } @@ -908,7 +930,7 @@ case class HashAggregateExec( } } - val inputAttr = aggregateBufferAttributes ++ inputAttributes + val inputAttrs = aggregateBufferAttributes ++ inputAttributes // Here we set `currentVars(0)` to `currentVars(numBufferSlots)` to null, so that when // generating code for buffer columns, we use `INPUT_ROW`(will be the buffer row), while // generating input columns, we use `currentVars`. 
@@ -930,7 +952,7 @@ case class HashAggregateExec( val updateRowInRegularHashMap: String = { ctx.INPUT_ROW = unsafeRowBuffer val boundUpdateExprs = updateExprs.map { updateExprsForOneFunc => - bindReferences(updateExprsForOneFunc, inputAttr) + bindReferences(updateExprsForOneFunc, inputAttrs) } val subExprs = ctx.subexpressionEliminationForWholeStageCodegen(boundUpdateExprs.flatten) val effectiveCodes = subExprs.codes.mkString("\n") @@ -961,23 +983,13 @@ case class HashAggregateExec( """.stripMargin } - val codeToEvalAggFunc = if (conf.codegenSplitAggregateFunc && - aggCodeBlocks.map(_.length).sum > conf.methodSplitThreshold) { - val maybeSplitCode = splitAggregateExpressions( - ctx, aggNames, boundUpdateExprs, aggCodeBlocks, subExprs.states) - - maybeSplitCode.getOrElse { - aggCodeBlocks.fold(EmptyBlock)(_ + _).code - } - } else { - aggCodeBlocks.fold(EmptyBlock)(_ + _).code - } - + val codeToEvalAggFuncs = generateEvalCodeForAggFuncs( + ctx, input, inputAttrs, boundUpdateExprs, aggNames, aggCodeBlocks, subExprs) s""" |// common sub-expressions |$effectiveCodes |// evaluate aggregate functions and update aggregation buffers - |$codeToEvalAggFunc + |$codeToEvalAggFuncs """.stripMargin } @@ -986,7 +998,7 @@ case class HashAggregateExec( if (isVectorizedHashMapEnabled) { ctx.INPUT_ROW = fastRowBuffer val boundUpdateExprs = updateExprs.map { updateExprsForOneFunc => - bindReferences(updateExprsForOneFunc, inputAttr) + bindReferences(updateExprsForOneFunc, inputAttrs) } val subExprs = ctx.subexpressionEliminationForWholeStageCodegen(boundUpdateExprs.flatten) val effectiveCodes = subExprs.codes.mkString("\n") @@ -1016,18 +1028,8 @@ case class HashAggregateExec( """.stripMargin } - - val codeToEvalAggFunc = if (conf.codegenSplitAggregateFunc && - aggCodeBlocks.map(_.length).sum > conf.methodSplitThreshold) { - val maybeSplitCode = splitAggregateExpressions( - ctx, aggNames, boundUpdateExprs, aggCodeBlocks, subExprs.states) - - maybeSplitCode.getOrElse { - aggCodeBlocks.fold(EmptyBlock)(_ + _).code - } - } else { - aggCodeBlocks.fold(EmptyBlock)(_ + _).code - } + val codeToEvalAggFuncs = generateEvalCodeForAggFuncs( + ctx, input, inputAttrs, boundUpdateExprs, aggNames, aggCodeBlocks, subExprs) // If vectorized fast hash map is on, we first generate code to update row // in vectorized fast hash map, if the previous loop up hit vectorized fast hash map. @@ -1037,7 +1039,7 @@ case class HashAggregateExec( | // common sub-expressions | $effectiveCodes | // evaluate aggregate functions and update aggregation buffers - | $codeToEvalAggFunc + | $codeToEvalAggFuncs |} else { | $updateRowInRegularHashMap |} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala index d74d0bf733c27..abd336006848b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala @@ -109,59 +109,39 @@ case class ProjectExec(projectList: Seq[NamedExpression], child: SparkPlan) } } -/** Physical plan for Filter. */ -case class FilterExec(condition: Expression, child: SparkPlan) - extends UnaryExecNode with CodegenSupport with PredicateHelper { - - // Split out all the IsNotNulls from condition. 
- private val (notNullPreds, otherPreds) = splitConjunctivePredicates(condition).partition { - case IsNotNull(a) => isNullIntolerant(a) && a.references.subsetOf(child.outputSet) - case _ => false - } - - // If one expression and its children are null intolerant, it is null intolerant. - private def isNullIntolerant(expr: Expression): Boolean = expr match { - case e: NullIntolerant => e.children.forall(isNullIntolerant) - case _ => false - } - - // The columns that will filtered out by `IsNotNull` could be considered as not nullable. - private val notNullAttributes = notNullPreds.flatMap(_.references).distinct.map(_.exprId) - - // Mark this as empty. We'll evaluate the input during doConsume(). We don't want to evaluate - // all the variables at the beginning to take advantage of short circuiting. - override def usedInputs: AttributeSet = AttributeSet.empty - - override def output: Seq[Attribute] = { - child.output.map { a => - if (a.nullable && notNullAttributes.contains(a.exprId)) { - a.withNullability(false) - } else { - a - } +trait GeneratePredicateHelper extends PredicateHelper { + self: CodegenSupport => + + protected def generatePredicateCode( + ctx: CodegenContext, + condition: Expression, + inputAttrs: Seq[Attribute], + inputExprCode: Seq[ExprCode]): String = { + val (notNullPreds, otherPreds) = splitConjunctivePredicates(condition).partition { + case IsNotNull(a) => isNullIntolerant(a) && a.references.subsetOf(AttributeSet(inputAttrs)) + case _ => false } - } - - override lazy val metrics = Map( - "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows")) - - override def inputRDDs(): Seq[RDD[InternalRow]] = { - child.asInstanceOf[CodegenSupport].inputRDDs() - } - - protected override def doProduce(ctx: CodegenContext): String = { - child.asInstanceOf[CodegenSupport].produce(ctx, this) - } - - override def doConsume(ctx: CodegenContext, input: Seq[ExprCode], row: ExprCode): String = { - val numOutput = metricTerm(ctx, "numOutputRows") - + val nonNullAttrExprIds = notNullPreds.flatMap(_.references).distinct.map(_.exprId) + val outputAttrs = outputWithNullability(inputAttrs, nonNullAttrExprIds) + generatePredicateCode( + ctx, inputAttrs, inputExprCode, outputAttrs, notNullPreds, otherPreds, + nonNullAttrExprIds) + } + + protected def generatePredicateCode( + ctx: CodegenContext, + inputAttrs: Seq[Attribute], + inputExprCode: Seq[ExprCode], + outputAttrs: Seq[Attribute], + notNullPreds: Seq[Expression], + otherPreds: Seq[Expression], + nonNullAttrExprIds: Seq[ExprId]): String = { /** * Generates code for `c`, using `in` for input attributes and `attrs` for nullability. */ def genPredicate(c: Expression, in: Seq[ExprCode], attrs: Seq[Attribute]): String = { val bound = BindReferences.bindReference(c, attrs) - val evaluated = evaluateRequiredVariables(child.output, in, c.references) + val evaluated = evaluateRequiredVariables(inputAttrs, in, c.references) // Generate the code for the predicate. val ev = ExpressionCanonicalizer.execute(bound).genCode(ctx) @@ -195,10 +175,10 @@ case class FilterExec(condition: Expression, child: SparkPlan) if (idx != -1 && !generatedIsNotNullChecks(idx)) { generatedIsNotNullChecks(idx) = true // Use the child's output. The nullability is what the child produced. 
- genPredicate(notNullPreds(idx), input, child.output) - } else if (notNullAttributes.contains(r.exprId) && !extraIsNotNullAttrs.contains(r)) { + genPredicate(notNullPreds(idx), inputExprCode, inputAttrs) + } else if (nonNullAttrExprIds.contains(r.exprId) && !extraIsNotNullAttrs.contains(r)) { extraIsNotNullAttrs += r - genPredicate(IsNotNull(r), input, child.output) + genPredicate(IsNotNull(r), inputExprCode, inputAttrs) } else { "" } @@ -208,18 +188,61 @@ case class FilterExec(condition: Expression, child: SparkPlan) // enforced them with the IsNotNull checks above. s""" |$nullChecks - |${genPredicate(c, input, output)} + |${genPredicate(c, inputExprCode, outputAttrs)} """.stripMargin.trim }.mkString("\n") val nullChecks = notNullPreds.zipWithIndex.map { case (c, idx) => if (!generatedIsNotNullChecks(idx)) { - genPredicate(c, input, child.output) + genPredicate(c, inputExprCode, inputAttrs) } else { "" } }.mkString("\n") + s""" + |$generated + |$nullChecks + """.stripMargin + } +} + +/** Physical plan for Filter. */ +case class FilterExec(condition: Expression, child: SparkPlan) + extends UnaryExecNode with CodegenSupport with GeneratePredicateHelper { + + // Split out all the IsNotNulls from condition. + private val (notNullPreds, otherPreds) = splitConjunctivePredicates(condition).partition { + case IsNotNull(a) => isNullIntolerant(a) && a.references.subsetOf(child.outputSet) + case _ => false + } + + // The columns that will filtered out by `IsNotNull` could be considered as not nullable. + private val notNullAttributes = notNullPreds.flatMap(_.references).distinct.map(_.exprId) + + // Mark this as empty. We'll evaluate the input during doConsume(). We don't want to evaluate + // all the variables at the beginning to take advantage of short circuiting. + override def usedInputs: AttributeSet = AttributeSet.empty + + override def output: Seq[Attribute] = outputWithNullability(child.output, notNullAttributes) + + override lazy val metrics = Map( + "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows")) + + override def inputRDDs(): Seq[RDD[InternalRow]] = { + child.asInstanceOf[CodegenSupport].inputRDDs() + } + + protected override def doProduce(ctx: CodegenContext): String = { + child.asInstanceOf[CodegenSupport].produce(ctx, this) + } + + override def doConsume(ctx: CodegenContext, input: Seq[ExprCode], row: ExprCode): String = { + val numOutput = metricTerm(ctx, "numOutputRows") + + val predicateCode = generatePredicateCode( + ctx, child.output, input, output, notNullPreds, otherPreds, notNullAttributes) + // Reset the isNull to false for the not-null columns, then the followed operators could // generate better code (remove dead branches). val resultVars = input.zipWithIndex.map { case (ev, i) => @@ -232,8 +255,7 @@ case class FilterExec(condition: Expression, child: SparkPlan) // Note: wrap in "do { } while(false);", so the generated checks can jump out with "continue;" s""" |do { - | $generated - | $nullChecks + | $predicateCode | $numOutput.add(1); | ${consume(ctx, resultVars)} |} while(false); diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by-filter.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by-filter.sql index e4193d845f2e2..c1ccb654ee085 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by-filter.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by-filter.sql @@ -1,4 +1,7 @@ --- Test filter clause for aggregate expression. +-- Test filter clause for aggregate expression with codegen on and off. 
+--CONFIG_DIM1 spark.sql.codegen.wholeStage=true +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false diff --git a/sql/core/src/test/resources/sql-tests/results/explain.sql.out b/sql/core/src/test/resources/sql-tests/results/explain.sql.out index 886b98e538d28..a4c92382750e8 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain.sql.out @@ -878,7 +878,7 @@ struct == Physical Plan == * HashAggregate (5) +- Exchange (4) - +- HashAggregate (3) + +- * HashAggregate (3) +- * ColumnarToRow (2) +- Scan parquet default.explain_temp1 (1) @@ -892,7 +892,7 @@ ReadSchema: struct (2) ColumnarToRow [codegen id : 1] Input [2]: [key#x, val#x] -(3) HashAggregate +(3) HashAggregate [codegen id : 1] Input [2]: [key#x, val#x] Keys: [] Functions [3]: [partial_count(val#x), partial_sum(cast(key#x as bigint)), partial_count(key#x) FILTER (WHERE (val#x > 1))] From 10b6466e91d2e954386c74bf6ab7d94f23dd6810 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Fri, 25 Dec 2020 09:07:48 +0900 Subject: [PATCH 0881/1009] [SPARK-33084][CORE][SQL] Add jar support ivy path ### What changes were proposed in this pull request? Support `ADD JAR` with an Ivy path. ### Why are the changes needed? Since spark-submit already supports Ivy coordinates, `ADD JAR` can now support them as well. ### Does this PR introduce _any_ user-facing change? Users can add a jar with SQL like:
```
ADD JAR ivy://group:artifact:version?exclude=xxx,xxx&transitive=true
ADD JAR ivy://group:artifact:version?exclude=xxx,xxx&transitive=false
```
Core API:
```
sparkContext.addJar("ivy://group:artifact:version?exclude=xxx,xxx&transitive=true")
sparkContext.addJar("ivy://group:artifact:version?exclude=xxx,xxx&transitive=false")
```
#### Doc Update snapshot ![image](https://user-images.githubusercontent.com/46485123/101227738-de451200-36d3-11eb-813d-78a8b879da4f.png) ### How was this patch tested? Added unit tests. Closes #29966 from AngersZhuuuu/support-add-jar-ivy.
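A hedged usage sketch (the coordinates below are illustrative only; substitute any `group:module:version` resolvable from your configured repositories):
```
// SQL: resolve just the named artifact, no transitive dependencies.
spark.sql("ADD JAR ivy://org.apache.commons:commons-lang3:3.11?transitive=false")

// Core API: also pull transitive dependencies, excluding selected group:module pairs.
spark.sparkContext.addJar(
  "ivy://org.apache.hive:hive-storage-api:2.7.2?transitive=true&exclude=commons-lang:commons-lang")
```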
Lead-authored-by: angerszhu Co-authored-by: AngersZhuuuu Signed-off-by: Takeshi Yamamuro --- .../scala/org/apache/spark/SparkContext.scala | 45 +++--- .../org/apache/spark/deploy/SparkSubmit.scala | 8 +- .../spark/deploy/worker/DriverWrapper.scala | 16 +- .../{deploy => util}/DependencyUtils.scala | 137 +++++++++++++++++- .../org/apache/spark/SparkContextSuite.scala | 116 +++++++++++++++ .../spark/deploy/SparkSubmitSuite.scala | 2 +- .../spark/deploy/SparkSubmitUtilsSuite.scala | 14 +- .../apache/spark/util/DependencyUtils.scala | 60 ++++++++ ...ql-ref-syntax-aux-resource-mgmt-add-jar.md | 16 +- .../spark/sql/internal/SessionState.scala | 30 ++-- sql/core/src/test/resources/SPARK-33084.jar | Bin 0 -> 6322 bytes .../org/apache/spark/sql/SQLQuerySuite.scala | 54 +++++++ .../sql/hive/HiveSessionStateBuilder.scala | 9 +- .../hive/client/IsolatedClientLoader.scala | 1 + .../sql/hive/execution/HiveQuerySuite.scala | 17 +++ 15 files changed, 475 insertions(+), 50 deletions(-) rename core/src/main/scala/org/apache/spark/{deploy => util}/DependencyUtils.scala (54%) create mode 100644 core/src/test/scala/org/apache/spark/util/DependencyUtils.scala create mode 100644 sql/core/src/test/resources/SPARK-33084.jar diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 17ceb5f1887c6..aae340953c5b2 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1929,7 +1929,7 @@ class SparkContext(config: SparkConf) extends Logging { } private def addJar(path: String, addedOnSubmit: Boolean): Unit = { - def addLocalJarFile(file: File): String = { + def addLocalJarFile(file: File): Seq[String] = { try { if (!file.exists()) { throw new FileNotFoundException(s"Jar ${file.getAbsolutePath} not found") @@ -1938,15 +1938,15 @@ class SparkContext(config: SparkConf) extends Logging { throw new IllegalArgumentException( s"Directory ${file.getAbsoluteFile} is not allowed for addJar") } - env.rpcEnv.fileServer.addJar(file) + Seq(env.rpcEnv.fileServer.addJar(file)) } catch { case NonFatal(e) => logError(s"Failed to add $path to Spark environment", e) - null + Nil } } - def checkRemoteJarFile(path: String): String = { + def checkRemoteJarFile(path: String): Seq[String] = { val hadoopPath = new Path(path) val scheme = hadoopPath.toUri.getScheme if (!Array("http", "https", "ftp").contains(scheme)) { @@ -1959,28 +1959,29 @@ class SparkContext(config: SparkConf) extends Logging { throw new IllegalArgumentException( s"Directory ${path} is not allowed for addJar") } - path + Seq(path) } catch { case NonFatal(e) => logError(s"Failed to add $path to Spark environment", e) - null + Nil } } else { - path + Seq(path) } } if (path == null || path.isEmpty) { logWarning("null or empty path specified as parameter to addJar") } else { - val key = if (path.contains("\\") && Utils.isWindows) { + val (keys, scheme) = if (path.contains("\\") && Utils.isWindows) { // For local paths with backslashes on Windows, URI throws an exception - addLocalJarFile(new File(path)) + (addLocalJarFile(new File(path)), "local") } else { val uri = new Path(path).toUri // SPARK-17650: Make sure this is a valid URL before adding it to the list of dependencies Utils.validateURL(uri) - uri.getScheme match { + val uriScheme = uri.getScheme + val jarPaths = uriScheme match { // A JAR file which exists only on the driver node case null => // SPARK-22585 path without schema is not url encoded @@ -1988,18 +1989,28 @@ 
class SparkContext(config: SparkConf) extends Logging { // A JAR file which exists only on the driver node case "file" => addLocalJarFile(new File(uri.getPath)) // A JAR file which exists locally on every worker node - case "local" => "file:" + uri.getPath + case "local" => Seq("file:" + uri.getPath) + case "ivy" => + // Since `new Path(path).toUri` will lose query information, + // so here we use `URI.create(path)` + DependencyUtils.resolveMavenDependencies(URI.create(path)) + .flatMap(jar => addLocalJarFile(new File(jar))) case _ => checkRemoteJarFile(path) } + (jarPaths, uriScheme) } - if (key != null) { + if (keys.nonEmpty) { val timestamp = if (addedOnSubmit) startTime else System.currentTimeMillis - if (addedJars.putIfAbsent(key, timestamp).isEmpty) { - logInfo(s"Added JAR $path at $key with timestamp $timestamp") + val (added, existed) = keys.partition(addedJars.putIfAbsent(_, timestamp).isEmpty) + if (added.nonEmpty) { + val jarMessage = if (scheme != "ivy") "JAR" else "dependency jars of Ivy URI" + logInfo(s"Added $jarMessage $path at ${added.mkString(",")} with timestamp $timestamp") postEnvironmentUpdate() - } else { - logWarning(s"The jar $path has been added already. Overwriting of added jars " + - "is not supported in the current version.") + } + if (existed.nonEmpty) { + val jarMessage = if (scheme != "ivy") "JAR" else "dependency jars of Ivy URI" + logInfo(s"The $jarMessage $path at ${existed.mkString(",")} has been added already." + + " Overwriting of added jar is not supported in the current version.") } } } diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index bb3a20dce2da4..ad95b18ecaeb0 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -304,8 +304,8 @@ private[spark] class SparkSubmit extends Logging { // Resolve maven dependencies if there are any and add classpath to jars. 
Add them to py-files // too for packages that include Python code val resolvedMavenCoordinates = DependencyUtils.resolveMavenDependencies( - args.packagesExclusions, args.packages, args.repositories, args.ivyRepoPath, - args.ivySettingsPath) + packagesTransitive = true, args.packagesExclusions, args.packages, + args.repositories, args.ivyRepoPath, args.ivySettingsPath) if (!StringUtils.isBlank(resolvedMavenCoordinates)) { // In K8s client mode, when in the driver, add resolved jars early as we might need @@ -1360,6 +1360,7 @@ private[spark] object SparkSubmitUtils { * Resolves any dependencies that were supplied through maven coordinates * @param coordinates Comma-delimited string of maven coordinates * @param ivySettings An IvySettings containing resolvers to use + * @param transitive Whether resolving transitive dependencies, default is true * @param exclusions Exclusions to apply when resolving transitive dependencies * @return The comma-delimited path to the jars of the given maven artifacts including their * transitive dependencies @@ -1367,6 +1368,7 @@ private[spark] object SparkSubmitUtils { def resolveMavenCoordinates( coordinates: String, ivySettings: IvySettings, + transitive: Boolean, exclusions: Seq[String] = Nil, isTest: Boolean = false): String = { if (coordinates == null || coordinates.trim.isEmpty) { @@ -1396,7 +1398,7 @@ private[spark] object SparkSubmitUtils { val ivy = Ivy.newInstance(ivySettings) // Set resolve options to download transitive dependencies as well val resolveOptions = new ResolveOptions - resolveOptions.setTransitive(true) + resolveOptions.setTransitive(transitive) val retrieveOptions = new RetrieveOptions // Turn downloading and logging off for testing if (isTest) { diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala b/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala index 45ffdde58d6c3..c1288d64c53f7 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala @@ -22,7 +22,7 @@ import java.io.File import org.apache.commons.lang3.StringUtils import org.apache.spark.{SecurityManager, SparkConf} -import org.apache.spark.deploy.{DependencyUtils, SparkHadoopUtil} +import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.{config, Logging} import org.apache.spark.rpc.RpcEnv import org.apache.spark.util._ @@ -79,17 +79,11 @@ object DriverWrapper extends Logging { val secMgr = new SecurityManager(sparkConf) val hadoopConf = SparkHadoopUtil.newConfiguration(sparkConf) - val Seq(packagesExclusions, packages, repositories, ivyRepoPath, ivySettingsPath) = - Seq( - "spark.jars.excludes", - "spark.jars.packages", - "spark.jars.repositories", - "spark.jars.ivy", - "spark.jars.ivySettings" - ).map(sys.props.get(_).orNull) + val ivyProperties = DependencyUtils.getIvyProperties() - val resolvedMavenCoordinates = DependencyUtils.resolveMavenDependencies(packagesExclusions, - packages, repositories, ivyRepoPath, Option(ivySettingsPath)) + val resolvedMavenCoordinates = DependencyUtils.resolveMavenDependencies(true, + ivyProperties.packagesExclusions, ivyProperties.packages, ivyProperties.repositories, + ivyProperties.ivyRepoPath, Option(ivyProperties.ivySettingsPath)) val jars = { val jarsProp = sys.props.get(config.JARS.key).orNull if (!StringUtils.isBlank(resolvedMavenCoordinates)) { diff --git a/core/src/main/scala/org/apache/spark/deploy/DependencyUtils.scala 
b/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala similarity index 54% rename from core/src/main/scala/org/apache/spark/deploy/DependencyUtils.scala rename to core/src/main/scala/org/apache/spark/util/DependencyUtils.scala index 5a17a6b6e169c..9956ccedf5842 100644 --- a/core/src/main/scala/org/apache/spark/deploy/DependencyUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.spark.deploy +package org.apache.spark.util import java.io.File import java.net.URI @@ -25,12 +25,140 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.{SecurityManager, SparkConf, SparkException} +import org.apache.spark.deploy.SparkSubmitUtils import org.apache.spark.internal.Logging -import org.apache.spark.util.{MutableURLClassLoader, Utils} -private[deploy] object DependencyUtils extends Logging { +case class IvyProperties( + packagesExclusions: String, + packages: String, + repositories: String, + ivyRepoPath: String, + ivySettingsPath: String) + +private[spark] object DependencyUtils extends Logging { + + def getIvyProperties(): IvyProperties = { + val Seq(packagesExclusions, packages, repositories, ivyRepoPath, ivySettingsPath) = Seq( + "spark.jars.excludes", + "spark.jars.packages", + "spark.jars.repositories", + "spark.jars.ivy", + "spark.jars.ivySettings" + ).map(sys.props.get(_).orNull) + IvyProperties(packagesExclusions, packages, repositories, ivyRepoPath, ivySettingsPath) + } + + private def isInvalidQueryString(tokens: Array[String]): Boolean = { + tokens.length != 2 || StringUtils.isBlank(tokens(0)) || StringUtils.isBlank(tokens(1)) + } + + /** + * Parse URI query string's parameter value of `transitive` and `exclude`. + * Other invalid parameters will be ignored. + * + * @param uri Ivy URI need to be downloaded. + * @return Tuple value of parameter `transitive` and `exclude` value. + * + * 1. transitive: whether to download dependency jar of Ivy URI, default value is false + * and this parameter value is case-sensitive. Invalid value will be treat as false. + * Example: Input: exclude=org.mortbay.jetty:jetty&transitive=true + * Output: true + * + * 2. exclude: comma separated exclusions to apply when resolving transitive dependencies, + * consists of `group:module` pairs separated by commas. + * Example: Input: excludeorg.mortbay.jetty:jetty,org.eclipse.jetty:jetty-http + * Output: [org.mortbay.jetty:jetty,org.eclipse.jetty:jetty-http] + */ + private def parseQueryParams(uri: URI): (Boolean, String) = { + val uriQuery = uri.getQuery + if (uriQuery == null) { + (false, "") + } else { + val mapTokens = uriQuery.split("&").map(_.split("=")) + if (mapTokens.exists(isInvalidQueryString)) { + throw new IllegalArgumentException( + s"Invalid query string in Ivy URI ${uri.toString}: $uriQuery") + } + val groupedParams = mapTokens.map(kv => (kv(0), kv(1))).groupBy(_._1) + + // Parse transitive parameters (e.g., transitive=true) in an Ivy URI, default value is false + val transitiveParams = groupedParams.get("transitive") + if (transitiveParams.map(_.size).getOrElse(0) > 1) { + logWarning("It's best to specify `transitive` parameter in ivy URI query only once." 
+ + " If there are multiple `transitive` parameters, we will select the last one") + } + val transitive = + transitiveParams.flatMap(_.takeRight(1).map(_._2 == "true").headOption).getOrElse(false) + + // Parse an excluded list (e.g., exclude=org.mortbay.jetty:jetty,org.eclipse.jetty:jetty-http) + // in an Ivy URI. When downloading an Ivy URI jar, Spark won't download transitive jars + // in the exclusion list. + val exclusionList = groupedParams.get("exclude").map { params => + params.map(_._2).flatMap { excludeString => + val excludes = excludeString.split(",") + if (excludes.map(_.split(":")).exists(isInvalidQueryString)) { + throw new IllegalArgumentException( + s"Invalid exclude string in Ivy URI ${uri.toString}:" + + " expected 'org:module,org:module,..', found " + excludeString) + } + excludes + }.mkString(",") + }.getOrElse("") + + val validParams = Set("transitive", "exclude") + val invalidParams = groupedParams.keys.filterNot(validParams.contains).toSeq + if (invalidParams.nonEmpty) { + logWarning(s"Invalid parameters `${invalidParams.sorted.mkString(",")}` found " + + s"in Ivy URI query `$uriQuery`.") + } + + (transitive, exclusionList) + } + } + + /** + * Download Ivy URI's dependency jars. + * + * @param uri Ivy URI to be downloaded. The URI format should be: + * `ivy://group:module:version[?query]` + * Ivy URI query part format should be: + * `parameter=value&parameter=value...` + * Note that currently the Ivy URI query part supports two parameters: + * 1. transitive: whether to download dependent jars related to your Ivy URI. + * transitive=false or `transitive=true`, if not set, the default value is false. + * 2. exclude: exclusion list when downloading the Ivy URI jar and dependency jars. + * The `exclude` parameter content is a ',' separated `group:module` pair string: + * `exclude=group:module,group:module...` + * @return Comma separated string list of jars downloaded.
+ */ + def resolveMavenDependencies(uri: URI): Seq[String] = { + val ivyProperties = DependencyUtils.getIvyProperties() + val authority = uri.getAuthority + if (authority == null) { + throw new IllegalArgumentException( + s"Invalid Ivy URI authority in uri ${uri.toString}:" + + " Expected 'org:module:version', found null.") + } + if (authority.split(":").length != 3) { + throw new IllegalArgumentException( + s"Invalid Ivy URI authority in uri ${uri.toString}:" + + s" Expected 'org:module:version', found $authority.") + } + + val (transitive, exclusionList) = parseQueryParams(uri) + + resolveMavenDependencies( + transitive, + exclusionList, + authority, + ivyProperties.repositories, + ivyProperties.ivyRepoPath, + Option(ivyProperties.ivySettingsPath) + ).split(",") + } def resolveMavenDependencies( + packagesTransitive: Boolean, packagesExclusions: String, packages: String, repositories: String, @@ -51,7 +179,8 @@ private[deploy] object DependencyUtils extends Logging { SparkSubmitUtils.buildIvySettings(Option(repositories), Option(ivyRepoPath)) } - SparkSubmitUtils.resolveMavenCoordinates(packages, ivySettings, exclusions = exclusions) + SparkSubmitUtils.resolveMavenCoordinates(packages, ivySettings, + transitive = packagesTransitive, exclusions = exclusions) } def resolveAndDownloadJars( diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala index 55bfa70f21fc2..770ffeef4106f 100644 --- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala @@ -1034,6 +1034,122 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu .set(EXECUTOR_ALLOW_SPARK_CONTEXT, true)).stop() } } + + test("SPARK-33084: Add jar support Ivy URI -- default transitive = false") { + sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]")) + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0") + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) + assert(!sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar"))) + + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=true") + assert(sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar"))) + } + + test("SPARK-33084: Add jar support Ivy URI -- invalid transitive use default false") { + sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]")) + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=foo") + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) + assert(!sc.listJars().exists(_.contains("org.slf4j_slf4j-api-1.7.10.jar"))) + assert(!sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar"))) + } + + test("SPARK-33084: Add jar support Ivy URI -- transitive=true will download dependency jars") { + val logAppender = new LogAppender("transitive=true will download dependency jars") + withLogAppender(logAppender) { + sc = new SparkContext( + new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]")) + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=true") + val dependencyJars = Array( + "org.apache.hive_hive-storage-api-2.7.0.jar", + "org.slf4j_slf4j-api-1.7.10.jar", + "commons-lang_commons-lang-2.6.jar") + + dependencyJars.foreach(jar => assert(sc.listJars().exists(_.contains(jar)))) + + 
assert(logAppender.loggingEvents.count(_.getRenderedMessage.contains( + "Added dependency jars of Ivy URI" + + " ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=true")) == 1) + + // test dependency jars exist + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=true") + assert(logAppender.loggingEvents.count(_.getRenderedMessage.contains( + "The dependency jars of Ivy URI" + + " ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=true")) == 1) + val existMsg = logAppender.loggingEvents.filter(_.getRenderedMessage.contains( + "The dependency jars of Ivy URI" + + " ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=true")) + .head.getRenderedMessage + dependencyJars.foreach(jar => assert(existMsg.contains(jar))) + } + } + + test("SPARK-33084: Add jar support Ivy URI -- test exclude param when transitive=true") { + sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]")) + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0" + + "?exclude=commons-lang:commons-lang&transitive=true") + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) + assert(sc.listJars().exists(_.contains("org.slf4j_slf4j-api-1.7.10.jar"))) + assert(!sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar"))) + } + + test("SPARK-33084: Add jar support Ivy URI -- test different version") { + sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]")) + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0") + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.6.0") + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.6.0.jar"))) + } + + test("SPARK-33084: Add jar support Ivy URI -- test invalid param") { + val logAppender = new LogAppender("test log when have invalid parameter") + withLogAppender(logAppender) { + sc = new SparkContext( + new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]")) + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?" + + "invalidParam1=foo&invalidParam2=boo") + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) + assert(logAppender.loggingEvents.exists(_.getRenderedMessage.contains( + "Invalid parameters `invalidParam1,invalidParam2` found in Ivy URI query" + + " `invalidParam1=foo&invalidParam2=boo`."))) + } + } + + test("SPARK-33084: Add jar support Ivy URI -- test multiple transitive params") { + sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]")) + // transitive=invalidValue will win and treated as false + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?" + + "transitive=true&transitive=invalidValue") + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) + assert(!sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar"))) + + // transitive=true will win + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?" 
+ + "transitive=false&transitive=invalidValue&transitive=true") + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) + assert(sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar"))) + } + + test("SPARK-33084: Add jar support Ivy URI -- test param key case sensitive") { + sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]")) + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?TRANSITIVE=true") + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) + assert(!sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar"))) + + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=true") + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) + assert(sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar"))) + } + + test("SPARK-33084: Add jar support Ivy URI -- test transitive value case sensitive") { + sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]")) + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=TRUE") + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) + assert(!sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar"))) + + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=true") + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) + assert(sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar"))) + } } object SparkContextSuite { diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index dcd35f3f6b93f..c64f1b5814c20 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -47,7 +47,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ import org.apache.spark.internal.config.UI._ import org.apache.spark.launcher.SparkLauncher -import org.apache.spark.util.{CommandLineUtils, ResetSystemProperties, Utils} +import org.apache.spark.util.{CommandLineUtils, DependencyUtils, ResetSystemProperties, Utils} trait TestPrematureExit { suite: SparkFunSuite => diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala index 2a37f75d86a41..eaa06ce2aa057 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala @@ -135,6 +135,7 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { val jarPath = SparkSubmitUtils.resolveMavenCoordinates( main.toString, SparkSubmitUtils.buildIvySettings(Option(repo), Some(tempIvyPath)), + transitive = true, isTest = true) assert(jarPath.indexOf(tempIvyPath) >= 0, "should use non-default ivy path") } @@ -148,6 +149,7 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { val jarPath = SparkSubmitUtils.resolveMavenCoordinates( main.toString, SparkSubmitUtils.buildIvySettings(None, Some(tempIvyPath)), + transitive = true, isTest = true) assert(jarPath.indexOf("mylib") >= 0, "should find artifact") assert(jarPath.indexOf("mydep") >= 0, "should find dependency") @@ -159,6 +161,7 @@ class SparkSubmitUtilsSuite extends SparkFunSuite 
with BeforeAndAfterAll { val jarPath = SparkSubmitUtils.resolveMavenCoordinates( main.toString, SparkSubmitUtils.buildIvySettings(None, Some(tempIvyPath)), + transitive = true, isTest = true) assert(jarPath.indexOf("mylib") >= 0, "should find artifact") assert(jarPath.indexOf("mydep") >= 0, "should find dependency") @@ -171,6 +174,7 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { val jarPath = SparkSubmitUtils.resolveMavenCoordinates( main.toString, SparkSubmitUtils.buildIvySettings(None, Some(tempIvyPath)), + transitive = true, isTest = true) assert(jarPath.indexOf("mylib") >= 0, "should find artifact") assert(jarPath.indexOf(tempIvyPath) >= 0, "should be in new ivy path") @@ -183,6 +187,7 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { SparkSubmitUtils.resolveMavenCoordinates( "a:b:c", SparkSubmitUtils.buildIvySettings(None, Some(tempIvyPath)), + transitive = true, isTest = true) } } @@ -195,6 +200,7 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { val path = SparkSubmitUtils.resolveMavenCoordinates( coordinates, SparkSubmitUtils.buildIvySettings(None, Some(tempIvyPath)), + transitive = true, isTest = true) assert(path === "", "should return empty path") val main = MavenCoordinate("org.apache.spark", "spark-streaming-kafka-assembly_2.12", "1.2.0") @@ -202,6 +208,7 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { val files = SparkSubmitUtils.resolveMavenCoordinates( coordinates + "," + main.toString, SparkSubmitUtils.buildIvySettings(Some(repo), Some(tempIvyPath)), + transitive = true, isTest = true) assert(files.indexOf(main.artifactId) >= 0, "Did not return artifact") } @@ -214,7 +221,8 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { val files = SparkSubmitUtils.resolveMavenCoordinates( main.toString, SparkSubmitUtils.buildIvySettings(Some(repo), Some(tempIvyPath)), - Seq("my.great.dep:mydep"), + exclusions = Seq("my.great.dep:mydep"), + transitive = true, isTest = true) assert(files.indexOf(main.artifactId) >= 0, "Did not return artifact") assert(files.indexOf("my.great.dep") < 0, "Returned excluded artifact") @@ -250,7 +258,8 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { testUtilSettings.setDefaultIvyUserDir(new File(tempIvyPath)) IvyTestUtils.withRepository(main, Some(dep), Some(dummyIvyLocal), useIvyLayout = true, ivySettings = testUtilSettings) { repo => - val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, settings, isTest = true) + val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, settings, + transitive = true, isTest = true) assert(jarPath.indexOf("mylib") >= 0, "should find artifact") assert(jarPath.indexOf(tempIvyPath) >= 0, "should be in new ivy path") assert(jarPath.indexOf("mydep") >= 0, "should find dependency") @@ -265,6 +274,7 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { val jarPath = SparkSubmitUtils.resolveMavenCoordinates( main.toString, ivySettings, + transitive = true, isTest = true) val r = """.*org.apache.spark-spark-submit-parent-.*""".r assert(!ivySettings.getDefaultCache.listFiles.map(_.getName) diff --git a/core/src/test/scala/org/apache/spark/util/DependencyUtils.scala b/core/src/test/scala/org/apache/spark/util/DependencyUtils.scala new file mode 100644 index 0000000000000..d181d4d8ce669 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/util/DependencyUtils.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the 
Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util + +import java.net.URI + +import org.apache.spark.SparkFunSuite + +class DependencyUtilsSuite extends SparkFunSuite { + + test("SPARK-33084: Add jar support Ivy URI -- test invalid ivy uri") { + val e1 = intercept[IllegalArgumentException] { + DependencyUtils.resolveMavenDependencies(URI.create("ivy://")) + }.getMessage + assert(e1.contains("Expected authority at index 6: ivy://")) + + val e2 = intercept[IllegalArgumentException] { + DependencyUtils.resolveMavenDependencies(URI.create("ivy://org.apache.hive:hive-contrib")) + }.getMessage + assert(e2.contains("Invalid Ivy URI authority in uri ivy://org.apache.hive:hive-contrib:" + + " Expected 'org:module:version', found org.apache.hive:hive-contrib.")) + + val e3 = intercept[IllegalArgumentException] { + DependencyUtils.resolveMavenDependencies( + URI.create("ivy://org.apache.hive:hive-contrib:2.3.7?foo=")) + }.getMessage + assert(e3.contains("Invalid query string in Ivy URI" + + " ivy://org.apache.hive:hive-contrib:2.3.7?foo=:")) + + val e4 = intercept[IllegalArgumentException] { + DependencyUtils.resolveMavenDependencies( + URI.create("ivy://org.apache.hive:hive-contrib:2.3.7?bar=&baz=foo")) + }.getMessage + assert(e4.contains("Invalid query string in Ivy URI" + + " ivy://org.apache.hive:hive-contrib:2.3.7?bar=&baz=foo: bar=&baz=foo")) + + val e5 = intercept[IllegalArgumentException] { + DependencyUtils.resolveMavenDependencies( + URI.create("ivy://org.apache.hive:hive-contrib:2.3.7?exclude=org.pentaho")) + }.getMessage + assert(e5.contains("Invalid exclude string in Ivy URI" + + " ivy://org.apache.hive:hive-contrib:2.3.7?exclude=org.pentaho:" + + " expected 'org:module,org:module,..', found org.pentaho")) + } +} diff --git a/docs/sql-ref-syntax-aux-resource-mgmt-add-jar.md b/docs/sql-ref-syntax-aux-resource-mgmt-add-jar.md index 4694bff99daf5..6d31125fd612d 100644 --- a/docs/sql-ref-syntax-aux-resource-mgmt-add-jar.md +++ b/docs/sql-ref-syntax-aux-resource-mgmt-add-jar.md @@ -33,8 +33,18 @@ ADD JAR file_name * **file_name** - The name of the JAR file to be added. It could be either on a local file system or a distributed file system. + The name of the JAR file to be added. It could be either on a local file system or a distributed file system or an Ivy URI. + Apache Ivy is a popular dependency manager focusing on flexibility and simplicity. Now we support two parameter in URI query string: + * transitive: whether to download dependent jars related to your ivy URL. It is case-sensitive and only take last one if multiple transitive parameters are specified. + * exclude: exclusion list during downloading Ivy URI jar and dependent jars. 
+ + User can write Ivy URI such as: + + ivy://group:module:version + ivy://group:module:version?transitive=[true|false] + ivy://group:module:version?transitive=[true|false]&exclude=group:module,group:module + ### Examples ```sql @@ -42,6 +52,10 @@ ADD JAR /tmp/test.jar; ADD JAR "/path/to/some.jar"; ADD JAR '/some/other.jar'; ADD JAR "/path with space/abc.jar"; +ADD JAR "ivy://group:module:version"; +ADD JAR "ivy://group:module:version?transitive=false" +ADD JAR "ivy://group:module:version?transitive=true" +ADD JAR "ivy://group:module:version?exclude=group:module&transitive=true" ``` ### Related Statements diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala index 48d8c3d325347..60ca06dbe0d52 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.internal import java.io.File +import java.net.URI import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path @@ -34,6 +35,7 @@ import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution._ import org.apache.spark.sql.streaming.StreamingQueryManager import org.apache.spark.sql.util.ExecutionListenerManager +import org.apache.spark.util.DependencyUtils /** * A class that holds all session-specific state in a given [[SparkSession]]. @@ -159,6 +161,13 @@ class SessionResourceLoader(session: SparkSession) extends FunctionResourceLoade } } + def resolveJars(path: URI): Seq[String] = { + path.getScheme match { + case "ivy" => DependencyUtils.resolveMavenDependencies(path) + case _ => path.toString :: Nil + } + } + /** * Add a jar path to [[SparkContext]] and the classloader. * @@ -167,16 +176,19 @@ class SessionResourceLoader(session: SparkSession) extends FunctionResourceLoade * [[SessionState]]. 
*/ def addJar(path: String): Unit = { - session.sparkContext.addJar(path) - val uri = new Path(path).toUri - val jarURL = if (uri.getScheme == null) { - // `path` is a local file path without a URL scheme - new File(path).toURI.toURL - } else { - // `path` is a URL with a scheme - uri.toURL + val uri = URI.create(path) + resolveJars(uri).foreach { p => + session.sparkContext.addJar(p) + val uri = new Path(p).toUri + val jarURL = if (uri.getScheme == null) { + // `path` is a local file path without a URL scheme + new File(p).toURI.toURL + } else { + // `path` is a URL with a scheme + uri.toURL + } + session.sharedState.jarClassLoader.addURL(jarURL) } - session.sharedState.jarClassLoader.addURL(jarURL) Thread.currentThread().setContextClassLoader(session.sharedState.jarClassLoader) } } diff --git a/sql/core/src/test/resources/SPARK-33084.jar b/sql/core/src/test/resources/SPARK-33084.jar new file mode 100644 index 0000000000000000000000000000000000000000..1dc5e9303b707f9b788c28fc23a0f0aa5b2c323b GIT binary patch literal 6322 zcmbVR2|Sc*`yN~NeQB)8R>m4*36(u&7z~Y_WM?ppbwUZ#A~{(@)+GBf_AJ?Dm!*=j zi^@T^VURzk)H$cl`M&Re&+lF4H}`ct_x(Qa``qt+jXsowlnQWY&Qb9Of1G^VQ63z1 zHH=imv|*YO`rq6r08R&P5w-k4&x4p;cCU(iCu zKz%*^WpK%1>B%`SQApo&QK*}(X zM!=b&aKpytdt|pe*>kBHZrkSuP_<*SdC@9KaPN9JPG!?%fxVT}jAPa7vlkKIC`15& z;`U!NAw3Au!^`n|51S_(elGka|t{Vtysh`*z=N@0Xu)k4t4bTYQO;t8=H3+3923Ocu-&_+Lz5yDcWdw-yeWtrBE)d}tidKZpVjd$gLzDk4}+mE zVh@pxZq(>dmq^nAZ?@M3o>b!!A;ZHu29PyU?*;j(TM^ndYOB|KrIOGq$bAbNVM!h; zhU0B@AI}#v4(i671?eiOk*s_1YA75NV`j{DJ#A1_;Lwk_AjGiWoDA_Le296T@bE%u z$#g$LXz%mz8x-E%?Lv0p)}x#7{oNO@DCP9V`=4*q%1Eo$StVt^B+=#^k;{{MyRs`p zc$fGVZP2Z~tWe*qUvmquZo(&9ihNhdJrE!GEJ|#>N2G(hsp^Fz+8)|dTlPLpoI0PR zXZ8!SsTmiPs#hu|DAOPvy|^}bF7j0F><~jQ?%aFK3D?Mkw(+HRogoPq<02OqTlsF6 z-TMMRt>%DP(2`4r#10xRH5hrJuso|SgOkX!DX21{;p5iT2}0yn3HX3E`kA&fT2!Hn8SaGo?+v?UaYCu^8xUYm0aX?>bfdQTcB@fj2qtjU}6=q^)9kRT+JkklwQwa9p_ z3w_BdZ=qa0S8AWZ?AZOgNw2TP_CK%WRAunJLC;5;DpTB_X)#i)SLbxL6ftDfpwC); zsq{`r1k>fRv=>suvQyVR9|%m+Wu}VCJR@~@6IF(gSG~UdTj|U^&n3*jQVsB(THb|e1OiomY;AVP(}2e%5v!%V_`h|𝔄e>Y*Uq;UTJf%* znp31=O}3}5OevvMBwccDxtNmHxU|pQP^&99E6rb0?)ibqACtyLa-wur-TqQOA|}U) zR^pT2>r-2A$jDb{8yHws+}h12F!NYqmJP#%^*tts#bDxv1|cvAsG6NPVd9$gss`0% zEW_Wdh9_;5CqpdGs_j&vXRx+83L(UGb{3jV;QSNzYn)y?@7h|%=Pd-7g1UjFpY}@{r>6?i z_a9!@wF&G;RnpNc0m8&HJ}pmTAv)1S)#u&K+B!r+5ODF_CFV3P z;jBmsO*7MnsXeA|!4F^m;hnE3*u(f2@}NSW63AOH!vKfuY#bb!WPF;jS`0>DU8xbDd1yB%MsuaG$z5 z+r?wdNXMAY%tU(Ya_y-dr=d^y)ie(~+2MF9>+VGQe&g+8BX2>*28ZTns=1u|9qq-eqRglJv?4?7G*FyPwck?3(VaygV?zYh9m0J8^1KatouJ+3N?kmp7#h&f5L;+ySV>Yx++ba$_ zKg*WNx4V`JiW&`~Ukz-o0`Uat^7Rei>ED-_ztBxNdDbW2zsmbuOM0Bb zOT)_x4JK!3yH~`eTgz(BV$U6)1Cz1dxdX%iF*Ny8SM8hgnGMC0UF{hf$77Be#GdOY z_Adhqtq*at!!0oExReYBD&fVZ^JGC_MHt@RfEApmq+92)1#g~DOKsYZg{u?U;^T4vdx>rIu>i_P1DixHn{ z*O4)$IvqPyLMucq;~;#s;S<~Tjk{E#AUZBJxyXL4`8jUhh_YYw|gM1{=O@Ss#Gi5W9Hr+^9MH7 zD`_%Bf7*FUAk>gO3{bF=TG(>@LUi{4hHUrOnT?nd9vnV6{+ z7(zQxN6@i)sKNPOj18ZnFzk`u=N$4h<+lbhOJtwf(%iH3RW2|$wF6`3M6Z^ssY%$DE}MBW5Ew`BRigsQemnJRKCir)BwERs2qiHM=H45GzO`J%>Z zl5!%nr6I?)IqY0cTns9>!*R8M?_|-NR+(DJ%X-15yHEBAN`4MLAJW7} zjr!0}ZEposzrN_WkZD<8sEDy#XDMA0yNa<7fOv!nxr}b6P^C$>W>txrT^Y>eOw%)? 
zmesgabU94m)B>KyC;YWiBpm!Pd!HulMM2v5wDqdsmHzt}p;PKuW7SZ_S(ZomMg@ZP zbmQK7h7XrzM<;Eaa9Yhg5L9TZKGs-6Yq1rsW4rY*^{N2whJ?TqApN9e?sc=1H>#Wj zVawA(yN}%_=WdD=p2)uCcgz3w&PUm|xk5`(Ob)`gR7+IRGyv}QfM3C~~@F1Sfm z;rkTJ)Ei;`2z@6QVV4x^bf1Y9n!!Zd-3y7=YQq_+zT*NVQmoy7Nld9%aeX6qZ!Nxu z4*Zyp`~G9+YZ3b}jk{fM+{JKlg&OflFrmC`nnWwH@3M;Yzj^*fJ#xm?c_iFp!7}}sFNQP=X+LU+J8Ngr)^eX!qMtF* zM~IJ4BzDR?E;2{xaotF~QdW z4VIRD-j|8^o3n1v*l-JWYMHG>^XE`Fl5@Qr>nKq!S4zRp@A-?B&8+9Vf>Nd0V4}i@ zM-7vCiHIDuY9=oQ+<&Z~n_>{JzpJHOZ)>7|dX?h-VE5?n{sb8gx*f+Slq)DTb9W~C z9s}qyOJ;<)ZrsEfmfWQ-Q)1Pg67Mx=f%FyUc#hvF;J{b6VbdXBh7#3Ys}o-s1`sD$ zFtxSGnv$GTY<%7YPZohcG;}u0wYTq;4JCs)#-ki5>P3d;W5&xnV*%Z{Bp{WcUhaxj zVl+lTvQ7U=BA~?^r<~JmF4q=Y-bI@O0 zN4Me1Q$9P2&$L#_n`n->t*UY_Skq@r8sQH$CZ6h7Ul4XzIz=`( zb6;}aJmqP{awbc>%S%S*6$M!PTx!{E7pn2S)Z#gxX^rF_;)NZuHfDIYZ4j}z=SApcFAem#9W|fNJ^&| zeFP~fkxCSvC#F%9Kbua=R!3W?q8{LJbZ!z6lQLJx({ z8JEwPMfzhd)$4_dK}5B64Ycx%R&+(7y)$q+Vckn8ZC!+<6kHNqB~?)_ z1vW&{(HTfeNlHzEzR7zs2kIU~qDw+-MW?h^0`dwfs-@GXH6kLeCEE2KA`0r%f$E48 zb&-+F9}D=z!O3az0ClL@5fQVH{ImXekjsHF0Df0FJpM_IJc9jNk34ifw5*U*2m9}^ zKdF;{9{5*s;UDln2JSjI{VEtOV&spR`E_&igS+xT zp+20Mzg({G`5wmkgZo>n^Mm~7QhHc-zZwHzf8YP$>-QVys0trt{WF;N2zzkP?C(GR z8T^Z?{LgHMb@{6uvd#P#Y=5oNN6f#j(?{G{Az}2G c-v{`y80kaFjvaE*9ej8WMEu~x{~7@Q2O^~=-v9sr literal 0 HcmV?d00001 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index b7cec55245564..0ba58e1634f06 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -22,6 +22,8 @@ import java.net.{MalformedURLException, URL} import java.sql.{Date, Timestamp} import java.util.concurrent.atomic.AtomicBoolean +import org.apache.commons.io.FileUtils + import org.apache.spark.{AccumulatorSuite, SparkException} import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart} import org.apache.spark.sql.catalyst.expressions.GenericRow @@ -3719,6 +3721,25 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } + test("SPARK-33084: Add jar support Ivy URI in SQL") { + val sc = spark.sparkContext + // default transitive=false, only download specified jar + sql("ADD JAR ivy://org.apache.hive.hcatalog:hive-hcatalog-core:2.3.7") + assert(sc.listJars() + .exists(_.contains("org.apache.hive.hcatalog_hive-hcatalog-core-2.3.7.jar"))) + + // test download ivy URL jar return multiple jars + sql("ADD JAR ivy://org.scala-js:scalajs-test-interface_2.12:1.2.0?transitive=true") + assert(sc.listJars().exists(_.contains("scalajs-library_2.12"))) + assert(sc.listJars().exists(_.contains("scalajs-test-interface_2.12"))) + + sql("ADD JAR ivy://org.apache.hive:hive-contrib:2.3.7" + + "?exclude=org.pentaho:pentaho-aggdesigner-algorithm&transitive=true") + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-contrib-2.3.7.jar"))) + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-exec-2.3.7.jar"))) + assert(!sc.listJars().exists(_.contains("org.pentaho.pentaho_aggdesigner-algorithm"))) + } + test("SPARK-33677: LikeSimplification should be skipped if pattern contains any escapeChar") { withTempView("df") { Seq("m@ca").toDF("s").createOrReplaceTempView("df") @@ -3771,6 +3792,39 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } }) } + + test("SPARK-33084: Add jar support Ivy URI in SQL -- jar contains udf class") { + val sumFuncClass = "org.apache.spark.examples.sql.Spark33084" + val functionName = "test_udf" + withTempDir { 
dir => + System.setProperty("ivy.home", dir.getAbsolutePath) + val sourceJar = new File(Thread.currentThread().getContextClassLoader + .getResource("SPARK-33084.jar").getFile) + val targetCacheJarDir = new File(dir.getAbsolutePath + + "/local/org.apache.spark/SPARK-33084/1.0/jars/") + targetCacheJarDir.mkdir() + // copy jar to local cache + FileUtils.copyFileToDirectory(sourceJar, targetCacheJarDir) + withTempView("v1") { + withUserDefinedFunction( + s"default.$functionName" -> false, + functionName -> true) { + // create temporary function without class + val e = intercept[AnalysisException] { + sql(s"CREATE TEMPORARY FUNCTION $functionName AS '$sumFuncClass'") + }.getMessage + assert(e.contains("Can not load class 'org.apache.spark.examples.sql.Spark33084")) + sql("ADD JAR ivy://org.apache.spark:SPARK-33084:1.0") + sql(s"CREATE TEMPORARY FUNCTION $functionName AS '$sumFuncClass'") + // create a view using a function in 'default' database + sql(s"CREATE TEMPORARY VIEW v1 AS SELECT $functionName(col1) FROM VALUES (1), (2), (3)") + // view v1 should still using function defined in `default` database + checkAnswer(sql("SELECT * FROM v1"), Seq(Row(2.0))) + } + } + System.clearProperty("ivy.home") + } + } } case class Foo(bar: Option[String]) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala index 5963a71f55035..654f9f62ebdd3 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.hive +import java.net.URI + import org.apache.spark.annotation.Unstable import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.analysis.{Analyzer, ResolveSessionCatalog} @@ -127,7 +129,10 @@ class HiveSessionResourceLoader( extends SessionResourceLoader(session) { private lazy val client = clientBuilder() override def addJar(path: String): Unit = { - client.addJar(path) - super.addJar(path) + val uri = URI.create(path) + resolveJars(uri).foreach { p => + client.addJar(p) + super.addJar(p) + } } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index c0758dcdfc879..97e685efd27de 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -124,6 +124,7 @@ private[hive] object IsolatedClientLoader extends Logging { SparkSubmitUtils.buildIvySettings( Some(remoteRepos), ivyPath), + transitive = true, exclusions = version.exclusions) } val allFiles = classpath.split(",").map(new File(_)).toSet diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index 1cabf6033e8d8..21cc6af398eec 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -1219,6 +1219,23 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd } } } + + test("SPARK-33084: Add jar support Ivy URI in SQL") { + val testData = TestHive.getHiveFile("data/files/sample.json").toURI + withTable("t") { + sql("ADD JAR 
ivy://org.apache.hive.hcatalog:hive-hcatalog-core:2.3.7") + sql( + """CREATE TABLE t(a string, b string) + |ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'""".stripMargin) + sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE t""") + sql("SELECT * FROM src JOIN t on src.key = t.a") + assert(sql("LIST JARS").filter(_.getString(0).contains( + "org.apache.hive.hcatalog_hive-hcatalog-core-2.3.7.jar")).count() > 0) + assert(sql("LIST JAR"). + filter(_.getString(0).contains( + "org.apache.hive.hcatalog_hive-hcatalog-core-2.3.7.jar")).count() > 0) + } + } } // for SPARK-2180 test From 2553d53dc85fdf1127446941e2bc749e721c1b57 Mon Sep 17 00:00:00 2001 From: kozakana Date: Sat, 26 Dec 2020 16:30:50 +0900 Subject: [PATCH 0882/1009] [SPARK-33897][SQL] Can't set option 'cross' in join method ### What changes were proposed in this pull request? [The PySpark documentation](https://spark.apache.org/docs/3.0.1/api/python/pyspark.sql.html#pyspark.sql.DataFrame.join) says "Must be one of: inner, cross, outer, full, fullouter, full_outer, left, leftouter, left_outer, right, rightouter, right_outer, semi, leftsemi, left_semi, anti, leftanti and left_anti." However, I get the following error when I set the cross option. ``` scala> val df1 = spark.createDataFrame(Seq((1,"a"),(2,"b"))) df1: org.apache.spark.sql.DataFrame = [_1: int, _2: string] scala> val df2 = spark.createDataFrame(Seq((1,"A"),(2,"B"), (3, "C"))) df2: org.apache.spark.sql.DataFrame = [_1: int, _2: string] scala> df1.join(right = df2, usingColumns = Seq("_1"), joinType = "cross").show() java.lang.IllegalArgumentException: requirement failed: Unsupported using join type Cross at scala.Predef$.require(Predef.scala:281) at org.apache.spark.sql.catalyst.plans.UsingJoin.<init>(joinTypes.scala:106) at org.apache.spark.sql.Dataset.join(Dataset.scala:1025) ... 53 elided ``` ### Why are the changes needed? The documentation says the cross option can be set, but when I try to set it, I get a java.lang.IllegalArgumentException. ### Does this PR introduce _any_ user-facing change? With this fix, the behavior matches the documentation. ### How was this patch tested? There is already a test for [JoinTypes](https://github.com/apache/spark/blob/1b9fd67904671ea08526bfb7a97d694815d47665/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/JoinTypesTest.scala), but I can't find a test for the join option itself. Closes #30803 from kozakana/allow_cross_option.
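For reference, a minimal sketch (not part of the original report) of the call that succeeds once `Cross` is accepted by `UsingJoin`; it reuses the DataFrames from the example above and simply no longer throws:

```
scala> val df1 = spark.createDataFrame(Seq((1, "a"), (2, "b")))
scala> val df2 = spark.createDataFrame(Seq((1, "A"), (2, "B"), (3, "C")))
scala> df1.join(right = df2, usingColumns = Seq("_1"), joinType = "cross").show()
// no IllegalArgumentException anymore; the using-columns cross join is planned and executed
```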
Authored-by: kozakana Signed-off-by: HyukjinKwon --- .../apache/spark/sql/catalyst/plans/joinTypes.scala | 2 +- .../org/apache/spark/sql/DataFrameJoinSuite.scala | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala index feea1d2177ef0..da3cfb4c9de07 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala @@ -102,7 +102,7 @@ case class NaturalJoin(tpe: JoinType) extends JoinType { } case class UsingJoin(tpe: JoinType, usingColumns: Seq[String]) extends JoinType { - require(Seq(Inner, LeftOuter, LeftSemi, RightOuter, FullOuter, LeftAnti).contains(tpe), + require(Seq(Inner, LeftOuter, LeftSemi, RightOuter, FullOuter, LeftAnti, Cross).contains(tpe), "Unsupported using join type " + tpe) override def sql: String = "USING " + tpe.sql } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala index c317f562c65dc..1513c2e90e27c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala @@ -122,6 +122,16 @@ class DataFrameJoinSuite extends QueryTest df2.crossJoin(df1), Row(2, "2", 1, "1") :: Row(2, "2", 3, "3") :: Row(4, "4", 1, "1") :: Row(4, "4", 3, "3") :: Nil) + + checkAnswer( + df1.join(df2, Nil, "cross"), + Row(1, "1", 2, "2") :: Row(1, "1", 4, "4") :: + Row(3, "3", 2, "2") :: Row(3, "3", 4, "4") :: Nil) + + checkAnswer( + df2.join(df1, Nil, "cross"), + Row(2, "2", 1, "1") :: Row(2, "2", 3, "3") :: + Row(4, "4", 1, "1") :: Row(4, "4", 3, "3") :: Nil) } test("broadcast join hint using broadcast function") { From 37ae0a608670c660ba4c92b9ebb9cb9fb2bd67e6 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Sat, 26 Dec 2020 17:40:19 -0600 Subject: [PATCH 0883/1009] [SPARK-33560][TEST-MAVEN][BUILD] Add "unused-import" check to Maven compilation process MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Similar to SPARK-33441, this PR adds an `unused-import` check to the Maven compilation process. After this PR, an unused import will trigger a Maven compilation error. For the Scala 2.13 profile, this PR also leaves a TODO(SPARK-33499), similar to SPARK-33441, because `scala.language.higherKinds` no longer needs to be imported explicitly since Scala 2.13.1. ### Why are the changes needed? Let the Maven build also treat unused imports as compilation errors. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Pass the Jenkins or GitHub Actions build. - Local manual test: add an unused import intentionally to trigger a Maven compilation error. Closes #30784 from LuciferYang/SPARK-33560.
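As an illustrative sketch only (a hypothetical file, not part of this patch), a class like the following now fails the Maven build because of its unused import once `-Ywarn-unused:imports` is combined with warnings treated as errors:

```
// Hypothetical example: ArrayBuffer is imported but never used, so scalac
// reports an "Unused import" warning, which the new compiler settings turn
// into a compilation error during the Maven build.
import scala.collection.mutable.ArrayBuffer

object UnusedImportExample {
  def main(args: Array[String]): Unit = println("ArrayBuffer is never referenced")
}
```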
Authored-by: yangjie01 Signed-off-by: Sean Owen --- pom.xml | 43 +++++++++++++++++++ .../sources/StreamingDataSourceV2Suite.scala | 2 +- 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 4781f981a5949..609c9fc0ab0c3 100644 --- a/pom.xml +++ b/pom.xml @@ -2508,6 +2508,9 @@ -feature -explaintypes -target:jvm-1.8 + -Xfatal-warnings + -Ywarn-unused:imports + -P:silencer:globalFilters=.*deprecated.* -Xms1024m @@ -2521,6 +2524,13 @@ ${java.version} -Xlint:all,-serial,-path,-try + + + com.github.ghik + silencer-plugin_${scala.version} + 1.6.0 + + @@ -3243,6 +3253,39 @@ + + + + + net.alchim31.maven + scala-maven-plugin + + + -unchecked + -deprecation + -feature + -explaintypes + -target:jvm-1.8 + -Wconf:cat=deprecation:wv,any:e + + -Wconf:cat=scaladoc:wv + -Wconf:cat=lint-multiarg-infix:wv + -Wconf:cat=other-nullary-override:wv + -Wconf:cat=other-match-analysis&site=org.apache.spark.sql.catalyst.catalog.SessionCatalog.lookupFunction.catalogFunction:wv + -Wconf:cat=other-pure-statement&site=org.apache.spark.streaming.util.FileBasedWriteAheadLog.readAll.readFile:wv + -Wconf:cat=other-pure-statement&site=org.apache.spark.scheduler.OutputCommitCoordinatorSuite.<local OutputCommitCoordinatorSuite>.futureAction:wv + + + + + + + + + + -Wconf:msg=^(?=.*?method|value|type|object|trait|inheritance)(?=.*?deprecated)(?=.*?since 2.13).+$:s + -Wconf:msg=^(?=.*?Widening conversion from)(?=.*?is deprecated because it loses precision).+$:s + -Wconf:msg=Auto-application to \`\(\)\` is deprecated:s + -Wconf:msg=method with a single empty parameter list overrides method without any parameter list:s + -Wconf:msg=method without a parameter list overrides a method with a single empty one:s From e0d2ffec3109d973b106adeab5de5ce0c91a4a68 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 29 Dec 2020 13:29:48 +0000 Subject: [PATCH 0905/1009] [SPARK-33859][SQL] Support V2 ALTER TABLE .. RENAME PARTITION ### What changes were proposed in this pull request? 1. Add `renamePartition()` to the `SupportsPartitionManagement` 2. Implement `renamePartition()` in `InMemoryPartitionTable` 3. Add v2 execution node `AlterTableRenamePartitionExec` 4. Resolve the logical node `AlterTableRenamePartition` to `AlterTableRenamePartitionExec` for v2 tables that support `SupportsPartitionManagement` 5. Move v1 tests to the base suite `org.apache.spark.sql.execution.command.AlterTableRenamePartitionSuiteBase` to run them for v2 table catalogs. ### Why are the changes needed? To have feature parity with Datasource V1. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? By running the unified tests: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableRenamePartitionSuite" ``` Closes #30935 from MaxGekk/alter-table-rename-partition-v2. 
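As a rough sketch of the user-facing effect (catalog, namespace, and table names below are placeholders for a v2 catalog whose tables implement `SupportsPartitionManagement`):

```
// Placeholder identifiers; assumes `testcat` is a registered v2 catalog with
// partition-aware tables.
spark.sql("CREATE TABLE testcat.ns.tbl (id BIGINT, data STRING) PARTITIONED BY (id)")
spark.sql("INSERT INTO testcat.ns.tbl PARTITION (id = 1) SELECT 'abc'")
// Before this change: AnalysisException("ALTER TABLE ... RENAME TO PARTITION is not supported for v2 tables.")
spark.sql("ALTER TABLE testcat.ns.tbl PARTITION (id = 1) RENAME TO PARTITION (id = 2)")
```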
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../catalog/SupportsPartitionManagement.java | 17 +++ .../analysis/ResolvePartitionSpec.scala | 9 +- .../sql/catalyst/parser/AstBuilder.scala | 2 +- .../catalyst/plans/logical/v2Commands.scala | 6 +- .../connector/InMemoryPartitionTable.scala | 13 ++ .../spark/sql/connector/InMemoryTable.scala | 22 +++ .../SupportsPartitionManagementSuite.scala | 19 +++ .../analysis/ResolveSessionCatalog.scala | 9 +- .../v2/AlterTableRenamePartitionExec.scala | 39 +++++ .../datasources/v2/DataSourceV2Strategy.scala | 9 +- .../AlterTableAddPartitionSuiteBase.scala | 3 - ...AlterTableRenamePartitionParserSuite.scala | 4 +- .../AlterTableRenamePartitionSuiteBase.scala | 130 ++++++++++++++++- .../command/DDLCommandTestUtils.scala | 3 + .../v1/AlterTableAddPartitionSuite.scala | 16 --- .../v1/AlterTableRenamePartitionSuite.scala | 136 +----------------- .../command/v1/CommandSuiteBase.scala | 17 +++ .../v2/AlterTableAddPartitionSuite.scala | 27 ---- .../v2/AlterTableRenamePartitionSuite.scala | 22 +-- .../command/v2/CommandSuiteBase.scala | 27 +++- .../execution/command/CommandSuiteBase.scala | 17 +++ 21 files changed, 343 insertions(+), 204 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableRenamePartitionExec.scala diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java index 409ab3f5f9335..a7008293a3e19 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java @@ -139,4 +139,21 @@ Map loadPartitionMetadata(InternalRow ident) * @return an array of Identifiers for the partitions */ InternalRow[] listPartitionIdentifiers(String[] names, InternalRow ident); + + /** + * Rename an existing partition of the table. 
+ * + * @param from an existing partition identifier to rename + * @param to new partition identifier + * @return true if renaming completes successfully otherwise false + * @throws UnsupportedOperationException If partition renaming is not supported + * @throws PartitionAlreadyExistsException If the `to` partition exists already + * @throws NoSuchPartitionException If the `from` partition does not exist + */ + default boolean renamePartition(InternalRow from, InternalRow to) + throws UnsupportedOperationException, + PartitionAlreadyExistsException, + NoSuchPartitionException { + throw new UnsupportedOperationException("Partition renaming is not supported"); + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala index 2c2bea6f89d49..84be3f294a6ea 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala @@ -52,13 +52,14 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { requireExactMatchedPartitionSpec(table.name, _, partitionSchema.fieldNames))) case r @ AlterTableRenamePartition( - ResolvedTable(_, _, table: SupportsPartitionManagement), from, _) => + ResolvedTable(_, _, table: SupportsPartitionManagement), from, to) => val partitionSchema = table.partitionSchema() - r.copy(from = resolvePartitionSpecs( + val Seq(resolvedFrom, resolvedTo) = resolvePartitionSpecs( table.name, - Seq(from), + Seq(from, to), partitionSchema, - requireExactMatchedPartitionSpec(table.name, _, partitionSchema.fieldNames)).head) + requireExactMatchedPartitionSpec(table.name, _, partitionSchema.fieldNames)) + r.copy(from = resolvedFrom, to = resolvedTo) case r @ ShowPartitions(ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs) => r.copy(pattern = resolvePartitionSpecs( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index c5707812e44bb..771bb5a1708b0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3845,7 +3845,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg ctx.multipartIdentifier, "ALTER TABLE ... 
RENAME TO PARTITION"), UnresolvedPartitionSpec(visitNonOptionalPartitionSpec(ctx.from)), - visitNonOptionalPartitionSpec(ctx.to)) + UnresolvedPartitionSpec(visitNonOptionalPartitionSpec(ctx.to))) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 02fb3a86db5d5..c51291d370c80 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -694,9 +694,11 @@ case class AlterTableDropPartition( case class AlterTableRenamePartition( child: LogicalPlan, from: PartitionSpec, - to: TablePartitionSpec) extends Command { + to: PartitionSpec) extends Command { override lazy val resolved: Boolean = - childrenResolved && from.isInstanceOf[ResolvedPartitionSpec] + childrenResolved && + from.isInstanceOf[ResolvedPartitionSpec] && + to.isInstanceOf[ResolvedPartitionSpec] override def children: Seq[LogicalPlan] = child :: Nil } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala index 83183a2ef6e2b..a3d610af2c06d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala @@ -107,4 +107,17 @@ class InMemoryPartitionTable( currentRow == ident }.toArray } + + override def renamePartition(from: InternalRow, to: InternalRow): Boolean = { + if (memoryTablePartitions.containsKey(to)) { + throw new PartitionAlreadyExistsException(name, to, partitionSchema) + } else { + val partValue = memoryTablePartitions.remove(from) + if (partValue == null) { + throw new NoSuchPartitionException(name, from, partitionSchema) + } + memoryTablePartitions.put(to, partValue) == null && + renamePartitionKey(partitionSchema, from.toSeq(schema), to.toSeq(schema)) + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala index c4c5835d9d1f5..201d67a815bea 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala @@ -165,6 +165,28 @@ class InMemoryTable( protected def addPartitionKey(key: Seq[Any]): Unit = {} + protected def renamePartitionKey( + partitionSchema: StructType, + from: Seq[Any], + to: Seq[Any]): Boolean = { + val rows = dataMap.remove(from).getOrElse(new BufferedRows(from.mkString("/"))) + val newRows = new BufferedRows(to.mkString("/")) + rows.rows.foreach { r => + val newRow = new GenericInternalRow(r.numFields) + for (i <- 0 until r.numFields) newRow.update(i, r.get(i, schema(i).dataType)) + for (i <- 0 until partitionSchema.length) { + val j = schema.fieldIndex(partitionSchema(i).name) + newRow.update(j, to(i)) + } + newRows.withRow(newRow) + } + dataMap.put(to, newRows).foreach { _ => + throw new IllegalStateException( + s"The ${to.mkString("[", ", ", "]")} partition exists already") + } + true + } + def withData(data: Array[BufferedRows]): InMemoryTable = dataMap.synchronized { data.foreach(_.rows.foreach { row => val key = getKey(row) diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala index 31494c7c2dd50..99441c81d9add 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala @@ -23,6 +23,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, PartitionAlreadyExistsException} import org.apache.spark.sql.connector.{InMemoryPartitionTable, InMemoryPartitionTableCatalog, InMemoryTableCatalog} import org.apache.spark.sql.connector.expressions.{LogicalExpressions, NamedReference} import org.apache.spark.sql.types.{IntegerType, StringType, StructType} @@ -214,4 +215,22 @@ class SupportsPartitionManagementSuite extends SparkFunSuite { }.getMessage assert(errMsg.contains("The identifier might not refer to one partition")) } + + test("renamePartition") { + val partTable = createMultiPartTable() + + val errMsg1 = intercept[PartitionAlreadyExistsException] { + partTable.renamePartition(InternalRow(0, "abc"), InternalRow(1, "abc")) + }.getMessage + assert(errMsg1.contains("Partition already exists")) + + val newPart = InternalRow(2, "xyz") + val errMsg2 = intercept[NoSuchPartitionException] { + partTable.renamePartition(newPart, InternalRow(3, "abc")) + }.getMessage + assert(errMsg2.contains("Partition not found")) + + assert(partTable.renamePartition(InternalRow(0, "abc"), newPart)) + assert(partTable.partitionExists(newPart)) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 925c7741eefe3..dec1300d66f35 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -444,11 +444,10 @@ class ResolveSessionCatalog( ifNotExists) case AlterTableRenamePartition( - ResolvedV1TableIdentifier(ident), UnresolvedPartitionSpec(from, _), to) => - AlterTableRenamePartitionCommand( - ident.asTableIdentifier, - from, - to) + ResolvedV1TableIdentifier(ident), + UnresolvedPartitionSpec(from, _), + UnresolvedPartitionSpec(to, _)) => + AlterTableRenamePartitionCommand(ident.asTableIdentifier, from, to) case AlterTableDropPartition( ResolvedV1TableIdentifier(ident), specs, ifExists, purge) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableRenamePartitionExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableRenamePartitionExec.scala new file mode 100644 index 0000000000000..38b83e3ad74e7 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableRenamePartitionExec.scala @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.ResolvedPartitionSpec +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.connector.catalog.SupportsPartitionManagement + +/** + * Physical plan node for renaming a table partition. + */ +case class AlterTableRenamePartitionExec( + table: SupportsPartitionManagement, + from: ResolvedPartitionSpec, + to: ResolvedPartitionSpec) extends V2CommandExec { + + override def output: Seq[Attribute] = Seq.empty + + override protected def run(): Seq[InternalRow] = { + table.renamePartition(from.ident, to.ident) + Seq.empty + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 4667bb7cca998..2674aaf4f2e88 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -352,9 +352,12 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat AlterTableDropPartitionExec( table, parts.asResolvedPartitionSpecs, ignoreIfNotExists, purge) :: Nil - case AlterTableRenamePartition(_: ResolvedTable, _: ResolvedPartitionSpec, _) => - throw new AnalysisException( - "ALTER TABLE ... RENAME TO PARTITION is not supported for v2 tables.") + case AlterTableRenamePartition( + ResolvedTable(_, _, table: SupportsPartitionManagement), from, to) => + AlterTableRenamePartitionExec( + table, + Seq(from).asResolvedPartitionSpecs.head, + Seq(to).asResolvedPartitionSpecs.head) :: Nil case AlterTableRecoverPartitions(_: ResolvedTable) => throw new AnalysisException( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala index aa0668ccaaf53..2705adb8b3c67 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.command import org.apache.spark.sql.{AnalysisException, QueryTest} import org.apache.spark.sql.catalyst.analysis.PartitionsAlreadyExistException -import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.internal.SQLConf /** @@ -39,8 +38,6 @@ import org.apache.spark.sql.internal.SQLConf trait AlterTableAddPartitionSuiteBase extends QueryTest with DDLCommandTestUtils { override val command = "ALTER TABLE .. 
ADD PARTITION" - protected def checkLocation(t: String, spec: TablePartitionSpec, expected: String): Unit - test("one partition") { withNamespaceAndTable("ns", "tbl") { t => sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionParserSuite.scala index db6506c85bcec..c9a6732796729 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionParserSuite.scala @@ -32,7 +32,7 @@ class AlterTableRenamePartitionParserSuite extends AnalysisTest with SharedSpark val expected = AlterTableRenamePartition( UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... RENAME TO PARTITION"), UnresolvedPartitionSpec(Map("ds" -> "2017-06-10")), - Map("ds" -> "2018-06-10")) + UnresolvedPartitionSpec(Map("ds" -> "2018-06-10"))) comparePlans(parsed, expected) } @@ -45,7 +45,7 @@ class AlterTableRenamePartitionParserSuite extends AnalysisTest with SharedSpark val expected = AlterTableRenamePartition( UnresolvedTable(Seq("table_name"), "ALTER TABLE ... RENAME TO PARTITION"), UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us")), - Map("dt" -> "2008-09-09", "country" -> "uk")) + UnresolvedPartitionSpec(Map("dt" -> "2008-09-09", "country" -> "uk"))) comparePlans(parsed, expected) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala index 40c167ce424a0..58055262d3f11 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala @@ -17,7 +17,9 @@ package org.apache.spark.sql.execution.command -import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, PartitionAlreadyExistsException} +import org.apache.spark.sql.internal.SQLConf /** * This base suite contains unified tests for the `ALTER TABLE .. RENAME PARTITION` command that @@ -35,4 +37,130 @@ import org.apache.spark.sql.QueryTest */ trait AlterTableRenamePartitionSuiteBase extends QueryTest with DDLCommandTestUtils { override val command = "ALTER TABLE .. 
RENAME PARTITION" + + protected def createSinglePartTable(t: String): Unit = { + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + sql(s"INSERT INTO $t PARTITION (id = 1) SELECT 'abc'") + } + + test("rename without explicitly specifying database") { + withSQLConf(SQLConf.DEFAULT_CATALOG.key -> catalog) { + createSinglePartTable("t") + checkPartitions("t", Map("id" -> "1")) + + sql(s"ALTER TABLE t PARTITION (id = 1) RENAME TO PARTITION (id = 2)") + checkPartitions("t", Map("id" -> "2")) + checkAnswer(sql(s"SELECT id, data FROM t"), Row(2, "abc")) + } + } + + test("table to alter does not exist") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $catalog.ns.no_tbl PARTITION (id=1) RENAME TO PARTITION (id=2)") + }.getMessage + assert(errMsg.contains("Table not found")) + } + } + + test("partition to rename does not exist") { + withNamespaceAndTable("ns", "tbl") { t => + createSinglePartTable(t) + checkPartitions(t, Map("id" -> "1")) + val errMsg = intercept[NoSuchPartitionException] { + sql(s"ALTER TABLE $t PARTITION (id = 3) RENAME TO PARTITION (id = 2)") + }.getMessage + assert(errMsg.contains("Partition not found in table")) + } + } + + test("target partition exists") { + withNamespaceAndTable("ns", "tbl") { t => + createSinglePartTable(t) + sql(s"INSERT INTO $t PARTITION (id = 2) SELECT 'def'") + checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) + val errMsg = intercept[PartitionAlreadyExistsException] { + sql(s"ALTER TABLE $t PARTITION (id = 1) RENAME TO PARTITION (id = 2)") + }.getMessage + assert(errMsg.contains("Partition already exists")) + } + } + + test("single part partition") { + withNamespaceAndTable("ns", "tbl") { t => + createSinglePartTable(t) + checkPartitions(t, Map("id" -> "1")) + + sql(s"ALTER TABLE $t PARTITION (id = 1) RENAME TO PARTITION (id = 2)") + checkPartitions(t, Map("id" -> "2")) + checkAnswer(sql(s"SELECT id, data FROM $t"), Row(2, "abc")) + } + } + + test("multi part partition") { + withNamespaceAndTable("ns", "tbl") { t => + createWideTable(t) + checkPartitions(t, + Map( + "year" -> "2016", + "month" -> "3", + "hour" -> "10", + "minute" -> "10", + "sec" -> "10", + "extra" -> "1"), + Map( + "year" -> "2016", + "month" -> "4", + "hour" -> "10", + "minute" -> "10", + "sec" -> "10", + "extra" -> "1")) + + sql(s""" + |ALTER TABLE $t + |PARTITION ( + | year = 2016, month = 3, hour = 10, minute = 10, sec = 10, extra = 1 + |) RENAME TO PARTITION ( + | year = 2016, month = 3, hour = 10, minute = 10, sec = 123, extra = 1 + |)""".stripMargin) + checkPartitions(t, + Map( + "year" -> "2016", + "month" -> "3", + "hour" -> "10", + "minute" -> "10", + "sec" -> "123", + "extra" -> "1"), + Map( + "year" -> "2016", + "month" -> "4", + "hour" -> "10", + "minute" -> "10", + "sec" -> "10", + "extra" -> "1")) + checkAnswer(sql(s"SELECT month, sec, price FROM $t"), Row(3, 123, 3)) + } + } + + test("partition spec in RENAME PARTITION should be case insensitive") { + withNamespaceAndTable("ns", "tbl") { t => + createSinglePartTable(t) + checkPartitions(t, Map("id" -> "1")) + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t PARTITION (ID = 1) RENAME TO PARTITION (id = 2)") + }.getMessage + assert(errMsg.contains("ID is not a valid partition column")) + checkPartitions(t, Map("id" -> "1")) + } + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + sql(s"ALTER TABLE $t PARTITION (ID 
= 1) RENAME TO PARTITION (id = 2)") + checkPartitions(t, Map("id" -> "2")) + checkAnswer(sql(s"SELECT id, data FROM $t"), Row(2, "abc")) + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala index a613978ce375a..f4b84d8ee0059 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala @@ -21,6 +21,7 @@ import org.scalactic.source.Position import org.scalatest.Tag import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.execution.datasources.PartitioningUtils import org.apache.spark.sql.test.SQLTestUtils @@ -88,4 +89,6 @@ trait DDLCommandTestUtils extends SQLTestUtils { |ADD PARTITION(year = 2016, month = 4, hour = 10, minute = 10, sec = 10, extra = 1) |""".stripMargin) } + + protected def checkLocation(t: String, spec: TablePartitionSpec, expected: String): Unit } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala index 808eab8340524..b3c118def70b7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.execution.command.v1 import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.execution.command /** @@ -32,21 +31,6 @@ import org.apache.spark.sql.execution.command * `org.apache.spark.sql.hive.execution.command.AlterTableAddPartitionSuite` */ trait AlterTableAddPartitionSuiteBase extends command.AlterTableAddPartitionSuiteBase { - override protected def checkLocation( - t: String, - spec: TablePartitionSpec, - expected: String): Unit = { - val tablePath = t.split('.') - val tableName = tablePath.last - val ns = tablePath.init.mkString(".") - val partSpec = spec.map { case (key, value) => s"$key = $value"}.mkString(", ") - val information = sql(s"SHOW TABLE EXTENDED IN $ns LIKE '$tableName' PARTITION($partSpec)") - .select("information") - .first().getString(0) - val location = information.split("\\r?\\n").filter(_.startsWith("Location:")).head - assert(location.endsWith(expected)) - } - test("empty string as partition value") { withNamespaceAndTable("ns", "tbl") { t => sql(s"CREATE TABLE $t (col1 INT, p1 STRING) $defaultUsing PARTITIONED BY (p1)") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableRenamePartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableRenamePartitionSuite.scala index d923886fbdb9a..bde77106a3ab7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableRenamePartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableRenamePartitionSuite.scala @@ -17,10 +17,8 @@ package org.apache.spark.sql.execution.command.v1 -import org.apache.spark.sql.{AnalysisException, Row} -import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, PartitionAlreadyExistsException} +import 
org.apache.spark.sql.Row import org.apache.spark.sql.execution.command -import org.apache.spark.sql.internal.SQLConf /** * This base suite contains unified tests for the `ALTER TABLE .. RENAME PARTITION` command that @@ -33,143 +31,19 @@ import org.apache.spark.sql.internal.SQLConf * `org.apache.spark.sql.hive.execution.command.AlterTableRenamePartitionSuite` */ trait AlterTableRenamePartitionSuiteBase extends command.AlterTableRenamePartitionSuiteBase { - protected def createSinglePartTable(t: String): Unit = { - sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") - sql(s"INSERT INTO $t PARTITION (id = 1) SELECT 'abc'") - } - - test("rename without explicitly specifying database") { - val t = "tbl" - withTable(t) { - createSinglePartTable(t) - checkPartitions(t, Map("id" -> "1")) - - sql(s"ALTER TABLE $t PARTITION (id = 1) RENAME TO PARTITION (id = 2)") - checkPartitions(t, Map("id" -> "2")) - checkAnswer(sql(s"SELECT id, data FROM $t"), Row(2, "abc")) - } - } - - test("table to alter does not exist") { - withNamespace(s"$catalog.ns") { - sql(s"CREATE NAMESPACE $catalog.ns") - val errMsg = intercept[AnalysisException] { - sql(s"ALTER TABLE $catalog.ns.no_tbl PARTITION (id=1) RENAME TO PARTITION (id=2)") - }.getMessage - assert(errMsg.contains("Table not found")) - } - } - - test("partition to rename does not exist") { - withNamespaceAndTable("ns", "tbl") { t => - createSinglePartTable(t) - checkPartitions(t, Map("id" -> "1")) - val errMsg = intercept[NoSuchPartitionException] { - sql(s"ALTER TABLE $t PARTITION (id = 3) RENAME TO PARTITION (id = 2)") - }.getMessage - assert(errMsg.contains("Partition not found in table")) - } - } - - test("target partition exists") { - withNamespaceAndTable("ns", "tbl") { t => - createSinglePartTable(t) - sql(s"INSERT INTO $t PARTITION (id = 2) SELECT 'def'") - checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) - val errMsg = intercept[PartitionAlreadyExistsException] { - sql(s"ALTER TABLE $t PARTITION (id = 1) RENAME TO PARTITION (id = 2)") - }.getMessage - assert(errMsg.contains("Partition already exists")) - } - } - - test("single part partition") { - withNamespaceAndTable("ns", "tbl") { t => - createSinglePartTable(t) - checkPartitions(t, Map("id" -> "1")) - - sql(s"ALTER TABLE $t PARTITION (id = 1) RENAME TO PARTITION (id = 2)") - checkPartitions(t, Map("id" -> "2")) - checkAnswer(sql(s"SELECT id, data FROM $t"), Row(2, "abc")) - } - } - - test("multi part partition") { - withNamespaceAndTable("ns", "tbl") { t => - createWideTable(t) - checkPartitions(t, - Map( - "year" -> "2016", - "month" -> "3", - "hour" -> "10", - "minute" -> "10", - "sec" -> "10", - "extra" -> "1"), - Map( - "year" -> "2016", - "month" -> "4", - "hour" -> "10", - "minute" -> "10", - "sec" -> "10", - "extra" -> "1")) - - sql(s""" - |ALTER TABLE $t - |PARTITION ( - | year = 2016, month = 3, hour = 10, minute = 10, sec = 10, extra = 1 - |) RENAME TO PARTITION ( - | year = 2016, month = 3, hour = 10, minute = 10, sec = 123, extra = 1 - |)""".stripMargin) - checkPartitions(t, - Map( - "year" -> "2016", - "month" -> "3", - "hour" -> "10", - "minute" -> "10", - "sec" -> "123", - "extra" -> "1"), - Map( - "year" -> "2016", - "month" -> "4", - "hour" -> "10", - "minute" -> "10", - "sec" -> "10", - "extra" -> "1")) - checkAnswer(sql(s"SELECT month, sec, price FROM $t"), Row(3, 123, 3)) - } - } - test("with location") { withNamespaceAndTable("ns", "tbl") { t => createSinglePartTable(t) sql(s"ALTER TABLE $t ADD PARTITION (id = 2) LOCATION 'loc1'") 
sql(s"INSERT INTO $t PARTITION (id = 2) SELECT 'def'") checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) + checkLocation(t, Map("id" -> "2"), "loc1") sql(s"ALTER TABLE $t PARTITION (id = 2) RENAME TO PARTITION (id = 3)") checkPartitions(t, Map("id" -> "1"), Map("id" -> "3")) - checkAnswer(sql(s"SELECT id, data FROM $t"), Seq(Row(1, "abc"), Row(3, "def"))) - } - } - - test("partition spec in RENAME PARTITION should be case insensitive") { - withNamespaceAndTable("ns", "tbl") { t => - createSinglePartTable(t) - checkPartitions(t, Map("id" -> "1")) - - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - val errMsg = intercept[AnalysisException] { - sql(s"ALTER TABLE $t PARTITION (ID = 1) RENAME TO PARTITION (id = 2)") - }.getMessage - assert(errMsg.contains("ID is not a valid partition column")) - checkPartitions(t, Map("id" -> "1")) - } - - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { - sql(s"ALTER TABLE $t PARTITION (ID = 1) RENAME TO PARTITION (id = 2)") - checkPartitions(t, Map("id" -> "2")) - checkAnswer(sql(s"SELECT id, data FROM $t"), Row(2, "abc")) - } + // V1 catalogs rename the partition location of managed tables + checkLocation(t, Map("id" -> "3"), "id=3") + checkAnswer(sql(s"SELECT id, data FROM $t WHERE id = 3"), Row(3, "def")) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala index c4ecf1c98bb6e..80c552de567ba 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.execution.command.v1 +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.test.SharedSparkSession @@ -30,4 +31,20 @@ trait CommandSuiteBase extends SharedSparkSession { def version: String = "V1" // The prefix is added to test names def catalog: String = CatalogManager.SESSION_CATALOG_NAME def defaultUsing: String = "USING parquet" // The clause is used in creating tables under testing + + // TODO(SPARK-33393): Move this to `DDLCommandTestUtils` + def checkLocation( + t: String, + spec: TablePartitionSpec, + expected: String): Unit = { + val tablePath = t.split('.') + val tableName = tablePath.last + val ns = tablePath.init.mkString(".") + val partSpec = spec.map { case (key, value) => s"$key = $value"}.mkString(", ") + val information = sql(s"SHOW TABLE EXTENDED IN $ns LIKE '$tableName' PARTITION($partSpec)") + .select("information") + .first().getString(0) + val location = information.split("\\r?\\n").filter(_.startsWith("Location:")).head + assert(location.endsWith(expected)) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala index 0f0f8fa389321..65494a7266756 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala @@ -18,10 +18,6 @@ package org.apache.spark.sql.execution.command.v2 import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.analysis.ResolvePartitionSpec -import 
org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec -import org.apache.spark.sql.connector.InMemoryPartitionTable -import org.apache.spark.sql.connector.catalog.{CatalogV2Implicits, Identifier} import org.apache.spark.sql.execution.command /** @@ -31,29 +27,6 @@ import org.apache.spark.sql.execution.command class AlterTableAddPartitionSuite extends command.AlterTableAddPartitionSuiteBase with CommandSuiteBase { - - import CatalogV2Implicits._ - - override protected def checkLocation( - t: String, - spec: TablePartitionSpec, - expected: String): Unit = { - val tablePath = t.split('.') - val catalogName = tablePath.head - val namespaceWithTable = tablePath.tail - val namespaces = namespaceWithTable.init - val tableName = namespaceWithTable.last - val catalogPlugin = spark.sessionState.catalogManager.catalog(catalogName) - val partTable = catalogPlugin.asTableCatalog - .loadTable(Identifier.of(namespaces, tableName)) - .asInstanceOf[InMemoryPartitionTable] - val ident = ResolvePartitionSpec.convertToPartIdent(spec, partTable.partitionSchema.fields) - val partMetadata = partTable.loadPartitionMetadata(ident) - - assert(partMetadata.containsKey("location")) - assert(partMetadata.get("location") === expected) - } - test("SPARK-33650: add partition into a table which doesn't support partition management") { withNamespaceAndTable("ns", "tbl", s"non_part_$catalog") { t => sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableRenamePartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableRenamePartitionSuite.scala index d1c252adde369..bb06818da48b1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableRenamePartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableRenamePartitionSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.command.v2 -import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.Row import org.apache.spark.sql.execution.command /** @@ -28,14 +28,20 @@ class AlterTableRenamePartitionSuite extends command.AlterTableRenamePartitionSuiteBase with CommandSuiteBase { - // TODO(SPARK-33859): Support V2 ALTER TABLE .. RENAME PARTITION - test("single part partition") { + test("with location") { withNamespaceAndTable("ns", "tbl") { t => - sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") - val errMsg = intercept[AnalysisException] { - sql(s"ALTER TABLE $t PARTITION (id=1) RENAME TO PARTITION (id=2)") - }.getMessage - assert(errMsg.contains("ALTER TABLE ... 
RENAME TO PARTITION is not supported for v2 tables")) + createSinglePartTable(t) + val loc = "location1" + sql(s"ALTER TABLE $t ADD PARTITION (id = 2) LOCATION '$loc'") + sql(s"INSERT INTO $t PARTITION (id = 2) SELECT 'def'") + checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) + checkLocation(t, Map("id" -> "2"), loc) + + sql(s"ALTER TABLE $t PARTITION (id = 2) RENAME TO PARTITION (id = 3)") + checkPartitions(t, Map("id" -> "1"), Map("id" -> "3")) + // `InMemoryPartitionTableCatalog` should keep the original location + checkLocation(t, Map("id" -> "3"), loc) + checkAnswer(sql(s"SELECT id, data FROM $t WHERE id = 3"), Row(3, "def")) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala index 0978126f27fd1..2dd80b7bb6a02 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala @@ -18,7 +18,10 @@ package org.apache.spark.sql.execution.command.v2 import org.apache.spark.SparkConf -import org.apache.spark.sql.connector.{InMemoryPartitionTableCatalog, InMemoryTableCatalog} +import org.apache.spark.sql.catalyst.analysis.ResolvePartitionSpec +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec +import org.apache.spark.sql.connector.{InMemoryPartitionTable, InMemoryPartitionTableCatalog, InMemoryTableCatalog} +import org.apache.spark.sql.connector.catalog.{CatalogV2Implicits, Identifier} import org.apache.spark.sql.test.SharedSparkSession /** @@ -36,4 +39,26 @@ trait CommandSuiteBase extends SharedSparkSession { override def sparkConf: SparkConf = super.sparkConf .set(s"spark.sql.catalog.$catalog", classOf[InMemoryPartitionTableCatalog].getName) .set(s"spark.sql.catalog.non_part_$catalog", classOf[InMemoryTableCatalog].getName) + + def checkLocation( + t: String, + spec: TablePartitionSpec, + expected: String): Unit = { + import CatalogV2Implicits._ + + val tablePath = t.split('.') + val catalogName = tablePath.head + val namespaceWithTable = tablePath.tail + val namespaces = namespaceWithTable.init + val tableName = namespaceWithTable.last + val catalogPlugin = spark.sessionState.catalogManager.catalog(catalogName) + val partTable = catalogPlugin.asTableCatalog + .loadTable(Identifier.of(namespaces, tableName)) + .asInstanceOf[InMemoryPartitionTable] + val ident = ResolvePartitionSpec.convertToPartIdent(spec, partTable.partitionSchema.fields) + val partMetadata = partTable.loadPartitionMetadata(ident) + + assert(partMetadata.containsKey("location")) + assert(partMetadata.get("location") === expected) + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala index 39b4be61449cb..a1c808647c891 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.hive.execution.command +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.hive.test.TestHiveSingleton @@ -30,4 +31,20 @@ trait CommandSuiteBase extends TestHiveSingleton { def version: String = "Hive 
V1" // The prefix is added to test names def catalog: String = CatalogManager.SESSION_CATALOG_NAME def defaultUsing: String = "USING HIVE" // The clause is used in creating tables under testing + + def checkLocation( + t: String, + spec: TablePartitionSpec, + expected: String): Unit = { + val tablePath = t.split('.') + val tableName = tablePath.last + val ns = tablePath.init.mkString(".") + val partSpec = spec.map { case (key, value) => s"$key = $value"}.mkString(", ") + val information = + spark.sql(s"SHOW TABLE EXTENDED IN $ns LIKE '$tableName' PARTITION($partSpec)") + .select("information") + .first().getString(0) + val location = information.split("\\r?\\n").filter(_.startsWith("Location:")).head + assert(location.endsWith(expected)) + } } From 3b1b209e90076e60eb18eedfaec0ecdad659376f Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Tue, 29 Dec 2020 13:33:06 +0000 Subject: [PATCH 0906/1009] [SPARK-33909][SQL] Check rand functions seed is legal at analyer side ### What changes were proposed in this pull request? Move seed is legal check to `CheckAnalysis`. ### Why are the changes needed? It's better to check seed expression is legal at analyzer side instead of execution, and user can get exception as soon as possible. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Add test. Closes #30923 from ulysses-you/SPARK-33909. Authored-by: ulysses-you Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/analysis/CheckAnalysis.scala | 4 ++++ .../catalyst/expressions/randomExpressions.scala | 9 +++------ .../sql/catalyst/analysis/AnalysisErrorSuite.scala | 13 +++++++++++++ .../optimizer/LeftSemiAntiJoinPushDownSuite.scala | 2 +- 4 files changed, 21 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index a4dfbe85abfd7..89076fbb9ce0f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -215,6 +215,10 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { case s: SubqueryExpression => checkSubqueryExpression(operator, s) s + + case e: ExpressionWithRandomSeed if !e.seedExpression.foldable => + failAnalysis( + s"Input argument to ${e.prettyName} must be a constant.") } operator match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala index 630c934f79533..0a4c6e27d51d9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.expressions -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.UnresolvedSeed import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode, FalseLiteral} @@ -47,10 +46,8 @@ abstract class RDG extends UnaryExpression with ExpectsInputTypes with Stateful override def seedExpression: Expression = child @transient protected lazy val seed: Long = seedExpression match { - case Literal(s, IntegerType) => s.asInstanceOf[Int] - case Literal(s, LongType) 
=> s.asInstanceOf[Long] - case _ => throw new AnalysisException( - s"Input argument to $prettyName must be an integer, long or null literal.") + case e if e.dataType == IntegerType => e.eval().asInstanceOf[Int] + case e if e.dataType == LongType => e.eval().asInstanceOf[Long] } override def nullable: Boolean = false @@ -64,7 +61,7 @@ abstract class RDG extends UnaryExpression with ExpectsInputTypes with Stateful * Represents the behavior of expressions which have a random seed and can renew the seed. * Usually the random seed needs to be renewed at each execution under streaming queries. */ -trait ExpressionWithRandomSeed { +trait ExpressionWithRandomSeed extends Expression { def seedExpression: Expression def withNewSeed(seed: Long): Expression } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 44128c4419951..004d577c7ad52 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -700,4 +700,17 @@ class AnalysisErrorSuite extends AnalysisTest { UnresolvedRelation(TableIdentifier("t", Option("nonexist"))))))) assertAnalysisError(plan, "Table or view not found:" :: Nil) } + + test("SPARK-33909: Check rand functions seed is legal at analyer side") { + Seq(Rand("a".attr), Randn("a".attr)).foreach { r => + val plan = Project(Seq(r.as("r")), testRelation) + assertAnalysisError(plan, + s"Input argument to ${r.prettyName} must be a constant." :: Nil) + } + Seq(Rand(1.0), Rand("1"), Randn("a")).foreach { r => + val plan = Project(Seq(r.as("r")), testRelation) + assertAnalysisError(plan, + s"data type mismatch: argument 1 requires (int or bigint) type" :: Nil) + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LeftSemiAntiJoinPushDownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LeftSemiAntiJoinPushDownSuite.scala index 729a1e9f06ca5..d4b85b036b64c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LeftSemiAntiJoinPushDownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LeftSemiAntiJoinPushDownSuite.scala @@ -60,7 +60,7 @@ class LeftSemiPushdownSuite extends PlanTest { test("Project: LeftSemiAnti join no pushdown because of non-deterministic proj exprs") { val originalQuery = testRelation - .select(Rand('a), 'b, 'c) + .select(Rand(1), 'b, 'c) .join(testRelation1, joinType = LeftSemi, condition = Some('b === 'd)) val optimized = Optimize.execute(originalQuery.analyze) From 872107f67fd6c2093531e8a8976ff713359cba01 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Tue, 29 Dec 2020 13:34:43 +0000 Subject: [PATCH 0907/1009] [SPARK-33848][SQL][FOLLOWUP] Introduce allowList for push into (if / case) branches ### What changes were proposed in this pull request? Introduce allowList push into (if / case) branches to fix potential bug. ### Why are the changes needed? Fix potential bug. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing test. Closes #30955 from wangyum/SPARK-33848-2. 
Authored-by: Yuming Wang Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/expressions.scala | 41 +++++++++++++++---- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 6c5dec133d2a7..1b93d514964e6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -553,41 +553,68 @@ object PushFoldableIntoBranches extends Rule[LogicalPlan] with PredicateHelper { foldables.nonEmpty && others.length < 2 } + // Not all UnaryExpression can be pushed into (if / case) branches, e.g. Alias. + private def supportedUnaryExpression(e: UnaryExpression): Boolean = e match { + case _: IsNull | _: IsNotNull => true + case _: UnaryMathExpression | _: Abs | _: Bin | _: Factorial | _: Hex => true + case _: String2StringExpression | _: Ascii | _: Base64 | _: BitLength | _: Chr | _: Length => + true + case _: CastBase => true + case _: GetDateField | _: LastDay => true + case _: ExtractIntervalPart => true + case _: ArraySetLike => true + case _: ExtractValue => true + case _ => false + } + + // Not all BinaryExpression can be pushed into (if / case) branches. + private def supportedBinaryExpression(e: BinaryExpression): Boolean = e match { + case _: BinaryComparison | _: StringPredicate | _: StringRegexExpression => true + case _: BinaryArithmetic => true + case _: BinaryMathExpression => true + case _: AddMonths | _: DateAdd | _: DateAddInterval | _: DateDiff | _: DateSub => true + case _: FindInSet | _: RoundBase => true + case _ => false + } + def apply(plan: LogicalPlan): LogicalPlan = plan transform { case q: LogicalPlan => q transformExpressionsUp { - case a: Alias => a // Skip an alias. 
case u @ UnaryExpression(i @ If(_, trueValue, falseValue)) - if atMostOneUnfoldable(Seq(trueValue, falseValue)) => + if supportedUnaryExpression(u) && atMostOneUnfoldable(Seq(trueValue, falseValue)) => i.copy( trueValue = u.withNewChildren(Array(trueValue)), falseValue = u.withNewChildren(Array(falseValue))) case u @ UnaryExpression(c @ CaseWhen(branches, elseValue)) - if atMostOneUnfoldable(branches.map(_._2) ++ elseValue) => + if supportedUnaryExpression(u) && atMostOneUnfoldable(branches.map(_._2) ++ elseValue) => c.copy( branches.map(e => e.copy(_2 = u.withNewChildren(Array(e._2)))), elseValue.map(e => u.withNewChildren(Array(e)))) case b @ BinaryExpression(i @ If(_, trueValue, falseValue), right) - if right.foldable && atMostOneUnfoldable(Seq(trueValue, falseValue)) => + if supportedBinaryExpression(b) && right.foldable && + atMostOneUnfoldable(Seq(trueValue, falseValue)) => i.copy( trueValue = b.withNewChildren(Array(trueValue, right)), falseValue = b.withNewChildren(Array(falseValue, right))) case b @ BinaryExpression(left, i @ If(_, trueValue, falseValue)) - if left.foldable && atMostOneUnfoldable(Seq(trueValue, falseValue)) => + if supportedBinaryExpression(b) && left.foldable && + atMostOneUnfoldable(Seq(trueValue, falseValue)) => i.copy( trueValue = b.withNewChildren(Array(left, trueValue)), falseValue = b.withNewChildren(Array(left, falseValue))) case b @ BinaryExpression(c @ CaseWhen(branches, elseValue), right) - if right.foldable && atMostOneUnfoldable(branches.map(_._2) ++ elseValue) => + if supportedBinaryExpression(b) && right.foldable && + atMostOneUnfoldable(branches.map(_._2) ++ elseValue) => c.copy( branches.map(e => e.copy(_2 = b.withNewChildren(Array(e._2, right)))), elseValue.map(e => b.withNewChildren(Array(e, right)))) case b @ BinaryExpression(left, c @ CaseWhen(branches, elseValue)) - if left.foldable && atMostOneUnfoldable(branches.map(_._2) ++ elseValue) => + if supportedBinaryExpression(b) && left.foldable && + atMostOneUnfoldable(branches.map(_._2) ++ elseValue) => c.copy( branches.map(e => e.copy(_2 = b.withNewChildren(Array(left, e._2)))), elseValue.map(e => b.withNewChildren(Array(left, e)))) From aadda4b561ace638fb88147a93b5e15db3527d5a Mon Sep 17 00:00:00 2001 From: angerszhu Date: Tue, 29 Dec 2020 23:26:27 +0900 Subject: [PATCH 0908/1009] [SPARK-33930][SQL] Script Transform default FIELD DELIMIT should be \u0001 for no serde ### What changes were proposed in this pull request? For same SQL ``` SELECT TRANSFORM(a, b, c, null) ROW FORMAT DELIMITED USING 'cat' ROW FORMAT DELIMITED FIELDS TERMINATED BY '&' FROM (select 1 as a, 2 as b, 3 as c) t ``` In hive: ``` hive> SELECT TRANSFORM(a, b, c, null) > ROW FORMAT DELIMITED > USING 'cat' > ROW FORMAT DELIMITED > FIELDS TERMINATED BY '&' > FROM (select 1 as a, 2 as b, 3 as c) t; OK 123\N NULL Time taken: 14.519 seconds, Fetched: 1 row(s) hive> packet_write_wait: Connection to 10.191.58.100 port 32200: Broken pipe ``` In Spark ``` Spark master: local[*], Application Id: local-1609225830376 spark-sql> SELECT TRANSFORM(a, b, c, null) > ROW FORMAT DELIMITED > USING 'cat' > ROW FORMAT DELIMITED > FIELDS TERMINATED BY '&' > FROM (select 1 as a, 2 as b, 3 as c) t; 1 2 3 null NULL Time taken: 4.297 seconds, Fetched 1 row(s) spark-sql> ``` We should keep same. 
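
To make the expected behavior concrete, here is a minimal sketch (not part of this patch) that mirrors the new no-serde test added below. It assumes a Spark shell session, so `spark` and its implicits are available, and a Unix `cat` binary on the PATH:
```
// Illustrative only: based on the new no-serde test in this patch.
import spark.implicits._

Seq((1, 2, 3)).toDF("a", "b", "c").createOrReplaceTempView("v")

spark.sql(
  """SELECT TRANSFORM(a, b, c)
    |  ROW FORMAT DELIMITED
    |  USING 'cat' AS (a)
    |  ROW FORMAT DELIMITED
    |  FIELDS TERMINATED BY '&'
    |FROM v
    |""".stripMargin).show(false)
// With this change the single output column is "1\u00012\u00013"
// (input fields joined by '\u0001', as in Hive); before it was "1\t2\t3".
```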
Change the default ROW FORMAT FIELD DELIMIT to `\u0001` for the no-serde mode.

In Hive the default value is '1', which as a char is '\u0001':
```
bucket_count -1
column.name.delimiter ,
columns
columns.comments
columns.types
file.inputformat org.apache.hadoop.hive.ql.io.NullRowsInputFormat
```

### Why are the changes needed?
Keep the same behavior as Hive.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Added UT

Closes #30958 from AngersZhuuuu/SPARK-33930.

Authored-by: angerszhu
Signed-off-by: HyukjinKwon
---
 docs/sql-migration-guide.md                   |  2 ++
 .../BaseScriptTransformationExec.scala        |  2 +-
 .../BaseScriptTransformationSuite.scala       | 32 ++++++++++++++++++-
 3 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index cbb1de53c8896..bd54554baa09d 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -30,6 +30,8 @@ license: |
 
 - In Spark 3.2, `ALTER TABLE .. RENAME TO PARTITION` throws `PartitionAlreadyExistsException` instead of `AnalysisException` for tables from Hive external when the target partition already exists.
 
+ - In Spark 3.2, script transform default FIELD DELIMIT is `\u0001` for no serde mode. In Spark 3.1 or earlier, the default FIELD DELIMIT is `\t`.
+
 ## Upgrading from Spark SQL 3.0 to 3.1
 
 - In Spark 3.1, statistical aggregation function includes `std`, `stddev`, `stddev_samp`, `variance`, `var_samp`, `skewness`, `kurtosis`, `covar_samp`, `corr` will return `NULL` instead of `Double.NaN` when `DivideByZero` occurs during expression evaluation, for example, when `stddev_samp` applied on a single element set. In Spark version 3.0 and earlier, it will return `Double.NaN` in such case. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.statisticalAggregate` to `true`.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala index 74e5aa716ad67..1c87c48ae7cb3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala @@ -335,7 +335,7 @@ case class ScriptTransformationIOSchema( object ScriptTransformationIOSchema { val defaultFormat = Map( - ("TOK_TABLEROWFORMATFIELD", "\t"), + ("TOK_TABLEROWFORMATFIELD", "\u0001"), ("TOK_TABLEROWFORMATLINES", "\n") ) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala index 863657a7862a6..cf9ee1ef6db72 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala @@ -28,6 +28,7 @@ import org.scalatest.exceptions.TestFailedException import org.apache.spark.{SparkException, TaskContext, TestUtils} import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, GenericInternalRow} import org.apache.spark.sql.catalyst.plans.physical.Partitioning @@ -123,7 +124,11 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU s""" |SELECT |TRANSFORM(a, b, c, d, e) - |USING 'python $scriptFilePath' AS (a, b, c, d, e) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + | USING 'python $scriptFilePath' AS (a, b, c, d, e) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' |FROM v """.stripMargin) @@ -440,6 +445,31 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU } } } + + test("SPARK-33930: Script Transform default FIELD DELIMIT should be \u0001 (no serde)") { + withTempView("v") { + val df = Seq( + (1, 2, 3), + (2, 3, 4), + (3, 4, 5) + ).toDF("a", "b", "c") + df.createTempView("v") + + checkAnswer( + sql( + s""" + |SELECT TRANSFORM(a, b, c) + | ROW FORMAT DELIMITED + | USING 'cat' AS (a) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '&' + |FROM v + """.stripMargin), identity, + Row("1\u00012\u00013") :: + Row("2\u00013\u00014") :: + Row("3\u00014\u00015") :: Nil) + } + } } case class ExceptionInjectingOperator(child: SparkPlan) extends UnaryExecNode { From 16c594de7967ae535a87c157d5383c3af3bbccc5 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 29 Dec 2020 14:30:37 +0000 Subject: [PATCH 0909/1009] [SPARK-33859][SQL][FOLLOWUP] Add version to `SupportsPartitionManagement.renamePartition()` ### What changes were proposed in this pull request? Add the version 3.2.0 to new method `renamePartition()` in the `SupportsPartitionManagement` interface. ### Why are the changes needed? To inform Spark devs when the method appears in the interface. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? `./dev/scalastyle` Closes #30964 from MaxGekk/alter-table-rename-partition-v2-followup. 
Authored-by: Max Gekk
Signed-off-by: Wenchen Fan
---
 .../sql/connector/catalog/SupportsPartitionManagement.java | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java
index a7008293a3e19..20af0e0f8c67c 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java
@@ -149,6 +149,8 @@ Map loadPartitionMetadata(InternalRow ident)
    * @throws UnsupportedOperationException If partition renaming is not supported
    * @throws PartitionAlreadyExistsException If the `to` partition exists already
    * @throws NoSuchPartitionException If the `from` partition does not exist
+   *
+   * @since 3.2.0
    */
   default boolean renamePartition(InternalRow from, InternalRow to)
     throws UnsupportedOperationException,

From c42502493a0d1012ab8ba496363fca27014b9229 Mon Sep 17 00:00:00 2001
From: Yuming Wang
Date: Tue, 29 Dec 2020 14:35:01 +0000
Subject: [PATCH 0910/1009] [SPARK-33847][SQL][FOLLOWUP] Remove the CaseWhen should consider deterministic

### What changes were proposed in this pull request?

This PR fixes removing the `CaseWhen` when `elseValue` is empty and the other outputs are null: the rewrite must take determinism into account.

### Why are the changes needed?

Fix a bug.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Unit test.

Closes #30960 from wangyum/SPARK-33847-2.

Authored-by: Yuming Wang
Signed-off-by: Wenchen Fan
---
 .../ReplaceNullWithFalseInPredicate.scala          |  8 ++------
 .../sql/catalyst/optimizer/expressions.scala       |  9 +++------
 .../optimizer/PushFoldableIntoBranchesSuite.scala  | 15 +++++++--------
 .../ReplaceNullWithFalseInPredicateSuite.scala     |  7 ++++---
 .../optimizer/SimplifyConditionalSuite.scala       | 12 +++++++-----
 5 files changed, 23 insertions(+), 28 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala
index df3da3e8a9982..2f95f242c851c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala
@@ -98,12 +98,8 @@ object ReplaceNullWithFalseInPredicate extends Rule[LogicalPlan] {
       val newBranches = cw.branches.map { case (cond, value) =>
         replaceNullWithFalse(cond) -> replaceNullWithFalse(value)
       }
-      if (newBranches.forall(_._2 == FalseLiteral) && cw.elseValue.isEmpty) {
-        FalseLiteral
-      } else {
-        val newElseValue = cw.elseValue.map(replaceNullWithFalse)
-        CaseWhen(newBranches, newElseValue)
-      }
+      val newElseValue = cw.elseValue.map(replaceNullWithFalse).getOrElse(FalseLiteral)
+      CaseWhen(newBranches, newElseValue)
     case i @ If(pred, trueVal, falseVal) if i.dataType == BooleanType =>
       If(replaceNullWithFalse(pred), replaceNullWithFalse(trueVal), replaceNullWithFalse(falseVal))
     case e if e.dataType == BooleanType =>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
index 1b93d514964e6..819bffeafb643 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -515,8 +515,9 @@ object SimplifyConditionals extends Rule[LogicalPlan] with PredicateHelper { val (h, t) = branches.span(_._1 != TrueLiteral) CaseWhen( h :+ t.head, None) - case e @ CaseWhen(branches, Some(elseValue)) - if branches.forall(_._2.semanticEquals(elseValue)) => + case e @ CaseWhen(branches, elseOpt) + if branches.forall(_._2.semanticEquals(elseOpt.getOrElse(Literal(null, e.dataType)))) => + val elseValue = elseOpt.getOrElse(Literal(null, e.dataType)) // For non-deterministic conditions with side effect, we can not remove it, or change // the ordering. As a result, we try to remove the deterministic conditions from the tail. var hitNonDeterministicCond = false @@ -532,10 +533,6 @@ object SimplifyConditionals extends Rule[LogicalPlan] with PredicateHelper { } else { e.copy(branches = branches.take(i).map(branch => (branch._1, elseValue))) } - - case e @ CaseWhen(branches, None) - if branches.forall(_._2.semanticEquals(Literal(null, e.dataType))) => - Literal(null, e.dataType) } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala index 0d5218ac629e3..cb90a398604f2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala @@ -260,14 +260,13 @@ class PushFoldableIntoBranchesSuite } test("SPARK-33847: Remove the CaseWhen if elseValue is empty and other outputs are null") { - Seq(a, LessThan(Rand(1), Literal(0.5))).foreach { condition => - assertEquivalent( - EqualTo(CaseWhen(Seq((condition, Literal.create(null, IntegerType)))), Literal(2)), - Literal.create(null, BooleanType)) - assertEquivalent( - EqualTo(CaseWhen(Seq((condition, Literal("str")))).cast(IntegerType), Literal(2)), - Literal.create(null, BooleanType)) - } + assertEquivalent( + EqualTo(CaseWhen(Seq((a, Literal.create(null, IntegerType)))), Literal(2)), + Literal.create(null, BooleanType)) + assertEquivalent( + EqualTo(CaseWhen(Seq((LessThan(Rand(1), Literal(0.5)), Literal("str")))).cast(IntegerType), + Literal(2)), + CaseWhen(Seq((LessThan(Rand(1), Literal(0.5)), Literal.create(null, BooleanType))))) } test("SPARK-33884: simplify CaseWhen clauses with (true and false) and (false and true)") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala index ae97d53256837..ffab358721e1a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala @@ -114,7 +114,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { val expectedBranches = Seq( (UnresolvedAttribute("i") < Literal(10)) -> FalseLiteral, (UnresolvedAttribute("i") > Literal(40)) -> TrueLiteral) - val expectedCond = CaseWhen(expectedBranches) + val expectedCond = CaseWhen(expectedBranches, FalseLiteral) testFilter(originalCond, expectedCond) testJoin(originalCond, expectedCond) @@ 
-135,7 +135,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { (UnresolvedAttribute("i") < Literal(10)) -> TrueLiteral, (UnresolvedAttribute("i") > Literal(10)) -> FalseLiteral, TrueLiteral -> TrueLiteral) - val expectedCond = CaseWhen(expectedBranches) + val expectedCond = CaseWhen(expectedBranches, FalseLiteral) testFilter(originalCond, expectedCond) testJoin(originalCond, expectedCond) @@ -238,7 +238,8 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { FalseLiteral) val condition = CaseWhen(Seq((UnresolvedAttribute("i") > Literal(10)) -> branchValue)) val expectedCond = CaseWhen(Seq( - (UnresolvedAttribute("i") > Literal(10), (Literal(2) === nestedCaseWhen) <=> TrueLiteral))) + (UnresolvedAttribute("i") > Literal(10), (Literal(2) === nestedCaseWhen) <=> TrueLiteral)), + FalseLiteral) testFilter(originalCond = condition, expectedCond = expectedCond) testJoin(originalCond = condition, expectedCond = expectedCond) testDelete(originalCond = condition, expectedCond = expectedCond) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala index f3edd70bcfb12..2a685bfeefcb2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala @@ -237,11 +237,13 @@ class SimplifyConditionalSuite extends PlanTest with ExpressionEvalHelper with P } test("SPARK-33847: Remove the CaseWhen if elseValue is empty and other outputs are null") { - Seq(GreaterThan('a, 1), GreaterThan(Rand(0), 1)).foreach { condition => - assertEquivalent( - CaseWhen((condition, Literal.create(null, IntegerType)) :: Nil, None), - Literal.create(null, IntegerType)) - } + assertEquivalent( + CaseWhen((GreaterThan('a, 1), Literal.create(null, IntegerType)) :: Nil, None), + Literal.create(null, IntegerType)) + + assertEquivalent( + CaseWhen((GreaterThan(Rand(0), 0.5), Literal.create(null, IntegerType)) :: Nil, None), + CaseWhen((GreaterThan(Rand(0), 0.5), Literal.create(null, IntegerType)) :: Nil, None)) } test("SPARK-33884: simplify CaseWhen clauses with (true and false) and (false and true)") { From 2b6836cdc289bdaaf5e9fdcc0d7da05bfcb63cab Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 29 Dec 2020 12:26:25 -0800 Subject: [PATCH 0911/1009] [SPARK-33936][SQL] Add the version when connector's methods and interfaces were updated ### What changes were proposed in this pull request? Add the `since` tag to methods and interfaces added recently. ### Why are the changes needed? 1. To follow the existing convention for Spark API. 2. To inform devs when Spark API was changed. ### Does this PR introduce _any_ user-facing change? Should not. ### How was this patch tested? `dev/scalastyle` Closes #30966 from MaxGekk/spark-23889-interfaces-followup. 
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/connector/catalog/MetadataColumn.java | 2 ++ .../org/apache/spark/sql/connector/catalog/SupportsDelete.java | 2 ++ .../spark/sql/connector/catalog/SupportsMetadataColumns.java | 2 ++ .../org/apache/spark/sql/connector/expressions/Expressions.java | 2 ++ 4 files changed, 8 insertions(+) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataColumn.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataColumn.java index cdfa082ced317..65f31229764fe 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataColumn.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataColumn.java @@ -33,6 +33,8 @@ * example, a partition value produced by bucket(id, 16) could be exposed by a metadata column. In * this case, {@link #transform()} should return a non-null {@link Transform} that produced the * metadata column's values. + * + * @since 3.1.0 */ @Evolving public interface MetadataColumn { diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsDelete.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsDelete.java index 261e5344be7b9..8f51f4e1e835d 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsDelete.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsDelete.java @@ -44,6 +44,8 @@ public interface SupportsDelete { * * @param filters filter expressions, used to select rows to delete when all expressions match * @return true if the delete operation can be performed + * + * @since 3.1.0 */ default boolean canDeleteWhere(Filter[] filters) { return true; diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java index 208abfc302582..b7b715bd456ab 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java @@ -37,6 +37,8 @@ * If a table column and a metadata column have the same name, the metadata column will never be * requested. It is recommended that Table implementations reject data column name that conflict * with metadata column names. 
+ * + * @since 3.1.0 */ @Evolving public interface SupportsMetadataColumns extends Table { diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Expressions.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Expressions.java index 984de6258f84b..7b472fa800821 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Expressions.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Expressions.java @@ -171,6 +171,8 @@ public static Transform hours(String column) { * @param direction direction of the sort * @param nullOrder null order of the sort * @return a SortOrder + * + * @since 3.2.0 */ public static SortOrder sort(Expression expr, SortDirection direction, NullOrdering nullOrder) { return LogicalExpressions.sort(expr, direction, nullOrder); From 951afc3acc4009e8bb55238db59376891ef091b6 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 30 Dec 2020 10:20:54 +0900 Subject: [PATCH 0912/1009] [SPARK-33932][SS] Clean up KafkaOffsetReader API document ### What changes were proposed in this pull request? This patch cleans up KafkaOffsetReader API document. ### Why are the changes needed? KafkaOffsetReader API documents are duplicated among KafkaOffsetReaderConsumer and KafkaOffsetReaderAdmin. It seems to be good if the doc is centralized. This also adds missing API doc too. ### Does this PR introduce _any_ user-facing change? No, dev only. ### How was this patch tested? Doc only. Closes #30961 from viirya/SPARK-33932. Authored-by: Liang-Chi Hsieh Signed-off-by: HyukjinKwon --- .../sql/kafka010/KafkaOffsetReader.scala | 66 ++++++++++++++++++- .../sql/kafka010/KafkaOffsetReaderAdmin.scala | 47 ------------- .../kafka010/KafkaOffsetReaderConsumer.scala | 46 +------------ 3 files changed, 66 insertions(+), 93 deletions(-) diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala index b1992c1dc6a0a..546970507a2ed 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala @@ -28,7 +28,6 @@ import org.apache.spark.sql.internal.SQLConf /** * Base trait to fetch offsets from Kafka. The implementations are * [[KafkaOffsetReaderConsumer]] and [[KafkaOffsetReaderAdmin]]. - * Please see the documentation and API description there. */ private[kafka010] trait KafkaOffsetReader { @@ -39,22 +38,87 @@ private[kafka010] trait KafkaOffsetReader { // This is needed here because of KafkaContinuousStream val driverKafkaParams: ju.Map[String, Object] + /** + * Closes the connection to Kafka, and cleans up state. + */ def close(): Unit + + /** + * Fetch the partition offsets for the topic partitions that are indicated + * in the [[ConsumerStrategy]] and [[KafkaOffsetRangeLimit]]. + */ def fetchPartitionOffsets( offsetRangeLimit: KafkaOffsetRangeLimit, isStartingOffsets: Boolean): Map[TopicPartition, Long] + + /** + * Resolves the specific offsets based on Kafka seek positions. + * This method resolves offset value -1 to the latest and -2 to the + * earliest Kafka seek position. 
+ * + * @param partitionOffsets the specific offsets to resolve + * @param reportDataLoss callback to either report or log data loss depending on setting + */ def fetchSpecificOffsets( partitionOffsets: Map[TopicPartition, Long], reportDataLoss: String => Unit): KafkaSourceOffset + + /** + * Resolves the specific offsets based on timestamp per topic-partition. + * The returned offset for each partition is the earliest offset whose timestamp is greater + * than or equal to the given timestamp in the corresponding partition. If the matched offset + * doesn't exist, depending on `failsOnNoMatchingOffset` parameter, the offset will be set to + * latest or this method throws an error. + * + * @param partitionTimestamps the timestamp per topic-partition. + * @param failsOnNoMatchingOffset whether to fail the query when no matched offset can be found. + */ def fetchSpecificTimestampBasedOffsets( partitionTimestamps: Map[TopicPartition, Long], failsOnNoMatchingOffset: Boolean): KafkaSourceOffset + + /** + * Fetch the earliest offsets for the topic partitions that are indicated + * in the [[ConsumerStrategy]]. + */ def fetchEarliestOffsets(): Map[TopicPartition, Long] + + /** + * Fetch the latest offsets for the topic partitions that are indicated + * in the [[ConsumerStrategy]]. + * + * In order to avoid unknown issues, we use the given `knownOffsets` to audit the + * latest offsets returned by Kafka. If we find some incorrect offsets (a latest offset is less + * than an offset in `knownOffsets`), we will retry at most `maxOffsetFetchAttempts` times. When + * a topic is recreated, the latest offsets may be less than offsets in `knownOffsets`. We cannot + * distinguish this with issues like KAFKA-7703, so we just return whatever we get from Kafka + * after retrying. + */ def fetchLatestOffsets(knownOffsets: Option[PartitionOffsetMap]): PartitionOffsetMap + + /** + * Fetch the earliest offsets for specific topic partitions. + * The return result may not contain some partitions if they are deleted. + */ def fetchEarliestOffsets(newPartitions: Seq[TopicPartition]): Map[TopicPartition, Long] + + /** + * Return the offset ranges for a Kafka batch query. If `minPartitions` is set, this method may + * split partitions to respect it. Since offsets can be early and late binding which are evaluated + * on the executors, in order to divvy up the partitions we need to perform some substitutions. We + * don't want to send exact offsets to the executors, because data may age out before we can + * consume the data. This method makes some approximate splitting, and replaces the special offset + * values in the final output. + */ def getOffsetRangesFromUnresolvedOffsets( startingOffsets: KafkaOffsetRangeLimit, endingOffsets: KafkaOffsetRangeLimit): Seq[KafkaOffsetRange] + + /** + * Return the offset ranges for a Kafka streaming batch. If `minPartitions` is set, this method + * may split partitions to respect it. If any data lost issue is detected, `reportDataLoss` will + * be called. 
+ */ def getOffsetRangesFromResolvedOffsets( fromPartitionOffsets: PartitionOffsetMap, untilPartitionOffsets: PartitionOffsetMap, diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala index f9a714c37cb9e..6f4cb895f363d 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala @@ -108,17 +108,10 @@ private[kafka010] class KafkaOffsetReaderAdmin( override def toString(): String = consumerStrategy.toString - /** - * Closes the connection to Kafka, and cleans up state. - */ override def close(): Unit = { stopAdmin() } - /** - * Fetch the partition offsets for the topic partitions that are indicated - * in the [[ConsumerStrategy]] and [[KafkaOffsetRangeLimit]]. - */ override def fetchPartitionOffsets( offsetRangeLimit: KafkaOffsetRangeLimit, isStartingOffsets: Boolean): Map[TopicPartition, Long] = { @@ -148,14 +141,6 @@ private[kafka010] class KafkaOffsetReaderAdmin( } } - /** - * Resolves the specific offsets based on Kafka seek positions. - * This method resolves offset value -1 to the latest and -2 to the - * earliest Kafka seek position. - * - * @param partitionOffsets the specific offsets to resolve - * @param reportDataLoss callback to either report or log data loss depending on setting - */ override def fetchSpecificOffsets( partitionOffsets: Map[TopicPartition, Long], reportDataLoss: String => Unit): KafkaSourceOffset = { @@ -246,10 +231,6 @@ private[kafka010] class KafkaOffsetReaderAdmin( KafkaSourceOffset(fetched) } - /** - * Fetch the earliest offsets for the topic partitions that are indicated - * in the [[ConsumerStrategy]]. - */ override def fetchEarliestOffsets(): Map[TopicPartition, Long] = partitionsAssignedToAdmin( partitions => { val listOffsetsParams = partitions.asScala.map(p => p -> OffsetSpec.earliest()).toMap.asJava @@ -258,17 +239,6 @@ private[kafka010] class KafkaOffsetReaderAdmin( partitionOffsets }) - /** - * Fetch the latest offsets for the topic partitions that are indicated - * in the [[ConsumerStrategy]]. - * - * In order to avoid unknown issues, we use the given `knownOffsets` to audit the - * latest offsets returned by Kafka. If we find some incorrect offsets (a latest offset is less - * than an offset in `knownOffsets`), we will retry at most `maxOffsetFetchAttempts` times. When - * a topic is recreated, the latest offsets may be less than offsets in `knownOffsets`. We cannot - * distinguish this with issues like KAFKA-7703, so we just return whatever we get from Kafka - * after retrying. - */ override def fetchLatestOffsets( knownOffsets: Option[PartitionOffsetMap]): PartitionOffsetMap = partitionsAssignedToAdmin { partitions => { @@ -326,10 +296,6 @@ private[kafka010] class KafkaOffsetReaderAdmin( } } - /** - * Fetch the earliest offsets for specific topic partitions. - * The return result may not contain some partitions if they are deleted. - */ override def fetchEarliestOffsets( newPartitions: Seq[TopicPartition]): Map[TopicPartition, Long] = { if (newPartitions.isEmpty) { @@ -349,14 +315,6 @@ private[kafka010] class KafkaOffsetReaderAdmin( } } - /** - * Return the offset ranges for a Kafka batch query. If `minPartitions` is set, this method may - * split partitions to respect it. 
Since offsets can be early and late binding which are evaluated - * on the executors, in order to divvy up the partitions we need to perform some substitutions. We - * don't want to send exact offsets to the executors, because data may age out before we can - * consume the data. This method makes some approximate splitting, and replaces the special offset - * values in the final output. - */ override def getOffsetRangesFromUnresolvedOffsets( startingOffsets: KafkaOffsetRangeLimit, endingOffsets: KafkaOffsetRangeLimit): Seq[KafkaOffsetRange] = { @@ -429,11 +387,6 @@ private[kafka010] class KafkaOffsetReaderAdmin( .map(_.toString) } - /** - * Return the offset ranges for a Kafka streaming batch. If `minPartitions` is set, this method - * may split partitions to respect it. If any data lost issue is detected, `reportDataLoss` will - * be called. - */ override def getOffsetRangesFromResolvedOffsets( fromPartitionOffsets: PartitionOffsetMap, untilPartitionOffsets: PartitionOffsetMap, diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderConsumer.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderConsumer.scala index eca41c510f1f2..ead819e4c27aa 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderConsumer.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderConsumer.scala @@ -116,9 +116,6 @@ private[kafka010] class KafkaOffsetReaderConsumer( override def toString(): String = consumerStrategy.toString - /** - * Closes the connection to Kafka, and cleans up state. - */ override def close(): Unit = { if (_consumer != null) uninterruptibleThreadRunner.runUninterruptibly { stopConsumer() } uninterruptibleThreadRunner.shutdown() @@ -137,10 +134,6 @@ private[kafka010] class KafkaOffsetReaderConsumer( partitions.asScala.toSet } - /** - * Fetch the partition offsets for the topic partitions that are indicated - * in the [[ConsumerStrategy]] and [[KafkaOffsetRangeLimit]]. - */ override def fetchPartitionOffsets( offsetRangeLimit: KafkaOffsetRangeLimit, isStartingOffsets: Boolean): Map[TopicPartition, Long] = { @@ -170,14 +163,6 @@ private[kafka010] class KafkaOffsetReaderConsumer( } } - /** - * Resolves the specific offsets based on Kafka seek positions. - * This method resolves offset value -1 to the latest and -2 to the - * earliest Kafka seek position. - * - * @param partitionOffsets the specific offsets to resolve - * @param reportDataLoss callback to either report or log data loss depending on setting - */ override def fetchSpecificOffsets( partitionOffsets: Map[TopicPartition, Long], reportDataLoss: String => Unit): KafkaSourceOffset = { @@ -278,10 +263,6 @@ private[kafka010] class KafkaOffsetReaderConsumer( KafkaSourceOffset(fetched) } - /** - * Fetch the earliest offsets for the topic partitions that are indicated - * in the [[ConsumerStrategy]]. - */ override def fetchEarliestOffsets(): Map[TopicPartition, Long] = partitionsAssignedToConsumer( partitions => { logDebug("Seeking to the beginning") @@ -293,18 +274,10 @@ private[kafka010] class KafkaOffsetReaderConsumer( }, fetchingEarliestOffset = true) /** - * Fetch the latest offsets for the topic partitions that are indicated - * in the [[ConsumerStrategy]]. - * + * Specific to `KafkaOffsetReaderConsumer`: * Kafka may return earliest offsets when we are requesting latest offsets if `poll` is called * right before `seekToEnd` (KAFKA-7703). 
As a workaround, we will call `position` right after * `poll` to wait until the potential offset request triggered by `poll(0)` is done. - * - * In addition, to avoid other unknown issues, we also use the given `knownOffsets` to audit the - * latest offsets returned by Kafka. If we find some incorrect offsets (a latest offset is less - * than an offset in `knownOffsets`), we will retry at most `maxOffsetFetchAttempts` times. When - * a topic is recreated, the latest offsets may be less than offsets in `knownOffsets`. We cannot - * distinguish this with KAFKA-7703, so we just return whatever we get from Kafka after retrying. */ override def fetchLatestOffsets( knownOffsets: Option[PartitionOffsetMap]): PartitionOffsetMap = @@ -364,10 +337,6 @@ private[kafka010] class KafkaOffsetReaderConsumer( } } - /** - * Fetch the earliest offsets for specific topic partitions. - * The return result may not contain some partitions if they are deleted. - */ override def fetchEarliestOffsets( newPartitions: Seq[TopicPartition]): Map[TopicPartition, Long] = { if (newPartitions.isEmpty) { @@ -387,14 +356,6 @@ private[kafka010] class KafkaOffsetReaderConsumer( } } - /** - * Return the offset ranges for a Kafka batch query. If `minPartitions` is set, this method may - * split partitions to respect it. Since offsets can be early and late binding which are evaluated - * on the executors, in order to divvy up the partitions we need to perform some substitutions. We - * don't want to send exact offsets to the executors, because data may age out before we can - * consume the data. This method makes some approximate splitting, and replaces the special offset - * values in the final output. - */ override def getOffsetRangesFromUnresolvedOffsets( startingOffsets: KafkaOffsetRangeLimit, endingOffsets: KafkaOffsetRangeLimit): Seq[KafkaOffsetRange] = { @@ -467,11 +428,6 @@ private[kafka010] class KafkaOffsetReaderConsumer( .map(_.toString) } - /** - * Return the offset ranges for a Kafka streaming batch. If `minPartitions` is set, this method - * may split partitions to respect it. If any data lost issue is detected, `reportDataLoss` will - * be called. - */ override def getOffsetRangesFromResolvedOffsets( fromPartitionOffsets: PartitionOffsetMap, untilPartitionOffsets: PartitionOffsetMap, From 448494ebcf88b4cd0a89ee933bd042d5e45169a1 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Wed, 30 Dec 2020 14:06:34 +0900 Subject: [PATCH 0913/1009] [SPARK-33874][K8S] Handle long lived sidecars ### What changes were proposed in this pull request? For liveness check when checkAllContainers is not set, we check the liveness status of the Spark container if we can find it. ### Why are the changes needed? Some environments may deploy long lived logs collecting side cars which outlive the Spark application. Just because they remain alive does not mean the Spark executor should keep running. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Extended the existing pod status tests. Closes #30892 from holdenk/SPARK-33874-handle-long-lived-sidecars. 
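For illustration, the decision logic can be pictured without the Kubernetes client types. A minimal Scala sketch, assuming simplified stand-in classes (`ContainerInfo` and the `Pod*` objects here are made up, not the fabric8 or Spark types):

```scala
// Illustrative sketch only: simplified stand-ins for the pod/container status model.
sealed trait PodState
case object PodRunning extends PodState
case object PodSucceeded extends PodState
case object PodFailed extends PodState

// Container name plus its terminated exit code, if it has terminated.
case class ContainerInfo(name: String, terminatedExitCode: Option[Int])

// When the pod phase is "running", the Spark container's terminal state (if any)
// decides the executor state; a still-running sidecar no longer keeps it "running".
def executorState(sparkContainerName: String, containers: Seq[ContainerInfo]): PodState =
  containers.find(_.name == sparkContainerName) match {
    case Some(ContainerInfo(_, Some(0))) => PodSucceeded
    case Some(ContainerInfo(_, Some(_))) => PodFailed
    case _                               => PodRunning // still running, or Spark container not found
  }

// Executor exited cleanly while a log-collecting sidecar is still alive:
executorState("spark-kubernetes-executor", Seq(
  ContainerInfo("spark-kubernetes-executor", Some(0)),
  ContainerInfo("log-collector", None)))  // => PodSucceeded
```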
Lead-authored-by: Holden Karau Co-authored-by: Holden Karau Signed-off-by: HyukjinKwon --- .../cluster/k8s/ExecutorPodsSnapshot.scala | 28 ++++++++++++++++++- .../k8s/KubernetesClusterManager.scala | 4 +++ ...erministicExecutorPodsSnapshotsStore.scala | 4 +++ .../k8s/ExecutorLifecycleTestUtils.scala | 25 +++++++++++++++++ .../k8s/ExecutorPodsSnapshotSuite.scala | 4 ++- 5 files changed, 63 insertions(+), 2 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala index e81d213699e32..71355c7af10fa 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala @@ -18,6 +18,8 @@ package org.apache.spark.scheduler.cluster.k8s import java.util.Locale +import scala.collection.JavaConverters._ + import io.fabric8.kubernetes.api.model.ContainerStateTerminated import io.fabric8.kubernetes.api.model.Pod @@ -39,6 +41,7 @@ private[spark] case class ExecutorPodsSnapshot(executorPods: Map[Long, ExecutorP object ExecutorPodsSnapshot extends Logging { private var shouldCheckAllContainers: Boolean = _ + private var sparkContainerName: String = _ def apply(executorPods: Seq[Pod]): ExecutorPodsSnapshot = { ExecutorPodsSnapshot(toStatesByExecutorId(executorPods)) @@ -50,6 +53,10 @@ object ExecutorPodsSnapshot extends Logging { shouldCheckAllContainers = watchAllContainers } + def setSparkContainerName(containerName: String): Unit = { + sparkContainerName = containerName + } + private def toStatesByExecutorId(executorPods: Seq[Pod]): Map[Long, ExecutorPodState] = { executorPods.map { pod => (pod.getMetadata.getLabels.get(SPARK_EXECUTOR_ID_LABEL).toLong, toState(pod)) @@ -65,6 +72,7 @@ object ExecutorPodsSnapshot extends Logging { case "pending" => PodPending(pod) case "running" => + // If we're checking all containers look for any non-zero exits if (shouldCheckAllContainers && "Never" == pod.getSpec.getRestartPolicy && pod.getStatus.getContainerStatuses.stream @@ -72,7 +80,25 @@ object ExecutorPodsSnapshot extends Logging { .anyMatch(t => t != null && t.getExitCode != 0)) { PodFailed(pod) } else { - PodRunning(pod) + // Otherwise look for the Spark container + val sparkContainerStatusOpt = pod.getStatus.getContainerStatuses.asScala + .find(_.getName() == sparkContainerName) + sparkContainerStatusOpt match { + case Some(sparkContainerStatus) => + sparkContainerStatus.getState.getTerminated match { + case t if t.getExitCode != 0 => + PodFailed(pod) + case t if t.getExitCode == 0 => + PodSucceeded(pod) + case _ => + PodRunning(pod) + } + // If we can't find the Spark container status, fall back to the pod status + case _ => + logWarning(s"Unable to find container ${sparkContainerName} in pod ${pod} " + + "defaulting to entire pod status (running).") + PodRunning(pod) + } } case "failed" => PodFailed(pod) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala index 151e98ba17e3b..939a4ee9c7721 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala +++ 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala @@ -25,6 +25,7 @@ import io.fabric8.kubernetes.client.Config import org.apache.spark.SparkContext import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesUtils, SparkKubernetesClientFactory} import org.apache.spark.deploy.k8s.Config._ +import org.apache.spark.deploy.k8s.Constants.DEFAULT_EXECUTOR_CONTAINER_NAME import org.apache.spark.internal.Logging import org.apache.spark.scheduler.{ExternalClusterManager, SchedulerBackend, TaskScheduler, TaskSchedulerImpl} import org.apache.spark.util.{SystemClock, ThreadUtils} @@ -96,6 +97,9 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager wit ExecutorPodsSnapshot.setShouldCheckAllContainers( sc.conf.get(KUBERNETES_EXECUTOR_CHECK_ALL_CONTAINERS)) + val sparkContainerName = sc.conf.get(KUBERNETES_EXECUTOR_PODTEMPLATE_CONTAINER_NAME) + .getOrElse(DEFAULT_EXECUTOR_CONTAINER_NAME) + ExecutorPodsSnapshot.setSparkContainerName(sparkContainerName) val subscribersExecutor = ThreadUtils .newDaemonThreadPoolScheduledExecutor( "kubernetes-executor-snapshots-subscribers", 2) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/DeterministicExecutorPodsSnapshotsStore.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/DeterministicExecutorPodsSnapshotsStore.scala index 6e989316310e6..c30efde7b02be 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/DeterministicExecutorPodsSnapshotsStore.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/DeterministicExecutorPodsSnapshotsStore.scala @@ -19,9 +19,13 @@ package org.apache.spark.scheduler.cluster.k8s import io.fabric8.kubernetes.api.model.Pod import scala.collection.mutable +import org.apache.spark.deploy.k8s.Constants.DEFAULT_EXECUTOR_CONTAINER_NAME + + class DeterministicExecutorPodsSnapshotsStore extends ExecutorPodsSnapshotsStore { ExecutorPodsSnapshot.setShouldCheckAllContainers(false) + ExecutorPodsSnapshot.setSparkContainerName(DEFAULT_EXECUTOR_CONTAINER_NAME) private val snapshotsBuffer = mutable.Buffer.empty[ExecutorPodsSnapshot] private val subscribers = mutable.Buffer.empty[Seq[ExecutorPodsSnapshot] => Unit] diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala index ad79e3a39832b..225278c2aad71 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala @@ -106,6 +106,31 @@ object ExecutorLifecycleTestUtils { .build() } + /** + * This creates a pod with a finished executor and running sidecar + */ + def finishedExecutorWithRunningSidecar( + executorId: Long, exitCode: Int): Pod = { + new PodBuilder(podWithAttachedContainerForId(executorId, DEFAULT_RESOURCE_PROFILE_ID)) + .editOrNewStatus() + .withPhase("running") + .addNewContainerStatus() + .withNewState() + .withNewTerminated() + .withExitCode(exitCode) + .endTerminated() + .endState() + .endContainerStatus() + .addNewContainerStatus() + .withNewState() + .withNewRunning() + .endRunning() + .endState() + 
.endContainerStatus() + .endStatus() + .build() + } + def succeededExecutor(executorId: Long, rpId: Int = DEFAULT_RESOURCE_PROFILE_ID): Pod = { new PodBuilder(podWithAttachedContainerForId(executorId, rpId)) .editOrNewStatus() diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotSuite.scala index ad12461bfaf8c..8d285abe753d5 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotSuite.scala @@ -43,7 +43,9 @@ class ExecutorPodsSnapshotSuite extends SparkFunSuite { testCase(succeededExecutor(2), PodSucceeded), testCase(failedExecutorWithoutDeletion(3), PodFailed), testCase(deletedExecutor(4), PodDeleted), - testCase(unknownExecutor(5), PodUnknown) + testCase(unknownExecutor(5), PodUnknown), + testCase(finishedExecutorWithRunningSidecar(6, 0), PodSucceeded), + testCase(finishedExecutorWithRunningSidecar(7, 1), PodFailed) ) doTest(testCases) } From 49aa6ebef112bdd4169bbf6b4c85b6712281bac0 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Wed, 30 Dec 2020 05:28:01 +0000 Subject: [PATCH 0914/1009] [SPARK-32684][SQL][TESTS] Add a test case to check if null value is same as Hive's '\\N' in script transformation ### What changes were proposed in this pull request? In hive script transform serde mode, NULL format default is `\\N` ``` String nullString = tbl.getProperty( serdeConstants.SERIALIZATION_NULL_FORMAT, "\\N"); nullSequence = new Text(nullString); ``` I make a mistake that in Spark's code we need to fix and keep same with hive too. So add some test case to show this issue. ### Why are the changes needed? add UT ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added UT Closes #30946 from AngersZhuuuu/SPARK-32684. 
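As a rough illustration of the behaviour being pinned down (assuming a spark-shell with Hive support enabled, as in the suite below):

```scala
// Illustration only, mirroring the first test added below.
// With the Hive serde defaults, NULL columns fed through TRANSFORM come back using
// Hive's serialization.null.format, i.e. the literal "\N".
val q = spark.sql(
  """SELECT TRANSFORM(null, null, null)
    |USING 'cat'
    |FROM (SELECT 1 AS a) t
    |""".stripMargin)
q.show(truncate = false)
// The new test below asserts the equivalent result via
// checkAnswer(query1, identity, Row(null, "\\N\t\\N") :: Nil)
```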
Authored-by: angerszhu Signed-off-by: Wenchen Fan --- .../HiveScriptTransformationSuite.scala | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala index 266c526b1a24b..3892caa51eca9 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala @@ -501,4 +501,31 @@ class HiveScriptTransformationSuite extends BaseScriptTransformationSuite with T """.stripMargin) checkAnswer(query4, identity, Row(null) :: Nil) } + + test("SPARK-32684: Script transform hive serde mode null format is same with hive as '\\N'") { + val query1 = sql( + """ + |SELECT TRANSFORM(null, null, null) + |USING 'cat' + |FROM (SELECT 1 AS a) t + """.stripMargin) + checkAnswer(query1, identity, Row(null, "\\N\t\\N") :: Nil) + + val query2 = sql( + """ + |SELECT TRANSFORM(null, null, null) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = ',' + | ) + |USING 'cat' AS (a) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '&' + | ) + |FROM (SELECT 1 AS a) t + """.stripMargin) + checkAnswer(query2, identity, Row("\\N,\\N,\\N") :: Nil) + + } } From 687f465244301112a1f6cafa5d9361b2c7d7b4a5 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Wed, 30 Dec 2020 06:06:17 +0000 Subject: [PATCH 0915/1009] [SPARK-33890][SQL] Improve the implement of trim/trimleft/trimright ### What changes were proposed in this pull request? The current implement of trim/trimleft/trimright have somewhat redundant. ### Why are the changes needed? Improve the implement of trim/trimleft/trimright ### Does this PR introduce _any_ user-facing change? 'No'. ### How was this patch tested? Jenkins test Closes #30905 from beliefer/SPARK-33890. 
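The refactor is essentially a template-method extraction; a minimal sketch of the same idea, assuming simplified types (plain `String` instead of `UTF8String`, no codegen, names made up):

```scala
// Sketch only: shared null handling lives in the base trait, and each concrete trim
// expression contributes just its direction-specific primitive (cf. doEval/trimMethod).
trait TrimSketch {
  protected def doEval(src: String): String
  def eval(src: String): String = if (src == null) null else doEval(src)
}
object TrimLeftSketch extends TrimSketch {
  protected def doEval(src: String): String = src.dropWhile(_ == ' ')
}
object TrimRightSketch extends TrimSketch {
  protected def doEval(src: String): String = src.reverse.dropWhile(_ == ' ').reverse
}

TrimLeftSketch.eval("  spark  ")   // "spark  "
TrimRightSketch.eval("  spark  ")  // "  spark"
```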
Lead-authored-by: gengjiaan Co-authored-by: beliefer Co-authored-by: Jiaan Geng Co-authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../expressions/stringExpressions.scala | 202 ++++++------------ 1 file changed, 64 insertions(+), 138 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 6caf4395090f1..9317684d0376f 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -764,6 +764,55 @@ trait String2TrimExpression extends Expression with ImplicitCastInputTypes { override def nullable: Boolean = children.exists(_.nullable) override def foldable: Boolean = children.forall(_.foldable) + protected def doEval(srcString: UTF8String): UTF8String + protected def doEval(srcString: UTF8String, trimString: UTF8String): UTF8String + + override def eval(input: InternalRow): Any = { + val srcString = srcStr.eval(input).asInstanceOf[UTF8String] + if (srcString == null) { + null + } else if (trimStr.isDefined) { + doEval(srcString, trimStr.get.eval(input).asInstanceOf[UTF8String]) + } else { + doEval(srcString) + } + } + + protected val trimMethod: String + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val evals = children.map(_.genCode(ctx)) + val srcString = evals(0) + + if (evals.length == 1) { + ev.copy(code = code""" + |${srcString.code} + |boolean ${ev.isNull} = false; + |UTF8String ${ev.value} = null; + |if (${srcString.isNull}) { + | ${ev.isNull} = true; + |} else { + | ${ev.value} = ${srcString.value}.$trimMethod(); + |}""".stripMargin) + } else { + val trimString = evals(1) + ev.copy(code = code""" + |${srcString.code} + |boolean ${ev.isNull} = false; + |UTF8String ${ev.value} = null; + |if (${srcString.isNull}) { + | ${ev.isNull} = true; + |} else { + | ${trimString.code} + | if (${trimString.isNull}) { + | ${ev.isNull} = true; + | } else { + | ${ev.value} = ${srcString.value}.$trimMethod(${trimString.value}); + | } + |}""".stripMargin) + } + } + override def sql: String = if (trimStr.isDefined) { s"TRIM($direction ${trimStr.get.sql} FROM ${srcStr.sql})" } else { @@ -840,9 +889,7 @@ object StringTrim { """, since = "1.5.0", group = "string_funcs") -case class StringTrim( - srcStr: Expression, - trimStr: Option[Expression] = None) +case class StringTrim(srcStr: Expression, trimStr: Option[Expression] = None) extends String2TrimExpression { def this(trimStr: Expression, srcStr: Expression) = this(srcStr, Option(trimStr)) @@ -853,51 +900,12 @@ case class StringTrim( override protected def direction: String = "BOTH" - override def eval(input: InternalRow): Any = { - val srcString = srcStr.eval(input).asInstanceOf[UTF8String] - if (srcString == null) { - null - } else { - if (trimStr.isDefined) { - srcString.trim(trimStr.get.eval(input).asInstanceOf[UTF8String]) - } else { - srcString.trim() - } - } - } + override def doEval(srcString: UTF8String): UTF8String = srcString.trim() - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val evals = children.map(_.genCode(ctx)) - val srcString = evals(0) + override def doEval(srcString: UTF8String, trimString: UTF8String): UTF8String = + srcString.trim(trimString) - if (evals.length == 1) { - ev.copy(evals.map(_.code) :+ code""" - boolean ${ev.isNull} = false; - UTF8String 
${ev.value} = null; - if (${srcString.isNull}) { - ${ev.isNull} = true; - } else { - ${ev.value} = ${srcString.value}.trim(); - }""") - } else { - val trimString = evals(1) - val getTrimFunction = - s""" - if (${trimString.isNull}) { - ${ev.isNull} = true; - } else { - ${ev.value} = ${srcString.value}.trim(${trimString.value}); - }""" - ev.copy(evals.map(_.code) :+ code""" - boolean ${ev.isNull} = false; - UTF8String ${ev.value} = null; - if (${srcString.isNull}) { - ${ev.isNull} = true; - } else { - $getTrimFunction - }""") - } - } + override val trimMethod: String = "trim" } object StringTrimLeft { @@ -934,9 +942,7 @@ object StringTrimLeft { """, since = "1.5.0", group = "string_funcs") -case class StringTrimLeft( - srcStr: Expression, - trimStr: Option[Expression] = None) +case class StringTrimLeft(srcStr: Expression, trimStr: Option[Expression] = None) extends String2TrimExpression { def this(trimStr: Expression, srcStr: Expression) = this(srcStr, Option(trimStr)) @@ -947,51 +953,12 @@ case class StringTrimLeft( override protected def direction: String = "LEADING" - override def eval(input: InternalRow): Any = { - val srcString = srcStr.eval(input).asInstanceOf[UTF8String] - if (srcString == null) { - null - } else { - if (trimStr.isDefined) { - srcString.trimLeft(trimStr.get.eval(input).asInstanceOf[UTF8String]) - } else { - srcString.trimLeft() - } - } - } + override def doEval(srcString: UTF8String): UTF8String = srcString.trimLeft() - override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val evals = children.map(_.genCode(ctx)) - val srcString = evals(0) + override def doEval(srcString: UTF8String, trimString: UTF8String): UTF8String = + srcString.trimLeft(trimString) - if (evals.length == 1) { - ev.copy(evals.map(_.code) :+ code""" - boolean ${ev.isNull} = false; - UTF8String ${ev.value} = null; - if (${srcString.isNull}) { - ${ev.isNull} = true; - } else { - ${ev.value} = ${srcString.value}.trimLeft(); - }""") - } else { - val trimString = evals(1) - val getTrimLeftFunction = - s""" - if (${trimString.isNull}) { - ${ev.isNull} = true; - } else { - ${ev.value} = ${srcString.value}.trimLeft(${trimString.value}); - }""" - ev.copy(evals.map(_.code) :+ code""" - boolean ${ev.isNull} = false; - UTF8String ${ev.value} = null; - if (${srcString.isNull}) { - ${ev.isNull} = true; - } else { - $getTrimLeftFunction - }""") - } - } + override val trimMethod: String = "trimLeft" } object StringTrimRight { @@ -1030,9 +997,7 @@ object StringTrimRight { since = "1.5.0", group = "string_funcs") // scalastyle:on line.size.limit -case class StringTrimRight( - srcStr: Expression, - trimStr: Option[Expression] = None) +case class StringTrimRight(srcStr: Expression, trimStr: Option[Expression] = None) extends String2TrimExpression { def this(trimStr: Expression, srcStr: Expression) = this(srcStr, Option(trimStr)) @@ -1043,51 +1008,12 @@ case class StringTrimRight( override protected def direction: String = "TRAILING" - override def eval(input: InternalRow): Any = { - val srcString = srcStr.eval(input).asInstanceOf[UTF8String] - if (srcString == null) { - null - } else { - if (trimStr.isDefined) { - srcString.trimRight(trimStr.get.eval(input).asInstanceOf[UTF8String]) - } else { - srcString.trimRight() - } - } - } + override def doEval(srcString: UTF8String): UTF8String = srcString.trimRight() - override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val evals = children.map(_.genCode(ctx)) - val srcString = evals(0) + override def 
doEval(srcString: UTF8String, trimString: UTF8String): UTF8String = + srcString.trimRight(trimString) - if (evals.length == 1) { - ev.copy(evals.map(_.code) :+ code""" - boolean ${ev.isNull} = false; - UTF8String ${ev.value} = null; - if (${srcString.isNull}) { - ${ev.isNull} = true; - } else { - ${ev.value} = ${srcString.value}.trimRight(); - }""") - } else { - val trimString = evals(1) - val getTrimRightFunction = - s""" - if (${trimString.isNull}) { - ${ev.isNull} = true; - } else { - ${ev.value} = ${srcString.value}.trimRight(${trimString.value}); - }""" - ev.copy(evals.map(_.code) :+ code""" - boolean ${ev.isNull} = false; - UTF8String ${ev.value} = null; - if (${srcString.isNull}) { - ${ev.isNull} = true; - } else { - $getTrimRightFunction - }""") - } - } + override val trimMethod: String = "trimRight" } /** From 4a669f583089fc704cdc46cff8f1680470a068ee Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 30 Dec 2020 16:15:41 +0900 Subject: [PATCH 0916/1009] [MINOR][SS] Call fetchEarliestOffsets when it is necessary ### What changes were proposed in this pull request? This minor patch changes two variables where calling `fetchEarliestOffsets` to `lazy` because these values are not always necessary. ### Why are the changes needed? To avoid unnecessary Kafka RPC calls. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Closes #30969 from viirya/ss-minor3. Authored-by: Liang-Chi Hsieh Signed-off-by: HyukjinKwon --- .../org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala | 2 +- .../main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala index c25b8b4e510a0..d6fd3aeb7f670 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala @@ -164,7 +164,7 @@ private[kafka010] class KafkaMicroBatchStream( limit: Long, from: PartitionOffsetMap, until: PartitionOffsetMap): PartitionOffsetMap = { - val fromNew = kafkaOffsetReader.fetchEarliestOffsets(until.keySet.diff(from.keySet).toSeq) + lazy val fromNew = kafkaOffsetReader.fetchEarliestOffsets(until.keySet.diff(from.keySet).toSeq) val sizes = until.flatMap { case (tp, end) => // If begin isn't defined, something's wrong, but let alert logic in getBatch handle it diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala index 71ccb5f952f0a..b4e5a8db7d344 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala @@ -154,7 +154,7 @@ private[kafka010] class KafkaSource( limit: Long, from: Map[TopicPartition, Long], until: Map[TopicPartition, Long]): Map[TopicPartition, Long] = { - val fromNew = kafkaReader.fetchEarliestOffsets(until.keySet.diff(from.keySet).toSeq) + lazy val fromNew = kafkaReader.fetchEarliestOffsets(until.keySet.diff(from.keySet).toSeq) val sizes = until.flatMap { case (tp, end) => // If begin isn't defined, something's wrong, but let alert logic in getBatch handle it From 
403bf55cbef1e4cf50dc868202cccfb867279bbd Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 30 Dec 2020 16:37:23 +0900 Subject: [PATCH 0917/1009] [SPARK-33927][BUILD] Fix Dockerfile for Spark release to work ### What changes were proposed in this pull request? This PR proposes to fix the `Dockerfile` for Spark release. - Port https://github.com/apache/spark/commit/b135db3b1a5c0b2170e98b97f6160bcf55903799 to `Dockerfile` - Upgrade Ubuntu 18.04 -> 20.04 (because of porting b135db3) - Remove Python 2 (because of Ubuntu upgrade) - Use built-in Python 3.8.5 (because of Ubuntu upgrade) - Node.js 11 -> 12 (because of Ubuntu upgrade) - Ruby 2.5 -> 2.7 (because of Ubuntu upgrade) - Python dependencies and Jekyll + plugins upgrade to the latest as it's used in GitHub Actions build (unrelated to the issue itself) ### Why are the changes needed? To make a Spark release :-). ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? Manually tested via: ```bash cd dev/create-release/spark-rm docker build -t spark-rm --build-arg UID=$UID . ``` ``` ... Successfully built 516d7943634f Successfully tagged spark-rm:latest ``` Closes #30971 from HyukjinKwon/SPARK-33927. Lead-authored-by: Hyukjin Kwon Co-authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- dev/create-release/spark-rm/Dockerfile | 32 +++++++++++++------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile index 6b32f10490719..8735d1fd23ce2 100644 --- a/dev/create-release/spark-rm/Dockerfile +++ b/dev/create-release/spark-rm/Dockerfile @@ -15,16 +15,20 @@ # limitations under the License. # -# Image for building Spark releases. Based on Ubuntu 18.04. +# Image for building Spark releases. Based on Ubuntu 20.04. # # Includes: # * Java 8 # * Ivy -# * Python (2.7.15/3.6.7) -# * R-base/R-base-dev (4.0.2) -# * Ruby 2.3 build utilities +# * Python (3.8.5) +# * R-base/R-base-dev (4.0.3) +# * Ruby (2.7.0) +# +# You can test it as below: +# cd dev/create-release/spark-rm +# docker build -t spark-rm --build-arg UID=$UID . -FROM ubuntu:18.04 +FROM ubuntu:20.04 # For apt to be noninteractive ENV DEBIAN_FRONTEND noninteractive @@ -36,8 +40,8 @@ ARG APT_INSTALL="apt-get install --no-install-recommends -y" # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. # See also https://github.com/sphinx-doc/sphinx/issues/7551. # We should use the latest Sphinx version once this is fixed. -ARG PIP_PKGS="sphinx==3.0.4 mkdocs==1.0.4 numpy==1.18.1 pydata_sphinx_theme==0.3.1 ipython==7.16.1 nbsphinx==0.7.1 numpydoc==1.1.0" -ARG GEM_PKGS="jekyll:4.0.0 jekyll-redirect-from:0.16.0 rouge:3.15.0" +ARG PIP_PKGS="sphinx==3.0.4 mkdocs==1.1.2 numpy==1.19.4 pydata_sphinx_theme==0.4.1 ipython==7.19.0 nbsphinx==0.8.0 numpydoc==1.1.0" +ARG GEM_PKGS="jekyll:4.2.0 jekyll-redirect-from:0.16.0 rouge:3.26.0" # Install extra needed repos and refresh. # - CRAN repo @@ -46,7 +50,7 @@ ARG GEM_PKGS="jekyll:4.0.0 jekyll-redirect-from:0.16.0 rouge:3.15.0" # This is all in a single "RUN" command so that if anything changes, "apt update" is run to fetch # the most current package versions (instead of potentially using old versions cached by docker). 
RUN apt-get clean && apt-get update && $APT_INSTALL gnupg ca-certificates && \ - echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' >> /etc/apt/sources.list && \ + echo 'deb https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/' >> /etc/apt/sources.list && \ gpg --keyserver keyserver.ubuntu.com --recv-key E298A3A825C0D65DFD57CBB651716619E084DAB9 && \ gpg -a --export E084DAB9 | apt-key add - && \ apt-get clean && \ @@ -54,7 +58,6 @@ RUN apt-get clean && apt-get update && $APT_INSTALL gnupg ca-certificates && \ apt-get clean && \ apt-get update && \ $APT_INSTALL software-properties-common && \ - apt-add-repository -y ppa:brightbox/ruby-ng && \ apt-get update && \ # Install openjdk 8. $APT_INSTALL openjdk-8-jdk && \ @@ -62,26 +65,23 @@ RUN apt-get clean && apt-get update && $APT_INSTALL gnupg ca-certificates && \ # Install build / source control tools $APT_INSTALL curl wget git maven ivy subversion make gcc lsof libffi-dev \ pandoc pandoc-citeproc libssl-dev libcurl4-openssl-dev libxml2-dev && \ - curl -sL https://deb.nodesource.com/setup_11.x | bash && \ + curl -sL https://deb.nodesource.com/setup_12.x | bash && \ $APT_INSTALL nodejs && \ # Install needed python packages. Use pip for installing packages (for consistency). - $APT_INSTALL libpython3-dev python3-pip python3-setuptools && \ + $APT_INSTALL python3-pip python3-setuptools && \ # qpdf is required for CRAN checks to pass. $APT_INSTALL qpdf jq && \ - # Change default python version to python3. - update-alternatives --install /usr/bin/python python /usr/bin/python2.7 1 && \ - update-alternatives --install /usr/bin/python python /usr/bin/python3.6 2 && \ - update-alternatives --set python /usr/bin/python3.6 && \ pip3 install $PIP_PKGS && \ # Install R packages and dependencies used when building. # R depends on pandoc*, libssl (which are installed above). # Note that PySpark doc generation also needs pandoc due to nbsphinx $APT_INSTALL r-base r-base-dev && \ + $APT_INSTALL libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev && \ $APT_INSTALL texlive-latex-base texlive texlive-fonts-extra texinfo qpdf && \ Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')" && \ Rscript -e "devtools::install_github('jimhester/lintr')" && \ # Install tools needed to build the documentation. - $APT_INSTALL ruby2.5 ruby2.5-dev && \ + $APT_INSTALL ruby2.7 ruby2.7-dev && \ gem install --no-document $GEM_PKGS WORKDIR /opt/spark-rm/output From 0eb4961ca8d8d20d215768862e0ea2e1f92c46fb Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 30 Dec 2020 07:52:34 +0000 Subject: [PATCH 0918/1009] [SPARK-33926][SQL] Improve the error message from resolving of v1 database name ### What changes were proposed in this pull request? 1. Replace `SessionCatalogAndNamespace` by `DatabaseInSessionCatalog` in resolving database name from v1 session catalog. 2. Throw more precise errors from `DatabaseInSessionCatalog` 3. Fix expected error messages in `v1.ShowTablesSuiteBase` Closes #30947 ### Why are the changes needed? Current error message "multi-part identifier cannot be empty" may confuse users. And this error message is just a consequence of "incorrectly" applied an implicit class. For example, `SHOW TABLES IN spark_catalog`: 1. 
Spark cuts off `spark_catalog` from namespaces in `SessionCatalogAndNamespace`, so, `ns == Seq.empty` here: https://github.com/apache/spark/blob/0617dfce7beb34662ab30a607721e9b46e65c21e/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala#L365 2. Then `ns.length != 1` is `true` and Spark tries to raise the exception at https://github.com/apache/spark/blob/0617dfce7beb34662ab30a607721e9b46e65c21e/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala#L367 3. ... but `ns.quoted` triggers implicit wrapping `Seq.empty` by `MultipartIdentifierHelper`, and hit to the second check `if (parts.isEmpty)` at https://github.com/apache/spark/blob/156704ba0dfcae39a80b8f0ce778b73913db03b2/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala#L120-L122 So, Spark throws the exception at third step instead of `new AnalysisException(s"The database name is not valid: $quoted")` on the second step. And even on the second step, the exception doesn't show actual reason as it is pretty generic. ### Does this PR introduce _any_ user-facing change? Yes in the case of v1 DDL commands when a database is not specified or nested databases is set. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *DDLSuite" $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *ShowTablesSuite" ``` Closes #30963 from MaxGekk/database-in-session-catalog. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../analysis/ResolveSessionCatalog.scala | 73 +++++++------------ .../sql/connector/DataSourceV2SQLSuite.scala | 3 +- .../command/v1/ShowTablesSuite.scala | 10 +-- 3 files changed, 34 insertions(+), 52 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index dec1300d66f35..3c5157bea9470 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.analysis -import org.apache.spark.sql.SaveMode +import org.apache.spark.sql.{AnalysisException, SaveMode} import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType, CatalogUtils} import org.apache.spark.sql.catalyst.plans.logical._ @@ -210,23 +210,14 @@ class ResolveSessionCatalog( case AlterViewUnsetProperties(ResolvedView(ident, _), keys, ifExists) => AlterTableUnsetPropertiesCommand(ident.asTableIdentifier, keys, ifExists, isView = true) - case d @ DescribeNamespace(SessionCatalogAndNamespace(_, ns), _) => - if (ns.length != 1) { - throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) - } - DescribeDatabaseCommand(ns.head, d.extended) + case d @ DescribeNamespace(DatabaseInSessionCatalog(db), _) => + DescribeDatabaseCommand(db, d.extended) - case AlterNamespaceSetProperties(SessionCatalogAndNamespace(_, ns), properties) => - if (ns.length != 1) { - throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) - } - AlterDatabasePropertiesCommand(ns.head, properties) + case AlterNamespaceSetProperties(DatabaseInSessionCatalog(db), properties) => + AlterDatabasePropertiesCommand(db, properties) - case 
AlterNamespaceSetLocation(SessionCatalogAndNamespace(_, ns), location) => - if (ns.length != 1) { - throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) - } - AlterDatabaseSetLocationCommand(ns.head, location) + case AlterNamespaceSetLocation(DatabaseInSessionCatalog(db), location) => + AlterDatabaseSetLocationCommand(db, location) case RenameTable(ResolvedV1TableOrViewIdentifier(oldName), newName, isView) => AlterTableRenameCommand(oldName.asTableIdentifier, newName.asTableIdentifier, isView) @@ -356,27 +347,18 @@ class ResolveSessionCatalog( val newProperties = c.properties -- CatalogV2Util.NAMESPACE_RESERVED_PROPERTIES CreateDatabaseCommand(ns.head, c.ifNotExists, location, comment, newProperties) - case d @ DropNamespace(SessionCatalogAndNamespace(_, ns), _, _) => - if (ns.length != 1) { - throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) - } - DropDatabaseCommand(ns.head, d.ifExists, d.cascade) + case d @ DropNamespace(DatabaseInSessionCatalog(db), _, _) => + DropDatabaseCommand(db, d.ifExists, d.cascade) - case ShowTables(SessionCatalogAndNamespace(_, ns), pattern) => - if (ns.length != 1) { - throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) - } - ShowTablesCommand(Some(ns.head), pattern) + case ShowTables(DatabaseInSessionCatalog(db), pattern) => + ShowTablesCommand(Some(db), pattern) case ShowTableExtended( - SessionCatalogAndNamespace(_, ns), + DatabaseInSessionCatalog(db), pattern, partitionSpec @ (None | Some(UnresolvedPartitionSpec(_, _)))) => - if (ns.length != 1) { - throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) - } ShowTablesCommand( - databaseName = Some(ns.head), + databaseName = Some(db), tableIdentifierPattern = Some(pattern), isExtended = true, partitionSpec.map(_.asInstanceOf[UnresolvedPartitionSpec].spec)) @@ -498,12 +480,7 @@ class ResolveSessionCatalog( case ShowViews(resolved: ResolvedNamespace, pattern) => resolved match { - case SessionCatalogAndNamespace(_, ns) => - // Fallback to v1 ShowViewsCommand since there is no view API in v2 catalog - if (ns.length != 1) { - throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) - } - ShowViewsCommand(ns.head, pattern) + case DatabaseInSessionCatalog(db) => ShowViewsCommand(db, pattern) case _ => throw QueryCompilationErrors.externalCatalogNotSupportShowViewsError(resolved) } @@ -662,15 +639,6 @@ class ResolveSessionCatalog( } } - object SessionCatalogAndNamespace { - def unapply(resolved: ResolvedNamespace): Option[(CatalogPlugin, Seq[String])] = - if (isSessionCatalog(resolved.catalog)) { - Some(resolved.catalog -> resolved.namespace) - } else { - None - } - } - object ResolvedV1TableIdentifier { def unapply(resolved: LogicalPlan): Option[Identifier] = resolved match { case ResolvedTable(catalog, ident, _: V1Table) if isSessionCatalog(catalog) => Some(ident) @@ -709,4 +677,17 @@ class ResolveSessionCatalog( case _ => false } } + + private object DatabaseInSessionCatalog { + def unapply(resolved: ResolvedNamespace): Option[String] = resolved match { + case ResolvedNamespace(catalog, _) if !isSessionCatalog(catalog) => None + case ResolvedNamespace(_, Seq()) => + throw new AnalysisException("Database from v1 session catalog is not specified") + case ResolvedNamespace(_, Seq(dbName)) => Some(dbName) + case _ => + assert(resolved.namespace.length > 1) + throw new AnalysisException("Nested databases are not supported by " + + s"v1 session catalog: ${resolved.namespace.map(quoteIfNeeded).mkString(".")}") + } + } } diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index b8d58217efa6e..f821335690aeb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -937,7 +937,8 @@ class DataSourceV2SQLSuite sql("SHOW VIEWS FROM a.b") } - assert(exception.getMessage.contains("The database name is not valid: a.b")) + assert(exception.getMessage.contains( + "Nested databases are not supported by v1 session catalog: a.b")) } test("ShowViews: using v2 catalog, command not supported.") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala index 74298c020415d..5f5bcc8170aa2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala @@ -63,11 +63,11 @@ trait ShowTablesSuiteBase extends command.ShowTablesSuiteBase { } } - test("v1 SHOW TABLES only support single-level namespace") { - val exception = intercept[AnalysisException] { + test("only support single-level namespace") { + val errMsg = intercept[AnalysisException] { runShowTablesSql("SHOW TABLES FROM a.b", Seq()) - } - assert(exception.getMessage.contains("The database name is not valid: a.b")) + }.getMessage + assert(errMsg.contains("Nested databases are not supported by v1 session catalog: a.b")) } test("SHOW TABLE EXTENDED from default") { @@ -116,7 +116,7 @@ trait ShowTablesSuiteBase extends command.ShowTablesSuiteBase { val errMsg = intercept[AnalysisException] { sql(showTableCmd) }.getMessage - assert(errMsg.contains("multi-part identifier cannot be empty")) + assert(errMsg.contains("Database from v1 session catalog is not specified")) } } } From 2afd1fb49243e28152b3e581923b49d3aaab0dd7 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 30 Dec 2020 07:56:34 +0000 Subject: [PATCH 0919/1009] [SPARK-33904][SQL] Recognize `spark_catalog` in `saveAsTable()` and `insertInto()` ### What changes were proposed in this pull request? In the `saveAsTable()` and `insertInto()` methods of `DataFrameWriter`, recognize `spark_catalog` as the default session catalog in table names. ### Why are the changes needed? 1. To simplify writing of unified v1 and v2 tests 2. To improve Spark SQL user experience. `insertInto()` should have feature parity with the `INSERT INTO` sql command. Currently, `insertInto()` fails on a table from a namespace in `spark_catalog`: ```scala scala> sql("CREATE NAMESPACE spark_catalog.ns") scala> Seq(0).toDF().write.saveAsTable("spark_catalog.ns.tbl") org.apache.spark.sql.AnalysisException: Couldn't find a catalog to handle the identifier spark_catalog.ns.tbl. at org.apache.spark.sql.DataFrameWriter.saveAsTable(DataFrameWriter.scala:629) ... 47 elided scala> Seq(0).toDF().write.insertInto("spark_catalog.ns.tbl") org.apache.spark.sql.AnalysisException: Couldn't find a catalog to handle the identifier spark_catalog.ns.tbl. at org.apache.spark.sql.DataFrameWriter.insertInto(DataFrameWriter.scala:498) ... 
47 elided ``` but `INSERT INTO` succeed: ```sql spark-sql> create table spark_catalog.ns.tbl (c int); spark-sql> insert into spark_catalog.ns.tbl select 0; spark-sql> select * from spark_catalog.ns.tbl; 0 ``` ### Does this PR introduce _any_ user-facing change? Yes. After the changes for the example above: ```scala scala> Seq(0).toDF().write.saveAsTable("spark_catalog.ns.tbl") scala> Seq(1).toDF().write.insertInto("spark_catalog.ns.tbl") scala> spark.table("spark_catalog.ns.tbl").show(false) +-----+ |value| +-----+ |0 | |1 | +-----+ ``` ### How was this patch tested? By running the affected test suites: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *.ShowPartitionsSuite" $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *.FileFormatWriterSuite" ``` Closes #30919 from MaxGekk/insert-into-spark_catalog. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../sql/connector/catalog/LookupCatalog.scala | 27 ++++++++++--------- .../command/ShowPartitionsSuiteBase.scala | 12 ++++++++- .../command/v1/ShowPartitionsSuite.scala | 19 ++++--------- .../command/v2/ShowPartitionsSuite.scala | 22 ++++----------- .../datasources/FileFormatWriterSuite.scala | 13 +++++++++ .../command/ShowPartitionsSuite.scala | 19 +++++-------- 6 files changed, 55 insertions(+), 57 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/LookupCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/LookupCatalog.scala index d8cdecce0d172..16416faeb2859 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/LookupCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/LookupCatalog.scala @@ -140,19 +140,22 @@ private[sql] trait LookupCatalog extends Logging { * For legacy support only. Please use [[CatalogAndIdentifier]] instead on DSv2 code paths. 
*/ object AsTableIdentifier { - def unapply(parts: Seq[String]): Option[TableIdentifier] = parts match { - case CatalogAndMultipartIdentifier(None, names) + def unapply(parts: Seq[String]): Option[TableIdentifier] = { + def namesToTableIdentifier(names: Seq[String]): Option[TableIdentifier] = names match { + case Seq(name) => Some(TableIdentifier(name)) + case Seq(database, name) => Some(TableIdentifier(name, Some(database))) + case _ => None + } + parts match { + case CatalogAndMultipartIdentifier(None, names) if CatalogV2Util.isSessionCatalog(currentCatalog) => - names match { - case Seq(name) => - Some(TableIdentifier(name)) - case Seq(database, name) => - Some(TableIdentifier(name, Some(database))) - case _ => - None - } - case _ => - None + namesToTableIdentifier(names) + case CatalogAndMultipartIdentifier(Some(catalog), names) + if CatalogV2Util.isSessionCatalog(catalog) && + CatalogV2Util.isSessionCatalog(currentCatalog) => + namesToTableIdentifier(names) + case _ => None + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala index 9a942d348a181..29edb8fb51cf8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.command -import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{StringType, StructType} @@ -53,6 +53,16 @@ trait ShowPartitionsSuiteBase extends QueryTest with DDLCommandTestUtils { sql(s"ALTER TABLE $table ADD PARTITION(year = 2016, month = 3)") } + protected def createNullPartTable(table: String, format: String): Unit = { + import testImplicits._ + val df = Seq((0, ""), (1, null)).toDF("a", "part") + df.write + .partitionBy("part") + .format(format) + .mode(SaveMode.Overwrite) + .saveAsTable(table) + } + test("show partitions of non-partitioned table") { withNamespaceAndTable("ns", "not_partitioned_table") { t => sql(s"CREATE TABLE $t (col1 int) $defaultUsing") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala index 5be5e28d01706..e85d62c51ef45 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala @@ -105,22 +105,13 @@ class ShowPartitionsSuite extends ShowPartitionsSuiteBase with CommandSuiteBase } } - test("null and empty string as partition values") { - import testImplicits._ - withTable("t") { - val df = Seq((0, ""), (1, null)).toDF("a", "part") - df.write - .partitionBy("part") - .format("parquet") - .mode(SaveMode.Overwrite) - .saveAsTable("t") - + test("SPARK-33904: null and empty string as partition values") { + withNamespaceAndTable("ns", "tbl") { t => + createNullPartTable(t, "parquet") runShowPartitionsSql( - "SHOW PARTITIONS t", + s"SHOW PARTITIONS $t", Row("part=__HIVE_DEFAULT_PARTITION__") :: Nil) - checkAnswer(spark.table("t"), - Row(0, null) :: - Row(1, null) :: Nil) + checkAnswer(spark.table(t), Row(0, null) :: Row(1, null) :: Nil) } } } diff 
--git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala index 44d8b57ce1596..42f05ee55504a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.command.v2 -import org.apache.spark.sql.{AnalysisException, Row, SaveMode} +import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.execution.command /** @@ -38,23 +38,11 @@ class ShowPartitionsSuite extends command.ShowPartitionsSuiteBase with CommandSu } } - test("SPARK-33889: null and empty string as partition values") { - import testImplicits._ + test("SPARK-33889, SPARK-33904: null and empty string as partition values") { withNamespaceAndTable("ns", "tbl") { t => - val df = Seq((0, ""), (1, null)).toDF("a", "part") - df.write - .partitionBy("part") - .format("parquet") - .mode(SaveMode.Overwrite) - .saveAsTable(t) - - runShowPartitionsSql( - s"SHOW PARTITIONS $t", - Row("part=") :: - Row("part=null") :: Nil) - checkAnswer(spark.table(t), - Row(0, "") :: - Row(1, null) :: Nil) + createNullPartTable(t, "parquet") + runShowPartitionsSql(s"SHOW PARTITIONS $t", Row("part=") :: Row("part=null") :: Nil) + checkAnswer(spark.table(t), Row(0, "") :: Row(1, null) :: Nil) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileFormatWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileFormatWriterSuite.scala index ce511842e6356..f492fc653653e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileFormatWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileFormatWriterSuite.scala @@ -61,4 +61,17 @@ class FileFormatWriterSuite checkAnswer(spark.table("t2").sort("id"), Seq(Row(0, null), Row(1, null), Row(2, null))) } } + + test("SPARK-33904: save and insert into a table in a namespace of spark_catalog") { + val ns = "spark_catalog.ns" + withNamespace(ns) { + spark.sql(s"CREATE NAMESPACE $ns") + val t = s"$ns.tbl" + withTable(t) { + spark.range(1).write.saveAsTable(t) + Seq(100).toDF().write.insertInto(t) + checkAnswer(spark.table(t), Seq(Row(0), Row(100))) + } + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowPartitionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowPartitionsSuite.scala index 904c6c40b938f..ded53cc3ea7f0 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowPartitionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowPartitionsSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.hive.execution.command -import org.apache.spark.sql.{Row, SaveMode} +import org.apache.spark.sql.Row import org.apache.spark.sql.execution.command.v1 /** @@ -25,21 +25,14 @@ import org.apache.spark.sql.execution.command.v1 * V1 Hive external table catalog. 
*/ class ShowPartitionsSuite extends v1.ShowPartitionsSuiteBase with CommandSuiteBase { - test("null and empty string as partition values") { - import testImplicits._ + test("SPARK-33904: null and empty string as partition values") { withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") { - withTable("t") { - val df = Seq((0, ""), (1, null)).toDF("a", "part") - df.write - .partitionBy("part") - .format("hive") - .mode(SaveMode.Overwrite) - .saveAsTable("t") - + withNamespaceAndTable("ns", "tbl") { t => + createNullPartTable(t, "hive") runShowPartitionsSql( - "SHOW PARTITIONS t", + s"SHOW PARTITIONS $t", Row("part=__HIVE_DEFAULT_PARTITION__") :: Nil) - checkAnswer(spark.table("t"), + checkAnswer(spark.table(t), Row(0, "__HIVE_DEFAULT_PARTITION__") :: Row(1, "__HIVE_DEFAULT_PARTITION__") :: Nil) } From ba974ea8e4cc8075056682c2badab5ca64b90047 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Wed, 30 Dec 2020 13:14:31 +0000 Subject: [PATCH 0920/1009] [SPARK-30789][SQL] Support (IGNORE | RESPECT) NULLS for LEAD/LAG/NTH_VALUE/FIRST_VALUE/LAST_VALUE ### What changes were proposed in this pull request? All of `LEAD`/`LAG`/`NTH_VALUE`/`FIRST_VALUE`/`LAST_VALUE` should support IGNORE NULLS | RESPECT NULLS. For example: ``` LEAD (value_expr [, offset ]) [ IGNORE NULLS | RESPECT NULLS ] OVER ( [ PARTITION BY window_partition ] ORDER BY window_ordering ) ``` ``` LAG (value_expr [, offset ]) [ IGNORE NULLS | RESPECT NULLS ] OVER ( [ PARTITION BY window_partition ] ORDER BY window_ordering ) ``` ``` NTH_VALUE (expr, offset) [ IGNORE NULLS | RESPECT NULLS ] OVER ( [ PARTITION BY window_partition ] [ ORDER BY window_ordering frame_clause ] ) ``` The mainstream database or engine supports this syntax contains: **Oracle** https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/NTH_VALUE.html#GUID-F8A0E88C-67E5-4AA6-9515-95D03A7F9EA0 **Redshift** https://docs.aws.amazon.com/redshift/latest/dg/r_WF_NTH.html **Presto** https://prestodb.io/docs/current/functions/window.html **DB2** https://www.ibm.com/support/knowledgecenter/SSGU8G_14.1.0/com.ibm.sqls.doc/ids_sqs_1513.htm **Teradata** https://docs.teradata.com/r/756LNiPSFdY~4JcCCcR5Cw/GjCT6l7trjkIEjt~7Dhx4w **Snowflake** https://docs.snowflake.com/en/sql-reference/functions/lead.html https://docs.snowflake.com/en/sql-reference/functions/lag.html https://docs.snowflake.com/en/sql-reference/functions/nth_value.html https://docs.snowflake.com/en/sql-reference/functions/first_value.html https://docs.snowflake.com/en/sql-reference/functions/last_value.html **Exasol** https://docs.exasol.com/sql_references/functions/alphabeticallistfunctions/lead.htm https://docs.exasol.com/sql_references/functions/alphabeticallistfunctions/lag.htm https://docs.exasol.com/sql_references/functions/alphabeticallistfunctions/nth_value.htm https://docs.exasol.com/sql_references/functions/alphabeticallistfunctions/first_value.htm https://docs.exasol.com/sql_references/functions/alphabeticallistfunctions/last_value.htm ### Why are the changes needed? Support `(IGNORE | RESPECT) NULLS` for `LEAD`/`LAG`/`NTH_VALUE`/`FIRST_VALUE`/`LAST_VALUE `is very useful. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Jenkins test Closes #30943 from beliefer/SPARK-30789. 
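For illustration only (not part of this patch): a minimal spark-shell style sketch of the new syntax against a made-up temp view. `IGNORE NULLS` makes `last_value`/`lag` skip null inputs, while the default `RESPECT NULLS` surfaces them as-is.

```
// Hypothetical data; assumes this patch is applied and runs in spark-shell
// (spark.implicits._ already imported).
val df = Seq(("a", 0, null), ("a", 1, "x"), ("a", 2, null), ("a", 3, "y"))
  .toDF("content", "id", "v")
df.createOrReplaceTempView("tbl")

spark.sql("""
  SELECT id, v,
         last_value(v) IGNORE NULLS OVER (ORDER BY id) AS last_non_null,
         lag(v, 1)     IGNORE NULLS OVER (ORDER BY id) AS prev_non_null
  FROM tbl
  ORDER BY id
""").show()
// last_non_null carries the most recent non-null v seen so far in the frame;
// prev_non_null is the latest non-null v from a preceding row, skipping nulls.
```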
Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: Wenchen Fan --- docs/sql-ref-ansi-compliance.md | 1 + .../spark/sql/catalyst/parser/SqlBase.g4 | 6 +- .../spark/sql/QueryCompilationErrors.scala | 4 + .../sql/catalyst/analysis/Analyzer.scala | 45 ++- .../analysis/higherOrderFunctions.scala | 6 +- .../sql/catalyst/analysis/unresolved.scala | 3 +- .../sql/catalyst/parser/AstBuilder.scala | 4 +- .../analysis/AnalysisErrorSuite.scala | 20 ++ .../resources/sql-tests/inputs/window.sql | 148 ++++++++- .../sql-tests/results/window.sql.out | 280 +++++++++++++++++- 10 files changed, 508 insertions(+), 9 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 8201fd707275d..16059a5a08e9a 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -363,6 +363,7 @@ Below is a list of all the keywords in Spark SQL. |REPAIR|non-reserved|non-reserved|non-reserved| |REPLACE|non-reserved|non-reserved|non-reserved| |RESET|non-reserved|non-reserved|non-reserved| +|RESPECT|non-reserved|non-reserved|non-reserved| |RESTRICT|non-reserved|non-reserved|non-reserved| |REVOKE|non-reserved|non-reserved|reserved| |RIGHT|reserved|strict-non-reserved|reserved| diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index d2908a555858d..ab4b7833503fb 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -803,7 +803,8 @@ primaryExpression | '(' namedExpression (',' namedExpression)+ ')' #rowConstructor | '(' query ')' #subqueryExpression | functionName '(' (setQuantifier? argument+=expression (',' argument+=expression)*)? ')' - (FILTER '(' WHERE where=booleanExpression ')')? (OVER windowSpec)? #functionCall + (FILTER '(' WHERE where=booleanExpression ')')? + (nullsOption=(IGNORE | RESPECT) NULLS)? ( OVER windowSpec)? 
#functionCall | identifier '->' expression #lambda | '(' identifier (',' identifier)+ ')' '->' expression #lambda | value=primaryExpression '[' index=valueExpression ']' #subscript @@ -1143,6 +1144,7 @@ ansiNonReserved | REPAIR | REPLACE | RESET + | RESPECT | RESTRICT | REVOKE | RLIKE @@ -1397,6 +1399,7 @@ nonReserved | REPAIR | REPLACE | RESET + | RESPECT | RESTRICT | REVOKE | RLIKE @@ -1651,6 +1654,7 @@ RENAME: 'RENAME'; REPAIR: 'REPAIR'; REPLACE: 'REPLACE'; RESET: 'RESET'; +RESPECT: 'RESPECT'; RESTRICT: 'RESTRICT'; REVOKE: 'REVOKE'; RIGHT: 'RIGHT'; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala index 51a2cb0cb4d92..e4a1f3f8efeee 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala @@ -268,6 +268,10 @@ object QueryCompilationErrors { s"but $prettyName is not an aggregate function") } + def ignoreNullsWithUnsupportedFunctionError(prettyName: String): Throwable = { + new AnalysisException(s"Function $prettyName does not support IGNORE NULLS") + } + def nonDeterministicFilterInAggregateError(): Throwable = { new AnalysisException("FILTER expression is non-deterministic, " + "it cannot be used in aggregate functions") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 8af692d9fe008..5e86368f6f4b3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -29,7 +29,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst._ import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.encoders.OuterScopes -import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.{FrameLessOffsetWindowFunction, _} import org.apache.spark.sql.catalyst.expressions.SubExprUtils._ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.expressions.objects._ @@ -2113,7 +2113,7 @@ class Analyzer(override val catalogManager: CatalogManager) name, other.getClass.getCanonicalName) } } - case u @ UnresolvedFunction(funcId, arguments, isDistinct, filter) => + case u @ UnresolvedFunction(funcId, arguments, isDistinct, filter, ignoreNulls) => withPosition(u) { v1SessionCatalog.lookupFunction(funcId, arguments) match { // AggregateWindowFunctions are AggregateFunctions that can only be evaluated within @@ -2123,19 +2123,58 @@ class Analyzer(override val catalogManager: CatalogManager) if (isDistinct || filter.isDefined) { throw QueryCompilationErrors.distinctOrFilterOnlyWithAggregateFunctionError( wf.prettyName) + } else if (ignoreNulls) { + wf match { + case nthValue: NthValue => + nthValue.copy(ignoreNulls = ignoreNulls) + case _ => + throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError( + wf.prettyName) + } } else { wf } + case owf: FrameLessOffsetWindowFunction => + if (isDistinct || filter.isDefined) { + throw QueryCompilationErrors.distinctOrFilterOnlyWithAggregateFunctionError( + owf.prettyName) + } else if (ignoreNulls) { + owf match { + case lead: Lead => + lead.copy(ignoreNulls = ignoreNulls) + case lag: Lag => + lag.copy(ignoreNulls = ignoreNulls) + case _ => + 
throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError( + owf.prettyName) + } + } else { + owf + } // We get an aggregate function, we need to wrap it in an AggregateExpression. case agg: AggregateFunction => if (filter.isDefined && !filter.get.deterministic) { throw QueryCompilationErrors.nonDeterministicFilterInAggregateError } - AggregateExpression(agg, Complete, isDistinct, filter) + if (ignoreNulls) { + val aggFunc = agg match { + case first: First => first.copy(ignoreNulls = ignoreNulls) + case last: Last => last.copy(ignoreNulls = ignoreNulls) + case _ => + throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError( + agg.prettyName) + } + AggregateExpression(aggFunc, Complete, isDistinct, filter) + } else { + AggregateExpression(agg, Complete, isDistinct, filter) + } // This function is not an aggregate function, just return the resolved one. case other if (isDistinct || filter.isDefined) => throw QueryCompilationErrors.distinctOrFilterOnlyWithAggregateFunctionError( other.prettyName) + case other if (ignoreNulls) => + throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError( + other.prettyName) case e: String2TrimExpression if arguments.size == 2 => if (trimWarningEnabled.get) { log.warn("Two-parameter TRIM/LTRIM/RTRIM function signatures are deprecated." + diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala index 2fa6bf0acea67..6115b4ed5a117 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala @@ -21,6 +21,7 @@ import org.apache.spark.sql.catalyst.catalog.SessionCatalog import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.types.DataType /** @@ -32,13 +33,16 @@ import org.apache.spark.sql.types.DataType case class ResolveHigherOrderFunctions(catalog: SessionCatalog) extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveExpressions { - case u @ UnresolvedFunction(fn, children, false, filter) + case u @ UnresolvedFunction(fn, children, false, filter, ignoreNulls) if hasLambdaAndResolvedArguments(children) => withPosition(u) { catalog.lookupFunction(fn, children) match { case func: HigherOrderFunction => filter.foreach(_.failAnalysis("FILTER predicate specified, " + s"but ${func.prettyName} is not an aggregate function")) + if (ignoreNulls) { + throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError(func.prettyName) + } func case other => other.failAnalysis( "A lambda function should only be used in a higher order function. 
However, " + diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 84614886348aa..afeef3f16b289 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -258,7 +258,8 @@ case class UnresolvedFunction( name: FunctionIdentifier, arguments: Seq[Expression], isDistinct: Boolean, - filter: Option[Expression] = None) + filter: Option[Expression] = None, + ignoreNulls: Boolean = false) extends Expression with Unevaluable { override def children: Seq[Expression] = arguments ++ filter.toSeq diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 771bb5a1708b0..a2f59b914a10d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1697,8 +1697,10 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg expressions } val filter = Option(ctx.where).map(expression(_)) + val ignoreNulls = + Option(ctx.nullsOption).map(_.getType == SqlBaseParser.IGNORE).getOrElse(false) val function = UnresolvedFunction( - getFunctionIdentifier(ctx.functionName), arguments, isDistinct, filter) + getFunctionIdentifier(ctx.functionName), arguments, isDistinct, filter, ignoreNulls) // Check if the function is evaluated in a windowed context. ctx.windowSpec match { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 004d577c7ad52..ec2a8a41bf38c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -212,6 +212,26 @@ class AnalysisErrorSuite extends AnalysisTest { CatalystSqlParser.parsePlan("SELECT count(a) FILTER (WHERE rand(int(c)) > 1) FROM TaBlE2"), "FILTER expression is non-deterministic, it cannot be used in aggregate functions" :: Nil) + errorTest( + "function don't support ignore nulls", + CatalystSqlParser.parsePlan("SELECT hex(a) IGNORE NULLS FROM TaBlE2"), + "Function hex does not support IGNORE NULLS" :: Nil) + + errorTest( + "some window function don't support ignore nulls", + CatalystSqlParser.parsePlan("SELECT percent_rank(a) IGNORE NULLS FROM TaBlE2"), + "Function percent_rank does not support IGNORE NULLS" :: Nil) + + errorTest( + "aggregate function don't support ignore nulls", + CatalystSqlParser.parsePlan("SELECT count(a) IGNORE NULLS FROM TaBlE2"), + "Function count does not support IGNORE NULLS" :: Nil) + + errorTest( + "higher order function don't support ignore nulls", + CatalystSqlParser.parsePlan("SELECT aggregate(array(1, 2, 3), 0, (acc, x) -> acc + x) " + + "IGNORE NULLS"), "Function aggregate does not support IGNORE NULLS" :: Nil) + errorTest( "nested aggregate functions", testRelation.groupBy($"a")( diff --git a/sql/core/src/test/resources/sql-tests/inputs/window.sql b/sql/core/src/test/resources/sql-tests/inputs/window.sql index f0336d764bdea..56f2b0b20c165 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/window.sql +++ 
b/sql/core/src/test/resources/sql-tests/inputs/window.sql @@ -36,6 +36,18 @@ CREATE OR REPLACE TEMPORARY VIEW basic_pays AS SELECT * FROM VALUES ('Barry Jones','SCM',10586) AS basic_pays(employee_name, department, salary); +CREATE OR REPLACE TEMPORARY VIEW test_ignore_null AS SELECT * FROM VALUES +('a', 0, null), +('a', 1, 'x'), +('b', 2, null), +('c', 3, null), +('a', 4, 'y'), +('b', 5, null), +('a', 6, 'z'), +('a', 7, 'v'), +('a', 8, null) +AS test_ignore_null(content, id, v); + -- RowsBetween SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val ROWS CURRENT ROW) FROM testData ORDER BY cate, val; @@ -262,4 +274,138 @@ FROM WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING), w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING) -ORDER BY salary DESC; \ No newline at end of file +ORDER BY salary DESC; + +SELECT + content, + id, + v, + lead(v, 0) IGNORE NULLS OVER w lead_0, + lead(v, 1) IGNORE NULLS OVER w lead_1, + lead(v, 2) IGNORE NULLS OVER w lead_2, + lead(v, 3) IGNORE NULLS OVER w lead_3, + lag(v, 0) IGNORE NULLS OVER w lag_0, + lag(v, 1) IGNORE NULLS OVER w lag_1, + lag(v, 2) IGNORE NULLS OVER w lag_2, + lag(v, 3) IGNORE NULLS OVER w lag_3, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id) +ORDER BY id; + +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) +ORDER BY id; + +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) +ORDER BY id; + +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id RANGE BETWEEN 2 PRECEDING AND 2 FOLLOWING) +ORDER BY id; + +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) +ORDER BY id; + +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) +ORDER BY id; + +SELECT + content, + id, + v, + 
nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) +ORDER BY id; + +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) +ORDER BY id; + +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) +ORDER BY id; \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out b/sql/core/src/test/resources/sql-tests/results/window.sql.out index c904c43ac84ed..e3fd0cd77cb6f 100644 --- a/sql/core/src/test/resources/sql-tests/results/window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 36 +-- Number of queries: 46 -- !query @@ -46,6 +46,24 @@ struct<> +-- !query +CREATE OR REPLACE TEMPORARY VIEW test_ignore_null AS SELECT * FROM VALUES +('a', 0, null), +('a', 1, 'x'), +('b', 2, null), +('c', 3, null), +('a', 4, 'y'), +('b', 5, null), +('a', 6, 'z'), +('a', 7, 'v'), +('a', 8, null) +AS test_ignore_null(content, id, v) +-- !query schema +struct<> +-- !query output + + + -- !query SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val ROWS CURRENT ROW) FROM testData ORDER BY cate, val @@ -776,3 +794,263 @@ WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING), w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING) ORDER BY salary DESC + + +-- !query +SELECT + content, + id, + v, + lead(v, 0) IGNORE NULLS OVER w lead_0, + lead(v, 1) IGNORE NULLS OVER w lead_1, + lead(v, 2) IGNORE NULLS OVER w lead_2, + lead(v, 3) IGNORE NULLS OVER w lead_3, + lag(v, 0) IGNORE NULLS OVER w lag_0, + lag(v, 1) IGNORE NULLS OVER w lag_1, + lag(v, 2) IGNORE NULLS OVER w lag_2, + lag(v, 3) IGNORE NULLS OVER w lag_3, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id) +ORDER BY id +-- !query schema +struct +-- !query output +a 0 NULL NULL x y z NULL NULL NULL NULL NULL NULL NULL NULL NULL +a 1 x x y z v x NULL NULL NULL x NULL NULL x x +b 2 NULL NULL y z v NULL x NULL NULL x NULL NULL x x +c 3 NULL NULL y z v NULL x NULL NULL x NULL NULL x x +a 4 y y z v NULL y x NULL NULL x y NULL x y +b 5 NULL NULL z v NULL NULL y x NULL x y NULL x y +a 6 z z v NULL NULL z y x NULL x y z x z +a 7 v v NULL NULL NULL v z y x x y z x v +a 8 NULL NULL NULL NULL NULL NULL v z y x y z x v + + +-- !query +SELECT + content, + id, + v, + 
nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) +ORDER BY id +-- !query schema +struct +-- !query output +a 0 NULL NULL NULL NULL NULL NULL +a 1 x x NULL NULL x x +b 2 NULL x NULL NULL x x +c 3 NULL x NULL NULL x x +a 4 y x y NULL x y +b 5 NULL x y NULL x y +a 6 z x y z x z +a 7 v x y z x v +a 8 NULL x y z x v + + +-- !query +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) +ORDER BY id +-- !query schema +struct +-- !query output +a 0 NULL NULL NULL NULL NULL NULL +a 1 x x NULL NULL x x +b 2 NULL x NULL NULL x x +c 3 NULL x NULL NULL x x +a 4 y x y NULL x y +b 5 NULL x y NULL x y +a 6 z x y z x z +a 7 v x y z x v +a 8 NULL x y z x v + + +-- !query +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id RANGE BETWEEN 2 PRECEDING AND 2 FOLLOWING) +ORDER BY id +-- !query schema +struct +-- !query output +a 0 NULL x NULL NULL x x +a 1 x x NULL NULL x x +b 2 NULL x y NULL x y +c 3 NULL x y NULL x y +a 4 y y z NULL y z +b 5 NULL y z v y v +a 6 z y z v y v +a 7 v z v NULL z v +a 8 NULL z v NULL z v + + +-- !query +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) +ORDER BY id +-- !query schema +struct +-- !query output +a 0 NULL x NULL NULL x x +a 1 x x NULL NULL x x +b 2 NULL x y NULL x y +c 3 NULL x y NULL x y +a 4 y y z NULL y z +b 5 NULL y z v y v +a 6 z y z v y v +a 7 v z v NULL z v +a 8 NULL z v NULL z v + + +-- !query +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) +ORDER BY id +-- !query schema +struct +-- !query output +a 0 NULL x y z x v +a 1 x x y z x v +b 2 NULL y z v y v +c 3 NULL y z v y v +a 4 y y z v y v +b 5 NULL z v NULL z v +a 6 z z v NULL z v +a 7 v v NULL NULL v v +a 8 NULL NULL NULL NULL NULL NULL + + +-- !query +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id 
RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) +ORDER BY id +-- !query schema +struct +-- !query output +a 0 NULL x y z x v +a 1 x x y z x v +b 2 NULL x y z x v +c 3 NULL x y z x v +a 4 y x y z x v +b 5 NULL x y z x v +a 6 z x y z x v +a 7 v x y z x v +a 8 NULL x y z x v + + +-- !query +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) +ORDER BY id +-- !query schema +struct +-- !query output +a 0 NULL x y z x v +a 1 x x y z x v +b 2 NULL x y z x v +c 3 NULL x y z x v +a 4 y x y z x v +b 5 NULL x y z x v +a 6 z x y z x v +a 7 v x y z x v +a 8 NULL x y z x v + + +-- !query +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) +ORDER BY id +-- !query schema +struct +-- !query output +a 0 NULL x NULL NULL x x +a 1 x x NULL NULL x x +b 2 NULL x NULL NULL x x +c 3 NULL x y NULL x y +a 4 y x y NULL x y +b 5 NULL x y z x z +a 6 z x y z x v +a 7 v x y z x v +a 8 NULL x y z x v \ No newline at end of file From f38265ddda62f99aea802e422e6b440ee72f2483 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 30 Dec 2020 09:57:15 -0800 Subject: [PATCH 0921/1009] [SPARK-33907][SQL] Only prune columns of from_json if parsing options is empty ### What changes were proposed in this pull request? As a follow-up task to SPARK-32958, this patch takes safer approach to only prune columns from JsonToStructs if the parsing option is empty. It is to avoid unexpected behavior change regarding parsing. This patch also adds a few e2e tests to make sure failfast parsing behavior is not changed. ### Why are the changes needed? It is to avoid unexpected behavior change regarding parsing. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Closes #30970 from viirya/SPARK-33907-3.2. Authored-by: Liang-Chi Hsieh Signed-off-by: Liang-Chi Hsieh --- .../optimizer/OptimizeCsvJsonExprs.scala | 9 ++- .../optimizer/OptimizeJsonExprsSuite.scala | 20 ++++++ .../apache/spark/sql/JsonFunctionsSuite.scala | 65 +++++++++++++++++++ 3 files changed, 92 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeCsvJsonExprs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeCsvJsonExprs.scala index 9c32f8be736a4..5f0f3f921bdf1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeCsvJsonExprs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeCsvJsonExprs.scala @@ -98,12 +98,17 @@ object OptimizeCsvJsonExprs extends Rule[LogicalPlan] { child case g @ GetStructField(j @ JsonToStructs(schema: StructType, _, _, _), ordinal, _) - if schema.length > 1 => + if schema.length > 1 && j.options.isEmpty => + // Options here should be empty because the optimization should not be enabled + // for some options. 
For example, when the parse mode is failfast it should not + // optimize, and should force to parse the whole input JSON with failing fast for + // an invalid input. + // To be more conservative, it does not optimize when any option is set for now. val prunedSchema = StructType(Seq(schema(ordinal))) g.copy(child = j.copy(schema = prunedSchema), ordinal = 0) case g @ GetArrayStructFields(j @ JsonToStructs(schema: ArrayType, _, _, _), _, _, _, _) - if schema.elementType.asInstanceOf[StructType].length > 1 => + if schema.elementType.asInstanceOf[StructType].length > 1 && j.options.isEmpty => val prunedSchema = ArrayType(StructType(Seq(g.field)), g.containsNull) g.copy(child = j.copy(schema = prunedSchema), ordinal = 0, numFields = 1) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala index 05d47706ba297..ccbc61e8a4987 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala @@ -209,6 +209,26 @@ class OptimizeJsonExprsSuite extends PlanTest with ExpressionEvalHelper { comparePlans(optimized2, expected2) } + test("SPARK-33907: do not prune unnecessary columns if options is not empty") { + val options = Map("mode" -> "failfast") + + val query1 = testRelation2 + .select(GetStructField(JsonToStructs(schema, options, 'json), 0)) + val optimized1 = Optimizer.execute(query1.analyze) + + comparePlans(optimized1, query1.analyze) + + val schema1 = ArrayType(StructType.fromDDL("a int, b int"), containsNull = true) + val field1 = schema1.elementType.asInstanceOf[StructType](0) + + val query2 = testRelation2 + .select(GetArrayStructFields( + JsonToStructs(schema1, options, 'json), field1, 0, 2, true).as("a")) + val optimized2 = Optimizer.execute(query2.analyze) + + comparePlans(optimized2, query2.analyze) + } + test("SPARK-33007: simplify named_struct + from_json") { val options = Map.empty[String, String] val schema = StructType.fromDDL("a int, b int, c long, d string") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index 2e515ee92bceb..310e170e8c1b1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -775,4 +775,69 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { }.getMessage assert(errMsg3.contains("DataType cow is not supported")) } + + test("SPARK-33907: bad json input with json pruning optimization: GetStructField") { + Seq("true", "false").foreach { enabled => + withSQLConf(SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> enabled) { + val schema = new StructType() + .add("a", IntegerType) + .add("b", IntegerType) + val badRec = """{"a" 1, "b": 11}""" + val df = Seq(badRec, """{"a": 2, "b": 12}""").toDS() + + val exception1 = intercept[SparkException] { + df.select(from_json($"value", schema, Map("mode" -> "FAILFAST"))("b")).collect() + }.getMessage + assert(exception1.contains( + "Malformed records are detected in record parsing. 
Parse Mode: FAILFAST.")) + + val exception2 = intercept[SparkException] { + df.select(from_json($"value", schema, Map("mode" -> "FAILFAST"))("a")).collect() + }.getMessage + assert(exception2.contains( + "Malformed records are detected in record parsing. Parse Mode: FAILFAST.")) + } + } + } + + test("SPARK-33907: bad json input with json pruning optimization: GetArrayStructFields") { + Seq("true", "false").foreach { enabled => + withSQLConf(SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> enabled) { + val schema = ArrayType(new StructType() + .add("a", IntegerType) + .add("b", IntegerType)) + val badRec = """{"a" 1, "b": 11}""" + val df = Seq(s"""[$badRec, {"a": 2, "b": 12}]""").toDS() + + val exception1 = intercept[SparkException] { + df.select(from_json($"value", schema, Map("mode" -> "FAILFAST"))("b")).collect() + }.getMessage + assert(exception1.contains( + "Malformed records are detected in record parsing. Parse Mode: FAILFAST.")) + + val exception2 = intercept[SparkException] { + df.select(from_json($"value", schema, Map("mode" -> "FAILFAST"))("a")).collect() + }.getMessage + assert(exception2.contains( + "Malformed records are detected in record parsing. Parse Mode: FAILFAST.")) + } + } + } + + test("SPARK-33907: json pruning optimization with corrupt record field") { + Seq("true", "false").foreach { enabled => + withSQLConf(SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> enabled) { + val schema = new StructType() + .add("a", IntegerType) + .add("b", IntegerType) + val badRec = """{"a" 1, "b": 11}""" + + val df = Seq(badRec, """{"a": 2, "b": 12}""").toDS() + .selectExpr("from_json(value, 'a int, b int, _corrupt_record string') as parsed") + .selectExpr("parsed._corrupt_record") + + checkAnswer(df, Seq(Row("""{"a" 1, "b": 11}"""), Row(null))) + } + } + } } From 85de64473310a32c91da7878abb0bea4d371c11d Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Wed, 30 Dec 2020 13:57:44 -0600 Subject: [PATCH 0922/1009] [SPARK-33804][CORE] Fix compilation warnings about 'view bounds are deprecated' ### What changes were proposed in this pull request? There are only 3 compilation warnings related to `view bounds are deprecated` in Spark Code: ``` [WARNING] /spark-source/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala:35: view bounds are deprecated; use an implicit parameter instead. [WARNING] /spark-source/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala:35: view bounds are deprecated; use an implicit parameter instead. [WARNING] /spark-source/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala:55: view bounds are deprecated; use an implicit parameter instead. ``` This pr try to fix these compilation warnings. ### Why are the changes needed? Fix compilation warnings about ` view bounds are deprecated` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass the Jenkins or GitHub Action Closes #30924 from LuciferYang/SPARK-33804. 
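Side note for reviewers (illustration only, not Spark code): the rewrite applied here is the standard replacement of a view bound with an implicit conversion parameter, which can also be written as a context bound over a function-type alias — the same shape as the `IsWritable` alias this patch adds to the rdd package object. A self-contained sketch:

```
import scala.language.implicitConversions

object ViewBoundSketch {
  trait Writable
  final case class Text(s: String) extends Writable
  implicit def stringToWritable(s: String): Writable = Text(s)

  // Deprecated spelling:  def describe[A <% Writable](a: A): Writable = a
  // Explicit form: the conversion is passed in as an implicit parameter.
  def describeExplicit[A](a: A)(implicit ev: A => Writable): Writable = ev(a)

  // Same thing as a context bound over a function-type alias
  // (the pattern used for IsWritable in this patch).
  type ToWritable[A] = A => Writable
  def describe[A: ToWritable](a: A): Writable = implicitly[ToWritable[A]].apply(a)

  def main(args: Array[String]): Unit = {
    println(describe("hello"))       // Text(hello)
    println(describeExplicit("hi"))  // Text(hi)
  }
}
```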
Authored-by: yangjie01 Signed-off-by: Sean Owen --- .../org/apache/spark/rdd/SequenceFileRDDFunctions.scala | 7 ++----- core/src/main/scala/org/apache/spark/rdd/package.scala | 6 +++++- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala index 02def89dd8c2b..2f6ff0acdf024 100644 --- a/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala @@ -32,16 +32,13 @@ import org.apache.spark.internal.Logging * @note This can't be part of PairRDDFunctions because we need more implicit parameters to * convert our keys and values to Writable. */ -class SequenceFileRDDFunctions[K <% Writable: ClassTag, V <% Writable : ClassTag]( +class SequenceFileRDDFunctions[K: IsWritable: ClassTag, V: IsWritable: ClassTag]( self: RDD[(K, V)], _keyWritableClass: Class[_ <: Writable], _valueWritableClass: Class[_ <: Writable]) extends Logging with Serializable { - // TODO the context bound (<%) above should be replaced with simple type bound and implicit - // conversion but is a breaking change. This should be fixed in Spark 3.x. - /** * Output the RDD as a Hadoop SequenceFile using the Writable types we infer from the RDD's key * and value types. If the key or value are Writable, then we use their classes directly; @@ -52,7 +49,7 @@ class SequenceFileRDDFunctions[K <% Writable: ClassTag, V <% Writable : ClassTag def saveAsSequenceFile( path: String, codec: Option[Class[_ <: CompressionCodec]] = None): Unit = self.withScope { - def anyToWritable[U <% Writable](u: U): Writable = u + def anyToWritable[U: IsWritable](u: U): Writable = u // TODO We cannot force the return type of `anyToWritable` be same as keyWritableClass and // valueWritableClass at the compile time. To implement that, we need to add type parameters to diff --git a/core/src/main/scala/org/apache/spark/rdd/package.scala b/core/src/main/scala/org/apache/spark/rdd/package.scala index 55fc6e4d2b4df..43ca6d7643b17 100644 --- a/core/src/main/scala/org/apache/spark/rdd/package.scala +++ b/core/src/main/scala/org/apache/spark/rdd/package.scala @@ -17,7 +17,11 @@ package org.apache.spark +import org.apache.hadoop.io.Writable + /** * Provides several RDD implementations. See [[org.apache.spark.rdd.RDD]]. */ -package object rdd +package object rdd { + type IsWritable[A] = A => Writable +} From 13e8c2840969a17d5ba113686501abd3c23e3c23 Mon Sep 17 00:00:00 2001 From: "Pradyumn Agrawal (pradyumn.ag)" Date: Wed, 30 Dec 2020 17:25:46 -0800 Subject: [PATCH 0923/1009] [SPARK-33942][DOCS] Remove `hiveClientCalls.count` in `CodeGenerator` metrics docs ### What changes were proposed in this pull request? Removed the **hiveClientCalls.count** in CodeGenerator metrics in Component instance = Executor ### Why are the changes needed? Wrong information regarding metrics was being displayed on Monitoring Documentation. I had added referred documentation for adding metrics logging in Graphite. This metric was not being reported. I had to check if the issue was at my application end or spark code or documentation. Documentation had the wrong info. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual, checked it on my forked repository feature branch [SPARK-33942](https://github.com/coderbond007/spark/blob/SPARK-33942/docs/monitoring.md) Closes #30976 from coderbond007/SPARK-33942. 
Authored-by: Pradyumn Agrawal (pradyumn.ag) Signed-off-by: Dongjoon Hyun --- docs/monitoring.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/monitoring.md b/docs/monitoring.md index c6105188f07ec..5b3278bca031d 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -1276,7 +1276,6 @@ These metrics are exposed by Spark executors. - compilationTime (histogram) - generatedClassSize (histogram) - generatedMethodSize (histogram) - - hiveClientCalls.count - sourceCodeSize (histogram) - namespace=plugin.\ From 3fe5614a7cc8f5b65a90924e9a4a535fcaf76a98 Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Thu, 31 Dec 2020 13:13:02 -0800 Subject: [PATCH 0924/1009] [SPARK-31946][CORE] Make worker/executor decommission signal configurable ### What changes were proposed in this pull request? This PR proposed to make worker/executor decommission signal configurable. * Added confs: `spark.worker.decommission.signal` / `spark.executor.decommission.signal` * Rename `WorkerSigPWRReceived`/ `ExecutorSigPWRReceived` to `WorkerDecomSigReceived`/ `ExecutorDecomSigReceived` ### Why are the changes needed? The current signal `PWR` can't work on macOS since it's not compliant with POSIX while macOS does. So the developers currently can't do end-to-end decommission test on their macOS environment. Besides, the configuration becomes more flexible for users in case the default signal (`PWR`) gets conflicted with their own applications/environment. ### Does this PR introduce _any_ user-facing change? No (it's a new API for 3.2) ### How was this patch tested? Manually tested. Closes #30968 from Ngone51/configurable-decom-signal. Authored-by: yi.wu Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/deploy/DeployMessage.scala | 4 ++-- .../org/apache/spark/deploy/worker/Worker.scala | 13 +++++++------ .../executor/CoarseGrainedExecutorBackend.scala | 9 +++++---- .../org/apache/spark/internal/config/Worker.scala | 7 +++++++ .../org/apache/spark/internal/config/package.scala | 7 +++++++ .../cluster/CoarseGrainedClusterMessage.scala | 4 ++-- 6 files changed, 30 insertions(+), 14 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala index d5b5375d64f4d..727cdbc4ef2d1 100644 --- a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala @@ -77,10 +77,10 @@ private[deploy] object DeployMessages { object DecommissionWorker extends DeployMessage /** - * A message that sent by the Worker to itself when it receives PWR signal, + * A message that sent by the Worker to itself when it receives a signal, * indicating the Worker starts to decommission. */ - object WorkerSigPWRReceived extends DeployMessage + object WorkerDecommissionSigReceived extends DeployMessage /** * A message sent from Worker to Master to tell Master that the Worker has started diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala index a6092f637a9cb..a3c73751a2136 100755 --- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -66,16 +66,17 @@ private[deploy] class Worker( Utils.checkHost(host) assert (port > 0) - // If worker decommissioning is enabled register a handler on PWR to shutdown. + // If worker decommissioning is enabled register a handler on the configured signal to shutdown. 
if (conf.get(config.DECOMMISSION_ENABLED)) { - logInfo("Registering SIGPWR handler to trigger decommissioning.") - SignalUtils.register("PWR", "Failed to register SIGPWR handler - " + + val signal = conf.get(config.Worker.WORKER_DECOMMISSION_SIGNAL) + logInfo(s"Registering SIG$signal handler to trigger decommissioning.") + SignalUtils.register(signal, s"Failed to register SIG$signal handler - " + "disabling worker decommission feature.") { - self.send(WorkerSigPWRReceived) + self.send(WorkerDecommissionSigReceived) true } } else { - logInfo("Worker decommissioning not enabled, SIGPWR will result in exiting.") + logInfo("Worker decommissioning not enabled.") } // A scheduled executor used to send messages at the specified time. @@ -682,7 +683,7 @@ private[deploy] class Worker( case DecommissionWorker => decommissionSelf() - case WorkerSigPWRReceived => + case WorkerDecommissionSigReceived => decommissionSelf() // Tell the Master that we are starting decommissioning // so it stops trying to launch executor/driver on us diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index 6a1fd57873c3a..e1d3009598b8c 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -82,9 +82,10 @@ private[spark] class CoarseGrainedExecutorBackend( override def onStart(): Unit = { if (env.conf.get(DECOMMISSION_ENABLED)) { - logInfo("Registering PWR handler to trigger decommissioning.") - SignalUtils.register("PWR", "Failed to register SIGPWR handler - " + - "disabling executor decommission feature.") (self.askSync[Boolean](ExecutorSigPWRReceived)) + val signal = env.conf.get(EXECUTOR_DECOMMISSION_SIGNAL) + logInfo(s"Registering SIG$signal handler to trigger decommissioning.") + SignalUtils.register(signal, s"Failed to register SIG$signal handler - disabling" + + s" executor decommission feature.") (self.askSync[Boolean](ExecutorDecommissionSigReceived)) } logInfo("Connecting to driver: " + driverUrl) @@ -208,7 +209,7 @@ private[spark] class CoarseGrainedExecutorBackend( } override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { - case ExecutorSigPWRReceived => + case ExecutorDecommissionSigReceived => var driverNotified = false try { driver.foreach { driverRef => diff --git a/core/src/main/scala/org/apache/spark/internal/config/Worker.scala b/core/src/main/scala/org/apache/spark/internal/config/Worker.scala index a8072712c46ce..fda3a57546b67 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/Worker.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/Worker.scala @@ -82,4 +82,11 @@ private[spark] object Worker { .version("2.0.2") .intConf .createWithDefault(100) + + val WORKER_DECOMMISSION_SIGNAL = + ConfigBuilder("spark.worker.decommission.signal") + .doc("The signal that used to trigger the worker to start decommission.") + .version("3.2.0") + .stringConf + .createWithDefaultString("PWR") } diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index cbf4a971e3d0d..adaf92d5a8aa1 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -1927,6 +1927,13 @@ package object config { .timeConf(TimeUnit.SECONDS) .createOptional 
+ private[spark] val EXECUTOR_DECOMMISSION_SIGNAL = + ConfigBuilder("spark.executor.decommission.signal") + .doc("The signal that used to trigger the executor to start decommission.") + .version("3.2.0") + .stringConf + .createWithDefaultString("PWR") + private[spark] val STAGING_DIR = ConfigBuilder("spark.yarn.stagingDir") .doc("Staging directory used while submitting applications.") .version("2.0.0") diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala index e084453be0789..2f171433bbb5c 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala @@ -102,9 +102,9 @@ private[spark] object CoarseGrainedClusterMessages { // It's used for Standalone's cases, where decommission is triggered at MasterWebUI or Worker. object DecommissionExecutor extends CoarseGrainedClusterMessage - // A message that sent to the executor itself when it receives PWR signal, + // A message that sent to the executor itself when it receives a signal, // indicating the executor starts to decommission. - object ExecutorSigPWRReceived extends CoarseGrainedClusterMessage + object ExecutorDecommissionSigReceived extends CoarseGrainedClusterMessage case class RemoveWorker(workerId: String, host: String, message: String) extends CoarseGrainedClusterMessage From 771c538620e66be2d0fb0e383e4aa37b4d29f7eb Mon Sep 17 00:00:00 2001 From: angerszhu Date: Thu, 31 Dec 2020 13:18:31 -0800 Subject: [PATCH 0925/1009] [SPARK-33084][SQL][TESTS][FOLLOW-UP] Fix Scala 2.13 UT failure ### What changes were proposed in this pull request? Fix UT according to https://github.com/apache/spark/pull/29966#issuecomment-752830046 Change StructType construct from ``` def inputSchema: StructType = StructType(StructField("inputColumn", LongType) :: Nil) ``` to ``` def inputSchema: StructType = new StructType().add("inputColumn", LongType) ``` The whole udf class is : ``` package org.apache.spark.examples.sql import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction} import org.apache.spark.sql.types._ import org.apache.spark.sql.Row class Spark33084 extends UserDefinedAggregateFunction { // Data types of input arguments of this aggregate function def inputSchema: StructType = new StructType().add("inputColumn", LongType) // Data types of values in the aggregation buffer def bufferSchema: StructType = new StructType().add("sum", LongType).add("count", LongType) // The data type of the returned value def dataType: DataType = DoubleType // Whether this function always returns the same output on the identical input def deterministic: Boolean = true // Initializes the given aggregation buffer. The buffer itself is a `Row` that in addition to // standard methods like retrieving a value at an index (e.g., get(), getBoolean()), provides // the opportunity to update its values. Note that arrays and maps inside the buffer are still // immutable. 
def initialize(buffer: MutableAggregationBuffer): Unit = { buffer(0) = 0L buffer(1) = 0L } // Updates the given aggregation buffer `buffer` with new input data from `input` def update(buffer: MutableAggregationBuffer, input: Row): Unit = { if (!input.isNullAt(0)) { buffer(0) = buffer.getLong(0) + input.getLong(0) buffer(1) = buffer.getLong(1) + 1 } } // Merges two aggregation buffers and stores the updated buffer values back to `buffer1` def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = { buffer1(0) = buffer1.getLong(0) + buffer2.getLong(0) buffer1(1) = buffer1.getLong(1) + buffer2.getLong(1) } // Calculates the final result def evaluate(buffer: Row): Double = buffer.getLong(0).toDouble / buffer.getLong(1) } ``` ### Why are the changes needed? Fix UT for scala 2.13 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existed UT Closes #30980 from AngersZhuuuu/spark-33084-followup. Authored-by: angerszhu Signed-off-by: Dongjoon Hyun --- sql/core/src/test/resources/SPARK-33084.jar | Bin 6322 -> 6119 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/sql/core/src/test/resources/SPARK-33084.jar b/sql/core/src/test/resources/SPARK-33084.jar index 1dc5e9303b707f9b788c28fc23a0f0aa5b2c323b..61e1663ad3a2804eb7c39960ea51e56a0ebfaf94 100644 GIT binary patch delta 2087 zcmZXVdpy(oAIE28Ti7tM+9?vLg_%opOFQJ&B$eE9kD?4C97$g*jdD9IxfQugF3V*r z_egHZ(DsvCa*3iL_v6ly$2sSp-{+6d>+ya+Ua$B2{oj*`8^?-R5cvi6f_^<-U`D)% z9RH8Nj8gSjKKPDake%!i7#9SAFe0>%d?$e`)}MR#4H`?eS-*wj(d;vV_hLYCIN|#A zKi9x;XM+-0IV>ff)OV~@-qkexRcC0*xZVyrbFp0>rW&Ulao6`%=0{d>;<*C7ybuR2bV&iCb^e;F7Q}Y!s;q(E*m3Dm+sAtz z#}=6-&p(b6gS(5+k#y?1@?(2I?ul=>TYE z3wKLX6^gzTEdL=>)9ul*-nw{{tj-$L3V8Q>=M#HbY5QVHSs{P1ubCAJ4Lu}%sx8y} zTO?Mu-rJbz=A7UdO*R;@%&xS)Q@*VFU}a{rkL?^8ieASYI!pRVebi0|EL5vcYuw@L z5mD8LNAzL2PC(lHqp{TjyW^9qo?n+)iUFdnnt7$R&!W!L5uBMhZhl06#|om?Ar{lL z;X?Ro%PYAvmsjaN zV{=li@&k2~s#@9oo?dfDVCkFj;5%-k(UI@knzxu;AF6GEW-UUget`{&%{d<$9)gxP zn?C39twJJ@17o%=K=iarF}hKZq|eo{i`CrVd0Fe`igQLqXMxNIm6|R^jbY`Yh+tF$ zwnU7VNmH3gfok0M3k~lqD)`EYMzX5Z>eB_8V;s531*hZ1k)vsxkx9eDD>u_@YO)62@Bzo$fmjo!44 zUlIY|QV$&4BQ0D}FLWP2=zpWl~Q(LRlAL}LaS%9)##P)zp@L}lTg*>DHYE!=( z$_Bh{x6mIP{>v)H$bdKC7SSDDhCKB3*~LpDhC%w_VJvT&tV$3te|rk&x}2YH^jf}; zd^bF4>P>Rp@hfvNRnBl%fja@FN!lcKRc@QciJ9vH1K0MwcO`C}Vt%RMuh`aC-d`2D zf=|K~xdSi5a;_TNSbiB6h4$f9sAs&1bBx>K9k7Uh=-tv$^CyJ?xeL*nYR}CH8YOv8 z|KWlxV_%YDZ-TE)d4nfvl>$;x`bB9&XMA2oo6}E}y4#4shr~dW62V~N@-*-ijh(8m zMIY0Ad!-eV>`g+V9)4e6s&313q8P9|94jceV%4KBx-W#3MAjifjH)pKaKkn|3(HbK zFRn4xt7Ez;Ga<(*Z)x1s*emVjJ^cORtf<#``8lo?+1fJu60OY5UtBS$aNVTaO^yC2 zi)UrT@%^|q-fLwy5q!ln50#r%CptWLMmRYRU4mpR!#XY!Mxo9V(!-}VVN@_h+GT^^ zZ0lgpG0mp(hW?=X{k3l#9H0#nXTgBeqA&Ag$BGDis0n5U9O&$czz;U0QEbsZFnVRn z26UmE1X=e=Nq-3vDx1DXBI1BI?muQ&Ik%rgO~f!2gT&(WR|(G@PB-u%CPLAtRx%;q z$}F2z;{h*`FM zZ!V!`PEn;|qU;60*PlaZAWohWmrM#v>@ppbXm&Gm*>7(5^iZI~qNdX4r)-Ykl*~Tb zvnXaS$woLRMELVbgAqMPA+znT(-UT+c<0=>haa!X5;)gn3HceOVz!SJkM@S0q$%l0 z^JNl8LNX_pLY92WRdsrRq(_8KTTo~Df>S=~gUp6MdQi8-b2wE>kFa`Ov&zBKUb>(# zg0}|Ur?7&(n&dQ~d^&JTL33+_7Wrkb8|jQ-hmw+{W9H&;dVZVdaJUbd@3aEyGIEHA zFEnM`C5yQdXlx=>L)Qyren*&NSL{Ni=v8N^_CK(8eW^yRprk)=N0; zfYia=Qd3Sa#Q&KgN=rZ;5M&{Lng0fy+t}9t delta 2326 zcmZWrXHb)i68#{d6A}nbgNh)ak)jwNf&~cW(n2C36cuDZYJec9mLD~gF2L&`pQ52;&fk&?a%|qXe^Jd=e%%0sdXLk1Ao+&-faR|;11eOGTMnD?l zFUTQ~e;T6{l`15&i+I1D0USpR01Bb>E+IpzjmucS0@txZJ1zF2bmE=@ANxwr{bi z?TT)CHS~RO-{8tw!CEC1s7LK&nYxA$(A;-QP!a62l@xC*dMaqGAcx#;bH8KOZi%|7 zr1x}>D%j&|_lNrlD-;Y)9?y^uv6J1qCs*6n{D|8#RDxu&WD<)&2xAIK#T5xmu}Dj9 zJ6!qQUQp4&^g?s|#4=x7k9f$VZ*Su%p?}D^l9IVU{ME&leDs6W;iQDhd#D>tR<3&| 
zfduF%PDC>Y9UzgF|)-g}yFxH&v5kml@E3 zfO=sYo83>XJ1&gVR2a%`UZ1X}X&AzzJJuZgh@=(2>iA7B3TVtwnTme9)rL_i&00RQ zAvbm1%^);g2FLq%GsO#nGfNLYO$xM{tc<_bIa4D<;W5?pL<7<`2z(3D0ZI)|E28G% zLYMBGX-!BLD{z$Jk3gvIkBRSBs(5-7{=?!+qZRUkU@m~-9d#sAWuPJ4kF6MZKBD>F zXxrAPN2u@Vtt{+0D#o>27hez*xIcqU>r;`i=3KA1L&3KdwHHEO9#^v-ZC;#AiNU=(qtwU2$6U?&|}3{7Vgx;t>1;KB`J$z>eEakurH zj9XRZ1c-06%yi;Yuz8v{Z8Ii$eu_8PYj{E0_!2-9k65OzQP$L4<=ShB&hcTw46@G< zgg0CbH|RgVx95-9kyDf4`+7^rv=}SZ{Kv_1CD9tTY(A^rXk<>7+8*xgQ84||;+ehx z<}UhIGOM4O&U;;fFo#{c2~`7U85Hx_7y61FYyD4_5_D{;_xl3i)@AZWZ zM`FVs9HI`(u(Pxc>cGCA)5?fw^VqG|m{>PFUUpIH_DNZ?WHDKt3m!}e&6r4}Z?&x_ zBeGxY(PFGJ!Ov%M^I^(0!4w{Xwr@(;e}dCh6+%p6_s!*;J#OhnCz#Yy(2w}l%8_FU zp5Jb9n4K@9E8N|;qN`+2aA+Q#A?;ClJp<{~lrK8QmxLJNk+wIHs6f{{cae$`=rxx7 zR>7vua#S?bXB&|fw11udy|p}D7Nu>Xz2pl>Y`(QdG--YS>uO|6wbL|%M-$>ygsjcZ z45hCzv=$HT=yns$_-f}o;y19aiC2Zg1t+vbmtWfydTTC5yc(G>kyOaIELD+NB4I)W z&j&PFXJ*&W3*g)u2cz*5>LoA3zR5*WvK2uGO2;iOI~Nk^`Th{S+3WpUo1aC*79m`i zf?06u`60>_O;~=-iMje+&UY?Gc$%w;LMof*Jsffkdh)TF-0ota-iX-%*P}&m6s&wJaVTWb%U=>C)2@NqS!A*q?}vNal(TAYD=SNq)HZX-M2 zk#EFGVSr={YIjdLu_Xq5FUt|Z- zK{t1HVR&HWs;9X?L)z1IV(Qf9f!sI4Q<-! zjhziQ(PvYe#nT?|os1~$?H1EuwmAoXyr$?xC`WZf;@Pu6!juWkc ziZ(SVHnH9ug0@HBPA8i}!2=I0&fBw9S@vvl?>h?%MQc$w$&dbq{SfN;(q?4a zzh`!1IXl$Hu$Lj}+3^s{bJ;2;L}cs)^JB+eT@j4OE?$4+HBZK^fQ!*yMPy=cWx|qf& z3x(pqzlcUS>ZdCx+k!{`pDjpGz^d+U!&}iPT~|c_zz+fddcW+%f6J4&3JLmgAvpb_ z0)(z5A*+qE6B3pO{dOGq|Iq@_-vbN~2qK|$N5y@=E(ldb2>%VG$3WHSMT*kDs8_qx z*iTBAF0Lf4x_dBwSeb=A2?BrwX#hC!dldjYiPM4Y3JvH1N=V^K_-?CDH-<|p;_O63 Rf68Qbb3YgWV&s2L{{hoAGr9l( From ed9f7288019be4803cdb1ee570ca21ad76af371a Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 31 Dec 2020 13:20:31 -0800 Subject: [PATCH 0926/1009] [SPARK-33944][SQL] Incorrect logging for warehouse keys in SharedState options ### What changes were proposed in this pull request? While using SparkSession's initial options to generate the sharable Spark conf and Hadoop conf in ShardState, we shall put the log in the codeblock that the warehouse keys being handled. ### Why are the changes needed? bugfix, rm ambiguous log when setting spark.sql.warehouse.dir in SparkSession.builder.config, but only warn setting hive.metastore.warehouse.dir ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? new tests Closes #30978 from yaooqinn/SPARK-33944. 
Authored-by: Kent Yao Signed-off-by: Dongjoon Hyun --- .../spark/sql/internal/SharedState.scala | 16 +++++++----- .../spark/sql/SparkSessionBuilderSuite.scala | 26 +++++++++++++++++++ 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala index 6018afb0dce46..cc21def3fb367 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala @@ -56,17 +56,15 @@ private[sql] class SharedState( private[sql] val (conf, hadoopConf) = { // Load hive-site.xml into hadoopConf and determine the warehouse path which will be set into // both spark conf and hadoop conf avoiding be affected by any SparkSession level options - SharedState.loadHiveConfFile( + val initialConfigsWithoutWarehouse = SharedState.loadHiveConfFile( sparkContext.conf, sparkContext.hadoopConfiguration, initialConfigs) + val confClone = sparkContext.conf.clone() val hadoopConfClone = new Configuration(sparkContext.hadoopConfiguration) // If `SparkSession` is instantiated using an existing `SparkContext` instance and no existing // `SharedState`, all `SparkSession` level configurations have higher priority to generate a // `SharedState` instance. This will be done only once then shared across `SparkSession`s - initialConfigs.foreach { - case (k, _) if k == "hive.metastore.warehouse.dir" || k == WAREHOUSE_PATH.key => - logWarning(s"Not allowing to set ${WAREHOUSE_PATH.key} or hive.metastore.warehouse.dir " + - s"in SparkSession's options, it should be set statically for cross-session usages") + initialConfigsWithoutWarehouse.foreach { case (k, v) if SQLConf.staticConfKeys.contains(k) => logDebug(s"Applying static initial session options to SparkConf: $k -> $v") confClone.set(k, v) @@ -228,7 +226,8 @@ object SharedState extends Logging { def loadHiveConfFile( sparkConf: SparkConf, hadoopConf: Configuration, - initialConfigs: scala.collection.Map[String, String] = Map.empty): Unit = { + initialConfigs: scala.collection.Map[String, String] = Map.empty) + : scala.collection.Map[String, String] = { def containsInSparkConf(key: String): Boolean = { sparkConf.contains(key) || sparkConf.contains("spark.hadoop." 
+ key) || @@ -248,6 +247,10 @@ object SharedState extends Logging { } val sparkWarehouseOption = initialConfigs.get(WAREHOUSE_PATH.key).orElse(sparkConf.getOption(WAREHOUSE_PATH.key)) + if (initialConfigs.contains(hiveWarehouseKey)) { + logWarning(s"Not allowing to set $hiveWarehouseKey in SparkSession's options, please use " + + s"${WAREHOUSE_PATH.key} to set statically for cross-session usages") + } // hive.metastore.warehouse.dir only stay in hadoopConf sparkConf.remove(hiveWarehouseKey) // Set the Hive metastore warehouse path to the one we use @@ -272,5 +275,6 @@ object SharedState extends Logging { sparkWarehouseDir } logInfo(s"Warehouse path is '$warehousePath'.") + initialConfigs -- Seq(WAREHOUSE_PATH.key, hiveWarehouseKey) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala index e53976854070d..1f16bb69b3a16 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala @@ -386,4 +386,30 @@ class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { assert(spark2.conf.get(custom) === "c2") } + + test("SPARK-33944: warning setting hive.metastore.warehouse.dir using session options") { + val msg = "Not allowing to set hive.metastore.warehouse.dir in SparkSession's options" + val logAppender = new LogAppender(msg) + withLogAppender(logAppender) { + SparkSession.builder() + .master("local") + .config("hive.metastore.warehouse.dir", "any") + .getOrCreate() + .sharedState + } + assert(logAppender.loggingEvents.exists(_.getRenderedMessage.contains(msg))) + } + + test("SPARK-33944: no warning setting spark.sql.warehouse.dir using session options") { + val msg = "Not allowing to set hive.metastore.warehouse.dir in SparkSession's options" + val logAppender = new LogAppender(msg) + withLogAppender(logAppender) { + SparkSession.builder() + .master("local") + .config("spark.sql.warehouse.dir", "any") + .getOrCreate() + .sharedState + } + assert(!logAppender.loggingEvents.exists(_.getRenderedMessage.contains(msg))) + } } From 45df6db906b39646f5b5f6b4a88addf1adcbe107 Mon Sep 17 00:00:00 2001 From: Baohe Zhang Date: Thu, 31 Dec 2020 13:34:55 -0800 Subject: [PATCH 0927/1009] [SPARK-33906][WEBUI] Fix the bug of UI Executor page stuck due to undefined peakMemoryMetrics ### What changes were proposed in this pull request? Check if the executorSummary.peakMemoryMetrics is defined before accessing it. Without checking, the UI has risked being stuck at the Executors page. ### Why are the changes needed? App live UI may stuck at Executors page without this fix. Steps to reproduce (with master branch): In mac OS standalone mode, open a spark-shell $SPARK_HOME/bin/spark-shell --master spark://localhost:7077 val x = sc.makeRDD(1 to 100000, 5) x.count() Then open the app UI in the browser, and click the Executors page, will get stuck at this page: ![image](https://user-images.githubusercontent.com/26694233/103105677-ca1a7380-45f4-11eb-9245-c69f4a4e816b.png) Also, the return JSON from API endpoint http://localhost:4040/api/v1/applications/app-20201224134418-0003/executors miss "peakMemoryMetrics" for executor objects. I attached the full json text in https://issues.apache.org/jira/browse/SPARK-33906. 
I debugged it and observed that ExecutorMetricsPoller .getExecutorUpdates returns an empty map, which causes peakExecutorMetrics to None in https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/status/LiveEntity.scala#L345. The possible reason for returning the empty map is that the stage completion time is shorter than the heartbeat interval, so the stage entry in stageTCMP has already been removed before the reportHeartbeat is called. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual test, rerun the steps of bug reproduce and see the bug is gone. Closes #30920 from baohe-zhang/SPARK-33906. Authored-by: Baohe Zhang Signed-off-by: Dongjoon Hyun --- .../apache/spark/ui/static/executorspage.js | 76 ++++++++++++++----- 1 file changed, 56 insertions(+), 20 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js index 1d3f628f5fab6..c8dc61991114a 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js @@ -414,38 +414,74 @@ $(document).ready(function () { }, { data: function (row, type) { - if (type !== 'display') - return row.peakMemoryMetrics.JVMHeapMemory; - else - return (formatBytes(row.peakMemoryMetrics.JVMHeapMemory, type) + ' / ' + - formatBytes(row.peakMemoryMetrics.JVMOffHeapMemory, type)); + var peakMemoryMetrics = row.peakMemoryMetrics; + if (typeof peakMemoryMetrics !== 'undefined') { + if (type !== 'display') + return peakMemoryMetrics.JVMHeapMemory; + else + return (formatBytes(peakMemoryMetrics.JVMHeapMemory, type) + ' / ' + + formatBytes(peakMemoryMetrics.JVMOffHeapMemory, type)); + } else { + if (type !== 'display') { + return 0; + } else { + return '0.0 B / 0.0 B'; + } + } } }, { data: function (row, type) { - if (type !== 'display') - return row.peakMemoryMetrics.OnHeapExecutionMemory; - else - return (formatBytes(row.peakMemoryMetrics.OnHeapExecutionMemory, type) + ' / ' + - formatBytes(row.peakMemoryMetrics.OffHeapExecutionMemory, type)); + var peakMemoryMetrics = row.peakMemoryMetrics; + if (typeof peakMemoryMetrics !== 'undefined') { + if (type !== 'display') + return peakMemoryMetrics.OnHeapExecutionMemory; + else + return (formatBytes(peakMemoryMetrics.OnHeapExecutionMemory, type) + ' / ' + + formatBytes(peakMemoryMetrics.OffHeapExecutionMemory, type)); + } else { + if (type !== 'display') { + return 0; + } else { + return '0.0 B / 0.0 B'; + } + } } }, { data: function (row, type) { - if (type !== 'display') - return row.peakMemoryMetrics.OnHeapStorageMemory; - else - return (formatBytes(row.peakMemoryMetrics.OnHeapStorageMemory, type) + ' / ' + - formatBytes(row.peakMemoryMetrics.OffHeapStorageMemory, type)); + var peakMemoryMetrics = row.peakMemoryMetrics; + if (typeof peakMemoryMetrics !== 'undefined') { + if (type !== 'display') + return peakMemoryMetrics.OnHeapStorageMemory; + else + return (formatBytes(peakMemoryMetrics.OnHeapStorageMemory, type) + ' / ' + + formatBytes(peakMemoryMetrics.OffHeapStorageMemory, type)); + } else { + if (type !== 'display') { + return 0; + } else { + return '0.0 B / 0.0 B'; + } + } } }, { data: function (row, type) { - if (type !== 'display') - return row.peakMemoryMetrics.DirectPoolMemory; - else - return (formatBytes(row.peakMemoryMetrics.DirectPoolMemory, type) + ' / ' + - formatBytes(row.peakMemoryMetrics.MappedPoolMemory, type)); + var peakMemoryMetrics = 
row.peakMemoryMetrics; + if (typeof peakMemoryMetrics !== 'undefined') { + if (type !== 'display') + return peakMemoryMetrics.DirectPoolMemory; + else + return (formatBytes(peakMemoryMetrics.DirectPoolMemory, type) + ' / ' + + formatBytes(peakMemoryMetrics.MappedPoolMemory, type)); + } else { + if (type !== 'display') { + return 0; + } else { + return '0.0 B / 0.0 B'; + } + } } }, {data: 'diskUsed', render: formatBytes}, From bd346f4a2d078dd36d2fcadf3d5025389b124814 Mon Sep 17 00:00:00 2001 From: William Hyun Date: Fri, 1 Jan 2021 19:59:17 -0800 Subject: [PATCH 0928/1009] [SPARK-33957][BUILD] Update commons-lang3 to 3.11 ### What changes were proposed in this pull request? This PR aims to update commons-lang3 to 3.11 to support Java 16+ better. ### Why are the changes needed? commons-lang3 has the following bug fixes and Java 16 support. - https://commons.apache.org/proper/commons-lang/changes-report.html#a3.11 ### Does this PR introduce _any_ user-facing change? N/A ### How was this patch tested? Pass the CIs. Closes #30990 from williamhyun/Commons-lang3. Authored-by: William Hyun Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 199a0d1a31751..fc3b669e721ac 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -44,7 +44,7 @@ commons-digester/1.8//commons-digester-1.8.jar commons-httpclient/3.1//commons-httpclient-3.1.jar commons-io/2.4//commons-io-2.4.jar commons-lang/2.6//commons-lang-2.6.jar -commons-lang3/3.10//commons-lang3-3.10.jar +commons-lang3/3.11//commons-lang3-3.11.jar commons-logging/1.1.3//commons-logging-1.1.3.jar commons-math3/3.4.1//commons-math3-3.4.1.jar commons-net/3.1//commons-net-3.1.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 42e1634b6e66c..0ff30ce0c0a2d 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -41,7 +41,7 @@ commons-dbcp/1.4//commons-dbcp-1.4.jar commons-httpclient/3.1//commons-httpclient-3.1.jar commons-io/2.5//commons-io-2.5.jar commons-lang/2.6//commons-lang-2.6.jar -commons-lang3/3.10//commons-lang3-3.10.jar +commons-lang3/3.11//commons-lang3-3.11.jar commons-logging/1.1.3//commons-logging-1.1.3.jar commons-math3/3.4.1//commons-math3-3.4.1.jar commons-net/3.1//commons-net-3.1.jar diff --git a/pom.xml b/pom.xml index 39ce502ab0e3f..5ff84cf806649 100644 --- a/pom.xml +++ b/pom.xml @@ -178,7 +178,7 @@ 2.6 - 3.10 + 3.11 2.6.2 4.1.17 From 4cd680581a948fb4d7701842ac2cd9e12328089d Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sat, 2 Jan 2021 08:58:48 -0800 Subject: [PATCH 0929/1009] [SPARK-33956][SQL] Add rowCount for Range operator ### What changes were proposed in this pull request? This pr add rowCount for `Range` operator: ```scala spark.sql("set spark.sql.cbo.enabled=true") spark.sql("select id from range(100)").explain("cost") ``` Before this pr: ``` == Optimized Logical Plan == Range (0, 100, step=1, splits=None), Statistics(sizeInBytes=800.0 B) ``` After this pr: ``` == Optimized Logical Plan == Range (0, 100, step=1, splits=None), Statistics(sizeInBytes=800.0 B, rowCount=100) ``` ### Why are the changes needed? 
[`JoinEstimation.estimateInnerOuterJoin`](https://github.com/apache/spark/blob/d6a68e0b67ff7de58073c176dd097070e88ac831/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/JoinEstimation.scala#L55-L156) need the row count. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #30989 from wangyum/SPARK-33956. Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/plans/logical/basicLogicalOperators.scala | 2 +- .../catalyst/statsEstimation/BasicStatsEstimationSuite.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 97bc0083276bc..ee7db7ae83542 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -603,7 +603,7 @@ case class Range( } override def computeStats(): Statistics = { - Statistics(sizeInBytes = LongType.defaultSize * numElements) + Statistics(sizeInBytes = LongType.defaultSize * numElements, rowCount = Some(numElements)) } override def outputOrdering: Seq[SortOrder] = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala index dfe790dca54d8..72e8b524cf339 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala @@ -44,7 +44,7 @@ class BasicStatsEstimationSuite extends PlanTest with StatsEstimationTestBase { test("range") { val range = Range(1, 5, 1, None) - val rangeStats = Statistics(sizeInBytes = 4 * 8) + val rangeStats = Statistics(sizeInBytes = 4 * 8, Some(4)) checkStats( range, expectedStatsCboOn = rangeStats, From 1c25bea0bbe8365523d2a3d6b06da03d67f25794 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sat, 2 Jan 2021 14:49:03 -0800 Subject: [PATCH 0930/1009] [SPARK-33961][BUILD] Upgrade SBT to 1.4.6 ### What changes were proposed in this pull request? This PR aims to upgrade SBT to 1.4.6 to fix the SBT regression. ### Why are the changes needed? [SBT 1.4.6](https://github.com/sbt/sbt/releases/tag/v1.4.6) has the following fixes - Updates to Coursier 2.0.8, which fixes the cache directory setting on Windows - Fixes performance regression in shell tab completion - Fixes match error when using withDottyCompat - Fixes thread-safety in AnalysisCallback handler ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #30993 from dongjoon-hyun/SPARK-SBT-1.4.6. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- project/build.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/build.properties b/project/build.properties index 35ee6fea6d336..e80b124bf3de6 100644 --- a/project/build.properties +++ b/project/build.properties @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -sbt.version=1.4.5 +sbt.version=1.4.6 From 6c5ba8169ae64fdcefd8530c2b38326178f5fa92 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sun, 3 Jan 2021 10:59:12 +0900 Subject: [PATCH 0931/1009] [SPARK-33959][SQL] Improve the statistics estimation of the Tail ### What changes were proposed in this pull request? This pr improve the statistics estimation of the `Tail`: ```scala spark.sql("set spark.sql.cbo.enabled=true") spark.range(100).selectExpr("id as a", "id as b", "id as c", "id as e").write.saveAsTable("t1") println(Tail(Literal(5), spark.sql("SELECT * FROM t1").queryExecution.logical).queryExecution.stringWithStats) ``` Before this pr: ``` == Optimized Logical Plan == Tail 5, Statistics(sizeInBytes=3.8 KiB) +- Relation[a#24L,b#25L,c#26L,e#27L] parquet, Statistics(sizeInBytes=3.8 KiB) ``` After this pr: ``` == Optimized Logical Plan == Tail 5, Statistics(sizeInBytes=200.0 B, rowCount=5) +- Relation[a#24L,b#25L,c#26L,e#27L] parquet, Statistics(sizeInBytes=3.8 KiB) ``` ### Why are the changes needed? Import statistics estimation. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #30991 from wangyum/SPARK-33959. Authored-by: Yuming Wang Signed-off-by: HyukjinKwon --- .../sql/catalyst/plans/logical/LogicalPlanVisitor.scala | 3 +++ .../logical/statsEstimation/BasicStatsPlanVisitor.scala | 4 ++++ .../SizeInBytesOnlyStatsPlanVisitor.scala | 9 +++++++++ .../statsEstimation/BasicStatsEstimationSuite.scala | 6 ++++++ 4 files changed, 22 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala index 18baced8f3d61..9cf599167405b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala @@ -41,6 +41,7 @@ trait LogicalPlanVisitor[T] { case p: ScriptTransformation => visitScriptTransform(p) case p: Union => visitUnion(p) case p: Window => visitWindow(p) + case p: Tail => visitTail(p) case p: LogicalPlan => default(p) } @@ -81,4 +82,6 @@ trait LogicalPlanVisitor[T] { def visitUnion(p: Union): T def visitWindow(p: Window): T + + def visitTail(p: Tail): T } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala index b8c652dc8f12e..ec0c1001b1caa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala @@ -72,4 +72,8 @@ object BasicStatsPlanVisitor extends LogicalPlanVisitor[Statistics] { override def visitUnion(p: Union): Statistics = fallback(p) override def visitWindow(p: Window): Statistics = fallback(p) + + override def visitTail(p: Tail): Statistics = { + fallback(p) + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala index a586988fd3253..f02f4e2a90626 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala @@ -150,4 +150,13 @@ object SizeInBytesOnlyStatsPlanVisitor extends LogicalPlanVisitor[Statistics] { } override def visitWindow(p: Window): Statistics = visitUnaryNode(p) + + override def visitTail(p: Tail): Statistics = { + val limit = p.limitExpr.eval().asInstanceOf[Int] + val childStats = p.child.stats + val rowCount: BigInt = childStats.rowCount.map(_.min(limit)).getOrElse(limit) + Statistics( + sizeInBytes = EstimationUtils.getOutputSize(p.output, rowCount, childStats.attributeStats), + rowCount = Some(rowCount)) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala index 72e8b524cf339..d682165e08e32 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala @@ -84,6 +84,12 @@ class BasicStatsEstimationSuite extends PlanTest with StatsEstimationTestBase { checkStats(globalLimit, stats) } + test("tail estimation") { + checkStats(Tail(Literal(1), plan), Statistics(sizeInBytes = 12, rowCount = Some(1))) + checkStats(Tail(Literal(20), plan), plan.stats.copy(attributeStats = AttributeMap(Nil))) + checkStats(Tail(Literal(0), plan), Statistics(sizeInBytes = 1, rowCount = Some(0))) + } + test("sample estimation") { val sample = Sample(0.0, 0.5, withReplacement = false, (math.random * 1000).toLong, plan) checkStats(sample, Statistics(sizeInBytes = 60, rowCount = Some(5))) From fc7d0165d29e04a8e78577c853a701bdd8a2af4a Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Sun, 3 Jan 2021 11:23:46 +0900 Subject: [PATCH 0932/1009] [SPARK-33963][SQL] Canonicalize `HiveTableRelation` w/o table stats ### What changes were proposed in this pull request? Skip table stats in canonicalizing of `HiveTableRelation`. ### Why are the changes needed? The changes fix a regression comparing to Spark 3.0, see SPARK-33963. ### Does this PR introduce _any_ user-facing change? Yes. After changes Spark behaves as in the version 3.0.1. ### How was this patch tested? By running new UT: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *CachedTableSuite" ``` Closes #30995 from MaxGekk/fix-caching-hive-table. 
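To make the regression concrete, here is a simplified, purely illustrative analogue (not the real HiveTableRelation API): cache lookup compares canonicalized plans, so any field that can differ between otherwise-identical relations, such as freshly computed table stats, has to be normalized away.

```scala
// Toy model only: Relation and its methods are illustrative, not Spark classes.
case class Relation(name: String, stats: Option[BigInt]) {
  def canonicalizedKeepingStats: Relation = this     // pre-fix behavior: stats kept
  def canonicalized: Relation = copy(stats = None)   // post-fix behavior: stats dropped
}

val cachedEntry = Relation("table_on_test", stats = None)
val lookupKey   = Relation("table_on_test", stats = Some(BigInt(1024)))

assert(cachedEntry.canonicalized == lookupKey.canonicalized)  // canonical forms match: cache hit
assert(cachedEntry.canonicalizedKeepingStats != lookupKey.canonicalizedKeepingStats)  // the regression: miss
```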
Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- .../apache/spark/sql/catalyst/catalog/interface.scala | 3 ++- .../org/apache/spark/sql/hive/CachedTableSuite.scala | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 5cb237688f875..d25b1fe46d569 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -797,7 +797,8 @@ case class HiveTableRelation( }, partitionCols = partitionCols.zipWithIndex.map { case (attr, index) => attr.withExprId(ExprId(index + dataCols.length)) - } + }, + tableStats = None ) override def computeStats(): Statistics = { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala index 6cb98e92e36fa..ee93af7643b21 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala @@ -429,4 +429,14 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleto } } } + + test("SPARK-33963: do not use table stats while looking in table cache") { + val t = "table_on_test" + withTable(t) { + sql(s"CREATE TABLE $t (col int)") + assert(!spark.catalog.isCached(t)) + sql(s"CACHE TABLE $t") + assert(spark.catalog.isCached(t)) + } + } } From cfd4a083987f985da4659333c718561c19e0cbfe Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sun, 3 Jan 2021 01:29:12 -0800 Subject: [PATCH 0933/1009] [SPARK-33962][SS] Fix incorrect min partition condition ### What changes were proposed in this pull request? This patch fixes an incorrect condition when comparing offset range size and min partition config. ### Why are the changes needed? When calculating offset ranges, we consider `minPartitions` configuration. If `minPartitions` is not set or is less than or equal the size of given ranges, it means there are enough partitions at Kafka so we don't need to split offsets to satisfy min partition requirement. But the current condition is `offsetRanges.size > minPartitions.get` and is not correct. Currently `getRanges` will split offsets in unnecessary case. Besides, in non-split case, we can assign preferred executor location and reuse `KafkaConsumer`. So unnecessary splitting offset range will miss the chance to reuse `KafkaConsumer`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Manual test in Spark cluster with Kafka. Closes #30994 from viirya/ss-minor4. 
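A small, self-contained sketch of the corrected predicate (names simplified; not the actual KafkaOffsetRangeCalculator code):

```scala
// Split offset ranges only when there are strictly fewer ranges than minPartitions requires.
def needsSplit(numRanges: Int, minPartitions: Option[Int]): Boolean =
  minPartitions.exists(min => numRanges < min)

assert(!needsSplit(numRanges = 3, minPartitions = Some(3))) // equal: keep ranges, reuse KafkaConsumers
assert(needsSplit(numRanges = 2, minPartitions = Some(3)))  // too few: split offsets
assert(!needsSplit(numRanges = 5, minPartitions = None))    // minPartitions unset: never split
```

Keeping the ranges intact in the equal case also preserves the preferred executor locations, which is what enables consumer reuse.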
Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../sql/kafka010/KafkaOffsetRangeCalculator.scala | 2 +- .../kafka010/KafkaOffsetRangeCalculatorSuite.scala | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetRangeCalculator.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetRangeCalculator.scala index f7183f7add14b..1e9a62ecce025 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetRangeCalculator.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetRangeCalculator.scala @@ -46,7 +46,7 @@ private[kafka010] class KafkaOffsetRangeCalculator(val minPartitions: Option[Int val offsetRanges = ranges.filter(_.size > 0) // If minPartitions not set or there are enough partitions to satisfy minPartitions - if (minPartitions.isEmpty || offsetRanges.size > minPartitions.get) { + if (minPartitions.isEmpty || offsetRanges.size >= minPartitions.get) { // Assign preferred executor locations to each range such that the same topic-partition is // preferentially read from the same executor and the KafkaConsumer can be reused. offsetRanges.map { range => diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaOffsetRangeCalculatorSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaOffsetRangeCalculatorSuite.scala index 5d010cd553521..751b877df9c78 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaOffsetRangeCalculatorSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaOffsetRangeCalculatorSuite.scala @@ -71,6 +71,20 @@ class KafkaOffsetRangeCalculatorSuite extends SparkFunSuite { KafkaOffsetRange(tp3, 1, 2, None))) } + testWithMinPartitions("N TopicPartitions to N offset ranges with executors", 3) { calc => + assert( + calc.getRanges( + Seq( + KafkaOffsetRange(tp1, 1, 2), + KafkaOffsetRange(tp2, 1, 2), + KafkaOffsetRange(tp3, 1, 2)), + Seq("exec1", "exec2", "exec3")) === + Seq( + KafkaOffsetRange(tp1, 1, 2, Some("exec3")), + KafkaOffsetRange(tp2, 1, 2, Some("exec1")), + KafkaOffsetRange(tp3, 1, 2, Some("exec2")))) + } + testWithMinPartitions("1 TopicPartition to N offset ranges", 4) { calc => assert( calc.getRanges( From 963c60fe49a54c05cc1c50cb7abce864c5322bdf Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sun, 3 Jan 2021 01:31:38 -0800 Subject: [PATCH 0934/1009] [SPARK-33955][SS] Add latest offsets to source progress ### What changes were proposed in this pull request? This patch proposes to add latest offset to source progress for streaming queries. ### Why are the changes needed? Currently we record start and end offsets per source in streaming process. Latest offset is an important information for streaming process but the progress lacks of this info. We can use it to track the process lag and adjust streaming queries. We should add latest offset to source progress. ### Does this PR introduce _any_ user-facing change? Yes, for new metric about latest source offset in source progress. ### How was this patch tested? Unit test. 
Manually test in Spark cluster: ``` "description" : "KafkaV2[Subscribe[page_view_events]]", "startOffset" : { "page_view_events" : { "2" : 582370921, "4" : 391910836, "1" : 631009201, "3" : 406601346, "0" : 195799112 } }, "endOffset" : { "page_view_events" : { "2" : 583764414, "4" : 392338002, "1" : 632183480, "3" : 407101489, "0" : 197304028 } }, "latestOffset" : { "page_view_events" : { "2" : 589852545, "4" : 394204277, "1" : 637313869, "3" : 409286602, "0" : 203878962 } }, "numInputRows" : 4999997, "inputRowsPerSecond" : 29287.70501405811, ``` Closes #30988 from viirya/latest-offset. Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../sql/kafka010/KafkaMicroBatchStream.scala | 8 +++++- .../spark/sql/kafka010/KafkaSource.scala | 10 ++++++++ project/MimaExcludes.scala | 5 +++- .../streaming/SupportsAdmissionControl.java | 8 ++++++ .../streaming/MicroBatchExecution.scala | 25 +++++++++++++------ .../streaming/ProgressReporter.scala | 13 ++++++++-- .../execution/streaming/StreamExecution.scala | 9 +++++++ .../continuous/ContinuousExecution.scala | 2 +- .../apache/spark/sql/streaming/progress.scala | 3 +++ ...StreamingQueryStatusAndProgressSuite.scala | 4 +++ 10 files changed, 75 insertions(+), 12 deletions(-) diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala index d6fd3aeb7f670..1c816ab82d3ec 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala @@ -64,6 +64,8 @@ private[kafka010] class KafkaMicroBatchStream( private var endPartitionOffsets: KafkaSourceOffset = _ + private var latestPartitionOffsets: PartitionOffsetMap = _ + /** * Lazily initialize `initialPartitionOffsets` to make sure that `KafkaConsumer.poll` is only * called in StreamExecutionThread. 
Otherwise, interrupting a thread while running @@ -77,6 +79,10 @@ private[kafka010] class KafkaMicroBatchStream( maxOffsetsPerTrigger.map(ReadLimit.maxRows).getOrElse(super.getDefaultReadLimit) } + override def reportLatestOffset(): Offset = { + KafkaSourceOffset(latestPartitionOffsets) + } + override def latestOffset(): Offset = { throw new UnsupportedOperationException( "latestOffset(Offset, ReadLimit) should be called instead of this method") @@ -84,7 +90,7 @@ private[kafka010] class KafkaMicroBatchStream( override def latestOffset(start: Offset, readLimit: ReadLimit): Offset = { val startPartitionOffsets = start.asInstanceOf[KafkaSourceOffset].partitionToOffsets - val latestPartitionOffsets = kafkaOffsetReader.fetchLatestOffsets(Some(startPartitionOffsets)) + latestPartitionOffsets = kafkaOffsetReader.fetchLatestOffsets(Some(startPartitionOffsets)) endPartitionOffsets = KafkaSourceOffset(readLimit match { case rows: ReadMaxRows => rateLimit(rows.maxRows(), startPartitionOffsets, latestPartitionOffsets) diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala index b4e5a8db7d344..1e17f9a7407a9 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala @@ -115,8 +115,13 @@ private[kafka010] class KafkaSource( maxOffsetsPerTrigger.map(ReadLimit.maxRows).getOrElse(super.getDefaultReadLimit) } + // The offsets for each topic-partition currently read to process. Note this maybe not necessarily + // to be latest offsets because we possibly apply a read limit. private var currentPartitionOffsets: Option[Map[TopicPartition, Long]] = None + // The latest offsets for each topic-partition. 
+ private var latestPartitionOffsets: Option[Map[TopicPartition, Long]] = None + private val converter = new KafkaRecordToRowConverter() override def schema: StructType = KafkaRecordToRowConverter.kafkaSchema(includeHeaders) @@ -127,6 +132,10 @@ private[kafka010] class KafkaSource( "latestOffset(Offset, ReadLimit) should be called instead of this method") } + override def reportLatestOffset(): streaming.Offset = { + latestPartitionOffsets.map(KafkaSourceOffset(_)).getOrElse(null) + } + override def latestOffset(startOffset: streaming.Offset, limit: ReadLimit): streaming.Offset = { // Make sure initialPartitionOffsets is initialized initialPartitionOffsets @@ -145,6 +154,7 @@ private[kafka010] class KafkaSource( } currentPartitionOffsets = Some(offsets) + latestPartitionOffsets = Some(latest) logDebug(s"GetOffset: ${offsets.toSeq.map(_.toString).sorted}") KafkaSourceOffset(offsets) } diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index ba879c03795d1..cc1b831b6f4d5 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -37,7 +37,10 @@ object MimaExcludes { // Exclude rules for 3.2.x lazy val v32excludes = v31excludes ++ Seq( // [SPARK-33808][SQL] DataSource V2: Build logical writes in the optimizer - ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.connector.write.V1WriteBuilder") + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.connector.write.V1WriteBuilder"), + + // [SPARK-33955] Add latest offsets to source progress + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.streaming.SourceProgress.this") ) // Exclude rules for 3.1.x diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/SupportsAdmissionControl.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/SupportsAdmissionControl.java index 027763ce6fcdf..c808b9a3066b0 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/SupportsAdmissionControl.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/SupportsAdmissionControl.java @@ -53,4 +53,12 @@ public interface SupportsAdmissionControl extends SparkDataStream { * for the very first micro-batch. The source can return `null` if there is no data to process. */ Offset latestOffset(Offset startOffset, ReadLimit limit); + + /** + * Returns the most recent offset available. + * + * The source can return `null`, if there is no data to process or the source does not support + * to this method. + */ + default Offset reportLatestOffset() { return null; } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala index c485d0f7d8b2d..a9cb345c4a06e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -212,7 +212,10 @@ class MicroBatchExecution( } // Record the trigger offset range for progress reporting *before* processing the batch - recordTriggerOffsets(from = committedOffsets, to = availableOffsets) + recordTriggerOffsets( + from = committedOffsets, + to = availableOffsets, + latest = latestOffsets) // Remember whether the current batch has data or not. 
This will be required later // for bookkeeping after running the batch, when `isNewDataAvailable` will have changed @@ -379,7 +382,7 @@ class MicroBatchExecution( if (isCurrentBatchConstructed) return true // Generate a map from each unique source to the next available offset. - val latestOffsets: Map[SparkDataStream, Option[OffsetV2]] = uniqueSources.map { + val (nextOffsets, recentOffsets) = uniqueSources.toSeq.map { case (s: SupportsAdmissionControl, limit) => updateStatusMessage(s"Getting offsets from $s") reportTimeTaken("latestOffset") { @@ -391,23 +394,31 @@ class MicroBatchExecution( startOffsetOpt.map(offset => v2.deserializeOffset(offset.json)) .getOrElse(v2.initialOffset()) } - (s, Option(s.latestOffset(startOffset, limit))) + val next = s.latestOffset(startOffset, limit) + val latest = s.reportLatestOffset() + ((s, Option(next)), (s, Option(latest))) } case (s: Source, _) => updateStatusMessage(s"Getting offsets from $s") reportTimeTaken("getOffset") { - (s, s.getOffset) + val offset = s.getOffset + ((s, offset), (s, offset)) } case (s: MicroBatchStream, _) => updateStatusMessage(s"Getting offsets from $s") reportTimeTaken("latestOffset") { - (s, Option(s.latestOffset())) + val latest = s.latestOffset() + ((s, Option(latest)), (s, Option(latest))) } case (s, _) => // for some reason, the compiler is unhappy and thinks the match is not exhaustive throw new IllegalStateException(s"Unexpected source: $s") - } - availableOffsets ++= latestOffsets.filter { case (_, o) => o.nonEmpty }.mapValues(_.get) + }.unzip + + availableOffsets ++= nextOffsets.filter { case (_, o) => o.nonEmpty } + .map(p => p._1 -> p._2.get).toMap + latestOffsets ++= recentOffsets.filter { case (_, o) => o.nonEmpty } + .map(p => p._1 -> p._2.get).toMap // Update the query metadata offsetSeqMetadata = offsetSeqMetadata.copy( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala index 57cb551bba17d..2ab473d737a23 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala @@ -71,6 +71,8 @@ trait ProgressReporter extends Logging { private var currentTriggerEndTimestamp = -1L private var currentTriggerStartOffsets: Map[SparkDataStream, String] = _ private var currentTriggerEndOffsets: Map[SparkDataStream, String] = _ + private var currentTriggerLatestOffsets: Map[SparkDataStream, String] = _ + // TODO: Restore this from the checkpoint when possible. private var lastTriggerStartTimestamp = -1L @@ -119,6 +121,7 @@ trait ProgressReporter extends Logging { currentTriggerStartTimestamp = triggerClock.getTimeMillis() currentTriggerStartOffsets = null currentTriggerEndOffsets = null + currentTriggerLatestOffsets = null currentDurationsMs.clear() } @@ -126,9 +129,13 @@ trait ProgressReporter extends Logging { * Record the offsets range this trigger will process. Call this before updating * `committedOffsets` in `StreamExecution` to make sure that the correct range is recorded. 
*/ - protected def recordTriggerOffsets(from: StreamProgress, to: StreamProgress): Unit = { + protected def recordTriggerOffsets( + from: StreamProgress, + to: StreamProgress, + latest: StreamProgress): Unit = { currentTriggerStartOffsets = from.mapValues(_.json).toMap currentTriggerEndOffsets = to.mapValues(_.json).toMap + currentTriggerLatestOffsets = latest.mapValues(_.json).toMap } private def updateProgress(newProgress: StreamingQueryProgress): Unit = { @@ -151,7 +158,8 @@ trait ProgressReporter extends Logging { * though the sources don't have any new data. */ protected def finishTrigger(hasNewData: Boolean, hasExecuted: Boolean): Unit = { - assert(currentTriggerStartOffsets != null && currentTriggerEndOffsets != null) + assert(currentTriggerStartOffsets != null && currentTriggerEndOffsets != null && + currentTriggerLatestOffsets != null) currentTriggerEndTimestamp = triggerClock.getTimeMillis() val executionStats = extractExecutionStats(hasNewData, hasExecuted) @@ -171,6 +179,7 @@ trait ProgressReporter extends Logging { description = source.toString, startOffset = currentTriggerStartOffsets.get(source).orNull, endOffset = currentTriggerEndOffsets.get(source).orNull, + latestOffset = currentTriggerLatestOffsets.get(source).orNull, numInputRows = numRecords, inputRowsPerSecond = numRecords / inputTimeSec, processedRowsPerSecond = numRecords / processingTimeSec diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index 6b0d33b819a20..c9f40fa22bf9e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -161,6 +161,15 @@ abstract class StreamExecution( @volatile var availableOffsets = new StreamProgress + /** + * Tracks the latest offsets for each input source. + * Only the scheduler thread should modify this field, and only in atomic steps. + * Other threads should make a shallow copy if they are going to access this field more than + * once, since the field's value may change at any time. 
+ */ + @volatile + var latestOffsets = new StreamProgress + @volatile var sinkCommitProgress: Option[StreamWriterCommitProgress] = None diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index 6eb28d4c66ded..ad041ceeba723 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -332,7 +332,7 @@ class ContinuousExecution( synchronized { // Record offsets before updating `committedOffsets` - recordTriggerOffsets(from = committedOffsets, to = availableOffsets) + recordTriggerOffsets(from = committedOffsets, to = availableOffsets, latest = latestOffsets) if (queryExecutionThread.isAlive) { commitLog.add(epoch, CommitMetadata()) val offset = diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala index 59dc5bc1f37df..1a8939e42a412 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala @@ -173,6 +173,7 @@ class StreamingQueryProgress private[sql]( * @param description Description of the source. * @param startOffset The starting offset for data being read. * @param endOffset The ending offset for data being read. + * @param latestOffset The latest offset from this source. * @param numInputRows The number of records read from this source. * @param inputRowsPerSecond The rate at which data is arriving from this source. * @param processedRowsPerSecond The rate at which data from this source is being processed by @@ -184,6 +185,7 @@ class SourceProgress protected[sql]( val description: String, val startOffset: String, val endOffset: String, + val latestOffset: String, val numInputRows: Long, val inputRowsPerSecond: Double, val processedRowsPerSecond: Double) extends Serializable { @@ -204,6 +206,7 @@ class SourceProgress protected[sql]( ("description" -> JString(description)) ~ ("startOffset" -> tryParse(startOffset)) ~ ("endOffset" -> tryParse(endOffset)) ~ + ("latestOffset" -> tryParse(latestOffset)) ~ ("numInputRows" -> JInt(numInputRows)) ~ ("inputRowsPerSecond" -> safeDoubleToJValue(inputRowsPerSecond)) ~ ("processedRowsPerSecond" -> safeDoubleToJValue(processedRowsPerSecond)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala index ec61102804ea3..c0aefb8120808 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala @@ -75,6 +75,7 @@ class StreamingQueryStatusAndProgressSuite extends StreamTest with Eventually { | "description" : "source", | "startOffset" : 123, | "endOffset" : 456, + | "latestOffset" : 789, | "numInputRows" : 678, | "inputRowsPerSecond" : 10.0 | } ], @@ -121,6 +122,7 @@ class StreamingQueryStatusAndProgressSuite extends StreamTest with Eventually { | "description" : "source", | "startOffset" : 123, | "endOffset" : 456, + | "latestOffset" : 789, | "numInputRows" : 678 | } ], | "sink" : { @@ -333,6 +335,7 @@ object 
StreamingQueryStatusAndProgressSuite { description = "source", startOffset = "123", endOffset = "456", + latestOffset = "789", numInputRows = 678, inputRowsPerSecond = 10.0, processedRowsPerSecond = Double.PositiveInfinity // should not be present in the json @@ -361,6 +364,7 @@ object StreamingQueryStatusAndProgressSuite { description = "source", startOffset = "123", endOffset = "456", + latestOffset = "789", numInputRows = 678, inputRowsPerSecond = Double.NaN, // should not be present in the json processedRowsPerSecond = Double.NegativeInfinity // should not be present in the json From 6b7527e381591bcd51be205853aea3e349893139 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Sun, 3 Jan 2021 11:52:46 -0600 Subject: [PATCH 0935/1009] [SPARK-33398] Fix loading tree models prior to Spark 3.0 ### What changes were proposed in this pull request? In https://github.com/apache/spark/pull/21632/files#diff-0fdae8a6782091746ed20ea43f77b639f9c6a5f072dd2f600fcf9a7b37db4f47, a new field `rawCount` was added into `NodeData`, which cause that a tree model trained in 2.4 can not be loaded in 3.0/3.1/master; field `rawCount` is only used in training, and not used in `transform`/`predict`/`featureImportance`. So I just set it to -1L. ### Why are the changes needed? to support load old tree model in 3.0/3.1/master ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? added testsuites Closes #30889 from zhengruifeng/fix_tree_load. Authored-by: Ruifeng Zheng Signed-off-by: Sean Owen --- .../org/apache/spark/ml/tree/treeModels.scala | 48 ++++++++++++------ .../ml-models/dtc-2.4.7/data/._SUCCESS.crc | Bin 0 -> 8 bytes ...-894c-ca4eac67c690-c000.snappy.parquet.crc | Bin 0 -> 36 bytes .../ml-models/dtc-2.4.7/data/_SUCCESS | 0 ...406c-894c-ca4eac67c690-c000.snappy.parquet | Bin 0 -> 3242 bytes .../dtc-2.4.7/metadata/._SUCCESS.crc | Bin 0 -> 8 bytes .../dtc-2.4.7/metadata/.part-00000.crc | Bin 0 -> 16 bytes .../ml-models/dtc-2.4.7/metadata/_SUCCESS | 0 .../ml-models/dtc-2.4.7/metadata/part-00000 | 1 + .../ml-models/dtr-2.4.7/data/._SUCCESS.crc | Bin 0 -> 8 bytes ...-84af-d861adcb9ca8-c000.snappy.parquet.crc | Bin 0 -> 36 bytes .../ml-models/dtr-2.4.7/data/_SUCCESS | 0 ...4b3d-84af-d861adcb9ca8-c000.snappy.parquet | Bin 0 -> 3264 bytes .../dtr-2.4.7/metadata/._SUCCESS.crc | Bin 0 -> 8 bytes .../dtr-2.4.7/metadata/.part-00000.crc | Bin 0 -> 12 bytes .../ml-models/dtr-2.4.7/metadata/_SUCCESS | 0 .../ml-models/dtr-2.4.7/metadata/part-00000 | 1 + .../ml-models/gbtc-2.4.7/data/._SUCCESS.crc | Bin 0 -> 8 bytes ...-91c0-6da8cc01fb43-c000.snappy.parquet.crc | Bin 0 -> 44 bytes .../ml-models/gbtc-2.4.7/data/_SUCCESS | 0 ...41c7-91c0-6da8cc01fb43-c000.snappy.parquet | Bin 0 -> 4542 bytes .../gbtc-2.4.7/metadata/._SUCCESS.crc | Bin 0 -> 8 bytes .../gbtc-2.4.7/metadata/.part-00000.crc | Bin 0 -> 16 bytes .../ml-models/gbtc-2.4.7/metadata/_SUCCESS | 0 .../ml-models/gbtc-2.4.7/metadata/part-00000 | 1 + .../gbtc-2.4.7/treesMetadata/._SUCCESS.crc | Bin 0 -> 8 bytes ...-813c-ddc394101e21-c000.snappy.parquet.crc | Bin 0 -> 36 bytes .../gbtc-2.4.7/treesMetadata/_SUCCESS | 0 ...4a90-813c-ddc394101e21-c000.snappy.parquet | Bin 0 -> 3075 bytes .../ml-models/gbtr-2.4.7/data/._SUCCESS.crc | Bin 0 -> 8 bytes ...-9aab-639288bfae6d-c000.snappy.parquet.crc | Bin 0 -> 40 bytes .../ml-models/gbtr-2.4.7/data/_SUCCESS | 0 ...4511-9aab-639288bfae6d-c000.snappy.parquet | Bin 0 -> 3740 bytes .../gbtr-2.4.7/metadata/._SUCCESS.crc | Bin 0 -> 8 bytes .../gbtr-2.4.7/metadata/.part-00000.crc | Bin 0 -> 16 bytes 
.../ml-models/gbtr-2.4.7/metadata/_SUCCESS | 0 .../ml-models/gbtr-2.4.7/metadata/part-00000 | 1 + .../gbtr-2.4.7/treesMetadata/._SUCCESS.crc | Bin 0 -> 8 bytes ...-ad9c-4be239c2215a-c000.snappy.parquet.crc | Bin 0 -> 32 bytes .../gbtr-2.4.7/treesMetadata/_SUCCESS | 0 ...4fd8-ad9c-4be239c2215a-c000.snappy.parquet | Bin 0 -> 3038 bytes .../ml-models/rfc-2.4.7/data/._SUCCESS.crc | Bin 0 -> 8 bytes ...-b112-25b4b11c9009-c000.snappy.parquet.crc | Bin 0 -> 40 bytes .../ml-models/rfc-2.4.7/data/_SUCCESS | 0 ...4485-b112-25b4b11c9009-c000.snappy.parquet | Bin 0 -> 3836 bytes .../rfc-2.4.7/metadata/._SUCCESS.crc | Bin 0 -> 8 bytes .../rfc-2.4.7/metadata/.part-00000.crc | Bin 0 -> 16 bytes .../ml-models/rfc-2.4.7/metadata/_SUCCESS | 0 .../ml-models/rfc-2.4.7/metadata/part-00000 | 1 + .../rfc-2.4.7/treesMetadata/._SUCCESS.crc | Bin 0 -> 8 bytes ...-a823-70c7afdcbdc5-c000.snappy.parquet.crc | Bin 0 -> 36 bytes .../rfc-2.4.7/treesMetadata/_SUCCESS | 0 ...4c4e-a823-70c7afdcbdc5-c000.snappy.parquet | Bin 0 -> 3391 bytes .../ml-models/rfr-2.4.7/data/._SUCCESS.crc | Bin 0 -> 8 bytes ...-b681-981caaeca996-c000.snappy.parquet.crc | Bin 0 -> 40 bytes .../ml-models/rfr-2.4.7/data/_SUCCESS | 0 ...40fc-b681-981caaeca996-c000.snappy.parquet | Bin 0 -> 3797 bytes .../rfr-2.4.7/metadata/._SUCCESS.crc | Bin 0 -> 8 bytes .../rfr-2.4.7/metadata/.part-00000.crc | Bin 0 -> 16 bytes .../ml-models/rfr-2.4.7/metadata/_SUCCESS | 0 .../ml-models/rfr-2.4.7/metadata/part-00000 | 1 + .../rfr-2.4.7/treesMetadata/._SUCCESS.crc | Bin 0 -> 8 bytes ...-9b86-d95edaabcde8-c000.snappy.parquet.crc | Bin 0 -> 32 bytes .../rfr-2.4.7/treesMetadata/_SUCCESS | 0 ...447a-9b86-d95edaabcde8-c000.snappy.parquet | Bin 0 -> 3055 bytes .../DecisionTreeClassifierSuite.scala | 12 +++++ .../classification/GBTClassifierSuite.scala | 14 +++++ .../MultilayerPerceptronClassifierSuite.scala | 2 +- .../RandomForestClassifierSuite.scala | 16 +++++- .../spark/ml/feature/HashingTFSuite.scala | 2 +- .../spark/ml/feature/StringIndexerSuite.scala | 2 +- .../DecisionTreeRegressorSuite.scala | 16 +++++- .../ml/regression/GBTRegressorSuite.scala | 12 +++++ .../RandomForestRegressorSuite.scala | 12 +++++ 74 files changed, 122 insertions(+), 20 deletions(-) create mode 100644 mllib/src/test/resources/ml-models/dtc-2.4.7/data/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/dtc-2.4.7/data/.part-00000-bd7ae42f-c890-406c-894c-ca4eac67c690-c000.snappy.parquet.crc create mode 100644 mllib/src/test/resources/ml-models/dtc-2.4.7/data/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/dtc-2.4.7/data/part-00000-bd7ae42f-c890-406c-894c-ca4eac67c690-c000.snappy.parquet create mode 100644 mllib/src/test/resources/ml-models/dtc-2.4.7/metadata/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/dtc-2.4.7/metadata/.part-00000.crc create mode 100644 mllib/src/test/resources/ml-models/dtc-2.4.7/metadata/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/dtc-2.4.7/metadata/part-00000 create mode 100644 mllib/src/test/resources/ml-models/dtr-2.4.7/data/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/dtr-2.4.7/data/.part-00000-39b027f0-a437-4b3d-84af-d861adcb9ca8-c000.snappy.parquet.crc create mode 100644 mllib/src/test/resources/ml-models/dtr-2.4.7/data/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/dtr-2.4.7/data/part-00000-39b027f0-a437-4b3d-84af-d861adcb9ca8-c000.snappy.parquet create mode 100644 mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/._SUCCESS.crc create mode 100644 
mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/.part-00000.crc create mode 100644 mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/part-00000 create mode 100644 mllib/src/test/resources/ml-models/gbtc-2.4.7/data/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/gbtc-2.4.7/data/.part-00000-dacbde64-c861-41c7-91c0-6da8cc01fb43-c000.snappy.parquet.crc create mode 100644 mllib/src/test/resources/ml-models/gbtc-2.4.7/data/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/gbtc-2.4.7/data/part-00000-dacbde64-c861-41c7-91c0-6da8cc01fb43-c000.snappy.parquet create mode 100644 mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/.part-00000.crc create mode 100644 mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/part-00000 create mode 100644 mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/.part-00000-81137d9f-31e3-4a90-813c-ddc394101e21-c000.snappy.parquet.crc create mode 100644 mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/part-00000-81137d9f-31e3-4a90-813c-ddc394101e21-c000.snappy.parquet create mode 100644 mllib/src/test/resources/ml-models/gbtr-2.4.7/data/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/gbtr-2.4.7/data/.part-00000-3b5433ff-d346-4511-9aab-639288bfae6d-c000.snappy.parquet.crc create mode 100644 mllib/src/test/resources/ml-models/gbtr-2.4.7/data/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/gbtr-2.4.7/data/part-00000-3b5433ff-d346-4511-9aab-639288bfae6d-c000.snappy.parquet create mode 100644 mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/.part-00000.crc create mode 100644 mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/part-00000 create mode 100644 mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/.part-00000-6b9124f5-87fe-4fd8-ad9c-4be239c2215a-c000.snappy.parquet.crc create mode 100644 mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/part-00000-6b9124f5-87fe-4fd8-ad9c-4be239c2215a-c000.snappy.parquet create mode 100644 mllib/src/test/resources/ml-models/rfc-2.4.7/data/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/rfc-2.4.7/data/.part-00000-e41a7b98-91f8-4485-b112-25b4b11c9009-c000.snappy.parquet.crc create mode 100644 mllib/src/test/resources/ml-models/rfc-2.4.7/data/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/rfc-2.4.7/data/part-00000-e41a7b98-91f8-4485-b112-25b4b11c9009-c000.snappy.parquet create mode 100644 mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/.part-00000.crc create mode 100644 mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/_SUCCESS create mode 100644 
mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/part-00000 create mode 100644 mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/.part-00000-21082d24-b666-4c4e-a823-70c7afdcbdc5-c000.snappy.parquet.crc create mode 100644 mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/part-00000-21082d24-b666-4c4e-a823-70c7afdcbdc5-c000.snappy.parquet create mode 100644 mllib/src/test/resources/ml-models/rfr-2.4.7/data/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/rfr-2.4.7/data/.part-00000-4a69607d-6edb-40fc-b681-981caaeca996-c000.snappy.parquet.crc create mode 100644 mllib/src/test/resources/ml-models/rfr-2.4.7/data/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/rfr-2.4.7/data/part-00000-4a69607d-6edb-40fc-b681-981caaeca996-c000.snappy.parquet create mode 100644 mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/.part-00000.crc create mode 100644 mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/part-00000 create mode 100644 mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/.part-00000-dfe4db51-d349-447a-9b86-d95edaabcde8-c000.snappy.parquet.crc create mode 100644 mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/part-00000-dfe4db51-d349-447a-9b86-d95edaabcde8-c000.snappy.parquet diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala index 162641f605264..67b9166a0f44d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala @@ -31,8 +31,10 @@ import org.apache.spark.ml.util.{DefaultParamsReader, DefaultParamsWriter} import org.apache.spark.ml.util.DefaultParamsReader.Metadata import org.apache.spark.mllib.tree.impurity.ImpurityCalculator import org.apache.spark.mllib.tree.model.{DecisionTreeModel => OldDecisionTreeModel} -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{Dataset, SparkSession} +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.functions.{col, lit, struct} +import org.apache.spark.sql.types.StructType +import org.apache.spark.util.VersionUtils import org.apache.spark.util.collection.OpenHashMap /** @@ -401,8 +403,13 @@ private[ml] object DecisionTreeModelReadWrite { } val dataPath = new Path(path, "data").toString - val data = sparkSession.read.parquet(dataPath).as[NodeData] - buildTreeFromNodes(data.collect(), impurityType) + var df = sparkSession.read.parquet(dataPath) + val (major, minor) = VersionUtils.majorMinorVersion(metadata.sparkVersion) + if (major.toInt < 3) { + df = df.withColumn("rawCount", lit(-1L)) + } + + buildTreeFromNodes(df.as[NodeData].collect(), impurityType) } /** @@ -497,25 +504,36 @@ private[ml] object EnsembleModelReadWrite { } val treesMetadataPath = new Path(path, "treesMetadata").toString - val treesMetadataRDD: RDD[(Int, (Metadata, Double))] = sql.read.parquet(treesMetadataPath) - .select("treeID", "metadata", "weights").as[(Int, String, Double)].rdd.map 
{ - case (treeID: Int, json: String, weights: Double) => + val treesMetadataRDD = sql.read.parquet(treesMetadataPath) + .select("treeID", "metadata", "weights") + .as[(Int, String, Double)].rdd + .map { case (treeID: Int, json: String, weights: Double) => treeID -> ((DefaultParamsReader.parseMetadata(json, treeClassName), weights)) - } + } val treesMetadataWeights = treesMetadataRDD.sortByKey().values.collect() val treesMetadata = treesMetadataWeights.map(_._1) val treesWeights = treesMetadataWeights.map(_._2) val dataPath = new Path(path, "data").toString - val nodeData: Dataset[EnsembleNodeData] = - sql.read.parquet(dataPath).as[EnsembleNodeData] - val rootNodesRDD: RDD[(Int, Node)] = - nodeData.rdd.map(d => (d.treeID, d.nodeData)).groupByKey().map { - case (treeID: Int, nodeData: Iterable[NodeData]) => - treeID -> DecisionTreeModelReadWrite.buildTreeFromNodes(nodeData.toArray, impurityType) + var df = sql.read.parquet(dataPath) + val (major, minor) = VersionUtils.majorMinorVersion(metadata.sparkVersion) + if (major.toInt < 3) { + val newNodeDataCol = df.schema("nodeData").dataType match { + case StructType(fields) => + val cols = fields.map(f => col(s"nodeData.${f.name}")) :+ lit(-1L).as("rawCount") + struct(cols: _*) + } + df = df.withColumn("nodeData", newNodeDataCol) + } + + val rootNodesRDD = df.as[EnsembleNodeData].rdd + .map(d => (d.treeID, d.nodeData)) + .groupByKey() + .map { case (treeID: Int, nodeData: Iterable[NodeData]) => + treeID -> DecisionTreeModelReadWrite.buildTreeFromNodes(nodeData.toArray, impurityType) } - val rootNodes: Array[Node] = rootNodesRDD.sortByKey().values.collect() + val rootNodes = rootNodesRDD.sortByKey().values.collect() (metadata, treesMetadata.zip(rootNodes), treesWeights) } diff --git a/mllib/src/test/resources/ml-models/dtc-2.4.7/data/._SUCCESS.crc b/mllib/src/test/resources/ml-models/dtc-2.4.7/data/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/dtc-2.4.7/data/.part-00000-bd7ae42f-c890-406c-894c-ca4eac67c690-c000.snappy.parquet.crc b/mllib/src/test/resources/ml-models/dtc-2.4.7/data/.part-00000-bd7ae42f-c890-406c-894c-ca4eac67c690-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..3ac562a8b2b843f0df27c8239a8aa4e4e7102065 GIT binary patch literal 36 scmYc;N@ieSU}9jt&;0(&Q}%+r>Mc4nfM zQYa$gLG~a$_OOSth!h0zuy}C6lLrrrcwQF|9y}G#ivHi5?=p#@Z~yufHk!S z9(bLs2cNKvu!v>KAOs9juyV4lhTj;|5HIY@NnB2ZgH^mx2N)nw-}&v2>7Tl{ep!F_ z*S~(Szq^+vl5jpv6z$9s4SIn{$n&`)9+)=qOLZD6LS;ymllEWS;Kn7iU!Ra9$ZBi~ z+~KRf0l7VkjS4QnK@p3^kZ8ZYk)>n@GG6fDO|>sKb*XlF!y@&Fnr4SIns&Q`_6u&((^)Yre^HGnSAAXR~`(XmLZQD}5B zXnw=d=xTzNSfkLshTyCOt$@{_&mtRHx|*OR795Z-vr(O9-9>m6uhN5bB;#&XTT!>} zzOkW*D7OA{#AuPiex#tqg-tz5kCvZr1yVrMZe+Mn{ z>SScR<(s|-A&F>FPzA^Ke9Q;?$B9GQq~iycux;7^L@6d5()6#jYzLUAExY!1D|7(# znAdY`9{|~6zDZ2K?~<207Wt4eYwWb?lkKi+6Yq8RM$08$tLs?jL_w)0JN@>x_$*Rn zt*9NBq#_h#Rze!h9`lNMr<}x*&K_2Jt!$7gk}i#e$Y|c zk7U>w!2zcna;1IRSz6cRrX<#|U=JtQh_D7H)0#p&oQP+I-Z`pXDZLpI;(eJSE((&U zm4k`TOge+U%PC)QrTa2KCvrof6Khzok1w&xB2l{@AdkK>r@JdF-*E6#dVDGBZbqR- z=vMkF988|hD|_cyVxMu!0arShgv1hsqKPHeki?YVClwj|s@iq84YOy0a2ZU)jQ5G_ zfl#SqSB(W@8L3wWc6hfP1LW>@pZJE`{lI7w-?U8MR6ifz)%~3w(N}fPcl%quUeTL2 zaV$??eS25$m~FabTi_12*&Prc;)0{ocO0|fz=o#jc%-67sQT*e-d-g>ok(edVs-oU z^l%Zgy>u0Ks6pc*n zsf0LisyIv&TsTZ(2M!eoG;w4*O+8E?5E47UW$GVL^$ddNef^mxX*&T`Qu%q__x=5z 
zulMJ6?WH$oFv0~~EMgHJ1T17_jzb8^3y&DkgR;dwm>&_c15?}D8Ds2dh2~i~6Buu~n(IJFJeucZ&N3Yrv)=v@!!nvi%k?a7Q!@dG zP^2uQ?p|(~7BCT8w9U4S%ZSzPtg&f4UVQYhPa;! zXNBB3qFyn*84%)uND${afmaIN#P>`(g}%c`UorUu5uhWvA<&UEDA?UJtt{e|RS$Xi zl{wyBQM$*#C%>Bxy4#~rLv$;C6&_@c=hb`XXkxpJbeqXP$UtI|LXpHGYd~Vs-l!yk zUzXeUrmA%`5H6K!nEIY!J0MhY@`5_2E+YAS-wx-drGnhu>>93Wx8GNrhO6nCtI4-U zwsY=Q$H*<`9M|q{xVd7kZW@;Eu}|qt&%6t!BZ7x@I{> zF&CibmbZ6yis9)!%IP`VNv`_3!EnO?%3}+*32^Oi7)YD%860r^A%CRMzWD y^>TUkLT$mQFD%T?>ZOI!LfM!vZIo+^<;7CXsOdB3mAx!NCw$Oo3ZXUl-{l{OEe~}7 literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/._SUCCESS.crc b/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/.part-00000.crc b/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/.part-00000.crc new file mode 100644 index 0000000000000000000000000000000000000000..bbad108766e144fcfa28032a8cab16b15aa8f5b3 GIT binary patch literal 12 TcmYc;N@ieSU}7lORyYL!5a$BN literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/_SUCCESS b/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/_SUCCESS new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/part-00000 b/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/part-00000 new file mode 100644 index 0000000000000..2895223cffde6 --- /dev/null +++ b/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/part-00000 @@ -0,0 +1 @@ +{"class":"org.apache.spark.ml.regression.DecisionTreeRegressionModel","timestamp":1608687932847,"sparkVersion":"2.4.7","uid":"dtr_c16a90fcdaf8","paramMap":{},"defaultParamMap":{"labelCol":"label","checkpointInterval":10,"minInfoGain":0.0,"maxMemoryInMB":256,"minInstancesPerNode":1,"maxBins":32,"seed":926680331,"cacheNodeIds":false,"maxDepth":5,"predictionCol":"prediction","featuresCol":"features","impurity":"variance"},"numFeatures":692} diff --git a/mllib/src/test/resources/ml-models/gbtc-2.4.7/data/._SUCCESS.crc b/mllib/src/test/resources/ml-models/gbtc-2.4.7/data/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtc-2.4.7/data/.part-00000-dacbde64-c861-41c7-91c0-6da8cc01fb43-c000.snappy.parquet.crc b/mllib/src/test/resources/ml-models/gbtc-2.4.7/data/.part-00000-dacbde64-c861-41c7-91c0-6da8cc01fb43-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..13fc4ed696effdbf3b5bba54547271c8fca20e04 GIT binary patch literal 44 zcmV+{0Mq|ta$^7h00IE8y8 zv6h252(ieKHv$D^CiH150aJGq%yUF5!((Gj(~WpGUZ(@mgW%;9hykPoga$E!l!7b( zDFZQq+y=4`#0&zSxR}a}CkDk6tkpcxMxb8WhM}AURbt!SAewhBiLF_;x8@%<1ilUH zn6o+Gb`#>KYmYB~`s!(6$aUh%D|@Gsh$7JRB03=a>DSksXV{~HFY&!=H*J@aZ+b>n zF@xjD5fhC@L5_eN1xb0qeZgcSg*S|xDEvG#O*oqUE$|x$3)i&3=&JTOquL)0H{^z;ysbc~%5d)K}H(xmO z%z5E{YDe&+(dUINXYT0x`X7Hhw(KVj@HsTLfg;GKea3sqVIQ^7yy4Zc7xohb`O`5Y zM@}b=bj6XeLkE>>41#bl&b;K)IlP%c6lx)eC9RoakR@ima{cBVD^I+4&q*A?fPon& zTbT|V!2t9Hxbv7um5|5@X}#Y>lg%lLBL|v|W;9?j(~vTXJBM1ut#5dD?fbfee;J+8}|444<1|l z!(S!FOLpz5T^O!E`3-YnA$o@Sa_HI!CeJJ8 zw;<>MOvV;Ifcu?r{|?C8AWy)!9i$Ky&(vbpMzAQ0s#7+61CxsxG3N;80JN+VtmSpA 
zJCy5KbVFddEEUesg3-Vlg+(%XnegtLINN>K+XNyA`tVwjD4~%%WTZ*)5hK0GjM63> z4aO)3iCCkJ6dyIx4zx@JGr*Mr5sFSTWvz@37o9A_frCqM{^c>8Qcf*EJSQ^(6SP(F zMTeY>&T?~cOIfVfFMp>!))kVv{a};Ptw5Y1E=$q_O{hl4(b1lO)Z~x*8FS{DE*yY^ z6(*JT%2FWQ6%Y4BGYUG~-J6iZ@vV5BDL43L+#ipDy9QK4a%3bNi!-{+{1Pc5bxYBB zW<+lChrtkrH%6pjyfGAx01_UT%Hd6+tO86^vEE2H4hEHnWkJawPsq{((SY<6Mz$!c z{c&kij~td_kI3sovJ?yTL;_#n1%@(36WxvKTEsdnR?C>3wnDO2khCS28AiuC+2;s$ zKVheRw|KFz;Fk{)2aIkuy$8|6TY-%qBcw=fKRK(^5 zy_G4QZ8NVvitP(}c35w}p!cbSlXqZiY@2_$7sHw~6v0wUuOJ=Pvx9p3qydl$PgRUm zs4a%{i4h~MAPtrlHBmF$N@=HD8v*nn!5*jC110uPjB1qmr-v40zP1?ljzvYHR9aMt za9YP+*V%V00+d3+#wdl_VkkGti}Zrl{R-3s!G22EZpw03& zl4ARB1bdUPPcUk`VXDBk^O(3uyQP<^wrkbOWOn6@-I(WpdMRLNOp7tJQ8E9|Ta@A> zgQaa2#N7LPb~`xPvpROa&VJsK>xLQ3JO^I`Fy5}pX1#p-oRQ9C3MJGbf_{3|SwONv5K(8EC>8f_EM%>DR8L_^I3kbX~ zA;n#C&lXp=6!!;!t-06fem=gnSK{mWSX@qY#d#+m3`>zfjIV#ZpO5;x@uLjS!=vof za1>}$l0gwoL?Zsq2uuq4BQeRz=PdK}{oA)YRSUV}LA5a-*l<9d&VPAG?cC9<1bPzK z+4=jOJD&arCX^FSwWG%Gmu3G}P-joM^_gO*j&}7#<8b1|9)?6`le|6OeJI-8S;uFk zipZ-r=UX)OeAR4x`7&sS@_%DRIm=Z8YVvN;v-#(}y6`_}qd8v_RHLdl|Ibp+c;Qf? z=N-?JU(_`7n1Hx%WI5!XZrPS49h=AHal1S=M`r?F#BBcBx!ccYS6%1oy4CJZsWaeNX&Hn6z})(S`r$8xHSnLCe*+~Bn1}!X literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/._SUCCESS.crc b/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/.part-00000.crc b/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/.part-00000.crc new file mode 100644 index 0000000000000000000000000000000000000000..a810dd9107fe7a9e566a46e6250dde025f3242d1 GIT binary patch literal 16 XcmYc;N@ieSU}9K*@!2Z#Q*ti>DzFAq literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/_SUCCESS b/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/_SUCCESS new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/part-00000 b/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/part-00000 new file mode 100644 index 0000000000000..675fea29ba9e2 --- /dev/null +++ b/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/part-00000 @@ -0,0 +1 @@ +{"class":"org.apache.spark.ml.classification.GBTClassificationModel","timestamp":1608687932103,"sparkVersion":"2.4.7","uid":"gbtc_81db008b4f25","paramMap":{"maxIter":2},"defaultParamMap":{"seed":-1287390502,"maxMemoryInMB":256,"stepSize":0.1,"validationTol":0.01,"maxBins":32,"checkpointInterval":10,"predictionCol":"prediction","lossType":"logistic","rawPredictionCol":"rawPrediction","featuresCol":"features","cacheNodeIds":false,"maxIter":20,"featureSubsetStrategy":"all","impurity":"gini","minInstancesPerNode":1,"minInfoGain":0.0,"maxDepth":5,"subsamplingRate":1.0,"labelCol":"label","probabilityCol":"probability"},"numFeatures":692,"numTrees":2} diff --git a/mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/._SUCCESS.crc b/mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/.part-00000-81137d9f-31e3-4a90-813c-ddc394101e21-c000.snappy.parquet.crc 
b/mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/.part-00000-81137d9f-31e3-4a90-813c-ddc394101e21-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..101c2071193880d091fe121057591aa15c9e1ba4 GIT binary patch literal 36 scmYc;N@ieSU}9JsB^xi8eS~r2^3y!Yc`qA2)kv%{(JuB5_hRG(0NL9NVgLXD literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/_SUCCESS b/mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/_SUCCESS new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/part-00000-81137d9f-31e3-4a90-813c-ddc394101e21-c000.snappy.parquet b/mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/part-00000-81137d9f-31e3-4a90-813c-ddc394101e21-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..e232de340042335ad16e22327618ad8714d80f2b GIT binary patch literal 3075 zcmeHJ&5zqe6dz}k;4Mp4q}sJbKw;F4b|qu%xN+iDq{4P32tDm)x@4;P5o7S zoOZR!2?=rHzyXOpaPDEn9mIcv)WhBo9C|=pkPy#KvJOfL+nzYAWhI`O_kM5Yea&z0 z-F;m^!WH4FB9!5S7xRdPVq}pD0#A(L@`MSB|jl zktD%eIE;rXiHLnb)g&VEebx8Wm=0r_ByJd}JJfcwj}K!?_s^zxLx*};!OZn(!iXQ? zE#1^QW~Xf#P2JEHoMU}V<4hN_smJz;NgT!0acu^0w?YpviZN&>lmf19n5JpmfITR9KnY9X5EJ3q2t<81=mmrD zJ>tT?G*yEc@@U%)z!yeS!LA>rvC9s@!idB!32X{V;2ryZ6uJTH1&qcc0z1$_)z18O z2b|G!Kz~5Igywj=)DPpsUa-53x0)?8pWC4kJHT75=@!6kfSv5oIMW9>1$(4Vy~&32 zPdpw=|A$Z+8f{yI*zLB}I!~yc%UcVFezt!gPz#QoMRp<1iz#uAI4@0(B5*~{E#%;M z48``O`t()_{#RBWL+!nIja9CFd5#L>Zu3xJT=)0O+(m_DReTnx)z7aN<;A_TF;`uc zCzY3ZD6GFuICwWtl=V!k~_iuBRU)4VlQu=Rl(lqCqpK$umtJ|+$-oIKx{ujS+ zYgwvnzfrP&MA1j-YN!>t0qD?m>%#T9X3axZ!1MH`#2Q3v)TjH?N7m35`+r z;(EEbUY?H$%va#W^|GAe!1Xd;g{NXoxHPHcS&ipKc+834ag9^4D=ld)&uBPvxZ?5a zVj0PbBrUMQ(S*uLm7K{TX>&m$dElwQ;vS@wsd4yFoi!urlsm%gFanN)6Beg7gCGxF z>SdJa(<2-ZKUDcEz?>Tl{{aCT z$SB%!glW4h0Gu$*4FA_2?v9c4!Sw8J`Eh;fth%o1nyS|~`l;(Vb<(k%L1$nAMedt) l094g*G*dGz+R|*R4^&v|)4ro`%3YxElNbG2L1+)?|KH>&tQi0R literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/data/._SUCCESS.crc b/mllib/src/test/resources/ml-models/gbtr-2.4.7/data/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/data/.part-00000-3b5433ff-d346-4511-9aab-639288bfae6d-c000.snappy.parquet.crc b/mllib/src/test/resources/ml-models/gbtr-2.4.7/data/.part-00000-3b5433ff-d346-4511-9aab-639288bfae6d-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..c35b81f9cb25f9937724e672820a28927b2d5819 GIT binary patch literal 40 wcmYc;N@ieSU}A848!YST&7zunY2MT`+>_*IG-_^M(ACJ<-{3LthfZ!Q01OBZNdN!< literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/data/_SUCCESS b/mllib/src/test/resources/ml-models/gbtr-2.4.7/data/_SUCCESS new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/data/part-00000-3b5433ff-d346-4511-9aab-639288bfae6d-c000.snappy.parquet b/mllib/src/test/resources/ml-models/gbtr-2.4.7/data/part-00000-3b5433ff-d346-4511-9aab-639288bfae6d-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ba26a44f46f11305e83139315761785711bd35ed GIT binary patch literal 3740 zcmcInO>7%Q6rNpgoQ>nOQPo-2V6~Q&SgGV%&VL-GO;FP+MHU3;feNa!&W@86{_E}< zh(e^CIdDKINI6BJ2UPU{2t^#K_KJD{!38czsLFw3gnFt#@ZPR>b{%ihlpiaxciz1B 
zeeZAH*o!Z|HcTbjPfr)&Ln1^@5_k#lPYI3ESeC|NL#A?4l~AgJ4j~E|3amg7k%dSK zEN18oTA(Rfpt5h_uRwfeOhmg`cys8wP7J}5gASj-IY;Maq5ImO1;W!*b$xZ1Wp77qJvlyyNI@?);CdoP_IiOgPsfVjKJcnIPWAq82s%*^5WRp?+ygN|DZE?Pt&8{e*g_4cS?cKPqq`hN|UW*$Fta8=+j^D z>fn%YN0o37o6lbzjft^F4#mV>AJbnQjrkQgs{5Dju6myK0z1yLZ1WL#3EiecGQ zboIMgg;iP2c30A}W&jY<_f%NfzO-UgfC<+z(^y_<2>`mwYE%px0G-WvnQ3;@WLIlC zdmot-jlO3bipi{%dPRSV7f97tYgR9DTcX4h$;7>koP=ZzN^-)>3>B5Q z@}Z!7EM(-FBgSVU3k8!fV4Qnc+02%JVB<20CWd%&A*l}Jrl4#JpaPg~&tl#2RQM~B zNCFtdj$mJiNg7LxOL7n_P#6dFH$mAKKy`*6;#u!0I8F8gQWE0$wiLp}Ywj@pEGWMU znQaM}I_Dz}j!tP9(?%4T2Bm0XfoG__pPWAO`Uj{7lwDET5;Gf79%OJhA~j}`kf1b- zdJ9V3kx-{K0|9P9%|m7&vV=^=kW@bDaeAP=Y<|}}RNI2`xsZ9%1FhhXL~I3r6&Aow zZwPeY00ppjdU>1Sj6iI+&2SZl{V>k4r+8u9)7W+j9gZDAxg%sA#(~2hmB`_*!Z;rH zIkkt=@x}p)!_W1>Zip+pg7S@!dEDo9L)(-+WcCKn)!mbaM%q3hoKQb35GtlF zbo6$1f$~Ye5{DG1YTaDUX$=iVaL$>=xp$dq!B|zhMsj1hNur+X(6DY*axl1WHJP0= z>+j{N%+_?6(&`;~Ep4whSb92b*=BRePG{3)gH?1ZJ^kidx~5fe%boAA)lxNTFfN%1 zf?Bgu(MlCiDr*&sWz!yVdU|brJ9&{gyc6iqDj^vDKj(Oao@q5+B zc=j^*m+`+raqe&qz>{}G&j#;#Zuk${=+LhR9F%+Wf0nZCiw37Yc04zp)-=aZAnHr4 z8FIdArdmTMHJlsH=Y~^5r6zmT}C-M^oHlAN9lqL(4 Z`4TJX!{-wB;P>Uxf5eIrvIxIR{{~z=dsP4c literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/._SUCCESS.crc b/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/.part-00000.crc b/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/.part-00000.crc new file mode 100644 index 0000000000000000000000000000000000000000..7dc6e149db71e18a5578bb119d6cccf8ff106d34 GIT binary patch literal 16 XcmYc;N@ieSU}AWubJ20OmhChEB{c;9 literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/_SUCCESS b/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/_SUCCESS new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/part-00000 b/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/part-00000 new file mode 100644 index 0000000000000..a9a712e626510 --- /dev/null +++ b/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/part-00000 @@ -0,0 +1 @@ +{"class":"org.apache.spark.ml.regression.GBTRegressionModel","timestamp":1608687942434,"sparkVersion":"2.4.7","uid":"gbtr_0a74cb2536ff","paramMap":{"maxIter":2},"defaultParamMap":{"impurity":"variance","maxMemoryInMB":256,"maxDepth":5,"subsamplingRate":1.0,"validationTol":0.01,"labelCol":"label","maxIter":20,"checkpointInterval":10,"minInfoGain":0.0,"predictionCol":"prediction","stepSize":0.1,"cacheNodeIds":false,"lossType":"squared","seed":-131597770,"featureSubsetStrategy":"all","featuresCol":"features","minInstancesPerNode":1,"maxBins":32},"numFeatures":692,"numTrees":2} diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/._SUCCESS.crc b/mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/.part-00000-6b9124f5-87fe-4fd8-ad9c-4be239c2215a-c000.snappy.parquet.crc 
b/mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/.part-00000-6b9124f5-87fe-4fd8-ad9c-4be239c2215a-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..b681b9f615bf5fd53072363c0a42bb2759ce32e3 GIT binary patch literal 32 ocmYc;N@ieSU}D(PYL`-J`#$%RNMP*72{FP6dkmCyJ~ayf0I@y`tN;K2 literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/_SUCCESS b/mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/_SUCCESS new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/part-00000-6b9124f5-87fe-4fd8-ad9c-4be239c2215a-c000.snappy.parquet b/mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/part-00000-6b9124f5-87fe-4fd8-ad9c-4be239c2215a-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9a7e77acf546461ce1f1efb2814d5164828a9af4 GIT binary patch literal 3038 zcmeHJ&5zqe6ragvQ@2{s7GY!4AVr3aQpvUKd^pJ}P-s^`mIkF2>IGG0Vvn&-2(?sJ?uY##0??wKOk}7!gY^GNEL7Lz1=KZA%QroWhLV`Z{Fwp-h1tL z?wBGH*Tt7LF%1hr6tZ#}BZTBM{2&Px7QTQWWI35L0QV0sh(BK#AI!i(3l*NM?5Va- z;#l2Odr?;>ePZuWJ?@j}fgbpJM7t4DM=zf({Xw9fnr(zP=0M#UD5fHRD z)KrJMB=K2$PE-T(*%s}y9d)y!sXpmY|7OnzS)(Ngu(=B#f?Z7q+;WK@Qy}h8`$50w zg{&De8Vv}XFb$0hYtf(=?KZ>K4Ry0vv4G}<%@CXkZ5p>}#6>t`=NMakK<*@{9PV+zeC~uPEzjRm1F)Q-OpN{QBD`Q1tW=!+Y(puyB#_?_V5Z<&SLv zzuU&?fV}zYTWRb*=F598-Mx*G|Fn?WE~Lz(i@y}6NmqwMD5yE{jfFZaK*HfUx-JU` zkNNZcb*RTnsKB(|81Vx0lvPYAx87eHRm=M^l&gS<86>7Qkep#qa+_O7T-!*=LO>aD z2qPO%(l5d8g-6uu?ywk1X(g*P@{`Bl-LZC?0VGaBiP4#YDfaUH7;bZHDQHujCN@RgfQ`;NBh7u z8Q7+h`4~9VW)f!PnG8_ph23K!zyKKeW~|30Ov>fl2&b2D#b?=YZ;0gWiSO5fC^rdK z&eTmqH*lahiSnrXFu!02kN~uK)l5 literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/rfc-2.4.7/data/_SUCCESS b/mllib/src/test/resources/ml-models/rfc-2.4.7/data/_SUCCESS new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/mllib/src/test/resources/ml-models/rfc-2.4.7/data/part-00000-e41a7b98-91f8-4485-b112-25b4b11c9009-c000.snappy.parquet b/mllib/src/test/resources/ml-models/rfc-2.4.7/data/part-00000-e41a7b98-91f8-4485-b112-25b4b11c9009-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d9ec35a4de580d57b00c6375223f941923fe74b2 GIT binary patch literal 3836 zcmcInU5Fc16uvV%$I>J2$(7lVm$H zv9@Jl1s{A+L0EjTfxsVaO4goqge4+n!1D$;n7reHy&Vn!BFDuWIo5-9{`AQ0kQ zvER;~k5W)36qJc4h!OMdbjt=q&Hd$emxd2O(0>iye2kxX5)5c;;xeN@M6xA^Z3T zb4PyvWow2cxK~!5`s4@w&#jr!!TTRdzuld=_3TeqQ@6H|yW20()$Hc(l>)c#=j>>H z!1i;MqZg`>A26T-(d5L{=PO)4n0_uH9^*(gktF$qNOB4B_*qbZXZ4v17;%b@7ij?; zh|*}LLk(NIjd1jXQjnxy$2|4lycxql6#Dx5#T#+q+!wz_Fz$17t~VYLPTkq~^2Wpa zpMC!0%>0v=Kl=LPjeR2G#?duqX9Mazl^qWd#Y4zWCl|2>$F z``(7{i!cf>dht=qx89%m_@8$Y+q)HuEw&7endglQO9nHS zTDp3G6-bq6wi~Cty+lf-GwJobn1Nso3S!!g4CSSi^d2Xzad~lipT3+fLcRoa7>_PS zK7XAW^p7jFNT~>BExn%^@1?dhieG1f3 z*U7_l+z=->A>%y(wJX$h@EtzC38@chntQa;5SHy&NGbTmZ&Q86N#Ah!?HE*q-#d48 z(h%sicnEah9NE~nIq6$2zZUnZ4LgNjZ8!_d%e{n$J?SOJIh0+8F~D&z0UUn!L=Jxz z#<6zP&OM2yU)n{n_wn9c4Jm1plfDB7kGj>+)phsi#pw*3RcI}ER=1OTdfMG6Y*(Fr zu$=@~F?68~xHnbO&&Q+`gg};C#$riXR-hA??513LlNly-S2=R3G+LS<@`)Y|^O9bI z{(Y&&i0AP326>XfIf~Vy><+T{Ux4FJH?wl?E<3Z6207 zs#+7eCNn_LZ0ovG(?O}O=q4-XT;$x;wUw2khv4209>!s4HPutkJvya-?f58atBuAU z-fs7H`VS!N3(mvw{8tP^xdP(OC3nF|hG*zPt7*Z7XTAV|?gV)yXg(xu@2KNd=`rR! 
z%)=(lyIy^aM^A%)C;uB1`zrSUyx{Fqv%%}$+xQR4XwSC<57e{f|4e1q`-Vq-Xn!sq zR5XWBAo6pa9&)l_WIIhKJ6swbDh+1~wKlwmWtGXYTA!?!%Oj_1CKUzH literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/_SUCCESS b/mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/_SUCCESS new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/part-00000 b/mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/part-00000 new file mode 100644 index 0000000000000..07748b070ee84 --- /dev/null +++ b/mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/part-00000 @@ -0,0 +1 @@ +{"class":"org.apache.spark.ml.classification.RandomForestClassificationModel","timestamp":1608687930713,"sparkVersion":"2.4.7","uid":"rfc_db1adb353f1e","paramMap":{"numTrees":2},"defaultParamMap":{"impurity":"gini","predictionCol":"prediction","numTrees":20,"maxDepth":5,"featureSubsetStrategy":"auto","subsamplingRate":1.0,"featuresCol":"features","checkpointInterval":10,"rawPredictionCol":"rawPrediction","cacheNodeIds":false,"labelCol":"label","seed":207336481,"probabilityCol":"probability","maxBins":32,"minInstancesPerNode":1,"minInfoGain":0.0,"maxMemoryInMB":256},"numFeatures":692,"numClasses":2,"numTrees":2} diff --git a/mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/._SUCCESS.crc b/mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/.part-00000-21082d24-b666-4c4e-a823-70c7afdcbdc5-c000.snappy.parquet.crc b/mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/.part-00000-21082d24-b666-4c4e-a823-70c7afdcbdc5-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..729c5bb30d7fe171323f7f76f91dc0f90530d162 GIT binary patch literal 36 scmYc;N@ieSU}7*02|4&HvPGAZZL(bE?HEzD3k&Z*`aEN*!Pl-b0Nk+-?*IS* literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/_SUCCESS b/mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/_SUCCESS new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/part-00000-21082d24-b666-4c4e-a823-70c7afdcbdc5-c000.snappy.parquet b/mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/part-00000-21082d24-b666-4c4e-a823-70c7afdcbdc5-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..610882198c79067d9362d7d976664b3389ff8033 GIT binary patch literal 3391 zcmeHK&1)M+6rWwMlvU!UPLmb8L?!l88wZp{T3IW}2+}wXt%4myFr|=&Fxs7!C)p2W zcO}Od-P@jeZo%{vOi!hU-un;q++)e9he8a#m%dqjN2;A%N)9%{ntAi)y*Kmb{eDmN zI}h^`k`|94oLm2D?gxqD@hD+PNMI1G&M${!? 
zgx!|9o;Px3qg<*Ka-}sRtHw6JAOYJC-gIqEE5U{~wP0pN<`ZHTv1ylxUaXj~03l#^ z8-t;PZdSF3joVIC9|=_#zt|?OAMDq>?G1HZFM<;Y-h9^bsTb9}hy)$%KpeR&)7_vR zoU)*U<$ze!WCyzI!}{19+`81OdA9!a_-aPUuRbN`=9eAVR4vp(uqc zC8Q>H%Wr_r2maP9a9HYamp3PofhAXol!eu?f=)$OE-#fn8KFih^YIlC3iY28?&S%G zjBii4)iGo|JmEHWxNE(q{_rnuAufn}H!9Vch!CxLqF2|n@B%Omb-z&-e$zi;yQ zim4wGm%LY;;Nr)>>EH0FJNi|`oB)Ram26481nkkH3WSbt162r8Oce0N7)FHG1AoW2 zMeB z8g#zT+a%Hge@`1Ar2L9IP^0}8w4Q1h1#L58BtnV9de27(s)t>+)I(QT>Z?K{OqwJB zf!B7RWpO~M->D8Pv;Ml99a_+^_mkVAOPrfhpLkn`WQY6!t&e9@mVoE?jreq z|LnKiV6lH$i+L@d)AEa}jW%_xMO>~}cG<2}3VOpJwqXhRO)U#^e?8vpN#dL*Fc=P#=mf2A_%wdd^Jd&SW#&Zv7b& zl5_64-*^7*xykGc$3~dQWVWLQA0i=AmcT>d%?L}fR0-BZCNU|iil|k=h7g4mBN8YQ z5>4C%!y+rySPFr5)X)&O2)GV!+zFrxd%_1zdZ4HcK|=}DTZ*_{^uu!>T)2I2vX4jtD0n)PYWJHLMT{1VBE;1r zKs?1JYLEkPE|+_9^^dRTZr|{UKOB;ZVE?WB&L@}hH;{A(QzOwyfpsrwzZO7JJN;;d z#OG_k`4MHW*VuL>oq;@3Y2}qEIw+BqqZsULS`s^x11h8BU|J<}5c{zy0D0UUaTPA4&aM;9Zvtm1S!X3V1eL3R>zfc={49qSLB~YuKZq)QkBU z#F1k=yG4~JFl1~|+=qMmanT~g4JA;o^EHZ#y}t$yBknKo*^i4BA#NyzxPsRZgw@y# zb{3vWUec9OUeX7XN~-9$bB`8)3S$_P>>a?w&($UwpyGqcfx#je!U82jV}R$l=V0SA~PZL=k}RlXIZn~c+FW@G8!6SGGP}{ldf$T zho{M?pi0e_ZcJ;g#$WXZJX{X+?Ni7mt4(tz}+Mn z6;;tP9hV8d{h(nP4WsG$lDMdu0EF~ImQi;PESVNCp`5bK#igzSpa+~~%X9(ITO6(% zn%lOG!%f|I3z;+CZp}3oTefL9NA2TFw&5(bEd3E)AXU2AZXDoyi6UpRnJWb;3(49g zN|_)tOi*OyJ*u3e1!>|T12Yf^tzPtR;Dn(W!~lm{3BZ+BoXFQ1QNm zdL0@G@Gg`-W-1~}%&d$_<@HodD}>GOQLR$tJS|*LK}X=h4Nr9_#RbsW5(6DMM;`Wh zs$8Ij&KBNnI4QzzLn#h>HO;ZdcwziQ*>e~p9II*I2!|$egi;*G&8=STNiy^DI*N^t z_xf&-m1|V_juvii4Z5Lc;BGL6Gg&yRFj^qlg>&+Tk@gM>FVy#`@-c-y5GtlFELAAQ zgsOb8O_4#bsx5o5tSxIWiOb$lF28Bm4op{dU{Cqk@;;*O?z3=CSY?>sC)$Qvwp%C5 z4a3!R7}n~S(&@Z=YT3x|&pWQ&UU2iJeBCrG-O2BN^>n_eHE_uv^RU!4HJdOs4I32A zwqxB*a7PzfO&2aa=LjUao8&W5_n~MTXC1Ff&Y0(zn=Kl@UPFw> zFGF}I{~HwVD(3(^c@OE?=ylIG{)0B!@K*u{<=*_CrR@2=!KpVL&)lP$W)lWPeX%=3 zo^IH=?$F7Nlt(J%kz8@U4Zp*3+Ss^WAFGd#@7*(BHR{#s-o1LITB(j3qm_m6`F-R2 aD)Yv?KC(M=1^$>m{6*eQ5i$#ZX#WP9+>Les literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/._SUCCESS.crc b/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/.part-00000.crc b/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/.part-00000.crc new file mode 100644 index 0000000000000000000000000000000000000000..1a72b8e0298442276506431fafc8c925497ded55 GIT binary patch literal 16 XcmYc;N@ieSU}6aL`H*sfwdOPcBd!HV literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/_SUCCESS b/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/_SUCCESS new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/part-00000 b/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/part-00000 new file mode 100644 index 0000000000000..cccbb8f9f3b05 --- /dev/null +++ b/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/part-00000 @@ -0,0 +1 @@ 
+{"class":"org.apache.spark.ml.regression.RandomForestRegressionModel","timestamp":1608687933536,"sparkVersion":"2.4.7","uid":"rfr_d946d96b7ff0","paramMap":{"numTrees":2},"defaultParamMap":{"numTrees":20,"featureSubsetStrategy":"auto","maxDepth":5,"minInstancesPerNode":1,"labelCol":"label","cacheNodeIds":false,"checkpointInterval":10,"featuresCol":"features","maxMemoryInMB":256,"predictionCol":"prediction","minInfoGain":0.0,"subsamplingRate":1.0,"impurity":"variance","seed":235498149,"maxBins":32},"numFeatures":692,"numTrees":2} diff --git a/mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/._SUCCESS.crc b/mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/.part-00000-dfe4db51-d349-447a-9b86-d95edaabcde8-c000.snappy.parquet.crc b/mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/.part-00000-dfe4db51-d349-447a-9b86-d95edaabcde8-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..8081f8842670e94dabc89246bcee00c2bd49a151 GIT binary patch literal 32 ocmYc;N@ieSU}88t<>t$D>o-DEAC8n2q~DiVZs+v0Kj|=7XSbN literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/_SUCCESS b/mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/_SUCCESS new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/part-00000-dfe4db51-d349-447a-9b86-d95edaabcde8-c000.snappy.parquet b/mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/part-00000-dfe4db51-d349-447a-9b86-d95edaabcde8-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..093c3468db30122ef0c7b69316fa611f342d48a9 GIT binary patch literal 3055 zcmeHJ&1>UE6dx&CuM)CL3YAu3T}X%8WI+{@9m}yX4b6sj9h?%vQfNyFV`=PYS07$| zcsB$cdMTw83WfHt=N@wKp~q5s=&exbztF?}1^Ej~pJY4Ahj-Hy%3flOJ@aPf{pP(l z@Aux$`@6b;geBokMc`n-3w%-H7=)0-!4DGA%BdL$_+lzfH^A;+ZwY_g93L#eK?AKE z%^hOXAz_G{*bn+D84z=ys^NeHk5$)E1KJO07}~z4Zc@`u2M+^EcTdFIzC|6Z;K+7q z7!h}Xo4TQ`8Ef@MwOTXk3eI$WOoQ|+pj6aVwGIbj+k%-D1$zcvU8N0b8P!z-766*K z+XNIHCJMG_kHk*2lM69i(0n}bZ7*tj5e|2hl!m)&P}K(63hw1JuG@ zvgHli14FA71@|b4VzBJK4~I`?fbNhkb;ihp5qJU66l|vMOJ{8hkb1-k>11=y|B%?=08NFf4gzY~W&|D?L*;Y8v)w@)*wF!8 zY)Aq-#lT4-{okPit1b=`&B~q z(s%KSM|bXVBnc$jUxnlQEd0;S9YOB>_Z+)%=c`LHEDame;UU48*75f<6YRA|jrr}J zJZSThyz~H``T0oQBAr^CsDCoLzOl}1{my*QWPTYjoNK?=WItJ$|3Y5ku(JCN%OwdgFc5J^ zUnAREC4*XO?g-p7Udd`3UUsYY5C!<_@huBwytZ)bq&eO{dEn2az7=g3%$iv zml06Xit16foBTaJJV!qXI6F_F=O% zOjt8lVEJ(heAyj7Pmr`VvHe{)Se`g*Syy#U)t6VgvF%vPWUXQK)_RRbwbC_c58BbH jr5T#hpf$~GbnA_})}>ubzbCh#RUdy5TR> OldLabeledPoint} import org.apache.spark.mllib.tree.{EnsembleTestHelper, RandomForest => OldRandomForest} import org.apache.spark.mllib.tree.configuration.{Algo => OldAlgo} -import org.apache.spark.mllib.util.TestingUtils._ import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.functions._ @@ -429,6 +429,20 @@ class RandomForestClassifierSuite extends MLTest with DefaultReadWriteTest { testEstimatorAndModelReadWrite(rf, continuousData, allParamSettings, allParamSettings, checkModelData) } + + test("SPARK-33398: Load RandomForestClassificationModel prior to Spark 3.0") { + val path = testFile("ml-models/rfc-2.4.7") + val model = 
RandomForestClassificationModel.load(path) + assert(model.numClasses === 2) + assert(model.numFeatures === 692) + assert(model.getNumTrees === 2) + assert(model.totalNumNodes === 10) + assert(model.trees.map(_.numNodes) === Array(3, 7)) + + val metadata = spark.read.json(s"$path/metadata") + val sparkVersionStr = metadata.select("sparkVersion").first().getString(0) + assert(sparkVersionStr === "2.4.7") + } } private object RandomForestClassifierSuite extends SparkFunSuite { diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala index 8fd192fa56500..861bf1e0b1292 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala @@ -99,7 +99,7 @@ class HashingTFSuite extends MLTest with DefaultReadWriteTest { val metadata = spark.read.json(s"$hashingTFPath/metadata") val sparkVersionStr = metadata.select("sparkVersion").first().getString(0) - assert(sparkVersionStr == "2.4.4") + assert(sparkVersionStr === "2.4.4") intercept[IllegalArgumentException] { loadedHashingTF.save(hashingTFPath) diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala index 948140897d8cc..c8247b9c8f3bf 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala @@ -483,6 +483,6 @@ class StringIndexerSuite extends MLTest with DefaultReadWriteTest { val metadata = spark.read.json(s"$modelPath/metadata") val sparkVersionStr = metadata.select("sparkVersion").first().getString(0) - assert(sparkVersionStr == "2.4.4") + assert(sparkVersionStr === "2.4.4") } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala index 49ebcb385640e..9cb0345400bc4 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.regression import org.apache.spark.SparkFunSuite import org.apache.spark.ml.feature.LabeledPoint -import org.apache.spark.ml.linalg.Vector +import org.apache.spark.ml.linalg.{Vector, Vectors} import org.apache.spark.ml.tree.impl.TreeTests import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest, MLTestingUtils} import org.apache.spark.ml.util.TestingUtils._ @@ -236,6 +236,20 @@ class DecisionTreeRegressorSuite extends MLTest with DefaultReadWriteTest { TreeTests.allParamSettings ++ Map("maxDepth" -> 0), TreeTests.allParamSettings ++ Map("maxDepth" -> 0), checkModelData) } + + test("SPARK-33398: Load DecisionTreeRegressionModel prior to Spark 3.0") { + val path = testFile("ml-models/dtr-2.4.7") + val model = DecisionTreeRegressionModel.load(path) + assert(model.numFeatures === 692) + assert(model.numNodes === 5) + assert(model.featureImportances ~== + Vectors.sparse(692, Array(100, 434), + Array(0.03987240829346093, 0.960127591706539)) absTol 1e-4) + + val metadata = spark.read.json(s"$path/metadata") + val sparkVersionStr = metadata.select("sparkVersion").first().getString(0) + assert(sparkVersionStr === "2.4.7") + } } private[ml] object DecisionTreeRegressorSuite extends SparkFunSuite { diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala index 04b0d4b8470f3..7d84df6326397 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala @@ -370,6 +370,18 @@ class GBTRegressorSuite extends MLTest with DefaultReadWriteTest { testEstimatorAndModelReadWrite(gbt, continuousData, allParamSettings, allParamSettings, checkModelData) } + + test("SPARK-33398: Load GBTRegressionModel prior to Spark 3.0") { + val path = testFile("ml-models/gbtr-2.4.7") + val model = GBTRegressionModel.load(path) + assert(model.numFeatures === 692) + assert(model.totalNumNodes === 6) + assert(model.trees.map(_.numNodes) === Array(5, 1)) + + val metadata = spark.read.json(s"$path/metadata") + val sparkVersionStr = metadata.select("sparkVersion").first().getString(0) + assert(sparkVersionStr === "2.4.7") + } } private object GBTRegressorSuite extends SparkFunSuite { diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala index aeddb5ac7b13e..7ec30de301779 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala @@ -221,6 +221,18 @@ class RandomForestRegressorSuite extends MLTest with DefaultReadWriteTest{ testEstimatorAndModelReadWrite(rf, continuousData, allParamSettings, allParamSettings, checkModelData) } + + test("SPARK-33398: Load RandomForestRegressionModel prior to Spark 3.0") { + val path = testFile("ml-models/rfr-2.4.7") + val model = RandomForestRegressionModel.load(path) + assert(model.numFeatures === 692) + assert(model.totalNumNodes === 8) + assert(model.trees.map(_.numNodes) === Array(5, 3)) + + val metadata = spark.read.json(s"$path/metadata") + val sparkVersionStr = metadata.select("sparkVersion").first().getString(0) + assert(sparkVersionStr === "2.4.7") + } } private object RandomForestRegressorSuite extends SparkFunSuite { From 67195d0d977caa5a458e8a609c434205f9b54d1b Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 4 Jan 2021 04:11:39 +0000 Subject: [PATCH 0936/1009] [SPARK-33950][SQL] Refresh cache in v1 `ALTER TABLE .. DROP PARTITION` ### What changes were proposed in this pull request? Invoke `refreshTable()` from `AlterTableDropPartitionCommand.run()` after partitions dropping. In particular, this invalidates the cache associated with the modified table. ### Why are the changes needed? This fixes the issues portrayed by the example: ```sql spark-sql> CREATE TABLE tbl1 (col0 int, part0 int) USING parquet PARTITIONED BY (part0); spark-sql> INSERT INTO tbl1 PARTITION (part0=0) SELECT 0; spark-sql> INSERT INTO tbl1 PARTITION (part0=1) SELECT 1; spark-sql> CACHE TABLE tbl1; spark-sql> SELECT * FROM tbl1; 0 0 1 1 spark-sql> ALTER TABLE tbl1 DROP PARTITION (part0=0); spark-sql> SELECT * FROM tbl1; 0 0 1 1 ``` The last query must not return `0 0` since it was deleted by previous command. ### Does this PR introduce _any_ user-facing change? Yes. After the changes for the example above: ```sql ... spark-sql> ALTER TABLE tbl1 DROP PARTITION (part0=0); spark-sql> SELECT * FROM tbl1; 1 1 ``` ### How was this patch tested? 
By running the affected test suite: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableDropPartitionSuite" ``` Closes #30983 from MaxGekk/drop-partition-refresh-cache. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../spark/sql/execution/command/ddl.scala | 1 + .../v1/AlterTableDropPartitionSuite.scala | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 601594bc6b677..5e3a67927e75a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -581,6 +581,7 @@ case class AlterTableDropPartitionCommand( table.identifier, normalizedSpecs, ignoreIfNotExists = ifExists, purge = purge, retainData = retainData) + sparkSession.catalog.refreshTable(table.identifier.quotedString) CommandUtils.updateTableStats(sparkSession, table) Seq.empty[Row] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala index a6490ebdb950c..2f2c62427d5ad 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.command.v1 -import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.execution.command /** @@ -42,6 +42,21 @@ trait AlterTableDropPartitionSuiteBase extends command.AlterTableDropPartitionSu checkPartitions(t) // no partitions } } + + test("SPARK-33950: refresh cache after partition dropping") { + withTable("t") { + sql(s"CREATE TABLE t (id int, part int) $defaultUsing PARTITIONED BY (part)") + sql("INSERT INTO t PARTITION (part=0) SELECT 0") + sql("INSERT INTO t PARTITION (part=1) SELECT 1") + assert(!spark.catalog.isCached("t")) + sql("CACHE TABLE t") + assert(spark.catalog.isCached("t")) + checkAnswer(sql("SELECT * FROM t"), Seq(Row(0, 0), Row(1, 1))) + sql("ALTER TABLE t DROP PARTITION (part=0)") + assert(spark.catalog.isCached("t")) + checkAnswer(sql("SELECT * FROM t"), Seq(Row(1, 1))) + } + } } /** From b037930952a341f4ed956a8f1839852992feaadc Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Mon, 4 Jan 2021 05:44:00 +0000 Subject: [PATCH 0937/1009] [SPARK-33951][SQL] Distinguish the error between filter and distinct ### What changes were proposed in this pull request? The error messages for specifying filter and distinct for the aggregate function are mixed together and should be separated. This can increase readability and ease of use. ### Why are the changes needed? increase readability and ease of use. ### Does this PR introduce _any_ user-facing change? 'Yes'. ### How was this patch tested? Jenkins test Closes #30982 from beliefer/SPARK-33951. 
Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: Wenchen Fan --- .../spark/sql/QueryCompilationErrors.scala | 9 +--- .../sql/catalyst/analysis/Analyzer.scala | 45 +++++++++++-------- .../analysis/higherOrderFunctions.scala | 3 +- .../analysis/AnalysisErrorSuite.scala | 8 ++-- 4 files changed, 35 insertions(+), 30 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala index e4a1f3f8efeee..f4c91327a9e11 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala @@ -263,13 +263,8 @@ object QueryCompilationErrors { s"its class is $classCanonicalName, which is not a generator.") } - def distinctOrFilterOnlyWithAggregateFunctionError(prettyName: String): Throwable = { - new AnalysisException("DISTINCT or FILTER specified, " + - s"but $prettyName is not an aggregate function") - } - - def ignoreNullsWithUnsupportedFunctionError(prettyName: String): Throwable = { - new AnalysisException(s"Function $prettyName does not support IGNORE NULLS") + def functionWithUnsupportedSyntaxError(prettyName: String, syntax: String): Throwable = { + new AnalysisException(s"Function $prettyName does not support $syntax") } def nonDeterministicFilterInAggregateError(): Throwable = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 5e86368f6f4b3..fdd1cd0146c24 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2120,24 +2120,30 @@ class Analyzer(override val catalogManager: CatalogManager) // the context of a Window clause. They do not need to be wrapped in an // AggregateExpression. 
case wf: AggregateWindowFunction => - if (isDistinct || filter.isDefined) { - throw QueryCompilationErrors.distinctOrFilterOnlyWithAggregateFunctionError( - wf.prettyName) + if (isDistinct) { + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + wf.prettyName, "DISTINCT") + } else if (filter.isDefined) { + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + wf.prettyName, "FILTER clause") } else if (ignoreNulls) { wf match { case nthValue: NthValue => nthValue.copy(ignoreNulls = ignoreNulls) case _ => - throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError( - wf.prettyName) + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + wf.prettyName, "IGNORE NULLS") } } else { wf } case owf: FrameLessOffsetWindowFunction => - if (isDistinct || filter.isDefined) { - throw QueryCompilationErrors.distinctOrFilterOnlyWithAggregateFunctionError( - owf.prettyName) + if (isDistinct) { + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + owf.prettyName, "DISTINCT") + } else if (filter.isDefined) { + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + owf.prettyName, "FILTER clause") } else if (ignoreNulls) { owf match { case lead: Lead => @@ -2145,8 +2151,8 @@ class Analyzer(override val catalogManager: CatalogManager) case lag: Lag => lag.copy(ignoreNulls = ignoreNulls) case _ => - throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError( - owf.prettyName) + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + owf.prettyName, "IGNORE NULLS") } } else { owf @@ -2161,20 +2167,23 @@ class Analyzer(override val catalogManager: CatalogManager) case first: First => first.copy(ignoreNulls = ignoreNulls) case last: Last => last.copy(ignoreNulls = ignoreNulls) case _ => - throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError( - agg.prettyName) + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + agg.prettyName, "IGNORE NULLS") } AggregateExpression(aggFunc, Complete, isDistinct, filter) } else { AggregateExpression(agg, Complete, isDistinct, filter) } // This function is not an aggregate function, just return the resolved one. - case other if (isDistinct || filter.isDefined) => - throw QueryCompilationErrors.distinctOrFilterOnlyWithAggregateFunctionError( - other.prettyName) - case other if (ignoreNulls) => - throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError( - other.prettyName) + case other if isDistinct => + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + other.prettyName, "DISTINCT") + case other if filter.isDefined => + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + other.prettyName, "FILTER clause") + case other if ignoreNulls => + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + other.prettyName, "IGNORE NULLS") case e: String2TrimExpression if arguments.size == 2 => if (trimWarningEnabled.get) { log.warn("Two-parameter TRIM/LTRIM/RTRIM function signatures are deprecated." 
+ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala index 6115b4ed5a117..7d74c0d1cd14f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala @@ -41,7 +41,8 @@ case class ResolveHigherOrderFunctions(catalog: SessionCatalog) extends Rule[Log filter.foreach(_.failAnalysis("FILTER predicate specified, " + s"but ${func.prettyName} is not an aggregate function")) if (ignoreNulls) { - throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError(func.prettyName) + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + func.prettyName, "IGNORE NULLS") } func case other => other.failAnalysis( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index ec2a8a41bf38c..01d223d18b32b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -184,22 +184,22 @@ class AnalysisErrorSuite extends AnalysisTest { errorTest( "distinct function", CatalystSqlParser.parsePlan("SELECT hex(DISTINCT a) FROM TaBlE"), - "DISTINCT or FILTER specified, but hex is not an aggregate function" :: Nil) + "Function hex does not support DISTINCT" :: Nil) errorTest( "non aggregate function with filter predicate", CatalystSqlParser.parsePlan("SELECT hex(a) FILTER (WHERE c = 1) FROM TaBlE2"), - "DISTINCT or FILTER specified, but hex is not an aggregate function" :: Nil) + "Function hex does not support FILTER clause" :: Nil) errorTest( "distinct window function", CatalystSqlParser.parsePlan("SELECT percent_rank(DISTINCT a) OVER () FROM TaBlE"), - "DISTINCT or FILTER specified, but percent_rank is not an aggregate function" :: Nil) + "Function percent_rank does not support DISTINCT" :: Nil) errorTest( "window function with filter predicate", CatalystSqlParser.parsePlan("SELECT percent_rank(a) FILTER (WHERE c > 1) OVER () FROM TaBlE2"), - "DISTINCT or FILTER specified, but percent_rank is not an aggregate function" :: Nil) + "Function percent_rank does not support FILTER clause" :: Nil) errorTest( "higher order function with filter predicate", From 2a68ed71e4402c2864202aa78a54d9921c257990 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Mon, 4 Jan 2021 05:53:14 +0000 Subject: [PATCH 0938/1009] [SPARK-33954][SQL] Some operator missing rowCount when enable CBO ### What changes were proposed in this pull request? 
This PR fixes some operators that are missing rowCount when CBO is enabled, e.g.:
```scala
spark.range(1000).selectExpr("id as a", "id as b").write.saveAsTable("t1")
spark.sql("ANALYZE TABLE t1 COMPUTE STATISTICS FOR ALL COLUMNS")
spark.sql("set spark.sql.cbo.enabled=true")
spark.sql("set spark.sql.cbo.planStats.enabled=true")
spark.sql("select * from (select * from t1 distribute by a limit 100) distribute by b").explain("cost")
```
Before this PR:
```
== Optimized Logical Plan ==
RepartitionByExpression [b#2129L], Statistics(sizeInBytes=2.3 KiB)
+- GlobalLimit 100, Statistics(sizeInBytes=2.3 KiB, rowCount=100)
   +- LocalLimit 100, Statistics(sizeInBytes=23.4 KiB)
      +- RepartitionByExpression [a#2128L], Statistics(sizeInBytes=23.4 KiB)
         +- Relation[a#2128L,b#2129L] parquet, Statistics(sizeInBytes=23.4 KiB, rowCount=1.00E+3)
```
After this PR:
```
== Optimized Logical Plan ==
RepartitionByExpression [b#2129L], Statistics(sizeInBytes=2.3 KiB, rowCount=100)
+- GlobalLimit 100, Statistics(sizeInBytes=2.3 KiB, rowCount=100)
   +- LocalLimit 100, Statistics(sizeInBytes=23.4 KiB, rowCount=1.00E+3)
      +- RepartitionByExpression [a#2128L], Statistics(sizeInBytes=23.4 KiB, rowCount=1.00E+3)
         +- Relation[a#2128L,b#2129L] parquet, Statistics(sizeInBytes=23.4 KiB, rowCount=1.00E+3)
```

### Why are the changes needed?

[`JoinEstimation.estimateInnerOuterJoin`](https://github.com/apache/spark/blob/d6a68e0b67ff7de58073c176dd097070e88ac831/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/JoinEstimation.scala#L55-L156) needs the row count.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Unit test.

Closes #30987 from wangyum/SPARK-33954.

Authored-by: Yuming Wang
Signed-off-by: Wenchen Fan
---
 .../BasicStatsPlanVisitor.scala               | 24 +++++++++++++------
 .../BasicStatsEstimationSuite.scala           |  7 ++++++
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala
index ec0c1001b1caa..34baf5b90e54e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala
@@ -27,13 +27,23 @@ object BasicStatsPlanVisitor extends LogicalPlanVisitor[Statistics] {
   /** Falls back to the estimation computed by [[SizeInBytesOnlyStatsPlanVisitor]].
*/ private def fallback(p: LogicalPlan): Statistics = SizeInBytesOnlyStatsPlanVisitor.visit(p) - override def default(p: LogicalPlan): Statistics = fallback(p) + override def default(p: LogicalPlan): Statistics = p match { + case p: LeafNode => p.computeStats() + case _: LogicalPlan => + val stats = p.children.map(_.stats) + val rowCount = if (stats.exists(_.rowCount.isEmpty)) { + None + } else { + Some(stats.map(_.rowCount.get).filter(_ > 0L).product) + } + Statistics(sizeInBytes = stats.map(_.sizeInBytes).filter(_ > 0L).product, rowCount = rowCount) + } override def visitAggregate(p: Aggregate): Statistics = { AggregateEstimation.estimate(p).getOrElse(fallback(p)) } - override def visitDistinct(p: Distinct): Statistics = fallback(p) + override def visitDistinct(p: Distinct): Statistics = default(p) override def visitExcept(p: Except): Statistics = fallback(p) @@ -43,7 +53,7 @@ object BasicStatsPlanVisitor extends LogicalPlanVisitor[Statistics] { FilterEstimation(p).estimate.getOrElse(fallback(p)) } - override def visitGenerate(p: Generate): Statistics = fallback(p) + override def visitGenerate(p: Generate): Statistics = default(p) override def visitGlobalLimit(p: GlobalLimit): Statistics = fallback(p) @@ -55,19 +65,19 @@ object BasicStatsPlanVisitor extends LogicalPlanVisitor[Statistics] { override def visitLocalLimit(p: LocalLimit): Statistics = fallback(p) - override def visitPivot(p: Pivot): Statistics = fallback(p) + override def visitPivot(p: Pivot): Statistics = default(p) override def visitProject(p: Project): Statistics = { ProjectEstimation.estimate(p).getOrElse(fallback(p)) } - override def visitRepartition(p: Repartition): Statistics = fallback(p) + override def visitRepartition(p: Repartition): Statistics = default(p) - override def visitRepartitionByExpr(p: RepartitionByExpression): Statistics = fallback(p) + override def visitRepartitionByExpr(p: RepartitionByExpression): Statistics = default(p) override def visitSample(p: Sample): Statistics = fallback(p) - override def visitScriptTransform(p: ScriptTransformation): Statistics = fallback(p) + override def visitScriptTransform(p: ScriptTransformation): Statistics = default(p) override def visitUnion(p: Union): Statistics = fallback(p) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala index d682165e08e32..91f8fc406a43d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala @@ -134,6 +134,13 @@ class BasicStatsEstimationSuite extends PlanTest with StatsEstimationTestBase { expectedStatsCboOff = Statistics.DUMMY) } + test("SPARK-33954: Some operator missing rowCount when enable CBO") { + checkStats( + plan.repartition(10), + expectedStatsCboOn = Statistics(sizeInBytes = 120, rowCount = Some(10)), + expectedStatsCboOff = Statistics(sizeInBytes = 120)) + } + /** Check estimated stats when cbo is turned on/off. */ private def checkStats( plan: LogicalPlan, From adac633f93f05442f57456f7dcc0d59822d14c2b Mon Sep 17 00:00:00 2001 From: angerszhu Date: Mon, 4 Jan 2021 15:46:49 +0900 Subject: [PATCH 0939/1009] [SPARK-33934][SQL] Add SparkFile's root dir to env property PATH ### What changes were proposed in this pull request? 
In Hive we can always use:
```
add file /path/to/script.py;
select transform(col1, col2, ..) using 'script.py' as (col1, col2, ...) from ...
```
But since Spark wraps the script command with `/bin/bash -c`, such a query currently fails with `script.py command not found`.

This PR adds the SparkFiles root directory to the execution env property `PATH`, so the sub-process can find `script.py` as a program on the `PATH`.

### Why are the changes needed?

Support SQL migration from Hive to Spark.

### Does this PR introduce _any_ user-facing change?

Users can use the script file name directly as the program in script transform SQL:
```
add file /path/to/script.py;
select transform(col1, col2, ..) using 'script.py' as (col1, col2, ...) from ...
```

### How was this patch tested?

UT

Closes #30973 from AngersZhuuuu/SPARK-33934.

Authored-by: angerszhu
Signed-off-by: HyukjinKwon
---
 .../BaseScriptTransformationExec.scala        |   8 +-
 sql/core/src/test/resources/test_script.py    |   2 +
 .../BaseScriptTransformationSuite.scala       | 113 ++++++++++++++++++
 3 files changed, 121 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala index cf9ee1ef6db72..a25e4b8f8ea07 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala @@ -470,6 +470,119 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU Row("3\u00014\u00015") :: Nil) } } + + test("SPARK-33934: Add SparkFile's root dir to env property PATH") { + assume(TestUtils.testCommandAvailable("python")) + val scriptFilePath = copyAndGetResourceFile("test_script.py", ".py").getAbsoluteFile + withTempView("v") { + val df = Seq( + (1, "1", 1.0, BigDecimal(1.0), new Timestamp(1)), + (2, "2", 2.0, BigDecimal(2.0), new Timestamp(2)), + (3, "3", 3.0, BigDecimal(3.0), new Timestamp(3)) + ).toDF("a", "b", "c", "d", "e") // Note column d's data type is Decimal(38, 18) + df.createTempView("v") + + // test 'python /path/to/script.py' with local file + checkAnswer( + sql( + s""" + |SELECT + |TRANSFORM(a, b, c, d, e) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + | USING 'python $scriptFilePath' AS (a, b, c, d, e) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + |FROM v + """.stripMargin), identity, df.select( + 'a.cast("string"), + 'b.cast("string"), + 'c.cast("string"), + 'd.cast("string"), + 'e.cast("string")).collect()) + + // test '/path/to/script.py' with script not executable + val e1 = intercept[TestFailedException] { + checkAnswer( + sql( + s""" + |SELECT + |TRANSFORM(a, b, c, d, e) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + | USING '$scriptFilePath' AS (a, b, c, d, e) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + |FROM v + """.stripMargin), identity, df.select( + 'a.cast("string"), + 'b.cast("string"), + 'c.cast("string"), + 'd.cast("string"), + 'e.cast("string")).collect()) + }.getMessage + assert(e1.contains("Permission denied")) + + // test `/path/to/script.py' with script executable + scriptFilePath.setExecutable(true) + checkAnswer( + sql( + s""" + |SELECT + |TRANSFORM(a, b, c, d, e) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + | USING '$scriptFilePath' AS (a, b, c, d, e) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + |FROM v + """.stripMargin), identity, df.select( + 'a.cast("string"), + 'b.cast("string"), + 'c.cast("string"), + 'd.cast("string"), + 'e.cast("string")).collect()) + + scriptFilePath.setExecutable(false) + sql(s"ADD FILE ${scriptFilePath.getAbsolutePath}") + + // test `script.py` when file added + checkAnswer( + sql( + s""" + |SELECT TRANSFORM(a, b, c, d, e) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + | USING '${scriptFilePath.getName}' AS (a, b, c, d, e) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + |FROM v + """.stripMargin), identity, df.select( + 'a.cast("string"), + 'b.cast("string"), + 'c.cast("string"), + 'd.cast("string"), + 'e.cast("string")).collect()) + + // test `python script.py` when file added + checkAnswer( + sql( + s""" + |SELECT TRANSFORM(a, b, c, d, e) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + | USING 'python ${scriptFilePath.getName}' AS (a, b, c, d, e) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + |FROM v + """.stripMargin), identity, df.select( + 'a.cast("string"), + 'b.cast("string"), + 'c.cast("string"), + 'd.cast("string"), + 
'e.cast("string")).collect()) + } + } } case class ExceptionInjectingOperator(child: SparkPlan) extends UnaryExecNode { From 0b647fe69cf201b4dcbc0f4dfc0eb504a523571d Mon Sep 17 00:00:00 2001 From: Hoa Date: Mon, 4 Jan 2021 06:53:12 +0000 Subject: [PATCH 0940/1009] [SPARK-33888][SQL] JDBC SQL TIME type represents incorrectly as TimestampType, it should be physical Int in millis ### What changes were proposed in this pull request? JDBC SQL TIME type represents incorrectly as TimestampType, we change it to be physical Int in millis for now. ### Why are the changes needed? Currently, for JDBC, SQL TIME type represents incorrectly as Spark TimestampType. This should be represent as physical int in millis Represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one millisecond. It stores the number of milliseconds after midnight, 00:00:00.000. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Close #30902 Closes #30902 from saikocat/SPARK-33888. Lead-authored-by: Hoa Co-authored-by: Hoa Co-authored-by: Duc Hoa, Nguyen Co-authored-by: Duc Hoa, Nguyen Signed-off-by: Wenchen Fan --- .../datasources/jdbc/JdbcUtils.scala | 36 +++++++++++++++++-- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 26 +++++++++++++- 2 files changed, 58 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index f997e57b23206..85a05f42c77fa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.datasources.jdbc import java.sql.{Connection, Driver, JDBCType, PreparedStatement, ResultSet, ResultSetMetaData, SQLException, SQLFeatureNotSupportedException} import java.util.Locale +import java.util.concurrent.TimeUnit import scala.util.Try import scala.util.control.NonFatal @@ -226,7 +227,7 @@ object JdbcUtils extends Logging { case java.sql.Types.SMALLINT => IntegerType case java.sql.Types.SQLXML => StringType case java.sql.Types.STRUCT => StringType - case java.sql.Types.TIME => TimestampType + case java.sql.Types.TIME => IntegerType case java.sql.Types.TIME_WITH_TIMEZONE => null case java.sql.Types.TIMESTAMP => TimestampType @@ -303,11 +304,23 @@ object JdbcUtils extends Logging { } else { rsmd.isNullable(i + 1) != ResultSetMetaData.columnNoNulls } - val metadata = new MetadataBuilder().putLong("scale", fieldScale) + val metadata = new MetadataBuilder() + // SPARK-33888 + // - include scale in metadata for only DECIMAL & NUMERIC + // - include TIME type metadata + // - always build the metadata + dataType match { + // scalastyle:off + case java.sql.Types.NUMERIC => metadata.putLong("scale", fieldScale) + case java.sql.Types.DECIMAL => metadata.putLong("scale", fieldScale) + case java.sql.Types.TIME => metadata.putBoolean("logical_time_type", true) + case _ => + // scalastyle:on + } val columnType = dialect.getCatalystType(dataType, typeName, fieldSize, metadata).getOrElse( getCatalystType(dataType, fieldSize, fieldScale, isSigned)) - fields(i) = StructField(columnName, columnType, nullable) + fields(i) = StructField(columnName, columnType, nullable, metadata.build()) i = i + 1 } new StructType(fields) @@ -408,6 +421,23 @@ object JdbcUtils extends Logging { (rs: ResultSet, row: 
InternalRow, pos: Int) => row.setFloat(pos, rs.getFloat(pos + 1)) + + // SPARK-33888 - sql TIME type represents as physical int in millis + // Represents a time of day, with no reference to a particular calendar, + // time zone or date, with a precision of one millisecond. + // It stores the number of milliseconds after midnight, 00:00:00.000. + case IntegerType if metadata.contains("logical_time_type") => + (rs: ResultSet, row: InternalRow, pos: Int) => { + val rawTime = rs.getTime(pos + 1) + if (rawTime != null) { + val rawTimeInNano = rawTime.toLocalTime().toNanoOfDay() + val timeInMillis = Math.toIntExact(TimeUnit.NANOSECONDS.toMillis(rawTimeInNano)) + row.setInt(pos, timeInMillis) + } else { + row.update(pos, null) + } + } + case IntegerType => (rs: ResultSet, row: InternalRow, pos: Int) => row.setInt(pos, rs.getInt(pos + 1)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index ede5fe538a028..639fd0e6fd0f4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.jdbc import java.math.BigDecimal import java.sql.{Date, DriverManager, SQLException, Timestamp} import java.util.{Calendar, GregorianCalendar, Properties} +import java.util.concurrent.TimeUnit import scala.collection.JavaConverters._ @@ -610,7 +611,13 @@ class JDBCSuite extends QueryTest test("H2 time types") { val rows = sql("SELECT * FROM timetypes").collect() val cal = new GregorianCalendar(java.util.Locale.ROOT) - cal.setTime(rows(0).getAs[java.sql.Timestamp](0)) + val epochMillis = java.time.LocalTime.ofNanoOfDay( + TimeUnit.MILLISECONDS.toNanos(rows(0).getAs[Int](0))) + .atDate(java.time.LocalDate.ofEpochDay(0)) + .atZone(java.time.ZoneId.systemDefault()) + .toInstant() + .toEpochMilli() + cal.setTime(new Date(epochMillis)) assert(cal.get(Calendar.HOUR_OF_DAY) === 12) assert(cal.get(Calendar.MINUTE) === 34) assert(cal.get(Calendar.SECOND) === 56) @@ -625,9 +632,26 @@ class JDBCSuite extends QueryTest assert(cal.get(Calendar.HOUR) === 11) assert(cal.get(Calendar.MINUTE) === 22) assert(cal.get(Calendar.SECOND) === 33) + assert(cal.get(Calendar.MILLISECOND) === 543) assert(rows(0).getAs[java.sql.Timestamp](2).getNanos === 543543000) } + test("SPARK-33888: test TIME types") { + val rows = spark.read.jdbc( + urlWithUserAndPass, "TEST.TIMETYPES", new Properties()).collect() + val cachedRows = spark.read.jdbc(urlWithUserAndPass, "TEST.TIMETYPES", new Properties()) + .cache().collect() + val expectedTimeRaw = java.sql.Time.valueOf("12:34:56") + val expectedTimeMillis = Math.toIntExact( + java.util.concurrent.TimeUnit.NANOSECONDS.toMillis( + expectedTimeRaw.toLocalTime().toNanoOfDay() + ) + ) + assert(rows(0).getAs[Int](0) === expectedTimeMillis) + assert(rows(1).getAs[Int](0) === expectedTimeMillis) + assert(cachedRows(0).getAs[Int](0) === expectedTimeMillis) + } + test("test DATE types") { val rows = spark.read.jdbc( urlWithUserAndPass, "TEST.TIMETYPES", new Properties()).collect() From 8b3fb43f408594ebcb9313b0c0d4c5982ba1ae31 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 4 Jan 2021 08:28:26 +0000 Subject: [PATCH 0941/1009] [SPARK-33965][SQL][TESTS] Recognize `spark_catalog` by `CACHE TABLE` in Hive table names ### What changes were proposed in this pull request? Remove special handling of `CacheTable` in `TestHiveQueryExecution. 
analyzed` because it does not allow to support of `spark_catalog` in Hive table names. `spark_catalog` could be handled by a few lines below: ```scala case UnresolvedRelation(ident, _, _) => if (ident.length > 1 && ident.head.equalsIgnoreCase(CatalogManager.SESSION_CATALOG_NAME)) { ``` added by https://github.com/apache/spark/pull/30883. ### Why are the changes needed? 1. To have feature parity with v1 In-Memory catalog. 2. To be able to write unified tests for In-Memory and Hive external catalogs. ### Does this PR introduce _any_ user-facing change? Should not. ### How was this patch tested? By running the test suite with new UT: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *CachedTableSuite" ``` Closes #30997 from MaxGekk/cache-table-spark_catalog. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../apache/spark/sql/hive/CachedTableSuite.scala | 13 +++++++++++++ .../org/apache/spark/sql/hive/test/TestHive.scala | 9 ++------- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala index ee93af7643b21..7044e6ff78d4a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala @@ -439,4 +439,17 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleto assert(spark.catalog.isCached(t)) } } + + test("SPARK-33965: cache table in spark_catalog") { + withNamespace("spark_catalog.ns") { + sql("CREATE NAMESPACE spark_catalog.ns") + val t = "spark_catalog.ns.tbl" + withTable(t) { + sql(s"CREATE TABLE $t (col int)") + assert(!spark.catalog.isCached(t)) + sql(s"CACHE TABLE $t") + assert(spark.catalog.isCached(t)) + } + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index b70afd3e6b98f..cbba9be32b77c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -39,7 +39,7 @@ import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode import org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation -import org.apache.spark.sql.catalyst.plans.logical.{CacheTable, LogicalPlan, OneRowRelation} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation} import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.execution.{QueryExecution, SQLExecution} @@ -596,13 +596,8 @@ private[hive] class TestHiveQueryExecution( } override lazy val analyzed: LogicalPlan = sparkSession.withActive { - val describedTables = logical match { - case CacheTable(_, tbl, _, _) => tbl.asTableIdentifier :: Nil - case _ => Nil - } - // Make sure any test tables referenced are loaded. 
- val referencedTables = describedTables ++ logical.collect { + val referencedTables = logical.collect { case UnresolvedRelation(ident, _, _) => if (ident.length > 1 && ident.head.equalsIgnoreCase(CatalogManager.SESSION_CATALOG_NAME)) { ident.tail.asTableIdentifier From 271c4f6e00b7bc7c47d84a8e59018e84a19c9822 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 4 Jan 2021 00:54:47 -0800 Subject: [PATCH 0942/1009] [SPARK-33978][SQL] Support ZSTD compression in ORC data source ### What changes were proposed in this pull request? This PR aims to support ZSTD compression in ORC data source. ### Why are the changes needed? Apache ORC 1.6 supports ZSTD compression to generate more compact files and save the storage cost. - https://issues.apache.org/jira/browse/ORC-363 **BEFORE** ```scala scala> spark.range(10).write.option("compression", "zstd").orc("/tmp/zstd") java.lang.IllegalArgumentException: Codec [zstd] is not available. Available codecs are uncompressed, lzo, snappy, zlib, none. ``` **AFTER** ```scala scala> spark.range(10).write.option("compression", "zstd").orc("/tmp/zstd") ``` ```bash $ orc-tools meta /tmp/zstd Processing data file file:/tmp/zstd/part-00011-a63d9a17-456f-42d3-87a1-d922112ed28c-c000.orc [length: 230] Structure for file:/tmp/zstd/part-00011-a63d9a17-456f-42d3-87a1-d922112ed28c-c000.orc File Version: 0.12 with ORC_14 Rows: 1 Compression: ZSTD Compression size: 262144 Calendar: Julian/Gregorian Type: struct Stripe Statistics: Stripe 1: Column 0: count: 1 hasNull: false Column 1: count: 1 hasNull: false bytesOnDisk: 6 min: 9 max: 9 sum: 9 File Statistics: Column 0: count: 1 hasNull: false Column 1: count: 1 hasNull: false bytesOnDisk: 6 min: 9 max: 9 sum: 9 Stripes: Stripe: offset: 3 data: 6 rows: 1 tail: 35 index: 35 Stream: column 0 section ROW_INDEX start: 3 length 11 Stream: column 1 section ROW_INDEX start: 14 length 24 Stream: column 1 section DATA start: 38 length 6 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 File length: 230 bytes Padding length: 0 bytes Padding ratio: 0% User Metadata: org.apache.spark.version=3.2.0 ``` ### Does this PR introduce _any_ user-facing change? Yes, this is a new feature. ### How was this patch tested? Pass the newly added test case. Closes #31002 from dongjoon-hyun/SPARK-33978. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- python/pyspark/sql/readwriter.py | 2 +- .../scala/org/apache/spark/sql/internal/SQLConf.scala | 4 ++-- .../scala/org/apache/spark/sql/DataFrameWriter.scala | 2 +- .../sql/execution/datasources/orc/OrcOptions.scala | 3 ++- .../sql/execution/datasources/orc/OrcSourceSuite.scala | 10 +++++++++- 5 files changed, 15 insertions(+), 6 deletions(-) diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index d120daa5a9434..53122d6c44602 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -1391,7 +1391,7 @@ def orc(self, path, mode=None, partitionBy=None, compression=None): names of partitioning columns compression : str, optional compression codec to use when saving to file. This can be one of the - known case-insensitive shorten names (none, snappy, zlib, and lzo). + known case-insensitive shorten names (none, snappy, zlib, lzo, and zstd). This will override ``orc.compress`` and ``spark.sql.orc.compression.codec``. If None is set, it uses the value specified in ``spark.sql.orc.compression.codec``. 
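A short spark-shell sketch of the codec precedence the option documentation above describes (`compression` over `orc.compress` over `spark.sql.orc.compression.codec`); the output paths are placeholders and the snippet is illustrative only:

```scala
// Session-level default codec.
spark.conf.set("spark.sql.orc.compression.codec", "zlib")

// No per-write option: the session default (ZLIB) applies.
spark.range(10).write.orc("/tmp/orc_session_default")

// `orc.compress` overrides the session conf.
spark.range(10).write.option("orc.compress", "SNAPPY").orc("/tmp/orc_orc_compress")

// The `compression` option takes precedence over both, so these files use ZSTD.
spark.range(10).write.option("compression", "zstd").orc("/tmp/orc_zstd_option")
```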
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 6fcab887dd6af..50cc47d0f80f2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -796,11 +796,11 @@ object SQLConf { .doc("Sets the compression codec used when writing ORC files. If either `compression` or " + "`orc.compress` is specified in the table-specific options/properties, the precedence " + "would be `compression`, `orc.compress`, `spark.sql.orc.compression.codec`." + - "Acceptable values include: none, uncompressed, snappy, zlib, lzo.") + "Acceptable values include: none, uncompressed, snappy, zlib, lzo, zstd.") .version("2.3.0") .stringConf .transform(_.toLowerCase(Locale.ROOT)) - .checkValues(Set("none", "uncompressed", "snappy", "zlib", "lzo")) + .checkValues(Set("none", "uncompressed", "snappy", "zlib", "lzo", "zstd")) .createWithDefault("snappy") val ORC_IMPLEMENTATION = buildConf("spark.sql.orc.impl") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index c5f2a3d568e97..1dba17b451bb0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -885,7 +885,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { *

* <ul>
* <li>`compression` (default is the value specified in `spark.sql.orc.compression.codec`): * compression codec to use when saving to file. This can be one of the known case-insensitive - * shorten names(`none`, `snappy`, `zlib`, and `lzo`). This will override + * shorten names(`none`, `snappy`, `zlib`, `lzo`, and `zstd`). This will override * `orc.compress` and `spark.sql.orc.compression.codec`. If `orc.compress` is given, * it overrides `spark.sql.orc.compression.codec`.</li>
* </ul>
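As a programmatic counterpart to the `orc-tools meta` check in the commit message, the ORC reader API (already on Spark's classpath) can confirm the codec recorded in a file footer. This is an illustrative sketch under the assumption that `/tmp/zstd` is the directory written in the example above:

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.orc.OrcFile

val conf = new Configuration()
val dir = new Path("/tmp/zstd")  // directory written with option("compression", "zstd")
val fs = dir.getFileSystem(conf)

// Pick one data file and read its footer.
val orcFile = fs.listStatus(dir).map(_.getPath).find(_.getName.endsWith(".orc")).get
val reader = OrcFile.createReader(orcFile, OrcFile.readerOptions(conf))

// Expected to report ZSTD for files written with the new codec.
println(reader.getCompressionKind)
```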
      diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOptions.scala index 25f022bcdde89..af92d94d68be9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOptions.scala @@ -77,7 +77,8 @@ object OrcOptions { "uncompressed" -> "NONE", "snappy" -> "SNAPPY", "zlib" -> "ZLIB", - "lzo" -> "LZO") + "lzo" -> "LZO", + "zstd" -> "ZSTD") def getORCCompressionCodecName(name: String): String = shortOrcCompressionCodecNames(name) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala index 4c489bdcc649e..c763f4c9428c8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala @@ -337,7 +337,7 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll with CommonFileDa } // Test all the valid options of spark.sql.orc.compression.codec - Seq("NONE", "UNCOMPRESSED", "SNAPPY", "ZLIB", "LZO").foreach { c => + Seq("NONE", "UNCOMPRESSED", "SNAPPY", "ZLIB", "LZO", "ZSTD").foreach { c => withSQLConf(SQLConf.ORC_COMPRESSION.key -> c) { val expected = if (c == "UNCOMPRESSED") "NONE" else c assert(new OrcOptions(Map.empty[String, String], conf).compressionCodec == expected) @@ -594,4 +594,12 @@ class OrcSourceSuite extends OrcSuite with SharedSparkSession { val df = readResourceOrcFile("test-data/TestStringDictionary.testRowIndex.orc") assert(df.where("str < 'row 001000'").count() === 1000) } + + test("SPARK-33978: Write and read a file with ZSTD compression") { + withTempPath { dir => + val path = dir.getAbsolutePath + spark.range(3).write.option("compression", "zstd").orc(path) + checkAnswer(spark.read.orc(path), Seq(Row(0), Row(1), Row(2))) + } + } } From 8583a4605f74cd439bbf109bf9d551e0ec697910 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Mon, 4 Jan 2021 09:43:15 +0000 Subject: [PATCH 0943/1009] [SPARK-33844][SQL] InsertIntoHiveDir command should check col name too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? In hive-1.2.1, hive serde just split `serdeConstants.LIST_COLUMNS` and `serdeConstants.LIST_COLUMN_TYPES` use comma. When we use spark 2.4 with UT ``` test("insert overwrite directory with comma col name") { withTempDir { dir => val path = dir.toURI.getPath val v1 = s""" | INSERT OVERWRITE DIRECTORY '${path}' | STORED AS TEXTFILE | SELECT 1 as a, 'c' as b, if(1 = 1, "true", "false") """.stripMargin sql(v1).explain(true) sql(v1).show() } } ``` failed with as below since column name contains `,` then column names and column types size not equal. ``` 19:56:05.618 ERROR org.apache.spark.sql.execution.datasources.FileFormatWriter: [ angerszhu ] Aborting job dd774f18-93fa-431f-9468-3534c7d8acda. org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0, localhost, executor driver): org.apache.hadoop.hive.serde2.SerDeException: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe: columns has 5 elements while columns.types has 3 elements! 
at org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters.extractColumnInfo(LazySerDeParameters.java:145) at org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters.(LazySerDeParameters.java:85) at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.initialize(LazySimpleSerDe.java:125) at org.apache.spark.sql.hive.execution.HiveOutputWriter.(HiveFileFormat.scala:119) at org.apache.spark.sql.hive.execution.HiveFileFormat$$anon$1.newInstance(HiveFileFormat.scala:103) at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:120) at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.(FileFormatDataWriter.scala:108) at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:287) at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:219) at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:218) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) at org.apache.spark.scheduler.Task.run(Task.scala:121) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$12.apply(Executor.scala:461) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:467) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) ``` After hive-2.3 we will set COLUMN_NAME_DELIMITER to special char when col name cntains ',': https://github.com/apache/hive/blob/6f4c35c9e904d226451c465effdc5bfd31d395a0/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java#L1180-L1188 https://github.com/apache/hive/blob/6f4c35c9e904d226451c465effdc5bfd31d395a0/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java#L1044-L1075 And in script transform, we parse column name to avoid this problem https://github.com/apache/spark/blob/554600c2af0dbc8979955807658fafef5dc66c08/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala#L257-L261 So I think in `InsertIntoHiveDirComman`, we should do same thing too. And I have verified this method can make spark-2.4 work well. ### Why are the changes needed? More save use serde ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Closes #30850 from AngersZhuuuu/SPARK-33844. 
Authored-by: angerszhu Signed-off-by: Wenchen Fan --- .../hive/execution/InsertIntoHiveDirCommand.scala | 9 +++++++-- .../spark/sql/hive/execution/HiveDDLSuite.scala | 14 ++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveDirCommand.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveDirCommand.scala index b66c302a7d7ea..7ef637ed553ad 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveDirCommand.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveDirCommand.scala @@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.hive.client.HiveClientImpl import org.apache.spark.sql.util.SchemaUtils @@ -63,12 +64,16 @@ case class InsertIntoHiveDirCommand( s"when inserting into ${storage.locationUri.get}", sparkSession.sessionState.conf.caseSensitiveAnalysis) - val hiveTable = HiveClientImpl.toHiveTable(CatalogTable( + val table = CatalogTable( identifier = TableIdentifier(storage.locationUri.get.toString, Some("default")), + provider = Some(DDLUtils.HIVE_PROVIDER), tableType = org.apache.spark.sql.catalyst.catalog.CatalogTableType.VIEW, storage = storage, schema = outputColumns.toStructType - )) + ) + DDLUtils.checkDataColNames(table) + + val hiveTable = HiveClientImpl.toHiveTable(table) hiveTable.getMetadata.put(serdeConstants.SERIALIZATION_LIB, storage.serde.getOrElse(classOf[LazySimpleSerDe].getName)) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index b8a37a84735e3..50b1dd952c61e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -2891,4 +2891,18 @@ class HiveDDLSuite } } } + + test("SPARK-33844: Insert overwrite directory should check schema too") { + withView("v") { + spark.range(1).createTempView("v") + withTempPath { path => + val e = intercept[AnalysisException] { + spark.sql(s"INSERT OVERWRITE LOCAL DIRECTORY '${path.getCanonicalPath}' " + + s"STORED AS PARQUET SELECT ID, if(1=1, 1, 0), abs(id), '^-' FROM v") + }.getMessage + assert(e.contains("Attribute name \"(IF((1 = 1), 1, 0))\" contains" + + " invalid character(s) among \" ,;{}()\\n\\t=\". Please use alias to rename it.")) + } + } + } } From ddc0d5148ac6decde160cca847b5db5d6de1be58 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Mon, 4 Jan 2021 16:14:33 +0000 Subject: [PATCH 0944/1009] [SPARK-33875][SQL] Implement DESCRIBE COLUMN for v2 tables ### What changes were proposed in this pull request? This PR proposes to implement `DESCRIBE COLUMN` for v2 tables. Note that `isExnteded` option is not implemented in this PR. ### Why are the changes needed? Parity with v1 tables. ### Does this PR introduce _any_ user-facing change? Yes, now, `DESCRIBE COLUMN` works for v2 tables. 
```scala sql("CREATE TABLE testcat.tbl (id bigint, data string COMMENT 'hello') USING foo") sql("DESCRIBE testcat.tbl data").show ``` ``` +---------+----------+ |info_name|info_value| +---------+----------+ | col_name| data| |data_type| string| | comment| hello| +---------+----------+ ``` Before this PR, the command would fail with: `Describing columns is not supported for v2 tables.` ### How was this patch tested? Added new test. Closes #30881 from imback82/describe_col_v2. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../spark/sql/QueryCompilationErrors.scala | 4 ++ .../sql/catalyst/analysis/Analyzer.scala | 6 +- .../sql/catalyst/analysis/CheckAnalysis.scala | 14 ++--- .../analysis/ResolvePartitionSpec.scala | 9 +-- .../catalyst/analysis/v2ResolutionPlans.scala | 22 +++++++- .../sql/catalyst/parser/AstBuilder.scala | 2 +- .../catalyst/plans/logical/v2Commands.scala | 2 +- .../sql/catalyst/parser/DDLParserSuite.scala | 28 +++++++--- .../analysis/ResolveSessionCatalog.scala | 32 +++++++++-- .../spark/sql/execution/command/tables.scala | 7 ++- .../datasources/v2/DataSourceV2Strategy.scala | 29 +++++++--- .../datasources/v2/DescribeColumnExec.scala | 56 +++++++++++++++++++ .../inputs/describe-table-column.sql | 8 ++- .../results/describe-table-column.sql.out | 24 +++++++- .../sql-tests/results/describe.sql.out | 2 +- .../sql/connector/DataSourceV2SQLSuite.scala | 53 ++++++++++++++++-- .../command/PlanResolutionSuite.scala | 8 +-- 17 files changed, 250 insertions(+), 56 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeColumnExec.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala index f4c91327a9e11..ff4c54df96f31 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala @@ -500,4 +500,8 @@ object QueryCompilationErrors { def commandNotSupportNestedColumnError(command: String, quoted: String): Throwable = { new AnalysisException(s"$command does not support nested column: $quoted") } + + def columnDoesNotExistError(colName: String): Throwable = { + new AnalysisException(s"Column $colName does not exist") + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index fdd1cd0146c24..e41d3de642d51 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -989,12 +989,12 @@ class Analyzer(override val catalogManager: CatalogManager) case u @ UnresolvedTable(NonSessionCatalogAndIdentifier(catalog, ident), _) => CatalogV2Util.loadTable(catalog, ident) - .map(ResolvedTable(catalog.asTableCatalog, ident, _)) + .map(table => ResolvedTable.create(catalog.asTableCatalog, ident, table)) .getOrElse(u) case u @ UnresolvedTableOrView(NonSessionCatalogAndIdentifier(catalog, ident), _, _) => CatalogV2Util.loadTable(catalog, ident) - .map(ResolvedTable(catalog.asTableCatalog, ident, _)) + .map(table => ResolvedTable.create(catalog.asTableCatalog, ident, table)) .getOrElse(u) case i @ InsertIntoStatement(u @ UnresolvedRelation(_, _, false), _, _, _, _, _) @@ -1166,7 +1166,7 @@ class Analyzer(override val catalogManager: CatalogManager) case v1Table: 
V1Table if v1Table.v1Table.tableType == CatalogTableType.VIEW => ResolvedView(ident, isTemp = false) case table => - ResolvedTable(catalog.asTableCatalog, ident, table) + ResolvedTable.create(catalog.asTableCatalog, ident, table) } case _ => None } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 89076fbb9ce0f..95ea942be4abb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -599,14 +599,14 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { // no validation needed for set and remove property } - case AlterTableAddPartition(ResolvedTable(_, _, table), parts, _) => - checkAlterTablePartition(table, parts) + case AlterTableAddPartition(r: ResolvedTable, parts, _) => + checkAlterTablePartition(r.table, parts) - case AlterTableDropPartition(ResolvedTable(_, _, table), parts, _, _) => - checkAlterTablePartition(table, parts) + case AlterTableDropPartition(r: ResolvedTable, parts, _, _) => + checkAlterTablePartition(r.table, parts) - case AlterTableRenamePartition(ResolvedTable(_, _, table), from, _) => - checkAlterTablePartition(table, Seq(from)) + case AlterTableRenamePartition(r: ResolvedTable, from, _) => + checkAlterTablePartition(r.table, Seq(from)) case showPartitions: ShowPartitions => checkShowPartitions(showPartitions) @@ -1047,7 +1047,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { case ShowPartitions(rt: ResolvedTable, _) if !rt.table.isInstanceOf[SupportsPartitionManagement] => failAnalysis(s"SHOW PARTITIONS cannot run for a table which does not support partitioning") - case ShowPartitions(ResolvedTable(_, _, partTable: SupportsPartitionManagement), _) + case ShowPartitions(ResolvedTable(_, _, partTable: SupportsPartitionManagement, _), _) if partTable.partitionSchema().isEmpty => failAnalysis( s"SHOW PARTITIONS is not allowed on a table that is not partitioned: ${partTable.name()}") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala index 84be3f294a6ea..0ed5671d2dcc0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala @@ -34,7 +34,7 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { case r @ AlterTableAddPartition( - ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs, _) => + ResolvedTable(_, _, table: SupportsPartitionManagement, _), partSpecs, _) => val partitionSchema = table.partitionSchema() r.copy(parts = resolvePartitionSpecs( table.name, @@ -43,7 +43,7 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { requireExactMatchedPartitionSpec(table.name, _, partitionSchema.fieldNames))) case r @ AlterTableDropPartition( - ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs, _, _) => + ResolvedTable(_, _, table: SupportsPartitionManagement, _), partSpecs, _, _) => val partitionSchema = table.partitionSchema() r.copy(parts = resolvePartitionSpecs( table.name, @@ -52,7 +52,7 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { 
requireExactMatchedPartitionSpec(table.name, _, partitionSchema.fieldNames))) case r @ AlterTableRenamePartition( - ResolvedTable(_, _, table: SupportsPartitionManagement), from, to) => + ResolvedTable(_, _, table: SupportsPartitionManagement, _), from, to) => val partitionSchema = table.partitionSchema() val Seq(resolvedFrom, resolvedTo) = resolvePartitionSpecs( table.name, @@ -61,7 +61,8 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { requireExactMatchedPartitionSpec(table.name, _, partitionSchema.fieldNames)) r.copy(from = resolvedFrom, to = resolvedTo) - case r @ ShowPartitions(ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs) => + case r @ ShowPartitions( + ResolvedTable(_, _, table: SupportsPartitionManagement, _), partSpecs) => r.copy(pattern = resolvePartitionSpecs( table.name, partSpecs.toSeq, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala index de53702d15a69..52e69480dc815 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala @@ -21,6 +21,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LeafNode +import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier, Table, TableCatalog} /** @@ -97,9 +98,26 @@ case class ResolvedNamespace(catalog: CatalogPlugin, namespace: Seq[String]) /** * A plan containing resolved table. 
*/ -case class ResolvedTable(catalog: TableCatalog, identifier: Identifier, table: Table) +case class ResolvedTable( + catalog: TableCatalog, + identifier: Identifier, + table: Table, + outputAttributes: Seq[Attribute]) extends LeafNode { - override def output: Seq[Attribute] = Nil + override def output: Seq[Attribute] = { + val qualifier = catalog.name +: identifier.namespace :+ identifier.name + outputAttributes.map(_.withQualifier(qualifier)) + } +} + +object ResolvedTable { + def create( + catalog: TableCatalog, + identifier: Identifier, + table: Table): ResolvedTable = { + val schema = CharVarcharUtils.replaceCharVarcharWithStringInSchema(table.schema) + ResolvedTable(catalog, identifier, table, schema.toAttributes) + } } case class ResolvedPartitionSpec( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index a2f59b914a10d..3ea86c6ea2abf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3557,7 +3557,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } else { DescribeColumn( relation, - ctx.describeColName.nameParts.asScala.map(_.getText).toSeq, + UnresolvedAttribute(ctx.describeColName.nameParts.asScala.map(_.getText).toSeq), isExtended) } } else { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index c51291d370c80..5728c1ed47993 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -348,7 +348,7 @@ case class DescribeRelation( */ case class DescribeColumn( relation: LogicalPlan, - colNameParts: Seq[String], + column: Expression, isExtended: Boolean) extends Command { override def children: Seq[LogicalPlan] = Seq(relation) override def output: Seq[Attribute] = DescribeCommandSchema.describeColumnAttributes() diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 4612e72a54510..9ec22a982a588 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1103,26 +1103,40 @@ class DDLParserSuite extends AnalysisTest { test("describe table column") { comparePlans(parsePlan("DESCRIBE t col"), DescribeColumn( - UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("col"), isExtended = false)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), + UnresolvedAttribute(Seq("col")), + isExtended = false)) comparePlans(parsePlan("DESCRIBE t `abc.xyz`"), DescribeColumn( - UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("abc.xyz"), isExtended = false)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), + UnresolvedAttribute(Seq("abc.xyz")), + isExtended = false)) comparePlans(parsePlan("DESCRIBE t abc.xyz"), DescribeColumn( - UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("abc", "xyz"), isExtended = false)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), + UnresolvedAttribute(Seq("abc", "xyz")), + isExtended = false)) 
comparePlans(parsePlan("DESCRIBE t `a.b`.`x.y`"), DescribeColumn( - UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("a.b", "x.y"), isExtended = false)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), + UnresolvedAttribute(Seq("a.b", "x.y")), + isExtended = false)) comparePlans(parsePlan("DESCRIBE TABLE t col"), DescribeColumn( - UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("col"), isExtended = false)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), + UnresolvedAttribute(Seq("col")), + isExtended = false)) comparePlans(parsePlan("DESCRIBE TABLE EXTENDED t col"), DescribeColumn( - UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("col"), isExtended = true)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), + UnresolvedAttribute(Seq("col")), + isExtended = true)) comparePlans(parsePlan("DESCRIBE TABLE FORMATTED t col"), DescribeColumn( - UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("col"), isExtended = true)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), + UnresolvedAttribute(Seq("col")), + isExtended = true)) val caught = intercept[AnalysisException]( parsePlan("DESCRIBE TABLE t PARTITION (ds='1970-01-01') col")) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 3c5157bea9470..16cd2068fce52 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -20,8 +20,10 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.{AnalysisException, SaveMode} import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType, CatalogUtils} +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.util.toPrettySQL import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, CatalogV2Util, Identifier, LookupCatalog, SupportsNamespaces, TableCatalog, TableChange, V1Table} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.errors.QueryCompilationErrors @@ -226,8 +228,23 @@ class ResolveSessionCatalog( case DescribeRelation(ResolvedV1TableOrViewIdentifier(ident), partitionSpec, isExtended) => DescribeTableCommand(ident.asTableIdentifier, partitionSpec, isExtended) - case DescribeColumn(ResolvedV1TableOrViewIdentifier(ident), colNameParts, isExtended) => - DescribeColumnCommand(ident.asTableIdentifier, colNameParts, isExtended) + case DescribeColumn(ResolvedViewIdentifier(ident), column: UnresolvedAttribute, isExtended) => + // For views, the column will not be resolved by `ResolveReferences` because + // `ResolvedView` stores only the identifier. 
+ DescribeColumnCommand(ident.asTableIdentifier, column.nameParts, isExtended) + + case DescribeColumn(ResolvedV1TableIdentifier(ident), column, isExtended) => + column match { + case u: UnresolvedAttribute => + throw QueryCompilationErrors.columnDoesNotExistError(u.name) + case a: Attribute => + DescribeColumnCommand(ident.asTableIdentifier, a.qualifier :+ a.name, isExtended) + case Alias(child, _) => + throw QueryCompilationErrors.commandNotSupportNestedColumnError( + "DESC TABLE COLUMN", toPrettySQL(child)) + case other => + throw new AnalysisException(s"[BUG] unexpected column expression: $other") + } // For CREATE TABLE [AS SELECT], we should use the v1 command if the catalog is resolved to the // session catalog and the table provider is not v2. @@ -639,9 +656,16 @@ class ResolveSessionCatalog( } } + object ResolvedViewIdentifier { + def unapply(resolved: LogicalPlan): Option[Identifier] = resolved match { + case ResolvedView(ident, _) => Some(ident) + case _ => None + } + } + object ResolvedV1TableIdentifier { def unapply(resolved: LogicalPlan): Option[Identifier] = resolved match { - case ResolvedTable(catalog, ident, _: V1Table) if isSessionCatalog(catalog) => Some(ident) + case ResolvedTable(catalog, ident, _: V1Table, _) if isSessionCatalog(catalog) => Some(ident) case _ => None } } @@ -649,7 +673,7 @@ class ResolveSessionCatalog( object ResolvedV1TableOrViewIdentifier { def unapply(resolved: LogicalPlan): Option[Identifier] = resolved match { case ResolvedV1TableIdentifier(ident) => Some(ident) - case ResolvedView(ident, _) => Some(ident) + case ResolvedViewIdentifier(ident) => Some(ident) case _ => None } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 91c5a886e1d0a..cb72264b9f004 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -36,6 +36,7 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.DescribeCommandSchema import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier, CaseInsensitiveMap, CharVarcharUtils} +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.DataSource import org.apache.spark.sql.execution.datasources.csv.CSVFileFormat import org.apache.spark.sql.execution.datasources.json.JsonFileFormat @@ -764,13 +765,13 @@ case class DescribeColumnCommand( val colName = UnresolvedAttribute(colNameParts).name val field = { relation.resolve(colNameParts, resolver).getOrElse { - throw new AnalysisException(s"Column $colName does not exist") + throw QueryCompilationErrors.columnDoesNotExistError(colName) } } if (!field.isInstanceOf[Attribute]) { // If the field is not an attribute after `resolve`, then it's a nested field. 
- throw new AnalysisException( - s"DESC TABLE COLUMN command does not support nested data types: $colName") + throw QueryCompilationErrors.commandNotSupportNestedColumnError( + "DESC TABLE COLUMN", colName) } val catalogTable = catalog.getTempViewOrPermanentTableMetadata(table) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 2674aaf4f2e88..faba204dcb8f4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -21,12 +21,14 @@ import scala.collection.JavaConverters._ import org.apache.spark.sql.{AnalysisException, SparkSession, Strategy} import org.apache.spark.sql.catalyst.analysis.{ResolvedNamespace, ResolvedPartitionSpec, ResolvedTable} -import org.apache.spark.sql.catalyst.expressions.{And, Expression, NamedExpression, PredicateHelper, SubqueryExpression} +import org.apache.spark.sql.catalyst.expressions.{And, Attribute, Expression, NamedExpression, PredicateHelper, SubqueryExpression} import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.util.toPrettySQL import org.apache.spark.sql.connector.catalog.{CatalogV2Util, StagingTableCatalog, SupportsNamespaces, SupportsPartitionManagement, SupportsWrite, TableCapability, TableCatalog, TableChange} import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, MicroBatchStream} import org.apache.spark.sql.connector.write.V1Write +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.{FilterExec, LeafExecNode, LocalTableScanExec, ProjectExec, RowDataSourceScanExec, SparkPlan} import org.apache.spark.sql.execution.datasources.DataSourceStrategy import org.apache.spark.sql.execution.streaming.continuous.{WriteToContinuousDataSource, WriteToContinuousDataSourceExec} @@ -272,8 +274,14 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat } DescribeTableExec(desc.output, r.table, isExtended) :: Nil - case DescribeColumn(_: ResolvedTable, _, _) => - throw new AnalysisException("Describing columns is not supported for v2 tables.") + case desc @ DescribeColumn(_: ResolvedTable, column, isExtended) => + column match { + case c: Attribute => + DescribeColumnExec(desc.output, c, isExtended) :: Nil + case nested => + throw QueryCompilationErrors.commandNotSupportNestedColumnError( + "DESC TABLE COLUMN", toPrettySQL(nested)) + } case DropTable(r: ResolvedTable, ifExists, purge) => DropTableExec(r.catalog, r.identifier, ifExists, purge, invalidateCache(r)) :: Nil @@ -284,7 +292,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case AlterTable(catalog, ident, _, changes) => AlterTableExec(catalog, ident, changes) :: Nil - case RenameTable(r @ ResolvedTable(catalog, oldIdent, _), newIdent, isView) => + case RenameTable(r @ ResolvedTable(catalog, oldIdent, _, _), newIdent, isView) => if (isView) { throw new AnalysisException( "Cannot rename a table with ALTER VIEW. 
Please use ALTER TABLE instead.") @@ -311,7 +319,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat ns, Map(SupportsNamespaces.PROP_COMMENT -> comment)) :: Nil - case CommentOnTable(ResolvedTable(catalog, identifier, _), comment) => + case CommentOnTable(ResolvedTable(catalog, identifier, _, _), comment) => val changes = TableChange.setProperty(TableCatalog.PROP_COMMENT, comment) AlterTableExec(catalog, identifier, Seq(changes)) :: Nil @@ -343,17 +351,20 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat throw new AnalysisException("ANALYZE TABLE is not supported for v2 tables.") case AlterTableAddPartition( - ResolvedTable(_, _, table: SupportsPartitionManagement), parts, ignoreIfExists) => + ResolvedTable(_, _, table: SupportsPartitionManagement, _), parts, ignoreIfExists) => AlterTableAddPartitionExec( table, parts.asResolvedPartitionSpecs, ignoreIfExists) :: Nil case AlterTableDropPartition( - ResolvedTable(_, _, table: SupportsPartitionManagement), parts, ignoreIfNotExists, purge) => + ResolvedTable(_, _, table: SupportsPartitionManagement, _), + parts, + ignoreIfNotExists, + purge) => AlterTableDropPartitionExec( table, parts.asResolvedPartitionSpecs, ignoreIfNotExists, purge) :: Nil case AlterTableRenamePartition( - ResolvedTable(_, _, table: SupportsPartitionManagement), from, to) => + ResolvedTable(_, _, table: SupportsPartitionManagement, _), from, to) => AlterTableRenamePartitionExec( table, Seq(from).asResolvedPartitionSpecs.head, @@ -380,7 +391,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat throw new AnalysisException("SHOW COLUMNS is not supported for v2 tables.") case r @ ShowPartitions( - ResolvedTable(catalog, _, table: SupportsPartitionManagement), + ResolvedTable(catalog, _, table: SupportsPartitionManagement, _), pattern @ (None | Some(_: ResolvedPartitionSpec))) => ShowPartitionsExec( r.output, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeColumnExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeColumnExec.scala new file mode 100644 index 0000000000000..c7ce69f744cce --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeColumnExec.scala @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources.v2 + +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.encoders.RowEncoder +import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} +import org.apache.spark.sql.types.StructType + +case class DescribeColumnExec( + override val output: Seq[Attribute], + column: Attribute, + isExtended: Boolean) extends V2CommandExec { + private val toRow = { + RowEncoder(StructType.fromAttributes(output)).resolveAndBind().createSerializer() + } + + override protected def run(): Seq[InternalRow] = { + val rows = new ArrayBuffer[InternalRow]() + + val comment = if (column.metadata.contains("comment")) { + column.metadata.getString("comment") + } else { + "NULL" + } + + rows += toCatalystRow("col_name", column.name) + rows += toCatalystRow("data_type", column.dataType.catalogString) + rows += toCatalystRow("comment", comment) + + // TODO: The extended description (isExtended = true) can be added here. + + rows.toSeq + } + + private def toCatalystRow(strs: String*): InternalRow = { + toRow(new GenericRowWithSchema(strs.toArray, schema)).copy() + } +} diff --git a/sql/core/src/test/resources/sql-tests/inputs/describe-table-column.sql b/sql/core/src/test/resources/sql-tests/inputs/describe-table-column.sql index d55e398329b76..146977c806182 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/describe-table-column.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/describe-table-column.sql @@ -1,5 +1,5 @@ -- Test temp table -CREATE TEMPORARY VIEW desc_col_temp_view (key int COMMENT 'column_comment') USING PARQUET; +CREATE TEMPORARY VIEW desc_col_temp_view (key int COMMENT 'column_comment', col struct) USING PARQUET; DESC desc_col_temp_view key; @@ -13,6 +13,9 @@ DESC FORMATTED desc_col_temp_view desc_col_temp_view.key; -- Describe a non-existent column DESC desc_col_temp_view key1; +-- Describe a nested column +DESC desc_col_temp_view col.x; + -- Test persistent table CREATE TABLE desc_col_table (key int COMMENT 'column_comment') USING PARQUET; @@ -24,6 +27,9 @@ DESC EXTENDED desc_col_table key; DESC FORMATTED desc_col_table key; +-- Describe a non-existent column +DESC desc_col_table key1; + -- Test complex columns CREATE TABLE desc_complex_col_table (`a.b` int, col struct) USING PARQUET; diff --git a/sql/core/src/test/resources/sql-tests/results/describe-table-column.sql.out b/sql/core/src/test/resources/sql-tests/results/describe-table-column.sql.out index 22ef8e13c36a8..cc5b836b74109 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe-table-column.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe-table-column.sql.out @@ -1,9 +1,9 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 28 +-- Number of queries: 30 -- !query -CREATE TEMPORARY VIEW desc_col_temp_view (key int COMMENT 'column_comment') USING PARQUET +CREATE TEMPORARY VIEW desc_col_temp_view (key int COMMENT 'column_comment', col struct) USING PARQUET -- !query schema struct<> -- !query output @@ -80,6 +80,15 @@ org.apache.spark.sql.AnalysisException Column key1 does not exist +-- !query +DESC desc_col_temp_view col.x +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +DESC TABLE COLUMN does not support nested column: col.x + + -- !query CREATE TABLE desc_col_table (key int COMMENT 'column_comment') USING PARQUET -- !query schema @@ -140,6 +149,15 @@ max_col_len 4 histogram NULL +-- 
!query +DESC desc_col_table key1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Column key1 does not exist + + -- !query CREATE TABLE desc_complex_col_table (`a.b` int, col struct) USING PARQUET -- !query schema @@ -188,7 +206,7 @@ DESC FORMATTED desc_complex_col_table col.x struct<> -- !query output org.apache.spark.sql.AnalysisException -DESC TABLE COLUMN command does not support nested data types: col.x +DESC TABLE COLUMN does not support nested column: col.x -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out index 93b0cc3fe97e1..3b5d8a1396283 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out @@ -561,7 +561,7 @@ struct -- !query output == Physical Plan == Execute DescribeColumnCommand - +- DescribeColumnCommand `default`.`t`, [b], false + +- DescribeColumnCommand `default`.`t`, [spark_catalog, default, t, b], false -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index f821335690aeb..47829b68cc617 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -154,13 +154,54 @@ class DataSourceV2SQLSuite Array("Table Properties", "[bar=baz]", ""))) } - test("Describe column is not supported for v2 catalog") { - withTable("testcat.tbl") { - spark.sql("CREATE TABLE testcat.tbl (id bigint) USING foo") - val ex = intercept[AnalysisException] { - spark.sql("DESCRIBE testcat.tbl id") + test("Describe column for v2 catalog") { + val t = "testcat.tbl" + withTable(t) { + sql(s"CREATE TABLE $t (id bigint, data string COMMENT 'hello') USING foo") + val df1 = sql(s"DESCRIBE $t id") + assert(df1.schema.map(field => (field.name, field.dataType)) + === Seq(("info_name", StringType), ("info_value", StringType))) + assert(df1.collect === Seq( + Row("col_name", "id"), + Row("data_type", "bigint"), + Row("comment", "NULL"))) + val df2 = sql(s"DESCRIBE $t data") + assert(df2.schema.map(field => (field.name, field.dataType)) + === Seq(("info_name", StringType), ("info_value", StringType))) + assert(df2.collect === Seq( + Row("col_name", "data"), + Row("data_type", "string"), + Row("comment", "hello"))) + + assertAnalysisError( + s"DESCRIBE $t invalid_col", + "cannot resolve '`invalid_col`' given input columns: [testcat.tbl.data, testcat.tbl.id]") + } + } + + test("Describe column for v2 catalog should work with qualified columns") { + val t = "testcat.ns.tbl" + withTable(t) { + sql(s"CREATE TABLE $t (id bigint) USING foo") + Seq("testcat.ns.tbl.id", "ns.tbl.id", "tbl.id", "id").foreach { col => + val df = sql(s"DESCRIBE $t $col") + assert(df.schema.map(field => (field.name, field.dataType)) + === Seq(("info_name", StringType), ("info_value", StringType))) + assert(df.collect === Seq( + Row("col_name", "id"), + Row("data_type", "bigint"), + Row("comment", "NULL"))) } - assert(ex.message.contains("Describing columns is not supported for v2 tables")) + } + } + + test("Describing nested column for v2 catalog is not supported") { + val t = "testcat.tbl" + withTable(t) { + sql(s"CREATE TABLE $t (d struct) USING foo") + assertAnalysisError( + s"describe $t d.a", + "DESC TABLE COLUMN does not support nested column") } } diff 
--git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 6571e27b928bb..ee2af085c0fa6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -680,13 +680,13 @@ class PlanResolutionSuite extends AnalysisTest { val tableIdent2 = Identifier.of(Array.empty, "tab") parseResolveCompare(s"DROP TABLE $tableName1", - DropTable(ResolvedTable(testCat, tableIdent1, table), ifExists = false, purge = false)) + DropTable(ResolvedTable.create(testCat, tableIdent1, table), ifExists = false, purge = false)) parseResolveCompare(s"DROP TABLE IF EXISTS $tableName1", - DropTable(ResolvedTable(testCat, tableIdent1, table), ifExists = true, purge = false)) + DropTable(ResolvedTable.create(testCat, tableIdent1, table), ifExists = true, purge = false)) parseResolveCompare(s"DROP TABLE $tableName2", - DropTable(ResolvedTable(testCat, tableIdent2, table), ifExists = false, purge = false)) + DropTable(ResolvedTable.create(testCat, tableIdent2, table), ifExists = false, purge = false)) parseResolveCompare(s"DROP TABLE IF EXISTS $tableName2", - DropTable(ResolvedTable(testCat, tableIdent2, table), ifExists = true, purge = false)) + DropTable(ResolvedTable.create(testCat, tableIdent2, table), ifExists = true, purge = false)) } test("drop view") { From 6b86aa0b524b4d19b91ab434d2088667c9a1e662 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Mon, 4 Jan 2021 10:23:38 -0800 Subject: [PATCH 0945/1009] [SPARK-33984][PYTHON] Upgrade to Py4J 0.10.9.1 ### What changes were proposed in this pull request? This PR upgrade Py4J from 0.10.9 to 0.10.9.1 that contains some bug fixes and improvements. It contains one bug fix (https://github.com/bartdag/py4j/commit/4152353ac142a7c6d177e0d8f5d420d92c846a30). ### Why are the changes needed? To leverage fixes from the upstream in Py4J. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Jenkins build and GitHub Actions will test it out. Closes #31009 from HyukjinKwon/SPARK-33984. 
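The archive name is duplicated across `bin/pyspark`, `bin/pyspark2.cmd`, `python/setup.py`, `sbin/spark-config.sh` and `PythonUtils.PY4J_ZIP_NAME`, so a Py4J bump has to move all of them in lockstep. A minimal sketch of how the Scala side consumes that constant (illustrative only, not the real `PythonUtils` body):

```
// Sketch: the zip name lives in one constant and is joined onto SPARK_HOME.
// If it drifts from the file shipped under python/lib/, PySpark cannot
// import py4j on startup.
object Py4jPathSketch {
  val PY4J_ZIP_NAME = "py4j-0.10.9.1-src.zip"

  def sparkPythonPath(sparkHome: String): String =
    Seq(s"$sparkHome/python", s"$sparkHome/python/lib/$PY4J_ZIP_NAME")
      .mkString(java.io.File.pathSeparator)
}
```

A mismatch typically surfaces only at runtime as an import error for `py4j`, which is why the shell scripts and the Scala constant are updated together in this patch.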
Authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- bin/pyspark | 2 +- bin/pyspark2.cmd | 2 +- core/pom.xml | 2 +- .../apache/spark/api/python/PythonUtils.scala | 2 +- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 2 +- python/docs/Makefile | 2 +- python/docs/make2.bat | 2 +- ...j-0.10.9-src.zip => py4j-0.10.9.1-src.zip} | Bin 41587 -> 41589 bytes python/setup.py | 2 +- sbin/spark-config.sh | 2 +- 11 files changed, 10 insertions(+), 10 deletions(-) rename python/lib/{py4j-0.10.9-src.zip => py4j-0.10.9.1-src.zip} (94%) diff --git a/bin/pyspark b/bin/pyspark index 463a2dcfc7e6c..251bfef5c80a8 100755 --- a/bin/pyspark +++ b/bin/pyspark @@ -50,7 +50,7 @@ export PYSPARK_DRIVER_PYTHON_OPTS # Add the PySpark classes to the Python path: export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH" -export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9-src.zip:$PYTHONPATH" +export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.1-src.zip:$PYTHONPATH" # Load the PySpark shell.py script when ./pyspark is used interactively: export OLD_PYTHONSTARTUP="$PYTHONSTARTUP" diff --git a/bin/pyspark2.cmd b/bin/pyspark2.cmd index dc34be1a41706..5741480fe5501 100644 --- a/bin/pyspark2.cmd +++ b/bin/pyspark2.cmd @@ -30,7 +30,7 @@ if "x%PYSPARK_DRIVER_PYTHON%"=="x" ( ) set PYTHONPATH=%SPARK_HOME%\python;%PYTHONPATH% -set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.9-src.zip;%PYTHONPATH% +set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.9.1-src.zip;%PYTHONPATH% set OLD_PYTHONSTARTUP=%PYTHONSTARTUP% set PYTHONSTARTUP=%SPARK_HOME%\python\pyspark\shell.py diff --git a/core/pom.xml b/core/pom.xml index 1f24c5273ad0b..09fa153c8f20b 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -414,7 +414,7 @@ net.sf.py4j py4j - 0.10.9 + 0.10.9.1 org.apache.spark diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala index 2f47d28f09103..717eb4db6dd93 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala @@ -27,7 +27,7 @@ import org.apache.spark.SparkContext import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} private[spark] object PythonUtils { - val PY4J_ZIP_NAME = "py4j-0.10.9-src.zip" + val PY4J_ZIP_NAME = "py4j-0.10.9.1-src.zip" /** Get the PYTHONPATH for PySpark, either from SPARK_HOME, if it is set, or from our JAR */ def sparkPythonPath: String = { diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index fc3b669e721ac..9c516203dd3fa 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -209,7 +209,7 @@ parquet-format/2.4.0//parquet-format-2.4.0.jar parquet-hadoop/1.10.1//parquet-hadoop-1.10.1.jar parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar protobuf-java/2.5.0//protobuf-java-2.5.0.jar -py4j/0.10.9//py4j-0.10.9.jar +py4j/0.10.9.1//py4j-0.10.9.1.jar pyrolite/4.30//pyrolite-4.30.jar scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar scala-compiler/2.12.10//scala-compiler-2.12.10.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 0ff30ce0c0a2d..1d80fadb5762a 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -224,7 +224,7 @@ parquet-format/2.4.0//parquet-format-2.4.0.jar parquet-hadoop/1.10.1//parquet-hadoop-1.10.1.jar parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar 
protobuf-java/2.5.0//protobuf-java-2.5.0.jar -py4j/0.10.9//py4j-0.10.9.jar +py4j/0.10.9.1//py4j-0.10.9.1.jar pyrolite/4.30//pyrolite-4.30.jar re2j/1.1//re2j-1.1.jar scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar diff --git a/python/docs/Makefile b/python/docs/Makefile index 763f493a0eb58..090ad7d62bed1 100644 --- a/python/docs/Makefile +++ b/python/docs/Makefile @@ -6,7 +6,7 @@ SPHINXBUILD ?= sphinx-build SOURCEDIR ?= source BUILDDIR ?= build -export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.9-src.zip) +export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.9.1-src.zip) # Put it first so that "make" without argument is like "make help". help: diff --git a/python/docs/make2.bat b/python/docs/make2.bat index 2f87032820f42..485b5eda19735 100644 --- a/python/docs/make2.bat +++ b/python/docs/make2.bat @@ -8,7 +8,7 @@ if "%SPHINXBUILD%" == "" ( set SOURCEDIR=source set BUILDDIR=build -set PYTHONPATH=..;..\lib\py4j-0.10.9-src.zip +set PYTHONPATH=..;..\lib\py4j-0.10.9.1-src.zip if "%1" == "" goto help diff --git a/python/lib/py4j-0.10.9-src.zip b/python/lib/py4j-0.10.9.1-src.zip similarity index 94% rename from python/lib/py4j-0.10.9-src.zip rename to python/lib/py4j-0.10.9.1-src.zip index 2c498361470305cd2f2cdecec81c2aa21a4a6f3d..11eb331b6f2c67326dfbc2e600fb97b30ae6f47b 100644 GIT binary patch delta 879 zcmex-gz4)MCcXe~W)=|!1_lm>4GbEAlMl{Q<+t_;i$DA5Q+#O!Hv=Qfx5;1Uh4UaQ zn#iXKR&x%aW}{oF1Pj=J{SwOP25kN)@skN6`%YR7LsnVlDKkWtM?n`u)7=7EeeOo^0#3)e6w?#oDf7db%qLt$unjgW#t9AS^A%(q9iZ~BqwLhldebe zJtG4HIDF#c%TkMqGxPJ};}vWb)D83u4fHJa4Ar>;ycwC~m=Qs!lE8e!7id~O5DPF! z07(W0pdk!f8bPdrN|P-8`1s7c%#!$cy@JZn5LO0ekd@HDW}N(SzOW+5Rlhshdy|3M zZ8I4dL{Q9`&cHBHR-6x2o-+Y`!^yzFhoa9za`J`wqTEQffiyXT zGzI@!+;W+jf#H$>1A`!nrg9mmqti0;5_2-EQj4&-8e|S6pyL-vuxx3ZsxY~FfjqiR z5EXkDNU&~c+@;4b*+CDRC!D~ZU|V*ZM~H#pswH|L&S-fr&7Jl?|kw69|QY KVd^sD#b#-iAy#4E42G!JjMLtN-MY-7+Jn40+O2? zCaH`rxw%8~Clf@rRYna%c9+akW)?qb%Yey?6?FNZsOrQa8NB(v!c$&|>`cL>Pb!AYcUIC5;S|&(4=o28AFr z^nk+P;0^FbHDWT)0tuTXjT?cQ3o1>r^po>*3ldB83MxZGSQ(g!F(6@q1j~}f2hx*k z7D&_+&;W`8#wSy+U*=_CD0F84IvLr~|Ky>L$Vx0rj8D$b$w^Hv$;{6yhB*fmad`aW zCoO9kaG`-^=Y((u2DM5?v@lVgY_L#9nSdGK1eU*0B63M1V;j^t1x5KK`9SBuoI!*q z&;6R!wwaZIfm4uyfgi<_;gcUM6tBnW3NCOYqNEw12&xgm^PmnZOD!q}h7ZhLpmc>N zctBoLNnpO=%fK)>dA^{_lE%$2gW}^e^D;}~<6$OY^&u-8$ZecJD9p^j;58q_0{{S$ B<;DO2 diff --git a/python/setup.py b/python/setup.py index f5836ecf5fbfc..7bb8a00171d37 100755 --- a/python/setup.py +++ b/python/setup.py @@ -250,7 +250,7 @@ def run(self): license='http://www.apache.org/licenses/LICENSE-2.0', # Don't forget to update python/docs/source/getting_started/install.rst # if you're updating the versions or dependencies. 
- install_requires=['py4j==0.10.9'], + install_requires=['py4j==0.10.9.1'], extras_require={ 'ml': ['numpy>=1.7'], 'mllib': ['numpy>=1.7'], diff --git a/sbin/spark-config.sh b/sbin/spark-config.sh index b53442ec096a1..7389416bb3192 100755 --- a/sbin/spark-config.sh +++ b/sbin/spark-config.sh @@ -28,6 +28,6 @@ export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"${SPARK_HOME}/conf"}" # Add the PySpark classes to the PYTHONPATH: if [ -z "${PYSPARK_PYTHONPATH_SET}" ]; then export PYTHONPATH="${SPARK_HOME}/python:${PYTHONPATH}" - export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9-src.zip:${PYTHONPATH}" + export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.1-src.zip:${PYTHONPATH}" export PYSPARK_PYTHONPATH_SET=1 fi From fc3f22645e5c542e80a086d96da384feb6afe121 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 4 Jan 2021 10:26:39 -0800 Subject: [PATCH 0946/1009] [SPARK-33990][SQL][TESTS] Remove partition data by v2 `ALTER TABLE .. DROP PARTITION` ### What changes were proposed in this pull request? Remove partition data by `ALTER TABLE .. DROP PARTITION` in V2 table catalog used in tests. ### Why are the changes needed? This is a bug fix. Before the fix, `ALTER TABLE .. DROP PARTITION` does not remove the data belongs to the dropped partition. As a consequence of that, the `select` query returns removed data. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running tests suites for v1 and v2 catalogs: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableDropPartitionSuite" ``` Closes #31014 from MaxGekk/fix-drop-partition-v2. Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../connector/InMemoryAtomicPartitionTable.scala | 1 + .../sql/connector/InMemoryPartitionTable.scala | 1 + .../apache/spark/sql/connector/InMemoryTable.scala | 4 ++++ .../command/AlterTableDropPartitionSuiteBase.scala | 13 ++++++++++++- 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryAtomicPartitionTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryAtomicPartitionTable.scala index c2a95cc3b8b07..f313c6c389ee4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryAtomicPartitionTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryAtomicPartitionTable.scala @@ -49,6 +49,7 @@ class InMemoryAtomicPartitionTable ( override def dropPartition(ident: InternalRow): Boolean = { if (memoryTablePartitions.containsKey(ident)) { memoryTablePartitions.remove(ident) + removePartitionKey(ident.toSeq(schema)) true } else { false diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala index a3d610af2c06d..9e3555b9bb515 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala @@ -61,6 +61,7 @@ class InMemoryPartitionTable( def dropPartition(ident: InternalRow): Boolean = { if (memoryTablePartitions.containsKey(ident)) { memoryTablePartitions.remove(ident) + removePartitionKey(ident.toSeq(schema)) true } else { false diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala index 201d67a815bea..a1253dfe67e7a 100644 --- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala @@ -187,6 +187,10 @@ class InMemoryTable( true } + protected def removePartitionKey(key: Seq[Any]): Unit = dataMap.synchronized { + dataMap.remove(key) + } + def withData(data: Array[BufferedRows]): InMemoryTable = dataMap.synchronized { data.foreach(_.rows.foreach { row => val key = getKey(row) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala index cf8a1e9de5e0e..d8a8920deadc7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.command -import org.apache.spark.sql.{AnalysisException, QueryTest} +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionsException import org.apache.spark.sql.internal.SQLConf @@ -144,4 +144,15 @@ trait AlterTableDropPartitionSuiteBase extends QueryTest with DDLCommandTestUtil checkPartitions(t) } } + + test("SPARK-33990: don not return data from dropped partition") { + withNamespaceAndTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id int, part int) $defaultUsing PARTITIONED BY (part)") + sql(s"INSERT INTO $t PARTITION (part=0) SELECT 0") + sql(s"INSERT INTO $t PARTITION (part=1) SELECT 1") + QueryTest.checkAnswer(sql(s"SELECT * FROM $t"), Seq(Row(0, 0), Row(1, 1))) + sql(s"ALTER TABLE $t DROP PARTITION (part=0)") + QueryTest.checkAnswer(sql(s"SELECT * FROM $t"), Seq(Row(1, 1))) + } + } } From 414d323d6c92584beb87e1c426e4beab5ddbd452 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Mon, 4 Jan 2021 10:31:20 -0800 Subject: [PATCH 0947/1009] [SPARK-33988][SQL][TEST] Add an option to enable CBO in TPCDSQueryBenchmark ### What changes were proposed in this pull request? This PR intends to add a new option `--cbo` to enable CBO in TPCDSQueryBenchmark. I think this option is useful so as to monitor performance changes with CBO enabled. ### Why are the changes needed? To monitor performance chaneges with CBO enabled. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually checked. Closes #31011 from maropu/AddOptionForCBOInTPCDSBenchmark. 
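For reference, the new flag amounts to running something like the following by hand before the queries are timed, once per TPCDS table (a sketch that reuses the `SQLConf` entries the patch sets; `store_sales` stands in for each table):

```
import org.apache.spark.sql.internal.SQLConf

// CBO only pays off when column-level statistics exist, hence the ANALYZE
// pass over every table before the benchmark queries run.
spark.sql(s"SET ${SQLConf.CBO_ENABLED.key}=true")
spark.sql(s"SET ${SQLConf.PLAN_STATS_ENABLED.key}=true")
spark.sql(s"SET ${SQLConf.JOIN_REORDER_ENABLED.key}=true")
spark.sql(s"SET ${SQLConf.HISTOGRAM_ENABLED.key}=true")
spark.sql("ANALYZE TABLE store_sales COMPUTE STATISTICS FOR ALL COLUMNS")
```

That is also why the benchmark persists the tables with `saveAsTable` instead of registering temp views when `--cbo` is given: `ANALYZE TABLE ... FOR ALL COLUMNS` needs a catalog table to attach the statistics to.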
Authored-by: Takeshi Yamamuro Signed-off-by: Dongjoon Hyun --- .../benchmark/TPCDSQueryBenchmark.scala | 39 +++++++++++++++++-- .../TPCDSQueryBenchmarkArguments.scala | 6 +++ 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala index f931914b19c6c..b34eac5df8090 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala @@ -19,11 +19,15 @@ package org.apache.spark.sql.execution.benchmark import org.apache.spark.SparkConf import org.apache.spark.benchmark.Benchmark +import org.apache.spark.internal.Logging import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.catalog.HiveTableRelation import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias import org.apache.spark.sql.catalyst.util._ +import org.apache.spark.sql.catalyst.util.DateTimeConstants.NANOS_PER_SECOND import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.util.Utils /** * Benchmark to measure TPCDS query performance. @@ -38,7 +42,10 @@ import org.apache.spark.sql.execution.datasources.LogicalRelation * Results will be written to "benchmarks/TPCDSQueryBenchmark-results.txt". * }}} */ -object TPCDSQueryBenchmark extends SqlBasedBenchmark { +object TPCDSQueryBenchmark extends SqlBasedBenchmark with Logging { + + private lazy val warehousePath = + Utils.createTempDir(namePrefix = "spark-warehouse").getAbsolutePath override def getSparkSession: SparkSession = { val conf = new SparkConf() @@ -50,6 +57,7 @@ object TPCDSQueryBenchmark extends SqlBasedBenchmark { .set("spark.executor.memory", "3g") .set("spark.sql.autoBroadcastJoinThreshold", (20 * 1024 * 1024).toString) .set("spark.sql.crossJoin.enabled", "true") + .set("spark.sql.warehouse.dir", warehousePath) SparkSession.builder.config(conf).getOrCreate() } @@ -60,9 +68,14 @@ object TPCDSQueryBenchmark extends SqlBasedBenchmark { "web_returns", "web_site", "reason", "call_center", "warehouse", "ship_mode", "income_band", "time_dim", "web_page") - def setupTables(dataLocation: String): Map[String, Long] = { + def setupTables(dataLocation: String, createTempView: Boolean): Map[String, Long] = { tables.map { tableName => - spark.read.parquet(s"$dataLocation/$tableName").createOrReplaceTempView(tableName) + val df = spark.read.parquet(s"$dataLocation/$tableName") + if (createTempView) { + df.createOrReplaceTempView(tableName) + } else { + df.write.saveAsTable(tableName) + } tableName -> spark.table(tableName).count() }.toMap } @@ -146,7 +159,25 @@ object TPCDSQueryBenchmark extends SqlBasedBenchmark { s"Empty queries to run. 
Bad query name filter: ${benchmarkArgs.queryFilter}") } - val tableSizes = setupTables(benchmarkArgs.dataLocation) + val tableSizes = setupTables(benchmarkArgs.dataLocation, + createTempView = !benchmarkArgs.cboEnabled) + if (benchmarkArgs.cboEnabled) { + spark.sql(s"SET ${SQLConf.CBO_ENABLED.key}=true") + spark.sql(s"SET ${SQLConf.PLAN_STATS_ENABLED.key}=true") + spark.sql(s"SET ${SQLConf.JOIN_REORDER_ENABLED.key}=true") + spark.sql(s"SET ${SQLConf.HISTOGRAM_ENABLED.key}=true") + + // Analyze all the tables before running TPCDS queries + val startTime = System.nanoTime() + tables.foreach { tableName => + spark.sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS FOR ALL COLUMNS") + } + logInfo("The elapsed time to analyze all the tables is " + + s"${(System.nanoTime() - startTime) / NANOS_PER_SECOND.toDouble} seconds") + } else { + spark.sql(s"SET ${SQLConf.CBO_ENABLED.key}=false") + } + runTpcdsQueries(queryLocation = "tpcds", queries = queriesV1_4ToRun, tableSizes) runTpcdsQueries(queryLocation = "tpcds-v2.7.0", queries = queriesV2_7ToRun, tableSizes, nameSuffix = nameSuffixForQueriesV2_7) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmarkArguments.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmarkArguments.scala index 184ffff94298a..80a6bffc61ea4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmarkArguments.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmarkArguments.scala @@ -23,6 +23,7 @@ import java.util.Locale class TPCDSQueryBenchmarkArguments(val args: Array[String]) { var dataLocation: String = null var queryFilter: Set[String] = Set.empty + var cboEnabled: Boolean = false parseArgs(args.toList) validateArguments() @@ -44,6 +45,10 @@ class TPCDSQueryBenchmarkArguments(val args: Array[String]) { queryFilter = value.toLowerCase(Locale.ROOT).split(",").map(_.trim).toSet args = tail + case optName :: tail if optionMatch("--cbo", optName) => + cboEnabled = true + args = tail + case _ => // scalastyle:off println System.err.println("Unknown/unsupported param " + args) @@ -60,6 +65,7 @@ class TPCDSQueryBenchmarkArguments(val args: Array[String]) { |Options: | --data-location Path to TPCDS data | --query-filter Queries to filter, e.g., q3,q5,q13 + | --cbo Whether to enable cost-based optimization | |------------------------------------------------------------------------------------------------------------------ |In order to run this benchmark, please follow the instructions at From d6322bf70c622f4068e510975e9f53c8e18bf59c Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Mon, 4 Jan 2021 10:36:31 -0800 Subject: [PATCH 0948/1009] [SPARK-33983][PYTHON] Update cloudpickle to v1.6.0 ### What changes were proposed in this pull request? This PR proposes to upgrade cloudpickle from 1.5.0 to 1.6.0. It virtually contains one fix: https://github.com/cloudpipe/cloudpickle/commit/4510be850d55bc60decf86953324f98bc3199f9e From a cursory look, this isn't a regression, and not even properly supported in Python: ```python >>> import pickle >>> pickle.dumps({}.keys()) Traceback (most recent call last): File "", line 1, in TypeError: cannot pickle 'dict_keys' object ``` So it seems fine not to backport. ### Why are the changes needed? To leverage bug fixes from the cloudpickle upstream. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Jenkins build and GitHub actions build will test it out. 
Closes #31007 from HyukjinKwon/cloudpickle-upgrade. Authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- python/pyspark/cloudpickle/__init__.py | 6 ++- python/pyspark/cloudpickle/cloudpickle.py | 22 ++++++++--- .../pyspark/cloudpickle/cloudpickle_fast.py | 37 +++++++++++++++---- 3 files changed, 52 insertions(+), 13 deletions(-) diff --git a/python/pyspark/cloudpickle/__init__.py b/python/pyspark/cloudpickle/__init__.py index 4e85b637800dc..56506d95fa1be 100644 --- a/python/pyspark/cloudpickle/__init__.py +++ b/python/pyspark/cloudpickle/__init__.py @@ -4,4 +4,8 @@ from pyspark.cloudpickle.cloudpickle import * # noqa from pyspark.cloudpickle.cloudpickle_fast import CloudPickler, dumps, dump # noqa -__version__ = '1.5.0' +# Conform to the convention used by python serialization libraries, which +# expose their Pickler subclass at top-level under the "Pickler" name. +Pickler = CloudPickler + +__version__ = '1.6.0' diff --git a/python/pyspark/cloudpickle/cloudpickle.py b/python/pyspark/cloudpickle/cloudpickle.py index 58c274bd79720..05d52afa0da96 100644 --- a/python/pyspark/cloudpickle/cloudpickle.py +++ b/python/pyspark/cloudpickle/cloudpickle.py @@ -88,7 +88,7 @@ def g(): DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL # Track the provenance of reconstructed dynamic classes to make it possible to -# reconstruct instances from the matching singleton class definition when +# recontruct instances from the matching singleton class definition when # appropriate and preserve the usual "isinstance" semantics of Python objects. _DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary() _DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary() @@ -236,7 +236,7 @@ def _extract_code_globals(co): out_names = {names[oparg] for _, oparg in _walk_global_ops(co)} # Declaring a function inside another one using the "def ..." - # syntax generates a constant code object corresponding to the one + # syntax generates a constant code object corresonding to the one # of the nested function's As the nested function may itself need # global variables, we need to introspect its code, extract its # globals, (look for code object in it's co_consts attribute..) and @@ -457,7 +457,7 @@ def _is_parametrized_type_hint(obj): is_typing = getattr(obj, '__origin__', None) is not None # typing_extensions.Literal - is_literal = getattr(obj, '__values__', None) is not None + is_litteral = getattr(obj, '__values__', None) is not None # typing_extensions.Final is_final = getattr(obj, '__type__', None) is not None @@ -469,7 +469,7 @@ def _is_parametrized_type_hint(obj): getattr(obj, '__result__', None) is not None and getattr(obj, '__args__', None) is not None ) - return any((is_typing, is_literal, is_final, is_union, is_tuple, + return any((is_typing, is_litteral, is_final, is_union, is_tuple, is_callable)) def _create_parametrized_type_hint(origin, args): @@ -699,7 +699,7 @@ def _make_skel_func(code, cell_count, base_globals=None): """ # This function is deprecated and should be removed in cloudpickle 1.7 warnings.warn( - "A pickle file created using an old (<=1.4.1) version of cloudpickle " + "A pickle file created using an old (<=1.4.1) version of cloudpicke " "is currently being loaded. 
This is not supported by cloudpickle and " "will break in cloudpickle 1.7", category=UserWarning ) @@ -828,3 +828,15 @@ def _get_bases(typ): # For regular class objects bases_attr = '__bases__' return getattr(typ, bases_attr) + + +def _make_dict_keys(obj): + return dict.fromkeys(obj).keys() + + +def _make_dict_values(obj): + return {i: _ for i, _ in enumerate(obj)}.values() + + +def _make_dict_items(obj): + return obj.items() diff --git a/python/pyspark/cloudpickle/cloudpickle_fast.py b/python/pyspark/cloudpickle/cloudpickle_fast.py index 3c48ff7b0a885..fa8da0f635c49 100644 --- a/python/pyspark/cloudpickle/cloudpickle_fast.py +++ b/python/pyspark/cloudpickle/cloudpickle_fast.py @@ -6,10 +6,11 @@ is only available for Python versions 3.8+, a lot of backward-compatibility code is also removed. -Note that the C Pickler subclassing API is CPython-specific. Therefore, some +Note that the C Pickler sublassing API is CPython-specific. Therefore, some guards present in cloudpickle.py that were written to handle PyPy specificities are not present in cloudpickle_fast.py """ +import _collections_abc import abc import copyreg import io @@ -33,8 +34,8 @@ _typevar_reduce, _get_bases, _make_cell, _make_empty_cell, CellType, _is_parametrized_type_hint, PYPY, cell_set, parametrized_type_hint_getinitargs, _create_parametrized_type_hint, - builtin_code_type - + builtin_code_type, + _make_dict_keys, _make_dict_values, _make_dict_items, ) @@ -179,7 +180,7 @@ def _class_getstate(obj): clsdict.pop('__weakref__', None) if issubclass(type(obj), abc.ABCMeta): - # If obj is an instance of an ABCMeta subclass, don't pickle the + # If obj is an instance of an ABCMeta subclass, dont pickle the # cache/negative caches populated during isinstance/issubclass # checks, but pickle the list of registered subclasses of obj. clsdict.pop('_abc_cache', None) @@ -400,6 +401,24 @@ def _class_reduce(obj): return NotImplemented +def _dict_keys_reduce(obj): + # Safer not to ship the full dict as sending the rest might + # be unintended and could potentially cause leaking of + # sensitive information + return _make_dict_keys, (list(obj), ) + + +def _dict_values_reduce(obj): + # Safer not to ship the full dict as sending the rest might + # be unintended and could potentially cause leaking of + # sensitive information + return _make_dict_values, (list(obj), ) + + +def _dict_items_reduce(obj): + return _make_dict_items, (dict(obj), ) + + # COLLECTIONS OF OBJECTS STATE SETTERS # ------------------------------------ # state setters are called at unpickling time, once the object is created and @@ -407,7 +426,7 @@ def _class_reduce(obj): def _function_setstate(obj, state): - """Update the state of a dynamic function. + """Update the state of a dynaamic function. As __closure__ and __globals__ are readonly attributes of a function, we cannot rely on the native setstate routine of pickle.load_build, that calls @@ -473,6 +492,10 @@ class CloudPickler(Pickler): _dispatch_table[types.MappingProxyType] = _mappingproxy_reduce _dispatch_table[weakref.WeakSet] = _weakset_reduce _dispatch_table[typing.TypeVar] = _typevar_reduce + _dispatch_table[_collections_abc.dict_keys] = _dict_keys_reduce + _dispatch_table[_collections_abc.dict_values] = _dict_values_reduce + _dispatch_table[_collections_abc.dict_items] = _dict_items_reduce + dispatch_table = ChainMap(_dispatch_table, copyreg.dispatch_table) @@ -556,7 +579,7 @@ def dump(self, obj): # `dispatch` attribute. 
Earlier versions of the protocol 5 CloudPickler # used `CloudPickler.dispatch` as a class-level attribute storing all # reducers implemented by cloudpickle, but the attribute name was not a - # great choice given the meaning of `CloudPickler.dispatch` when + # great choice given the meaning of `Cloudpickler.dispatch` when # `CloudPickler` extends the pure-python pickler. dispatch = dispatch_table @@ -630,7 +653,7 @@ def reducer_override(self, obj): return self._function_reduce(obj) else: # fallback to save_global, including the Pickler's - # dispatch_table + # distpatch_table return NotImplemented else: From ac4651a7d19b248c86290d419ac3f6d69ed2b61e Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 4 Jan 2021 12:59:45 -0800 Subject: [PATCH 0949/1009] [SPARK-33980][SS] Invalidate char/varchar in spark.readStream.schema ### What changes were proposed in this pull request? invalidate char/varchar in `spark.readStream.schema` just like what we've done for `spark.read.schema` in da72b87374a7be5416b99ed016dc2fc9da0ed88a ### Why are the changes needed? bugfix, char/varchar is only for table schema while `spark.sql.legacy.charVarcharAsString=false` ### Does this PR introduce _any_ user-facing change? yes, char/varchar will fail to define ss readers when `spark.sql.legacy.charVarcharAsString=false` ### How was this patch tested? new tests Closes #31003 from yaooqinn/SPARK-33980. Authored-by: Kent Yao Signed-off-by: Dongjoon Hyun --- .../spark/sql/streaming/DataStreamReader.scala | 7 +++++-- .../apache/spark/sql/CharVarcharTestSuite.scala | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala index eb7bb5c87a990..d82fa9e88592f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala @@ -64,7 +64,8 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo * @since 2.0.0 */ def schema(schema: StructType): DataStreamReader = { - this.userSpecifiedSchema = Option(CharVarcharUtils.replaceCharVarcharWithStringInSchema(schema)) + val replaced = CharVarcharUtils.failIfHasCharVarchar(schema).asInstanceOf[StructType] + this.userSpecifiedSchema = Option(replaced) this } @@ -76,7 +77,9 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo * @since 2.3.0 */ def schema(schemaString: String): DataStreamReader = { - this.userSpecifiedSchema = Option(StructType.fromDDL(schemaString)) + val rawSchema = StructType.fromDDL(schemaString) + val schema = CharVarcharUtils.failIfHasCharVarchar(rawSchema).asInstanceOf[StructType] + this.userSpecifiedSchema = Option(schema) this } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index 9d4b7c4f82ed2..62d0f51e5ff75 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -549,6 +549,21 @@ class BasicCharVarcharTestSuite extends QueryTest with SharedSparkSession { assert(df2.schema.head.dataType === StringType) } } + + test("invalidate char/varchar in spark.readStream.schema") { + failWithInvalidCharUsage(spark.readStream.schema(new StructType().add("id", CharType(5)))) + 
failWithInvalidCharUsage(spark.readStream.schema("id char(5)")) + withSQLConf((SQLConf.LEGACY_CHAR_VARCHAR_AS_STRING.key, "true")) { + withTempPath { dir => + spark.range(2).write.save(dir.toString) + val df1 = spark.readStream.schema(new StructType().add("id", CharType(5))) + .load(dir.toString) + assert(df1.schema.map(_.dataType) == Seq(StringType)) + val df2 = spark.readStream.schema("id char(5)").load(dir.toString) + assert(df2.schema.map(_.dataType) == Seq(StringType)) + } + } + } } class FileSourceCharVarcharTestSuite extends CharVarcharTestSuite with SharedSparkSession { From 90f4ecf8cc07505c7cdea90b07fc60151c62ee2d Mon Sep 17 00:00:00 2001 From: William Hyun Date: Mon, 4 Jan 2021 14:54:16 -0800 Subject: [PATCH 0950/1009] [SPARK-33996][BUILD] Upgrade checkstyle plugins ### What changes were proposed in this pull request? This PR aims to upgrade `checkstyle` Maven plugins and its dependency, `com.puppycrawl.tools:checkstyle`. ### Why are the changes needed? The changes are needed to support Java 14+ better. - https://checkstyle.org/releasenotes.html#Release_8.39 - https://checkstyle.org/releasenotes.html#Release_8.38 - https://checkstyle.org/releasenotes.html#Release_8.37 - https://checkstyle.org/releasenotes.html#Release_8.36 - https://checkstyle.org/releasenotes.html#Release_8.35 - https://checkstyle.org/releasenotes.html#Release_8.34 - https://checkstyle.org/releasenotes.html#Release_8.33 - https://checkstyle.org/releasenotes.html#Release_8.32 - https://checkstyle.org/releasenotes.html#Release_8.31 - https://checkstyle.org/releasenotes.html#Release_8.30 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CI. Closes #31019 from williamhyun/checkstyle. Authored-by: William Hyun Signed-off-by: Dongjoon Hyun --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 5ff84cf806649..91ca0398a076e 100644 --- a/pom.xml +++ b/pom.xml @@ -2988,7 +2988,7 @@ org.apache.maven.plugins maven-checkstyle-plugin - 3.1.0 + 3.1.1 false true @@ -3008,7 +3008,7 @@ com.puppycrawl.tools checkstyle - 8.29 + 8.39 From 84c1f436690c76bfd3bd1a664dba303cfc8381da Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 4 Jan 2021 15:00:48 -0800 Subject: [PATCH 0951/1009] [SPARK-33987][SQL] Refresh cache in v2 `ALTER TABLE .. DROP PARTITION` ### What changes were proposed in this pull request? 1. Refresh the cache associated with tables from v2 table catalogs in the `ALTER TABLE .. DROP PARTITION` command. 2. Port the test for v1 catalogs to the base suite to run it for v2 table catalog. ### Why are the changes needed? The changes fix incorrect query results from cached V2 table altered by `ALTER TABLE .. DROP PARTITION`, see the added test and SPARK-33987. ### Does this PR introduce _any_ user-facing change? Yes, it could if users have v2 table catalogs. ### How was this patch tested? By running unified tests for `ALTER TABLE .. DROP PARTITION`: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableDropPartitionSuite" ``` Closes #31017 from MaxGekk/drop-partition-refresh-cache-v2. 
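The user-visible effect is easiest to see against a cached table. A minimal reproduction of the stale result this change fixes, assuming a v2 catalog registered as `testcat` (catalog, namespace and the `USING` clause below are placeholders, mirroring the unified test added in this patch):

```
spark.sql("CREATE TABLE testcat.ns.tbl (id INT, part INT) USING foo PARTITIONED BY (part)")
spark.sql("INSERT INTO testcat.ns.tbl PARTITION (part = 0) SELECT 0")
spark.sql("INSERT INTO testcat.ns.tbl PARTITION (part = 1) SELECT 1")

spark.sql("CACHE TABLE testcat.ns.tbl")
spark.sql("ALTER TABLE testcat.ns.tbl DROP PARTITION (part = 0)")

// Before this change the cached plan was not refreshed, so rows from the
// dropped partition could still be returned; now only (1, 1) comes back.
spark.sql("SELECT * FROM testcat.ns.tbl").show()
```

The `refreshCache` callback is only invoked when a partition was actually dropped, so a `DROP PARTITION` that matches nothing leaves the cache untouched.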
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../v2/AlterTableDropPartitionExec.scala | 8 +++++--- .../datasources/v2/DataSourceV2Strategy.scala | 8 ++++++-- .../AlterTableDropPartitionSuiteBase.scala | 15 +++++++++++++++ .../v1/AlterTableDropPartitionSuite.scala | 17 +---------------- 4 files changed, 27 insertions(+), 21 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala index 90714c3c726f3..f3137abbd1ba6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala @@ -29,7 +29,8 @@ case class AlterTableDropPartitionExec( table: SupportsPartitionManagement, partSpecs: Seq[ResolvedPartitionSpec], ignoreIfNotExists: Boolean, - purge: Boolean) extends V2CommandExec { + purge: Boolean, + refreshCache: () => Unit) extends V2CommandExec { import DataSourceV2Implicits._ override def output: Seq[Attribute] = Seq.empty @@ -43,8 +44,8 @@ case class AlterTableDropPartitionExec( table.name(), notExistsPartIdents, table.partitionSchema()) } - existsPartIdents match { - case Seq() => // Nothing will be done + val isTableAltered = existsPartIdents match { + case Seq() => false // Nothing will be done case Seq(partIdent) => if (purge) table.purgePartition(partIdent) else table.dropPartition(partIdent) case _ if table.isInstanceOf[SupportsAtomicPartitionManagement] => @@ -55,6 +56,7 @@ case class AlterTableDropPartitionExec( throw new UnsupportedOperationException( s"Nonatomic partition table ${table.name()} can not drop multiple partitions.") } + if (isTableAltered) refreshCache() Seq.empty } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index faba204dcb8f4..1537ebf8f305c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -356,12 +356,16 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat table, parts.asResolvedPartitionSpecs, ignoreIfExists) :: Nil case AlterTableDropPartition( - ResolvedTable(_, _, table: SupportsPartitionManagement, _), + r @ ResolvedTable(_, _, table: SupportsPartitionManagement, _), parts, ignoreIfNotExists, purge) => AlterTableDropPartitionExec( - table, parts.asResolvedPartitionSpecs, ignoreIfNotExists, purge) :: Nil + table, + parts.asResolvedPartitionSpecs, + ignoreIfNotExists, + purge, + invalidateCache(r, recacheTable = true)) :: Nil case AlterTableRenamePartition( ResolvedTable(_, _, table: SupportsPartitionManagement, _), from, to) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala index d8a8920deadc7..aadcda490b82b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala @@ -155,4 +155,19 @@ trait AlterTableDropPartitionSuiteBase 
extends QueryTest with DDLCommandTestUtil QueryTest.checkAnswer(sql(s"SELECT * FROM $t"), Seq(Row(1, 1))) } } + + test("SPARK-33950, SPARK-33987: refresh cache after partition dropping") { + withNamespaceAndTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id int, part int) $defaultUsing PARTITIONED BY (part)") + sql(s"INSERT INTO $t PARTITION (part=0) SELECT 0") + sql(s"INSERT INTO $t PARTITION (part=1) SELECT 1") + assert(!spark.catalog.isCached(t)) + sql(s"CACHE TABLE $t") + assert(spark.catalog.isCached(t)) + QueryTest.checkAnswer(sql(s"SELECT * FROM $t"), Seq(Row(0, 0), Row(1, 1))) + sql(s"ALTER TABLE $t DROP PARTITION (part=0)") + assert(spark.catalog.isCached(t)) + QueryTest.checkAnswer(sql(s"SELECT * FROM $t"), Seq(Row(1, 1))) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala index 2f2c62427d5ad..a6490ebdb950c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.command.v1 -import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.execution.command /** @@ -42,21 +42,6 @@ trait AlterTableDropPartitionSuiteBase extends command.AlterTableDropPartitionSu checkPartitions(t) // no partitions } } - - test("SPARK-33950: refresh cache after partition dropping") { - withTable("t") { - sql(s"CREATE TABLE t (id int, part int) $defaultUsing PARTITIONED BY (part)") - sql("INSERT INTO t PARTITION (part=0) SELECT 0") - sql("INSERT INTO t PARTITION (part=1) SELECT 1") - assert(!spark.catalog.isCached("t")) - sql("CACHE TABLE t") - assert(spark.catalog.isCached("t")) - checkAnswer(sql("SELECT * FROM t"), Seq(Row(0, 0), Row(1, 1))) - sql("ALTER TABLE t DROP PARTITION (part=0)") - assert(spark.catalog.isCached("t")) - checkAnswer(sql("SELECT * FROM t"), Seq(Row(1, 1))) - } - } } /** From 9b4173fa95047fed94e2fe323ad281fb48deffda Mon Sep 17 00:00:00 2001 From: Koert Kuipers Date: Mon, 4 Jan 2021 15:40:32 -0800 Subject: [PATCH 0952/1009] [SPARK-33894][SQL] Change visibility of private case classes in mllib to avoid runtime compilation errors with Scala 2.13 ### What changes were proposed in this pull request? Change visibility modifier of two case classes defined inside objects in mllib from private to private[OuterClass] ### Why are the changes needed? Without this change when running tests for Scala 2.13 you get runtime code generation errors. These errors look like this: ``` [info] Cause: java.util.concurrent.ExecutionException: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 73, Column 65: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 73, Column 65: No applicable constructor/method found for zero actual parameters; candidates are: "public java.lang.String org.apache.spark.ml.feature.Word2VecModel$Data.word()" ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests now pass for Scala 2.13 Closes #31018 from koertkuipers/feat-visibility-scala213. 
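The failing `Data.word()` lookup comes from Java code generated at runtime, which lives outside the enclosing object. The practical difference exploited here is that a class-qualified `private[Outer]` is enforced only by the Scala compiler, so the case class and its accessors stay reachable at the bytecode level where the generated code resolves them, while the member remains hidden from outside Scala callers. A stripped-down illustration (the names are made up, not the actual mllib classes):

```
object ModelIO {
  // Object-private: only code written inside ModelIO may name Data or call
  // its accessors; with Scala 2.13 this is what broke the generated readers.
  // private case class Data(word: String, vector: Array[Float])

  // Qualified private: the same surface for Scala callers, but the members
  // remain visible to bytecode generated outside the object body.
  private[ModelIO] case class Data(word: String, vector: Array[Float])
}
```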
Authored-by: Koert Kuipers Signed-off-by: Dongjoon Hyun --- .../src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala | 2 +- .../scala/org/apache/spark/mllib/clustering/KMeansModel.scala | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala index 0b9c1b570d943..9afbc9b884168 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala @@ -344,7 +344,7 @@ class Word2VecModel private[ml] ( @Since("1.6.0") object Word2VecModel extends MLReadable[Word2VecModel] { - private case class Data(word: String, vector: Array[Float]) + private[Word2VecModel] case class Data(word: String, vector: Array[Float]) private[Word2VecModel] class Word2VecModelWriter(instance: Word2VecModel) extends MLWriter { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala index 04a3b6dd413b4..a24493bb7a8f9 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala @@ -145,9 +145,9 @@ object KMeansModel extends Loader[KMeansModel] { } } - private case class Cluster(id: Int, point: Vector) + private[KMeansModel] case class Cluster(id: Int, point: Vector) - private object Cluster { + private[KMeansModel] object Cluster { def apply(r: Row): Cluster = { Cluster(r.getInt(0), r.getAs[Vector](1)) } From 559f411da83856a81ac39cf79df8487cc5a06134 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Mon, 4 Jan 2021 15:44:42 -0800 Subject: [PATCH 0953/1009] [SPARK-33908][CORE][FOLLOWUP] Correct Scaladoc of resolveDependencyPaths/resolveMavenDependencies ### What changes were proposed in this pull request? Fix un-correct doc of last change https://github.com/apache/spark/pull/30922#discussion_r551453193 ### Why are the changes needed? FIx doc ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Builds finished correctly. Closes #31016 from AngersZhuuuu/SPARK-33908-FOLLOW-UP. Authored-by: angerszhu Signed-off-by: Dongjoon Hyun --- .../main/scala/org/apache/spark/deploy/SparkSubmit.scala | 6 +++--- .../main/scala/org/apache/spark/util/DependencyUtils.scala | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 0cf309f148156..8bf7795b7bfe4 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -1194,11 +1194,11 @@ private[spark] object SparkSubmitUtils { } /** - * Output a comma-delimited list of paths for the downloaded jars to be added to the classpath + * Output a list of paths for the downloaded jars to be added to the classpath * (will append to jars in SparkSubmit). 
* @param artifacts Sequence of dependencies that were resolved and retrieved - * @param cacheDirectory directory where jars are cached - * @return a comma-delimited list of paths for the dependencies + * @param cacheDirectory Directory where jars are cached + * @return List of paths for the dependencies */ def resolveDependencyPaths( artifacts: Array[AnyRef], diff --git a/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala b/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala index 789811fa5f3a4..60e866a556796 100644 --- a/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala @@ -129,7 +129,7 @@ private[spark] object DependencyUtils extends Logging { * 2. exclude: exclusion list when download Ivy URI jar and dependency jars. * The `exclude` parameter content is a ',' separated `group:module` pair string : * `exclude=group:module,group:module...` - * @return Comma separated string list of jars downloaded. + * @return List of jars downloaded. */ def resolveMavenDependencies(uri: URI): Seq[String] = { val ivyProperties = DependencyUtils.getIvyProperties() From bb6d6b560287ac83e79012fe8dcbe5dcd2e7a904 Mon Sep 17 00:00:00 2001 From: "tanel.kiis@gmail.com" Date: Tue, 5 Jan 2021 11:01:31 +0900 Subject: [PATCH 0954/1009] [SPARK-33964][SQL] Combine distinct unions in more cases ### What changes were proposed in this pull request? Added the `RemoveNoopOperators` rule to optimization batch `Union`. Also made sure that the `RemoveNoopOperators` would be idempotent. ### Why are the changes needed? In several TPCDS queries the `CombineUnions` rule does not manage to combine unions, because they have noop `Project`s between them. The `Project`s will be removed by `RemoveNoopOperators`, but by then `ReplaceDistinctWithAggregate` has been applied and there are aggregates between the unions. Adding a copy of `RemoveNoopOperators` earlier in the optimization chain allows `CombineUnions` to work on more queries. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? New UTs and the output of `PlanStabilitySuite` Closes #30996 from tanelk/SPARK-33964_combine_unions. 
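The `transform` to `transformUp` switch is what makes the rule idempotent: with a bottom-up traversal the inner no-op `Project` is removed before its parent is inspected, so the parent then sees a child with the same output and is removed in the same pass, whereas a top-down `transform` strips only the outermost layer per run. A small sketch using the catalyst test DSL, in the spirit of the new `RemoveNoopOperatorsSuite` (the relation and column names are arbitrary):

```
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.logical.LocalRelation

val testRelation = LocalRelation('a.int, 'b.int, 'c.int)

// Both Projects select exactly the relation's output, so both are no-ops.
// One bottom-up execution of RemoveNoopOperators reduces this to the bare
// LocalRelation; the old top-down version needed a second run for that.
val plan = testRelation
  .select('a, 'b, 'c)
  .select('a, 'b, 'c)
  .analyze
```

Running `RemoveNoopOperators` this early, in the `Union` batch, leaves adjacent `Union` nodes exposed for `CombineUnions` before `ReplaceDistinctWithAggregate` puts aggregates between them, which is what produces the TPCDS plan changes in this patch.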
Authored-by: tanel.kiis@gmail.com Signed-off-by: HyukjinKwon --- .../sql/catalyst/optimizer/Optimizer.scala | 3 +- .../optimizer/RemoveNoopOperatorsSuite.scala | 57 + .../approved-plans-v1_4/q75.sf100/explain.txt | 732 ++++++------ .../q75.sf100/simplified.txt | 314 +++-- .../approved-plans-v1_4/q75/explain.txt | 606 +++++----- .../approved-plans-v1_4/q75/simplified.txt | 236 ++-- .../q14a.sf100/explain.txt | 1042 ++++++++--------- .../q14a.sf100/simplified.txt | 736 ++++++------ .../approved-plans-v2_7/q14a/explain.txt | 994 ++++++++-------- .../approved-plans-v2_7/q14a/simplified.txt | 646 +++++----- .../q36a.sf100/explain.txt | 192 ++- .../q36a.sf100/simplified.txt | 114 +- .../approved-plans-v2_7/q36a/explain.txt | 192 ++- .../approved-plans-v2_7/q36a/simplified.txt | 112 +- .../approved-plans-v2_7/q5a.sf100/explain.txt | 272 ++--- .../q5a.sf100/simplified.txt | 274 +++-- .../approved-plans-v2_7/q5a/explain.txt | 262 ++--- .../approved-plans-v2_7/q5a/simplified.txt | 258 ++-- .../q70a.sf100/explain.txt | 226 ++-- .../q70a.sf100/simplified.txt | 160 ++- .../approved-plans-v2_7/q70a/explain.txt | 226 ++-- .../approved-plans-v2_7/q70a/simplified.txt | 160 ++- .../approved-plans-v2_7/q75.sf100/explain.txt | 732 ++++++------ .../q75.sf100/simplified.txt | 314 +++-- .../approved-plans-v2_7/q75/explain.txt | 606 +++++----- .../approved-plans-v2_7/q75/simplified.txt | 236 ++-- .../q77a.sf100/explain.txt | 294 +++-- .../q77a.sf100/simplified.txt | 290 +++-- .../approved-plans-v2_7/q77a/explain.txt | 294 +++-- .../approved-plans-v2_7/q77a/simplified.txt | 290 +++-- .../q80a.sf100/explain.txt | 332 +++--- .../q80a.sf100/simplified.txt | 356 +++--- .../approved-plans-v2_7/q80a/explain.txt | 302 +++-- .../approved-plans-v2_7/q80a/simplified.txt | 306 +++-- .../q86a.sf100/explain.txt | 178 ++- .../q86a.sf100/simplified.txt | 94 +- .../approved-plans-v2_7/q86a/explain.txt | 178 ++- .../approved-plans-v2_7/q86a/simplified.txt | 94 +- .../org/apache/spark/sql/SQLQuerySuite.scala | 18 + 39 files changed, 5986 insertions(+), 6742 deletions(-) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveNoopOperatorsSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 47260cfb59bb1..f61fad7c3ef54 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -156,6 +156,7 @@ abstract class Optimizer(catalogManager: CatalogManager) // - Call CombineUnions again in Batch("Operator Optimizations"), // since the other rules might make two separate Unions operators adjacent. Batch("Union", Once, + RemoveNoopOperators, CombineUnions) :: Batch("OptimizeLimitZero", Once, OptimizeLimitZero) :: @@ -490,7 +491,7 @@ object RemoveRedundantAliases extends Rule[LogicalPlan] { * Remove no-op operators from the query plan that do not make any modifications. 
*/ object RemoveNoopOperators extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan transform { + def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { // Eliminate no-op Projects case p @ Project(_, child) if child.sameOutput(p) => child diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveNoopOperatorsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveNoopOperatorsSuite.scala new file mode 100644 index 0000000000000..cedd21d2bf522 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveNoopOperatorsSuite.scala @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.RuleExecutor + +class RemoveNoopOperatorsSuite extends PlanTest { + + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = + Batch("RemoveNoopOperators", Once, + RemoveNoopOperators) :: Nil + } + + val testRelation = LocalRelation('a.int, 'b.int, 'c.int) + + test("Remove all redundant projections in one iteration") { + val originalQuery = testRelation + .select('a, 'b, 'c) + .select('a, 'b, 'c) + .analyze + + val optimized = Optimize.execute(originalQuery.analyze) + + comparePlans(optimized, testRelation) + } + + test("Remove all redundant windows in one iteration") { + val originalQuery = testRelation + .window(Nil, Nil, Nil) + .window(Nil, Nil, Nil) + .analyze + + val optimized = Optimize.execute(originalQuery.analyze) + + comparePlans(optimized, testRelation) + } +} diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.sf100/explain.txt index 39748bdd2772b..1d546a445b202 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.sf100/explain.txt @@ -1,142 +1,134 @@ == Physical Plan == -TakeOrderedAndProject (138) -+- * Project (137) - +- * SortMergeJoin Inner (136) - :- * Sort (74) - : +- Exchange (73) - : +- * HashAggregate (72) - : +- Exchange (71) - : +- * HashAggregate (70) - : +- * HashAggregate (69) - : +- Exchange (68) - : +- * HashAggregate (67) - : +- Union (66) - : :- * HashAggregate (47) - : : +- Exchange (46) - : : +- * HashAggregate (45) - : : +- Union (44) - : : :- * Project (25) - : : : +- SortMergeJoin LeftOuter (24) - : : : :- * Sort (18) - : : 
: : +- Exchange (17) - : : : : +- * Project (16) - : : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : : :- * Project (10) - : : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : : :- * Filter (3) - : : : : : : +- * ColumnarToRow (2) - : : : : : : +- Scan parquet default.catalog_sales (1) - : : : : : +- BroadcastExchange (8) - : : : : : +- * Project (7) - : : : : : +- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.item (4) - : : : : +- BroadcastExchange (14) - : : : : +- * Filter (13) - : : : : +- * ColumnarToRow (12) - : : : : +- Scan parquet default.date_dim (11) - : : : +- * Sort (23) - : : : +- Exchange (22) - : : : +- * Filter (21) - : : : +- * ColumnarToRow (20) - : : : +- Scan parquet default.catalog_returns (19) - : : +- * Project (43) - : : +- SortMergeJoin LeftOuter (42) - : : :- * Sort (36) - : : : +- Exchange (35) - : : : +- * Project (34) - : : : +- * BroadcastHashJoin Inner BuildRight (33) - : : : :- * Project (31) - : : : : +- * BroadcastHashJoin Inner BuildRight (30) - : : : : :- * Filter (28) - : : : : : +- * ColumnarToRow (27) - : : : : : +- Scan parquet default.store_sales (26) - : : : : +- ReusedExchange (29) - : : : +- ReusedExchange (32) - : : +- * Sort (41) - : : +- Exchange (40) - : : +- * Filter (39) - : : +- * ColumnarToRow (38) - : : +- Scan parquet default.store_returns (37) - : +- * Project (65) - : +- SortMergeJoin LeftOuter (64) - : :- * Sort (58) - : : +- Exchange (57) - : : +- * Project (56) - : : +- * BroadcastHashJoin Inner BuildRight (55) - : : :- * Project (53) - : : : +- * BroadcastHashJoin Inner BuildRight (52) - : : : :- * Filter (50) - : : : : +- * ColumnarToRow (49) - : : : : +- Scan parquet default.web_sales (48) - : : : +- ReusedExchange (51) - : : +- ReusedExchange (54) - : +- * Sort (63) - : +- Exchange (62) - : +- * Filter (61) - : +- * ColumnarToRow (60) - : +- Scan parquet default.web_returns (59) - +- * Sort (135) - +- Exchange (134) - +- * HashAggregate (133) - +- Exchange (132) - +- * HashAggregate (131) - +- * HashAggregate (130) - +- Exchange (129) - +- * HashAggregate (128) - +- Union (127) - :- * HashAggregate (111) - : +- Exchange (110) - : +- * HashAggregate (109) - : +- Union (108) - : :- * Project (92) - : : +- SortMergeJoin LeftOuter (91) - : : :- * Sort (88) - : : : +- Exchange (87) - : : : +- * Project (86) - : : : +- * BroadcastHashJoin Inner BuildRight (85) - : : : :- * Project (80) - : : : : +- * BroadcastHashJoin Inner BuildRight (79) - : : : : :- * Filter (77) - : : : : : +- * ColumnarToRow (76) - : : : : : +- Scan parquet default.catalog_sales (75) - : : : : +- ReusedExchange (78) - : : : +- BroadcastExchange (84) - : : : +- * Filter (83) - : : : +- * ColumnarToRow (82) - : : : +- Scan parquet default.date_dim (81) - : : +- * Sort (90) - : : +- ReusedExchange (89) - : +- * Project (107) - : +- SortMergeJoin LeftOuter (106) - : :- * Sort (103) - : : +- Exchange (102) - : : +- * Project (101) - : : +- * BroadcastHashJoin Inner BuildRight (100) - : : :- * Project (98) - : : : +- * BroadcastHashJoin Inner BuildRight (97) - : : : :- * Filter (95) - : : : : +- * ColumnarToRow (94) - : : : : +- Scan parquet default.store_sales (93) - : : : +- ReusedExchange (96) - : : +- ReusedExchange (99) - : +- * Sort (105) - : +- ReusedExchange (104) - +- * Project (126) - +- SortMergeJoin LeftOuter (125) - :- * Sort (122) - : +- Exchange (121) - : +- * Project (120) - : +- * BroadcastHashJoin Inner BuildRight (119) - : :- * Project (117) - : : +- * BroadcastHashJoin Inner BuildRight 
(116) - : : :- * Filter (114) - : : : +- * ColumnarToRow (113) - : : : +- Scan parquet default.web_sales (112) - : : +- ReusedExchange (115) - : +- ReusedExchange (118) - +- * Sort (124) - +- ReusedExchange (123) +TakeOrderedAndProject (130) ++- * Project (129) + +- * SortMergeJoin Inner (128) + :- * Sort (70) + : +- Exchange (69) + : +- * HashAggregate (68) + : +- Exchange (67) + : +- * HashAggregate (66) + : +- * HashAggregate (65) + : +- Exchange (64) + : +- * HashAggregate (63) + : +- Union (62) + : :- * Project (25) + : : +- SortMergeJoin LeftOuter (24) + : : :- * Sort (18) + : : : +- Exchange (17) + : : : +- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.catalog_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.item (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.date_dim (11) + : : +- * Sort (23) + : : +- Exchange (22) + : : +- * Filter (21) + : : +- * ColumnarToRow (20) + : : +- Scan parquet default.catalog_returns (19) + : :- * Project (43) + : : +- SortMergeJoin LeftOuter (42) + : : :- * Sort (36) + : : : +- Exchange (35) + : : : +- * Project (34) + : : : +- * BroadcastHashJoin Inner BuildRight (33) + : : : :- * Project (31) + : : : : +- * BroadcastHashJoin Inner BuildRight (30) + : : : : :- * Filter (28) + : : : : : +- * ColumnarToRow (27) + : : : : : +- Scan parquet default.store_sales (26) + : : : : +- ReusedExchange (29) + : : : +- ReusedExchange (32) + : : +- * Sort (41) + : : +- Exchange (40) + : : +- * Filter (39) + : : +- * ColumnarToRow (38) + : : +- Scan parquet default.store_returns (37) + : +- * Project (61) + : +- SortMergeJoin LeftOuter (60) + : :- * Sort (54) + : : +- Exchange (53) + : : +- * Project (52) + : : +- * BroadcastHashJoin Inner BuildRight (51) + : : :- * Project (49) + : : : +- * BroadcastHashJoin Inner BuildRight (48) + : : : :- * Filter (46) + : : : : +- * ColumnarToRow (45) + : : : : +- Scan parquet default.web_sales (44) + : : : +- ReusedExchange (47) + : : +- ReusedExchange (50) + : +- * Sort (59) + : +- Exchange (58) + : +- * Filter (57) + : +- * ColumnarToRow (56) + : +- Scan parquet default.web_returns (55) + +- * Sort (127) + +- Exchange (126) + +- * HashAggregate (125) + +- Exchange (124) + +- * HashAggregate (123) + +- * HashAggregate (122) + +- Exchange (121) + +- * HashAggregate (120) + +- Union (119) + :- * Project (88) + : +- SortMergeJoin LeftOuter (87) + : :- * Sort (84) + : : +- Exchange (83) + : : +- * Project (82) + : : +- * BroadcastHashJoin Inner BuildRight (81) + : : :- * Project (76) + : : : +- * BroadcastHashJoin Inner BuildRight (75) + : : : :- * Filter (73) + : : : : +- * ColumnarToRow (72) + : : : : +- Scan parquet default.catalog_sales (71) + : : : +- ReusedExchange (74) + : : +- BroadcastExchange (80) + : : +- * Filter (79) + : : +- * ColumnarToRow (78) + : : +- Scan parquet default.date_dim (77) + : +- * Sort (86) + : +- ReusedExchange (85) + :- * Project (103) + : +- SortMergeJoin LeftOuter (102) + : :- * Sort (99) + : : +- Exchange (98) + : : +- * Project (97) + : : +- * BroadcastHashJoin Inner BuildRight (96) + : : :- * Project (94) + : : : +- * BroadcastHashJoin Inner BuildRight (93) + : : : :- * Filter (91) + : : : : +- * 
ColumnarToRow (90) + : : : : +- Scan parquet default.store_sales (89) + : : : +- ReusedExchange (92) + : : +- ReusedExchange (95) + : +- * Sort (101) + : +- ReusedExchange (100) + +- * Project (118) + +- SortMergeJoin LeftOuter (117) + :- * Sort (114) + : +- Exchange (113) + : +- * Project (112) + : +- * BroadcastHashJoin Inner BuildRight (111) + : :- * Project (109) + : : +- * BroadcastHashJoin Inner BuildRight (108) + : : :- * Filter (106) + : : : +- * ColumnarToRow (105) + : : : +- Scan parquet default.web_sales (104) + : : +- ReusedExchange (107) + : +- ReusedExchange (110) + +- * Sort (116) + +- ReusedExchange (115) (1) Scan parquet default.catalog_sales @@ -213,7 +205,7 @@ Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, (17) Exchange Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] -Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), true, [id=#16] +Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), ENSURE_REQUIREMENTS, [id=#16] (18) Sort [codegen id : 4] Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] @@ -235,7 +227,7 @@ Condition : (isnotnull(cr_order_number#18) AND isnotnull(cr_item_sk#17)) (22) Exchange Input [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] -Arguments: hashpartitioning(cr_order_number#18, cr_item_sk#17, 5), true, [id=#21] +Arguments: hashpartitioning(cr_order_number#18, cr_item_sk#17, 5), ENSURE_REQUIREMENTS, [id=#21] (23) Sort [codegen id : 6] Input [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] @@ -290,7 +282,7 @@ Input [11]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity (35) Exchange Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] -Arguments: hashpartitioning(cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint), 5), true, [id=#29] +Arguments: hashpartitioning(cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint), 5), ENSURE_REQUIREMENTS, [id=#29] (36) Sort [codegen id : 11] Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] @@ -312,7 +304,7 @@ Condition : (isnotnull(sr_ticket_number#31) AND isnotnull(sr_item_sk#30)) (40) Exchange Input [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] -Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), true, [id=#34] +Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), ENSURE_REQUIREMENTS, [id=#34] (41) Sort [codegen id : 13] Input [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] @@ -327,426 +319,386 @@ Join condition: None Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ss_quantity#27 - coalesce(sr_return_quantity#32, 0)) AS sales_cnt#35, CheckOverflow((promote_precision(cast(ss_ext_sales_price#28 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#33, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#36] Input [13]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 
d_year#14, sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] -(44) Union - -(45) HashAggregate [codegen id : 15] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] - -(46) Exchange -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23, 5), true, [id=#37] - -(47) HashAggregate [codegen id : 16] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] - -(48) Scan parquet default.web_sales -Output [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] +(44) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] ReadSchema: struct -(49) ColumnarToRow [codegen id : 19] -Input [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] +(45) ColumnarToRow [codegen id : 17] +Input [5]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41] -(50) Filter [codegen id : 19] -Input [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] -Condition : (isnotnull(ws_item_sk#39) AND isnotnull(ws_sold_date_sk#38)) +(46) Filter [codegen id : 17] +Input [5]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41] +Condition : (isnotnull(ws_item_sk#38) AND isnotnull(ws_sold_date_sk#37)) -(51) ReusedExchange [Reuses operator id: 8] +(47) ReusedExchange [Reuses operator id: 8] Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -(52) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [ws_item_sk#39] +(48) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_item_sk#38] Right keys [1]: [i_item_sk#6] Join condition: None -(53) Project [codegen id : 19] -Output [9]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -Input [10]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +(49) Project [codegen id : 17] +Output [9]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [10]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, 
i_manufact_id#11] -(54) ReusedExchange [Reuses operator id: 14] +(50) ReusedExchange [Reuses operator id: 14] Output [2]: [d_date_sk#13, d_year#14] -(55) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [ws_sold_date_sk#38] +(51) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#37] Right keys [1]: [d_date_sk#13] Join condition: None -(56) Project [codegen id : 19] -Output [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] -Input [11]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] +(52) Project [codegen id : 17] +Output [9]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Input [11]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] -(57) Exchange -Input [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] -Arguments: hashpartitioning(cast(ws_order_number#40 as bigint), cast(ws_item_sk#39 as bigint), 5), true, [id=#43] +(53) Exchange +Input [9]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Arguments: hashpartitioning(cast(ws_order_number#39 as bigint), cast(ws_item_sk#38 as bigint), 5), ENSURE_REQUIREMENTS, [id=#42] -(58) Sort [codegen id : 20] -Input [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] -Arguments: [cast(ws_order_number#40 as bigint) ASC NULLS FIRST, cast(ws_item_sk#39 as bigint) ASC NULLS FIRST], false, 0 +(54) Sort [codegen id : 18] +Input [9]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Arguments: [cast(ws_order_number#39 as bigint) ASC NULLS FIRST, cast(ws_item_sk#38 as bigint) ASC NULLS FIRST], false, 0 -(59) Scan parquet default.web_returns -Output [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +(55) Scan parquet default.web_returns +Output [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] ReadSchema: struct -(60) ColumnarToRow [codegen id : 21] -Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +(56) ColumnarToRow [codegen id : 19] +Input [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] -(61) Filter [codegen id : 21] -Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] -Condition : (isnotnull(wr_order_number#45) AND isnotnull(wr_item_sk#44)) +(57) Filter [codegen id : 19] +Input [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] +Condition : (isnotnull(wr_order_number#44) AND isnotnull(wr_item_sk#43)) -(62) Exchange -Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] -Arguments: 
hashpartitioning(wr_order_number#45, wr_item_sk#44, 5), true, [id=#48] +(58) Exchange +Input [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] +Arguments: hashpartitioning(wr_order_number#44, wr_item_sk#43, 5), ENSURE_REQUIREMENTS, [id=#47] -(63) Sort [codegen id : 22] -Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] -Arguments: [wr_order_number#45 ASC NULLS FIRST, wr_item_sk#44 ASC NULLS FIRST], false, 0 +(59) Sort [codegen id : 20] +Input [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] +Arguments: [wr_order_number#44 ASC NULLS FIRST, wr_item_sk#43 ASC NULLS FIRST], false, 0 -(64) SortMergeJoin -Left keys [2]: [cast(ws_order_number#40 as bigint), cast(ws_item_sk#39 as bigint)] -Right keys [2]: [wr_order_number#45, wr_item_sk#44] +(60) SortMergeJoin +Left keys [2]: [cast(ws_order_number#39 as bigint), cast(ws_item_sk#38 as bigint)] +Right keys [2]: [wr_order_number#44, wr_item_sk#43] Join condition: None -(65) Project [codegen id : 23] -Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ws_quantity#41 - coalesce(wr_return_quantity#46, 0)) AS sales_cnt#49, CheckOverflow((promote_precision(cast(ws_ext_sales_price#42 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#47, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#50] -Input [13]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +(61) Project [codegen id : 21] +Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ws_quantity#40 - coalesce(wr_return_quantity#45, 0)) AS sales_cnt#48, CheckOverflow((promote_precision(cast(ws_ext_sales_price#41 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#46, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#49] +Input [13]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] -(66) Union +(62) Union -(67) HashAggregate [codegen id : 24] +(63) HashAggregate [codegen id : 22] Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] Functions: [] Aggregate Attributes: [] Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -(68) Exchange +(64) Exchange Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23, 5), true, [id=#51] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23, 5), ENSURE_REQUIREMENTS, [id=#50] -(69) HashAggregate [codegen id : 25] +(65) HashAggregate [codegen id : 23] Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] Functions: [] Aggregate 
Attributes: [] Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -(70) HashAggregate [codegen id : 25] +(66) HashAggregate [codegen id : 23] Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] Functions [2]: [partial_sum(cast(sales_cnt#22 as bigint)), partial_sum(UnscaledValue(sales_amt#23))] -Aggregate Attributes [2]: [sum#52, sum#53] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#54, sum#55] +Aggregate Attributes [2]: [sum#51, sum#52] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#53, sum#54] -(71) Exchange -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#54, sum#55] -Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), true, [id=#56] +(67) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#53, sum#54] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [id=#55] -(72) HashAggregate [codegen id : 26] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#54, sum#55] +(68) HashAggregate [codegen id : 24] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#53, sum#54] Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] Functions [2]: [sum(cast(sales_cnt#22 as bigint)), sum(UnscaledValue(sales_amt#23))] -Aggregate Attributes [2]: [sum(cast(sales_cnt#22 as bigint))#57, sum(UnscaledValue(sales_amt#23))#58] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum(cast(sales_cnt#22 as bigint))#57 AS sales_cnt#59, MakeDecimal(sum(UnscaledValue(sales_amt#23))#58,18,2) AS sales_amt#60] +Aggregate Attributes [2]: [sum(cast(sales_cnt#22 as bigint))#56, sum(UnscaledValue(sales_amt#23))#57] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum(cast(sales_cnt#22 as bigint))#56 AS sales_cnt#58, MakeDecimal(sum(UnscaledValue(sales_amt#23))#57,18,2) AS sales_amt#59] -(73) Exchange -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#59, sales_amt#60] -Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), true, [id=#61] +(69) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#58, sales_amt#59] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [id=#60] -(74) Sort [codegen id : 27] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#59, sales_amt#60] +(70) Sort [codegen id : 25] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#58, sales_amt#59] Arguments: [i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST, i_manufact_id#11 ASC NULLS FIRST], false, 0 -(75) Scan parquet default.catalog_sales +(71) Scan parquet default.catalog_sales Output [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] Batched: true Location [not included in 
comparison]/{warehouse_dir}/catalog_sales] PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] ReadSchema: struct -(76) ColumnarToRow [codegen id : 30] +(72) ColumnarToRow [codegen id : 28] Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] -(77) Filter [codegen id : 30] +(73) Filter [codegen id : 28] Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_sold_date_sk#1)) -(78) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(74) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#61, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] -(79) BroadcastHashJoin [codegen id : 30] +(75) BroadcastHashJoin [codegen id : 28] Left keys [1]: [cs_item_sk#2] -Right keys [1]: [i_item_sk#62] +Right keys [1]: [i_item_sk#61] Join condition: None -(80) Project [codegen id : 30] -Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] -Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(76) Project [codegen id : 28] +Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] +Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_item_sk#61, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] -(81) Scan parquet default.date_dim -Output [2]: [d_date_sk#67, d_year#68] +(77) Scan parquet default.date_dim +Output [2]: [d_date_sk#66, d_year#67] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct -(82) ColumnarToRow [codegen id : 29] -Input [2]: [d_date_sk#67, d_year#68] +(78) ColumnarToRow [codegen id : 27] +Input [2]: [d_date_sk#66, d_year#67] -(83) Filter [codegen id : 29] -Input [2]: [d_date_sk#67, d_year#68] -Condition : ((isnotnull(d_year#68) AND (d_year#68 = 2001)) AND isnotnull(d_date_sk#67)) +(79) Filter [codegen id : 27] +Input [2]: [d_date_sk#66, d_year#67] +Condition : ((isnotnull(d_year#67) AND (d_year#67 = 2001)) AND isnotnull(d_date_sk#66)) -(84) BroadcastExchange -Input [2]: [d_date_sk#67, d_year#68] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#69] +(80) BroadcastExchange +Input [2]: [d_date_sk#66, d_year#67] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#68] -(85) BroadcastHashJoin [codegen id : 30] +(81) BroadcastHashJoin [codegen id : 28] Left keys [1]: [cs_sold_date_sk#1] -Right keys [1]: [d_date_sk#67] +Right keys [1]: [d_date_sk#66] Join condition: None -(86) Project [codegen id : 30] -Output [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_date_sk#67, d_year#68] +(82) Project [codegen id : 28] +Output [9]: [cs_item_sk#2, 
cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_date_sk#66, d_year#67] -(87) Exchange -Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), true, [id=#70] +(83) Exchange +Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), ENSURE_REQUIREMENTS, [id=#69] -(88) Sort [codegen id : 31] -Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] +(84) Sort [codegen id : 29] +Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] Arguments: [cs_order_number#3 ASC NULLS FIRST, cs_item_sk#2 ASC NULLS FIRST], false, 0 -(89) ReusedExchange [Reuses operator id: 22] +(85) ReusedExchange [Reuses operator id: 22] Output [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] -(90) Sort [codegen id : 33] +(86) Sort [codegen id : 31] Input [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] Arguments: [cr_order_number#18 ASC NULLS FIRST, cr_item_sk#17 ASC NULLS FIRST], false, 0 -(91) SortMergeJoin +(87) SortMergeJoin Left keys [2]: [cs_order_number#3, cs_item_sk#2] Right keys [2]: [cr_order_number#18, cr_item_sk#17] Join condition: None -(92) Project [codegen id : 34] -Output [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, (cs_quantity#4 - coalesce(cr_return_quantity#19, 0)) AS sales_cnt#22, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#20, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#23] -Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68, cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] +(88) Project [codegen id : 32] +Output [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, (cs_quantity#4 - coalesce(cr_return_quantity#19, 0)) AS sales_cnt#22, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#20, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#23] +Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67, cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] -(93) Scan parquet default.store_sales +(89) Scan parquet default.store_sales Output [5]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] ReadSchema: struct -(94) ColumnarToRow [codegen id : 
37] +(90) ColumnarToRow [codegen id : 35] Input [5]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28] -(95) Filter [codegen id : 37] +(91) Filter [codegen id : 35] Input [5]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28] Condition : (isnotnull(ss_item_sk#25) AND isnotnull(ss_sold_date_sk#24)) -(96) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(92) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#61, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] -(97) BroadcastHashJoin [codegen id : 37] +(93) BroadcastHashJoin [codegen id : 35] Left keys [1]: [ss_item_sk#25] -Right keys [1]: [i_item_sk#62] +Right keys [1]: [i_item_sk#61] Join condition: None -(98) Project [codegen id : 37] -Output [9]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] -Input [10]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(94) Project [codegen id : 35] +Output [9]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] +Input [10]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_item_sk#61, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] -(99) ReusedExchange [Reuses operator id: 84] -Output [2]: [d_date_sk#67, d_year#68] +(95) ReusedExchange [Reuses operator id: 80] +Output [2]: [d_date_sk#66, d_year#67] -(100) BroadcastHashJoin [codegen id : 37] +(96) BroadcastHashJoin [codegen id : 35] Left keys [1]: [ss_sold_date_sk#24] -Right keys [1]: [d_date_sk#67] +Right keys [1]: [d_date_sk#66] Join condition: None -(101) Project [codegen id : 37] -Output [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Input [11]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_date_sk#67, d_year#68] +(97) Project [codegen id : 35] +Output [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Input [11]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_date_sk#66, d_year#67] -(102) Exchange -Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Arguments: hashpartitioning(cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint), 5), true, [id=#71] +(98) Exchange +Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Arguments: hashpartitioning(cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint), 5), ENSURE_REQUIREMENTS, [id=#70] -(103) Sort [codegen id : 38] -Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, 
ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] +(99) Sort [codegen id : 36] +Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] Arguments: [cast(ss_ticket_number#26 as bigint) ASC NULLS FIRST, cast(ss_item_sk#25 as bigint) ASC NULLS FIRST], false, 0 -(104) ReusedExchange [Reuses operator id: 40] +(100) ReusedExchange [Reuses operator id: 40] Output [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] -(105) Sort [codegen id : 40] +(101) Sort [codegen id : 38] Input [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] Arguments: [sr_ticket_number#31 ASC NULLS FIRST, sr_item_sk#30 ASC NULLS FIRST], false, 0 -(106) SortMergeJoin +(102) SortMergeJoin Left keys [2]: [cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint)] Right keys [2]: [sr_ticket_number#31, sr_item_sk#30] Join condition: None -(107) Project [codegen id : 41] -Output [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, (ss_quantity#27 - coalesce(sr_return_quantity#32, 0)) AS sales_cnt#72, CheckOverflow((promote_precision(cast(ss_ext_sales_price#28 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#33, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#73] -Input [13]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68, sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] - -(108) Union - -(109) HashAggregate [codegen id : 42] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Keys [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] - -(110) Exchange -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Arguments: hashpartitioning(d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23, 5), true, [id=#74] - -(111) HashAggregate [codegen id : 43] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Keys [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +(103) Project [codegen id : 39] +Output [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, (ss_quantity#27 - coalesce(sr_return_quantity#32, 0)) AS sales_cnt#71, CheckOverflow((promote_precision(cast(ss_ext_sales_price#28 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#33, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#72] +Input [13]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67, sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] -(112) Scan parquet default.web_sales -Output [5]: 
[ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] +(104) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] ReadSchema: struct -(113) ColumnarToRow [codegen id : 46] -Input [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] +(105) ColumnarToRow [codegen id : 42] +Input [5]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41] -(114) Filter [codegen id : 46] -Input [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] -Condition : (isnotnull(ws_item_sk#39) AND isnotnull(ws_sold_date_sk#38)) +(106) Filter [codegen id : 42] +Input [5]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41] +Condition : (isnotnull(ws_item_sk#38) AND isnotnull(ws_sold_date_sk#37)) -(115) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(107) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#61, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] -(116) BroadcastHashJoin [codegen id : 46] -Left keys [1]: [ws_item_sk#39] -Right keys [1]: [i_item_sk#62] +(108) BroadcastHashJoin [codegen id : 42] +Left keys [1]: [ws_item_sk#38] +Right keys [1]: [i_item_sk#61] Join condition: None -(117) Project [codegen id : 46] -Output [9]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] -Input [10]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(109) Project [codegen id : 42] +Output [9]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] +Input [10]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_item_sk#61, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] -(118) ReusedExchange [Reuses operator id: 84] -Output [2]: [d_date_sk#67, d_year#68] +(110) ReusedExchange [Reuses operator id: 80] +Output [2]: [d_date_sk#66, d_year#67] -(119) BroadcastHashJoin [codegen id : 46] -Left keys [1]: [ws_sold_date_sk#38] -Right keys [1]: [d_date_sk#67] +(111) BroadcastHashJoin [codegen id : 42] +Left keys [1]: [ws_sold_date_sk#37] +Right keys [1]: [d_date_sk#66] Join condition: None -(120) Project [codegen id : 46] -Output [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Input [11]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_date_sk#67, d_year#68] +(112) Project [codegen id : 42] +Output [9]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Input [11]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, 
i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_date_sk#66, d_year#67] -(121) Exchange -Input [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Arguments: hashpartitioning(cast(ws_order_number#40 as bigint), cast(ws_item_sk#39 as bigint), 5), true, [id=#75] +(113) Exchange +Input [9]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Arguments: hashpartitioning(cast(ws_order_number#39 as bigint), cast(ws_item_sk#38 as bigint), 5), ENSURE_REQUIREMENTS, [id=#73] -(122) Sort [codegen id : 47] -Input [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Arguments: [cast(ws_order_number#40 as bigint) ASC NULLS FIRST, cast(ws_item_sk#39 as bigint) ASC NULLS FIRST], false, 0 +(114) Sort [codegen id : 43] +Input [9]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Arguments: [cast(ws_order_number#39 as bigint) ASC NULLS FIRST, cast(ws_item_sk#38 as bigint) ASC NULLS FIRST], false, 0 -(123) ReusedExchange [Reuses operator id: 62] -Output [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +(115) ReusedExchange [Reuses operator id: 58] +Output [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] -(124) Sort [codegen id : 49] -Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] -Arguments: [wr_order_number#45 ASC NULLS FIRST, wr_item_sk#44 ASC NULLS FIRST], false, 0 +(116) Sort [codegen id : 45] +Input [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] +Arguments: [wr_order_number#44 ASC NULLS FIRST, wr_item_sk#43 ASC NULLS FIRST], false, 0 -(125) SortMergeJoin -Left keys [2]: [cast(ws_order_number#40 as bigint), cast(ws_item_sk#39 as bigint)] -Right keys [2]: [wr_order_number#45, wr_item_sk#44] +(117) SortMergeJoin +Left keys [2]: [cast(ws_order_number#39 as bigint), cast(ws_item_sk#38 as bigint)] +Right keys [2]: [wr_order_number#44, wr_item_sk#43] Join condition: None -(126) Project [codegen id : 50] -Output [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, (ws_quantity#41 - coalesce(wr_return_quantity#46, 0)) AS sales_cnt#76, CheckOverflow((promote_precision(cast(ws_ext_sales_price#42 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#47, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#77] -Input [13]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68, wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +(118) Project [codegen id : 46] +Output [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, (ws_quantity#40 - coalesce(wr_return_quantity#45, 0)) AS sales_cnt#74, CheckOverflow((promote_precision(cast(ws_ext_sales_price#41 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#46, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#75] +Input [13]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67, 
wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] -(127) Union +(119) Union -(128) HashAggregate [codegen id : 51] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Keys [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +(120) HashAggregate [codegen id : 47] +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] +Keys [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] Functions: [] Aggregate Attributes: [] -Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Results [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] -(129) Exchange -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Arguments: hashpartitioning(d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23, 5), true, [id=#78] +(121) Exchange +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] +Arguments: hashpartitioning(d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23, 5), ENSURE_REQUIREMENTS, [id=#76] -(130) HashAggregate [codegen id : 52] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Keys [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +(122) HashAggregate [codegen id : 48] +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] +Keys [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] Functions: [] Aggregate Attributes: [] -Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Results [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] -(131) HashAggregate [codegen id : 52] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Keys [5]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(123) HashAggregate [codegen id : 48] +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] +Keys [5]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] Functions [2]: [partial_sum(cast(sales_cnt#22 as bigint)), partial_sum(UnscaledValue(sales_amt#23))] -Aggregate Attributes [2]: [sum#79, sum#80] -Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sum#81, sum#82] +Aggregate Attributes [2]: [sum#77, sum#78] +Results [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sum#79, sum#80] -(132) Exchange -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sum#81, sum#82] -Arguments: hashpartitioning(d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, 5), true, [id=#83] +(124) Exchange +Input [7]: [d_year#67, i_brand_id#62, 
i_class_id#63, i_category_id#64, i_manufact_id#65, sum#79, sum#80] +Arguments: hashpartitioning(d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, 5), ENSURE_REQUIREMENTS, [id=#81] -(133) HashAggregate [codegen id : 53] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sum#81, sum#82] -Keys [5]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(125) HashAggregate [codegen id : 49] +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sum#79, sum#80] +Keys [5]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] Functions [2]: [sum(cast(sales_cnt#22 as bigint)), sum(UnscaledValue(sales_amt#23))] -Aggregate Attributes [2]: [sum(cast(sales_cnt#22 as bigint))#84, sum(UnscaledValue(sales_amt#23))#85] -Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sum(cast(sales_cnt#22 as bigint))#84 AS sales_cnt#86, MakeDecimal(sum(UnscaledValue(sales_amt#23))#85,18,2) AS sales_amt#87] +Aggregate Attributes [2]: [sum(cast(sales_cnt#22 as bigint))#82, sum(UnscaledValue(sales_amt#23))#83] +Results [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sum(cast(sales_cnt#22 as bigint))#82 AS sales_cnt#84, MakeDecimal(sum(UnscaledValue(sales_amt#23))#83,18,2) AS sales_amt#85] -(134) Exchange -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#86, sales_amt#87] -Arguments: hashpartitioning(i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, 5), true, [id=#88] +(126) Exchange +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#84, sales_amt#85] +Arguments: hashpartitioning(i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, 5), ENSURE_REQUIREMENTS, [id=#86] -(135) Sort [codegen id : 54] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#86, sales_amt#87] -Arguments: [i_brand_id#63 ASC NULLS FIRST, i_class_id#64 ASC NULLS FIRST, i_category_id#65 ASC NULLS FIRST, i_manufact_id#66 ASC NULLS FIRST], false, 0 +(127) Sort [codegen id : 50] +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#84, sales_amt#85] +Arguments: [i_brand_id#62 ASC NULLS FIRST, i_class_id#63 ASC NULLS FIRST, i_category_id#64 ASC NULLS FIRST, i_manufact_id#65 ASC NULLS FIRST], false, 0 -(136) SortMergeJoin [codegen id : 55] +(128) SortMergeJoin [codegen id : 51] Left keys [4]: [i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -Right keys [4]: [i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] -Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#59 as decimal(17,2))) / promote_precision(cast(sales_cnt#86 as decimal(17,2)))), DecimalType(37,20), true) < 0.90000000000000000000) +Right keys [4]: [i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] +Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#58 as decimal(17,2))) / promote_precision(cast(sales_cnt#84 as decimal(17,2)))), DecimalType(37,20), true) < 0.90000000000000000000) -(137) Project [codegen id : 55] -Output [10]: [d_year#68 AS prev_year#89, d_year#14 AS year#90, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#86 AS prev_yr_cnt#91, sales_cnt#59 AS curr_yr_cnt#92, (sales_cnt#59 - sales_cnt#86) AS sales_cnt_diff#93, 
CheckOverflow((promote_precision(cast(sales_amt#60 as decimal(19,2))) - promote_precision(cast(sales_amt#87 as decimal(19,2)))), DecimalType(19,2), true) AS sales_amt_diff#94] -Input [14]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#59, sales_amt#60, d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#86, sales_amt#87] +(129) Project [codegen id : 51] +Output [10]: [d_year#67 AS prev_year#87, d_year#14 AS year#88, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#84 AS prev_yr_cnt#89, sales_cnt#58 AS curr_yr_cnt#90, (sales_cnt#58 - sales_cnt#84) AS sales_cnt_diff#91, CheckOverflow((promote_precision(cast(sales_amt#59 as decimal(19,2))) - promote_precision(cast(sales_amt#85 as decimal(19,2)))), DecimalType(19,2), true) AS sales_amt_diff#92] +Input [14]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#58, sales_amt#59, d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#84, sales_amt#85] -(138) TakeOrderedAndProject -Input [10]: [prev_year#89, year#90, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#91, curr_yr_cnt#92, sales_cnt_diff#93, sales_amt_diff#94] -Arguments: 100, [sales_cnt_diff#93 ASC NULLS FIRST], [prev_year#89, year#90, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#91, curr_yr_cnt#92, sales_cnt_diff#93, sales_amt_diff#94] +(130) TakeOrderedAndProject +Input [10]: [prev_year#87, year#88, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#89, curr_yr_cnt#90, sales_cnt_diff#91, sales_amt_diff#92] +Arguments: 100, [sales_cnt_diff#91 ASC NULLS FIRST], [prev_year#87, year#88, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#89, curr_yr_cnt#90, sales_cnt_diff#91, sales_amt_diff#92] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.sf100/simplified.txt index d8d1a3976559d..bac8f252c2983 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.sf100/simplified.txt @@ -1,113 +1,105 @@ TakeOrderedAndProject [sales_cnt_diff,prev_year,year,i_brand_id,i_class_id,i_category_id,i_manufact_id,prev_yr_cnt,curr_yr_cnt,sales_amt_diff] - WholeStageCodegen (55) + WholeStageCodegen (51) Project [d_year,d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt,sales_amt,sales_amt] SortMergeJoin [i_brand_id,i_class_id,i_category_id,i_manufact_id,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt] InputAdapter - WholeStageCodegen (27) + WholeStageCodegen (25) Sort [i_brand_id,i_class_id,i_category_id,i_manufact_id] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #1 - WholeStageCodegen (26) + WholeStageCodegen (24) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] [sum(cast(sales_cnt as bigint)),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] InputAdapter Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #2 - WholeStageCodegen (25) + WholeStageCodegen (23) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] 
InputAdapter Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #3 - WholeStageCodegen (24) + WholeStageCodegen (22) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter Union - WholeStageCodegen (16) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + WholeStageCodegen (7) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #4 - WholeStageCodegen (15) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + WholeStageCodegen (4) + Sort [cs_order_number,cs_item_sk] InputAdapter - Union - WholeStageCodegen (7) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] - InputAdapter - SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - WholeStageCodegen (4) - Sort [cs_order_number,cs_item_sk] - InputAdapter - Exchange [cs_order_number,cs_item_sk] #5 - WholeStageCodegen (3) - Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Filter [cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (1) - Project [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] - Filter [i_category,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (2) - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - WholeStageCodegen (6) - Sort [cr_order_number,cr_item_sk] - InputAdapter - Exchange [cr_order_number,cr_item_sk] #8 - WholeStageCodegen (5) - Filter [cr_order_number,cr_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] - WholeStageCodegen (14) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] - InputAdapter - SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - WholeStageCodegen (11) - Sort [ss_ticket_number,ss_item_sk] - InputAdapter - Exchange [ss_ticket_number,ss_item_sk] #9 - WholeStageCodegen (10) - Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales 
[ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #6 - InputAdapter - ReusedExchange [d_date_sk,d_year] #7 - WholeStageCodegen (13) - Sort [sr_ticket_number,sr_item_sk] - InputAdapter - Exchange [sr_ticket_number,sr_item_sk] #10 - WholeStageCodegen (12) - Filter [sr_ticket_number,sr_item_sk] + Exchange [cs_order_number,cs_item_sk] #4 + WholeStageCodegen (3) + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + Filter [i_category,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] ColumnarToRow InputAdapter - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] - WholeStageCodegen (23) + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (6) + Sort [cr_order_number,cr_item_sk] + InputAdapter + Exchange [cr_order_number,cr_item_sk] #7 + WholeStageCodegen (5) + Filter [cr_order_number,cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + WholeStageCodegen (14) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] + InputAdapter + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + WholeStageCodegen (11) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + Exchange [ss_ticket_number,ss_item_sk] #8 + WholeStageCodegen (10) + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 + WholeStageCodegen (13) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + Exchange [sr_ticket_number,sr_item_sk] #9 + WholeStageCodegen (12) + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + WholeStageCodegen (21) Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] InputAdapter SortMergeJoin 
[ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] - WholeStageCodegen (20) + WholeStageCodegen (18) Sort [ws_order_number,ws_item_sk] InputAdapter - Exchange [ws_order_number,ws_item_sk] #11 - WholeStageCodegen (19) + Exchange [ws_order_number,ws_item_sk] #10 + WholeStageCodegen (17) Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] Project [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] @@ -117,108 +109,100 @@ TakeOrderedAndProject [sales_cnt_diff,prev_year,year,i_brand_id,i_class_id,i_cat InputAdapter Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price] InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #6 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 InputAdapter - ReusedExchange [d_date_sk,d_year] #7 - WholeStageCodegen (22) + ReusedExchange [d_date_sk,d_year] #6 + WholeStageCodegen (20) Sort [wr_order_number,wr_item_sk] InputAdapter - Exchange [wr_order_number,wr_item_sk] #12 - WholeStageCodegen (21) + Exchange [wr_order_number,wr_item_sk] #11 + WholeStageCodegen (19) Filter [wr_order_number,wr_item_sk] ColumnarToRow InputAdapter Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] InputAdapter - WholeStageCodegen (54) + WholeStageCodegen (50) Sort [i_brand_id,i_class_id,i_category_id,i_manufact_id] InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #13 - WholeStageCodegen (53) + Exchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #12 + WholeStageCodegen (49) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] [sum(cast(sales_cnt as bigint)),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #14 - WholeStageCodegen (52) + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #13 + WholeStageCodegen (48) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #15 - WholeStageCodegen (51) + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #14 + WholeStageCodegen (47) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter Union - WholeStageCodegen (43) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + WholeStageCodegen (32) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #16 - WholeStageCodegen (42) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + WholeStageCodegen (29) + Sort [cs_order_number,cs_item_sk] InputAdapter - Union - WholeStageCodegen (34) - Project 
[d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] - InputAdapter - SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - WholeStageCodegen (31) - Sort [cs_order_number,cs_item_sk] - InputAdapter - Exchange [cs_order_number,cs_item_sk] #17 - WholeStageCodegen (30) - Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Filter [cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #6 - InputAdapter - BroadcastExchange #18 - WholeStageCodegen (29) - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - WholeStageCodegen (33) - Sort [cr_order_number,cr_item_sk] - InputAdapter - ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #8 - WholeStageCodegen (41) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] - InputAdapter - SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - WholeStageCodegen (38) - Sort [ss_ticket_number,ss_item_sk] - InputAdapter - Exchange [ss_ticket_number,ss_item_sk] #19 - WholeStageCodegen (37) - Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #6 - InputAdapter - ReusedExchange [d_date_sk,d_year] #18 - WholeStageCodegen (40) - Sort [sr_ticket_number,sr_item_sk] - InputAdapter - ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #10 - WholeStageCodegen (50) + Exchange [cs_order_number,cs_item_sk] #15 + WholeStageCodegen (28) + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (27) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (31) + Sort [cr_order_number,cr_item_sk] + InputAdapter + 
ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #7 + WholeStageCodegen (39) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] + InputAdapter + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + WholeStageCodegen (36) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + Exchange [ss_ticket_number,ss_item_sk] #17 + WholeStageCodegen (35) + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + ReusedExchange [d_date_sk,d_year] #16 + WholeStageCodegen (38) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #9 + WholeStageCodegen (46) Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] InputAdapter SortMergeJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] - WholeStageCodegen (47) + WholeStageCodegen (43) Sort [ws_order_number,ws_item_sk] InputAdapter - Exchange [ws_order_number,ws_item_sk] #20 - WholeStageCodegen (46) + Exchange [ws_order_number,ws_item_sk] #18 + WholeStageCodegen (42) Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] Project [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] @@ -228,10 +212,10 @@ TakeOrderedAndProject [sales_cnt_diff,prev_year,year,i_brand_id,i_class_id,i_cat InputAdapter Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price] InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #6 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 InputAdapter - ReusedExchange [d_date_sk,d_year] #18 - WholeStageCodegen (49) + ReusedExchange [d_date_sk,d_year] #16 + WholeStageCodegen (45) Sort [wr_order_number,wr_item_sk] InputAdapter - ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #12 + ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #11 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt index 292a44930ed3d..3d52a795bb44e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt @@ -1,121 +1,113 @@ == Physical Plan == -TakeOrderedAndProject (117) -+- * Project (116) - +- * BroadcastHashJoin Inner BuildRight (115) - :- * HashAggregate (63) - : +- Exchange (62) - : +- * HashAggregate (61) - : +- * HashAggregate (60) - : +- Exchange (59) - : +- * HashAggregate 
(58) - : +- Union (57) - : :- * HashAggregate (41) - : : +- Exchange (40) - : : +- * HashAggregate (39) - : : +- Union (38) - : : :- * Project (22) - : : : +- * BroadcastHashJoin LeftOuter BuildRight (21) - : : : :- * Project (16) - : : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : : :- * Project (10) - : : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : : :- * Filter (3) - : : : : : : +- * ColumnarToRow (2) - : : : : : : +- Scan parquet default.catalog_sales (1) - : : : : : +- BroadcastExchange (8) - : : : : : +- * Project (7) - : : : : : +- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.item (4) - : : : : +- BroadcastExchange (14) - : : : : +- * Filter (13) - : : : : +- * ColumnarToRow (12) - : : : : +- Scan parquet default.date_dim (11) - : : : +- BroadcastExchange (20) - : : : +- * Filter (19) - : : : +- * ColumnarToRow (18) - : : : +- Scan parquet default.catalog_returns (17) - : : +- * Project (37) - : : +- * BroadcastHashJoin LeftOuter BuildRight (36) - : : :- * Project (31) - : : : +- * BroadcastHashJoin Inner BuildRight (30) - : : : :- * Project (28) - : : : : +- * BroadcastHashJoin Inner BuildRight (27) - : : : : :- * Filter (25) - : : : : : +- * ColumnarToRow (24) - : : : : : +- Scan parquet default.store_sales (23) - : : : : +- ReusedExchange (26) - : : : +- ReusedExchange (29) - : : +- BroadcastExchange (35) - : : +- * Filter (34) - : : +- * ColumnarToRow (33) - : : +- Scan parquet default.store_returns (32) - : +- * Project (56) - : +- * BroadcastHashJoin LeftOuter BuildRight (55) - : :- * Project (50) - : : +- * BroadcastHashJoin Inner BuildRight (49) - : : :- * Project (47) - : : : +- * BroadcastHashJoin Inner BuildRight (46) - : : : :- * Filter (44) - : : : : +- * ColumnarToRow (43) - : : : : +- Scan parquet default.web_sales (42) - : : : +- ReusedExchange (45) - : : +- ReusedExchange (48) - : +- BroadcastExchange (54) - : +- * Filter (53) - : +- * ColumnarToRow (52) - : +- Scan parquet default.web_returns (51) - +- BroadcastExchange (114) - +- * HashAggregate (113) - +- Exchange (112) - +- * HashAggregate (111) - +- * HashAggregate (110) - +- Exchange (109) - +- * HashAggregate (108) - +- Union (107) - :- * HashAggregate (94) - : +- Exchange (93) - : +- * HashAggregate (92) - : +- Union (91) - : :- * Project (78) - : : +- * BroadcastHashJoin LeftOuter BuildRight (77) - : : :- * Project (75) - : : : +- * BroadcastHashJoin Inner BuildRight (74) - : : : :- * Project (69) - : : : : +- * BroadcastHashJoin Inner BuildRight (68) - : : : : :- * Filter (66) - : : : : : +- * ColumnarToRow (65) - : : : : : +- Scan parquet default.catalog_sales (64) - : : : : +- ReusedExchange (67) - : : : +- BroadcastExchange (73) - : : : +- * Filter (72) - : : : +- * ColumnarToRow (71) - : : : +- Scan parquet default.date_dim (70) - : : +- ReusedExchange (76) - : +- * Project (90) - : +- * BroadcastHashJoin LeftOuter BuildRight (89) - : :- * Project (87) - : : +- * BroadcastHashJoin Inner BuildRight (86) - : : :- * Project (84) - : : : +- * BroadcastHashJoin Inner BuildRight (83) - : : : :- * Filter (81) - : : : : +- * ColumnarToRow (80) - : : : : +- Scan parquet default.store_sales (79) - : : : +- ReusedExchange (82) - : : +- ReusedExchange (85) - : +- ReusedExchange (88) - +- * Project (106) - +- * BroadcastHashJoin LeftOuter BuildRight (105) - :- * Project (103) - : +- * BroadcastHashJoin Inner BuildRight (102) - : :- * Project (100) - : : +- * BroadcastHashJoin Inner BuildRight (99) - : : :- * Filter (97) - : : : +- * ColumnarToRow 
(96) - : : : +- Scan parquet default.web_sales (95) - : : +- ReusedExchange (98) - : +- ReusedExchange (101) - +- ReusedExchange (104) +TakeOrderedAndProject (109) ++- * Project (108) + +- * BroadcastHashJoin Inner BuildRight (107) + :- * HashAggregate (59) + : +- Exchange (58) + : +- * HashAggregate (57) + : +- * HashAggregate (56) + : +- Exchange (55) + : +- * HashAggregate (54) + : +- Union (53) + : :- * Project (22) + : : +- * BroadcastHashJoin LeftOuter BuildRight (21) + : : :- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.catalog_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.item (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.date_dim (11) + : : +- BroadcastExchange (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet default.catalog_returns (17) + : :- * Project (37) + : : +- * BroadcastHashJoin LeftOuter BuildRight (36) + : : :- * Project (31) + : : : +- * BroadcastHashJoin Inner BuildRight (30) + : : : :- * Project (28) + : : : : +- * BroadcastHashJoin Inner BuildRight (27) + : : : : :- * Filter (25) + : : : : : +- * ColumnarToRow (24) + : : : : : +- Scan parquet default.store_sales (23) + : : : : +- ReusedExchange (26) + : : : +- ReusedExchange (29) + : : +- BroadcastExchange (35) + : : +- * Filter (34) + : : +- * ColumnarToRow (33) + : : +- Scan parquet default.store_returns (32) + : +- * Project (52) + : +- * BroadcastHashJoin LeftOuter BuildRight (51) + : :- * Project (46) + : : +- * BroadcastHashJoin Inner BuildRight (45) + : : :- * Project (43) + : : : +- * BroadcastHashJoin Inner BuildRight (42) + : : : :- * Filter (40) + : : : : +- * ColumnarToRow (39) + : : : : +- Scan parquet default.web_sales (38) + : : : +- ReusedExchange (41) + : : +- ReusedExchange (44) + : +- BroadcastExchange (50) + : +- * Filter (49) + : +- * ColumnarToRow (48) + : +- Scan parquet default.web_returns (47) + +- BroadcastExchange (106) + +- * HashAggregate (105) + +- Exchange (104) + +- * HashAggregate (103) + +- * HashAggregate (102) + +- Exchange (101) + +- * HashAggregate (100) + +- Union (99) + :- * Project (74) + : +- * BroadcastHashJoin LeftOuter BuildRight (73) + : :- * Project (71) + : : +- * BroadcastHashJoin Inner BuildRight (70) + : : :- * Project (65) + : : : +- * BroadcastHashJoin Inner BuildRight (64) + : : : :- * Filter (62) + : : : : +- * ColumnarToRow (61) + : : : : +- Scan parquet default.catalog_sales (60) + : : : +- ReusedExchange (63) + : : +- BroadcastExchange (69) + : : +- * Filter (68) + : : +- * ColumnarToRow (67) + : : +- Scan parquet default.date_dim (66) + : +- ReusedExchange (72) + :- * Project (86) + : +- * BroadcastHashJoin LeftOuter BuildRight (85) + : :- * Project (83) + : : +- * BroadcastHashJoin Inner BuildRight (82) + : : :- * Project (80) + : : : +- * BroadcastHashJoin Inner BuildRight (79) + : : : :- * Filter (77) + : : : : +- * ColumnarToRow (76) + : : : : +- Scan parquet default.store_sales (75) + : : : +- ReusedExchange (78) + : : +- ReusedExchange (81) + : +- ReusedExchange (84) + +- * Project (98) + +- * BroadcastHashJoin LeftOuter BuildRight (97) + :- * Project (95) + : +- * BroadcastHashJoin Inner BuildRight (94) + : :- * Project (92) + : 
: +- * BroadcastHashJoin Inner BuildRight (91) + : : :- * Filter (89) + : : : +- * ColumnarToRow (88) + : : : +- Scan parquet default.web_sales (87) + : : +- ReusedExchange (90) + : +- ReusedExchange (93) + +- ReusedExchange (96) (1) Scan parquet default.catalog_sales @@ -282,366 +274,326 @@ Join condition: None Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ss_quantity#26 - coalesce(sr_return_quantity#30, 0)) AS sales_cnt#33, CheckOverflow((promote_precision(cast(ss_ext_sales_price#27 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#31, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#34] Input [13]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] -(38) Union - -(39) HashAggregate [codegen id : 9] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] - -(40) Exchange -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22, 5), true, [id=#35] - -(41) HashAggregate [codegen id : 10] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] - -(42) Scan parquet default.web_sales -Output [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +(38) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] ReadSchema: struct -(43) ColumnarToRow [codegen id : 14] -Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +(39) ColumnarToRow [codegen id : 12] +Input [5]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39] -(44) Filter [codegen id : 14] -Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] -Condition : (isnotnull(ws_item_sk#37) AND isnotnull(ws_sold_date_sk#36)) +(40) Filter [codegen id : 12] +Input [5]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39] +Condition : (isnotnull(ws_item_sk#36) AND isnotnull(ws_sold_date_sk#35)) -(45) ReusedExchange [Reuses operator id: 8] +(41) ReusedExchange [Reuses operator id: 8] Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -(46) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [ws_item_sk#37] +(42) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [ws_item_sk#36] 
Right keys [1]: [i_item_sk#6] Join condition: None -(47) Project [codegen id : 14] -Output [9]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -Input [10]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +(43) Project [codegen id : 12] +Output [9]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [10]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -(48) ReusedExchange [Reuses operator id: 14] +(44) ReusedExchange [Reuses operator id: 14] Output [2]: [d_date_sk#13, d_year#14] -(49) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [ws_sold_date_sk#36] +(45) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [ws_sold_date_sk#35] Right keys [1]: [d_date_sk#13] Join condition: None -(50) Project [codegen id : 14] -Output [9]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] -Input [11]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] +(46) Project [codegen id : 12] +Output [9]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Input [11]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] -(51) Scan parquet default.web_returns -Output [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +(47) Scan parquet default.web_returns +Output [4]: [wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] ReadSchema: struct -(52) ColumnarToRow [codegen id : 13] -Input [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +(48) ColumnarToRow [codegen id : 11] +Input [4]: [wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] -(53) Filter [codegen id : 13] -Input [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] -Condition : (isnotnull(wr_order_number#42) AND isnotnull(wr_item_sk#41)) +(49) Filter [codegen id : 11] +Input [4]: [wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] +Condition : (isnotnull(wr_order_number#41) AND isnotnull(wr_item_sk#40)) -(54) BroadcastExchange -Input [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] -Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#45] +(50) BroadcastExchange +Input [4]: [wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#44] -(55) BroadcastHashJoin [codegen id : 14] -Left keys [2]: [cast(ws_order_number#38 as 
bigint), cast(ws_item_sk#37 as bigint)] -Right keys [2]: [wr_order_number#42, wr_item_sk#41] +(51) BroadcastHashJoin [codegen id : 12] +Left keys [2]: [cast(ws_order_number#37 as bigint), cast(ws_item_sk#36 as bigint)] +Right keys [2]: [wr_order_number#41, wr_item_sk#40] Join condition: None -(56) Project [codegen id : 14] -Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ws_quantity#39 - coalesce(wr_return_quantity#43, 0)) AS sales_cnt#46, CheckOverflow((promote_precision(cast(ws_ext_sales_price#40 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#44, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#47] -Input [13]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +(52) Project [codegen id : 12] +Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ws_quantity#38 - coalesce(wr_return_quantity#42, 0)) AS sales_cnt#45, CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#43, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#46] +Input [13]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] -(57) Union +(53) Union -(58) HashAggregate [codegen id : 15] +(54) HashAggregate [codegen id : 13] Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] Functions: [] Aggregate Attributes: [] Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -(59) Exchange +(55) Exchange Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22, 5), true, [id=#48] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22, 5), ENSURE_REQUIREMENTS, [id=#47] -(60) HashAggregate [codegen id : 16] +(56) HashAggregate [codegen id : 14] Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] Functions: [] Aggregate Attributes: [] Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -(61) HashAggregate [codegen id : 16] +(57) HashAggregate [codegen id : 14] Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] Functions [2]: [partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))] -Aggregate Attributes [2]: [sum#49, sum#50] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] +Aggregate Attributes [2]: [sum#48, sum#49] +Results [7]: [d_year#14, i_brand_id#7, 
i_class_id#8, i_category_id#9, i_manufact_id#11, sum#50, sum#51] -(62) Exchange -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] -Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), true, [id=#53] +(58) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#50, sum#51] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [id=#52] -(63) HashAggregate [codegen id : 34] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] +(59) HashAggregate [codegen id : 30] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#50, sum#51] Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] Functions [2]: [sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))] -Aggregate Attributes [2]: [sum(cast(sales_cnt#21 as bigint))#54, sum(UnscaledValue(sales_amt#22))#55] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum(cast(sales_cnt#21 as bigint))#54 AS sales_cnt#56, MakeDecimal(sum(UnscaledValue(sales_amt#22))#55,18,2) AS sales_amt#57] +Aggregate Attributes [2]: [sum(cast(sales_cnt#21 as bigint))#53, sum(UnscaledValue(sales_amt#22))#54] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum(cast(sales_cnt#21 as bigint))#53 AS sales_cnt#55, MakeDecimal(sum(UnscaledValue(sales_amt#22))#54,18,2) AS sales_amt#56] -(64) Scan parquet default.catalog_sales +(60) Scan parquet default.catalog_sales Output [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_sales] PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] ReadSchema: struct -(65) ColumnarToRow [codegen id : 20] +(61) ColumnarToRow [codegen id : 18] Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] -(66) Filter [codegen id : 20] +(62) Filter [codegen id : 18] Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_sold_date_sk#1)) -(67) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(63) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#57, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] -(68) BroadcastHashJoin [codegen id : 20] +(64) BroadcastHashJoin [codegen id : 18] Left keys [1]: [cs_item_sk#2] -Right keys [1]: [i_item_sk#58] +Right keys [1]: [i_item_sk#57] Join condition: None -(69) Project [codegen id : 20] -Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] -Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(65) Project [codegen id : 18] +Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] +Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, 
cs_ext_sales_price#5, i_item_sk#57, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] -(70) Scan parquet default.date_dim -Output [2]: [d_date_sk#63, d_year#64] +(66) Scan parquet default.date_dim +Output [2]: [d_date_sk#62, d_year#63] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct -(71) ColumnarToRow [codegen id : 18] -Input [2]: [d_date_sk#63, d_year#64] +(67) ColumnarToRow [codegen id : 16] +Input [2]: [d_date_sk#62, d_year#63] -(72) Filter [codegen id : 18] -Input [2]: [d_date_sk#63, d_year#64] -Condition : ((isnotnull(d_year#64) AND (d_year#64 = 2001)) AND isnotnull(d_date_sk#63)) +(68) Filter [codegen id : 16] +Input [2]: [d_date_sk#62, d_year#63] +Condition : ((isnotnull(d_year#63) AND (d_year#63 = 2001)) AND isnotnull(d_date_sk#62)) -(73) BroadcastExchange -Input [2]: [d_date_sk#63, d_year#64] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#65] +(69) BroadcastExchange +Input [2]: [d_date_sk#62, d_year#63] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#64] -(74) BroadcastHashJoin [codegen id : 20] +(70) BroadcastHashJoin [codegen id : 18] Left keys [1]: [cs_sold_date_sk#1] -Right keys [1]: [d_date_sk#63] +Right keys [1]: [d_date_sk#62] Join condition: None -(75) Project [codegen id : 20] -Output [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64] -Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_date_sk#63, d_year#64] +(71) Project [codegen id : 18] +Output [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_year#63] +Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_date_sk#62, d_year#63] -(76) ReusedExchange [Reuses operator id: 20] +(72) ReusedExchange [Reuses operator id: 20] Output [4]: [cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] -(77) BroadcastHashJoin [codegen id : 20] +(73) BroadcastHashJoin [codegen id : 18] Left keys [2]: [cs_order_number#3, cs_item_sk#2] Right keys [2]: [cr_order_number#17, cr_item_sk#16] Join condition: None -(78) Project [codegen id : 20] -Output [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, (cs_quantity#4 - coalesce(cr_return_quantity#18, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#19, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#22] -Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64, cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] +(74) Project [codegen id : 18] +Output [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, (cs_quantity#4 - coalesce(cr_return_quantity#18, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#19, 0.00) as 
decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#22] +Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_year#63, cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] -(79) Scan parquet default.store_sales +(75) Scan parquet default.store_sales Output [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] ReadSchema: struct -(80) ColumnarToRow [codegen id : 24] +(76) ColumnarToRow [codegen id : 22] Input [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] -(81) Filter [codegen id : 24] +(77) Filter [codegen id : 22] Input [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] Condition : (isnotnull(ss_item_sk#24) AND isnotnull(ss_sold_date_sk#23)) -(82) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(78) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#57, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] -(83) BroadcastHashJoin [codegen id : 24] +(79) BroadcastHashJoin [codegen id : 22] Left keys [1]: [ss_item_sk#24] -Right keys [1]: [i_item_sk#58] +Right keys [1]: [i_item_sk#57] Join condition: None -(84) Project [codegen id : 24] -Output [9]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] -Input [10]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(80) Project [codegen id : 22] +Output [9]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] +Input [10]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_item_sk#57, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] -(85) ReusedExchange [Reuses operator id: 73] -Output [2]: [d_date_sk#63, d_year#64] +(81) ReusedExchange [Reuses operator id: 69] +Output [2]: [d_date_sk#62, d_year#63] -(86) BroadcastHashJoin [codegen id : 24] +(82) BroadcastHashJoin [codegen id : 22] Left keys [1]: [ss_sold_date_sk#23] -Right keys [1]: [d_date_sk#63] +Right keys [1]: [d_date_sk#62] Join condition: None -(87) Project [codegen id : 24] -Output [9]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64] -Input [11]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_date_sk#63, d_year#64] +(83) Project [codegen id : 22] +Output [9]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_year#63] +Input [11]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_date_sk#62, d_year#63] -(88) ReusedExchange [Reuses operator 
id: 35] +(84) ReusedExchange [Reuses operator id: 35] Output [4]: [sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] -(89) BroadcastHashJoin [codegen id : 24] +(85) BroadcastHashJoin [codegen id : 22] Left keys [2]: [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#24 as bigint)] Right keys [2]: [sr_ticket_number#29, sr_item_sk#28] Join condition: None -(90) Project [codegen id : 24] -Output [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, (ss_quantity#26 - coalesce(sr_return_quantity#30, 0)) AS sales_cnt#66, CheckOverflow((promote_precision(cast(ss_ext_sales_price#27 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#31, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#67] -Input [13]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64, sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] - -(91) Union - -(92) HashAggregate [codegen id : 25] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] - -(93) Exchange -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Arguments: hashpartitioning(d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22, 5), true, [id=#68] - -(94) HashAggregate [codegen id : 26] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +(86) Project [codegen id : 22] +Output [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, (ss_quantity#26 - coalesce(sr_return_quantity#30, 0)) AS sales_cnt#65, CheckOverflow((promote_precision(cast(ss_ext_sales_price#27 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#31, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#66] +Input [13]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_year#63, sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] -(95) Scan parquet default.web_sales -Output [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +(87) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] ReadSchema: struct -(96) ColumnarToRow [codegen id : 30] -Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +(88) ColumnarToRow [codegen id : 26] +Input [5]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, 
ws_ext_sales_price#39] -(97) Filter [codegen id : 30] -Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] -Condition : (isnotnull(ws_item_sk#37) AND isnotnull(ws_sold_date_sk#36)) +(89) Filter [codegen id : 26] +Input [5]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39] +Condition : (isnotnull(ws_item_sk#36) AND isnotnull(ws_sold_date_sk#35)) -(98) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(90) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#57, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] -(99) BroadcastHashJoin [codegen id : 30] -Left keys [1]: [ws_item_sk#37] -Right keys [1]: [i_item_sk#58] +(91) BroadcastHashJoin [codegen id : 26] +Left keys [1]: [ws_item_sk#36] +Right keys [1]: [i_item_sk#57] Join condition: None -(100) Project [codegen id : 30] -Output [9]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] -Input [10]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(92) Project [codegen id : 26] +Output [9]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] +Input [10]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_item_sk#57, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] -(101) ReusedExchange [Reuses operator id: 73] -Output [2]: [d_date_sk#63, d_year#64] +(93) ReusedExchange [Reuses operator id: 69] +Output [2]: [d_date_sk#62, d_year#63] -(102) BroadcastHashJoin [codegen id : 30] -Left keys [1]: [ws_sold_date_sk#36] -Right keys [1]: [d_date_sk#63] +(94) BroadcastHashJoin [codegen id : 26] +Left keys [1]: [ws_sold_date_sk#35] +Right keys [1]: [d_date_sk#62] Join condition: None -(103) Project [codegen id : 30] -Output [9]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64] -Input [11]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_date_sk#63, d_year#64] +(95) Project [codegen id : 26] +Output [9]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_year#63] +Input [11]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_date_sk#62, d_year#63] -(104) ReusedExchange [Reuses operator id: 54] -Output [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +(96) ReusedExchange [Reuses operator id: 50] +Output [4]: [wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] -(105) BroadcastHashJoin [codegen id : 30] -Left keys [2]: [cast(ws_order_number#38 as bigint), cast(ws_item_sk#37 as bigint)] -Right keys [2]: [wr_order_number#42, wr_item_sk#41] +(97) BroadcastHashJoin [codegen id : 26] +Left keys [2]: [cast(ws_order_number#37 as bigint), cast(ws_item_sk#36 as bigint)] +Right keys [2]: 
[wr_order_number#41, wr_item_sk#40] Join condition: None -(106) Project [codegen id : 30] -Output [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, (ws_quantity#39 - coalesce(wr_return_quantity#43, 0)) AS sales_cnt#69, CheckOverflow((promote_precision(cast(ws_ext_sales_price#40 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#44, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#70] -Input [13]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64, wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +(98) Project [codegen id : 26] +Output [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, (ws_quantity#38 - coalesce(wr_return_quantity#42, 0)) AS sales_cnt#67, CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#43, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#68] +Input [13]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_year#63, wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] -(107) Union +(99) Union -(108) HashAggregate [codegen id : 31] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +(100) HashAggregate [codegen id : 27] +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] Functions: [] Aggregate Attributes: [] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Results [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] -(109) Exchange -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Arguments: hashpartitioning(d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22, 5), true, [id=#71] +(101) Exchange +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] +Arguments: hashpartitioning(d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22, 5), ENSURE_REQUIREMENTS, [id=#69] -(110) HashAggregate [codegen id : 32] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +(102) HashAggregate [codegen id : 28] +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] Functions: [] Aggregate Attributes: [] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Results [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, 
i_manufact_id#61, sales_cnt#21, sales_amt#22] -(111) HashAggregate [codegen id : 32] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Keys [5]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(103) HashAggregate [codegen id : 28] +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] +Keys [5]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] Functions [2]: [partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))] -Aggregate Attributes [2]: [sum#72, sum#73] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum#74, sum#75] +Aggregate Attributes [2]: [sum#70, sum#71] +Results [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sum#72, sum#73] -(112) Exchange -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum#74, sum#75] -Arguments: hashpartitioning(d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, 5), true, [id=#76] +(104) Exchange +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sum#72, sum#73] +Arguments: hashpartitioning(d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, 5), ENSURE_REQUIREMENTS, [id=#74] -(113) HashAggregate [codegen id : 33] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum#74, sum#75] -Keys [5]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(105) HashAggregate [codegen id : 29] +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sum#72, sum#73] +Keys [5]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] Functions [2]: [sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))] -Aggregate Attributes [2]: [sum(cast(sales_cnt#21 as bigint))#77, sum(UnscaledValue(sales_amt#22))#78] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum(cast(sales_cnt#21 as bigint))#77 AS sales_cnt#79, MakeDecimal(sum(UnscaledValue(sales_amt#22))#78,18,2) AS sales_amt#80] +Aggregate Attributes [2]: [sum(cast(sales_cnt#21 as bigint))#75, sum(UnscaledValue(sales_amt#22))#76] +Results [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sum(cast(sales_cnt#21 as bigint))#75 AS sales_cnt#77, MakeDecimal(sum(UnscaledValue(sales_amt#22))#76,18,2) AS sales_amt#78] -(114) BroadcastExchange -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#79, sales_amt#80] -Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true], input[4, int, true]),false), [id=#81] +(106) BroadcastExchange +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#77, sales_amt#78] +Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true], input[4, int, true]),false), [id=#79] -(115) BroadcastHashJoin [codegen id : 34] +(107) BroadcastHashJoin [codegen id : 30] Left keys [4]: [i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -Right keys [4]: [i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] -Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#56 as 
decimal(17,2))) / promote_precision(cast(sales_cnt#79 as decimal(17,2)))), DecimalType(37,20), true) < 0.90000000000000000000) +Right keys [4]: [i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] +Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#55 as decimal(17,2))) / promote_precision(cast(sales_cnt#77 as decimal(17,2)))), DecimalType(37,20), true) < 0.90000000000000000000) -(116) Project [codegen id : 34] -Output [10]: [d_year#64 AS prev_year#82, d_year#14 AS year#83, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#79 AS prev_yr_cnt#84, sales_cnt#56 AS curr_yr_cnt#85, (sales_cnt#56 - sales_cnt#79) AS sales_cnt_diff#86, CheckOverflow((promote_precision(cast(sales_amt#57 as decimal(19,2))) - promote_precision(cast(sales_amt#80 as decimal(19,2)))), DecimalType(19,2), true) AS sales_amt_diff#87] -Input [14]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#56, sales_amt#57, d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#79, sales_amt#80] +(108) Project [codegen id : 30] +Output [10]: [d_year#63 AS prev_year#80, d_year#14 AS year#81, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#77 AS prev_yr_cnt#82, sales_cnt#55 AS curr_yr_cnt#83, (sales_cnt#55 - sales_cnt#77) AS sales_cnt_diff#84, CheckOverflow((promote_precision(cast(sales_amt#56 as decimal(19,2))) - promote_precision(cast(sales_amt#78 as decimal(19,2)))), DecimalType(19,2), true) AS sales_amt_diff#85] +Input [14]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#55, sales_amt#56, d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#77, sales_amt#78] -(117) TakeOrderedAndProject -Input [10]: [prev_year#82, year#83, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#84, curr_yr_cnt#85, sales_cnt_diff#86, sales_amt_diff#87] -Arguments: 100, [sales_cnt_diff#86 ASC NULLS FIRST], [prev_year#82, year#83, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#84, curr_yr_cnt#85, sales_cnt_diff#86, sales_amt_diff#87] +(109) TakeOrderedAndProject +Input [10]: [prev_year#80, year#81, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#82, curr_yr_cnt#83, sales_cnt_diff#84, sales_amt_diff#85] +Arguments: 100, [sales_cnt_diff#84 ASC NULLS FIRST], [prev_year#80, year#81, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#82, curr_yr_cnt#83, sales_cnt_diff#84, sales_amt_diff#85] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/simplified.txt index 298a06b87762f..0eeca93ed7d08 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/simplified.txt @@ -1,83 +1,75 @@ TakeOrderedAndProject [sales_cnt_diff,prev_year,year,i_brand_id,i_class_id,i_category_id,i_manufact_id,prev_yr_cnt,curr_yr_cnt,sales_amt_diff] - WholeStageCodegen (34) + WholeStageCodegen (30) Project [d_year,d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt,sales_amt,sales_amt] BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_manufact_id,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt] HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] 
[sum(cast(sales_cnt as bigint)),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] InputAdapter Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #1 - WholeStageCodegen (16) + WholeStageCodegen (14) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #2 - WholeStageCodegen (15) + WholeStageCodegen (13) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter Union - WholeStageCodegen (10) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] - InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #3 - WholeStageCodegen (9) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] - InputAdapter - Union - WholeStageCodegen (4) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] - BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Filter [cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (1) - Project [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] - Filter [i_category,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (2) - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (3) - Filter [cr_order_number,cr_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] - WholeStageCodegen (8) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] - BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #4 + WholeStageCodegen (4) + Project 
[d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] + BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + Filter [i_category,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + ColumnarToRow InputAdapter - ReusedExchange [d_date_sk,d_year] #5 - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (7) - Filter [sr_ticket_number,sr_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] - WholeStageCodegen (14) + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [cr_order_number,cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + WholeStageCodegen (8) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #3 + InputAdapter + ReusedExchange [d_date_sk,d_year] #4 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + WholeStageCodegen (12) Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] @@ -89,79 +81,71 @@ TakeOrderedAndProject [sales_cnt_diff,prev_year,year,i_brand_id,i_class_id,i_cat InputAdapter Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price] InputAdapter - ReusedExchange 
[i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #4 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #3 InputAdapter - ReusedExchange [d_date_sk,d_year] #5 + ReusedExchange [d_date_sk,d_year] #4 InputAdapter - BroadcastExchange #8 - WholeStageCodegen (13) + BroadcastExchange #7 + WholeStageCodegen (11) Filter [wr_order_number,wr_item_sk] ColumnarToRow InputAdapter Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] InputAdapter - BroadcastExchange #9 - WholeStageCodegen (33) + BroadcastExchange #8 + WholeStageCodegen (29) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] [sum(cast(sales_cnt as bigint)),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #10 - WholeStageCodegen (32) + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #9 + WholeStageCodegen (28) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #11 - WholeStageCodegen (31) + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #10 + WholeStageCodegen (27) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter Union + WholeStageCodegen (18) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] + BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #3 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (16) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #5 + WholeStageCodegen (22) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] + InputAdapter + ReusedExchange 
[i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #3 + InputAdapter + ReusedExchange [d_date_sk,d_year] #11 + InputAdapter + ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #6 WholeStageCodegen (26) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] - InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #12 - WholeStageCodegen (25) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] - InputAdapter - Union - WholeStageCodegen (20) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] - BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Filter [cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #4 - InputAdapter - BroadcastExchange #13 - WholeStageCodegen (18) - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - InputAdapter - ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #6 - WholeStageCodegen (24) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] - BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #4 - InputAdapter - ReusedExchange [d_date_sk,d_year] #13 - InputAdapter - ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #7 - WholeStageCodegen (30) Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] @@ -173,8 +157,8 @@ TakeOrderedAndProject [sales_cnt_diff,prev_year,year,i_brand_id,i_class_id,i_cat InputAdapter Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price] InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #4 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #3 InputAdapter - ReusedExchange 
[d_date_sk,d_year] #13 + ReusedExchange [d_date_sk,d_year] #11 InputAdapter - ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #8 + ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #7 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt index 38292528b42fc..7be9447d16b45 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt @@ -1,226 +1,214 @@ == Physical Plan == -TakeOrderedAndProject (222) -+- * HashAggregate (221) - +- Exchange (220) - +- * HashAggregate (219) - +- Union (218) - :- * HashAggregate (198) - : +- Exchange (197) - : +- * HashAggregate (196) - : +- Union (195) - : :- * HashAggregate (175) - : : +- Exchange (174) - : : +- * HashAggregate (173) - : : +- Union (172) - : : :- * HashAggregate (152) - : : : +- Exchange (151) - : : : +- * HashAggregate (150) - : : : +- Union (149) - : : : :- * HashAggregate (129) - : : : : +- Exchange (128) - : : : : +- * HashAggregate (127) - : : : : +- Union (126) - : : : : :- * Project (87) - : : : : : +- * Filter (86) - : : : : : +- * HashAggregate (85) - : : : : : +- Exchange (84) - : : : : : +- * HashAggregate (83) - : : : : : +- * Project (82) - : : : : : +- * BroadcastHashJoin Inner BuildRight (81) - : : : : : :- * Project (71) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (70) - : : : : : : :- SortMergeJoin LeftSemi (64) - : : : : : : : :- * Sort (5) - : : : : : : : : +- Exchange (4) - : : : : : : : : +- * Filter (3) - : : : : : : : : +- * ColumnarToRow (2) - : : : : : : : : +- Scan parquet default.store_sales (1) - : : : : : : : +- * Sort (63) - : : : : : : : +- Exchange (62) - : : : : : : : +- * Project (61) - : : : : : : : +- * BroadcastHashJoin Inner BuildRight (60) - : : : : : : : :- * Filter (8) - : : : : : : : : +- * ColumnarToRow (7) - : : : : : : : : +- Scan parquet default.item (6) - : : : : : : : +- BroadcastExchange (59) - : : : : : : : +- * HashAggregate (58) - : : : : : : : +- * HashAggregate (57) - : : : : : : : +- * HashAggregate (56) - : : : : : : : +- Exchange (55) - : : : : : : : +- * HashAggregate (54) - : : : : : : : +- SortMergeJoin LeftSemi (53) - : : : : : : : :- SortMergeJoin LeftSemi (41) - : : : : : : : : :- * Sort (26) - : : : : : : : : : +- Exchange (25) - : : : : : : : : : +- * Project (24) - : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (23) - : : : : : : : : : :- * Project (18) - : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (17) - : : : : : : : : : : :- * Filter (11) - : : : : : : : : : : : +- * ColumnarToRow (10) - : : : : : : : : : : : +- Scan parquet default.store_sales (9) - : : : : : : : : : : +- BroadcastExchange (16) - : : : : : : : : : : +- * Project (15) - : : : : : : : : : : +- * Filter (14) - : : : : : : : : : : +- * ColumnarToRow (13) - : : : : : : : : : : +- Scan parquet default.date_dim (12) - : : : : : : : : : +- BroadcastExchange (22) - : : : : : : : : : +- * Filter (21) - : : : : : : : : : +- * ColumnarToRow (20) - : : : : : : : : : +- Scan parquet default.item (19) - : : : : : : : : +- * Sort (40) - : : : : : : : : +- Exchange (39) - : : : : : : : : +- * Project (38) - : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (37) - : : : : : : : : :- * Project (32) - : : : : : : : : : +- * BroadcastHashJoin Inner 
BuildRight (31) - : : : : : : : : : :- * Filter (29) - : : : : : : : : : : +- * ColumnarToRow (28) - : : : : : : : : : : +- Scan parquet default.catalog_sales (27) - : : : : : : : : : +- ReusedExchange (30) - : : : : : : : : +- BroadcastExchange (36) - : : : : : : : : +- * Filter (35) - : : : : : : : : +- * ColumnarToRow (34) - : : : : : : : : +- Scan parquet default.item (33) - : : : : : : : +- * Sort (52) - : : : : : : : +- Exchange (51) - : : : : : : : +- * Project (50) - : : : : : : : +- * BroadcastHashJoin Inner BuildRight (49) - : : : : : : : :- * Project (47) - : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (46) - : : : : : : : : :- * Filter (44) - : : : : : : : : : +- * ColumnarToRow (43) - : : : : : : : : : +- Scan parquet default.web_sales (42) - : : : : : : : : +- ReusedExchange (45) - : : : : : : : +- ReusedExchange (48) - : : : : : : +- BroadcastExchange (69) - : : : : : : +- * Project (68) - : : : : : : +- * Filter (67) - : : : : : : +- * ColumnarToRow (66) - : : : : : : +- Scan parquet default.date_dim (65) - : : : : : +- BroadcastExchange (80) - : : : : : +- SortMergeJoin LeftSemi (79) - : : : : : :- * Sort (76) - : : : : : : +- Exchange (75) - : : : : : : +- * Filter (74) - : : : : : : +- * ColumnarToRow (73) - : : : : : : +- Scan parquet default.item (72) - : : : : : +- * Sort (78) - : : : : : +- ReusedExchange (77) - : : : : :- * Project (106) - : : : : : +- * Filter (105) - : : : : : +- * HashAggregate (104) - : : : : : +- Exchange (103) - : : : : : +- * HashAggregate (102) - : : : : : +- * Project (101) - : : : : : +- * BroadcastHashJoin Inner BuildRight (100) - : : : : : :- * Project (98) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (97) - : : : : : : :- SortMergeJoin LeftSemi (95) - : : : : : : : :- * Sort (92) - : : : : : : : : +- Exchange (91) - : : : : : : : : +- * Filter (90) - : : : : : : : : +- * ColumnarToRow (89) - : : : : : : : : +- Scan parquet default.catalog_sales (88) - : : : : : : : +- * Sort (94) - : : : : : : : +- ReusedExchange (93) - : : : : : : +- ReusedExchange (96) - : : : : : +- ReusedExchange (99) - : : : : +- * Project (125) - : : : : +- * Filter (124) - : : : : +- * HashAggregate (123) - : : : : +- Exchange (122) - : : : : +- * HashAggregate (121) - : : : : +- * Project (120) - : : : : +- * BroadcastHashJoin Inner BuildRight (119) - : : : : :- * Project (117) - : : : : : +- * BroadcastHashJoin Inner BuildRight (116) - : : : : : :- SortMergeJoin LeftSemi (114) - : : : : : : :- * Sort (111) - : : : : : : : +- Exchange (110) - : : : : : : : +- * Filter (109) - : : : : : : : +- * ColumnarToRow (108) - : : : : : : : +- Scan parquet default.web_sales (107) - : : : : : : +- * Sort (113) - : : : : : : +- ReusedExchange (112) - : : : : : +- ReusedExchange (115) - : : : : +- ReusedExchange (118) - : : : +- * HashAggregate (148) - : : : +- Exchange (147) - : : : +- * HashAggregate (146) - : : : +- * HashAggregate (145) - : : : +- Exchange (144) - : : : +- * HashAggregate (143) - : : : +- Union (142) - : : : :- * Project (133) - : : : : +- * Filter (132) - : : : : +- * HashAggregate (131) - : : : : +- ReusedExchange (130) - : : : :- * Project (137) - : : : : +- * Filter (136) - : : : : +- * HashAggregate (135) - : : : : +- ReusedExchange (134) - : : : +- * Project (141) - : : : +- * Filter (140) - : : : +- * HashAggregate (139) - : : : +- ReusedExchange (138) - : : +- * HashAggregate (171) - : : +- Exchange (170) - : : +- * HashAggregate (169) - : : +- * HashAggregate (168) - : : +- Exchange (167) - : : +- * HashAggregate (166) - : : +- 
Union (165) - : : :- * Project (156) - : : : +- * Filter (155) - : : : +- * HashAggregate (154) - : : : +- ReusedExchange (153) - : : :- * Project (160) - : : : +- * Filter (159) - : : : +- * HashAggregate (158) - : : : +- ReusedExchange (157) - : : +- * Project (164) - : : +- * Filter (163) - : : +- * HashAggregate (162) - : : +- ReusedExchange (161) - : +- * HashAggregate (194) - : +- Exchange (193) - : +- * HashAggregate (192) - : +- * HashAggregate (191) - : +- Exchange (190) - : +- * HashAggregate (189) - : +- Union (188) - : :- * Project (179) - : : +- * Filter (178) - : : +- * HashAggregate (177) - : : +- ReusedExchange (176) - : :- * Project (183) - : : +- * Filter (182) - : : +- * HashAggregate (181) - : : +- ReusedExchange (180) - : +- * Project (187) - : +- * Filter (186) - : +- * HashAggregate (185) - : +- ReusedExchange (184) - +- * HashAggregate (217) - +- Exchange (216) - +- * HashAggregate (215) - +- * HashAggregate (214) - +- Exchange (213) - +- * HashAggregate (212) - +- Union (211) - :- * Project (202) - : +- * Filter (201) - : +- * HashAggregate (200) - : +- ReusedExchange (199) - :- * Project (206) - : +- * Filter (205) - : +- * HashAggregate (204) - : +- ReusedExchange (203) - +- * Project (210) - +- * Filter (209) - +- * HashAggregate (208) - +- ReusedExchange (207) +TakeOrderedAndProject (210) ++- * HashAggregate (209) + +- Exchange (208) + +- * HashAggregate (207) + +- Union (206) + :- * HashAggregate (129) + : +- Exchange (128) + : +- * HashAggregate (127) + : +- Union (126) + : :- * Project (87) + : : +- * Filter (86) + : : +- * HashAggregate (85) + : : +- Exchange (84) + : : +- * HashAggregate (83) + : : +- * Project (82) + : : +- * BroadcastHashJoin Inner BuildRight (81) + : : :- * Project (71) + : : : +- * BroadcastHashJoin Inner BuildRight (70) + : : : :- SortMergeJoin LeftSemi (64) + : : : : :- * Sort (5) + : : : : : +- Exchange (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- * Sort (63) + : : : : +- Exchange (62) + : : : : +- * Project (61) + : : : : +- * BroadcastHashJoin Inner BuildRight (60) + : : : : :- * Filter (8) + : : : : : +- * ColumnarToRow (7) + : : : : : +- Scan parquet default.item (6) + : : : : +- BroadcastExchange (59) + : : : : +- * HashAggregate (58) + : : : : +- * HashAggregate (57) + : : : : +- * HashAggregate (56) + : : : : +- Exchange (55) + : : : : +- * HashAggregate (54) + : : : : +- SortMergeJoin LeftSemi (53) + : : : : :- SortMergeJoin LeftSemi (41) + : : : : : :- * Sort (26) + : : : : : : +- Exchange (25) + : : : : : : +- * Project (24) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : : : : :- * Project (18) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (17) + : : : : : : : :- * Filter (11) + : : : : : : : : +- * ColumnarToRow (10) + : : : : : : : : +- Scan parquet default.store_sales (9) + : : : : : : : +- BroadcastExchange (16) + : : : : : : : +- * Project (15) + : : : : : : : +- * Filter (14) + : : : : : : : +- * ColumnarToRow (13) + : : : : : : : +- Scan parquet default.date_dim (12) + : : : : : : +- BroadcastExchange (22) + : : : : : : +- * Filter (21) + : : : : : : +- * ColumnarToRow (20) + : : : : : : +- Scan parquet default.item (19) + : : : : : +- * Sort (40) + : : : : : +- Exchange (39) + : : : : : +- * Project (38) + : : : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : : : :- * Project (32) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (31) + : : : : : : :- * Filter (29) + : : : : : : : +- * 
ColumnarToRow (28) + : : : : : : : +- Scan parquet default.catalog_sales (27) + : : : : : : +- ReusedExchange (30) + : : : : : +- BroadcastExchange (36) + : : : : : +- * Filter (35) + : : : : : +- * ColumnarToRow (34) + : : : : : +- Scan parquet default.item (33) + : : : : +- * Sort (52) + : : : : +- Exchange (51) + : : : : +- * Project (50) + : : : : +- * BroadcastHashJoin Inner BuildRight (49) + : : : : :- * Project (47) + : : : : : +- * BroadcastHashJoin Inner BuildRight (46) + : : : : : :- * Filter (44) + : : : : : : +- * ColumnarToRow (43) + : : : : : : +- Scan parquet default.web_sales (42) + : : : : : +- ReusedExchange (45) + : : : : +- ReusedExchange (48) + : : : +- BroadcastExchange (69) + : : : +- * Project (68) + : : : +- * Filter (67) + : : : +- * ColumnarToRow (66) + : : : +- Scan parquet default.date_dim (65) + : : +- BroadcastExchange (80) + : : +- SortMergeJoin LeftSemi (79) + : : :- * Sort (76) + : : : +- Exchange (75) + : : : +- * Filter (74) + : : : +- * ColumnarToRow (73) + : : : +- Scan parquet default.item (72) + : : +- * Sort (78) + : : +- ReusedExchange (77) + : :- * Project (106) + : : +- * Filter (105) + : : +- * HashAggregate (104) + : : +- Exchange (103) + : : +- * HashAggregate (102) + : : +- * Project (101) + : : +- * BroadcastHashJoin Inner BuildRight (100) + : : :- * Project (98) + : : : +- * BroadcastHashJoin Inner BuildRight (97) + : : : :- SortMergeJoin LeftSemi (95) + : : : : :- * Sort (92) + : : : : : +- Exchange (91) + : : : : : +- * Filter (90) + : : : : : +- * ColumnarToRow (89) + : : : : : +- Scan parquet default.catalog_sales (88) + : : : : +- * Sort (94) + : : : : +- ReusedExchange (93) + : : : +- ReusedExchange (96) + : : +- ReusedExchange (99) + : +- * Project (125) + : +- * Filter (124) + : +- * HashAggregate (123) + : +- Exchange (122) + : +- * HashAggregate (121) + : +- * Project (120) + : +- * BroadcastHashJoin Inner BuildRight (119) + : :- * Project (117) + : : +- * BroadcastHashJoin Inner BuildRight (116) + : : :- SortMergeJoin LeftSemi (114) + : : : :- * Sort (111) + : : : : +- Exchange (110) + : : : : +- * Filter (109) + : : : : +- * ColumnarToRow (108) + : : : : +- Scan parquet default.web_sales (107) + : : : +- * Sort (113) + : : : +- ReusedExchange (112) + : : +- ReusedExchange (115) + : +- ReusedExchange (118) + :- * HashAggregate (148) + : +- Exchange (147) + : +- * HashAggregate (146) + : +- * HashAggregate (145) + : +- Exchange (144) + : +- * HashAggregate (143) + : +- Union (142) + : :- * Project (133) + : : +- * Filter (132) + : : +- * HashAggregate (131) + : : +- ReusedExchange (130) + : :- * Project (137) + : : +- * Filter (136) + : : +- * HashAggregate (135) + : : +- ReusedExchange (134) + : +- * Project (141) + : +- * Filter (140) + : +- * HashAggregate (139) + : +- ReusedExchange (138) + :- * HashAggregate (167) + : +- Exchange (166) + : +- * HashAggregate (165) + : +- * HashAggregate (164) + : +- Exchange (163) + : +- * HashAggregate (162) + : +- Union (161) + : :- * Project (152) + : : +- * Filter (151) + : : +- * HashAggregate (150) + : : +- ReusedExchange (149) + : :- * Project (156) + : : +- * Filter (155) + : : +- * HashAggregate (154) + : : +- ReusedExchange (153) + : +- * Project (160) + : +- * Filter (159) + : +- * HashAggregate (158) + : +- ReusedExchange (157) + :- * HashAggregate (186) + : +- Exchange (185) + : +- * HashAggregate (184) + : +- * HashAggregate (183) + : +- Exchange (182) + : +- * HashAggregate (181) + : +- Union (180) + : :- * Project (171) + : : +- * Filter (170) + : : +- * HashAggregate (169) + 
: : +- ReusedExchange (168) + : :- * Project (175) + : : +- * Filter (174) + : : +- * HashAggregate (173) + : : +- ReusedExchange (172) + : +- * Project (179) + : +- * Filter (178) + : +- * HashAggregate (177) + : +- ReusedExchange (176) + +- * HashAggregate (205) + +- Exchange (204) + +- * HashAggregate (203) + +- * HashAggregate (202) + +- Exchange (201) + +- * HashAggregate (200) + +- Union (199) + :- * Project (190) + : +- * Filter (189) + : +- * HashAggregate (188) + : +- ReusedExchange (187) + :- * Project (194) + : +- * Filter (193) + : +- * HashAggregate (192) + : +- ReusedExchange (191) + +- * Project (198) + +- * Filter (197) + +- * HashAggregate (196) + +- ReusedExchange (195) (1) Scan parquet default.store_sales @@ -239,7 +227,7 @@ Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) (4) Exchange Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] -Arguments: hashpartitioning(ss_item_sk#2, 5), true, [id=#5] +Arguments: hashpartitioning(ss_item_sk#2, 5), ENSURE_REQUIREMENTS, [id=#5] (5) Sort [codegen id : 2] Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] @@ -333,7 +321,7 @@ Input [5]: [ss_item_sk#2, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id (25) Exchange Input [3]: [brand_id#14, class_id#15, category_id#16] -Arguments: hashpartitioning(coalesce(brand_id#14, 0), isnull(brand_id#14), coalesce(class_id#15, 0), isnull(class_id#15), coalesce(category_id#16, 0), isnull(category_id#16), 5), true, [id=#17] +Arguments: hashpartitioning(coalesce(brand_id#14, 0), isnull(brand_id#14), coalesce(class_id#15, 0), isnull(class_id#15), coalesce(category_id#16, 0), isnull(category_id#16), 5), ENSURE_REQUIREMENTS, [id=#17] (26) Sort [codegen id : 6] Input [3]: [brand_id#14, class_id#15, category_id#16] @@ -394,7 +382,7 @@ Input [5]: [cs_item_sk#19, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_i (39) Exchange Input [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] -Arguments: hashpartitioning(coalesce(i_brand_id#7, 0), isnull(i_brand_id#7), coalesce(i_class_id#8, 0), isnull(i_class_id#8), coalesce(i_category_id#9, 0), isnull(i_category_id#9), 5), true, [id=#21] +Arguments: hashpartitioning(coalesce(i_brand_id#7, 0), isnull(i_brand_id#7), coalesce(i_class_id#8, 0), isnull(i_class_id#8), coalesce(i_category_id#9, 0), isnull(i_category_id#9), 5), ENSURE_REQUIREMENTS, [id=#21] (40) Sort [codegen id : 10] Input [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] @@ -445,7 +433,7 @@ Input [5]: [ws_item_sk#23, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_i (51) Exchange Input [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] -Arguments: hashpartitioning(coalesce(i_brand_id#7, 0), isnull(i_brand_id#7), coalesce(i_class_id#8, 0), isnull(i_class_id#8), coalesce(i_category_id#9, 0), isnull(i_category_id#9), 5), true, [id=#24] +Arguments: hashpartitioning(coalesce(i_brand_id#7, 0), isnull(i_brand_id#7), coalesce(i_class_id#8, 0), isnull(i_class_id#8), coalesce(i_category_id#9, 0), isnull(i_category_id#9), 5), ENSURE_REQUIREMENTS, [id=#24] (52) Sort [codegen id : 14] Input [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] @@ -465,7 +453,7 @@ Results [3]: [brand_id#14, class_id#15, category_id#16] (55) Exchange Input [3]: [brand_id#14, class_id#15, category_id#16] -Arguments: hashpartitioning(brand_id#14, class_id#15, category_id#16, 5), true, [id=#25] +Arguments: hashpartitioning(brand_id#14, class_id#15, category_id#16, 5), ENSURE_REQUIREMENTS, [id=#25] (56) HashAggregate [codegen id : 16] Input [3]: 
[brand_id#14, class_id#15, category_id#16] @@ -503,7 +491,7 @@ Input [7]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, brand_id#1 (62) Exchange Input [1]: [ss_item_sk#27] -Arguments: hashpartitioning(ss_item_sk#27, 5), true, [id=#28] +Arguments: hashpartitioning(ss_item_sk#27, 5), ENSURE_REQUIREMENTS, [id=#28] (63) Sort [codegen id : 18] Input [1]: [ss_item_sk#27] @@ -561,7 +549,7 @@ Condition : isnotnull(i_item_sk#6) (75) Exchange Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] -Arguments: hashpartitioning(i_item_sk#6, 5), true, [id=#31] +Arguments: hashpartitioning(i_item_sk#6, 5), ENSURE_REQUIREMENTS, [id=#31] (76) Sort [codegen id : 21] Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] @@ -601,7 +589,7 @@ Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#36, isEmpty#37, c (84) Exchange Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#36, isEmpty#37, count#38] -Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#39] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, 5), ENSURE_REQUIREMENTS, [id=#39] (85) HashAggregate [codegen id : 39] Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#36, isEmpty#37, count#38] @@ -634,7 +622,7 @@ Condition : (isnotnull(cs_item_sk#19) AND isnotnull(cs_sold_date_sk#18)) (91) Exchange Input [4]: [cs_sold_date_sk#18, cs_item_sk#19, cs_quantity#48, cs_list_price#49] -Arguments: hashpartitioning(cs_item_sk#19, 5), true, [id=#50] +Arguments: hashpartitioning(cs_item_sk#19, 5), ENSURE_REQUIREMENTS, [id=#50] (92) Sort [codegen id : 41] Input [4]: [cs_sold_date_sk#18, cs_item_sk#19, cs_quantity#48, cs_list_price#49] @@ -685,7 +673,7 @@ Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#54, isEmpty#55, c (103) Exchange Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#54, isEmpty#55, count#56] -Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#57] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, 5), ENSURE_REQUIREMENTS, [id=#57] (104) HashAggregate [codegen id : 78] Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#54, isEmpty#55, count#56] @@ -718,7 +706,7 @@ Condition : (isnotnull(ws_item_sk#23) AND isnotnull(ws_sold_date_sk#22)) (110) Exchange Input [4]: [ws_sold_date_sk#22, ws_item_sk#23, ws_quantity#64, ws_list_price#65] -Arguments: hashpartitioning(ws_item_sk#23, 5), true, [id=#66] +Arguments: hashpartitioning(ws_item_sk#23, 5), ENSURE_REQUIREMENTS, [id=#66] (111) Sort [codegen id : 80] Input [4]: [ws_sold_date_sk#22, ws_item_sk#23, ws_quantity#64, ws_list_price#65] @@ -769,7 +757,7 @@ Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#70, isEmpty#71, c (122) Exchange Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#70, isEmpty#71, count#72] -Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#73] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, 5), ENSURE_REQUIREMENTS, [id=#73] (123) HashAggregate [codegen id : 117] Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#70, isEmpty#71, count#72] @@ -797,7 +785,7 @@ Results [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#83, i (128) Exchange Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#83, isEmpty#84, sum#85] -Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#86] +Arguments: 
hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), ENSURE_REQUIREMENTS, [id=#86] (129) HashAggregate [codegen id : 119] Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#83, isEmpty#84, sum#85] @@ -871,7 +859,7 @@ Results [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#114, (144) Exchange Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#114, isEmpty#115, sum#116] -Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#117] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), ENSURE_REQUIREMENTS, [id=#117] (145) HashAggregate [codegen id : 238] Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#114, isEmpty#115, sum#116] @@ -889,7 +877,7 @@ Results [6]: [channel#47, i_brand_id#7, i_class_id#8, sum#123, isEmpty#124, sum# (147) Exchange Input [6]: [channel#47, i_brand_id#7, i_class_id#8, sum#123, isEmpty#124, sum#125] -Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, 5), true, [id=#126] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, 5), ENSURE_REQUIREMENTS, [id=#126] (148) HashAggregate [codegen id : 239] Input [6]: [channel#47, i_brand_id#7, i_class_id#8, sum#123, isEmpty#124, sum#125] @@ -898,536 +886,476 @@ Functions [2]: [sum(sum_sales#89), sum(number_sales#90)] Aggregate Attributes [2]: [sum(sum_sales#89)#127, sum(number_sales#90)#128] Results [6]: [channel#47, i_brand_id#7, i_class_id#8, null AS i_category_id#129, sum(sum_sales#89)#127 AS sum(sum_sales)#130, sum(number_sales#90)#128 AS sum(number_sales)#131] -(149) Union +(149) ReusedExchange [Reuses operator id: 84] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#132, isEmpty#133, count#134] -(150) HashAggregate [codegen id : 240] -Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Functions: [] -Aggregate Attributes: [] -Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] - -(151) Exchange -Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, [id=#132] - -(152) HashAggregate [codegen id : 241] -Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Functions: [] -Aggregate Attributes: [] -Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] - -(153) ReusedExchange [Reuses operator id: 84] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#133, isEmpty#134, count#135] - -(154) HashAggregate [codegen id : 280] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#133, isEmpty#134, count#135] +(150) HashAggregate [codegen id : 278] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#132, isEmpty#133, count#134] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: 
[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#136, count(1)#137] -Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#136 AS sales#42, count(1)#137 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#136 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#138] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#135, count(1)#136] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#135 AS sales#42, count(1)#136 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#135 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#137] -(155) Filter [codegen id : 280] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#138] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#138) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#138 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) +(151) Filter [codegen id : 278] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#137] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#137) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#137 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) -(156) Project [codegen id : 280] +(152) Project [codegen id : 278] Output [6]: [store AS channel#47, 
i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#138] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#137] -(157) ReusedExchange [Reuses operator id: 103] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#139, isEmpty#140, count#141] +(153) ReusedExchange [Reuses operator id: 103] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#138, isEmpty#139, count#140] -(158) HashAggregate [codegen id : 319] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#139, isEmpty#140, count#141] +(154) HashAggregate [codegen id : 317] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#138, isEmpty#139, count#140] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#142, count(1)#143] -Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#142 AS sales#60, count(1)#143 AS number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#142 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#144] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#141, count(1)#142] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#141 AS sales#60, count(1)#142 AS number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#141 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#143] -(159) Filter [codegen id : 319] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as 
decimal(12,2)))), DecimalType(18,2), true))#144] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#144) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#144 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) +(155) Filter [codegen id : 317] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#143] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#143) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#143 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) -(160) Project [codegen id : 319] -Output [6]: [catalog AS channel#145, i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#144] +(156) Project [codegen id : 317] +Output [6]: [catalog AS channel#144, i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#143] -(161) ReusedExchange [Reuses operator id: 122] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#146, isEmpty#147, count#148] +(157) ReusedExchange [Reuses operator id: 122] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#145, isEmpty#146, count#147] -(162) HashAggregate [codegen id : 358] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#146, isEmpty#147, count#148] +(158) HashAggregate [codegen id : 356] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#145, isEmpty#146, count#147] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#149, count(1)#150] -Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#149 AS sales#76, 
count(1)#150 AS number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#149 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#151] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#148, count(1)#149] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#148 AS sales#76, count(1)#149 AS number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#148 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#150] -(163) Filter [codegen id : 358] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#151] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#151) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#151 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) +(159) Filter [codegen id : 356] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#150] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#150) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#150 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) -(164) Project [codegen id : 358] -Output [6]: [web AS channel#152, i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#151] +(160) Project [codegen id : 356] +Output [6]: [web AS channel#151, i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, 
number_sales#77] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#150] -(165) Union +(161) Union -(166) HashAggregate [codegen id : 359] +(162) HashAggregate [codegen id : 357] Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [partial_sum(sales#42), partial_sum(number_sales#43)] -Aggregate Attributes [3]: [sum#153, isEmpty#154, sum#155] -Results [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#156, isEmpty#157, sum#158] +Aggregate Attributes [3]: [sum#152, isEmpty#153, sum#154] +Results [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#155, isEmpty#156, sum#157] -(167) Exchange -Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#156, isEmpty#157, sum#158] -Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#159] +(163) Exchange +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#155, isEmpty#156, sum#157] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), ENSURE_REQUIREMENTS, [id=#158] -(168) HashAggregate [codegen id : 360] -Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#156, isEmpty#157, sum#158] +(164) HashAggregate [codegen id : 358] +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#155, isEmpty#156, sum#157] Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(sales#42), sum(number_sales#43)] -Aggregate Attributes [2]: [sum(sales#42)#160, sum(number_sales#43)#161] -Results [4]: [channel#47, i_brand_id#7, sum(sales#42)#160 AS sum_sales#89, sum(number_sales#43)#161 AS number_sales#90] +Aggregate Attributes [2]: [sum(sales#42)#159, sum(number_sales#43)#160] +Results [4]: [channel#47, i_brand_id#7, sum(sales#42)#159 AS sum_sales#89, sum(number_sales#43)#160 AS number_sales#90] -(169) HashAggregate [codegen id : 360] +(165) HashAggregate [codegen id : 358] Input [4]: [channel#47, i_brand_id#7, sum_sales#89, number_sales#90] Keys [2]: [channel#47, i_brand_id#7] Functions [2]: [partial_sum(sum_sales#89), partial_sum(number_sales#90)] -Aggregate Attributes [3]: [sum#162, isEmpty#163, sum#164] -Results [5]: [channel#47, i_brand_id#7, sum#165, isEmpty#166, sum#167] +Aggregate Attributes [3]: [sum#161, isEmpty#162, sum#163] +Results [5]: [channel#47, i_brand_id#7, sum#164, isEmpty#165, sum#166] -(170) Exchange -Input [5]: [channel#47, i_brand_id#7, sum#165, isEmpty#166, sum#167] -Arguments: hashpartitioning(channel#47, i_brand_id#7, 5), true, [id=#168] +(166) Exchange +Input [5]: [channel#47, i_brand_id#7, sum#164, isEmpty#165, sum#166] +Arguments: hashpartitioning(channel#47, i_brand_id#7, 5), ENSURE_REQUIREMENTS, [id=#167] -(171) HashAggregate [codegen id : 361] -Input [5]: [channel#47, i_brand_id#7, sum#165, isEmpty#166, sum#167] +(167) HashAggregate [codegen id : 359] +Input [5]: [channel#47, i_brand_id#7, sum#164, isEmpty#165, sum#166] Keys [2]: [channel#47, i_brand_id#7] Functions [2]: [sum(sum_sales#89), sum(number_sales#90)] -Aggregate Attributes [2]: [sum(sum_sales#89)#169, sum(number_sales#90)#170] -Results [6]: [channel#47, i_brand_id#7, null AS i_class_id#171, null AS i_category_id#172, 
sum(sum_sales#89)#169 AS sum(sum_sales)#173, sum(number_sales#90)#170 AS sum(number_sales)#174] - -(172) Union - -(173) HashAggregate [codegen id : 362] -Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Functions: [] -Aggregate Attributes: [] -Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Aggregate Attributes [2]: [sum(sum_sales#89)#168, sum(number_sales#90)#169] +Results [6]: [channel#47, i_brand_id#7, null AS i_class_id#170, null AS i_category_id#171, sum(sum_sales#89)#168 AS sum(sum_sales)#172, sum(number_sales#90)#169 AS sum(number_sales)#173] -(174) Exchange -Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, [id=#175] +(168) ReusedExchange [Reuses operator id: 84] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#174, isEmpty#175, count#176] -(175) HashAggregate [codegen id : 363] -Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Functions: [] -Aggregate Attributes: [] -Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] - -(176) ReusedExchange [Reuses operator id: 84] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#176, isEmpty#177, count#178] - -(177) HashAggregate [codegen id : 402] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#176, isEmpty#177, count#178] +(169) HashAggregate [codegen id : 398] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#174, isEmpty#175, count#176] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#179, count(1)#180] -Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#179 AS sales#42, count(1)#180 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#179 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#181] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#177, count(1)#178] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#177 AS sales#42, count(1)#178 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#177 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#179] -(178) Filter [codegen id : 402] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#181] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#181) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#181 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) +(170) Filter [codegen id : 398] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#179] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#179) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#179 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) -(179) Project [codegen id : 402] +(171) Project [codegen id : 398] Output [6]: [store AS channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#181] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#179] -(180) ReusedExchange [Reuses operator id: 103] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#182, isEmpty#183, count#184] +(172) ReusedExchange [Reuses operator id: 103] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#180, isEmpty#181, count#182] -(181) HashAggregate [codegen id : 441] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#182, isEmpty#183, count#184] +(173) HashAggregate [codegen id : 437] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#180, isEmpty#181, count#182] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: 
[sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#185, count(1)#186] -Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#185 AS sales#60, count(1)#186 AS number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#185 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#187] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#183, count(1)#184] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#183 AS sales#60, count(1)#184 AS number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#183 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#185] -(182) Filter [codegen id : 441] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#187] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#187) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#187 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) +(174) Filter [codegen id : 437] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#185] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#185) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as 
decimal(12,2)))), DecimalType(18,2), true))#185 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) -(183) Project [codegen id : 441] -Output [6]: [catalog AS channel#188, i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#187] +(175) Project [codegen id : 437] +Output [6]: [catalog AS channel#186, i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#185] -(184) ReusedExchange [Reuses operator id: 122] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#189, isEmpty#190, count#191] +(176) ReusedExchange [Reuses operator id: 122] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#187, isEmpty#188, count#189] -(185) HashAggregate [codegen id : 480] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#189, isEmpty#190, count#191] +(177) HashAggregate [codegen id : 476] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#187, isEmpty#188, count#189] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#192, count(1)#193] -Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#192 AS sales#76, count(1)#193 AS number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#192 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#194] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#190, count(1)#191] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#190 AS sales#76, count(1)#191 AS number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#190 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#192] -(186) Filter [codegen id : 480] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#194] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#194) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#194 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) +(178) Filter [codegen id : 476] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#192] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#192) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#192 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) -(187) Project [codegen id : 480] -Output [6]: [web AS channel#195, i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#194] +(179) Project [codegen id : 476] +Output [6]: [web AS channel#193, i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#192] -(188) Union +(180) Union -(189) HashAggregate [codegen id : 481] +(181) HashAggregate [codegen id : 477] Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [partial_sum(sales#42), partial_sum(number_sales#43)] -Aggregate Attributes [3]: [sum#196, isEmpty#197, sum#198] -Results [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#199, isEmpty#200, sum#201] +Aggregate Attributes [3]: [sum#194, isEmpty#195, sum#196] +Results [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#197, isEmpty#198, sum#199] -(190) Exchange -Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#199, isEmpty#200, sum#201] -Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#202] +(182) Exchange +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, 
i_category_id#9, sum#197, isEmpty#198, sum#199] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), ENSURE_REQUIREMENTS, [id=#200] -(191) HashAggregate [codegen id : 482] -Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#199, isEmpty#200, sum#201] +(183) HashAggregate [codegen id : 478] +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#197, isEmpty#198, sum#199] Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(sales#42), sum(number_sales#43)] -Aggregate Attributes [2]: [sum(sales#42)#203, sum(number_sales#43)#204] -Results [3]: [channel#47, sum(sales#42)#203 AS sum_sales#89, sum(number_sales#43)#204 AS number_sales#90] +Aggregate Attributes [2]: [sum(sales#42)#201, sum(number_sales#43)#202] +Results [3]: [channel#47, sum(sales#42)#201 AS sum_sales#89, sum(number_sales#43)#202 AS number_sales#90] -(192) HashAggregate [codegen id : 482] +(184) HashAggregate [codegen id : 478] Input [3]: [channel#47, sum_sales#89, number_sales#90] Keys [1]: [channel#47] Functions [2]: [partial_sum(sum_sales#89), partial_sum(number_sales#90)] -Aggregate Attributes [3]: [sum#205, isEmpty#206, sum#207] -Results [4]: [channel#47, sum#208, isEmpty#209, sum#210] +Aggregate Attributes [3]: [sum#203, isEmpty#204, sum#205] +Results [4]: [channel#47, sum#206, isEmpty#207, sum#208] -(193) Exchange -Input [4]: [channel#47, sum#208, isEmpty#209, sum#210] -Arguments: hashpartitioning(channel#47, 5), true, [id=#211] +(185) Exchange +Input [4]: [channel#47, sum#206, isEmpty#207, sum#208] +Arguments: hashpartitioning(channel#47, 5), ENSURE_REQUIREMENTS, [id=#209] -(194) HashAggregate [codegen id : 483] -Input [4]: [channel#47, sum#208, isEmpty#209, sum#210] +(186) HashAggregate [codegen id : 479] +Input [4]: [channel#47, sum#206, isEmpty#207, sum#208] Keys [1]: [channel#47] Functions [2]: [sum(sum_sales#89), sum(number_sales#90)] -Aggregate Attributes [2]: [sum(sum_sales#89)#212, sum(number_sales#90)#213] -Results [6]: [channel#47, null AS i_brand_id#214, null AS i_class_id#215, null AS i_category_id#216, sum(sum_sales#89)#212 AS sum(sum_sales)#217, sum(number_sales#90)#213 AS sum(number_sales)#218] +Aggregate Attributes [2]: [sum(sum_sales#89)#210, sum(number_sales#90)#211] +Results [6]: [channel#47, null AS i_brand_id#212, null AS i_class_id#213, null AS i_category_id#214, sum(sum_sales#89)#210 AS sum(sum_sales)#215, sum(number_sales#90)#211 AS sum(number_sales)#216] -(195) Union - -(196) HashAggregate [codegen id : 484] -Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Functions: [] -Aggregate Attributes: [] -Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +(187) ReusedExchange [Reuses operator id: 84] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#217, isEmpty#218, count#219] -(197) Exchange -Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, [id=#219] - -(198) HashAggregate [codegen id : 485] -Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Functions: [] 
-Aggregate Attributes: [] -Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] - -(199) ReusedExchange [Reuses operator id: 84] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#220, isEmpty#221, count#222] - -(200) HashAggregate [codegen id : 524] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#220, isEmpty#221, count#222] +(188) HashAggregate [codegen id : 518] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#217, isEmpty#218, count#219] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#223, count(1)#224] -Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#223 AS sales#42, count(1)#224 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#223 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#225] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#220, count(1)#221] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#220 AS sales#42, count(1)#221 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#220 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#222] -(201) Filter [codegen id : 524] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#225] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#225) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#225 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) +(189) Filter [codegen id : 518] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, 
number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#222] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#222) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#222 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) -(202) Project [codegen id : 524] +(190) Project [codegen id : 518] Output [6]: [store AS channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#225] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#222] -(203) ReusedExchange [Reuses operator id: 103] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#226, isEmpty#227, count#228] +(191) ReusedExchange [Reuses operator id: 103] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#223, isEmpty#224, count#225] -(204) HashAggregate [codegen id : 563] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#226, isEmpty#227, count#228] +(192) HashAggregate [codegen id : 557] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#223, isEmpty#224, count#225] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#229, count(1)#230] -Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#229 AS sales#60, count(1)#230 AS number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#229 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#231] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#226, count(1)#227] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#226 AS sales#60, count(1)#227 AS number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#226 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#228] -(205) Filter [codegen id : 563] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#231] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#231) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#231 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) +(193) Filter [codegen id : 557] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#228] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#228) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#228 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) -(206) Project [codegen id : 563] -Output [6]: [catalog AS channel#232, i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#231] +(194) Project [codegen id : 557] +Output [6]: [catalog AS channel#229, i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#228] -(207) ReusedExchange [Reuses operator id: 122] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#233, isEmpty#234, count#235] +(195) ReusedExchange [Reuses operator id: 122] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#230, isEmpty#231, count#232] -(208) HashAggregate [codegen id : 602] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#233, isEmpty#234, count#235] +(196) HashAggregate [codegen id : 596] +Input [6]: [i_brand_id#7, i_class_id#8, 
i_category_id#9, sum#230, isEmpty#231, count#232] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#236, count(1)#237] -Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#236 AS sales#76, count(1)#237 AS number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#236 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#238] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#233, count(1)#234] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#233 AS sales#76, count(1)#234 AS number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#233 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#235] -(209) Filter [codegen id : 602] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#238] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#238) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#238 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) +(197) Filter [codegen id : 596] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#235] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#235) AND 
(cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#235 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) -(210) Project [codegen id : 602] -Output [6]: [web AS channel#239, i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#238] +(198) Project [codegen id : 596] +Output [6]: [web AS channel#236, i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#235] -(211) Union +(199) Union -(212) HashAggregate [codegen id : 603] +(200) HashAggregate [codegen id : 597] Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [partial_sum(sales#42), partial_sum(number_sales#43)] -Aggregate Attributes [3]: [sum#240, isEmpty#241, sum#242] -Results [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#243, isEmpty#244, sum#245] +Aggregate Attributes [3]: [sum#237, isEmpty#238, sum#239] +Results [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#240, isEmpty#241, sum#242] -(213) Exchange -Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#243, isEmpty#244, sum#245] -Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#246] +(201) Exchange +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#240, isEmpty#241, sum#242] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), ENSURE_REQUIREMENTS, [id=#243] -(214) HashAggregate [codegen id : 604] -Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#243, isEmpty#244, sum#245] +(202) HashAggregate [codegen id : 598] +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#240, isEmpty#241, sum#242] Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(sales#42), sum(number_sales#43)] -Aggregate Attributes [2]: [sum(sales#42)#247, sum(number_sales#43)#248] -Results [2]: [sum(sales#42)#247 AS sum_sales#89, sum(number_sales#43)#248 AS number_sales#90] +Aggregate Attributes [2]: [sum(sales#42)#244, sum(number_sales#43)#245] +Results [2]: [sum(sales#42)#244 AS sum_sales#89, sum(number_sales#43)#245 AS number_sales#90] -(215) HashAggregate [codegen id : 604] +(203) HashAggregate [codegen id : 598] Input [2]: [sum_sales#89, number_sales#90] Keys: [] Functions [2]: [partial_sum(sum_sales#89), partial_sum(number_sales#90)] -Aggregate Attributes [3]: [sum#249, isEmpty#250, sum#251] -Results [3]: [sum#252, isEmpty#253, sum#254] +Aggregate Attributes [3]: [sum#246, isEmpty#247, sum#248] +Results [3]: [sum#249, isEmpty#250, sum#251] -(216) Exchange -Input [3]: [sum#252, isEmpty#253, sum#254] -Arguments: SinglePartition, true, [id=#255] +(204) Exchange +Input [3]: [sum#249, 
isEmpty#250, sum#251] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#252] -(217) HashAggregate [codegen id : 605] -Input [3]: [sum#252, isEmpty#253, sum#254] +(205) HashAggregate [codegen id : 599] +Input [3]: [sum#249, isEmpty#250, sum#251] Keys: [] Functions [2]: [sum(sum_sales#89), sum(number_sales#90)] -Aggregate Attributes [2]: [sum(sum_sales#89)#256, sum(number_sales#90)#257] -Results [6]: [null AS channel#258, null AS i_brand_id#259, null AS i_class_id#260, null AS i_category_id#261, sum(sum_sales#89)#256 AS sum(sum_sales)#262, sum(number_sales#90)#257 AS sum(number_sales)#263] +Aggregate Attributes [2]: [sum(sum_sales#89)#253, sum(number_sales#90)#254] +Results [6]: [null AS channel#255, null AS i_brand_id#256, null AS i_class_id#257, null AS i_category_id#258, sum(sum_sales#89)#253 AS sum(sum_sales)#259, sum(number_sales#90)#254 AS sum(number_sales)#260] -(218) Union +(206) Union -(219) HashAggregate [codegen id : 606] +(207) HashAggregate [codegen id : 600] Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] Functions: [] Aggregate Attributes: [] Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -(220) Exchange +(208) Exchange Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, [id=#264] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), ENSURE_REQUIREMENTS, [id=#261] -(221) HashAggregate [codegen id : 607] +(209) HashAggregate [codegen id : 601] Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] Functions: [] Aggregate Attributes: [] Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -(222) TakeOrderedAndProject +(210) TakeOrderedAndProject Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] Arguments: 100, [channel#47 ASC NULLS FIRST, i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST], [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] ===== Subqueries ===== Subquery:1 Hosting operator id = 86 Hosting Expression = Subquery scalar-subquery#45, [id=#46] -* HashAggregate (252) -+- Exchange (251) - +- * HashAggregate (250) - +- Union (249) - :- * Project (232) - : +- * BroadcastHashJoin Inner BuildRight (231) - : :- * Filter (225) - : : +- * ColumnarToRow (224) - : : +- Scan parquet default.store_sales (223) - : +- BroadcastExchange (230) - : +- * Project (229) - : +- * Filter (228) - : +- * ColumnarToRow (227) - : +- Scan parquet default.date_dim (226) - :- * Project (242) - : +- * BroadcastHashJoin Inner BuildRight (241) - : :- * Filter (235) - : : +- * ColumnarToRow (234) - : : +- Scan parquet default.catalog_sales (233) - : +- BroadcastExchange (240) - : +- * Project (239) - : +- * Filter (238) - : +- * ColumnarToRow (237) - : +- Scan parquet default.date_dim (236) - +- * Project (248) - +- * BroadcastHashJoin Inner BuildRight (247) - :- * Filter (245) - : +- * ColumnarToRow (244) - : +- Scan parquet 
default.web_sales (243) - +- ReusedExchange (246) - - -(223) Scan parquet default.store_sales +* HashAggregate (240) ++- Exchange (239) + +- * HashAggregate (238) + +- Union (237) + :- * Project (220) + : +- * BroadcastHashJoin Inner BuildRight (219) + : :- * Filter (213) + : : +- * ColumnarToRow (212) + : : +- Scan parquet default.store_sales (211) + : +- BroadcastExchange (218) + : +- * Project (217) + : +- * Filter (216) + : +- * ColumnarToRow (215) + : +- Scan parquet default.date_dim (214) + :- * Project (230) + : +- * BroadcastHashJoin Inner BuildRight (229) + : :- * Filter (223) + : : +- * ColumnarToRow (222) + : : +- Scan parquet default.catalog_sales (221) + : +- BroadcastExchange (228) + : +- * Project (227) + : +- * Filter (226) + : +- * ColumnarToRow (225) + : +- Scan parquet default.date_dim (224) + +- * Project (236) + +- * BroadcastHashJoin Inner BuildRight (235) + :- * Filter (233) + : +- * ColumnarToRow (232) + : +- Scan parquet default.web_sales (231) + +- ReusedExchange (234) + + +(211) Scan parquet default.store_sales Output [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_sold_date_sk)] ReadSchema: struct -(224) ColumnarToRow [codegen id : 2] +(212) ColumnarToRow [codegen id : 2] Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] -(225) Filter [codegen id : 2] +(213) Filter [codegen id : 2] Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] Condition : isnotnull(ss_sold_date_sk#1) -(226) Scan parquet default.date_dim +(214) Scan parquet default.date_dim Output [2]: [d_date_sk#10, d_year#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct -(227) ColumnarToRow [codegen id : 1] +(215) ColumnarToRow [codegen id : 1] Input [2]: [d_date_sk#10, d_year#11] -(228) Filter [codegen id : 1] +(216) Filter [codegen id : 1] Input [2]: [d_date_sk#10, d_year#11] Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1999)) AND (d_year#11 <= 2001)) AND isnotnull(d_date_sk#10)) -(229) Project [codegen id : 1] +(217) Project [codegen id : 1] Output [1]: [d_date_sk#10] Input [2]: [d_date_sk#10, d_year#11] -(230) BroadcastExchange +(218) BroadcastExchange Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#265] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#262] -(231) BroadcastHashJoin [codegen id : 2] +(219) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ss_sold_date_sk#1] Right keys [1]: [d_date_sk#10] Join condition: None -(232) Project [codegen id : 2] -Output [2]: [ss_quantity#3 AS quantity#266, ss_list_price#4 AS list_price#267] +(220) Project [codegen id : 2] +Output [2]: [ss_quantity#3 AS quantity#263, ss_list_price#4 AS list_price#264] Input [4]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, d_date_sk#10] -(233) Scan parquet default.catalog_sales +(221) Scan parquet default.catalog_sales Output [3]: [cs_sold_date_sk#18, cs_quantity#48, cs_list_price#49] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_sales] PushedFilters: [IsNotNull(cs_sold_date_sk)] ReadSchema: struct -(234) ColumnarToRow [codegen id : 4] +(222) ColumnarToRow [codegen id : 4] Input [3]: [cs_sold_date_sk#18, cs_quantity#48, cs_list_price#49] -(235) Filter 
[codegen id : 4] +(223) Filter [codegen id : 4] Input [3]: [cs_sold_date_sk#18, cs_quantity#48, cs_list_price#49] Condition : isnotnull(cs_sold_date_sk#18) -(236) Scan parquet default.date_dim +(224) Scan parquet default.date_dim Output [2]: [d_date_sk#10, d_year#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1998), LessThanOrEqual(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct -(237) ColumnarToRow [codegen id : 3] +(225) ColumnarToRow [codegen id : 3] Input [2]: [d_date_sk#10, d_year#11] -(238) Filter [codegen id : 3] +(226) Filter [codegen id : 3] Input [2]: [d_date_sk#10, d_year#11] Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1998)) AND (d_year#11 <= 2000)) AND isnotnull(d_date_sk#10)) -(239) Project [codegen id : 3] +(227) Project [codegen id : 3] Output [1]: [d_date_sk#10] Input [2]: [d_date_sk#10, d_year#11] -(240) BroadcastExchange +(228) BroadcastExchange Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#268] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#265] -(241) BroadcastHashJoin [codegen id : 4] +(229) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_sold_date_sk#18] Right keys [1]: [d_date_sk#10] Join condition: None -(242) Project [codegen id : 4] -Output [2]: [cs_quantity#48 AS quantity#269, cs_list_price#49 AS list_price#270] +(230) Project [codegen id : 4] +Output [2]: [cs_quantity#48 AS quantity#266, cs_list_price#49 AS list_price#267] Input [4]: [cs_sold_date_sk#18, cs_quantity#48, cs_list_price#49, d_date_sk#10] -(243) Scan parquet default.web_sales +(231) Scan parquet default.web_sales Output [3]: [ws_sold_date_sk#22, ws_quantity#64, ws_list_price#65] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_sold_date_sk)] ReadSchema: struct -(244) ColumnarToRow [codegen id : 6] +(232) ColumnarToRow [codegen id : 6] Input [3]: [ws_sold_date_sk#22, ws_quantity#64, ws_list_price#65] -(245) Filter [codegen id : 6] +(233) Filter [codegen id : 6] Input [3]: [ws_sold_date_sk#22, ws_quantity#64, ws_list_price#65] Condition : isnotnull(ws_sold_date_sk#22) -(246) ReusedExchange [Reuses operator id: 240] +(234) ReusedExchange [Reuses operator id: 228] Output [1]: [d_date_sk#10] -(247) BroadcastHashJoin [codegen id : 6] +(235) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ws_sold_date_sk#22] Right keys [1]: [d_date_sk#10] Join condition: None -(248) Project [codegen id : 6] -Output [2]: [ws_quantity#64 AS quantity#271, ws_list_price#65 AS list_price#272] +(236) Project [codegen id : 6] +Output [2]: [ws_quantity#64 AS quantity#268, ws_list_price#65 AS list_price#269] Input [4]: [ws_sold_date_sk#22, ws_quantity#64, ws_list_price#65, d_date_sk#10] -(249) Union +(237) Union -(250) HashAggregate [codegen id : 7] -Input [2]: [quantity#266, list_price#267] +(238) HashAggregate [codegen id : 7] +Input [2]: [quantity#263, list_price#264] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#266 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#267 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#273, count#274] -Results [2]: [sum#275, count#276] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#263 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#264 as 
decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#270, count#271] +Results [2]: [sum#272, count#273] -(251) Exchange -Input [2]: [sum#275, count#276] -Arguments: SinglePartition, true, [id=#277] +(239) Exchange +Input [2]: [sum#272, count#273] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#274] -(252) HashAggregate [codegen id : 8] -Input [2]: [sum#275, count#276] +(240) HashAggregate [codegen id : 8] +Input [2]: [sum#272, count#273] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#266 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#267 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#266 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#267 as decimal(12,2)))), DecimalType(18,2), true))#278] -Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#266 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#267 as decimal(12,2)))), DecimalType(18,2), true))#278 AS average_sales#279] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#263 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#264 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#263 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#264 as decimal(12,2)))), DecimalType(18,2), true))#275] +Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#263 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#264 as decimal(12,2)))), DecimalType(18,2), true))#275 AS average_sales#276] Subquery:2 Hosting operator id = 105 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] @@ -1439,22 +1367,22 @@ Subquery:5 Hosting operator id = 136 Hosting Expression = ReusedSubquery Subquer Subquery:6 Hosting operator id = 140 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:7 Hosting operator id = 155 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] +Subquery:7 Hosting operator id = 151 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:8 Hosting operator id = 159 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] +Subquery:8 Hosting operator id = 155 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:9 Hosting operator id = 163 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] +Subquery:9 Hosting operator id = 159 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:10 Hosting operator id = 178 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] +Subquery:10 Hosting operator id = 170 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:11 Hosting operator id = 182 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] +Subquery:11 Hosting operator id = 174 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:12 Hosting operator id = 186 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] +Subquery:12 Hosting operator id = 178 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:13 Hosting operator id = 201 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, 
[id=#46] +Subquery:13 Hosting operator id = 189 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:14 Hosting operator id = 205 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] +Subquery:14 Hosting operator id = 193 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:15 Hosting operator id = 209 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] +Subquery:15 Hosting operator id = 197 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt index 30856e02f2b62..c63f1b8a75643 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt @@ -1,427 +1,403 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] - WholeStageCodegen (607) + WholeStageCodegen (601) HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] InputAdapter Exchange [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] #1 - WholeStageCodegen (606) + WholeStageCodegen (600) HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] InputAdapter Union - WholeStageCodegen (485) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + WholeStageCodegen (119) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] #2 - WholeStageCodegen (484) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + Exchange [channel,i_brand_id,i_class_id,i_category_id] #2 + WholeStageCodegen (118) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] InputAdapter Union - WholeStageCodegen (363) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] - InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] #3 - WholeStageCodegen (362) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + WholeStageCodegen (39) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #1 + WholeStageCodegen (8) + HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] InputAdapter - Union - WholeStageCodegen (241) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + Exchange #17 + WholeStageCodegen (7) + HashAggregate [quantity,list_price] [sum,count,sum,count] InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] #4 - WholeStageCodegen (240) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] - 
InputAdapter - Union - WholeStageCodegen (119) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] - InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id] #5 - WholeStageCodegen (118) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] - InputAdapter - Union - WholeStageCodegen (39) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] - Subquery #1 - WholeStageCodegen (8) - HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] - InputAdapter - Exchange #20 - WholeStageCodegen (7) - HashAggregate [quantity,list_price] [sum,count,sum,count] - InputAdapter - Union - WholeStageCodegen (2) - Project [ss_quantity,ss_list_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_quantity,ss_list_price] - InputAdapter - BroadcastExchange #21 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - WholeStageCodegen (4) - Project [cs_quantity,cs_list_price] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_quantity,cs_list_price] - InputAdapter - BroadcastExchange #22 - WholeStageCodegen (3) - Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - WholeStageCodegen (6) - Project [ws_quantity,ws_list_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] - InputAdapter - ReusedExchange [d_date_sk] #22 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #6 - WholeStageCodegen (38) - HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] - Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_quantity,ss_list_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Union + WholeStageCodegen (2) + Project [ss_quantity,ss_list_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_quantity,ss_list_price] + InputAdapter + BroadcastExchange #18 + WholeStageCodegen (1) + Project [d_date_sk] + 
Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (4) + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_quantity,cs_list_price] + InputAdapter + BroadcastExchange #19 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (6) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] + InputAdapter + ReusedExchange [d_date_sk] #19 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #3 + WholeStageCodegen (38) + HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_quantity,ss_list_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + InputAdapter + SortMergeJoin [ss_item_sk,ss_item_sk] + WholeStageCodegen (2) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #4 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] + WholeStageCodegen (18) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #5 + WholeStageCodegen (17) + Project [i_item_sk] + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] + Filter [i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (16) + HashAggregate [brand_id,class_id,category_id] + HashAggregate [brand_id,class_id,category_id] + HashAggregate [brand_id,class_id,category_id] + InputAdapter + Exchange [brand_id,class_id,category_id] #7 + WholeStageCodegen (15) + HashAggregate [brand_id,class_id,category_id] + InputAdapter + SortMergeJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + SortMergeJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + WholeStageCodegen (6) + Sort [brand_id,class_id,category_id] InputAdapter - SortMergeJoin [ss_item_sk,ss_item_sk] - WholeStageCodegen (2) - Sort [ss_item_sk] - InputAdapter - Exchange [ss_item_sk] #7 - WholeStageCodegen (1) + Exchange [brand_id,class_id,category_id] #8 + WholeStageCodegen (5) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Filter [ss_item_sk,ss_sold_date_sk] ColumnarToRow InputAdapter - Scan parquet default.store_sales 
[ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] - WholeStageCodegen (18) - Sort [ss_item_sk] - InputAdapter - Exchange [ss_item_sk] #8 - WholeStageCodegen (17) - Project [i_item_sk] - BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] - Filter [i_brand_id,i_class_id,i_category_id] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - InputAdapter - BroadcastExchange #9 - WholeStageCodegen (16) - HashAggregate [brand_id,class_id,category_id] - HashAggregate [brand_id,class_id,category_id] - HashAggregate [brand_id,class_id,category_id] - InputAdapter - Exchange [brand_id,class_id,category_id] #10 - WholeStageCodegen (15) - HashAggregate [brand_id,class_id,category_id] - InputAdapter - SortMergeJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] - SortMergeJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] - WholeStageCodegen (6) - Sort [brand_id,class_id,category_id] - InputAdapter - Exchange [brand_id,class_id,category_id] #11 - WholeStageCodegen (5) - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] - InputAdapter - BroadcastExchange #12 - WholeStageCodegen (3) - Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - InputAdapter - BroadcastExchange #13 - WholeStageCodegen (4) - Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - WholeStageCodegen (10) - Sort [i_brand_id,i_class_id,i_category_id] - InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #14 - WholeStageCodegen (9) - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_item_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk] - InputAdapter - ReusedExchange [d_date_sk] #12 - InputAdapter - BroadcastExchange #15 - WholeStageCodegen (8) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - WholeStageCodegen (14) - Sort [i_brand_id,i_class_id,i_category_id] - InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #16 - WholeStageCodegen (13) - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_item_sk] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_item_sk,ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk] - InputAdapter - ReusedExchange [d_date_sk] #12 - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #15 - InputAdapter - BroadcastExchange #17 - WholeStageCodegen (19) - Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - InputAdapter - BroadcastExchange #18 - SortMergeJoin [i_item_sk,ss_item_sk] - WholeStageCodegen (21) - Sort [i_item_sk] - InputAdapter - Exchange [i_item_sk] #19 - WholeStageCodegen (20) - Filter [i_item_sk] - ColumnarToRow - 
InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - WholeStageCodegen (37) - Sort [ss_item_sk] - InputAdapter - ReusedExchange [ss_item_sk] #8 - WholeStageCodegen (78) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #23 - WholeStageCodegen (77) - HashAggregate [i_brand_id,i_class_id,i_category_id,cs_quantity,cs_list_price] [sum,isEmpty,count,sum,isEmpty,count] - Project [cs_quantity,cs_list_price,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_item_sk,cs_quantity,cs_list_price] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (4) + Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + WholeStageCodegen (10) + Sort [i_brand_id,i_class_id,i_category_id] InputAdapter - SortMergeJoin [cs_item_sk,ss_item_sk] - WholeStageCodegen (41) - Sort [cs_item_sk] - InputAdapter - Exchange [cs_item_sk] #24 - WholeStageCodegen (40) + Exchange [i_brand_id,i_class_id,i_category_id] #11 + WholeStageCodegen (9) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] Filter [cs_item_sk,cs_sold_date_sk] ColumnarToRow InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_list_price] - WholeStageCodegen (57) - Sort [ss_item_sk] - InputAdapter - ReusedExchange [ss_item_sk] #8 - InputAdapter - ReusedExchange [d_date_sk] #17 - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #18 - WholeStageCodegen (117) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), 
true)),sum,isEmpty,count] - InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #25 - WholeStageCodegen (116) - HashAggregate [i_brand_id,i_class_id,i_category_id,ws_quantity,ws_list_price] [sum,isEmpty,count,sum,isEmpty,count] - Project [ws_quantity,ws_list_price,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_item_sk,ws_quantity,ws_list_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - InputAdapter - SortMergeJoin [ws_item_sk,ss_item_sk] - WholeStageCodegen (80) - Sort [ws_item_sk] - InputAdapter - Exchange [ws_item_sk] #26 - WholeStageCodegen (79) - Filter [ws_item_sk,ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_quantity,ws_list_price] - WholeStageCodegen (96) - Sort [ss_item_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk] + InputAdapter + ReusedExchange [d_date_sk] #9 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (8) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + WholeStageCodegen (14) + Sort [i_brand_id,i_class_id,i_category_id] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #13 + WholeStageCodegen (13) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk] + InputAdapter + ReusedExchange [d_date_sk] #9 InputAdapter - ReusedExchange [ss_item_sk] #8 - InputAdapter - ReusedExchange [d_date_sk] #17 - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #18 - WholeStageCodegen (239) - HashAggregate [channel,i_brand_id,i_class_id,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #12 + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (19) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow InputAdapter - Exchange [channel,i_brand_id,i_class_id] #27 - WholeStageCodegen (238) - HashAggregate [channel,i_brand_id,i_class_id,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] - InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id] #28 - WholeStageCodegen (237) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] - InputAdapter - Union - WholeStageCodegen (158) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), 
DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 - WholeStageCodegen (197) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #23 - WholeStageCodegen (236) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #25 - WholeStageCodegen (361) - HashAggregate [channel,i_brand_id,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter - Exchange [channel,i_brand_id] #29 - WholeStageCodegen (360) - HashAggregate [channel,i_brand_id,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + BroadcastExchange #15 + SortMergeJoin [i_item_sk,ss_item_sk] + WholeStageCodegen (21) + Sort [i_item_sk] InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id] #30 - WholeStageCodegen (359) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] - InputAdapter - Union - WholeStageCodegen (280) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 - WholeStageCodegen (319) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #23 - WholeStageCodegen (358) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #25 - WholeStageCodegen (483) - HashAggregate [channel,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_brand_id,i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] - InputAdapter - Exchange [channel] #31 - WholeStageCodegen (482) - HashAggregate [channel,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] - InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id] #32 - WholeStageCodegen (481) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] - InputAdapter - Union - WholeStageCodegen (402) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange 
[i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 - WholeStageCodegen (441) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #23 - WholeStageCodegen (480) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #25 - WholeStageCodegen (605) + Exchange [i_item_sk] #16 + WholeStageCodegen (20) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + WholeStageCodegen (37) + Sort [ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk] #5 + WholeStageCodegen (78) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #20 + WholeStageCodegen (77) + HashAggregate [i_brand_id,i_class_id,i_category_id,cs_quantity,cs_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [cs_quantity,cs_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + InputAdapter + SortMergeJoin [cs_item_sk,ss_item_sk] + WholeStageCodegen (41) + Sort [cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #21 + WholeStageCodegen (40) + 
Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_list_price] + WholeStageCodegen (57) + Sort [ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk] #5 + InputAdapter + ReusedExchange [d_date_sk] #14 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #15 + WholeStageCodegen (117) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #22 + WholeStageCodegen (116) + HashAggregate [i_brand_id,i_class_id,i_category_id,ws_quantity,ws_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ws_quantity,ws_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + InputAdapter + SortMergeJoin [ws_item_sk,ss_item_sk] + WholeStageCodegen (80) + Sort [ws_item_sk] + InputAdapter + Exchange [ws_item_sk] #23 + WholeStageCodegen (79) + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_quantity,ws_list_price] + WholeStageCodegen (96) + Sort [ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk] #5 + InputAdapter + ReusedExchange [d_date_sk] #14 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #15 + WholeStageCodegen (239) + HashAggregate [channel,i_brand_id,i_class_id,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id,i_class_id] #24 + WholeStageCodegen (238) + HashAggregate [channel,i_brand_id,i_class_id,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id,i_class_id,i_category_id] #25 + WholeStageCodegen (237) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + InputAdapter + Union + WholeStageCodegen (158) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), 
true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #3 + WholeStageCodegen (197) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #20 + WholeStageCodegen (236) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #22 + WholeStageCodegen (359) + HashAggregate [channel,i_brand_id,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id] #26 + WholeStageCodegen (358) + HashAggregate [channel,i_brand_id,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id,i_class_id,i_category_id] #27 + WholeStageCodegen (357) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + InputAdapter + Union + WholeStageCodegen (278) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) 
as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #3 + WholeStageCodegen (317) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #20 + WholeStageCodegen (356) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #22 + WholeStageCodegen (479) + HashAggregate [channel,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_brand_id,i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + InputAdapter + Exchange [channel] #28 + WholeStageCodegen (478) + HashAggregate [channel,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id,i_class_id,i_category_id] #29 + WholeStageCodegen (477) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + InputAdapter + Union + WholeStageCodegen (398) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + 
InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #3 + WholeStageCodegen (437) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #20 + WholeStageCodegen (476) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #22 + WholeStageCodegen (599) HashAggregate [sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),channel,i_brand_id,i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] InputAdapter - Exchange #33 - WholeStageCodegen (604) + Exchange #30 + WholeStageCodegen (598) HashAggregate [sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id] #34 - WholeStageCodegen (603) + Exchange [channel,i_brand_id,i_class_id,i_category_id] #31 + WholeStageCodegen (597) HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] InputAdapter Union - WholeStageCodegen (524) + WholeStageCodegen (518) Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter - 
ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 - WholeStageCodegen (563) + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #3 + WholeStageCodegen (557) Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #23 - WholeStageCodegen (602) + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #20 + WholeStageCodegen (596) Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #25 + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #22 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt index 238053a3428e3..4e60a9b6b1547 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt @@ -1,210 +1,198 @@ == Physical Plan == -TakeOrderedAndProject (206) -+- * HashAggregate (205) - +- Exchange (204) - +- * HashAggregate (203) - +- Union (202) - :- * HashAggregate (182) - : +- Exchange (181) - : +- * HashAggregate (180) - : +- Union (179) - : :- * HashAggregate (159) - : : +- Exchange (158) - : : +- * HashAggregate (157) - : : +- Union (156) - : : :- * HashAggregate (136) - : : : +- Exchange (135) - : : : +- * HashAggregate (134) - : : : +- Union (133) - : : : :- * HashAggregate (113) - : : : : +- Exchange (112) - : : : : +- * HashAggregate (111) - : : : : +- Union (110) - : : : : :- * Project (77) - : : : : : +- * Filter (76) - : : : : : +- * HashAggregate (75) - : : : : : +- Exchange (74) - : : : : : +- * HashAggregate (73) - : : : : : +- * Project (72) - : : : : : +- * BroadcastHashJoin Inner BuildRight (71) - : : : : : :- * Project (65) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (64) - : : : : : : :- * BroadcastHashJoin LeftSemi BuildRight (57) - : : : : : : : :- 
* Filter (3) - : : : : : : : : +- * ColumnarToRow (2) - : : : : : : : : +- Scan parquet default.store_sales (1) - : : : : : : : +- BroadcastExchange (56) - : : : : : : : +- * Project (55) - : : : : : : : +- * BroadcastHashJoin Inner BuildRight (54) - : : : : : : : :- * Filter (6) - : : : : : : : : +- * ColumnarToRow (5) - : : : : : : : : +- Scan parquet default.item (4) - : : : : : : : +- BroadcastExchange (53) - : : : : : : : +- * HashAggregate (52) - : : : : : : : +- * HashAggregate (51) - : : : : : : : +- * HashAggregate (50) - : : : : : : : +- Exchange (49) - : : : : : : : +- * HashAggregate (48) - : : : : : : : +- * BroadcastHashJoin LeftSemi BuildRight (47) - : : : : : : : :- * BroadcastHashJoin LeftSemi BuildRight (36) - : : : : : : : : :- * Project (22) - : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (21) - : : : : : : : : : :- * Project (15) - : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (14) - : : : : : : : : : : :- * Filter (9) - : : : : : : : : : : : +- * ColumnarToRow (8) - : : : : : : : : : : : +- Scan parquet default.store_sales (7) - : : : : : : : : : : +- BroadcastExchange (13) - : : : : : : : : : : +- * Filter (12) - : : : : : : : : : : +- * ColumnarToRow (11) - : : : : : : : : : : +- Scan parquet default.item (10) - : : : : : : : : : +- BroadcastExchange (20) - : : : : : : : : : +- * Project (19) - : : : : : : : : : +- * Filter (18) - : : : : : : : : : +- * ColumnarToRow (17) - : : : : : : : : : +- Scan parquet default.date_dim (16) - : : : : : : : : +- BroadcastExchange (35) - : : : : : : : : +- * Project (34) - : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (33) - : : : : : : : : :- * Project (31) - : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (30) - : : : : : : : : : :- * Filter (25) - : : : : : : : : : : +- * ColumnarToRow (24) - : : : : : : : : : : +- Scan parquet default.catalog_sales (23) - : : : : : : : : : +- BroadcastExchange (29) - : : : : : : : : : +- * Filter (28) - : : : : : : : : : +- * ColumnarToRow (27) - : : : : : : : : : +- Scan parquet default.item (26) - : : : : : : : : +- ReusedExchange (32) - : : : : : : : +- BroadcastExchange (46) - : : : : : : : +- * Project (45) - : : : : : : : +- * BroadcastHashJoin Inner BuildRight (44) - : : : : : : : :- * Project (42) - : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (41) - : : : : : : : : :- * Filter (39) - : : : : : : : : : +- * ColumnarToRow (38) - : : : : : : : : : +- Scan parquet default.web_sales (37) - : : : : : : : : +- ReusedExchange (40) - : : : : : : : +- ReusedExchange (43) - : : : : : : +- BroadcastExchange (63) - : : : : : : +- * BroadcastHashJoin LeftSemi BuildRight (62) - : : : : : : :- * Filter (60) - : : : : : : : +- * ColumnarToRow (59) - : : : : : : : +- Scan parquet default.item (58) - : : : : : : +- ReusedExchange (61) - : : : : : +- BroadcastExchange (70) - : : : : : +- * Project (69) - : : : : : +- * Filter (68) - : : : : : +- * ColumnarToRow (67) - : : : : : +- Scan parquet default.date_dim (66) - : : : : :- * Project (93) - : : : : : +- * Filter (92) - : : : : : +- * HashAggregate (91) - : : : : : +- Exchange (90) - : : : : : +- * HashAggregate (89) - : : : : : +- * Project (88) - : : : : : +- * BroadcastHashJoin Inner BuildRight (87) - : : : : : :- * Project (85) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (84) - : : : : : : :- * BroadcastHashJoin LeftSemi BuildRight (82) - : : : : : : : :- * Filter (80) - : : : : : : : : +- * ColumnarToRow (79) - : : : : : : : : +- Scan parquet default.catalog_sales (78) - : : : : 
: : : +- ReusedExchange (81) - : : : : : : +- ReusedExchange (83) - : : : : : +- ReusedExchange (86) - : : : : +- * Project (109) - : : : : +- * Filter (108) - : : : : +- * HashAggregate (107) - : : : : +- Exchange (106) - : : : : +- * HashAggregate (105) - : : : : +- * Project (104) - : : : : +- * BroadcastHashJoin Inner BuildRight (103) - : : : : :- * Project (101) - : : : : : +- * BroadcastHashJoin Inner BuildRight (100) - : : : : : :- * BroadcastHashJoin LeftSemi BuildRight (98) - : : : : : : :- * Filter (96) - : : : : : : : +- * ColumnarToRow (95) - : : : : : : : +- Scan parquet default.web_sales (94) - : : : : : : +- ReusedExchange (97) - : : : : : +- ReusedExchange (99) - : : : : +- ReusedExchange (102) - : : : +- * HashAggregate (132) - : : : +- Exchange (131) - : : : +- * HashAggregate (130) - : : : +- * HashAggregate (129) - : : : +- Exchange (128) - : : : +- * HashAggregate (127) - : : : +- Union (126) - : : : :- * Project (117) - : : : : +- * Filter (116) - : : : : +- * HashAggregate (115) - : : : : +- ReusedExchange (114) - : : : :- * Project (121) - : : : : +- * Filter (120) - : : : : +- * HashAggregate (119) - : : : : +- ReusedExchange (118) - : : : +- * Project (125) - : : : +- * Filter (124) - : : : +- * HashAggregate (123) - : : : +- ReusedExchange (122) - : : +- * HashAggregate (155) - : : +- Exchange (154) - : : +- * HashAggregate (153) - : : +- * HashAggregate (152) - : : +- Exchange (151) - : : +- * HashAggregate (150) - : : +- Union (149) - : : :- * Project (140) - : : : +- * Filter (139) - : : : +- * HashAggregate (138) - : : : +- ReusedExchange (137) - : : :- * Project (144) - : : : +- * Filter (143) - : : : +- * HashAggregate (142) - : : : +- ReusedExchange (141) - : : +- * Project (148) - : : +- * Filter (147) - : : +- * HashAggregate (146) - : : +- ReusedExchange (145) - : +- * HashAggregate (178) - : +- Exchange (177) - : +- * HashAggregate (176) - : +- * HashAggregate (175) - : +- Exchange (174) - : +- * HashAggregate (173) - : +- Union (172) - : :- * Project (163) - : : +- * Filter (162) - : : +- * HashAggregate (161) - : : +- ReusedExchange (160) - : :- * Project (167) - : : +- * Filter (166) - : : +- * HashAggregate (165) - : : +- ReusedExchange (164) - : +- * Project (171) - : +- * Filter (170) - : +- * HashAggregate (169) - : +- ReusedExchange (168) - +- * HashAggregate (201) - +- Exchange (200) - +- * HashAggregate (199) - +- * HashAggregate (198) - +- Exchange (197) - +- * HashAggregate (196) - +- Union (195) - :- * Project (186) - : +- * Filter (185) - : +- * HashAggregate (184) - : +- ReusedExchange (183) - :- * Project (190) - : +- * Filter (189) - : +- * HashAggregate (188) - : +- ReusedExchange (187) - +- * Project (194) - +- * Filter (193) - +- * HashAggregate (192) - +- ReusedExchange (191) +TakeOrderedAndProject (194) ++- * HashAggregate (193) + +- Exchange (192) + +- * HashAggregate (191) + +- Union (190) + :- * HashAggregate (113) + : +- Exchange (112) + : +- * HashAggregate (111) + : +- Union (110) + : :- * Project (77) + : : +- * Filter (76) + : : +- * HashAggregate (75) + : : +- Exchange (74) + : : +- * HashAggregate (73) + : : +- * Project (72) + : : +- * BroadcastHashJoin Inner BuildRight (71) + : : :- * Project (65) + : : : +- * BroadcastHashJoin Inner BuildRight (64) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (57) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (56) + : : : : +- * Project (55) + : : : : +- * BroadcastHashJoin Inner 
BuildRight (54) + : : : : :- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.item (4) + : : : : +- BroadcastExchange (53) + : : : : +- * HashAggregate (52) + : : : : +- * HashAggregate (51) + : : : : +- * HashAggregate (50) + : : : : +- Exchange (49) + : : : : +- * HashAggregate (48) + : : : : +- * BroadcastHashJoin LeftSemi BuildRight (47) + : : : : :- * BroadcastHashJoin LeftSemi BuildRight (36) + : : : : : :- * Project (22) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : : : : :- * Project (15) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : : : :- * Filter (9) + : : : : : : : : +- * ColumnarToRow (8) + : : : : : : : : +- Scan parquet default.store_sales (7) + : : : : : : : +- BroadcastExchange (13) + : : : : : : : +- * Filter (12) + : : : : : : : +- * ColumnarToRow (11) + : : : : : : : +- Scan parquet default.item (10) + : : : : : : +- BroadcastExchange (20) + : : : : : : +- * Project (19) + : : : : : : +- * Filter (18) + : : : : : : +- * ColumnarToRow (17) + : : : : : : +- Scan parquet default.date_dim (16) + : : : : : +- BroadcastExchange (35) + : : : : : +- * Project (34) + : : : : : +- * BroadcastHashJoin Inner BuildRight (33) + : : : : : :- * Project (31) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (30) + : : : : : : :- * Filter (25) + : : : : : : : +- * ColumnarToRow (24) + : : : : : : : +- Scan parquet default.catalog_sales (23) + : : : : : : +- BroadcastExchange (29) + : : : : : : +- * Filter (28) + : : : : : : +- * ColumnarToRow (27) + : : : : : : +- Scan parquet default.item (26) + : : : : : +- ReusedExchange (32) + : : : : +- BroadcastExchange (46) + : : : : +- * Project (45) + : : : : +- * BroadcastHashJoin Inner BuildRight (44) + : : : : :- * Project (42) + : : : : : +- * BroadcastHashJoin Inner BuildRight (41) + : : : : : :- * Filter (39) + : : : : : : +- * ColumnarToRow (38) + : : : : : : +- Scan parquet default.web_sales (37) + : : : : : +- ReusedExchange (40) + : : : : +- ReusedExchange (43) + : : : +- BroadcastExchange (63) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (62) + : : : :- * Filter (60) + : : : : +- * ColumnarToRow (59) + : : : : +- Scan parquet default.item (58) + : : : +- ReusedExchange (61) + : : +- BroadcastExchange (70) + : : +- * Project (69) + : : +- * Filter (68) + : : +- * ColumnarToRow (67) + : : +- Scan parquet default.date_dim (66) + : :- * Project (93) + : : +- * Filter (92) + : : +- * HashAggregate (91) + : : +- Exchange (90) + : : +- * HashAggregate (89) + : : +- * Project (88) + : : +- * BroadcastHashJoin Inner BuildRight (87) + : : :- * Project (85) + : : : +- * BroadcastHashJoin Inner BuildRight (84) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (82) + : : : : :- * Filter (80) + : : : : : +- * ColumnarToRow (79) + : : : : : +- Scan parquet default.catalog_sales (78) + : : : : +- ReusedExchange (81) + : : : +- ReusedExchange (83) + : : +- ReusedExchange (86) + : +- * Project (109) + : +- * Filter (108) + : +- * HashAggregate (107) + : +- Exchange (106) + : +- * HashAggregate (105) + : +- * Project (104) + : +- * BroadcastHashJoin Inner BuildRight (103) + : :- * Project (101) + : : +- * BroadcastHashJoin Inner BuildRight (100) + : : :- * BroadcastHashJoin LeftSemi BuildRight (98) + : : : :- * Filter (96) + : : : : +- * ColumnarToRow (95) + : : : : +- Scan parquet default.web_sales (94) + : : : +- ReusedExchange (97) + : : +- ReusedExchange (99) + : +- ReusedExchange (102) + :- * HashAggregate (132) + : +- Exchange (131) + : +- * HashAggregate 
(130) + : +- * HashAggregate (129) + : +- Exchange (128) + : +- * HashAggregate (127) + : +- Union (126) + : :- * Project (117) + : : +- * Filter (116) + : : +- * HashAggregate (115) + : : +- ReusedExchange (114) + : :- * Project (121) + : : +- * Filter (120) + : : +- * HashAggregate (119) + : : +- ReusedExchange (118) + : +- * Project (125) + : +- * Filter (124) + : +- * HashAggregate (123) + : +- ReusedExchange (122) + :- * HashAggregate (151) + : +- Exchange (150) + : +- * HashAggregate (149) + : +- * HashAggregate (148) + : +- Exchange (147) + : +- * HashAggregate (146) + : +- Union (145) + : :- * Project (136) + : : +- * Filter (135) + : : +- * HashAggregate (134) + : : +- ReusedExchange (133) + : :- * Project (140) + : : +- * Filter (139) + : : +- * HashAggregate (138) + : : +- ReusedExchange (137) + : +- * Project (144) + : +- * Filter (143) + : +- * HashAggregate (142) + : +- ReusedExchange (141) + :- * HashAggregate (170) + : +- Exchange (169) + : +- * HashAggregate (168) + : +- * HashAggregate (167) + : +- Exchange (166) + : +- * HashAggregate (165) + : +- Union (164) + : :- * Project (155) + : : +- * Filter (154) + : : +- * HashAggregate (153) + : : +- ReusedExchange (152) + : :- * Project (159) + : : +- * Filter (158) + : : +- * HashAggregate (157) + : : +- ReusedExchange (156) + : +- * Project (163) + : +- * Filter (162) + : +- * HashAggregate (161) + : +- ReusedExchange (160) + +- * HashAggregate (189) + +- Exchange (188) + +- * HashAggregate (187) + +- * HashAggregate (186) + +- Exchange (185) + +- * HashAggregate (184) + +- Union (183) + :- * Project (174) + : +- * Filter (173) + : +- * HashAggregate (172) + : +- ReusedExchange (171) + :- * Project (178) + : +- * Filter (177) + : +- * HashAggregate (176) + : +- ReusedExchange (175) + +- * Project (182) + +- * Filter (181) + +- * HashAggregate (180) + +- ReusedExchange (179) (1) Scan parquet default.store_sales @@ -425,7 +413,7 @@ Results [3]: [brand_id#13, class_id#14, category_id#15] (49) Exchange Input [3]: [brand_id#13, class_id#14, category_id#15] -Arguments: hashpartitioning(brand_id#13, class_id#14, category_id#15, 5), true, [id=#23] +Arguments: hashpartitioning(brand_id#13, class_id#14, category_id#15, 5), ENSURE_REQUIREMENTS, [id=#23] (50) HashAggregate [codegen id : 10] Input [3]: [brand_id#13, class_id#14, category_id#15] @@ -545,7 +533,7 @@ Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#33, isEmpty#34, c (74) Exchange Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#33, isEmpty#34, count#35] -Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#36] +Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), ENSURE_REQUIREMENTS, [id=#36] (75) HashAggregate [codegen id : 26] Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#33, isEmpty#34, count#35] @@ -617,7 +605,7 @@ Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#50, isEmpty#51, c (90) Exchange Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#50, isEmpty#51, count#52] -Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#53] +Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), ENSURE_REQUIREMENTS, [id=#53] (91) HashAggregate [codegen id : 52] Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#50, isEmpty#51, count#52] @@ -689,7 +677,7 @@ Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#65, isEmpty#66, c (106) Exchange Input [6]: [i_brand_id#6, i_class_id#7, 
i_category_id#8, sum#65, isEmpty#66, count#67] -Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#68] +Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), ENSURE_REQUIREMENTS, [id=#68] (107) HashAggregate [codegen id : 78] Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#65, isEmpty#66, count#67] @@ -717,7 +705,7 @@ Results [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#78, i (112) Exchange Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#78, isEmpty#79, sum#80] -Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#81] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), ENSURE_REQUIREMENTS, [id=#81] (113) HashAggregate [codegen id : 80] Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#78, isEmpty#79, sum#80] @@ -791,7 +779,7 @@ Results [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#109, (128) Exchange Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#109, isEmpty#110, sum#111] -Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#112] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), ENSURE_REQUIREMENTS, [id=#112] (129) HashAggregate [codegen id : 160] Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#109, isEmpty#110, sum#111] @@ -809,7 +797,7 @@ Results [6]: [channel#44, i_brand_id#6, i_class_id#7, sum#118, isEmpty#119, sum# (131) Exchange Input [6]: [channel#44, i_brand_id#6, i_class_id#7, sum#118, isEmpty#119, sum#120] -Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, 5), true, [id=#121] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, 5), ENSURE_REQUIREMENTS, [id=#121] (132) HashAggregate [codegen id : 161] Input [6]: [channel#44, i_brand_id#6, i_class_id#7, sum#118, isEmpty#119, sum#120] @@ -818,536 +806,476 @@ Functions [2]: [sum(sum_sales#84), sum(number_sales#85)] Aggregate Attributes [2]: [sum(sum_sales#84)#122, sum(number_sales#85)#123] Results [6]: [channel#44, i_brand_id#6, i_class_id#7, null AS i_category_id#124, sum(sum_sales#84)#122 AS sum(sum_sales)#125, sum(number_sales#85)#123 AS sum(number_sales)#126] -(133) Union +(133) ReusedExchange [Reuses operator id: 74] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#127, isEmpty#128, count#129] -(134) HashAggregate [codegen id : 162] -Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Functions: [] -Aggregate Attributes: [] -Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] - -(135) Exchange -Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#127] - -(136) HashAggregate [codegen id : 163] -Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Functions: [] -Aggregate Attributes: [] -Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] 
- -(137) ReusedExchange [Reuses operator id: 74] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#128, isEmpty#129, count#130] - -(138) HashAggregate [codegen id : 189] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#128, isEmpty#129, count#130] +(134) HashAggregate [codegen id : 187] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#127, isEmpty#128, count#129] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#131, count(1)#132] -Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#131 AS sales#39, count(1)#132 AS number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#131 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#133] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#130, count(1)#131] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#130 AS sales#39, count(1)#131 AS number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#130 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#132] -(139) Filter [codegen id : 189] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#133] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#133) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#133 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) +(135) Filter [codegen id : 187] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#132] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#132) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#132 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) -(140) Project [codegen id : 189] +(136) Project [codegen id : 187] Output [6]: [store AS channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#133] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#132] -(141) ReusedExchange [Reuses operator id: 90] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#134, isEmpty#135, count#136] +(137) ReusedExchange [Reuses operator id: 90] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#133, isEmpty#134, count#135] -(142) HashAggregate [codegen id : 215] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#134, isEmpty#135, count#136] +(138) HashAggregate [codegen id : 213] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#133, isEmpty#134, count#135] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#137, count(1)#138] -Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#137 AS sales#56, count(1)#138 AS number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#137 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#139] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#136, count(1)#137] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#136 AS 
sales#56, count(1)#137 AS number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#136 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#138] -(143) Filter [codegen id : 215] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#139] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#139) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#139 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) +(139) Filter [codegen id : 213] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#138] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#138) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#138 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) -(144) Project [codegen id : 215] -Output [6]: [catalog AS channel#140, i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#139] +(140) Project [codegen id : 213] +Output [6]: [catalog AS channel#139, i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#138] -(145) ReusedExchange [Reuses operator id: 106] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#141, isEmpty#142, count#143] +(141) ReusedExchange [Reuses operator id: 106] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#140, isEmpty#141, count#142] -(146) HashAggregate [codegen id : 241] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#141, isEmpty#142, count#143] +(142) HashAggregate [codegen id : 239] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#140, isEmpty#141, count#142] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: 
[sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#144, count(1)#145] -Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#144 AS sales#71, count(1)#145 AS number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#144 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#146] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#143, count(1)#144] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#143 AS sales#71, count(1)#144 AS number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#143 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#145] -(147) Filter [codegen id : 241] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#146] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#146) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#146 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) +(143) Filter [codegen id : 239] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#145] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#145) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as 
decimal(12,2)))), DecimalType(18,2), true))#145 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) -(148) Project [codegen id : 241] -Output [6]: [web AS channel#147, i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#146] +(144) Project [codegen id : 239] +Output [6]: [web AS channel#146, i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#145] -(149) Union +(145) Union -(150) HashAggregate [codegen id : 242] +(146) HashAggregate [codegen id : 240] Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [partial_sum(sales#39), partial_sum(number_sales#40)] -Aggregate Attributes [3]: [sum#148, isEmpty#149, sum#150] -Results [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#151, isEmpty#152, sum#153] +Aggregate Attributes [3]: [sum#147, isEmpty#148, sum#149] +Results [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#150, isEmpty#151, sum#152] -(151) Exchange -Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#151, isEmpty#152, sum#153] -Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#154] +(147) Exchange +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#150, isEmpty#151, sum#152] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), ENSURE_REQUIREMENTS, [id=#153] -(152) HashAggregate [codegen id : 243] -Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#151, isEmpty#152, sum#153] +(148) HashAggregate [codegen id : 241] +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#150, isEmpty#151, sum#152] Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(sales#39), sum(number_sales#40)] -Aggregate Attributes [2]: [sum(sales#39)#155, sum(number_sales#40)#156] -Results [4]: [channel#44, i_brand_id#6, sum(sales#39)#155 AS sum_sales#84, sum(number_sales#40)#156 AS number_sales#85] +Aggregate Attributes [2]: [sum(sales#39)#154, sum(number_sales#40)#155] +Results [4]: [channel#44, i_brand_id#6, sum(sales#39)#154 AS sum_sales#84, sum(number_sales#40)#155 AS number_sales#85] -(153) HashAggregate [codegen id : 243] +(149) HashAggregate [codegen id : 241] Input [4]: [channel#44, i_brand_id#6, sum_sales#84, number_sales#85] Keys [2]: [channel#44, i_brand_id#6] Functions [2]: [partial_sum(sum_sales#84), partial_sum(number_sales#85)] -Aggregate Attributes [3]: [sum#157, isEmpty#158, sum#159] -Results [5]: [channel#44, i_brand_id#6, sum#160, isEmpty#161, sum#162] +Aggregate Attributes [3]: [sum#156, isEmpty#157, sum#158] +Results [5]: [channel#44, i_brand_id#6, sum#159, isEmpty#160, sum#161] -(154) Exchange -Input [5]: [channel#44, i_brand_id#6, sum#160, isEmpty#161, sum#162] -Arguments: hashpartitioning(channel#44, 
i_brand_id#6, 5), true, [id=#163] +(150) Exchange +Input [5]: [channel#44, i_brand_id#6, sum#159, isEmpty#160, sum#161] +Arguments: hashpartitioning(channel#44, i_brand_id#6, 5), ENSURE_REQUIREMENTS, [id=#162] -(155) HashAggregate [codegen id : 244] -Input [5]: [channel#44, i_brand_id#6, sum#160, isEmpty#161, sum#162] +(151) HashAggregate [codegen id : 242] +Input [5]: [channel#44, i_brand_id#6, sum#159, isEmpty#160, sum#161] Keys [2]: [channel#44, i_brand_id#6] Functions [2]: [sum(sum_sales#84), sum(number_sales#85)] -Aggregate Attributes [2]: [sum(sum_sales#84)#164, sum(number_sales#85)#165] -Results [6]: [channel#44, i_brand_id#6, null AS i_class_id#166, null AS i_category_id#167, sum(sum_sales#84)#164 AS sum(sum_sales)#168, sum(number_sales#85)#165 AS sum(number_sales)#169] - -(156) Union - -(157) HashAggregate [codegen id : 245] -Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Functions: [] -Aggregate Attributes: [] -Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Aggregate Attributes [2]: [sum(sum_sales#84)#163, sum(number_sales#85)#164] +Results [6]: [channel#44, i_brand_id#6, null AS i_class_id#165, null AS i_category_id#166, sum(sum_sales#84)#163 AS sum(sum_sales)#167, sum(number_sales#85)#164 AS sum(number_sales)#168] -(158) Exchange -Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#170] +(152) ReusedExchange [Reuses operator id: 74] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#169, isEmpty#170, count#171] -(159) HashAggregate [codegen id : 246] -Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Functions: [] -Aggregate Attributes: [] -Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] - -(160) ReusedExchange [Reuses operator id: 74] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#171, isEmpty#172, count#173] - -(161) HashAggregate [codegen id : 272] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#171, isEmpty#172, count#173] +(153) HashAggregate [codegen id : 268] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#169, isEmpty#170, count#171] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#174, count(1)#175] -Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#174 AS sales#39, count(1)#175 AS number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#174 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#176] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#172, count(1)#173] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#172 AS sales#39, count(1)#173 AS number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#172 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#174] -(162) Filter [codegen id : 272] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#176] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#176) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#176 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) +(154) Filter [codegen id : 268] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#174] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#174) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#174 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) -(163) Project [codegen id : 272] +(155) Project [codegen id : 268] Output [6]: [store AS channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#176] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), 
true))#174] -(164) ReusedExchange [Reuses operator id: 90] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#177, isEmpty#178, count#179] +(156) ReusedExchange [Reuses operator id: 90] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#175, isEmpty#176, count#177] -(165) HashAggregate [codegen id : 298] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#177, isEmpty#178, count#179] +(157) HashAggregate [codegen id : 294] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#175, isEmpty#176, count#177] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#180, count(1)#181] -Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#180 AS sales#56, count(1)#181 AS number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#180 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#182] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#178, count(1)#179] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#178 AS sales#56, count(1)#179 AS number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#178 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#180] -(166) Filter [codegen id : 298] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#182] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#182) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#182 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) +(158) Filter [codegen id : 294] +Input [6]: [i_brand_id#6, 
i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#180] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#180) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#180 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) -(167) Project [codegen id : 298] -Output [6]: [catalog AS channel#183, i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#182] +(159) Project [codegen id : 294] +Output [6]: [catalog AS channel#181, i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#180] -(168) ReusedExchange [Reuses operator id: 106] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#184, isEmpty#185, count#186] +(160) ReusedExchange [Reuses operator id: 106] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#182, isEmpty#183, count#184] -(169) HashAggregate [codegen id : 324] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#184, isEmpty#185, count#186] +(161) HashAggregate [codegen id : 320] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#182, isEmpty#183, count#184] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#187, count(1)#188] -Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#187 AS sales#71, count(1)#188 AS number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#187 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#189] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), 
true))#185, count(1)#186] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#185 AS sales#71, count(1)#186 AS number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#185 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#187] -(170) Filter [codegen id : 324] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#189] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#189) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#189 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) +(162) Filter [codegen id : 320] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#187] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#187) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#187 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) -(171) Project [codegen id : 324] -Output [6]: [web AS channel#190, i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#189] +(163) Project [codegen id : 320] +Output [6]: [web AS channel#188, i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#187] -(172) Union +(164) Union -(173) HashAggregate [codegen id : 325] +(165) HashAggregate [codegen id : 321] Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [partial_sum(sales#39), partial_sum(number_sales#40)] -Aggregate 
Attributes [3]: [sum#191, isEmpty#192, sum#193] -Results [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#194, isEmpty#195, sum#196] +Aggregate Attributes [3]: [sum#189, isEmpty#190, sum#191] +Results [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#192, isEmpty#193, sum#194] -(174) Exchange -Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#194, isEmpty#195, sum#196] -Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#197] +(166) Exchange +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#192, isEmpty#193, sum#194] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), ENSURE_REQUIREMENTS, [id=#195] -(175) HashAggregate [codegen id : 326] -Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#194, isEmpty#195, sum#196] +(167) HashAggregate [codegen id : 322] +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#192, isEmpty#193, sum#194] Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(sales#39), sum(number_sales#40)] -Aggregate Attributes [2]: [sum(sales#39)#198, sum(number_sales#40)#199] -Results [3]: [channel#44, sum(sales#39)#198 AS sum_sales#84, sum(number_sales#40)#199 AS number_sales#85] +Aggregate Attributes [2]: [sum(sales#39)#196, sum(number_sales#40)#197] +Results [3]: [channel#44, sum(sales#39)#196 AS sum_sales#84, sum(number_sales#40)#197 AS number_sales#85] -(176) HashAggregate [codegen id : 326] +(168) HashAggregate [codegen id : 322] Input [3]: [channel#44, sum_sales#84, number_sales#85] Keys [1]: [channel#44] Functions [2]: [partial_sum(sum_sales#84), partial_sum(number_sales#85)] -Aggregate Attributes [3]: [sum#200, isEmpty#201, sum#202] -Results [4]: [channel#44, sum#203, isEmpty#204, sum#205] +Aggregate Attributes [3]: [sum#198, isEmpty#199, sum#200] +Results [4]: [channel#44, sum#201, isEmpty#202, sum#203] -(177) Exchange -Input [4]: [channel#44, sum#203, isEmpty#204, sum#205] -Arguments: hashpartitioning(channel#44, 5), true, [id=#206] +(169) Exchange +Input [4]: [channel#44, sum#201, isEmpty#202, sum#203] +Arguments: hashpartitioning(channel#44, 5), ENSURE_REQUIREMENTS, [id=#204] -(178) HashAggregate [codegen id : 327] -Input [4]: [channel#44, sum#203, isEmpty#204, sum#205] +(170) HashAggregate [codegen id : 323] +Input [4]: [channel#44, sum#201, isEmpty#202, sum#203] Keys [1]: [channel#44] Functions [2]: [sum(sum_sales#84), sum(number_sales#85)] -Aggregate Attributes [2]: [sum(sum_sales#84)#207, sum(number_sales#85)#208] -Results [6]: [channel#44, null AS i_brand_id#209, null AS i_class_id#210, null AS i_category_id#211, sum(sum_sales#84)#207 AS sum(sum_sales)#212, sum(number_sales#85)#208 AS sum(number_sales)#213] +Aggregate Attributes [2]: [sum(sum_sales#84)#205, sum(number_sales#85)#206] +Results [6]: [channel#44, null AS i_brand_id#207, null AS i_class_id#208, null AS i_category_id#209, sum(sum_sales#84)#205 AS sum(sum_sales)#210, sum(number_sales#85)#206 AS sum(number_sales)#211] -(179) Union - -(180) HashAggregate [codegen id : 328] -Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Functions: [] -Aggregate Attributes: [] -Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +(171) ReusedExchange [Reuses operator id: 74] 
+Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#212, isEmpty#213, count#214] -(181) Exchange -Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#214] - -(182) HashAggregate [codegen id : 329] -Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Functions: [] -Aggregate Attributes: [] -Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] - -(183) ReusedExchange [Reuses operator id: 74] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#215, isEmpty#216, count#217] - -(184) HashAggregate [codegen id : 355] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#215, isEmpty#216, count#217] +(172) HashAggregate [codegen id : 349] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#212, isEmpty#213, count#214] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#218, count(1)#219] -Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#218 AS sales#39, count(1)#219 AS number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#218 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#220] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#215, count(1)#216] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#215 AS sales#39, count(1)#216 AS number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#215 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#217] -(185) Filter [codegen id : 355] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#220] -Condition : 
(isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#220) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#220 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) +(173) Filter [codegen id : 349] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#217] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#217) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#217 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) -(186) Project [codegen id : 355] +(174) Project [codegen id : 349] Output [6]: [store AS channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#220] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#217] -(187) ReusedExchange [Reuses operator id: 90] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#221, isEmpty#222, count#223] +(175) ReusedExchange [Reuses operator id: 90] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#218, isEmpty#219, count#220] -(188) HashAggregate [codegen id : 381] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#221, isEmpty#222, count#223] +(176) HashAggregate [codegen id : 375] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#218, isEmpty#219, count#220] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#224, count(1)#225] -Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#224 AS sales#56, count(1)#225 AS number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), 
DecimalType(18,2), true))#224 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#226] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#221, count(1)#222] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#221 AS sales#56, count(1)#222 AS number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#221 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#223] -(189) Filter [codegen id : 381] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#226] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#226) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#226 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) +(177) Filter [codegen id : 375] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#223] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#223) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#223 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) -(190) Project [codegen id : 381] -Output [6]: [catalog AS channel#227, i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#226] +(178) Project [codegen id : 375] +Output [6]: [catalog AS channel#224, i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#223] -(191) ReusedExchange [Reuses operator id: 106] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#228, isEmpty#229, count#230] +(179) ReusedExchange [Reuses operator id: 106] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#225, isEmpty#226, count#227] -(192) HashAggregate [codegen id : 407] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#228, isEmpty#229, count#230] +(180) HashAggregate [codegen id : 401] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#225, isEmpty#226, count#227] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#231, count(1)#232] -Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#231 AS sales#71, count(1)#232 AS number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#231 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#233] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#228, count(1)#229] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#228 AS sales#71, count(1)#229 AS number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#228 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#230] -(193) Filter [codegen id : 407] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#233] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#233) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#233 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] 
as decimal(32,6)))) +(181) Filter [codegen id : 401] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#230] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#230) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#230 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) -(194) Project [codegen id : 407] -Output [6]: [web AS channel#234, i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#233] +(182) Project [codegen id : 401] +Output [6]: [web AS channel#231, i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#230] -(195) Union +(183) Union -(196) HashAggregate [codegen id : 408] +(184) HashAggregate [codegen id : 402] Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [partial_sum(sales#39), partial_sum(number_sales#40)] -Aggregate Attributes [3]: [sum#235, isEmpty#236, sum#237] -Results [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#238, isEmpty#239, sum#240] +Aggregate Attributes [3]: [sum#232, isEmpty#233, sum#234] +Results [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#235, isEmpty#236, sum#237] -(197) Exchange -Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#238, isEmpty#239, sum#240] -Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#241] +(185) Exchange +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#235, isEmpty#236, sum#237] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), ENSURE_REQUIREMENTS, [id=#238] -(198) HashAggregate [codegen id : 409] -Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#238, isEmpty#239, sum#240] +(186) HashAggregate [codegen id : 403] +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#235, isEmpty#236, sum#237] Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(sales#39), sum(number_sales#40)] -Aggregate Attributes [2]: [sum(sales#39)#242, sum(number_sales#40)#243] -Results [2]: [sum(sales#39)#242 AS sum_sales#84, sum(number_sales#40)#243 AS number_sales#85] +Aggregate Attributes [2]: [sum(sales#39)#239, sum(number_sales#40)#240] +Results [2]: [sum(sales#39)#239 AS sum_sales#84, sum(number_sales#40)#240 AS number_sales#85] -(199) 
HashAggregate [codegen id : 409] +(187) HashAggregate [codegen id : 403] Input [2]: [sum_sales#84, number_sales#85] Keys: [] Functions [2]: [partial_sum(sum_sales#84), partial_sum(number_sales#85)] -Aggregate Attributes [3]: [sum#244, isEmpty#245, sum#246] -Results [3]: [sum#247, isEmpty#248, sum#249] +Aggregate Attributes [3]: [sum#241, isEmpty#242, sum#243] +Results [3]: [sum#244, isEmpty#245, sum#246] -(200) Exchange -Input [3]: [sum#247, isEmpty#248, sum#249] -Arguments: SinglePartition, true, [id=#250] +(188) Exchange +Input [3]: [sum#244, isEmpty#245, sum#246] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#247] -(201) HashAggregate [codegen id : 410] -Input [3]: [sum#247, isEmpty#248, sum#249] +(189) HashAggregate [codegen id : 404] +Input [3]: [sum#244, isEmpty#245, sum#246] Keys: [] Functions [2]: [sum(sum_sales#84), sum(number_sales#85)] -Aggregate Attributes [2]: [sum(sum_sales#84)#251, sum(number_sales#85)#252] -Results [6]: [null AS channel#253, null AS i_brand_id#254, null AS i_class_id#255, null AS i_category_id#256, sum(sum_sales#84)#251 AS sum(sum_sales)#257, sum(number_sales#85)#252 AS sum(number_sales)#258] +Aggregate Attributes [2]: [sum(sum_sales#84)#248, sum(number_sales#85)#249] +Results [6]: [null AS channel#250, null AS i_brand_id#251, null AS i_class_id#252, null AS i_category_id#253, sum(sum_sales#84)#248 AS sum(sum_sales)#254, sum(number_sales#85)#249 AS sum(number_sales)#255] -(202) Union +(190) Union -(203) HashAggregate [codegen id : 411] +(191) HashAggregate [codegen id : 405] Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] Functions: [] Aggregate Attributes: [] Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -(204) Exchange +(192) Exchange Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#259] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), ENSURE_REQUIREMENTS, [id=#256] -(205) HashAggregate [codegen id : 412] +(193) HashAggregate [codegen id : 406] Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] Functions: [] Aggregate Attributes: [] Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -(206) TakeOrderedAndProject +(194) TakeOrderedAndProject Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] Arguments: 100, [channel#44 ASC NULLS FIRST, i_brand_id#6 ASC NULLS FIRST, i_class_id#7 ASC NULLS FIRST, i_category_id#8 ASC NULLS FIRST], [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] ===== Subqueries ===== Subquery:1 Hosting operator id = 76 Hosting Expression = Subquery scalar-subquery#42, [id=#43] -* HashAggregate (236) -+- Exchange (235) - +- * HashAggregate (234) - +- Union (233) - :- * Project (216) - : +- * BroadcastHashJoin Inner BuildRight (215) - : :- * Filter (209) - : : +- * ColumnarToRow (208) - : : +- Scan parquet default.store_sales (207) - : +- BroadcastExchange (214) - : +- * Project (213) - : +- * Filter 
(212) - : +- * ColumnarToRow (211) - : +- Scan parquet default.date_dim (210) - :- * Project (226) - : +- * BroadcastHashJoin Inner BuildRight (225) - : :- * Filter (219) - : : +- * ColumnarToRow (218) - : : +- Scan parquet default.catalog_sales (217) - : +- BroadcastExchange (224) - : +- * Project (223) - : +- * Filter (222) - : +- * ColumnarToRow (221) - : +- Scan parquet default.date_dim (220) - +- * Project (232) - +- * BroadcastHashJoin Inner BuildRight (231) - :- * Filter (229) - : +- * ColumnarToRow (228) - : +- Scan parquet default.web_sales (227) - +- ReusedExchange (230) - - -(207) Scan parquet default.store_sales +* HashAggregate (224) ++- Exchange (223) + +- * HashAggregate (222) + +- Union (221) + :- * Project (204) + : +- * BroadcastHashJoin Inner BuildRight (203) + : :- * Filter (197) + : : +- * ColumnarToRow (196) + : : +- Scan parquet default.store_sales (195) + : +- BroadcastExchange (202) + : +- * Project (201) + : +- * Filter (200) + : +- * ColumnarToRow (199) + : +- Scan parquet default.date_dim (198) + :- * Project (214) + : +- * BroadcastHashJoin Inner BuildRight (213) + : :- * Filter (207) + : : +- * ColumnarToRow (206) + : : +- Scan parquet default.catalog_sales (205) + : +- BroadcastExchange (212) + : +- * Project (211) + : +- * Filter (210) + : +- * ColumnarToRow (209) + : +- Scan parquet default.date_dim (208) + +- * Project (220) + +- * BroadcastHashJoin Inner BuildRight (219) + :- * Filter (217) + : +- * ColumnarToRow (216) + : +- Scan parquet default.web_sales (215) + +- ReusedExchange (218) + + +(195) Scan parquet default.store_sales Output [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_sold_date_sk)] ReadSchema: struct -(208) ColumnarToRow [codegen id : 2] +(196) ColumnarToRow [codegen id : 2] Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] -(209) Filter [codegen id : 2] +(197) Filter [codegen id : 2] Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] Condition : isnotnull(ss_sold_date_sk#1) -(210) Scan parquet default.date_dim +(198) Scan parquet default.date_dim Output [2]: [d_date_sk#10, d_year#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct -(211) ColumnarToRow [codegen id : 1] +(199) ColumnarToRow [codegen id : 1] Input [2]: [d_date_sk#10, d_year#11] -(212) Filter [codegen id : 1] +(200) Filter [codegen id : 1] Input [2]: [d_date_sk#10, d_year#11] Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1999)) AND (d_year#11 <= 2001)) AND isnotnull(d_date_sk#10)) -(213) Project [codegen id : 1] +(201) Project [codegen id : 1] Output [1]: [d_date_sk#10] Input [2]: [d_date_sk#10, d_year#11] -(214) BroadcastExchange +(202) BroadcastExchange Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#260] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#257] -(215) BroadcastHashJoin [codegen id : 2] +(203) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ss_sold_date_sk#1] Right keys [1]: [d_date_sk#10] Join condition: None -(216) Project [codegen id : 2] -Output [2]: [ss_quantity#3 AS quantity#261, ss_list_price#4 AS list_price#262] +(204) Project [codegen id : 2] +Output [2]: [ss_quantity#3 AS quantity#258, ss_list_price#4 AS 
list_price#259] Input [4]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, d_date_sk#10] -(217) Scan parquet default.catalog_sales +(205) Scan parquet default.catalog_sales Output [3]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_sales] PushedFilters: [IsNotNull(cs_sold_date_sk)] ReadSchema: struct -(218) ColumnarToRow [codegen id : 4] +(206) ColumnarToRow [codegen id : 4] Input [3]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46] -(219) Filter [codegen id : 4] +(207) Filter [codegen id : 4] Input [3]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46] Condition : isnotnull(cs_sold_date_sk#16) -(220) Scan parquet default.date_dim +(208) Scan parquet default.date_dim Output [2]: [d_date_sk#10, d_year#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1998), LessThanOrEqual(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct -(221) ColumnarToRow [codegen id : 3] +(209) ColumnarToRow [codegen id : 3] Input [2]: [d_date_sk#10, d_year#11] -(222) Filter [codegen id : 3] +(210) Filter [codegen id : 3] Input [2]: [d_date_sk#10, d_year#11] Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1998)) AND (d_year#11 <= 2000)) AND isnotnull(d_date_sk#10)) -(223) Project [codegen id : 3] +(211) Project [codegen id : 3] Output [1]: [d_date_sk#10] Input [2]: [d_date_sk#10, d_year#11] -(224) BroadcastExchange +(212) BroadcastExchange Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#263] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#260] -(225) BroadcastHashJoin [codegen id : 4] +(213) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_sold_date_sk#16] Right keys [1]: [d_date_sk#10] Join condition: None -(226) Project [codegen id : 4] -Output [2]: [cs_quantity#45 AS quantity#264, cs_list_price#46 AS list_price#265] +(214) Project [codegen id : 4] +Output [2]: [cs_quantity#45 AS quantity#261, cs_list_price#46 AS list_price#262] Input [4]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46, d_date_sk#10] -(227) Scan parquet default.web_sales +(215) Scan parquet default.web_sales Output [3]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_sold_date_sk)] ReadSchema: struct -(228) ColumnarToRow [codegen id : 6] +(216) ColumnarToRow [codegen id : 6] Input [3]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61] -(229) Filter [codegen id : 6] +(217) Filter [codegen id : 6] Input [3]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61] Condition : isnotnull(ws_sold_date_sk#20) -(230) ReusedExchange [Reuses operator id: 224] +(218) ReusedExchange [Reuses operator id: 212] Output [1]: [d_date_sk#10] -(231) BroadcastHashJoin [codegen id : 6] +(219) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ws_sold_date_sk#20] Right keys [1]: [d_date_sk#10] Join condition: None -(232) Project [codegen id : 6] -Output [2]: [ws_quantity#60 AS quantity#266, ws_list_price#61 AS list_price#267] +(220) Project [codegen id : 6] +Output [2]: [ws_quantity#60 AS quantity#263, ws_list_price#61 AS list_price#264] Input [4]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61, d_date_sk#10] -(233) Union +(221) Union -(234) HashAggregate [codegen id : 7] -Input [2]: [quantity#261, list_price#262] 
+(222) HashAggregate [codegen id : 7] +Input [2]: [quantity#258, list_price#259] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#261 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#262 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#268, count#269] -Results [2]: [sum#270, count#271] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#258 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#259 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#265, count#266] +Results [2]: [sum#267, count#268] -(235) Exchange -Input [2]: [sum#270, count#271] -Arguments: SinglePartition, true, [id=#272] +(223) Exchange +Input [2]: [sum#267, count#268] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#269] -(236) HashAggregate [codegen id : 8] -Input [2]: [sum#270, count#271] +(224) HashAggregate [codegen id : 8] +Input [2]: [sum#267, count#268] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#261 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#262 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#261 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#262 as decimal(12,2)))), DecimalType(18,2), true))#273] -Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#261 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#262 as decimal(12,2)))), DecimalType(18,2), true))#273 AS average_sales#274] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#258 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#259 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#258 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#259 as decimal(12,2)))), DecimalType(18,2), true))#270] +Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#258 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#259 as decimal(12,2)))), DecimalType(18,2), true))#270 AS average_sales#271] Subquery:2 Hosting operator id = 92 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] @@ -1359,22 +1287,22 @@ Subquery:5 Hosting operator id = 120 Hosting Expression = ReusedSubquery Subquer Subquery:6 Hosting operator id = 124 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:7 Hosting operator id = 139 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] +Subquery:7 Hosting operator id = 135 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:8 Hosting operator id = 143 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] +Subquery:8 Hosting operator id = 139 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:9 Hosting operator id = 147 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] +Subquery:9 Hosting operator id = 143 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:10 Hosting operator id = 162 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] +Subquery:10 Hosting operator id = 154 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:11 
Hosting operator id = 166 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] +Subquery:11 Hosting operator id = 158 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:12 Hosting operator id = 170 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] +Subquery:12 Hosting operator id = 162 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:13 Hosting operator id = 185 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] +Subquery:13 Hosting operator id = 173 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:14 Hosting operator id = 189 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] +Subquery:14 Hosting operator id = 177 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:15 Hosting operator id = 193 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] +Subquery:15 Hosting operator id = 181 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt index e96f1d6fed14f..18484308fecaf 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt @@ -1,387 +1,363 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] - WholeStageCodegen (412) + WholeStageCodegen (406) HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] InputAdapter Exchange [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] #1 - WholeStageCodegen (411) + WholeStageCodegen (405) HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] InputAdapter Union - WholeStageCodegen (329) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + WholeStageCodegen (80) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] #2 - WholeStageCodegen (328) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + Exchange [channel,i_brand_id,i_class_id,i_category_id] #2 + WholeStageCodegen (79) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] InputAdapter Union - WholeStageCodegen (246) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] - InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] #3 - WholeStageCodegen (245) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + WholeStageCodegen (26) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #1 + WholeStageCodegen (8) + HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as 
decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] InputAdapter - Union - WholeStageCodegen (163) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + Exchange #14 + WholeStageCodegen (7) + HashAggregate [quantity,list_price] [sum,count,sum,count] InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] #4 - WholeStageCodegen (162) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] - InputAdapter - Union - WholeStageCodegen (80) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + Union + WholeStageCodegen (2) + Project [ss_quantity,ss_list_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_quantity,ss_list_price] + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (4) + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_quantity,cs_list_price] + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (6) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] + InputAdapter + ReusedExchange [d_date_sk] #16 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #3 + WholeStageCodegen (25) + HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + BroadcastHashJoin [ss_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (11) + Project [i_item_sk] + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] + Filter [i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id] #5 - WholeStageCodegen (79) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] 
[sum,isEmpty,sum,sum,isEmpty,sum] - InputAdapter - Union - WholeStageCodegen (26) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] - Subquery #1 - WholeStageCodegen (8) - HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] - InputAdapter - Exchange #17 - WholeStageCodegen (7) - HashAggregate [quantity,list_price] [sum,count,sum,count] + BroadcastExchange #5 + WholeStageCodegen (10) + HashAggregate [brand_id,class_id,category_id] + HashAggregate [brand_id,class_id,category_id] + HashAggregate [brand_id,class_id,category_id] + InputAdapter + Exchange [brand_id,class_id,category_id] #6 + WholeStageCodegen (9) + HashAggregate [brand_id,class_id,category_id] + BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] InputAdapter - Union - WholeStageCodegen (2) - Project [ss_quantity,ss_list_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_quantity,ss_list_price] - InputAdapter - BroadcastExchange #18 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - WholeStageCodegen (4) - Project [cs_quantity,cs_list_price] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_quantity,cs_list_price] - InputAdapter - BroadcastExchange #19 - WholeStageCodegen (3) - Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - WholeStageCodegen (6) - Project [ws_quantity,ws_list_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] + BroadcastExchange #7 + WholeStageCodegen (1) + Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + ColumnarToRow InputAdapter - ReusedExchange [d_date_sk] #19 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #6 - WholeStageCodegen (25) - HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] 
[sum,isEmpty,count,sum,isEmpty,count] - Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - BroadcastHashJoin [ss_item_sk,ss_item_sk] - Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (5) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] ColumnarToRow InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk] InputAdapter - BroadcastExchange #7 - WholeStageCodegen (11) - Project [i_item_sk] - BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] - Filter [i_brand_id,i_class_id,i_category_id] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen (10) - HashAggregate [brand_id,class_id,category_id] - HashAggregate [brand_id,class_id,category_id] - HashAggregate [brand_id,class_id,category_id] - InputAdapter - Exchange [brand_id,class_id,category_id] #9 - WholeStageCodegen (9) - HashAggregate [brand_id,class_id,category_id] - BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] - InputAdapter - BroadcastExchange #10 - WholeStageCodegen (1) - Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - InputAdapter - BroadcastExchange #11 - WholeStageCodegen (2) - Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - InputAdapter - BroadcastExchange #12 - WholeStageCodegen (5) - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Filter [cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk] - InputAdapter - BroadcastExchange #13 - WholeStageCodegen (3) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - InputAdapter - ReusedExchange [d_date_sk] #11 - InputAdapter - BroadcastExchange #14 - WholeStageCodegen (8) - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project 
[ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Filter [ws_item_sk,ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #13 - InputAdapter - ReusedExchange [d_date_sk] #11 - InputAdapter - BroadcastExchange #15 - WholeStageCodegen (23) - BroadcastHashJoin [i_item_sk,ss_item_sk] + BroadcastExchange #10 + WholeStageCodegen (3) Filter [i_item_sk] ColumnarToRow InputAdapter Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - InputAdapter - ReusedExchange [ss_item_sk] #7 - InputAdapter - BroadcastExchange #16 - WholeStageCodegen (24) - Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - WholeStageCodegen (52) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #20 - WholeStageCodegen (51) - HashAggregate [i_brand_id,i_class_id,i_category_id,cs_quantity,cs_list_price] [sum,isEmpty,count,sum,isEmpty,count] - Project [cs_quantity,cs_list_price,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_quantity,cs_list_price,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - BroadcastHashJoin [cs_item_sk,ss_item_sk] - Filter [cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_list_price] - InputAdapter - ReusedExchange [ss_item_sk] #7 - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #15 - InputAdapter - ReusedExchange [d_date_sk] #16 - WholeStageCodegen (78) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #21 - WholeStageCodegen (77) - HashAggregate [i_brand_id,i_class_id,i_category_id,ws_quantity,ws_list_price] 
[sum,isEmpty,count,sum,isEmpty,count] - Project [ws_quantity,ws_list_price,i_brand_id,i_class_id,i_category_id] + InputAdapter + ReusedExchange [d_date_sk] #8 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (8) + Project [i_brand_id,i_class_id,i_category_id] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_quantity,ws_list_price,i_brand_id,i_class_id,i_category_id] + Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] BroadcastHashJoin [ws_item_sk,i_item_sk] - BroadcastHashJoin [ws_item_sk,ss_item_sk] - Filter [ws_item_sk,ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_quantity,ws_list_price] - InputAdapter - ReusedExchange [ss_item_sk] #7 + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk] InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #15 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #10 InputAdapter - ReusedExchange [d_date_sk] #16 - WholeStageCodegen (161) - HashAggregate [channel,i_brand_id,i_class_id,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + ReusedExchange [d_date_sk] #8 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (23) + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [i_item_sk] + ColumnarToRow InputAdapter - Exchange [channel,i_brand_id,i_class_id] #22 - WholeStageCodegen (160) - HashAggregate [channel,i_brand_id,i_class_id,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] - InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id] #23 - WholeStageCodegen (159) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] - InputAdapter - Union - WholeStageCodegen (106) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 - WholeStageCodegen (132) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), 
true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #20 - WholeStageCodegen (158) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #21 - WholeStageCodegen (244) - HashAggregate [channel,i_brand_id,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + ReusedExchange [ss_item_sk] #4 InputAdapter - Exchange [channel,i_brand_id] #24 - WholeStageCodegen (243) - HashAggregate [channel,i_brand_id,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + BroadcastExchange #13 + WholeStageCodegen (24) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (52) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #17 + WholeStageCodegen (51) + HashAggregate [i_brand_id,i_class_id,i_category_id,cs_quantity,cs_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [cs_quantity,cs_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_quantity,cs_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + BroadcastHashJoin [cs_item_sk,ss_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id] #25 - WholeStageCodegen (242) - HashAggregate 
[channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] - InputAdapter - Union - WholeStageCodegen (189) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 - WholeStageCodegen (215) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #20 - WholeStageCodegen (241) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #21 - WholeStageCodegen (327) - HashAggregate [channel,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_brand_id,i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] - InputAdapter - Exchange [channel] #26 - WholeStageCodegen (326) - HashAggregate [channel,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] - InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id] #27 - WholeStageCodegen (325) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] - InputAdapter - Union - 
WholeStageCodegen (272) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 - WholeStageCodegen (298) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #20 - WholeStageCodegen (324) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #21 - WholeStageCodegen (410) + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_list_price] + InputAdapter + ReusedExchange [ss_item_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #12 + InputAdapter + ReusedExchange [d_date_sk] #13 + WholeStageCodegen (78) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), 
DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #18 + WholeStageCodegen (77) + HashAggregate [i_brand_id,i_class_id,i_category_id,ws_quantity,ws_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ws_quantity,ws_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_quantity,ws_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + BroadcastHashJoin [ws_item_sk,ss_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_quantity,ws_list_price] + InputAdapter + ReusedExchange [ss_item_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #12 + InputAdapter + ReusedExchange [d_date_sk] #13 + WholeStageCodegen (161) + HashAggregate [channel,i_brand_id,i_class_id,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id,i_class_id] #19 + WholeStageCodegen (160) + HashAggregate [channel,i_brand_id,i_class_id,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id,i_class_id,i_category_id] #20 + WholeStageCodegen (159) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + InputAdapter + Union + WholeStageCodegen (106) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #3 + WholeStageCodegen (132) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), 
true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #17 + WholeStageCodegen (158) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #18 + WholeStageCodegen (242) + HashAggregate [channel,i_brand_id,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id] #21 + WholeStageCodegen (241) + HashAggregate [channel,i_brand_id,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id,i_class_id,i_category_id] #22 + WholeStageCodegen (240) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + InputAdapter + Union + WholeStageCodegen (187) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #3 + WholeStageCodegen (213) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange 
[i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #17 + WholeStageCodegen (239) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #18 + WholeStageCodegen (323) + HashAggregate [channel,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_brand_id,i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + InputAdapter + Exchange [channel] #23 + WholeStageCodegen (322) + HashAggregate [channel,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id,i_class_id,i_category_id] #24 + WholeStageCodegen (321) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + InputAdapter + Union + WholeStageCodegen (268) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #3 + WholeStageCodegen (294) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #17 + WholeStageCodegen (320) + Project 
[i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #18 + WholeStageCodegen (404) HashAggregate [sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),channel,i_brand_id,i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] InputAdapter - Exchange #28 - WholeStageCodegen (409) + Exchange #25 + WholeStageCodegen (403) HashAggregate [sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id] #29 - WholeStageCodegen (408) + Exchange [channel,i_brand_id,i_class_id,i_category_id] #26 + WholeStageCodegen (402) HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] InputAdapter Union - WholeStageCodegen (355) + WholeStageCodegen (349) Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 - WholeStageCodegen (381) + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #3 + WholeStageCodegen (375) Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter - ReusedExchange 
[i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #20 - WholeStageCodegen (407) + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #17 + WholeStageCodegen (401) Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #21 + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #18 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/explain.txt index 107343f091fb2..20ea78c9140e6 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/explain.txt @@ -1,53 +1,49 @@ == Physical Plan == -TakeOrderedAndProject (49) -+- * Project (48) - +- Window (47) - +- * Sort (46) - +- Exchange (45) - +- * HashAggregate (44) - +- Exchange (43) - +- * HashAggregate (42) - +- Union (41) - :- * HashAggregate (35) - : +- Exchange (34) - : +- * HashAggregate (33) - : +- Union (32) - : :- * HashAggregate (26) - : : +- Exchange (25) - : : +- * HashAggregate (24) - : : +- * Project (23) - : : +- * BroadcastHashJoin Inner BuildRight (22) - : : :- * Project (17) - : : : +- * BroadcastHashJoin Inner BuildRight (16) - : : : :- * Project (10) - : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.store_sales (1) - : : : : +- BroadcastExchange (8) - : : : : +- * Project (7) - : : : : +- * Filter (6) - : : : : +- * ColumnarToRow (5) - : : : : +- Scan parquet default.date_dim (4) - : : : +- BroadcastExchange (15) - : : : +- * Project (14) - : : : +- * Filter (13) - : : : +- * ColumnarToRow (12) - : : : +- Scan parquet default.store (11) - : : +- BroadcastExchange (21) - : : +- * Filter (20) - : : +- * ColumnarToRow (19) - : : +- Scan parquet default.item (18) - : +- * HashAggregate (31) - : +- Exchange (30) - : +- * HashAggregate (29) - : +- * HashAggregate (28) - : +- ReusedExchange (27) - +- * HashAggregate (40) - +- Exchange (39) - +- * HashAggregate (38) - +- * HashAggregate (37) - +- ReusedExchange (36) +TakeOrderedAndProject (45) ++- * Project (44) + +- Window (43) + +- * Sort (42) + +- Exchange (41) + +- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- Union (37) + :- * HashAggregate (26) + : +- Exchange (25) + : +- * HashAggregate (24) + : +- * Project (23) + : +- * BroadcastHashJoin Inner BuildRight (22) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) 
+ : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- BroadcastExchange (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.item (18) + :- * HashAggregate (31) + : +- Exchange (30) + : +- * HashAggregate (29) + : +- * HashAggregate (28) + : +- ReusedExchange (27) + +- * HashAggregate (36) + +- Exchange (35) + +- * HashAggregate (34) + +- * HashAggregate (33) + +- ReusedExchange (32) (1) Scan parquet default.store_sales @@ -162,7 +158,7 @@ Results [4]: [i_category#14, i_class#13, sum#18, sum#19] (25) Exchange Input [4]: [i_category#14, i_class#13, sum#18, sum#19] -Arguments: hashpartitioning(i_category#14, i_class#13, 5), true, [id=#20] +Arguments: hashpartitioning(i_category#14, i_class#13, 5), ENSURE_REQUIREMENTS, [id=#20] (26) HashAggregate [codegen id : 5] Input [4]: [i_category#14, i_class#13, sum#18, sum#19] @@ -190,7 +186,7 @@ Results [5]: [i_category#14, sum#37, isEmpty#38, sum#39, isEmpty#40] (30) Exchange Input [5]: [i_category#14, sum#37, isEmpty#38, sum#39, isEmpty#40] -Arguments: hashpartitioning(i_category#14, 5), true, [id=#41] +Arguments: hashpartitioning(i_category#14, 5), ENSURE_REQUIREMENTS, [id=#41] (31) HashAggregate [codegen id : 11] Input [5]: [i_category#14, sum#37, isEmpty#38, sum#39, isEmpty#40] @@ -199,91 +195,71 @@ Functions [2]: [sum(ss_net_profit#31), sum(ss_ext_sales_price#32)] Aggregate Attributes [2]: [sum(ss_net_profit#31)#42, sum(ss_ext_sales_price#32)#43] Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#31)#42) / promote_precision(sum(ss_ext_sales_price#32)#43)), DecimalType(38,11), true) as decimal(38,20)) AS gross_margin#44, i_category#14, null AS i_class#45, 0 AS t_category#46, 1 AS t_class#47, 1 AS lochierarchy#48] -(32) Union +(32) ReusedExchange [Reuses operator id: 25] +Output [4]: [i_category#14, i_class#13, sum#49, sum#50] -(33) HashAggregate [codegen id : 12] -Input [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] -Keys [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] -Functions: [] -Aggregate Attributes: [] -Results [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] - -(34) Exchange -Input [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] -Arguments: hashpartitioning(gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26, 5), true, [id=#49] - -(35) HashAggregate [codegen id : 13] -Input [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] -Keys [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] -Functions: [] -Aggregate Attributes: [] -Results [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] - -(36) ReusedExchange [Reuses operator id: 25] -Output [4]: [i_category#14, i_class#13, sum#50, sum#51] - -(37) HashAggregate [codegen id : 18] -Input [4]: [i_category#14, i_class#13, sum#50, sum#51] +(33) HashAggregate [codegen id : 16] +Input [4]: [i_category#14, i_class#13, sum#49, sum#50] Keys [2]: [i_category#14, i_class#13] Functions [2]: 
[sum(UnscaledValue(ss_net_profit#5)), sum(UnscaledValue(ss_ext_sales_price#4))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#5))#52, sum(UnscaledValue(ss_ext_sales_price#4))#53] -Results [2]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#52,17,2) AS ss_net_profit#31, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#53,17,2) AS ss_ext_sales_price#32] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#5))#51, sum(UnscaledValue(ss_ext_sales_price#4))#52] +Results [2]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#51,17,2) AS ss_net_profit#31, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#52,17,2) AS ss_ext_sales_price#32] -(38) HashAggregate [codegen id : 18] +(34) HashAggregate [codegen id : 16] Input [2]: [ss_net_profit#31, ss_ext_sales_price#32] Keys: [] Functions [2]: [partial_sum(ss_net_profit#31), partial_sum(ss_ext_sales_price#32)] -Aggregate Attributes [4]: [sum#54, isEmpty#55, sum#56, isEmpty#57] -Results [4]: [sum#58, isEmpty#59, sum#60, isEmpty#61] +Aggregate Attributes [4]: [sum#53, isEmpty#54, sum#55, isEmpty#56] +Results [4]: [sum#57, isEmpty#58, sum#59, isEmpty#60] -(39) Exchange -Input [4]: [sum#58, isEmpty#59, sum#60, isEmpty#61] -Arguments: SinglePartition, true, [id=#62] +(35) Exchange +Input [4]: [sum#57, isEmpty#58, sum#59, isEmpty#60] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#61] -(40) HashAggregate [codegen id : 19] -Input [4]: [sum#58, isEmpty#59, sum#60, isEmpty#61] +(36) HashAggregate [codegen id : 17] +Input [4]: [sum#57, isEmpty#58, sum#59, isEmpty#60] Keys: [] Functions [2]: [sum(ss_net_profit#31), sum(ss_ext_sales_price#32)] -Aggregate Attributes [2]: [sum(ss_net_profit#31)#63, sum(ss_ext_sales_price#32)#64] -Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#31)#63) / promote_precision(sum(ss_ext_sales_price#32)#64)), DecimalType(38,11), true) as decimal(38,20)) AS gross_margin#65, null AS i_category#66, null AS i_class#67, 1 AS t_category#68, 1 AS t_class#69, 2 AS lochierarchy#70] +Aggregate Attributes [2]: [sum(ss_net_profit#31)#62, sum(ss_ext_sales_price#32)#63] +Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#31)#62) / promote_precision(sum(ss_ext_sales_price#32)#63)), DecimalType(38,11), true) as decimal(38,20)) AS gross_margin#64, null AS i_category#65, null AS i_class#66, 1 AS t_category#67, 1 AS t_class#68, 2 AS lochierarchy#69] -(41) Union +(37) Union -(42) HashAggregate [codegen id : 20] +(38) HashAggregate [codegen id : 18] Input [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] Keys [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] Functions: [] Aggregate Attributes: [] Results [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] -(43) Exchange +(39) Exchange Input [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] -Arguments: hashpartitioning(gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26, 5), true, [id=#71] +Arguments: hashpartitioning(gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26, 5), ENSURE_REQUIREMENTS, [id=#70] -(44) HashAggregate [codegen id : 21] +(40) HashAggregate [codegen id : 19] Input [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] Keys [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] Functions: [] 
Aggregate Attributes: [] -Results [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, CASE WHEN (t_class#25 = 0) THEN i_category#14 END AS _w0#72] +Results [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, CASE WHEN (t_class#25 = 0) THEN i_category#14 END AS _w0#71] -(45) Exchange -Input [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, _w0#72] -Arguments: hashpartitioning(lochierarchy#26, _w0#72, 5), true, [id=#73] +(41) Exchange +Input [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, _w0#71] +Arguments: hashpartitioning(lochierarchy#26, _w0#71, 5), ENSURE_REQUIREMENTS, [id=#72] -(46) Sort [codegen id : 22] -Input [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, _w0#72] -Arguments: [lochierarchy#26 ASC NULLS FIRST, _w0#72 ASC NULLS FIRST, gross_margin#23 ASC NULLS FIRST], false, 0 +(42) Sort [codegen id : 20] +Input [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, _w0#71] +Arguments: [lochierarchy#26 ASC NULLS FIRST, _w0#71 ASC NULLS FIRST, gross_margin#23 ASC NULLS FIRST], false, 0 -(47) Window -Input [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, _w0#72] -Arguments: [rank(gross_margin#23) windowspecdefinition(lochierarchy#26, _w0#72, gross_margin#23 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#74], [lochierarchy#26, _w0#72], [gross_margin#23 ASC NULLS FIRST] +(43) Window +Input [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, _w0#71] +Arguments: [rank(gross_margin#23) windowspecdefinition(lochierarchy#26, _w0#71, gross_margin#23 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#73], [lochierarchy#26, _w0#71], [gross_margin#23 ASC NULLS FIRST] -(48) Project [codegen id : 23] -Output [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, rank_within_parent#74] -Input [6]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, _w0#72, rank_within_parent#74] +(44) Project [codegen id : 21] +Output [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, rank_within_parent#73] +Input [6]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, _w0#71, rank_within_parent#73] -(49) TakeOrderedAndProject -Input [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, rank_within_parent#74] -Arguments: 100, [lochierarchy#26 DESC NULLS LAST, CASE WHEN (lochierarchy#26 = 0) THEN i_category#14 END ASC NULLS FIRST, rank_within_parent#74 ASC NULLS FIRST], [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, rank_within_parent#74] +(45) TakeOrderedAndProject +Input [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, rank_within_parent#73] +Arguments: 100, [lochierarchy#26 DESC NULLS LAST, CASE WHEN (lochierarchy#26 = 0) THEN i_category#14 END ASC NULLS FIRST, rank_within_parent#73 ASC NULLS FIRST], [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, rank_within_parent#73] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/simplified.txt index aa85d4870683d..f1cf7e8587cc1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/simplified.txt @@ -1,82 +1,74 @@ TakeOrderedAndProject 
[lochierarchy,i_category,rank_within_parent,gross_margin,i_class] - WholeStageCodegen (23) + WholeStageCodegen (21) Project [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] InputAdapter Window [gross_margin,lochierarchy,_w0] - WholeStageCodegen (22) + WholeStageCodegen (20) Sort [lochierarchy,_w0,gross_margin] InputAdapter Exchange [lochierarchy,_w0] #1 - WholeStageCodegen (21) + WholeStageCodegen (19) HashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] [_w0] InputAdapter Exchange [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] #2 - WholeStageCodegen (20) + WholeStageCodegen (18) HashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] InputAdapter Union - WholeStageCodegen (13) - HashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] + WholeStageCodegen (5) + HashAggregate [i_category,i_class,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),gross_margin,t_category,t_class,lochierarchy,sum,sum] InputAdapter - Exchange [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] #3 - WholeStageCodegen (12) - HashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] - InputAdapter - Union - WholeStageCodegen (5) - HashAggregate [i_category,i_class,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),gross_margin,t_category,t_class,lochierarchy,sum,sum] - InputAdapter - Exchange [i_category,i_class] #4 - WholeStageCodegen (4) - HashAggregate [i_category,i_class,ss_net_profit,ss_ext_sales_price] [sum,sum,sum,sum] - Project [ss_ext_sales_price,ss_net_profit,i_class,i_category] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_ext_sales_price,ss_net_profit] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_item_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + Exchange [i_category,i_class] #3 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,ss_net_profit,ss_ext_sales_price] [sum,sum,sum,sum] + Project [ss_ext_sales_price,ss_net_profit,i_class,i_category] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_item_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #6 - WholeStageCodegen (2) - Project [s_store_sk] - Filter [s_state,s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_state] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (3) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_class,i_category] - WholeStageCodegen (11) - HashAggregate 
[i_category,sum,isEmpty,sum,isEmpty] [sum(ss_net_profit),sum(ss_ext_sales_price),gross_margin,i_class,t_category,t_class,lochierarchy,sum,isEmpty,sum,isEmpty] + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter - Exchange [i_category] #8 - WholeStageCodegen (10) - HashAggregate [i_category,ss_net_profit,ss_ext_sales_price] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),ss_net_profit,ss_ext_sales_price,sum,sum] - InputAdapter - ReusedExchange [i_category,i_class,sum,sum] #4 - WholeStageCodegen (19) + BroadcastExchange #5 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_state] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_class,i_category] + WholeStageCodegen (11) + HashAggregate [i_category,sum,isEmpty,sum,isEmpty] [sum(ss_net_profit),sum(ss_ext_sales_price),gross_margin,i_class,t_category,t_class,lochierarchy,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [i_category] #7 + WholeStageCodegen (10) + HashAggregate [i_category,ss_net_profit,ss_ext_sales_price] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),ss_net_profit,ss_ext_sales_price,sum,sum] + InputAdapter + ReusedExchange [i_category,i_class,sum,sum] #3 + WholeStageCodegen (17) HashAggregate [sum,isEmpty,sum,isEmpty] [sum(ss_net_profit),sum(ss_ext_sales_price),gross_margin,i_category,i_class,t_category,t_class,lochierarchy,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange #9 - WholeStageCodegen (18) + Exchange #8 + WholeStageCodegen (16) HashAggregate [ss_net_profit,ss_ext_sales_price] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] HashAggregate [i_category,i_class,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),ss_net_profit,ss_ext_sales_price,sum,sum] InputAdapter - ReusedExchange [i_category,i_class,sum,sum] #4 + ReusedExchange [i_category,i_class,sum,sum] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/explain.txt index 0d6dfa6f90a86..40b823563a890 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/explain.txt @@ -1,53 +1,49 @@ == Physical Plan == -TakeOrderedAndProject (49) -+- * Project (48) - +- Window (47) - +- * Sort (46) - +- Exchange (45) - +- * HashAggregate (44) - +- Exchange (43) - +- * HashAggregate (42) - +- Union (41) - :- * HashAggregate (35) - : +- Exchange (34) - : +- * HashAggregate (33) - : +- Union (32) - : :- * HashAggregate (26) - : : +- Exchange (25) - : : +- * HashAggregate (24) - : : +- * Project (23) - : : +- * BroadcastHashJoin Inner BuildRight (22) - : : :- * Project (16) - : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : :- * Project (10) - : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.store_sales (1) - : : : : +- BroadcastExchange (8) - : : : : +- * Project (7) - : : : : +- * Filter (6) - : : : : +- * ColumnarToRow (5) - : : : : +- Scan 
parquet default.date_dim (4) - : : : +- BroadcastExchange (14) - : : : +- * Filter (13) - : : : +- * ColumnarToRow (12) - : : : +- Scan parquet default.item (11) - : : +- BroadcastExchange (21) - : : +- * Project (20) - : : +- * Filter (19) - : : +- * ColumnarToRow (18) - : : +- Scan parquet default.store (17) - : +- * HashAggregate (31) - : +- Exchange (30) - : +- * HashAggregate (29) - : +- * HashAggregate (28) - : +- ReusedExchange (27) - +- * HashAggregate (40) - +- Exchange (39) - +- * HashAggregate (38) - +- * HashAggregate (37) - +- ReusedExchange (36) +TakeOrderedAndProject (45) ++- * Project (44) + +- Window (43) + +- * Sort (42) + +- Exchange (41) + +- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- Union (37) + :- * HashAggregate (26) + : +- Exchange (25) + : +- * HashAggregate (24) + : +- * Project (23) + : +- * BroadcastHashJoin Inner BuildRight (22) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.item (11) + : +- BroadcastExchange (21) + : +- * Project (20) + : +- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet default.store (17) + :- * HashAggregate (31) + : +- Exchange (30) + : +- * HashAggregate (29) + : +- * HashAggregate (28) + : +- ReusedExchange (27) + +- * HashAggregate (36) + +- Exchange (35) + +- * HashAggregate (34) + +- * HashAggregate (33) + +- ReusedExchange (32) (1) Scan parquet default.store_sales @@ -162,7 +158,7 @@ Results [4]: [i_category#11, i_class#10, sum#18, sum#19] (25) Exchange Input [4]: [i_category#11, i_class#10, sum#18, sum#19] -Arguments: hashpartitioning(i_category#11, i_class#10, 5), true, [id=#20] +Arguments: hashpartitioning(i_category#11, i_class#10, 5), ENSURE_REQUIREMENTS, [id=#20] (26) HashAggregate [codegen id : 5] Input [4]: [i_category#11, i_class#10, sum#18, sum#19] @@ -190,7 +186,7 @@ Results [5]: [i_category#11, sum#37, isEmpty#38, sum#39, isEmpty#40] (30) Exchange Input [5]: [i_category#11, sum#37, isEmpty#38, sum#39, isEmpty#40] -Arguments: hashpartitioning(i_category#11, 5), true, [id=#41] +Arguments: hashpartitioning(i_category#11, 5), ENSURE_REQUIREMENTS, [id=#41] (31) HashAggregate [codegen id : 11] Input [5]: [i_category#11, sum#37, isEmpty#38, sum#39, isEmpty#40] @@ -199,91 +195,71 @@ Functions [2]: [sum(ss_net_profit#31), sum(ss_ext_sales_price#32)] Aggregate Attributes [2]: [sum(ss_net_profit#31)#42, sum(ss_ext_sales_price#32)#43] Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#31)#42) / promote_precision(sum(ss_ext_sales_price#32)#43)), DecimalType(38,11), true) as decimal(38,20)) AS gross_margin#44, i_category#11, null AS i_class#45, 0 AS t_category#46, 1 AS t_class#47, 1 AS lochierarchy#48] -(32) Union +(32) ReusedExchange [Reuses operator id: 25] +Output [4]: [i_category#11, i_class#10, sum#49, sum#50] -(33) HashAggregate [codegen id : 12] -Input [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] -Keys [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] -Functions: [] -Aggregate Attributes: [] -Results 
[6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] - -(34) Exchange -Input [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] -Arguments: hashpartitioning(gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26, 5), true, [id=#49] - -(35) HashAggregate [codegen id : 13] -Input [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] -Keys [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] -Functions: [] -Aggregate Attributes: [] -Results [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] - -(36) ReusedExchange [Reuses operator id: 25] -Output [4]: [i_category#11, i_class#10, sum#50, sum#51] - -(37) HashAggregate [codegen id : 18] -Input [4]: [i_category#11, i_class#10, sum#50, sum#51] +(33) HashAggregate [codegen id : 16] +Input [4]: [i_category#11, i_class#10, sum#49, sum#50] Keys [2]: [i_category#11, i_class#10] Functions [2]: [sum(UnscaledValue(ss_net_profit#5)), sum(UnscaledValue(ss_ext_sales_price#4))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#5))#52, sum(UnscaledValue(ss_ext_sales_price#4))#53] -Results [2]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#52,17,2) AS ss_net_profit#31, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#53,17,2) AS ss_ext_sales_price#32] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#5))#51, sum(UnscaledValue(ss_ext_sales_price#4))#52] +Results [2]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#51,17,2) AS ss_net_profit#31, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#52,17,2) AS ss_ext_sales_price#32] -(38) HashAggregate [codegen id : 18] +(34) HashAggregate [codegen id : 16] Input [2]: [ss_net_profit#31, ss_ext_sales_price#32] Keys: [] Functions [2]: [partial_sum(ss_net_profit#31), partial_sum(ss_ext_sales_price#32)] -Aggregate Attributes [4]: [sum#54, isEmpty#55, sum#56, isEmpty#57] -Results [4]: [sum#58, isEmpty#59, sum#60, isEmpty#61] +Aggregate Attributes [4]: [sum#53, isEmpty#54, sum#55, isEmpty#56] +Results [4]: [sum#57, isEmpty#58, sum#59, isEmpty#60] -(39) Exchange -Input [4]: [sum#58, isEmpty#59, sum#60, isEmpty#61] -Arguments: SinglePartition, true, [id=#62] +(35) Exchange +Input [4]: [sum#57, isEmpty#58, sum#59, isEmpty#60] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#61] -(40) HashAggregate [codegen id : 19] -Input [4]: [sum#58, isEmpty#59, sum#60, isEmpty#61] +(36) HashAggregate [codegen id : 17] +Input [4]: [sum#57, isEmpty#58, sum#59, isEmpty#60] Keys: [] Functions [2]: [sum(ss_net_profit#31), sum(ss_ext_sales_price#32)] -Aggregate Attributes [2]: [sum(ss_net_profit#31)#63, sum(ss_ext_sales_price#32)#64] -Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#31)#63) / promote_precision(sum(ss_ext_sales_price#32)#64)), DecimalType(38,11), true) as decimal(38,20)) AS gross_margin#65, null AS i_category#66, null AS i_class#67, 1 AS t_category#68, 1 AS t_class#69, 2 AS lochierarchy#70] +Aggregate Attributes [2]: [sum(ss_net_profit#31)#62, sum(ss_ext_sales_price#32)#63] +Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#31)#62) / promote_precision(sum(ss_ext_sales_price#32)#63)), DecimalType(38,11), true) as decimal(38,20)) AS gross_margin#64, null AS i_category#65, null AS i_class#66, 1 AS t_category#67, 1 AS t_class#68, 2 AS lochierarchy#69] -(41) Union +(37) Union -(42) HashAggregate [codegen id : 20] +(38) 
HashAggregate [codegen id : 18] Input [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] Keys [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] Functions: [] Aggregate Attributes: [] Results [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] -(43) Exchange +(39) Exchange Input [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] -Arguments: hashpartitioning(gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26, 5), true, [id=#71] +Arguments: hashpartitioning(gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26, 5), ENSURE_REQUIREMENTS, [id=#70] -(44) HashAggregate [codegen id : 21] +(40) HashAggregate [codegen id : 19] Input [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] Keys [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] Functions: [] Aggregate Attributes: [] -Results [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, CASE WHEN (t_class#25 = 0) THEN i_category#11 END AS _w0#72] +Results [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, CASE WHEN (t_class#25 = 0) THEN i_category#11 END AS _w0#71] -(45) Exchange -Input [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, _w0#72] -Arguments: hashpartitioning(lochierarchy#26, _w0#72, 5), true, [id=#73] +(41) Exchange +Input [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, _w0#71] +Arguments: hashpartitioning(lochierarchy#26, _w0#71, 5), ENSURE_REQUIREMENTS, [id=#72] -(46) Sort [codegen id : 22] -Input [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, _w0#72] -Arguments: [lochierarchy#26 ASC NULLS FIRST, _w0#72 ASC NULLS FIRST, gross_margin#23 ASC NULLS FIRST], false, 0 +(42) Sort [codegen id : 20] +Input [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, _w0#71] +Arguments: [lochierarchy#26 ASC NULLS FIRST, _w0#71 ASC NULLS FIRST, gross_margin#23 ASC NULLS FIRST], false, 0 -(47) Window -Input [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, _w0#72] -Arguments: [rank(gross_margin#23) windowspecdefinition(lochierarchy#26, _w0#72, gross_margin#23 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#74], [lochierarchy#26, _w0#72], [gross_margin#23 ASC NULLS FIRST] +(43) Window +Input [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, _w0#71] +Arguments: [rank(gross_margin#23) windowspecdefinition(lochierarchy#26, _w0#71, gross_margin#23 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#73], [lochierarchy#26, _w0#71], [gross_margin#23 ASC NULLS FIRST] -(48) Project [codegen id : 23] -Output [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, rank_within_parent#74] -Input [6]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, _w0#72, rank_within_parent#74] +(44) Project [codegen id : 21] +Output [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, rank_within_parent#73] +Input [6]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, _w0#71, rank_within_parent#73] -(49) TakeOrderedAndProject -Input [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, rank_within_parent#74] -Arguments: 100, [lochierarchy#26 DESC NULLS 
LAST, CASE WHEN (lochierarchy#26 = 0) THEN i_category#11 END ASC NULLS FIRST, rank_within_parent#74 ASC NULLS FIRST], [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, rank_within_parent#74] +(45) TakeOrderedAndProject +Input [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, rank_within_parent#73] +Arguments: 100, [lochierarchy#26 DESC NULLS LAST, CASE WHEN (lochierarchy#26 = 0) THEN i_category#11 END ASC NULLS FIRST, rank_within_parent#73 ASC NULLS FIRST], [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, rank_within_parent#73] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/simplified.txt index a72781e1da0ed..297c414a18cb0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/simplified.txt @@ -1,82 +1,74 @@ TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,gross_margin,i_class] - WholeStageCodegen (23) + WholeStageCodegen (21) Project [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] InputAdapter Window [gross_margin,lochierarchy,_w0] - WholeStageCodegen (22) + WholeStageCodegen (20) Sort [lochierarchy,_w0,gross_margin] InputAdapter Exchange [lochierarchy,_w0] #1 - WholeStageCodegen (21) + WholeStageCodegen (19) HashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] [_w0] InputAdapter Exchange [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] #2 - WholeStageCodegen (20) + WholeStageCodegen (18) HashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] InputAdapter Union - WholeStageCodegen (13) - HashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] + WholeStageCodegen (5) + HashAggregate [i_category,i_class,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),gross_margin,t_category,t_class,lochierarchy,sum,sum] InputAdapter - Exchange [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] #3 - WholeStageCodegen (12) - HashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] - InputAdapter - Union - WholeStageCodegen (5) - HashAggregate [i_category,i_class,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),gross_margin,t_category,t_class,lochierarchy,sum,sum] - InputAdapter - Exchange [i_category,i_class] #4 - WholeStageCodegen (4) - HashAggregate [i_category,i_class,ss_net_profit,ss_ext_sales_price] [sum,sum,sum,sum] - Project [ss_ext_sales_price,ss_net_profit,i_class,i_category] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_store_sk,ss_ext_sales_price,ss_net_profit,i_class,i_category] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_item_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + Exchange [i_category,i_class] #3 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,ss_net_profit,ss_ext_sales_price] [sum,sum,sum,sum] + 
Project [ss_ext_sales_price,ss_net_profit,i_class,i_category] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_ext_sales_price,ss_net_profit,i_class,i_category] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_item_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #6 - WholeStageCodegen (2) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_class,i_category] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (3) - Project [s_store_sk] - Filter [s_state,s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_state] - WholeStageCodegen (11) - HashAggregate [i_category,sum,isEmpty,sum,isEmpty] [sum(ss_net_profit),sum(ss_ext_sales_price),gross_margin,i_class,t_category,t_class,lochierarchy,sum,isEmpty,sum,isEmpty] + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter - Exchange [i_category] #8 - WholeStageCodegen (10) - HashAggregate [i_category,ss_net_profit,ss_ext_sales_price] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),ss_net_profit,ss_ext_sales_price,sum,sum] + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow InputAdapter - ReusedExchange [i_category,i_class,sum,sum] #4 - WholeStageCodegen (19) + Scan parquet default.item [i_item_sk,i_class,i_category] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [s_store_sk] + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_state] + WholeStageCodegen (11) + HashAggregate [i_category,sum,isEmpty,sum,isEmpty] [sum(ss_net_profit),sum(ss_ext_sales_price),gross_margin,i_class,t_category,t_class,lochierarchy,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [i_category] #7 + WholeStageCodegen (10) + HashAggregate [i_category,ss_net_profit,ss_ext_sales_price] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),ss_net_profit,ss_ext_sales_price,sum,sum] + InputAdapter + ReusedExchange [i_category,i_class,sum,sum] #3 + WholeStageCodegen (17) HashAggregate [sum,isEmpty,sum,isEmpty] [sum(ss_net_profit),sum(ss_ext_sales_price),gross_margin,i_category,i_class,t_category,t_class,lochierarchy,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange #9 - WholeStageCodegen (18) + Exchange #8 + WholeStageCodegen (16) HashAggregate [ss_net_profit,ss_ext_sales_price] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] HashAggregate [i_category,i_class,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),ss_net_profit,ss_ext_sales_price,sum,sum] InputAdapter - ReusedExchange [i_category,i_class,sum,sum] #4 + ReusedExchange [i_category,i_class,sum,sum] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt index 471d38c89e601..432ef4db6b1eb 100644 --- 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt @@ -1,102 +1,98 @@ == Physical Plan == -TakeOrderedAndProject (98) -+- * HashAggregate (97) - +- Exchange (96) - +- * HashAggregate (95) - +- Union (94) - :- * HashAggregate (88) - : +- Exchange (87) - : +- * HashAggregate (86) - : +- Union (85) - : :- * HashAggregate (79) - : : +- Exchange (78) - : : +- * HashAggregate (77) - : : +- Union (76) - : : :- * HashAggregate (25) - : : : +- Exchange (24) - : : : +- * HashAggregate (23) - : : : +- * Project (22) - : : : +- * BroadcastHashJoin Inner BuildRight (21) - : : : :- * Project (16) - : : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : : :- Union (9) - : : : : : :- * Project (4) - : : : : : : +- * Filter (3) - : : : : : : +- * ColumnarToRow (2) - : : : : : : +- Scan parquet default.store_sales (1) - : : : : : +- * Project (8) - : : : : : +- * Filter (7) - : : : : : +- * ColumnarToRow (6) - : : : : : +- Scan parquet default.store_returns (5) - : : : : +- BroadcastExchange (14) - : : : : +- * Project (13) - : : : : +- * Filter (12) - : : : : +- * ColumnarToRow (11) - : : : : +- Scan parquet default.date_dim (10) - : : : +- BroadcastExchange (20) - : : : +- * Filter (19) - : : : +- * ColumnarToRow (18) - : : : +- Scan parquet default.store (17) - : : :- * HashAggregate (46) - : : : +- Exchange (45) - : : : +- * HashAggregate (44) - : : : +- * Project (43) - : : : +- * BroadcastHashJoin Inner BuildRight (42) - : : : :- * Project (37) - : : : : +- * BroadcastHashJoin Inner BuildRight (36) - : : : : :- Union (34) - : : : : : :- * Project (29) - : : : : : : +- * Filter (28) - : : : : : : +- * ColumnarToRow (27) - : : : : : : +- Scan parquet default.catalog_sales (26) - : : : : : +- * Project (33) - : : : : : +- * Filter (32) - : : : : : +- * ColumnarToRow (31) - : : : : : +- Scan parquet default.catalog_returns (30) - : : : : +- ReusedExchange (35) - : : : +- BroadcastExchange (41) - : : : +- * Filter (40) - : : : +- * ColumnarToRow (39) - : : : +- Scan parquet default.catalog_page (38) - : : +- * HashAggregate (75) - : : +- Exchange (74) - : : +- * HashAggregate (73) - : : +- * Project (72) - : : +- * BroadcastHashJoin Inner BuildRight (71) - : : :- * Project (66) - : : : +- * BroadcastHashJoin Inner BuildRight (65) - : : : :- Union (63) - : : : : :- * Project (50) - : : : : : +- * Filter (49) - : : : : : +- * ColumnarToRow (48) - : : : : : +- Scan parquet default.web_sales (47) - : : : : +- * Project (62) - : : : : +- * SortMergeJoin Inner (61) - : : : : :- * Sort (55) - : : : : : +- Exchange (54) - : : : : : +- * Filter (53) - : : : : : +- * ColumnarToRow (52) - : : : : : +- Scan parquet default.web_returns (51) - : : : : +- * Sort (60) - : : : : +- Exchange (59) - : : : : +- * Filter (58) - : : : : +- * ColumnarToRow (57) - : : : : +- Scan parquet default.web_sales (56) - : : : +- ReusedExchange (64) - : : +- BroadcastExchange (70) - : : +- * Filter (69) - : : +- * ColumnarToRow (68) - : : +- Scan parquet default.web_site (67) - : +- * HashAggregate (84) - : +- Exchange (83) - : +- * HashAggregate (82) - : +- * HashAggregate (81) - : +- ReusedExchange (80) - +- * HashAggregate (93) - +- Exchange (92) - +- * HashAggregate (91) - +- * HashAggregate (90) - +- ReusedExchange (89) +TakeOrderedAndProject (94) ++- * HashAggregate (93) + +- Exchange (92) + +- * HashAggregate (91) + +- Union (90) + :- * HashAggregate (79) + : +- Exchange (78) + : +- * 
HashAggregate (77) + : +- Union (76) + : :- * HashAggregate (25) + : : +- Exchange (24) + : : +- * HashAggregate (23) + : : +- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- Union (9) + : : : : :- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- * Project (8) + : : : : +- * Filter (7) + : : : : +- * ColumnarToRow (6) + : : : : +- Scan parquet default.store_returns (5) + : : : +- BroadcastExchange (14) + : : : +- * Project (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.date_dim (10) + : : +- BroadcastExchange (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet default.store (17) + : :- * HashAggregate (46) + : : +- Exchange (45) + : : +- * HashAggregate (44) + : : +- * Project (43) + : : +- * BroadcastHashJoin Inner BuildRight (42) + : : :- * Project (37) + : : : +- * BroadcastHashJoin Inner BuildRight (36) + : : : :- Union (34) + : : : : :- * Project (29) + : : : : : +- * Filter (28) + : : : : : +- * ColumnarToRow (27) + : : : : : +- Scan parquet default.catalog_sales (26) + : : : : +- * Project (33) + : : : : +- * Filter (32) + : : : : +- * ColumnarToRow (31) + : : : : +- Scan parquet default.catalog_returns (30) + : : : +- ReusedExchange (35) + : : +- BroadcastExchange (41) + : : +- * Filter (40) + : : +- * ColumnarToRow (39) + : : +- Scan parquet default.catalog_page (38) + : +- * HashAggregate (75) + : +- Exchange (74) + : +- * HashAggregate (73) + : +- * Project (72) + : +- * BroadcastHashJoin Inner BuildRight (71) + : :- * Project (66) + : : +- * BroadcastHashJoin Inner BuildRight (65) + : : :- Union (63) + : : : :- * Project (50) + : : : : +- * Filter (49) + : : : : +- * ColumnarToRow (48) + : : : : +- Scan parquet default.web_sales (47) + : : : +- * Project (62) + : : : +- * SortMergeJoin Inner (61) + : : : :- * Sort (55) + : : : : +- Exchange (54) + : : : : +- * Filter (53) + : : : : +- * ColumnarToRow (52) + : : : : +- Scan parquet default.web_returns (51) + : : : +- * Sort (60) + : : : +- Exchange (59) + : : : +- * Filter (58) + : : : +- * ColumnarToRow (57) + : : : +- Scan parquet default.web_sales (56) + : : +- ReusedExchange (64) + : +- BroadcastExchange (70) + : +- * Filter (69) + : +- * ColumnarToRow (68) + : +- Scan parquet default.web_site (67) + :- * HashAggregate (84) + : +- Exchange (83) + : +- * HashAggregate (82) + : +- * HashAggregate (81) + : +- ReusedExchange (80) + +- * HashAggregate (89) + +- Exchange (88) + +- * HashAggregate (87) + +- * HashAggregate (86) + +- ReusedExchange (85) (1) Scan parquet default.store_sales @@ -203,7 +199,7 @@ Results [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] (24) Exchange Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] -Arguments: hashpartitioning(s_store_id#25, 5), true, [id=#35] +Arguments: hashpartitioning(s_store_id#25, 5), ENSURE_REQUIREMENTS, [id=#35] (25) HashAggregate [codegen id : 6] Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] @@ -298,7 +294,7 @@ Results [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] (45) Exchange Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] -Arguments: hashpartitioning(cp_catalog_page_id#66, 5), true, [id=#76] +Arguments: hashpartitioning(cp_catalog_page_id#66, 5), ENSURE_REQUIREMENTS, [id=#76] (46) HashAggregate [codegen id : 12] Input [5]: [cp_catalog_page_id#66, 
sum#72, sum#73, sum#74, sum#75] @@ -341,7 +337,7 @@ Condition : isnotnull(wr_returned_date_sk#96) (54) Exchange Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] -Arguments: hashpartitioning(wr_item_sk#97, wr_order_number#98, 5), true, [id=#101] +Arguments: hashpartitioning(wr_item_sk#97, wr_order_number#98, 5), ENSURE_REQUIREMENTS, [id=#101] (55) Sort [codegen id : 15] Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] @@ -363,7 +359,7 @@ Condition : ((isnotnull(ws_item_sk#102) AND isnotnull(ws_order_number#103)) AND (59) Exchange Input [3]: [ws_item_sk#102, ws_web_site_sk#87, ws_order_number#103] -Arguments: hashpartitioning(cast(ws_item_sk#102 as bigint), cast(ws_order_number#103 as bigint), 5), true, [id=#104] +Arguments: hashpartitioning(cast(ws_item_sk#102 as bigint), cast(ws_order_number#103 as bigint), 5), ENSURE_REQUIREMENTS, [id=#104] (60) Sort [codegen id : 17] Input [3]: [ws_item_sk#102, ws_web_site_sk#87, ws_order_number#103] @@ -428,7 +424,7 @@ Results [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121] (74) Exchange Input [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121] -Arguments: hashpartitioning(web_site_id#112, 5), true, [id=#122] +Arguments: hashpartitioning(web_site_id#112, 5), ENSURE_REQUIREMENTS, [id=#122] (75) HashAggregate [codegen id : 22] Input [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121] @@ -448,7 +444,7 @@ Results [8]: [channel#40, id#41, sum#138, isEmpty#139, sum#140, isEmpty#141, sum (78) Exchange Input [8]: [channel#40, id#41, sum#138, isEmpty#139, sum#140, isEmpty#141, sum#142, isEmpty#143] -Arguments: hashpartitioning(channel#40, id#41, 5), true, [id=#144] +Arguments: hashpartitioning(channel#40, id#41, 5), ENSURE_REQUIREMENTS, [id=#144] (79) HashAggregate [codegen id : 24] Input [8]: [channel#40, id#41, sum#138, isEmpty#139, sum#140, isEmpty#141, sum#142, isEmpty#143] @@ -476,7 +472,7 @@ Results [7]: [channel#40, sum#170, isEmpty#171, sum#172, isEmpty#173, sum#174, i (83) Exchange Input [7]: [channel#40, sum#170, isEmpty#171, sum#172, isEmpty#173, sum#174, isEmpty#175] -Arguments: hashpartitioning(channel#40, 5), true, [id=#176] +Arguments: hashpartitioning(channel#40, 5), ENSURE_REQUIREMENTS, [id=#176] (84) HashAggregate [codegen id : 49] Input [7]: [channel#40, sum#170, isEmpty#171, sum#172, isEmpty#173, sum#174, isEmpty#175] @@ -485,75 +481,55 @@ Functions [3]: [sum(sales#161), sum(returns#162), sum(profit#163)] Aggregate Attributes [3]: [sum(sales#161)#177, sum(returns#162)#178, sum(profit#163)#179] Results [5]: [channel#40, null AS id#180, sum(sales#161)#177 AS sum(sales)#181, sum(returns#162)#178 AS sum(returns)#182, sum(profit#163)#179 AS sum(profit)#183] -(85) Union +(85) ReusedExchange [Reuses operator id: 78] +Output [8]: [channel#40, id#41, sum#184, isEmpty#185, sum#186, isEmpty#187, sum#188, isEmpty#189] -(86) HashAggregate [codegen id : 50] -Input [5]: [channel#40, id#41, sales#148, returns#149, profit#150] -Keys [5]: [channel#40, id#41, sales#148, returns#149, profit#150] -Functions: [] -Aggregate Attributes: [] -Results [5]: [channel#40, id#41, sales#148, returns#149, profit#150] - -(87) Exchange -Input [5]: [channel#40, id#41, sales#148, returns#149, profit#150] -Arguments: hashpartitioning(channel#40, id#41, sales#148, returns#149, profit#150, 5), true, [id=#184] - -(88) HashAggregate [codegen id : 51] -Input [5]: [channel#40, id#41, sales#148, returns#149, profit#150] -Keys [5]: [channel#40, id#41, 
sales#148, returns#149, profit#150] -Functions: [] -Aggregate Attributes: [] -Results [5]: [channel#40, id#41, sales#148, returns#149, profit#150] - -(89) ReusedExchange [Reuses operator id: 78] -Output [8]: [channel#40, id#41, sum#185, isEmpty#186, sum#187, isEmpty#188, sum#189, isEmpty#190] - -(90) HashAggregate [codegen id : 75] -Input [8]: [channel#40, id#41, sum#185, isEmpty#186, sum#187, isEmpty#188, sum#189, isEmpty#190] +(86) HashAggregate [codegen id : 73] +Input [8]: [channel#40, id#41, sum#184, isEmpty#185, sum#186, isEmpty#187, sum#188, isEmpty#189] Keys [2]: [channel#40, id#41] -Functions [3]: [sum(sales#42), sum(returns#43), sum(profit#191)] -Aggregate Attributes [3]: [sum(sales#42)#192, sum(returns#43)#193, sum(profit#191)#194] -Results [3]: [sum(sales#42)#192 AS sales#161, sum(returns#43)#193 AS returns#162, sum(profit#191)#194 AS profit#163] +Functions [3]: [sum(sales#42), sum(returns#43), sum(profit#190)] +Aggregate Attributes [3]: [sum(sales#42)#191, sum(returns#43)#192, sum(profit#190)#193] +Results [3]: [sum(sales#42)#191 AS sales#161, sum(returns#43)#192 AS returns#162, sum(profit#190)#193 AS profit#163] -(91) HashAggregate [codegen id : 75] +(87) HashAggregate [codegen id : 73] Input [3]: [sales#161, returns#162, profit#163] Keys: [] Functions [3]: [partial_sum(sales#161), partial_sum(returns#162), partial_sum(profit#163)] -Aggregate Attributes [6]: [sum#195, isEmpty#196, sum#197, isEmpty#198, sum#199, isEmpty#200] -Results [6]: [sum#201, isEmpty#202, sum#203, isEmpty#204, sum#205, isEmpty#206] +Aggregate Attributes [6]: [sum#194, isEmpty#195, sum#196, isEmpty#197, sum#198, isEmpty#199] +Results [6]: [sum#200, isEmpty#201, sum#202, isEmpty#203, sum#204, isEmpty#205] -(92) Exchange -Input [6]: [sum#201, isEmpty#202, sum#203, isEmpty#204, sum#205, isEmpty#206] -Arguments: SinglePartition, true, [id=#207] +(88) Exchange +Input [6]: [sum#200, isEmpty#201, sum#202, isEmpty#203, sum#204, isEmpty#205] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#206] -(93) HashAggregate [codegen id : 76] -Input [6]: [sum#201, isEmpty#202, sum#203, isEmpty#204, sum#205, isEmpty#206] +(89) HashAggregate [codegen id : 74] +Input [6]: [sum#200, isEmpty#201, sum#202, isEmpty#203, sum#204, isEmpty#205] Keys: [] Functions [3]: [sum(sales#161), sum(returns#162), sum(profit#163)] -Aggregate Attributes [3]: [sum(sales#161)#208, sum(returns#162)#209, sum(profit#163)#210] -Results [5]: [null AS channel#211, null AS id#212, sum(sales#161)#208 AS sum(sales)#213, sum(returns#162)#209 AS sum(returns)#214, sum(profit#163)#210 AS sum(profit)#215] +Aggregate Attributes [3]: [sum(sales#161)#207, sum(returns#162)#208, sum(profit#163)#209] +Results [5]: [null AS channel#210, null AS id#211, sum(sales#161)#207 AS sum(sales)#212, sum(returns#162)#208 AS sum(returns)#213, sum(profit#163)#209 AS sum(profit)#214] -(94) Union +(90) Union -(95) HashAggregate [codegen id : 77] +(91) HashAggregate [codegen id : 75] Input [5]: [channel#40, id#41, sales#148, returns#149, profit#150] Keys [5]: [channel#40, id#41, sales#148, returns#149, profit#150] Functions: [] Aggregate Attributes: [] Results [5]: [channel#40, id#41, sales#148, returns#149, profit#150] -(96) Exchange +(92) Exchange Input [5]: [channel#40, id#41, sales#148, returns#149, profit#150] -Arguments: hashpartitioning(channel#40, id#41, sales#148, returns#149, profit#150, 5), true, [id=#216] +Arguments: hashpartitioning(channel#40, id#41, sales#148, returns#149, profit#150, 5), ENSURE_REQUIREMENTS, [id=#215] -(97) HashAggregate [codegen id : 78] +(93) 
HashAggregate [codegen id : 76] Input [5]: [channel#40, id#41, sales#148, returns#149, profit#150] Keys [5]: [channel#40, id#41, sales#148, returns#149, profit#150] Functions: [] Aggregate Attributes: [] Results [5]: [channel#40, id#41, sales#148, returns#149, profit#150] -(98) TakeOrderedAndProject +(94) TakeOrderedAndProject Input [5]: [channel#40, id#41, sales#148, returns#149, profit#150] Arguments: 100, [channel#40 ASC NULLS FIRST, id#41 ASC NULLS FIRST], [channel#40, id#41, sales#148, returns#149, profit#150] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/simplified.txt index 81b4178b7a9ca..233af6d8cc813 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/simplified.txt @@ -1,165 +1,157 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] - WholeStageCodegen (78) + WholeStageCodegen (76) HashAggregate [channel,id,sales,returns,profit] InputAdapter Exchange [channel,id,sales,returns,profit] #1 - WholeStageCodegen (77) + WholeStageCodegen (75) HashAggregate [channel,id,sales,returns,profit] InputAdapter Union - WholeStageCodegen (51) - HashAggregate [channel,id,sales,returns,profit] + WholeStageCodegen (24) + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange [channel,id,sales,returns,profit] #2 - WholeStageCodegen (50) - HashAggregate [channel,id,sales,returns,profit] + Exchange [channel,id] #2 + WholeStageCodegen (23) + HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Union - WholeStageCodegen (24) - HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + WholeStageCodegen (6) + HashAggregate [s_store_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] InputAdapter - Exchange [channel,id] #3 - WholeStageCodegen (23) - HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - Union - WholeStageCodegen (6) - HashAggregate [s_store_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + Exchange [s_store_id] #3 + WholeStageCodegen (5) + HashAggregate [s_store_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,profit,return_amt,net_loss,s_store_id] + BroadcastHashJoin [store_sk,s_store_sk] + Project [store_sk,sales_price,profit,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] InputAdapter - Exchange [s_store_id] #4 - WholeStageCodegen (5) - HashAggregate [s_store_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [sales_price,profit,return_amt,net_loss,s_store_id] - BroadcastHashJoin [store_sk,s_store_sk] - Project [store_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] - InputAdapter - Union - WholeStageCodegen (1) 
- Project [ss_store_sk,ss_sold_date_sk,ss_ext_sales_price,ss_net_profit] - Filter [ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] - WholeStageCodegen (2) - Project [sr_store_sk,sr_returned_date_sk,sr_return_amt,sr_net_loss] - Filter [sr_returned_date_sk,sr_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (3) - Project [d_date_sk] - Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Union + WholeStageCodegen (1) + Project [ss_store_sk,ss_sold_date_sk,ss_ext_sales_price,ss_net_profit] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + WholeStageCodegen (2) + Project [sr_store_sk,sr_returned_date_sk,sr_return_amt,sr_net_loss] + Filter [sr_returned_date_sk,sr_store_sk] + ColumnarToRow InputAdapter - BroadcastExchange #6 - WholeStageCodegen (4) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_id] - WholeStageCodegen (12) - HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + Scan parquet default.store_returns [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] InputAdapter - Exchange [cp_catalog_page_id] #7 - WholeStageCodegen (11) - HashAggregate [cp_catalog_page_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [sales_price,profit,return_amt,net_loss,cp_catalog_page_id] - BroadcastHashJoin [page_sk,cp_catalog_page_sk] - Project [page_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] - InputAdapter - Union - WholeStageCodegen (7) - Project [cs_catalog_page_sk,cs_sold_date_sk,cs_ext_sales_price,cs_net_profit] - Filter [cs_sold_date_sk,cs_catalog_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit] - WholeStageCodegen (8) - Project [cr_catalog_page_sk,cr_returned_date_sk,cr_return_amount,cr_net_loss] - Filter [cr_returned_date_sk,cr_catalog_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_returned_date_sk,cr_catalog_page_sk,cr_return_amount,cr_net_loss] - InputAdapter - ReusedExchange [d_date_sk] #5 + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #8 - WholeStageCodegen (10) - Filter [cp_catalog_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] - WholeStageCodegen (22) - HashAggregate [web_site_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + Scan parquet default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_id] + WholeStageCodegen (12) + HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] 
[sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + InputAdapter + Exchange [cp_catalog_page_id] #6 + WholeStageCodegen (11) + HashAggregate [cp_catalog_page_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,profit,return_amt,net_loss,cp_catalog_page_id] + BroadcastHashJoin [page_sk,cp_catalog_page_sk] + Project [page_sk,sales_price,profit,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen (7) + Project [cs_catalog_page_sk,cs_sold_date_sk,cs_ext_sales_price,cs_net_profit] + Filter [cs_sold_date_sk,cs_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit] + WholeStageCodegen (8) + Project [cr_catalog_page_sk,cr_returned_date_sk,cr_return_amount,cr_net_loss] + Filter [cr_returned_date_sk,cr_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_catalog_page_sk,cr_return_amount,cr_net_loss] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (10) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + WholeStageCodegen (22) + HashAggregate [web_site_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + InputAdapter + Exchange [web_site_id] #8 + WholeStageCodegen (21) + HashAggregate [web_site_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,profit,return_amt,net_loss,web_site_id] + BroadcastHashJoin [wsr_web_site_sk,web_site_sk] + Project [wsr_web_site_sk,sales_price,profit,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] InputAdapter - Exchange [web_site_id] #9 - WholeStageCodegen (21) - HashAggregate [web_site_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [sales_price,profit,return_amt,net_loss,web_site_id] - BroadcastHashJoin [wsr_web_site_sk,web_site_sk] - Project [wsr_web_site_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] + Union + WholeStageCodegen (13) + Project [ws_web_site_sk,ws_sold_date_sk,ws_ext_sales_price,ws_net_profit] + Filter [ws_sold_date_sk,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_web_site_sk,ws_ext_sales_price,ws_net_profit] + WholeStageCodegen (18) + Project [ws_web_site_sk,wr_returned_date_sk,wr_return_amt,wr_net_loss] + SortMergeJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + InputAdapter + WholeStageCodegen (15) + Sort [wr_item_sk,wr_order_number] InputAdapter - Union - WholeStageCodegen (13) - Project [ws_web_site_sk,ws_sold_date_sk,ws_ext_sales_price,ws_net_profit] - Filter [ws_sold_date_sk,ws_web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_web_site_sk,ws_ext_sales_price,ws_net_profit] - WholeStageCodegen (18) - Project [ws_web_site_sk,wr_returned_date_sk,wr_return_amt,wr_net_loss] - SortMergeJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] - InputAdapter - WholeStageCodegen (15) - Sort [wr_item_sk,wr_order_number] - InputAdapter - Exchange 
[wr_item_sk,wr_order_number] #10 - WholeStageCodegen (14) - Filter [wr_returned_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_returns [wr_returned_date_sk,wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + Exchange [wr_item_sk,wr_order_number] #9 + WholeStageCodegen (14) + Filter [wr_returned_date_sk] + ColumnarToRow InputAdapter - WholeStageCodegen (17) - Sort [ws_item_sk,ws_order_number] - InputAdapter - Exchange [ws_item_sk,ws_order_number] #11 - WholeStageCodegen (16) - Filter [ws_item_sk,ws_order_number,ws_web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_item_sk,ws_web_site_sk,ws_order_number] + Scan parquet default.web_returns [wr_returned_date_sk,wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + InputAdapter + WholeStageCodegen (17) + Sort [ws_item_sk,ws_order_number] InputAdapter - ReusedExchange [d_date_sk] #5 - InputAdapter - BroadcastExchange #12 - WholeStageCodegen (20) - Filter [web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_site [web_site_sk,web_site_id] - WholeStageCodegen (49) - HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sum(sales),sum(returns),sum(profit),sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - Exchange [channel] #13 - WholeStageCodegen (48) - HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #3 - WholeStageCodegen (76) + Exchange [ws_item_sk,ws_order_number] #10 + WholeStageCodegen (16) + Filter [ws_item_sk,ws_order_number,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_web_site_sk,ws_order_number] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (20) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_site_sk,web_site_id] + WholeStageCodegen (49) + HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sum(sales),sum(returns),sum(profit),sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel] #12 + WholeStageCodegen (48) + HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 + WholeStageCodegen (74) HashAggregate [sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),channel,id,sum(sales),sum(returns),sum(profit),sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange #14 - WholeStageCodegen (75) + Exchange #13 + WholeStageCodegen (73) HashAggregate [sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #3 + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/explain.txt index fa2435de73e02..d4c1b5f93a0d2 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/explain.txt @@ -1,99 +1,95 @@ == Physical Plan == -TakeOrderedAndProject (95) -+- * HashAggregate (94) - +- Exchange (93) - +- * HashAggregate (92) - +- Union (91) - :- * HashAggregate (85) - : +- Exchange (84) - : +- * HashAggregate (83) - : +- Union (82) - : :- * HashAggregate (76) - : : +- Exchange (75) - : : +- * HashAggregate (74) - : : +- Union (73) - : : :- * HashAggregate (25) - : : : +- Exchange (24) - : : : +- * HashAggregate (23) - : : : +- * Project (22) - : : : +- * BroadcastHashJoin Inner BuildRight (21) - : : : :- * Project (16) - : : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : : :- Union (9) - : : : : : :- * Project (4) - : : : : : : +- * Filter (3) - : : : : : : +- * ColumnarToRow (2) - : : : : : : +- Scan parquet default.store_sales (1) - : : : : : +- * Project (8) - : : : : : +- * Filter (7) - : : : : : +- * ColumnarToRow (6) - : : : : : +- Scan parquet default.store_returns (5) - : : : : +- BroadcastExchange (14) - : : : : +- * Project (13) - : : : : +- * Filter (12) - : : : : +- * ColumnarToRow (11) - : : : : +- Scan parquet default.date_dim (10) - : : : +- BroadcastExchange (20) - : : : +- * Filter (19) - : : : +- * ColumnarToRow (18) - : : : +- Scan parquet default.store (17) - : : :- * HashAggregate (46) - : : : +- Exchange (45) - : : : +- * HashAggregate (44) - : : : +- * Project (43) - : : : +- * BroadcastHashJoin Inner BuildRight (42) - : : : :- * Project (37) - : : : : +- * BroadcastHashJoin Inner BuildRight (36) - : : : : :- Union (34) - : : : : : :- * Project (29) - : : : : : : +- * Filter (28) - : : : : : : +- * ColumnarToRow (27) - : : : : : : +- Scan parquet default.catalog_sales (26) - : : : : : +- * Project (33) - : : : : : +- * Filter (32) - : : : : : +- * ColumnarToRow (31) - : : : : : +- Scan parquet default.catalog_returns (30) - : : : : +- ReusedExchange (35) - : : : +- BroadcastExchange (41) - : : : +- * Filter (40) - : : : +- * ColumnarToRow (39) - : : : +- Scan parquet default.catalog_page (38) - : : +- * HashAggregate (72) - : : +- Exchange (71) - : : +- * HashAggregate (70) - : : +- * Project (69) - : : +- * BroadcastHashJoin Inner BuildRight (68) - : : :- * Project (63) - : : : +- * BroadcastHashJoin Inner BuildRight (62) - : : : :- Union (60) - : : : : :- * Project (50) - : : : : : +- * Filter (49) - : : : : : +- * ColumnarToRow (48) - : : : : : +- Scan parquet default.web_sales (47) - : : : : +- * Project (59) - : : : : +- * BroadcastHashJoin Inner BuildRight (58) - : : : : :- * Filter (53) - : : : : : +- * ColumnarToRow (52) - : : : : : +- Scan parquet default.web_returns (51) - : : : : +- BroadcastExchange (57) - : : : : +- * Filter (56) - : : : : +- * ColumnarToRow (55) - : : : : +- Scan parquet default.web_sales (54) - : : : +- ReusedExchange (61) - : : +- BroadcastExchange (67) - : : +- * Filter (66) - : : +- * ColumnarToRow (65) - : : +- Scan parquet default.web_site (64) - : +- * HashAggregate (81) - : +- Exchange (80) - : +- * HashAggregate (79) - : +- * HashAggregate (78) - : +- ReusedExchange (77) - +- * HashAggregate (90) - +- Exchange (89) - +- * HashAggregate (88) - +- * HashAggregate (87) - +- ReusedExchange (86) +TakeOrderedAndProject 
(91) ++- * HashAggregate (90) + +- Exchange (89) + +- * HashAggregate (88) + +- Union (87) + :- * HashAggregate (76) + : +- Exchange (75) + : +- * HashAggregate (74) + : +- Union (73) + : :- * HashAggregate (25) + : : +- Exchange (24) + : : +- * HashAggregate (23) + : : +- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- Union (9) + : : : : :- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- * Project (8) + : : : : +- * Filter (7) + : : : : +- * ColumnarToRow (6) + : : : : +- Scan parquet default.store_returns (5) + : : : +- BroadcastExchange (14) + : : : +- * Project (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.date_dim (10) + : : +- BroadcastExchange (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet default.store (17) + : :- * HashAggregate (46) + : : +- Exchange (45) + : : +- * HashAggregate (44) + : : +- * Project (43) + : : +- * BroadcastHashJoin Inner BuildRight (42) + : : :- * Project (37) + : : : +- * BroadcastHashJoin Inner BuildRight (36) + : : : :- Union (34) + : : : : :- * Project (29) + : : : : : +- * Filter (28) + : : : : : +- * ColumnarToRow (27) + : : : : : +- Scan parquet default.catalog_sales (26) + : : : : +- * Project (33) + : : : : +- * Filter (32) + : : : : +- * ColumnarToRow (31) + : : : : +- Scan parquet default.catalog_returns (30) + : : : +- ReusedExchange (35) + : : +- BroadcastExchange (41) + : : +- * Filter (40) + : : +- * ColumnarToRow (39) + : : +- Scan parquet default.catalog_page (38) + : +- * HashAggregate (72) + : +- Exchange (71) + : +- * HashAggregate (70) + : +- * Project (69) + : +- * BroadcastHashJoin Inner BuildRight (68) + : :- * Project (63) + : : +- * BroadcastHashJoin Inner BuildRight (62) + : : :- Union (60) + : : : :- * Project (50) + : : : : +- * Filter (49) + : : : : +- * ColumnarToRow (48) + : : : : +- Scan parquet default.web_sales (47) + : : : +- * Project (59) + : : : +- * BroadcastHashJoin Inner BuildRight (58) + : : : :- * Filter (53) + : : : : +- * ColumnarToRow (52) + : : : : +- Scan parquet default.web_returns (51) + : : : +- BroadcastExchange (57) + : : : +- * Filter (56) + : : : +- * ColumnarToRow (55) + : : : +- Scan parquet default.web_sales (54) + : : +- ReusedExchange (61) + : +- BroadcastExchange (67) + : +- * Filter (66) + : +- * ColumnarToRow (65) + : +- Scan parquet default.web_site (64) + :- * HashAggregate (81) + : +- Exchange (80) + : +- * HashAggregate (79) + : +- * HashAggregate (78) + : +- ReusedExchange (77) + +- * HashAggregate (86) + +- Exchange (85) + +- * HashAggregate (84) + +- * HashAggregate (83) + +- ReusedExchange (82) (1) Scan parquet default.store_sales @@ -200,7 +196,7 @@ Results [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] (24) Exchange Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] -Arguments: hashpartitioning(s_store_id#25, 5), true, [id=#35] +Arguments: hashpartitioning(s_store_id#25, 5), ENSURE_REQUIREMENTS, [id=#35] (25) HashAggregate [codegen id : 6] Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] @@ -295,7 +291,7 @@ Results [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] (45) Exchange Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] -Arguments: hashpartitioning(cp_catalog_page_id#66, 5), true, [id=#76] +Arguments: hashpartitioning(cp_catalog_page_id#66, 5), 
ENSURE_REQUIREMENTS, [id=#76] (46) HashAggregate [codegen id : 12] Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] @@ -413,7 +409,7 @@ Results [5]: [web_site_id#111, sum#117, sum#118, sum#119, sum#120] (71) Exchange Input [5]: [web_site_id#111, sum#117, sum#118, sum#119, sum#120] -Arguments: hashpartitioning(web_site_id#111, 5), true, [id=#121] +Arguments: hashpartitioning(web_site_id#111, 5), ENSURE_REQUIREMENTS, [id=#121] (72) HashAggregate [codegen id : 19] Input [5]: [web_site_id#111, sum#117, sum#118, sum#119, sum#120] @@ -433,7 +429,7 @@ Results [8]: [channel#40, id#41, sum#137, isEmpty#138, sum#139, isEmpty#140, sum (75) Exchange Input [8]: [channel#40, id#41, sum#137, isEmpty#138, sum#139, isEmpty#140, sum#141, isEmpty#142] -Arguments: hashpartitioning(channel#40, id#41, 5), true, [id=#143] +Arguments: hashpartitioning(channel#40, id#41, 5), ENSURE_REQUIREMENTS, [id=#143] (76) HashAggregate [codegen id : 21] Input [8]: [channel#40, id#41, sum#137, isEmpty#138, sum#139, isEmpty#140, sum#141, isEmpty#142] @@ -461,7 +457,7 @@ Results [7]: [channel#40, sum#169, isEmpty#170, sum#171, isEmpty#172, sum#173, i (80) Exchange Input [7]: [channel#40, sum#169, isEmpty#170, sum#171, isEmpty#172, sum#173, isEmpty#174] -Arguments: hashpartitioning(channel#40, 5), true, [id=#175] +Arguments: hashpartitioning(channel#40, 5), ENSURE_REQUIREMENTS, [id=#175] (81) HashAggregate [codegen id : 43] Input [7]: [channel#40, sum#169, isEmpty#170, sum#171, isEmpty#172, sum#173, isEmpty#174] @@ -470,75 +466,55 @@ Functions [3]: [sum(sales#160), sum(returns#161), sum(profit#162)] Aggregate Attributes [3]: [sum(sales#160)#176, sum(returns#161)#177, sum(profit#162)#178] Results [5]: [channel#40, null AS id#179, sum(sales#160)#176 AS sum(sales)#180, sum(returns#161)#177 AS sum(returns)#181, sum(profit#162)#178 AS sum(profit)#182] -(82) Union +(82) ReusedExchange [Reuses operator id: 75] +Output [8]: [channel#40, id#41, sum#183, isEmpty#184, sum#185, isEmpty#186, sum#187, isEmpty#188] -(83) HashAggregate [codegen id : 44] -Input [5]: [channel#40, id#41, sales#147, returns#148, profit#149] -Keys [5]: [channel#40, id#41, sales#147, returns#148, profit#149] -Functions: [] -Aggregate Attributes: [] -Results [5]: [channel#40, id#41, sales#147, returns#148, profit#149] - -(84) Exchange -Input [5]: [channel#40, id#41, sales#147, returns#148, profit#149] -Arguments: hashpartitioning(channel#40, id#41, sales#147, returns#148, profit#149, 5), true, [id=#183] - -(85) HashAggregate [codegen id : 45] -Input [5]: [channel#40, id#41, sales#147, returns#148, profit#149] -Keys [5]: [channel#40, id#41, sales#147, returns#148, profit#149] -Functions: [] -Aggregate Attributes: [] -Results [5]: [channel#40, id#41, sales#147, returns#148, profit#149] - -(86) ReusedExchange [Reuses operator id: 75] -Output [8]: [channel#40, id#41, sum#184, isEmpty#185, sum#186, isEmpty#187, sum#188, isEmpty#189] - -(87) HashAggregate [codegen id : 66] -Input [8]: [channel#40, id#41, sum#184, isEmpty#185, sum#186, isEmpty#187, sum#188, isEmpty#189] +(83) HashAggregate [codegen id : 64] +Input [8]: [channel#40, id#41, sum#183, isEmpty#184, sum#185, isEmpty#186, sum#187, isEmpty#188] Keys [2]: [channel#40, id#41] -Functions [3]: [sum(sales#42), sum(returns#43), sum(profit#190)] -Aggregate Attributes [3]: [sum(sales#42)#191, sum(returns#43)#192, sum(profit#190)#193] -Results [3]: [sum(sales#42)#191 AS sales#160, sum(returns#43)#192 AS returns#161, sum(profit#190)#193 AS profit#162] +Functions [3]: [sum(sales#42), sum(returns#43), 
sum(profit#189)] +Aggregate Attributes [3]: [sum(sales#42)#190, sum(returns#43)#191, sum(profit#189)#192] +Results [3]: [sum(sales#42)#190 AS sales#160, sum(returns#43)#191 AS returns#161, sum(profit#189)#192 AS profit#162] -(88) HashAggregate [codegen id : 66] +(84) HashAggregate [codegen id : 64] Input [3]: [sales#160, returns#161, profit#162] Keys: [] Functions [3]: [partial_sum(sales#160), partial_sum(returns#161), partial_sum(profit#162)] -Aggregate Attributes [6]: [sum#194, isEmpty#195, sum#196, isEmpty#197, sum#198, isEmpty#199] -Results [6]: [sum#200, isEmpty#201, sum#202, isEmpty#203, sum#204, isEmpty#205] +Aggregate Attributes [6]: [sum#193, isEmpty#194, sum#195, isEmpty#196, sum#197, isEmpty#198] +Results [6]: [sum#199, isEmpty#200, sum#201, isEmpty#202, sum#203, isEmpty#204] -(89) Exchange -Input [6]: [sum#200, isEmpty#201, sum#202, isEmpty#203, sum#204, isEmpty#205] -Arguments: SinglePartition, true, [id=#206] +(85) Exchange +Input [6]: [sum#199, isEmpty#200, sum#201, isEmpty#202, sum#203, isEmpty#204] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#205] -(90) HashAggregate [codegen id : 67] -Input [6]: [sum#200, isEmpty#201, sum#202, isEmpty#203, sum#204, isEmpty#205] +(86) HashAggregate [codegen id : 65] +Input [6]: [sum#199, isEmpty#200, sum#201, isEmpty#202, sum#203, isEmpty#204] Keys: [] Functions [3]: [sum(sales#160), sum(returns#161), sum(profit#162)] -Aggregate Attributes [3]: [sum(sales#160)#207, sum(returns#161)#208, sum(profit#162)#209] -Results [5]: [null AS channel#210, null AS id#211, sum(sales#160)#207 AS sum(sales)#212, sum(returns#161)#208 AS sum(returns)#213, sum(profit#162)#209 AS sum(profit)#214] +Aggregate Attributes [3]: [sum(sales#160)#206, sum(returns#161)#207, sum(profit#162)#208] +Results [5]: [null AS channel#209, null AS id#210, sum(sales#160)#206 AS sum(sales)#211, sum(returns#161)#207 AS sum(returns)#212, sum(profit#162)#208 AS sum(profit)#213] -(91) Union +(87) Union -(92) HashAggregate [codegen id : 68] +(88) HashAggregate [codegen id : 66] Input [5]: [channel#40, id#41, sales#147, returns#148, profit#149] Keys [5]: [channel#40, id#41, sales#147, returns#148, profit#149] Functions: [] Aggregate Attributes: [] Results [5]: [channel#40, id#41, sales#147, returns#148, profit#149] -(93) Exchange +(89) Exchange Input [5]: [channel#40, id#41, sales#147, returns#148, profit#149] -Arguments: hashpartitioning(channel#40, id#41, sales#147, returns#148, profit#149, 5), true, [id=#215] +Arguments: hashpartitioning(channel#40, id#41, sales#147, returns#148, profit#149, 5), ENSURE_REQUIREMENTS, [id=#214] -(94) HashAggregate [codegen id : 69] +(90) HashAggregate [codegen id : 67] Input [5]: [channel#40, id#41, sales#147, returns#148, profit#149] Keys [5]: [channel#40, id#41, sales#147, returns#148, profit#149] Functions: [] Aggregate Attributes: [] Results [5]: [channel#40, id#41, sales#147, returns#148, profit#149] -(95) TakeOrderedAndProject +(91) TakeOrderedAndProject Input [5]: [channel#40, id#41, sales#147, returns#148, profit#149] Arguments: 100, [channel#40 ASC NULLS FIRST, id#41 ASC NULLS FIRST], [channel#40, id#41, sales#147, returns#148, profit#149] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/simplified.txt index 6bb223e2f4488..f5a22c77a8e30 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/simplified.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/simplified.txt @@ -1,156 +1,148 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] - WholeStageCodegen (69) + WholeStageCodegen (67) HashAggregate [channel,id,sales,returns,profit] InputAdapter Exchange [channel,id,sales,returns,profit] #1 - WholeStageCodegen (68) + WholeStageCodegen (66) HashAggregate [channel,id,sales,returns,profit] InputAdapter Union - WholeStageCodegen (45) - HashAggregate [channel,id,sales,returns,profit] + WholeStageCodegen (21) + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange [channel,id,sales,returns,profit] #2 - WholeStageCodegen (44) - HashAggregate [channel,id,sales,returns,profit] + Exchange [channel,id] #2 + WholeStageCodegen (20) + HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Union - WholeStageCodegen (21) - HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + WholeStageCodegen (6) + HashAggregate [s_store_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] InputAdapter - Exchange [channel,id] #3 - WholeStageCodegen (20) - HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - Union - WholeStageCodegen (6) - HashAggregate [s_store_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + Exchange [s_store_id] #3 + WholeStageCodegen (5) + HashAggregate [s_store_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,profit,return_amt,net_loss,s_store_id] + BroadcastHashJoin [store_sk,s_store_sk] + Project [store_sk,sales_price,profit,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] InputAdapter - Exchange [s_store_id] #4 - WholeStageCodegen (5) - HashAggregate [s_store_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [sales_price,profit,return_amt,net_loss,s_store_id] - BroadcastHashJoin [store_sk,s_store_sk] - Project [store_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] - InputAdapter - Union - WholeStageCodegen (1) - Project [ss_store_sk,ss_sold_date_sk,ss_ext_sales_price,ss_net_profit] - Filter [ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] - WholeStageCodegen (2) - Project [sr_store_sk,sr_returned_date_sk,sr_return_amt,sr_net_loss] - Filter [sr_returned_date_sk,sr_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (3) - Project [d_date_sk] - Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Union + WholeStageCodegen (1) + Project [ss_store_sk,ss_sold_date_sk,ss_ext_sales_price,ss_net_profit] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow InputAdapter - 
BroadcastExchange #6 - WholeStageCodegen (4) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_id] - WholeStageCodegen (12) - HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + WholeStageCodegen (2) + Project [sr_store_sk,sr_returned_date_sk,sr_return_amt,sr_net_loss] + Filter [sr_returned_date_sk,sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] InputAdapter - Exchange [cp_catalog_page_id] #7 - WholeStageCodegen (11) - HashAggregate [cp_catalog_page_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [sales_price,profit,return_amt,net_loss,cp_catalog_page_id] - BroadcastHashJoin [page_sk,cp_catalog_page_sk] - Project [page_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] - InputAdapter - Union - WholeStageCodegen (7) - Project [cs_catalog_page_sk,cs_sold_date_sk,cs_ext_sales_price,cs_net_profit] - Filter [cs_sold_date_sk,cs_catalog_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit] - WholeStageCodegen (8) - Project [cr_catalog_page_sk,cr_returned_date_sk,cr_return_amount,cr_net_loss] - Filter [cr_returned_date_sk,cr_catalog_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_returned_date_sk,cr_catalog_page_sk,cr_return_amount,cr_net_loss] - InputAdapter - ReusedExchange [d_date_sk] #5 + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #8 - WholeStageCodegen (10) - Filter [cp_catalog_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] - WholeStageCodegen (19) - HashAggregate [web_site_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + Scan parquet default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_id] + WholeStageCodegen (12) + HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + InputAdapter + Exchange [cp_catalog_page_id] #6 + WholeStageCodegen (11) + HashAggregate [cp_catalog_page_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,profit,return_amt,net_loss,cp_catalog_page_id] + BroadcastHashJoin [page_sk,cp_catalog_page_sk] + Project [page_sk,sales_price,profit,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] InputAdapter - Exchange [web_site_id] #9 - WholeStageCodegen (18) - HashAggregate [web_site_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [sales_price,profit,return_amt,net_loss,web_site_id] - BroadcastHashJoin [wsr_web_site_sk,web_site_sk] - Project 
[wsr_web_site_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] - InputAdapter - Union - WholeStageCodegen (13) - Project [ws_web_site_sk,ws_sold_date_sk,ws_ext_sales_price,ws_net_profit] - Filter [ws_sold_date_sk,ws_web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_web_site_sk,ws_ext_sales_price,ws_net_profit] - WholeStageCodegen (15) - Project [ws_web_site_sk,wr_returned_date_sk,wr_return_amt,wr_net_loss] - BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] - Filter [wr_returned_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_returns [wr_returned_date_sk,wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] - InputAdapter - BroadcastExchange #10 - WholeStageCodegen (14) - Filter [ws_item_sk,ws_order_number,ws_web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_item_sk,ws_web_site_sk,ws_order_number] - InputAdapter - ReusedExchange [d_date_sk] #5 + Union + WholeStageCodegen (7) + Project [cs_catalog_page_sk,cs_sold_date_sk,cs_ext_sales_price,cs_net_profit] + Filter [cs_sold_date_sk,cs_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit] + WholeStageCodegen (8) + Project [cr_catalog_page_sk,cr_returned_date_sk,cr_return_amount,cr_net_loss] + Filter [cr_returned_date_sk,cr_catalog_page_sk] + ColumnarToRow InputAdapter - BroadcastExchange #11 - WholeStageCodegen (17) - Filter [web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_site [web_site_sk,web_site_id] - WholeStageCodegen (43) - HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sum(sales),sum(returns),sum(profit),sum,isEmpty,sum,isEmpty,sum,isEmpty] + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_catalog_page_sk,cr_return_amount,cr_net_loss] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (10) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + WholeStageCodegen (19) + HashAggregate [web_site_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] InputAdapter - Exchange [channel] #12 - WholeStageCodegen (42) - HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #3 - WholeStageCodegen (67) + Exchange [web_site_id] #8 + WholeStageCodegen (18) + HashAggregate [web_site_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,profit,return_amt,net_loss,web_site_id] + BroadcastHashJoin [wsr_web_site_sk,web_site_sk] + Project [wsr_web_site_sk,sales_price,profit,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen (13) + Project [ws_web_site_sk,ws_sold_date_sk,ws_ext_sales_price,ws_net_profit] + Filter [ws_sold_date_sk,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales 
[ws_sold_date_sk,ws_web_site_sk,ws_ext_sales_price,ws_net_profit] + WholeStageCodegen (15) + Project [ws_web_site_sk,wr_returned_date_sk,wr_return_amt,wr_net_loss] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Filter [wr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_returned_date_sk,wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (14) + Filter [ws_item_sk,ws_order_number,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_web_site_sk,ws_order_number] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (17) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_site_sk,web_site_id] + WholeStageCodegen (43) + HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sum(sales),sum(returns),sum(profit),sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel] #11 + WholeStageCodegen (42) + HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 + WholeStageCodegen (65) HashAggregate [sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),channel,id,sum(sales),sum(returns),sum(profit),sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange #13 - WholeStageCodegen (66) + Exchange #12 + WholeStageCodegen (64) HashAggregate [sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #3 + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.sf100/explain.txt index 628ca0ad4711c..214e5eadd0eac 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.sf100/explain.txt @@ -1,68 +1,64 @@ == Physical Plan == -TakeOrderedAndProject (64) -+- * Project (63) - +- Window (62) - +- * Sort (61) - +- Exchange (60) - +- * HashAggregate (59) - +- Exchange (58) - +- * HashAggregate (57) - +- Union (56) - :- * HashAggregate (50) - : +- Exchange (49) - : +- * HashAggregate (48) - : +- Union (47) - : :- * HashAggregate (41) - : : +- Exchange (40) - : : +- * HashAggregate (39) - : : +- * Project (38) - : : +- * BroadcastHashJoin Inner BuildRight (37) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- BroadcastExchange (36) - : : +- * BroadcastHashJoin LeftSemi BuildRight (35) - : : :- * 
Filter (13) - : : : +- * ColumnarToRow (12) - : : : +- Scan parquet default.store (11) - : : +- BroadcastExchange (34) - : : +- * Project (33) - : : +- * Filter (32) - : : +- Window (31) - : : +- * Sort (30) - : : +- Exchange (29) - : : +- * HashAggregate (28) - : : +- Exchange (27) - : : +- * HashAggregate (26) - : : +- * Project (25) - : : +- * BroadcastHashJoin Inner BuildRight (24) - : : :- * Project (19) - : : : +- * BroadcastHashJoin Inner BuildRight (18) - : : : :- * Filter (16) - : : : : +- * ColumnarToRow (15) - : : : : +- Scan parquet default.store_sales (14) - : : : +- ReusedExchange (17) - : : +- BroadcastExchange (23) - : : +- * Filter (22) - : : +- * ColumnarToRow (21) - : : +- Scan parquet default.store (20) - : +- * HashAggregate (46) - : +- Exchange (45) - : +- * HashAggregate (44) - : +- * HashAggregate (43) - : +- ReusedExchange (42) - +- * HashAggregate (55) - +- Exchange (54) - +- * HashAggregate (53) - +- * HashAggregate (52) - +- ReusedExchange (51) +TakeOrderedAndProject (60) ++- * Project (59) + +- Window (58) + +- * Sort (57) + +- Exchange (56) + +- * HashAggregate (55) + +- Exchange (54) + +- * HashAggregate (53) + +- Union (52) + :- * HashAggregate (41) + : +- Exchange (40) + : +- * HashAggregate (39) + : +- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.date_dim (4) + : +- BroadcastExchange (36) + : +- * BroadcastHashJoin LeftSemi BuildRight (35) + : :- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- BroadcastExchange (34) + : +- * Project (33) + : +- * Filter (32) + : +- Window (31) + : +- * Sort (30) + : +- Exchange (29) + : +- * HashAggregate (28) + : +- Exchange (27) + : +- * HashAggregate (26) + : +- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * Project (19) + : : +- * BroadcastHashJoin Inner BuildRight (18) + : : :- * Filter (16) + : : : +- * ColumnarToRow (15) + : : : +- Scan parquet default.store_sales (14) + : : +- ReusedExchange (17) + : +- BroadcastExchange (23) + : +- * Filter (22) + : +- * ColumnarToRow (21) + : +- Scan parquet default.store (20) + :- * HashAggregate (46) + : +- Exchange (45) + : +- * HashAggregate (44) + : +- * HashAggregate (43) + : +- ReusedExchange (42) + +- * HashAggregate (51) + +- Exchange (50) + +- * HashAggregate (49) + +- * HashAggregate (48) + +- ReusedExchange (47) (1) Scan parquet default.store_sales @@ -186,7 +182,7 @@ Results [2]: [s_state#9, sum#12] (27) Exchange Input [2]: [s_state#9, sum#12] -Arguments: hashpartitioning(s_state#9, 5), true, [id=#13] +Arguments: hashpartitioning(s_state#9, 5), ENSURE_REQUIREMENTS, [id=#13] (28) HashAggregate [codegen id : 5] Input [2]: [s_state#9, sum#12] @@ -197,7 +193,7 @@ Results [3]: [s_state#9 AS s_state#15, s_state#9, MakeDecimal(sum(UnscaledValue( (29) Exchange Input [3]: [s_state#15, s_state#9, _w2#16] -Arguments: hashpartitioning(s_state#9, 5), true, [id=#17] +Arguments: hashpartitioning(s_state#9, 5), ENSURE_REQUIREMENTS, [id=#17] (30) Sort [codegen id : 6] Input [3]: [s_state#15, s_state#9, _w2#16] @@ -246,7 +242,7 @@ Results [3]: [s_state#9, s_county#8, sum#22] (40) Exchange Input [3]: [s_state#9, s_county#8, sum#22] -Arguments: hashpartitioning(s_state#9, s_county#8, 5), true, 
[id=#23] +Arguments: hashpartitioning(s_state#9, s_county#8, 5), ENSURE_REQUIREMENTS, [id=#23] (41) HashAggregate [codegen id : 10] Input [3]: [s_state#9, s_county#8, sum#22] @@ -274,7 +270,7 @@ Results [3]: [s_state#9, sum#34, isEmpty#35] (45) Exchange Input [3]: [s_state#9, sum#34, isEmpty#35] -Arguments: hashpartitioning(s_state#9, 5), true, [id=#36] +Arguments: hashpartitioning(s_state#9, 5), ENSURE_REQUIREMENTS, [id=#36] (46) HashAggregate [codegen id : 21] Input [3]: [s_state#9, sum#34, isEmpty#35] @@ -283,91 +279,71 @@ Functions [1]: [sum(total_sum#31)] Aggregate Attributes [1]: [sum(total_sum#31)#37] Results [6]: [sum(total_sum#31)#37 AS total_sum#38, s_state#9, null AS s_county#39, 0 AS g_state#40, 1 AS g_county#41, 1 AS lochierarchy#42] -(47) Union +(47) ReusedExchange [Reuses operator id: 40] +Output [3]: [s_state#9, s_county#8, sum#43] -(48) HashAggregate [codegen id : 22] -Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -Keys [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -Functions: [] -Aggregate Attributes: [] -Results [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] - -(49) Exchange -Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -Arguments: hashpartitioning(total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28, 5), true, [id=#43] - -(50) HashAggregate [codegen id : 23] -Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -Keys [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -Functions: [] -Aggregate Attributes: [] -Results [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] - -(51) ReusedExchange [Reuses operator id: 40] -Output [3]: [s_state#9, s_county#8, sum#44] - -(52) HashAggregate [codegen id : 33] -Input [3]: [s_state#9, s_county#8, sum#44] +(48) HashAggregate [codegen id : 31] +Input [3]: [s_state#9, s_county#8, sum#43] Keys [2]: [s_state#9, s_county#8] Functions [1]: [sum(UnscaledValue(ss_net_profit#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#45] -Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#45,17,2) AS total_sum#31] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#44] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#44,17,2) AS total_sum#31] -(53) HashAggregate [codegen id : 33] +(49) HashAggregate [codegen id : 31] Input [1]: [total_sum#31] Keys: [] Functions [1]: [partial_sum(total_sum#31)] -Aggregate Attributes [2]: [sum#46, isEmpty#47] -Results [2]: [sum#48, isEmpty#49] +Aggregate Attributes [2]: [sum#45, isEmpty#46] +Results [2]: [sum#47, isEmpty#48] -(54) Exchange -Input [2]: [sum#48, isEmpty#49] -Arguments: SinglePartition, true, [id=#50] +(50) Exchange +Input [2]: [sum#47, isEmpty#48] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#49] -(55) HashAggregate [codegen id : 34] -Input [2]: [sum#48, isEmpty#49] +(51) HashAggregate [codegen id : 32] +Input [2]: [sum#47, isEmpty#48] Keys: [] Functions [1]: [sum(total_sum#31)] -Aggregate Attributes [1]: [sum(total_sum#31)#51] -Results [6]: [sum(total_sum#31)#51 AS total_sum#52, null AS s_state#53, null AS s_county#54, 1 AS g_state#55, 1 AS g_county#56, 2 AS lochierarchy#57] +Aggregate Attributes [1]: [sum(total_sum#31)#50] +Results [6]: [sum(total_sum#31)#50 AS total_sum#51, null AS s_state#52, null AS s_county#53, 1 AS g_state#54, 1 AS g_county#55, 
2 AS lochierarchy#56] -(56) Union +(52) Union -(57) HashAggregate [codegen id : 35] +(53) HashAggregate [codegen id : 33] Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] Keys [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] Functions: [] Aggregate Attributes: [] Results [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -(58) Exchange +(54) Exchange Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -Arguments: hashpartitioning(total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28, 5), true, [id=#58] +Arguments: hashpartitioning(total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28, 5), ENSURE_REQUIREMENTS, [id=#57] -(59) HashAggregate [codegen id : 36] +(55) HashAggregate [codegen id : 34] Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] Keys [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] Functions: [] Aggregate Attributes: [] -Results [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, CASE WHEN (g_county#27 = 0) THEN s_state#9 END AS _w0#59] +Results [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, CASE WHEN (g_county#27 = 0) THEN s_state#9 END AS _w0#58] -(60) Exchange -Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#59] -Arguments: hashpartitioning(lochierarchy#28, _w0#59, 5), true, [id=#60] +(56) Exchange +Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#58] +Arguments: hashpartitioning(lochierarchy#28, _w0#58, 5), ENSURE_REQUIREMENTS, [id=#59] -(61) Sort [codegen id : 37] -Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#59] -Arguments: [lochierarchy#28 ASC NULLS FIRST, _w0#59 ASC NULLS FIRST, total_sum#25 DESC NULLS LAST], false, 0 +(57) Sort [codegen id : 35] +Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#58] +Arguments: [lochierarchy#28 ASC NULLS FIRST, _w0#58 ASC NULLS FIRST, total_sum#25 DESC NULLS LAST], false, 0 -(62) Window -Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#59] -Arguments: [rank(total_sum#25) windowspecdefinition(lochierarchy#28, _w0#59, total_sum#25 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#61], [lochierarchy#28, _w0#59], [total_sum#25 DESC NULLS LAST] +(58) Window +Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#58] +Arguments: [rank(total_sum#25) windowspecdefinition(lochierarchy#28, _w0#58, total_sum#25 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#60], [lochierarchy#28, _w0#58], [total_sum#25 DESC NULLS LAST] -(63) Project [codegen id : 38] -Output [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#61] -Input [6]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#59, rank_within_parent#61] +(59) Project [codegen id : 36] +Output [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#60] +Input [6]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#58, rank_within_parent#60] -(64) TakeOrderedAndProject -Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#61] -Arguments: 100, [lochierarchy#28 DESC NULLS LAST, CASE WHEN (lochierarchy#28 = 0) THEN s_state#9 END ASC NULLS FIRST, rank_within_parent#61 ASC 
NULLS FIRST], [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#61] +(60) TakeOrderedAndProject +Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#60] +Arguments: 100, [lochierarchy#28 DESC NULLS LAST, CASE WHEN (lochierarchy#28 = 0) THEN s_state#9 END ASC NULLS FIRST, rank_within_parent#60 ASC NULLS FIRST], [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#60] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.sf100/simplified.txt index b3dbc1612539a..6b02f5692b0eb 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.sf100/simplified.txt @@ -1,107 +1,99 @@ TakeOrderedAndProject [lochierarchy,s_state,rank_within_parent,total_sum,s_county] - WholeStageCodegen (38) + WholeStageCodegen (36) Project [total_sum,s_state,s_county,lochierarchy,rank_within_parent] InputAdapter Window [total_sum,lochierarchy,_w0] - WholeStageCodegen (37) + WholeStageCodegen (35) Sort [lochierarchy,_w0,total_sum] InputAdapter Exchange [lochierarchy,_w0] #1 - WholeStageCodegen (36) + WholeStageCodegen (34) HashAggregate [total_sum,s_state,s_county,g_state,g_county,lochierarchy] [_w0] InputAdapter Exchange [total_sum,s_state,s_county,g_state,g_county,lochierarchy] #2 - WholeStageCodegen (35) + WholeStageCodegen (33) HashAggregate [total_sum,s_state,s_county,g_state,g_county,lochierarchy] InputAdapter Union - WholeStageCodegen (23) - HashAggregate [total_sum,s_state,s_county,g_state,g_county,lochierarchy] + WholeStageCodegen (10) + HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,g_state,g_county,lochierarchy,sum] InputAdapter - Exchange [total_sum,s_state,s_county,g_state,g_county,lochierarchy] #3 - WholeStageCodegen (22) - HashAggregate [total_sum,s_state,s_county,g_state,g_county,lochierarchy] - InputAdapter - Union - WholeStageCodegen (10) - HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,g_state,g_county,lochierarchy,sum] + Exchange [s_state,s_county] #3 + WholeStageCodegen (9) + HashAggregate [s_state,s_county,ss_net_profit] [sum,sum] + Project [ss_net_profit,s_county,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] InputAdapter - Exchange [s_state,s_county] #4 - WholeStageCodegen (9) - HashAggregate [s_state,s_county,ss_net_profit] [sum,sum] - Project [ss_net_profit,s_county,s_state] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_store_sk,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #6 - WholeStageCodegen (8) - BroadcastHashJoin 
[s_state,s_state] - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_county,s_state] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (7) - Project [s_state] - Filter [ranking] + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (8) + BroadcastHashJoin [s_state,s_state] + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_county,s_state] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Project [s_state] + Filter [ranking] + InputAdapter + Window [_w2,s_state] + WholeStageCodegen (6) + Sort [s_state,_w2] + InputAdapter + Exchange [s_state] #7 + WholeStageCodegen (5) + HashAggregate [s_state,sum] [sum(UnscaledValue(ss_net_profit)),s_state,_w2,sum] InputAdapter - Window [_w2,s_state] - WholeStageCodegen (6) - Sort [s_state,_w2] - InputAdapter - Exchange [s_state] #8 - WholeStageCodegen (5) - HashAggregate [s_state,sum] [sum(UnscaledValue(ss_net_profit)),s_state,_w2,sum] + Exchange [s_state] #8 + WholeStageCodegen (4) + HashAggregate [s_state,ss_net_profit] [sum,sum] + Project [ss_net_profit,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] InputAdapter - Exchange [s_state] #9 - WholeStageCodegen (4) - HashAggregate [s_state,ss_net_profit] [sum,sum] - Project [ss_net_profit,s_state] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_store_sk,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_store_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] - InputAdapter - ReusedExchange [d_date_sk] #5 - InputAdapter - BroadcastExchange #10 - WholeStageCodegen (3) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_state] - WholeStageCodegen (21) - HashAggregate [s_state,sum,isEmpty] [sum(total_sum),total_sum,s_county,g_state,g_county,lochierarchy,sum,isEmpty] - InputAdapter - Exchange [s_state] #11 - WholeStageCodegen (20) - HashAggregate [s_state,total_sum] [sum,isEmpty,sum,isEmpty] - HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,sum] - InputAdapter - ReusedExchange [s_state,s_county,sum] #4 - WholeStageCodegen (34) + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (3) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_state] + WholeStageCodegen (21) + HashAggregate [s_state,sum,isEmpty] [sum(total_sum),total_sum,s_county,g_state,g_county,lochierarchy,sum,isEmpty] + InputAdapter + Exchange [s_state] #10 + WholeStageCodegen (20) + HashAggregate [s_state,total_sum] [sum,isEmpty,sum,isEmpty] + HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,sum] + InputAdapter + ReusedExchange [s_state,s_county,sum] #3 + WholeStageCodegen (32) HashAggregate [sum,isEmpty] [sum(total_sum),total_sum,s_state,s_county,g_state,g_county,lochierarchy,sum,isEmpty] InputAdapter - Exchange #12 - WholeStageCodegen (33) + Exchange #11 + WholeStageCodegen (31) HashAggregate [total_sum] [sum,isEmpty,sum,isEmpty] HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,sum] InputAdapter - 
ReusedExchange [s_state,s_county,sum] #4 + ReusedExchange [s_state,s_county,sum] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a/explain.txt index 705d1b3f91342..e41dc814cbd2e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a/explain.txt @@ -1,68 +1,64 @@ == Physical Plan == -TakeOrderedAndProject (64) -+- * Project (63) - +- Window (62) - +- * Sort (61) - +- Exchange (60) - +- * HashAggregate (59) - +- Exchange (58) - +- * HashAggregate (57) - +- Union (56) - :- * HashAggregate (50) - : +- Exchange (49) - : +- * HashAggregate (48) - : +- Union (47) - : :- * HashAggregate (41) - : : +- Exchange (40) - : : +- * HashAggregate (39) - : : +- * Project (38) - : : +- * BroadcastHashJoin Inner BuildRight (37) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- BroadcastExchange (36) - : : +- * BroadcastHashJoin LeftSemi BuildRight (35) - : : :- * Filter (13) - : : : +- * ColumnarToRow (12) - : : : +- Scan parquet default.store (11) - : : +- BroadcastExchange (34) - : : +- * Project (33) - : : +- * Filter (32) - : : +- Window (31) - : : +- * Sort (30) - : : +- Exchange (29) - : : +- * HashAggregate (28) - : : +- Exchange (27) - : : +- * HashAggregate (26) - : : +- * Project (25) - : : +- * BroadcastHashJoin Inner BuildRight (24) - : : :- * Project (22) - : : : +- * BroadcastHashJoin Inner BuildRight (21) - : : : :- * Filter (16) - : : : : +- * ColumnarToRow (15) - : : : : +- Scan parquet default.store_sales (14) - : : : +- BroadcastExchange (20) - : : : +- * Filter (19) - : : : +- * ColumnarToRow (18) - : : : +- Scan parquet default.store (17) - : : +- ReusedExchange (23) - : +- * HashAggregate (46) - : +- Exchange (45) - : +- * HashAggregate (44) - : +- * HashAggregate (43) - : +- ReusedExchange (42) - +- * HashAggregate (55) - +- Exchange (54) - +- * HashAggregate (53) - +- * HashAggregate (52) - +- ReusedExchange (51) +TakeOrderedAndProject (60) ++- * Project (59) + +- Window (58) + +- * Sort (57) + +- Exchange (56) + +- * HashAggregate (55) + +- Exchange (54) + +- * HashAggregate (53) + +- Union (52) + :- * HashAggregate (41) + : +- Exchange (40) + : +- * HashAggregate (39) + : +- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.date_dim (4) + : +- BroadcastExchange (36) + : +- * BroadcastHashJoin LeftSemi BuildRight (35) + : :- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- BroadcastExchange (34) + : +- * Project (33) + : +- * Filter (32) + : +- Window (31) + : +- * Sort (30) + : +- Exchange (29) + : +- * HashAggregate (28) + : +- Exchange (27) + : +- * HashAggregate (26) + : +- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * Project (22) + : : +- * 
BroadcastHashJoin Inner BuildRight (21) + : : :- * Filter (16) + : : : +- * ColumnarToRow (15) + : : : +- Scan parquet default.store_sales (14) + : : +- BroadcastExchange (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet default.store (17) + : +- ReusedExchange (23) + :- * HashAggregate (46) + : +- Exchange (45) + : +- * HashAggregate (44) + : +- * HashAggregate (43) + : +- ReusedExchange (42) + +- * HashAggregate (51) + +- Exchange (50) + +- * HashAggregate (49) + +- * HashAggregate (48) + +- ReusedExchange (47) (1) Scan parquet default.store_sales @@ -186,7 +182,7 @@ Results [2]: [s_state#9, sum#12] (27) Exchange Input [2]: [s_state#9, sum#12] -Arguments: hashpartitioning(s_state#9, 5), true, [id=#13] +Arguments: hashpartitioning(s_state#9, 5), ENSURE_REQUIREMENTS, [id=#13] (28) HashAggregate [codegen id : 5] Input [2]: [s_state#9, sum#12] @@ -197,7 +193,7 @@ Results [3]: [s_state#9 AS s_state#15, s_state#9, MakeDecimal(sum(UnscaledValue( (29) Exchange Input [3]: [s_state#15, s_state#9, _w2#16] -Arguments: hashpartitioning(s_state#9, 5), true, [id=#17] +Arguments: hashpartitioning(s_state#9, 5), ENSURE_REQUIREMENTS, [id=#17] (30) Sort [codegen id : 6] Input [3]: [s_state#15, s_state#9, _w2#16] @@ -246,7 +242,7 @@ Results [3]: [s_state#9, s_county#8, sum#22] (40) Exchange Input [3]: [s_state#9, s_county#8, sum#22] -Arguments: hashpartitioning(s_state#9, s_county#8, 5), true, [id=#23] +Arguments: hashpartitioning(s_state#9, s_county#8, 5), ENSURE_REQUIREMENTS, [id=#23] (41) HashAggregate [codegen id : 10] Input [3]: [s_state#9, s_county#8, sum#22] @@ -274,7 +270,7 @@ Results [3]: [s_state#9, sum#34, isEmpty#35] (45) Exchange Input [3]: [s_state#9, sum#34, isEmpty#35] -Arguments: hashpartitioning(s_state#9, 5), true, [id=#36] +Arguments: hashpartitioning(s_state#9, 5), ENSURE_REQUIREMENTS, [id=#36] (46) HashAggregate [codegen id : 21] Input [3]: [s_state#9, sum#34, isEmpty#35] @@ -283,91 +279,71 @@ Functions [1]: [sum(total_sum#31)] Aggregate Attributes [1]: [sum(total_sum#31)#37] Results [6]: [sum(total_sum#31)#37 AS total_sum#38, s_state#9, null AS s_county#39, 0 AS g_state#40, 1 AS g_county#41, 1 AS lochierarchy#42] -(47) Union +(47) ReusedExchange [Reuses operator id: 40] +Output [3]: [s_state#9, s_county#8, sum#43] -(48) HashAggregate [codegen id : 22] -Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -Keys [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -Functions: [] -Aggregate Attributes: [] -Results [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] - -(49) Exchange -Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -Arguments: hashpartitioning(total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28, 5), true, [id=#43] - -(50) HashAggregate [codegen id : 23] -Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -Keys [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -Functions: [] -Aggregate Attributes: [] -Results [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] - -(51) ReusedExchange [Reuses operator id: 40] -Output [3]: [s_state#9, s_county#8, sum#44] - -(52) HashAggregate [codegen id : 33] -Input [3]: [s_state#9, s_county#8, sum#44] +(48) HashAggregate [codegen id : 31] +Input [3]: [s_state#9, s_county#8, sum#43] Keys [2]: [s_state#9, s_county#8] Functions [1]: 
[sum(UnscaledValue(ss_net_profit#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#45] -Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#45,17,2) AS total_sum#31] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#44] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#44,17,2) AS total_sum#31] -(53) HashAggregate [codegen id : 33] +(49) HashAggregate [codegen id : 31] Input [1]: [total_sum#31] Keys: [] Functions [1]: [partial_sum(total_sum#31)] -Aggregate Attributes [2]: [sum#46, isEmpty#47] -Results [2]: [sum#48, isEmpty#49] +Aggregate Attributes [2]: [sum#45, isEmpty#46] +Results [2]: [sum#47, isEmpty#48] -(54) Exchange -Input [2]: [sum#48, isEmpty#49] -Arguments: SinglePartition, true, [id=#50] +(50) Exchange +Input [2]: [sum#47, isEmpty#48] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#49] -(55) HashAggregate [codegen id : 34] -Input [2]: [sum#48, isEmpty#49] +(51) HashAggregate [codegen id : 32] +Input [2]: [sum#47, isEmpty#48] Keys: [] Functions [1]: [sum(total_sum#31)] -Aggregate Attributes [1]: [sum(total_sum#31)#51] -Results [6]: [sum(total_sum#31)#51 AS total_sum#52, null AS s_state#53, null AS s_county#54, 1 AS g_state#55, 1 AS g_county#56, 2 AS lochierarchy#57] +Aggregate Attributes [1]: [sum(total_sum#31)#50] +Results [6]: [sum(total_sum#31)#50 AS total_sum#51, null AS s_state#52, null AS s_county#53, 1 AS g_state#54, 1 AS g_county#55, 2 AS lochierarchy#56] -(56) Union +(52) Union -(57) HashAggregate [codegen id : 35] +(53) HashAggregate [codegen id : 33] Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] Keys [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] Functions: [] Aggregate Attributes: [] Results [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -(58) Exchange +(54) Exchange Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -Arguments: hashpartitioning(total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28, 5), true, [id=#58] +Arguments: hashpartitioning(total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28, 5), ENSURE_REQUIREMENTS, [id=#57] -(59) HashAggregate [codegen id : 36] +(55) HashAggregate [codegen id : 34] Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] Keys [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] Functions: [] Aggregate Attributes: [] -Results [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, CASE WHEN (g_county#27 = 0) THEN s_state#9 END AS _w0#59] +Results [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, CASE WHEN (g_county#27 = 0) THEN s_state#9 END AS _w0#58] -(60) Exchange -Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#59] -Arguments: hashpartitioning(lochierarchy#28, _w0#59, 5), true, [id=#60] +(56) Exchange +Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#58] +Arguments: hashpartitioning(lochierarchy#28, _w0#58, 5), ENSURE_REQUIREMENTS, [id=#59] -(61) Sort [codegen id : 37] -Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#59] -Arguments: [lochierarchy#28 ASC NULLS FIRST, _w0#59 ASC NULLS FIRST, total_sum#25 DESC NULLS LAST], false, 0 +(57) Sort [codegen id : 35] +Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#58] +Arguments: [lochierarchy#28 ASC NULLS FIRST, _w0#58 ASC NULLS FIRST, total_sum#25 
DESC NULLS LAST], false, 0 -(62) Window -Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#59] -Arguments: [rank(total_sum#25) windowspecdefinition(lochierarchy#28, _w0#59, total_sum#25 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#61], [lochierarchy#28, _w0#59], [total_sum#25 DESC NULLS LAST] +(58) Window +Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#58] +Arguments: [rank(total_sum#25) windowspecdefinition(lochierarchy#28, _w0#58, total_sum#25 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#60], [lochierarchy#28, _w0#58], [total_sum#25 DESC NULLS LAST] -(63) Project [codegen id : 38] -Output [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#61] -Input [6]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#59, rank_within_parent#61] +(59) Project [codegen id : 36] +Output [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#60] +Input [6]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#58, rank_within_parent#60] -(64) TakeOrderedAndProject -Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#61] -Arguments: 100, [lochierarchy#28 DESC NULLS LAST, CASE WHEN (lochierarchy#28 = 0) THEN s_state#9 END ASC NULLS FIRST, rank_within_parent#61 ASC NULLS FIRST], [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#61] +(60) TakeOrderedAndProject +Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#60] +Arguments: 100, [lochierarchy#28 DESC NULLS LAST, CASE WHEN (lochierarchy#28 = 0) THEN s_state#9 END ASC NULLS FIRST, rank_within_parent#60 ASC NULLS FIRST], [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#60] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a/simplified.txt index bd0bd7e87251f..b32ed8ecf2857 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a/simplified.txt @@ -1,107 +1,99 @@ TakeOrderedAndProject [lochierarchy,s_state,rank_within_parent,total_sum,s_county] - WholeStageCodegen (38) + WholeStageCodegen (36) Project [total_sum,s_state,s_county,lochierarchy,rank_within_parent] InputAdapter Window [total_sum,lochierarchy,_w0] - WholeStageCodegen (37) + WholeStageCodegen (35) Sort [lochierarchy,_w0,total_sum] InputAdapter Exchange [lochierarchy,_w0] #1 - WholeStageCodegen (36) + WholeStageCodegen (34) HashAggregate [total_sum,s_state,s_county,g_state,g_county,lochierarchy] [_w0] InputAdapter Exchange [total_sum,s_state,s_county,g_state,g_county,lochierarchy] #2 - WholeStageCodegen (35) + WholeStageCodegen (33) HashAggregate [total_sum,s_state,s_county,g_state,g_county,lochierarchy] InputAdapter Union - WholeStageCodegen (23) - HashAggregate [total_sum,s_state,s_county,g_state,g_county,lochierarchy] + WholeStageCodegen (10) + HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,g_state,g_county,lochierarchy,sum] InputAdapter - Exchange [total_sum,s_state,s_county,g_state,g_county,lochierarchy] #3 - WholeStageCodegen (22) - HashAggregate [total_sum,s_state,s_county,g_state,g_county,lochierarchy] - InputAdapter - Union - WholeStageCodegen (10) - HashAggregate 
[s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,g_state,g_county,lochierarchy,sum] + Exchange [s_state,s_county] #3 + WholeStageCodegen (9) + HashAggregate [s_state,s_county,ss_net_profit] [sum,sum] + Project [ss_net_profit,s_county,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] InputAdapter - Exchange [s_state,s_county] #4 - WholeStageCodegen (9) - HashAggregate [s_state,s_county,ss_net_profit] [sum,sum] - Project [ss_net_profit,s_county,s_state] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_store_sk,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #6 - WholeStageCodegen (8) - BroadcastHashJoin [s_state,s_state] - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_county,s_state] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (7) - Project [s_state] - Filter [ranking] + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (8) + BroadcastHashJoin [s_state,s_state] + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_county,s_state] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Project [s_state] + Filter [ranking] + InputAdapter + Window [_w2,s_state] + WholeStageCodegen (6) + Sort [s_state,_w2] + InputAdapter + Exchange [s_state] #7 + WholeStageCodegen (5) + HashAggregate [s_state,sum] [sum(UnscaledValue(ss_net_profit)),s_state,_w2,sum] InputAdapter - Window [_w2,s_state] - WholeStageCodegen (6) - Sort [s_state,_w2] - InputAdapter - Exchange [s_state] #8 - WholeStageCodegen (5) - HashAggregate [s_state,sum] [sum(UnscaledValue(ss_net_profit)),s_state,_w2,sum] + Exchange [s_state] #8 + WholeStageCodegen (4) + HashAggregate [s_state,ss_net_profit] [sum,sum] + Project [ss_net_profit,s_state] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_net_profit,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Filter [ss_store_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] InputAdapter - Exchange [s_state] #9 - WholeStageCodegen (4) - HashAggregate [s_state,ss_net_profit] [sum,sum] - Project [ss_net_profit,s_state] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_net_profit,s_state] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Filter [ss_store_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] - InputAdapter - BroadcastExchange #10 - WholeStageCodegen (2) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_state] - InputAdapter - ReusedExchange [d_date_sk] #5 - WholeStageCodegen (21) - 
HashAggregate [s_state,sum,isEmpty] [sum(total_sum),total_sum,s_county,g_state,g_county,lochierarchy,sum,isEmpty] - InputAdapter - Exchange [s_state] #11 - WholeStageCodegen (20) - HashAggregate [s_state,total_sum] [sum,isEmpty,sum,isEmpty] - HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,sum] - InputAdapter - ReusedExchange [s_state,s_county,sum] #4 - WholeStageCodegen (34) + BroadcastExchange #9 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_state] + InputAdapter + ReusedExchange [d_date_sk] #4 + WholeStageCodegen (21) + HashAggregate [s_state,sum,isEmpty] [sum(total_sum),total_sum,s_county,g_state,g_county,lochierarchy,sum,isEmpty] + InputAdapter + Exchange [s_state] #10 + WholeStageCodegen (20) + HashAggregate [s_state,total_sum] [sum,isEmpty,sum,isEmpty] + HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,sum] + InputAdapter + ReusedExchange [s_state,s_county,sum] #3 + WholeStageCodegen (32) HashAggregate [sum,isEmpty] [sum(total_sum),total_sum,s_state,s_county,g_state,g_county,lochierarchy,sum,isEmpty] InputAdapter - Exchange #12 - WholeStageCodegen (33) + Exchange #11 + WholeStageCodegen (31) HashAggregate [total_sum] [sum,isEmpty,sum,isEmpty] HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,sum] InputAdapter - ReusedExchange [s_state,s_county,sum] #4 + ReusedExchange [s_state,s_county,sum] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/explain.txt index 3f452dc9272dc..ce1206c0ba906 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/explain.txt @@ -1,142 +1,134 @@ == Physical Plan == -TakeOrderedAndProject (138) -+- * Project (137) - +- * SortMergeJoin Inner (136) - :- * Sort (74) - : +- Exchange (73) - : +- * HashAggregate (72) - : +- Exchange (71) - : +- * HashAggregate (70) - : +- * HashAggregate (69) - : +- Exchange (68) - : +- * HashAggregate (67) - : +- Union (66) - : :- * HashAggregate (47) - : : +- Exchange (46) - : : +- * HashAggregate (45) - : : +- Union (44) - : : :- * Project (25) - : : : +- SortMergeJoin LeftOuter (24) - : : : :- * Sort (18) - : : : : +- Exchange (17) - : : : : +- * Project (16) - : : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : : :- * Project (10) - : : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : : :- * Filter (3) - : : : : : : +- * ColumnarToRow (2) - : : : : : : +- Scan parquet default.catalog_sales (1) - : : : : : +- BroadcastExchange (8) - : : : : : +- * Project (7) - : : : : : +- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.item (4) - : : : : +- BroadcastExchange (14) - : : : : +- * Filter (13) - : : : : +- * ColumnarToRow (12) - : : : : +- Scan parquet default.date_dim (11) - : : : +- * Sort (23) - : : : +- Exchange (22) - : : : +- * Filter (21) - : : : +- * ColumnarToRow (20) - : : : +- Scan parquet default.catalog_returns (19) - : : +- * Project (43) - : : +- SortMergeJoin LeftOuter (42) - : : :- * Sort (36) - : : : +- Exchange (35) - : : : +- * Project (34) - : : : +- * BroadcastHashJoin Inner BuildRight (33) - : : : :- * Project (31) - : : : : +- * BroadcastHashJoin Inner BuildRight (30) - : : : : :- * Filter (28) - : : : : : +- * 
ColumnarToRow (27) - : : : : : +- Scan parquet default.store_sales (26) - : : : : +- ReusedExchange (29) - : : : +- ReusedExchange (32) - : : +- * Sort (41) - : : +- Exchange (40) - : : +- * Filter (39) - : : +- * ColumnarToRow (38) - : : +- Scan parquet default.store_returns (37) - : +- * Project (65) - : +- SortMergeJoin LeftOuter (64) - : :- * Sort (58) - : : +- Exchange (57) - : : +- * Project (56) - : : +- * BroadcastHashJoin Inner BuildRight (55) - : : :- * Project (53) - : : : +- * BroadcastHashJoin Inner BuildRight (52) - : : : :- * Filter (50) - : : : : +- * ColumnarToRow (49) - : : : : +- Scan parquet default.web_sales (48) - : : : +- ReusedExchange (51) - : : +- ReusedExchange (54) - : +- * Sort (63) - : +- Exchange (62) - : +- * Filter (61) - : +- * ColumnarToRow (60) - : +- Scan parquet default.web_returns (59) - +- * Sort (135) - +- Exchange (134) - +- * HashAggregate (133) - +- Exchange (132) - +- * HashAggregate (131) - +- * HashAggregate (130) - +- Exchange (129) - +- * HashAggregate (128) - +- Union (127) - :- * HashAggregate (111) - : +- Exchange (110) - : +- * HashAggregate (109) - : +- Union (108) - : :- * Project (92) - : : +- SortMergeJoin LeftOuter (91) - : : :- * Sort (88) - : : : +- Exchange (87) - : : : +- * Project (86) - : : : +- * BroadcastHashJoin Inner BuildRight (85) - : : : :- * Project (80) - : : : : +- * BroadcastHashJoin Inner BuildRight (79) - : : : : :- * Filter (77) - : : : : : +- * ColumnarToRow (76) - : : : : : +- Scan parquet default.catalog_sales (75) - : : : : +- ReusedExchange (78) - : : : +- BroadcastExchange (84) - : : : +- * Filter (83) - : : : +- * ColumnarToRow (82) - : : : +- Scan parquet default.date_dim (81) - : : +- * Sort (90) - : : +- ReusedExchange (89) - : +- * Project (107) - : +- SortMergeJoin LeftOuter (106) - : :- * Sort (103) - : : +- Exchange (102) - : : +- * Project (101) - : : +- * BroadcastHashJoin Inner BuildRight (100) - : : :- * Project (98) - : : : +- * BroadcastHashJoin Inner BuildRight (97) - : : : :- * Filter (95) - : : : : +- * ColumnarToRow (94) - : : : : +- Scan parquet default.store_sales (93) - : : : +- ReusedExchange (96) - : : +- ReusedExchange (99) - : +- * Sort (105) - : +- ReusedExchange (104) - +- * Project (126) - +- SortMergeJoin LeftOuter (125) - :- * Sort (122) - : +- Exchange (121) - : +- * Project (120) - : +- * BroadcastHashJoin Inner BuildRight (119) - : :- * Project (117) - : : +- * BroadcastHashJoin Inner BuildRight (116) - : : :- * Filter (114) - : : : +- * ColumnarToRow (113) - : : : +- Scan parquet default.web_sales (112) - : : +- ReusedExchange (115) - : +- ReusedExchange (118) - +- * Sort (124) - +- ReusedExchange (123) +TakeOrderedAndProject (130) ++- * Project (129) + +- * SortMergeJoin Inner (128) + :- * Sort (70) + : +- Exchange (69) + : +- * HashAggregate (68) + : +- Exchange (67) + : +- * HashAggregate (66) + : +- * HashAggregate (65) + : +- Exchange (64) + : +- * HashAggregate (63) + : +- Union (62) + : :- * Project (25) + : : +- SortMergeJoin LeftOuter (24) + : : :- * Sort (18) + : : : +- Exchange (17) + : : : +- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.catalog_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.item (4) + : : : +- BroadcastExchange (14) + : : : +- * 
Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.date_dim (11) + : : +- * Sort (23) + : : +- Exchange (22) + : : +- * Filter (21) + : : +- * ColumnarToRow (20) + : : +- Scan parquet default.catalog_returns (19) + : :- * Project (43) + : : +- SortMergeJoin LeftOuter (42) + : : :- * Sort (36) + : : : +- Exchange (35) + : : : +- * Project (34) + : : : +- * BroadcastHashJoin Inner BuildRight (33) + : : : :- * Project (31) + : : : : +- * BroadcastHashJoin Inner BuildRight (30) + : : : : :- * Filter (28) + : : : : : +- * ColumnarToRow (27) + : : : : : +- Scan parquet default.store_sales (26) + : : : : +- ReusedExchange (29) + : : : +- ReusedExchange (32) + : : +- * Sort (41) + : : +- Exchange (40) + : : +- * Filter (39) + : : +- * ColumnarToRow (38) + : : +- Scan parquet default.store_returns (37) + : +- * Project (61) + : +- SortMergeJoin LeftOuter (60) + : :- * Sort (54) + : : +- Exchange (53) + : : +- * Project (52) + : : +- * BroadcastHashJoin Inner BuildRight (51) + : : :- * Project (49) + : : : +- * BroadcastHashJoin Inner BuildRight (48) + : : : :- * Filter (46) + : : : : +- * ColumnarToRow (45) + : : : : +- Scan parquet default.web_sales (44) + : : : +- ReusedExchange (47) + : : +- ReusedExchange (50) + : +- * Sort (59) + : +- Exchange (58) + : +- * Filter (57) + : +- * ColumnarToRow (56) + : +- Scan parquet default.web_returns (55) + +- * Sort (127) + +- Exchange (126) + +- * HashAggregate (125) + +- Exchange (124) + +- * HashAggregate (123) + +- * HashAggregate (122) + +- Exchange (121) + +- * HashAggregate (120) + +- Union (119) + :- * Project (88) + : +- SortMergeJoin LeftOuter (87) + : :- * Sort (84) + : : +- Exchange (83) + : : +- * Project (82) + : : +- * BroadcastHashJoin Inner BuildRight (81) + : : :- * Project (76) + : : : +- * BroadcastHashJoin Inner BuildRight (75) + : : : :- * Filter (73) + : : : : +- * ColumnarToRow (72) + : : : : +- Scan parquet default.catalog_sales (71) + : : : +- ReusedExchange (74) + : : +- BroadcastExchange (80) + : : +- * Filter (79) + : : +- * ColumnarToRow (78) + : : +- Scan parquet default.date_dim (77) + : +- * Sort (86) + : +- ReusedExchange (85) + :- * Project (103) + : +- SortMergeJoin LeftOuter (102) + : :- * Sort (99) + : : +- Exchange (98) + : : +- * Project (97) + : : +- * BroadcastHashJoin Inner BuildRight (96) + : : :- * Project (94) + : : : +- * BroadcastHashJoin Inner BuildRight (93) + : : : :- * Filter (91) + : : : : +- * ColumnarToRow (90) + : : : : +- Scan parquet default.store_sales (89) + : : : +- ReusedExchange (92) + : : +- ReusedExchange (95) + : +- * Sort (101) + : +- ReusedExchange (100) + +- * Project (118) + +- SortMergeJoin LeftOuter (117) + :- * Sort (114) + : +- Exchange (113) + : +- * Project (112) + : +- * BroadcastHashJoin Inner BuildRight (111) + : :- * Project (109) + : : +- * BroadcastHashJoin Inner BuildRight (108) + : : :- * Filter (106) + : : : +- * ColumnarToRow (105) + : : : +- Scan parquet default.web_sales (104) + : : +- ReusedExchange (107) + : +- ReusedExchange (110) + +- * Sort (116) + +- ReusedExchange (115) (1) Scan parquet default.catalog_sales @@ -213,7 +205,7 @@ Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, (17) Exchange Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] -Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), true, [id=#16] +Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), ENSURE_REQUIREMENTS, 
[id=#16] (18) Sort [codegen id : 4] Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] @@ -235,7 +227,7 @@ Condition : (isnotnull(cr_order_number#18) AND isnotnull(cr_item_sk#17)) (22) Exchange Input [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] -Arguments: hashpartitioning(cr_order_number#18, cr_item_sk#17, 5), true, [id=#21] +Arguments: hashpartitioning(cr_order_number#18, cr_item_sk#17, 5), ENSURE_REQUIREMENTS, [id=#21] (23) Sort [codegen id : 6] Input [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] @@ -290,7 +282,7 @@ Input [11]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity (35) Exchange Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] -Arguments: hashpartitioning(cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint), 5), true, [id=#29] +Arguments: hashpartitioning(cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint), 5), ENSURE_REQUIREMENTS, [id=#29] (36) Sort [codegen id : 11] Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] @@ -312,7 +304,7 @@ Condition : (isnotnull(sr_ticket_number#31) AND isnotnull(sr_item_sk#30)) (40) Exchange Input [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] -Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), true, [id=#34] +Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), ENSURE_REQUIREMENTS, [id=#34] (41) Sort [codegen id : 13] Input [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] @@ -327,426 +319,386 @@ Join condition: None Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ss_quantity#27 - coalesce(sr_return_quantity#32, 0)) AS sales_cnt#35, CheckOverflow((promote_precision(cast(ss_ext_sales_price#28 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#33, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#36] Input [13]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] -(44) Union - -(45) HashAggregate [codegen id : 15] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] - -(46) Exchange -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23, 5), true, [id=#37] - -(47) HashAggregate [codegen id : 16] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -Functions: [] -Aggregate 
Attributes: [] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] - -(48) Scan parquet default.web_sales -Output [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] +(44) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] ReadSchema: struct -(49) ColumnarToRow [codegen id : 19] -Input [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] +(45) ColumnarToRow [codegen id : 17] +Input [5]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41] -(50) Filter [codegen id : 19] -Input [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] -Condition : (isnotnull(ws_item_sk#39) AND isnotnull(ws_sold_date_sk#38)) +(46) Filter [codegen id : 17] +Input [5]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41] +Condition : (isnotnull(ws_item_sk#38) AND isnotnull(ws_sold_date_sk#37)) -(51) ReusedExchange [Reuses operator id: 8] +(47) ReusedExchange [Reuses operator id: 8] Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -(52) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [ws_item_sk#39] +(48) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_item_sk#38] Right keys [1]: [i_item_sk#6] Join condition: None -(53) Project [codegen id : 19] -Output [9]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -Input [10]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +(49) Project [codegen id : 17] +Output [9]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [10]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -(54) ReusedExchange [Reuses operator id: 14] +(50) ReusedExchange [Reuses operator id: 14] Output [2]: [d_date_sk#13, d_year#14] -(55) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [ws_sold_date_sk#38] +(51) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#37] Right keys [1]: [d_date_sk#13] Join condition: None -(56) Project [codegen id : 19] -Output [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] -Input [11]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] +(52) Project [codegen id : 17] +Output [9]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Input [11]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 
d_date_sk#13, d_year#14] -(57) Exchange -Input [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] -Arguments: hashpartitioning(cast(ws_order_number#40 as bigint), cast(ws_item_sk#39 as bigint), 5), true, [id=#43] +(53) Exchange +Input [9]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Arguments: hashpartitioning(cast(ws_order_number#39 as bigint), cast(ws_item_sk#38 as bigint), 5), ENSURE_REQUIREMENTS, [id=#42] -(58) Sort [codegen id : 20] -Input [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] -Arguments: [cast(ws_order_number#40 as bigint) ASC NULLS FIRST, cast(ws_item_sk#39 as bigint) ASC NULLS FIRST], false, 0 +(54) Sort [codegen id : 18] +Input [9]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Arguments: [cast(ws_order_number#39 as bigint) ASC NULLS FIRST, cast(ws_item_sk#38 as bigint) ASC NULLS FIRST], false, 0 -(59) Scan parquet default.web_returns -Output [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +(55) Scan parquet default.web_returns +Output [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] ReadSchema: struct -(60) ColumnarToRow [codegen id : 21] -Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +(56) ColumnarToRow [codegen id : 19] +Input [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] -(61) Filter [codegen id : 21] -Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] -Condition : (isnotnull(wr_order_number#45) AND isnotnull(wr_item_sk#44)) +(57) Filter [codegen id : 19] +Input [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] +Condition : (isnotnull(wr_order_number#44) AND isnotnull(wr_item_sk#43)) -(62) Exchange -Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] -Arguments: hashpartitioning(wr_order_number#45, wr_item_sk#44, 5), true, [id=#48] +(58) Exchange +Input [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] +Arguments: hashpartitioning(wr_order_number#44, wr_item_sk#43, 5), ENSURE_REQUIREMENTS, [id=#47] -(63) Sort [codegen id : 22] -Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] -Arguments: [wr_order_number#45 ASC NULLS FIRST, wr_item_sk#44 ASC NULLS FIRST], false, 0 +(59) Sort [codegen id : 20] +Input [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] +Arguments: [wr_order_number#44 ASC NULLS FIRST, wr_item_sk#43 ASC NULLS FIRST], false, 0 -(64) SortMergeJoin -Left keys [2]: [cast(ws_order_number#40 as bigint), cast(ws_item_sk#39 as bigint)] -Right keys [2]: [wr_order_number#45, wr_item_sk#44] +(60) SortMergeJoin +Left keys [2]: [cast(ws_order_number#39 as bigint), cast(ws_item_sk#38 as bigint)] +Right keys [2]: [wr_order_number#44, wr_item_sk#43] Join condition: None -(65) Project [codegen id : 23] -Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, 
i_manufact_id#11, (ws_quantity#41 - coalesce(wr_return_quantity#46, 0)) AS sales_cnt#49, CheckOverflow((promote_precision(cast(ws_ext_sales_price#42 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#47, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#50] -Input [13]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +(61) Project [codegen id : 21] +Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ws_quantity#40 - coalesce(wr_return_quantity#45, 0)) AS sales_cnt#48, CheckOverflow((promote_precision(cast(ws_ext_sales_price#41 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#46, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#49] +Input [13]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] -(66) Union +(62) Union -(67) HashAggregate [codegen id : 24] +(63) HashAggregate [codegen id : 22] Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] Functions: [] Aggregate Attributes: [] Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -(68) Exchange +(64) Exchange Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23, 5), true, [id=#51] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23, 5), ENSURE_REQUIREMENTS, [id=#50] -(69) HashAggregate [codegen id : 25] +(65) HashAggregate [codegen id : 23] Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] Functions: [] Aggregate Attributes: [] Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -(70) HashAggregate [codegen id : 25] +(66) HashAggregate [codegen id : 23] Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] Functions [2]: [partial_sum(cast(sales_cnt#22 as bigint)), partial_sum(UnscaledValue(sales_amt#23))] -Aggregate Attributes [2]: [sum#52, sum#53] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#54, sum#55] +Aggregate Attributes [2]: [sum#51, sum#52] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#53, sum#54] -(71) Exchange -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#54, sum#55] -Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), true, [id=#56] +(67) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, 
i_manufact_id#11, sum#53, sum#54] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [id=#55] -(72) HashAggregate [codegen id : 26] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#54, sum#55] +(68) HashAggregate [codegen id : 24] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#53, sum#54] Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] Functions [2]: [sum(cast(sales_cnt#22 as bigint)), sum(UnscaledValue(sales_amt#23))] -Aggregate Attributes [2]: [sum(cast(sales_cnt#22 as bigint))#57, sum(UnscaledValue(sales_amt#23))#58] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum(cast(sales_cnt#22 as bigint))#57 AS sales_cnt#59, MakeDecimal(sum(UnscaledValue(sales_amt#23))#58,18,2) AS sales_amt#60] +Aggregate Attributes [2]: [sum(cast(sales_cnt#22 as bigint))#56, sum(UnscaledValue(sales_amt#23))#57] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum(cast(sales_cnt#22 as bigint))#56 AS sales_cnt#58, MakeDecimal(sum(UnscaledValue(sales_amt#23))#57,18,2) AS sales_amt#59] -(73) Exchange -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#59, sales_amt#60] -Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), true, [id=#61] +(69) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#58, sales_amt#59] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [id=#60] -(74) Sort [codegen id : 27] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#59, sales_amt#60] +(70) Sort [codegen id : 25] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#58, sales_amt#59] Arguments: [i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST, i_manufact_id#11 ASC NULLS FIRST], false, 0 -(75) Scan parquet default.catalog_sales +(71) Scan parquet default.catalog_sales Output [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_sales] PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] ReadSchema: struct -(76) ColumnarToRow [codegen id : 30] +(72) ColumnarToRow [codegen id : 28] Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] -(77) Filter [codegen id : 30] +(73) Filter [codegen id : 28] Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_sold_date_sk#1)) -(78) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(74) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#61, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] -(79) BroadcastHashJoin [codegen id : 30] +(75) BroadcastHashJoin [codegen id : 28] Left keys [1]: [cs_item_sk#2] -Right keys [1]: [i_item_sk#62] +Right keys [1]: [i_item_sk#61] Join condition: None -(80) Project [codegen id : 30] -Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, 
cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] -Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(76) Project [codegen id : 28] +Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] +Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_item_sk#61, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] -(81) Scan parquet default.date_dim -Output [2]: [d_date_sk#67, d_year#68] +(77) Scan parquet default.date_dim +Output [2]: [d_date_sk#66, d_year#67] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct -(82) ColumnarToRow [codegen id : 29] -Input [2]: [d_date_sk#67, d_year#68] +(78) ColumnarToRow [codegen id : 27] +Input [2]: [d_date_sk#66, d_year#67] -(83) Filter [codegen id : 29] -Input [2]: [d_date_sk#67, d_year#68] -Condition : ((isnotnull(d_year#68) AND (d_year#68 = 2001)) AND isnotnull(d_date_sk#67)) +(79) Filter [codegen id : 27] +Input [2]: [d_date_sk#66, d_year#67] +Condition : ((isnotnull(d_year#67) AND (d_year#67 = 2001)) AND isnotnull(d_date_sk#66)) -(84) BroadcastExchange -Input [2]: [d_date_sk#67, d_year#68] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#69] +(80) BroadcastExchange +Input [2]: [d_date_sk#66, d_year#67] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#68] -(85) BroadcastHashJoin [codegen id : 30] +(81) BroadcastHashJoin [codegen id : 28] Left keys [1]: [cs_sold_date_sk#1] -Right keys [1]: [d_date_sk#67] +Right keys [1]: [d_date_sk#66] Join condition: None -(86) Project [codegen id : 30] -Output [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_date_sk#67, d_year#68] +(82) Project [codegen id : 28] +Output [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_date_sk#66, d_year#67] -(87) Exchange -Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), true, [id=#70] +(83) Exchange +Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), ENSURE_REQUIREMENTS, [id=#69] -(88) Sort [codegen id : 31] -Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] +(84) Sort [codegen id : 29] +Input [9]: [cs_item_sk#2, cs_order_number#3, 
cs_quantity#4, cs_ext_sales_price#5, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] Arguments: [cs_order_number#3 ASC NULLS FIRST, cs_item_sk#2 ASC NULLS FIRST], false, 0 -(89) ReusedExchange [Reuses operator id: 22] +(85) ReusedExchange [Reuses operator id: 22] Output [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] -(90) Sort [codegen id : 33] +(86) Sort [codegen id : 31] Input [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] Arguments: [cr_order_number#18 ASC NULLS FIRST, cr_item_sk#17 ASC NULLS FIRST], false, 0 -(91) SortMergeJoin +(87) SortMergeJoin Left keys [2]: [cs_order_number#3, cs_item_sk#2] Right keys [2]: [cr_order_number#18, cr_item_sk#17] Join condition: None -(92) Project [codegen id : 34] -Output [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, (cs_quantity#4 - coalesce(cr_return_quantity#19, 0)) AS sales_cnt#22, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#20, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#23] -Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68, cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] +(88) Project [codegen id : 32] +Output [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, (cs_quantity#4 - coalesce(cr_return_quantity#19, 0)) AS sales_cnt#22, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#20, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#23] +Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67, cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] -(93) Scan parquet default.store_sales +(89) Scan parquet default.store_sales Output [5]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] ReadSchema: struct -(94) ColumnarToRow [codegen id : 37] +(90) ColumnarToRow [codegen id : 35] Input [5]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28] -(95) Filter [codegen id : 37] +(91) Filter [codegen id : 35] Input [5]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28] Condition : (isnotnull(ss_item_sk#25) AND isnotnull(ss_sold_date_sk#24)) -(96) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(92) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#61, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] -(97) BroadcastHashJoin [codegen id : 37] +(93) BroadcastHashJoin [codegen id : 35] Left keys [1]: [ss_item_sk#25] -Right keys [1]: [i_item_sk#62] +Right keys [1]: [i_item_sk#61] Join condition: None -(98) Project [codegen id : 37] -Output [9]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] -Input [10]: [ss_sold_date_sk#24, 
ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(94) Project [codegen id : 35] +Output [9]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] +Input [10]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_item_sk#61, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] -(99) ReusedExchange [Reuses operator id: 84] -Output [2]: [d_date_sk#67, d_year#68] +(95) ReusedExchange [Reuses operator id: 80] +Output [2]: [d_date_sk#66, d_year#67] -(100) BroadcastHashJoin [codegen id : 37] +(96) BroadcastHashJoin [codegen id : 35] Left keys [1]: [ss_sold_date_sk#24] -Right keys [1]: [d_date_sk#67] +Right keys [1]: [d_date_sk#66] Join condition: None -(101) Project [codegen id : 37] -Output [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Input [11]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_date_sk#67, d_year#68] +(97) Project [codegen id : 35] +Output [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Input [11]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_date_sk#66, d_year#67] -(102) Exchange -Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Arguments: hashpartitioning(cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint), 5), true, [id=#71] +(98) Exchange +Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Arguments: hashpartitioning(cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint), 5), ENSURE_REQUIREMENTS, [id=#70] -(103) Sort [codegen id : 38] -Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] +(99) Sort [codegen id : 36] +Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] Arguments: [cast(ss_ticket_number#26 as bigint) ASC NULLS FIRST, cast(ss_item_sk#25 as bigint) ASC NULLS FIRST], false, 0 -(104) ReusedExchange [Reuses operator id: 40] +(100) ReusedExchange [Reuses operator id: 40] Output [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] -(105) Sort [codegen id : 40] +(101) Sort [codegen id : 38] Input [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] Arguments: [sr_ticket_number#31 ASC NULLS FIRST, sr_item_sk#30 ASC NULLS FIRST], false, 0 -(106) SortMergeJoin +(102) SortMergeJoin Left keys [2]: [cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint)] Right keys [2]: [sr_ticket_number#31, sr_item_sk#30] Join condition: None -(107) Project [codegen id : 41] -Output [7]: [d_year#68, i_brand_id#63, i_class_id#64, 
i_category_id#65, i_manufact_id#66, (ss_quantity#27 - coalesce(sr_return_quantity#32, 0)) AS sales_cnt#72, CheckOverflow((promote_precision(cast(ss_ext_sales_price#28 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#33, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#73] -Input [13]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68, sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] - -(108) Union - -(109) HashAggregate [codegen id : 42] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Keys [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] - -(110) Exchange -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Arguments: hashpartitioning(d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23, 5), true, [id=#74] - -(111) HashAggregate [codegen id : 43] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Keys [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +(103) Project [codegen id : 39] +Output [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, (ss_quantity#27 - coalesce(sr_return_quantity#32, 0)) AS sales_cnt#71, CheckOverflow((promote_precision(cast(ss_ext_sales_price#28 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#33, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#72] +Input [13]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67, sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] -(112) Scan parquet default.web_sales -Output [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] +(104) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] ReadSchema: struct -(113) ColumnarToRow [codegen id : 46] -Input [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] +(105) ColumnarToRow [codegen id : 42] +Input [5]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41] -(114) Filter [codegen id : 46] -Input [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] -Condition : (isnotnull(ws_item_sk#39) AND isnotnull(ws_sold_date_sk#38)) +(106) Filter [codegen id : 42] +Input [5]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41] +Condition : (isnotnull(ws_item_sk#38) AND isnotnull(ws_sold_date_sk#37)) -(115) 
ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(107) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#61, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] -(116) BroadcastHashJoin [codegen id : 46] -Left keys [1]: [ws_item_sk#39] -Right keys [1]: [i_item_sk#62] +(108) BroadcastHashJoin [codegen id : 42] +Left keys [1]: [ws_item_sk#38] +Right keys [1]: [i_item_sk#61] Join condition: None -(117) Project [codegen id : 46] -Output [9]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] -Input [10]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(109) Project [codegen id : 42] +Output [9]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] +Input [10]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_item_sk#61, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] -(118) ReusedExchange [Reuses operator id: 84] -Output [2]: [d_date_sk#67, d_year#68] +(110) ReusedExchange [Reuses operator id: 80] +Output [2]: [d_date_sk#66, d_year#67] -(119) BroadcastHashJoin [codegen id : 46] -Left keys [1]: [ws_sold_date_sk#38] -Right keys [1]: [d_date_sk#67] +(111) BroadcastHashJoin [codegen id : 42] +Left keys [1]: [ws_sold_date_sk#37] +Right keys [1]: [d_date_sk#66] Join condition: None -(120) Project [codegen id : 46] -Output [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Input [11]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_date_sk#67, d_year#68] +(112) Project [codegen id : 42] +Output [9]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Input [11]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_date_sk#66, d_year#67] -(121) Exchange -Input [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Arguments: hashpartitioning(cast(ws_order_number#40 as bigint), cast(ws_item_sk#39 as bigint), 5), true, [id=#75] +(113) Exchange +Input [9]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Arguments: hashpartitioning(cast(ws_order_number#39 as bigint), cast(ws_item_sk#38 as bigint), 5), ENSURE_REQUIREMENTS, [id=#73] -(122) Sort [codegen id : 47] -Input [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Arguments: [cast(ws_order_number#40 as bigint) ASC NULLS FIRST, cast(ws_item_sk#39 as bigint) ASC NULLS FIRST], false, 0 +(114) Sort [codegen id : 43] +Input [9]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, 
i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Arguments: [cast(ws_order_number#39 as bigint) ASC NULLS FIRST, cast(ws_item_sk#38 as bigint) ASC NULLS FIRST], false, 0 -(123) ReusedExchange [Reuses operator id: 62] -Output [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +(115) ReusedExchange [Reuses operator id: 58] +Output [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] -(124) Sort [codegen id : 49] -Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] -Arguments: [wr_order_number#45 ASC NULLS FIRST, wr_item_sk#44 ASC NULLS FIRST], false, 0 +(116) Sort [codegen id : 45] +Input [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] +Arguments: [wr_order_number#44 ASC NULLS FIRST, wr_item_sk#43 ASC NULLS FIRST], false, 0 -(125) SortMergeJoin -Left keys [2]: [cast(ws_order_number#40 as bigint), cast(ws_item_sk#39 as bigint)] -Right keys [2]: [wr_order_number#45, wr_item_sk#44] +(117) SortMergeJoin +Left keys [2]: [cast(ws_order_number#39 as bigint), cast(ws_item_sk#38 as bigint)] +Right keys [2]: [wr_order_number#44, wr_item_sk#43] Join condition: None -(126) Project [codegen id : 50] -Output [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, (ws_quantity#41 - coalesce(wr_return_quantity#46, 0)) AS sales_cnt#76, CheckOverflow((promote_precision(cast(ws_ext_sales_price#42 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#47, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#77] -Input [13]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68, wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +(118) Project [codegen id : 46] +Output [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, (ws_quantity#40 - coalesce(wr_return_quantity#45, 0)) AS sales_cnt#74, CheckOverflow((promote_precision(cast(ws_ext_sales_price#41 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#46, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#75] +Input [13]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67, wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] -(127) Union +(119) Union -(128) HashAggregate [codegen id : 51] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Keys [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +(120) HashAggregate [codegen id : 47] +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] +Keys [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] Functions: [] Aggregate Attributes: [] -Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Results [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] -(129) Exchange -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Arguments: hashpartitioning(d_year#68, i_brand_id#63, i_class_id#64, 
i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23, 5), true, [id=#78] +(121) Exchange +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] +Arguments: hashpartitioning(d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23, 5), ENSURE_REQUIREMENTS, [id=#76] -(130) HashAggregate [codegen id : 52] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Keys [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +(122) HashAggregate [codegen id : 48] +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] +Keys [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] Functions: [] Aggregate Attributes: [] -Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Results [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] -(131) HashAggregate [codegen id : 52] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Keys [5]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(123) HashAggregate [codegen id : 48] +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] +Keys [5]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] Functions [2]: [partial_sum(cast(sales_cnt#22 as bigint)), partial_sum(UnscaledValue(sales_amt#23))] -Aggregate Attributes [2]: [sum#79, sum#80] -Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sum#81, sum#82] +Aggregate Attributes [2]: [sum#77, sum#78] +Results [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sum#79, sum#80] -(132) Exchange -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sum#81, sum#82] -Arguments: hashpartitioning(d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, 5), true, [id=#83] +(124) Exchange +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sum#79, sum#80] +Arguments: hashpartitioning(d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, 5), ENSURE_REQUIREMENTS, [id=#81] -(133) HashAggregate [codegen id : 53] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sum#81, sum#82] -Keys [5]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(125) HashAggregate [codegen id : 49] +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sum#79, sum#80] +Keys [5]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] Functions [2]: [sum(cast(sales_cnt#22 as bigint)), sum(UnscaledValue(sales_amt#23))] -Aggregate Attributes [2]: [sum(cast(sales_cnt#22 as bigint))#84, sum(UnscaledValue(sales_amt#23))#85] -Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sum(cast(sales_cnt#22 as bigint))#84 AS sales_cnt#86, MakeDecimal(sum(UnscaledValue(sales_amt#23))#85,18,2) AS sales_amt#87] +Aggregate Attributes [2]: [sum(cast(sales_cnt#22 as 
bigint))#82, sum(UnscaledValue(sales_amt#23))#83] +Results [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sum(cast(sales_cnt#22 as bigint))#82 AS sales_cnt#84, MakeDecimal(sum(UnscaledValue(sales_amt#23))#83,18,2) AS sales_amt#85] -(134) Exchange -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#86, sales_amt#87] -Arguments: hashpartitioning(i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, 5), true, [id=#88] +(126) Exchange +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#84, sales_amt#85] +Arguments: hashpartitioning(i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, 5), ENSURE_REQUIREMENTS, [id=#86] -(135) Sort [codegen id : 54] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#86, sales_amt#87] -Arguments: [i_brand_id#63 ASC NULLS FIRST, i_class_id#64 ASC NULLS FIRST, i_category_id#65 ASC NULLS FIRST, i_manufact_id#66 ASC NULLS FIRST], false, 0 +(127) Sort [codegen id : 50] +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#84, sales_amt#85] +Arguments: [i_brand_id#62 ASC NULLS FIRST, i_class_id#63 ASC NULLS FIRST, i_category_id#64 ASC NULLS FIRST, i_manufact_id#65 ASC NULLS FIRST], false, 0 -(136) SortMergeJoin [codegen id : 55] +(128) SortMergeJoin [codegen id : 51] Left keys [4]: [i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -Right keys [4]: [i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] -Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#59 as decimal(17,2))) / promote_precision(cast(sales_cnt#86 as decimal(17,2)))), DecimalType(37,20), true) < 0.90000000000000000000) +Right keys [4]: [i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] +Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#58 as decimal(17,2))) / promote_precision(cast(sales_cnt#84 as decimal(17,2)))), DecimalType(37,20), true) < 0.90000000000000000000) -(137) Project [codegen id : 55] -Output [10]: [d_year#68 AS prev_year#89, d_year#14 AS year#90, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#86 AS prev_yr_cnt#91, sales_cnt#59 AS curr_yr_cnt#92, (sales_cnt#59 - sales_cnt#86) AS sales_cnt_diff#93, CheckOverflow((promote_precision(cast(sales_amt#60 as decimal(19,2))) - promote_precision(cast(sales_amt#87 as decimal(19,2)))), DecimalType(19,2), true) AS sales_amt_diff#94] -Input [14]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#59, sales_amt#60, d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#86, sales_amt#87] +(129) Project [codegen id : 51] +Output [10]: [d_year#67 AS prev_year#87, d_year#14 AS year#88, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#84 AS prev_yr_cnt#89, sales_cnt#58 AS curr_yr_cnt#90, (sales_cnt#58 - sales_cnt#84) AS sales_cnt_diff#91, CheckOverflow((promote_precision(cast(sales_amt#59 as decimal(19,2))) - promote_precision(cast(sales_amt#85 as decimal(19,2)))), DecimalType(19,2), true) AS sales_amt_diff#92] +Input [14]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#58, sales_amt#59, d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#84, sales_amt#85] -(138) TakeOrderedAndProject -Input [10]: [prev_year#89, year#90, i_brand_id#7, i_class_id#8, 
i_category_id#9, i_manufact_id#11, prev_yr_cnt#91, curr_yr_cnt#92, sales_cnt_diff#93, sales_amt_diff#94] -Arguments: 100, [sales_cnt_diff#93 ASC NULLS FIRST, sales_amt_diff#94 ASC NULLS FIRST], [prev_year#89, year#90, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#91, curr_yr_cnt#92, sales_cnt_diff#93, sales_amt_diff#94] +(130) TakeOrderedAndProject +Input [10]: [prev_year#87, year#88, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#89, curr_yr_cnt#90, sales_cnt_diff#91, sales_amt_diff#92] +Arguments: 100, [sales_cnt_diff#91 ASC NULLS FIRST, sales_amt_diff#92 ASC NULLS FIRST], [prev_year#87, year#88, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#89, curr_yr_cnt#90, sales_cnt_diff#91, sales_amt_diff#92] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/simplified.txt index 69f8b6a5b6789..b44ed2a7a3894 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/simplified.txt @@ -1,113 +1,105 @@ TakeOrderedAndProject [sales_cnt_diff,sales_amt_diff,prev_year,year,i_brand_id,i_class_id,i_category_id,i_manufact_id,prev_yr_cnt,curr_yr_cnt] - WholeStageCodegen (55) + WholeStageCodegen (51) Project [d_year,d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt,sales_amt,sales_amt] SortMergeJoin [i_brand_id,i_class_id,i_category_id,i_manufact_id,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt] InputAdapter - WholeStageCodegen (27) + WholeStageCodegen (25) Sort [i_brand_id,i_class_id,i_category_id,i_manufact_id] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #1 - WholeStageCodegen (26) + WholeStageCodegen (24) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] [sum(cast(sales_cnt as bigint)),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] InputAdapter Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #2 - WholeStageCodegen (25) + WholeStageCodegen (23) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #3 - WholeStageCodegen (24) + WholeStageCodegen (22) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter Union - WholeStageCodegen (16) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + WholeStageCodegen (7) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #4 - WholeStageCodegen (15) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + WholeStageCodegen (4) + Sort [cs_order_number,cs_item_sk] InputAdapter - Union - WholeStageCodegen (7) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] - 
InputAdapter - SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - WholeStageCodegen (4) - Sort [cs_order_number,cs_item_sk] - InputAdapter - Exchange [cs_order_number,cs_item_sk] #5 - WholeStageCodegen (3) - Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Filter [cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (1) - Project [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] - Filter [i_category,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (2) - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - WholeStageCodegen (6) - Sort [cr_order_number,cr_item_sk] - InputAdapter - Exchange [cr_order_number,cr_item_sk] #8 - WholeStageCodegen (5) - Filter [cr_order_number,cr_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] - WholeStageCodegen (14) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] - InputAdapter - SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - WholeStageCodegen (11) - Sort [ss_ticket_number,ss_item_sk] - InputAdapter - Exchange [ss_ticket_number,ss_item_sk] #9 - WholeStageCodegen (10) - Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #6 - InputAdapter - ReusedExchange [d_date_sk,d_year] #7 - WholeStageCodegen (13) - Sort [sr_ticket_number,sr_item_sk] - InputAdapter - Exchange [sr_ticket_number,sr_item_sk] #10 - WholeStageCodegen (12) - Filter [sr_ticket_number,sr_item_sk] + Exchange [cs_order_number,cs_item_sk] #4 + WholeStageCodegen (3) + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project 
[i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + Filter [i_category,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] ColumnarToRow InputAdapter - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] - WholeStageCodegen (23) + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (6) + Sort [cr_order_number,cr_item_sk] + InputAdapter + Exchange [cr_order_number,cr_item_sk] #7 + WholeStageCodegen (5) + Filter [cr_order_number,cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + WholeStageCodegen (14) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] + InputAdapter + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + WholeStageCodegen (11) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + Exchange [ss_ticket_number,ss_item_sk] #8 + WholeStageCodegen (10) + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 + WholeStageCodegen (13) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + Exchange [sr_ticket_number,sr_item_sk] #9 + WholeStageCodegen (12) + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + WholeStageCodegen (21) Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] InputAdapter SortMergeJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] - WholeStageCodegen (20) + WholeStageCodegen (18) Sort [ws_order_number,ws_item_sk] InputAdapter - Exchange [ws_order_number,ws_item_sk] #11 - WholeStageCodegen (19) + Exchange [ws_order_number,ws_item_sk] #10 + WholeStageCodegen (17) Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] Project [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] @@ -117,108 +109,100 @@ TakeOrderedAndProject [sales_cnt_diff,sales_amt_diff,prev_year,year,i_brand_id,i InputAdapter Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price] InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #6 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 InputAdapter - ReusedExchange [d_date_sk,d_year] #7 - WholeStageCodegen (22) + 
ReusedExchange [d_date_sk,d_year] #6 + WholeStageCodegen (20) Sort [wr_order_number,wr_item_sk] InputAdapter - Exchange [wr_order_number,wr_item_sk] #12 - WholeStageCodegen (21) + Exchange [wr_order_number,wr_item_sk] #11 + WholeStageCodegen (19) Filter [wr_order_number,wr_item_sk] ColumnarToRow InputAdapter Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] InputAdapter - WholeStageCodegen (54) + WholeStageCodegen (50) Sort [i_brand_id,i_class_id,i_category_id,i_manufact_id] InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #13 - WholeStageCodegen (53) + Exchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #12 + WholeStageCodegen (49) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] [sum(cast(sales_cnt as bigint)),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #14 - WholeStageCodegen (52) + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #13 + WholeStageCodegen (48) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #15 - WholeStageCodegen (51) + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #14 + WholeStageCodegen (47) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter Union - WholeStageCodegen (43) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + WholeStageCodegen (32) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #16 - WholeStageCodegen (42) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + WholeStageCodegen (29) + Sort [cs_order_number,cs_item_sk] InputAdapter - Union - WholeStageCodegen (34) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] - InputAdapter - SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - WholeStageCodegen (31) - Sort [cs_order_number,cs_item_sk] - InputAdapter - Exchange [cs_order_number,cs_item_sk] #17 - WholeStageCodegen (30) - Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Filter [cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #6 - InputAdapter - BroadcastExchange #18 - WholeStageCodegen (29) - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - WholeStageCodegen 
(33) - Sort [cr_order_number,cr_item_sk] - InputAdapter - ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #8 - WholeStageCodegen (41) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] - InputAdapter - SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - WholeStageCodegen (38) - Sort [ss_ticket_number,ss_item_sk] - InputAdapter - Exchange [ss_ticket_number,ss_item_sk] #19 - WholeStageCodegen (37) - Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #6 - InputAdapter - ReusedExchange [d_date_sk,d_year] #18 - WholeStageCodegen (40) - Sort [sr_ticket_number,sr_item_sk] - InputAdapter - ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #10 - WholeStageCodegen (50) + Exchange [cs_order_number,cs_item_sk] #15 + WholeStageCodegen (28) + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (27) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (31) + Sort [cr_order_number,cr_item_sk] + InputAdapter + ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #7 + WholeStageCodegen (39) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] + InputAdapter + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + WholeStageCodegen (36) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + Exchange [ss_ticket_number,ss_item_sk] #17 + WholeStageCodegen (35) + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + ReusedExchange [d_date_sk,d_year] #16 + WholeStageCodegen (38) 
+ Sort [sr_ticket_number,sr_item_sk] + InputAdapter + ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #9 + WholeStageCodegen (46) Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] InputAdapter SortMergeJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] - WholeStageCodegen (47) + WholeStageCodegen (43) Sort [ws_order_number,ws_item_sk] InputAdapter - Exchange [ws_order_number,ws_item_sk] #20 - WholeStageCodegen (46) + Exchange [ws_order_number,ws_item_sk] #18 + WholeStageCodegen (42) Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] Project [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] @@ -228,10 +212,10 @@ TakeOrderedAndProject [sales_cnt_diff,sales_amt_diff,prev_year,year,i_brand_id,i InputAdapter Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price] InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #6 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 InputAdapter - ReusedExchange [d_date_sk,d_year] #18 - WholeStageCodegen (49) + ReusedExchange [d_date_sk,d_year] #16 + WholeStageCodegen (45) Sort [wr_order_number,wr_item_sk] InputAdapter - ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #12 + ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #11 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/explain.txt index 1d8aab417f188..ae7442399ebd4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/explain.txt @@ -1,121 +1,113 @@ == Physical Plan == -TakeOrderedAndProject (117) -+- * Project (116) - +- * BroadcastHashJoin Inner BuildRight (115) - :- * HashAggregate (63) - : +- Exchange (62) - : +- * HashAggregate (61) - : +- * HashAggregate (60) - : +- Exchange (59) - : +- * HashAggregate (58) - : +- Union (57) - : :- * HashAggregate (41) - : : +- Exchange (40) - : : +- * HashAggregate (39) - : : +- Union (38) - : : :- * Project (22) - : : : +- * BroadcastHashJoin LeftOuter BuildRight (21) - : : : :- * Project (16) - : : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : : :- * Project (10) - : : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : : :- * Filter (3) - : : : : : : +- * ColumnarToRow (2) - : : : : : : +- Scan parquet default.catalog_sales (1) - : : : : : +- BroadcastExchange (8) - : : : : : +- * Project (7) - : : : : : +- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.item (4) - : : : : +- BroadcastExchange (14) - : : : : +- * Filter (13) - : : : : +- * ColumnarToRow (12) - : : : : +- Scan parquet default.date_dim (11) - : : : +- BroadcastExchange (20) - : : : +- * Filter (19) - : : : +- * ColumnarToRow (18) - : : : +- Scan parquet default.catalog_returns (17) - : : +- * Project (37) - : : +- * BroadcastHashJoin LeftOuter BuildRight (36) - : : :- * Project (31) - : : : +- * BroadcastHashJoin Inner BuildRight (30) - : : : :- * Project (28) - : : : : +- * BroadcastHashJoin Inner BuildRight (27) 
- : : : : :- * Filter (25) - : : : : : +- * ColumnarToRow (24) - : : : : : +- Scan parquet default.store_sales (23) - : : : : +- ReusedExchange (26) - : : : +- ReusedExchange (29) - : : +- BroadcastExchange (35) - : : +- * Filter (34) - : : +- * ColumnarToRow (33) - : : +- Scan parquet default.store_returns (32) - : +- * Project (56) - : +- * BroadcastHashJoin LeftOuter BuildRight (55) - : :- * Project (50) - : : +- * BroadcastHashJoin Inner BuildRight (49) - : : :- * Project (47) - : : : +- * BroadcastHashJoin Inner BuildRight (46) - : : : :- * Filter (44) - : : : : +- * ColumnarToRow (43) - : : : : +- Scan parquet default.web_sales (42) - : : : +- ReusedExchange (45) - : : +- ReusedExchange (48) - : +- BroadcastExchange (54) - : +- * Filter (53) - : +- * ColumnarToRow (52) - : +- Scan parquet default.web_returns (51) - +- BroadcastExchange (114) - +- * HashAggregate (113) - +- Exchange (112) - +- * HashAggregate (111) - +- * HashAggregate (110) - +- Exchange (109) - +- * HashAggregate (108) - +- Union (107) - :- * HashAggregate (94) - : +- Exchange (93) - : +- * HashAggregate (92) - : +- Union (91) - : :- * Project (78) - : : +- * BroadcastHashJoin LeftOuter BuildRight (77) - : : :- * Project (75) - : : : +- * BroadcastHashJoin Inner BuildRight (74) - : : : :- * Project (69) - : : : : +- * BroadcastHashJoin Inner BuildRight (68) - : : : : :- * Filter (66) - : : : : : +- * ColumnarToRow (65) - : : : : : +- Scan parquet default.catalog_sales (64) - : : : : +- ReusedExchange (67) - : : : +- BroadcastExchange (73) - : : : +- * Filter (72) - : : : +- * ColumnarToRow (71) - : : : +- Scan parquet default.date_dim (70) - : : +- ReusedExchange (76) - : +- * Project (90) - : +- * BroadcastHashJoin LeftOuter BuildRight (89) - : :- * Project (87) - : : +- * BroadcastHashJoin Inner BuildRight (86) - : : :- * Project (84) - : : : +- * BroadcastHashJoin Inner BuildRight (83) - : : : :- * Filter (81) - : : : : +- * ColumnarToRow (80) - : : : : +- Scan parquet default.store_sales (79) - : : : +- ReusedExchange (82) - : : +- ReusedExchange (85) - : +- ReusedExchange (88) - +- * Project (106) - +- * BroadcastHashJoin LeftOuter BuildRight (105) - :- * Project (103) - : +- * BroadcastHashJoin Inner BuildRight (102) - : :- * Project (100) - : : +- * BroadcastHashJoin Inner BuildRight (99) - : : :- * Filter (97) - : : : +- * ColumnarToRow (96) - : : : +- Scan parquet default.web_sales (95) - : : +- ReusedExchange (98) - : +- ReusedExchange (101) - +- ReusedExchange (104) +TakeOrderedAndProject (109) ++- * Project (108) + +- * BroadcastHashJoin Inner BuildRight (107) + :- * HashAggregate (59) + : +- Exchange (58) + : +- * HashAggregate (57) + : +- * HashAggregate (56) + : +- Exchange (55) + : +- * HashAggregate (54) + : +- Union (53) + : :- * Project (22) + : : +- * BroadcastHashJoin LeftOuter BuildRight (21) + : : :- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.catalog_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.item (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.date_dim (11) + : : +- BroadcastExchange (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet default.catalog_returns (17) + : :- 
* Project (37) + : : +- * BroadcastHashJoin LeftOuter BuildRight (36) + : : :- * Project (31) + : : : +- * BroadcastHashJoin Inner BuildRight (30) + : : : :- * Project (28) + : : : : +- * BroadcastHashJoin Inner BuildRight (27) + : : : : :- * Filter (25) + : : : : : +- * ColumnarToRow (24) + : : : : : +- Scan parquet default.store_sales (23) + : : : : +- ReusedExchange (26) + : : : +- ReusedExchange (29) + : : +- BroadcastExchange (35) + : : +- * Filter (34) + : : +- * ColumnarToRow (33) + : : +- Scan parquet default.store_returns (32) + : +- * Project (52) + : +- * BroadcastHashJoin LeftOuter BuildRight (51) + : :- * Project (46) + : : +- * BroadcastHashJoin Inner BuildRight (45) + : : :- * Project (43) + : : : +- * BroadcastHashJoin Inner BuildRight (42) + : : : :- * Filter (40) + : : : : +- * ColumnarToRow (39) + : : : : +- Scan parquet default.web_sales (38) + : : : +- ReusedExchange (41) + : : +- ReusedExchange (44) + : +- BroadcastExchange (50) + : +- * Filter (49) + : +- * ColumnarToRow (48) + : +- Scan parquet default.web_returns (47) + +- BroadcastExchange (106) + +- * HashAggregate (105) + +- Exchange (104) + +- * HashAggregate (103) + +- * HashAggregate (102) + +- Exchange (101) + +- * HashAggregate (100) + +- Union (99) + :- * Project (74) + : +- * BroadcastHashJoin LeftOuter BuildRight (73) + : :- * Project (71) + : : +- * BroadcastHashJoin Inner BuildRight (70) + : : :- * Project (65) + : : : +- * BroadcastHashJoin Inner BuildRight (64) + : : : :- * Filter (62) + : : : : +- * ColumnarToRow (61) + : : : : +- Scan parquet default.catalog_sales (60) + : : : +- ReusedExchange (63) + : : +- BroadcastExchange (69) + : : +- * Filter (68) + : : +- * ColumnarToRow (67) + : : +- Scan parquet default.date_dim (66) + : +- ReusedExchange (72) + :- * Project (86) + : +- * BroadcastHashJoin LeftOuter BuildRight (85) + : :- * Project (83) + : : +- * BroadcastHashJoin Inner BuildRight (82) + : : :- * Project (80) + : : : +- * BroadcastHashJoin Inner BuildRight (79) + : : : :- * Filter (77) + : : : : +- * ColumnarToRow (76) + : : : : +- Scan parquet default.store_sales (75) + : : : +- ReusedExchange (78) + : : +- ReusedExchange (81) + : +- ReusedExchange (84) + +- * Project (98) + +- * BroadcastHashJoin LeftOuter BuildRight (97) + :- * Project (95) + : +- * BroadcastHashJoin Inner BuildRight (94) + : :- * Project (92) + : : +- * BroadcastHashJoin Inner BuildRight (91) + : : :- * Filter (89) + : : : +- * ColumnarToRow (88) + : : : +- Scan parquet default.web_sales (87) + : : +- ReusedExchange (90) + : +- ReusedExchange (93) + +- ReusedExchange (96) (1) Scan parquet default.catalog_sales @@ -282,366 +274,326 @@ Join condition: None Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ss_quantity#26 - coalesce(sr_return_quantity#30, 0)) AS sales_cnt#33, CheckOverflow((promote_precision(cast(ss_ext_sales_price#27 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#31, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#34] Input [13]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] -(38) Union - -(39) HashAggregate [codegen id : 9] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] 
-Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] - -(40) Exchange -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22, 5), true, [id=#35] - -(41) HashAggregate [codegen id : 10] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] - -(42) Scan parquet default.web_sales -Output [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +(38) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] ReadSchema: struct -(43) ColumnarToRow [codegen id : 14] -Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +(39) ColumnarToRow [codegen id : 12] +Input [5]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39] -(44) Filter [codegen id : 14] -Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] -Condition : (isnotnull(ws_item_sk#37) AND isnotnull(ws_sold_date_sk#36)) +(40) Filter [codegen id : 12] +Input [5]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39] +Condition : (isnotnull(ws_item_sk#36) AND isnotnull(ws_sold_date_sk#35)) -(45) ReusedExchange [Reuses operator id: 8] +(41) ReusedExchange [Reuses operator id: 8] Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -(46) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [ws_item_sk#37] +(42) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [ws_item_sk#36] Right keys [1]: [i_item_sk#6] Join condition: None -(47) Project [codegen id : 14] -Output [9]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -Input [10]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +(43) Project [codegen id : 12] +Output [9]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [10]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -(48) ReusedExchange [Reuses operator id: 14] +(44) ReusedExchange [Reuses operator id: 14] Output [2]: [d_date_sk#13, d_year#14] -(49) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [ws_sold_date_sk#36] +(45) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [ws_sold_date_sk#35] Right keys [1]: [d_date_sk#13] Join condition: None -(50) Project [codegen 
id : 14] -Output [9]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] -Input [11]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] +(46) Project [codegen id : 12] +Output [9]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Input [11]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] -(51) Scan parquet default.web_returns -Output [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +(47) Scan parquet default.web_returns +Output [4]: [wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] ReadSchema: struct -(52) ColumnarToRow [codegen id : 13] -Input [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +(48) ColumnarToRow [codegen id : 11] +Input [4]: [wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] -(53) Filter [codegen id : 13] -Input [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] -Condition : (isnotnull(wr_order_number#42) AND isnotnull(wr_item_sk#41)) +(49) Filter [codegen id : 11] +Input [4]: [wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] +Condition : (isnotnull(wr_order_number#41) AND isnotnull(wr_item_sk#40)) -(54) BroadcastExchange -Input [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] -Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#45] +(50) BroadcastExchange +Input [4]: [wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#44] -(55) BroadcastHashJoin [codegen id : 14] -Left keys [2]: [cast(ws_order_number#38 as bigint), cast(ws_item_sk#37 as bigint)] -Right keys [2]: [wr_order_number#42, wr_item_sk#41] +(51) BroadcastHashJoin [codegen id : 12] +Left keys [2]: [cast(ws_order_number#37 as bigint), cast(ws_item_sk#36 as bigint)] +Right keys [2]: [wr_order_number#41, wr_item_sk#40] Join condition: None -(56) Project [codegen id : 14] -Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ws_quantity#39 - coalesce(wr_return_quantity#43, 0)) AS sales_cnt#46, CheckOverflow((promote_precision(cast(ws_ext_sales_price#40 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#44, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#47] -Input [13]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +(52) Project [codegen id : 12] +Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ws_quantity#38 - coalesce(wr_return_quantity#42, 0)) AS sales_cnt#45, CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(8,2))) - 
promote_precision(cast(coalesce(wr_return_amt#43, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#46] +Input [13]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] -(57) Union +(53) Union -(58) HashAggregate [codegen id : 15] +(54) HashAggregate [codegen id : 13] Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] Functions: [] Aggregate Attributes: [] Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -(59) Exchange +(55) Exchange Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22, 5), true, [id=#48] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22, 5), ENSURE_REQUIREMENTS, [id=#47] -(60) HashAggregate [codegen id : 16] +(56) HashAggregate [codegen id : 14] Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] Functions: [] Aggregate Attributes: [] Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -(61) HashAggregate [codegen id : 16] +(57) HashAggregate [codegen id : 14] Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] Functions [2]: [partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))] -Aggregate Attributes [2]: [sum#49, sum#50] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] +Aggregate Attributes [2]: [sum#48, sum#49] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#50, sum#51] -(62) Exchange -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] -Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), true, [id=#53] +(58) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#50, sum#51] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [id=#52] -(63) HashAggregate [codegen id : 34] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] +(59) HashAggregate [codegen id : 30] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#50, sum#51] Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] Functions [2]: [sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))] -Aggregate Attributes [2]: [sum(cast(sales_cnt#21 as bigint))#54, sum(UnscaledValue(sales_amt#22))#55] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, 
i_manufact_id#11, sum(cast(sales_cnt#21 as bigint))#54 AS sales_cnt#56, MakeDecimal(sum(UnscaledValue(sales_amt#22))#55,18,2) AS sales_amt#57] +Aggregate Attributes [2]: [sum(cast(sales_cnt#21 as bigint))#53, sum(UnscaledValue(sales_amt#22))#54] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum(cast(sales_cnt#21 as bigint))#53 AS sales_cnt#55, MakeDecimal(sum(UnscaledValue(sales_amt#22))#54,18,2) AS sales_amt#56] -(64) Scan parquet default.catalog_sales +(60) Scan parquet default.catalog_sales Output [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_sales] PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] ReadSchema: struct -(65) ColumnarToRow [codegen id : 20] +(61) ColumnarToRow [codegen id : 18] Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] -(66) Filter [codegen id : 20] +(62) Filter [codegen id : 18] Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_sold_date_sk#1)) -(67) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(63) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#57, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] -(68) BroadcastHashJoin [codegen id : 20] +(64) BroadcastHashJoin [codegen id : 18] Left keys [1]: [cs_item_sk#2] -Right keys [1]: [i_item_sk#58] +Right keys [1]: [i_item_sk#57] Join condition: None -(69) Project [codegen id : 20] -Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] -Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(65) Project [codegen id : 18] +Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] +Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_item_sk#57, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] -(70) Scan parquet default.date_dim -Output [2]: [d_date_sk#63, d_year#64] +(66) Scan parquet default.date_dim +Output [2]: [d_date_sk#62, d_year#63] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct -(71) ColumnarToRow [codegen id : 18] -Input [2]: [d_date_sk#63, d_year#64] +(67) ColumnarToRow [codegen id : 16] +Input [2]: [d_date_sk#62, d_year#63] -(72) Filter [codegen id : 18] -Input [2]: [d_date_sk#63, d_year#64] -Condition : ((isnotnull(d_year#64) AND (d_year#64 = 2001)) AND isnotnull(d_date_sk#63)) +(68) Filter [codegen id : 16] +Input [2]: [d_date_sk#62, d_year#63] +Condition : ((isnotnull(d_year#63) AND (d_year#63 = 2001)) AND isnotnull(d_date_sk#62)) -(73) BroadcastExchange -Input [2]: [d_date_sk#63, d_year#64] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#65] +(69) BroadcastExchange +Input [2]: [d_date_sk#62, d_year#63] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, 
false] as bigint)),false), [id=#64] -(74) BroadcastHashJoin [codegen id : 20] +(70) BroadcastHashJoin [codegen id : 18] Left keys [1]: [cs_sold_date_sk#1] -Right keys [1]: [d_date_sk#63] +Right keys [1]: [d_date_sk#62] Join condition: None -(75) Project [codegen id : 20] -Output [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64] -Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_date_sk#63, d_year#64] +(71) Project [codegen id : 18] +Output [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_year#63] +Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_date_sk#62, d_year#63] -(76) ReusedExchange [Reuses operator id: 20] +(72) ReusedExchange [Reuses operator id: 20] Output [4]: [cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] -(77) BroadcastHashJoin [codegen id : 20] +(73) BroadcastHashJoin [codegen id : 18] Left keys [2]: [cs_order_number#3, cs_item_sk#2] Right keys [2]: [cr_order_number#17, cr_item_sk#16] Join condition: None -(78) Project [codegen id : 20] -Output [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, (cs_quantity#4 - coalesce(cr_return_quantity#18, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#19, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#22] -Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64, cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] +(74) Project [codegen id : 18] +Output [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, (cs_quantity#4 - coalesce(cr_return_quantity#18, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#19, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#22] +Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_year#63, cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] -(79) Scan parquet default.store_sales +(75) Scan parquet default.store_sales Output [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] ReadSchema: struct -(80) ColumnarToRow [codegen id : 24] +(76) ColumnarToRow [codegen id : 22] Input [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] -(81) Filter [codegen id : 24] +(77) Filter [codegen id : 22] Input [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] Condition : (isnotnull(ss_item_sk#24) AND isnotnull(ss_sold_date_sk#23)) -(82) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#58, i_brand_id#59, i_class_id#60, 
i_category_id#61, i_manufact_id#62] +(78) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#57, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] -(83) BroadcastHashJoin [codegen id : 24] +(79) BroadcastHashJoin [codegen id : 22] Left keys [1]: [ss_item_sk#24] -Right keys [1]: [i_item_sk#58] +Right keys [1]: [i_item_sk#57] Join condition: None -(84) Project [codegen id : 24] -Output [9]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] -Input [10]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(80) Project [codegen id : 22] +Output [9]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] +Input [10]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_item_sk#57, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] -(85) ReusedExchange [Reuses operator id: 73] -Output [2]: [d_date_sk#63, d_year#64] +(81) ReusedExchange [Reuses operator id: 69] +Output [2]: [d_date_sk#62, d_year#63] -(86) BroadcastHashJoin [codegen id : 24] +(82) BroadcastHashJoin [codegen id : 22] Left keys [1]: [ss_sold_date_sk#23] -Right keys [1]: [d_date_sk#63] +Right keys [1]: [d_date_sk#62] Join condition: None -(87) Project [codegen id : 24] -Output [9]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64] -Input [11]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_date_sk#63, d_year#64] +(83) Project [codegen id : 22] +Output [9]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_year#63] +Input [11]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_date_sk#62, d_year#63] -(88) ReusedExchange [Reuses operator id: 35] +(84) ReusedExchange [Reuses operator id: 35] Output [4]: [sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] -(89) BroadcastHashJoin [codegen id : 24] +(85) BroadcastHashJoin [codegen id : 22] Left keys [2]: [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#24 as bigint)] Right keys [2]: [sr_ticket_number#29, sr_item_sk#28] Join condition: None -(90) Project [codegen id : 24] -Output [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, (ss_quantity#26 - coalesce(sr_return_quantity#30, 0)) AS sales_cnt#66, CheckOverflow((promote_precision(cast(ss_ext_sales_price#27 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#31, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#67] -Input [13]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64, sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] - -(91) Union - -(92) HashAggregate [codegen id : 25] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, 
sales_amt#22] -Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] - -(93) Exchange -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Arguments: hashpartitioning(d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22, 5), true, [id=#68] - -(94) HashAggregate [codegen id : 26] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +(86) Project [codegen id : 22] +Output [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, (ss_quantity#26 - coalesce(sr_return_quantity#30, 0)) AS sales_cnt#65, CheckOverflow((promote_precision(cast(ss_ext_sales_price#27 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#31, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#66] +Input [13]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_year#63, sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] -(95) Scan parquet default.web_sales -Output [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +(87) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] ReadSchema: struct -(96) ColumnarToRow [codegen id : 30] -Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +(88) ColumnarToRow [codegen id : 26] +Input [5]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39] -(97) Filter [codegen id : 30] -Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] -Condition : (isnotnull(ws_item_sk#37) AND isnotnull(ws_sold_date_sk#36)) +(89) Filter [codegen id : 26] +Input [5]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39] +Condition : (isnotnull(ws_item_sk#36) AND isnotnull(ws_sold_date_sk#35)) -(98) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(90) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#57, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] -(99) BroadcastHashJoin [codegen id : 30] -Left keys [1]: [ws_item_sk#37] -Right keys [1]: [i_item_sk#58] +(91) BroadcastHashJoin [codegen id : 26] +Left keys [1]: [ws_item_sk#36] +Right keys [1]: [i_item_sk#57] Join condition: None -(100) Project [codegen id : 30] -Output [9]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] -Input [10]: 
[ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(92) Project [codegen id : 26] +Output [9]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] +Input [10]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_item_sk#57, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] -(101) ReusedExchange [Reuses operator id: 73] -Output [2]: [d_date_sk#63, d_year#64] +(93) ReusedExchange [Reuses operator id: 69] +Output [2]: [d_date_sk#62, d_year#63] -(102) BroadcastHashJoin [codegen id : 30] -Left keys [1]: [ws_sold_date_sk#36] -Right keys [1]: [d_date_sk#63] +(94) BroadcastHashJoin [codegen id : 26] +Left keys [1]: [ws_sold_date_sk#35] +Right keys [1]: [d_date_sk#62] Join condition: None -(103) Project [codegen id : 30] -Output [9]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64] -Input [11]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_date_sk#63, d_year#64] +(95) Project [codegen id : 26] +Output [9]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_year#63] +Input [11]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_date_sk#62, d_year#63] -(104) ReusedExchange [Reuses operator id: 54] -Output [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +(96) ReusedExchange [Reuses operator id: 50] +Output [4]: [wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] -(105) BroadcastHashJoin [codegen id : 30] -Left keys [2]: [cast(ws_order_number#38 as bigint), cast(ws_item_sk#37 as bigint)] -Right keys [2]: [wr_order_number#42, wr_item_sk#41] +(97) BroadcastHashJoin [codegen id : 26] +Left keys [2]: [cast(ws_order_number#37 as bigint), cast(ws_item_sk#36 as bigint)] +Right keys [2]: [wr_order_number#41, wr_item_sk#40] Join condition: None -(106) Project [codegen id : 30] -Output [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, (ws_quantity#39 - coalesce(wr_return_quantity#43, 0)) AS sales_cnt#69, CheckOverflow((promote_precision(cast(ws_ext_sales_price#40 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#44, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#70] -Input [13]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64, wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +(98) Project [codegen id : 26] +Output [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, (ws_quantity#38 - coalesce(wr_return_quantity#42, 0)) AS sales_cnt#67, CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#43, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#68] +Input [13]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, 
i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_year#63, wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] -(107) Union +(99) Union -(108) HashAggregate [codegen id : 31] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +(100) HashAggregate [codegen id : 27] +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] Functions: [] Aggregate Attributes: [] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Results [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] -(109) Exchange -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Arguments: hashpartitioning(d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22, 5), true, [id=#71] +(101) Exchange +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] +Arguments: hashpartitioning(d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22, 5), ENSURE_REQUIREMENTS, [id=#69] -(110) HashAggregate [codegen id : 32] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +(102) HashAggregate [codegen id : 28] +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] Functions: [] Aggregate Attributes: [] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Results [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] -(111) HashAggregate [codegen id : 32] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Keys [5]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(103) HashAggregate [codegen id : 28] +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] +Keys [5]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] Functions [2]: [partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))] -Aggregate Attributes [2]: [sum#72, sum#73] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum#74, sum#75] +Aggregate Attributes [2]: [sum#70, sum#71] +Results [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sum#72, sum#73] -(112) Exchange -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum#74, sum#75] -Arguments: hashpartitioning(d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, 5), 
true, [id=#76] +(104) Exchange +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sum#72, sum#73] +Arguments: hashpartitioning(d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, 5), ENSURE_REQUIREMENTS, [id=#74] -(113) HashAggregate [codegen id : 33] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum#74, sum#75] -Keys [5]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(105) HashAggregate [codegen id : 29] +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sum#72, sum#73] +Keys [5]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] Functions [2]: [sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))] -Aggregate Attributes [2]: [sum(cast(sales_cnt#21 as bigint))#77, sum(UnscaledValue(sales_amt#22))#78] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum(cast(sales_cnt#21 as bigint))#77 AS sales_cnt#79, MakeDecimal(sum(UnscaledValue(sales_amt#22))#78,18,2) AS sales_amt#80] +Aggregate Attributes [2]: [sum(cast(sales_cnt#21 as bigint))#75, sum(UnscaledValue(sales_amt#22))#76] +Results [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sum(cast(sales_cnt#21 as bigint))#75 AS sales_cnt#77, MakeDecimal(sum(UnscaledValue(sales_amt#22))#76,18,2) AS sales_amt#78] -(114) BroadcastExchange -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#79, sales_amt#80] -Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true], input[4, int, true]),false), [id=#81] +(106) BroadcastExchange +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#77, sales_amt#78] +Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true], input[4, int, true]),false), [id=#79] -(115) BroadcastHashJoin [codegen id : 34] +(107) BroadcastHashJoin [codegen id : 30] Left keys [4]: [i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -Right keys [4]: [i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] -Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#56 as decimal(17,2))) / promote_precision(cast(sales_cnt#79 as decimal(17,2)))), DecimalType(37,20), true) < 0.90000000000000000000) +Right keys [4]: [i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] +Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#55 as decimal(17,2))) / promote_precision(cast(sales_cnt#77 as decimal(17,2)))), DecimalType(37,20), true) < 0.90000000000000000000) -(116) Project [codegen id : 34] -Output [10]: [d_year#64 AS prev_year#82, d_year#14 AS year#83, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#79 AS prev_yr_cnt#84, sales_cnt#56 AS curr_yr_cnt#85, (sales_cnt#56 - sales_cnt#79) AS sales_cnt_diff#86, CheckOverflow((promote_precision(cast(sales_amt#57 as decimal(19,2))) - promote_precision(cast(sales_amt#80 as decimal(19,2)))), DecimalType(19,2), true) AS sales_amt_diff#87] -Input [14]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#56, sales_amt#57, d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#79, sales_amt#80] +(108) Project [codegen id : 30] +Output [10]: [d_year#63 AS prev_year#80, d_year#14 AS year#81, 
i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#77 AS prev_yr_cnt#82, sales_cnt#55 AS curr_yr_cnt#83, (sales_cnt#55 - sales_cnt#77) AS sales_cnt_diff#84, CheckOverflow((promote_precision(cast(sales_amt#56 as decimal(19,2))) - promote_precision(cast(sales_amt#78 as decimal(19,2)))), DecimalType(19,2), true) AS sales_amt_diff#85] +Input [14]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#55, sales_amt#56, d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#77, sales_amt#78] -(117) TakeOrderedAndProject -Input [10]: [prev_year#82, year#83, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#84, curr_yr_cnt#85, sales_cnt_diff#86, sales_amt_diff#87] -Arguments: 100, [sales_cnt_diff#86 ASC NULLS FIRST, sales_amt_diff#87 ASC NULLS FIRST], [prev_year#82, year#83, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#84, curr_yr_cnt#85, sales_cnt_diff#86, sales_amt_diff#87] +(109) TakeOrderedAndProject +Input [10]: [prev_year#80, year#81, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#82, curr_yr_cnt#83, sales_cnt_diff#84, sales_amt_diff#85] +Arguments: 100, [sales_cnt_diff#84 ASC NULLS FIRST, sales_amt_diff#85 ASC NULLS FIRST], [prev_year#80, year#81, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#82, curr_yr_cnt#83, sales_cnt_diff#84, sales_amt_diff#85] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/simplified.txt index d1c20801ec5fd..068187c44771a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/simplified.txt @@ -1,83 +1,75 @@ TakeOrderedAndProject [sales_cnt_diff,sales_amt_diff,prev_year,year,i_brand_id,i_class_id,i_category_id,i_manufact_id,prev_yr_cnt,curr_yr_cnt] - WholeStageCodegen (34) + WholeStageCodegen (30) Project [d_year,d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt,sales_amt,sales_amt] BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_manufact_id,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt] HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] [sum(cast(sales_cnt as bigint)),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] InputAdapter Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #1 - WholeStageCodegen (16) + WholeStageCodegen (14) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #2 - WholeStageCodegen (15) + WholeStageCodegen (13) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter Union - WholeStageCodegen (10) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] - InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #3 - WholeStageCodegen (9) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] - InputAdapter - Union - WholeStageCodegen (4) - Project 
[d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] - BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Filter [cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (1) - Project [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] - Filter [i_category,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (2) - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (3) - Filter [cr_order_number,cr_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] - WholeStageCodegen (8) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] - BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #4 + WholeStageCodegen (4) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] + BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + Filter [i_category,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + ColumnarToRow InputAdapter - ReusedExchange [d_date_sk,d_year] #5 - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (7) - Filter [sr_ticket_number,sr_item_sk] - ColumnarToRow - InputAdapter - 
Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] - WholeStageCodegen (14) + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [cr_order_number,cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + WholeStageCodegen (8) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #3 + InputAdapter + ReusedExchange [d_date_sk,d_year] #4 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + WholeStageCodegen (12) Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] @@ -89,79 +81,71 @@ TakeOrderedAndProject [sales_cnt_diff,sales_amt_diff,prev_year,year,i_brand_id,i InputAdapter Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price] InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #4 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #3 InputAdapter - ReusedExchange [d_date_sk,d_year] #5 + ReusedExchange [d_date_sk,d_year] #4 InputAdapter - BroadcastExchange #8 - WholeStageCodegen (13) + BroadcastExchange #7 + WholeStageCodegen (11) Filter [wr_order_number,wr_item_sk] ColumnarToRow InputAdapter Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] InputAdapter - BroadcastExchange #9 - WholeStageCodegen (33) + BroadcastExchange #8 + WholeStageCodegen (29) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] [sum(cast(sales_cnt as bigint)),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #10 - WholeStageCodegen (32) + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #9 + WholeStageCodegen (28) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] HashAggregate 
[d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #11 - WholeStageCodegen (31) + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #10 + WholeStageCodegen (27) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter Union + WholeStageCodegen (18) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] + BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #3 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (16) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #5 + WholeStageCodegen (22) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #3 + InputAdapter + ReusedExchange [d_date_sk,d_year] #11 + InputAdapter + ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #6 WholeStageCodegen (26) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] - InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #12 - WholeStageCodegen (25) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] - InputAdapter - Union - WholeStageCodegen (20) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] - BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin 
[cs_item_sk,i_item_sk] - Filter [cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #4 - InputAdapter - BroadcastExchange #13 - WholeStageCodegen (18) - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - InputAdapter - ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #6 - WholeStageCodegen (24) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] - BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #4 - InputAdapter - ReusedExchange [d_date_sk,d_year] #13 - InputAdapter - ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #7 - WholeStageCodegen (30) Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] @@ -173,8 +157,8 @@ TakeOrderedAndProject [sales_cnt_diff,sales_amt_diff,prev_year,year,i_brand_id,i InputAdapter Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price] InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #4 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #3 InputAdapter - ReusedExchange [d_date_sk,d_year] #13 + ReusedExchange [d_date_sk,d_year] #11 InputAdapter - ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #8 + ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #7 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/explain.txt index ac49cc0548c08..56a010e2ddb91 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/explain.txt @@ -1,112 +1,108 @@ == Physical Plan == -TakeOrderedAndProject (108) -+- * HashAggregate (107) - +- Exchange (106) - +- * HashAggregate (105) - +- Union (104) - :- * HashAggregate (98) - : +- Exchange (97) - : +- * HashAggregate (96) - : +- Union (95) - : :- * HashAggregate (89) - : : +- Exchange (88) - : : +- * HashAggregate (87) - : : +- Union (86) - : : :- * Project (34) - : : : +- * BroadcastHashJoin LeftOuter BuildRight (33) - : : : :- * HashAggregate (19) - : : : : +- Exchange (18) - 
: : : : +- * HashAggregate (17) - : : : : +- * Project (16) - : : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : : :- * Project (10) - : : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : : :- * Filter (3) - : : : : : : +- * ColumnarToRow (2) - : : : : : : +- Scan parquet default.store_sales (1) - : : : : : +- BroadcastExchange (8) - : : : : : +- * Project (7) - : : : : : +- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.date_dim (4) - : : : : +- BroadcastExchange (14) - : : : : +- * Filter (13) - : : : : +- * ColumnarToRow (12) - : : : : +- Scan parquet default.store (11) - : : : +- BroadcastExchange (32) - : : : +- * HashAggregate (31) - : : : +- Exchange (30) - : : : +- * HashAggregate (29) - : : : +- * Project (28) - : : : +- * BroadcastHashJoin Inner BuildRight (27) - : : : :- * Project (25) - : : : : +- * BroadcastHashJoin Inner BuildRight (24) - : : : : :- * Filter (22) - : : : : : +- * ColumnarToRow (21) - : : : : : +- Scan parquet default.store_returns (20) - : : : : +- ReusedExchange (23) - : : : +- ReusedExchange (26) - : : :- * Project (55) - : : : +- BroadcastNestedLoopJoin Inner BuildRight (54) - : : : :- * HashAggregate (43) - : : : : +- Exchange (42) - : : : : +- * HashAggregate (41) - : : : : +- * Project (40) - : : : : +- * BroadcastHashJoin Inner BuildRight (39) - : : : : :- * Filter (37) - : : : : : +- * ColumnarToRow (36) - : : : : : +- Scan parquet default.catalog_sales (35) - : : : : +- ReusedExchange (38) - : : : +- BroadcastExchange (53) - : : : +- * HashAggregate (52) - : : : +- Exchange (51) - : : : +- * HashAggregate (50) - : : : +- * Project (49) - : : : +- * BroadcastHashJoin Inner BuildRight (48) - : : : :- * Filter (46) - : : : : +- * ColumnarToRow (45) - : : : : +- Scan parquet default.catalog_returns (44) - : : : +- ReusedExchange (47) - : : +- * Project (85) - : : +- * BroadcastHashJoin LeftOuter BuildRight (84) - : : :- * HashAggregate (70) - : : : +- Exchange (69) - : : : +- * HashAggregate (68) - : : : +- * Project (67) - : : : +- * BroadcastHashJoin Inner BuildRight (66) - : : : :- * Project (61) - : : : : +- * BroadcastHashJoin Inner BuildRight (60) - : : : : :- * Filter (58) - : : : : : +- * ColumnarToRow (57) - : : : : : +- Scan parquet default.web_sales (56) - : : : : +- ReusedExchange (59) - : : : +- BroadcastExchange (65) - : : : +- * Filter (64) - : : : +- * ColumnarToRow (63) - : : : +- Scan parquet default.web_page (62) - : : +- BroadcastExchange (83) - : : +- * HashAggregate (82) - : : +- Exchange (81) - : : +- * HashAggregate (80) - : : +- * Project (79) - : : +- * BroadcastHashJoin Inner BuildRight (78) - : : :- * Project (76) - : : : +- * BroadcastHashJoin Inner BuildRight (75) - : : : :- * Filter (73) - : : : : +- * ColumnarToRow (72) - : : : : +- Scan parquet default.web_returns (71) - : : : +- ReusedExchange (74) - : : +- ReusedExchange (77) - : +- * HashAggregate (94) - : +- Exchange (93) - : +- * HashAggregate (92) - : +- * HashAggregate (91) - : +- ReusedExchange (90) - +- * HashAggregate (103) - +- Exchange (102) - +- * HashAggregate (101) - +- * HashAggregate (100) - +- ReusedExchange (99) +TakeOrderedAndProject (104) ++- * HashAggregate (103) + +- Exchange (102) + +- * HashAggregate (101) + +- Union (100) + :- * HashAggregate (89) + : +- Exchange (88) + : +- * HashAggregate (87) + : +- Union (86) + : :- * Project (34) + : : +- * BroadcastHashJoin LeftOuter BuildRight (33) + : : :- * HashAggregate (19) + : : : +- Exchange (18) + : : : +- * HashAggregate (17) + : : : +- * 
Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.date_dim (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.store (11) + : : +- BroadcastExchange (32) + : : +- * HashAggregate (31) + : : +- Exchange (30) + : : +- * HashAggregate (29) + : : +- * Project (28) + : : +- * BroadcastHashJoin Inner BuildRight (27) + : : :- * Project (25) + : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : :- * Filter (22) + : : : : +- * ColumnarToRow (21) + : : : : +- Scan parquet default.store_returns (20) + : : : +- ReusedExchange (23) + : : +- ReusedExchange (26) + : :- * Project (55) + : : +- BroadcastNestedLoopJoin Inner BuildRight (54) + : : :- * HashAggregate (43) + : : : +- Exchange (42) + : : : +- * HashAggregate (41) + : : : +- * Project (40) + : : : +- * BroadcastHashJoin Inner BuildRight (39) + : : : :- * Filter (37) + : : : : +- * ColumnarToRow (36) + : : : : +- Scan parquet default.catalog_sales (35) + : : : +- ReusedExchange (38) + : : +- BroadcastExchange (53) + : : +- * HashAggregate (52) + : : +- Exchange (51) + : : +- * HashAggregate (50) + : : +- * Project (49) + : : +- * BroadcastHashJoin Inner BuildRight (48) + : : :- * Filter (46) + : : : +- * ColumnarToRow (45) + : : : +- Scan parquet default.catalog_returns (44) + : : +- ReusedExchange (47) + : +- * Project (85) + : +- * BroadcastHashJoin LeftOuter BuildRight (84) + : :- * HashAggregate (70) + : : +- Exchange (69) + : : +- * HashAggregate (68) + : : +- * Project (67) + : : +- * BroadcastHashJoin Inner BuildRight (66) + : : :- * Project (61) + : : : +- * BroadcastHashJoin Inner BuildRight (60) + : : : :- * Filter (58) + : : : : +- * ColumnarToRow (57) + : : : : +- Scan parquet default.web_sales (56) + : : : +- ReusedExchange (59) + : : +- BroadcastExchange (65) + : : +- * Filter (64) + : : +- * ColumnarToRow (63) + : : +- Scan parquet default.web_page (62) + : +- BroadcastExchange (83) + : +- * HashAggregate (82) + : +- Exchange (81) + : +- * HashAggregate (80) + : +- * Project (79) + : +- * BroadcastHashJoin Inner BuildRight (78) + : :- * Project (76) + : : +- * BroadcastHashJoin Inner BuildRight (75) + : : :- * Filter (73) + : : : +- * ColumnarToRow (72) + : : : +- Scan parquet default.web_returns (71) + : : +- ReusedExchange (74) + : +- ReusedExchange (77) + :- * HashAggregate (94) + : +- Exchange (93) + : +- * HashAggregate (92) + : +- * HashAggregate (91) + : +- ReusedExchange (90) + +- * HashAggregate (99) + +- Exchange (98) + +- * HashAggregate (97) + +- * HashAggregate (96) + +- ReusedExchange (95) (1) Scan parquet default.store_sales @@ -190,7 +186,7 @@ Results [3]: [s_store_sk#8, sum#12, sum#13] (18) Exchange Input [3]: [s_store_sk#8, sum#12, sum#13] -Arguments: hashpartitioning(s_store_sk#8, 5), true, [id=#14] +Arguments: hashpartitioning(s_store_sk#8, 5), ENSURE_REQUIREMENTS, [id=#14] (19) HashAggregate [codegen id : 8] Input [3]: [s_store_sk#8, sum#12, sum#13] @@ -246,7 +242,7 @@ Results [3]: [s_store_sk#23, sum#26, sum#27] (30) Exchange Input [3]: [s_store_sk#23, sum#26, sum#27] -Arguments: hashpartitioning(s_store_sk#23, 5), true, [id=#28] +Arguments: hashpartitioning(s_store_sk#23, 
5), ENSURE_REQUIREMENTS, [id=#28] (31) HashAggregate [codegen id : 7] Input [3]: [s_store_sk#23, sum#26, sum#27] @@ -303,7 +299,7 @@ Results [3]: [cs_call_center_sk#39, sum#44, sum#45] (42) Exchange Input [3]: [cs_call_center_sk#39, sum#44, sum#45] -Arguments: hashpartitioning(cs_call_center_sk#39, 5), true, [id=#46] +Arguments: hashpartitioning(cs_call_center_sk#39, 5), ENSURE_REQUIREMENTS, [id=#46] (43) HashAggregate [codegen id : 11] Input [3]: [cs_call_center_sk#39, sum#44, sum#45] @@ -347,7 +343,7 @@ Results [2]: [sum#56, sum#57] (51) Exchange Input [2]: [sum#56, sum#57] -Arguments: SinglePartition, true, [id=#58] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#58] (52) HashAggregate [codegen id : 14] Input [2]: [sum#56, sum#57] @@ -429,7 +425,7 @@ Results [3]: [wp_web_page_sk#71, sum#75, sum#76] (69) Exchange Input [3]: [wp_web_page_sk#71, sum#75, sum#76] -Arguments: hashpartitioning(wp_web_page_sk#71, 5), true, [id=#77] +Arguments: hashpartitioning(wp_web_page_sk#71, 5), ENSURE_REQUIREMENTS, [id=#77] (70) HashAggregate [codegen id : 23] Input [3]: [wp_web_page_sk#71, sum#75, sum#76] @@ -485,7 +481,7 @@ Results [3]: [wp_web_page_sk#86, sum#89, sum#90] (81) Exchange Input [3]: [wp_web_page_sk#86, sum#89, sum#90] -Arguments: hashpartitioning(wp_web_page_sk#86, 5), true, [id=#91] +Arguments: hashpartitioning(wp_web_page_sk#86, 5), ENSURE_REQUIREMENTS, [id=#91] (82) HashAggregate [codegen id : 22] Input [3]: [wp_web_page_sk#86, sum#89, sum#90] @@ -518,7 +514,7 @@ Results [8]: [channel#34, id#35, sum#107, isEmpty#108, sum#109, isEmpty#110, sum (88) Exchange Input [8]: [channel#34, id#35, sum#107, isEmpty#108, sum#109, isEmpty#110, sum#111, isEmpty#112] -Arguments: hashpartitioning(channel#34, id#35, 5), true, [id=#113] +Arguments: hashpartitioning(channel#34, id#35, 5), ENSURE_REQUIREMENTS, [id=#113] (89) HashAggregate [codegen id : 25] Input [8]: [channel#34, id#35, sum#107, isEmpty#108, sum#109, isEmpty#110, sum#111, isEmpty#112] @@ -546,7 +542,7 @@ Results [7]: [channel#34, sum#139, isEmpty#140, sum#141, isEmpty#142, sum#143, i (93) Exchange Input [7]: [channel#34, sum#139, isEmpty#140, sum#141, isEmpty#142, sum#143, isEmpty#144] -Arguments: hashpartitioning(channel#34, 5), true, [id=#145] +Arguments: hashpartitioning(channel#34, 5), ENSURE_REQUIREMENTS, [id=#145] (94) HashAggregate [codegen id : 51] Input [7]: [channel#34, sum#139, isEmpty#140, sum#141, isEmpty#142, sum#143, isEmpty#144] @@ -555,75 +551,55 @@ Functions [3]: [sum(sales#130), sum(returns#131), sum(profit#132)] Aggregate Attributes [3]: [sum(sales#130)#146, sum(returns#131)#147, sum(profit#132)#148] Results [5]: [channel#34, null AS id#149, sum(sales#130)#146 AS sales#150, sum(returns#131)#147 AS returns#151, sum(profit#132)#148 AS profit#152] -(95) Union +(95) ReusedExchange [Reuses operator id: 88] +Output [8]: [channel#34, id#35, sum#153, isEmpty#154, sum#155, isEmpty#156, sum#157, isEmpty#158] -(96) HashAggregate [codegen id : 52] -Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -Keys [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -Functions: [] -Aggregate Attributes: [] -Results [5]: [channel#34, id#35, sales#117, returns#118, profit#119] - -(97) Exchange -Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -Arguments: hashpartitioning(channel#34, id#35, sales#117, returns#118, profit#119, 5), true, [id=#153] - -(98) HashAggregate [codegen id : 53] -Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -Keys [5]: [channel#34, id#35, sales#117, 
returns#118, profit#119] -Functions: [] -Aggregate Attributes: [] -Results [5]: [channel#34, id#35, sales#117, returns#118, profit#119] - -(99) ReusedExchange [Reuses operator id: 88] -Output [8]: [channel#34, id#35, sum#154, isEmpty#155, sum#156, isEmpty#157, sum#158, isEmpty#159] - -(100) HashAggregate [codegen id : 78] -Input [8]: [channel#34, id#35, sum#154, isEmpty#155, sum#156, isEmpty#157, sum#158, isEmpty#159] +(96) HashAggregate [codegen id : 76] +Input [8]: [channel#34, id#35, sum#153, isEmpty#154, sum#155, isEmpty#156, sum#157, isEmpty#158] Keys [2]: [channel#34, id#35] -Functions [3]: [sum(sales#17), sum(returns#36), sum(profit#160)] -Aggregate Attributes [3]: [sum(sales#17)#161, sum(returns#36)#162, sum(profit#160)#163] -Results [3]: [sum(sales#17)#161 AS sales#130, sum(returns#36)#162 AS returns#131, sum(profit#160)#163 AS profit#132] +Functions [3]: [sum(sales#17), sum(returns#36), sum(profit#159)] +Aggregate Attributes [3]: [sum(sales#17)#160, sum(returns#36)#161, sum(profit#159)#162] +Results [3]: [sum(sales#17)#160 AS sales#130, sum(returns#36)#161 AS returns#131, sum(profit#159)#162 AS profit#132] -(101) HashAggregate [codegen id : 78] +(97) HashAggregate [codegen id : 76] Input [3]: [sales#130, returns#131, profit#132] Keys: [] Functions [3]: [partial_sum(sales#130), partial_sum(returns#131), partial_sum(profit#132)] -Aggregate Attributes [6]: [sum#164, isEmpty#165, sum#166, isEmpty#167, sum#168, isEmpty#169] -Results [6]: [sum#170, isEmpty#171, sum#172, isEmpty#173, sum#174, isEmpty#175] +Aggregate Attributes [6]: [sum#163, isEmpty#164, sum#165, isEmpty#166, sum#167, isEmpty#168] +Results [6]: [sum#169, isEmpty#170, sum#171, isEmpty#172, sum#173, isEmpty#174] -(102) Exchange -Input [6]: [sum#170, isEmpty#171, sum#172, isEmpty#173, sum#174, isEmpty#175] -Arguments: SinglePartition, true, [id=#176] +(98) Exchange +Input [6]: [sum#169, isEmpty#170, sum#171, isEmpty#172, sum#173, isEmpty#174] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#175] -(103) HashAggregate [codegen id : 79] -Input [6]: [sum#170, isEmpty#171, sum#172, isEmpty#173, sum#174, isEmpty#175] +(99) HashAggregate [codegen id : 77] +Input [6]: [sum#169, isEmpty#170, sum#171, isEmpty#172, sum#173, isEmpty#174] Keys: [] Functions [3]: [sum(sales#130), sum(returns#131), sum(profit#132)] -Aggregate Attributes [3]: [sum(sales#130)#177, sum(returns#131)#178, sum(profit#132)#179] -Results [5]: [null AS channel#180, null AS id#181, sum(sales#130)#177 AS sales#182, sum(returns#131)#178 AS returns#183, sum(profit#132)#179 AS profit#184] +Aggregate Attributes [3]: [sum(sales#130)#176, sum(returns#131)#177, sum(profit#132)#178] +Results [5]: [null AS channel#179, null AS id#180, sum(sales#130)#176 AS sales#181, sum(returns#131)#177 AS returns#182, sum(profit#132)#178 AS profit#183] -(104) Union +(100) Union -(105) HashAggregate [codegen id : 80] +(101) HashAggregate [codegen id : 78] Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] Keys [5]: [channel#34, id#35, sales#117, returns#118, profit#119] Functions: [] Aggregate Attributes: [] Results [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -(106) Exchange +(102) Exchange Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -Arguments: hashpartitioning(channel#34, id#35, sales#117, returns#118, profit#119, 5), true, [id=#185] +Arguments: hashpartitioning(channel#34, id#35, sales#117, returns#118, profit#119, 5), ENSURE_REQUIREMENTS, [id=#184] -(107) HashAggregate [codegen id : 81] +(103) HashAggregate [codegen id : 79] 
Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] Keys [5]: [channel#34, id#35, sales#117, returns#118, profit#119] Functions: [] Aggregate Attributes: [] Results [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -(108) TakeOrderedAndProject +(104) TakeOrderedAndProject Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] Arguments: 100, [channel#34 ASC NULLS FIRST, id#35 ASC NULLS FIRST], [channel#34, id#35, sales#117, returns#118, profit#119] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/simplified.txt index 92c25891f940e..3a5d78047c24b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/simplified.txt @@ -1,172 +1,164 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] - WholeStageCodegen (81) + WholeStageCodegen (79) HashAggregate [channel,id,sales,returns,profit] InputAdapter Exchange [channel,id,sales,returns,profit] #1 - WholeStageCodegen (80) + WholeStageCodegen (78) HashAggregate [channel,id,sales,returns,profit] InputAdapter Union - WholeStageCodegen (53) - HashAggregate [channel,id,sales,returns,profit] + WholeStageCodegen (25) + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange [channel,id,sales,returns,profit] #2 - WholeStageCodegen (52) - HashAggregate [channel,id,sales,returns,profit] + Exchange [channel,id] #2 + WholeStageCodegen (24) + HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Union - WholeStageCodegen (25) - HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - Exchange [channel,id] #3 - WholeStageCodegen (24) - HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - Union - WholeStageCodegen (8) - Project [s_store_sk,sales,returns,profit,profit_loss] - BroadcastHashJoin [s_store_sk,s_store_sk] - HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit)),sales,profit,sum,sum] + WholeStageCodegen (8) + Project [s_store_sk,sales,returns,profit,profit_loss] + BroadcastHashJoin [s_store_sk,s_store_sk] + HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit)),sales,profit,sum,sum] + InputAdapter + Exchange [s_store_sk] #3 + WholeStageCodegen (3) + HashAggregate [s_store_sk,ss_ext_sales_price,ss_net_profit] [sum,sum,sum,sum] + Project [ss_ext_sales_price,ss_net_profit,s_store_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] InputAdapter - Exchange [s_store_sk] #4 - WholeStageCodegen (3) - HashAggregate [s_store_sk,ss_ext_sales_price,ss_net_profit] [sum,sum,sum,sum] - Project [ss_ext_sales_price,ss_net_profit,s_store_sk] - BroadcastHashJoin 
[ss_store_sk,s_store_sk] - Project [ss_store_sk,ss_ext_sales_price,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #6 - WholeStageCodegen (2) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (7) - HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss)),returns,profit_loss,sum,sum] - InputAdapter - Exchange [s_store_sk] #8 - WholeStageCodegen (6) - HashAggregate [s_store_sk,sr_return_amt,sr_net_loss] [sum,sum,sum,sum] - Project [sr_return_amt,sr_net_loss,s_store_sk] - BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Project [sr_returned_date_sk,sr_return_amt,sr_net_loss,s_store_sk] - BroadcastHashJoin [sr_store_sk,s_store_sk] - Filter [sr_returned_date_sk,sr_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] - InputAdapter - ReusedExchange [s_store_sk] #6 - InputAdapter - ReusedExchange [d_date_sk] #5 - WholeStageCodegen (15) - Project [cs_call_center_sk,sales,returns,profit,profit_loss] + Scan parquet default.date_dim [d_date_sk,d_date] InputAdapter - BroadcastNestedLoopJoin - WholeStageCodegen (11) - HashAggregate [cs_call_center_sk,sum,sum] [sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit)),sales,profit,sum,sum] + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss)),returns,profit_loss,sum,sum] + InputAdapter + Exchange [s_store_sk] #7 + WholeStageCodegen (6) + HashAggregate [s_store_sk,sr_return_amt,sr_net_loss] [sum,sum,sum,sum] + Project [sr_return_amt,sr_net_loss,s_store_sk] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [sr_returned_date_sk,sr_return_amt,sr_net_loss,s_store_sk] + BroadcastHashJoin [sr_store_sk,s_store_sk] + Filter [sr_returned_date_sk,sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] + InputAdapter + ReusedExchange [s_store_sk] #5 + InputAdapter + ReusedExchange [d_date_sk] #4 + WholeStageCodegen (15) + Project [cs_call_center_sk,sales,returns,profit,profit_loss] + InputAdapter + BroadcastNestedLoopJoin + WholeStageCodegen (11) + HashAggregate [cs_call_center_sk,sum,sum] [sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit)),sales,profit,sum,sum] + InputAdapter + Exchange [cs_call_center_sk] #8 + WholeStageCodegen (10) + HashAggregate [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] [sum,sum,sum,sum] + Project [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow InputAdapter - Exchange [cs_call_center_sk] #9 - WholeStageCodegen 
(10) - HashAggregate [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] [sum,sum,sum,sum] - Project [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_call_center_sk,cs_ext_sales_price,cs_net_profit] - InputAdapter - ReusedExchange [d_date_sk] #5 - BroadcastExchange #10 - WholeStageCodegen (14) - HashAggregate [sum,sum] [sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss)),returns,profit_loss,sum,sum] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + InputAdapter + ReusedExchange [d_date_sk] #4 + BroadcastExchange #9 + WholeStageCodegen (14) + HashAggregate [sum,sum] [sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss)),returns,profit_loss,sum,sum] + InputAdapter + Exchange #10 + WholeStageCodegen (13) + HashAggregate [cr_return_amount,cr_net_loss] [sum,sum,sum,sum] + Project [cr_return_amount,cr_net_loss] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Filter [cr_returned_date_sk] + ColumnarToRow InputAdapter - Exchange #11 - WholeStageCodegen (13) - HashAggregate [cr_return_amount,cr_net_loss] [sum,sum,sum,sum] - Project [cr_return_amount,cr_net_loss] - BroadcastHashJoin [cr_returned_date_sk,d_date_sk] - Filter [cr_returned_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_returned_date_sk,cr_return_amount,cr_net_loss] - InputAdapter - ReusedExchange [d_date_sk] #5 - WholeStageCodegen (23) - Project [wp_web_page_sk,sales,returns,profit,profit_loss] - BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk] - HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit)),sales,profit,sum,sum] + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_return_amount,cr_net_loss] + InputAdapter + ReusedExchange [d_date_sk] #4 + WholeStageCodegen (23) + Project [wp_web_page_sk,sales,returns,profit,profit_loss] + BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk] + HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit)),sales,profit,sum,sum] + InputAdapter + Exchange [wp_web_page_sk] #11 + WholeStageCodegen (18) + HashAggregate [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum] + Project [ws_ext_sales_price,ws_net_profit,wp_web_page_sk] + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project [ws_web_page_sk,ws_ext_sales_price,ws_net_profit] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_web_page_sk,ws_ext_sales_price,ws_net_profit] InputAdapter - Exchange [wp_web_page_sk] #12 - WholeStageCodegen (18) - HashAggregate [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum] - Project [ws_ext_sales_price,ws_net_profit,wp_web_page_sk] - BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] - Project [ws_web_page_sk,ws_ext_sales_price,ws_net_profit] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_web_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_web_page_sk,ws_ext_sales_price,ws_net_profit] - InputAdapter - ReusedExchange [d_date_sk] #5 - InputAdapter - BroadcastExchange #13 - WholeStageCodegen (17) - Filter [wp_web_page_sk] - ColumnarToRow - InputAdapter - Scan parquet 
default.web_page [wp_web_page_sk] - InputAdapter - BroadcastExchange #14 - WholeStageCodegen (22) - HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(wr_return_amt)),sum(UnscaledValue(wr_net_loss)),returns,profit_loss,sum,sum] + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (17) + Filter [wp_web_page_sk] + ColumnarToRow InputAdapter - Exchange [wp_web_page_sk] #15 - WholeStageCodegen (21) - HashAggregate [wp_web_page_sk,wr_return_amt,wr_net_loss] [sum,sum,sum,sum] - Project [wr_return_amt,wr_net_loss,wp_web_page_sk] - BroadcastHashJoin [wr_returned_date_sk,d_date_sk] - Project [wr_returned_date_sk,wr_return_amt,wr_net_loss,wp_web_page_sk] - BroadcastHashJoin [wr_web_page_sk,wp_web_page_sk] - Filter [wr_returned_date_sk,wr_web_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_returns [wr_returned_date_sk,wr_web_page_sk,wr_return_amt,wr_net_loss] - InputAdapter - ReusedExchange [wp_web_page_sk] #13 - InputAdapter - ReusedExchange [d_date_sk] #5 - WholeStageCodegen (51) - HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - Exchange [channel] #16 - WholeStageCodegen (50) - HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Scan parquet default.web_page [wp_web_page_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (22) + HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(wr_return_amt)),sum(UnscaledValue(wr_net_loss)),returns,profit_loss,sum,sum] InputAdapter - ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #3 - WholeStageCodegen (79) + Exchange [wp_web_page_sk] #14 + WholeStageCodegen (21) + HashAggregate [wp_web_page_sk,wr_return_amt,wr_net_loss] [sum,sum,sum,sum] + Project [wr_return_amt,wr_net_loss,wp_web_page_sk] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Project [wr_returned_date_sk,wr_return_amt,wr_net_loss,wp_web_page_sk] + BroadcastHashJoin [wr_web_page_sk,wp_web_page_sk] + Filter [wr_returned_date_sk,wr_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_returned_date_sk,wr_web_page_sk,wr_return_amt,wr_net_loss] + InputAdapter + ReusedExchange [wp_web_page_sk] #12 + InputAdapter + ReusedExchange [d_date_sk] #4 + WholeStageCodegen (51) + HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel] #15 + WholeStageCodegen (50) + HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 + WholeStageCodegen (77) HashAggregate [sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),channel,id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange #17 - WholeStageCodegen (78) + Exchange #16 + WholeStageCodegen (76) HashAggregate [sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] 
HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #3 + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/explain.txt index c18698ebc5b45..2d3ca673c2b08 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/explain.txt @@ -1,112 +1,108 @@ == Physical Plan == -TakeOrderedAndProject (108) -+- * HashAggregate (107) - +- Exchange (106) - +- * HashAggregate (105) - +- Union (104) - :- * HashAggregate (98) - : +- Exchange (97) - : +- * HashAggregate (96) - : +- Union (95) - : :- * HashAggregate (89) - : : +- Exchange (88) - : : +- * HashAggregate (87) - : : +- Union (86) - : : :- * Project (34) - : : : +- * BroadcastHashJoin LeftOuter BuildRight (33) - : : : :- * HashAggregate (19) - : : : : +- Exchange (18) - : : : : +- * HashAggregate (17) - : : : : +- * Project (16) - : : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : : :- * Project (10) - : : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : : :- * Filter (3) - : : : : : : +- * ColumnarToRow (2) - : : : : : : +- Scan parquet default.store_sales (1) - : : : : : +- BroadcastExchange (8) - : : : : : +- * Project (7) - : : : : : +- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.date_dim (4) - : : : : +- BroadcastExchange (14) - : : : : +- * Filter (13) - : : : : +- * ColumnarToRow (12) - : : : : +- Scan parquet default.store (11) - : : : +- BroadcastExchange (32) - : : : +- * HashAggregate (31) - : : : +- Exchange (30) - : : : +- * HashAggregate (29) - : : : +- * Project (28) - : : : +- * BroadcastHashJoin Inner BuildRight (27) - : : : :- * Project (25) - : : : : +- * BroadcastHashJoin Inner BuildRight (24) - : : : : :- * Filter (22) - : : : : : +- * ColumnarToRow (21) - : : : : : +- Scan parquet default.store_returns (20) - : : : : +- ReusedExchange (23) - : : : +- ReusedExchange (26) - : : :- * Project (55) - : : : +- BroadcastNestedLoopJoin Inner BuildLeft (54) - : : : :- BroadcastExchange (44) - : : : : +- * HashAggregate (43) - : : : : +- Exchange (42) - : : : : +- * HashAggregate (41) - : : : : +- * Project (40) - : : : : +- * BroadcastHashJoin Inner BuildRight (39) - : : : : :- * Filter (37) - : : : : : +- * ColumnarToRow (36) - : : : : : +- Scan parquet default.catalog_sales (35) - : : : : +- ReusedExchange (38) - : : : +- * HashAggregate (53) - : : : +- Exchange (52) - : : : +- * HashAggregate (51) - : : : +- * Project (50) - : : : +- * BroadcastHashJoin Inner BuildRight (49) - : : : :- * Filter (47) - : : : : +- * ColumnarToRow (46) - : : : : +- Scan parquet default.catalog_returns (45) - : : : +- ReusedExchange (48) - : : +- * Project (85) - : : +- * BroadcastHashJoin LeftOuter BuildRight (84) - : : :- * HashAggregate (70) - : : : +- Exchange (69) - : : : +- * HashAggregate (68) - : : : +- * Project (67) - : : : +- * BroadcastHashJoin Inner BuildRight (66) - : : : :- * Project (61) - : : : : +- * BroadcastHashJoin Inner BuildRight (60) - : : : : :- * Filter (58) - : : : : : +- * ColumnarToRow (57) - : : : : : +- Scan parquet default.web_sales (56) - : : : : +- ReusedExchange (59) - : : 
: +- BroadcastExchange (65) - : : : +- * Filter (64) - : : : +- * ColumnarToRow (63) - : : : +- Scan parquet default.web_page (62) - : : +- BroadcastExchange (83) - : : +- * HashAggregate (82) - : : +- Exchange (81) - : : +- * HashAggregate (80) - : : +- * Project (79) - : : +- * BroadcastHashJoin Inner BuildRight (78) - : : :- * Project (76) - : : : +- * BroadcastHashJoin Inner BuildRight (75) - : : : :- * Filter (73) - : : : : +- * ColumnarToRow (72) - : : : : +- Scan parquet default.web_returns (71) - : : : +- ReusedExchange (74) - : : +- ReusedExchange (77) - : +- * HashAggregate (94) - : +- Exchange (93) - : +- * HashAggregate (92) - : +- * HashAggregate (91) - : +- ReusedExchange (90) - +- * HashAggregate (103) - +- Exchange (102) - +- * HashAggregate (101) - +- * HashAggregate (100) - +- ReusedExchange (99) +TakeOrderedAndProject (104) ++- * HashAggregate (103) + +- Exchange (102) + +- * HashAggregate (101) + +- Union (100) + :- * HashAggregate (89) + : +- Exchange (88) + : +- * HashAggregate (87) + : +- Union (86) + : :- * Project (34) + : : +- * BroadcastHashJoin LeftOuter BuildRight (33) + : : :- * HashAggregate (19) + : : : +- Exchange (18) + : : : +- * HashAggregate (17) + : : : +- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.date_dim (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.store (11) + : : +- BroadcastExchange (32) + : : +- * HashAggregate (31) + : : +- Exchange (30) + : : +- * HashAggregate (29) + : : +- * Project (28) + : : +- * BroadcastHashJoin Inner BuildRight (27) + : : :- * Project (25) + : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : :- * Filter (22) + : : : : +- * ColumnarToRow (21) + : : : : +- Scan parquet default.store_returns (20) + : : : +- ReusedExchange (23) + : : +- ReusedExchange (26) + : :- * Project (55) + : : +- BroadcastNestedLoopJoin Inner BuildLeft (54) + : : :- BroadcastExchange (44) + : : : +- * HashAggregate (43) + : : : +- Exchange (42) + : : : +- * HashAggregate (41) + : : : +- * Project (40) + : : : +- * BroadcastHashJoin Inner BuildRight (39) + : : : :- * Filter (37) + : : : : +- * ColumnarToRow (36) + : : : : +- Scan parquet default.catalog_sales (35) + : : : +- ReusedExchange (38) + : : +- * HashAggregate (53) + : : +- Exchange (52) + : : +- * HashAggregate (51) + : : +- * Project (50) + : : +- * BroadcastHashJoin Inner BuildRight (49) + : : :- * Filter (47) + : : : +- * ColumnarToRow (46) + : : : +- Scan parquet default.catalog_returns (45) + : : +- ReusedExchange (48) + : +- * Project (85) + : +- * BroadcastHashJoin LeftOuter BuildRight (84) + : :- * HashAggregate (70) + : : +- Exchange (69) + : : +- * HashAggregate (68) + : : +- * Project (67) + : : +- * BroadcastHashJoin Inner BuildRight (66) + : : :- * Project (61) + : : : +- * BroadcastHashJoin Inner BuildRight (60) + : : : :- * Filter (58) + : : : : +- * ColumnarToRow (57) + : : : : +- Scan parquet default.web_sales (56) + : : : +- ReusedExchange (59) + : : +- BroadcastExchange (65) + : : +- * Filter (64) + : : +- * ColumnarToRow (63) + : : +- Scan parquet default.web_page (62) + : +- BroadcastExchange (83) + : +- * 
HashAggregate (82) + : +- Exchange (81) + : +- * HashAggregate (80) + : +- * Project (79) + : +- * BroadcastHashJoin Inner BuildRight (78) + : :- * Project (76) + : : +- * BroadcastHashJoin Inner BuildRight (75) + : : :- * Filter (73) + : : : +- * ColumnarToRow (72) + : : : +- Scan parquet default.web_returns (71) + : : +- ReusedExchange (74) + : +- ReusedExchange (77) + :- * HashAggregate (94) + : +- Exchange (93) + : +- * HashAggregate (92) + : +- * HashAggregate (91) + : +- ReusedExchange (90) + +- * HashAggregate (99) + +- Exchange (98) + +- * HashAggregate (97) + +- * HashAggregate (96) + +- ReusedExchange (95) (1) Scan parquet default.store_sales @@ -190,7 +186,7 @@ Results [3]: [s_store_sk#8, sum#12, sum#13] (18) Exchange Input [3]: [s_store_sk#8, sum#12, sum#13] -Arguments: hashpartitioning(s_store_sk#8, 5), true, [id=#14] +Arguments: hashpartitioning(s_store_sk#8, 5), ENSURE_REQUIREMENTS, [id=#14] (19) HashAggregate [codegen id : 8] Input [3]: [s_store_sk#8, sum#12, sum#13] @@ -246,7 +242,7 @@ Results [3]: [s_store_sk#23, sum#26, sum#27] (30) Exchange Input [3]: [s_store_sk#23, sum#26, sum#27] -Arguments: hashpartitioning(s_store_sk#23, 5), true, [id=#28] +Arguments: hashpartitioning(s_store_sk#23, 5), ENSURE_REQUIREMENTS, [id=#28] (31) HashAggregate [codegen id : 7] Input [3]: [s_store_sk#23, sum#26, sum#27] @@ -303,7 +299,7 @@ Results [3]: [cs_call_center_sk#39, sum#44, sum#45] (42) Exchange Input [3]: [cs_call_center_sk#39, sum#44, sum#45] -Arguments: hashpartitioning(cs_call_center_sk#39, 5), true, [id=#46] +Arguments: hashpartitioning(cs_call_center_sk#39, 5), ENSURE_REQUIREMENTS, [id=#46] (43) HashAggregate [codegen id : 11] Input [3]: [cs_call_center_sk#39, sum#44, sum#45] @@ -351,7 +347,7 @@ Results [2]: [sum#57, sum#58] (52) Exchange Input [2]: [sum#57, sum#58] -Arguments: SinglePartition, true, [id=#59] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#59] (53) HashAggregate [codegen id : 14] Input [2]: [sum#57, sum#58] @@ -429,7 +425,7 @@ Results [3]: [wp_web_page_sk#71, sum#75, sum#76] (69) Exchange Input [3]: [wp_web_page_sk#71, sum#75, sum#76] -Arguments: hashpartitioning(wp_web_page_sk#71, 5), true, [id=#77] +Arguments: hashpartitioning(wp_web_page_sk#71, 5), ENSURE_REQUIREMENTS, [id=#77] (70) HashAggregate [codegen id : 23] Input [3]: [wp_web_page_sk#71, sum#75, sum#76] @@ -485,7 +481,7 @@ Results [3]: [wp_web_page_sk#86, sum#89, sum#90] (81) Exchange Input [3]: [wp_web_page_sk#86, sum#89, sum#90] -Arguments: hashpartitioning(wp_web_page_sk#86, 5), true, [id=#91] +Arguments: hashpartitioning(wp_web_page_sk#86, 5), ENSURE_REQUIREMENTS, [id=#91] (82) HashAggregate [codegen id : 22] Input [3]: [wp_web_page_sk#86, sum#89, sum#90] @@ -518,7 +514,7 @@ Results [8]: [channel#34, id#35, sum#107, isEmpty#108, sum#109, isEmpty#110, sum (88) Exchange Input [8]: [channel#34, id#35, sum#107, isEmpty#108, sum#109, isEmpty#110, sum#111, isEmpty#112] -Arguments: hashpartitioning(channel#34, id#35, 5), true, [id=#113] +Arguments: hashpartitioning(channel#34, id#35, 5), ENSURE_REQUIREMENTS, [id=#113] (89) HashAggregate [codegen id : 25] Input [8]: [channel#34, id#35, sum#107, isEmpty#108, sum#109, isEmpty#110, sum#111, isEmpty#112] @@ -546,7 +542,7 @@ Results [7]: [channel#34, sum#139, isEmpty#140, sum#141, isEmpty#142, sum#143, i (93) Exchange Input [7]: [channel#34, sum#139, isEmpty#140, sum#141, isEmpty#142, sum#143, isEmpty#144] -Arguments: hashpartitioning(channel#34, 5), true, [id=#145] +Arguments: hashpartitioning(channel#34, 5), ENSURE_REQUIREMENTS, [id=#145] (94) 
HashAggregate [codegen id : 51] Input [7]: [channel#34, sum#139, isEmpty#140, sum#141, isEmpty#142, sum#143, isEmpty#144] @@ -555,75 +551,55 @@ Functions [3]: [sum(sales#130), sum(returns#131), sum(profit#132)] Aggregate Attributes [3]: [sum(sales#130)#146, sum(returns#131)#147, sum(profit#132)#148] Results [5]: [channel#34, null AS id#149, sum(sales#130)#146 AS sales#150, sum(returns#131)#147 AS returns#151, sum(profit#132)#148 AS profit#152] -(95) Union +(95) ReusedExchange [Reuses operator id: 88] +Output [8]: [channel#34, id#35, sum#153, isEmpty#154, sum#155, isEmpty#156, sum#157, isEmpty#158] -(96) HashAggregate [codegen id : 52] -Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -Keys [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -Functions: [] -Aggregate Attributes: [] -Results [5]: [channel#34, id#35, sales#117, returns#118, profit#119] - -(97) Exchange -Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -Arguments: hashpartitioning(channel#34, id#35, sales#117, returns#118, profit#119, 5), true, [id=#153] - -(98) HashAggregate [codegen id : 53] -Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -Keys [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -Functions: [] -Aggregate Attributes: [] -Results [5]: [channel#34, id#35, sales#117, returns#118, profit#119] - -(99) ReusedExchange [Reuses operator id: 88] -Output [8]: [channel#34, id#35, sum#154, isEmpty#155, sum#156, isEmpty#157, sum#158, isEmpty#159] - -(100) HashAggregate [codegen id : 78] -Input [8]: [channel#34, id#35, sum#154, isEmpty#155, sum#156, isEmpty#157, sum#158, isEmpty#159] +(96) HashAggregate [codegen id : 76] +Input [8]: [channel#34, id#35, sum#153, isEmpty#154, sum#155, isEmpty#156, sum#157, isEmpty#158] Keys [2]: [channel#34, id#35] -Functions [3]: [sum(sales#17), sum(returns#36), sum(profit#160)] -Aggregate Attributes [3]: [sum(sales#17)#161, sum(returns#36)#162, sum(profit#160)#163] -Results [3]: [sum(sales#17)#161 AS sales#130, sum(returns#36)#162 AS returns#131, sum(profit#160)#163 AS profit#132] +Functions [3]: [sum(sales#17), sum(returns#36), sum(profit#159)] +Aggregate Attributes [3]: [sum(sales#17)#160, sum(returns#36)#161, sum(profit#159)#162] +Results [3]: [sum(sales#17)#160 AS sales#130, sum(returns#36)#161 AS returns#131, sum(profit#159)#162 AS profit#132] -(101) HashAggregate [codegen id : 78] +(97) HashAggregate [codegen id : 76] Input [3]: [sales#130, returns#131, profit#132] Keys: [] Functions [3]: [partial_sum(sales#130), partial_sum(returns#131), partial_sum(profit#132)] -Aggregate Attributes [6]: [sum#164, isEmpty#165, sum#166, isEmpty#167, sum#168, isEmpty#169] -Results [6]: [sum#170, isEmpty#171, sum#172, isEmpty#173, sum#174, isEmpty#175] +Aggregate Attributes [6]: [sum#163, isEmpty#164, sum#165, isEmpty#166, sum#167, isEmpty#168] +Results [6]: [sum#169, isEmpty#170, sum#171, isEmpty#172, sum#173, isEmpty#174] -(102) Exchange -Input [6]: [sum#170, isEmpty#171, sum#172, isEmpty#173, sum#174, isEmpty#175] -Arguments: SinglePartition, true, [id=#176] +(98) Exchange +Input [6]: [sum#169, isEmpty#170, sum#171, isEmpty#172, sum#173, isEmpty#174] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#175] -(103) HashAggregate [codegen id : 79] -Input [6]: [sum#170, isEmpty#171, sum#172, isEmpty#173, sum#174, isEmpty#175] +(99) HashAggregate [codegen id : 77] +Input [6]: [sum#169, isEmpty#170, sum#171, isEmpty#172, sum#173, isEmpty#174] Keys: [] Functions [3]: [sum(sales#130), sum(returns#131), sum(profit#132)] -Aggregate 
Attributes [3]: [sum(sales#130)#177, sum(returns#131)#178, sum(profit#132)#179] -Results [5]: [null AS channel#180, null AS id#181, sum(sales#130)#177 AS sales#182, sum(returns#131)#178 AS returns#183, sum(profit#132)#179 AS profit#184] +Aggregate Attributes [3]: [sum(sales#130)#176, sum(returns#131)#177, sum(profit#132)#178] +Results [5]: [null AS channel#179, null AS id#180, sum(sales#130)#176 AS sales#181, sum(returns#131)#177 AS returns#182, sum(profit#132)#178 AS profit#183] -(104) Union +(100) Union -(105) HashAggregate [codegen id : 80] +(101) HashAggregate [codegen id : 78] Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] Keys [5]: [channel#34, id#35, sales#117, returns#118, profit#119] Functions: [] Aggregate Attributes: [] Results [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -(106) Exchange +(102) Exchange Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -Arguments: hashpartitioning(channel#34, id#35, sales#117, returns#118, profit#119, 5), true, [id=#185] +Arguments: hashpartitioning(channel#34, id#35, sales#117, returns#118, profit#119, 5), ENSURE_REQUIREMENTS, [id=#184] -(107) HashAggregate [codegen id : 81] +(103) HashAggregate [codegen id : 79] Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] Keys [5]: [channel#34, id#35, sales#117, returns#118, profit#119] Functions: [] Aggregate Attributes: [] Results [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -(108) TakeOrderedAndProject +(104) TakeOrderedAndProject Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] Arguments: 100, [channel#34 ASC NULLS FIRST, id#35 ASC NULLS FIRST], [channel#34, id#35, sales#117, returns#118, profit#119] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/simplified.txt index 864039e512231..47b743fee91dd 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/simplified.txt @@ -1,172 +1,164 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] - WholeStageCodegen (81) + WholeStageCodegen (79) HashAggregate [channel,id,sales,returns,profit] InputAdapter Exchange [channel,id,sales,returns,profit] #1 - WholeStageCodegen (80) + WholeStageCodegen (78) HashAggregate [channel,id,sales,returns,profit] InputAdapter Union - WholeStageCodegen (53) - HashAggregate [channel,id,sales,returns,profit] + WholeStageCodegen (25) + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange [channel,id,sales,returns,profit] #2 - WholeStageCodegen (52) - HashAggregate [channel,id,sales,returns,profit] + Exchange [channel,id] #2 + WholeStageCodegen (24) + HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Union - WholeStageCodegen (25) - HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - Exchange [channel,id] #3 - WholeStageCodegen (24) - HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - Union - WholeStageCodegen (8) - Project 
[s_store_sk,sales,returns,profit,profit_loss] - BroadcastHashJoin [s_store_sk,s_store_sk] - HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit)),sales,profit,sum,sum] + WholeStageCodegen (8) + Project [s_store_sk,sales,returns,profit,profit_loss] + BroadcastHashJoin [s_store_sk,s_store_sk] + HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit)),sales,profit,sum,sum] + InputAdapter + Exchange [s_store_sk] #3 + WholeStageCodegen (3) + HashAggregate [s_store_sk,ss_ext_sales_price,ss_net_profit] [sum,sum,sum,sum] + Project [ss_ext_sales_price,ss_net_profit,s_store_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] InputAdapter - Exchange [s_store_sk] #4 - WholeStageCodegen (3) - HashAggregate [s_store_sk,ss_ext_sales_price,ss_net_profit] [sum,sum,sum,sum] - Project [ss_ext_sales_price,ss_net_profit,s_store_sk] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_store_sk,ss_ext_sales_price,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #6 - WholeStageCodegen (2) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (7) - HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss)),returns,profit_loss,sum,sum] - InputAdapter - Exchange [s_store_sk] #8 - WholeStageCodegen (6) - HashAggregate [s_store_sk,sr_return_amt,sr_net_loss] [sum,sum,sum,sum] - Project [sr_return_amt,sr_net_loss,s_store_sk] - BroadcastHashJoin [sr_store_sk,s_store_sk] - Project [sr_store_sk,sr_return_amt,sr_net_loss] - BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Filter [sr_returned_date_sk,sr_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] - InputAdapter - ReusedExchange [d_date_sk] #5 - InputAdapter - ReusedExchange [s_store_sk] #6 - WholeStageCodegen (15) - Project [cs_call_center_sk,sales,returns,profit,profit_loss] + Scan parquet default.date_dim [d_date_sk,d_date] InputAdapter - BroadcastNestedLoopJoin - BroadcastExchange #9 - WholeStageCodegen (11) - HashAggregate [cs_call_center_sk,sum,sum] [sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit)),sales,profit,sum,sum] + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss)),returns,profit_loss,sum,sum] + InputAdapter + Exchange [s_store_sk] #7 + WholeStageCodegen (6) + 
HashAggregate [s_store_sk,sr_return_amt,sr_net_loss] [sum,sum,sum,sum] + Project [sr_return_amt,sr_net_loss,s_store_sk] + BroadcastHashJoin [sr_store_sk,s_store_sk] + Project [sr_store_sk,sr_return_amt,sr_net_loss] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Filter [sr_returned_date_sk,sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] InputAdapter - Exchange [cs_call_center_sk] #10 - WholeStageCodegen (10) - HashAggregate [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] [sum,sum,sum,sum] - Project [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_call_center_sk,cs_ext_sales_price,cs_net_profit] - InputAdapter - ReusedExchange [d_date_sk] #5 - WholeStageCodegen (14) - HashAggregate [sum,sum] [sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss)),returns,profit_loss,sum,sum] + ReusedExchange [d_date_sk] #4 + InputAdapter + ReusedExchange [s_store_sk] #5 + WholeStageCodegen (15) + Project [cs_call_center_sk,sales,returns,profit,profit_loss] + InputAdapter + BroadcastNestedLoopJoin + BroadcastExchange #8 + WholeStageCodegen (11) + HashAggregate [cs_call_center_sk,sum,sum] [sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit)),sales,profit,sum,sum] + InputAdapter + Exchange [cs_call_center_sk] #9 + WholeStageCodegen (10) + HashAggregate [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] [sum,sum,sum,sum] + Project [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + InputAdapter + ReusedExchange [d_date_sk] #4 + WholeStageCodegen (14) + HashAggregate [sum,sum] [sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss)),returns,profit_loss,sum,sum] + InputAdapter + Exchange #10 + WholeStageCodegen (13) + HashAggregate [cr_return_amount,cr_net_loss] [sum,sum,sum,sum] + Project [cr_return_amount,cr_net_loss] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Filter [cr_returned_date_sk] + ColumnarToRow InputAdapter - Exchange #11 - WholeStageCodegen (13) - HashAggregate [cr_return_amount,cr_net_loss] [sum,sum,sum,sum] - Project [cr_return_amount,cr_net_loss] - BroadcastHashJoin [cr_returned_date_sk,d_date_sk] - Filter [cr_returned_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_returned_date_sk,cr_return_amount,cr_net_loss] - InputAdapter - ReusedExchange [d_date_sk] #5 - WholeStageCodegen (23) - Project [wp_web_page_sk,sales,returns,profit,profit_loss] - BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk] - HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit)),sales,profit,sum,sum] + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_return_amount,cr_net_loss] InputAdapter - Exchange [wp_web_page_sk] #12 - WholeStageCodegen (18) - HashAggregate [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum] - Project [ws_ext_sales_price,ws_net_profit,wp_web_page_sk] - BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] - Project [ws_web_page_sk,ws_ext_sales_price,ws_net_profit] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_web_page_sk] - 
ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_web_page_sk,ws_ext_sales_price,ws_net_profit] - InputAdapter - ReusedExchange [d_date_sk] #5 - InputAdapter - BroadcastExchange #13 - WholeStageCodegen (17) - Filter [wp_web_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_page [wp_web_page_sk] - InputAdapter - BroadcastExchange #14 - WholeStageCodegen (22) - HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(wr_return_amt)),sum(UnscaledValue(wr_net_loss)),returns,profit_loss,sum,sum] + ReusedExchange [d_date_sk] #4 + WholeStageCodegen (23) + Project [wp_web_page_sk,sales,returns,profit,profit_loss] + BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk] + HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit)),sales,profit,sum,sum] + InputAdapter + Exchange [wp_web_page_sk] #11 + WholeStageCodegen (18) + HashAggregate [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum] + Project [ws_ext_sales_price,ws_net_profit,wp_web_page_sk] + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project [ws_web_page_sk,ws_ext_sales_price,ws_net_profit] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_web_page_sk,ws_ext_sales_price,ws_net_profit] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (17) + Filter [wp_web_page_sk] + ColumnarToRow InputAdapter - Exchange [wp_web_page_sk] #15 - WholeStageCodegen (21) - HashAggregate [wp_web_page_sk,wr_return_amt,wr_net_loss] [sum,sum,sum,sum] - Project [wr_return_amt,wr_net_loss,wp_web_page_sk] - BroadcastHashJoin [wr_web_page_sk,wp_web_page_sk] - Project [wr_web_page_sk,wr_return_amt,wr_net_loss] - BroadcastHashJoin [wr_returned_date_sk,d_date_sk] - Filter [wr_returned_date_sk,wr_web_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_returns [wr_returned_date_sk,wr_web_page_sk,wr_return_amt,wr_net_loss] - InputAdapter - ReusedExchange [d_date_sk] #5 - InputAdapter - ReusedExchange [wp_web_page_sk] #13 - WholeStageCodegen (51) - HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - Exchange [channel] #16 - WholeStageCodegen (50) - HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Scan parquet default.web_page [wp_web_page_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (22) + HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(wr_return_amt)),sum(UnscaledValue(wr_net_loss)),returns,profit_loss,sum,sum] InputAdapter - ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #3 - WholeStageCodegen (79) + Exchange [wp_web_page_sk] #14 + WholeStageCodegen (21) + HashAggregate [wp_web_page_sk,wr_return_amt,wr_net_loss] [sum,sum,sum,sum] + Project [wr_return_amt,wr_net_loss,wp_web_page_sk] + BroadcastHashJoin [wr_web_page_sk,wp_web_page_sk] + Project [wr_web_page_sk,wr_return_amt,wr_net_loss] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Filter [wr_returned_date_sk,wr_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns 
[wr_returned_date_sk,wr_web_page_sk,wr_return_amt,wr_net_loss] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + ReusedExchange [wp_web_page_sk] #12 + WholeStageCodegen (51) + HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel] #15 + WholeStageCodegen (50) + HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 + WholeStageCodegen (77) HashAggregate [sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),channel,id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange #17 - WholeStageCodegen (78) + Exchange #16 + WholeStageCodegen (76) HashAggregate [sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #3 + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt index 025e5a6f94741..4aa23cbe8b905 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt @@ -1,129 +1,125 @@ == Physical Plan == -TakeOrderedAndProject (125) -+- * HashAggregate (124) - +- Exchange (123) - +- * HashAggregate (122) - +- Union (121) - :- * HashAggregate (115) - : +- Exchange (114) - : +- * HashAggregate (113) - : +- Union (112) - : :- * HashAggregate (106) - : : +- Exchange (105) - : : +- * HashAggregate (104) - : : +- Union (103) - : : :- * HashAggregate (42) - : : : +- Exchange (41) - : : : +- * HashAggregate (40) - : : : +- * Project (39) - : : : +- * BroadcastHashJoin Inner BuildRight (38) - : : : :- * Project (33) - : : : : +- * BroadcastHashJoin Inner BuildRight (32) - : : : : :- * Project (26) - : : : : : +- * BroadcastHashJoin Inner BuildRight (25) - : : : : : :- * Project (19) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (18) - : : : : : : :- * Project (12) - : : : : : : : +- SortMergeJoin LeftOuter (11) - : : : : : : : :- * Sort (5) - : : : : : : : : +- Exchange (4) - : : : : : : : : +- * Filter (3) - : : : : : : : : +- * ColumnarToRow (2) - : : : : : : : : +- Scan parquet default.store_sales (1) - : : : : : : : +- * Sort (10) - : : : : : : : +- Exchange (9) - : : : : : : : +- * Filter (8) - : : : : : : : +- * ColumnarToRow (7) - : : : : : : : +- Scan parquet default.store_returns (6) - : : : : : : +- BroadcastExchange (17) - : : : : : : +- * Project (16) - : : : : : : +- * Filter (15) - : : : : : : +- * ColumnarToRow (14) - : : : : : : +- Scan parquet default.item (13) - : : : : : +- BroadcastExchange (24) - : : : : : +- * Project (23) - : : : : : +- * Filter (22) - : : : : : +- * ColumnarToRow (21) - : : : : : +- Scan parquet default.promotion (20) - : : : : +- 
BroadcastExchange (31) - : : : : +- * Project (30) - : : : : +- * Filter (29) - : : : : +- * ColumnarToRow (28) - : : : : +- Scan parquet default.date_dim (27) - : : : +- BroadcastExchange (37) - : : : +- * Filter (36) - : : : +- * ColumnarToRow (35) - : : : +- Scan parquet default.store (34) - : : :- * HashAggregate (72) - : : : +- Exchange (71) - : : : +- * HashAggregate (70) - : : : +- * Project (69) - : : : +- * BroadcastHashJoin Inner BuildRight (68) - : : : :- * Project (63) - : : : : +- * BroadcastHashJoin Inner BuildRight (62) - : : : : :- * Project (60) - : : : : : +- * BroadcastHashJoin Inner BuildRight (59) - : : : : : :- * Project (57) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (56) - : : : : : : :- * Project (54) - : : : : : : : +- SortMergeJoin LeftOuter (53) - : : : : : : : :- * Sort (47) - : : : : : : : : +- Exchange (46) - : : : : : : : : +- * Filter (45) - : : : : : : : : +- * ColumnarToRow (44) - : : : : : : : : +- Scan parquet default.catalog_sales (43) - : : : : : : : +- * Sort (52) - : : : : : : : +- Exchange (51) - : : : : : : : +- * Filter (50) - : : : : : : : +- * ColumnarToRow (49) - : : : : : : : +- Scan parquet default.catalog_returns (48) - : : : : : : +- ReusedExchange (55) - : : : : : +- ReusedExchange (58) - : : : : +- ReusedExchange (61) - : : : +- BroadcastExchange (67) - : : : +- * Filter (66) - : : : +- * ColumnarToRow (65) - : : : +- Scan parquet default.catalog_page (64) - : : +- * HashAggregate (102) - : : +- Exchange (101) - : : +- * HashAggregate (100) - : : +- * Project (99) - : : +- * BroadcastHashJoin Inner BuildRight (98) - : : :- * Project (93) - : : : +- * BroadcastHashJoin Inner BuildRight (92) - : : : :- * Project (90) - : : : : +- * BroadcastHashJoin Inner BuildRight (89) - : : : : :- * Project (87) - : : : : : +- * BroadcastHashJoin Inner BuildRight (86) - : : : : : :- * Project (84) - : : : : : : +- SortMergeJoin LeftOuter (83) - : : : : : : :- * Sort (77) - : : : : : : : +- Exchange (76) - : : : : : : : +- * Filter (75) - : : : : : : : +- * ColumnarToRow (74) - : : : : : : : +- Scan parquet default.web_sales (73) - : : : : : : +- * Sort (82) - : : : : : : +- Exchange (81) - : : : : : : +- * Filter (80) - : : : : : : +- * ColumnarToRow (79) - : : : : : : +- Scan parquet default.web_returns (78) - : : : : : +- ReusedExchange (85) - : : : : +- ReusedExchange (88) - : : : +- ReusedExchange (91) - : : +- BroadcastExchange (97) - : : +- * Filter (96) - : : +- * ColumnarToRow (95) - : : +- Scan parquet default.web_site (94) - : +- * HashAggregate (111) - : +- Exchange (110) - : +- * HashAggregate (109) - : +- * HashAggregate (108) - : +- ReusedExchange (107) - +- * HashAggregate (120) - +- Exchange (119) - +- * HashAggregate (118) - +- * HashAggregate (117) - +- ReusedExchange (116) +TakeOrderedAndProject (121) ++- * HashAggregate (120) + +- Exchange (119) + +- * HashAggregate (118) + +- Union (117) + :- * HashAggregate (106) + : +- Exchange (105) + : +- * HashAggregate (104) + : +- Union (103) + : :- * HashAggregate (42) + : : +- Exchange (41) + : : +- * HashAggregate (40) + : : +- * Project (39) + : : +- * BroadcastHashJoin Inner BuildRight (38) + : : :- * Project (33) + : : : +- * BroadcastHashJoin Inner BuildRight (32) + : : : :- * Project (26) + : : : : +- * BroadcastHashJoin Inner BuildRight (25) + : : : : :- * Project (19) + : : : : : +- * BroadcastHashJoin Inner BuildRight (18) + : : : : : :- * Project (12) + : : : : : : +- SortMergeJoin LeftOuter (11) + : : : : : : :- * Sort (5) + : : : : : : : +- Exchange (4) + : : : : : : 
: +- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet default.store_sales (1) + : : : : : : +- * Sort (10) + : : : : : : +- Exchange (9) + : : : : : : +- * Filter (8) + : : : : : : +- * ColumnarToRow (7) + : : : : : : +- Scan parquet default.store_returns (6) + : : : : : +- BroadcastExchange (17) + : : : : : +- * Project (16) + : : : : : +- * Filter (15) + : : : : : +- * ColumnarToRow (14) + : : : : : +- Scan parquet default.item (13) + : : : : +- BroadcastExchange (24) + : : : : +- * Project (23) + : : : : +- * Filter (22) + : : : : +- * ColumnarToRow (21) + : : : : +- Scan parquet default.promotion (20) + : : : +- BroadcastExchange (31) + : : : +- * Project (30) + : : : +- * Filter (29) + : : : +- * ColumnarToRow (28) + : : : +- Scan parquet default.date_dim (27) + : : +- BroadcastExchange (37) + : : +- * Filter (36) + : : +- * ColumnarToRow (35) + : : +- Scan parquet default.store (34) + : :- * HashAggregate (72) + : : +- Exchange (71) + : : +- * HashAggregate (70) + : : +- * Project (69) + : : +- * BroadcastHashJoin Inner BuildRight (68) + : : :- * Project (63) + : : : +- * BroadcastHashJoin Inner BuildRight (62) + : : : :- * Project (60) + : : : : +- * BroadcastHashJoin Inner BuildRight (59) + : : : : :- * Project (57) + : : : : : +- * BroadcastHashJoin Inner BuildRight (56) + : : : : : :- * Project (54) + : : : : : : +- SortMergeJoin LeftOuter (53) + : : : : : : :- * Sort (47) + : : : : : : : +- Exchange (46) + : : : : : : : +- * Filter (45) + : : : : : : : +- * ColumnarToRow (44) + : : : : : : : +- Scan parquet default.catalog_sales (43) + : : : : : : +- * Sort (52) + : : : : : : +- Exchange (51) + : : : : : : +- * Filter (50) + : : : : : : +- * ColumnarToRow (49) + : : : : : : +- Scan parquet default.catalog_returns (48) + : : : : : +- ReusedExchange (55) + : : : : +- ReusedExchange (58) + : : : +- ReusedExchange (61) + : : +- BroadcastExchange (67) + : : +- * Filter (66) + : : +- * ColumnarToRow (65) + : : +- Scan parquet default.catalog_page (64) + : +- * HashAggregate (102) + : +- Exchange (101) + : +- * HashAggregate (100) + : +- * Project (99) + : +- * BroadcastHashJoin Inner BuildRight (98) + : :- * Project (93) + : : +- * BroadcastHashJoin Inner BuildRight (92) + : : :- * Project (90) + : : : +- * BroadcastHashJoin Inner BuildRight (89) + : : : :- * Project (87) + : : : : +- * BroadcastHashJoin Inner BuildRight (86) + : : : : :- * Project (84) + : : : : : +- SortMergeJoin LeftOuter (83) + : : : : : :- * Sort (77) + : : : : : : +- Exchange (76) + : : : : : : +- * Filter (75) + : : : : : : +- * ColumnarToRow (74) + : : : : : : +- Scan parquet default.web_sales (73) + : : : : : +- * Sort (82) + : : : : : +- Exchange (81) + : : : : : +- * Filter (80) + : : : : : +- * ColumnarToRow (79) + : : : : : +- Scan parquet default.web_returns (78) + : : : : +- ReusedExchange (85) + : : : +- ReusedExchange (88) + : : +- ReusedExchange (91) + : +- BroadcastExchange (97) + : +- * Filter (96) + : +- * ColumnarToRow (95) + : +- Scan parquet default.web_site (94) + :- * HashAggregate (111) + : +- Exchange (110) + : +- * HashAggregate (109) + : +- * HashAggregate (108) + : +- ReusedExchange (107) + +- * HashAggregate (116) + +- Exchange (115) + +- * HashAggregate (114) + +- * HashAggregate (113) + +- ReusedExchange (112) (1) Scan parquet default.store_sales @@ -142,7 +138,7 @@ Condition : (((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#3)) AND is (4) Exchange Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, 
ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7] -Arguments: hashpartitioning(cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint), 5), true, [id=#8] +Arguments: hashpartitioning(cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint), 5), ENSURE_REQUIREMENTS, [id=#8] (5) Sort [codegen id : 2] Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7] @@ -164,7 +160,7 @@ Condition : (isnotnull(sr_item_sk#9) AND isnotnull(sr_ticket_number#10)) (9) Exchange Input [4]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] -Arguments: hashpartitioning(sr_item_sk#9, sr_ticket_number#10, 5), true, [id=#13] +Arguments: hashpartitioning(sr_item_sk#9, sr_ticket_number#10, 5), ENSURE_REQUIREMENTS, [id=#13] (10) Sort [codegen id : 4] Input [4]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] @@ -308,7 +304,7 @@ Results [6]: [s_store_id#24, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] (41) Exchange Input [6]: [s_store_id#24, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] -Arguments: hashpartitioning(s_store_id#24, 5), true, [id=#36] +Arguments: hashpartitioning(s_store_id#24, 5), ENSURE_REQUIREMENTS, [id=#36] (42) HashAggregate [codegen id : 10] Input [6]: [s_store_id#24, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] @@ -333,7 +329,7 @@ Condition : (((isnotnull(cs_sold_date_sk#45) AND isnotnull(cs_catalog_page_sk#46 (46) Exchange Input [7]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_item_sk#47, cs_promo_sk#48, cs_order_number#49, cs_ext_sales_price#50, cs_net_profit#51] -Arguments: hashpartitioning(cs_item_sk#47, cs_order_number#49, 5), true, [id=#52] +Arguments: hashpartitioning(cs_item_sk#47, cs_order_number#49, 5), ENSURE_REQUIREMENTS, [id=#52] (47) Sort [codegen id : 12] Input [7]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_item_sk#47, cs_promo_sk#48, cs_order_number#49, cs_ext_sales_price#50, cs_net_profit#51] @@ -355,7 +351,7 @@ Condition : (isnotnull(cr_item_sk#53) AND isnotnull(cr_order_number#54)) (51) Exchange Input [4]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] -Arguments: hashpartitioning(cr_item_sk#53, cr_order_number#54, 5), true, [id=#57] +Arguments: hashpartitioning(cr_item_sk#53, cr_order_number#54, 5), ENSURE_REQUIREMENTS, [id=#57] (52) Sort [codegen id : 14] Input [4]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] @@ -442,7 +438,7 @@ Results [6]: [cp_catalog_page_id#59, sum#66, sum#67, isEmpty#68, sum#69, isEmpty (71) Exchange Input [6]: [cp_catalog_page_id#59, sum#66, sum#67, isEmpty#68, sum#69, isEmpty#70] -Arguments: hashpartitioning(cp_catalog_page_id#59, 5), true, [id=#71] +Arguments: hashpartitioning(cp_catalog_page_id#59, 5), ENSURE_REQUIREMENTS, [id=#71] (72) HashAggregate [codegen id : 20] Input [6]: [cp_catalog_page_id#59, sum#66, sum#67, isEmpty#68, sum#69, isEmpty#70] @@ -467,7 +463,7 @@ Condition : (((isnotnull(ws_sold_date_sk#80) AND isnotnull(ws_web_site_sk#82)) A (76) Exchange Input [7]: [ws_sold_date_sk#80, ws_item_sk#81, ws_web_site_sk#82, ws_promo_sk#83, ws_order_number#84, ws_ext_sales_price#85, ws_net_profit#86] -Arguments: hashpartitioning(cast(ws_item_sk#81 as bigint), cast(ws_order_number#84 as bigint), 5), true, [id=#87] +Arguments: hashpartitioning(cast(ws_item_sk#81 as bigint), cast(ws_order_number#84 as bigint), 5), ENSURE_REQUIREMENTS, [id=#87] (77) Sort [codegen id : 22] Input [7]: [ws_sold_date_sk#80, ws_item_sk#81, ws_web_site_sk#82, 
ws_promo_sk#83, ws_order_number#84, ws_ext_sales_price#85, ws_net_profit#86] @@ -489,7 +485,7 @@ Condition : (isnotnull(wr_item_sk#88) AND isnotnull(wr_order_number#89)) (81) Exchange Input [4]: [wr_item_sk#88, wr_order_number#89, wr_return_amt#90, wr_net_loss#91] -Arguments: hashpartitioning(wr_item_sk#88, wr_order_number#89, 5), true, [id=#92] +Arguments: hashpartitioning(wr_item_sk#88, wr_order_number#89, 5), ENSURE_REQUIREMENTS, [id=#92] (82) Sort [codegen id : 24] Input [4]: [wr_item_sk#88, wr_order_number#89, wr_return_amt#90, wr_net_loss#91] @@ -576,7 +572,7 @@ Results [6]: [web_site_id#94, sum#101, sum#102, isEmpty#103, sum#104, isEmpty#10 (101) Exchange Input [6]: [web_site_id#94, sum#101, sum#102, isEmpty#103, sum#104, isEmpty#105] -Arguments: hashpartitioning(web_site_id#94, 5), true, [id=#106] +Arguments: hashpartitioning(web_site_id#94, 5), ENSURE_REQUIREMENTS, [id=#106] (102) HashAggregate [codegen id : 30] Input [6]: [web_site_id#94, sum#101, sum#102, isEmpty#103, sum#104, isEmpty#105] @@ -596,7 +592,7 @@ Results [8]: [channel#40, id#41, sum#121, isEmpty#122, sum#123, isEmpty#124, sum (105) Exchange Input [8]: [channel#40, id#41, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] -Arguments: hashpartitioning(channel#40, id#41, 5), true, [id=#127] +Arguments: hashpartitioning(channel#40, id#41, 5), ENSURE_REQUIREMENTS, [id=#127] (106) HashAggregate [codegen id : 32] Input [8]: [channel#40, id#41, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] @@ -624,7 +620,7 @@ Results [7]: [channel#40, sum#152, isEmpty#153, sum#154, isEmpty#155, sum#156, i (110) Exchange Input [7]: [channel#40, sum#152, isEmpty#153, sum#154, isEmpty#155, sum#156, isEmpty#157] -Arguments: hashpartitioning(channel#40, 5), true, [id=#158] +Arguments: hashpartitioning(channel#40, 5), ENSURE_REQUIREMENTS, [id=#158] (111) HashAggregate [codegen id : 65] Input [7]: [channel#40, sum#152, isEmpty#153, sum#154, isEmpty#155, sum#156, isEmpty#157] @@ -633,75 +629,55 @@ Functions [3]: [sum(sales#143), sum(returns#144), sum(profit#145)] Aggregate Attributes [3]: [sum(sales#143)#159, sum(returns#144)#160, sum(profit#145)#161] Results [5]: [channel#40, null AS id#162, sum(sales#143)#159 AS sales#163, sum(returns#144)#160 AS returns#164, sum(profit#145)#161 AS profit#165] -(112) Union +(112) ReusedExchange [Reuses operator id: 105] +Output [8]: [channel#40, id#41, sum#166, isEmpty#167, sum#168, isEmpty#169, sum#170, isEmpty#171] -(113) HashAggregate [codegen id : 66] -Input [5]: [channel#40, id#41, sales#131, returns#132, profit#133] -Keys [5]: [channel#40, id#41, sales#131, returns#132, profit#133] -Functions: [] -Aggregate Attributes: [] -Results [5]: [channel#40, id#41, sales#131, returns#132, profit#133] - -(114) Exchange -Input [5]: [channel#40, id#41, sales#131, returns#132, profit#133] -Arguments: hashpartitioning(channel#40, id#41, sales#131, returns#132, profit#133, 5), true, [id=#166] - -(115) HashAggregate [codegen id : 67] -Input [5]: [channel#40, id#41, sales#131, returns#132, profit#133] -Keys [5]: [channel#40, id#41, sales#131, returns#132, profit#133] -Functions: [] -Aggregate Attributes: [] -Results [5]: [channel#40, id#41, sales#131, returns#132, profit#133] - -(116) ReusedExchange [Reuses operator id: 105] -Output [8]: [channel#40, id#41, sum#167, isEmpty#168, sum#169, isEmpty#170, sum#171, isEmpty#172] - -(117) HashAggregate [codegen id : 99] -Input [8]: [channel#40, id#41, sum#167, isEmpty#168, sum#169, isEmpty#170, sum#171, isEmpty#172] +(113) HashAggregate [codegen id 
: 97] +Input [8]: [channel#40, id#41, sum#166, isEmpty#167, sum#168, isEmpty#169, sum#170, isEmpty#171] Keys [2]: [channel#40, id#41] Functions [3]: [sum(sales#42), sum(returns#43), sum(profit#44)] -Aggregate Attributes [3]: [sum(sales#42)#173, sum(returns#43)#174, sum(profit#44)#175] -Results [3]: [sum(sales#42)#173 AS sales#143, sum(returns#43)#174 AS returns#144, sum(profit#44)#175 AS profit#145] +Aggregate Attributes [3]: [sum(sales#42)#172, sum(returns#43)#173, sum(profit#44)#174] +Results [3]: [sum(sales#42)#172 AS sales#143, sum(returns#43)#173 AS returns#144, sum(profit#44)#174 AS profit#145] -(118) HashAggregate [codegen id : 99] +(114) HashAggregate [codegen id : 97] Input [3]: [sales#143, returns#144, profit#145] Keys: [] Functions [3]: [partial_sum(sales#143), partial_sum(returns#144), partial_sum(profit#145)] -Aggregate Attributes [6]: [sum#176, isEmpty#177, sum#178, isEmpty#179, sum#180, isEmpty#181] -Results [6]: [sum#182, isEmpty#183, sum#184, isEmpty#185, sum#186, isEmpty#187] +Aggregate Attributes [6]: [sum#175, isEmpty#176, sum#177, isEmpty#178, sum#179, isEmpty#180] +Results [6]: [sum#181, isEmpty#182, sum#183, isEmpty#184, sum#185, isEmpty#186] -(119) Exchange -Input [6]: [sum#182, isEmpty#183, sum#184, isEmpty#185, sum#186, isEmpty#187] -Arguments: SinglePartition, true, [id=#188] +(115) Exchange +Input [6]: [sum#181, isEmpty#182, sum#183, isEmpty#184, sum#185, isEmpty#186] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#187] -(120) HashAggregate [codegen id : 100] -Input [6]: [sum#182, isEmpty#183, sum#184, isEmpty#185, sum#186, isEmpty#187] +(116) HashAggregate [codegen id : 98] +Input [6]: [sum#181, isEmpty#182, sum#183, isEmpty#184, sum#185, isEmpty#186] Keys: [] Functions [3]: [sum(sales#143), sum(returns#144), sum(profit#145)] -Aggregate Attributes [3]: [sum(sales#143)#189, sum(returns#144)#190, sum(profit#145)#191] -Results [5]: [null AS channel#192, null AS id#193, sum(sales#143)#189 AS sales#194, sum(returns#144)#190 AS returns#195, sum(profit#145)#191 AS profit#196] +Aggregate Attributes [3]: [sum(sales#143)#188, sum(returns#144)#189, sum(profit#145)#190] +Results [5]: [null AS channel#191, null AS id#192, sum(sales#143)#188 AS sales#193, sum(returns#144)#189 AS returns#194, sum(profit#145)#190 AS profit#195] -(121) Union +(117) Union -(122) HashAggregate [codegen id : 101] +(118) HashAggregate [codegen id : 99] Input [5]: [channel#40, id#41, sales#131, returns#132, profit#133] Keys [5]: [channel#40, id#41, sales#131, returns#132, profit#133] Functions: [] Aggregate Attributes: [] Results [5]: [channel#40, id#41, sales#131, returns#132, profit#133] -(123) Exchange +(119) Exchange Input [5]: [channel#40, id#41, sales#131, returns#132, profit#133] -Arguments: hashpartitioning(channel#40, id#41, sales#131, returns#132, profit#133, 5), true, [id=#197] +Arguments: hashpartitioning(channel#40, id#41, sales#131, returns#132, profit#133, 5), ENSURE_REQUIREMENTS, [id=#196] -(124) HashAggregate [codegen id : 102] +(120) HashAggregate [codegen id : 100] Input [5]: [channel#40, id#41, sales#131, returns#132, profit#133] Keys [5]: [channel#40, id#41, sales#131, returns#132, profit#133] Functions: [] Aggregate Attributes: [] Results [5]: [channel#40, id#41, sales#131, returns#132, profit#133] -(125) TakeOrderedAndProject +(121) TakeOrderedAndProject Input [5]: [channel#40, id#41, sales#131, returns#132, profit#133] Arguments: 100, [channel#40 ASC NULLS FIRST, id#41 ASC NULLS FIRST], [channel#40, id#41, sales#131, returns#132, profit#133] diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/simplified.txt index ad59968740aaa..c26c5b81259e6 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/simplified.txt @@ -1,205 +1,197 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] - WholeStageCodegen (102) + WholeStageCodegen (100) HashAggregate [channel,id,sales,returns,profit] InputAdapter Exchange [channel,id,sales,returns,profit] #1 - WholeStageCodegen (101) + WholeStageCodegen (99) HashAggregate [channel,id,sales,returns,profit] InputAdapter Union - WholeStageCodegen (67) - HashAggregate [channel,id,sales,returns,profit] + WholeStageCodegen (32) + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange [channel,id,sales,returns,profit] #2 - WholeStageCodegen (66) - HashAggregate [channel,id,sales,returns,profit] + Exchange [channel,id] #2 + WholeStageCodegen (31) + HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Union - WholeStageCodegen (32) - HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + WholeStageCodegen (10) + HashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange [channel,id] #3 - WholeStageCodegen (31) - HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - Union - WholeStageCodegen (10) - HashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] - InputAdapter - Exchange [s_store_id] #4 - WholeStageCodegen (9) - HashAggregate [s_store_id,ss_ext_sales_price,sr_return_amt,ss_net_profit,sr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] - Project [ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_store_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] - BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_sold_date_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] - 
InputAdapter - SortMergeJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] - WholeStageCodegen (2) - Sort [ss_item_sk,ss_ticket_number] - InputAdapter - Exchange [ss_item_sk,ss_ticket_number] #5 - WholeStageCodegen (1) - Filter [ss_sold_date_sk,ss_store_sk,ss_item_sk,ss_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit] - WholeStageCodegen (4) - Sort [sr_item_sk,sr_ticket_number] - InputAdapter - Exchange [sr_item_sk,sr_ticket_number] #6 - WholeStageCodegen (3) - Filter [sr_item_sk,sr_ticket_number] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (5) - Project [i_item_sk] - Filter [i_current_price,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_current_price] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen (6) - Project [p_promo_sk] - Filter [p_channel_tv,p_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.promotion [p_promo_sk,p_channel_tv] - InputAdapter - BroadcastExchange #9 - WholeStageCodegen (7) - Project [d_date_sk] - Filter [d_date,d_date_sk] + Exchange [s_store_id] #3 + WholeStageCodegen (9) + HashAggregate [s_store_id,ss_ext_sales_price,sr_return_amt,ss_net_profit,sr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + Project [ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_sold_date_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + InputAdapter + SortMergeJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + WholeStageCodegen (2) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_item_sk,ss_ticket_number] #4 + WholeStageCodegen (1) + Filter [ss_sold_date_sk,ss_store_sk,ss_item_sk,ss_promo_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] - InputAdapter - BroadcastExchange #10 - WholeStageCodegen (8) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_id] - WholeStageCodegen (20) - HashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] - InputAdapter - Exchange [cp_catalog_page_id] #11 - WholeStageCodegen (19) - HashAggregate [cp_catalog_page_id,cs_ext_sales_price,cr_return_amount,cs_net_profit,cr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] - Project [cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] - BroadcastHashJoin [cs_catalog_page_sk,cp_catalog_page_sk] - 
Project [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] - BroadcastHashJoin [cs_promo_sk,p_promo_sk] - Project [cs_sold_date_sk,cs_catalog_page_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] - InputAdapter - SortMergeJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] - WholeStageCodegen (12) - Sort [cs_item_sk,cs_order_number] - InputAdapter - Exchange [cs_item_sk,cs_order_number] #12 - WholeStageCodegen (11) - Filter [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit] - WholeStageCodegen (14) - Sort [cr_item_sk,cr_order_number] - InputAdapter - Exchange [cr_item_sk,cr_order_number] #13 - WholeStageCodegen (13) - Filter [cr_item_sk,cr_order_number] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] - InputAdapter - ReusedExchange [i_item_sk] #7 + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit] + WholeStageCodegen (4) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_item_sk,sr_ticket_number] #5 + WholeStageCodegen (3) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [i_item_sk] + Filter [i_current_price,i_item_sk] + ColumnarToRow InputAdapter - ReusedExchange [p_promo_sk] #8 + Scan parquet default.item [i_item_sk,i_current_price] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Project [p_promo_sk] + Filter [p_channel_tv,p_promo_sk] + ColumnarToRow InputAdapter - ReusedExchange [d_date_sk] #9 - InputAdapter - BroadcastExchange #14 - WholeStageCodegen (18) - Filter [cp_catalog_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] - WholeStageCodegen (30) - HashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + Scan parquet default.promotion [p_promo_sk,p_channel_tv] InputAdapter - Exchange [web_site_id] #15 - WholeStageCodegen (29) - HashAggregate [web_site_id,ws_ext_sales_price,wr_return_amt,ws_net_profit,wr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] - Project [ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] - BroadcastHashJoin [ws_web_site_sk,web_site_sk] - Project [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project 
[ws_sold_date_sk,ws_web_site_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] - BroadcastHashJoin [ws_promo_sk,p_promo_sk] - Project [ws_sold_date_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_sold_date_sk,ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] - InputAdapter - SortMergeJoin [ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] - WholeStageCodegen (22) - Sort [ws_item_sk,ws_order_number] - InputAdapter - Exchange [ws_item_sk,ws_order_number] #16 - WholeStageCodegen (21) - Filter [ws_sold_date_sk,ws_web_site_sk,ws_item_sk,ws_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_order_number,ws_ext_sales_price,ws_net_profit] - WholeStageCodegen (24) - Sort [wr_item_sk,wr_order_number] - InputAdapter - Exchange [wr_item_sk,wr_order_number] #17 - WholeStageCodegen (23) - Filter [wr_item_sk,wr_order_number] - ColumnarToRow - InputAdapter - Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] - InputAdapter - ReusedExchange [i_item_sk] #7 - InputAdapter - ReusedExchange [p_promo_sk] #8 - InputAdapter - ReusedExchange [d_date_sk] #9 + BroadcastExchange #8 + WholeStageCodegen (7) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #18 - WholeStageCodegen (28) - Filter [web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_site [web_site_sk,web_site_id] - WholeStageCodegen (65) - HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Scan parquet default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_id] + WholeStageCodegen (20) + HashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [cp_catalog_page_id] #10 + WholeStageCodegen (19) + HashAggregate [cp_catalog_page_id,cs_ext_sales_price,cr_return_amount,cs_net_profit,cr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + Project [cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + BroadcastHashJoin [cs_catalog_page_sk,cp_catalog_page_sk] + Project [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cs_sold_date_sk,cs_catalog_page_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] + InputAdapter + SortMergeJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] + 
WholeStageCodegen (12) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #11 + WholeStageCodegen (11) + Filter [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit] + WholeStageCodegen (14) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #12 + WholeStageCodegen (13) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] + InputAdapter + ReusedExchange [i_item_sk] #6 + InputAdapter + ReusedExchange [p_promo_sk] #7 + InputAdapter + ReusedExchange [d_date_sk] #8 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (18) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + WholeStageCodegen (30) + HashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange [channel] #19 - WholeStageCodegen (64) - HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #3 - WholeStageCodegen (100) + Exchange [web_site_id] #14 + WholeStageCodegen (29) + HashAggregate [web_site_id,ws_ext_sales_price,wr_return_amt,ws_net_profit,wr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + Project [ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + BroadcastHashJoin [ws_web_site_sk,web_site_sk] + Project [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_web_site_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] + BroadcastHashJoin [ws_promo_sk,p_promo_sk] + Project [ws_sold_date_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] + InputAdapter + SortMergeJoin [ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] + WholeStageCodegen (22) + Sort [ws_item_sk,ws_order_number] + InputAdapter + Exchange [ws_item_sk,ws_order_number] #15 + WholeStageCodegen (21) + Filter [ws_sold_date_sk,ws_web_site_sk,ws_item_sk,ws_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_order_number,ws_ext_sales_price,ws_net_profit] + WholeStageCodegen (24) + Sort [wr_item_sk,wr_order_number] + InputAdapter + Exchange [wr_item_sk,wr_order_number] #16 + WholeStageCodegen (23) + Filter [wr_item_sk,wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns 
[wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + InputAdapter + ReusedExchange [i_item_sk] #6 + InputAdapter + ReusedExchange [p_promo_sk] #7 + InputAdapter + ReusedExchange [d_date_sk] #8 + InputAdapter + BroadcastExchange #17 + WholeStageCodegen (28) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_site_sk,web_site_id] + WholeStageCodegen (65) + HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel] #18 + WholeStageCodegen (64) + HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 + WholeStageCodegen (98) HashAggregate [sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),channel,id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange #20 - WholeStageCodegen (99) + Exchange #19 + WholeStageCodegen (97) HashAggregate [sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #3 + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/explain.txt index ddfdeadcf8eb3..9e687a07c2ca0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/explain.txt @@ -1,120 +1,116 @@ == Physical Plan == -TakeOrderedAndProject (116) -+- * HashAggregate (115) - +- Exchange (114) - +- * HashAggregate (113) - +- Union (112) - :- * HashAggregate (106) - : +- Exchange (105) - : +- * HashAggregate (104) - : +- Union (103) - : :- * HashAggregate (97) - : : +- Exchange (96) - : : +- * HashAggregate (95) - : : +- Union (94) - : : :- * HashAggregate (39) - : : : +- Exchange (38) - : : : +- * HashAggregate (37) - : : : +- * Project (36) - : : : +- * BroadcastHashJoin Inner BuildRight (35) - : : : :- * Project (29) - : : : : +- * BroadcastHashJoin Inner BuildRight (28) - : : : : :- * Project (22) - : : : : : +- * BroadcastHashJoin Inner BuildRight (21) - : : : : : :- * Project (16) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : : : : :- * Project (9) - : : : : : : : +- * BroadcastHashJoin LeftOuter BuildRight (8) - : : : : : : : :- * Filter (3) - : : : : : : : : +- * ColumnarToRow (2) - : : : : : : : : +- Scan parquet default.store_sales (1) - : : : : : : : +- BroadcastExchange (7) - : : : : : : : +- * Filter (6) - : : : : : : : +- * ColumnarToRow (5) - : : : : : : : +- Scan parquet default.store_returns (4) - : : : : : : +- BroadcastExchange (14) - : : : : : : +- * Project (13) - : : : : : : +- * Filter (12) - : : : : : : +- * ColumnarToRow (11) - : : : : : : +- Scan parquet default.date_dim (10) - : : : : : +- BroadcastExchange (20) - : : : : : +- * Filter (19) - : : : : : +- * 
ColumnarToRow (18) - : : : : : +- Scan parquet default.store (17) - : : : : +- BroadcastExchange (27) - : : : : +- * Project (26) - : : : : +- * Filter (25) - : : : : +- * ColumnarToRow (24) - : : : : +- Scan parquet default.item (23) - : : : +- BroadcastExchange (34) - : : : +- * Project (33) - : : : +- * Filter (32) - : : : +- * ColumnarToRow (31) - : : : +- Scan parquet default.promotion (30) - : : :- * HashAggregate (66) - : : : +- Exchange (65) - : : : +- * HashAggregate (64) - : : : +- * Project (63) - : : : +- * BroadcastHashJoin Inner BuildRight (62) - : : : :- * Project (60) - : : : : +- * BroadcastHashJoin Inner BuildRight (59) - : : : : :- * Project (57) - : : : : : +- * BroadcastHashJoin Inner BuildRight (56) - : : : : : :- * Project (51) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (50) - : : : : : : :- * Project (48) - : : : : : : : +- * BroadcastHashJoin LeftOuter BuildRight (47) - : : : : : : : :- * Filter (42) - : : : : : : : : +- * ColumnarToRow (41) - : : : : : : : : +- Scan parquet default.catalog_sales (40) - : : : : : : : +- BroadcastExchange (46) - : : : : : : : +- * Filter (45) - : : : : : : : +- * ColumnarToRow (44) - : : : : : : : +- Scan parquet default.catalog_returns (43) - : : : : : : +- ReusedExchange (49) - : : : : : +- BroadcastExchange (55) - : : : : : +- * Filter (54) - : : : : : +- * ColumnarToRow (53) - : : : : : +- Scan parquet default.catalog_page (52) - : : : : +- ReusedExchange (58) - : : : +- ReusedExchange (61) - : : +- * HashAggregate (93) - : : +- Exchange (92) - : : +- * HashAggregate (91) - : : +- * Project (90) - : : +- * BroadcastHashJoin Inner BuildRight (89) - : : :- * Project (87) - : : : +- * BroadcastHashJoin Inner BuildRight (86) - : : : :- * Project (84) - : : : : +- * BroadcastHashJoin Inner BuildRight (83) - : : : : :- * Project (78) - : : : : : +- * BroadcastHashJoin Inner BuildRight (77) - : : : : : :- * Project (75) - : : : : : : +- * BroadcastHashJoin LeftOuter BuildRight (74) - : : : : : : :- * Filter (69) - : : : : : : : +- * ColumnarToRow (68) - : : : : : : : +- Scan parquet default.web_sales (67) - : : : : : : +- BroadcastExchange (73) - : : : : : : +- * Filter (72) - : : : : : : +- * ColumnarToRow (71) - : : : : : : +- Scan parquet default.web_returns (70) - : : : : : +- ReusedExchange (76) - : : : : +- BroadcastExchange (82) - : : : : +- * Filter (81) - : : : : +- * ColumnarToRow (80) - : : : : +- Scan parquet default.web_site (79) - : : : +- ReusedExchange (85) - : : +- ReusedExchange (88) - : +- * HashAggregate (102) - : +- Exchange (101) - : +- * HashAggregate (100) - : +- * HashAggregate (99) - : +- ReusedExchange (98) - +- * HashAggregate (111) - +- Exchange (110) - +- * HashAggregate (109) - +- * HashAggregate (108) - +- ReusedExchange (107) +TakeOrderedAndProject (112) ++- * HashAggregate (111) + +- Exchange (110) + +- * HashAggregate (109) + +- Union (108) + :- * HashAggregate (97) + : +- Exchange (96) + : +- * HashAggregate (95) + : +- Union (94) + : :- * HashAggregate (39) + : : +- Exchange (38) + : : +- * HashAggregate (37) + : : +- * Project (36) + : : +- * BroadcastHashJoin Inner BuildRight (35) + : : :- * Project (29) + : : : +- * BroadcastHashJoin Inner BuildRight (28) + : : : :- * Project (22) + : : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : : :- * Project (16) + : : : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : : : :- * Project (9) + : : : : : : +- * BroadcastHashJoin LeftOuter BuildRight (8) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : 
: : : +- Scan parquet default.store_sales (1) + : : : : : : +- BroadcastExchange (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet default.store_returns (4) + : : : : : +- BroadcastExchange (14) + : : : : : +- * Project (13) + : : : : : +- * Filter (12) + : : : : : +- * ColumnarToRow (11) + : : : : : +- Scan parquet default.date_dim (10) + : : : : +- BroadcastExchange (20) + : : : : +- * Filter (19) + : : : : +- * ColumnarToRow (18) + : : : : +- Scan parquet default.store (17) + : : : +- BroadcastExchange (27) + : : : +- * Project (26) + : : : +- * Filter (25) + : : : +- * ColumnarToRow (24) + : : : +- Scan parquet default.item (23) + : : +- BroadcastExchange (34) + : : +- * Project (33) + : : +- * Filter (32) + : : +- * ColumnarToRow (31) + : : +- Scan parquet default.promotion (30) + : :- * HashAggregate (66) + : : +- Exchange (65) + : : +- * HashAggregate (64) + : : +- * Project (63) + : : +- * BroadcastHashJoin Inner BuildRight (62) + : : :- * Project (60) + : : : +- * BroadcastHashJoin Inner BuildRight (59) + : : : :- * Project (57) + : : : : +- * BroadcastHashJoin Inner BuildRight (56) + : : : : :- * Project (51) + : : : : : +- * BroadcastHashJoin Inner BuildRight (50) + : : : : : :- * Project (48) + : : : : : : +- * BroadcastHashJoin LeftOuter BuildRight (47) + : : : : : : :- * Filter (42) + : : : : : : : +- * ColumnarToRow (41) + : : : : : : : +- Scan parquet default.catalog_sales (40) + : : : : : : +- BroadcastExchange (46) + : : : : : : +- * Filter (45) + : : : : : : +- * ColumnarToRow (44) + : : : : : : +- Scan parquet default.catalog_returns (43) + : : : : : +- ReusedExchange (49) + : : : : +- BroadcastExchange (55) + : : : : +- * Filter (54) + : : : : +- * ColumnarToRow (53) + : : : : +- Scan parquet default.catalog_page (52) + : : : +- ReusedExchange (58) + : : +- ReusedExchange (61) + : +- * HashAggregate (93) + : +- Exchange (92) + : +- * HashAggregate (91) + : +- * Project (90) + : +- * BroadcastHashJoin Inner BuildRight (89) + : :- * Project (87) + : : +- * BroadcastHashJoin Inner BuildRight (86) + : : :- * Project (84) + : : : +- * BroadcastHashJoin Inner BuildRight (83) + : : : :- * Project (78) + : : : : +- * BroadcastHashJoin Inner BuildRight (77) + : : : : :- * Project (75) + : : : : : +- * BroadcastHashJoin LeftOuter BuildRight (74) + : : : : : :- * Filter (69) + : : : : : : +- * ColumnarToRow (68) + : : : : : : +- Scan parquet default.web_sales (67) + : : : : : +- BroadcastExchange (73) + : : : : : +- * Filter (72) + : : : : : +- * ColumnarToRow (71) + : : : : : +- Scan parquet default.web_returns (70) + : : : : +- ReusedExchange (76) + : : : +- BroadcastExchange (82) + : : : +- * Filter (81) + : : : +- * ColumnarToRow (80) + : : : +- Scan parquet default.web_site (79) + : : +- ReusedExchange (85) + : +- ReusedExchange (88) + :- * HashAggregate (102) + : +- Exchange (101) + : +- * HashAggregate (100) + : +- * HashAggregate (99) + : +- ReusedExchange (98) + +- * HashAggregate (107) + +- Exchange (106) + +- * HashAggregate (105) + +- * HashAggregate (104) + +- ReusedExchange (103) (1) Scan parquet default.store_sales @@ -287,7 +283,7 @@ Results [6]: [s_store_id#17, sum#30, sum#31, isEmpty#32, sum#33, isEmpty#34] (38) Exchange Input [6]: [s_store_id#17, sum#30, sum#31, isEmpty#32, sum#33, isEmpty#34] -Arguments: hashpartitioning(s_store_id#17, 5), true, [id=#35] +Arguments: hashpartitioning(s_store_id#17, 5), ENSURE_REQUIREMENTS, [id=#35] (39) HashAggregate [codegen id : 7] Input [6]: [s_store_id#17, sum#30, sum#31, 
isEmpty#32, sum#33, isEmpty#34] @@ -409,7 +405,7 @@ Results [6]: [cp_catalog_page_id#57, sum#64, sum#65, isEmpty#66, sum#67, isEmpty (65) Exchange Input [6]: [cp_catalog_page_id#57, sum#64, sum#65, isEmpty#66, sum#67, isEmpty#68] -Arguments: hashpartitioning(cp_catalog_page_id#57, 5), true, [id=#69] +Arguments: hashpartitioning(cp_catalog_page_id#57, 5), ENSURE_REQUIREMENTS, [id=#69] (66) HashAggregate [codegen id : 14] Input [6]: [cp_catalog_page_id#57, sum#64, sum#65, isEmpty#66, sum#67, isEmpty#68] @@ -531,7 +527,7 @@ Results [6]: [web_site_id#91, sum#98, sum#99, isEmpty#100, sum#101, isEmpty#102] (92) Exchange Input [6]: [web_site_id#91, sum#98, sum#99, isEmpty#100, sum#101, isEmpty#102] -Arguments: hashpartitioning(web_site_id#91, 5), true, [id=#103] +Arguments: hashpartitioning(web_site_id#91, 5), ENSURE_REQUIREMENTS, [id=#103] (93) HashAggregate [codegen id : 21] Input [6]: [web_site_id#91, sum#98, sum#99, isEmpty#100, sum#101, isEmpty#102] @@ -551,7 +547,7 @@ Results [8]: [channel#39, id#40, sum#118, isEmpty#119, sum#120, isEmpty#121, sum (96) Exchange Input [8]: [channel#39, id#40, sum#118, isEmpty#119, sum#120, isEmpty#121, sum#122, isEmpty#123] -Arguments: hashpartitioning(channel#39, id#40, 5), true, [id=#124] +Arguments: hashpartitioning(channel#39, id#40, 5), ENSURE_REQUIREMENTS, [id=#124] (97) HashAggregate [codegen id : 23] Input [8]: [channel#39, id#40, sum#118, isEmpty#119, sum#120, isEmpty#121, sum#122, isEmpty#123] @@ -579,7 +575,7 @@ Results [7]: [channel#39, sum#149, isEmpty#150, sum#151, isEmpty#152, sum#153, i (101) Exchange Input [7]: [channel#39, sum#149, isEmpty#150, sum#151, isEmpty#152, sum#153, isEmpty#154] -Arguments: hashpartitioning(channel#39, 5), true, [id=#155] +Arguments: hashpartitioning(channel#39, 5), ENSURE_REQUIREMENTS, [id=#155] (102) HashAggregate [codegen id : 47] Input [7]: [channel#39, sum#149, isEmpty#150, sum#151, isEmpty#152, sum#153, isEmpty#154] @@ -588,75 +584,55 @@ Functions [3]: [sum(sales#140), sum(returns#141), sum(profit#142)] Aggregate Attributes [3]: [sum(sales#140)#156, sum(returns#141)#157, sum(profit#142)#158] Results [5]: [channel#39, null AS id#159, sum(sales#140)#156 AS sales#160, sum(returns#141)#157 AS returns#161, sum(profit#142)#158 AS profit#162] -(103) Union +(103) ReusedExchange [Reuses operator id: 96] +Output [8]: [channel#39, id#40, sum#163, isEmpty#164, sum#165, isEmpty#166, sum#167, isEmpty#168] -(104) HashAggregate [codegen id : 48] -Input [5]: [channel#39, id#40, sales#128, returns#129, profit#130] -Keys [5]: [channel#39, id#40, sales#128, returns#129, profit#130] -Functions: [] -Aggregate Attributes: [] -Results [5]: [channel#39, id#40, sales#128, returns#129, profit#130] - -(105) Exchange -Input [5]: [channel#39, id#40, sales#128, returns#129, profit#130] -Arguments: hashpartitioning(channel#39, id#40, sales#128, returns#129, profit#130, 5), true, [id=#163] - -(106) HashAggregate [codegen id : 49] -Input [5]: [channel#39, id#40, sales#128, returns#129, profit#130] -Keys [5]: [channel#39, id#40, sales#128, returns#129, profit#130] -Functions: [] -Aggregate Attributes: [] -Results [5]: [channel#39, id#40, sales#128, returns#129, profit#130] - -(107) ReusedExchange [Reuses operator id: 96] -Output [8]: [channel#39, id#40, sum#164, isEmpty#165, sum#166, isEmpty#167, sum#168, isEmpty#169] - -(108) HashAggregate [codegen id : 72] -Input [8]: [channel#39, id#40, sum#164, isEmpty#165, sum#166, isEmpty#167, sum#168, isEmpty#169] +(104) HashAggregate [codegen id : 70] +Input [8]: [channel#39, id#40, sum#163, 
isEmpty#164, sum#165, isEmpty#166, sum#167, isEmpty#168] Keys [2]: [channel#39, id#40] Functions [3]: [sum(sales#41), sum(returns#42), sum(profit#43)] -Aggregate Attributes [3]: [sum(sales#41)#170, sum(returns#42)#171, sum(profit#43)#172] -Results [3]: [sum(sales#41)#170 AS sales#140, sum(returns#42)#171 AS returns#141, sum(profit#43)#172 AS profit#142] +Aggregate Attributes [3]: [sum(sales#41)#169, sum(returns#42)#170, sum(profit#43)#171] +Results [3]: [sum(sales#41)#169 AS sales#140, sum(returns#42)#170 AS returns#141, sum(profit#43)#171 AS profit#142] -(109) HashAggregate [codegen id : 72] +(105) HashAggregate [codegen id : 70] Input [3]: [sales#140, returns#141, profit#142] Keys: [] Functions [3]: [partial_sum(sales#140), partial_sum(returns#141), partial_sum(profit#142)] -Aggregate Attributes [6]: [sum#173, isEmpty#174, sum#175, isEmpty#176, sum#177, isEmpty#178] -Results [6]: [sum#179, isEmpty#180, sum#181, isEmpty#182, sum#183, isEmpty#184] +Aggregate Attributes [6]: [sum#172, isEmpty#173, sum#174, isEmpty#175, sum#176, isEmpty#177] +Results [6]: [sum#178, isEmpty#179, sum#180, isEmpty#181, sum#182, isEmpty#183] -(110) Exchange -Input [6]: [sum#179, isEmpty#180, sum#181, isEmpty#182, sum#183, isEmpty#184] -Arguments: SinglePartition, true, [id=#185] +(106) Exchange +Input [6]: [sum#178, isEmpty#179, sum#180, isEmpty#181, sum#182, isEmpty#183] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#184] -(111) HashAggregate [codegen id : 73] -Input [6]: [sum#179, isEmpty#180, sum#181, isEmpty#182, sum#183, isEmpty#184] +(107) HashAggregate [codegen id : 71] +Input [6]: [sum#178, isEmpty#179, sum#180, isEmpty#181, sum#182, isEmpty#183] Keys: [] Functions [3]: [sum(sales#140), sum(returns#141), sum(profit#142)] -Aggregate Attributes [3]: [sum(sales#140)#186, sum(returns#141)#187, sum(profit#142)#188] -Results [5]: [null AS channel#189, null AS id#190, sum(sales#140)#186 AS sales#191, sum(returns#141)#187 AS returns#192, sum(profit#142)#188 AS profit#193] +Aggregate Attributes [3]: [sum(sales#140)#185, sum(returns#141)#186, sum(profit#142)#187] +Results [5]: [null AS channel#188, null AS id#189, sum(sales#140)#185 AS sales#190, sum(returns#141)#186 AS returns#191, sum(profit#142)#187 AS profit#192] -(112) Union +(108) Union -(113) HashAggregate [codegen id : 74] +(109) HashAggregate [codegen id : 72] Input [5]: [channel#39, id#40, sales#128, returns#129, profit#130] Keys [5]: [channel#39, id#40, sales#128, returns#129, profit#130] Functions: [] Aggregate Attributes: [] Results [5]: [channel#39, id#40, sales#128, returns#129, profit#130] -(114) Exchange +(110) Exchange Input [5]: [channel#39, id#40, sales#128, returns#129, profit#130] -Arguments: hashpartitioning(channel#39, id#40, sales#128, returns#129, profit#130, 5), true, [id=#194] +Arguments: hashpartitioning(channel#39, id#40, sales#128, returns#129, profit#130, 5), ENSURE_REQUIREMENTS, [id=#193] -(115) HashAggregate [codegen id : 75] +(111) HashAggregate [codegen id : 73] Input [5]: [channel#39, id#40, sales#128, returns#129, profit#130] Keys [5]: [channel#39, id#40, sales#128, returns#129, profit#130] Functions: [] Aggregate Attributes: [] Results [5]: [channel#39, id#40, sales#128, returns#129, profit#130] -(116) TakeOrderedAndProject +(112) TakeOrderedAndProject Input [5]: [channel#39, id#40, sales#128, returns#129, profit#130] Arguments: 100, [channel#39 ASC NULLS FIRST, id#40 ASC NULLS FIRST], [channel#39, id#40, sales#128, returns#129, profit#130] diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/simplified.txt index 602a670a49116..142af3f0755f3 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/simplified.txt @@ -1,181 +1,173 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] - WholeStageCodegen (75) + WholeStageCodegen (73) HashAggregate [channel,id,sales,returns,profit] InputAdapter Exchange [channel,id,sales,returns,profit] #1 - WholeStageCodegen (74) + WholeStageCodegen (72) HashAggregate [channel,id,sales,returns,profit] InputAdapter Union - WholeStageCodegen (49) - HashAggregate [channel,id,sales,returns,profit] + WholeStageCodegen (23) + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange [channel,id,sales,returns,profit] #2 - WholeStageCodegen (48) - HashAggregate [channel,id,sales,returns,profit] + Exchange [channel,id] #2 + WholeStageCodegen (22) + HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Union - WholeStageCodegen (23) - HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + WholeStageCodegen (7) + HashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange [channel,id] #3 - WholeStageCodegen (22) - HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - Union - WholeStageCodegen (7) - HashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] - InputAdapter - Exchange [s_store_id] #4 - WholeStageCodegen (6) - HashAggregate [s_store_id,ss_ext_sales_price,sr_return_amt,ss_net_profit,sr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] - Project [ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] - BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] - BroadcastHashJoin 
[ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] - Filter [ss_sold_date_sk,ss_store_sk,ss_item_sk,ss_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Filter [sr_item_sk,sr_ticket_number] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (2) - Project [d_date_sk] - Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (3) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_id] + Exchange [s_store_id] #3 + WholeStageCodegen (6) + HashAggregate [s_store_id,ss_ext_sales_price,sr_return_amt,ss_net_profit,sr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + Project [ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + Filter [ss_sold_date_sk,ss_store_sk,ss_item_sk,ss_promo_sk] + ColumnarToRow InputAdapter - BroadcastExchange #8 - WholeStageCodegen (4) - Project [i_item_sk] - Filter [i_current_price,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_current_price] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit] InputAdapter - BroadcastExchange #9 - WholeStageCodegen (5) - Project [p_promo_sk] - Filter [p_channel_tv,p_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.promotion [p_promo_sk,p_channel_tv] - WholeStageCodegen (14) - HashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] - InputAdapter - Exchange [cp_catalog_page_id] #10 - WholeStageCodegen (13) - HashAggregate [cp_catalog_page_id,cs_ext_sales_price,cr_return_amount,cs_net_profit,cr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] - Project [cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] - BroadcastHashJoin [cs_promo_sk,p_promo_sk] - Project [cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project 
[cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] - BroadcastHashJoin [cs_catalog_page_sk,cp_catalog_page_sk] - Project [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] - BroadcastHashJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] - Filter [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit] - InputAdapter - BroadcastExchange #11 - WholeStageCodegen (8) - Filter [cr_item_sk,cr_order_number] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] - InputAdapter - ReusedExchange [d_date_sk] #6 + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #12 - WholeStageCodegen (10) - Filter [cp_catalog_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + Scan parquet default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_id] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (4) + Project [i_item_sk] + Filter [i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_current_price] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (5) + Project [p_promo_sk] + Filter [p_channel_tv,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_promo_sk,p_channel_tv] + WholeStageCodegen (14) + HashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [cp_catalog_page_id] #9 + WholeStageCodegen (13) + HashAggregate [cp_catalog_page_id,cs_ext_sales_price,cr_return_amount,cs_net_profit,cr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + Project [cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + BroadcastHashJoin [cs_catalog_page_sk,cp_catalog_page_sk] + Project [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project 
[cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] + BroadcastHashJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] + Filter [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk] + ColumnarToRow InputAdapter - ReusedExchange [i_item_sk] #8 + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit] InputAdapter - ReusedExchange [p_promo_sk] #9 - WholeStageCodegen (21) - HashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + BroadcastExchange #10 + WholeStageCodegen (8) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (10) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] InputAdapter - Exchange [web_site_id] #13 - WholeStageCodegen (20) - HashAggregate [web_site_id,ws_ext_sales_price,wr_return_amt,ws_net_profit,wr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] - Project [ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] - BroadcastHashJoin [ws_promo_sk,p_promo_sk] - Project [ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_item_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] - BroadcastHashJoin [ws_web_site_sk,web_site_sk] - Project [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] - BroadcastHashJoin [ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] - Filter [ws_sold_date_sk,ws_web_site_sk,ws_item_sk,ws_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_order_number,ws_ext_sales_price,ws_net_profit] - InputAdapter - BroadcastExchange #14 - WholeStageCodegen (15) - Filter [wr_item_sk,wr_order_number] - ColumnarToRow - InputAdapter - Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] - InputAdapter - ReusedExchange [d_date_sk] #6 - InputAdapter - BroadcastExchange #15 - WholeStageCodegen (17) - Filter [web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_site [web_site_sk,web_site_id] + ReusedExchange [i_item_sk] #7 + InputAdapter + ReusedExchange [p_promo_sk] #8 + WholeStageCodegen (21) + HashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), 
true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [web_site_id] #12 + WholeStageCodegen (20) + HashAggregate [web_site_id,ws_ext_sales_price,wr_return_amt,ws_net_profit,wr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + Project [ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + BroadcastHashJoin [ws_promo_sk,p_promo_sk] + Project [ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + BroadcastHashJoin [ws_web_site_sk,web_site_sk] + Project [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] + BroadcastHashJoin [ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] + Filter [ws_sold_date_sk,ws_web_site_sk,ws_item_sk,ws_promo_sk] + ColumnarToRow InputAdapter - ReusedExchange [i_item_sk] #8 + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_order_number,ws_ext_sales_price,ws_net_profit] InputAdapter - ReusedExchange [p_promo_sk] #9 - WholeStageCodegen (47) - HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - Exchange [channel] #16 - WholeStageCodegen (46) - HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #3 - WholeStageCodegen (73) + BroadcastExchange #13 + WholeStageCodegen (15) + Filter [wr_item_sk,wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (17) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_site_sk,web_site_id] + InputAdapter + ReusedExchange [i_item_sk] #7 + InputAdapter + ReusedExchange [p_promo_sk] #8 + WholeStageCodegen (47) + HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel] #15 + WholeStageCodegen (46) + HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 + WholeStageCodegen (71) HashAggregate [sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),channel,id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange #17 - WholeStageCodegen (72) + Exchange #16 + WholeStageCodegen (70) HashAggregate [sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] HashAggregate 
[channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #3 + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.sf100/explain.txt index f61c214640e33..96f13872a2ba2 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.sf100/explain.txt @@ -1,46 +1,42 @@ == Physical Plan == -TakeOrderedAndProject (42) -+- * Project (41) - +- Window (40) - +- * Sort (39) - +- Exchange (38) - +- * HashAggregate (37) - +- Exchange (36) - +- * HashAggregate (35) - +- Union (34) - :- * HashAggregate (28) - : +- Exchange (27) - : +- * HashAggregate (26) - : +- Union (25) - : :- * HashAggregate (19) - : : +- Exchange (18) - : : +- * HashAggregate (17) - : : +- * Project (16) - : : +- * BroadcastHashJoin Inner BuildRight (15) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.web_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- BroadcastExchange (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.item (11) - : +- * HashAggregate (24) - : +- Exchange (23) - : +- * HashAggregate (22) - : +- * HashAggregate (21) - : +- ReusedExchange (20) - +- * HashAggregate (33) - +- Exchange (32) - +- * HashAggregate (31) - +- * HashAggregate (30) - +- ReusedExchange (29) +TakeOrderedAndProject (38) ++- * Project (37) + +- Window (36) + +- * Sort (35) + +- Exchange (34) + +- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- Union (30) + :- * HashAggregate (19) + : +- Exchange (18) + : +- * HashAggregate (17) + : +- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.web_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.item (11) + :- * HashAggregate (24) + : +- Exchange (23) + : +- * HashAggregate (22) + : +- * HashAggregate (21) + : +- ReusedExchange (20) + +- * HashAggregate (29) + +- Exchange (28) + +- * HashAggregate (27) + +- * HashAggregate (26) + +- ReusedExchange (25) (1) Scan parquet default.web_sales @@ -124,7 +120,7 @@ Results [3]: [i_category#9, i_class#8, sum#12] (18) Exchange Input [3]: [i_category#9, i_class#8, sum#12] -Arguments: hashpartitioning(i_category#9, i_class#8, 5), true, [id=#13] +Arguments: hashpartitioning(i_category#9, i_class#8, 5), ENSURE_REQUIREMENTS, [id=#13] (19) HashAggregate [codegen id : 4] Input [3]: [i_category#9, i_class#8, sum#12] @@ -152,7 +148,7 @@ Results [3]: [i_category#9, sum#24, isEmpty#25] (23) Exchange Input [3]: [i_category#9, sum#24, isEmpty#25] -Arguments: hashpartitioning(i_category#9, 5), true, [id=#26] +Arguments: 
hashpartitioning(i_category#9, 5), ENSURE_REQUIREMENTS, [id=#26] (24) HashAggregate [codegen id : 9] Input [3]: [i_category#9, sum#24, isEmpty#25] @@ -161,91 +157,71 @@ Functions [1]: [sum(total_sum#21)] Aggregate Attributes [1]: [sum(total_sum#21)#27] Results [6]: [sum(total_sum#21)#27 AS total_sum#28, i_category#9, null AS i_class#29, 0 AS g_category#30, 1 AS g_class#31, 1 AS lochierarchy#32] -(25) Union +(25) ReusedExchange [Reuses operator id: 18] +Output [3]: [i_category#9, i_class#8, sum#33] -(26) HashAggregate [codegen id : 10] -Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -Keys [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -Functions: [] -Aggregate Attributes: [] -Results [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] - -(27) Exchange -Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -Arguments: hashpartitioning(total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18, 5), true, [id=#33] - -(28) HashAggregate [codegen id : 11] -Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -Keys [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -Functions: [] -Aggregate Attributes: [] -Results [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] - -(29) ReusedExchange [Reuses operator id: 18] -Output [3]: [i_category#9, i_class#8, sum#34] - -(30) HashAggregate [codegen id : 15] -Input [3]: [i_category#9, i_class#8, sum#34] +(26) HashAggregate [codegen id : 13] +Input [3]: [i_category#9, i_class#8, sum#33] Keys [2]: [i_category#9, i_class#8] Functions [1]: [sum(UnscaledValue(ws_net_paid#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#3))#35] -Results [1]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#3))#35,17,2) AS total_sum#21] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#3))#34] +Results [1]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#3))#34,17,2) AS total_sum#21] -(31) HashAggregate [codegen id : 15] +(27) HashAggregate [codegen id : 13] Input [1]: [total_sum#21] Keys: [] Functions [1]: [partial_sum(total_sum#21)] -Aggregate Attributes [2]: [sum#36, isEmpty#37] -Results [2]: [sum#38, isEmpty#39] +Aggregate Attributes [2]: [sum#35, isEmpty#36] +Results [2]: [sum#37, isEmpty#38] -(32) Exchange -Input [2]: [sum#38, isEmpty#39] -Arguments: SinglePartition, true, [id=#40] +(28) Exchange +Input [2]: [sum#37, isEmpty#38] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#39] -(33) HashAggregate [codegen id : 16] -Input [2]: [sum#38, isEmpty#39] +(29) HashAggregate [codegen id : 14] +Input [2]: [sum#37, isEmpty#38] Keys: [] Functions [1]: [sum(total_sum#21)] -Aggregate Attributes [1]: [sum(total_sum#21)#41] -Results [6]: [sum(total_sum#21)#41 AS total_sum#42, null AS i_category#43, null AS i_class#44, 1 AS g_category#45, 1 AS g_class#46, 2 AS lochierarchy#47] +Aggregate Attributes [1]: [sum(total_sum#21)#40] +Results [6]: [sum(total_sum#21)#40 AS total_sum#41, null AS i_category#42, null AS i_class#43, 1 AS g_category#44, 1 AS g_class#45, 2 AS lochierarchy#46] -(34) Union +(30) Union -(35) HashAggregate [codegen id : 17] +(31) HashAggregate [codegen id : 15] Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] Keys [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] 
Functions: [] Aggregate Attributes: [] Results [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -(36) Exchange +(32) Exchange Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -Arguments: hashpartitioning(total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18, 5), true, [id=#48] +Arguments: hashpartitioning(total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18, 5), ENSURE_REQUIREMENTS, [id=#47] -(37) HashAggregate [codegen id : 18] +(33) HashAggregate [codegen id : 16] Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] Keys [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] Functions: [] Aggregate Attributes: [] -Results [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, CASE WHEN (g_class#17 = 0) THEN i_category#9 END AS _w0#49] +Results [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, CASE WHEN (g_class#17 = 0) THEN i_category#9 END AS _w0#48] -(38) Exchange -Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#49] -Arguments: hashpartitioning(lochierarchy#18, _w0#49, 5), true, [id=#50] +(34) Exchange +Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#48] +Arguments: hashpartitioning(lochierarchy#18, _w0#48, 5), ENSURE_REQUIREMENTS, [id=#49] -(39) Sort [codegen id : 19] -Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#49] -Arguments: [lochierarchy#18 ASC NULLS FIRST, _w0#49 ASC NULLS FIRST, total_sum#15 DESC NULLS LAST], false, 0 +(35) Sort [codegen id : 17] +Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#48] +Arguments: [lochierarchy#18 ASC NULLS FIRST, _w0#48 ASC NULLS FIRST, total_sum#15 DESC NULLS LAST], false, 0 -(40) Window -Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#49] -Arguments: [rank(total_sum#15) windowspecdefinition(lochierarchy#18, _w0#49, total_sum#15 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#51], [lochierarchy#18, _w0#49], [total_sum#15 DESC NULLS LAST] +(36) Window +Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#48] +Arguments: [rank(total_sum#15) windowspecdefinition(lochierarchy#18, _w0#48, total_sum#15 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#50], [lochierarchy#18, _w0#48], [total_sum#15 DESC NULLS LAST] -(41) Project [codegen id : 20] -Output [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#51] -Input [6]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#49, rank_within_parent#51] +(37) Project [codegen id : 18] +Output [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#50] +Input [6]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#48, rank_within_parent#50] -(42) TakeOrderedAndProject -Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#51] -Arguments: 100, [lochierarchy#18 DESC NULLS LAST, CASE WHEN (lochierarchy#18 = 0) THEN i_category#9 END ASC NULLS FIRST, rank_within_parent#51 ASC NULLS FIRST], [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#51] +(38) TakeOrderedAndProject +Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#50] +Arguments: 100, [lochierarchy#18 
DESC NULLS LAST, CASE WHEN (lochierarchy#18 = 0) THEN i_category#9 END ASC NULLS FIRST, rank_within_parent#50 ASC NULLS FIRST], [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#50] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.sf100/simplified.txt index 2bd128100f527..d2d6b37e90f71 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.sf100/simplified.txt @@ -1,72 +1,64 @@ TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,total_sum,i_class] - WholeStageCodegen (20) + WholeStageCodegen (18) Project [total_sum,i_category,i_class,lochierarchy,rank_within_parent] InputAdapter Window [total_sum,lochierarchy,_w0] - WholeStageCodegen (19) + WholeStageCodegen (17) Sort [lochierarchy,_w0,total_sum] InputAdapter Exchange [lochierarchy,_w0] #1 - WholeStageCodegen (18) + WholeStageCodegen (16) HashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy] [_w0] InputAdapter Exchange [total_sum,i_category,i_class,g_category,g_class,lochierarchy] #2 - WholeStageCodegen (17) + WholeStageCodegen (15) HashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy] InputAdapter Union - WholeStageCodegen (11) - HashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy] + WholeStageCodegen (4) + HashAggregate [i_category,i_class,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,g_category,g_class,lochierarchy,sum] InputAdapter - Exchange [total_sum,i_category,i_class,g_category,g_class,lochierarchy] #3 - WholeStageCodegen (10) - HashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy] - InputAdapter - Union - WholeStageCodegen (4) - HashAggregate [i_category,i_class,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,g_category,g_class,lochierarchy,sum] + Exchange [i_category,i_class] #3 + WholeStageCodegen (3) + HashAggregate [i_category,i_class,ws_net_paid] [sum,sum] + Project [ws_net_paid,i_class,i_category] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_net_paid] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_net_paid] InputAdapter - Exchange [i_category,i_class] #4 - WholeStageCodegen (3) - HashAggregate [i_category,i_class,ws_net_paid] [sum,sum] - Project [ws_net_paid,i_class,i_category] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_item_sk,ws_net_paid] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_net_paid] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #6 - WholeStageCodegen (2) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_class,i_category] - WholeStageCodegen (9) - HashAggregate [i_category,sum,isEmpty] [sum(total_sum),total_sum,i_class,g_category,g_class,lochierarchy,sum,isEmpty] - 
InputAdapter - Exchange [i_category] #7 - WholeStageCodegen (8) - HashAggregate [i_category,total_sum] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,sum] - InputAdapter - ReusedExchange [i_category,i_class,sum] #4 - WholeStageCodegen (16) + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_class,i_category] + WholeStageCodegen (9) + HashAggregate [i_category,sum,isEmpty] [sum(total_sum),total_sum,i_class,g_category,g_class,lochierarchy,sum,isEmpty] + InputAdapter + Exchange [i_category] #6 + WholeStageCodegen (8) + HashAggregate [i_category,total_sum] [sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,sum] + InputAdapter + ReusedExchange [i_category,i_class,sum] #3 + WholeStageCodegen (14) HashAggregate [sum,isEmpty] [sum(total_sum),total_sum,i_category,i_class,g_category,g_class,lochierarchy,sum,isEmpty] InputAdapter - Exchange #8 - WholeStageCodegen (15) + Exchange #7 + WholeStageCodegen (13) HashAggregate [total_sum] [sum,isEmpty,sum,isEmpty] HashAggregate [i_category,i_class,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,sum] InputAdapter - ReusedExchange [i_category,i_class,sum] #4 + ReusedExchange [i_category,i_class,sum] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a/explain.txt index f61c214640e33..96f13872a2ba2 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a/explain.txt @@ -1,46 +1,42 @@ == Physical Plan == -TakeOrderedAndProject (42) -+- * Project (41) - +- Window (40) - +- * Sort (39) - +- Exchange (38) - +- * HashAggregate (37) - +- Exchange (36) - +- * HashAggregate (35) - +- Union (34) - :- * HashAggregate (28) - : +- Exchange (27) - : +- * HashAggregate (26) - : +- Union (25) - : :- * HashAggregate (19) - : : +- Exchange (18) - : : +- * HashAggregate (17) - : : +- * Project (16) - : : +- * BroadcastHashJoin Inner BuildRight (15) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.web_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- BroadcastExchange (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.item (11) - : +- * HashAggregate (24) - : +- Exchange (23) - : +- * HashAggregate (22) - : +- * HashAggregate (21) - : +- ReusedExchange (20) - +- * HashAggregate (33) - +- Exchange (32) - +- * HashAggregate (31) - +- * HashAggregate (30) - +- ReusedExchange (29) +TakeOrderedAndProject (38) ++- * Project (37) + +- Window (36) + +- * Sort (35) + +- Exchange (34) + +- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- Union (30) + :- * HashAggregate (19) + : +- Exchange (18) + : +- * HashAggregate (17) + : +- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.web_sales (1) + : : +- 
BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.item (11) + :- * HashAggregate (24) + : +- Exchange (23) + : +- * HashAggregate (22) + : +- * HashAggregate (21) + : +- ReusedExchange (20) + +- * HashAggregate (29) + +- Exchange (28) + +- * HashAggregate (27) + +- * HashAggregate (26) + +- ReusedExchange (25) (1) Scan parquet default.web_sales @@ -124,7 +120,7 @@ Results [3]: [i_category#9, i_class#8, sum#12] (18) Exchange Input [3]: [i_category#9, i_class#8, sum#12] -Arguments: hashpartitioning(i_category#9, i_class#8, 5), true, [id=#13] +Arguments: hashpartitioning(i_category#9, i_class#8, 5), ENSURE_REQUIREMENTS, [id=#13] (19) HashAggregate [codegen id : 4] Input [3]: [i_category#9, i_class#8, sum#12] @@ -152,7 +148,7 @@ Results [3]: [i_category#9, sum#24, isEmpty#25] (23) Exchange Input [3]: [i_category#9, sum#24, isEmpty#25] -Arguments: hashpartitioning(i_category#9, 5), true, [id=#26] +Arguments: hashpartitioning(i_category#9, 5), ENSURE_REQUIREMENTS, [id=#26] (24) HashAggregate [codegen id : 9] Input [3]: [i_category#9, sum#24, isEmpty#25] @@ -161,91 +157,71 @@ Functions [1]: [sum(total_sum#21)] Aggregate Attributes [1]: [sum(total_sum#21)#27] Results [6]: [sum(total_sum#21)#27 AS total_sum#28, i_category#9, null AS i_class#29, 0 AS g_category#30, 1 AS g_class#31, 1 AS lochierarchy#32] -(25) Union +(25) ReusedExchange [Reuses operator id: 18] +Output [3]: [i_category#9, i_class#8, sum#33] -(26) HashAggregate [codegen id : 10] -Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -Keys [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -Functions: [] -Aggregate Attributes: [] -Results [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] - -(27) Exchange -Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -Arguments: hashpartitioning(total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18, 5), true, [id=#33] - -(28) HashAggregate [codegen id : 11] -Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -Keys [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -Functions: [] -Aggregate Attributes: [] -Results [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] - -(29) ReusedExchange [Reuses operator id: 18] -Output [3]: [i_category#9, i_class#8, sum#34] - -(30) HashAggregate [codegen id : 15] -Input [3]: [i_category#9, i_class#8, sum#34] +(26) HashAggregate [codegen id : 13] +Input [3]: [i_category#9, i_class#8, sum#33] Keys [2]: [i_category#9, i_class#8] Functions [1]: [sum(UnscaledValue(ws_net_paid#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#3))#35] -Results [1]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#3))#35,17,2) AS total_sum#21] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#3))#34] +Results [1]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#3))#34,17,2) AS total_sum#21] -(31) HashAggregate [codegen id : 15] +(27) HashAggregate [codegen id : 13] Input [1]: [total_sum#21] Keys: [] Functions [1]: [partial_sum(total_sum#21)] -Aggregate Attributes [2]: [sum#36, isEmpty#37] -Results [2]: [sum#38, isEmpty#39] +Aggregate Attributes [2]: [sum#35, isEmpty#36] +Results [2]: 
[sum#37, isEmpty#38] -(32) Exchange -Input [2]: [sum#38, isEmpty#39] -Arguments: SinglePartition, true, [id=#40] +(28) Exchange +Input [2]: [sum#37, isEmpty#38] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#39] -(33) HashAggregate [codegen id : 16] -Input [2]: [sum#38, isEmpty#39] +(29) HashAggregate [codegen id : 14] +Input [2]: [sum#37, isEmpty#38] Keys: [] Functions [1]: [sum(total_sum#21)] -Aggregate Attributes [1]: [sum(total_sum#21)#41] -Results [6]: [sum(total_sum#21)#41 AS total_sum#42, null AS i_category#43, null AS i_class#44, 1 AS g_category#45, 1 AS g_class#46, 2 AS lochierarchy#47] +Aggregate Attributes [1]: [sum(total_sum#21)#40] +Results [6]: [sum(total_sum#21)#40 AS total_sum#41, null AS i_category#42, null AS i_class#43, 1 AS g_category#44, 1 AS g_class#45, 2 AS lochierarchy#46] -(34) Union +(30) Union -(35) HashAggregate [codegen id : 17] +(31) HashAggregate [codegen id : 15] Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] Keys [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] Functions: [] Aggregate Attributes: [] Results [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -(36) Exchange +(32) Exchange Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -Arguments: hashpartitioning(total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18, 5), true, [id=#48] +Arguments: hashpartitioning(total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18, 5), ENSURE_REQUIREMENTS, [id=#47] -(37) HashAggregate [codegen id : 18] +(33) HashAggregate [codegen id : 16] Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] Keys [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] Functions: [] Aggregate Attributes: [] -Results [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, CASE WHEN (g_class#17 = 0) THEN i_category#9 END AS _w0#49] +Results [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, CASE WHEN (g_class#17 = 0) THEN i_category#9 END AS _w0#48] -(38) Exchange -Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#49] -Arguments: hashpartitioning(lochierarchy#18, _w0#49, 5), true, [id=#50] +(34) Exchange +Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#48] +Arguments: hashpartitioning(lochierarchy#18, _w0#48, 5), ENSURE_REQUIREMENTS, [id=#49] -(39) Sort [codegen id : 19] -Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#49] -Arguments: [lochierarchy#18 ASC NULLS FIRST, _w0#49 ASC NULLS FIRST, total_sum#15 DESC NULLS LAST], false, 0 +(35) Sort [codegen id : 17] +Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#48] +Arguments: [lochierarchy#18 ASC NULLS FIRST, _w0#48 ASC NULLS FIRST, total_sum#15 DESC NULLS LAST], false, 0 -(40) Window -Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#49] -Arguments: [rank(total_sum#15) windowspecdefinition(lochierarchy#18, _w0#49, total_sum#15 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#51], [lochierarchy#18, _w0#49], [total_sum#15 DESC NULLS LAST] +(36) Window +Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#48] +Arguments: [rank(total_sum#15) windowspecdefinition(lochierarchy#18, _w0#48, total_sum#15 DESC NULLS LAST, 
specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#50], [lochierarchy#18, _w0#48], [total_sum#15 DESC NULLS LAST] -(41) Project [codegen id : 20] -Output [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#51] -Input [6]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#49, rank_within_parent#51] +(37) Project [codegen id : 18] +Output [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#50] +Input [6]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#48, rank_within_parent#50] -(42) TakeOrderedAndProject -Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#51] -Arguments: 100, [lochierarchy#18 DESC NULLS LAST, CASE WHEN (lochierarchy#18 = 0) THEN i_category#9 END ASC NULLS FIRST, rank_within_parent#51 ASC NULLS FIRST], [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#51] +(38) TakeOrderedAndProject +Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#50] +Arguments: 100, [lochierarchy#18 DESC NULLS LAST, CASE WHEN (lochierarchy#18 = 0) THEN i_category#9 END ASC NULLS FIRST, rank_within_parent#50 ASC NULLS FIRST], [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#50] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a/simplified.txt index 2bd128100f527..d2d6b37e90f71 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a/simplified.txt @@ -1,72 +1,64 @@ TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,total_sum,i_class] - WholeStageCodegen (20) + WholeStageCodegen (18) Project [total_sum,i_category,i_class,lochierarchy,rank_within_parent] InputAdapter Window [total_sum,lochierarchy,_w0] - WholeStageCodegen (19) + WholeStageCodegen (17) Sort [lochierarchy,_w0,total_sum] InputAdapter Exchange [lochierarchy,_w0] #1 - WholeStageCodegen (18) + WholeStageCodegen (16) HashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy] [_w0] InputAdapter Exchange [total_sum,i_category,i_class,g_category,g_class,lochierarchy] #2 - WholeStageCodegen (17) + WholeStageCodegen (15) HashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy] InputAdapter Union - WholeStageCodegen (11) - HashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy] + WholeStageCodegen (4) + HashAggregate [i_category,i_class,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,g_category,g_class,lochierarchy,sum] InputAdapter - Exchange [total_sum,i_category,i_class,g_category,g_class,lochierarchy] #3 - WholeStageCodegen (10) - HashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy] - InputAdapter - Union - WholeStageCodegen (4) - HashAggregate [i_category,i_class,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,g_category,g_class,lochierarchy,sum] + Exchange [i_category,i_class] #3 + WholeStageCodegen (3) + HashAggregate [i_category,i_class,ws_net_paid] [sum,sum] + Project [ws_net_paid,i_class,i_category] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_net_paid] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales 
[ws_sold_date_sk,ws_item_sk,ws_net_paid] InputAdapter - Exchange [i_category,i_class] #4 - WholeStageCodegen (3) - HashAggregate [i_category,i_class,ws_net_paid] [sum,sum] - Project [ws_net_paid,i_class,i_category] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_item_sk,ws_net_paid] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_net_paid] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #6 - WholeStageCodegen (2) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_class,i_category] - WholeStageCodegen (9) - HashAggregate [i_category,sum,isEmpty] [sum(total_sum),total_sum,i_class,g_category,g_class,lochierarchy,sum,isEmpty] - InputAdapter - Exchange [i_category] #7 - WholeStageCodegen (8) - HashAggregate [i_category,total_sum] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,sum] - InputAdapter - ReusedExchange [i_category,i_class,sum] #4 - WholeStageCodegen (16) + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_class,i_category] + WholeStageCodegen (9) + HashAggregate [i_category,sum,isEmpty] [sum(total_sum),total_sum,i_class,g_category,g_class,lochierarchy,sum,isEmpty] + InputAdapter + Exchange [i_category] #6 + WholeStageCodegen (8) + HashAggregate [i_category,total_sum] [sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,sum] + InputAdapter + ReusedExchange [i_category,i_class,sum] #3 + WholeStageCodegen (14) HashAggregate [sum,isEmpty] [sum(total_sum),total_sum,i_category,i_class,g_category,g_class,lochierarchy,sum,isEmpty] InputAdapter - Exchange #8 - WholeStageCodegen (15) + Exchange #7 + WholeStageCodegen (13) HashAggregate [total_sum] [sum,isEmpty,sum,isEmpty] HashAggregate [i_category,i_class,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,sum] InputAdapter - ReusedExchange [i_category,i_class,sum] #4 + ReusedExchange [i_category,i_class,sum] #3 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 0ba58e1634f06..44f3c3449ddda 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.{Complete, Partial} import org.apache.spark.sql.catalyst.optimizer.{ConvertToLocalRelation, NestedColumnAliasingSuite} import org.apache.spark.sql.catalyst.plans.logical.{Project, RepartitionByExpression} import org.apache.spark.sql.catalyst.util.StringUtils +import org.apache.spark.sql.execution.UnionExec import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAggregateExec, SortAggregateExec} import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec @@ -3825,6 +3826,23 @@ class 
SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
 System.clearProperty("ivy.home")
 }
 }
+
+ test("SPARK-33964: Combine distinct unions that have noop project between them") {
+ val df = sql("""
+ |SELECT a, b FROM (
+ | SELECT a, b FROM testData2
+ | UNION
+ | SELECT a, sum(b) FROM testData2 GROUP BY a
+ | UNION
+ | SELECT null AS a, sum(b) FROM testData2
+ |)""".stripMargin)
+
+ val unions = df.queryExecution.sparkPlan.collect {
+ case u: UnionExec => u
+ }
+
+ assert(unions.size == 1)
+ }
 }
 case class Foo(bar: Option[String])

From 976e97a80de66d520167f58bdc9082e4bbbc9639 Mon Sep 17 00:00:00 2001
From: Chongguang LIU
Date: Tue, 5 Jan 2021 05:20:16 +0000
Subject: [PATCH 0955/1009] [SPARK-33794][SQL] NextDay expression throws runtime IllegalArgumentException when receiving invalid input under ANSI mode

### What changes were proposed in this pull request?
Instead of returning NULL, the next_day function throws a runtime IllegalArgumentException when ANSI mode is enabled and it receives an invalid value for the dayOfWeek parameter.

### Why are the changes needed?
To comply with ANSI mode, which fails with an exception instead of returning NULL on invalid input.

### Does this PR introduce _any_ user-facing change?
Yes.
When spark.sql.ansi.enabled = true, the next_day function throws an IllegalArgumentException when it receives an invalid value for the dayOfWeek parameter.
When spark.sql.ansi.enabled = false, the behaviour is the same as before.

### How was this patch tested?
ANSI mode is covered by the existing tests.
End-to-end tests have been added.

Closes #30807 from chongguang/SPARK-33794.

Authored-by: Chongguang LIU
Signed-off-by: Wenchen Fan
---
 docs/sql-ref-ansi-compliance.md | 1 +
 .../expressions/datetimeExpressions.scala | 72 +++++++++++++------
 .../sql/catalyst/util/DateTimeUtils.scala | 6 +-
 .../expressions/DateExpressionsSuite.scala | 34 +++++++--
 .../catalyst/util/DateTimeUtilsSuite.scala | 7 ++
 .../resources/sql-tests/inputs/datetime.sql | 7 ++
 .../sql-tests/results/ansi/datetime.sql.out | 44 +++++++++++-
 .../sql-tests/results/datetime-legacy.sql.out | 42 ++++++++++-
 .../sql-tests/results/datetime.sql.out | 42 ++++++++++-
 9 files changed, 220 insertions(+), 35 deletions(-)

diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md
index 16059a5a08e9a..22f4cf78f5223 100644
--- a/docs/sql-ref-ansi-compliance.md
+++ b/docs/sql-ref-ansi-compliance.md
@@ -156,6 +156,7 @@ The behavior of some SQL functions can be different under ANSI mode (`spark.sql.
 - `make_date`: This function should fail with an exception if the result date is invalid.
 - `make_timestamp`: This function should fail with an exception if the result timestamp is invalid.
 - `make_interval`: This function should fail with an exception if the result interval is invalid.
+ - `next_day`: This function throws `IllegalArgumentException` if input is not a valid day of week.

### SQL Operators diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 99f80e9078aae..c9a9ac38559ea 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -1162,7 +1162,12 @@ case class LastDay(startDate: Expression) */ // scalastyle:off line.size.limit @ExpressionDescription( - usage = "_FUNC_(start_date, day_of_week) - Returns the first date which is later than `start_date` and named as indicated.", + usage = + """_FUNC_(start_date, day_of_week) - Returns the first date which is later than `start_date` and named as indicated. + The function returns NULL if at least one of the input parameters is NULL. + When both of the input parameters are not NULL and day_of_week is an invalid input, + the function throws IllegalArgumentException if `spark.sql.ansi.enabled` is set to true, otherwise NULL. + """, examples = """ Examples: > SELECT _FUNC_('2015-01-14', 'TU'); @@ -1171,52 +1176,73 @@ case class LastDay(startDate: Expression) group = "datetime_funcs", since = "1.5.0") // scalastyle:on line.size.limit -case class NextDay(startDate: Expression, dayOfWeek: Expression) +case class NextDay( + startDate: Expression, + dayOfWeek: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { override def left: Expression = startDate override def right: Expression = dayOfWeek + def this(left: Expression, right: Expression) = this(left, right, SQLConf.get.ansiEnabled) + override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType) override def dataType: DataType = DateType override def nullable: Boolean = true override def nullSafeEval(start: Any, dayOfW: Any): Any = { - val dow = DateTimeUtils.getDayOfWeekFromString(dayOfW.asInstanceOf[UTF8String]) - if (dow == -1) { - null - } else { + try { + val dow = DateTimeUtils.getDayOfWeekFromString(dayOfW.asInstanceOf[UTF8String]) val sd = start.asInstanceOf[Int] DateTimeUtils.getNextDateForDayOfWeek(sd, dow) + } catch { + case _: IllegalArgumentException if !failOnError => null + } + } + + private def dateTimeUtilClass: String = DateTimeUtils.getClass.getName.stripSuffix("$") + + private def nextDayGenCode( + ev: ExprCode, + dayOfWeekTerm: String, + sd: String, + dowS: String): String = { + if (failOnError) { + s""" + |int $dayOfWeekTerm = $dateTimeUtilClass.getDayOfWeekFromString($dowS); + |${ev.value} = $dateTimeUtilClass.getNextDateForDayOfWeek($sd, $dayOfWeekTerm); + |""".stripMargin + } else { + s""" + |try { + | int $dayOfWeekTerm = $dateTimeUtilClass.getDayOfWeekFromString($dowS); + | ${ev.value} = $dateTimeUtilClass.getNextDateForDayOfWeek($sd, $dayOfWeekTerm); + |} catch (IllegalArgumentException e) { + | ${ev.isNull} = true; + |} + |""".stripMargin } } override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { nullSafeCodeGen(ctx, ev, (sd, dowS) => { - val dateTimeUtilClass = DateTimeUtils.getClass.getName.stripSuffix("$") val dayOfWeekTerm = ctx.freshName("dayOfWeek") if (dayOfWeek.foldable) { val input = dayOfWeek.eval().asInstanceOf[UTF8String] - if ((input eq null) || DateTimeUtils.getDayOfWeekFromString(input) == -1) { - s""" - |${ev.isNull} = true; - """.stripMargin + if (input eq null) { + s"""${ev.isNull} = true;""" 
} else { - val dayOfWeekValue = DateTimeUtils.getDayOfWeekFromString(input) - s""" - |${ev.value} = $dateTimeUtilClass.getNextDateForDayOfWeek($sd, $dayOfWeekValue); - """.stripMargin + try { + val dayOfWeekValue = DateTimeUtils.getDayOfWeekFromString(input) + s"${ev.value} = $dateTimeUtilClass.getNextDateForDayOfWeek($sd, $dayOfWeekValue);" + } catch { + case _: IllegalArgumentException => nextDayGenCode(ev, dayOfWeekTerm, sd, dowS) + } } } else { - s""" - |int $dayOfWeekTerm = $dateTimeUtilClass.getDayOfWeekFromString($dowS); - |if ($dayOfWeekTerm == -1) { - | ${ev.isNull} = true; - |} else { - | ${ev.value} = $dateTimeUtilClass.getNextDateForDayOfWeek($sd, $dayOfWeekTerm); - |} - """.stripMargin + nextDayGenCode(ev, dayOfWeekTerm, sd, dowS) } }) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 780d2bad1bab2..b4f12db439f7f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -670,9 +670,10 @@ object DateTimeUtils { private val FRIDAY = 1 private val SATURDAY = 2 - /* + /** * Returns day of week from String. Starting from Thursday, marked as 0. * (Because 1970-01-01 is Thursday). + * @throws IllegalArgumentException if the input is not a valid day of week. */ def getDayOfWeekFromString(string: UTF8String): Int = { val dowString = string.toString.toUpperCase(Locale.ROOT) @@ -684,7 +685,8 @@ object DateTimeUtils { case "TH" | "THU" | "THURSDAY" => THURSDAY case "FR" | "FRI" | "FRIDAY" => FRIDAY case "SA" | "SAT" | "SATURDAY" => SATURDAY - case _ => -1 + case _ => + throw new IllegalArgumentException(s"""Illegal input for day of week: $string""") } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 79770505ec35d..1af8fe882847c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -640,13 +640,33 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { testNextDay("2015-07-23", "Fri", "2015-07-24") testNextDay("2015-07-23", "fr", "2015-07-24") - checkEvaluation(NextDay(Literal(Date.valueOf("2015-07-23")), Literal("xx")), null) - checkEvaluation(NextDay(Literal.create(null, DateType), Literal("xx")), null) - checkEvaluation( - NextDay(Literal(Date.valueOf("2015-07-23")), Literal.create(null, StringType)), null) - // Test escaping of dayOfWeek - GenerateUnsafeProjection.generate( - NextDay(Literal(Date.valueOf("2015-07-23")), Literal("\"quote")) :: Nil) + Seq(true, false).foreach { ansiEnabled => + withSQLConf(SQLConf.ANSI_ENABLED.key -> ansiEnabled.toString) { + var expr: Expression = NextDay(Literal(Date.valueOf("2015-07-23")), Literal("xx")) + if (ansiEnabled) { + val errMsg = "Illegal input for day of week: xx" + checkExceptionInExpression[Exception](expr, errMsg) + } else { + checkEvaluation(expr, null) + } + + expr = NextDay(Literal.create(null, DateType), Literal("xx")) + checkEvaluation(expr, null) + + expr = NextDay(Literal(Date.valueOf("2015-07-23")), Literal.create(null, StringType)) + checkEvaluation(expr, null) + + // Test escaping of dayOfWeek + expr = 
NextDay(Literal(Date.valueOf("2015-07-23")), Literal("\"quote")) + GenerateUnsafeProjection.generate(expr :: Nil) + if (ansiEnabled) { + val errMsg = """Illegal input for day of week: "quote""" + checkExceptionInExpression[Exception](expr, errMsg) + } else { + checkEvaluation(expr, null) + } + } + } } private def testTruncDate(input: Date, fmt: String, expected: Date): Unit = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 3d841f32379ff..b9b55da5a2080 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -675,4 +675,11 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { assert(toDate("tomorrow CET ", zoneId).get === today + 1) } } + + test("parsing day of week") { + assert(getDayOfWeekFromString(UTF8String.fromString("THU")) == 0) + assert(getDayOfWeekFromString(UTF8String.fromString("MONDAY")) == 4) + intercept[IllegalArgumentException](getDayOfWeekFromString(UTF8String.fromString("xx"))) + intercept[IllegalArgumentException](getDayOfWeekFromString(UTF8String.fromString("\"quote"))) + } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql index acfd1f50e14c9..0493d8653c01f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql @@ -172,3 +172,10 @@ select to_unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS"); select to_unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS"); select cast("Unparseable" as timestamp); select cast("Unparseable" as date); + +-- next_day +select next_day("2015-07-23", "Mon"); +select next_day("2015-07-23", "xx"); +select next_day("xx", "Mon"); +select next_day(null, "Mon"); +select next_day(null, "xx"); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out index 3e307a92c10f0..9a0c8ff02c5bb 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 123 +-- Number of queries: 128 -- !query @@ -1069,3 +1069,45 @@ struct<> -- !query output java.time.DateTimeException Cannot cast Unparseable to DateType. + + +-- !query +select next_day("2015-07-23", "Mon") +-- !query schema +struct +-- !query output +2015-07-27 + + +-- !query +select next_day("2015-07-23", "xx") +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +Illegal input for day of week: xx + + +-- !query +select next_day("xx", "Mon") +-- !query schema +struct<> +-- !query output +java.time.DateTimeException +Cannot cast xx to DateType. 
+ + +-- !query +select next_day(null, "Mon") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select next_day(null, "xx") +-- !query schema +struct +-- !query output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out index ed54b72111ed5..d93843b231804 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 123 +-- Number of queries: 128 -- !query @@ -1021,3 +1021,43 @@ select cast("Unparseable" as date) struct -- !query output NULL + + +-- !query +select next_day("2015-07-23", "Mon") +-- !query schema +struct +-- !query output +2015-07-27 + + +-- !query +select next_day("2015-07-23", "xx") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select next_day("xx", "Mon") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select next_day(null, "Mon") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select next_day(null, "xx") +-- !query schema +struct +-- !query output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index 213895dcb4bcb..b07b68ce2600d 100755 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 123 +-- Number of queries: 128 -- !query @@ -1029,3 +1029,43 @@ select cast("Unparseable" as date) struct -- !query output NULL + + +-- !query +select next_day("2015-07-23", "Mon") +-- !query schema +struct +-- !query output +2015-07-27 + + +-- !query +select next_day("2015-07-23", "xx") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select next_day("xx", "Mon") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select next_day(null, "Mon") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select next_day(null, "xx") +-- !query schema +struct +-- !query output +NULL From 6b00fdc756e85ce7affded605d6d8e0a5308c1ed Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Tue, 5 Jan 2021 05:32:36 +0000 Subject: [PATCH 0956/1009] [SPARK-33998][SQL] Provide an API to create an InternalRow in V2CommandExec ### What changes were proposed in this pull request? There are many v2 commands such as `SHOW TABLES`, `DESCRIBE TABLE`, etc. that require creating `InternalRow`s. Currently, the code to create `InternalRow`s are duplicated across many commands and it can be moved into `V2CommandExec` to remove duplicate code. ### Why are the changes needed? To clean up duplicate code. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing test since this is just refactoring. Closes #31020 from imback82/refactor_v2_command. 
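For illustration only (editor's sketch, not part of this patch): a hypothetical command node showing how a v2 command can now build its result rows with the shared helper this commit adds to `V2CommandExec`. The node name, its constructor, and its two string output columns are assumptions made for the example.

```scala
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.execution.LeafExecNode
import org.apache.spark.sql.execution.datasources.v2.V2CommandExec

// Hypothetical node (not in Spark): `output` is assumed to be two string attributes,
// e.g. (key, value), matching the rows produced below.
case class ShowKeyValueExec(
    output: Seq[Attribute],
    properties: Seq[(String, String)]) extends V2CommandExec with LeafExecNode {

  // One InternalRow per pair via the inherited toCatalystRow helper, instead of
  // building a RowEncoder serializer inside every command node as before.
  override protected def run(): Seq[InternalRow] = {
    properties.map { case (key, value) => toCatalystRow(key, value) }
  }
}
```

Since `toCatalystRow` takes `String*` and serializes through the node's own schema, it fits these string-returning metadata commands, which is why the duplication could be lifted into the base class.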
Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../datasources/v2/DescribeColumnExec.scala | 11 +---------- .../datasources/v2/DescribeNamespaceExec.scala | 12 +----------- .../datasources/v2/DescribeTableExec.scala | 13 +------------ .../datasources/v2/ShowCurrentNamespaceExec.scala | 10 ++-------- .../datasources/v2/ShowNamespacesExec.scala | 7 ++----- .../datasources/v2/ShowTablePropertiesExec.scala | 10 ++++------ .../execution/datasources/v2/ShowTablesExec.scala | 9 ++------- .../execution/datasources/v2/V2CommandExec.scala | 11 ++++++++++- 8 files changed, 23 insertions(+), 60 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeColumnExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeColumnExec.scala index c7ce69f744cce..ab8c5617aa36b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeColumnExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeColumnExec.scala @@ -20,17 +20,12 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.catalyst.expressions.Attribute case class DescribeColumnExec( override val output: Seq[Attribute], column: Attribute, isExtended: Boolean) extends V2CommandExec { - private val toRow = { - RowEncoder(StructType.fromAttributes(output)).resolveAndBind().createSerializer() - } override protected def run(): Seq[InternalRow] = { val rows = new ArrayBuffer[InternalRow]() @@ -49,8 +44,4 @@ case class DescribeColumnExec( rows.toSeq } - - private def toCatalystRow(strs: String*): InternalRow = { - toRow(new GenericRowWithSchema(strs.toArray, schema)).copy() - } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeNamespaceExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeNamespaceExec.scala index e273abf90e3bc..2da96b769a41a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeNamespaceExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeNamespaceExec.scala @@ -21,10 +21,8 @@ import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} +import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsNamespaces} -import org.apache.spark.sql.types.StructType /** * Physical plan node for describing a namespace. 
@@ -34,10 +32,6 @@ case class DescribeNamespaceExec( catalog: SupportsNamespaces, namespace: Seq[String], isExtended: Boolean) extends V2CommandExec { - private val toRow = { - RowEncoder(StructType.fromAttributes(output)).resolveAndBind().createSerializer() - } - override protected def run(): Seq[InternalRow] = { val rows = new ArrayBuffer[InternalRow]() val ns = namespace.toArray @@ -57,8 +51,4 @@ case class DescribeNamespaceExec( } rows.toSeq } - - private def toCatalystRow(strs: String*): InternalRow = { - toRow(new GenericRowWithSchema(strs.toArray, schema)).copy() - } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala index 0ca442baeea2f..769d76a9b1c2c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala @@ -21,20 +21,13 @@ import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} +import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsMetadataColumns, Table} -import org.apache.spark.sql.types.StructType case class DescribeTableExec( output: Seq[Attribute], table: Table, isExtended: Boolean) extends V2CommandExec { - - private val toRow = { - RowEncoder(StructType.fromAttributes(output)).resolveAndBind().createSerializer() - } - override protected def run(): Seq[InternalRow] = { val rows = new ArrayBuffer[InternalRow]() addSchema(rows) @@ -99,8 +92,4 @@ case class DescribeTableExec( } private def emptyRow(): InternalRow = toCatalystRow("", "", "") - - private def toCatalystRow(strs: String*): InternalRow = { - toRow(new GenericRowWithSchema(strs.toArray, schema)).copy() - } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCurrentNamespaceExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCurrentNamespaceExec.scala index 5f7b6f4061467..121ae1c5b1176 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCurrentNamespaceExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCurrentNamespaceExec.scala @@ -18,8 +18,7 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} +import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper @@ -31,11 +30,6 @@ case class ShowCurrentNamespaceExec( catalogManager: CatalogManager) extends V2CommandExec { override protected def run(): Seq[InternalRow] = { - val toRow = RowEncoder(schema).resolveAndBind().createSerializer() - val result = new GenericRowWithSchema(Array[Any]( - catalogManager.currentCatalog.name, - catalogManager.currentNamespace.quoted), - schema) - Seq(toRow(result).copy()) + Seq(toCatalystRow(catalogManager.currentCatalog.name, catalogManager.currentNamespace.quoted)) } } diff 
--git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowNamespacesExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowNamespacesExec.scala index ceeed0f840700..9dafbd79a527e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowNamespacesExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowNamespacesExec.scala @@ -20,8 +20,7 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} +import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.util.StringUtils import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper import org.apache.spark.sql.connector.catalog.SupportsNamespaces @@ -44,11 +43,9 @@ case class ShowNamespacesExec( } val rows = new ArrayBuffer[InternalRow]() - val toRow = RowEncoder(schema).resolveAndBind().createSerializer() - namespaces.map(_.quoted).map { ns => if (pattern.map(StringUtils.filterPattern(Seq(ns), _).nonEmpty).getOrElse(true)) { - rows += toRow(new GenericRowWithSchema(Array(ns), schema)).copy() + rows += toCatalystRow(ns) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala index 6d3a94ef15631..4e1633e1460ec 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala @@ -18,8 +18,7 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} +import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Table} /** @@ -32,19 +31,18 @@ case class ShowTablePropertiesExec( override protected def run(): Seq[InternalRow] = { import scala.collection.JavaConverters._ - val toRow = RowEncoder(schema).resolveAndBind().createSerializer() // The reserved properties are accessible through DESCRIBE val properties = catalogTable.properties.asScala - .filter { case (k, v) => !CatalogV2Util.TABLE_RESERVED_PROPERTIES.contains(k) } + .filter { case (k, _) => !CatalogV2Util.TABLE_RESERVED_PROPERTIES.contains(k) } propertyKey match { case Some(p) => val propValue = properties .getOrElse(p, s"Table ${catalogTable.name} does not have property: $p") - Seq(toRow(new GenericRowWithSchema(Array(p, propValue), schema)).copy()) + Seq(toCatalystRow(p, propValue)) case None => properties.keys.map(k => - toRow(new GenericRowWithSchema(Array(k, properties(k)), schema)).copy()).toSeq + toCatalystRow(k, properties(k))).toSeq } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablesExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablesExec.scala index 5ba01deae9513..7ada8d2e5c39d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablesExec.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablesExec.scala @@ -20,8 +20,7 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} +import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.util.StringUtils import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper import org.apache.spark.sql.connector.catalog.TableCatalog @@ -37,15 +36,11 @@ case class ShowTablesExec( pattern: Option[String]) extends V2CommandExec with LeafExecNode { override protected def run(): Seq[InternalRow] = { val rows = new ArrayBuffer[InternalRow]() - val toRow = RowEncoder(schema).resolveAndBind().createSerializer() val tables = catalog.listTables(namespace.toArray) tables.map { table => if (pattern.map(StringUtils.filterPattern(Seq(table.name()), _).nonEmpty).getOrElse(true)) { - val result = new GenericRowWithSchema( - Array(table.namespace().quoted, table.name()), - schema) - rows += toRow(result).copy() + rows += toCatalystRow(table.namespace().quoted, table.name()) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala index 6b193674cc71a..b54c46fc15e7c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala @@ -19,8 +19,10 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.AttributeSet +import org.apache.spark.sql.catalyst.encoders.RowEncoder +import org.apache.spark.sql.catalyst.expressions.{AttributeSet, GenericRowWithSchema} import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.types.StructType /** * A physical operator that executes run() and saves the result to prevent multiple executions. @@ -59,4 +61,11 @@ abstract class V2CommandExec extends SparkPlan { override def producedAttributes: AttributeSet = outputSet + protected def toCatalystRow(strs: String*): InternalRow = { + rowSerializer(new GenericRowWithSchema(strs.toArray, schema)).copy() + } + + private lazy val rowSerializer = { + RowEncoder(StructType.fromAttributes(output)).resolveAndBind().createSerializer() + } } From 15a863fd54aa76cbb0f2a076bd94773529536add Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Mon, 4 Jan 2021 21:32:49 -0800 Subject: [PATCH 0957/1009] [SPARK-34001][SQL][TESTS] Remove unused runShowTablesSql() in DataSourceV2SQLSuite.scala ### What changes were proposed in this pull request? After #30287, `runShowTablesSql()` in `DataSourceV2SQLSuite.scala` is no longer used. This PR removes the unused method. ### Why are the changes needed? To remove unused method. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing test. Closes #31022 from imback82/33382-followup. 
Authored-by: Terry Kim Signed-off-by: Dongjoon Hyun --- .../sql/connector/DataSourceV2SQLSuite.scala | 22 +------------------ 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 47829b68cc617..0d61306628a44 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} import org.apache.spark.sql.internal.SQLConf.{PARTITION_OVERWRITE_MODE, PartitionOverwriteMode, V2_SESSION_CATALOG_IMPLEMENTATION} import org.apache.spark.sql.internal.connector.SimpleTableProvider import org.apache.spark.sql.sources.SimpleScanSource -import org.apache.spark.sql.types.{BooleanType, LongType, StringType, StructField, StructType} +import org.apache.spark.sql.types.{LongType, StringType, StructField, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.storage.StorageLevel import org.apache.spark.unsafe.types.UTF8String @@ -991,26 +991,6 @@ class DataSourceV2SQLSuite " only SessionCatalog supports this command.")) } - private def runShowTablesSql( - sqlText: String, - expected: Seq[Row], - expectV2Catalog: Boolean = true): Unit = { - val schema = if (expectV2Catalog) { - new StructType() - .add("namespace", StringType, nullable = false) - .add("tableName", StringType, nullable = false) - } else { - new StructType() - .add("database", StringType, nullable = false) - .add("tableName", StringType, nullable = false) - .add("isTemporary", BooleanType, nullable = false) - } - - val df = spark.sql(sqlText) - assert(df.schema === schema) - assert(expected === df.collect()) - } - test("CreateNameSpace: basic tests") { // Session catalog is used. withNamespace("ns") { From f0ffe0cd652188873f2ec007e4e282744717a0b3 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 5 Jan 2021 05:34:11 +0000 Subject: [PATCH 0958/1009] [SPARK-33992][SQL] override transformUpWithNewOutput to add allowInvokingTransformsInAnalyzer ### What changes were proposed in this pull request? In https://github.com/apache/spark/pull/29643, we move the plan rewriting methods to QueryPlan. we need to override transformUpWithNewOutput to add allowInvokingTransformsInAnalyzer because it and resolveOperatorsUpWithNewOutput are called in the analyzer. 
For example, PaddingAndLengthCheckForCharVarchar could fail query when resolveOperatorsUpWithNewOutput with ```logtalk [info] - char/varchar resolution in sub query *** FAILED *** (367 milliseconds) [info] java.lang.RuntimeException: This method should not be called in the analyzer [info] at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.assertNotAnalysisRule(AnalysisHelper.scala:150) [info] at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.assertNotAnalysisRule$(AnalysisHelper.scala:146) [info] at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.assertNotAnalysisRule(LogicalPlan.scala:29) [info] at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDown(AnalysisHelper.scala:161) [info] at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDown$(AnalysisHelper.scala:160) [info] at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29) [info] at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29) [info] at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$updateOuterReferencesInSubquery(QueryPlan.scala:267) ``` ### Why are the changes needed? trivial bugfix ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? new tests Closes #31013 from yaooqinn/SPARK-33992. Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../catalyst/plans/logical/AnalysisHelper.scala | 9 +++++++++ .../apache/spark/sql/CharVarcharTestSuite.scala | 15 +++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala index ffd1f784e4670..54b01416381c6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala @@ -133,6 +133,15 @@ trait AnalysisHelper extends QueryPlan[LogicalPlan] { self: LogicalPlan => } } + override def transformUpWithNewOutput( + rule: PartialFunction[LogicalPlan, (LogicalPlan, Seq[(Attribute, Attribute)])], + skipCond: LogicalPlan => Boolean, + canGetOutput: LogicalPlan => Boolean): LogicalPlan = { + AnalysisHelper.allowInvokingTransformsInAnalyzer { + super.transformUpWithNewOutput(rule, skipCond, canGetOutput) + } + } + /** * Recursively transforms the expressions of a tree, skipping nodes that have already * been analyzed. 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index 62d0f51e5ff75..d20cee0815d4d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -451,6 +451,21 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { Seq(Row("char(5)"), Row("varchar(3)"))) } } + + test("SPARK-33992: char/varchar resolution in correlated sub query") { + withTable("t1", "t2") { + sql(s"CREATE TABLE t1(v VARCHAR(3), c CHAR(5)) USING $format") + sql(s"CREATE TABLE t2(v VARCHAR(3), c CHAR(5)) USING $format") + sql("INSERT INTO t1 VALUES ('c', 'b')") + sql("INSERT INTO t2 VALUES ('a', 'b')") + + checkAnswer(sql( + """ + |SELECT v FROM t1 + |WHERE 'a' IN (SELECT v FROM t2 WHERE t1.c = t2.c )""".stripMargin), + Row("c")) + } + } } // Some basic char/varchar tests which doesn't rely on table implementation. From a7d3fcd354289c1d0f5c80887b4f33beb3ad96a2 Mon Sep 17 00:00:00 2001 From: LantaoJin Date: Mon, 4 Jan 2021 21:37:26 -0800 Subject: [PATCH 0959/1009] [SPARK-34000][CORE] Fix stageAttemptToNumSpeculativeTasks java.util.NoSuchElementException ### What changes were proposed in this pull request? From below log, Stage 600 could be removed from `stageAttemptToNumSpeculativeTasks` by `onStageCompleted()`, but the speculative task 306.1 in stage 600 threw `NoSuchElementException` when it entered into `onTaskEnd()`. ``` 21/01/04 03:00:32,259 WARN [task-result-getter-2] scheduler.TaskSetManager:69 : Lost task 306.1 in stage 600.0 (TID 283610, hdc49-mcc10-01-0510-4108-039-tess0097.stratus.rno.ebay.com, executor 27): TaskKilled (another attempt succeeded) 21/01/04 03:00:32,259 INFO [task-result-getter-2] scheduler.TaskSetManager:57 : Task 306.1 in stage 600.0 (TID 283610) failed, but the task will not be re-executed (either because the task failed with a shuffle data fetch failure, so the previous stage needs to be re-run, or because a different copy of the task has already succeeded). 
21/01/04 03:00:32,259 INFO [task-result-getter-2] cluster.YarnClusterScheduler:57 : Removed TaskSet 600.0, whose tasks have all completed, from pool default 21/01/04 03:00:32,259 INFO [HiveServer2-Handler-Pool: Thread-5853] thriftserver.SparkExecuteStatementOperation:190 : Returning result set with 50 rows from offsets [5378600, 5378650) with 1fe245f8-a7f9-4ec0-bcb5-8cf324cbbb47 21/01/04 03:00:32,260 ERROR [spark-listener-group-executorManagement] scheduler.AsyncEventQueue:94 : Listener ExecutorAllocationListener threw an exception java.util.NoSuchElementException: key not found: Stage 600 (Attempt 0) at scala.collection.MapLike.default(MapLike.scala:235) at scala.collection.MapLike.default$(MapLike.scala:234) at scala.collection.AbstractMap.default(Map.scala:63) at scala.collection.mutable.HashMap.apply(HashMap.scala:69) at org.apache.spark.ExecutorAllocationManager$ExecutorAllocationListener.onTaskEnd(ExecutorAllocationManager.scala:621) at org.apache.spark.scheduler.SparkListenerBus.doPostEvent(SparkListenerBus.scala:45) at org.apache.spark.scheduler.SparkListenerBus.doPostEvent$(SparkListenerBus.scala:28) at org.apache.spark.scheduler.AsyncEventQueue.doPostEvent(AsyncEventQueue.scala:38) at org.apache.spark.scheduler.AsyncEventQueue.doPostEvent(AsyncEventQueue.scala:38) at org.apache.spark.util.ListenerBus.postToAll(ListenerBus.scala:115) at org.apache.spark.util.ListenerBus.postToAll$(ListenerBus.scala:99) at org.apache.spark.scheduler.AsyncEventQueue.super$postToAll(AsyncEventQueue.scala:116) at org.apache.spark.scheduler.AsyncEventQueue.$anonfun$dispatch$1(AsyncEventQueue.scala:116) at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62) at org.apache.spark.scheduler.AsyncEventQueue.org$apache$spark$scheduler$AsyncEventQueue$$dispatch(AsyncEventQueue.scala:102) at org.apache.spark.scheduler.AsyncEventQueue$$anon$2.$anonfun$run$1(AsyncEventQueue.scala:97) at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1320) at org.apache.spark.scheduler.AsyncEventQueue$$anon$2.run(AsyncEventQueue.scala:97) ``` ### Why are the changes needed? To avoid throwing the java.util.NoSuchElementException ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? This is a protective patch and it's not easy to reproduce in UT due to the event order is not fixed in a async queue. Closes #31025 from LantaoJin/SPARK-34000. Authored-by: LantaoJin Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/ExecutorAllocationManager.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index 61ab63584269b..a83762ff01ccb 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -798,7 +798,11 @@ private[spark] class ExecutorAllocationManager( } if (taskEnd.taskInfo.speculative) { stageAttemptToSpeculativeTaskIndices.get(stageAttempt).foreach {_.remove{taskIndex}} - stageAttemptToNumSpeculativeTasks(stageAttempt) -= 1 + // If the previous task attempt succeeded first and it was the last task in a stage, + // the stage may have been removed before handing this speculative TaskEnd event. 
+ if (stageAttemptToNumSpeculativeTasks.contains(stageAttempt)) { + stageAttemptToNumSpeculativeTasks(stageAttempt) -= 1 + } } taskEnd.reason match { From a071826f72cd717a58bf37b877f805490f7a147f Mon Sep 17 00:00:00 2001 From: fwang12 Date: Tue, 5 Jan 2021 15:55:30 +0900 Subject: [PATCH 0960/1009] [SPARK-33100][SQL] Ignore a semicolon inside a bracketed comment in spark-sql ### What changes were proposed in this pull request? Now the spark-sql does not support parse the sql statements with bracketed comments. For the sql statements: ``` /* SELECT 'test'; */ SELECT 'test'; ``` Would be split to two statements: The first one: `/* SELECT 'test'` The second one: `*/ SELECT 'test'` Then it would throw an exception because the first one is illegal. In this PR, we ignore the content in bracketed comments while splitting the sql statements. Besides, we ignore the comment without any content. ### Why are the changes needed? Spark-sql might split the statements inside bracketed comments and it is not correct. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added UT. Closes #29982 from turboFei/SPARK-33110. Lead-authored-by: fwang12 Co-authored-by: turbofei Signed-off-by: Takeshi Yamamuro --- .../hive/thriftserver/SparkSQLCLIDriver.scala | 40 +++++++++++++++---- .../sql/hive/thriftserver/CliSuite.scala | 23 +++++++++++ 2 files changed, 55 insertions(+), 8 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index f2fd373bf6cc0..9155eacfa4896 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -522,14 +522,22 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { // Note: [SPARK-31595] if there is a `'` in a double quoted string, or a `"` in a single quoted // string, the origin implementation from Hive will not drop the trailing semicolon as expected, // hence we refined this function a little bit. + // Note: [SPARK-33100] Ignore a semicolon inside a bracketed comment in spark-sql. private def splitSemiColon(line: String): JList[String] = { var insideSingleQuote = false var insideDoubleQuote = false - var insideComment = false + var insideSimpleComment = false + var bracketedCommentLevel = 0 var escape = false var beginIndex = 0 + var includingStatement = false val ret = new JArrayList[String] + def insideBracketedComment: Boolean = bracketedCommentLevel > 0 + def insideComment: Boolean = insideSimpleComment || insideBracketedComment + def statementBegin(index: Int): Boolean = includingStatement || (!insideComment && + index > beginIndex && !s"${line.charAt(index)}".trim.isEmpty) + for (index <- 0 until line.length) { if (line.charAt(index) == '\'' && !insideComment) { // take a look to see if it is escaped @@ -553,21 +561,33 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { // Sample query: select "quoted value --" // ^^ avoids starting a comment if it's inside quotes. 
} else if (hasNext && line.charAt(index + 1) == '-') { - // ignore quotes and ; - insideComment = true + // ignore quotes and ; in simple comment + insideSimpleComment = true } } else if (line.charAt(index) == ';') { if (insideSingleQuote || insideDoubleQuote || insideComment) { // do not split } else { - // split, do not include ; itself - ret.add(line.substring(beginIndex, index)) + if (includingStatement) { + // split, do not include ; itself + ret.add(line.substring(beginIndex, index)) + } beginIndex = index + 1 + includingStatement = false } } else if (line.charAt(index) == '\n') { - // with a new line the inline comment should end. + // with a new line the inline simple comment should end. if (!escape) { - insideComment = false + insideSimpleComment = false + } + } else if (line.charAt(index) == '/' && !insideSimpleComment) { + val hasNext = index + 1 < line.length + if (insideSingleQuote || insideDoubleQuote) { + // Ignores '/' in any case of quotes + } else if (insideBracketedComment && line.charAt(index - 1) == '*' ) { + bracketedCommentLevel -= 1 + } else if (hasNext && !insideBracketedComment && line.charAt(index + 1) == '*') { + bracketedCommentLevel += 1 } } // set the escape @@ -576,8 +596,12 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { } else if (line.charAt(index) == '\\') { escape = true } + + includingStatement = statementBegin(index) + } + if (includingStatement) { + ret.add(line.substring(beginIndex)) } - ret.add(line.substring(beginIndex)) ret } } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index d39b94503fe40..6708cf99e7f41 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -571,4 +571,27 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { // the date formatter for `java.sql.LocalDate` must output negative years with sign. runCliWithin(1.minute)("SELECT MAKE_DATE(-44, 3, 15);" -> "-0044-03-15") } + + test("SPARK-33100: Ignore a semicolon inside a bracketed comment in spark-sql") { + runCliWithin(4.minute)( + "/* SELECT 'test';*/ SELECT 'test';" -> "test", + ";;/* SELECT 'test';*/ SELECT 'test';" -> "test", + "/* SELECT 'test';*/;; SELECT 'test';" -> "test", + "SELECT 'test'; -- SELECT 'test';" -> "", + "SELECT 'test'; /* SELECT 'test';*/;" -> "", + "/*$meta chars{^\\;}*/ SELECT 'test';" -> "test", + "/*\nmulti-line\n*/ SELECT 'test';" -> "test", + "/*/* multi-level bracketed*/ SELECT 'test';" -> "test" + ) + } + + test("SPARK-33100: test sql statements with hint in bracketed comment") { + runCliWithin(2.minute)( + "CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES(1, 2) AS t1(k, v);" -> "", + "CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES(2, 1) AS t2(k, v);" -> "", + "EXPLAIN SELECT /*+ MERGEJOIN(t1) */ t1.* FROM t1 JOIN t2 ON t1.k = t2.v;" -> "SortMergeJoin", + "EXPLAIN SELECT /* + MERGEJOIN(t1) */ t1.* FROM t1 JOIN t2 ON t1.k = t2.v;" + -> "BroadcastHashJoin" + ) + } } From f252a9334e49dc359dd9255fcfe17a6bc75b8781 Mon Sep 17 00:00:00 2001 From: "tanel.kiis@gmail.com" Date: Tue, 5 Jan 2021 16:00:24 +0900 Subject: [PATCH 0961/1009] [SPARK-33935][SQL] Fix CBO cost function ### What changes were proposed in this pull request? Changed the cost function in CBO to match documentation. ### Why are the changes needed? 
The parameter `spark.sql.cbo.joinReorder.card.weight` is documented as: ``` The weight of cardinality (number of rows) for plan cost comparison in join reorder: rows * weight + size * (1 - weight). ``` The implementation in `JoinReorderDP.betterThan` does not match this documentation: ``` def betterThan(other: JoinPlan, conf: SQLConf): Boolean = { if (other.planCost.card == 0 || other.planCost.size == 0) { false } else { val relativeRows = BigDecimal(this.planCost.card) / BigDecimal(other.planCost.card) val relativeSize = BigDecimal(this.planCost.size) / BigDecimal(other.planCost.size) relativeRows * conf.joinReorderCardWeight + relativeSize * (1 - conf.joinReorderCardWeight) < 1 } } ``` This different implementation has an unfortunate consequence: given two plans A and B, both A betterThan B and B betterThan A can return false. This happens when one plan has many rows with a small size and the other has few rows with a large size. Example values that show this phenomenon with the default weight value (0.7): A.card = 500, B.card = 300, A.size = 30, B.size = 80. Both A betterThan B and B betterThan A would have a score above 1 (about 1.28 and 1.22 respectively) and would return false. This happens with several of the TPCDS queries. The new implementation does not have this behavior. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? New and existing UTs Closes #30965 from tanelk/SPARK-33935_cbo_cost_function. Authored-by: tanel.kiis@gmail.com Signed-off-by: Takeshi Yamamuro --- .../optimizer/CostBasedJoinReorder.scala | 13 +- .../joinReorder/JoinReorderSuite.scala | 15 + .../StarJoinCostBasedReorderSuite.scala | 8 +- .../approved-plans-v1_4/q13.sf100/explain.txt | 132 ++-- .../q13.sf100/simplified.txt | 34 +- .../approved-plans-v1_4/q17.sf100/explain.txt | 194 +++--- .../q17.sf100/simplified.txt | 130 ++-- .../approved-plans-v1_4/q18.sf100/explain.txt | 158 ++--- .../q18.sf100/simplified.txt | 50 +- .../approved-plans-v1_4/q19.sf100/explain.txt | 368 +++++------ .../q19.sf100/simplified.txt | 116 ++-- .../q24a.sf100/explain.txt | 118 ++-- .../q24a.sf100/simplified.txt | 34 +- .../q24b.sf100/explain.txt | 118 ++-- .../q24b.sf100/simplified.txt | 34 +- .../approved-plans-v1_4/q25.sf100/explain.txt | 194 +++--- .../q25.sf100/simplified.txt | 130 ++-- .../approved-plans-v1_4/q33.sf100/explain.txt | 264 ++++---- .../q33.sf100/simplified.txt | 58 +- .../approved-plans-v1_4/q52.sf100/explain.txt | 138 ++--- .../q52.sf100/simplified.txt | 26 +- .../approved-plans-v1_4/q55.sf100/explain.txt | 134 ++-- .../q55.sf100/simplified.txt | 26 +- .../approved-plans-v1_4/q72.sf100/explain.txt | 264 ++++---- .../q72.sf100/simplified.txt | 150 ++--- .../approved-plans-v1_4/q81.sf100/explain.txt | 570 +++++++++--------- .../q81.sf100/simplified.txt | 142 ++--- .../approved-plans-v1_4/q91.sf100/explain.txt | 306 +++++----- .../q91.sf100/simplified.txt | 62 +- .../q18a.sf100/explain.txt | 306 +++++----- .../q18a.sf100/simplified.txt | 54 +- .../approved-plans-v2_7/q72.sf100/explain.txt | 264 ++++---- .../q72.sf100/simplified.txt | 150 ++--- 33 files changed, 2386 insertions(+), 2374 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala index 11b675e75869e..c41686da79487 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala +++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala @@ -349,14 +349,11 @@ object JoinReorderDP extends PredicateHelper with Logging { } def betterThan(other: JoinPlan, conf: SQLConf): Boolean = { - if (other.planCost.card == 0 || other.planCost.size == 0) { - false - } else { - val relativeRows = BigDecimal(this.planCost.card) / BigDecimal(other.planCost.card) - val relativeSize = BigDecimal(this.planCost.size) / BigDecimal(other.planCost.size) - relativeRows * conf.joinReorderCardWeight + - relativeSize * (1 - conf.joinReorderCardWeight) < 1 - } + val thisCost = BigDecimal(this.planCost.card) * conf.joinReorderCardWeight + + BigDecimal(this.planCost.size) * (1 - conf.joinReorderCardWeight) + val otherCost = BigDecimal(other.planCost.card) * conf.joinReorderCardWeight + + BigDecimal(other.planCost.size) * (1 - conf.joinReorderCardWeight) + thisCost < otherCost } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/JoinReorderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/JoinReorderSuite.scala index b84207397e5cc..2e1cf4a137e25 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/JoinReorderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/JoinReorderSuite.scala @@ -21,6 +21,7 @@ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap} import org.apache.spark.sql.catalyst.optimizer._ +import org.apache.spark.sql.catalyst.optimizer.JoinReorderDP.JoinPlan import org.apache.spark.sql.catalyst.plans.{Cross, Inner} import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor @@ -363,4 +364,18 @@ class JoinReorderSuite extends JoinReorderPlanTestBase with StatsEstimationTestB assertEqualJoinPlans(Optimize, originalPlan3, bestPlan3) } + + test("SPARK-33935: betterThan should be consistent") { + val plan1 = JoinPlan(null, null, null, Cost(300, 80)) + val plan2 = JoinPlan(null, null, null, Cost(500, 30)) + + // cost1 = 300*0.7 + 80*0.3 = 234 + // cost2 = 500*0.7 + 30*0.3 = 359 + + assert(!plan1.betterThan(plan1, conf)) + assert(!plan2.betterThan(plan2, conf)) + + assert(plan1.betterThan(plan2, conf)) + assert(!plan2.betterThan(plan1, conf)) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/StarJoinCostBasedReorderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/StarJoinCostBasedReorderSuite.scala index 703be48c6a2a9..a42914765dcc8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/StarJoinCostBasedReorderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/StarJoinCostBasedReorderSuite.scala @@ -294,12 +294,12 @@ class StarJoinCostBasedReorderSuite extends JoinReorderPlanTestBase with StatsEs (nameToAttr("f1_fk2") === nameToAttr("d2_pk"))) val expected = - f1.join(d2, Inner, Some(nameToAttr("f1_fk2") === nameToAttr("d2_pk"))) - .join(d1, Inner, Some(nameToAttr("f1_fk1") === nameToAttr("d1_pk"))) - .join(t3.join(t4, Inner, Some(nameToAttr("t3_c1") === nameToAttr("t4_c1"))), Inner, - Some(nameToAttr("t3_c1") === nameToAttr("t4_c1"))) + t3.join(t4, Inner, Some(nameToAttr("t3_c1") === 
nameToAttr("t4_c1"))) .join(t1.join(t2, Inner, Some(nameToAttr("t1_c1") === nameToAttr("t2_c1"))), Inner, Some(nameToAttr("t1_c2") === nameToAttr("t4_c2"))) + .join(f1 + .join(d2, Inner, Some(nameToAttr("f1_fk2") === nameToAttr("d2_pk"))) + .join(d1, Inner, Some(nameToAttr("f1_fk1") === nameToAttr("d1_pk")))) .select(outputsOf(d1, t1, t2, t3, t4, f1, d2): _*) assertEqualJoinPlans(Optimize, query, expected) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/explain.txt index 8ee427262b332..327e7db702faa 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/explain.txt @@ -4,8 +4,8 @@ +- * HashAggregate (36) +- * Project (35) +- * BroadcastHashJoin Inner BuildRight (34) - :- * Project (28) - : +- * BroadcastHashJoin Inner BuildRight (27) + :- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) : :- * Project (22) : : +- * BroadcastHashJoin Inner BuildRight (21) : : :- * Project (15) @@ -27,16 +27,16 @@ : : +- * Project (19) : : +- * Filter (18) : : +- * ColumnarToRow (17) - : : +- Scan parquet default.date_dim (16) - : +- BroadcastExchange (26) - : +- * Filter (25) - : +- * ColumnarToRow (24) - : +- Scan parquet default.store (23) + : : +- Scan parquet default.customer_address (16) + : +- BroadcastExchange (27) + : +- * Project (26) + : +- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet default.date_dim (23) +- BroadcastExchange (33) - +- * Project (32) - +- * Filter (31) - +- * ColumnarToRow (30) - +- Scan parquet default.customer_address (29) + +- * Filter (32) + +- * ColumnarToRow (31) + +- Scan parquet default.store (30) (1) Scan parquet default.store_sales @@ -107,94 +107,94 @@ Join condition: (((((((cd_marital_status#12 = M) AND (cd_education_status#13 = A Output [7]: [ss_sold_date_sk#1, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] Input [13]: [ss_sold_date_sk#1, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, cd_marital_status#12, cd_education_status#13, hd_demo_sk#15, hd_dep_count#16] -(16) Scan parquet default.date_dim -Output [2]: [d_date_sk#18, d_year#19] +(16) Scan parquet default.customer_address +Output [3]: [ca_address_sk#18, ca_state#19, ca_country#20] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [TX,OH]),In(ca_state, [OR,NM,KY])),In(ca_state, [VA,TX,MS]))] +ReadSchema: struct (17) ColumnarToRow [codegen id : 3] -Input [2]: [d_date_sk#18, d_year#19] +Input [3]: [ca_address_sk#18, ca_state#19, ca_country#20] (18) Filter [codegen id : 3] -Input [2]: [d_date_sk#18, d_year#19] -Condition : ((isnotnull(d_year#19) AND (d_year#19 = 2001)) AND isnotnull(d_date_sk#18)) +Input [3]: [ca_address_sk#18, ca_state#19, ca_country#20] +Condition : (((isnotnull(ca_country#20) AND (ca_country#20 = United States)) AND isnotnull(ca_address_sk#18)) AND ((ca_state#19 IN (TX,OH) OR ca_state#19 IN (OR,NM,KY)) OR ca_state#19 IN 
(VA,TX,MS))) (19) Project [codegen id : 3] -Output [1]: [d_date_sk#18] -Input [2]: [d_date_sk#18, d_year#19] +Output [2]: [ca_address_sk#18, ca_state#19] +Input [3]: [ca_address_sk#18, ca_state#19, ca_country#20] (20) BroadcastExchange -Input [1]: [d_date_sk#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] +Input [2]: [ca_address_sk#18, ca_state#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] (21) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#18] -Join condition: None +Left keys [1]: [ss_addr_sk#4] +Right keys [1]: [ca_address_sk#18] +Join condition: ((((ca_state#19 IN (TX,OH) AND (ss_net_profit#10 >= 100.00)) AND (ss_net_profit#10 <= 200.00)) OR ((ca_state#19 IN (OR,NM,KY) AND (ss_net_profit#10 >= 150.00)) AND (ss_net_profit#10 <= 300.00))) OR ((ca_state#19 IN (VA,TX,MS) AND (ss_net_profit#10 >= 50.00)) AND (ss_net_profit#10 <= 250.00))) (22) Project [codegen id : 6] -Output [6]: [ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] -Input [8]: [ss_sold_date_sk#1, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, d_date_sk#18] +Output [5]: [ss_sold_date_sk#1, ss_store_sk#5, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9] +Input [9]: [ss_sold_date_sk#1, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, ca_address_sk#18, ca_state#19] -(23) Scan parquet default.store -Output [1]: [s_store_sk#21] +(23) Scan parquet default.date_dim +Output [2]: [d_date_sk#22, d_year#23] Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct (24) ColumnarToRow [codegen id : 4] -Input [1]: [s_store_sk#21] +Input [2]: [d_date_sk#22, d_year#23] (25) Filter [codegen id : 4] -Input [1]: [s_store_sk#21] -Condition : isnotnull(s_store_sk#21) +Input [2]: [d_date_sk#22, d_year#23] +Condition : ((isnotnull(d_year#23) AND (d_year#23 = 2001)) AND isnotnull(d_date_sk#22)) -(26) BroadcastExchange -Input [1]: [s_store_sk#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] +(26) Project [codegen id : 4] +Output [1]: [d_date_sk#22] +Input [2]: [d_date_sk#22, d_year#23] -(27) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_store_sk#5] -Right keys [1]: [s_store_sk#21] +(27) BroadcastExchange +Input [1]: [d_date_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#24] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#22] Join condition: None -(28) Project [codegen id : 6] -Output [5]: [ss_addr_sk#4, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] -Input [7]: [ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, s_store_sk#21] +(29) Project [codegen id : 6] +Output [4]: [ss_store_sk#5, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9] +Input [6]: [ss_sold_date_sk#1, ss_store_sk#5, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, d_date_sk#22] -(29) Scan parquet 
default.customer_address -Output [3]: [ca_address_sk#23, ca_state#24, ca_country#25] +(30) Scan parquet default.store +Output [1]: [s_store_sk#25] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [TX,OH]),In(ca_state, [OR,NM,KY])),In(ca_state, [VA,TX,MS]))] -ReadSchema: struct - -(30) ColumnarToRow [codegen id : 5] -Input [3]: [ca_address_sk#23, ca_state#24, ca_country#25] +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct -(31) Filter [codegen id : 5] -Input [3]: [ca_address_sk#23, ca_state#24, ca_country#25] -Condition : (((isnotnull(ca_country#25) AND (ca_country#25 = United States)) AND isnotnull(ca_address_sk#23)) AND ((ca_state#24 IN (TX,OH) OR ca_state#24 IN (OR,NM,KY)) OR ca_state#24 IN (VA,TX,MS))) +(31) ColumnarToRow [codegen id : 5] +Input [1]: [s_store_sk#25] -(32) Project [codegen id : 5] -Output [2]: [ca_address_sk#23, ca_state#24] -Input [3]: [ca_address_sk#23, ca_state#24, ca_country#25] +(32) Filter [codegen id : 5] +Input [1]: [s_store_sk#25] +Condition : isnotnull(s_store_sk#25) (33) BroadcastExchange -Input [2]: [ca_address_sk#23, ca_state#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] +Input [1]: [s_store_sk#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] (34) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_addr_sk#4] -Right keys [1]: [ca_address_sk#23] -Join condition: ((((ca_state#24 IN (TX,OH) AND (ss_net_profit#10 >= 100.00)) AND (ss_net_profit#10 <= 200.00)) OR ((ca_state#24 IN (OR,NM,KY) AND (ss_net_profit#10 >= 150.00)) AND (ss_net_profit#10 <= 300.00))) OR ((ca_state#24 IN (VA,TX,MS) AND (ss_net_profit#10 >= 50.00)) AND (ss_net_profit#10 <= 250.00))) +Left keys [1]: [ss_store_sk#5] +Right keys [1]: [s_store_sk#25] +Join condition: None (35) Project [codegen id : 6] Output [3]: [ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9] -Input [7]: [ss_addr_sk#4, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, ca_address_sk#23, ca_state#24] +Input [5]: [ss_store_sk#5, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, s_store_sk#25] (36) HashAggregate [codegen id : 6] Input [3]: [ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9] @@ -205,7 +205,7 @@ Results [7]: [sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40] (37) Exchange Input [7]: [sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40] -Arguments: SinglePartition, true, [id=#41] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#41] (38) HashAggregate [codegen id : 7] Input [7]: [sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/simplified.txt index b457788dbd0b2..45d6c8f3b0bae 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/simplified.txt @@ -5,11 +5,11 @@ WholeStageCodegen (7) WholeStageCodegen (6) HashAggregate [ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] [sum,count,sum,count,sum,count,sum,sum,count,sum,count,sum,count,sum] Project 
[ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] - BroadcastHashJoin [ss_addr_sk,ca_address_sk,ca_state,ss_net_profit] - Project [ss_addr_sk,ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_addr_sk,ss_store_sk,ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_store_sk,ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] + BroadcastHashJoin [ss_addr_sk,ca_address_sk,ca_state,ss_net_profit] Project [ss_sold_date_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit] BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk,cd_marital_status,cd_education_status,ss_sales_price,hd_dep_count] Project [ss_sold_date_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit,cd_marital_status,cd_education_status] @@ -35,23 +35,23 @@ WholeStageCodegen (7) InputAdapter BroadcastExchange #4 WholeStageCodegen (3) - Project [d_date_sk] - Filter [d_year,d_date_sk] + Project [ca_address_sk,ca_state] + Filter [ca_country,ca_address_sk,ca_state] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + Scan parquet default.customer_address [ca_address_sk,ca_state,ca_country] InputAdapter BroadcastExchange #5 WholeStageCodegen (4) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk] + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter BroadcastExchange #6 WholeStageCodegen (5) - Project [ca_address_sk,ca_state] - Filter [ca_country,ca_address_sk,ca_state] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_state,ca_country] + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/explain.txt index a17356ae04a03..a9ab8c3690a00 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/explain.txt @@ -5,57 +5,57 @@ TakeOrderedAndProject (57) +- * HashAggregate (54) +- * Project (53) +- * SortMergeJoin Inner (52) - :- * Sort (43) - : +- Exchange (42) - : +- * Project (41) - : +- * SortMergeJoin Inner (40) - : :- * Sort (27) - : : +- Exchange (26) - : : +- * Project (25) - : : +- * SortMergeJoin Inner (24) - : : :- * Sort (18) - : : : +- Exchange (17) - : : : +- * Project (16) - : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : :- * Project (10) - : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.store_sales (1) - : : : : +- BroadcastExchange (8) - : : : : +- * Project (7) - : : : : +- * Filter (6) - : : : : +- * ColumnarToRow (5) - : : : : +- Scan parquet default.date_dim (4) - : : : +- BroadcastExchange (14) - : : : +- * Filter (13) - : : : +- * ColumnarToRow (12) - : : : +- Scan parquet default.store (11) - : : +- * Sort (23) - : : +- 
Exchange (22) - : : +- * Filter (21) - : : +- * ColumnarToRow (20) - : : +- Scan parquet default.item (19) - : +- * Sort (39) - : +- Exchange (38) - : +- * Project (37) - : +- * BroadcastHashJoin Inner BuildRight (36) - : :- * Filter (30) - : : +- * ColumnarToRow (29) - : : +- Scan parquet default.store_returns (28) - : +- BroadcastExchange (35) - : +- * Project (34) - : +- * Filter (33) - : +- * ColumnarToRow (32) - : +- Scan parquet default.date_dim (31) + :- * Sort (27) + : +- Exchange (26) + : +- * Project (25) + : +- * SortMergeJoin Inner (24) + : :- * Sort (18) + : : +- Exchange (17) + : : +- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- * Sort (23) + : +- Exchange (22) + : +- * Filter (21) + : +- * ColumnarToRow (20) + : +- Scan parquet default.item (19) +- * Sort (51) +- Exchange (50) +- * Project (49) - +- * BroadcastHashJoin Inner BuildRight (48) - :- * Filter (46) - : +- * ColumnarToRow (45) - : +- Scan parquet default.catalog_sales (44) - +- ReusedExchange (47) + +- * SortMergeJoin Inner (48) + :- * Sort (39) + : +- Exchange (38) + : +- * Project (37) + : +- * BroadcastHashJoin Inner BuildRight (36) + : :- * Filter (30) + : : +- * ColumnarToRow (29) + : : +- Scan parquet default.store_returns (28) + : +- BroadcastExchange (35) + : +- * Project (34) + : +- * Filter (33) + : +- * ColumnarToRow (32) + : +- Scan parquet default.date_dim (31) + +- * Sort (47) + +- Exchange (46) + +- * Project (45) + +- * BroadcastHashJoin Inner BuildRight (44) + :- * Filter (42) + : +- * ColumnarToRow (41) + : +- Scan parquet default.catalog_sales (40) + +- ReusedExchange (43) (1) Scan parquet default.store_sales @@ -132,7 +132,7 @@ Input [7]: [ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, s (17) Exchange Input [5]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_state#11] -Arguments: hashpartitioning(ss_item_sk#2, 5), true, [id=#13] +Arguments: hashpartitioning(ss_item_sk#2, 5), ENSURE_REQUIREMENTS, [id=#13] (18) Sort [codegen id : 4] Input [5]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_state#11] @@ -154,7 +154,7 @@ Condition : isnotnull(i_item_sk#14) (22) Exchange Input [3]: [i_item_sk#14, i_item_id#15, i_item_desc#16] -Arguments: hashpartitioning(i_item_sk#14, 5), true, [id=#17] +Arguments: hashpartitioning(i_item_sk#14, 5), ENSURE_REQUIREMENTS, [id=#17] (23) Sort [codegen id : 6] Input [3]: [i_item_sk#14, i_item_id#15, i_item_desc#16] @@ -171,7 +171,7 @@ Input [8]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s (26) Exchange Input [7]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16] -Arguments: hashpartitioning(cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint), 5), true, [id=#18] +Arguments: hashpartitioning(cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint), 5), ENSURE_REQUIREMENTS, [id=#18] (27) Sort [codegen id : 8] Input [7]: [ss_item_sk#2, ss_customer_sk#3, 
ss_ticket_number#5, ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16] @@ -224,89 +224,89 @@ Input [6]: [sr_returned_date_sk#19, sr_item_sk#20, sr_customer_sk#21, sr_ticket_ (38) Exchange Input [4]: [sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23] -Arguments: hashpartitioning(sr_customer_sk#21, sr_item_sk#20, sr_ticket_number#22, 5), true, [id=#27] +Arguments: hashpartitioning(sr_customer_sk#21, sr_item_sk#20, 5), ENSURE_REQUIREMENTS, [id=#27] (39) Sort [codegen id : 11] Input [4]: [sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23] -Arguments: [sr_customer_sk#21 ASC NULLS FIRST, sr_item_sk#20 ASC NULLS FIRST, sr_ticket_number#22 ASC NULLS FIRST], false, 0 - -(40) SortMergeJoin [codegen id : 12] -Left keys [3]: [cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] -Right keys [3]: [sr_customer_sk#21, sr_item_sk#20, sr_ticket_number#22] -Join condition: None - -(41) Project [codegen id : 12] -Output [7]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#20, sr_customer_sk#21, sr_return_quantity#23] -Input [11]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23] - -(42) Exchange -Input [7]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#20, sr_customer_sk#21, sr_return_quantity#23] -Arguments: hashpartitioning(sr_customer_sk#21, sr_item_sk#20, 5), true, [id=#28] - -(43) Sort [codegen id : 13] -Input [7]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#20, sr_customer_sk#21, sr_return_quantity#23] Arguments: [sr_customer_sk#21 ASC NULLS FIRST, sr_item_sk#20 ASC NULLS FIRST], false, 0 -(44) Scan parquet default.catalog_sales -Output [4]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] +(40) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#28, cs_bill_customer_sk#29, cs_item_sk#30, cs_quantity#31] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_sales] PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] ReadSchema: struct -(45) ColumnarToRow [codegen id : 15] -Input [4]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] +(41) ColumnarToRow [codegen id : 13] +Input [4]: [cs_sold_date_sk#28, cs_bill_customer_sk#29, cs_item_sk#30, cs_quantity#31] + +(42) Filter [codegen id : 13] +Input [4]: [cs_sold_date_sk#28, cs_bill_customer_sk#29, cs_item_sk#30, cs_quantity#31] +Condition : ((isnotnull(cs_bill_customer_sk#29) AND isnotnull(cs_item_sk#30)) AND isnotnull(cs_sold_date_sk#28)) -(46) Filter [codegen id : 15] -Input [4]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] -Condition : ((isnotnull(cs_bill_customer_sk#30) AND isnotnull(cs_item_sk#31)) AND isnotnull(cs_sold_date_sk#29)) +(43) ReusedExchange [Reuses operator id: 35] +Output [1]: [d_date_sk#32] -(47) ReusedExchange [Reuses operator id: 35] -Output [1]: [d_date_sk#33] +(44) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cs_sold_date_sk#28] +Right keys [1]: [d_date_sk#32] +Join condition: None + +(45) Project [codegen id : 13] +Output [3]: [cs_bill_customer_sk#29, cs_item_sk#30, cs_quantity#31] +Input [5]: [cs_sold_date_sk#28, cs_bill_customer_sk#29, cs_item_sk#30, cs_quantity#31, d_date_sk#32] + +(46) Exchange +Input [3]: [cs_bill_customer_sk#29, cs_item_sk#30, 
cs_quantity#31] +Arguments: hashpartitioning(cast(cs_bill_customer_sk#29 as bigint), cast(cs_item_sk#30 as bigint), 5), ENSURE_REQUIREMENTS, [id=#33] -(48) BroadcastHashJoin [codegen id : 15] -Left keys [1]: [cs_sold_date_sk#29] -Right keys [1]: [d_date_sk#33] +(47) Sort [codegen id : 14] +Input [3]: [cs_bill_customer_sk#29, cs_item_sk#30, cs_quantity#31] +Arguments: [cast(cs_bill_customer_sk#29 as bigint) ASC NULLS FIRST, cast(cs_item_sk#30 as bigint) ASC NULLS FIRST], false, 0 + +(48) SortMergeJoin [codegen id : 15] +Left keys [2]: [sr_customer_sk#21, sr_item_sk#20] +Right keys [2]: [cast(cs_bill_customer_sk#29 as bigint), cast(cs_item_sk#30 as bigint)] Join condition: None (49) Project [codegen id : 15] -Output [3]: [cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] -Input [5]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32, d_date_sk#33] +Output [5]: [sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23, cs_quantity#31] +Input [7]: [sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23, cs_bill_customer_sk#29, cs_item_sk#30, cs_quantity#31] (50) Exchange -Input [3]: [cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] -Arguments: hashpartitioning(cast(cs_bill_customer_sk#30 as bigint), cast(cs_item_sk#31 as bigint), 5), true, [id=#34] +Input [5]: [sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23, cs_quantity#31] +Arguments: hashpartitioning(sr_customer_sk#21, sr_item_sk#20, sr_ticket_number#22, 5), ENSURE_REQUIREMENTS, [id=#34] (51) Sort [codegen id : 16] -Input [3]: [cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] -Arguments: [cast(cs_bill_customer_sk#30 as bigint) ASC NULLS FIRST, cast(cs_item_sk#31 as bigint) ASC NULLS FIRST], false, 0 +Input [5]: [sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23, cs_quantity#31] +Arguments: [sr_customer_sk#21 ASC NULLS FIRST, sr_item_sk#20 ASC NULLS FIRST, sr_ticket_number#22 ASC NULLS FIRST], false, 0 (52) SortMergeJoin [codegen id : 17] -Left keys [2]: [sr_customer_sk#21, sr_item_sk#20] -Right keys [2]: [cast(cs_bill_customer_sk#30 as bigint), cast(cs_item_sk#31 as bigint)] +Left keys [3]: [cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] +Right keys [3]: [sr_customer_sk#21, sr_item_sk#20, sr_ticket_number#22] Join condition: None (53) Project [codegen id : 17] -Output [6]: [ss_quantity#6, sr_return_quantity#23, cs_quantity#32, s_state#11, i_item_id#15, i_item_desc#16] -Input [10]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#20, sr_customer_sk#21, sr_return_quantity#23, cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] +Output [6]: [ss_quantity#6, sr_return_quantity#23, cs_quantity#31, s_state#11, i_item_id#15, i_item_desc#16] +Input [12]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23, cs_quantity#31] (54) HashAggregate [codegen id : 17] -Input [6]: [ss_quantity#6, sr_return_quantity#23, cs_quantity#32, s_state#11, i_item_id#15, i_item_desc#16] +Input [6]: [ss_quantity#6, sr_return_quantity#23, cs_quantity#31, s_state#11, i_item_id#15, i_item_desc#16] Keys [3]: [i_item_id#15, i_item_desc#16, s_state#11] -Functions [9]: [partial_count(ss_quantity#6), partial_avg(cast(ss_quantity#6 as bigint)), partial_stddev_samp(cast(ss_quantity#6 as double)), partial_count(sr_return_quantity#23), 
partial_avg(cast(sr_return_quantity#23 as bigint)), partial_stddev_samp(cast(sr_return_quantity#23 as double)), partial_count(cs_quantity#32), partial_avg(cast(cs_quantity#32 as bigint)), partial_stddev_samp(cast(cs_quantity#32 as double))] +Functions [9]: [partial_count(ss_quantity#6), partial_avg(cast(ss_quantity#6 as bigint)), partial_stddev_samp(cast(ss_quantity#6 as double)), partial_count(sr_return_quantity#23), partial_avg(cast(sr_return_quantity#23 as bigint)), partial_stddev_samp(cast(sr_return_quantity#23 as double)), partial_count(cs_quantity#31), partial_avg(cast(cs_quantity#31 as bigint)), partial_stddev_samp(cast(cs_quantity#31 as double))] Aggregate Attributes [18]: [count#35, sum#36, count#37, n#38, avg#39, m2#40, count#41, sum#42, count#43, n#44, avg#45, m2#46, count#47, sum#48, count#49, n#50, avg#51, m2#52] Results [21]: [i_item_id#15, i_item_desc#16, s_state#11, count#53, sum#54, count#55, n#56, avg#57, m2#58, count#59, sum#60, count#61, n#62, avg#63, m2#64, count#65, sum#66, count#67, n#68, avg#69, m2#70] (55) Exchange Input [21]: [i_item_id#15, i_item_desc#16, s_state#11, count#53, sum#54, count#55, n#56, avg#57, m2#58, count#59, sum#60, count#61, n#62, avg#63, m2#64, count#65, sum#66, count#67, n#68, avg#69, m2#70] -Arguments: hashpartitioning(i_item_id#15, i_item_desc#16, s_state#11, 5), true, [id=#71] +Arguments: hashpartitioning(i_item_id#15, i_item_desc#16, s_state#11, 5), ENSURE_REQUIREMENTS, [id=#71] (56) HashAggregate [codegen id : 18] Input [21]: [i_item_id#15, i_item_desc#16, s_state#11, count#53, sum#54, count#55, n#56, avg#57, m2#58, count#59, sum#60, count#61, n#62, avg#63, m2#64, count#65, sum#66, count#67, n#68, avg#69, m2#70] Keys [3]: [i_item_id#15, i_item_desc#16, s_state#11] -Functions [9]: [count(ss_quantity#6), avg(cast(ss_quantity#6 as bigint)), stddev_samp(cast(ss_quantity#6 as double)), count(sr_return_quantity#23), avg(cast(sr_return_quantity#23 as bigint)), stddev_samp(cast(sr_return_quantity#23 as double)), count(cs_quantity#32), avg(cast(cs_quantity#32 as bigint)), stddev_samp(cast(cs_quantity#32 as double))] -Aggregate Attributes [9]: [count(ss_quantity#6)#72, avg(cast(ss_quantity#6 as bigint))#73, stddev_samp(cast(ss_quantity#6 as double))#74, count(sr_return_quantity#23)#75, avg(cast(sr_return_quantity#23 as bigint))#76, stddev_samp(cast(sr_return_quantity#23 as double))#77, count(cs_quantity#32)#78, avg(cast(cs_quantity#32 as bigint))#79, stddev_samp(cast(cs_quantity#32 as double))#80] -Results [15]: [i_item_id#15, i_item_desc#16, s_state#11, count(ss_quantity#6)#72 AS store_sales_quantitycount#81, avg(cast(ss_quantity#6 as bigint))#73 AS store_sales_quantityave#82, stddev_samp(cast(ss_quantity#6 as double))#74 AS store_sales_quantitystdev#83, (stddev_samp(cast(ss_quantity#6 as double))#74 / avg(cast(ss_quantity#6 as bigint))#73) AS store_sales_quantitycov#84, count(sr_return_quantity#23)#75 AS as_store_returns_quantitycount#85, avg(cast(sr_return_quantity#23 as bigint))#76 AS as_store_returns_quantityave#86, stddev_samp(cast(sr_return_quantity#23 as double))#77 AS as_store_returns_quantitystdev#87, (stddev_samp(cast(sr_return_quantity#23 as double))#77 / avg(cast(sr_return_quantity#23 as bigint))#76) AS store_returns_quantitycov#88, count(cs_quantity#32)#78 AS catalog_sales_quantitycount#89, avg(cast(cs_quantity#32 as bigint))#79 AS catalog_sales_quantityave#90, (stddev_samp(cast(cs_quantity#32 as double))#80 / avg(cast(cs_quantity#32 as bigint))#79) AS catalog_sales_quantitystdev#91, (stddev_samp(cast(cs_quantity#32 as double))#80 / 
avg(cast(cs_quantity#32 as bigint))#79) AS catalog_sales_quantitycov#92] +Functions [9]: [count(ss_quantity#6), avg(cast(ss_quantity#6 as bigint)), stddev_samp(cast(ss_quantity#6 as double)), count(sr_return_quantity#23), avg(cast(sr_return_quantity#23 as bigint)), stddev_samp(cast(sr_return_quantity#23 as double)), count(cs_quantity#31), avg(cast(cs_quantity#31 as bigint)), stddev_samp(cast(cs_quantity#31 as double))] +Aggregate Attributes [9]: [count(ss_quantity#6)#72, avg(cast(ss_quantity#6 as bigint))#73, stddev_samp(cast(ss_quantity#6 as double))#74, count(sr_return_quantity#23)#75, avg(cast(sr_return_quantity#23 as bigint))#76, stddev_samp(cast(sr_return_quantity#23 as double))#77, count(cs_quantity#31)#78, avg(cast(cs_quantity#31 as bigint))#79, stddev_samp(cast(cs_quantity#31 as double))#80] +Results [15]: [i_item_id#15, i_item_desc#16, s_state#11, count(ss_quantity#6)#72 AS store_sales_quantitycount#81, avg(cast(ss_quantity#6 as bigint))#73 AS store_sales_quantityave#82, stddev_samp(cast(ss_quantity#6 as double))#74 AS store_sales_quantitystdev#83, (stddev_samp(cast(ss_quantity#6 as double))#74 / avg(cast(ss_quantity#6 as bigint))#73) AS store_sales_quantitycov#84, count(sr_return_quantity#23)#75 AS as_store_returns_quantitycount#85, avg(cast(sr_return_quantity#23 as bigint))#76 AS as_store_returns_quantityave#86, stddev_samp(cast(sr_return_quantity#23 as double))#77 AS as_store_returns_quantitystdev#87, (stddev_samp(cast(sr_return_quantity#23 as double))#77 / avg(cast(sr_return_quantity#23 as bigint))#76) AS store_returns_quantitycov#88, count(cs_quantity#31)#78 AS catalog_sales_quantitycount#89, avg(cast(cs_quantity#31 as bigint))#79 AS catalog_sales_quantityave#90, (stddev_samp(cast(cs_quantity#31 as double))#80 / avg(cast(cs_quantity#31 as bigint))#79) AS catalog_sales_quantitystdev#91, (stddev_samp(cast(cs_quantity#31 as double))#80 / avg(cast(cs_quantity#31 as bigint))#79) AS catalog_sales_quantitycov#92] (57) TakeOrderedAndProject Input [15]: [i_item_id#15, i_item_desc#16, s_state#11, store_sales_quantitycount#81, store_sales_quantityave#82, store_sales_quantitystdev#83, store_sales_quantitycov#84, as_store_returns_quantitycount#85, as_store_returns_quantityave#86, as_store_returns_quantitystdev#87, store_returns_quantitycov#88, catalog_sales_quantitycount#89, catalog_sales_quantityave#90, catalog_sales_quantitystdev#91, catalog_sales_quantitycov#92] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/simplified.txt index bfb59441f483b..79226a34e6768 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/simplified.txt @@ -6,67 +6,67 @@ TakeOrderedAndProject [i_item_id,i_item_desc,s_state,store_sales_quantitycount,s WholeStageCodegen (17) HashAggregate [i_item_id,i_item_desc,s_state,ss_quantity,sr_return_quantity,cs_quantity] [count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2] Project [ss_quantity,sr_return_quantity,cs_quantity,s_state,i_item_id,i_item_desc] - SortMergeJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] + SortMergeJoin [ss_customer_sk,ss_item_sk,ss_ticket_number,sr_customer_sk,sr_item_sk,sr_ticket_number] InputAdapter - WholeStageCodegen (13) - Sort [sr_customer_sk,sr_item_sk] + 
WholeStageCodegen (8) + Sort [ss_customer_sk,ss_item_sk,ss_ticket_number] InputAdapter - Exchange [sr_customer_sk,sr_item_sk] #2 - WholeStageCodegen (12) - Project [ss_quantity,s_state,i_item_id,i_item_desc,sr_item_sk,sr_customer_sk,sr_return_quantity] - SortMergeJoin [ss_customer_sk,ss_item_sk,ss_ticket_number,sr_customer_sk,sr_item_sk,sr_ticket_number] + Exchange [ss_customer_sk,ss_item_sk,ss_ticket_number] #2 + WholeStageCodegen (7) + Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,s_state,i_item_id,i_item_desc] + SortMergeJoin [ss_item_sk,i_item_sk] InputAdapter - WholeStageCodegen (8) - Sort [ss_customer_sk,ss_item_sk,ss_ticket_number] + WholeStageCodegen (4) + Sort [ss_item_sk] InputAdapter - Exchange [ss_customer_sk,ss_item_sk,ss_ticket_number] #3 - WholeStageCodegen (7) - Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,s_state,i_item_id,i_item_desc] - SortMergeJoin [ss_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (4) - Sort [ss_item_sk] - InputAdapter - Exchange [ss_item_sk] #4 - WholeStageCodegen (3) - Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,s_state] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_customer_sk,ss_item_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_quarter_name,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_quarter_name] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (2) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_state] - InputAdapter - WholeStageCodegen (6) - Sort [i_item_sk] - InputAdapter - Exchange [i_item_sk] #7 - WholeStageCodegen (5) - Filter [i_item_sk] + Exchange [ss_item_sk] #3 + WholeStageCodegen (3) + Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_customer_sk,ss_item_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_quarter_name,d_date_sk] ColumnarToRow InputAdapter - Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] + Scan parquet default.date_dim [d_date_sk,d_quarter_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_state] + InputAdapter + WholeStageCodegen (6) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #6 + WholeStageCodegen (5) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] + InputAdapter + WholeStageCodegen (16) + Sort [sr_customer_sk,sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_customer_sk,sr_item_sk,sr_ticket_number] #7 + WholeStageCodegen (15) + Project [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,cs_quantity] + 
SortMergeJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] InputAdapter WholeStageCodegen (11) - Sort [sr_customer_sk,sr_item_sk,sr_ticket_number] + Sort [sr_customer_sk,sr_item_sk] InputAdapter - Exchange [sr_customer_sk,sr_item_sk,sr_ticket_number] #8 + Exchange [sr_customer_sk,sr_item_sk] #8 WholeStageCodegen (10) Project [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity] BroadcastHashJoin [sr_returned_date_sk,d_date_sk] @@ -82,17 +82,17 @@ TakeOrderedAndProject [i_item_id,i_item_desc,s_state,store_sales_quantitycount,s ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_quarter_name] - InputAdapter - WholeStageCodegen (16) - Sort [cs_bill_customer_sk,cs_item_sk] - InputAdapter - Exchange [cs_bill_customer_sk,cs_item_sk] #10 - WholeStageCodegen (15) - Project [cs_bill_customer_sk,cs_item_sk,cs_quantity] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] InputAdapter - ReusedExchange [d_date_sk] #9 + WholeStageCodegen (14) + Sort [cs_bill_customer_sk,cs_item_sk] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk] #10 + WholeStageCodegen (13) + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] + InputAdapter + ReusedExchange [d_date_sk] #9 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.sf100/explain.txt index 516f782057631..12e95ba50cd0d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.sf100/explain.txt @@ -34,24 +34,24 @@ TakeOrderedAndProject (53) +- * Sort (46) +- Exchange (45) +- * Project (44) - +- * SortMergeJoin Inner (43) - :- * Sort (37) - : +- Exchange (36) - : +- * Project (35) - : +- * BroadcastHashJoin Inner BuildRight (34) - : :- * Project (29) - : : +- * Filter (28) - : : +- * ColumnarToRow (27) - : : +- Scan parquet default.customer (26) - : +- BroadcastExchange (33) - : +- * Filter (32) - : +- * ColumnarToRow (31) - : +- Scan parquet default.customer_address (30) - +- * Sort (42) - +- Exchange (41) - +- * Filter (40) - +- * ColumnarToRow (39) - +- Scan parquet default.customer_demographics (38) + +- * BroadcastHashJoin Inner BuildRight (43) + :- * Project (38) + : +- * SortMergeJoin Inner (37) + : :- * Sort (31) + : : +- Exchange (30) + : : +- * Project (29) + : : +- * Filter (28) + : : +- * ColumnarToRow (27) + : : +- Scan parquet default.customer (26) + : +- * Sort (36) + : +- Exchange (35) + : +- * Filter (34) + : +- * ColumnarToRow (33) + : +- Scan parquet default.customer_demographics (32) + +- BroadcastExchange (42) + +- * Filter (41) + +- * ColumnarToRow (40) + +- Scan parquet default.customer_address (39) (1) Scan parquet default.catalog_sales @@ -159,7 +159,7 @@ Input [10]: [cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6 (24) Exchange Input [8]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19] -Arguments: 
hashpartitioning(cs_bill_customer_sk#2, 5), true, [id=#21] +Arguments: hashpartitioning(cs_bill_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#21] (25) Sort [codegen id : 5] Input [8]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19] @@ -172,89 +172,89 @@ Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [In(c_birth_month, [1,6,8,9,12,2]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct -(27) ColumnarToRow [codegen id : 7] +(27) ColumnarToRow [codegen id : 6] Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] -(28) Filter [codegen id : 7] +(28) Filter [codegen id : 6] Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] Condition : (((c_birth_month#25 IN (1,6,8,9,12,2) AND isnotnull(c_customer_sk#22)) AND isnotnull(c_current_cdemo_sk#23)) AND isnotnull(c_current_addr_sk#24)) -(29) Project [codegen id : 7] +(29) Project [codegen id : 6] Output [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26] Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] -(30) Scan parquet default.customer_address -Output [4]: [ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [In(ca_state, [MS,IN,ND,OK,NM,VA]), IsNotNull(ca_address_sk)] -ReadSchema: struct - -(31) ColumnarToRow [codegen id : 6] -Input [4]: [ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] - -(32) Filter [codegen id : 6] -Input [4]: [ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] -Condition : (ca_state#29 IN (MS,IN,ND,OK,NM,VA) AND isnotnull(ca_address_sk#27)) - -(33) BroadcastExchange -Input [4]: [ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#31] - -(34) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_addr_sk#24] -Right keys [1]: [ca_address_sk#27] -Join condition: None - -(35) Project [codegen id : 7] -Output [6]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] -Input [8]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26, ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] - -(36) Exchange -Input [6]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] -Arguments: hashpartitioning(c_current_cdemo_sk#23, 5), true, [id=#32] +(30) Exchange +Input [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26] +Arguments: hashpartitioning(c_current_cdemo_sk#23, 5), ENSURE_REQUIREMENTS, [id=#27] -(37) Sort [codegen id : 8] -Input [6]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] +(31) Sort [codegen id : 7] +Input [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26] Arguments: [c_current_cdemo_sk#23 ASC NULLS FIRST], false, 0 -(38) Scan parquet default.customer_demographics -Output [1]: [cd_demo_sk#33] +(32) Scan parquet default.customer_demographics +Output [1]: [cd_demo_sk#28] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] 
PushedFilters: [IsNotNull(cd_demo_sk)] ReadSchema: struct -(39) ColumnarToRow [codegen id : 9] -Input [1]: [cd_demo_sk#33] +(33) ColumnarToRow [codegen id : 8] +Input [1]: [cd_demo_sk#28] -(40) Filter [codegen id : 9] -Input [1]: [cd_demo_sk#33] -Condition : isnotnull(cd_demo_sk#33) +(34) Filter [codegen id : 8] +Input [1]: [cd_demo_sk#28] +Condition : isnotnull(cd_demo_sk#28) -(41) Exchange -Input [1]: [cd_demo_sk#33] -Arguments: hashpartitioning(cd_demo_sk#33, 5), true, [id=#34] +(35) Exchange +Input [1]: [cd_demo_sk#28] +Arguments: hashpartitioning(cd_demo_sk#28, 5), ENSURE_REQUIREMENTS, [id=#29] -(42) Sort [codegen id : 10] -Input [1]: [cd_demo_sk#33] -Arguments: [cd_demo_sk#33 ASC NULLS FIRST], false, 0 +(36) Sort [codegen id : 9] +Input [1]: [cd_demo_sk#28] +Arguments: [cd_demo_sk#28 ASC NULLS FIRST], false, 0 -(43) SortMergeJoin [codegen id : 11] +(37) SortMergeJoin [codegen id : 11] Left keys [1]: [c_current_cdemo_sk#23] -Right keys [1]: [cd_demo_sk#33] +Right keys [1]: [cd_demo_sk#28] +Join condition: None + +(38) Project [codegen id : 11] +Output [3]: [c_customer_sk#22, c_current_addr_sk#24, c_birth_year#26] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26, cd_demo_sk#28] + +(39) Scan parquet default.customer_address +Output [4]: [ca_address_sk#30, ca_county#31, ca_state#32, ca_country#33] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_state, [MS,IN,ND,OK,NM,VA]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 10] +Input [4]: [ca_address_sk#30, ca_county#31, ca_state#32, ca_country#33] + +(41) Filter [codegen id : 10] +Input [4]: [ca_address_sk#30, ca_county#31, ca_state#32, ca_country#33] +Condition : (ca_state#32 IN (MS,IN,ND,OK,NM,VA) AND isnotnull(ca_address_sk#30)) + +(42) BroadcastExchange +Input [4]: [ca_address_sk#30, ca_county#31, ca_state#32, ca_country#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#34] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [c_current_addr_sk#24] +Right keys [1]: [ca_address_sk#30] Join condition: None (44) Project [codegen id : 11] -Output [5]: [c_customer_sk#22, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] -Input [7]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30, cd_demo_sk#33] +Output [5]: [c_customer_sk#22, c_birth_year#26, ca_county#31, ca_state#32, ca_country#33] +Input [7]: [c_customer_sk#22, c_current_addr_sk#24, c_birth_year#26, ca_address_sk#30, ca_county#31, ca_state#32, ca_country#33] (45) Exchange -Input [5]: [c_customer_sk#22, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] -Arguments: hashpartitioning(c_customer_sk#22, 5), true, [id=#35] +Input [5]: [c_customer_sk#22, c_birth_year#26, ca_county#31, ca_state#32, ca_country#33] +Arguments: hashpartitioning(c_customer_sk#22, 5), ENSURE_REQUIREMENTS, [id=#35] (46) Sort [codegen id : 12] -Input [5]: [c_customer_sk#22, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] +Input [5]: [c_customer_sk#22, c_birth_year#26, ca_county#31, ca_state#32, ca_country#33] Arguments: [c_customer_sk#22 ASC NULLS FIRST], false, 0 (47) SortMergeJoin [codegen id : 13] @@ -263,12 +263,12 @@ Right keys [1]: [c_customer_sk#22] Join condition: None (48) Project [codegen id : 13] -Output [11]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, 
i_item_id#19, ca_country#30, ca_state#29, ca_county#28] -Input [13]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19, c_customer_sk#22, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] +Output [11]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#19, ca_country#33, ca_state#32, ca_county#31] +Input [13]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19, c_customer_sk#22, c_birth_year#26, ca_county#31, ca_state#32, ca_country#33] (49) Expand [codegen id : 13] -Input [11]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#19, ca_country#30, ca_state#29, ca_county#28] -Arguments: [List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#19, ca_country#30, ca_state#29, ca_county#28, 0), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#19, ca_country#30, ca_state#29, null, 1), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#19, ca_country#30, null, null, 3), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#19, null, null, null, 7), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, null, null, null, null, 15)], [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#36, ca_country#37, ca_state#38, ca_county#39, spark_grouping_id#40] +Input [11]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#19, ca_country#33, ca_state#32, ca_county#31] +Arguments: [List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#19, ca_country#33, ca_state#32, ca_county#31, 0), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#19, ca_country#33, ca_state#32, null, 1), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#19, ca_country#33, null, null, 3), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#19, null, null, null, 7), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, null, null, null, null, 15)], [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#36, ca_country#37, ca_state#38, ca_county#39, spark_grouping_id#40] (50) HashAggregate [codegen id : 13] Input [12]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#36, ca_country#37, ca_state#38, ca_county#39, spark_grouping_id#40] @@ -279,7 +279,7 @@ Results [19]: [i_item_id#36, ca_country#37, ca_state#38, ca_county#39, spark_gro 
(51) Exchange Input [19]: [i_item_id#36, ca_country#37, ca_state#38, ca_county#39, spark_grouping_id#40, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68] -Arguments: hashpartitioning(i_item_id#36, ca_country#37, ca_state#38, ca_county#39, spark_grouping_id#40, 5), true, [id=#69] +Arguments: hashpartitioning(i_item_id#36, ca_country#37, ca_state#38, ca_county#39, spark_grouping_id#40, 5), ENSURE_REQUIREMENTS, [id=#69] (52) HashAggregate [codegen id : 14] Input [19]: [i_item_id#36, ca_country#37, ca_state#38, ca_county#39, spark_grouping_id#40, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.sf100/simplified.txt index 8c76e7cab3310..8069d43c3451a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.sf100/simplified.txt @@ -54,34 +54,34 @@ TakeOrderedAndProject [ca_country,ca_state,ca_county,i_item_id,agg1,agg2,agg3,ag Exchange [c_customer_sk] #6 WholeStageCodegen (11) Project [c_customer_sk,c_birth_year,ca_county,ca_state,ca_country] - SortMergeJoin [c_current_cdemo_sk,cd_demo_sk] - InputAdapter - WholeStageCodegen (8) - Sort [c_current_cdemo_sk] - InputAdapter - Exchange [c_current_cdemo_sk] #7 - WholeStageCodegen (7) - Project [c_customer_sk,c_current_cdemo_sk,c_birth_year,ca_county,ca_state,ca_country] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_customer_sk,c_current_addr_sk,c_birth_year] + SortMergeJoin [c_current_cdemo_sk,cd_demo_sk] + InputAdapter + WholeStageCodegen (7) + Sort [c_current_cdemo_sk] + InputAdapter + Exchange [c_current_cdemo_sk] #7 + WholeStageCodegen (6) Project [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] Filter [c_birth_month,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] ColumnarToRow InputAdapter Scan parquet default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_month,c_birth_year] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen (6) - Filter [ca_state,ca_address_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state,ca_country] + InputAdapter + WholeStageCodegen (9) + Sort [cd_demo_sk] + InputAdapter + Exchange [cd_demo_sk] #8 + WholeStageCodegen (8) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk] InputAdapter - WholeStageCodegen (10) - Sort [cd_demo_sk] - InputAdapter - Exchange [cd_demo_sk] #9 - WholeStageCodegen (9) - Filter [cd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk] + BroadcastExchange #9 + WholeStageCodegen (10) + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state,ca_country] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/explain.txt index 88b5168f6049c..4627bc19f25f0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/explain.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/explain.txt @@ -4,248 +4,248 @@ TakeOrderedAndProject (45) +- Exchange (43) +- * HashAggregate (42) +- * Project (41) - +- * SortMergeJoin Inner (40) - :- * Sort (25) - : +- Exchange (24) - : +- * Project (23) - : +- * BroadcastHashJoin Inner BuildRight (22) - : :- * Project (17) - : : +- * BroadcastHashJoin Inner BuildRight (16) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.item (4) - : : +- BroadcastExchange (15) - : : +- * Project (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.date_dim (11) - : +- BroadcastExchange (21) - : +- * Filter (20) - : +- * ColumnarToRow (19) - : +- Scan parquet default.store (18) - +- * Sort (39) - +- Exchange (38) - +- * Project (37) - +- * SortMergeJoin Inner (36) - :- * Sort (30) - : +- Exchange (29) - : +- * Filter (28) - : +- * ColumnarToRow (27) - : +- Scan parquet default.customer (26) - +- * Sort (35) - +- Exchange (34) - +- * Filter (33) - +- * ColumnarToRow (32) - +- Scan parquet default.customer_address (31) - - -(1) Scan parquet default.store_sales -Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5] + +- * BroadcastHashJoin Inner BuildRight (40) + :- * Project (34) + : +- * SortMergeJoin Inner (33) + : :- * Sort (18) + : : +- Exchange (17) + : : +- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildLeft (9) + : : : :- BroadcastExchange (5) + : : : : +- * Project (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.date_dim (1) + : : : +- * Filter (8) + : : : +- * ColumnarToRow (7) + : : : +- Scan parquet default.store_sales (6) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- * Sort (32) + : +- Exchange (31) + : +- * Project (30) + : +- * SortMergeJoin Inner (29) + : :- * Sort (23) + : : +- Exchange (22) + : : +- * Filter (21) + : : +- * ColumnarToRow (20) + : : +- Scan parquet default.customer (19) + : +- * Sort (28) + : +- Exchange (27) + : +- * Filter (26) + : +- * ColumnarToRow (25) + : +- Scan parquet default.customer_address (24) + +- BroadcastExchange (39) + +- * Project (38) + +- * Filter (37) + +- * ColumnarToRow (36) + +- Scan parquet default.item (35) + + +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 4] -Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5] - -(3) Filter [codegen id : 4] -Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5] -Condition : (((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_store_sk#4)) - -(4) Scan parquet default.item -Output [6]: [i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, i_manager_id#11] 
-Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,8), IsNotNull(i_item_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [6]: [i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, i_manager_id#11] - -(6) Filter [codegen id : 1] -Input [6]: [i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, i_manager_id#11] -Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 8)) AND isnotnull(i_item_sk#6)) +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct -(7) Project [codegen id : 1] -Output [5]: [i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] -Input [6]: [i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, i_manager_id#11] +(2) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] -(8) BroadcastExchange -Input [5]: [i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] +(3) Filter [codegen id : 1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 1998)) AND isnotnull(d_date_sk#1)) -(9) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#6] -Join condition: None +(4) Project [codegen id : 1] +Output [1]: [d_date_sk#1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] -(10) Project [codegen id : 4] -Output [8]: [ss_sold_date_sk#1, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] -Input [10]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5, i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] +(5) BroadcastExchange +Input [1]: [d_date_sk#1] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#4] -(11) Scan parquet default.date_dim -Output [3]: [d_date_sk#13, d_year#14, d_moy#15] +(6) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#5, ss_item_sk#6, ss_customer_sk#7, ss_store_sk#8, ss_ext_sales_price#9] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1998), IsNotNull(d_date_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [3]: [d_date_sk#13, d_year#14, d_moy#15] - -(13) Filter [codegen id : 2] -Input [3]: [d_date_sk#13, d_year#14, d_moy#15] -Condition : ((((isnotnull(d_moy#15) AND isnotnull(d_year#14)) AND (d_moy#15 = 11)) AND (d_year#14 = 1998)) AND isnotnull(d_date_sk#13)) +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct -(14) Project [codegen id : 2] -Output [1]: [d_date_sk#13] -Input [3]: [d_date_sk#13, d_year#14, d_moy#15] +(7) ColumnarToRow +Input [5]: [ss_sold_date_sk#5, ss_item_sk#6, ss_customer_sk#7, ss_store_sk#8, ss_ext_sales_price#9] -(15) BroadcastExchange -Input [1]: [d_date_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] +(8) Filter +Input [5]: [ss_sold_date_sk#5, 
ss_item_sk#6, ss_customer_sk#7, ss_store_sk#8, ss_ext_sales_price#9] +Condition : (((isnotnull(ss_sold_date_sk#5) AND isnotnull(ss_item_sk#6)) AND isnotnull(ss_customer_sk#7)) AND isnotnull(ss_store_sk#8)) -(16) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#13] +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#5] Join condition: None -(17) Project [codegen id : 4] -Output [7]: [ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] -Input [9]: [ss_sold_date_sk#1, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, d_date_sk#13] +(10) Project [codegen id : 3] +Output [4]: [ss_item_sk#6, ss_customer_sk#7, ss_store_sk#8, ss_ext_sales_price#9] +Input [6]: [d_date_sk#1, ss_sold_date_sk#5, ss_item_sk#6, ss_customer_sk#7, ss_store_sk#8, ss_ext_sales_price#9] -(18) Scan parquet default.store -Output [2]: [s_store_sk#17, s_zip#18] +(11) Scan parquet default.store +Output [2]: [s_store_sk#10, s_zip#11] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_zip), IsNotNull(s_store_sk)] ReadSchema: struct -(19) ColumnarToRow [codegen id : 3] -Input [2]: [s_store_sk#17, s_zip#18] +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#10, s_zip#11] -(20) Filter [codegen id : 3] -Input [2]: [s_store_sk#17, s_zip#18] -Condition : (isnotnull(s_zip#18) AND isnotnull(s_store_sk#17)) +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#10, s_zip#11] +Condition : (isnotnull(s_zip#11) AND isnotnull(s_store_sk#10)) -(21) BroadcastExchange -Input [2]: [s_store_sk#17, s_zip#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] +(14) BroadcastExchange +Input [2]: [s_store_sk#10, s_zip#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] -(22) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_store_sk#4] -Right keys [1]: [s_store_sk#17] +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_store_sk#8] +Right keys [1]: [s_store_sk#10] Join condition: None -(23) Project [codegen id : 4] -Output [7]: [ss_customer_sk#3, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, s_zip#18] -Input [9]: [ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, s_store_sk#17, s_zip#18] +(16) Project [codegen id : 3] +Output [4]: [ss_item_sk#6, ss_customer_sk#7, ss_ext_sales_price#9, s_zip#11] +Input [6]: [ss_item_sk#6, ss_customer_sk#7, ss_store_sk#8, ss_ext_sales_price#9, s_store_sk#10, s_zip#11] -(24) Exchange -Input [7]: [ss_customer_sk#3, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, s_zip#18] -Arguments: hashpartitioning(ss_customer_sk#3, 5), true, [id=#20] +(17) Exchange +Input [4]: [ss_item_sk#6, ss_customer_sk#7, ss_ext_sales_price#9, s_zip#11] +Arguments: hashpartitioning(ss_customer_sk#7, 5), ENSURE_REQUIREMENTS, [id=#13] -(25) Sort [codegen id : 5] -Input [7]: [ss_customer_sk#3, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, s_zip#18] -Arguments: [ss_customer_sk#3 ASC NULLS FIRST], false, 0 +(18) Sort [codegen id : 4] +Input [4]: [ss_item_sk#6, ss_customer_sk#7, ss_ext_sales_price#9, s_zip#11] +Arguments: [ss_customer_sk#7 ASC NULLS FIRST], false, 0 -(26) Scan parquet default.customer -Output 
[2]: [c_customer_sk#21, c_current_addr_sk#22] +(19) Scan parquet default.customer +Output [2]: [c_customer_sk#14, c_current_addr_sk#15] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct -(27) ColumnarToRow [codegen id : 6] -Input [2]: [c_customer_sk#21, c_current_addr_sk#22] +(20) ColumnarToRow [codegen id : 5] +Input [2]: [c_customer_sk#14, c_current_addr_sk#15] -(28) Filter [codegen id : 6] -Input [2]: [c_customer_sk#21, c_current_addr_sk#22] -Condition : (isnotnull(c_customer_sk#21) AND isnotnull(c_current_addr_sk#22)) +(21) Filter [codegen id : 5] +Input [2]: [c_customer_sk#14, c_current_addr_sk#15] +Condition : (isnotnull(c_customer_sk#14) AND isnotnull(c_current_addr_sk#15)) -(29) Exchange -Input [2]: [c_customer_sk#21, c_current_addr_sk#22] -Arguments: hashpartitioning(c_current_addr_sk#22, 5), true, [id=#23] +(22) Exchange +Input [2]: [c_customer_sk#14, c_current_addr_sk#15] +Arguments: hashpartitioning(c_current_addr_sk#15, 5), ENSURE_REQUIREMENTS, [id=#16] -(30) Sort [codegen id : 7] -Input [2]: [c_customer_sk#21, c_current_addr_sk#22] -Arguments: [c_current_addr_sk#22 ASC NULLS FIRST], false, 0 +(23) Sort [codegen id : 6] +Input [2]: [c_customer_sk#14, c_current_addr_sk#15] +Arguments: [c_current_addr_sk#15 ASC NULLS FIRST], false, 0 -(31) Scan parquet default.customer_address -Output [2]: [ca_address_sk#24, ca_zip#25] +(24) Scan parquet default.customer_address +Output [2]: [ca_address_sk#17, ca_zip#18] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip)] ReadSchema: struct -(32) ColumnarToRow [codegen id : 8] -Input [2]: [ca_address_sk#24, ca_zip#25] +(25) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_zip#18] -(33) Filter [codegen id : 8] -Input [2]: [ca_address_sk#24, ca_zip#25] -Condition : (isnotnull(ca_address_sk#24) AND isnotnull(ca_zip#25)) +(26) Filter [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_zip#18] +Condition : (isnotnull(ca_address_sk#17) AND isnotnull(ca_zip#18)) -(34) Exchange -Input [2]: [ca_address_sk#24, ca_zip#25] -Arguments: hashpartitioning(ca_address_sk#24, 5), true, [id=#26] +(27) Exchange +Input [2]: [ca_address_sk#17, ca_zip#18] +Arguments: hashpartitioning(ca_address_sk#17, 5), ENSURE_REQUIREMENTS, [id=#19] -(35) Sort [codegen id : 9] -Input [2]: [ca_address_sk#24, ca_zip#25] -Arguments: [ca_address_sk#24 ASC NULLS FIRST], false, 0 +(28) Sort [codegen id : 8] +Input [2]: [ca_address_sk#17, ca_zip#18] +Arguments: [ca_address_sk#17 ASC NULLS FIRST], false, 0 -(36) SortMergeJoin [codegen id : 10] -Left keys [1]: [c_current_addr_sk#22] -Right keys [1]: [ca_address_sk#24] +(29) SortMergeJoin [codegen id : 9] +Left keys [1]: [c_current_addr_sk#15] +Right keys [1]: [ca_address_sk#17] Join condition: None -(37) Project [codegen id : 10] -Output [2]: [c_customer_sk#21, ca_zip#25] -Input [4]: [c_customer_sk#21, c_current_addr_sk#22, ca_address_sk#24, ca_zip#25] +(30) Project [codegen id : 9] +Output [2]: [c_customer_sk#14, ca_zip#18] +Input [4]: [c_customer_sk#14, c_current_addr_sk#15, ca_address_sk#17, ca_zip#18] + +(31) Exchange +Input [2]: [c_customer_sk#14, ca_zip#18] +Arguments: hashpartitioning(c_customer_sk#14, 5), ENSURE_REQUIREMENTS, [id=#20] + +(32) Sort [codegen id : 10] +Input [2]: [c_customer_sk#14, ca_zip#18] +Arguments: [c_customer_sk#14 ASC NULLS FIRST], false, 0 -(38) Exchange -Input [2]: [c_customer_sk#21, 
ca_zip#25] -Arguments: hashpartitioning(c_customer_sk#21, 5), true, [id=#27] +(33) SortMergeJoin [codegen id : 12] +Left keys [1]: [ss_customer_sk#7] +Right keys [1]: [c_customer_sk#14] +Join condition: NOT (substr(ca_zip#18, 1, 5) = substr(s_zip#11, 1, 5)) -(39) Sort [codegen id : 11] -Input [2]: [c_customer_sk#21, ca_zip#25] -Arguments: [c_customer_sk#21 ASC NULLS FIRST], false, 0 +(34) Project [codegen id : 12] +Output [2]: [ss_item_sk#6, ss_ext_sales_price#9] +Input [6]: [ss_item_sk#6, ss_customer_sk#7, ss_ext_sales_price#9, s_zip#11, c_customer_sk#14, ca_zip#18] -(40) SortMergeJoin [codegen id : 12] -Left keys [1]: [ss_customer_sk#3] -Right keys [1]: [c_customer_sk#21] -Join condition: NOT (substr(ca_zip#25, 1, 5) = substr(s_zip#18, 1, 5)) +(35) Scan parquet default.item +Output [6]: [i_item_sk#21, i_brand_id#22, i_brand#23, i_manufact_id#24, i_manufact#25, i_manager_id#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,8), IsNotNull(i_item_sk)] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 11] +Input [6]: [i_item_sk#21, i_brand_id#22, i_brand#23, i_manufact_id#24, i_manufact#25, i_manager_id#26] + +(37) Filter [codegen id : 11] +Input [6]: [i_item_sk#21, i_brand_id#22, i_brand#23, i_manufact_id#24, i_manufact#25, i_manager_id#26] +Condition : ((isnotnull(i_manager_id#26) AND (i_manager_id#26 = 8)) AND isnotnull(i_item_sk#21)) + +(38) Project [codegen id : 11] +Output [5]: [i_item_sk#21, i_brand_id#22, i_brand#23, i_manufact_id#24, i_manufact#25] +Input [6]: [i_item_sk#21, i_brand_id#22, i_brand#23, i_manufact_id#24, i_manufact#25, i_manager_id#26] + +(39) BroadcastExchange +Input [5]: [i_item_sk#21, i_brand_id#22, i_brand#23, i_manufact_id#24, i_manufact#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27] + +(40) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [ss_item_sk#6] +Right keys [1]: [i_item_sk#21] +Join condition: None (41) Project [codegen id : 12] -Output [5]: [ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] -Input [9]: [ss_customer_sk#3, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, s_zip#18, c_customer_sk#21, ca_zip#25] +Output [5]: [ss_ext_sales_price#9, i_brand_id#22, i_brand#23, i_manufact_id#24, i_manufact#25] +Input [7]: [ss_item_sk#6, ss_ext_sales_price#9, i_item_sk#21, i_brand_id#22, i_brand#23, i_manufact_id#24, i_manufact#25] (42) HashAggregate [codegen id : 12] -Input [5]: [ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] -Keys [4]: [i_brand#8, i_brand_id#7, i_manufact_id#9, i_manufact#10] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] +Input [5]: [ss_ext_sales_price#9, i_brand_id#22, i_brand#23, i_manufact_id#24, i_manufact#25] +Keys [4]: [i_brand#23, i_brand_id#22, i_manufact_id#24, i_manufact#25] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#9))] Aggregate Attributes [1]: [sum#28] -Results [5]: [i_brand#8, i_brand_id#7, i_manufact_id#9, i_manufact#10, sum#29] +Results [5]: [i_brand#23, i_brand_id#22, i_manufact_id#24, i_manufact#25, sum#29] (43) Exchange -Input [5]: [i_brand#8, i_brand_id#7, i_manufact_id#9, i_manufact#10, sum#29] -Arguments: hashpartitioning(i_brand#8, i_brand_id#7, i_manufact_id#9, i_manufact#10, 5), true, [id=#30] +Input [5]: [i_brand#23, i_brand_id#22, i_manufact_id#24, i_manufact#25, sum#29] +Arguments: hashpartitioning(i_brand#23, i_brand_id#22, 
i_manufact_id#24, i_manufact#25, 5), ENSURE_REQUIREMENTS, [id=#30] (44) HashAggregate [codegen id : 13] -Input [5]: [i_brand#8, i_brand_id#7, i_manufact_id#9, i_manufact#10, sum#29] -Keys [4]: [i_brand#8, i_brand_id#7, i_manufact_id#9, i_manufact#10] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#31] -Results [5]: [i_brand_id#7 AS brand_id#32, i_brand#8 AS brand#33, i_manufact_id#9, i_manufact#10, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#31,17,2) AS ext_price#34] +Input [5]: [i_brand#23, i_brand_id#22, i_manufact_id#24, i_manufact#25, sum#29] +Keys [4]: [i_brand#23, i_brand_id#22, i_manufact_id#24, i_manufact#25] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#9))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#9))#31] +Results [5]: [i_brand_id#22 AS brand_id#32, i_brand#23 AS brand#33, i_manufact_id#24, i_manufact#25, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#9))#31,17,2) AS ext_price#34] (45) TakeOrderedAndProject -Input [5]: [brand_id#32, brand#33, i_manufact_id#9, i_manufact#10, ext_price#34] -Arguments: 100, [ext_price#34 DESC NULLS LAST, brand#33 ASC NULLS FIRST, brand_id#32 ASC NULLS FIRST, i_manufact_id#9 ASC NULLS FIRST, i_manufact#10 ASC NULLS FIRST], [brand_id#32, brand#33, i_manufact_id#9, i_manufact#10, ext_price#34] +Input [5]: [brand_id#32, brand#33, i_manufact_id#24, i_manufact#25, ext_price#34] +Arguments: 100, [ext_price#34 DESC NULLS LAST, brand#33 ASC NULLS FIRST, brand_id#32 ASC NULLS FIRST, i_manufact_id#24 ASC NULLS FIRST, i_manufact#25 ASC NULLS FIRST], [brand_id#32, brand#33, i_manufact_id#24, i_manufact#25, ext_price#34] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/simplified.txt index 05fa3f82e27df..b6441c5fe72c1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/simplified.txt @@ -6,71 +6,71 @@ TakeOrderedAndProject [ext_price,brand,brand_id,i_manufact_id,i_manufact] WholeStageCodegen (12) HashAggregate [i_brand,i_brand_id,i_manufact_id,i_manufact,ss_ext_sales_price] [sum,sum] Project [ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact] - SortMergeJoin [ss_customer_sk,c_customer_sk,ca_zip,s_zip] - InputAdapter - WholeStageCodegen (5) - Sort [ss_customer_sk] - InputAdapter - Exchange [ss_customer_sk] #2 - WholeStageCodegen (4) - Project [ss_customer_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact,s_zip] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_customer_sk,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + SortMergeJoin [ss_customer_sk,c_customer_sk,ca_zip,s_zip] + InputAdapter + WholeStageCodegen (4) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #2 + WholeStageCodegen (3) + 
Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen (1) - Project [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact] - Filter [i_manager_id,i_item_sk] + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] ColumnarToRow InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact,i_manager_id] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Filter [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #4 WholeStageCodegen (2) - Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (3) - Filter [s_zip,s_store_sk] - ColumnarToRow + Filter [s_zip,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_zip] + InputAdapter + WholeStageCodegen (10) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #5 + WholeStageCodegen (9) + Project [c_customer_sk,ca_zip] + SortMergeJoin [c_current_addr_sk,ca_address_sk] + InputAdapter + WholeStageCodegen (6) + Sort [c_current_addr_sk] + InputAdapter + Exchange [c_current_addr_sk] #6 + WholeStageCodegen (5) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] + InputAdapter + WholeStageCodegen (8) + Sort [ca_address_sk] InputAdapter - Scan parquet default.store [s_store_sk,s_zip] + Exchange [ca_address_sk] #7 + WholeStageCodegen (7) + Filter [ca_address_sk,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_zip] InputAdapter - WholeStageCodegen (11) - Sort [c_customer_sk] - InputAdapter - Exchange [c_customer_sk] #6 - WholeStageCodegen (10) - Project [c_customer_sk,ca_zip] - SortMergeJoin [c_current_addr_sk,ca_address_sk] - InputAdapter - WholeStageCodegen (7) - Sort [c_current_addr_sk] - InputAdapter - Exchange [c_current_addr_sk] #7 - WholeStageCodegen (6) - Filter [c_customer_sk,c_current_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_current_addr_sk] - InputAdapter - WholeStageCodegen (9) - Sort [ca_address_sk] - InputAdapter - Exchange [ca_address_sk] #8 - WholeStageCodegen (8) - Filter [ca_address_sk,ca_zip] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_zip] + BroadcastExchange #8 + WholeStageCodegen (11) + Project [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact] + Filter [i_manager_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact,i_manager_id] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt index ffcf6bd4f6d47..093c4eed6cf11 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt @@ -92,7 +92,7 @@ Input [11]: [ss_item_sk#1, 
ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, (10) Exchange Input [10]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#13] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#13] (11) Sort [codegen id : 3] Input [10]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] @@ -114,7 +114,7 @@ Condition : (isnotnull(c_customer_sk#14) AND isnotnull(c_birth_country#17)) (15) Exchange Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] -Arguments: hashpartitioning(c_customer_sk#14, 5), true, [id=#18] +Arguments: hashpartitioning(c_customer_sk#14, 5), ENSURE_REQUIREMENTS, [id=#18] (16) Sort [codegen id : 5] Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] @@ -189,7 +189,7 @@ Input [17]: [ss_item_sk#1, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_c (32) Exchange Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, s_store_name#20, s_state#22, ca_state#25] -Arguments: hashpartitioning(cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint), 5), true, [id=#29] +Arguments: hashpartitioning(cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint), 5), ENSURE_REQUIREMENTS, [id=#29] (33) Sort [codegen id : 9] Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, s_store_name#20, s_state#22, ca_state#25] @@ -211,7 +211,7 @@ Condition : (isnotnull(sr_ticket_number#31) AND isnotnull(sr_item_sk#30)) (37) Exchange Input [2]: [sr_item_sk#30, sr_ticket_number#31] -Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), true, [id=#32] +Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), ENSURE_REQUIREMENTS, [id=#32] (38) Sort [codegen id : 11] Input [2]: [sr_item_sk#30, sr_ticket_number#31] @@ -235,7 +235,7 @@ Results [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_ (42) Exchange Input [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, sum#34] -Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, 5), true, [id=#35] +Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, 5), ENSURE_REQUIREMENTS, [id=#35] (43) HashAggregate [codegen id : 13] Input [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, sum#34] @@ -253,7 +253,7 @@ Results [5]: [c_last_name#16, c_first_name#15, s_store_name#20, sum#40, isEmpty# (45) Exchange Input [5]: [c_last_name#16, c_first_name#15, s_store_name#20, sum#40, isEmpty#41] -Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, 5), true, [id=#42] +Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, 5), ENSURE_REQUIREMENTS, [id=#42] (46) HashAggregate [codegen id : 
14] Input [5]: [c_last_name#16, c_first_name#15, s_store_name#20, sum#40, isEmpty#41] @@ -309,12 +309,12 @@ Subquery:1 Hosting operator id = 47 Hosting Expression = Subquery scalar-subquer : : : +- Exchange (64) : : : +- * Filter (63) : : : +- * ColumnarToRow (62) - : : : +- Scan parquet default.item (61) + : : : +- Scan parquet default.customer (61) : : +- * Sort (74) : : +- Exchange (73) : : +- * Filter (72) : : +- * ColumnarToRow (71) - : : +- Scan parquet default.customer (70) + : : +- Scan parquet default.item (70) : +- * Sort (83) : +- Exchange (82) : +- * Filter (81) @@ -374,88 +374,88 @@ Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, s (59) Exchange Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23] -Arguments: hashpartitioning(ss_item_sk#1, 5), true, [id=#49] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#49] (60) Sort [codegen id : 3] Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23] -Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 -(61) Scan parquet default.item -Output [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +(61) Scan parquet default.customer +Output [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] +ReadSchema: struct (62) ColumnarToRow [codegen id : 4] -Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] (63) Filter [codegen id : 4] -Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Condition : isnotnull(i_item_sk#6) +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Condition : (isnotnull(c_customer_sk#14) AND isnotnull(c_birth_country#17)) (64) Exchange -Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Arguments: hashpartitioning(i_item_sk#6, 5), true, [id=#50] +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Arguments: hashpartitioning(c_customer_sk#14, 5), ENSURE_REQUIREMENTS, [id=#50] (65) Sort [codegen id : 5] -Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Arguments: [i_item_sk#6 ASC NULLS FIRST], false, 0 +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Arguments: [c_customer_sk#14 ASC NULLS FIRST], false, 0 (66) SortMergeJoin [codegen id : 6] -Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#14] Join condition: None (67) Project [codegen id : 6] -Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Input [13]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] 
+Output [9]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17] +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] (68) Exchange -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#51] +Input [9]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#51] (69) Sort [codegen id : 7] -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 +Input [9]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17] +Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 -(70) Scan parquet default.customer -Output [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +(70) Scan parquet default.item +Output [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct (71) ColumnarToRow [codegen id : 8] -Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] (72) Filter [codegen id : 8] -Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] -Condition : (isnotnull(c_customer_sk#14) AND isnotnull(c_birth_country#17)) +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Condition : isnotnull(i_item_sk#6) (73) Exchange -Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] -Arguments: hashpartitioning(c_customer_sk#14, 5), true, [id=#52] +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Arguments: hashpartitioning(i_item_sk#6, 5), ENSURE_REQUIREMENTS, [id=#52] (74) Sort [codegen id : 9] -Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] -Arguments: [c_customer_sk#14 ASC NULLS FIRST], false, 0 +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Arguments: [i_item_sk#6 ASC NULLS FIRST], false, 0 (75) SortMergeJoin [codegen id : 10] -Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#14] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#6] Join condition: None (76) Project [codegen id : 10] -Output [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] -Input [16]: [ss_item_sk#1, ss_customer_sk#2, 
ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Output [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Input [15]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17, i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] (77) Exchange -Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] -Arguments: hashpartitioning(c_birth_country#17, s_zip#23, 5), true, [id=#53] +Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Arguments: hashpartitioning(c_birth_country#17, s_zip#23, 5), ENSURE_REQUIREMENTS, [id=#53] (78) Sort [codegen id : 11] -Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] +Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] Arguments: [c_birth_country#17 ASC NULLS FIRST, s_zip#23 ASC NULLS FIRST], false, 0 (79) Scan parquet default.customer_address @@ -474,7 +474,7 @@ Condition : (isnotnull(ca_country#27) AND isnotnull(ca_zip#26)) (82) Exchange Input [3]: [ca_state#25, ca_zip#26, ca_country#27] -Arguments: hashpartitioning(upper(ca_country#27), ca_zip#26, 5), true, [id=#54] +Arguments: hashpartitioning(upper(ca_country#27), ca_zip#26, 5), ENSURE_REQUIREMENTS, [id=#54] (83) Sort [codegen id : 13] Input [3]: [ca_state#25, ca_zip#26, ca_country#27] @@ -486,15 +486,15 @@ Right keys [2]: [upper(ca_country#27), ca_zip#26] Join condition: None (85) Project [codegen id : 14] -Output [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] -Input [17]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17, ca_state#25, ca_zip#26, ca_country#27] +Output [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, c_first_name#15, c_last_name#16, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, ca_state#25] +Input [17]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, ca_state#25, ca_zip#26, ca_country#27] (86) Exchange -Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, 
i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] -Arguments: hashpartitioning(cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint), 5), true, [id=#55] +Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, c_first_name#15, c_last_name#16, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, ca_state#25] +Arguments: hashpartitioning(cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint), 5), ENSURE_REQUIREMENTS, [id=#55] (87) Sort [codegen id : 15] -Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, c_first_name#15, c_last_name#16, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, ca_state#25] Arguments: [cast(ss_ticket_number#4 as bigint) ASC NULLS FIRST, cast(ss_item_sk#1 as bigint) ASC NULLS FIRST], false, 0 (88) Scan parquet default.store_returns @@ -513,7 +513,7 @@ Condition : (isnotnull(sr_ticket_number#31) AND isnotnull(sr_item_sk#30)) (91) Exchange Input [2]: [sr_item_sk#30, sr_ticket_number#31] -Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), true, [id=#56] +Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), ENSURE_REQUIREMENTS, [id=#56] (92) Sort [codegen id : 17] Input [2]: [sr_item_sk#30, sr_ticket_number#31] @@ -526,7 +526,7 @@ Join condition: None (94) Project [codegen id : 18] Output [11]: [ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] -Input [15]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25, sr_item_sk#30, sr_ticket_number#31] +Input [15]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, c_first_name#15, c_last_name#16, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, ca_state#25, sr_item_sk#30, sr_ticket_number#31] (95) HashAggregate [codegen id : 18] Input [11]: [ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] @@ -537,7 +537,7 @@ Results [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_ (96) Exchange Input [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, sum#58] -Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, 5), true, [id=#59] +Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, 5), ENSURE_REQUIREMENTS, [id=#59] (97) HashAggregate [codegen id : 19] Input [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, sum#58] @@ -555,7 +555,7 @@ Results [2]: [sum#63, count#64] (99) Exchange Input [2]: [sum#63, count#64] -Arguments: SinglePartition, true, [id=#65] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#65] 
(100) HashAggregate [codegen id : 20] Input [2]: [sum#63, count#64] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/simplified.txt index 10f874f8f5543..7de562c5d59a1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/simplified.txt @@ -21,7 +21,7 @@ WholeStageCodegen (14) InputAdapter Exchange [ss_ticket_number,ss_item_sk] #12 WholeStageCodegen (14) - Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,c_first_name,c_last_name,i_current_price,i_size,i_color,i_units,i_manager_id,ca_state] SortMergeJoin [c_birth_country,s_zip,ca_country,ca_zip] InputAdapter WholeStageCodegen (11) @@ -29,21 +29,21 @@ WholeStageCodegen (14) InputAdapter Exchange [c_birth_country,s_zip] #13 WholeStageCodegen (10) - Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] - SortMergeJoin [ss_customer_sk,c_customer_sk] + Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip,c_first_name,c_last_name,c_birth_country,i_current_price,i_size,i_color,i_units,i_manager_id] + SortMergeJoin [ss_item_sk,i_item_sk] InputAdapter WholeStageCodegen (7) - Sort [ss_customer_sk] + Sort [ss_item_sk] InputAdapter - Exchange [ss_customer_sk] #14 + Exchange [ss_item_sk] #14 WholeStageCodegen (6) - Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] - SortMergeJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip,c_first_name,c_last_name,c_birth_country] + SortMergeJoin [ss_customer_sk,c_customer_sk] InputAdapter WholeStageCodegen (3) - Sort [ss_item_sk] + Sort [ss_customer_sk] InputAdapter - Exchange [ss_item_sk] #15 + Exchange [ss_customer_sk] #15 WholeStageCodegen (2) Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip] BroadcastHashJoin [ss_store_sk,s_store_sk] @@ -61,24 +61,24 @@ WholeStageCodegen (14) Scan parquet default.store [s_store_sk,s_store_name,s_market_id,s_state,s_zip] InputAdapter WholeStageCodegen (5) - Sort [i_item_sk] + Sort [c_customer_sk] InputAdapter - Exchange [i_item_sk] #17 + Exchange [c_customer_sk] #17 WholeStageCodegen (4) - Filter [i_item_sk] + Filter [c_customer_sk,c_birth_country] ColumnarToRow InputAdapter - Scan parquet default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] InputAdapter WholeStageCodegen (9) - Sort [c_customer_sk] + Sort [i_item_sk] InputAdapter - Exchange [c_customer_sk] #18 + Exchange [i_item_sk] #18 WholeStageCodegen (8) - Filter [c_customer_sk,c_birth_country] + Filter [i_item_sk] ColumnarToRow InputAdapter - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] + Scan parquet default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] InputAdapter WholeStageCodegen (13) Sort [ca_country,ca_zip] diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt index 73f36e3a9ca23..273950bed3546 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt @@ -92,7 +92,7 @@ Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, (10) Exchange Input [10]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#13] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#13] (11) Sort [codegen id : 3] Input [10]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] @@ -114,7 +114,7 @@ Condition : (isnotnull(c_customer_sk#14) AND isnotnull(c_birth_country#17)) (15) Exchange Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] -Arguments: hashpartitioning(c_customer_sk#14, 5), true, [id=#18] +Arguments: hashpartitioning(c_customer_sk#14, 5), ENSURE_REQUIREMENTS, [id=#18] (16) Sort [codegen id : 5] Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] @@ -189,7 +189,7 @@ Input [17]: [ss_item_sk#1, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_c (32) Exchange Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, s_store_name#20, s_state#22, ca_state#25] -Arguments: hashpartitioning(cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint), 5), true, [id=#29] +Arguments: hashpartitioning(cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint), 5), ENSURE_REQUIREMENTS, [id=#29] (33) Sort [codegen id : 9] Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, s_store_name#20, s_state#22, ca_state#25] @@ -211,7 +211,7 @@ Condition : (isnotnull(sr_ticket_number#31) AND isnotnull(sr_item_sk#30)) (37) Exchange Input [2]: [sr_item_sk#30, sr_ticket_number#31] -Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), true, [id=#32] +Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), ENSURE_REQUIREMENTS, [id=#32] (38) Sort [codegen id : 11] Input [2]: [sr_item_sk#30, sr_ticket_number#31] @@ -235,7 +235,7 @@ Results [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_ (42) Exchange Input [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, sum#34] -Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, 5), true, [id=#35] +Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, 5), ENSURE_REQUIREMENTS, [id=#35] (43) HashAggregate [codegen id : 13] Input [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, 
i_units#10, i_size#8, sum#34] @@ -253,7 +253,7 @@ Results [5]: [c_last_name#16, c_first_name#15, s_store_name#20, sum#40, isEmpty# (45) Exchange Input [5]: [c_last_name#16, c_first_name#15, s_store_name#20, sum#40, isEmpty#41] -Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, 5), true, [id=#42] +Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, 5), ENSURE_REQUIREMENTS, [id=#42] (46) HashAggregate [codegen id : 14] Input [5]: [c_last_name#16, c_first_name#15, s_store_name#20, sum#40, isEmpty#41] @@ -309,12 +309,12 @@ Subquery:1 Hosting operator id = 47 Hosting Expression = Subquery scalar-subquer : : : +- Exchange (64) : : : +- * Filter (63) : : : +- * ColumnarToRow (62) - : : : +- Scan parquet default.item (61) + : : : +- Scan parquet default.customer (61) : : +- * Sort (74) : : +- Exchange (73) : : +- * Filter (72) : : +- * ColumnarToRow (71) - : : +- Scan parquet default.customer (70) + : : +- Scan parquet default.item (70) : +- * Sort (83) : +- Exchange (82) : +- * Filter (81) @@ -374,88 +374,88 @@ Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, s (59) Exchange Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23] -Arguments: hashpartitioning(ss_item_sk#1, 5), true, [id=#49] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#49] (60) Sort [codegen id : 3] Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23] -Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 -(61) Scan parquet default.item -Output [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +(61) Scan parquet default.customer +Output [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] +ReadSchema: struct (62) ColumnarToRow [codegen id : 4] -Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] (63) Filter [codegen id : 4] -Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Condition : isnotnull(i_item_sk#6) +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Condition : (isnotnull(c_customer_sk#14) AND isnotnull(c_birth_country#17)) (64) Exchange -Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Arguments: hashpartitioning(i_item_sk#6, 5), true, [id=#50] +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Arguments: hashpartitioning(c_customer_sk#14, 5), ENSURE_REQUIREMENTS, [id=#50] (65) Sort [codegen id : 5] -Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Arguments: [i_item_sk#6 ASC NULLS FIRST], false, 0 +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Arguments: [c_customer_sk#14 ASC NULLS FIRST], false, 0 (66) SortMergeJoin [codegen id : 6] -Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#6] +Left keys [1]: [ss_customer_sk#2] +Right keys 
[1]: [c_customer_sk#14] Join condition: None (67) Project [codegen id : 6] -Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Input [13]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Output [9]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17] +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] (68) Exchange -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#51] +Input [9]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#51] (69) Sort [codegen id : 7] -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 +Input [9]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17] +Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 -(70) Scan parquet default.customer -Output [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +(70) Scan parquet default.item +Output [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct (71) ColumnarToRow [codegen id : 8] -Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] (72) Filter [codegen id : 8] -Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] -Condition : (isnotnull(c_customer_sk#14) AND isnotnull(c_birth_country#17)) +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Condition : isnotnull(i_item_sk#6) (73) Exchange -Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] -Arguments: hashpartitioning(c_customer_sk#14, 5), true, [id=#52] +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Arguments: hashpartitioning(i_item_sk#6, 5), ENSURE_REQUIREMENTS, [id=#52] (74) Sort [codegen id : 9] -Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] -Arguments: [c_customer_sk#14 ASC NULLS FIRST], false, 0 +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Arguments: [i_item_sk#6 ASC NULLS FIRST], false, 0 (75) SortMergeJoin [codegen id : 10] 
-Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#14] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#6] Join condition: None (76) Project [codegen id : 10] -Output [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] -Input [16]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Output [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Input [15]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17, i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] (77) Exchange -Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] -Arguments: hashpartitioning(c_birth_country#17, s_zip#23, 5), true, [id=#53] +Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Arguments: hashpartitioning(c_birth_country#17, s_zip#23, 5), ENSURE_REQUIREMENTS, [id=#53] (78) Sort [codegen id : 11] -Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] +Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] Arguments: [c_birth_country#17 ASC NULLS FIRST, s_zip#23 ASC NULLS FIRST], false, 0 (79) Scan parquet default.customer_address @@ -474,7 +474,7 @@ Condition : (isnotnull(ca_country#27) AND isnotnull(ca_zip#26)) (82) Exchange Input [3]: [ca_state#25, ca_zip#26, ca_country#27] -Arguments: hashpartitioning(upper(ca_country#27), ca_zip#26, 5), true, [id=#54] +Arguments: hashpartitioning(upper(ca_country#27), ca_zip#26, 5), ENSURE_REQUIREMENTS, [id=#54] (83) Sort [codegen id : 13] Input [3]: [ca_state#25, ca_zip#26, ca_country#27] @@ -486,15 +486,15 @@ Right keys [2]: [upper(ca_country#27), ca_zip#26] Join condition: None (85) Project [codegen id : 14] -Output [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] -Input [17]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17, ca_state#25, ca_zip#26, ca_country#27] +Output [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, c_first_name#15, c_last_name#16, i_current_price#7, i_size#8, i_color#9, i_units#10, 
i_manager_id#11, ca_state#25] +Input [17]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, ca_state#25, ca_zip#26, ca_country#27] (86) Exchange -Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] -Arguments: hashpartitioning(cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint), 5), true, [id=#55] +Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, c_first_name#15, c_last_name#16, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, ca_state#25] +Arguments: hashpartitioning(cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint), 5), ENSURE_REQUIREMENTS, [id=#55] (87) Sort [codegen id : 15] -Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, c_first_name#15, c_last_name#16, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, ca_state#25] Arguments: [cast(ss_ticket_number#4 as bigint) ASC NULLS FIRST, cast(ss_item_sk#1 as bigint) ASC NULLS FIRST], false, 0 (88) Scan parquet default.store_returns @@ -513,7 +513,7 @@ Condition : (isnotnull(sr_ticket_number#31) AND isnotnull(sr_item_sk#30)) (91) Exchange Input [2]: [sr_item_sk#30, sr_ticket_number#31] -Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), true, [id=#56] +Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), ENSURE_REQUIREMENTS, [id=#56] (92) Sort [codegen id : 17] Input [2]: [sr_item_sk#30, sr_ticket_number#31] @@ -526,7 +526,7 @@ Join condition: None (94) Project [codegen id : 18] Output [11]: [ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] -Input [15]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25, sr_item_sk#30, sr_ticket_number#31] +Input [15]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, c_first_name#15, c_last_name#16, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, ca_state#25, sr_item_sk#30, sr_ticket_number#31] (95) HashAggregate [codegen id : 18] Input [11]: [ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] @@ -537,7 +537,7 @@ Results [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_ (96) Exchange Input [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, sum#58] -Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, 5), true, [id=#59] +Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, 
i_units#10, i_size#8, 5), ENSURE_REQUIREMENTS, [id=#59] (97) HashAggregate [codegen id : 19] Input [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, sum#58] @@ -555,7 +555,7 @@ Results [2]: [sum#63, count#64] (99) Exchange Input [2]: [sum#63, count#64] -Arguments: SinglePartition, true, [id=#65] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#65] (100) HashAggregate [codegen id : 20] Input [2]: [sum#63, count#64] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/simplified.txt index 10f874f8f5543..7de562c5d59a1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/simplified.txt @@ -21,7 +21,7 @@ WholeStageCodegen (14) InputAdapter Exchange [ss_ticket_number,ss_item_sk] #12 WholeStageCodegen (14) - Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,c_first_name,c_last_name,i_current_price,i_size,i_color,i_units,i_manager_id,ca_state] SortMergeJoin [c_birth_country,s_zip,ca_country,ca_zip] InputAdapter WholeStageCodegen (11) @@ -29,21 +29,21 @@ WholeStageCodegen (14) InputAdapter Exchange [c_birth_country,s_zip] #13 WholeStageCodegen (10) - Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] - SortMergeJoin [ss_customer_sk,c_customer_sk] + Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip,c_first_name,c_last_name,c_birth_country,i_current_price,i_size,i_color,i_units,i_manager_id] + SortMergeJoin [ss_item_sk,i_item_sk] InputAdapter WholeStageCodegen (7) - Sort [ss_customer_sk] + Sort [ss_item_sk] InputAdapter - Exchange [ss_customer_sk] #14 + Exchange [ss_item_sk] #14 WholeStageCodegen (6) - Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] - SortMergeJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip,c_first_name,c_last_name,c_birth_country] + SortMergeJoin [ss_customer_sk,c_customer_sk] InputAdapter WholeStageCodegen (3) - Sort [ss_item_sk] + Sort [ss_customer_sk] InputAdapter - Exchange [ss_item_sk] #15 + Exchange [ss_customer_sk] #15 WholeStageCodegen (2) Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip] BroadcastHashJoin [ss_store_sk,s_store_sk] @@ -61,24 +61,24 @@ WholeStageCodegen (14) Scan parquet default.store [s_store_sk,s_store_name,s_market_id,s_state,s_zip] InputAdapter WholeStageCodegen (5) - Sort [i_item_sk] + Sort [c_customer_sk] InputAdapter - Exchange [i_item_sk] #17 + Exchange [c_customer_sk] #17 WholeStageCodegen (4) - Filter [i_item_sk] + Filter [c_customer_sk,c_birth_country] ColumnarToRow InputAdapter - Scan parquet default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] InputAdapter WholeStageCodegen (9) - Sort [c_customer_sk] + Sort [i_item_sk] InputAdapter - Exchange 
[c_customer_sk] #18 + Exchange [i_item_sk] #18 WholeStageCodegen (8) - Filter [c_customer_sk,c_birth_country] + Filter [i_item_sk] ColumnarToRow InputAdapter - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] + Scan parquet default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] InputAdapter WholeStageCodegen (13) Sort [ca_country,ca_zip] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/explain.txt index c6dc3db869003..3100e574e60e3 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/explain.txt @@ -5,57 +5,57 @@ TakeOrderedAndProject (57) +- * HashAggregate (54) +- * Project (53) +- * SortMergeJoin Inner (52) - :- * Sort (43) - : +- Exchange (42) - : +- * Project (41) - : +- * SortMergeJoin Inner (40) - : :- * Sort (27) - : : +- Exchange (26) - : : +- * Project (25) - : : +- * SortMergeJoin Inner (24) - : : :- * Sort (18) - : : : +- Exchange (17) - : : : +- * Project (16) - : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : :- * Project (10) - : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.store_sales (1) - : : : : +- BroadcastExchange (8) - : : : : +- * Project (7) - : : : : +- * Filter (6) - : : : : +- * ColumnarToRow (5) - : : : : +- Scan parquet default.date_dim (4) - : : : +- BroadcastExchange (14) - : : : +- * Filter (13) - : : : +- * ColumnarToRow (12) - : : : +- Scan parquet default.store (11) - : : +- * Sort (23) - : : +- Exchange (22) - : : +- * Filter (21) - : : +- * ColumnarToRow (20) - : : +- Scan parquet default.item (19) - : +- * Sort (39) - : +- Exchange (38) - : +- * Project (37) - : +- * BroadcastHashJoin Inner BuildRight (36) - : :- * Filter (30) - : : +- * ColumnarToRow (29) - : : +- Scan parquet default.store_returns (28) - : +- BroadcastExchange (35) - : +- * Project (34) - : +- * Filter (33) - : +- * ColumnarToRow (32) - : +- Scan parquet default.date_dim (31) + :- * Sort (27) + : +- Exchange (26) + : +- * Project (25) + : +- * SortMergeJoin Inner (24) + : :- * Sort (18) + : : +- Exchange (17) + : : +- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- * Sort (23) + : +- Exchange (22) + : +- * Filter (21) + : +- * ColumnarToRow (20) + : +- Scan parquet default.item (19) +- * Sort (51) +- Exchange (50) +- * Project (49) - +- * BroadcastHashJoin Inner BuildRight (48) - :- * Filter (46) - : +- * ColumnarToRow (45) - : +- Scan parquet default.catalog_sales (44) - +- ReusedExchange (47) + +- * SortMergeJoin Inner (48) + :- * Sort (39) + : +- Exchange (38) + : +- * Project (37) + : +- * BroadcastHashJoin Inner BuildRight (36) + : :- * Filter (30) + : : +- * ColumnarToRow (29) + : : +- Scan parquet default.store_returns (28) + : +- BroadcastExchange (35) + : 
+- * Project (34) + : +- * Filter (33) + : +- * ColumnarToRow (32) + : +- Scan parquet default.date_dim (31) + +- * Sort (47) + +- Exchange (46) + +- * Project (45) + +- * BroadcastHashJoin Inner BuildRight (44) + :- * Filter (42) + : +- * ColumnarToRow (41) + : +- Scan parquet default.catalog_sales (40) + +- ReusedExchange (43) (1) Scan parquet default.store_sales @@ -132,7 +132,7 @@ Input [8]: [ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, s (17) Exchange Input [6]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_net_profit#6, s_store_id#12, s_store_name#13] -Arguments: hashpartitioning(ss_item_sk#2, 5), true, [id=#15] +Arguments: hashpartitioning(ss_item_sk#2, 5), ENSURE_REQUIREMENTS, [id=#15] (18) Sort [codegen id : 4] Input [6]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_net_profit#6, s_store_id#12, s_store_name#13] @@ -154,7 +154,7 @@ Condition : isnotnull(i_item_sk#16) (22) Exchange Input [3]: [i_item_sk#16, i_item_id#17, i_item_desc#18] -Arguments: hashpartitioning(i_item_sk#16, 5), true, [id=#19] +Arguments: hashpartitioning(i_item_sk#16, 5), ENSURE_REQUIREMENTS, [id=#19] (23) Sort [codegen id : 6] Input [3]: [i_item_sk#16, i_item_id#17, i_item_desc#18] @@ -171,7 +171,7 @@ Input [9]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_net_profit#6, (26) Exchange Input [8]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] -Arguments: hashpartitioning(cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint), 5), true, [id=#20] +Arguments: hashpartitioning(cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint), 5), ENSURE_REQUIREMENTS, [id=#20] (27) Sort [codegen id : 8] Input [8]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] @@ -224,89 +224,89 @@ Input [6]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_ (38) Exchange Input [4]: [sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25] -Arguments: hashpartitioning(sr_customer_sk#23, sr_item_sk#22, sr_ticket_number#24, 5), true, [id=#30] +Arguments: hashpartitioning(sr_customer_sk#23, sr_item_sk#22, 5), ENSURE_REQUIREMENTS, [id=#30] (39) Sort [codegen id : 11] Input [4]: [sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25] -Arguments: [sr_customer_sk#23 ASC NULLS FIRST, sr_item_sk#22 ASC NULLS FIRST, sr_ticket_number#24 ASC NULLS FIRST], false, 0 - -(40) SortMergeJoin [codegen id : 12] -Left keys [3]: [cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] -Right keys [3]: [sr_customer_sk#23, sr_item_sk#22, sr_ticket_number#24] -Join condition: None - -(41) Project [codegen id : 12] -Output [8]: [ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_net_loss#25] -Input [12]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25] - -(42) Exchange -Input [8]: [ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_net_loss#25] -Arguments: hashpartitioning(sr_customer_sk#23, sr_item_sk#22, 5), true, [id=#31] - -(43) Sort [codegen id : 13] -Input [8]: [ss_net_profit#6, 
s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_net_loss#25] Arguments: [sr_customer_sk#23 ASC NULLS FIRST, sr_item_sk#22 ASC NULLS FIRST], false, 0 -(44) Scan parquet default.catalog_sales -Output [4]: [cs_sold_date_sk#32, cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] +(40) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#31, cs_bill_customer_sk#32, cs_item_sk#33, cs_net_profit#34] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_sales] PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] ReadSchema: struct -(45) ColumnarToRow [codegen id : 15] -Input [4]: [cs_sold_date_sk#32, cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] +(41) ColumnarToRow [codegen id : 13] +Input [4]: [cs_sold_date_sk#31, cs_bill_customer_sk#32, cs_item_sk#33, cs_net_profit#34] + +(42) Filter [codegen id : 13] +Input [4]: [cs_sold_date_sk#31, cs_bill_customer_sk#32, cs_item_sk#33, cs_net_profit#34] +Condition : ((isnotnull(cs_bill_customer_sk#32) AND isnotnull(cs_item_sk#33)) AND isnotnull(cs_sold_date_sk#31)) -(46) Filter [codegen id : 15] -Input [4]: [cs_sold_date_sk#32, cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] -Condition : ((isnotnull(cs_bill_customer_sk#33) AND isnotnull(cs_item_sk#34)) AND isnotnull(cs_sold_date_sk#32)) +(43) ReusedExchange [Reuses operator id: 35] +Output [1]: [d_date_sk#35] -(47) ReusedExchange [Reuses operator id: 35] -Output [1]: [d_date_sk#36] +(44) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cs_sold_date_sk#31] +Right keys [1]: [d_date_sk#35] +Join condition: None + +(45) Project [codegen id : 13] +Output [3]: [cs_bill_customer_sk#32, cs_item_sk#33, cs_net_profit#34] +Input [5]: [cs_sold_date_sk#31, cs_bill_customer_sk#32, cs_item_sk#33, cs_net_profit#34, d_date_sk#35] + +(46) Exchange +Input [3]: [cs_bill_customer_sk#32, cs_item_sk#33, cs_net_profit#34] +Arguments: hashpartitioning(cast(cs_bill_customer_sk#32 as bigint), cast(cs_item_sk#33 as bigint), 5), ENSURE_REQUIREMENTS, [id=#36] -(48) BroadcastHashJoin [codegen id : 15] -Left keys [1]: [cs_sold_date_sk#32] -Right keys [1]: [d_date_sk#36] +(47) Sort [codegen id : 14] +Input [3]: [cs_bill_customer_sk#32, cs_item_sk#33, cs_net_profit#34] +Arguments: [cast(cs_bill_customer_sk#32 as bigint) ASC NULLS FIRST, cast(cs_item_sk#33 as bigint) ASC NULLS FIRST], false, 0 + +(48) SortMergeJoin [codegen id : 15] +Left keys [2]: [sr_customer_sk#23, sr_item_sk#22] +Right keys [2]: [cast(cs_bill_customer_sk#32 as bigint), cast(cs_item_sk#33 as bigint)] Join condition: None (49) Project [codegen id : 15] -Output [3]: [cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] -Input [5]: [cs_sold_date_sk#32, cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35, d_date_sk#36] +Output [5]: [sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25, cs_net_profit#34] +Input [7]: [sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25, cs_bill_customer_sk#32, cs_item_sk#33, cs_net_profit#34] (50) Exchange -Input [3]: [cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] -Arguments: hashpartitioning(cast(cs_bill_customer_sk#33 as bigint), cast(cs_item_sk#34 as bigint), 5), true, [id=#37] +Input [5]: [sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25, cs_net_profit#34] +Arguments: hashpartitioning(sr_customer_sk#23, sr_item_sk#22, sr_ticket_number#24, 5), ENSURE_REQUIREMENTS, [id=#37] (51) Sort [codegen id : 16] -Input [3]: 
[cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] -Arguments: [cast(cs_bill_customer_sk#33 as bigint) ASC NULLS FIRST, cast(cs_item_sk#34 as bigint) ASC NULLS FIRST], false, 0 +Input [5]: [sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25, cs_net_profit#34] +Arguments: [sr_customer_sk#23 ASC NULLS FIRST, sr_item_sk#22 ASC NULLS FIRST, sr_ticket_number#24 ASC NULLS FIRST], false, 0 (52) SortMergeJoin [codegen id : 17] -Left keys [2]: [sr_customer_sk#23, sr_item_sk#22] -Right keys [2]: [cast(cs_bill_customer_sk#33 as bigint), cast(cs_item_sk#34 as bigint)] +Left keys [3]: [cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] +Right keys [3]: [sr_customer_sk#23, sr_item_sk#22, sr_ticket_number#24] Join condition: None (53) Project [codegen id : 17] -Output [7]: [ss_net_profit#6, sr_net_loss#25, cs_net_profit#35, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] -Input [11]: [ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_net_loss#25, cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] +Output [7]: [ss_net_profit#6, sr_net_loss#25, cs_net_profit#34, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] +Input [13]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25, cs_net_profit#34] (54) HashAggregate [codegen id : 17] -Input [7]: [ss_net_profit#6, sr_net_loss#25, cs_net_profit#35, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] +Input [7]: [ss_net_profit#6, sr_net_loss#25, cs_net_profit#34, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] Keys [4]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13] -Functions [3]: [partial_sum(UnscaledValue(ss_net_profit#6)), partial_sum(UnscaledValue(sr_net_loss#25)), partial_sum(UnscaledValue(cs_net_profit#35))] +Functions [3]: [partial_sum(UnscaledValue(ss_net_profit#6)), partial_sum(UnscaledValue(sr_net_loss#25)), partial_sum(UnscaledValue(cs_net_profit#34))] Aggregate Attributes [3]: [sum#38, sum#39, sum#40] Results [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, sum#41, sum#42, sum#43] (55) Exchange Input [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, sum#41, sum#42, sum#43] -Arguments: hashpartitioning(i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, 5), true, [id=#44] +Arguments: hashpartitioning(i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, 5), ENSURE_REQUIREMENTS, [id=#44] (56) HashAggregate [codegen id : 18] Input [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, sum#41, sum#42, sum#43] Keys [4]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13] -Functions [3]: [sum(UnscaledValue(ss_net_profit#6)), sum(UnscaledValue(sr_net_loss#25)), sum(UnscaledValue(cs_net_profit#35))] -Aggregate Attributes [3]: [sum(UnscaledValue(ss_net_profit#6))#45, sum(UnscaledValue(sr_net_loss#25))#46, sum(UnscaledValue(cs_net_profit#35))#47] -Results [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, MakeDecimal(sum(UnscaledValue(ss_net_profit#6))#45,17,2) AS store_sales_profit#48, MakeDecimal(sum(UnscaledValue(sr_net_loss#25))#46,17,2) AS store_returns_loss#49, MakeDecimal(sum(UnscaledValue(cs_net_profit#35))#47,17,2) AS catalog_sales_profit#50] +Functions [3]: [sum(UnscaledValue(ss_net_profit#6)), 
sum(UnscaledValue(sr_net_loss#25)), sum(UnscaledValue(cs_net_profit#34))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_net_profit#6))#45, sum(UnscaledValue(sr_net_loss#25))#46, sum(UnscaledValue(cs_net_profit#34))#47] +Results [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, MakeDecimal(sum(UnscaledValue(ss_net_profit#6))#45,17,2) AS store_sales_profit#48, MakeDecimal(sum(UnscaledValue(sr_net_loss#25))#46,17,2) AS store_returns_loss#49, MakeDecimal(sum(UnscaledValue(cs_net_profit#34))#47,17,2) AS catalog_sales_profit#50] (57) TakeOrderedAndProject Input [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, store_sales_profit#48, store_returns_loss#49, catalog_sales_profit#50] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/simplified.txt index ad9fa718ff2bd..9b53cdaa5dc67 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/simplified.txt @@ -6,67 +6,67 @@ TakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales WholeStageCodegen (17) HashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,ss_net_profit,sr_net_loss,cs_net_profit] [sum,sum,sum,sum,sum,sum] Project [ss_net_profit,sr_net_loss,cs_net_profit,s_store_id,s_store_name,i_item_id,i_item_desc] - SortMergeJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] + SortMergeJoin [ss_customer_sk,ss_item_sk,ss_ticket_number,sr_customer_sk,sr_item_sk,sr_ticket_number] InputAdapter - WholeStageCodegen (13) - Sort [sr_customer_sk,sr_item_sk] + WholeStageCodegen (8) + Sort [ss_customer_sk,ss_item_sk,ss_ticket_number] InputAdapter - Exchange [sr_customer_sk,sr_item_sk] #2 - WholeStageCodegen (12) - Project [ss_net_profit,s_store_id,s_store_name,i_item_id,i_item_desc,sr_item_sk,sr_customer_sk,sr_net_loss] - SortMergeJoin [ss_customer_sk,ss_item_sk,ss_ticket_number,sr_customer_sk,sr_item_sk,sr_ticket_number] + Exchange [ss_customer_sk,ss_item_sk,ss_ticket_number] #2 + WholeStageCodegen (7) + Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_profit,s_store_id,s_store_name,i_item_id,i_item_desc] + SortMergeJoin [ss_item_sk,i_item_sk] InputAdapter - WholeStageCodegen (8) - Sort [ss_customer_sk,ss_item_sk,ss_ticket_number] + WholeStageCodegen (4) + Sort [ss_item_sk] InputAdapter - Exchange [ss_customer_sk,ss_item_sk,ss_ticket_number] #3 - WholeStageCodegen (7) - Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_profit,s_store_id,s_store_name,i_item_id,i_item_desc] - SortMergeJoin [ss_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (4) - Sort [ss_item_sk] - InputAdapter - Exchange [ss_item_sk] #4 - WholeStageCodegen (3) - Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_profit,s_store_id,s_store_name] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_customer_sk,ss_item_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_profit] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet 
default.date_dim [d_date_sk,d_year,d_moy] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (2) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_id,s_store_name] - InputAdapter - WholeStageCodegen (6) - Sort [i_item_sk] - InputAdapter - Exchange [i_item_sk] #7 - WholeStageCodegen (5) - Filter [i_item_sk] + Exchange [ss_item_sk] #3 + WholeStageCodegen (3) + Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_profit,s_store_id,s_store_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_customer_sk,ss_item_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_profit] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] ColumnarToRow InputAdapter - Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_id,s_store_name] + InputAdapter + WholeStageCodegen (6) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #6 + WholeStageCodegen (5) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] + InputAdapter + WholeStageCodegen (16) + Sort [sr_customer_sk,sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_customer_sk,sr_item_sk,sr_ticket_number] #7 + WholeStageCodegen (15) + Project [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss,cs_net_profit] + SortMergeJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] InputAdapter WholeStageCodegen (11) - Sort [sr_customer_sk,sr_item_sk,sr_ticket_number] + Sort [sr_customer_sk,sr_item_sk] InputAdapter - Exchange [sr_customer_sk,sr_item_sk,sr_ticket_number] #8 + Exchange [sr_customer_sk,sr_item_sk] #8 WholeStageCodegen (10) Project [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss] BroadcastHashJoin [sr_returned_date_sk,d_date_sk] @@ -82,17 +82,17 @@ TakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - InputAdapter - WholeStageCodegen (16) - Sort [cs_bill_customer_sk,cs_item_sk] - InputAdapter - Exchange [cs_bill_customer_sk,cs_item_sk] #10 - WholeStageCodegen (15) - Project [cs_bill_customer_sk,cs_item_sk,cs_net_profit] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_net_profit] InputAdapter - ReusedExchange [d_date_sk] #9 + WholeStageCodegen (14) + Sort [cs_bill_customer_sk,cs_item_sk] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk] #10 + WholeStageCodegen (13) + Project [cs_bill_customer_sk,cs_item_sk,cs_net_profit] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_net_profit] + InputAdapter + ReusedExchange [d_date_sk] #9 diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.sf100/explain.txt index 8185680b58670..cb8522545f1d3 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.sf100/explain.txt @@ -9,8 +9,8 @@ TakeOrderedAndProject (67) : +- * HashAggregate (30) : +- * Project (29) : +- * BroadcastHashJoin Inner BuildRight (28) - : :- * Project (22) - : : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) : : :- * Project (10) : : : +- * BroadcastHashJoin Inner BuildRight (9) : : : :- * Filter (3) @@ -21,21 +21,21 @@ TakeOrderedAndProject (67) : : : +- * Filter (6) : : : +- * ColumnarToRow (5) : : : +- Scan parquet default.date_dim (4) - : : +- BroadcastExchange (20) - : : +- * BroadcastHashJoin LeftSemi BuildRight (19) - : : :- * Filter (13) - : : : +- * ColumnarToRow (12) - : : : +- Scan parquet default.item (11) - : : +- BroadcastExchange (18) - : : +- * Project (17) - : : +- * Filter (16) - : : +- * ColumnarToRow (15) - : : +- Scan parquet default.item (14) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.customer_address (11) : +- BroadcastExchange (27) - : +- * Project (26) - : +- * Filter (25) - : +- * ColumnarToRow (24) - : +- Scan parquet default.customer_address (23) + : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : :- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet default.item (18) + : +- BroadcastExchange (25) + : +- * Project (24) + : +- * Filter (23) + : +- * ColumnarToRow (22) + : +- Scan parquet default.item (21) :- * HashAggregate (47) : +- Exchange (46) : +- * HashAggregate (45) @@ -113,108 +113,108 @@ Join condition: None Output [3]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, d_date_sk#5] -(11) Scan parquet default.item -Output [2]: [i_item_sk#9, i_manufact_id#10] +(11) Scan parquet default.customer_address +Output [2]: [ca_address_sk#9, ca_gmt_offset#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] + +(13) Filter [codegen id : 2] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] +Condition : ((isnotnull(ca_gmt_offset#10) AND (ca_gmt_offset#10 = -5.00)) AND isnotnull(ca_address_sk#9)) + +(14) Project [codegen id : 2] +Output [1]: [ca_address_sk#9] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] + +(15) BroadcastExchange +Input [1]: [ca_address_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#3] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(17) Project [codegen id : 5] +Output [2]: [ss_item_sk#2, ss_ext_sales_price#4] +Input [4]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, ca_address_sk#9] + +(18) Scan parquet default.item +Output [2]: [i_item_sk#12, i_manufact_id#13] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: 
struct -(12) ColumnarToRow [codegen id : 3] -Input [2]: [i_item_sk#9, i_manufact_id#10] +(19) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#12, i_manufact_id#13] -(13) Filter [codegen id : 3] -Input [2]: [i_item_sk#9, i_manufact_id#10] -Condition : isnotnull(i_item_sk#9) +(20) Filter [codegen id : 4] +Input [2]: [i_item_sk#12, i_manufact_id#13] +Condition : isnotnull(i_item_sk#12) -(14) Scan parquet default.item -Output [2]: [i_category#11, i_manufact_id#10] +(21) Scan parquet default.item +Output [2]: [i_category#14, i_manufact_id#13] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Electronics)] ReadSchema: struct -(15) ColumnarToRow [codegen id : 2] -Input [2]: [i_category#11, i_manufact_id#10] +(22) ColumnarToRow [codegen id : 3] +Input [2]: [i_category#14, i_manufact_id#13] -(16) Filter [codegen id : 2] -Input [2]: [i_category#11, i_manufact_id#10] -Condition : (isnotnull(i_category#11) AND (i_category#11 = Electronics)) +(23) Filter [codegen id : 3] +Input [2]: [i_category#14, i_manufact_id#13] +Condition : (isnotnull(i_category#14) AND (i_category#14 = Electronics)) -(17) Project [codegen id : 2] -Output [1]: [i_manufact_id#10 AS i_manufact_id#10#12] -Input [2]: [i_category#11, i_manufact_id#10] +(24) Project [codegen id : 3] +Output [1]: [i_manufact_id#13 AS i_manufact_id#13#15] +Input [2]: [i_category#14, i_manufact_id#13] -(18) BroadcastExchange -Input [1]: [i_manufact_id#10#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] +(25) BroadcastExchange +Input [1]: [i_manufact_id#13#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] -(19) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [i_manufact_id#10] -Right keys [1]: [i_manufact_id#10#12] +(26) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_manufact_id#13] +Right keys [1]: [i_manufact_id#13#15] Join condition: None -(20) BroadcastExchange -Input [2]: [i_item_sk#9, i_manufact_id#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] - -(21) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#9] -Join condition: None - -(22) Project [codegen id : 5] -Output [3]: [ss_addr_sk#3, ss_ext_sales_price#4, i_manufact_id#10] -Input [5]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, i_item_sk#9, i_manufact_id#10] - -(23) Scan parquet default.customer_address -Output [2]: [ca_address_sk#15, ca_gmt_offset#16] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] -ReadSchema: struct - -(24) ColumnarToRow [codegen id : 4] -Input [2]: [ca_address_sk#15, ca_gmt_offset#16] - -(25) Filter [codegen id : 4] -Input [2]: [ca_address_sk#15, ca_gmt_offset#16] -Condition : ((isnotnull(ca_gmt_offset#16) AND (ca_gmt_offset#16 = -5.00)) AND isnotnull(ca_address_sk#15)) - -(26) Project [codegen id : 4] -Output [1]: [ca_address_sk#15] -Input [2]: [ca_address_sk#15, ca_gmt_offset#16] - (27) BroadcastExchange -Input [1]: [ca_address_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] +Input [2]: [i_item_sk#12, i_manufact_id#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] (28) BroadcastHashJoin [codegen id : 5] -Left keys [1]: 
[ss_addr_sk#3] -Right keys [1]: [ca_address_sk#15] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#12] Join condition: None (29) Project [codegen id : 5] -Output [2]: [ss_ext_sales_price#4, i_manufact_id#10] -Input [4]: [ss_addr_sk#3, ss_ext_sales_price#4, i_manufact_id#10, ca_address_sk#15] +Output [2]: [ss_ext_sales_price#4, i_manufact_id#13] +Input [4]: [ss_item_sk#2, ss_ext_sales_price#4, i_item_sk#12, i_manufact_id#13] (30) HashAggregate [codegen id : 5] -Input [2]: [ss_ext_sales_price#4, i_manufact_id#10] -Keys [1]: [i_manufact_id#10] +Input [2]: [ss_ext_sales_price#4, i_manufact_id#13] +Keys [1]: [i_manufact_id#13] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#4))] Aggregate Attributes [1]: [sum#18] -Results [2]: [i_manufact_id#10, sum#19] +Results [2]: [i_manufact_id#13, sum#19] (31) Exchange -Input [2]: [i_manufact_id#10, sum#19] -Arguments: hashpartitioning(i_manufact_id#10, 5), true, [id=#20] +Input [2]: [i_manufact_id#13, sum#19] +Arguments: hashpartitioning(i_manufact_id#13, 5), ENSURE_REQUIREMENTS, [id=#20] (32) HashAggregate [codegen id : 6] -Input [2]: [i_manufact_id#10, sum#19] -Keys [1]: [i_manufact_id#10] +Input [2]: [i_manufact_id#13, sum#19] +Keys [1]: [i_manufact_id#13] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#4))] Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#4))#21] -Results [2]: [i_manufact_id#10, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#21,17,2) AS total_sales#22] +Results [2]: [i_manufact_id#13, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#21,17,2) AS total_sales#22] (33) Scan parquet default.catalog_sales Output [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] @@ -242,47 +242,47 @@ Join condition: None Output [3]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] Input [5]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, d_date_sk#5] -(39) ReusedExchange [Reuses operator id: 20] -Output [2]: [i_item_sk#9, i_manufact_id#10] +(39) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#9] (40) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_item_sk#25] -Right keys [1]: [i_item_sk#9] +Left keys [1]: [cs_bill_addr_sk#24] +Right keys [1]: [ca_address_sk#9] Join condition: None (41) Project [codegen id : 11] -Output [3]: [cs_bill_addr_sk#24, cs_ext_sales_price#26, i_manufact_id#10] -Input [5]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, i_item_sk#9, i_manufact_id#10] +Output [2]: [cs_item_sk#25, cs_ext_sales_price#26] +Input [4]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, ca_address_sk#9] (42) ReusedExchange [Reuses operator id: 27] -Output [1]: [ca_address_sk#15] +Output [2]: [i_item_sk#12, i_manufact_id#13] (43) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_bill_addr_sk#24] -Right keys [1]: [ca_address_sk#15] +Left keys [1]: [cs_item_sk#25] +Right keys [1]: [i_item_sk#12] Join condition: None (44) Project [codegen id : 11] -Output [2]: [cs_ext_sales_price#26, i_manufact_id#10] -Input [4]: [cs_bill_addr_sk#24, cs_ext_sales_price#26, i_manufact_id#10, ca_address_sk#15] +Output [2]: [cs_ext_sales_price#26, i_manufact_id#13] +Input [4]: [cs_item_sk#25, cs_ext_sales_price#26, i_item_sk#12, i_manufact_id#13] (45) HashAggregate [codegen id : 11] -Input [2]: [cs_ext_sales_price#26, i_manufact_id#10] -Keys [1]: [i_manufact_id#10] +Input [2]: [cs_ext_sales_price#26, i_manufact_id#13] +Keys [1]: [i_manufact_id#13] Functions [1]: 
[partial_sum(UnscaledValue(cs_ext_sales_price#26))] Aggregate Attributes [1]: [sum#27] -Results [2]: [i_manufact_id#10, sum#28] +Results [2]: [i_manufact_id#13, sum#28] (46) Exchange -Input [2]: [i_manufact_id#10, sum#28] -Arguments: hashpartitioning(i_manufact_id#10, 5), true, [id=#29] +Input [2]: [i_manufact_id#13, sum#28] +Arguments: hashpartitioning(i_manufact_id#13, 5), ENSURE_REQUIREMENTS, [id=#29] (47) HashAggregate [codegen id : 12] -Input [2]: [i_manufact_id#10, sum#28] -Keys [1]: [i_manufact_id#10] +Input [2]: [i_manufact_id#13, sum#28] +Keys [1]: [i_manufact_id#13] Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#26))] Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#26))#30] -Results [2]: [i_manufact_id#10, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#26))#30,17,2) AS total_sales#31] +Results [2]: [i_manufact_id#13, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#26))#30,17,2) AS total_sales#31] (48) Scan parquet default.web_sales Output [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] @@ -310,69 +310,69 @@ Join condition: None Output [3]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] Input [5]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, d_date_sk#5] -(54) ReusedExchange [Reuses operator id: 20] -Output [2]: [i_item_sk#9, i_manufact_id#10] +(54) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#9] (55) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_item_sk#33] -Right keys [1]: [i_item_sk#9] +Left keys [1]: [ws_bill_addr_sk#34] +Right keys [1]: [ca_address_sk#9] Join condition: None (56) Project [codegen id : 17] -Output [3]: [ws_bill_addr_sk#34, ws_ext_sales_price#35, i_manufact_id#10] -Input [5]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, i_item_sk#9, i_manufact_id#10] +Output [2]: [ws_item_sk#33, ws_ext_sales_price#35] +Input [4]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, ca_address_sk#9] (57) ReusedExchange [Reuses operator id: 27] -Output [1]: [ca_address_sk#15] +Output [2]: [i_item_sk#12, i_manufact_id#13] (58) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_bill_addr_sk#34] -Right keys [1]: [ca_address_sk#15] +Left keys [1]: [ws_item_sk#33] +Right keys [1]: [i_item_sk#12] Join condition: None (59) Project [codegen id : 17] -Output [2]: [ws_ext_sales_price#35, i_manufact_id#10] -Input [4]: [ws_bill_addr_sk#34, ws_ext_sales_price#35, i_manufact_id#10, ca_address_sk#15] +Output [2]: [ws_ext_sales_price#35, i_manufact_id#13] +Input [4]: [ws_item_sk#33, ws_ext_sales_price#35, i_item_sk#12, i_manufact_id#13] (60) HashAggregate [codegen id : 17] -Input [2]: [ws_ext_sales_price#35, i_manufact_id#10] -Keys [1]: [i_manufact_id#10] +Input [2]: [ws_ext_sales_price#35, i_manufact_id#13] +Keys [1]: [i_manufact_id#13] Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#35))] Aggregate Attributes [1]: [sum#36] -Results [2]: [i_manufact_id#10, sum#37] +Results [2]: [i_manufact_id#13, sum#37] (61) Exchange -Input [2]: [i_manufact_id#10, sum#37] -Arguments: hashpartitioning(i_manufact_id#10, 5), true, [id=#38] +Input [2]: [i_manufact_id#13, sum#37] +Arguments: hashpartitioning(i_manufact_id#13, 5), ENSURE_REQUIREMENTS, [id=#38] (62) HashAggregate [codegen id : 18] -Input [2]: [i_manufact_id#10, sum#37] -Keys [1]: [i_manufact_id#10] +Input [2]: [i_manufact_id#13, sum#37] +Keys [1]: [i_manufact_id#13] Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#35))] Aggregate Attributes [1]: 
[sum(UnscaledValue(ws_ext_sales_price#35))#39] -Results [2]: [i_manufact_id#10, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#35))#39,17,2) AS total_sales#40] +Results [2]: [i_manufact_id#13, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#35))#39,17,2) AS total_sales#40] (63) Union (64) HashAggregate [codegen id : 19] -Input [2]: [i_manufact_id#10, total_sales#22] -Keys [1]: [i_manufact_id#10] +Input [2]: [i_manufact_id#13, total_sales#22] +Keys [1]: [i_manufact_id#13] Functions [1]: [partial_sum(total_sales#22)] Aggregate Attributes [2]: [sum#41, isEmpty#42] -Results [3]: [i_manufact_id#10, sum#43, isEmpty#44] +Results [3]: [i_manufact_id#13, sum#43, isEmpty#44] (65) Exchange -Input [3]: [i_manufact_id#10, sum#43, isEmpty#44] -Arguments: hashpartitioning(i_manufact_id#10, 5), true, [id=#45] +Input [3]: [i_manufact_id#13, sum#43, isEmpty#44] +Arguments: hashpartitioning(i_manufact_id#13, 5), ENSURE_REQUIREMENTS, [id=#45] (66) HashAggregate [codegen id : 20] -Input [3]: [i_manufact_id#10, sum#43, isEmpty#44] -Keys [1]: [i_manufact_id#10] +Input [3]: [i_manufact_id#13, sum#43, isEmpty#44] +Keys [1]: [i_manufact_id#13] Functions [1]: [sum(total_sales#22)] Aggregate Attributes [1]: [sum(total_sales#22)#46] -Results [2]: [i_manufact_id#10, sum(total_sales#22)#46 AS total_sales#47] +Results [2]: [i_manufact_id#13, sum(total_sales#22)#46 AS total_sales#47] (67) TakeOrderedAndProject -Input [2]: [i_manufact_id#10, total_sales#47] -Arguments: 100, [total_sales#47 ASC NULLS FIRST], [i_manufact_id#10, total_sales#47] +Input [2]: [i_manufact_id#13, total_sales#47] +Arguments: 100, [total_sales#47 ASC NULLS FIRST], [i_manufact_id#13, total_sales#47] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.sf100/simplified.txt index 410def2466e1a..14787f0bbce7b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.sf100/simplified.txt @@ -14,9 +14,9 @@ TakeOrderedAndProject [total_sales,i_manufact_id] WholeStageCodegen (5) HashAggregate [i_manufact_id,ss_ext_sales_price] [sum,sum] Project [ss_ext_sales_price,i_manufact_id] - BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_addr_sk,ss_ext_sales_price,i_manufact_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] Project [ss_item_sk,ss_addr_sk,ss_ext_sales_price] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Filter [ss_sold_date_sk,ss_addr_sk,ss_item_sk] @@ -33,28 +33,28 @@ TakeOrderedAndProject [total_sales,i_manufact_id] Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) - BroadcastHashJoin [i_manufact_id,i_manufact_id] - Filter [i_item_sk] + WholeStageCodegen (2) + Project [ca_address_sk] + Filter [ca_gmt_offset,ca_address_sk] ColumnarToRow InputAdapter - Scan parquet default.item [i_item_sk,i_manufact_id] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (2) - Project [i_manufact_id] - Filter [i_category] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_category,i_manufact_id] + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] InputAdapter - BroadcastExchange #6 + BroadcastExchange #5 WholeStageCodegen (4) - Project [ca_address_sk] - Filter 
[ca_gmt_offset,ca_address_sk] + BroadcastHashJoin [i_manufact_id,i_manufact_id] + Filter [i_item_sk] ColumnarToRow InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] + Scan parquet default.item [i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [i_manufact_id] + Filter [i_category] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_manufact_id] WholeStageCodegen (12) HashAggregate [i_manufact_id,sum] [sum(UnscaledValue(cs_ext_sales_price)),total_sales,sum] InputAdapter @@ -62,9 +62,9 @@ TakeOrderedAndProject [total_sales,i_manufact_id] WholeStageCodegen (11) HashAggregate [i_manufact_id,cs_ext_sales_price] [sum,sum] Project [cs_ext_sales_price,i_manufact_id] - BroadcastHashJoin [cs_bill_addr_sk,ca_address_sk] - Project [cs_bill_addr_sk,cs_ext_sales_price,i_manufact_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_bill_addr_sk,ca_address_sk] Project [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] Filter [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk] @@ -74,9 +74,9 @@ TakeOrderedAndProject [total_sales,i_manufact_id] InputAdapter ReusedExchange [d_date_sk] #3 InputAdapter - ReusedExchange [i_item_sk,i_manufact_id] #4 + ReusedExchange [ca_address_sk] #4 InputAdapter - ReusedExchange [ca_address_sk] #6 + ReusedExchange [i_item_sk,i_manufact_id] #5 WholeStageCodegen (18) HashAggregate [i_manufact_id,sum] [sum(UnscaledValue(ws_ext_sales_price)),total_sales,sum] InputAdapter @@ -84,9 +84,9 @@ TakeOrderedAndProject [total_sales,i_manufact_id] WholeStageCodegen (17) HashAggregate [i_manufact_id,ws_ext_sales_price] [sum,sum] Project [ws_ext_sales_price,i_manufact_id] - BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] - Project [ws_bill_addr_sk,ws_ext_sales_price,i_manufact_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] Project [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] Filter [ws_sold_date_sk,ws_bill_addr_sk,ws_item_sk] @@ -96,6 +96,6 @@ TakeOrderedAndProject [total_sales,i_manufact_id] InputAdapter ReusedExchange [d_date_sk] #3 InputAdapter - ReusedExchange [i_item_sk,i_manufact_id] #4 + ReusedExchange [ca_address_sk] #4 InputAdapter - ReusedExchange [ca_address_sk] #6 + ReusedExchange [i_item_sk,i_manufact_id] #5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.sf100/explain.txt index d7a8c103285cb..6492918d3aa13 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.sf100/explain.txt @@ -6,117 +6,117 @@ TakeOrderedAndProject (21) +- * Project (17) +- * BroadcastHashJoin Inner BuildRight (16) :- * Project (10) - : +- * BroadcastHashJoin Inner BuildRight (9) - : :- * Filter (3) - : : +- * ColumnarToRow (2) - : : +- Scan parquet default.store_sales (1) - : +- BroadcastExchange (8) - : +- * Project (7) - : +- * Filter (6) - : +- * ColumnarToRow (5) - : +- Scan parquet default.item (4) + : +- * BroadcastHashJoin Inner BuildLeft (9) + : :- BroadcastExchange (5) + : : +- * Project (4) + : : +- * Filter (3) + : : +- * 
ColumnarToRow (2) + : : +- Scan parquet default.date_dim (1) + : +- * Filter (8) + : +- * ColumnarToRow (7) + : +- Scan parquet default.store_sales (6) +- BroadcastExchange (15) +- * Project (14) +- * Filter (13) +- * ColumnarToRow (12) - +- Scan parquet default.date_dim (11) + +- Scan parquet default.item (11) -(1) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct -(2) ColumnarToRow [codegen id : 3] -Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +(2) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] -(3) Filter [codegen id : 3] -Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] -Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_item_sk#2)) +(3) Filter [codegen id : 1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) -(4) Scan parquet default.item -Output [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manager_id#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] -ReadSchema: struct +(4) Project [codegen id : 1] +Output [2]: [d_date_sk#1, d_year#2] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] -(5) ColumnarToRow [codegen id : 1] -Input [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manager_id#7] +(5) BroadcastExchange +Input [2]: [d_date_sk#1, d_year#2] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#4] -(6) Filter [codegen id : 1] -Input [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manager_id#7] -Condition : ((isnotnull(i_manager_id#7) AND (i_manager_id#7 = 1)) AND isnotnull(i_item_sk#4)) +(6) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct -(7) Project [codegen id : 1] -Output [3]: [i_item_sk#4, i_brand_id#5, i_brand#6] -Input [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manager_id#7] +(7) ColumnarToRow +Input [3]: [ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] -(8) BroadcastExchange -Input [3]: [i_item_sk#4, i_brand_id#5, i_brand#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] +(8) Filter +Input [3]: [ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] +Condition : (isnotnull(ss_sold_date_sk#5) AND isnotnull(ss_item_sk#6)) (9) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#4] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#5] Join condition: None (10) Project [codegen id : 3] -Output [4]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_brand_id#5, i_brand#6] -Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3, i_item_sk#4, i_brand_id#5, i_brand#6] +Output [3]: [d_year#2, 
ss_item_sk#6, ss_ext_sales_price#7] +Input [5]: [d_date_sk#1, d_year#2, ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] -(11) Scan parquet default.date_dim -Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +(11) Scan parquet default.item +Output [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] +ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] (13) Filter [codegen id : 2] -Input [3]: [d_date_sk#9, d_year#10, d_moy#11] -Condition : ((((isnotnull(d_moy#11) AND isnotnull(d_year#10)) AND (d_moy#11 = 11)) AND (d_year#10 = 2000)) AND isnotnull(d_date_sk#9)) +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 1)) AND isnotnull(i_item_sk#8)) (14) Project [codegen id : 2] -Output [2]: [d_date_sk#9, d_year#10] -Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Output [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] (15) BroadcastExchange -Input [2]: [d_date_sk#9, d_year#10] +Input [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] (16) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#9] +Left keys [1]: [ss_item_sk#6] +Right keys [1]: [i_item_sk#8] Join condition: None (17) Project [codegen id : 3] -Output [4]: [d_year#10, ss_ext_sales_price#3, i_brand_id#5, i_brand#6] -Input [6]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_brand_id#5, i_brand#6, d_date_sk#9, d_year#10] +Output [4]: [d_year#2, ss_ext_sales_price#7, i_brand_id#9, i_brand#10] +Input [6]: [d_year#2, ss_item_sk#6, ss_ext_sales_price#7, i_item_sk#8, i_brand_id#9, i_brand#10] (18) HashAggregate [codegen id : 3] -Input [4]: [d_year#10, ss_ext_sales_price#3, i_brand_id#5, i_brand#6] -Keys [3]: [d_year#10, i_brand#6, i_brand_id#5] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Input [4]: [d_year#2, ss_ext_sales_price#7, i_brand_id#9, i_brand#10] +Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#7))] Aggregate Attributes [1]: [sum#13] -Results [4]: [d_year#10, i_brand#6, i_brand_id#5, sum#14] +Results [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] (19) Exchange -Input [4]: [d_year#10, i_brand#6, i_brand_id#5, sum#14] -Arguments: hashpartitioning(d_year#10, i_brand#6, i_brand_id#5, 5), true, [id=#15] +Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] +Arguments: hashpartitioning(d_year#2, i_brand#10, i_brand_id#9, 5), ENSURE_REQUIREMENTS, [id=#15] (20) HashAggregate [codegen id : 4] -Input [4]: [d_year#10, i_brand#6, i_brand_id#5, sum#14] -Keys [3]: [d_year#10, i_brand#6, i_brand_id#5] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#16] -Results [4]: [d_year#10, i_brand_id#5 AS brand_id#17, i_brand#6 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#16,17,2) AS ext_price#19] +Input [4]: 
[d_year#2, i_brand#10, i_brand_id#9, sum#14] +Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#7))#16] +Results [4]: [d_year#2, i_brand_id#9 AS brand_id#17, i_brand#10 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#7))#16,17,2) AS ext_price#19] (21) TakeOrderedAndProject -Input [4]: [d_year#10, brand_id#17, brand#18, ext_price#19] -Arguments: 100, [d_year#10 ASC NULLS FIRST, ext_price#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [d_year#10, brand_id#17, brand#18, ext_price#19] +Input [4]: [d_year#2, brand_id#17, brand#18, ext_price#19] +Arguments: 100, [d_year#2 ASC NULLS FIRST, ext_price#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [d_year#2, brand_id#17, brand#18, ext_price#19] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.sf100/simplified.txt index 8ed500d84390c..f4aaf3df75135 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.sf100/simplified.txt @@ -6,26 +6,26 @@ TakeOrderedAndProject [d_year,ext_price,brand_id,brand] WholeStageCodegen (3) HashAggregate [d_year,i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] Project [d_year,ss_ext_sales_price,i_brand_id,i_brand] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_ext_sales_price,i_brand_id,i_brand] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_sold_date_sk,ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [d_year,ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen (1) - Project [i_item_sk,i_brand_id,i_brand] - Filter [i_manager_id,i_item_sk] + Project [d_date_sk,d_year] + Filter [d_moy,d_year,d_date_sk] ColumnarToRow InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Filter [ss_sold_date_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #3 WholeStageCodegen (2) - Project [d_date_sk,d_year] - Filter [d_moy,d_year,d_date_sk] + Project [i_item_sk,i_brand_id,i_brand] + Filter [i_manager_id,i_item_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.sf100/explain.txt index a1257cd292e48..b8d8aa358d532 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.sf100/explain.txt @@ -6,115 +6,115 @@ TakeOrderedAndProject (21) +- * Project (17) +- * BroadcastHashJoin Inner BuildRight (16) :- * Project (10) - : +- * BroadcastHashJoin Inner BuildRight (9) - : :- * Filter (3) - : : +- * ColumnarToRow (2) - : : +- Scan parquet default.store_sales (1) - : +- BroadcastExchange (8) - : +- * Project 
(7) - : +- * Filter (6) - : +- * ColumnarToRow (5) - : +- Scan parquet default.item (4) + : +- * BroadcastHashJoin Inner BuildLeft (9) + : :- BroadcastExchange (5) + : : +- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.date_dim (1) + : +- * Filter (8) + : +- * ColumnarToRow (7) + : +- Scan parquet default.store_sales (6) +- BroadcastExchange (15) +- * Project (14) +- * Filter (13) +- * ColumnarToRow (12) - +- Scan parquet default.date_dim (11) + +- Scan parquet default.item (11) -(1) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct -(2) ColumnarToRow [codegen id : 3] -Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +(2) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] -(3) Filter [codegen id : 3] -Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] -Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_item_sk#2)) +(3) Filter [codegen id : 1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 1999)) AND isnotnull(d_date_sk#1)) -(4) Scan parquet default.item -Output [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manager_id#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,28), IsNotNull(i_item_sk)] -ReadSchema: struct +(4) Project [codegen id : 1] +Output [1]: [d_date_sk#1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] -(5) ColumnarToRow [codegen id : 1] -Input [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manager_id#7] +(5) BroadcastExchange +Input [1]: [d_date_sk#1] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#4] -(6) Filter [codegen id : 1] -Input [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manager_id#7] -Condition : ((isnotnull(i_manager_id#7) AND (i_manager_id#7 = 28)) AND isnotnull(i_item_sk#4)) +(6) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct -(7) Project [codegen id : 1] -Output [3]: [i_item_sk#4, i_brand_id#5, i_brand#6] -Input [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manager_id#7] +(7) ColumnarToRow +Input [3]: [ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] -(8) BroadcastExchange -Input [3]: [i_item_sk#4, i_brand_id#5, i_brand#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] +(8) Filter +Input [3]: [ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] +Condition : (isnotnull(ss_sold_date_sk#5) AND isnotnull(ss_item_sk#6)) (9) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#4] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#5] Join condition: None (10) Project [codegen id : 3] -Output [4]: 
[ss_sold_date_sk#1, ss_ext_sales_price#3, i_brand_id#5, i_brand#6] -Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3, i_item_sk#4, i_brand_id#5, i_brand#6] +Output [2]: [ss_item_sk#6, ss_ext_sales_price#7] +Input [4]: [d_date_sk#1, ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] -(11) Scan parquet default.date_dim -Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +(11) Scan parquet default.item +Output [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,28), IsNotNull(i_item_sk)] +ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] (13) Filter [codegen id : 2] -Input [3]: [d_date_sk#9, d_year#10, d_moy#11] -Condition : ((((isnotnull(d_moy#11) AND isnotnull(d_year#10)) AND (d_moy#11 = 11)) AND (d_year#10 = 1999)) AND isnotnull(d_date_sk#9)) +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 28)) AND isnotnull(i_item_sk#8)) (14) Project [codegen id : 2] -Output [1]: [d_date_sk#9] -Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Output [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] (15) BroadcastExchange -Input [1]: [d_date_sk#9] +Input [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] (16) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#9] +Left keys [1]: [ss_item_sk#6] +Right keys [1]: [i_item_sk#8] Join condition: None (17) Project [codegen id : 3] -Output [3]: [ss_ext_sales_price#3, i_brand_id#5, i_brand#6] -Input [5]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_brand_id#5, i_brand#6, d_date_sk#9] +Output [3]: [ss_ext_sales_price#7, i_brand_id#9, i_brand#10] +Input [5]: [ss_item_sk#6, ss_ext_sales_price#7, i_item_sk#8, i_brand_id#9, i_brand#10] (18) HashAggregate [codegen id : 3] -Input [3]: [ss_ext_sales_price#3, i_brand_id#5, i_brand#6] -Keys [2]: [i_brand#6, i_brand_id#5] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Input [3]: [ss_ext_sales_price#7, i_brand_id#9, i_brand#10] +Keys [2]: [i_brand#10, i_brand_id#9] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#7))] Aggregate Attributes [1]: [sum#13] -Results [3]: [i_brand#6, i_brand_id#5, sum#14] +Results [3]: [i_brand#10, i_brand_id#9, sum#14] (19) Exchange -Input [3]: [i_brand#6, i_brand_id#5, sum#14] -Arguments: hashpartitioning(i_brand#6, i_brand_id#5, 5), true, [id=#15] +Input [3]: [i_brand#10, i_brand_id#9, sum#14] +Arguments: hashpartitioning(i_brand#10, i_brand_id#9, 5), ENSURE_REQUIREMENTS, [id=#15] (20) HashAggregate [codegen id : 4] -Input [3]: [i_brand#6, i_brand_id#5, sum#14] -Keys [2]: [i_brand#6, i_brand_id#5] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#16] -Results [3]: [i_brand_id#5 AS brand_id#17, i_brand#6 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#16,17,2) AS ext_price#19] +Input [3]: [i_brand#10, 
i_brand_id#9, sum#14] +Keys [2]: [i_brand#10, i_brand_id#9] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#7))#16] +Results [3]: [i_brand_id#9 AS brand_id#17, i_brand#10 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#7))#16,17,2) AS ext_price#19] (21) TakeOrderedAndProject Input [3]: [brand_id#17, brand#18, ext_price#19] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.sf100/simplified.txt index b0d0e0d809441..4f375c80678e8 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.sf100/simplified.txt @@ -6,26 +6,26 @@ TakeOrderedAndProject [ext_price,brand_id,brand] WholeStageCodegen (3) HashAggregate [i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] Project [ss_ext_sales_price,i_brand_id,i_brand] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_ext_sales_price,i_brand_id,i_brand] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_sold_date_sk,ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen (1) - Project [i_item_sk,i_brand_id,i_brand] - Filter [i_manager_id,i_item_sk] + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] ColumnarToRow InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Filter [ss_sold_date_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #3 WholeStageCodegen (2) - Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] + Project [i_item_sk,i_brand_id,i_brand] + Filter [i_manager_id,i_item_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/explain.txt index 3f8106c96379a..3007b11a1a860 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/explain.txt @@ -11,60 +11,60 @@ TakeOrderedAndProject (79) : +- * BroadcastHashJoin LeftOuter BuildRight (65) : :- * Project (60) : : +- * SortMergeJoin Inner (59) - : : :- * Sort (47) - : : : +- Exchange (46) - : : : +- * Project (45) - : : : +- * BroadcastHashJoin Inner BuildRight (44) - : : : :- * Project (32) - : : : : +- * SortMergeJoin Inner (31) - : : : : :- * Sort (25) - : : : : : +- Exchange (24) - : : : : : +- * Project (23) - : : : : : +- * BroadcastHashJoin Inner BuildRight (22) - : : : : : :- * Project (17) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (16) - : : : : : : :- * Project (10) - : : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : : : : :- * Filter (3) - : : : : : : : : +- * ColumnarToRow (2) - : : : : : : : : +- Scan parquet 
default.catalog_sales (1) - : : : : : : : +- BroadcastExchange (8) - : : : : : : : +- * Project (7) - : : : : : : : +- * Filter (6) - : : : : : : : +- * ColumnarToRow (5) - : : : : : : : +- Scan parquet default.household_demographics (4) - : : : : : : +- BroadcastExchange (15) - : : : : : : +- * Project (14) - : : : : : : +- * Filter (13) - : : : : : : +- * ColumnarToRow (12) - : : : : : : +- Scan parquet default.customer_demographics (11) - : : : : : +- BroadcastExchange (21) - : : : : : +- * Filter (20) - : : : : : +- * ColumnarToRow (19) - : : : : : +- Scan parquet default.date_dim (18) - : : : : +- * Sort (30) - : : : : +- Exchange (29) - : : : : +- * Filter (28) - : : : : +- * ColumnarToRow (27) - : : : : +- Scan parquet default.item (26) - : : : +- BroadcastExchange (43) - : : : +- * Project (42) - : : : +- * BroadcastHashJoin Inner BuildLeft (41) - : : : :- BroadcastExchange (37) - : : : : +- * Project (36) - : : : : +- * Filter (35) - : : : : +- * ColumnarToRow (34) - : : : : +- Scan parquet default.date_dim (33) - : : : +- * Filter (40) - : : : +- * ColumnarToRow (39) - : : : +- Scan parquet default.date_dim (38) + : : :- * Sort (34) + : : : +- Exchange (33) + : : : +- * Project (32) + : : : +- * SortMergeJoin Inner (31) + : : : :- * Sort (25) + : : : : +- Exchange (24) + : : : : +- * Project (23) + : : : : +- * BroadcastHashJoin Inner BuildRight (22) + : : : : :- * Project (17) + : : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : : :- * Project (10) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet default.catalog_sales (1) + : : : : : : +- BroadcastExchange (8) + : : : : : : +- * Project (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet default.household_demographics (4) + : : : : : +- BroadcastExchange (15) + : : : : : +- * Project (14) + : : : : : +- * Filter (13) + : : : : : +- * ColumnarToRow (12) + : : : : : +- Scan parquet default.customer_demographics (11) + : : : : +- BroadcastExchange (21) + : : : : +- * Filter (20) + : : : : +- * ColumnarToRow (19) + : : : : +- Scan parquet default.date_dim (18) + : : : +- * Sort (30) + : : : +- Exchange (29) + : : : +- * Filter (28) + : : : +- * ColumnarToRow (27) + : : : +- Scan parquet default.item (26) : : +- * Sort (58) : : +- Exchange (57) : : +- * Project (56) : : +- * BroadcastHashJoin Inner BuildRight (55) - : : :- * Filter (50) - : : : +- * ColumnarToRow (49) - : : : +- Scan parquet default.inventory (48) + : : :- * Project (50) + : : : +- * BroadcastHashJoin Inner BuildLeft (49) + : : : :- BroadcastExchange (45) + : : : : +- * Project (44) + : : : : +- * BroadcastHashJoin Inner BuildLeft (43) + : : : : :- BroadcastExchange (39) + : : : : : +- * Project (38) + : : : : : +- * Filter (37) + : : : : : +- * ColumnarToRow (36) + : : : : : +- Scan parquet default.date_dim (35) + : : : : +- * Filter (42) + : : : : +- * ColumnarToRow (41) + : : : : +- Scan parquet default.date_dim (40) + : : : +- * Filter (48) + : : : +- * ColumnarToRow (47) + : : : +- Scan parquet default.inventory (46) : : +- BroadcastExchange (54) : : +- * Filter (53) : : +- * ColumnarToRow (52) @@ -185,7 +185,7 @@ Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, c (24) Exchange Input [6]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16] -Arguments: hashpartitioning(cs_item_sk#5, 5), true, [id=#18] +Arguments: 
hashpartitioning(cs_item_sk#5, 5), ENSURE_REQUIREMENTS, [id=#18] (25) Sort [codegen id : 5] Input [6]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16] @@ -207,101 +207,101 @@ Condition : isnotnull(i_item_sk#19) (29) Exchange Input [2]: [i_item_sk#19, i_item_desc#20] -Arguments: hashpartitioning(i_item_sk#19, 5), true, [id=#21] +Arguments: hashpartitioning(i_item_sk#19, 5), ENSURE_REQUIREMENTS, [id=#21] (30) Sort [codegen id : 7] Input [2]: [i_item_sk#19, i_item_desc#20] Arguments: [i_item_sk#19 ASC NULLS FIRST], false, 0 -(31) SortMergeJoin [codegen id : 10] +(31) SortMergeJoin [codegen id : 8] Left keys [1]: [cs_item_sk#5] Right keys [1]: [i_item_sk#19] Join condition: None -(32) Project [codegen id : 10] +(32) Project [codegen id : 8] Output [7]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20] Input [8]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_sk#19, i_item_desc#20] -(33) Scan parquet default.date_dim -Output [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] +(33) Exchange +Input [7]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20] +Arguments: hashpartitioning(cs_item_sk#5, cs_sold_date_sk#1, 5), ENSURE_REQUIREMENTS, [id=#22] + +(34) Sort [codegen id : 9] +Input [7]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20] +Arguments: [cs_item_sk#5 ASC NULLS FIRST, cs_sold_date_sk#1 ASC NULLS FIRST], false, 0 + +(35) Scan parquet default.date_dim +Output [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#26] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] ReadSchema: struct -(34) ColumnarToRow [codegen id : 8] -Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] +(36) ColumnarToRow [codegen id : 10] +Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#26] -(35) Filter [codegen id : 8] -Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] -Condition : ((((isnotnull(d_year#25) AND (d_year#25 = 1999)) AND isnotnull(d_date_sk#22)) AND isnotnull(d_week_seq#24)) AND isnotnull(d_date#23)) +(37) Filter [codegen id : 10] +Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#26] +Condition : ((((isnotnull(d_year#26) AND (d_year#26 = 1999)) AND isnotnull(d_date_sk#23)) AND isnotnull(d_week_seq#25)) AND isnotnull(d_date#24)) -(36) Project [codegen id : 8] -Output [3]: [d_date_sk#22, d_date#23, d_week_seq#24] -Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] +(38) Project [codegen id : 10] +Output [3]: [d_date_sk#23, d_date#24, d_week_seq#25] +Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#26] -(37) BroadcastExchange -Input [3]: [d_date_sk#22, d_date#23, d_week_seq#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#26] +(39) BroadcastExchange +Input [3]: [d_date_sk#23, d_date#24, d_week_seq#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#27] -(38) Scan parquet default.date_dim -Output [2]: [d_date_sk#27, d_week_seq#28] +(40) Scan parquet default.date_dim +Output [2]: [d_date_sk#28, d_week_seq#29] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: 
[IsNotNull(d_week_seq), IsNotNull(d_date_sk)] ReadSchema: struct -(39) ColumnarToRow -Input [2]: [d_date_sk#27, d_week_seq#28] +(41) ColumnarToRow +Input [2]: [d_date_sk#28, d_week_seq#29] -(40) Filter -Input [2]: [d_date_sk#27, d_week_seq#28] -Condition : (isnotnull(d_week_seq#28) AND isnotnull(d_date_sk#27)) +(42) Filter +Input [2]: [d_date_sk#28, d_week_seq#29] +Condition : (isnotnull(d_week_seq#29) AND isnotnull(d_date_sk#28)) -(41) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [d_week_seq#24] -Right keys [1]: [d_week_seq#28] +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [d_week_seq#25] +Right keys [1]: [d_week_seq#29] Join condition: None -(42) Project [codegen id : 9] -Output [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27] -Input [5]: [d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27, d_week_seq#28] - -(43) BroadcastExchange -Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] - -(44) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cs_sold_date_sk#1] -Right keys [1]: [d_date_sk#22] -Join condition: (d_date#16 > d_date#23 + 5 days) - -(45) Project [codegen id : 10] -Output [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27] -Input [11]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20, d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27] +(44) Project [codegen id : 11] +Output [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#28] +Input [5]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#28, d_week_seq#29] -(46) Exchange -Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27] -Arguments: hashpartitioning(cs_item_sk#5, d_date_sk#27, 5), true, [id=#30] +(45) BroadcastExchange +Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[3, int, true] as bigint)),false), [id=#30] -(47) Sort [codegen id : 11] -Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27] -Arguments: [cs_item_sk#5 ASC NULLS FIRST, d_date_sk#27 ASC NULLS FIRST], false, 0 - -(48) Scan parquet default.inventory +(46) Scan parquet default.inventory Output [4]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] Batched: true Location [not included in comparison]/{warehouse_dir}/inventory] PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] ReadSchema: struct -(49) ColumnarToRow [codegen id : 13] +(47) ColumnarToRow Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] -(50) Filter [codegen id : 13] +(48) Filter Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] Condition : (((isnotnull(inv_quantity_on_hand#34) AND isnotnull(inv_item_sk#32)) AND isnotnull(inv_warehouse_sk#33)) AND isnotnull(inv_date_sk#31)) +(49) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [d_date_sk#28] +Right keys [1]: [inv_date_sk#31] +Join condition: None + +(50) Project [codegen id : 13] +Output [6]: [d_date_sk#23, d_date#24, d_week_seq#25, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] +Input [8]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#28, inv_date_sk#31, inv_item_sk#32, 
inv_warehouse_sk#33, inv_quantity_on_hand#34] + (51) Scan parquet default.warehouse Output [2]: [w_warehouse_sk#35, w_warehouse_name#36] Batched: true @@ -326,25 +326,25 @@ Right keys [1]: [w_warehouse_sk#35] Join condition: None (56) Project [codegen id : 13] -Output [4]: [inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] -Input [6]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34, w_warehouse_sk#35, w_warehouse_name#36] +Output [6]: [d_date_sk#23, d_date#24, d_week_seq#25, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Input [8]: [d_date_sk#23, d_date#24, d_week_seq#25, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34, w_warehouse_sk#35, w_warehouse_name#36] (57) Exchange -Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] -Arguments: hashpartitioning(inv_item_sk#32, inv_date_sk#31, 5), true, [id=#38] +Input [6]: [d_date_sk#23, d_date#24, d_week_seq#25, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Arguments: hashpartitioning(inv_item_sk#32, d_date_sk#23, 5), ENSURE_REQUIREMENTS, [id=#38] (58) Sort [codegen id : 14] -Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] -Arguments: [inv_item_sk#32 ASC NULLS FIRST, inv_date_sk#31 ASC NULLS FIRST], false, 0 +Input [6]: [d_date_sk#23, d_date#24, d_week_seq#25, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Arguments: [inv_item_sk#32 ASC NULLS FIRST, d_date_sk#23 ASC NULLS FIRST], false, 0 (59) SortMergeJoin [codegen id : 16] -Left keys [2]: [cs_item_sk#5, d_date_sk#27] -Right keys [2]: [inv_item_sk#32, inv_date_sk#31] -Join condition: (inv_quantity_on_hand#34 < cs_quantity#8) +Left keys [2]: [cs_item_sk#5, cs_sold_date_sk#1] +Right keys [2]: [inv_item_sk#32, d_date_sk#23] +Join condition: ((inv_quantity_on_hand#34 < cs_quantity#8) AND (d_date#16 > d_date#24 + 5 days)) (60) Project [codegen id : 16] -Output [6]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] -Input [11]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27, inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Output [6]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#25] +Input [13]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20, d_date_sk#23, d_date#24, d_week_seq#25, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] (61) Scan parquet default.promotion Output [1]: [p_promo_sk#39] @@ -370,15 +370,15 @@ Right keys [1]: [p_promo_sk#39] Join condition: None (66) Project [codegen id : 16] -Output [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] -Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24, p_promo_sk#39] +Output [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#25] +Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#25, p_promo_sk#39] (67) Exchange -Input [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] -Arguments: hashpartitioning(cs_item_sk#5, cs_order_number#7, 5), true, [id=#41] +Input [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#25] +Arguments: 
hashpartitioning(cs_item_sk#5, cs_order_number#7, 5), ENSURE_REQUIREMENTS, [id=#41] (68) Sort [codegen id : 17] -Input [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] +Input [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#25] Arguments: [cs_item_sk#5 ASC NULLS FIRST, cs_order_number#7 ASC NULLS FIRST], false, 0 (69) Scan parquet default.catalog_returns @@ -397,7 +397,7 @@ Condition : (isnotnull(cr_item_sk#42) AND isnotnull(cr_order_number#43)) (72) Exchange Input [2]: [cr_item_sk#42, cr_order_number#43] -Arguments: hashpartitioning(cr_item_sk#42, cr_order_number#43, 5), true, [id=#44] +Arguments: hashpartitioning(cr_item_sk#42, cr_order_number#43, 5), ENSURE_REQUIREMENTS, [id=#44] (73) Sort [codegen id : 19] Input [2]: [cr_item_sk#42, cr_order_number#43] @@ -409,28 +409,28 @@ Right keys [2]: [cr_item_sk#42, cr_order_number#43] Join condition: None (75) Project [codegen id : 20] -Output [3]: [w_warehouse_name#36, i_item_desc#20, d_week_seq#24] -Input [7]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24, cr_item_sk#42, cr_order_number#43] +Output [3]: [w_warehouse_name#36, i_item_desc#20, d_week_seq#25] +Input [7]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#25, cr_item_sk#42, cr_order_number#43] (76) HashAggregate [codegen id : 20] -Input [3]: [w_warehouse_name#36, i_item_desc#20, d_week_seq#24] -Keys [3]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24] +Input [3]: [w_warehouse_name#36, i_item_desc#20, d_week_seq#25] +Keys [3]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#25] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#45] -Results [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count#46] +Results [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#25, count#46] (77) Exchange -Input [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count#46] -Arguments: hashpartitioning(i_item_desc#20, w_warehouse_name#36, d_week_seq#24, 5), true, [id=#47] +Input [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#25, count#46] +Arguments: hashpartitioning(i_item_desc#20, w_warehouse_name#36, d_week_seq#25, 5), ENSURE_REQUIREMENTS, [id=#47] (78) HashAggregate [codegen id : 21] -Input [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count#46] -Keys [3]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24] +Input [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#25, count#46] +Keys [3]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#25] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#48] -Results [6]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count(1)#48 AS no_promo#49, count(1)#48 AS promo#50, count(1)#48 AS total_cnt#51] +Results [6]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#25, count(1)#48 AS no_promo#49, count(1)#48 AS promo#50, count(1)#48 AS total_cnt#51] (79) TakeOrderedAndProject -Input [6]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, no_promo#49, promo#50, total_cnt#51] -Arguments: 100, [total_cnt#51 DESC NULLS LAST, i_item_desc#20 ASC NULLS FIRST, w_warehouse_name#36 ASC NULLS FIRST, d_week_seq#24 ASC NULLS FIRST], [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, no_promo#49, promo#50, total_cnt#51] +Input [6]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#25, no_promo#49, promo#50, total_cnt#51] +Arguments: 100, [total_cnt#51 DESC NULLS LAST, i_item_desc#20 ASC NULLS FIRST, w_warehouse_name#36 ASC NULLS FIRST, d_week_seq#25 ASC 
NULLS FIRST], [i_item_desc#20, w_warehouse_name#36, d_week_seq#25, no_promo#49, promo#50, total_cnt#51] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/simplified.txt index 918508787c4b0..b88505ad7b9bc 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/simplified.txt @@ -16,95 +16,95 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom Project [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] BroadcastHashJoin [cs_promo_sk,p_promo_sk] Project [cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] - SortMergeJoin [cs_item_sk,d_date_sk,inv_item_sk,inv_date_sk,inv_quantity_on_hand,cs_quantity] + SortMergeJoin [cs_item_sk,cs_sold_date_sk,inv_item_sk,d_date_sk,inv_quantity_on_hand,cs_quantity,d_date,d_date] InputAdapter - WholeStageCodegen (11) - Sort [cs_item_sk,d_date_sk] + WholeStageCodegen (9) + Sort [cs_item_sk,cs_sold_date_sk] InputAdapter - Exchange [cs_item_sk,d_date_sk] #3 - WholeStageCodegen (10) - Project [cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,i_item_desc,d_week_seq,d_date_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk,d_date,d_date] - Project [cs_sold_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,d_date,i_item_desc] - SortMergeJoin [cs_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (5) - Sort [cs_item_sk] - InputAdapter - Exchange [cs_item_sk] #4 - WholeStageCodegen (4) - Project [cs_sold_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,d_date] - BroadcastHashJoin [cs_ship_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] - BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] - Project [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] - BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] - Filter [cs_quantity,cs_item_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_sold_date_sk,cs_ship_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] + Exchange [cs_item_sk,cs_sold_date_sk] #3 + WholeStageCodegen (8) + Project [cs_sold_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,d_date,i_item_desc] + SortMergeJoin [cs_item_sk,i_item_sk] + InputAdapter + WholeStageCodegen (5) + Sort [cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #4 + WholeStageCodegen (4) + Project [cs_sold_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,d_date] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Project [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] + BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] + Filter [cs_quantity,cs_item_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_sold_date_sk,cs_ship_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [hd_demo_sk] - Filter [hd_buy_potential,hd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] + Scan parquet 
default.catalog_sales [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] InputAdapter - BroadcastExchange #6 - WholeStageCodegen (2) - Project [cd_demo_sk] - Filter [cd_marital_status,cd_demo_sk] + BroadcastExchange #5 + WholeStageCodegen (1) + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] + Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] InputAdapter - BroadcastExchange #7 - WholeStageCodegen (3) - Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] - InputAdapter - WholeStageCodegen (7) - Sort [i_item_sk] - InputAdapter - Exchange [i_item_sk] #8 - WholeStageCodegen (6) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_desc] + BroadcastExchange #6 + WholeStageCodegen (2) + Project [cd_demo_sk] + Filter [cd_marital_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (3) + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] InputAdapter - BroadcastExchange #9 - WholeStageCodegen (9) - Project [d_date_sk,d_date,d_week_seq,d_date_sk] - BroadcastHashJoin [d_week_seq,d_week_seq] - InputAdapter - BroadcastExchange #10 - WholeStageCodegen (8) - Project [d_date_sk,d_date,d_week_seq] - Filter [d_year,d_date_sk,d_week_seq,d_date] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_week_seq,d_year] - Filter [d_week_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_week_seq] + WholeStageCodegen (7) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #8 + WholeStageCodegen (6) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_desc] InputAdapter WholeStageCodegen (14) - Sort [inv_item_sk,inv_date_sk] + Sort [inv_item_sk,d_date_sk] InputAdapter - Exchange [inv_item_sk,inv_date_sk] #11 + Exchange [inv_item_sk,d_date_sk] #9 WholeStageCodegen (13) - Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,w_warehouse_name] + Project [d_date_sk,d_date,d_week_seq,inv_item_sk,inv_quantity_on_hand,w_warehouse_name] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk,inv_date_sk] - ColumnarToRow + Project [d_date_sk,d_date,d_week_seq,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + BroadcastHashJoin [d_date_sk,inv_date_sk] InputAdapter - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + BroadcastExchange #10 + WholeStageCodegen (11) + Project [d_date_sk,d_date,d_week_seq,d_date_sk] + BroadcastHashJoin [d_week_seq,d_week_seq] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (10) + Project [d_date_sk,d_date,d_week_seq] + Filter [d_year,d_date_sk,d_week_seq,d_date] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date,d_week_seq,d_year] + Filter [d_week_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_week_seq] + Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk,inv_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] 
InputAdapter BroadcastExchange #12 WholeStageCodegen (12) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/explain.txt index 6e757528a3e68..6813696266ac5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/explain.txt @@ -1,343 +1,343 @@ == Physical Plan == TakeOrderedAndProject (61) +- * Project (60) - +- * BroadcastHashJoin Inner BuildRight (59) - :- * Project (38) - : +- * SortMergeJoin Inner (37) - : :- * Sort (11) - : : +- Exchange (10) - : : +- * Project (9) - : : +- * BroadcastHashJoin Inner BuildRight (8) - : : :- * Filter (3) - : : : +- * ColumnarToRow (2) - : : : +- Scan parquet default.customer (1) - : : +- BroadcastExchange (7) - : : +- * Filter (6) - : : +- * ColumnarToRow (5) - : : +- Scan parquet default.customer_address (4) - : +- * Sort (36) - : +- Exchange (35) - : +- * Filter (34) - : +- * HashAggregate (33) - : +- Exchange (32) - : +- * HashAggregate (31) - : +- * Project (30) - : +- * SortMergeJoin Inner (29) - : :- * Sort (23) - : : +- Exchange (22) - : : +- * Project (21) - : : +- * BroadcastHashJoin Inner BuildRight (20) - : : :- * Filter (14) - : : : +- * ColumnarToRow (13) - : : : +- Scan parquet default.catalog_returns (12) - : : +- BroadcastExchange (19) - : : +- * Project (18) - : : +- * Filter (17) - : : +- * ColumnarToRow (16) - : : +- Scan parquet default.date_dim (15) - : +- * Sort (28) - : +- Exchange (27) - : +- * Filter (26) - : +- * ColumnarToRow (25) - : +- Scan parquet default.customer_address (24) - +- BroadcastExchange (58) - +- * Filter (57) - +- * HashAggregate (56) - +- Exchange (55) - +- * HashAggregate (54) - +- * HashAggregate (53) - +- Exchange (52) - +- * HashAggregate (51) - +- * Project (50) - +- * SortMergeJoin Inner (49) - :- * Sort (46) - : +- Exchange (45) - : +- * Project (44) - : +- * BroadcastHashJoin Inner BuildRight (43) - : :- * Filter (41) - : : +- * ColumnarToRow (40) - : : +- Scan parquet default.catalog_returns (39) - : +- ReusedExchange (42) - +- * Sort (48) - +- ReusedExchange (47) - - -(1) Scan parquet default.customer -Output [6]: [c_customer_sk#1, c_customer_id#2, c_current_addr_sk#3, c_salutation#4, c_first_name#5, c_last_name#6] + +- * SortMergeJoin Inner (59) + :- * Sort (47) + : +- Exchange (46) + : +- * Project (45) + : +- * BroadcastHashJoin Inner BuildRight (44) + : :- * Filter (23) + : : +- * HashAggregate (22) + : : +- Exchange (21) + : : +- * HashAggregate (20) + : : +- * Project (19) + : : +- * SortMergeJoin Inner (18) + : : :- * Sort (12) + : : : +- Exchange (11) + : : : +- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.catalog_returns (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- * Sort (17) + : : +- Exchange (16) + : : +- * Filter (15) + : : +- * ColumnarToRow (14) + : : +- Scan parquet default.customer_address (13) + : +- BroadcastExchange (43) + : +- * Filter (42) + : +- * HashAggregate (41) + : +- Exchange (40) + : +- * HashAggregate (39) + : +- * HashAggregate (38) + : +- Exchange (37) + : +- * HashAggregate (36) + : +- * Project (35) + : +- * SortMergeJoin Inner (34) + : :- * Sort (31) + : : +- Exchange 
(30) + : : +- * Project (29) + : : +- * BroadcastHashJoin Inner BuildRight (28) + : : :- * Filter (26) + : : : +- * ColumnarToRow (25) + : : : +- Scan parquet default.catalog_returns (24) + : : +- ReusedExchange (27) + : +- * Sort (33) + : +- ReusedExchange (32) + +- * Sort (58) + +- Exchange (57) + +- * Project (56) + +- * BroadcastHashJoin Inner BuildRight (55) + :- * Filter (50) + : +- * ColumnarToRow (49) + : +- Scan parquet default.customer (48) + +- BroadcastExchange (54) + +- * Filter (53) + +- * ColumnarToRow (52) + +- Scan parquet default.customer_address (51) + + +(1) Scan parquet default.catalog_returns +Output [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_addr_sk), IsNotNull(cr_returning_customer_sk)] +ReadSchema: struct (2) ColumnarToRow [codegen id : 2] -Input [6]: [c_customer_sk#1, c_customer_id#2, c_current_addr_sk#3, c_salutation#4, c_first_name#5, c_last_name#6] +Input [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] (3) Filter [codegen id : 2] -Input [6]: [c_customer_sk#1, c_customer_id#2, c_current_addr_sk#3, c_salutation#4, c_first_name#5, c_last_name#6] -Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_current_addr_sk#3)) +Input [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Condition : ((isnotnull(cr_returned_date_sk#1) AND isnotnull(cr_returning_addr_sk#3)) AND isnotnull(cr_returning_customer_sk#2)) -(4) Scan parquet default.customer_address -Output [12]: [ca_address_sk#7, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18] +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#5, d_year#6] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct (5) ColumnarToRow [codegen id : 1] -Input [12]: [ca_address_sk#7, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18] +Input [2]: [d_date_sk#5, d_year#6] (6) Filter [codegen id : 1] -Input [12]: [ca_address_sk#7, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18] -Condition : ((isnotnull(ca_state#14) AND (ca_state#14 = GA)) AND isnotnull(ca_address_sk#7)) +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2000)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_year#6] -(7) BroadcastExchange -Input [12]: [ca_address_sk#7, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, 
ca_country#16, ca_gmt_offset#17, ca_location_type#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] -(8) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [c_current_addr_sk#3] -Right keys [1]: [ca_address_sk#7] +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [cr_returned_date_sk#1] +Right keys [1]: [d_date_sk#5] Join condition: None -(9) Project [codegen id : 2] -Output [16]: [c_customer_sk#1, c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18] -Input [18]: [c_customer_sk#1, c_customer_id#2, c_current_addr_sk#3, c_salutation#4, c_first_name#5, c_last_name#6, ca_address_sk#7, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18] +(10) Project [codegen id : 2] +Output [3]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Input [5]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4, d_date_sk#5] -(10) Exchange -Input [16]: [c_customer_sk#1, c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18] -Arguments: hashpartitioning(c_customer_sk#1, 5), true, [id=#20] +(11) Exchange +Input [3]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Arguments: hashpartitioning(cr_returning_addr_sk#3, 5), ENSURE_REQUIREMENTS, [id=#8] -(11) Sort [codegen id : 3] -Input [16]: [c_customer_sk#1, c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18] -Arguments: [c_customer_sk#1 ASC NULLS FIRST], false, 0 +(12) Sort [codegen id : 3] +Input [3]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Arguments: [cr_returning_addr_sk#3 ASC NULLS FIRST], false, 0 -(12) Scan parquet default.catalog_returns -Output [4]: [cr_returned_date_sk#21, cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] +(13) Scan parquet default.customer_address +Output [2]: [ca_address_sk#9, ca_state#10] Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_returns] -PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_addr_sk), IsNotNull(cr_returning_customer_sk)] -ReadSchema: struct - -(13) ColumnarToRow [codegen id : 5] -Input [4]: [cr_returned_date_sk#21, cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)] +ReadSchema: struct -(14) Filter [codegen id : 5] -Input [4]: [cr_returned_date_sk#21, cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] -Condition : ((isnotnull(cr_returned_date_sk#21) AND isnotnull(cr_returning_addr_sk#23)) AND 
isnotnull(cr_returning_customer_sk#22)) +(14) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#9, ca_state#10] -(15) Scan parquet default.date_dim -Output [2]: [d_date_sk#25, d_year#26] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] -ReadSchema: struct +(15) Filter [codegen id : 4] +Input [2]: [ca_address_sk#9, ca_state#10] +Condition : (isnotnull(ca_address_sk#9) AND isnotnull(ca_state#10)) -(16) ColumnarToRow [codegen id : 4] -Input [2]: [d_date_sk#25, d_year#26] +(16) Exchange +Input [2]: [ca_address_sk#9, ca_state#10] +Arguments: hashpartitioning(ca_address_sk#9, 5), ENSURE_REQUIREMENTS, [id=#11] -(17) Filter [codegen id : 4] -Input [2]: [d_date_sk#25, d_year#26] -Condition : ((isnotnull(d_year#26) AND (d_year#26 = 2000)) AND isnotnull(d_date_sk#25)) +(17) Sort [codegen id : 5] +Input [2]: [ca_address_sk#9, ca_state#10] +Arguments: [ca_address_sk#9 ASC NULLS FIRST], false, 0 -(18) Project [codegen id : 4] -Output [1]: [d_date_sk#25] -Input [2]: [d_date_sk#25, d_year#26] +(18) SortMergeJoin [codegen id : 6] +Left keys [1]: [cr_returning_addr_sk#3] +Right keys [1]: [ca_address_sk#9] +Join condition: None -(19) BroadcastExchange -Input [1]: [d_date_sk#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27] +(19) Project [codegen id : 6] +Output [3]: [cr_returning_customer_sk#2, cr_return_amt_inc_tax#4, ca_state#10] +Input [5]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4, ca_address_sk#9, ca_state#10] + +(20) HashAggregate [codegen id : 6] +Input [3]: [cr_returning_customer_sk#2, cr_return_amt_inc_tax#4, ca_state#10] +Keys [2]: [cr_returning_customer_sk#2, ca_state#10] +Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#4))] +Aggregate Attributes [1]: [sum#12] +Results [3]: [cr_returning_customer_sk#2, ca_state#10, sum#13] + +(21) Exchange +Input [3]: [cr_returning_customer_sk#2, ca_state#10, sum#13] +Arguments: hashpartitioning(cr_returning_customer_sk#2, ca_state#10, 5), ENSURE_REQUIREMENTS, [id=#14] + +(22) HashAggregate [codegen id : 15] +Input [3]: [cr_returning_customer_sk#2, ca_state#10, sum#13] +Keys [2]: [cr_returning_customer_sk#2, ca_state#10] +Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#4))#15] +Results [3]: [cr_returning_customer_sk#2 AS ctr_customer_sk#16, ca_state#10 AS ctr_state#17, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#4))#15,17,2) AS ctr_total_return#18] + +(23) Filter [codegen id : 15] +Input [3]: [ctr_customer_sk#16, ctr_state#17, ctr_total_return#18] +Condition : isnotnull(ctr_total_return#18) + +(24) Scan parquet default.catalog_returns +Output [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_addr_sk)] +ReadSchema: struct -(20) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cr_returned_date_sk#21] -Right keys [1]: [d_date_sk#25] -Join condition: None +(25) ColumnarToRow [codegen id : 8] +Input [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] -(21) Project [codegen id : 5] -Output [3]: [cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] -Input [5]: 
[cr_returned_date_sk#21, cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24, d_date_sk#25] +(26) Filter [codegen id : 8] +Input [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Condition : (isnotnull(cr_returned_date_sk#1) AND isnotnull(cr_returning_addr_sk#3)) -(22) Exchange -Input [3]: [cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] -Arguments: hashpartitioning(cr_returning_addr_sk#23, 5), true, [id=#28] +(27) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] -(23) Sort [codegen id : 6] -Input [3]: [cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] -Arguments: [cr_returning_addr_sk#23 ASC NULLS FIRST], false, 0 +(28) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cr_returned_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None -(24) Scan parquet default.customer_address -Output [2]: [ca_address_sk#7, ca_state#14] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)] -ReadSchema: struct +(29) Project [codegen id : 8] +Output [3]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Input [5]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4, d_date_sk#5] -(25) ColumnarToRow [codegen id : 7] -Input [2]: [ca_address_sk#7, ca_state#14] +(30) Exchange +Input [3]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Arguments: hashpartitioning(cr_returning_addr_sk#3, 5), ENSURE_REQUIREMENTS, [id=#19] -(26) Filter [codegen id : 7] -Input [2]: [ca_address_sk#7, ca_state#14] -Condition : (isnotnull(ca_address_sk#7) AND isnotnull(ca_state#14)) +(31) Sort [codegen id : 9] +Input [3]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Arguments: [cr_returning_addr_sk#3 ASC NULLS FIRST], false, 0 -(27) Exchange -Input [2]: [ca_address_sk#7, ca_state#14] -Arguments: hashpartitioning(ca_address_sk#7, 5), true, [id=#29] +(32) ReusedExchange [Reuses operator id: 16] +Output [2]: [ca_address_sk#9, ca_state#10] -(28) Sort [codegen id : 8] -Input [2]: [ca_address_sk#7, ca_state#14] -Arguments: [ca_address_sk#7 ASC NULLS FIRST], false, 0 +(33) Sort [codegen id : 11] +Input [2]: [ca_address_sk#9, ca_state#10] +Arguments: [ca_address_sk#9 ASC NULLS FIRST], false, 0 -(29) SortMergeJoin [codegen id : 9] -Left keys [1]: [cr_returning_addr_sk#23] -Right keys [1]: [ca_address_sk#7] +(34) SortMergeJoin [codegen id : 12] +Left keys [1]: [cr_returning_addr_sk#3] +Right keys [1]: [ca_address_sk#9] Join condition: None -(30) Project [codegen id : 9] -Output [3]: [cr_returning_customer_sk#22, cr_return_amt_inc_tax#24, ca_state#14] -Input [5]: [cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24, ca_address_sk#7, ca_state#14] - -(31) HashAggregate [codegen id : 9] -Input [3]: [cr_returning_customer_sk#22, cr_return_amt_inc_tax#24, ca_state#14] -Keys [2]: [cr_returning_customer_sk#22, ca_state#14] -Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#24))] -Aggregate Attributes [1]: [sum#30] -Results [3]: [cr_returning_customer_sk#22, ca_state#14, sum#31] - -(32) Exchange -Input [3]: [cr_returning_customer_sk#22, ca_state#14, sum#31] -Arguments: hashpartitioning(cr_returning_customer_sk#22, ca_state#14, 5), true, [id=#32] - -(33) HashAggregate [codegen id : 10] -Input [3]: 
[cr_returning_customer_sk#22, ca_state#14, sum#31] -Keys [2]: [cr_returning_customer_sk#22, ca_state#14] -Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#24))] -Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#24))#33] -Results [3]: [cr_returning_customer_sk#22 AS ctr_customer_sk#34, ca_state#14 AS ctr_state#35, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#24))#33,17,2) AS ctr_total_return#36] - -(34) Filter [codegen id : 10] -Input [3]: [ctr_customer_sk#34, ctr_state#35, ctr_total_return#36] -Condition : isnotnull(ctr_total_return#36) - -(35) Exchange -Input [3]: [ctr_customer_sk#34, ctr_state#35, ctr_total_return#36] -Arguments: hashpartitioning(ctr_customer_sk#34, 5), true, [id=#37] - -(36) Sort [codegen id : 11] -Input [3]: [ctr_customer_sk#34, ctr_state#35, ctr_total_return#36] -Arguments: [ctr_customer_sk#34 ASC NULLS FIRST], false, 0 - -(37) SortMergeJoin [codegen id : 20] -Left keys [1]: [c_customer_sk#1] -Right keys [1]: [ctr_customer_sk#34] -Join condition: None +(35) Project [codegen id : 12] +Output [3]: [cr_returning_customer_sk#2, cr_return_amt_inc_tax#4, ca_state#10] +Input [5]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4, ca_address_sk#9, ca_state#10] + +(36) HashAggregate [codegen id : 12] +Input [3]: [cr_returning_customer_sk#2, cr_return_amt_inc_tax#4, ca_state#10] +Keys [2]: [cr_returning_customer_sk#2, ca_state#10] +Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#4))] +Aggregate Attributes [1]: [sum#20] +Results [3]: [cr_returning_customer_sk#2, ca_state#10, sum#21] + +(37) Exchange +Input [3]: [cr_returning_customer_sk#2, ca_state#10, sum#21] +Arguments: hashpartitioning(cr_returning_customer_sk#2, ca_state#10, 5), ENSURE_REQUIREMENTS, [id=#22] + +(38) HashAggregate [codegen id : 13] +Input [3]: [cr_returning_customer_sk#2, ca_state#10, sum#21] +Keys [2]: [cr_returning_customer_sk#2, ca_state#10] +Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#4))#23] +Results [2]: [ca_state#10 AS ctr_state#17, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#4))#23,17,2) AS ctr_total_return#18] + +(39) HashAggregate [codegen id : 13] +Input [2]: [ctr_state#17, ctr_total_return#18] +Keys [1]: [ctr_state#17] +Functions [1]: [partial_avg(ctr_total_return#18)] +Aggregate Attributes [2]: [sum#24, count#25] +Results [3]: [ctr_state#17, sum#26, count#27] + +(40) Exchange +Input [3]: [ctr_state#17, sum#26, count#27] +Arguments: hashpartitioning(ctr_state#17, 5), ENSURE_REQUIREMENTS, [id=#28] + +(41) HashAggregate [codegen id : 14] +Input [3]: [ctr_state#17, sum#26, count#27] +Keys [1]: [ctr_state#17] +Functions [1]: [avg(ctr_total_return#18)] +Aggregate Attributes [1]: [avg(ctr_total_return#18)#29] +Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#18)#29) * 1.200000), DecimalType(24,7), true) AS (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#30, ctr_state#17 AS ctr_state#17#31] + +(42) Filter [codegen id : 14] +Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#30, ctr_state#17#31] +Condition : isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#30) + +(43) BroadcastExchange +Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#30, ctr_state#17#31] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#32] + +(44) BroadcastHashJoin [codegen id : 15] 
+Left keys [1]: [ctr_state#17] +Right keys [1]: [ctr_state#17#31] +Join condition: (cast(ctr_total_return#18 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#30) + +(45) Project [codegen id : 15] +Output [2]: [ctr_customer_sk#16, ctr_total_return#18] +Input [5]: [ctr_customer_sk#16, ctr_state#17, ctr_total_return#18, (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#30, ctr_state#17#31] + +(46) Exchange +Input [2]: [ctr_customer_sk#16, ctr_total_return#18] +Arguments: hashpartitioning(ctr_customer_sk#16, 5), ENSURE_REQUIREMENTS, [id=#33] + +(47) Sort [codegen id : 16] +Input [2]: [ctr_customer_sk#16, ctr_total_return#18] +Arguments: [ctr_customer_sk#16 ASC NULLS FIRST], false, 0 + +(48) Scan parquet default.customer +Output [6]: [c_customer_sk#34, c_customer_id#35, c_current_addr_sk#36, c_salutation#37, c_first_name#38, c_last_name#39] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 18] +Input [6]: [c_customer_sk#34, c_customer_id#35, c_current_addr_sk#36, c_salutation#37, c_first_name#38, c_last_name#39] -(38) Project [codegen id : 20] -Output [17]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_state#35, ctr_total_return#36] -Input [19]: [c_customer_sk#1, c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_customer_sk#34, ctr_state#35, ctr_total_return#36] +(50) Filter [codegen id : 18] +Input [6]: [c_customer_sk#34, c_customer_id#35, c_current_addr_sk#36, c_salutation#37, c_first_name#38, c_last_name#39] +Condition : (isnotnull(c_customer_sk#34) AND isnotnull(c_current_addr_sk#36)) -(39) Scan parquet default.catalog_returns -Output [4]: [cr_returned_date_sk#21, cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] +(51) Scan parquet default.customer_address +Output [12]: [ca_address_sk#9, ca_street_number#40, ca_street_name#41, ca_street_type#42, ca_suite_number#43, ca_city#44, ca_county#45, ca_state#10, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49] Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_returns] -PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_addr_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] +ReadSchema: struct -(40) ColumnarToRow [codegen id : 13] -Input [4]: [cr_returned_date_sk#21, cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] +(52) ColumnarToRow [codegen id : 17] +Input [12]: [ca_address_sk#9, ca_street_number#40, ca_street_name#41, ca_street_type#42, ca_suite_number#43, ca_city#44, ca_county#45, ca_state#10, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49] -(41) Filter [codegen id : 13] -Input [4]: [cr_returned_date_sk#21, cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] -Condition : (isnotnull(cr_returned_date_sk#21) AND 
isnotnull(cr_returning_addr_sk#23)) +(53) Filter [codegen id : 17] +Input [12]: [ca_address_sk#9, ca_street_number#40, ca_street_name#41, ca_street_type#42, ca_suite_number#43, ca_city#44, ca_county#45, ca_state#10, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49] +Condition : ((isnotnull(ca_state#10) AND (ca_state#10 = GA)) AND isnotnull(ca_address_sk#9)) -(42) ReusedExchange [Reuses operator id: 19] -Output [1]: [d_date_sk#25] +(54) BroadcastExchange +Input [12]: [ca_address_sk#9, ca_street_number#40, ca_street_name#41, ca_street_type#42, ca_suite_number#43, ca_city#44, ca_county#45, ca_state#10, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#50] -(43) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [cr_returned_date_sk#21] -Right keys [1]: [d_date_sk#25] +(55) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [c_current_addr_sk#36] +Right keys [1]: [ca_address_sk#9] Join condition: None -(44) Project [codegen id : 13] -Output [3]: [cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] -Input [5]: [cr_returned_date_sk#21, cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24, d_date_sk#25] +(56) Project [codegen id : 18] +Output [16]: [c_customer_sk#34, c_customer_id#35, c_salutation#37, c_first_name#38, c_last_name#39, ca_street_number#40, ca_street_name#41, ca_street_type#42, ca_suite_number#43, ca_city#44, ca_county#45, ca_state#10, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49] +Input [18]: [c_customer_sk#34, c_customer_id#35, c_current_addr_sk#36, c_salutation#37, c_first_name#38, c_last_name#39, ca_address_sk#9, ca_street_number#40, ca_street_name#41, ca_street_type#42, ca_suite_number#43, ca_city#44, ca_county#45, ca_state#10, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49] -(45) Exchange -Input [3]: [cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] -Arguments: hashpartitioning(cr_returning_addr_sk#23, 5), true, [id=#38] +(57) Exchange +Input [16]: [c_customer_sk#34, c_customer_id#35, c_salutation#37, c_first_name#38, c_last_name#39, ca_street_number#40, ca_street_name#41, ca_street_type#42, ca_suite_number#43, ca_city#44, ca_county#45, ca_state#10, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49] +Arguments: hashpartitioning(c_customer_sk#34, 5), ENSURE_REQUIREMENTS, [id=#51] -(46) Sort [codegen id : 14] -Input [3]: [cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] -Arguments: [cr_returning_addr_sk#23 ASC NULLS FIRST], false, 0 +(58) Sort [codegen id : 19] +Input [16]: [c_customer_sk#34, c_customer_id#35, c_salutation#37, c_first_name#38, c_last_name#39, ca_street_number#40, ca_street_name#41, ca_street_type#42, ca_suite_number#43, ca_city#44, ca_county#45, ca_state#10, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49] +Arguments: [c_customer_sk#34 ASC NULLS FIRST], false, 0 -(47) ReusedExchange [Reuses operator id: 27] -Output [2]: [ca_address_sk#7, ca_state#14] - -(48) Sort [codegen id : 16] -Input [2]: [ca_address_sk#7, ca_state#14] -Arguments: [ca_address_sk#7 ASC NULLS FIRST], false, 0 - -(49) SortMergeJoin [codegen id : 17] -Left keys [1]: [cr_returning_addr_sk#23] -Right keys [1]: [ca_address_sk#7] +(59) SortMergeJoin [codegen id : 20] +Left keys [1]: [ctr_customer_sk#16] +Right keys [1]: [c_customer_sk#34] Join condition: None -(50) Project [codegen id : 17] 
-Output [3]: [cr_returning_customer_sk#22, cr_return_amt_inc_tax#24, ca_state#14] -Input [5]: [cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24, ca_address_sk#7, ca_state#14] - -(51) HashAggregate [codegen id : 17] -Input [3]: [cr_returning_customer_sk#22, cr_return_amt_inc_tax#24, ca_state#14] -Keys [2]: [cr_returning_customer_sk#22, ca_state#14] -Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#24))] -Aggregate Attributes [1]: [sum#39] -Results [3]: [cr_returning_customer_sk#22, ca_state#14, sum#40] - -(52) Exchange -Input [3]: [cr_returning_customer_sk#22, ca_state#14, sum#40] -Arguments: hashpartitioning(cr_returning_customer_sk#22, ca_state#14, 5), true, [id=#41] - -(53) HashAggregate [codegen id : 18] -Input [3]: [cr_returning_customer_sk#22, ca_state#14, sum#40] -Keys [2]: [cr_returning_customer_sk#22, ca_state#14] -Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#24))] -Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#24))#42] -Results [2]: [ca_state#14 AS ctr_state#35, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#24))#42,17,2) AS ctr_total_return#36] - -(54) HashAggregate [codegen id : 18] -Input [2]: [ctr_state#35, ctr_total_return#36] -Keys [1]: [ctr_state#35] -Functions [1]: [partial_avg(ctr_total_return#36)] -Aggregate Attributes [2]: [sum#43, count#44] -Results [3]: [ctr_state#35, sum#45, count#46] - -(55) Exchange -Input [3]: [ctr_state#35, sum#45, count#46] -Arguments: hashpartitioning(ctr_state#35, 5), true, [id=#47] - -(56) HashAggregate [codegen id : 19] -Input [3]: [ctr_state#35, sum#45, count#46] -Keys [1]: [ctr_state#35] -Functions [1]: [avg(ctr_total_return#36)] -Aggregate Attributes [1]: [avg(ctr_total_return#36)#48] -Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#36)#48) * 1.200000), DecimalType(24,7), true) AS (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#49, ctr_state#35 AS ctr_state#35#50] - -(57) Filter [codegen id : 19] -Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#49, ctr_state#35#50] -Condition : isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#49) - -(58) BroadcastExchange -Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#49, ctr_state#35#50] -Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#51] - -(59) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [ctr_state#35] -Right keys [1]: [ctr_state#35#50] -Join condition: (cast(ctr_total_return#36 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#49) - (60) Project [codegen id : 20] -Output [16]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_total_return#36] -Input [19]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_state#35, ctr_total_return#36, (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#49, ctr_state#35#50] +Output [16]: [c_customer_id#35, c_salutation#37, c_first_name#38, c_last_name#39, ca_street_number#40, ca_street_name#41, ca_street_type#42, 
ca_suite_number#43, ca_city#44, ca_county#45, ca_state#10, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49, ctr_total_return#18] +Input [18]: [ctr_customer_sk#16, ctr_total_return#18, c_customer_sk#34, c_customer_id#35, c_salutation#37, c_first_name#38, c_last_name#39, ca_street_number#40, ca_street_name#41, ca_street_type#42, ca_suite_number#43, ca_city#44, ca_county#45, ca_state#10, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49] (61) TakeOrderedAndProject -Input [16]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_total_return#36] -Arguments: 100, [c_customer_id#2 ASC NULLS FIRST, c_salutation#4 ASC NULLS FIRST, c_first_name#5 ASC NULLS FIRST, c_last_name#6 ASC NULLS FIRST, ca_street_number#8 ASC NULLS FIRST, ca_street_name#9 ASC NULLS FIRST, ca_street_type#10 ASC NULLS FIRST, ca_suite_number#11 ASC NULLS FIRST, ca_city#12 ASC NULLS FIRST, ca_county#13 ASC NULLS FIRST, ca_state#14 ASC NULLS FIRST, ca_zip#15 ASC NULLS FIRST, ca_country#16 ASC NULLS FIRST, ca_gmt_offset#17 ASC NULLS FIRST, ca_location_type#18 ASC NULLS FIRST, ctr_total_return#36 ASC NULLS FIRST], [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_total_return#36] +Input [16]: [c_customer_id#35, c_salutation#37, c_first_name#38, c_last_name#39, ca_street_number#40, ca_street_name#41, ca_street_type#42, ca_suite_number#43, ca_city#44, ca_county#45, ca_state#10, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49, ctr_total_return#18] +Arguments: 100, [c_customer_id#35 ASC NULLS FIRST, c_salutation#37 ASC NULLS FIRST, c_first_name#38 ASC NULLS FIRST, c_last_name#39 ASC NULLS FIRST, ca_street_number#40 ASC NULLS FIRST, ca_street_name#41 ASC NULLS FIRST, ca_street_type#42 ASC NULLS FIRST, ca_suite_number#43 ASC NULLS FIRST, ca_city#44 ASC NULLS FIRST, ca_county#45 ASC NULLS FIRST, ca_state#10 ASC NULLS FIRST, ca_zip#46 ASC NULLS FIRST, ca_country#47 ASC NULLS FIRST, ca_gmt_offset#48 ASC NULLS FIRST, ca_location_type#49 ASC NULLS FIRST, ctr_total_return#18 ASC NULLS FIRST], [c_customer_id#35, c_salutation#37, c_first_name#38, c_last_name#39, ca_street_number#40, ca_street_name#41, ca_street_type#42, ca_suite_number#43, ca_city#44, ca_county#45, ca_state#10, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49, ctr_total_return#18] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/simplified.txt index c603ab5194286..99677b6e39736 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/simplified.txt @@ -1,48 +1,29 @@ TakeOrderedAndProject [c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type,ctr_total_return] WholeStageCodegen (20) Project 
[c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type,ctr_total_return] - BroadcastHashJoin [ctr_state,ctr_state,ctr_total_return,(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] - Project [c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type,ctr_state,ctr_total_return] - SortMergeJoin [c_customer_sk,ctr_customer_sk] - InputAdapter - WholeStageCodegen (3) - Sort [c_customer_sk] - InputAdapter - Exchange [c_customer_sk] #1 - WholeStageCodegen (2) - Project [c_customer_sk,c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Filter [c_customer_sk,c_current_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen (1) - Filter [ca_state,ca_address_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type] - InputAdapter - WholeStageCodegen (11) - Sort [ctr_customer_sk] - InputAdapter - Exchange [ctr_customer_sk] #3 - WholeStageCodegen (10) + SortMergeJoin [ctr_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (16) + Sort [ctr_customer_sk] + InputAdapter + Exchange [ctr_customer_sk] #1 + WholeStageCodegen (15) + Project [ctr_customer_sk,ctr_total_return] + BroadcastHashJoin [ctr_state,ctr_state,ctr_total_return,(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] Filter [ctr_total_return] HashAggregate [cr_returning_customer_sk,ca_state,sum] [sum(UnscaledValue(cr_return_amt_inc_tax)),ctr_customer_sk,ctr_state,ctr_total_return,sum] InputAdapter - Exchange [cr_returning_customer_sk,ca_state] #4 - WholeStageCodegen (9) + Exchange [cr_returning_customer_sk,ca_state] #2 + WholeStageCodegen (6) HashAggregate [cr_returning_customer_sk,ca_state,cr_return_amt_inc_tax] [sum,sum] Project [cr_returning_customer_sk,cr_return_amt_inc_tax,ca_state] SortMergeJoin [cr_returning_addr_sk,ca_address_sk] InputAdapter - WholeStageCodegen (6) + WholeStageCodegen (3) Sort [cr_returning_addr_sk] InputAdapter - Exchange [cr_returning_addr_sk] #5 - WholeStageCodegen (5) + Exchange [cr_returning_addr_sk] #3 + WholeStageCodegen (2) Project [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] BroadcastHashJoin [cr_returned_date_sk,d_date_sk] Filter [cr_returned_date_sk,cr_returning_addr_sk,cr_returning_customer_sk] @@ -50,55 +31,74 @@ TakeOrderedAndProject [c_customer_id,c_salutation,c_first_name,c_last_name,ca_st InputAdapter Scan parquet default.catalog_returns [cr_returned_date_sk,cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] InputAdapter - BroadcastExchange #6 - WholeStageCodegen (4) + BroadcastExchange #4 + WholeStageCodegen (1) Project [d_date_sk] Filter [d_year,d_date_sk] ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter - WholeStageCodegen (8) + WholeStageCodegen (5) Sort [ca_address_sk] 
InputAdapter - Exchange [ca_address_sk] #7 - WholeStageCodegen (7) + Exchange [ca_address_sk] #5 + WholeStageCodegen (4) Filter [ca_address_sk,ca_state] ColumnarToRow InputAdapter Scan parquet default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (14) + Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] + HashAggregate [ctr_state,sum,count] [avg(ctr_total_return),(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),ctr_state,sum,count] + InputAdapter + Exchange [ctr_state] #7 + WholeStageCodegen (13) + HashAggregate [ctr_state,ctr_total_return] [sum,count,sum,count] + HashAggregate [cr_returning_customer_sk,ca_state,sum] [sum(UnscaledValue(cr_return_amt_inc_tax)),ctr_state,ctr_total_return,sum] + InputAdapter + Exchange [cr_returning_customer_sk,ca_state] #8 + WholeStageCodegen (12) + HashAggregate [cr_returning_customer_sk,ca_state,cr_return_amt_inc_tax] [sum,sum] + Project [cr_returning_customer_sk,cr_return_amt_inc_tax,ca_state] + SortMergeJoin [cr_returning_addr_sk,ca_address_sk] + InputAdapter + WholeStageCodegen (9) + Sort [cr_returning_addr_sk] + InputAdapter + Exchange [cr_returning_addr_sk] #9 + WholeStageCodegen (8) + Project [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Filter [cr_returned_date_sk,cr_returning_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + WholeStageCodegen (11) + Sort [ca_address_sk] + InputAdapter + ReusedExchange [ca_address_sk,ca_state] #5 InputAdapter - BroadcastExchange #8 - WholeStageCodegen (19) - Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] - HashAggregate [ctr_state,sum,count] [avg(ctr_total_return),(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),ctr_state,sum,count] - InputAdapter - Exchange [ctr_state] #9 - WholeStageCodegen (18) - HashAggregate [ctr_state,ctr_total_return] [sum,count,sum,count] - HashAggregate [cr_returning_customer_sk,ca_state,sum] [sum(UnscaledValue(cr_return_amt_inc_tax)),ctr_state,ctr_total_return,sum] + WholeStageCodegen (19) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #10 + WholeStageCodegen (18) + Project [c_customer_sk,c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow InputAdapter - Exchange [cr_returning_customer_sk,ca_state] #10 - WholeStageCodegen (17) - HashAggregate [cr_returning_customer_sk,ca_state,cr_return_amt_inc_tax] [sum,sum] - Project [cr_returning_customer_sk,cr_return_amt_inc_tax,ca_state] - SortMergeJoin [cr_returning_addr_sk,ca_address_sk] - InputAdapter - WholeStageCodegen (14) - Sort [cr_returning_addr_sk] - InputAdapter - Exchange [cr_returning_addr_sk] #11 - WholeStageCodegen (13) - Project [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] - BroadcastHashJoin [cr_returned_date_sk,d_date_sk] - Filter [cr_returned_date_sk,cr_returning_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns 
[cr_returned_date_sk,cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] - InputAdapter - ReusedExchange [d_date_sk] #6 - InputAdapter - WholeStageCodegen (16) - Sort [ca_address_sk] - InputAdapter - ReusedExchange [ca_address_sk,ca_state] #7 + Scan parquet default.customer [c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (17) + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/explain.txt index 4e85516b594f7..6bcbe470cec50 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/explain.txt @@ -8,206 +8,206 @@ +- * BroadcastHashJoin Inner BuildRight (41) :- * Project (36) : +- * BroadcastHashJoin Inner BuildRight (35) - : :- * Project (23) - : : +- * BroadcastHashJoin Inner BuildRight (22) - : : :- * Project (16) - : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : :- * Project (9) - : : : : +- * BroadcastHashJoin Inner BuildRight (8) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.customer (1) - : : : : +- BroadcastExchange (7) - : : : : +- * Filter (6) - : : : : +- * ColumnarToRow (5) - : : : : +- Scan parquet default.customer_demographics (4) - : : : +- BroadcastExchange (14) - : : : +- * Project (13) - : : : +- * Filter (12) - : : : +- * ColumnarToRow (11) - : : : +- Scan parquet default.household_demographics (10) - : : +- BroadcastExchange (21) - : : +- * Project (20) - : : +- * Filter (19) - : : +- * ColumnarToRow (18) - : : +- Scan parquet default.customer_address (17) + : :- * Project (30) + : : +- * BroadcastHashJoin Inner BuildRight (29) + : : :- * Project (23) + : : : +- * BroadcastHashJoin Inner BuildRight (22) + : : : :- * Project (16) + : : : : +- * BroadcastHashJoin Inner BuildLeft (15) + : : : : :- BroadcastExchange (11) + : : : : : +- * Project (10) + : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.catalog_returns (1) + : : : : : +- BroadcastExchange (8) + : : : : : +- * Project (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.date_dim (4) + : : : : +- * Filter (14) + : : : : +- * ColumnarToRow (13) + : : : : +- Scan parquet default.customer (12) + : : : +- BroadcastExchange (21) + : : : +- * Project (20) + : : : +- * Filter (19) + : : : +- * ColumnarToRow (18) + : : : +- Scan parquet default.household_demographics (17) + : : +- BroadcastExchange (28) + : : +- * Project (27) + : : +- * Filter (26) + : : +- * ColumnarToRow (25) + : : +- Scan parquet default.customer_address (24) : +- BroadcastExchange (34) - : +- * Project (33) - : +- * BroadcastHashJoin Inner BuildRight (32) - : :- * Filter (26) - : : +- * ColumnarToRow (25) - : : +- Scan parquet default.catalog_returns (24) - : +- BroadcastExchange (31) - : +- * Project (30) - : +- * Filter (29) - : +- * ColumnarToRow (28) - : +- Scan parquet default.date_dim (27) + : +- * 
Filter (33) + : +- * ColumnarToRow (32) + : +- Scan parquet default.customer_demographics (31) +- BroadcastExchange (40) +- * Filter (39) +- * ColumnarToRow (38) +- Scan parquet default.call_center (37) -(1) Scan parquet default.customer -Output [4]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4] +(1) Scan parquet default.catalog_returns +Output [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_call_center_sk#3, cr_net_loss#4] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_call_center_sk), IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_customer_sk)] +ReadSchema: struct -(2) ColumnarToRow [codegen id : 7] -Input [4]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4] +(2) ColumnarToRow [codegen id : 2] +Input [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_call_center_sk#3, cr_net_loss#4] -(3) Filter [codegen id : 7] -Input [4]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4] -Condition : (((isnotnull(c_customer_sk#1) AND isnotnull(c_current_addr_sk#4)) AND isnotnull(c_current_cdemo_sk#2)) AND isnotnull(c_current_hdemo_sk#3)) +(3) Filter [codegen id : 2] +Input [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_call_center_sk#3, cr_net_loss#4] +Condition : ((isnotnull(cr_call_center_sk#3) AND isnotnull(cr_returned_date_sk#1)) AND isnotnull(cr_returning_customer_sk#2)) -(4) Scan parquet default.customer_demographics -Output [3]: [cd_demo_sk#5, cd_marital_status#6, cd_education_status#7] +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_demographics] -PushedFilters: [Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Unknown)),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,Advanced Degree))), IsNotNull(cd_demo_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct (5) ColumnarToRow [codegen id : 1] -Input [3]: [cd_demo_sk#5, cd_marital_status#6, cd_education_status#7] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] (6) Filter [codegen id : 1] -Input [3]: [cd_demo_sk#5, cd_marital_status#6, cd_education_status#7] -Condition : ((((cd_marital_status#6 = M) AND (cd_education_status#7 = Unknown)) OR ((cd_marital_status#6 = W) AND (cd_education_status#7 = Advanced Degree))) AND isnotnull(cd_demo_sk#5)) +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 1998)) AND (d_moy#7 = 11)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] -(7) BroadcastExchange -Input [3]: [cd_demo_sk#5, cd_marital_status#6, cd_education_status#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] -(8) BroadcastHashJoin [codegen id : 7] -Left keys [1]: 
[c_current_cdemo_sk#2] -Right keys [1]: [cd_demo_sk#5] +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [cr_returned_date_sk#1] +Right keys [1]: [d_date_sk#5] Join condition: None -(9) Project [codegen id : 7] -Output [5]: [c_customer_sk#1, c_current_hdemo_sk#3, c_current_addr_sk#4, cd_marital_status#6, cd_education_status#7] -Input [7]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, cd_demo_sk#5, cd_marital_status#6, cd_education_status#7] +(10) Project [codegen id : 2] +Output [3]: [cr_returning_customer_sk#2, cr_call_center_sk#3, cr_net_loss#4] +Input [5]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_call_center_sk#3, cr_net_loss#4, d_date_sk#5] -(10) Scan parquet default.household_demographics -Output [2]: [hd_demo_sk#9, hd_buy_potential#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_buy_potential), StringStartsWith(hd_buy_potential,Unknown), IsNotNull(hd_demo_sk)] -ReadSchema: struct +(11) BroadcastExchange +Input [3]: [cr_returning_customer_sk#2, cr_call_center_sk#3, cr_net_loss#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] -(11) ColumnarToRow [codegen id : 2] -Input [2]: [hd_demo_sk#9, hd_buy_potential#10] - -(12) Filter [codegen id : 2] -Input [2]: [hd_demo_sk#9, hd_buy_potential#10] -Condition : ((isnotnull(hd_buy_potential#10) AND StartsWith(hd_buy_potential#10, Unknown)) AND isnotnull(hd_demo_sk#9)) +(12) Scan parquet default.customer +Output [4]: [c_customer_sk#10, c_current_cdemo_sk#11, c_current_hdemo_sk#12, c_current_addr_sk#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)] +ReadSchema: struct -(13) Project [codegen id : 2] -Output [1]: [hd_demo_sk#9] -Input [2]: [hd_demo_sk#9, hd_buy_potential#10] +(13) ColumnarToRow +Input [4]: [c_customer_sk#10, c_current_cdemo_sk#11, c_current_hdemo_sk#12, c_current_addr_sk#13] -(14) BroadcastExchange -Input [1]: [hd_demo_sk#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] +(14) Filter +Input [4]: [c_customer_sk#10, c_current_cdemo_sk#11, c_current_hdemo_sk#12, c_current_addr_sk#13] +Condition : (((isnotnull(c_customer_sk#10) AND isnotnull(c_current_addr_sk#13)) AND isnotnull(c_current_cdemo_sk#11)) AND isnotnull(c_current_hdemo_sk#12)) (15) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_hdemo_sk#3] -Right keys [1]: [hd_demo_sk#9] +Left keys [1]: [cr_returning_customer_sk#2] +Right keys [1]: [c_customer_sk#10] Join condition: None (16) Project [codegen id : 7] -Output [4]: [c_customer_sk#1, c_current_addr_sk#4, cd_marital_status#6, cd_education_status#7] -Input [6]: [c_customer_sk#1, c_current_hdemo_sk#3, c_current_addr_sk#4, cd_marital_status#6, cd_education_status#7, hd_demo_sk#9] +Output [5]: [cr_call_center_sk#3, cr_net_loss#4, c_current_cdemo_sk#11, c_current_hdemo_sk#12, c_current_addr_sk#13] +Input [7]: [cr_returning_customer_sk#2, cr_call_center_sk#3, cr_net_loss#4, c_customer_sk#10, c_current_cdemo_sk#11, c_current_hdemo_sk#12, c_current_addr_sk#13] -(17) Scan parquet default.customer_address -Output [2]: [ca_address_sk#12, ca_gmt_offset#13] +(17) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#14, hd_buy_potential#15] Batched: true -Location [not included in 
comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-7.00), IsNotNull(ca_address_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_buy_potential), StringStartsWith(hd_buy_potential,Unknown), IsNotNull(hd_demo_sk)] +ReadSchema: struct (18) ColumnarToRow [codegen id : 3] -Input [2]: [ca_address_sk#12, ca_gmt_offset#13] +Input [2]: [hd_demo_sk#14, hd_buy_potential#15] (19) Filter [codegen id : 3] -Input [2]: [ca_address_sk#12, ca_gmt_offset#13] -Condition : ((isnotnull(ca_gmt_offset#13) AND (ca_gmt_offset#13 = -7.00)) AND isnotnull(ca_address_sk#12)) +Input [2]: [hd_demo_sk#14, hd_buy_potential#15] +Condition : ((isnotnull(hd_buy_potential#15) AND StartsWith(hd_buy_potential#15, Unknown)) AND isnotnull(hd_demo_sk#14)) (20) Project [codegen id : 3] -Output [1]: [ca_address_sk#12] -Input [2]: [ca_address_sk#12, ca_gmt_offset#13] +Output [1]: [hd_demo_sk#14] +Input [2]: [hd_demo_sk#14, hd_buy_potential#15] (21) BroadcastExchange -Input [1]: [ca_address_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Input [1]: [hd_demo_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] (22) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_addr_sk#4] -Right keys [1]: [ca_address_sk#12] +Left keys [1]: [c_current_hdemo_sk#12] +Right keys [1]: [hd_demo_sk#14] Join condition: None (23) Project [codegen id : 7] -Output [3]: [c_customer_sk#1, cd_marital_status#6, cd_education_status#7] -Input [5]: [c_customer_sk#1, c_current_addr_sk#4, cd_marital_status#6, cd_education_status#7, ca_address_sk#12] +Output [4]: [cr_call_center_sk#3, cr_net_loss#4, c_current_cdemo_sk#11, c_current_addr_sk#13] +Input [6]: [cr_call_center_sk#3, cr_net_loss#4, c_current_cdemo_sk#11, c_current_hdemo_sk#12, c_current_addr_sk#13, hd_demo_sk#14] -(24) Scan parquet default.catalog_returns -Output [4]: [cr_returned_date_sk#15, cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18] +(24) Scan parquet default.customer_address +Output [2]: [ca_address_sk#17, ca_gmt_offset#18] Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_returns] -PushedFilters: [IsNotNull(cr_call_center_sk), IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_customer_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-7.00), IsNotNull(ca_address_sk)] +ReadSchema: struct -(25) ColumnarToRow [codegen id : 5] -Input [4]: [cr_returned_date_sk#15, cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18] +(25) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#17, ca_gmt_offset#18] -(26) Filter [codegen id : 5] -Input [4]: [cr_returned_date_sk#15, cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18] -Condition : ((isnotnull(cr_call_center_sk#17) AND isnotnull(cr_returned_date_sk#15)) AND isnotnull(cr_returning_customer_sk#16)) +(26) Filter [codegen id : 4] +Input [2]: [ca_address_sk#17, ca_gmt_offset#18] +Condition : ((isnotnull(ca_gmt_offset#18) AND (ca_gmt_offset#18 = -7.00)) AND isnotnull(ca_address_sk#17)) -(27) Scan parquet default.date_dim -Output [3]: [d_date_sk#19, d_year#20, d_moy#21] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), 
EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)] -ReadSchema: struct +(27) Project [codegen id : 4] +Output [1]: [ca_address_sk#17] +Input [2]: [ca_address_sk#17, ca_gmt_offset#18] -(28) ColumnarToRow [codegen id : 4] -Input [3]: [d_date_sk#19, d_year#20, d_moy#21] +(28) BroadcastExchange +Input [1]: [ca_address_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] -(29) Filter [codegen id : 4] -Input [3]: [d_date_sk#19, d_year#20, d_moy#21] -Condition : ((((isnotnull(d_year#20) AND isnotnull(d_moy#21)) AND (d_year#20 = 1998)) AND (d_moy#21 = 11)) AND isnotnull(d_date_sk#19)) +(29) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_addr_sk#13] +Right keys [1]: [ca_address_sk#17] +Join condition: None -(30) Project [codegen id : 4] -Output [1]: [d_date_sk#19] -Input [3]: [d_date_sk#19, d_year#20, d_moy#21] +(30) Project [codegen id : 7] +Output [3]: [cr_call_center_sk#3, cr_net_loss#4, c_current_cdemo_sk#11] +Input [5]: [cr_call_center_sk#3, cr_net_loss#4, c_current_cdemo_sk#11, c_current_addr_sk#13, ca_address_sk#17] -(31) BroadcastExchange -Input [1]: [d_date_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] +(31) Scan parquet default.customer_demographics +Output [3]: [cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Unknown)),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,Advanced Degree))), IsNotNull(cd_demo_sk)] +ReadSchema: struct -(32) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cr_returned_date_sk#15] -Right keys [1]: [d_date_sk#19] -Join condition: None +(32) ColumnarToRow [codegen id : 5] +Input [3]: [cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] -(33) Project [codegen id : 5] -Output [3]: [cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18] -Input [5]: [cr_returned_date_sk#15, cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18, d_date_sk#19] +(33) Filter [codegen id : 5] +Input [3]: [cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] +Condition : ((((cd_marital_status#21 = M) AND (cd_education_status#22 = Unknown)) OR ((cd_marital_status#21 = W) AND (cd_education_status#22 = Advanced Degree))) AND isnotnull(cd_demo_sk#20)) (34) BroadcastExchange -Input [3]: [cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] +Input [3]: [cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] (35) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_customer_sk#1] -Right keys [1]: [cr_returning_customer_sk#16] +Left keys [1]: [c_current_cdemo_sk#11] +Right keys [1]: [cd_demo_sk#20] Join condition: None (36) Project [codegen id : 7] -Output [4]: [cd_marital_status#6, cd_education_status#7, cr_call_center_sk#17, cr_net_loss#18] -Input [6]: [c_customer_sk#1, cd_marital_status#6, cd_education_status#7, cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18] +Output [4]: [cr_call_center_sk#3, cr_net_loss#4, cd_marital_status#21, cd_education_status#22] +Input [6]: [cr_call_center_sk#3, cr_net_loss#4, c_current_cdemo_sk#11, cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] 
(37) Scan parquet default.call_center Output [4]: [cc_call_center_sk#24, cc_call_center_id#25, cc_name#26, cc_manager#27] @@ -228,35 +228,35 @@ Input [4]: [cc_call_center_sk#24, cc_call_center_id#25, cc_name#26, cc_manager#2 Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#28] (41) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [cr_call_center_sk#17] +Left keys [1]: [cr_call_center_sk#3] Right keys [1]: [cc_call_center_sk#24] Join condition: None (42) Project [codegen id : 7] -Output [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cr_net_loss#18, cd_marital_status#6, cd_education_status#7] -Input [8]: [cd_marital_status#6, cd_education_status#7, cr_call_center_sk#17, cr_net_loss#18, cc_call_center_sk#24, cc_call_center_id#25, cc_name#26, cc_manager#27] +Output [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cr_net_loss#4, cd_marital_status#21, cd_education_status#22] +Input [8]: [cr_call_center_sk#3, cr_net_loss#4, cd_marital_status#21, cd_education_status#22, cc_call_center_sk#24, cc_call_center_id#25, cc_name#26, cc_manager#27] (43) HashAggregate [codegen id : 7] -Input [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cr_net_loss#18, cd_marital_status#6, cd_education_status#7] -Keys [5]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#6, cd_education_status#7] -Functions [1]: [partial_sum(UnscaledValue(cr_net_loss#18))] +Input [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cr_net_loss#4, cd_marital_status#21, cd_education_status#22] +Keys [5]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#21, cd_education_status#22] +Functions [1]: [partial_sum(UnscaledValue(cr_net_loss#4))] Aggregate Attributes [1]: [sum#29] -Results [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#6, cd_education_status#7, sum#30] +Results [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#21, cd_education_status#22, sum#30] (44) Exchange -Input [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#6, cd_education_status#7, sum#30] -Arguments: hashpartitioning(cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#6, cd_education_status#7, 5), true, [id=#31] +Input [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#21, cd_education_status#22, sum#30] +Arguments: hashpartitioning(cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#21, cd_education_status#22, 5), ENSURE_REQUIREMENTS, [id=#31] (45) HashAggregate [codegen id : 8] -Input [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#6, cd_education_status#7, sum#30] -Keys [5]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#6, cd_education_status#7] -Functions [1]: [sum(UnscaledValue(cr_net_loss#18))] -Aggregate Attributes [1]: [sum(UnscaledValue(cr_net_loss#18))#32] -Results [4]: [cc_call_center_id#25 AS Call_Center#33, cc_name#26 AS Call_Center_Name#34, cc_manager#27 AS Manager#35, MakeDecimal(sum(UnscaledValue(cr_net_loss#18))#32,17,2) AS Returns_Loss#36] +Input [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#21, cd_education_status#22, sum#30] +Keys [5]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#21, cd_education_status#22] +Functions [1]: [sum(UnscaledValue(cr_net_loss#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_net_loss#4))#32] +Results [4]: [cc_call_center_id#25 AS Call_Center#33, cc_name#26 AS Call_Center_Name#34, cc_manager#27 AS 
Manager#35, MakeDecimal(sum(UnscaledValue(cr_net_loss#4))#32,17,2) AS Returns_Loss#36] (46) Exchange Input [4]: [Call_Center#33, Call_Center_Name#34, Manager#35, Returns_Loss#36] -Arguments: rangepartitioning(Returns_Loss#36 DESC NULLS LAST, 5), true, [id=#37] +Arguments: rangepartitioning(Returns_Loss#36 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#37] (47) Sort [codegen id : 9] Input [4]: [Call_Center#33, Call_Center_Name#34, Manager#35, Returns_Loss#36] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/simplified.txt index 87beb3b565cc1..6c8d629feed3e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/simplified.txt @@ -10,58 +10,58 @@ WholeStageCodegen (9) HashAggregate [cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status,cr_net_loss] [sum,sum] Project [cc_call_center_id,cc_name,cc_manager,cr_net_loss,cd_marital_status,cd_education_status] BroadcastHashJoin [cr_call_center_sk,cc_call_center_sk] - Project [cd_marital_status,cd_education_status,cr_call_center_sk,cr_net_loss] - BroadcastHashJoin [c_customer_sk,cr_returning_customer_sk] - Project [c_customer_sk,cd_marital_status,cd_education_status] + Project [cr_call_center_sk,cr_net_loss,cd_marital_status,cd_education_status] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [cr_call_center_sk,cr_net_loss,c_current_cdemo_sk] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_customer_sk,c_current_addr_sk,cd_marital_status,cd_education_status] + Project [cr_call_center_sk,cr_net_loss,c_current_cdemo_sk,c_current_addr_sk] BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [c_customer_sk,c_current_hdemo_sk,c_current_addr_sk,cd_marital_status,cd_education_status] - BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [cr_call_center_sk,cr_net_loss,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + BroadcastHashJoin [cr_returning_customer_sk,c_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [cr_returning_customer_sk,cr_call_center_sk,cr_net_loss] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Filter [cr_call_center_sk,cr_returned_date_sk,cr_returning_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_returning_customer_sk,cr_call_center_sk,cr_net_loss] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] Filter [c_customer_sk,c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk] ColumnarToRow InputAdapter Scan parquet default.customer [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (1) - Filter [cd_marital_status,cd_education_status,cd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] InputAdapter - BroadcastExchange #4 - WholeStageCodegen (2) + BroadcastExchange #5 + WholeStageCodegen (3) Project [hd_demo_sk] Filter [hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] InputAdapter - BroadcastExchange 
#5 - WholeStageCodegen (3) + BroadcastExchange #6 + WholeStageCodegen (4) Project [ca_address_sk] Filter [ca_gmt_offset,ca_address_sk] ColumnarToRow InputAdapter Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] InputAdapter - BroadcastExchange #6 + BroadcastExchange #7 WholeStageCodegen (5) - Project [cr_returning_customer_sk,cr_call_center_sk,cr_net_loss] - BroadcastHashJoin [cr_returned_date_sk,d_date_sk] - Filter [cr_call_center_sk,cr_returned_date_sk,cr_returning_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_returned_date_sk,cr_returning_customer_sk,cr_call_center_sk,cr_net_loss] + Filter [cd_marital_status,cd_education_status,cd_demo_sk] + ColumnarToRow InputAdapter - BroadcastExchange #7 - WholeStageCodegen (4) - Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] InputAdapter BroadcastExchange #8 WholeStageCodegen (6) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.sf100/explain.txt index 2d76deefcaa36..f6c5258701525 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.sf100/explain.txt @@ -34,24 +34,24 @@ TakeOrderedAndProject (160) : +- * Sort (46) : +- Exchange (45) : +- * Project (44) - : +- * SortMergeJoin Inner (43) - : :- * Sort (37) - : : +- Exchange (36) - : : +- * Project (35) - : : +- * BroadcastHashJoin Inner BuildRight (34) - : : :- * Project (29) - : : : +- * Filter (28) - : : : +- * ColumnarToRow (27) - : : : +- Scan parquet default.customer (26) - : : +- BroadcastExchange (33) - : : +- * Filter (32) - : : +- * ColumnarToRow (31) - : : +- Scan parquet default.customer_address (30) - : +- * Sort (42) - : +- Exchange (41) - : +- * Filter (40) - : +- * ColumnarToRow (39) - : +- Scan parquet default.customer_demographics (38) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (38) + : : +- * SortMergeJoin Inner (37) + : : :- * Sort (31) + : : : +- Exchange (30) + : : : +- * Project (29) + : : : +- * Filter (28) + : : : +- * ColumnarToRow (27) + : : : +- Scan parquet default.customer (26) + : : +- * Sort (36) + : : +- Exchange (35) + : : +- * Filter (34) + : : +- * ColumnarToRow (33) + : : +- Scan parquet default.customer_demographics (32) + : +- BroadcastExchange (42) + : +- * Filter (41) + : +- * ColumnarToRow (40) + : +- Scan parquet default.customer_address (39) :- * HashAggregate (76) : +- Exchange (75) : +- * HashAggregate (74) @@ -266,7 +266,7 @@ Input [10]: [cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6 (24) Exchange Input [8]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19] -Arguments: hashpartitioning(cs_bill_customer_sk#2, 5), true, [id=#21] +Arguments: hashpartitioning(cs_bill_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#21] (25) Sort [codegen id : 5] Input [8]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19] @@ -279,89 +279,89 @@ Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [In(c_birth_month, [9,5,12,4,1,10]), 
IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct -(27) ColumnarToRow [codegen id : 7] +(27) ColumnarToRow [codegen id : 6] Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] -(28) Filter [codegen id : 7] +(28) Filter [codegen id : 6] Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] Condition : (((c_birth_month#25 IN (9,5,12,4,1,10) AND isnotnull(c_customer_sk#22)) AND isnotnull(c_current_cdemo_sk#23)) AND isnotnull(c_current_addr_sk#24)) -(29) Project [codegen id : 7] +(29) Project [codegen id : 6] Output [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26] Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] -(30) Scan parquet default.customer_address -Output [4]: [ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [In(ca_state, [ND,WI,AL,NC,OK,MS,TN]), IsNotNull(ca_address_sk)] -ReadSchema: struct - -(31) ColumnarToRow [codegen id : 6] -Input [4]: [ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] - -(32) Filter [codegen id : 6] -Input [4]: [ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] -Condition : (ca_state#29 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#27)) - -(33) BroadcastExchange -Input [4]: [ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#31] - -(34) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_addr_sk#24] -Right keys [1]: [ca_address_sk#27] -Join condition: None - -(35) Project [codegen id : 7] -Output [6]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] -Input [8]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26, ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] - -(36) Exchange -Input [6]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] -Arguments: hashpartitioning(c_current_cdemo_sk#23, 5), true, [id=#32] +(30) Exchange +Input [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26] +Arguments: hashpartitioning(c_current_cdemo_sk#23, 5), ENSURE_REQUIREMENTS, [id=#27] -(37) Sort [codegen id : 8] -Input [6]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] +(31) Sort [codegen id : 7] +Input [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26] Arguments: [c_current_cdemo_sk#23 ASC NULLS FIRST], false, 0 -(38) Scan parquet default.customer_demographics -Output [1]: [cd_demo_sk#33] +(32) Scan parquet default.customer_demographics +Output [1]: [cd_demo_sk#28] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk)] ReadSchema: struct -(39) ColumnarToRow [codegen id : 9] -Input [1]: [cd_demo_sk#33] +(33) ColumnarToRow [codegen id : 8] +Input [1]: [cd_demo_sk#28] -(40) Filter [codegen id : 9] -Input [1]: [cd_demo_sk#33] -Condition : isnotnull(cd_demo_sk#33) +(34) Filter [codegen id : 8] +Input [1]: [cd_demo_sk#28] +Condition : isnotnull(cd_demo_sk#28) -(41) Exchange -Input [1]: [cd_demo_sk#33] -Arguments: 
hashpartitioning(cd_demo_sk#33, 5), true, [id=#34] +(35) Exchange +Input [1]: [cd_demo_sk#28] +Arguments: hashpartitioning(cd_demo_sk#28, 5), ENSURE_REQUIREMENTS, [id=#29] -(42) Sort [codegen id : 10] -Input [1]: [cd_demo_sk#33] -Arguments: [cd_demo_sk#33 ASC NULLS FIRST], false, 0 +(36) Sort [codegen id : 9] +Input [1]: [cd_demo_sk#28] +Arguments: [cd_demo_sk#28 ASC NULLS FIRST], false, 0 -(43) SortMergeJoin [codegen id : 11] +(37) SortMergeJoin [codegen id : 11] Left keys [1]: [c_current_cdemo_sk#23] -Right keys [1]: [cd_demo_sk#33] +Right keys [1]: [cd_demo_sk#28] +Join condition: None + +(38) Project [codegen id : 11] +Output [3]: [c_customer_sk#22, c_current_addr_sk#24, c_birth_year#26] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26, cd_demo_sk#28] + +(39) Scan parquet default.customer_address +Output [4]: [ca_address_sk#30, ca_county#31, ca_state#32, ca_country#33] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_state, [ND,WI,AL,NC,OK,MS,TN]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 10] +Input [4]: [ca_address_sk#30, ca_county#31, ca_state#32, ca_country#33] + +(41) Filter [codegen id : 10] +Input [4]: [ca_address_sk#30, ca_county#31, ca_state#32, ca_country#33] +Condition : (ca_state#32 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#30)) + +(42) BroadcastExchange +Input [4]: [ca_address_sk#30, ca_county#31, ca_state#32, ca_country#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#34] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [c_current_addr_sk#24] +Right keys [1]: [ca_address_sk#30] Join condition: None (44) Project [codegen id : 11] -Output [5]: [c_customer_sk#22, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] -Input [7]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30, cd_demo_sk#33] +Output [5]: [c_customer_sk#22, c_birth_year#26, ca_county#31, ca_state#32, ca_country#33] +Input [7]: [c_customer_sk#22, c_current_addr_sk#24, c_birth_year#26, ca_address_sk#30, ca_county#31, ca_state#32, ca_country#33] (45) Exchange -Input [5]: [c_customer_sk#22, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] -Arguments: hashpartitioning(c_customer_sk#22, 5), true, [id=#35] +Input [5]: [c_customer_sk#22, c_birth_year#26, ca_county#31, ca_state#32, ca_country#33] +Arguments: hashpartitioning(c_customer_sk#22, 5), ENSURE_REQUIREMENTS, [id=#35] (46) Sort [codegen id : 12] -Input [5]: [c_customer_sk#22, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] +Input [5]: [c_customer_sk#22, c_birth_year#26, ca_county#31, ca_state#32, ca_country#33] Arguments: [c_customer_sk#22 ASC NULLS FIRST], false, 0 (47) SortMergeJoin [codegen id : 13] @@ -370,26 +370,26 @@ Right keys [1]: [c_customer_sk#22] Join condition: None (48) Project [codegen id : 13] -Output [11]: [i_item_id#19, ca_country#30, ca_state#29, ca_county#28, cast(cs_quantity#5 as decimal(12,2)) AS agg1#36, cast(cs_list_price#6 as decimal(12,2)) AS agg2#37, cast(cs_coupon_amt#8 as decimal(12,2)) AS agg3#38, cast(cs_sales_price#7 as decimal(12,2)) AS agg4#39, cast(cs_net_profit#9 as decimal(12,2)) AS agg5#40, cast(c_birth_year#26 as decimal(12,2)) AS agg6#41, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#42] -Input [13]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, 
i_item_id#19, c_customer_sk#22, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] +Output [11]: [i_item_id#19, ca_country#33, ca_state#32, ca_county#31, cast(cs_quantity#5 as decimal(12,2)) AS agg1#36, cast(cs_list_price#6 as decimal(12,2)) AS agg2#37, cast(cs_coupon_amt#8 as decimal(12,2)) AS agg3#38, cast(cs_sales_price#7 as decimal(12,2)) AS agg4#39, cast(cs_net_profit#9 as decimal(12,2)) AS agg5#40, cast(c_birth_year#26 as decimal(12,2)) AS agg6#41, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#42] +Input [13]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19, c_customer_sk#22, c_birth_year#26, ca_county#31, ca_state#32, ca_country#33] (49) HashAggregate [codegen id : 13] -Input [11]: [i_item_id#19, ca_country#30, ca_state#29, ca_county#28, agg1#36, agg2#37, agg3#38, agg4#39, agg5#40, agg6#41, agg7#42] -Keys [4]: [i_item_id#19, ca_country#30, ca_state#29, ca_county#28] +Input [11]: [i_item_id#19, ca_country#33, ca_state#32, ca_county#31, agg1#36, agg2#37, agg3#38, agg4#39, agg5#40, agg6#41, agg7#42] +Keys [4]: [i_item_id#19, ca_country#33, ca_state#32, ca_county#31] Functions [7]: [partial_avg(agg1#36), partial_avg(agg2#37), partial_avg(agg3#38), partial_avg(agg4#39), partial_avg(agg5#40), partial_avg(agg6#41), partial_avg(agg7#42)] Aggregate Attributes [14]: [sum#43, count#44, sum#45, count#46, sum#47, count#48, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56] -Results [18]: [i_item_id#19, ca_country#30, ca_state#29, ca_county#28, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68, sum#69, count#70] +Results [18]: [i_item_id#19, ca_country#33, ca_state#32, ca_county#31, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68, sum#69, count#70] (50) Exchange -Input [18]: [i_item_id#19, ca_country#30, ca_state#29, ca_county#28, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68, sum#69, count#70] -Arguments: hashpartitioning(i_item_id#19, ca_country#30, ca_state#29, ca_county#28, 5), true, [id=#71] +Input [18]: [i_item_id#19, ca_country#33, ca_state#32, ca_county#31, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68, sum#69, count#70] +Arguments: hashpartitioning(i_item_id#19, ca_country#33, ca_state#32, ca_county#31, 5), ENSURE_REQUIREMENTS, [id=#71] (51) HashAggregate [codegen id : 14] -Input [18]: [i_item_id#19, ca_country#30, ca_state#29, ca_county#28, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68, sum#69, count#70] -Keys [4]: [i_item_id#19, ca_country#30, ca_state#29, ca_county#28] +Input [18]: [i_item_id#19, ca_country#33, ca_state#32, ca_county#31, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68, sum#69, count#70] +Keys [4]: [i_item_id#19, ca_country#33, ca_state#32, ca_county#31] Functions [7]: [avg(agg1#36), avg(agg2#37), avg(agg3#38), avg(agg4#39), avg(agg5#40), avg(agg6#41), avg(agg7#42)] Aggregate Attributes [7]: [avg(agg1#36)#72, avg(agg2#37)#73, avg(agg3#38)#74, avg(agg4#39)#75, avg(agg5#40)#76, avg(agg6#41)#77, avg(agg7#42)#78] -Results [11]: [i_item_id#19, ca_country#30, ca_state#29, ca_county#28, avg(agg1#36)#72 AS agg1#79, avg(agg2#37)#73 AS agg2#80, avg(agg3#38)#74 AS agg3#81, avg(agg4#39)#75 AS agg4#82, 
avg(agg5#40)#76 AS agg5#83, avg(agg6#41)#77 AS agg6#84, avg(agg7#42)#78 AS agg7#85] +Results [11]: [i_item_id#19, ca_country#33, ca_state#32, ca_county#31, avg(agg1#36)#72 AS agg1#79, avg(agg2#37)#73 AS agg2#80, avg(agg3#38)#74 AS agg3#81, avg(agg4#39)#75 AS agg4#82, avg(agg5#40)#76 AS agg5#83, avg(agg6#41)#77 AS agg6#84, avg(agg7#42)#78 AS agg7#85] (52) ReusedExchange [Reuses operator id: 24] Output [8]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19] @@ -417,41 +417,41 @@ Output [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_bi Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] (58) Scan parquet default.customer_address -Output [3]: [ca_address_sk#27, ca_state#29, ca_country#30] +Output [3]: [ca_address_sk#30, ca_state#32, ca_country#33] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [In(ca_state, [ND,WI,AL,NC,OK,MS,TN]), IsNotNull(ca_address_sk)] ReadSchema: struct (59) ColumnarToRow [codegen id : 20] -Input [3]: [ca_address_sk#27, ca_state#29, ca_country#30] +Input [3]: [ca_address_sk#30, ca_state#32, ca_country#33] (60) Filter [codegen id : 20] -Input [3]: [ca_address_sk#27, ca_state#29, ca_country#30] -Condition : (ca_state#29 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#27)) +Input [3]: [ca_address_sk#30, ca_state#32, ca_country#33] +Condition : (ca_state#32 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#30)) (61) BroadcastExchange -Input [3]: [ca_address_sk#27, ca_state#29, ca_country#30] +Input [3]: [ca_address_sk#30, ca_state#32, ca_country#33] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#86] (62) BroadcastHashJoin [codegen id : 21] Left keys [1]: [c_current_addr_sk#24] -Right keys [1]: [ca_address_sk#27] +Right keys [1]: [ca_address_sk#30] Join condition: None (63) Project [codegen id : 21] -Output [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_state#29, ca_country#30] -Input [7]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26, ca_address_sk#27, ca_state#29, ca_country#30] +Output [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_state#32, ca_country#33] +Input [7]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26, ca_address_sk#30, ca_state#32, ca_country#33] (64) Exchange -Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_state#29, ca_country#30] -Arguments: hashpartitioning(c_current_cdemo_sk#23, 5), true, [id=#87] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_state#32, ca_country#33] +Arguments: hashpartitioning(c_current_cdemo_sk#23, 5), ENSURE_REQUIREMENTS, [id=#87] (65) Sort [codegen id : 22] -Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_state#29, ca_country#30] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_state#32, ca_country#33] Arguments: [c_current_cdemo_sk#23 ASC NULLS FIRST], false, 0 -(66) ReusedExchange [Reuses operator id: 41] +(66) ReusedExchange [Reuses operator id: 35] Output [1]: [cd_demo_sk#88] (67) Sort [codegen id : 24] @@ -464,15 +464,15 @@ Right keys [1]: [cd_demo_sk#88] Join condition: None (69) Project [codegen id : 25] -Output [4]: [c_customer_sk#22, c_birth_year#26, ca_state#29, ca_country#30] -Input [6]: [c_customer_sk#22, c_current_cdemo_sk#23, 
c_birth_year#26, ca_state#29, ca_country#30, cd_demo_sk#88] +Output [4]: [c_customer_sk#22, c_birth_year#26, ca_state#32, ca_country#33] +Input [6]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_state#32, ca_country#33, cd_demo_sk#88] (70) Exchange -Input [4]: [c_customer_sk#22, c_birth_year#26, ca_state#29, ca_country#30] -Arguments: hashpartitioning(c_customer_sk#22, 5), true, [id=#89] +Input [4]: [c_customer_sk#22, c_birth_year#26, ca_state#32, ca_country#33] +Arguments: hashpartitioning(c_customer_sk#22, 5), ENSURE_REQUIREMENTS, [id=#89] (71) Sort [codegen id : 26] -Input [4]: [c_customer_sk#22, c_birth_year#26, ca_state#29, ca_country#30] +Input [4]: [c_customer_sk#22, c_birth_year#26, ca_state#32, ca_country#33] Arguments: [c_customer_sk#22 ASC NULLS FIRST], false, 0 (72) SortMergeJoin [codegen id : 27] @@ -481,26 +481,26 @@ Right keys [1]: [c_customer_sk#22] Join condition: None (73) Project [codegen id : 27] -Output [10]: [i_item_id#19, ca_country#30, ca_state#29, cast(cs_quantity#5 as decimal(12,2)) AS agg1#36, cast(cs_list_price#6 as decimal(12,2)) AS agg2#37, cast(cs_coupon_amt#8 as decimal(12,2)) AS agg3#38, cast(cs_sales_price#7 as decimal(12,2)) AS agg4#39, cast(cs_net_profit#9 as decimal(12,2)) AS agg5#40, cast(c_birth_year#26 as decimal(12,2)) AS agg6#41, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#42] -Input [12]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19, c_customer_sk#22, c_birth_year#26, ca_state#29, ca_country#30] +Output [10]: [i_item_id#19, ca_country#33, ca_state#32, cast(cs_quantity#5 as decimal(12,2)) AS agg1#36, cast(cs_list_price#6 as decimal(12,2)) AS agg2#37, cast(cs_coupon_amt#8 as decimal(12,2)) AS agg3#38, cast(cs_sales_price#7 as decimal(12,2)) AS agg4#39, cast(cs_net_profit#9 as decimal(12,2)) AS agg5#40, cast(c_birth_year#26 as decimal(12,2)) AS agg6#41, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#42] +Input [12]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19, c_customer_sk#22, c_birth_year#26, ca_state#32, ca_country#33] (74) HashAggregate [codegen id : 27] -Input [10]: [i_item_id#19, ca_country#30, ca_state#29, agg1#36, agg2#37, agg3#38, agg4#39, agg5#40, agg6#41, agg7#42] -Keys [3]: [i_item_id#19, ca_country#30, ca_state#29] +Input [10]: [i_item_id#19, ca_country#33, ca_state#32, agg1#36, agg2#37, agg3#38, agg4#39, agg5#40, agg6#41, agg7#42] +Keys [3]: [i_item_id#19, ca_country#33, ca_state#32] Functions [7]: [partial_avg(agg1#36), partial_avg(agg2#37), partial_avg(agg3#38), partial_avg(agg4#39), partial_avg(agg5#40), partial_avg(agg6#41), partial_avg(agg7#42)] Aggregate Attributes [14]: [sum#90, count#91, sum#92, count#93, sum#94, count#95, sum#96, count#97, sum#98, count#99, sum#100, count#101, sum#102, count#103] -Results [17]: [i_item_id#19, ca_country#30, ca_state#29, sum#104, count#105, sum#106, count#107, sum#108, count#109, sum#110, count#111, sum#112, count#113, sum#114, count#115, sum#116, count#117] +Results [17]: [i_item_id#19, ca_country#33, ca_state#32, sum#104, count#105, sum#106, count#107, sum#108, count#109, sum#110, count#111, sum#112, count#113, sum#114, count#115, sum#116, count#117] (75) Exchange -Input [17]: [i_item_id#19, ca_country#30, ca_state#29, sum#104, count#105, sum#106, count#107, sum#108, count#109, sum#110, count#111, sum#112, count#113, sum#114, count#115, sum#116, count#117] -Arguments: 
hashpartitioning(i_item_id#19, ca_country#30, ca_state#29, 5), true, [id=#118] +Input [17]: [i_item_id#19, ca_country#33, ca_state#32, sum#104, count#105, sum#106, count#107, sum#108, count#109, sum#110, count#111, sum#112, count#113, sum#114, count#115, sum#116, count#117] +Arguments: hashpartitioning(i_item_id#19, ca_country#33, ca_state#32, 5), ENSURE_REQUIREMENTS, [id=#118] (76) HashAggregate [codegen id : 28] -Input [17]: [i_item_id#19, ca_country#30, ca_state#29, sum#104, count#105, sum#106, count#107, sum#108, count#109, sum#110, count#111, sum#112, count#113, sum#114, count#115, sum#116, count#117] -Keys [3]: [i_item_id#19, ca_country#30, ca_state#29] +Input [17]: [i_item_id#19, ca_country#33, ca_state#32, sum#104, count#105, sum#106, count#107, sum#108, count#109, sum#110, count#111, sum#112, count#113, sum#114, count#115, sum#116, count#117] +Keys [3]: [i_item_id#19, ca_country#33, ca_state#32] Functions [7]: [avg(agg1#36), avg(agg2#37), avg(agg3#38), avg(agg4#39), avg(agg5#40), avg(agg6#41), avg(agg7#42)] Aggregate Attributes [7]: [avg(agg1#36)#119, avg(agg2#37)#120, avg(agg3#38)#121, avg(agg4#39)#122, avg(agg5#40)#123, avg(agg6#41)#124, avg(agg7#42)#125] -Results [11]: [i_item_id#19, ca_country#30, ca_state#29, null AS county#126, avg(agg1#36)#119 AS agg1#127, avg(agg2#37)#120 AS agg2#128, avg(agg3#38)#121 AS agg3#129, avg(agg4#39)#122 AS agg4#130, avg(agg5#40)#123 AS agg5#131, avg(agg6#41)#124 AS agg6#132, avg(agg7#42)#125 AS agg7#133] +Results [11]: [i_item_id#19, ca_country#33, ca_state#32, null AS county#126, avg(agg1#36)#119 AS agg1#127, avg(agg2#37)#120 AS agg2#128, avg(agg3#38)#121 AS agg3#129, avg(agg4#39)#122 AS agg4#130, avg(agg5#40)#123 AS agg5#131, avg(agg6#41)#124 AS agg6#132, avg(agg7#42)#125 AS agg7#133] (77) ReusedExchange [Reuses operator id: 24] Output [8]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19] @@ -528,45 +528,45 @@ Output [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_bi Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] (83) Scan parquet default.customer_address -Output [3]: [ca_address_sk#27, ca_state#29, ca_country#30] +Output [3]: [ca_address_sk#30, ca_state#32, ca_country#33] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [In(ca_state, [ND,WI,AL,NC,OK,MS,TN]), IsNotNull(ca_address_sk)] ReadSchema: struct (84) ColumnarToRow [codegen id : 34] -Input [3]: [ca_address_sk#27, ca_state#29, ca_country#30] +Input [3]: [ca_address_sk#30, ca_state#32, ca_country#33] (85) Filter [codegen id : 34] -Input [3]: [ca_address_sk#27, ca_state#29, ca_country#30] -Condition : (ca_state#29 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#27)) +Input [3]: [ca_address_sk#30, ca_state#32, ca_country#33] +Condition : (ca_state#32 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#30)) (86) Project [codegen id : 34] -Output [2]: [ca_address_sk#27, ca_country#30] -Input [3]: [ca_address_sk#27, ca_state#29, ca_country#30] +Output [2]: [ca_address_sk#30, ca_country#33] +Input [3]: [ca_address_sk#30, ca_state#32, ca_country#33] (87) BroadcastExchange -Input [2]: [ca_address_sk#27, ca_country#30] +Input [2]: [ca_address_sk#30, ca_country#33] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#134] (88) BroadcastHashJoin [codegen id : 35] Left keys [1]: [c_current_addr_sk#24] -Right keys [1]: 
[ca_address_sk#27] +Right keys [1]: [ca_address_sk#30] Join condition: None (89) Project [codegen id : 35] -Output [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_country#30] -Input [6]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26, ca_address_sk#27, ca_country#30] +Output [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_country#33] +Input [6]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26, ca_address_sk#30, ca_country#33] (90) Exchange -Input [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_country#30] -Arguments: hashpartitioning(c_current_cdemo_sk#23, 5), true, [id=#135] +Input [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_country#33] +Arguments: hashpartitioning(c_current_cdemo_sk#23, 5), ENSURE_REQUIREMENTS, [id=#135] (91) Sort [codegen id : 36] -Input [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_country#30] +Input [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_country#33] Arguments: [c_current_cdemo_sk#23 ASC NULLS FIRST], false, 0 -(92) ReusedExchange [Reuses operator id: 41] +(92) ReusedExchange [Reuses operator id: 35] Output [1]: [cd_demo_sk#136] (93) Sort [codegen id : 38] @@ -579,15 +579,15 @@ Right keys [1]: [cd_demo_sk#136] Join condition: None (95) Project [codegen id : 39] -Output [3]: [c_customer_sk#22, c_birth_year#26, ca_country#30] -Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_country#30, cd_demo_sk#136] +Output [3]: [c_customer_sk#22, c_birth_year#26, ca_country#33] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_country#33, cd_demo_sk#136] (96) Exchange -Input [3]: [c_customer_sk#22, c_birth_year#26, ca_country#30] -Arguments: hashpartitioning(c_customer_sk#22, 5), true, [id=#137] +Input [3]: [c_customer_sk#22, c_birth_year#26, ca_country#33] +Arguments: hashpartitioning(c_customer_sk#22, 5), ENSURE_REQUIREMENTS, [id=#137] (97) Sort [codegen id : 40] -Input [3]: [c_customer_sk#22, c_birth_year#26, ca_country#30] +Input [3]: [c_customer_sk#22, c_birth_year#26, ca_country#33] Arguments: [c_customer_sk#22 ASC NULLS FIRST], false, 0 (98) SortMergeJoin [codegen id : 41] @@ -596,26 +596,26 @@ Right keys [1]: [c_customer_sk#22] Join condition: None (99) Project [codegen id : 41] -Output [9]: [i_item_id#19, ca_country#30, cast(cs_quantity#5 as decimal(12,2)) AS agg1#36, cast(cs_list_price#6 as decimal(12,2)) AS agg2#37, cast(cs_coupon_amt#8 as decimal(12,2)) AS agg3#38, cast(cs_sales_price#7 as decimal(12,2)) AS agg4#39, cast(cs_net_profit#9 as decimal(12,2)) AS agg5#40, cast(c_birth_year#26 as decimal(12,2)) AS agg6#41, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#42] -Input [11]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19, c_customer_sk#22, c_birth_year#26, ca_country#30] +Output [9]: [i_item_id#19, ca_country#33, cast(cs_quantity#5 as decimal(12,2)) AS agg1#36, cast(cs_list_price#6 as decimal(12,2)) AS agg2#37, cast(cs_coupon_amt#8 as decimal(12,2)) AS agg3#38, cast(cs_sales_price#7 as decimal(12,2)) AS agg4#39, cast(cs_net_profit#9 as decimal(12,2)) AS agg5#40, cast(c_birth_year#26 as decimal(12,2)) AS agg6#41, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#42] +Input [11]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19, c_customer_sk#22, 
c_birth_year#26, ca_country#33] (100) HashAggregate [codegen id : 41] -Input [9]: [i_item_id#19, ca_country#30, agg1#36, agg2#37, agg3#38, agg4#39, agg5#40, agg6#41, agg7#42] -Keys [2]: [i_item_id#19, ca_country#30] +Input [9]: [i_item_id#19, ca_country#33, agg1#36, agg2#37, agg3#38, agg4#39, agg5#40, agg6#41, agg7#42] +Keys [2]: [i_item_id#19, ca_country#33] Functions [7]: [partial_avg(agg1#36), partial_avg(agg2#37), partial_avg(agg3#38), partial_avg(agg4#39), partial_avg(agg5#40), partial_avg(agg6#41), partial_avg(agg7#42)] Aggregate Attributes [14]: [sum#138, count#139, sum#140, count#141, sum#142, count#143, sum#144, count#145, sum#146, count#147, sum#148, count#149, sum#150, count#151] -Results [16]: [i_item_id#19, ca_country#30, sum#152, count#153, sum#154, count#155, sum#156, count#157, sum#158, count#159, sum#160, count#161, sum#162, count#163, sum#164, count#165] +Results [16]: [i_item_id#19, ca_country#33, sum#152, count#153, sum#154, count#155, sum#156, count#157, sum#158, count#159, sum#160, count#161, sum#162, count#163, sum#164, count#165] (101) Exchange -Input [16]: [i_item_id#19, ca_country#30, sum#152, count#153, sum#154, count#155, sum#156, count#157, sum#158, count#159, sum#160, count#161, sum#162, count#163, sum#164, count#165] -Arguments: hashpartitioning(i_item_id#19, ca_country#30, 5), true, [id=#166] +Input [16]: [i_item_id#19, ca_country#33, sum#152, count#153, sum#154, count#155, sum#156, count#157, sum#158, count#159, sum#160, count#161, sum#162, count#163, sum#164, count#165] +Arguments: hashpartitioning(i_item_id#19, ca_country#33, 5), ENSURE_REQUIREMENTS, [id=#166] (102) HashAggregate [codegen id : 42] -Input [16]: [i_item_id#19, ca_country#30, sum#152, count#153, sum#154, count#155, sum#156, count#157, sum#158, count#159, sum#160, count#161, sum#162, count#163, sum#164, count#165] -Keys [2]: [i_item_id#19, ca_country#30] +Input [16]: [i_item_id#19, ca_country#33, sum#152, count#153, sum#154, count#155, sum#156, count#157, sum#158, count#159, sum#160, count#161, sum#162, count#163, sum#164, count#165] +Keys [2]: [i_item_id#19, ca_country#33] Functions [7]: [avg(agg1#36), avg(agg2#37), avg(agg3#38), avg(agg4#39), avg(agg5#40), avg(agg6#41), avg(agg7#42)] Aggregate Attributes [7]: [avg(agg1#36)#167, avg(agg2#37)#168, avg(agg3#38)#169, avg(agg4#39)#170, avg(agg5#40)#171, avg(agg6#41)#172, avg(agg7#42)#173] -Results [11]: [i_item_id#19, ca_country#30, null AS ca_state#174, null AS county#175, avg(agg1#36)#167 AS agg1#176, avg(agg2#37)#168 AS agg2#177, avg(agg3#38)#169 AS agg3#178, avg(agg4#39)#170 AS agg4#179, avg(agg5#40)#171 AS agg5#180, avg(agg6#41)#172 AS agg6#181, avg(agg7#42)#173 AS agg7#182] +Results [11]: [i_item_id#19, ca_country#33, null AS ca_state#174, null AS county#175, avg(agg1#36)#167 AS agg1#176, avg(agg2#37)#168 AS agg2#177, avg(agg3#38)#169 AS agg3#178, avg(agg4#39)#170 AS agg4#179, avg(agg5#40)#171 AS agg5#180, avg(agg6#41)#172 AS agg6#181, avg(agg7#42)#173 AS agg7#182] (103) Scan parquet default.catalog_sales Output [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] @@ -674,35 +674,35 @@ Output [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_bi Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] (116) Scan parquet default.customer_address -Output [2]: [ca_address_sk#27, ca_state#29] +Output [2]: [ca_address_sk#30, ca_state#32] Batched: true Location 
[not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [In(ca_state, [ND,WI,AL,NC,OK,MS,TN]), IsNotNull(ca_address_sk)] ReadSchema: struct (117) ColumnarToRow [codegen id : 45] -Input [2]: [ca_address_sk#27, ca_state#29] +Input [2]: [ca_address_sk#30, ca_state#32] (118) Filter [codegen id : 45] -Input [2]: [ca_address_sk#27, ca_state#29] -Condition : (ca_state#29 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#27)) +Input [2]: [ca_address_sk#30, ca_state#32] +Condition : (ca_state#32 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#30)) (119) Project [codegen id : 45] -Output [1]: [ca_address_sk#27] -Input [2]: [ca_address_sk#27, ca_state#29] +Output [1]: [ca_address_sk#30] +Input [2]: [ca_address_sk#30, ca_state#32] (120) BroadcastExchange -Input [1]: [ca_address_sk#27] +Input [1]: [ca_address_sk#30] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#183] (121) BroadcastHashJoin [codegen id : 46] Left keys [1]: [c_current_addr_sk#24] -Right keys [1]: [ca_address_sk#27] +Right keys [1]: [ca_address_sk#30] Join condition: None (122) Project [codegen id : 46] Output [3]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26] -Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26, ca_address_sk#27] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26, ca_address_sk#30] (123) BroadcastExchange Input [3]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26] @@ -765,7 +765,7 @@ Results [15]: [i_item_id#19, sum#201, count#202, sum#203, count#204, sum#205, co (136) Exchange Input [15]: [i_item_id#19, sum#201, count#202, sum#203, count#204, sum#205, count#206, sum#207, count#208, sum#209, count#210, sum#211, count#212, sum#213, count#214] -Arguments: hashpartitioning(i_item_id#19, 5), true, [id=#215] +Arguments: hashpartitioning(i_item_id#19, 5), ENSURE_REQUIREMENTS, [id=#215] (137) HashAggregate [codegen id : 50] Input [15]: [i_item_id#19, sum#201, count#202, sum#203, count#204, sum#205, count#206, sum#207, count#208, sum#209, count#210, sum#211, count#212, sum#213, count#214] @@ -860,7 +860,7 @@ Results [14]: [sum#248, count#249, sum#250, count#251, sum#252, count#253, sum#2 (157) Exchange Input [14]: [sum#248, count#249, sum#250, count#251, sum#252, count#253, sum#254, count#255, sum#256, count#257, sum#258, count#259, sum#260, count#261] -Arguments: SinglePartition, true, [id=#262] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#262] (158) HashAggregate [codegen id : 58] Input [14]: [sum#248, count#249, sum#250, count#251, sum#252, count#253, sum#254, count#255, sum#256, count#257, sum#258, count#259, sum#260, count#261] @@ -872,6 +872,6 @@ Results [11]: [null AS i_item_id#270, null AS ca_country#271, null AS ca_state#2 (159) Union (160) TakeOrderedAndProject -Input [11]: [i_item_id#19, ca_country#30, ca_state#29, ca_county#28, agg1#79, agg2#80, agg3#81, agg4#82, agg5#83, agg6#84, agg7#85] -Arguments: 100, [ca_country#30 ASC NULLS FIRST, ca_state#29 ASC NULLS FIRST, ca_county#28 ASC NULLS FIRST, i_item_id#19 ASC NULLS FIRST], [i_item_id#19, ca_country#30, ca_state#29, ca_county#28, agg1#79, agg2#80, agg3#81, agg4#82, agg5#83, agg6#84, agg7#85] +Input [11]: [i_item_id#19, ca_country#33, ca_state#32, ca_county#31, agg1#79, agg2#80, agg3#81, agg4#82, agg5#83, agg6#84, agg7#85] +Arguments: 100, [ca_country#33 ASC NULLS FIRST, ca_state#32 ASC NULLS FIRST, ca_county#31 ASC NULLS FIRST, i_item_id#19 ASC NULLS FIRST], [i_item_id#19, 
ca_country#33, ca_state#32, ca_county#31, agg1#79, agg2#80, agg3#81, agg4#82, agg5#83, agg6#84, agg7#85] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.sf100/simplified.txt index 5514e335f1b51..4566929712713 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.sf100/simplified.txt @@ -54,37 +54,37 @@ TakeOrderedAndProject [ca_country,ca_state,ca_county,i_item_id,agg1,agg2,agg3,ag Exchange [c_customer_sk] #6 WholeStageCodegen (11) Project [c_customer_sk,c_birth_year,ca_county,ca_state,ca_country] - SortMergeJoin [c_current_cdemo_sk,cd_demo_sk] - InputAdapter - WholeStageCodegen (8) - Sort [c_current_cdemo_sk] - InputAdapter - Exchange [c_current_cdemo_sk] #7 - WholeStageCodegen (7) - Project [c_customer_sk,c_current_cdemo_sk,c_birth_year,ca_county,ca_state,ca_country] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_customer_sk,c_current_addr_sk,c_birth_year] + SortMergeJoin [c_current_cdemo_sk,cd_demo_sk] + InputAdapter + WholeStageCodegen (7) + Sort [c_current_cdemo_sk] + InputAdapter + Exchange [c_current_cdemo_sk] #7 + WholeStageCodegen (6) Project [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] Filter [c_birth_month,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] ColumnarToRow InputAdapter Scan parquet default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_month,c_birth_year] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen (6) - Filter [ca_state,ca_address_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state,ca_country] + InputAdapter + WholeStageCodegen (9) + Sort [cd_demo_sk] + InputAdapter + Exchange [cd_demo_sk] #8 + WholeStageCodegen (8) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk] InputAdapter - WholeStageCodegen (10) - Sort [cd_demo_sk] - InputAdapter - Exchange [cd_demo_sk] #9 - WholeStageCodegen (9) - Filter [cd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk] + BroadcastExchange #9 + WholeStageCodegen (10) + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state,ca_country] WholeStageCodegen (28) HashAggregate [i_item_id,ca_country,ca_state,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] [avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7),county,agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] InputAdapter @@ -130,7 +130,7 @@ TakeOrderedAndProject [ca_country,ca_state,ca_county,i_item_id,agg1,agg2,agg3,ag WholeStageCodegen (24) Sort [cd_demo_sk] InputAdapter - ReusedExchange [cd_demo_sk] #9 + ReusedExchange [cd_demo_sk] #8 WholeStageCodegen (42) HashAggregate [i_item_id,ca_country,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] [avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7),ca_state,county,agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] InputAdapter @@ -177,7 +177,7 @@ TakeOrderedAndProject 
[ca_country,ca_state,ca_county,i_item_id,agg1,agg2,agg3,ag WholeStageCodegen (38) Sort [cd_demo_sk] InputAdapter - ReusedExchange [cd_demo_sk] #9 + ReusedExchange [cd_demo_sk] #8 WholeStageCodegen (50) HashAggregate [i_item_id,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] [avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7),ca_country,ca_state,county,agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] InputAdapter diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/explain.txt index a7f328537b7ac..04ff822b1ce52 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/explain.txt @@ -11,60 +11,60 @@ TakeOrderedAndProject (79) : +- * BroadcastHashJoin LeftOuter BuildRight (65) : :- * Project (60) : : +- * SortMergeJoin Inner (59) - : : :- * Sort (47) - : : : +- Exchange (46) - : : : +- * Project (45) - : : : +- * BroadcastHashJoin Inner BuildRight (44) - : : : :- * Project (32) - : : : : +- * SortMergeJoin Inner (31) - : : : : :- * Sort (25) - : : : : : +- Exchange (24) - : : : : : +- * Project (23) - : : : : : +- * BroadcastHashJoin Inner BuildRight (22) - : : : : : :- * Project (17) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (16) - : : : : : : :- * Project (10) - : : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : : : : :- * Filter (3) - : : : : : : : : +- * ColumnarToRow (2) - : : : : : : : : +- Scan parquet default.catalog_sales (1) - : : : : : : : +- BroadcastExchange (8) - : : : : : : : +- * Project (7) - : : : : : : : +- * Filter (6) - : : : : : : : +- * ColumnarToRow (5) - : : : : : : : +- Scan parquet default.household_demographics (4) - : : : : : : +- BroadcastExchange (15) - : : : : : : +- * Project (14) - : : : : : : +- * Filter (13) - : : : : : : +- * ColumnarToRow (12) - : : : : : : +- Scan parquet default.customer_demographics (11) - : : : : : +- BroadcastExchange (21) - : : : : : +- * Filter (20) - : : : : : +- * ColumnarToRow (19) - : : : : : +- Scan parquet default.date_dim (18) - : : : : +- * Sort (30) - : : : : +- Exchange (29) - : : : : +- * Filter (28) - : : : : +- * ColumnarToRow (27) - : : : : +- Scan parquet default.item (26) - : : : +- BroadcastExchange (43) - : : : +- * Project (42) - : : : +- * BroadcastHashJoin Inner BuildLeft (41) - : : : :- BroadcastExchange (37) - : : : : +- * Project (36) - : : : : +- * Filter (35) - : : : : +- * ColumnarToRow (34) - : : : : +- Scan parquet default.date_dim (33) - : : : +- * Filter (40) - : : : +- * ColumnarToRow (39) - : : : +- Scan parquet default.date_dim (38) + : : :- * Sort (34) + : : : +- Exchange (33) + : : : +- * Project (32) + : : : +- * SortMergeJoin Inner (31) + : : : :- * Sort (25) + : : : : +- Exchange (24) + : : : : +- * Project (23) + : : : : +- * BroadcastHashJoin Inner BuildRight (22) + : : : : :- * Project (17) + : : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : : :- * Project (10) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet default.catalog_sales (1) + : : : : : : +- BroadcastExchange (8) + : : : : : : +- * Project (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : 
+- Scan parquet default.household_demographics (4) + : : : : : +- BroadcastExchange (15) + : : : : : +- * Project (14) + : : : : : +- * Filter (13) + : : : : : +- * ColumnarToRow (12) + : : : : : +- Scan parquet default.customer_demographics (11) + : : : : +- BroadcastExchange (21) + : : : : +- * Filter (20) + : : : : +- * ColumnarToRow (19) + : : : : +- Scan parquet default.date_dim (18) + : : : +- * Sort (30) + : : : +- Exchange (29) + : : : +- * Filter (28) + : : : +- * ColumnarToRow (27) + : : : +- Scan parquet default.item (26) : : +- * Sort (58) : : +- Exchange (57) : : +- * Project (56) : : +- * BroadcastHashJoin Inner BuildRight (55) - : : :- * Filter (50) - : : : +- * ColumnarToRow (49) - : : : +- Scan parquet default.inventory (48) + : : :- * Project (50) + : : : +- * BroadcastHashJoin Inner BuildLeft (49) + : : : :- BroadcastExchange (45) + : : : : +- * Project (44) + : : : : +- * BroadcastHashJoin Inner BuildLeft (43) + : : : : :- BroadcastExchange (39) + : : : : : +- * Project (38) + : : : : : +- * Filter (37) + : : : : : +- * ColumnarToRow (36) + : : : : : +- Scan parquet default.date_dim (35) + : : : : +- * Filter (42) + : : : : +- * ColumnarToRow (41) + : : : : +- Scan parquet default.date_dim (40) + : : : +- * Filter (48) + : : : +- * ColumnarToRow (47) + : : : +- Scan parquet default.inventory (46) : : +- BroadcastExchange (54) : : +- * Filter (53) : : +- * ColumnarToRow (52) @@ -185,7 +185,7 @@ Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, c (24) Exchange Input [6]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16] -Arguments: hashpartitioning(cs_item_sk#5, 5), true, [id=#18] +Arguments: hashpartitioning(cs_item_sk#5, 5), ENSURE_REQUIREMENTS, [id=#18] (25) Sort [codegen id : 5] Input [6]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16] @@ -207,101 +207,101 @@ Condition : isnotnull(i_item_sk#19) (29) Exchange Input [2]: [i_item_sk#19, i_item_desc#20] -Arguments: hashpartitioning(i_item_sk#19, 5), true, [id=#21] +Arguments: hashpartitioning(i_item_sk#19, 5), ENSURE_REQUIREMENTS, [id=#21] (30) Sort [codegen id : 7] Input [2]: [i_item_sk#19, i_item_desc#20] Arguments: [i_item_sk#19 ASC NULLS FIRST], false, 0 -(31) SortMergeJoin [codegen id : 10] +(31) SortMergeJoin [codegen id : 8] Left keys [1]: [cs_item_sk#5] Right keys [1]: [i_item_sk#19] Join condition: None -(32) Project [codegen id : 10] +(32) Project [codegen id : 8] Output [7]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20] Input [8]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_sk#19, i_item_desc#20] -(33) Scan parquet default.date_dim -Output [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] +(33) Exchange +Input [7]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20] +Arguments: hashpartitioning(cs_item_sk#5, cs_sold_date_sk#1, 5), ENSURE_REQUIREMENTS, [id=#22] + +(34) Sort [codegen id : 9] +Input [7]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20] +Arguments: [cs_item_sk#5 ASC NULLS FIRST, cs_sold_date_sk#1 ASC NULLS FIRST], false, 0 + +(35) Scan parquet default.date_dim +Output [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#26] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), 
EqualTo(d_year,2001), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] ReadSchema: struct -(34) ColumnarToRow [codegen id : 8] -Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] +(36) ColumnarToRow [codegen id : 10] +Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#26] -(35) Filter [codegen id : 8] -Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] -Condition : ((((isnotnull(d_year#25) AND (d_year#25 = 2001)) AND isnotnull(d_date_sk#22)) AND isnotnull(d_week_seq#24)) AND isnotnull(d_date#23)) +(37) Filter [codegen id : 10] +Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#26] +Condition : ((((isnotnull(d_year#26) AND (d_year#26 = 2001)) AND isnotnull(d_date_sk#23)) AND isnotnull(d_week_seq#25)) AND isnotnull(d_date#24)) -(36) Project [codegen id : 8] -Output [3]: [d_date_sk#22, d_date#23, d_week_seq#24] -Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] +(38) Project [codegen id : 10] +Output [3]: [d_date_sk#23, d_date#24, d_week_seq#25] +Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#26] -(37) BroadcastExchange -Input [3]: [d_date_sk#22, d_date#23, d_week_seq#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#26] +(39) BroadcastExchange +Input [3]: [d_date_sk#23, d_date#24, d_week_seq#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#27] -(38) Scan parquet default.date_dim -Output [2]: [d_date_sk#27, d_week_seq#28] +(40) Scan parquet default.date_dim +Output [2]: [d_date_sk#28, d_week_seq#29] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] ReadSchema: struct -(39) ColumnarToRow -Input [2]: [d_date_sk#27, d_week_seq#28] +(41) ColumnarToRow +Input [2]: [d_date_sk#28, d_week_seq#29] -(40) Filter -Input [2]: [d_date_sk#27, d_week_seq#28] -Condition : (isnotnull(d_week_seq#28) AND isnotnull(d_date_sk#27)) +(42) Filter +Input [2]: [d_date_sk#28, d_week_seq#29] +Condition : (isnotnull(d_week_seq#29) AND isnotnull(d_date_sk#28)) -(41) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [d_week_seq#24] -Right keys [1]: [d_week_seq#28] +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [d_week_seq#25] +Right keys [1]: [d_week_seq#29] Join condition: None -(42) Project [codegen id : 9] -Output [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27] -Input [5]: [d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27, d_week_seq#28] - -(43) BroadcastExchange -Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] - -(44) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cs_sold_date_sk#1] -Right keys [1]: [d_date_sk#22] -Join condition: (d_date#16 > d_date#23 + 5 days) - -(45) Project [codegen id : 10] -Output [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27] -Input [11]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20, d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27] +(44) Project [codegen id : 11] +Output [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#28] +Input [5]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#28, d_week_seq#29] -(46) Exchange -Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27] 
-Arguments: hashpartitioning(cs_item_sk#5, d_date_sk#27, 5), true, [id=#30] +(45) BroadcastExchange +Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[3, int, true] as bigint)),false), [id=#30] -(47) Sort [codegen id : 11] -Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27] -Arguments: [cs_item_sk#5 ASC NULLS FIRST, d_date_sk#27 ASC NULLS FIRST], false, 0 - -(48) Scan parquet default.inventory +(46) Scan parquet default.inventory Output [4]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] Batched: true Location [not included in comparison]/{warehouse_dir}/inventory] PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] ReadSchema: struct -(49) ColumnarToRow [codegen id : 13] +(47) ColumnarToRow Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] -(50) Filter [codegen id : 13] +(48) Filter Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] Condition : (((isnotnull(inv_quantity_on_hand#34) AND isnotnull(inv_item_sk#32)) AND isnotnull(inv_warehouse_sk#33)) AND isnotnull(inv_date_sk#31)) +(49) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [d_date_sk#28] +Right keys [1]: [inv_date_sk#31] +Join condition: None + +(50) Project [codegen id : 13] +Output [6]: [d_date_sk#23, d_date#24, d_week_seq#25, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] +Input [8]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#28, inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] + (51) Scan parquet default.warehouse Output [2]: [w_warehouse_sk#35, w_warehouse_name#36] Batched: true @@ -326,25 +326,25 @@ Right keys [1]: [w_warehouse_sk#35] Join condition: None (56) Project [codegen id : 13] -Output [4]: [inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] -Input [6]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34, w_warehouse_sk#35, w_warehouse_name#36] +Output [6]: [d_date_sk#23, d_date#24, d_week_seq#25, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Input [8]: [d_date_sk#23, d_date#24, d_week_seq#25, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34, w_warehouse_sk#35, w_warehouse_name#36] (57) Exchange -Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] -Arguments: hashpartitioning(inv_item_sk#32, inv_date_sk#31, 5), true, [id=#38] +Input [6]: [d_date_sk#23, d_date#24, d_week_seq#25, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Arguments: hashpartitioning(inv_item_sk#32, d_date_sk#23, 5), ENSURE_REQUIREMENTS, [id=#38] (58) Sort [codegen id : 14] -Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] -Arguments: [inv_item_sk#32 ASC NULLS FIRST, inv_date_sk#31 ASC NULLS FIRST], false, 0 +Input [6]: [d_date_sk#23, d_date#24, d_week_seq#25, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Arguments: [inv_item_sk#32 ASC NULLS FIRST, d_date_sk#23 ASC NULLS FIRST], false, 0 (59) SortMergeJoin [codegen id : 16] -Left keys [2]: [cs_item_sk#5, d_date_sk#27] -Right keys [2]: [inv_item_sk#32, inv_date_sk#31] -Join condition: (inv_quantity_on_hand#34 < cs_quantity#8) +Left keys [2]: [cs_item_sk#5, cs_sold_date_sk#1] +Right keys [2]: [inv_item_sk#32, d_date_sk#23] +Join 
condition: ((inv_quantity_on_hand#34 < cs_quantity#8) AND (d_date#16 > d_date#24 + 5 days)) (60) Project [codegen id : 16] -Output [6]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] -Input [11]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27, inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Output [6]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#25] +Input [13]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20, d_date_sk#23, d_date#24, d_week_seq#25, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] (61) Scan parquet default.promotion Output [1]: [p_promo_sk#39] @@ -370,15 +370,15 @@ Right keys [1]: [p_promo_sk#39] Join condition: None (66) Project [codegen id : 16] -Output [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] -Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24, p_promo_sk#39] +Output [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#25] +Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#25, p_promo_sk#39] (67) Exchange -Input [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] -Arguments: hashpartitioning(cs_item_sk#5, cs_order_number#7, 5), true, [id=#41] +Input [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#25] +Arguments: hashpartitioning(cs_item_sk#5, cs_order_number#7, 5), ENSURE_REQUIREMENTS, [id=#41] (68) Sort [codegen id : 17] -Input [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] +Input [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#25] Arguments: [cs_item_sk#5 ASC NULLS FIRST, cs_order_number#7 ASC NULLS FIRST], false, 0 (69) Scan parquet default.catalog_returns @@ -397,7 +397,7 @@ Condition : (isnotnull(cr_item_sk#42) AND isnotnull(cr_order_number#43)) (72) Exchange Input [2]: [cr_item_sk#42, cr_order_number#43] -Arguments: hashpartitioning(cr_item_sk#42, cr_order_number#43, 5), true, [id=#44] +Arguments: hashpartitioning(cr_item_sk#42, cr_order_number#43, 5), ENSURE_REQUIREMENTS, [id=#44] (73) Sort [codegen id : 19] Input [2]: [cr_item_sk#42, cr_order_number#43] @@ -409,28 +409,28 @@ Right keys [2]: [cr_item_sk#42, cr_order_number#43] Join condition: None (75) Project [codegen id : 20] -Output [3]: [w_warehouse_name#36, i_item_desc#20, d_week_seq#24] -Input [7]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24, cr_item_sk#42, cr_order_number#43] +Output [3]: [w_warehouse_name#36, i_item_desc#20, d_week_seq#25] +Input [7]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#25, cr_item_sk#42, cr_order_number#43] (76) HashAggregate [codegen id : 20] -Input [3]: [w_warehouse_name#36, i_item_desc#20, d_week_seq#24] -Keys [3]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24] +Input [3]: [w_warehouse_name#36, i_item_desc#20, d_week_seq#25] +Keys [3]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#25] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#45] -Results [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count#46] +Results [4]: [i_item_desc#20, 
w_warehouse_name#36, d_week_seq#25, count#46] (77) Exchange -Input [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count#46] -Arguments: hashpartitioning(i_item_desc#20, w_warehouse_name#36, d_week_seq#24, 5), true, [id=#47] +Input [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#25, count#46] +Arguments: hashpartitioning(i_item_desc#20, w_warehouse_name#36, d_week_seq#25, 5), ENSURE_REQUIREMENTS, [id=#47] (78) HashAggregate [codegen id : 21] -Input [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count#46] -Keys [3]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24] +Input [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#25, count#46] +Keys [3]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#25] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#48] -Results [6]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count(1)#48 AS no_promo#49, count(1)#48 AS promo#50, count(1)#48 AS total_cnt#51] +Results [6]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#25, count(1)#48 AS no_promo#49, count(1)#48 AS promo#50, count(1)#48 AS total_cnt#51] (79) TakeOrderedAndProject -Input [6]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, no_promo#49, promo#50, total_cnt#51] -Arguments: 100, [total_cnt#51 DESC NULLS LAST, i_item_desc#20 ASC NULLS FIRST, w_warehouse_name#36 ASC NULLS FIRST, d_week_seq#24 ASC NULLS FIRST], [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, no_promo#49, promo#50, total_cnt#51] +Input [6]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#25, no_promo#49, promo#50, total_cnt#51] +Arguments: 100, [total_cnt#51 DESC NULLS LAST, i_item_desc#20 ASC NULLS FIRST, w_warehouse_name#36 ASC NULLS FIRST, d_week_seq#25 ASC NULLS FIRST], [i_item_desc#20, w_warehouse_name#36, d_week_seq#25, no_promo#49, promo#50, total_cnt#51] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/simplified.txt index 918508787c4b0..b88505ad7b9bc 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/simplified.txt @@ -16,95 +16,95 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom Project [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] BroadcastHashJoin [cs_promo_sk,p_promo_sk] Project [cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] - SortMergeJoin [cs_item_sk,d_date_sk,inv_item_sk,inv_date_sk,inv_quantity_on_hand,cs_quantity] + SortMergeJoin [cs_item_sk,cs_sold_date_sk,inv_item_sk,d_date_sk,inv_quantity_on_hand,cs_quantity,d_date,d_date] InputAdapter - WholeStageCodegen (11) - Sort [cs_item_sk,d_date_sk] + WholeStageCodegen (9) + Sort [cs_item_sk,cs_sold_date_sk] InputAdapter - Exchange [cs_item_sk,d_date_sk] #3 - WholeStageCodegen (10) - Project [cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,i_item_desc,d_week_seq,d_date_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk,d_date,d_date] - Project [cs_sold_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,d_date,i_item_desc] - SortMergeJoin [cs_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (5) - Sort [cs_item_sk] - InputAdapter - Exchange [cs_item_sk] #4 - WholeStageCodegen (4) - Project [cs_sold_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,d_date] - BroadcastHashJoin [cs_ship_date_sk,d_date_sk] - Project 
[cs_sold_date_sk,cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] - BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] - Project [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] - BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] - Filter [cs_quantity,cs_item_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_sold_date_sk,cs_ship_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] + Exchange [cs_item_sk,cs_sold_date_sk] #3 + WholeStageCodegen (8) + Project [cs_sold_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,d_date,i_item_desc] + SortMergeJoin [cs_item_sk,i_item_sk] + InputAdapter + WholeStageCodegen (5) + Sort [cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #4 + WholeStageCodegen (4) + Project [cs_sold_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,d_date] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Project [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] + BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] + Filter [cs_quantity,cs_item_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_sold_date_sk,cs_ship_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [hd_demo_sk] - Filter [hd_buy_potential,hd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] InputAdapter - BroadcastExchange #6 - WholeStageCodegen (2) - Project [cd_demo_sk] - Filter [cd_marital_status,cd_demo_sk] + BroadcastExchange #5 + WholeStageCodegen (1) + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] + Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] InputAdapter - BroadcastExchange #7 - WholeStageCodegen (3) - Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] - InputAdapter - WholeStageCodegen (7) - Sort [i_item_sk] - InputAdapter - Exchange [i_item_sk] #8 - WholeStageCodegen (6) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_desc] + BroadcastExchange #6 + WholeStageCodegen (2) + Project [cd_demo_sk] + Filter [cd_marital_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (3) + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] InputAdapter - BroadcastExchange #9 - WholeStageCodegen (9) - Project [d_date_sk,d_date,d_week_seq,d_date_sk] - BroadcastHashJoin [d_week_seq,d_week_seq] - InputAdapter - BroadcastExchange #10 - WholeStageCodegen (8) - Project [d_date_sk,d_date,d_week_seq] - Filter [d_year,d_date_sk,d_week_seq,d_date] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_week_seq,d_year] - Filter [d_week_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim 
[d_date_sk,d_week_seq] + WholeStageCodegen (7) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #8 + WholeStageCodegen (6) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_desc] InputAdapter WholeStageCodegen (14) - Sort [inv_item_sk,inv_date_sk] + Sort [inv_item_sk,d_date_sk] InputAdapter - Exchange [inv_item_sk,inv_date_sk] #11 + Exchange [inv_item_sk,d_date_sk] #9 WholeStageCodegen (13) - Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,w_warehouse_name] + Project [d_date_sk,d_date,d_week_seq,inv_item_sk,inv_quantity_on_hand,w_warehouse_name] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk,inv_date_sk] - ColumnarToRow + Project [d_date_sk,d_date,d_week_seq,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + BroadcastHashJoin [d_date_sk,inv_date_sk] InputAdapter - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + BroadcastExchange #10 + WholeStageCodegen (11) + Project [d_date_sk,d_date,d_week_seq,d_date_sk] + BroadcastHashJoin [d_week_seq,d_week_seq] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (10) + Project [d_date_sk,d_date,d_week_seq] + Filter [d_year,d_date_sk,d_week_seq,d_date] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date,d_week_seq,d_year] + Filter [d_week_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_week_seq] + Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk,inv_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] InputAdapter BroadcastExchange #12 WholeStageCodegen (12) From 122f8f0fdb0fdc87a5970f4b39938a0496bd4b4b Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 5 Jan 2021 07:30:59 +0000 Subject: [PATCH 0962/1009] [SPARK-33919][SQL][TESTS] Unify v1 and v2 SHOW NAMESPACES tests ### What changes were proposed in this pull request? 1. Port DS V2 tests from `DataSourceV2SQLSuite` to the base test suite `ShowNamespacesSuiteBase` to run those tests for v1 catalogs. 2. Port DS v1 tests from `DDLSuite` to `ShowNamespacesSuiteBase` to run the tests for v2 catalogs too. ### Why are the changes needed? To improve test coverage. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running new test suites: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *ShowNamespacesSuite" ``` Closes #30937 from MaxGekk/unify-show-namespaces-tests. 
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/DDLParserSuite.scala | 34 ----- .../sql/connector/DataSourceV2SQLSuite.scala | 89 ------------ .../sql/execution/command/DDLSuite.scala | 30 ---- .../command/ShowNamespacesParserSuite.scala | 70 ++++++++++ .../command/ShowNamespacesSuiteBase.scala | 131 ++++++++++++++++++ .../command/v1/ShowNamespacesSuite.scala | 60 ++++++++ .../command/v2/ShowNamespacesSuite.scala | 72 ++++++++++ .../command/ShowNamespacesSuite.scala | 43 ++++++ 8 files changed, 376 insertions(+), 153 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesParserSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesSuiteBase.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowNamespacesSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowNamespacesSuite.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowNamespacesSuite.scala diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 9ec22a982a588..4978a3a6653c4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1807,40 +1807,6 @@ class DDLParserSuite extends AnalysisTest { UnresolvedNamespace(Seq("a", "b", "c")), "/home/user/db")) } - test("show databases: basic") { - comparePlans( - parsePlan("SHOW DATABASES"), - ShowNamespaces(UnresolvedNamespace(Seq.empty[String]), None)) - comparePlans( - parsePlan("SHOW DATABASES LIKE 'defau*'"), - ShowNamespaces(UnresolvedNamespace(Seq.empty[String]), Some("defau*"))) - } - - test("show databases: FROM/IN operator is not allowed") { - def verify(sql: String): Unit = { - val exc = intercept[ParseException] { parsePlan(sql) } - assert(exc.getMessage.contains("FROM/IN operator is not allowed in SHOW DATABASES")) - } - - verify("SHOW DATABASES FROM testcat.ns1.ns2") - verify("SHOW DATABASES IN testcat.ns1.ns2") - } - - test("show namespaces") { - comparePlans( - parsePlan("SHOW NAMESPACES"), - ShowNamespaces(UnresolvedNamespace(Seq.empty[String]), None)) - comparePlans( - parsePlan("SHOW NAMESPACES FROM testcat.ns1.ns2"), - ShowNamespaces(UnresolvedNamespace(Seq("testcat", "ns1", "ns2")), None)) - comparePlans( - parsePlan("SHOW NAMESPACES IN testcat.ns1.ns2"), - ShowNamespaces(UnresolvedNamespace(Seq("testcat", "ns1", "ns2")), None)) - comparePlans( - parsePlan("SHOW NAMESPACES IN testcat.ns1 LIKE '*pattern*'"), - ShowNamespaces(UnresolvedNamespace(Seq("testcat", "ns1")), Some("*pattern*"))) - } - test("analyze table statistics") { comparePlans(parsePlan("analyze table a.b.c compute statistics"), AnalyzeTable( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 0d61306628a44..5c67ad9cdfe2e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -1285,95 +1285,6 @@ class DataSourceV2SQLSuite } } - test("ShowNamespaces: show root namespaces with default v2 catalog") { - 
spark.conf.set(SQLConf.DEFAULT_CATALOG.key, "testcat") - - testShowNamespaces("SHOW NAMESPACES", Seq()) - - spark.sql("CREATE TABLE testcat.ns1.table (id bigint) USING foo") - spark.sql("CREATE TABLE testcat.ns1.ns1_1.table (id bigint) USING foo") - spark.sql("CREATE TABLE testcat.ns2.table (id bigint) USING foo") - - testShowNamespaces("SHOW NAMESPACES", Seq("ns1", "ns2")) - testShowNamespaces("SHOW NAMESPACES LIKE '*1*'", Seq("ns1")) - } - - test("ShowNamespaces: show namespaces with v2 catalog") { - spark.sql("CREATE TABLE testcat.ns1.table (id bigint) USING foo") - spark.sql("CREATE TABLE testcat.ns1.ns1_1.table (id bigint) USING foo") - spark.sql("CREATE TABLE testcat.ns1.ns1_2.table (id bigint) USING foo") - spark.sql("CREATE TABLE testcat.ns2.table (id bigint) USING foo") - spark.sql("CREATE TABLE testcat.ns2.ns2_1.table (id bigint) USING foo") - - // Look up only with catalog name, which should list root namespaces. - testShowNamespaces("SHOW NAMESPACES IN testcat", Seq("ns1", "ns2")) - - // Look up sub-namespaces. - testShowNamespaces("SHOW NAMESPACES IN testcat.ns1", Seq("ns1.ns1_1", "ns1.ns1_2")) - testShowNamespaces("SHOW NAMESPACES IN testcat.ns1 LIKE '*2*'", Seq("ns1.ns1_2")) - testShowNamespaces("SHOW NAMESPACES IN testcat.ns2", Seq("ns2.ns2_1")) - - // Try to look up namespaces that do not exist. - testShowNamespaces("SHOW NAMESPACES IN testcat.ns3", Seq()) - testShowNamespaces("SHOW NAMESPACES IN testcat.ns1.ns3", Seq()) - } - - test("ShowNamespaces: default v2 catalog is not set") { - spark.sql("CREATE TABLE testcat.ns.table (id bigint) USING foo") - - // The current catalog is resolved to a v2 session catalog. - testShowNamespaces("SHOW NAMESPACES", Seq("default")) - } - - test("ShowNamespaces: default v2 catalog doesn't support namespace") { - spark.conf.set( - "spark.sql.catalog.testcat_no_namespace", - classOf[BasicInMemoryTableCatalog].getName) - spark.conf.set(SQLConf.DEFAULT_CATALOG.key, "testcat_no_namespace") - - val exception = intercept[AnalysisException] { - sql("SHOW NAMESPACES") - } - - assert(exception.getMessage.contains("does not support namespaces")) - } - - test("ShowNamespaces: v2 catalog doesn't support namespace") { - spark.conf.set( - "spark.sql.catalog.testcat_no_namespace", - classOf[BasicInMemoryTableCatalog].getName) - - val exception = intercept[AnalysisException] { - sql("SHOW NAMESPACES in testcat_no_namespace") - } - - assert(exception.getMessage.contains("does not support namespaces")) - } - - test("ShowNamespaces: session catalog is used and namespace doesn't exist") { - val exception = intercept[AnalysisException] { - sql("SHOW NAMESPACES in dummy") - } - - assert(exception.getMessage.contains("Namespace 'dummy' not found")) - } - - test("ShowNamespaces: change catalog and namespace with USE statements") { - sql("CREATE TABLE testcat.ns1.ns2.table (id bigint) USING foo") - - // Initially, the current catalog is a v2 session catalog. - testShowNamespaces("SHOW NAMESPACES", Seq("default")) - - // Update the current catalog to 'testcat'. - sql("USE testcat") - testShowNamespaces("SHOW NAMESPACES", Seq("ns1")) - - // Update the current namespace to 'ns1'. - sql("USE ns1") - // 'SHOW NAMESPACES' is not affected by the current namespace and lists root namespaces. 
- testShowNamespaces("SHOW NAMESPACES", Seq("ns1")) - } - private def testShowNamespaces( sqlText: String, expected: Seq[String]): Unit = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 4e2b67e532933..946e8412cfa7a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -1277,36 +1277,6 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { assertUnsupported("ALTER VIEW dbx.tab1 DROP IF EXISTS PARTITION (b='2')") } - - test("show databases") { - sql("CREATE DATABASE showdb2B") - sql("CREATE DATABASE showdb1A") - - // check the result as well as its order - checkDataset(sql("SHOW DATABASES"), Row("default"), Row("showdb1a"), Row("showdb2b")) - - checkAnswer( - sql("SHOW DATABASES LIKE '*db1A'"), - Row("showdb1a") :: Nil) - - checkAnswer( - sql("SHOW DATABASES '*db1A'"), - Row("showdb1a") :: Nil) - - checkAnswer( - sql("SHOW DATABASES LIKE 'showdb1A'"), - Row("showdb1a") :: Nil) - - checkAnswer( - sql("SHOW DATABASES LIKE '*db1A|*db2B'"), - Row("showdb1a") :: - Row("showdb2b") :: Nil) - - checkAnswer( - sql("SHOW DATABASES LIKE 'non-existentdb'"), - Nil) - } - test("drop view - temporary view") { val catalog = spark.sessionState.catalog sql( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesParserSuite.scala new file mode 100644 index 0000000000000..c9e5d33fea87a --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesParserSuite.scala @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedNamespace} +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.catalyst.plans.logical.ShowNamespaces +import org.apache.spark.sql.test.SharedSparkSession + +class ShowNamespacesParserSuite extends AnalysisTest with SharedSparkSession { + test("all namespaces") { + Seq("SHOW NAMESPACES", "SHOW DATABASES").foreach { sqlCmd => + comparePlans( + parsePlan(sqlCmd), + ShowNamespaces(UnresolvedNamespace(Seq.empty[String]), None)) + } + } + + test("basic pattern") { + Seq( + "SHOW DATABASES LIKE 'defau*'", + "SHOW NAMESPACES LIKE 'defau*'").foreach { sqlCmd => + comparePlans( + parsePlan(sqlCmd), + ShowNamespaces(UnresolvedNamespace(Seq.empty[String]), Some("defau*"))) + } + } + + test("FROM/IN operator is not allowed by SHOW DATABASES") { + Seq( + "SHOW DATABASES FROM testcat.ns1.ns2", + "SHOW DATABASES IN testcat.ns1.ns2").foreach { sqlCmd => + val errMsg = intercept[ParseException] { + parsePlan(sqlCmd) + }.getMessage + assert(errMsg.contains("FROM/IN operator is not allowed in SHOW DATABASES")) + } + } + + test("show namespaces in/from a namespace") { + comparePlans( + parsePlan("SHOW NAMESPACES FROM testcat.ns1.ns2"), + ShowNamespaces(UnresolvedNamespace(Seq("testcat", "ns1", "ns2")), None)) + comparePlans( + parsePlan("SHOW NAMESPACES IN testcat.ns1.ns2"), + ShowNamespaces(UnresolvedNamespace(Seq("testcat", "ns1", "ns2")), None)) + } + + test("namespaces by a pattern from another namespace") { + comparePlans( + parsePlan("SHOW NAMESPACES IN testcat.ns1 LIKE '*pattern*'"), + ShowNamespaces(UnresolvedNamespace(Seq("testcat", "ns1")), Some("*pattern*"))) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesSuiteBase.scala new file mode 100644 index 0000000000000..790489e0d47ce --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesSuiteBase.scala @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{StringType, StructType} + +/** + * This base suite contains unified tests for the `SHOW NAMESPACES` and `SHOW DATABASES` commands + * that check V1 and V2 table catalogs. 
The tests that cannot run for all supported catalogs are + * located in more specific test suites: + * + * - V2 table catalog tests: `org.apache.spark.sql.execution.command.v2.ShowNamespacesSuite` + * - V1 table catalog tests: `org.apache.spark.sql.execution.command.v1.ShowNamespacesSuiteBase` + * - V1 In-Memory catalog: `org.apache.spark.sql.execution.command.v1.ShowNamespacesSuite` + * - V1 Hive External catalog: `org.apache.spark.sql.hive.execution.command.ShowNamespacesSuite` + */ +trait ShowNamespacesSuiteBase extends QueryTest with DDLCommandTestUtils { + override val command = "SHOW NAMESPACES" + + protected def runShowNamespacesSql(sqlText: String, expected: Seq[String]): Unit = { + val df = spark.sql(sqlText) + assert(df.schema === new StructType().add("namespace", StringType, false)) + checkAnswer(df, expected.map(Row(_))) + } + + protected def builtinTopNamespaces: Seq[String] = Seq.empty + + test("default namespace") { + withSQLConf(SQLConf.DEFAULT_CATALOG.key -> catalog) { + runShowNamespacesSql("SHOW NAMESPACES", builtinTopNamespaces) + } + runShowNamespacesSql(s"SHOW NAMESPACES IN $catalog", builtinTopNamespaces) + } + + test("at the top level") { + withNamespace(s"$catalog.ns1", s"$catalog.ns2") { + sql(s"CREATE DATABASE $catalog.ns1") + sql(s"CREATE NAMESPACE $catalog.ns2") + + runShowNamespacesSql( + s"SHOW NAMESPACES IN $catalog", + Seq("ns1", "ns2") ++ builtinTopNamespaces) + } + } + + test("exact matching") { + withNamespace(s"$catalog.ns1", s"$catalog.ns2") { + sql(s"CREATE NAMESPACE $catalog.ns1") + sql(s"CREATE NAMESPACE $catalog.ns2") + Seq( + s"SHOW NAMESPACES IN $catalog LIKE 'ns2'", + s"SHOW NAMESPACES IN $catalog 'ns2'", + s"SHOW NAMESPACES FROM $catalog LIKE 'ns2'", + s"SHOW NAMESPACES FROM $catalog 'ns2'").foreach { sqlCmd => + withClue(sqlCmd) { + runShowNamespacesSql(sqlCmd, Seq("ns2")) + } + } + } + } + + test("does not match to any namespace") { + Seq( + "SHOW DATABASES LIKE 'non-existentdb'", + "SHOW NAMESPACES 'non-existentdb'").foreach { sqlCmd => + runShowNamespacesSql(sqlCmd, Seq.empty) + } + } + + test("show root namespaces with the default catalog") { + withSQLConf(SQLConf.DEFAULT_CATALOG.key -> catalog) { + runShowNamespacesSql("SHOW NAMESPACES", builtinTopNamespaces) + + withNamespace("ns1", "ns2") { + sql(s"CREATE NAMESPACE ns1") + sql(s"CREATE NAMESPACE ns2") + + runShowNamespacesSql("SHOW NAMESPACES", Seq("ns1", "ns2") ++ builtinTopNamespaces) + runShowNamespacesSql("SHOW NAMESPACES LIKE '*1*'", Seq("ns1")) + } + } + } + + test("complex namespace patterns") { + withNamespace(s"$catalog.showdb2b", s"$catalog.showdb1a") { + sql(s"CREATE NAMESPACE $catalog.showdb2b") + sql(s"CREATE NAMESPACE $catalog.showdb1a") + + Seq( + "'*db1A'" -> Seq("showdb1a"), + "'*2*'" -> Seq("showdb2b"), + "'*db1A|*db2B'" -> Seq("showdb1a", "showdb2b") + ).foreach { case (pattern, expected) => + runShowNamespacesSql(s"SHOW NAMESPACES IN $catalog LIKE $pattern", expected) + } + } + } + + test("change catalog and namespace with USE statements") { + try { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + sql(s"USE $catalog") + runShowNamespacesSql("SHOW NAMESPACES", Seq("ns") ++ builtinTopNamespaces) + + sql("USE ns") + // 'SHOW NAMESPACES' is not affected by the current namespace and lists root namespaces. 
+ runShowNamespacesSql("SHOW NAMESPACES", Seq("ns") ++ builtinTopNamespaces) + } + } finally { + spark.sessionState.catalogManager.reset() + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowNamespacesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowNamespacesSuite.scala new file mode 100644 index 0000000000000..fd76ef2490f35 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowNamespacesSuite.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command.v1 + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.execution.command +import org.apache.spark.sql.internal.SQLConf + +/** + * This base suite contains unified tests for the `SHOW NAMESPACES` and `SHOW DATABASES` commands + * that check V1 table catalogs. The tests that cannot run for all V1 catalogs are located in more + * specific test suites: + * + * - V1 In-Memory catalog: `org.apache.spark.sql.execution.command.v1.ShowNamespacesSuite` + * - V1 Hive External catalog: `org.apache.spark.sql.hive.execution.command.ShowNamespacesSuite` + */ +trait ShowNamespacesSuiteBase extends command.ShowNamespacesSuiteBase { + override protected def builtinTopNamespaces: Seq[String] = Seq("default") + + test("IN namespace doesn't exist") { + val errMsg = intercept[AnalysisException] { + sql("SHOW NAMESPACES in dummy") + }.getMessage + assert(errMsg.contains("Namespace 'dummy' not found")) + } +} + +class ShowNamespacesSuite extends ShowNamespacesSuiteBase with CommandSuiteBase { + test("case sensitivity") { + Seq(true, false).foreach { caseSensitive => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + withNamespace(s"$catalog.AAA", s"$catalog.bbb") { + sql(s"CREATE NAMESPACE $catalog.AAA") + sql(s"CREATE NAMESPACE $catalog.bbb") + val expected = if (caseSensitive) "AAA" else "aaa" + runShowNamespacesSql( + s"SHOW NAMESPACES IN $catalog", + Seq(expected, "bbb") ++ builtinTopNamespaces) + runShowNamespacesSql(s"SHOW NAMESPACES IN $catalog LIKE 'AAA'", Seq(expected)) + runShowNamespacesSql(s"SHOW NAMESPACES IN $catalog LIKE 'aaa'", Seq(expected)) + } + } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowNamespacesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowNamespacesSuite.scala new file mode 100644 index 0000000000000..7a2c136eeada4 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowNamespacesSuite.scala @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command.v2 + +import org.apache.spark.SparkConf +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.connector.BasicInMemoryTableCatalog +import org.apache.spark.sql.execution.command +import org.apache.spark.sql.internal.SQLConf + +/** + * The class contains tests for the `SHOW NAMESPACES` command to check V2 table catalogs. + */ +class ShowNamespacesSuite extends command.ShowNamespacesSuiteBase with CommandSuiteBase { + override def sparkConf: SparkConf = super.sparkConf + .set("spark.sql.catalog.testcat_no_namespace", classOf[BasicInMemoryTableCatalog].getName) + + test("IN namespace doesn't exist") { + withSQLConf(SQLConf.DEFAULT_CATALOG.key -> catalog) { + runShowNamespacesSql("SHOW NAMESPACES in dummy", Seq.empty) + } + runShowNamespacesSql(s"SHOW NAMESPACES in $catalog.ns1", Seq.empty) + runShowNamespacesSql(s"SHOW NAMESPACES in $catalog.ns1.ns3", Seq.empty) + } + + test("default v2 catalog doesn't support namespace") { + withSQLConf(SQLConf.DEFAULT_CATALOG.key -> "testcat_no_namespace") { + val errMsg = intercept[AnalysisException] { + sql("SHOW NAMESPACES") + }.getMessage + assert(errMsg.contains("does not support namespaces")) + } + } + + test("v2 catalog doesn't support namespace") { + val errMsg = intercept[AnalysisException] { + sql("SHOW NAMESPACES in testcat_no_namespace") + }.getMessage + assert(errMsg.contains("does not support namespaces")) + } + + test("case sensitivity") { + Seq(true, false).foreach { caseSensitive => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + withNamespace(s"$catalog.AAA", s"$catalog.bbb") { + sql(s"CREATE NAMESPACE $catalog.AAA") + sql(s"CREATE NAMESPACE $catalog.bbb") + runShowNamespacesSql( + s"SHOW NAMESPACES IN $catalog", + Seq("AAA", "bbb") ++ builtinTopNamespaces) + runShowNamespacesSql(s"SHOW NAMESPACES IN $catalog LIKE 'AAA'", Seq("AAA")) + runShowNamespacesSql(s"SHOW NAMESPACES IN $catalog LIKE 'aaa'", Seq("AAA")) + } + } + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowNamespacesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowNamespacesSuite.scala new file mode 100644 index 0000000000000..eba2569c07736 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowNamespacesSuite.scala @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.execution.command + +import org.apache.spark.sql.execution.command.v1 +import org.apache.spark.sql.internal.SQLConf + +/** + * The class contains tests for the `SHOW NAMESPACES` and `SHOW DATABASES` commands to check + * V1 Hive external table catalog. + */ +class ShowNamespacesSuite extends v1.ShowNamespacesSuiteBase with CommandSuiteBase { + test("case sensitivity") { + Seq(true, false).foreach { caseSensitive => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + withNamespace(s"$catalog.AAA", s"$catalog.bbb") { + sql(s"CREATE NAMESPACE $catalog.AAA") + sql(s"CREATE NAMESPACE $catalog.bbb") + runShowNamespacesSql( + s"SHOW NAMESPACES IN $catalog", + Seq("aaa", "bbb") ++ builtinTopNamespaces) + runShowNamespacesSql(s"SHOW NAMESPACES IN $catalog LIKE 'AAA'", Seq("aaa")) + runShowNamespacesSql(s"SHOW NAMESPACES IN $catalog LIKE 'aaa'", Seq("aaa")) + } + } + } + } +} From 356fdc9a7fc88fd07751c40b920043eaebeb0abf Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Tue, 5 Jan 2021 17:20:08 +0900 Subject: [PATCH 0963/1009] [SPARK-34007][BUILD] Downgrade scala-maven-plugin to 4.3.0 ### What changes were proposed in this pull request? This PR is a partial revert of https://github.com/apache/spark/pull/30456 by downgrading scala-maven-plugin from 4.4.0 to 4.3.0. Currently, when you run the docker release script (`./dev/create-release/do-release-docker.sh`), it fails to compile as below during incremental compilation with zinc for an unknown reason: ``` [INFO] Compiling 21 Scala sources and 3 Java sources to /opt/spark-rm/output/spark-3.1.0-bin-hadoop2.7/resource-managers/yarn/target/scala-2.12/test-classes ... 
[ERROR] ## Exception when compiling 24 sources to /opt/spark-rm/output/spark-3.1.0-bin-hadoop2.7/resource-managers/yarn/target/scala-2.12/test-classes java.lang.SecurityException: class "javax.servlet.SessionCookieConfig"'s signer information does not match signer information of other classes in the same package java.lang.ClassLoader.checkCerts(ClassLoader.java:891) java.lang.ClassLoader.preDefineClass(ClassLoader.java:661) java.lang.ClassLoader.defineClass(ClassLoader.java:754) java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142) java.net.URLClassLoader.defineClass(URLClassLoader.java:468) java.net.URLClassLoader.access$100(URLClassLoader.java:74) java.net.URLClassLoader$1.run(URLClassLoader.java:369) java.net.URLClassLoader$1.run(URLClassLoader.java:363) java.security.AccessController.doPrivileged(Native Method) java.net.URLClassLoader.findClass(URLClassLoader.java:362) java.lang.ClassLoader.loadClass(ClassLoader.java:418) java.lang.ClassLoader.loadClass(ClassLoader.java:351) java.lang.Class.getDeclaredMethods0(Native Method) java.lang.Class.privateGetDeclaredMethods(Class.java:2701) java.lang.Class.privateGetPublicMethods(Class.java:2902) java.lang.Class.getMethods(Class.java:1615) sbt.internal.inc.ClassToAPI$.toDefinitions0(ClassToAPI.scala:170) sbt.internal.inc.ClassToAPI$.$anonfun$toDefinitions$1(ClassToAPI.scala:123) scala.collection.mutable.HashMap.getOrElseUpdate(HashMap.scala:86) sbt.internal.inc.ClassToAPI$.toDefinitions(ClassToAPI.scala:123) sbt.internal.inc.ClassToAPI$.$anonfun$process$1(ClassToAPI.scala:3 ``` This happens when it builds Spark with Hadoop 2. It doesn't reproduce when you build this alone. It should follow the sequence of build in the release script. This is fixed by downgrading. Looks like there is a regression in scala-maven-plugin somewhere between 4.4.0 and 4.3.0. ### Why are the changes needed? To unblock the release. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? It can be tested as below: ```bash ./dev/create-release/do-release-docker.sh -d $WORKING_DIR ``` Closes #31031 from HyukjinKwon/SPARK-34007. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 91ca0398a076e..1282d3cd8fd2b 100644 --- a/pom.xml +++ b/pom.xml @@ -2468,7 +2468,7 @@ net.alchim31.maven scala-maven-plugin - 4.4.0 + 4.3.0 eclipse-add-source From 329850c667305053e4433c4c6da0e47b231302d4 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Tue, 5 Jan 2021 17:21:32 +0900 Subject: [PATCH 0964/1009] [SPARK-32017][PYTHON][FOLLOW-UP] Rename HADOOP_VERSION to PYSPARK_HADOOP_VERSION in pip installation option ### What changes were proposed in this pull request? This PR is a followup of https://github.com/apache/spark/pull/29703. It renames `HADOOP_VERSION` environment variable to `PYSPARK_HADOOP_VERSION` in case `HADOOP_VERSION` is already being used somewhere. Arguably `HADOOP_VERSION` is a pretty common name. I see here and there: - https://www.ibm.com/support/knowledgecenter/SSZUMP_7.2.1/install_grid_sym/understanding_advanced_edition.html - https://cwiki.apache.org/confluence/display/ARROW/HDFS+Filesystem+Support - http://crs4.github.io/pydoop/_pydoop1/installation.html ### Why are the changes needed? To avoid the environment variables is unexpectedly conflicted. ### Does this PR introduce _any_ user-facing change? It renames the environment variable but it's not released yet. ### How was this patch tested? 
Existing unittests will test. Closes #31028 from HyukjinKwon/SPARK-32017-followup. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- python/docs/source/getting_started/install.rst | 10 +++++----- python/pyspark/find_spark_home.py | 2 +- python/setup.py | 14 +++++++------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst index a90f5fe159553..c5485424da664 100644 --- a/python/docs/source/getting_started/install.rst +++ b/python/docs/source/getting_started/install.rst @@ -48,11 +48,11 @@ If you want to install extra dependencies for a specific component, you can inst pip install pyspark[sql] -For PySpark with/without a specific Hadoop version, you can install it by using ``HADOOP_VERSION`` environment variables as below: +For PySpark with/without a specific Hadoop version, you can install it by using ``PYSPARK_HADOOP_VERSION`` environment variables as below: .. code-block:: bash - HADOOP_VERSION=2.7 pip install pyspark + PYSPARK_HADOOP_VERSION=2.7 pip install pyspark The default distribution uses Hadoop 3.2 and Hive 2.3. If users specify different versions of Hadoop, the pip installation automatically downloads a different version and use it in PySpark. Downloading it can take a while depending on @@ -60,15 +60,15 @@ the network and the mirror chosen. ``PYSPARK_RELEASE_MIRROR`` can be set to manu .. code-block:: bash - PYSPARK_RELEASE_MIRROR=http://mirror.apache-kr.org HADOOP_VERSION=2.7 pip install + PYSPARK_RELEASE_MIRROR=http://mirror.apache-kr.org PYSPARK_HADOOP_VERSION=2.7 pip install It is recommended to use ``-v`` option in ``pip`` to track the installation and download status. .. code-block:: bash - HADOOP_VERSION=2.7 pip install pyspark -v + PYSPARK_HADOOP_VERSION=2.7 pip install pyspark -v -Supported values in ``HADOOP_VERSION`` are: +Supported values in ``PYSPARK_HADOOP_VERSION`` are: - ``without``: Spark pre-built with user-provided Apache Hadoop - ``2.7``: Spark pre-built for Apache Hadoop 2.7 diff --git a/python/pyspark/find_spark_home.py b/python/pyspark/find_spark_home.py index 4521a36503a16..62a36d42ebc72 100755 --- a/python/pyspark/find_spark_home.py +++ b/python/pyspark/find_spark_home.py @@ -36,7 +36,7 @@ def is_spark_home(path): (os.path.isdir(os.path.join(path, "jars")) or os.path.isdir(os.path.join(path, "assembly")))) - # Spark distribution can be downloaded when HADOOP_VERSION environment variable is set. + # Spark distribution can be downloaded when PYSPARK_HADOOP_VERSION environment variable is set. # We should look up this directory first, see also SPARK-32017. spark_dist_dir = "spark-distribution" paths = [ diff --git a/python/setup.py b/python/setup.py index 7bb8a00171d37..c7f195b89aa7a 100755 --- a/python/setup.py +++ b/python/setup.py @@ -125,16 +125,16 @@ def run(self): spark_dist = os.path.join(self.install_lib, "pyspark", "spark-distribution") rmtree(spark_dist, ignore_errors=True) - if ("HADOOP_VERSION" in os.environ) or ("HIVE_VERSION" in os.environ): - # Note that SPARK_VERSION environment is just a testing purpose. - # HIVE_VERSION environment variable is also internal for now in case + if ("PYSPARK_HADOOP_VERSION" in os.environ) or ("PYSPARK_HIVE_VERSION" in os.environ): + # Note that PYSPARK_VERSION environment is just a testing purpose. + # PYSPARK_HIVE_VERSION environment variable is also internal for now in case # we support another version of Hive in the future. 
spark_version, hadoop_version, hive_version = install_module.checked_versions( - os.environ.get("SPARK_VERSION", VERSION).lower(), - os.environ.get("HADOOP_VERSION", install_module.DEFAULT_HADOOP).lower(), - os.environ.get("HIVE_VERSION", install_module.DEFAULT_HIVE).lower()) + os.environ.get("PYSPARK_VERSION", VERSION).lower(), + os.environ.get("PYSPARK_HADOOP_VERSION", install_module.DEFAULT_HADOOP).lower(), + os.environ.get("PYSPARK_HIVE_VERSION", install_module.DEFAULT_HIVE).lower()) - if ("SPARK_VERSION" not in os.environ and + if ("PYSPARK_VERSION" not in os.environ and ((install_module.DEFAULT_HADOOP, install_module.DEFAULT_HIVE) == (hadoop_version, hive_version))): # Do not download and install if they are same as default. From acf0a4fac2983a89c663d1622bf03a2e5929d121 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Tue, 5 Jan 2021 19:03:28 +0900 Subject: [PATCH 0965/1009] [SPARK-33999][BUILD] Make sbt unidoc success with JDK11 ### What changes were proposed in this pull request? This PR fixes an issue that `sbt unidoc` fails with JDK11. With the current master, `sbt unidoc` fails because the generated Java sources cause syntax error. As of JDK11, the default doclet seems to refuse such syntax error. Usually, it's enough to specify `--ignore-source-errors` option when `javadoc` runs to suppress the syntax error but unfortunately, we will then get an internal error. ``` [error] javadoc: error - An internal exception has occurred. [error] (java.lang.NullPointerException) [error] Please file a bug against the javadoc tool via the Java bug reporting page [error] (http://bugreport.java.com) after checking the Bug Database (http://bugs.java.com) [error] for duplicates. Include error messages and the following diagnostic in your report. Thank you. 
[error] java.lang.NullPointerException [error] at jdk.compiler/com.sun.tools.javac.code.Types.erasure(Types.java:2340) [error] at jdk.compiler/com.sun.tools.javac.code.Types$14.visitTypeVar(Types.java:2398) [error] at jdk.compiler/com.sun.tools.javac.code.Types$14.visitTypeVar(Types.java:2348) [error] at jdk.compiler/com.sun.tools.javac.code.Type$TypeVar.accept(Type.java:1659) [error] at jdk.compiler/com.sun.tools.javac.code.Types$DefaultTypeVisitor.visit(Types.java:4857) [error] at jdk.compiler/com.sun.tools.javac.code.Types.erasure(Types.java:2343) [error] at jdk.compiler/com.sun.tools.javac.code.Types.erasure(Types.java:2329) [error] at jdk.compiler/com.sun.tools.javac.model.JavacTypes.erasure(JavacTypes.java:134) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.toolkit.util.Utils$5.visitTypeVariable(Utils.java:1069) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.toolkit.util.Utils$5.visitTypeVariable(Utils.java:1048) [error] at jdk.compiler/com.sun.tools.javac.code.Type$TypeVar.accept(Type.java:1695) [error] at java.compiler11.0.9.1/javax.lang.model.util.AbstractTypeVisitor6.visit(AbstractTypeVisitor6.java:104) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.toolkit.util.Utils.asTypeElement(Utils.java:1086) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.LinkInfoImpl.setContext(LinkInfoImpl.java:410) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.LinkInfoImpl.(LinkInfoImpl.java:285) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.LinkFactoryImpl.getTypeParameterLink(LinkFactoryImpl.java:184) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.LinkFactoryImpl.getTypeParameterLinks(LinkFactoryImpl.java:167) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.toolkit.util.links.LinkFactory.getLink(LinkFactory.java:196) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.HtmlDocletWriter.getLink(HtmlDocletWriter.java:679) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.HtmlDocletWriter.addPreQualifiedClassLink(HtmlDocletWriter.java:814) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.HtmlDocletWriter.addPreQualifiedStrongClassLink(HtmlDocletWriter.java:839) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.AbstractTreeWriter.addPartialInfo(AbstractTreeWriter.java:185) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.AbstractTreeWriter.addLevelInfo(AbstractTreeWriter.java:92) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.AbstractTreeWriter.addLevelInfo(AbstractTreeWriter.java:94) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.AbstractTreeWriter.addTree(AbstractTreeWriter.java:129) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.AbstractTreeWriter.addTree(AbstractTreeWriter.java:112) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.PackageTreeWriter.generatePackageTreeFile(PackageTreeWriter.java:115) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.PackageTreeWriter.generate(PackageTreeWriter.java:92) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.HtmlDoclet.generatePackageFiles(HtmlDoclet.java:312) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.toolkit.AbstractDoclet.startGeneration(AbstractDoclet.java:210) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.toolkit.AbstractDoclet.run(AbstractDoclet.java:114) [error] at jdk.javadoc/jdk.javadoc.doclet.StandardDoclet.run(StandardDoclet.java:72) [error] at 
jdk.javadoc/jdk.javadoc.internal.tool.Start.parseAndExecute(Start.java:588) [error] at jdk.javadoc/jdk.javadoc.internal.tool.Start.begin(Start.java:432) [error] at jdk.javadoc/jdk.javadoc.internal.tool.Start.begin(Start.java:345) [error] at jdk.javadoc/jdk.javadoc.internal.tool.Main.execute(Main.java:63) [error] at jdk.javadoc/jdk.javadoc.internal.tool.Main.main(Main.java:52) ``` I found the internal error happens when a generated Java class is from a Scala class which is package private and generic. I also found that if we don't generate class hierarchy tree in the JavaDoc, we can suppress the internal error for JDK11 and later. ### Why are the changes needed? Make the build success with sbt and JDK11. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? I confirmed the following command successfully finish with JDK8 and JDK11. ``` $ build/sbt -Phive -Phive-thriftserver -Pyarn -Pkubernetes -Pmesos -Pspark-ganglia-lgpl -Pkinesis-asl -Phadoop-cloud clean unidoc ``` I also confirmed html files are successfully generated under `target/javaunidoc`. Closes #31023 from sarutak/fix-genjavadoc-java11. Authored-by: Kousuke Saruta Signed-off-by: HyukjinKwon --- project/SparkBuild.scala | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index aa3e2cd65e185..668701be0ae98 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -958,18 +958,24 @@ object Unidoc { .map(_.filterNot(_.getCanonicalPath.contains("org/apache/hadoop"))) }, - javacOptions in (JavaUnidoc, unidoc) := Seq( - "-windowtitle", "Spark " + version.value.replaceAll("-SNAPSHOT", "") + " JavaDoc", - "-public", - "-noqualifier", "java.lang", - "-tag", """example:a:Example\:""", - "-tag", """note:a:Note\:""", - "-tag", "group:X", - "-tag", "tparam:X", - "-tag", "constructor:X", - "-tag", "todo:X", - "-tag", "groupname:X" - ), + javacOptions in (JavaUnidoc, unidoc) := { + val versionParts = System.getProperty("java.version").split("[+.\\-]+", 3) + var major = versionParts(0).toInt + if (major == 1) major = versionParts(1).toInt + + Seq( + "-windowtitle", "Spark " + version.value.replaceAll("-SNAPSHOT", "") + " JavaDoc", + "-public", + "-noqualifier", "java.lang", + "-tag", """example:a:Example\:""", + "-tag", """note:a:Note\:""", + "-tag", "group:X", + "-tag", "tparam:X", + "-tag", "constructor:X", + "-tag", "todo:X", + "-tag", "groupname:X", + ) ++ { if (major >= 9) Seq("--ignore-source-errors", "-notree") else Seq.empty } + }, // Use GitHub repository for Scaladoc source links unidocSourceBase := s"https://github.com/apache/spark/tree/v${version.value}", From 8d09f9649510bf5d812c82b04f7711b9252a7db0 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Tue, 5 Jan 2021 19:48:10 +0900 Subject: [PATCH 0966/1009] [SPARK-34010][SQL][DODCS] Use python3 instead of python in SQL documentation build ### What changes were proposed in this pull request? This PR proposes to use python3 instead of python in SQL documentation build. After SPARK-29672, we use `sql/create-docs.sh` everywhere in Spark dev. We should fix it in `sql/create-docs.sh` too. This blocks release because the release container does not have `python` but only `python3`. ### Why are the changes needed? To unblock the release. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? I manually ran the script Closes #31041 from HyukjinKwon/SPARK-34010. 
Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- sql/create-docs.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/create-docs.sh b/sql/create-docs.sh index 6614c714e90c7..8721df874ee73 100755 --- a/sql/create-docs.sh +++ b/sql/create-docs.sh @@ -27,14 +27,14 @@ set -e FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)" SPARK_HOME="$(cd "`dirname "${BASH_SOURCE[0]}"`"/..; pwd)" -if ! hash python 2>/dev/null; then - echo "Missing python in your path, skipping SQL documentation generation." +if ! hash python3 2>/dev/null; then + echo "Missing python3 in your path, skipping SQL documentation generation." exit 0 fi if ! hash mkdocs 2>/dev/null; then echo "Missing mkdocs in your path, trying to install mkdocs for SQL documentation generation." - pip install mkdocs + pip3 install mkdocs fi pushd "$FWDIR" > /dev/null From 14c2edae7e8e02e18a24862a6c113b02719d4785 Mon Sep 17 00:00:00 2001 From: huangtianhua Date: Tue, 5 Jan 2021 21:50:21 +0900 Subject: [PATCH 0967/1009] [SPARK-34009][BUILD] To activate profile 'aarch64' based on OS settings Instead of taking parameter '-Paarch64' when maven build to activate the profile based on OS settings automatically, than we can use same command to build on aarch64. ### What changes were proposed in this pull request? Activate profile 'aarch64' based on OS ### Why are the changes needed? After this change, we build spark using the same command for aarch64 as x86. ### Does this PR introduce _any_ user-facing change? No. After this change, no need to taking parameter '-Paarch64' when build, but take the parameter works also. ### How was this patch tested? ARM daily CI. Closes #31036 from huangtianhua/SPARK-34009. Authored-by: huangtianhua Signed-off-by: HyukjinKwon --- pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pom.xml b/pom.xml index 1282d3cd8fd2b..07c18f78e0735 100644 --- a/pom.xml +++ b/pom.xml @@ -3371,6 +3371,12 @@ org.openlabtesting.leveldbjni + + + linux + aarch64 + +
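As a rough illustration of what "activate the profile based on OS settings" means in the SPARK-34009 change just above: Maven compares the profile's `<os>` activation (name `linux`, arch `aarch64`) against the JVM's `os.*` system properties. The sketch below is only an approximation for illustration, not Maven's actual matcher; the object name and the simple equality checks are assumptions.

```scala
// Minimal sketch: approximates how the new aarch64 profile's <os> activation
// (name = linux, arch = aarch64) relates to the JVM's os.* system properties.
// Maven performs this matching internally; this object only makes it visible.
object Aarch64ProfileActivationSketch {
  def main(args: Array[String]): Unit = {
    val osName = System.getProperty("os.name").toLowerCase // e.g. "linux"
    val osArch = System.getProperty("os.arch").toLowerCase // e.g. "aarch64" or "amd64"
    val wouldActivate = osName == "linux" && osArch == "aarch64"
    println(s"os.name=$osName, os.arch=$osArch, aarch64 profile would activate: $wouldActivate")
  }
}
```

On a typical x86_64 Linux machine this prints `amd64` and `false`, while on an ARM box it prints `aarch64` and `true`, which is why the same `mvn` command can now be used unchanged on both architectures.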
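The JDK detection that the SPARK-33999 patch further above adds to `project/SparkBuild.scala` also benefits from a worked example: pre-9 JDKs report versions like `1.8.0_275`, while newer ones report strings like `11.0.9.1` or `17+35`. The following is a self-contained sketch of that logic under stated assumptions (the object name, helper names, `main` driver, and sample version strings are illustrative and not part of the patch):

```scala
// Standalone sketch mirroring the major-version parsing used for the unidoc
// javacOptions: split on '.', '+', or '-' and handle the legacy "1.x" scheme.
object JavaMajorVersionSketch {
  // Parse strings like "1.8.0_275" (JDK 8) or "11.0.9.1" (JDK 11) into a major version.
  def majorVersion(javaVersion: String): Int = {
    val versionParts = javaVersion.split("[+.\\-]+", 3)
    var major = versionParts(0).toInt
    // Pre-JDK 9 versions report "1.x.y", so the real major number is the second part.
    if (major == 1) major = versionParts(1).toInt
    major
  }

  // Only the JDK 9+ doclet needs the extra flags for the generated Java sources.
  def extraJavadocFlags(major: Int): Seq[String] =
    if (major >= 9) Seq("--ignore-source-errors", "-notree") else Seq.empty

  def main(args: Array[String]): Unit = {
    Seq("1.8.0_275", "11.0.9.1", "17+35").foreach { v =>
      val major = majorVersion(v)
      println(s"$v -> major $major, extra javadoc flags: ${extraJavadocFlags(major)}")
    }
  }
}
```

Running this prints major 8 for `1.8.0_275` and majors 11 and 17 for the newer strings, so `--ignore-source-errors` and `-notree` are only appended where a JDK 9+ javadoc would otherwise fail.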
      From cc1d9d25fb4c2e4af912d6f9802de8f351c32deb Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Tue, 5 Jan 2021 16:15:33 +0000 Subject: [PATCH 0968/1009] [SPARK-33542][SQL] Group exception messages in catalyst/catalog ### What changes were proposed in this pull request? This PR group exception messages in `/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog`. ### Why are the changes needed? It will largely help with standardization of error messages and its maintenance. ### Does this PR introduce _any_ user-facing change? No. Error messages remain unchanged. ### How was this patch tested? No new tests - pass all original tests to make sure it doesn't break any existing behavior. Closes #30870 from beliefer/SPARK-33542. Lead-authored-by: gengjiaan Co-authored-by: Jiaan Geng Co-authored-by: beliefer Signed-off-by: Wenchen Fan --- .../spark/sql/QueryCompilationErrors.scala | 170 +++++++++++++++++- .../spark/sql/QueryExecutionErrors.scala | 56 ++++++ .../catalog/GlobalTempViewManager.scala | 5 +- .../catalyst/catalog/InMemoryCatalog.scala | 42 ++--- .../sql/catalyst/catalog/SessionCatalog.scala | 78 ++++---- .../catalyst/catalog/functionResources.scala | 4 +- .../sql/catalyst/catalog/interface.scala | 45 ++--- 7 files changed, 295 insertions(+), 105 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala index ff4c54df96f31..ed18e94f46ecc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala @@ -17,8 +17,10 @@ package org.apache.spark.sql.errors +import org.apache.hadoop.fs.Path + import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} +import org.apache.spark.sql.catalyst.{FunctionIdentifier, QualifiedTableName, TableIdentifier} import org.apache.spark.sql.catalyst.analysis.{ResolvedNamespace, ResolvedView} import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, Expression, GroupingID, NamedExpression, SpecifiedWindowFrame, WindowFrame, WindowFunction, WindowSpecDefinition} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SerdeInfo} @@ -364,10 +366,16 @@ object QueryCompilationErrors { new AnalysisException(s"undefined function $name") } + def invalidFunctionArgumentsError( + name: String, expectedInfo: String, actualNumber: Int): Throwable = { + new AnalysisException(s"Invalid number of arguments for function $name. " + + s"Expected: $expectedInfo; Found: $actualNumber") + } + def invalidFunctionArgumentNumberError( validParametersCount: Seq[Int], name: String, params: Seq[Class[Expression]]): Throwable = { - val invalidArgumentsMsg = if (validParametersCount.length == 0) { - s"Invalid arguments for function $name" + if (validParametersCount.length == 0) { + new AnalysisException(s"Invalid arguments for function $name") } else { val expectedNumberOfParameters = if (validParametersCount.length == 1) { validParametersCount.head.toString @@ -375,10 +383,8 @@ object QueryCompilationErrors { validParametersCount.init.mkString("one of ", ", ", " and ") + validParametersCount.last } - s"Invalid number of arguments for function $name. 
" + - s"Expected: $expectedNumberOfParameters; Found: ${params.length}" + invalidFunctionArgumentsError(name, expectedNumberOfParameters, params.length) } - new AnalysisException(invalidArgumentsMsg) } def functionAcceptsOnlyOneArgumentError(name: String): Throwable = { @@ -504,4 +510,156 @@ object QueryCompilationErrors { def columnDoesNotExistError(colName: String): Throwable = { new AnalysisException(s"Column $colName does not exist") } + + def renameTempViewToExistingViewError(oldName: String, newName: String): Throwable = { + new AnalysisException( + s"rename temporary view from '$oldName' to '$newName': destination view already exists") + } + + def databaseNotEmptyError(db: String, details: String): Throwable = { + new AnalysisException(s"Database $db is not empty. One or more $details exist.") + } + + def invalidNameForTableOrDatabaseError(name: String): Throwable = { + new AnalysisException(s"`$name` is not a valid name for tables/databases. " + + "Valid names only contain alphabet characters, numbers and _.") + } + + def cannotCreateDatabaseWithSameNameAsPreservedDatabaseError(database: String): Throwable = { + new AnalysisException(s"$database is a system preserved database, " + + "you cannot create a database with this name.") + } + + def cannotDropDefaultDatabaseError(): Throwable = { + new AnalysisException("Can not drop default database") + } + + def cannotUsePreservedDatabaseAsCurrentDatabaseError(database: String): Throwable = { + new AnalysisException(s"$database is a system preserved database, you cannot use it as " + + "current database. To access global temporary views, you should use qualified name with " + + s"the GLOBAL_TEMP_DATABASE, e.g. SELECT * FROM $database.viewName.") + } + + def createExternalTableWithoutLocationError(): Throwable = { + new AnalysisException("CREATE EXTERNAL TABLE must be accompanied by LOCATION") + } + + def cannotOperateManagedTableWithExistingLocationError( + methodName: String, tableIdentifier: TableIdentifier, tableLocation: Path): Throwable = { + new AnalysisException(s"Can not $methodName the managed table('$tableIdentifier')" + + s". The associated location('${tableLocation.toString}') already exists.") + } + + def dropNonExistentColumnsNotSupportedError( + nonExistentColumnNames: Seq[String]): Throwable = { + new AnalysisException( + s""" + |Some existing schema fields (${nonExistentColumnNames.mkString("[", ",", "]")}) are + |not present in the new schema. We don't support dropping columns yet. + """.stripMargin) + } + + def cannotRetrieveTableOrViewNotInSameDatabaseError( + qualifiedTableNames: Seq[QualifiedTableName]): Throwable = { + new AnalysisException("Only the tables/views belong to the same database can be retrieved. 
" + + s"Querying tables/views are $qualifiedTableNames") + } + + def renameTableSourceAndDestinationMismatchError(db: String, newDb: String): Throwable = { + new AnalysisException( + s"RENAME TABLE source and destination databases do not match: '$db' != '$newDb'") + } + + def cannotRenameTempViewWithDatabaseSpecifiedError( + oldName: TableIdentifier, newName: TableIdentifier): Throwable = { + new AnalysisException(s"RENAME TEMPORARY VIEW from '$oldName' to '$newName': cannot " + + s"specify database name '${newName.database.get}' in the destination table") + } + + def cannotRenameTempViewToExistingTableError( + oldName: TableIdentifier, newName: TableIdentifier): Throwable = { + new AnalysisException(s"RENAME TEMPORARY VIEW from '$oldName' to '$newName': " + + "destination table already exists") + } + + def invalidPartitionSpecError(details: String): Throwable = { + new AnalysisException(s"Partition spec is invalid. $details") + } + + def functionAlreadyExistsError(func: FunctionIdentifier): Throwable = { + new AnalysisException(s"Function $func already exists") + } + + def cannotLoadClassWhenRegisteringFunctionError( + className: String, func: FunctionIdentifier): Throwable = { + new AnalysisException(s"Can not load class '$className' when registering " + + s"the function '$func', please make sure it is on the classpath") + } + + def v2CatalogNotSupportFunctionError( + catalog: String, namespace: Seq[String]): Throwable = { + new AnalysisException("V2 catalog does not support functions yet. " + + s"catalog: $catalog, namespace: '${namespace.quoted}'") + } + + def resourceTypeNotSupportedError(resourceType: String): Throwable = { + new AnalysisException(s"Resource Type '$resourceType' is not supported.") + } + + def tableNotSpecifyDatabaseError(identifier: TableIdentifier): Throwable = { + new AnalysisException(s"table $identifier did not specify database") + } + + def tableNotSpecifyLocationUriError(identifier: TableIdentifier): Throwable = { + new AnalysisException(s"table $identifier did not specify locationUri") + } + + def partitionNotSpecifyLocationUriError(specString: String): Throwable = { + new AnalysisException(s"Partition [$specString] did not specify locationUri") + } + + def invalidBucketNumberError(bucketingMaxBuckets: Int, numBuckets: Int): Throwable = { + new AnalysisException( + s"Number of buckets should be greater than 0 but less than or equal to " + + s"bucketing.maxBuckets (`$bucketingMaxBuckets`). 
Got `$numBuckets`") + } + + def corruptedTableNameContextInCatalogError(numParts: Int, index: Int): Throwable = { + new AnalysisException("Corrupted table name context in catalog: " + + s"$numParts parts expected, but part $index is missing.") + } + + def corruptedViewSQLConfigsInCatalogError(e: Exception): Throwable = { + new AnalysisException("Corrupted view SQL configs in catalog", cause = Some(e)) + } + + def corruptedViewQueryOutputColumnsInCatalogError(numCols: String, index: Int): Throwable = { + new AnalysisException("Corrupted view query output column names in catalog: " + + s"$numCols parts expected, but part $index is missing.") + } + + def corruptedViewReferredTempViewInCatalogError(e: Exception): Throwable = { + new AnalysisException("corrupted view referred temp view names in catalog", cause = Some(e)) + } + + def corruptedViewReferredTempFunctionsInCatalogError(e: Exception): Throwable = { + new AnalysisException( + "corrupted view referred temp functions names in catalog", cause = Some(e)) + } + + def columnStatisticsDeserializationNotSupportedError( + name: String, dataType: DataType): Throwable = { + new AnalysisException("Column statistics deserialization is not supported for " + + s"column $name of data type: $dataType.") + } + + def columnStatisticsSerializationNotSupportedError( + colName: String, dataType: DataType): Throwable = { + new AnalysisException("Column statistics serialization is not supported for " + + s"column $colName of data type: $dataType.") + } + + def cannotReadCorruptedTablePropertyError(key: String, details: String = ""): Throwable = { + new AnalysisException(s"Cannot read table property '$key' as it's corrupted.$details") + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryExecutionErrors.scala index d24e61c699241..61dcddb979a1b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryExecutionErrors.scala @@ -17,7 +17,13 @@ package org.apache.spark.sql.errors +import java.io.IOException + +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.analysis.UnresolvedGenerator +import org.apache.spark.sql.catalyst.catalog.CatalogDatabase import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan} /** @@ -56,4 +62,54 @@ object QueryExecutionErrors { def cannotTerminateGeneratorError(generator: UnresolvedGenerator): Throwable = { new UnsupportedOperationException(s"Cannot terminate expression: $generator") } + + def unableToCreateDatabaseAsFailedToCreateDirectoryError( + dbDefinition: CatalogDatabase, e: IOException): Throwable = { + new SparkException(s"Unable to create database ${dbDefinition.name} as failed " + + s"to create its directory ${dbDefinition.locationUri}", e) + } + + def unableToDropDatabaseAsFailedToDeleteDirectoryError( + dbDefinition: CatalogDatabase, e: IOException): Throwable = { + new SparkException(s"Unable to drop database ${dbDefinition.name} as failed " + + s"to delete its directory ${dbDefinition.locationUri}", e) + } + + def unableToCreateTableAsFailedToCreateDirectoryError( + table: String, defaultTableLocation: Path, e: IOException): Throwable = { + new SparkException(s"Unable to create table $table as failed " + + s"to create its directory $defaultTableLocation", e) + } + + def unableToDeletePartitionPathError(partitionPath: Path, e: IOException): Throwable = { + 
new SparkException(s"Unable to delete partition path $partitionPath", e) + } + + def unableToDropTableAsFailedToDeleteDirectoryError( + table: String, dir: Path, e: IOException): Throwable = { + new SparkException(s"Unable to drop table $table as failed " + + s"to delete its directory $dir", e) + } + + def unableToRenameTableAsFailedToRenameDirectoryError( + oldName: String, newName: String, oldDir: Path, e: IOException): Throwable = { + new SparkException(s"Unable to rename table $oldName to $newName as failed " + + s"to rename its directory $oldDir", e) + } + + def unableToCreatePartitionPathError(partitionPath: Path, e: IOException): Throwable = { + new SparkException(s"Unable to create partition path $partitionPath", e) + } + + def unableToRenamePartitionPathError(oldPartPath: Path, e: IOException): Throwable = { + new SparkException(s"Unable to rename partition path $oldPartPath", e) + } + + def methodNotImplementedError(methodName: String): Throwable = { + new UnsupportedOperationException(s"$methodName is not implemented") + } + + def tableStatsNotSpecifiedError(): Throwable = { + new IllegalStateException("table stats must be specified.") + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/GlobalTempViewManager.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/GlobalTempViewManager.scala index 6095ac0bc9c50..c7bd2a4cd800d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/GlobalTempViewManager.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/GlobalTempViewManager.scala @@ -21,10 +21,10 @@ import javax.annotation.concurrent.GuardedBy import scala.collection.mutable -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.TempTableAlreadyExistsException import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.util.StringUtils +import org.apache.spark.sql.errors.QueryCompilationErrors /** @@ -92,8 +92,7 @@ class GlobalTempViewManager(val database: String) { def rename(oldName: String, newName: String): Boolean = synchronized { if (viewDefinitions.contains(oldName)) { if (viewDefinitions.contains(newName)) { - throw new AnalysisException( - s"rename temporary view from '$oldName' to '$newName': destination view already exists") + throw QueryCompilationErrors.renameTempViewToExistingViewError(oldName, newName) } val viewDefinition = viewDefinitions(oldName) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala index 31644a5ae4e35..64b4a112fe786 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala @@ -24,13 +24,13 @@ import scala.collection.mutable import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path -import org.apache.spark.{SparkConf, SparkException} -import org.apache.spark.sql.AnalysisException +import org.apache.spark.SparkConf import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils._ import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.util.StringUtils +import org.apache.spark.sql.errors.{QueryCompilationErrors, 
QueryExecutionErrors} import org.apache.spark.sql.types.StructType /** @@ -112,8 +112,8 @@ class InMemoryCatalog( fs.mkdirs(location) } catch { case e: IOException => - throw new SparkException(s"Unable to create database ${dbDefinition.name} as failed " + - s"to create its directory ${dbDefinition.locationUri}", e) + throw QueryExecutionErrors.unableToCreateDatabaseAsFailedToCreateDirectoryError( + dbDefinition, e) } catalog.put(dbDefinition.name, new DatabaseDesc(dbDefinition)) } @@ -127,10 +127,10 @@ class InMemoryCatalog( if (!cascade) { // If cascade is false, make sure the database is empty. if (catalog(db).tables.nonEmpty) { - throw new AnalysisException(s"Database $db is not empty. One or more tables exist.") + throw QueryCompilationErrors.databaseNotEmptyError(db, "tables") } if (catalog(db).functions.nonEmpty) { - throw new AnalysisException(s"Database '$db' is not empty. One or more functions exist.") + throw QueryCompilationErrors.databaseNotEmptyError(db, "functions") } } // Remove the database. @@ -141,8 +141,8 @@ class InMemoryCatalog( fs.delete(location, true) } catch { case e: IOException => - throw new SparkException(s"Unable to drop database ${dbDefinition.name} as failed " + - s"to delete its directory ${dbDefinition.locationUri}", e) + throw QueryExecutionErrors.unableToDropDatabaseAsFailedToDeleteDirectoryError( + dbDefinition, e) } catalog.remove(db) } else { @@ -209,8 +209,8 @@ class InMemoryCatalog( fs.mkdirs(defaultTableLocation) } catch { case e: IOException => - throw new SparkException(s"Unable to create table $table as failed " + - s"to create its directory $defaultTableLocation", e) + throw QueryExecutionErrors.unableToCreateTableAsFailedToCreateDirectoryError( + table, defaultTableLocation, e) } tableDefinition.withNewStorage(locationUri = Some(defaultTableLocation.toUri)) } else { @@ -239,7 +239,7 @@ class InMemoryCatalog( fs.delete(partitionPath, true) } catch { case e: IOException => - throw new SparkException(s"Unable to delete partition path $partitionPath", e) + throw QueryExecutionErrors.unableToDeletePartitionPathError(partitionPath, e) } } assert(tableMeta.storage.locationUri.isDefined, @@ -252,8 +252,8 @@ class InMemoryCatalog( fs.delete(dir, true) } catch { case e: IOException => - throw new SparkException(s"Unable to drop table $table as failed " + - s"to delete its directory $dir", e) + throw QueryExecutionErrors.unableToDropTableAsFailedToDeleteDirectoryError( + table, dir, e) } } catalog(db).tables.remove(table) @@ -284,8 +284,8 @@ class InMemoryCatalog( fs.rename(oldDir, newDir) } catch { case e: IOException => - throw new SparkException(s"Unable to rename table $oldName to $newName as failed " + - s"to rename its directory $oldDir", e) + throw QueryExecutionErrors.unableToRenameTableAsFailedToRenameDirectoryError( + oldName, newName, oldDir, e) } oldDesc.table = oldDesc.table.withNewStorage(locationUri = Some(newDir.toUri)) } @@ -358,7 +358,7 @@ class InMemoryCatalog( loadPath: String, isOverwrite: Boolean, isSrcLocal: Boolean): Unit = { - throw new UnsupportedOperationException("loadTable is not implemented") + throw QueryExecutionErrors.methodNotImplementedError("loadTable") } override def loadPartition( @@ -369,7 +369,7 @@ class InMemoryCatalog( isOverwrite: Boolean, inheritTableSpecs: Boolean, isSrcLocal: Boolean): Unit = { - throw new UnsupportedOperationException("loadPartition is not implemented.") + throw QueryExecutionErrors.methodNotImplementedError("loadPartition") } override def loadDynamicPartitions( @@ -379,7 +379,7 @@ class 
InMemoryCatalog( partition: TablePartitionSpec, replace: Boolean, numDP: Int): Unit = { - throw new UnsupportedOperationException("loadDynamicPartitions is not implemented.") + throw QueryExecutionErrors.methodNotImplementedError("loadDynamicPartitions") } // -------------------------------------------------------------------------- @@ -416,7 +416,7 @@ class InMemoryCatalog( } } catch { case e: IOException => - throw new SparkException(s"Unable to create partition path $partitionPath", e) + throw QueryExecutionErrors.unableToCreatePartitionPathError(partitionPath, e) } existingParts.put( @@ -457,7 +457,7 @@ class InMemoryCatalog( fs.delete(partitionPath, true) } catch { case e: IOException => - throw new SparkException(s"Unable to delete partition path $partitionPath", e) + throw QueryExecutionErrors.unableToDeletePartitionPathError(partitionPath, e) } } existingParts.remove(p) @@ -490,7 +490,7 @@ class InMemoryCatalog( fs.rename(oldPartPath, newPartPath) } catch { case e: IOException => - throw new SparkException(s"Unable to rename partition path $oldPartPath", e) + throw QueryExecutionErrors.unableToRenamePartitionPathError(oldPartPath, e) } oldPartition.copy( spec = newSpec, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 9b542d6bd95ce..5f7028bf87c87 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -40,6 +40,7 @@ import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParserInterface} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias, View} import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, StringUtils} import org.apache.spark.sql.connector.catalog.CatalogManager +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.GLOBAL_TEMP_DATABASE import org.apache.spark.sql.types.StructType @@ -120,8 +121,7 @@ class SessionCatalog( */ private def validateName(name: String): Unit = { if (!validNameFormat.pattern.matcher(name).matches()) { - throw new AnalysisException(s"`$name` is not a valid name for tables/databases. 
" + - "Valid names only contain alphabet characters, numbers and _.") + throw QueryCompilationErrors.invalidNameForTableOrDatabaseError(name) } } @@ -216,9 +216,8 @@ class SessionCatalog( def createDatabase(dbDefinition: CatalogDatabase, ignoreIfExists: Boolean): Unit = { val dbName = formatDatabaseName(dbDefinition.name) if (dbName == globalTempViewManager.database) { - throw new AnalysisException( - s"${globalTempViewManager.database} is a system preserved database, " + - "you cannot create a database with this name.") + throw QueryCompilationErrors.cannotCreateDatabaseWithSameNameAsPreservedDatabaseError( + globalTempViewManager.database) } validateName(dbName) externalCatalog.createDatabase( @@ -238,7 +237,7 @@ class SessionCatalog( def dropDatabase(db: String, ignoreIfNotExists: Boolean, cascade: Boolean): Unit = { val dbName = formatDatabaseName(db) if (dbName == DEFAULT_DATABASE) { - throw new AnalysisException(s"Can not drop default database") + throw QueryCompilationErrors.cannotDropDefaultDatabaseError } if (cascade && databaseExists(dbName)) { listTables(dbName).foreach { t => @@ -279,11 +278,8 @@ class SessionCatalog( def setCurrentDatabase(db: String): Unit = { val dbName = formatDatabaseName(db) if (dbName == globalTempViewManager.database) { - throw new AnalysisException( - s"${globalTempViewManager.database} is a system preserved database, " + - "you cannot use it as current database. To access global temporary views, you should " + - "use qualified name with the GLOBAL_TEMP_DATABASE, e.g. SELECT * FROM " + - s"${globalTempViewManager.database}.viewName.") + throw QueryCompilationErrors.cannotUsePreservedDatabaseAsCurrentDatabaseError( + globalTempViewManager.database) } requireDbExists(dbName) synchronized { currentDb = dbName } @@ -320,7 +316,7 @@ class SessionCatalog( validateLocation: Boolean = true): Unit = { val isExternal = tableDefinition.tableType == CatalogTableType.EXTERNAL if (isExternal && tableDefinition.storage.locationUri.isEmpty) { - throw new AnalysisException(s"CREATE EXTERNAL TABLE must be accompanied by LOCATION") + throw QueryCompilationErrors.createExternalTableWithoutLocationError } val db = formatDatabaseName(tableDefinition.identifier.database.getOrElse(getCurrentDatabase)) @@ -359,8 +355,8 @@ class SessionCatalog( val fs = tableLocation.getFileSystem(hadoopConf) if (fs.exists(tableLocation) && fs.listStatus(tableLocation).nonEmpty) { - throw new AnalysisException(s"Can not create the managed table('${table.identifier}')" + - s". The associated location('${tableLocation.toString}') already exists.") + throw QueryCompilationErrors.cannotOperateManagedTableWithExistingLocationError( + "create", table.identifier, tableLocation) } } } @@ -428,11 +424,7 @@ class SessionCatalog( val nonExistentColumnNames = oldDataSchema.map(_.name).filterNot(columnNameResolved(newDataSchema, _)) if (nonExistentColumnNames.nonEmpty) { - throw new AnalysisException( - s""" - |Some existing schema fields (${nonExistentColumnNames.mkString("[", ",", "]")}) are - |not present in the new schema. We don't support dropping columns yet. 
- """.stripMargin) + throw QueryCompilationErrors.dropNonExistentColumnsNotSupportedError(nonExistentColumnNames) } externalCatalog.alterTableDataSchema(db, table, newDataSchema) @@ -508,10 +500,8 @@ class SessionCatalog( if (dbs.distinct.size != 1) { val tables = names.map(name => formatTableName(name.table)) val qualifiedTableNames = dbs.zip(tables).map { case (d, t) => QualifiedTableName(d, t)} - throw new AnalysisException( - s"Only the tables/views belong to the same database can be retrieved. Querying " + - s"tables/views are $qualifiedTableNames" - ) + throw QueryCompilationErrors.cannotRetrieveTableOrViewNotInSameDatabaseError( + qualifiedTableNames) } val db = formatDatabaseName(dbs.head) requireDbExists(db) @@ -722,8 +712,7 @@ class SessionCatalog( val db = formatDatabaseName(oldName.database.getOrElse(currentDb)) newName.database.map(formatDatabaseName).foreach { newDb => if (db != newDb) { - throw new AnalysisException( - s"RENAME TABLE source and destination databases do not match: '$db' != '$newDb'") + throw QueryCompilationErrors.renameTableSourceAndDestinationMismatchError(db, newDb) } } @@ -741,13 +730,12 @@ class SessionCatalog( externalCatalog.renameTable(db, oldTableName, newTableName) } else { if (newName.database.isDefined) { - throw new AnalysisException( - s"RENAME TEMPORARY VIEW from '$oldName' to '$newName': cannot specify database " + - s"name '${newName.database.get}' in the destination table") + throw QueryCompilationErrors.cannotRenameTempViewWithDatabaseSpecifiedError( + oldName, newName) } if (tempViews.contains(newTableName)) { - throw new AnalysisException(s"RENAME TEMPORARY VIEW from '$oldName' to '$newName': " + - "destination table already exists") + throw QueryCompilationErrors.cannotRenameTempViewToExistingTableError( + oldName, newName) } val table = tempViews(oldTableName) tempViews.remove(oldTableName) @@ -1192,8 +1180,8 @@ class SessionCatalog( specs.foreach { s => if (s.values.exists(_.isEmpty)) { val spec = s.map(p => p._1 + "=" + p._2).mkString("[", ", ", "]") - throw new AnalysisException( - s"Partition spec is invalid. The spec ($spec) contains an empty partition column value") + throw QueryCompilationErrors.invalidPartitionSpecError( + s"The spec ($spec) contains an empty partition column value") } } } @@ -1223,10 +1211,10 @@ class SessionCatalog( val defined = table.partitionColumnNames specs.foreach { s => if (!s.keys.forall(defined.contains)) { - throw new AnalysisException( - s"Partition spec is invalid. The spec (${s.keys.mkString(", ")}) must be contained " + - s"within the partition spec (${table.partitionColumnNames.mkString(", ")}) defined " + - s"in table '${table.identifier}'") + throw QueryCompilationErrors.invalidPartitionSpecError( + s"The spec (${s.keys.mkString(", ")}) must be contained " + + s"within the partition spec (${table.partitionColumnNames.mkString(", ")}) defined " + + s"in table '${table.identifier}'") } } } @@ -1382,8 +1370,8 @@ class SessionCatalog( // Check input argument size if (e.inputTypes.size != input.size) { - throw new AnalysisException(s"Invalid number of arguments for function $name. 
" + - s"Expected: ${e.inputTypes.size}; Found: ${input.size}") + throw QueryCompilationErrors.invalidFunctionArgumentsError( + name, e.inputTypes.size.toString, input.size) } e } else { @@ -1409,15 +1397,14 @@ class SessionCatalog( functionBuilder: Option[FunctionBuilder] = None): Unit = { val func = funcDefinition.identifier if (functionRegistry.functionExists(func) && !overrideIfExists) { - throw new AnalysisException(s"Function $func already exists") + throw QueryCompilationErrors.functionAlreadyExistsError(func) } val info = new ExpressionInfo(funcDefinition.className, func.database.orNull, func.funcName) val builder = functionBuilder.getOrElse { val className = funcDefinition.className if (!Utils.classIsLoadable(className)) { - throw new AnalysisException(s"Can not load class '$className' when registering " + - s"the function '$func', please make sure it is on the classpath") + throw QueryCompilationErrors.cannotLoadClassWhenRegisteringFunctionError(className, func) } makeFunctionBuilder(func.unquotedString, className) } @@ -1522,7 +1509,6 @@ class SessionCatalog( def lookupFunction( name: FunctionIdentifier, children: Seq[Expression]): Expression = synchronized { - import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ // Note: the implementation of this function is a little bit convoluted. // We probably shouldn't use a single FunctionRegistry to register all three kinds of functions // (built-in, temp, and external). @@ -1545,9 +1531,7 @@ class SessionCatalog( case Seq() => getCurrentDatabase case Seq(_, db) => db case Seq(catalog, namespace @ _*) => - throw new AnalysisException( - s"V2 catalog does not support functions yet. " + - s"catalog: ${catalog}, namespace: '${namespace.quoted}'") + throw QueryCompilationErrors.v2CatalogNotSupportFunctionError(catalog, namespace) } // If the name itself is not qualified, add the current database to it. @@ -1685,8 +1669,8 @@ class SessionCatalog( val newTableLocation = new Path(new Path(databaseLocation), formatTableName(newName.table)) val fs = newTableLocation.getFileSystem(hadoopConf) if (fs.exists(newTableLocation)) { - throw new AnalysisException(s"Can not rename the managed table('$oldName')" + - s". The associated location('$newTableLocation') already exists.") + throw QueryCompilationErrors.cannotOperateManagedTableWithExistingLocationError( + "rename", oldName, newTableLocation) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala index 67bf2d06c95dd..7ebe3d8c5f880 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.catalog import java.util.Locale -import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.errors.QueryCompilationErrors /** A trait that represents the type of a resourced needed by a function. 
*/ abstract class FunctionResourceType(val resourceType: String) @@ -40,7 +40,7 @@ object FunctionResourceType { case "file" => FileResource case "archive" => ArchiveResource case other => - throw new AnalysisException(s"Resource Type '$resourceType' is not supported.") + throw QueryCompilationErrors.resourceTypeNotSupportedError(resourceType) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index d25b1fe46d569..eb29b37a3c5d7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -29,7 +29,6 @@ import org.json4s.JsonAST.{JArray, JString} import org.json4s.jackson.JsonMethods._ import org.apache.spark.internal.Logging -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{FunctionIdentifier, InternalRow, SQLConfHelper, TableIdentifier} import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference, Cast, ExprId, Literal} @@ -37,6 +36,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.EstimationUtils import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.connector.catalog.CatalogManager +import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -145,7 +145,7 @@ case class CatalogTablePartition( /** Return the partition location, assuming it is specified. */ def location: URI = storage.locationUri.getOrElse { val specString = spec.map { case (k, v) => s"$k=$v" }.mkString(", ") - throw new AnalysisException(s"Partition [$specString] did not specify locationUri") + throw QueryCompilationErrors.partitionNotSpecifyLocationUriError(specString) } /** @@ -182,9 +182,8 @@ case class BucketSpec( sortColumnNames: Seq[String]) extends SQLConfHelper { if (numBuckets <= 0 || numBuckets > conf.bucketingMaxBuckets) { - throw new AnalysisException( - s"Number of buckets should be greater than 0 but less than or equal to " + - s"bucketing.maxBuckets (`${conf.bucketingMaxBuckets}`). Got `$numBuckets`") + throw QueryCompilationErrors.invalidBucketNumberError( + conf.bucketingMaxBuckets, numBuckets) } override def toString: String = { @@ -274,12 +273,12 @@ case class CatalogTable( /** Return the database this table was specified to belong to, assuming it exists. */ def database: String = identifier.database.getOrElse { - throw new AnalysisException(s"table $identifier did not specify database") + throw QueryCompilationErrors.tableNotSpecifyDatabaseError(identifier) } /** Return the table location, assuming it is specified. */ def location: URI = storage.locationUri.getOrElse { - throw new AnalysisException(s"table $identifier did not specify locationUri") + throw QueryCompilationErrors.tableNotSpecifyLocationUriError(identifier) } /** Return the fully qualified name of this table, assuming the database was specified. 
*/ @@ -295,8 +294,7 @@ case class CatalogTable( (0 until numParts).map { index => properties.getOrElse( s"$VIEW_CATALOG_AND_NAMESPACE_PART_PREFIX$index", - throw new AnalysisException("Corrupted table name context in catalog: " + - s"$numParts parts expected, but part $index is missing.") + throw QueryCompilationErrors.corruptedTableNameContextInCatalogError(numParts, index) ) } } else if (properties.contains(VIEW_DEFAULT_DATABASE)) { @@ -318,8 +316,7 @@ case class CatalogTable( yield (key.substring(CatalogTable.VIEW_SQL_CONFIG_PREFIX.length), value) } catch { case e: Exception => - throw new AnalysisException( - "Corrupted view SQL configs in catalog", cause = Some(e)) + throw QueryCompilationErrors.corruptedViewSQLConfigsInCatalogError(e) } } @@ -334,8 +331,7 @@ case class CatalogTable( index <- 0 until numCols.toInt } yield properties.getOrElse( s"$VIEW_QUERY_OUTPUT_COLUMN_NAME_PREFIX$index", - throw new AnalysisException("Corrupted view query output column names in catalog: " + - s"$numCols parts expected, but part $index is missing.") + throw QueryCompilationErrors.corruptedViewQueryOutputColumnsInCatalogError(numCols, index) ) } @@ -352,8 +348,7 @@ case class CatalogTable( }.getOrElse(Seq.empty) } catch { case e: Exception => - throw new AnalysisException( - "corrupted view referred temp view names in catalog", cause = Some(e)) + throw QueryCompilationErrors.corruptedViewReferredTempViewInCatalogError(e) } } @@ -368,8 +363,7 @@ case class CatalogTable( }.getOrElse(Seq.empty) } catch { case e: Exception => - throw new AnalysisException( - "corrupted view referred temp functions names in catalog", cause = Some(e)) + throw QueryCompilationErrors.corruptedViewReferredTempFunctionsInCatalogError(e) } } @@ -497,14 +491,13 @@ object CatalogTable { None } else { val numParts = props.get(s"$key.numParts") - val errorMessage = s"Cannot read table property '$key' as it's corrupted." if (numParts.isEmpty) { - throw new AnalysisException(errorMessage) + throw QueryCompilationErrors.cannotReadCorruptedTablePropertyError(key) } else { val parts = (0 until numParts.get.toInt).map { index => props.getOrElse(s"$key.part.$index", { - throw new AnalysisException( - s"$errorMessage Missing part $index, ${numParts.get} parts are expected.") + throw QueryCompilationErrors.cannotReadCorruptedTablePropertyError( + key, s"Missing part $index, $numParts parts are expected.") }) } Some(parts.mkString) @@ -657,8 +650,8 @@ object CatalogColumnStat extends Logging { // This version of Spark does not use min/max for binary/string types so we ignore it. case BinaryType | StringType => null case _ => - throw new AnalysisException("Column statistics deserialization is not supported for " + - s"column $name of data type: $dataType.") + throw QueryCompilationErrors.columnStatisticsDeserializationNotSupportedError( + name, dataType) } } @@ -674,8 +667,8 @@ object CatalogColumnStat extends Logging { case _: DecimalType => v.asInstanceOf[Decimal].toJavaBigDecimal // This version of Spark does not use min/max for binary/string types so we ignore it. 
case _ => - throw new AnalysisException("Column statistics serialization is not supported for " + - s"column $colName of data type: $dataType.") + throw QueryCompilationErrors.columnStatisticsSerializationNotSupportedError( + colName, dataType) } externalValue.toString } @@ -805,7 +798,7 @@ case class HiveTableRelation( tableMeta.stats.map(_.toPlanStats(output, conf.cboEnabled || conf.planStatsEnabled)) .orElse(tableStats) .getOrElse { - throw new IllegalStateException("table stats must be specified.") + throw QueryExecutionErrors.tableStatsNotSpecifiedError } } From 171db85aa2cdacf39caeb26162569275076fd52f Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Tue, 5 Jan 2021 13:48:52 -0800 Subject: [PATCH 0969/1009] [SPARK-33874][K8S][FOLLOWUP] Handle long lived sidecars - clean up logging ### What changes were proposed in this pull request? Switch log level from warn to debug when the spark container is not present in the pod's container statuses. ### Why are the changes needed? There are many non-critical situations where the Spark container may not be present, and the warning log level is too high. ### Does this PR introduce _any_ user-facing change? Log message change. ### How was this patch tested? N/A Closes #31047 from holdenk/SPARK-33874-follow-up. Authored-by: Holden Karau Signed-off-by: Dongjoon Hyun --- .../spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala index 71355c7af10fa..37aaca7e8ceeb 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala @@ -93,9 +93,10 @@ object ExecutorPodsSnapshot extends Logging { case _ => PodRunning(pod) } - // If we can't find the Spark container status, fall back to the pod status + // If we can't find the Spark container status, fall back to the pod status. This is + // expected to occur during pod startup and other situations. case _ => - logWarning(s"Unable to find container ${sparkContainerName} in pod ${pod} " + + logDebug(s"Unable to find container ${sparkContainerName} in pod ${pod} " + "defaulting to entire pod status (running).") PodRunning(pod) } From e279ed304475a6d5a9fbf739fe9ed32ef58171cb Mon Sep 17 00:00:00 2001 From: angerszhu Date: Wed, 6 Jan 2021 08:48:24 +0900 Subject: [PATCH 0970/1009] [SPARK-34012][SQL] Keep behavior consistent when conf `spark.sql.legacy.parser.havingWithoutGroupByAsWhere` is true with migration guide ### What changes were proposed in this pull request? In https://github.com/apache/spark/pull/22696 we support HAVING without GROUP BY means global aggregate But since we treat having as Filter before, in this way will cause a lot of analyze error, after https://github.com/apache/spark/pull/28294 we use `UnresolvedHaving` to instead `Filter` to solve such problem, but break origin logical about treat `SELECT 1 FROM range(10) HAVING true` as `SELECT 1 FROM range(10) WHERE true` . This PR fix this issue and add UT. ### Why are the changes needed? Keep consistent behavior of migration guide. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? 
added UT Closes #31039 from AngersZhuuuu/SPARK-25780-Follow-up. Authored-by: angerszhu Signed-off-by: Takeshi Yamamuro --- .../sql/catalyst/parser/AstBuilder.scala | 6 +- .../resources/sql-tests/inputs/group-by.sql | 10 +++ .../sql-tests/results/group-by.sql.out | 63 ++++++++++++++++++- 3 files changed, 77 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 3ea86c6ea2abf..395a9563cdc0a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -714,7 +714,11 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg val withProject = if (aggregationClause == null && havingClause != null) { if (conf.getConf(SQLConf.LEGACY_HAVING_WITHOUT_GROUP_BY_AS_WHERE)) { // If the legacy conf is set, treat HAVING without GROUP BY as WHERE. - withHavingClause(havingClause, createProject()) + val predicate = expression(havingClause.booleanExpression) match { + case p: Predicate => p + case e => Cast(e, BooleanType) + } + Filter(predicate, createProject()) } else { // According to SQL standard, HAVING without GROUP BY means global aggregate. withHavingClause(havingClause, Aggregate(Nil, namedExpressions, withFilter)) diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql index 81e2204358bc9..6ee1014739759 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql @@ -86,6 +86,16 @@ SELECT 1 FROM range(10) HAVING MAX(id) > 0; SELECT id FROM range(10) HAVING id > 0; +SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=true; + +SELECT 1 FROM range(10) HAVING true; + +SELECT 1 FROM range(10) HAVING MAX(id) > 0; + +SELECT id FROM range(10) HAVING id > 0; + +SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=false; + -- Test data CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES (1, true), (1, false), diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index 75bda87b37642..cc07cd64f3a89 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 57 +-- Number of queries: 62 -- !query @@ -277,6 +277,67 @@ org.apache.spark.sql.AnalysisException grouping expressions sequence is empty, and '`id`' is not an aggregate function. Wrap '()' in windowing function(s) or wrap '`id`' in first() (or first_value) if you don't care which value you get. +-- !query +SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=true +-- !query schema +struct +-- !query output +spark.sql.legacy.parser.havingWithoutGroupByAsWhere true + + +-- !query +SELECT 1 FROM range(10) HAVING true +-- !query schema +struct<1:int> +-- !query output +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 + + +-- !query +SELECT 1 FROM range(10) HAVING MAX(id) > 0 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException + +Aggregate/Window/Generate expressions are not valid in where clause of the query. 
+Expression in where clause: [(max(`id`) > CAST(0 AS BIGINT))] +Invalid expressions: [max(`id`)] + + +-- !query +SELECT id FROM range(10) HAVING id > 0 +-- !query schema +struct +-- !query output +1 +2 +3 +4 +5 +6 +7 +8 +9 + + +-- !query +SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=false +-- !query schema +struct +-- !query output +spark.sql.legacy.parser.havingWithoutGroupByAsWhere false + + -- !query CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES (1, true), (1, false), From b77d11dfd942ee2164dde2f5c25c6aaed65c444c Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 6 Jan 2021 11:19:44 +0900 Subject: [PATCH 0971/1009] [SPARK-34011][SQL] Refresh cache in `ALTER TABLE .. RENAME TO PARTITION` ### What changes were proposed in this pull request? 1. Invoke `refreshTable()` from `AlterTableRenamePartitionCommand.run()` after partitions renaming. In particular, this re-creates the cache associated with the modified table. 2. Refresh the cache associated with tables from v2 table catalogs in the `ALTER TABLE .. RENAME TO PARTITION` command. ### Why are the changes needed? This fixes the issues portrayed by the example: ```sql spark-sql> CREATE TABLE tbl1 (col0 int, part0 int) USING parquet PARTITIONED BY (part0); spark-sql> INSERT INTO tbl1 PARTITION (part0=0) SELECT 0; spark-sql> INSERT INTO tbl1 PARTITION (part0=1) SELECT 1; spark-sql> CACHE TABLE tbl1; spark-sql> SELECT * FROM tbl1; 0 0 1 1 spark-sql> ALTER TABLE tbl1 PARTITION (part0=0) RENAME TO PARTITION (part=2); spark-sql> SELECT * FROM tbl1; 0 0 1 1 ``` The last query must not return `0 2` since `0 0` was renamed by previous command. ### Does this PR introduce _any_ user-facing change? Yes. After the changes for the example above: ```sql ... spark-sql> ALTER TABLE tbl1 PARTITION (part=0) RENAME TO PARTITION (part=2); spark-sql> SELECT * FROM tbl1; 0 2 1 1 ``` ### How was this patch tested? By running the affected test suite: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableRenamePartitionSuite" ``` Closes #31044 from MaxGekk/rename-partition-refresh-cache. 
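For reference, the same cache-refresh check can be driven from the Scala API; this is only a sketch, assuming an active `spark` session in spark-shell and that `tbl1` does not already exist:

```scala
// Sketch only: mirrors the SQL example above through spark.sql calls.
spark.sql("CREATE TABLE tbl1 (col0 INT, part0 INT) USING parquet PARTITIONED BY (part0)")
spark.sql("INSERT INTO tbl1 PARTITION (part0=0) SELECT 0")
spark.sql("INSERT INTO tbl1 PARTITION (part0=1) SELECT 1")
spark.sql("CACHE TABLE tbl1")
spark.sql("ALTER TABLE tbl1 PARTITION (part0=0) RENAME TO PARTITION (part0=2)")
// With the refresh in place, the cached data reflects the renamed partition.
spark.sql("SELECT * FROM tbl1").show()
```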
Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- .../apache/spark/sql/execution/command/ddl.scala | 1 + .../v2/AlterTableRenamePartitionExec.scala | 7 +++++-- .../datasources/v2/DataSourceV2Strategy.scala | 5 +++-- .../AlterTableRenamePartitionSuiteBase.scala | 15 +++++++++++++++ 4 files changed, 24 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 5e3a67927e75a..8195d02e04b8b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -536,6 +536,7 @@ case class AlterTableRenamePartitionCommand( catalog.renamePartitions( tableName, Seq(normalizedOldPartition), Seq(normalizedNewPartition)) + sparkSession.catalog.refreshTable(table.identifier.quotedString) Seq.empty[Row] } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableRenamePartitionExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableRenamePartitionExec.scala index 38b83e3ad74e7..0632bd75102fa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableRenamePartitionExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableRenamePartitionExec.scala @@ -28,12 +28,15 @@ import org.apache.spark.sql.connector.catalog.SupportsPartitionManagement case class AlterTableRenamePartitionExec( table: SupportsPartitionManagement, from: ResolvedPartitionSpec, - to: ResolvedPartitionSpec) extends V2CommandExec { + to: ResolvedPartitionSpec, + refreshCache: () => Unit) extends V2CommandExec { override def output: Seq[Attribute] = Seq.empty override protected def run(): Seq[InternalRow] = { - table.renamePartition(from.ident, to.ident) + if (table.renamePartition(from.ident, to.ident)) { + refreshCache() + } Seq.empty } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 1537ebf8f305c..fa9519bf3233c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -368,11 +368,12 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat invalidateCache(r, recacheTable = true)) :: Nil case AlterTableRenamePartition( - ResolvedTable(_, _, table: SupportsPartitionManagement, _), from, to) => + r @ ResolvedTable(_, _, table: SupportsPartitionManagement, _), from, to) => AlterTableRenamePartitionExec( table, Seq(from).asResolvedPartitionSpecs.head, - Seq(to).asResolvedPartitionSpecs.head) :: Nil + Seq(to).asResolvedPartitionSpecs.head, + invalidateCache(r, recacheTable = true)) :: Nil case AlterTableRecoverPartitions(_: ResolvedTable) => throw new AnalysisException( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala index 58055262d3f11..7f66e282499d4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala @@ -163,4 +163,19 @@ trait AlterTableRenamePartitionSuiteBase extends QueryTest with DDLCommandTestUt } } } + + test("SPARK-34011: refresh cache after partition renaming") { + withNamespaceAndTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id int, part int) $defaultUsing PARTITIONED BY (part)") + sql(s"INSERT INTO $t PARTITION (part=0) SELECT 0") + sql(s"INSERT INTO $t PARTITION (part=1) SELECT 1") + assert(!spark.catalog.isCached(t)) + sql(s"CACHE TABLE $t") + assert(spark.catalog.isCached(t)) + QueryTest.checkAnswer(sql(s"SELECT * FROM $t"), Seq(Row(0, 0), Row(1, 1))) + sql(s"ALTER TABLE $t PARTITION (part=0) RENAME TO PARTITION (part=2)") + assert(spark.catalog.isCached(t)) + QueryTest.checkAnswer(sql(s"SELECT * FROM $t"), Seq(Row(0, 2), Row(1, 1))) + } + } } From 3d8ee492d6cd0c086988f2970bc6ea1d70a98368 Mon Sep 17 00:00:00 2001 From: "Tom.Howland" Date: Wed, 6 Jan 2021 11:40:02 +0900 Subject: [PATCH 0972/1009] [SPARK-34015][R] Fixing input timing in gapply ### What changes were proposed in this pull request? When sparkR is run at log level INFO, a summary of how the worker spent its time processing the partition is printed. There is a logic error where it is over-reporting the time inputting rows. In detail: the variable inputElap in a wider context is used to mark the end of reading rows, but in the part changed here it was used as a local variable for measuring the beginning of compute time in a loop over the groups in the partition. Thus, the error is not observable if there is only one group per partition, which is what you get in unit tests. For our application, here's what a log entry looks like before these changes were applied: `20/10/09 04:08:58 INFO RRunner: Times: boot = 0.013 s, init = 0.005 s, broadcast = 0.000 s, read-input = 529.471 s, compute = 492.037 s, write-output = 0.020 s, total = 1021.546 s` this indicates that we're spending more time reading rows than operating on the rows. After these changes, it looks like this: `20/12/15 06:43:29 INFO RRunner: Times: boot = 0.013 s, init = 0.010 s, broadcast = 0.000 s, read-input = 120.275 s, compute = 1680.161 s, write-output = 0.045 s, total = 1812.553 s ` ### Why are the changes needed? Metrics shouldn't mislead? ### Does this PR introduce _any_ user-facing change? Aside from no longer misleading, no ### How was this patch tested? unit tests passed. Field test results seem plausible Closes #31021 from WamBamBoozle/input_timing. 
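The timing pattern being corrected can be illustrated outside the R worker. Below is a minimal Scala sketch, with placeholder groups and a stand-in for the user function, showing the idea of taking the compute start mark inside the loop instead of reusing the end-of-input mark:

```scala
// Sketch only: not the worker.R code, just the per-group timing accumulation idea.
def nowSecs(): Double = System.nanoTime() / 1e9

val groups: Seq[Seq[Int]] = Seq(Seq(1, 2), Seq(3), Seq(4, 5, 6)) // placeholder groups
var computeElapsed = 0.0
groups.foreach { group =>
  val computeStart = nowSecs()   // marks the start of compute for this group only
  val result = group.map(_ * 2)  // stand-in for the user-supplied function
  computeElapsed += nowSecs() - computeStart
}
println(f"compute = $computeElapsed%.3f s")
```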
Authored-by: Tom.Howland Signed-off-by: HyukjinKwon --- R/pkg/inst/worker/worker.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/R/pkg/inst/worker/worker.R b/R/pkg/inst/worker/worker.R index dd271f91d0084..7fc4680bad10e 100644 --- a/R/pkg/inst/worker/worker.R +++ b/R/pkg/inst/worker/worker.R @@ -196,7 +196,7 @@ if (isEmpty != 0) { outputs <- list() for (i in seq_len(length(data))) { # Timing reading input data for execution - inputElap <- elapsedSecs() + computeStart <- elapsedSecs() output <- compute(mode, partition, serializer, deserializer, keys[[i]], colNames, computeFunc, data[[i]]) computeElap <- elapsedSecs() @@ -204,17 +204,18 @@ if (isEmpty != 0) { outputs[[length(outputs) + 1L]] <- output } else { outputResult(serializer, output, outputCon) + outputComputeElapsDiff <- outputComputeElapsDiff + (elapsedSecs() - computeElap) } - outputElap <- elapsedSecs() - computeInputElapsDiff <- computeInputElapsDiff + (computeElap - inputElap) - outputComputeElapsDiff <- outputComputeElapsDiff + (outputElap - computeElap) + computeInputElapsDiff <- computeInputElapsDiff + (computeElap - computeStart) } if (serializer == "arrow") { # See https://stat.ethz.ch/pipermail/r-help/2010-September/252046.html # rbind.fill might be an alternative to make it faster if plyr is installed. + outputStart <- elapsedSecs() combined <- do.call("rbind", outputs) SparkR:::writeSerializeInArrow(outputCon, combined) + outputComputeElapsDiff <- elapsedSecs() - outputStart } } } else { From 29510821a0e3b1e09a7710ed02a0fa1caab506af Mon Sep 17 00:00:00 2001 From: Baohe Zhang Date: Tue, 5 Jan 2021 19:16:40 -0800 Subject: [PATCH 0973/1009] [SPARK-33029][CORE][WEBUI] Fix the UI executor page incorrectly marking the driver as excluded ### What changes were proposed in this pull request? Filter out the driver entity when updating the exclusion status of live executors(including the driver), so the UI won't be marked as excluded in the UI even if the node that hosts the driver has been marked as excluded. ### Why are the changes needed? Before this change, if we run spark with the standalone mode and with spark.blacklist.enabled=true. The driver will be marked as excluded when the host that hosts that driver has been marked as excluded. While it's incorrect because the exclude list feature will exclude executors only and the driver is still active. ![image](https://user-images.githubusercontent.com/26694233/103238740-35c05180-4911-11eb-99a2-c87c059ba0cf.png) After the fix, the driver won't be marked as excluded. ![image](https://user-images.githubusercontent.com/26694233/103238806-6f915800-4911-11eb-80d5-3c99266cfd0a.png) ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual test. Reopen the UI and see the driver is no longer marked as excluded. Closes #30954 from baohe-zhang/SPARK-33029. 
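The core of the change is a filter that skips the driver entry when a host is excluded. A simplified sketch with stand-in types (not the actual AppStatusListener classes):

```scala
// Sketch only: excluding a host flips the flag on its executors but never on the driver.
case class LiveEntity(executorId: String, hostname: String, var excluded: Boolean = false)

val DRIVER_ID = "driver" // stands in for SparkContext.DRIVER_IDENTIFIER

def excludeHost(entities: Iterable[LiveEntity], host: String): Unit =
  entities
    .filter(e => e.hostname == host && e.executorId != DRIVER_ID)
    .foreach(_.excluded = true)

val entities = Seq(
  LiveEntity(DRIVER_ID, "node1"),
  LiveEntity("1", "node1"),
  LiveEntity("2", "node2"))
excludeHost(entities, "node1")
entities.foreach(e => println(s"${e.executorId} excluded=${e.excluded}")) // driver stays false
```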
Authored-by: Baohe Zhang Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/status/AppStatusListener.scala | 8 +++++--- .../executor_memory_usage_expectation.json | 4 ++-- .../executor_node_excludeOnFailure_expectation.json | 4 ++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala index bf19897e51fb3..6cb013b1a7c16 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala @@ -366,10 +366,12 @@ private[spark] class AppStatusListener( // Implicitly exclude every available executor for the stage associated with this node Option(liveStages.get((stageId, stageAttemptId))).foreach { stage => - val executorIds = liveExecutors.values.filter(_.host == hostId).map(_.executorId).toSeq + val executorIds = liveExecutors.values.filter(exec => exec.host == hostId + && exec.executorId != SparkContext.DRIVER_IDENTIFIER).map(_.executorId).toSeq setStageExcludedStatus(stage, now, executorIds: _*) } - liveExecutors.values.filter(_.hostname == hostId).foreach { exec => + liveExecutors.values.filter(exec => exec.hostname == hostId + && exec.executorId != SparkContext.DRIVER_IDENTIFIER).foreach { exec => addExcludedStageTo(exec, stageId, now) } } @@ -416,7 +418,7 @@ private[spark] class AppStatusListener( // Implicitly (un)exclude every executor associated with the node. liveExecutors.values.foreach { exec => - if (exec.hostname == host) { + if (exec.hostname == host && exec.executorId != SparkContext.DRIVER_IDENTIFIER) { updateExecExclusionStatus(exec, excluded, now) } } diff --git a/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json index 9adda275b5609..51449340efe9f 100644 --- a/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json @@ -16,7 +16,7 @@ "totalInputBytes" : 0, "totalShuffleRead" : 0, "totalShuffleWrite" : 0, - "isBlacklisted" : true, + "isBlacklisted" : false, "maxMemory" : 908381388, "addTime" : "2016-11-16T22:33:31.477GMT", "executorLogs" : { }, @@ -30,7 +30,7 @@ "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, - "isExcluded" : true, + "isExcluded" : false, "excludedInStages" : [ ] }, { "id" : "3", diff --git a/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_expectation.json index 65bd309c1025e..47a01b2596de9 100644 --- a/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_expectation.json @@ -16,7 +16,7 @@ "totalInputBytes" : 0, "totalShuffleRead" : 0, "totalShuffleWrite" : 0, - "isBlacklisted" : true, + "isBlacklisted" : false, "maxMemory" : 908381388, "addTime" : "2016-11-16T22:33:31.477GMT", "executorLogs" : { }, @@ -30,7 +30,7 @@ "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, - "isExcluded" : true, + "isExcluded" : false, "excludedInStages" : [ ] }, { "id" : "3", From 2ab77d634f2e87b080786f4f39cb17e0994bc550 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Tue, 5 Jan 2021 20:45:19 -0800 
Subject: [PATCH 0974/1009] [SPARK-34004][SQL] Change FrameLessOffsetWindowFunction as sealed abstract class ### What changes were proposed in this pull request? Change `FrameLessOffsetWindowFunction` as sealed abstract class so that simplify pattern match. ### Why are the changes needed? Simplify pattern match ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? Jenkins test Closes #31026 from beliefer/SPARK-30789-followup. Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 3 --- .../spark/sql/catalyst/expressions/windowExpressions.scala | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index e41d3de642d51..883ff46148ca6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2150,9 +2150,6 @@ class Analyzer(override val catalogManager: CatalogManager) lead.copy(ignoreNulls = ignoreNulls) case lag: Lag => lag.copy(ignoreNulls = ignoreNulls) - case _ => - throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( - owf.prettyName, "IGNORE NULLS") } } else { owf diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index b167499620c0f..1934a9b190fc7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -369,7 +369,7 @@ trait OffsetWindowFunction extends WindowFunction { * within the partition. For instance: a FrameLessOffsetWindowFunction for value x with offset -2, * will get the value of x 2 rows back from the current row in the partition. */ -abstract class FrameLessOffsetWindowFunction +sealed abstract class FrameLessOffsetWindowFunction extends OffsetWindowFunction with Unevaluable with ImplicitCastInputTypes { override def children: Seq[Expression] = Seq(input, offset, default) From b1c4fc7fc71530d2d257500484f959282f5b6d44 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Tue, 5 Jan 2021 21:50:16 -0800 Subject: [PATCH 0975/1009] [SPARK-34008][BUILD] Upgrade derby to 10.14.2.0 ### What changes were proposed in this pull request? This PR upgrades `derby` to `10.14.2.0`. You can check the major changes from the following URLs. * 10.13.1.1 http://svn.apache.org/repos/asf/db/derby/code/tags/10.13.1.1/RELEASE-NOTES.html * 10.14.1.0 http://svn.apache.org/repos/asf/db/derby/code/tags/10.14.1.0/RELEASE-NOTES.html * 10.14.2.0 http://svn.apache.org/repos/asf/db/derby/code/tags/10.14.2.0/RELEASE-NOTES.html ### Why are the changes needed? It seems to be the final release which supports `JDK8` as the minimum required version. After `10.15.1.3`, the minimum required version is `JDK9`. https://db.apache.org/derby/ ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #31032 from sarutak/upgrade-derby. 
Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 2 +- pom.xml | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 9c516203dd3fa..8d8ef2e972098 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -58,7 +58,7 @@ curator-recipes/2.7.1//curator-recipes-2.7.1.jar datanucleus-api-jdo/4.2.4//datanucleus-api-jdo-4.2.4.jar datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar datanucleus-rdbms/4.1.19//datanucleus-rdbms-4.1.19.jar -derby/10.12.1.1//derby-10.12.1.1.jar +derby/10.14.2.0//derby-10.14.2.0.jar dropwizard-metrics-hadoop-metrics2-reporter/0.1.2//dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar generex/1.0.2//generex-1.0.2.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 1d80fadb5762a..bf56fc18c0446 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -55,7 +55,7 @@ curator-recipes/2.13.0//curator-recipes-2.13.0.jar datanucleus-api-jdo/4.2.4//datanucleus-api-jdo-4.2.4.jar datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar datanucleus-rdbms/4.1.19//datanucleus-rdbms-4.1.19.jar -derby/10.12.1.1//derby-10.12.1.1.jar +derby/10.14.2.0//derby-10.14.2.0.jar dnsjava/2.1.7//dnsjava-2.1.7.jar dropwizard-metrics-hadoop-metrics2-reporter/0.1.2//dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar ehcache/3.3.1//ehcache-3.3.1.jar diff --git a/pom.xml b/pom.xml index 07c18f78e0735..f921e35a76b41 100644 --- a/pom.xml +++ b/pom.xml @@ -134,7 +134,8 @@ 2.3 2.6.0 - 10.12.1.1 + + 10.14.2.0 1.10.1 1.6.6 9.4.34.v20201102 From fa9309001a47a2b87f7a735f964537886ed9bd4c Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Tue, 5 Jan 2021 21:59:49 -0800 Subject: [PATCH 0976/1009] [SPARK-33635][SS] Adjust the order of check in KafkaTokenUtil.needTokenUpdate to remedy perf regression ### What changes were proposed in this pull request? This PR proposes to adjust the order of check in KafkaTokenUtil.needTokenUpdate, so that short-circuit applies on the non-delegation token cases (insecure + secured without delegation token) and remedies the performance regression heavily. ### Why are the changes needed? There's a serious performance regression between Spark 2.4 vs Spark 3.0 on read path against Kafka data source. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually ran a reproducer (https://github.com/codegorillauk/spark-kafka-read with modification to just count instead of writing to Kafka topic) with measuring the time. > the branch applying the change with adding measurement https://github.com/HeartSaVioR/spark/commits/debug-SPARK-33635-v3.0.1 > the branch only adding measurement https://github.com/HeartSaVioR/spark/commits/debug-original-ver-SPARK-33635-v3.0.1 > the result (before the fix) count: 10280000 Took 41.634007047 secs 21/01/06 13:16:07 INFO KafkaDataConsumer: debug ver. 17-original 21/01/06 13:16:07 INFO KafkaDataConsumer: Total time taken to retrieve: 82118 ms > the result (after the fix) count: 10280000 Took 7.964058475 secs 21/01/06 13:08:22 INFO KafkaDataConsumer: debug ver. 17 21/01/06 13:08:22 INFO KafkaDataConsumer: Total time taken to retrieve: 987 ms Closes #31056 from HeartSaVioR/SPARK-33635. 
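The speed-up comes purely from `&&` short-circuiting. A toy sketch with stand-in checks (not the real KafkaTokenUtil signatures) shows why the cheap, usually-false condition should come first:

```scala
// Sketch only: the expensive check runs just when the cheap ones already passed.
def cheapChecks(params: Map[String, String]): Boolean =
  params.contains("sasl.jaas.config") // stands in for the clusterConfig/params checks

def expensiveCheck(): Boolean = {
  Thread.sleep(10) // stands in for the delegation-token service lookup
  true
}

def needTokenUpdate(params: Map[String, String]): Boolean =
  cheapChecks(params) && expensiveCheck() // skipped entirely on the common no-token path

needTokenUpdate(Map.empty)                      // returns quickly, expensiveCheck never runs
needTokenUpdate(Map("sasl.jaas.config" -> "x")) // only now pays for expensiveCheck
```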
Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Dongjoon Hyun --- .../main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala b/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala index bc790418decd3..f3f6b4de6f79c 100644 --- a/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala +++ b/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala @@ -273,8 +273,8 @@ private[spark] object KafkaTokenUtil extends Logging { sparkConf: SparkConf, params: ju.Map[String, Object], clusterConfig: Option[KafkaTokenClusterConf]): Boolean = { - if (HadoopDelegationTokenManager.isServiceEnabled(sparkConf, "kafka") && - clusterConfig.isDefined && params.containsKey(SaslConfigs.SASL_JAAS_CONFIG)) { + if (clusterConfig.isDefined && params.containsKey(SaslConfigs.SASL_JAAS_CONFIG) && + HadoopDelegationTokenManager.isServiceEnabled(sparkConf, "kafka")) { logDebug("Delegation token used by connector, checking if uses the latest token.") val connectorJaasParams = params.get(SaslConfigs.SASL_JAAS_CONFIG).asInstanceOf[String] getTokenJaasParams(clusterConfig.get) != connectorJaasParams From c0d0dbabdb264180d5a88e2656e4a2fe353f21f1 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Tue, 5 Jan 2021 22:33:15 -0800 Subject: [PATCH 0977/1009] [SPARK-33934][SQL][FOLLOW-UP] Use SubProcessor's exit code as assert condition to fix flaky test ### What changes were proposed in this pull request? Follow comment and fix. flaky test https://github.com/apache/spark/pull/30973#issuecomment-754852130. This flaky test is similar as https://github.com/apache/spark/pull/30896 Some task's failed with root cause but in driver may return error without root cause , change. UT to check with status exit code since different root cause's exit code is not same. ### Why are the changes needed? Fix flaky test ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existed UT Closes #31046 from AngersZhuuuu/SPARK-33934-FOLLOW-UP. Lead-authored-by: angerszhu Co-authored-by: AngersZhuuuu Signed-off-by: Dongjoon Hyun --- .../spark/sql/execution/BaseScriptTransformationExec.scala | 2 +- sql/core/src/test/resources/test_script.py | 2 +- .../spark/sql/execution/BaseScriptTransformationSuite.scala | 5 ++++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala index b66f94ae1107a..669b90f4d06dd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution -import java.io._ +import java.io.{BufferedReader, File, InputStream, InputStreamReader, OutputStream} import java.nio.charset.StandardCharsets import java.util.concurrent.TimeUnit diff --git a/sql/core/src/test/resources/test_script.py b/sql/core/src/test/resources/test_script.py index 75b4f106d3a1a..4fcd483f44d43 100644 --- a/sql/core/src/test/resources/test_script.py +++ b/sql/core/src/test/resources/test_script.py @@ -1,4 +1,4 @@ -#! 
/usr/bin/python +#!/usr/bin/env python3 # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala index a25e4b8f8ea07..cef870b249985 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala @@ -521,7 +521,10 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU 'd.cast("string"), 'e.cast("string")).collect()) }.getMessage - assert(e1.contains("Permission denied")) + // Check with status exit code since in GA test, it may lose detail failed root cause. + // Different root cause's exitcode is not same. + // In this test, root cause is `Permission denied` + assert(e1.contains("Subprocess exited with status 126")) // test `/path/to/script.py' with script executable scriptFilePath.setExecutable(true) From 45a4ff8e5472ed724b1bba40ce4ee5d314bf72c2 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Tue, 5 Jan 2021 23:11:23 -0800 Subject: [PATCH 0978/1009] [SPARK-33948][SQL] Fix CodeGen error of MapObjects.doGenCode method in Scala 2.13 ### What changes were proposed in this pull request? `MapObjects.doGenCode` method will generate wrong code when `inputDataType` is `ArrayBuffer`. For example `encode/decode for Tuple2: (ArrayBuffer[(String, String)],ArrayBuffer((a,b))) (codegen path)` in `ExpressionEncoderSuite`, the error generated code part as follow: ``` /* 126 */ private scala.collection.mutable.ArrayBuffer MapObjects_0(InternalRow i) { /* 127 */ boolean isNull_4 = i.isNullAt(1); /* 128 */ ArrayData value_4 = isNull_4 ? /* 129 */ null : (i.getArray(1)); /* 130 */ scala.collection.mutable.ArrayBuffer value_3 = null; /* 131 */ /* 132 */ if (!isNull_4) { /* 133 */ /* 134 */ int dataLength_0 = value_4.numElements(); /* 135 */ /* 136 */ scala.Tuple2[] convertedArray_0 = null; /* 137 */ convertedArray_0 = new scala.Tuple2[dataLength_0]; /* 138 */ /* 139 */ /* 140 */ int loopIndex_0 = 0; /* 141 */ /* 142 */ while (loopIndex_0 < dataLength_0) { /* 143 */ value_MapObject_lambda_variable_1 = (InternalRow) (value_4.getStruct(loopIndex_0, 2)); /* 144 */ isNull_MapObject_lambda_variable_1 = value_4.isNullAt(loopIndex_0); /* 145 */ /* 146 */ boolean isNull_5 = false; /* 147 */ scala.Tuple2 value_5 = null; /* 148 */ if (!false && isNull_MapObject_lambda_variable_1) { /* 149 */ /* 150 */ isNull_5 = true; /* 151 */ value_5 = ((scala.Tuple2)null); /* 152 */ } else { /* 153 */ scala.Tuple2 value_13 = NewInstance_0(i); /* 154 */ isNull_5 = false; /* 155 */ value_5 = value_13; /* 156 */ } /* 157 */ if (isNull_5) { /* 158 */ convertedArray_0[loopIndex_0] = null; /* 159 */ } else { /* 160 */ convertedArray_0[loopIndex_0] = value_5; /* 161 */ } /* 162 */ /* 163 */ loopIndex_0 += 1; /* 164 */ } /* 165 */ /* 166 */ value_3 = new org.apache.spark.sql.catalyst.util.GenericArrayData(convertedArray_0); /* 167 */ } /* 168 */ globalIsNull_0 = isNull_4; /* 169 */ return value_3; /* 170 */ } ``` Line 166 in generated code try to assign `GenericArrayData` to `value_3(ArrayBuffer)` because `ArrayBuffer` type can't match `s.c.i.Seq` branch in Scala 2.13 in `MapObjects.doGenCode` method now. 
So this pr change to use `s.c.Seq` instead of `Seq` alias to let `ArrayBuffer` type can enter the same branch as Scala 2.12. After this pr the generate code when `inputDataType` is `ArrayBuffer` as follow: ``` /* 126 */ private scala.collection.mutable.ArrayBuffer MapObjects_0(InternalRow i) { /* 127 */ boolean isNull_4 = i.isNullAt(1); /* 128 */ ArrayData value_4 = isNull_4 ? /* 129 */ null : (i.getArray(1)); /* 130 */ scala.collection.mutable.ArrayBuffer value_3 = null; /* 131 */ /* 132 */ if (!isNull_4) { /* 133 */ /* 134 */ int dataLength_0 = value_4.numElements(); /* 135 */ /* 136 */ scala.collection.mutable.Builder collectionBuilder_0 = scala.collection.mutable.ArrayBuffer$.MODULE$.newBuilder(); /* 137 */ collectionBuilder_0.sizeHint(dataLength_0); /* 138 */ /* 139 */ /* 140 */ int loopIndex_0 = 0; /* 141 */ /* 142 */ while (loopIndex_0 < dataLength_0) { /* 143 */ value_MapObject_lambda_variable_1 = (InternalRow) (value_4.getStruct(loopIndex_0, 2)); /* 144 */ isNull_MapObject_lambda_variable_1 = value_4.isNullAt(loopIndex_0); /* 145 */ /* 146 */ boolean isNull_5 = false; /* 147 */ scala.Tuple2 value_5 = null; /* 148 */ if (!false && isNull_MapObject_lambda_variable_1) { /* 149 */ /* 150 */ isNull_5 = true; /* 151 */ value_5 = ((scala.Tuple2)null); /* 152 */ } else { /* 153 */ scala.Tuple2 value_13 = NewInstance_0(i); /* 154 */ isNull_5 = false; /* 155 */ value_5 = value_13; /* 156 */ } /* 157 */ if (isNull_5) { /* 158 */ collectionBuilder_0.$plus$eq(null); /* 159 */ } else { /* 160 */ collectionBuilder_0.$plus$eq(value_5); /* 161 */ } /* 162 */ /* 163 */ loopIndex_0 += 1; /* 164 */ } /* 165 */ /* 166 */ value_3 = (scala.collection.mutable.ArrayBuffer) collectionBuilder_0.result(); /* 167 */ } /* 168 */ globalIsNull_0 = isNull_4; /* 169 */ return value_3; /* 170 */ } ``` ### Why are the changes needed? Bug fix in Scala 2.13 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Pass the Jenkins or GitHub Action - Manual test `sql/catalyst` and `sql/core` in Scala 2.13 passed ``` mvn clean test -pl sql/catalyst -Pscala-2.13 Run completed in 11 minutes, 23 seconds. Total number of tests run: 4711 Suites: completed 261, aborted 0 Tests: succeeded 4711, failed 0, canceled 0, ignored 5, pending 0 All tests passed. ``` - Manual cherry-pick this pr to branch 3.1 and test`sql/catalyst` in Scala 2.13 passed ``` mvn clean test -pl sql/catalyst -Pscala-2.13 Run completed in 11 minutes, 18 seconds. Total number of tests run: 4655 Suites: completed 256, aborted 0 Tests: succeeded 4655, failed 0, canceled 0, ignored 5, pending 0 ``` Closes #31055 from LuciferYang/SPARK-33948. 
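The type relationship behind the fix can be checked directly in a REPL. A small sketch, assuming Scala 2.13 where the default `Seq` alias points at `scala.collection.immutable.Seq`:

```scala
// Sketch only: ArrayBuffer is a scala.collection.Seq but not an immutable.Seq on 2.13,
// so a pattern-match branch keyed on the immutable alias misses it.
import scala.collection.mutable.ArrayBuffer

val buf = ArrayBuffer(1, 2, 3)
println(classOf[scala.collection.Seq[_]].isAssignableFrom(buf.getClass))           // true
println(classOf[scala.collection.immutable.Seq[_]].isAssignableFrom(buf.getClass)) // false on 2.13
```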
Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- .../apache/spark/sql/catalyst/expressions/objects/objects.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 9303df75af503..f391b3128cf41 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -954,7 +954,7 @@ case class MapObjects private( } else { doCodeGenForScala213 } - case Some(cls) if classOf[Seq[_]].isAssignableFrom(cls) || + case Some(cls) if classOf[scala.collection.Seq[_]].isAssignableFrom(cls) || classOf[scala.collection.Set[_]].isAssignableFrom(cls) => // Scala sequence or set val getBuilder = s"${cls.getName}$$.MODULE$$.newBuilder()" From 26d8df300a1a57e220b1a0f9814795f68101f28b Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Wed, 6 Jan 2021 08:25:34 +0000 Subject: [PATCH 0979/1009] [SPARK-33938][SQL] Optimize Like Any/All by LikeSimplification ### What changes were proposed in this pull request? We should optimize Like Any/All by LikeSimplification to improve performance. ### Why are the changes needed? Optimize Like Any/All ### Does this PR introduce _any_ user-facing change? 'No'. ### How was this patch tested? Jenkins test. Closes #30975 from beliefer/SPARK-33938. Lead-authored-by: gengjiaan Co-authored-by: beliefer Co-authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../expressions/regexpExpressions.scala | 6 +- .../sql/catalyst/optimizer/expressions.scala | 81 +++++++++++++------ .../optimizer/LikeSimplificationSuite.scala | 68 ++++++++++++++++ 3 files changed, 128 insertions(+), 27 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index dae954a579eb3..011371a513a8d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -181,7 +181,7 @@ case class Like(left: Expression, right: Expression, escapeChar: Char) } } -abstract class MultiLikeBase +sealed abstract class MultiLikeBase extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { protected def patterns: Seq[UTF8String] @@ -220,7 +220,7 @@ abstract class MultiLikeBase /** * Optimized version of LIKE ALL, when all pattern values are literal. */ -abstract class LikeAllBase extends MultiLikeBase { +sealed abstract class LikeAllBase extends MultiLikeBase { override def matches(exprValue: String): Any = { if (cache.forall(matchFunc(_, exprValue))) { @@ -276,7 +276,7 @@ case class NotLikeAll(child: Expression, patterns: Seq[UTF8String]) extends Like /** * Optimized version of LIKE ANY, when all pattern values are literal. 
*/ -abstract class LikeAnyBase extends MultiLikeBase { +sealed abstract class LikeAnyBase extends MultiLikeBase { override def matches(exprValue: String): Any = { if (cache.exists(matchFunc(_, exprValue))) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 819bffeafb643..a40456da82977 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -21,7 +21,7 @@ import scala.collection.immutable.HashSet import scala.collection.mutable.{ArrayBuffer, Stack} import org.apache.spark.sql.catalyst.analysis._ -import org.apache.spark.sql.catalyst.expressions.{BinaryExpression, _} +import org.apache.spark.sql.catalyst.expressions.{BinaryExpression, MultiLikeBase, _} import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull @@ -30,6 +30,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String /* * Optimization rules defined in this file should not affect the structure of the logical plan. @@ -634,36 +635,68 @@ object LikeSimplification extends Rule[LogicalPlan] { private val contains = "%([^_%]+)%".r private val equalTo = "([^_%]*)".r + private def simplifyLike( + input: Expression, pattern: String, escapeChar: Char = '\\'): Option[Expression] = { + if (pattern.contains(escapeChar)) { + // There are three different situations when pattern containing escapeChar: + // 1. pattern contains invalid escape sequence, e.g. 'm\aca' + // 2. pattern contains escaped wildcard character, e.g. 'ma\%ca' + // 3. pattern contains escaped escape character, e.g. 'ma\\ca' + // Although there are patterns can be optimized if we handle the escape first, we just + // skip this rule if pattern contains any escapeChar for simplicity. + None + } else { + pattern match { + case startsWith(prefix) => + Some(StartsWith(input, Literal(prefix))) + case endsWith(postfix) => + Some(EndsWith(input, Literal(postfix))) + // 'a%a' pattern is basically same with 'a%' && '%a'. + // However, the additional `Length` condition is required to prevent 'a' match 'a%a'. 
+ case startsAndEndsWith(prefix, postfix) => + Some(And(GreaterThanOrEqual(Length(input), Literal(prefix.length + postfix.length)), + And(StartsWith(input, Literal(prefix)), EndsWith(input, Literal(postfix))))) + case contains(infix) => + Some(Contains(input, Literal(infix))) + case equalTo(str) => + Some(EqualTo(input, Literal(str))) + case _ => None + } + } + } + + private def simplifyMultiLike( + child: Expression, patterns: Seq[UTF8String], multi: MultiLikeBase): Expression = { + val (remainPatternMap, replacementMap) = + patterns.map { p => p -> simplifyLike(child, p.toString)}.partition(_._2.isEmpty) + val remainPatterns = remainPatternMap.map(_._1) + val replacements = replacementMap.map(_._2.get) + if (replacements.isEmpty) { + multi + } else { + multi match { + case l: LikeAll => And(replacements.reduceLeft(And), l.copy(patterns = remainPatterns)) + case l: NotLikeAll => + And(replacements.map(Not(_)).reduceLeft(And), l.copy(patterns = remainPatterns)) + case l: LikeAny => Or(replacements.reduceLeft(Or), l.copy(patterns = remainPatterns)) + case l: NotLikeAny => + Or(replacements.map(Not(_)).reduceLeft(Or), l.copy(patterns = remainPatterns)) + } + } + } + def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { case l @ Like(input, Literal(pattern, StringType), escapeChar) => if (pattern == null) { // If pattern is null, return null value directly, since "col like null" == null. Literal(null, BooleanType) } else { - pattern.toString match { - // There are three different situations when pattern containing escapeChar: - // 1. pattern contains invalid escape sequence, e.g. 'm\aca' - // 2. pattern contains escaped wildcard character, e.g. 'ma\%ca' - // 3. pattern contains escaped escape character, e.g. 'ma\\ca' - // Although there are patterns can be optimized if we handle the escape first, we just - // skip this rule if pattern contains any escapeChar for simplicity. - case p if p.contains(escapeChar) => l - case startsWith(prefix) => - StartsWith(input, Literal(prefix)) - case endsWith(postfix) => - EndsWith(input, Literal(postfix)) - // 'a%a' pattern is basically same with 'a%' && '%a'. - // However, the additional `Length` condition is required to prevent 'a' match 'a%a'. 
- case startsAndEndsWith(prefix, postfix) => - And(GreaterThanOrEqual(Length(input), Literal(prefix.length + postfix.length)), - And(StartsWith(input, Literal(prefix)), EndsWith(input, Literal(postfix)))) - case contains(infix) => - Contains(input, Literal(infix)) - case equalTo(str) => - EqualTo(input, Literal(str)) - case _ => l - } + simplifyLike(input, pattern.toString, escapeChar).getOrElse(l) } + case l @ LikeAll(child, patterns) => simplifyMultiLike(child, patterns, l) + case l @ NotLikeAll(child, patterns) => simplifyMultiLike(child, patterns, l) + case l @ LikeAny(child, patterns) => simplifyMultiLike(child, patterns, l) + case l @ NotLikeAny(child, patterns) => simplifyMultiLike(child, patterns, l) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala index 1812dce0da426..c06c92f9c1511 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala @@ -164,4 +164,72 @@ class LikeSimplificationSuite extends PlanTest { .analyze comparePlans(optimized5, correctAnswer5) } + + test("simplify LikeAll") { + val originalQuery = + testRelation + .where(('a likeAll( + "abc%", "abc\\%", "%xyz", "abc\\%def", "abc%def", "%mn%", "%mn\\%", "", "abc"))) + + val optimized = Optimize.execute(originalQuery.analyze) + val correctAnswer = testRelation + .where((((((StartsWith('a, "abc") && EndsWith('a, "xyz")) && + (Length('a) >= 6 && (StartsWith('a, "abc") && EndsWith('a, "def")))) && + Contains('a, "mn")) && ('a === "")) && ('a === "abc")) && + ('a likeAll("abc\\%", "abc\\%def", "%mn\\%"))) + .analyze + + comparePlans(optimized, correctAnswer) + } + + test("simplify NotLikeAll") { + val originalQuery = + testRelation + .where(('a notLikeAll( + "abc%", "abc\\%", "%xyz", "abc\\%def", "abc%def", "%mn%", "%mn\\%", "", "abc"))) + + val optimized = Optimize.execute(originalQuery.analyze) + val correctAnswer = testRelation + .where((((((Not(StartsWith('a, "abc")) && Not(EndsWith('a, "xyz"))) && + Not(Length('a) >= 6 && (StartsWith('a, "abc") && EndsWith('a, "def")))) && + Not(Contains('a, "mn"))) && Not('a === "")) && Not('a === "abc")) && + ('a notLikeAll("abc\\%", "abc\\%def", "%mn\\%"))) + .analyze + + comparePlans(optimized, correctAnswer) + } + + test("simplify LikeAny") { + val originalQuery = + testRelation + .where(('a likeAny( + "abc%", "abc\\%", "%xyz", "abc\\%def", "abc%def", "%mn%", "%mn\\%", "", "abc"))) + + val optimized = Optimize.execute(originalQuery.analyze) + val correctAnswer = testRelation + .where((((((StartsWith('a, "abc") || EndsWith('a, "xyz")) || + (Length('a) >= 6 && (StartsWith('a, "abc") && EndsWith('a, "def")))) || + Contains('a, "mn")) || ('a === "")) || ('a === "abc")) || + ('a likeAny("abc\\%", "abc\\%def", "%mn\\%"))) + .analyze + + comparePlans(optimized, correctAnswer) + } + + test("simplify NotLikeAny") { + val originalQuery = + testRelation + .where(('a notLikeAny( + "abc%", "abc\\%", "%xyz", "abc\\%def", "abc%def", "%mn%", "%mn\\%", "", "abc"))) + + val optimized = Optimize.execute(originalQuery.analyze) + val correctAnswer = testRelation + .where((((((Not(StartsWith('a, "abc")) || Not(EndsWith('a, "xyz"))) || + Not(Length('a) >= 6 && (StartsWith('a, "abc") && EndsWith('a, "def")))) || + Not(Contains('a, "mn"))) || Not('a === "")) || Not('a === "abc")) || + ('a 
notLikeAny("abc\\%", "abc\\%def", "%mn\\%"))) + .analyze + + comparePlans(optimized, correctAnswer) + } } From f64dfa8727b785f333a0c10f5f7175ab51f22764 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Wed, 6 Jan 2021 14:55:40 +0530 Subject: [PATCH 0980/1009] [SPARK-32221][K8S] Avoid possible errors due to incorrect file size or type supplied in spark conf ### What changes were proposed in this pull request? Skip files if they are binary or very large to fit the configMap's max size. ### Why are the changes needed? Config map cannot hold binary files and there is also a limit on how much data a configMap can hold. This limit can be configured by the k8s cluster admin. This PR, skips such files (with a warning) instead of failing with weird runtime errors. If such files are not skipped, then it would result in mount errors or encoding errors (if binary files are submitted). ### Does this PR introduce _any_ user-facing change? yes, in simple words avoids possible errors due to negligence (for example, placing a large file or a binary file in SPARK_CONF_DIR) and thus improves user experience. ### How was this patch tested? Added relevant tests and improved existing tests. Closes #30472 from ScrapCodes/SPARK-32221/avoid-conf-propagate-errors. Lead-authored-by: Prashant Sharma Co-authored-by: Prashant Sharma Signed-off-by: Prashant Sharma --- .../org/apache/spark/deploy/k8s/Config.scala | 8 ++ .../k8s/submit/KubernetesClientUtils.scala | 80 +++++++++++++++---- .../spark/deploy/k8s/submit/ClientSuite.scala | 21 +++-- .../submit/KubernetesClientUtilsSuite.scala | 79 ++++++++++++++++++ 4 files changed, 164 insertions(+), 24 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientUtilsSuite.scala diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index 6939de4697979..8dca875b543c6 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -99,6 +99,14 @@ private[spark] object Config extends Logging { .toSequence .createWithDefault(Nil) + val CONFIG_MAP_MAXSIZE = + ConfigBuilder("spark.kubernetes.configMap.maxSize") + .doc("Max size limit for a config map. 
This is configurable as per" + + " https://etcd.io/docs/v3.4.0/dev-guide/limit/ on k8s server end.") + .version("3.1.0") + .longConf + .createWithDefault(1572864) // 1.5 MiB + val KUBERNETES_AUTH_DRIVER_CONF_PREFIX = "spark.kubernetes.authenticate.driver" val KUBERNETES_AUTH_EXECUTOR_CONF_PREFIX = "spark.kubernetes.authenticate.executor" val KUBERNETES_AUTH_DRIVER_MOUNTED_CONF_PREFIX = "spark.kubernetes.authenticate.driver.mounted" diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientUtils.scala index 32f630f77d666..4207077677c25 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientUtils.scala @@ -18,15 +18,17 @@ package org.apache.spark.deploy.k8s.submit import java.io.{File, StringWriter} +import java.nio.charset.MalformedInputException import java.util.Properties import scala.collection.JavaConverters._ +import scala.collection.mutable import scala.io.{Codec, Source} import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapBuilder, KeyToPath} import org.apache.spark.SparkConf -import org.apache.spark.deploy.k8s.{Constants, KubernetesUtils} +import org.apache.spark.deploy.k8s.{Config, Constants, KubernetesUtils} import org.apache.spark.deploy.k8s.Constants.ENV_SPARK_CONF_DIR import org.apache.spark.internal.Logging @@ -54,8 +56,10 @@ private[spark] object KubernetesClientUtils extends Logging { /** * Build, file -> 'file's content' map of all the selected files in SPARK_CONF_DIR. */ - def buildSparkConfDirFilesMap(configMapName: String, - sparkConf: SparkConf, resolvedPropertiesMap: Map[String, String]): Map[String, String] = { + def buildSparkConfDirFilesMap( + configMapName: String, + sparkConf: SparkConf, + resolvedPropertiesMap: Map[String, String]): Map[String, String] = synchronized { val loadedConfFilesMap = KubernetesClientUtils.loadSparkConfDirFiles(sparkConf) // Add resolved spark conf to the loaded configuration files map. if (resolvedPropertiesMap.nonEmpty) { @@ -90,29 +94,71 @@ private[spark] object KubernetesClientUtils extends Logging { .build() } - private def loadSparkConfDirFiles(conf: SparkConf): Map[String, String] = { + private def orderFilesBySize(confFiles: Seq[File]): Seq[File] = { + val fileToFileSizePairs = confFiles.map(f => (f, f.getName.length + f.length())) + // sort first by name and then by length, so that during tests we have consistent results. 
+ fileToFileSizePairs.sortBy(f => f._1).sortBy(f => f._2).map(_._1) + } + + // exposed for testing + private[submit] def loadSparkConfDirFiles(conf: SparkConf): Map[String, String] = { val confDir = Option(conf.getenv(ENV_SPARK_CONF_DIR)).orElse( conf.getOption("spark.home").map(dir => s"$dir/conf")) + val maxSize = conf.get(Config.CONFIG_MAP_MAXSIZE) if (confDir.isDefined) { - val confFiles = listConfFiles(confDir.get) - logInfo(s"Spark configuration files loaded from $confDir : ${confFiles.mkString(",")}") - confFiles.map { file => - val source = Source.fromFile(file)(Codec.UTF8) - val mapping = (file.getName -> source.mkString) - source.close() - mapping - }.toMap + val confFiles: Seq[File] = listConfFiles(confDir.get, maxSize) + val orderedConfFiles = orderFilesBySize(confFiles) + var truncatedMapSize: Long = 0 + val truncatedMap = mutable.HashMap[String, String]() + val skippedFiles = mutable.HashSet[String]() + var source: Source = Source.fromString("") // init with empty source. + for (file <- orderedConfFiles) { + try { + source = Source.fromFile(file)(Codec.UTF8) + val (fileName, fileContent) = file.getName -> source.mkString + if ((truncatedMapSize + fileName.length + fileContent.length) < maxSize) { + truncatedMap.put(fileName, fileContent) + truncatedMapSize = truncatedMapSize + (fileName.length + fileContent.length) + } else { + skippedFiles.add(fileName) + } + } catch { + case e: MalformedInputException => + logWarning( + s"Unable to read a non UTF-8 encoded file ${file.getAbsolutePath}. Skipping...", e) + None + } finally { + source.close() + } + } + if (truncatedMap.nonEmpty) { + logInfo(s"Spark configuration files loaded from $confDir :" + + s" ${truncatedMap.keys.mkString(",")}") + } + if (skippedFiles.nonEmpty) { + logWarning(s"Skipped conf file(s) ${skippedFiles.mkString(",")}, due to size constraint." + + s" Please see, config: `${Config.CONFIG_MAP_MAXSIZE.key}` for more details.") + } + truncatedMap.toMap } else { Map.empty[String, String] } } - private def listConfFiles(confDir: String): Seq[File] = { - // We exclude all the template files and user provided spark conf or properties. - // As spark properties are resolved in a different step. + private def listConfFiles(confDir: String, maxSize: Long): Seq[File] = { + // At the moment configmaps do not support storing binary content (i.e. skip jar,tar,gzip,zip), + // and configMaps do not allow for size greater than 1.5 MiB(configurable). + // https://etcd.io/docs/v3.4.0/dev-guide/limit/ + def testIfTooLargeOrBinary(f: File): Boolean = (f.length() + f.getName.length > maxSize) || + f.getName.matches(".*\\.(gz|zip|jar|tar)") + + // We exclude all the template files and user provided spark conf or properties, + // Spark properties are resolved in a different step. 
+ def testIfSparkConfOrTemplates(f: File) = f.getName.matches(".*\\.template") || + f.getName.matches("spark.*(conf|properties)") + val fileFilter = (f: File) => { - f.isFile && !(f.getName.endsWith("template") || - f.getName.matches("spark.*(conf|properties)")) + f.isFile && !testIfTooLargeOrBinary(f) && !testIfSparkConfOrTemplates(f) } val confFiles: Seq[File] = { val dir = new File(confDir) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala index 1a14d524003c0..18d0c00edf2c0 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala @@ -191,25 +191,32 @@ class ClientSuite extends SparkFunSuite with BeforeAndAfter { assert(configMap.getData.get(SPARK_CONF_FILE_NAME).contains("conf2key=conf2value")) } - test("All files from SPARK_CONF_DIR, except templates and spark config " + + test("All files from SPARK_CONF_DIR, " + + "except templates, spark config, binary files and are within size limit, " + "should be populated to pod's configMap.") { def testSetup: (SparkConf, Seq[String]) = { val tempDir = Utils.createTempDir() - val sparkConf = new SparkConf(loadDefaults = false).setSparkHome(tempDir.getAbsolutePath) + val sparkConf = new SparkConf(loadDefaults = false) + .setSparkHome(tempDir.getAbsolutePath) val tempConfDir = new File(s"${tempDir.getAbsolutePath}/conf") tempConfDir.mkdir() // File names - which should not get mounted on the resultant config map. val filteredConfFileNames = - Set("spark-env.sh.template", "spark.properties", "spark-defaults.conf") - val confFileNames = for (i <- 1 to 5) yield s"testConf.$i" ++ + Set("spark-env.sh.template", "spark.properties", "spark-defaults.conf", + "test.gz", "test2.jar", "non_utf8.txt") + val confFileNames = (for (i <- 1 to 5) yield s"testConf.$i") ++ List("spark-env.sh") ++ filteredConfFileNames - val testConfFiles = for (i <- confFileNames) yield { + val testConfFiles = (for (i <- confFileNames) yield { val file = new File(s"${tempConfDir.getAbsolutePath}/$i") - Files.write(file.toPath, "conf1key=conf1value".getBytes(StandardCharsets.UTF_8)) + if (i.startsWith("non_utf8")) { // filling some non-utf-8 binary + Files.write(file.toPath, Array[Byte](0x00.toByte, 0xA1.toByte)) + } else { + Files.write(file.toPath, "conf1key=conf1value".getBytes(StandardCharsets.UTF_8)) + } file.getName - } + }) assert(tempConfDir.listFiles().length == confFileNames.length) val expectedConfFiles: Seq[String] = testConfFiles.filterNot(filteredConfFileNames.contains) (sparkConf, expectedConfFiles) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientUtilsSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientUtilsSuite.scala new file mode 100644 index 0000000000000..ee672cc041330 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientUtilsSuite.scala @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.k8s.submit + +import java.io.File +import java.nio.charset.StandardCharsets +import java.nio.file.Files + +import org.scalatest.BeforeAndAfter + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.k8s.Config +import org.apache.spark.util.Utils + +class KubernetesClientUtilsSuite extends SparkFunSuite with BeforeAndAfter { + + def testSetup(inputFiles: Map[String, Array[Byte]]): SparkConf = { + val tempDir = Utils.createTempDir() + val sparkConf = new SparkConf(loadDefaults = false) + .setSparkHome(tempDir.getAbsolutePath) + + val tempConfDir = new File(s"${tempDir.getAbsolutePath}/conf") + tempConfDir.mkdir() + for (i <- inputFiles) yield { + val file = new File(s"${tempConfDir.getAbsolutePath}/${i._1}") + Files.write(file.toPath, i._2) + file.getName + } + sparkConf + } + + test("verify load files, loads only allowed files and not the disallowed files.") { + val input: Map[String, Array[Byte]] = Map("test.txt" -> "test123", "z12.zip" -> "zZ", + "rere.jar" -> "@31", "spark.jar" -> "@31", "_test" -> "", "sample.conf" -> "conf") + .map(f => f._1 -> f._2.getBytes(StandardCharsets.UTF_8)) ++ + Map("binary-file.conf" -> Array[Byte](0x00.toByte, 0xA1.toByte)) + val sparkConf = testSetup(input) + val output = KubernetesClientUtils.loadSparkConfDirFiles(sparkConf) + val expectedOutput = Map("test.txt" -> "test123", "sample.conf" -> "conf", "_test" -> "") + assert(output === expectedOutput) + } + + test("verify load files, truncates the content to maxSize, when keys are very large in number.") { + val input = (for (i <- 10000 to 1 by -1) yield (s"testConf.${i}" -> "test123456")).toMap + val sparkConf = testSetup(input.map(f => f._1 -> f._2.getBytes(StandardCharsets.UTF_8))) + .set(Config.CONFIG_MAP_MAXSIZE.key, "60") + val output = KubernetesClientUtils.loadSparkConfDirFiles(sparkConf) + val expectedOutput = Map("testConf.1" -> "test123456", "testConf.2" -> "test123456") + assert(output === expectedOutput) + val output1 = KubernetesClientUtils.loadSparkConfDirFiles( + sparkConf.set(Config.CONFIG_MAP_MAXSIZE.key, "250000")) + assert(output1 === input) + } + + test("verify load files, truncates the content to maxSize, when keys are equal in length.") { + val input = (for (i <- 9 to 1 by -1) yield (s"testConf.${i}" -> "test123456")).toMap + val sparkConf = testSetup(input.map(f => f._1 -> f._2.getBytes(StandardCharsets.UTF_8))) + .set(Config.CONFIG_MAP_MAXSIZE.key, "80") + val output = KubernetesClientUtils.loadSparkConfDirFiles(sparkConf) + val expectedOutput = Map("testConf.1" -> "test123456", "testConf.2" -> "test123456", + "testConf.3" -> "test123456") + assert(output === expectedOutput) + } +} From ff284fb6ac624b2f38ef12f9b840be3077cd27a6 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 6 Jan 2021 18:46:20 +0900 Subject: [PATCH 0981/1009] [SPARK-30681][PYTHON][FOLLOW-UP] Keep the name similar with Scala side in higher order functions ### What 
changes were proposed in this pull request? This PR is a followup of https://github.com/apache/spark/pull/27406. It fixes the naming to match with Scala side. Note that there are a bit of inconsistency already e.g.) `col`, `e`, `expr` and `column`. This part I did not change but other names like `zero` vs `initialValue` or `col1`/`col2` vs `left`/`right` looks unnecessary. ### Why are the changes needed? To make the usage similar with Scala side, and for consistency. ### Does this PR introduce _any_ user-facing change? No, this is not released yet. ### How was this patch tested? GitHub Actions and Jenkins build will test it out. Closes #31062 from HyukjinKwon/SPARK-30681. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- python/pyspark/sql/functions.py | 16 ++++++++-------- python/pyspark/sql/functions.pyi | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index f612d2d0366f2..c9d24dc668b8e 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -4355,7 +4355,7 @@ def filter(col, f): return _invoke_higher_order_function("ArrayFilter", [col], [f]) -def aggregate(col, zero, merge, finish=None): +def aggregate(col, initialValue, merge, finish=None): """ Applies a binary operator to an initial state and all elements in the array, and reduces this to a single state. The final state is converted into the final result @@ -4372,7 +4372,7 @@ def aggregate(col, zero, merge, finish=None): ---------- col : :class:`Column` or str name of column or expression - zero : :class:`Column` or str + initialValue : :class:`Column` or str initial value. Name of column or expression merge : function a binary function ``(acc: Column, x: Column) -> Column...`` returning expression @@ -4416,19 +4416,19 @@ def aggregate(col, zero, merge, finish=None): if finish is not None: return _invoke_higher_order_function( "ArrayAggregate", - [col, zero], + [col, initialValue], [merge, finish] ) else: return _invoke_higher_order_function( "ArrayAggregate", - [col, zero], + [col, initialValue], [merge] ) -def zip_with(col1, col2, f): +def zip_with(left, right, f): """ Merge two given arrays, element-wise, into a single array using a function. If one array is shorter, nulls are appended at the end to match the length of the longer @@ -4438,9 +4438,9 @@ def zip_with(col1, col2, f): Parameters ---------- - col1 : :class:`Column` or str + left : :class:`Column` or str name of the first column or expression - col2 : :class:`Column` or str + right : :class:`Column` or str name of the second column or expression f : function a binary function ``(x1: Column, x2: Column) -> Column...`` @@ -4471,7 +4471,7 @@ def zip_with(col1, col2, f): |[foo_1, bar_2, 3]| +-----------------+ """ - return _invoke_higher_order_function("ZipWith", [col1, col2], [f]) + return _invoke_higher_order_function("ZipWith", [left, right], [f]) def transform_keys(col, f): diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi index acb17a2657d00..0cf60c0c26500 100644 --- a/python/pyspark/sql/functions.pyi +++ b/python/pyspark/sql/functions.pyi @@ -237,13 +237,13 @@ def filter(col: ColumnOrName, f: Callable[[Column], Column]) -> Column: ... def filter(col: ColumnOrName, f: Callable[[Column, Column], Column]) -> Column: ... def aggregate( col: ColumnOrName, - zero: ColumnOrName, + initialValue: ColumnOrName, merge: Callable[[Column, Column], Column], finish: Optional[Callable[[Column], Column]] = ..., ) -> Column: ... 
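```
# Editor's sketch, not part of this diff: how the renamed arguments read at the call
# site, assuming PySpark 3.1+ with an active SparkSession `spark`. Only the keyword
# names change (`initialValue`, `left`, `right`); positional calls are unaffected.
from pyspark.sql.functions import aggregate, lit, zip_with

df = spark.createDataFrame([(1, [20.0, 4.0, 2.0], [1.0, 2.0, 3.0])], ("id", "xs", "ys"))
df.select(
    aggregate("xs", initialValue=lit(0.0), merge=lambda acc, x: acc + x).alias("sum_xs"),
    zip_with(left="xs", right="ys", f=lambda x, y: x + y).alias("pair_sums"),
).show()
```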
def zip_with( - col1: ColumnOrName, - ColumnOrName: ColumnOrName, + left: ColumnOrName, + right: ColumnOrName, f: Callable[[Column, Column], Column], ) -> Column: ... def transform_keys( From 0d86a02ffbaf53c403a4c68bac0041e84acb0cdd Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 6 Jan 2021 20:31:27 +0900 Subject: [PATCH 0982/1009] [SPARK-34022][DOCS] Support latest mkdocs in SQL built-in function docs ### What changes were proposed in this pull request? This PR adds the support of the latest mkdocs, and makes the sidebar properly show. It works in lower versions too. Before: ![Screen Shot 2021-01-06 at 5 11 56 PM](https://user-images.githubusercontent.com/6477701/103745131-4e7fe400-5042-11eb-9c09-84f9f95e9fb9.png) After: ![Screen Shot 2021-01-06 at 5 10 53 PM](https://user-images.githubusercontent.com/6477701/103745139-5049a780-5042-11eb-8ded-30b6f7ef48aa.png) ### Why are the changes needed? This is a regression in the documentation. ### Does this PR introduce _any_ user-facing change? Technically no. It's not related yet. It fixes the list on the sidebar appears properly. ### How was this patch tested? Manually built the docs via `./sql/create-docs.sh` and `open ./sql/site/index.html` Closes #31061 from HyukjinKwon/SPARK-34022. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- sql/gen-sql-api-docs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/gen-sql-api-docs.py b/sql/gen-sql-api-docs.py index 61328997c1c58..72518504847db 100644 --- a/sql/gen-sql-api-docs.py +++ b/sql/gen-sql-api-docs.py @@ -195,6 +195,7 @@ def generate_sql_api_markdown(jvm, path): """ with open(path, 'w') as mdfile: + mdfile.write("# Built-in Finctions\n\n") for info in _list_function_infos(jvm): name = info.name usage = _make_pretty_usage(info.usage) From 6788304240c416d173ebdb3d544f3361c6b9fe8e Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Wed, 6 Jan 2021 21:14:45 +0900 Subject: [PATCH 0983/1009] [SPARK-33977][SQL][DOCS] Add doc for "'like any' and 'like all' operators" ### What changes were proposed in this pull request? Add doc for 'like any' and 'like all' operators in sql-ref-syntx-qry-select-like.cmd ### Why are the changes needed? make the usage of 'like any' and 'like all' known to more users ### Does this PR introduce _any_ user-facing change? Yes. Screen Shot 2021-01-06 at 21 10 38 Screen Shot 2021-01-06 at 21 11 06 Screen Shot 2021-01-06 at 21 11 20 ### How was this patch tested? No tests Closes #31008 from beliefer/SPARK-33977. Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: Takeshi Yamamuro --- docs/sql-ref-syntax-qry-select-like.md | 60 +++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/docs/sql-ref-syntax-qry-select-like.md b/docs/sql-ref-syntax-qry-select-like.md index 6211faa8d529e..3604a9ba1ea02 100644 --- a/docs/sql-ref-syntax-qry-select-like.md +++ b/docs/sql-ref-syntax-qry-select-like.md @@ -21,12 +21,14 @@ license: | ### Description -A LIKE predicate is used to search for a specific pattern. +A LIKE predicate is used to search for a specific pattern. This predicate also supports multiple patterns with quantifiers include `ANY`, `SOME` and `ALL`. ### Syntax ```sql [ NOT ] { LIKE search_pattern [ ESCAPE esc_char ] | [ RLIKE | REGEXP ] regex_pattern } + +[ NOT ] { LIKE quantifiers ( search_pattern [ , ... ]) } ``` ### Parameters @@ -45,6 +47,10 @@ A LIKE predicate is used to search for a specific pattern. * **regex_pattern** Specifies a regular expression search pattern to be searched by the `RLIKE` or `REGEXP` clause. 
+ +* **quantifiers** + + Specifies the predicate quantifiers include `ANY`, `SOME` and `ALL`. `ANY` or `SOME` means if one of the patterns matches the input, then return true; `ALL` means if all the patterns matches the input, then return true. ### Examples @@ -111,6 +117,58 @@ SELECT * FROM person WHERE name LIKE '%$_%' ESCAPE '$'; +---+------+---+ |500|Evan_W| 16| +---+------+---+ + +SELECT * FROM person WHERE name LIKE ALL ('%an%', '%an'); ++---+----+----+ +| id|name| age| ++---+----+----+ +|400| Dan| 50| ++---+----+----+ + +SELECT * FROM person WHERE name LIKE ANY ('%an%', '%an'); ++---+------+---+ +| id| name|age| ++---+------+---+ +|400| Dan| 50| +|500|Evan_W| 16| ++---+------+---+ + +SELECT * FROM person WHERE name LIKE SOME ('%an%', '%an'); ++---+------+---+ +| id| name|age| ++---+------+---+ +|400| Dan| 50| +|500|Evan_W| 16| ++---+------+---+ + +SELECT * FROM person WHERE name NOT LIKE ALL ('%an%', '%an'); ++---+----+----+ +| id|name| age| ++---+----+----+ +|100|John| 30| +|200|Mary|null| +|300|Mike| 80| ++---+----+----+ + +SELECT * FROM person WHERE name NOT LIKE ANY ('%an%', '%an'); ++---+------+----+ +| id| name| age| ++---+------+----+ +|100| John| 30| +|200| Mary|null| +|300| Mike| 80| +|500|Evan_W| 16| ++---+------+----+ + +SELECT * FROM person WHERE name NOT LIKE SOME ('%an%', '%an'); ++---+------+----+ +| id| name| age| ++---+------+----+ +|100| John| 30| +|200| Mary|null| +|300| Mike| 80| +|500|Evan_W| 16| ++---+------+----+ ``` ### Related Statements From 3cdc4ef5b41ce1254610436a8721ea517124d62e Mon Sep 17 00:00:00 2001 From: angerszhu Date: Wed, 6 Jan 2021 13:45:48 +0000 Subject: [PATCH 0984/1009] [SPARK-32685][SQL][FOLLOW-UP] Update migration guide about change default filed.delim to '\t' when user specifies serde ### What changes were proposed in this pull request? Update migration guide according to https://github.com/apache/spark/pull/30942#issuecomment-755054562 ### Why are the changes needed? update migration guide. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Not need Closes #31051 from AngersZhuuuu/SPARK-32685-FOLLOW-UP. Authored-by: angerszhu Signed-off-by: Wenchen Fan --- docs/sql-migration-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index bd54554baa09d..8cf1a9c6f7017 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -30,7 +30,7 @@ license: | - In Spark 3.2, `ALTER TABLE .. RENAME TO PARTITION` throws `PartitionAlreadyExistsException` instead of `AnalysisException` for tables from Hive external when the target partition already exists. - - In Spark 3.2, script transform default FIELD DELIMIT is `\u0001` for no serde mode. In Spark 3.1 or earlier, the default FIELD DELIMIT is `\t`. + - In Spark 3.2, script transform default FIELD DELIMIT is `\u0001` for no serde mode, serde property `field.delim` is `\t` for Hive serde mode when user specifies serde. In Spark 3.1 or earlier, the default FIELD DELIMIT is `\t`, serde property `field.delim` is `\u0001` for Hive serde mode when user specifies serde. ## Upgrading from Spark SQL 3.0 to 3.1 From a0269bb419a37c31850e02884385b889cd153133 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Wed, 6 Jan 2021 09:28:22 -0800 Subject: [PATCH 0985/1009] [SPARK-34022][DOCS][FOLLOW-UP] Fix typo in SQL built-in function docs ### What changes were proposed in this pull request? This PR is a follow-up of #31061. 
It fixes a typo in a document: `Finctions` -> `Functions` ### Why are the changes needed? Make the change better documented. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A Closes #31069 from kiszk/SPARK-34022-followup. Authored-by: Kazuaki Ishizaki Signed-off-by: Dongjoon Hyun --- sql/gen-sql-api-docs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/gen-sql-api-docs.py b/sql/gen-sql-api-docs.py index 72518504847db..2f734093b106c 100644 --- a/sql/gen-sql-api-docs.py +++ b/sql/gen-sql-api-docs.py @@ -195,7 +195,7 @@ def generate_sql_api_markdown(jvm, path): """ with open(path, 'w') as mdfile: - mdfile.write("# Built-in Finctions\n\n") + mdfile.write("# Built-in Functions\n\n") for info in _list_function_infos(jvm): name = info.name usage = _make_pretty_usage(info.usage) From 8bb70bf0d646f6d54d17690d23ee935e452e747e Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 6 Jan 2021 12:59:47 -0800 Subject: [PATCH 0986/1009] [SPARK-34029][SQL][TESTS] Add OrcEncryptionSuite and FakeKeyProvider ### What changes were proposed in this pull request? This PR aims to add a basis for columnar encryption test framework by add `OrcEncryptionSuite` and `FakeKeyProvider`. Please note that we will improve more in both Apache Spark and Apache ORC in Apache Spark 3.2.0 timeframe. ### Why are the changes needed? Apache ORC 1.6 supports columnar encryption. ### Does this PR introduce _any_ user-facing change? No. This is for a test case. ### How was this patch tested? Pass the newly added test suite. Closes #31065 from dongjoon-hyun/SPARK-34029. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- project/SparkBuild.scala | 1 + .../datasources/orc/FakeKeyProvider.java | 144 ++++++++++++++++++ ...pache.hadoop.crypto.key.KeyProviderFactory | 16 ++ .../datasources/orc/OrcEncryptionSuite.scala | 98 ++++++++++++ 4 files changed, 259 insertions(+) create mode 100644 sql/core/src/test/java/test/org/apache/spark/sql/execution/datasources/orc/FakeKeyProvider.java create mode 100644 sql/core/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcEncryptionSuite.scala diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 668701be0ae98..f126ee35efcca 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -489,6 +489,7 @@ object SparkParallelTestGrouping { "org.apache.spark.sql.catalyst.expressions.HashExpressionsSuite", "org.apache.spark.sql.catalyst.expressions.CastSuite", "org.apache.spark.sql.catalyst.expressions.MathExpressionsSuite", + "org.apache.spark.sql.execution.datasources.orc.OrcEncryptionSuite", "org.apache.spark.sql.hive.HiveExternalCatalogSuite", "org.apache.spark.sql.hive.StatisticsSuite", "org.apache.spark.sql.hive.client.VersionsSuite", diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/execution/datasources/orc/FakeKeyProvider.java b/sql/core/src/test/java/test/org/apache/spark/sql/execution/datasources/orc/FakeKeyProvider.java new file mode 100644 index 0000000000000..c48543802eb33 --- /dev/null +++ b/sql/core/src/test/java/test/org/apache/spark/sql/execution/datasources/orc/FakeKeyProvider.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package test.org.apache.spark.sql.execution.datasources.orc; + +import java.io.IOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.crypto.key.KeyProvider; +import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension; +import org.apache.hadoop.crypto.key.KeyProviderFactory; +import org.apache.hadoop.crypto.key.kms.KMSClientProvider; + +/** + * A Hadoop KeyProvider that lets us test the interaction + * with the Hadoop code. + * + * https://github.com/apache/orc/blob/rel/release-1.6.6/java/tools/src/test/org/apache/orc/impl/FakeKeyProvider.java + * + * This file intentionally keeps the original file except + * (1) package name, (2) import order, (3) a few indentation + */ +public class FakeKeyProvider extends KeyProvider { + // map from key name to metadata + private final Map keyMetdata = new HashMap<>(); + // map from key version name to material + private final Map keyVersions = new HashMap<>(); + + public FakeKeyProvider(Configuration conf) { + super(conf); + } + + @Override + public KeyVersion getKeyVersion(String name) { + return keyVersions.get(name); + } + + @Override + public List getKeys() { + return new ArrayList<>(keyMetdata.keySet()); + } + + @Override + public List getKeyVersions(String name) { + List result = new ArrayList<>(); + Metadata meta = getMetadata(name); + for(int v=0; v < meta.getVersions(); ++v) { + String versionName = buildVersionName(name, v); + KeyVersion material = keyVersions.get(versionName); + if (material != null) { + result.add(material); + } + } + return result; + } + + @Override + public Metadata getMetadata(String name) { + return keyMetdata.get(name); + } + + @Override + public KeyVersion createKey(String name, byte[] bytes, Options options) { + String versionName = buildVersionName(name, 0); + keyMetdata.put(name, new TestMetadata(options.getCipher(), + options.getBitLength(), 1)); + KeyVersion result = new KMSClientProvider.KMSKeyVersion(name, versionName, bytes); + keyVersions.put(versionName, result); + return result; + } + + @Override + public void deleteKey(String name) { + throw new UnsupportedOperationException("Can't delete keys"); + } + + @Override + public KeyVersion rollNewVersion(String name, byte[] bytes) { + TestMetadata key = keyMetdata.get(name); + String versionName = buildVersionName(name, key.addVersion()); + KeyVersion result = new KMSClientProvider.KMSKeyVersion(name, versionName, + bytes); + keyVersions.put(versionName, result); + return result; + } + + @Override + public void flush() { + // Nothing + } + + static class TestMetadata extends KeyProvider.Metadata { + + TestMetadata(String cipher, int bitLength, int versions) { + super(cipher, bitLength, null, null, null, versions); + } + + public int addVersion() { + return super.addVersion(); + } + } + + public static class Factory 
extends KeyProviderFactory { + + @Override + public KeyProvider createProvider(URI uri, Configuration conf) throws IOException { + if ("test".equals(uri.getScheme())) { + KeyProvider provider = new FakeKeyProvider(conf); + // populate a couple keys into the provider + byte[] piiKey = new byte[]{0,1,2,3,4,5,6,7,8,9,0xa,0xb,0xc,0xd,0xe,0xf}; + org.apache.hadoop.crypto.key.KeyProvider.Options aes128 = new KeyProvider.Options(conf); + provider.createKey("pii", piiKey, aes128); + byte[] piiKey2 = new byte[]{0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, + 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f}; + provider.rollNewVersion("pii", piiKey2); + byte[] secretKey = new byte[]{0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, + 0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f}; + provider.createKey("secret", secretKey, aes128); + return KeyProviderCryptoExtension.createKeyProviderCryptoExtension(provider); + } + return null; + } + } +} diff --git a/sql/core/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory b/sql/core/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory new file mode 100644 index 0000000000000..f436622b5fb42 --- /dev/null +++ b/sql/core/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +test.org.apache.spark.sql.execution.datasources.orc.FakeKeyProvider$Factory diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcEncryptionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcEncryptionSuite.scala new file mode 100644 index 0000000000000..fac3cef5801dd --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcEncryptionSuite.scala @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources.orc + +import org.apache.spark.sql.Row +import org.apache.spark.sql.test.SharedSparkSession + +class OrcEncryptionSuite extends OrcTest with SharedSparkSession { + import testImplicits._ + + val originalData = Seq(("123456789", "dongjoon@apache.org", "Dongjoon Hyun")) + val rowDataWithoutKey = + Row(null, "841626795E7D351555B835A002E3BF10669DE9B81C95A3D59E10865AC37EA7C3", "Dongjoon Hyun") + + test("Write and read an encrypted file") { + val df = originalData.toDF("ssn", "email", "name") + + withTempPath { dir => + val path = dir.getAbsolutePath + withSQLConf( + "hadoop.security.key.provider.path" -> "test:///", + "orc.key.provider" -> "hadoop", + "orc.encrypt" -> "pii:ssn,email", + "orc.mask" -> "nullify:ssn;sha256:email") { + df.write.mode("overwrite").orc(path) + checkAnswer(spark.read.orc(path), df) + } + + withSQLConf( + "orc.key.provider" -> "memory", + "orc.encrypt" -> "pii:ssn,email", + "orc.mask" -> "nullify:ssn;sha256:email") { + checkAnswer(spark.read.orc(path), rowDataWithoutKey) + } + } + } + + test("Write and read an encrypted table") { + val df = originalData.toDF("ssn", "email", "name") + + withTempPath { dir => + val path = dir.getAbsolutePath + withTable("encrypted") { + sql( + s""" + |CREATE TABLE encrypted ( + | ssn STRING, + | email STRING, + | name STRING + |) + |USING ORC + |LOCATION "$path" + |OPTIONS ( + | hadoop.security.key.provider.path "test:///", + | orc.key.provider "hadoop", + | orc.encrypt "pii:ssn,email", + | orc.mask "nullify:ssn;sha256:email" + |) + |""".stripMargin) + sql("INSERT INTO encrypted VALUES('123456789', 'dongjoon@apache.org', 'Dongjoon Hyun')") + checkAnswer(sql("SELECT * FROM encrypted"), df) + } + withTable("normal") { + sql( + s""" + |CREATE TABLE normal ( + | ssn STRING, + | email STRING, + | name STRING + |) + |USING ORC + |LOCATION "$path" + |OPTIONS ( + | orc.key.provider "memory", + | orc.encrypt "pii:ssn,email", + | orc.mask "nullify:ssn;sha256:email" + |) + |""".stripMargin) + checkAnswer(sql("SELECT * FROM normal"), rowDataWithoutKey) + } + } + } +} From f9daf035f473fea12a2ee67428db8d78f29973d5 Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Wed, 6 Jan 2021 17:22:14 -0800 Subject: [PATCH 0987/1009] [SPARK-33806][SQL][FOLLOWUP] Fold RepartitionExpression num partition should check if partition expression is empty ### What changes were proposed in this pull request? Add check partition expressions is empty. ### Why are the changes needed? We should keep `spark.range(1).hint("REPARTITION_BY_RANGE")` has default shuffle number instead of 1. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Add test. Closes #31074 from ulysses-you/SPARK-33806-FOLLOWUP. 
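As an editor's illustration (not part of this patch), the intended user-visible effect from PySpark, assuming the default `spark.sql.shuffle.partitions` of 200; the expected counts in the comments are a sketch, not output captured from a real session.
```
from pyspark.sql.functions import lit

# Hint with no partition expressions: keep the default shuffle partition number.
spark.range(1).hint("REPARTITION_BY_RANGE").rdd.getNumPartitions()   # expected: 200
# All partition expressions foldable: still folded to a single partition (SPARK-33806).
spark.range(1).repartition(lit(1)).rdd.getNumPartitions()            # expected: 1
```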
Authored-by: ulysses-you Signed-off-by: Dongjoon Hyun --- .../plans/logical/basicLogicalOperators.scala | 2 +- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index ee7db7ae83542..9e06f9bec7830 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -1050,7 +1050,7 @@ case class RepartitionByExpression( val numPartitions = if (optNumPartitions.nonEmpty) { optNumPartitions.get } else { - if (partitionExpressions.forall(_.foldable)) { + if (partitionExpressions.nonEmpty && partitionExpressions.forall(_.foldable)) { 1 } else { SQLConf.get.numShufflePartitions diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 44f3c3449ddda..3f55a88f19505 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -3768,6 +3768,17 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } + test("Fold RepartitionExpression num partition should check if partition expression is empty") { + withSQLConf((SQLConf.SHUFFLE_PARTITIONS.key, "5")) { + val df = spark.range(1).hint("REPARTITION_BY_RANGE") + val plan = df.queryExecution.optimizedPlan + val res = plan.collect { + case r: RepartitionByExpression if r.numPartitions == 5 => true + } + assert(res.nonEmpty) + } + } + test("SPARK-33593: Vector reader got incorrect data with binary partition value") { Seq("false", "true").foreach(value => { withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> value) { From 9b5df2afaa5df85f149ccf73b7a6b78ab0f393bc Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 6 Jan 2021 20:19:16 -0800 Subject: [PATCH 0988/1009] [SPARK-34036][DOCS] Update ORC data source documentation ### What changes were proposed in this pull request? This PR aims to update SQL documentation about ORC data sources. New structure looks like the following. - ORC Implementation - Vectorized Reader - Schema Merging - Zstandard - Bloom Filters - Columnar Encryption - Hive metastore ORC table conversion - Configuration ### Why are the changes needed? This document is not up-to-date. Apache Spark 3.2.0 can utilize new improvements from Apache ORC 1.6.6. ### Does this PR introduce _any_ user-facing change? No, this is a documentation. ### How was this patch tested? Manual. 
``` SKIP_API=1 jekyll build ``` --- **BEFORE** ![Screen Shot 2021-01-06 at 5 08 19 PM](https://user-images.githubusercontent.com/9700541/103838399-d0bbd880-5041-11eb-8757-297728d2793f.png) --- **AFTER** ![Screen Shot 2021-01-06 at 7 03 38 PM](https://user-images.githubusercontent.com/9700541/103845972-0963ae00-5052-11eb-905e-8e8b335c760a.png) ![Screen Shot 2021-01-06 at 7 03 49 PM](https://user-images.githubusercontent.com/9700541/103845971-08cb1780-5052-11eb-9b2a-d3acfa4b9278.png) ![Screen Shot 2021-01-06 at 7 03 59 PM](https://user-images.githubusercontent.com/9700541/103845970-08328100-5052-11eb-8982-7079fd7b0efc.png) ![Screen Shot 2021-01-06 at 7 04 10 PM](https://user-images.githubusercontent.com/9700541/103845968-08328100-5052-11eb-9ef5-db99c7cc64d3.png) ![Screen Shot 2021-01-06 at 7 04 16 PM](https://user-images.githubusercontent.com/9700541/103845963-07015400-5052-11eb-955f-8126d417e8aa.png) Closes #31075 from dongjoon-hyun/SPARK-34036. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- docs/sql-data-sources-orc.md | 135 +++++++++++++++++++++++++++++++++-- 1 file changed, 129 insertions(+), 6 deletions(-) diff --git a/docs/sql-data-sources-orc.md b/docs/sql-data-sources-orc.md index 4c4b3b1eee8c2..f5c9677c343dc 100644 --- a/docs/sql-data-sources-orc.md +++ b/docs/sql-data-sources-orc.md @@ -19,12 +19,115 @@ license: | limitations under the License. --- -Since Spark 2.3, Spark supports a vectorized ORC reader with a new ORC file format for ORC files. -To do that, the following configurations are newly added. The vectorized reader is used for the -native ORC tables (e.g., the ones created using the clause `USING ORC`) when `spark.sql.orc.impl` -is set to `native` and `spark.sql.orc.enableVectorizedReader` is set to `true`. For the Hive ORC -serde tables (e.g., the ones created using the clause `USING HIVE OPTIONS (fileFormat 'ORC')`), -the vectorized reader is used when `spark.sql.hive.convertMetastoreOrc` is also set to `true`. +* Table of contents +{:toc} + +[Apache ORC](https://orc.apache.org) is a columnar format which has more advanced features like native zstd compression, bloom filter and columnar encryption. + +### ORC Implementation + +Spark supports two ORC implementations (`native` and `hive`) which is controlled by `spark.sql.orc.impl`. +Two implementations share most functionalities with different design goals. +- `native` implementation is designed to follow Spark's data source behavior like `Parquet`. +- `hive` implementation is designed to follow Hive's behavior and uses Hive SerDe. + +For example, historically, `native` implementation handles `CHAR/VARCHAR` with Spark's native `String` while `hive` implementation handles it via Hive `CHAR/VARCHAR`. The query results are different. Since Spark 3.1.0, [SPARK-33480](https://issues.apache.org/jira/browse/SPARK-33480) removes this difference by supporting `CHAR/VARCHAR` from Spark-side. + +### Vectorized Reader + +`native` implementation supports a vectorized ORC reader and has been the default ORC implementaion since Spark 2.3. +The vectorized reader is used for the native ORC tables (e.g., the ones created using the clause `USING ORC`) when `spark.sql.orc.impl` is set to `native` and `spark.sql.orc.enableVectorizedReader` is set to `true`. +For the Hive ORC serde tables (e.g., the ones created using the clause `USING HIVE OPTIONS (fileFormat 'ORC')`), +the vectorized reader is used when `spark.sql.hive.convertMetastoreOrc` is also set to `true`, and is turned on by default. 
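Editor's sketch (not part of the committed page): the three configurations named in the paragraph above, set from PySpark. The values shown are their defaults and the table name is illustrative only.
```
spark.conf.set("spark.sql.orc.impl", "native")
spark.conf.set("spark.sql.orc.enableVectorizedReader", "true")
spark.conf.set("spark.sql.hive.convertMetastoreOrc", "true")
spark.sql("CREATE TABLE orc_native_example (id BIGINT, name STRING) USING ORC")
```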
+ +### Schema Merging + +Like Protocol Buffer, Avro, and Thrift, ORC also supports schema evolution. Users can start with +a simple schema, and gradually add more columns to the schema as needed. In this way, users may end +up with multiple ORC files with different but mutually compatible schemas. The ORC data +source is now able to automatically detect this case and merge schemas of all these files. + +Since schema merging is a relatively expensive operation, and is not a necessity in most cases, we +turned it off by default . You may enable it by + +1. setting data source option `mergeSchema` to `true` when reading ORC files, or +2. setting the global SQL option `spark.sql.orc.mergeSchema` to `true`. + +### Zstandard + +Spark supports both Hadoop 2 and 3. Since Spark 3.2, you can take advantage +of Zstandard compression in ORC files on both Hadoop versions. +Please see [Zstandard](https://facebook.github.io/zstd/) for the benefits. + +
      +
      + +{% highlight sql %} +CREATE TABLE compressed ( + key STRING, + value STRING +) +USING ORC +OPTIONS ( + compression 'zstd' +) +{% endhighlight %} +
      +
      + +### Bloom Filters + +You can control bloom filters and dictionary encodings for ORC data sources. The following ORC example will create bloom filter and use dictionary encoding only for `favorite_color`. To find more detailed information about the extra ORC options, visit the official Apache ORC websites. + +
      +
      + +{% highlight sql %} +CREATE TABLE users_with_options ( + name STRING, + favorite_color STRING, + favorite_numbers array +) +USING ORC +OPTIONS ( + orc.bloom.filter.columns 'favorite_color', + orc.dictionary.key.threshold '1.0', + orc.column.encoding.direct 'name' +) +{% endhighlight %} +
      +
      + +### Columnar Encryption + +Since Spark 3.2, columnar encryption is supported for ORC tables with Apache ORC 1.6. +The following example is using Hadoop KMS as a key provider with the given location. +Please visit [Apache Hadoop KMS](https://hadoop.apache.org/docs/current/hadoop-kms/index.html) for the detail. + +
      +
      +{% highlight sql %} +CREATE TABLE encrypted ( + ssn STRING, + email STRING, + name STRING +) +USING ORC +OPTIONS ( + hadoop.security.key.provider.path "kms://http@localhost:9600/kms", + orc.key.provider "hadoop", + orc.encrypt "pii:ssn,email", + orc.mask "nullify:ssn;sha256:email" +) +{% endhighlight %} +
      +
      + +### Hive metastore ORC table conversion + +When reading from Hive metastore ORC tables and inserting to Hive metastore ORC tables, Spark SQL will try to use its own ORC support instead of Hive SerDe for better performance. For CTAS statement, only non-partitioned Hive metastore ORC tables are converted. This behavior is controlled by the `spark.sql.hive.convertMetastoreOrc` configuration, and is turned on by default. + +### Configuration @@ -48,4 +151,24 @@ the vectorized reader is used when `spark.sql.hive.convertMetastoreOrc` is also + + + + + + + + + + + +
      Property NameDefaultMeaningSince Version
      2.3.0
      spark.sql.orc.mergeSchemafalse +

      + When true, the ORC data source merges schemas collected from all data files, + otherwise the schema is picked from a random data file. +

      +
      3.0.0
      spark.sql.hive.convertMetastoreOrctrue + When set to false, Spark SQL will use the Hive SerDe for ORC tables instead of the built in + support. + 2.0.0
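Editor's sketch before the next patch (not part of the commit above): the `mergeSchema` and Zstandard options that the page documents, driven from PySpark. The path is a placeholder, and the zstd write assumes Spark 3.2+ with ORC 1.6, as the page states.
```
path = "/tmp/orc_zstd_example"  # placeholder location
spark.range(10).write.mode("overwrite").option("compression", "zstd").orc(path)
spark.read.option("mergeSchema", "true").orc(path).show()
```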
      From 0ba3ab4c23ee1cd3785caa0fde76862dce478530 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Thu, 7 Jan 2021 13:58:13 +0900 Subject: [PATCH 0989/1009] [SPARK-34021][R] Fix hyper links in SparkR documentation for CRAN submission ### What changes were proposed in this pull request? 3.0.1 CRAN submission was failed as the reason below: ``` Found the following (possibly) invalid URLs: URL: http://jsonlines.org/ (moved to https://jsonlines.org/) From: man/read.json.Rd man/write.json.Rd Status: 200 Message: OK URL: https://dl.acm.org/citation.cfm?id=1608614 (moved to https://dl.acm.org/doi/10.1109/MC.2009.263) From: inst/doc/sparkr-vignettes.html Status: 200 Message: OK ``` The links were being redirected now. This PR checked all hyperlinks in the docs such as `href{...}` and `url{...}`, and fixed all in SparkR: - Fix two problems above. - Fix http to https - Fix `https://www.apache.org/ https://spark.apache.org/` -> `https://www.apache.org https://spark.apache.org`. ### Why are the changes needed? For CRAN submission. ### Does this PR introduce _any_ user-facing change? Virtually no because it's just cleanup that CRAN requires. ### How was this patch tested? Manually tested by clicking the links Closes #31058 from HyukjinKwon/SPARK-34021. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- R/pkg/DESCRIPTION | 2 +- R/pkg/R/DataFrame.R | 2 +- R/pkg/R/SQLContext.R | 2 +- R/pkg/R/install.R | 6 +++--- R/pkg/R/mllib_classification.R | 4 ++-- R/pkg/R/mllib_clustering.R | 4 ++-- R/pkg/R/mllib_recommendation.R | 2 +- R/pkg/R/mllib_regression.R | 2 +- R/pkg/R/mllib_stat.R | 2 +- R/pkg/R/mllib_tree.R | 12 ++++++------ R/pkg/R/stats.R | 3 ++- R/pkg/vignettes/sparkr-vignettes.Rmd | 2 +- 12 files changed, 22 insertions(+), 21 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 20433362459d9..c141baa51b8cb 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -11,7 +11,7 @@ Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), email = "felixcheung@apache.org"), person(family = "The Apache Software Foundation", role = c("aut", "cph"))) License: Apache License (== 2.0) -URL: https://www.apache.org/ https://spark.apache.org/ +URL: https://www.apache.org https://spark.apache.org BugReports: https://spark.apache.org/contributing.html SystemRequirements: Java (>= 8, < 12) Depends: diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 8ca338f09969b..72d96151f6371 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -880,7 +880,7 @@ setMethod("toJSON", #' Save the contents of SparkDataFrame as a JSON file #' -#' Save the contents of a SparkDataFrame as a JSON file (\href{http://jsonlines.org/}{ +#' Save the contents of a SparkDataFrame as a JSON file (\href{https://jsonlines.org/}{ #' JSON Lines text format or newline-delimited JSON}). Files written out #' with this method can be read back in as a SparkDataFrame using read.json(). #' diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 5ed0481f33d8f..14262e1a74ab0 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -374,7 +374,7 @@ setMethod("toDF", signature(x = "RDD"), #' Create a SparkDataFrame from a JSON file. #' #' Loads a JSON file, returning the result as a SparkDataFrame -#' By default, (\href{http://jsonlines.org/}{JSON Lines text format or newline-delimited JSON} +#' By default, (\href{https://jsonlines.org/}{JSON Lines text format or newline-delimited JSON} #' ) is supported. For JSON (one record per file), set a named property \code{multiLine} to #' \code{TRUE}. 
#' It goes through the entire dataset once to determine the schema. diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index 5bc5ae07c5f03..bbb9188cd083f 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -39,11 +39,11 @@ #' version number in the format of "x.y" where x and y are integer. #' If \code{hadoopVersion = "without"}, "Hadoop free" build is installed. #' See -#' \href{http://spark.apache.org/docs/latest/hadoop-provided.html}{ +#' \href{https://spark.apache.org/docs/latest/hadoop-provided.html}{ #' "Hadoop Free" Build} for more information. #' Other patched version names can also be used, e.g. \code{"cdh4"} #' @param mirrorUrl base URL of the repositories to use. The directory layout should follow -#' \href{http://www.apache.org/dyn/closer.lua/spark/}{Apache mirrors}. +#' \href{https://www.apache.org/dyn/closer.lua/spark/}{Apache mirrors}. #' @param localDir a local directory where Spark is installed. The directory contains #' version-specific folders of Spark packages. Default is path to #' the cache directory: @@ -64,7 +64,7 @@ #'} #' @note install.spark since 2.1.0 #' @seealso See available Hadoop versions: -#' \href{http://spark.apache.org/downloads.html}{Apache Spark} +#' \href{https://spark.apache.org/downloads.html}{Apache Spark} install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL, localDir = NULL, overwrite = FALSE) { sparkHome <- Sys.getenv("SPARK_HOME") diff --git a/R/pkg/R/mllib_classification.R b/R/pkg/R/mllib_classification.R index ec83b6bd406a7..71ebe4e26ef63 100644 --- a/R/pkg/R/mllib_classification.R +++ b/R/pkg/R/mllib_classification.R @@ -425,7 +425,7 @@ setMethod("write.ml", signature(object = "LogisticRegressionModel", path = "char #' predictions on new data, and \code{write.ml}/\code{read.ml} to save/load fitted models. #' Only categorical data is supported. #' For more details, see -#' \href{http://spark.apache.org/docs/latest/ml-classification-regression.html}{ +#' \href{https://spark.apache.org/docs/latest/ml-classification-regression.html}{ #' Multilayer Perceptron} #' #' @param data a \code{SparkDataFrame} of observations and labels for model fitting. @@ -574,7 +574,7 @@ setMethod("write.ml", signature(object = "MultilayerPerceptronClassificationMode #' @rdname spark.naiveBayes #' @aliases spark.naiveBayes,SparkDataFrame,formula-method #' @name spark.naiveBayes -#' @seealso e1071: \url{https://cran.r-project.org/package=e1071} +#' @seealso e1071: \url{https://cran.r-project.org/web/packages/e1071/index.html} #' @examples #' \dontrun{ #' data <- as.data.frame(UCBAdmissions) diff --git a/R/pkg/R/mllib_clustering.R b/R/pkg/R/mllib_clustering.R index 8bc15353465d8..ff7cbd8fc9b74 100644 --- a/R/pkg/R/mllib_clustering.R +++ b/R/pkg/R/mllib_clustering.R @@ -204,7 +204,7 @@ setMethod("write.ml", signature(object = "BisectingKMeansModel", path = "charact #' @return \code{spark.gaussianMixture} returns a fitted multivariate gaussian mixture model. #' @rdname spark.gaussianMixture #' @name spark.gaussianMixture -#' @seealso mixtools: \url{https://cran.r-project.org/package=mixtools} +#' @seealso mixtools: \url{https://cran.r-project.org/web/packages/mixtools/index.html} #' @examples #' \dontrun{ #' sparkR.session() @@ -483,7 +483,7 @@ setMethod("write.ml", signature(object = "KMeansModel", path = "character"), #' @return \code{spark.lda} returns a fitted Latent Dirichlet Allocation model. 
#' @rdname spark.lda #' @aliases spark.lda,SparkDataFrame-method -#' @seealso topicmodels: \url{https://cran.r-project.org/package=topicmodels} +#' @seealso topicmodels: \url{https://cran.r-project.org/web/packages/topicmodels/index.html} #' @examples #' \dontrun{ #' text <- read.df("data/mllib/sample_lda_libsvm_data.txt", source = "libsvm") diff --git a/R/pkg/R/mllib_recommendation.R b/R/pkg/R/mllib_recommendation.R index d238ff93ed245..87a1bc991f812 100644 --- a/R/pkg/R/mllib_recommendation.R +++ b/R/pkg/R/mllib_recommendation.R @@ -30,7 +30,7 @@ setClass("ALSModel", representation(jobj = "jobj")) #' to make predictions on new data, and \code{write.ml}/\code{read.ml} to save/load fitted models. #' #' For more details, see -#' \href{http://spark.apache.org/docs/latest/ml-collaborative-filtering.html}{MLlib: +#' \href{https://spark.apache.org/docs/latest/ml-collaborative-filtering.html}{MLlib: #' Collaborative Filtering}. #' #' @param data a SparkDataFrame for training. diff --git a/R/pkg/R/mllib_regression.R b/R/pkg/R/mllib_regression.R index b2228a141689b..db9f367407df3 100644 --- a/R/pkg/R/mllib_regression.R +++ b/R/pkg/R/mllib_regression.R @@ -475,7 +475,7 @@ setMethod("write.ml", signature(object = "IsotonicRegressionModel", path = "char #' @param ... additional arguments passed to the method. #' @return \code{spark.survreg} returns a fitted AFT survival regression model. #' @rdname spark.survreg -#' @seealso survival: \url{https://cran.r-project.org/package=survival} +#' @seealso survival: \url{https://cran.r-project.org/web/packages/survival/index.html} #' @examples #' \dontrun{ #' df <- createDataFrame(ovarian) diff --git a/R/pkg/R/mllib_stat.R b/R/pkg/R/mllib_stat.R index 6db4d5d4831dd..f82fb589bb5a5 100644 --- a/R/pkg/R/mllib_stat.R +++ b/R/pkg/R/mllib_stat.R @@ -49,7 +49,7 @@ setClass("KSTest", representation(jobj = "jobj")) #' @rdname spark.kstest #' @aliases spark.kstest,SparkDataFrame-method #' @name spark.kstest -#' @seealso \href{http://spark.apache.org/docs/latest/mllib-statistics.html#hypothesis-testing}{ +#' @seealso \href{https://spark.apache.org/docs/latest/mllib-statistics.html#hypothesis-testing}{ #' MLlib: Hypothesis Testing} #' @examples #' \dontrun{ diff --git a/R/pkg/R/mllib_tree.R b/R/pkg/R/mllib_tree.R index b5a014b0a3cfd..f3192ee9b1382 100644 --- a/R/pkg/R/mllib_tree.R +++ b/R/pkg/R/mllib_tree.R @@ -127,9 +127,9 @@ print.summary.decisionTree <- function(x) { #' \code{write.ml}/\code{read.ml} to save/load fitted models. #' For more details, see # nolint start -#' \href{http://spark.apache.org/docs/latest/ml-classification-regression.html#gradient-boosted-tree-regression}{ +#' \href{https://spark.apache.org/docs/latest/ml-classification-regression.html#gradient-boosted-tree-regression}{ #' GBT Regression} and -#' \href{http://spark.apache.org/docs/latest/ml-classification-regression.html#gradient-boosted-tree-classifier}{ +#' \href{https://spark.apache.org/docs/latest/ml-classification-regression.html#gradient-boosted-tree-classifier}{ #' GBT Classification} # nolint end #' @@ -343,9 +343,9 @@ setMethod("write.ml", signature(object = "GBTClassificationModel", path = "chara #' save/load fitted models. 
#' For more details, see # nolint start -#' \href{http://spark.apache.org/docs/latest/ml-classification-regression.html#random-forest-regression}{ +#' \href{https://spark.apache.org/docs/latest/ml-classification-regression.html#random-forest-regression}{ #' Random Forest Regression} and -#' \href{http://spark.apache.org/docs/latest/ml-classification-regression.html#random-forest-classifier}{ +#' \href{https://spark.apache.org/docs/latest/ml-classification-regression.html#random-forest-classifier}{ #' Random Forest Classification} # nolint end #' @@ -568,9 +568,9 @@ setMethod("write.ml", signature(object = "RandomForestClassificationModel", path #' save/load fitted models. #' For more details, see # nolint start -#' \href{http://spark.apache.org/docs/latest/ml-classification-regression.html#decision-tree-regression}{ +#' \href{https://spark.apache.org/docs/latest/ml-classification-regression.html#decision-tree-regression}{ #' Decision Tree Regression} and -#' \href{http://spark.apache.org/docs/latest/ml-classification-regression.html#decision-tree-classifier}{ +#' \href{https://spark.apache.org/docs/latest/ml-classification-regression.html#decision-tree-classifier}{ #' Decision Tree Classification} # nolint end #' diff --git a/R/pkg/R/stats.R b/R/pkg/R/stats.R index 7252351ebebb2..0aabceef226e3 100644 --- a/R/pkg/R/stats.R +++ b/R/pkg/R/stats.R @@ -109,7 +109,8 @@ setMethod("corr", #' #' Finding frequent items for columns, possibly with false positives. #' Using the frequent element count algorithm described in -#' \url{https://doi.org/10.1145/762471.762473}, proposed by Karp, Schenker, and Papadimitriou. +#' \url{https://dl.acm.org/doi/10.1145/762471.762473}, proposed by Karp, Schenker, +#' and Papadimitriou. #' #' @param x A SparkDataFrame. #' @param cols A vector column names to search frequent items in. diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index a0608748696a3..3177b54dc5fac 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -1007,7 +1007,7 @@ perplexity #### Alternating Least Squares -`spark.als` learns latent factors in [collaborative filtering](https://en.wikipedia.org/wiki/Recommender_system#Collaborative_filtering) via [alternating least squares](https://dl.acm.org/citation.cfm?id=1608614). +`spark.als` learns latent factors in [collaborative filtering](https://en.wikipedia.org/wiki/Recommender_system#Collaborative_filtering) via [alternating least squares](https://dl.acm.org/doi/10.1109/MC.2009.263). There are multiple options that can be configured in `spark.als`, including `rank`, `reg`, and `nonnegative`. For a complete list, refer to the help file. From 26b603992c4b9b5a58e46e0566c1547b86249709 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 7 Jan 2021 14:26:04 +0900 Subject: [PATCH 0990/1009] [SPARK-34028][SQL] Cleanup "unreachable code" compilation warning ### What changes were proposed in this pull request? 
There is one compilation warning as follow: ``` [WARNING] [Warn] /spark/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala:1555: [other-match-analysis org.apache.spark.sql.catalyst.catalog.SessionCatalog.lookupFunction.catalogFunction] unreachable code ``` This compilation warning is due to `NoSuchPermanentFunctionException` is sub-class of `AnalysisException` and if there is `NoSuchPermanentFunctionException` be thrown out, it will be catch by `case _: AnalysisException => failFunctionLookup(name)`, so `case _: NoSuchPermanentFunctionException => failFunctionLookup(name)` is `unreachable code`. This pr remove `case _: NoSuchPermanentFunctionException => failFunctionLookup(name)` directly because both these 2 branches handle exceptions in the same way: `failFunctionLookup(name)` ### Why are the changes needed? Cleanup "unreachable code" compilation warnings. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass the Jenkins or GitHub Action Closes #31064 from LuciferYang/SPARK-34028. Authored-by: yangjie01 Signed-off-by: HyukjinKwon --- .../org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 5f7028bf87c87..76358ef116cec 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -1552,7 +1552,6 @@ class SessionCatalog( externalCatalog.getFunction(database, name.funcName) } catch { case _: AnalysisException => failFunctionLookup(name) - case _: NoSuchPermanentFunctionException => failFunctionLookup(name) } loadFunctionResources(catalogFunction.resources) // Please note that qualifiedName is provided by the user. However, From 3aa4e113c5162f5de12c2aa43b6af65a7f2110af Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 7 Jan 2021 14:28:30 +0900 Subject: [PATCH 0991/1009] [SPARK-33861][SQL][FOLLOWUP] Simplify conditional in predicate should consider deterministic ### What changes were proposed in this pull request? This pr address https://github.com/apache/spark/pull/30865#pullrequestreview-562344089 to fix simplify conditional in predicate should consider deterministic. ### Why are the changes needed? Fix bug. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #31067 from wangyum/SPARK-33861-2. 
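Editor's illustration (not part of the patch): the boolean-predicate equivalence behind the retained `CASE WHEN cond THEN trueVal ELSE false END => AND(cond, trueVal)` rewrite, shown from PySpark; both filters below should return the same rows.
```
spark.range(10).where("CASE WHEN id > 5 THEN id % 2 = 0 ELSE false END").show()
spark.range(10).where("id > 5 AND id % 2 = 0").show()
```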
Authored-by: Yuming Wang Signed-off-by: HyukjinKwon --- .../optimizer/SimplifyConditionalsInPredicate.scala | 6 ------ .../SimplifyConditionalsInPredicateSuite.scala | 11 ++++++++++- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala index 1ea85085bccdb..1225f1f318fc7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala @@ -39,9 +39,7 @@ import org.apache.spark.sql.types.BooleanType * - CASE WHEN cond THEN trueVal ELSE null END => AND(cond, trueVal) * - CASE WHEN cond THEN trueVal ELSE true END => OR(NOT(cond), trueVal) * - CASE WHEN cond THEN false ELSE elseVal END => AND(NOT(cond), elseVal) - * - CASE WHEN cond THEN false END => false * - CASE WHEN cond THEN true ELSE elseVal END => OR(cond, elseVal) - * - CASE WHEN cond THEN true END => cond */ object SimplifyConditionalsInPredicate extends Rule[LogicalPlan] { @@ -64,12 +62,8 @@ object SimplifyConditionalsInPredicate extends Rule[LogicalPlan] { And(cond, trueValue) case CaseWhen(Seq((cond, trueValue)), Some(TrueLiteral)) => Or(Not(cond), trueValue) - case CaseWhen(Seq((_, FalseLiteral)), Some(FalseLiteral) | None) => - FalseLiteral case CaseWhen(Seq((cond, FalseLiteral)), Some(elseValue)) => And(Not(cond), elseValue) - case CaseWhen(Seq((cond, TrueLiteral)), Some(FalseLiteral) | None) => - cond case CaseWhen(Seq((cond, TrueLiteral)), Some(elseValue)) => Or(cond, elseValue) case e if e.dataType == BooleanType => e diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala index 1f3c24bdbb664..04ebb4e63c675 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions.{And, CaseWhen, Expression, If, IsNotNull, Literal, Or} +import org.apache.spark.sql.catalyst.expressions.{And, CaseWhen, Expression, If, IsNotNull, Literal, Or, Rand} import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest} import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, LocalRelation, LogicalPlan, UpdateTable} @@ -158,6 +158,15 @@ class SimplifyConditionalsInPredicateSuite extends PlanTest { testProjection(originalCond, expectedExpr = originalCond) } + test("CASE WHEN non-deterministic-cond THEN false END") { + val originalCond = + CaseWhen(Seq((UnresolvedAttribute("i") > Rand(0), FalseLiteral))) + val expectedCond = And(UnresolvedAttribute("i") > Rand(0), FalseLiteral) + // nondeterministic expressions are only allowed in Project, Filter, Aggregate or Window, + testFilter(originalCond, expectedCond = FalseLiteral) + 
testProjection(originalCond, expectedExpr = originalCond) + } + test("CASE WHEN cond THEN true ELSE elseVal END => OR(cond, elseVal)") { val originalCond = CaseWhen( Seq((UnresolvedAttribute("i") > Literal(10), TrueLiteral)), From aa509c1eeed688ddf21553aefe7b48cdf072fc5b Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 7 Jan 2021 14:41:10 +0900 Subject: [PATCH 0992/1009] [SPARK-34031][SQL] Union operator missing rowCount when CBO enabled ### What changes were proposed in this pull request? This PR adds the row count to the `Union` operator when CBO is enabled. ```scala spark.sql("CREATE TABLE t1 USING parquet AS SELECT id FROM RANGE(10)") spark.sql("CREATE TABLE t2 USING parquet AS SELECT id FROM RANGE(10)") spark.sql("ANALYZE TABLE t1 COMPUTE STATISTICS FOR ALL COLUMNS") spark.sql("ANALYZE TABLE t2 COMPUTE STATISTICS FOR ALL COLUMNS") spark.sql("set spark.sql.cbo.enabled=true") spark.sql("SELECT * FROM t1 UNION ALL SELECT * FROM t2").explain("cost") ``` Before this PR: ``` == Optimized Logical Plan == Union false, false, Statistics(sizeInBytes=320.0 B) :- Relation[id#5880L] parquet, Statistics(sizeInBytes=160.0 B, rowCount=10) +- Relation[id#5881L] parquet, Statistics(sizeInBytes=160.0 B, rowCount=10) ``` After this PR: ``` == Optimized Logical Plan == Union false, false, Statistics(sizeInBytes=320.0 B, rowCount=20) :- Relation[id#2138L] parquet, Statistics(sizeInBytes=160.0 B, rowCount=10) +- Relation[id#2139L] parquet, Statistics(sizeInBytes=160.0 B, rowCount=10) ``` ### Why are the changes needed? Improve query performance: [`JoinEstimation.estimateInnerOuterJoin`](https://github.com/apache/spark/blob/d6a68e0b67ff7de58073c176dd097070e88ac831/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/JoinEstimation.scala#L55-L156) needs the row count. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #31068 from wangyum/SPARK-34031. Lead-authored-by: Yuming Wang Co-authored-by: Hyukjin Kwon Signed-off-by: HyukjinKwon --- .../plans/logical/basicLogicalOperators.scala | 2 +- .../BasicStatsPlanVisitor.scala | 10 +- .../BasicStatsEstimationSuite.scala | 11 + .../approved-plans-v1_4/q2.sf100/explain.txt | 128 ++- .../q2.sf100/simplified.txt | 98 ++- .../approved-plans-v1_4/q5.sf100/explain.txt | 220 +++--- .../q5.sf100/simplified.txt | 64 +- .../approved-plans-v1_4/q54.sf100/explain.txt | 726 +++++++++--------- .../q54.sf100/simplified.txt | 244 +++--- .../approved-plans-v2_7/q5a.sf100/explain.txt | 210 ++--- .../q5a.sf100/simplified.txt | 64 +- 11 files changed, 874 insertions(+), 903 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 9e06f9bec7830..3fb2e991af554 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -217,7 +217,7 @@ object Union { } /** - * Logical plan for unioning two plans, without a distinct. This is UNION ALL in SQL. + * Logical plan for unioning multiple plans, without a distinct. This is UNION ALL in SQL. * * @param byName Whether resolves columns in the children by column names. * @param allowMissingCol Allows missing columns in children query plans.
If it is true, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala index 34baf5b90e54e..05fc1f7958fef 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala @@ -79,7 +79,15 @@ object BasicStatsPlanVisitor extends LogicalPlanVisitor[Statistics] { override def visitScriptTransform(p: ScriptTransformation): Statistics = default(p) - override def visitUnion(p: Union): Statistics = fallback(p) + override def visitUnion(p: Union): Statistics = { + val stats = p.children.map(_.stats) + val rowCount = if (stats.exists(_.rowCount.isEmpty)) { + None + } else { + Some(stats.map(_.rowCount.get).sum) + } + Statistics(sizeInBytes = stats.map(_.sizeInBytes).sum, rowCount = rowCount) + } override def visitWindow(p: Window): Statistics = fallback(p) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala index 91f8fc406a43d..1d780142aede0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala @@ -141,6 +141,17 @@ class BasicStatsEstimationSuite extends PlanTest with StatsEstimationTestBase { expectedStatsCboOff = Statistics(sizeInBytes = 120)) } + test("SPARK-34031: Union operator missing rowCount when enable CBO") { + val union = Union(plan :: plan :: plan :: Nil) + val childrenSize = union.children.size + val sizeInBytes = plan.size.get * childrenSize + val rowCount = Some(plan.rowCount * childrenSize) + checkStats( + union, + expectedStatsCboOn = Statistics(sizeInBytes = sizeInBytes, rowCount = rowCount), + expectedStatsCboOff = Statistics(sizeInBytes = sizeInBytes)) + } + /** Check estimated stats when cbo is turned on/off. 
*/ private def checkStats( plan: LogicalPlan, diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt index 61e5ae0121819..52dfff442bf3a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt @@ -1,45 +1,43 @@ == Physical Plan == -* Sort (41) -+- Exchange (40) - +- * Project (39) - +- * SortMergeJoin Inner (38) - :- * Sort (26) - : +- * Project (25) - : +- * BroadcastHashJoin Inner BuildRight (24) - : :- * HashAggregate (18) - : : +- Exchange (17) - : : +- * HashAggregate (16) - : : +- * Project (15) - : : +- * BroadcastHashJoin Inner BuildRight (14) - : : :- Union (9) - : : : :- * Project (4) - : : : : +- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.web_sales (1) - : : : +- * Project (8) - : : : +- * Filter (7) - : : : +- * ColumnarToRow (6) - : : : +- Scan parquet default.catalog_sales (5) - : : +- BroadcastExchange (13) - : : +- * Filter (12) - : : +- * ColumnarToRow (11) - : : +- Scan parquet default.date_dim (10) - : +- BroadcastExchange (23) - : +- * Project (22) - : +- * Filter (21) - : +- * ColumnarToRow (20) - : +- Scan parquet default.date_dim (19) - +- * Sort (37) - +- Exchange (36) - +- * Project (35) - +- * BroadcastHashJoin Inner BuildRight (34) - :- * HashAggregate (28) - : +- ReusedExchange (27) - +- BroadcastExchange (33) - +- * Project (32) - +- * Filter (31) - +- * ColumnarToRow (30) - +- Scan parquet default.date_dim (29) +* Sort (39) ++- Exchange (38) + +- * Project (37) + +- * BroadcastHashJoin Inner BuildRight (36) + :- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * HashAggregate (18) + : : +- Exchange (17) + : : +- * HashAggregate (16) + : : +- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- Union (9) + : : : :- * Project (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.web_sales (1) + : : : +- * Project (8) + : : : +- * Filter (7) + : : : +- * ColumnarToRow (6) + : : : +- Scan parquet default.catalog_sales (5) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.date_dim (10) + : +- BroadcastExchange (23) + : +- * Project (22) + : +- * Filter (21) + : +- * ColumnarToRow (20) + : +- Scan parquet default.date_dim (19) + +- BroadcastExchange (35) + +- * Project (34) + +- * BroadcastHashJoin Inner BuildRight (33) + :- * HashAggregate (27) + : +- ReusedExchange (26) + +- BroadcastExchange (32) + +- * Project (31) + +- * Filter (30) + +- * ColumnarToRow (29) + +- Scan parquet default.date_dim (28) (1) Scan parquet default.web_sales @@ -116,9 +114,9 @@ Results [8]: [d_week_seq#10, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum (17) Exchange Input [8]: [d_week_seq#10, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] -Arguments: hashpartitioning(d_week_seq#10, 5), true, [id=#27] +Arguments: hashpartitioning(d_week_seq#10, 5), ENSURE_REQUIREMENTS, [id=#27] -(18) HashAggregate [codegen id : 6] +(18) HashAggregate [codegen id : 12] Input [8]: [d_week_seq#10, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] Keys [1]: [d_week_seq#10] Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) 
THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))] @@ -147,82 +145,74 @@ Input [2]: [d_week_seq#42, d_year#43] Input [1]: [d_week_seq#42] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#44] -(24) BroadcastHashJoin [codegen id : 6] +(24) BroadcastHashJoin [codegen id : 12] Left keys [1]: [d_week_seq#10] Right keys [1]: [d_week_seq#42] Join condition: None -(25) Project [codegen id : 6] +(25) Project [codegen id : 12] Output [8]: [d_week_seq#10 AS d_week_seq1#45, sun_sales#35 AS sun_sales1#46, mon_sales#36 AS mon_sales1#47, tue_sales#37 AS tue_sales1#48, wed_sales#38 AS wed_sales1#49, thu_sales#39 AS thu_sales1#50, fri_sales#40 AS fri_sales1#51, sat_sales#41 AS sat_sales1#52] Input [9]: [d_week_seq#10, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41, d_week_seq#42] -(26) Sort [codegen id : 6] -Input [8]: [d_week_seq1#45, sun_sales1#46, mon_sales1#47, tue_sales1#48, wed_sales1#49, thu_sales1#50, fri_sales1#51, sat_sales1#52] -Arguments: [d_week_seq1#45 ASC NULLS FIRST], false, 0 - -(27) ReusedExchange [Reuses operator id: 17] +(26) ReusedExchange [Reuses operator id: 17] Output [8]: [d_week_seq#10, sum#53, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59] -(28) HashAggregate [codegen id : 12] +(27) HashAggregate [codegen id : 11] Input [8]: [d_week_seq#10, sum#53, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59] Keys [1]: [d_week_seq#10] Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))] Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END))#60, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END))#61, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END))#62, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END))#63, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END))#64, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END))#65, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))#66] Results [8]: [d_week_seq#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END))#60,17,2) AS sun_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END))#61,17,2) AS mon_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = 
Tuesday) THEN sales_price#4 ELSE null END))#62,17,2) AS tue_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END))#63,17,2) AS wed_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END))#64,17,2) AS thu_sales#39, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END))#65,17,2) AS fri_sales#40, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))#66,17,2) AS sat_sales#41] -(29) Scan parquet default.date_dim +(28) Scan parquet default.date_dim Output [2]: [d_week_seq#67, d_year#68] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_week_seq)] ReadSchema: struct -(30) ColumnarToRow [codegen id : 11] +(29) ColumnarToRow [codegen id : 10] Input [2]: [d_week_seq#67, d_year#68] -(31) Filter [codegen id : 11] +(30) Filter [codegen id : 10] Input [2]: [d_week_seq#67, d_year#68] Condition : ((isnotnull(d_year#68) AND (d_year#68 = 2002)) AND isnotnull(d_week_seq#67)) -(32) Project [codegen id : 11] +(31) Project [codegen id : 10] Output [1]: [d_week_seq#67] Input [2]: [d_week_seq#67, d_year#68] -(33) BroadcastExchange +(32) BroadcastExchange Input [1]: [d_week_seq#67] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#69] -(34) BroadcastHashJoin [codegen id : 12] +(33) BroadcastHashJoin [codegen id : 11] Left keys [1]: [d_week_seq#10] Right keys [1]: [d_week_seq#67] Join condition: None -(35) Project [codegen id : 12] +(34) Project [codegen id : 11] Output [8]: [d_week_seq#10 AS d_week_seq2#70, sun_sales#35 AS sun_sales2#71, mon_sales#36 AS mon_sales2#72, tue_sales#37 AS tue_sales2#73, wed_sales#38 AS wed_sales2#74, thu_sales#39 AS thu_sales2#75, fri_sales#40 AS fri_sales2#76, sat_sales#41 AS sat_sales2#77] Input [9]: [d_week_seq#10, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41, d_week_seq#67] -(36) Exchange -Input [8]: [d_week_seq2#70, sun_sales2#71, mon_sales2#72, tue_sales2#73, wed_sales2#74, thu_sales2#75, fri_sales2#76, sat_sales2#77] -Arguments: hashpartitioning((d_week_seq2#70 - 53), 5), true, [id=#78] - -(37) Sort [codegen id : 13] +(35) BroadcastExchange Input [8]: [d_week_seq2#70, sun_sales2#71, mon_sales2#72, tue_sales2#73, wed_sales2#74, thu_sales2#75, fri_sales2#76, sat_sales2#77] -Arguments: [(d_week_seq2#70 - 53) ASC NULLS FIRST], false, 0 +Arguments: HashedRelationBroadcastMode(List(cast((input[0, int, true] - 53) as bigint)),false), [id=#78] -(38) SortMergeJoin [codegen id : 14] +(36) BroadcastHashJoin [codegen id : 12] Left keys [1]: [d_week_seq1#45] Right keys [1]: [(d_week_seq2#70 - 53)] Join condition: None -(39) Project [codegen id : 14] +(37) Project [codegen id : 12] Output [8]: [d_week_seq1#45, round(CheckOverflow((promote_precision(sun_sales1#46) / promote_precision(sun_sales2#71)), DecimalType(37,20), true), 2) AS round((sun_sales1 / sun_sales2), 2)#79, round(CheckOverflow((promote_precision(mon_sales1#47) / promote_precision(mon_sales2#72)), DecimalType(37,20), true), 2) AS round((mon_sales1 / mon_sales2), 2)#80, round(CheckOverflow((promote_precision(tue_sales1#48) / promote_precision(tue_sales2#73)), DecimalType(37,20), true), 2) AS round((tue_sales1 / tue_sales2), 2)#81, round(CheckOverflow((promote_precision(wed_sales1#49) / promote_precision(wed_sales2#74)), DecimalType(37,20), 
true), 2) AS round((wed_sales1 / wed_sales2), 2)#82, round(CheckOverflow((promote_precision(thu_sales1#50) / promote_precision(thu_sales2#75)), DecimalType(37,20), true), 2) AS round((thu_sales1 / thu_sales2), 2)#83, round(CheckOverflow((promote_precision(fri_sales1#51) / promote_precision(fri_sales2#76)), DecimalType(37,20), true), 2) AS round((fri_sales1 / fri_sales2), 2)#84, round(CheckOverflow((promote_precision(sat_sales1#52) / promote_precision(sat_sales2#77)), DecimalType(37,20), true), 2) AS round((sat_sales1 / sat_sales2), 2)#85] Input [16]: [d_week_seq1#45, sun_sales1#46, mon_sales1#47, tue_sales1#48, wed_sales1#49, thu_sales1#50, fri_sales1#51, sat_sales1#52, d_week_seq2#70, sun_sales2#71, mon_sales2#72, tue_sales2#73, wed_sales2#74, thu_sales2#75, fri_sales2#76, sat_sales2#77] -(40) Exchange +(38) Exchange Input [8]: [d_week_seq1#45, round((sun_sales1 / sun_sales2), 2)#79, round((mon_sales1 / mon_sales2), 2)#80, round((tue_sales1 / tue_sales2), 2)#81, round((wed_sales1 / wed_sales2), 2)#82, round((thu_sales1 / thu_sales2), 2)#83, round((fri_sales1 / fri_sales2), 2)#84, round((sat_sales1 / sat_sales2), 2)#85] -Arguments: rangepartitioning(d_week_seq1#45 ASC NULLS FIRST, 5), true, [id=#86] +Arguments: rangepartitioning(d_week_seq1#45 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#86] -(41) Sort [codegen id : 15] +(39) Sort [codegen id : 13] Input [8]: [d_week_seq1#45, round((sun_sales1 / sun_sales2), 2)#79, round((mon_sales1 / mon_sales2), 2)#80, round((tue_sales1 / tue_sales2), 2)#81, round((wed_sales1 / wed_sales2), 2)#82, round((thu_sales1 / thu_sales2), 2)#83, round((fri_sales1 / fri_sales2), 2)#84, round((sat_sales1 / sat_sales2), 2)#85] Arguments: [d_week_seq1#45 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/simplified.txt index 3389774c46469..424a535e14847 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/simplified.txt @@ -1,67 +1,61 @@ -WholeStageCodegen (15) +WholeStageCodegen (13) Sort [d_week_seq1] InputAdapter Exchange [d_week_seq1] #1 - WholeStageCodegen (14) + WholeStageCodegen (12) Project [d_week_seq1,sun_sales1,sun_sales2,mon_sales1,mon_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2,thu_sales1,thu_sales2,fri_sales1,fri_sales2,sat_sales1,sat_sales2] - SortMergeJoin [d_week_seq1,d_week_seq2] + BroadcastHashJoin [d_week_seq1,d_week_seq2] + Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + Exchange [d_week_seq] #2 + 
WholeStageCodegen (4) + HashAggregate [d_week_seq,d_day_name,sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,d_week_seq,d_day_name] + BroadcastHashJoin [sold_date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen (1) + Project [ws_sold_date_sk,ws_ext_sales_price] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_ext_sales_price] + WholeStageCodegen (2) + Project [cs_sold_date_sk,cs_ext_sales_price] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (3) + Filter [d_date_sk,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_week_seq,d_day_name] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (5) + Project [d_week_seq] + Filter [d_year,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_week_seq,d_year] InputAdapter - WholeStageCodegen (6) - Sort [d_week_seq1] + BroadcastExchange #5 + WholeStageCodegen (11) Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] BroadcastHashJoin [d_week_seq,d_week_seq] HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] InputAdapter - Exchange [d_week_seq] #2 - WholeStageCodegen (4) - HashAggregate [d_week_seq,d_day_name,sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [sales_price,d_week_seq,d_day_name] - BroadcastHashJoin [sold_date_sk,d_date_sk] - InputAdapter - Union - WholeStageCodegen (1) - Project [ws_sold_date_sk,ws_ext_sales_price] - Filter [ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_ext_sales_price] - WholeStageCodegen (2) - Project [cs_sold_date_sk,cs_ext_sales_price] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ext_sales_price] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (3) - Filter [d_date_sk,d_week_seq] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_week_seq,d_day_name] + ReusedExchange [d_week_seq,sum,sum,sum,sum,sum,sum,sum] #2 InputAdapter - BroadcastExchange #4 - WholeStageCodegen (5) + BroadcastExchange #6 + WholeStageCodegen (10) Project [d_week_seq] Filter [d_year,d_week_seq] ColumnarToRow InputAdapter Scan parquet default.date_dim [d_week_seq,d_year] - InputAdapter - WholeStageCodegen (13) - Sort [d_week_seq2] - InputAdapter - Exchange [d_week_seq2] #5 - WholeStageCodegen (12) - Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] - BroadcastHashJoin [d_week_seq,d_week_seq] - HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN 
(d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] - InputAdapter - ReusedExchange [d_week_seq,sum,sum,sum,sum,sum,sum,sum] #2 - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (11) - Project [d_week_seq] - Filter [d_year,d_week_seq] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_week_seq,d_year] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt index 55bd25c501294..5a9c4715d4b05 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt @@ -10,8 +10,8 @@ TakeOrderedAndProject (81) : +- * HashAggregate (23) : +- * Project (22) : +- * BroadcastHashJoin Inner BuildRight (21) - : :- * Project (16) - : : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) : : :- Union (9) : : : :- * Project (4) : : : : +- * Filter (3) @@ -21,22 +21,22 @@ TakeOrderedAndProject (81) : : : +- * Filter (7) : : : +- * ColumnarToRow (6) : : : +- Scan parquet default.store_returns (5) - : : +- BroadcastExchange (14) - : : +- * Project (13) - : : +- * Filter (12) - : : +- * ColumnarToRow (11) - : : +- Scan parquet default.date_dim (10) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.store (10) : +- BroadcastExchange (20) - : +- * Filter (19) - : +- * ColumnarToRow (18) - : +- Scan parquet default.store (17) + : +- * Project (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet default.date_dim (16) :- * HashAggregate (46) : +- Exchange (45) : +- * HashAggregate (44) : +- * Project (43) : +- * BroadcastHashJoin Inner BuildRight (42) - : :- * Project (37) - : : +- * BroadcastHashJoin Inner BuildRight (36) + : :- * Project (40) + : : +- * BroadcastHashJoin Inner BuildRight (39) : : :- Union (34) : : : :- * Project (29) : : : : +- * Filter (28) @@ -46,18 +46,18 @@ TakeOrderedAndProject (81) : : : +- * Filter (32) : : : +- * ColumnarToRow (31) : : : +- Scan parquet default.catalog_returns (30) - : : +- ReusedExchange (35) - : +- BroadcastExchange (41) - : +- * Filter (40) - : +- * ColumnarToRow (39) - : +- Scan parquet default.catalog_page (38) + : : +- BroadcastExchange (38) + : : +- * Filter (37) + : : +- * ColumnarToRow (36) + : : +- Scan parquet default.catalog_page (35) + : +- ReusedExchange (41) +- * HashAggregate (75) +- Exchange (74) +- * HashAggregate (73) +- * Project (72) +- * BroadcastHashJoin Inner BuildRight (71) - :- * Project (66) - : +- * BroadcastHashJoin Inner BuildRight (65) + :- * Project (69) + : +- * BroadcastHashJoin Inner BuildRight (68) : :- Union (63) : : :- * Project (50) : : : +- * Filter (49) @@ -75,11 +75,11 @@ TakeOrderedAndProject (81) : 
: +- * Filter (58) : : +- * ColumnarToRow (57) : : +- Scan parquet default.web_sales (56) - : +- ReusedExchange (64) - +- BroadcastExchange (70) - +- * Filter (69) - +- * ColumnarToRow (68) - +- Scan parquet default.web_site (67) + : +- BroadcastExchange (67) + : +- * Filter (66) + : +- * ColumnarToRow (65) + : +- Scan parquet default.web_site (64) + +- ReusedExchange (70) (1) Scan parquet default.store_sales @@ -119,81 +119,81 @@ Input [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_los (9) Union -(10) Scan parquet default.date_dim -Output [2]: [d_date_sk#21, d_date#22] +(10) Scan parquet default.store +Output [2]: [s_store_sk#21, s_store_id#22] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct (11) ColumnarToRow [codegen id : 3] -Input [2]: [d_date_sk#21, d_date#22] +Input [2]: [s_store_sk#21, s_store_id#22] (12) Filter [codegen id : 3] -Input [2]: [d_date_sk#21, d_date#22] -Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 11192)) AND (d_date#22 <= 11206)) AND isnotnull(d_date_sk#21)) +Input [2]: [s_store_sk#21, s_store_id#22] +Condition : isnotnull(s_store_sk#21) -(13) Project [codegen id : 3] -Output [1]: [d_date_sk#21] -Input [2]: [d_date_sk#21, d_date#22] +(13) BroadcastExchange +Input [2]: [s_store_sk#21, s_store_id#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] -(14) BroadcastExchange -Input [1]: [d_date_sk#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] - -(15) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [date_sk#6] -Right keys [1]: [cast(d_date_sk#21 as bigint)] +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [store_sk#5] +Right keys [1]: [cast(s_store_sk#21 as bigint)] Join condition: None -(16) Project [codegen id : 5] -Output [5]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10] -Input [7]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, d_date_sk#21] +(15) Project [codegen id : 5] +Output [6]: [date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#22] +Input [8]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_sk#21, s_store_id#22] -(17) Scan parquet default.store -Output [2]: [s_store_sk#24, s_store_id#25] +(16) Scan parquet default.date_dim +Output [2]: [d_date_sk#24, d_date#25] Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#24, d_date#25] -(18) ColumnarToRow [codegen id : 4] -Input [2]: [s_store_sk#24, s_store_id#25] +(18) Filter [codegen id : 4] +Input [2]: [d_date_sk#24, d_date#25] +Condition : (((isnotnull(d_date#25) AND (d_date#25 >= 11192)) AND (d_date#25 <= 11206)) AND isnotnull(d_date_sk#24)) -(19) Filter [codegen id : 4] -Input [2]: [s_store_sk#24, s_store_id#25] -Condition : isnotnull(s_store_sk#24) +(19) Project [codegen id : 4] +Output [1]: 
[d_date_sk#24] +Input [2]: [d_date_sk#24, d_date#25] (20) BroadcastExchange -Input [2]: [s_store_sk#24, s_store_id#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] +Input [1]: [d_date_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] (21) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [store_sk#5] -Right keys [1]: [cast(s_store_sk#24 as bigint)] +Left keys [1]: [date_sk#6] +Right keys [1]: [cast(d_date_sk#24 as bigint)] Join condition: None (22) Project [codegen id : 5] -Output [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#25] -Input [7]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_sk#24, s_store_id#25] +Output [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#22] +Input [7]: [date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#22, d_date_sk#24] (23) HashAggregate [codegen id : 5] -Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#25] -Keys [1]: [s_store_id#25] +Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#22] +Keys [1]: [s_store_id#22] Functions [4]: [partial_sum(UnscaledValue(sales_price#7)), partial_sum(UnscaledValue(return_amt#9)), partial_sum(UnscaledValue(profit#8)), partial_sum(UnscaledValue(net_loss#10))] Aggregate Attributes [4]: [sum#27, sum#28, sum#29, sum#30] -Results [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] +Results [5]: [s_store_id#22, sum#31, sum#32, sum#33, sum#34] (24) Exchange -Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] -Arguments: hashpartitioning(s_store_id#25, 5), true, [id=#35] +Input [5]: [s_store_id#22, sum#31, sum#32, sum#33, sum#34] +Arguments: hashpartitioning(s_store_id#22, 5), ENSURE_REQUIREMENTS, [id=#35] (25) HashAggregate [codegen id : 6] -Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] -Keys [1]: [s_store_id#25] +Input [5]: [s_store_id#22, sum#31, sum#32, sum#33, sum#34] +Keys [1]: [s_store_id#22] Functions [4]: [sum(UnscaledValue(sales_price#7)), sum(UnscaledValue(return_amt#9)), sum(UnscaledValue(profit#8)), sum(UnscaledValue(net_loss#10))] Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#7))#36, sum(UnscaledValue(return_amt#9))#37, sum(UnscaledValue(profit#8))#38, sum(UnscaledValue(net_loss#10))#39] -Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#7))#36,17,2) AS sales#40, MakeDecimal(sum(UnscaledValue(return_amt#9))#37,17,2) AS RETURNS#41, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#8))#38,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#10))#39,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#42, store channel AS channel#43, concat(store, s_store_id#25) AS id#44] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#7))#36,17,2) AS sales#40, MakeDecimal(sum(UnscaledValue(return_amt#9))#37,17,2) AS RETURNS#41, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#8))#38,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#10))#39,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#42, store channel AS channel#43, concat(store, s_store_id#22) AS id#44] (26) Scan parquet default.catalog_sales Output [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] @@ -233,44 +233,44 @@ Input [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, cr_return_amount#57, (34) 
Union -(35) ReusedExchange [Reuses operator id: 14] -Output [1]: [d_date_sk#21] - -(36) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [date_sk#50] -Right keys [1]: [d_date_sk#21] -Join condition: None - -(37) Project [codegen id : 11] -Output [5]: [page_sk#49, sales_price#51, profit#52, return_amt#53, net_loss#54] -Input [7]: [page_sk#49, date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, d_date_sk#21] - -(38) Scan parquet default.catalog_page +(35) Scan parquet default.catalog_page Output [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_page] PushedFilters: [IsNotNull(cp_catalog_page_sk)] ReadSchema: struct -(39) ColumnarToRow [codegen id : 10] +(36) ColumnarToRow [codegen id : 9] Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] -(40) Filter [codegen id : 10] +(37) Filter [codegen id : 9] Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] Condition : isnotnull(cp_catalog_page_sk#65) -(41) BroadcastExchange +(38) BroadcastExchange Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#67] -(42) BroadcastHashJoin [codegen id : 11] +(39) BroadcastHashJoin [codegen id : 11] Left keys [1]: [page_sk#49] Right keys [1]: [cp_catalog_page_sk#65] Join condition: None +(40) Project [codegen id : 11] +Output [6]: [date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] +Input [8]: [page_sk#49, date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_sk#65, cp_catalog_page_id#66] + +(41) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#24] + +(42) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [date_sk#50] +Right keys [1]: [d_date_sk#24] +Join condition: None + (43) Project [codegen id : 11] Output [5]: [sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] -Input [7]: [page_sk#49, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_sk#65, cp_catalog_page_id#66] +Input [7]: [date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66, d_date_sk#24] (44) HashAggregate [codegen id : 11] Input [5]: [sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] @@ -281,7 +281,7 @@ Results [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] (45) Exchange Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] -Arguments: hashpartitioning(cp_catalog_page_id#66, 5), true, [id=#76] +Arguments: hashpartitioning(cp_catalog_page_id#66, 5), ENSURE_REQUIREMENTS, [id=#76] (46) HashAggregate [codegen id : 12] Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] @@ -324,7 +324,7 @@ Condition : isnotnull(wr_returned_date_sk#96) (54) Exchange Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] -Arguments: hashpartitioning(wr_item_sk#97, wr_order_number#98, 5), true, [id=#101] +Arguments: hashpartitioning(wr_item_sk#97, wr_order_number#98, 5), ENSURE_REQUIREMENTS, [id=#101] (55) Sort [codegen id : 15] Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] @@ -346,7 +346,7 @@ Condition : ((isnotnull(ws_item_sk#102) AND isnotnull(ws_order_number#103)) AND (59) Exchange Input [3]: [ws_item_sk#102, ws_web_site_sk#87, ws_order_number#103] -Arguments: hashpartitioning(cast(ws_item_sk#102 as bigint), 
cast(ws_order_number#103 as bigint), 5), true, [id=#104] +Arguments: hashpartitioning(cast(ws_item_sk#102 as bigint), cast(ws_order_number#103 as bigint), 5), ENSURE_REQUIREMENTS, [id=#104] (60) Sort [codegen id : 17] Input [3]: [ws_item_sk#102, ws_web_site_sk#87, ws_order_number#103] @@ -363,44 +363,44 @@ Input [8]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return (63) Union -(64) ReusedExchange [Reuses operator id: 14] -Output [1]: [d_date_sk#21] - -(65) BroadcastHashJoin [codegen id : 21] -Left keys [1]: [date_sk#91] -Right keys [1]: [cast(d_date_sk#21 as bigint)] -Join condition: None - -(66) Project [codegen id : 21] -Output [5]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95] -Input [7]: [wsr_web_site_sk#90, date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, d_date_sk#21] - -(67) Scan parquet default.web_site +(64) Scan parquet default.web_site Output [2]: [web_site_sk#111, web_site_id#112] Batched: true Location [not included in comparison]/{warehouse_dir}/web_site] PushedFilters: [IsNotNull(web_site_sk)] ReadSchema: struct -(68) ColumnarToRow [codegen id : 20] +(65) ColumnarToRow [codegen id : 19] Input [2]: [web_site_sk#111, web_site_id#112] -(69) Filter [codegen id : 20] +(66) Filter [codegen id : 19] Input [2]: [web_site_sk#111, web_site_id#112] Condition : isnotnull(web_site_sk#111) -(70) BroadcastExchange +(67) BroadcastExchange Input [2]: [web_site_sk#111, web_site_id#112] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#113] -(71) BroadcastHashJoin [codegen id : 21] +(68) BroadcastHashJoin [codegen id : 21] Left keys [1]: [wsr_web_site_sk#90] Right keys [1]: [web_site_sk#111] Join condition: None +(69) Project [codegen id : 21] +Output [6]: [date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112] +Input [8]: [wsr_web_site_sk#90, date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_sk#111, web_site_id#112] + +(70) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#24] + +(71) BroadcastHashJoin [codegen id : 21] +Left keys [1]: [date_sk#91] +Right keys [1]: [cast(d_date_sk#24 as bigint)] +Join condition: None + (72) Project [codegen id : 21] Output [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112] -Input [7]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_sk#111, web_site_id#112] +Input [7]: [date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112, d_date_sk#24] (73) HashAggregate [codegen id : 21] Input [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112] @@ -411,7 +411,7 @@ Results [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121] (74) Exchange Input [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121] -Arguments: hashpartitioning(web_site_id#112, 5), true, [id=#122] +Arguments: hashpartitioning(web_site_id#112, 5), ENSURE_REQUIREMENTS, [id=#122] (75) HashAggregate [codegen id : 22] Input [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121] @@ -435,7 +435,7 @@ Results [9]: [channel#132, id#133, spark_grouping_id#134, sum#141, isEmpty#142, (79) Exchange Input [9]: [channel#132, id#133, spark_grouping_id#134, sum#141, isEmpty#142, sum#143, isEmpty#144, sum#145, isEmpty#146] -Arguments: hashpartitioning(channel#132, id#133, spark_grouping_id#134, 5), true, [id=#147] +Arguments: hashpartitioning(channel#132, id#133, spark_grouping_id#134, 5), ENSURE_REQUIREMENTS, 
[id=#147] (80) HashAggregate [codegen id : 24] Input [9]: [channel#132, id#133, spark_grouping_id#134, sum#141, isEmpty#142, sum#143, isEmpty#144, sum#145, isEmpty#146] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/simplified.txt index 80b07a3712d36..2db6cf767729d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/simplified.txt @@ -15,9 +15,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] WholeStageCodegen (5) HashAggregate [s_store_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] Project [sales_price,profit,return_amt,net_loss,s_store_id] - BroadcastHashJoin [store_sk,s_store_sk] - Project [store_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] + BroadcastHashJoin [date_sk,d_date_sk] + Project [date_sk,sales_price,profit,return_amt,net_loss,s_store_id] + BroadcastHashJoin [store_sk,s_store_sk] InputAdapter Union WholeStageCodegen (1) @@ -35,18 +35,18 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter BroadcastExchange #3 WholeStageCodegen (3) - Project [d_date_sk] - Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_id] InputAdapter BroadcastExchange #4 WholeStageCodegen (4) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_id] + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] WholeStageCodegen (12) HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,RETURNS,profit,channel,id,sum,sum,sum,sum] InputAdapter @@ -54,9 +54,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] WholeStageCodegen (11) HashAggregate [cp_catalog_page_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] Project [sales_price,profit,return_amt,net_loss,cp_catalog_page_id] - BroadcastHashJoin [page_sk,cp_catalog_page_sk] - Project [page_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] + BroadcastHashJoin [date_sk,d_date_sk] + Project [date_sk,sales_price,profit,return_amt,net_loss,cp_catalog_page_id] + BroadcastHashJoin [page_sk,cp_catalog_page_sk] InputAdapter Union WholeStageCodegen (7) @@ -72,14 +72,14 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter Scan parquet default.catalog_returns [cr_returned_date_sk,cr_catalog_page_sk,cr_return_amount,cr_net_loss] InputAdapter - ReusedExchange [d_date_sk] #3 + BroadcastExchange #6 + WholeStageCodegen (9) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] InputAdapter - BroadcastExchange #6 - WholeStageCodegen (10) - Filter [cp_catalog_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + ReusedExchange [d_date_sk] #4 WholeStageCodegen (22) HashAggregate [web_site_id,sum,sum,sum,sum] 
[sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,RETURNS,profit,channel,id,sum,sum,sum,sum] InputAdapter @@ -87,9 +87,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] WholeStageCodegen (21) HashAggregate [web_site_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] Project [sales_price,profit,return_amt,net_loss,web_site_id] - BroadcastHashJoin [wsr_web_site_sk,web_site_sk] - Project [wsr_web_site_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] + BroadcastHashJoin [date_sk,d_date_sk] + Project [date_sk,sales_price,profit,return_amt,net_loss,web_site_id] + BroadcastHashJoin [wsr_web_site_sk,web_site_sk] InputAdapter Union WholeStageCodegen (13) @@ -122,11 +122,11 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter Scan parquet default.web_sales [ws_item_sk,ws_web_site_sk,ws_order_number] InputAdapter - ReusedExchange [d_date_sk] #3 + BroadcastExchange #10 + WholeStageCodegen (19) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_site_sk,web_site_id] InputAdapter - BroadcastExchange #10 - WholeStageCodegen (20) - Filter [web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_site [web_site_sk,web_site_id] + ReusedExchange [d_date_sk] #4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/explain.txt index d78565986bc0a..a504149b00b94 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/explain.txt @@ -1,494 +1,474 @@ == Physical Plan == -TakeOrderedAndProject (71) -+- * HashAggregate (70) - +- Exchange (69) - +- * HashAggregate (68) - +- * HashAggregate (67) - +- Exchange (66) - +- * HashAggregate (65) - +- * Project (64) - +- * BroadcastHashJoin Inner BuildRight (63) - :- * Project (57) - : +- * BroadcastHashJoin Inner BuildRight (56) - : :- * Project (51) - : : +- * SortMergeJoin Inner (50) - : : :- * Sort (44) - : : : +- Exchange (43) - : : : +- * Project (42) - : : : +- * SortMergeJoin Inner (41) - : : : :- * Sort (35) - : : : : +- * HashAggregate (34) - : : : : +- * HashAggregate (33) - : : : : +- * Project (32) - : : : : +- * SortMergeJoin Inner (31) - : : : : :- * Sort (25) - : : : : : +- Exchange (24) - : : : : : +- * Project (23) - : : : : : +- * BroadcastHashJoin Inner BuildRight (22) - : : : : : :- * Project (16) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : : : : :- Union (9) - : : : : : : : :- * Project (4) - : : : : : : : : +- * Filter (3) - : : : : : : : : +- * ColumnarToRow (2) - : : : : : : : : +- Scan parquet default.catalog_sales (1) - : : : : : : : +- * Project (8) - : : : : : : : +- * Filter (7) - : : : : : : : +- * ColumnarToRow (6) - : : : : : : : +- Scan parquet default.web_sales (5) - : : : : : : +- BroadcastExchange (14) - : : : : : : +- * Project (13) - : : : : : : +- * Filter (12) - : : : : : : +- * ColumnarToRow (11) - : : : : : : +- Scan parquet default.item (10) - : : : : : +- BroadcastExchange (21) - : : : : : +- * Project (20) - : : : : : +- * Filter (19) - : : : : : +- * ColumnarToRow (18) - : : : : : +- Scan parquet default.date_dim (17) - : : : : +- * Sort (30) - : : : : +- Exchange (29) - : : : : +- * Filter (28) - : : : : +- * ColumnarToRow (27) - 
: : : : +- Scan parquet default.customer (26) - : : : +- * Sort (40) - : : : +- Exchange (39) - : : : +- * Filter (38) - : : : +- * ColumnarToRow (37) - : : : +- Scan parquet default.store_sales (36) - : : +- * Sort (49) - : : +- Exchange (48) - : : +- * Filter (47) - : : +- * ColumnarToRow (46) - : : +- Scan parquet default.customer_address (45) - : +- BroadcastExchange (55) - : +- * Filter (54) - : +- * ColumnarToRow (53) - : +- Scan parquet default.store (52) - +- BroadcastExchange (62) - +- * Project (61) - +- * Filter (60) - +- * ColumnarToRow (59) - +- Scan parquet default.date_dim (58) - - -(1) Scan parquet default.catalog_sales -Output [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] +TakeOrderedAndProject (67) ++- * HashAggregate (66) + +- Exchange (65) + +- * HashAggregate (64) + +- * HashAggregate (63) + +- * HashAggregate (62) + +- * Project (61) + +- * SortMergeJoin Inner (60) + :- * Sort (47) + : +- * Project (46) + : +- * BroadcastHashJoin Inner BuildLeft (45) + : :- BroadcastExchange (10) + : : +- * Project (9) + : : +- * BroadcastHashJoin Inner BuildRight (8) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.customer_address (1) + : : +- BroadcastExchange (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.store (4) + : +- * HashAggregate (44) + : +- * HashAggregate (43) + : +- * Project (42) + : +- * SortMergeJoin Inner (41) + : :- * Sort (35) + : : +- Exchange (34) + : : +- * Project (33) + : : +- * BroadcastHashJoin Inner BuildRight (32) + : : :- * Project (26) + : : : +- * BroadcastHashJoin Inner BuildRight (25) + : : : :- Union (19) + : : : : :- * Project (14) + : : : : : +- * Filter (13) + : : : : : +- * ColumnarToRow (12) + : : : : : +- Scan parquet default.catalog_sales (11) + : : : : +- * Project (18) + : : : : +- * Filter (17) + : : : : +- * ColumnarToRow (16) + : : : : +- Scan parquet default.web_sales (15) + : : : +- BroadcastExchange (24) + : : : +- * Project (23) + : : : +- * Filter (22) + : : : +- * ColumnarToRow (21) + : : : +- Scan parquet default.date_dim (20) + : : +- BroadcastExchange (31) + : : +- * Project (30) + : : +- * Filter (29) + : : +- * ColumnarToRow (28) + : : +- Scan parquet default.item (27) + : +- * Sort (40) + : +- Exchange (39) + : +- * Filter (38) + : +- * ColumnarToRow (37) + : +- Scan parquet default.customer (36) + +- * Sort (59) + +- Exchange (58) + +- * Project (57) + +- * BroadcastHashJoin Inner BuildRight (56) + :- * Filter (50) + : +- * ColumnarToRow (49) + : +- Scan parquet default.store_sales (48) + +- BroadcastExchange (55) + +- * Project (54) + +- * Filter (53) + +- * ColumnarToRow (52) + +- Scan parquet default.date_dim (51) + + +(1) Scan parquet default.customer_address +Output [3]: [ca_address_sk#1, ca_county#2, ca_state#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county), IsNotNull(ca_state)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ca_address_sk#1, ca_county#2, ca_state#3] + +(3) Filter [codegen id : 2] +Input [3]: [ca_address_sk#1, ca_county#2, ca_state#3] +Condition : ((isnotnull(ca_address_sk#1) AND isnotnull(ca_county#2)) AND isnotnull(ca_state#3)) + +(4) Scan parquet default.store +Output [2]: [s_county#4, s_state#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_county), IsNotNull(s_state)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 
1] +Input [2]: [s_county#4, s_state#5] + +(6) Filter [codegen id : 1] +Input [2]: [s_county#4, s_state#5] +Condition : (isnotnull(s_county#4) AND isnotnull(s_state#5)) + +(7) BroadcastExchange +Input [2]: [s_county#4, s_state#5] +Arguments: HashedRelationBroadcastMode(List(input[0, string, false], input[1, string, false]),false), [id=#6] + +(8) BroadcastHashJoin [codegen id : 2] +Left keys [2]: [ca_county#2, ca_state#3] +Right keys [2]: [s_county#4, s_state#5] +Join condition: None + +(9) Project [codegen id : 2] +Output [1]: [ca_address_sk#1] +Input [5]: [ca_address_sk#1, ca_county#2, ca_state#3, s_county#4, s_state#5] + +(10) BroadcastExchange +Input [1]: [ca_address_sk#1] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(11) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#8, cs_bill_customer_sk#9, cs_item_sk#10] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_sales] PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)] ReadSchema: struct -(2) ColumnarToRow [codegen id : 1] -Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] +(12) ColumnarToRow [codegen id : 3] +Input [3]: [cs_sold_date_sk#8, cs_bill_customer_sk#9, cs_item_sk#10] -(3) Filter [codegen id : 1] -Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] -Condition : ((isnotnull(cs_item_sk#3) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_bill_customer_sk#2)) +(13) Filter [codegen id : 3] +Input [3]: [cs_sold_date_sk#8, cs_bill_customer_sk#9, cs_item_sk#10] +Condition : ((isnotnull(cs_item_sk#10) AND isnotnull(cs_sold_date_sk#8)) AND isnotnull(cs_bill_customer_sk#9)) -(4) Project [codegen id : 1] -Output [3]: [cs_sold_date_sk#1 AS sold_date_sk#4, cs_bill_customer_sk#2 AS customer_sk#5, cs_item_sk#3 AS item_sk#6] -Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] +(14) Project [codegen id : 3] +Output [3]: [cs_sold_date_sk#8 AS sold_date_sk#11, cs_bill_customer_sk#9 AS customer_sk#12, cs_item_sk#10 AS item_sk#13] +Input [3]: [cs_sold_date_sk#8, cs_bill_customer_sk#9, cs_item_sk#10] -(5) Scan parquet default.web_sales -Output [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] +(15) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#14, ws_item_sk#15, ws_bill_customer_sk#16] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)] ReadSchema: struct -(6) ColumnarToRow [codegen id : 2] -Input [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] +(16) ColumnarToRow [codegen id : 4] +Input [3]: [ws_sold_date_sk#14, ws_item_sk#15, ws_bill_customer_sk#16] -(7) Filter [codegen id : 2] -Input [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] -Condition : ((isnotnull(ws_item_sk#8) AND isnotnull(ws_sold_date_sk#7)) AND isnotnull(ws_bill_customer_sk#9)) +(17) Filter [codegen id : 4] +Input [3]: [ws_sold_date_sk#14, ws_item_sk#15, ws_bill_customer_sk#16] +Condition : ((isnotnull(ws_item_sk#15) AND isnotnull(ws_sold_date_sk#14)) AND isnotnull(ws_bill_customer_sk#16)) -(8) Project [codegen id : 2] -Output [3]: [ws_sold_date_sk#7 AS sold_date_sk#10, ws_bill_customer_sk#9 AS customer_sk#11, ws_item_sk#8 AS item_sk#12] -Input [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] +(18) Project [codegen id : 4] +Output [3]: [ws_sold_date_sk#14 AS sold_date_sk#17, ws_bill_customer_sk#16 AS 
customer_sk#18, ws_item_sk#15 AS item_sk#19] +Input [3]: [ws_sold_date_sk#14, ws_item_sk#15, ws_bill_customer_sk#16] -(9) Union +(19) Union -(10) Scan parquet default.item -Output [3]: [i_item_sk#13, i_class#14, i_category#15] +(20) Scan parquet default.date_dim +Output [3]: [d_date_sk#20, d_year#21, d_moy#22] Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_category), IsNotNull(i_class), EqualTo(i_category,Women), EqualTo(i_class,maternity), IsNotNull(i_item_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,12), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct -(11) ColumnarToRow [codegen id : 3] -Input [3]: [i_item_sk#13, i_class#14, i_category#15] +(21) ColumnarToRow [codegen id : 5] +Input [3]: [d_date_sk#20, d_year#21, d_moy#22] -(12) Filter [codegen id : 3] -Input [3]: [i_item_sk#13, i_class#14, i_category#15] -Condition : ((((isnotnull(i_category#15) AND isnotnull(i_class#14)) AND (i_category#15 = Women)) AND (i_class#14 = maternity)) AND isnotnull(i_item_sk#13)) +(22) Filter [codegen id : 5] +Input [3]: [d_date_sk#20, d_year#21, d_moy#22] +Condition : ((((isnotnull(d_moy#22) AND isnotnull(d_year#21)) AND (d_moy#22 = 12)) AND (d_year#21 = 1998)) AND isnotnull(d_date_sk#20)) -(13) Project [codegen id : 3] -Output [1]: [i_item_sk#13] -Input [3]: [i_item_sk#13, i_class#14, i_category#15] +(23) Project [codegen id : 5] +Output [1]: [d_date_sk#20] +Input [3]: [d_date_sk#20, d_year#21, d_moy#22] -(14) BroadcastExchange -Input [1]: [i_item_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] +(24) BroadcastExchange +Input [1]: [d_date_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] -(15) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [item_sk#6] -Right keys [1]: [i_item_sk#13] +(25) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [sold_date_sk#11] +Right keys [1]: [d_date_sk#20] Join condition: None -(16) Project [codegen id : 5] -Output [2]: [sold_date_sk#4, customer_sk#5] -Input [4]: [sold_date_sk#4, customer_sk#5, item_sk#6, i_item_sk#13] +(26) Project [codegen id : 7] +Output [2]: [customer_sk#12, item_sk#13] +Input [4]: [sold_date_sk#11, customer_sk#12, item_sk#13, d_date_sk#20] -(17) Scan parquet default.date_dim -Output [3]: [d_date_sk#17, d_year#18, d_moy#19] +(27) Scan parquet default.item +Output [3]: [i_item_sk#24, i_class#25, i_category#26] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,12), EqualTo(d_year,1998), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category), IsNotNull(i_class), EqualTo(i_category,Women), EqualTo(i_class,maternity), IsNotNull(i_item_sk)] +ReadSchema: struct -(18) ColumnarToRow [codegen id : 4] -Input [3]: [d_date_sk#17, d_year#18, d_moy#19] +(28) ColumnarToRow [codegen id : 6] +Input [3]: [i_item_sk#24, i_class#25, i_category#26] -(19) Filter [codegen id : 4] -Input [3]: [d_date_sk#17, d_year#18, d_moy#19] -Condition : ((((isnotnull(d_moy#19) AND isnotnull(d_year#18)) AND (d_moy#19 = 12)) AND (d_year#18 = 1998)) AND isnotnull(d_date_sk#17)) +(29) Filter [codegen id : 6] +Input [3]: [i_item_sk#24, i_class#25, i_category#26] +Condition : ((((isnotnull(i_category#26) AND 
isnotnull(i_class#25)) AND (i_category#26 = Women)) AND (i_class#25 = maternity)) AND isnotnull(i_item_sk#24)) -(20) Project [codegen id : 4] -Output [1]: [d_date_sk#17] -Input [3]: [d_date_sk#17, d_year#18, d_moy#19] +(30) Project [codegen id : 6] +Output [1]: [i_item_sk#24] +Input [3]: [i_item_sk#24, i_class#25, i_category#26] -(21) BroadcastExchange -Input [1]: [d_date_sk#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] +(31) BroadcastExchange +Input [1]: [i_item_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27] -(22) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [sold_date_sk#4] -Right keys [1]: [d_date_sk#17] +(32) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [item_sk#13] +Right keys [1]: [i_item_sk#24] Join condition: None -(23) Project [codegen id : 5] -Output [1]: [customer_sk#5] -Input [3]: [sold_date_sk#4, customer_sk#5, d_date_sk#17] +(33) Project [codegen id : 7] +Output [1]: [customer_sk#12] +Input [3]: [customer_sk#12, item_sk#13, i_item_sk#24] -(24) Exchange -Input [1]: [customer_sk#5] -Arguments: hashpartitioning(customer_sk#5, 5), true, [id=#21] +(34) Exchange +Input [1]: [customer_sk#12] +Arguments: hashpartitioning(customer_sk#12, 5), ENSURE_REQUIREMENTS, [id=#28] -(25) Sort [codegen id : 6] -Input [1]: [customer_sk#5] -Arguments: [customer_sk#5 ASC NULLS FIRST], false, 0 +(35) Sort [codegen id : 8] +Input [1]: [customer_sk#12] +Arguments: [customer_sk#12 ASC NULLS FIRST], false, 0 -(26) Scan parquet default.customer -Output [2]: [c_customer_sk#22, c_current_addr_sk#23] +(36) Scan parquet default.customer +Output [2]: [c_customer_sk#29, c_current_addr_sk#30] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct -(27) ColumnarToRow [codegen id : 7] -Input [2]: [c_customer_sk#22, c_current_addr_sk#23] +(37) ColumnarToRow [codegen id : 9] +Input [2]: [c_customer_sk#29, c_current_addr_sk#30] -(28) Filter [codegen id : 7] -Input [2]: [c_customer_sk#22, c_current_addr_sk#23] -Condition : (isnotnull(c_customer_sk#22) AND isnotnull(c_current_addr_sk#23)) +(38) Filter [codegen id : 9] +Input [2]: [c_customer_sk#29, c_current_addr_sk#30] +Condition : (isnotnull(c_customer_sk#29) AND isnotnull(c_current_addr_sk#30)) -(29) Exchange -Input [2]: [c_customer_sk#22, c_current_addr_sk#23] -Arguments: hashpartitioning(c_customer_sk#22, 5), true, [id=#24] +(39) Exchange +Input [2]: [c_customer_sk#29, c_current_addr_sk#30] +Arguments: hashpartitioning(c_customer_sk#29, 5), ENSURE_REQUIREMENTS, [id=#31] -(30) Sort [codegen id : 8] -Input [2]: [c_customer_sk#22, c_current_addr_sk#23] -Arguments: [c_customer_sk#22 ASC NULLS FIRST], false, 0 +(40) Sort [codegen id : 10] +Input [2]: [c_customer_sk#29, c_current_addr_sk#30] +Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 -(31) SortMergeJoin [codegen id : 9] -Left keys [1]: [customer_sk#5] -Right keys [1]: [c_customer_sk#22] +(41) SortMergeJoin +Left keys [1]: [customer_sk#12] +Right keys [1]: [c_customer_sk#29] Join condition: None -(32) Project [codegen id : 9] -Output [2]: [c_customer_sk#22, c_current_addr_sk#23] -Input [3]: [customer_sk#5, c_customer_sk#22, c_current_addr_sk#23] +(42) Project +Output [2]: [c_customer_sk#29, c_current_addr_sk#30] +Input [3]: [customer_sk#12, c_customer_sk#29, c_current_addr_sk#30] -(33) HashAggregate [codegen id : 9] -Input [2]: [c_customer_sk#22, c_current_addr_sk#23] 
-Keys [2]: [c_customer_sk#22, c_current_addr_sk#23] +(43) HashAggregate +Input [2]: [c_customer_sk#29, c_current_addr_sk#30] +Keys [2]: [c_customer_sk#29, c_current_addr_sk#30] Functions: [] Aggregate Attributes: [] -Results [2]: [c_customer_sk#22, c_current_addr_sk#23] +Results [2]: [c_customer_sk#29, c_current_addr_sk#30] -(34) HashAggregate [codegen id : 9] -Input [2]: [c_customer_sk#22, c_current_addr_sk#23] -Keys [2]: [c_customer_sk#22, c_current_addr_sk#23] +(44) HashAggregate +Input [2]: [c_customer_sk#29, c_current_addr_sk#30] +Keys [2]: [c_customer_sk#29, c_current_addr_sk#30] Functions: [] Aggregate Attributes: [] -Results [2]: [c_customer_sk#22, c_current_addr_sk#23] +Results [2]: [c_customer_sk#29, c_current_addr_sk#30] -(35) Sort [codegen id : 9] -Input [2]: [c_customer_sk#22, c_current_addr_sk#23] -Arguments: [c_customer_sk#22 ASC NULLS FIRST], false, 0 +(45) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ca_address_sk#1] +Right keys [1]: [c_current_addr_sk#30] +Join condition: None -(36) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] +(46) Project [codegen id : 11] +Output [1]: [c_customer_sk#29] +Input [3]: [ca_address_sk#1, c_customer_sk#29, c_current_addr_sk#30] + +(47) Sort [codegen id : 11] +Input [1]: [c_customer_sk#29] +Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 + +(48) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#32, ss_customer_sk#33, ss_ext_sales_price#34] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] ReadSchema: struct -(37) ColumnarToRow [codegen id : 10] -Input [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] - -(38) Filter [codegen id : 10] -Input [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] -Condition : (isnotnull(ss_customer_sk#26) AND isnotnull(ss_sold_date_sk#25)) - -(39) Exchange -Input [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] -Arguments: hashpartitioning(ss_customer_sk#26, 5), true, [id=#28] - -(40) Sort [codegen id : 11] -Input [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] -Arguments: [ss_customer_sk#26 ASC NULLS FIRST], false, 0 - -(41) SortMergeJoin [codegen id : 12] -Left keys [1]: [c_customer_sk#22] -Right keys [1]: [ss_customer_sk#26] -Join condition: None - -(42) Project [codegen id : 12] -Output [4]: [c_customer_sk#22, c_current_addr_sk#23, ss_sold_date_sk#25, ss_ext_sales_price#27] -Input [5]: [c_customer_sk#22, c_current_addr_sk#23, ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] +(49) ColumnarToRow [codegen id : 13] +Input [3]: [ss_sold_date_sk#32, ss_customer_sk#33, ss_ext_sales_price#34] -(43) Exchange -Input [4]: [c_customer_sk#22, c_current_addr_sk#23, ss_sold_date_sk#25, ss_ext_sales_price#27] -Arguments: hashpartitioning(c_current_addr_sk#23, 5), true, [id=#29] +(50) Filter [codegen id : 13] +Input [3]: [ss_sold_date_sk#32, ss_customer_sk#33, ss_ext_sales_price#34] +Condition : (isnotnull(ss_customer_sk#33) AND isnotnull(ss_sold_date_sk#32)) -(44) Sort [codegen id : 13] -Input [4]: [c_customer_sk#22, c_current_addr_sk#23, ss_sold_date_sk#25, ss_ext_sales_price#27] -Arguments: [c_current_addr_sk#23 ASC NULLS FIRST], false, 0 - -(45) Scan parquet default.customer_address -Output [3]: [ca_address_sk#30, ca_county#31, ca_state#32] +(51) Scan parquet default.date_dim +Output [2]: [d_date_sk#20, d_month_seq#35] Batched: true -Location [not 
included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county), IsNotNull(ca_state)] -ReadSchema: struct - -(46) ColumnarToRow [codegen id : 14] -Input [3]: [ca_address_sk#30, ca_county#31, ca_state#32] - -(47) Filter [codegen id : 14] -Input [3]: [ca_address_sk#30, ca_county#31, ca_state#32] -Condition : ((isnotnull(ca_address_sk#30) AND isnotnull(ca_county#31)) AND isnotnull(ca_state#32)) - -(48) Exchange -Input [3]: [ca_address_sk#30, ca_county#31, ca_state#32] -Arguments: hashpartitioning(ca_address_sk#30, 5), true, [id=#33] - -(49) Sort [codegen id : 15] -Input [3]: [ca_address_sk#30, ca_county#31, ca_state#32] -Arguments: [ca_address_sk#30 ASC NULLS FIRST], false, 0 - -(50) SortMergeJoin [codegen id : 18] -Left keys [1]: [c_current_addr_sk#23] -Right keys [1]: [ca_address_sk#30] -Join condition: None +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] +ReadSchema: struct -(51) Project [codegen id : 18] -Output [5]: [c_customer_sk#22, ss_sold_date_sk#25, ss_ext_sales_price#27, ca_county#31, ca_state#32] -Input [7]: [c_customer_sk#22, c_current_addr_sk#23, ss_sold_date_sk#25, ss_ext_sales_price#27, ca_address_sk#30, ca_county#31, ca_state#32] +(52) ColumnarToRow [codegen id : 12] +Input [2]: [d_date_sk#20, d_month_seq#35] -(52) Scan parquet default.store -Output [2]: [s_county#34, s_state#35] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_county), IsNotNull(s_state)] -ReadSchema: struct +(53) Filter [codegen id : 12] +Input [2]: [d_date_sk#20, d_month_seq#35] +Condition : (((isnotnull(d_month_seq#35) AND (d_month_seq#35 >= Subquery scalar-subquery#36, [id=#37])) AND (d_month_seq#35 <= Subquery scalar-subquery#38, [id=#39])) AND isnotnull(d_date_sk#20)) -(53) ColumnarToRow [codegen id : 16] -Input [2]: [s_county#34, s_state#35] - -(54) Filter [codegen id : 16] -Input [2]: [s_county#34, s_state#35] -Condition : (isnotnull(s_county#34) AND isnotnull(s_state#35)) +(54) Project [codegen id : 12] +Output [1]: [d_date_sk#20] +Input [2]: [d_date_sk#20, d_month_seq#35] (55) BroadcastExchange -Input [2]: [s_county#34, s_state#35] -Arguments: HashedRelationBroadcastMode(List(input[0, string, false], input[1, string, false]),false), [id=#36] +Input [1]: [d_date_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#40] -(56) BroadcastHashJoin [codegen id : 18] -Left keys [2]: [ca_county#31, ca_state#32] -Right keys [2]: [s_county#34, s_state#35] +(56) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ss_sold_date_sk#32] +Right keys [1]: [d_date_sk#20] Join condition: None -(57) Project [codegen id : 18] -Output [3]: [c_customer_sk#22, ss_sold_date_sk#25, ss_ext_sales_price#27] -Input [7]: [c_customer_sk#22, ss_sold_date_sk#25, ss_ext_sales_price#27, ca_county#31, ca_state#32, s_county#34, s_state#35] - -(58) Scan parquet default.date_dim -Output [2]: [d_date_sk#17, d_month_seq#37] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] -ReadSchema: struct - -(59) ColumnarToRow [codegen id : 17] -Input [2]: [d_date_sk#17, d_month_seq#37] - -(60) Filter [codegen id : 17] -Input [2]: [d_date_sk#17, d_month_seq#37] -Condition : (((isnotnull(d_month_seq#37) AND (d_month_seq#37 >= Subquery scalar-subquery#38, [id=#39])) AND (d_month_seq#37 <= Subquery scalar-subquery#40, 
[id=#41])) AND isnotnull(d_date_sk#17)) +(57) Project [codegen id : 13] +Output [2]: [ss_customer_sk#33, ss_ext_sales_price#34] +Input [4]: [ss_sold_date_sk#32, ss_customer_sk#33, ss_ext_sales_price#34, d_date_sk#20] -(61) Project [codegen id : 17] -Output [1]: [d_date_sk#17] -Input [2]: [d_date_sk#17, d_month_seq#37] +(58) Exchange +Input [2]: [ss_customer_sk#33, ss_ext_sales_price#34] +Arguments: hashpartitioning(ss_customer_sk#33, 5), ENSURE_REQUIREMENTS, [id=#41] -(62) BroadcastExchange -Input [1]: [d_date_sk#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#42] +(59) Sort [codegen id : 14] +Input [2]: [ss_customer_sk#33, ss_ext_sales_price#34] +Arguments: [ss_customer_sk#33 ASC NULLS FIRST], false, 0 -(63) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [ss_sold_date_sk#25] -Right keys [1]: [d_date_sk#17] +(60) SortMergeJoin [codegen id : 15] +Left keys [1]: [c_customer_sk#29] +Right keys [1]: [ss_customer_sk#33] Join condition: None -(64) Project [codegen id : 18] -Output [2]: [c_customer_sk#22, ss_ext_sales_price#27] -Input [4]: [c_customer_sk#22, ss_sold_date_sk#25, ss_ext_sales_price#27, d_date_sk#17] - -(65) HashAggregate [codegen id : 18] -Input [2]: [c_customer_sk#22, ss_ext_sales_price#27] -Keys [1]: [c_customer_sk#22] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#27))] -Aggregate Attributes [1]: [sum#43] -Results [2]: [c_customer_sk#22, sum#44] - -(66) Exchange -Input [2]: [c_customer_sk#22, sum#44] -Arguments: hashpartitioning(c_customer_sk#22, 5), true, [id=#45] - -(67) HashAggregate [codegen id : 19] -Input [2]: [c_customer_sk#22, sum#44] -Keys [1]: [c_customer_sk#22] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#27))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#27))#46] -Results [1]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#27))#46,17,2)) / 50.00), DecimalType(21,6), true) as int) AS segment#47] - -(68) HashAggregate [codegen id : 19] -Input [1]: [segment#47] -Keys [1]: [segment#47] +(61) Project [codegen id : 15] +Output [2]: [c_customer_sk#29, ss_ext_sales_price#34] +Input [3]: [c_customer_sk#29, ss_customer_sk#33, ss_ext_sales_price#34] + +(62) HashAggregate [codegen id : 15] +Input [2]: [c_customer_sk#29, ss_ext_sales_price#34] +Keys [1]: [c_customer_sk#29] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#34))] +Aggregate Attributes [1]: [sum#42] +Results [2]: [c_customer_sk#29, sum#43] + +(63) HashAggregate [codegen id : 15] +Input [2]: [c_customer_sk#29, sum#43] +Keys [1]: [c_customer_sk#29] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#34))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#34))#44] +Results [1]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#34))#44,17,2)) / 50.00), DecimalType(21,6), true) as int) AS segment#45] + +(64) HashAggregate [codegen id : 15] +Input [1]: [segment#45] +Keys [1]: [segment#45] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#48] -Results [2]: [segment#47, count#49] +Aggregate Attributes [1]: [count#46] +Results [2]: [segment#45, count#47] -(69) Exchange -Input [2]: [segment#47, count#49] -Arguments: hashpartitioning(segment#47, 5), true, [id=#50] +(65) Exchange +Input [2]: [segment#45, count#47] +Arguments: hashpartitioning(segment#45, 5), ENSURE_REQUIREMENTS, [id=#48] -(70) HashAggregate [codegen id : 20] -Input [2]: [segment#47, count#49] -Keys [1]: [segment#47] +(66) HashAggregate 
[codegen id : 16] +Input [2]: [segment#45, count#47] +Keys [1]: [segment#45] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#51] -Results [3]: [segment#47, count(1)#51 AS num_customers#52, (segment#47 * 50) AS segment_base#53] +Aggregate Attributes [1]: [count(1)#49] +Results [3]: [segment#45, count(1)#49 AS num_customers#50, (segment#45 * 50) AS segment_base#51] -(71) TakeOrderedAndProject -Input [3]: [segment#47, num_customers#52, segment_base#53] -Arguments: 100, [segment#47 ASC NULLS FIRST, num_customers#52 ASC NULLS FIRST], [segment#47, num_customers#52, segment_base#53] +(67) TakeOrderedAndProject +Input [3]: [segment#45, num_customers#50, segment_base#51] +Arguments: 100, [segment#45 ASC NULLS FIRST, num_customers#50 ASC NULLS FIRST], [segment#45, num_customers#50, segment_base#51] ===== Subqueries ===== -Subquery:1 Hosting operator id = 60 Hosting Expression = Subquery scalar-subquery#38, [id=#39] -* HashAggregate (78) -+- Exchange (77) - +- * HashAggregate (76) - +- * Project (75) - +- * Filter (74) - +- * ColumnarToRow (73) - +- Scan parquet default.date_dim (72) +Subquery:1 Hosting operator id = 53 Hosting Expression = Subquery scalar-subquery#36, [id=#37] +* HashAggregate (74) ++- Exchange (73) + +- * HashAggregate (72) + +- * Project (71) + +- * Filter (70) + +- * ColumnarToRow (69) + +- Scan parquet default.date_dim (68) -(72) Scan parquet default.date_dim -Output [3]: [d_month_seq#37, d_year#18, d_moy#19] +(68) Scan parquet default.date_dim +Output [3]: [d_month_seq#35, d_year#21, d_moy#22] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)] ReadSchema: struct -(73) ColumnarToRow [codegen id : 1] -Input [3]: [d_month_seq#37, d_year#18, d_moy#19] +(69) ColumnarToRow [codegen id : 1] +Input [3]: [d_month_seq#35, d_year#21, d_moy#22] -(74) Filter [codegen id : 1] -Input [3]: [d_month_seq#37, d_year#18, d_moy#19] -Condition : (((isnotnull(d_year#18) AND isnotnull(d_moy#19)) AND (d_year#18 = 1998)) AND (d_moy#19 = 12)) +(70) Filter [codegen id : 1] +Input [3]: [d_month_seq#35, d_year#21, d_moy#22] +Condition : (((isnotnull(d_year#21) AND isnotnull(d_moy#22)) AND (d_year#21 = 1998)) AND (d_moy#22 = 12)) -(75) Project [codegen id : 1] -Output [1]: [(d_month_seq#37 + 1) AS (d_month_seq + 1)#54] -Input [3]: [d_month_seq#37, d_year#18, d_moy#19] +(71) Project [codegen id : 1] +Output [1]: [(d_month_seq#35 + 1) AS (d_month_seq + 1)#52] +Input [3]: [d_month_seq#35, d_year#21, d_moy#22] -(76) HashAggregate [codegen id : 1] -Input [1]: [(d_month_seq + 1)#54] -Keys [1]: [(d_month_seq + 1)#54] +(72) HashAggregate [codegen id : 1] +Input [1]: [(d_month_seq + 1)#52] +Keys [1]: [(d_month_seq + 1)#52] Functions: [] Aggregate Attributes: [] -Results [1]: [(d_month_seq + 1)#54] +Results [1]: [(d_month_seq + 1)#52] -(77) Exchange -Input [1]: [(d_month_seq + 1)#54] -Arguments: hashpartitioning((d_month_seq + 1)#54, 5), true, [id=#55] +(73) Exchange +Input [1]: [(d_month_seq + 1)#52] +Arguments: hashpartitioning((d_month_seq + 1)#52, 5), ENSURE_REQUIREMENTS, [id=#53] -(78) HashAggregate [codegen id : 2] -Input [1]: [(d_month_seq + 1)#54] -Keys [1]: [(d_month_seq + 1)#54] +(74) HashAggregate [codegen id : 2] +Input [1]: [(d_month_seq + 1)#52] +Keys [1]: [(d_month_seq + 1)#52] Functions: [] Aggregate Attributes: [] -Results [1]: [(d_month_seq + 1)#54] +Results [1]: [(d_month_seq + 1)#52] -Subquery:2 Hosting operator id = 60 Hosting Expression = Subquery 
scalar-subquery#40, [id=#41] -* HashAggregate (85) -+- Exchange (84) - +- * HashAggregate (83) - +- * Project (82) - +- * Filter (81) - +- * ColumnarToRow (80) - +- Scan parquet default.date_dim (79) +Subquery:2 Hosting operator id = 53 Hosting Expression = Subquery scalar-subquery#38, [id=#39] +* HashAggregate (81) ++- Exchange (80) + +- * HashAggregate (79) + +- * Project (78) + +- * Filter (77) + +- * ColumnarToRow (76) + +- Scan parquet default.date_dim (75) -(79) Scan parquet default.date_dim -Output [3]: [d_month_seq#37, d_year#18, d_moy#19] +(75) Scan parquet default.date_dim +Output [3]: [d_month_seq#35, d_year#21, d_moy#22] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)] ReadSchema: struct -(80) ColumnarToRow [codegen id : 1] -Input [3]: [d_month_seq#37, d_year#18, d_moy#19] +(76) ColumnarToRow [codegen id : 1] +Input [3]: [d_month_seq#35, d_year#21, d_moy#22] -(81) Filter [codegen id : 1] -Input [3]: [d_month_seq#37, d_year#18, d_moy#19] -Condition : (((isnotnull(d_year#18) AND isnotnull(d_moy#19)) AND (d_year#18 = 1998)) AND (d_moy#19 = 12)) +(77) Filter [codegen id : 1] +Input [3]: [d_month_seq#35, d_year#21, d_moy#22] +Condition : (((isnotnull(d_year#21) AND isnotnull(d_moy#22)) AND (d_year#21 = 1998)) AND (d_moy#22 = 12)) -(82) Project [codegen id : 1] -Output [1]: [(d_month_seq#37 + 3) AS (d_month_seq + 3)#56] -Input [3]: [d_month_seq#37, d_year#18, d_moy#19] +(78) Project [codegen id : 1] +Output [1]: [(d_month_seq#35 + 3) AS (d_month_seq + 3)#54] +Input [3]: [d_month_seq#35, d_year#21, d_moy#22] -(83) HashAggregate [codegen id : 1] -Input [1]: [(d_month_seq + 3)#56] -Keys [1]: [(d_month_seq + 3)#56] +(79) HashAggregate [codegen id : 1] +Input [1]: [(d_month_seq + 3)#54] +Keys [1]: [(d_month_seq + 3)#54] Functions: [] Aggregate Attributes: [] -Results [1]: [(d_month_seq + 3)#56] +Results [1]: [(d_month_seq + 3)#54] -(84) Exchange -Input [1]: [(d_month_seq + 3)#56] -Arguments: hashpartitioning((d_month_seq + 3)#56, 5), true, [id=#57] +(80) Exchange +Input [1]: [(d_month_seq + 3)#54] +Arguments: hashpartitioning((d_month_seq + 3)#54, 5), ENSURE_REQUIREMENTS, [id=#55] -(85) HashAggregate [codegen id : 2] -Input [1]: [(d_month_seq + 3)#56] -Keys [1]: [(d_month_seq + 3)#56] +(81) HashAggregate [codegen id : 2] +Input [1]: [(d_month_seq + 3)#54] +Keys [1]: [(d_month_seq + 3)#54] Functions: [] Aggregate Attributes: [] -Results [1]: [(d_month_seq + 3)#56] +Results [1]: [(d_month_seq + 3)#54] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/simplified.txt index cb7130f53c9a9..3b0622cbf9264 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/simplified.txt @@ -1,142 +1,130 @@ TakeOrderedAndProject [segment,num_customers,segment_base] - WholeStageCodegen (20) + WholeStageCodegen (16) HashAggregate [segment,count] [count(1),num_customers,segment_base,count] InputAdapter Exchange [segment] #1 - WholeStageCodegen (19) + WholeStageCodegen (15) HashAggregate [segment] [count,count] HashAggregate [c_customer_sk,sum] [sum(UnscaledValue(ss_ext_sales_price)),segment,sum] - InputAdapter - Exchange [c_customer_sk] #2 - WholeStageCodegen (18) - HashAggregate [c_customer_sk,ss_ext_sales_price] 
[sum,sum] - Project [c_customer_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [c_customer_sk,ss_sold_date_sk,ss_ext_sales_price] - BroadcastHashJoin [ca_county,ca_state,s_county,s_state] - Project [c_customer_sk,ss_sold_date_sk,ss_ext_sales_price,ca_county,ca_state] - SortMergeJoin [c_current_addr_sk,ca_address_sk] - InputAdapter - WholeStageCodegen (13) - Sort [c_current_addr_sk] + HashAggregate [c_customer_sk,ss_ext_sales_price] [sum,sum] + Project [c_customer_sk,ss_ext_sales_price] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (11) + Sort [c_customer_sk] + Project [c_customer_sk] + BroadcastHashJoin [ca_address_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (2) + Project [ca_address_sk] + BroadcastHashJoin [ca_county,ca_state,s_county,s_state] + Filter [ca_address_sk,ca_county,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state] InputAdapter - Exchange [c_current_addr_sk] #3 - WholeStageCodegen (12) - Project [c_customer_sk,c_current_addr_sk,ss_sold_date_sk,ss_ext_sales_price] - SortMergeJoin [c_customer_sk,ss_customer_sk] + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [s_county,s_state] + ColumnarToRow InputAdapter - WholeStageCodegen (9) - Sort [c_customer_sk] - HashAggregate [c_customer_sk,c_current_addr_sk] - HashAggregate [c_customer_sk,c_current_addr_sk] - Project [c_customer_sk,c_current_addr_sk] - SortMergeJoin [customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (6) - Sort [customer_sk] + Scan parquet default.store [s_county,s_state] + HashAggregate [c_customer_sk,c_current_addr_sk] + HashAggregate [c_customer_sk,c_current_addr_sk] + Project [c_customer_sk,c_current_addr_sk] + SortMergeJoin [customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (8) + Sort [customer_sk] + InputAdapter + Exchange [customer_sk] #4 + WholeStageCodegen (7) + Project [customer_sk] + BroadcastHashJoin [item_sk,i_item_sk] + Project [customer_sk,item_sk] + BroadcastHashJoin [sold_date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen (3) + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + Filter [cs_item_sk,cs_sold_date_sk,cs_bill_customer_sk] + ColumnarToRow InputAdapter - Exchange [customer_sk] #4 - WholeStageCodegen (5) - Project [customer_sk] - BroadcastHashJoin [sold_date_sk,d_date_sk] - Project [sold_date_sk,customer_sk] - BroadcastHashJoin [item_sk,i_item_sk] - InputAdapter - Union - WholeStageCodegen (1) - Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] - Filter [cs_item_sk,cs_sold_date_sk,cs_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] - WholeStageCodegen (2) - Project [ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] - Filter [ws_item_sk,ws_sold_date_sk,ws_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (3) - Project [i_item_sk] - Filter [i_category,i_class,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_class,i_category] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (4) - Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - InputAdapter - WholeStageCodegen (8) - Sort [c_customer_sk] + Scan parquet 
default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + WholeStageCodegen (4) + Project [ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] + Filter [ws_item_sk,ws_sold_date_sk,ws_bill_customer_sk] + ColumnarToRow InputAdapter - Exchange [c_customer_sk] #7 - WholeStageCodegen (7) - Filter [c_customer_sk,c_current_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_current_addr_sk] - InputAdapter - WholeStageCodegen (11) - Sort [ss_customer_sk] - InputAdapter - Exchange [ss_customer_sk] #8 - WholeStageCodegen (10) - Filter [ss_customer_sk,ss_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (5) + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Project [i_item_sk] + Filter [i_category,i_class,i_item_sk] ColumnarToRow InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_ext_sales_price] - InputAdapter - WholeStageCodegen (15) - Sort [ca_address_sk] - InputAdapter - Exchange [ca_address_sk] #9 - WholeStageCodegen (14) - Filter [ca_address_sk,ca_county,ca_state] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state] - InputAdapter - BroadcastExchange #10 - WholeStageCodegen (16) - Filter [s_county,s_state] + Scan parquet default.item [i_item_sk,i_class,i_category] + InputAdapter + WholeStageCodegen (10) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #7 + WholeStageCodegen (9) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] + InputAdapter + WholeStageCodegen (14) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #8 + WholeStageCodegen (13) + Project [ss_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] ColumnarToRow InputAdapter - Scan parquet default.store [s_county,s_state] - InputAdapter - BroadcastExchange #11 - WholeStageCodegen (17) - Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - Subquery #1 - WholeStageCodegen (2) - HashAggregate [(d_month_seq + 1)] - InputAdapter - Exchange [(d_month_seq + 1)] #12 - WholeStageCodegen (1) - HashAggregate [(d_month_seq + 1)] - Project [d_month_seq] - Filter [d_year,d_moy] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_month_seq,d_year,d_moy] - Subquery #2 - WholeStageCodegen (2) - HashAggregate [(d_month_seq + 3)] - InputAdapter - Exchange [(d_month_seq + 3)] #13 - WholeStageCodegen (1) - HashAggregate [(d_month_seq + 3)] - Project [d_month_seq] - Filter [d_year,d_moy] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_month_seq,d_year,d_moy] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (12) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Subquery #1 + WholeStageCodegen (2) + HashAggregate [(d_month_seq + 1)] + InputAdapter + Exchange [(d_month_seq + 1)] #10 + WholeStageCodegen (1) + HashAggregate [(d_month_seq + 1)] + Project [d_month_seq] + Filter [d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim 
[d_month_seq,d_year,d_moy] + Subquery #2 + WholeStageCodegen (2) + HashAggregate [(d_month_seq + 3)] + InputAdapter + Exchange [(d_month_seq + 3)] #11 + WholeStageCodegen (1) + HashAggregate [(d_month_seq + 3)] + Project [d_month_seq] + Filter [d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt index 432ef4db6b1eb..411cbf4809cd1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt @@ -13,8 +13,8 @@ TakeOrderedAndProject (94) : : +- * HashAggregate (23) : : +- * Project (22) : : +- * BroadcastHashJoin Inner BuildRight (21) - : : :- * Project (16) - : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) : : : :- Union (9) : : : : :- * Project (4) : : : : : +- * Filter (3) @@ -24,22 +24,22 @@ TakeOrderedAndProject (94) : : : : +- * Filter (7) : : : : +- * ColumnarToRow (6) : : : : +- Scan parquet default.store_returns (5) - : : : +- BroadcastExchange (14) - : : : +- * Project (13) - : : : +- * Filter (12) - : : : +- * ColumnarToRow (11) - : : : +- Scan parquet default.date_dim (10) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.store (10) : : +- BroadcastExchange (20) - : : +- * Filter (19) - : : +- * ColumnarToRow (18) - : : +- Scan parquet default.store (17) + : : +- * Project (19) + : : +- * Filter (18) + : : +- * ColumnarToRow (17) + : : +- Scan parquet default.date_dim (16) : :- * HashAggregate (46) : : +- Exchange (45) : : +- * HashAggregate (44) : : +- * Project (43) : : +- * BroadcastHashJoin Inner BuildRight (42) - : : :- * Project (37) - : : : +- * BroadcastHashJoin Inner BuildRight (36) + : : :- * Project (40) + : : : +- * BroadcastHashJoin Inner BuildRight (39) : : : :- Union (34) : : : : :- * Project (29) : : : : : +- * Filter (28) @@ -49,18 +49,18 @@ TakeOrderedAndProject (94) : : : : +- * Filter (32) : : : : +- * ColumnarToRow (31) : : : : +- Scan parquet default.catalog_returns (30) - : : : +- ReusedExchange (35) - : : +- BroadcastExchange (41) - : : +- * Filter (40) - : : +- * ColumnarToRow (39) - : : +- Scan parquet default.catalog_page (38) + : : : +- BroadcastExchange (38) + : : : +- * Filter (37) + : : : +- * ColumnarToRow (36) + : : : +- Scan parquet default.catalog_page (35) + : : +- ReusedExchange (41) : +- * HashAggregate (75) : +- Exchange (74) : +- * HashAggregate (73) : +- * Project (72) : +- * BroadcastHashJoin Inner BuildRight (71) - : :- * Project (66) - : : +- * BroadcastHashJoin Inner BuildRight (65) + : :- * Project (69) + : : +- * BroadcastHashJoin Inner BuildRight (68) : : :- Union (63) : : : :- * Project (50) : : : : +- * Filter (49) @@ -78,11 +78,11 @@ TakeOrderedAndProject (94) : : : +- * Filter (58) : : : +- * ColumnarToRow (57) : : : +- Scan parquet default.web_sales (56) - : : +- ReusedExchange (64) - : +- BroadcastExchange (70) - : +- * Filter (69) - : +- * ColumnarToRow (68) - : +- Scan parquet default.web_site (67) + : : +- BroadcastExchange (67) + : : +- * Filter (66) + : : +- * ColumnarToRow (65) + : : +- Scan parquet 
default.web_site (64) + : +- ReusedExchange (70) :- * HashAggregate (84) : +- Exchange (83) : +- * HashAggregate (82) @@ -132,81 +132,81 @@ Input [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_los (9) Union -(10) Scan parquet default.date_dim -Output [2]: [d_date_sk#21, d_date#22] +(10) Scan parquet default.store +Output [2]: [s_store_sk#21, s_store_id#22] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-08-18), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct (11) ColumnarToRow [codegen id : 3] -Input [2]: [d_date_sk#21, d_date#22] +Input [2]: [s_store_sk#21, s_store_id#22] (12) Filter [codegen id : 3] -Input [2]: [d_date_sk#21, d_date#22] -Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 10442)) AND (d_date#22 <= 10456)) AND isnotnull(d_date_sk#21)) +Input [2]: [s_store_sk#21, s_store_id#22] +Condition : isnotnull(s_store_sk#21) -(13) Project [codegen id : 3] -Output [1]: [d_date_sk#21] -Input [2]: [d_date_sk#21, d_date#22] +(13) BroadcastExchange +Input [2]: [s_store_sk#21, s_store_id#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] -(14) BroadcastExchange -Input [1]: [d_date_sk#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] - -(15) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [date_sk#6] -Right keys [1]: [cast(d_date_sk#21 as bigint)] +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [store_sk#5] +Right keys [1]: [cast(s_store_sk#21 as bigint)] Join condition: None -(16) Project [codegen id : 5] -Output [5]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10] -Input [7]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, d_date_sk#21] +(15) Project [codegen id : 5] +Output [6]: [date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#22] +Input [8]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_sk#21, s_store_id#22] -(17) Scan parquet default.store -Output [2]: [s_store_sk#24, s_store_id#25] +(16) Scan parquet default.date_dim +Output [2]: [d_date_sk#24, d_date#25] Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-08-18), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#24, d_date#25] -(18) ColumnarToRow [codegen id : 4] -Input [2]: [s_store_sk#24, s_store_id#25] +(18) Filter [codegen id : 4] +Input [2]: [d_date_sk#24, d_date#25] +Condition : (((isnotnull(d_date#25) AND (d_date#25 >= 10442)) AND (d_date#25 <= 10456)) AND isnotnull(d_date_sk#24)) -(19) Filter [codegen id : 4] -Input [2]: [s_store_sk#24, s_store_id#25] -Condition : isnotnull(s_store_sk#24) +(19) Project [codegen id : 4] +Output [1]: [d_date_sk#24] +Input [2]: [d_date_sk#24, d_date#25] (20) BroadcastExchange -Input [2]: [s_store_sk#24, s_store_id#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] +Input [1]: [d_date_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, 
int, true] as bigint)),false), [id=#26] (21) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [store_sk#5] -Right keys [1]: [cast(s_store_sk#24 as bigint)] +Left keys [1]: [date_sk#6] +Right keys [1]: [cast(d_date_sk#24 as bigint)] Join condition: None (22) Project [codegen id : 5] -Output [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#25] -Input [7]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_sk#24, s_store_id#25] +Output [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#22] +Input [7]: [date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#22, d_date_sk#24] (23) HashAggregate [codegen id : 5] -Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#25] -Keys [1]: [s_store_id#25] +Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#22] +Keys [1]: [s_store_id#22] Functions [4]: [partial_sum(UnscaledValue(sales_price#7)), partial_sum(UnscaledValue(return_amt#9)), partial_sum(UnscaledValue(profit#8)), partial_sum(UnscaledValue(net_loss#10))] Aggregate Attributes [4]: [sum#27, sum#28, sum#29, sum#30] -Results [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] +Results [5]: [s_store_id#22, sum#31, sum#32, sum#33, sum#34] (24) Exchange -Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] -Arguments: hashpartitioning(s_store_id#25, 5), ENSURE_REQUIREMENTS, [id=#35] +Input [5]: [s_store_id#22, sum#31, sum#32, sum#33, sum#34] +Arguments: hashpartitioning(s_store_id#22, 5), ENSURE_REQUIREMENTS, [id=#35] (25) HashAggregate [codegen id : 6] -Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] -Keys [1]: [s_store_id#25] +Input [5]: [s_store_id#22, sum#31, sum#32, sum#33, sum#34] +Keys [1]: [s_store_id#22] Functions [4]: [sum(UnscaledValue(sales_price#7)), sum(UnscaledValue(return_amt#9)), sum(UnscaledValue(profit#8)), sum(UnscaledValue(net_loss#10))] Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#7))#36, sum(UnscaledValue(return_amt#9))#37, sum(UnscaledValue(profit#8))#38, sum(UnscaledValue(net_loss#10))#39] -Results [5]: [store channel AS channel#40, concat(store, s_store_id#25) AS id#41, MakeDecimal(sum(UnscaledValue(sales_price#7))#36,17,2) AS sales#42, MakeDecimal(sum(UnscaledValue(return_amt#9))#37,17,2) AS returns#43, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#8))#38,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#10))#39,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#44] +Results [5]: [store channel AS channel#40, concat(store, s_store_id#22) AS id#41, MakeDecimal(sum(UnscaledValue(sales_price#7))#36,17,2) AS sales#42, MakeDecimal(sum(UnscaledValue(return_amt#9))#37,17,2) AS returns#43, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#8))#38,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#10))#39,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#44] (26) Scan parquet default.catalog_sales Output [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] @@ -246,44 +246,44 @@ Input [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, cr_return_amount#57, (34) Union -(35) ReusedExchange [Reuses operator id: 14] -Output [1]: [d_date_sk#21] - -(36) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [date_sk#50] -Right keys [1]: [d_date_sk#21] -Join condition: None - -(37) Project [codegen id : 11] -Output [5]: [page_sk#49, sales_price#51, 
profit#52, return_amt#53, net_loss#54] -Input [7]: [page_sk#49, date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, d_date_sk#21] - -(38) Scan parquet default.catalog_page +(35) Scan parquet default.catalog_page Output [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_page] PushedFilters: [IsNotNull(cp_catalog_page_sk)] ReadSchema: struct -(39) ColumnarToRow [codegen id : 10] +(36) ColumnarToRow [codegen id : 9] Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] -(40) Filter [codegen id : 10] +(37) Filter [codegen id : 9] Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] Condition : isnotnull(cp_catalog_page_sk#65) -(41) BroadcastExchange +(38) BroadcastExchange Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#67] -(42) BroadcastHashJoin [codegen id : 11] +(39) BroadcastHashJoin [codegen id : 11] Left keys [1]: [page_sk#49] Right keys [1]: [cp_catalog_page_sk#65] Join condition: None +(40) Project [codegen id : 11] +Output [6]: [date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] +Input [8]: [page_sk#49, date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_sk#65, cp_catalog_page_id#66] + +(41) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#24] + +(42) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [date_sk#50] +Right keys [1]: [d_date_sk#24] +Join condition: None + (43) Project [codegen id : 11] Output [5]: [sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] -Input [7]: [page_sk#49, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_sk#65, cp_catalog_page_id#66] +Input [7]: [date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66, d_date_sk#24] (44) HashAggregate [codegen id : 11] Input [5]: [sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] @@ -376,44 +376,44 @@ Input [8]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return (63) Union -(64) ReusedExchange [Reuses operator id: 14] -Output [1]: [d_date_sk#21] - -(65) BroadcastHashJoin [codegen id : 21] -Left keys [1]: [date_sk#91] -Right keys [1]: [cast(d_date_sk#21 as bigint)] -Join condition: None - -(66) Project [codegen id : 21] -Output [5]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95] -Input [7]: [wsr_web_site_sk#90, date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, d_date_sk#21] - -(67) Scan parquet default.web_site +(64) Scan parquet default.web_site Output [2]: [web_site_sk#111, web_site_id#112] Batched: true Location [not included in comparison]/{warehouse_dir}/web_site] PushedFilters: [IsNotNull(web_site_sk)] ReadSchema: struct -(68) ColumnarToRow [codegen id : 20] +(65) ColumnarToRow [codegen id : 19] Input [2]: [web_site_sk#111, web_site_id#112] -(69) Filter [codegen id : 20] +(66) Filter [codegen id : 19] Input [2]: [web_site_sk#111, web_site_id#112] Condition : isnotnull(web_site_sk#111) -(70) BroadcastExchange +(67) BroadcastExchange Input [2]: [web_site_sk#111, web_site_id#112] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#113] -(71) BroadcastHashJoin [codegen id : 21] +(68) BroadcastHashJoin [codegen id : 21] Left keys [1]: [wsr_web_site_sk#90] Right keys [1]: [web_site_sk#111] Join condition: None +(69) 
Project [codegen id : 21] +Output [6]: [date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112] +Input [8]: [wsr_web_site_sk#90, date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_sk#111, web_site_id#112] + +(70) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#24] + +(71) BroadcastHashJoin [codegen id : 21] +Left keys [1]: [date_sk#91] +Right keys [1]: [cast(d_date_sk#24 as bigint)] +Join condition: None + (72) Project [codegen id : 21] Output [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112] -Input [7]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_sk#111, web_site_id#112] +Input [7]: [date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112, d_date_sk#24] (73) HashAggregate [codegen id : 21] Input [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/simplified.txt index 233af6d8cc813..8d1794b903178 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/simplified.txt @@ -22,9 +22,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] WholeStageCodegen (5) HashAggregate [s_store_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] Project [sales_price,profit,return_amt,net_loss,s_store_id] - BroadcastHashJoin [store_sk,s_store_sk] - Project [store_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] + BroadcastHashJoin [date_sk,d_date_sk] + Project [date_sk,sales_price,profit,return_amt,net_loss,s_store_id] + BroadcastHashJoin [store_sk,s_store_sk] InputAdapter Union WholeStageCodegen (1) @@ -42,18 +42,18 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter BroadcastExchange #4 WholeStageCodegen (3) - Project [d_date_sk] - Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_id] InputAdapter BroadcastExchange #5 WholeStageCodegen (4) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_id] + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] WholeStageCodegen (12) HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] InputAdapter @@ -61,9 +61,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] WholeStageCodegen (11) HashAggregate [cp_catalog_page_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] Project [sales_price,profit,return_amt,net_loss,cp_catalog_page_id] - BroadcastHashJoin [page_sk,cp_catalog_page_sk] - Project [page_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] + BroadcastHashJoin [date_sk,d_date_sk] + Project [date_sk,sales_price,profit,return_amt,net_loss,cp_catalog_page_id] + BroadcastHashJoin [page_sk,cp_catalog_page_sk] InputAdapter Union WholeStageCodegen (7) @@ -79,14 +79,14 @@ 
TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter Scan parquet default.catalog_returns [cr_returned_date_sk,cr_catalog_page_sk,cr_return_amount,cr_net_loss] InputAdapter - ReusedExchange [d_date_sk] #4 + BroadcastExchange #7 + WholeStageCodegen (9) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] InputAdapter - BroadcastExchange #7 - WholeStageCodegen (10) - Filter [cp_catalog_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + ReusedExchange [d_date_sk] #5 WholeStageCodegen (22) HashAggregate [web_site_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] InputAdapter @@ -94,9 +94,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] WholeStageCodegen (21) HashAggregate [web_site_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] Project [sales_price,profit,return_amt,net_loss,web_site_id] - BroadcastHashJoin [wsr_web_site_sk,web_site_sk] - Project [wsr_web_site_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] + BroadcastHashJoin [date_sk,d_date_sk] + Project [date_sk,sales_price,profit,return_amt,net_loss,web_site_id] + BroadcastHashJoin [wsr_web_site_sk,web_site_sk] InputAdapter Union WholeStageCodegen (13) @@ -129,14 +129,14 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter Scan parquet default.web_sales [ws_item_sk,ws_web_site_sk,ws_order_number] InputAdapter - ReusedExchange [d_date_sk] #4 + BroadcastExchange #11 + WholeStageCodegen (19) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_site_sk,web_site_id] InputAdapter - BroadcastExchange #11 - WholeStageCodegen (20) - Filter [web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_site [web_site_sk,web_site_id] + ReusedExchange [d_date_sk] #5 WholeStageCodegen (49) HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sum(sales),sum(returns),sum(profit),sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter From 194edc86a2959f912b4e4d0bb4867b5cb2fd0813 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 6 Jan 2021 23:41:27 -0800 Subject: [PATCH 0993/1009] Revert "[SPARK-34029][SQL][TESTS] Add OrcEncryptionSuite and FakeKeyProvider" This reverts commit 8bb70bf0d646f6d54d17690d23ee935e452e747e. 
--- project/SparkBuild.scala | 1 - .../datasources/orc/FakeKeyProvider.java | 144 ------------------ ...pache.hadoop.crypto.key.KeyProviderFactory | 16 -- .../datasources/orc/OrcEncryptionSuite.scala | 98 ------------ 4 files changed, 259 deletions(-) delete mode 100644 sql/core/src/test/java/test/org/apache/spark/sql/execution/datasources/orc/FakeKeyProvider.java delete mode 100644 sql/core/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcEncryptionSuite.scala diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index f126ee35efcca..668701be0ae98 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -489,7 +489,6 @@ object SparkParallelTestGrouping { "org.apache.spark.sql.catalyst.expressions.HashExpressionsSuite", "org.apache.spark.sql.catalyst.expressions.CastSuite", "org.apache.spark.sql.catalyst.expressions.MathExpressionsSuite", - "org.apache.spark.sql.execution.datasources.orc.OrcEncryptionSuite", "org.apache.spark.sql.hive.HiveExternalCatalogSuite", "org.apache.spark.sql.hive.StatisticsSuite", "org.apache.spark.sql.hive.client.VersionsSuite", diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/execution/datasources/orc/FakeKeyProvider.java b/sql/core/src/test/java/test/org/apache/spark/sql/execution/datasources/orc/FakeKeyProvider.java deleted file mode 100644 index c48543802eb33..0000000000000 --- a/sql/core/src/test/java/test/org/apache/spark/sql/execution/datasources/orc/FakeKeyProvider.java +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package test.org.apache.spark.sql.execution.datasources.orc; - -import java.io.IOException; -import java.net.URI; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.crypto.key.KeyProvider; -import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension; -import org.apache.hadoop.crypto.key.KeyProviderFactory; -import org.apache.hadoop.crypto.key.kms.KMSClientProvider; - -/** - * A Hadoop KeyProvider that lets us test the interaction - * with the Hadoop code. 
- * - * https://github.com/apache/orc/blob/rel/release-1.6.6/java/tools/src/test/org/apache/orc/impl/FakeKeyProvider.java - * - * This file intentionally keeps the original file except - * (1) package name, (2) import order, (3) a few indentation - */ -public class FakeKeyProvider extends KeyProvider { - // map from key name to metadata - private final Map keyMetdata = new HashMap<>(); - // map from key version name to material - private final Map keyVersions = new HashMap<>(); - - public FakeKeyProvider(Configuration conf) { - super(conf); - } - - @Override - public KeyVersion getKeyVersion(String name) { - return keyVersions.get(name); - } - - @Override - public List getKeys() { - return new ArrayList<>(keyMetdata.keySet()); - } - - @Override - public List getKeyVersions(String name) { - List result = new ArrayList<>(); - Metadata meta = getMetadata(name); - for(int v=0; v < meta.getVersions(); ++v) { - String versionName = buildVersionName(name, v); - KeyVersion material = keyVersions.get(versionName); - if (material != null) { - result.add(material); - } - } - return result; - } - - @Override - public Metadata getMetadata(String name) { - return keyMetdata.get(name); - } - - @Override - public KeyVersion createKey(String name, byte[] bytes, Options options) { - String versionName = buildVersionName(name, 0); - keyMetdata.put(name, new TestMetadata(options.getCipher(), - options.getBitLength(), 1)); - KeyVersion result = new KMSClientProvider.KMSKeyVersion(name, versionName, bytes); - keyVersions.put(versionName, result); - return result; - } - - @Override - public void deleteKey(String name) { - throw new UnsupportedOperationException("Can't delete keys"); - } - - @Override - public KeyVersion rollNewVersion(String name, byte[] bytes) { - TestMetadata key = keyMetdata.get(name); - String versionName = buildVersionName(name, key.addVersion()); - KeyVersion result = new KMSClientProvider.KMSKeyVersion(name, versionName, - bytes); - keyVersions.put(versionName, result); - return result; - } - - @Override - public void flush() { - // Nothing - } - - static class TestMetadata extends KeyProvider.Metadata { - - TestMetadata(String cipher, int bitLength, int versions) { - super(cipher, bitLength, null, null, null, versions); - } - - public int addVersion() { - return super.addVersion(); - } - } - - public static class Factory extends KeyProviderFactory { - - @Override - public KeyProvider createProvider(URI uri, Configuration conf) throws IOException { - if ("test".equals(uri.getScheme())) { - KeyProvider provider = new FakeKeyProvider(conf); - // populate a couple keys into the provider - byte[] piiKey = new byte[]{0,1,2,3,4,5,6,7,8,9,0xa,0xb,0xc,0xd,0xe,0xf}; - org.apache.hadoop.crypto.key.KeyProvider.Options aes128 = new KeyProvider.Options(conf); - provider.createKey("pii", piiKey, aes128); - byte[] piiKey2 = new byte[]{0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, - 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f}; - provider.rollNewVersion("pii", piiKey2); - byte[] secretKey = new byte[]{0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, - 0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f}; - provider.createKey("secret", secretKey, aes128); - return KeyProviderCryptoExtension.createKeyProviderCryptoExtension(provider); - } - return null; - } - } -} diff --git a/sql/core/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory b/sql/core/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory deleted file mode 100644 index f436622b5fb42..0000000000000 --- 
a/sql/core/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory +++ /dev/null @@ -1,16 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -test.org.apache.spark.sql.execution.datasources.orc.FakeKeyProvider$Factory diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcEncryptionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcEncryptionSuite.scala deleted file mode 100644 index fac3cef5801dd..0000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcEncryptionSuite.scala +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.execution.datasources.orc - -import org.apache.spark.sql.Row -import org.apache.spark.sql.test.SharedSparkSession - -class OrcEncryptionSuite extends OrcTest with SharedSparkSession { - import testImplicits._ - - val originalData = Seq(("123456789", "dongjoon@apache.org", "Dongjoon Hyun")) - val rowDataWithoutKey = - Row(null, "841626795E7D351555B835A002E3BF10669DE9B81C95A3D59E10865AC37EA7C3", "Dongjoon Hyun") - - test("Write and read an encrypted file") { - val df = originalData.toDF("ssn", "email", "name") - - withTempPath { dir => - val path = dir.getAbsolutePath - withSQLConf( - "hadoop.security.key.provider.path" -> "test:///", - "orc.key.provider" -> "hadoop", - "orc.encrypt" -> "pii:ssn,email", - "orc.mask" -> "nullify:ssn;sha256:email") { - df.write.mode("overwrite").orc(path) - checkAnswer(spark.read.orc(path), df) - } - - withSQLConf( - "orc.key.provider" -> "memory", - "orc.encrypt" -> "pii:ssn,email", - "orc.mask" -> "nullify:ssn;sha256:email") { - checkAnswer(spark.read.orc(path), rowDataWithoutKey) - } - } - } - - test("Write and read an encrypted table") { - val df = originalData.toDF("ssn", "email", "name") - - withTempPath { dir => - val path = dir.getAbsolutePath - withTable("encrypted") { - sql( - s""" - |CREATE TABLE encrypted ( - | ssn STRING, - | email STRING, - | name STRING - |) - |USING ORC - |LOCATION "$path" - |OPTIONS ( - | hadoop.security.key.provider.path "test:///", - | orc.key.provider "hadoop", - | orc.encrypt "pii:ssn,email", - | orc.mask "nullify:ssn;sha256:email" - |) - |""".stripMargin) - sql("INSERT INTO encrypted VALUES('123456789', 'dongjoon@apache.org', 'Dongjoon Hyun')") - checkAnswer(sql("SELECT * FROM encrypted"), df) - } - withTable("normal") { - sql( - s""" - |CREATE TABLE normal ( - | ssn STRING, - | email STRING, - | name STRING - |) - |USING ORC - |LOCATION "$path" - |OPTIONS ( - | orc.key.provider "memory", - | orc.encrypt "pii:ssn,email", - | orc.mask "nullify:ssn;sha256:email" - |) - |""".stripMargin) - checkAnswer(sql("SELECT * FROM normal"), rowDataWithoutKey) - } - } - } -} From d36cdd55419c104134f88930206bedccdbe4f3c0 Mon Sep 17 00:00:00 2001 From: Yu Zhong Date: Thu, 7 Jan 2021 08:59:26 +0000 Subject: [PATCH 0994/1009] [SPARK-33933][SQL] Materialize BroadcastQueryStage first to avoid broadcast timeout in AQE ### What changes were proposed in this pull request? In AdaptiveSparkPlanExec.getFinalPhysicalPlan, when newStages are generated, sort the new stages by class type to make sure BroadcastQueryState precede others. It can make sure the broadcast job are submitted before map jobs to avoid waiting for job schedule and cause broadcast timeout. ### Why are the changes needed? When enable AQE, in getFinalPhysicalPlan, spark traversal the physical plan bottom up and create query stage for materialized part by createQueryStages and materialize those new created query stages to submit map stages or broadcasting. When ShuffleQueryStage are materializing before BroadcastQueryStage, the map job and broadcast job are submitted almost at the same time, but map job will hold all the computing resources. If the map job runs slow (when lots of data needs to process and the resource is limited), the broadcast job cannot be started(and finished) before spark.sql.broadcastTimeout, thus cause whole job failed (introduced in SPARK-31475). The workaround to increase spark.sql.broadcastTimeout doesn't make sense and graceful, because the data to broadcast is very small. 
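To make the reordering concrete, here is a small self-contained Scala sketch. The `BroadcastStage`/`ShuffleStage` classes are invented stand-ins for illustration (the real classes are `BroadcastQueryStageExec` and `ShuffleQueryStageExec`); it only shows how a `sortWith` predicate of this shape moves broadcast-like stages to the front while keeping the relative order of everything else:

```scala
// Toy stand-ins for AQE query stages, defined only for this sketch.
sealed trait Stage
case class BroadcastStage(id: Int) extends Stage
case class ShuffleStage(id: Int) extends Stage

object BroadcastFirstOrdering {
  def main(args: Array[String]): Unit = {
    val newStages: Seq[Stage] =
      Seq(ShuffleStage(1), BroadcastStage(2), ShuffleStage(3), BroadcastStage(4))

    // A broadcast stage sorts before anything that is not a broadcast stage;
    // all other pairs compare equal, and sortWith is stable, so shuffle stages
    // keep their original order.
    val reordered = newStages.sortWith {
      case (_: BroadcastStage, _: BroadcastStage) => false
      case (_: BroadcastStage, _)                 => true
      case _                                      => false
    }

    // Prints: List(BroadcastStage(2), BroadcastStage(4), ShuffleStage(1), ShuffleStage(3))
    println(reordered)
  }
}
```

Materializing the stages in this order submits the small broadcast jobs before the map jobs can occupy all the computing resources.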
### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? 1. Add UT 2. Test the code using dev environment in https://issues.apache.org/jira/browse/SPARK-33933 Closes #30998 from zhongyu09/aqe-broadcast. Authored-by: Yu Zhong Signed-off-by: Wenchen Fan --- .../adaptive/AdaptiveSparkPlanExec.scala | 11 ++++++++- .../adaptive/AdaptiveQueryExecSuite.scala | 24 +++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index 89d3b53510469..aa09f21af19b7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -189,8 +189,17 @@ case class AdaptiveSparkPlanExec( stagesToReplace = result.newStages ++ stagesToReplace executionId.foreach(onUpdatePlan(_, result.newStages.map(_.plan))) + // SPARK-33933: we should submit tasks of broadcast stages first, to avoid waiting + // for tasks to be scheduled and leading to broadcast timeout. + val reorderedNewStages = result.newStages + .sortWith { + case (_: BroadcastQueryStageExec, _: BroadcastQueryStageExec) => false + case (_: BroadcastQueryStageExec, _) => true + case _ => false + } + // Start materialization of all new stages and fail fast if any stages failed eagerly - result.newStages.foreach { stage => + reorderedNewStages.foreach { stage => try { stage.materialize().onComplete { res => if (res.isSuccess) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index 69f1565c2f8de..75993d49da677 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -1431,4 +1431,28 @@ class AdaptiveQueryExecSuite } } } + + test("SPARK-33933: AQE broadcast should not timeout with slow map tasks") { + val broadcastTimeoutInSec = 1 + val df = spark.sparkContext.parallelize(Range(0, 100), 100) + .flatMap(x => { + Thread.sleep(20) + for (i <- Range(0, 100)) yield (x % 26, x % 10) + }).toDF("index", "pv") + val dim = Range(0, 26).map(x => (x, ('a' + x).toChar.toString)) + .toDF("index", "name") + val testDf = df.groupBy("index") + .agg(sum($"pv").alias("pv")) + .join(dim, Seq("index")) + withSQLConf(SQLConf.BROADCAST_TIMEOUT.key -> broadcastTimeoutInSec.toString, + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { + val startTime = System.currentTimeMillis() + val result = testDf.collect() + val queryTime = System.currentTimeMillis() - startTime + assert(result.length == 26) + // make sure the execution time is large enough + assert(queryTime > (broadcastTimeoutInSec + 1) * 1000) + } + } + } From 7b06acc28b5c37da6c48bc44c3d921309d4ad3a8 Mon Sep 17 00:00:00 2001 From: fwang12 Date: Thu, 7 Jan 2021 20:49:37 +0900 Subject: [PATCH 0995/1009] [SPARK-33100][SQL][FOLLOWUP] Find correct bound of bracketed comment in spark-sql ### What changes were proposed in this pull request? This PR help find correct bound of bracketed comment in spark-sql. 
Here is the log for UT of SPARK-33100 in CliSuite before: ``` 2021-01-05 13:22:34.768 - stdout> spark-sql> /* SELECT 'test';*/ SELECT 'test'; 2021-01-05 13:22:41.523 - stderr> Time taken: 6.716 seconds, Fetched 1 row(s) 2021-01-05 13:22:41.599 - stdout> test 2021-01-05 13:22:41.6 - stdout> spark-sql> ;;/* SELECT 'test';*/ SELECT 'test'; 2021-01-05 13:22:41.709 - stdout> test 2021-01-05 13:22:41.709 - stdout> spark-sql> /* SELECT 'test';*/;; SELECT 'test'; 2021-01-05 13:22:41.902 - stdout> spark-sql> SELECT 'test'; -- SELECT 'test'; 2021-01-05 13:22:41.902 - stderr> Time taken: 0.129 seconds, Fetched 1 row(s) 2021-01-05 13:22:41.902 - stderr> Error in query: 2021-01-05 13:22:41.902 - stderr> mismatched input '' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 19) 2021-01-05 13:22:42.006 - stderr> 2021-01-05 13:22:42.006 - stderr> == SQL == 2021-01-05 13:22:42.006 - stderr> /* SELECT 'test';*/ 2021-01-05 13:22:42.006 - stderr> -------------------^^^ 2021-01-05 13:22:42.006 - stderr> 2021-01-05 13:22:42.006 - stderr> Time taken: 0.226 seconds, Fetched 1 row(s) 2021-01-05 13:22:42.006 - stdout> test ``` The root cause is that the insideBracketedComment is not accurate. For `/* comment */`, the last character `/` is not insideBracketedComment and it would be treat as beginning of statements. In this PR, this issue is fixed. ### Why are the changes needed? To fix the issue described above. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UT Closes #31054 from turboFei/SPARK-33100-followup. 
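The idea behind the fix can be sketched with a stripped-down splitter. This is a standalone toy, not the real `SparkSQLCLIDriver.splitSemiColon`: it ignores quotes, escapes and `--` comments, and only illustrates the deferred decrement, i.e. the closing `/` of `*/` still counts as being inside the comment and therefore cannot be taken as the start of a new statement:

```scala
object BracketedCommentSketch {
  // Splits a line on top-level ';', treating /* ... */ blocks as comments.
  def splitSemiColon(line: String): Seq[String] = {
    var level = 0            // bracketed-comment depth
    var leaving = false      // saw the closing "*/"; decrement one character later
    var begin = 0
    var isStatement = false
    val out = scala.collection.mutable.ArrayBuffer.empty[String]

    for (i <- 0 until line.length) {
      if (leaving) { level -= 1; leaving = false }  // the '/' itself stayed "inside"
      val c = line.charAt(i)
      if (level > 0 && c == '/' && i > 0 && line.charAt(i - 1) == '*') {
        leaving = true                              // defer the decrement
      } else if (level == 0 && c == '/' && i + 1 < line.length && line.charAt(i + 1) == '*') {
        level += 1
      } else if (c == ';' && level == 0) {
        if (isStatement) out += line.substring(begin, i)
        begin = i + 1
        isStatement = false
      }
      isStatement = isStatement || (level == 0 && c != ';' && !c.isWhitespace)
    }
    if (isStatement) out += line.substring(begin)
    out.toSeq
  }

  def main(args: Array[String]): Unit = {
    // Yields exactly one statement instead of an empty one plus the SELECT.
    println(splitSemiColon("/* SELECT 'test';*/ SELECT 'test';"))
  }
}
```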
Authored-by: fwang12 Signed-off-by: Takeshi Yamamuro --- .../hive/thriftserver/SparkSQLCLIDriver.scala | 24 +++++++++++++------ .../sql/hive/thriftserver/CliSuite.scala | 4 ++-- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 9155eacfa4896..8606aaab1cae2 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -530,15 +530,24 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { var bracketedCommentLevel = 0 var escape = false var beginIndex = 0 - var includingStatement = false + var leavingBracketedComment = false + var isStatement = false val ret = new JArrayList[String] def insideBracketedComment: Boolean = bracketedCommentLevel > 0 def insideComment: Boolean = insideSimpleComment || insideBracketedComment - def statementBegin(index: Int): Boolean = includingStatement || (!insideComment && + def statementInProgress(index: Int): Boolean = isStatement || (!insideComment && index > beginIndex && !s"${line.charAt(index)}".trim.isEmpty) for (index <- 0 until line.length) { + // Checks if we need to decrement a bracketed comment level; the last character '/' of + // bracketed comments is still inside the comment, so `insideBracketedComment` must keep true + // in the previous loop and we decrement the level here if needed. + if (leavingBracketedComment) { + bracketedCommentLevel -= 1 + leavingBracketedComment = false + } + if (line.charAt(index) == '\'' && !insideComment) { // take a look to see if it is escaped // See the comment above about SPARK-31595 @@ -568,12 +577,12 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { if (insideSingleQuote || insideDoubleQuote || insideComment) { // do not split } else { - if (includingStatement) { + if (isStatement) { // split, do not include ; itself ret.add(line.substring(beginIndex, index)) } beginIndex = index + 1 - includingStatement = false + isStatement = false } } else if (line.charAt(index) == '\n') { // with a new line the inline simple comment should end. 
@@ -585,7 +594,8 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { if (insideSingleQuote || insideDoubleQuote) { // Ignores '/' in any case of quotes } else if (insideBracketedComment && line.charAt(index - 1) == '*' ) { - bracketedCommentLevel -= 1 + // Decrements `bracketedCommentLevel` at the beginning of the next loop + leavingBracketedComment = true } else if (hasNext && !insideBracketedComment && line.charAt(index + 1) == '*') { bracketedCommentLevel += 1 } @@ -597,9 +607,9 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { escape = true } - includingStatement = statementBegin(index) + isStatement = statementInProgress(index) } - if (includingStatement) { + if (isStatement) { ret.add(line.substring(beginIndex)) } ret diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 6708cf99e7f41..1a96012a0b4e9 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -577,8 +577,8 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { "/* SELECT 'test';*/ SELECT 'test';" -> "test", ";;/* SELECT 'test';*/ SELECT 'test';" -> "test", "/* SELECT 'test';*/;; SELECT 'test';" -> "test", - "SELECT 'test'; -- SELECT 'test';" -> "", - "SELECT 'test'; /* SELECT 'test';*/;" -> "", + "SELECT 'test'; -- SELECT 'test';" -> "test", + "SELECT 'test'; /* SELECT 'test';*/;" -> "test", "/*$meta chars{^\\;}*/ SELECT 'test';" -> "test", "/*\nmulti-line\n*/ SELECT 'test';" -> "test", "/*/* multi-level bracketed*/ SELECT 'test';" -> "test" From aa388cf3d0ff230eb0397876fe2db03bbe51658e Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Fri, 8 Jan 2021 09:28:31 +0900 Subject: [PATCH 0996/1009] [SPARK-34041][PYTHON][DOCS] Miscellaneous cleanup for new PySpark documentation ### What changes were proposed in this pull request? This PR proposes to: - Add a link of quick start in PySpark docs into "Programming Guides" in Spark main docs - `ML` / `MLlib` -> `MLlib (DataFrame-based)` / `MLlib (RDD-based)` in API reference page - Mention other user guides as well because the guide such as [ML](http://spark.apache.org/docs/latest/ml-guide.html) and [SQL](http://spark.apache.org/docs/latest/sql-programming-guide.html). - Mention other migration guides as well because PySpark can get affected by it. ### Why are the changes needed? For better documentation. ### Does this PR introduce _any_ user-facing change? It fixes user-facing docs. However, it's not released out yet. ### How was this patch tested? Manually tested by running: ```bash cd docs SKIP_SCALADOC=1 SKIP_RDOC=1 SKIP_SQLDOC=1 jekyll serve --watch ``` Closes #31082 from HyukjinKwon/SPARK-34041. 
Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- docs/_layouts/global.html | 1 + docs/index.md | 2 ++ python/docs/source/getting_started/index.rst | 3 +++ python/docs/source/migration_guide/index.rst | 12 ++++++++++-- python/docs/source/reference/pyspark.ml.rst | 12 ++++++------ python/docs/source/reference/pyspark.mllib.rst | 4 ++-- python/docs/source/user_guide/index.rst | 12 ++++++++++++ 7 files changed, 36 insertions(+), 10 deletions(-) diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html index de98f29acf3b7..f10d46763cf76 100755 --- a/docs/_layouts/global.html +++ b/docs/_layouts/global.html @@ -84,6 +84,7 @@ MLlib (Machine Learning) GraphX (Graph Processing) SparkR (R on Spark) + PySpark (Python on Spark) diff --git a/docs/index.md b/docs/index.md index 8fd169e63f608..c4c2d722f975e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -113,6 +113,8 @@ options for deployment: * [Spark Streaming](streaming-programming-guide.html): processing data streams using DStreams (old API) * [MLlib](ml-guide.html): applying machine learning algorithms * [GraphX](graphx-programming-guide.html): processing graphs +* [SparkR](sparkr.html): processing data with Spark in R +* [PySpark](api/python/getting_started/index.html): processing data with Spark in Python **API Docs:** diff --git a/python/docs/source/getting_started/index.rst b/python/docs/source/getting_started/index.rst index 9fa3352ae27d8..38b9c935fc623 100644 --- a/python/docs/source/getting_started/index.rst +++ b/python/docs/source/getting_started/index.rst @@ -21,6 +21,9 @@ Getting Started =============== This page summarizes the basic steps required to setup and get started with PySpark. +There are more guides shared with other languages such as +`Quick Start `_ in Programming Guides +at `the Spark documentation `_. .. toctree:: :maxdepth: 2 diff --git a/python/docs/source/migration_guide/index.rst b/python/docs/source/migration_guide/index.rst index 41e36b16b3989..88e768dc464df 100644 --- a/python/docs/source/migration_guide/index.rst +++ b/python/docs/source/migration_guide/index.rst @@ -21,8 +21,6 @@ Migration Guide =============== This page describes the migration guide specific to PySpark. -Many items of other migration guides can also be applied when migrating PySpark to higher versions because PySpark internally shares other components. -Please also refer other migration guides such as `Migration Guide: SQL, Datasets and DataFrame `_. .. toctree:: :maxdepth: 2 @@ -33,3 +31,13 @@ Please also refer other migration guides such as `Migration Guide: SQL, Datasets pyspark_2.2_to_2.3 pyspark_1.4_to_1.5 pyspark_1.0_1.2_to_1.3 + + +Many items of other migration guides can also be applied when migrating PySpark to higher versions because PySpark internally shares other components. +Please also refer other migration guides: + +- `Migration Guide: Spark Core `_ +- `Migration Guide: SQL, Datasets and DataFrame `_ +- `Migration Guide: Structured Streaming `_ +- `Migration Guide: MLlib (Machine Learning) `_ + diff --git a/python/docs/source/reference/pyspark.ml.rst b/python/docs/source/reference/pyspark.ml.rst index 2de0ff65a3ae8..cc904597d24c4 100644 --- a/python/docs/source/reference/pyspark.ml.rst +++ b/python/docs/source/reference/pyspark.ml.rst @@ -16,11 +16,11 @@ under the License. -ML -== +MLlib (DataFrame-based) +======================= -ML Pipeline APIs ----------------- +Pipeline APIs +------------- .. 
currentmodule:: pyspark.ml @@ -188,8 +188,8 @@ Clustering PowerIterationClustering -ML Functions ----------------------------- +Functions +--------- .. currentmodule:: pyspark.ml.functions diff --git a/python/docs/source/reference/pyspark.mllib.rst b/python/docs/source/reference/pyspark.mllib.rst index df5ea017d0fbf..12fc4798dd8de 100644 --- a/python/docs/source/reference/pyspark.mllib.rst +++ b/python/docs/source/reference/pyspark.mllib.rst @@ -16,8 +16,8 @@ under the License. -MLlib -===== +MLlib (RDD-based) +================= Classification -------------- diff --git a/python/docs/source/user_guide/index.rst b/python/docs/source/user_guide/index.rst index 3e535ce16b22e..704156b11d985 100644 --- a/python/docs/source/user_guide/index.rst +++ b/python/docs/source/user_guide/index.rst @@ -20,9 +20,21 @@ User Guide ========== +This page is the guide for PySpark users which contains PySpark specific topics. + .. toctree:: :maxdepth: 2 arrow_pandas python_packaging + +There are more guides shared with other languages in Programming Guides +at `the Spark documentation `_. + +- `RDD Programming Guide `_ +- `Spark SQL, DataFrames and Datasets Guide `_ +- `Structured Streaming Programming Guide `_ +- `Spark Streaming Programming Guide `_ +- `Machine Learning Library (MLlib) Guide `_ + From 5b16d70d6a51720660e7607c859fae4f28691952 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 8 Jan 2021 09:34:40 +0900 Subject: [PATCH 0997/1009] [SPARK-34044][DOCS] Add spark.sql.hive.metastore.jars.path to sql-data-sources-hive-tables.md ### What changes were proposed in this pull request? This PR adds new configuration to `sql-data-sources-hive-tables`. ### Why are the changes needed? SPARK-32852 added a new configuration, `spark.sql.hive.metastore.jars.path`. ### Does this PR introduce _any_ user-facing change? Yes, but a document only. ### How was this patch tested? **BEFORE** ![Screen Shot 2021-01-07 at 2 57 57 PM](https://user-images.githubusercontent.com/9700541/103954318-cc9ec200-50f8-11eb-86d3-cd89b07fcd21.png) **AFTER** ![Screen Shot 2021-01-07 at 2 56 34 PM](https://user-images.githubusercontent.com/9700541/103954221-9d885080-50f8-11eb-8938-fb91394a33cb.png) Closes #31085 from dongjoon-hyun/SPARK-34044. Authored-by: Dongjoon Hyun Signed-off-by: HyukjinKwon --- docs/sql-data-sources-hive-tables.md | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/docs/sql-data-sources-hive-tables.md b/docs/sql-data-sources-hive-tables.md index ae3572c474e1a..376c2042d4227 100644 --- a/docs/sql-data-sources-hive-tables.md +++ b/docs/sql-data-sources-hive-tables.md @@ -139,7 +139,7 @@ The following options can be used to configure the version of Hive that is used builtin Location of the jars that should be used to instantiate the HiveMetastoreClient. This - property can be one of three options: + property can be one of four options:
      1. builtin
      Use Hive 2.3.7, which is bundled with the Spark assembly when -Phive is
@@ -148,6 +148,9 @@ The following options can be used to configure the version of Hive that is used
      2. maven
      Use Hive jars of specified version downloaded from Maven repositories. This configuration
      is not generally recommended for production deployments.
+     3. path
+     Use Hive jars configured by spark.sql.hive.metastore.jars.path
+     in comma separated format. Support both local or remote paths.
      4. A classpath in the standard format for the JVM. This classpath must include all of Hive
      and its dependencies, including the correct version of Hadoop. These jars only need to be
      present on the driver, but if you are running in yarn cluster mode then you must ensure
@@ -156,6 +159,28 @@ The following options can be used to configure the version of Hive that is used
      1.4.0
+
+  spark.sql.hive.metastore.jars.path
+  (empty)
+      Comma-separated paths of the jars that used to instantiate the HiveMetastoreClient.
+      This configuration is useful only when spark.sql.hive.metastore.jars is set as path.
+      The paths can be any of the following format:
+      1. file://path/to/jar/foo.jar
+      2. hdfs://nameservice/path/to/jar/foo.jar
+      3. /path/to/jar/ (path without URI scheme follow conf fs.defaultFS's URI schema)
+      4. [http/https/ftp]://path/to/jar/foo.jar
+      Note that 1, 2, and 3 support wildcard. For example:
+      1. file://path/to/jar/*,file://path2/to/jar/*/*.jar
+      2. hdfs://nameservice/path/to/jar/*,hdfs://nameservice2/path/to/jar/*/*.jar
+
+  3.1.0
+
  spark.sql.hive.metastore.sharedPrefixes
  com.mysql.jdbc,
  org.postgresql,
  com.microsoft.sqlserver,
  oracle.jdbc
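As a usage illustration of the two configs documented above (the jar locations below are made-up placeholders, not values from this patch), the settings would typically be supplied when the session is created, since the Hive metastore client is instantiated from them:

```scala
import org.apache.spark.sql.SparkSession

object MetastoreJarsPathExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("hive-metastore-jars-path")
      .enableHiveSupport()
      // Pick the metastore client version and load its jars from explicit paths
      // instead of the built-in ones.
      .config("spark.sql.hive.metastore.version", "2.3.7")
      .config("spark.sql.hive.metastore.jars", "path")
      // file:// and hdfs:// entries may use wildcards, per the table above.
      .config("spark.sql.hive.metastore.jars.path",
        "file:///opt/hive-client/jars/*,hdfs://nameservice/libs/hive/*")
      .getOrCreate()

    spark.sql("SHOW DATABASES").show()
    spark.stop()
  }
}
```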
        From 8e11ce5378a2cf69ec87501e86f7ed5963649cbf Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Thu, 7 Jan 2021 16:47:37 -0800 Subject: [PATCH 0998/1009] [SPARK-34018][K8S] NPE in ExecutorPodsSnapshot ### What changes were proposed in this pull request? Label both the statuses and ensure the ExecutorPodSnapshot starts with the default config to match. ### Why are the changes needed? The current test depends on the order rather than testing the desired property. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Labeled the containers statuses, observed failures, added the default label as the initialization point, tests passed again. Built Spark, ran on K8s cluster verified no NPE in driver log. Closes #31071 from holdenk/SPARK-34018-finishedExecutorWithRunningSidecar-doesnt-correctly-constructt-the-test-case. Authored-by: Holden Karau Signed-off-by: Dongjoon Hyun --- .../cluster/k8s/ExecutorPodsSnapshot.scala | 27 +++++++++---------- .../k8s/ExecutorLifecycleTestUtils.scala | 3 +++ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala index 37aaca7e8ceeb..cb4d8810e5c38 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala @@ -41,7 +41,7 @@ private[spark] case class ExecutorPodsSnapshot(executorPods: Map[Long, ExecutorP object ExecutorPodsSnapshot extends Logging { private var shouldCheckAllContainers: Boolean = _ - private var sparkContainerName: String = _ + private var sparkContainerName: String = DEFAULT_EXECUTOR_CONTAINER_NAME def apply(executorPods: Seq[Pod]): ExecutorPodsSnapshot = { ExecutorPodsSnapshot(toStatesByExecutorId(executorPods)) @@ -80,24 +80,21 @@ object ExecutorPodsSnapshot extends Logging { .anyMatch(t => t != null && t.getExitCode != 0)) { PodFailed(pod) } else { - // Otherwise look for the Spark container - val sparkContainerStatusOpt = pod.getStatus.getContainerStatuses.asScala - .find(_.getName() == sparkContainerName) - sparkContainerStatusOpt match { - case Some(sparkContainerStatus) => - sparkContainerStatus.getState.getTerminated match { - case t if t.getExitCode != 0 => - PodFailed(pod) - case t if t.getExitCode == 0 => + // Otherwise look for the Spark container and get the exit code if present. + val sparkContainerExitCode = pod.getStatus.getContainerStatuses.asScala + .find(_.getName() == sparkContainerName).flatMap(x => Option(x.getState)) + .flatMap(x => Option(x.getTerminated)).flatMap(x => Option(x.getExitCode)) + .map(_.toInt) + sparkContainerExitCode match { + case Some(t) => + t match { + case 0 => PodSucceeded(pod) case _ => - PodRunning(pod) + PodFailed(pod) } - // If we can't find the Spark container status, fall back to the pod status. This is - // expected to occur during pod startup and other situations. + // No exit code means we are running. 
case _ => - logDebug(s"Unable to find container ${sparkContainerName} in pod ${pod} " + - "defaulting to entire pod status (running).") PodRunning(pod) } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala index 225278c2aad71..41cba573d89c2 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala @@ -115,13 +115,16 @@ object ExecutorLifecycleTestUtils { .editOrNewStatus() .withPhase("running") .addNewContainerStatus() + .withName(DEFAULT_EXECUTOR_CONTAINER_NAME) .withNewState() .withNewTerminated() + .withMessage("message") .withExitCode(exitCode) .endTerminated() .endState() .endContainerStatus() .addNewContainerStatus() + .withName("SIDECARFRIEND") .withNewState() .withNewRunning() .endRunning() From 9b54da490d55d8c12e0a6b2b4b6e3a2d5b6bed86 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Thu, 7 Jan 2021 18:55:27 -0800 Subject: [PATCH 0999/1009] [SPARK-33818][SQL][DOC] Add descriptions about `spark.sql.parser.quotedRegexColumnNames` in the SQL documents ### What changes were proposed in this pull request? According to https://github.com/apache/spark/pull/30805#issuecomment-747179899, doc `spark.sql.parser.quotedRegexColumnNames` since we need user know about this in doc and it's useful. ![image](https://user-images.githubusercontent.com/46485123/103656543-afa4aa80-4fa3-11eb-8cd3-a9d1b87a3489.png) ![image](https://user-images.githubusercontent.com/46485123/103656551-b2070480-4fa3-11eb-9ce7-95cc424242a6.png) ### Why are the changes needed? Complete doc ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Not need Closes #30816 from AngersZhuuuu/SPARK-33818. Authored-by: angerszhu Signed-off-by: Dongjoon Hyun --- docs/sql-ref-syntax-qry-select.md | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/docs/sql-ref-syntax-qry-select.md b/docs/sql-ref-syntax-qry-select.md index bac7c2bc6a06d..5820a5c9060e4 100644 --- a/docs/sql-ref-syntax-qry-select.md +++ b/docs/sql-ref-syntax-qry-select.md @@ -41,7 +41,7 @@ select_statement [ { UNION | INTERSECT | EXCEPT } [ ALL | DISTINCT ] select_stat While `select_statement` is defined as ```sql -SELECT [ hints , ... ] [ ALL | DISTINCT ] { named_expression [ , ... ] } +SELECT [ hints , ... ] [ ALL | DISTINCT ] { [ named_expression | regex_column_names ] [ , ... ] } FROM { from_item [ , ... ] } [ PIVOT clause ] [ LATERAL VIEW clause ] [ ... ] @@ -151,6 +151,18 @@ SELECT [ hints , ... ] [ ALL | DISTINCT ] { named_expression [ , ... ] } Specifies aliases for one or more source window specifications. The source window specifications can be referenced in the widow definitions in the query. + +* **regex_column_names** + + When `spark.sql.parser.quotedRegexColumnNames` is true, quoted identifiers (using backticks) in `SELECT` + statement are interpreted as regular expressions and `SELECT` statement can take regex-based column specification. 
+ For example, below SQL will only take column `c`: + + ```sql + SELECT `(a|b)?+.+` FROM ( + SELECT 1 as a, 2 as b, 3 as c + ) + ``` ### Related Statements From 0de7f2ff1ebb9b3339ecf30074a0d7ffc1ff6325 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Thu, 7 Jan 2021 21:13:22 -0800 Subject: [PATCH 1000/1009] [SPARK-34039][SQL] ReplaceTable should invalidate cache ### What changes were proposed in this pull request? This changes `ReplaceTableExec`/`AtomicReplaceTableExec`, and uncaches the target table before it is dropped. In addition, this includes some refactoring by moving the `uncacheTable` method to `DataSourceV2Strategy` so that we don't need to pass a Spark session to the v2 exec. ### Why are the changes needed? Similar to SPARK-33492 (#30429). When a table is refreshed, the associated cache should be invalidated to avoid potential incorrect results. ### Does this PR introduce _any_ user-facing change? Yes. Now When a data source v2 is cached (either directly or indirectly), all the relevant caches will be refreshed or invalidated if the table is replaced. ### How was this patch tested? Added a new unit test. Closes #31081 from sunchao/SPARK-34039. Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../datasources/v2/DataSourceV2Strategy.scala | 21 ++++++++++++------ .../datasources/v2/ReplaceTableExec.scala | 14 +++++++++--- .../v2/WriteToDataSourceV2Exec.scala | 22 +++++-------------- .../sql/connector/DataSourceV2SQLSuite.scala | 17 ++++++++++++++ 4 files changed, 48 insertions(+), 26 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index fa9519bf3233c..028a2fc690be1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.{And, Attribute, Expression, Na import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.util.toPrettySQL -import org.apache.spark.sql.connector.catalog.{CatalogV2Util, StagingTableCatalog, SupportsNamespaces, SupportsPartitionManagement, SupportsWrite, TableCapability, TableCatalog, TableChange} +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, StagingTableCatalog, SupportsNamespaces, SupportsPartitionManagement, SupportsWrite, Table, TableCapability, TableCatalog, TableChange} import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, MicroBatchStream} import org.apache.spark.sql.connector.write.V1Write import org.apache.spark.sql.errors.QueryCompilationErrors @@ -81,6 +81,11 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat } } + private def invalidateCache(catalog: TableCatalog, table: Table, ident: Identifier): Unit = { + val v2Relation = DataSourceV2Relation.create(table, Some(catalog), Some(ident)) + session.sharedState.cacheManager.uncacheQuery(session, v2Relation, cascade = true) + } + override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { case PhysicalOperation(project, filters, relation @ DataSourceV2ScanRelation(_, V1ScanWrapper(scan, translated, pushed), output)) => @@ -164,10 +169,12 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat catalog match 
{ case staging: StagingTableCatalog => AtomicReplaceTableExec( - staging, ident, schema, parts, propsWithOwner, orCreate = orCreate) :: Nil + staging, ident, schema, parts, propsWithOwner, orCreate = orCreate, + invalidateCache) :: Nil case _ => ReplaceTableExec( - catalog, ident, schema, parts, propsWithOwner, orCreate = orCreate) :: Nil + catalog, ident, schema, parts, propsWithOwner, orCreate = orCreate, + invalidateCache) :: Nil } case ReplaceTableAsSelect(catalog, ident, parts, query, props, options, orCreate) => @@ -176,7 +183,6 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat catalog match { case staging: StagingTableCatalog => AtomicReplaceTableAsSelectExec( - session, staging, ident, parts, @@ -184,10 +190,10 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat planLater(query), propsWithOwner, writeOptions, - orCreate = orCreate) :: Nil + orCreate = orCreate, + invalidateCache) :: Nil case _ => ReplaceTableAsSelectExec( - session, catalog, ident, parts, @@ -195,7 +201,8 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat planLater(query), propsWithOwner, writeOptions, - orCreate = orCreate) :: Nil + orCreate = orCreate, + invalidateCache) :: Nil } case AppendData(r @ DataSourceV2Relation(v1: SupportsWrite, _, _, _, _), query, writeOptions, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceTableExec.scala index 1f3bcf2e3fe57..10c09f4be711f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceTableExec.scala @@ -22,7 +22,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchTableException} import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.connector.catalog.{Identifier, StagedTable, StagingTableCatalog, TableCatalog} +import org.apache.spark.sql.connector.catalog.{Identifier, StagedTable, StagingTableCatalog, Table, TableCatalog} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.types.StructType import org.apache.spark.util.Utils @@ -33,10 +33,13 @@ case class ReplaceTableExec( tableSchema: StructType, partitioning: Seq[Transform], tableProperties: Map[String, String], - orCreate: Boolean) extends V2CommandExec { + orCreate: Boolean, + invalidateCache: (TableCatalog, Table, Identifier) => Unit) extends V2CommandExec { override protected def run(): Seq[InternalRow] = { if (catalog.tableExists(ident)) { + val table = catalog.loadTable(ident) + invalidateCache(catalog, table, ident) catalog.dropTable(ident) } else if (!orCreate) { throw new CannotReplaceMissingTableException(ident) @@ -54,9 +57,14 @@ case class AtomicReplaceTableExec( tableSchema: StructType, partitioning: Seq[Transform], tableProperties: Map[String, String], - orCreate: Boolean) extends V2CommandExec { + orCreate: Boolean, + invalidateCache: (TableCatalog, Table, Identifier) => Unit) extends V2CommandExec { override protected def run(): Seq[InternalRow] = { + if (catalog.tableExists(identifier)) { + val table = catalog.loadTable(identifier) + invalidateCache(catalog, table, identifier) + } val staged = if (orCreate) { catalog.stageCreateOrReplace( 
identifier, tableSchema, partitioning.toArray, tableProperties.asJava) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala index fea8bd25f5a21..5fa091ea4e05c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala @@ -26,7 +26,6 @@ import org.apache.spark.{SparkEnv, SparkException, TaskContext} import org.apache.spark.executor.CommitDeniedException import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD -import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchTableException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.expressions.Attribute @@ -130,7 +129,6 @@ case class AtomicCreateTableAsSelectExec( * ReplaceTableAsSelectStagingExec. */ case class ReplaceTableAsSelectExec( - session: SparkSession, catalog: TableCatalog, ident: Identifier, partitioning: Seq[Transform], @@ -138,7 +136,8 @@ case class ReplaceTableAsSelectExec( query: SparkPlan, properties: Map[String, String], writeOptions: CaseInsensitiveStringMap, - orCreate: Boolean) extends TableWriteExecHelper { + orCreate: Boolean, + invalidateCache: (TableCatalog, Table, Identifier) => Unit) extends TableWriteExecHelper { override protected def run(): Seq[InternalRow] = { // Note that this operation is potentially unsafe, but these are the strict semantics of @@ -151,7 +150,7 @@ case class ReplaceTableAsSelectExec( // 3. The table returned by catalog.createTable doesn't support writing. if (catalog.tableExists(ident)) { val table = catalog.loadTable(ident) - uncacheTable(session, catalog, table, ident) + invalidateCache(catalog, table, ident) catalog.dropTable(ident) } else if (!orCreate) { throw new CannotReplaceMissingTableException(ident) @@ -176,7 +175,6 @@ case class ReplaceTableAsSelectExec( * is left untouched. 
*/ case class AtomicReplaceTableAsSelectExec( - session: SparkSession, catalog: StagingTableCatalog, ident: Identifier, partitioning: Seq[Transform], @@ -184,13 +182,14 @@ case class AtomicReplaceTableAsSelectExec( query: SparkPlan, properties: Map[String, String], writeOptions: CaseInsensitiveStringMap, - orCreate: Boolean) extends TableWriteExecHelper { + orCreate: Boolean, + invalidateCache: (TableCatalog, Table, Identifier) => Unit) extends TableWriteExecHelper { override protected def run(): Seq[InternalRow] = { val schema = CharVarcharUtils.getRawSchema(query.schema).asNullable if (catalog.tableExists(ident)) { val table = catalog.loadTable(ident) - uncacheTable(session, catalog, table, ident) + invalidateCache(catalog, table, ident) } val staged = if (orCreate) { catalog.stageCreateOrReplace( @@ -364,15 +363,6 @@ trait V2TableWriteExec extends V2CommandExec with UnaryExecNode { Nil } - - protected def uncacheTable( - session: SparkSession, - catalog: TableCatalog, - table: Table, - ident: Identifier): Unit = { - val plan = DataSourceV2Relation.create(table, Some(catalog), Some(ident)) - session.sharedState.cacheManager.uncacheQuery(session, plan, cascade = true) - } } object DataWritingSparkTask extends Logging { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 5c67ad9cdfe2e..0a6bd795cd0ae 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -752,6 +752,23 @@ class DataSourceV2SQLSuite assert(t2.v1Table.provider == Some(conf.defaultDataSourceName)) } + test("SPARK-34039: ReplaceTable (atomic or non-atomic) should invalidate cache") { + Seq("testcat.ns.t", "testcat_atomic.ns.t").foreach { t => + val view = "view" + withTable(t) { + withTempView(view) { + sql(s"CREATE TABLE $t USING foo AS SELECT id, data FROM source") + sql(s"CACHE TABLE $view AS SELECT id FROM $t") + checkAnswer(sql(s"SELECT * FROM $t"), spark.table("source")) + checkAnswer(sql(s"SELECT * FROM $view"), spark.table("source").select("id")) + + sql(s"REPLACE TABLE $t (a bigint) USING foo") + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) + } + } + } + } + test("SPARK-33492: ReplaceTableAsSelect (atomic or non-atomic) should invalidate cache") { Seq("testcat.ns.t", "testcat_atomic.ns.t").foreach { t => val view = "view" From cc201545626ffe556682f45edc370ac6fe29e9df Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Thu, 7 Jan 2021 21:24:15 -0800 Subject: [PATCH 1001/1009] [SPARK-34005][CORE] Update peak memory metrics for each Executor on task end ### What changes were proposed in this pull request? This PR makes `AppStatusListener` update the peak memory metrics for each Executor on task end like other peak memory metrics (e.g, stage, executors in a stage). ### Why are the changes needed? When `AppStatusListener#onExecutorMetricsUpdate` is called, peak memory metrics for Executors, stages and executors in a stage are updated but currently, the metrics only for Executors are not updated on task end. ### Does this PR introduce _any_ user-facing change? Yes. Executor peak memory metrics is updated more accurately. ### How was this patch tested? 
After I run a job with `local-cluster[1,1,1024]` and visited `/api/v1//executors`, I confirmed `peakExecutorMemory` metrics is shown for an Executor even though the life time of each job is very short . I also modify the json files for `HistoryServerSuite`. Closes #31029 from sarutak/update-executor-metrics-on-taskend. Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .../spark/status/AppStatusListener.scala | 1 + .../executor_list_json_expectation.json | 22 +++++ .../executor_memory_usage_expectation.json | 88 +++++++++++++++++++ ...tor_node_excludeOnFailure_expectation.json | 88 +++++++++++++++++++ ...ludeOnFailure_unexcluding_expectation.json | 88 +++++++++++++++++++ 5 files changed, 287 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala index 6cb013b1a7c16..52d41cdd72664 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala @@ -759,6 +759,7 @@ private[spark] class AppStatusListener( exec.completedTasks += completedDelta exec.failedTasks += failedDelta exec.totalDuration += event.taskInfo.duration + exec.peakExecutorMetrics.compareAndUpdatePeakValues(event.taskExecutorMetrics) // Note: For resubmitted tasks, we continue to use the metrics that belong to the // first attempt of this task. This may not be 100% accurate because the first attempt diff --git a/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json index c18a2e31dff3c..be125075874a2 100644 --- a/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json @@ -21,6 +21,28 @@ "addTime" : "2015-02-03T16:43:00.906GMT", "executorLogs" : { }, "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, diff --git a/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json index 51449340efe9f..0a3eb81140cdb 100644 --- a/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json @@ -64,6 +64,28 @@ "totalOffHeapStorageMemory" : 524288000 }, "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + 
"ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, @@ -101,6 +123,28 @@ "totalOffHeapStorageMemory" : 524288000 }, "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, @@ -138,6 +182,28 @@ "totalOffHeapStorageMemory": 524288000 }, "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, @@ -175,6 +241,28 @@ "totalOffHeapStorageMemory" : 524288000 }, "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, diff --git a/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_expectation.json index 47a01b2596de9..8869fb4e296e6 100644 --- a/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_expectation.json @@ -64,6 +64,28 @@ "totalOffHeapStorageMemory" : 524288000 }, "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + 
"ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, @@ -101,6 +123,28 @@ "totalOffHeapStorageMemory" : 524288000 }, "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, @@ -138,6 +182,28 @@ "totalOffHeapStorageMemory": 524288000 }, "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, @@ -175,6 +241,28 @@ "totalOffHeapStorageMemory": 524288000 }, "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, diff --git a/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_unexcluding_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_unexcluding_expectation.json index 46e8f81d0e245..21cc9d0812990 100644 --- a/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_unexcluding_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_unexcluding_expectation.json @@ -52,6 +52,28 @@ "stderr" : "http://172.22.0.111:64521/logPage/?appId=app-20161115172038-0000&executorId=3&logType=stderr" }, "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + 
"ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, @@ -83,6 +105,28 @@ "stderr" : "http://172.22.0.111:64519/logPage/?appId=app-20161115172038-0000&executorId=2&logType=stderr" }, "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, @@ -114,6 +158,28 @@ "stderr" : "http://172.22.0.111:64518/logPage/?appId=app-20161115172038-0000&executorId=1&logType=stderr" }, "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, @@ -145,6 +211,28 @@ "stderr" : "http://172.22.0.111:64517/logPage/?appId=app-20161115172038-0000&executorId=0&logType=stderr" }, "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, From b95a847ce1686dd1e1c6555afe2436caec6130e6 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 8 Jan 2021 07:52:39 +0000 Subject: [PATCH 1002/1009] [SPARK-34046][SQL][TESTS] Use join hint for constructing joins in JoinSuite and WholeStageCodegenSuite ### What changes were proposed in this pull request? There are some existing test cases that constructing various joins by tuning the SQL configuration AUTO_BROADCASTJOIN_THRESHOLD, PREFER_SORTMERGEJOIN,SHUFFLE_PARTITIONS, etc. This can be tricky and not straight-forward. In the future development we might have to tweak the configurations again . This PR is to construct specific joins by using join hint in test cases. ### Why are the changes needed? Make test cases for join simpler and more robust. 
### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test Closes #31087 from gengliangwang/joinhintInTest. Authored-by: Gengliang Wang Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/JoinSuite.scala | 123 +++++++----------- .../execution/WholeStageCodegenSuite.scala | 41 +++--- 2 files changed, 66 insertions(+), 98 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala index 1bdfdb5ab9c54..2e336b264cd3a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala @@ -1106,20 +1106,16 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan } test("SPARK-32330: Preserve shuffled hash join build side partitioning") { - withSQLConf( - SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "50", - SQLConf.SHUFFLE_PARTITIONS.key -> "2", - SQLConf.PREFER_SORTMERGEJOIN.key -> "false") { - val df1 = spark.range(10).select($"id".as("k1")) - val df2 = spark.range(30).select($"id".as("k2")) - Seq("inner", "cross").foreach(joinType => { - val plan = df1.join(df2, $"k1" === $"k2", joinType).groupBy($"k1").count() - .queryExecution.executedPlan - assert(collect(plan) { case _: ShuffledHashJoinExec => true }.size === 1) - // No extra shuffle before aggregate - assert(collect(plan) { case _: ShuffleExchangeExec => true }.size === 2) - }) - } + val df1 = spark.range(10).select($"id".as("k1")) + val df2 = spark.range(30).select($"id".as("k2")) + Seq("inner", "cross").foreach(joinType => { + val plan = df1.join(df2.hint("SHUFFLE_HASH"), $"k1" === $"k2", joinType) + .groupBy($"k1").count() + .queryExecution.executedPlan + assert(collect(plan) { case _: ShuffledHashJoinExec => true }.size === 1) + // No extra shuffle before aggregate + assert(collect(plan) { case _: ShuffleExchangeExec => true }.size === 2) + }) } test("SPARK-32383: Preserve hash join (BHJ and SHJ) stream side ordering") { @@ -1129,40 +1125,30 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan val df4 = spark.range(100).select($"id".as("k4")) // Test broadcast hash join - withSQLConf( - SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "50") { - Seq("inner", "left_outer").foreach(joinType => { - val plan = df1.join(df2, $"k1" === $"k2", joinType) - .join(df3, $"k1" === $"k3", joinType) - .join(df4, $"k1" === $"k4", joinType) - .queryExecution - .executedPlan - assert(collect(plan) { case _: SortMergeJoinExec => true }.size === 2) - assert(collect(plan) { case _: BroadcastHashJoinExec => true }.size === 1) - // No extra sort before last sort merge join - assert(collect(plan) { case _: SortExec => true }.size === 3) - }) - } + Seq("inner", "left_outer").foreach(joinType => { + val plan = df1.join(df2.hint("SHUFFLE_MERGE"), $"k1" === $"k2", joinType) + .join(df3.hint("BROADCAST"), $"k1" === $"k3", joinType) + .join(df4.hint("SHUFFLE_MERGE"), $"k1" === $"k4", joinType) + .queryExecution + .executedPlan + assert(collect(plan) { case _: SortMergeJoinExec => true }.size === 2) + assert(collect(plan) { case _: BroadcastHashJoinExec => true }.size === 1) + // No extra sort before last sort merge join + assert(collect(plan) { case _: SortExec => true }.size === 3) + }) // Test shuffled hash join - withSQLConf( - SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "50", - SQLConf.SHUFFLE_PARTITIONS.key -> "2", - SQLConf.PREFER_SORTMERGEJOIN.key -> "false") { - val df3 = 
spark.range(10).select($"id".as("k3")) - - Seq("inner", "left_outer").foreach(joinType => { - val plan = df1.join(df2, $"k1" === $"k2", joinType) - .join(df3, $"k1" === $"k3", joinType) - .join(df4, $"k1" === $"k4", joinType) - .queryExecution - .executedPlan - assert(collect(plan) { case _: SortMergeJoinExec => true }.size === 2) - assert(collect(plan) { case _: ShuffledHashJoinExec => true }.size === 1) - // No extra sort before last sort merge join - assert(collect(plan) { case _: SortExec => true }.size === 3) - }) - } + Seq("inner", "left_outer").foreach(joinType => { + val plan = df1.join(df2.hint("SHUFFLE_MERGE"), $"k1" === $"k2", joinType) + .join(df3.hint("SHUFFLE_HASH"), $"k1" === $"k3", joinType) + .join(df4.hint("SHUFFLE_MERGE"), $"k1" === $"k4", joinType) + .queryExecution + .executedPlan + assert(collect(plan) { case _: SortMergeJoinExec => true }.size === 2) + assert(collect(plan) { case _: ShuffledHashJoinExec => true }.size === 1) + // No extra sort before last sort merge join + assert(collect(plan) { case _: SortExec => true }.size === 3) + }) } test("SPARK-32290: SingleColumn Null Aware Anti Join Optimize") { @@ -1250,24 +1236,16 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan $"k1" === $"k4" && $"k2" === $"k5" && $"k3" === $"k6") ) inputDFs.foreach { case (df1, df2, joinExprs) => - withSQLConf( - // Set broadcast join threshold and number of shuffle partitions, - // as shuffled hash join depends on these two configs. - SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "80", - SQLConf.SHUFFLE_PARTITIONS.key -> "2") { - val smjDF = df1.join(df2, joinExprs, "full") - assert(collect(smjDF.queryExecution.executedPlan) { - case _: SortMergeJoinExec => true }.size === 1) - val smjResult = smjDF.collect() - - withSQLConf(SQLConf.PREFER_SORTMERGEJOIN.key -> "false") { - val shjDF = df1.join(df2, joinExprs, "full") - assert(collect(shjDF.queryExecution.executedPlan) { - case _: ShuffledHashJoinExec => true }.size === 1) - // Same result between shuffled hash join and sort merge join - checkAnswer(shjDF, smjResult) - } - } + val smjDF = df1.join(df2.hint("SHUFFLE_MERGE"), joinExprs, "full") + assert(collect(smjDF.queryExecution.executedPlan) { + case _: SortMergeJoinExec => true }.size === 1) + val smjResult = smjDF.collect() + + val shjDF = df1.join(df2.hint("SHUFFLE_HASH"), joinExprs, "full") + assert(collect(shjDF.queryExecution.executedPlan) { + case _: ShuffledHashJoinExec => true }.size === 1) + // Same result between shuffled hash join and sort merge join + checkAnswer(shjDF, smjResult) } } @@ -1284,10 +1262,8 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan ) inputDFs.foreach { case (df1, df2, joinType) => // Test broadcast hash join - withSQLConf( - SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "200", - SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { - val bhjCodegenDF = df1.join(df2, $"k1" === $"k2", joinType) + withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { + val bhjCodegenDF = df1.join(df2.hint("BROADCAST"), $"k1" === $"k2", joinType) assert(bhjCodegenDF.queryExecution.executedPlan.collect { case WholeStageCodegenExec(_ : BroadcastHashJoinExec) => true case WholeStageCodegenExec(ProjectExec(_, _ : BroadcastHashJoinExec)) => true @@ -1303,13 +1279,8 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan } // Test shuffled hash join - withSQLConf(SQLConf.PREFER_SORTMERGEJOIN.key -> "false", - // Set broadcast join threshold and number of shuffle partitions, 
- // as shuffled hash join depends on these two configs. - SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "50", - SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false", - SQLConf.SHUFFLE_PARTITIONS.key -> "2") { - val shjCodegenDF = df1.join(df2, $"k1" === $"k2", joinType) + withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { + val shjCodegenDF = df1.join(df2.hint("SHUFFLE_HASH"), $"k1" === $"k2", joinType) assert(shjCodegenDF.queryExecution.executedPlan.collect { case WholeStageCodegenExec(_ : ShuffledHashJoinExec) => true case WholeStageCodegenExec(ProjectExec(_, _ : ShuffledHashJoinExec)) => true @@ -1317,7 +1288,7 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan checkAnswer(shjCodegenDF, Seq.empty) withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false") { - val shjNonCodegenDF = df1.join(df2, $"k1" === $"k2", joinType) + val shjNonCodegenDF = df1.join(df2.hint("SHUFFLE_HASH"), $"k1" === $"k2", joinType) assert(shjNonCodegenDF.queryExecution.executedPlan.collect { case _: ShuffledHashJoinExec => true }.size === 1) checkAnswer(shjNonCodegenDF, Seq.empty) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala index eb5643df4c752..71eaed269e6c2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala @@ -71,28 +71,25 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession } test("ShuffledHashJoin should be included in WholeStageCodegen") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "30", - SQLConf.SHUFFLE_PARTITIONS.key -> "2", - SQLConf.PREFER_SORTMERGEJOIN.key -> "false") { - val df1 = spark.range(5).select($"id".as("k1")) - val df2 = spark.range(15).select($"id".as("k2")) - val df3 = spark.range(6).select($"id".as("k3")) - - // test one shuffled hash join - val oneJoinDF = df1.join(df2, $"k1" === $"k2") - assert(oneJoinDF.queryExecution.executedPlan.collect { - case WholeStageCodegenExec(_ : ShuffledHashJoinExec) => true - }.size === 1) - checkAnswer(oneJoinDF, Seq(Row(0, 0), Row(1, 1), Row(2, 2), Row(3, 3), Row(4, 4))) - - // test two shuffled hash joins - val twoJoinsDF = df1.join(df2, $"k1" === $"k2").join(df3, $"k1" === $"k3") - assert(twoJoinsDF.queryExecution.executedPlan.collect { - case WholeStageCodegenExec(_ : ShuffledHashJoinExec) => true - }.size === 2) - checkAnswer(twoJoinsDF, - Seq(Row(0, 0, 0), Row(1, 1, 1), Row(2, 2, 2), Row(3, 3, 3), Row(4, 4, 4))) - } + val df1 = spark.range(5).select($"id".as("k1")) + val df2 = spark.range(15).select($"id".as("k2")) + val df3 = spark.range(6).select($"id".as("k3")) + + // test one shuffled hash join + val oneJoinDF = df1.join(df2.hint("SHUFFLE_HASH"), $"k1" === $"k2") + assert(oneJoinDF.queryExecution.executedPlan.collect { + case WholeStageCodegenExec(_ : ShuffledHashJoinExec) => true + }.size === 1) + checkAnswer(oneJoinDF, Seq(Row(0, 0), Row(1, 1), Row(2, 2), Row(3, 3), Row(4, 4))) + + // test two shuffled hash joins + val twoJoinsDF = df1.join(df2.hint("SHUFFLE_HASH"), $"k1" === $"k2") + .join(df3.hint("SHUFFLE_HASH"), $"k1" === $"k3") + assert(twoJoinsDF.queryExecution.executedPlan.collect { + case WholeStageCodegenExec(_ : ShuffledHashJoinExec) => true + }.size === 2) + checkAnswer(twoJoinsDF, + Seq(Row(0, 0, 0), Row(1, 1, 1), Row(2, 2, 2), Row(3, 3, 3), Row(4, 4, 4))) } test("Sort should be 
included in WholeStageCodegen") { From 0f8e5dd445b03161a27893ba714db57919d8bcab Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 8 Jan 2021 09:05:22 +0000 Subject: [PATCH 1003/1009] [SPARK-34003][SQL] Fix Rule conflicts between PaddingAndLengthCheckForCharVarchar and ResolveAggregateFunctions ### What changes were proposed in this pull request? ResolveAggregateFunctions is a hacky rule and it calls `executeSameContext` to generate a `resolved agg` to determine which unresolved sort attribute should be pushed into the agg. However, after we add the PaddingAndLengthCheckForCharVarchar rule which will rewrite the query output, thus, the `resolved agg` cannot match original attributes anymore. It causes some dissociative sort attribute to be pushed in and fails the query ``` logtalk [info] Failed to analyze query: org.apache.spark.sql.AnalysisException: expression 'testcat.t1.`v`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; [info] Project [v#14, sum(i)#11L] [info] +- Sort [aggOrder#12 ASC NULLS FIRST], true [info] +- !Aggregate [v#14], [v#14, sum(cast(i#7 as bigint)) AS sum(i)#11L, v#13 AS aggOrder#12] [info] +- SubqueryAlias testcat.t1 [info] +- Project [if ((length(v#6) <= 3)) v#6 else if ((length(rtrim(v#6, None)) > 3)) cast(raise_error(concat(input string of length , cast(length(v#6) as string), exceeds varchar type length limitation: 3)) as string) else rpad(rtrim(v#6, None), 3, ) AS v#14, i#7] [info] +- RelationV2[v#6, i#7, index#15, _partition#16] testcat.t1 [info] [info] Project [v#14, sum(i)#11L] [info] +- Sort [aggOrder#12 ASC NULLS FIRST], true [info] +- !Aggregate [v#14], [v#14, sum(cast(i#7 as bigint)) AS sum(i)#11L, v#13 AS aggOrder#12] [info] +- SubqueryAlias testcat.t1 [info] +- Project [if ((length(v#6) <= 3)) v#6 else if ((length(rtrim(v#6, None)) > 3)) cast(raise_error(concat(input string of length , cast(length(v#6) as string), exceeds varchar type length limitation: 3)) as string) else rpad(rtrim(v#6, None), 3, ) AS v#14, i#7] [info] +- RelationV2[v#6, i#7, index#15, _partition#16] testcat.t1 ``` ### Why are the changes needed? bugfix ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? new tests Closes #31027 from yaooqinn/SPARK-34003. Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/analysis/Analyzer.scala | 14 ++++++++++---- .../apache/spark/sql/CharVarcharTestSuite.scala | 8 ++++++++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 883ff46148ca6..bf5dbb8200e87 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2400,16 +2400,22 @@ class Analyzer(override val catalogManager: CatalogManager) // to push down this ordering expression and can reference the original aggregate // expression instead. 
val needsPushDown = ArrayBuffer.empty[NamedExpression] - val evaluatedOrderings = resolvedAliasedOrdering.zip(unresolvedSortOrders).map { - case (evaluated, order) => + val orderToAlias = unresolvedSortOrders.zip(aliasedOrdering) + val evaluatedOrderings = resolvedAliasedOrdering.zip(orderToAlias).map { + case (evaluated, (order, aliasOrder)) => val index = originalAggExprs.indexWhere { case Alias(child, _) => child semanticEquals evaluated.child case other => other semanticEquals evaluated.child } if (index == -1) { - needsPushDown += evaluated - order.copy(child = evaluated.toAttribute) + if (CharVarcharUtils.getRawType(evaluated.metadata).nonEmpty) { + needsPushDown += aliasOrder + order.copy(child = aliasOrder) + } else { + needsPushDown += evaluated + order.copy(child = evaluated.toAttribute) + } } else { order.copy(child = originalAggExprs(index).toAttribute) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index d20cee0815d4d..fb35d6cf8dacb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -466,6 +466,14 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { Row("c")) } } + + test("SPARK-34003: fix char/varchar fails w/ both group by and order by ") { + withTable("t") { + sql(s"CREATE TABLE t(v VARCHAR(3), i INT) USING $format") + sql("INSERT INTO t VALUES ('c', 1)") + checkAnswer(sql("SELECT v, sum(i) FROM t GROUP BY v ORDER BY v"), Row("c", 1)) + } + } } // Some basic char/varchar tests which doesn't rely on table implementation. From 71d261ab8fb3e7fb22d2687b8e038129ca766a65 Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Fri, 8 Jan 2021 20:04:56 +0900 Subject: [PATCH 1004/1009] [SPARK-34032][SS] Add truststore and keystore type config possibility for Kafka delegation token ### What changes were proposed in this pull request? Kafka delegation token is obtained with `AdminClient` where security settings can be set. The keystore and truststore type, however, can't be set. In this PR I've added these new configurations. This can be useful when the store type differs from the default. A good example is making Spark FIPS compliant, where the default JKS type is not accepted. ### Why are the changes needed? Missing configurations. ### Does this PR introduce _any_ user-facing change? Yes, it adds 2 additional config parameters. ### How was this patch tested? Existing + modified unit tests + simple Kafka to Kafka app on cluster. Closes #31070 from gaborgsomogyi/SPARK-34032.
Authored-by: Gabor Somogyi Signed-off-by: Jungtaek Lim (HeartSaVioR) --- docs/structured-streaming-kafka-integration.md | 17 +++++++++++++++++ .../spark/kafka010/KafkaTokenSparkConf.scala | 6 ++++++ .../apache/spark/kafka010/KafkaTokenUtil.scala | 6 ++++++ .../kafka010/KafkaDelegationTokenTest.scala | 4 ++++ .../kafka010/KafkaTokenSparkConfSuite.scala | 10 ++++++++++ .../spark/kafka010/KafkaTokenUtilSuite.scala | 6 ++++++ 6 files changed, 49 insertions(+) diff --git a/docs/structured-streaming-kafka-integration.md b/docs/structured-streaming-kafka-integration.md index 5336695478c14..bf25d46f2e7e0 100644 --- a/docs/structured-streaming-kafka-integration.md +++ b/docs/structured-streaming-kafka-integration.md @@ -1004,6 +1004,14 @@ Delegation tokens can be obtained from multiple clusters and ${cluster} 3.0.0 + + spark.kafka.clusters.${cluster}.ssl.truststore.type + None + + The file format of the trust store file. For further details please see Kafka documentation. Only used to obtain delegation token. + + 3.2.0 + spark.kafka.clusters.${cluster}.ssl.truststore.location None @@ -1021,6 +1029,15 @@ Delegation tokens can be obtained from multiple clusters and ${cluster} 3.0.0 + + spark.kafka.clusters.${cluster}.ssl.keystore.type + None + + The file format of the key store file. This is optional for client. + For further details please see Kafka documentation. Only used to obtain delegation token. + + 3.2.0 + spark.kafka.clusters.${cluster}.ssl.keystore.location None diff --git a/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenSparkConf.scala b/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenSparkConf.scala index ed4a6f1e34c55..21ba7b21ed9d6 100644 --- a/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenSparkConf.scala +++ b/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenSparkConf.scala @@ -31,8 +31,10 @@ private[spark] case class KafkaTokenClusterConf( targetServersRegex: String, securityProtocol: String, kerberosServiceName: String, + trustStoreType: Option[String], trustStoreLocation: Option[String], trustStorePassword: Option[String], + keyStoreType: Option[String], keyStoreLocation: Option[String], keyStorePassword: Option[String], keyPassword: Option[String], @@ -44,8 +46,10 @@ private[spark] case class KafkaTokenClusterConf( s"targetServersRegex=$targetServersRegex, " + s"securityProtocol=$securityProtocol, " + s"kerberosServiceName=$kerberosServiceName, " + + s"trustStoreType=$trustStoreType, " + s"trustStoreLocation=$trustStoreLocation, " + s"trustStorePassword=${trustStorePassword.map(_ => REDACTION_REPLACEMENT_TEXT)}, " + + s"keyStoreType=$keyStoreType, " + s"keyStoreLocation=$keyStoreLocation, " + s"keyStorePassword=${keyStorePassword.map(_ => REDACTION_REPLACEMENT_TEXT)}, " + s"keyPassword=${keyPassword.map(_ => REDACTION_REPLACEMENT_TEXT)}, " + @@ -77,8 +81,10 @@ private [kafka010] object KafkaTokenSparkConf extends Logging { DEFAULT_SECURITY_PROTOCOL_CONFIG), sparkClusterConf.getOrElse(SaslConfigs.SASL_KERBEROS_SERVICE_NAME, KafkaTokenSparkConf.DEFAULT_SASL_KERBEROS_SERVICE_NAME), + sparkClusterConf.get(SslConfigs.SSL_TRUSTSTORE_TYPE_CONFIG), sparkClusterConf.get(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG), sparkClusterConf.get(SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG), + sparkClusterConf.get(SslConfigs.SSL_KEYSTORE_TYPE_CONFIG), sparkClusterConf.get(SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG), 
sparkClusterConf.get(SslConfigs.SSL_KEYSTORE_PASSWORD_CONFIG), sparkClusterConf.get(SslConfigs.SSL_KEY_PASSWORD_CONFIG), diff --git a/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala b/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala index f3f6b4de6f79c..a182d3c30858e 100644 --- a/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala +++ b/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala @@ -162,6 +162,9 @@ private[spark] object KafkaTokenUtil extends Logging { private def setTrustStoreProperties( clusterConf: KafkaTokenClusterConf, properties: ju.Properties): Unit = { + clusterConf.trustStoreType.foreach { truststoreType => + properties.put(SslConfigs.SSL_TRUSTSTORE_TYPE_CONFIG, truststoreType) + } clusterConf.trustStoreLocation.foreach { truststoreLocation => properties.put(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG, truststoreLocation) } @@ -173,6 +176,9 @@ private[spark] object KafkaTokenUtil extends Logging { private def setKeyStoreProperties( clusterConf: KafkaTokenClusterConf, properties: ju.Properties): Unit = { + clusterConf.keyStoreType.foreach { keystoreType => + properties.put(SslConfigs.SSL_KEYSTORE_TYPE_CONFIG, keystoreType) + } clusterConf.keyStoreLocation.foreach { keystoreLocation => properties.put(SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG, keystoreLocation) } diff --git a/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaDelegationTokenTest.scala b/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaDelegationTokenTest.scala index 19335f4221e40..8271acdc7dfb6 100644 --- a/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaDelegationTokenTest.scala +++ b/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaDelegationTokenTest.scala @@ -51,8 +51,10 @@ trait KafkaDelegationTokenTest extends BeforeAndAfterEach { protected val bootStrapServers = "127.0.0.1:0" protected val matchingTargetServersRegex = "127.0.0.*:0" protected val nonMatchingTargetServersRegex = "127.0.intentionally_non_matching.*:0" + protected val trustStoreType = "customTrustStoreType" protected val trustStoreLocation = "/path/to/trustStore" protected val trustStorePassword = "trustStoreSecret" + protected val keyStoreType = "customKeyStoreType" protected val keyStoreLocation = "/path/to/keyStore" protected val keyStorePassword = "keyStoreSecret" protected val keyPassword = "keySecret" @@ -124,8 +126,10 @@ trait KafkaDelegationTokenTest extends BeforeAndAfterEach { KafkaTokenSparkConf.DEFAULT_TARGET_SERVERS_REGEX, securityProtocol, KafkaTokenSparkConf.DEFAULT_SASL_KERBEROS_SERVICE_NAME, + Some(trustStoreType), Some(trustStoreLocation), Some(trustStorePassword), + Some(keyStoreType), Some(keyStoreLocation), Some(keyStorePassword), Some(keyPassword), diff --git a/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaTokenSparkConfSuite.scala b/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaTokenSparkConfSuite.scala index 61184a6fac33d..17caf96818e47 100644 --- a/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaTokenSparkConfSuite.scala +++ b/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaTokenSparkConfSuite.scala @@ -29,8 +29,10 @@ class KafkaTokenSparkConfSuite extends SparkFunSuite with 
BeforeAndAfterEach { private val targetServersRegex = "127.0.0.*:0" private val securityProtocol = SSL.name private val kerberosServiceName = "kafka1" + private val trustStoreType = "customTrustStoreType" private val trustStoreLocation = "/path/to/trustStore" private val trustStorePassword = "trustStoreSecret" + private val keyStoreType = "customKeyStoreType" private val keyStoreLocation = "/path/to/keyStore" private val keyStorePassword = "keyStoreSecret" private val keyPassword = "keySecret" @@ -60,8 +62,10 @@ class KafkaTokenSparkConfSuite extends SparkFunSuite with BeforeAndAfterEach { assert(clusterConfig.securityProtocol === SASL_SSL.name) assert(clusterConfig.kerberosServiceName === KafkaTokenSparkConf.DEFAULT_SASL_KERBEROS_SERVICE_NAME) + assert(clusterConfig.trustStoreType === None) assert(clusterConfig.trustStoreLocation === None) assert(clusterConfig.trustStorePassword === None) + assert(clusterConfig.keyStoreType === None) assert(clusterConfig.keyStoreLocation === None) assert(clusterConfig.keyStorePassword === None) assert(clusterConfig.keyPassword === None) @@ -75,8 +79,10 @@ class KafkaTokenSparkConfSuite extends SparkFunSuite with BeforeAndAfterEach { sparkConf.set(s"spark.kafka.clusters.$identifier1.security.protocol", securityProtocol) sparkConf.set(s"spark.kafka.clusters.$identifier1.sasl.kerberos.service.name", kerberosServiceName) + sparkConf.set(s"spark.kafka.clusters.$identifier1.ssl.truststore.type", trustStoreType) sparkConf.set(s"spark.kafka.clusters.$identifier1.ssl.truststore.location", trustStoreLocation) sparkConf.set(s"spark.kafka.clusters.$identifier1.ssl.truststore.password", trustStorePassword) + sparkConf.set(s"spark.kafka.clusters.$identifier1.ssl.keystore.type", keyStoreType) sparkConf.set(s"spark.kafka.clusters.$identifier1.ssl.keystore.location", keyStoreLocation) sparkConf.set(s"spark.kafka.clusters.$identifier1.ssl.keystore.password", keyStorePassword) sparkConf.set(s"spark.kafka.clusters.$identifier1.ssl.key.password", keyPassword) @@ -88,8 +94,10 @@ class KafkaTokenSparkConfSuite extends SparkFunSuite with BeforeAndAfterEach { assert(clusterConfig.targetServersRegex === targetServersRegex) assert(clusterConfig.securityProtocol === securityProtocol) assert(clusterConfig.kerberosServiceName === kerberosServiceName) + assert(clusterConfig.trustStoreType === Some(trustStoreType)) assert(clusterConfig.trustStoreLocation === Some(trustStoreLocation)) assert(clusterConfig.trustStorePassword === Some(trustStorePassword)) + assert(clusterConfig.keyStoreType === Some(keyStoreType)) assert(clusterConfig.keyStoreLocation === Some(keyStoreLocation)) assert(clusterConfig.keyStorePassword === Some(keyStorePassword)) assert(clusterConfig.keyPassword === Some(keyPassword)) @@ -127,8 +135,10 @@ class KafkaTokenSparkConfSuite extends SparkFunSuite with BeforeAndAfterEach { assert(clusterConfig.securityProtocol === SASL_SSL.name) assert(clusterConfig.kerberosServiceName === KafkaTokenSparkConf.DEFAULT_SASL_KERBEROS_SERVICE_NAME) + assert(clusterConfig.trustStoreType === None) assert(clusterConfig.trustStoreLocation === None) assert(clusterConfig.trustStorePassword === None) + assert(clusterConfig.keyStoreType === None) assert(clusterConfig.keyStoreLocation === None) assert(clusterConfig.keyStorePassword === None) assert(clusterConfig.keyPassword === None) diff --git a/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaTokenUtilSuite.scala 
b/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaTokenUtilSuite.scala index 94f7853003bd9..ca34e14f2c261 100644 --- a/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaTokenUtilSuite.scala +++ b/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaTokenUtilSuite.scala @@ -64,8 +64,10 @@ class KafkaTokenUtilSuite extends SparkFunSuite with KafkaDelegationTokenTest { === bootStrapServers) assert(adminClientProperties.get(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG) === SASL_PLAINTEXT.name) + assert(!adminClientProperties.containsKey(SslConfigs.SSL_TRUSTSTORE_TYPE_CONFIG)) assert(!adminClientProperties.containsKey(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG)) assert(!adminClientProperties.containsKey(SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG)) + assert(!adminClientProperties.containsKey(SslConfigs.SSL_KEYSTORE_TYPE_CONFIG)) assert(!adminClientProperties.containsKey(SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG)) assert(!adminClientProperties.containsKey(SslConfigs.SSL_KEYSTORE_PASSWORD_CONFIG)) assert(!adminClientProperties.containsKey(SslConfigs.SSL_KEY_PASSWORD_CONFIG)) @@ -80,10 +82,12 @@ class KafkaTokenUtilSuite extends SparkFunSuite with KafkaDelegationTokenTest { === bootStrapServers) assert(adminClientProperties.get(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG) === SASL_SSL.name) + assert(adminClientProperties.get(SslConfigs.SSL_TRUSTSTORE_TYPE_CONFIG) === trustStoreType) assert(adminClientProperties.get(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG) === trustStoreLocation) assert(adminClientProperties.get(SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG) === trustStorePassword) + assert(!adminClientProperties.containsKey(SslConfigs.SSL_KEYSTORE_TYPE_CONFIG)) assert(!adminClientProperties.containsKey(SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG)) assert(!adminClientProperties.containsKey(SslConfigs.SSL_KEYSTORE_PASSWORD_CONFIG)) assert(!adminClientProperties.containsKey(SslConfigs.SSL_KEY_PASSWORD_CONFIG)) @@ -99,10 +103,12 @@ class KafkaTokenUtilSuite extends SparkFunSuite with KafkaDelegationTokenTest { === bootStrapServers) assert(adminClientProperties.get(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG) === SSL.name) + assert(adminClientProperties.get(SslConfigs.SSL_TRUSTSTORE_TYPE_CONFIG) === trustStoreType) assert(adminClientProperties.get(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG) === trustStoreLocation) assert(adminClientProperties.get(SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG) === trustStorePassword) + assert(adminClientProperties.get(SslConfigs.SSL_KEYSTORE_TYPE_CONFIG) === keyStoreType) assert(adminClientProperties.get(SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG) === keyStoreLocation) assert(adminClientProperties.get(SslConfigs.SSL_KEYSTORE_PASSWORD_CONFIG) === keyStorePassword) assert(adminClientProperties.get(SslConfigs.SSL_KEY_PASSWORD_CONFIG) === keyPassword) From 157b72ac9fa0057d5fd6d7ed52a6c4b22ebd1dfc Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 8 Jan 2021 14:14:27 +0000 Subject: [PATCH 1005/1009] [SPARK-33591][SQL] Recognize `null` in partition spec values ### What changes were proposed in this pull request? 1. Recognize `null` while parsing partition specs, and put `null` instead of `"null"` as partition values. 2. For V1 catalog: replace `null` by `__HIVE_DEFAULT_PARTITION__`. 3. For V2 catalogs: pass `null` AS IS, and let catalog implementations to decide how to handle `null`s as partition values in spec. ### Why are the changes needed? 
Currently, `null` in partition specs is recognized as the `"null"` string which could lead to incorrect results, for example: ```sql spark-sql> CREATE TABLE tbl5 (col1 INT, p1 STRING) USING PARQUET PARTITIONED BY (p1); spark-sql> INSERT INTO TABLE tbl5 PARTITION (p1 = null) SELECT 0; spark-sql> SELECT isnull(p1) FROM tbl5; false ``` Even we inserted a row to the partition with the `null` value, **the resulted table doesn't contain `null`**. ### Does this PR introduce _any_ user-facing change? Yes. After the changes, the example above works as expected: ```sql spark-sql> SELECT isnull(p1) FROM tbl5; true ``` ### How was this patch tested? 1. By running the affected test suites `SQLQuerySuite`, `AlterTablePartitionV2SQLSuite` and `v1/ShowPartitionsSuite`. 2. Compiling by Scala 2.13: ``` $ ./dev/change-scala-version.sh 2.13 $ ./build/sbt -Pscala-2.13 compile ``` Closes #30538 from MaxGekk/partition-spec-value-null. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../catalog/ExternalCatalogUtils.scala | 10 ++++++ .../catalyst/catalog/InMemoryCatalog.scala | 7 +++- .../sql/catalyst/catalog/SessionCatalog.scala | 2 +- .../sql/catalyst/parser/AstBuilder.scala | 1 + .../sql/execution/datasources/rules.scala | 3 +- .../org/apache/spark/sql/SQLQuerySuite.scala | 9 +++++ .../AlterTableDropPartitionSuiteBase.scala | 11 +++++++ .../v1/AlterTableDropPartitionSuite.scala | 1 + .../command/v1/ShowPartitionsSuite.scala | 12 +++++++ .../v2/AlterTableDropPartitionSuite.scala | 2 +- .../spark/sql/hive/HiveExternalCatalog.scala | 33 ++++++++++--------- .../hive/execution/InsertIntoHiveTable.scala | 2 ++ 12 files changed, 74 insertions(+), 19 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala index 00445a1614257..9d6e0a6d6ce66 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala @@ -161,6 +161,10 @@ object ExternalCatalogUtils { } } + private def isNullPartitionValue(value: String): Boolean = { + value == null || value == DEFAULT_PARTITION_NAME + } + /** * Returns true if `spec1` is a partial partition spec w.r.t. `spec2`, e.g. PARTITION (a=1) is a * partial partition spec w.r.t. PARTITION (a=1,b=2). 
@@ -169,9 +173,15 @@ object ExternalCatalogUtils { spec1: TablePartitionSpec, spec2: TablePartitionSpec): Boolean = { spec1.forall { + case (partitionColumn, value) if isNullPartitionValue(value) => + isNullPartitionValue(spec2(partitionColumn)) case (partitionColumn, value) => spec2(partitionColumn) == value } } + + def convertNullPartitionValues(spec: TablePartitionSpec): TablePartitionSpec = { + spec.mapValues(v => if (v == null) DEFAULT_PARTITION_NAME else v).toMap + } } object CatalogUtils { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala index 64b4a112fe786..0d16f46d049a9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala @@ -541,7 +541,12 @@ class InMemoryCatalog( listPartitions(db, table, partialSpec).map { partition => partitionColumnNames.map { name => - escapePathName(name) + "=" + escapePathName(partition.spec(name)) + val partValue = if (partition.spec(name) == null) { + DEFAULT_PARTITION_NAME + } else { + escapePathName(partition.spec(name)) + } + escapePathName(name) + "=" + partValue }.mkString("/") }.sorted } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 76358ef116cec..0428d12b7ced8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -1178,7 +1178,7 @@ class SessionCatalog( */ private def requireNonEmptyValueInPartitionSpec(specs: Seq[TablePartitionSpec]): Unit = { specs.foreach { s => - if (s.values.exists(_.isEmpty)) { + if (s.values.exists(v => v != null && v.isEmpty)) { val spec = s.map(p => p._1 + "=" + p._2).mkString("[", ", ", "]") throw QueryCompilationErrors.invalidPartitionSpecError( s"The spec ($spec) contains an empty partition column value") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 395a9563cdc0a..4d028f6ce3569 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -511,6 +511,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg */ protected def visitStringConstant(ctx: ConstantContext): String = withOrigin(ctx) { ctx match { + case _: NullLiteralContext => null case s: StringLiteralContext => createString(s) case o => o.getText } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index b9866e415c9b1..4fd6684b3b921 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -406,7 +406,8 @@ object PreprocessTableInsertion extends Rule[LogicalPlan] { catalogTable.get.tracksPartitionsInCatalog if (partitionsTrackedByCatalog && normalizedPartSpec.nonEmpty) { // empty partition column value - if 
(normalizedPartSpec.filter(_._2.isDefined).exists(_._2.get.isEmpty)) { + if (normalizedPartSpec.map(_._2) + .filter(_.isDefined).map(_.get).exists(v => v != null && v.isEmpty)) { val spec = normalizedPartSpec.map(p => p._1 + "=" + p._2).mkString("[", ", ", "]") throw new AnalysisException( s"Partition spec is invalid. The spec ($spec) contains an empty partition column value") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 3f55a88f19505..7526bf0e6fbe9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -3854,6 +3854,15 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark assert(unions.size == 1) } + + test("SPARK-33591: null as a partition value") { + val t = "part_table" + withTable(t) { + sql(s"CREATE TABLE $t (col1 INT, p1 STRING) USING PARQUET PARTITIONED BY (p1)") + sql(s"INSERT INTO TABLE $t PARTITION (p1 = null) SELECT 0") + checkAnswer(sql(s"SELECT * FROM $t"), Row(0, null)) + } + } } case class Foo(bar: Option[String]) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala index aadcda490b82b..942a3e8635698 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala @@ -39,6 +39,7 @@ trait AlterTableDropPartitionSuiteBase extends QueryTest with DDLCommandTestUtil override val command = "ALTER TABLE .. 
DROP PARTITION" protected def notFullPartitionSpecErr: String + protected def nullPartitionValue: String protected def checkDropPartition( t: String, @@ -170,4 +171,14 @@ trait AlterTableDropPartitionSuiteBase extends QueryTest with DDLCommandTestUtil QueryTest.checkAnswer(sql(s"SELECT * FROM $t"), Seq(Row(1, 1))) } } + + test("SPARK-33591: null as a partition value") { + withNamespaceAndTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (col1 INT, p1 STRING) $defaultUsing PARTITIONED BY (p1)") + sql(s"ALTER TABLE $t ADD PARTITION (p1 = null)") + checkPartitions(t, Map("p1" -> nullPartitionValue)) + sql(s"ALTER TABLE $t DROP PARTITION (p1 = null)") + checkPartitions(t) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala index a6490ebdb950c..509c0be28c26a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala @@ -32,6 +32,7 @@ import org.apache.spark.sql.execution.command */ trait AlterTableDropPartitionSuiteBase extends command.AlterTableDropPartitionSuiteBase { override protected val notFullPartitionSpecErr = "The following partitions not found in table" + override protected def nullPartitionValue: String = "__HIVE_DEFAULT_PARTITION__" test("purge partition data") { withNamespaceAndTable("ns", "tbl") { t => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala index e85d62c51ef45..a26e29706e147 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala @@ -69,6 +69,18 @@ trait ShowPartitionsSuiteBase extends command.ShowPartitionsSuiteBase { assert(errMsg.contains("'SHOW PARTITIONS' expects a table")) } } + + test("SPARK-33591: null as a partition value") { + val t = "part_table" + withTable(t) { + sql(s"CREATE TABLE $t (col1 INT, p1 STRING) $defaultUsing PARTITIONED BY (p1)") + sql(s"INSERT INTO TABLE $t PARTITION (p1 = null) SELECT 0") + checkAnswer(sql(s"SHOW PARTITIONS $t"), Row("p1=__HIVE_DEFAULT_PARTITION__")) + checkAnswer( + sql(s"SHOW PARTITIONS $t PARTITION (p1 = null)"), + Row("p1=__HIVE_DEFAULT_PARTITION__")) + } + } } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala index d6890d6faef70..3515fa3390206 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala @@ -27,8 +27,8 @@ import org.apache.spark.sql.execution.command class AlterTableDropPartitionSuite extends command.AlterTableDropPartitionSuiteBase with CommandSuiteBase { - override protected val notFullPartitionSpecErr = "Partition spec is invalid" + override protected def nullPartitionValue: String = "null" test("SPARK-33650: drop partition into a table which doesn't support partition management") { withNamespaceAndTable("ns", "tbl", s"non_part_$catalog") { t => diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index b4aa073893df8..eeffe4f25d4c6 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -942,9 +942,10 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat // Hive metastore is not case preserving and the partition columns are always lower cased. We need // to lower case the column names in partition specification before calling partition related Hive // APIs, to match this behaviour. - private def lowerCasePartitionSpec(spec: TablePartitionSpec): TablePartitionSpec = { + private def toMetaStorePartitionSpec(spec: TablePartitionSpec): TablePartitionSpec = { // scalastyle:off caselocale - spec.map { case (k, v) => k.toLowerCase -> v } + val lowNames = spec.map { case (k, v) => k.toLowerCase -> v } + ExternalCatalogUtils.convertNullPartitionValues(lowNames) // scalastyle:on caselocale } @@ -993,8 +994,9 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat } p.copy(storage = p.storage.copy(locationUri = Some(partitionPath.toUri))) } - val lowerCasedParts = partsWithLocation.map(p => p.copy(spec = lowerCasePartitionSpec(p.spec))) - client.createPartitions(db, table, lowerCasedParts, ignoreIfExists) + val metaStoreParts = partsWithLocation + .map(p => p.copy(spec = toMetaStorePartitionSpec(p.spec))) + client.createPartitions(db, table, metaStoreParts, ignoreIfExists) } override def dropPartitions( @@ -1006,7 +1008,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat retainData: Boolean): Unit = withClient { requireTableExists(db, table) client.dropPartitions( - db, table, parts.map(lowerCasePartitionSpec), ignoreIfNotExists, purge, retainData) + db, table, parts.map(toMetaStorePartitionSpec), ignoreIfNotExists, purge, retainData) } override def renamePartitions( @@ -1015,7 +1017,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat specs: Seq[TablePartitionSpec], newSpecs: Seq[TablePartitionSpec]): Unit = withClient { client.renamePartitions( - db, table, specs.map(lowerCasePartitionSpec), newSpecs.map(lowerCasePartitionSpec)) + db, table, specs.map(toMetaStorePartitionSpec), newSpecs.map(toMetaStorePartitionSpec)) val tableMeta = getTable(db, table) val partitionColumnNames = tableMeta.partitionColumnNames @@ -1031,7 +1033,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat val fs = tablePath.getFileSystem(hadoopConf) val newParts = newSpecs.map { spec => val rightPath = renamePartitionDirectory(fs, tablePath, partitionColumnNames, spec) - val partition = client.getPartition(db, table, lowerCasePartitionSpec(spec)) + val partition = client.getPartition(db, table, toMetaStorePartitionSpec(spec)) partition.copy(storage = partition.storage.copy(locationUri = Some(rightPath.toUri))) } alterPartitions(db, table, newParts) @@ -1141,12 +1143,12 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat db: String, table: String, newParts: Seq[CatalogTablePartition]): Unit = withClient { - val lowerCasedParts = newParts.map(p => p.copy(spec = lowerCasePartitionSpec(p.spec))) + val metaStoreParts = newParts.map(p => p.copy(spec = toMetaStorePartitionSpec(p.spec))) val rawTable = getRawTable(db, table) // convert partition statistics to properties 
so that we can persist them through hive api - val withStatsProps = lowerCasedParts.map { p => + val withStatsProps = metaStoreParts.map { p => if (p.stats.isDefined) { val statsProperties = statsToProperties(p.stats.get) p.copy(parameters = p.parameters ++ statsProperties) @@ -1162,7 +1164,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat db: String, table: String, spec: TablePartitionSpec): CatalogTablePartition = withClient { - val part = client.getPartition(db, table, lowerCasePartitionSpec(spec)) + val part = client.getPartition(db, table, toMetaStorePartitionSpec(spec)) restorePartitionMetadata(part, getTable(db, table)) } @@ -1200,7 +1202,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat db: String, table: String, spec: TablePartitionSpec): Option[CatalogTablePartition] = withClient { - client.getPartitionOption(db, table, lowerCasePartitionSpec(spec)).map { part => + client.getPartitionOption(db, table, toMetaStorePartitionSpec(spec)).map { part => restorePartitionMetadata(part, getTable(db, table)) } } @@ -1215,7 +1217,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat val catalogTable = getTable(db, table) val partColNameMap = buildLowerCasePartColNameMap(catalogTable).mapValues(escapePathName) val clientPartitionNames = - client.getPartitionNames(catalogTable, partialSpec.map(lowerCasePartitionSpec)) + client.getPartitionNames(catalogTable, partialSpec.map(toMetaStorePartitionSpec)) clientPartitionNames.map { partitionPath => val partSpec = PartitioningUtils.parsePathFragmentAsSeq(partitionPath) partSpec.map { case (partName, partValue) => @@ -1234,11 +1236,12 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat table: String, partialSpec: Option[TablePartitionSpec] = None): Seq[CatalogTablePartition] = withClient { val partColNameMap = buildLowerCasePartColNameMap(getTable(db, table)) - val res = client.getPartitions(db, table, partialSpec.map(lowerCasePartitionSpec)).map { part => - part.copy(spec = restorePartitionSpec(part.spec, partColNameMap)) + val metaStoreSpec = partialSpec.map(toMetaStorePartitionSpec) + val res = client.getPartitions(db, table, metaStoreSpec) + .map { part => part.copy(spec = restorePartitionSpec(part.spec, partColNameMap)) } - partialSpec match { + metaStoreSpec match { // This might be a bug of Hive: When the partition value inside the partial partition spec // contains dot, and we ask Hive to list partitions w.r.t. 
the partial partition spec, Hive // treats dot as matching any single character and may return more partitions than we diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala index 63e46880376e1..bfb24cfedb55a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala @@ -133,6 +133,7 @@ case class InsertIntoHiveTable( val numDynamicPartitions = partition.values.count(_.isEmpty) val numStaticPartitions = partition.values.count(_.nonEmpty) val partitionSpec = partition.map { + case (key, Some(null)) => key -> ExternalCatalogUtils.DEFAULT_PARTITION_NAME case (key, Some(value)) => key -> value case (key, None) => key -> "" } @@ -229,6 +230,7 @@ case class InsertIntoHiveTable( val caseInsensitiveDpMap = CaseInsensitiveMap(dpMap) val updatedPartitionSpec = partition.map { + case (key, Some(null)) => key -> ExternalCatalogUtils.DEFAULT_PARTITION_NAME case (key, Some(value)) => key -> value case (key, None) if caseInsensitiveDpMap.contains(key) => key -> caseInsensitiveDpMap(key) From 023eba2ad72f5119350c6c797808dadcfd1eaa19 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Fri, 8 Jan 2021 09:43:11 -0600 Subject: [PATCH 1006/1009] [SPARK-33796][DOCS][FOLLOWUP] Tweak the width of left-menu of Spark SQL Guide ### What changes were proposed in this pull request? This PR tweaks the width of the left menu of the Spark SQL Guide. When I view the Spark SQL Guide with browsers on macOS, the title `Spark SQL Guide` is rendered nicely. But I often use Pop!_OS, an Ubuntu variant, and the title is overlapped in browsers on it. ![spark-sql-guide-layout-before](https://user-images.githubusercontent.com/4736016/104002743-d56cc200-51e4-11eb-9e3a-28abcd46e0bf.png) After this change, the title is no longer overlapped. ![spark-sql-guide-layout-after](https://user-images.githubusercontent.com/4736016/104002847-f9c89e80-51e4-11eb-85c0-01d69cee46b7.png) ### Why are the changes needed? For a prettier layout. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Built the document with `cd docs && SKIP_API=1 jekyll build` and confirmed the layout. Closes #31091 from sarutak/modify-layout-sparksql-guide. Authored-by: Kousuke Saruta Signed-off-by: Sean Owen --- docs/css/main.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/css/main.css b/docs/css/main.css index 309ad7b3bdf0b..6710b6e8563c9 100755 --- a/docs/css/main.css +++ b/docs/css/main.css @@ -326,7 +326,7 @@ a.anchorjs-link:hover { text-decoration: none; } border-left-width: 0px; border-bottom-width: 0px; margin-top: 0px; - width: 210px; + width: 220px; height: 80%; float: left; position: fixed; From 0781ed4f5b7f692656651f9bb51f823c82e24e2d Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Fri, 8 Jan 2021 09:44:33 -0600 Subject: [PATCH 1007/1009] [MINOR][SQL][TESTS] Fix the incorrect unicode escape test in ParserUtilsSuite ### What changes were proposed in this pull request? This PR fixes an incorrect unicode literal test in `ParserUtilsSuite`. In that suite, string literals in queries have unicode escape characters like `\u732B`, but the backslash should be escaped because the query strings are given as Java strings. ### Why are the changes needed? Correct the test. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested?
Ran `ParserUtilsSuite` and it passed. Closes #31088 from sarutak/fix-incorrect-unicode-test. Authored-by: Kousuke Saruta Signed-off-by: Sean Owen --- .../apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala index a4d1b5d5e6f29..5e7adaa7163fe 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala @@ -94,11 +94,11 @@ class ParserUtilsSuite extends SparkFunSuite { assert(unescapeSQLString(""""\256"""") == "256") // String including a '\u0000' style literal characters (\u732B is a cat in Kanji). - assert(unescapeSQLString("\"How cute \u732B are\"") == "How cute \u732B are") + assert(unescapeSQLString("\"How cute \\u732B are\"") == "How cute \u732B are") // String including a surrogate pair character // (\uD867\uDE3D is Okhotsk atka mackerel in Kanji). - assert(unescapeSQLString("\"\uD867\uDE3D is a fish\"") == "\uD867\uDE3D is a fish") + assert(unescapeSQLString("\"\\uD867\\uDE3D is a fish\"") == "\uD867\uDE3D is a fish") // scalastyle:on nonascii } From d00f0695b7513046e42e47f35b280d7aa494de5b Mon Sep 17 00:00:00 2001 From: Chandni Singh Date: Fri, 8 Jan 2021 12:21:56 -0600 Subject: [PATCH 1008/1009] [SPARK-32917][SHUFFLE][CORE] Adds support for executors to push shuffle blocks after successful map task completion ### What changes were proposed in this pull request? This is the shuffle writer side change where executors can push data to remote shuffle services. This is needed for push-based shuffle - SPIP [SPARK-30602](https://issues.apache.org/jira/browse/SPARK-30602). Summary of changes: - This adds support for executors to push shuffle blocks after map tasks complete writing shuffle data. - This also introduces a timeout specifically for creating connections to remote shuffle services. ### Why are the changes needed? - These changes are needed for push-based shuffle. Refer to the SPIP in [SPARK-30602](https://issues.apache.org/jira/browse/SPARK-30602). - The main reason to create a separate connection creation timeout is that the existing `connectionTimeoutMs` is overloaded: it is used for connection creation timeouts as well as the connection idle timeout. The connection creation timeout should be much lower than the idle timeout. The default for `connectionTimeoutMs` is 120s, which is quite high for just establishing connections. If a shuffle server node is bad, then connection creation will fail within a few seconds. However, an overloaded shuffle server may take much longer to respond to a request, and the channel can stay idle for a much longer time, which is expected. Another reason is that with push-based shuffle, an executor may be fetching shuffle data and pushing shuffle data (for the next stage) simultaneously. Both of these tasks share the same connections with the shuffle service. If there is a bad shuffle server node and the connection creation timeout is very high, then both of these tasks end up waiting a long time, eventually impacting performance. ### Does this PR introduce _any_ user-facing change? Yes. This PR introduces client-side configs for push-based shuffle. If push-based shuffle is turned off, users will not see any change. ### How was this patch tested? Added unit tests.
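For reference, a hedged sketch (not part of this patch) of how the new client-side settings might be combined. The values are illustrative only; the `spark.shuffle.push.*` keys come from this patch, while the `spark.shuffle.io.` prefix for the connection creation timeout is an assumption based on the usual per-module `spark.<module>.io.*` transport naming.

```
import org.apache.spark.SparkConf

// Illustrative values only; enabling push-based shuffle itself is done via
// separate configs that are not part of this patch.
val conf = new SparkConf()
  // Fail fast when a remote shuffle service cannot be reached, instead of
  // waiting out the much larger idle timeout (spark.network.timeout).
  .set("spark.shuffle.io.connectionCreationTimeout", "30s")
  // Threads that create connections and push blocks; defaults to the number
  // of executor cores.
  .set("spark.shuffle.push.numPushThreads", "8")
  // Blocks larger than this are fetched in the original manner, not pushed.
  .set("spark.shuffle.push.maxBlockSizeToPush", "1m")
  // Max size of a batch of shuffle blocks grouped into a single push request.
  .set("spark.shuffle.push.maxBlockBatchSize", "3m")
```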
The reference PR with the consolidated changes covering the complete implementation is also provided in [SPARK-30602](https://issues.apache.org/jira/browse/SPARK-30602). We have already verified the functionality and the improved performance as documented in the SPIP doc. Lead-authored-by: Min Shen mshenlinkedin.com Co-authored-by: Chandni Singh chsinghlinkedin.com Co-authored-by: Ye Zhou yezhoulinkedin.com Closes #30312 from otterc/SPARK-32917. Lead-authored-by: Chandni Singh Co-authored-by: Chandni Singh Co-authored-by: Min Shen Co-authored-by: Ye Zhou Signed-off-by: Mridul Muralidharan gmail.com> --- .../client/TransportClientFactory.java | 7 +- .../spark/network/util/TransportConf.java | 13 +- .../sort/BypassMergeSortShuffleWriter.java | 5 +- .../shuffle/sort/UnsafeShuffleWriter.java | 7 +- .../org/apache/spark/executor/Executor.scala | 3 +- .../spark/internal/config/package.scala | 29 ++ .../spark/shuffle/ShuffleBlockPusher.scala | 450 ++++++++++++++++++ .../spark/shuffle/ShuffleWriteProcessor.scala | 19 +- .../apache/spark/shuffle/ShuffleWriter.scala | 3 + .../shuffle/sort/SortShuffleWriter.scala | 6 +- .../org/apache/spark/storage/BlockId.scala | 11 +- .../shuffle/ShuffleBlockPusherSuite.scala | 355 ++++++++++++++ 12 files changed, 896 insertions(+), 12 deletions(-) create mode 100644 core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockPusher.scala create mode 100644 core/src/test/scala/org/apache/spark/shuffle/ShuffleBlockPusherSuite.scala diff --git a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java index 24c436a504fa8..43408d43e577e 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java +++ b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java @@ -254,7 +254,7 @@ TransportClient createClient(InetSocketAddress address) // Disable Nagle's Algorithm since we don't want packets to wait .option(ChannelOption.TCP_NODELAY, true) .option(ChannelOption.SO_KEEPALIVE, true) - .option(ChannelOption.CONNECT_TIMEOUT_MILLIS, conf.connectionTimeoutMs()) + .option(ChannelOption.CONNECT_TIMEOUT_MILLIS, conf.connectionCreationTimeoutMs()) .option(ChannelOption.ALLOCATOR, pooledAllocator); if (conf.receiveBuf() > 0) { @@ -280,9 +280,10 @@ public void initChannel(SocketChannel ch) { // Connect to the remote server long preConnect = System.nanoTime(); ChannelFuture cf = bootstrap.connect(address); - if (!cf.await(conf.connectionTimeoutMs())) { + if (!cf.await(conf.connectionCreationTimeoutMs())) { throw new IOException( - String.format("Connecting to %s timed out (%s ms)", address, conf.connectionTimeoutMs())); + String.format("Connecting to %s timed out (%s ms)", + address, conf.connectionCreationTimeoutMs())); } else if (cf.cause() != null) { throw new IOException(String.format("Failed to connect to %s", address), cf.cause()); } diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java b/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java index d305dfa8e83cf..f051042a7adb4 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java @@ -19,6 +19,7 @@ import java.util.Locale; import java.util.Properties; +import java.util.concurrent.TimeUnit; import 
com.google.common.primitives.Ints; import io.netty.util.NettyRuntime; @@ -31,6 +32,7 @@ public class TransportConf { private final String SPARK_NETWORK_IO_MODE_KEY; private final String SPARK_NETWORK_IO_PREFERDIRECTBUFS_KEY; private final String SPARK_NETWORK_IO_CONNECTIONTIMEOUT_KEY; + private final String SPARK_NETWORK_IO_CONNECTIONCREATIONTIMEOUT_KEY; private final String SPARK_NETWORK_IO_BACKLOG_KEY; private final String SPARK_NETWORK_IO_NUMCONNECTIONSPERPEER_KEY; private final String SPARK_NETWORK_IO_SERVERTHREADS_KEY; @@ -54,6 +56,7 @@ public TransportConf(String module, ConfigProvider conf) { SPARK_NETWORK_IO_MODE_KEY = getConfKey("io.mode"); SPARK_NETWORK_IO_PREFERDIRECTBUFS_KEY = getConfKey("io.preferDirectBufs"); SPARK_NETWORK_IO_CONNECTIONTIMEOUT_KEY = getConfKey("io.connectionTimeout"); + SPARK_NETWORK_IO_CONNECTIONCREATIONTIMEOUT_KEY = getConfKey("io.connectionCreationTimeout"); SPARK_NETWORK_IO_BACKLOG_KEY = getConfKey("io.backLog"); SPARK_NETWORK_IO_NUMCONNECTIONSPERPEER_KEY = getConfKey("io.numConnectionsPerPeer"); SPARK_NETWORK_IO_SERVERTHREADS_KEY = getConfKey("io.serverThreads"); @@ -94,7 +97,7 @@ public boolean preferDirectBufs() { return conf.getBoolean(SPARK_NETWORK_IO_PREFERDIRECTBUFS_KEY, true); } - /** Connect timeout in milliseconds. Default 120 secs. */ + /** Connection idle timeout in milliseconds. Default 120 secs. */ public int connectionTimeoutMs() { long defaultNetworkTimeoutS = JavaUtils.timeStringAsSec( conf.get("spark.network.timeout", "120s")); @@ -103,6 +106,14 @@ public int connectionTimeoutMs() { return (int) defaultTimeoutMs; } + /** Connect creation timeout in milliseconds. Default 30 secs. */ + public int connectionCreationTimeoutMs() { + long connectionTimeoutS = TimeUnit.MILLISECONDS.toSeconds(connectionTimeoutMs()); + long defaultTimeoutMs = JavaUtils.timeStringAsSec( + conf.get(SPARK_NETWORK_IO_CONNECTIONCREATIONTIMEOUT_KEY, connectionTimeoutS + "s")) * 1000; + return (int) defaultTimeoutMs; + } + /** Number of concurrent connections between two nodes for fetching data. 
*/ public int numConnectionsPerPeer() { return conf.getInt(SPARK_NETWORK_IO_NUMCONNECTIONSPERPEER_KEY, 1); diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java b/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java index 256789b8c7827..3dbee1b13d287 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java @@ -31,7 +31,6 @@ import scala.Tuple2; import scala.collection.Iterator; -import com.google.common.annotations.VisibleForTesting; import com.google.common.io.Closeables; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -178,8 +177,8 @@ public void write(Iterator> records) throws IOException { } } - @VisibleForTesting - long[] getPartitionLengths() { + @Override + public long[] getPartitionLengths() { return partitionLengths; } diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java b/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java index 79e38a824fea4..e8f94ba8ffeee 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java @@ -88,6 +88,7 @@ public class UnsafeShuffleWriter extends ShuffleWriter { @Nullable private MapStatus mapStatus; @Nullable private ShuffleExternalSorter sorter; + @Nullable private long[] partitionLengths; private long peakMemoryUsedBytes = 0; /** Subclass of ByteArrayOutputStream that exposes `buf` directly. */ @@ -219,7 +220,6 @@ void closeAndWriteOutput() throws IOException { serOutputStream = null; final SpillInfo[] spills = sorter.closeAndGetSpills(); sorter = null; - final long[] partitionLengths; try { partitionLengths = mergeSpills(spills); } finally { @@ -543,4 +543,9 @@ public void close() throws IOException { channel.close(); } } + + @Override + public long[] getPartitionLengths() { + return partitionLengths; + } } diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index c58009c166a60..3865c9c987b1c 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -47,7 +47,7 @@ import org.apache.spark.metrics.source.JVMCPUSource import org.apache.spark.resource.ResourceInformation import org.apache.spark.rpc.RpcTimeout import org.apache.spark.scheduler._ -import org.apache.spark.shuffle.FetchFailedException +import org.apache.spark.shuffle.{FetchFailedException, ShuffleBlockPusher} import org.apache.spark.storage.{StorageLevel, TaskResultBlockId} import org.apache.spark.util._ import org.apache.spark.util.io.ChunkedByteBuffer @@ -325,6 +325,7 @@ private[spark] class Executor( case NonFatal(e) => logWarning("Unable to stop heartbeater", e) } + ShuffleBlockPusher.stop() threadPool.shutdown() // Notify plugins that executor is shutting down so they can terminate cleanly diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index adaf92d5a8aa1..84c66470288ff 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -2030,4 +2030,33 @@ package object config { .version("3.1.0") .doubleConf .createWithDefault(5) + + private[spark] val 
SHUFFLE_NUM_PUSH_THREADS = + ConfigBuilder("spark.shuffle.push.numPushThreads") + .doc("Specify the number of threads in the block pusher pool. These threads assist " + + "in creating connections and pushing blocks to remote shuffle services. By default, the " + + "threadpool size is equal to the number of spark executor cores.") + .version("3.2.0") + .intConf + .createOptional + + private[spark] val SHUFFLE_MAX_BLOCK_SIZE_TO_PUSH = + ConfigBuilder("spark.shuffle.push.maxBlockSizeToPush") + .doc("The max size of an individual block to push to the remote shuffle services. Blocks " + + "larger than this threshold are not pushed to be merged remotely. These shuffle blocks " + + "will be fetched by the executors in the original manner.") + .version("3.2.0") + .bytesConf(ByteUnit.BYTE) + .createWithDefaultString("1m") + + private[spark] val SHUFFLE_MAX_BLOCK_BATCH_SIZE_FOR_PUSH = + ConfigBuilder("spark.shuffle.push.maxBlockBatchSize") + .doc("The max size of a batch of shuffle blocks to be grouped into a single push request.") + .version("3.2.0") + .bytesConf(ByteUnit.BYTE) + // Default is 3m because it is greater than 2m which is the default value for + // TransportConf#memoryMapBytes. If this defaults to 2m as well it is very likely that each + // batch of block will be loaded in memory with memory mapping, which has higher overhead + // with small MB sized chunk of data. + .createWithDefaultString("3m") } diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockPusher.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockPusher.scala new file mode 100644 index 0000000000000..88d084ce1b2f4 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockPusher.scala @@ -0,0 +1,450 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
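As a rough illustration of how the three `spark.shuffle.push.*` settings introduced above could be tuned from application code; the values are arbitrary examples rather than recommendations, and push-based shuffle itself is gated by a separate enable flag:
```
import org.apache.spark.SparkConf

// Illustrative values only; defaults are the executor core count, 1m and 3m respectively.
val sparkConf = new SparkConf()
  .set("spark.shuffle.push.numPushThreads", "8")
  .set("spark.shuffle.push.maxBlockSizeToPush", "1m")
  .set("spark.shuffle.push.maxBlockBatchSize", "3m")
```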
+ */ + +package org.apache.spark.shuffle + +import java.io.File +import java.net.ConnectException +import java.nio.ByteBuffer +import java.util.concurrent.ExecutorService + +import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, Queue} + +import com.google.common.base.Throwables + +import org.apache.spark.{ShuffleDependency, SparkConf, SparkEnv} +import org.apache.spark.annotation.Since +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config._ +import org.apache.spark.launcher.SparkLauncher +import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer, NioManagedBuffer} +import org.apache.spark.network.netty.SparkTransportConf +import org.apache.spark.network.shuffle.BlockFetchingListener +import org.apache.spark.network.shuffle.ErrorHandler.BlockPushErrorHandler +import org.apache.spark.network.util.TransportConf +import org.apache.spark.shuffle.ShuffleBlockPusher._ +import org.apache.spark.storage.{BlockId, BlockManagerId, ShufflePushBlockId} +import org.apache.spark.util.{ThreadUtils, Utils} + +/** + * Used for pushing shuffle blocks to remote shuffle services when push shuffle is enabled. + * When push shuffle is enabled, it is created after the shuffle writer finishes writing the shuffle + * file and initiates the block push process. + * + * @param conf spark configuration + */ +@Since("3.2.0") +private[spark] class ShuffleBlockPusher(conf: SparkConf) extends Logging { + private[this] val maxBlockSizeToPush = conf.get(SHUFFLE_MAX_BLOCK_SIZE_TO_PUSH) + private[this] val maxBlockBatchSize = conf.get(SHUFFLE_MAX_BLOCK_BATCH_SIZE_FOR_PUSH) + private[this] val maxBytesInFlight = + conf.getSizeAsMb("spark.reducer.maxSizeInFlight", "48m") * 1024 * 1024 + private[this] val maxReqsInFlight = conf.getInt("spark.reducer.maxReqsInFlight", Int.MaxValue) + private[this] val maxBlocksInFlightPerAddress = conf.get(REDUCER_MAX_BLOCKS_IN_FLIGHT_PER_ADDRESS) + private[this] var bytesInFlight = 0L + private[this] var reqsInFlight = 0 + private[this] val numBlocksInFlightPerAddress = new HashMap[BlockManagerId, Int]() + private[this] val deferredPushRequests = new HashMap[BlockManagerId, Queue[PushRequest]]() + private[this] val pushRequests = new Queue[PushRequest] + private[this] val errorHandler = createErrorHandler() + // VisibleForTesting + private[shuffle] val unreachableBlockMgrs = new HashSet[BlockManagerId]() + + // VisibleForTesting + private[shuffle] def createErrorHandler(): BlockPushErrorHandler = { + new BlockPushErrorHandler() { + // For a connection exception against a particular host, we will stop pushing any + // blocks to just that host and continue push blocks to other hosts. So, here push of + // all blocks will only stop when it is "Too Late". Also see updateStateAndCheckIfPushMore. + override def shouldRetryError(t: Throwable): Boolean = { + // If the block is too late, there is no need to retry it + !Throwables.getStackTraceAsString(t).contains(BlockPushErrorHandler.TOO_LATE_MESSAGE_SUFFIX) + } + } + } + + /** + * Initiates the block push. 
+ * + * @param dataFile mapper generated shuffle data file + * @param partitionLengths array of shuffle block size so we can tell shuffle block + * @param dep shuffle dependency to get shuffle ID and the location of remote shuffle + * services to push local shuffle blocks + * @param mapIndex map index of the shuffle map task + */ + private[shuffle] def initiateBlockPush( + dataFile: File, + partitionLengths: Array[Long], + dep: ShuffleDependency[_, _, _], + mapIndex: Int): Unit = { + val numPartitions = dep.partitioner.numPartitions + val transportConf = SparkTransportConf.fromSparkConf(conf, "shuffle") + val requests = prepareBlockPushRequests(numPartitions, mapIndex, dep.shuffleId, dataFile, + partitionLengths, dep.getMergerLocs, transportConf) + // Randomize the orders of the PushRequest, so different mappers pushing blocks at the same + // time won't be pushing the same ranges of shuffle partitions. + pushRequests ++= Utils.randomize(requests) + + submitTask(() => { + pushUpToMax() + }) + } + + /** + * Triggers the push. It's a separate method for testing. + * VisibleForTesting + */ + protected def submitTask(task: Runnable): Unit = { + if (BLOCK_PUSHER_POOL != null) { + BLOCK_PUSHER_POOL.execute(task) + } + } + + /** + * Since multiple block push threads could potentially be calling pushUpToMax for the same + * mapper, we synchronize access to this method so that only one thread can push blocks for + * a given mapper. This helps to simplify access to the shared states. The down side of this + * is that we could unnecessarily block other mappers' block pushes if all the threads + * are occupied by block pushes from the same mapper. + * + * This code is similar to ShuffleBlockFetcherIterator#fetchUpToMaxBytes in how it throttles + * the data transfer between shuffle client/server. + */ + private def pushUpToMax(): Unit = synchronized { + // Process any outstanding deferred push requests if possible. + if (deferredPushRequests.nonEmpty) { + for ((remoteAddress, defReqQueue) <- deferredPushRequests) { + while (isRemoteBlockPushable(defReqQueue) && + !isRemoteAddressMaxedOut(remoteAddress, defReqQueue.front)) { + val request = defReqQueue.dequeue() + logDebug(s"Processing deferred push request for $remoteAddress with " + + s"${request.blocks.length} blocks") + sendRequest(request) + if (defReqQueue.isEmpty) { + deferredPushRequests -= remoteAddress + } + } + } + } + + // Process any regular push requests if possible. + while (isRemoteBlockPushable(pushRequests)) { + val request = pushRequests.dequeue() + val remoteAddress = request.address + if (isRemoteAddressMaxedOut(remoteAddress, request)) { + logDebug(s"Deferring push request for $remoteAddress with ${request.blocks.size} blocks") + deferredPushRequests.getOrElseUpdate(remoteAddress, new Queue[PushRequest]()) + .enqueue(request) + } else { + sendRequest(request) + } + } + + def isRemoteBlockPushable(pushReqQueue: Queue[PushRequest]): Boolean = { + pushReqQueue.nonEmpty && + (bytesInFlight == 0 || + (reqsInFlight + 1 <= maxReqsInFlight && + bytesInFlight + pushReqQueue.front.size <= maxBytesInFlight)) + } + + // Checks if sending a new push request will exceed the max no. of blocks being pushed to a + // given remote address. + def isRemoteAddressMaxedOut(remoteAddress: BlockManagerId, request: PushRequest): Boolean = { + (numBlocksInFlightPerAddress.getOrElse(remoteAddress, 0) + + request.blocks.size) > maxBlocksInFlightPerAddress + } + } + + /** + * Push blocks to remote shuffle server. 
The callback listener will invoke #pushUpToMax again + * to trigger pushing the next batch of blocks once some block transfer is done in the current + * batch. This way, we decouple the map task from the block push process, since it is netty + * client thread instead of task execution thread which takes care of majority of the block + * pushes. + */ + private def sendRequest(request: PushRequest): Unit = { + bytesInFlight += request.size + reqsInFlight += 1 + numBlocksInFlightPerAddress(request.address) = numBlocksInFlightPerAddress.getOrElseUpdate( + request.address, 0) + request.blocks.length + + val sizeMap = request.blocks.map { case (blockId, size) => (blockId.toString, size) }.toMap + val address = request.address + val blockIds = request.blocks.map(_._1.toString) + val remainingBlocks = new HashSet[String]() ++= blockIds + + val blockPushListener = new BlockFetchingListener { + // Initiating a connection and pushing blocks to a remote shuffle service is always handled by + // the block-push-threads. We should not initiate the connection creation in the + // blockPushListener callbacks which are invoked by the netty eventloop because: + // 1. TrasportClient.createConnection(...) blocks for connection to be established and it's + // recommended to avoid any blocking operations in the eventloop; + // 2. The actual connection creation is a task that gets added to the task queue of another + // eventloop which could have eventloops eventually blocking each other. + // Once the blockPushListener is notified of the block push success or failure, we + // just delegate it to block-push-threads. + def handleResult(result: PushResult): Unit = { + submitTask(() => { + if (updateStateAndCheckIfPushMore( + sizeMap(result.blockId), address, remainingBlocks, result)) { + pushUpToMax() + } + }) + } + + override def onBlockFetchSuccess(blockId: String, data: ManagedBuffer): Unit = { + logTrace(s"Push for block $blockId to $address successful.") + handleResult(PushResult(blockId, null)) + } + + override def onBlockFetchFailure(blockId: String, exception: Throwable): Unit = { + // check the message or it's cause to see it needs to be logged. + if (!errorHandler.shouldLogError(exception)) { + logTrace(s"Pushing block $blockId to $address failed.", exception) + } else { + logWarning(s"Pushing block $blockId to $address failed.", exception) + } + handleResult(PushResult(blockId, exception)) + } + } + SparkEnv.get.blockManager.blockStoreClient.pushBlocks( + address.host, address.port, blockIds.toArray, + sliceReqBufferIntoBlockBuffers(request.reqBuffer, request.blocks.map(_._2)), + blockPushListener) + } + + /** + * Given the ManagedBuffer representing all the continuous blocks inside the shuffle data file + * for a PushRequest and an array of individual block sizes, load the buffer from disk into + * memory and slice it into multiple smaller buffers representing each block. + * + * With nio ByteBuffer, the individual block buffers share data with the initial in memory + * buffer loaded from disk. Thus only one copy of the block data is kept in memory. 
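A minimal, self-contained sketch of the slicing idea described above, assuming a 6-byte buffer holding three blocks of sizes 1, 2 and 3; each resulting slice is a view over the same backing memory rather than a copy:
```
import java.nio.ByteBuffer

val inMemoryBuffer = ByteBuffer.wrap(Array[Byte](0, 1, 2, 3, 4, 5))
val blockSizes = Seq(1, 2, 3)
// Running offsets of each block within the buffer: Seq(0, 1, 3)
val blockOffsets = blockSizes.scanLeft(0)(_ + _).init
val slices = blockOffsets.zip(blockSizes).map { case (offset, size) =>
  // duplicate() shares the underlying bytes; position/limit/slice carve out one block
  inMemoryBuffer.duplicate()
    .position(offset)
    .limit(offset + size)
    .asInstanceOf[ByteBuffer]
    .slice()
}
assert(slices.map(_.remaining()) == Seq(1, 2, 3))
```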
+ * @param reqBuffer A {{FileSegmentManagedBuffer}} representing all the continuous blocks in + * the shuffle data file for a PushRequest + * @param blockSizes Array of block sizes + * @return Array of in memory buffer for each individual block + */ + private def sliceReqBufferIntoBlockBuffers( + reqBuffer: ManagedBuffer, + blockSizes: Seq[Int]): Array[ManagedBuffer] = { + if (blockSizes.size == 1) { + Array(reqBuffer) + } else { + val inMemoryBuffer = reqBuffer.nioByteBuffer() + val blockOffsets = new Array[Int](blockSizes.size) + var offset = 0 + for (index <- blockSizes.indices) { + blockOffsets(index) = offset + offset += blockSizes(index) + } + blockOffsets.zip(blockSizes).map { + case (offset, size) => + new NioManagedBuffer(inMemoryBuffer.duplicate() + .position(offset) + .limit(offset + size).asInstanceOf[ByteBuffer].slice()) + }.toArray + } + } + + /** + * Updates the stats and based on the previous push result decides whether to push more blocks + * or stop. + * + * @param bytesPushed number of bytes pushed. + * @param address address of the remote service + * @param remainingBlocks remaining blocks + * @param pushResult result of the last push + * @return true if more blocks should be pushed; false otherwise. + */ + private def updateStateAndCheckIfPushMore( + bytesPushed: Long, + address: BlockManagerId, + remainingBlocks: HashSet[String], + pushResult: PushResult): Boolean = synchronized { + remainingBlocks -= pushResult.blockId + bytesInFlight -= bytesPushed + numBlocksInFlightPerAddress(address) = numBlocksInFlightPerAddress(address) - 1 + if (remainingBlocks.isEmpty) { + reqsInFlight -= 1 + } + if (pushResult.failure != null && pushResult.failure.getCause.isInstanceOf[ConnectException]) { + // Remove all the blocks for this address just once because removing from pushRequests + // is expensive. If there is a ConnectException for the first block, all the subsequent + // blocks to that address will fail, so should avoid removing multiple times. + if (!unreachableBlockMgrs.contains(address)) { + var removed = 0 + unreachableBlockMgrs.add(address) + removed += pushRequests.dequeueAll(req => req.address == address).length + removed += deferredPushRequests.remove(address).map(_.length).getOrElse(0) + logWarning(s"Received a ConnectException from $address. " + + s"Dropping $removed push-requests and " + + s"not pushing any more blocks to this address.") + } + } + if (pushResult.failure != null && !errorHandler.shouldRetryError(pushResult.failure)) { + logDebug(s"Received after merge is finalized from $address. Not pushing any more blocks.") + return false + } else { + remainingBlocks.isEmpty && (pushRequests.nonEmpty || deferredPushRequests.nonEmpty) + } + } + + /** + * Convert the shuffle data file of the current mapper into a list of PushRequest. Basically, + * continuous blocks in the shuffle file are grouped into a single request to allow more + * efficient read of the block data. Each mapper for a given shuffle will receive the same + * list of BlockManagerIds as the target location to push the blocks to. All mappers in the + * same shuffle will map shuffle partition ranges to individual target locations in a consistent + * manner to make sure each target location receives shuffle blocks belonging to the same set + * of partition ranges. 0-length blocks and blocks that are large enough will be skipped. 
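To make the consistent partition-range-to-merger mapping described above concrete, here is a small sketch with assumed values (8 reduce partitions, 2 merger locations); every mapper evaluates the same expression and therefore derives the same assignment:
```
// The mergerId formula as used in prepareBlockPushRequests below
val numPartitions = 8
val numMergers = 2
val assignment = (0 until numPartitions).map { reduceId =>
  math.min(math.floor(reduceId * 1.0 / numPartitions * numMergers), numMergers - 1).toInt
}
// Reducers 0-3 go to merger 0, reducers 4-7 go to merger 1
assert(assignment == Seq(0, 0, 0, 0, 1, 1, 1, 1))
```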
+ * + * @param numPartitions sumber of shuffle partitions in the shuffle file + * @param partitionId map index of the current mapper + * @param shuffleId shuffleId of current shuffle + * @param dataFile shuffle data file + * @param partitionLengths array of sizes of blocks in the shuffle data file + * @param mergerLocs target locations to push blocks to + * @param transportConf transportConf used to create FileSegmentManagedBuffer + * @return List of the PushRequest, randomly shuffled. + * + * VisibleForTesting + */ + private[shuffle] def prepareBlockPushRequests( + numPartitions: Int, + partitionId: Int, + shuffleId: Int, + dataFile: File, + partitionLengths: Array[Long], + mergerLocs: Seq[BlockManagerId], + transportConf: TransportConf): Seq[PushRequest] = { + var offset = 0L + var currentReqSize = 0 + var currentReqOffset = 0L + var currentMergerId = 0 + val numMergers = mergerLocs.length + val requests = new ArrayBuffer[PushRequest] + var blocks = new ArrayBuffer[(BlockId, Int)] + for (reduceId <- 0 until numPartitions) { + val blockSize = partitionLengths(reduceId) + logDebug( + s"Block ${ShufflePushBlockId(shuffleId, partitionId, reduceId)} is of size $blockSize") + // Skip 0-length blocks and blocks that are large enough + if (blockSize > 0) { + val mergerId = math.min(math.floor(reduceId * 1.0 / numPartitions * numMergers), + numMergers - 1).asInstanceOf[Int] + // Start a new PushRequest if the current request goes beyond the max batch size, + // or the number of blocks in the current request goes beyond the limit per destination, + // or the next block push location is for a different shuffle service, or the next block + // exceeds the max block size to push limit. This guarantees that each PushRequest + // represents continuous blocks in the shuffle file to be pushed to the same shuffle + // service, and does not go beyond existing limitations. 
+ if (currentReqSize + blockSize <= maxBlockBatchSize + && blocks.size < maxBlocksInFlightPerAddress + && mergerId == currentMergerId && blockSize <= maxBlockSizeToPush) { + // Add current block to current batch + currentReqSize += blockSize.toInt + } else { + if (blocks.nonEmpty) { + // Convert the previous batch into a PushRequest + requests += PushRequest(mergerLocs(currentMergerId), blocks.toSeq, + createRequestBuffer(transportConf, dataFile, currentReqOffset, currentReqSize)) + blocks = new ArrayBuffer[(BlockId, Int)] + } + // Start a new batch + currentReqSize = 0 + // Set currentReqOffset to -1 so we are able to distinguish between the initial value + // of currentReqOffset and when we are about to start a new batch + currentReqOffset = -1 + currentMergerId = mergerId + } + // Only push blocks under the size limit + if (blockSize <= maxBlockSizeToPush) { + val blockSizeInt = blockSize.toInt + blocks += ((ShufflePushBlockId(shuffleId, partitionId, reduceId), blockSizeInt)) + // Only update currentReqOffset if the current block is the first in the request + if (currentReqOffset == -1) { + currentReqOffset = offset + } + if (currentReqSize == 0) { + currentReqSize += blockSizeInt + } + } + } + offset += blockSize + } + // Add in the final request + if (blocks.nonEmpty) { + requests += PushRequest(mergerLocs(currentMergerId), blocks.toSeq, + createRequestBuffer(transportConf, dataFile, currentReqOffset, currentReqSize)) + } + requests.toSeq + } + + // Visible for testing + protected def createRequestBuffer( + conf: TransportConf, + dataFile: File, + offset: Long, + length: Long): ManagedBuffer = { + new FileSegmentManagedBuffer(conf, dataFile, offset, length) + } +} + +private[spark] object ShuffleBlockPusher { + + /** + * A request to push blocks to a remote shuffle service + * @param address remote shuffle service location to push blocks to + * @param blocks list of block IDs and their sizes + * @param reqBuffer a chunk of data in the shuffle data file corresponding to the continuous + * blocks represented in this request + */ + private[spark] case class PushRequest( + address: BlockManagerId, + blocks: Seq[(BlockId, Int)], + reqBuffer: ManagedBuffer) { + val size = blocks.map(_._2).sum + } + + /** + * Result of the block push. + * @param blockId blockId + * @param failure exception if the push was unsuccessful; null otherwise; + */ + private case class PushResult(blockId: String, failure: Throwable) + + private val BLOCK_PUSHER_POOL: ExecutorService = { + val conf = SparkEnv.get.conf + if (Utils.isPushBasedShuffleEnabled(conf)) { + val numThreads = conf.get(SHUFFLE_NUM_PUSH_THREADS) + .getOrElse(conf.getInt(SparkLauncher.EXECUTOR_CORES, 1)) + ThreadUtils.newDaemonFixedThreadPool(numThreads, "shuffle-block-push-thread") + } else { + null + } + } + + /** + * Stop the shuffle pusher pool if it isn't null. 
+ */ + private[spark] def stop(): Unit = { + if (BLOCK_PUSHER_POOL != null) { + BLOCK_PUSHER_POOL.shutdown() + } + } +} diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala index 1429144c6f6e2..abff650b0611b 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala @@ -21,6 +21,7 @@ import org.apache.spark.{Partition, ShuffleDependency, SparkEnv, TaskContext} import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD import org.apache.spark.scheduler.MapStatus +import org.apache.spark.util.Utils /** * The interface for customizing shuffle write process. The driver create a ShuffleWriteProcessor @@ -57,7 +58,23 @@ private[spark] class ShuffleWriteProcessor extends Serializable with Logging { createMetricsReporter(context)) writer.write( rdd.iterator(partition, context).asInstanceOf[Iterator[_ <: Product2[Any, Any]]]) - writer.stop(success = true).get + val mapStatus = writer.stop(success = true) + if (mapStatus.isDefined) { + // Initiate shuffle push process if push based shuffle is enabled + // The map task only takes care of converting the shuffle data file into multiple + // block push requests. It delegates pushing the blocks to a different thread-pool - + // ShuffleBlockPusher.BLOCK_PUSHER_POOL. + if (Utils.isPushBasedShuffleEnabled(SparkEnv.get.conf) && dep.getMergerLocs.nonEmpty) { + manager.shuffleBlockResolver match { + case resolver: IndexShuffleBlockResolver => + val dataFile = resolver.getDataFile(dep.shuffleId, mapId) + new ShuffleBlockPusher(SparkEnv.get.conf) + .initiateBlockPush(dataFile, writer.getPartitionLengths(), dep, partition.index) + case _ => + } + } + } + mapStatus.get } catch { case e: Exception => try { diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriter.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriter.scala index 4cc4ef5f1886e..a279b4c8f42f4 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriter.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriter.scala @@ -31,4 +31,7 @@ private[spark] abstract class ShuffleWriter[K, V] { /** Close this writer, passing along whether the map completed */ def stop(success: Boolean): Option[MapStatus] + + /** Get the lengths of each partition */ + def getPartitionLengths(): Array[Long] } diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala index 83ebe3e12946c..af8d1e2fff413 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala @@ -45,6 +45,8 @@ private[spark] class SortShuffleWriter[K, V, C]( private var mapStatus: MapStatus = null + private var partitionLengths: Array[Long] = _ + private val writeMetrics = context.taskMetrics().shuffleWriteMetrics /** Write a bunch of records to this task's output */ @@ -67,7 +69,7 @@ private[spark] class SortShuffleWriter[K, V, C]( val mapOutputWriter = shuffleExecutorComponents.createMapOutputWriter( dep.shuffleId, mapId, dep.partitioner.numPartitions) sorter.writePartitionedMapOutput(dep.shuffleId, mapId, mapOutputWriter) - val partitionLengths = mapOutputWriter.commitAllPartitions().getPartitionLengths + partitionLengths = 
mapOutputWriter.commitAllPartitions().getPartitionLengths mapStatus = MapStatus(blockManager.shuffleServerId, partitionLengths, mapId) } @@ -93,6 +95,8 @@ private[spark] class SortShuffleWriter[K, V, C]( } } } + + override def getPartitionLengths(): Array[Long] = partitionLengths } private[spark] object SortShuffleWriter { diff --git a/core/src/main/scala/org/apache/spark/storage/BlockId.scala b/core/src/main/scala/org/apache/spark/storage/BlockId.scala index 7b084e73c92f9..73bf809a08a68 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockId.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockId.scala @@ -20,7 +20,7 @@ package org.apache.spark.storage import java.util.UUID import org.apache.spark.SparkException -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Since} /** * :: DeveloperApi :: @@ -81,6 +81,12 @@ case class ShuffleIndexBlockId(shuffleId: Int, mapId: Long, reduceId: Int) exten override def name: String = "shuffle_" + shuffleId + "_" + mapId + "_" + reduceId + ".index" } +@Since("3.2.0") +@DeveloperApi +case class ShufflePushBlockId(shuffleId: Int, mapIndex: Int, reduceId: Int) extends BlockId { + override def name: String = "shufflePush_" + shuffleId + "_" + mapIndex + "_" + reduceId +} + @DeveloperApi case class BroadcastBlockId(broadcastId: Long, field: String = "") extends BlockId { override def name: String = "broadcast_" + broadcastId + (if (field == "") "" else "_" + field) @@ -122,6 +128,7 @@ object BlockId { val SHUFFLE_BATCH = "shuffle_([0-9]+)_([0-9]+)_([0-9]+)_([0-9]+)".r val SHUFFLE_DATA = "shuffle_([0-9]+)_([0-9]+)_([0-9]+).data".r val SHUFFLE_INDEX = "shuffle_([0-9]+)_([0-9]+)_([0-9]+).index".r + val SHUFFLE_PUSH = "shufflePush_([0-9]+)_([0-9]+)_([0-9]+)".r val BROADCAST = "broadcast_([0-9]+)([_A-Za-z0-9]*)".r val TASKRESULT = "taskresult_([0-9]+)".r val STREAM = "input-([0-9]+)-([0-9]+)".r @@ -140,6 +147,8 @@ object BlockId { ShuffleDataBlockId(shuffleId.toInt, mapId.toLong, reduceId.toInt) case SHUFFLE_INDEX(shuffleId, mapId, reduceId) => ShuffleIndexBlockId(shuffleId.toInt, mapId.toLong, reduceId.toInt) + case SHUFFLE_PUSH(shuffleId, mapIndex, reduceId) => + ShufflePushBlockId(shuffleId.toInt, mapIndex.toInt, reduceId.toInt) case BROADCAST(broadcastId, field) => BroadcastBlockId(broadcastId.toLong, field.stripPrefix("_")) case TASKRESULT(taskId) => diff --git a/core/src/test/scala/org/apache/spark/shuffle/ShuffleBlockPusherSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/ShuffleBlockPusherSuite.scala new file mode 100644 index 0000000000000..cc561e6106019 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/shuffle/ShuffleBlockPusherSuite.scala @@ -0,0 +1,355 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
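A quick sketch of the new block ID scheme added to BlockId.scala above: the name encodes shuffleId, mapIndex and reduceId, and `BlockId.apply` parses it back via the new SHUFFLE_PUSH pattern:
```
import org.apache.spark.storage.{BlockId, ShufflePushBlockId}

val pushBlock = ShufflePushBlockId(shuffleId = 0, mapIndex = 1, reduceId = 2)
assert(pushBlock.name == "shufflePush_0_1_2")
// Parsing the name yields an equal block ID
assert(BlockId("shufflePush_0_1_2") == pushBlock)
```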
+ */ + +package org.apache.spark.shuffle + +import java.io.File +import java.net.ConnectException +import java.nio.ByteBuffer +import java.util.concurrent.LinkedBlockingQueue + +import scala.collection.mutable.ArrayBuffer + +import org.mockito.{Mock, MockitoAnnotations} +import org.mockito.Answers.RETURNS_SMART_NULLS +import org.mockito.ArgumentMatchers.any +import org.mockito.Mockito._ +import org.mockito.invocation.InvocationOnMock +import org.scalatest.BeforeAndAfterEach + +import org.apache.spark._ +import org.apache.spark.network.buffer.ManagedBuffer +import org.apache.spark.network.shuffle.{BlockFetchingListener, BlockStoreClient} +import org.apache.spark.network.shuffle.ErrorHandler.BlockPushErrorHandler +import org.apache.spark.network.util.TransportConf +import org.apache.spark.serializer.JavaSerializer +import org.apache.spark.shuffle.ShuffleBlockPusher.PushRequest +import org.apache.spark.storage._ + +class ShuffleBlockPusherSuite extends SparkFunSuite with BeforeAndAfterEach { + + @Mock(answer = RETURNS_SMART_NULLS) private var blockManager: BlockManager = _ + @Mock(answer = RETURNS_SMART_NULLS) private var dependency: ShuffleDependency[Int, Int, Int] = _ + @Mock(answer = RETURNS_SMART_NULLS) private var shuffleClient: BlockStoreClient = _ + + private var conf: SparkConf = _ + private var pushedBlocks = new ArrayBuffer[String] + + override def beforeEach(): Unit = { + super.beforeEach() + conf = new SparkConf(loadDefaults = false) + MockitoAnnotations.initMocks(this) + when(dependency.partitioner).thenReturn(new HashPartitioner(8)) + when(dependency.serializer).thenReturn(new JavaSerializer(conf)) + when(dependency.getMergerLocs).thenReturn(Seq(BlockManagerId("test-client", "test-client", 1))) + conf.set("spark.shuffle.push.based.enabled", "true") + conf.set("spark.shuffle.service.enabled", "true") + // Set the env because the shuffler writer gets the shuffle client instance from the env. 
+ val mockEnv = mock(classOf[SparkEnv]) + when(mockEnv.conf).thenReturn(conf) + when(mockEnv.blockManager).thenReturn(blockManager) + SparkEnv.set(mockEnv) + when(blockManager.blockStoreClient).thenReturn(shuffleClient) + } + + override def afterEach(): Unit = { + pushedBlocks.clear() + super.afterEach() + } + + private def interceptPushedBlocksForSuccess(): Unit = { + when(shuffleClient.pushBlocks(any(), any(), any(), any(), any())) + .thenAnswer((invocation: InvocationOnMock) => { + val blocks = invocation.getArguments()(2).asInstanceOf[Array[String]] + pushedBlocks ++= blocks + val managedBuffers = invocation.getArguments()(3).asInstanceOf[Array[ManagedBuffer]] + val blockFetchListener = invocation.getArguments()(4).asInstanceOf[BlockFetchingListener] + (blocks, managedBuffers).zipped.foreach((blockId, buffer) => { + blockFetchListener.onBlockFetchSuccess(blockId, buffer) + }) + }) + } + + private def verifyPushRequests( + pushRequests: Seq[PushRequest], + expectedSizes: Seq[Int]): Unit = { + (pushRequests, expectedSizes).zipped.foreach((req, size) => { + assert(req.size == size) + }) + } + + test("A batch of blocks is limited by maxBlocksBatchSize") { + conf.set("spark.shuffle.push.maxBlockBatchSize", "1m") + conf.set("spark.shuffle.push.maxBlockSizeToPush", "2048k") + val blockPusher = new TestShuffleBlockPusher(conf) + val mergerLocs = dependency.getMergerLocs.map(loc => BlockManagerId("", loc.host, loc.port)) + val largeBlockSize = 2 * 1024 * 1024 + val pushRequests = blockPusher.prepareBlockPushRequests(5, 0, 0, + mock(classOf[File]), Array(2, 2, 2, largeBlockSize, largeBlockSize), mergerLocs, + mock(classOf[TransportConf])) + assert(pushRequests.length == 3) + verifyPushRequests(pushRequests, Seq(6, largeBlockSize, largeBlockSize)) + } + + test("Large blocks are excluded in the preparation") { + conf.set("spark.shuffle.push.maxBlockSizeToPush", "1k") + val blockPusher = new TestShuffleBlockPusher(conf) + val mergerLocs = dependency.getMergerLocs.map(loc => BlockManagerId("", loc.host, loc.port)) + val pushRequests = blockPusher.prepareBlockPushRequests(5, 0, 0, + mock(classOf[File]), Array(2, 2, 2, 1028, 1024), mergerLocs, mock(classOf[TransportConf])) + assert(pushRequests.length == 2) + verifyPushRequests(pushRequests, Seq(6, 1024)) + } + + test("Number of blocks in a push request are limited by maxBlocksInFlightPerAddress ") { + conf.set("spark.reducer.maxBlocksInFlightPerAddress", "1") + val blockPusher = new TestShuffleBlockPusher(conf) + val mergerLocs = dependency.getMergerLocs.map(loc => BlockManagerId("", loc.host, loc.port)) + val pushRequests = blockPusher.prepareBlockPushRequests(5, 0, 0, + mock(classOf[File]), Array(2, 2, 2, 2, 2), mergerLocs, mock(classOf[TransportConf])) + assert(pushRequests.length == 5) + verifyPushRequests(pushRequests, Seq(2, 2, 2, 2, 2)) + } + + test("Basic block push") { + interceptPushedBlocksForSuccess() + val blockPusher = new TestShuffleBlockPusher(conf) + blockPusher.initiateBlockPush(mock(classOf[File]), + Array.fill(dependency.partitioner.numPartitions) { 2 }, dependency, 0) + blockPusher.runPendingTasks() + verify(shuffleClient, times(1)) + .pushBlocks(any(), any(), any(), any(), any()) + assert(pushedBlocks.length == dependency.partitioner.numPartitions) + ShuffleBlockPusher.stop() + } + + test("Large blocks are skipped for push") { + conf.set("spark.shuffle.push.maxBlockSizeToPush", "1k") + interceptPushedBlocksForSuccess() + val pusher = new TestShuffleBlockPusher(conf) + pusher.initiateBlockPush( + mock(classOf[File]), Array(2, 2, 
2, 2, 2, 2, 2, 1100), dependency, 0) + pusher.runPendingTasks() + verify(shuffleClient, times(1)) + .pushBlocks(any(), any(), any(), any(), any()) + assert(pushedBlocks.length == dependency.partitioner.numPartitions - 1) + ShuffleBlockPusher.stop() + } + + test("Number of blocks in flight per address are limited by maxBlocksInFlightPerAddress") { + conf.set("spark.reducer.maxBlocksInFlightPerAddress", "1") + interceptPushedBlocksForSuccess() + val pusher = new TestShuffleBlockPusher(conf) + pusher.initiateBlockPush( + mock(classOf[File]), Array.fill(dependency.partitioner.numPartitions) { 2 }, dependency, 0) + pusher.runPendingTasks() + verify(shuffleClient, times(8)) + .pushBlocks(any(), any(), any(), any(), any()) + assert(pushedBlocks.length == dependency.partitioner.numPartitions) + ShuffleBlockPusher.stop() + } + + test("Hit maxBlocksInFlightPerAddress limit so that the blocks are deferred") { + conf.set("spark.reducer.maxBlocksInFlightPerAddress", "2") + var blockPendingResponse : String = null + var listener : BlockFetchingListener = null + when(shuffleClient.pushBlocks(any(), any(), any(), any(), any())) + .thenAnswer((invocation: InvocationOnMock) => { + val blocks = invocation.getArguments()(2).asInstanceOf[Array[String]] + pushedBlocks ++= blocks + val managedBuffers = invocation.getArguments()(3).asInstanceOf[Array[ManagedBuffer]] + val blockFetchListener = invocation.getArguments()(4).asInstanceOf[BlockFetchingListener] + // Expecting 2 blocks + assert(blocks.length == 2) + if (blockPendingResponse == null) { + blockPendingResponse = blocks(1) + listener = blockFetchListener + // Respond with success only for the first block which will cause all the rest of the + // blocks to be deferred + blockFetchListener.onBlockFetchSuccess(blocks(0), managedBuffers(0)) + } else { + (blocks, managedBuffers).zipped.foreach((blockId, buffer) => { + blockFetchListener.onBlockFetchSuccess(blockId, buffer) + }) + } + }) + val pusher = new TestShuffleBlockPusher(conf) + pusher.initiateBlockPush( + mock(classOf[File]), Array.fill(dependency.partitioner.numPartitions) { 2 }, dependency, 0) + pusher.runPendingTasks() + verify(shuffleClient, times(1)) + .pushBlocks(any(), any(), any(), any(), any()) + assert(pushedBlocks.length == 2) + // this will trigger push of deferred blocks + listener.onBlockFetchSuccess(blockPendingResponse, mock(classOf[ManagedBuffer])) + pusher.runPendingTasks() + verify(shuffleClient, times(4)) + .pushBlocks(any(), any(), any(), any(), any()) + assert(pushedBlocks.length == 8) + ShuffleBlockPusher.stop() + } + + test("Number of shuffle blocks grouped in a single push request is limited by " + + "maxBlockBatchSize") { + conf.set("spark.shuffle.push.maxBlockBatchSize", "1m") + interceptPushedBlocksForSuccess() + val pusher = new TestShuffleBlockPusher(conf) + pusher.initiateBlockPush(mock(classOf[File]), + Array.fill(dependency.partitioner.numPartitions) { 512 * 1024 }, dependency, 0) + pusher.runPendingTasks() + verify(shuffleClient, times(4)) + .pushBlocks(any(), any(), any(), any(), any()) + assert(pushedBlocks.length == dependency.partitioner.numPartitions) + ShuffleBlockPusher.stop() + } + + test("Error retries") { + val pusher = new ShuffleBlockPusher(conf) + val errorHandler = pusher.createErrorHandler() + assert( + !errorHandler.shouldRetryError(new RuntimeException( + new IllegalArgumentException(BlockPushErrorHandler.TOO_LATE_MESSAGE_SUFFIX)))) + assert(errorHandler.shouldRetryError(new RuntimeException(new ConnectException()))) + assert( + 
errorHandler.shouldRetryError(new RuntimeException(new IllegalArgumentException( + BlockPushErrorHandler.BLOCK_APPEND_COLLISION_DETECTED_MSG_PREFIX)))) + assert (errorHandler.shouldRetryError(new Throwable())) + } + + test("Error logging") { + val pusher = new ShuffleBlockPusher(conf) + val errorHandler = pusher.createErrorHandler() + assert( + !errorHandler.shouldLogError(new RuntimeException( + new IllegalArgumentException(BlockPushErrorHandler.TOO_LATE_MESSAGE_SUFFIX)))) + assert(!errorHandler.shouldLogError(new RuntimeException( + new IllegalArgumentException( + BlockPushErrorHandler.BLOCK_APPEND_COLLISION_DETECTED_MSG_PREFIX)))) + assert(errorHandler.shouldLogError(new Throwable())) + } + + test("Blocks are continued to push even when a block push fails with collision " + + "exception") { + conf.set("spark.reducer.maxBlocksInFlightPerAddress", "1") + val pusher = new TestShuffleBlockPusher(conf) + var failBlock: Boolean = true + when(shuffleClient.pushBlocks(any(), any(), any(), any(), any())) + .thenAnswer((invocation: InvocationOnMock) => { + val blocks = invocation.getArguments()(2).asInstanceOf[Array[String]] + val blockFetchListener = invocation.getArguments()(4).asInstanceOf[BlockFetchingListener] + blocks.foreach(blockId => { + if (failBlock) { + failBlock = false + // Fail the first block with the collision exception. + blockFetchListener.onBlockFetchFailure(blockId, new RuntimeException( + new IllegalArgumentException( + BlockPushErrorHandler.BLOCK_APPEND_COLLISION_DETECTED_MSG_PREFIX))) + } else { + pushedBlocks += blockId + blockFetchListener.onBlockFetchSuccess(blockId, mock(classOf[ManagedBuffer])) + } + }) + }) + pusher.initiateBlockPush( + mock(classOf[File]), Array.fill(dependency.partitioner.numPartitions) { 2 }, dependency, 0) + pusher.runPendingTasks() + verify(shuffleClient, times(8)) + .pushBlocks(any(), any(), any(), any(), any()) + assert(pushedBlocks.length == 7) + } + + test("More blocks are not pushed when a block push fails with too late " + + "exception") { + conf.set("spark.reducer.maxBlocksInFlightPerAddress", "1") + val pusher = new TestShuffleBlockPusher(conf) + var failBlock: Boolean = true + when(shuffleClient.pushBlocks(any(), any(), any(), any(), any())) + .thenAnswer((invocation: InvocationOnMock) => { + val blocks = invocation.getArguments()(2).asInstanceOf[Array[String]] + val blockFetchListener = invocation.getArguments()(4).asInstanceOf[BlockFetchingListener] + blocks.foreach(blockId => { + if (failBlock) { + failBlock = false + // Fail the first block with the too late exception. 
+ blockFetchListener.onBlockFetchFailure(blockId, new RuntimeException( + new IllegalArgumentException(BlockPushErrorHandler.TOO_LATE_MESSAGE_SUFFIX))) + } else { + pushedBlocks += blockId + blockFetchListener.onBlockFetchSuccess(blockId, mock(classOf[ManagedBuffer])) + } + }) + }) + pusher.initiateBlockPush( + mock(classOf[File]), Array.fill(dependency.partitioner.numPartitions) { 2 }, dependency, 0) + pusher.runPendingTasks() + verify(shuffleClient, times(1)) + .pushBlocks(any(), any(), any(), any(), any()) + assert(pushedBlocks.isEmpty) + } + + test("Connect exceptions remove all the push requests for that host") { + when(dependency.getMergerLocs).thenReturn( + Seq(BlockManagerId("client1", "client1", 1), BlockManagerId("client2", "client2", 2))) + conf.set("spark.reducer.maxBlocksInFlightPerAddress", "2") + when(shuffleClient.pushBlocks(any(), any(), any(), any(), any())) + .thenAnswer((invocation: InvocationOnMock) => { + val blocks = invocation.getArguments()(2).asInstanceOf[Array[String]] + pushedBlocks ++= blocks + val blockFetchListener = invocation.getArguments()(4).asInstanceOf[BlockFetchingListener] + blocks.foreach(blockId => { + blockFetchListener.onBlockFetchFailure( + blockId, new RuntimeException(new ConnectException())) + }) + }) + val pusher = new TestShuffleBlockPusher(conf) + pusher.initiateBlockPush( + mock(classOf[File]), Array.fill(dependency.partitioner.numPartitions) { 2 }, dependency, 0) + pusher.runPendingTasks() + verify(shuffleClient, times(2)) + .pushBlocks(any(), any(), any(), any(), any()) + // 2 blocks for each merger locations + assert(pushedBlocks.length == 4) + assert(pusher.unreachableBlockMgrs.size == 2) + } + + private class TestShuffleBlockPusher(conf: SparkConf) extends ShuffleBlockPusher(conf) { + private[this] val tasks = new LinkedBlockingQueue[Runnable] + + override protected def submitTask(task: Runnable): Unit = { + tasks.add(task) + } + + def runPendingTasks(): Unit = { + // This ensures that all the submitted tasks - updateStateAndCheckIfPushMore and pushUpToMax + // are run synchronously. + while (!tasks.isEmpty) { + tasks.take().run() + } + } + + override protected def createRequestBuffer( + conf: TransportConf, + dataFile: File, + offset: Long, + length: Long): ManagedBuffer = { + val managedBuffer = mock(classOf[ManagedBuffer]) + val byteBuffer = new Array[Byte](length.toInt) + when(managedBuffer.nioByteBuffer()).thenReturn(ByteBuffer.wrap(byteBuffer)) + managedBuffer + } + } +} From 6b34745cb9b294c91cd126c2ea44c039ee83cb84 Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Fri, 8 Jan 2021 20:37:35 -0800 Subject: [PATCH 1009/1009] [SPARK-34049][SS] DataSource V2: Use Write abstraction in StreamExecution ### What changes were proposed in this pull request? This PR makes `StreamExecution` use the `Write` abstraction introduced in SPARK-33779. Note: we will need separate plans for streaming writes in order to support the required distribution and ordering in SS. This change only migrates to the `Write` abstraction. ### Why are the changes needed? These changes prevent exceptions from data sources that implement only the `build` method in `WriteBuilder`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #31093 from aokolnychyi/spark-34049. 
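For context, a minimal sketch of the kind of sink this change accommodates: a `WriteBuilder` that only overrides `build()`, whose streaming writer `StreamExecution` can now obtain via `Write#toStreaming` instead of requiring `buildForStreaming()`. The `MySinkWriteBuilder` name and the injected `StreamingWrite` are placeholders, not part of this patch:
```
import org.apache.spark.sql.connector.write.{Write, WriteBuilder}
import org.apache.spark.sql.connector.write.streaming.StreamingWrite

// A builder that only implements build(); the returned Write exposes its streaming writer.
class MySinkWriteBuilder(streamingWrite: StreamingWrite) extends WriteBuilder {
  override def build(): Write = new Write {
    override def toStreaming: StreamingWrite = streamingWrite
  }
}
```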
Authored-by: Anton Okolnychyi Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/connector/InMemoryTable.scala | 10 ++++++---- .../sql/execution/streaming/StreamExecution.scala | 9 +++++---- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala index a1253dfe67e7a..27561857c1225 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala @@ -274,11 +274,13 @@ class InMemoryTable( this } - override def buildForBatch(): BatchWrite = writer + override def build(): Write = new Write { + override def toBatch: BatchWrite = writer - override def buildForStreaming(): StreamingWrite = streamingWriter match { - case exc: StreamingNotSupportedOperation => exc.throwsException() - case s => s + override def toStreaming: StreamingWrite = streamingWriter match { + case exc: StreamingNotSupportedOperation => exc.throwsException() + case s => s + } } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index c9f40fa22bf9e..67803ad76d5e5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -627,21 +627,22 @@ abstract class StreamExecution( inputPlan.schema, new CaseInsensitiveStringMap(options.asJava)) val writeBuilder = table.newWriteBuilder(info) - outputMode match { + val write = outputMode match { case Append => - writeBuilder.buildForStreaming() + writeBuilder.build() case Complete => // TODO: we should do this check earlier when we have capability API. require(writeBuilder.isInstanceOf[SupportsTruncate], table.name + " does not support Complete mode.") - writeBuilder.asInstanceOf[SupportsTruncate].truncate().buildForStreaming() + writeBuilder.asInstanceOf[SupportsTruncate].truncate().build() case Update => require(writeBuilder.isInstanceOf[SupportsStreamingUpdateAsAppend], table.name + " does not support Update mode.") - writeBuilder.asInstanceOf[SupportsStreamingUpdateAsAppend].buildForStreaming() + writeBuilder.asInstanceOf[SupportsStreamingUpdateAsAppend].build() } + write.toStreaming } protected def purge(threshold: Long): Unit = {